From 7ff8c604e6e8b56a3f3b3d491e9af6ef3781f423 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 10 Nov 2022 14:59:42 +0000
Subject: [PATCH 001/314] Ignore MSVC CRT secure warnings

---
 CMakeLists.txt             | 2 ++
 niftyreg_build_version.txt | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a510dbe3..612ab3a6 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,6 +52,8 @@ if(GIT_FOUND)
 endif(GIT_FOUND)
 #-----------------------------------------------------------------------------
 if(MSVC)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
   set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj")
   set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj")
 endif(MSVC)
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 00750edc..c67f579c 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-3
+93

From 97f78357c31f7652cf3d77f25767df243143cf52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 10 Nov 2022 15:03:25 +0000
Subject: [PATCH 002/314] Ignore folders of Visual Studio Code

---
 .gitignore                 | 2 +-
 niftyreg_build_version.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5aae76d9..d96bb96f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,7 +37,7 @@
 .vs
 CMakeSettings.json
 
-# Mac trash folder 
+# Mac trash folder
 .DS_Store
 
 # Build
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c67f579c..f906e184 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-93
+96

From bce9d515fe68a557bac5fdfdce747d98865cde42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 14 Nov 2022 15:23:37 +0000
Subject: [PATCH 003/314] Put the old F3D GPU code back

---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_f3d.cpp                          |  36 +++--
 reg-lib/CMakeLists.txt                        |   2 +
 reg-lib/cuda/CMakeLists.txt                   |   9 +-
 reg-lib/cuda/_reg_common_cuda.cu              | 126 ++++++++++--------
 reg-lib/cuda/_reg_common_cuda.h               |   8 +-
 reg-lib/cuda/_reg_cudainfo.cpp                |  50 ++++++-
 reg-lib/cuda/_reg_cudainfo.h                  |   4 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp                 | 119 ++++++-----------
 reg-lib/cuda/_reg_f3d_gpu.h                   |   4 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.cu |   4 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.h  |   4 +-
 .../cuda/_reg_globalTransformation_kernels.cu |   2 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |  28 ++--
 reg-lib/cuda/_reg_localTransformation_gpu.h   |   2 +-
 .../cuda/_reg_localTransformation_kernels.cu  |   2 +-
 reg-lib/cuda/_reg_measure_gpu.h               |   9 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                  |  14 +-
 reg-lib/cuda/_reg_nmi_gpu.h                   |   1 +
 reg-lib/cuda/_reg_optimiser_gpu.cu            |  14 +-
 reg-lib/cuda/_reg_optimiser_gpu.h             |   2 +-
 reg-lib/cuda/_reg_resampling_gpu.cu           |   4 +-
 reg-lib/cuda/_reg_resampling_gpu.h            |   3 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |  14 +-
 reg-lib/cuda/_reg_ssd_gpu.h                   |   1 +
 reg-lib/cuda/_reg_tools_gpu.cu                |  21 ++-
 reg-lib/cuda/_reg_tools_gpu.h                 |   4 +-
 27 files changed, 272 insertions(+), 217 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f906e184..c17e934b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-96
+97
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index b7ae8384..4aa9be5b 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -17,6 +17,10 @@
 #include <float.h>
 //#include <libgen.h> //DOES NOT WORK ON WINDOWS !
 
+#ifdef _USE_CUDA
+#   include "_reg_f3d_gpu.h"
+#endif
+
 #ifdef _WIN32
 #   include <time.h>
 #endif
@@ -104,7 +108,7 @@ void Usage(char *exec)
    reg_print_info(exec, "\t--mindssc <offset>\tMIND-SCC and the offset to use to compute the descriptor");
    reg_print_info(exec, "\t--kld\t\t\tKLD. Used for all time points");
    reg_print_info(exec, "\t-kld <tp>\t\tKLD. Used for the specified timepoint");
-   reg_print_info(exec, "\t* For the Kullback–Leibler divergence, reference and floating are expected to be probabilities");
+   reg_print_info(exec, "\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities");
    reg_print_info(exec, "\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile");
    reg_print_info(exec, "*** Options for setting the weights for each timepoint for each similarity");
    reg_print_info(exec, "*** Note, the options above should be used first and will set a default weight of 1");
@@ -132,13 +136,13 @@ void Usage(char *exec)
    reg_print_info(exec, "\t-fmask <filename>\tFilename of a mask image in the floating space");
    reg_print_info(exec, "");
 
-//   reg_print_info(exec, "*** Platform options:");
+   reg_print_info(exec, "*** Platform options:");
 //#if defined(_USE_CUDA) && defined(_USE_OPENCL)
 //   reg_print_info(exec, "\t-platf <uint>\t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]");
 //#else
-//#ifdef _USE_CUDA
-//   reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]");
-//#endif
+#ifdef _USE_CUDA
+   reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]");
+#endif
 //#ifdef _USE_OPENCL
 //   reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | OpenCL=2 [0]");
 //#endif
@@ -212,11 +216,6 @@ int main(int argc, char **argv)
          printf("%s",xml_f3d);
          return EXIT_SUCCESS;
       }
-      if(strcmp(argv[i], "-gpu")==0 || strcmp(argv[i], "--gpu")==0)
-      {
-         reg_print_msg_error("The reg_f3d GPU capability has been de-activated in the current release.");
-         return EXIT_FAILURE;
-      }
       if(strcmp(argv[i], "-voff")==0)
       {
 #ifndef NDEBUG
@@ -297,6 +296,9 @@ int main(int argc, char **argv)
    }
    //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
    // Check the type of registration object to create
+#ifdef _USE_CUDA
+   CUcontext ctx;
+#endif // _USE_CUDA
    reg_f3d<float> *REG=NULL;
    float *referenceLandmark=NULL;
    float *floatingLandmark=NULL;
@@ -312,6 +314,17 @@ int main(int argc, char **argv)
          REG=new reg_f3d_sym<float>(referenceImage->nt,floatingImage->nt);
          break;
       }
+#ifdef _USE_CUDA
+      if (strcmp(argv[i], "-gpu") == 0 || strcmp(argv[i], "-mem") == 0) {
+         // Set up the cuda card and display some relevant information and check if the card is suitable
+         if (cudaCommon_setCUDACard(&ctx, true)) {
+            fprintf(stderr, "\n[NiftyReg CUDA ERROR] Error while detecting a CUDA card\n");
+            fprintf(stderr, "[NiftyReg CUDA WARNING] GPU implementation has been turned off.\n");
+         } else
+            REG = new reg_f3d_gpu(referenceImage->nt, floatingImage->nt);
+         break;
+      }
+#endif // _USE_CUDA
    }
    if(REG==NULL)
       REG=new reg_f3d<float>(referenceImage->nt,floatingImage->nt);
@@ -927,6 +940,9 @@ int main(int argc, char **argv)
    free(referenceLandmark);
    free(floatingLandmark);
 
+#ifdef _USE_CUDA
+   cudaCommon_unsetCUDACard(&ctx);
+#endif
    // Erase the registration object
    delete REG;
 
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 9c14fce6..6eabb852 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -212,6 +212,8 @@ set(_reg_f3d_libraries
   _reg_measure
   _reg_tools
   _reg_ReadWriteImage
+  ${NR_OPENCL_LIBRARIES}
+  ${NR_CUDA_LIBRARIES}
 )
 add_library(_reg_f3d ${NIFTYREG_LIBRARY_TYPE} ${_reg_f3d_files})
 target_link_libraries(_reg_f3d ${_reg_f3d_libraries})
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 238601d9..a4acfe91 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -87,6 +87,13 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CUDAResampleImageKernel.cpp
     ../AladinContent.cpp
     ../Platform.cpp
+    _reg_resampling_gpu.cu
+    _reg_blocksize_gpu.cu
+    _reg_tools_gpu.cu
+    _reg_localTransformation_gpu.cu
+    _reg_nmi_gpu.cu
+    _reg_ssd_gpu.cu
+    _reg_optimiser_gpu.cu
     )
 target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda)
 install(TARGETS ${NAME}
@@ -100,7 +107,7 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
 set(NAME _reg_cudainfo)
 cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h)
-target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda)
+target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
 install(TARGETS ${NAME}
     RUNTIME DESTINATION lib
     LIBRARY DESTINATION lib
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 5645451b..29f30546 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -1,5 +1,5 @@
 /**
- * @file _reg_comon_gpu.cu
+ * @file _reg_common_cuda.cu
  * @author Marc Modat
  * @date 25/03/2009
  *  Copyright (c) 2009-2018, University College London
@@ -9,11 +9,81 @@
  *
  */
 
-#ifndef _REG_COMMON_GPU_CU
-#define _REG_COMMON_GPU_CU
+#ifndef _REG_COMMON_CUDA_CU
+#define _REG_COMMON_CUDA_CU
 
 #include "_reg_common_cuda.h"
 #include "_reg_tools.h"
+#include "_reg_blocksize_gpu.h"
+
+ /* ******************************** */
+ /* ******************************** */
+int cudaCommon_setCUDACard(CUcontext *ctx, bool verbose) {
+	// The CUDA card is setup
+	cuInit(0);
+	struct cudaDeviceProp deviceProp;
+	int device_count = 0;
+	cudaGetDeviceCount(&device_count);
+	if (verbose)
+		printf("[NiftyReg CUDA] %i card(s) detected\n", device_count);
+	// following code is from cutGetMaxGflopsDeviceId()
+	int max_gflops_device = 0;
+	int max_gflops = 0;
+	int current_device = 0;
+	while (current_device < device_count) {
+		cudaGetDeviceProperties(&deviceProp, current_device);
+		int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate;
+		if (gflops > max_gflops) {
+			max_gflops = gflops;
+			max_gflops_device = current_device;
+		}
+		++current_device;
+	}
+	NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device));
+	NR_CUDA_SAFE_CALL(cuCtxCreate(ctx, CU_CTX_SCHED_SPIN, max_gflops_device))
+		NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
+
+	if (deviceProp.major < 1) {
+		fprintf(stderr, "[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n");
+		return EXIT_FAILURE;
+	} else {
+		size_t free = 0;
+		size_t total = 0;
+		cuMemGetInfo(&free, &total);
+		if (deviceProp.totalGlobalMem != total) {
+			fprintf(stderr, "[NiftyReg CUDA ERROR] The CUDA card %s does not seem to be available\n",
+					deviceProp.name);
+			fprintf(stderr, "[NiftyReg CUDA ERROR] Expected total memory: %lu Mb - Recovered total memory: %lu Mb\n",
+					deviceProp.totalGlobalMem / (1024 * 1024), total / (1024 * 1024));
+			return EXIT_FAILURE;
+		}
+		if (verbose) {
+			printf("[NiftyReg CUDA] The following device is used: %s\n",
+				   deviceProp.name);
+			printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n",
+				   (unsigned long int)(free / (1024 * 1024)),
+				   (unsigned long int)(total / (1024 * 1024)));
+			printf("[NiftyReg CUDA] Card compute capability: %i.%i\n",
+				   deviceProp.major,
+				   deviceProp.minor);
+			printf("[NiftyReg CUDA] Shared memory size in bytes: %lu\n",
+				   deviceProp.sharedMemPerBlock);
+			printf("[NiftyReg CUDA] CUDA version %i\n",
+				   CUDART_VERSION);
+			printf("[NiftyReg CUDA] Card clock rate: %i MHz\n",
+				   deviceProp.clockRate / 1000);
+			printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n",
+				   deviceProp.multiProcessorCount);
+		}
+		NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(deviceProp.major);
+	}
+	return EXIT_SUCCESS;
+}
+/* ******************************** */
+void cudaCommon_unsetCUDACard(CUcontext *ctx) {
+	//    cuCtxDetach(*ctx);
+	cuCtxDestroy(*ctx);
+}
 /* ******************************** */
 /* ******************************** */
 template <class NIFTI_TYPE>
@@ -678,55 +748,5 @@ int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, c
 template int cudaCommon_transferArrayFromDeviceToCpu<int>(int *array_cpu, int **array_d, const unsigned int nElements);
 template int cudaCommon_transferArrayFromDeviceToCpu<float>(float *array_cpu, float **array_d, const unsigned int nElements);
 template int cudaCommon_transferArrayFromDeviceToCpu<double>(double *array_cpu, double **array_d, const unsigned int nElements);
-/* ******************************** */
-void showCUDACardInfo(void)
-{
-   // The CUDA card is setup
-   cuInit(0);
-
-   int device_count=0;
-   cudaGetDeviceCount(&device_count);
-   printf("-----------------------------------\n");
-   printf("[NiftyReg CUDA] %i device(s) detected\n", device_count);
-   printf("-----------------------------------\n");
-
-   CUcontext cucontext;
 
-   struct cudaDeviceProp deviceProp;
-   // following code is from cutGetMaxGflopsDeviceId()
-   int current_device = 0;
-   while(current_device<device_count){
-       cudaGetDeviceProperties(&deviceProp, current_device);
-       if(deviceProp.major>0){
-
-          NR_CUDA_SAFE_CALL(cudaSetDevice(current_device));
-          NR_CUDA_SAFE_CALL(cuCtxCreate(&cucontext, CU_CTX_SCHED_SPIN, current_device));
-
-          printf("[NiftyReg CUDA] Device id [%i]\n", current_device);
-          printf("[NiftyReg CUDA] Device name: %s\n", deviceProp.name);
-          size_t free=0;
-          size_t total=0;
-          cuMemGetInfo(&free, &total);
-          printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n",
-                 (unsigned long int)(free/(1024*1024)),
-                 (unsigned long int)(total/(1024*1024)));
-          printf("[NiftyReg CUDA] Card compute capability: %i.%i\n",
-                 deviceProp.major,
-                 deviceProp.minor);
-          printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n",
-                 deviceProp.sharedMemPerBlock);
-          printf("[NiftyReg CUDA] CUDA version %i\n",
-                 CUDART_VERSION);
-          printf("[NiftyReg CUDA] Card clock rate (Mhz): %i\n",
-                 deviceProp.clockRate/1000);
-          printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n",
-                 deviceProp.multiProcessorCount);
-       }
-       cuCtxDestroy(cucontext);
-       ++current_device;
-       printf("-----------------------------------\n");
-   }
-}
 #endif
-/* ******************************** */
-/* ******************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index cb82f6d8..0fa6731d 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -1,4 +1,4 @@
-/** @file _reg_common_gpu.h
+/** @file _reg_common_cuda.h
  * @author Marc Modat
  * @date 25/03/2009.
  *  Copyright (c) 2009-2018, University College London
@@ -7,8 +7,8 @@
  * See the LICENSE.txt file in the nifty_reg root folder
  */
 
-#ifndef _REG_COMMON_GPU_H
-#define _REG_COMMON_GPU_H
+#ifndef _REG_COMMON_CUDA_H
+#define _REG_COMMON_CUDA_H
 
 #include "nifti1_io.h"
 #include "cuda_runtime.h"
@@ -171,6 +171,4 @@ template <class DTYPE>
 int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, const unsigned int nElements);
 /* ******************************** */
 /* ******************************** */
-void showCUDACardInfo(void);
-/* ******************************** */
 #endif
diff --git a/reg-lib/cuda/_reg_cudainfo.cpp b/reg-lib/cuda/_reg_cudainfo.cpp
index d01a730c..7d52161f 100644
--- a/reg-lib/cuda/_reg_cudainfo.cpp
+++ b/reg-lib/cuda/_reg_cudainfo.cpp
@@ -1,7 +1,51 @@
 #include <iostream>
 #include "_reg_common_cuda.h"
+#include "_reg_tools.h"
 
-void showCUDAInfo(void)
-{
-    showCUDACardInfo();
+void showCUDAInfo(void) {
+    // The CUDA card is setup
+    cuInit(0);
+
+    int device_count = 0;
+    cudaGetDeviceCount(&device_count);
+    printf("-----------------------------------\n");
+    printf("[NiftyReg CUDA] %i device(s) detected\n", device_count);
+    printf("-----------------------------------\n");
+
+    CUcontext cucontext;
+
+    struct cudaDeviceProp deviceProp;
+    // following code is from cutGetMaxGflopsDeviceId()
+    int current_device = 0;
+    while (current_device < device_count) {
+        cudaGetDeviceProperties(&deviceProp, current_device);
+        if (deviceProp.major > 0) {
+
+            NR_CUDA_SAFE_CALL(cudaSetDevice(current_device));
+            NR_CUDA_SAFE_CALL(cuCtxCreate(&cucontext, CU_CTX_SCHED_SPIN, current_device));
+
+            printf("[NiftyReg CUDA] Device id [%i]\n", current_device);
+            printf("[NiftyReg CUDA] Device name: %s\n", deviceProp.name);
+            size_t free = 0;
+            size_t total = 0;
+            cuMemGetInfo(&free, &total);
+            printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n",
+                   (unsigned long int)(free / (1024 * 1024)),
+                   (unsigned long int)(total / (1024 * 1024)));
+            printf("[NiftyReg CUDA] Card compute capability: %i.%i\n",
+                   deviceProp.major,
+                   deviceProp.minor);
+            printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n",
+                   deviceProp.sharedMemPerBlock);
+            printf("[NiftyReg CUDA] CUDA version %i\n",
+                   CUDART_VERSION);
+            printf("[NiftyReg CUDA] Card clock rate (Mhz): %i\n",
+                   deviceProp.clockRate / 1000);
+            printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n",
+                   deviceProp.multiProcessorCount);
+        }
+        cuCtxDestroy(cucontext);
+        ++current_device;
+        printf("-----------------------------------\n");
+    }
 }
diff --git a/reg-lib/cuda/_reg_cudainfo.h b/reg-lib/cuda/_reg_cudainfo.h
index e0bac835..1e2b4486 100644
--- a/reg-lib/cuda/_reg_cudainfo.h
+++ b/reg-lib/cuda/_reg_cudainfo.h
@@ -1,5 +1,5 @@
-#ifndef _REG_COMMON_GPU_H
-#define _REG_COMMON_GPU_H
+#ifndef _REG_CUDAINFO_H
+#define _REG_CUDAINFO_H
 
 void showCUDAInfo(void);
 
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index dcfc3f09..e2338253 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -36,7 +36,6 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint,int floTimePoint)
    this->measure_gpu_dti=NULL;
    this->measure_gpu_lncc=NULL;
    this->measure_gpu_nmi=NULL;
-   this->measure_gpu_multichannel_nmi=NULL;
 
    this->currentReference2_gpu=NULL;
    this->currentFloating2_gpu=NULL;
@@ -91,12 +90,6 @@ reg_f3d_gpu::~reg_f3d_gpu()
       this->measure_gpu_nmi=NULL;
       this->measure_nmi=NULL;
    }
-   if(this->measure_gpu_multichannel_nmi!=NULL)
-   {
-      delete this->measure_gpu_multichannel_nmi;
-      this->measure_gpu_multichannel_nmi=NULL;
-      this->measure_multichannel_nmi=NULL;
-   }
    if(this->measure_gpu_ssd!=NULL)
    {
       delete this->measure_gpu_ssd;
@@ -137,7 +130,7 @@ void reg_f3d_gpu::AllocateWarped()
    if(this->currentReference==NULL)
    {
       printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
-      reg_exit(1);
+      reg_exit();
    }
    this->ClearWarped();
    this->warped = nifti_copy_nim_info(this->currentReference);
@@ -156,7 +149,7 @@ void reg_f3d_gpu::AllocateWarped()
       if(cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, this->warped->dim))
       {
          printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
-         reg_exit(1);
+         reg_exit();
       }
    }
    else if(this->warped->nt==2)
@@ -164,13 +157,13 @@ void reg_f3d_gpu::AllocateWarped()
       if(cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, &this->warped2_gpu, this->warped->dim))
       {
          printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
-         reg_exit(1);
+         reg_exit();
       }
    }
    else
    {
       printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n");
-      reg_exit(1);
+      reg_exit();
    }
 #ifndef NDEBUG
    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped done.\n");
@@ -248,7 +241,7 @@ void reg_f3d_gpu::AllocateWarpedGradient()
    else
    {
       printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n");
-      reg_exit(1);
+      reg_exit();
    }
 #ifndef NDEBUG
    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient done.\n");
@@ -283,7 +276,7 @@ void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient()
                                        this->currentReference->dim))
    {
       printf("[NiftyReg ERROR] Error when allocating the voxel based measure gradient image.\n");
-      reg_exit(1);
+      reg_exit();
    }
 #ifndef NDEBUG
    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient done.\n");
@@ -312,7 +305,7 @@ void reg_f3d_gpu::AllocateTransformationGradient()
                                        this->controlPointGrid->dim))
    {
       printf("[NiftyReg ERROR] Error when allocating the node based gradient image.\n");
-      reg_exit(1);
+      reg_exit();
    }
 #ifndef NDEBUG
    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient done.\n");
@@ -486,9 +479,6 @@ void reg_f3d_gpu::GetVoxelBasedGradient()
    if(this->measure_gpu_nmi!=NULL)
       this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient();
 
-   if(this->measure_gpu_multichannel_nmi!=NULL)
-      this->measure_gpu_multichannel_nmi->GetVoxelBasedSimilarityMeasureGradient();
-
    if(this->measure_gpu_ssd!=NULL)
       this->measure_gpu_ssd->GetVoxelBasedSimilarityMeasureGradient();
 
@@ -700,13 +690,13 @@ float reg_f3d_gpu::InitialiseCurrentLevel()
             (&this->currentReference_gpu, this->currentReference->dim))
       {
          printf("[NiftyReg ERROR] Error when allocating the reference image.\n");
-         reg_exit(1);
+         reg_exit();
       }
       if(cudaCommon_transferNiftiToArrayOnDevice<float>
             (&this->currentReference_gpu, this->currentReference))
       {
          printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
-         reg_exit(1);
+         reg_exit();
       }
    }
    else if(this->currentReference->nt==2)
@@ -715,13 +705,13 @@ float reg_f3d_gpu::InitialiseCurrentLevel()
             (&this->currentReference_gpu,&this->currentReference2_gpu, this->currentReference->dim))
       {
          printf("[NiftyReg ERROR] Error when allocating the reference image.\n");
-         reg_exit(1);
+         reg_exit();
       }
       if(cudaCommon_transferNiftiToArrayOnDevice<float>
             (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference))
       {
          printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
-         reg_exit(1);
+         reg_exit();
       }
    }
 
@@ -733,13 +723,13 @@ float reg_f3d_gpu::InitialiseCurrentLevel()
             (&this->currentFloating_gpu, this->currentFloating->dim))
       {
          printf("[NiftyReg ERROR] Error when allocating the floating image.\n");
-         reg_exit(1);
+         reg_exit();
       }
       if(cudaCommon_transferNiftiToArrayOnDevice<float>
             (&this->currentFloating_gpu, this->currentFloating))
       {
          printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
-         reg_exit(1);
+         reg_exit();
       }
    }
    else if(this->currentReference->nt==2)
@@ -748,13 +738,13 @@ float reg_f3d_gpu::InitialiseCurrentLevel()
             (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating->dim))
       {
          printf("[NiftyReg ERROR] Error when allocating the floating image.\n");
-         reg_exit(1);
+         reg_exit();
       }
       if(cudaCommon_transferNiftiToArrayOnDevice<float>
             (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating))
       {
          printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
-         reg_exit(1);
+         reg_exit();
       }
    }
    if(this->controlPointGrid_gpu!=NULL) cudaCommon_free<float4>(&this->controlPointGrid_gpu);
@@ -762,14 +752,14 @@ float reg_f3d_gpu::InitialiseCurrentLevel()
          (&this->controlPointGrid_gpu, this->controlPointGrid->dim))
    {
       printf("[NiftyReg ERROR] Error when allocating the control point image.\n");
-      reg_exit(1);
+      reg_exit();
    }
 
    if(cudaCommon_transferNiftiToArrayOnDevice<float4>
          (&this->controlPointGrid_gpu, this->controlPointGrid))
    {
       printf("[NiftyReg ERROR] Error when transfering the control point image.\n");
-      reg_exit(1);
+      reg_exit();
    }
 
    int *targetMask_h;
@@ -800,7 +790,7 @@ void reg_f3d_gpu::ClearCurrentInputImage()
          (this->controlPointGrid, &this->controlPointGrid_gpu))
    {
       printf("[NiftyReg ERROR] Error when transfering back the control point image.\n");
-      reg_exit(1);
+      reg_exit();
    }
    cudaCommon_free<float4>(&this->controlPointGrid_gpu);
    this->controlPointGrid_gpu=NULL;
@@ -945,7 +935,7 @@ void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber)
 {
    if(this->measure_gpu_nmi==NULL)
       this->measure_gpu_nmi=new reg_nmi_gpu;
-   this->measure_gpu_nmi->SetActiveTimepoint(timepoint);
+   this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0);
    // I am here adding 4 to the specified bin number to accomodate for
    // the spline support
    this->measure_gpu_nmi->SetFloatingBinNumber(floBinNumber+4, timepoint);
@@ -956,27 +946,18 @@ void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber)
 {
    if(this->measure_gpu_nmi==NULL)
       this->measure_gpu_nmi=new reg_nmi_gpu;
-   this->measure_gpu_nmi->SetActiveTimepoint(timepoint);
+   this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0);
    // I am here adding 4 to the specified bin number to accomodate for
    // the spline support
    this->measure_gpu_nmi->SetReferenceBinNumber(refBinNumber+4, timepoint);
    return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseMultiChannelNMI(int timepointNumber, int *timepoint)
-{
-   if(this->measure_gpu_multichannel_nmi==NULL)
-      this->measure_gpu_multichannel_nmi=new reg_multichannel_nmi_gpu;
-   for(int i=0; i<timepointNumber; ++i)
-      this->measure_gpu_multichannel_nmi->SetActiveTimepoint(timepoint[i]);
-   return;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseSSD(int timepoint)
 {
    if(this->measure_gpu_ssd==NULL)
       this->measure_gpu_ssd=new reg_ssd_gpu;
-   this->measure_gpu_ssd->SetActiveTimepoint(timepoint);
+   this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0);
    return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -984,7 +965,7 @@ void reg_f3d_gpu::UseKLDivergence(int timepoint)
 {
    if(this->measure_gpu_kld==NULL)
       this->measure_gpu_kld=new reg_kld_gpu;
-   this->measure_gpu_kld->SetActiveTimepoint(timepoint);
+   this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0);
    return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -992,18 +973,20 @@ void reg_f3d_gpu::UseLNCC(int timepoint, float stddev)
 {
    if(this->measure_gpu_lncc==NULL)
       this->measure_gpu_lncc=new reg_lncc_gpu;
-   this->measure_gpu_lncc->SetActiveTimepoint(timepoint);
+   this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0);
    this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint,stddev);
    return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseDTI(int timepoint[6])
 {
-   if(this->measure_gpu_dti==NULL)
-      this->measure_gpu_dti=new reg_dti_gpu;
-   for(int i=0; i<6; ++i)
-      this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]);
-   return;
+   reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring");
+   reg_exit();
+
+   // if(this->measure_gpu_dti==NULL)
+   //    this->measure_gpu_dti=new reg_dti_gpu;
+   // for(int i=0; i<6; ++i)
+   //    this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -1018,7 +1001,7 @@ void reg_f3d_gpu::InitialiseSimilarity()
    {
       measure_gpu_nmi=new reg_nmi_gpu;
       for(int i=0; i<this->inputReference->nt; ++i)
-         measure_gpu_nmi->SetActiveTimepoint(i);
+         measure_gpu_nmi->SetTimepointWeight(i, 1.0);
    }
    if(this->measure_gpu_nmi!=NULL)
    {
@@ -1027,8 +1010,8 @@ void reg_f3d_gpu::InitialiseSimilarity()
             this->currentMask,
             this->activeVoxelNumber[this->currentLevel],
             this->warped,
-            this->warpedGradientImage,
-            this->voxelBasedMeasureGradientImage,
+            this->warImgGradient,
+            this->voxelBasedMeasureGradient,
             &this->currentReference_gpu,
             &this->currentFloating_gpu,
             &this->currentMask_gpu,
@@ -1039,25 +1022,6 @@ void reg_f3d_gpu::InitialiseSimilarity()
       this->measure_nmi=this->measure_gpu_nmi;
    }
 
-   if(this->measure_gpu_multichannel_nmi!=NULL)
-   {
-      this->measure_gpu_multichannel_nmi->InitialiseMeasure(this->currentReference,
-            this->currentFloating,
-            this->currentMask,
-            this->activeVoxelNumber[this->currentLevel],
-            this->warped,
-            this->warpedGradientImage,
-            this->voxelBasedMeasureGradientImage,
-            &this->currentReference_gpu,
-            &this->currentFloating_gpu,
-            &this->currentMask_gpu,
-            &this->warped_gpu,
-            &this->warpedGradientImage_gpu,
-            &this->voxelBasedMeasureGradientImage_gpu
-                                                           );
-      this->measure_multichannel_nmi=this->measure_gpu_multichannel_nmi;
-   }
-
    if(this->measure_gpu_ssd!=NULL)
    {
       this->measure_gpu_ssd->InitialiseMeasure(this->currentReference,
@@ -1065,8 +1029,9 @@ void reg_f3d_gpu::InitialiseSimilarity()
             this->currentMask,
             this->activeVoxelNumber[this->currentLevel],
             this->warped,
-            this->warpedGradientImage,
-            this->voxelBasedMeasureGradientImage,
+            this->warImgGradient,
+            this->voxelBasedMeasureGradient,
+            this->localWeightSimCurrent,
             &this->currentReference_gpu,
             &this->currentFloating_gpu,
             &this->currentMask_gpu,
@@ -1084,8 +1049,8 @@ void reg_f3d_gpu::InitialiseSimilarity()
             this->currentMask,
             this->activeVoxelNumber[this->currentLevel],
             this->warped,
-            this->warpedGradientImage,
-            this->voxelBasedMeasureGradientImage,
+            this->warImgGradient,
+            this->voxelBasedMeasureGradient,
             &this->currentReference_gpu,
             &this->currentFloating_gpu,
             &this->currentMask_gpu,
@@ -1103,8 +1068,8 @@ void reg_f3d_gpu::InitialiseSimilarity()
             this->currentMask,
             this->activeVoxelNumber[this->currentLevel],
             this->warped,
-            this->warpedGradientImage,
-            this->voxelBasedMeasureGradientImage,
+            this->warImgGradient,
+            this->voxelBasedMeasureGradient,
             &this->currentReference_gpu,
             &this->currentFloating_gpu,
             &this->currentMask_gpu,
@@ -1122,8 +1087,8 @@ void reg_f3d_gpu::InitialiseSimilarity()
             this->currentMask,
             this->activeVoxelNumber[this->currentLevel],
             this->warped,
-            this->warpedGradientImage,
-            this->voxelBasedMeasureGradientImage,
+            this->warImgGradient,
+            this->voxelBasedMeasureGradient,
             &this->currentReference_gpu,
             &this->currentFloating_gpu,
             &this->currentMask_gpu,
diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h
index bf0e542b..edbed4fc 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.h
+++ b/reg-lib/cuda/_reg_f3d_gpu.h
@@ -19,7 +19,7 @@
 #include "_reg_nmi_gpu.h"
 #include "_reg_ssd_gpu.h"
 #include "_reg_tools_gpu.h"
-#include "_reg_common_gpu.h"
+#include "_reg_common_cuda.h"
 #include "_reg_optimiser_gpu.h"
 #include "_reg_f3d.h"
 
@@ -49,7 +49,6 @@ class reg_f3d_gpu : public reg_f3d<float>
    reg_dti_gpu *measure_gpu_dti;
    reg_lncc_gpu *measure_gpu_lncc;
    reg_nmi_gpu *measure_gpu_nmi;
-   reg_multichannel_nmi_gpu *measure_gpu_multichannel_nmi;
 
    float InitialiseCurrentLevel();
    void ClearCurrentInputImage();
@@ -82,7 +81,6 @@ class reg_f3d_gpu : public reg_f3d<float>
 public:
    void UseNMISetReferenceBinNumber(int,int);
    void UseNMISetFloatingBinNumber(int,int);
-   void UseMultiChannelNMI(int timepointNumber, int *timepoint);
    void UseSSD(int timepoint);
    void UseKLDivergence(int timepoint);
    void UseDTI(int timepoint[6]);
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index 2a5a5237..90cbb2f0 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -22,7 +22,7 @@ void reg_affine_positionField_gpu(	mat44 *affineMatrix,
 					nifti_image *targetImage,
 					float4 **array_d)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     int3 imageSize = make_int3(targetImage->nx,targetImage->ny,targetImage->nz);
@@ -53,7 +53,7 @@ void reg_affine_positionField_gpu(	mat44 *affineMatrix,
     NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, transformationMatrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice));
 	cudaBindTexture(0,txAffineTransformation,transformationMatrix_d,3*sizeof(float4));
     NR_CUDA_SAFE_CALL(cudaFreeHost((void *)transformationMatrix_h));
-	
+
         const unsigned int Grid_reg_affine_deformationField = (unsigned int)ceil(sqrtf((float)targetImage->nvox/(float)NR_BLOCK->Block_reg_affine_deformationField));
     dim3 B1(NR_BLOCK->Block_reg_affine_deformationField,1,1);
         dim3 G1(Grid_reg_affine_deformationField,Grid_reg_affine_deformationField,1);
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h
index b5ab884a..7779358e 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h
@@ -13,8 +13,8 @@
 #ifndef _REG_AFFINETRANSFORMATION_GPU_H
 #define _REG_AFFINETRANSFORMATION_GPU_H
 
-#include "_reg_common_gpu.h"
-#include "_reg_globalTransformation.h"
+#include "_reg_common_cuda.h"
+// #include "_reg_globalTransformation.h"
 
 extern "C++"
 void reg_affine_positionField_gpu(mat44 *,
diff --git a/reg-lib/cuda/_reg_globalTransformation_kernels.cu b/reg-lib/cuda/_reg_globalTransformation_kernels.cu
index a53e99a8..acd92d24 100755
--- a/reg-lib/cuda/_reg_globalTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_kernels.cu
@@ -13,7 +13,7 @@
 #ifndef _REG_AFFINETRANSFORMATION_KERNELS_CU
 #define _REG_AFFINETRANSFORMATION_KERNELS_CU
 
-#include "_reg_common_gpu.h"
+#include "_reg_common_cuda.h"
 
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 9155aed7..36e064bd 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -1,6 +1,6 @@
 /*
  *  _reg_spline_gpu.cu
- *  
+ *
  *
  *  Created by Marc Modat on 24/03/2009.
  *  Copyright (c) 2009-2018, University College London
@@ -26,7 +26,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 										int activeVoxelNumber,
 										bool bspline)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	const int voxelNumber = reference->nx * reference->ny * reference->nz;
@@ -82,7 +82,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage,
 										  float4 **controlPointImageArray_d)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
@@ -157,7 +157,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 												float4 **nodeGradientArray_d,
 												float bendingEnergyWeight)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
@@ -192,7 +192,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 
 	// Compute the gradient
 	bendingEnergyWeight *= 1.f / (float)controlPointNumber;
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&bendingEnergyWeight,sizeof(float)))            
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&bendingEnergyWeight,sizeof(float)))
 	if(controlPointImage->nz>1){
 		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
 										  secondDerivativeValues_d,
@@ -227,7 +227,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 											 float **jacobianMatrices_d,
 											 float **jacobianDet_d)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	// Need to reorient the Jacobian matrix using the header information - real to voxel conversion
@@ -278,7 +278,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 									   float **jacobianMatrices_d,
 									   float **jacobianDet_d)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	// Need to reorient the Jacobian matrix using the header information - real to voxel conversion
@@ -343,7 +343,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 											 bool approx
 											 )
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	// The Jacobian matrices and determinants are computed
@@ -410,7 +410,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 												   float jacobianWeight,
 												   bool approx)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	// The Jacobian matrices and determinants are computed
@@ -529,7 +529,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 									  float4 **controlPointImageArray_d,
 									  bool approx)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	// The Jacobian matrices and determinants are computed
@@ -644,7 +644,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 /* *************************************************************** */
 void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageArray_d)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	// Bind the qform or sform
@@ -674,7 +674,7 @@ void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageA
 /* *************************************************************** */
 void reg_getDisplacementFromDeformation_gpu( nifti_image *image, float4 **imageArray_d)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	// Bind the qform or sform
@@ -773,7 +773,7 @@ void reg_defField_compose_gpu(nifti_image *def,
 							  int **mask_gpu,
 							  int activeVoxel)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	const int voxelNumber=def->nx*def->ny*def->nz;
@@ -832,7 +832,7 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
 										float4 **deformationField_gpu,
 										float **jacobianMatrices_gpu)
 {
-	// Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz);
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index a0aadfcb..3e86da50 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -13,7 +13,7 @@
 #ifndef _REG_LOCALTRANSFORMATION_GPU_H
 #define _REG_LOCALTRANSFORMATION_GPU_H
 
-#include "_reg_common_gpu.h"
+#include "_reg_common_cuda.h"
 #include "_reg_maths.h"
 #include "_reg_tools_gpu.h"
 #include <limits>
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index e999d123..450b1747 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -13,7 +13,7 @@
 #ifndef _reg_spline_KERNELS_CU
 #define _reg_spline_KERNELS_CU
 
-#include "_reg_common_gpu.h"
+#include "_reg_common_cuda.h"
 
 __device__ __constant__ int c_UseBSpline;
 __device__ __constant__ int c_VoxelNumber;
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index ae51a1ba..16089c27 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -11,7 +11,8 @@
 #include "_reg_lncc.h"
 #include "_reg_dti.h"
 
-#include "_reg_common_gpu.h"
+#include "_reg_common_cuda.h"
+#include "_reg_kld.h"
 
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -60,7 +61,7 @@ class reg_lncc_gpu : public reg_lncc , public reg_measure_gpu
    reg_lncc_gpu()
    {
       fprintf(stderr,"[ERROR] CUDA CANNOT BE USED WITH LNCC YET\n");
-      reg_exit(1);
+      reg_exit();
    }
    /// @brief reg_lncc class destructor
    ~reg_lncc_gpu() {}
@@ -100,7 +101,7 @@ class reg_kld_gpu : public reg_kld , public reg_measure_gpu
    reg_kld_gpu()
    {
       fprintf(stderr,"[ERROR] CUDA CANNOT BE USED WITH KLD YET\n");
-      reg_exit(1);
+      reg_exit();
    }
    /// @brief reg_kld_gpu class destructor
    ~reg_kld_gpu() {}
@@ -140,7 +141,7 @@ class reg_dti_gpu : public reg_dti , public reg_measure_gpu
    reg_dti_gpu()
    {
       fprintf(stderr,"[ERROR] CUDA CANNOT BE USED WITH DTI YET\n");
-      reg_exit(1);
+      reg_exit();
    }
    /// @brief reg_dti_gpu class destructor
    ~reg_dti_gpu() {}
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 5f667f37..b0dac95a 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -76,21 +76,21 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
 	if(this->isSymmetric){
 		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
 		fprintf(stderr,"[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n");
-		reg_exit(1);
+		reg_exit();
 	}
 	// Check if the input images have multiple timepoints
 	if(this->referenceTimePoint>1 ||
        this->floatingImagePointer->nt>1){
 		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
 		fprintf(stderr,"[NiftyReg ERROR] This class can only be \n");
-		reg_exit(1);
+		reg_exit();
     }
     // Check that the input image are of type float
     if(this->referenceImagePointer->datatype!=NIFTI_TYPE_FLOAT32 ||
        this->warpedFloatingImagePointer->datatype!=NIFTI_TYPE_FLOAT32){
         fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
         fprintf(stderr,"[NiftyReg ERROR] This class can only be \n");
-        reg_exit(1);
+        reg_exit();
     }
 	// Bind the required pointers
 	this->referenceDevicePointer = *refDevicePtr;
@@ -105,13 +105,13 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
 			(&this->referenceDevicePointer, this->referenceImagePointer)){
 		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
 		printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
-		reg_exit(1);
+		reg_exit();
 	}
 	if(cudaCommon_transferNiftiToArrayOnDevice<float>
 			(&this->floatingDevicePointer, this->floatingImagePointer)){
 		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
 		printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
-		reg_exit(1);
+		reg_exit();
 	}
 	// Allocate the required joint histogram on the GPU
 	cudaMalloc(&this->forwardJointHistogramLog_device,
@@ -137,7 +137,7 @@ double reg_nmi_gpu::GetSimilarityMeasureValue()
     reg_getNMIValue<float>
             (this->referenceImagePointer,
 			 this->warpedFloatingImagePointer,
-             this->activeTimePoint,
+			 this->timePointWeight,
              this->referenceBinNumber,
              this->floatingBinNumber,
              this->totalBinNumber,
@@ -171,7 +171,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
 									  int refBinning,
 									  int floBinning)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz;
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index c033a37c..99525856 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -15,6 +15,7 @@
 
 #include "_reg_nmi.h"
 #include "_reg_measure_gpu.h"
+#include "_reg_blocksize_gpu.h"
 
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 08e3026c..6ea2736d 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -65,7 +65,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
     if(cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu,
 									   (int)(this->GetVoxNumber()))){
         printf("[NiftyReg ERROR] Error when allocating the best control point array on the GPU.\n");
-        reg_exit(1);
+        reg_exit();
     }
 
 	this->StoreCurrentDOF();
@@ -165,12 +165,12 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox,
 	if(cudaCommon_allocateArrayToDevice<float4>(&this->array1,
 												(int)(this->GetVoxNumber()))){
         printf("[NiftyReg ERROR] Error when allocating the first conjugate gradient_gpu array on the GPU.\n");
-        reg_exit(1);
+        reg_exit();
     }
 	if(cudaCommon_allocateArrayToDevice<float4>(&this->array2,
 												(int)(this->GetVoxNumber()))){
         printf("[NiftyReg ERROR] Error when allocating the second conjugate gradient_gpu array on the GPU.\n");
-        reg_exit(1);
+        reg_exit();
     }
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::Initialise() called\n");
@@ -227,7 +227,7 @@ void reg_initialiseConjugateGradient_gpu(float4 **gradientArray_d,
                                          float4 **conjugateH_d,
                                          int nodeNumber)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
@@ -250,7 +250,7 @@ void reg_GetConjugateGradient_gpu(float4 **gradientArray_d,
                                   float4 **conjugateH_d,
                                   int nodeNumber)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
@@ -296,7 +296,7 @@ void reg_GetConjugateGradient_gpu(float4 **gradientArray_d,
 float reg_getMaximalLength_gpu(float4 **gradientArray_d,
                                int nodeNumber)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     // Copy constant memory value and bind texture
@@ -328,7 +328,7 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
                                         float currentLength)
 
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index 701bb202..2e8c9eec 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -1,7 +1,7 @@
 #ifndef _REG_OPTIMISER_GPU_H
 #define _REG_OPTIMISER_GPU_H
 
-#include "_reg_common_gpu.h"
+#include "_reg_common_cuda.h"
 #include "_reg_optimiser.h"
 #include "_reg_tools_gpu.h"
 
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index 1611ddeb..ca16e747 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -26,7 +26,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
                            int activeVoxelNumber,
                            float paddingValue)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
@@ -103,7 +103,7 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
                               int activeVoxelNumber,
                               float paddingValue)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h
index 13e9e662..49f60cc5 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/_reg_resampling_gpu.h
@@ -13,7 +13,8 @@
 #ifndef _REG_RESAMPLING_GPU_H
 #define _REG_RESAMPLING_GPU_H
 
-#include "_reg_common_gpu.h"
+#include "_reg_common_cuda.h"
+#include "_reg_blocksize_gpu.h"
 
 extern "C++"
 void reg_resampleImage_gpu(nifti_image *sourceImage,
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 6c7428d6..b6a4b42c 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -34,6 +34,7 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr,
 									nifti_image *warFloImgPtr,
 									nifti_image *warFloGraPtr,
 									nifti_image *forVoxBasedGraPtr,
+									nifti_image *localWeightSimPtr,
 									cudaArray **refDevicePtr,
 									cudaArray **floDevicePtr,
 									int **refMskDevicePtr,
@@ -46,25 +47,26 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr,
 							   maskRefPtr,
 							   warFloImgPtr,
 							   warFloGraPtr,
-							   forVoxBasedGraPtr);
+							   forVoxBasedGraPtr,
+							   localWeightSimPtr);
 	// Check if a symmetric measure is required
 	if(this->isSymmetric){
 		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
 		fprintf(stderr,"[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n");
-		reg_exit(1);
+		reg_exit();
 	}
 	// Check that the input image are of type float
 	if(this->referenceImagePointer->datatype!=NIFTI_TYPE_FLOAT32 ||
 	   this->warpedFloatingImagePointer->datatype!=NIFTI_TYPE_FLOAT32){
 		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
 		fprintf(stderr,"[NiftyReg ERROR] The input images are expected to be float\n");
-		reg_exit(1);
+		reg_exit();
 	}
 	// Check that the input images have only one time point
 	if(this->referenceImagePointer->nt>1 || this->floatingImagePointer->nt>1){
 		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
 		fprintf(stderr,"[NiftyReg ERROR] Both input images should have only one time point\n");
-		reg_exit(1);
+		reg_exit();
 	}
 	// Bind the required pointers
 	this->referenceDevicePointer = *refDevicePtr;
@@ -86,7 +88,7 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage,
 						  int activeVoxelNumber
 						  )
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	// Copy the constant memory variables
@@ -151,7 +153,7 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
 									  int activeVoxelNumber
 									  )
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	// Copy the constant memory variables
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 8dbfbef8..6cc8fac2 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -32,6 +32,7 @@ class reg_ssd_gpu : public reg_ssd , public reg_measure_gpu
                                   nifti_image *warFloImgPtr,
                                   nifti_image *warFloGraPtr,
                                   nifti_image *forVoxBasedGraPtr,
+                                  nifti_image *localWeightSimPtr,
                                   cudaArray **refDevicePtr,
                                   cudaArray **floDevicePtr,
                                   int **refMskDevicePtr,
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 13685ad8..cdc9fc4c 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -13,7 +13,7 @@
 #ifndef _REG_TOOLS_GPU_CU
 #define _REG_TOOLS_GPU_CU
 
-#include "_reg_common_gpu.h"
+#include "_reg_common_cuda.h"
 #include "_reg_tools_gpu.h"
 #include "_reg_tools_kernels.cu"
 
@@ -26,7 +26,7 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
                                       float4 **nodeNMIGradientArray_d,
                                       float weight)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
@@ -62,7 +62,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(	mat44 *sourceMatrix_xyz,
                             nifti_image *controlPointImage,
                             float4 **nodeNMIGradientArray_d)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
@@ -96,7 +96,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
                                 bool smoothXYZ[8])
 
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
 	const unsigned int voxelNumber = image->nx * image->ny * image->nz;
@@ -186,7 +186,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
                                         float4 **imageArray_d,
 										float *spacingVoxel)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     const int voxelNumber = image->nx * image->ny * image->nz;
@@ -263,7 +263,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
 /* *************************************************************** */
 void reg_multiplyValue_gpu(int num, float4 **array_d, float value)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
@@ -278,7 +278,7 @@ void reg_multiplyValue_gpu(int num, float4 **array_d, float value)
 /* *************************************************************** */
 void reg_addValue_gpu(int num, float4 **array_d, float value)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
@@ -293,7 +293,7 @@ void reg_addValue_gpu(int num, float4 **array_d, float value)
 /* *************************************************************** */
 void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
@@ -307,7 +307,7 @@ void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
 /* *************************************************************** */
 void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
@@ -321,7 +321,7 @@ void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
 /* *************************************************************** */
 void reg_fillMaskArray_gpu(int num, int **array1_d)
 {
-    // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
@@ -352,4 +352,3 @@ float reg_minReduction_gpu(float *array_d,int size)
 }
 /* *************************************************************** */
 #endif
-
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 5dde9e99..a486fd7d 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -13,8 +13,9 @@
 #ifndef _REG_TOOLS_GPU_H
 #define _REG_TOOLS_GPU_H
 
-#include "_reg_common_gpu.h"
+#include "_reg_common_cuda.h"
 #include "_reg_tools.h"
+#include "_reg_blocksize_gpu.h"
 #include <thrust/device_ptr.h>
 #include <thrust/reduce.h>
 
@@ -85,4 +86,3 @@ float reg_minReduction_gpu(float *array_d,
 /* ******************************** */
 
 #endif
-

From b3d56b9b3e1ea09e2cba591d23e1bddb9775b5d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 14 Nov 2022 15:25:31 +0000
Subject: [PATCH 004/314] Fix a bug occurring while reading PNGs

---
 niftyreg_build_version.txt | 2 +-
 reg-io/png/reg_png.cpp     | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c17e934b..6529ff88 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-97
+98
diff --git a/reg-io/png/reg_png.cpp b/reg-io/png/reg_png.cpp
index 7d5b0de7..410e9bab 100644
--- a/reg-io/png/reg_png.cpp
+++ b/reg-io/png/reg_png.cpp
@@ -21,7 +21,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
 {
    // We first read the png file
    FILE *pngFile=NULL;
-   pngFile = fopen (pngFileName, "r");
+   pngFile = fopen(pngFileName, "rb");
    if(pngFile==NULL)
    {
       char text[255];
@@ -32,10 +32,11 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
    }
 
    uch sig[8];
-   if(!fread(sig, 1, 8, fopen (pngFileName, "r")))
+   if (!fread(sig, 1, 8, pngFile))
       reg_exit();
-   if(!png_check_sig(sig, 8))
+   if (!png_check_sig(sig, 8))
       reg_exit();
+   rewind(pngFile);
 
    png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
    if (!png_ptr)

From 1a13a2ae9e0691463335ae28e283b56c28997111 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 14 Nov 2022 18:22:11 +0000
Subject: [PATCH 005/314] Fix some deprecated CUDA functions

---
 niftyreg_build_version.txt                    | 2 +-
 reg-lib/cuda/_reg_blocksize_gpu.h             | 6 +++---
 reg-lib/cuda/_reg_common_cuda.cu              | 2 +-
 reg-lib/cuda/_reg_common_cuda.h               | 6 +++---
 reg-lib/cuda/_reg_f3d_gpu.cpp                 | 2 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.cu | 2 +-
 reg-lib/cuda/_reg_resampling_gpu.cu           | 2 +-
 reg-lib/cuda/affineDeformationKernel.cu       | 2 +-
 reg-lib/cuda/blockMatchingKernel.cu           | 2 +-
 reg-lib/cuda/optimizeKernel.cu                | 2 +-
 reg-lib/cuda/resampleKernel.cu                | 2 +-
 11 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6529ff88..3ad5abd0 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-98
+99
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h
index 2620500e..11f98204 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.h
+++ b/reg-lib/cuda/_reg_blocksize_gpu.h
@@ -36,7 +36,7 @@ struct __attribute__((aligned(4))) float4
 		} \
 	}
 #   define NR_CUDA_CHECK_KERNEL(grid,block) { \
-		cudaThreadSynchronize(); \
+		cudaDeviceSynchronize(); \
 		cudaError err = cudaPeekAtLastError(); \
 		if( err != cudaSuccess) { \
 			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
@@ -52,7 +52,7 @@ struct __attribute__((aligned(4))) float4
 #else //CUDART_VERSION >= 3200
 #   define NR_CUDA_SAFE_CALL(call) { \
 		call; \
-		cudaError err = cudaThreadSynchronize(); \
+		cudaError err = cudaDeviceSynchronize(); \
 		if( cudaSuccess != err) { \
 			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
 			__FILE__, __LINE__, cudaGetErrorString(err)); \
@@ -60,7 +60,7 @@ struct __attribute__((aligned(4))) float4
 		} \
 	}
 #   define NR_CUDA_CHECK_KERNEL(grid,block) { \
-		cudaError err = cudaThreadSynchronize(); \
+		cudaError err = cudaDeviceSynchronize(); \
 		if( err != cudaSuccess) { \
 			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
 			__FILE__, __LINE__, cudaGetErrorString(err)); \
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 29f30546..5fcfee5f 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -511,7 +511,7 @@ int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE **cuPtr, const unsig
 {
 
 	NR_CUDA_SAFE_CALL(cudaMemcpy((void *)cpuPtr, (void *)*cuPtr, nElements*sizeof(DTYPE), cudaMemcpyDeviceToHost));
-	//NR_CUDA_SAFE_CALL(cudaThreadSynchronize());
+	//NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
 	return EXIT_SUCCESS;
 }
 template int cudaCommon_transferFromDeviceToCpu<float>(float *cpuPtr, float **cuPtr, const unsigned int nElements);
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 0fa6731d..d1f5d776 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -36,7 +36,7 @@ struct __attribute__((aligned(4))) float4
 		} \
 	}
 #   define NR_CUDA_CHECK_KERNEL(grid,block) { \
-		cudaThreadSynchronize(); \
+		cudaDeviceSynchronize(); \
 		cudaError err = cudaPeekAtLastError(); \
 		if( err != cudaSuccess) { \
 			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
@@ -53,7 +53,7 @@ struct __attribute__((aligned(4))) float4
 #else //CUDART_VERSION >= 3200
 #   define NR_CUDA_SAFE_CALL(call) { \
 		call; \
-		cudaError err = cudaThreadSynchronize(); \
+		cudaError err = cudaDeviceSynchronize(); \
 		if( cudaSuccess != err) { \
 			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
 			__FILE__, __LINE__, cudaGetErrorString(err)); \
@@ -61,7 +61,7 @@ struct __attribute__((aligned(4))) float4
 		} \
 	}
 #   define NR_CUDA_CHECK_KERNEL(grid,block) { \
-		cudaError err = cudaThreadSynchronize(); \
+		cudaError err = cudaDeviceSynchronize(); \
 		if( err != cudaSuccess) { \
 			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
 			__FILE__, __LINE__, cudaGetErrorString(err)); \
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index e2338253..d605ae3b 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -115,7 +115,7 @@ reg_f3d_gpu::~reg_f3d_gpu()
       this->measure_lncc=NULL;
    }
 
-   NR_CUDA_SAFE_CALL(cudaThreadExit())
+   cudaDeviceReset();
 #ifndef NDEBUG
    printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n");
 #endif
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index 90cbb2f0..d8dd6a24 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -59,7 +59,7 @@ void reg_affine_positionField_gpu(	mat44 *affineMatrix,
         dim3 G1(Grid_reg_affine_deformationField,Grid_reg_affine_deformationField,1);
 
     reg_affine_deformationField_kernel <<< G1, B1 >>> (*array_d);
-        NR_CUDA_SAFE_CALL(cudaThreadSynchronize());
+        NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
 #ifndef NDEBUG
     printf("[NiftyReg CUDA DEBUG] reg_affine_deformationField_kernel kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n",
 	       cudaGetErrorString(cudaGetLastError()),G1.x,G1.y,G1.z,B1.x,B1.y,B1.z);
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index ca16e747..f8a40dbf 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -75,7 +75,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
         dim3 B1(NR_BLOCK->Block_reg_resampleImage3D,1,1);
         dim3 G1(Grid_reg_resamplefloatingImage3D,Grid_reg_resamplefloatingImage3D,1);
         reg_resampleImage3D_kernel <<< G1, B1 >>> (*warpedImageArray_d);
-		cudaThreadSynchronize();
+		cudaDeviceSynchronize();
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	else{
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index e6b544e2..ad225837 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -93,6 +93,6 @@ void launchAffine(mat44 *affineTransformation,
 #ifndef NDEBUG
    NR_CUDA_CHECK_KERNEL(G1_b, B1_b)
 #else
-   NR_CUDA_SAFE_CALL(cudaThreadSynchronize());
+   NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
 #endif
 }
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 637a0330..04d428b0 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -629,7 +629,7 @@ void block_matching_method_gpu(nifti_image *targetImage,
 #ifndef NDEBUG
     NR_CUDA_CHECK_KERNEL(BlocksGrid3D, BlockDims1D);
         #else
-    NR_CUDA_SAFE_CALL(cudaThreadSynchronize());
+    NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
 #endif
 
 	NR_CUDA_SAFE_CALL(cudaMemcpy((void * )definedBlock_h, (void * )definedBlock_d, sizeof(unsigned int), cudaMemcpyDeviceToHost));
diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu
index 2c940b6f..7778affe 100644
--- a/reg-lib/cuda/optimizeKernel.cu
+++ b/reg-lib/cuda/optimizeKernel.cu
@@ -291,7 +291,7 @@ void cublasPseudoInverse(float* transformation, float *R_d, float* warped_d, flo
     checkCublasStatus(cublasSgemv(handle, CUBLAS_OP_N, n, m, &alpha, R_d, ldr, warped_d, 1, &beta, transformation, 1));
     checkCublasStatus(cublasDestroy(handle));
     permuteAffineMatrix <<<1, 16 >>>(transformation);
-    cudaThreadSynchronize();
+    cudaDeviceSynchronize();
 
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index 6a910aae..9fdb69c3 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -440,7 +440,7 @@ void launchResample(nifti_image *floatingImage,
 #ifndef NDEBUG
 	NR_CUDA_CHECK_KERNEL(mygrid, myblocks)
 #else
-	NR_CUDA_SAFE_CALL(cudaThreadSynchronize());
+	NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
 #endif
 }
 /* *************************************************************** */

From 5152e0ab46d9b4b7557181b7eac5e98af736b441 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 16 Nov 2022 18:04:07 +0000
Subject: [PATCH 006/314] Disable unsupported options in reg_f3d_gpu

---
 niftyreg_build_version.txt    |  2 +-
 reg-apps/reg_f3d.cpp          |  2 +-
 reg-lib/_reg_f3d.cpp          |  2 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp | 53 +++++++++++++++++++++++++++++++++--
 reg-lib/cuda/_reg_f3d_gpu.h   |  5 ++++
 5 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3ad5abd0..29d6383b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-99
+100
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 4aa9be5b..260bf79d 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -82,7 +82,7 @@ void Usage(char *exec)
    reg_print_info(exec, "");
    reg_print_info(exec, "*** Regularisation options:");
    reg_print_info(exec, "\t-be <float>\t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]");
-   reg_print_info(exec, "\t-le <float>\t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.01]");
+   reg_print_info(exec, "\t-le <float>\t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.00]");
    reg_print_info(exec, "\t-jl <float>\t\tWeight of log of the Jacobian determinant penalty term [0.0]");
    reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position");
    reg_print_info(exec, "\t-land <float> <file>\tUse of a set of landmarks which distance should be minimised");
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 6af78418..7559620e 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -26,7 +26,7 @@ reg_f3d<T>::reg_f3d(int refTimePoint,int floTimePoint)
    this->inputControlPointGrid=NULL; // pointer to external
    this->controlPointGrid=NULL;
    this->bendingEnergyWeight=0.001;
-   this->linearEnergyWeight=0.01;
+   this->linearEnergyWeight=0.00;
    this->jacobianLogWeight=0.;
    this->jacobianLogApproximation=true;
    this->spacing[0]=-5;
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index d605ae3b..7c50f939 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -115,7 +115,6 @@ reg_f3d_gpu::~reg_f3d_gpu()
       this->measure_lncc=NULL;
    }
 
-   cudaDeviceReset();
 #ifndef NDEBUG
    printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n");
 #endif
@@ -400,6 +399,28 @@ double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm()
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+double reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm() {
+   if (this->linearEnergyWeight <= 0)
+      return 0;
+
+   reg_print_fct_error("reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm()");
+   reg_print_msg_error("Option not supported!");
+   reg_exit();
+   return 0;
+}
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+double reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm() {
+   if (this->landmarkRegWeight <= 0)
+      return 0;
+
+   reg_print_fct_error("reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm()");
+   reg_print_msg_error("Option not supported!");
+   reg_exit();
+   return 0;
+}
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::GetDeformationField()
 {
    if(this->controlPointGrid_gpu==NULL)
@@ -548,6 +569,16 @@ void reg_f3d_gpu::GetBendingEnergyGradient()
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+void reg_f3d_gpu::GetLinearEnergyGradient() {
+   if (this->linearEnergyWeight <= 0)
+      return;
+
+   reg_print_fct_error("reg_f3d_gpu::GetLinearEnergyGradient()");
+   reg_print_msg_error("Option not supported!");
+   reg_exit();
+}
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::GetJacobianBasedGradient()
 {
    if(this->jacobianLogWeight<=0) return;
@@ -562,9 +593,18 @@ void reg_f3d_gpu::GetJacobianBasedGradient()
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+void reg_f3d_gpu::GetLandmarkDistanceGradient() {
+   if (this->landmarkRegWeight <= 0)
+      return;
+
+   reg_print_fct_error("reg_f3d_gpu::GetLandmarkDistanceGradient()");
+   reg_print_msg_error("Option not supported!");
+   reg_exit();
+}
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UpdateParameters(float scale)
 {
-
    float4 *currentDOF=reinterpret_cast<float4 *>(this->optimiser->GetCurrentDOF());
    float4 *bestDOF=reinterpret_cast<float4 *>(this->optimiser->GetBestDOF());
    float4 *gradient=reinterpret_cast<float4 *>(this->optimiser->GetGradient());
@@ -578,6 +618,15 @@ void reg_f3d_gpu::UpdateParameters(float scale)
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+void reg_f3d_gpu::SmoothGradient() {
+   if (this->gradientSmoothingSigma != 0) {
+      reg_print_fct_error("reg_f3d_gpu::SmoothGradient()");
+      reg_print_msg_error("Option not supported!");
+      reg_exit();
+   }
+}
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::GetApproximatedGradient()
 {
    float4 *gridValue=NULL;
diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h
index edbed4fc..8f764436 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.h
+++ b/reg-lib/cuda/_reg_f3d_gpu.h
@@ -65,12 +65,17 @@ class reg_f3d_gpu : public reg_f3d<float>
 
    double ComputeJacobianBasedPenaltyTerm(int);
    double ComputeBendingEnergyPenaltyTerm();
+   double ComputeLinearEnergyPenaltyTerm();
+   double ComputeLandmarkDistancePenaltyTerm();
    void GetDeformationField();
    void WarpFloatingImage(int);
    void GetVoxelBasedGradient();
    void GetSimilarityMeasureGradient();
    void GetBendingEnergyGradient();
+   void GetLinearEnergyGradient();
    void GetJacobianBasedGradient();
+   void GetLandmarkDistanceGradient();
+   void SmoothGradient();
    void GetApproximatedGradient();
    void UpdateParameters(float);
    void SetOptimiser();

From df539747cb52899fc8987bb1bd333d32303e3022 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 16 Nov 2022 18:06:14 +0000
Subject: [PATCH 007/314] Implement GetWarpedImage() for reg_f3d_gpu

---
 niftyreg_build_version.txt    |  2 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp | 60 +++++++++++++++++++++++++++++++++++
 reg-lib/cuda/_reg_f3d_gpu.h   |  3 ++
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 29d6383b..398050c6 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-100
+101
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index 7c50f939..0a63d571 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -723,6 +723,66 @@ void reg_f3d_gpu::GetApproximatedGradient()
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+void reg_f3d_gpu::fillImageData(nifti_image *image, float* memoryObject) {
+   size_t size = image->nvox;
+   float *buffer = (float*)malloc(size * sizeof(float));
+
+   if (buffer == NULL) {
+      reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!");
+   }
+
+   cudaCommon_transferFromDeviceToCpu<float>(buffer, &memoryObject, size);
+
+   free(image->data);
+   image->datatype = NIFTI_TYPE_FLOAT32;
+   image->nbyper = sizeof(float);
+   image->data = (void*)malloc(image->nvox * image->nbyper);
+   float *dataT = static_cast<float*>(image->data);
+   for (size_t i = 0; i < size; ++i)
+      dataT[i] = static_cast<float>(buffer[i]);
+   free(buffer);
+}
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+nifti_image** reg_f3d_gpu::GetWarpedImage() {
+   // The initial images are used
+   if (this->inputReference == NULL || this->inputFloating == NULL || this->controlPointGrid == NULL) {
+      reg_print_fct_error("reg_f3d_gpu::GetWarpedImage()");
+      reg_print_msg_error("The reference, floating and control point grid images have to be defined");
+      reg_exit();
+   }
+
+   this->currentReference = this->inputReference;
+   this->currentFloating = this->inputFloating;
+   this->currentMask = (int*)calloc(this->activeVoxelNumber[this->currentLevel], sizeof(int));
+
+   reg_tools_changeDatatype<float>(this->currentReference);
+   reg_tools_changeDatatype<float>(this->currentFloating);
+
+   this->AllocateWarped();
+   this->AllocateDeformationField();
+   this->InitialiseCurrentLevel();
+   this->WarpFloatingImage(3); // cubic spline interpolation
+   this->ClearDeformationField();
+
+   nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
+   warpedImage[0] = nifti_copy_nim_info(this->warped);
+   warpedImage[0]->cal_min = this->inputFloating->cal_min;
+   warpedImage[0]->cal_max = this->inputFloating->cal_max;
+   warpedImage[0]->scl_slope = this->inputFloating->scl_slope;
+   warpedImage[0]->scl_inter = this->inputFloating->scl_inter;
+   this->fillImageData(warpedImage[0], this->warped_gpu);
+   if (this->currentFloating->nt == 2)
+      this->fillImageData(warpedImage[1], this->warped2_gpu);
+
+   this->ClearWarped();
+#ifndef NDEBUG
+   reg_print_fct_debug("reg_f3d_gpu::GetWarpedImage");
+#endif
+   return warpedImage;
+}
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 float reg_f3d_gpu::InitialiseCurrentLevel()
 {
    float maxStepSize=reg_f3d<float>::InitialiseCurrentLevel();
diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h
index 8f764436..a8003d46 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.h
+++ b/reg-lib/cuda/_reg_f3d_gpu.h
@@ -83,6 +83,8 @@ class reg_f3d_gpu : public reg_f3d<float>
    float NormaliseGradient();
    void InitialiseSimilarity();
 
+   void fillImageData(nifti_image *image, float* memoryObject);
+
 public:
    void UseNMISetReferenceBinNumber(int,int);
    void UseNMISetFloatingBinNumber(int,int);
@@ -90,6 +92,7 @@ class reg_f3d_gpu : public reg_f3d<float>
    void UseKLDivergence(int timepoint);
    void UseDTI(int timepoint[6]);
    void UseLNCC(int timepoint, float stdDevKernel);
+   nifti_image** GetWarpedImage();
 
    reg_f3d_gpu(int refTimePoint,int floTimePoint);
    ~reg_f3d_gpu();

From c79bc30038721d8bd7856f6f9b1f752558e845cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 16 Nov 2022 18:06:42 +0000
Subject: [PATCH 008/314] Fix some bugs

---
 niftyreg_build_version.txt |  2 +-
 reg-apps/reg_f3d.cpp       | 10 ++++------
 reg-lib/_reg_base.cpp      |  6 +-----
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 398050c6..257e5632 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-101
+102
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 260bf79d..64fb6d47 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -891,10 +891,7 @@ int main(int argc, char **argv)
    }
 
    // Save the warped image(s)
-   nifti_image **outputWarpedImage=(nifti_image **)malloc(2*sizeof(nifti_image *));
-   outputWarpedImage[0]=NULL;
-   outputWarpedImage[1]=NULL;
-   outputWarpedImage = REG->GetWarpedImage();
+   nifti_image **outputWarpedImage = REG->GetWarpedImage();
    if(outputWarpedImageName==NULL)
       outputWarpedImageName=(char *)"outputResult.nii";
    memset(outputWarpedImage[0]->descrip, 0, 80);
@@ -940,11 +937,12 @@ int main(int argc, char **argv)
    free(referenceLandmark);
    free(floatingLandmark);
 
+   // Erase the registration object
+   delete REG;
+
 #ifdef _USE_CUDA
    cudaCommon_unsetCUDACard(&ctx);
 #endif
-   // Erase the registration object
-   delete REG;
 
    // Clean the allocated images
    if(refLocalWeightSim!=NULL) nifti_image_free(refLocalWeightSim);
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 8124f185..a7b9bfaa 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -209,11 +209,6 @@ reg_base<T>::~reg_base()
       delete []this->floatingThresholdLow;
       this->floatingThresholdLow=NULL;
    }
-   if(this->activeVoxelNumber!=NULL)
-   {
-      delete []this->activeVoxelNumber;
-      this->activeVoxelNumber=NULL;
-   }
    if(this->optimiser!=NULL)
    {
       delete this->optimiser;
@@ -1657,6 +1652,7 @@ void reg_base<T>::Run()
       // Update the number of level for the next level
       this->maxiterationNumber /= 2;
    } // level this->levelToPerform
+   this->currentLevel--;
 
 #ifndef NDEBUG
    reg_print_fct_debug("reg_base<T>::Run");

From 78151cadd9c58288640fdd0a9f7b5ef52ef5fb2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 17 Nov 2022 13:07:59 +0000
Subject: [PATCH 009/314] Reformat reg_f3d_gpu

---
 niftyreg_build_version.txt    |    2 +-
 reg-lib/_reg_base.cpp         |    1 +
 reg-lib/cuda/_reg_f3d_gpu.cpp | 1854 +++++++++++++++------------------
 reg-lib/cuda/_reg_f3d_gpu.h   |  129 ++-
 4 files changed, 928 insertions(+), 1058 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 257e5632..a9c8fe82 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-102
+103
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index a7b9bfaa..dddd2654 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -1652,6 +1652,7 @@ void reg_base<T>::Run()
       // Update the number of level for the next level
       this->maxiterationNumber /= 2;
    } // level this->levelToPerform
+   // Set this to the last value since it's used somewhere else
    this->currentLevel--;
 
 #ifndef NDEBUG
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index 0a63d571..1c613dd0 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -15,1202 +15,1072 @@
 
 #include "_reg_f3d_gpu.h"
 
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-reg_f3d_gpu::reg_f3d_gpu(int refTimePoint,int floTimePoint)
-   : reg_f3d<float>::reg_f3d(refTimePoint,floTimePoint)
-{
-   this->executableName=(char *)"NiftyReg F3D GPU";
-   this->currentReference_gpu=NULL;
-   this->currentFloating_gpu=NULL;
-   this->currentMask_gpu=NULL;
-   this->warped_gpu=NULL;
-   this->controlPointGrid_gpu=NULL;
-   this->deformationFieldImage_gpu=NULL;
-   this->warpedGradientImage_gpu=NULL;
-   this->voxelBasedMeasureGradientImage_gpu=NULL;
-   this->transformationGradient_gpu=NULL;
-
-   this->measure_gpu_ssd=NULL;
-   this->measure_gpu_kld=NULL;
-   this->measure_gpu_dti=NULL;
-   this->measure_gpu_lncc=NULL;
-   this->measure_gpu_nmi=NULL;
-
-   this->currentReference2_gpu=NULL;
-   this->currentFloating2_gpu=NULL;
-   this->warped2_gpu=NULL;
-   this->warpedGradientImage2_gpu=NULL;
+ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint)
+    : reg_f3d<float>::reg_f3d(refTimePoint, floTimePoint) {
+    this->executableName = (char *)"NiftyReg F3D GPU";
+    this->currentReference_gpu = NULL;
+    this->currentFloating_gpu = NULL;
+    this->currentMask_gpu = NULL;
+    this->warped_gpu = NULL;
+    this->controlPointGrid_gpu = NULL;
+    this->deformationFieldImage_gpu = NULL;
+    this->warpedGradientImage_gpu = NULL;
+    this->voxelBasedMeasureGradientImage_gpu = NULL;
+    this->transformationGradient_gpu = NULL;
+
+    this->measure_gpu_ssd = NULL;
+    this->measure_gpu_kld = NULL;
+    this->measure_gpu_dti = NULL;
+    this->measure_gpu_lncc = NULL;
+    this->measure_gpu_nmi = NULL;
+
+    this->currentReference2_gpu = NULL;
+    this->currentFloating2_gpu = NULL;
+    this->warped2_gpu = NULL;
+    this->warpedGradientImage2_gpu = NULL;
 
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu constructor called\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu constructor called\n");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-reg_f3d_gpu::~reg_f3d_gpu()
-{
-   if(this->currentReference_gpu!=NULL)
-      cudaCommon_free(&this->currentReference_gpu);
-   if(this->currentFloating_gpu!=NULL)
-      cudaCommon_free(&this->currentFloating_gpu);
-   if(this->currentMask_gpu!=NULL)
-      cudaCommon_free<int>(&this->currentMask_gpu);
-   if(this->warped_gpu!=NULL)
-      cudaCommon_free<float>(&this->warped_gpu);
-   if(this->controlPointGrid_gpu!=NULL)
-      cudaCommon_free<float4>(&this->controlPointGrid_gpu);
-   if(this->deformationFieldImage_gpu!=NULL)
-      cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
-   if(this->warpedGradientImage_gpu!=NULL)
-      cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
-   if(this->voxelBasedMeasureGradientImage_gpu!=NULL)
-      cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
-   if(this->transformationGradient_gpu!=NULL)
-      cudaCommon_free<float4>(&this->transformationGradient_gpu);
-
-   if(this->currentReference2_gpu!=NULL)
-      cudaCommon_free(&this->currentReference2_gpu);
-   if(this->currentFloating2_gpu!=NULL)
-      cudaCommon_free(&this->currentFloating2_gpu);
-   if(this->warped2_gpu!=NULL)
-      cudaCommon_free<float>(&this->warped2_gpu);
-   if(this->warpedGradientImage2_gpu!=NULL)
-      cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);
-
-   if(this->optimiser!=NULL)
-   {
-      delete this->optimiser;
-      this->optimiser=NULL;
-   }
-
-   if(this->measure_gpu_nmi!=NULL)
-   {
-      delete this->measure_gpu_nmi;
-      this->measure_gpu_nmi=NULL;
-      this->measure_nmi=NULL;
-   }
-   if(this->measure_gpu_ssd!=NULL)
-   {
-      delete this->measure_gpu_ssd;
-      this->measure_gpu_ssd=NULL;
-      this->measure_ssd=NULL;
-   }
-   if(this->measure_gpu_kld!=NULL)
-   {
-      delete this->measure_gpu_kld;
-      this->measure_gpu_kld=NULL;
-      this->measure_kld=NULL;
-   }
-   if(this->measure_gpu_dti!=NULL)
-   {
-      delete this->measure_gpu_dti;
-      this->measure_gpu_dti=NULL;
-      this->measure_dti=NULL;
-   }
-   if(this->measure_gpu_lncc!=NULL)
-   {
-      delete this->measure_gpu_lncc;
-      this->measure_gpu_lncc=NULL;
-      this->measure_lncc=NULL;
-   }
+reg_f3d_gpu::~reg_f3d_gpu() {
+    if (this->currentReference_gpu != NULL)
+        cudaCommon_free(&this->currentReference_gpu);
+    if (this->currentFloating_gpu != NULL)
+        cudaCommon_free(&this->currentFloating_gpu);
+    if (this->currentMask_gpu != NULL)
+        cudaCommon_free<int>(&this->currentMask_gpu);
+    if (this->warped_gpu != NULL)
+        cudaCommon_free<float>(&this->warped_gpu);
+    if (this->controlPointGrid_gpu != NULL)
+        cudaCommon_free<float4>(&this->controlPointGrid_gpu);
+    if (this->deformationFieldImage_gpu != NULL)
+        cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
+    if (this->warpedGradientImage_gpu != NULL)
+        cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
+    if (this->voxelBasedMeasureGradientImage_gpu != NULL)
+        cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
+    if (this->transformationGradient_gpu != NULL)
+        cudaCommon_free<float4>(&this->transformationGradient_gpu);
+
+    if (this->currentReference2_gpu != NULL)
+        cudaCommon_free(&this->currentReference2_gpu);
+    if (this->currentFloating2_gpu != NULL)
+        cudaCommon_free(&this->currentFloating2_gpu);
+    if (this->warped2_gpu != NULL)
+        cudaCommon_free<float>(&this->warped2_gpu);
+    if (this->warpedGradientImage2_gpu != NULL)
+        cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);
+
+    if (this->optimiser != NULL) {
+        delete this->optimiser;
+        this->optimiser = NULL;
+    }
+
+    if (this->measure_gpu_nmi != NULL) {
+        delete this->measure_gpu_nmi;
+        this->measure_gpu_nmi = NULL;
+        this->measure_nmi = NULL;
+    }
+    if (this->measure_gpu_ssd != NULL) {
+        delete this->measure_gpu_ssd;
+        this->measure_gpu_ssd = NULL;
+        this->measure_ssd = NULL;
+    }
+    if (this->measure_gpu_kld != NULL) {
+        delete this->measure_gpu_kld;
+        this->measure_gpu_kld = NULL;
+        this->measure_kld = NULL;
+    }
+    if (this->measure_gpu_dti != NULL) {
+        delete this->measure_gpu_dti;
+        this->measure_gpu_dti = NULL;
+        this->measure_dti = NULL;
+    }
+    if (this->measure_gpu_lncc != NULL) {
+        delete this->measure_gpu_lncc;
+        this->measure_gpu_lncc = NULL;
+        this->measure_lncc = NULL;
+    }
 
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::AllocateWarped()
-{
+void reg_f3d_gpu::AllocateWarped() {
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped called.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped called.\n");
 #endif
-   if(this->currentReference==NULL)
-   {
-      printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
-      reg_exit();
-   }
-   this->ClearWarped();
-   this->warped = nifti_copy_nim_info(this->currentReference);
-   this->warped->dim[0]=this->warped->ndim=this->currentFloating->ndim;
-   this->warped->dim[4]=this->warped->nt=this->currentFloating->nt;
-   this->warped->pixdim[4]=this->warped->dt=1.0;
-   this->warped->nvox = this->warped->nx *
-                        this->warped->ny *
-                        this->warped->nz *
-                        this->warped->nt;
-   this->warped->datatype = this->currentFloating->datatype;
-   this->warped->nbyper = this->currentFloating->nbyper;
-   NR_CUDA_SAFE_CALL(cudaMallocHost(&(this->warped->data), this->warped->nvox*this->warped->nbyper))
-   if(this->warped->nt==1)
-   {
-      if(cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, this->warped->dim))
-      {
-         printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
-         reg_exit();
-      }
-   }
-   else if(this->warped->nt==2)
-   {
-      if(cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, &this->warped2_gpu, this->warped->dim))
-      {
-         printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
-         reg_exit();
-      }
-   }
-   else
-   {
-      printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n");
-      reg_exit();
-   }
+    if (this->currentReference == NULL) {
+        printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
+        reg_exit();
+    }
+    this->ClearWarped();
+    this->warped = nifti_copy_nim_info(this->currentReference);
+    this->warped->dim[0] = this->warped->ndim = this->currentFloating->ndim;
+    this->warped->dim[4] = this->warped->nt = this->currentFloating->nt;
+    this->warped->pixdim[4] = this->warped->dt = 1.0;
+    this->warped->nvox =
+        (size_t)this->warped->nx *
+        (size_t)this->warped->ny *
+        (size_t)this->warped->nz *
+        (size_t)this->warped->nt;
+    this->warped->scl_slope = 1.f;
+    this->warped->scl_inter = 0.f;
+    this->warped->datatype = this->currentFloating->datatype;
+    this->warped->nbyper = this->currentFloating->nbyper;
+    NR_CUDA_SAFE_CALL(cudaMallocHost(&(this->warped->data), this->warped->nvox * this->warped->nbyper));
+    if (this->warped->nt == 1) {
+        if (cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, this->warped->dim)) {
+            printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
+            reg_exit();
+        }
+    } else if (this->warped->nt == 2) {
+        if (cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, &this->warped2_gpu, this->warped->dim)) {
+            printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
+            reg_exit();
+        }
+    } else {
+        printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n");
+        reg_exit();
+    }
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped done.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped done.\n");
 #endif
-   return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearWarped()
-{
-   if(this->warped!=NULL)
-   {
-      NR_CUDA_SAFE_CALL(cudaFreeHost(this->warped->data))
-      this->warped->data = NULL;
-      nifti_image_free(this->warped);
-      this->warped=NULL;
-   }
-   if(this->warped_gpu!=NULL)
-   {
-      cudaCommon_free<float>(&this->warped_gpu);
-      this->warped_gpu=NULL;
-   }
-   if(this->warped2_gpu!=NULL)
-   {
-      cudaCommon_free<float>(&this->warped2_gpu);
-      this->warped2_gpu=NULL;
-   }
-   return;
+void reg_f3d_gpu::ClearWarped() {
+    if (this->warped != NULL) {
+        NR_CUDA_SAFE_CALL(cudaFreeHost(this->warped->data));
+        this->warped->data = NULL;
+        nifti_image_free(this->warped);
+        this->warped = NULL;
+    }
+    if (this->warped_gpu != NULL) {
+        cudaCommon_free<float>(&this->warped_gpu);
+        this->warped_gpu = NULL;
+    }
+    if (this->warped2_gpu != NULL) {
+        cudaCommon_free<float>(&this->warped2_gpu);
+        this->warped2_gpu = NULL;
+    }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::AllocateDeformationField()
-{
+void reg_f3d_gpu::AllocateDeformationField() {
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField called.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField called.\n");
 #endif
-   this->ClearDeformationField();
-   NR_CUDA_SAFE_CALL(cudaMalloc(&this->deformationFieldImage_gpu,
-                                this->activeVoxelNumber[this->currentLevel]*sizeof(float4)))
+    this->ClearDeformationField();
+    NR_CUDA_SAFE_CALL(cudaMalloc(&this->deformationFieldImage_gpu,
+                                 this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
 
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField done.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField done.\n");
 #endif
-   return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearDeformationField()
-{
-   if(this->deformationFieldImage_gpu!=NULL)
-   {
-      cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
-      this->deformationFieldImage_gpu=NULL;
-   }
-   return;
+void reg_f3d_gpu::ClearDeformationField() {
+    if (this->deformationFieldImage_gpu != NULL) {
+        cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
+        this->deformationFieldImage_gpu = NULL;
+    }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::AllocateWarpedGradient()
-{
+void reg_f3d_gpu::AllocateWarpedGradient() {
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient called.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient called.\n");
 #endif
-   this->ClearWarpedGradient();
-   if(this->inputFloating->nt==1)
-   {
-      NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu,
-                                   this->activeVoxelNumber[this->currentLevel]*sizeof(float4)))
-   }
-   else if(this->inputFloating->nt==2)
-   {
-      NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu,
-                                   this->activeVoxelNumber[this->currentLevel]*sizeof(float4)))
-      NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage2_gpu,
-                                   this->activeVoxelNumber[this->currentLevel]*sizeof(float4)))
-   }
-   else
-   {
-      printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n");
-      reg_exit();
-   }
+    this->ClearWarpedGradient();
+    if (this->inputFloating->nt == 1) {
+        NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu,
+                                     this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
+    } else if (this->inputFloating->nt == 2) {
+        NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu,
+                                     this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage2_gpu,
+                                     this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
+    } else {
+        printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n");
+        reg_exit();
+    }
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient done.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient done.\n");
 #endif
-
-   return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearWarpedGradient()
-{
-   if(this->warpedGradientImage_gpu!=NULL)
-   {
-      cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
-      this->warpedGradientImage_gpu=NULL;
-   }
-   if(this->warpedGradientImage2_gpu!=NULL)
-   {
-      cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);
-      this->warpedGradientImage2_gpu=NULL;
-   }
-   return;
+void reg_f3d_gpu::ClearWarpedGradient() {
+    if (this->warpedGradientImage_gpu != NULL) {
+        cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
+        this->warpedGradientImage_gpu = NULL;
+    }
+    if (this->warpedGradientImage2_gpu != NULL) {
+        cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);
+        this->warpedGradientImage2_gpu = NULL;
+    }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient()
-{
+void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() {
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient called.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient called.\n");
 #endif
-   this->ClearVoxelBasedMeasureGradient();
-   if(cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu,
-                                       this->currentReference->dim))
-   {
-      printf("[NiftyReg ERROR] Error when allocating the voxel based measure gradient image.\n");
-      reg_exit();
-   }
+    this->ClearVoxelBasedMeasureGradient();
+    if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu,
+                                         this->currentReference->dim)) {
+        printf("[NiftyReg ERROR] Error when allocating the voxel based measure gradient image.\n");
+        reg_exit();
+    }
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient done.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient done.\n");
 #endif
-   return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearVoxelBasedMeasureGradient()
-{
-   if(this->voxelBasedMeasureGradientImage_gpu!=NULL)
-   {
-      cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
-      this->voxelBasedMeasureGradientImage_gpu=NULL;
-   }
-   return;
+void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() {
+    if (this->voxelBasedMeasureGradientImage_gpu != NULL) {
+        cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
+        this->voxelBasedMeasureGradientImage_gpu = NULL;
+    }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::AllocateTransformationGradient()
-{
+void reg_f3d_gpu::AllocateTransformationGradient() {
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient called.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient called.\n");
 #endif
-   this->ClearTransformationGradient();
-   if(cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu,
-                                       this->controlPointGrid->dim))
-   {
-      printf("[NiftyReg ERROR] Error when allocating the node based gradient image.\n");
-      reg_exit();
-   }
+    this->ClearTransformationGradient();
+    if (cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu,
+                                         this->controlPointGrid->dim)) {
+        printf("[NiftyReg ERROR] Error when allocating the node based gradient image.\n");
+        reg_exit();
+    }
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient done.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient done.\n");
 #endif
-   return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearTransformationGradient()
-{
-   if(this->transformationGradient_gpu!=NULL)
-   {
-      cudaCommon_free<float4>(&this->transformationGradient_gpu);
-      this->transformationGradient_gpu=NULL;
-   }
-   return;
+void reg_f3d_gpu::ClearTransformationGradient() {
+    if (this->transformationGradient_gpu != NULL) {
+        cudaCommon_free<float4>(&this->transformationGradient_gpu);
+        this->transformationGradient_gpu = NULL;
+    }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type)
-{
-   if(this->jacobianLogWeight<=0) return 0.;
-
-   double value;
-   if(type==2)
-   {
-      value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference,
-              this->controlPointGrid,
-              &this->controlPointGrid_gpu,
-              false);
-   }
-   else
-   {
-      value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference,
-              this->controlPointGrid,
-              &this->controlPointGrid_gpu,
-              this->jacobianLogApproximation);
-   }
-   unsigned int maxit=5;
-   if(type>0) maxit=20;
-   unsigned int it=0;
-   while(value!=value && it<maxit)
-   {
-      if(type==2)
-      {
-         value = reg_spline_correctFolding_gpu(this->currentReference,
-                                               this->controlPointGrid,
-                                               &this->controlPointGrid_gpu,
-                                               false);
-      }
-      else
-      {
-         value = reg_spline_correctFolding_gpu(this->currentReference,
-                                               this->controlPointGrid,
-                                               &this->controlPointGrid_gpu,
-                                               this->jacobianLogApproximation);
-      }
+double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) {
+    if (this->jacobianLogWeight <= 0) return 0.;
+
+    double value;
+    if (type == 2) {
+        value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference,
+                                                      this->controlPointGrid,
+                                                      &this->controlPointGrid_gpu,
+                                                      false);
+    } else {
+        value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference,
+                                                      this->controlPointGrid,
+                                                      &this->controlPointGrid_gpu,
+                                                      this->jacobianLogApproximation);
+    }
+    unsigned int maxit = 5;
+    if (type > 0) maxit = 20;
+    unsigned int it = 0;
+    while (value != value && it < maxit) {
+        if (type == 2) {
+            value = reg_spline_correctFolding_gpu(this->currentReference,
+                                                  this->controlPointGrid,
+                                                  &this->controlPointGrid_gpu,
+                                                  false);
+        } else {
+            value = reg_spline_correctFolding_gpu(this->currentReference,
+                                                  this->controlPointGrid,
+                                                  &this->controlPointGrid_gpu,
+                                                  this->jacobianLogApproximation);
+        }
 #ifndef NDEBUG
-      printf("[NiftyReg DEBUG] Folding correction\n");
+        printf("[NiftyReg DEBUG] Folding correction\n");
 #endif
-      it++;
-   }
-   if(type>0)
-   {
-      if(value!=value)
-      {
-         this->optimiser->RestoreBestDOF();
-         fprintf(stderr, "[NiftyReg ERROR] The folding correction scheme failed\n");
-      }
-      else
-      {
+        it++;
+    }
+    if (type > 0) {
+        if (value != value) {
+            this->optimiser->RestoreBestDOF();
+            fprintf(stderr, "[NiftyReg ERROR] The folding correction scheme failed\n");
+        } else {
 #ifdef NDEBUG
-         if(this->verbose)
-         {
+            if (this->verbose) {
 #endif
-            printf("[NiftyReg F3D] Folding correction, %i step(s)\n", it);
+                printf("[NiftyReg F3D] Folding correction, %i step(s)\n", it);
 #ifdef NDEBUG
-         }
+            }
 #endif
-      }
-   }
-   return (double)this->jacobianLogWeight * value;
+        }
+    }
+    return (double)this->jacobianLogWeight * value;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm()
-{
-   if(this->bendingEnergyWeight<=0) return 0.;
+double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() {
+    if (this->bendingEnergyWeight <= 0) return 0.;
 
-   double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid,
-                  &this->controlPointGrid_gpu);
-   return this->bendingEnergyWeight * value;
+    double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid,
+                                                      &this->controlPointGrid_gpu);
+    return this->bendingEnergyWeight * value;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 double reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm() {
-   if (this->linearEnergyWeight <= 0)
-      return 0;
+    if (this->linearEnergyWeight <= 0)
+        return 0;
 
-   reg_print_fct_error("reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm()");
-   reg_print_msg_error("Option not supported!");
-   reg_exit();
-   return 0;
+    reg_print_fct_error("reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm()");
+    reg_print_msg_error("Option not supported!");
+    reg_exit();
+    return 0;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 double reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm() {
-   if (this->landmarkRegWeight <= 0)
-      return 0;
+    if (this->landmarkRegWeight <= 0)
+        return 0;
 
-   reg_print_fct_error("reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm()");
-   reg_print_msg_error("Option not supported!");
-   reg_exit();
-   return 0;
+    reg_print_fct_error("reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm()");
+    reg_print_msg_error("Option not supported!");
+    reg_exit();
+    return 0;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetDeformationField()
-{
-   if(this->controlPointGrid_gpu==NULL)
-   {
-      reg_f3d<float>::GetDeformationField();
-   }
-   else
-   {
-      // Compute the deformation field
-      reg_spline_getDeformationField_gpu(this->controlPointGrid,
-                                         this->currentReference,
-                                         &this->controlPointGrid_gpu,
-                                         &this->deformationFieldImage_gpu,
-                                         &this->currentMask_gpu,
-                                         this->activeVoxelNumber[this->currentLevel],
-                                         true // use B-splines
-                                        );
-   }
-   return;
+void reg_f3d_gpu::GetDeformationField() {
+    if (this->controlPointGrid_gpu == NULL) {
+        reg_f3d<float>::GetDeformationField();
+    } else {
+        // Compute the deformation field
+        reg_spline_getDeformationField_gpu(this->controlPointGrid,
+                                           this->currentReference,
+                                           &this->controlPointGrid_gpu,
+                                           &this->deformationFieldImage_gpu,
+                                           &this->currentMask_gpu,
+                                           this->activeVoxelNumber[this->currentLevel],
+                                           true // use B-splines
+        );
+    }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::WarpFloatingImage(int inter)
-{
-   // Interpolation is linear by default when using GPU, the inter variable is not used.
-   inter=inter; // just to avoid a compiler warning
-
-   // Compute the deformation field
-   this->GetDeformationField();
-
-   // Resample the floating image
-   reg_resampleImage_gpu(this->currentFloating,
-                         &this->warped_gpu,
-                         &this->currentFloating_gpu,
-                         &this->deformationFieldImage_gpu,
-                         &this->currentMask_gpu,
-                         this->activeVoxelNumber[this->currentLevel],
-                         this->warpedPaddingValue);
-   if(this->currentFloating->nt==2)
-   {
-      reg_resampleImage_gpu(this->currentFloating,
-                            &this->warped2_gpu,
-                            &this->currentFloating2_gpu,
-                            &this->deformationFieldImage_gpu,
-                            &this->currentMask_gpu,
-                            this->activeVoxelNumber[this->currentLevel],
-                            this->warpedPaddingValue);
-   }
-
-   return;
+void reg_f3d_gpu::WarpFloatingImage(int inter) {
+    // Interpolation is linear by default when using GPU, the inter variable is not used.
+    inter = inter; // just to avoid a compiler warning
+
+    // Compute the deformation field
+    this->GetDeformationField();
+
+    // Resample the floating image
+    reg_resampleImage_gpu(this->currentFloating,
+                          &this->warped_gpu,
+                          &this->currentFloating_gpu,
+                          &this->deformationFieldImage_gpu,
+                          &this->currentMask_gpu,
+                          this->activeVoxelNumber[this->currentLevel],
+                          this->warpedPaddingValue);
+
+    if (this->currentFloating->nt == 2) {
+        reg_resampleImage_gpu(this->currentFloating,
+                              &this->warped2_gpu,
+                              &this->currentFloating2_gpu,
+                              &this->deformationFieldImage_gpu,
+                              &this->currentMask_gpu,
+                              this->activeVoxelNumber[this->currentLevel],
+                              this->warpedPaddingValue);
+    }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::SetGradientImageToZero()
-{
-   cudaMemset(this->transformationGradient_gpu,0,
-              this->controlPointGrid->nx*this->controlPointGrid->ny*this->controlPointGrid->nz*
-              sizeof(float4));
+void reg_f3d_gpu::SetGradientImageToZero() {
+    cudaMemset(this->transformationGradient_gpu, 0,
+               this->controlPointGrid->nx * this->controlPointGrid->ny * this->controlPointGrid->nz *
+               sizeof(float4));
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetVoxelBasedGradient()
-{
-   // The intensity gradient is first computed
-   reg_getImageGradient_gpu(this->currentFloating,
-                            &this->currentFloating_gpu,
-                            &this->deformationFieldImage_gpu,
-                            &this->warpedGradientImage_gpu,
-                            this->activeVoxelNumber[this->currentLevel],
-                            this->warpedPaddingValue);
+void reg_f3d_gpu::GetVoxelBasedGradient() {
+    // The intensity gradient is first computed
+    reg_getImageGradient_gpu(this->currentFloating,
+                             &this->currentFloating_gpu,
+                             &this->deformationFieldImage_gpu,
+                             &this->warpedGradientImage_gpu,
+                             this->activeVoxelNumber[this->currentLevel],
+                             this->warpedPaddingValue);
 
-   // The voxel based gradient image is filled with zeros
-   cudaMemset(this->voxelBasedMeasureGradientImage_gpu,0,
-              this->currentReference->nx*this->currentReference->ny*this->currentReference->nz*
-              sizeof(float4));
-   // The gradient of the various measures of similarity are computed
-   if(this->measure_gpu_nmi!=NULL)
-      this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient();
+    // The voxel based gradient image is filled with zeros
+    cudaMemset(this->voxelBasedMeasureGradientImage_gpu, 0,
+               this->currentReference->nx * this->currentReference->ny * this->currentReference->nz *
+               sizeof(float4));
+    // The gradient of the various measures of similarity are computed
+    if (this->measure_gpu_nmi != NULL)
+        this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient();
 
-   if(this->measure_gpu_ssd!=NULL)
-      this->measure_gpu_ssd->GetVoxelBasedSimilarityMeasureGradient();
+    if (this->measure_gpu_ssd != NULL)
+        this->measure_gpu_ssd->GetVoxelBasedSimilarityMeasureGradient();
 
-   if(this->measure_gpu_kld!=NULL)
-      this->measure_gpu_kld->GetVoxelBasedSimilarityMeasureGradient();
+    if (this->measure_gpu_kld != NULL)
+        this->measure_gpu_kld->GetVoxelBasedSimilarityMeasureGradient();
 
-   if(this->measure_gpu_lncc!=NULL)
-      this->measure_gpu_lncc->GetVoxelBasedSimilarityMeasureGradient();
+    if (this->measure_gpu_lncc != NULL)
+        this->measure_gpu_lncc->GetVoxelBasedSimilarityMeasureGradient();
 
-   if(this->measure_gpu_dti!=NULL)
-      this->measure_gpu_dti->GetVoxelBasedSimilarityMeasureGradient();
+    if (this->measure_gpu_dti != NULL)
+        this->measure_gpu_dti->GetVoxelBasedSimilarityMeasureGradient();
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetSimilarityMeasureGradient()
-{
+void reg_f3d_gpu::GetSimilarityMeasureGradient() {
+
+    this->GetVoxelBasedGradient();
 
-   this->GetVoxelBasedGradient();
+    // The voxel based gradient is smoothed
+    float smoothingRadius[3] = {
+        this->controlPointGrid->dx / this->currentReference->dx,
+        this->controlPointGrid->dy / this->currentReference->dy,
+        this->controlPointGrid->dz / this->currentReference->dz
+    };
+    reg_smoothImageForCubicSpline_gpu(this->warped,
+                                      &this->voxelBasedMeasureGradientImage_gpu,
+                                      smoothingRadius);
 
-   // The voxel based gradient is smoothed
-   float smoothingRadius[3]=
-   {
-      this->controlPointGrid->dx/this->currentReference->dx,
-      this->controlPointGrid->dy/this->currentReference->dy,
-      this->controlPointGrid->dz/this->currentReference->dz
-   };
-   reg_smoothImageForCubicSpline_gpu(this->warped,
+    // The node gradient is extracted
+    reg_voxelCentric2NodeCentric_gpu(this->warped,
+                                     this->controlPointGrid,
                                      &this->voxelBasedMeasureGradientImage_gpu,
-                                     smoothingRadius);
-
-   // The node gradient is extracted
-   reg_voxelCentric2NodeCentric_gpu(this->warped,
-                                    this->controlPointGrid,
-                                    &this->voxelBasedMeasureGradientImage_gpu,
-                                    &this->transformationGradient_gpu,
-                                    this->similarityWeight);
-
-   /* The similarity measure gradient is converted from voxel space to real space */
-   mat44 *floatingMatrix_xyz=NULL;
-   if(this->currentFloating->sform_code>0)
-      floatingMatrix_xyz = &(this->currentFloating->sto_xyz);
-   else floatingMatrix_xyz = &(this->currentFloating->qto_xyz);
-   reg_convertNMIGradientFromVoxelToRealSpace_gpu( floatingMatrix_xyz,
-         this->controlPointGrid,
-         &this->transformationGradient_gpu);
-   // The gradient is smoothed using a Gaussian kernel if it is required
-   if(this->gradientSmoothingSigma!=0)
-   {
-      reg_gaussianSmoothing_gpu(this->controlPointGrid,
-                                &this->transformationGradient_gpu,
-                                this->gradientSmoothingSigma,
-                                NULL);
-   }
-   return;
+                                     &this->transformationGradient_gpu,
+                                     this->similarityWeight);
+
+    /* The similarity measure gradient is converted from voxel space to real space */
+    mat44 *floatingMatrix_xyz = NULL;
+    if (this->currentFloating->sform_code > 0)
+        floatingMatrix_xyz = &(this->currentFloating->sto_xyz);
+    else floatingMatrix_xyz = &(this->currentFloating->qto_xyz);
+    reg_convertNMIGradientFromVoxelToRealSpace_gpu(floatingMatrix_xyz,
+                                                   this->controlPointGrid,
+                                                   &this->transformationGradient_gpu);
+    // The gradient is smoothed using a Gaussian kernel if it is required
+    if (this->gradientSmoothingSigma != 0) {
+        reg_gaussianSmoothing_gpu(this->controlPointGrid,
+                                  &this->transformationGradient_gpu,
+                                  this->gradientSmoothingSigma,
+                                  NULL);
+    }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetBendingEnergyGradient()
-{
-   if(this->bendingEnergyWeight<=0) return;
+void reg_f3d_gpu::GetBendingEnergyGradient() {
+    if (this->bendingEnergyWeight <= 0) return;
 
-   reg_spline_approxBendingEnergyGradient_gpu(this->controlPointGrid,
-         &this->controlPointGrid_gpu,
-         &this->transformationGradient_gpu,
-         this->bendingEnergyWeight);
-   return;
+    reg_spline_approxBendingEnergyGradient_gpu(this->controlPointGrid,
+                                               &this->controlPointGrid_gpu,
+                                               &this->transformationGradient_gpu,
+                                               this->bendingEnergyWeight);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::GetLinearEnergyGradient() {
-   if (this->linearEnergyWeight <= 0)
-      return;
+    if (this->linearEnergyWeight <= 0)
+        return;
 
-   reg_print_fct_error("reg_f3d_gpu::GetLinearEnergyGradient()");
-   reg_print_msg_error("Option not supported!");
-   reg_exit();
+    reg_print_fct_error("reg_f3d_gpu::GetLinearEnergyGradient()");
+    reg_print_msg_error("Option not supported!");
+    reg_exit();
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetJacobianBasedGradient()
-{
-   if(this->jacobianLogWeight<=0) return;
+void reg_f3d_gpu::GetJacobianBasedGradient() {
+    if (this->jacobianLogWeight <= 0) return;
 
-   reg_spline_getJacobianPenaltyTermGradient_gpu(this->currentReference,
-         this->controlPointGrid,
-         &this->controlPointGrid_gpu,
-         &this->transformationGradient_gpu,
-         this->jacobianLogWeight,
-         this->jacobianLogApproximation);
-   return;
+    reg_spline_getJacobianPenaltyTermGradient_gpu(this->currentReference,
+                                                  this->controlPointGrid,
+                                                  &this->controlPointGrid_gpu,
+                                                  &this->transformationGradient_gpu,
+                                                  this->jacobianLogWeight,
+                                                  this->jacobianLogApproximation);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::GetLandmarkDistanceGradient() {
-   if (this->landmarkRegWeight <= 0)
-      return;
+    if (this->landmarkRegWeight <= 0)
+        return;
 
-   reg_print_fct_error("reg_f3d_gpu::GetLandmarkDistanceGradient()");
-   reg_print_msg_error("Option not supported!");
-   reg_exit();
+    reg_print_fct_error("reg_f3d_gpu::GetLandmarkDistanceGradient()");
+    reg_print_msg_error("Option not supported!");
+    reg_exit();
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UpdateParameters(float scale)
-{
-   float4 *currentDOF=reinterpret_cast<float4 *>(this->optimiser->GetCurrentDOF());
-   float4 *bestDOF=reinterpret_cast<float4 *>(this->optimiser->GetBestDOF());
-   float4 *gradient=reinterpret_cast<float4 *>(this->optimiser->GetGradient());
+void reg_f3d_gpu::UpdateParameters(float scale) {
+    float4 *currentDOF = reinterpret_cast<float4 *>(this->optimiser->GetCurrentDOF());
+    float4 *bestDOF = reinterpret_cast<float4 *>(this->optimiser->GetBestDOF());
+    float4 *gradient = reinterpret_cast<float4 *>(this->optimiser->GetGradient());
 
-   reg_updateControlPointPosition_gpu(this->controlPointGrid,
-                                      &currentDOF,
-                                      &bestDOF,
-                                      &gradient,
-                                      scale);
-   return;
+    reg_updateControlPointPosition_gpu(this->controlPointGrid,
+                                       &currentDOF,
+                                       &bestDOF,
+                                       &gradient,
+                                       scale);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::SmoothGradient() {
-   if (this->gradientSmoothingSigma != 0) {
-      reg_print_fct_error("reg_f3d_gpu::SmoothGradient()");
-      reg_print_msg_error("Option not supported!");
-      reg_exit();
-   }
+    if (this->gradientSmoothingSigma != 0) {
+        reg_print_fct_error("reg_f3d_gpu::SmoothGradient()");
+        reg_print_msg_error("Option not supported!");
+        reg_exit();
+    }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetApproximatedGradient()
-{
-   float4 *gridValue=NULL;
-   float4 *modifiedValue=NULL;
-   float4 *gradientValue=NULL;
-   cudaMallocHost(&gridValue,sizeof(float4));
-   cudaMallocHost(&modifiedValue,sizeof(float4));
-   cudaMallocHost(&gradientValue,sizeof(float4));
-
-   float eps = this->controlPointGrid->dx / 1000.f;
-
-   for(size_t i=0; i<this->optimiser->GetVoxNumber(); ++i)
-   {
-      // Extract the current value
-      cudaMemcpy(gridValue,
-                 &this->controlPointGrid_gpu[i],
-                 sizeof(float4),
-                 cudaMemcpyDeviceToHost);
-      modifiedValue[0]=gridValue[0];
-      // -- X axis
-      // Modify the current value along the x axis
-      modifiedValue[0].x = gridValue[0].x + eps;
-      cudaMemcpy(&this->controlPointGrid_gpu[i],
-                 modifiedValue,
-                 sizeof(float4),
-                 cudaMemcpyHostToDevice);
-      // Evaluate the objective function value
-      gradientValue[0].x=this->GetObjectiveFunctionValue();
-      // Modify the current value along the x axis
-      modifiedValue[0].x = gridValue[0].x - eps;
-      cudaMemcpy(&this->controlPointGrid_gpu[i],
-                 modifiedValue,
-                 sizeof(float4),
-                 cudaMemcpyHostToDevice);
-      // Evaluate the objective function value
-      gradientValue[0].x -= this->GetObjectiveFunctionValue();
-      gradientValue[0].x /= 2.f*eps;
-      modifiedValue[0].x = gridValue[0].x;
-      // -- Y axis
-      // Modify the current value along the y axis
-      modifiedValue[0].y = gridValue[0].y + eps;
-      cudaMemcpy(&this->controlPointGrid_gpu[i],
-                 modifiedValue,
-                 sizeof(float4),
-                 cudaMemcpyHostToDevice);
-      // Evaluate the objective function value
-      gradientValue[0].y=this->GetObjectiveFunctionValue();
-      // Modify the current value the y axis
-      modifiedValue[0].y = gridValue[0].y - eps;
-      cudaMemcpy(&this->controlPointGrid_gpu[i],
-                 modifiedValue,
-                 sizeof(float4),
-                 cudaMemcpyHostToDevice);
-      // Evaluate the objective function value
-      gradientValue[0].y -= this->GetObjectiveFunctionValue();
-      gradientValue[0].y /= 2.f*eps;
-      modifiedValue[0].y = gridValue[0].y;
-      if(this->optimiser->GetNDim()>2)
-      {
-         // -- Z axis
-         // Modify the current value along the y axis
-         modifiedValue[0].z = gridValue[0].z + eps;
-         cudaMemcpy(&this->controlPointGrid_gpu[i],
-                    modifiedValue,
-                    sizeof(float4),
-                    cudaMemcpyHostToDevice);
-         // Evaluate the objective function value
-         gradientValue[0].z=this->GetObjectiveFunctionValue();
-         // Modify the current value the y axis
-         modifiedValue[0].z = gridValue[0].z - eps;
-         cudaMemcpy(&this->controlPointGrid_gpu[i],
-                    modifiedValue,
-                    sizeof(float4),
-                    cudaMemcpyHostToDevice);
-         // Evaluate the objective function value
-         gradientValue[0].z -= this->GetObjectiveFunctionValue();
-         gradientValue[0].z /= 2.f*eps;
-      }
-      // Restore the initial parametrisation
-      cudaMemcpy(&this->controlPointGrid_gpu[i],
-                 gridValue,
-                 sizeof(float4),
-                 cudaMemcpyHostToDevice);
-
-      // Save the assessed gradient
-      cudaMemcpy(&this->transformationGradient_gpu[i],
-                 gradientValue,
-                 sizeof(float4),
-                 cudaMemcpyHostToDevice);
-   }
-   cudaFreeHost(gridValue);
-   cudaFreeHost(modifiedValue);
-   cudaFreeHost(gradientValue);
+void reg_f3d_gpu::GetApproximatedGradient() {
+    float4 *gridValue = NULL;
+    float4 *modifiedValue = NULL;
+    float4 *gradientValue = NULL;
+    cudaMallocHost(&gridValue, sizeof(float4));
+    cudaMallocHost(&modifiedValue, sizeof(float4));
+    cudaMallocHost(&gradientValue, sizeof(float4));
+
+    float eps = this->controlPointGrid->dx / 1000.f;
+
+    for (size_t i = 0; i < this->optimiser->GetVoxNumber(); ++i) {
+        // Extract the current value
+        cudaMemcpy(gridValue,
+                   &this->controlPointGrid_gpu[i],
+                   sizeof(float4),
+                   cudaMemcpyDeviceToHost);
+        modifiedValue[0] = gridValue[0];
+        // -- X axis
+        // Modify the current value along the x axis
+        modifiedValue[0].x = gridValue[0].x + eps;
+        cudaMemcpy(&this->controlPointGrid_gpu[i],
+                   modifiedValue,
+                   sizeof(float4),
+                   cudaMemcpyHostToDevice);
+        // Evaluate the objective function value
+        gradientValue[0].x = this->GetObjectiveFunctionValue();
+        // Modify the current value along the x axis
+        modifiedValue[0].x = gridValue[0].x - eps;
+        cudaMemcpy(&this->controlPointGrid_gpu[i],
+                   modifiedValue,
+                   sizeof(float4),
+                   cudaMemcpyHostToDevice);
+        // Evaluate the objective function value
+        gradientValue[0].x -= this->GetObjectiveFunctionValue();
+        gradientValue[0].x /= 2.f * eps;
+        modifiedValue[0].x = gridValue[0].x;
+        // -- Y axis
+        // Modify the current value along the y axis
+        modifiedValue[0].y = gridValue[0].y + eps;
+        cudaMemcpy(&this->controlPointGrid_gpu[i],
+                   modifiedValue,
+                   sizeof(float4),
+                   cudaMemcpyHostToDevice);
+        // Evaluate the objective function value
+        gradientValue[0].y = this->GetObjectiveFunctionValue();
+        // Modify the current value the y axis
+        modifiedValue[0].y = gridValue[0].y - eps;
+        cudaMemcpy(&this->controlPointGrid_gpu[i],
+                   modifiedValue,
+                   sizeof(float4),
+                   cudaMemcpyHostToDevice);
+        // Evaluate the objective function value
+        gradientValue[0].y -= this->GetObjectiveFunctionValue();
+        gradientValue[0].y /= 2.f * eps;
+        modifiedValue[0].y = gridValue[0].y;
+        if (this->optimiser->GetNDim() > 2) {
+            // -- Z axis
+            // Modify the current value along the y axis
+            modifiedValue[0].z = gridValue[0].z + eps;
+            cudaMemcpy(&this->controlPointGrid_gpu[i],
+                       modifiedValue,
+                       sizeof(float4),
+                       cudaMemcpyHostToDevice);
+            // Evaluate the objective function value
+            gradientValue[0].z = this->GetObjectiveFunctionValue();
+            // Modify the current value the y axis
+            modifiedValue[0].z = gridValue[0].z - eps;
+            cudaMemcpy(&this->controlPointGrid_gpu[i],
+                       modifiedValue,
+                       sizeof(float4),
+                       cudaMemcpyHostToDevice);
+            // Evaluate the objective function value
+            gradientValue[0].z -= this->GetObjectiveFunctionValue();
+            gradientValue[0].z /= 2.f * eps;
+        }
+        // Restore the initial parametrisation
+        cudaMemcpy(&this->controlPointGrid_gpu[i],
+                   gridValue,
+                   sizeof(float4),
+                   cudaMemcpyHostToDevice);
+
+        // Save the assessed gradient
+        cudaMemcpy(&this->transformationGradient_gpu[i],
+                   gradientValue,
+                   sizeof(float4),
+                   cudaMemcpyHostToDevice);
+    }
+    cudaFreeHost(gridValue);
+    cudaFreeHost(modifiedValue);
+    cudaFreeHost(gradientValue);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::fillImageData(nifti_image *image, float* memoryObject) {
-   size_t size = image->nvox;
-   float *buffer = (float*)malloc(size * sizeof(float));
-
-   if (buffer == NULL) {
-      reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!");
-   }
-
-   cudaCommon_transferFromDeviceToCpu<float>(buffer, &memoryObject, size);
-
-   free(image->data);
-   image->datatype = NIFTI_TYPE_FLOAT32;
-   image->nbyper = sizeof(float);
-   image->data = (void*)malloc(image->nvox * image->nbyper);
-   float *dataT = static_cast<float*>(image->data);
-   for (size_t i = 0; i < size; ++i)
-      dataT[i] = static_cast<float>(buffer[i]);
-   free(buffer);
+    size_t size = image->nvox;
+    float *buffer = (float*)malloc(size * sizeof(float));
+
+    if (buffer == NULL) {
+        reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!");
+    }
+
+    cudaCommon_transferFromDeviceToCpu<float>(buffer, &memoryObject, size);
+
+    free(image->data);
+    image->datatype = NIFTI_TYPE_FLOAT32;
+    image->nbyper = sizeof(float);
+    image->data = (void*)malloc(image->nvox * image->nbyper);
+    float *dataT = static_cast<float*>(image->data);
+    for (size_t i = 0; i < size; ++i)
+        dataT[i] = static_cast<float>(buffer[i]);
+    free(buffer);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 nifti_image** reg_f3d_gpu::GetWarpedImage() {
-   // The initial images are used
-   if (this->inputReference == NULL || this->inputFloating == NULL || this->controlPointGrid == NULL) {
-      reg_print_fct_error("reg_f3d_gpu::GetWarpedImage()");
-      reg_print_msg_error("The reference, floating and control point grid images have to be defined");
-      reg_exit();
-   }
-
-   this->currentReference = this->inputReference;
-   this->currentFloating = this->inputFloating;
-   this->currentMask = (int*)calloc(this->activeVoxelNumber[this->currentLevel], sizeof(int));
-
-   reg_tools_changeDatatype<float>(this->currentReference);
-   reg_tools_changeDatatype<float>(this->currentFloating);
-
-   this->AllocateWarped();
-   this->AllocateDeformationField();
-   this->InitialiseCurrentLevel();
-   this->WarpFloatingImage(3); // cubic spline interpolation
-   this->ClearDeformationField();
-
-   nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
-   warpedImage[0] = nifti_copy_nim_info(this->warped);
-   warpedImage[0]->cal_min = this->inputFloating->cal_min;
-   warpedImage[0]->cal_max = this->inputFloating->cal_max;
-   warpedImage[0]->scl_slope = this->inputFloating->scl_slope;
-   warpedImage[0]->scl_inter = this->inputFloating->scl_inter;
-   this->fillImageData(warpedImage[0], this->warped_gpu);
-   if (this->currentFloating->nt == 2)
-      this->fillImageData(warpedImage[1], this->warped2_gpu);
-
-   this->ClearWarped();
+    // The initial images are used
+    if (this->inputReference == NULL || this->inputFloating == NULL || this->controlPointGrid == NULL) {
+        reg_print_fct_error("reg_f3d_gpu::GetWarpedImage()");
+        reg_print_msg_error("The reference, floating and control point grid images have to be defined");
+        reg_exit();
+    }
+
+    this->currentReference = this->inputReference;
+    this->currentFloating = this->inputFloating;
+    this->currentMask = (int*)calloc(this->activeVoxelNumber[this->currentLevel], sizeof(int));
+
+    reg_tools_changeDatatype<float>(this->currentReference);
+    reg_tools_changeDatatype<float>(this->currentFloating);
+
+    this->AllocateWarped();
+    this->AllocateDeformationField();
+    this->InitialiseCurrentLevel();
+    this->WarpFloatingImage(3); // cubic spline interpolation
+    this->ClearDeformationField();
+
+    nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
+    warpedImage[0] = nifti_copy_nim_info(this->warped);
+    warpedImage[0]->cal_min = this->inputFloating->cal_min;
+    warpedImage[0]->cal_max = this->inputFloating->cal_max;
+    warpedImage[0]->scl_slope = this->inputFloating->scl_slope;
+    warpedImage[0]->scl_inter = this->inputFloating->scl_inter;
+    this->fillImageData(warpedImage[0], this->warped_gpu);
+    if (this->currentFloating->nt == 2)
+        this->fillImageData(warpedImage[1], this->warped2_gpu);
+
+    this->ClearWarped();
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_gpu::GetWarpedImage");
+    reg_print_fct_debug("reg_f3d_gpu::GetWarpedImage");
 #endif
-   return warpedImage;
+    return warpedImage;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-float reg_f3d_gpu::InitialiseCurrentLevel()
-{
-   float maxStepSize=reg_f3d<float>::InitialiseCurrentLevel();
+float reg_f3d_gpu::InitialiseCurrentLevel() {
+    float maxStepSize = reg_f3d<float>::InitialiseCurrentLevel();
 
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage called.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage called.\n");
 #endif
 
-   if(this->currentReference_gpu!=NULL) cudaCommon_free(&this->currentReference_gpu);
-   if(this->currentReference2_gpu!=NULL) cudaCommon_free(&this->currentReference2_gpu);
-   if(this->currentReference->nt==1)
-   {
-      if(cudaCommon_allocateArrayToDevice<float>
-            (&this->currentReference_gpu, this->currentReference->dim))
-      {
-         printf("[NiftyReg ERROR] Error when allocating the reference image.\n");
-         reg_exit();
-      }
-      if(cudaCommon_transferNiftiToArrayOnDevice<float>
-            (&this->currentReference_gpu, this->currentReference))
-      {
-         printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
-         reg_exit();
-      }
-   }
-   else if(this->currentReference->nt==2)
-   {
-      if(cudaCommon_allocateArrayToDevice<float>
-            (&this->currentReference_gpu,&this->currentReference2_gpu, this->currentReference->dim))
-      {
-         printf("[NiftyReg ERROR] Error when allocating the reference image.\n");
-         reg_exit();
-      }
-      if(cudaCommon_transferNiftiToArrayOnDevice<float>
-            (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference))
-      {
-         printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
-         reg_exit();
-      }
-   }
-
-   if(this->currentFloating_gpu!=NULL) cudaCommon_free(&this->currentFloating_gpu);
-   if(this->currentFloating2_gpu!=NULL) cudaCommon_free(&this->currentFloating2_gpu);
-   if(this->currentReference->nt==1)
-   {
-      if(cudaCommon_allocateArrayToDevice<float>
-            (&this->currentFloating_gpu, this->currentFloating->dim))
-      {
-         printf("[NiftyReg ERROR] Error when allocating the floating image.\n");
-         reg_exit();
-      }
-      if(cudaCommon_transferNiftiToArrayOnDevice<float>
-            (&this->currentFloating_gpu, this->currentFloating))
-      {
-         printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
-         reg_exit();
-      }
-   }
-   else if(this->currentReference->nt==2)
-   {
-      if(cudaCommon_allocateArrayToDevice<float>
-            (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating->dim))
-      {
-         printf("[NiftyReg ERROR] Error when allocating the floating image.\n");
-         reg_exit();
-      }
-      if(cudaCommon_transferNiftiToArrayOnDevice<float>
-            (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating))
-      {
-         printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
-         reg_exit();
-      }
-   }
-   if(this->controlPointGrid_gpu!=NULL) cudaCommon_free<float4>(&this->controlPointGrid_gpu);
-   if(cudaCommon_allocateArrayToDevice<float4>
-         (&this->controlPointGrid_gpu, this->controlPointGrid->dim))
-   {
-      printf("[NiftyReg ERROR] Error when allocating the control point image.\n");
-      reg_exit();
-   }
-
-   if(cudaCommon_transferNiftiToArrayOnDevice<float4>
-         (&this->controlPointGrid_gpu, this->controlPointGrid))
-   {
-      printf("[NiftyReg ERROR] Error when transfering the control point image.\n");
-      reg_exit();
-   }
-
-   int *targetMask_h;
-   NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask_h,this->activeVoxelNumber[this->currentLevel]*sizeof(int)))
-   int *targetMask_h_ptr = &targetMask_h[0];
-   for(int i=0; i<this->currentReference->nx*this->currentReference->ny*this->currentReference->nz; i++)
-   {
-      if( this->currentMask[i]!=-1) *targetMask_h_ptr++=i;
-   }
-   NR_CUDA_SAFE_CALL(cudaMalloc(&this->currentMask_gpu,
-                                this->activeVoxelNumber[this->currentLevel]*sizeof(int)))
-   NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentMask_gpu, targetMask_h,
-                                this->activeVoxelNumber[this->currentLevel]*sizeof(int),
-                                cudaMemcpyHostToDevice))
-   NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask_h))
+    if (this->currentReference_gpu != NULL) cudaCommon_free(&this->currentReference_gpu);
+    if (this->currentReference2_gpu != NULL) cudaCommon_free(&this->currentReference2_gpu);
+    if (this->currentReference->nt == 1) {
+        if (cudaCommon_allocateArrayToDevice<float>
+            (&this->currentReference_gpu, this->currentReference->dim)) {
+            printf("[NiftyReg ERROR] Error when allocating the reference image.\n");
+            reg_exit();
+        }
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>
+            (&this->currentReference_gpu, this->currentReference)) {
+            printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
+            reg_exit();
+        }
+    } else if (this->currentReference->nt == 2) {
+        if (cudaCommon_allocateArrayToDevice<float>
+            (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference->dim)) {
+            printf("[NiftyReg ERROR] Error when allocating the reference image.\n");
+            reg_exit();
+        }
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>
+            (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference)) {
+            printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
+            reg_exit();
+        }
+    }
+
+    if (this->currentFloating_gpu != NULL) cudaCommon_free(&this->currentFloating_gpu);
+    if (this->currentFloating2_gpu != NULL) cudaCommon_free(&this->currentFloating2_gpu);
+    if (this->currentReference->nt == 1) {
+        if (cudaCommon_allocateArrayToDevice<float>
+            (&this->currentFloating_gpu, this->currentFloating->dim)) {
+            printf("[NiftyReg ERROR] Error when allocating the floating image.\n");
+            reg_exit();
+        }
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>
+            (&this->currentFloating_gpu, this->currentFloating)) {
+            printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
+            reg_exit();
+        }
+    } else if (this->currentReference->nt == 2) {
+        if (cudaCommon_allocateArrayToDevice<float>
+            (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating->dim)) {
+            printf("[NiftyReg ERROR] Error when allocating the floating image.\n");
+            reg_exit();
+        }
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>
+            (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating)) {
+            printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
+            reg_exit();
+        }
+    }
+    if (this->controlPointGrid_gpu != NULL) cudaCommon_free<float4>(&this->controlPointGrid_gpu);
+    if (cudaCommon_allocateArrayToDevice<float4>
+        (&this->controlPointGrid_gpu, this->controlPointGrid->dim)) {
+        printf("[NiftyReg ERROR] Error when allocating the control point image.\n");
+        reg_exit();
+    }
+
+    if (cudaCommon_transferNiftiToArrayOnDevice<float4>
+        (&this->controlPointGrid_gpu, this->controlPointGrid)) {
+        printf("[NiftyReg ERROR] Error when transfering the control point image.\n");
+        reg_exit();
+    }
+
+    int *targetMask_h;
+    NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask_h, this->activeVoxelNumber[this->currentLevel] * sizeof(int)));
+    int *targetMask_h_ptr = &targetMask_h[0];
+    for (int i = 0; i < this->currentReference->nx * this->currentReference->ny * this->currentReference->nz; i++) {
+        if (this->currentMask[i] != -1) *targetMask_h_ptr++ = i;
+    }
+    NR_CUDA_SAFE_CALL(cudaMalloc(&this->currentMask_gpu,
+                                 this->activeVoxelNumber[this->currentLevel] * sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentMask_gpu, targetMask_h,
+                                 this->activeVoxelNumber[this->currentLevel] * sizeof(int),
+                                 cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask_h));
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage done.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage done.\n");
 #endif
-   return maxStepSize;
+    return maxStepSize;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearCurrentInputImage()
-{
+void reg_f3d_gpu::ClearCurrentInputImage() {
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage called.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage called.\n");
 #endif
-   if(cudaCommon_transferFromDeviceToNifti<float4>
-         (this->controlPointGrid, &this->controlPointGrid_gpu))
-   {
-      printf("[NiftyReg ERROR] Error when transfering back the control point image.\n");
-      reg_exit();
-   }
-   cudaCommon_free<float4>(&this->controlPointGrid_gpu);
-   this->controlPointGrid_gpu=NULL;
-   cudaCommon_free(&this->currentReference_gpu);
-   this->currentReference_gpu=NULL;
-   cudaCommon_free(&this->currentFloating_gpu);
-   this->currentFloating_gpu=NULL;
-   NR_CUDA_SAFE_CALL(cudaFree(this->currentMask_gpu))
-   this->currentMask_gpu=NULL;
-
-   if(this->currentReference2_gpu!=NULL)
-      cudaCommon_free(&this->currentReference2_gpu);
-   this->currentReference2_gpu=NULL;
-   if(this->currentFloating2_gpu!=NULL)
-      cudaCommon_free(&this->currentFloating2_gpu);
-   this->currentFloating2_gpu=NULL;
-
-   this->currentReference=NULL;
-   this->currentMask=NULL;
-   this->currentFloating=NULL;
+    if (cudaCommon_transferFromDeviceToNifti<float4>
+        (this->controlPointGrid, &this->controlPointGrid_gpu)) {
+        printf("[NiftyReg ERROR] Error when transfering back the control point image.\n");
+        reg_exit();
+    }
+    cudaCommon_free<float4>(&this->controlPointGrid_gpu);
+    this->controlPointGrid_gpu = NULL;
+    cudaCommon_free(&this->currentReference_gpu);
+    this->currentReference_gpu = NULL;
+    cudaCommon_free(&this->currentFloating_gpu);
+    this->currentFloating_gpu = NULL;
+    NR_CUDA_SAFE_CALL(cudaFree(this->currentMask_gpu));
+    this->currentMask_gpu = NULL;
+
+    if (this->currentReference2_gpu != NULL)
+        cudaCommon_free(&this->currentReference2_gpu);
+    this->currentReference2_gpu = NULL;
+    if (this->currentFloating2_gpu != NULL)
+        cudaCommon_free(&this->currentFloating2_gpu);
+    this->currentFloating2_gpu = NULL;
+
+    this->currentReference = NULL;
+    this->currentMask = NULL;
+    this->currentFloating = NULL;
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage done.\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage done.\n");
 #endif
-   return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::SetOptimiser()
-{
-   if(this->useConjGradient)
-      this->optimiser=new reg_conjugateGradient_gpu();
-   else this->optimiser=new reg_optimiser_gpu();
-   // The cpp and grad images are converted to float * instead of float4
-   // to enable compatibility with cpu class
-   this->optimiser->Initialise(this->controlPointGrid->nvox,
-                               this->controlPointGrid->nz>1?3:2,
-                               this->optimiseX,
-                               this->optimiseY,
-                               this->optimiseZ,
-                               this->maxiterationNumber,
-                               0, // currentIterationNumber,
-                               this,
-                               reinterpret_cast<float *>(this->controlPointGrid_gpu),
-                               reinterpret_cast<float *>(this->transformationGradient_gpu)
-                              );
+void reg_f3d_gpu::SetOptimiser() {
+    if (this->useConjGradient)
+        this->optimiser = new reg_conjugateGradient_gpu();
+    else this->optimiser = new reg_optimiser_gpu();
+    // The cpp and grad images are converted to float * instead of float4
+    // to enable compatibility with cpu class
+    this->optimiser->Initialise(this->controlPointGrid->nvox,
+                                this->controlPointGrid->nz > 1 ? 3 : 2,
+                                this->optimiseX,
+                                this->optimiseY,
+                                this->optimiseZ,
+                                this->maxiterationNumber,
+                                0, // currentIterationNumber,
+                                this,
+                                reinterpret_cast<float *>(this->controlPointGrid_gpu),
+                                reinterpret_cast<float *>(this->transformationGradient_gpu)
+    );
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-float reg_f3d_gpu::NormaliseGradient()
-{
-   // First compute the gradient max length for normalisation purpose
-   float length = reg_getMaximalLength_gpu(&this->transformationGradient_gpu,
-                                           this->optimiser->GetVoxNumber()
-                                          );
+float reg_f3d_gpu::NormaliseGradient() {
+    // First compute the gradient max length for normalisation purpose
+    float length = reg_getMaximalLength_gpu(&this->transformationGradient_gpu,
+                                            this->optimiser->GetVoxNumber()
+    );
 
-   if(strcmp(this->executableName,"NiftyReg F3D GPU")==0)
-   {
-      // The gradient is normalised if we are running F3D
-      // It will be normalised later when running symmetric or F3D2
+    if (strcmp(this->executableName, "NiftyReg F3D GPU") == 0) {
+        // The gradient is normalised if we are running F3D
+        // It will be normalised later when running symmetric or F3D2
 #ifndef NDEBUG
-      printf("[NiftyReg DEBUG] Objective function gradient_gpu maximal length: %g\n", length);
+        printf("[NiftyReg DEBUG] Objective function gradient_gpu maximal length: %g\n", length);
 #endif
-      reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(),
-                            &this->transformationGradient_gpu,
-                            1.f/length);
+        reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(),
+                              &this->transformationGradient_gpu,
+                              1.f / length);
 
-   }
-   // Returns the largest gradient distance
-   return length;
+    }
+    // Returns the largest gradient distance
+    return length;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-int reg_f3d_gpu::CheckMemoryMB()
-{
-   if(!this->initialised) reg_f3d<float>::Initialise();
+int reg_f3d_gpu::CheckMemoryMB() {
+    if (!this->initialised) reg_f3d<float>::Initialise();
 
-   size_t referenceVoxelNumber=this->referencePyramid[this->levelToPerform-1]->nx *
-                               this->referencePyramid[this->levelToPerform-1]->ny *
-                               this->referencePyramid[this->levelToPerform-1]->nz;
+    size_t referenceVoxelNumber = this->referencePyramid[this->levelToPerform - 1]->nx *
+        this->referencePyramid[this->levelToPerform - 1]->ny *
+        this->referencePyramid[this->levelToPerform - 1]->nz;
 
-   size_t warpedVoxelNumber=this->referencePyramid[this->levelToPerform-1]->nx *
-                            this->referencePyramid[this->levelToPerform-1]->ny *
-                            this->referencePyramid[this->levelToPerform-1]->nz *
-                            this->floatingPyramid[this->levelToPerform-1]->nt ;
+    size_t warpedVoxelNumber = this->referencePyramid[this->levelToPerform - 1]->nx *
+        this->referencePyramid[this->levelToPerform - 1]->ny *
+        this->referencePyramid[this->levelToPerform - 1]->nz *
+        this->floatingPyramid[this->levelToPerform - 1]->nt;
 
-   size_t totalMemoryRequiered=0;
-   // reference image
-   totalMemoryRequiered += this->referencePyramid[this->levelToPerform-1]->nvox * sizeof(float);
+    size_t totalMemoryRequiered = 0;
+    // reference image
+    totalMemoryRequiered += this->referencePyramid[this->levelToPerform - 1]->nvox * sizeof(float);
 
-   // floating image
-   totalMemoryRequiered += this->floatingPyramid[this->levelToPerform-1]->nvox * sizeof(float);
+    // floating image
+    totalMemoryRequiered += this->floatingPyramid[this->levelToPerform - 1]->nvox * sizeof(float);
 
-   // warped image
-   totalMemoryRequiered += warpedVoxelNumber * sizeof(float);
+    // warped image
+    totalMemoryRequiered += warpedVoxelNumber * sizeof(float);
 
-   // mask image
-   totalMemoryRequiered += this->activeVoxelNumber[this->levelToPerform-1] * sizeof(int);
+    // mask image
+    totalMemoryRequiered += this->activeVoxelNumber[this->levelToPerform - 1] * sizeof(int);
 
-   // deformation field
-   totalMemoryRequiered += referenceVoxelNumber * sizeof(float4);
+    // deformation field
+    totalMemoryRequiered += referenceVoxelNumber * sizeof(float4);
 
-   // voxel based intensity gradient
-   totalMemoryRequiered += referenceVoxelNumber * sizeof(float4);
+    // voxel based intensity gradient
+    totalMemoryRequiered += referenceVoxelNumber * sizeof(float4);
 
-   // voxel based NMI gradient + smoothing
-   totalMemoryRequiered += 2 * referenceVoxelNumber * sizeof(float4);
+    // voxel based NMI gradient + smoothing
+    totalMemoryRequiered += 2 * referenceVoxelNumber * sizeof(float4);
 
-   // control point grid
-   size_t cp=1;
-   cp *= (int)floor(this->referencePyramid[this->levelToPerform-1]->nx*
-                    this->referencePyramid[this->levelToPerform-1]->dx/
-                    this->spacing[0])+5;
-   cp *= (int)floor(this->referencePyramid[this->levelToPerform-1]->ny*
-                    this->referencePyramid[this->levelToPerform-1]->dy/
-                    this->spacing[1])+5;
-   if(this->referencePyramid[this->levelToPerform-1]->nz>1)
-      cp *= (int)floor(this->referencePyramid[this->levelToPerform-1]->nz*
-                       this->referencePyramid[this->levelToPerform-1]->dz/
-                       this->spacing[2])+5;
-   totalMemoryRequiered += cp * sizeof(float4);
+    // control point grid
+    size_t cp = 1;
+    cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->nx *
+                     this->referencePyramid[this->levelToPerform - 1]->dx /
+                     this->spacing[0]) + 5;
+    cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->ny *
+                     this->referencePyramid[this->levelToPerform - 1]->dy /
+                     this->spacing[1]) + 5;
+    if (this->referencePyramid[this->levelToPerform - 1]->nz > 1)
+        cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->nz *
+                         this->referencePyramid[this->levelToPerform - 1]->dz /
+                         this->spacing[2]) + 5;
+    totalMemoryRequiered += cp * sizeof(float4);
 
-   // node based NMI gradient
-   totalMemoryRequiered += cp * sizeof(float4);
+    // node based NMI gradient
+    totalMemoryRequiered += cp * sizeof(float4);
 
-   // conjugate gradient
-   totalMemoryRequiered += 2 * cp * sizeof(float4);
+    // conjugate gradient
+    totalMemoryRequiered += 2 * cp * sizeof(float4);
 
 
-   // HERE TODO
+    // HERE TODO
 
-   // jacobian array
-   if(this->jacobianLogWeight>0)
-      totalMemoryRequiered += 10 * referenceVoxelNumber *
-                              sizeof(float);
+    // jacobian array
+    if (this->jacobianLogWeight > 0)
+        totalMemoryRequiered += 10 * referenceVoxelNumber *
+        sizeof(float);
 
-   return (int)(ceil((float)totalMemoryRequiered/float(1024*1024)));
+    return (int)(ceil((float)totalMemoryRequiered / float(1024 * 1024)));
 
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber)
-{
-   if(this->measure_gpu_nmi==NULL)
-      this->measure_gpu_nmi=new reg_nmi_gpu;
-   this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0);
-   // I am here adding 4 to the specified bin number to accomodate for
-   // the spline support
-   this->measure_gpu_nmi->SetFloatingBinNumber(floBinNumber+4, timepoint);
-   return;
+void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
+    if (this->measure_gpu_nmi == NULL)
+        this->measure_gpu_nmi = new reg_nmi_gpu;
+    this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0);
+    // I am here adding 4 to the specified bin number to accomodate for
+    // the spline support
+    this->measure_gpu_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber)
-{
-   if(this->measure_gpu_nmi==NULL)
-      this->measure_gpu_nmi=new reg_nmi_gpu;
-   this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0);
-   // I am here adding 4 to the specified bin number to accomodate for
-   // the spline support
-   this->measure_gpu_nmi->SetReferenceBinNumber(refBinNumber+4, timepoint);
-   return;
+void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
+    if (this->measure_gpu_nmi == NULL)
+        this->measure_gpu_nmi = new reg_nmi_gpu;
+    this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0);
+    // I am here adding 4 to the specified bin number to accomodate for
+    // the spline support
+    this->measure_gpu_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseSSD(int timepoint)
-{
-   if(this->measure_gpu_ssd==NULL)
-      this->measure_gpu_ssd=new reg_ssd_gpu;
-   this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0);
-   return;
+void reg_f3d_gpu::UseSSD(int timepoint) {
+    if (this->measure_gpu_ssd == NULL)
+        this->measure_gpu_ssd = new reg_ssd_gpu;
+    this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseKLDivergence(int timepoint)
-{
-   if(this->measure_gpu_kld==NULL)
-      this->measure_gpu_kld=new reg_kld_gpu;
-   this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0);
-   return;
+void reg_f3d_gpu::UseKLDivergence(int timepoint) {
+    if (this->measure_gpu_kld == NULL)
+        this->measure_gpu_kld = new reg_kld_gpu;
+    this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseLNCC(int timepoint, float stddev)
-{
-   if(this->measure_gpu_lncc==NULL)
-      this->measure_gpu_lncc=new reg_lncc_gpu;
-   this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0);
-   this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint,stddev);
-   return;
+void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) {
+    if (this->measure_gpu_lncc == NULL)
+        this->measure_gpu_lncc = new reg_lncc_gpu;
+    this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0);
+    this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint, stddev);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseDTI(int timepoint[6])
-{
-   reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring");
-   reg_exit();
+void reg_f3d_gpu::UseDTI(int timepoint[6]) {
+    reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring");
+    reg_exit();
 
-   // if(this->measure_gpu_dti==NULL)
-   //    this->measure_gpu_dti=new reg_dti_gpu;
-   // for(int i=0; i<6; ++i)
-   //    this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]);
+    // if(this->measure_gpu_dti==NULL)
+    //    this->measure_gpu_dti=new reg_dti_gpu;
+    // for(int i=0; i<6; ++i)
+    //    this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::InitialiseSimilarity()
-{
-   // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
-   if(this->measure_gpu_nmi==NULL &&
-         this->measure_gpu_ssd==NULL &&
-         this->measure_gpu_dti==NULL &&
-         this->measure_gpu_kld==NULL &&
-         this->measure_gpu_lncc==NULL)
-   {
-      measure_gpu_nmi=new reg_nmi_gpu;
-      for(int i=0; i<this->inputReference->nt; ++i)
-         measure_gpu_nmi->SetTimepointWeight(i, 1.0);
-   }
-   if(this->measure_gpu_nmi!=NULL)
-   {
-      this->measure_gpu_nmi->InitialiseMeasure(this->currentReference,
-            this->currentFloating,
-            this->currentMask,
-            this->activeVoxelNumber[this->currentLevel],
-            this->warped,
-            this->warImgGradient,
-            this->voxelBasedMeasureGradient,
-            &this->currentReference_gpu,
-            &this->currentFloating_gpu,
-            &this->currentMask_gpu,
-            &this->warped_gpu,
-            &this->warpedGradientImage_gpu,
-            &this->voxelBasedMeasureGradientImage_gpu
-                                              );
-      this->measure_nmi=this->measure_gpu_nmi;
-   }
-
-   if(this->measure_gpu_ssd!=NULL)
-   {
-      this->measure_gpu_ssd->InitialiseMeasure(this->currentReference,
-            this->currentFloating,
-            this->currentMask,
-            this->activeVoxelNumber[this->currentLevel],
-            this->warped,
-            this->warImgGradient,
-            this->voxelBasedMeasureGradient,
-            this->localWeightSimCurrent,
-            &this->currentReference_gpu,
-            &this->currentFloating_gpu,
-            &this->currentMask_gpu,
-            &this->warped_gpu,
-            &this->warpedGradientImage_gpu,
-            &this->voxelBasedMeasureGradientImage_gpu
-                                              );
-      this->measure_ssd=this->measure_gpu_ssd;
-   }
-
-   if(this->measure_gpu_kld!=NULL)
-   {
-      this->measure_gpu_kld->InitialiseMeasure(this->currentReference,
-            this->currentFloating,
-            this->currentMask,
-            this->activeVoxelNumber[this->currentLevel],
-            this->warped,
-            this->warImgGradient,
-            this->voxelBasedMeasureGradient,
-            &this->currentReference_gpu,
-            &this->currentFloating_gpu,
-            &this->currentMask_gpu,
-            &this->warped_gpu,
-            &this->warpedGradientImage_gpu,
-            &this->voxelBasedMeasureGradientImage_gpu
-                                              );
-      this->measure_kld=this->measure_gpu_kld;
-   }
-
-   if(this->measure_gpu_lncc!=NULL)
-   {
-      this->measure_gpu_lncc->InitialiseMeasure(this->currentReference,
-            this->currentFloating,
-            this->currentMask,
-            this->activeVoxelNumber[this->currentLevel],
-            this->warped,
-            this->warImgGradient,
-            this->voxelBasedMeasureGradient,
-            &this->currentReference_gpu,
-            &this->currentFloating_gpu,
-            &this->currentMask_gpu,
-            &this->warped_gpu,
-            &this->warpedGradientImage_gpu,
-            &this->voxelBasedMeasureGradientImage_gpu
-                                               );
-      this->measure_lncc=this->measure_gpu_lncc;
-   }
-
-   if(this->measure_gpu_dti!=NULL)
-   {
-      this->measure_gpu_dti->InitialiseMeasure(this->currentReference,
-            this->currentFloating,
-            this->currentMask,
-            this->activeVoxelNumber[this->currentLevel],
-            this->warped,
-            this->warImgGradient,
-            this->voxelBasedMeasureGradient,
-            &this->currentReference_gpu,
-            &this->currentFloating_gpu,
-            &this->currentMask_gpu,
-            &this->warped_gpu,
-            &this->warpedGradientImage_gpu,
-            &this->voxelBasedMeasureGradientImage_gpu
-                                              );
-      this->measure_dti=this->measure_gpu_dti;
-   }
+void reg_f3d_gpu::InitialiseSimilarity() {
+    // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
+    if (this->measure_gpu_nmi == NULL &&
+        this->measure_gpu_ssd == NULL &&
+        this->measure_gpu_dti == NULL &&
+        this->measure_gpu_kld == NULL &&
+        this->measure_gpu_lncc == NULL) {
+        measure_gpu_nmi = new reg_nmi_gpu;
+        for (int i = 0; i < this->inputReference->nt; ++i)
+            measure_gpu_nmi->SetTimepointWeight(i, 1.0);
+    }
+    if (this->measure_gpu_nmi != NULL) {
+        this->measure_gpu_nmi->InitialiseMeasure(this->currentReference,
+                                                 this->currentFloating,
+                                                 this->currentMask,
+                                                 this->activeVoxelNumber[this->currentLevel],
+                                                 this->warped,
+                                                 this->warImgGradient,
+                                                 this->voxelBasedMeasureGradient,
+                                                 &this->currentReference_gpu,
+                                                 &this->currentFloating_gpu,
+                                                 &this->currentMask_gpu,
+                                                 &this->warped_gpu,
+                                                 &this->warpedGradientImage_gpu,
+                                                 &this->voxelBasedMeasureGradientImage_gpu
+        );
+        this->measure_nmi = this->measure_gpu_nmi;
+    }
+
+    if (this->measure_gpu_ssd != NULL) {
+        this->measure_gpu_ssd->InitialiseMeasure(this->currentReference,
+                                                 this->currentFloating,
+                                                 this->currentMask,
+                                                 this->activeVoxelNumber[this->currentLevel],
+                                                 this->warped,
+                                                 this->warImgGradient,
+                                                 this->voxelBasedMeasureGradient,
+                                                 this->localWeightSimCurrent,
+                                                 &this->currentReference_gpu,
+                                                 &this->currentFloating_gpu,
+                                                 &this->currentMask_gpu,
+                                                 &this->warped_gpu,
+                                                 &this->warpedGradientImage_gpu,
+                                                 &this->voxelBasedMeasureGradientImage_gpu
+        );
+        this->measure_ssd = this->measure_gpu_ssd;
+    }
+
+    if (this->measure_gpu_kld != NULL) {
+        this->measure_gpu_kld->InitialiseMeasure(this->currentReference,
+                                                 this->currentFloating,
+                                                 this->currentMask,
+                                                 this->activeVoxelNumber[this->currentLevel],
+                                                 this->warped,
+                                                 this->warImgGradient,
+                                                 this->voxelBasedMeasureGradient,
+                                                 &this->currentReference_gpu,
+                                                 &this->currentFloating_gpu,
+                                                 &this->currentMask_gpu,
+                                                 &this->warped_gpu,
+                                                 &this->warpedGradientImage_gpu,
+                                                 &this->voxelBasedMeasureGradientImage_gpu
+        );
+        this->measure_kld = this->measure_gpu_kld;
+    }
+
+    if (this->measure_gpu_lncc != NULL) {
+        this->measure_gpu_lncc->InitialiseMeasure(this->currentReference,
+                                                  this->currentFloating,
+                                                  this->currentMask,
+                                                  this->activeVoxelNumber[this->currentLevel],
+                                                  this->warped,
+                                                  this->warImgGradient,
+                                                  this->voxelBasedMeasureGradient,
+                                                  &this->currentReference_gpu,
+                                                  &this->currentFloating_gpu,
+                                                  &this->currentMask_gpu,
+                                                  &this->warped_gpu,
+                                                  &this->warpedGradientImage_gpu,
+                                                  &this->voxelBasedMeasureGradientImage_gpu
+        );
+        this->measure_lncc = this->measure_gpu_lncc;
+    }
+
+    if (this->measure_gpu_dti != NULL) {
+        this->measure_gpu_dti->InitialiseMeasure(this->currentReference,
+                                                 this->currentFloating,
+                                                 this->currentMask,
+                                                 this->activeVoxelNumber[this->currentLevel],
+                                                 this->warped,
+                                                 this->warImgGradient,
+                                                 this->voxelBasedMeasureGradient,
+                                                 &this->currentReference_gpu,
+                                                 &this->currentFloating_gpu,
+                                                 &this->currentMask_gpu,
+                                                 &this->warped_gpu,
+                                                 &this->warpedGradientImage_gpu,
+                                                 &this->voxelBasedMeasureGradientImage_gpu
+        );
+        this->measure_dti = this->measure_gpu_dti;
+    }
 #ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_f3d_gpu::InitialiseSimilarity() done\n");
+    printf("[NiftyReg DEBUG] reg_f3d_gpu::InitialiseSimilarity() done\n");
 #endif
-   return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h
index a8003d46..2b141134 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.h
+++ b/reg-lib/cuda/_reg_f3d_gpu.h
@@ -23,80 +23,79 @@
 #include "_reg_optimiser_gpu.h"
 #include "_reg_f3d.h"
 
-class reg_f3d_gpu : public reg_f3d<float>
-{
+class reg_f3d_gpu: public reg_f3d<float> {
 protected:
-   // cuda variables
-   cudaArray *currentReference_gpu;
-   cudaArray *currentFloating_gpu;
-   int *currentMask_gpu;
-   float *warped_gpu;
-   float4 *controlPointGrid_gpu;
-   float4 *deformationFieldImage_gpu;
-   float4 *warpedGradientImage_gpu;
-   float4 *voxelBasedMeasureGradientImage_gpu;
-   float4 *transformationGradient_gpu;
+    // cuda variables
+    cudaArray *currentReference_gpu;
+    cudaArray *currentFloating_gpu;
+    int *currentMask_gpu;
+    float *warped_gpu;
+    float4 *controlPointGrid_gpu;
+    float4 *deformationFieldImage_gpu;
+    float4 *warpedGradientImage_gpu;
+    float4 *voxelBasedMeasureGradientImage_gpu;
+    float4 *transformationGradient_gpu;
 
-   // cuda variable for multispectral registration
-   cudaArray *currentReference2_gpu;
-   cudaArray *currentFloating2_gpu;
-   float *warped2_gpu;
-   float4 *warpedGradientImage2_gpu;
+    // cuda variable for multispectral registration
+    cudaArray *currentReference2_gpu;
+    cudaArray *currentFloating2_gpu;
+    float *warped2_gpu;
+    float4 *warpedGradientImage2_gpu;
 
-   // Measure related variables
-   reg_ssd_gpu *measure_gpu_ssd;
-   reg_kld_gpu *measure_gpu_kld;
-   reg_dti_gpu *measure_gpu_dti;
-   reg_lncc_gpu *measure_gpu_lncc;
-   reg_nmi_gpu *measure_gpu_nmi;
+    // Measure related variables
+    reg_ssd_gpu *measure_gpu_ssd;
+    reg_kld_gpu *measure_gpu_kld;
+    reg_dti_gpu *measure_gpu_dti;
+    reg_lncc_gpu *measure_gpu_lncc;
+    reg_nmi_gpu *measure_gpu_nmi;
 
-   float InitialiseCurrentLevel();
-   void ClearCurrentInputImage();
-   void AllocateWarped();
-   void ClearWarped();
-   void AllocateDeformationField();
-   void ClearDeformationField();
-   void AllocateWarpedGradient();
-   void ClearWarpedGradient();
-   void AllocateVoxelBasedMeasureGradient();
-   void ClearVoxelBasedMeasureGradient();
-   void AllocateTransformationGradient();
-   void ClearTransformationGradient();
+    float InitialiseCurrentLevel();
+    void ClearCurrentInputImage();
+    void AllocateWarped();
+    void ClearWarped();
+    void AllocateDeformationField();
+    void ClearDeformationField();
+    void AllocateWarpedGradient();
+    void ClearWarpedGradient();
+    void AllocateVoxelBasedMeasureGradient();
+    void ClearVoxelBasedMeasureGradient();
+    void AllocateTransformationGradient();
+    void ClearTransformationGradient();
 
-   double ComputeJacobianBasedPenaltyTerm(int);
-   double ComputeBendingEnergyPenaltyTerm();
-   double ComputeLinearEnergyPenaltyTerm();
-   double ComputeLandmarkDistancePenaltyTerm();
-   void GetDeformationField();
-   void WarpFloatingImage(int);
-   void GetVoxelBasedGradient();
-   void GetSimilarityMeasureGradient();
-   void GetBendingEnergyGradient();
-   void GetLinearEnergyGradient();
-   void GetJacobianBasedGradient();
-   void GetLandmarkDistanceGradient();
-   void SmoothGradient();
-   void GetApproximatedGradient();
-   void UpdateParameters(float);
-   void SetOptimiser();
-   void SetGradientImageToZero();
-   float NormaliseGradient();
-   void InitialiseSimilarity();
+    double ComputeJacobianBasedPenaltyTerm(int);
+    double ComputeBendingEnergyPenaltyTerm();
+    double ComputeLinearEnergyPenaltyTerm();
+    double ComputeLandmarkDistancePenaltyTerm();
+    void GetDeformationField();
+    void WarpFloatingImage(int);
+    void GetVoxelBasedGradient();
+    void GetSimilarityMeasureGradient();
+    void GetBendingEnergyGradient();
+    void GetLinearEnergyGradient();
+    void GetJacobianBasedGradient();
+    void GetLandmarkDistanceGradient();
+    void SmoothGradient();
+    void GetApproximatedGradient();
+    void UpdateParameters(float);
+    void SetOptimiser();
+    void SetGradientImageToZero();
+    float NormaliseGradient();
+    void InitialiseSimilarity();
 
-   void fillImageData(nifti_image *image, float* memoryObject);
+    void fillImageData(nifti_image *image, float* memoryObject);
 
 public:
-   void UseNMISetReferenceBinNumber(int,int);
-   void UseNMISetFloatingBinNumber(int,int);
-   void UseSSD(int timepoint);
-   void UseKLDivergence(int timepoint);
-   void UseDTI(int timepoint[6]);
-   void UseLNCC(int timepoint, float stdDevKernel);
-   nifti_image** GetWarpedImage();
+    void UseNMISetReferenceBinNumber(int, int);
+    void UseNMISetFloatingBinNumber(int, int);
+    void UseSSD(int timepoint);
+    void UseKLDivergence(int timepoint);
+    void UseDTI(int timepoint[6]);
+    void UseLNCC(int timepoint, float stdDevKernel);
+    nifti_image** GetWarpedImage();
 
-   reg_f3d_gpu(int refTimePoint,int floTimePoint);
-   ~reg_f3d_gpu();
-   int CheckMemoryMB();
+    reg_f3d_gpu(int refTimePoint, int floTimePoint);
+    ~reg_f3d_gpu();
+    int CheckMemoryMB();
 };
 
 #include "_reg_f3d_gpu.cpp"

From 8dcdbc3ed3ab2cff8fe19cf5831ec88d5da5ec64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 17 Nov 2022 20:19:16 +0000
Subject: [PATCH 010/314] Add more debug prints to reg_f3d_gpu

---
 niftyreg_build_version.txt    |   2 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp | 307 ++++++++++++++++++++--------------
 2 files changed, 178 insertions(+), 131 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a9c8fe82..b16e5f75 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-103
+104
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index 1c613dd0..8f008e63 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -42,7 +42,7 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint)
     this->warpedGradientImage2_gpu = NULL;
 
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu constructor called\n");
+    reg_print_fct_debug("reg_f3d_gpu::reg_f3d_gpu");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -108,17 +108,15 @@ reg_f3d_gpu::~reg_f3d_gpu() {
     }
 
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n");
+    reg_print_fct_debug("reg_f3d_gpu::~reg_f3d_gpu");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::AllocateWarped() {
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped called.\n");
-#endif
     if (this->currentReference == NULL) {
-        printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
+        reg_print_fct_error("reg_f3d_gpu::AllocateWarped()");
+        reg_print_msg_error("Error when allocating the warped image");
         reg_exit();
     }
     this->ClearWarped();
@@ -138,20 +136,23 @@ void reg_f3d_gpu::AllocateWarped() {
     NR_CUDA_SAFE_CALL(cudaMallocHost(&(this->warped->data), this->warped->nvox * this->warped->nbyper));
     if (this->warped->nt == 1) {
         if (cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, this->warped->dim)) {
-            printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
+            reg_print_fct_error("reg_f3d_gpu::AllocateWarped()");
+            reg_print_msg_error("Error when allocating the warped image");
             reg_exit();
         }
     } else if (this->warped->nt == 2) {
         if (cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, &this->warped2_gpu, this->warped->dim)) {
-            printf("[NiftyReg ERROR] Error when allocating the warped image.\n");
+            reg_print_fct_error("reg_f3d_gpu::AllocateWarped()");
+            reg_print_msg_error("Error when allocating the warped image");
             reg_exit();
         }
     } else {
-        printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n");
+        reg_print_fct_error("reg_f3d_gpu::AllocateWarped()");
+        reg_print_msg_error("reg_f3d_gpu does not handle more than 2 time points in the floating image");
         reg_exit();
     }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped done.\n");
+    reg_print_fct_debug("reg_f3d_gpu::AllocateWarped");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -170,19 +171,18 @@ void reg_f3d_gpu::ClearWarped() {
         cudaCommon_free<float>(&this->warped2_gpu);
         this->warped2_gpu = NULL;
     }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::ClearWarped");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::AllocateDeformationField() {
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField called.\n");
-#endif
     this->ClearDeformationField();
     NR_CUDA_SAFE_CALL(cudaMalloc(&this->deformationFieldImage_gpu,
                                  this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
-
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField done.\n");
+    reg_print_fct_debug("reg_f3d_gpu::AllocateDeformationField");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -191,13 +191,13 @@ void reg_f3d_gpu::ClearDeformationField() {
         cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
         this->deformationFieldImage_gpu = NULL;
     }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::ClearDeformationField");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::AllocateWarpedGradient() {
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient called.\n");
-#endif
     this->ClearWarpedGradient();
     if (this->inputFloating->nt == 1) {
         NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu,
@@ -208,11 +208,12 @@ void reg_f3d_gpu::AllocateWarpedGradient() {
         NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage2_gpu,
                                      this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
     } else {
-        printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n");
+        reg_print_fct_error("reg_f3d_gpu::AllocateWarpedGradient()");
+        reg_print_msg_error("reg_f3d_gpu does not handle more than 2 time points in the floating image");
         reg_exit();
     }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient done.\n");
+    reg_print_fct_debug("reg_f3d_gpu::AllocateWarpedGradient");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -225,21 +226,21 @@ void reg_f3d_gpu::ClearWarpedGradient() {
         cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);
         this->warpedGradientImage2_gpu = NULL;
     }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::ClearWarpedGradient");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() {
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient called.\n");
-#endif
     this->ClearVoxelBasedMeasureGradient();
-    if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu,
-                                         this->currentReference->dim)) {
-        printf("[NiftyReg ERROR] Error when allocating the voxel based measure gradient image.\n");
+    if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, this->currentReference->dim)) {
+        reg_print_fct_error("reg_f3d_gpu::AllocateVoxelBasedMeasureGradient()");
+        reg_print_msg_error("Error when allocating the voxel based measure gradient image");
         reg_exit();
     }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient done.\n");
+    reg_print_fct_debug("reg_f3d_gpu::AllocateVoxelBasedMeasureGradient");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -248,21 +249,21 @@ void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() {
         cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
         this->voxelBasedMeasureGradientImage_gpu = NULL;
     }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::ClearVoxelBasedMeasureGradient");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::AllocateTransformationGradient() {
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient called.\n");
-#endif
     this->ClearTransformationGradient();
-    if (cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu,
-                                         this->controlPointGrid->dim)) {
-        printf("[NiftyReg ERROR] Error when allocating the node based gradient image.\n");
+    if (cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu, this->controlPointGrid->dim)) {
+        reg_print_fct_error("reg_f3d_gpu::AllocateTransformationGradient()");
+        reg_print_msg_error("Error when allocating the node based gradient image");
         reg_exit();
     }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient done.\n");
+    reg_print_fct_debug("reg_f3d_gpu::AllocateNodeBasedGradient");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -271,6 +272,9 @@ void reg_f3d_gpu::ClearTransformationGradient() {
         cudaCommon_free<float4>(&this->transformationGradient_gpu);
         this->transformationGradient_gpu = NULL;
     }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::ClearTransformationGradient");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -305,33 +309,40 @@ double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) {
                                                   this->jacobianLogApproximation);
         }
 #ifndef NDEBUG
-        printf("[NiftyReg DEBUG] Folding correction\n");
+        reg_print_msg_debug("Folding correction");
 #endif
         it++;
     }
     if (type > 0) {
         if (value != value) {
             this->optimiser->RestoreBestDOF();
-            fprintf(stderr, "[NiftyReg ERROR] The folding correction scheme failed\n");
+            reg_print_fct_error("reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm()");
+            reg_print_msg_error("The folding correction scheme failed");
         } else {
-#ifdef NDEBUG
-            if (this->verbose) {
-#endif
-                printf("[NiftyReg F3D] Folding correction, %i step(s)\n", it);
-#ifdef NDEBUG
+#ifndef NDEBUG
+            if (it > 0) {
+                char text[255];
+                sprintf(text, "Folding correction, %i step(s)", it);
+                reg_print_msg_debug(text);
             }
 #endif
         }
     }
-    return (double)this->jacobianLogWeight * value;
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm");
+#endif
+    return this->jacobianLogWeight * value;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() {
-    if (this->bendingEnergyWeight <= 0) return 0.;
+    if (this->bendingEnergyWeight <= 0) return 0;
 
     double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid,
                                                       &this->controlPointGrid_gpu);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm");
+#endif
     return this->bendingEnergyWeight * value;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -369,9 +380,11 @@ void reg_f3d_gpu::GetDeformationField() {
                                            &this->deformationFieldImage_gpu,
                                            &this->currentMask_gpu,
                                            this->activeVoxelNumber[this->currentLevel],
-                                           true // use B-splines
-        );
+                                           true); // use B-splines
     }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::GetDeformationField");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -400,13 +413,18 @@ void reg_f3d_gpu::WarpFloatingImage(int inter) {
                               this->activeVoxelNumber[this->currentLevel],
                               this->warpedPaddingValue);
     }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::WarpFloatingImage");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::SetGradientImageToZero() {
     cudaMemset(this->transformationGradient_gpu, 0,
-               this->controlPointGrid->nx * this->controlPointGrid->ny * this->controlPointGrid->nz *
-               sizeof(float4));
+               this->controlPointGrid->nx * this->controlPointGrid->ny * this->controlPointGrid->nz * sizeof(float4));
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::SetGradientImageToZero");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -438,11 +456,14 @@ void reg_f3d_gpu::GetVoxelBasedGradient() {
 
     if (this->measure_gpu_dti != NULL)
         this->measure_gpu_dti->GetVoxelBasedSimilarityMeasureGradient();
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::GetVoxelBasedGradient");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::GetSimilarityMeasureGradient() {
-
     this->GetVoxelBasedGradient();
 
     // The voxel based gradient is smoothed
@@ -477,6 +498,9 @@ void reg_f3d_gpu::GetSimilarityMeasureGradient() {
                                   this->gradientSmoothingSigma,
                                   NULL);
     }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::GetSimilarityMeasureGradient");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -487,6 +511,9 @@ void reg_f3d_gpu::GetBendingEnergyGradient() {
                                                &this->controlPointGrid_gpu,
                                                &this->transformationGradient_gpu,
                                                this->bendingEnergyWeight);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::GetBendingEnergyGradient");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -509,6 +536,9 @@ void reg_f3d_gpu::GetJacobianBasedGradient() {
                                                   &this->transformationGradient_gpu,
                                                   this->jacobianLogWeight,
                                                   this->jacobianLogApproximation);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::GetJacobianBasedGradient");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -523,15 +553,15 @@ void reg_f3d_gpu::GetLandmarkDistanceGradient() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UpdateParameters(float scale) {
-    float4 *currentDOF = reinterpret_cast<float4 *>(this->optimiser->GetCurrentDOF());
-    float4 *bestDOF = reinterpret_cast<float4 *>(this->optimiser->GetBestDOF());
-    float4 *gradient = reinterpret_cast<float4 *>(this->optimiser->GetGradient());
-
-    reg_updateControlPointPosition_gpu(this->controlPointGrid,
-                                       &currentDOF,
-                                       &bestDOF,
-                                       &gradient,
-                                       scale);
+    float4 *currentDOF = reinterpret_cast<float4*>(this->optimiser->GetCurrentDOF());
+    float4 *bestDOF = reinterpret_cast<float4*>(this->optimiser->GetBestDOF());
+    float4 *gradient = reinterpret_cast<float4*>(this->optimiser->GetGradient());
+
+    reg_updateControlPointPosition_gpu(this->controlPointGrid, &currentDOF, &bestDOF, &gradient, scale);
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::UpdateParameters");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -634,6 +664,10 @@ void reg_f3d_gpu::GetApproximatedGradient() {
     cudaFreeHost(gridValue);
     cudaFreeHost(modifiedValue);
     cudaFreeHost(gradientValue);
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::GetApproximatedGradient");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -642,7 +676,9 @@ void reg_f3d_gpu::fillImageData(nifti_image *image, float* memoryObject) {
     float *buffer = (float*)malloc(size * sizeof(float));
 
     if (buffer == NULL) {
-        reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!");
+        reg_print_fct_error("reg_f3d_gpu::fillImageData()");
+        reg_print_msg_error("Memory allocation did not complete successfully!");
+        reg_exit();
     }
 
     cudaCommon_transferFromDeviceToCpu<float>(buffer, &memoryObject, size);
@@ -700,32 +736,30 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() {
 float reg_f3d_gpu::InitialiseCurrentLevel() {
     float maxStepSize = reg_f3d<float>::InitialiseCurrentLevel();
 
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage called.\n");
-#endif
-
     if (this->currentReference_gpu != NULL) cudaCommon_free(&this->currentReference_gpu);
     if (this->currentReference2_gpu != NULL) cudaCommon_free(&this->currentReference2_gpu);
     if (this->currentReference->nt == 1) {
-        if (cudaCommon_allocateArrayToDevice<float>
-            (&this->currentReference_gpu, this->currentReference->dim)) {
-            printf("[NiftyReg ERROR] Error when allocating the reference image.\n");
+        if (cudaCommon_allocateArrayToDevice<float>(&this->currentReference_gpu, this->currentReference->dim)) {
+            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
+            reg_print_msg_error("Error when allocating the reference image");
             reg_exit();
         }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>
-            (&this->currentReference_gpu, this->currentReference)) {
-            printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentReference_gpu, this->currentReference)) {
+            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
+            reg_print_msg_error("Error when transfering the reference image");
             reg_exit();
         }
     } else if (this->currentReference->nt == 2) {
-        if (cudaCommon_allocateArrayToDevice<float>
-            (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference->dim)) {
-            printf("[NiftyReg ERROR] Error when allocating the reference image.\n");
+        if (cudaCommon_allocateArrayToDevice<float>(&this->currentReference_gpu,
+                                                    &this->currentReference2_gpu, this->currentReference->dim)) {
+            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
+            reg_print_msg_error("Error when allocating the reference image");
             reg_exit();
         }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>
-            (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference)) {
-            printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentReference_gpu,
+                                                           &this->currentReference2_gpu, this->currentReference)) {
+            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
+            reg_print_msg_error("Error when transfering the reference image");
             reg_exit();
         }
     }
@@ -733,38 +767,40 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
     if (this->currentFloating_gpu != NULL) cudaCommon_free(&this->currentFloating_gpu);
     if (this->currentFloating2_gpu != NULL) cudaCommon_free(&this->currentFloating2_gpu);
     if (this->currentReference->nt == 1) {
-        if (cudaCommon_allocateArrayToDevice<float>
-            (&this->currentFloating_gpu, this->currentFloating->dim)) {
-            printf("[NiftyReg ERROR] Error when allocating the floating image.\n");
+        if (cudaCommon_allocateArrayToDevice<float>(&this->currentFloating_gpu, this->currentFloating->dim)) {
+            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
+            reg_print_msg_error("Error when allocating the floating image");
             reg_exit();
         }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>
-            (&this->currentFloating_gpu, this->currentFloating)) {
-            printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentFloating_gpu, this->currentFloating)) {
+            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
+            reg_print_msg_error("Error when transfering the floating image");
             reg_exit();
         }
     } else if (this->currentReference->nt == 2) {
-        if (cudaCommon_allocateArrayToDevice<float>
-            (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating->dim)) {
-            printf("[NiftyReg ERROR] Error when allocating the floating image.\n");
+        if (cudaCommon_allocateArrayToDevice<float>(&this->currentFloating_gpu,
+                                                    &this->currentFloating2_gpu, this->currentFloating->dim)) {
+            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
+            reg_print_msg_error("Error when allocating the floating image");
             reg_exit();
         }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>
-            (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating)) {
-            printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentFloating_gpu,
+                                                           &this->currentFloating2_gpu, this->currentFloating)) {
+            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
+            reg_print_msg_error("Error when transfering the floating image");
             reg_exit();
         }
     }
+
     if (this->controlPointGrid_gpu != NULL) cudaCommon_free<float4>(&this->controlPointGrid_gpu);
-    if (cudaCommon_allocateArrayToDevice<float4>
-        (&this->controlPointGrid_gpu, this->controlPointGrid->dim)) {
-        printf("[NiftyReg ERROR] Error when allocating the control point image.\n");
+    if (cudaCommon_allocateArrayToDevice<float4>(&this->controlPointGrid_gpu, this->controlPointGrid->dim)) {
+        reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
+        reg_print_msg_error("Error when allocating the control point image");
         reg_exit();
     }
-
-    if (cudaCommon_transferNiftiToArrayOnDevice<float4>
-        (&this->controlPointGrid_gpu, this->controlPointGrid)) {
-        printf("[NiftyReg ERROR] Error when transfering the control point image.\n");
+    if (cudaCommon_transferNiftiToArrayOnDevice<float4>(&this->controlPointGrid_gpu, this->controlPointGrid)) {
+        reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
+        reg_print_msg_error("Error when transfering the control point image");
         reg_exit();
     }
 
@@ -772,27 +808,24 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
     NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask_h, this->activeVoxelNumber[this->currentLevel] * sizeof(int)));
     int *targetMask_h_ptr = &targetMask_h[0];
     for (int i = 0; i < this->currentReference->nx * this->currentReference->ny * this->currentReference->nz; i++) {
-        if (this->currentMask[i] != -1) *targetMask_h_ptr++ = i;
+        if (this->currentMask[i] != -1)
+            *targetMask_h_ptr++ = i;
     }
-    NR_CUDA_SAFE_CALL(cudaMalloc(&this->currentMask_gpu,
-                                 this->activeVoxelNumber[this->currentLevel] * sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMalloc(&this->currentMask_gpu, this->activeVoxelNumber[this->currentLevel] * sizeof(int)));
     NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentMask_gpu, targetMask_h,
-                                 this->activeVoxelNumber[this->currentLevel] * sizeof(int),
-                                 cudaMemcpyHostToDevice));
+                                 this->activeVoxelNumber[this->currentLevel] * sizeof(int), cudaMemcpyHostToDevice));
     NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask_h));
+
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage done.\n");
+    reg_print_fct_debug("reg_f3d_gpu::InitialiseCurrentLevel");
 #endif
     return maxStepSize;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearCurrentInputImage() {
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage called.\n");
-#endif
-    if (cudaCommon_transferFromDeviceToNifti<float4>
-        (this->controlPointGrid, &this->controlPointGrid_gpu)) {
-        printf("[NiftyReg ERROR] Error when transfering back the control point image.\n");
+    if (cudaCommon_transferFromDeviceToNifti<float4>(this->controlPointGrid, &this->controlPointGrid_gpu)) {
+        reg_print_fct_error("reg_f3d_gpu::ClearCurrentInputImage()");
+        reg_print_msg_error("Error when transfering back the control point image");
         reg_exit();
     }
     cudaCommon_free<float4>(&this->controlPointGrid_gpu);
@@ -815,7 +848,7 @@ void reg_f3d_gpu::ClearCurrentInputImage() {
     this->currentMask = NULL;
     this->currentFloating = NULL;
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage done.\n");
+    reg_print_fct_debug("reg_f3d_gpu::ClearCurrentInputImage");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -835,28 +868,31 @@ void reg_f3d_gpu::SetOptimiser() {
                                 0, // currentIterationNumber,
                                 this,
                                 reinterpret_cast<float *>(this->controlPointGrid_gpu),
-                                reinterpret_cast<float *>(this->transformationGradient_gpu)
-    );
+                                reinterpret_cast<float *>(this->transformationGradient_gpu));
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::SetOptimiser");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 float reg_f3d_gpu::NormaliseGradient() {
     // First compute the gradient max length for normalisation purpose
-    float length = reg_getMaximalLength_gpu(&this->transformationGradient_gpu,
-                                            this->optimiser->GetVoxNumber()
-    );
+    float length = reg_getMaximalLength_gpu(&this->transformationGradient_gpu, this->optimiser->GetVoxNumber());
 
     if (strcmp(this->executableName, "NiftyReg F3D GPU") == 0) {
         // The gradient is normalised if we are running F3D
         // It will be normalised later when running symmetric or F3D2
 #ifndef NDEBUG
-        printf("[NiftyReg DEBUG] Objective function gradient_gpu maximal length: %g\n", length);
+        char text[255];
+        sprintf(text, "Objective function gradient maximal length: %g", length);
+        reg_print_msg_debug(text);
 #endif
-        reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(),
-                              &this->transformationGradient_gpu,
-                              1.f / length);
-
+        reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), &this->transformationGradient_gpu, 1.f / length);
     }
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::NormaliseGradient");
+#endif
     // Returns the largest gradient distance
     return length;
 }
@@ -921,11 +957,12 @@ int reg_f3d_gpu::CheckMemoryMB() {
 
     // jacobian array
     if (this->jacobianLogWeight > 0)
-        totalMemoryRequiered += 10 * referenceVoxelNumber *
-        sizeof(float);
+        totalMemoryRequiered += 10 * referenceVoxelNumber * sizeof(float);
 
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::CheckMemoryMB");
+#endif
     return (int)(ceil((float)totalMemoryRequiered / float(1024 * 1024)));
-
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -936,6 +973,9 @@ void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
     // I am here adding 4 to the specified bin number to accomodate for
     // the spline support
     this->measure_gpu_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::UseNMISetFloatingBinNumber");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
@@ -945,18 +985,27 @@ void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
     // I am here adding 4 to the specified bin number to accomodate for
     // the spline support
     this->measure_gpu_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::UseNMISetReferenceBinNumber");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseSSD(int timepoint) {
     if (this->measure_gpu_ssd == NULL)
         this->measure_gpu_ssd = new reg_ssd_gpu;
     this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::UseSSD");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseKLDivergence(int timepoint) {
     if (this->measure_gpu_kld == NULL)
         this->measure_gpu_kld = new reg_kld_gpu;
     this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::UseKLDivergence");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) {
@@ -964,6 +1013,9 @@ void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) {
         this->measure_gpu_lncc = new reg_lncc_gpu;
     this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0);
     this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint, stddev);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d_gpu::UseLNCC");
+#endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseDTI(int timepoint[6]) {
@@ -1001,8 +1053,7 @@ void reg_f3d_gpu::InitialiseSimilarity() {
                                                  &this->currentMask_gpu,
                                                  &this->warped_gpu,
                                                  &this->warpedGradientImage_gpu,
-                                                 &this->voxelBasedMeasureGradientImage_gpu
-        );
+                                                 &this->voxelBasedMeasureGradientImage_gpu);
         this->measure_nmi = this->measure_gpu_nmi;
     }
 
@@ -1020,8 +1071,7 @@ void reg_f3d_gpu::InitialiseSimilarity() {
                                                  &this->currentMask_gpu,
                                                  &this->warped_gpu,
                                                  &this->warpedGradientImage_gpu,
-                                                 &this->voxelBasedMeasureGradientImage_gpu
-        );
+                                                 &this->voxelBasedMeasureGradientImage_gpu);
         this->measure_ssd = this->measure_gpu_ssd;
     }
 
@@ -1038,8 +1088,7 @@ void reg_f3d_gpu::InitialiseSimilarity() {
                                                  &this->currentMask_gpu,
                                                  &this->warped_gpu,
                                                  &this->warpedGradientImage_gpu,
-                                                 &this->voxelBasedMeasureGradientImage_gpu
-        );
+                                                 &this->voxelBasedMeasureGradientImage_gpu);
         this->measure_kld = this->measure_gpu_kld;
     }
 
@@ -1056,8 +1105,7 @@ void reg_f3d_gpu::InitialiseSimilarity() {
                                                   &this->currentMask_gpu,
                                                   &this->warped_gpu,
                                                   &this->warpedGradientImage_gpu,
-                                                  &this->voxelBasedMeasureGradientImage_gpu
-        );
+                                                  &this->voxelBasedMeasureGradientImage_gpu);
         this->measure_lncc = this->measure_gpu_lncc;
     }
 
@@ -1074,12 +1122,11 @@ void reg_f3d_gpu::InitialiseSimilarity() {
                                                  &this->currentMask_gpu,
                                                  &this->warped_gpu,
                                                  &this->warpedGradientImage_gpu,
-                                                 &this->voxelBasedMeasureGradientImage_gpu
-        );
+                                                 &this->voxelBasedMeasureGradientImage_gpu);
         this->measure_dti = this->measure_gpu_dti;
     }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_f3d_gpu::InitialiseSimilarity() done\n");
+    reg_print_fct_debug("reg_f3d_gpu::InitialiseSimilarity()");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */

From 0610a5ef1ef80ed44e317c5f967758e8cbbeb536 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 17 Nov 2022 20:33:59 +0000
Subject: [PATCH 011/314] Refactor
 reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm()

---
 niftyreg_build_version.txt    |  2 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp | 38 ++++++++++++-----------------------
 2 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index b16e5f75..f96ac067 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-104
+105
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index 8f008e63..e0990ba7 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -279,35 +279,23 @@ void reg_f3d_gpu::ClearTransformationGradient() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) {
-    if (this->jacobianLogWeight <= 0) return 0.;
-
-    double value;
-    if (type == 2) {
-        value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference,
-                                                      this->controlPointGrid,
-                                                      &this->controlPointGrid_gpu,
-                                                      false);
-    } else {
-        value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference,
-                                                      this->controlPointGrid,
-                                                      &this->controlPointGrid_gpu,
-                                                      this->jacobianLogApproximation);
-    }
+    if (this->jacobianLogWeight <= 0) return 0;
+
+    bool approx = type == 2 ? false : this->jacobianLogApproximation;
+
+    double value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference,
+                                                         this->controlPointGrid,
+                                                         &this->controlPointGrid_gpu,
+                                                         approx);
+
     unsigned int maxit = 5;
     if (type > 0) maxit = 20;
     unsigned int it = 0;
     while (value != value && it < maxit) {
-        if (type == 2) {
-            value = reg_spline_correctFolding_gpu(this->currentReference,
-                                                  this->controlPointGrid,
-                                                  &this->controlPointGrid_gpu,
-                                                  false);
-        } else {
-            value = reg_spline_correctFolding_gpu(this->currentReference,
-                                                  this->controlPointGrid,
-                                                  &this->controlPointGrid_gpu,
-                                                  this->jacobianLogApproximation);
-        }
+        value = reg_spline_correctFolding_gpu(this->currentReference,
+                                              this->controlPointGrid,
+                                              &this->controlPointGrid_gpu,
+                                              approx);
 #ifndef NDEBUG
         reg_print_msg_debug("Folding correction");
 #endif

From 46f59d462471b5c8272d3d42d9a7a5971d9ca83c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 17 Nov 2022 20:37:40 +0000
Subject: [PATCH 012/314] Some refactorisations

---
 niftyreg_build_version.txt       |  2 +-
 reg-lib/_reg_f3d.cpp             | 18 ++++++++----------
 reg-lib/cuda/_reg_common_cuda.cu |  2 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp    | 19 ++++++++++---------
 4 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f96ac067..fe4afb0d 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-105
+106
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 7559620e..0ed31a57 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -429,10 +429,9 @@ void reg_f3d<T>::GetDeformationField()
 template <class T>
 double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type)
 {
-   if(this->jacobianLogWeight<=0) return 0.;
-
-   double value=0.;
+   if(this->jacobianLogWeight<=0) return 0;
 
+   double value;
    if(type==2)
    {
       value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid,
@@ -489,14 +488,14 @@ double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type)
 #ifndef NDEBUG
    reg_print_fct_debug("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm");
 #endif
-   return (double)this->jacobianLogWeight * value;
+   return this->jacobianLogWeight * value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 double reg_f3d<T>::ComputeBendingEnergyPenaltyTerm()
 {
-   if(this->bendingEnergyWeight<=0) return 0.;
+   if(this->bendingEnergyWeight<=0) return 0;
 
    double value = reg_spline_approxBendingEnergy(this->controlPointGrid);
 #ifndef NDEBUG
@@ -510,7 +509,7 @@ template <class T>
 double reg_f3d<T>::ComputeLinearEnergyPenaltyTerm()
 {
    if(this->linearEnergyWeight<=0)
-      return 0.;
+      return 0;
 
    double value = reg_spline_approxLinearEnergy(this->controlPointGrid);
 
@@ -525,7 +524,7 @@ template <class T>
 double reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm()
 {
    if(this->landmarkRegWeight<=0)
-      return 0.;
+      return 0;
 
    double value = reg_spline_getLandmarkDistance(this->controlPointGrid,
                                                  this->landmarkRegNumber,
@@ -988,8 +987,8 @@ template <class T>
 void reg_f3d<T>::GetApproximatedGradient()
 {
    // Loop over every control point
-   T *gridPtr = static_cast<T *>(this->controlPointGrid->data);
-   T *gradPtr = static_cast<T *>(this->transformationGradient->data);
+   T *gridPtr = static_cast<T*>(this->controlPointGrid->data);
+   T *gradPtr = static_cast<T*>(this->transformationGradient->data);
    T eps = this->controlPointGrid->dx / 100.f;
    for(size_t i=0; i<this->controlPointGrid->nvox; ++i)
    {
@@ -1122,7 +1121,6 @@ void reg_f3d<T>::PrintCurrentObjFunctionValue(T currentSize)
 template<class T>
 void reg_f3d<T>::GetObjectiveFunctionGradient()
 {
-
    if(!this->useApproxGradient)
    {
       // Compute the gradient of the similarity measure
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 5fcfee5f..0dde9b0c 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -678,7 +678,7 @@ template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image *, float4
 /* ******************************** */
 void cudaCommon_free(cudaArray **cuArray_d)
 {
-		NR_CUDA_SAFE_CALL(cudaFreeArray(*cuArray_d));
+	NR_CUDA_SAFE_CALL(cudaFreeArray(*cuArray_d));
 	return;
 }
 /* ******************************** */
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index e0990ba7..2ab69055 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -359,7 +359,7 @@ double reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::GetDeformationField() {
     if (this->controlPointGrid_gpu == NULL) {
-        reg_f3d<float>::GetDeformationField();
+        reg_f3d::GetDeformationField();
     } else {
         // Compute the deformation field
         reg_spline_getDeformationField_gpu(this->controlPointGrid,
@@ -417,6 +417,11 @@ void reg_f3d_gpu::SetGradientImageToZero() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::GetVoxelBasedGradient() {
+    // The voxel based gradient image is filled with zeros
+    cudaMemset(this->voxelBasedMeasureGradientImage_gpu, 0,
+               this->currentReference->nx * this->currentReference->ny * this->currentReference->nz *
+               sizeof(float4));
+
     // The intensity gradient is first computed
     reg_getImageGradient_gpu(this->currentFloating,
                              &this->currentFloating_gpu,
@@ -425,10 +430,6 @@ void reg_f3d_gpu::GetVoxelBasedGradient() {
                              this->activeVoxelNumber[this->currentLevel],
                              this->warpedPaddingValue);
 
-    // The voxel based gradient image is filled with zeros
-    cudaMemset(this->voxelBasedMeasureGradientImage_gpu, 0,
-               this->currentReference->nx * this->currentReference->ny * this->currentReference->nz *
-               sizeof(float4));
     // The gradient of the various measures of similarity are computed
     if (this->measure_gpu_nmi != NULL)
         this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient();
@@ -811,6 +812,8 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearCurrentInputImage() {
+    reg_f3d::ClearCurrentInputImage();
+
     if (cudaCommon_transferFromDeviceToNifti<float4>(this->controlPointGrid, &this->controlPointGrid_gpu)) {
         reg_print_fct_error("reg_f3d_gpu::ClearCurrentInputImage()");
         reg_print_msg_error("Error when transfering back the control point image");
@@ -832,9 +835,6 @@ void reg_f3d_gpu::ClearCurrentInputImage() {
         cudaCommon_free(&this->currentFloating2_gpu);
     this->currentFloating2_gpu = NULL;
 
-    this->currentReference = NULL;
-    this->currentMask = NULL;
-    this->currentFloating = NULL;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::ClearCurrentInputImage");
 #endif
@@ -887,7 +887,8 @@ float reg_f3d_gpu::NormaliseGradient() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 int reg_f3d_gpu::CheckMemoryMB() {
-    if (!this->initialised) reg_f3d<float>::Initialise();
+    if (!this->initialised)
+        reg_f3d::Initialise();
 
     size_t referenceVoxelNumber = this->referencePyramid[this->levelToPerform - 1]->nx *
         this->referencePyramid[this->levelToPerform - 1]->ny *

From a99999c3426f01a875ae689e286ba083f8582e7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 17 Nov 2022 21:33:28 +0000
Subject: [PATCH 013/314] Make reg_f3d_gpu::GetApproximatedGradient() on a par
 with reg_f3d::GetApproximatedGradient()

---
 niftyreg_build_version.txt    |   2 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp | 110 +++++++++++++---------------------
 2 files changed, 44 insertions(+), 68 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index fe4afb0d..e34885bb 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-106
+107
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index 2ab69055..c6aea7d0 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -564,94 +564,70 @@ void reg_f3d_gpu::SmoothGradient() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::GetApproximatedGradient() {
-    float4 *gridValue = NULL;
-    float4 *modifiedValue = NULL;
-    float4 *gradientValue = NULL;
+    float4 *gridValue, *currentValue, *gradientValue;
     cudaMallocHost(&gridValue, sizeof(float4));
-    cudaMallocHost(&modifiedValue, sizeof(float4));
+    cudaMallocHost(&currentValue, sizeof(float4));
     cudaMallocHost(&gradientValue, sizeof(float4));
 
-    float eps = this->controlPointGrid->dx / 1000.f;
+    float eps = this->controlPointGrid->dx / 100.f;
 
     for (size_t i = 0; i < this->optimiser->GetVoxNumber(); ++i) {
-        // Extract the current value
-        cudaMemcpy(gridValue,
-                   &this->controlPointGrid_gpu[i],
-                   sizeof(float4),
-                   cudaMemcpyDeviceToHost);
-        modifiedValue[0] = gridValue[0];
+        // Extract the grid value
+        cudaMemcpy(gridValue, &this->controlPointGrid_gpu[i], sizeof(float4), cudaMemcpyDeviceToHost);
+        cudaMemcpy(currentValue, &(reinterpret_cast<float4*>(this->optimiser->GetBestDOF()))[i], sizeof(float4), cudaMemcpyDeviceToHost);
+
         // -- X axis
-        // Modify the current value along the x axis
-        modifiedValue[0].x = gridValue[0].x + eps;
-        cudaMemcpy(&this->controlPointGrid_gpu[i],
-                   modifiedValue,
-                   sizeof(float4),
-                   cudaMemcpyHostToDevice);
+        // Modify the grid value along the x axis
+        gridValue->x = currentValue->x + eps;
+        cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
         // Evaluate the objective function value
-        gradientValue[0].x = this->GetObjectiveFunctionValue();
-        // Modify the current value along the x axis
-        modifiedValue[0].x = gridValue[0].x - eps;
-        cudaMemcpy(&this->controlPointGrid_gpu[i],
-                   modifiedValue,
-                   sizeof(float4),
-                   cudaMemcpyHostToDevice);
+        gradientValue->x = this->GetObjectiveFunctionValue();
+        // Modify the grid value along the x axis
+        gridValue->x = currentValue->x - eps;
+        cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
         // Evaluate the objective function value
-        gradientValue[0].x -= this->GetObjectiveFunctionValue();
-        gradientValue[0].x /= 2.f * eps;
-        modifiedValue[0].x = gridValue[0].x;
+        gradientValue->x -= this->GetObjectiveFunctionValue();
+        gradientValue->x /= 2.f * eps;
+        gridValue->x = currentValue->x;
+
         // -- Y axis
-        // Modify the current value along the y axis
-        modifiedValue[0].y = gridValue[0].y + eps;
-        cudaMemcpy(&this->controlPointGrid_gpu[i],
-                   modifiedValue,
-                   sizeof(float4),
-                   cudaMemcpyHostToDevice);
+        // Modify the grid value along the y axis
+        gridValue->y = currentValue->y + eps;
+        cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
         // Evaluate the objective function value
-        gradientValue[0].y = this->GetObjectiveFunctionValue();
-        // Modify the current value the y axis
-        modifiedValue[0].y = gridValue[0].y - eps;
-        cudaMemcpy(&this->controlPointGrid_gpu[i],
-                   modifiedValue,
-                   sizeof(float4),
-                   cudaMemcpyHostToDevice);
+        gradientValue->y = this->GetObjectiveFunctionValue();
+        // Modify the grid value the y axis
+        gridValue->y = currentValue->y - eps;
+        cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
         // Evaluate the objective function value
-        gradientValue[0].y -= this->GetObjectiveFunctionValue();
-        gradientValue[0].y /= 2.f * eps;
-        modifiedValue[0].y = gridValue[0].y;
+        gradientValue->y -= this->GetObjectiveFunctionValue();
+        gradientValue->y /= 2.f * eps;
+        gridValue->y = currentValue->y;
+
         if (this->optimiser->GetNDim() > 2) {
             // -- Z axis
-            // Modify the current value along the y axis
-            modifiedValue[0].z = gridValue[0].z + eps;
-            cudaMemcpy(&this->controlPointGrid_gpu[i],
-                       modifiedValue,
-                       sizeof(float4),
-                       cudaMemcpyHostToDevice);
+            // Modify the grid value along the y axis
+            gridValue->z = currentValue->z + eps;
+            cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
             // Evaluate the objective function value
-            gradientValue[0].z = this->GetObjectiveFunctionValue();
-            // Modify the current value the y axis
-            modifiedValue[0].z = gridValue[0].z - eps;
-            cudaMemcpy(&this->controlPointGrid_gpu[i],
-                       modifiedValue,
-                       sizeof(float4),
-                       cudaMemcpyHostToDevice);
+            gradientValue->z = this->GetObjectiveFunctionValue();
+            // Modify the grid value the y axis
+            gridValue->z = currentValue->z - eps;
+            cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
             // Evaluate the objective function value
-            gradientValue[0].z -= this->GetObjectiveFunctionValue();
-            gradientValue[0].z /= 2.f * eps;
+            gradientValue->z -= this->GetObjectiveFunctionValue();
+            gradientValue->z /= 2.f * eps;
         }
+
         // Restore the initial parametrisation
-        cudaMemcpy(&this->controlPointGrid_gpu[i],
-                   gridValue,
-                   sizeof(float4),
-                   cudaMemcpyHostToDevice);
+        cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
 
         // Save the assessed gradient
-        cudaMemcpy(&this->transformationGradient_gpu[i],
-                   gradientValue,
-                   sizeof(float4),
-                   cudaMemcpyHostToDevice);
+        cudaMemcpy(&this->transformationGradient_gpu[i], gradientValue, sizeof(float4), cudaMemcpyHostToDevice);
     }
+
     cudaFreeHost(gridValue);
-    cudaFreeHost(modifiedValue);
+    cudaFreeHost(currentValue);
     cudaFreeHost(gradientValue);
 
 #ifndef NDEBUG

From 5fd686ee33101f87cdfb5ec92d88b5550bc56856 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 18 Nov 2022 20:12:26 +0000
Subject: [PATCH 014/314] Add cudaArray specialisation of
 cudaCommon_transferFromDeviceToNifti()

---
 niftyreg_build_version.txt       |  2 +-
 reg-lib/cuda/_reg_common_cuda.cu | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3b20426c..e2a9fee0 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-108
+109
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 0dde9b0c..806f3765 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -589,6 +589,25 @@ template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image *, double
 template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image *, float4 **); // for deformation field
 /* ******************************** */
 /* ******************************** */
+template<>
+int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray **cuArray_d) {
+	if (img->datatype != NIFTI_TYPE_FLOAT32) {
+		reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
+		reg_print_msg_error("The image data type is not supported");
+		return EXIT_FAILURE;
+	}
+
+	cudaMemcpy3DParms copyParams = {0};
+	copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
+	copyParams.srcArray = *cuArray_d;
+	copyParams.dstPtr = make_cudaPitchedPtr((void*)(img->data), copyParams.extent.width * sizeof(float),
+											copyParams.extent.width, copyParams.extent.height);
+	copyParams.kind = cudaMemcpyDeviceToHost;
+	NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+	return EXIT_SUCCESS;
+}
+/* ******************************** */
+/* ******************************** */
 template <class DTYPE, class NIFTI_TYPE>
 int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE **array_d, DTYPE **array2_d)
 {

From 97800dbb03e2da0e9ed3dbae96016f61b617c56a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 22 Nov 2022 14:09:14 +0000
Subject: [PATCH 015/314] Some refactorisations

---
 niftyreg_build_version.txt    |  2 +-
 reg-lib/_reg_f3d.cpp          | 39 +++++++++--------------
 reg-lib/cuda/_reg_f3d_gpu.cpp | 59 +++++++++++++----------------------
 3 files changed, 38 insertions(+), 62 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e2a9fee0..bc6298e8 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-109
+110
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 0ed31a57..6e1cb9f9 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -244,15 +244,12 @@ void reg_f3d<T>::Initialise()
          gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber-1));
 
       // Create and allocate the control point image
-      reg_createControlPointGrid<T>(&this->controlPointGrid,
-                                    this->referencePyramid[0],
-            gridSpacing);
+      reg_createControlPointGrid<T>(&this->controlPointGrid, this->referencePyramid[0], gridSpacing);
 
       // The control point position image is initialised with the affine transformation
       if(this->affineTransformation==NULL)
       {
-         memset(this->controlPointGrid->data,0,
-                this->controlPointGrid->nvox*this->controlPointGrid->nbyper);
+         memset(this->controlPointGrid->data,0, this->controlPointGrid->nvox*this->controlPointGrid->nbyper);
          reg_tools_multiplyValueToImage(this->controlPointGrid,this->controlPointGrid,0.f);
          reg_getDeformationFromDisplacement(this->controlPointGrid);
       }
@@ -262,8 +259,7 @@ void reg_f3d<T>::Initialise()
    {
       // The control point grid image is initialised with the provided grid
       this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid);
-      this->controlPointGrid->data = (void *)malloc( this->controlPointGrid->nvox *
-                                                     this->controlPointGrid->nbyper);
+      this->controlPointGrid->data = (void *)malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
       memcpy( this->controlPointGrid->data, this->inputControlPointGrid->data,
               this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
       // The final grid spacing is computed
@@ -285,12 +281,11 @@ void reg_f3d<T>::Initialise()
       text = stringFormat("\t* name: %s", this->inputReference->fname);
       reg_print_info(this->executableName, text.c_str());
       text = stringFormat("\t* image dimension: %i x %i x %i x %i",
-              this->inputReference->nx, this->inputReference->ny,
-              this->inputReference->nz, this->inputReference->nt);
+                          this->inputReference->nx, this->inputReference->ny,
+                          this->inputReference->nz, this->inputReference->nt);
       reg_print_info(this->executableName, text.c_str());
       text = stringFormat("\t* image spacing: %g x %g x %g mm",
-              this->inputReference->dx,
-              this->inputReference->dy, this->inputReference->dz);
+                          this->inputReference->dx, this->inputReference->dy, this->inputReference->dz);
       reg_print_info(this->executableName, text.c_str());
       for(int i=0; i<this->inputReference->nt; i++)
       {
@@ -313,12 +308,10 @@ void reg_f3d<T>::Initialise()
       text = stringFormat("\t* name: %s", this->inputFloating->fname);
       reg_print_info(this->executableName, text.c_str());
       text = stringFormat("\t* image dimension: %i x %i x %i x %i",
-              this->inputFloating->nx, this->inputFloating->ny,
-              this->inputFloating->nz, this->inputFloating->nt);
+                          this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz, this->inputFloating->nt);
       reg_print_info(this->executableName, text.c_str());
-      text = stringFormat("\t* image spacing: %g x %g x %g mm",
-              this->inputFloating->dx,
-              this->inputFloating->dy, this->inputFloating->dz);
+      text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx,
+                          this->inputFloating->dy, this->inputFloating->dz);
       reg_print_info(this->executableName, text.c_str());
       for(int i=0; i<this->inputFloating->nt; i++)
       {
@@ -350,8 +343,7 @@ void reg_f3d<T>::Initialise()
       reg_print_info(this->executableName, text.c_str());
       reg_print_info(this->executableName, "");
 
-      text = stringFormat("Final spacing in mm: %g %g %g",
-              this->spacing[0], this->spacing[1], this->spacing[2]);
+      text = stringFormat("Final spacing in mm: %g %g %g", this->spacing[0], this->spacing[1], this->spacing[2]);
       reg_print_info(this->executableName, text.c_str());
       reg_print_info(this->executableName, "");
       if(this->measure_ssd!=NULL)
@@ -380,23 +372,22 @@ void reg_f3d<T>::Initialise()
          reg_print_info(this->executableName, "");
       }
       if((this->linearEnergyWeight)>0){
-         text = stringFormat("Linear energy penalty term weight: %g",
-                 this->linearEnergyWeight);
+         text = stringFormat("Linear energy penalty term weight: %g", this->linearEnergyWeight);
          reg_print_info(this->executableName, text.c_str());
          reg_print_info(this->executableName, "");
       }
       if(this->jacobianLogWeight>0){
          text = stringFormat("Jacobian-based penalty term weight: %g", this->jacobianLogWeight);
          reg_print_info(this->executableName, text.c_str());
-         if(this->jacobianLogApproximation){
+         if(this->jacobianLogApproximation) {
             reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated");
+         } else {
+            reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated");
          }
-         else reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated");
          reg_print_info(this->executableName, "");
       }
       if((this->landmarkRegWeight)>0){
-         text = stringFormat("Landmark distance regularisation term weight: %g",
-                 this->landmarkRegWeight);
+         text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight);
          reg_print_info(this->executableName, text.c_str());
          reg_print_info(this->executableName, "");
       }
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index c6aea7d0..d99f0c87 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -114,26 +114,8 @@ reg_f3d_gpu::~reg_f3d_gpu() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::AllocateWarped() {
-    if (this->currentReference == NULL) {
-        reg_print_fct_error("reg_f3d_gpu::AllocateWarped()");
-        reg_print_msg_error("Error when allocating the warped image");
-        reg_exit();
-    }
-    this->ClearWarped();
-    this->warped = nifti_copy_nim_info(this->currentReference);
-    this->warped->dim[0] = this->warped->ndim = this->currentFloating->ndim;
-    this->warped->dim[4] = this->warped->nt = this->currentFloating->nt;
-    this->warped->pixdim[4] = this->warped->dt = 1.0;
-    this->warped->nvox =
-        (size_t)this->warped->nx *
-        (size_t)this->warped->ny *
-        (size_t)this->warped->nz *
-        (size_t)this->warped->nt;
-    this->warped->scl_slope = 1.f;
-    this->warped->scl_inter = 0.f;
-    this->warped->datatype = this->currentFloating->datatype;
-    this->warped->nbyper = this->currentFloating->nbyper;
-    NR_CUDA_SAFE_CALL(cudaMallocHost(&(this->warped->data), this->warped->nvox * this->warped->nbyper));
+    reg_f3d::AllocateWarped();
+
     if (this->warped->nt == 1) {
         if (cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, this->warped->dim)) {
             reg_print_fct_error("reg_f3d_gpu::AllocateWarped()");
@@ -156,13 +138,10 @@ void reg_f3d_gpu::AllocateWarped() {
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearWarped() {
-    if (this->warped != NULL) {
-        NR_CUDA_SAFE_CALL(cudaFreeHost(this->warped->data));
-        this->warped->data = NULL;
-        nifti_image_free(this->warped);
-        this->warped = NULL;
-    }
+    reg_f3d::ClearWarped();
+
     if (this->warped_gpu != NULL) {
         cudaCommon_free<float>(&this->warped_gpu);
         this->warped_gpu = NULL;
@@ -186,6 +165,7 @@ void reg_f3d_gpu::AllocateDeformationField() {
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearDeformationField() {
     if (this->deformationFieldImage_gpu != NULL) {
         cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
@@ -217,6 +197,7 @@ void reg_f3d_gpu::AllocateWarpedGradient() {
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearWarpedGradient() {
     if (this->warpedGradientImage_gpu != NULL) {
         cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
@@ -244,6 +225,7 @@ void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() {
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() {
     if (this->voxelBasedMeasureGradientImage_gpu != NULL) {
         cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
@@ -267,6 +249,7 @@ void reg_f3d_gpu::AllocateTransformationGradient() {
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearTransformationGradient() {
     if (this->transformationGradient_gpu != NULL) {
         cudaCommon_free<float4>(&this->transformationGradient_gpu);
@@ -326,6 +309,7 @@ double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) {
 double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() {
     if (this->bendingEnergyWeight <= 0) return 0;
 
+    // CHECKED: Similar output
     double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid,
                                                       &this->controlPointGrid_gpu);
 #ifndef NDEBUG
@@ -699,7 +683,7 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 float reg_f3d_gpu::InitialiseCurrentLevel() {
-    float maxStepSize = reg_f3d<float>::InitialiseCurrentLevel();
+    float maxStepSize = reg_f3d::InitialiseCurrentLevel();
 
     if (this->currentReference_gpu != NULL) cudaCommon_free(&this->currentReference_gpu);
     if (this->currentReference2_gpu != NULL) cudaCommon_free(&this->currentReference2_gpu);
@@ -711,7 +695,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
         }
         if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentReference_gpu, this->currentReference)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when transfering the reference image");
+            reg_print_msg_error("Error when transferring the reference image");
             reg_exit();
         }
     } else if (this->currentReference->nt == 2) {
@@ -724,7 +708,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
         if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentReference_gpu,
                                                            &this->currentReference2_gpu, this->currentReference)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when transfering the reference image");
+            reg_print_msg_error("Error when transferring the reference image");
             reg_exit();
         }
     }
@@ -739,7 +723,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
         }
         if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentFloating_gpu, this->currentFloating)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when transfering the floating image");
+            reg_print_msg_error("Error when transferring the floating image");
             reg_exit();
         }
     } else if (this->currentReference->nt == 2) {
@@ -752,7 +736,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
         if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentFloating_gpu,
                                                            &this->currentFloating2_gpu, this->currentFloating)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when transfering the floating image");
+            reg_print_msg_error("Error when transferring the floating image");
             reg_exit();
         }
     }
@@ -765,7 +749,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
     }
     if (cudaCommon_transferNiftiToArrayOnDevice<float4>(&this->controlPointGrid_gpu, this->controlPointGrid)) {
         reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-        reg_print_msg_error("Error when transfering the control point image");
+        reg_print_msg_error("Error when transferring the control point image");
         reg_exit();
     }
 
@@ -787,12 +771,13 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
     return maxStepSize;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearCurrentInputImage() {
     reg_f3d::ClearCurrentInputImage();
 
     if (cudaCommon_transferFromDeviceToNifti<float4>(this->controlPointGrid, &this->controlPointGrid_gpu)) {
         reg_print_fct_error("reg_f3d_gpu::ClearCurrentInputImage()");
-        reg_print_msg_error("Error when transfering back the control point image");
+        reg_print_msg_error("Error when transferring back the control point image");
         reg_exit();
     }
     cudaCommon_free<float4>(&this->controlPointGrid_gpu);
@@ -821,8 +806,8 @@ void reg_f3d_gpu::SetOptimiser() {
     if (this->useConjGradient)
         this->optimiser = new reg_conjugateGradient_gpu();
     else this->optimiser = new reg_optimiser_gpu();
-    // The cpp and grad images are converted to float * instead of float4
-    // to enable compatibility with cpu class
+    // The cpp and grad images are converted to float* instead of float4
+    // to enable compatibility with the CPU class
     this->optimiser->Initialise(this->controlPointGrid->nvox,
                                 this->controlPointGrid->nz > 1 ? 3 : 2,
                                 this->optimiseX,
@@ -831,8 +816,8 @@ void reg_f3d_gpu::SetOptimiser() {
                                 this->maxiterationNumber,
                                 0, // currentIterationNumber,
                                 this,
-                                reinterpret_cast<float *>(this->controlPointGrid_gpu),
-                                reinterpret_cast<float *>(this->transformationGradient_gpu));
+                                reinterpret_cast<float*>(this->controlPointGrid_gpu),
+                                reinterpret_cast<float*>(this->transformationGradient_gpu));
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::SetOptimiser");
 #endif

From f862f956a0e68818b2e4cd005af1eef0cb6e7468 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 22 Nov 2022 14:09:53 +0000
Subject: [PATCH 016/314] Get rid of reg_f3d_gpu::fillImageData()

---
 niftyreg_build_version.txt    |  2 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp | 33 +++++++--------------------------
 reg-lib/cuda/_reg_f3d_gpu.h   |  2 --
 3 files changed, 8 insertions(+), 29 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index bc6298e8..58c9bdf9 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-110
+111
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index d99f0c87..75f19eff 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -620,29 +620,6 @@ void reg_f3d_gpu::GetApproximatedGradient() {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::fillImageData(nifti_image *image, float* memoryObject) {
-    size_t size = image->nvox;
-    float *buffer = (float*)malloc(size * sizeof(float));
-
-    if (buffer == NULL) {
-        reg_print_fct_error("reg_f3d_gpu::fillImageData()");
-        reg_print_msg_error("Memory allocation did not complete successfully!");
-        reg_exit();
-    }
-
-    cudaCommon_transferFromDeviceToCpu<float>(buffer, &memoryObject, size);
-
-    free(image->data);
-    image->datatype = NIFTI_TYPE_FLOAT32;
-    image->nbyper = sizeof(float);
-    image->data = (void*)malloc(image->nvox * image->nbyper);
-    float *dataT = static_cast<float*>(image->data);
-    for (size_t i = 0; i < size; ++i)
-        dataT[i] = static_cast<float>(buffer[i]);
-    free(buffer);
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 nifti_image** reg_f3d_gpu::GetWarpedImage() {
     // The initial images are used
     if (this->inputReference == NULL || this->inputFloating == NULL || this->controlPointGrid == NULL) {
@@ -670,9 +647,13 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() {
     warpedImage[0]->cal_max = this->inputFloating->cal_max;
     warpedImage[0]->scl_slope = this->inputFloating->scl_slope;
     warpedImage[0]->scl_inter = this->inputFloating->scl_inter;
-    this->fillImageData(warpedImage[0], this->warped_gpu);
-    if (this->currentFloating->nt == 2)
-        this->fillImageData(warpedImage[1], this->warped2_gpu);
+    warpedImage[0]->data = (void*)malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper);
+    cudaCommon_transferFromDeviceToNifti(warpedImage[0], &this->warped_gpu);
+    if (this->currentFloating->nt == 2) {
+        warpedImage[1] = warpedImage[0];
+        warpedImage[1]->data = (void*)malloc(warpedImage[1]->nvox * warpedImage[1]->nbyper);
+        cudaCommon_transferFromDeviceToNifti(warpedImage[1], &this->warped2_gpu);
+    }
 
     this->ClearWarped();
 #ifndef NDEBUG
diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h
index 2b141134..3fefb0e8 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.h
+++ b/reg-lib/cuda/_reg_f3d_gpu.h
@@ -82,8 +82,6 @@ class reg_f3d_gpu: public reg_f3d<float> {
     float NormaliseGradient();
     void InitialiseSimilarity();
 
-    void fillImageData(nifti_image *image, float* memoryObject);
-
 public:
     void UseNMISetReferenceBinNumber(int, int);
     void UseNMISetFloatingBinNumber(int, int);

From ab9651c68fdb5e23ed61b24f7fcccfaf34bb74e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 23 Nov 2022 11:21:17 +0000
Subject: [PATCH 017/314] More refactorisations

More refactorisations
---
 niftyreg_build_version.txt                    |    2 +-
 reg-apps/reg_aladin.cpp                       |   56 +-
 reg-apps/reg_average.cpp                      |   72 +-
 reg-apps/reg_f3d.cpp                          |   76 +-
 reg-apps/reg_jacobian.cpp                     |   32 +-
 reg-apps/reg_measure.cpp                      |   44 +-
 reg-apps/reg_ppcnr.cpp                        |    8 +-
 reg-apps/reg_resample.cpp                     |   34 +-
 reg-apps/reg_tools.cpp                        |   64 +-
 reg-apps/reg_transform.cpp                    |  156 +-
 reg-io/_reg_ReadWriteBinary.h                 |    5 +-
 reg-io/_reg_ReadWriteImage.cpp                |    8 +-
 reg-io/_reg_ReadWriteImage.h                  |    4 +-
 reg-io/_reg_ReadWriteMatrix.h                 |    7 +-
 reg-io/nifti/nifti1.h                         |    5 +-
 reg-io/nifti/nifti1_io.h                      |    6 +-
 reg-io/nifti/znzlib.h                         |    5 +-
 reg-io/nrrd/NrrdIO/NrrdConfigure.h.in         |    5 +-
 reg-io/nrrd/NrrdIO/biff.h                     |    5 +-
 reg-io/nrrd/reg_nrrd.cpp                      |   26 +-
 reg-io/nrrd/reg_nrrd.h                        |    6 +-
 reg-io/png/lpng1510/png.h                     |    4 +-
 reg-io/png/lpng1510/pngconf.h                 |    5 +-
 reg-io/png/lpng1510/pngdebug.h                |    4 +-
 reg-io/png/lpng1510/pnginfo.h                 |    4 +-
 reg-io/png/lpng1510/pnglibconf.h.prebuilt     |    4 +-
 reg-io/png/lpng1510/pngpriv.h                 |    5 +-
 reg-io/png/lpng1510/pngstruct.h               |    5 +-
 reg-io/png/readpng.cpp                        |   48 +-
 reg-io/png/reg_png.cpp                        |   30 +-
 reg-io/png/reg_png.h                          |    5 +-
 reg-lib/AffineDeformationFieldKernel.h        |    9 +-
 reg-lib/AladinContent.cpp                     |  363 ++-
 reg-lib/AladinContent.h                       |  109 +-
 reg-lib/BlockMatchingKernel.h                 |    9 +-
 reg-lib/CMakeLists.txt                        |   36 +-
 reg-lib/ConvolutionKernel.h                   |    9 +-
 reg-lib/Kernel.h                              |   23 +-
 reg-lib/KernelFactory.h                       |   15 +-
 reg-lib/OptimiseKernel.h                      |    9 +-
 reg-lib/Platform.cpp                          |   85 +-
 reg-lib/Platform.h                            |   25 +-
 reg-lib/ResampleImageKernel.h                 |    9 +-
 reg-lib/_reg_aladin.cpp                       | 1072 ++++----
 reg-lib/_reg_aladin.h                         |  450 ++--
 reg-lib/_reg_aladin_sym.cpp                   |  216 +-
 reg-lib/_reg_aladin_sym.h                     |   15 +-
 reg-lib/_reg_base.cpp                         | 2323 ++++++++---------
 reg-lib/_reg_base.h                           |  451 ++--
 reg-lib/_reg_f3d.cpp                          | 1588 ++++++-----
 reg-lib/_reg_f3d.h                            |  222 +-
 reg-lib/_reg_f3d2.cpp                         |   57 +-
 reg-lib/_reg_f3d2.h                           |    8 +-
 reg-lib/_reg_f3d_sym.cpp                      |  186 +-
 reg-lib/_reg_f3d_sym.h                        |    5 +-
 reg-lib/_reg_polyAffine.cpp                   |    5 -
 reg-lib/_reg_polyAffine.h                     |    5 +-
 reg-lib/cl/CLAladinContent.cpp                |  453 ----
 reg-lib/cl/CLAladinContent.h                  |  115 -
 reg-lib/cl/CLConvolutionKernel.h              |   17 -
 reg-lib/cl/CLKernelFactory.cpp                |   17 -
 reg-lib/cl/CLKernelFactory.h                  |   13 -
 reg-lib/cl/CLOptimiseKernel.h                 |   21 -
 reg-lib/cl/CMakeLists.txt                     |   28 +-
 ...cpp => ClAffineDeformationFieldKernel.cpp} |   44 +-
 ...nel.h => ClAffineDeformationFieldKernel.h} |   15 +-
 reg-lib/cl/ClAladinContent.cpp                |  413 +++
 reg-lib/cl/ClAladinContent.h                  |  102 +
 ...ngKernel.cpp => ClBlockMatchingKernel.cpp} |   54 +-
 ...tchingKernel.h => ClBlockMatchingKernel.h} |   15 +-
 ...tSingletton.cpp => ClContextSingleton.cpp} |  110 +-
 ...ntextSingletton.h => ClContextSingleton.h} |   47 +-
 ...tionKernel.cpp => ClConvolutionKernel.cpp} |   10 +-
 reg-lib/cl/ClConvolutionKernel.h              |   14 +
 reg-lib/cl/ClKernelFactory.cpp                |   17 +
 reg-lib/cl/ClKernelFactory.h                  |    9 +
 ...ptimiseKernel.cpp => ClOptimiseKernel.cpp} |   20 +-
 reg-lib/cl/ClOptimiseKernel.h                 |   18 +
 ...geKernel.cpp => ClResampleImageKernel.cpp} |   48 +-
 ...eImageKernel.h => ClResampleImageKernel.h} |   15 +-
 reg-lib/cl/InfoDevice.h                       |   22 +-
 reg-lib/cl/_reg_openclinfo.cpp                |    8 +-
 reg-lib/cl/_reg_openclinfo.h                  |    5 +-
 reg-lib/cl/config.h.in                        |    5 +-
 .../cpu/CPUAffineDeformationFieldKernel.cpp   |   15 -
 reg-lib/cpu/CPUAffineDeformationFieldKernel.h |   20 -
 reg-lib/cpu/CPUBlockMatchingKernel.cpp        |   13 -
 reg-lib/cpu/CPUBlockMatchingKernel.h          |   23 -
 reg-lib/cpu/CPUConvolutionKernel.h            |   14 -
 reg-lib/cpu/CPUKernelFactory.cpp              |   18 -
 reg-lib/cpu/CPUKernelFactory.h                |   14 -
 reg-lib/cpu/CPUOptimiseKernel.cpp             |   10 -
 reg-lib/cpu/CPUOptimiseKernel.h               |   20 -
 reg-lib/cpu/CPUResampleImageKernel.h          |   20 -
 .../cpu/CpuAffineDeformationFieldKernel.cpp   |   15 +
 reg-lib/cpu/CpuAffineDeformationFieldKernel.h |   16 +
 reg-lib/cpu/CpuBlockMatchingKernel.cpp        |   13 +
 reg-lib/cpu/CpuBlockMatchingKernel.h          |   20 +
 ...ionKernel.cpp => CpuConvolutionKernel.cpp} |    6 +-
 reg-lib/cpu/CpuConvolutionKernel.h            |   11 +
 reg-lib/cpu/CpuKernelFactory.cpp              |   16 +
 reg-lib/cpu/CpuKernelFactory.h                |   10 +
 reg-lib/cpu/CpuOptimiseKernel.cpp             |   10 +
 reg-lib/cpu/CpuOptimiseKernel.h               |   17 +
 ...eKernel.cpp => CpuResampleImageKernel.cpp} |   14 +-
 reg-lib/cpu/CpuResampleImageKernel.h          |   17 +
 reg-lib/cpu/_reg_blockMatching.cpp            |   42 +-
 reg-lib/cpu/_reg_blockMatching.h              |    4 +-
 reg-lib/cpu/_reg_discrete_init.cpp            |   34 +-
 reg-lib/cpu/_reg_discrete_init.h              |    6 +-
 reg-lib/cpu/_reg_dti.h                        |   18 +-
 reg-lib/cpu/_reg_femTrans.cpp                 |    5 -
 reg-lib/cpu/_reg_femTrans.h                   |    4 +-
 reg-lib/cpu/_reg_globalTrans.cpp              |    8 +-
 reg-lib/cpu/_reg_globalTrans.h                |    6 +-
 reg-lib/cpu/_reg_kld.cpp                      |   40 +-
 reg-lib/cpu/_reg_kld.h                        |   23 +-
 reg-lib/cpu/_reg_lncc.cpp                     |  133 +-
 reg-lib/cpu/_reg_lncc.h                       |   19 +-
 reg-lib/cpu/_reg_localTrans.cpp               |  106 +-
 reg-lib/cpu/_reg_localTrans.h                 |   10 +-
 reg-lib/cpu/_reg_localTrans_jac.cpp           |  132 +-
 reg-lib/cpu/_reg_localTrans_jac.h             |    6 +-
 reg-lib/cpu/_reg_localTrans_regul.cpp         |    6 +-
 reg-lib/cpu/_reg_localTrans_regul.h           |    4 +-
 reg-lib/cpu/_reg_macros.h                     |   60 +-
 reg-lib/cpu/_reg_maths.cpp                    |    8 +-
 reg-lib/cpu/_reg_maths.h                      |    5 +-
 reg-lib/cpu/_reg_maths_eigen.cpp              |    2 +-
 reg-lib/cpu/_reg_maths_eigen.h                |    6 +-
 reg-lib/cpu/_reg_measure.h                    |   24 +-
 reg-lib/cpu/_reg_mind.cpp                     |   60 +-
 reg-lib/cpu/_reg_mind.h                       |   15 +-
 reg-lib/cpu/_reg_mrf.cpp                      |   46 +-
 reg-lib/cpu/_reg_mrf.h                        |    6 +-
 reg-lib/cpu/_reg_nmi.cpp                      |   77 +-
 reg-lib/cpu/_reg_nmi.h                        |   15 +-
 reg-lib/cpu/_reg_optimiser.cpp                |  112 +-
 reg-lib/cpu/_reg_optimiser.h                  |   27 +-
 reg-lib/cpu/_reg_polyAffine.cpp               |    5 -
 reg-lib/cpu/_reg_polyAffine.h                 |    5 +-
 reg-lib/cpu/_reg_resampling.cpp               |   51 +-
 reg-lib/cpu/_reg_resampling.h                 |   17 +-
 reg-lib/cpu/_reg_splineBasis.cpp              |   15 +-
 reg-lib/cpu/_reg_splineBasis.h                |    5 +-
 reg-lib/cpu/_reg_ssd.cpp                      |   54 +-
 reg-lib/cpu/_reg_ssd.h                        |   20 +-
 reg-lib/cpu/_reg_thinPlateSpline.cpp          |   39 +-
 reg-lib/cpu/_reg_thinPlateSpline.h            |    5 +-
 reg-lib/cpu/_reg_tools.cpp                    |   36 +-
 reg-lib/cpu/_reg_tools.h                      |   14 +-
 reg-lib/cuda/CMakeLists.txt                   |   20 +-
 .../cuda/CUDAAffineDeformationFieldKernel.h   |   26 -
 reg-lib/cuda/CUDAAladinContent.cpp            |  561 ----
 reg-lib/cuda/CUDAAladinContent.h              |  114 -
 reg-lib/cuda/CUDAContextSingletton.cpp        |  134 -
 reg-lib/cuda/CUDAContextSingletton.h          |   38 -
 reg-lib/cuda/CUDAConvolutionKernel.h          |   25 -
 reg-lib/cuda/CUDAKernelFactory.cpp            |   16 -
 reg-lib/cuda/CUDAKernelFactory.h              |   10 -
 reg-lib/cuda/CUDAOptimiseKernel.h             |   28 -
 reg-lib/cuda/CUDAPlatform.h                   |   15 -
 ...p => CudaAffineDeformationFieldKernel.cpp} |   20 +-
 .../cuda/CudaAffineDeformationFieldKernel.h   |   23 +
 reg-lib/cuda/CudaAladinContent.cpp            |  525 ++++
 reg-lib/cuda/CudaAladinContent.h              |  112 +
 ...Kernel.cpp => CudaBlockMatchingKernel.cpp} |   24 +-
 ...hingKernel.h => CudaBlockMatchingKernel.h} |   15 +-
 reg-lib/cuda/CudaContextSingleton.cpp         |  125 +
 reg-lib/cuda/CudaContextSingleton.h           |   34 +
 ...onKernel.cpp => CudaConvolutionKernel.cpp} |    8 +-
 reg-lib/cuda/CudaConvolutionKernel.h          |   22 +
 reg-lib/cuda/CudaKernelFactory.cpp            |   16 +
 reg-lib/cuda/CudaKernelFactory.h              |    9 +
 ...imiseKernel.cpp => CudaOptimiseKernel.cpp} |   38 +-
 reg-lib/cuda/CudaOptimiseKernel.h             |   25 +
 ...Kernel.cpp => CudaResampleImageKernel.cpp} |   20 +-
 ...mageKernel.h => CudaResampleImageKernel.h} |   19 +-
 reg-lib/cuda/_reg_blocksize_gpu.cu            |    7 +-
 reg-lib/cuda/_reg_blocksize_gpu.h             |    7 +-
 reg-lib/cuda/_reg_common_cuda.cu              |   11 +-
 reg-lib/cuda/_reg_common_cuda.h               |    4 +-
 reg-lib/cuda/_reg_cudainfo.h                  |    5 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp                 |  211 +-
 reg-lib/cuda/_reg_f3d_gpu.h                   |    5 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.cu |    7 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.h  |    5 +-
 .../cuda/_reg_globalTransformation_kernels.cu |    5 -
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |   32 +-
 reg-lib/cuda/_reg_localTransformation_gpu.h   |    4 +-
 .../cuda/_reg_localTransformation_kernels.cu  |    4 -
 reg-lib/cuda/_reg_measure_gpu.h               |    4 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                  |   15 +-
 reg-lib/cuda/_reg_nmi_gpu.h                   |    5 +-
 reg-lib/cuda/_reg_nmi_kernels.cu              |    5 -
 reg-lib/cuda/_reg_optimiser_gpu.cu            |   41 +-
 reg-lib/cuda/_reg_optimiser_gpu.h             |   18 +-
 reg-lib/cuda/_reg_optimiser_kernels.cu        |    5 -
 reg-lib/cuda/_reg_resampling_gpu.cu           |    9 +-
 reg-lib/cuda/_reg_resampling_gpu.h            |    4 +-
 reg-lib/cuda/_reg_resampling_kernels.cu       |    4 -
 reg-lib/cuda/_reg_ssd_gpu.cu                  |    8 +-
 reg-lib/cuda/_reg_ssd_gpu.h                   |    5 +-
 reg-lib/cuda/_reg_ssd_kernels.cu              |    6 +-
 reg-lib/cuda/_reg_tools_gpu.cu                |   24 +-
 reg-lib/cuda/_reg_tools_gpu.h                 |    5 +-
 reg-lib/cuda/_reg_tools_kernels.cu            |    5 -
 reg-lib/cuda/blockMatchingKernel.cu           |    4 -
 reg-lib/cuda/blockMatchingKernel.h            |    7 +-
 reg-lib/cuda/optimizeKernel.cu                |    4 +-
 reg-lib/cuda/optimizeKernel.h                 |    4 +-
 reg-lib/cuda/resampleKernel.cu                |    2 +-
 .../reg_test_affine_deformation_field.cpp     |   22 +-
 reg-test/reg_test_blockMatching.cpp           |   25 +-
 .../reg_test_bspline_deformation_field.cpp    |   11 +-
 reg-test/reg_test_changeDataType.cpp          |    4 +-
 ...est_coherence_affine_deformation_field.cpp |   28 +-
 reg-test/reg_test_coherence_blockMatching.cpp |   33 +-
 reg-test/reg_test_coherence_interpolation.cpp |   44 +-
 .../reg_test_compose_deformation_field.cpp    |    6 +-
 reg-test/reg_test_computation_time.cpp        |   19 +-
 reg-test/reg_test_convolution.cpp             |    5 +-
 reg-test/reg_test_fullAffine.cpp              |    7 +-
 reg-test/reg_test_fullAffine_cl.cpp           |    9 +-
 reg-test/reg_test_fullAffine_cuda.cpp         |    7 +-
 reg-test/reg_test_fullNonlinear.cpp           |    7 +-
 reg-test/reg_test_fullSymNonlinear.cpp        |    7 +-
 reg-test/reg_test_imageGradient.cpp           |    8 +-
 reg-test/reg_test_interpolation.cpp           |   63 +-
 reg-test/reg_test_leastTrimmedSquares.cpp     |   21 +-
 reg-test/reg_test_linearElasticity.cpp        |    5 +-
 .../reg_test_linearElasticityGradient.cpp     |    7 +-
 reg-test/reg_test_measure.cpp                 |   14 +-
 reg-test/reg_test_mindDescriptor.cpp          |    5 +-
 reg-test/reg_test_mindsscDescriptor.cpp       |    4 +-
 .../reg_test_nonlinear_deformation_field.cpp  |    8 +-
 reg-test/reg_test_svd_cuda.cpp                |    4 +-
 237 files changed, 6584 insertions(+), 7668 deletions(-)
 delete mode 100755 reg-lib/cl/CLAladinContent.cpp
 delete mode 100755 reg-lib/cl/CLAladinContent.h
 delete mode 100644 reg-lib/cl/CLConvolutionKernel.h
 delete mode 100755 reg-lib/cl/CLKernelFactory.cpp
 delete mode 100755 reg-lib/cl/CLKernelFactory.h
 delete mode 100644 reg-lib/cl/CLOptimiseKernel.h
 rename reg-lib/cl/{CLAffineDeformationFieldKernel.cpp => ClAffineDeformationFieldKernel.cpp} (80%)
 rename reg-lib/cl/{CLAffineDeformationFieldKernel.h => ClAffineDeformationFieldKernel.h} (52%)
 create mode 100644 reg-lib/cl/ClAladinContent.cpp
 create mode 100644 reg-lib/cl/ClAladinContent.h
 rename reg-lib/cl/{CLBlockMatchingKernel.cpp => ClBlockMatchingKernel.cpp} (79%)
 rename reg-lib/cl/{CLBlockMatchingKernel.h => ClBlockMatchingKernel.h} (62%)
 rename reg-lib/cl/{CLContextSingletton.cpp => ClContextSingleton.cpp} (83%)
 mode change 100755 => 100644
 rename reg-lib/cl/{CLContextSingletton.h => ClContextSingleton.h} (54%)
 mode change 100755 => 100644
 rename reg-lib/cl/{CLConvolutionKernel.cpp => ClConvolutionKernel.cpp} (65%)
 create mode 100644 reg-lib/cl/ClConvolutionKernel.h
 create mode 100644 reg-lib/cl/ClKernelFactory.cpp
 create mode 100644 reg-lib/cl/ClKernelFactory.h
 rename reg-lib/cl/{CLOptimiseKernel.cpp => ClOptimiseKernel.cpp} (53%)
 create mode 100644 reg-lib/cl/ClOptimiseKernel.h
 rename reg-lib/cl/{CLResampleImageKernel.cpp => ClResampleImageKernel.cpp} (82%)
 rename reg-lib/cl/{CLResampleImageKernel.h => ClResampleImageKernel.h} (55%)
 delete mode 100644 reg-lib/cpu/CPUAffineDeformationFieldKernel.cpp
 delete mode 100644 reg-lib/cpu/CPUAffineDeformationFieldKernel.h
 delete mode 100644 reg-lib/cpu/CPUBlockMatchingKernel.cpp
 delete mode 100644 reg-lib/cpu/CPUBlockMatchingKernel.h
 delete mode 100644 reg-lib/cpu/CPUConvolutionKernel.h
 delete mode 100755 reg-lib/cpu/CPUKernelFactory.cpp
 delete mode 100755 reg-lib/cpu/CPUKernelFactory.h
 delete mode 100644 reg-lib/cpu/CPUOptimiseKernel.cpp
 delete mode 100644 reg-lib/cpu/CPUOptimiseKernel.h
 delete mode 100644 reg-lib/cpu/CPUResampleImageKernel.h
 create mode 100644 reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp
 create mode 100644 reg-lib/cpu/CpuAffineDeformationFieldKernel.h
 create mode 100644 reg-lib/cpu/CpuBlockMatchingKernel.cpp
 create mode 100644 reg-lib/cpu/CpuBlockMatchingKernel.h
 rename reg-lib/cpu/{CPUConvolutionKernel.cpp => CpuConvolutionKernel.cpp} (56%)
 create mode 100644 reg-lib/cpu/CpuConvolutionKernel.h
 create mode 100644 reg-lib/cpu/CpuKernelFactory.cpp
 create mode 100644 reg-lib/cpu/CpuKernelFactory.h
 create mode 100644 reg-lib/cpu/CpuOptimiseKernel.cpp
 create mode 100644 reg-lib/cpu/CpuOptimiseKernel.h
 rename reg-lib/cpu/{CPUResampleImageKernel.cpp => CpuResampleImageKernel.cpp} (62%)
 create mode 100644 reg-lib/cpu/CpuResampleImageKernel.h
 delete mode 100644 reg-lib/cuda/CUDAAffineDeformationFieldKernel.h
 delete mode 100755 reg-lib/cuda/CUDAAladinContent.cpp
 delete mode 100755 reg-lib/cuda/CUDAAladinContent.h
 delete mode 100644 reg-lib/cuda/CUDAContextSingletton.cpp
 delete mode 100644 reg-lib/cuda/CUDAContextSingletton.h
 delete mode 100644 reg-lib/cuda/CUDAConvolutionKernel.h
 delete mode 100755 reg-lib/cuda/CUDAKernelFactory.cpp
 delete mode 100755 reg-lib/cuda/CUDAKernelFactory.h
 delete mode 100644 reg-lib/cuda/CUDAOptimiseKernel.h
 delete mode 100755 reg-lib/cuda/CUDAPlatform.h
 rename reg-lib/cuda/{CUDAAffineDeformationFieldKernel.cpp => CudaAffineDeformationFieldKernel.cpp} (55%)
 create mode 100644 reg-lib/cuda/CudaAffineDeformationFieldKernel.h
 create mode 100644 reg-lib/cuda/CudaAladinContent.cpp
 create mode 100644 reg-lib/cuda/CudaAladinContent.h
 rename reg-lib/cuda/{CUDABlockMatchingKernel.cpp => CudaBlockMatchingKernel.cpp} (58%)
 rename reg-lib/cuda/{CUDABlockMatchingKernel.h => CudaBlockMatchingKernel.h} (54%)
 create mode 100644 reg-lib/cuda/CudaContextSingleton.cpp
 create mode 100644 reg-lib/cuda/CudaContextSingleton.h
 rename reg-lib/cuda/{CUDAConvolutionKernel.cpp => CudaConvolutionKernel.cpp} (76%)
 create mode 100644 reg-lib/cuda/CudaConvolutionKernel.h
 create mode 100644 reg-lib/cuda/CudaKernelFactory.cpp
 create mode 100644 reg-lib/cuda/CudaKernelFactory.h
 rename reg-lib/cuda/{CUDAOptimiseKernel.cpp => CudaOptimiseKernel.cpp} (67%)
 create mode 100644 reg-lib/cuda/CudaOptimiseKernel.h
 rename reg-lib/cuda/{CUDAResampleImageKernel.cpp => CudaResampleImageKernel.cpp} (75%)
 rename reg-lib/cuda/{CUDAResampleImageKernel.h => CudaResampleImageKernel.h} (50%)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 58c9bdf9..0a3e7b04 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-111
+126
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index d0cf1578..9b6d8984 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -98,7 +98,7 @@ void Usage(char *exec)
 //   reg_print_info(exec, "\t-crv\t\t\tChoose custom capture range for the block matching alg");
 #if defined (_OPENMP)
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    sprintf(text,"\t-omp <int>\t\tNumber of thread to use with OpenMP. [%i/%i]",
           defaultOpenMPValue, omp_get_num_procs());
@@ -129,25 +129,25 @@ int main(int argc, char **argv)
 
    int symFlag=1;
 
-   char *referenceImageName=NULL;
+   char *referenceImageName=nullptr;
    int referenceImageFlag=0;
 
-   char *floatingImageName=NULL;
+   char *floatingImageName=nullptr;
    int floatingImageFlag=0;
 
-   char *outputAffineName=NULL;
+   char *outputAffineName=nullptr;
    int outputAffineFlag=0;
 
-   char *inputAffineName=NULL;
+   char *inputAffineName=nullptr;
    int inputAffineFlag=0;
 
-   char *referenceMaskName=NULL;
+   char *referenceMaskName=nullptr;
    int referenceMaskFlag=0;
 
-   char *floatingMaskName=NULL;
+   char *floatingMaskName=nullptr;
    int floatingMaskFlag=0;
 
-   char *outputResultName=NULL;
+   char *outputResultName=nullptr;
    int outputResultFlag=0;
 
    int maxIter=5;
@@ -179,7 +179,7 @@ int main(int argc, char **argv)
 #if defined (_OPENMP)
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    omp_set_num_threads(defaultOpenMPValue);
 #endif
@@ -445,7 +445,7 @@ int main(int argc, char **argv)
 
    /* Read the reference image and check its dimension */
    nifti_image *referenceHeader = reg_io_ReadImageFile(referenceImageName);
-   if(referenceHeader == NULL)
+   if(referenceHeader == nullptr)
    {
       sprintf(text,"Error when reading the reference image: %s", referenceImageName);
       reg_print_msg_error(text);
@@ -454,7 +454,7 @@ int main(int argc, char **argv)
 
    /* Read the floating image and check its dimension */
    nifti_image *floatingHeader = reg_io_ReadImageFile(floatingImageName);
-   if(floatingHeader == NULL)
+   if(floatingHeader == nullptr)
    {
       sprintf(text,"Error when reading the floating image: %s", floatingImageName);
       reg_print_msg_error(text);
@@ -462,8 +462,8 @@ int main(int argc, char **argv)
    }
 
    // Set the reference and floating images
-   nifti_image *isoRefImage=NULL;
-   nifti_image *isoFloImage=NULL;
+   nifti_image *isoRefImage=nullptr;
+   nifti_image *isoFloImage=nullptr;
    if(iso)
    {
       // make the images isotropic if required
@@ -479,12 +479,12 @@ int main(int argc, char **argv)
    }
 
    /* read the reference mask image */
-   nifti_image *referenceMaskImage=NULL;
-   nifti_image *isoRefMaskImage=NULL;
+   nifti_image *referenceMaskImage=nullptr;
+   nifti_image *isoRefMaskImage=nullptr;
    if(referenceMaskFlag)
    {
       referenceMaskImage = reg_io_ReadImageFile(referenceMaskName);
-      if(referenceMaskImage == NULL)
+      if(referenceMaskImage == nullptr)
       {
          sprintf(text,"Error when reading the reference mask image: %s", referenceMaskName);
          reg_print_msg_error(text);
@@ -508,12 +508,12 @@ int main(int argc, char **argv)
       else REG->SetInputMask(referenceMaskImage);
    }
    /* Read the floating mask image */
-   nifti_image *floatingMaskImage=NULL;
-   nifti_image *isoFloMaskImage=NULL;
+   nifti_image *floatingMaskImage=nullptr;
+   nifti_image *isoFloMaskImage=nullptr;
    if(floatingMaskFlag && symFlag)
    {
       floatingMaskImage = reg_io_ReadImageFile(floatingMaskName);
-      if(floatingMaskImage == NULL)
+      if(floatingMaskImage == nullptr)
       {
          sprintf(text,"Error when reading the floating mask image: %s", floatingMaskName);
          reg_print_msg_error(text);
@@ -550,9 +550,9 @@ int main(int argc, char **argv)
    REG->SetBlockPercentage(blockPercentage);
    REG->SetInlierLts(inlierLts);
    REG->SetInterpolation(interpolation);
-   REG->setCaptureRangeVox(captureRangeVox);
-   REG->setPlatformCode(platformFlag);
-   REG->setGpuIdx(gpuIdx);
+   REG->SetCaptureRangeVox(captureRangeVox);
+   REG->SetPlatformCode(platformFlag);
+   REG->SetGpuIdx(gpuIdx);
 
    if (referenceLowerThr != referenceUpperThr)
    {
@@ -618,17 +618,17 @@ int main(int argc, char **argv)
 
    nifti_image_free(referenceHeader);
    nifti_image_free(floatingHeader);
-   if(isoRefImage!=NULL)
+   if(isoRefImage!=nullptr)
       nifti_image_free(isoRefImage);
-   if(isoFloImage!=NULL)
+   if(isoFloImage!=nullptr)
       nifti_image_free(isoFloImage);
-   if(referenceMaskImage!=NULL)
+   if(referenceMaskImage!=nullptr)
       nifti_image_free(referenceMaskImage);
-   if(floatingMaskImage!=NULL)
+   if(floatingMaskImage!=nullptr)
       nifti_image_free(floatingMaskImage);
-   if(isoRefMaskImage!=NULL)
+   if(isoRefMaskImage!=nullptr)
       nifti_image_free(isoRefMaskImage);
-   if(isoFloMaskImage!=NULL)
+   if(isoFloMaskImage!=nullptr)
       nifti_image_free(isoFloMaskImage);
 
    delete REG;
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index ab887b2d..e4b88244 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -117,7 +117,7 @@ mat44 compute_average_matrices(size_t matrixNumber,
                                float lts_inlier=1.f)
 {
    // Read all input images
-   mat44 *matrices=NULL;
+   mat44 *matrices=nullptr;
    matrices = (mat44 *)malloc(matrixNumber*sizeof(mat44));
    for(size_t m=0; m<matrixNumber; ++m)
       reg_tool_ReadAffineFile(&matrices[m],inputAffName[m]);
@@ -231,7 +231,7 @@ mat44 compute_average_matrices(size_t matrixNumber,
    // Free the allocated array
    free(matrixWeight);
    free(matrixIndexSorted);
-   if(matrices!=NULL) free(matrices);
+   if(matrices!=nullptr) free(matrices);
    return average_matrix;
 }
 
@@ -247,7 +247,7 @@ mat44 compute_affine_demean(size_t matrixNumber,
       reg_tool_ReadAffineFile(&current_affine,inputAffName[m]);
       // extract the rigid matrix from the affine
       float qb,qc,qd,qx,qy,qz,qfac;
-      nifti_mat44_to_quatern(current_affine,&qb,&qc,&qd,&qx,&qy,&qz,NULL,NULL,NULL,&qfac);
+      nifti_mat44_to_quatern(current_affine,&qb,&qc,&qd,&qx,&qy,&qz,nullptr,nullptr,nullptr,&qfac);
       tempMatrix=nifti_quatern_to_mat44(qb,qc,qd,qx,qy,qz,1.f,1.f,1.f,qfac);
       // remove the rigid componenent from the affine matrix
       tempMatrix=nifti_mat44_inverse(tempMatrix);
@@ -268,7 +268,7 @@ mat44 compute_affine_demean(size_t matrixNumber,
 int compute_nrr_demean(nifti_image *demean_field,
                        size_t transformationNumber,
                        char **inputNRRName,
-                       char **inputAffName=NULL)
+                       char **inputAffName=nullptr)
 {
    // Set the demean field to zero
    reg_tools_multiplyValueToImage(demean_field,demean_field,0.f);
@@ -290,15 +290,15 @@ int compute_nrr_demean(nifti_image *demean_field,
       case DISP_FIELD:
          reg_getDeformationFromDisplacement(transformation);
       case DEF_FIELD:
-         reg_defField_compose(transformation,deformationField,NULL);
+         reg_defField_compose(transformation,deformationField,nullptr);
          break;
       case CUB_SPLINE_GRID:
-         reg_spline_getDeformationField(transformation,deformationField,NULL,true,true);
+         reg_spline_getDeformationField(transformation,deformationField,nullptr,true,true);
          break;
       case DISP_VEL_FIELD:
          reg_getDeformationFromDisplacement(transformation);
       case DEF_VEL_FIELD:
-         reg_defField_compose(transformation,deformationField,NULL);
+         reg_defField_compose(transformation,deformationField,nullptr);
          break;
       case SPLINE_VEL_GRID:
          reg_spline_getFlowFieldFromVelocityGrid(transformation,deformationField);
@@ -309,7 +309,7 @@ int compute_nrr_demean(nifti_image *demean_field,
          return EXIT_FAILURE;
       }
       // The affine component is removed
-      if(inputAffName!=NULL || transformation->num_ext>0){
+      if(inputAffName!=nullptr || transformation->num_ext>0){
          mat44 affineTransformation;
          if(transformation->num_ext>0)
          {
@@ -351,21 +351,21 @@ int compute_nrr_demean(nifti_image *demean_field,
 int compute_average_image(nifti_image *averageImage,
                           size_t imageNumber,
                           char **inputImageName,
-                          char **inputAffName=NULL,
-                          char **inputNRRName=NULL,
+                          char **inputAffName=nullptr,
+                          char **inputNRRName=nullptr,
                           bool demean=false,
                           int interpolation_order=3)
 {
    // Compute the matrix required for demeaning if required
    mat44 demeanMatrix;
-   nifti_image *demeanField = NULL;
-   if(demean && inputAffName!=NULL && inputNRRName==NULL){
+   nifti_image *demeanField = nullptr;
+   if(demean && inputAffName!=nullptr && inputNRRName==nullptr){
       demeanMatrix = compute_affine_demean(imageNumber, inputAffName);
 #ifndef NDEBUG
       reg_print_msg_debug("Matrix to use for demeaning computed");
 #endif
    }
-   if(demean && inputNRRName!=NULL){
+   if(demean && inputNRRName!=nullptr){
       demeanField=nifti_copy_nim_info(averageImage);
       demeanField->ndim=demeanField->dim[0]=5;
       demeanField->nt=demeanField->dim[4]=1;
@@ -416,16 +416,16 @@ int compute_average_image(nifti_image *averageImage,
       // Set the transformation to identity
       reg_getDeformationFromDisplacement(deformationField);
       // Compute the transformation if required
-      if(inputNRRName!=NULL){
+      if(inputNRRName!=nullptr){
          nifti_image *current_transformation = reg_io_ReadImageFile(inputNRRName[i]);
          switch(static_cast<int>(current_transformation->intent_p1)){
          case DISP_FIELD:
             reg_getDeformationFromDisplacement(current_transformation);
          case DEF_FIELD:
-            reg_defField_compose(current_transformation, deformationField, NULL);
+            reg_defField_compose(current_transformation, deformationField, nullptr);
             break;
          case CUB_SPLINE_GRID:
-            reg_spline_getDeformationField(current_transformation, deformationField, NULL, true, true);
+            reg_spline_getDeformationField(current_transformation, deformationField, nullptr, true, true);
             break;
          case SPLINE_VEL_GRID:
             if(current_transformation->num_ext>0)
@@ -435,13 +435,13 @@ int compute_average_image(nifti_image *averageImage,
          case DISP_VEL_FIELD:
             reg_getDeformationFromDisplacement(current_transformation);
          case DEF_VEL_FIELD:
-            reg_defField_compose(current_transformation,deformationField,NULL);
+            reg_defField_compose(current_transformation,deformationField,nullptr);
             break;
          default: reg_print_msg_error("Unsupported transformation type")
                   reg_exit();
          }
          nifti_image_free(current_transformation);
-         if(demeanField!=NULL){
+         if(demeanField!=nullptr){
             if(deformationField->intent_p1==DEF_VEL_FIELD){
                reg_tools_substractImageToImage(deformationField,demeanField,deformationField);
                nifti_image *tempDef = nifti_copy_nim_info(deformationField);
@@ -460,10 +460,10 @@ int compute_average_image(nifti_image *averageImage,
 #endif
          }
       }
-      else if(inputAffName!=NULL){
+      else if(inputAffName!=nullptr){
          mat44 current_affine;
          reg_tool_ReadAffineFile(&current_affine,inputAffName[i]);
-         if(demean && inputAffName!=NULL && inputNRRName==NULL){
+         if(demean && inputAffName!=nullptr && inputNRRName==nullptr){
             current_affine = demeanMatrix * current_affine;
 #ifndef NDEBUG
       reg_print_msg_debug("Input affine transformation has been demeaned");
@@ -483,7 +483,7 @@ int compute_average_image(nifti_image *averageImage,
       reg_resampleImage(current_input_image,
                         warpedImage,
                         deformationField,
-                        NULL,
+                        nullptr,
                         interpolation_order,
                         std::numeric_limits<float>::quiet_NaN());
       nifti_image_free(deformationField);
@@ -493,7 +493,7 @@ int compute_average_image(nifti_image *averageImage,
       nifti_image_free(warpedImage);
    }
    // Clear the allocated demeanField if needed
-   if(demeanField!=NULL) nifti_image_free(demeanField);
+   if(demeanField!=nullptr) nifti_image_free(demeanField);
    // Normalised the average image
    reg_tools_divideImageToImage(averageImage,definedValue, averageImage);
    nifti_image_free(definedValue);
@@ -511,7 +511,7 @@ int main(int argc, char **argv)
 #if defined (_OPENMP)
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    omp_set_num_threads(defaultOpenMPValue);
 #endif
@@ -548,12 +548,12 @@ int main(int argc, char **argv)
    }
 
    // Check if a command text file is provided
-   char **pointer_to_command = NULL;
+   char **pointer_to_command = nullptr;
    int arg_num_command = 0;
    if(strcmp(argv[1],"--cmd_file")==0 && argc==3){
       char buffer[512];
       FILE *cmd_file = fopen(argv[2], "r+");
-      if(cmd_file==NULL){
+      if(cmd_file==nullptr){
          reg_print_msg_error("Error when reading the provided command line file:");
          reg_print_msg_error(argv[2]);
          reg_exit();
@@ -638,7 +638,7 @@ int main(int argc, char **argv)
    int operation;
    bool use_demean=false;
    size_t image_number=0;
-   char *referenceImageName=NULL;
+   char *referenceImageName=nullptr;
 
    // Set the name of the file to output
    char *outputName = pointer_to_command[1];
@@ -696,9 +696,9 @@ int main(int argc, char **argv)
    }
 
    // Parse the input data
-   char **input_image_names = NULL;
-   char **input_affine_names = NULL;
-   char **input_nonrigid_names = NULL;
+   char **input_image_names = nullptr;
+   char **input_affine_names = nullptr;
+   char **input_nonrigid_names = nullptr;
    if(operation!=AVG_INPUT || trans_is_affine==false){
       input_image_names = (char **)malloc(image_number*sizeof(char *));
    }
@@ -743,7 +743,7 @@ int main(int argc, char **argv)
    }
 
    mat44 avg_output_matrix;
-   nifti_image *avg_output_image=NULL;
+   nifti_image *avg_output_image=nullptr;
 
    // Go over the different operations
    if(operation==AVG_INPUT && trans_is_affine==true){
@@ -756,7 +756,7 @@ int main(int argc, char **argv)
    }
    else{
       // Allocate the average warped image
-      if(referenceImageName==NULL)
+      if(referenceImageName==nullptr)
          referenceImageName=input_image_names[0];
       avg_output_image = reg_io_ReadImageFile(referenceImageName);
       // clean the data and reallocate them
@@ -781,7 +781,7 @@ int main(int argc, char **argv)
                             interpolation_order);
    }
    // Save the output
-   if(avg_output_image==NULL)
+   if(avg_output_image==nullptr)
       reg_tool_WriteAffineFile(&avg_output_matrix, outputName);
    else reg_io_WriteImageFile(avg_output_image, outputName);
 
@@ -791,15 +791,15 @@ int main(int argc, char **argv)
          free(pointer_to_command[i]);
       free(pointer_to_command);
    }
-   if(avg_output_image!=NULL)
+   if(avg_output_image!=nullptr)
       nifti_image_free(avg_output_image);
-   if(input_image_names!=NULL){
+   if(input_image_names!=nullptr){
       free(input_image_names);
    }
-   if(input_affine_names!=NULL){
+   if(input_affine_names!=nullptr){
       free(input_affine_names);
    }
-   if(input_nonrigid_names!=NULL){
+   if(input_nonrigid_names!=nullptr){
       free(input_nonrigid_names);
    }
 
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 64fb6d47..7593edab 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -156,7 +156,7 @@ void Usage(char *exec)
    reg_print_info(exec, "");
    reg_print_info(exec, "*** OpenMP-related options:");
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    sprintf(text,"\t-omp <int>\t\tNumber of thread to use with OpenMP. [%i/%i]",
            defaultOpenMPValue, omp_get_num_procs());
@@ -188,7 +188,7 @@ int main(int argc, char **argv)
 #if defined (_OPENMP)
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    omp_set_num_threads(defaultOpenMPValue);
 #endif
@@ -255,14 +255,14 @@ int main(int argc, char **argv)
 
    //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
    // Read the reference and floating image
-   nifti_image *referenceImage=NULL;
-   nifti_image *floatingImage=NULL;
+   nifti_image *referenceImage=nullptr;
+   nifti_image *floatingImage=nullptr;
    for(int i=1; i<argc; i++)
    {
       if((strcmp(argv[i],"-ref")==0) || (strcmp(argv[i],"-target")==0) || (strcmp(argv[i],"--ref")==0))
       {
          referenceImage=reg_io_ReadImageFile(argv[++i]);
-         if(referenceImage==NULL)
+         if(referenceImage==nullptr)
          {
             reg_print_msg_error("Error when reading the reference image:");
             reg_print_msg_error(argv[i-1]);
@@ -272,7 +272,7 @@ int main(int argc, char **argv)
       if((strcmp(argv[i],"-flo")==0) || (strcmp(argv[i],"-source")==0) || (strcmp(argv[i],"--flo")==0))
       {
          floatingImage=reg_io_ReadImageFile(argv[++i]);
-         if(floatingImage==NULL)
+         if(floatingImage==nullptr)
          {
             reg_print_msg_error("Error when reading the floating image:");
             reg_print_msg_error(argv[i-1]);
@@ -281,14 +281,14 @@ int main(int argc, char **argv)
       }
    }
    // Check that both reference and floating image have been defined
-   if(referenceImage==NULL)
+   if(referenceImage==nullptr)
    {
       reg_print_msg_error("Error. No reference image has been defined");
       PetitUsage((argv[0]));
       return EXIT_FAILURE;
    }
    // Read the floating image
-   if(floatingImage==NULL)
+   if(floatingImage==nullptr)
    {
       reg_print_msg_error("Error. No floating image has been defined");
       PetitUsage((argv[0]));
@@ -299,9 +299,9 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    CUcontext ctx;
 #endif // _USE_CUDA
-   reg_f3d<float> *REG=NULL;
-   float *referenceLandmark=NULL;
-   float *floatingLandmark=NULL;
+   reg_f3d<float> *REG=nullptr;
+   float *referenceLandmark=nullptr;
+   float *floatingLandmark=nullptr;
    for(int i=1; i<argc; i++)
    {
       if(strcmp(argv[i], "-vel")==0 || strcmp(argv[i], "--vel")==0)
@@ -326,19 +326,19 @@ int main(int argc, char **argv)
       }
 #endif // _USE_CUDA
    }
-   if(REG==NULL)
+   if(REG==nullptr)
       REG=new reg_f3d<float>(referenceImage->nt,floatingImage->nt);
    REG->SetReferenceImage(referenceImage);
    REG->SetFloatingImage(floatingImage);
 
    // Create some pointers that could be used
    mat44 affineMatrix;
-   nifti_image *inputCCPImage=NULL;
-   nifti_image *referenceMaskImage=NULL;
-   nifti_image *floatingMaskImage=NULL;
-   nifti_image *refLocalWeightSim=NULL;
-   char *outputWarpedImageName=NULL;
-   char *outputCPPImageName=NULL;
+   nifti_image *inputCCPImage=nullptr;
+   nifti_image *referenceMaskImage=nullptr;
+   nifti_image *floatingMaskImage=nullptr;
+   nifti_image *refLocalWeightSim=nullptr;
+   char *outputWarpedImageName=nullptr;
+   char *outputCPPImageName=nullptr;
    bool useMeanLNCC=false;
    int refBinNumber=0;
    int floBinNumber=0;
@@ -381,7 +381,7 @@ int main(int argc, char **argv)
       else if(strcmp(argv[i], "-incpp")==0 || (strcmp(argv[i],"--incpp")==0))
       {
          inputCCPImage=reg_io_ReadImageFile(argv[++i]);
-         if(inputCCPImage==NULL)
+         if(inputCCPImage==nullptr)
          {
             reg_print_msg_error("Error when reading the input control point grid image:");
             reg_print_msg_error(argv[i-1]);
@@ -392,7 +392,7 @@ int main(int argc, char **argv)
       else if((strcmp(argv[i],"-rmask")==0) || (strcmp(argv[i],"-tmask")==0) || (strcmp(argv[i],"--rmask")==0))
       {
          referenceMaskImage=reg_io_ReadImageFile(argv[++i]);
-         if(referenceMaskImage==NULL)
+         if(referenceMaskImage==nullptr)
          {
             reg_print_msg_error("Error when reading the reference mask image:");
             reg_print_msg_error(argv[i-1]);
@@ -744,7 +744,7 @@ int main(int argc, char **argv)
          switch(interp)
          {
          case 0:
-            REG->UseNeareatNeighborInterpolation();
+            REG->UseNearestNeighborInterpolation();
             break;
          case 1:
             REG->UseLinearInterpolation();
@@ -758,7 +758,7 @@ int main(int argc, char **argv)
               (strcmp(argv[i],"--fmask")==0) || (strcmp(argv[i],"--smask")==0))
       {
          floatingMaskImage=reg_io_ReadImageFile(argv[++i]);
-         if(floatingMaskImage==NULL)
+         if(floatingMaskImage==nullptr)
          {
             reg_print_msg_error("Error when reading the floating mask image:");
             reg_print_msg_error(argv[i-1]);
@@ -851,14 +851,14 @@ int main(int argc, char **argv)
 
    // Save the control point image
    nifti_image *outputControlPointGridImage = REG->GetControlPointPositionImage();
-   if(outputCPPImageName==NULL) outputCPPImageName=(char *)"outputCPP.nii";
+   if(outputCPPImageName==nullptr) outputCPPImageName=(char *)"outputCPP.nii";
    memset(outputControlPointGridImage->descrip, 0, 80);
    strcpy (outputControlPointGridImage->descrip,"Control point position from NiftyReg (reg_f3d)");
    if(strcmp("NiftyReg F3D2", REG->GetExecutableName())==0)
       strcpy (outputControlPointGridImage->descrip,"Velocity field grid from NiftyReg (reg_f3d2)");
    reg_io_WriteImageFile(outputControlPointGridImage,outputCPPImageName);
    nifti_image_free(outputControlPointGridImage);
-   outputControlPointGridImage=NULL;
+   outputControlPointGridImage=nullptr;
 
    // Save the backward control point image
    if(REG->GetSymmetricStatus())
@@ -887,12 +887,12 @@ int main(int argc, char **argv)
          strcpy (outputBackwardControlPointGridImage->descrip,"Backward velocity field grid from NiftyReg (reg_f3d2)");
       reg_io_WriteImageFile(outputBackwardControlPointGridImage,b.c_str());
       nifti_image_free(outputBackwardControlPointGridImage);
-      outputBackwardControlPointGridImage=NULL;
+      outputBackwardControlPointGridImage=nullptr;
    }
 
    // Save the warped image(s)
    nifti_image **outputWarpedImage = REG->GetWarpedImage();
-   if(outputWarpedImageName==NULL)
+   if(outputWarpedImageName==nullptr)
       outputWarpedImageName=(char *)"outputResult.nii";
    memset(outputWarpedImage[0]->descrip, 0, 80);
    strcpy (outputWarpedImage[0]->descrip,"Warped image using NiftyReg (reg_f3d)");
@@ -903,7 +903,7 @@ int main(int argc, char **argv)
    }
    if(REG->GetSymmetricStatus())
    {
-      if(outputWarpedImage[1]!=NULL)
+      if(outputWarpedImage[1]!=nullptr)
       {
          std::string b(outputWarpedImageName);
          if(b.find( ".nii.gz") != std::string::npos)
@@ -925,14 +925,14 @@ int main(int argc, char **argv)
       }
    }
    reg_io_WriteImageFile(outputWarpedImage[0],outputWarpedImageName);
-   if(outputWarpedImage[0]!=NULL)
+   if(outputWarpedImage[0]!=nullptr)
       nifti_image_free(outputWarpedImage[0]);
-   outputWarpedImage[0]=NULL;
-   if(outputWarpedImage[1]!=NULL)
+   outputWarpedImage[0]=nullptr;
+   if(outputWarpedImage[1]!=nullptr)
       nifti_image_free(outputWarpedImage[1]);
-   outputWarpedImage[1]=NULL;
+   outputWarpedImage[1]=nullptr;
    free(outputWarpedImage);
-   outputWarpedImage=NULL;
+   outputWarpedImage=nullptr;
    // Free the allocated landmarks if used
    free(referenceLandmark);
    free(floatingLandmark);
@@ -945,12 +945,12 @@ int main(int argc, char **argv)
 #endif
 
    // Clean the allocated images
-   if(refLocalWeightSim!=NULL) nifti_image_free(refLocalWeightSim);
-   if(referenceImage!=NULL) nifti_image_free(referenceImage);
-   if(floatingImage!=NULL) nifti_image_free(floatingImage);
-   if(inputCCPImage!=NULL) nifti_image_free(inputCCPImage);
-   if(referenceMaskImage!=NULL) nifti_image_free(referenceMaskImage);
-   if(floatingMaskImage!=NULL) nifti_image_free(floatingMaskImage);
+   if(refLocalWeightSim!=nullptr) nifti_image_free(refLocalWeightSim);
+   if(referenceImage!=nullptr) nifti_image_free(referenceImage);
+   if(floatingImage!=nullptr) nifti_image_free(floatingImage);
+   if(inputCCPImage!=nullptr) nifti_image_free(inputCCPImage);
+   if(referenceMaskImage!=nullptr) nifti_image_free(referenceMaskImage);
+   if(floatingMaskImage!=nullptr) nifti_image_free(floatingMaskImage);
 
 #ifdef NDEBUG
    if(verbose)
diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp
index e7fea4b3..e5adc0d5 100644
--- a/reg-apps/reg_jacobian.cpp
+++ b/reg-apps/reg_jacobian.cpp
@@ -119,7 +119,7 @@ void Usage(char *exec)
    printf("\t\tFilename of the Log of the Jacobian determinant map.\n");
 #if defined (_OPENMP)
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    printf("\t-omp <int>\n\t\tNumber of thread to use with OpenMP. [%i/%i]\n",
           defaultOpenMPValue, omp_get_num_procs());
@@ -142,7 +142,7 @@ int main(int argc, char **argv)
 #if defined (_OPENMP)
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    omp_set_num_threads(defaultOpenMPValue);
 #endif
@@ -228,7 +228,7 @@ int main(int argc, char **argv)
    /* ******************* */
    /* READ TRANSFORMATION */
    /* ******************* */
-   nifti_image *inputTransformation=NULL;
+   nifti_image *inputTransformation=nullptr;
    if(flag->inputTransFlag)
    {
       // Check of the input transformation is an affine
@@ -240,7 +240,7 @@ int main(int argc, char **argv)
       }
 
       inputTransformation = reg_io_ReadImageFile(param->inputTransName);
-      if(inputTransformation == NULL)
+      if(inputTransformation == nullptr)
       {
          fprintf(stderr,"** ERROR Error when reading the transformation image: %s\n",param->inputTransName);
          return EXIT_FAILURE;
@@ -256,7 +256,7 @@ int main(int argc, char **argv)
    /* COMPUTE JACOBIAN MAT OR DET */
    /* *************************** */
    // Create a deformation field if needed
-   nifti_image *referenceImage=NULL;
+   nifti_image *referenceImage=nullptr;
    if(inputTransformation->intent_p1==LIN_SPLINE_GRID ||
          inputTransformation->intent_p1==CUB_SPLINE_GRID ||
          inputTransformation->intent_p1==SPLINE_VEL_GRID){
@@ -266,7 +266,7 @@ int main(int argc, char **argv)
       }
       // Read the reference image
       referenceImage = reg_io_ReadImageHeader(param->refImageName);
-      if(referenceImage == NULL)
+      if(referenceImage == nullptr)
       {
          reg_print_msg_error("Error when reading the reference image.");
          reg_exit();
@@ -276,10 +276,10 @@ int main(int argc, char **argv)
    if(flag->outputJacDetFlag || flag->outputLogDetFlag){
       // Compute the map of Jacobian determinant
       // Create the Jacobian image
-      nifti_image *jacobianImage=NULL;
-      if(referenceImage!=NULL){
+      nifti_image *jacobianImage=nullptr;
+      if(referenceImage!=nullptr){
          jacobianImage=nifti_copy_nim_info(referenceImage);
-         nifti_image_free(referenceImage);referenceImage=NULL;
+         nifti_image_free(referenceImage);referenceImage=nullptr;
       }
       else jacobianImage=nifti_copy_nim_info(inputTransformation);
       jacobianImage->ndim=jacobianImage->dim[0]=jacobianImage->nz>1?3:2;
@@ -327,14 +327,14 @@ int main(int argc, char **argv)
          }
          reg_io_WriteImageFile(jacobianImage,param->outputLogDetName);
       }
-      nifti_image_free(jacobianImage);jacobianImage=NULL;
+      nifti_image_free(jacobianImage);jacobianImage=nullptr;
    }
    if(flag->outputJacMatFlag){
 
-      nifti_image *jacobianImage=NULL;
-      if(referenceImage!=NULL){
+      nifti_image *jacobianImage=nullptr;
+      if(referenceImage!=nullptr){
          jacobianImage=nifti_copy_nim_info(referenceImage);
-         nifti_image_free(referenceImage);referenceImage=NULL;
+         nifti_image_free(referenceImage);referenceImage=nullptr;
       }
       else jacobianImage=nifti_copy_nim_info(inputTransformation);
       jacobianImage->ndim=jacobianImage->dim[0]=5;
@@ -379,13 +379,13 @@ int main(int argc, char **argv)
          reg_jacobian_convertMat33ToNii<double>(jacobianMatriceArray,jacobianImage);
          break;
       }
-      free(jacobianMatriceArray);jacobianMatriceArray=NULL;
+      free(jacobianMatriceArray);jacobianMatriceArray=nullptr;
       reg_io_WriteImageFile(jacobianImage,param->outputJacMatName);
-      nifti_image_free(jacobianImage);jacobianImage=NULL;
+      nifti_image_free(jacobianImage);jacobianImage=nullptr;
    }
 
    // Free the allocated image
-   nifti_image_free(inputTransformation);inputTransformation=NULL;
+   nifti_image_free(inputTransformation);inputTransformation=nullptr;
 
    return EXIT_SUCCESS;
 }
diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp
index 26e0d00c..e7e7fbc1 100755
--- a/reg-apps/reg_measure.cpp
+++ b/reg-apps/reg_measure.cpp
@@ -68,7 +68,7 @@ void Usage(char *exec)
    printf("\n\t-out\t\tText file output where to store the value(s).\n\t\t\tThe stdout is used by default\n");
 #if defined (_OPENMP)
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    printf("\t-omp <int>\tNumber of thread to use with OpenMP. [%i/%i]\n",
           defaultOpenMPValue, omp_get_num_procs());
@@ -89,7 +89,7 @@ int main(int argc, char **argv)
 #if defined (_OPENMP)
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    omp_set_num_threads(defaultOpenMPValue);
 #endif
@@ -216,7 +216,7 @@ int main(int argc, char **argv)
 
    /* Read the reference image */
    nifti_image *refImage = reg_io_ReadImageFile(param->refImageName);
-   if(refImage == NULL)
+   if(refImage == nullptr)
    {
       fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n",
               param->refImageName);
@@ -226,7 +226,7 @@ int main(int argc, char **argv)
 
    /* Read the floating image */
    nifti_image *floImage = reg_io_ReadImageFile(param->floImageName);
-   if(floImage == NULL)
+   if(floImage == nullptr)
    {
       fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n",
               param->floImageName);
@@ -235,11 +235,11 @@ int main(int argc, char **argv)
    reg_tools_changeDatatype<float>(floImage);
 
    /* Read and create the mask array */
-   int *refMask=NULL;
+   int *refMask=nullptr;
    int refMaskVoxNumber=refImage->nx*refImage->ny*refImage->nz;
    if(flag->refMaskImageFlag){
       nifti_image *refMaskImage = reg_io_ReadImageFile(param->refMaskImageName);
-      if(refMaskImage == NULL)
+      if(refMaskImage == nullptr)
       {
          fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference mask image: %s\n",
                  param->refMaskImageName);
@@ -292,7 +292,7 @@ int main(int argc, char **argv)
                      param->paddingValue);
    nifti_image_free(defField);
 
-   FILE *outFile=NULL;
+   FILE *outFile=nullptr;
    if(flag->outFileFlag)
       outFile=fopen(param->outFileName, "w");
 
@@ -329,7 +329,7 @@ int main(int argc, char **argv)
       warSTDValue /= (double)refMaskVoxNumber;
       measure /= sqrt(refSTDValue)*sqrt(warSTDValue)*
             (double)refMaskVoxNumber;
-      if(outFile!=NULL)
+      if(outFile!=nullptr)
          fprintf(outFile, "%g\n", measure);
       else printf("NCC: %g\n", measure);
    }
@@ -342,10 +342,10 @@ int main(int argc, char **argv)
                                     warpedFloImage,
                                     refMask,
                                     warpedFloImage,
-                                    NULL,
-                                    NULL);
+                                    nullptr,
+                                    nullptr);
       double measure=lncc_object->GetSimilarityMeasureValue();
-      if(outFile!=NULL)
+      if(outFile!=nullptr)
          fprintf(outFile, "%g\n", measure);
       else printf("LNCC: %g\n", measure);
       delete lncc_object;
@@ -359,10 +359,10 @@ int main(int argc, char **argv)
                                     warpedFloImage,
                                     refMask,
                                     warpedFloImage,
-                                    NULL,
-                                    NULL);
+                                    nullptr,
+                                    nullptr);
       double measure=nmi_object->GetSimilarityMeasureValue();
-      if(outFile!=NULL)
+      if(outFile!=nullptr)
          fprintf(outFile, "%g\n", measure);
       else printf("NMI: %g\n", measure);
       delete nmi_object;
@@ -376,11 +376,11 @@ int main(int argc, char **argv)
                                     warpedFloImage,
                                     refMask,
                                     warpedFloImage,
-                                    NULL,
-                                    NULL,
-                                    NULL);
+                                    nullptr,
+                                    nullptr,
+                                    nullptr);
       double measure=ssd_object->GetSimilarityMeasureValue();
-      if(outFile!=NULL)
+      if(outFile!=nullptr)
          fprintf(outFile, "%g\n", measure);
       else printf("SSD: %g\n", measure);
       delete ssd_object;
@@ -394,17 +394,17 @@ int main(int argc, char **argv)
                                     warpedFloImage,
                                     refMask,
                                     warpedFloImage,
-                                    NULL,
-                                    NULL);
+                                    nullptr,
+                                    nullptr);
       double measure=mind_object->GetSimilarityMeasureValue();
-      if(outFile!=NULL)
+      if(outFile!=nullptr)
          fprintf(outFile, "%g\n", measure);
       else printf("MIND: %g\n", measure);
       delete mind_object;
    }
 
    // Close the output file if required
-   if(outFile!=NULL)
+   if(outFile!=nullptr)
       fclose(outFile);
 
    // Free the allocated images
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index 24a76cdd..b4dbc4ee 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -380,7 +380,7 @@ int main(int argc, char **argv)
    }
 
    nifti_image *image = nifti_image_read(param->sourceImageName,true);
-   if(image == NULL)
+   if(image == nullptr)
    {
       fprintf(stderr,"* ERROR Error when reading image: %s\n",param->sourceImageName);
       return EXIT_FAILURE;
@@ -388,11 +388,11 @@ int main(int argc, char **argv)
    reg_tools_changeDatatype<PrecisionTYPE>(image); // FIX DATA TYPE - DOES THIS WORK?
 
    // --- 2) READ/SET IMAGE MASK (4D VOLUME, [NS, SS]) ---
-   nifti_image *mask=NULL;
+   nifti_image *mask=nullptr;
    if(flag->pmask)
    {
       mask = nifti_image_read(param->pcaMaskName,true);
-      if(mask == NULL)
+      if(mask == nullptr)
       {
          fprintf(stderr,"* ERROR Error when reading image: %s\n",param->pcaMaskName);
          return EXIT_FAILURE;
@@ -729,7 +729,7 @@ int main(int argc, char **argv)
          {
             d[k]=d[i];
             d[i]=p;
-            if(z != NULL)
+            if(z != nullptr)
                for(int j=0; j<n; j++)
                {
                   p=z[j+n*i];
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index dfe81654..c5bd8772 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -71,7 +71,7 @@ void Usage(char *exec)
    printf("\t-voff\n\t\tTurns verbose off [on]\n");
 #if defined (_OPENMP)
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    printf("\t-omp <int>\n\t\tNumber of thread to use with OpenMP. [%i/%i]\n",
           defaultOpenMPValue, omp_get_num_procs());
@@ -94,7 +94,7 @@ int main(int argc, char **argv)
 #if defined (_OPENMP)
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    omp_set_num_threads(defaultOpenMPValue);
 #endif
@@ -254,7 +254,7 @@ int main(int argc, char **argv)
 
    /* Read the reference image */
    nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
-   if(referenceImage == NULL)
+   if(referenceImage == nullptr)
    {
       fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n",
               param->referenceImageName);
@@ -263,7 +263,7 @@ int main(int argc, char **argv)
 
    /* Read the floating image */
    nifti_image *floatingImage = reg_io_ReadImageFile(param->floatingImageName);
-   if(floatingImage == NULL)
+   if(floatingImage == nullptr)
    {
       fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n",
               param->floatingImageName);
@@ -291,7 +291,7 @@ int main(int argc, char **argv)
    /* *********************** */
    /* READ THE TRANSFORMATION */
    /* *********************** */
-   nifti_image *inputTransformationImage = NULL;
+   nifti_image *inputTransformationImage = nullptr;
    mat44 inputAffineTransformation;
    // Check if a transformation has been specified
    if(flag->inputTransFlag)
@@ -300,7 +300,7 @@ int main(int argc, char **argv)
       if(reg_isAnImageFileName(param->inputTransName))
       {
          inputTransformationImage=reg_io_ReadImageFile(param->inputTransName);
-         if(inputTransformationImage==NULL)
+         if(inputTransformationImage==nullptr)
          {
             fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n",
                     param->inputTransName);
@@ -336,7 +336,7 @@ int main(int argc, char **argv)
          deformationFieldImage->nt*deformationFieldImage->nu;
    deformationFieldImage->scl_slope=1.f;
    deformationFieldImage->scl_inter=0.f;
-   if(inputTransformationImage!=NULL)
+   if(inputTransformationImage!=nullptr)
    {
       deformationFieldImage->datatype = inputTransformationImage->datatype;
       deformationFieldImage->nbyper = inputTransformationImage->nbyper;
@@ -354,7 +354,7 @@ int main(int argc, char **argv)
    deformationFieldImage->intent_p1=DEF_FIELD;
 
    // Compute the transformation to apply
-   if(inputTransformationImage!=NULL)
+   if(inputTransformationImage!=nullptr)
    {
       switch(static_cast<int>(inputTransformationImage->intent_p1))
       {
@@ -362,7 +362,7 @@ int main(int argc, char **argv)
       case CUB_SPLINE_GRID:
          reg_spline_getDeformationField(inputTransformationImage,
                                         deformationFieldImage,
-                                        NULL,
+                                        nullptr,
                                         false,
                                         true);
          break;
@@ -376,7 +376,7 @@ int main(int argc, char **argv)
                    tempFlowField->nvox*tempFlowField->nbyper);
             reg_defField_compose(inputTransformationImage,
                                  tempFlowField,
-                                 NULL);
+                                 nullptr);
             tempFlowField->intent_p1=inputTransformationImage->intent_p1;
             tempFlowField->intent_p2=inputTransformationImage->intent_p2;
             reg_defField_getDeformationFieldFromFlowField(tempFlowField,
@@ -395,18 +395,18 @@ int main(int argc, char **argv)
       default:
          reg_defField_compose(inputTransformationImage,
                               deformationFieldImage,
-                              NULL);
+                              nullptr);
          break;
       }
       nifti_image_free(inputTransformationImage);
-      inputTransformationImage=NULL;
+      inputTransformationImage=nullptr;
    }
    else
    {
       reg_affine_getDeformationField(&inputAffineTransformation,
                                      deformationFieldImage,
                                      false,
-                                     NULL);
+                                     nullptr);
    }
 
 
@@ -474,7 +474,7 @@ int main(int argc, char **argv)
          reg_resampleImage(floatingImage,
                            warpedImage,
                            deformationFieldImage,
-                           NULL,
+                           nullptr,
                            param->interpolation,
                            std::numeric_limits<float>::quiet_NaN(),
                            timepoints,
@@ -495,7 +495,7 @@ int main(int argc, char **argv)
             reg_resampleImage_PSF(floatingImage,
                                   warpedImage,
                                   deformationFieldImage,
-                                  NULL,
+                                  nullptr,
                                   param->interpolation,
                                   param->paddingValue,
                                   jacobian,
@@ -510,7 +510,7 @@ int main(int argc, char **argv)
             reg_resampleImage(floatingImage,
                               warpedImage,
                               deformationFieldImage,
-                              NULL,
+                              nullptr,
                               param->interpolation,
                               param->paddingValue);
          }
@@ -607,7 +607,7 @@ int main(int argc, char **argv)
       reg_resampleImage(gridImage,
                         warpedImage,
                         deformationFieldImage,
-                        NULL,
+                        nullptr,
                         1, // linear interpolation
                         0);
       memset(warpedImage->descrip, 0, 80);
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index bec588b0..2a98658b 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -29,10 +29,10 @@ std::vector<float> splitFloatVector(char* input)
 {
     std::vector<float> floatVector;
     char* charArray = strtok(input, ",");
-    while (charArray != NULL)
+    while (charArray != nullptr)
     {
         floatVector.push_back(atof(charArray));
-        charArray = strtok(NULL, ",");
+        charArray = strtok(nullptr, ",");
     }
 
     return floatVector;
@@ -40,7 +40,7 @@ std::vector<float> splitFloatVector(char* input)
 
 int isNumeric (const char *s)
 {
-    if(s==NULL || *s=='\0' || isspace(*s))
+    if(s==nullptr || *s=='\0' || isspace(*s))
         return EXIT_SUCCESS;
     char * p;
     strtod (s, &p);
@@ -130,7 +130,7 @@ void Usage(char *exec)
     printf("\t-interp\t\t\tInterpolation order to use to warp the floating image\n");
 #if defined (_OPENMP)
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    printf("\t-omp <int>\t\tNumber of thread to use with OpenMP. [%i/%i]\n",
           defaultOpenMPValue, omp_get_num_procs());
@@ -155,7 +155,7 @@ int main(int argc, char **argv)
 #if defined (_OPENMP)
     // Set the default number of thread
     int defaultOpenMPValue=omp_get_num_procs();
-    if(getenv("OMP_NUM_THREADS")!=NULL)
+    if(getenv("OMP_NUM_THREADS")!=nullptr)
         defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
     omp_set_num_threads(defaultOpenMPValue);
 #endif
@@ -464,7 +464,7 @@ int main(int argc, char **argv)
 
     /* Read the image */
     nifti_image *image = reg_io_ReadImageFile(param->inputImageName);
-    if(image == NULL)
+    if(image == nullptr)
     {
         fprintf(stderr,"** ERROR Error when reading the input image: %s\n",param->inputImageName);
         return EXIT_FAILURE;
@@ -524,24 +524,24 @@ int main(int argc, char **argv)
         bool boolX[3]= {1,0,0};
         for(int i=0; i<smoothImg->nt*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueX;
         if(flag->smoothMeanFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,NULL,timePoint,boolX);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolX);
         else if(flag->smoothSplineFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,NULL,timePoint,boolX);
-        else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,NULL,timePoint,boolX);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolX);
+        else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolX);
         bool boolY[3]= {0,1,0};
         for(int i=0; i<smoothImg->nt*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueY;
         if(flag->smoothMeanFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,NULL,timePoint,boolY);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolY);
         else if(flag->smoothSplineFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,NULL,timePoint,boolY);
-        else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,NULL,timePoint,boolY);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolY);
+        else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolY);
         bool boolZ[3]= {0,0,1};
         for(int i=0; i<smoothImg->nt*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueZ;
         if(flag->smoothMeanFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,NULL,timePoint,boolZ);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolZ);
         else if(flag->smoothSplineFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,NULL,timePoint,boolZ);
-        else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,NULL,timePoint,boolZ);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolZ);
+        else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolZ);
         delete []kernelSize;
         delete []timePoint;
         if(flag->outputImageFlag)
@@ -566,7 +566,7 @@ int main(int argc, char **argv)
         float varY=param->smoothValueY;
         float varZ=param->smoothValueZ;
 
-        reg_tools_labelKernelConvolution(smoothImg,varX,varY,varZ,NULL,timePoint);
+        reg_tools_labelKernelConvolution(smoothImg,varX,varY,varZ,nullptr,timePoint);
 
         delete []timePoint;
         if(flag->outputImageFlag)
@@ -579,18 +579,18 @@ int main(int argc, char **argv)
 
     if(flag->operationTypeFlag>-1)
     {
-        nifti_image *image2=NULL;
-        if(param->operationImageName!=NULL)
+        nifti_image *image2=nullptr;
+        if(param->operationImageName!=nullptr)
         {
             image2 = reg_io_ReadImageFile(param->operationImageName);
-            if(image2 == NULL)
+            if(image2 == nullptr)
             {
                 fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->operationImageName);
                 return EXIT_FAILURE;
             }
         }
         // Images are converted to the higher datatype
-        if(image2!=NULL){
+        if(image2!=nullptr){
             switch(image->datatype>image2->datatype?image->datatype:image2->datatype)
             {
             case NIFTI_TYPE_UINT8:
@@ -634,7 +634,7 @@ int main(int argc, char **argv)
         nifti_image *outputImage = nifti_copy_nim_info(image);
         outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper);
 
-        if(image2!=NULL)
+        if(image2!=nullptr)
         {
             switch(flag->operationTypeFlag)
             {
@@ -675,7 +675,7 @@ int main(int argc, char **argv)
         else reg_io_WriteImageFile(outputImage,"output.nii");
 
         nifti_image_free(outputImage);
-        if(image2!=NULL) nifti_image_free(image2);
+        if(image2!=nullptr) nifti_image_free(image2);
     }
 
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
@@ -683,7 +683,7 @@ int main(int argc, char **argv)
     if(flag->rmsImageFlag)
     {
         nifti_image *image2 = reg_io_ReadImageFile(param->rmsImageName);
-        if(image2 == NULL)
+        if(image2 == nullptr)
         {
             fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->rmsImageName);
             return EXIT_FAILURE;
@@ -728,7 +728,7 @@ int main(int argc, char **argv)
     if(flag->nanMaskFlag)
     {
         nifti_image *maskImage = reg_io_ReadImageFile(param->operationImageName);
-        if(maskImage == NULL)
+        if(maskImage == nullptr)
         {
             fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->operationImageName);
             return EXIT_FAILURE;
@@ -916,7 +916,7 @@ int main(int argc, char **argv)
             reg_resampleImage_PSF(image,
                                   newImg,
                                   def,
-                                  NULL,
+                                  nullptr,
                                   param->interpOrder,
                                   0.f,
                                   jacobian,
@@ -929,7 +929,7 @@ int main(int argc, char **argv)
             reg_resampleImage(image,
                               newImg,
                               def,
-                              NULL,
+                              nullptr,
                               param->interpOrder,
                               0.f);
 #ifndef NDEBUG
@@ -983,13 +983,13 @@ int main(int argc, char **argv)
         }
         // Free the scaled image
         nifti_image_free(scaledImage);
-        scaledImage=NULL;
+        scaledImage=nullptr;
         // Save the rgb image
         if(flag->outputImageFlag)
             reg_io_WriteImageFile(outputImage,param->outputImageName);
         else reg_io_WriteImageFile(outputImage,"output.nii");
         nifti_image_free(outputImage);
-        outputImage=NULL;
+        outputImage=nullptr;
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
     if(flag->bsi2rgbFlag)
@@ -1031,7 +1031,7 @@ int main(int argc, char **argv)
             reg_io_WriteImageFile(outputImage,param->outputImageName);
         else reg_io_WriteImageFile(outputImage,"output.nii");
         nifti_image_free(outputImage);
-        outputImage=NULL;
+        outputImage=nullptr;
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
     if(flag->mindFlag)
@@ -1058,7 +1058,7 @@ int main(int argc, char **argv)
             reg_io_WriteImageFile(outputImage,param->outputImageName);
         else reg_io_WriteImageFile(outputImage,"output.nii");
         nifti_image_free(outputImage);
-        outputImage=NULL;
+        outputImage=nullptr;
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
     if(flag->mindSSCFlag)
@@ -1085,7 +1085,7 @@ int main(int argc, char **argv)
             reg_io_WriteImageFile(outputImage,param->outputImageName);
         else reg_io_WriteImageFile(outputImage,"output.nii");
         nifti_image_free(outputImage);
-        outputImage=NULL;
+        outputImage=nullptr;
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
     if(flag->testActiveBlocksFlag){
@@ -1189,7 +1189,7 @@ int main(int argc, char **argv)
             reg_io_WriteImageFile(outputImage,param->outputImageName);
         else reg_io_WriteImageFile(outputImage,"output.nii");
         nifti_image_free(outputImage);
-        outputImage=NULL;
+        outputImage=nullptr;
     }
 
     nifti_image_free(image);
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 30936bca..cdddf4ab 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -143,7 +143,7 @@ void Usage(char *exec)
    printf("\t\tfilename4 - Output affine transformation file name\n\n");
 #if defined (_OPENMP)
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    printf("\t-omp <int>\n\t\tNumber of thread to use with OpenMP. [%i/%i]\n",
           defaultOpenMPValue, omp_get_num_procs());
@@ -178,7 +178,7 @@ int main(int argc, char **argv)
 #if defined (_OPENMP)
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=NULL)
+   if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
    omp_set_num_threads(defaultOpenMPValue);
 #endif
@@ -322,15 +322,15 @@ int main(int argc, char **argv)
    if(flag->outputDefFlag || flag->outputDispFlag || flag->outputFlowFlag)
    {
       // Create some variables
-      mat44 *affineTransformation=NULL;
-      nifti_image *referenceImage=NULL;
-      nifti_image *inputTransformationImage=NULL;
-      nifti_image *outputTransformationImage=NULL;
+      mat44 *affineTransformation=nullptr;
+      nifti_image *referenceImage=nullptr;
+      nifti_image *inputTransformationImage=nullptr;
+      nifti_image *outputTransformationImage=nullptr;
       // First check if the input filename is an image
       if(reg_isAnImageFileName(param->inputTransName))
       {
          inputTransformationImage=reg_io_ReadImageFile(param->inputTransName);
-         if(inputTransformationImage==NULL)
+         if(inputTransformationImage==nullptr)
          {
             fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n",
                     param->inputTransName);
@@ -349,7 +349,7 @@ int main(int argc, char **argv)
                return EXIT_FAILURE;
             }
             referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-            if(referenceImage==NULL)
+            if(referenceImage==nullptr)
             {
                fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
                        param->referenceImageName);
@@ -370,7 +370,7 @@ int main(int argc, char **argv)
             return EXIT_FAILURE;
          }
          referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-         if(referenceImage==NULL)
+         if(referenceImage==nullptr)
          {
             fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
                     param->referenceImageName);
@@ -378,7 +378,7 @@ int main(int argc, char **argv)
          }
       }
       // Create a dense field
-      if(affineTransformation!=NULL ||
+      if(affineTransformation!=nullptr ||
             inputTransformationImage->intent_p1==LIN_SPLINE_GRID ||
             inputTransformationImage->intent_p1==CUB_SPLINE_GRID ||
             inputTransformationImage->intent_p1==SPLINE_VEL_GRID)
@@ -410,7 +410,7 @@ int main(int argc, char **argv)
       // Create a flow field image
       if(flag->outputFlowFlag)
       {
-         if(affineTransformation!=NULL)
+         if(affineTransformation!=nullptr)
          {
             fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from an affine transformation\n");
             return EXIT_FAILURE;
@@ -469,7 +469,7 @@ int main(int argc, char **argv)
       // Create a deformation or displacement field
       else if(flag->outputDefFlag || flag->outputDispFlag)
       {
-         if(affineTransformation!=NULL)
+         if(affineTransformation!=nullptr)
          {
             reg_affine_getDeformationField(affineTransformation,outputTransformationImage);
          }
@@ -504,7 +504,7 @@ int main(int argc, char **argv)
                // The spline transformation is composed with the identity field
                reg_spline_getDeformationField(inputTransformationImage,
                                               outputTransformationImage,
-                                              NULL, // no mask
+                                              nullptr, // no mask
                                               true, // composition is used,
                                               true // b-spline are used
                                              );
@@ -566,10 +566,10 @@ int main(int argc, char **argv)
          break;
       }
       // Free the allocated images and arrays
-      if(affineTransformation!=NULL) free(affineTransformation);
-      if(referenceImage!=NULL) nifti_image_free(referenceImage);
-      if(inputTransformationImage!=NULL) nifti_image_free(inputTransformationImage);
-      if(outputTransformationImage!=NULL) nifti_image_free(outputTransformationImage);
+      if(affineTransformation!=nullptr) free(affineTransformation);
+      if(referenceImage!=nullptr) nifti_image_free(referenceImage);
+      if(inputTransformationImage!=nullptr) nifti_image_free(inputTransformationImage);
+      if(outputTransformationImage!=nullptr) nifti_image_free(outputTransformationImage);
    }
 
    /* ************************************ */
@@ -579,14 +579,14 @@ int main(int argc, char **argv)
    {
       printf("[NiftyReg] Starting the composition of two transformations\n");
       // Create some variables
-      mat44 *affine1Trans=NULL;
-      mat44 *affine2Trans=NULL;
-      nifti_image *referenceImage=NULL;
-      nifti_image *referenceImage2=NULL;
-      nifti_image *input1TransImage=NULL;
-      nifti_image *input2TransImage=NULL;
-      nifti_image *output1TransImage=NULL;
-      nifti_image *output2TransImage=NULL;
+      mat44 *affine1Trans=nullptr;
+      mat44 *affine2Trans=nullptr;
+      nifti_image *referenceImage=nullptr;
+      nifti_image *referenceImage2=nullptr;
+      nifti_image *input1TransImage=nullptr;
+      nifti_image *input2TransImage=nullptr;
+      nifti_image *output1TransImage=nullptr;
+      nifti_image *output2TransImage=nullptr;
       // Read the first transformation
       if(!reg_isAnImageFileName(param->inputTransName))
       {
@@ -598,7 +598,7 @@ int main(int argc, char **argv)
       else
       {
          input1TransImage = reg_io_ReadImageFile(param->inputTransName);
-         if(input1TransImage==NULL)
+         if(input1TransImage==nullptr)
          {
             fprintf(stderr, "[NiftyReg ERROR] Error when reading the transformation image: %s\n",
                     param->inputTransName);
@@ -614,7 +614,7 @@ int main(int argc, char **argv)
       else
       {
          input2TransImage = reg_io_ReadImageFile(param->input2TransName);
-         if(input2TransImage==NULL)
+         if(input2TransImage==nullptr)
          {
             fprintf(stderr, "[NiftyReg ERROR] Error when reading the transformation image: %s\n",
                     param->input2TransName);
@@ -622,7 +622,7 @@ int main(int argc, char **argv)
          }
       }
       // Check if the two input transformations are affine transformation
-      if(affine1Trans!=NULL && affine2Trans!=NULL)
+      if(affine1Trans!=nullptr && affine2Trans!=nullptr)
       {
          printf("[NiftyReg] Transformation 2 is an affine parametrisation:\n[NiftyReg] %s\n",
                 param->input2TransName);
@@ -632,7 +632,7 @@ int main(int argc, char **argv)
       else
       {
          // Check if the reference image is required
-         if(affine1Trans!=NULL)
+         if(affine1Trans!=nullptr)
          {
             if(!flag->referenceImageFlag)
             {
@@ -642,7 +642,7 @@ int main(int argc, char **argv)
                return EXIT_FAILURE;
             }
             referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-            if(referenceImage==NULL)
+            if(referenceImage==nullptr)
             {
                fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
                        param->referenceImageName);
@@ -661,7 +661,7 @@ int main(int argc, char **argv)
                return EXIT_FAILURE;
             }
             referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-            if(referenceImage==NULL)
+            if(referenceImage==nullptr)
             {
                fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
                        param->referenceImageName);
@@ -672,7 +672,7 @@ int main(int argc, char **argv)
          if(flag->referenceImage2Flag==true)
          {
             referenceImage2=reg_io_ReadImageHeader(param->referenceImage2Name);
-            if(referenceImage2==NULL)
+            if(referenceImage2==nullptr)
             {
                fprintf(stderr, "[NiftyReg ERROR] Error when reading the second reference image: %s\n",
                        param->referenceImage2Name);
@@ -680,7 +680,7 @@ int main(int argc, char **argv)
             }
          }
          // Generate the first deformation field
-         if(referenceImage!=NULL)
+         if(referenceImage!=nullptr)
          {
             // The field is created using the reference image space
             output1TransImage=nifti_copy_nim_info(referenceImage);
@@ -711,7 +711,7 @@ int main(int argc, char **argv)
          output1TransImage->intent_p1=DEF_FIELD;
          output1TransImage->data=(void *)calloc
                                  (output1TransImage->nvox,output1TransImage->nbyper);
-         if(affine1Trans!=NULL)
+         if(affine1Trans!=nullptr)
          {
             reg_affine_getDeformationField(affine1Trans,output1TransImage);
          }
@@ -726,7 +726,7 @@ int main(int argc, char **argv)
                reg_getDeformationFromDisplacement(output1TransImage);
                reg_spline_getDeformationField(input1TransImage,
                                               output1TransImage,
-                                              NULL,
+                                              nullptr,
                                               true,
                                               true);
                break;
@@ -773,7 +773,7 @@ int main(int argc, char **argv)
                        param->input2TransName);
                return EXIT_FAILURE;
             }
-         if(affine2Trans!=NULL)
+         if(affine2Trans!=nullptr)
          {
             printf("[NiftyReg] Transformation 2 is an affine parametrisation:\n[NiftyReg] %s\n",
                    param->input2TransName);
@@ -786,7 +786,7 @@ int main(int argc, char **argv)
             output2TransImage->data=(void *)calloc
                                     (output2TransImage->nvox,output2TransImage->nbyper);
             reg_affine_getDeformationField(affine2Trans,output2TransImage);
-            reg_defField_compose(output2TransImage,output1TransImage,NULL);
+            reg_defField_compose(output2TransImage,output1TransImage,nullptr);
          }
          else
          {
@@ -798,7 +798,7 @@ int main(int argc, char **argv)
                       input2TransImage->fname);
                reg_spline_getDeformationField(input2TransImage,
                                               output1TransImage,
-                                              NULL,
+                                              nullptr,
                                               true, // composition
                                               true // b-spline
                                              );
@@ -806,17 +806,17 @@ int main(int argc, char **argv)
             case DEF_FIELD:
                printf("[NiftyReg] Transformation 2 is a deformation field:\n[NiftyReg] %s\n",
                       input2TransImage->fname);
-               reg_defField_compose(input2TransImage,output1TransImage,NULL);
+               reg_defField_compose(input2TransImage,output1TransImage,nullptr);
                break;
             case DISP_FIELD:
                printf("[NiftyReg] Transformation 2 is a displacement field:\n[NiftyReg] %s\n",
                       input2TransImage->fname);
                reg_getDeformationFromDisplacement(input2TransImage);
-               reg_defField_compose(input2TransImage,output1TransImage,NULL);
+               reg_defField_compose(input2TransImage,output1TransImage,nullptr);
                break;
             case SPLINE_VEL_GRID:
                // The field is created using the second reference image space
-               if(referenceImage2!=NULL)
+               if(referenceImage2!=nullptr)
                {
                   output2TransImage=nifti_copy_nim_info(referenceImage2);
                   output2TransImage->scl_slope=1.f;
@@ -844,7 +844,7 @@ int main(int argc, char **argv)
                      output2TransImage,
                      false // the number of step is not automatically updated
                                                              );
-               reg_defField_compose(output2TransImage,output1TransImage,NULL);
+               reg_defField_compose(output2TransImage,output1TransImage,nullptr);
                break;
             case DEF_VEL_FIELD:
                printf("[NiftyReg] Transformation 2 is a deformation field velocity:\n[NiftyReg] %s\n",
@@ -857,7 +857,7 @@ int main(int argc, char **argv)
                      output2TransImage,
                      false // the number of step is not automatically updated
                                                             );
-               reg_defField_compose(output2TransImage,output1TransImage,NULL);
+               reg_defField_compose(output2TransImage,output1TransImage,nullptr);
                break;
             case DISP_VEL_FIELD:
                printf("[NiftyReg] Transformation 2 is a displacement field velocity:\n[NiftyReg] %s\n",
@@ -871,7 +871,7 @@ int main(int argc, char **argv)
                      output2TransImage,
                      false // the number of step is not automatically updated
                                                             );
-               reg_defField_compose(output2TransImage,output1TransImage,NULL);
+               reg_defField_compose(output2TransImage,output1TransImage,nullptr);
                break;
             default:
                fprintf(stderr,"[NiftyReg ERROR] The specified second input transformation type is not recognised: %s\n",
@@ -887,14 +887,14 @@ int main(int argc, char **argv)
                 param->outputTransName);
       }
       // Free allocated object
-      if(affine1Trans!=NULL) free(affine1Trans);
-      if(affine2Trans!=NULL) free(affine2Trans);
-      if(referenceImage!=NULL) nifti_image_free(referenceImage);
-      if(referenceImage2!=NULL) nifti_image_free(referenceImage2);
-      if(input1TransImage!=NULL) nifti_image_free(input1TransImage);
-      if(input2TransImage!=NULL) nifti_image_free(input2TransImage);
-      if(output1TransImage!=NULL) nifti_image_free(output1TransImage);
-      if(output2TransImage!=NULL) nifti_image_free(output2TransImage);
+      if(affine1Trans!=nullptr) free(affine1Trans);
+      if(affine2Trans!=nullptr) free(affine2Trans);
+      if(referenceImage!=nullptr) nifti_image_free(referenceImage);
+      if(referenceImage2!=nullptr) nifti_image_free(referenceImage2);
+      if(input1TransImage!=nullptr) nifti_image_free(input1TransImage);
+      if(input2TransImage!=nullptr) nifti_image_free(input2TransImage);
+      if(output1TransImage!=nullptr) nifti_image_free(output1TransImage);
+      if(output2TransImage!=nullptr) nifti_image_free(output2TransImage);
    }
 
 
@@ -904,15 +904,15 @@ int main(int argc, char **argv)
    if(flag->outputLandFlag)
    {
       // Create some variables
-      mat44 *affineTransformation=NULL;
-      nifti_image *referenceImage=NULL;
-      nifti_image *inputTransformationImage=NULL;
-      nifti_image *deformationFieldImage=NULL;
+      mat44 *affineTransformation=nullptr;
+      nifti_image *referenceImage=nullptr;
+      nifti_image *inputTransformationImage=nullptr;
+      nifti_image *deformationFieldImage=nullptr;
       // First check if the input filename is an image
       if(reg_isAnImageFileName(param->inputTransName))
       {
          inputTransformationImage=reg_io_ReadImageFile(param->inputTransName);
-         if(inputTransformationImage==NULL)
+         if(inputTransformationImage==nullptr)
          {
             fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n",
                     param->inputTransName);
@@ -931,7 +931,7 @@ int main(int argc, char **argv)
                return EXIT_FAILURE;
             }
             referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-            if(referenceImage==NULL)
+            if(referenceImage==nullptr)
             {
                fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
                        param->referenceImageName);
@@ -952,7 +952,7 @@ int main(int argc, char **argv)
             return EXIT_FAILURE;
          }
          referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-         if(referenceImage==NULL)
+         if(referenceImage==nullptr)
          {
             fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
                     param->referenceImageName);
@@ -960,7 +960,7 @@ int main(int argc, char **argv)
          }
       }
       // Create a dense field
-      if(affineTransformation!=NULL ||
+      if(affineTransformation!=nullptr ||
          inputTransformationImage->intent_p1==LIN_SPLINE_GRID ||
          inputTransformationImage->intent_p1==CUB_SPLINE_GRID ||
          inputTransformationImage->intent_p1==SPLINE_VEL_GRID)
@@ -990,7 +990,7 @@ int main(int argc, char **argv)
       deformationFieldImage->data=(void *)malloc
             (deformationFieldImage->nvox*deformationFieldImage->nbyper);
       // Fill the deformation field
-      if(affineTransformation!=NULL)
+      if(affineTransformation!=nullptr)
       {
          reg_affine_getDeformationField(affineTransformation,deformationFieldImage);
       }
@@ -1025,7 +1025,7 @@ int main(int argc, char **argv)
             // The spline transformation is composed with the identity field
             reg_spline_getDeformationField(inputTransformationImage,
                                            deformationFieldImage,
-                                           NULL, // no mask
+                                           nullptr, // no mask
                                            true, // composition is used,
                                            true // b-spline are used
                                            );
@@ -1067,13 +1067,13 @@ int main(int argc, char **argv)
       deformationFieldImage->intent_p1=DEF_FIELD;
       deformationFieldImage->intent_p2=0;
       // Free all allocated input
-      if(affineTransformation!=NULL){
+      if(affineTransformation!=nullptr){
          free(affineTransformation);
       }
-      if(referenceImage!=NULL){
+      if(referenceImage!=nullptr){
          nifti_image_free(referenceImage);
       }
-      if(inputTransformationImage!=NULL){
+      if(inputTransformationImage!=nullptr){
          nifti_image_free(inputTransformationImage);
       }
       // Read the landmark file
@@ -1113,7 +1113,7 @@ int main(int argc, char **argv)
          }
          reg_defField_compose(deformationFieldImage,
                               landmarkImage,
-                              NULL);
+                              nullptr);
          for(size_t i=0;i<n;++i){
             allLandmarks[l][i]=landmarkImagePtr[i];
          }
@@ -1127,10 +1127,10 @@ int main(int argc, char **argv)
       for(size_t l=0; l<landmarkNumber; ++l)
          free(allLandmarks[l]);
       free(allLandmarks);
-      if(deformationFieldImage!=NULL){
+      if(deformationFieldImage!=nullptr){
          nifti_image_free(deformationFieldImage);
       }
-      if(landmarkImage!=NULL){
+      if(landmarkImage!=nullptr){
          nifti_image_free(landmarkImage);
       }
    }
@@ -1141,7 +1141,7 @@ int main(int argc, char **argv)
    {
       // Read the input image
       nifti_image *image = reg_io_ReadImageFile(param->inputTransName);
-      if(image==NULL)
+      if(image==nullptr)
       {
          fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n",
                  param->inputTransName);
@@ -1178,8 +1178,8 @@ int main(int argc, char **argv)
    if(flag->halfTransFlag)
    {
       // Read the input transformation
-      mat44 *affineTrans=NULL;
-      nifti_image *inputTransImage=NULL;
+      mat44 *affineTrans=nullptr;
+      nifti_image *inputTransImage=nullptr;
       if(!reg_isAnImageFileName(param->inputTransName))
       {
          // An affine transformation is considered
@@ -1196,7 +1196,7 @@ int main(int argc, char **argv)
       {
          // A non-rigid parametrisation is considered
          inputTransImage = reg_io_ReadImageFile(param->inputTransName);
-         if(inputTransImage==NULL)
+         if(inputTransImage==nullptr)
          {
             fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n",
                     param->inputTransName);
@@ -1245,7 +1245,7 @@ int main(int argc, char **argv)
          reg_io_WriteImageFile(inputTransImage,param->outputTransName);
       }
       // Clear the allocated arrays
-      if(affineTrans!=NULL) free(affineTrans);
+      if(affineTrans!=nullptr) free(affineTrans);
    }
    /* ******************************************** */
    // Invert the provided non-rigid transformation //
@@ -1254,7 +1254,7 @@ int main(int argc, char **argv)
    {
       // Read the provided transformation
       nifti_image *inputTransImage = reg_io_ReadImageFile(param->inputTransName);
-      if(inputTransImage==NULL)
+      if(inputTransImage==nullptr)
       {
          fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n",
                  param->inputTransName);
@@ -1262,7 +1262,7 @@ int main(int argc, char **argv)
       }
       // Read the provided floating space image
       nifti_image *floatingImage = reg_io_ReadImageFile(param->input2TransName);
-      if(floatingImage==NULL)
+      if(floatingImage==nullptr)
       {
          fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n",
                  param->input2TransName);
@@ -1282,7 +1282,7 @@ int main(int argc, char **argv)
             return EXIT_FAILURE;
          }
          nifti_image *referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-         if(referenceImage==NULL)
+         if(referenceImage==nullptr)
          {
             fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
                     param->referenceImageName);
@@ -1314,7 +1314,7 @@ int main(int argc, char **argv)
                inputTransImage->intent_p1==CUB_SPLINE_GRID)
             reg_spline_getDeformationField(inputTransImage,
                                            tempField,
-                                           NULL,
+                                           nullptr,
                                            false,
                                            true);
          else
@@ -1324,7 +1324,7 @@ int main(int argc, char **argv)
          nifti_image_free(referenceImage);
          nifti_image_free(inputTransImage);
          inputTransImage=tempField;
-         tempField=NULL;
+         tempField=nullptr;
       }
      // Create a field to store the transformation
      nifti_image *outputTransImage = nifti_copy_nim_info(floatingImage);
diff --git a/reg-io/_reg_ReadWriteBinary.h b/reg-io/_reg_ReadWriteBinary.h
index 568dba46..4bc0da83 100644
--- a/reg-io/_reg_ReadWriteBinary.h
+++ b/reg-io/_reg_ReadWriteBinary.h
@@ -1,5 +1,4 @@
-#ifndef _REG_READWRITEBINARY_H
-#define _REG_READWRITEBINARY_H
+#pragma once
 
 #include <fstream>      // std::ifstream
 #include <stdlib.h>
@@ -7,5 +6,3 @@
 extern "C++"
 void readFloatBinaryArray(const char* fileName, int lengthArray, float* outputArray);
 void readIntBinaryArray(const char* fileName, int lengthArray, int* outputArray);
-
-#endif
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index 57612297..d21b0304 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -9,9 +9,6 @@
  *
  */
 
-#ifndef _REG_READWRITEIMAGE_CPP
-#define _REG_READWRITEIMAGE_CPP
-
 #include "_reg_ReadWriteImage.h"
 #include "_reg_tools.h"
 #include "_reg_stringFormat.h"
@@ -72,7 +69,7 @@ nifti_image *reg_io_ReadImageFile(const char *filename)
    int fileFormat=reg_io_checkFileFormat(filename);
 
    // Create the nifti image pointer
-   nifti_image *image=NULL;
+   nifti_image *image=nullptr;
 
    // Read the image and convert it to nifti format if required
    switch(fileFormat)
@@ -106,7 +103,7 @@ nifti_image *reg_io_ReadImageHeader(const char *filename)
    int fileFormat=reg_io_checkFileFormat(filename);
 
    // Create the nifti image pointer
-   nifti_image *image=NULL;
+   nifti_image *image=nullptr;
 
    // Read the image and convert it to nifti format if required
    switch(fileFormat)
@@ -243,4 +240,3 @@ void reg_io_diplayImageData(nifti_image *image)
    return;
 }
 /* *************************************************************** */
-#endif
diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h
index 9caae2ba..771e1fc8 100644
--- a/reg-io/_reg_ReadWriteImage.h
+++ b/reg-io/_reg_ReadWriteImage.h
@@ -12,8 +12,7 @@
  *
  */
 
-#ifndef _REG_READWRITEIMAGE_H
-#define _REG_READWRITEIMAGE_H
+#pragma once
 
 #include "nifti1_io.h"
 #include <string>
@@ -72,4 +71,3 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename);
   */
 void reg_io_diplayImageData(nifti_image *image);
 /* *************************************************************** */
-#endif
diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h
index 6b4b940c..446303c4 100644
--- a/reg-io/_reg_ReadWriteMatrix.h
+++ b/reg-io/_reg_ReadWriteMatrix.h
@@ -12,9 +12,7 @@
  *
  */
 
-
-#ifndef _REG_READWRITEMATRIX_H
-#define _REG_READWRITEMATRIX_H
+#pragma once
 
 #include "nifti1_io.h"
 //STD
@@ -102,6 +100,3 @@ void reg_tool_WriteMatrixFile(char *filename,
                               T **mat,
                               size_t nbLine,
                               size_t nbColumn);
-
-#endif // _REG_READWRITEMATRIX_H
-
diff --git a/reg-io/nifti/nifti1.h b/reg-io/nifti/nifti1.h
index f3feadfb..edc21db2 100755
--- a/reg-io/nifti/nifti1.h
+++ b/reg-io/nifti/nifti1.h
@@ -9,8 +9,7 @@
                 TIME_SERIES, NODE_INDEX, RGB_VECTOR, RGBA_VECTOR, SHAPE
  */
 
-#ifndef _NIFTI_HEADER_
-#define _NIFTI_HEADER_
+#pragma once
 
 /*****************************************************************************
       ** This file defines the "NIFTI-1" header format.               **
@@ -1504,5 +1503,3 @@ extern "C" {
 }
 #endif
 /*=================*/
-
-#endif /* _NIFTI_HEADER_ */
diff --git a/reg-io/nifti/nifti1_io.h b/reg-io/nifti/nifti1_io.h
index 39a0c9b7..df0f9b1e 100755
--- a/reg-io/nifti/nifti1_io.h
+++ b/reg-io/nifti/nifti1_io.h
@@ -3,8 +3,8 @@
            - Written by Bob Cox, SSCC NIMH
            - Revisions by Rick Reynolds, SSCC NIMH
  */
-#ifndef _NIFTI_IO_HEADER_
-#define _NIFTI_IO_HEADER_
+
+#pragma once
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -547,5 +547,3 @@ extern "C" {
 }
 #endif
 /*=================*/
-
-#endif /* _NIFTI_IO_HEADER_ */
diff --git a/reg-io/nifti/znzlib.h b/reg-io/nifti/znzlib.h
index cdbb47f6..6f2f2936 100755
--- a/reg-io/nifti/znzlib.h
+++ b/reg-io/nifti/znzlib.h
@@ -1,5 +1,4 @@
-#ifndef _ZNZLIB_H_
-#define _ZNZLIB_H_
+#pragma once
 
 /*
 znzlib.h  (zipped or non-zipped library)
@@ -120,5 +119,3 @@ extern "C" {
 }
 #endif
 /*=================*/
-
-#endif
diff --git a/reg-io/nrrd/NrrdIO/NrrdConfigure.h.in b/reg-io/nrrd/NrrdIO/NrrdConfigure.h.in
index eacaf59b..3f3b94f2 100644
--- a/reg-io/nrrd/NrrdIO/NrrdConfigure.h.in
+++ b/reg-io/nrrd/NrrdIO/NrrdConfigure.h.in
@@ -1,5 +1,4 @@
-#ifndef __NrrdConfigure_h
-#define __NrrdConfigure_h
+#pragma once
 // Configure compile time dependent code
 // BUG: 0005904 shows that special action must be taken for Mac 64 bit systems.
 // See: http://public.kitware.com/Bug/view.php?id=5904
@@ -40,5 +39,3 @@
     #define TEEM_ENDIAN 1234
   #endif
 #endif
-
-#endif // __NrrdConfigure_h
diff --git a/reg-io/nrrd/NrrdIO/biff.h b/reg-io/nrrd/NrrdIO/biff.h
index 0f5d3a0a..2418bbf3 100644
--- a/reg-io/nrrd/NrrdIO/biff.h
+++ b/reg-io/nrrd/NrrdIO/biff.h
@@ -20,8 +20,7 @@
   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
 
-#ifndef BIFF_HAS_BEEN_INCLUDED
-#define BIFF_HAS_BEEN_INCLUDED
+#pragma once
 
 /* ---- BEGIN non-NrrdIO */
 
@@ -127,5 +126,3 @@ extern "C" {
 #ifdef __cplusplus
 }
 #endif
-
-#endif /* BIFF_HAS_BEEN_INCLUDED */
diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp
index 0b8fc4d5..b32a1124 100644
--- a/reg-io/nrrd/reg_nrrd.cpp
+++ b/reg-io/nrrd/reg_nrrd.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_NRRD_CPP
-#define _REG_NRRD_CPP
-
 #include "reg_nrrd.h"
 
 /* *************************************************************** */
@@ -24,7 +21,7 @@ void reg_convertVectorField_nifti_to_nrrd(nifti_image *niiImage,
 
    DTYPE *inPtrX=static_cast<DTYPE *>(niiImage->data);
    DTYPE *inPtrY=&inPtrX[voxNumber];
-   DTYPE *inPtrZ=NULL;
+   DTYPE *inPtrZ=nullptr;
 
    DTYPE *outPtr=static_cast<DTYPE *>(nrrdImage->data);
 
@@ -60,7 +57,7 @@ void reg_convertVectorField_nrrd_to_nifti(Nrrd *nrrdImage,
 
    DTYPE *inPtrX=static_cast<DTYPE *>(niiImage->data);
    DTYPE *inPtrY=&inPtrX[voxNumber];
-   DTYPE *inPtrZ=NULL;
+   DTYPE *inPtrZ=nullptr;
 
    if(nrrdImage->axis[0].size==3)
    {
@@ -111,7 +108,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
    }
 
    // The nifti_image pointer is created
-   nifti_image *niiImage=NULL;
+   nifti_image *niiImage=nullptr;
 
    // The nifti image is generated based on the nrrd image datatype
    switch(nrrdImage->type)
@@ -279,7 +276,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
    }
 
    // Set the space unit if it is defined
-   if(nrrdImage->spaceUnits[1]!=NULL)
+   if(nrrdImage->spaceUnits[1]!=nullptr)
    {
       if(strcmp(nrrdImage->spaceUnits[1],"m")==0)
          niiImage->xyz_units=NIFTI_UNITS_METER;
@@ -292,7 +289,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
    // Set the time unit if it is defined
    if(nrrdImage->axis[3].size>1)
    {
-      if(nrrdImage->spaceUnits[4]!=NULL)
+      if(nrrdImage->spaceUnits[4]!=nullptr)
       {
          if(strcmp(nrrdImage->spaceUnits[4],"sec"))
             niiImage->time_units=NIFTI_UNITS_SEC;
@@ -327,7 +324,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
       niiImage->intent_code=NIFTI_INTENT_VECTOR;
 
       // Check if the image is a stationary field from NiftyReg
-      if(nrrdImage->axis[0].label!=NULL)
+      if(nrrdImage->axis[0].label!=nullptr)
       {
          std::string str=nrrdImage->axis[0].label;
          size_t it;
@@ -486,7 +483,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
    for(int i=0; i<NRRD_SPACE_DIM_MAX; i++)
    {
       airFree(nrrdImage->spaceUnits[i]);
-      nrrdImage->spaceUnits[i] = NULL;
+      nrrdImage->spaceUnits[i] = nullptr;
    }
    switch(niiImage->xyz_units)
    {
@@ -567,7 +564,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
       nrrdImage->axis[0].spaceDirection[1]=std::numeric_limits<double>::quiet_NaN();
       nrrdImage->axis[0].spaceDirection[2]=std::numeric_limits<double>::quiet_NaN();
       nrrdImage->axis[0].kind=nrrdKindVector;
-      nrrdImage->spaceUnits[0]=NULL;
+      nrrdImage->spaceUnits[0]=nullptr;
 
       nrrdImage->dim=niiImage->nu+1;
 
@@ -578,7 +575,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
          char temp[64];
          sprintf(temp,"NREG_VEL_STEP %f",niiImage->intent_p1);
          std::string str=temp;
-         if(nrrdImage->axis[0].label!=NULL) free(nrrdImage->axis[0].label);
+         if(nrrdImage->axis[0].label!=nullptr) free(nrrdImage->axis[0].label);
          nrrdImage->axis[0].label=(char *)malloc(str.length()*sizeof(char));
          strcpy(nrrdImage->axis[0].label,str.c_str());
 
@@ -586,7 +583,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
       else if(strcmp(niiImage->intent_name,"NREG_CPP_FILE")==0)
       {
          std::string str="NREG_CPP_FILE";
-         if(nrrdImage->axis[0].label!=NULL) free(nrrdImage->axis[0].label);
+         if(nrrdImage->axis[0].label!=nullptr) free(nrrdImage->axis[0].label);
          nrrdImage->axis[0].label=(char *)malloc(str.length()*sizeof(char));
          strcpy(nrrdImage->axis[0].label, str.c_str());
       }
@@ -607,7 +604,7 @@ Nrrd *reg_io_readNRRDfile(const char *filename)
    char *err;
 
    /* read in the nrrd from file */
-   if (nrrdLoad(nrrdImage, filename, NULL))
+   if (nrrdLoad(nrrdImage, filename, nullptr))
    {
       err = biffGetDone(NRRD);
       char text[255];
@@ -649,4 +646,3 @@ void reg_io_writeNRRDfile(Nrrd *image, const char *filename)
    return;
 }
 /* *************************************************************** */
-#endif
diff --git a/reg-io/nrrd/reg_nrrd.h b/reg-io/nrrd/reg_nrrd.h
index c7e6a979..014f58c4 100644
--- a/reg-io/nrrd/reg_nrrd.h
+++ b/reg-io/nrrd/reg_nrrd.h
@@ -12,8 +12,7 @@
  *
  */
 
-#ifndef _REG_NRRD_H
-#define _REG_NRRD_H
+#pragma once
 
 #include "nifti1_io.h"
 #include "NrrdIO.h"
@@ -48,6 +47,3 @@ Nrrd *reg_io_readNRRDfile(const char *filename);
  */
 void reg_io_writeNRRDfile(Nrrd *image, const char *filename);
 /* *************************************************************** */
-
-
-#endif
diff --git a/reg-io/png/lpng1510/png.h b/reg-io/png/lpng1510/png.h
index aadd9a77..b4da5bb2 100644
--- a/reg-io/png/lpng1510/png.h
+++ b/reg-io/png/lpng1510/png.h
@@ -368,8 +368,7 @@
  *       PNG Development Group
  */
 
-#ifndef PNG_H
-#define PNG_H
+#pragma once
 
 /* This is not the place to learn how to use libpng. The file libpng-manual.txt
  * describes how to use libpng, and the file example.c summarizes it
@@ -2663,4 +2662,3 @@ extern "C" {
 
 #endif /* PNG_VERSION_INFO_ONLY */
 /* Do not put anything past this line */
-#endif /* PNG_H */
diff --git a/reg-io/png/lpng1510/pngconf.h b/reg-io/png/lpng1510/pngconf.h
index ddb8d0d6..d89e1206 100644
--- a/reg-io/png/lpng1510/pngconf.h
+++ b/reg-io/png/lpng1510/pngconf.h
@@ -19,8 +19,7 @@
  * and png_info.
  */
 
-#ifndef PNGCONF_H
-#define PNGCONF_H
+#pragma once
 
 #ifndef PNG_BUILDING_SYMBOL_TABLE
 /* PNG_NO_LIMITS_H may be used to turn off the use of the standard C
@@ -593,5 +592,3 @@ typedef png_size_t    png_alloc_size_t;
 #    endif
 #  endif
 #endif
-
-#endif /* PNGCONF_H */
diff --git a/reg-io/png/lpng1510/pngdebug.h b/reg-io/png/lpng1510/pngdebug.h
index 16f81fdd..3b3fa85a 100644
--- a/reg-io/png/lpng1510/pngdebug.h
+++ b/reg-io/png/lpng1510/pngdebug.h
@@ -32,8 +32,7 @@
  *            to the message.
  *   arg: 0 to 2 arguments for printf(3) style substitution in message.
  */
-#ifndef PNGDEBUG_H
-#define PNGDEBUG_H
+#pragma once
 /* These settings control the formatting of messages in png.c and pngerror.c */
 /* Moved to pngdebug.h at 1.5.0 */
 #  ifndef PNG_LITERAL_SHARP
@@ -154,4 +153,3 @@
 #ifndef png_debug2
 #  define png_debug2(l, m, p1, p2) ((void)0)
 #endif
-#endif /* PNGDEBUG_H */
diff --git a/reg-io/png/lpng1510/pnginfo.h b/reg-io/png/lpng1510/pnginfo.h
index f36ce8ca..926b66c8 100644
--- a/reg-io/png/lpng1510/pnginfo.h
+++ b/reg-io/png/lpng1510/pnginfo.h
@@ -50,8 +50,7 @@
 * itxt_ptr structure argument, and the png_set_rows and png_set_unknowns
 * functions do not make their own copies.
 */
-#ifndef PNGINFO_H
-#define PNGINFO_H
+#pragma once
 
 struct png_info_def
 {
@@ -266,4 +265,3 @@ defined(PNG_READ_BACKGROUND_SUPPORTED)
 #endif
 
 };
-#endif /* PNGINFO_H */
diff --git a/reg-io/png/lpng1510/pnglibconf.h.prebuilt b/reg-io/png/lpng1510/pnglibconf.h.prebuilt
index 0a579f79..5fba410d 100644
--- a/reg-io/png/lpng1510/pnglibconf.h.prebuilt
+++ b/reg-io/png/lpng1510/pnglibconf.h.prebuilt
@@ -18,8 +18,7 @@
 /* symbols.  It is much better to generate a new file using */
 /* scripts/libpngconf.mak */
 
-#ifndef PNGLCONF_H
-#define PNGLCONF_H
+#pragma once
 /* settings */
 #define PNG_API_RULE 0
 #define PNG_CALLOC_SUPPORTED
@@ -183,4 +182,3 @@
 #define PNG_WRITE_zTXt_SUPPORTED
 #define PNG_zTXt_SUPPORTED
 /* end of options */
-#endif /* PNGLCONF_H */
diff --git a/reg-io/png/lpng1510/pngpriv.h b/reg-io/png/lpng1510/pngpriv.h
index 3d43e70f..d64d47ed 100644
--- a/reg-io/png/lpng1510/pngpriv.h
+++ b/reg-io/png/lpng1510/pngpriv.h
@@ -22,8 +22,7 @@
  * they should be well aware of the issues that may arise from doing so.
  */
 
-#ifndef PNGPRIV_H
-#define PNGPRIV_H
+#pragma once
 
 /* Feature Test Macros.  The following are defined here to ensure that correctly
  * implemented libraries reveal the APIs libpng needs to build and hide those
@@ -1673,5 +1672,3 @@ extern "C" {
 #ifdef __cplusplus
 }
 #endif
-
-#endif /* PNGPRIV_H */
diff --git a/reg-io/png/lpng1510/pngstruct.h b/reg-io/png/lpng1510/pngstruct.h
index 815fcb5c..edc335f3 100644
--- a/reg-io/png/lpng1510/pngstruct.h
+++ b/reg-io/png/lpng1510/pngstruct.h
@@ -18,8 +18,8 @@
  * It should NOT be accessed directly by an application.
  */
 
-#ifndef PNGSTRUCT_H
-#define PNGSTRUCT_H
+#pragma once
+
 /* zlib.h defines the structure z_stream, an instance of which is included
  * in this structure and is required for decompressing the LZ compressed
  * data in PNG files.
@@ -354,4 +354,3 @@ struct png_struct_def
    void (*read_filter[PNG_FILTER_VALUE_LAST-1])(png_row_infop row_info,
          png_bytep row, png_const_bytep prev_row);
 };
-#endif /* PNGSTRUCT_H */
diff --git a/reg-io/png/readpng.cpp b/reg-io/png/readpng.cpp
index 293f680f..e5614a75 100644
--- a/reg-io/png/readpng.cpp
+++ b/reg-io/png/readpng.cpp
@@ -64,12 +64,12 @@
 #endif
 
 
-static png_structp png_ptr = NULL;
-static png_infop info_ptr = NULL;
+static png_structp png_ptr = nullptr;
+static png_infop info_ptr = nullptr;
 
 png_uint_32  width, height;
 int  bit_depth, color_type;
-uch  *image_data = NULL;
+uch  *image_data = nullptr;
 
 
 void readpng_version_info(void)
@@ -99,14 +99,14 @@ int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight)
 
    /* could pass pointers to user-defined error handlers instead of NULLs: */
 
-   png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+   png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
    if (!png_ptr)
       return 4;   /* out of memory */
 
    info_ptr = png_create_info_struct(png_ptr);
    if (!info_ptr)
    {
-      png_destroy_read_struct(&png_ptr, NULL, NULL);
+      png_destroy_read_struct(&png_ptr, nullptr, nullptr);
       return 4;   /* out of memory */
    }
 
@@ -121,7 +121,7 @@ int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight)
 
    if (setjmp(png_jmpbuf(png_ptr)))
    {
-      png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
       return 2;
    }
 
@@ -137,7 +137,7 @@ int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight)
     * compression_type and filter_type => NULLs] */
 
    png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type,
-                NULL, NULL, NULL);
+                nullptr, nullptr, nullptr);
    *pWidth = width;
    *pHeight = height;
 
@@ -163,7 +163,7 @@ int readpng_get_bgcolor(uch *red, uch *green, uch *blue)
 
    if (setjmp(png_jmpbuf(png_ptr)))
    {
-      png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
       return 2;
    }
 
@@ -215,7 +215,7 @@ uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes)
 {
    double  gamma;
    png_uint_32  i, rowbytes;
-   png_bytepp  row_pointers = NULL;
+   png_bytepp  row_pointers = nullptr;
 
 
    /* setjmp() must be called in every function that calls a PNG-reading
@@ -223,8 +223,8 @@ uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes)
 
    if (setjmp(png_jmpbuf(png_ptr)))
    {
-      png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
-      return NULL;
+      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
+      return nullptr;
    }
 
 
@@ -261,17 +261,17 @@ uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes)
    *pRowbytes = rowbytes = png_get_rowbytes(png_ptr, info_ptr);
    *pChannels = (int)png_get_channels(png_ptr, info_ptr);
 
-   if ((image_data = (uch *)malloc(rowbytes*height)) == NULL)
+   if ((image_data = (uch *)malloc(rowbytes*height)) == nullptr)
    {
-      png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
-      return NULL;
+      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
+      return nullptr;
    }
-   if ((row_pointers = (png_bytepp)malloc(height*sizeof(png_bytep))) == NULL)
+   if ((row_pointers = (png_bytepp)malloc(height*sizeof(png_bytep))) == nullptr)
    {
-      png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
       free(image_data);
-      image_data = NULL;
-      return NULL;
+      image_data = nullptr;
+      return nullptr;
    }
 
    Trace((stderr, "readpng_get_image:  channels = %d, rowbytes = %ld, height = %ld\n", *pChannels, rowbytes, height));
@@ -292,9 +292,9 @@ uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes)
     * post-IDAT text/time/etc. is desired) */
 
    free(row_pointers);
-   row_pointers = NULL;
+   row_pointers = nullptr;
 
-   png_read_end(png_ptr, NULL);
+   png_read_end(png_ptr, nullptr);
 
    return image_data;
 }
@@ -305,13 +305,13 @@ void readpng_cleanup(int free_image_data)
    if (free_image_data && image_data)
    {
       free(image_data);
-      image_data = NULL;
+      image_data = nullptr;
    }
 
    if (png_ptr && info_ptr)
    {
-      png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
-      png_ptr = NULL;
-      info_ptr = NULL;
+      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
+      png_ptr = nullptr;
+      info_ptr = nullptr;
    }
 }
diff --git a/reg-io/png/reg_png.cpp b/reg-io/png/reg_png.cpp
index 410e9bab..8c266d03 100644
--- a/reg-io/png/reg_png.cpp
+++ b/reg-io/png/reg_png.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_PNG_CPP
-#define _REG_PNG_CPP
-
 #include "reg_png.h"
 #include "readpng.h"
 
@@ -20,9 +17,9 @@
 nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
 {
    // We first read the png file
-   FILE *pngFile=NULL;
+   FILE *pngFile=nullptr;
    pngFile = fopen(pngFileName, "rb");
-   if(pngFile==NULL)
+   if(pngFile==nullptr)
    {
       char text[255];
       sprintf(text, "Can not open the png file %s", pngFileName);
@@ -38,7 +35,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
       reg_exit();
    rewind(pngFile);
 
-   png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+   png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
    if (!png_ptr)
    {
       reg_print_fct_error("reg_io_readPNGfile");
@@ -49,7 +46,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
    png_infop info_ptr = png_create_info_struct(png_ptr);
    if (!info_ptr)
    {
-      png_destroy_read_struct(&png_ptr, NULL, NULL);
+      png_destroy_read_struct(&png_ptr, nullptr, nullptr);
       reg_print_fct_error("reg_io_readPNGfile");
       reg_print_msg_error("Error when reading the png file - out of memory");
       reg_exit();
@@ -61,7 +58,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
    png_uint_32 Width, Height;
    int bit_depth, color_type;
    png_get_IHDR(png_ptr, info_ptr, &Width, &Height, &bit_depth,
-                &color_type, NULL, NULL, NULL);
+                &color_type, nullptr, nullptr, nullptr);
 
    int Channels;
    ulg rowbytes;
@@ -100,12 +97,12 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
    }
 
    int dim[8]= {2,static_cast<int>(Width),static_cast<int>(Height),1,1,1,1,1};
-   nifti_image *niiImage=NULL;
+   nifti_image *niiImage=nullptr;
    if(readData)
    {
 
       uch *image_data;
-      if ((image_data = (uch *)malloc(Width*Height*Channels*sizeof(uch))) == NULL)
+      if ((image_data = (uch *)malloc(Width*Height*Channels*sizeof(uch))) == nullptr)
          reg_exit();
 
       for (png_uint_32 i=0; i<Height; ++i)
@@ -114,7 +111,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
       }
 
       png_read_image(png_ptr, row_pointers);
-      png_read_end(png_ptr, NULL);
+      png_read_end(png_ptr, nullptr);
 
       niiImage=nifti_make_new_nim(dim,NIFTI_TYPE_UINT8,true);
       uch *niiPtr=static_cast<uch *>(niiImage->data);
@@ -152,7 +149,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
       niiImage=nifti_make_new_nim(dim,NIFTI_TYPE_UINT8,false);
    }
    delete []row_pointers;
-   png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+   png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
    fclose (pngFile);
 
    nifti_set_filenames(niiImage, pngFileName,0,0);
@@ -208,15 +205,15 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename)
       reg_exit();
    }
    // The png file structures are created
-   png_structp png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
-   if (png_ptr==NULL)
+   png_structp png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
+   if (png_ptr==nullptr)
    {
       reg_print_fct_error("reg_io_writePNGfile");
       reg_print_msg_error("The png pointer could not be created");
       reg_exit();
    }
    png_infop info_ptr = png_create_info_struct (png_ptr);
-   if(info_ptr==NULL)
+   if(info_ptr==nullptr)
    {
       reg_print_fct_error("reg_io_writePNGfile");
       reg_print_msg_error("The png structure could not be created");
@@ -248,7 +245,7 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename)
    // Write the image data to the file
    png_init_io (png_ptr, fp);
    png_set_rows (png_ptr, info_ptr, row_pointers);
-   png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);
+   png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, nullptr);
    // Free the allocated png arrays
    for(int y=0; y<image->ny; ++y)
       png_free(png_ptr, row_pointers[y]);
@@ -258,4 +255,3 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename)
    fclose (fp);
 }
 /* *************************************************************** */
-#endif
diff --git a/reg-io/png/reg_png.h b/reg-io/png/reg_png.h
index de9f18ce..900552f5 100644
--- a/reg-io/png/reg_png.h
+++ b/reg-io/png/reg_png.h
@@ -12,8 +12,7 @@
  *
  */
 
-#ifndef _REG_PNG_H
-#define _REG_PNG_H
+#pragma once
 
 #include "nifti1_io.h"
 #include "_reg_tools.h"
@@ -36,5 +35,3 @@ nifti_image *reg_io_readPNGfile(const char *filename, bool readData);
   */
 void reg_io_writePNGfile(nifti_image *image, const char *filename);
 /* *************************************************************** */
-
-#endif
diff --git a/reg-lib/AffineDeformationFieldKernel.h b/reg-lib/AffineDeformationFieldKernel.h
index 8e4cd22d..25f7acdd 100644
--- a/reg-lib/AffineDeformationFieldKernel.h
+++ b/reg-lib/AffineDeformationFieldKernel.h
@@ -1,11 +1,10 @@
-#ifndef AFFINEDEFORMATIONFIELDKERNEL_H
-#define AFFINEDEFORMATIONFIELDKERNEL_H
+#pragma once
 
 #include "Kernel.h"
 
 class AffineDeformationFieldKernel : public Kernel {
 public:
-    static std::string getName() {
+    static std::string GetName() {
         return "AffineDeformationFieldKernel";
     }
 
@@ -13,7 +12,5 @@ class AffineDeformationFieldKernel : public Kernel {
     }
 
     virtual ~AffineDeformationFieldKernel(){}
-    virtual void calculate(bool compose = false) = 0;
+    virtual void Calculate(bool compose = false) = 0;
 };
-
-#endif // AFFINEDEFORMATIONFIELDKERNEL_H
diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp
index bab532ef..b1787b27 100755
--- a/reg-lib/AladinContent.cpp
+++ b/reg-lib/AladinContent.cpp
@@ -3,224 +3,209 @@
 using namespace std;
 
 /* *************************************************************** */
-AladinContent::AladinContent()
-{
-	//int dim[8] = { 2, 20, 20, 1, 1, 1, 1, 1 };
-	//this->CurrentFloating = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-	//this->CurrentReference = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-	//this->CurrentReferenceMask = NULL;
-	//
-	this->CurrentReference = NULL;
-	this->CurrentReferenceMask = NULL;
-	this->CurrentFloating = NULL;
-	this->transformationMatrix = NULL;
-	this->blockMatchingParams = NULL;
-	this->bytes = sizeof(float);//Default
-	//
-	initVars();
+AladinContent::AladinContent() {
+    //int dim[8] = { 2, 20, 20, 1, 1, 1, 1, 1 };
+    //this->currentFloating = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
+    //this->currentReference = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
+    //this->currentReferenceMask = nullptr;
+
+    this->currentReference = nullptr;
+    this->currentReferenceMask = nullptr;
+    this->currentFloating = nullptr;
+    this->transformationMatrix = nullptr;
+    this->blockMatchingParams = nullptr;
+    this->bytes = sizeof(float);  // Default
+
+    InitVars();
 }
 /* *************************************************************** */
-AladinContent::AladinContent(nifti_image *CurrentReferenceIn,
-									  nifti_image *CurrentFloatingIn,
-									  int *CurrentReferenceMaskIn,
-									  mat44 *transMat,
-									  size_t bytesIn,
-									  const unsigned int currentPercentageOfBlockToUseIn,
-									  const unsigned int inlierLtsIn,
-									  int stepSizeBlockIn) :
-	CurrentReference(CurrentReferenceIn),
-	CurrentFloating(CurrentFloatingIn),
-	CurrentReferenceMask(CurrentReferenceMaskIn),
-	transformationMatrix(transMat),
-	bytes(bytesIn),
-	currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn),
-	inlierLts(inlierLtsIn),
-	stepSizeBlock(stepSizeBlockIn)
-{
-	this->blockMatchingParams = new _reg_blockMatchingParam();
-	initVars();
+AladinContent::AladinContent(nifti_image *currentReferenceIn,
+                             nifti_image *currentFloatingIn,
+                             int *currentReferenceMaskIn,
+                             mat44 *transMat,
+                             size_t bytesIn,
+                             const unsigned int currentPercentageOfBlockToUseIn,
+                             const unsigned int inlierLtsIn,
+                             int stepSizeBlockIn) :
+    currentReference(currentReferenceIn),
+    currentFloating(currentFloatingIn),
+    currentReferenceMask(currentReferenceMaskIn),
+    transformationMatrix(transMat),
+    bytes(bytesIn),
+    currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn),
+    inlierLts(inlierLtsIn),
+    stepSizeBlock(stepSizeBlockIn) {
+    this->blockMatchingParams = new _reg_blockMatchingParam();
+    InitVars();
 }
 /* *************************************************************** */
-AladinContent::AladinContent(nifti_image *CurrentReferenceIn,
-									  nifti_image *CurrentFloatingIn,
-									  int *CurrentReferenceMaskIn,
-									  mat44 *transMat,
-									  size_t bytesIn) :
-	CurrentReference(CurrentReferenceIn),
-	CurrentFloating(CurrentFloatingIn),
-	CurrentReferenceMask(CurrentReferenceMaskIn),
-	transformationMatrix(transMat),
-	bytes(bytesIn)
-{
-	this->blockMatchingParams = NULL;
-	initVars();
+AladinContent::AladinContent(nifti_image *currentReferenceIn,
+                             nifti_image *currentFloatingIn,
+                             int *currentReferenceMaskIn,
+                             mat44 *transMat,
+                             size_t bytesIn) :
+    currentReference(currentReferenceIn),
+    currentFloating(currentFloatingIn),
+    currentReferenceMask(currentReferenceMaskIn),
+    transformationMatrix(transMat),
+    bytes(bytesIn) {
+    this->blockMatchingParams = nullptr;
+    InitVars();
 }
 /* *************************************************************** */
-AladinContent::AladinContent(nifti_image *CurrentReferenceIn,
-									  nifti_image *CurrentFloatingIn,
-									  int *CurrentReferenceMaskIn,
-									  size_t bytesIn,
-									  const unsigned int currentPercentageOfBlockToUseIn,
-									  const unsigned int inlierLtsIn,
-									  int stepSizeBlockIn) :
-	CurrentReference(CurrentReferenceIn),
-	CurrentFloating(CurrentFloatingIn),
-	CurrentReferenceMask(CurrentReferenceMaskIn),
-	bytes(bytesIn),
-	currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn),
-	inlierLts(inlierLtsIn),
-	stepSizeBlock(stepSizeBlockIn)
-{
-	this->transformationMatrix = NULL;
-	this->blockMatchingParams = new _reg_blockMatchingParam();
-	initVars();
+AladinContent::AladinContent(nifti_image *currentReferenceIn,
+                             nifti_image *currentFloatingIn,
+                             int *currentReferenceMaskIn,
+                             size_t bytesIn,
+                             const unsigned int currentPercentageOfBlockToUseIn,
+                             const unsigned int inlierLtsIn,
+                             int stepSizeBlockIn) :
+    currentReference(currentReferenceIn),
+    currentFloating(currentFloatingIn),
+    currentReferenceMask(currentReferenceMaskIn),
+    bytes(bytesIn),
+    currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn),
+    inlierLts(inlierLtsIn),
+    stepSizeBlock(stepSizeBlockIn) {
+    this->transformationMatrix = nullptr;
+    this->blockMatchingParams = new _reg_blockMatchingParam();
+    InitVars();
 }
 /* *************************************************************** */
-AladinContent::AladinContent(nifti_image *CurrentReferenceIn,
-									  nifti_image *CurrentFloatingIn,
-									  int *CurrentReferenceMaskIn,
-									  size_t bytesIn) :
-	CurrentReference(CurrentReferenceIn),
-	CurrentFloating(CurrentFloatingIn),
-	CurrentReferenceMask(CurrentReferenceMaskIn),
-	bytes(bytesIn)
-{
-	this->transformationMatrix = NULL;
-	this->blockMatchingParams = NULL;
-	initVars();
+AladinContent::AladinContent(nifti_image *currentReferenceIn,
+                             nifti_image *currentFloatingIn,
+                             int *currentReferenceMaskIn,
+                             size_t bytesIn) :
+    currentReference(currentReferenceIn),
+    currentFloating(currentFloatingIn),
+    currentReferenceMask(currentReferenceMaskIn),
+    bytes(bytesIn) {
+    this->transformationMatrix = nullptr;
+    this->blockMatchingParams = nullptr;
+    InitVars();
 }
 /* *************************************************************** */
-AladinContent::~AladinContent()
-{
-   ClearWarpedImage();
-   ClearDeformationField();
-   if (this->blockMatchingParams != NULL)
-      delete this->blockMatchingParams;
+AladinContent::~AladinContent() {
+    ClearWarpedImage();
+    ClearDeformationField();
+    if (this->blockMatchingParams != nullptr)
+        delete this->blockMatchingParams;
 }
 /* *************************************************************** */
-void AladinContent::initVars()
-{
-   if (this->CurrentFloating != NULL && this->CurrentReference != NULL) {
-      this->AllocateWarpedImage();
-   }
-   else {
-      this->CurrentWarped = NULL;
-   }
+void AladinContent::InitVars() {
+    if (this->currentFloating != nullptr && this->currentReference != nullptr) {
+        this->AllocateWarpedImage();
+    } else {
+        this->currentWarped = nullptr;
+    }
 
-   if (this->CurrentReference != NULL){
-      this->AllocateDeformationField(bytes);
-      refMatrix_xyz = (CurrentReference->sform_code > 0) ? (CurrentReference->sto_xyz) : (CurrentReference->qto_xyz);
-   }
-   else {
-      this->CurrentDeformationField = NULL;
-   }
+    if (this->currentReference != nullptr) {
+        this->AllocateDeformationField(bytes);
+        refMatrix_xyz = (currentReference->sform_code > 0) ? (currentReference->sto_xyz) : (currentReference->qto_xyz);
+    } else {
+        this->currentDeformationField = nullptr;
+    }
 
-   if (this->CurrentReferenceMask == NULL && this->CurrentReference != NULL)
-      this->CurrentReferenceMask = (int *) calloc(this->CurrentReference->nx * this->CurrentReference->ny * this->CurrentReference->nz, sizeof(int));
+    if (this->currentReferenceMask == nullptr && this->currentReference != nullptr)
+        this->currentReferenceMask = (int *)calloc(this->currentReference->nx * this->currentReference->ny * this->currentReference->nz, sizeof(int));
 
-   if (this->CurrentFloating != NULL) {
-      floMatrix_ijk = (CurrentFloating->sform_code > 0) ? (CurrentFloating->sto_ijk) :  (CurrentFloating->qto_ijk);
-   }
-   if (blockMatchingParams != NULL) {
-      initialise_block_matching_method(CurrentReference,
-                                       blockMatchingParams,
-                                       currentPercentageOfBlockToUse,
-                                       inlierLts,
-                                       stepSizeBlock,
-                                       CurrentReferenceMask,
-                                       false);
-   }
+    if (this->currentFloating != nullptr) {
+        floMatrix_ijk = (currentFloating->sform_code > 0) ? (currentFloating->sto_ijk) : (currentFloating->qto_ijk);
+    }
+    if (blockMatchingParams != nullptr) {
+        initialise_block_matching_method(currentReference,
+                                         blockMatchingParams,
+                                         currentPercentageOfBlockToUse,
+                                         inlierLts,
+                                         stepSizeBlock,
+                                         currentReferenceMask,
+                                         false);
+    }
 #ifndef NDEBUG
-	if(this->CurrentReference==NULL) reg_print_msg_debug("CurrentReference image is NULL");
-	if(this->CurrentFloating==NULL) reg_print_msg_debug("CurrentFloating image is NULL");
-	if(this->CurrentDeformationField==NULL) reg_print_msg_debug("CurrentDeformationField image is NULL");
-	if(this->CurrentWarped==NULL) reg_print_msg_debug("CurrentWarped image is NULL");
-	if(this->CurrentReferenceMask==NULL) reg_print_msg_debug("CurrentReferenceMask image is NULL");
-	if(this->blockMatchingParams==NULL) reg_print_msg_debug("blockMatchingParams image is NULL");
+    if (this->currentReference == nullptr) reg_print_msg_debug("currentReference image is nullptr");
+    if (this->currentFloating == nullptr) reg_print_msg_debug("currentFloating image is nullptr");
+    if (this->currentDeformationField == nullptr) reg_print_msg_debug("currentDeformationField image is nullptr");
+    if (this->currentWarped == nullptr) reg_print_msg_debug("currentWarped image is nullptr");
+    if (this->currentReferenceMask == nullptr) reg_print_msg_debug("currentReferenceMask image is nullptr");
+    if (this->blockMatchingParams == nullptr) reg_print_msg_debug("blockMatchingParams image is nullptr");
 #endif
 }
 /* *************************************************************** */
-void AladinContent::AllocateWarpedImage()
-{
-	if (this->CurrentReference == NULL || this->CurrentFloating == NULL) {
-		reg_print_fct_error( "AladinContent::AllocateWarpedImage()");
-		reg_print_msg_error(" Reference and floating images are not defined. Exit.");
-		reg_exit();
-	}
+void AladinContent::AllocateWarpedImage() {
+    if (this->currentReference == nullptr || this->currentFloating == nullptr) {
+        reg_print_fct_error("AladinContent::AllocateWarpedImage()");
+        reg_print_msg_error(" Reference and floating images are not defined. Exit.");
+        reg_exit();
+    }
 
-	this->CurrentWarped = nifti_copy_nim_info(this->CurrentReference);
-	this->CurrentWarped->dim[0] = this->CurrentWarped->ndim = this->CurrentFloating->ndim;
-	this->CurrentWarped->dim[4] = this->CurrentWarped->nt = this->CurrentFloating->nt;
-	this->CurrentWarped->pixdim[4] = this->CurrentWarped->dt = 1.0;
-	this->CurrentWarped->nvox = (size_t) this->CurrentWarped->nx * (size_t) this->CurrentWarped->ny * (size_t) this->CurrentWarped->nz * (size_t) this->CurrentWarped->nt;
-	this->CurrentWarped->datatype = this->CurrentFloating->datatype;
-	this->CurrentWarped->nbyper = this->CurrentFloating->nbyper;
-	this->CurrentWarped->data = (void *) calloc(this->CurrentWarped->nvox, this->CurrentWarped->nbyper);
-	//this->floatingDatatype = this->CurrentFloating->datatype;
+    this->currentWarped = nifti_copy_nim_info(this->currentReference);
+    this->currentWarped->dim[0] = this->currentWarped->ndim = this->currentFloating->ndim;
+    this->currentWarped->dim[4] = this->currentWarped->nt = this->currentFloating->nt;
+    this->currentWarped->pixdim[4] = this->currentWarped->dt = 1.0;
+    this->currentWarped->nvox = (size_t)(this->currentWarped->nx * this->currentWarped->ny * this->currentWarped->nz * this->currentWarped->nt);
+    this->currentWarped->datatype = this->currentFloating->datatype;
+    this->currentWarped->nbyper = this->currentFloating->nbyper;
+    this->currentWarped->data = (void*)calloc(this->currentWarped->nvox, this->currentWarped->nbyper);
+    //this->floatingDatatype = this->currentFloating->datatype;
 }
 /* *************************************************************** */
-void AladinContent::AllocateDeformationField(size_t bytes)
-{
-	if (this->CurrentReference == NULL) {
-		reg_print_fct_error( "AladinContent::AllocateDeformationField()");
-		reg_print_msg_error("Reference image is not defined. Exit.");
-		reg_exit();
-	}
-	//ClearDeformationField();
+void AladinContent::AllocateDeformationField(size_t bytes) {
+    if (this->currentReference == nullptr) {
+        reg_print_fct_error("AladinContent::AllocateDeformationField()");
+        reg_print_msg_error("Reference image is not defined. Exit.");
+        reg_exit();
+    }
+    //ClearDeformationField();
 
-	this->CurrentDeformationField = nifti_copy_nim_info(this->CurrentReference);
-	this->CurrentDeformationField->dim[0] = this->CurrentDeformationField->ndim = 5;
-    if (this->CurrentReference->dim[0] == 2)
-        this->CurrentDeformationField->dim[3] = this->CurrentDeformationField->nz = 1;
-	this->CurrentDeformationField->dim[4] = this->CurrentDeformationField->nt = 1;
-	this->CurrentDeformationField->pixdim[4] = this->CurrentDeformationField->dt = 1.0;
-	if (this->CurrentReference->nz == 1)
-		this->CurrentDeformationField->dim[5] = this->CurrentDeformationField->nu = 2;
-	else
-		this->CurrentDeformationField->dim[5] = this->CurrentDeformationField->nu = 3;
-	this->CurrentDeformationField->pixdim[5] = this->CurrentDeformationField->du = 1.0;
-	this->CurrentDeformationField->dim[6] = this->CurrentDeformationField->nv = 1;
-	this->CurrentDeformationField->pixdim[6] = this->CurrentDeformationField->dv = 1.0;
-	this->CurrentDeformationField->dim[7] = this->CurrentDeformationField->nw = 1;
-	this->CurrentDeformationField->pixdim[7] = this->CurrentDeformationField->dw = 1.0;
-	this->CurrentDeformationField->nvox = (size_t) this->CurrentDeformationField->nx *
-			this->CurrentDeformationField->ny * this->CurrentDeformationField->nz *
-			this->CurrentDeformationField->nt * this->CurrentDeformationField->nu;
-	this->CurrentDeformationField->nbyper = bytes;
-	if (bytes == 4)
-		this->CurrentDeformationField->datatype = NIFTI_TYPE_FLOAT32;
-	else if (bytes == 8)
-		this->CurrentDeformationField->datatype = NIFTI_TYPE_FLOAT64;
-	else {
-		reg_print_fct_error( "AladinContent::AllocateDeformationField()");
-		reg_print_msg_error( "Only float or double are expected for the deformation field. Exit.");
-		reg_exit();
-	}
-	this->CurrentDeformationField->scl_slope = 1.f;
-	this->CurrentDeformationField->scl_inter = 0.f;
-	this->CurrentDeformationField->data = (void *) calloc(this->CurrentDeformationField->nvox, this->CurrentDeformationField->nbyper);
+    this->currentDeformationField = nifti_copy_nim_info(this->currentReference);
+    this->currentDeformationField->dim[0] = this->currentDeformationField->ndim = 5;
+    if (this->currentReference->dim[0] == 2)
+        this->currentDeformationField->dim[3] = this->currentDeformationField->nz = 1;
+    this->currentDeformationField->dim[4] = this->currentDeformationField->nt = 1;
+    this->currentDeformationField->pixdim[4] = this->currentDeformationField->dt = 1.0;
+    if (this->currentReference->nz == 1)
+        this->currentDeformationField->dim[5] = this->currentDeformationField->nu = 2;
+    else
+        this->currentDeformationField->dim[5] = this->currentDeformationField->nu = 3;
+    this->currentDeformationField->pixdim[5] = this->currentDeformationField->du = 1.0;
+    this->currentDeformationField->dim[6] = this->currentDeformationField->nv = 1;
+    this->currentDeformationField->pixdim[6] = this->currentDeformationField->dv = 1.0;
+    this->currentDeformationField->dim[7] = this->currentDeformationField->nw = 1;
+    this->currentDeformationField->pixdim[7] = this->currentDeformationField->dw = 1.0;
+    this->currentDeformationField->nvox = (size_t)this->currentDeformationField->nx *
+        this->currentDeformationField->ny * this->currentDeformationField->nz *
+        this->currentDeformationField->nt * this->currentDeformationField->nu;
+    this->currentDeformationField->nbyper = bytes;
+    if (bytes == 4)
+        this->currentDeformationField->datatype = NIFTI_TYPE_FLOAT32;
+    else if (bytes == 8)
+        this->currentDeformationField->datatype = NIFTI_TYPE_FLOAT64;
+    else {
+        reg_print_fct_error("AladinContent::AllocateDeformationField()");
+        reg_print_msg_error("Only float or double are expected for the deformation field. Exit.");
+        reg_exit();
+    }
+    this->currentDeformationField->scl_slope = 1;
+    this->currentDeformationField->scl_inter = 0;
+    this->currentDeformationField->data = (void*)calloc(this->currentDeformationField->nvox, this->currentDeformationField->nbyper);
 }
 /* *************************************************************** */
-void AladinContent::setCaptureRange(const int voxelCaptureRangeIn)
-{
-	this->blockMatchingParams->voxelCaptureRange = voxelCaptureRangeIn;
+void AladinContent::SetCaptureRange(const int voxelCaptureRangeIn) {
+    this->blockMatchingParams->voxelCaptureRange = voxelCaptureRangeIn;
 }
 /* *************************************************************** */
-void AladinContent::ClearDeformationField()
-{
-	if (this->CurrentDeformationField != NULL)
-		nifti_image_free(this->CurrentDeformationField);
-	this->CurrentDeformationField = NULL;
+void AladinContent::ClearDeformationField() {
+    if (this->currentDeformationField != nullptr)
+        nifti_image_free(this->currentDeformationField);
+    this->currentDeformationField = nullptr;
 }
 /* *************************************************************** */
-void AladinContent::ClearWarpedImage()
-{
-	if (this->CurrentWarped != NULL)
-		nifti_image_free(this->CurrentWarped);
-	this->CurrentWarped = NULL;
+void AladinContent::ClearWarpedImage() {
+    if (this->currentWarped != nullptr)
+        nifti_image_free(this->currentWarped);
+    this->currentWarped = nullptr;
 }
 /* *************************************************************** */
-bool AladinContent::isCurrentComputationDoubleCapable()
-{
-	return true;
+bool AladinContent::IsCurrentComputationDoubleCapable() {
+    return true;
 }
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index e4ba692b..c5276a2c 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -1,5 +1,4 @@
-#ifndef ALADINCONTENT_H_
-#define ALADINCONTENT_H_
+#pragma once
 
 #include <ctime>
 #include <iosfwd>
@@ -11,32 +10,31 @@
 
 class AladinContent {
 public:
-
 	AladinContent();
 	AladinContent(nifti_image *CurrentReferenceIn,
-					  nifti_image *CurrentFloatingIn,
-					  int *CurrentReferenceMaskIn,
-					  size_t byte,
-					  const unsigned int percentageOfBlocks,
-					  const unsigned int InlierLts,
-					  int BlockStepSize);
+				  nifti_image *CurrentFloatingIn,
+				  int *CurrentReferenceMaskIn,
+				  size_t byte,
+				  const unsigned int percentageOfBlocks,
+				  const unsigned int inlierLts,
+				  int blockStepSize);
 	AladinContent(nifti_image *CurrentReferenceIn,
-					  nifti_image *CurrentFloatingIn,
-					  int *CurrentReferenceMaskIn,
-					  size_t byte);
+				  nifti_image *CurrentFloatingIn,
+				  int *CurrentReferenceMaskIn,
+				  size_t byte);
 	AladinContent(nifti_image *CurrentReferenceIn,
-					  nifti_image *CurrentFloatingIn,
-					  int *CurrentReferenceMaskIn,
-					  mat44 *transMat,
-					  size_t byte,
-					  const unsigned int percentageOfBlocks,
-					  const unsigned int InlierLts,
-					  int BlockStepSize);
+				  nifti_image *CurrentFloatingIn,
+				  int *CurrentReferenceMaskIn,
+				  mat44 *transMat,
+				  size_t byte,
+				  const unsigned int percentageOfBlocks,
+				  const unsigned int inlierLts,
+				  int blockStepSize);
 	AladinContent(nifti_image *CurrentReferenceIn,
-					  nifti_image *CurrentFloatingIn,
-					  int *CurrentReferenceMaskIn,
-					  mat44 *transMat,
-					  size_t byte);
+				  nifti_image *CurrentFloatingIn,
+				  int *CurrentReferenceMaskIn,
+				  mat44 *transMat,
+				  size_t byte);
 
 	virtual ~AladinContent();
 
@@ -46,68 +44,59 @@ class AladinContent {
 	/* *************************************************************** */
 	void AllocateDeformationField(size_t bytes);
 	void ClearDeformationField();
-	virtual void initVars();
+	virtual void InitVars();
 
 	unsigned int floatingVoxels, referenceVoxels;
 
 	//getters
-	virtual nifti_image *getCurrentDeformationField()
-	{
-		return this->CurrentDeformationField;
+	virtual nifti_image* GetCurrentDeformationField() {
+		return this->currentDeformationField;
 	}
-	nifti_image *getCurrentReference()
-	{
-		return this->CurrentReference;
+	nifti_image* GetCurrentReference() {
+		return this->currentReference;
 	}
-	nifti_image *getCurrentFloating()
-	{
-		return this->CurrentFloating;
+	nifti_image* GetCurrentFloating() {
+		return this->currentFloating;
 	}
-	virtual nifti_image *getCurrentWarped(int = 0)
-	{
-		return this->CurrentWarped;
+	virtual nifti_image* GetCurrentWarped(int = 0) {
+		return this->currentWarped;
 	}
-	int *getCurrentReferenceMask()
-	{
-		return this->CurrentReferenceMask;
+	int* GetCurrentReferenceMask() {
+		return this->currentReferenceMask;
 	}
-	mat44 *getTransformationMatrix()
-	{
+	mat44* GetTransformationMatrix() {
 		return this->transformationMatrix;
 	}
-	virtual _reg_blockMatchingParam* getBlockMatchingParams() {
+	virtual _reg_blockMatchingParam* GetBlockMatchingParams() {
 		return this->blockMatchingParams;
 	}
 	//setters
-	virtual void setTransformationMatrix(mat44 *transformationMatrixIn)
-	{
+	virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) {
 		this->transformationMatrix = transformationMatrixIn;
 	}
-	virtual void setCurrentDeformationField(nifti_image *CurrentDeformationFieldIn)
-	{
-		this->CurrentDeformationField = CurrentDeformationFieldIn;
+	virtual void SetCurrentDeformationField(nifti_image *CurrentDeformationFieldIn) {
+		this->currentDeformationField = CurrentDeformationFieldIn;
 	}
-	virtual void setCurrentWarped(nifti_image *CurrentWarpedImageIn)
-	{
-		this->CurrentWarped = CurrentWarpedImageIn;
+	virtual void SetCurrentWarped(nifti_image *CurrentWarpedImageIn) {
+		this->currentWarped = CurrentWarpedImageIn;
 	}
 
-	virtual void setCurrentReferenceMask(int *, size_t) {}
-	void setCaptureRange(const int captureRangeIn);
+	virtual void SetCurrentReferenceMask(int *, size_t) {}
+	void SetCaptureRange(const int captureRangeIn);
 	//
-	virtual void setBlockMatchingParams(_reg_blockMatchingParam* bmp) {
+	virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
 		blockMatchingParams = bmp;
 	}
 
-	virtual bool isCurrentComputationDoubleCapable();
+	virtual bool IsCurrentComputationDoubleCapable();
 
 protected:
-	nifti_image *CurrentReference;
-	nifti_image *CurrentFloating;
-	int *CurrentReferenceMask;
+	nifti_image *currentReference;
+	nifti_image *currentFloating;
+	int *currentReferenceMask;
 
-	nifti_image *CurrentDeformationField;
-	nifti_image *CurrentWarped;
+	nifti_image *currentDeformationField;
+	nifti_image *currentWarped;
 
 	mat44 *transformationMatrix;
 	mat44 refMatrix_xyz;
@@ -120,5 +109,3 @@ class AladinContent {
 	unsigned int inlierLts;
 	int stepSizeBlock;
 };
-
-#endif //ALADINCONTENT_H_
diff --git a/reg-lib/BlockMatchingKernel.h b/reg-lib/BlockMatchingKernel.h
index d5c44cb0..b8271521 100644
--- a/reg-lib/BlockMatchingKernel.h
+++ b/reg-lib/BlockMatchingKernel.h
@@ -1,18 +1,15 @@
-#ifndef BLOCKMATCHINGKERNEL_H
-#define BLOCKMATCHINGKERNEL_H
+#pragma once
 
 #include "Kernel.h"
 
 class BlockMatchingKernel : public Kernel {
 public:
-    static std::string getName() {
+    static std::string GetName() {
         return "blockMatchingKernel";
     }
     BlockMatchingKernel(std::string name) : Kernel(name) {
 
     }
     virtual ~BlockMatchingKernel(){}
-    virtual void calculate() = 0;
+    virtual void Calculate() = 0;
 };
-
-#endif // BLOCKMATCHINGKERNEL_H
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 6eabb852..5b5505d9 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -144,18 +144,18 @@ set(_reg_aladin_files
   Platform.cpp
   Platform.h
   Kernel.h
-  cpu/CPUAffineDeformationFieldKernel.h
-  cpu/CPUAffineDeformationFieldKernel.cpp
-  cpu/CPUBlockMatchingKernel.h
-  cpu/CPUBlockMatchingKernel.cpp
-  cpu/CPUConvolutionKernel.h
-  cpu/CPUConvolutionKernel.cpp
-  cpu/CPUOptimiseKernel.h
-  cpu/CPUOptimiseKernel.cpp
-  cpu/CPUResampleImageKernel.h
-  cpu/CPUResampleImageKernel.cpp
-  cpu/CPUKernelFactory.cpp
-  cpu/CPUKernelFactory.h
+  cpu/CpuAffineDeformationFieldKernel.h
+  cpu/CpuAffineDeformationFieldKernel.cpp
+  cpu/CpuBlockMatchingKernel.h
+  cpu/CpuBlockMatchingKernel.cpp
+  cpu/CpuConvolutionKernel.h
+  cpu/CpuConvolutionKernel.cpp
+  cpu/CpuOptimiseKernel.h
+  cpu/CpuOptimiseKernel.cpp
+  cpu/CpuResampleImageKernel.h
+  cpu/CpuResampleImageKernel.cpp
+  cpu/CpuKernelFactory.cpp
+  cpu/CpuKernelFactory.h
 )
 set(_reg_aladin_libraries
   _reg_localTrans
@@ -185,12 +185,12 @@ install(FILES
         ConvolutionKernel.h
         OptimiseKernel.h
         ResampleImageKernel.h
-        cpu/CPUAffineDeformationFieldKernel.h
-        cpu/CPUBlockMatchingKernel.h
-        cpu/CPUConvolutionKernel.h
-        cpu/CPUOptimiseKernel.h
-        cpu/CPUResampleImageKernel.h
-        KernelFactory.h cpu/CPUKernelFactory.h DESTINATION include)
+        cpu/CpuAffineDeformationFieldKernel.h
+        cpu/CpuBlockMatchingKernel.h
+        cpu/CpuConvolutionKernel.h
+        cpu/CpuOptimiseKernel.h
+        cpu/CpuResampleImageKernel.h
+        KernelFactory.h cpu/CpuKernelFactory.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_aladin")
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h
index 51adbc81..5c7d113c 100644
--- a/reg-lib/ConvolutionKernel.h
+++ b/reg-lib/ConvolutionKernel.h
@@ -1,18 +1,15 @@
-#ifndef CONVOLUTIONKERNEL_H
-#define CONVOLUTIONKERNEL_H
+#pragma once
 
 #include "Kernel.h"
 #include "nifti1_io.h"
 
 class ConvolutionKernel : public Kernel {
 public:
-    static std::string getName() {
+    static std::string GetName() {
         return "ConvolutionKernel";
     }
     ConvolutionKernel(std::string name) : Kernel(name) {
     }
     virtual ~ConvolutionKernel(){}
-    virtual void calculate(nifti_image *image, float *sigma, int kernelType, int *mask = NULL, bool *timePoints = NULL, bool *axis = NULL) = 0;
+    virtual void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr) = 0;
 };
-
-#endif // CONVOLUTIONKERNEL_H
diff --git a/reg-lib/Kernel.h b/reg-lib/Kernel.h
index 2184f08e..e5b7b031 100755
--- a/reg-lib/Kernel.h
+++ b/reg-lib/Kernel.h
@@ -1,25 +1,18 @@
-#ifndef KERNEL_H_
-#define KERNEL_H_
+#pragma once
 
 #include <iostream>
 #include <string>
 
 class Kernel {
 public:
+    Kernel(std::string nameIn) { name = nameIn; }
+    virtual ~Kernel() {}
 
+    std::string GetName() const;
 
-	Kernel(std::string nameIn){ name = nameIn; }
-	virtual ~Kernel(){}
+    template <class T>
+    T* castTo() { return dynamic_cast<T*>(this); }
 
-	std::string getName() const;
-	std::string name;
-
-	template <class T>
-	T* castTo() {
-		return dynamic_cast<T*>(this);
-	}
+private:
+    std::string name;
 };
-
-
-
-#endif /*KERNEL_H_*/
diff --git a/reg-lib/KernelFactory.h b/reg-lib/KernelFactory.h
index 608c90f1..f7d99de2 100755
--- a/reg-lib/KernelFactory.h
+++ b/reg-lib/KernelFactory.h
@@ -1,16 +1,9 @@
-#ifndef KERNELFACTORY_H_
-#define KERNELFACTORY_H_
+#pragma once
 
 #include "AladinContent.h"
 
-class  KernelFactory {
+class KernelFactory {
 public:
-
-    virtual Kernel* produceKernel(std::string name, AladinContent* con) const = 0;
-    virtual ~KernelFactory() {
-    }
+    virtual Kernel* ProduceKernel(std::string name, AladinContent* con) const = 0;
+    virtual ~KernelFactory() {}
 };
-
-
-
-#endif /*KERNELFACTORY_H_*/
diff --git a/reg-lib/OptimiseKernel.h b/reg-lib/OptimiseKernel.h
index 971f05ce..d0066298 100644
--- a/reg-lib/OptimiseKernel.h
+++ b/reg-lib/OptimiseKernel.h
@@ -1,17 +1,14 @@
-#ifndef OPTIMISEKERNEL_H
-#define OPTIMISEKERNEL_H
+#pragma once
 
 #include "Kernel.h"
 
 class OptimiseKernel : public Kernel{
 public:
-    static std::string getName() {
+    static std::string GetName() {
         return "OptimiseKernel";
     }
     OptimiseKernel(std::string name) : Kernel(name) {
     }
     virtual ~OptimiseKernel(){}
-    virtual void calculate(bool affine) = 0;
+    virtual void Calculate(bool affine) = 0;
 };
-
-#endif // OPTIMISEKERNEL_H
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 88517693..13aa6e64 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -1,101 +1,94 @@
 #include "Platform.h"
 #include "AladinContent.h"
 #include "KernelFactory.h"
-#include "CPUKernelFactory.h"
+#include "CpuKernelFactory.h"
 #ifdef _USE_CUDA
-#include "CUDAKernelFactory.h"
-#include "CUDAContextSingletton.h"
+#include "CudaKernelFactory.h"
+#include "CudaContextSingleton.h"
 #endif
 #ifdef _USE_OPENCL
-#include "CLKernelFactory.h"
-#include "CLContextSingletton.h"
+#include "ClKernelFactory.h"
+#include "ClContextSingleton.h"
 #endif
 
 using namespace std;
 
 /* *************************************************************** */
-Platform::Platform(int platformCode)
-{
+Platform::Platform(int platformCode) {
     this->platformCode = platformCode;
     if (platformCode == NR_PLATFORM_CPU) {
-        this->factory = new CPUKernelFactory();
+        this->factory = new CpuKernelFactory();
         this->platformName = "cpu_platform";
     }
 #ifdef _USE_CUDA
     else if (platformCode == NR_PLATFORM_CUDA) {
-        this->factory = new CUDAKernelFactory();
+        this->factory = new CudaKernelFactory();
         this->platformName = "cuda_platform";
     }
 #endif
 #ifdef _USE_OPENCL
     else if (platformCode == NR_PLATFORM_CL) {
-        this->factory = new CLKernelFactory();
+        this->factory = new ClKernelFactory();
         this->platformName = "cl_platform";
     }
 #endif
 }
 /* *************************************************************** */
-Kernel *Platform::createKernel(const string& name, AladinContent *con) const
-{
-    return this->factory->produceKernel(name, con);
+Kernel* Platform::CreateKernel(const string& name, AladinContent *con) const {
+    return this->factory->ProduceKernel(name, con);
 }
 /* *************************************************************** */
-std::string Platform::getName()
-{
+std::string Platform::GetName() {
     return this->platformName;
 }
 /* *************************************************************** */
-unsigned Platform::getGpuIdx()
-{
+unsigned Platform::GetGpuIdx() {
     return this->gpuIdx;
 }
 /* *************************************************************** */
-void Platform::setGpuIdx(unsigned gpuIdxIn)
-{
-    if(this->platformCode == NR_PLATFORM_CPU)
-    {
+void Platform::SetGpuIdx(unsigned gpuIdxIn) {
+    if (this->platformCode == NR_PLATFORM_CPU) {
         this->gpuIdx = 999;
     }
 #ifdef _USE_CUDA
-    else if(this->platformCode == NR_PLATFORM_CUDA) {
-            CUDAContextSingletton *cudaContext = &CUDAContextSingletton::Instance();
-            if(gpuIdxIn != 999) {
-                this->gpuIdx = gpuIdxIn;
-                cudaContext->setCudaIdx(gpuIdxIn);
-            }
+    else if (this->platformCode == NR_PLATFORM_CUDA) {
+        CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance();
+        if (gpuIdxIn != 999) {
+            this->gpuIdx = gpuIdxIn;
+            cudaContext->SetCudaIdx(gpuIdxIn);
         }
+    }
 #endif
 #ifdef _USE_OPENCL
-    else if(this->platformCode == NR_PLATFORM_CL) {
-            CLContextSingletton *sContext = &CLContextSingletton::Instance();
-            if(gpuIdxIn != 999) {
-                this->gpuIdx = gpuIdxIn;
-                sContext->setClIdx(gpuIdxIn);
-            }
+    else if (this->platformCode == NR_PLATFORM_CL) {
+        ClContextSingleton *sContext = &ClContextSingleton::Instance();
+        if (gpuIdxIn != 999) {
+            this->gpuIdx = gpuIdxIn;
+            sContext->SetClIdx(gpuIdxIn);
+        }
 
-            std::size_t paramValueSize;
-            sContext->checkErrNum(clGetDeviceInfo(sContext->getDeviceId(), CL_DEVICE_TYPE, 0, NULL, &paramValueSize), "Failed to find OpenCL device info ");
-            cl_device_type *field = (cl_device_type *) alloca(sizeof(cl_device_type) * paramValueSize);
-            sContext->checkErrNum(clGetDeviceInfo(sContext->getDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, NULL), "Failed to find OpenCL device info ");
-            if(CL_DEVICE_TYPE_CPU==*field){
-                reg_print_fct_error("Platform::setClIdx");
-                reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit");
-                reg_exit();
-            }
+        std::size_t paramValueSize;
+        sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
+        cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize);
+        sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
+        if (CL_DEVICE_TYPE_CPU == *field) {
+            reg_print_fct_error("Platform::setClIdx");
+            reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit");
+            reg_exit();
         }
+    }
 #endif
 }
 /* *************************************************************** */
-int Platform::getPlatformCode() {
+int Platform::GetPlatformCode() {
     return this->platformCode;
 }
 /* *************************************************************** */
-//void Platform::setPlatformCode(const int platformCodeIn) {
+//void Platform::SetPlatformCode(const int platformCodeIn) {
 //    this->platformCode = platformCodeIn;
 //}
 /* *************************************************************** */
-Platform::~Platform()
-{
+Platform::~Platform() {
     delete this->factory;
 }
 /* *************************************************************** */
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 07dc65e1..66ef2be1 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -1,5 +1,4 @@
-#ifndef PLATFORM_H_
-#define PLATFORM_H_
+#pragma once
 
 #include <map>
 #include <string>
@@ -15,24 +14,20 @@ class AladinContent;
 
 class Platform {
 public:
-	Platform(int platformCode);
-	virtual ~Platform();
+    Platform(int platformCode);
+    virtual ~Platform();
 
-    Kernel *createKernel(const std::string& name, AladinContent *con) const;
-    std::string getName();
+    Kernel* CreateKernel(const std::string& name, AladinContent *con) const;
+    std::string GetName();
 
-    int getPlatformCode();
-    //void setPlatformCode(const int platformCodeIn);
-    void setGpuIdx(unsigned gpuIdxIn);
-    unsigned getGpuIdx();
+    int GetPlatformCode();
+    //void SetPlatformCode(const int platformCodeIn);
+    void SetGpuIdx(unsigned gpuIdxIn);
+    unsigned GetGpuIdx();
 
 private:
-    KernelFactory* factory;
+    KernelFactory *factory;
     std::string platformName;
     int platformCode;
     unsigned gpuIdx;
 };
-
-
-
-#endif //PLATFORM_H_
diff --git a/reg-lib/ResampleImageKernel.h b/reg-lib/ResampleImageKernel.h
index cfeb1a81..9ac7bfb5 100644
--- a/reg-lib/ResampleImageKernel.h
+++ b/reg-lib/ResampleImageKernel.h
@@ -1,12 +1,11 @@
-#ifndef RESAMPLEIMAGEKERNEL_H
-#define RESAMPLEIMAGEKERNEL_H
+#pragma once
 
 #include "Kernel.h"
 #include "nifti1_io.h"
 
 class ResampleImageKernel : public Kernel {
 public:
-    static std::string getName() {
+    static std::string GetName() {
         return "ResampleImageKernel";
     }
     ResampleImageKernel( std::string name) : Kernel(name) {
@@ -14,7 +13,5 @@ class ResampleImageKernel : public Kernel {
 
     virtual ~ResampleImageKernel(){}
 
-    virtual void calculate(int interp, float paddingValue, bool *dti_timepoint = NULL, mat33 * jacMat = NULL) = 0;
+    virtual void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr) = 0;
 };
-
-#endif // RESAMPLEIMAGEKERNEL_H
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index fc9137cd..2ea21ec7 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -1,6 +1,3 @@
-#ifndef _REG_ALADIN_CPP
-#define _REG_ALADIN_CPP
-
 #include "_reg_ReadWriteMatrix.h"
 #include "_reg_aladin.h"
 #include "_reg_stringFormat.h"
@@ -13,7 +10,7 @@
 #include "AladinContent.h"
 
 #ifdef _USE_CUDA
-#include "CUDAAladinContent.h"
+#include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
 #include "CLAladinContent.h"
@@ -21,701 +18,656 @@
 #endif
 
 /* *************************************************************** */
-template<class T> reg_aladin<T>::reg_aladin()
-{
-  this->executableName = (char*) "Aladin";
-  this->InputReference = NULL;
-  this->InputFloating = NULL;
-  this->InputReferenceMask = NULL;
-  this->ReferencePyramid = NULL;
-  this->FloatingPyramid = NULL;
-  this->ReferenceMaskPyramid = NULL;
-  this->activeVoxelNumber = NULL;
+template<class T>
+reg_aladin<T>::reg_aladin() {
+    this->executableName = (char*)"Aladin";
+    this->inputReference = nullptr;
+    this->inputFloating = nullptr;
+    this->inputReferenceMask = nullptr;
+    this->referencePyramid = nullptr;
+    this->floatingPyramid = nullptr;
+    this->referenceMaskPyramid = nullptr;
+    this->activeVoxelNumber = nullptr;
 
-  this->TransformationMatrix = new mat44;
-  this->InputTransformName = NULL;
+    this->transformationMatrix = new mat44;
+    this->inputTransformName = nullptr;
 
-  this->affineTransformation3DKernel = NULL;
-  this->blockMatchingKernel = NULL;
-  this->optimiseKernel = NULL;
-  this->resamplingKernel = NULL;
+    this->affineTransformation3DKernel = nullptr;
+    this->blockMatchingKernel = nullptr;
+    this->optimiseKernel = nullptr;
+    this->resamplingKernel = nullptr;
 
-  this->con = NULL;
-  this->blockMatchingParams = NULL;
-  this->platform = NULL;
+    this->con = nullptr;
+    this->blockMatchingParams = nullptr;
+    this->platform = nullptr;
 
-  this->Verbose = true;
+    this->verbose = true;
 
-  this->MaxIterations = 5;
+    this->maxIterations = 5;
 
-  this->NumberOfLevels = 3;
-  this->LevelsToPerform = 3;
+    this->numberOfLevels = 3;
+    this->levelsToPerform = 3;
 
-  this->PerformRigid = 1;
-  this->PerformAffine = 1;
+    this->performRigid = 1;
+    this->performAffine = 1;
 
-  this->BlockStepSize = 1;
-  this->BlockPercentage = 50;
-  this->InlierLts = 50;
+    this->blockStepSize = 1;
+    this->blockPercentage = 50;
+    this->inlierLts = 50;
 
-  this->AlignCentre = 1;
-  this->AlignCentreMass = 0;
+    this->alignCentre = 1;
+    this->alignCentreMass = 0;
 
-  this->Interpolation = 1;
+    this->interpolation = 1;
 
-  this->FloatingSigma = 0.0;
-  this->ReferenceSigma = 0.0;
+    this->floatingSigma = 0.0;
+    this->referenceSigma = 0.0;
 
-  this->ReferenceUpperThreshold = std::numeric_limits<T>::max();
-  this->ReferenceLowerThreshold = -std::numeric_limits<T>::max();
+    this->referenceUpperThreshold = std::numeric_limits<T>::max();
+    this->referenceLowerThreshold = -std::numeric_limits<T>::max();
 
-  this->FloatingUpperThreshold = std::numeric_limits<T>::max();
-  this->FloatingLowerThreshold = -std::numeric_limits<T>::max();
+    this->floatingUpperThreshold = std::numeric_limits<T>::max();
+    this->floatingLowerThreshold = -std::numeric_limits<T>::max();
 
-  this->WarpedPaddingValue = std::numeric_limits<T>::quiet_NaN();
+    this->warpedPaddingValue = std::numeric_limits<T>::quiet_NaN();
 
-  this->funcProgressCallback = NULL;
-  this->paramsProgressCallback = NULL;
+    this->funcProgressCallback = nullptr;
+    this->paramsProgressCallback = nullptr;
 
-  this->platformCode = NR_PLATFORM_CPU;
-  this->CurrentLevel = 0;
-  this->gpuIdx = 999;
+    this->platformCode = NR_PLATFORM_CPU;
+    this->currentLevel = 0;
+    this->gpuIdx = 999;
 
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_aladin constructor called");
+    reg_print_msg_debug("reg_aladin constructor called");
 #endif
 }
 /* *************************************************************** */
-template<class T> reg_aladin<T>::~reg_aladin()
-{
-  if (this->TransformationMatrix != NULL)
-    delete this->TransformationMatrix;
-  this->TransformationMatrix = NULL;
-
-  if(this->ReferencePyramid!=NULL){
-    for (unsigned int l = 0; l < this->LevelsToPerform; ++l)
-    {
-      if(this->ReferencePyramid[l] != NULL)
-        nifti_image_free(this->ReferencePyramid[l]);
-      this->ReferencePyramid[l] = NULL;
+template<class T>
+reg_aladin<T>::~reg_aladin() {
+    if (this->transformationMatrix != nullptr)
+        delete this->transformationMatrix;
+    this->transformationMatrix = nullptr;
+
+    if (this->referencePyramid != nullptr) {
+        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
+            if (this->referencePyramid[l] != nullptr)
+                nifti_image_free(this->referencePyramid[l]);
+            this->referencePyramid[l] = nullptr;
+        }
+        free(this->referencePyramid);
+        this->referencePyramid = nullptr;
     }
-    free(this->ReferencePyramid);
-    this->ReferencePyramid = NULL;
-  }
-  if(this->FloatingPyramid!=NULL){
-    for (unsigned int l = 0; l < this->LevelsToPerform; ++l)
-    {
-      if(this->FloatingPyramid[l] != NULL)
-        nifti_image_free(this->FloatingPyramid[l]);
-      this->FloatingPyramid[l] = NULL;
+    if (this->floatingPyramid != nullptr) {
+        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
+            if (this->floatingPyramid[l] != nullptr)
+                nifti_image_free(this->floatingPyramid[l]);
+            this->floatingPyramid[l] = nullptr;
+        }
+        free(this->floatingPyramid);
+        this->floatingPyramid = nullptr;
     }
-    free(this->FloatingPyramid);
-    this->FloatingPyramid = NULL;
-  }
-  if(this->ReferenceMaskPyramid!=NULL){
-    for (unsigned int l = 0; l < this->LevelsToPerform; ++l)
-    {
-      if(this->ReferenceMaskPyramid[l] != NULL)
-        free(this->ReferenceMaskPyramid[l]);
-      this->ReferenceMaskPyramid[l] = NULL;
+    if (this->referenceMaskPyramid != nullptr) {
+        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
+            if (this->referenceMaskPyramid[l] != nullptr)
+                free(this->referenceMaskPyramid[l]);
+            this->referenceMaskPyramid[l] = nullptr;
+        }
+        free(this->referenceMaskPyramid);
+        this->referenceMaskPyramid = nullptr;
     }
-    free(this->ReferenceMaskPyramid);
-    this->ReferenceMaskPyramid = NULL;
-  }
-  if(this->activeVoxelNumber!=NULL)
-    free(this->activeVoxelNumber);
-  if(this->platform!=NULL)
-    delete this->platform;
+    if (this->activeVoxelNumber != nullptr)
+        free(this->activeVoxelNumber);
+    if (this->platform != nullptr)
+        delete this->platform;
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_aladin destructor called");
+    reg_print_msg_debug("reg_aladin destructor called");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-bool reg_aladin<T>::TestMatrixConvergence(mat44 *mat)
-{
-  bool convergence = true;
-  if ((fabsf(mat->m[0][0]) - 1.0f) > CONVERGENCE_EPS)
-    convergence = false;
-  if ((fabsf(mat->m[1][1]) - 1.0f) > CONVERGENCE_EPS)
-    convergence = false;
-  if ((fabsf(mat->m[2][2]) - 1.0f) > CONVERGENCE_EPS)
-    convergence = false;
-
-  if ((fabsf(mat->m[0][1]) - 0.0f) > CONVERGENCE_EPS)
-    convergence = false;
-  if ((fabsf(mat->m[0][2]) - 0.0f) > CONVERGENCE_EPS)
-    convergence = false;
-  if ((fabsf(mat->m[0][3]) - 0.0f) > CONVERGENCE_EPS)
-    convergence = false;
-
-  if ((fabsf(mat->m[1][0]) - 0.0f) > CONVERGENCE_EPS)
-    convergence = false;
-  if ((fabsf(mat->m[1][2]) - 0.0f) > CONVERGENCE_EPS)
-    convergence = false;
-  if ((fabsf(mat->m[1][3]) - 0.0f) > CONVERGENCE_EPS)
-    convergence = false;
-
-  if ((fabsf(mat->m[2][0]) - 0.0f) > CONVERGENCE_EPS)
-    convergence = false;
-  if ((fabsf(mat->m[2][1]) - 0.0f) > CONVERGENCE_EPS)
-    convergence = false;
-  if ((fabsf(mat->m[2][3]) - 0.0f) > CONVERGENCE_EPS)
-    convergence = false;
-
-  return convergence;
+bool reg_aladin<T>::TestMatrixConvergence(mat44 *mat) {
+    bool convergence = true;
+    if ((fabsf(mat->m[0][0]) - 1.0f) > CONVERGENCE_EPS)
+        convergence = false;
+    if ((fabsf(mat->m[1][1]) - 1.0f) > CONVERGENCE_EPS)
+        convergence = false;
+    if ((fabsf(mat->m[2][2]) - 1.0f) > CONVERGENCE_EPS)
+        convergence = false;
+
+    if ((fabsf(mat->m[0][1]) - 0.0f) > CONVERGENCE_EPS)
+        convergence = false;
+    if ((fabsf(mat->m[0][2]) - 0.0f) > CONVERGENCE_EPS)
+        convergence = false;
+    if ((fabsf(mat->m[0][3]) - 0.0f) > CONVERGENCE_EPS)
+        convergence = false;
+
+    if ((fabsf(mat->m[1][0]) - 0.0f) > CONVERGENCE_EPS)
+        convergence = false;
+    if ((fabsf(mat->m[1][2]) - 0.0f) > CONVERGENCE_EPS)
+        convergence = false;
+    if ((fabsf(mat->m[1][3]) - 0.0f) > CONVERGENCE_EPS)
+        convergence = false;
+
+    if ((fabsf(mat->m[2][0]) - 0.0f) > CONVERGENCE_EPS)
+        convergence = false;
+    if ((fabsf(mat->m[2][1]) - 0.0f) > CONVERGENCE_EPS)
+        convergence = false;
+    if ((fabsf(mat->m[2][3]) - 0.0f) > CONVERGENCE_EPS)
+        convergence = false;
+
+    return convergence;
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::SetVerbose(bool _verbose)
-{
-  this->Verbose = _verbose;
+void reg_aladin<T>::SetVerbose(bool _verbose) {
+    this->verbose = _verbose;
 }
 /* *************************************************************** */
 template<class T>
-int reg_aladin<T>::Check()
-{
-  //This does all the initial checking
-  if (this->InputReference == NULL)
-  {
-    reg_print_fct_error("reg_aladin<T>::Check()");
-    reg_print_msg_error("No reference image has been specified or it can not be read");
-    return EXIT_FAILURE;
-  }
-
-  if (this->InputFloating == NULL)
-  {
-    reg_print_fct_error("reg_aladin<T>::Check()");
-    reg_print_msg_error("No floating image has been specified or it can not be read");
-    return EXIT_FAILURE;
-  }
-
-  return EXIT_SUCCESS;
+int reg_aladin<T>::Check() {
+    //This does all the initial checking
+    if (this->inputReference == nullptr) {
+        reg_print_fct_error("reg_aladin<T>::Check()");
+        reg_print_msg_error("No reference image has been specified or it can not be read");
+        return EXIT_FAILURE;
+    }
+
+    if (this->inputFloating == nullptr) {
+        reg_print_fct_error("reg_aladin<T>::Check()");
+        reg_print_msg_error("No floating image has been specified or it can not be read");
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template<class T>
-int reg_aladin<T>::Print()
-{
-  if (this->InputReference == NULL)
-  {
-    reg_print_fct_error("reg_aladin<T>::Print()");
-    reg_print_msg_error("No reference image has been specified");
-    return EXIT_FAILURE;
-  }
-  if (this->InputFloating == NULL)
-  {
-    reg_print_fct_error("reg_aladin<T>::Print()");
-    reg_print_msg_error("No floating image has been specified");
-    return EXIT_FAILURE;
-  }
-
-  /* *********************************** */
-  /* DISPLAY THE REGISTRATION PARAMETERS */
-  /* *********************************** */
+int reg_aladin<T>::Print() {
+    if (this->inputReference == nullptr) {
+        reg_print_fct_error("reg_aladin<T>::Print()");
+        reg_print_msg_error("No reference image has been specified");
+        return EXIT_FAILURE;
+    }
+    if (this->inputFloating == nullptr) {
+        reg_print_fct_error("reg_aladin<T>::Print()");
+        reg_print_msg_error("No floating image has been specified");
+        return EXIT_FAILURE;
+    }
+
+    /* *********************************** */
+    /* DISPLAY THE REGISTRATION PARAMETERS */
+    /* *********************************** */
 #ifdef NDEBUG
-  if(this->Verbose)
-  {
+    if (this->verbose) {
 #endif
-    std::string text;
-    reg_print_info(this->executableName, "Parameters");
-    text = stringFormat("Platform: %s", this->platform->getName().c_str());
-    reg_print_info(this->executableName, text.c_str());
-    text = stringFormat("Reference image name: %s", this->InputReference->fname);
-    reg_print_info(this->executableName, text.c_str());
-    text = stringFormat("\t%ix%ix%i voxels", this->InputReference->nx, this->InputReference->ny, this->InputReference->nz);
-    reg_print_info(this->executableName, text.c_str());
-    text = stringFormat("\t%gx%gx%g mm", this->InputReference->dx, this->InputReference->dy, this->InputReference->dz);
-    reg_print_info(this->executableName, text.c_str());
-    text = stringFormat("Floating image name: %s", this->InputFloating->fname);
-    reg_print_info(this->executableName, text.c_str());
-    text = stringFormat("\t%ix%ix%i voxels", this->InputFloating->nx, this->InputFloating->ny, this->InputFloating->nz);
-    reg_print_info(this->executableName, text.c_str());
-    text = stringFormat("\t%gx%gx%g mm", this->InputFloating->dx, this->InputFloating->dy, this->InputFloating->dz);
-    reg_print_info(this->executableName, text.c_str());
-    text = stringFormat("Maximum iteration number: %i", this->MaxIterations);
-    reg_print_info(this->executableName, text.c_str());
-    text = stringFormat("\t(%i during the first level)", 2 * this->MaxIterations);
-    reg_print_info(this->executableName, text.c_str());
-    text = stringFormat("Percentage of blocks: %i %%", this->BlockPercentage);
-    reg_print_info(this->executableName, text.c_str());
-    reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+        std::string text;
+        reg_print_info(this->executableName, "Parameters");
+        text = stringFormat("Platform: %s", this->platform->GetName().c_str());
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("Reference image name: %s", this->inputReference->fname);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t%ix%ix%i voxels", this->inputReference->nx, this->inputReference->ny, this->inputReference->nz);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t%gx%gx%g mm", this->inputReference->dx, this->inputReference->dy, this->inputReference->dz);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("Floating image name: %s", this->inputFloating->fname);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t%ix%ix%i voxels", this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t%gx%gx%g mm", this->inputFloating->dx, this->inputFloating->dy, this->inputFloating->dz);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("Maximum iteration number: %i", this->maxIterations);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t(%i during the first level)", 2 * this->maxIterations);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("Percentage of blocks: %i %%", this->blockPercentage);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 #ifdef NDEBUG
-  }
+    }
 #endif
-  return EXIT_SUCCESS;
+    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::SetInputTransform(const char *filename)
-{
-  this->InputTransformName = (char *) filename;
-  return;
+void reg_aladin<T>::SetInputTransform(const char *filename) {
+    this->inputTransformName = (char*)filename;
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::InitialiseRegistration()
-{
+void reg_aladin<T>::InitialiseRegistration() {
 #ifndef NDEBUG
-  reg_print_fct_debug("reg_aladin::InitialiseRegistration()");
+    reg_print_fct_debug("reg_aladin::InitialiseRegistration()");
 #endif
 
-  this->platform = new Platform(this->platformCode);
-  this->platform->setGpuIdx(this->gpuIdx);
-
-  this->Print();
-
-  // CREATE THE PYRAMID IMAGES
-  this->ReferencePyramid = (nifti_image **) malloc(this->LevelsToPerform * sizeof(nifti_image *));
-  this->FloatingPyramid = (nifti_image **) malloc(this->LevelsToPerform * sizeof(nifti_image *));
-  this->ReferenceMaskPyramid = (int **) malloc(this->LevelsToPerform * sizeof(int *));
-  this->activeVoxelNumber = (int *) malloc(this->LevelsToPerform * sizeof(int));
-
-  // FINEST LEVEL OF REGISTRATION
-  reg_createImagePyramid<T>(this->InputReference,
-                            this->ReferencePyramid,
-                            this->NumberOfLevels,
-                            this->LevelsToPerform);
-  reg_createImagePyramid<T>(this->InputFloating,
-                            this->FloatingPyramid,
-                            this->NumberOfLevels,
-                            this->LevelsToPerform);
-
-  if (this->InputReferenceMask != NULL)
-    reg_createMaskPyramid<T>(this->InputReferenceMask,
-                             this->ReferenceMaskPyramid,
-                             this->NumberOfLevels,
-                             this->LevelsToPerform,
-                             this->activeVoxelNumber);
-  else {
-    for (unsigned int l = 0; l < this->LevelsToPerform; ++l) {
-      this->activeVoxelNumber[l] = this->ReferencePyramid[l]->nx * this->ReferencePyramid[l]->ny * this->ReferencePyramid[l]->nz;
-      this->ReferenceMaskPyramid[l] = (int *) calloc(activeVoxelNumber[l], sizeof(int));
-    }
-  }
-
-  Kernel *convolutionKernel = this->platform->createKernel(ConvolutionKernel::getName(), NULL);
-  // SMOOTH THE INPUT IMAGES IF REQUIRED
-  for (unsigned int l = 0; l < this->LevelsToPerform; l++) {
-    if (this->ReferenceSigma != 0.0) {
-      // Only the first image is smoothed
-      bool *active = new bool[this->ReferencePyramid[l]->nt];
-      float *sigma = new float[this->ReferencePyramid[l]->nt];
-      active[0] = true;
-      for (int i = 1; i < this->ReferencePyramid[l]->nt; ++i)
-        active[i] = false;
-      sigma[0] = this->ReferenceSigma;
-      convolutionKernel->castTo<ConvolutionKernel>()->calculate(this->ReferencePyramid[l], sigma, 0, NULL, active);
-      delete[] active;
-      delete[] sigma;
+    this->platform = new Platform(this->platformCode);
+    this->platform->SetGpuIdx(this->gpuIdx);
+
+    this->Print();
+
+    // CREATE THE PYRAMID IMAGES
+    this->referencePyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *));
+    this->floatingPyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *));
+    this->referenceMaskPyramid = (int **)malloc(this->levelsToPerform * sizeof(int *));
+    this->activeVoxelNumber = (int *)malloc(this->levelsToPerform * sizeof(int));
+
+    // FINEST LEVEL OF REGISTRATION
+    reg_createImagePyramid<T>(this->inputReference,
+                              this->referencePyramid,
+                              this->numberOfLevels,
+                              this->levelsToPerform);
+    reg_createImagePyramid<T>(this->inputFloating,
+                              this->floatingPyramid,
+                              this->numberOfLevels,
+                              this->levelsToPerform);
+
+    if (this->inputReferenceMask != nullptr)
+        reg_createMaskPyramid<T>(this->inputReferenceMask,
+                                 this->referenceMaskPyramid,
+                                 this->numberOfLevels,
+                                 this->levelsToPerform,
+                                 this->activeVoxelNumber);
+    else {
+        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
+            this->activeVoxelNumber[l] = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz;
+            this->referenceMaskPyramid[l] = (int *)calloc(activeVoxelNumber[l], sizeof(int));
+        }
     }
-    if (this->FloatingSigma != 0.0) {
-      // Only the first image is smoothed
-      bool *active = new bool[this->FloatingPyramid[l]->nt];
-      float *sigma = new float[this->FloatingPyramid[l]->nt];
-      active[0] = true;
-      for (int i = 1; i < this->FloatingPyramid[l]->nt; ++i)
-        active[i] = false;
-      sigma[0] = this->FloatingSigma;
-      convolutionKernel->castTo<ConvolutionKernel>()->calculate(this->FloatingPyramid[l], sigma, 0, NULL, active);
-      delete[] active;
-      delete[] sigma;
-    }
-  }
-  delete convolutionKernel;
-
-  // THRESHOLD THE INPUT IMAGES IF REQUIRED
-  for(unsigned int l=0; l<this->LevelsToPerform; l++)
-  {
-    reg_thresholdImage<T>(this->ReferencePyramid[l],this->ReferenceLowerThreshold, this->ReferenceUpperThreshold);
-    reg_thresholdImage<T>(this->FloatingPyramid[l],this->FloatingLowerThreshold, this->FloatingUpperThreshold);
-  }
-
-  // Initialise the transformation
-  if (this->InputTransformName != NULL)
-  {
-    if (FILE *aff = fopen(this->InputTransformName, "r")) {
-      fclose(aff);
-    }
-    else
-    {
-      std::string text;
-      text = stringFormat("The specified input affine file (%s) can not be read", this->InputTransformName);
-      reg_print_fct_error("reg_aladin<T>::InitialiseRegistration()");
-      reg_print_msg_error(text.c_str());
-      reg_exit();
+
+    Kernel *convolutionKernel = this->platform->CreateKernel(ConvolutionKernel::GetName(), nullptr);
+    // SMOOTH THE INPUT IMAGES IF REQUIRED
+    for (unsigned int l = 0; l < this->levelsToPerform; l++) {
+        if (this->referenceSigma != 0.0) {
+            // Only the first image is smoothed
+            bool *active = new bool[this->referencePyramid[l]->nt];
+            float *sigma = new float[this->referencePyramid[l]->nt];
+            active[0] = true;
+            for (int i = 1; i < this->referencePyramid[l]->nt; ++i)
+                active[i] = false;
+            sigma[0] = this->referenceSigma;
+            convolutionKernel->castTo<ConvolutionKernel>()->Calculate(this->referencePyramid[l], sigma, 0, nullptr, active);
+            delete[] active;
+            delete[] sigma;
+        }
+        if (this->floatingSigma != 0.0) {
+            // Only the first image is smoothed
+            bool *active = new bool[this->floatingPyramid[l]->nt];
+            float *sigma = new float[this->floatingPyramid[l]->nt];
+            active[0] = true;
+            for (int i = 1; i < this->floatingPyramid[l]->nt; ++i)
+                active[i] = false;
+            sigma[0] = this->floatingSigma;
+            convolutionKernel->castTo<ConvolutionKernel>()->Calculate(this->floatingPyramid[l], sigma, 0, nullptr, active);
+            delete[] active;
+            delete[] sigma;
+        }
     }
-    reg_tool_ReadAffineFile(this->TransformationMatrix, this->InputTransformName);
-  }
-  else  // No input affine transformation
-  {
-    for (int i = 0; i < 4; i++) {
-      for (int j = 0; j < 4; j++) {
-        this->TransformationMatrix->m[i][j] = 0.0;
-      }
-      this->TransformationMatrix->m[i][i] = 1.0;
+    delete convolutionKernel;
+
+    // THRESHOLD THE INPUT IMAGES IF REQUIRED
+    for (unsigned int l = 0; l < this->levelsToPerform; l++) {
+        reg_thresholdImage<T>(this->referencePyramid[l], this->referenceLowerThreshold, this->referenceUpperThreshold);
+        reg_thresholdImage<T>(this->floatingPyramid[l], this->floatingLowerThreshold, this->floatingUpperThreshold);
     }
-    if (this->AlignCentre && this->AlignCentreMass==0)
-    {
-      const mat44 *floatingMatrix = (this->InputFloating->sform_code > 0) ? &(this->InputFloating->sto_xyz) : &(this->InputFloating->qto_xyz);
-      const mat44 *referenceMatrix = (this->InputReference->sform_code > 0) ? &(this->InputReference->sto_xyz) : &(this->InputReference->qto_xyz);
-      //In pixel coordinates
-      float floatingCenter[3];
-      floatingCenter[0] = (float) (this->InputFloating->nx) / 2.0f;
-      floatingCenter[1] = (float) (this->InputFloating->ny) / 2.0f;
-      floatingCenter[2] = (float) (this->InputFloating->nz) / 2.0f;
-      float referenceCenter[3];
-      referenceCenter[0] = (float) (this->InputReference->nx) / 2.0f;
-      referenceCenter[1] = (float) (this->InputReference->ny) / 2.0f;
-      referenceCenter[2] = (float) (this->InputReference->nz) / 2.0f;
-      //From pixel coordinates to real coordinates
-      float floatingRealPosition[3];
-      reg_mat44_mul(floatingMatrix, floatingCenter, floatingRealPosition);
-      float referenceRealPosition[3];
-      reg_mat44_mul(referenceMatrix, referenceCenter, referenceRealPosition);
-      //Set translation to the transformation matrix
-      this->TransformationMatrix->m[0][3] = floatingRealPosition[0] - referenceRealPosition[0];
-      this->TransformationMatrix->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1];
-      this->TransformationMatrix->m[2][3] = floatingRealPosition[2] - referenceRealPosition[2];
+
+    // Initialise the transformation
+    if (this->inputTransformName != nullptr) {
+        if (FILE *aff = fopen(this->inputTransformName, "r")) {
+            fclose(aff);
+        } else {
+            std::string text;
+            text = stringFormat("The specified input affine file (%s) can not be read", this->inputTransformName);
+            reg_print_fct_error("reg_aladin<T>::InitialiseRegistration()");
+            reg_print_msg_error(text.c_str());
+            reg_exit();
+        }
+        reg_tool_ReadAffineFile(this->transformationMatrix, this->inputTransformName);
+    } else { // No input affine transformation
+        for (int i = 0; i < 4; i++) {
+            for (int j = 0; j < 4; j++) {
+                this->transformationMatrix->m[i][j] = 0.0;
+            }
+            this->transformationMatrix->m[i][i] = 1.0;
+        }
+        if (this->alignCentre && this->alignCentreMass == 0) {
+            const mat44 *floatingMatrix = (this->inputFloating->sform_code > 0) ? &(this->inputFloating->sto_xyz) : &(this->inputFloating->qto_xyz);
+            const mat44 *referenceMatrix = (this->inputReference->sform_code > 0) ? &(this->inputReference->sto_xyz) : &(this->inputReference->qto_xyz);
+            //In pixel coordinates
+            float floatingCenter[3];
+            floatingCenter[0] = (float)(this->inputFloating->nx) / 2.0f;
+            floatingCenter[1] = (float)(this->inputFloating->ny) / 2.0f;
+            floatingCenter[2] = (float)(this->inputFloating->nz) / 2.0f;
+            float referenceCenter[3];
+            referenceCenter[0] = (float)(this->inputReference->nx) / 2.0f;
+            referenceCenter[1] = (float)(this->inputReference->ny) / 2.0f;
+            referenceCenter[2] = (float)(this->inputReference->nz) / 2.0f;
+            //From pixel coordinates to real coordinates
+            float floatingRealPosition[3];
+            reg_mat44_mul(floatingMatrix, floatingCenter, floatingRealPosition);
+            float referenceRealPosition[3];
+            reg_mat44_mul(referenceMatrix, referenceCenter, referenceRealPosition);
+            //Set translation to the transformation matrix
+            this->transformationMatrix->m[0][3] = floatingRealPosition[0] - referenceRealPosition[0];
+            this->transformationMatrix->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1];
+            this->transformationMatrix->m[2][3] = floatingRealPosition[2] - referenceRealPosition[2];
+        } else if (this->alignCentreMass == 2) {
+            float referenceCentre[3] = {0, 0, 0};
+            float referenceCount = 0;
+            reg_tools_changeDatatype<float>(this->inputReference);
+            float *refPtr = static_cast<float *>(this->inputReference->data);
+            size_t refIndex = 0;
+            for (int z = 0; z < this->inputReference->nz; ++z) {
+                for (int y = 0; y < this->inputReference->ny; ++y) {
+                    for (int x = 0; x < this->inputReference->nx; ++x) {
+                        float value = refPtr[refIndex];
+                        referenceCentre[0] += (float)x * value;
+                        referenceCentre[1] += (float)y * value;
+                        referenceCentre[2] += (float)z * value;
+                        referenceCount += value;
+                        refIndex++;
+                    }
+                }
+            }
+            referenceCentre[0] /= referenceCount;
+            referenceCentre[1] /= referenceCount;
+            referenceCentre[2] /= referenceCount;
+            float refCOM[3];
+            if (this->inputReference->sform_code > 0)
+                reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOM);
+
+            float floatingCentre[3] = {0, 0, 0};
+            float floatingCount = 0;
+            reg_tools_changeDatatype<float>(this->inputFloating);
+            float *floPtr = static_cast<float *>(this->inputFloating->data);
+            size_t floIndex = 0;
+            for (int z = 0; z < this->inputFloating->nz; ++z) {
+                for (int y = 0; y < this->inputFloating->ny; ++y) {
+                    for (int x = 0; x < this->inputFloating->nx; ++x) {
+                        float value = floPtr[floIndex];
+                        floatingCentre[0] += (float)x * value;
+                        floatingCentre[1] += (float)y * value;
+                        floatingCentre[2] += (float)z * value;
+                        floatingCount += value;
+                        floIndex++;
+                    }
+                }
+            }
+            floatingCentre[0] /= floatingCount;
+            floatingCentre[1] /= floatingCount;
+            floatingCentre[2] /= floatingCount;
+            float floCOM[3];
+            if (this->inputFloating->sform_code > 0)
+                reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOM);
+            reg_mat44_eye(this->transformationMatrix);
+            this->transformationMatrix->m[0][3] = floCOM[0] - refCOM[0];
+            this->transformationMatrix->m[1][3] = floCOM[1] - refCOM[1];
+            this->transformationMatrix->m[2][3] = floCOM[2] - refCOM[2];
+        }
     }
-	else if (this->AlignCentreMass == 2)
-	{
-		float referenceCentre[3] = { 0,0,0 };
-		float referenceCount = 0;
-		reg_tools_changeDatatype<float>(this->InputReference);
-		float *refPtr = static_cast<float *>(this->InputReference->data);
-		size_t refIndex = 0;
-		for (int z = 0; z < this->InputReference->nz; ++z) {
-			for (int y = 0; y < this->InputReference->ny; ++y) {
-				for (int x = 0; x < this->InputReference->nx; ++x) {
-					float value = refPtr[refIndex];
-					referenceCentre[0] += (float)x * value;
-					referenceCentre[1] += (float)y * value;
-					referenceCentre[2] += (float)z * value;
-					referenceCount+=value;
-					refIndex++;
-				}
-			}
-		}
-		referenceCentre[0] /= referenceCount;
-		referenceCentre[1] /= referenceCount;
-		referenceCentre[2] /= referenceCount;
-		float refCOM[3];
-		if (this->InputReference->sform_code > 0)
-			reg_mat44_mul(&(this->InputReference->sto_xyz), referenceCentre, refCOM);
-
-		float floatingCentre[3] = { 0,0,0 };
-		float floatingCount = 0;
-		reg_tools_changeDatatype<float>(this->InputFloating);
-		float *floPtr = static_cast<float *>(this->InputFloating->data);
-		size_t floIndex = 0;
-		for (int z = 0; z < this->InputFloating->nz; ++z) {
-			for (int y = 0; y < this->InputFloating->ny; ++y) {
-				for (int x = 0; x < this->InputFloating->nx; ++x) {
-					float value = floPtr[floIndex];
-					floatingCentre[0] += (float)x * value;
-					floatingCentre[1] += (float)y * value;
-					floatingCentre[2] += (float)z * value;
-					floatingCount += value;
-					floIndex++;
-				}
-			}
-		}
-		floatingCentre[0] /= floatingCount;
-		floatingCentre[1] /= floatingCount;
-		floatingCentre[2] /= floatingCount;
-		float floCOM[3];
-		if (this->InputFloating->sform_code > 0)
-			reg_mat44_mul(&(this->InputFloating->sto_xyz), floatingCentre, floCOM);
-		reg_mat44_eye(this->TransformationMatrix);
-		this->TransformationMatrix->m[0][3] = floCOM[0] - refCOM[0];
-		this->TransformationMatrix->m[1][3] = floCOM[1] - refCOM[1];
-		this->TransformationMatrix->m[2][3] = floCOM[2] - refCOM[2];
-	}
-  }
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::ClearCurrentInputImage()
-{
-  nifti_image_free(this->ReferencePyramid[this->CurrentLevel]);
-  this->ReferencePyramid[this->CurrentLevel] = NULL;
+void reg_aladin<T>::ClearCurrentInputImage() {
+    nifti_image_free(this->referencePyramid[this->currentLevel]);
+    this->referencePyramid[this->currentLevel] = nullptr;
 
-  nifti_image_free(this->FloatingPyramid[this->CurrentLevel]);
-  this->FloatingPyramid[this->CurrentLevel] = NULL;
+    nifti_image_free(this->floatingPyramid[this->currentLevel]);
+    this->floatingPyramid[this->currentLevel] = nullptr;
 
-  free(this->ReferenceMaskPyramid[this->CurrentLevel]);
-  this->ReferenceMaskPyramid[this->CurrentLevel] = NULL;
+    free(this->referenceMaskPyramid[this->currentLevel]);
+    this->referenceMaskPyramid[this->currentLevel] = nullptr;
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::createKernels()
-{
-  this->affineTransformation3DKernel = platform->createKernel(AffineDeformationFieldKernel::getName(), this->con);
-  this->resamplingKernel = platform->createKernel(ResampleImageKernel::getName(), this->con);
-  if (this->blockMatchingParams != NULL) {
-    this->blockMatchingKernel = platform->createKernel(BlockMatchingKernel::getName(), this->con);
-    this->optimiseKernel = platform->createKernel(OptimiseKernel::getName(), this->con);
-  } else {
-    this->blockMatchingKernel = NULL;
-    this->optimiseKernel = NULL;
-  }
+void reg_aladin<T>::CreateKernels() {
+    this->affineTransformation3DKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), this->con);
+    this->resamplingKernel = platform->CreateKernel(ResampleImageKernel::GetName(), this->con);
+    if (this->blockMatchingParams != nullptr) {
+        this->blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), this->con);
+        this->optimiseKernel = platform->CreateKernel(OptimiseKernel::GetName(), this->con);
+    } else {
+        this->blockMatchingKernel = nullptr;
+        this->optimiseKernel = nullptr;
+    }
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::clearKernels()
-{
-  delete this->affineTransformation3DKernel;
-  delete this->resamplingKernel;
-  if (this->blockMatchingKernel != NULL)
-    delete this->blockMatchingKernel;
-  if (this->optimiseKernel != NULL)
-    delete this->optimiseKernel;
+void reg_aladin<T>::ClearKernels() {
+    delete this->affineTransformation3DKernel;
+    delete this->resamplingKernel;
+    if (this->blockMatchingKernel != nullptr)
+        delete this->blockMatchingKernel;
+    if (this->optimiseKernel != nullptr)
+        delete this->optimiseKernel;
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::GetDeformationField()
-{
-  this->affineTransformation3DKernel->template castTo<AffineDeformationFieldKernel>()->calculate();
+void reg_aladin<T>::GetDeformationField() {
+    this->affineTransformation3DKernel->template castTo<AffineDeformationFieldKernel>()->Calculate();
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::GetWarpedImage(int interp, float padding)
-{
-  this->GetDeformationField();
-  this->resamplingKernel->template castTo<ResampleImageKernel>()->calculate(interp, padding);
+void reg_aladin<T>::GetWarpedImage(int interp, float padding) {
+    this->GetDeformationField();
+    this->resamplingKernel->template castTo<ResampleImageKernel>()->Calculate(interp, padding);
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::UpdateTransformationMatrix(int type)
-{
-  this->blockMatchingKernel->template castTo<BlockMatchingKernel>()->calculate();
-  this->optimiseKernel->template castTo<OptimiseKernel>()->calculate(type);
+void reg_aladin<T>::UpdateTransformationMatrix(int type) {
+    this->blockMatchingKernel->template castTo<BlockMatchingKernel>()->Calculate();
+    this->optimiseKernel->template castTo<OptimiseKernel>()->Calculate(type);
 
 #ifndef NDEBUG
-  reg_mat44_disp(this->TransformationMatrix, (char *) "[NiftyReg DEBUG] updated forward matrix");
+    reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward matrix");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::initAladinContent(nifti_image *ref,
+void reg_aladin<T>::InitAladinContent(nifti_image *ref,
                                       nifti_image *flo,
                                       int *mask,
                                       mat44 *transMat,
                                       size_t bytes,
                                       unsigned int blockPercentage,
                                       unsigned int inlierLts,
-                                      unsigned int blockStepSize)
-{
-  if (this->platformCode == NR_PLATFORM_CPU)
-    this->con = new AladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
+                                      unsigned int blockStepSize) {
+    if (this->platformCode == NR_PLATFORM_CPU)
+        this->con = new AladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
 #ifdef _USE_CUDA
-  else if(platformCode == NR_PLATFORM_CUDA)
-    this->con = new CudaAladinContent(ref, flo, mask,transMat, bytes, blockPercentage, inlierLts, blockStepSize);
+    else if (platformCode == NR_PLATFORM_CUDA)
+        this->con = new CudaAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
 #endif
 #ifdef _USE_OPENCL
-  else if(platformCode == NR_PLATFORM_CL)
-    this->con = new ClAladinContent(ref, flo, mask,transMat, bytes, blockPercentage, inlierLts, blockStepSize);
+    else if (platformCode == NR_PLATFORM_CL)
+        this->con = new ClAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
 #endif
-  this->blockMatchingParams = this->con->AladinContent::getBlockMatchingParams();
+    this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::initAladinContent(nifti_image *ref,
+void reg_aladin<T>::InitAladinContent(nifti_image *ref,
                                       nifti_image *flo,
                                       int *mask,
                                       mat44 *transMat,
-                                      size_t bytes)
-{
-  if (this->platformCode == NR_PLATFORM_CPU)
-    this->con = new AladinContent(ref, flo, mask, transMat, bytes);
+                                      size_t bytes) {
+    if (this->platformCode == NR_PLATFORM_CPU)
+        this->con = new AladinContent(ref, flo, mask, transMat, bytes);
 #ifdef _USE_CUDA
-  else if(platformCode == NR_PLATFORM_CUDA)
-    this->con = new CudaAladinContent(ref, flo, mask,transMat, bytes);
+    else if (platformCode == NR_PLATFORM_CUDA)
+        this->con = new CudaAladinContent(ref, flo, mask, transMat, bytes);
 #endif
 #ifdef _USE_OPENCL
-  else if(platformCode == NR_PLATFORM_CL)
-    this->con = new ClAladinContent(ref, flo, mask,transMat, bytes);
+    else if (platformCode == NR_PLATFORM_CL)
+        this->con = new ClAladinContent(ref, flo, mask, transMat, bytes);
 #endif
-  this->blockMatchingParams = this->con->AladinContent::getBlockMatchingParams();
+    this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::clearAladinContent()
-{
-  delete this->con;
+void reg_aladin<T>::ClearAladinContent() {
+    delete this->con;
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::resolveMatrix(unsigned int iterations, const unsigned int optimizationFlag)
-{
-  unsigned int iteration = 0;
-  while (iteration < iterations) {
+void reg_aladin<T>::ResolveMatrix(unsigned int iterations, const unsigned int optimizationFlag) {
+    unsigned int iteration = 0;
+    while (iteration < iterations) {
 #ifndef NDEBUG
-    char text[255];
-    sprintf(text, "%s - level: %i/%i - iteration %i/%i",
-            optimizationFlag ? (char *)"Affine" : (char *)"Rigid",
-            this->CurrentLevel+1, this->NumberOfLevels, iteration+1, iterations);
-    reg_print_msg_debug(text);
+        char text[255];
+        sprintf(text, "%s - level: %i/%i - iteration %i/%i",
+                optimizationFlag ? (char *)"Affine" : (char *)"Rigid",
+                this->currentLevel + 1, this->numberOfLevels, iteration + 1, iterations);
+        reg_print_msg_debug(text);
 #endif
-    this->GetWarpedImage(this->Interpolation, this->WarpedPaddingValue);
-    this->UpdateTransformationMatrix(optimizationFlag);
+        this->GetWarpedImage(this->interpolation, this->warpedPaddingValue);
+        this->UpdateTransformationMatrix(optimizationFlag);
 
-    iteration++;
-  }
+        iteration++;
+    }
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::Run()
-{
-  this->InitialiseRegistration();
-
-  //Main loop over the levels:
-  for (this->CurrentLevel = 0; this->CurrentLevel < this->LevelsToPerform; this->CurrentLevel++)
-  {
-    this->initAladinContent(this->ReferencePyramid[CurrentLevel], this->FloatingPyramid[CurrentLevel],
-                            this->ReferenceMaskPyramid[CurrentLevel], this->TransformationMatrix, sizeof(T), this->BlockPercentage,
-                            this->InlierLts, this->BlockStepSize);
-    this->createKernels();
-
-    // Twice more iterations are performed during the first level
-    // All the blocks are used during the first level
-    const unsigned int maxNumberOfIterationToPerform = (CurrentLevel == 0) ? this->MaxIterations*2 : this->MaxIterations;
+void reg_aladin<T>::Run() {
+    this->InitialiseRegistration();
+
+    //Main loop over the levels:
+    for (this->currentLevel = 0; this->currentLevel < this->levelsToPerform; this->currentLevel++) {
+        this->InitAladinContent(this->referencePyramid[currentLevel], this->floatingPyramid[currentLevel],
+                                this->referenceMaskPyramid[currentLevel], this->transformationMatrix, sizeof(T), this->blockPercentage,
+                                this->inlierLts, this->blockStepSize);
+        this->CreateKernels();
+
+        // Twice more iterations are performed during the first level
+        // All the blocks are used during the first level
+        const unsigned int maxNumberOfIterationToPerform = (currentLevel == 0) ? this->maxIterations * 2 : this->maxIterations;
 
 #ifdef NDEBUG
-    if(this->Verbose)
-    {
+        if (this->verbose) {
 #endif
-      this->DebugPrintLevelInfoStart();
+            this->DebugPrintLevelInfoStart();
 #ifdef NDEBUG
-    }
+        }
 #endif
 
 #ifndef NDEBUG
-    if (this->con->getCurrentReference()->sform_code > 0)
-      reg_mat44_disp(&this->con->getCurrentReference()->sto_xyz, (char *) "[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)");
-    else
-      reg_mat44_disp(&this->con->getCurrentReference()->qto_xyz, (char *) "[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)");
-    if (this->con->getCurrentFloating()->sform_code > 0)
-      reg_mat44_disp(&this->con->getCurrentFloating()->sto_xyz, (char *) "[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)");
-    else
-      reg_mat44_disp(&this->con->getCurrentFloating()->qto_xyz, (char *) "[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)");
+        if (this->con->GetCurrentReference()->sform_code > 0)
+            reg_mat44_disp(&this->con->GetCurrentReference()->sto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)");
+        else
+            reg_mat44_disp(&this->con->GetCurrentReference()->qto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)");
+        if (this->con->GetCurrentFloating()->sform_code > 0)
+            reg_mat44_disp(&this->con->GetCurrentFloating()->sto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)");
+        else
+            reg_mat44_disp(&this->con->GetCurrentFloating()->qto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)");
 #endif
 
-    /* ****************** */
-    /* Rigid registration */
-    /* ****************** */
-    if ((this->PerformRigid && !this->PerformAffine) || (this->PerformAffine && this->PerformRigid && this->CurrentLevel == 0))
-    {
-      const unsigned int ratio = (this->PerformAffine && this->PerformRigid && this->CurrentLevel == 0) ? 4 : 1;
-      resolveMatrix(maxNumberOfIterationToPerform * ratio, RIGID);
-    }
-
-    /* ******************* */
-    /* Affine registration */
-    /* ******************* */
-    if (this->PerformAffine)
-      resolveMatrix(maxNumberOfIterationToPerform, AFFINE);
-
-    // SOME CLEANING IS PERFORMED
-    this->clearKernels();
-    this->clearAladinContent();
-    this->ClearCurrentInputImage();
+        /* ****************** */
+        /* Rigid registration */
+        /* ****************** */
+        if ((this->performRigid && !this->performAffine) || (this->performAffine && this->performRigid && this->currentLevel == 0)) {
+            const unsigned int ratio = (this->performAffine && this->performRigid && this->currentLevel == 0) ? 4 : 1;
+            ResolveMatrix(maxNumberOfIterationToPerform * ratio, RIGID);
+        }
+
+        /* ******************* */
+        /* Affine registration */
+        /* ******************* */
+        if (this->performAffine)
+            ResolveMatrix(maxNumberOfIterationToPerform, AFFINE);
+
+        // SOME CLEANING IS PERFORMED
+        this->ClearKernels();
+        this->ClearAladinContent();
+        this->ClearCurrentInputImage();
 
 #ifdef NDEBUG
-    if(this->Verbose)
-    {
+        if (this->verbose) {
 #endif
-      this->DebugPrintLevelInfoEnd();
-      reg_print_info(this->executableName, "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -");
+            this->DebugPrintLevelInfoEnd();
+            reg_print_info(this->executableName, "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -");
 #ifdef NDEBUG
-    }
+        }
 #endif
 
-  }
+    }
 
 #ifndef NDEBUG
-  reg_print_msg_debug("reg_aladin::Run() done");
+    reg_print_msg_debug("reg_aladin::Run() done");
 #endif
-  return;
+    return;
 }
 /* *************************************************************** */
 template<class T>
-nifti_image *reg_aladin<T>::GetFinalWarpedImage()
-{
-  int floatingType = this->InputFloating->datatype; //t_dev ask before touching this!
-  // The initial images are used
-  if (this->InputReference == NULL || this->InputFloating == NULL || this->TransformationMatrix == NULL) {
-    reg_print_fct_error("reg_aladin::GetFinalWarpedImage()");
-    reg_print_msg_error("The reference, floating images and the transformation have to be defined");
-    reg_exit();
-  }
-
-  int *mask = (int *)calloc(this->InputReference->nx*this->InputReference->ny*this->InputReference->nz,
-                            sizeof(int));
-
-  reg_aladin<T>::initAladinContent(this->InputReference,
-                                   this->InputFloating,
-                                   mask,
-                                   this->TransformationMatrix,
-                                   sizeof(T));
-  reg_aladin<T>::createKernels();
-
-  reg_aladin<T>::GetWarpedImage(3, this->WarpedPaddingValue); // cubic spline interpolation
-  nifti_image *CurrentWarped = this->con->getCurrentWarped(floatingType);
-
-  free(mask);
-  nifti_image *resultImage = nifti_copy_nim_info(CurrentWarped);
-  resultImage->cal_min = this->InputFloating->cal_min;
-  resultImage->cal_max = this->InputFloating->cal_max;
-  resultImage->scl_slope = this->InputFloating->scl_slope;
-  resultImage->scl_inter = this->InputFloating->scl_inter;
-  resultImage->data = (void *) malloc(resultImage->nvox * resultImage->nbyper);
-  memcpy(resultImage->data, CurrentWarped->data, resultImage->nvox * resultImage->nbyper);
-
-  reg_aladin<T>::clearKernels();
-  reg_aladin<T>::clearAladinContent();
-  return resultImage;
+nifti_image* reg_aladin<T>::GetFinalWarpedImage() {
+    int floatingType = this->inputFloating->datatype; //t_dev ask before touching this!
+    // The initial images are used
+    if (this->inputReference == nullptr || this->inputFloating == nullptr || this->transformationMatrix == nullptr) {
+        reg_print_fct_error("reg_aladin::GetFinalWarpedImage()");
+        reg_print_msg_error("The reference, floating images and the transformation have to be defined");
+        reg_exit();
+    }
+
+    int *mask = (int *)calloc(this->inputReference->nx * this->inputReference->ny * this->inputReference->nz,
+                              sizeof(int));
+
+    reg_aladin<T>::InitAladinContent(this->inputReference,
+                                     this->inputFloating,
+                                     mask,
+                                     this->transformationMatrix,
+                                     sizeof(T));
+    reg_aladin<T>::CreateKernels();
+
+    reg_aladin<T>::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation
+    nifti_image *currentWarped = this->con->GetCurrentWarped(floatingType);
+
+    free(mask);
+    nifti_image *resultImage = nifti_copy_nim_info(currentWarped);
+    resultImage->cal_min = this->inputFloating->cal_min;
+    resultImage->cal_max = this->inputFloating->cal_max;
+    resultImage->scl_slope = this->inputFloating->scl_slope;
+    resultImage->scl_inter = this->inputFloating->scl_inter;
+    resultImage->data = (void *)malloc(resultImage->nvox * resultImage->nbyper);
+    memcpy(resultImage->data, currentWarped->data, resultImage->nvox * resultImage->nbyper);
+
+    reg_aladin<T>::ClearKernels();
+    reg_aladin<T>::ClearAladinContent();
+    return resultImage;
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::DebugPrintLevelInfoStart()
-{
-  /* Display some parameters specific to the current level */
-  char text[255];
-  sprintf(text, "Current level %i / %i", this->CurrentLevel + 1, this->NumberOfLevels);
-  reg_print_info(this->executableName,text);
-  sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-          this->con->getCurrentReference()->nx,
-          this->con->getCurrentReference()->ny,
-          this->con->getCurrentReference()->nz,
-          this->con->getCurrentReference()->dx,
-          this->con->getCurrentReference()->dy,
-          this->con->getCurrentReference()->dz);
-  reg_print_info(this->executableName,text);
-  sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-          this->con->getCurrentFloating()->nx,
-          this->con->getCurrentFloating()->ny,
-          this->con->getCurrentFloating()->nz,
-          this->con->getCurrentFloating()->dx,
-          this->con->getCurrentFloating()->dy,
-          this->con->getCurrentFloating()->dz);
-  reg_print_info(this->executableName,text);
-  if (this->con->getCurrentReference()->nz == 1){
-    reg_print_info(this->executableName, "Block size = [4 4 1]");
-  }
-  else reg_print_info(this->executableName, "Block size = [4 4 4]");
-  reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-  sprintf(text, "Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0],
-      this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]);
-  reg_print_info(this->executableName,text);
-  reg_mat44_disp(this->TransformationMatrix, (char *) "[reg_aladin] Initial transformation matrix:");
+void reg_aladin<T>::DebugPrintLevelInfoStart() {
+    /* Display some parameters specific to the current level */
+    char text[255];
+    sprintf(text, "Current level %i / %i", this->currentLevel + 1, this->numberOfLevels);
+    reg_print_info(this->executableName, text);
+    sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
+            this->con->GetCurrentReference()->nx,
+            this->con->GetCurrentReference()->ny,
+            this->con->GetCurrentReference()->nz,
+            this->con->GetCurrentReference()->dx,
+            this->con->GetCurrentReference()->dy,
+            this->con->GetCurrentReference()->dz);
+    reg_print_info(this->executableName, text);
+    sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
+            this->con->GetCurrentFloating()->nx,
+            this->con->GetCurrentFloating()->ny,
+            this->con->GetCurrentFloating()->nz,
+            this->con->GetCurrentFloating()->dx,
+            this->con->GetCurrentFloating()->dy,
+            this->con->GetCurrentFloating()->dz);
+    reg_print_info(this->executableName, text);
+    if (this->con->GetCurrentReference()->nz == 1) {
+        reg_print_info(this->executableName, "Block size = [4 4 1]");
+    } else reg_print_info(this->executableName, "Block size = [4 4 4]");
+    reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    sprintf(text, "Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0],
+            this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]);
+    reg_print_info(this->executableName, text);
+    reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin] Initial transformation matrix:");
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::DebugPrintLevelInfoEnd()
-{
-  reg_mat44_disp(this->TransformationMatrix, (char *) "[reg_aladin] Final transformation matrix:");
+void reg_aladin<T>::DebugPrintLevelInfoEnd() {
+    reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin] Final transformation matrix:");
 }
 /* *************************************************************** */
-
-#endif //#ifndef _REG_ALADIN_CPP
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 471d31aa..3485a303 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -10,11 +10,12 @@
  *
  */
 
-#ifndef _REG_ALADIN_H
-#define _REG_ALADIN_H
+#pragma once
+
 #define CONVERGENCE_EPS 0.00001
 #define RIGID 0
 #define AFFINE 1
+
 #include "_reg_macros.h"
 #include "_reg_resampling.h"
 #include "_reg_blockMatching.h"
@@ -56,238 +57,219 @@ class Kernel;
  * simple implementation.
  */
 template<class T>
-class reg_aladin
-{
-    protected:
-        char *executableName;
-        nifti_image *InputReference;
-        nifti_image *InputFloating;
-        nifti_image *InputReferenceMask;
-        nifti_image **ReferencePyramid;
-        nifti_image **FloatingPyramid;
-        int **ReferenceMaskPyramid;
-        int *activeVoxelNumber; ///TODO Needs to be removed
-
-        char *InputTransformName;
-        mat44 *TransformationMatrix;
-
-        bool Verbose;
-
-        unsigned int MaxIterations;
-
-        unsigned int CurrentLevel;
-        unsigned int NumberOfLevels;
-        unsigned int LevelsToPerform;
-
-        bool PerformRigid;
-        bool PerformAffine;
-        int captureRangeVox;
-
-        int BlockPercentage;
-        int InlierLts;
-        int BlockStepSize;
-        _reg_blockMatchingParam *blockMatchingParams;
-
-        bool AlignCentre;
-        int AlignCentreMass;
-
-        int Interpolation;
-
-        float FloatingSigma;
-        float ReferenceSigma;
-
-        float ReferenceUpperThreshold;
-        float ReferenceLowerThreshold;
-        float FloatingUpperThreshold;
-        float FloatingLowerThreshold;
-        float WarpedPaddingValue;
-
-        Platform *platform;
-        int platformCode;
-        unsigned gpuIdx;
-
-        bool TestMatrixConvergence(mat44 *mat);
-
-        virtual void InitialiseRegistration();
-        virtual void ClearCurrentInputImage();
-
-        virtual void GetDeformationField();
-        virtual void GetWarpedImage(int, float padding);
-        virtual void UpdateTransformationMatrix(int);
-
-        void (*funcProgressCallback)(float pcntProgress, void *params);
-        void *paramsProgressCallback;
-
-        //platform factory methods
-        virtual void initAladinContent(nifti_image *ref,
-                                 nifti_image *flo,
-                                 int *mask,
-                                 mat44 *transMat,
-                                 size_t bytes,
-                                 unsigned int blockPercentage,
-                                 unsigned int inlierLts,
-                                 unsigned int blockStepSize);
-        virtual void initAladinContent(nifti_image *ref,
-                                 nifti_image *flo,
-                                 int *mask,
-                                 mat44 *transMat,
-                                 size_t bytes);
-        virtual void clearAladinContent();
-        virtual void createKernels();
-        virtual void clearKernels();
-
-    public:
-        reg_aladin();
-        virtual ~reg_aladin();
-        GetStringMacro(executableName)
-
-        //No allocating of the images here...
-        void SetInputReference(nifti_image *input)
-        {
-            this->InputReference = input;
-        }
-        nifti_image *GetInputReference()
-        {
-            return this->InputReference;
-        }
-        void SetInputFloating(nifti_image *input)
-        {
-            this->InputFloating = input;
-        }
-        nifti_image *GetInputFloating()
-        {
-            return this->InputFloating;
-        }
-
-        void SetInputMask(nifti_image *input)
-        {
-            this->InputReferenceMask = input;
-        }
-        nifti_image *GetInputMask()
-        {
-            return this->InputReferenceMask;
-        }
-
-        void SetInputTransform(const char *filename);
-        mat44 *GetInputTransform()
-        {
-            return this->InputTransform;
-        }
-
-        mat44 *GetTransformationMatrix()
-        {
-            return this->TransformationMatrix;
-        }
-        nifti_image *GetFinalWarpedImage();
-
-        Platform* getPlaform();
-        void setPlatformCode(const int platformCodeIn)
-        {
-            this->platformCode = platformCodeIn;
-        }
-        void setGpuIdx(unsigned gpuIdxIn){
-           this->gpuIdx = gpuIdxIn;
-        }
-
-        SetMacro(MaxIterations,unsigned int)
-        GetMacro(MaxIterations,unsigned int)
-
-        SetMacro(NumberOfLevels,unsigned int)
-        GetMacro(NumberOfLevels,unsigned int)
-
-        SetMacro(LevelsToPerform,unsigned int)
-        GetMacro(LevelsToPerform,unsigned int)
-
-        SetMacro(BlockPercentage,int)
-        GetMacro(BlockPercentage,int)
-
-        SetMacro(BlockStepSize,int)
-        GetMacro(BlockStepSize,int)
-
-        SetMacro(InlierLts,float)
-        GetMacro(InlierLts,float)
-
-        SetMacro(ReferenceSigma,float)
-        GetMacro(ReferenceSigma,float)
-
-        SetMacro(ReferenceUpperThreshold,float)
-        GetMacro(ReferenceUpperThreshold,float)
-        SetMacro(ReferenceLowerThreshold,float)
-        GetMacro(ReferenceLowerThreshold,float)
-
-        SetMacro(FloatingUpperThreshold,float)
-        GetMacro(FloatingUpperThreshold,float)
-        SetMacro(FloatingLowerThreshold,float)
-        GetMacro(FloatingLowerThreshold,float)
-
-        SetMacro(WarpedPaddingValue,float)
-        GetMacro(WarpedPaddingValue,float)
-
-        SetMacro(FloatingSigma,float)
-        GetMacro(FloatingSigma,float)
-
-        SetMacro(PerformRigid,bool)
-        GetMacro(PerformRigid,bool)
-        BooleanMacro(PerformRigid, bool)
-
-        SetMacro(PerformAffine,bool)
-        GetMacro(PerformAffine,bool)
-        BooleanMacro(PerformAffine, bool)
-
-        GetMacro(AlignCentre,bool)
-        SetMacro(AlignCentre,bool)
-        BooleanMacro(AlignCentre, bool)
-        GetMacro(AlignCentreMass,int)
-        SetMacro(AlignCentreMass,int)
-
-        SetClampMacro(Interpolation,int,0,3)
-        GetMacro(Interpolation, int)
-
-        virtual void SetInputFloatingMask(nifti_image*)
-        {
-            reg_print_fct_warn("reg_aladin::SetInputFloatingMask()");
-            reg_print_msg_warn("Floating mask not used in the asymmetric global registration");
-        }
-        void SetInterpolationToNearestNeighbor()
-        {
-            this->SetInterpolation(0);
-        }
-        void SetInterpolationToTrilinear()
-        {
-            this->SetInterpolation(1);
-        }
-        void SetInterpolationToCubic()
-        {
-            this->SetInterpolation(3);
-        }
-        void setCaptureRangeVox(int captureRangeIn)
-        {
-            this->captureRangeVox = captureRangeIn;
-        }
-
-        virtual int Check();
-        virtual int Print();
-        virtual void Run();
-
-        virtual void DebugPrintLevelInfoStart();
-        virtual void DebugPrintLevelInfoEnd();
-        virtual void SetVerbose(bool _verbose);
-
-        void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress,
-                                                                  void *params),
-                                         void *paramsProgCallback)
-        {
-            funcProgressCallback = funcProgCallback;
-            paramsProgressCallback = paramsProgCallback;
-        }
-        AladinContent *con;
-
-    private:
-        Kernel *affineTransformation3DKernel,*blockMatchingKernel;
-        Kernel *optimiseKernel, *resamplingKernel;
-        void resolveMatrix(unsigned int iterations,
-                           const unsigned int optimizationFlag);
+class reg_aladin {
+protected:
+    char *executableName;
+    nifti_image *inputReference;
+    nifti_image *inputFloating;
+    nifti_image *inputReferenceMask;
+    nifti_image **referencePyramid;
+    nifti_image **floatingPyramid;
+    int **referenceMaskPyramid;
+    int *activeVoxelNumber; ///TODO Needs to be removed
+
+    char *inputTransformName;
+    mat44 *transformationMatrix;
+
+    bool verbose;
+
+    unsigned int maxIterations;
+
+    unsigned int currentLevel;
+    unsigned int numberOfLevels;
+    unsigned int levelsToPerform;
+
+    bool performRigid;
+    bool performAffine;
+    int captureRangeVox;
+
+    int blockPercentage;
+    int inlierLts;
+    int blockStepSize;
+    _reg_blockMatchingParam *blockMatchingParams;
+
+    bool alignCentre;
+    int alignCentreMass;
+
+    int interpolation;
+
+    float floatingSigma;
+    float referenceSigma;
+
+    float referenceUpperThreshold;
+    float referenceLowerThreshold;
+    float floatingUpperThreshold;
+    float floatingLowerThreshold;
+    float warpedPaddingValue;
+
+    Platform *platform;
+    int platformCode;
+    unsigned gpuIdx;
+
+    bool TestMatrixConvergence(mat44 *mat);
+
+    virtual void InitialiseRegistration();
+    virtual void ClearCurrentInputImage();
+
+    virtual void GetDeformationField();
+    virtual void GetWarpedImage(int, float padding);
+    virtual void UpdateTransformationMatrix(int);
+
+    void (*funcProgressCallback)(float pcntProgress, void *params);
+    void *paramsProgressCallback;
+
+    //platform factory methods
+    virtual void InitAladinContent(nifti_image *ref,
+                                   nifti_image *flo,
+                                   int *mask,
+                                   mat44 *transMat,
+                                   size_t bytes,
+                                   unsigned int blockPercentage,
+                                   unsigned int inlierLts,
+                                   unsigned int blockStepSize);
+    virtual void InitAladinContent(nifti_image *ref,
+                                   nifti_image *flo,
+                                   int *mask,
+                                   mat44 *transMat,
+                                   size_t bytes);
+    virtual void ClearAladinContent();
+    virtual void CreateKernels();
+    virtual void ClearKernels();
+
+public:
+    reg_aladin();
+    virtual ~reg_aladin();
+    GetStringMacro(ExecutableName, executableName);
+
+    //No allocating of the images here...
+    void SetInputReference(nifti_image *input) {
+        this->inputReference = input;
+    }
+    nifti_image* GetInputReference() {
+        return this->inputReference;
+    }
+    void SetInputFloating(nifti_image *input) {
+        this->inputFloating = input;
+    }
+    nifti_image* GetInputFloating() {
+        return this->inputFloating;
+    }
+
+    void SetInputMask(nifti_image *input) {
+        this->inputReferenceMask = input;
+    }
+    nifti_image* GetInputMask() {
+        return this->inputReferenceMask;
+    }
+
+    void SetInputTransform(const char *filename);
+    mat44* GetInputTransform() {
+        return this->InputTransform;
+    }
+
+    mat44* GetTransformationMatrix() {
+        return this->transformationMatrix;
+    }
+    nifti_image* GetFinalWarpedImage();
+
+    void SetPlatformCode(const int platformCodeIn) {
+        this->platformCode = platformCodeIn;
+    }
+    void SetGpuIdx(unsigned gpuIdxIn) {
+        this->gpuIdx = gpuIdxIn;
+    }
+
+    SetMacro(MaxIterations, maxIterations, unsigned int);
+    GetMacro(MaxIterations, maxIterations, unsigned int);
+
+    SetMacro(NumberOfLevels, numberOfLevels, unsigned int);
+    GetMacro(NumberOfLevels, numberOfLevels, unsigned int);
+
+    SetMacro(LevelsToPerform, levelsToPerform, unsigned int);
+    GetMacro(LevelsToPerform, levelsToPerform, unsigned int);
+
+    SetMacro(BlockPercentage, blockPercentage, int);
+    GetMacro(BlockPercentage, blockPercentage, int);
+
+    SetMacro(BlockStepSize, blockStepSize, int);
+    GetMacro(BlockStepSize, blockStepSize, int);
+
+    SetMacro(InlierLts, inlierLts, int);
+    GetMacro(InlierLts, inlierLts, int);
+
+    SetMacro(ReferenceSigma, referenceSigma, float);
+    GetMacro(ReferenceSigma, referenceSigma, float);
+
+    SetMacro(ReferenceUpperThreshold, referenceUpperThreshold, float);
+    GetMacro(ReferenceUpperThreshold, referenceUpperThreshold, float);
+    SetMacro(ReferenceLowerThreshold, referenceLowerThreshold, float);
+    GetMacro(ReferenceLowerThreshold, referenceLowerThreshold, float);
+
+    SetMacro(FloatingUpperThreshold, floatingUpperThreshold, float);
+    GetMacro(FloatingUpperThreshold, floatingUpperThreshold, float);
+    SetMacro(FloatingLowerThreshold, floatingLowerThreshold, float);
+    GetMacro(FloatingLowerThreshold, floatingLowerThreshold, float);
+
+    SetMacro(WarpedPaddingValue, warpedPaddingValue, float);
+    GetMacro(WarpedPaddingValue, warpedPaddingValue, float);
+
+    SetMacro(FloatingSigma, floatingSigma, float);
+    GetMacro(FloatingSigma, floatingSigma, float);
+
+    SetMacro(PerformRigid, performRigid, bool);
+    GetMacro(PerformRigid, performRigid, bool);
+    BooleanMacro(PerformRigid, bool);
+
+    SetMacro(PerformAffine, performAffine, bool);
+    GetMacro(PerformAffine, performAffine, bool);
+    BooleanMacro(PerformAffine, bool);
+
+    GetMacro(AlignCentre, alignCentre, bool);
+    SetMacro(AlignCentre, alignCentre, bool);
+    BooleanMacro(AlignCentre, bool);
+    GetMacro(AlignCentreMass, alignCentreMass, int);
+    SetMacro(AlignCentreMass, alignCentreMass, int);
+
+    SetClampMacro(Interpolation, interpolation, int, 0, 3);
+    GetMacro(Interpolation, interpolation, int);
+
+    virtual void SetInputFloatingMask(nifti_image*) {
+        reg_print_fct_warn("reg_aladin::SetInputFloatingMask()");
+        reg_print_msg_warn("Floating mask not used in the asymmetric global registration");
+    }
+    void SetInterpolationToNearestNeighbor() {
+        this->SetInterpolation(0);
+    }
+    void SetInterpolationToTrilinear() {
+        this->SetInterpolation(1);
+    }
+    void SetInterpolationToCubic() {
+        this->SetInterpolation(3);
+    }
+    void SetCaptureRangeVox(int captureRangeIn) {
+        this->captureRangeVox = captureRangeIn;
+    }
+
+    virtual int Check();
+    virtual int Print();
+    virtual void Run();
+
+    virtual void DebugPrintLevelInfoStart();
+    virtual void DebugPrintLevelInfoEnd();
+    virtual void SetVerbose(bool _verbose);
+
+    void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress, void *params),
+                                     void *paramsProgCallback) {
+        funcProgressCallback = funcProgCallback;
+        paramsProgressCallback = paramsProgCallback;
+    }
+    AladinContent *con;
+
+private:
+    Kernel *affineTransformation3DKernel, *blockMatchingKernel;
+    Kernel *optimiseKernel, *resamplingKernel;
+    void ResolveMatrix(unsigned int iterations,
+                       const unsigned int optimizationFlag);
 };
 
 #include "_reg_aladin.cpp"
-#endif // _REG_ALADIN_H
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index 553e0477..88b68d3b 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -1,6 +1,3 @@
-#ifndef _REG_ALADIN_SYM_CPP
-#define _REG_ALADIN_SYM_CPP
-
 #include "_reg_aladin_sym.h"
 #include "_reg_maths_eigen.h"
 
@@ -11,23 +8,23 @@ reg_aladin_sym<T>::reg_aladin_sym ()
 {
    this->executableName=(char*) "reg_aladin_sym";
 
-   this->InputFloatingMask=NULL;
-   this->FloatingMaskPyramid=NULL;
-   this->BackwardActiveVoxelNumber=NULL;
+   this->InputFloatingMask=nullptr;
+   this->FloatingMaskPyramid=nullptr;
+   this->BackwardActiveVoxelNumber=nullptr;
 
    this->BackwardTransformationMatrix=new mat44;
 
-   this->bAffineTransformation3DKernel = NULL;
-   this->bConvolutionKernel=NULL;
-   this->bBlockMatchingKernel=NULL;
-   this->bOptimiseKernel=NULL;
-   this->bResamplingKernel=NULL;
+   this->bAffineTransformation3DKernel = nullptr;
+   this->bConvolutionKernel=nullptr;
+   this->bBlockMatchingKernel=nullptr;
+   this->bOptimiseKernel=nullptr;
+   this->bResamplingKernel=nullptr;
 
-   this->backCon = NULL;
-   this->BackwardBlockMatchingParams=NULL;
+   this->backCon = nullptr;
+   this->BackwardBlockMatchingParams=nullptr;
 
-   this->FloatingUpperThreshold=std::numeric_limits<T>::max();
-   this->FloatingLowerThreshold=-std::numeric_limits<T>::max();
+   this->floatingUpperThreshold=std::numeric_limits<T>::max();
+   this->floatingLowerThreshold=-std::numeric_limits<T>::max();
 
 #ifndef NDEBUG
    reg_print_msg_debug("reg_aladin_sym constructor called");
@@ -38,27 +35,27 @@ reg_aladin_sym<T>::reg_aladin_sym ()
 template <class T>
 reg_aladin_sym<T>::~reg_aladin_sym()
 {
-   if(this->BackwardTransformationMatrix!=NULL)
+   if(this->BackwardTransformationMatrix!=nullptr)
       delete this->BackwardTransformationMatrix;
-   this->BackwardTransformationMatrix=NULL;
+   this->BackwardTransformationMatrix=nullptr;
 
-   if(this->FloatingMaskPyramid!=NULL)
+   if(this->FloatingMaskPyramid!=nullptr)
    {
-      for(unsigned int i=0; i<this->LevelsToPerform; ++i)
+      for(unsigned int i=0; i<this->levelsToPerform; ++i)
       {
-         if(this->FloatingMaskPyramid[i]!=NULL)
+         if(this->FloatingMaskPyramid[i]!=nullptr)
          {
-           if(this->FloatingMaskPyramid!=NULL)
+           if(this->FloatingMaskPyramid!=nullptr)
              free(this->FloatingMaskPyramid[i]);
-            this->FloatingMaskPyramid[i]=NULL;
+            this->FloatingMaskPyramid[i]=nullptr;
          }
       }
       free(this->FloatingMaskPyramid);
-      this->FloatingMaskPyramid=NULL;
+      this->FloatingMaskPyramid=nullptr;
    }
-   if(this->BackwardActiveVoxelNumber!=NULL)
+   if(this->BackwardActiveVoxelNumber!=nullptr)
      free(this->BackwardActiveVoxelNumber);
-   this->BackwardActiveVoxelNumber=NULL;
+   this->BackwardActiveVoxelNumber=nullptr;
 
 #ifndef NDEBUG
    reg_print_msg_debug("reg_aladin_sym destructor called");
@@ -80,40 +77,40 @@ void reg_aladin_sym<T>::InitialiseRegistration()
 #endif
 
    reg_aladin<T>::InitialiseRegistration();
-   this->FloatingMaskPyramid = (int **) malloc(this->LevelsToPerform*sizeof(int *));
-   this->BackwardActiveVoxelNumber= (int *)malloc(this->LevelsToPerform*sizeof(int));
-   if (this->InputFloatingMask!=NULL)
+   this->FloatingMaskPyramid = (int **) malloc(this->levelsToPerform*sizeof(int *));
+   this->BackwardActiveVoxelNumber= (int *)malloc(this->levelsToPerform*sizeof(int));
+   if (this->InputFloatingMask!=nullptr)
    {
       reg_createMaskPyramid<T>(this->InputFloatingMask,
                                this->FloatingMaskPyramid,
-                               this->NumberOfLevels,
-                               this->LevelsToPerform,
+                               this->numberOfLevels,
+                               this->levelsToPerform,
                                this->BackwardActiveVoxelNumber);
    }
    else
    {
-      for(unsigned int l=0; l<this->LevelsToPerform; ++l)
+      for(unsigned int l=0; l<this->levelsToPerform; ++l)
       {
-         this->BackwardActiveVoxelNumber[l]=this->FloatingPyramid[l]->nx*this->FloatingPyramid[l]->ny*this->FloatingPyramid[l]->nz;
+         this->BackwardActiveVoxelNumber[l]=this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz;
          this->FloatingMaskPyramid[l]=(int *)calloc(this->BackwardActiveVoxelNumber[l],sizeof(int));
       }
    }
 
    // CHECK THE THRESHOLD VALUES TO UPDATE THE MASK
-   if(this->FloatingUpperThreshold!=std::numeric_limits<T>::max())
+   if(this->floatingUpperThreshold!=std::numeric_limits<T>::max())
    {
-      for(unsigned int l=0; l<this->LevelsToPerform; ++l)
+      for(unsigned int l=0; l<this->levelsToPerform; ++l)
       {
-         T *refPtr = static_cast<T *>(this->FloatingPyramid[l]->data);
+         T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
          int *mskPtr = this->FloatingMaskPyramid[l];
          size_t removedVoxel=0;
          for(size_t i=0;
-               i<(size_t)this->FloatingPyramid[l]->nx*this->FloatingPyramid[l]->ny*this->FloatingPyramid[l]->nz;
+               i<(size_t)this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz;
                ++i)
          {
             if(mskPtr[i]>-1)
             {
-               if(refPtr[i]>this->FloatingUpperThreshold)
+               if(refPtr[i]>this->floatingUpperThreshold)
                {
                   ++removedVoxel;
                   mskPtr[i]=-1;
@@ -123,20 +120,20 @@ void reg_aladin_sym<T>::InitialiseRegistration()
          this->BackwardActiveVoxelNumber[l] -= removedVoxel;
       }
    }
-   if(this->FloatingLowerThreshold!=-std::numeric_limits<T>::max())
+   if(this->floatingLowerThreshold!=-std::numeric_limits<T>::max())
    {
-      for(unsigned int l=0; l<this->LevelsToPerform; ++l)
+      for(unsigned int l=0; l<this->levelsToPerform; ++l)
       {
-         T *refPtr = static_cast<T *>(this->FloatingPyramid[l]->data);
+         T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
          int *mskPtr = this->FloatingMaskPyramid[l];
          size_t removedVoxel=0;
          for(size_t i=0;
-               i<(size_t)this->FloatingPyramid[l]->nx*this->FloatingPyramid[l]->ny*this->FloatingPyramid[l]->nz;
+               i<(size_t)this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz;
                ++i)
          {
             if(mskPtr[i]>-1)
             {
-               if(refPtr[i]<this->FloatingLowerThreshold)
+               if(refPtr[i]<this->floatingLowerThreshold)
                {
                   ++removedVoxel;
                   mskPtr[i]=-1;
@@ -147,20 +144,20 @@ void reg_aladin_sym<T>::InitialiseRegistration()
       }
    }
 
-   if(this->AlignCentreMass==1 && this->InputTransformName==NULL)
+   if(this->alignCentreMass==1 && this->inputTransformName==nullptr)
    {
-      if(!this->InputReferenceMask && !this->InputFloatingMask){
+      if(!this->inputReferenceMask && !this->InputFloatingMask){
          reg_print_msg_error("The masks' centre of mass can only be used when two masks are specified");
          reg_exit();
       }
       float referenceCentre[3]={0,0,0};
       float referenceCount=0;
-      reg_tools_changeDatatype<float>(this->InputReferenceMask);
-      float *refMaskPtr=static_cast<float *>(this->InputReferenceMask->data);
+      reg_tools_changeDatatype<float>(this->inputReferenceMask);
+      float *refMaskPtr=static_cast<float *>(this->inputReferenceMask->data);
       size_t refIndex=0;
-      for(int z=0;z<this->InputReferenceMask->nz;++z){
-         for(int y=0;y<this->InputReferenceMask->ny;++y){
-            for(int x=0;x<this->InputReferenceMask->nx;++x){
+      for(int z=0;z<this->inputReferenceMask->nz;++z){
+         for(int y=0;y<this->inputReferenceMask->ny;++y){
+            for(int x=0;x<this->inputReferenceMask->nx;++x){
                if(refMaskPtr[refIndex]!=0.f){
                   referenceCentre[0]+=x;
                   referenceCentre[1]+=y;
@@ -175,8 +172,8 @@ void reg_aladin_sym<T>::InitialiseRegistration()
       referenceCentre[1]/=referenceCount;
       referenceCentre[2]/=referenceCount;
       float refCOG[3];
-      if(this->InputReference->sform_code>0)
-         reg_mat44_mul(&(this->InputReference->sto_xyz),referenceCentre,refCOG);
+      if(this->inputReference->sform_code>0)
+         reg_mat44_mul(&(this->inputReference->sto_xyz),referenceCentre,refCOG);
 
       float floatingCentre[3]={0,0,0};
       float floatingCount=0;
@@ -200,21 +197,21 @@ void reg_aladin_sym<T>::InitialiseRegistration()
       floatingCentre[1]/=floatingCount;
       floatingCentre[2]/=floatingCount;
       float floCOG[3];
-      if(this->InputFloating->sform_code>0)
-         reg_mat44_mul(&(this->InputFloating->sto_xyz),floatingCentre,floCOG);
-      reg_mat44_eye(this->TransformationMatrix);
-      this->TransformationMatrix->m[0][3]=floCOG[0]-refCOG[0];
-      this->TransformationMatrix->m[1][3]=floCOG[1]-refCOG[1];
-      this->TransformationMatrix->m[2][3]=floCOG[2]-refCOG[2];
+      if(this->inputFloating->sform_code>0)
+         reg_mat44_mul(&(this->inputFloating->sto_xyz),floatingCentre,floCOG);
+      reg_mat44_eye(this->transformationMatrix);
+      this->transformationMatrix->m[0][3]=floCOG[0]-refCOG[0];
+      this->transformationMatrix->m[1][3]=floCOG[1]-refCOG[1];
+      this->transformationMatrix->m[2][3]=floCOG[2]-refCOG[2];
    }
-   *(this->BackwardTransformationMatrix) = nifti_mat44_inverse(*(this->TransformationMatrix));
+   *(this->BackwardTransformationMatrix) = nifti_mat44_inverse(*(this->transformationMatrix));
 
 }
 /* *************************************************************** */
 template <class T>
 void reg_aladin_sym<T>::GetBackwardDeformationField()
 {
-   this->bAffineTransformation3DKernel->template castTo<AffineDeformationFieldKernel>()->calculate();
+   this->bAffineTransformation3DKernel->template castTo<AffineDeformationFieldKernel>()->Calculate();
 }
 /* *************************************************************** */
 template <class T>
@@ -222,7 +219,7 @@ void reg_aladin_sym<T>::GetWarpedImage(int interp, float padding)
 {
    reg_aladin<T>::GetWarpedImage(interp, padding);
    this->GetBackwardDeformationField();
-   this->bResamplingKernel->template castTo<ResampleImageKernel>()->calculate(interp, padding);
+   this->bResamplingKernel->template castTo<ResampleImageKernel>()->Calculate(interp, padding);
 
 }
 /* *************************************************************** */
@@ -232,61 +229,61 @@ void reg_aladin_sym<T>::UpdateTransformationMatrix(int type){
   reg_aladin<T>::UpdateTransformationMatrix(type);
 
   // Update now the backward transformation matrix
-  this->bBlockMatchingKernel->template castTo<BlockMatchingKernel>()->calculate();
-  this->bOptimiseKernel->template castTo<OptimiseKernel>()->calculate(type);
+  this->bBlockMatchingKernel->template castTo<BlockMatchingKernel>()->Calculate();
+  this->bOptimiseKernel->template castTo<OptimiseKernel>()->Calculate(type);
 
 #ifndef NDEBUG
-   reg_mat44_disp(this->TransformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated forward transformation matrix");
+   reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated forward transformation matrix");
    reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated backward transformation matrix");
 #endif
    // Forward and backward matrix are inverted
-   mat44 fInverted = nifti_mat44_inverse(*(this->TransformationMatrix));
+   mat44 fInverted = nifti_mat44_inverse(*(this->transformationMatrix));
    mat44 bInverted = nifti_mat44_inverse(*(this->BackwardTransformationMatrix));
 
    // We average the forward and inverted backward matrix
-   *(this->TransformationMatrix)=reg_mat44_avg2(this->TransformationMatrix, &bInverted );
+   *(this->transformationMatrix)=reg_mat44_avg2(this->transformationMatrix, &bInverted );
    // We average the inverted forward and backward matrix
    *(this->BackwardTransformationMatrix)=reg_mat44_avg2(&fInverted, this->BackwardTransformationMatrix );
    for(int i=0;i<3;++i){
-      this->TransformationMatrix->m[3][i]=0.f;
+      this->transformationMatrix->m[3][i]=0.f;
       this->BackwardTransformationMatrix->m[3][i]=0.f;
    }
-   this->TransformationMatrix->m[3][3]=1.f;
+   this->transformationMatrix->m[3][3]=1.f;
    this->BackwardTransformationMatrix->m[3][3]=1.f;
 #ifndef NDEBUG
-   reg_mat44_disp(this->TransformationMatrix, (char *)"[NiftyReg DEBUG] updated forward transformation matrix");
+   reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward transformation matrix");
    reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[NiftyReg DEBUG] updated backward transformation matrix");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::initAladinContent(nifti_image *ref,
+void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
                         nifti_image *flo,
                         int *mask,
                         mat44 *transMat,
                         size_t bytes)
 {
-   reg_aladin<T>::initAladinContent(ref,
+   reg_aladin<T>::InitAladinContent(ref,
                                flo,
                                mask,
                                transMat,
                                bytes);
 
   if (this->platformCode == NR_PLATFORM_CPU)
-  this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes);
+  this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes);
 #ifdef _USE_CUDA
   else if (this->platformCode == NR_PLATFORM_CUDA)
-  this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes);
+  this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes);
 #endif
 #ifdef _USE_OPENCL
   else if (this->platformCode == NR_PLATFORM_CL)
-  this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes);
+  this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes);
 #endif
-  this->BackwardBlockMatchingParams = backCon->AladinContent::getBlockMatchingParams();
+  this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::initAladinContent(nifti_image *ref,
+void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
                         nifti_image *flo,
                         int *mask,
                         mat44 *transMat,
@@ -295,7 +292,7 @@ void reg_aladin_sym<T>::initAladinContent(nifti_image *ref,
                         unsigned int inlierLts,
                         unsigned int blockStepSize)
 {
-    reg_aladin<T>::initAladinContent(ref,
+    reg_aladin<T>::InitAladinContent(ref,
                                flo,
                                mask,
                                transMat,
@@ -305,48 +302,48 @@ void reg_aladin_sym<T>::initAladinContent(nifti_image *ref,
                                blockStepSize);
 
   if (this->platformCode == NR_PLATFORM_CPU)
-  this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
+  this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
 #ifdef _USE_CUDA
   else if (this->platformCode == NR_PLATFORM_CUDA)
-  this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
+  this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
 #endif
 #ifdef _USE_OPENCL
   else if (this->platformCode == NR_PLATFORM_CL)
-  this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
+  this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
 #endif
-  this->BackwardBlockMatchingParams = backCon->AladinContent::getBlockMatchingParams();
+  this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
 template <class T>
 void reg_aladin_sym<T>::ClearCurrentInputImage()
 {
    reg_aladin<T>::ClearCurrentInputImage();
-   if(this->FloatingMaskPyramid[this->CurrentLevel]!=NULL)
-      free(this->FloatingMaskPyramid[this->CurrentLevel]);
-   this->FloatingMaskPyramid[this->CurrentLevel]=NULL;
+   if(this->FloatingMaskPyramid[this->currentLevel]!=nullptr)
+      free(this->FloatingMaskPyramid[this->currentLevel]);
+   this->FloatingMaskPyramid[this->currentLevel]=nullptr;
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::createKernels()
+void reg_aladin_sym<T>::CreateKernels()
 {
-  reg_aladin<T>::createKernels();
-  this->bAffineTransformation3DKernel = this->platform->createKernel (AffineDeformationFieldKernel::getName(), this->backCon);
-  this->bBlockMatchingKernel = this->platform->createKernel(BlockMatchingKernel::getName(), this->backCon);
-  this->bResamplingKernel = this->platform->createKernel(ResampleImageKernel::getName(), this->backCon);
-  this->bOptimiseKernel = this->platform->createKernel(OptimiseKernel::getName(), this->backCon);
+  reg_aladin<T>::CreateKernels();
+  this->bAffineTransformation3DKernel = this->platform->CreateKernel (AffineDeformationFieldKernel::GetName(), this->backCon);
+  this->bBlockMatchingKernel = this->platform->CreateKernel(BlockMatchingKernel::GetName(), this->backCon);
+  this->bResamplingKernel = this->platform->CreateKernel(ResampleImageKernel::GetName(), this->backCon);
+  this->bOptimiseKernel = this->platform->CreateKernel(OptimiseKernel::GetName(), this->backCon);
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::clearAladinContent()
+void reg_aladin_sym<T>::ClearAladinContent()
 {
-  reg_aladin<T>::clearAladinContent();
+  reg_aladin<T>::ClearAladinContent();
   delete this->backCon;
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::clearKernels()
+void reg_aladin_sym<T>::ClearKernels()
 {
-  reg_aladin<T>::clearKernels();
+  reg_aladin<T>::ClearKernels();
   delete this->bResamplingKernel;
   delete this->bAffineTransformation3DKernel;
   delete this->bBlockMatchingKernel;
@@ -357,25 +354,25 @@ template <class T>
 void reg_aladin_sym<T>::DebugPrintLevelInfoStart()
 {
    char text[255];
-   sprintf(text, "Current level %i / %i", this->CurrentLevel+1, this->NumberOfLevels);
+   sprintf(text, "Current level %i / %i", this->currentLevel+1, this->numberOfLevels);
    reg_print_info(this->executableName,text);
    sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-           this->con->getCurrentReference()->nx,
-           this->con->getCurrentReference()->ny,
-           this->con->getCurrentReference()->nz,
-           this->con->getCurrentReference()->dx,
-           this->con->getCurrentReference()->dy,
-           this->con->getCurrentReference()->dz);
+           this->con->GetCurrentReference()->nx,
+           this->con->GetCurrentReference()->ny,
+           this->con->GetCurrentReference()->nz,
+           this->con->GetCurrentReference()->dx,
+           this->con->GetCurrentReference()->dy,
+           this->con->GetCurrentReference()->dz);
    reg_print_info(this->executableName,text);
    sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-           this->con->getCurrentFloating()->nx,
-           this->con->getCurrentFloating()->ny,
-           this->con->getCurrentFloating()->nz,
-           this->con->getCurrentFloating()->dx,
-           this->con->getCurrentFloating()->dy,
-           this->con->getCurrentFloating()->dz);
+           this->con->GetCurrentFloating()->nx,
+           this->con->GetCurrentFloating()->ny,
+           this->con->GetCurrentFloating()->nz,
+           this->con->GetCurrentFloating()->dx,
+           this->con->GetCurrentFloating()->dy,
+           this->con->GetCurrentFloating()->dz);
    reg_print_info(this->executableName,text);
-   if(this->con->getCurrentReference()->nz==1){
+   if(this->con->GetCurrentReference()->nz==1){
       reg_print_info(this->executableName, "Block size = [4 4 1]");
    }
    else reg_print_info(this->executableName, "Block size = [4 4 4]");
@@ -386,7 +383,7 @@ void reg_aladin_sym<T>::DebugPrintLevelInfoStart()
    sprintf(text, "Backward Block number = [%i %i %i]", this->BackwardBlockMatchingParams->blockNumber[0],
           this->BackwardBlockMatchingParams->blockNumber[1], this->BackwardBlockMatchingParams->blockNumber[2]);
    reg_print_info(this->executableName, text);
-   reg_mat44_disp(this->TransformationMatrix,
+   reg_mat44_disp(this->transformationMatrix,
                   (char *)"[reg_aladin_sym] Initial forward transformation matrix:");
    reg_mat44_disp(this->BackwardTransformationMatrix,
                   (char *)"[reg_aladin_sym] Initial backward transformation matrix:");
@@ -397,8 +394,7 @@ void reg_aladin_sym<T>::DebugPrintLevelInfoStart()
 template <class T>
 void reg_aladin_sym<T>::DebugPrintLevelInfoEnd()
 {
-   reg_mat44_disp(this->TransformationMatrix, (char *)"[reg_aladin_sym] Final forward transformation matrix:");
+   reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin_sym] Final forward transformation matrix:");
    reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[reg_aladin_sym] Final backward transformation matrix:");
 }
 /* *************************************************************** */
-#endif //REG_ALADIN_SYM_CPP
diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h
index ebbff483..fc1d11b2 100644
--- a/reg-lib/_reg_aladin_sym.h
+++ b/reg-lib/_reg_aladin_sym.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_ALADIN_SYM_H
-#define _REG_ALADIN_SYM_H
+#pragma once
 
 #include "_reg_aladin.h"
 
@@ -23,12 +22,12 @@ class reg_aladin_sym : public reg_aladin<T>
   AladinContent *backCon;
   Kernel *bAffineTransformation3DKernel, *bConvolutionKernel, *bBlockMatchingKernel, *bOptimiseKernel, *bResamplingKernel;
 
-  virtual void initAladinContent(nifti_image *ref,
+  virtual void InitAladinContent(nifti_image *ref,
                                  nifti_image *flo,
                                  int *mask,
                                  mat44 *transMat,
                                  size_t bytes);
-  virtual void initAladinContent(nifti_image *ref,
+  virtual void InitAladinContent(nifti_image *ref,
                                  nifti_image *flo,
                                  int *mask,
                                  mat44 *transMat,
@@ -36,9 +35,9 @@ class reg_aladin_sym : public reg_aladin<T>
                                  unsigned int blockPercentage,
                                  unsigned int inlierLts,
                                  unsigned int blockStepSize);
-  virtual void clearAladinContent();
-  virtual void createKernels();
-  virtual void clearKernels();
+  virtual void ClearAladinContent();
+  virtual void CreateKernels();
+  virtual void ClearKernels();
 
 protected:
   nifti_image *InputFloatingMask;
@@ -65,5 +64,3 @@ class reg_aladin_sym : public reg_aladin<T>
 };
 
 #include "_reg_aladin_sym.cpp"
-
-#endif // _REG_ALADIN_SYM_H
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index dddd2654..8b086faf 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -10,230 +10,195 @@
  *
  */
 
-#ifndef _REG_BASE_CPP
-#define _REG_BASE_CPP
-
 #include "_reg_base.h"
 
-/* *************************************************************** */
-/* *************************************************************** */
+ /* *************************************************************** */
+ /* *************************************************************** */
 template <class T>
-reg_base<T>::reg_base(int refTimePoint,int floTimePoint)
-{
    //Platform
-//   this->platform = NULL;
+//   this->platform = nullptr;
 //   this->platformCode = NR_PLATFORM_CPU;
 //   this->gpuIdx = 999;
-
-   this->optimiser=NULL;
-   this->maxiterationNumber=150;
-   this->optimiseX=true;
-   this->optimiseY=true;
-   this->optimiseZ=true;
-   this->perturbationNumber=0;
-   this->useConjGradient=true;
-   this->useApproxGradient=false;
-
-   this->measure_ssd=NULL;
-   this->measure_kld=NULL;
-   this->measure_dti=NULL;
-   this->measure_lncc=NULL;
-   this->measure_nmi=NULL;
-   this->measure_mind=NULL;
-   this->measure_mindssc=NULL;
-   this->localWeightSimInput = NULL;
-   this->localWeightSimCurrent=NULL;
-
-   this->similarityWeight=0.; // is automatically set depending of the penalty term weights
-
-   this->executableName=(char *)"NiftyReg BASE";
-   this->referenceTimePoint=refTimePoint;
-   this->floatingTimePoint=floTimePoint;
-   this->inputReference=NULL; // pointer to external
-   this->inputFloating=NULL; // pointer to external
-   this->maskImage=NULL; // pointer to external
-   this->affineTransformation=NULL;  // pointer to external
-   this->referenceMask=NULL;
-   this->referenceSmoothingSigma=0.;
-   this->floatingSmoothingSigma=0.;
-   this->referenceThresholdUp=new float[this->referenceTimePoint];
-   this->referenceThresholdLow=new float[this->referenceTimePoint];
-   this->floatingThresholdUp=new float[this->floatingTimePoint];
-   this->floatingThresholdLow=new float[this->floatingTimePoint];
-   for(int i=0; i<this->referenceTimePoint; i++)
-   {
-      this->referenceThresholdUp[i]=std::numeric_limits<T>::max();
-      this->referenceThresholdLow[i]=-std::numeric_limits<T>::max();
-   }
-   for(int i=0; i<this->floatingTimePoint; i++)
-   {
-      this->floatingThresholdUp[i]=std::numeric_limits<T>::max();
-      this->floatingThresholdLow[i]=-std::numeric_limits<T>::max();
-   }
-   this->robustRange=false;
-   this->warpedPaddingValue=std::numeric_limits<T>::quiet_NaN();
-   this->levelNumber=3;
-   this->levelToPerform=0;
-   this->gradientSmoothingSigma=0;
-   this->verbose=true;
-   this->usePyramid=true;
-   this->forwardJacobianMatrix=NULL;
-
-   this->initialised=false;
-   this->referencePyramid=NULL;
-   this->floatingPyramid=NULL;
-   this->maskPyramid=NULL;
-   this->activeVoxelNumber=NULL;
-   this->currentReference=NULL;
-   this->currentFloating=NULL;
-   this->currentMask=NULL;
-   this->warped=NULL;
-   this->deformationFieldImage=NULL;
-   this->warImgGradient=NULL;
-   this->voxelBasedMeasureGradient=NULL;
-
-   this->interpolation=1;
-
-   this->landmarkRegWeight=0.f;
-   this->landmarkRegNumber=0;
-   this->landmarkReference=NULL;
-   this->landmarkFloating=NULL;
+reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
+
+    this->optimiser = nullptr;
+    this->maxIterationNumber = 150;
+    this->optimiseX = true;
+    this->optimiseY = true;
+    this->optimiseZ = true;
+    this->perturbationNumber = 0;
+    this->useConjGradient = true;
+    this->useApproxGradient = false;
+
+    this->measure_ssd = nullptr;
+    this->measure_kld = nullptr;
+    this->measure_dti = nullptr;
+    this->measure_lncc = nullptr;
+    this->measure_nmi = nullptr;
+    this->measure_mind = nullptr;
+    this->measure_mindssc = nullptr;
+    this->localWeightSimInput = nullptr;
+    this->localWeightSimCurrent = nullptr;
+
+    this->similarityWeight = 0; // automatically set depending of the penalty term weights
+
+    this->executableName = (char*)"NiftyReg BASE";
+    this->referenceTimePoint = refTimePoint;
+    this->floatingTimePoint = floTimePoint;
+    this->inputReference = nullptr; // pointer to external
+    this->inputFloating = nullptr; // pointer to external
+    this->maskImage = nullptr; // pointer to external
+    this->affineTransformation = nullptr;  // pointer to external
+    this->referenceMask = nullptr;
+    this->referenceSmoothingSigma = 0;
+    this->floatingSmoothingSigma = 0;
+    this->referenceThresholdUp = new float[this->referenceTimePoint];
+    this->referenceThresholdLow = new float[this->referenceTimePoint];
+    this->floatingThresholdUp = new float[this->floatingTimePoint];
+    this->floatingThresholdLow = new float[this->floatingTimePoint];
+    for (int i = 0; i < this->referenceTimePoint; i++) {
+        this->referenceThresholdUp[i] = std::numeric_limits<T>::max();
+        this->referenceThresholdLow[i] = -std::numeric_limits<T>::max();
+    }
+    for (int i = 0; i < this->floatingTimePoint; i++) {
+        this->floatingThresholdUp[i] = std::numeric_limits<T>::max();
+        this->floatingThresholdLow[i] = -std::numeric_limits<T>::max();
+    }
+    this->robustRange = false;
+    this->warpedPaddingValue = std::numeric_limits<T>::quiet_NaN();
+    this->levelNumber = 3;
+    this->levelToPerform = 0;
+    this->gradientSmoothingSigma = 0;
+    this->verbose = true;
+    this->usePyramid = true;
+    this->forwardJacobianMatrix = nullptr;
+
+    this->initialised = false;
+    this->referencePyramid = nullptr;
+    this->floatingPyramid = nullptr;
+    this->maskPyramid = nullptr;
+    this->activeVoxelNumber = nullptr;
+    this->currentReference = nullptr;
+    this->currentFloating = nullptr;
+    this->currentMask = nullptr;
+    this->warped = nullptr;
+    this->deformationFieldImage = nullptr;
+    this->warImgGradient = nullptr;
+    this->voxelBasedMeasureGradient = nullptr;
+
+    this->interpolation = 1;
+
+    this->landmarkRegWeight = 0;
+    this->landmarkRegNumber = 0;
+    this->landmarkReference = nullptr;
+    this->landmarkFloating = nullptr;
 
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::reg_base");
+    reg_print_fct_debug("reg_base<T>::reg_base");
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
-reg_base<T>::~reg_base()
-{
-   this->ClearWarped();
-   this->ClearWarpedGradient();
-   this->ClearDeformationField();
-   this->ClearVoxelBasedMeasureGradient();
-   if(this->referencePyramid!=NULL)
-   {
-      if(this->usePyramid)
-      {
-         for(unsigned int i=0; i<this->levelToPerform; i++)
-         {
-            if(referencePyramid[i]!=NULL)
-            {
-               nifti_image_free(referencePyramid[i]);
-               referencePyramid[i]=NULL;
+reg_base<T>::~reg_base() {
+    this->ClearWarped();
+    this->ClearWarpedGradient();
+    this->ClearDeformationField();
+    this->ClearVoxelBasedMeasureGradient();
+    if (this->referencePyramid != nullptr) {
+        if (this->usePyramid) {
+            for (unsigned int i = 0; i < this->levelToPerform; i++) {
+                if (referencePyramid[i] != nullptr) {
+                    nifti_image_free(referencePyramid[i]);
+                    referencePyramid[i] = nullptr;
+                }
             }
-         }
-      }
-      else
-      {
-         if(referencePyramid[0]!=NULL)
-         {
-            nifti_image_free(referencePyramid[0]);
-            referencePyramid[0]=NULL;
-         }
-      }
-      free(referencePyramid);
-      referencePyramid=NULL;
-   }
-   if(this->maskPyramid!=NULL)
-   {
-      if(this->usePyramid)
-      {
-         for(unsigned int i=0; i<this->levelToPerform; i++)
-         {
-            if(this->maskPyramid[i]!=NULL)
-            {
-               free(this->maskPyramid[i]);
-               this->maskPyramid[i]=NULL;
+        } else {
+            if (referencePyramid[0] != nullptr) {
+                nifti_image_free(referencePyramid[0]);
+                referencePyramid[0] = nullptr;
             }
-         }
-      }
-      else
-      {
-         if(this->maskPyramid[0]!=NULL)
-         {
-            free(this->maskPyramid[0]);
-            this->maskPyramid[0]=NULL;
-         }
-      }
-      free(this->maskPyramid);
-      maskPyramid=NULL;
-   }
-   if(this->floatingPyramid!=NULL)
-   {
-      if(this->usePyramid)
-      {
-         for(unsigned int i=0; i<this->levelToPerform; i++)
-         {
-            if(floatingPyramid[i]!=NULL)
-            {
-               nifti_image_free(floatingPyramid[i]);
-               floatingPyramid[i]=NULL;
+        }
+        free(referencePyramid);
+        referencePyramid = nullptr;
+    }
+    if (this->maskPyramid != nullptr) {
+        if (this->usePyramid) {
+            for (unsigned int i = 0; i < this->levelToPerform; i++) {
+                if (this->maskPyramid[i] != nullptr) {
+                    free(this->maskPyramid[i]);
+                    this->maskPyramid[i] = nullptr;
+                }
+            }
+        } else {
+            if (this->maskPyramid[0] != nullptr) {
+                free(this->maskPyramid[0]);
+                this->maskPyramid[0] = nullptr;
+            }
+        }
+        free(this->maskPyramid);
+        maskPyramid = nullptr;
+    }
+    if (this->floatingPyramid != nullptr) {
+        if (this->usePyramid) {
+            for (unsigned int i = 0; i < this->levelToPerform; i++) {
+                if (floatingPyramid[i] != nullptr) {
+                    nifti_image_free(floatingPyramid[i]);
+                    floatingPyramid[i] = nullptr;
+                }
             }
-         }
-      }
-      else
-      {
-         if(floatingPyramid[0]!=NULL)
-         {
-            nifti_image_free(floatingPyramid[0]);
-            floatingPyramid[0]=NULL;
-         }
-      }
-      free(floatingPyramid);
-      floatingPyramid=NULL;
-   }
-   if(this->activeVoxelNumber!=NULL)
-   {
-      free(activeVoxelNumber);
-      this->activeVoxelNumber=NULL;
-   }
-   if(this->referenceThresholdUp!=NULL)
-   {
-      delete []this->referenceThresholdUp;
-      this->referenceThresholdUp=NULL;
-   }
-   if(this->referenceThresholdLow!=NULL)
-   {
-      delete []this->referenceThresholdLow;
-      this->referenceThresholdLow=NULL;
-   }
-   if(this->floatingThresholdUp!=NULL)
-   {
-      delete []this->floatingThresholdUp;
-      this->floatingThresholdUp=NULL;
-   }
-   if(this->floatingThresholdLow!=NULL)
-   {
-      delete []this->floatingThresholdLow;
-      this->floatingThresholdLow=NULL;
-   }
-   if(this->optimiser!=NULL)
-   {
-      delete this->optimiser;
-      this->optimiser=NULL;
-   }
-
-   if(this->measure_nmi!=NULL)
-      delete this->measure_nmi;
-   if(this->measure_ssd!=NULL)
-      delete this->measure_ssd;
-   if(this->measure_kld!=NULL)
-      delete this->measure_kld;
-   if(this->measure_dti!=NULL)
-      delete this->measure_dti;
-   if(this->measure_lncc!=NULL)
-      delete this->measure_lncc;
-   if(this->measure_mind!=NULL)
-      delete this->measure_mind;
-   if(this->measure_mindssc!=NULL)
-      delete this->measure_mindssc;
-
    //Platform
 //   delete this->platform;
+        } else {
+            if (floatingPyramid[0] != nullptr) {
+                nifti_image_free(floatingPyramid[0]);
+                floatingPyramid[0] = nullptr;
+            }
+        }
+        free(floatingPyramid);
+        floatingPyramid = nullptr;
+    }
+    if (this->activeVoxelNumber != nullptr) {
+        free(activeVoxelNumber);
+        this->activeVoxelNumber = nullptr;
+    }
+    if (this->referenceThresholdUp != nullptr) {
+        delete[]this->referenceThresholdUp;
+        this->referenceThresholdUp = nullptr;
+    }
+    if (this->referenceThresholdLow != nullptr) {
+        delete[]this->referenceThresholdLow;
+        this->referenceThresholdLow = nullptr;
+    }
+    if (this->floatingThresholdUp != nullptr) {
+        delete[]this->floatingThresholdUp;
+        this->floatingThresholdUp = nullptr;
+    }
+    if (this->floatingThresholdLow != nullptr) {
+        delete[]this->floatingThresholdLow;
+        this->floatingThresholdLow = nullptr;
+    }
+    if (this->optimiser != nullptr) {
+        delete this->optimiser;
+        this->optimiser = nullptr;
+    }
+
+    if (this->measure_nmi != nullptr)
+        delete this->measure_nmi;
+    if (this->measure_ssd != nullptr)
+        delete this->measure_ssd;
+    if (this->measure_kld != nullptr)
+        delete this->measure_kld;
+    if (this->measure_dti != nullptr)
+        delete this->measure_dti;
+    if (this->measure_lncc != nullptr)
+        delete this->measure_lncc;
+    if (this->measure_mind != nullptr)
+        delete this->measure_mind;
+    if (this->measure_mindssc != nullptr)
+        delete this->measure_mindssc;
+
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::~reg_base");
+    reg_print_fct_debug("reg_base<T>::~reg_base");
 #endif
 }
 /* *************************************************************** */
@@ -262,995 +227,887 @@ reg_base<T>::~reg_base()
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetReferenceImage(nifti_image *r)
-{
-   this->inputReference = r;
+void reg_base<T>::SetReferenceImage(nifti_image *r) {
+    this->inputReference = r;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetReferenceImage");
+    reg_print_fct_debug("reg_base<T>::SetReferenceImage");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetFloatingImage(nifti_image *f)
-{
-   this->inputFloating = f;
+void reg_base<T>::SetFloatingImage(nifti_image *f) {
+    this->inputFloating = f;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetFloatingImage");
+    reg_print_fct_debug("reg_base<T>::SetFloatingImage");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetMaximalIterationNumber(unsigned int iter)
-{
-   this->maxiterationNumber=iter;
+void reg_base<T>::SetMaximalIterationNumber(unsigned int iter) {
+    this->maxIterationNumber = iter;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetMaximalIterationNumber");
+    reg_print_fct_debug("reg_base<T>::SetMaximalIterationNumber");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetReferenceMask(nifti_image *m)
-{
-   this->maskImage = m;
+void reg_base<T>::SetReferenceMask(nifti_image *m) {
+    this->maskImage = m;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetReferenceMask");
+    reg_print_fct_debug("reg_base<T>::SetReferenceMask");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetAffineTransformation(mat44 *a)
-{
-   this->affineTransformation=a;
+void reg_base<T>::SetAffineTransformation(mat44 *a) {
+    this->affineTransformation = a;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetAffineTransformation");
+    reg_print_fct_debug("reg_base<T>::SetAffineTransformation");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetReferenceSmoothingSigma(T s)
-{
-   this->referenceSmoothingSigma = s;
+void reg_base<T>::SetReferenceSmoothingSigma(T s) {
+    this->referenceSmoothingSigma = s;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetReferenceSmoothingSigma");
+    reg_print_fct_debug("reg_base<T>::SetReferenceSmoothingSigma");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetFloatingSmoothingSigma(T s)
-{
-   this->floatingSmoothingSigma = s;
+void reg_base<T>::SetFloatingSmoothingSigma(T s) {
+    this->floatingSmoothingSigma = s;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetFloatingSmoothingSigma");
+    reg_print_fct_debug("reg_base<T>::SetFloatingSmoothingSigma");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetReferenceThresholdUp(unsigned int i, T t)
-{
-   this->referenceThresholdUp[i] = t;
+void reg_base<T>::SetReferenceThresholdUp(unsigned int i, T t) {
+    this->referenceThresholdUp[i] = t;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetReferenceThresholdUp");
+    reg_print_fct_debug("reg_base<T>::SetReferenceThresholdUp");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetReferenceThresholdLow(unsigned int i, T t)
-{
-   this->referenceThresholdLow[i] = t;
+void reg_base<T>::SetReferenceThresholdLow(unsigned int i, T t) {
+    this->referenceThresholdLow[i] = t;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetReferenceThresholdLow");
+    reg_print_fct_debug("reg_base<T>::SetReferenceThresholdLow");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetFloatingThresholdUp(unsigned int i, T t)
-{
-   this->floatingThresholdUp[i] = t;
+void reg_base<T>::SetFloatingThresholdUp(unsigned int i, T t) {
+    this->floatingThresholdUp[i] = t;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetFloatingThresholdUp");
+    reg_print_fct_debug("reg_base<T>::SetFloatingThresholdUp");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetFloatingThresholdLow(unsigned int i, T t)
-{
-   this->floatingThresholdLow[i] = t;
+void reg_base<T>::SetFloatingThresholdLow(unsigned int i, T t) {
+    this->floatingThresholdLow[i] = t;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetFloatingThresholdLow");
+    reg_print_fct_debug("reg_base<T>::SetFloatingThresholdLow");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::UseRobustRange()
-{
-   this->robustRange=true;
+void reg_base<T>::UseRobustRange() {
+    this->robustRange = true;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseRobustRange");
+    reg_print_fct_debug("reg_base<T>::UseRobustRange");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::DoNotUseRobustRange()
-{
-   this->robustRange=false;
+void reg_base<T>::DoNotUseRobustRange() {
+    this->robustRange = false;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseRobustRange");
+    reg_print_fct_debug("reg_base<T>::UseRobustRange");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetWarpedPaddingValue(T p)
-{
-   this->warpedPaddingValue = p;
+void reg_base<T>::SetWarpedPaddingValue(T p) {
+    this->warpedPaddingValue = p;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetWarpedPaddingValue");
+    reg_print_fct_debug("reg_base<T>::SetWarpedPaddingValue");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLevelNumber(unsigned int l)
-{
-   this->levelNumber = l;
+void reg_base<T>::SetLevelNumber(unsigned int l) {
+    this->levelNumber = l;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetLevelNumber");
+    reg_print_fct_debug("reg_base<T>::SetLevelNumber");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLevelToPerform(unsigned int l)
-{
-   this->levelToPerform = l;
+void reg_base<T>::SetLevelToPerform(unsigned int l) {
+    this->levelToPerform = l;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetLevelToPerform");
+    reg_print_fct_debug("reg_base<T>::SetLevelToPerform");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetGradientSmoothingSigma(T g)
-{
-   this->gradientSmoothingSigma = g;
+void reg_base<T>::SetGradientSmoothingSigma(T g) {
+    this->gradientSmoothingSigma = g;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetGradientSmoothingSigma");
+    reg_print_fct_debug("reg_base<T>::SetGradientSmoothingSigma");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseConjugateGradient()
-{
-   this->useConjGradient = true;
+void reg_base<T>::UseConjugateGradient() {
+    this->useConjGradient = true;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseConjugateGradient");
+    reg_print_fct_debug("reg_base<T>::UseConjugateGradient");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::DoNotUseConjugateGradient()
-{
-   this->useConjGradient = false;
+void reg_base<T>::DoNotUseConjugateGradient() {
+    this->useConjGradient = false;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::DoNotUseConjugateGradient");
+    reg_print_fct_debug("reg_base<T>::DoNotUseConjugateGradient");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseApproximatedGradient()
-{
-   this->useApproxGradient = true;
+void reg_base<T>::UseApproximatedGradient() {
+    this->useApproxGradient = true;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseApproximatedGradient");
+    reg_print_fct_debug("reg_base<T>::UseApproximatedGradient");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::DoNotUseApproximatedGradient()
-{
-   this->useApproxGradient = false;
+void reg_base<T>::DoNotUseApproximatedGradient() {
+    this->useApproxGradient = false;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::DoNotUseApproximatedGradient");
+    reg_print_fct_debug("reg_base<T>::DoNotUseApproximatedGradient");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::PrintOutInformation()
-{
-   this->verbose = true;
+void reg_base<T>::PrintOutInformation() {
+    this->verbose = true;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::PrintOutInformation");
+    reg_print_fct_debug("reg_base<T>::PrintOutInformation");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::DoNotPrintOutInformation()
-{
-   this->verbose = false;
+void reg_base<T>::DoNotPrintOutInformation() {
+    this->verbose = false;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::DoNotPrintOutInformation");
+    reg_print_fct_debug("reg_base<T>::DoNotPrintOutInformation");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::DoNotUsePyramidalApproach()
-{
-   this->usePyramid=false;
+void reg_base<T>::DoNotUsePyramidalApproach() {
+    this->usePyramid = false;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::DoNotUsePyramidalApproach");
+    reg_print_fct_debug("reg_base<T>::DoNotUsePyramidalApproach");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseNeareatNeighborInterpolation()
-{
-   this->interpolation=0;
+void reg_base<T>::UseNearestNeighborInterpolation() {
+    this->interpolation = 0;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseNeareatNeighborInterpolation");
+    reg_print_fct_debug("reg_base<T>::UseNearestNeighborInterpolation");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseLinearInterpolation()
-{
-   this->interpolation=1;
+void reg_base<T>::UseLinearInterpolation() {
+    this->interpolation = 1;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseLinearInterpolation");
+    reg_print_fct_debug("reg_base<T>::UseLinearInterpolation");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseCubicSplineInterpolation()
-{
-   this->interpolation=3;
+void reg_base<T>::UseCubicSplineInterpolation() {
+    this->interpolation = 3;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseCubicSplineInterpolation");
+    reg_print_fct_debug("reg_base<T>::UseCubicSplineInterpolation");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, float w)
-{
-   this->landmarkRegNumber = n;
-   this->landmarkReference = r;
-   this->landmarkFloating = f;
-   this->landmarkRegWeight = w;
+void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, float w) {
+    this->landmarkRegNumber = n;
+    this->landmarkReference = r;
+    this->landmarkFloating = f;
+    this->landmarkRegWeight = w;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetLandmarkRegularisationParam");
+    reg_print_fct_debug("reg_base<T>::SetLandmarkRegularisationParam");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::ClearCurrentInputImage()
-{
-   this->currentReference=NULL;
-   this->currentMask=NULL;
-   this->currentFloating=NULL;
-   if(this->localWeightSimCurrent!=NULL)
-      nifti_image_free(this->localWeightSimCurrent);
-   this->localWeightSimCurrent=NULL;
+void reg_base<T>::ClearCurrentInputImage() {
+    this->currentReference = nullptr;
+    this->currentMask = nullptr;
+    this->currentFloating = nullptr;
+    if (this->localWeightSimCurrent != nullptr)
+        nifti_image_free(this->localWeightSimCurrent);
+    this->localWeightSimCurrent = nullptr;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::ClearCurrentInputImage");
+    reg_print_fct_debug("reg_base<T>::ClearCurrentInputImage");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::AllocateWarped()
-{
-   if(this->currentReference==NULL)
-   {
-      reg_print_fct_error("reg_base::AllocateWarped()");
-      reg_print_msg_error("The reference image is not defined");
-      reg_exit();
-   }
-   reg_base<T>::ClearWarped();
-   this->warped = nifti_copy_nim_info(this->currentReference);
-   this->warped->dim[0]=this->warped->ndim=this->currentFloating->ndim;
-   this->warped->dim[4]=this->warped->nt=this->currentFloating->nt;
-   this->warped->pixdim[4]=this->warped->dt=1.0;
-   this->warped->nvox =
-      (size_t)this->warped->nx *
-      (size_t)this->warped->ny *
-      (size_t)this->warped->nz *
-      (size_t)this->warped->nt;
-   this->warped->scl_slope=1.f;
-   this->warped->scl_inter=0.f;
-   this->warped->datatype = this->currentFloating->datatype;
-   this->warped->nbyper = this->currentFloating->nbyper;
-   this->warped->data = (void *)calloc(this->warped->nvox, this->warped->nbyper);
+void reg_base<T>::AllocateWarped() {
+    if (this->currentReference == nullptr) {
+        reg_print_fct_error("reg_base::AllocateWarped()");
+        reg_print_msg_error("The reference image is not defined");
+        reg_exit();
+    }
+    reg_base<T>::ClearWarped();
+    this->warped = nifti_copy_nim_info(this->currentReference);
+    this->warped->dim[0] = this->warped->ndim = this->currentFloating->ndim;
+    this->warped->dim[4] = this->warped->nt = this->currentFloating->nt;
+    this->warped->pixdim[4] = this->warped->dt = 1;
+    this->warped->nvox = (size_t)(this->warped->nx * this->warped->ny * this->warped->nz * this->warped->nt);
+    this->warped->scl_slope = 1;
+    this->warped->scl_inter = 0;
+    this->warped->datatype = this->currentFloating->datatype;
+    this->warped->nbyper = this->currentFloating->nbyper;
+    this->warped->data = (void*)calloc(this->warped->nvox, this->warped->nbyper);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::AllocateWarped");
+    reg_print_fct_debug("reg_base<T>::AllocateWarped");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::ClearWarped()
-{
-   if(this->warped!=NULL)
-      nifti_image_free(this->warped);
-   this->warped=NULL;
+void reg_base<T>::ClearWarped() {
+    if (this->warped != nullptr)
+        nifti_image_free(this->warped);
+    this->warped = nullptr;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::ClearWarped");
+    reg_print_fct_debug("reg_base<T>::ClearWarped");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::AllocateDeformationField()
-{
-   if(this->currentReference==NULL)
-   {
-      reg_print_fct_error("reg_base::AllocateDeformationField()");
-      reg_print_msg_error("The reference image is not defined");
-      reg_exit();
-   }
-   reg_base<T>::ClearDeformationField();
-   this->deformationFieldImage = nifti_copy_nim_info(this->currentReference);
-   this->deformationFieldImage->dim[0]=this->deformationFieldImage->ndim=5;
-   this->deformationFieldImage->dim[1]=this->deformationFieldImage->nx=this->currentReference->nx;
-   this->deformationFieldImage->dim[2]=this->deformationFieldImage->ny=this->currentReference->ny;
-   this->deformationFieldImage->dim[3]=this->deformationFieldImage->nz=this->currentReference->nz;
-   this->deformationFieldImage->dim[4]=this->deformationFieldImage->nt=1;
-   this->deformationFieldImage->pixdim[4]=this->deformationFieldImage->dt=1.0;
-   if(this->currentReference->nz==1)
-      this->deformationFieldImage->dim[5]=this->deformationFieldImage->nu=2;
-   else this->deformationFieldImage->dim[5]=this->deformationFieldImage->nu=3;
-   this->deformationFieldImage->pixdim[5]=this->deformationFieldImage->du=1.0;
-   this->deformationFieldImage->dim[6]=this->deformationFieldImage->nv=1;
-   this->deformationFieldImage->pixdim[6]=this->deformationFieldImage->dv=1.0;
-   this->deformationFieldImage->dim[7]=this->deformationFieldImage->nw=1;
-   this->deformationFieldImage->pixdim[7]=this->deformationFieldImage->dw=1.0;
-   this->deformationFieldImage->nvox =
-      (size_t)this->deformationFieldImage->nx *
-      (size_t)this->deformationFieldImage->ny *
-      (size_t)this->deformationFieldImage->nz *
-      (size_t)this->deformationFieldImage->nt *
-      (size_t)this->deformationFieldImage->nu;
-   this->deformationFieldImage->nbyper = sizeof(T);
-   if(sizeof(T)==sizeof(float))
-      this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32;
-   else this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT64;
-   this->deformationFieldImage->data = (void *)calloc(this->deformationFieldImage->nvox,
-                                       this->deformationFieldImage->nbyper);
-   this->deformationFieldImage->intent_code=NIFTI_INTENT_VECTOR;
-   memset(this->deformationFieldImage->intent_name, 0, 16);
-   strcpy(this->deformationFieldImage->intent_name,"NREG_TRANS");
-   this->deformationFieldImage->intent_p1=DEF_FIELD;
-   this->deformationFieldImage->scl_slope=1.f;
-   this->deformationFieldImage->scl_inter=0.f;
-
-   if(this->measure_dti!=NULL)
-      this->forwardJacobianMatrix=(mat33 *)malloc(
-                                     this->deformationFieldImage->nx *
-                                     this->deformationFieldImage->ny *
-                                     this->deformationFieldImage->nz *
-                                     sizeof(mat33));
+void reg_base<T>::AllocateDeformationField() {
+    if (this->currentReference == nullptr) {
+        reg_print_fct_error("reg_base::AllocateDeformationField()");
+        reg_print_msg_error("The reference image is not defined");
+        reg_exit();
+    }
+    reg_base<T>::ClearDeformationField();
+    this->deformationFieldImage = nifti_copy_nim_info(this->currentReference);
+    this->deformationFieldImage->dim[0] = this->deformationFieldImage->ndim = 5;
+    this->deformationFieldImage->dim[1] = this->deformationFieldImage->nx = this->currentReference->nx;
+    this->deformationFieldImage->dim[2] = this->deformationFieldImage->ny = this->currentReference->ny;
+    this->deformationFieldImage->dim[3] = this->deformationFieldImage->nz = this->currentReference->nz;
+    this->deformationFieldImage->dim[4] = this->deformationFieldImage->nt = 1;
+    this->deformationFieldImage->pixdim[4] = this->deformationFieldImage->dt = 1.0;
+    if (this->currentReference->nz == 1)
+        this->deformationFieldImage->dim[5] = this->deformationFieldImage->nu = 2;
+    else this->deformationFieldImage->dim[5] = this->deformationFieldImage->nu = 3;
+    this->deformationFieldImage->pixdim[5] = this->deformationFieldImage->du = 1.0;
+    this->deformationFieldImage->dim[6] = this->deformationFieldImage->nv = 1;
+    this->deformationFieldImage->pixdim[6] = this->deformationFieldImage->dv = 1.0;
+    this->deformationFieldImage->dim[7] = this->deformationFieldImage->nw = 1;
+    this->deformationFieldImage->pixdim[7] = this->deformationFieldImage->dw = 1.0;
+    this->deformationFieldImage->nvox =
+        (size_t)this->deformationFieldImage->nx *
+        (size_t)this->deformationFieldImage->ny *
+        (size_t)this->deformationFieldImage->nz *
+        (size_t)this->deformationFieldImage->nt *
+        (size_t)this->deformationFieldImage->nu;
+    this->deformationFieldImage->nbyper = sizeof(T);
+    if (sizeof(T) == sizeof(float))
+        this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32;
+    else this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT64;
+    this->deformationFieldImage->data = (void*)calloc(this->deformationFieldImage->nvox,
+                                                      this->deformationFieldImage->nbyper);
+    this->deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR;
+    memset(this->deformationFieldImage->intent_name, 0, 16);
+    strcpy(this->deformationFieldImage->intent_name, "NREG_TRANS");
+    this->deformationFieldImage->intent_p1 = DEF_FIELD;
+    this->deformationFieldImage->scl_slope = 1;
+    this->deformationFieldImage->scl_inter = 0;
+
+    if (this->measure_dti != nullptr)
+        this->forwardJacobianMatrix = (mat33*)malloc(this->deformationFieldImage->nx * this->deformationFieldImage->ny *
+                                                     this->deformationFieldImage->nz * sizeof(mat33));
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::AllocateDeformationField");
+    reg_print_fct_debug("reg_base<T>::AllocateDeformationField");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::ClearDeformationField()
-{
-   if(this->deformationFieldImage!=NULL)
-   {
-      nifti_image_free(this->deformationFieldImage);
-      this->deformationFieldImage=NULL;
-   }
-   if(this->forwardJacobianMatrix!=NULL)
-      free(this->forwardJacobianMatrix);
-   this->forwardJacobianMatrix=NULL;
+void reg_base<T>::ClearDeformationField() {
+    if (this->deformationFieldImage != nullptr) {
+        nifti_image_free(this->deformationFieldImage);
+        this->deformationFieldImage = nullptr;
+    }
+    if (this->forwardJacobianMatrix != nullptr)
+        free(this->forwardJacobianMatrix);
+    this->forwardJacobianMatrix = nullptr;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::ClearDeformationField");
+    reg_print_fct_debug("reg_base<T>::ClearDeformationField");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::AllocateWarpedGradient()
-{
-   if(this->deformationFieldImage==NULL)
-   {
-      reg_print_fct_error("reg_base::AllocateWarpedGradient()");
-      reg_print_msg_error("The deformation field image is not defined");
-      reg_exit();
-   }
-   reg_base<T>::ClearWarpedGradient();
-   this->warImgGradient = nifti_copy_nim_info(this->deformationFieldImage);
-   this->warImgGradient->data = (void *)calloc(this->warImgGradient->nvox,
-                                     this->warImgGradient->nbyper);
+void reg_base<T>::AllocateWarpedGradient() {
+    if (this->deformationFieldImage == nullptr) {
+        reg_print_fct_error("reg_base::AllocateWarpedGradient()");
+        reg_print_msg_error("The deformation field image is not defined");
+        reg_exit();
+    }
+    reg_base<T>::ClearWarpedGradient();
+    this->warImgGradient = nifti_copy_nim_info(this->deformationFieldImage);
+    this->warImgGradient->data = (void*)calloc(this->warImgGradient->nvox,
+                                                this->warImgGradient->nbyper);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::AllocateWarpedGradient");
+    reg_print_fct_debug("reg_base<T>::AllocateWarpedGradient");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::ClearWarpedGradient()
-{
-   if(this->warImgGradient!=NULL)
-   {
-      nifti_image_free(this->warImgGradient);
-      this->warImgGradient=NULL;
-   }
+void reg_base<T>::ClearWarpedGradient() {
+    if (this->warImgGradient != nullptr) {
+        nifti_image_free(this->warImgGradient);
+        this->warImgGradient = nullptr;
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::ClearWarpedGradient");
+    reg_print_fct_debug("reg_base<T>::ClearWarpedGradient");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::AllocateVoxelBasedMeasureGradient()
-{
-   if(this->deformationFieldImage==NULL)
-   {
-      reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()");
-      reg_print_msg_error("The deformation field image is not defined");
-      reg_exit();
-   }
-   reg_base<T>::ClearVoxelBasedMeasureGradient();
-   this->voxelBasedMeasureGradient = nifti_copy_nim_info(this->deformationFieldImage);
-   this->voxelBasedMeasureGradient->data = (void *)calloc(this->voxelBasedMeasureGradient->nvox,
-         this->voxelBasedMeasureGradient->nbyper);
+void reg_base<T>::AllocateVoxelBasedMeasureGradient() {
+    if (this->deformationFieldImage == nullptr) {
+        reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()");
+        reg_print_msg_error("The deformation field image is not defined");
+        reg_exit();
+    }
+    reg_base<T>::ClearVoxelBasedMeasureGradient();
+    this->voxelBasedMeasureGradient = nifti_copy_nim_info(this->deformationFieldImage);
+    this->voxelBasedMeasureGradient->data = (void*)calloc(this->voxelBasedMeasureGradient->nvox,
+                                                           this->voxelBasedMeasureGradient->nbyper);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::AllocateVoxelBasedMeasureGradient");
+    reg_print_fct_debug("reg_base<T>::AllocateVoxelBasedMeasureGradient");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::ClearVoxelBasedMeasureGradient()
-{
-   if(this->voxelBasedMeasureGradient!=NULL)
-   {
-      nifti_image_free(this->voxelBasedMeasureGradient);
-      this->voxelBasedMeasureGradient=NULL;
-   }
+void reg_base<T>::ClearVoxelBasedMeasureGradient() {
+    if (this->voxelBasedMeasureGradient != nullptr) {
+        nifti_image_free(this->voxelBasedMeasureGradient);
+        this->voxelBasedMeasureGradient = nullptr;
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::ClearVoxelBasedMeasureGradient");
+    reg_print_fct_debug("reg_base<T>::ClearVoxelBasedMeasureGradient");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::CheckParameters()
-{
-	// CHECK THAT BOTH INPUT IMAGES ARE DEFINED
-	if (this->inputReference == NULL)
-	{
-		reg_print_fct_error("reg_base::CheckParameters()");
-		reg_print_msg_error("The reference image is not defined");
-		reg_exit();
-	}
-	if (this->inputFloating == NULL)
-	{
-		reg_print_fct_error("reg_base::CheckParameters()");
-		reg_print_msg_error("The floating image is not defined");
-		reg_exit();
-	}
-
-	// CHECK THE MASK DIMENSION IF IT IS DEFINED
-	if (this->maskImage != NULL)
-	{
-		if (this->inputReference->nx != this->maskImage->nx ||
-			this->inputReference->ny != this->maskImage->ny ||
-			this->inputReference->nz != this->maskImage->nz)
-		{
-			reg_print_fct_error("reg_base::CheckParameters()");
-			reg_print_msg_error("The reference and mask images have different dimension");
-			reg_exit();
-		}
-	}
-
-	// CHECK THE NUMBER OF LEVEL TO PERFORM
-	if (this->levelToPerform > 0)
-	{
-		this->levelToPerform = this->levelToPerform < this->levelNumber ? this->levelToPerform : this->levelNumber;
-	}
-	else this->levelToPerform = this->levelNumber;
-	if (this->levelToPerform == 0 || this->levelToPerform > this->levelNumber)
-		this->levelToPerform = this->levelNumber;
-
-	// SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
-	if (this->measure_nmi == NULL &&
-		this->measure_ssd == NULL &&
-		this->measure_dti == NULL &&
-		this->measure_lncc == NULL &&
-		this->measure_lncc == NULL &&
-		this->measure_kld == NULL &&
-		this->measure_mind == NULL &&
-		this->measure_mindssc == NULL)
-	{
-		this->measure_nmi = new reg_nmi;
-		for (int i = 0; i < this->inputReference->nt; ++i)
-			this->measure_nmi->SetTimepointWeight(i, 1.0);
-	}
-
-	// CHECK THAT IMAGES HAVE SAME NUMBER OF CHANNELS (TIMEPOINTS)
-	// THAT EACH CHANNEL HAS AT LEAST ONE SIMILARITY MEASURE ASSIGNED
-	// AND THAT EACH SIMILARITY MEASURE IS USED FOR AT LEAST ONE CHANNEL
-	// NORMALISE CHANNEL AND SIMILARITY WEIGHTS SO TOTAL = 1
-	//
-	// NOTE - DTI currently ignored as needs fixing
-	//
-	// tests ignored if using MIND or MINDSSD as they are not implemented for multi-channel or weighting
-	if (this->measure_mind == NULL && this->measure_mindssc == NULL)
-	{
-		if (this->inputFloating->nt != this->inputReference->nt)
-		{
-			reg_print_fct_error("reg_base::CheckParameters()");
-			reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)");
-			reg_exit();
-		}
-		double *chanWeightSum = new double[this->inputReference->nt]();
-		double simWeightSum, totWeightSum =0.;
-		double *nmiWeights=NULL, *ssdWeights=NULL, *kldWeights=NULL, *lnccWeights=NULL;
-		if (this->measure_nmi != NULL)
-		{
-			nmiWeights = this->measure_nmi->GetTimepointsWeights();
-			simWeightSum = 0.0;
-			for (int n = 0; n < this->inputReference->nt; n++)
-			{
-				if (nmiWeights[n] < 0)
-				{
-					char text[255];
-					sprintf(text, "The NMI weight for timepoint %d has a negative value - weights must be positive", n);
-					reg_print_fct_error("reg_base::CheckParameters()");
-					reg_print_msg_error(text);
-					reg_exit();
-				}
-				chanWeightSum[n] += nmiWeights[n];
-				simWeightSum += nmiWeights[n];
-				totWeightSum += nmiWeights[n];
-			}
-			if (simWeightSum == 0.0)
-			{
-				reg_print_fct_warn("reg_base::CheckParameters()");
-				reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored");
-			}
-		}
-		if (this->measure_ssd != NULL)
-		{
-			ssdWeights = this->measure_ssd->GetTimepointsWeights();
-			simWeightSum = 0.0;
-			for (int n = 0; n < this->inputReference->nt; n++)
-			{
-				if (ssdWeights[n] < 0)
-				{
-					char text[255];
-					sprintf(text, "The SSD weight for timepoint %d has a negative value - weights must be positive", n);
-					reg_print_fct_error("reg_base::CheckParameters()");
-					reg_print_msg_error(text);
-					reg_exit();
-				}
-				chanWeightSum[n] += ssdWeights[n];
-				simWeightSum += ssdWeights[n];
-				totWeightSum += ssdWeights[n];
-			}
-			if (simWeightSum == 0.0)
-			{
-				reg_print_fct_warn("reg_base::CheckParameters()");
-				reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored");
-			}
-		}
-		if (this->measure_kld != NULL)
-		{
-			kldWeights = this->measure_kld->GetTimepointsWeights();
-			simWeightSum = 0.0;
-			for (int n = 0; n < this->inputReference->nt; n++)
-			{
-				if (kldWeights[n] < 0)
-				{
-					char text[255];
-					sprintf(text, "The KLD weight for timepoint %d has a negative value - weights must be positive", n);
-					reg_print_fct_error("reg_base::CheckParameters()");
-					reg_print_msg_error(text);
-					reg_exit();
-				}
-				chanWeightSum[n] += kldWeights[n];
-				simWeightSum += kldWeights[n];
-				totWeightSum += kldWeights[n];
-			}
-			if (simWeightSum == 0.0)
-			{
-				reg_print_fct_warn("reg_base::CheckParameters()");
-				reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored");
-			}
-		}
-		if (this->measure_lncc != NULL)
-		{
-			lnccWeights = this->measure_lncc->GetTimepointsWeights();
-			simWeightSum = 0.0;
-			for (int n = 0; n < this->inputReference->nt; n++)
-			{
-				if (lnccWeights[n] < 0)
-				{
-					char text[255];
-					sprintf(text, "The LNCC weight for timepoint %d has a negative value - weights must be positive", n);
-					reg_print_fct_error("reg_base::CheckParameters()");
-					reg_print_msg_error(text);
-					reg_exit();
-				}
-				chanWeightSum[n] += lnccWeights[n];
-				simWeightSum += lnccWeights[n];
-				totWeightSum += lnccWeights[n];
-			}
-			if (simWeightSum == 0.0)
-			{
-				reg_print_fct_warn("reg_base::CheckParameters()");
-				reg_print_msg_warn("The LNCC similarity measure has a weight of 0 for all channels so will be ignored");
-			}
-		}
-		for (int n = 0; n < this->inputReference->nt; n++)
-		{
-			if (chanWeightSum[n] == 0)
-			{
-				char text[255];
-				sprintf(text, "Channel %d has a weight of 0 for all similarity measures so will be ignored", n);
-				reg_print_fct_warn("reg_base::CheckParameters()");
-				reg_print_msg_warn(text);
-			}
-			if (this->measure_nmi != NULL)
-				this->measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum);
-			if (this->measure_ssd != NULL)
-				this->measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum);
-			if (this->measure_kld != NULL)
-				this->measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum);
-			if (this->measure_lncc != NULL)
-				this->measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum);
-		}
+void reg_base<T>::CheckParameters() {
+    // CHECK THAT BOTH INPUT IMAGES ARE DEFINED
+    if (this->inputReference == nullptr) {
+        reg_print_fct_error("reg_base::CheckParameters()");
+        reg_print_msg_error("The reference image is not defined");
+        reg_exit();
+    }
+    if (this->inputFloating == nullptr) {
+        reg_print_fct_error("reg_base::CheckParameters()");
+        reg_print_msg_error("The floating image is not defined");
+        reg_exit();
+    }
+
+    // CHECK THE MASK DIMENSION IF IT IS DEFINED
+    if (this->maskImage != nullptr) {
+        if (this->inputReference->nx != this->maskImage->nx ||
+            this->inputReference->ny != this->maskImage->ny ||
+            this->inputReference->nz != this->maskImage->nz) {
+            reg_print_fct_error("reg_base::CheckParameters()");
+            reg_print_msg_error("The reference and mask images have different dimension");
+            reg_exit();
+        }
+    }
+
+    // CHECK THE NUMBER OF LEVEL TO PERFORM
+    if (this->levelToPerform > 0) {
+        this->levelToPerform = this->levelToPerform < this->levelNumber ? this->levelToPerform : this->levelNumber;
+    } else this->levelToPerform = this->levelNumber;
+    if (this->levelToPerform == 0 || this->levelToPerform > this->levelNumber)
+        this->levelToPerform = this->levelNumber;
+
+    // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
+    if (this->measure_nmi == nullptr &&
+        this->measure_ssd == nullptr &&
+        this->measure_dti == nullptr &&
+        this->measure_lncc == nullptr &&
+        this->measure_lncc == nullptr &&
+        this->measure_kld == nullptr &&
+        this->measure_mind == nullptr &&
+        this->measure_mindssc == nullptr) {
+        this->measure_nmi = new reg_nmi;
+        for (int i = 0; i < this->inputReference->nt; ++i)
+            this->measure_nmi->SetTimepointWeight(i, 1.0);
+    }
+
+    // CHECK THAT IMAGES HAVE SAME NUMBER OF CHANNELS (TIMEPOINTS)
+    // THAT EACH CHANNEL HAS AT LEAST ONE SIMILARITY MEASURE ASSIGNED
+    // AND THAT EACH SIMILARITY MEASURE IS USED FOR AT LEAST ONE CHANNEL
+    // NORMALISE CHANNEL AND SIMILARITY WEIGHTS SO TOTAL = 1
+    //
+    // NOTE - DTI currently ignored as needs fixing
+    //
+    // tests ignored if using MIND or MINDSSD as they are not implemented for multi-channel or weighting
+    if (this->measure_mind == nullptr && this->measure_mindssc == nullptr) {
+        if (this->inputFloating->nt != this->inputReference->nt) {
+            reg_print_fct_error("reg_base::CheckParameters()");
+            reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)");
+            reg_exit();
+        }
+        double *chanWeightSum = new double[this->inputReference->nt]();
+        double simWeightSum, totWeightSum = 0.;
+        double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr;
+        if (this->measure_nmi != nullptr) {
+            nmiWeights = this->measure_nmi->GetTimepointsWeights();
+            simWeightSum = 0.0;
+            for (int n = 0; n < this->inputReference->nt; n++) {
+                if (nmiWeights[n] < 0) {
+                    char text[255];
+                    sprintf(text, "The NMI weight for timepoint %d has a negative value - weights must be positive", n);
+                    reg_print_fct_error("reg_base::CheckParameters()");
+                    reg_print_msg_error(text);
+                    reg_exit();
+                }
+                chanWeightSum[n] += nmiWeights[n];
+                simWeightSum += nmiWeights[n];
+                totWeightSum += nmiWeights[n];
+            }
+            if (simWeightSum == 0.0) {
+                reg_print_fct_warn("reg_base::CheckParameters()");
+                reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored");
+            }
+        }
+        if (this->measure_ssd != nullptr) {
+            ssdWeights = this->measure_ssd->GetTimepointsWeights();
+            simWeightSum = 0.0;
+            for (int n = 0; n < this->inputReference->nt; n++) {
+                if (ssdWeights[n] < 0) {
+                    char text[255];
+                    sprintf(text, "The SSD weight for timepoint %d has a negative value - weights must be positive", n);
+                    reg_print_fct_error("reg_base::CheckParameters()");
+                    reg_print_msg_error(text);
+                    reg_exit();
+                }
+                chanWeightSum[n] += ssdWeights[n];
+                simWeightSum += ssdWeights[n];
+                totWeightSum += ssdWeights[n];
+            }
+            if (simWeightSum == 0.0) {
+                reg_print_fct_warn("reg_base::CheckParameters()");
+                reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored");
+            }
+        }
+        if (this->measure_kld != nullptr) {
+            kldWeights = this->measure_kld->GetTimepointsWeights();
+            simWeightSum = 0.0;
+            for (int n = 0; n < this->inputReference->nt; n++) {
+                if (kldWeights[n] < 0) {
+                    char text[255];
+                    sprintf(text, "The KLD weight for timepoint %d has a negative value - weights must be positive", n);
+                    reg_print_fct_error("reg_base::CheckParameters()");
+                    reg_print_msg_error(text);
+                    reg_exit();
+                }
+                chanWeightSum[n] += kldWeights[n];
+                simWeightSum += kldWeights[n];
+                totWeightSum += kldWeights[n];
+            }
+            if (simWeightSum == 0.0) {
+                reg_print_fct_warn("reg_base::CheckParameters()");
+                reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored");
+            }
+        }
+        if (this->measure_lncc != nullptr) {
+            lnccWeights = this->measure_lncc->GetTimepointsWeights();
+            simWeightSum = 0.0;
+            for (int n = 0; n < this->inputReference->nt; n++) {
+                if (lnccWeights[n] < 0) {
+                    char text[255];
+                    sprintf(text, "The LNCC weight for timepoint %d has a negative value - weights must be positive", n);
+                    reg_print_fct_error("reg_base::CheckParameters()");
+                    reg_print_msg_error(text);
+                    reg_exit();
+                }
+                chanWeightSum[n] += lnccWeights[n];
+                simWeightSum += lnccWeights[n];
+                totWeightSum += lnccWeights[n];
+            }
+            if (simWeightSum == 0.0) {
+                reg_print_fct_warn("reg_base::CheckParameters()");
+                reg_print_msg_warn("The LNCC similarity measure has a weight of 0 for all channels so will be ignored");
+            }
+        }
+        for (int n = 0; n < this->inputReference->nt; n++) {
+            if (chanWeightSum[n] == 0) {
+                char text[255];
+                sprintf(text, "Channel %d has a weight of 0 for all similarity measures so will be ignored", n);
+                reg_print_fct_warn("reg_base::CheckParameters()");
+                reg_print_msg_warn(text);
+            }
+            if (this->measure_nmi != nullptr)
+                this->measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum);
+            if (this->measure_ssd != nullptr)
+                this->measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum);
+            if (this->measure_kld != nullptr)
+                this->measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum);
+            if (this->measure_lncc != nullptr)
+                this->measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum);
+        }
         delete[] chanWeightSum;
-	}
+    }
 
 #ifndef NDEBUG
-	reg_print_fct_debug("reg_base<T>::CheckParameters");
+    reg_print_fct_debug("reg_base<T>::CheckParameters");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::InitialiseSimilarity()
-{
-
-   if(this->localWeightSimInput!=NULL){
-      if(this->localWeightSimCurrent!=NULL)
-         nifti_image_free(this->localWeightSimCurrent);
-      this->localWeightSimCurrent=nifti_copy_nim_info(this->currentReference);
-      this->localWeightSimCurrent->dim[0]=this->localWeightSimCurrent->ndim=this->localWeightSimInput->dim[0];
-      this->localWeightSimCurrent->dim[4]=this->localWeightSimCurrent->nt=this->localWeightSimInput->dim[4];
-      this->localWeightSimCurrent->dim[5]=this->localWeightSimCurrent->nu=this->localWeightSimInput->dim[5];
-      this->localWeightSimCurrent->nvox = (size_t)this->localWeightSimCurrent->nx *
+void reg_base<T>::InitialiseSimilarity() {
+
+    if (this->localWeightSimInput != nullptr) {
+        if (this->localWeightSimCurrent != nullptr)
+            nifti_image_free(this->localWeightSimCurrent);
+        this->localWeightSimCurrent = nifti_copy_nim_info(this->currentReference);
+        this->localWeightSimCurrent->dim[0] = this->localWeightSimCurrent->ndim = this->localWeightSimInput->dim[0];
+        this->localWeightSimCurrent->dim[4] = this->localWeightSimCurrent->nt = this->localWeightSimInput->dim[4];
+        this->localWeightSimCurrent->dim[5] = this->localWeightSimCurrent->nu = this->localWeightSimInput->dim[5];
+        this->localWeightSimCurrent->nvox = (size_t)this->localWeightSimCurrent->nx *
             this->localWeightSimCurrent->ny * this->localWeightSimCurrent->nz *
             this->localWeightSimCurrent->nt * this->localWeightSimCurrent->nu;
-      this->localWeightSimCurrent->data = (void *)malloc(this->localWeightSimCurrent->nvox *
-                                                         this->localWeightSimCurrent->nbyper);
-      reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient,
-                                     this->voxelBasedMeasureGradient,
-                                     0.f);
-      reg_getDeformationFromDisplacement(this->voxelBasedMeasureGradient);
-      reg_tools_changeDatatype<T>(localWeightSimInput);
-      reg_resampleImage(this->localWeightSimInput,
-                        this->localWeightSimCurrent,
-                        this->voxelBasedMeasureGradient,
-                        NULL,
-                        1,
-                        0);
-   }
-   else this->localWeightSimCurrent=NULL;
-
-   if(this->measure_nmi!=NULL)
-      this->measure_nmi->InitialiseMeasure(this->currentReference,
-                                           this->currentFloating,
-                                           this->currentMask,
-                                           this->warped,
-                                           this->warImgGradient,
-                                           this->voxelBasedMeasureGradient,
-                                           this->localWeightSimCurrent
-                                          );
-
-   if(this->measure_ssd!=NULL)
-      this->measure_ssd->InitialiseMeasure(this->currentReference,
-                                           this->currentFloating,
-                                           this->currentMask,
-                                           this->warped,
-                                           this->warImgGradient,
-                                           this->voxelBasedMeasureGradient,
-                                           this->localWeightSimCurrent
-                                          );
-
-   if(this->measure_kld!=NULL)
-      this->measure_kld->InitialiseMeasure(this->currentReference,
-                                           this->currentFloating,
-                                           this->currentMask,
-                                           this->warped,
-                                           this->warImgGradient,
-                                           this->voxelBasedMeasureGradient,
-                                           this->localWeightSimCurrent
-                                          );
-
-   if(this->measure_lncc!=NULL)
-      this->measure_lncc->InitialiseMeasure(this->currentReference,
-                                            this->currentFloating,
-                                            this->currentMask,
-                                            this->warped,
-                                            this->warImgGradient,
-                                            this->voxelBasedMeasureGradient,
-                                            this->localWeightSimCurrent
-                                           );
-
-   if(this->measure_dti!=NULL)
-      this->measure_dti->InitialiseMeasure(this->currentReference,
-                                           this->currentFloating,
-                                           this->currentMask,
-                                           this->warped,
-                                           this->warImgGradient,
-                                           this->voxelBasedMeasureGradient,
-                                           this->localWeightSimCurrent
-                                          );
-
-   if(this->measure_mind!=NULL)
-      this->measure_mind->InitialiseMeasure(this->currentReference,
-                                            this->currentFloating,
-                                            this->currentMask,
-                                            this->warped,
-                                            this->warImgGradient,
-                                            this->voxelBasedMeasureGradient,
-                                            this->localWeightSimCurrent
-                                            );
-
-   if(this->measure_mindssc!=NULL)
-      this->measure_mindssc->InitialiseMeasure(this->currentReference,
-                                               this->currentFloating,
-                                               this->currentMask,
-                                               this->warped,
-                                               this->warImgGradient,
-                                               this->voxelBasedMeasureGradient,
-                                               this->localWeightSimCurrent
-                                               );
+        this->localWeightSimCurrent->data = (void*)malloc(this->localWeightSimCurrent->nvox *
+                                                           this->localWeightSimCurrent->nbyper);
+        reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, this->voxelBasedMeasureGradient, 0);
+        reg_getDeformationFromDisplacement(this->voxelBasedMeasureGradient);
+        reg_tools_changeDatatype<T>(localWeightSimInput);
+        reg_resampleImage(this->localWeightSimInput,
+                          this->localWeightSimCurrent,
+                          this->voxelBasedMeasureGradient,
+                          nullptr,
+                          1,
+                          0);
+    } else this->localWeightSimCurrent = nullptr;
+
+    if (this->measure_nmi != nullptr)
+        this->measure_nmi->InitialiseMeasure(this->currentReference,
+                                             this->currentFloating,
+                                             this->currentMask,
+                                             this->warped,
+                                             this->warImgGradient,
+                                             this->voxelBasedMeasureGradient,
+                                             this->localWeightSimCurrent);
+
+    if (this->measure_ssd != nullptr)
+        this->measure_ssd->InitialiseMeasure(this->currentReference,
+                                             this->currentFloating,
+                                             this->currentMask,
+                                             this->warped,
+                                             this->warImgGradient,
+                                             this->voxelBasedMeasureGradient,
+                                             this->localWeightSimCurrent);
+
+    if (this->measure_kld != nullptr)
+        this->measure_kld->InitialiseMeasure(this->currentReference,
+                                             this->currentFloating,
+                                             this->currentMask,
+                                             this->warped,
+                                             this->warImgGradient,
+                                             this->voxelBasedMeasureGradient,
+                                             this->localWeightSimCurrent);
+
+    if (this->measure_lncc != nullptr)
+        this->measure_lncc->InitialiseMeasure(this->currentReference,
+                                              this->currentFloating,
+                                              this->currentMask,
+                                              this->warped,
+                                              this->warImgGradient,
+                                              this->voxelBasedMeasureGradient,
+                                              this->localWeightSimCurrent);
+
+    if (this->measure_dti != nullptr)
+        this->measure_dti->InitialiseMeasure(this->currentReference,
+                                             this->currentFloating,
+                                             this->currentMask,
+                                             this->warped,
+                                             this->warImgGradient,
+                                             this->voxelBasedMeasureGradient,
+                                             this->localWeightSimCurrent);
+
+    if (this->measure_mind != nullptr)
+        this->measure_mind->InitialiseMeasure(this->currentReference,
+                                              this->currentFloating,
+                                              this->currentMask,
+                                              this->warped,
+                                              this->warImgGradient,
+                                              this->voxelBasedMeasureGradient,
+                                              this->localWeightSimCurrent);
+
+    if (this->measure_mindssc != nullptr)
+        this->measure_mindssc->InitialiseMeasure(this->currentReference,
+                                                 this->currentFloating,
+                                                 this->currentMask,
+                                                 this->warped,
+                                                 this->warImgGradient,
+                                                 this->voxelBasedMeasureGradient,
+                                                 this->localWeightSimCurrent);
 
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::InitialiseSimilarity");
+    reg_print_fct_debug("reg_base<T>::InitialiseSimilarity");
 #endif
-   return;
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::Initialise()
-{
-   if(this->initialised) return;
-
-   this->CheckParameters();
-
    //PLATFORM
 //   this->platform = new Platform(this->platformCode);
 //   this->platform->setGpuIdx(this->gpuIdx);
-
-   // CREATE THE PYRAMIDE IMAGES
-   if(this->usePyramid)
-   {
-      this->referencePyramid = (nifti_image **)malloc(this->levelToPerform*sizeof(nifti_image *));
-      this->floatingPyramid = (nifti_image **)malloc(this->levelToPerform*sizeof(nifti_image *));
-      this->maskPyramid = (int **)malloc(this->levelToPerform*sizeof(int *));
-      this->activeVoxelNumber= (int *)malloc(this->levelToPerform*sizeof(int));
-   }
-   else
-   {
-      this->referencePyramid = (nifti_image **)malloc(sizeof(nifti_image *));
-      this->floatingPyramid = (nifti_image **)malloc(sizeof(nifti_image *));
-      this->maskPyramid = (int **)malloc(sizeof(int *));
-      this->activeVoxelNumber= (int *)malloc(sizeof(int));
-   }
-
-   // Update the input images threshold if required
-   if(this->robustRange==true){
-      // Create a copy of the reference image to extract the robust range
-      nifti_image *temp_reference = nifti_copy_nim_info(this->inputReference);
-      temp_reference->data = (void *)malloc(temp_reference->nvox * temp_reference->nbyper);
-      memcpy(temp_reference->data, this->inputReference->data,temp_reference->nvox * temp_reference->nbyper);
-      reg_tools_changeDatatype<T>(temp_reference);
-      // Extract the robust range of the reference image
-      T *refDataPtr = static_cast<T *>(temp_reference->data);
-      reg_heapSort(refDataPtr, temp_reference->nvox);
-      // Update the reference threshold values if no value has been setup by the user
-      if(this->referenceThresholdLow[0]==-std::numeric_limits<T>::max())
-         this->referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox*0.02f)];
-      if(this->referenceThresholdUp[0]==std::numeric_limits<T>::max())
-         this->referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox*0.98f)];
-      // Free the temporarly allocated image
-      nifti_image_free(temp_reference);
-
-      // Create a copy of the floating image to extract the robust range
-      nifti_image *temp_floating = nifti_copy_nim_info(this->inputFloating);
-      temp_floating->data = (void *)malloc(temp_floating->nvox * temp_floating->nbyper);
-      memcpy(temp_floating->data, this->inputFloating->data,temp_floating->nvox * temp_floating->nbyper);
-      reg_tools_changeDatatype<T>(temp_floating);
-      // Extract the robust range of the floating image
-      T *floDataPtr = static_cast<T *>(temp_floating->data);
-      reg_heapSort(floDataPtr, temp_floating->nvox);
-      // Update the floating threshold values if no value has been setup by the user
-      if(this->floatingThresholdLow[0]==-std::numeric_limits<T>::max())
-         this->floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox*0.02f)];
-      if(this->floatingThresholdUp[0]==std::numeric_limits<T>::max())
-         this->floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox*0.98f)];
-      // Free the temporarly allocated image
-      nifti_image_free(temp_floating);
-   }
-
-   // FINEST LEVEL OF REGISTRATION
-   if(this->usePyramid)
-   {
-      reg_createImagePyramid<T>(this->inputReference, this->referencePyramid, this->levelNumber, this->levelToPerform);
-      reg_createImagePyramid<T>(this->inputFloating, this->floatingPyramid, this->levelNumber, this->levelToPerform);
-      if (this->maskImage!=NULL)
-         reg_createMaskPyramid<T>(this->maskImage, this->maskPyramid, this->levelNumber, this->levelToPerform, this->activeVoxelNumber);
-      else
-      {
-         for(unsigned int l=0; l<this->levelToPerform; ++l)
-         {
-            this->activeVoxelNumber[l]=this->referencePyramid[l]->nx*this->referencePyramid[l]->ny*this->referencePyramid[l]->nz;
-            this->maskPyramid[l]=(int *)calloc(activeVoxelNumber[l],sizeof(int));
-         }
-      }
-   }
-   else
-   {
-      reg_createImagePyramid<T>(this->inputReference, this->referencePyramid, 1, 1);
-      reg_createImagePyramid<T>(this->inputFloating, this->floatingPyramid, 1, 1);
-      if (this->maskImage!=NULL)
-         reg_createMaskPyramid<T>(this->maskImage, this->maskPyramid, 1, 1, this->activeVoxelNumber);
-      else
-      {
-         this->activeVoxelNumber[0]=this->referencePyramid[0]->nx*this->referencePyramid[0]->ny*this->referencePyramid[0]->nz;
-         this->maskPyramid[0]=(int *)calloc(activeVoxelNumber[0],sizeof(int));
-      }
-   }
-
-   unsigned int pyramidalLevelNumber=1;
-   if(this->usePyramid) pyramidalLevelNumber=this->levelToPerform;
-
-   // SMOOTH THE INPUT IMAGES IF REQUIRED
-   for(unsigned int l=0; l<this->levelToPerform; l++)
-   {
-      if(this->referenceSmoothingSigma!=0.0)
-      {
-         bool *active = new bool[this->referencePyramid[l]->nt];
-         float *sigma = new float[this->referencePyramid[l]->nt];
-         active[0]=true;
-         for(int i=1; i<this->referencePyramid[l]->nt; ++i)
-            active[i]=false;
-         sigma[0]=this->referenceSmoothingSigma;
-         reg_tools_kernelConvolution(this->referencePyramid[l], sigma, GAUSSIAN_KERNEL, NULL, active);
-         delete []active;
-         delete []sigma;
-      }
-      if(this->floatingSmoothingSigma!=0.0)
-      {
-         // Only the first image is smoothed
-         bool *active = new bool[this->floatingPyramid[l]->nt];
-         float *sigma = new float[this->floatingPyramid[l]->nt];
-         active[0]=true;
-         for(int i=1; i<this->floatingPyramid[l]->nt; ++i)
-            active[i]=false;
-         sigma[0]=this->floatingSmoothingSigma;
-         reg_tools_kernelConvolution(this->floatingPyramid[l], sigma, GAUSSIAN_KERNEL, NULL, active);
-         delete []active;
-         delete []sigma;
-      }
-   }
-
-   // THRESHOLD THE INPUT IMAGES IF REQUIRED
-   for(unsigned int l=0; l<pyramidalLevelNumber; l++)
-   {
-      reg_thresholdImage<T>(this->referencePyramid[l],this->referenceThresholdLow[0], this->referenceThresholdUp[0]);
-      reg_thresholdImage<T>(this->floatingPyramid[l],this->referenceThresholdLow[0], this->referenceThresholdUp[0]);
-   }
-
-   this->initialised=true;
+void reg_base<T>::Initialise() {
+    if (this->initialised) return;
+
+    this->CheckParameters();
+
+
+    // CREATE THE PYRAMIDE IMAGES
+    if (this->usePyramid) {
+        this->referencePyramid = (nifti_image**)malloc(this->levelToPerform * sizeof(nifti_image*));
+        this->floatingPyramid = (nifti_image**)malloc(this->levelToPerform * sizeof(nifti_image*));
+        this->maskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*));
+        this->activeVoxelNumber = (int*)malloc(this->levelToPerform * sizeof(int));
+    } else {
+        this->referencePyramid = (nifti_image**)malloc(sizeof(nifti_image*));
+        this->floatingPyramid = (nifti_image**)malloc(sizeof(nifti_image*));
+        this->maskPyramid = (int**)malloc(sizeof(int*));
+        this->activeVoxelNumber = (int*)malloc(sizeof(int));
+    }
+
+    // Update the input images threshold if required
+    if (this->robustRange == true) {
+        // Create a copy of the reference image to extract the robust range
+        nifti_image *temp_reference = nifti_copy_nim_info(this->inputReference);
+        temp_reference->data = (void*)malloc(temp_reference->nvox * temp_reference->nbyper);
+        memcpy(temp_reference->data, this->inputReference->data, temp_reference->nvox * temp_reference->nbyper);
+        reg_tools_changeDatatype<T>(temp_reference);
+        // Extract the robust range of the reference image
+        T *refDataPtr = static_cast<T *>(temp_reference->data);
+        reg_heapSort(refDataPtr, temp_reference->nvox);
+        // Update the reference threshold values if no value has been setup by the user
+        if (this->referenceThresholdLow[0] == -std::numeric_limits<T>::max())
+            this->referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.02f)];
+        if (this->referenceThresholdUp[0] == std::numeric_limits<T>::max())
+            this->referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.98f)];
+        // Free the temporarly allocated image
+        nifti_image_free(temp_reference);
+
+        // Create a copy of the floating image to extract the robust range
+        nifti_image *temp_floating = nifti_copy_nim_info(this->inputFloating);
+        temp_floating->data = (void*)malloc(temp_floating->nvox * temp_floating->nbyper);
+        memcpy(temp_floating->data, this->inputFloating->data, temp_floating->nvox * temp_floating->nbyper);
+        reg_tools_changeDatatype<T>(temp_floating);
+        // Extract the robust range of the floating image
+        T *floDataPtr = static_cast<T *>(temp_floating->data);
+        reg_heapSort(floDataPtr, temp_floating->nvox);
+        // Update the floating threshold values if no value has been setup by the user
+        if (this->floatingThresholdLow[0] == -std::numeric_limits<T>::max())
+            this->floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.02f)];
+        if (this->floatingThresholdUp[0] == std::numeric_limits<T>::max())
+            this->floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.98f)];
+        // Free the temporarly allocated image
+        nifti_image_free(temp_floating);
+    }
+
+    // FINEST LEVEL OF REGISTRATION
+    if (this->usePyramid) {
+        reg_createImagePyramid<T>(this->inputReference, this->referencePyramid, this->levelNumber, this->levelToPerform);
+        reg_createImagePyramid<T>(this->inputFloating, this->floatingPyramid, this->levelNumber, this->levelToPerform);
+        if (this->maskImage != nullptr)
+            reg_createMaskPyramid<T>(this->maskImage, this->maskPyramid, this->levelNumber, this->levelToPerform, this->activeVoxelNumber);
+        else {
+            for (unsigned int l = 0; l < this->levelToPerform; ++l) {
+                this->activeVoxelNumber[l] = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz;
+                this->maskPyramid[l] = (int*)calloc(activeVoxelNumber[l], sizeof(int));
+            }
+        }
+    } else {
+        reg_createImagePyramid<T>(this->inputReference, this->referencePyramid, 1, 1);
+        reg_createImagePyramid<T>(this->inputFloating, this->floatingPyramid, 1, 1);
+        if (this->maskImage != nullptr)
+            reg_createMaskPyramid<T>(this->maskImage, this->maskPyramid, 1, 1, this->activeVoxelNumber);
+        else {
+            this->activeVoxelNumber[0] = this->referencePyramid[0]->nx * this->referencePyramid[0]->ny * this->referencePyramid[0]->nz;
+            this->maskPyramid[0] = (int*)calloc(activeVoxelNumber[0], sizeof(int));
+        }
+    }
+
+    unsigned int pyramidalLevelNumber = 1;
+    if (this->usePyramid) pyramidalLevelNumber = this->levelToPerform;
+
+    // SMOOTH THE INPUT IMAGES IF REQUIRED
+    for (unsigned int l = 0; l < this->levelToPerform; l++) {
+        if (this->referenceSmoothingSigma != 0.0) {
+            bool *active = new bool[this->referencePyramid[l]->nt];
+            float *sigma = new float[this->referencePyramid[l]->nt];
+            active[0] = true;
+            for (int i = 1; i < this->referencePyramid[l]->nt; ++i)
+                active[i] = false;
+            sigma[0] = this->referenceSmoothingSigma;
+            reg_tools_kernelConvolution(this->referencePyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active);
+            delete[]active;
+            delete[]sigma;
+        }
+        if (this->floatingSmoothingSigma != 0.0) {
+            // Only the first image is smoothed
+            bool *active = new bool[this->floatingPyramid[l]->nt];
+            float *sigma = new float[this->floatingPyramid[l]->nt];
+            active[0] = true;
+            for (int i = 1; i < this->floatingPyramid[l]->nt; ++i)
+                active[i] = false;
+            sigma[0] = this->floatingSmoothingSigma;
+            reg_tools_kernelConvolution(this->floatingPyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active);
+            delete[]active;
+            delete[]sigma;
+        }
+    }
+
+    // THRESHOLD THE INPUT IMAGES IF REQUIRED
+    for (unsigned int l = 0; l < pyramidalLevelNumber; l++) {
+        reg_thresholdImage<T>(this->referencePyramid[l], this->referenceThresholdLow[0], this->referenceThresholdUp[0]);
+        reg_thresholdImage<T>(this->floatingPyramid[l], this->referenceThresholdLow[0], this->referenceThresholdUp[0]);
+    }
+
+    this->initialised = true;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::Initialise");
+    reg_print_fct_debug("reg_base<T>::Initialise");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::SetOptimiser()
-{
-   if(this->useConjGradient)
-      this->optimiser=new reg_conjugateGradient<T>();
-   else this->optimiser=new reg_optimiser<T>();
+void reg_base<T>::SetOptimiser() {
+    if (this->useConjGradient)
+        this->optimiser = new reg_conjugateGradient<T>();
+    else this->optimiser = new reg_optimiser<T>();
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetOptimiser");
+    reg_print_fct_debug("reg_base<T>::SetOptimiser");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-double reg_base<T>::ComputeSimilarityMeasure()
-{
-   double measure=0.;
-   if(this->measure_nmi!=NULL)
-      measure += this->measure_nmi->GetSimilarityMeasureValue();
+double reg_base<T>::ComputeSimilarityMeasure() {
+    double measure = 0.;
+    if (this->measure_nmi != nullptr)
+        measure += this->measure_nmi->GetSimilarityMeasureValue();
 
-   if(this->measure_ssd!=NULL)
-      measure += this->measure_ssd->GetSimilarityMeasureValue();
+    if (this->measure_ssd != nullptr)
+        measure += this->measure_ssd->GetSimilarityMeasureValue();
 
-   if(this->measure_kld!=NULL)
-      measure += this->measure_kld->GetSimilarityMeasureValue();
+    if (this->measure_kld != nullptr)
+        measure += this->measure_kld->GetSimilarityMeasureValue();
 
-   if(this->measure_lncc!=NULL)
-      measure += this->measure_lncc->GetSimilarityMeasureValue();
+    if (this->measure_lncc != nullptr)
+        measure += this->measure_lncc->GetSimilarityMeasureValue();
 
-   if(this->measure_dti!=NULL)
-      measure += this->measure_dti->GetSimilarityMeasureValue();
+    if (this->measure_dti != nullptr)
+        measure += this->measure_dti->GetSimilarityMeasureValue();
 
-   if(this->measure_mind!=NULL)
-      measure += this->measure_mind->GetSimilarityMeasureValue();
+    if (this->measure_mind != nullptr)
+        measure += this->measure_mind->GetSimilarityMeasureValue();
 
-   if(this->measure_mindssc!=NULL)
-      measure += this->measure_mindssc->GetSimilarityMeasureValue();
+    if (this->measure_mindssc != nullptr)
+        measure += this->measure_mindssc->GetSimilarityMeasureValue();
 
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::ComputeSimilarityMeasure");
+    reg_print_fct_debug("reg_base<T>::ComputeSimilarityMeasure");
 #endif
-   return double(this->similarityWeight) * measure;
+    return double(this->similarityWeight) * measure;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::GetVoxelBasedGradient()
-{
-   // The voxel based gradient image is filled with zeros
-   reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient,
-                                  this->voxelBasedMeasureGradient,
-                                  0.f);
-
-   // The intensity gradient is first computed
-   //   if(this->measure_nmi!=NULL || this->measure_ssd!=NULL ||
-   //         this->measure_kld!=NULL || this->measure_lncc!=NULL ||
-   //         this->measure_dti!=NULL)
-   //   {
-   //    if(this->measure_dti!=NULL){
-   //        reg_getImageGradient(this->currentFloating,
-   //                             this->warImgGradient,
-   //                             this->deformationFieldImage,
-   //                             this->currentMask,
-   //                             this->interpolation,
-   //                             this->warpedPaddingValue,
-   //                             this->measure_dti->GetActiveTimepoints(),
-   //		 					   this->forwardJacobianMatrix,
-   //							   this->warped);
-   //    }
-   //    else{
-   //    }
-   //   }
-
-   //   if(this->measure_dti!=NULL)
-   //      this->measure_dti->GetVoxelBasedSimilarityMeasureGradient();
-
-   for(int t=0; t<this->currentReference->nt; ++t){
-      reg_getImageGradient(this->currentFloating,
-                           this->warImgGradient,
-                           this->deformationFieldImage,
-                           this->currentMask,
-                           this->interpolation,
-                           this->warpedPaddingValue,
-                           t);
-
-      // The gradient of the various measures of similarity are computed
-      if(this->measure_nmi!=NULL)
-         this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t);
-
-      if(this->measure_ssd!=NULL)
-         this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t);
-
-      if(this->measure_kld!=NULL)
-         this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t);
-
-      if(this->measure_lncc!=NULL)
-         this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t);
-
-      if(this->measure_mind!=NULL)
-         this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t);
-
-      if(this->measure_mindssc!=NULL)
-         this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t);
-   }
+void reg_base<T>::GetVoxelBasedGradient() {
+    // The voxel based gradient image is filled with zeros
+    reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient,
+                                   this->voxelBasedMeasureGradient,
+                                   0.f);
+
+    // The intensity gradient is first computed
+    //   if(this->measure_nmi!=nullptr || this->measure_ssd!=nullptr ||
+    //         this->measure_kld!=nullptr || this->measure_lncc!=nullptr ||
+    //         this->measure_dti!=nullptr)
+    //   {
+    //    if(this->measure_dti!=nullptr){
+    //        reg_getImageGradient(this->currentFloating,
+    //                             this->warImgGradient,
+    //                             this->deformationFieldImage,
+    //                             this->currentMask,
+    //                             this->interpolation,
+    //                             this->warpedPaddingValue,
+    //                             this->measure_dti->GetActiveTimepoints(),
+    //		 					   this->forwardJacobianMatrix,
+    //							   this->warped);
+    //    }
+    //    else{
+    //    }
+    //   }
+
+    //   if(this->measure_dti!=nullptr)
+    //      this->measure_dti->GetVoxelBasedSimilarityMeasureGradient();
+
+    for (int t = 0; t < this->currentReference->nt; ++t) {
+        reg_getImageGradient(this->currentFloating,
+                             this->warImgGradient,
+                             this->deformationFieldImage,
+                             this->currentMask,
+                             this->interpolation,
+                             this->warpedPaddingValue,
+                             t);
+
+        // The gradient of the various measures of similarity are computed
+        if (this->measure_nmi != nullptr)
+            this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t);
+
+        if (this->measure_ssd != nullptr)
+            this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t);
+
+        if (this->measure_kld != nullptr)
+            this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t);
+
+        if (this->measure_lncc != nullptr)
+            this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t);
+
+        if (this->measure_mind != nullptr)
+            this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t);
+
+        if (this->measure_mindssc != nullptr)
+            this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t);
+    }
 
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::GetVoxelBasedGradient");
+    reg_print_fct_debug("reg_base<T>::GetVoxelBasedGradient");
 #endif
 }
 /* *************************************************************** */
@@ -1258,408 +1115,366 @@ void reg_base<T>::GetVoxelBasedGradient()
 //template<class T>
 //void reg_base<T>::ApproximateParzenWindow()
 //{
-//    if(this->measure_nmi==NULL)
+//    if(this->measure_nmi==nullptr)
 //        this->measure_nmi=new reg_nmi;
 //    this->measure_nmi=approxParzenWindow = true;
-//    return;
 //}
 ///* *************************************************************** */
 //template<class T>
 //void reg_base<T>::DoNotApproximateParzenWindow()
 //{
-//    if(this->measure_nmi==NULL)
+//    if(this->measure_nmi==nullptr)
 //        this->measure_nmi=new reg_nmi;
 //    this->measure_nmi=approxParzenWindow = false;
-//    return;
 //}
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber)
-{
-   if(this->measure_nmi==NULL)
-      this->measure_nmi=new reg_nmi;
-   this->measure_nmi->SetTimepointWeight(timepoint,1.0);//weight initially set to default value of 1.0
-   // I am here adding 4 to the specified bin number to accomodate for
-   // the spline support
-   this->measure_nmi->SetReferenceBinNumber(refBinNumber+4, timepoint);
+void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
+    if (this->measure_nmi == nullptr)
+        this->measure_nmi = new reg_nmi;
+    this->measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    // I am here adding 4 to the specified bin number to accomodate for
+    // the spline support
+    this->measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseNMISetReferenceBinNumber");
+    reg_print_fct_debug("reg_base<T>::UseNMISetReferenceBinNumber");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber)
-{
-   if(this->measure_nmi==NULL)
-      this->measure_nmi=new reg_nmi;
-   this->measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
-   // I am here adding 4 to the specified bin number to accomodate for
-   // the spline support
-   this->measure_nmi->SetFloatingBinNumber(floBinNumber+4, timepoint);
+void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
+    if (this->measure_nmi == nullptr)
+        this->measure_nmi = new reg_nmi;
+    this->measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    // I am here adding 4 to the specified bin number to accomodate for
+    // the spline support
+    this->measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseNMISetFloatingBinNumber");
+    reg_print_fct_debug("reg_base<T>::UseNMISetFloatingBinNumber");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseSSD(int timepoint, bool normalise)
-{
-   if(this->measure_ssd==NULL)
-      this->measure_ssd=new reg_ssd();
-   this->measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
-   this->measure_ssd->SetNormaliseTimepoint(timepoint,normalise);
+void reg_base<T>::UseSSD(int timepoint, bool normalise) {
+    if (this->measure_ssd == nullptr)
+        this->measure_ssd = new reg_ssd();
+    this->measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    this->measure_ssd->SetNormaliseTimepoint(timepoint, normalise);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseSSD");
+    reg_print_fct_debug("reg_base<T>::UseSSD");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseMIND(int timepoint, int offset)
-{
-   if(this->measure_mind==NULL)
-      this->measure_mind=new reg_mind;
-   this->measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
-   this->measure_mind->SetDescriptorOffset(offset);
+void reg_base<T>::UseMIND(int timepoint, int offset) {
+    if (this->measure_mind == nullptr)
+        this->measure_mind = new reg_mind;
+    this->measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
+    this->measure_mind->SetDescriptorOffset(offset);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseMIND");
+    reg_print_fct_debug("reg_base<T>::UseMIND");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseMINDSSC(int timepoint, int offset)
-{
-   if(this->measure_mindssc==NULL)
-      this->measure_mindssc=new reg_mindssc;
-   this->measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
-   this->measure_mindssc->SetDescriptorOffset(offset);
+void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
+    if (this->measure_mindssc == nullptr)
+        this->measure_mindssc = new reg_mindssc;
+    this->measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
+    this->measure_mindssc->SetDescriptorOffset(offset);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseMINDSSC");
+    reg_print_fct_debug("reg_base<T>::UseMINDSSC");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseKLDivergence(int timepoint)
-{
-   if(this->measure_kld==NULL)
-      this->measure_kld=new reg_kld;
-   this->measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+void reg_base<T>::UseKLDivergence(int timepoint) {
+    if (this->measure_kld == nullptr)
+        this->measure_kld = new reg_kld;
+    this->measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseKLDivergence");
+    reg_print_fct_debug("reg_base<T>::UseKLDivergence");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseLNCC(int timepoint, float stddev)
-{
-   if(this->measure_lncc==NULL)
-      this->measure_lncc=new reg_lncc;
-   this->measure_lncc->SetKernelStandardDeviation(timepoint,
-         stddev);
-   this->measure_lncc->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+void reg_base<T>::UseLNCC(int timepoint, float stddev) {
+    if (this->measure_lncc == nullptr)
+        this->measure_lncc = new reg_lncc;
+    this->measure_lncc->SetKernelStandardDeviation(timepoint, stddev);
+    this->measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseLNCC");
+    reg_print_fct_debug("reg_base<T>::UseLNCC");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLNCCKernelType(int type)
-{
-   if(this->measure_lncc==NULL)
-   {
-      reg_print_fct_error("reg_base<T>::SetLNCCKernelType");
-      reg_print_msg_error("The LNCC object has to be created first");
-      reg_exit();
-   }
-   this->measure_lncc->SetKernelType(type);
+void reg_base<T>::SetLNCCKernelType(int type) {
+    if (this->measure_lncc == nullptr) {
+        reg_print_fct_error("reg_base<T>::SetLNCCKernelType");
+        reg_print_msg_error("The LNCC object has to be created first");
+        reg_exit();
+    }
+    this->measure_lncc->SetKernelType(type);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::SetLNCCKernelType");
+    reg_print_fct_debug("reg_base<T>::SetLNCCKernelType");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseDTI(bool *timepoint)
-{
-   reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring");
-   reg_exit();
-
-   if(this->measure_dti==NULL)
-      this->measure_dti=new reg_dti;
-   for(int i=0; i<this->inputReference->nt; ++i)
-   {
-      if(timepoint[i]==true)
-        this->measure_dti->SetTimepointWeight(i, 1.0);//weight set to 1.0 to indicate timepoint is active
-   }
+void reg_base<T>::UseDTI(bool *timepoint) {
+    reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring");
+    reg_exit();
+
+    if (this->measure_dti == nullptr)
+        this->measure_dti = new reg_dti;
+    for (int i = 0; i < this->inputReference->nt; ++i) {
+        if (timepoint[i] == true)
+            this->measure_dti->SetTimepointWeight(i, 1.0);  // weight set to 1.0 to indicate timepoint is active
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::UseDTI");
+    reg_print_fct_debug("reg_base<T>::UseDTI");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetNMIWeight(int timepoint, double weight)
-{
-	if (this->measure_nmi == NULL)
-	{
-		reg_print_fct_error("reg_base<T>::SetNMIWeight");
-		reg_print_msg_error("The NMI object has to be created before the timepoint weights can be set");
-		reg_exit();
-	}
-	this->measure_nmi->SetTimepointWeight(timepoint, weight);
+void reg_base<T>::SetNMIWeight(int timepoint, double weight) {
+    if (this->measure_nmi == nullptr) {
+        reg_print_fct_error("reg_base<T>::SetNMIWeight");
+        reg_print_msg_error("The NMI object has to be created before the timepoint weights can be set");
+        reg_exit();
+    }
+    this->measure_nmi->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLNCCWeight(int timepoint, double weight)
-{
-	if (this->measure_lncc == NULL)
-	{
-		reg_print_fct_error("reg_base<T>::SetLNCCWeight");
-		reg_print_msg_error("The LNCC object has to be created before the timepoint weights can be set");
-		reg_exit();
-	}
-	this->measure_lncc->SetTimepointWeight(timepoint, weight);
+void reg_base<T>::SetLNCCWeight(int timepoint, double weight) {
+    if (this->measure_lncc == nullptr) {
+        reg_print_fct_error("reg_base<T>::SetLNCCWeight");
+        reg_print_msg_error("The LNCC object has to be created before the timepoint weights can be set");
+        reg_exit();
+    }
+    this->measure_lncc->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetSSDWeight(int timepoint, double weight)
-{
-	if (this->measure_ssd == NULL)
-	{
-		reg_print_fct_error("reg_base<T>::SetSSDWeight");
-		reg_print_msg_error("The SSD object has to be created before the timepoint weights can be set");
-		reg_exit();
-	}
-	this->measure_ssd->SetTimepointWeight(timepoint, weight);
+void reg_base<T>::SetSSDWeight(int timepoint, double weight) {
+    if (this->measure_ssd == nullptr) {
+        reg_print_fct_error("reg_base<T>::SetSSDWeight");
+        reg_print_msg_error("The SSD object has to be created before the timepoint weights can be set");
+        reg_exit();
+    }
+    this->measure_ssd->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetKLDWeight(int timepoint, double weight)
-{
-	if (this->measure_kld == NULL)
-	{
-		reg_print_fct_error("reg_base<T>::SetKLDWeight");
-		reg_print_msg_error("The KLD object has to be created before the timepoint weights can be set");
-		reg_exit();
-	}
-	this->measure_kld->SetTimepointWeight(timepoint, weight);
+void reg_base<T>::SetKLDWeight(int timepoint, double weight) {
+    if (this->measure_kld == nullptr) {
+        reg_print_fct_error("reg_base<T>::SetKLDWeight");
+        reg_print_msg_error("The KLD object has to be created before the timepoint weights can be set");
+        reg_exit();
+    }
+    this->measure_kld->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLocalWeightSim(nifti_image *i)
-{
-	this->localWeightSimInput = i;
+void reg_base<T>::SetLocalWeightSim(nifti_image *i) {
+    this->localWeightSimInput = i;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::WarpFloatingImage(int inter)
-{
-   // Compute the deformation field
-   this->GetDeformationField();
-
-   if(this->measure_dti==NULL)
-   {
-      // Resample the floating image
-      reg_resampleImage(this->currentFloating,
-                        this->warped,
-                        this->deformationFieldImage,
-                        this->currentMask,
-                        inter,
-                        this->warpedPaddingValue);
-   }
-   else
-   {
-      reg_defField_getJacobianMatrix(this->deformationFieldImage,
-                                     this->forwardJacobianMatrix);
-      /*DTI needs fixing!
-     reg_resampleImage(this->currentFloating,
-                        this->warped,
-                        this->deformationFieldImage,
-                        this->currentMask,
-                        inter,
-                        this->warpedPaddingValue,
-                        this->measure_dti->GetActiveTimepoints(),
-                        this->forwardJacobianMatrix);*/
-   }
+void reg_base<T>::WarpFloatingImage(int inter) {
+    // Compute the deformation field
+    this->GetDeformationField();
+
+    if (this->measure_dti == nullptr) {
+        // Resample the floating image
+        reg_resampleImage(this->currentFloating,
+                          this->warped,
+                          this->deformationFieldImage,
+                          this->currentMask,
+                          inter,
+                          this->warpedPaddingValue);
+    } else {
+        reg_defField_getJacobianMatrix(this->deformationFieldImage,
+                                       this->forwardJacobianMatrix);
+        /*DTI needs fixing!
+       reg_resampleImage(this->currentFloating,
+                          this->warped,
+                          this->deformationFieldImage,
+                          this->currentMask,
+                          inter,
+                          this->warpedPaddingValue,
+                          this->measure_dti->GetActiveTimepoints(),
+                          this->forwardJacobianMatrix);*/
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::WarpFloatingImage");
+    reg_print_fct_debug("reg_base<T>::WarpFloatingImage");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::Run()
-{
+void reg_base<T>::Run() {
 #ifndef NDEBUG
-   char text[255];
-   sprintf(text, "%s::Run() called", this->executableName);
-   reg_print_msg_debug(text);
+    char text[255];
+    sprintf(text, "%s::Run() called", this->executableName);
+    reg_print_msg_debug(text);
 #endif
 
-   if(!this->initialised) this->Initialise();
+    if (!this->initialised) this->Initialise();
 #ifdef NDEBUG
-   if(this->verbose)
-   {
+    if (this->verbose) {
 #endif
-      reg_print_info(this->executableName, "***********************************************************");
+        reg_print_info(this->executableName, "***********************************************************");
 #ifdef NDEBUG
-   }
+    }
 #endif
 
-   // Update the maximal number of iteration to perform per level
-   this->maxiterationNumber = this->maxiterationNumber * pow(2, this->levelToPerform-1);
-
-   // Loop over the different resolution level to perform
-   for(this->currentLevel=0;
-         this->currentLevel<this->levelToPerform;
-         this->currentLevel++)
-   {
-
-      // Set the current input images
-      if(this->usePyramid)
-      {
-         this->currentReference = this->referencePyramid[this->currentLevel];
-         this->currentFloating = this->floatingPyramid[this->currentLevel];
-         this->currentMask = this->maskPyramid[this->currentLevel];
-      }
-      else
-      {
-         this->currentReference = this->referencePyramid[0];
-         this->currentFloating = this->floatingPyramid[0];
-         this->currentMask = this->maskPyramid[0];
-      }
-
-      // Allocate image that depends on the reference image
-      this->AllocateWarped();
-      this->AllocateDeformationField();
-      this->AllocateWarpedGradient();
-
-      // The grid is refined if necessary
-      T maxStepSize=this->InitialiseCurrentLevel();
-      T currentSize = maxStepSize;
-      T smallestSize = maxStepSize / (T)100.0;
-
-      this->DisplayCurrentLevelParameters();
-
-      // Allocate image that are required to compute the gradient
-      this->AllocateVoxelBasedMeasureGradient();
-      this->AllocateTransformationGradient();
-
-      // Initialise the measures of similarity
-      this->InitialiseSimilarity();
-
-      // initialise the optimiser
-      this->SetOptimiser();
-
-      // Loop over the number of perturbation to do
-      for(size_t perturbation=0;
-            perturbation<=this->perturbationNumber;
-            ++perturbation)
-      {
-         // Evalulate the objective function value
-         this->UpdateBestObjFunctionValue();
-         this->PrintInitialObjFunctionValue();
-
-         // Iterate until convergence or until the max number of iteration is reach
-         while(true)
-         {
-
-            if(currentSize==0)
-               break;
-
-            if(this->optimiser->GetCurrentIterationNumber()>=this->optimiser->GetMaxIterationNumber()){
-               reg_print_msg_warn("The current level reached the maximum number of iteration");
-               break;
-            }
+    // Update the maximal number of iteration to perform per level
+    this->maxIterationNumber = this->maxIterationNumber * pow(2, this->levelToPerform - 1);
+
+    // Loop over the different resolution level to perform
+    for (this->currentLevel = 0;
+         this->currentLevel < this->levelToPerform;
+         this->currentLevel++) {
+
+        // Set the current input images
+        if (this->usePyramid) {
+            this->currentReference = this->referencePyramid[this->currentLevel];
+            this->currentFloating = this->floatingPyramid[this->currentLevel];
+            this->currentMask = this->maskPyramid[this->currentLevel];
+        } else {
+            this->currentReference = this->referencePyramid[0];
+            this->currentFloating = this->floatingPyramid[0];
+            this->currentMask = this->maskPyramid[0];
+        }
+
+        // Allocate image that depends on the reference image
+        this->AllocateWarped();
+        this->AllocateDeformationField();
+        this->AllocateWarpedGradient();
+
+        // The grid is refined if necessary
+        T maxStepSize = this->InitialiseCurrentLevel();
+        T currentSize = maxStepSize;
+        T smallestSize = maxStepSize / (T)100.0;
+
+        this->DisplayCurrentLevelParameters();
 
-            // Compute the objective function gradient
-            this->GetObjectiveFunctionGradient();
+        // Allocate image that are required to compute the gradient
+        this->AllocateVoxelBasedMeasureGradient();
+        this->AllocateTransformationGradient();
 
-            // Normalise the gradient
-            this->NormaliseGradient();
+        // Initialise the measures of similarity
+        this->InitialiseSimilarity();
 
-            // Initialise the line search initial step size
-            currentSize=currentSize>maxStepSize?maxStepSize:currentSize;
+        // initialise the optimiser
+        this->SetOptimiser();
 
-            // A line search is performed
-            this->optimiser->Optimise(maxStepSize,smallestSize,currentSize);
+        // Loop over the number of perturbation to do
+        for (size_t perturbation = 0;
+             perturbation <= this->perturbationNumber;
+             ++perturbation) {
+            // Evalulate the objective function value
+            this->UpdateBestObjFunctionValue();
+            this->PrintInitialObjFunctionValue();
 
-            // Update the obecjtive function variables and print some information
-            this->PrintCurrentObjFunctionValue(currentSize);
+            // Iterate until convergence or until the max number of iteration is reach
+            while (true) {
 
-         } // while
-         if(perturbation<this->perturbationNumber)
-         {
+                if (currentSize == 0)
+                    break;
 
-            this->optimiser->Perturbation(smallestSize);
-            currentSize=maxStepSize;
+                if (this->optimiser->GetCurrentIterationNumber() >= this->optimiser->GetMaxIterationNumber()) {
+                    reg_print_msg_warn("The current level reached the maximum number of iteration");
+                    break;
+                }
+
+                // Compute the objective function gradient
+                this->GetObjectiveFunctionGradient();
+
+                // Normalise the gradient
+                this->NormaliseGradient();
+
+                // Initialise the line search initial step size
+                currentSize = currentSize > maxStepSize ? maxStepSize : currentSize;
+
+                // A line search is performed
+                this->optimiser->Optimise(maxStepSize, smallestSize, currentSize);
+
+                // Update the obecjtive function variables and print some information
+                this->PrintCurrentObjFunctionValue(currentSize);
+
+            } // while
+            if (perturbation < this->perturbationNumber) {
+
+                this->optimiser->Perturbation(smallestSize);
+                currentSize = maxStepSize;
 #ifdef NDEBUG
-            if(this->verbose)
-            {
+                if (this->verbose) {
 #endif
-               char text[255];
-               reg_print_info(this->executableName, "Perturbation Step - The number of iteration is reset to 0");
-               sprintf(text, "Perturbation Step - Every control point positions is altered by [-%g %g]",
-                      smallestSize, smallestSize);
-               reg_print_info(this->executableName, text);
+                    char text[255];
+                    reg_print_info(this->executableName, "Perturbation Step - The number of iteration is reset to 0");
+                    sprintf(text, "Perturbation Step - Every control point positions is altered by [-%g %g]",
+                            smallestSize, smallestSize);
+                    reg_print_info(this->executableName, text);
 
 #ifdef NDEBUG
-            }
+                }
 #endif
-         }
-      } // perturbation loop
-
-      // Final folding correction
-      this->CorrectTransformation();
-
-      // Some cleaning is performed
-      delete this->optimiser;
-      this->optimiser=NULL;
-      this->ClearWarped();
-      this->ClearDeformationField();
-      this->ClearWarpedGradient();
-      this->ClearVoxelBasedMeasureGradient();
-      this->ClearTransformationGradient();
-      if(this->usePyramid)
-      {
-         nifti_image_free(this->referencePyramid[this->currentLevel]);
-         this->referencePyramid[this->currentLevel]=NULL;
-         nifti_image_free(this->floatingPyramid[this->currentLevel]);
-         this->floatingPyramid[this->currentLevel]=NULL;
-         free(this->maskPyramid[this->currentLevel]);
-         this->maskPyramid[this->currentLevel]=NULL;
-      }
-      else if(this->currentLevel==this->levelToPerform-1)
-      {
-         nifti_image_free(this->referencePyramid[0]);
-         this->referencePyramid[0]=NULL;
-         nifti_image_free(this->floatingPyramid[0]);
-         this->floatingPyramid[0]=NULL;
-         free(this->maskPyramid[0]);
-         this->maskPyramid[0]=NULL;
-      }
-      this->ClearCurrentInputImage();
+            }
+        } // perturbation loop
+
+        // Final folding correction
+        this->CorrectTransformation();
+
+        // Some cleaning is performed
+        delete this->optimiser;
+        this->optimiser = nullptr;
+        this->ClearWarped();
+        this->ClearDeformationField();
+        this->ClearWarpedGradient();
+        this->ClearVoxelBasedMeasureGradient();
+        this->ClearTransformationGradient();
+        if (this->usePyramid) {
+            nifti_image_free(this->referencePyramid[this->currentLevel]);
+            this->referencePyramid[this->currentLevel] = nullptr;
+            nifti_image_free(this->floatingPyramid[this->currentLevel]);
+            this->floatingPyramid[this->currentLevel] = nullptr;
+            free(this->maskPyramid[this->currentLevel]);
+            this->maskPyramid[this->currentLevel] = nullptr;
+        } else if (this->currentLevel == this->levelToPerform - 1) {
+            nifti_image_free(this->referencePyramid[0]);
+            this->referencePyramid[0] = nullptr;
+            nifti_image_free(this->floatingPyramid[0]);
+            this->floatingPyramid[0] = nullptr;
+            free(this->maskPyramid[0]);
+            this->maskPyramid[0] = nullptr;
+        }
+        this->ClearCurrentInputImage();
 
 #ifdef NDEBUG
-      if(this->verbose)
-      {
+        if (this->verbose) {
 #endif
-         reg_print_info(this->executableName, "Current registration level done");
-         reg_print_info(this->executableName, "***********************************************************");
+            reg_print_info(this->executableName, "Current registration level done");
+            reg_print_info(this->executableName, "***********************************************************");
 #ifdef NDEBUG
-      }
+        }
 #endif
-      // Update the number of level for the next level
-      this->maxiterationNumber /= 2;
-   } // level this->levelToPerform
-   // Set this to the last value since it's used somewhere else
-   this->currentLevel--;
+        // Update the number of level for the next level
+        this->maxIterationNumber /= 2;
+    } // level this->levelToPerform
+    // Set this to the last value since it's used somewhere else
+    this->currentLevel--;
 
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_base<T>::Run");
+    reg_print_fct_debug("reg_base<T>::Run");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template class reg_base<float>;
-#endif // _REG_BASE_CPP
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 2c0ee7c5..0333d0d2 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_BASE_H
-#define _REG_BASE_H
+#pragma once
 
 #include "_reg_resampling.h"
 #include "_reg_globalTrans.h"
@@ -33,181 +32,127 @@
 
 /// @brief Base registration class
 template <class T>
-class reg_base : public InterfaceOptimiser
-{
+class reg_base: public InterfaceOptimiser {
 protected:
    // Platform !!!
 //   Platform *platform;
 //   int platformCode;
 //   unsigned gpuIdx;
 
-   // Optimiser related variables
-   reg_optimiser<T> *optimiser;
-   size_t maxiterationNumber;
-   size_t perturbationNumber;
-   bool optimiseX;
-   bool optimiseY;
-   bool optimiseZ;
-
-   // Optimiser related function
-   virtual void SetOptimiser();
-
-   // Measure related variables
-   reg_ssd *measure_ssd;
-   reg_kld *measure_kld;
-   reg_dti *measure_dti;
-   reg_lncc *measure_lncc;
-   reg_nmi *measure_nmi;
-   reg_mind *measure_mind;
-   reg_mindssc *measure_mindssc;
-   nifti_image *localWeightSimInput;
-   nifti_image *localWeightSimCurrent;
-
-   char *executableName;
-   int referenceTimePoint;
-   int floatingTimePoint;
-   nifti_image *inputReference; // pointer to external
-   nifti_image *inputFloating; // pointer to external
-   nifti_image *maskImage; // pointer to external
-   mat44 *affineTransformation; // pointer to external
-   int *referenceMask;
-   T referenceSmoothingSigma;
-   T floatingSmoothingSigma;
-   float *referenceThresholdUp;
-   float *referenceThresholdLow;
-   float *floatingThresholdUp;
-   float *floatingThresholdLow;
-   bool robustRange;
-   T warpedPaddingValue;
-   unsigned int levelNumber;
-   unsigned int levelToPerform;
-   T gradientSmoothingSigma;
-   T similarityWeight;
-   bool additive_mc_nmi;
-   bool useConjGradient;
-   bool useApproxGradient;
-   bool verbose;
-   bool usePyramid;
-   int interpolation;
-
-   bool initialised;
-   nifti_image **referencePyramid;
-   nifti_image **floatingPyramid;
-   int **maskPyramid;
-   int *activeVoxelNumber;
-   nifti_image *currentReference;
-   nifti_image *currentFloating;
-   int *currentMask;
-   nifti_image *warped;
-   nifti_image *deformationFieldImage;
-   nifti_image *warImgGradient;
-   nifti_image *voxelBasedMeasureGradient;
-   unsigned int currentLevel;
-
-   mat33 *forwardJacobianMatrix;
-
-   double bestWMeasure;
-   double currentWMeasure;
-
-   double currentWLand;
-   double bestWLand;
-
-   float landmarkRegWeight;
-   size_t landmarkRegNumber;
-   float *landmarkReference;
-   float *landmarkFloating;
-
-   virtual void AllocateWarped();
-   virtual void ClearWarped();
-   virtual void AllocateDeformationField();
-   virtual void ClearDeformationField();
-   virtual void AllocateWarpedGradient();
-   virtual void ClearWarpedGradient();
-   virtual void AllocateVoxelBasedMeasureGradient();
-   virtual void ClearVoxelBasedMeasureGradient();
-   virtual T InitialiseCurrentLevel()
-   {
-      return 0.;
-   }
-   virtual void ClearCurrentInputImage();
-
-   virtual void WarpFloatingImage(int);
-   virtual double ComputeSimilarityMeasure();
-   virtual void GetVoxelBasedGradient();
-   virtual void SmoothGradient()
-   {
-      return;
-   }
-   virtual void InitialiseSimilarity();
-
-   // Virtual empty functions that have to be filled
-   virtual void GetDeformationField()
-   {
-      return;  // Need to be filled
-   }
-   virtual void SetGradientImageToZero()
-   {
-      return;  // Need to be filled
-   }
-   virtual void GetApproximatedGradient()
-   {
-      return;  // Need to be filled
-   }
-   virtual double GetObjectiveFunctionValue()
-   {
-      return std::numeric_limits<double>::quiet_NaN();  // Need to be filled
-   }
-   virtual void UpdateParameters(float)
-   {
-      return;  // Need to be filled
-   }
-   virtual T NormaliseGradient()
-   {
-      return std::numeric_limits<float>::quiet_NaN();  // Need to be filled
-   }
-   virtual void GetSimilarityMeasureGradient()
-   {
-      return;  // Need to be filled
-   }
-   virtual void GetObjectiveFunctionGradient()
-   {
-      return;  // Need to be filled
-   }
-   virtual void DisplayCurrentLevelParameters()
-   {
-      return;  // Need to be filled
-   }
-   virtual void UpdateBestObjFunctionValue()
-   {
-      return;  // Need to be filled
-   }
-   virtual void PrintCurrentObjFunctionValue(T)
-   {
-      return;  // Need to be filled
-   }
-   virtual void PrintInitialObjFunctionValue()
-   {
-      return;  // Need to be filled
-   }
-   virtual void AllocateTransformationGradient()
-   {
-      return;  // Need to be filled
-   }
-   virtual void ClearTransformationGradient()
-   {
-      return;  // Need to be filled
-   }
-   virtual void CorrectTransformation()
-   {
-      return;  // Need to be filled
-   }
-
-   void (*funcProgressCallback)(float pcntProgress, void *params);
-   void *paramsProgressCallback;
+    // Optimiser related variables
+    reg_optimiser<T> *optimiser;
+    size_t maxIterationNumber;
+    size_t perturbationNumber;
+    bool optimiseX;
+    bool optimiseY;
+    bool optimiseZ;
+
+    // Optimiser related function
+    virtual void SetOptimiser();
+
+    // Measure related variables
+    reg_ssd *measure_ssd;
+    reg_kld *measure_kld;
+    reg_dti *measure_dti;
+    reg_lncc *measure_lncc;
+    reg_nmi *measure_nmi;
+    reg_mind *measure_mind;
+    reg_mindssc *measure_mindssc;
+    nifti_image *localWeightSimInput;
+    nifti_image *localWeightSimCurrent;
+
+    char *executableName;
+    int referenceTimePoint;
+    int floatingTimePoint;
+    nifti_image *inputReference; // pointer to external
+    nifti_image *inputFloating; // pointer to external
+    nifti_image *maskImage; // pointer to external
+    mat44 *affineTransformation; // pointer to external
+    int *referenceMask;
+    T referenceSmoothingSigma;
+    T floatingSmoothingSigma;
+    float *referenceThresholdUp;
+    float *referenceThresholdLow;
+    float *floatingThresholdUp;
+    float *floatingThresholdLow;
+    bool robustRange;
+    T warpedPaddingValue;
+    unsigned int levelNumber;
+    unsigned int levelToPerform;
+    T gradientSmoothingSigma;
+    T similarityWeight;
+    bool additive_mc_nmi;
+    bool useConjGradient;
+    bool useApproxGradient;
+    bool verbose;
+    bool usePyramid;
+    int interpolation;
+
+    bool initialised;
+    nifti_image **referencePyramid;
+    nifti_image **floatingPyramid;
+    int **maskPyramid;
+    int *activeVoxelNumber;
+    nifti_image *currentReference;
+    nifti_image *currentFloating;
+    int *currentMask;
+    nifti_image *warped;
+    nifti_image *deformationFieldImage;
+    nifti_image *warImgGradient;
+    nifti_image *voxelBasedMeasureGradient;
+    unsigned int currentLevel;
+
+    mat33 *forwardJacobianMatrix;
+
+    double bestWMeasure;
+    double currentWMeasure;
+
+    double currentWLand;
+    double bestWLand;
+
+    float landmarkRegWeight;
+    size_t landmarkRegNumber;
+    float *landmarkReference;
+    float *landmarkFloating;
+
+    virtual void AllocateWarped();
+    virtual void ClearWarped();
+    virtual void AllocateDeformationField();
+    virtual void ClearDeformationField();
+    virtual void AllocateWarpedGradient();
+    virtual void ClearWarpedGradient();
+    virtual void AllocateVoxelBasedMeasureGradient();
+    virtual void ClearVoxelBasedMeasureGradient();
+    virtual T InitialiseCurrentLevel() { return 0; }
+    virtual void ClearCurrentInputImage();
+
+    virtual void WarpFloatingImage(int);
+    virtual double ComputeSimilarityMeasure();
+    virtual void GetVoxelBasedGradient();
+    virtual void SmoothGradient() {}
+    virtual void InitialiseSimilarity();
+
+    // Virtual empty functions that have to be filled
+    virtual void GetDeformationField() {}
+    virtual void SetGradientImageToZero() {}
+    virtual void GetApproximatedGradient() {}
+    virtual double GetObjectiveFunctionValue() { return std::numeric_limits<float>::quiet_NaN(); }
+    virtual void UpdateParameters(float) {}
+    virtual T NormaliseGradient() { return std::numeric_limits<float>::quiet_NaN(); }
+    virtual void GetSimilarityMeasureGradient() {}
+    virtual void GetObjectiveFunctionGradient() {}
+    virtual void DisplayCurrentLevelParameters() {}
+    virtual void UpdateBestObjFunctionValue() {}
+    virtual void PrintCurrentObjFunctionValue(T) {}
+    virtual void PrintInitialObjFunctionValue() {}
+    virtual void AllocateTransformationGradient() {}
+    virtual void ClearTransformationGradient() {}
+    virtual void CorrectTransformation() {}
+
+    void (*funcProgressCallback)(float pcntProgress, void *params);
+    void* paramsProgressCallback;
 
 public:
-   reg_base(int refTimePoint,int floTimePoint);
-   virtual ~reg_base();
 
    //PLATFORM
 //   void setPlaform(Platform* inputPlatform);
@@ -215,101 +160,75 @@ class reg_base : public InterfaceOptimiser
 //   void setPlatformCode(int inputPlatformCode);
 //   void setGpuIdx(unsigned inputGPUIdx);
 
-   // Optimisation related functions
-   void SetMaximalIterationNumber(unsigned int);
-   void NoOptimisationAlongX()
-   {
-      this->optimiseX=false;
-   }
-   void NoOptimisationAlongY()
-   {
-      this->optimiseY=false;
-   }
-   void NoOptimisationAlongZ()
-   {
-      this->optimiseZ=false;
-   }
-   void SetPerturbationNumber(size_t v)
-   {
-      this->perturbationNumber=v;
-   }
-   void UseConjugateGradient();
-   void DoNotUseConjugateGradient();
-   void UseApproximatedGradient();
-   void DoNotUseApproximatedGradient();
-   // Measure of similarity related functions
-//    void ApproximateParzenWindow();
-//    void DoNotApproximateParzenWindow();
-   virtual void UseNMISetReferenceBinNumber(int,int);
-   virtual void UseNMISetFloatingBinNumber(int,int);
-   virtual void UseSSD(int timepoint, bool normalize);
-   virtual void UseMIND(int timepoint, int offset);
-   virtual void UseMINDSSC(int timepoint, int offset);
-   virtual void UseKLDivergence(int timepoint);
-   virtual void UseDTI(bool *timepoint);
-   virtual void UseLNCC(int timepoint, float stdDevKernel);
-   virtual void SetLNCCKernelType(int type);
-  void SetLocalWeightSim(nifti_image *);
-
-   void SetNMIWeight(int, double);
-   void SetSSDWeight(int, double);
-   void SetKLDWeight(int, double);
-   void SetLNCCWeight(int, double);
-
-   void SetReferenceImage(nifti_image *);
-   void SetFloatingImage(nifti_image *);
-   void SetReferenceMask(nifti_image *);
-   void SetAffineTransformation(mat44 *);
-   void SetReferenceSmoothingSigma(T);
-   void SetFloatingSmoothingSigma(T);
-   void SetGradientSmoothingSigma(T);
-   void SetReferenceThresholdUp(unsigned int,T);
-   void SetReferenceThresholdLow(unsigned int,T);
-   void SetFloatingThresholdUp(unsigned int, T);
-   void SetFloatingThresholdLow(unsigned int,T);
-   void UseRobustRange();
-   void DoNotUseRobustRange();
-   void SetWarpedPaddingValue(T);
-   void SetLevelNumber(unsigned int);
-   void SetLevelToPerform(unsigned int);
-   void PrintOutInformation();
-   void DoNotPrintOutInformation();
-   void DoNotUsePyramidalApproach();
-   void UseNeareatNeighborInterpolation();
-   void UseLinearInterpolation();
-   void UseCubicSplineInterpolation();
-   void SetLandmarkRegularisationParam(size_t, float *, float*, float);
-
-   virtual void CheckParameters();
-   void Run();
-   virtual void Initialise();
-   nifti_image **GetWarpedImage()
-   {
-      return NULL;  // Need to be filled
-   }
-   virtual char * GetExecutableName()
-   {
-      return this->executableName;
-   }
-   virtual bool GetSymmetricStatus()
-   {
-      return false;
-   }
-
-   // Function required for the NiftyReg pluggin in NiftyView
-   void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress,
-                                    void *params),
-                                    void *paramsProgCallback)
-   {
-      funcProgressCallback = funcProgCallback;
-      paramsProgressCallback = paramsProgCallback;
-   }
-
-   // Function used for testing
-   virtual void reg_test_setOptimiser(reg_optimiser<T> *opt)
-   {
-      this->optimiser=opt;
-   }
+    reg_base(int refTimePoint, int floTimePoint);
+    virtual ~reg_base();
+    // Optimisation related functions
+    void SetMaximalIterationNumber(unsigned int);
+    void NoOptimisationAlongX() { optimiseX = false; }
+    void NoOptimisationAlongY() { optimiseY = false; }
+    void NoOptimisationAlongZ() { optimiseZ = false; }
+    void SetPerturbationNumber(size_t v) { perturbationNumber = v; }
+    void UseConjugateGradient();
+    void DoNotUseConjugateGradient();
+    void UseApproximatedGradient();
+    void DoNotUseApproximatedGradient();
+    // Measure of similarity related functions
+ //    void ApproximateParzenWindow();
+ //    void DoNotApproximateParzenWindow();
+    virtual void UseNMISetReferenceBinNumber(int, int);
+    virtual void UseNMISetFloatingBinNumber(int, int);
+    virtual void UseSSD(int timepoint, bool normalize);
+    virtual void UseMIND(int timepoint, int offset);
+    virtual void UseMINDSSC(int timepoint, int offset);
+    virtual void UseKLDivergence(int timepoint);
+    virtual void UseDTI(bool *timepoint);
+    virtual void UseLNCC(int timepoint, float stdDevKernel);
+    virtual void SetLNCCKernelType(int type);
+    void SetLocalWeightSim(nifti_image*);
+
+    void SetNMIWeight(int, double);
+    void SetSSDWeight(int, double);
+    void SetKLDWeight(int, double);
+    void SetLNCCWeight(int, double);
+
+    void SetReferenceImage(nifti_image*);
+    void SetFloatingImage(nifti_image*);
+    void SetReferenceMask(nifti_image*);
+    void SetAffineTransformation(mat44*);
+    void SetReferenceSmoothingSigma(T);
+    void SetFloatingSmoothingSigma(T);
+    void SetGradientSmoothingSigma(T);
+    void SetReferenceThresholdUp(unsigned int, T);
+    void SetReferenceThresholdLow(unsigned int, T);
+    void SetFloatingThresholdUp(unsigned int, T);
+    void SetFloatingThresholdLow(unsigned int, T);
+    void UseRobustRange();
+    void DoNotUseRobustRange();
+    void SetWarpedPaddingValue(T);
+    void SetLevelNumber(unsigned int);
+    void SetLevelToPerform(unsigned int);
+    void PrintOutInformation();
+    void DoNotPrintOutInformation();
+    void DoNotUsePyramidalApproach();
+    void UseNearestNeighborInterpolation();
+    void UseLinearInterpolation();
+    void UseCubicSplineInterpolation();
+    void SetLandmarkRegularisationParam(size_t, float*, float*, float);
+
+    virtual void CheckParameters();
+    void Run();
+    virtual void Initialise();
+    nifti_image** GetWarpedImage() { return nullptr; } // Need to be filled
+    virtual char* GetExecutableName() { return this->executableName; }
+    virtual bool GetSymmetricStatus() { return false; }
+
+    // Function required for the NiftyReg plugin in NiftyView
+    void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress, void *params),
+                                     void *paramsProgCallback) {
+        funcProgressCallback = funcProgCallback;
+        paramsProgressCallback = paramsProgCallback;
+    }
+
+    // Function used for testing
+    virtual void reg_test_setOptimiser(reg_optimiser<T> *opt) { optimiser = opt; }
 };
-
-#endif // _REG_BASE_H
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 6e1cb9f9..21b2fd6d 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -10,1152 +10,1048 @@
  *
  */
 
-#ifndef _REG_F3D_CPP
-#define _REG_F3D_CPP
-
 #include "_reg_f3d.h"
 
-/* *************************************************************** */
-/* *************************************************************** */
+ /* *************************************************************** */
+ /* *************************************************************** */
 template <class T>
-reg_f3d<T>::reg_f3d(int refTimePoint,int floTimePoint)
-   : reg_base<T>::reg_base(refTimePoint,floTimePoint)
-{
+reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint)
+    : reg_base<T>::reg_base(refTimePoint, floTimePoint) {
 
-   this->executableName=(char *)"NiftyReg F3D";
-   this->inputControlPointGrid=NULL; // pointer to external
-   this->controlPointGrid=NULL;
-   this->bendingEnergyWeight=0.001;
-   this->linearEnergyWeight=0.00;
-   this->jacobianLogWeight=0.;
-   this->jacobianLogApproximation=true;
-   this->spacing[0]=-5;
-   this->spacing[1]=std::numeric_limits<T>::quiet_NaN();
-   this->spacing[2]=std::numeric_limits<T>::quiet_NaN();
-   this->useConjGradient=true;
-   this->useApproxGradient=false;
+    this->executableName = (char *)"NiftyReg F3D";
+    this->inputControlPointGrid = nullptr; // pointer to external
+    this->controlPointGrid = nullptr;
+    this->bendingEnergyWeight = 0.001;
+    this->linearEnergyWeight = 0.00;
+    this->jacobianLogWeight = 0.;
+    this->jacobianLogApproximation = true;
+    this->spacing[0] = -5;
+    this->spacing[1] = std::numeric_limits<T>::quiet_NaN();
+    this->spacing[2] = std::numeric_limits<T>::quiet_NaN();
+    this->useConjGradient = true;
+    this->useApproxGradient = false;
 
-   //    this->approxParzenWindow=true;
+    //    this->approxParzenWindow=true;
 
-   this->transformationGradient=NULL;
+    this->transformationGradient = nullptr;
 
-   this->gridRefinement=true;
+    this->gridRefinement = true;
 
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::reg_f3d");
+    reg_print_fct_debug("reg_f3d<T>::reg_f3d");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-reg_f3d<T>::~reg_f3d()
-{
-   this->ClearTransformationGradient();
-   if(this->controlPointGrid!=NULL)
-   {
-      nifti_image_free(this->controlPointGrid);
-      this->controlPointGrid=NULL;
-   }
+reg_f3d<T>::~reg_f3d() {
+    this->ClearTransformationGradient();
+    if (this->controlPointGrid != nullptr) {
+        nifti_image_free(this->controlPointGrid);
+        this->controlPointGrid = nullptr;
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::~reg_f3d");
+    reg_print_fct_debug("reg_f3d<T>::~reg_f3d");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::SetControlPointGridImage(nifti_image *cp)
-{
-   this->inputControlPointGrid = cp;
+void reg_f3d<T>::SetControlPointGridImage(nifti_image *cp) {
+    this->inputControlPointGrid = cp;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::SetControlPointGridImage");
+    reg_print_fct_debug("reg_f3d<T>::SetControlPointGridImage");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::SetBendingEnergyWeight(T be)
-{
-   this->bendingEnergyWeight = be;
+void reg_f3d<T>::SetBendingEnergyWeight(T be) {
+    this->bendingEnergyWeight = be;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::SetBendingEnergyWeight");
+    reg_print_fct_debug("reg_f3d<T>::SetBendingEnergyWeight");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::SetLinearEnergyWeight(T le)
-{
-   this->linearEnergyWeight=le;
+void reg_f3d<T>::SetLinearEnergyWeight(T le) {
+    this->linearEnergyWeight = le;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::SetLinearEnergyWeight");
+    reg_print_fct_debug("reg_f3d<T>::SetLinearEnergyWeight");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::SetJacobianLogWeight(T j)
-{
-   this->jacobianLogWeight = j;
+void reg_f3d<T>::SetJacobianLogWeight(T j) {
+    this->jacobianLogWeight = j;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::SetJacobianLogWeight");
+    reg_print_fct_debug("reg_f3d<T>::SetJacobianLogWeight");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::ApproximateJacobianLog()
-{
-   this->jacobianLogApproximation = true;
+void reg_f3d<T>::ApproximateJacobianLog() {
+    this->jacobianLogApproximation = true;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::ApproximateJacobianLog");
+    reg_print_fct_debug("reg_f3d<T>::ApproximateJacobianLog");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::DoNotApproximateJacobianLog()
-{
-   this->jacobianLogApproximation = false;
+void reg_f3d<T>::DoNotApproximateJacobianLog() {
+    this->jacobianLogApproximation = false;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::DoNotApproximateJacobianLog");
+    reg_print_fct_debug("reg_f3d<T>::DoNotApproximateJacobianLog");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::SetSpacing(unsigned int i, T s)
-{
-   this->spacing[i] = s;
+void reg_f3d<T>::SetSpacing(unsigned int i, T s) {
+    this->spacing[i] = s;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::SetSpacing");
+    reg_print_fct_debug("reg_f3d<T>::SetSpacing");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-T reg_f3d<T>::InitialiseCurrentLevel()
-{
-   // Set the initial step size for the gradient ascent
-   T maxStepSize = this->currentReference->dx>this->currentReference->dy?this->currentReference->dx:this->currentReference->dy;
-   if(this->currentReference->ndim>2)
-      maxStepSize = (this->currentReference->dz>maxStepSize)?this->currentReference->dz:maxStepSize;
+T reg_f3d<T>::InitialiseCurrentLevel() {
+    // Set the initial step size for the gradient ascent
+    T maxStepSize = this->currentReference->dx > this->currentReference->dy ? this->currentReference->dx : this->currentReference->dy;
+    if (this->currentReference->ndim > 2)
+        maxStepSize = (this->currentReference->dz > maxStepSize) ? this->currentReference->dz : maxStepSize;
 
-   // Refine the control point grid if required
-   if(this->gridRefinement==true)
-   {
-      if(this->currentLevel==0){
-         this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast<T>(powf(16.0f, this->levelNumber-1));
-         this->linearEnergyWeight = this->linearEnergyWeight / static_cast<T>(powf(3.0f, this->levelNumber-1));
-      }
-      else
-      {
-         reg_spline_refineControlPointGrid(this->controlPointGrid,this->currentReference);
-         this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast<T>(16);
-         this->linearEnergyWeight = this->linearEnergyWeight * static_cast<T>(3);
-      }
-   }
+    // Refine the control point grid if required
+    if (this->gridRefinement == true) {
+        if (this->currentLevel == 0) {
+            this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast<T>(powf(16.0f, this->levelNumber - 1));
+            this->linearEnergyWeight = this->linearEnergyWeight / static_cast<T>(powf(3.0f, this->levelNumber - 1));
+        } else {
+            reg_spline_refineControlPointGrid(this->controlPointGrid, this->currentReference);
+            this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast<T>(16);
+            this->linearEnergyWeight = this->linearEnergyWeight * static_cast<T>(3);
+        }
+    }
 
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::InitialiseCurrentLevel");
+    reg_print_fct_debug("reg_f3d<T>::InitialiseCurrentLevel");
 #endif
-   return maxStepSize;
+    return maxStepSize;
 }
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::AllocateTransformationGradient()
-{
-   if(this->controlPointGrid==NULL)
-   {
-      reg_print_fct_error("reg_f3d<T>::AllocateTransformationGradient()");
-      reg_print_msg_error("The control point image is not defined");
-      reg_exit();
-   }
-   reg_f3d<T>::ClearTransformationGradient();
-   this->transformationGradient = nifti_copy_nim_info(this->controlPointGrid);
-   this->transformationGradient->data = (void *)calloc(this->transformationGradient->nvox,
-                                                       this->transformationGradient->nbyper);
+void reg_f3d<T>::AllocateTransformationGradient() {
+    if (this->controlPointGrid == nullptr) {
+        reg_print_fct_error("reg_f3d<T>::AllocateTransformationGradient()");
+        reg_print_msg_error("The control point image is not defined");
+        reg_exit();
+    }
+    reg_f3d<T>::ClearTransformationGradient();
+    this->transformationGradient = nifti_copy_nim_info(this->controlPointGrid);
+    this->transformationGradient->data = (void *)calloc(this->transformationGradient->nvox,
+                                                        this->transformationGradient->nbyper);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::AllocateTransformationGradient");
+    reg_print_fct_debug("reg_f3d<T>::AllocateTransformationGradient");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::ClearTransformationGradient()
-{
-   if(this->transformationGradient!=NULL)
-   {
-      nifti_image_free(this->transformationGradient);
-      this->transformationGradient=NULL;
-   }
+void reg_f3d<T>::ClearTransformationGradient() {
+    if (this->transformationGradient != nullptr) {
+        nifti_image_free(this->transformationGradient);
+        this->transformationGradient = nullptr;
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::ClearTransformationGradient");
+    reg_print_fct_debug("reg_f3d<T>::ClearTransformationGradient");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::CheckParameters()
-{
-   reg_base<T>::CheckParameters();
-   // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
-   if(strcmp(this->executableName,"NiftyReg F3D")==0 ||
-         strcmp(this->executableName,"NiftyReg F3D GPU")==0)
-   {
-      T penaltySum=this->bendingEnergyWeight +
+void reg_f3d<T>::CheckParameters() {
+    reg_base<T>::CheckParameters();
+    // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
+    if (strcmp(this->executableName, "NiftyReg F3D") == 0 ||
+        strcmp(this->executableName, "NiftyReg F3D GPU") == 0) {
+        T penaltySum = this->bendingEnergyWeight +
             this->linearEnergyWeight +
             this->jacobianLogWeight +
             this->landmarkRegWeight;
-      if(penaltySum>=1.0)
-      {
-         this->similarityWeight=0;
-         this->similarityWeight /= penaltySum;
-         this->bendingEnergyWeight /= penaltySum;
-         this->linearEnergyWeight /= penaltySum;
-         this->jacobianLogWeight /= penaltySum;
-         this->landmarkRegWeight /= penaltySum;
-      }
-      else this->similarityWeight=1.0 - penaltySum;
-   }
+        if (penaltySum >= 1.0) {
+            this->similarityWeight = 0;
+            this->similarityWeight /= penaltySum;
+            this->bendingEnergyWeight /= penaltySum;
+            this->linearEnergyWeight /= penaltySum;
+            this->jacobianLogWeight /= penaltySum;
+            this->landmarkRegWeight /= penaltySum;
+        } else this->similarityWeight = 1.0 - penaltySum;
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::CheckParameters");
+    reg_print_fct_debug("reg_f3d<T>::CheckParameters");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::Initialise()
-{
-   if(this->initialised) return;
+void reg_f3d<T>::Initialise() {
+    if (this->initialised) return;
 
-   reg_base<T>::Initialise();
+    reg_base<T>::Initialise();
 
-   // DETERMINE THE GRID SPACING AND CREATE THE GRID
-   if(this->inputControlPointGrid==NULL)
-   {
-      // Set the spacing along y and z if undefined. Their values are set to match
-      // the spacing along the x axis
-      if(this->spacing[1]!=this->spacing[1]) this->spacing[1]=this->spacing[0];
-      if(this->spacing[2]!=this->spacing[2]) this->spacing[2]=this->spacing[0];
+    // DETERMINE THE GRID SPACING AND CREATE THE GRID
+    if (this->inputControlPointGrid == nullptr) {
+        // Set the spacing along y and z if undefined. Their values are set to match
+        // the spacing along the x axis
+        if (this->spacing[1] != this->spacing[1]) this->spacing[1] = this->spacing[0];
+        if (this->spacing[2] != this->spacing[2]) this->spacing[2] = this->spacing[0];
 
-      /* Convert the spacing from voxel to mm if necessary */
-      float spacingInMillimeter[3]= {this->spacing[0],this->spacing[1],this->spacing[2]};
-      if(spacingInMillimeter[0]<0) spacingInMillimeter[0] *= -1.0f * this->inputReference->dx;
-      if(spacingInMillimeter[1]<0) spacingInMillimeter[1] *= -1.0f * this->inputReference->dy;
-      if(spacingInMillimeter[2]<0) spacingInMillimeter[2] *= -1.0f * this->inputReference->dz;
+        /* Convert the spacing from voxel to mm if necessary */
+        float spacingInMillimeter[3] = {this->spacing[0], this->spacing[1], this->spacing[2]};
+        if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * this->inputReference->dx;
+        if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * this->inputReference->dy;
+        if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * this->inputReference->dz;
 
-      // Define the spacing for the first level
-      float gridSpacing[3];
-      gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(this->levelNumber-1));
-      gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(this->levelNumber-1));
-      gridSpacing[2] = 1.0f;
-      if(this->referencePyramid[0]->nz>1)
-         gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber-1));
+        // Define the spacing for the first level
+        float gridSpacing[3];
+        gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(this->levelNumber - 1));
+        gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(this->levelNumber - 1));
+        gridSpacing[2] = 1.0f;
+        if (this->referencePyramid[0]->nz > 1)
+            gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber - 1));
 
-      // Create and allocate the control point image
-      reg_createControlPointGrid<T>(&this->controlPointGrid, this->referencePyramid[0], gridSpacing);
+        // Create and allocate the control point image
+        reg_createControlPointGrid<T>(&this->controlPointGrid, this->referencePyramid[0], gridSpacing);
 
-      // The control point position image is initialised with the affine transformation
-      if(this->affineTransformation==NULL)
-      {
-         memset(this->controlPointGrid->data,0, this->controlPointGrid->nvox*this->controlPointGrid->nbyper);
-         reg_tools_multiplyValueToImage(this->controlPointGrid,this->controlPointGrid,0.f);
-         reg_getDeformationFromDisplacement(this->controlPointGrid);
-      }
-      else reg_affine_getDeformationField(this->affineTransformation, this->controlPointGrid);
-   }
-   else
-   {
-      // The control point grid image is initialised with the provided grid
-      this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid);
-      this->controlPointGrid->data = (void *)malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
-      memcpy( this->controlPointGrid->data, this->inputControlPointGrid->data,
-              this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
-      // The final grid spacing is computed
-      this->spacing[0] = this->controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber-1));
-      this->spacing[1] = this->controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber-1));
-      if(this->controlPointGrid->nz>1)
-         this->spacing[2] = this->controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber-1));
-   }
+        // The control point position image is initialised with the affine transformation
+        if (this->affineTransformation == nullptr) {
+            memset(this->controlPointGrid->data, 0, this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
+            reg_tools_multiplyValueToImage(this->controlPointGrid, this->controlPointGrid, 0.f);
+            reg_getDeformationFromDisplacement(this->controlPointGrid);
+        } else reg_affine_getDeformationField(this->affineTransformation, this->controlPointGrid);
+    } else {
+        // The control point grid image is initialised with the provided grid
+        this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid);
+        this->controlPointGrid->data = (void *)malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
+        memcpy(this->controlPointGrid->data, this->inputControlPointGrid->data,
+               this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
+        // The final grid spacing is computed
+        this->spacing[0] = this->controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber - 1));
+        this->spacing[1] = this->controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber - 1));
+        if (this->controlPointGrid->nz > 1)
+            this->spacing[2] = this->controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber - 1));
+    }
 #ifdef NDEBUG
-   if(this->verbose)
-   {
+    if (this->verbose) {
 #endif
-     std::string text;
-      // Print out some global information about the registration
-      reg_print_info(this->executableName, "***********************************************************");
-      reg_print_info(this->executableName, "INPUT PARAMETERS");
-      reg_print_info(this->executableName, "***********************************************************");
-      reg_print_info(this->executableName, "Reference image:");
-      text = stringFormat("\t* name: %s", this->inputReference->fname);
-      reg_print_info(this->executableName, text.c_str());
-      text = stringFormat("\t* image dimension: %i x %i x %i x %i",
-                          this->inputReference->nx, this->inputReference->ny,
-                          this->inputReference->nz, this->inputReference->nt);
-      reg_print_info(this->executableName, text.c_str());
-      text = stringFormat("\t* image spacing: %g x %g x %g mm",
-                          this->inputReference->dx, this->inputReference->dy, this->inputReference->dz);
-      reg_print_info(this->executableName, text.c_str());
-      for(int i=0; i<this->inputReference->nt; i++)
-      {
-         text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
-                 i, this->inputReference->nt-1, this->referenceThresholdLow[i],this->referenceThresholdUp[i]);
-         reg_print_info(this->executableName, text.c_str());
-         if(this->measure_nmi!=NULL){
-            if(this->measure_nmi->GetTimepointsWeights()[i]>0.0){
-               text = stringFormat("\t* binnining size for timepoint %i/%i: %i",
-                       i, this->inputFloating->nt-1, this->measure_nmi->GetReferenceBinNumber()[i]-4);
-               reg_print_info(this->executableName, text.c_str());
+        std::string text;
+        // Print out some global information about the registration
+        reg_print_info(this->executableName, "***********************************************************");
+        reg_print_info(this->executableName, "INPUT PARAMETERS");
+        reg_print_info(this->executableName, "***********************************************************");
+        reg_print_info(this->executableName, "Reference image:");
+        text = stringFormat("\t* name: %s", this->inputReference->fname);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t* image dimension: %i x %i x %i x %i",
+                            this->inputReference->nx, this->inputReference->ny,
+                            this->inputReference->nz, this->inputReference->nt);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t* image spacing: %g x %g x %g mm",
+                            this->inputReference->dx, this->inputReference->dy, this->inputReference->dz);
+        reg_print_info(this->executableName, text.c_str());
+        for (int i = 0; i < this->inputReference->nt; i++) {
+            text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
+                                i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]);
+            reg_print_info(this->executableName, text.c_str());
+            if (this->measure_nmi != nullptr) {
+                if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) {
+                    text = stringFormat("\t* binnining size for timepoint %i/%i: %i",
+                                        i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4);
+                    reg_print_info(this->executableName, text.c_str());
+                }
             }
-         }
-      }
-      text = stringFormat("\t* gaussian smoothing sigma: %g", this->referenceSmoothingSigma);
-      reg_print_info(this->executableName, text.c_str());
-      reg_print_info(this->executableName, "");
-      reg_print_info(this->executableName, "Floating image:");
-      reg_print_info(this->executableName, text.c_str());
-      text = stringFormat("\t* name: %s", this->inputFloating->fname);
-      reg_print_info(this->executableName, text.c_str());
-      text = stringFormat("\t* image dimension: %i x %i x %i x %i",
-                          this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz, this->inputFloating->nt);
-      reg_print_info(this->executableName, text.c_str());
-      text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx,
-                          this->inputFloating->dy, this->inputFloating->dz);
-      reg_print_info(this->executableName, text.c_str());
-      for(int i=0; i<this->inputFloating->nt; i++)
-      {
-         text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
-                 i, this->inputFloating->nt-1, this->floatingThresholdLow[i],this->floatingThresholdUp[i]);
-         reg_print_info(this->executableName, text.c_str());
-         if(this->measure_nmi!=NULL){
-          if (this->measure_nmi->GetTimepointsWeights()[i]>0.0){
-               text = stringFormat("\t* binnining size for timepoint %i/%i: %i",
-                       i, this->inputFloating->nt-1, this->measure_nmi->GetFloatingBinNumber()[i]-4);
-               reg_print_info(this->executableName, text.c_str());
+        }
+        text = stringFormat("\t* gaussian smoothing sigma: %g", this->referenceSmoothingSigma);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
+        reg_print_info(this->executableName, "Floating image:");
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t* name: %s", this->inputFloating->fname);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t* image dimension: %i x %i x %i x %i",
+                            this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz, this->inputFloating->nt);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx,
+                            this->inputFloating->dy, this->inputFloating->dz);
+        reg_print_info(this->executableName, text.c_str());
+        for (int i = 0; i < this->inputFloating->nt; i++) {
+            text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
+                                i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]);
+            reg_print_info(this->executableName, text.c_str());
+            if (this->measure_nmi != nullptr) {
+                if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) {
+                    text = stringFormat("\t* binnining size for timepoint %i/%i: %i",
+                                        i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4);
+                    reg_print_info(this->executableName, text.c_str());
+                }
             }
-         }
-      }
-      text = stringFormat("\t* gaussian smoothing sigma: %g", this->floatingSmoothingSigma);
-      reg_print_info(this->executableName, text.c_str());
-      reg_print_info(this->executableName, "");
-      text = stringFormat("Warped image padding value: %g", this->warpedPaddingValue);
-      reg_print_info(this->executableName, text.c_str());
-      reg_print_info(this->executableName, "");
-      text = stringFormat("Level number: %i", this->levelNumber);
-      reg_print_info(this->executableName, text.c_str());
-      if(this->levelNumber!=this->levelToPerform){
-         text = stringFormat("\t* Level to perform: %i", this->levelToPerform);
-         reg_print_info(this->executableName, text.c_str());
-      }
-      reg_print_info(this->executableName, "");
-      text = stringFormat("Maximum iteration number during the last level: %i", (int)this->maxiterationNumber);
-      reg_print_info(this->executableName, text.c_str());
-      reg_print_info(this->executableName, "");
+        }
+        text = stringFormat("\t* gaussian smoothing sigma: %g", this->floatingSmoothingSigma);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
+        text = stringFormat("Warped image padding value: %g", this->warpedPaddingValue);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
+        text = stringFormat("Level number: %i", this->levelNumber);
+        reg_print_info(this->executableName, text.c_str());
+        if (this->levelNumber != this->levelToPerform) {
+            text = stringFormat("\t* Level to perform: %i", this->levelToPerform);
+            reg_print_info(this->executableName, text.c_str());
+        }
+        reg_print_info(this->executableName, "");
+        text = stringFormat("Maximum iteration number during the last level: %i", (int)this->maxIterationNumber);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
 
-      text = stringFormat("Final spacing in mm: %g %g %g", this->spacing[0], this->spacing[1], this->spacing[2]);
-      reg_print_info(this->executableName, text.c_str());
-      reg_print_info(this->executableName, "");
-      if(this->measure_ssd!=NULL)
-         reg_print_info(this->executableName, "The SSD is used as a similarity measure.");
-      if(this->measure_kld!=NULL)
-         reg_print_info(this->executableName, "The KL divergence is used as a similarity measure.");
-      if(this->measure_lncc!=NULL)
-         reg_print_info(this->executableName, "The LNCC is used as a similarity measure.");
-      if(this->measure_dti!=NULL)
-         reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure.");
-      if(this->measure_mind!=NULL)
-         reg_print_info(this->executableName, "MIND is used as a similarity measure.");
-      if(this->measure_mindssc!=NULL)
-         reg_print_info(this->executableName, "MINDSSC is used as a similarity measure.");
-      if(this->measure_nmi!=NULL || (this->measure_dti==NULL && this->measure_kld==NULL &&
-                                     this->measure_lncc==NULL &&  this->measure_nmi==NULL &&
-                                     this->measure_ssd==NULL && this->measure_mind==NULL  &&
-                                     this->measure_mindssc==NULL) )
-         reg_print_info(this->executableName, "The NMI is used as a similarity measure.");
-      text = stringFormat("Similarity measure term weight: %g", this->similarityWeight);
-      reg_print_info(this->executableName, text.c_str());
-      reg_print_info(this->executableName, "");
-      if(this->bendingEnergyWeight>0){
-         text = stringFormat("Bending energy penalty term weight: %g", this->bendingEnergyWeight);
-         reg_print_info(this->executableName, text.c_str());
-         reg_print_info(this->executableName, "");
-      }
-      if((this->linearEnergyWeight)>0){
-         text = stringFormat("Linear energy penalty term weight: %g", this->linearEnergyWeight);
-         reg_print_info(this->executableName, text.c_str());
-         reg_print_info(this->executableName, "");
-      }
-      if(this->jacobianLogWeight>0){
-         text = stringFormat("Jacobian-based penalty term weight: %g", this->jacobianLogWeight);
-         reg_print_info(this->executableName, text.c_str());
-         if(this->jacobianLogApproximation) {
-            reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated");
-         } else {
-            reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated");
-         }
-         reg_print_info(this->executableName, "");
-      }
-      if((this->landmarkRegWeight)>0){
-         text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight);
-         reg_print_info(this->executableName, text.c_str());
-         reg_print_info(this->executableName, "");
-      }
+        text = stringFormat("Final spacing in mm: %g %g %g", this->spacing[0], this->spacing[1], this->spacing[2]);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
+        if (this->measure_ssd != nullptr)
+            reg_print_info(this->executableName, "The SSD is used as a similarity measure.");
+        if (this->measure_kld != nullptr)
+            reg_print_info(this->executableName, "The KL divergence is used as a similarity measure.");
+        if (this->measure_lncc != nullptr)
+            reg_print_info(this->executableName, "The LNCC is used as a similarity measure.");
+        if (this->measure_dti != nullptr)
+            reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure.");
+        if (this->measure_mind != nullptr)
+            reg_print_info(this->executableName, "MIND is used as a similarity measure.");
+        if (this->measure_mindssc != nullptr)
+            reg_print_info(this->executableName, "MINDSSC is used as a similarity measure.");
+        if (this->measure_nmi != nullptr || (this->measure_dti == nullptr && this->measure_kld == nullptr &&
+                                          this->measure_lncc == nullptr && this->measure_nmi == nullptr &&
+                                          this->measure_ssd == nullptr && this->measure_mind == nullptr &&
+                                          this->measure_mindssc == nullptr))
+            reg_print_info(this->executableName, "The NMI is used as a similarity measure.");
+        text = stringFormat("Similarity measure term weight: %g", this->similarityWeight);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
+        if (this->bendingEnergyWeight > 0) {
+            text = stringFormat("Bending energy penalty term weight: %g", this->bendingEnergyWeight);
+            reg_print_info(this->executableName, text.c_str());
+            reg_print_info(this->executableName, "");
+        }
+        if ((this->linearEnergyWeight) > 0) {
+            text = stringFormat("Linear energy penalty term weight: %g", this->linearEnergyWeight);
+            reg_print_info(this->executableName, text.c_str());
+            reg_print_info(this->executableName, "");
+        }
+        if (this->jacobianLogWeight > 0) {
+            text = stringFormat("Jacobian-based penalty term weight: %g", this->jacobianLogWeight);
+            reg_print_info(this->executableName, text.c_str());
+            if (this->jacobianLogApproximation) {
+                reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated");
+            } else {
+                reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated");
+            }
+            reg_print_info(this->executableName, "");
+        }
+        if ((this->landmarkRegWeight) > 0) {
+            text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight);
+            reg_print_info(this->executableName, text.c_str());
+            reg_print_info(this->executableName, "");
+        }
 #ifdef NDEBUG
-   }
+    }
 #endif
 
-   this->initialised=true;
+    this->initialised = true;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::Initialise");
+    reg_print_fct_debug("reg_f3d<T>::Initialise");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::GetDeformationField()
-{
-   reg_spline_getDeformationField(this->controlPointGrid,
-                                  this->deformationFieldImage,
-                                  this->currentMask,
-                                  false, //composition
-                                  true // bspline
-                                  );
+void reg_f3d<T>::GetDeformationField() {
+    reg_spline_getDeformationField(this->controlPointGrid,
+                                   this->deformationFieldImage,
+                                   this->currentMask,
+                                   false, //composition
+                                   true // bspline
+    );
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetDeformationField");
+    reg_print_fct_debug("reg_f3d<T>::GetDeformationField");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type)
-{
-   if(this->jacobianLogWeight<=0) return 0;
+double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
+    if (this->jacobianLogWeight <= 0) return 0;
 
-   double value;
-   if(type==2)
-   {
-      value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid,
-                                                this->currentReference,
-                                                false);
-   }
-   else
-   {
-      value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid,
-                                                this->currentReference,
-                                                this->jacobianLogApproximation);
-   }
-   unsigned int maxit=5;
-   if(type>0) maxit=20;
-   unsigned int it=0;
-   while(value!=value && it<maxit)
-   {
-      if(type==2)
-      {
-         value = reg_spline_correctFolding(this->controlPointGrid,
-                                           this->currentReference,
-                                           false);
-      }
-      else
-      {
-         value = reg_spline_correctFolding(this->controlPointGrid,
-                                           this->currentReference,
-                                           this->jacobianLogApproximation);
-      }
+    double value;
+    if (type == 2) {
+        value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid,
+                                                  this->currentReference,
+                                                  false);
+    } else {
+        value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid,
+                                                  this->currentReference,
+                                                  this->jacobianLogApproximation);
+    }
+    unsigned int maxit = 5;
+    if (type > 0) maxit = 20;
+    unsigned int it = 0;
+    while (value != value && it < maxit) {
+        if (type == 2) {
+            value = reg_spline_correctFolding(this->controlPointGrid,
+                                              this->currentReference,
+                                              false);
+        } else {
+            value = reg_spline_correctFolding(this->controlPointGrid,
+                                              this->currentReference,
+                                              this->jacobianLogApproximation);
+        }
 #ifndef NDEBUG
-      reg_print_msg_debug("Folding correction");
+        reg_print_msg_debug("Folding correction");
 #endif
-      it++;
-   }
-   if(type>0)
-   {
-      if(value!=value)
-      {
-         this->optimiser->RestoreBestDOF();
-         reg_print_fct_warn("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm()");
-         reg_print_msg_warn("The folding correction scheme failed");
-      }
-      else
-      {
+        it++;
+    }
+    if (type > 0) {
+        if (value != value) {
+            this->optimiser->RestoreBestDOF();
+            reg_print_fct_warn("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm()");
+            reg_print_msg_warn("The folding correction scheme failed");
+        } else {
 #ifndef NDEBUG
-         if(it>0){
-            char text[255];
-            sprintf(text, "Folding correction, %i step(s)", it);
-            reg_print_msg_debug(text);
-         }
+            if (it > 0) {
+                char text[255];
+                sprintf(text, "Folding correction, %i step(s)", it);
+                reg_print_msg_debug(text);
+            }
 #endif
-      }
-   }
+        }
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm");
+    reg_print_fct_debug("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm");
 #endif
-   return this->jacobianLogWeight * value;
+    return this->jacobianLogWeight * value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-double reg_f3d<T>::ComputeBendingEnergyPenaltyTerm()
-{
-   if(this->bendingEnergyWeight<=0) return 0;
+double reg_f3d<T>::ComputeBendingEnergyPenaltyTerm() {
+    if (this->bendingEnergyWeight <= 0) return 0;
 
-   double value = reg_spline_approxBendingEnergy(this->controlPointGrid);
+    double value = reg_spline_approxBendingEnergy(this->controlPointGrid);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::ComputeBendingEnergyPenaltyTerm");
+    reg_print_fct_debug("reg_f3d<T>::ComputeBendingEnergyPenaltyTerm");
 #endif
-   return this->bendingEnergyWeight * value;
+    return this->bendingEnergyWeight * value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-double reg_f3d<T>::ComputeLinearEnergyPenaltyTerm()
-{
-   if(this->linearEnergyWeight<=0)
-      return 0;
+double reg_f3d<T>::ComputeLinearEnergyPenaltyTerm() {
+    if (this->linearEnergyWeight <= 0)
+        return 0;
 
-   double value = reg_spline_approxLinearEnergy(this->controlPointGrid);
+    double value = reg_spline_approxLinearEnergy(this->controlPointGrid);
 
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::ComputeLinearEnergyPenaltyTerm");
+    reg_print_fct_debug("reg_f3d<T>::ComputeLinearEnergyPenaltyTerm");
 #endif
-   return this->linearEnergyWeight*value;
+    return this->linearEnergyWeight * value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-double reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm()
-{
-   if(this->landmarkRegWeight<=0)
-      return 0;
+double reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm() {
+    if (this->landmarkRegWeight <= 0)
+        return 0;
 
-   double value = reg_spline_getLandmarkDistance(this->controlPointGrid,
-                                                 this->landmarkRegNumber,
-                                                 this->landmarkReference,
-                                                 this->landmarkFloating);
+    double value = reg_spline_getLandmarkDistance(this->controlPointGrid,
+                                                  this->landmarkRegNumber,
+                                                  this->landmarkReference,
+                                                  this->landmarkFloating);
 
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm");
+    reg_print_fct_debug("reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm");
 #endif
-   return this->landmarkRegWeight*value;
+    return this->landmarkRegWeight * value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::GetSimilarityMeasureGradient()
-{
-   this->GetVoxelBasedGradient();
+void reg_f3d<T>::GetSimilarityMeasureGradient() {
+    this->GetVoxelBasedGradient();
 
-   int kernel_type=CUBIC_SPLINE_KERNEL;
-   // The voxel based NMI gradient is convolved with a spline kernel
-   // Convolution along the x axis
-   float currentNodeSpacing[3];
-   currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dx;
-   bool activeAxis[3]= {1,0,0};
-   reg_tools_kernelConvolution(this->voxelBasedMeasureGradient,
-                               currentNodeSpacing,
-                               kernel_type,
-                               NULL, // mask
-                               NULL, // all volumes are considered as active
-                               activeAxis
-                               );
-   // Convolution along the y axis
-   currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dy;
-   activeAxis[0]=0;
-   activeAxis[1]=1;
-   reg_tools_kernelConvolution(this->voxelBasedMeasureGradient,
-                               currentNodeSpacing,
-                               kernel_type,
-                               NULL, // mask
-                               NULL, // all volumes are considered as active
-                               activeAxis
-                               );
-   // Convolution along the z axis if required
-   if(this->voxelBasedMeasureGradient->nz>1)
-   {
-      currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dz;
-      activeAxis[1]=0;
-      activeAxis[2]=1;
-      reg_tools_kernelConvolution(this->voxelBasedMeasureGradient,
-                                  currentNodeSpacing,
-                                  kernel_type,
-                                  NULL, // mask
-                                  NULL, // all volumes are considered as active
-                                  activeAxis
-                                  );
-   }
+    int kernel_type = CUBIC_SPLINE_KERNEL;
+    // The voxel based NMI gradient is convolved with a spline kernel
+    // Convolution along the x axis
+    float currentNodeSpacing[3];
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dx;
+    bool activeAxis[3] = {1, 0, 0};
+    reg_tools_kernelConvolution(this->voxelBasedMeasureGradient,
+                                currentNodeSpacing,
+                                kernel_type,
+                                nullptr, // mask
+                                nullptr, // all volumes are considered as active
+                                activeAxis
+    );
+    // Convolution along the y axis
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dy;
+    activeAxis[0] = 0;
+    activeAxis[1] = 1;
+    reg_tools_kernelConvolution(this->voxelBasedMeasureGradient,
+                                currentNodeSpacing,
+                                kernel_type,
+                                nullptr, // mask
+                                nullptr, // all volumes are considered as active
+                                activeAxis
+    );
+    // Convolution along the z axis if required
+    if (this->voxelBasedMeasureGradient->nz > 1) {
+        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dz;
+        activeAxis[1] = 0;
+        activeAxis[2] = 1;
+        reg_tools_kernelConvolution(this->voxelBasedMeasureGradient,
+                                    currentNodeSpacing,
+                                    kernel_type,
+                                    nullptr, // mask
+                                    nullptr, // all volumes are considered as active
+                                    activeAxis
+        );
+    }
 
-   // The node based NMI gradient is extracted
-   mat44 reorientation;
-   if(this->currentFloating->sform_code>0)
-      reorientation = this->currentFloating->sto_ijk;
-   else reorientation = this->currentFloating->qto_ijk;
-   reg_voxelCentric2NodeCentric(this->transformationGradient,
-                                this->voxelBasedMeasureGradient,
-                                this->similarityWeight,
-                                false, // no update
-                                &reorientation
-                                );
+    // The node based NMI gradient is extracted
+    mat44 reorientation;
+    if (this->currentFloating->sform_code > 0)
+        reorientation = this->currentFloating->sto_ijk;
+    else reorientation = this->currentFloating->qto_ijk;
+    reg_voxelCentric2NodeCentric(this->transformationGradient,
+                                 this->voxelBasedMeasureGradient,
+                                 this->similarityWeight,
+                                 false, // no update
+                                 &reorientation
+    );
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetSimilarityMeasureGradient");
+    reg_print_fct_debug("reg_f3d<T>::GetSimilarityMeasureGradient");
 #endif
-   return;
+    return;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::GetBendingEnergyGradient()
-{
-   if(this->bendingEnergyWeight<=0) return;
+void reg_f3d<T>::GetBendingEnergyGradient() {
+    if (this->bendingEnergyWeight <= 0) return;
 
-   reg_spline_approxBendingEnergyGradient(this->controlPointGrid,
-                                          this->transformationGradient,
-                                          this->bendingEnergyWeight);
+    reg_spline_approxBendingEnergyGradient(this->controlPointGrid,
+                                           this->transformationGradient,
+                                           this->bendingEnergyWeight);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetBendingEnergyGradient");
+    reg_print_fct_debug("reg_f3d<T>::GetBendingEnergyGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::GetLinearEnergyGradient()
-{
-   if(this->linearEnergyWeight<=0) return;
+void reg_f3d<T>::GetLinearEnergyGradient() {
+    if (this->linearEnergyWeight <= 0) return;
 
-   reg_spline_approxLinearEnergyGradient(this->controlPointGrid,
-                                         this->transformationGradient,
-                                         this->linearEnergyWeight);
+    reg_spline_approxLinearEnergyGradient(this->controlPointGrid,
+                                          this->transformationGradient,
+                                          this->linearEnergyWeight);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetLinearEnergyGradient");
+    reg_print_fct_debug("reg_f3d<T>::GetLinearEnergyGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::GetJacobianBasedGradient()
-{
-   if(this->jacobianLogWeight<=0) return;
+void reg_f3d<T>::GetJacobianBasedGradient() {
+    if (this->jacobianLogWeight <= 0) return;
 
-   reg_spline_getJacobianPenaltyTermGradient(this->controlPointGrid,
-                                             this->currentReference,
-                                             this->transformationGradient,
-                                             this->jacobianLogWeight,
-                                             this->jacobianLogApproximation);
+    reg_spline_getJacobianPenaltyTermGradient(this->controlPointGrid,
+                                              this->currentReference,
+                                              this->transformationGradient,
+                                              this->jacobianLogWeight,
+                                              this->jacobianLogApproximation);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetJacobianBasedGradient");
+    reg_print_fct_debug("reg_f3d<T>::GetJacobianBasedGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::GetLandmarkDistanceGradient()
-{
-   if(this->landmarkRegWeight<=0) return;
+void reg_f3d<T>::GetLandmarkDistanceGradient() {
+    if (this->landmarkRegWeight <= 0) return;
 
-   reg_spline_getLandmarkDistanceGradient(this->controlPointGrid,
-                                          this->transformationGradient,
-                                          this->landmarkRegNumber,
-                                          this->landmarkReference,
-                                          this->landmarkFloating,
-                                          this->landmarkRegWeight);
+    reg_spline_getLandmarkDistanceGradient(this->controlPointGrid,
+                                           this->transformationGradient,
+                                           this->landmarkRegNumber,
+                                           this->landmarkReference,
+                                           this->landmarkFloating,
+                                           this->landmarkRegWeight);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetLandmarkDistanceGradient");
+    reg_print_fct_debug("reg_f3d<T>::GetLandmarkDistanceGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::SetGradientImageToZero()
-{
-   T* nodeGradPtr = static_cast<T *>(this->transformationGradient->data);
-   for(size_t i=0; i<this->transformationGradient->nvox; ++i)
-      *nodeGradPtr++=0;
+void reg_f3d<T>::SetGradientImageToZero() {
+    T* nodeGradPtr = static_cast<T *>(this->transformationGradient->data);
+    for (size_t i = 0; i < this->transformationGradient->nvox; ++i)
+        *nodeGradPtr++ = 0;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::SetGradientImageToZero");
+    reg_print_fct_debug("reg_f3d<T>::SetGradientImageToZero");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-T reg_f3d<T>::NormaliseGradient()
-{
-   // First compute the gradient max length for normalisation purpose
-   //	T maxGradValue=0;
-   size_t voxNumber = this->transformationGradient->nx *
-         this->transformationGradient->ny *
-         this->transformationGradient->nz;
-   T *ptrX = static_cast<T *>(this->transformationGradient->data);
-   T *ptrY = &ptrX[voxNumber];
-   T *ptrZ = NULL;
-   T maxGradValue=0;
-   //	float *length=(float *)calloc(voxNumber,sizeof(float));
-   if(this->transformationGradient->nz>1)
-   {
-      ptrZ = &ptrY[voxNumber];
-      for(size_t i=0; i<voxNumber; i++)
-      {
-         T valX=0,valY=0,valZ=0;
-         if(this->optimiseX==true)
-            valX = *ptrX++;
-         if(this->optimiseY==true)
-            valY = *ptrY++;
-         if(this->optimiseZ==true)
-            valZ = *ptrZ++;
-         //			length[i] = (float)(sqrt(valX*valX + valY*valY + valZ*valZ));
-         T length = (T)(sqrt(valX*valX + valY*valY + valZ*valZ));
-         maxGradValue = (length>maxGradValue)?length:maxGradValue;
-      }
-   }
-   else
-   {
-      for(size_t i=0; i<voxNumber; i++)
-      {
-         T valX=0,valY=0;
-         if(this->optimiseX==true)
-            valX = *ptrX++;
-         if(this->optimiseY==true)
-            valY = *ptrY++;
-         //			length[i] = (float)(sqrt(valX*valX + valY*valY));
-         T length = (T)(sqrt(valX*valX + valY*valY));
-         maxGradValue = (length>maxGradValue)?length:maxGradValue;
-      }
-   }
-   //	reg_heapSort(length,voxNumber);
-   //	T maxGradValue = (T)(length[90*voxNumber/100 - 1]);
-   //	free(length);
+T reg_f3d<T>::NormaliseGradient() {
+    // First compute the gradient max length for normalisation purpose
+    //	T maxGradValue=0;
+    size_t voxNumber = this->transformationGradient->nx *
+        this->transformationGradient->ny *
+        this->transformationGradient->nz;
+    T *ptrX = static_cast<T *>(this->transformationGradient->data);
+    T *ptrY = &ptrX[voxNumber];
+    T *ptrZ = nullptr;
+    T maxGradValue = 0;
+    //	float *length=(float *)calloc(voxNumber,sizeof(float));
+    if (this->transformationGradient->nz > 1) {
+        ptrZ = &ptrY[voxNumber];
+        for (size_t i = 0; i < voxNumber; i++) {
+            T valX = 0, valY = 0, valZ = 0;
+            if (this->optimiseX == true)
+                valX = *ptrX++;
+            if (this->optimiseY == true)
+                valY = *ptrY++;
+            if (this->optimiseZ == true)
+                valZ = *ptrZ++;
+            //			length[i] = (float)(sqrt(valX*valX + valY*valY + valZ*valZ));
+            T length = (T)(sqrt(valX * valX + valY * valY + valZ * valZ));
+            maxGradValue = (length > maxGradValue) ? length : maxGradValue;
+        }
+    } else {
+        for (size_t i = 0; i < voxNumber; i++) {
+            T valX = 0, valY = 0;
+            if (this->optimiseX == true)
+                valX = *ptrX++;
+            if (this->optimiseY == true)
+                valY = *ptrY++;
+            //			length[i] = (float)(sqrt(valX*valX + valY*valY));
+            T length = (T)(sqrt(valX * valX + valY * valY));
+            maxGradValue = (length > maxGradValue) ? length : maxGradValue;
+        }
+    }
+    //	reg_heapSort(length,voxNumber);
+    //	T maxGradValue = (T)(length[90*voxNumber/100 - 1]);
+    //	free(length);
 
 
-   if(strcmp(this->executableName,"NiftyReg F3D")==0)
-   {
-      // The gradient is normalised if we are running f3d
-      // It will be normalised later when running f3d_sym or f3d2
+    if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
+        // The gradient is normalised if we are running f3d
+        // It will be normalised later when running f3d_sym or f3d2
 #ifndef NDEBUG
-      char text[255];
-      sprintf(text, "Objective function gradient maximal length: %g",maxGradValue);
-      reg_print_msg_debug(text);
+        char text[255];
+        sprintf(text, "Objective function gradient maximal length: %g", maxGradValue);
+        reg_print_msg_debug(text);
 #endif
-      ptrX = static_cast<T *>(this->transformationGradient->data);
-      if(this->transformationGradient->nz>1)
-      {
-         ptrX = static_cast<T *>(this->transformationGradient->data);
-         ptrY = &ptrX[voxNumber];
-         ptrZ = &ptrY[voxNumber];
-         for(size_t i=0; i<voxNumber; ++i)
-         {
-            T valX=0,valY=0,valZ=0;
-            if(this->optimiseX==true)
-               valX = *ptrX;
-            if(this->optimiseY==true)
-               valY = *ptrY;
-            if(this->optimiseZ==true)
-               valZ = *ptrZ;
-            //				T tempLength = (float)(sqrt(valX*valX + valY*valY + valZ*valZ));
-            //				if(tempLength>maxGradValue){
-            //					*ptrX *= maxGradValue / tempLength;
-            //					*ptrY *= maxGradValue / tempLength;
-            //					*ptrZ *= maxGradValue / tempLength;
-            //				}
-            *ptrX++ = valX / maxGradValue;
-            *ptrY++ = valY / maxGradValue;
-            *ptrZ++ = valZ / maxGradValue;
-         }
-      }
-      else
-      {
-         ptrX = static_cast<T *>(this->transformationGradient->data);
-         ptrY = &ptrX[voxNumber];
-         for(size_t i=0; i<voxNumber; ++i)
-         {
-            T valX=0,valY=0;
-            if(this->optimiseX==true)
-               valX = *ptrX;
-            if(this->optimiseY==true)
-               valY = *ptrY;
-            //				T tempLength = (float)(sqrt(valX*valX + valY*valY));
-            //				if(tempLength>maxGradValue){
-            //					*ptrX *= maxGradValue / tempLength;
-            //					*ptrY *= maxGradValue / tempLength;
-            //				}
-            *ptrX++ = valX / maxGradValue;
-            *ptrY++ = valY / maxGradValue;
-         }
-      }
-   }
-   // Returns the largest gradient distance
+        ptrX = static_cast<T *>(this->transformationGradient->data);
+        if (this->transformationGradient->nz > 1) {
+            ptrX = static_cast<T *>(this->transformationGradient->data);
+            ptrY = &ptrX[voxNumber];
+            ptrZ = &ptrY[voxNumber];
+            for (size_t i = 0; i < voxNumber; ++i) {
+                T valX = 0, valY = 0, valZ = 0;
+                if (this->optimiseX == true)
+                    valX = *ptrX;
+                if (this->optimiseY == true)
+                    valY = *ptrY;
+                if (this->optimiseZ == true)
+                    valZ = *ptrZ;
+                //				T tempLength = (float)(sqrt(valX*valX + valY*valY + valZ*valZ));
+                //				if(tempLength>maxGradValue){
+                //					*ptrX *= maxGradValue / tempLength;
+                //					*ptrY *= maxGradValue / tempLength;
+                //					*ptrZ *= maxGradValue / tempLength;
+                //				}
+                *ptrX++ = valX / maxGradValue;
+                *ptrY++ = valY / maxGradValue;
+                *ptrZ++ = valZ / maxGradValue;
+            }
+        } else {
+            ptrX = static_cast<T *>(this->transformationGradient->data);
+            ptrY = &ptrX[voxNumber];
+            for (size_t i = 0; i < voxNumber; ++i) {
+                T valX = 0, valY = 0;
+                if (this->optimiseX == true)
+                    valX = *ptrX;
+                if (this->optimiseY == true)
+                    valY = *ptrY;
+                //				T tempLength = (float)(sqrt(valX*valX + valY*valY));
+                //				if(tempLength>maxGradValue){
+                //					*ptrX *= maxGradValue / tempLength;
+                //					*ptrY *= maxGradValue / tempLength;
+                //				}
+                *ptrX++ = valX / maxGradValue;
+                *ptrY++ = valY / maxGradValue;
+            }
+        }
+    }
+    // Returns the largest gradient distance
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::NormaliseGradient");
+    reg_print_fct_debug("reg_f3d<T>::NormaliseGradient");
 #endif
 
-   //   reg_io_WriteImageFile(transformationGradient,
-   //                         "gradient.nii");
-   //   reg_exit();
+    //   reg_io_WriteImageFile(transformationGradient,
+    //                         "gradient.nii");
+    //   reg_exit();
 
-   return maxGradValue;
+    return maxGradValue;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::DisplayCurrentLevelParameters()
-{
+void reg_f3d<T>::DisplayCurrentLevelParameters() {
 #ifdef NDEBUG
-   if(this->verbose)
-   {
+    if (this->verbose) {
 #endif
-      char text[255];
-      sprintf(text, "Current level: %i / %i", this->currentLevel+1, this->levelNumber);
-      reg_print_info(this->executableName, text);
-      sprintf(text, "Maximum iteration number: %i", (int)this->maxiterationNumber);
-      reg_print_info(this->executableName, text);
-      reg_print_info(this->executableName, "Current reference image");
-      sprintf(text, "\t* image dimension: %i x %i x %i x %i",
-              this->currentReference->nx, this->currentReference->ny,
-              this->currentReference->nz,this->currentReference->nt);
-      reg_print_info(this->executableName, text);
-      sprintf(text, "\t* image spacing: %g x %g x %g mm",
-              this->currentReference->dx, this->currentReference->dy,
-              this->currentReference->dz);
-      reg_print_info(this->executableName, text);
-      reg_print_info(this->executableName, "Current floating image");
-      sprintf(text, "\t* image dimension: %i x %i x %i x %i",
-              this->currentFloating->nx, this->currentFloating->ny,
-              this->currentFloating->nz,this->currentFloating->nt);
-      reg_print_info(this->executableName, text);
-      sprintf(text, "\t* image spacing: %g x %g x %g mm",
-              this->currentFloating->dx, this->currentFloating->dy,
-              this->currentFloating->dz);
-      reg_print_info(this->executableName, text);
-      reg_print_info(this->executableName, "Current control point image");
-      sprintf(text, "\t* image dimension: %i x %i x %i",
-              this->controlPointGrid->nx, this->controlPointGrid->ny,
-              this->controlPointGrid->nz);
-      reg_print_info(this->executableName, text);
-      sprintf(text, "\t* image spacing: %g x %g x %g mm",
-              this->controlPointGrid->dx, this->controlPointGrid->dy,
-              this->controlPointGrid->dz);
-      reg_print_info(this->executableName, text);
+        char text[255];
+        sprintf(text, "Current level: %i / %i", this->currentLevel + 1, this->levelNumber);
+        reg_print_info(this->executableName, text);
+        sprintf(text, "Maximum iteration number: %i", (int)this->maxIterationNumber);
+        reg_print_info(this->executableName, text);
+        reg_print_info(this->executableName, "Current reference image");
+        sprintf(text, "\t* image dimension: %i x %i x %i x %i",
+                this->currentReference->nx, this->currentReference->ny,
+                this->currentReference->nz, this->currentReference->nt);
+        reg_print_info(this->executableName, text);
+        sprintf(text, "\t* image spacing: %g x %g x %g mm",
+                this->currentReference->dx, this->currentReference->dy,
+                this->currentReference->dz);
+        reg_print_info(this->executableName, text);
+        reg_print_info(this->executableName, "Current floating image");
+        sprintf(text, "\t* image dimension: %i x %i x %i x %i",
+                this->currentFloating->nx, this->currentFloating->ny,
+                this->currentFloating->nz, this->currentFloating->nt);
+        reg_print_info(this->executableName, text);
+        sprintf(text, "\t* image spacing: %g x %g x %g mm",
+                this->currentFloating->dx, this->currentFloating->dy,
+                this->currentFloating->dz);
+        reg_print_info(this->executableName, text);
+        reg_print_info(this->executableName, "Current control point image");
+        sprintf(text, "\t* image dimension: %i x %i x %i",
+                this->controlPointGrid->nx, this->controlPointGrid->ny,
+                this->controlPointGrid->nz);
+        reg_print_info(this->executableName, text);
+        sprintf(text, "\t* image spacing: %g x %g x %g mm",
+                this->controlPointGrid->dx, this->controlPointGrid->dy,
+                this->controlPointGrid->dz);
+        reg_print_info(this->executableName, text);
 #ifdef NDEBUG
-   }
+    }
 #endif
 
 #ifndef NDEBUG
-   if(this->currentReference->sform_code>0)
-      reg_mat44_disp(&(this->currentReference->sto_xyz), (char *)"[NiftyReg DEBUG] Reference sform");
-   else reg_mat44_disp(&(this->currentReference->qto_xyz), (char *)"[NiftyReg DEBUG] Reference qform");
+    if (this->currentReference->sform_code > 0)
+        reg_mat44_disp(&(this->currentReference->sto_xyz), (char *)"[NiftyReg DEBUG] Reference sform");
+    else reg_mat44_disp(&(this->currentReference->qto_xyz), (char *)"[NiftyReg DEBUG] Reference qform");
 
-   if(this->currentFloating->sform_code>0)
-      reg_mat44_disp(&(this->currentFloating->sto_xyz), (char *)"[NiftyReg DEBUG] Floating sform");
-   else reg_mat44_disp(&(this->currentFloating->qto_xyz), (char *)"[NiftyReg DEBUG] Floating qform");
+    if (this->currentFloating->sform_code > 0)
+        reg_mat44_disp(&(this->currentFloating->sto_xyz), (char *)"[NiftyReg DEBUG] Floating sform");
+    else reg_mat44_disp(&(this->currentFloating->qto_xyz), (char *)"[NiftyReg DEBUG] Floating qform");
 
-   if(this->controlPointGrid->sform_code>0)
-      reg_mat44_disp(&(this->controlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] CPP sform");
-   else reg_mat44_disp(&(this->controlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] CPP qform");
+    if (this->controlPointGrid->sform_code > 0)
+        reg_mat44_disp(&(this->controlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] CPP sform");
+    else reg_mat44_disp(&(this->controlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] CPP qform");
 #endif
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::DisplayCurrentLevelParameters");
+    reg_print_fct_debug("reg_f3d<T>::DisplayCurrentLevelParameters");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-double reg_f3d<T>::GetObjectiveFunctionValue()
-{
-   this->currentWJac = this->ComputeJacobianBasedPenaltyTerm(1); // 20 iterations
+double reg_f3d<T>::GetObjectiveFunctionValue() {
+    this->currentWJac = this->ComputeJacobianBasedPenaltyTerm(1); // 20 iterations
 
-   this->currentWBE = this->ComputeBendingEnergyPenaltyTerm();
+    this->currentWBE = this->ComputeBendingEnergyPenaltyTerm();
 
-   this->currentWLE = this->ComputeLinearEnergyPenaltyTerm();
+    this->currentWLE = this->ComputeLinearEnergyPenaltyTerm();
 
-   this->currentWLand = this->ComputeLandmarkDistancePenaltyTerm();
+    this->currentWLand = this->ComputeLandmarkDistancePenaltyTerm();
 
-   // Compute initial similarity measure
-   this->currentWMeasure = 0.0;
-   if(this->similarityWeight>0)
-   {
-      this->WarpFloatingImage(this->interpolation);
-      this->currentWMeasure = this->ComputeSimilarityMeasure();
-   }
+    // Compute initial similarity measure
+    this->currentWMeasure = 0.0;
+    if (this->similarityWeight > 0) {
+        this->WarpFloatingImage(this->interpolation);
+        this->currentWMeasure = this->ComputeSimilarityMeasure();
+    }
 #ifndef NDEBUG
-   char text[255];
-   sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g",
-           this->currentWMeasure, this->currentWBE, this->currentWLE, this->currentWJac, this->currentWLand);
-   reg_print_msg_debug(text);
+    char text[255];
+    sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g",
+            this->currentWMeasure, this->currentWBE, this->currentWLE, this->currentWJac, this->currentWLand);
+    reg_print_msg_debug(text);
 #endif
 
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetObjectiveFunctionValue");
+    reg_print_fct_debug("reg_f3d<T>::GetObjectiveFunctionValue");
 #endif
-   // Store the global objective function value
+    // Store the global objective function value
 
-   return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - this->currentWLand;
+    return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - this->currentWLand;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::UpdateParameters(float scale)
-{
-   T *currentDOF=this->optimiser->GetCurrentDOF();
-   T *bestDOF=this->optimiser->GetBestDOF();
-   T *gradient=this->optimiser->GetGradient();
+void reg_f3d<T>::UpdateParameters(float scale) {
+    T *currentDOF = this->optimiser->GetCurrentDOF();
+    T *bestDOF = this->optimiser->GetBestDOF();
+    T *gradient = this->optimiser->GetGradient();
 
-   // Update the control point position
-   if(this->optimiser->GetOptimiseX()==true &&
-         this->optimiser->GetOptimiseY()==true &&
-         this->optimiser->GetOptimiseZ()==true)
-   {
-      // Update the values for all axis displacement
-      for(size_t i=0; i<this->optimiser->GetDOFNumber(); ++i)
-      {
-         currentDOF[i] = bestDOF[i] + scale * gradient[i];
-      }
-   }
-   else
-   {
-      size_t voxNumber = this->optimiser->GetVoxNumber();
-      // Update the values for the x-axis displacement
-      if(this->optimiser->GetOptimiseX()==true)
-      {
-         for(size_t i=0; i<voxNumber; ++i)
-         {
+    // Update the control point position
+    if (this->optimiser->GetOptimiseX() == true &&
+        this->optimiser->GetOptimiseY() == true &&
+        this->optimiser->GetOptimiseZ() == true) {
+        // Update the values for all axis displacement
+        for (size_t i = 0; i < this->optimiser->GetDOFNumber(); ++i) {
             currentDOF[i] = bestDOF[i] + scale * gradient[i];
-         }
-      }
-      // Update the values for the y-axis displacement
-      if(this->optimiser->GetOptimiseY()==true)
-      {
-         T *currentDOFY=&currentDOF[voxNumber];
-         T *bestDOFY=&bestDOF[voxNumber];
-         T *gradientY=&gradient[voxNumber];
-         for(size_t i=0; i<voxNumber; ++i)
-         {
-            currentDOFY[i] = bestDOFY[i] + scale * gradientY[i];
-         }
-      }
-      // Update the values for the z-axis displacement
-      if(this->optimiser->GetOptimiseZ()==true && this->optimiser->GetNDim()>2)
-      {
-         T *currentDOFZ=&currentDOF[2*voxNumber];
-         T *bestDOFZ=&bestDOF[2*voxNumber];
-         T *gradientZ=&gradient[2*voxNumber];
-         for(size_t i=0; i<voxNumber; ++i)
-         {
-            currentDOFZ[i] = bestDOFZ[i] + scale * gradientZ[i];
-         }
-      }
-   }
+        }
+    } else {
+        size_t voxNumber = this->optimiser->GetVoxNumber();
+        // Update the values for the x-axis displacement
+        if (this->optimiser->GetOptimiseX() == true) {
+            for (size_t i = 0; i < voxNumber; ++i) {
+                currentDOF[i] = bestDOF[i] + scale * gradient[i];
+            }
+        }
+        // Update the values for the y-axis displacement
+        if (this->optimiser->GetOptimiseY() == true) {
+            T *currentDOFY = &currentDOF[voxNumber];
+            T *bestDOFY = &bestDOF[voxNumber];
+            T *gradientY = &gradient[voxNumber];
+            for (size_t i = 0; i < voxNumber; ++i) {
+                currentDOFY[i] = bestDOFY[i] + scale * gradientY[i];
+            }
+        }
+        // Update the values for the z-axis displacement
+        if (this->optimiser->GetOptimiseZ() == true && this->optimiser->GetNDim() > 2) {
+            T *currentDOFZ = &currentDOF[2 * voxNumber];
+            T *bestDOFZ = &bestDOF[2 * voxNumber];
+            T *gradientZ = &gradient[2 * voxNumber];
+            for (size_t i = 0; i < voxNumber; ++i) {
+                currentDOFZ[i] = bestDOFZ[i] + scale * gradientZ[i];
+            }
+        }
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::UpdateParameters");
+    reg_print_fct_debug("reg_f3d<T>::UpdateParameters");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::SetOptimiser()
-{
-   reg_base<T>::SetOptimiser();
-   this->optimiser->Initialise(this->controlPointGrid->nvox,
-                               this->controlPointGrid->nz>1?3:2,
-                               this->optimiseX,
-                               this->optimiseY,
-                               this->optimiseZ,
-                               this->maxiterationNumber,
-                               0, // currentIterationNumber,
-                               this,
-                               static_cast<T *>(this->controlPointGrid->data),
-                               static_cast<T *>(this->transformationGradient->data)
-                               );
+void reg_f3d<T>::SetOptimiser() {
+    reg_base<T>::SetOptimiser();
+    this->optimiser->Initialise(this->controlPointGrid->nvox,
+                                this->controlPointGrid->nz > 1 ? 3 : 2,
+                                this->optimiseX,
+                                this->optimiseY,
+                                this->optimiseZ,
+                                this->maxIterationNumber,
+                                0, // currentIterationNumber,
+                                this,
+                                static_cast<T *>(this->controlPointGrid->data),
+                                static_cast<T *>(this->transformationGradient->data)
+    );
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::SetOptimiser");
+    reg_print_fct_debug("reg_f3d<T>::SetOptimiser");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::SmoothGradient()
-{
-   // The gradient is smoothed using a Gaussian kernel if it is required
-   if(this->gradientSmoothingSigma!=0)
-   {
-      float kernel = fabs(this->gradientSmoothingSigma);
-      reg_tools_kernelConvolution(this->transformationGradient,
-                                  &kernel,
-                                  GAUSSIAN_KERNEL);
-   }
+void reg_f3d<T>::SmoothGradient() {
+    // The gradient is smoothed using a Gaussian kernel if it is required
+    if (this->gradientSmoothingSigma != 0) {
+        float kernel = fabs(this->gradientSmoothingSigma);
+        reg_tools_kernelConvolution(this->transformationGradient,
+                                    &kernel,
+                                    GAUSSIAN_KERNEL);
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::SmoothGradient");
+    reg_print_fct_debug("reg_f3d<T>::SmoothGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_f3d<T>::GetApproximatedGradient()
-{
-   // Loop over every control point
-   T *gridPtr = static_cast<T*>(this->controlPointGrid->data);
-   T *gradPtr = static_cast<T*>(this->transformationGradient->data);
-   T eps = this->controlPointGrid->dx / 100.f;
-   for(size_t i=0; i<this->controlPointGrid->nvox; ++i)
-   {
-      T currentValue = this->optimiser->GetBestDOF()[i];
-      gridPtr[i] = currentValue + eps;
-      double valPlus = this->GetObjectiveFunctionValue();
-      gridPtr[i] = currentValue - eps;
-      double valMinus = this->GetObjectiveFunctionValue();
-      gridPtr[i] = currentValue;
-      gradPtr[i] = -(T)((valPlus - valMinus ) / (2.0*eps));
-   }
+void reg_f3d<T>::GetApproximatedGradient() {
+    // Loop over every control point
+    T *gridPtr = static_cast<T*>(this->controlPointGrid->data);
+    T *gradPtr = static_cast<T*>(this->transformationGradient->data);
+    T eps = this->controlPointGrid->dx / 100.f;
+    for (size_t i = 0; i < this->controlPointGrid->nvox; ++i) {
+        T currentValue = this->optimiser->GetBestDOF()[i];
+        gridPtr[i] = currentValue + eps;
+        double valPlus = this->GetObjectiveFunctionValue();
+        gridPtr[i] = currentValue - eps;
+        double valMinus = this->GetObjectiveFunctionValue();
+        gridPtr[i] = currentValue;
+        gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps));
+    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetApproximatedGradient");
+    reg_print_fct_debug("reg_f3d<T>::GetApproximatedGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-nifti_image **reg_f3d<T>::GetWarpedImage()
-{
-   // The initial images are used
-   if(this->inputReference==NULL ||
-         this->inputFloating==NULL ||
-         this->controlPointGrid==NULL)
-   {
-      reg_print_fct_error("reg_f3d<T>::GetWarpedImage()");
-      reg_print_msg_error("The reference, floating and control point grid images have to be defined");
-      reg_exit();
-   }
+nifti_image** reg_f3d<T>::GetWarpedImage() {
+    // The initial images are used
+    if (this->inputReference == nullptr ||
+        this->inputFloating == nullptr ||
+        this->controlPointGrid == nullptr) {
+        reg_print_fct_error("reg_f3d<T>::GetWarpedImage()");
+        reg_print_msg_error("The reference, floating and control point grid images have to be defined");
+        reg_exit();
+    }
 
-   this->currentReference = this->inputReference;
-   this->currentFloating = this->inputFloating;
-   this->currentMask=NULL;
+    this->currentReference = this->inputReference;
+    this->currentFloating = this->inputFloating;
+    this->currentMask = nullptr;
 
-   reg_base<T>::AllocateWarped();
-   reg_base<T>::AllocateDeformationField();
-   reg_base<T>::WarpFloatingImage(3); // cubic spline interpolation
-   reg_base<T>::ClearDeformationField();
+    reg_base<T>::AllocateWarped();
+    reg_base<T>::AllocateDeformationField();
+    reg_base<T>::WarpFloatingImage(3); // cubic spline interpolation
+    reg_base<T>::ClearDeformationField();
 
-   nifti_image **warpedImage= (nifti_image **)malloc(2*sizeof(nifti_image *));
-   warpedImage[0]=nifti_copy_nim_info(this->warped);
-   warpedImage[0]->cal_min=this->inputFloating->cal_min;
-   warpedImage[0]->cal_max=this->inputFloating->cal_max;
-   warpedImage[0]->scl_slope=this->inputFloating->scl_slope;
-   warpedImage[0]->scl_inter=this->inputFloating->scl_inter;
-   warpedImage[0]->data=(void *)malloc(warpedImage[0]->nvox*warpedImage[0]->nbyper);
-   memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox*warpedImage[0]->nbyper);
+    nifti_image **warpedImage = (nifti_image **)malloc(2 * sizeof(nifti_image *));
+    warpedImage[0] = nifti_copy_nim_info(this->warped);
+    warpedImage[0]->cal_min = this->inputFloating->cal_min;
+    warpedImage[0]->cal_max = this->inputFloating->cal_max;
+    warpedImage[0]->scl_slope = this->inputFloating->scl_slope;
+    warpedImage[0]->scl_inter = this->inputFloating->scl_inter;
+    warpedImage[0]->data = (void *)malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper);
+    memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox * warpedImage[0]->nbyper);
 
-   warpedImage[1]=NULL;
+    warpedImage[1] = nullptr;
 
-   reg_f3d<T>::ClearWarped();
+    reg_f3d<T>::ClearWarped();
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetWarpedImage");
+    reg_print_fct_debug("reg_f3d<T>::GetWarpedImage");
 #endif
-   return warpedImage;
+    return warpedImage;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-nifti_image * reg_f3d<T>::GetControlPointPositionImage()
-{
-   nifti_image *returnedControlPointGrid = nifti_copy_nim_info(this->controlPointGrid);
-   returnedControlPointGrid->data=(void *)malloc(returnedControlPointGrid->nvox*returnedControlPointGrid->nbyper);
-   memcpy(returnedControlPointGrid->data, this->controlPointGrid->data,
-          returnedControlPointGrid->nvox*returnedControlPointGrid->nbyper);
-   return returnedControlPointGrid;
+nifti_image* reg_f3d<T>::GetControlPointPositionImage() {
+    nifti_image *returnedControlPointGrid = nifti_copy_nim_info(this->controlPointGrid);
+    returnedControlPointGrid->data = (void *)malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
+    memcpy(returnedControlPointGrid->data, this->controlPointGrid->data,
+           returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
+    return returnedControlPointGrid;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetControlPointPositionImage");
+    reg_print_fct_debug("reg_f3d<T>::GetControlPointPositionImage");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::UpdateBestObjFunctionValue()
-{
-   this->bestWMeasure=this->currentWMeasure;
-   this->bestWBE=this->currentWBE;
-   this->bestWLE=this->currentWLE;
-   this->bestWJac=this->currentWJac;
-   this->bestWLand=this->currentWLand;
+void reg_f3d<T>::UpdateBestObjFunctionValue() {
+    this->bestWMeasure = this->currentWMeasure;
+    this->bestWBE = this->currentWBE;
+    this->bestWLE = this->currentWLE;
+    this->bestWJac = this->currentWJac;
+    this->bestWLand = this->currentWLand;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::UpdateBestObjFunctionValue");
+    reg_print_fct_debug("reg_f3d<T>::UpdateBestObjFunctionValue");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::PrintInitialObjFunctionValue()
-{
-   if(!this->verbose) return;
+void reg_f3d<T>::PrintInitialObjFunctionValue() {
+    if (!this->verbose) return;
 
-   double bestValue=this->optimiser->GetBestObjFunctionValue();
+    double bestValue = this->optimiser->GetBestObjFunctionValue();
 
-   char text[255];
-   sprintf(text, "Initial objective function: %g = (wSIM)%g - (wBE)%g - (wLE)%g - (wJAC)%g - (wLAN)%g",
-           bestValue, this->bestWMeasure, this->bestWBE, this->bestWLE, this->bestWJac, this->bestWLand);
-   reg_print_info(this->executableName, text);
+    char text[255];
+    sprintf(text, "Initial objective function: %g = (wSIM)%g - (wBE)%g - (wLE)%g - (wJAC)%g - (wLAN)%g",
+            bestValue, this->bestWMeasure, this->bestWBE, this->bestWLE, this->bestWJac, this->bestWLand);
+    reg_print_info(this->executableName, text);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::PrintInitialObjFunctionValue");
+    reg_print_fct_debug("reg_f3d<T>::PrintInitialObjFunctionValue");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::PrintCurrentObjFunctionValue(T currentSize)
-{
-   if(!this->verbose) return;
+void reg_f3d<T>::PrintCurrentObjFunctionValue(T currentSize) {
+    if (!this->verbose) return;
 
-   char text[255];
-   sprintf(text, "[%i] Current objective function: %g",
-           (int)this->optimiser->GetCurrentIterationNumber(),
-           this->optimiser->GetBestObjFunctionValue());
-   sprintf(text+strlen(text), " = (wSIM)%g", this->bestWMeasure);
-   if(this->bendingEnergyWeight>0)
-      sprintf(text+strlen(text), " - (wBE)%.2e", this->bestWBE);
-   if(this->linearEnergyWeight>0)
-      sprintf(text+strlen(text), " - (wLE)%.2e", this->bestWLE);
-   if(this->jacobianLogWeight>0)
-      sprintf(text+strlen(text), " - (wJAC)%.2e", this->bestWJac);
-   if(this->landmarkRegWeight>0)
-      sprintf(text+strlen(text), " - (wLAN)%.2e", this->bestWLand);
-   sprintf(text+strlen(text), " [+ %g mm]", currentSize);
-   reg_print_info(this->executableName, text);
+    char text[255];
+    sprintf(text, "[%i] Current objective function: %g",
+            (int)this->optimiser->GetCurrentIterationNumber(),
+            this->optimiser->GetBestObjFunctionValue());
+    sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure);
+    if (this->bendingEnergyWeight > 0)
+        sprintf(text + strlen(text), " - (wBE)%.2e", this->bestWBE);
+    if (this->linearEnergyWeight > 0)
+        sprintf(text + strlen(text), " - (wLE)%.2e", this->bestWLE);
+    if (this->jacobianLogWeight > 0)
+        sprintf(text + strlen(text), " - (wJAC)%.2e", this->bestWJac);
+    if (this->landmarkRegWeight > 0)
+        sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand);
+    sprintf(text + strlen(text), " [+ %g mm]", currentSize);
+    reg_print_info(this->executableName, text);
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::PrintCurrentObjFunctionValue");
+    reg_print_fct_debug("reg_f3d<T>::PrintCurrentObjFunctionValue");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::GetObjectiveFunctionGradient()
-{
-   if(!this->useApproxGradient)
-   {
-      // Compute the gradient of the similarity measure
-      if(this->similarityWeight>0)
-      {
-         this->WarpFloatingImage(this->interpolation);
-         this->GetSimilarityMeasureGradient();
-      }
-      else
-      {
-         this->SetGradientImageToZero();
-      }
-      // Compute the penalty term gradients if required
-      this->GetBendingEnergyGradient();
-      this->GetJacobianBasedGradient();
-      this->GetLinearEnergyGradient();
-      this->GetLandmarkDistanceGradient();
-   }
-   else
-   {
-      this->GetApproximatedGradient();
-   }
+void reg_f3d<T>::GetObjectiveFunctionGradient() {
+    if (!this->useApproxGradient) {
+        // Compute the gradient of the similarity measure
+        if (this->similarityWeight > 0) {
+            this->WarpFloatingImage(this->interpolation);
+            this->GetSimilarityMeasureGradient();
+        } else {
+            this->SetGradientImageToZero();
+        }
+        // Compute the penalty term gradients if required
+        this->GetBendingEnergyGradient();
+        this->GetJacobianBasedGradient();
+        this->GetLinearEnergyGradient();
+        this->GetLandmarkDistanceGradient();
+    } else {
+        this->GetApproximatedGradient();
+    }
 
-   this->optimiser->IncrementCurrentIterationNumber();
+    this->optimiser->IncrementCurrentIterationNumber();
 
-   // Smooth the gradient if require
-   this->SmoothGradient();
+    // Smooth the gradient if require
+    this->SmoothGradient();
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::GetObjectiveFunctionGradient");
+    reg_print_fct_debug("reg_f3d<T>::GetObjectiveFunctionGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::CorrectTransformation()
-{
-   if(this->jacobianLogWeight>0 && this->jacobianLogApproximation==true)
-      this->ComputeJacobianBasedPenaltyTerm(2); // 20 iterations without approximation
+void reg_f3d<T>::CorrectTransformation() {
+    if (this->jacobianLogWeight > 0 && this->jacobianLogApproximation == true)
+        this->ComputeJacobianBasedPenaltyTerm(2); // 20 iterations without approximation
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d<T>::CorrectTransformation");
+    reg_print_fct_debug("reg_f3d<T>::CorrectTransformation");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 
 template class reg_f3d<float>;
-#endif
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index b585e586..537a9bdc 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -10,142 +10,102 @@
  *
  */
 
-#ifndef _REG_F3D_H
-#define _REG_F3D_H
+#pragma once
 
 #include "_reg_base.h"
 
-/// @brief Fast Free Form Deformation registration class
+ /// @brief Fast Free Form Deformation registration class
 template <class T>
-class reg_f3d : public reg_base<T>
-{
+class reg_f3d: public reg_base<T> {
 protected:
-   nifti_image *inputControlPointGrid; // pointer to external
-   nifti_image *controlPointGrid;
-   T bendingEnergyWeight;
-   T linearEnergyWeight;
-   T jacobianLogWeight;
-   bool jacobianLogApproximation;
-   T spacing[3];
-
-   nifti_image *transformationGradient;
-   bool gridRefinement;
-
-   double currentWJac;
-   double currentWBE;
-   double currentWLE;
-   double bestWJac;
-   double bestWBE;
-   double bestWLE;
-
-   virtual void AllocateTransformationGradient();
-   virtual void ClearTransformationGradient();
-   virtual T InitialiseCurrentLevel();
-
-   virtual double ComputeBendingEnergyPenaltyTerm();
-   virtual double ComputeLinearEnergyPenaltyTerm();
-   virtual double ComputeJacobianBasedPenaltyTerm(int);
-   virtual double ComputeLandmarkDistancePenaltyTerm();
-
-   virtual void GetBendingEnergyGradient();
-   virtual void GetLinearEnergyGradient();
-   virtual void GetJacobianBasedGradient();
-   virtual void GetLandmarkDistanceGradient();
-   virtual void SetGradientImageToZero();
-   virtual T NormaliseGradient();
-   virtual void SmoothGradient();
-   virtual void GetObjectiveFunctionGradient();
-   virtual void GetApproximatedGradient();
-   void GetSimilarityMeasureGradient();
-
-   virtual void GetDeformationField();
-   virtual void DisplayCurrentLevelParameters();
-
-   virtual double GetObjectiveFunctionValue();
-   virtual void UpdateBestObjFunctionValue();
-   virtual void UpdateParameters(float);
-   virtual void SetOptimiser();
-
-   virtual void PrintInitialObjFunctionValue();
-   virtual void PrintCurrentObjFunctionValue(T);
-
-   virtual void CorrectTransformation();
-
-   void (*funcProgressCallback)(float pcntProgress, void *params);
-   void *paramsProgressCallback;
+    nifti_image *inputControlPointGrid; // pointer to external
+    nifti_image *controlPointGrid;
+    T bendingEnergyWeight;
+    T linearEnergyWeight;
+    T jacobianLogWeight;
+    bool jacobianLogApproximation;
+    T spacing[3];
+
+    nifti_image *transformationGradient;
+    bool gridRefinement;
+
+    double currentWJac;
+    double currentWBE;
+    double currentWLE;
+    double bestWJac;
+    double bestWBE;
+    double bestWLE;
+
+    virtual void AllocateTransformationGradient();
+    virtual void ClearTransformationGradient();
+    virtual T InitialiseCurrentLevel();
+
+    virtual double ComputeBendingEnergyPenaltyTerm();
+    virtual double ComputeLinearEnergyPenaltyTerm();
+    virtual double ComputeJacobianBasedPenaltyTerm(int);
+    virtual double ComputeLandmarkDistancePenaltyTerm();
+
+    virtual void GetBendingEnergyGradient();
+    virtual void GetLinearEnergyGradient();
+    virtual void GetJacobianBasedGradient();
+    virtual void GetLandmarkDistanceGradient();
+    virtual void SetGradientImageToZero();
+    virtual T NormaliseGradient();
+    virtual void SmoothGradient();
+    virtual void GetObjectiveFunctionGradient();
+    virtual void GetApproximatedGradient();
+    void GetSimilarityMeasureGradient();
+
+    virtual void GetDeformationField();
+    virtual void DisplayCurrentLevelParameters();
+
+    virtual double GetObjectiveFunctionValue();
+    virtual void UpdateBestObjFunctionValue();
+    virtual void UpdateParameters(float);
+    virtual void SetOptimiser();
+
+    virtual void PrintInitialObjFunctionValue();
+    virtual void PrintCurrentObjFunctionValue(T);
+
+    virtual void CorrectTransformation();
+
+    void (*funcProgressCallback)(float pcntProgress, void *params);
+    void *paramsProgressCallback;
 
 public:
-   reg_f3d(int refTimePoint,int floTimePoint);
-   virtual ~reg_f3d();
-
-   void SetControlPointGridImage(nifti_image *);
-   void SetBendingEnergyWeight(T);
-   void SetLinearEnergyWeight(T);
-   void SetJacobianLogWeight(T);
-   void ApproximateJacobianLog();
-   void DoNotApproximateJacobianLog();
-   void SetSpacing(unsigned int ,T);
-
-   void NoGridRefinement()
-   {
-      this->gridRefinement=false;
-   }
-   // F3D2 specific options
-   virtual void SetCompositionStepNumber(int)
-   {
-      return;
-   }
-   virtual void ApproximateComposition()
-   {
-      return;
-   }
-   virtual void UseSimilaritySymmetry()
-   {
-      return;
-   }
-   virtual void UseBCHUpdate(int)
-   {
-      return;
-   }
-   virtual void UseGradientCumulativeExp()
-   {
-      return;
-   }
-   virtual void DoNotUseGradientCumulativeExp()
-   {
-      return;
-   }
-
-   // F3D_SYM specific options
-   virtual void SetFloatingMask(nifti_image *)
-   {
-      return;
-   }
-   virtual void SetInverseConsistencyWeight(T)
-   {
-      return;
-   }
-   virtual nifti_image *GetBackwardControlPointPositionImage()
-   {
-      return NULL;
-   }
-
-   // F3D_gpu specific option
-   virtual int CheckMemoryMB()
-   {
-      return EXIT_SUCCESS;
-   }
-
-   virtual void CheckParameters();
-   virtual void Initialise();
-   virtual nifti_image *GetControlPointPositionImage();
-   virtual nifti_image **GetWarpedImage();
-
-   // Function used for testing
-   virtual void reg_test_setControlPointGrid(nifti_image *cpp)
-   {
-      this->controlPointGrid=cpp;
-   }
+    reg_f3d(int refTimePoint, int floTimePoint);
+    virtual ~reg_f3d();
+
+    void SetControlPointGridImage(nifti_image*);
+    void SetBendingEnergyWeight(T);
+    void SetLinearEnergyWeight(T);
+    void SetJacobianLogWeight(T);
+    void ApproximateJacobianLog();
+    void DoNotApproximateJacobianLog();
+    void SetSpacing(unsigned int, T);
+
+    void NoGridRefinement() { gridRefinement = false; }
+    // F3D2 specific options
+    virtual void SetCompositionStepNumber(int) {}
+    virtual void ApproximateComposition() {}
+    virtual void UseSimilaritySymmetry() {}
+    virtual void UseBCHUpdate(int) {}
+    virtual void UseGradientCumulativeExp() {}
+    virtual void DoNotUseGradientCumulativeExp() {}
+
+    // f3d_sym specific options
+    virtual void SetFloatingMask(nifti_image*) {}
+    virtual void SetInverseConsistencyWeight(T) {}
+    virtual nifti_image *GetBackwardControlPointPositionImage() { return nullptr; }
+
+    // f3d_gpu specific option
+    virtual int CheckMemoryMB() { return EXIT_SUCCESS; }
+
+    virtual void CheckParameters();
+    virtual void Initialise();
+    virtual nifti_image* GetControlPointPositionImage();
+    virtual nifti_image** GetWarpedImage();
+
+    // Function used for testing
+    virtual void reg_test_setControlPointGrid(nifti_image *cpp) { controlPointGrid = cpp; }
 };
-
-#endif
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index b6828fd0..bcf3710a 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -10,10 +10,6 @@
  *
  */
 
-
-#ifndef _REG_F3D2_CPP
-#define _REG_F3D2_CPP
-
 #include "_reg_f3d2.h"
 
 /* *************************************************************** */
@@ -92,7 +88,7 @@ void reg_f3d2<T>::GetDeformationField()
    // By default the number of steps is automatically updated
    bool updateStepNumber=true;
    // The provided step number is used for the final resampling
-   if(this->optimiser==NULL)
+   if(this->optimiser==nullptr)
       updateStepNumber=false;
 #ifndef NDEBUG
    char text[255];
@@ -185,8 +181,8 @@ void reg_f3d2<T>::ExponentiateGradient()
          tempDef);
 
    // Remove the affine component
-   nifti_image *affine_disp=NULL;
-   if(this->affineTransformation!=NULL){
+   nifti_image *affine_disp=nullptr;
+   if(this->affineTransformation!=nullptr){
       affine_disp=nifti_copy_nim_info(this->deformationFieldImage);
       affine_disp->data=(void *)malloc(affine_disp->nvox*affine_disp->nbyper);
       mat44 backwardAffineTransformation=nifti_mat44_inverse(*this->affineTransformation);
@@ -201,7 +197,7 @@ void reg_f3d2<T>::ExponentiateGradient()
    tempGrad->data=(void *)malloc(tempGrad->nvox*tempGrad->nbyper);
    for(int i=0; i<(int)fabsf(this->backwardControlPointGrid->intent_p2); ++i)
    {
-      if(affine_disp!=NULL)
+      if(affine_disp!=nullptr)
          reg_tools_substractImageToImage(tempDef[i],
                                          affine_disp,
                                          tempDef[i]);
@@ -219,17 +215,17 @@ void reg_f3d2<T>::ExponentiateGradient()
    for(int i=0; i<=(int)fabsf(this->backwardControlPointGrid->intent_p2); ++i)
    {
       nifti_image_free(tempDef[i]);
-      tempDef[i]=NULL;
+      tempDef[i]=nullptr;
    }
    free(tempDef);
-   tempDef=NULL;
+   tempDef=nullptr;
    // Free the temporary gradient image
    nifti_image_free(tempGrad);
-   tempGrad=NULL;
+   tempGrad=nullptr;
    // Free the temporary affine displacement field
-   if(affine_disp!=NULL)
+   if(affine_disp!=nullptr)
       nifti_image_free(affine_disp);
-   affine_disp=NULL;
+   affine_disp=nullptr;
    // Normalise the forward gradient
    reg_tools_divideValueToImage(this->voxelBasedMeasureGradient, // in
                                 this->voxelBasedMeasureGradient, // out
@@ -255,7 +251,7 @@ void reg_f3d2<T>::ExponentiateGradient()
          tempDef);
 
    // Remove the affine component
-   if(this->affineTransformation!=NULL){
+   if(this->affineTransformation!=nullptr){
       affine_disp=nifti_copy_nim_info(this->backwardDeformationFieldImage);
       affine_disp->data=(void *)malloc(affine_disp->nvox*affine_disp->nbyper);
       reg_affine_getDeformationField(this->affineTransformation,
@@ -265,7 +261,7 @@ void reg_f3d2<T>::ExponentiateGradient()
 
    for(int i=0; i<(int)fabsf(this->controlPointGrid->intent_p2); ++i)
    {
-      if(affine_disp!=NULL)
+      if(affine_disp!=nullptr)
          reg_tools_substractImageToImage(tempDef[i],
                                          affine_disp,
                                          tempDef[i]);
@@ -283,17 +279,17 @@ void reg_f3d2<T>::ExponentiateGradient()
    for(int i=0; i<=(int)fabsf(this->controlPointGrid->intent_p2); ++i)
    {
       nifti_image_free(tempDef[i]);
-      tempDef[i]=NULL;
+      tempDef[i]=nullptr;
    }
    free(tempDef);
-   tempDef=NULL;
+   tempDef=nullptr;
    // Free the temporary gradient image
    nifti_image_free(tempGrad);
-   tempGrad=NULL;
+   tempGrad=nullptr;
    // Free the temporary affine displacement field
-   if(affine_disp!=NULL)
+   if(affine_disp!=nullptr)
       nifti_image_free(affine_disp);
-   affine_disp=NULL;
+   affine_disp=nullptr;
    // Normalise the backward gradient
    reg_tools_divideValueToImage(this->backwardVoxelBasedMeasureGradientImage, // in
                                 this->backwardVoxelBasedMeasureGradientImage, // out
@@ -346,7 +342,7 @@ void reg_f3d2<T>::UpdateParameters(float scale)
    }
    // Clean the temporary nifti_images
    nifti_image_free(forwardScaledGradient);
-   forwardScaledGradient=NULL;
+   forwardScaledGradient=nullptr;
 
    /************************/
    /**** Backward update ***/
@@ -385,7 +381,7 @@ void reg_f3d2<T>::UpdateParameters(float scale)
    }
    // Clean the temporary nifti_images
    nifti_image_free(backwardScaledGradient);
-   backwardScaledGradient=NULL;
+   backwardScaledGradient=nullptr;
 
    /****************************/
    /******** Symmetrise ********/
@@ -423,9 +419,9 @@ void reg_f3d2<T>::UpdateParameters(float scale)
                                   0.5f); // *(0.5)
    // Clean the temporary allocated velocity fields
    nifti_image_free(warpedForwardTrans);
-   warpedForwardTrans=NULL;
+   warpedForwardTrans=nullptr;
    nifti_image_free(warpedBackwardTrans);
-   warpedBackwardTrans=NULL;
+   warpedBackwardTrans=nullptr;
 
    // Convert the velocity field from displacement to deformation
    reg_getDeformationFromDisplacement(this->controlPointGrid);
@@ -439,10 +435,10 @@ template<class T>
 nifti_image **reg_f3d2<T>::GetWarpedImage()
 {
    // The initial images are used
-   if(this->inputReference==NULL ||
-         this->inputFloating==NULL ||
-         this->controlPointGrid==NULL ||
-         this->backwardControlPointGrid==NULL)
+   if(this->inputReference==nullptr ||
+         this->inputFloating==nullptr ||
+         this->controlPointGrid==nullptr ||
+         this->backwardControlPointGrid==nullptr)
    {
       reg_print_fct_error("reg_f3d2<T>::GetWarpedImage()");
       reg_print_msg_error("The reference, floating and control point grid images have to be defined");
@@ -453,8 +449,8 @@ nifti_image **reg_f3d2<T>::GetWarpedImage()
    reg_f3d2<T>::currentReference = this->inputReference;
    reg_f3d2<T>::currentFloating = this->inputFloating;
    // No mask is used to perform the final resampling
-   reg_f3d2<T>::currentMask = NULL;
-   reg_f3d2<T>::currentFloatingMask = NULL;
+   reg_f3d2<T>::currentMask = nullptr;
+   reg_f3d2<T>::currentFloatingMask = nullptr;
 
    // Allocate the forward and backward warped images
    reg_f3d2<T>::AllocateWarped();
@@ -495,4 +491,3 @@ nifti_image **reg_f3d2<T>::GetWarpedImage()
 /* *************************************************************** */
 /* *************************************************************** */
 template class reg_f3d2<float>;
-#endif
diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h
index c2aa7a26..8e86bcb1 100644
--- a/reg-lib/_reg_f3d2.h
+++ b/reg-lib/_reg_f3d2.h
@@ -10,11 +10,9 @@
  *
  */
 
-#include "_reg_f3d_sym.h"
-
-#ifndef _REG_F3D2_H
-#define _REG_F3D2_H
+#pragma once
 
+#include "_reg_f3d_sym.h"
 
 /// @brief Fast Free Form Diffeomorphic Deformation registration class
 template <class T>
@@ -41,5 +39,3 @@ class reg_f3d2 : public reg_f3d_sym<T>
    virtual void Initialise();
    virtual nifti_image **GetWarpedImage();
 };
-
-#endif
diff --git a/reg-lib/_reg_f3d_sym.cpp b/reg-lib/_reg_f3d_sym.cpp
index 7e247eaa..3874e12b 100644
--- a/reg-lib/_reg_f3d_sym.cpp
+++ b/reg-lib/_reg_f3d_sym.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_F3D_SYM_CPP
-#define _REG_F3D_SYM_CPP
-
 #include "_reg_f3d_sym.h"
 
 /* *************************************************************** */
@@ -23,22 +20,22 @@ reg_f3d_sym<T>::reg_f3d_sym(int refTimePoint,int floTimePoint)
 {
    this->executableName=(char *)"NiftyReg F3D SYM";
 
-   this->backwardControlPointGrid=NULL;
-   this->backwardWarped=NULL;
-   this->backwardWarpedGradientImage=NULL;
-   this->backwardDeformationFieldImage=NULL;
-   this->backwardVoxelBasedMeasureGradientImage=NULL;
-   this->backwardTransformationGradient=NULL;
+   this->backwardControlPointGrid=nullptr;
+   this->backwardWarped=nullptr;
+   this->backwardWarpedGradientImage=nullptr;
+   this->backwardDeformationFieldImage=nullptr;
+   this->backwardVoxelBasedMeasureGradientImage=nullptr;
+   this->backwardTransformationGradient=nullptr;
 
-   this->backwardProbaJointHistogram=NULL;
-   this->backwardLogJointHistogram=NULL;
+   this->backwardProbaJointHistogram=nullptr;
+   this->backwardLogJointHistogram=nullptr;
 
-   this->floatingMaskImage=NULL;
-   this->currentFloatingMask=NULL;
-   this->floatingMaskPyramid=NULL;
-   this->backwardActiveVoxelNumber=NULL;
+   this->floatingMaskImage=nullptr;
+   this->currentFloatingMask=nullptr;
+   this->floatingMaskPyramid=nullptr;
+   this->backwardActiveVoxelNumber=nullptr;
 
-   this->backwardJacobianMatrix=NULL;
+   this->backwardJacobianMatrix=nullptr;
 
    this->inverseConsistencyWeight=0.1;
 
@@ -51,41 +48,41 @@ reg_f3d_sym<T>::reg_f3d_sym(int refTimePoint,int floTimePoint)
 template <class T>
 reg_f3d_sym<T>::~reg_f3d_sym()
 {
-   if(this->backwardControlPointGrid!=NULL)
+   if(this->backwardControlPointGrid!=nullptr)
    {
       nifti_image_free(this->backwardControlPointGrid);
-      this->backwardControlPointGrid=NULL;
+      this->backwardControlPointGrid=nullptr;
    }
 
-   if(this->floatingMaskPyramid!=NULL)
+   if(this->floatingMaskPyramid!=nullptr)
    {
       if(this->usePyramid)
       {
          for(unsigned int i=0; i<this->levelToPerform; i++)
          {
-            if(this->floatingMaskPyramid[i]!=NULL)
+            if(this->floatingMaskPyramid[i]!=nullptr)
             {
                free(this->floatingMaskPyramid[i]);
-               this->floatingMaskPyramid[i]=NULL;
+               this->floatingMaskPyramid[i]=nullptr;
             }
          }
       }
       else
       {
-         if(this->floatingMaskPyramid[0]!=NULL)
+         if(this->floatingMaskPyramid[0]!=nullptr)
          {
             free(this->floatingMaskPyramid[0]);
-            this->floatingMaskPyramid[0]=NULL;
+            this->floatingMaskPyramid[0]=nullptr;
          }
       }
       free(this->floatingMaskPyramid);
-      floatingMaskPyramid=NULL;
+      floatingMaskPyramid=nullptr;
    }
 
-   if(this->backwardActiveVoxelNumber!=NULL)
+   if(this->backwardActiveVoxelNumber!=nullptr)
    {
       free(this->backwardActiveVoxelNumber);
-      this->backwardActiveVoxelNumber=NULL;
+      this->backwardActiveVoxelNumber=nullptr;
    }
 
 #ifndef NDEBUG
@@ -180,7 +177,7 @@ void reg_f3d_sym<T>::AllocateWarped()
    this->ClearWarped();
 
    reg_f3d<T>::AllocateWarped();
-   if(this->currentFloating==NULL)
+   if(this->currentFloating==nullptr)
    {
       reg_print_fct_error("reg_f3d_sym<T>::AllocateWarped()");
       reg_print_msg_error("The floating image is not defined");
@@ -208,10 +205,10 @@ template <class T>
 void reg_f3d_sym<T>::ClearWarped()
 {
    reg_f3d<T>::ClearWarped();
-   if(this->backwardWarped!=NULL)
+   if(this->backwardWarped!=nullptr)
    {
       nifti_image_free(this->backwardWarped);
-      this->backwardWarped=NULL;
+      this->backwardWarped=nullptr;
    }
 #ifndef NDEBUG
    reg_print_fct_debug("reg_f3d_sym<T>::ClearWarped");
@@ -226,13 +223,13 @@ void reg_f3d_sym<T>::AllocateDeformationField()
    this->ClearDeformationField();
 
    reg_f3d<T>::AllocateDeformationField();
-   if(this->currentFloating==NULL)
+   if(this->currentFloating==nullptr)
    {
       reg_print_fct_error("reg_f3d_sym<T>::AllocateDeformationField()");
       reg_print_msg_error("The floating image is not defined");
       reg_exit();
    }
-   if(this->backwardControlPointGrid==NULL)
+   if(this->backwardControlPointGrid==nullptr)
    {
       reg_print_fct_error("reg_f3d_sym<T>::AllocateDeformationField()");
       reg_print_msg_error("The backward control point image is not defined");
@@ -270,7 +267,7 @@ void reg_f3d_sym<T>::AllocateDeformationField()
    this->backwardDeformationFieldImage->scl_slope=1.f;
    this->backwardDeformationFieldImage->scl_inter=0.f;
 
-   if(this->measure_dti!=NULL)
+   if(this->measure_dti!=nullptr)
       this->backwardJacobianMatrix=(mat33 *)malloc(
             this->backwardDeformationFieldImage->nx *
             this->backwardDeformationFieldImage->ny *
@@ -287,15 +284,15 @@ template <class T>
 void reg_f3d_sym<T>::ClearDeformationField()
 {
    reg_f3d<T>::ClearDeformationField();
-   if(this->backwardDeformationFieldImage!=NULL)
+   if(this->backwardDeformationFieldImage!=nullptr)
    {
       nifti_image_free(this->backwardDeformationFieldImage);
-      this->backwardDeformationFieldImage=NULL;
+      this->backwardDeformationFieldImage=nullptr;
    }
-   if(this->backwardJacobianMatrix!=NULL)
+   if(this->backwardJacobianMatrix!=nullptr)
    {
       free(this->backwardJacobianMatrix);
-      this->backwardJacobianMatrix=NULL;
+      this->backwardJacobianMatrix=nullptr;
    }
 #ifndef NDEBUG
    reg_print_fct_debug("reg_f3d_sym<T>::ClearDeformationField");
@@ -310,7 +307,7 @@ void reg_f3d_sym<T>::AllocateWarpedGradient()
    this->ClearWarpedGradient();
 
    reg_f3d<T>::AllocateWarpedGradient();
-   if(this->backwardDeformationFieldImage==NULL)
+   if(this->backwardDeformationFieldImage==nullptr)
    {
       reg_print_fct_error("reg_f3d_sym<T>::AllocateWarpedGradient()");
       reg_print_msg_error("The backward control point image is not defined");
@@ -329,10 +326,10 @@ template <class T>
 void reg_f3d_sym<T>::ClearWarpedGradient()
 {
    reg_f3d<T>::ClearWarpedGradient();
-   if(this->backwardWarpedGradientImage!=NULL)
+   if(this->backwardWarpedGradientImage!=nullptr)
    {
       nifti_image_free(this->backwardWarpedGradientImage);
-      this->backwardWarpedGradientImage=NULL;
+      this->backwardWarpedGradientImage=nullptr;
    }
 #ifndef NDEBUG
    reg_print_fct_debug("reg_f3d_sym<T>::ClearWarpedGradient");
@@ -347,7 +344,7 @@ void reg_f3d_sym<T>::AllocateVoxelBasedMeasureGradient()
    this->ClearVoxelBasedMeasureGradient();
 
    reg_f3d<T>::AllocateVoxelBasedMeasureGradient();
-   if(this->backwardDeformationFieldImage==NULL)
+   if(this->backwardDeformationFieldImage==nullptr)
    {
       reg_print_fct_error("reg_f3d_sym<T>::AllocateVoxelBasedMeasureGradient()");
       reg_print_msg_error("The backward control point image is not defined");
@@ -367,10 +364,10 @@ template <class T>
 void reg_f3d_sym<T>::ClearVoxelBasedMeasureGradient()
 {
    reg_f3d<T>::ClearVoxelBasedMeasureGradient();
-   if(this->backwardVoxelBasedMeasureGradientImage!=NULL)
+   if(this->backwardVoxelBasedMeasureGradientImage!=nullptr)
    {
       nifti_image_free(this->backwardVoxelBasedMeasureGradientImage);
-      this->backwardVoxelBasedMeasureGradientImage=NULL;
+      this->backwardVoxelBasedMeasureGradientImage=nullptr;
    }
 #ifndef NDEBUG
    reg_print_fct_debug("reg_f3d_sym<T>::ClearVoxelBasedMeasureGradient");
@@ -385,7 +382,7 @@ void reg_f3d_sym<T>::AllocateTransformationGradient()
    this->ClearTransformationGradient();
 
    reg_f3d<T>::AllocateTransformationGradient();
-   if(this->backwardControlPointGrid==NULL)
+   if(this->backwardControlPointGrid==nullptr)
    {
       reg_print_fct_error("reg_f3d_sym<T>::AllocateTransformationGradient()");
       reg_print_msg_error("The backward control point image is not defined");
@@ -405,9 +402,9 @@ template <class T>
 void reg_f3d_sym<T>::ClearTransformationGradient()
 {
    reg_f3d<T>::ClearTransformationGradient();
-   if(this->backwardTransformationGradient!=NULL)
+   if(this->backwardTransformationGradient!=nullptr)
       nifti_image_free(this->backwardTransformationGradient);
-   this->backwardTransformationGradient=NULL;
+   this->backwardTransformationGradient=nullptr;
 #ifndef NDEBUG
    reg_print_fct_debug("reg_f3d_sym<T>::ClearTransformationGradient");
 #endif
@@ -422,7 +419,7 @@ void reg_f3d_sym<T>::CheckParameters()
    reg_f3d<T>::CheckParameters();
 
    // CHECK THE FLOATING MASK DIMENSION IF IT IS DEFINED
-   if(this->floatingMaskImage!=NULL)
+   if(this->floatingMaskImage!=nullptr)
    {
       if(this->inputFloating->nx != this->floatingMaskImage->nx ||
             this->inputFloating->ny != this->floatingMaskImage->ny ||
@@ -464,7 +461,7 @@ void reg_f3d_sym<T>::Initialise()
 {
    reg_f3d<T>::Initialise();
 
-   if(this->inputControlPointGrid==NULL){
+   if(this->inputControlPointGrid==nullptr){
       // Define the spacing for the first level
       float gridSpacing[3] = {this->spacing[0],this->spacing[1],this->spacing[2]};
       if(this->spacing[0]<0)
@@ -533,7 +530,7 @@ void reg_f3d_sym<T>::Initialise()
 
    if(this->usePyramid)
    {
-      if (this->floatingMaskImage!=NULL)
+      if (this->floatingMaskImage!=nullptr)
          reg_createMaskPyramid<T>(this->floatingMaskImage,
                                   this->floatingMaskPyramid,
                                   this->levelNumber,
@@ -550,7 +547,7 @@ void reg_f3d_sym<T>::Initialise()
    }
    else  // no pyramid
    {
-      if (this->floatingMaskImage!=NULL)
+      if (this->floatingMaskImage!=nullptr)
          reg_createMaskPyramid<T>(this->floatingMaskImage, this->floatingMaskPyramid, 1, 1, this->backwardActiveVoxelNumber);
       else
       {
@@ -609,7 +606,7 @@ void reg_f3d_sym<T>::WarpFloatingImage(int inter)
    this->GetDeformationField();
 
    // Resample the floating image
-   if(this->measure_dti==NULL)
+   if(this->measure_dti==nullptr)
    {
       reg_resampleImage(this->currentFloating,
                         this->warped,
@@ -634,7 +631,7 @@ void reg_f3d_sym<T>::WarpFloatingImage(int inter)
    }
 
    // Resample the reference image
-   if(this->measure_dti==NULL)
+   if(this->measure_dti==nullptr)
    {
       reg_resampleImage(this->currentReference, // input image
                         this->backwardWarped, // warped input image
@@ -801,7 +798,7 @@ void reg_f3d_sym<T>::GetVoxelBasedGradient()
                                   this->backwardVoxelBasedMeasureGradientImage,
                                   0.f);
    // The intensity gradient is first computed
-   //    if(this->measure_dti!=NULL){
+   //    if(this->measure_dti!=nullptr){
    //        reg_getImageGradient(this->currentFloating,
    //                             this->warImgGradient,
    //                             this->deformationFieldImage,
@@ -821,7 +818,7 @@ void reg_f3d_sym<T>::GetVoxelBasedGradient()
    //                             this->measure_dti->GetActiveTimepoints(),
    //                             this->backwardJacobianMatrix,
    //                             this->backwardWarped);
-   //   if(this->measure_dti!=NULL)
+   //   if(this->measure_dti!=nullptr)
    //      this->measure_dti->GetVoxelBasedSimilarityMeasureGradient();
    //    }
    //    else{
@@ -846,22 +843,22 @@ void reg_f3d_sym<T>::GetVoxelBasedGradient()
                            t);
 
       // The gradient of the various measures of similarity are computed
-      if(this->measure_nmi!=NULL)
+      if(this->measure_nmi!=nullptr)
          this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t);
 
-      if(this->measure_ssd!=NULL)
+      if(this->measure_ssd!=nullptr)
          this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t);
 
-      if(this->measure_kld!=NULL)
+      if(this->measure_kld!=nullptr)
          this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t);
 
-      if(this->measure_lncc!=NULL)
+      if(this->measure_lncc!=nullptr)
          this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t);
 
-      if(this->measure_mind!=NULL)
+      if(this->measure_mind!=nullptr)
          this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t);
 
-      if(this->measure_mindssc!=NULL)
+      if(this->measure_mindssc!=nullptr)
          this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t);
    } // timepoint
 
@@ -885,8 +882,8 @@ void reg_f3d_sym<T>::GetSimilarityMeasureGradient()
    reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage,
                                currentNodeSpacing,
                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               NULL, // mask
-                               NULL, // all volumes are active
+                               nullptr, // mask
+                               nullptr, // all volumes are active
                                activeAxis
                                );
    // Convolution along the y axis
@@ -896,8 +893,8 @@ void reg_f3d_sym<T>::GetSimilarityMeasureGradient()
    reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage,
                                currentNodeSpacing,
                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               NULL, // mask
-                               NULL, // all volumes are active
+                               nullptr, // mask
+                               nullptr, // all volumes are active
                                activeAxis
                                );
    // Convolution along the z axis if required
@@ -909,8 +906,8 @@ void reg_f3d_sym<T>::GetSimilarityMeasureGradient()
       reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage,
                                   currentNodeSpacing,
                                   CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                  NULL, // mask
-                                  NULL, // all volumes are active
+                                  nullptr, // mask
+                                  nullptr, // all volumes are active
                                   activeAxis
                                   );
    }
@@ -1370,7 +1367,7 @@ void reg_f3d_sym<T>::GetInverseConsistencyGradient()
    reg_tools_kernelConvolution(this->deformationFieldImage,
                                currentNodeSpacing,
                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               NULL, // all volumes are active
+                               nullptr, // all volumes are active
                                activeAxis
                                );
    // Convolution along the y axis
@@ -1380,7 +1377,7 @@ void reg_f3d_sym<T>::GetInverseConsistencyGradient()
    reg_tools_kernelConvolution(this->deformationFieldImage,
                                currentNodeSpacing,
                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               NULL, // all volumes are active
+                               nullptr, // all volumes are active
                                activeAxis
                                );
    // Convolution along the z axis if required
@@ -1392,7 +1389,7 @@ void reg_f3d_sym<T>::GetInverseConsistencyGradient()
       reg_tools_kernelConvolution(this->deformationFieldImage,
                                   currentNodeSpacing,
                                   CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                  NULL, // all volumes are active
+                                  nullptr, // all volumes are active
                                   activeAxis
                                   );
    }
@@ -1401,7 +1398,7 @@ void reg_f3d_sym<T>::GetInverseConsistencyGradient()
                                 this->deformationFieldImage,
                                 2.f * this->inverseConsistencyWeight,
                                 true, // update the current value
-                                NULL // no voxel to mm conversion
+                                nullptr // no voxel to mm conversion
                                 );
 
    // We convolve the inverse consistency map with a cubic B-Spline kernel
@@ -1413,7 +1410,7 @@ void reg_f3d_sym<T>::GetInverseConsistencyGradient()
    reg_tools_kernelConvolution(this->backwardDeformationFieldImage,
                                currentNodeSpacing,
                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               NULL, // all volumes are active
+                               nullptr, // all volumes are active
                                activeAxis
                                );
    // Convolution along the y axis
@@ -1423,7 +1420,7 @@ void reg_f3d_sym<T>::GetInverseConsistencyGradient()
    reg_tools_kernelConvolution(this->backwardDeformationFieldImage,
                                currentNodeSpacing,
                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               NULL, // all volumes are active
+                               nullptr, // all volumes are active
                                activeAxis
                                );
    // Convolution along the z axis if required
@@ -1435,7 +1432,7 @@ void reg_f3d_sym<T>::GetInverseConsistencyGradient()
       reg_tools_kernelConvolution(this->backwardDeformationFieldImage,
                                   currentNodeSpacing,
                                   CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                  NULL, // all volumes are active
+                                  nullptr, // all volumes are active
                                   activeAxis
                                   );
    }
@@ -1444,7 +1441,7 @@ void reg_f3d_sym<T>::GetInverseConsistencyGradient()
                                 this->backwardDeformationFieldImage,
                                 2.f * this->inverseConsistencyWeight,
                                 true, // update the current value
-                                NULL // no voxel to mm conversion
+                                nullptr // no voxel to mm conversion
                                 );
 
 #ifndef NDEBUG
@@ -1527,7 +1524,7 @@ void reg_f3d_sym<T>::SetOptimiser()
                                this->optimiseX,
                                this->optimiseY,
                                this->optimiseZ,
-                               this->maxiterationNumber,
+                               this->maxIterationNumber,
                                0, // currentIterationNumber
                                this,
                                static_cast<T *>(this->controlPointGrid->data),
@@ -1639,19 +1636,19 @@ template<class T>
 void reg_f3d_sym<T>::InitialiseSimilarity()
 {
    // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
-   if(this->measure_nmi==NULL &&
-         this->measure_ssd==NULL &&
-         this->measure_dti==NULL &&
-         this->measure_lncc==NULL &&
-         this->measure_kld==NULL &&
-         this->measure_mind==NULL &&
-         this->measure_mindssc==NULL)
+   if(this->measure_nmi==nullptr &&
+         this->measure_ssd==nullptr &&
+         this->measure_dti==nullptr &&
+         this->measure_lncc==nullptr &&
+         this->measure_kld==nullptr &&
+         this->measure_mind==nullptr &&
+         this->measure_mindssc==nullptr)
    {
       this->measure_nmi=new reg_nmi;
       for(int i=0; i<this->inputReference->nt; ++i)
          this->measure_nmi->SetTimepointWeight(i,1.0);
    }
-   if(this->measure_nmi!=NULL)
+   if(this->measure_nmi!=nullptr)
       this->measure_nmi->InitialiseMeasure(this->currentReference,
                                            this->currentFloating,
                                            this->currentMask,
@@ -1665,7 +1662,7 @@ void reg_f3d_sym<T>::InitialiseSimilarity()
                                            this->backwardVoxelBasedMeasureGradientImage
                                            );
 
-   if(this->measure_ssd!=NULL)
+   if(this->measure_ssd!=nullptr)
       this->measure_ssd->InitialiseMeasure(this->currentReference,
                                            this->currentFloating,
                                            this->currentMask,
@@ -1679,7 +1676,7 @@ void reg_f3d_sym<T>::InitialiseSimilarity()
                                            this->backwardVoxelBasedMeasureGradientImage
                                            );
 
-   if(this->measure_kld!=NULL)
+   if(this->measure_kld!=nullptr)
       this->measure_kld->InitialiseMeasure(this->currentReference,
                                            this->currentFloating,
                                            this->currentMask,
@@ -1693,7 +1690,7 @@ void reg_f3d_sym<T>::InitialiseSimilarity()
                                            this->backwardVoxelBasedMeasureGradientImage
                                            );
 
-   if(this->measure_lncc!=NULL)
+   if(this->measure_lncc!=nullptr)
       this->measure_lncc->InitialiseMeasure(this->currentReference,
                                             this->currentFloating,
                                             this->currentMask,
@@ -1707,7 +1704,7 @@ void reg_f3d_sym<T>::InitialiseSimilarity()
                                             this->backwardVoxelBasedMeasureGradientImage
                                             );
 
-   if(this->measure_dti!=NULL)
+   if(this->measure_dti!=nullptr)
       this->measure_dti->InitialiseMeasure(this->currentReference,
                                            this->currentFloating,
                                            this->currentMask,
@@ -1721,7 +1718,7 @@ void reg_f3d_sym<T>::InitialiseSimilarity()
                                            this->backwardVoxelBasedMeasureGradientImage
                                            );
 
-   if(this->measure_mind!=NULL)
+   if(this->measure_mind!=nullptr)
       this->measure_mind->InitialiseMeasure(this->currentReference,
                                             this->currentFloating,
                                             this->currentMask,
@@ -1735,7 +1732,7 @@ void reg_f3d_sym<T>::InitialiseSimilarity()
                                             this->backwardVoxelBasedMeasureGradientImage
                                             );
 
-   if(this->measure_mindssc!=NULL)
+   if(this->measure_mindssc!=nullptr)
       this->measure_mindssc->InitialiseMeasure(this->currentReference,
                                                this->currentFloating,
                                                this->currentMask,
@@ -1759,10 +1756,10 @@ template<class T>
 nifti_image **reg_f3d_sym<T>::GetWarpedImage()
 {
    // The initial images are used
-   if(this->inputReference==NULL ||
-         this->inputFloating==NULL ||
-         this->controlPointGrid==NULL ||
-         this->backwardControlPointGrid==NULL)
+   if(this->inputReference==nullptr ||
+         this->inputFloating==nullptr ||
+         this->controlPointGrid==nullptr ||
+         this->backwardControlPointGrid==nullptr)
    {
       reg_print_fct_error("reg_f3d_sym<T>::GetWarpedImage()");
       reg_print_msg_error("The reference, floating and both control point grid images have to be defined");
@@ -1771,8 +1768,8 @@ nifti_image **reg_f3d_sym<T>::GetWarpedImage()
 
    reg_f3d_sym<T>::currentReference = this->inputReference;
    reg_f3d_sym<T>::currentFloating = this->inputFloating;
-   reg_f3d_sym<T>::currentMask = NULL;
-   reg_f3d_sym<T>::currentFloatingMask = NULL;
+   reg_f3d_sym<T>::currentMask = nullptr;
+   reg_f3d_sym<T>::currentFloatingMask = nullptr;
 
    reg_f3d_sym<T>::AllocateWarped();
    reg_f3d_sym<T>::AllocateDeformationField();
@@ -1825,4 +1822,3 @@ nifti_image * reg_f3d_sym<T>::GetBackwardControlPointPositionImage()
 /* *************************************************************** */
 /* *************************************************************** */
 template class reg_f3d_sym<float>;
-#endif
diff --git a/reg-lib/_reg_f3d_sym.h b/reg-lib/_reg_f3d_sym.h
index 7a6cefb5..691bb966 100644
--- a/reg-lib/_reg_f3d_sym.h
+++ b/reg-lib/_reg_f3d_sym.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_F3D_SYM_H
-#define _REG_F3D_SYM_H
+#pragma once
 
 #include "_reg_f3d.h"
 
@@ -103,5 +102,3 @@ class reg_f3d_sym : public reg_f3d<T>
       return true;
    }
 };
-
-#endif
diff --git a/reg-lib/_reg_polyAffine.cpp b/reg-lib/_reg_polyAffine.cpp
index 0316b8ea..9c059c0b 100644
--- a/reg-lib/_reg_polyAffine.cpp
+++ b/reg-lib/_reg_polyAffine.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_POLYAFFINE_CPP
-#define _REG_POLYAFFINE_CPP
-
 #include "_reg_polyAffine.h"
 
 /* *************************************************************** */
@@ -137,5 +134,3 @@ void reg_polyAffine<T>::ClearTransformationGradient()
 }
 /* *************************************************************** */
 /* *************************************************************** */
-
-#endif // _REG_POLYAFFINE_CPP
diff --git a/reg-lib/_reg_polyAffine.h b/reg-lib/_reg_polyAffine.h
index 11858866..661fa050 100644
--- a/reg-lib/_reg_polyAffine.h
+++ b/reg-lib/_reg_polyAffine.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_POLYAFFINE_H
-#define _REG_POLYAFFINE_H
+#pragma once
 
 #include "_reg_base.h"
 
@@ -40,5 +39,3 @@ class reg_polyAffine : public reg_base<T>
 };
 
 #include "_reg_polyAffine.cpp"
-
-#endif // _REG_POLYAFFINE_H
diff --git a/reg-lib/cl/CLAladinContent.cpp b/reg-lib/cl/CLAladinContent.cpp
deleted file mode 100755
index 492069da..00000000
--- a/reg-lib/cl/CLAladinContent.cpp
+++ /dev/null
@@ -1,453 +0,0 @@
-#include "CLAladinContent.h"
-#include "_reg_tools.h"
-
-/* *************************************************************** */
-ClAladinContent::ClAladinContent()
-{
-	initVars();
-	allocateClPtrs();
-}
-/* *************************************************************** */
-ClAladinContent::ClAladinContent(nifti_image *CurrentReferenceIn,
-											nifti_image *CurrentFloatingIn,
-											int *CurrentReferenceMaskIn,
-											size_t byte,
-											const unsigned int blockPercentage,
-											const unsigned int inlierLts,
-											int blockStep ) :
-	AladinContent(CurrentReferenceIn,
-					  CurrentFloatingIn,
-					  CurrentReferenceMaskIn,
-					  byte, blockPercentage,
-					  inlierLts,
-					  blockStep)
-{
-	initVars();
-	allocateClPtrs();
-}
-/* *************************************************************** */
-ClAladinContent::ClAladinContent(nifti_image *CurrentReferenceIn,
-											nifti_image *CurrentFloatingIn,
-											int *CurrentReferenceMaskIn,
-											size_t byte) :
-	AladinContent(CurrentReferenceIn,
-					  CurrentFloatingIn,
-					  CurrentReferenceMaskIn,
-					  byte)
-{
-	initVars();
-	allocateClPtrs();
-}
-/* *************************************************************** */
-ClAladinContent::ClAladinContent(nifti_image *CurrentReferenceIn,
-											nifti_image *CurrentFloatingIn,
-											int *CurrentReferenceMaskIn,
-											mat44 *transMat,
-											size_t byte,
-											const unsigned int blockPercentage,
-											const unsigned int inlierLts,
-											int blockStep) :
-	AladinContent(CurrentReferenceIn,
-					  CurrentFloatingIn,
-					  CurrentReferenceMaskIn,
-					  transMat,
-					  byte,
-					  blockPercentage,
-					  inlierLts,
-					  blockStep)
-{
-	initVars();
-	allocateClPtrs();
-}
-/* *************************************************************** */
-ClAladinContent::ClAladinContent(nifti_image *CurrentReferenceIn,
-											nifti_image *CurrentFloatingIn,
-											int *CurrentReferenceMaskIn,
-											mat44 *transMat,
-											size_t byte) :
-	AladinContent(CurrentReferenceIn,
-					  CurrentFloatingIn,
-					  CurrentReferenceMaskIn,
-					  transMat,
-					  byte)
-{
-	initVars();
-	allocateClPtrs();
-}
-/* *************************************************************** */
-ClAladinContent::~ClAladinContent()
-{
-	freeClPtrs();
-}
-/* *************************************************************** */
-void ClAladinContent::initVars()
-{
-
-	this->referenceImageClmem = 0;
-	this->floatingImageClmem = 0;
-	this->warpedImageClmem = 0;
-	this->deformationFieldClmem = 0;
-	this->referencePositionClmem = 0;
-	this->warpedPositionClmem = 0;
-	this->totalBlockClmem = 0;
-	this->maskClmem = 0;
-
-	if (this->CurrentReference != NULL && this->CurrentReference->nbyper != NIFTI_TYPE_FLOAT32)
-		reg_tools_changeDatatype<float>(this->CurrentReference);
-	if (this->CurrentFloating != NULL && this->CurrentFloating->nbyper != NIFTI_TYPE_FLOAT32) {
-		reg_tools_changeDatatype<float>(this->CurrentFloating);
-		if (this->CurrentWarped != NULL)
-			reg_tools_changeDatatype<float>(this->CurrentWarped);
-	}
-	this->sContext = &CLContextSingletton::Instance();
-	this->clContext = this->sContext->getContext();
-	this->commandQueue = this->sContext->getCommandQueue();
-	this->referenceVoxels = (this->CurrentReference != NULL) ? this->CurrentReference->nvox : 0;
-	this->floatingVoxels = (this->CurrentFloating != NULL) ? this->CurrentFloating->nvox : 0;
-	//this->numBlocks = (this->blockMatchingParams != NULL) ? this->blockMatchingParams->blockNumber[0] * this->blockMatchingParams->blockNumber[1] * this->blockMatchingParams->blockNumber[2] : 0;
-}
-/* *************************************************************** */
-void ClAladinContent::allocateClPtrs()
-{
-
-	if (this->CurrentWarped != NULL)
-	{
-		this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->CurrentWarped->nvox * sizeof(float), this->CurrentWarped->data, &this->errNum);
-		this->sContext->checkErrNum(this->errNum, "ClAladinContent::allocateClPtrs failed to allocate memory (warpedImageClmem): ");
-	}
-	if (this->CurrentDeformationField != NULL)
-	{
-		this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->CurrentDeformationField->nvox, this->CurrentDeformationField->data, &this->errNum);
-		this->sContext->checkErrNum(this->errNum, "ClAladinContent::allocateClPtrs failed to allocate memory (deformationFieldClmem): ");
-	}
-	if (this->CurrentFloating != NULL)
-	{
-		this->floatingImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->CurrentFloating->nvox, this->CurrentFloating->data, &this->errNum);
-		this->sContext->checkErrNum(this->errNum, "ClAladinContent::allocateClPtrs failed to allocate memory (CurrentFloating): ");
-
-		float *sourceIJKMatrix_h = (float*) malloc(16 * sizeof(float));
-		mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h);
-		this->floMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-													  16 * sizeof(float),
-													  sourceIJKMatrix_h, &this->errNum);
-		this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (floMatClmem): ");
-		free(sourceIJKMatrix_h);
-	}
-	if (this->CurrentReference != NULL)
-	{
-		this->referenceImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-																 sizeof(float) * this->CurrentReference->nvox,
-																 this->CurrentReference->data, &this->errNum);
-		this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (referenceImageClmem): ");
-
-		float* targetMat = (float *) malloc(16 * sizeof(float)); //freed
-		mat44ToCptr(this->refMatrix_xyz, targetMat);
-		this->refMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-													  16 * sizeof(float),
-													  targetMat, &this->errNum);
-		this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (refMatClmem): ");
-		free(targetMat);
-	}
-	if (this->blockMatchingParams != NULL) {
-		if (this->blockMatchingParams->referencePosition != NULL) {
-			//targetPositionClmem
-			this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-																		 this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float),
-																		 this->blockMatchingParams->referencePosition, &this->errNum);
-			this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (referencePositionClmem): ");
-		}
-		if (this->blockMatchingParams->warpedPosition != NULL) {
-			//resultPositionClmem
-			this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-																	 this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float),
-																	 this->blockMatchingParams->warpedPosition, &this->errNum);
-			this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (warpedPositionClmem): ");
-		}
-		if (this->blockMatchingParams->totalBlock != NULL) {
-			//totalBlockClmem
-			this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-																this->blockMatchingParams->totalBlockNumber * sizeof(int),
-																this->blockMatchingParams->totalBlock, &this->errNum);
-			this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (activeBlockClmem): ");
-		}
-	}
-	if (this->CurrentReferenceMask != NULL && this->CurrentReference != NULL) {
-		this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-													this->CurrentReference->nx * this->CurrentReference->ny * this->CurrentReference->nz * sizeof(int),
-													this->CurrentReferenceMask, &this->errNum);
-		this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (clCreateBuffer): ");
-	}
-}
-/* *************************************************************** */
-nifti_image *ClAladinContent::getCurrentWarped(int datatype)
-{
-	downloadImage(this->CurrentWarped, this->warpedImageClmem, datatype);
-	return this->CurrentWarped;
-}
-/* *************************************************************** */
-nifti_image *ClAladinContent::getCurrentDeformationField()
-{
-	this->errNum = clEnqueueReadBuffer(this->commandQueue, this->deformationFieldClmem, CL_TRUE, 0, this->CurrentDeformationField->nvox * sizeof(float), this->CurrentDeformationField->data, 0, NULL, NULL); //CLCONTEXT
-	this->sContext->checkErrNum(errNum, "Get: failed CurrentDeformationField: ");
-	return this->CurrentDeformationField;
-}
-/* *************************************************************** */
-_reg_blockMatchingParam* ClAladinContent::getBlockMatchingParams()
-{
-   this->errNum = clEnqueueReadBuffer(this->commandQueue, this->warpedPositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->warpedPosition, 0, NULL, NULL); //CLCONTEXT
-   this->sContext->checkErrNum(this->errNum, "CLContext: failed result position: ");
-   this->errNum = clEnqueueReadBuffer(this->commandQueue, this->referencePositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->referencePosition, 0, NULL, NULL); //CLCONTEXT
-   this->sContext->checkErrNum(this->errNum, "CLContext: failed target position: ");
-   return this->blockMatchingParams;
-}
-/* *************************************************************** */
-void ClAladinContent::setTransformationMatrix(mat44 *transformationMatrixIn)
-{
-   AladinContent::setTransformationMatrix(transformationMatrixIn);
-}
-/* *************************************************************** */
-void ClAladinContent::setCurrentDeformationField(nifti_image *CurrentDeformationFieldIn)
-{
-	if (this->CurrentDeformationField != NULL)
-		clReleaseMemObject(this->deformationFieldClmem);
-
-	AladinContent::setCurrentDeformationField(CurrentDeformationFieldIn);
-	this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->CurrentDeformationField->nvox * sizeof(float), this->CurrentDeformationField->data, &this->errNum);
-	this->sContext->checkErrNum(this->errNum, "ClAladinContent::setCurrentDeformationField failed to allocate memory (deformationFieldClmem): ");
-}
-/* *************************************************************** */
-void ClAladinContent::setCurrentReferenceMask(int *maskIn, size_t nvox)
-{
-	if (this->CurrentReferenceMask != NULL)
-		clReleaseMemObject(maskClmem);
-	this->CurrentReferenceMask = maskIn;
-	this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, nvox * sizeof(int), this->CurrentReferenceMask, &this->errNum);
-	this->sContext->checkErrNum(this->errNum, "ClAladinContent::setCurrentReferenceMask failed to allocate memory (maskClmem): ");
-}
-/* *************************************************************** */
-void ClAladinContent::setCurrentWarped(nifti_image *currentWarped)
-{
-	if (this->CurrentWarped != NULL) {
-		clReleaseMemObject(this->warpedImageClmem);
-	}
-	if (currentWarped->nbyper != NIFTI_TYPE_FLOAT32) {
-		reg_tools_changeDatatype<float>(currentWarped);
-	}
-	AladinContent::setCurrentWarped(currentWarped);
-	this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, this->CurrentWarped->nvox * sizeof(float), this->CurrentWarped->data, &this->errNum);
-	this->sContext->checkErrNum(this->errNum, "ClAladinContent::setCurrentWarped failed to allocate memory (warpedImageClmem): ");
-}
-/* *************************************************************** */
-void ClAladinContent::setBlockMatchingParams(_reg_blockMatchingParam* bmp) {
-
-   AladinContent::setBlockMatchingParams(bmp);
-   if (this->blockMatchingParams->referencePosition != NULL) {
-      clReleaseMemObject(this->referencePositionClmem);
-      //referencePositionClmem
-      this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->referencePosition, &this->errNum);
-      this->sContext->checkErrNum(this->errNum, "ClAladinContent::setBlockMatchingParams failed to allocate memory (referencePositionClmem): ");
-   }
-   if (this->blockMatchingParams->warpedPosition != NULL) {
-      clReleaseMemObject(this->warpedPositionClmem);
-      //warpedPositionClmem
-      this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->warpedPosition, &this->errNum);
-      this->sContext->checkErrNum(this->errNum, "ClAladinContent::setBlockMatchingParams failed to allocate memory (warpedPositionClmem): ");
-   }
-   if (this->blockMatchingParams->totalBlock != NULL) {
-      clReleaseMemObject(this->totalBlockClmem);
-      //totalBlockClmem
-      this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->totalBlockNumber * sizeof(int), this->blockMatchingParams->totalBlock, &this->errNum);
-      this->sContext->checkErrNum(this->errNum, "ClAladinContent::setBlockMatchingParams failed to allocate memory (activeBlockClmem): ");
-   }
-}
-/* *************************************************************** */
-cl_mem ClAladinContent::getReferenceImageArrayClmem()
-{
-   return this->referenceImageClmem;
-}
-/* *************************************************************** */
-cl_mem ClAladinContent::getFloatingImageArrayClmem()
-{
-   return this->floatingImageClmem;
-}
-/* *************************************************************** */
-cl_mem ClAladinContent::getWarpedImageClmem()
-{
-   return this->warpedImageClmem;
-}
-/* *************************************************************** */
-cl_mem ClAladinContent::getReferencePositionClmem()
-{
-   return this->referencePositionClmem;
-}
-/* *************************************************************** */
-cl_mem ClAladinContent::getWarpedPositionClmem()
-{
-   return this->warpedPositionClmem;
-}
-/* *************************************************************** */
-cl_mem ClAladinContent::getDeformationFieldArrayClmem()
-{
-   return this->deformationFieldClmem;
-}
-/* *************************************************************** */
-cl_mem ClAladinContent::getTotalBlockClmem()
-{
-   return this->totalBlockClmem;
-}
-/* *************************************************************** */
-cl_mem ClAladinContent::getMaskClmem()
-{
-   return this->maskClmem;
-}
-/* *************************************************************** */
-cl_mem ClAladinContent::getRefMatClmem()
-{
-   return this->refMatClmem;
-}
-/* *************************************************************** */
-cl_mem ClAladinContent::getFloMatClmem()
-{
-   return this->floMatClmem;
-}
-/* *************************************************************** */
-int *ClAladinContent::getReferenceDims()
-{
-	return this->referenceDims;
-}
-/* *************************************************************** */
-int *ClAladinContent::getFloatingDims() {
-	return this->floatingDims;
-}
-/* *************************************************************** */
-template<class DataType>
-DataType ClAladinContent::fillWarpedImageData(float intensity, int datatype)
-{
-	switch (datatype) {
-	case NIFTI_TYPE_FLOAT32:
-		return static_cast<float>(intensity);
-		break;
-	case NIFTI_TYPE_FLOAT64:
-		return static_cast<double>(intensity);
-		break;
-	case NIFTI_TYPE_UINT8:
-		if(intensity!=intensity)
-			intensity=0;
-		intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
-		return static_cast<unsigned char>(intensity > 0 ? reg_round(intensity) : 0);
-		break;
-	case NIFTI_TYPE_UINT16:
-		if(intensity!=intensity)
-			intensity=0;
-		intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
-		return static_cast<unsigned short>(intensity > 0 ? reg_round(intensity) : 0);
-		break;
-	case NIFTI_TYPE_UINT32:
-		if(intensity!=intensity)
-			intensity=0;
-		intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-		return static_cast<unsigned int>(intensity > 0 ? reg_round(intensity) : 0);
-		break;
-	default:
-		if(intensity!=intensity)
-			intensity=0;
-		return static_cast<DataType>(reg_round(intensity));
-		break;
-	}
-}
-/* *************************************************************** */
-template<class T>
-void ClAladinContent::fillImageData(nifti_image *image,
-												cl_mem memoryObject,
-												int type)
-{
-	size_t size = image->nvox;
-	float* buffer = NULL;
-	buffer = (float*) malloc(size * sizeof(float));
-	if (buffer == NULL) {
-		reg_print_fct_error("ClAladinContent::fillImageData");
-		reg_print_msg_error("Memory allocation did not complete successfully. Exit.");
-		reg_exit();
-	}
-
-	this->errNum = clEnqueueReadBuffer(this->commandQueue, memoryObject, CL_TRUE, 0,
-												  size * sizeof(float), buffer, 0, NULL, NULL);
-	this->sContext->checkErrNum(this->errNum, "Error reading warped buffer.");
-
-    free(image->data);
-    image->datatype = type;
-    image->nbyper = sizeof(T);
-    image->data = (void *)malloc(image->nvox*image->nbyper);
-    T* dataT = static_cast<T*>(image->data);
-    for (size_t i = 0; i < size; ++i)
-        dataT[i] = fillWarpedImageData<T>(buffer[i], type);
-    free(buffer);
-}
-/* *************************************************************** */
-void ClAladinContent::downloadImage(nifti_image *image,
-												cl_mem memoryObject,
-												int datatype)
-{
-	switch (datatype) {
-	case NIFTI_TYPE_FLOAT32:
-		fillImageData<float>(image, memoryObject, datatype);
-		break;
-	case NIFTI_TYPE_FLOAT64:
-		fillImageData<double>(image, memoryObject, datatype);
-		break;
-	case NIFTI_TYPE_UINT8:
-		fillImageData<unsigned char>(image, memoryObject, datatype);
-		break;
-	case NIFTI_TYPE_INT8:
-		fillImageData<char>(image, memoryObject, datatype);
-		break;
-	case NIFTI_TYPE_UINT16:
-		fillImageData<unsigned short>(image, memoryObject, datatype);
-		break;
-	case NIFTI_TYPE_INT16:
-		fillImageData<short>(image, memoryObject, datatype);
-		break;
-	case NIFTI_TYPE_UINT32:
-		fillImageData<unsigned int>(image, memoryObject, datatype);
-		break;
-	case NIFTI_TYPE_INT32:
-		fillImageData<int>(image, memoryObject, datatype);
-		break;
-	default:
-		reg_print_fct_error("ClAladinContent::downloadImage");
-		reg_print_msg_error("Unsupported type");
-		reg_exit();
-		break;
-	}
-}
-/* *************************************************************** */
-void ClAladinContent::freeClPtrs()
-{
-	if(this->CurrentReference != NULL)
-	{
-		clReleaseMemObject(this->referenceImageClmem);
-		clReleaseMemObject(this->refMatClmem);
-	}
-	if(this->CurrentFloating != NULL)
-	{
-		clReleaseMemObject(this->floatingImageClmem);
-		clReleaseMemObject(this->floMatClmem);
-	}
-	if(this->CurrentWarped != NULL)
-		clReleaseMemObject(this->warpedImageClmem);
-	if(this->CurrentDeformationField != NULL)
-		clReleaseMemObject(this->deformationFieldClmem);
-	if(this->CurrentReferenceMask != NULL)
-		clReleaseMemObject(this->maskClmem);
-	if(this->blockMatchingParams != NULL)
-	{
-		clReleaseMemObject(this->totalBlockClmem);
-		clReleaseMemObject(this->referencePositionClmem);
-		clReleaseMemObject(this->warpedPositionClmem);
-	}
-}
-/* *************************************************************** */
-bool ClAladinContent::isCurrentComputationDoubleCapable() {
-	 return this->sContext->getIsCardDoubleCapable();
-}
-/* *************************************************************** */
diff --git a/reg-lib/cl/CLAladinContent.h b/reg-lib/cl/CLAladinContent.h
deleted file mode 100755
index d34b1a49..00000000
--- a/reg-lib/cl/CLAladinContent.h
+++ /dev/null
@@ -1,115 +0,0 @@
-#ifndef CLCONTENT_H_
-#define CLCONTENT_H_
-
-#include "AladinContent.h"
-#include "CLContextSingletton.h"
-
-#ifdef __APPLE__
-#include <OpenCL/cl.h>
-#else
-#include <CL/cl.h>
-#endif
-
-class ClAladinContent: public AladinContent {
-
-public:
-
-	//constructors
-	ClAladinContent();
-	ClAladinContent(nifti_image *CurrentReferenceIn,
-						 nifti_image *CurrentFloatingIn,
-						 int *CurrentReferenceMaskIn,
-						 size_t byte,
-						 const unsigned int blockPercentage,
-						 const unsigned int inlierLts,
-						 int blockStep);
-	ClAladinContent(nifti_image *CurrentReferenceIn,
-						 nifti_image *CurrentFloatingIn,
-						 int *CurrentReferenceMaskIn,
-						 size_t byte);
-	ClAladinContent(nifti_image *CurrentReferenceIn,
-						 nifti_image *CurrentFloatingIn,
-						 int *CurrentReferenceMaskIn,
-						 mat44 *transMat,
-						 size_t byte,
-						 const unsigned int blockPercentage,
-						 const unsigned int inlierLts,
-						 int blockStep);
-	ClAladinContent(nifti_image *CurrentReferenceIn,
-						 nifti_image *CurrentFloatingIn,
-						 int *CurrentReferenceMaskIn,
-						 mat44 *transMat,
-						 size_t byte);
-	~ClAladinContent();
-
-    bool isCurrentComputationDoubleCapable();
-
-	//opencl getters
-	cl_mem getReferenceImageArrayClmem();
-	cl_mem getFloatingImageArrayClmem();
-	cl_mem getWarpedImageClmem();
-	cl_mem getReferencePositionClmem();
-	cl_mem getWarpedPositionClmem();
-	cl_mem getDeformationFieldArrayClmem();
-	cl_mem getTotalBlockClmem();
-	cl_mem getMaskClmem();
-	cl_mem getRefMatClmem();
-	cl_mem getFloMatClmem();
-	int *getReferenceDims();
-	int *getFloatingDims();
-
-	//cpu getters with data downloaded from device
-	_reg_blockMatchingParam* getBlockMatchingParams();
-	nifti_image *getCurrentDeformationField();
-	nifti_image *getCurrentWarped(int typ);
-
-	//setters
-	void setTransformationMatrix(mat44 *transformationMatrixIn);
-	void setCurrentWarped(nifti_image *warpedImageIn);
-	void setCurrentDeformationField(nifti_image *CurrentDeformationFieldIn);
-	void setCurrentReferenceMask(int *maskIn, size_t size);
-	void setBlockMatchingParams(_reg_blockMatchingParam* bmp);
-
-
-private:
-	void initVars();
-
-	void uploadContext();
-	void allocateClPtrs();
-	void freeClPtrs();
-
-	CLContextSingletton *sContext;
-	cl_context clContext;
-	cl_int errNum;
-	cl_command_queue commandQueue;
-
-	cl_mem referenceImageClmem;
-	cl_mem floatingImageClmem;
-	cl_mem warpedImageClmem;
-	cl_mem deformationFieldClmem;
-	cl_mem referencePositionClmem;
-	cl_mem warpedPositionClmem;
-	cl_mem totalBlockClmem;
-	cl_mem maskClmem;
-	cl_mem refMatClmem;
-	cl_mem floMatClmem;
-
-	int referenceDims[4];
-	int floatingDims[4];
-
-	unsigned int nVoxels;
-
-	void downloadImage(nifti_image *image,
-							 cl_mem memoryObject,
-							 int datatype);
-	template<class T>
-	void fillImageData(nifti_image *image,
-							 cl_mem memoryObject,
-							 int type);
-	template<class T>
-	T fillWarpedImageData(float intensity,
-								 int datatype);
-
-};
-
-#endif //CLCONTENT_H_
diff --git a/reg-lib/cl/CLConvolutionKernel.h b/reg-lib/cl/CLConvolutionKernel.h
deleted file mode 100644
index 0bffff58..00000000
--- a/reg-lib/cl/CLConvolutionKernel.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef CLCONVOLUTIONKERNEL_H
-#define CLCONVOLUTIONKERNEL_H
-
-#include "ConvolutionKernel.h"
-#include "CLContextSingletton.h"
-
-class CLConvolutionKernel : public ConvolutionKernel
-{
-    public:
-       CLConvolutionKernel(std::string name);
-       ~CLConvolutionKernel();
-       void calculate(nifti_image * image, float *sigma, int kernelType, int *mask = NULL, bool * timePoints = NULL, bool * axis = NULL);
-    private:
-       CLContextSingletton * sContext;
-};
-
-#endif // CLCONVOLUTIONKERNEL_H
diff --git a/reg-lib/cl/CLKernelFactory.cpp b/reg-lib/cl/CLKernelFactory.cpp
deleted file mode 100755
index 0e060e92..00000000
--- a/reg-lib/cl/CLKernelFactory.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-#include "CLKernelFactory.h"
-#include "CLAffineDeformationFieldKernel.h"
-#include "CLConvolutionKernel.h"
-#include "CLBlockMatchingKernel.h"
-#include "CLResampleImageKernel.h"
-#include "CLOptimiseKernel.h"
-#include "AladinContent.h"
-
-Kernel *CLKernelFactory::produceKernel(std::string name, AladinContent *con) const {
-
-	if( name == AffineDeformationFieldKernel::getName() ) return new CLAffineDeformationFieldKernel(con, name);
-	else if( name == ConvolutionKernel::getName() ) return new CLConvolutionKernel(name);
-	else if (name == BlockMatchingKernel::getName()) return new CLBlockMatchingKernel(con, name);
-	else if( name == ResampleImageKernel::getName() ) return new CLResampleImageKernel(con, name);
-	else if( name == OptimiseKernel::getName() ) return new CLOptimiseKernel(con, name);
-	else return NULL;
-}
diff --git a/reg-lib/cl/CLKernelFactory.h b/reg-lib/cl/CLKernelFactory.h
deleted file mode 100755
index b40e7399..00000000
--- a/reg-lib/cl/CLKernelFactory.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef CLPKERNELFACTORY_H
-#define CLPKERNELFACTORY_H
-
-#include "KernelFactory.h"
-#include "AladinContent.h"
-
-class CLKernelFactory : public KernelFactory
-{
-public:
-   Kernel *produceKernel(std::string name, AladinContent *con) const;
-};
-
-#endif
diff --git a/reg-lib/cl/CLOptimiseKernel.h b/reg-lib/cl/CLOptimiseKernel.h
deleted file mode 100644
index 5201ce64..00000000
--- a/reg-lib/cl/CLOptimiseKernel.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef CLOPTIMISEKERNEL_H
-#define CLOPTIMISEKERNEL_H
-
-#include "OptimiseKernel.h"
-#include "CLAladinContent.h"
-
-class CLOptimiseKernel : public OptimiseKernel
-{
-    public:
-
-       CLOptimiseKernel(AladinContent * con, std::string name);
-       ~CLOptimiseKernel();
-       void calculate(bool affine);
-    private:
-       _reg_blockMatchingParam * blockMatchingParams;
-       mat44 *transformationMatrix;
-       CLContextSingletton *sContext;
-       ClAladinContent  *con;
-};
-
-#endif // CLOPTIMISEKERNEL_H
diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt
index 0f46b947..b0589955 100755
--- a/reg-lib/cl/CMakeLists.txt
+++ b/reg-lib/cl/CMakeLists.txt
@@ -21,14 +21,14 @@ include_directories(${OpenCL_INCLUDE_DIRS})
 # Build the _reg_opencl_kernels library
 set(NAME _reg_opencl_kernels)
 add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
-  CLContextSingletton.cpp
+  ClContextSingleton.cpp
   CLAladinContent.cpp
-  CLKernelFactory.cpp
-  CLAffineDeformationFieldKernel.cpp
-  CLBlockMatchingKernel.cpp
-  CLConvolutionKernel.cpp
-  CLOptimiseKernel.cpp
-  CLResampleImageKernel.cpp
+  ClKernelFactory.cpp
+  ClAffineDeformationFieldKernel.cpp
+  ClBlockMatchingKernel.cpp
+  ClConvolutionKernel.cpp
+  ClOptimiseKernel.cpp
+  ClResampleImageKernel.cpp
   ../AladinContent.cpp
   ../Platform.cpp
 )
@@ -40,17 +40,17 @@ install(TARGETS ${NAME}
 )
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
-install(FILES CLContextSingletton.h CLAladinContent.h CLKernelFactory.h
-        CLAffineDeformationFieldKernel.h
-        CLBlockMatchingKernel.h
-        CLConvolutionKernel.h
-        CLOptimiseKernel.h
-        CLResampleImageKernel.h
+install(FILES ClContextSingleton.h CLAladinContent.h ClKernelFactory.h
+        ClAffineDeformationFieldKernel.h
+        ClBlockMatchingKernel.h
+        ClConvolutionKernel.h
+        ClOptimiseKernel.h
+        ClResampleImageKernel.h
         DESTINATION include/cl)
 install(FILES resampleKernel.cl affineDeformationKernel.cl blockMatchingKernel.cl DESTINATION include/cl)
 #-----------------------------------------------------------------------------
 set(NAME _reg_openclinfo)
-add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h InfoDevice.h CLContextSingletton.cpp)
+add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h InfoDevice.h ClContextSingleton.cpp)
 target_link_libraries(${NAME} ${OpenCL_LIBRARIES})
 install(TARGETS ${NAME}
   RUNTIME DESTINATION lib
diff --git a/reg-lib/cl/CLAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
similarity index 80%
rename from reg-lib/cl/CLAffineDeformationFieldKernel.cpp
rename to reg-lib/cl/ClAffineDeformationFieldKernel.cpp
index f5656cb2..20449a55 100644
--- a/reg-lib/cl/CLAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
@@ -1,9 +1,9 @@
-#include "CLAffineDeformationFieldKernel.h"
+#include "ClAffineDeformationFieldKernel.h"
 #include "config.h"
 
 #include "_reg_tools.h"
 
-CLAffineDeformationFieldKernel::CLAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) :
+ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) :
     AffineDeformationFieldKernel(nameIn) {
     //populate the CLAladinContent object ptr
     con = static_cast<ClAladinContent*>(conIn);
@@ -11,18 +11,18 @@ CLAffineDeformationFieldKernel::CLAffineDeformationFieldKernel(AladinContent *co
     //path to kernel files
     const char* niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR");
     const char* niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR");
-    
+
     std::string clInstallPath;
     std::string clSrcPath;
     //src dir
-    if (niftyreg_src_dir != NULL){
+    if (niftyreg_src_dir != nullptr){
         char opencl_kernel_path[255];
         sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir);
         clSrcPath = opencl_kernel_path;
     }
     else clSrcPath = CL_KERNELS_SRC_PATH;
     //install dir
-    if(niftyreg_install_dir!=NULL){
+    if(niftyreg_install_dir!=nullptr){
         char opencl_kernel_path[255];
         sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir);
         clInstallPath = opencl_kernel_path;
@@ -40,14 +40,14 @@ CLAffineDeformationFieldKernel::CLAffineDeformationFieldKernel(AladinContent *co
     }
 
     //get opencl context params
-    sContext = &CLContextSingletton::Instance();
-    clContext = sContext->getContext();
-    commandQueue = sContext->getCommandQueue();
+    sContext = &ClContextSingleton::Instance();
+    clContext = sContext->GetContext();
+    commandQueue = sContext->GetCommandQueue();
     program = sContext->CreateProgram(clKernelPath.c_str());
 
     //get cpu ptrs
-    this->deformationFieldImage = con->AladinContent::getCurrentDeformationField();
-    this->affineTransformation = con->AladinContent::getTransformationMatrix();
+    this->deformationFieldImage = con->AladinContent::GetCurrentDeformationField();
+    this->affineTransformation = con->AladinContent::GetTransformationMatrix();
     this->ReferenceMatrix = (this->deformationFieldImage->sform_code > 0) ? &(this->deformationFieldImage->sto_xyz) : &(this->deformationFieldImage->qto_xyz);
 
     cl_int errNum;
@@ -55,11 +55,11 @@ CLAffineDeformationFieldKernel::CLAffineDeformationFieldKernel(AladinContent *co
     if(this->deformationFieldImage->nz>1)
         kernel = clCreateKernel(program, "affineKernel3D", &errNum);
     else kernel = clCreateKernel(program, "affineKernel2D", &errNum);
-    sContext->checkErrNum(errNum, "Error setting kernel CLAffineDeformationFieldKernel.");
+    sContext->checkErrNum(errNum, "Error setting kernel ClAffineDeformationFieldKernel.");
 
     //get cl ptrs
-    clDeformationField = con->getDeformationFieldArrayClmem();
-    clMask = con->getMaskClmem();
+    clDeformationField = con->GetDeformationFieldArrayClmem();
+    clMask = con->GetMaskClmem();
 
     //set some final kernel args
     errNum = clSetKernelArg(this->kernel, 2, sizeof(cl_mem), &this->clMask);
@@ -67,16 +67,16 @@ CLAffineDeformationFieldKernel::CLAffineDeformationFieldKernel(AladinContent *co
 
 }
 /* *************************************************************** */
-void CLAffineDeformationFieldKernel::calculate(bool compose) {
+void ClAffineDeformationFieldKernel::Calculate(bool compose) {
     //localWorkSize[0]*localWorkSize[1]*localWorkSize[2]... should be lower than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE
     cl_uint maxWG = 0;
     cl_int errNum;
     std::size_t paramValueSize;
-    errNum = clGetDeviceInfo(sContext->getDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, NULL, &paramValueSize);
-    sContext->checkErrNum(errNum, "Failed to getDeviceId() OpenCL device info ");
+    errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, &paramValueSize);
+    sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info ");
     cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize);
-    errNum = clGetDeviceInfo(sContext->getDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, NULL);
-    sContext->checkErrNum(errNum, "Failed to getDeviceId() OpenCL device info ");
+    errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr);
+    sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info ");
     maxWG = *info;
 
     //8=default value
@@ -116,7 +116,7 @@ void CLAffineDeformationFieldKernel::calculate(bool compose) {
     cl_mem cltransMat = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                        sizeof(float) * 16, trans, &errNum);
     this->sContext->checkErrNum(errNum,
-                                "CLAffineDeformationFieldKernel::calculate failed to allocate memory (cltransMat): ");
+                                "ClAffineDeformationFieldKernel::calculate failed to allocate memory (cltransMat): ");
 
     cl_uint composition = compose;
     errNum = clSetKernelArg(this->kernel, 0, sizeof(cl_mem), &cltransMat);
@@ -128,8 +128,8 @@ void CLAffineDeformationFieldKernel::calculate(bool compose) {
     errNum |= clSetKernelArg(this->kernel, 4, sizeof(cl_uint), &composition);
     sContext->checkErrNum(errNum, "Error setting kernel arguments.");
 
-    errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, dims, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
-    sContext->checkErrNum(errNum, "Error queuing CLAffineDeformationFieldKernel for execution");
+    errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
+    sContext->checkErrNum(errNum, "Error queuing ClAffineDeformationFieldKernel for execution");
     clFinish(commandQueue);
 
     free(trans);
@@ -137,7 +137,7 @@ void CLAffineDeformationFieldKernel::calculate(bool compose) {
     return;
 }
 /* *************************************************************** */
-CLAffineDeformationFieldKernel::~CLAffineDeformationFieldKernel() {
+ClAffineDeformationFieldKernel::~ClAffineDeformationFieldKernel() {
     if (kernel != 0)
         clReleaseKernel(kernel);
     if (program != 0)
diff --git a/reg-lib/cl/CLAffineDeformationFieldKernel.h b/reg-lib/cl/ClAffineDeformationFieldKernel.h
similarity index 52%
rename from reg-lib/cl/CLAffineDeformationFieldKernel.h
rename to reg-lib/cl/ClAffineDeformationFieldKernel.h
index c3bfe1a3..c4897caa 100644
--- a/reg-lib/cl/CLAffineDeformationFieldKernel.h
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.h
@@ -1,15 +1,14 @@
-#ifndef CLAFFINEDEFORMATIONFIELDKERNEL_H
-#define CLAFFINEDEFORMATIONFIELDKERNEL_H
+#pragma once
 
 #include "AffineDeformationFieldKernel.h"
 #include "CLAladinContent.h"
 
-class CLAffineDeformationFieldKernel : public AffineDeformationFieldKernel {
+class ClAffineDeformationFieldKernel : public AffineDeformationFieldKernel {
     public:
-       CLAffineDeformationFieldKernel(AladinContent * conIn, std::string nameIn);
-       ~CLAffineDeformationFieldKernel();
+       ClAffineDeformationFieldKernel(AladinContent * conIn, std::string nameIn);
+       ~ClAffineDeformationFieldKernel();
 
-       void calculate(bool compose = false);
+       void Calculate(bool compose = false);
     private:
        mat44 *affineTransformation, *ReferenceMatrix;
        nifti_image *deformationFieldImage;
@@ -19,7 +18,5 @@ class CLAffineDeformationFieldKernel : public AffineDeformationFieldKernel {
        cl_context clContext;
        cl_program program;
        cl_mem clDeformationField, clMask;
-       CLContextSingletton *sContext;
+       ClContextSingleton *sContext;
 };
-
-#endif // CLAFFINEDEFORMATIONFIELDKERNEL_H
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
new file mode 100644
index 00000000..efa2c127
--- /dev/null
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -0,0 +1,413 @@
+#include "CLAladinContent.h"
+#include "_reg_tools.h"
+
+/* *************************************************************** */
+ClAladinContent::ClAladinContent() {
+    InitVars();
+    AllocateClPtrs();
+}
+/* *************************************************************** */
+ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn,
+                                 nifti_image *currentFloatingIn,
+                                 int *currentReferenceMaskIn,
+                                 size_t byte,
+                                 const unsigned int blockPercentage,
+                                 const unsigned int inlierLts,
+                                 int blockStep) :
+    AladinContent(currentReferenceIn,
+                  currentFloatingIn,
+                  currentReferenceMaskIn,
+                  byte, blockPercentage,
+                  inlierLts,
+                  blockStep) {
+    InitVars();
+    AllocateClPtrs();
+}
+/* *************************************************************** */
+ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn,
+                                 nifti_image *currentFloatingIn,
+                                 int *currentReferenceMaskIn,
+                                 size_t byte) :
+    AladinContent(currentReferenceIn,
+                  currentFloatingIn,
+                  currentReferenceMaskIn,
+                  byte) {
+    InitVars();
+    AllocateClPtrs();
+}
+/* *************************************************************** */
+ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn,
+                                 nifti_image *currentFloatingIn,
+                                 int *currentReferenceMaskIn,
+                                 mat44 *transMat,
+                                 size_t byte,
+                                 const unsigned int blockPercentage,
+                                 const unsigned int inlierLts,
+                                 int blockStep) :
+    AladinContent(currentReferenceIn,
+                  currentFloatingIn,
+                  currentReferenceMaskIn,
+                  transMat,
+                  byte,
+                  blockPercentage,
+                  inlierLts,
+                  blockStep) {
+    InitVars();
+    AllocateClPtrs();
+}
+/* *************************************************************** */
+ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn,
+                                 nifti_image *currentFloatingIn,
+                                 int *currentReferenceMaskIn,
+                                 mat44 *transMat,
+                                 size_t byte) :
+    AladinContent(currentReferenceIn,
+                  currentFloatingIn,
+                  currentReferenceMaskIn,
+                  transMat,
+                  byte) {
+    InitVars();
+    AllocateClPtrs();
+}
+/* *************************************************************** */
+ClAladinContent::~ClAladinContent() {
+    FreeClPtrs();
+}
+/* *************************************************************** */
+void ClAladinContent::InitVars() {
+    this->referenceImageClmem = 0;
+    this->floatingImageClmem = 0;
+    this->warpedImageClmem = 0;
+    this->deformationFieldClmem = 0;
+    this->referencePositionClmem = 0;
+    this->warpedPositionClmem = 0;
+    this->totalBlockClmem = 0;
+    this->maskClmem = 0;
+
+    if (this->currentReference != nullptr && this->currentReference->nbyper != NIFTI_TYPE_FLOAT32)
+        reg_tools_changeDatatype<float>(this->currentReference);
+    if (this->currentFloating != nullptr && this->currentFloating->nbyper != NIFTI_TYPE_FLOAT32) {
+        reg_tools_changeDatatype<float>(this->currentFloating);
+        if (this->currentWarped != nullptr)
+            reg_tools_changeDatatype<float>(this->currentWarped);
+    }
+    this->sContext = &ClContextSingleton::Instance();
+    this->clContext = this->sContext->GetContext();
+    this->commandQueue = this->sContext->GetCommandQueue();
+    //this->numBlocks = (this->blockMatchingParams != nullptr) ? this->blockMatchingParams->blockNumber[0] * this->blockMatchingParams->blockNumber[1] * this->blockMatchingParams->blockNumber[2] : 0;
+}
+/* *************************************************************** */
+void ClAladinContent::AllocateClPtrs() {
+
+    if (this->currentWarped != nullptr) {
+        this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->currentWarped->nvox * sizeof(float), this->currentWarped->data, &this->errNum);
+        this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): ");
+    }
+    if (this->currentDeformationField != nullptr) {
+        this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->currentDeformationField->nvox, this->currentDeformationField->data, &this->errNum);
+        this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): ");
+    }
+    if (this->currentFloating != nullptr) {
+        this->floatingImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->currentFloating->nvox, this->currentFloating->data, &this->errNum);
+        this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (currentFloating): ");
+
+        float *sourceIJKMatrix_h = (float*)malloc(16 * sizeof(float));
+        mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h);
+        this->floMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                                           16 * sizeof(float),
+                                           sourceIJKMatrix_h, &this->errNum);
+        this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): ");
+        free(sourceIJKMatrix_h);
+    }
+    if (this->currentReference != nullptr) {
+        this->referenceImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                                                   sizeof(float) * this->currentReference->nvox,
+                                                   this->currentReference->data, &this->errNum);
+        this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): ");
+
+        float* targetMat = (float *)malloc(16 * sizeof(float)); //freed
+        mat44ToCptr(this->refMatrix_xyz, targetMat);
+        this->refMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                                           16 * sizeof(float),
+                                           targetMat, &this->errNum);
+        this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): ");
+        free(targetMat);
+    }
+    if (this->blockMatchingParams != nullptr) {
+        if (this->blockMatchingParams->referencePosition != nullptr) {
+            //targetPositionClmem
+            this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                                                          this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float),
+                                                          this->blockMatchingParams->referencePosition, &this->errNum);
+            this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): ");
+        }
+        if (this->blockMatchingParams->warpedPosition != nullptr) {
+            //resultPositionClmem
+            this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                                                       this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float),
+                                                       this->blockMatchingParams->warpedPosition, &this->errNum);
+            this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): ");
+        }
+        if (this->blockMatchingParams->totalBlock != nullptr) {
+            //totalBlockClmem
+            this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                                                   this->blockMatchingParams->totalBlockNumber * sizeof(int),
+                                                   this->blockMatchingParams->totalBlock, &this->errNum);
+            this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): ");
+        }
+    }
+    if (this->currentReferenceMask != nullptr && this->currentReference != nullptr) {
+        this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                                         this->currentReference->nx * this->currentReference->ny * this->currentReference->nz * sizeof(int),
+                                         this->currentReferenceMask, &this->errNum);
+        this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): ");
+    }
+}
+/* *************************************************************** */
+nifti_image* ClAladinContent::GetCurrentWarped(int datatype) {
+    DownloadImage(this->currentWarped, this->warpedImageClmem, datatype);
+    return this->currentWarped;
+}
+/* *************************************************************** */
+nifti_image* ClAladinContent::GetCurrentDeformationField() {
+    this->errNum = clEnqueueReadBuffer(this->commandQueue, this->deformationFieldClmem, CL_TRUE, 0, this->currentDeformationField->nvox * sizeof(float), this->currentDeformationField->data, 0, nullptr, nullptr); //CLCONTEXT
+    this->sContext->checkErrNum(errNum, "Get: failed currentDeformationField: ");
+    return this->currentDeformationField;
+}
+/* *************************************************************** */
+_reg_blockMatchingParam* ClAladinContent::GetBlockMatchingParams() {
+    this->errNum = clEnqueueReadBuffer(this->commandQueue, this->warpedPositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->warpedPosition, 0, nullptr, nullptr); //CLCONTEXT
+    this->sContext->checkErrNum(this->errNum, "CLContext: failed result position: ");
+    this->errNum = clEnqueueReadBuffer(this->commandQueue, this->referencePositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->referencePosition, 0, nullptr, nullptr); //CLCONTEXT
+    this->sContext->checkErrNum(this->errNum, "CLContext: failed target position: ");
+    return this->blockMatchingParams;
+}
+/* *************************************************************** */
+void ClAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
+    AladinContent::SetTransformationMatrix(transformationMatrixIn);
+}
+/* *************************************************************** */
+void ClAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) {
+    if (this->currentDeformationField != nullptr)
+        clReleaseMemObject(this->deformationFieldClmem);
+
+    AladinContent::SetCurrentDeformationField(currentDeformationFieldIn);
+    this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->currentDeformationField->nvox * sizeof(float), this->currentDeformationField->data, &this->errNum);
+    this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentDeformationField failed to allocate memory (deformationFieldClmem): ");
+}
+/* *************************************************************** */
+void ClAladinContent::SetCurrentReferenceMask(int *maskIn, size_t nvox) {
+    if (this->currentReferenceMask != nullptr)
+        clReleaseMemObject(maskClmem);
+    this->currentReferenceMask = maskIn;
+    this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, nvox * sizeof(int), this->currentReferenceMask, &this->errNum);
+    this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentReferenceMask failed to allocate memory (maskClmem): ");
+}
+/* *************************************************************** */
+void ClAladinContent::SetCurrentWarped(nifti_image *currentWarped) {
+    if (this->currentWarped != nullptr) {
+        clReleaseMemObject(this->warpedImageClmem);
+    }
+    if (currentWarped->nbyper != NIFTI_TYPE_FLOAT32) {
+        reg_tools_changeDatatype<float>(currentWarped);
+    }
+    AladinContent::SetCurrentWarped(currentWarped);
+    this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, this->currentWarped->nvox * sizeof(float), this->currentWarped->data, &this->errNum);
+    this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentWarped failed to allocate memory (warpedImageClmem): ");
+}
+/* *************************************************************** */
+void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
+
+    AladinContent::SetBlockMatchingParams(bmp);
+    if (this->blockMatchingParams->referencePosition != nullptr) {
+        clReleaseMemObject(this->referencePositionClmem);
+        //referencePositionClmem
+        this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->referencePosition, &this->errNum);
+        this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): ");
+    }
+    if (this->blockMatchingParams->warpedPosition != nullptr) {
+        clReleaseMemObject(this->warpedPositionClmem);
+        //warpedPositionClmem
+        this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->warpedPosition, &this->errNum);
+        this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): ");
+    }
+    if (this->blockMatchingParams->totalBlock != nullptr) {
+        clReleaseMemObject(this->totalBlockClmem);
+        //totalBlockClmem
+        this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->totalBlockNumber * sizeof(int), this->blockMatchingParams->totalBlock, &this->errNum);
+        this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (activeBlockClmem): ");
+    }
+}
+/* *************************************************************** */
+cl_mem ClAladinContent::GetReferenceImageArrayClmem() {
+    return this->referenceImageClmem;
+}
+/* *************************************************************** */
+cl_mem ClAladinContent::GetFloatingImageArrayClmem() {
+    return this->floatingImageClmem;
+}
+/* *************************************************************** */
+cl_mem ClAladinContent::GetWarpedImageClmem() {
+    return this->warpedImageClmem;
+}
+/* *************************************************************** */
+cl_mem ClAladinContent::GetReferencePositionClmem() {
+    return this->referencePositionClmem;
+}
+/* *************************************************************** */
+cl_mem ClAladinContent::GetWarpedPositionClmem() {
+    return this->warpedPositionClmem;
+}
+/* *************************************************************** */
+cl_mem ClAladinContent::GetDeformationFieldArrayClmem() {
+    return this->deformationFieldClmem;
+}
+/* *************************************************************** */
+cl_mem ClAladinContent::GetTotalBlockClmem() {
+    return this->totalBlockClmem;
+}
+/* *************************************************************** */
+cl_mem ClAladinContent::GetMaskClmem() {
+    return this->maskClmem;
+}
+/* *************************************************************** */
+cl_mem ClAladinContent::GetRefMatClmem() {
+    return this->refMatClmem;
+}
+/* *************************************************************** */
+cl_mem ClAladinContent::GetFloMatClmem() {
+    return this->floMatClmem;
+}
+/* *************************************************************** */
+int *ClAladinContent::GetReferenceDims() {
+    return this->referenceDims;
+}
+/* *************************************************************** */
+int *ClAladinContent::GetFloatingDims() {
+    return this->floatingDims;
+}
+/* *************************************************************** */
+template<class DataType>
+DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) {
+    switch (datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        return static_cast<float>(intensity);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        return static_cast<double>(intensity);
+        break;
+    case NIFTI_TYPE_UINT8:
+        if (intensity != intensity)
+            intensity = 0;
+        intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
+        return static_cast<unsigned char>(intensity > 0 ? reg_round(intensity) : 0);
+        break;
+    case NIFTI_TYPE_UINT16:
+        if (intensity != intensity)
+            intensity = 0;
+        intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
+        return static_cast<unsigned short>(intensity > 0 ? reg_round(intensity) : 0);
+        break;
+    case NIFTI_TYPE_UINT32:
+        if (intensity != intensity)
+            intensity = 0;
+        intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
+        return static_cast<unsigned int>(intensity > 0 ? reg_round(intensity) : 0);
+        break;
+    default:
+        if (intensity != intensity)
+            intensity = 0;
+        return static_cast<DataType>(reg_round(intensity));
+        break;
+    }
+}
+/* *************************************************************** */
+template<class T>
+void ClAladinContent::FillImageData(nifti_image *image,
+                                    cl_mem memoryObject,
+                                    int type) {
+    size_t size = image->nvox;
+    float* buffer = nullptr;
+    buffer = (float*)malloc(size * sizeof(float));
+    if (buffer == nullptr) {
+        reg_print_fct_error("ClAladinContent::FillImageData");
+        reg_print_msg_error("Memory allocation did not complete successfully. Exit.");
+        reg_exit();
+    }
+
+    this->errNum = clEnqueueReadBuffer(this->commandQueue, memoryObject, CL_TRUE, 0,
+                                       size * sizeof(float), buffer, 0, nullptr, nullptr);
+    this->sContext->checkErrNum(this->errNum, "Error reading warped buffer.");
+
+    free(image->data);
+    image->datatype = type;
+    image->nbyper = sizeof(T);
+    image->data = (void *)malloc(image->nvox * image->nbyper);
+    T* dataT = static_cast<T*>(image->data);
+    for (size_t i = 0; i < size; ++i)
+        dataT[i] = FillWarpedImageData<T>(buffer[i], type);
+    free(buffer);
+}
+/* *************************************************************** */
+void ClAladinContent::DownloadImage(nifti_image *image,
+                                    cl_mem memoryObject,
+                                    int datatype) {
+    switch (datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        FillImageData<float>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        FillImageData<double>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_UINT8:
+        FillImageData<unsigned char>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_INT8:
+        FillImageData<char>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_UINT16:
+        FillImageData<unsigned short>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_INT16:
+        FillImageData<short>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_UINT32:
+        FillImageData<unsigned int>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_INT32:
+        FillImageData<int>(image, memoryObject, datatype);
+        break;
+    default:
+        reg_print_fct_error("ClAladinContent::DownloadImage");
+        reg_print_msg_error("Unsupported type");
+        reg_exit();
+        break;
+    }
+}
+/* *************************************************************** */
+void ClAladinContent::FreeClPtrs() {
+    if (this->currentReference != nullptr) {
+        clReleaseMemObject(this->referenceImageClmem);
+        clReleaseMemObject(this->refMatClmem);
+    }
+    if (this->currentFloating != nullptr) {
+        clReleaseMemObject(this->floatingImageClmem);
+        clReleaseMemObject(this->floMatClmem);
+    }
+    if (this->currentWarped != nullptr)
+        clReleaseMemObject(this->warpedImageClmem);
+    if (this->currentDeformationField != nullptr)
+        clReleaseMemObject(this->deformationFieldClmem);
+    if (this->currentReferenceMask != nullptr)
+        clReleaseMemObject(this->maskClmem);
+    if (this->blockMatchingParams != nullptr) {
+        clReleaseMemObject(this->totalBlockClmem);
+        clReleaseMemObject(this->referencePositionClmem);
+        clReleaseMemObject(this->warpedPositionClmem);
+    }
+}
+/* *************************************************************** */
+bool ClAladinContent::IsCurrentComputationDoubleCapable() {
+    return this->sContext->GetIsCardDoubleCapable();
+}
+/* *************************************************************** */
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
new file mode 100644
index 00000000..00a94545
--- /dev/null
+++ b/reg-lib/cl/ClAladinContent.h
@@ -0,0 +1,102 @@
+#pragma once
+
+#include "AladinContent.h"
+#include "ClContextSingleton.h"
+
+#ifdef __APPLE__
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+class ClAladinContent: public AladinContent {
+public:
+    //constructors
+    ClAladinContent();
+    ClAladinContent(nifti_image *currentReferenceIn,
+                    nifti_image *currentFloatingIn,
+                    int *currentReferenceMaskIn,
+                    size_t byte,
+                    const unsigned int blockPercentage,
+                    const unsigned int inlierLts,
+                    int blockStep);
+    ClAladinContent(nifti_image *currentReferenceIn,
+                    nifti_image *currentFloatingIn,
+                    int *currentReferenceMaskIn,
+                    size_t byte);
+    ClAladinContent(nifti_image *currentReferenceIn,
+                    nifti_image *currentFloatingIn,
+                    int *currentReferenceMaskIn,
+                    mat44 *transMat,
+                    size_t byte,
+                    const unsigned int blockPercentage,
+                    const unsigned int inlierLts,
+                    int blockStep);
+    ClAladinContent(nifti_image *currentReferenceIn,
+                    nifti_image *currentFloatingIn,
+                    int *currentReferenceMaskIn,
+                    mat44 *transMat,
+                    size_t byte);
+    ~ClAladinContent();
+
+    bool IsCurrentComputationDoubleCapable();
+
+    //opencl getters
+    cl_mem GetReferenceImageArrayClmem();
+    cl_mem GetFloatingImageArrayClmem();
+    cl_mem GetWarpedImageClmem();
+    cl_mem GetReferencePositionClmem();
+    cl_mem GetWarpedPositionClmem();
+    cl_mem GetDeformationFieldArrayClmem();
+    cl_mem GetTotalBlockClmem();
+    cl_mem GetMaskClmem();
+    cl_mem GetRefMatClmem();
+    cl_mem GetFloMatClmem();
+    int* GetReferenceDims();
+    int* GetFloatingDims();
+
+    //cpu getters with data downloaded from device
+    _reg_blockMatchingParam* GetBlockMatchingParams();
+    nifti_image* GetCurrentDeformationField();
+    nifti_image* GetCurrentWarped(int typ);
+
+    //setters
+    void SetTransformationMatrix(mat44 *transformationMatrixIn);
+    void SetCurrentWarped(nifti_image *warpedImageIn);
+    void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn);
+    void SetCurrentReferenceMask(int *maskIn, size_t size);
+    void SetBlockMatchingParams(_reg_blockMatchingParam* bmp);
+
+
+private:
+    void InitVars();
+    void AllocateClPtrs();
+    void FreeClPtrs();
+
+    ClContextSingleton *sContext;
+    cl_context clContext;
+    cl_int errNum;
+    cl_command_queue commandQueue;
+
+    cl_mem referenceImageClmem;
+    cl_mem floatingImageClmem;
+    cl_mem warpedImageClmem;
+    cl_mem deformationFieldClmem;
+    cl_mem referencePositionClmem;
+    cl_mem warpedPositionClmem;
+    cl_mem totalBlockClmem;
+    cl_mem maskClmem;
+    cl_mem refMatClmem;
+    cl_mem floMatClmem;
+
+    int referenceDims[4];
+    int floatingDims[4];
+
+    unsigned int nVoxels;
+
+    void DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype);
+    template<class T>
+    void FillImageData(nifti_image *image, cl_mem memoryObject, int type);
+    template<class T>
+    T FillWarpedImageData(float intensity, int datatype);
+};
diff --git a/reg-lib/cl/CLBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp
similarity index 79%
rename from reg-lib/cl/CLBlockMatchingKernel.cpp
rename to reg-lib/cl/ClBlockMatchingKernel.cpp
index 929ab7dc..157b6214 100644
--- a/reg-lib/cl/CLBlockMatchingKernel.cpp
+++ b/reg-lib/cl/ClBlockMatchingKernel.cpp
@@ -1,8 +1,8 @@
-#include "CLBlockMatchingKernel.h"
+#include "ClBlockMatchingKernel.h"
 #include "config.h"
 #include <fstream>
 
-CLBlockMatchingKernel::CLBlockMatchingKernel(AladinContent *conIn, std::string name) :
+ClBlockMatchingKernel::ClBlockMatchingKernel(AladinContent *conIn, std::string name) :
    BlockMatchingKernel(name) {
    //populate the CLAladinContent object ptr
    this->con = static_cast<ClAladinContent*>(conIn);
@@ -14,14 +14,14 @@ CLBlockMatchingKernel::CLBlockMatchingKernel(AladinContent *conIn, std::string n
    std::string clInstallPath;
    std::string clSrcPath;
    //src dir
-   if (niftyreg_src_dir != NULL){
+   if (niftyreg_src_dir != nullptr){
       char opencl_kernel_path[255];
       sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir);
       clSrcPath = opencl_kernel_path;
    }
    else clSrcPath = CL_KERNELS_SRC_PATH;
    //install dir
-   if(niftyreg_install_dir!=NULL){
+   if(niftyreg_install_dir!=nullptr){
       char opencl_kernel_path[255];
       sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir);
       clInstallPath = opencl_kernel_path;
@@ -37,14 +37,14 @@ CLBlockMatchingKernel::CLBlockMatchingKernel(AladinContent *conIn, std::string n
    }
 
    //get opencl context params
-   this->sContext = &CLContextSingletton::Instance();
-   this->clContext = this->sContext->getContext();
-   this->commandQueue = this->sContext->getCommandQueue();
+   this->sContext = &ClContextSingleton::Instance();
+   this->clContext = this->sContext->GetContext();
+   this->commandQueue = this->sContext->GetCommandQueue();
    this->program = this->sContext->CreateProgram(clKernelPath.c_str());
 
    // Create OpenCL kernel
    cl_int errNum;
-   if (this->con->getBlockMatchingParams()->dim == 3) {
+   if (this->con->GetBlockMatchingParams()->dim == 3) {
       this->kernel = clCreateKernel(program, "blockMatchingKernel3D", &errNum);
    }
    else {
@@ -53,21 +53,21 @@ CLBlockMatchingKernel::CLBlockMatchingKernel(AladinContent *conIn, std::string n
    this->sContext->checkErrNum(errNum, "Error setting bm kernel.");
 
    //get cl ptrs
-   this->clTotalBlock = this->con->getTotalBlockClmem();
-   this->clReferenceImageArray = this->con->getReferenceImageArrayClmem();
-   this->clWarpedImageArray = this->con->getWarpedImageClmem();
-   this->clWarpedPosition = this->con->getWarpedPositionClmem();
-   this->clReferencePosition = this->con->getReferencePositionClmem();
-   this->clMask = this->con->getMaskClmem();
-   this->clReferenceMat = this->con->getRefMatClmem();
+   this->clTotalBlock = this->con->GetTotalBlockClmem();
+   this->clReferenceImageArray = this->con->GetReferenceImageArrayClmem();
+   this->clWarpedImageArray = this->con->GetWarpedImageClmem();
+   this->clWarpedPosition = this->con->GetWarpedPositionClmem();
+   this->clReferencePosition = this->con->GetReferencePositionClmem();
+   this->clMask = this->con->GetMaskClmem();
+   this->clReferenceMat = this->con->GetRefMatClmem();
 
    //get cpu ptrs
-   this->reference = this->con->AladinContent::getCurrentReference();
-   this->params = this->con->AladinContent::getBlockMatchingParams();
+   this->reference = this->con->AladinContent::GetCurrentReference();
+   this->params = this->con->AladinContent::GetBlockMatchingParams();
 
 }
 /* *************************************************************** */
-void CLBlockMatchingKernel::calculate()
+void ClBlockMatchingKernel::Calculate()
 {
    if (this->params->stepSize!=1 || this->params->voxelCaptureRange!=3){
       reg_print_msg_error("The block Mathching OpenCL kernel supports only a stepsize of 1");
@@ -77,7 +77,7 @@ void CLBlockMatchingKernel::calculate()
    this->params->definedActiveBlockNumber = 0;
    cl_mem cldefinedBlock = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                           sizeof(int), &(this->params->definedActiveBlockNumber), &errNum);
-   this->sContext->checkErrNum(errNum, "CLBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) ");
+   this->sContext->checkErrNum(errNum, "ClBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) ");
 
    const cl_uint4 imageSize ={{(cl_uint)this->reference->nx,
                                (cl_uint)this->reference->ny,
@@ -95,7 +95,7 @@ void CLBlockMatchingKernel::calculate()
       sMemSize = 144; // (3*4)^2
    }
 
-   errNum = clSetKernelArg(kernel, 0, sMemSize * sizeof(cl_float), NULL);
+   errNum = clSetKernelArg(kernel, 0, sMemSize * sizeof(cl_float), nullptr);
    this->sContext->checkErrNum(errNum, "Error setting shared memory.");
    errNum = clSetKernelArg(kernel, 1, sizeof(cl_mem), &this->clWarpedImageArray);
    this->sContext->checkErrNum(errNum, "Error setting resultImageArray.");
@@ -116,25 +116,25 @@ void CLBlockMatchingKernel::calculate()
    errNum |= clSetKernelArg(kernel, 9, sizeof(cl_uint4), &imageSize);
    this->sContext->checkErrNum(errNum, "Error setting image size.");
 
-   errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, params->dim, NULL,
-                                   globalWorkSize, localWorkSize, 0, NULL, NULL);
+   errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, params->dim, nullptr,
+                                   globalWorkSize, localWorkSize, 0, nullptr, nullptr);
    this->sContext->checkErrNum(errNum, "Error queuing blockmatching kernel for execution ");
 
    errNum = clFinish(this->commandQueue);
-   this->sContext->checkErrNum(errNum, "Error after clFinish CLBlockMatchingKernel");
+   this->sContext->checkErrNum(errNum, "Error after clFinish ClBlockMatchingKernel");
 
    errNum = clEnqueueReadBuffer(this->commandQueue, cldefinedBlock, CL_TRUE, 0, sizeof(int),
-                                &(this->params->definedActiveBlockNumber), 0, NULL, NULL);
-   sContext->checkErrNum(errNum, "Error reading  var after CLBlockMatchingKernel execution ");
+                                &(this->params->definedActiveBlockNumber), 0, nullptr, nullptr);
+   sContext->checkErrNum(errNum, "Error reading  var after ClBlockMatchingKernel execution ");
 
    if(this->params->definedActiveBlockNumber == 0) {
-      reg_print_msg_error("Unexpected error in the CLBlockMatchingKernel execution");
+      reg_print_msg_error("Unexpected error in the ClBlockMatchingKernel execution");
       reg_exit();
    }
    clReleaseMemObject(cldefinedBlock);
 }
 /* *************************************************************** */
-CLBlockMatchingKernel::~CLBlockMatchingKernel() {
+ClBlockMatchingKernel::~ClBlockMatchingKernel() {
    if (kernel != 0)
       clReleaseKernel(kernel);
    if (program != 0)
diff --git a/reg-lib/cl/CLBlockMatchingKernel.h b/reg-lib/cl/ClBlockMatchingKernel.h
similarity index 62%
rename from reg-lib/cl/CLBlockMatchingKernel.h
rename to reg-lib/cl/ClBlockMatchingKernel.h
index cc3d2761..5474c578 100644
--- a/reg-lib/cl/CLBlockMatchingKernel.h
+++ b/reg-lib/cl/ClBlockMatchingKernel.h
@@ -1,17 +1,16 @@
-#ifndef CLBLOCKMATCHINGKERNEL_H
-#define CLBLOCKMATCHINGKERNEL_H
+#pragma once
 
 #include "BlockMatchingKernel.h"
 #include "CLAladinContent.h"
 
-class CLBlockMatchingKernel : public BlockMatchingKernel {
+class ClBlockMatchingKernel : public BlockMatchingKernel {
     public:
-       CLBlockMatchingKernel(AladinContent * conIn, std::string name);
-       ~CLBlockMatchingKernel();
-       void calculate();
+       ClBlockMatchingKernel(AladinContent * conIn, std::string name);
+       ~ClBlockMatchingKernel();
+       void Calculate();
 
     private:
-       CLContextSingletton *sContext;
+       ClContextSingleton *sContext;
        ClAladinContent *con;
        nifti_image *reference;
        _reg_blockMatchingParam *params;
@@ -27,5 +26,3 @@ class CLBlockMatchingKernel : public BlockMatchingKernel {
        cl_mem clMask;
        cl_mem clReferenceMat;
 };
-
-#endif // CLBLOCKMATCHINGKERNEL_H
diff --git a/reg-lib/cl/CLContextSingletton.cpp b/reg-lib/cl/ClContextSingleton.cpp
old mode 100755
new mode 100644
similarity index 83%
rename from reg-lib/cl/CLContextSingletton.cpp
rename to reg-lib/cl/ClContextSingleton.cpp
index d8ef6ea0..38695780
--- a/reg-lib/cl/CLContextSingletton.cpp
+++ b/reg-lib/cl/ClContextSingleton.cpp
@@ -1,39 +1,39 @@
-#include "CLContextSingletton.h"
+#include "ClContextSingleton.h"
 
 /* *************************************************************** */
-CLContextSingletton::CLContextSingletton()
+ClContextSingleton::ClContextSingleton()
 {
-	this->commandQueue = NULL;
-	this->context = NULL;
+	this->commandQueue = nullptr;
+	this->context = nullptr;
 	this->clIdx = 999;
 	init();
 }
 /* *************************************************************** */
-void CLContextSingletton::init()
+void ClContextSingleton::init()
 {
 	// Query the number of platforms
-	cl_int errNum = clGetPlatformIDs(0, NULL, &this->numPlatforms);
+	cl_int errNum = clGetPlatformIDs(0, nullptr, &this->numPlatforms);
 	checkErrNum(errNum, "Failed to find CL platforms.");
 
 	this->platformIds = (cl_platform_id *) alloca(sizeof(cl_platform_id) * this->numPlatforms);
-	errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds, NULL);
+	errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds, nullptr);
 	checkErrNum(errNum, "Failed to find any OpenCL platforms.");
 
-	errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, 0, NULL, &this->numDevices);
+	errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, 0, nullptr, &this->numDevices);
 	checkErrNum(errNum, "Failed to find OpenCL devices.");
 
 	this->devices = new cl_device_id[this->numDevices];
-	errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices, NULL);
+	errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices, nullptr);
 
-	pickCard(this->clIdx);
+	PickCard(this->clIdx);
 
 	cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) this->platformIds[0], 0 };
-	this->context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum);
+	this->context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &errNum);
 
 	if (errNum != CL_SUCCESS) {
 		std::cout << "Could not create GPU context, trying CPU..." << std::endl;
 		context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU,
-													 NULL, NULL, &errNum);
+													 nullptr, nullptr, &errNum);
 		if (errNum != CL_SUCCESS) {
 			std::cerr << "Failed to create an OpenCL GPU or CPU context."
 						 << std::endl;
@@ -41,33 +41,33 @@ void CLContextSingletton::init()
 		}
 	}
 
-	this->commandQueue = clCreateCommandQueue(this->context, this->devices[this->clIdx], CL_QUEUE_PROFILING_ENABLE, NULL);
+	this->commandQueue = clCreateCommandQueue(this->context, this->devices[this->clIdx], CL_QUEUE_PROFILING_ENABLE, nullptr);
 	checkErrNum(errNum, "Failed to create commandQueue for device ");
 
 	this->deviceId = this->devices[this->clIdx];
 	queryGridDims();
 }
 /* *************************************************************** */
-void CLContextSingletton::setClIdx(int clIdxIn)
+void ClContextSingleton::SetClIdx(int clIdxIn)
 {
    clIdx=clIdxIn;
    this->init();
 }
 /* *************************************************************** */
-void CLContextSingletton::queryGridDims()
+void ClContextSingleton::queryGridDims()
 {
 	std::size_t paramValueSize;
-	cl_int errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, NULL, &paramValueSize);
+	cl_int errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, &paramValueSize);
 	checkErrNum(errNum, "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_GROUP_SIZE");
 
 	size_t* info = (size_t *) alloca(sizeof(size_t) * paramValueSize);
-	errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, NULL);
+	errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr);
 	checkErrNum(errNum, "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_GROUP_SIZE2");
 	this->maxThreads = *info;
 	this->maxBlocks = 65535;
 }
 /* *************************************************************** */
-void CLContextSingletton::pickCard(cl_uint deviceId)
+void ClContextSingleton::PickCard(cl_uint deviceId)
 {
    cl_int errNum;
    std::size_t paramValueSize;
@@ -80,25 +80,25 @@ void CLContextSingletton::pickCard(cl_uint deviceId)
 
    if(deviceId < this->numDevices){
       this->clIdx=deviceId;
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, 0, NULL, &paramValueSize);
+      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, &paramValueSize);
       checkErrNum(errNum, "Failed to find OpenCL device info ");
       cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize);
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, NULL);
+      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr);
       checkErrNum(errNum, "Failed to find OpenCL device info ");
       cl_uint numProcs = *info;
       maxProcs = numProcs;
 
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, NULL, &paramValueSizeDOUBE1);
+      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE1);
       checkErrNum(errNum, "Failed to find OpenCL device info ");
       cl_uint * infoD1 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE1);
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, NULL);
+      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr);
       checkErrNum(errNum, "Failed to find OpenCL device info ");
       cl_uint numD1 = *infoD1;
 
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, NULL, &paramValueSizeDOUBE2);
+      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE2);
       checkErrNum(errNum, "Failed to find OpenCL device info ");
       cl_uint * infoD2 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE2);
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, NULL);
+      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr);
       checkErrNum(errNum, "Failed to find OpenCL device info ");
       cl_uint numD2 = *infoD2;
 
@@ -117,12 +117,12 @@ void CLContextSingletton::pickCard(cl_uint deviceId)
 
    for(cl_uint i = 0; i < this->numDevices; ++i) {
       cl_device_type dev_type;
-      clGetDeviceInfo(this->devices[i], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL);
+      clGetDeviceInfo(this->devices[i], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, nullptr);
       if (dev_type == CL_DEVICE_TYPE_GPU) {
-         errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 0, NULL, &paramValueSize);
+         errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, &paramValueSize);
          checkErrNum(errNum, "Failed to find OpenCL device info ");
          cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize);
-         errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, NULL);
+         errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr);
          checkErrNum(errNum, "Failed to find OpenCL device info ");
          cl_uint numProcs = *info;
          const bool found = numProcs > maxProcs;
@@ -130,17 +130,17 @@ void CLContextSingletton::pickCard(cl_uint deviceId)
          maxProcs = found ? numProcs : maxProcs;
 
          if(found) {
-            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, NULL, &paramValueSizeDOUBE1);
+            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE1);
             checkErrNum(errNum, "Failed to find OpenCL device info ");
             cl_uint * infoD1 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE1);
-            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, NULL);
+            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr);
             checkErrNum(errNum, "Failed to find OpenCL device info ");
             cl_uint numD1 = *infoD1;
 
-            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, NULL, &paramValueSizeDOUBE2);
+            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE2);
             checkErrNum(errNum, "Failed to find OpenCL device info ");
             cl_uint * infoD2 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE2);
-            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, NULL);
+            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr);
             checkErrNum(errNum, "Failed to find OpenCL device info ");
             cl_uint numD2 = *infoD2;
 
@@ -154,23 +154,23 @@ void CLContextSingletton::pickCard(cl_uint deviceId)
    }
 }
 /* *************************************************************** */
-cl_program CLContextSingletton::CreateProgram(const char* fileName)
+cl_program ClContextSingleton::CreateProgram(const char* fileName)
 {
 	cl_int errNum;
 	cl_program program;
 	std::ifstream kernelFile(fileName, std::ios::in);
 	if (!kernelFile.is_open()) {
 		std::cerr << "Failed to open file for reading: " << fileName << std::endl;
-		return NULL;
+		return nullptr;
 	}
 	std::ostringstream oss;
 	oss << kernelFile.rdbuf();
 	std::string srcStdStr = oss.str();
 	const char *srcStr = srcStdStr.c_str();
-	program = clCreateProgramWithSource(this->context, 1, (const char**) &srcStr, NULL, &errNum);
+	program = clCreateProgramWithSource(this->context, 1, (const char**) &srcStr, nullptr, &errNum);
 	checkErrNum(errNum, "Failed to create CL program");
 
-	errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+	errNum = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
 	if (errNum != CL_SUCCESS) {
 		checDebugKernelInfo(program,this->deviceId, (char *)"Errors in kernel: ");
 		//create log
@@ -184,7 +184,7 @@ cl_program CLContextSingletton::CreateProgram(const char* fileName)
 	return program;
 }
 /* *************************************************************** */
-void CLContextSingletton::shutDown()
+void ClContextSingleton::shutDown()
 {
 	/*std::cout << "Shutting down cl" << std::endl;*/
 	if (this->context != 0) clReleaseContext(this->context);
@@ -193,16 +193,16 @@ void CLContextSingletton::shutDown()
 	 delete[] this->devices;
 }
 /* *************************************************************** */
-void CLContextSingletton::checDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message)
+void ClContextSingleton::checDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message)
 {
 	char buffer[10240];
 
-	clGetProgramBuildInfo(program, devIdIn, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
+	clGetProgramBuildInfo(program, devIdIn, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr);
 	reg_print_fct_error(message);
 	reg_print_fct_error(buffer);
 }
 /* *************************************************************** */
-void CLContextSingletton::checkErrNum(cl_int errNum, std::string message)
+void ClContextSingleton::checkErrNum(cl_int errNum, std::string message)
 {
 	if (errNum != CL_SUCCESS)
 	{
@@ -272,67 +272,67 @@ void CLContextSingletton::checkErrNum(cl_int errNum, std::string message)
 	}
 }
 /* *************************************************************** */
-cl_context CLContextSingletton::getContext()
+cl_context ClContextSingleton::GetContext()
 {
 	return this->context;
 }
 /* *************************************************************** */
-cl_device_id CLContextSingletton::getDeviceId()
+cl_device_id ClContextSingleton::GetDeviceId()
 {
 	return this->deviceId;
 }
 /* *************************************************************** */
-cl_device_id* CLContextSingletton::getDevices()
+cl_device_id* ClContextSingleton::GetDevices()
 {
 	return this->devices;
 }
 /* *************************************************************** */
-cl_command_queue CLContextSingletton::getCommandQueue()
+cl_command_queue ClContextSingleton::GetCommandQueue()
 {
 	return this->commandQueue;
 }
 /* *************************************************************** */
-cl_uint CLContextSingletton::getNumPlatforms()
+cl_uint ClContextSingleton::GetNumPlatforms()
 {
 	return this->numPlatforms;
 }
 /* *************************************************************** */
-cl_platform_id* CLContextSingletton::getPlatformIds()
+cl_platform_id* ClContextSingleton::GetPlatformIds()
 {
 	return this->platformIds;
 }
 /* *************************************************************** */
-cl_uint CLContextSingletton::getNumDevices()
+cl_uint ClContextSingleton::GetNumDevices()
 {
 	return this->numDevices;
 }
 /* *************************************************************** */
-size_t CLContextSingletton::getMaxThreads()
+size_t ClContextSingleton::GetMaxThreads()
 {
 	return this->maxThreads;
 }
 /* *************************************************************** */
-bool CLContextSingletton::getIsCardDoubleCapable()
+bool ClContextSingleton::GetIsCardDoubleCapable()
 {
 	 return this->isCardDoubleCapable;
 }
 /* *************************************************************** */
-unsigned int CLContextSingletton::getMaxBlocks()
+unsigned int ClContextSingleton::GetMaxBlocks()
 {
 	return this->maxBlocks;
 }
 /* *************************************************************** */
-size_t CLContextSingletton::getwarpGroupLength(cl_kernel kernel)
+size_t ClContextSingleton::GetWarpGroupLength(cl_kernel kernel)
 {
 	size_t local;
 	// Get the maximum work group size for executing the kernel on the device
-	cl_int err = clGetKernelWorkGroupInfo(kernel, this->deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, NULL);
+	cl_int err = clGetKernelWorkGroupInfo(kernel, this->deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr);
 	checkErrNum(err, "Error: Failed to retrieve kernel work group info!");
 
 	return local;
 }
 /* *************************************************************** */
-cl_kernel CLContextSingletton::dummyKernel(cl_device_id deviceIdIn) {
+cl_kernel ClContextSingleton::dummyKernel(cl_device_id deviceIdIn) {
 
    const char *source = "\n"
             "__kernel void dummy(                                                \n"
@@ -347,9 +347,9 @@ cl_kernel CLContextSingletton::dummyKernel(cl_device_id deviceIdIn) {
          "\n";
 
 	cl_int  err ;
-	cl_program program = clCreateProgramWithSource(this->context, 1, (const char **) & source, NULL, &err);
+	cl_program program = clCreateProgramWithSource(this->context, 1, (const char **) & source, nullptr, &err);
 	checkErrNum(err, "Failed to create CL program");
-	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+	err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
 	if (err != CL_SUCCESS) checDebugKernelInfo(program,deviceIdIn, (char *)"Errors in kernel: ");
 
 	// Create the compute kernel in the program we wish to run
@@ -358,7 +358,7 @@ cl_kernel CLContextSingletton::dummyKernel(cl_device_id deviceIdIn) {
 	if (!kernel || err != CL_SUCCESS)
 	{
 		reg_print_fct_error("Error: Failed to create compute kernel!");
-		return NULL;
+		return nullptr;
 	}
 	return kernel;
 }
diff --git a/reg-lib/cl/CLContextSingletton.h b/reg-lib/cl/ClContextSingleton.h
old mode 100755
new mode 100644
similarity index 54%
rename from reg-lib/cl/CLContextSingletton.h
rename to reg-lib/cl/ClContextSingleton.h
index dd959b8a..99020b7a
--- a/reg-lib/cl/CLContextSingletton.h
+++ b/reg-lib/cl/ClContextSingleton.h
@@ -1,6 +1,4 @@
-#ifndef CLPCONTEXTSINGLETTON_H
-#define CLPCONTEXTSINGLETTON_H
-
+#pragma once
 
 #ifdef __APPLE__
 #include <OpenCL/cl.h>
@@ -17,13 +15,13 @@
 
 
 // Declaration
-class CLContextSingletton
+class ClContextSingleton
 {
 public:
 
-	static CLContextSingletton& Instance()
+	static ClContextSingleton& Instance()
 	{
-		static CLContextSingletton instance; // Guaranteed to be destroyed.
+		static ClContextSingleton instance; // Guaranteed to be destroyed.
 		// Instantiated on first use.
 		return instance;
 	}
@@ -34,7 +32,7 @@ class CLContextSingletton
 	void CreateCommandQueue();
 	void init();
 	cl_kernel dummyKernel(cl_device_id deviceIdIn);
-	void setClIdx(int clIdxIn);
+	void SetClIdx(int clIdxIn);
 
 	cl_program CreateProgram( const char* fileName);
 
@@ -43,32 +41,32 @@ class CLContextSingletton
 	void checkErrNum(cl_int errNum, std::string message);
 	void shutDown();
 
-	cl_context getContext();
-	cl_device_id getDeviceId();
-	cl_device_id* getDevices();
-	cl_command_queue getCommandQueue();
-	cl_uint getNumPlatforms();
-	cl_platform_id* getPlatformIds();
-	cl_uint getNumDevices();
-	size_t getMaxThreads();
+	cl_context GetContext();
+	cl_device_id GetDeviceId();
+	cl_device_id* GetDevices();
+	cl_command_queue GetCommandQueue();
+	cl_uint GetNumPlatforms();
+	cl_platform_id* GetPlatformIds();
+	cl_uint GetNumDevices();
+	size_t GetMaxThreads();
 
-	unsigned int getMaxBlocks();
-    bool getIsCardDoubleCapable();
+	unsigned int GetMaxBlocks();
+    bool GetIsCardDoubleCapable();
 
-	size_t getwarpGroupLength(cl_kernel kernel);
+	size_t GetWarpGroupLength(cl_kernel kernel);
 
 private:
-	static CLContextSingletton* _instance;
+	static ClContextSingleton* _instance;
 
-	CLContextSingletton();
-    ~CLContextSingletton() {
+	ClContextSingleton();
+    ~ClContextSingleton() {
         shutDown();
 	}
 
-	CLContextSingletton(CLContextSingletton const&);// Don't Implement
-	void operator=(CLContextSingletton const&); // Don't implement
+	ClContextSingleton(ClContextSingleton const&);// Don't Implement
+	void operator=(ClContextSingleton const&); // Don't implement
 
-	void pickCard(cl_uint deviceId);
+	void PickCard(cl_uint deviceId);
 
 	cl_context context;
 	cl_device_id deviceId;
@@ -83,4 +81,3 @@ class CLContextSingletton
 	unsigned int maxBlocks;
 	unsigned clIdx;
 };
-#endif
diff --git a/reg-lib/cl/CLConvolutionKernel.cpp b/reg-lib/cl/ClConvolutionKernel.cpp
similarity index 65%
rename from reg-lib/cl/CLConvolutionKernel.cpp
rename to reg-lib/cl/ClConvolutionKernel.cpp
index 7d9f1437..7c30a2d9 100644
--- a/reg-lib/cl/CLConvolutionKernel.cpp
+++ b/reg-lib/cl/ClConvolutionKernel.cpp
@@ -1,15 +1,15 @@
-#include "CLConvolutionKernel.h"
+#include "ClConvolutionKernel.h"
 #include "_reg_tools.h"
 
 /* *************************************************************** */
-CLConvolutionKernel::CLConvolutionKernel(std::string name) : ConvolutionKernel(name) {
-    sContext = &CLContextSingletton::Instance();
+ClConvolutionKernel::ClConvolutionKernel(std::string name) : ConvolutionKernel(name) {
+    sContext = &ClContextSingleton::Instance();
 }
 /* *************************************************************** */
-void CLConvolutionKernel::calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) {
+void ClConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) {
     //cpu atm
     reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis);
 }
 /* *************************************************************** */
-CLConvolutionKernel::~CLConvolutionKernel() {}
+ClConvolutionKernel::~ClConvolutionKernel() {}
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClConvolutionKernel.h b/reg-lib/cl/ClConvolutionKernel.h
new file mode 100644
index 00000000..79ddbc2e
--- /dev/null
+++ b/reg-lib/cl/ClConvolutionKernel.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "ConvolutionKernel.h"
+#include "ClContextSingleton.h"
+
+class ClConvolutionKernel : public ConvolutionKernel
+{
+    public:
+       ClConvolutionKernel(std::string name);
+       ~ClConvolutionKernel();
+       void Calculate(nifti_image * image, float *sigma, int kernelType, int *mask = nullptr, bool * timePoints = nullptr, bool * axis = nullptr);
+    private:
+       ClContextSingleton * sContext;
+};
diff --git a/reg-lib/cl/ClKernelFactory.cpp b/reg-lib/cl/ClKernelFactory.cpp
new file mode 100644
index 00000000..0c969b1e
--- /dev/null
+++ b/reg-lib/cl/ClKernelFactory.cpp
@@ -0,0 +1,17 @@
+#include "ClKernelFactory.h"
+#include "ClAffineDeformationFieldKernel.h"
+#include "ClConvolutionKernel.h"
+#include "ClBlockMatchingKernel.h"
+#include "ClResampleImageKernel.h"
+#include "ClOptimiseKernel.h"
+#include "AladinContent.h"
+
+Kernel* ClKernelFactory::ProduceKernel(std::string name, AladinContent *con) const {
+
+	if (name == AffineDeformationFieldKernel::GetName()) return new ClAffineDeformationFieldKernel(con, name);
+	else if (name == ConvolutionKernel::GetName()) return new ClConvolutionKernel(name);
+	else if (name == BlockMatchingKernel::GetName()) return new ClBlockMatchingKernel(con, name);
+	else if (name == ResampleImageKernel::GetName()) return new ClResampleImageKernel(con, name);
+	else if (name == OptimiseKernel::GetName()) return new ClOptimiseKernel(con, name);
+	else return nullptr;
+}
diff --git a/reg-lib/cl/ClKernelFactory.h b/reg-lib/cl/ClKernelFactory.h
new file mode 100644
index 00000000..113907e3
--- /dev/null
+++ b/reg-lib/cl/ClKernelFactory.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include "KernelFactory.h"
+#include "AladinContent.h"
+
+class ClKernelFactory: public KernelFactory {
+public:
+   Kernel* ProduceKernel(std::string name, AladinContent *con) const;
+};
diff --git a/reg-lib/cl/CLOptimiseKernel.cpp b/reg-lib/cl/ClOptimiseKernel.cpp
similarity index 53%
rename from reg-lib/cl/CLOptimiseKernel.cpp
rename to reg-lib/cl/ClOptimiseKernel.cpp
index 0412fbbe..c46d65c7 100644
--- a/reg-lib/cl/CLOptimiseKernel.cpp
+++ b/reg-lib/cl/ClOptimiseKernel.cpp
@@ -1,25 +1,25 @@
-#include "CLOptimiseKernel.h"
+#include "ClOptimiseKernel.h"
 
 /* *************************************************************** */
-CLOptimiseKernel::CLOptimiseKernel(AladinContent *conIn, std::string name) : OptimiseKernel(name) {
+ClOptimiseKernel::ClOptimiseKernel(AladinContent *conIn, std::string name) : OptimiseKernel(name) {
     //populate the CLAladinContent object ptr
     con = static_cast<ClAladinContent*>(conIn);
 
     //get opencl context params
-    sContext = &CLContextSingletton::Instance();
-    /*clContext = sContext->getContext();*/
-    /*commandQueue = sContext->getCommandQueue();*/
+    sContext = &ClContextSingleton::Instance();
+    /*clContext = sContext->GetContext();*/
+    /*commandQueue = sContext->GetCommandQueue();*/
 
     //get necessary cpu ptrs
-    transformationMatrix = con->AladinContent::getTransformationMatrix();
-    blockMatchingParams = con->AladinContent::getBlockMatchingParams();
+    transformationMatrix = con->AladinContent::GetTransformationMatrix();
+    blockMatchingParams = con->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
-void CLOptimiseKernel::calculate(bool affine) {
+void ClOptimiseKernel::Calculate(bool affine) {
     //cpu atm
-    this->blockMatchingParams = con->getBlockMatchingParams();
+    this->blockMatchingParams = con->GetBlockMatchingParams();
     optimize(this->blockMatchingParams, this->transformationMatrix, affine);
 }
 /* *************************************************************** */
-CLOptimiseKernel::~CLOptimiseKernel() {}
+ClOptimiseKernel::~ClOptimiseKernel() {}
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClOptimiseKernel.h b/reg-lib/cl/ClOptimiseKernel.h
new file mode 100644
index 00000000..f369f592
--- /dev/null
+++ b/reg-lib/cl/ClOptimiseKernel.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "OptimiseKernel.h"
+#include "CLAladinContent.h"
+
+class ClOptimiseKernel : public OptimiseKernel
+{
+    public:
+
+       ClOptimiseKernel(AladinContent * con, std::string name);
+       ~ClOptimiseKernel();
+       void Calculate(bool affine);
+    private:
+       _reg_blockMatchingParam * blockMatchingParams;
+       mat44 *transformationMatrix;
+       ClContextSingleton *sContext;
+       ClAladinContent  *con;
+};
diff --git a/reg-lib/cl/CLResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp
similarity index 82%
rename from reg-lib/cl/CLResampleImageKernel.cpp
rename to reg-lib/cl/ClResampleImageKernel.cpp
index 5057a997..7d73cc7b 100644
--- a/reg-lib/cl/CLResampleImageKernel.cpp
+++ b/reg-lib/cl/ClResampleImageKernel.cpp
@@ -1,28 +1,28 @@
-#include "CLResampleImageKernel.h"
+#include "ClResampleImageKernel.h"
 #include "config.h"
 #include "_reg_tools.h"
 #include <algorithm>
 
 /* *************************************************************** */
-CLResampleImageKernel::CLResampleImageKernel(AladinContent *conIn, std::string name) : ResampleImageKernel(name) {
+ClResampleImageKernel::ClResampleImageKernel(AladinContent *conIn, std::string name) : ResampleImageKernel(name) {
     //populate the CLContext object ptr
     con = static_cast<ClAladinContent*>(conIn);
 
     //path to kernel file
     const char* niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR");
 	const char* niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR");
-	
+
 	std::string clInstallPath;
     std::string clSrcPath;
     //src dir
-    if (niftyreg_src_dir != NULL){
+    if (niftyreg_src_dir != nullptr){
         char opencl_kernel_path[255];
         sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir);
         clSrcPath = opencl_kernel_path;
     }
     else clSrcPath = CL_KERNELS_SRC_PATH;
     //install dir
-    if(niftyreg_install_dir!=NULL){
+    if(niftyreg_install_dir!=nullptr){
         char opencl_kernel_path[255];
         sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir);
         clInstallPath = opencl_kernel_path;
@@ -38,35 +38,35 @@ CLResampleImageKernel::CLResampleImageKernel(AladinContent *conIn, std::string n
     }
 
     //get opencl context params
-    sContext = &CLContextSingletton::Instance();
-    clContext = sContext->getContext();
-    commandQueue = sContext->getCommandQueue();
+    sContext = &ClContextSingleton::Instance();
+    clContext = sContext->GetContext();
+    commandQueue = sContext->GetCommandQueue();
     program = sContext->CreateProgram(clKernelPath.c_str());
 
     //get cpu ptrs
-    floatingImage = con->AladinContent::getCurrentFloating();
-    warpedImage = con->AladinContent::getCurrentWarped();
-    mask = con->AladinContent::getCurrentReferenceMask();
+    floatingImage = con->AladinContent::GetCurrentFloating();
+    warpedImage = con->AladinContent::GetCurrentWarped();
+    mask = con->AladinContent::GetCurrentReferenceMask();
 
     //get cl ptrs
-    clCurrentFloating = con->getFloatingImageArrayClmem();
-    clCurrentDeformationField = con->getDeformationFieldArrayClmem();
-    clCurrentWarped = con->getWarpedImageClmem();
-    clMask = con->getMaskClmem();
-    floMat = con->getFloMatClmem();
+    clCurrentFloating = con->GetFloatingImageArrayClmem();
+    clCurrentDeformationField = con->GetDeformationFieldArrayClmem();
+    clCurrentWarped = con->GetWarpedImageClmem();
+    clMask = con->GetMaskClmem();
+    floMat = con->GetFloMatClmem();
 
     //init kernel
     kernel = 0;
 }
 /* *************************************************************** */
-void CLResampleImageKernel::calculate(int interp,
+void ClResampleImageKernel::Calculate(int interp,
                                                   float paddingValue,
                                                   bool *dti_timepoint,
                                                   mat33 *jacMat) {
     cl_int errNum;
     // Define the DTI indices if required
-    if(dti_timepoint!=NULL || jacMat!=NULL){
-        reg_print_fct_error("CLResampleImageKernel::calculate");
+    if(dti_timepoint!=nullptr || jacMat!=nullptr){
+        reg_print_fct_error("ClResampleImageKernel::calculate");
         reg_print_msg_error("The DTI resampling has not yet been implemented with the OpenCL platform. Exit.");
         reg_exit();
     }
@@ -79,15 +79,15 @@ void CLResampleImageKernel::calculate(int interp,
         this->kernel = clCreateKernel(program, "ResampleImage2D", &errNum);
     }
     else {
-		reg_print_fct_error("CLResampleImageKernel::calculate");
+		reg_print_fct_error("ClResampleImageKernel::calculate");
         reg_print_msg_error("The image dimension is not supported. Exit.");
         reg_exit();
     }
     sContext->checkErrNum(errNum, "Error setting kernel ResampleImage.");
 
     long targetVoxelNumber = (long) this->warpedImage->nx * this->warpedImage->ny * this->warpedImage->nz;
-    const unsigned int maxThreads = sContext->getMaxThreads();
-    const unsigned int maxBlocks = sContext->getMaxBlocks();
+    const unsigned int maxThreads = sContext->GetMaxThreads();
+    const unsigned int maxBlocks = sContext->GetMaxBlocks();
 
     unsigned int blocks = (targetVoxelNumber % maxThreads) ? (targetVoxelNumber / maxThreads) + 1 : targetVoxelNumber / maxThreads;
     blocks = std::min(blocks, maxBlocks);
@@ -131,13 +131,13 @@ void CLResampleImageKernel::calculate(int interp,
     errNum |= clSetKernelArg(kernel, 10, sizeof(cl_int), &datatype);
     sContext->checkErrNum(errNum, "Error setting interp kernel arguments 10.");
 
-    errNum = clEnqueueNDRangeKernel(commandQueue, kernel, dims, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
+    errNum = clEnqueueNDRangeKernel(commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
     sContext->checkErrNum(errNum, "Error queuing interp kernel for execution: ");
 
     clFinish(commandQueue);
 }
 /* *************************************************************** */
-CLResampleImageKernel::~CLResampleImageKernel() {
+ClResampleImageKernel::~ClResampleImageKernel() {
     if (kernel != 0)
         clReleaseKernel(kernel);
     if (program != 0)
diff --git a/reg-lib/cl/CLResampleImageKernel.h b/reg-lib/cl/ClResampleImageKernel.h
similarity index 55%
rename from reg-lib/cl/CLResampleImageKernel.h
rename to reg-lib/cl/ClResampleImageKernel.h
index 40da392e..5f10d203 100644
--- a/reg-lib/cl/CLResampleImageKernel.h
+++ b/reg-lib/cl/ClResampleImageKernel.h
@@ -1,23 +1,22 @@
-#ifndef CLRESAMPLEIMAGEKERNEL_H
-#define CLRESAMPLEIMAGEKERNEL_H
+#pragma once
 
 #include "ResampleImageKernel.h"
 #include "CLAladinContent.h"
 
-class CLResampleImageKernel : public ResampleImageKernel
+class ClResampleImageKernel : public ResampleImageKernel
 {
     public:
 
-       CLResampleImageKernel(AladinContent * conIn, std::string name);
-       ~CLResampleImageKernel();
+       ClResampleImageKernel(AladinContent * conIn, std::string name);
+       ~ClResampleImageKernel();
 
-       void calculate(int interp, float paddingValue, bool * dti_timepoint = NULL, mat33 * jacMat = NULL);
+       void Calculate(int interp, float paddingValue, bool * dti_timepoint = nullptr, mat33 * jacMat = nullptr);
     private:
 
        nifti_image *floatingImage;
        nifti_image *warpedImage;
        int *mask;
-       CLContextSingletton *sContext;
+       ClContextSingleton *sContext;
        ClAladinContent *con;
        cl_command_queue commandQueue;
        cl_kernel kernel;
@@ -29,5 +28,3 @@ class CLResampleImageKernel : public ResampleImageKernel
        cl_mem clMask;
        cl_mem floMat;
 };
-
-#endif // CLRESAMPLEIMAGEKERNEL_H
diff --git a/reg-lib/cl/InfoDevice.h b/reg-lib/cl/InfoDevice.h
index 3291cdf0..6a51408b 100644
--- a/reg-lib/cl/InfoDevice.h
+++ b/reg-lib/cl/InfoDevice.h
@@ -1,12 +1,10 @@
+#pragma once
+
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include <iomanip>
-
-#ifndef INFODEVICE_H_
-#define INFODEVICE_H_
-
-#include "CLContextSingletton.h"
+#include "ClContextSingleton.h"
 
 template<typename T>
 class DeviceLog {
@@ -24,12 +22,12 @@ class DeviceLog {
 	{
 		std::size_t paramValueSize;
 		std::string clInfo;
-		CLContextSingletton *sContext = &CLContextSingletton::Instance();
+		ClContextSingleton *sContext = &ClContextSingleton::Instance();
 
-		sContext->checkErrNum(clGetDeviceInfo(id, name, 0, NULL, &paramValueSize), "Failed to find OpenCL device info ");
+		sContext->checkErrNum(clGetDeviceInfo(id, name, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
 
 		T * field = (T *) alloca(sizeof(T) * paramValueSize);
-		sContext->checkErrNum(clGetDeviceInfo(id, name, paramValueSize, field, NULL), "Failed to find OpenCL device info ");
+		sContext->checkErrNum(clGetDeviceInfo(id, name, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
 
 		switch (name) {
 		case CL_DEVICE_TYPE: {
@@ -79,7 +77,7 @@ class DeviceLog {
 		case CL_DEVICE_MAX_WORK_ITEM_SIZES: {
 				cl_uint maxWorkItemDimensions;
 
-				sContext->checkErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, NULL), "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.");
+				sContext->checkErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.");
 				std::cout << str << ":\t";
 				for (cl_uint i = 0; i < maxWorkItemDimensions; i++)
 					std::cout << field[i] << " ";
@@ -103,9 +101,9 @@ class DeviceLog {
 	{
 		cl_int errNum;
 		size_t local;
-		CLContextSingletton *sContext = &CLContextSingletton::Instance();
+		ClContextSingleton *sContext = &ClContextSingleton::Instance();
 
-		errNum = clGetKernelWorkGroupInfo(sContext->dummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, NULL);
+		errNum = clGetKernelWorkGroupInfo(sContext->dummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr);
 
 		switch (name) {
 		case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
@@ -120,5 +118,3 @@ class DeviceLog {
 		}
 	}
 };
-
-#endif /* INFODEVICE_H_ */
diff --git a/reg-lib/cl/_reg_openclinfo.cpp b/reg-lib/cl/_reg_openclinfo.cpp
index 0af15fd2..aa9a56d8 100644
--- a/reg-lib/cl/_reg_openclinfo.cpp
+++ b/reg-lib/cl/_reg_openclinfo.cpp
@@ -2,13 +2,13 @@
 
 void showCLInfo(void)
 {
-   CLContextSingletton *sContext = &CLContextSingletton::Instance();
-   cl_uint numPlatforms = sContext->getNumPlatforms();
+   ClContextSingleton *sContext = &ClContextSingleton::Instance();
+   cl_uint numPlatforms = sContext->GetNumPlatforms();
 
    for (cl_uint i = 0; i < numPlatforms; i++)
    {
-      cl_uint numDevices = sContext->getNumDevices();
-      cl_device_id * devices = sContext->getDevices();
+      cl_uint numDevices = sContext->GetNumDevices();
+      cl_device_id * devices = sContext->GetDevices();
       printf("-----------------------------------\n");
       printf("[NiftyReg OPENCL] %i device(s) detected\n", numDevices);
       printf("-----------------------------------\n");
diff --git a/reg-lib/cl/_reg_openclinfo.h b/reg-lib/cl/_reg_openclinfo.h
index 81b456a7..56f895e9 100644
--- a/reg-lib/cl/_reg_openclinfo.h
+++ b/reg-lib/cl/_reg_openclinfo.h
@@ -1,9 +1,6 @@
-#ifndef _REG_OPENCLINFO_H
-#define _REG_OPENCLINFO_H
+#pragma once
 
 #include <iostream>
 #include "InfoDevice.h"
 
 void showCLInfo(void);
-
-#endif
diff --git a/reg-lib/cl/config.h.in b/reg-lib/cl/config.h.in
index 37883e5e..d004ab2d 100755
--- a/reg-lib/cl/config.h.in
+++ b/reg-lib/cl/config.h.in
@@ -1,7 +1,4 @@
-#ifndef CONFIG_H
-#define CONFIG_H
+#pragma once
 
 #define CL_KERNELS_PATH "@CMAKE_INSTALL_PREFIX@/include/cl/"
 #define CL_KERNELS_SRC_PATH "@CMAKE_SOURCE_DIR@/reg-lib/cl/"
-
-#endif // CONFIG_H
diff --git a/reg-lib/cpu/CPUAffineDeformationFieldKernel.cpp b/reg-lib/cpu/CPUAffineDeformationFieldKernel.cpp
deleted file mode 100644
index 017e65a6..00000000
--- a/reg-lib/cpu/CPUAffineDeformationFieldKernel.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-#include "CPUAffineDeformationFieldKernel.h"
-#include "_reg_globalTrans.h"
-
-CPUAffineDeformationFieldKernel::CPUAffineDeformationFieldKernel(AladinContent *con, std::string nameIn) : AffineDeformationFieldKernel(nameIn) {
-    this->deformationFieldImage = con->getCurrentDeformationField();
-    this->affineTransformation = con->getTransformationMatrix();
-    this->mask = con->getCurrentReferenceMask();
-}
-
-void CPUAffineDeformationFieldKernel::calculate(bool compose) {
-   reg_affine_getDeformationField(this->affineTransformation,
-                                  this->deformationFieldImage,
-                                  compose,
-                                  this->mask);
-}
diff --git a/reg-lib/cpu/CPUAffineDeformationFieldKernel.h b/reg-lib/cpu/CPUAffineDeformationFieldKernel.h
deleted file mode 100644
index d72397d4..00000000
--- a/reg-lib/cpu/CPUAffineDeformationFieldKernel.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef CPUAFFINEDEFORMATIONFIELDKERNEL_H
-#define CPUAFFINEDEFORMATIONFIELDKERNEL_H
-
-#include "AffineDeformationFieldKernel.h"
-#include "AladinContent.h"
-#include <string>
-
-
-class CPUAffineDeformationFieldKernel : public AffineDeformationFieldKernel {
-public:
-        CPUAffineDeformationFieldKernel(AladinContent *con, std::string nameIn);
-
-        void calculate(bool compose = false);
-
-        mat44 *affineTransformation;
-        nifti_image *deformationFieldImage;
-        int *mask;
-};
-
-#endif // AFFINEDEFORMATIONFIELDKERNEL_H
diff --git a/reg-lib/cpu/CPUBlockMatchingKernel.cpp b/reg-lib/cpu/CPUBlockMatchingKernel.cpp
deleted file mode 100644
index 85d1529c..00000000
--- a/reg-lib/cpu/CPUBlockMatchingKernel.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "CPUBlockMatchingKernel.h"
-
-CPUBlockMatchingKernel::CPUBlockMatchingKernel(AladinContent *con, std::string name) : BlockMatchingKernel(name) {
-    reference = con->getCurrentReference();
-    warped = con->getCurrentWarped();
-    params = con->getBlockMatchingParams();
-    mask = con->getCurrentReferenceMask();
-}
-
-void CPUBlockMatchingKernel::calculate() {
-    block_matching_method(this->reference, this->warped, this->params, this->mask);
-}
-//
diff --git a/reg-lib/cpu/CPUBlockMatchingKernel.h b/reg-lib/cpu/CPUBlockMatchingKernel.h
deleted file mode 100644
index 7c73dc37..00000000
--- a/reg-lib/cpu/CPUBlockMatchingKernel.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef CPUBLOCKMATCHINGKERNEL_H
-#define CPUBLOCKMATCHINGKERNEL_H
-
-#include "BlockMatchingKernel.h"
-#include "_reg_blockMatching.h"
-#include "nifti1_io.h"
-#include "AladinContent.h"
-
-class CPUBlockMatchingKernel : public BlockMatchingKernel {
-public:
-
-    CPUBlockMatchingKernel(AladinContent *con, std::string name);
-
-    void calculate();
-
-    nifti_image *reference;
-    nifti_image *warped;
-    _reg_blockMatchingParam* params;
-    int *mask;
-
-};
-
-#endif // CPUBLOCKMATCHINGKERNEL_H
diff --git a/reg-lib/cpu/CPUConvolutionKernel.h b/reg-lib/cpu/CPUConvolutionKernel.h
deleted file mode 100644
index 5c6cb4f1..00000000
--- a/reg-lib/cpu/CPUConvolutionKernel.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef CPUCONVOLUTIONKERNEL_H
-#define CPUCONVOLUTIONKERNEL_H
-
-#include "ConvolutionKernel.h"
-#include <string>
-
-class CPUConvolutionKernel : public ConvolutionKernel {
-public:
-    CPUConvolutionKernel(std::string name);
-
-    void calculate(nifti_image *image, float *sigma, int kernelType, int *mask = NULL, bool *timePoints = NULL, bool *axis = NULL);
-};
-
-#endif // CPUCONVOLUTIONKERNEL_H
diff --git a/reg-lib/cpu/CPUKernelFactory.cpp b/reg-lib/cpu/CPUKernelFactory.cpp
deleted file mode 100755
index d5d8fa48..00000000
--- a/reg-lib/cpu/CPUKernelFactory.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-#include "CPUKernelFactory.h"
-#include "CPUAffineDeformationFieldKernel.h"
-#include "CPUConvolutionKernel.h"
-#include "CPUBlockMatchingKernel.h"
-#include "CPUResampleImageKernel.h"
-#include "CPUOptimiseKernel.h"
-//
-#include "AladinContent.h"
-
-Kernel *CPUKernelFactory::produceKernel(std::string name,  AladinContent *con) const
-{
-	if (name == AffineDeformationFieldKernel::getName()) return new CPUAffineDeformationFieldKernel(con, name);
-	else if (name == ConvolutionKernel::getName()) return new CPUConvolutionKernel(name);
-	else if (name == BlockMatchingKernel::getName()) return new CPUBlockMatchingKernel(con, name);
-	else if (name == ResampleImageKernel::getName()) return new CPUResampleImageKernel(con, name);
-	else if (name == OptimiseKernel::getName()) return new CPUOptimiseKernel(con, name);
-	else return NULL;
-}
diff --git a/reg-lib/cpu/CPUKernelFactory.h b/reg-lib/cpu/CPUKernelFactory.h
deleted file mode 100755
index b55ef6be..00000000
--- a/reg-lib/cpu/CPUKernelFactory.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef CPUKERNLFACTORY_H
-#define CPUKERNLFACTORY_H
-
-#include "KernelFactory.h"
-
-class AladinContent;
-
-class CPUKernelFactory : public KernelFactory
-{
-public:
-   Kernel *produceKernel(std::string name,  AladinContent *con) const;
-};
-
-#endif
diff --git a/reg-lib/cpu/CPUOptimiseKernel.cpp b/reg-lib/cpu/CPUOptimiseKernel.cpp
deleted file mode 100644
index 58554ef7..00000000
--- a/reg-lib/cpu/CPUOptimiseKernel.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "CPUOptimiseKernel.h"
-
-CPUOptimiseKernel::CPUOptimiseKernel(AladinContent *con, std::string name) : OptimiseKernel(name) {
-    transformationMatrix = con->getTransformationMatrix();
-    blockMatchingParams = con->getBlockMatchingParams();
-}
-
-void CPUOptimiseKernel::calculate(bool affine) {
-    optimize(this->blockMatchingParams, this->transformationMatrix, affine);
-}
diff --git a/reg-lib/cpu/CPUOptimiseKernel.h b/reg-lib/cpu/CPUOptimiseKernel.h
deleted file mode 100644
index ceb2a3ac..00000000
--- a/reg-lib/cpu/CPUOptimiseKernel.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef CPUOPTIMISEKERNEL_H
-#define CPUOPTIMISEKERNEL_H
-
-#include "OptimiseKernel.h"
-#include "_reg_blockMatching.h"
-#include "nifti1_io.h"
-#include "AladinContent.h"
-
-class CPUOptimiseKernel : public OptimiseKernel {
-public:
-    CPUOptimiseKernel(AladinContent *con, std::string name);
-
-    _reg_blockMatchingParam *blockMatchingParams;
-    mat44 *transformationMatrix;
-
-    void calculate(bool affine);
-
-};
-
-#endif // CPUOPTIMISEKERNEL_H
diff --git a/reg-lib/cpu/CPUResampleImageKernel.h b/reg-lib/cpu/CPUResampleImageKernel.h
deleted file mode 100644
index aadb03cd..00000000
--- a/reg-lib/cpu/CPUResampleImageKernel.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef CPURESAMPLEIMAGEKERNEL_H
-#define CPURESAMPLEIMAGEKERNEL_H
-
-#include "ResampleImageKernel.h"
-#include "AladinContent.h"
-
-class CPUResampleImageKernel : public ResampleImageKernel
-{
-    public:
-        CPUResampleImageKernel(AladinContent *con, std::string name);
-
-        nifti_image *floatingImage;
-        nifti_image *warpedImage;
-        nifti_image *deformationField;
-        int *mask;
-
-        void calculate(int interp, float paddingValue, bool *dti_timepoint = NULL, mat33 * jacMat = NULL);
-};
-
-#endif // CPURESAMPLEIMAGEKERNEL_H
diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp
new file mode 100644
index 00000000..9cd44608
--- /dev/null
+++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp
@@ -0,0 +1,15 @@
+#include "CpuAffineDeformationFieldKernel.h"
+#include "_reg_globalTrans.h"
+
+CpuAffineDeformationFieldKernel::CpuAffineDeformationFieldKernel(AladinContent *con, std::string nameIn) : AffineDeformationFieldKernel(nameIn) {
+    this->deformationFieldImage = con->GetCurrentDeformationField();
+    this->affineTransformation = con->GetTransformationMatrix();
+    this->mask = con->GetCurrentReferenceMask();
+}
+
+void CpuAffineDeformationFieldKernel::Calculate(bool compose) {
+   reg_affine_getDeformationField(this->affineTransformation,
+                                  this->deformationFieldImage,
+                                  compose,
+                                  this->mask);
+}
diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.h b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h
new file mode 100644
index 00000000..7f850256
--- /dev/null
+++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "AffineDeformationFieldKernel.h"
+#include "AladinContent.h"
+#include <string>
+
+class CpuAffineDeformationFieldKernel : public AffineDeformationFieldKernel {
+public:
+        CpuAffineDeformationFieldKernel(AladinContent *con, std::string nameIn);
+
+        void Calculate(bool compose = false);
+
+        mat44 *affineTransformation;
+        nifti_image *deformationFieldImage;
+        int *mask;
+};
diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.cpp b/reg-lib/cpu/CpuBlockMatchingKernel.cpp
new file mode 100644
index 00000000..0626a136
--- /dev/null
+++ b/reg-lib/cpu/CpuBlockMatchingKernel.cpp
@@ -0,0 +1,13 @@
+#include "CpuBlockMatchingKernel.h"
+
+CpuBlockMatchingKernel::CpuBlockMatchingKernel(AladinContent *con, std::string name) : BlockMatchingKernel(name) {
+    reference = con->GetCurrentReference();
+    warped = con->GetCurrentWarped();
+    params = con->GetBlockMatchingParams();
+    mask = con->GetCurrentReferenceMask();
+}
+
+void CpuBlockMatchingKernel::Calculate() {
+    block_matching_method(this->reference, this->warped, this->params, this->mask);
+}
+//
diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.h b/reg-lib/cpu/CpuBlockMatchingKernel.h
new file mode 100644
index 00000000..9ff19e01
--- /dev/null
+++ b/reg-lib/cpu/CpuBlockMatchingKernel.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "BlockMatchingKernel.h"
+#include "_reg_blockMatching.h"
+#include "nifti1_io.h"
+#include "AladinContent.h"
+
+class CpuBlockMatchingKernel : public BlockMatchingKernel {
+public:
+
+    CpuBlockMatchingKernel(AladinContent *con, std::string name);
+
+    void Calculate();
+
+    nifti_image *reference;
+    nifti_image *warped;
+    _reg_blockMatchingParam* params;
+    int *mask;
+
+};
diff --git a/reg-lib/cpu/CPUConvolutionKernel.cpp b/reg-lib/cpu/CpuConvolutionKernel.cpp
similarity index 56%
rename from reg-lib/cpu/CPUConvolutionKernel.cpp
rename to reg-lib/cpu/CpuConvolutionKernel.cpp
index 54b0484c..f511b332 100644
--- a/reg-lib/cpu/CPUConvolutionKernel.cpp
+++ b/reg-lib/cpu/CpuConvolutionKernel.cpp
@@ -1,9 +1,9 @@
-#include "CPUConvolutionKernel.h"
+#include "CpuConvolutionKernel.h"
 #include "_reg_globalTrans.h"
 
-CPUConvolutionKernel::CPUConvolutionKernel(std::string name) : ConvolutionKernel(name) {
+CpuConvolutionKernel::CpuConvolutionKernel(std::string name) : ConvolutionKernel(name) {
 }
 
-void CPUConvolutionKernel::calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) {
+void CpuConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) {
     reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis);
 }
diff --git a/reg-lib/cpu/CpuConvolutionKernel.h b/reg-lib/cpu/CpuConvolutionKernel.h
new file mode 100644
index 00000000..bba25ee4
--- /dev/null
+++ b/reg-lib/cpu/CpuConvolutionKernel.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "ConvolutionKernel.h"
+#include <string>
+
+class CpuConvolutionKernel : public ConvolutionKernel {
+public:
+    CpuConvolutionKernel(std::string name);
+
+    void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr);
+};
diff --git a/reg-lib/cpu/CpuKernelFactory.cpp b/reg-lib/cpu/CpuKernelFactory.cpp
new file mode 100644
index 00000000..5e0b8926
--- /dev/null
+++ b/reg-lib/cpu/CpuKernelFactory.cpp
@@ -0,0 +1,16 @@
+#include "CpuKernelFactory.h"
+#include "CpuAffineDeformationFieldKernel.h"
+#include "CpuConvolutionKernel.h"
+#include "CpuBlockMatchingKernel.h"
+#include "CpuResampleImageKernel.h"
+#include "CpuOptimiseKernel.h"
+#include "AladinContent.h"
+
+Kernel* CpuKernelFactory::ProduceKernel(std::string name, AladinContent *con) const {
+	if (name == AffineDeformationFieldKernel::GetName()) return new CpuAffineDeformationFieldKernel(con, name);
+	else if (name == ConvolutionKernel::GetName()) return new CpuConvolutionKernel(name);
+	else if (name == BlockMatchingKernel::GetName()) return new CpuBlockMatchingKernel(con, name);
+	else if (name == ResampleImageKernel::GetName()) return new CpuResampleImageKernel(con, name);
+	else if (name == OptimiseKernel::GetName()) return new CpuOptimiseKernel(con, name);
+	else return nullptr;
+}
diff --git a/reg-lib/cpu/CpuKernelFactory.h b/reg-lib/cpu/CpuKernelFactory.h
new file mode 100644
index 00000000..fca556ff
--- /dev/null
+++ b/reg-lib/cpu/CpuKernelFactory.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "KernelFactory.h"
+
+class AladinContent;
+
+class CpuKernelFactory: public KernelFactory {
+public:
+   Kernel* ProduceKernel(std::string name, AladinContent *con) const;
+};
diff --git a/reg-lib/cpu/CpuOptimiseKernel.cpp b/reg-lib/cpu/CpuOptimiseKernel.cpp
new file mode 100644
index 00000000..52af770e
--- /dev/null
+++ b/reg-lib/cpu/CpuOptimiseKernel.cpp
@@ -0,0 +1,10 @@
+#include "CpuOptimiseKernel.h"
+
+CpuOptimiseKernel::CpuOptimiseKernel(AladinContent *con, std::string name) : OptimiseKernel(name) {
+    transformationMatrix = con->GetTransformationMatrix();
+    blockMatchingParams = con->GetBlockMatchingParams();
+}
+
+void CpuOptimiseKernel::Calculate(bool affine) {
+    optimize(this->blockMatchingParams, this->transformationMatrix, affine);
+}
diff --git a/reg-lib/cpu/CpuOptimiseKernel.h b/reg-lib/cpu/CpuOptimiseKernel.h
new file mode 100644
index 00000000..00914971
--- /dev/null
+++ b/reg-lib/cpu/CpuOptimiseKernel.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "OptimiseKernel.h"
+#include "_reg_blockMatching.h"
+#include "nifti1_io.h"
+#include "AladinContent.h"
+
+class CpuOptimiseKernel : public OptimiseKernel {
+public:
+    CpuOptimiseKernel(AladinContent *con, std::string name);
+
+    _reg_blockMatchingParam *blockMatchingParams;
+    mat44 *transformationMatrix;
+
+    void Calculate(bool affine);
+
+};
diff --git a/reg-lib/cpu/CPUResampleImageKernel.cpp b/reg-lib/cpu/CpuResampleImageKernel.cpp
similarity index 62%
rename from reg-lib/cpu/CPUResampleImageKernel.cpp
rename to reg-lib/cpu/CpuResampleImageKernel.cpp
index 7a3635d2..60121ce5 100644
--- a/reg-lib/cpu/CPUResampleImageKernel.cpp
+++ b/reg-lib/cpu/CpuResampleImageKernel.cpp
@@ -1,14 +1,14 @@
-#include "CPUResampleImageKernel.h"
+#include "CpuResampleImageKernel.h"
 #include "_reg_resampling.h"
 
-CPUResampleImageKernel::CPUResampleImageKernel(AladinContent *con, std::string name) : ResampleImageKernel( name) {
-   floatingImage = con->getCurrentFloating();
-   warpedImage = con->getCurrentWarped();
-   deformationField = con->getCurrentDeformationField();
-   mask = con->getCurrentReferenceMask();
+CpuResampleImageKernel::CpuResampleImageKernel(AladinContent *con, std::string name) : ResampleImageKernel( name) {
+   floatingImage = con->GetCurrentFloating();
+   warpedImage = con->GetCurrentWarped();
+   deformationField = con->GetCurrentDeformationField();
+   mask = con->GetCurrentReferenceMask();
 }
 
-void CPUResampleImageKernel::calculate(int interp,
+void CpuResampleImageKernel::Calculate(int interp,
                                        float paddingValue,
                                        bool *dti_timepoint,
                                        mat33 * jacMat)
diff --git a/reg-lib/cpu/CpuResampleImageKernel.h b/reg-lib/cpu/CpuResampleImageKernel.h
new file mode 100644
index 00000000..5e787a16
--- /dev/null
+++ b/reg-lib/cpu/CpuResampleImageKernel.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "ResampleImageKernel.h"
+#include "AladinContent.h"
+
+class CpuResampleImageKernel : public ResampleImageKernel
+{
+    public:
+        CpuResampleImageKernel(AladinContent *con, std::string name);
+
+        nifti_image *floatingImage;
+        nifti_image *warpedImage;
+        nifti_image *deformationField;
+        int *mask;
+
+        void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr);
+};
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index 56b9183e..65ce83b9 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -26,7 +26,7 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
 
    int unusableBlock = 0;
    size_t index;
-   DTYPE *referenceValues = NULL;
+   DTYPE *referenceValues = nullptr;
    if (referenceImage->nz > 1) {
       referenceValues = (DTYPE *)malloc(BLOCK_3D_SIZE * sizeof(DTYPE));
    }
@@ -187,18 +187,18 @@ void initialise_block_matching_method(nifti_image * reference,
                                       int stepSize_block,
                                       int *mask,
                                       bool runningOnGPU) {
-   if (params->totalBlock != NULL) {
+   if (params->totalBlock != nullptr) {
       free(params->totalBlock);
-      params->totalBlock = NULL;
+      params->totalBlock = nullptr;
    }
 
-   if (params->referencePosition != NULL) {
+   if (params->referencePosition != nullptr) {
       free(params->referencePosition);
-      params->referencePosition = NULL;
+      params->referencePosition = nullptr;
    }
-   if (params->warpedPosition != NULL) {
+   if (params->warpedPosition != nullptr) {
       free(params->warpedPosition);
-      params->warpedPosition = NULL;
+      params->warpedPosition = nullptr;
    }
 
    params->voxelCaptureRange = 3;
@@ -716,24 +716,24 @@ void optimize(_reg_blockMatchingParam *params,
    //    mat44 inverseMatrix = nifti_mat44_inverse(*transformation_matrix);
    if (params->blockNumber[2] == 1)  // 2D images
    {
-      //First let's check if we have enough correpondance points to estimate a transfomation
+      //First let's check if we have enough correspondence points to estimate a transformation
       if(affine) {
-         //3 = minimum number of corespondances needed
+         //3 = minimum number of correspondences needed
          if(params->definedActiveBlockNumber < 6)
          {
             char text[255];
-            sprintf(text, "%i correspondances between blocks were found", params->definedActiveBlockNumber);
+            sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber);
             reg_print_msg_error(text);
-            reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate an affine transfomation");
+            reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate an affine transformation");
             reg_exit();
          }
       } else {
          if(params->definedActiveBlockNumber < 4)
          {
             char text[255];
-            sprintf(text, "%i correspondances between blocks were found", params->definedActiveBlockNumber);
+            sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber);
             reg_print_msg_error(text);
-            reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate a rigid transfomation");
+            reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate a rigid transformation");
             reg_exit();
          }
       }
@@ -748,7 +748,7 @@ void optimize(_reg_blockMatchingParam *params,
          in[0] = params->warpedPosition[index];
          in[1] = params->warpedPosition[index + 1];
          //Can have undefined = NaN in the warped image now -
-         //to not loose the correspondance - so check that:
+         //to not loose the correspondence - so check that:
          if(in[0] == in[0]){
             reg_mat33_mul(transformation_matrix, in, out);
 
@@ -766,24 +766,24 @@ void optimize(_reg_blockMatchingParam *params,
    }
    else  // 3D images
    {
-      //First let's check if we have enough correpondance points to estimate a transfomation
+      //First let's check if we have enough correspondence points to estimate a transformation
       if(affine) {
-         //4 = minimum number of corespondances needed
+         //4 = minimum number of correspondences needed
          if(params->definedActiveBlockNumber < 8)
          {
             char text[255];
-            sprintf(text, "%i correspondances between blocks were found", params->definedActiveBlockNumber);
+            sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber);
             reg_print_msg_error(text);
-            reg_print_msg_error("Not enough correspondances were found - it is impossible to estimate an affine tranfomation");
+            reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate an affine transformation");
             reg_exit();
          }
       } else {
          if(params->definedActiveBlockNumber < 4)
          {
             char text[255];
-            sprintf(text, "%i correspondances between blocks were found", params->definedActiveBlockNumber);
+            sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber);
             reg_print_msg_error(text);
-            reg_print_msg_error("Not enough correspondances were found - it is impossible to estimate a rigid tranfomation");
+            reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate a rigid transformation");
             reg_exit();
          }
       }
@@ -799,7 +799,7 @@ void optimize(_reg_blockMatchingParam *params,
          in[1] = params->warpedPosition[index + 1];
          in[2] = params->warpedPosition[index + 2];
          //Can have undefined = NaN in the warped image now -
-         //to not loose the correspondance - so check that:
+         //to not loose the correspondence - so check that:
          if(in[0] == in[0]){
             reg_mat44_mul(transformation_matrix, in, out);
 
diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h
index 2c8fbd6b..483554d2 100755
--- a/reg-lib/cpu/_reg_blockMatching.h
+++ b/reg-lib/cpu/_reg_blockMatching.h
@@ -12,8 +12,7 @@
  *
  */
 
-#ifndef __REG_BLOCKMATCHING_H__
-#define __REG_BLOCKMATCHING_H__
+#pragma once
 
 #include "_reg_maths.h"
 #include <vector>
@@ -123,4 +122,3 @@ void block_matching_method(nifti_image * referenceImage,
 void optimize(_reg_blockMatchingParam *params,
               mat44 * transformation_matrix,
               bool affine = true);
-#endif
diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp
index 25e20257..ef2c121d 100644
--- a/reg-lib/cpu/_reg_discrete_init.cpp
+++ b/reg-lib/cpu/_reg_discrete_init.cpp
@@ -100,34 +100,34 @@ reg_discrete_init::reg_discrete_init(reg_measure *_measure,
 /*****************************************************/
 reg_discrete_init::~reg_discrete_init()
 {
-   if(this->discretised_measures!=NULL)
+   if(this->discretised_measures!=nullptr)
       free(this->discretised_measures);
-   this->discretised_measures=NULL;
+   this->discretised_measures=nullptr;
 
-   if(this->regularised_measures!=NULL)
+   if(this->regularised_measures!=nullptr)
       free(this->regularised_measures);
-   this->regularised_measures=NULL;
+   this->regularised_measures=nullptr;
 
-   if(this->l2_penalisation!=NULL)
+   if(this->l2_penalisation!=nullptr)
       free(this->l2_penalisation);
-   this->l2_penalisation=NULL;
+   this->l2_penalisation=nullptr;
 
-   if(this->optimal_label_index!=NULL)
+   if(this->optimal_label_index!=nullptr)
       free(this->optimal_label_index);
-   this->optimal_label_index=NULL;
+   this->optimal_label_index=nullptr;
 
    for(int i=0; i<this->image_dim; ++i){
-      if(this->discrete_values_mm[i]!=NULL)
+      if(this->discrete_values_mm[i]!=nullptr)
          free(this->discrete_values_mm[i]);
-      this->discrete_values_mm[i]=NULL;
+      this->discrete_values_mm[i]=nullptr;
    }
-   if(this->discrete_values_mm!=NULL)
+   if(this->discrete_values_mm!=nullptr)
       free(this->discrete_values_mm);
-   this->discrete_values_mm=NULL;
+   this->discrete_values_mm=nullptr;
 
-   if(this->input_transformation!=NULL)
+   if(this->input_transformation!=nullptr)
       nifti_image_free(this->input_transformation);
-   this->input_transformation=NULL;
+   this->input_transformation=nullptr;
 }
 /*****************************************************/
 /*****************************************************/
@@ -143,7 +143,7 @@ void reg_discrete_init::GetDiscretisedMeasure()
 }
 /*****************************************************/
 /*****************************************************/
-void reg_discrete_init::getOptimalLabel()
+void reg_discrete_init::GetOptimalLabel()
 {
    this->regularisation_convergence=0;
    size_t opt_label = 0;
@@ -393,13 +393,13 @@ void reg_discrete_init::Run()
           this->discretised_measures,
           this->label_nD_num*this->node_number*sizeof(float));
    // Extract the best label
-   this->getOptimalLabel();
+   this->GetOptimalLabel();
    // Update the control point positions
    this->UpdateTransformation();
    // Run the regularisation optimisation
    for(int i=0; i< this->reg_max_it; ++i){
       this->GetRegularisedMeasure();
-      this->getOptimalLabel();
+      this->GetOptimalLabel();
       this->UpdateTransformation();
       sprintf(text, "Regularisation %i/%i - BE=%.2f - [%2.2f%%]",
              i+1, this->reg_max_it,
diff --git a/reg-lib/cpu/_reg_discrete_init.h b/reg-lib/cpu/_reg_discrete_init.h
index 53083400..553f6b3d 100644
--- a/reg-lib/cpu/_reg_discrete_init.h
+++ b/reg-lib/cpu/_reg_discrete_init.h
@@ -12,8 +12,7 @@
  *
  */
 
-#ifndef _reg_discrete_init_H
-#define _reg_discrete_init_H
+#pragma once
 
 #include "_reg_measure.h"
 #include "_reg_optimiser.h"
@@ -47,7 +46,7 @@ class reg_discrete_init
    void GetDiscretisedMeasure();
    void AddL2Penalisation(float);
    void GetRegularisedMeasure();
-   void getOptimalLabel();
+   void GetOptimalLabel();
    void UpdateTransformation();
 
    reg_measure *measure; ///< Measure of similarity object to use for the data term
@@ -76,4 +75,3 @@ class reg_discrete_init
    float* l2_penalisation;
 };
 /********************************************************************************************************/
-#endif // _reg_discrete_init_H
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 6976c957..5738783c 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -12,8 +12,7 @@
  *
  */
 
-#ifndef _REG_DTI_H
-#define _REG_DTI_H
+#pragma once
 
 //#include "_reg_measure.h"
 #include "_reg_ssd.h" // HERE
@@ -33,11 +32,11 @@ class reg_dti : public reg_measure
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          nifti_image *forwardLocalWeightPtr = NULL,
-                          int *maskFloPtr = NULL,
-                          nifti_image *warRefImgPtr = NULL,
-                          nifti_image *warRefGraPtr = NULL,
-                          nifti_image *bckVoxBasedGraPtr = NULL);
+                          nifti_image *forwardLocalWeightPtr = nullptr,
+                          int *maskFloPtr = nullptr,
+                          nifti_image *warRefImgPtr = nullptr,
+                          nifti_image *warRefGraPtr = nullptr,
+                          nifti_image *bckVoxBasedGraPtr = nullptr);
 //    /// @brief Returns the value
    virtual double GetSimilarityMeasureValue();
 //    /// @brief Compute the voxel based gradient for DTI images
@@ -55,7 +54,7 @@ class reg_dti : public reg_measure
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
  * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to NULL, all voxels are considered
+ * should be considered. If set to nullptr, all voxels are considered
  * @return Returns an L2 measure of the distance between the anisotropic components of the diffusion tensors
  */
 extern "C++" template <class DTYPE>
@@ -74,7 +73,7 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
  * @param maxSD Input scalar that contain the difference value between
  * the highest and the lowest intensity.
  * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to NULL, all voxels are considered
+ * should be considered. If set to nullptr, all voxels are considered
  */
 extern "C++" template <class DTYPE>
 void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
@@ -83,4 +82,3 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
       nifti_image *dtiMeasureGradientImage,
       int *mask,
       unsigned int * dtIndicies);
-#endif
diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp
index 01ac7482..a6367ed6 100644
--- a/reg-lib/cpu/_reg_femTrans.cpp
+++ b/reg-lib/cpu/_reg_femTrans.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_FEMTRANS_CPP
-#define _REG_FEMTRANS_CPP
-
 #include "_reg_femTrans.h"
 
 float reg_getTetrahedronVolume(float *node1,float *node2,float *node3,float *node4)
@@ -257,5 +254,3 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
 
    return;
 }// reg_fem_voxelToNodeGradient
-
-#endif
diff --git a/reg-lib/cpu/_reg_femTrans.h b/reg-lib/cpu/_reg_femTrans.h
index 8293de69..8ea483cb 100644
--- a/reg-lib/cpu/_reg_femTrans.h
+++ b/reg-lib/cpu/_reg_femTrans.h
@@ -13,8 +13,7 @@
  *
  */
 
-#ifndef _REG_FEMTRANS_H
-#define _REG_FEMTRANS_H
+#pragma once
 
 #include "nifti1_io.h"
 #include <fstream>
@@ -71,4 +70,3 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
                                  float *femInterpolationWeight,
                                  unsigned int nodeNumber,
                                  float *femBasedGradient);
-#endif
diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp
index 23262681..1be923f0 100755
--- a/reg-lib/cpu/_reg_globalTrans.cpp
+++ b/reg-lib/cpu/_reg_globalTrans.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_AFFINETRANS_CPP
-#define _REG_AFFINETRANS_CPP
-
 #include "_reg_globalTrans.h"
 #include "_reg_maths.h"
 #include "_reg_maths_eigen.h"
@@ -154,7 +151,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation,
                                     int *mask)
 {
    int *tempMask=mask;
-   if(mask==NULL)
+   if(mask==nullptr)
    {
       tempMask=(int *)calloc(deformationField->nx*
                              deformationField->ny*
@@ -193,7 +190,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation,
          reg_exit();
       }
    }
-   if(mask==NULL)
+   if(mask==nullptr)
       free(tempMask);
 }
 /* *************************************************************** */
@@ -844,4 +841,3 @@ void optimize_3D(float *referencePosition, float *warpedPosition,
    delete [] newWarpedPosition;
 }
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h
index 40e64d3e..9d17b595 100755
--- a/reg-lib/cpu/_reg_globalTrans.h
+++ b/reg-lib/cpu/_reg_globalTrans.h
@@ -12,8 +12,7 @@
  *
  */
 
-#ifndef _REG_AFFINETRANS_H
-#define _REG_AFFINETRANS_H
+#pragma once
 
 #include "nifti1_io.h"
 #include "_reg_tools.h"
@@ -80,7 +79,7 @@ extern "C++"
 void reg_affine_getDeformationField(mat44 *affine,
                                     nifti_image *deformationField,
                                     bool compose=false,
-                                    int *mask = NULL);
+                                    int *mask = nullptr);
 /* *************************************************************** */
 void optimize_2D(float* referencePosition, float* warpedPosition,
     unsigned int definedActiveBlock, int percent_to_keep, int max_iter, double tol,
@@ -98,4 +97,3 @@ void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points,
 /* *************************************************************** */
 void estimate_rigid_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44* transformation);
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index d98a2ab1..4acb641e 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -98,17 +98,17 @@ double reg_getKLDivergence(nifti_image *referenceImage,
 
    DTYPE *refPtr=static_cast<DTYPE *>(referenceImage->data);
    DTYPE *warPtr=static_cast<DTYPE *>(warpedImage->data);
-   int *maskPtr=NULL;
+   int *maskPtr=nullptr;
    bool MrClean=false;
-   if(mask==NULL)
+   if(mask==nullptr)
    {
       maskPtr=(int *)calloc(voxelNumber,sizeof(int));
       MrClean=true;
    }
    else maskPtr = &mask[0];
 
-   DTYPE *jacPtr=NULL;
-   if(jacobianDetImg!=NULL)
+   DTYPE *jacPtr=nullptr;
+   if(jacobianDetImg!=nullptr)
       jacPtr=static_cast<DTYPE *>(jacobianDetImg->data);
    double measure = 0., measure_tp = 0., num = 0., tempRefValue, tempWarValue, tempValue;
 
@@ -136,7 +136,7 @@ double reg_getKLDivergence(nifti_image *referenceImage,
                if(tempValue==tempValue &&
                      tempValue!=std::numeric_limits<double>::infinity())
                {
-                  if(jacobianDetImg==NULL)
+                  if(jacobianDetImg==nullptr)
                   {
                      measure_tp -= tempValue;
                      num++;
@@ -177,7 +177,7 @@ double reg_kld::GetSimilarityMeasureValue()
             (this->referenceImagePointer,
              this->warpedFloatingImagePointer,
              this->timePointWeight,
-             NULL, // HERE TODO this->forwardJacDetImagePointer,
+             nullptr, // HERE TODO this->forwardJacDetImagePointer,
              this->referenceMaskPointer
              );
       break;
@@ -186,7 +186,7 @@ double reg_kld::GetSimilarityMeasureValue()
             (this->referenceImagePointer,
              this->warpedFloatingImagePointer,
              this->timePointWeight,
-             NULL, // HERE TODO this->forwardJacDetImagePointer,
+             nullptr, // HERE TODO this->forwardJacDetImagePointer,
              this->referenceMaskPointer
              );
       break;
@@ -213,7 +213,7 @@ double reg_kld::GetSimilarityMeasureValue()
                (this->floatingImagePointer,
                 this->warpedReferenceImagePointer,
                 this->timePointWeight,
-                NULL, // HERE TODO this->backwardJacDetImagePointer,
+                nullptr, // HERE TODO this->backwardJacDetImagePointer,
                 this->floatingMaskPointer
                 );
          break;
@@ -222,7 +222,7 @@ double reg_kld::GetSimilarityMeasureValue()
                (this->floatingImagePointer,
                 this->warpedReferenceImagePointer,
                 this->timePointWeight,
-                NULL, // HERE TODO this->backwardJacDetImagePointer,
+                nullptr, // HERE TODO this->backwardJacDetImagePointer,
                 this->floatingMaskPointer
                 );
          break;
@@ -258,31 +258,31 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
    DTYPE *warImagePtr=static_cast<DTYPE *>(warpedImage->data);
    DTYPE *currentRefPtr = &refImagePtr[current_timepoint*voxelNumber];
    DTYPE *currentWarPtr = &warImagePtr[current_timepoint*voxelNumber];
-   int *maskPtr=NULL;
+   int *maskPtr=nullptr;
    bool MrClean=false;
-   if(mask==NULL)
+   if(mask==nullptr)
    {
       maskPtr=(int *)calloc(voxelNumber,sizeof(int));
       MrClean=true;
    }
    else maskPtr = &mask[0];
 
-   DTYPE *jacPtr=NULL;
-   if(jacobianDetImg!=NULL)
+   DTYPE *jacPtr=nullptr;
+   if(jacobianDetImg!=nullptr)
       jacPtr=static_cast<DTYPE *>(jacobianDetImg->data);
    double tempValue, tempGradX, tempGradY, tempGradZ, tempRefValue, tempWarValue;
 
    // Create pointers to the spatial gradient of the current warped volume
    DTYPE *currentGradPtrX=static_cast<DTYPE *>(warpedImageGradient->data);
    DTYPE *currentGradPtrY=&currentGradPtrX[voxelNumber];
-   DTYPE *currentGradPtrZ=NULL;
+   DTYPE *currentGradPtrZ=nullptr;
    if(referenceImage->nz>1)
       currentGradPtrZ=&currentGradPtrY[voxelNumber];
 
    // Create pointers to the kld gradient image
    DTYPE *measureGradPtrX = static_cast<DTYPE *>(measureGradient->data);
    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-   DTYPE *measureGradPtrZ = NULL;
+   DTYPE *measureGradPtrZ = nullptr;
    if(referenceImage->nz>1)
       measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
@@ -325,7 +325,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
             tempValue *= adjusted_weight;
 
             // Jacobian modulation if the Jacobian determinant image is defined
-            if(jacobianDetImg!=NULL)
+            if(jacobianDetImg!=nullptr)
                tempValue *= jacPtr[voxel];
 
             // Ensure that gradient of the warpedImage image along x-axis is not NaN
@@ -386,7 +386,7 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
              this->warpedFloatingImagePointer,
              this->warpedFloatingGradientImagePointer,
              this->forwardVoxelBasedGradientImagePointer,
-             NULL, // HERE TODO this->forwardJacDetImagePointer,
+             nullptr, // HERE TODO this->forwardJacDetImagePointer,
              this->referenceMaskPointer,
              current_timepoint,
           this->timePointWeight[current_timepoint]
@@ -398,7 +398,7 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
              this->warpedFloatingImagePointer,
              this->warpedFloatingGradientImagePointer,
              this->forwardVoxelBasedGradientImagePointer,
-             NULL, // HERE TODO this->forwardJacDetImagePointer,
+             nullptr, // HERE TODO this->forwardJacDetImagePointer,
              this->referenceMaskPointer,
           current_timepoint,
           this->timePointWeight[current_timepoint]
@@ -431,7 +431,7 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                 this->warpedReferenceImagePointer,
                 this->warpedReferenceGradientImagePointer,
                 this->backwardVoxelBasedGradientImagePointer,
-                NULL, // HERE TODO this->backwardJacDetImagePointer,
+                nullptr, // HERE TODO this->backwardJacDetImagePointer,
                 this->floatingMaskPointer,
             current_timepoint,
             this->timePointWeight[current_timepoint]
@@ -443,7 +443,7 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                 this->warpedReferenceImagePointer,
                 this->warpedReferenceGradientImagePointer,
                 this->backwardVoxelBasedGradientImagePointer,
-                NULL, // HERE TODO this->backwardJacDetImagePointer,
+                nullptr, // HERE TODO this->backwardJacDetImagePointer,
                 this->floatingMaskPointer,
             current_timepoint,
             this->timePointWeight[current_timepoint]
diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h
index 44d78d0d..40094be3 100755
--- a/reg-lib/cpu/_reg_kld.h
+++ b/reg-lib/cpu/_reg_kld.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_KLDIV_H
-#define _REG_KLDIV_H
+#pragma once
 
 #include "_reg_measure.h"
 
@@ -28,11 +27,11 @@ class reg_kld : public reg_measure
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          nifti_image *forwardLocalWeightPtr = NULL,
-                          int *maskFloPtr = NULL,
-                          nifti_image *warRefImgPtr = NULL,
-                          nifti_image *warRefGraPtr = NULL,
-                          nifti_image *bckVoxBasedGraPtr = NULL);
+                          nifti_image *forwardLocalWeightPtr = nullptr,
+                          int *maskFloPtr = nullptr,
+                          nifti_image *warRefImgPtr = nullptr,
+                          nifti_image *warRefGraPtr = nullptr,
+                          nifti_image *bckVoxBasedGraPtr = nullptr);
    /// @brief Returns the kld value
    virtual double GetSimilarityMeasureValue();
    /// @brief Compute the voxel based kld gradient
@@ -49,9 +48,9 @@ class reg_kld : public reg_measure
  * @param jacobianDeterminantImage Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
  * image is used to modulate the KLD. The argument is ignored if the
- * pointer is set to NULL
+ * pointer is set to nullptr
  * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to NULL, all voxels are considered
+ * should be considered. If set to nullptr, all voxels are considered
  * @return Returns the computed sum squared difference
  */
 extern "C++" template <class DTYPE>
@@ -72,9 +71,9 @@ double reg_getKLDivergence(nifti_image *reference,
  * @param jacobianDeterminantImage Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
  * image is used to modulate the KLD. The argument is ignored if the
- * pointer is set to NULL
+ * pointer is set to nullptr
  * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to NULL, all voxels are considered
+ * should be considered. If set to nullptr, all voxels are considered
  */
 extern "C++" template <class DTYPE>
 void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference,
@@ -86,5 +85,3 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference,
                                            int current_timepoint,
                                  double timepoint_weight);
 /* *************************************************************** */
-
-#endif
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index ab6dd3af..ca2a897b 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -10,9 +10,6 @@
  * See the LICENSE.txt file in the nifty_reg root folder
  */
 
-#ifndef _REG_LNCC_CPP
-#define _REG_LNCC_CPP
-
 #include "_reg_lncc.h"
 
 /* *************************************************************** */
@@ -20,19 +17,19 @@
 reg_lncc::reg_lncc()
    : reg_measure()
 {
-   this->forwardCorrelationImage=NULL;
-   this->referenceMeanImage=NULL;
-   this->referenceSdevImage=NULL;
-   this->warpedFloatingMeanImage=NULL;
-   this->warpedFloatingSdevImage=NULL;
-   this->forwardMask = NULL;
-
-   this->backwardCorrelationImage=NULL;
-   this->floatingMeanImage=NULL;
-   this->floatingSdevImage=NULL;
-   this->warpedReferenceMeanImage=NULL;
-   this->warpedReferenceSdevImage=NULL;
-   this->backwardMask = NULL;
+   this->forwardCorrelationImage=nullptr;
+   this->referenceMeanImage=nullptr;
+   this->referenceSdevImage=nullptr;
+   this->warpedFloatingMeanImage=nullptr;
+   this->warpedFloatingSdevImage=nullptr;
+   this->forwardMask = nullptr;
+
+   this->backwardCorrelationImage=nullptr;
+   this->floatingMeanImage=nullptr;
+   this->floatingSdevImage=nullptr;
+   this->warpedReferenceMeanImage=nullptr;
+   this->warpedReferenceSdevImage=nullptr;
+   this->backwardMask = nullptr;
 
    // Gaussian kernel is used by default
    this->kernelType=GAUSSIAN_KERNEL;
@@ -47,43 +44,43 @@ reg_lncc::reg_lncc()
 /* *************************************************************** */
 reg_lncc::~reg_lncc()
 {
-   if(this->forwardCorrelationImage!=NULL)
+   if(this->forwardCorrelationImage!=nullptr)
       nifti_image_free(this->forwardCorrelationImage);
-   this->forwardCorrelationImage=NULL;
-   if(this->referenceMeanImage!=NULL)
+   this->forwardCorrelationImage=nullptr;
+   if(this->referenceMeanImage!=nullptr)
       nifti_image_free(this->referenceMeanImage);
-   this->referenceMeanImage=NULL;
-   if(this->referenceSdevImage!=NULL)
+   this->referenceMeanImage=nullptr;
+   if(this->referenceSdevImage!=nullptr)
       nifti_image_free(this->referenceSdevImage);
-   this->referenceSdevImage=NULL;
-   if(this->warpedFloatingMeanImage!=NULL)
+   this->referenceSdevImage=nullptr;
+   if(this->warpedFloatingMeanImage!=nullptr)
       nifti_image_free(this->warpedFloatingMeanImage);
-   this->warpedFloatingMeanImage=NULL;
-   if(this->warpedFloatingSdevImage!=NULL)
+   this->warpedFloatingMeanImage=nullptr;
+   if(this->warpedFloatingSdevImage!=nullptr)
       nifti_image_free(this->warpedFloatingSdevImage);
-   this->warpedFloatingSdevImage=NULL;
-   if(this->forwardMask!=NULL)
+   this->warpedFloatingSdevImage=nullptr;
+   if(this->forwardMask!=nullptr)
       free(this->forwardMask);
-   this->forwardMask=NULL;
+   this->forwardMask=nullptr;
 
-   if(this->backwardCorrelationImage!=NULL)
+   if(this->backwardCorrelationImage!=nullptr)
       nifti_image_free(this->backwardCorrelationImage);
-   this->backwardCorrelationImage=NULL;
-   if(this->floatingMeanImage!=NULL)
+   this->backwardCorrelationImage=nullptr;
+   if(this->floatingMeanImage!=nullptr)
       nifti_image_free(this->floatingMeanImage);
-   this->floatingMeanImage=NULL;
-   if(this->floatingSdevImage!=NULL)
+   this->floatingMeanImage=nullptr;
+   if(this->floatingSdevImage!=nullptr)
       nifti_image_free(this->floatingSdevImage);
-   this->floatingSdevImage=NULL;
-   if(this->warpedReferenceMeanImage!=NULL)
+   this->floatingSdevImage=nullptr;
+   if(this->warpedReferenceMeanImage!=nullptr)
       nifti_image_free(this->warpedReferenceMeanImage);
-   this->warpedReferenceMeanImage=NULL;
-   if(this->warpedReferenceSdevImage!=NULL)
+   this->warpedReferenceMeanImage=nullptr;
+   if(this->warpedReferenceSdevImage!=nullptr)
       nifti_image_free(this->warpedReferenceSdevImage);
-   this->warpedReferenceSdevImage=NULL;
-   if(this->backwardMask!=NULL)
+   this->warpedReferenceSdevImage=nullptr;
+   if(this->backwardMask!=nullptr)
       free(this->backwardMask);
-   this->backwardMask=NULL;
+   this->backwardMask=nullptr;
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -194,42 +191,42 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
    }
 
    // Check that no images are already allocated
-   if(this->forwardCorrelationImage!=NULL)
+   if(this->forwardCorrelationImage!=nullptr)
       nifti_image_free(this->forwardCorrelationImage);
-   this->forwardCorrelationImage=NULL;
-   if(this->referenceMeanImage!=NULL)
+   this->forwardCorrelationImage=nullptr;
+   if(this->referenceMeanImage!=nullptr)
       nifti_image_free(this->referenceMeanImage);
-   this->referenceMeanImage=NULL;
-   if(this->referenceSdevImage!=NULL)
+   this->referenceMeanImage=nullptr;
+   if(this->referenceSdevImage!=nullptr)
       nifti_image_free(this->referenceSdevImage);
-   this->referenceSdevImage=NULL;
-   if(this->warpedFloatingMeanImage!=NULL)
+   this->referenceSdevImage=nullptr;
+   if(this->warpedFloatingMeanImage!=nullptr)
       nifti_image_free(this->warpedFloatingMeanImage);
-   this->warpedFloatingMeanImage=NULL;
-   if(this->warpedFloatingSdevImage!=NULL)
+   this->warpedFloatingMeanImage=nullptr;
+   if(this->warpedFloatingSdevImage!=nullptr)
       nifti_image_free(this->warpedFloatingSdevImage);
-   this->warpedFloatingSdevImage=NULL;
-   if(this->backwardCorrelationImage!=NULL)
+   this->warpedFloatingSdevImage=nullptr;
+   if(this->backwardCorrelationImage!=nullptr)
       nifti_image_free(this->backwardCorrelationImage);
-   this->backwardCorrelationImage=NULL;
-   if(this->floatingMeanImage!=NULL)
+   this->backwardCorrelationImage=nullptr;
+   if(this->floatingMeanImage!=nullptr)
       nifti_image_free(this->floatingMeanImage);
-   this->floatingMeanImage=NULL;
-   if(this->floatingSdevImage!=NULL)
+   this->floatingMeanImage=nullptr;
+   if(this->floatingSdevImage!=nullptr)
       nifti_image_free(this->floatingSdevImage);
-   this->floatingSdevImage=NULL;
-   if(this->warpedReferenceMeanImage!=NULL)
+   this->floatingSdevImage=nullptr;
+   if(this->warpedReferenceMeanImage!=nullptr)
       nifti_image_free(this->warpedReferenceMeanImage);
-   this->warpedReferenceMeanImage=NULL;
-   if(this->warpedReferenceSdevImage!=NULL)
+   this->warpedReferenceMeanImage=nullptr;
+   if(this->warpedReferenceSdevImage!=nullptr)
       nifti_image_free(this->warpedReferenceSdevImage);
-   this->warpedReferenceSdevImage=NULL;
-   if(this->forwardMask!=NULL)
+   this->warpedReferenceSdevImage=nullptr;
+   if(this->forwardMask!=nullptr)
       free(this->forwardMask);
-   this->forwardMask=NULL;
-   if(this->backwardMask!=NULL)
+   this->forwardMask=nullptr;
+   if(this->backwardMask!=nullptr)
       free(this->backwardMask);
-   this->backwardMask=NULL;
+   this->backwardMask=nullptr;
 
    //
    size_t voxelNumber = (size_t)this->referenceImagePointer->nx *
@@ -622,14 +619,14 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
    reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
    DTYPE *measureGradPtrX = static_cast<DTYPE *>(measureGradientImage->data);
    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-   DTYPE *measureGradPtrZ = NULL;
+   DTYPE *measureGradPtrZ = nullptr;
    if(referenceImage->nz>1)
       measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
    // Create pointers to the spatial gradient of the warped image
    DTYPE *warpGradPtrX = static_cast<DTYPE *>(warImgGradient->data);
    DTYPE *warpGradPtrY = &warpGradPtrX[voxelNumber];
-   DTYPE *warpGradPtrZ = NULL;
+   DTYPE *warpGradPtrZ = nullptr;
    if(referenceImage->nz>1)
       warpGradPtrZ=&warpGradPtrY[voxelNumber];
 
@@ -653,7 +650,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
        common *= adjusted_weight;
          measureGradPtrX[voxel] -= warpGradPtrX[voxel] * common;
          measureGradPtrY[voxel] -= warpGradPtrY[voxel] * common;
-         if(warpGradPtrZ!=NULL)
+         if(warpGradPtrZ!=nullptr)
             measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * common;
       }
    }
@@ -817,5 +814,3 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
 }
 /* *************************************************************** */
 /* *************************************************************** */
-#endif
-
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index b1694117..3de0713a 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -10,8 +10,7 @@
  * See the LICENSE.txt file in the nifty_reg root folder
  */
 
-#ifndef _REG_LNCC_H
-#define _REG_LNCC_H
+#pragma once
 
 #include "_reg_measure.h"
 
@@ -31,11 +30,11 @@ class reg_lncc : public reg_measure
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          nifti_image *forwardLocalWeightPtr = NULL,
-                          int *maskFloPtr = NULL,
-                          nifti_image *warRefImgPtr = NULL,
-                          nifti_image *warRefGraPtr = NULL,
-                          nifti_image *bckVoxBasedGraPtr = NULL);
+                          nifti_image *forwardLocalWeightPtr = nullptr,
+                          int *maskFloPtr = nullptr,
+                          nifti_image *warRefImgPtr = nullptr,
+                          nifti_image *warRefGraPtr = nullptr,
+                          nifti_image *bckVoxBasedGraPtr = nullptr);
    /// @brief Returns the lncc value
    double GetSimilarityMeasureValue();
    /// @brief Compute the voxel based lncc gradient
@@ -87,7 +86,7 @@ class reg_lncc : public reg_measure
  * @param gaussianStandardDeviation Standard deviation of the Gaussian kernel
  * to use.
  * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to NULL, all voxels are considered
+ * should be considered. If set to nullptr, all voxels are considered
  * @return Returns the computed LNCC
  */
 extern "C++" template<class DTYPE>
@@ -112,7 +111,7 @@ double reg_getLNCCValue(nifti_image *referenceImage,
  *  @param gaussianStandardDeviation Standard deviation of the Gaussian kernel
  *  to use.
  *  @param mask Array that contains a mask to specify which voxel
- *  should be considered. If set to NULL, all voxels are considered
+ *  should be considered. If set to nullptr, all voxels are considered
  */
 extern "C++" template <class DTYPE>
 void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
@@ -129,5 +128,3 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    int kernelType,
                                    int current_timepoint,
                            double timepoint_weight);
-#endif
-
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index e8ffc713..755f6893 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -151,12 +151,12 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
                                           float *spacing)
 {
    // Delete the grid if they are already initialised
-   if(*forwardGridImage!=NULL)
+   if(*forwardGridImage!=nullptr)
       nifti_image_free(*forwardGridImage);
-   *forwardGridImage=NULL;
-   if(*backwardGridImage!=NULL)
+   *forwardGridImage=nullptr;
+   if(*backwardGridImage!=nullptr)
       nifti_image_free(*backwardGridImage);
-   *backwardGridImage=NULL;
+   *backwardGridImage=nullptr;
    // We specified a space which is in-between both input images
    // // Get the reference image space
    mat44 referenceImageSpace = referenceImage->qto_xyz;
@@ -174,7 +174,7 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
 #endif
    // Check if an affine transformation is specified
    mat44 halfForwardAffine, halfBackwardAffine;
-   if(forwardAffineTrans!=NULL)
+   if(forwardAffineTrans!=nullptr)
    {
       // Compute half of the affine transformation - ref to flo
       halfForwardAffine = reg_mat44_logm(forwardAffineTrans);
@@ -371,9 +371,9 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
    // Set the affine matrices
    mat44 identity;
    reg_mat44_eye(&identity);
-   if((*forwardGridImage)->ext_list!=NULL)
+   if((*forwardGridImage)->ext_list!=nullptr)
       free((*forwardGridImage)->ext_list);
-   if((*backwardGridImage)->ext_list!=NULL)
+   if((*backwardGridImage)->ext_list!=nullptr)
       free((*backwardGridImage)->ext_list);
    (*forwardGridImage)->num_ext=0;
    (*backwardGridImage)->num_ext=0;
@@ -1642,7 +1642,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
 #endif
 
    bool MrPropre=false;
-   if(mask==NULL)
+   if(mask==nullptr)
    {
       // Active voxel are all superior to -1, 0 thus will do !
       MrPropre=true;
@@ -1652,7 +1652,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
    // Check if an affine initialisation is required
    if(splineControlPoint->num_ext>0)
    {
-      if(splineControlPoint->ext_list[0].edata!=NULL)
+      if(splineControlPoint->ext_list[0].edata!=nullptr)
       {
          reg_affine_getDeformationField(reinterpret_cast<mat44 *>(splineControlPoint->ext_list[0].edata),
                deformationField,
@@ -1723,7 +1723,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
 
    if(splineControlPoint->num_ext>1)
    {
-      if(splineControlPoint->ext_list[1].edata!=NULL)
+      if(splineControlPoint->ext_list[1].edata!=nullptr)
       {
          reg_affine_getDeformationField(reinterpret_cast<mat44 *>(splineControlPoint->ext_list[1].edata),
                deformationField,
@@ -1734,7 +1734,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
    if(MrPropre==true)
    {
       free(mask);
-      mask=NULL;
+      mask=nullptr;
    }
 
    return;
@@ -1753,11 +1753,11 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
    size_t voxelNumber = (size_t)voxelImage->nx*voxelImage->ny*voxelImage->nz;
    DTYPE *nodePtrX = static_cast<DTYPE *>(nodeImage->data);
    DTYPE *nodePtrY = &nodePtrX[nodeNumber];
-   DTYPE *nodePtrZ = NULL;
+   DTYPE *nodePtrZ = nullptr;
 
    DTYPE *voxelPtrX = static_cast<DTYPE *>(voxelImage->data);
    DTYPE *voxelPtrY = &voxelPtrX[voxelNumber];
-   DTYPE *voxelPtrZ = NULL;
+   DTYPE *voxelPtrZ = nullptr;
 
    if(nodeImage->nz>1)
    {
@@ -1774,7 +1774,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
    // Affine transformation between the grid and the reference image
    if(nodeImage->num_ext>0)
    {
-      if(nodeImage->ext_list[0].edata!=NULL)
+      if(nodeImage->ext_list[0].edata!=nullptr)
       {
          mat44 temp=*(reinterpret_cast<mat44 *>(nodeImage->ext_list[0].edata));
          temp=nifti_mat44_inverse(temp);
@@ -1790,12 +1790,12 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
    mat33 reorientation;
    // Voxel to millimeter contains the orientation of the image that is used
    // to compute the spatial gradient (floating image)
-   if(voxelToMillimeter!=NULL)
+   if(voxelToMillimeter!=nullptr)
    {
       reorientation=reg_mat44_to_mat33(voxelToMillimeter);
       if(nodeImage->num_ext>0)
       {
-         if(nodeImage->ext_list[0].edata!=NULL)
+         if(nodeImage->ext_list[0].edata!=nullptr)
          {
             mat33 temp = reg_mat44_to_mat33(reinterpret_cast<mat44 *>(nodeImage->ext_list[0].edata));
             temp=nifti_mat33_inverse(temp);
@@ -1843,7 +1843,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
             basisX[0]=static_cast<DTYPE>(1) - basisX[1];
             basisY[1]=voxelCoord[1]-static_cast<DTYPE>(pre[1]);
             basisY[0]=static_cast<DTYPE>(1) - basisY[1];
-            if(voxelPtrZ!=NULL)
+            if(voxelPtrZ!=nullptr)
             {
                basisZ[1]=voxelCoord[2]-static_cast<DTYPE>(pre[2]);
                basisZ[0]=static_cast<DTYPE>(1) - basisZ[1];
@@ -1867,10 +1867,10 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
                               size_t index=(indexZ*voxelImage->ny+indexY) *
                                     voxelImage->nx+indexX;
                               DTYPE linearWeight = basisX[a] * basisY[b];
-                              if(voxelPtrZ!=NULL) linearWeight *= basisZ[c];
+                              if(voxelPtrZ!=nullptr) linearWeight *= basisZ[c];
                               interpolatedValue[0] += linearWeight * voxelPtrX[index];
                               interpolatedValue[1] += linearWeight * voxelPtrY[index];
-                              if(voxelPtrZ!=NULL)
+                              if(voxelPtrZ!=nullptr)
                                  interpolatedValue[2] += linearWeight * voxelPtrZ[index];
                            }
                         }
@@ -1887,7 +1887,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
                   reorientation.m[0][1] * interpolatedValue[0] +
                   reorientation.m[1][1] * interpolatedValue[1] +
                   reorientation.m[2][1] * interpolatedValue[2] ;
-            if(voxelPtrZ!=NULL)
+            if(voxelPtrZ!=nullptr)
                reorientedValue[2] =
                      reorientation.m[0][2] * interpolatedValue[0] +
                      reorientation.m[1][2] * interpolatedValue[1] +
@@ -1896,19 +1896,19 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
             {
                *nodePtrX += reorientedValue[0]*static_cast<DTYPE>(weight);
                *nodePtrY += reorientedValue[1]*static_cast<DTYPE>(weight);
-               if(voxelPtrZ!=NULL)
+               if(voxelPtrZ!=nullptr)
                   *nodePtrZ += reorientedValue[2]*static_cast<DTYPE>(weight);
             }
             else
             {
                *nodePtrX = reorientedValue[0]*static_cast<DTYPE>(weight);
                *nodePtrY = reorientedValue[1]*static_cast<DTYPE>(weight);
-               if(voxelPtrZ!=NULL)
+               if(voxelPtrZ!=nullptr)
                   *nodePtrZ = reorientedValue[2]*static_cast<DTYPE>(weight);
             }
             ++nodePtrX;
             ++nodePtrY;
-            if(voxelPtrZ!=NULL)
+            if(voxelPtrZ!=nullptr)
                ++nodePtrZ;
          } // loop over
       } // loop over y
@@ -1972,7 +1972,7 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint,
    SplineTYPE *oldGrid = (SplineTYPE *)malloc(splineControlPoint->nvox*splineControlPoint->nbyper);
    SplineTYPE *gridPtrX = static_cast<SplineTYPE *>(splineControlPoint->data);
    memcpy(oldGrid, gridPtrX, splineControlPoint->nvox*splineControlPoint->nbyper);
-   if(splineControlPoint->data!=NULL) free(splineControlPoint->data);
+   if(splineControlPoint->data!=nullptr) free(splineControlPoint->data);
    int oldDim[4];
    oldDim[0]=splineControlPoint->dim[0];
    oldDim[1]=splineControlPoint->dim[1];
@@ -1982,7 +1982,7 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint,
    splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f;
    splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f;
    splineControlPoint->dz = 1.0f;
-   if(referenceImage!=NULL)
+   if(referenceImage!=nullptr)
    {
       splineControlPoint->dim[1]=splineControlPoint->nx=static_cast<int>(reg_ceil(referenceImage->nx*referenceImage->dx/splineControlPoint->dx)+3.f);
       splineControlPoint->dim[2]=splineControlPoint->ny=static_cast<int>(reg_ceil(referenceImage->ny*referenceImage->dy/splineControlPoint->dy)+3.f);
@@ -2080,7 +2080,7 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_
    SplineTYPE *oldGrid = (SplineTYPE *)malloc(splineControlPoint->nvox*splineControlPoint->nbyper);
    SplineTYPE *gridPtrX = static_cast<SplineTYPE *>(splineControlPoint->data);
    memcpy(oldGrid, gridPtrX, splineControlPoint->nvox*splineControlPoint->nbyper);
-   if(splineControlPoint->data!=NULL) free(splineControlPoint->data);
+   if(splineControlPoint->data!=nullptr) free(splineControlPoint->data);
    int oldDim[4];
    oldDim[0]=splineControlPoint->dim[0];
    oldDim[1]=splineControlPoint->dim[1];
@@ -2091,7 +2091,7 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_
    splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f;
    splineControlPoint->dz = splineControlPoint->pixdim[3] = splineControlPoint->dz / 2.0f;
 
-   if(referenceImage!=NULL)
+   if(referenceImage!=nullptr)
    {
       splineControlPoint->dim[1]=splineControlPoint->nx=static_cast<int>(reg_ceil(referenceImage->nx*referenceImage->dx/splineControlPoint->dx)+3.f);
       splineControlPoint->dim[2]=splineControlPoint->ny=static_cast<int>(reg_ceil(referenceImage->ny*referenceImage->dy/splineControlPoint->dy)+3.f);
@@ -2447,7 +2447,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
          reg_exit();
       }
    }
-   if(referenceImage!=NULL)
+   if(referenceImage!=nullptr)
    {
       // Compute the new control point header
       // The qform (and sform) are set for the control point position image
@@ -2568,8 +2568,8 @@ void reg_defField_compose2D(nifti_image *deformationField,
    DTYPE *resPtrX = static_cast<DTYPE *>(dfToUpdate->data);
    DTYPE *resPtrY = &resPtrX[warVoxelNumber];
 
-   mat44 *df_real2Voxel=NULL;
-   mat44 *df_voxel2Real=NULL;
+   mat44 *df_real2Voxel=nullptr;
+   mat44 *df_voxel2Real=nullptr;
    if(deformationField->sform_code>0)
    {
       df_real2Voxel=&(dfToUpdate->sto_ijk);
@@ -2682,7 +2682,7 @@ void reg_defField_compose3D(nifti_image *deformationField,
 #else
    mat44 df_real2Voxel __attribute__((aligned(16)));
 #endif
-   mat44 *df_voxel2Real=NULL;
+   mat44 *df_voxel2Real=nullptr;
    if(deformationField->sform_code>0)
    {
       df_real2Voxel=deformationField->sto_ijk;
@@ -2809,7 +2809,7 @@ void reg_defField_compose(nifti_image *deformationField,
    }
 
    bool freeMask=false;
-   if(mask==NULL)
+   if(mask==nullptr)
    {
       mask=(int *)calloc(dfToUpdate->nx*
                          dfToUpdate->ny*
@@ -3517,8 +3517,8 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
    size_t coord;
 
    // read the xyz/ijk sform or qform, as appropriate
-   mat44 *matrix_real_to_voxel1=NULL;
-   mat44 *matrix_voxel_to_real2=NULL;
+   mat44 *matrix_real_to_voxel1=nullptr;
+   mat44 *matrix_voxel_to_real2=nullptr;
    if(grid1->sform_code>0)
       matrix_real_to_voxel1=&(grid1->sto_ijk);
    else matrix_real_to_voxel1=&(grid1->qto_ijk);
@@ -3703,8 +3703,8 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
    DTYPE xVoxel, yVoxel, zVoxel;
 
    // read the xyz/ijk sform or qform, as appropriate
-   mat44 *matrix_real_to_voxel1=NULL;
-   mat44 *matrix_voxel_to_real2=NULL;
+   mat44 *matrix_real_to_voxel1=nullptr;
+   mat44 *matrix_voxel_to_real2=nullptr;
    if(grid1->sform_code>0)
       matrix_real_to_voxel1=&(grid1->sto_ijk);
    else matrix_real_to_voxel1=&(grid1->qto_ijk);
@@ -3992,7 +3992,7 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
    // The initial flow field is generated using cubic B-Spline interpolation/approximation
    reg_spline_getDeformationField(velocityFieldGrid,
                                   flowField,
-                                  NULL, // mask
+                                  nullptr, // mask
                                   true, //composition
                                   true // bspline
                                   );
@@ -4013,10 +4013,10 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
    }
 
    // Remove the affine component from the flow field
-   nifti_image *affineOnly=NULL;
+   nifti_image *affineOnly=nullptr;
    if(flowFieldImage->num_ext>0)
    {
-      if(flowFieldImage->ext_list[0].edata!=NULL)
+      if(flowFieldImage->ext_list[0].edata!=nullptr)
       {
          // Create a field that contains the affine component only
          affineOnly = nifti_copy_nim_info(deformationFieldImage);
@@ -4095,7 +4095,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
       // The deformation field is applied to itself
       reg_defField_compose(deformationFieldImage,
                            flowFieldImage,
-                           NULL);
+                           nullptr);
       // The computed scaled deformation field is copied over
       memcpy(deformationFieldImage->data, flowFieldImage->data,
              deformationFieldImage->nvox*deformationFieldImage->nbyper);
@@ -4106,12 +4106,12 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
 #endif
    }
    // The affine conponent of the transformation is restored
-   if(affineOnly!=NULL)
+   if(affineOnly!=nullptr)
    {
       reg_getDisplacementFromDeformation(deformationFieldImage);
       reg_tools_addImageToImage(deformationFieldImage,affineOnly,deformationFieldImage);
       nifti_image_free(affineOnly);
-      affineOnly=NULL;
+      affineOnly=nullptr;
    }
    deformationFieldImage->intent_p1=DEF_FIELD;
    deformationFieldImage->intent_p2=0;
@@ -4137,7 +4137,7 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
       // Use the spline approximation to generate the deformation field
       reg_spline_getDeformationField(velocityFieldGrid,
                                      deformationFieldImage,
-                                     NULL,
+                                     nullptr,
                                      false, // composition
                                      true // bspline
                                      );
@@ -4191,17 +4191,17 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
       strcpy(flowFieldImage->intent_name,"NREG_TRANS");
       flowFieldImage->intent_p1=DEF_VEL_FIELD;
       flowFieldImage->intent_p2=velocityFieldGrid->intent_p2;
-      if(velocityFieldGrid->num_ext>0 && flowFieldImage->ext_list==NULL)
+      if(velocityFieldGrid->num_ext>0 && flowFieldImage->ext_list==nullptr)
          nifti_copy_extensions(flowFieldImage, velocityFieldGrid);
 
       // Generate the velocity field
       reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid,
                                               flowFieldImage);
       // Remove the affine component from the flow field
-      nifti_image *affineOnly=NULL;
+      nifti_image *affineOnly=nullptr;
       if(flowFieldImage->num_ext>0)
       {
-         if(flowFieldImage->ext_list[0].edata!=NULL)
+         if(flowFieldImage->ext_list[0].edata!=nullptr)
          {
             // Create a field that contains the affine component only
             affineOnly = nifti_copy_nim_info(deformationFieldImage[0]);
@@ -4232,7 +4232,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
 
       // Clear the allocated flow field
       nifti_image_free(flowFieldImage);
-      flowFieldImage=NULL;
+      flowFieldImage=nullptr;
 
       // Conversion from displacement to deformation
       reg_getDeformationFromDisplacement(deformationFieldImage[0]);
@@ -4246,7 +4246,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
          // The deformation field is applied to itself
          reg_defField_compose(deformationFieldImage[i], // to apply
                               deformationFieldImage[i+1], // to update
-               NULL);
+               nullptr);
    #ifndef NDEBUG
          char text[255];
          sprintf(text, "Squaring (composition) step %u/%u", i+1, squaringNumber);
@@ -4254,7 +4254,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
    #endif
       }
       // The affine conponent of the transformation is restored
-      if(affineOnly!=NULL)
+      if(affineOnly!=nullptr)
       {
          for(unsigned short i=0; i<=squaringNumber; ++i){
             reg_getDisplacementFromDeformation(deformationFieldImage[i]);
@@ -4263,7 +4263,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
             deformationFieldImage[i]->intent_p2=0;
          }
          nifti_image_free(affineOnly);
-         affineOnly=NULL;
+         affineOnly=nullptr;
       }
       // If required an affine component is composed
       if(velocityFieldGrid->num_ext>1)
@@ -4648,7 +4648,7 @@ void reg_spline_GetDeconvolvedCoefficents_core(nifti_image *img)
             restoreLine<double>(start,end,increment,coeffPtr,values);
          }
          delete[] values;
-         values=NULL;
+         values=nullptr;
 
          // Along the Y axis
          number = img->ny;
@@ -4663,7 +4663,7 @@ void reg_spline_GetDeconvolvedCoefficents_core(nifti_image *img)
             restoreLine<double>(start,end,increment,coeffPtr,values);
          }
          delete[] values;
-         values=NULL;
+         values=nullptr;
 
          // Along the Z axis
          if(img->nz>1)
@@ -4680,7 +4680,7 @@ void reg_spline_GetDeconvolvedCoefficents_core(nifti_image *img)
                restoreLine<double>(start,end,increment,coeffPtr,values);
             }
             delete[] values;
-            values=NULL;
+            values=nullptr;
          }
       }//t
    }//u
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index 37b78ddd..14c913d7 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -14,8 +14,7 @@
  * Marcel van Herk (CMIC / NKI / AVL)
  */
 
-#ifndef _REG_TRANS_H
-#define _REG_TRANS_H
+#pragma once
 
 #include "float.h"
 #include "_reg_globalTrans.h"
@@ -65,7 +64,7 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
 extern "C++"
 void reg_spline_getDeformationField(nifti_image *controlPointGridImage,
                                     nifti_image *deformationField,
-                                    int *mask = NULL,
+                                    int *mask = nullptr,
                                     bool composition = false,
                                     bool bspline = true,
                                     bool force_no_lut = false);
@@ -87,7 +86,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
                                   nifti_image *voxelImage,
                                   float weight,
                                   bool update,
-                                  mat44 *voxelToMillimeter = NULL
+                                  mat44 *voxelToMillimeter = nullptr
       );
 /* *************************************************************** */
 /** @brief Refine a grid of control points
@@ -98,7 +97,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
  */
 extern "C++"
 void reg_spline_refineControlPointGrid(nifti_image *controlPointGridImage,
-                                       nifti_image *referenceImage = NULL
+                                       nifti_image *referenceImage = nullptr
       );
 /* *************************************************************** */
 /** @brief This function compose the a first control point image with a second one:
@@ -213,4 +212,3 @@ void compute_BCH_update(nifti_image *img1,
 extern "C++"
 void reg_spline_GetDeconvolvedCoefficents(nifti_image *img);
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index d034b20d..d98f471d 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -60,14 +60,14 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                                   bool approximation,
                                   bool useHeaderInformation)
 {
-   if(JacobianMatrices==NULL && JacobianDeterminants==NULL)
+   if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr)
    {
       reg_print_fct_error("reg_spline_jacobian3D");
-      reg_print_msg_error("Both output pointers are NULL");
+      reg_print_msg_error("Both output pointers are nullptr");
       reg_print_msg_error("Nothing to be done");
       reg_exit();
    }
-   if(referenceImage==NULL && approximation==false)
+   if(referenceImage==nullptr && approximation==false)
    {
       reg_print_fct_error("reg_spline_jacobian3D");
       reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position");
@@ -114,9 +114,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                jacobianMatrix.m[2][2] = (coeffPtrZ[index+splineControlPoint->nx*splineControlPoint->ny] - coeffPtrZ[index-splineControlPoint->nx*splineControlPoint->ny])/2.;
 
                jacobianMatrix=nifti_mat33_mul(reorientation,jacobianMatrix);
-               if(JacobianMatrices!=NULL)
+               if(JacobianMatrices!=nullptr)
                   JacobianMatrices[index]=jacobianMatrix;
-               if(JacobianDeterminants!=NULL)
+               if(JacobianDeterminants!=nullptr)
                   JacobianDeterminants[index] =
                         static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
                ++index;
@@ -195,9 +195,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                   // reorient the matrix
                   jacobianMatrix=nifti_mat33_mul(reorientation,
                                                  jacobianMatrix);
-                  if(JacobianMatrices!=NULL)
+                  if(JacobianMatrices!=nullptr)
                      JacobianMatrices[index]=jacobianMatrix;
-                  if(JacobianDeterminants!=NULL)
+                  if(JacobianDeterminants!=nullptr)
                      JacobianDeterminants[index] =
                            static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
                   ++index;
@@ -239,9 +239,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                   jacobianMatrix=nifti_mat33_mul(reorientation,
                                                  jacobianMatrix);
 
-                  if(JacobianMatrices!=NULL)
+                  if(JacobianMatrices!=nullptr)
                      JacobianMatrices[index]=jacobianMatrix;
-                  if(JacobianDeterminants!=NULL)
+                  if(JacobianDeterminants!=nullptr)
                      JacobianDeterminants[index] =
                            static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
                   ++index;
@@ -262,14 +262,14 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                            bool approximation,
                            bool useHeaderInformation)
 {
-   if(JacobianMatrices==NULL && JacobianDeterminants==NULL)
+   if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr)
    {
       reg_print_fct_error("reg_spline_jacobian2D");
-      reg_print_msg_error("Both output pointers are NULL");
+      reg_print_msg_error("Both output pointers are nullptr");
       reg_print_msg_error("Nothing to be done");
       reg_exit();
    }
-   if(referenceImage==NULL && approximation==false)
+   if(referenceImage==nullptr && approximation==false)
    {
       reg_print_fct_error("reg_spline_jacobian2D");
       reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position");
@@ -345,9 +345,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                jacobianMatrix.m[1][1] += basisY[incr0]*coeffY[incr0];
             }
             jacobianMatrix=nifti_mat33_mul(reorientation,jacobianMatrix);
-            if(JacobianMatrices!=NULL)
+            if(JacobianMatrices!=nullptr)
                JacobianMatrices[voxelIndex]=jacobianMatrix;
-            if(JacobianDeterminants!=NULL)
+            if(JacobianDeterminants!=nullptr)
                JacobianDeterminants[voxelIndex] =
                      static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
             ++voxelIndex;
@@ -452,9 +452,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                // reorient the matrix
                jacobianMatrix=nifti_mat33_mul(reorientation,
                                               jacobianMatrix);
-               if(JacobianMatrices!=NULL)
+               if(JacobianMatrices!=nullptr)
                   JacobianMatrices[voxelIndex]=jacobianMatrix;
-               if(JacobianDeterminants!=NULL)
+               if(JacobianDeterminants!=nullptr)
                   JacobianDeterminants[voxelIndex] =
                         static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
                ++voxelIndex;
@@ -525,9 +525,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                }
                jacobianMatrix=nifti_mat33_mul(reorientation,
                                               jacobianMatrix);
-               if(JacobianMatrices!=NULL)
+               if(JacobianMatrices!=nullptr)
                   JacobianMatrices[voxelIndex]=jacobianMatrix;
-               if(JacobianDeterminants!=NULL)
+               if(JacobianDeterminants!=nullptr)
                   JacobianDeterminants[voxelIndex] =
                         static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
                ++voxelIndex;
@@ -546,14 +546,14 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                            bool approximation,
                            bool useHeaderInformation)
 {
-   if(JacobianMatrices==NULL && JacobianDeterminants==NULL)
+   if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr)
    {
       reg_print_fct_error("reg_spline_jacobian3D");
-      reg_print_msg_error("Both output pointers are NULL");
+      reg_print_msg_error("Both output pointers are nullptr");
       reg_print_msg_error("Nothing to be done");
       reg_exit();
    }
-   if(referenceImage==NULL && approximation==false)
+   if(referenceImage==nullptr && approximation==false)
    {
       reg_print_fct_error("reg_spline_jacobian3D");
       reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position");
@@ -652,9 +652,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   jacobianMatrix.m[2][2] += basisZ[incr0]*coeffZ[incr0];
                }
                jacobianMatrix=nifti_mat33_mul(reorientation,jacobianMatrix);
-               if(JacobianMatrices!=NULL)
+               if(JacobianMatrices!=nullptr)
                   JacobianMatrices[voxelIndex]=jacobianMatrix;
-               if(JacobianDeterminants!=NULL)
+               if(JacobianDeterminants!=nullptr)
                   JacobianDeterminants[voxelIndex] =
                         static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
                ++voxelIndex;
@@ -991,9 +991,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   // reorient the matrix
                   jacobianMatrix=nifti_mat33_mul(reorientation,
                                                  jacobianMatrix);
-                  if(JacobianMatrices!=NULL)
+                  if(JacobianMatrices!=nullptr)
                      JacobianMatrices[voxelIndex]=jacobianMatrix;
-                  if(JacobianDeterminants!=NULL)
+                  if(JacobianDeterminants!=nullptr)
                      JacobianDeterminants[voxelIndex] =
                            static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
                   ++voxelIndex;
@@ -1218,9 +1218,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
 #endif
                   jacobianMatrix=nifti_mat33_mul(reorientation,
                                                  jacobianMatrix);
-                  if(JacobianMatrices!=NULL)
+                  if(JacobianMatrices!=nullptr)
                      JacobianMatrices[voxelIndex]=jacobianMatrix;
-                  if(JacobianDeterminants!=NULL)
+                  if(JacobianDeterminants!=nullptr)
                      JacobianDeterminants[voxelIndex] =
                            static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
                   ++voxelIndex;
@@ -1261,7 +1261,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
       case NIFTI_TYPE_FLOAT32:
          reg_cubic_spline_jacobian2D<float>(splineControlPoint,
                                       referenceImage,
-                                      NULL,
+                                      nullptr,
                                       static_cast<float *>(JacobianDetermiantArray),
                                       approximation,
                                       useHeaderInformation);
@@ -1269,7 +1269,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
       case NIFTI_TYPE_FLOAT64:
          reg_cubic_spline_jacobian2D<double>(splineControlPoint,
                                        referenceImage,
-                                       NULL,
+                                       nullptr,
                                        static_cast<double *>(JacobianDetermiantArray),
                                        approximation,
                                        useHeaderInformation);
@@ -1287,7 +1287,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
       case NIFTI_TYPE_FLOAT32:
          reg_cubic_spline_jacobian3D<float>(splineControlPoint,
                                       referenceImage,
-                                      NULL,
+                                      nullptr,
                                       static_cast<float *>(JacobianDetermiantArray),
                                       approximation,
                                       useHeaderInformation);
@@ -1295,7 +1295,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
       case NIFTI_TYPE_FLOAT64:
          reg_cubic_spline_jacobian3D<double>(splineControlPoint,
                                        referenceImage,
-                                       NULL,
+                                       nullptr,
                                        static_cast<double *>(JacobianDetermiantArray),
                                        approximation,
                                        useHeaderInformation);
@@ -1342,7 +1342,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
    // The allocated array is free'ed
    if(JacobianDetermiantArray)
       free(JacobianDetermiantArray);
-   JacobianDetermiantArray=NULL;
+   JacobianDetermiantArray=nullptr;
    // The penalty term value is normalised and returned
    return penaltySum/(double)detNumber;
 }
@@ -2548,7 +2548,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint,
          case NIFTI_TYPE_FLOAT32:
             reg_linear_spline_jacobian3D<float>(splineControlPoint,
                                                jacobianImage,
-                                               NULL,
+                                               nullptr,
                                                static_cast<float *>(jacobianImage->data),
                                                false,
                                                true);
@@ -2556,7 +2556,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint,
          case NIFTI_TYPE_FLOAT64:
             reg_linear_spline_jacobian3D<double>(splineControlPoint,
                                                 jacobianImage,
-                                                NULL,
+                                                nullptr,
                                                 static_cast<double *>(jacobianImage->data),
                                                 false,
                                                 true);
@@ -2577,7 +2577,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint,
          case NIFTI_TYPE_FLOAT32:
             reg_cubic_spline_jacobian2D<float>(splineControlPoint,
                                                jacobianImage,
-                                               NULL,
+                                               nullptr,
                                                static_cast<float *>(jacobianImage->data),
                                                false,
                                                true);
@@ -2585,7 +2585,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint,
          case NIFTI_TYPE_FLOAT64:
             reg_cubic_spline_jacobian2D<double>(splineControlPoint,
                                                 jacobianImage,
-                                                NULL,
+                                                nullptr,
                                                 static_cast<double *>(jacobianImage->data),
                                                 false,
                                                 true);
@@ -2603,7 +2603,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint,
          case NIFTI_TYPE_FLOAT32:
             reg_cubic_spline_jacobian3D<float>(splineControlPoint,
                                                jacobianImage,
-                                               NULL,
+                                               nullptr,
                                                static_cast<float *>(jacobianImage->data),
                                                false,
                                                true);
@@ -2611,7 +2611,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint,
          case NIFTI_TYPE_FLOAT64:
             reg_cubic_spline_jacobian3D<double>(splineControlPoint,
                                                 jacobianImage,
-                                                NULL,
+                                                nullptr,
                                                 static_cast<double *>(jacobianImage->data),
                                                 false,
                                                 true);
@@ -2638,7 +2638,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage,
          reg_cubic_spline_jacobian2D<float>(splineControlPoint,
                                       referenceImage,
                                       jacobianMatrices,
-                                      NULL,
+                                      nullptr,
                                       false,
                                       true);
          break;
@@ -2646,7 +2646,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage,
          reg_cubic_spline_jacobian2D<double>(splineControlPoint,
                                        referenceImage,
                                        jacobianMatrices,
-                                       NULL,
+                                       nullptr,
                                        false,
                                        true);
          break;
@@ -2664,7 +2664,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage,
          reg_cubic_spline_jacobian3D<float>(splineControlPoint,
                                       referenceImage,
                                       jacobianMatrices,
-                                      NULL,
+                                      nullptr,
                                       false,
                                       true);
          break;
@@ -2672,7 +2672,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage,
          reg_cubic_spline_jacobian3D<double>(splineControlPoint,
                                        referenceImage,
                                        jacobianMatrices,
-                                       NULL,
+                                       nullptr,
                                        false,
                                        true);
          break;
@@ -2692,8 +2692,8 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField,
 {
    size_t voxelNumber=deformationField->nx*deformationField->ny;
 
-   DTYPE *jacDetPtr=NULL;
-   if(jacobianDeterminant!=NULL)
+   DTYPE *jacDetPtr=nullptr;
+   if(jacobianDeterminant!=nullptr)
       jacDetPtr=static_cast<DTYPE *>(jacobianDeterminant->data);
 
    float spacing[3];
@@ -2765,9 +2765,9 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField,
          jacobianMatrix.m[1][1] /= spacing[1];
 
          // Update the output arrays if required
-         if(jacobianDeterminant!=NULL)
+         if(jacobianDeterminant!=nullptr)
             jacDetPtr[currentIndex] = nifti_mat33_determ(jacobianMatrix);
-         if(jacobianMatrices!=NULL)
+         if(jacobianMatrices!=nullptr)
             jacobianMatrices[currentIndex]=jacobianMatrix;
          // Increment the pointer
          currentIndex++;
@@ -2785,9 +2785,9 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField,
          if(y==deformationField->ny-1) index -= deformationField->nx;
          if(currentIndex!=index)
          {
-            if(jacobianDeterminant!=NULL)
+            if(jacobianDeterminant!=nullptr)
                jacDetPtr[currentIndex] = jacDetPtr[index];
-            if(jacobianMatrices!=NULL)
+            if(jacobianMatrices!=nullptr)
                jacobianMatrices[currentIndex] = jacobianMatrices[index];
          }
          ++currentIndex;
@@ -2802,8 +2802,8 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField,
 {
    size_t voxelNumber=deformationField->nx*deformationField->ny*deformationField->nz;
 
-   DTYPE *jacDetPtr=NULL;
-   if(jacobianDeterminant!=NULL)
+   DTYPE *jacDetPtr=nullptr;
+   if(jacobianDeterminant!=nullptr)
       jacDetPtr=static_cast<DTYPE *>(jacobianDeterminant->data);
 
    float spacing[3];
@@ -2896,9 +2896,9 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField,
             jacobianMatrix.m[2][2] /= spacing[2];
 
             // Update the output arrays if required
-            if(jacobianDeterminant!=NULL)
+            if(jacobianDeterminant!=nullptr)
                jacDetPtr[currentIndex] = nifti_mat33_determ(jacobianMatrix);
-            if(jacobianMatrices!=NULL)
+            if(jacobianMatrices!=nullptr)
                jacobianMatrices[currentIndex]=jacobianMatrix;
             // Increment the pointer
             currentIndex++;
@@ -2919,9 +2919,9 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField,
             if(z==deformationField->nz-1) index -= deformationField->nx*deformationField->ny;
             if(currentIndex!=index)
             {
-               if(jacobianDeterminant!=NULL)
+               if(jacobianDeterminant!=nullptr)
                   jacDetPtr[currentIndex] = jacDetPtr[index];
-               if(jacobianMatrices!=NULL)
+               if(jacobianMatrices!=nullptr)
                   jacobianMatrices[currentIndex] = jacobianMatrices[index];
             }
             ++currentIndex;
@@ -2943,13 +2943,13 @@ void reg_defField_getJacobianMap(nifti_image *deformationField,
    {
    case NIFTI_TYPE_FLOAT32:
       if(deformationField->nz>1)
-         reg_defField_getJacobianMap3D<float>(deformationField,jacobianImage,NULL);
-      else reg_defField_getJacobianMap2D<float>(deformationField,jacobianImage,NULL);
+         reg_defField_getJacobianMap3D<float>(deformationField,jacobianImage,nullptr);
+      else reg_defField_getJacobianMap2D<float>(deformationField,jacobianImage,nullptr);
       break;
    case NIFTI_TYPE_FLOAT64:
       if(deformationField->nz>1)
-         reg_defField_getJacobianMap3D<double>(deformationField,jacobianImage,NULL);
-      else reg_defField_getJacobianMap2D<double>(deformationField,jacobianImage,NULL);
+         reg_defField_getJacobianMap3D<double>(deformationField,jacobianImage,nullptr);
+      else reg_defField_getJacobianMap2D<double>(deformationField,jacobianImage,nullptr);
       break;
    default:
       reg_print_fct_error("reg_defField_getJacobianMap");
@@ -2966,13 +2966,13 @@ void reg_defField_getJacobianMatrix(nifti_image *deformationField,
    {
    case NIFTI_TYPE_FLOAT32:
       if(deformationField->nz>1)
-         reg_defField_getJacobianMap3D<float>(deformationField,NULL,jacobianMatrices);
-      else reg_defField_getJacobianMap2D<float>(deformationField,NULL,jacobianMatrices);
+         reg_defField_getJacobianMap3D<float>(deformationField,nullptr,jacobianMatrices);
+      else reg_defField_getJacobianMap2D<float>(deformationField,nullptr,jacobianMatrices);
       break;
    case NIFTI_TYPE_FLOAT64:
       if(deformationField->nz>1)
-         reg_defField_getJacobianMap3D<double>(deformationField,NULL,jacobianMatrices);
-      else reg_defField_getJacobianMap2D<double>(deformationField,NULL,jacobianMatrices);
+         reg_defField_getJacobianMap3D<double>(deformationField,nullptr,jacobianMatrices);
+      else reg_defField_getJacobianMap2D<double>(deformationField,nullptr,jacobianMatrices);
       break;
    default:
       reg_print_fct_error("reg_defField_getJacobianMatrix");
@@ -2995,7 +2995,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
    // Remove the affine component from the flow field
    if(flowFieldImage->num_ext>0)
    {
-      if(flowFieldImage->ext_list[0].edata!=NULL)
+      if(flowFieldImage->ext_list[0].edata!=nullptr)
       {
          // Create a field that contains the affine component only
          reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata),
@@ -3031,7 +3031,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
    reg_mat33_eye(&affineMatrix);
    if(flowFieldImage->num_ext>0)
    {
-      if(flowFieldImage->ext_list[0].edata!=NULL)
+      if(flowFieldImage->ext_list[0].edata!=nullptr)
       {
          affineMatrix = reg_mat44_to_mat33(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata));
       }
@@ -3057,7 +3057,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
       // The deformation field is applied to itself
       reg_defField_compose(defFieldImage,
                            flowFieldImage,
-                           NULL);
+                           nullptr);
       // The computed scaled deformation field is copied over
       memcpy(defFieldImage->data, flowFieldImage->data,
              defFieldImage->nvox*defFieldImage->nbyper);
@@ -3072,7 +3072,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
    // The second half of the affine is added if required
    if(flowFieldImage->num_ext>1)
    {
-      if(flowFieldImage->ext_list[1].edata!=NULL)
+      if(flowFieldImage->ext_list[1].edata!=nullptr)
       {
          affineMatrix = reg_mat44_to_mat33(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[1].edata));
       }
diff --git a/reg-lib/cpu/_reg_localTrans_jac.h b/reg-lib/cpu/_reg_localTrans_jac.h
index 409fda94..0db8d485 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.h
+++ b/reg-lib/cpu/_reg_localTrans_jac.h
@@ -11,8 +11,7 @@
  *
  */
 
-#ifndef _REG_TRANS_JAC_H
-#define _REG_TRANS_JAC_H
+#pragma once
 
 #include "_reg_localTrans.h"
 
@@ -154,6 +153,3 @@ extern "C++"
 int reg_spline_GetJacobianDetFromVelocityGrid(nifti_image *jacobianDetImage,
                                               nifti_image *velocityGridImage);
 /* *************************************************************** */
-
-
-#endif
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index cf834058..f4b41325 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -1947,7 +1947,7 @@ double reg_spline_getLandmarkDistance_core(nifti_image *controlPointImage,
       gridRealToVox = &(controlPointImage->sto_ijk);
    DTYPE *gridPtrX = static_cast<DTYPE *>(controlPointImage->data);
    DTYPE *gridPtrY = &gridPtrX[controlPointNumber];
-   DTYPE *gridPtrZ=NULL;
+   DTYPE *gridPtrZ=nullptr;
    if(imageDim>2)
       gridPtrZ = &gridPtrY[controlPointNumber];
 
@@ -2080,8 +2080,8 @@ void reg_spline_getLandmarkDistanceGradient_core(nifti_image *controlPointImage,
    DTYPE *gradPtrX = static_cast<DTYPE *>(gradientImage->data);
    DTYPE *gridPtrY = &gridPtrX[controlPointNumber];
    DTYPE *gradPtrY = &gradPtrX[controlPointNumber];
-   DTYPE *gridPtrZ=NULL;
-   DTYPE *gradPtrZ=NULL;
+   DTYPE *gridPtrZ=nullptr;
+   DTYPE *gradPtrZ=nullptr;
    if(imageDim>2){
       gridPtrZ = &gridPtrY[controlPointNumber];
       gradPtrZ = &gradPtrY[controlPointNumber];
diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h
index 26e0e8f9..27a49dec 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.h
+++ b/reg-lib/cpu/_reg_localTrans_regul.h
@@ -11,8 +11,7 @@
  *
  */
 
-#ifndef _REG_TRANS_REG_H
-#define _REG_TRANS_REG_H
+#pragma once
 
 #include "_reg_splineBasis.h"
 
@@ -157,4 +156,3 @@ void reg_spline_approxLinearPairwiseGradient(nifti_image *controlPointGridImage,
 extern "C++"
 double reg_spline_approxLinearPairwise(nifti_image *controlPointGridImage);
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cpu/_reg_macros.h b/reg-lib/cpu/_reg_macros.h
index ee5eed00..d2879898 100644
--- a/reg-lib/cpu/_reg_macros.h
+++ b/reg-lib/cpu/_reg_macros.h
@@ -1,30 +1,29 @@
 /*
  * Reg Macros - Helper macros based on vtkSetGet.h that makes
  * it easy to creat functions for simple Get and Set functions
- * of class memebers
+ * of class members
  */
 
-#ifndef _REG_MACROS_H
-#define _REG_MACROS_H
+#pragma once
 
 //
 // Set built-in type.  Creates member Set"name"() (e.g., SetVisibility());
 //
-#define SetMacro(name,type) \
+#define SetMacro(name,var,type) \
 virtual void Set##name (type _arg) \
   { \
-  if (this->name != _arg) \
+  if (this->var != _arg) \
     { \
-    this->name = _arg; \
+    this->var = _arg; \
     } \
   }
 
 //
 // Get built-in type.  Creates member Get"name"() (e.g., GetVisibility());
 //
-#define GetMacro(name,type) \
+#define GetMacro(name,var,type) \
 virtual type Get##name () { \
-  return this->name; \
+  return this->var; \
   }
 
 //
@@ -35,14 +34,14 @@ virtual type Get##name () { \
   virtual void name##On () { this->Set##name(static_cast<type>(1));}   \
   virtual void name##Off () { this->Set##name(static_cast<type>(0));}
 
-#define SetVector3Macro(name,type) \
+#define SetVector3Macro(name,var,type) \
 virtual void Set##name (type _arg1, type _arg2, type _arg3) \
   { \
-  if ((this->name[0] != _arg1)||(this->name[1] != _arg2)||(this->name[2] != _arg3)) \
+  if ((this->var[0] != _arg1)||(this->var[1] != _arg2)||(this->var[2] != _arg3)) \
     { \
-    this->name[0] = _arg1; \
-    this->name[1] = _arg2; \
-    this->name[2] = _arg3; \
+    this->var[0] = _arg1; \
+    this->var[1] = _arg2; \
+    this->var[2] = _arg3; \
     } \
   }; \
 virtual void Set##name (type _arg[3]) \
@@ -50,28 +49,28 @@ virtual void Set##name (type _arg[3]) \
   this->Set##name (_arg[0], _arg[1], _arg[2]);\
   }
 
-#define GetVector3Macro(name,type) \
+#define GetVector3Macro(name,var,type) \
 virtual type *Get##name () \
 { \
-  return this->name; \
+  return this->var; \
 } \
 virtual void Get##name (type &_arg1, type &_arg2, type &_arg3) \
   { \
-    _arg1 = this->name[0]; \
-    _arg2 = this->name[1]; \
-    _arg3 = this->name[2]; \
+    _arg1 = this->var[0]; \
+    _arg2 = this->var[1]; \
+    _arg3 = this->var[2]; \
   }; \
 virtual void Get##name (type _arg[3]) \
   { \
   this->Get##name (_arg[0], _arg[1], _arg[2]);\
   }
 
-#define SetClampMacro(name,type,min,max) \
+#define SetClampMacro(name,var,type,min,max) \
 virtual void Set##name (type _arg) \
   { \
-  if (this->name != (_arg<min?min:(_arg>max?max:_arg))) \
+  if (this->var != (_arg<min?min:(_arg>max?max:_arg))) \
     { \
-    this->name = (_arg<min?min:(_arg>max?max:_arg)); \
+    this->var = (_arg<min?min:(_arg>max?max:_arg)); \
     } \
   } \
 virtual type Get##name##MinValue () \
@@ -83,23 +82,23 @@ virtual type Get##name##MaxValue () \
   return max; \
   }
 
-#define SetStringMacro(name) \
+#define SetStringMacro(name,var) \
 virtual void Set##name (const char* _arg) \
   { \
-  if ( this->name == NULL && _arg == NULL) { return;} \
-  if ( this->name && _arg && (!strcmp(this->name,_arg))) { return;} \
-  if (this->name) { delete [] this->name; } \
+  if ( this->var == nullptr && _arg == nullptr) { return;} \
+  if ( this->var && _arg && (!strcmp(this->var,_arg))) { return;} \
+  if (this->var) { delete [] this->var; } \
   if (_arg) \
     { \
     size_t n = strlen(_arg) + 1; \
     char *cp1 =  new char[n]; \
     const char *cp2 = (_arg); \
-    this->name = cp1; \
+    this->var = cp1; \
     do { *cp1++ = *cp2++; } while ( --n ); \
     } \
    else \
     { \
-    this->name = NULL; \
+    this->var = nullptr; \
     } \
   }
 
@@ -107,10 +106,7 @@ virtual void Set##name (const char* _arg) \
 // Get character string.  Creates member Get"name"()
 // (e.g., char *GetFilename());
 //
-#define GetStringMacro(name) \
+#define GetStringMacro(name,var) \
 virtual char* Get##name () { \
-  return this->name; \
+  return this->var; \
   }
-
-
-#endif // _REG_MACROS_H
diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp
index b21175c9..b587175e 100644
--- a/reg-lib/cpu/_reg_maths.cpp
+++ b/reg-lib/cpu/_reg_maths.cpp
@@ -1,6 +1,3 @@
-#ifndef _REG_MATHS_CPP
-#define _REG_MATHS_CPP
-
 #include "_reg_maths.h"
 //STD
 #include <map>
@@ -84,7 +81,7 @@ void reg_matrixInvertMultiply(T *mat,
                               T *vec)
 {
     // Perform the LU decomposition if necessary
-    if (index == NULL)
+    if (index == nullptr)
         reg_LUdecomposition(mat, dim, index);
 
     int ii = 0;
@@ -132,7 +129,7 @@ void reg_matrixMultiply(T *mat1,
     }
     size_t resDim[2] = {dim1[0], dim2[1]};
     // Allocate the result matrix
-    if (res != NULL)
+    if (res != nullptr)
         free(res);
     res = (T *)calloc(resDim[0] * resDim[1], sizeof(T));
     // Multiply both matrices
@@ -990,4 +987,3 @@ T pythag(T a, T b)
     else
         return (absb == 0.0f ? 0.0f : (T)(absb * sqrt(1.0f + SQR(absa / absb))));
 }
-#endif // _REG_MATHS_CPP
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index 450ae1c1..e6feead6 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -11,8 +11,8 @@
  *  See the LICENSE.txt file in the nifty_reg root folder
  *
  */
-#ifndef _REG_MATHS_H
-#define _REG_MATHS_H
+
+#pragma once
 
 #include <limits>
 #include <stdio.h>
@@ -286,4 +286,3 @@ double get_square_distance3D(float * first_point3D, float * second_point3D);
 /* *************************************************************** */
 double get_square_distance2D(float * first_point2D, float * second_point2D);
 /* *************************************************************** */
-#endif // _REG_MATHS_H
diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp
index b9dc020d..07965a5e 100644
--- a/reg-lib/cpu/_reg_maths_eigen.cpp
+++ b/reg-lib/cpu/_reg_maths_eigen.cpp
@@ -81,7 +81,7 @@ template void svd<double>(double **in, size_t m, size_t n, double * w, double **
 */
 template<class T>
 void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
-   if (in == NULL) {
+   if (in == nullptr) {
       reg_print_fct_error("svd");
       reg_print_msg_error("The specified matrix is empty");
       reg_exit();
diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h
index 5ac56dd9..6288764c 100644
--- a/reg-lib/cpu/_reg_maths_eigen.h
+++ b/reg-lib/cpu/_reg_maths_eigen.h
@@ -1,6 +1,4 @@
-//_reg_maths_eigen.h
-#ifndef _REG_MATHS_EIGEN_H
-#define _REG_MATHS_EIGEN_H
+#pragma once
 
 #include "nifti1_io.h"
 
@@ -47,5 +45,3 @@ mat44 reg_mat44_logm(const mat44 *mat);
 * framework
 */
 mat44 reg_mat44_avg2(mat44 const* A, mat44 const* b);
-
-#endif
diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h
index 226a09cc..2c036243 100755
--- a/reg-lib/cpu/_reg_measure.h
+++ b/reg-lib/cpu/_reg_measure.h
@@ -5,8 +5,7 @@
  * Also contains an interface class between reg_base and the measure class
  */
 
-#ifndef _REG_MEASURE_H
-#define _REG_MEASURE_H
+#pragma once
 
 #include "_reg_tools.h"
 #include <time.h>
@@ -23,11 +22,11 @@ class reg_measure
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          nifti_image *localWeightSimPtr = NULL,
-                          int *maskFloPtr = NULL,
-                          nifti_image *warRefImgPtr = NULL,
-                          nifti_image *warRefGraPtr = NULL,
-                          nifti_image *bckVoxBasedGraPtr = NULL)
+                          nifti_image *localWeightSimPtr = nullptr,
+                          int *maskFloPtr = nullptr,
+                          nifti_image *warRefImgPtr = nullptr,
+                          nifti_image *warRefGraPtr = nullptr,
+                          nifti_image *bckVoxBasedGraPtr = nullptr)
    {
       this->isSymmetric=false;
       this->referenceImagePointer=refImgPtr;
@@ -38,7 +37,7 @@ class reg_measure
       this->warpedFloatingGradientImagePointer=warFloGraPtr;
       this->forwardVoxelBasedGradientImagePointer=forVoxBasedGraPtr;
       this->forwardLocalWeightSimImagePointer=localWeightSimPtr;
-      if(maskFloPtr != NULL && warRefImgPtr!=NULL && warRefGraPtr!=NULL && bckVoxBasedGraPtr!=NULL) {
+      if(maskFloPtr != nullptr && warRefImgPtr!=nullptr && warRefGraPtr!=nullptr && bckVoxBasedGraPtr!=nullptr) {
          this->isSymmetric=true;
          this->floatingMaskPointer=maskFloPtr;
          this->warpedReferenceImagePointer=warRefImgPtr;
@@ -46,10 +45,10 @@ class reg_measure
          this->backwardVoxelBasedGradientImagePointer=bckVoxBasedGraPtr;
       }
       else {
-          this->floatingMaskPointer=NULL;
-          this->warpedReferenceImagePointer=NULL;
-          this->warpedReferenceGradientImagePointer=NULL;
-          this->backwardVoxelBasedGradientImagePointer=NULL;
+          this->floatingMaskPointer=nullptr;
+          this->warpedReferenceImagePointer=nullptr;
+          this->warpedReferenceGradientImagePointer=nullptr;
+          this->backwardVoxelBasedGradientImagePointer=nullptr;
       }
 #ifndef NDEBUG
       printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n");
@@ -115,4 +114,3 @@ class reg_measure
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-#endif // _REG_MEASURE_H
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 8157a9c5..0601cdea 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -169,7 +169,7 @@ void GetMINDImageDesciptor_core(nifti_image* inputImage,
    nifti_image_free(diff_image);
    nifti_image_free(shiftedImage);
    nifti_image_free(meanImage);
-   currentInputImage->data=NULL;
+   currentInputImage->data=nullptr;
    nifti_image_free(currentInputImage);
 }
 /* *************************************************************** */
@@ -331,7 +331,7 @@ void GetMINDSSCImageDesciptor_core(nifti_image* inputImage,
    nifti_image_free(diff_image);
    nifti_image_free(shiftedImage);
    nifti_image_free(mean_img);
-   currentInputImage->data=NULL;
+   currentInputImage->data=nullptr;
    nifti_image_free(currentInputImage);
 }
 /* *************************************************************** */
@@ -369,10 +369,10 @@ reg_mind::reg_mind()
    : reg_ssd()
 {
    memset(this->timePointWeightDescriptor,0,255*sizeof(double) );
-   this->referenceImageDescriptor=NULL;
-   this->floatingImageDescriptor=NULL;
-   this->warpedFloatingImageDescriptor=NULL;
-   this->warpedReferenceImageDescriptor=NULL;
+   this->referenceImageDescriptor=nullptr;
+   this->floatingImageDescriptor=nullptr;
+   this->warpedFloatingImageDescriptor=nullptr;
+   this->warpedReferenceImageDescriptor=nullptr;
    this->mind_type=MIND_TYPE;
    this->descriptorOffset=1;
 #ifndef NDEBUG
@@ -391,21 +391,21 @@ int reg_mind::GetDescriptorOffset()
 }
 /* *************************************************************** */
 reg_mind::~reg_mind() {
-   if(this->referenceImageDescriptor != NULL)
+   if(this->referenceImageDescriptor != nullptr)
       nifti_image_free(this->referenceImageDescriptor);
-   this->referenceImageDescriptor = NULL;
+   this->referenceImageDescriptor = nullptr;
 
-   if(this->warpedFloatingImageDescriptor != NULL)
+   if(this->warpedFloatingImageDescriptor != nullptr)
       nifti_image_free(this->warpedFloatingImageDescriptor);
-   this->warpedFloatingImageDescriptor = NULL;
+   this->warpedFloatingImageDescriptor = nullptr;
 
-   if(this->floatingImageDescriptor != NULL)
+   if(this->floatingImageDescriptor != nullptr)
       nifti_image_free(this->floatingImageDescriptor);
-   this->floatingImageDescriptor = NULL;
+   this->floatingImageDescriptor = nullptr;
 
-   if(this->warpedReferenceImageDescriptor != NULL)
+   if(this->warpedReferenceImageDescriptor != nullptr)
       nifti_image_free(this->warpedReferenceImageDescriptor);
-   this->warpedReferenceImageDescriptor = NULL;
+   this->warpedReferenceImageDescriptor = nullptr;
 }
 /* *************************************************************** */
 void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
@@ -548,10 +548,10 @@ double reg_mind::GetSimilarityMeasureValue()
                   (this->referenceImageDescriptor,
                    this->warpedFloatingImageDescriptor,
                    this->timePointWeightDescriptor,
-                   NULL, // HERE TODO this->forwardJacDetImagePointer,
+                   nullptr, // HERE TODO this->forwardJacDetImagePointer,
                    combinedMask,
                    this->currentValue,
-                   NULL
+                   nullptr
                    );
             break;
          case NIFTI_TYPE_FLOAT64:
@@ -559,10 +559,10 @@ double reg_mind::GetSimilarityMeasureValue()
                   (this->referenceImageDescriptor,
                    this->warpedFloatingImageDescriptor,
                    this->timePointWeightDescriptor,
-                   NULL, // HERE TODO this->forwardJacDetImagePointer,
+                   nullptr, // HERE TODO this->forwardJacDetImagePointer,
                    combinedMask,
                    this->currentValue,
-                   NULL
+                   nullptr
                    );
             break;
          default:
@@ -614,10 +614,10 @@ double reg_mind::GetSimilarityMeasureValue()
                      (this->floatingImageDescriptor,
                       this->warpedReferenceImageDescriptor,
                       this->timePointWeightDescriptor,
-                      NULL, // HERE TODO this->backwardJacDetImagePointer,
+                      nullptr, // HERE TODO this->backwardJacDetImagePointer,
                       combinedMask,
                       this->currentValue,
-                      NULL
+                      nullptr
                       );
                break;
             case NIFTI_TYPE_FLOAT64:
@@ -625,10 +625,10 @@ double reg_mind::GetSimilarityMeasureValue()
                      (this->floatingImageDescriptor,
                       this->warpedReferenceImageDescriptor,
                       this->timePointWeightDescriptor,
-                      NULL, // HERE TODO this->backwardJacDetImagePointer,
+                      nullptr, // HERE TODO this->backwardJacDetImagePointer,
                       combinedMask,
                       this->currentValue,
-                      NULL
+                      nullptr
                       );
                break;
             default:
@@ -706,11 +706,11 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                 this->warpedFloatingImageDescriptor,
                 this->warpedFloatingGradientImagePointer,
                 this->forwardVoxelBasedGradientImagePointer,
-                NULL, // no Jacobian required here,
+                nullptr, // no Jacobian required here,
                 combinedMask,
                 desc_index,
                 1.0, //all discriptors given weight of 1
-                NULL
+                nullptr
                 );
          break;
       case NIFTI_TYPE_FLOAT64:
@@ -719,11 +719,11 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                 this->warpedFloatingImageDescriptor,
                 this->warpedFloatingGradientImagePointer,
                 this->forwardVoxelBasedGradientImagePointer,
-                NULL, // no Jacobian required here,
+                nullptr, // no Jacobian required here,
                 combinedMask,
                 desc_index,
                 1.0, //all discriptors given weight of 1
-                NULL
+                nullptr
                 );
          break;
       default:
@@ -785,11 +785,11 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                    this->warpedReferenceImageDescriptor,
                    this->warpedReferenceGradientImagePointer,
                    this->backwardVoxelBasedGradientImagePointer,
-                   NULL, // no Jacobian required here,
+                   nullptr, // no Jacobian required here,
                    combinedMask,
                    desc_index,
                    1.0, //all discriptors given weight of 1
-                   NULL
+                   nullptr
                    );
             break;
          case NIFTI_TYPE_FLOAT64:
@@ -798,11 +798,11 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                    this->warpedReferenceImageDescriptor,
                    this->warpedReferenceGradientImagePointer,
                    this->backwardVoxelBasedGradientImagePointer,
-                   NULL, // no Jacobian required here,
+                   nullptr, // no Jacobian required here,
                    combinedMask,
                    desc_index,
                    1.0, //all discriptors given weight of 1
-                   NULL
+                   nullptr
                    );
             break;
          default:
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index 90a8e9f5..04404904 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -9,8 +9,8 @@
  *  See the LICENSE.txt file in the nifty_reg root folder
  *
  */
-#ifndef _REG_MIND_H
-#define _REG_MIND_H
+
+#pragma once
 
 #include "_reg_ssd.h"
 //#include "ConvolutionKernel.h"
@@ -38,11 +38,11 @@ class reg_mind : public reg_ssd
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          nifti_image *forwardLocalWeightPtr = NULL,
-                          int *maskFloPtr = NULL,
-                          nifti_image *warRefImgPtr = NULL,
-                          nifti_image *warRefGraPtr = NULL,
-                          nifti_image *bckVoxBasedGraPtr = NULL);
+                          nifti_image *forwardLocalWeightPtr = nullptr,
+                          int *maskFloPtr = nullptr,
+                          nifti_image *warRefImgPtr = nullptr,
+                          nifti_image *warRefGraPtr = nullptr,
+                          nifti_image *bckVoxBasedGraPtr = nullptr);
    /// @brief Returns the mind based measure of similarity value
    virtual double GetSimilarityMeasureValue();
    /// @brief Compute the voxel based gradient
@@ -88,4 +88,3 @@ void GetMINDSSCImageDesciptor(nifti_image* inputImgPtr,
                               int *mask,
                               int descriptorOffset,
                               int current_timepoint);
-#endif
diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp
index c3c8c3d7..349eee33 100644
--- a/reg-lib/cpu/_reg_mrf.cpp
+++ b/reg-lib/cpu/_reg_mrf.cpp
@@ -12,9 +12,9 @@ reg_mrf::reg_mrf(int _discrete_radius,
                  int _img_dim,
                  size_t _node_number)
 {
-    this->measure = NULL;
-    this->referenceImage = NULL;
-    this->controlPointImage = NULL;
+    this->measure = nullptr;
+    this->referenceImage = nullptr;
+    this->controlPointImage = nullptr;
     this->discrete_radius = _discrete_radius;
     this->discrete_increment = _discrete_increment;
     this->regularisation_weight = _reg_weight;
@@ -124,42 +124,42 @@ reg_mrf::reg_mrf(reg_measure *_measure,
 /*****************************************************/
 reg_mrf::~reg_mrf()
 {
-   if(this->discretised_measures!=NULL)
+   if(this->discretised_measures!=nullptr)
       free(this->discretised_measures);
-   this->discretised_measures=NULL;
+   this->discretised_measures=nullptr;
 
-   if(this->orderedList!=NULL)
+   if(this->orderedList!=nullptr)
       free(this->orderedList);
-   this->orderedList=NULL;
+   this->orderedList=nullptr;
 
-   if(this->parentsList!=NULL)
+   if(this->parentsList!=nullptr)
       free(this->parentsList);
-   this->parentsList=NULL;
+   this->parentsList=nullptr;
 
-   if(this->edgeWeight!=NULL)
+   if(this->edgeWeight!=nullptr)
       free(this->edgeWeight);
-   this->edgeWeight=NULL;
+   this->edgeWeight=nullptr;
 
-   if(this->regularised_cost!=NULL)
+   if(this->regularised_cost!=nullptr)
       free(this->regularised_cost);
-   this->regularised_cost=NULL;
+   this->regularised_cost=nullptr;
 
-   if(this->optimal_label_index!=NULL)
+   if(this->optimal_label_index!=nullptr)
       free(this->optimal_label_index);
-   this->optimal_label_index=NULL;
+   this->optimal_label_index=nullptr;
 
    for(int i=0; i<this->image_dim; ++i){
-      if(this->discrete_values_mm[i]!=NULL)
+      if(this->discrete_values_mm[i]!=nullptr)
          free(this->discrete_values_mm[i]);
-      this->discrete_values_mm[i]=NULL;
+      this->discrete_values_mm[i]=nullptr;
    }
-   if(this->discrete_values_mm!=NULL)
+   if(this->discrete_values_mm!=nullptr)
       free(this->discrete_values_mm);
-   this->discrete_values_mm=NULL;
+   this->discrete_values_mm=nullptr;
 
-   if(this->input_transformation!=NULL)
+   if(this->input_transformation!=nullptr)
       nifti_image_free(this->input_transformation);
-   this->input_transformation=NULL;
+   this->input_transformation=nullptr;
 }
 /*****************************************************/
 void reg_mrf::Initialise()
@@ -282,7 +282,7 @@ for(int i=0;i<32388174;i++){
 #endif
 }
 /*****************************************************/
-void reg_mrf::getOptimalLabel()
+void reg_mrf::GetOptimalLabel()
 {
    for(size_t node=0; node<this->node_number; ++node) {
       this->optimal_label_index[node]=
@@ -335,7 +335,7 @@ void reg_mrf::Run()
        this->GetRegularisation();
        // Extract the best label
        //memcpy(this->regularised_cost, this->discretised_measures, this->node_number*this->label_nD_num*sizeof(float));
-       this->getOptimalLabel();
+       this->GetOptimalLabel();
        // Update the control point positions
        this->UpdateNodePositions();
    //}
diff --git a/reg-lib/cpu/_reg_mrf.h b/reg-lib/cpu/_reg_mrf.h
index 6a59ac2c..4391b1de 100644
--- a/reg-lib/cpu/_reg_mrf.h
+++ b/reg-lib/cpu/_reg_mrf.h
@@ -12,8 +12,7 @@
  *
  */
 
-#ifndef _REG_MRF_H
-#define _REG_MRF_H
+#pragma once
 
 #include "_reg_measure.h"
 #include "_reg_localTrans_regul.h"
@@ -58,7 +57,7 @@ class reg_mrf
    //
    void GetRegularisation();
    //
-   void getOptimalLabel();
+   void GetOptimalLabel();
    int* GetOptimalLabelPtr();
    //
    int* GetOrderedListPtr();
@@ -123,4 +122,3 @@ void dt1sq(float *val,int* ind,int len,float offset,int k,int* v,float* z,float*
 extern "C++"
 void dt3x(float* r,int* indr,int rl,float dx,float dy,float dz);
 /********************************************************************************************************/
-#endif // _REG_MRF_H
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 7a03d55d..6af365da 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_NMI_CPP
-#define _REG_NMI_CPP
-
 #include "_reg_nmi.h"
 
 /* *************************************************************** */
@@ -20,12 +17,12 @@
 reg_nmi::reg_nmi()
    : reg_measure()
 {
-   this->forwardJointHistogramPro=NULL;
-   this->forwardJointHistogramLog=NULL;
-   this->forwardEntropyValues=NULL;
-   this->backwardJointHistogramPro=NULL;
-   this->backwardJointHistogramLog=NULL;
-   this->backwardEntropyValues=NULL;
+   this->forwardJointHistogramPro=nullptr;
+   this->forwardJointHistogramLog=nullptr;
+   this->forwardEntropyValues=nullptr;
+   this->backwardJointHistogramPro=nullptr;
+   this->backwardJointHistogramLog=nullptr;
+   this->backwardEntropyValues=nullptr;
 
    for(int i=0; i<255; ++i)
    {
@@ -50,74 +47,74 @@ void reg_nmi::ClearHistogram()
 {
    int timepoint=this->referenceTimePoint;
    // Free the joint histograms and the entropy arrays
-   if(this->forwardJointHistogramPro!=NULL)
+   if(this->forwardJointHistogramPro!=nullptr)
    {
       for(int i=0; i<timepoint; ++i)
       {
-         if(this->forwardJointHistogramPro[i]!=NULL)
+         if(this->forwardJointHistogramPro[i]!=nullptr)
             free(this->forwardJointHistogramPro[i]);
-         this->forwardJointHistogramPro[i]=NULL;
+         this->forwardJointHistogramPro[i]=nullptr;
       }
       free(this->forwardJointHistogramPro);
    }
-   this->forwardJointHistogramPro=NULL;
-   if(this->backwardJointHistogramPro!=NULL)
+   this->forwardJointHistogramPro=nullptr;
+   if(this->backwardJointHistogramPro!=nullptr)
    {
       for(int i=0; i<timepoint; ++i)
       {
-         if(this->backwardJointHistogramPro[i]!=NULL)
+         if(this->backwardJointHistogramPro[i]!=nullptr)
             free(this->backwardJointHistogramPro[i]);
-         this->backwardJointHistogramPro[i]=NULL;
+         this->backwardJointHistogramPro[i]=nullptr;
       }
       free(this->backwardJointHistogramPro);
    }
-   this->backwardJointHistogramPro=NULL;
+   this->backwardJointHistogramPro=nullptr;
 
-   if(this->forwardJointHistogramLog!=NULL)
+   if(this->forwardJointHistogramLog!=nullptr)
    {
       for(int i=0; i<timepoint; ++i)
       {
-         if(this->forwardJointHistogramLog[i]!=NULL)
+         if(this->forwardJointHistogramLog[i]!=nullptr)
             free(this->forwardJointHistogramLog[i]);
-         this->forwardJointHistogramLog[i]=NULL;
+         this->forwardJointHistogramLog[i]=nullptr;
       }
       free(this->forwardJointHistogramLog);
    }
-   this->forwardJointHistogramLog=NULL;
-   if(this->backwardJointHistogramLog!=NULL)
+   this->forwardJointHistogramLog=nullptr;
+   if(this->backwardJointHistogramLog!=nullptr)
    {
       for(int i=0; i<timepoint; ++i)
       {
-         if(this->backwardJointHistogramLog[i]!=NULL)
+         if(this->backwardJointHistogramLog[i]!=nullptr)
             free(this->backwardJointHistogramLog[i]);
-         this->backwardJointHistogramLog[i]=NULL;
+         this->backwardJointHistogramLog[i]=nullptr;
       }
       free(this->backwardJointHistogramLog);
    }
-   this->backwardJointHistogramLog=NULL;
+   this->backwardJointHistogramLog=nullptr;
 
-   if(this->forwardEntropyValues!=NULL)
+   if(this->forwardEntropyValues!=nullptr)
    {
       for(int i=0; i<timepoint; ++i)
       {
-         if(this->forwardEntropyValues[i]!=NULL)
+         if(this->forwardEntropyValues[i]!=nullptr)
             free(this->forwardEntropyValues[i]);
-         this->forwardEntropyValues[i]=NULL;
+         this->forwardEntropyValues[i]=nullptr;
       }
       free(this->forwardEntropyValues);
    }
-   this->forwardEntropyValues=NULL;
-   if(this->backwardEntropyValues!=NULL)
+   this->forwardEntropyValues=nullptr;
+   if(this->backwardEntropyValues!=nullptr)
    {
       for(int i=0; i<timepoint; ++i)
       {
-         if(this->backwardEntropyValues[i]!=NULL)
+         if(this->backwardEntropyValues[i]!=nullptr)
             free(this->backwardEntropyValues[i]);
-         this->backwardEntropyValues[i]=NULL;
+         this->backwardEntropyValues[i]=nullptr;
       }
       free(this->backwardEntropyValues);
    }
-   this->backwardEntropyValues=NULL;
+   this->backwardEntropyValues=nullptr;
 #ifndef NDEBUG
    reg_print_msg_debug("reg_nmi::ClearHistogram called");
 #endif
@@ -203,14 +200,14 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
       }
       else
       {
-         this->forwardJointHistogramLog[i]=NULL;
-         this->forwardJointHistogramPro[i]=NULL;
-         this->forwardEntropyValues[i]=NULL;
+         this->forwardJointHistogramLog[i]=nullptr;
+         this->forwardJointHistogramPro[i]=nullptr;
+         this->forwardEntropyValues[i]=nullptr;
          if(this->isSymmetric)
          {
-            this->backwardJointHistogramLog[i]=NULL;
-            this->backwardJointHistogramPro[i]=NULL;
-            this->backwardEntropyValues[i]=NULL;
+            this->backwardJointHistogramLog[i]=nullptr;
+            this->backwardJointHistogramPro[i]=nullptr;
+            this->backwardEntropyValues[i]=nullptr;
          }
       }
    }
@@ -977,5 +974,3 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
 }
 /* *************************************************************** */
 /* *************************************************************** */
-
-#endif // _REG_NMI
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 5fdf67f5..413ff46b 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_MUTUALINFORMATION_H
-#define _REG_MUTUALINFORMATION_H
+#pragma once
 
 #include "_reg_measure.h"
 #include <vector>
@@ -33,11 +32,11 @@ class reg_nmi : public reg_measure
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          nifti_image *forwardLocalWeightPtr = NULL,
-                          int *maskFloPtr = NULL,
-                          nifti_image *warRefImgPtr = NULL,
-                          nifti_image *warRefGraPtr = NULL,
-                          nifti_image *bckVoxBasedGraPtr = NULL);
+                          nifti_image *forwardLocalWeightPtr = nullptr,
+                          int *maskFloPtr = nullptr,
+                          nifti_image *warRefImgPtr = nullptr,
+                          nifti_image *warRefGraPtr = nullptr,
+                          nifti_image *bckVoxBasedGraPtr = nullptr);
    /// @brief Returns the nmi value
    double GetSimilarityMeasureValue();
    /// @brief Compute the voxel based nmi gradient
@@ -330,5 +329,3 @@ void reg_getVoxelBasedMultiChannelNMIGradient3D(nifti_image *referenceImages,
       bool approx);
 /* *************************************************************** */
 /* *************************************************************** */
-
-#endif
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index a189a525..7a91c114 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -3,9 +3,6 @@
  * @date 20/07/2012
  */
 
-#ifndef _REG_OPTIMISER_CPP
-#define _REG_OPTIMISER_CPP
-
 #include "_reg_optimiser.h"
 
 /* *************************************************************** */
@@ -19,18 +16,18 @@ reg_optimiser<T>::reg_optimiser()
    this->optimiseX=true;
    this->optimiseY=true;
    this->optimiseZ=true;
-   this->currentDOF=NULL;
-   this->currentDOF_b=NULL;
-   this->bestDOF=NULL;
-   this->bestDOF_b=NULL;
+   this->currentDOF=nullptr;
+   this->currentDOF_b=nullptr;
+   this->bestDOF=nullptr;
+   this->bestDOF_b=nullptr;
    this->backward=false;
-   this->gradient=NULL;
+   this->gradient=nullptr;
    this->currentIterationNumber=0;
    this->currentObjFunctionValue=0.0;
    this->maxIterationNumber=0.0;
    this->bestObjFunctionValue=0.0;
-   this->objFunc=NULL;
-   this->gradient_b=NULL;
+   this->objFunc=nullptr;
+   this->gradient_b=nullptr;
 
 #ifndef NDEBUG
    reg_print_msg_debug("reg_optimiser<T>::reg_optimiser() called");
@@ -41,12 +38,12 @@ reg_optimiser<T>::reg_optimiser()
 template <class T>
 reg_optimiser<T>::~reg_optimiser()
 {
-   if(this->bestDOF!=NULL)
+   if(this->bestDOF!=nullptr)
       free(this->bestDOF);
-   this->bestDOF=NULL;
-   if(this->bestDOF_b!=NULL)
+   this->bestDOF=nullptr;
+   if(this->bestDOF_b!=nullptr)
       free(this->bestDOF_b);
-   this->bestDOF_b=NULL;
+   this->bestDOF_b=nullptr;
 #ifndef NDEBUG
    reg_print_msg_debug("reg_optimiser<T>::~reg_optimiser() called");
 #endif
@@ -77,23 +74,23 @@ void reg_optimiser<T>::Initialise(size_t nvox,
    this->maxIterationNumber=maxit;
    this->currentIterationNumber=start;
    this->currentDOF=cppData;
-   if(this->bestDOF!=NULL) free(this->bestDOF);
+   if(this->bestDOF!=nullptr) free(this->bestDOF);
    this->bestDOF=(T *)malloc(this->dofNumber*sizeof(T));
    memcpy(this->bestDOF,this->currentDOF,this->dofNumber*sizeof(T));
-   if( gradData!=NULL)
+   if( gradData!=nullptr)
       this->gradient=gradData;
 
    if(nvox_b>0)
       this->dofNumber_b=nvox_b;
-   if(cppData_b!=NULL)
+   if(cppData_b!=nullptr)
    {
       this->currentDOF_b=cppData_b;
       this->backward=true;
-      if(this->bestDOF_b!=NULL) free(this->bestDOF_b);
+      if(this->bestDOF_b!=nullptr) free(this->bestDOF_b);
       this->bestDOF_b=(T *)malloc(this->dofNumber_b*sizeof(T));
       memcpy(this->bestDOF_b,this->currentDOF_b,this->dofNumber_b*sizeof(T));
    }
-   if(gradData_b!=NULL)
+   if(gradData_b!=nullptr)
       this->gradient_b=gradData_b;
 
    this->objFunc=obj;
@@ -112,7 +109,7 @@ void reg_optimiser<T>::RestoreBestDOF()
    // restore forward transformation
    memcpy(this->currentDOF,this->bestDOF,this->dofNumber*sizeof(T));
    // restore backward transformation if required
-   if(this->currentDOF_b!=NULL && this->bestDOF_b!=NULL && this->dofNumber_b>0)
+   if(this->currentDOF_b!=nullptr && this->bestDOF_b!=nullptr && this->dofNumber_b>0)
       memcpy(this->currentDOF_b,this->bestDOF_b,this->dofNumber_b*sizeof(T));
 }
 /* *************************************************************** */
@@ -123,7 +120,7 @@ void reg_optimiser<T>::StoreCurrentDOF()
    // save forward transformation
    memcpy(this->bestDOF,this->currentDOF,this->dofNumber*sizeof(T));
    // save backward transformation if required
-   if(this->currentDOF_b!=NULL && this->bestDOF_b!=NULL && this->dofNumber_b>0)
+   if(this->currentDOF_b!=nullptr && this->bestDOF_b!=nullptr && this->dofNumber_b>0)
       memcpy(this->bestDOF_b,this->currentDOF_b,this->dofNumber_b*sizeof(T));
 }
 /* *************************************************************** */
@@ -132,7 +129,7 @@ template <class T>
 void reg_optimiser<T>::Perturbation(float length)
 {
    // initialise the randomiser
-   srand(time(NULL));
+   srand(time(nullptr));
    // Reset the number of iteration
    this->currentIterationNumber=0;
    // Create some perturbation for degree of freedom
@@ -232,10 +229,10 @@ template <class T>
 reg_conjugateGradient<T>::reg_conjugateGradient()
    :reg_optimiser<T>::reg_optimiser()
 {
-   this->array1=NULL;
-   this->array2=NULL;
-   this->array1_b=NULL;
-   this->array2_b=NULL;
+   this->array1=nullptr;
+   this->array2=nullptr;
+   this->array1_b=nullptr;
+   this->array2_b=nullptr;
 
 #ifndef NDEBUG
    reg_print_msg_debug("reg_conjugateGradient<T>::reg_conjugateGradient() called");
@@ -246,21 +243,21 @@ reg_conjugateGradient<T>::reg_conjugateGradient()
 template <class T>
 reg_conjugateGradient<T>::~reg_conjugateGradient()
 {
-   if(this->array1!=NULL)
+   if(this->array1!=nullptr)
       free(this->array1);
-   this->array1=NULL;
+   this->array1=nullptr;
 
-   if(this->array2!=NULL)
+   if(this->array2!=nullptr)
       free(this->array2);
-   this->array2=NULL;
+   this->array2=nullptr;
 
-   if(this->array1_b!=NULL)
+   if(this->array1_b!=nullptr)
       free(this->array1_b);
-   this->array1_b=NULL;
+   this->array1_b=nullptr;
 
-   if(this->array2_b!=NULL)
+   if(this->array2_b!=nullptr)
       free(this->array2_b);
-   this->array2_b=NULL;
+   this->array2_b=nullptr;
 
 #ifndef NDEBUG
    reg_print_msg_debug("reg_conjugateGradient<T>::~reg_conjugateGradient() called");
@@ -299,15 +296,15 @@ void reg_conjugateGradient<T>::Initialise(size_t nvox,
                                 gradData_b
                                );
    this->firstcall=true;
-   if(this->array1!=NULL) free(this->array1);
-   if(this->array2!=NULL) free(this->array2);
+   if(this->array1!=nullptr) free(this->array1);
+   if(this->array2!=nullptr) free(this->array2);
    this->array1=(T *)malloc(this->dofNumber*sizeof(T));
    this->array2=(T *)malloc(this->dofNumber*sizeof(T));
 
-   if(cppData_b!=NULL && gradData_b!=NULL && nvox_b>0)
+   if(cppData_b!=nullptr && gradData_b!=nullptr && nvox_b>0)
    {
-      if(this->array1_b!=NULL) free(this->array1_b);
-      if(this->array2_b!=NULL) free(this->array2_b);
+      if(this->array1_b!=nullptr) free(this->array1_b);
+      if(this->array2_b!=nullptr) free(this->array2_b);
       this->array1_b=(T *)malloc(this->dofNumber_b*sizeof(T));
       this->array2_b=(T *)malloc(this->dofNumber_b*sizeof(T));
    }
@@ -469,37 +466,37 @@ reg_lbfgs<T>::reg_lbfgs()
    :reg_optimiser<T>::reg_optimiser()
 {
    this->stepToKeep=5;
-   this->oldDOF=NULL;
-   this->oldGrad=NULL;
-   this->diffDOF=NULL;
-   this->diffGrad=NULL;
+   this->oldDOF=nullptr;
+   this->oldGrad=nullptr;
+   this->diffDOF=nullptr;
+   this->diffGrad=nullptr;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 reg_lbfgs<T>::~reg_lbfgs()
 {
-   if(this->oldDOF!=NULL)
+   if(this->oldDOF!=nullptr)
       free(this->oldDOF);
-   this->oldDOF=NULL;
-   if(this->oldGrad!=NULL)
+   this->oldDOF=nullptr;
+   if(this->oldGrad!=nullptr)
       free(this->oldGrad);
-   this->oldGrad=NULL;
+   this->oldGrad=nullptr;
    for(size_t i=0; i<this->stepToKeep; ++i)
    {
-      if(this->diffDOF[i]!=NULL)
+      if(this->diffDOF[i]!=nullptr)
          free(this->diffDOF[i]);
-      this->diffDOF[i]=NULL;
-      if(this->diffGrad[i]!=NULL)
+      this->diffDOF[i]=nullptr;
+      if(this->diffGrad[i]!=nullptr)
          free(this->diffGrad[i]);
-      this->diffGrad[i]=NULL;
+      this->diffGrad[i]=nullptr;
    }
-   if(this->diffDOF!=NULL)
+   if(this->diffDOF!=nullptr)
       free(this->diffDOF);
-   this->diffDOF=NULL;
-   if(this->diffGrad!=NULL)
+   this->diffDOF=nullptr;
+   if(this->diffGrad!=nullptr)
       free(this->diffGrad);
-   this->diffGrad=NULL;
+   this->diffGrad=nullptr;
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -538,7 +535,7 @@ void reg_lbfgs<T>::Initialise(size_t nvox,
    {
       this->diffDOF[i]=(T *)malloc(this->dofNumber*sizeof(T));
       this->diffGrad[i]=(T *)malloc(this->dofNumber*sizeof(T));
-      if(this->diffDOF[i]==NULL || this->diffGrad[i]==NULL)
+      if(this->diffDOF[i]==nullptr || this->diffGrad[i]==nullptr)
       {
          reg_print_fct_error("reg_lbfgs<T>::Initialise");
          reg_print_msg_error("Out of memory");
@@ -547,7 +544,7 @@ void reg_lbfgs<T>::Initialise(size_t nvox,
    }
    this->oldDOF=(T *)malloc(this->dofNumber*sizeof(T));
    this->oldGrad=(T *)malloc(this->dofNumber*sizeof(T));
-   if(this->oldDOF==NULL || this->oldGrad==NULL)
+   if(this->oldDOF==nullptr || this->oldGrad==nullptr)
    {
       reg_print_fct_error("reg_lbfgs<T>::Initialise");
       reg_print_msg_error("Out of memory");
@@ -579,4 +576,3 @@ void reg_lbfgs<T>::Optimise(T maxLength,
 //template class reg_optimiser<float>;
 //template class reg_conjugateGradient<float>;
 //template class reg_lbfgs<float>;
-#endif // _REG_OPTIMISER_CPP
diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h
index 0473f047..806ef167 100644
--- a/reg-lib/cpu/_reg_optimiser.h
+++ b/reg-lib/cpu/_reg_optimiser.h
@@ -3,8 +3,7 @@
  * @date 20/07/2012
  */
 
-#ifndef _REG_OPTIMISER_H
-#define _REG_OPTIMISER_H
+#pragma once
 
 #include "_reg_maths.h"
 #include <stdlib.h>
@@ -157,10 +156,10 @@ class reg_optimiser
                            size_t start,
                            InterfaceOptimiser *o,
                            T *cppData,
-                           T *gradData=NULL,
+                           T *gradData=nullptr,
                            size_t nvox_b=0,
-                           T *cppData_b=NULL,
-                           T *gradData_b=NULL);
+                           T *cppData_b=nullptr,
+                           T *gradData_b=nullptr);
    virtual void Optimise(T maxLength,
                          T smallLength,
                          T &startLength);
@@ -197,11 +196,11 @@ class reg_conjugateGradient : public reg_optimiser<T>
                            size_t maxit,
                            size_t start,
                            InterfaceOptimiser *o,
-                           T *cppData=NULL,
-                           T *gradData=NULL,
+                           T *cppData=nullptr,
+                           T *gradData=nullptr,
                            size_t nvox_b=0,
-                           T *cppData_b=NULL,
-                           T *gradData_b=NULL);
+                           T *cppData_b=nullptr,
+                           T *gradData_b=nullptr);
    virtual void Optimise(T maxLength,
                          T smallLength,
                          T &startLength);
@@ -236,11 +235,11 @@ class reg_lbfgs : public reg_optimiser<T>
                            size_t maxit,
                            size_t start,
                            InterfaceOptimiser *o,
-                           T *cppData=NULL,
-                           T *gradData=NULL,
+                           T *cppData=nullptr,
+                           T *gradData=nullptr,
                            size_t nvox_b=0,
-                           T *cppData_b=NULL,
-                           T *gradData_b=NULL);
+                           T *cppData_b=nullptr,
+                           T *gradData_b=nullptr);
    virtual void Optimise(T maxLength,
                          T smallLength,
                          T &startLength);
@@ -249,5 +248,3 @@ class reg_lbfgs : public reg_optimiser<T>
 /* *************************************************************** */
 /* *************************************************************** */
 #include "_reg_optimiser.cpp"
-
-#endif // _REG_OPTIMISER_H
diff --git a/reg-lib/cpu/_reg_polyAffine.cpp b/reg-lib/cpu/_reg_polyAffine.cpp
index 1e09accf..95d4a2f8 100644
--- a/reg-lib/cpu/_reg_polyAffine.cpp
+++ b/reg-lib/cpu/_reg_polyAffine.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_POLYAFFINE_CPP
-#define _REG_POLYAFFINE_CPP
-
 #include "_reg_polyAffine.h"
 
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -137,5 +134,3 @@ void reg_polyAffine<T>::ClearTransformationGradient()
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-
-#endif // _REG_POLYAFFINE_CPP
diff --git a/reg-lib/cpu/_reg_polyAffine.h b/reg-lib/cpu/_reg_polyAffine.h
index 11858866..661fa050 100644
--- a/reg-lib/cpu/_reg_polyAffine.h
+++ b/reg-lib/cpu/_reg_polyAffine.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_POLYAFFINE_H
-#define _REG_POLYAFFINE_H
+#pragma once
 
 #include "_reg_base.h"
 
@@ -40,5 +39,3 @@ class reg_polyAffine : public reg_base<T>
 };
 
 #include "_reg_polyAffine.cpp"
-
-#endif // _REG_POLYAFFINE_H
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 70bcee84..6b0e645e 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_RESAMPLING_CPP
-#define _REG_RESAMPLING_CPP
-
 #include "_reg_resampling.h"
 #include "_reg_maths.h"
 #include "_reg_maths_eigen.h"
@@ -216,7 +213,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
                                        int *mask,
                                        mat33 *jacMat,
                                        int *dtIndicies,
-                                       nifti_image *warpedImage = NULL)
+                                       nifti_image *warpedImage = nullptr)
 {
     // If we have some valid diffusion tensor indicies, we need to exponentiate the previously logged tensor components
     // we also need to reorient the tensors based on the local transformation Jacobians
@@ -230,7 +227,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
         size_t voxelNumber = (size_t)inputImage->nx*inputImage->ny*inputImage->nz;
 #endif
         DTYPE *warpVox,*warpedXX,*warpedXY,*warpedXZ,*warpedYY,*warpedYZ,*warpedZZ;
-        if(warpedImage!=NULL)
+        if(warpedImage!=nullptr)
         {
             warpVox = static_cast<DTYPE *>(warpedImage->data);
             // CAUTION: Here the tensor is assumed to be encoding in lower triangular order
@@ -291,7 +288,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
                     inputTensor[tid].m[2][1] = inputTensor[tid].m[1][2];
                     inputTensor[tid].m[2][2] = static_cast<double>(inputIntensityZZ[warpedIndex]);
                     // Exponentiate the warped tensor
-                    if(warpedImage==NULL)
+                    if(warpedImage==nullptr)
                     {
                         reg_mat33_expm(&inputTensor[tid]);
                         testSum=0;
@@ -742,7 +739,7 @@ void reg_resampleImage2(nifti_image *floatingImage,
                         mat33 * jacMat)
 {
     // The floating image data is copied in case one deal with DTI
-    void *originalFloatingData=NULL;
+    void *originalFloatingData=nullptr;
     // The DTI are logged
     reg_dti_resampling_preprocessing<FloatingTYPE>(floatingImage,
                                                    &originalFloatingData,
@@ -768,11 +765,11 @@ void reg_resampleImage2(nifti_image *floatingImage,
                                                 interp);
     }
     // The temporary logged floating array is deleted and the original restored
-    if(originalFloatingData!=NULL)
+    if(originalFloatingData!=nullptr)
     {
         free(floatingImage->data);
         floatingImage->data=originalFloatingData;
-        originalFloatingData=NULL;
+        originalFloatingData=nullptr;
     }
 
     // The interpolated tensors are reoriented and exponentiated
@@ -808,9 +805,9 @@ void reg_resampleImage(nifti_image *floatingImage,
     // Define the DTI indices if required
     int dtIndicies[6];
     for(int i=0; i<6; ++i) dtIndicies[i]=-1;
-    if(dti_timepoint!=NULL)
+    if(dti_timepoint!=nullptr)
     {
-        if(jacMat==NULL)
+        if(jacMat==nullptr)
         {
             reg_print_fct_error("reg_resampleImage");
             reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided");
@@ -832,7 +829,7 @@ void reg_resampleImage(nifti_image *floatingImage,
 
     // a mask array is created if no mask is specified
     bool MrPropreRules = false;
-    if(mask==NULL)
+    if(mask==nullptr)
     {
         // voxels in the background are set to negative value so 0 corresponds to active voxel
         mask=(int *)calloc(warpedImage->nx*warpedImage->ny*warpedImage->nz,sizeof(int));
@@ -1024,7 +1021,7 @@ void reg_resampleImage(nifti_image *floatingImage,
     if(MrPropreRules==true)
     {
         free(mask);
-        mask=NULL;
+        mask=nullptr;
     }
 }
 /* *************************************************************** */
@@ -1842,7 +1839,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage,
 
     // a mask array is created if no mask is specified
     bool MrPropreRules = false;
-    if(mask==NULL)
+    if(mask==nullptr)
     {
         // voxels in the background are set to negative value so 0 corresponds to active voxel
         mask=(int *)calloc(warpedImage->nx*warpedImage->ny*warpedImage->nz,sizeof(int));
@@ -2034,7 +2031,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage,
     if(MrPropreRules==true)
     {
         free(mask);
-        mask=NULL;
+        mask=nullptr;
     }
 }
 /* *************************************************************** */
@@ -3159,11 +3156,11 @@ void reg_getImageGradient3(nifti_image *floatingImage,
                            int active_timepoint,
                            int *dtIndicies,
                            mat33 *jacMat,
-                           nifti_image *warpedImage = NULL
+                           nifti_image *warpedImage = nullptr
         )
 {
     // The floating image data is copied in case one deal with DTI
-    void *originalFloatingData=NULL;
+    void *originalFloatingData=nullptr;
     // The DTI are logged
     reg_dti_resampling_preprocessing<FloatingTYPE>(floatingImage,
                                                    &originalFloatingData,
@@ -3216,11 +3213,11 @@ void reg_getImageGradient3(nifti_image *floatingImage,
         }
     }
     // The temporary logged floating array is deleted
-    if(originalFloatingData!=NULL)
+    if(originalFloatingData!=nullptr)
     {
         free(floatingImage->data);
         floatingImage->data=originalFloatingData;
-        originalFloatingData=NULL;
+        originalFloatingData=nullptr;
     }
     // The interpolated tensors are reoriented and exponentiated
     reg_dti_resampling_postprocessing<FloatingTYPE>(warImgGradient,
@@ -3329,7 +3326,7 @@ void reg_getImageGradient(nifti_image *floatingImage,
 {
     // a mask array is created if no mask is specified
     bool MrPropreRule=false;
-    if(mask==NULL)
+    if(mask==nullptr)
     {
         // voxels in the backgreg_round are set to -1 so 0 will do the job here
         mask=(int *)calloc(deformationField->nx*deformationField->ny*deformationField->nz,sizeof(int));
@@ -3339,10 +3336,10 @@ void reg_getImageGradient(nifti_image *floatingImage,
     // Define the DTI indices if required
     int dtIndicies[6];
     for(int i=0; i<6; ++i) dtIndicies[i]=-1;
-    if(dti_timepoint!=NULL)
+    if(dti_timepoint!=nullptr)
     {
 
-        if(jacMat==NULL)
+        if(jacMat==nullptr)
         {
             reg_print_fct_error("reg_getImageGradient");
             reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided");
@@ -3400,7 +3397,7 @@ void reg_getImageGradient_symDiff_core(nifti_image *img,
 
     DTYPE *gradPtrX = static_cast<DTYPE *>(gradImg->data);
     DTYPE *gradPtrY = &gradPtrX[voxelNumber];
-    DTYPE *gradPtrZ = NULL;
+    DTYPE *gradPtrZ = nullptr;
     if(dimImg==3)
         gradPtrZ = &gradPtrY[voxelNumber];
 
@@ -3429,7 +3426,7 @@ void reg_getImageGradient_symDiff_core(nifti_image *img,
                     if(y>0) pre = currentImgPtr[voxIndex-img->nx];
                     valY =  (post - pre) / 2.f;
 
-                    if(gradPtrZ!=NULL){
+                    if(gradPtrZ!=nullptr){
                         pre = post = padding_value;
                         if(z<img->nz-1) post = currentImgPtr[voxIndex+img->nx*img->ny];
                         if(z>0) pre = currentImgPtr[voxIndex-img->nx*img->ny];
@@ -3438,7 +3435,7 @@ void reg_getImageGradient_symDiff_core(nifti_image *img,
                 }
                 gradPtrX[voxIndex] = valX==valX?valX:0;
                 gradPtrY[voxIndex] = valY==valY?valY:0;
-                if(gradPtrZ!=NULL)
+                if(gradPtrZ!=nullptr)
                     gradPtrZ[voxIndex] = valZ==valZ?valZ:0;
                 ++voxIndex;
             } // x
@@ -3565,12 +3562,10 @@ nifti_image *reg_makeIsotropic(nifti_image *img,
     // Fill the deformation field with an identity transformation
     reg_getDeformationFromDisplacement(def);
     // resample the original image into the space of the new image
-    reg_resampleImage(img,newImg,def,NULL,inter,0.f);
+    reg_resampleImage(img,newImg,def,nullptr,inter,0.f);
     nifti_set_filenames(newImg,"tempIsotropicImage",0,0);
     nifti_image_free(def);
     return newImg;
 }
 /* *************************************************************** */
 /* *************************************************************** */
-
-#endif
diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h
index 210d32db..26c4c319 100755
--- a/reg-lib/cpu/_reg_resampling.h
+++ b/reg-lib/cpu/_reg_resampling.h
@@ -11,8 +11,7 @@
  *
  */
 
-#ifndef _REG_RESAMPLING_H
-#define _REG_RESAMPLING_H
+#pragma once
 
 #include "nifti1_io.h"
 
@@ -26,7 +25,7 @@
  * @param warpedImage Warped image that is being generated
  * @param deformationField Vector field image that contains the dense correspondences
  * @param mask Array that contains information about the mask. Only voxel with mask value different
- * from zero are being considered. If NULL, all voxels are considered
+ * from zero are being considered. If nullptr, all voxels are considered
  * @param interp Interpolation type. 0, 1 or 3 correspond to nearest neighbor, linear or cubic
  * interpolation
  * @param paddingValue Value to be used for padding when the correspondences are outside of the
@@ -41,8 +40,8 @@ void reg_resampleImage(nifti_image *floatingImage,
                        int *mask,
                        int interp,
                        float paddingValue,
-                       bool *dti_timepoint = NULL,
-                       mat33 * jacMat = NULL);
+                       bool *dti_timepoint = nullptr,
+                       mat33 * jacMat = nullptr);
 extern "C++"
 void reg_resampleImage_PSF(nifti_image *floatingImage,
                            nifti_image *warpedImage,
@@ -69,9 +68,9 @@ void reg_getImageGradient(nifti_image *floatingImage,
                           int interp,
                           float paddingValue,
                           int active_timepoint,
-                          bool *dti_timepoint = NULL,
-                          mat33 *jacMat = NULL,
-                          nifti_image *warpedImage = NULL);
+                          bool *dti_timepoint = nullptr,
+                          mat33 *jacMat = nullptr,
+                          nifti_image *warpedImage = nullptr);
 
 extern "C++"
 void reg_getImageGradient_symDiff(nifti_image* inputImg,
@@ -81,5 +80,3 @@ void reg_getImageGradient_symDiff(nifti_image* inputImg,
                                   int timepoint);
 extern "C++"
 nifti_image *reg_makeIsotropic(nifti_image *, int);
-
-#endif
diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp
index ddaffa6f..20639e32 100755
--- a/reg-lib/cpu/_reg_splineBasis.cpp
+++ b/reg-lib/cpu/_reg_splineBasis.cpp
@@ -11,9 +11,6 @@
  *
  */
 
-#ifndef _REG_SPLINE_CPP
-#define _REG_SPLINE_CPP
-
 #include "_reg_splineBasis.h"
 
 /* *************************************************************** */
@@ -625,9 +622,9 @@ void get_GridValues(int startX,
 
    size_t index;
    size_t coord=0;
-   DTYPE *xxPtr=NULL, *yyPtr=NULL;
+   DTYPE *xxPtr=nullptr, *yyPtr=nullptr;
 
-   mat44 *voxel2realMatrix=NULL;
+   mat44 *voxel2realMatrix=nullptr;
    if(splineControlPoint->sform_code>0)
       voxel2realMatrix=&(splineControlPoint->sto_xyz);
    else voxel2realMatrix=&(splineControlPoint->qto_xyz);
@@ -690,10 +687,10 @@ void get_GridValues(int startX,
 
    size_t index;
    size_t coord=0;
-   DTYPE *xPtr=NULL, *yPtr=NULL, *zPtr=NULL;
-   DTYPE *xxPtr=NULL, *yyPtr=NULL, *zzPtr=NULL;
+   DTYPE *xPtr=nullptr, *yPtr=nullptr, *zPtr=nullptr;
+   DTYPE *xxPtr=nullptr, *yyPtr=nullptr, *zzPtr=nullptr;
 
-   mat44 *voxel2realMatrix=NULL;
+   mat44 *voxel2realMatrix=nullptr;
    if(splineControlPoint->sform_code>0)
       voxel2realMatrix=&(splineControlPoint->sto_xyz);
    else voxel2realMatrix=&(splineControlPoint->qto_xyz);
@@ -753,5 +750,3 @@ template void get_GridValues<double>(int, int, int, nifti_image *,
 double *, double *, double *, double *, double *, double *, bool, bool);
 /* *************************************************************** */
 /* *************************************************************** */
-
-#endif
diff --git a/reg-lib/cpu/_reg_splineBasis.h b/reg-lib/cpu/_reg_splineBasis.h
index 95d7af8a..602f8d6b 100755
--- a/reg-lib/cpu/_reg_splineBasis.h
+++ b/reg-lib/cpu/_reg_splineBasis.h
@@ -11,8 +11,7 @@
  *
  */
 
-#ifndef _REG_SPLINE_H
-#define _REG_SPLINE_H
+#pragma once
 
 #include "_reg_tools.h"
 
@@ -130,5 +129,3 @@ void get_GridValues(int startX,
                     DTYPE *dispZ,
                     bool approx,
                     bool displacement);
-
-#endif
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 8e315f52..cd7a62ed 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -128,12 +128,12 @@ double reg_getSSDValue(nifti_image *referenceImage,
    DTYPE *referencePtr=static_cast<DTYPE *>(referenceImage->data);
    DTYPE *warpedPtr=static_cast<DTYPE *>(warpedImage->data);
    // Create a pointer to the Jacobian determinant image if defined
-   DTYPE *jacDetPtr=NULL;
-   if(jacobianDetImage!=NULL)
+   DTYPE *jacDetPtr=nullptr;
+   if(jacobianDetImage!=nullptr)
       jacDetPtr=static_cast<DTYPE *>(jacobianDetImage->data);
    // Create a pointer to the local weight image if defined
-   DTYPE *localWeightPtr=NULL;
-   if(localWeightSimImage!=NULL)
+   DTYPE *localWeightPtr=nullptr;
+   if(localWeightSimImage!=nullptr)
       localWeightPtr=static_cast<DTYPE *>(localWeightSimImage->data);
 
    double SSD_global=0.0;
@@ -176,12 +176,12 @@ double reg_getSSDValue(nifti_image *referenceImage,
                   diff = reg_pow2(refValue-warValue);
 #endif
                   // Jacobian determinant modulation of the ssd if required
-                  if(jacDetPtr!=NULL)
+                  if(jacDetPtr!=nullptr)
                   {
                      SSD_local += diff * jacDetPtr[voxel];
                      n += jacDetPtr[voxel];
                   }
-                  else if(localWeightPtr!=NULL)
+                  else if(localWeightPtr!=nullptr)
                   {
                      SSD_local += diff * localWeightPtr[voxel];
                      n += localWeightPtr[voxel];
@@ -222,7 +222,7 @@ double reg_ssd::GetSimilarityMeasureValue()
             (this->referenceImagePointer,
              this->warpedFloatingImagePointer,
              this->timePointWeight,
-             NULL, // HERE TODO this->forwardJacDetImagePointer,
+             nullptr, // HERE TODO this->forwardJacDetImagePointer,
              this->referenceMaskPointer,
              this->currentValue,
              this->forwardLocalWeightSimImagePointer
@@ -233,7 +233,7 @@ double reg_ssd::GetSimilarityMeasureValue()
             (this->referenceImagePointer,
              this->warpedFloatingImagePointer,
              this->timePointWeight,
-             NULL, // HERE TODO this->forwardJacDetImagePointer,
+             nullptr, // HERE TODO this->forwardJacDetImagePointer,
              this->referenceMaskPointer,
              this->currentValue,
              this->forwardLocalWeightSimImagePointer
@@ -262,10 +262,10 @@ double reg_ssd::GetSimilarityMeasureValue()
                (this->floatingImagePointer,
                 this->warpedReferenceImagePointer,
                 this->timePointWeight,
-                NULL, // HERE TODO this->backwardJacDetImagePointer,
+                nullptr, // HERE TODO this->backwardJacDetImagePointer,
                 this->floatingMaskPointer,
                 this->currentValue,
-                NULL
+                nullptr
                 );
          break;
       case NIFTI_TYPE_FLOAT64:
@@ -273,10 +273,10 @@ double reg_ssd::GetSimilarityMeasureValue()
                (this->floatingImagePointer,
                 this->warpedReferenceImagePointer,
                 this->timePointWeight,
-                NULL, // HERE TODO this->backwardJacDetImagePointer,
+                nullptr, // HERE TODO this->backwardJacDetImagePointer,
                 this->floatingMaskPointer,
                 this->currentValue,
-                NULL
+                nullptr
                 );
          break;
       default:
@@ -323,24 +323,24 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
    // Pointers to the spatial gradient of the warped image
    DTYPE *spatialGradPtrX = static_cast<DTYPE *>(warImgGradient->data);
    DTYPE *spatialGradPtrY = &spatialGradPtrX[voxelNumber];
-   DTYPE *spatialGradPtrZ = NULL;
+   DTYPE *spatialGradPtrZ = nullptr;
    if(referenceImage->nz>1)
       spatialGradPtrZ=&spatialGradPtrY[voxelNumber];
 
    // Pointers to the measure of similarity gradient
    DTYPE *measureGradPtrX = static_cast<DTYPE *>(measureGradientImage->data);
    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-   DTYPE *measureGradPtrZ = NULL;
+   DTYPE *measureGradPtrZ = nullptr;
    if(referenceImage->nz>1)
       measureGradPtrZ=&measureGradPtrY[voxelNumber];
 
    // Create a pointer to the Jacobian determinant values if defined
-   DTYPE *jacDetPtr=NULL;
-   if(jacobianDetImage!=NULL)
+   DTYPE *jacDetPtr=nullptr;
+   if(jacobianDetImage!=nullptr)
       jacDetPtr=static_cast<DTYPE *>(jacobianDetImage->data);
    // Create a pointer to the local weight image if defined
-   DTYPE *localWeightPtr=NULL;
-   if(localWeightSimImage!=NULL)
+   DTYPE *localWeightPtr=nullptr;
+   if(localWeightSimImage!=nullptr)
       localWeightPtr=static_cast<DTYPE *>(localWeightSimImage->data);
 
    // find number of active voxels and correct weight
@@ -381,9 +381,9 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
 #else
             common = -2.0 * (refValue - warValue);
 #endif
-            if(jacDetPtr!=NULL)
+            if(jacDetPtr!=nullptr)
                common *= jacDetPtr[voxel];
-            else if(localWeightPtr!=NULL)
+            else if(localWeightPtr!=nullptr)
                common *= localWeightPtr[voxel];
 
             common *= adjusted_weight;
@@ -393,7 +393,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
             if(spatialGradPtrY[voxel]==spatialGradPtrY[voxel])
                measureGradPtrY[voxel] += (DTYPE)(common * spatialGradPtrY[voxel]);
 
-            if(measureGradPtrZ!=NULL)
+            if(measureGradPtrZ!=nullptr)
             {
                if(spatialGradPtrZ[voxel]==spatialGradPtrZ[voxel])
                   measureGradPtrZ[voxel] += (DTYPE)(common * spatialGradPtrZ[voxel]);
@@ -435,7 +435,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
              this->warpedFloatingImagePointer,
              this->warpedFloatingGradientImagePointer,
              this->forwardVoxelBasedGradientImagePointer,
-             NULL, // HERE TODO this->forwardJacDetImagePointer,
+             nullptr, // HERE TODO this->forwardJacDetImagePointer,
              this->referenceMaskPointer,
              current_timepoint,
              this->timePointWeight[current_timepoint],
@@ -448,7 +448,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
              this->warpedFloatingImagePointer,
              this->warpedFloatingGradientImagePointer,
              this->forwardVoxelBasedGradientImagePointer,
-             NULL, // HERE TODO this->forwardJacDetImagePointer,
+             nullptr, // HERE TODO this->forwardJacDetImagePointer,
              this->referenceMaskPointer,
              current_timepoint,
              this->timePointWeight[current_timepoint],
@@ -482,11 +482,11 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                 this->warpedReferenceImagePointer,
                 this->warpedReferenceGradientImagePointer,
                 this->backwardVoxelBasedGradientImagePointer,
-                NULL, // HERE TODO this->backwardJacDetImagePointer,
+                nullptr, // HERE TODO this->backwardJacDetImagePointer,
                 this->floatingMaskPointer,
                 current_timepoint,
                 this->timePointWeight[current_timepoint],
-                NULL
+                nullptr
                 );
          break;
       case NIFTI_TYPE_FLOAT64:
@@ -495,11 +495,11 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                 this->warpedReferenceImagePointer,
                 this->warpedReferenceGradientImagePointer,
                 this->backwardVoxelBasedGradientImagePointer,
-                NULL, // HERE TODO this->backwardJacDetImagePointer,
+                nullptr, // HERE TODO this->backwardJacDetImagePointer,
                 this->floatingMaskPointer,
                 current_timepoint,
                 this->timePointWeight[current_timepoint],
-                NULL
+                nullptr
                 );
          break;
       default:
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index eddf59f1..0401c4d2 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -12,8 +12,7 @@
  *
  */
 
-#ifndef _REG_SSD_H
-#define _REG_SSD_H
+#pragma once
 
 #include "_reg_measure.h"
 
@@ -33,10 +32,10 @@ class reg_ssd : public reg_measure
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
                           nifti_image *localWeightSimPtr,
-                          int *maskFloPtr = NULL,
-                          nifti_image *warRefImgPtr = NULL,
-                          nifti_image *warRefGraPtr = NULL,
-                          nifti_image *bckVoxBasedGraPtr = NULL);
+                          int *maskFloPtr = nullptr,
+                          nifti_image *warRefImgPtr = nullptr,
+                          nifti_image *warRefGraPtr = nullptr,
+                          nifti_image *bckVoxBasedGraPtr = nullptr);
 
    /// @brief Define if the specified time point should be normalised
    void SetNormaliseTimepoint(int timepoint, bool normalise);
@@ -66,9 +65,9 @@ class reg_ssd : public reg_measure
  * @param jacobianDeterminantImage Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
  * image is used to modulate the SSD. The argument is ignored if the
- * pointer is set to NULL
+ * pointer is set to nullptr
  * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to NULL, all voxels are considered
+ * should be considered. If set to nullptr, all voxels are considered
  * @return Returns the computed sum squared difference
  */
 extern "C++" template <class DTYPE>
@@ -91,9 +90,9 @@ double reg_getSSDValue(nifti_image *referenceImage,
  * @param jacobianDeterminantImage Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
  * image is used to modulate the SSD. The argument is ignored if the
- * pointer is set to NULL
+ * pointer is set to nullptr
  * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to NULL, all voxels are considered
+ * should be considered. If set to nullptr, all voxels are considered
  */
 extern "C++" template <class DTYPE>
 void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
@@ -106,4 +105,3 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                                   double timepoint_weight,
                                   nifti_image *localWeightImage
                                  );
-#endif
diff --git a/reg-lib/cpu/_reg_thinPlateSpline.cpp b/reg-lib/cpu/_reg_thinPlateSpline.cpp
index 4f80a584..4a197266 100644
--- a/reg-lib/cpu/_reg_thinPlateSpline.cpp
+++ b/reg-lib/cpu/_reg_thinPlateSpline.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_THINPLATESPLINE_CPP
-#define _REG_THINPLATESPLINE_CPP
-
 #include "_reg_thinPlateSpline.h"
 
 /* *************************************************************** */
@@ -33,8 +30,8 @@ reg_tps<T>::reg_tps(size_t d, size_t n)
    }
    else
    {
-      this->positionZ=NULL;
-      this->coefficientZ=NULL;
+      this->positionZ=nullptr;
+      this->coefficientZ=nullptr;
    }
    this->initialised=false;
    this->approxInter=0.;
@@ -44,18 +41,18 @@ reg_tps<T>::reg_tps(size_t d, size_t n)
 template <class T>
 reg_tps<T>::~reg_tps()
 {
-   if(this->positionX!=NULL) free(this->positionX);
-   this->positionX=NULL;
-   if(this->positionY!=NULL) free(this->positionY);
-   this->positionY=NULL;
-   if(this->positionZ!=NULL) free(this->positionZ);
-   this->positionZ=NULL;
-   if(this->coefficientX!=NULL) free(this->coefficientX);
-   this->coefficientX=NULL;
-   if(this->coefficientY!=NULL) free(this->coefficientY);
-   this->coefficientY=NULL;
-   if(this->coefficientZ!=NULL) free(this->coefficientZ);
-   this->coefficientZ=NULL;
+   if(this->positionX!=nullptr) free(this->positionX);
+   this->positionX=nullptr;
+   if(this->positionY!=nullptr) free(this->positionY);
+   this->positionY=nullptr;
+   if(this->positionZ!=nullptr) free(this->positionZ);
+   this->positionZ=nullptr;
+   if(this->coefficientX!=nullptr) free(this->coefficientX);
+   this->coefficientX=nullptr;
+   if(this->coefficientY!=nullptr) free(this->coefficientY);
+   this->coefficientY=nullptr;
+   if(this->coefficientZ!=nullptr) free(this->coefficientZ);
+   this->coefficientZ=nullptr;
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -145,7 +142,7 @@ void reg_tps<T>::InitialiseTPS()
 {
    size_t matrix_side=this->number + this->dim + 1;
    T *matrixL=(T *)calloc(matrix_side*matrix_side,sizeof(T));
-   if(matrixL==NULL)
+   if(matrixL==nullptr)
    {
       char text[255];
       sprintf(text,"Size should be %g GB (%i x %i)",
@@ -220,11 +217,11 @@ void reg_tps<T>::FillDeformationField(nifti_image *deformationField)
    size_t voxelNumber = deformationField->nx*deformationField->ny*deformationField->nz;
    T *defX=static_cast<T *>(deformationField->data);
    T *defY=&defX[voxelNumber];
-   T *defZ=NULL;
+   T *defZ=nullptr;
    if(this->dim==3)
       defZ=&defY[voxelNumber];
 
-   mat44 *voxel2realDF=NULL;
+   mat44 *voxel2realDF=nullptr;
    if(deformationField->sform_code>0)
       voxel2realDF=&(deformationField->sto_xyz);
    else voxel2realDF=&(deformationField->qto_xyz);
@@ -305,5 +302,3 @@ void reg_tps<T>::FillDeformationField(nifti_image *deformationField)
 }
 /* *************************************************************** */
 /* *************************************************************** */
-
-#endif // _REG_THINPLATESPLINE_CPP
diff --git a/reg-lib/cpu/_reg_thinPlateSpline.h b/reg-lib/cpu/_reg_thinPlateSpline.h
index 724d2db9..e06a4dbb 100644
--- a/reg-lib/cpu/_reg_thinPlateSpline.h
+++ b/reg-lib/cpu/_reg_thinPlateSpline.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_THINPLATESPLINE_H
-#define _REG_THINPLATESPLINE_H
+#pragma once
 
 #include "_reg_maths.h"
 
@@ -48,5 +47,3 @@ class reg_tps
 
 
 #include "_reg_thinPlateSpline.cpp"
-
-#endif // _REG_THINPLATESPLINE_H
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 371a4756..c2ef723f 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -11,9 +11,6 @@
  *
  */
 
-#ifndef _REG_TOOLS_CPP
-#define _REG_TOOLS_CPP
-
 #include <cmath>
 #include "_reg_tools.h"
 
@@ -1253,8 +1250,8 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
                   size_t realIndex;
                   float *kernelPtr, kernelValue;
                   double densitySum, intensitySum;
-                  DTYPE *currentIntensityPtr=NULL;
-                  float *currentDensityPtr = NULL;
+                  DTYPE *currentIntensityPtr=nullptr;
+                  float *currentDensityPtr = nullptr;
                   DTYPE bufferIntensity[2048];
                   float bufferDensity[2048];
                   double bufferIntensitycur=0;
@@ -1475,16 +1472,16 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
    DTYPE *imagePtr = static_cast<DTYPE *>(image->data);
 
    bool * activeTimePoint = (bool *)calloc(image->nt*image->nu,sizeof(bool));
-   // Check if input time points and masks are NULL
-   if(timePoint==NULL)
+   // Check if input time points and masks are nullptr
+   if(timePoint==nullptr)
    {
       // All time points are considered as active
       for(int i=0; i<image->nt*image->nu; i++) activeTimePoint[i]=true;
    }
    else for(int i=0; i<image->nt*image->nu; i++) activeTimePoint[i]=timePoint[i];
 
-   int *currentMask=NULL;
-   if(mask==NULL)
+   int *currentMask=nullptr;
+   if(mask==nullptr)
    {
       currentMask=(int *)calloc(image->nx*image->ny*image->nz,sizeof(int));
    }
@@ -1696,22 +1693,22 @@ void reg_tools_kernelConvolution(nifti_image *image,
 
    bool *axisToSmooth = new bool[3];
    bool *activeTimePoint = new bool[image->nt*image->nu];
-   if(axis==NULL)
+   if(axis==nullptr)
    {
       // All axis are smoothed by default
       for(int i=0; i<3; i++) axisToSmooth[i]=true;
    }
    else for(int i=0; i<3; i++) axisToSmooth[i]=axis[i];
 
-   if(timePoint==NULL)
+   if(timePoint==nullptr)
    {
       // All time points are considered as active
       for(int i=0; i<image->nt*image->nu; i++) activeTimePoint[i]=true;
    }
    else for(int i=0; i<image->nt*image->nu; i++) activeTimePoint[i]=timePoint[i];
 
-   int *currentMask=NULL;
-   if(mask==NULL)
+   int *currentMask=nullptr;
+   if(mask==nullptr)
    {
       currentMask=(int *)calloc(image->nx*image->ny*image->nz,sizeof(int));
    }
@@ -1731,7 +1728,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
       reg_exit();
    }
 
-   if(mask==NULL) free(currentMask);
+   if(mask==nullptr) free(currentMask);
    delete []axisToSmooth;
    delete []activeTimePoint;
 }
@@ -2079,10 +2076,10 @@ double reg_tools_getMeanRMS2(nifti_image *imageA, nifti_image *imageB)
 {
    ATYPE *imageAPtrX = static_cast<ATYPE *>(imageA->data);
    BTYPE *imageBPtrX = static_cast<BTYPE *>(imageB->data);
-   ATYPE *imageAPtrY=NULL;
-   BTYPE *imageBPtrY=NULL;
-   ATYPE *imageAPtrZ=NULL;
-   BTYPE *imageBPtrZ=NULL;
+   ATYPE *imageAPtrY=nullptr;
+   BTYPE *imageBPtrY=nullptr;
+   ATYPE *imageAPtrZ=nullptr;
+   BTYPE *imageBPtrZ=nullptr;
    if(imageA->dim[5]>1)
    {
       imageAPtrY = &imageAPtrX[imageA->nx*imageA->ny*imageA->nz];
@@ -2646,7 +2643,7 @@ void reg_flippAxis_type(int nx,
                         )
 {
    // Allocate the outputArray if it is not allocated yet
-   if(outputArray==NULL)
+   if(outputArray==nullptr)
       outputArray=(void *)malloc(nx*ny*nz*nt*nu*nv*nw*sizeof(DTYPE));
 
    // Parse the cmd to check which axis have to be flipped
@@ -3318,4 +3315,3 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int &x
     z = index;
 }
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 246adfb1..d1253a07 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -12,8 +12,7 @@
  *
  */
 
-#ifndef _REG_TOOLS_H
-#define _REG_TOOLS_H
+#pragma once
 
 #include <fstream>
 #include <map>
@@ -91,9 +90,9 @@ extern "C++"
 void reg_tools_kernelConvolution(nifti_image *image,
                                  float *sigma,
                                  int kernelType,
-                                 int *mask = NULL,
-                                 bool *timePoints = NULL,
-                                 bool *axis = NULL);
+                                 int *mask = nullptr,
+                                 bool *timePoints = nullptr,
+                                 bool *axis = nullptr);
 
 /* *************************************************************** */
 /** @brief Smooth a label image using a Gaussian kernel
@@ -110,8 +109,8 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
                                       float varianceX,
                                       float varianceY,
                                       float varianceZ,
-                                      int *mask=NULL,
-                                      bool *timePoint=NULL);
+                                      int *mask=nullptr,
+                                      bool *timePoint=nullptr);
 
 
 /* *************************************************************** */
@@ -464,4 +463,3 @@ void cPtrToMatmn(T** mat, T* cMat, unsigned int m, unsigned int n);
 /* *************************************************************** */
 void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int &x, int &y, int &z);
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index a4acfe91..204c9ab6 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -74,17 +74,17 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
 set(NAME _reg_cuda_kernels)
 cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
-    CUDAContextSingletton.cpp
-    CUDAAladinContent.cpp
-    CUDAKernelFactory.cpp
+    CudaContextSingleton.cpp
+    CudaAladinContent.cpp
+    CudaKernelFactory.cpp
     affineDeformationKernel.cu
     blockMatchingKernel.cu
     resampleKernel.cu
-    CUDAAffineDeformationFieldKernel.cpp
-    CUDABlockMatchingKernel.cpp
-    CUDAConvolutionKernel.cpp
-    CUDAOptimiseKernel.cpp
-    CUDAResampleImageKernel.cpp
+    CudaAffineDeformationFieldKernel.cpp
+    CudaBlockMatchingKernel.cpp
+    CudaConvolutionKernel.cpp
+    CudaOptimiseKernel.cpp
+    CudaResampleImageKernel.cpp
     ../AladinContent.cpp
     ../Platform.cpp
     _reg_resampling_gpu.cu
@@ -101,8 +101,8 @@ install(TARGETS ${NAME}
     LIBRARY DESTINATION lib
     ARCHIVE DESTINATION lib
     )
-install(FILES blockMatchingKernel.h CUDAContextSingletton.h CUDAAladinContent.h DESTINATION include/cuda)
-install(FILES CUDAKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CUDAAffineDeformationFieldKernel.h CUDABlockMatchingKernel.h CUDAConvolutionKernel.h CUDAOptimiseKernel.h CUDAResampleImageKernel.h DESTINATION include/cuda)
+install(FILES blockMatchingKernel.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda)
+install(FILES CudaKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CudaAffineDeformationFieldKernel.h CudaBlockMatchingKernel.h CudaConvolutionKernel.h CudaOptimiseKernel.h CudaResampleImageKernel.h DESTINATION include/cuda)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
 set(NAME _reg_cudainfo)
diff --git a/reg-lib/cuda/CUDAAffineDeformationFieldKernel.h b/reg-lib/cuda/CUDAAffineDeformationFieldKernel.h
deleted file mode 100644
index 630ba4d0..00000000
--- a/reg-lib/cuda/CUDAAffineDeformationFieldKernel.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef CUDAAFFINEDEFORMATIONFIELDKERNEL_H
-#define CUDAAFFINEDEFORMATIONFIELDKERNEL_H
-
-#include "AffineDeformationFieldKernel.h"
-#include "CUDAAladinContent.h"
-
-//Kernel functions for affine deformation field
-class CUDAAffineDeformationFieldKernel: public AffineDeformationFieldKernel
-{
-public:
-    CUDAAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn);
-    void calculate(bool compose = false);
-private:
-    mat44 *affineTransformation;
-    nifti_image *deformationFieldImage;
-
-    float *deformationFieldArray_d, *transformationMatrix_d;
-    int *mask_d;
-
-    CudaAladinContent *con;
-
-    //CUDAContextSingletton *cudaSContext;
-    //CUContext cudaContext;
-};
-
-#endif // CUDAAFFINEDEFORMATIONFIELDKERNEL_H
diff --git a/reg-lib/cuda/CUDAAladinContent.cpp b/reg-lib/cuda/CUDAAladinContent.cpp
deleted file mode 100755
index a8d8347f..00000000
--- a/reg-lib/cuda/CUDAAladinContent.cpp
+++ /dev/null
@@ -1,561 +0,0 @@
-#include "CUDAAladinContent.h"
-#include "_reg_common_cuda.h"
-#include "_reg_tools.h"
-#include <algorithm>
-
-/* *************************************************************** */
-CudaAladinContent::CudaAladinContent()
-{
-   initVars();
-   allocateCuPtrs();
-}
-/* *************************************************************** */
-CudaAladinContent::CudaAladinContent(nifti_image *CurrentReferenceIn,
-                         nifti_image *CurrentFloatingIn,
-                         int *CurrentReferenceMaskIn,
-                         size_t byte,
-                         const unsigned int blockPercentage,
-                         const unsigned int inlierLts,
-                         int blockStep) :
-   AladinContent(CurrentReferenceIn,
-           CurrentFloatingIn,
-           CurrentReferenceMaskIn,
-           sizeof(float), // forcing float for CUDA
-           blockPercentage,
-           inlierLts,
-           blockStep)
-{
-   if(byte!=sizeof(float)){
-      reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
-      reg_print_msg_warn("Datatype has been forced to float");
-   }
-   initVars();
-   allocateCuPtrs();
-
-}
-/* *************************************************************** */
-CudaAladinContent::CudaAladinContent(nifti_image *CurrentReferenceIn,
-                         nifti_image *CurrentFloatingIn,
-                         int *CurrentReferenceMaskIn,
-                         size_t byte) :
-   AladinContent(CurrentReferenceIn,
-           CurrentFloatingIn,
-           CurrentReferenceMaskIn,
-           sizeof(float)) // forcing float for CUDA
-{
-   if(byte!=sizeof(float)){
-      reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
-      reg_print_msg_warn("Datatype has been forced to float");
-   }
-   initVars();
-   allocateCuPtrs();
-}
-/* *************************************************************** */
-CudaAladinContent::CudaAladinContent(nifti_image *CurrentReferenceIn,
-                         nifti_image *CurrentFloatingIn,
-                         int *CurrentReferenceMaskIn,
-                         mat44 *transMat,
-                         size_t byte,
-                         const unsigned int blockPercentage,
-                         const unsigned int inlierLts,
-                         int blockStep) :
-   AladinContent(CurrentReferenceIn,
-           CurrentFloatingIn,
-           CurrentReferenceMaskIn,
-           transMat,
-           sizeof(float), // forcing float for CUDA
-           blockPercentage,
-           inlierLts,
-           blockStep)
-{
-   if(byte!=sizeof(float)){
-      reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
-      reg_print_msg_warn("Datatype has been forced to float");
-   }
-   initVars();
-   allocateCuPtrs();
-}
-/* *************************************************************** */
-CudaAladinContent::CudaAladinContent(nifti_image *CurrentReferenceIn,
-                         nifti_image *CurrentFloatingIn,
-                         int *CurrentReferenceMaskIn,
-                         mat44 *transMat,
-                         size_t byte) :
-   AladinContent(CurrentReferenceIn,
-           CurrentFloatingIn,
-           CurrentReferenceMaskIn,
-           transMat,
-           sizeof(float)) // forcing float for CUDA
-{
-   if(byte!=sizeof(float)){
-      reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
-      reg_print_msg_warn("Datatype has been forced to float");
-   }
-   initVars();
-   allocateCuPtrs();
-}
-/* *************************************************************** */
-CudaAladinContent::~CudaAladinContent()
-{
-   freeCuPtrs();
-}
-/* *************************************************************** */
-void CudaAladinContent::initVars()
-{
-   this->referenceImageArray_d = 0;
-   this->floatingImageArray_d = 0;
-   this->warpedImageArray_d = 0;
-   this->deformationFieldArray_d = 0;
-   this->referencePosition_d = 0;
-   this->warpedPosition_d = 0;
-   this->totalBlock_d = 0;
-   this->mask_d = 0;
-   this->floIJKMat_d = 0;
-
-   if (this->CurrentReference != NULL && this->CurrentReference->nbyper != NIFTI_TYPE_FLOAT32)
-      reg_tools_changeDatatype<float>(this->CurrentReference);
-   if (this->CurrentFloating != NULL && this->CurrentFloating->nbyper != NIFTI_TYPE_FLOAT32) {
-      reg_tools_changeDatatype<float>(this->CurrentFloating);
-      if (this->CurrentWarped != NULL)
-         reg_tools_changeDatatype<float>(this->CurrentWarped);
-   }
-
-   this->cudaSContext = &CUDAContextSingletton::Instance();
-   this->cudaContext = this->cudaSContext->getContext();
-
-   this->referenceVoxels = (this->CurrentReference != NULL) ? this->CurrentReference->nvox : 0;
-   this->floatingVoxels = (this->CurrentFloating != NULL) ? this->CurrentFloating->nvox : 0;
-   //this->numBlocks = (this->blockMatchingParams->activeBlock != NULL) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0;
-}
-/* *************************************************************** */
-void CudaAladinContent::allocateCuPtrs()
-{
-
-   if (this->transformationMatrix != NULL) {
-      cudaCommon_allocateArrayToDevice<float>(&transformationMatrix_d, 16);
-
-      float *tmpMat_h = (float*)malloc(16 * sizeof(float));
-      mat44ToCptr(*(this->transformationMatrix), tmpMat_h);
-      NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice));
-
-      free(tmpMat_h);
-   }
-   if (this->CurrentReferenceMask != NULL) {
-      cudaCommon_allocateArrayToDevice<int>(&mask_d, this->referenceVoxels);
-      cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, this->CurrentReferenceMask, referenceVoxels);
-   }
-   if (this->CurrentReference != NULL) {
-      cudaCommon_allocateArrayToDevice<float>(&referenceImageArray_d, referenceVoxels);
-      cudaCommon_allocateArrayToDevice<float>(&referenceMat_d, 16);
-
-      cudaCommon_transferFromDeviceToNiftiSimple<float>(&referenceImageArray_d, this->CurrentReference);
-
-      float* targetMat = (float *)malloc(16 * sizeof(float)); //freed
-      mat44ToCptr(this->refMatrix_xyz, targetMat);
-      cudaCommon_transferFromDeviceToNiftiSimple1<float>(&referenceMat_d, targetMat, 16);
-      free(targetMat);
-   }
-   if (this->CurrentWarped != NULL) {
-      cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, this->CurrentWarped->nvox);
-      cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, this->CurrentWarped);
-   }
-   if (this->CurrentDeformationField != NULL) {
-      cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, this->CurrentDeformationField->nvox);
-      cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, this->CurrentDeformationField);
-   }
-   if (this->CurrentFloating != NULL) {
-      cudaCommon_allocateArrayToDevice<float>(&floatingImageArray_d, floatingVoxels);
-      cudaCommon_allocateArrayToDevice<float>(&floIJKMat_d, 16);
-
-      cudaCommon_transferFromDeviceToNiftiSimple<float>(&floatingImageArray_d, this->CurrentFloating);
-
-      float *sourceIJKMatrix_h = (float*)malloc(16 * sizeof(float));
-      mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h);
-      NR_CUDA_SAFE_CALL(cudaMemcpy(floIJKMat_d, sourceIJKMatrix_h, 16 * sizeof(float), cudaMemcpyHostToDevice));
-      free(sourceIJKMatrix_h);
-   }
-
-   if (this->blockMatchingParams != NULL) {
-      if (this->blockMatchingParams->referencePosition != NULL) {
-         cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-         cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-      }
-      if (this->blockMatchingParams->warpedPosition != NULL) {
-         cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-         cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-      }
-      if (this->blockMatchingParams->totalBlock != NULL) {
-         cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
-         cudaCommon_transferFromDeviceToNiftiSimple1<int>(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
-      }
-      /* // Removed until CUDA SVD is added back
-      if (this->blockMatchingParams->activeBlockNumber > 0 ) {
-         unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim;
-         unsigned int n = 0;
-
-         if (this->blockMatchingParams->dim == 2) {
-            n = 6;
-         }
-         else {
-            n = 12;
-         }
-
-         cudaCommon_allocateArrayToDevice<float>(&AR_d, m * n);
-         cudaCommon_allocateArrayToDevice<float>(&U_d, m * m); //only the singular vectors output is needed
-         cudaCommon_allocateArrayToDevice<float>(&VT_d, n * n);
-         cudaCommon_allocateArrayToDevice<float>(&Sigma_d, std::min(m, n));
-         cudaCommon_allocateArrayToDevice<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
-         cudaCommon_allocateArrayToDevice<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-      }
-      */
-   }
-}
-/* *************************************************************** */
-nifti_image *CudaAladinContent::getCurrentWarped(int type)
-{
-   downloadImage(CurrentWarped, warpedImageArray_d, type);
-   return CurrentWarped;
-}
-/* *************************************************************** */
-nifti_image *CudaAladinContent::getCurrentDeformationField()
-{
-
-   cudaCommon_transferFromDeviceToCpu<float>((float*) CurrentDeformationField->data, &deformationFieldArray_d, CurrentDeformationField->nvox);
-   return CurrentDeformationField;
-}
-/* *************************************************************** */
-_reg_blockMatchingParam* CudaAladinContent::getBlockMatchingParams()
-{
-
-   cudaCommon_transferFromDeviceToCpu<float>(this->blockMatchingParams->warpedPosition, &warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-   cudaCommon_transferFromDeviceToCpu<float>(this->blockMatchingParams->referencePosition, &referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-   return this->blockMatchingParams;
-}
-/* *************************************************************** */
-void CudaAladinContent::setTransformationMatrix(mat44 *transformationMatrixIn)
-{
-   if (this->transformationMatrix != NULL)
-      cudaCommon_free<float>(&transformationMatrix_d);
-
-   AladinContent::setTransformationMatrix(transformationMatrixIn);
-   float *tmpMat_h = (float*)malloc(16 * sizeof(float));
-   mat44ToCptr(*(this->transformationMatrix), tmpMat_h);
-
-   cudaCommon_allocateArrayToDevice<float>(&transformationMatrix_d, 16);
-   NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice));
-   free(tmpMat_h);
-}
-/* *************************************************************** */
-void CudaAladinContent::setCurrentDeformationField(nifti_image *CurrentDeformationFieldIn)
-{
-   if (this->CurrentDeformationField != NULL)
-      cudaCommon_free<float>(&deformationFieldArray_d);
-   AladinContent::setCurrentDeformationField(CurrentDeformationFieldIn);
-
-   cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, this->CurrentDeformationField->nvox);
-   cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, this->CurrentDeformationField);
-}
-/* *************************************************************** */
-void CudaAladinContent::setCurrentReferenceMask(int *maskIn, size_t nvox)
-{
-   if (this->CurrentReferenceMask != NULL)
-      cudaCommon_free<int>(&mask_d);
-   this->CurrentReferenceMask = maskIn;
-   cudaCommon_allocateArrayToDevice<int>(&mask_d, nvox);
-   cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, maskIn, nvox);
-}
-/* *************************************************************** */
-void CudaAladinContent::setCurrentWarped(nifti_image *currentWarped)
-{
-   if (this->CurrentWarped != NULL)
-      cudaCommon_free<float>(&warpedImageArray_d);
-   AladinContent::setCurrentWarped(currentWarped);
-   reg_tools_changeDatatype<float>(this->CurrentWarped);
-
-   cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, CurrentWarped->nvox);
-   cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, this->CurrentWarped);
-}
-/* *************************************************************** */
-void CudaAladinContent::setBlockMatchingParams(_reg_blockMatchingParam* bmp)
-{
-
-   AladinContent::setBlockMatchingParams(bmp);
-   if (this->blockMatchingParams->referencePosition != NULL) {
-      cudaCommon_free<float>(&referencePosition_d);
-      //referencePosition
-      cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-      cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-   }
-   if (this->blockMatchingParams->warpedPosition != NULL) {
-      cudaCommon_free<float>(&warpedPosition_d);
-      //warpedPosition
-      cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-      cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-   }
-   if (this->blockMatchingParams->totalBlock != NULL) {
-      cudaCommon_free<int>(&totalBlock_d);
-      //activeBlock
-      cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, this->blockMatchingParams->totalBlockNumber);
-      cudaCommon_transferArrayFromCpuToDevice<int>(&totalBlock_d, this->blockMatchingParams->totalBlock, this->blockMatchingParams->totalBlockNumber);
-   }
-   /* // Removed until CUDA SVD is added back
-    if (this->blockMatchingParams->activeBlockNumber > 0) {
-        unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim;
-        unsigned int n = 0;
-
-        if (this->blockMatchingParams->dim == 2) {
-            n = 6;
-        }
-        else {
-            n = 12;
-        }
-
-        cudaCommon_allocateArrayToDevice<float>(&AR_d, m * n);
-        cudaCommon_allocateArrayToDevice<float>(&U_d, m * m); //only the singular vectors output is needed
-        cudaCommon_allocateArrayToDevice<float>(&VT_d, n * n);
-        cudaCommon_allocateArrayToDevice<float>(&Sigma_d, std::min(m, n));
-        cudaCommon_allocateArrayToDevice<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
-        cudaCommon_allocateArrayToDevice<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-    }
-    */
-}
-/* *************************************************************** */
-template<class DataType>
-DataType CudaAladinContent::fillWarpedImageData(float intensity, int datatype) {
-
-   switch (datatype) {
-   case NIFTI_TYPE_FLOAT32:
-      return static_cast<float>(intensity);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      return static_cast<double>(intensity);
-      break;
-   case NIFTI_TYPE_UINT8:
-      intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
-      return static_cast<unsigned char>(intensity > 0 ? reg_round(intensity) : 0);
-      break;
-   case NIFTI_TYPE_UINT16:
-      intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
-      return static_cast<unsigned short>(intensity > 0 ? reg_round(intensity) : 0);
-      break;
-   case NIFTI_TYPE_UINT32:
-      intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-      return static_cast<unsigned int>(intensity > 0 ? reg_round(intensity) : 0);
-      break;
-   default:
-      return static_cast<DataType>(reg_round(intensity));
-      break;
-   }
-}
-/* *************************************************************** */
-template<class T>
-void CudaAladinContent::fillImageData(nifti_image *image,
-                                float* memoryObject,
-                                int type)
-{
-
-   size_t size = image->nvox;
-   float* buffer = NULL;
-   buffer = (float*) malloc(size * sizeof(float));
-
-   if (buffer == NULL) {
-      reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!");
-   }
-
-   cudaCommon_transferFromDeviceToCpu<float>(buffer, &memoryObject, size);
-
-   free(image->data);
-   image->datatype = type;
-   image->nbyper = sizeof(T);
-   image->data = (void *)malloc(image->nvox*image->nbyper);
-   T* dataT = static_cast<T*>(image->data);
-   for (size_t i = 0; i < size; ++i)
-       dataT[i] = fillWarpedImageData<T>(buffer[i], type);
-   free(buffer);
-}
-/* *************************************************************** */
-void CudaAladinContent::downloadImage(nifti_image *image,
-                                float* memoryObject,
-                                int datatype)
-{
-   switch (datatype) {
-   case NIFTI_TYPE_FLOAT32:
-      fillImageData<float>(image, memoryObject, datatype);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      fillImageData<double>(image, memoryObject, datatype);
-      break;
-   case NIFTI_TYPE_UINT8:
-      fillImageData<unsigned char>(image, memoryObject, datatype);
-      break;
-   case NIFTI_TYPE_INT8:
-      fillImageData<char>(image, memoryObject, datatype);
-      break;
-   case NIFTI_TYPE_UINT16:
-      fillImageData<unsigned short>(image, memoryObject, datatype);
-      break;
-   case NIFTI_TYPE_INT16:
-      fillImageData<short>(image, memoryObject, datatype);
-      break;
-   case NIFTI_TYPE_UINT32:
-      fillImageData<unsigned int>(image, memoryObject, datatype);
-      break;
-   case NIFTI_TYPE_INT32:
-      fillImageData<int>(image, memoryObject, datatype);
-      break;
-   default:
-      std::cout << "CUDA: unsupported type" << std::endl;
-      break;
-   }
-}
-/* *************************************************************** */
-float* CudaAladinContent::getReferenceImageArray_d()
-{
-   return referenceImageArray_d;
-}
-/* *************************************************************** */
-float* CudaAladinContent::getFloatingImageArray_d()
-{
-   return floatingImageArray_d;
-}
-/* *************************************************************** */
-float* CudaAladinContent::getWarpedImageArray_d()
-{
-   return warpedImageArray_d;
-}
-/* *************************************************************** */
-float* CudaAladinContent::getTransformationMatrix_d()
-{
-   return transformationMatrix_d;
-}
-/* *************************************************************** */
-float* CudaAladinContent::getReferencePosition_d()
-{
-   return referencePosition_d;
-}
-/* *************************************************************** */
-float* CudaAladinContent::getWarpedPosition_d()
-{
-   return warpedPosition_d;
-}
-/* *************************************************************** */
-float* CudaAladinContent::getDeformationFieldArray_d()
-{
-   return deformationFieldArray_d;
-}
-/* *************************************************************** */
-float* CudaAladinContent::getReferenceMat_d()
-{
-   return referenceMat_d;
-}
-/* *************************************************************** */
-float* CudaAladinContent::getFloIJKMat_d()
-{
-   return floIJKMat_d;
-}
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::getAR_d()
-{
-   return AR_d;
-}
-*/
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::getU_d()
-{
-   return U_d;
-}
-*/
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::getVT_d()
-{
-   return VT_d;
-}
-*/
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::getSigma_d()
-{
-   return Sigma_d;
-}
-*/
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::getLengths_d()
-{
-   return lengths_d;
-}
-*/
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::getNewWarpedPos_d()
-{
-   return newWarpedPos_d;
-}
-*/
-/* *************************************************************** */
-int *CudaAladinContent::getTotalBlock_d()
-{
-   return totalBlock_d;
-}
-/* *************************************************************** */
-int *CudaAladinContent::getMask_d()
-{
-   return mask_d;
-}
-/* *************************************************************** */
-int *CudaAladinContent::getReferenceDims()
-{
-   return referenceDims;
-}
-/* *************************************************************** */
-int *CudaAladinContent::getFloatingDims()
-{
-   return floatingDims;
-}
-/* *************************************************************** */
-void CudaAladinContent::freeCuPtrs()
-{
-   if (this->transformationMatrix != NULL)
-      cudaCommon_free<float>(&transformationMatrix_d);
-
-   if (this->CurrentReference != NULL) {
-      cudaCommon_free<float>(&referenceImageArray_d);
-      cudaCommon_free<float>(&referenceMat_d);
-   }
-
-   if (this->CurrentFloating != NULL) {
-      cudaCommon_free<float>(&floatingImageArray_d);
-      cudaCommon_free<float>(&floIJKMat_d);
-   }
-
-   if (this->CurrentWarped != NULL)
-      cudaCommon_free<float>(&warpedImageArray_d);
-
-   if (this->CurrentDeformationField != NULL)
-      cudaCommon_free<float>(&deformationFieldArray_d);
-
-   if (this->CurrentReferenceMask != NULL)
-      cudaCommon_free<int>(&mask_d);
-
-   if (this->blockMatchingParams != NULL) {
-      cudaCommon_free<int>(&totalBlock_d);
-      cudaCommon_free<float>(&referencePosition_d);
-      cudaCommon_free<float>(&warpedPosition_d);
-      /*
-      cudaCommon_free<float>(&AR_d);
-      cudaCommon_free<float>(&U_d);
-      cudaCommon_free<float>(&VT_d);
-      cudaCommon_free<float>(&Sigma_d);
-      cudaCommon_free<float>(&lengths_d);
-      cudaCommon_free<float>(&newWarpedPos_d);
-      */
-   }
-}
-/* *************************************************************** */
-bool CudaAladinContent::isCurrentComputationDoubleCapable() {
-    return this->cudaSContext->getIsCardDoubleCapable();
-}
-/* *************************************************************** */
diff --git a/reg-lib/cuda/CUDAAladinContent.h b/reg-lib/cuda/CUDAAladinContent.h
deleted file mode 100755
index 109abc0e..00000000
--- a/reg-lib/cuda/CUDAAladinContent.h
+++ /dev/null
@@ -1,114 +0,0 @@
-#pragma once
-
-#include "AladinContent.h"
-#include "CUDAContextSingletton.h"
-
-#include "_reg_tools.h"
-
-class CudaAladinContent: public AladinContent {
-
-public:
-	CudaAladinContent();
-	CudaAladinContent(nifti_image *CurrentReferenceIn,
-							nifti_image *CurrentFloatingIn,
-							int *CurrentReferenceMaskIn,
-							size_t byte,
-							const unsigned int blockPercentage,
-							const unsigned int inlierLts,
-							int blockStep);
-	CudaAladinContent(nifti_image *CurrentReferenceIn,
-							nifti_image *CurrentFloatingIn,
-							int *CurrentReferenceMaskIn,
-							size_t byte);
-	CudaAladinContent(nifti_image *CurrentReferenceIn,
-							nifti_image *CurrentFloatingIn,
-							int *CurrentReferenceMaskIn,
-							mat44 *transMat,
-							size_t byte,
-							const unsigned int blockPercentage,
-							const unsigned int inlierLts,
-							int blockStep);
-	CudaAladinContent(nifti_image *CurrentReferenceIn,
-							nifti_image *CurrentFloatingIn,
-							int *CurrentReferenceMaskIn,
-							mat44 *transMat,
-							size_t byte);
-	~CudaAladinContent();
-
-	bool isCurrentComputationDoubleCapable();
-
-	//device getters
-	float* getReferenceImageArray_d();
-	float* getFloatingImageArray_d();
-	float* getWarpedImageArray_d();
-	float* getTransformationMatrix_d();
-	float* getReferencePosition_d();
-	float* getWarpedPosition_d();
-	float* getDeformationFieldArray_d();
-	float* getReferenceMat_d();
-	float* getFloIJKMat_d();
-
-	//	float* getAR_d(); // Removed until CUDA SVD is added back
-	//	float* getU_d(); // Removed until CUDA SVD is added back
-	//	float* getVT_d(); // Removed until CUDA SVD is added back
-	//	float* getSigma_d(); // Removed until CUDA SVD is added back
-	//	float* getLengths_d(); // Removed until CUDA SVD is added back
-	//	float* getNewWarpedPos_d(); // Removed until CUDA SVD is added back
-
-	int *getTotalBlock_d();
-	int *getMask_d();
-
-	int *getReferenceDims();
-	int *getFloatingDims();
-
-	//cpu getters and setters
-	_reg_blockMatchingParam* getBlockMatchingParams();
-	nifti_image *getCurrentDeformationField();
-	nifti_image *getCurrentWarped(int typ);
-
-	void setTransformationMatrix(mat44 *transformationMatrixIn);
-	void setCurrentWarped(nifti_image *warpedImageIn);
-	void setCurrentDeformationField(nifti_image *CurrentDeformationFieldIn);
-	void setCurrentReferenceMask(int *maskIn, size_t size);
-	void setBlockMatchingParams(_reg_blockMatchingParam* bmp);
-
-private:
-	void initVars();
-
-	//void uploadAladinContent();
-	void allocateCuPtrs();
-	void freeCuPtrs();
-
-	CUDAContextSingletton* cudaSContext;
-	CUcontext cudaContext;
-
-	float *referenceImageArray_d;
-	float *floatingImageArray_d;
-	float *warpedImageArray_d;
-	float *deformationFieldArray_d;
-	float *referencePosition_d;
-	float *warpedPosition_d;
-	int   *totalBlock_d, *mask_d;
-
-	float* transformationMatrix_d;
-	float* referenceMat_d;
-	float* floIJKMat_d;
-
-	//svd
-	//	float* AR_d;//A and then pseudoinverse  // Removed until CUDA SVD is added back
-	//	float* U_d; // Removed until CUDA SVD is added back
-	//	float* VT_d; // Removed until CUDA SVD is added back
-	//	float* Sigma_d; // Removed until CUDA SVD is added back
-	//	float* lengths_d; // Removed until CUDA SVD is added back
-	//	float* newWarpedPos_d; // Removed until CUDA SVD is added back
-
-	int referenceDims[4];
-	int floatingDims[4];
-
-	void downloadImage(nifti_image *image, float* memoryObject, int datatype);
-	template<class T>
-	void fillImageData(nifti_image *image, float* memoryObject, int type);
-
-	template<class FloatingTYPE>
-	FloatingTYPE fillWarpedImageData(float intensity, int datatype);
-};
diff --git a/reg-lib/cuda/CUDAContextSingletton.cpp b/reg-lib/cuda/CUDAContextSingletton.cpp
deleted file mode 100644
index 7764bac8..00000000
--- a/reg-lib/cuda/CUDAContextSingletton.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-#include "CUDAContextSingletton.h"
-#include "_reg_common_cuda.h"
-
-/* *************************************************************** */
-CUDAContextSingletton::CUDAContextSingletton()
-{
-   // The CUDA card is setup
-   cuInit(0);
-   int device_count=0;
-   cudaGetDeviceCount(&device_count);
-#ifndef NDEBUG
-   char text[255];
-   sprintf(text,"[NiftyReg CUDA] %i card(s) detected\n", device_count);
-   reg_print_msg_debug(text);
-#endif
-   this->cudaContext = NULL;
-   this->numDevices = device_count;
-   this->cudaIdx = 999;
-   pickCard(this->cudaIdx);
-}
-/* *************************************************************** */
-void CUDAContextSingletton::setCudaIdx(unsigned int cudaIdxIn)
-{
-   if (cudaIdxIn>=this->numDevices){
-      reg_print_msg_error("The specified cuda card id is not defined");
-      reg_print_msg_error("Run reg_gpuinfo to get the proper id");
-      reg_exit();
-   }
-   this->cudaIdx=cudaIdxIn;
-   NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx));
-   NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx))
-}
-/* *************************************************************** */
-CUcontext CUDAContextSingletton::getContext()
-{
-   return this->cudaContext;
-}
-/* *************************************************************** */
-void CUDAContextSingletton::pickCard(unsigned deviceId = 999)
-{
-    struct cudaDeviceProp deviceProp;
-    if(deviceId < this->numDevices) {
-      this->cudaIdx=deviceId;
-      //
-      NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx));
-      NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx));
-      //
-      cudaGetDeviceProperties(&deviceProp, this->cudaIdx);
-      if(deviceProp.major > 1) {
-          this->isCardDoubleCapable = true;
-      }
-      else if(deviceProp.major == 1 && deviceProp.minor > 2) {
-          this->isCardDoubleCapable = true;
-      } else {
-          this->isCardDoubleCapable = false;
-      }
-      //
-      return;
-    }
-
-   // following code is from cutGetMaxGflopsDeviceId()
-   int max_gflops_device = 0;
-   int max_gflops = 0;
-   unsigned int current_device = 0;
-   while(current_device<this->numDevices ){
-      cudaGetDeviceProperties( &deviceProp, current_device );
-      int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate;
-      if( gflops > max_gflops ){
-         max_gflops = gflops;
-         max_gflops_device = current_device;
-      }
-      ++current_device;
-   }
-   NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device));
-   NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device))
-   NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
-
-   if(deviceProp.major<1){
-      reg_print_msg_error("[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n");
-      reg_exit();
-   }
-   else{
-      size_t free=0;
-      size_t total=0;
-      cuMemGetInfo(&free, &total);
-      if(deviceProp.totalGlobalMem != total){
-         fprintf(stderr,"[NiftyReg CUDA ERROR] The CUDA card %s does not seem to be available\n",
-                 deviceProp.name);
-         fprintf(stderr,"[NiftyReg CUDA ERROR] Expected total memory: %zu Mb - Recovered total memory: %zu Mb\n",
-                 deviceProp.totalGlobalMem/(1024*1024), total/(1024*1024));
-         reg_exit();
-      }
-#ifndef NDEBUG
-      printf("[NiftyReg CUDA] The following device is used: %s\n",
-             deviceProp.name);
-      printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n",
-             (unsigned long int)(free/(1024*1024)),
-             (unsigned long int)(total/(1024*1024)));
-      printf("[NiftyReg CUDA] Card compute capability: %i.%i\n",
-             deviceProp.major,
-             deviceProp.minor);
-      printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n",
-             deviceProp.sharedMemPerBlock);
-      printf("[NiftyReg CUDA] CUDA version %i\n",
-             CUDART_VERSION);
-      printf("[NiftyReg CUDA] Card clock rate: %i MHz\n",
-             deviceProp.clockRate/1000);
-      printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n",
-             deviceProp.multiProcessorCount);
-#endif
-      this->cudaIdx = max_gflops_device;
-      //
-      cudaGetDeviceProperties(&deviceProp, this->cudaIdx);
-      if(deviceProp.major > 1) {
-          this->isCardDoubleCapable = true;
-      }
-      else if(deviceProp.major == 1 && deviceProp.minor > 2) {
-          this->isCardDoubleCapable = true;
-      } else {
-          this->isCardDoubleCapable = false;
-      }
-      //
-   }
-}
-/* *************************************************************** */
-bool CUDAContextSingletton::getIsCardDoubleCapable()
-{
-    return this->isCardDoubleCapable;
-}
-/* *************************************************************** */
-CUDAContextSingletton::~CUDAContextSingletton()
-{
-   cuCtxDestroy(this->cudaContext);
-}
diff --git a/reg-lib/cuda/CUDAContextSingletton.h b/reg-lib/cuda/CUDAContextSingletton.h
deleted file mode 100644
index d965b463..00000000
--- a/reg-lib/cuda/CUDAContextSingletton.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef CUDACONTEXTSINGLETTON_H
-#define CUDACONTEXTSINGLETTON_H
-
-#include "_reg_maths.h"
-#include "cuda.h"
-
-class CUDAContextSingletton
-{
-    public:
-        static CUDAContextSingletton& Instance() {
-            static CUDAContextSingletton instance; // Guaranteed to be destroyed.
-            // Instantiated on first use.
-            return instance;
-        }
-        void setCudaIdx(unsigned int cudaIdxIn);
-        void pickCard(unsigned deviceId);
-
-        CUcontext getContext();
-
-        bool getIsCardDoubleCapable();
-
-     private:
-
-        static CUDAContextSingletton* _instance;
-
-        CUDAContextSingletton();
-        ~CUDAContextSingletton();
-
-        CUDAContextSingletton(CUDAContextSingletton const&);// Don't Implement
-        void operator=(CUDAContextSingletton const&); // Don't implement
-
-        bool isCardDoubleCapable;
-        CUcontext cudaContext;
-        unsigned numDevices;
-        unsigned cudaIdx;
-};
-
-#endif // CUDACONTEXTSINGLETTON_H
diff --git a/reg-lib/cuda/CUDAConvolutionKernel.h b/reg-lib/cuda/CUDAConvolutionKernel.h
deleted file mode 100644
index b8ca24dd..00000000
--- a/reg-lib/cuda/CUDAConvolutionKernel.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef CUDACONVOLUTIONKERNEL_H
-#define CUDACONVOLUTIONKERNEL_H
-
-#include "ConvolutionKernel.h"
-#include "CUDAContextSingletton.h"
-
-//a kernel function for convolution (gaussian smoothing?)
-class CUDAConvolutionKernel: public ConvolutionKernel
-{
-public:
-
-    CUDAConvolutionKernel(std::string name);
-    void calculate(nifti_image *image,
-                        float *sigma,
-                        int kernelType,
-                        int *mask = NULL,
-                        bool *timePoints = NULL,
-                        bool *axis = NULL);
-
-    private:
-       //CUDAContextSingletton * cudaSContext;
-
-};
-
-#endif // CUDACONVOLUTIONKERNEL_H
diff --git a/reg-lib/cuda/CUDAKernelFactory.cpp b/reg-lib/cuda/CUDAKernelFactory.cpp
deleted file mode 100755
index 7c983420..00000000
--- a/reg-lib/cuda/CUDAKernelFactory.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-#include "CUDAKernelFactory.h"
-#include "CUDAAffineDeformationFieldKernel.h"
-#include "CUDAConvolutionKernel.h"
-#include "CUDABlockMatchingKernel.h"
-#include "CUDAResampleImageKernel.h"
-#include "CUDAOptimiseKernel.h"
-#include "AladinContent.h"
-
-Kernel *CUDAKernelFactory::produceKernel(std::string name,  AladinContent *con) const {
-    if( name == AffineDeformationFieldKernel::getName() ) return new CUDAAffineDeformationFieldKernel(con, name);
-    else if( name == ConvolutionKernel::getName() ) return new CUDAConvolutionKernel(name);
-    else if( name == BlockMatchingKernel::getName() ) return new CUDABlockMatchingKernel( con, name);
-    else if( name == ResampleImageKernel::getName() ) return new CUDAResampleImageKernel(con, name);
-    else if( name == OptimiseKernel::getName() ) return new CUDAOptimiseKernel(con, name);
-	else return NULL;
-}
diff --git a/reg-lib/cuda/CUDAKernelFactory.h b/reg-lib/cuda/CUDAKernelFactory.h
deleted file mode 100755
index acf9f368..00000000
--- a/reg-lib/cuda/CUDAKernelFactory.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#pragma once
-#include "KernelFactory.h"
-#include "AladinContent.h"
-
-class CUDAKernelFactory : public KernelFactory
-{
-public:
-	Kernel *produceKernel(std::string name, AladinContent *con) const;
-};
-
diff --git a/reg-lib/cuda/CUDAOptimiseKernel.h b/reg-lib/cuda/CUDAOptimiseKernel.h
deleted file mode 100644
index 6fbe25b8..00000000
--- a/reg-lib/cuda/CUDAOptimiseKernel.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef CUDAOPTIMISEKERNEL_H
-#define CUDAOPTIMISEKERNEL_H
-
-#include "OptimiseKernel.h"
-#include "CUDAAladinContent.h"
-
-//kernel functions for numerical optimisation
-class CUDAOptimiseKernel: public OptimiseKernel
-{
-public:
-    CUDAOptimiseKernel(AladinContent *conIn, std::string name);
-    void calculate(bool affine);
-
-private:
-    _reg_blockMatchingParam *blockMatchingParams;
-    mat44 *transformationMatrix;
-    CudaAladinContent *con;
-
-//    float* AR_d; // Removed until CUDA SVD is added back
-//    float* U_d; // Removed until CUDA SVD is added back
-//    float* Sigma_d; // Removed until CUDA SVD is added back
-//    float* VT_d; // Removed until CUDA SVD is added back
-//    float* lengths_d; // Removed until CUDA SVD is added back
-//    float* newWarpedPos_d; // Removed until CUDA SVD is added back
-
-};
-
-#endif // CUDAOPTIMISEKERNEL_H
diff --git a/reg-lib/cuda/CUDAPlatform.h b/reg-lib/cuda/CUDAPlatform.h
deleted file mode 100755
index afcc9e0b..00000000
--- a/reg-lib/cuda/CUDAPlatform.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef CudaPLATFORM_H_
-#define CudaPLATFORM_H_
-
-#include "Content.h"
-#include "Platform.h"
-
-class CudaPlatform : public Platform
-{
-public:
-	CudaPlatform();
-
-	std::string getName(){ return "cuda_platform"; }
-
-};
-#endif //CudaPLATFORM_H_
diff --git a/reg-lib/cuda/CUDAAffineDeformationFieldKernel.cpp b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
similarity index 55%
rename from reg-lib/cuda/CUDAAffineDeformationFieldKernel.cpp
rename to reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
index 3e42740e..3b3a572c 100644
--- a/reg-lib/cuda/CUDAAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
@@ -1,26 +1,26 @@
-#include "CUDAAffineDeformationFieldKernel.h"
+#include "CudaAffineDeformationFieldKernel.h"
 #include "affineDeformationKernel.h"
 
 /* *************************************************************** */
-CUDAAffineDeformationFieldKernel::CUDAAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) :
+CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) :
    AffineDeformationFieldKernel(nameIn)
 {
    con = static_cast<CudaAladinContent*>(conIn);
 
    //get necessary cpu ptrs
-   this->deformationFieldImage = con->AladinContent::getCurrentDeformationField();
-   this->affineTransformation = con->AladinContent::getTransformationMatrix();
+   this->deformationFieldImage = con->AladinContent::GetCurrentDeformationField();
+   this->affineTransformation = con->AladinContent::GetTransformationMatrix();
 
    //get necessary cuda ptrs
-   mask_d = con->getMask_d();
-   deformationFieldArray_d = con->getDeformationFieldArray_d();
-   transformationMatrix_d = con->getTransformationMatrix_d();
+   mask_d = con->GetMask_d();
+   deformationFieldArray_d = con->GetDeformationFieldArray_d();
+   transformationMatrix_d = con->GetTransformationMatrix_d();
 
-   //cudaSContext = &CUDAContextSingletton::Instance();
-   //cudaContext = cudaSContext->getContext();
+   //cudaSContext = &CudaContextSingleton::Instance();
+   //cudaContext = cudaSContext->GetContext();
 }
 /* *************************************************************** */
-void CUDAAffineDeformationFieldKernel::calculate(bool compose)
+void CudaAffineDeformationFieldKernel::Calculate(bool compose)
 {
    launchAffine(this->affineTransformation,
                 this->deformationFieldImage,
diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.h b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h
new file mode 100644
index 00000000..01614cff
--- /dev/null
+++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "AffineDeformationFieldKernel.h"
+#include "CudaAladinContent.h"
+
+//Kernel functions for affine deformation field
+class CudaAffineDeformationFieldKernel: public AffineDeformationFieldKernel
+{
+public:
+    CudaAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn);
+    void Calculate(bool compose = false);
+private:
+    mat44 *affineTransformation;
+    nifti_image *deformationFieldImage;
+
+    float *deformationFieldArray_d, *transformationMatrix_d;
+    int *mask_d;
+
+    CudaAladinContent *con;
+
+    //CudaContextSingleton *cudaSContext;
+    //CUContext cudaContext;
+};
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
new file mode 100644
index 00000000..b5ddab6c
--- /dev/null
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -0,0 +1,525 @@
+#include "CudaAladinContent.h"
+#include "_reg_common_cuda.h"
+#include "_reg_tools.h"
+#include <algorithm>
+
+/* *************************************************************** */
+CudaAladinContent::CudaAladinContent() {
+    InitVars();
+    AllocateCuPtrs();
+}
+/* *************************************************************** */
+CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn,
+                                     nifti_image *currentFloatingIn,
+                                     int *currentReferenceMaskIn,
+                                     size_t byte,
+                                     const unsigned int blockPercentage,
+                                     const unsigned int inlierLts,
+                                     int blockStep) :
+    AladinContent(currentReferenceIn,
+                  currentFloatingIn,
+                  currentReferenceMaskIn,
+                  sizeof(float), // forcing float for CUDA
+                  blockPercentage,
+                  inlierLts,
+                  blockStep) {
+    if (byte != sizeof(float)) {
+        reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
+        reg_print_msg_warn("Datatype has been forced to float");
+    }
+    InitVars();
+    AllocateCuPtrs();
+
+}
+/* *************************************************************** */
+CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn,
+                                     nifti_image *currentFloatingIn,
+                                     int *currentReferenceMaskIn,
+                                     size_t byte) :
+    AladinContent(currentReferenceIn,
+                  currentFloatingIn,
+                  currentReferenceMaskIn,
+                  sizeof(float)) // forcing float for CUDA
+{
+    if (byte != sizeof(float)) {
+        reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
+        reg_print_msg_warn("Datatype has been forced to float");
+    }
+    InitVars();
+    AllocateCuPtrs();
+}
+/* *************************************************************** */
+CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn,
+                                     nifti_image *currentFloatingIn,
+                                     int *currentReferenceMaskIn,
+                                     mat44 *transMat,
+                                     size_t byte,
+                                     const unsigned int blockPercentage,
+                                     const unsigned int inlierLts,
+                                     int blockStep) :
+    AladinContent(currentReferenceIn,
+                  currentFloatingIn,
+                  currentReferenceMaskIn,
+                  transMat,
+                  sizeof(float), // forcing float for CUDA
+                  blockPercentage,
+                  inlierLts,
+                  blockStep) {
+    if (byte != sizeof(float)) {
+        reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
+        reg_print_msg_warn("Datatype has been forced to float");
+    }
+    InitVars();
+    AllocateCuPtrs();
+}
+/* *************************************************************** */
+CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn,
+                                     nifti_image *currentFloatingIn,
+                                     int *currentReferenceMaskIn,
+                                     mat44 *transMat,
+                                     size_t byte) :
+    AladinContent(currentReferenceIn,
+                  currentFloatingIn,
+                  currentReferenceMaskIn,
+                  transMat,
+                  sizeof(float)) // forcing float for CUDA
+{
+    if (byte != sizeof(float)) {
+        reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
+        reg_print_msg_warn("Datatype has been forced to float");
+    }
+    InitVars();
+    AllocateCuPtrs();
+}
+/* *************************************************************** */
+CudaAladinContent::~CudaAladinContent() {
+    FreeCuPtrs();
+}
+/* *************************************************************** */
+void CudaAladinContent::InitVars() {
+    this->referenceImageArray_d = 0;
+    this->floatingImageArray_d = 0;
+    this->warpedImageArray_d = 0;
+    this->deformationFieldArray_d = 0;
+    this->referencePosition_d = 0;
+    this->warpedPosition_d = 0;
+    this->totalBlock_d = 0;
+    this->mask_d = 0;
+    this->floIJKMat_d = 0;
+
+    if (this->currentReference != nullptr && this->currentReference->nbyper != NIFTI_TYPE_FLOAT32)
+        reg_tools_changeDatatype<float>(this->currentReference);
+    if (this->currentFloating != nullptr && this->currentFloating->nbyper != NIFTI_TYPE_FLOAT32) {
+        reg_tools_changeDatatype<float>(this->currentFloating);
+        if (this->currentWarped != nullptr)
+            reg_tools_changeDatatype<float>(this->currentWarped);
+    }
+
+    this->cudaSContext = &CudaContextSingleton::Instance();
+    this->cudaContext = this->cudaSContext->GetContext();
+
+    //this->numBlocks = (this->blockMatchingParams->activeBlock != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0;
+}
+/* *************************************************************** */
+void CudaAladinContent::AllocateCuPtrs() {
+    if (this->transformationMatrix != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&transformationMatrix_d, 16);
+
+        float *tmpMat_h = (float*)malloc(16 * sizeof(float));
+        mat44ToCptr(*(this->transformationMatrix), tmpMat_h);
+        NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice));
+
+        free(tmpMat_h);
+    }
+    if (this->currentReferenceMask != nullptr) {
+        cudaCommon_allocateArrayToDevice<int>(&mask_d, currentReference->nvox);
+        cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, this->currentReferenceMask, currentReference->nvox);
+    }
+    if (this->currentReference != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&referenceImageArray_d, currentReference->nvox);
+        cudaCommon_allocateArrayToDevice<float>(&referenceMat_d, 16);
+
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&referenceImageArray_d, this->currentReference);
+
+        float* targetMat = (float *)malloc(16 * sizeof(float)); //freed
+        mat44ToCptr(this->refMatrix_xyz, targetMat);
+        cudaCommon_transferFromDeviceToNiftiSimple1<float>(&referenceMat_d, targetMat, 16);
+        free(targetMat);
+    }
+    if (this->currentWarped != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, this->currentWarped->nvox);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, this->currentWarped);
+    }
+    if (this->currentDeformationField != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, this->currentDeformationField->nvox);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, this->currentDeformationField);
+    }
+    if (this->currentFloating != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&floatingImageArray_d, this->currentFloating->nvox);
+        cudaCommon_allocateArrayToDevice<float>(&floIJKMat_d, 16);
+
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&floatingImageArray_d, this->currentFloating);
+
+        float *sourceIJKMatrix_h = (float*)malloc(16 * sizeof(float));
+        mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h);
+        NR_CUDA_SAFE_CALL(cudaMemcpy(floIJKMat_d, sourceIJKMatrix_h, 16 * sizeof(float), cudaMemcpyHostToDevice));
+        free(sourceIJKMatrix_h);
+    }
+
+    if (this->blockMatchingParams != nullptr) {
+        if (this->blockMatchingParams->referencePosition != nullptr) {
+            cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+            cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+        }
+        if (this->blockMatchingParams->warpedPosition != nullptr) {
+            cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+            cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+        }
+        if (this->blockMatchingParams->totalBlock != nullptr) {
+            cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
+            cudaCommon_transferFromDeviceToNiftiSimple1<int>(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
+        }
+        /* // Removed until CUDA SVD is added back
+        if (this->blockMatchingParams->activeBlockNumber > 0 ) {
+           unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim;
+           unsigned int n = 0;
+
+           if (this->blockMatchingParams->dim == 2) {
+              n = 6;
+           }
+           else {
+              n = 12;
+           }
+
+           cudaCommon_allocateArrayToDevice<float>(&AR_d, m * n);
+           cudaCommon_allocateArrayToDevice<float>(&U_d, m * m); //only the singular vectors output is needed
+           cudaCommon_allocateArrayToDevice<float>(&VT_d, n * n);
+           cudaCommon_allocateArrayToDevice<float>(&Sigma_d, std::min(m, n));
+           cudaCommon_allocateArrayToDevice<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
+           cudaCommon_allocateArrayToDevice<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+        }
+        */
+    }
+}
+/* *************************************************************** */
+nifti_image* CudaAladinContent::GetCurrentWarped(int type) {
+    DownloadImage(currentWarped, warpedImageArray_d, type);
+    return currentWarped;
+}
+/* *************************************************************** */
+nifti_image* CudaAladinContent::GetCurrentDeformationField() {
+
+    cudaCommon_transferFromDeviceToCpu<float>((float*)currentDeformationField->data, &deformationFieldArray_d, currentDeformationField->nvox);
+    return currentDeformationField;
+}
+/* *************************************************************** */
+_reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() {
+
+    cudaCommon_transferFromDeviceToCpu<float>(this->blockMatchingParams->warpedPosition, &warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+    cudaCommon_transferFromDeviceToCpu<float>(this->blockMatchingParams->referencePosition, &referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+    return this->blockMatchingParams;
+}
+/* *************************************************************** */
+void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
+    if (this->transformationMatrix != nullptr)
+        cudaCommon_free<float>(&transformationMatrix_d);
+
+    AladinContent::SetTransformationMatrix(transformationMatrixIn);
+    float *tmpMat_h = (float*)malloc(16 * sizeof(float));
+    mat44ToCptr(*(this->transformationMatrix), tmpMat_h);
+
+    cudaCommon_allocateArrayToDevice<float>(&transformationMatrix_d, 16);
+    NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice));
+    free(tmpMat_h);
+}
+/* *************************************************************** */
+void CudaAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) {
+    if (this->currentDeformationField != nullptr)
+        cudaCommon_free<float>(&deformationFieldArray_d);
+    AladinContent::SetCurrentDeformationField(currentDeformationFieldIn);
+
+    cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, this->currentDeformationField->nvox);
+    cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, this->currentDeformationField);
+}
+/* *************************************************************** */
+void CudaAladinContent::SetCurrentReferenceMask(int *maskIn, size_t nvox) {
+    if (this->currentReferenceMask != nullptr)
+        cudaCommon_free<int>(&mask_d);
+    this->currentReferenceMask = maskIn;
+    cudaCommon_allocateArrayToDevice<int>(&mask_d, nvox);
+    cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, maskIn, nvox);
+}
+/* *************************************************************** */
+void CudaAladinContent::SetCurrentWarped(nifti_image *currentWarped) {
+    if (this->currentWarped != nullptr)
+        cudaCommon_free<float>(&warpedImageArray_d);
+    AladinContent::SetCurrentWarped(currentWarped);
+    reg_tools_changeDatatype<float>(this->currentWarped);
+
+    cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, currentWarped->nvox);
+    cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, this->currentWarped);
+}
+/* *************************************************************** */
+void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
+    AladinContent::SetBlockMatchingParams(bmp);
+    if (this->blockMatchingParams->referencePosition != nullptr) {
+        cudaCommon_free<float>(&referencePosition_d);
+        //referencePosition
+        cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+        cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+    }
+    if (this->blockMatchingParams->warpedPosition != nullptr) {
+        cudaCommon_free<float>(&warpedPosition_d);
+        //warpedPosition
+        cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+        cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+    }
+    if (this->blockMatchingParams->totalBlock != nullptr) {
+        cudaCommon_free<int>(&totalBlock_d);
+        //activeBlock
+        cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, this->blockMatchingParams->totalBlockNumber);
+        cudaCommon_transferArrayFromCpuToDevice<int>(&totalBlock_d, this->blockMatchingParams->totalBlock, this->blockMatchingParams->totalBlockNumber);
+    }
+    /* // Removed until CUDA SVD is added back
+     if (this->blockMatchingParams->activeBlockNumber > 0) {
+         unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim;
+         unsigned int n = 0;
+
+         if (this->blockMatchingParams->dim == 2) {
+             n = 6;
+         }
+         else {
+             n = 12;
+         }
+
+         cudaCommon_allocateArrayToDevice<float>(&AR_d, m * n);
+         cudaCommon_allocateArrayToDevice<float>(&U_d, m * m); //only the singular vectors output is needed
+         cudaCommon_allocateArrayToDevice<float>(&VT_d, n * n);
+         cudaCommon_allocateArrayToDevice<float>(&Sigma_d, std::min(m, n));
+         cudaCommon_allocateArrayToDevice<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
+         cudaCommon_allocateArrayToDevice<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+     }
+     */
+}
+/* *************************************************************** */
+template<class DataType>
+DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) {
+    switch (datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        return static_cast<float>(intensity);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        return static_cast<double>(intensity);
+        break;
+    case NIFTI_TYPE_UINT8:
+        intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
+        return static_cast<unsigned char>(intensity > 0 ? reg_round(intensity) : 0);
+        break;
+    case NIFTI_TYPE_UINT16:
+        intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
+        return static_cast<unsigned short>(intensity > 0 ? reg_round(intensity) : 0);
+        break;
+    case NIFTI_TYPE_UINT32:
+        intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
+        return static_cast<unsigned int>(intensity > 0 ? reg_round(intensity) : 0);
+        break;
+    default:
+        return static_cast<DataType>(reg_round(intensity));
+        break;
+    }
+}
+/* *************************************************************** */
+template<class T>
+void CudaAladinContent::FillImageData(nifti_image *image,
+                                      float* memoryObject,
+                                      int type) {
+    size_t size = image->nvox;
+    float* buffer = nullptr;
+    buffer = (float*)malloc(size * sizeof(float));
+
+    if (buffer == nullptr) {
+        reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!");
+    }
+
+    cudaCommon_transferFromDeviceToCpu<float>(buffer, &memoryObject, size);
+
+    free(image->data);
+    image->datatype = type;
+    image->nbyper = sizeof(T);
+    image->data = (void *)malloc(image->nvox * image->nbyper);
+    T* dataT = static_cast<T*>(image->data);
+    for (size_t i = 0; i < size; ++i)
+        dataT[i] = FillWarpedImageData<T>(buffer[i], type);
+    free(buffer);
+}
+/* *************************************************************** */
+void CudaAladinContent::DownloadImage(nifti_image *image,
+                                      float* memoryObject,
+                                      int datatype) {
+    switch (datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        FillImageData<float>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        FillImageData<double>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_UINT8:
+        FillImageData<unsigned char>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_INT8:
+        FillImageData<char>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_UINT16:
+        FillImageData<unsigned short>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_INT16:
+        FillImageData<short>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_UINT32:
+        FillImageData<unsigned int>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_INT32:
+        FillImageData<int>(image, memoryObject, datatype);
+        break;
+    default:
+        std::cout << "CUDA: unsupported type" << std::endl;
+        break;
+    }
+}
+/* *************************************************************** */
+float* CudaAladinContent::GetReferenceImageArray_d() {
+    return referenceImageArray_d;
+}
+/* *************************************************************** */
+float* CudaAladinContent::GetFloatingImageArray_d() {
+    return floatingImageArray_d;
+}
+/* *************************************************************** */
+float* CudaAladinContent::GetWarpedImageArray_d() {
+    return warpedImageArray_d;
+}
+/* *************************************************************** */
+float* CudaAladinContent::GetTransformationMatrix_d() {
+    return transformationMatrix_d;
+}
+/* *************************************************************** */
+float* CudaAladinContent::GetReferencePosition_d() {
+    return referencePosition_d;
+}
+/* *************************************************************** */
+float* CudaAladinContent::GetWarpedPosition_d() {
+    return warpedPosition_d;
+}
+/* *************************************************************** */
+float* CudaAladinContent::GetDeformationFieldArray_d() {
+    return deformationFieldArray_d;
+}
+/* *************************************************************** */
+float* CudaAladinContent::GetReferenceMat_d() {
+    return referenceMat_d;
+}
+/* *************************************************************** */
+float* CudaAladinContent::GetFloIJKMat_d() {
+    return floIJKMat_d;
+}
+/* *************************************************************** */
+/* // Removed until CUDA SVD is added back
+float* CudaAladinContent::GetAR_d()
+{
+   return AR_d;
+}
+*/
+/* *************************************************************** */
+/* // Removed until CUDA SVD is added back
+float* CudaAladinContent::GetU_d()
+{
+   return U_d;
+}
+*/
+/* *************************************************************** */
+/* // Removed until CUDA SVD is added back
+float* CudaAladinContent::GetVT_d()
+{
+   return VT_d;
+}
+*/
+/* *************************************************************** */
+/* // Removed until CUDA SVD is added back
+float* CudaAladinContent::GetSigma_d()
+{
+   return Sigma_d;
+}
+*/
+/* *************************************************************** */
+/* // Removed until CUDA SVD is added back
+float* CudaAladinContent::GetLengths_d()
+{
+   return lengths_d;
+}
+*/
+/* *************************************************************** */
+/* // Removed until CUDA SVD is added back
+float* CudaAladinContent::GetNewWarpedPos_d()
+{
+   return newWarpedPos_d;
+}
+*/
+/* *************************************************************** */
+int* CudaAladinContent::GetTotalBlock_d() {
+    return totalBlock_d;
+}
+/* *************************************************************** */
+int* CudaAladinContent::GetMask_d() {
+    return mask_d;
+}
+/* *************************************************************** */
+int* CudaAladinContent::GetReferenceDims() {
+    return referenceDims;
+}
+/* *************************************************************** */
+int* CudaAladinContent::GetFloatingDims() {
+    return floatingDims;
+}
+/* *************************************************************** */
+void CudaAladinContent::FreeCuPtrs() {
+    if (this->transformationMatrix != nullptr)
+        cudaCommon_free<float>(&transformationMatrix_d);
+
+    if (this->currentReference != nullptr) {
+        cudaCommon_free<float>(&referenceImageArray_d);
+        cudaCommon_free<float>(&referenceMat_d);
+    }
+
+    if (this->currentFloating != nullptr) {
+        cudaCommon_free<float>(&floatingImageArray_d);
+        cudaCommon_free<float>(&floIJKMat_d);
+    }
+
+    if (this->currentWarped != nullptr)
+        cudaCommon_free<float>(&warpedImageArray_d);
+
+    if (this->currentDeformationField != nullptr)
+        cudaCommon_free<float>(&deformationFieldArray_d);
+
+    if (this->currentReferenceMask != nullptr)
+        cudaCommon_free<int>(&mask_d);
+
+    if (this->blockMatchingParams != nullptr) {
+        cudaCommon_free<int>(&totalBlock_d);
+        cudaCommon_free<float>(&referencePosition_d);
+        cudaCommon_free<float>(&warpedPosition_d);
+        /*
+        cudaCommon_free<float>(&AR_d);
+        cudaCommon_free<float>(&U_d);
+        cudaCommon_free<float>(&VT_d);
+        cudaCommon_free<float>(&Sigma_d);
+        cudaCommon_free<float>(&lengths_d);
+        cudaCommon_free<float>(&newWarpedPos_d);
+        */
+    }
+}
+/* *************************************************************** */
+bool CudaAladinContent::IsCurrentComputationDoubleCapable() {
+    return this->cudaSContext->GetIsCardDoubleCapable();
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
new file mode 100644
index 00000000..e3d76732
--- /dev/null
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -0,0 +1,112 @@
+#pragma once
+
+#include "AladinContent.h"
+#include "CudaContextSingleton.h"
+
+#include "_reg_tools.h"
+
+class CudaAladinContent: public AladinContent {
+public:
+    CudaAladinContent();
+    CudaAladinContent(nifti_image *currentReferenceIn,
+                      nifti_image *currentFloatingIn,
+                      int *currentReferenceMaskIn,
+                      size_t byte,
+                      const unsigned int blockPercentage,
+                      const unsigned int inlierLts,
+                      int blockStep);
+    CudaAladinContent(nifti_image *currentReferenceIn,
+                      nifti_image *currentFloatingIn,
+                      int *currentReferenceMaskIn,
+                      size_t byte);
+    CudaAladinContent(nifti_image *currentReferenceIn,
+                      nifti_image *currentFloatingIn,
+                      int *currentReferenceMaskIn,
+                      mat44 *transMat,
+                      size_t byte,
+                      const unsigned int blockPercentage,
+                      const unsigned int inlierLts,
+                      int blockStep);
+    CudaAladinContent(nifti_image *currentReferenceIn,
+                      nifti_image *currentFloatingIn,
+                      int *currentReferenceMaskIn,
+                      mat44 *transMat,
+                      size_t byte);
+    ~CudaAladinContent();
+
+    bool IsCurrentComputationDoubleCapable();
+
+    //device getters
+    float* GetReferenceImageArray_d();
+    float* GetFloatingImageArray_d();
+    float* GetWarpedImageArray_d();
+    float* GetTransformationMatrix_d();
+    float* GetReferencePosition_d();
+    float* GetWarpedPosition_d();
+    float* GetDeformationFieldArray_d();
+    float* GetReferenceMat_d();
+    float* GetFloIJKMat_d();
+
+    //	float* GetAR_d(); // Removed until CUDA SVD is added back
+    //	float* GetU_d(); // Removed until CUDA SVD is added back
+    //	float* GetVT_d(); // Removed until CUDA SVD is added back
+    //	float* GetSigma_d(); // Removed until CUDA SVD is added back
+    //	float* GetLengths_d(); // Removed until CUDA SVD is added back
+    //	float* GetNewWarpedPos_d(); // Removed until CUDA SVD is added back
+
+    int* GetTotalBlock_d();
+    int* GetMask_d();
+
+    int* GetReferenceDims();
+    int* GetFloatingDims();
+
+    //cpu getters and setters
+    _reg_blockMatchingParam* GetBlockMatchingParams();
+    nifti_image* GetCurrentDeformationField();
+    nifti_image* GetCurrentWarped(int typ);
+
+    void SetTransformationMatrix(mat44 *transformationMatrixIn);
+    void SetCurrentWarped(nifti_image *warpedImageIn);
+    void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn);
+    void SetCurrentReferenceMask(int *maskIn, size_t size);
+    void SetBlockMatchingParams(_reg_blockMatchingParam* bmp);
+
+private:
+    void InitVars();
+
+    void AllocateCuPtrs();
+    void FreeCuPtrs();
+
+    CudaContextSingleton *cudaSContext;
+    CUcontext cudaContext;
+
+    float *referenceImageArray_d;
+    float *floatingImageArray_d;
+    float *warpedImageArray_d;
+    float *deformationFieldArray_d;
+    float *referencePosition_d;
+    float *warpedPosition_d;
+    int   *totalBlock_d, *mask_d;
+
+    float *transformationMatrix_d;
+    float *referenceMat_d;
+    float *floIJKMat_d;
+
+    //svd
+    //	float *AR_d;//A and then pseudoinverse  // Removed until CUDA SVD is added back
+    //	float *U_d; // Removed until CUDA SVD is added back
+    //	float *VT_d; // Removed until CUDA SVD is added back
+    //	float *Sigma_d; // Removed until CUDA SVD is added back
+    //	float *lengths_d; // Removed until CUDA SVD is added back
+    //	float *newWarpedPos_d; // Removed until CUDA SVD is added back
+
+    int referenceDims[4];
+    int floatingDims[4];
+
+    void DownloadImage(nifti_image *image, float* memoryObject, int datatype);
+    template<class T>
+    void FillImageData(nifti_image *image, float* memoryObject, int type);
+
+    template<class FloatingTYPE>
+    FloatingTYPE FillWarpedImageData(float intensity, int datatype);
+};
diff --git a/reg-lib/cuda/CUDABlockMatchingKernel.cpp b/reg-lib/cuda/CudaBlockMatchingKernel.cpp
similarity index 58%
rename from reg-lib/cuda/CUDABlockMatchingKernel.cpp
rename to reg-lib/cuda/CudaBlockMatchingKernel.cpp
index 3a1af0f4..2ef0a629 100644
--- a/reg-lib/cuda/CUDABlockMatchingKernel.cpp
+++ b/reg-lib/cuda/CudaBlockMatchingKernel.cpp
@@ -1,28 +1,28 @@
-#include "CUDABlockMatchingKernel.h"
+#include "CudaBlockMatchingKernel.h"
 #include "blockMatchingKernel.h"
 
 /* *************************************************************** */
-CUDABlockMatchingKernel::CUDABlockMatchingKernel(AladinContent *conIn, std::string name) :
+CudaBlockMatchingKernel::CudaBlockMatchingKernel(AladinContent *conIn, std::string name) :
    BlockMatchingKernel(name)
 {
    //get CudaAladinContent ptr
    con = static_cast<CudaAladinContent*>(conIn);
 
    //get cpu ptrs
-   reference = con->AladinContent::getCurrentReference();
-   params = con->AladinContent::getBlockMatchingParams();
+   reference = con->AladinContent::GetCurrentReference();
+   params = con->AladinContent::GetBlockMatchingParams();
 
    //get cuda ptrs
-   referenceImageArray_d = con->getReferenceImageArray_d();
-   warpedImageArray_d = con->getWarpedImageArray_d();
-   referencePosition_d = con->getReferencePosition_d();
-   warpedPosition_d = con->getWarpedPosition_d();
-   totalBlock_d = con->getTotalBlock_d();
-   mask_d = con->getMask_d();
-   referenceMat_d = con->getReferenceMat_d();
+   referenceImageArray_d = con->GetReferenceImageArray_d();
+   warpedImageArray_d = con->GetWarpedImageArray_d();
+   referencePosition_d = con->GetReferencePosition_d();
+   warpedPosition_d = con->GetWarpedPosition_d();
+   totalBlock_d = con->GetTotalBlock_d();
+   mask_d = con->GetMask_d();
+   referenceMat_d = con->GetReferenceMat_d();
 }
 /* *************************************************************** */
-void CUDABlockMatchingKernel::calculate()
+void CudaBlockMatchingKernel::Calculate()
 {
    block_matching_method_gpu(reference,
                              params,
diff --git a/reg-lib/cuda/CUDABlockMatchingKernel.h b/reg-lib/cuda/CudaBlockMatchingKernel.h
similarity index 54%
rename from reg-lib/cuda/CUDABlockMatchingKernel.h
rename to reg-lib/cuda/CudaBlockMatchingKernel.h
index dd1acdc4..797c499a 100644
--- a/reg-lib/cuda/CUDABlockMatchingKernel.h
+++ b/reg-lib/cuda/CudaBlockMatchingKernel.h
@@ -1,20 +1,19 @@
-#ifndef CUDABLOCKMATCHINGKERNEL_H
-#define CUDABLOCKMATCHINGKERNEL_H
+#pragma once
 
 #include "../BlockMatchingKernel.h"
-#include "CUDAAladinContent.h"
+#include "CudaAladinContent.h"
 
 //Kernel functions for block matching
-class CUDABlockMatchingKernel : public BlockMatchingKernel {
+class CudaBlockMatchingKernel : public BlockMatchingKernel {
 public:
 
-    CUDABlockMatchingKernel(AladinContent *conIn, std::string name);
-    void calculate();
+    CudaBlockMatchingKernel(AladinContent *conIn, std::string name);
+    void Calculate();
 private:
     nifti_image *reference;
     _reg_blockMatchingParam* params;
 
-    //CUDAContextSingletton *cudaSContext;
+    //CudaContextSingleton *cudaSContext;
     //CUContext *cudaContext;
 
     CudaAladinContent *con;
@@ -24,5 +23,3 @@ class CUDABlockMatchingKernel : public BlockMatchingKernel {
     int   *totalBlock_d, *mask_d;
 
 };
-
-#endif // CUDABLOCKMATCHINGKERNEL_H
diff --git a/reg-lib/cuda/CudaContextSingleton.cpp b/reg-lib/cuda/CudaContextSingleton.cpp
new file mode 100644
index 00000000..d3c0c165
--- /dev/null
+++ b/reg-lib/cuda/CudaContextSingleton.cpp
@@ -0,0 +1,125 @@
+#include "CudaContextSingleton.h"
+#include "_reg_common_cuda.h"
+
+/* *************************************************************** */
+CudaContextSingleton::CudaContextSingleton() {
+    // The CUDA card is setup
+    cuInit(0);
+    int device_count = 0;
+    cudaGetDeviceCount(&device_count);
+#ifndef NDEBUG
+    char text[255];
+    sprintf(text, "[NiftyReg CUDA] %i card(s) detected\n", device_count);
+    reg_print_msg_debug(text);
+#endif
+    this->cudaContext = nullptr;
+    this->numDevices = device_count;
+    this->cudaIdx = 999;
+    PickCard(this->cudaIdx);
+}
+/* *************************************************************** */
+void CudaContextSingleton::SetCudaIdx(unsigned int cudaIdxIn) {
+    if (cudaIdxIn >= this->numDevices) {
+        reg_print_msg_error("The specified cuda card id is not defined");
+        reg_print_msg_error("Run reg_gpuinfo to get the proper id");
+        reg_exit();
+    }
+    this->cudaIdx = cudaIdxIn;
+    NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx));
+    NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx))
+}
+/* *************************************************************** */
+CUcontext CudaContextSingleton::GetContext() {
+    return this->cudaContext;
+}
+/* *************************************************************** */
+void CudaContextSingleton::PickCard(unsigned deviceId = 999) {
+    struct cudaDeviceProp deviceProp;
+    if (deviceId < this->numDevices) {
+        this->cudaIdx = deviceId;
+        //
+        NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx));
+        NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx));
+        //
+        cudaGetDeviceProperties(&deviceProp, this->cudaIdx);
+        if (deviceProp.major > 1) {
+            this->isCardDoubleCapable = true;
+        } else if (deviceProp.major == 1 && deviceProp.minor > 2) {
+            this->isCardDoubleCapable = true;
+        } else {
+            this->isCardDoubleCapable = false;
+        }
+        //
+        return;
+    }
+
+    // following code is from cutGetMaxGflopsDeviceId()
+    int max_gflops_device = 0;
+    int max_gflops = 0;
+    unsigned int current_device = 0;
+    while (current_device < this->numDevices) {
+        cudaGetDeviceProperties(&deviceProp, current_device);
+        int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate;
+        if (gflops > max_gflops) {
+            max_gflops = gflops;
+            max_gflops_device = current_device;
+        }
+        ++current_device;
+    }
+    NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device));
+    NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device))
+        NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
+
+    if (deviceProp.major < 1) {
+        reg_print_msg_error("[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n");
+        reg_exit();
+    } else {
+        size_t free = 0;
+        size_t total = 0;
+        cuMemGetInfo(&free, &total);
+        if (deviceProp.totalGlobalMem != total) {
+            fprintf(stderr, "[NiftyReg CUDA ERROR] The CUDA card %s does not seem to be available\n",
+                    deviceProp.name);
+            fprintf(stderr, "[NiftyReg CUDA ERROR] Expected total memory: %zu Mb - Recovered total memory: %zu Mb\n",
+                    deviceProp.totalGlobalMem / (1024 * 1024), total / (1024 * 1024));
+            reg_exit();
+        }
+#ifndef NDEBUG
+        printf("[NiftyReg CUDA] The following device is used: %s\n",
+               deviceProp.name);
+        printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n",
+               (unsigned long int)(free / (1024 * 1024)),
+               (unsigned long int)(total / (1024 * 1024)));
+        printf("[NiftyReg CUDA] Card compute capability: %i.%i\n",
+               deviceProp.major,
+               deviceProp.minor);
+        printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n",
+               deviceProp.sharedMemPerBlock);
+        printf("[NiftyReg CUDA] CUDA version %i\n",
+               CUDART_VERSION);
+        printf("[NiftyReg CUDA] Card clock rate: %i MHz\n",
+               deviceProp.clockRate / 1000);
+        printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n",
+               deviceProp.multiProcessorCount);
+#endif
+        this->cudaIdx = max_gflops_device;
+        //
+        cudaGetDeviceProperties(&deviceProp, this->cudaIdx);
+        if (deviceProp.major > 1) {
+            this->isCardDoubleCapable = true;
+        } else if (deviceProp.major == 1 && deviceProp.minor > 2) {
+            this->isCardDoubleCapable = true;
+        } else {
+            this->isCardDoubleCapable = false;
+        }
+        //
+    }
+}
+/* *************************************************************** */
+bool CudaContextSingleton::GetIsCardDoubleCapable() {
+    return this->isCardDoubleCapable;
+}
+/* *************************************************************** */
+CudaContextSingleton::~CudaContextSingleton() {
+    cuCtxDestroy(this->cudaContext);
+}
diff --git a/reg-lib/cuda/CudaContextSingleton.h b/reg-lib/cuda/CudaContextSingleton.h
new file mode 100644
index 00000000..f9b0351e
--- /dev/null
+++ b/reg-lib/cuda/CudaContextSingleton.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include "_reg_maths.h"
+#include "cuda.h"
+
+class CudaContextSingleton {
+public:
+    static CudaContextSingleton& Instance() {
+        static CudaContextSingleton instance; // Guaranteed to be destroyed.
+        // Instantiated on first use.
+        return instance;
+    }
+    void SetCudaIdx(unsigned int cudaIdxIn);
+    void PickCard(unsigned deviceId);
+
+    CUcontext GetContext();
+
+    bool GetIsCardDoubleCapable();
+
+private:
+
+    static CudaContextSingleton* _instance;
+
+    CudaContextSingleton();
+    ~CudaContextSingleton();
+
+    CudaContextSingleton(CudaContextSingleton const&);// Don't Implement
+    void operator=(CudaContextSingleton const&); // Don't implement
+
+    bool isCardDoubleCapable;
+    CUcontext cudaContext;
+    unsigned numDevices;
+    unsigned cudaIdx;
+};
diff --git a/reg-lib/cuda/CUDAConvolutionKernel.cpp b/reg-lib/cuda/CudaConvolutionKernel.cpp
similarity index 76%
rename from reg-lib/cuda/CUDAConvolutionKernel.cpp
rename to reg-lib/cuda/CudaConvolutionKernel.cpp
index f3dad63c..78d51165 100644
--- a/reg-lib/cuda/CUDAConvolutionKernel.cpp
+++ b/reg-lib/cuda/CudaConvolutionKernel.cpp
@@ -1,12 +1,12 @@
-#include "CUDAConvolutionKernel.h"
+#include "CudaConvolutionKernel.h"
 #include "_reg_tools.h"
 
 /* *************************************************************** */
-CUDAConvolutionKernel::CUDAConvolutionKernel(std::string name) : ConvolutionKernel(name) {
-    //cudaSContext = &CUDAContextSingletton::Instance();
+CudaConvolutionKernel::CudaConvolutionKernel(std::string name) : ConvolutionKernel(name) {
+    //cudaSContext = &CudaContextSingleton::Instance();
 }
 /* *************************************************************** */
-void CUDAConvolutionKernel::calculate(nifti_image *image,
+void CudaConvolutionKernel::Calculate(nifti_image *image,
                                                   float *sigma,
                                                   int kernelType,
                                                   int *mask,
diff --git a/reg-lib/cuda/CudaConvolutionKernel.h b/reg-lib/cuda/CudaConvolutionKernel.h
new file mode 100644
index 00000000..81697a96
--- /dev/null
+++ b/reg-lib/cuda/CudaConvolutionKernel.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "ConvolutionKernel.h"
+#include "CudaContextSingleton.h"
+
+//a kernel function for convolution (gaussian smoothing?)
+class CudaConvolutionKernel: public ConvolutionKernel
+{
+public:
+
+    CudaConvolutionKernel(std::string name);
+    void Calculate(nifti_image *image,
+                        float *sigma,
+                        int kernelType,
+                        int *mask = nullptr,
+                        bool *timePoints = nullptr,
+                        bool *axis = nullptr);
+
+    private:
+       //CudaContextSingleton * cudaSContext;
+
+};
diff --git a/reg-lib/cuda/CudaKernelFactory.cpp b/reg-lib/cuda/CudaKernelFactory.cpp
new file mode 100644
index 00000000..a8b3e3ec
--- /dev/null
+++ b/reg-lib/cuda/CudaKernelFactory.cpp
@@ -0,0 +1,16 @@
+#include "CudaKernelFactory.h"
+#include "CudaAffineDeformationFieldKernel.h"
+#include "CudaConvolutionKernel.h"
+#include "CudaBlockMatchingKernel.h"
+#include "CudaResampleImageKernel.h"
+#include "CudaOptimiseKernel.h"
+#include "AladinContent.h"
+
+Kernel* CudaKernelFactory::ProduceKernel(std::string name, AladinContent *con) const {
+    if (name == AffineDeformationFieldKernel::GetName()) return new CudaAffineDeformationFieldKernel(con, name);
+    else if (name == ConvolutionKernel::GetName()) return new CudaConvolutionKernel(name);
+    else if (name == BlockMatchingKernel::GetName()) return new CudaBlockMatchingKernel(con, name);
+    else if (name == ResampleImageKernel::GetName()) return new CudaResampleImageKernel(con, name);
+    else if (name == OptimiseKernel::GetName()) return new CudaOptimiseKernel(con, name);
+    else return nullptr;
+}
diff --git a/reg-lib/cuda/CudaKernelFactory.h b/reg-lib/cuda/CudaKernelFactory.h
new file mode 100644
index 00000000..f2b6af17
--- /dev/null
+++ b/reg-lib/cuda/CudaKernelFactory.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include "KernelFactory.h"
+#include "AladinContent.h"
+
+class CudaKernelFactory: public KernelFactory {
+public:
+	Kernel* ProduceKernel(std::string name, AladinContent *con) const;
+};
diff --git a/reg-lib/cuda/CUDAOptimiseKernel.cpp b/reg-lib/cuda/CudaOptimiseKernel.cpp
similarity index 67%
rename from reg-lib/cuda/CUDAOptimiseKernel.cpp
rename to reg-lib/cuda/CudaOptimiseKernel.cpp
index 51a3bde0..b4ae8eab 100644
--- a/reg-lib/cuda/CUDAOptimiseKernel.cpp
+++ b/reg-lib/cuda/CudaOptimiseKernel.cpp
@@ -1,34 +1,34 @@
 #include "cuda_runtime.h"
 #include "cuda.h"
-#include "CUDAOptimiseKernel.h"
+#include "CudaOptimiseKernel.h"
 #include "optimizeKernel.h"
 
 /* *************************************************************** */
-CUDAOptimiseKernel::CUDAOptimiseKernel(AladinContent *conIn, std::string name) :
+CudaOptimiseKernel::CudaOptimiseKernel(AladinContent *conIn, std::string name) :
    OptimiseKernel(name)
 {
    //get CudaAladinContent ptr
    con = static_cast<CudaAladinContent*>(conIn);
 
-   //cudaSContext = &CUDAContextSingletton::Instance();
+   //cudaSContext = &CudaContextSingleton::Instance();
 
    //get cpu ptrs
-   transformationMatrix = con->AladinContent::getTransformationMatrix();
-   blockMatchingParams = con->AladinContent::getBlockMatchingParams();
+   transformationMatrix = con->AladinContent::GetTransformationMatrix();
+   blockMatchingParams = con->AladinContent::GetBlockMatchingParams();
 
-//   transformationMatrix_d = con->getTransformationMatrix_d();
-//   AR_d = con->getAR_d(); // Removed until CUDA SVD is added back
-//   U_d = con->getU_d(); // Removed until CUDA SVD is added back
-//   Sigma_d = con->getSigma_d(); // Removed until CUDA SVD is added back
-//   VT_d = con->getVT_d(); // Removed until CUDA SVD is added back
-//   lengths_d = con->getLengths_d(); // Removed until CUDA SVD is added back
-//   referencePos_d = con->getReferencePosition_d();
-//   warpedPos_d = con->getWarpedPosition_d();
-//   newWarpedPos_d = con->getNewWarpedPos_d(); // Removed until CUDA SVD is added back
+//   transformationMatrix_d = con->GetTransformationMatrix_d();
+//   AR_d = con->GetAR_d(); // Removed until CUDA SVD is added back
+//   U_d = con->GetU_d(); // Removed until CUDA SVD is added back
+//   Sigma_d = con->GetSigma_d(); // Removed until CUDA SVD is added back
+//   VT_d = con->GetVT_d(); // Removed until CUDA SVD is added back
+//   lengths_d = con->GetLengths_d(); // Removed until CUDA SVD is added back
+//   referencePos_d = con->GetReferencePosition_d();
+//   warpedPos_d = con->GetWarpedPosition_d();
+//   newWarpedPos_d = con->GetNewWarpedPos_d(); // Removed until CUDA SVD is added back
 
 }
 /* *************************************************************** */
-void CUDAOptimiseKernel::calculate(bool affine) {
+void CudaOptimiseKernel::Calculate(bool affine) {
    /* // Removed until CUDA SVD is added back
 #if _WIN64 || __x86_64__ || __ppc64__
 
@@ -44,7 +44,7 @@ void CUDAOptimiseKernel::calculate(bool affine) {
 #endif
 
     if (*cudaRunTimeVersion < 7050) {
-        this->blockMatchingParams = con->getBlockMatchingParams();
+        this->blockMatchingParams = con->GetBlockMatchingParams();
         optimize(this->blockMatchingParams, transformationMatrix, affine);
     }
     else {
@@ -67,16 +67,16 @@ void CUDAOptimiseKernel::calculate(bool affine) {
                                    ils,
                                    affine);
         } else {
-            this->blockMatchingParams = con->getBlockMatchingParams();
+            this->blockMatchingParams = con->GetBlockMatchingParams();
             optimize(this->blockMatchingParams, transformationMatrix, affine);
         }
     }
 #else
-    this->blockMatchingParams = con->getBlockMatchingParams();
+    this->blockMatchingParams = con->GetBlockMatchingParams();
     optimize(this->blockMatchingParams, transformationMatrix, affine);
 #endif
 */
-   this->blockMatchingParams = con->getBlockMatchingParams();
+   this->blockMatchingParams = con->GetBlockMatchingParams();
    optimize(this->blockMatchingParams, transformationMatrix, affine);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaOptimiseKernel.h b/reg-lib/cuda/CudaOptimiseKernel.h
new file mode 100644
index 00000000..29d31b92
--- /dev/null
+++ b/reg-lib/cuda/CudaOptimiseKernel.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "OptimiseKernel.h"
+#include "CudaAladinContent.h"
+
+//kernel functions for numerical optimisation
+class CudaOptimiseKernel: public OptimiseKernel
+{
+public:
+    CudaOptimiseKernel(AladinContent *conIn, std::string name);
+    void Calculate(bool affine);
+
+private:
+    _reg_blockMatchingParam *blockMatchingParams;
+    mat44 *transformationMatrix;
+    CudaAladinContent *con;
+
+//    float *AR_d; // Removed until CUDA SVD is added back
+//    float *U_d; // Removed until CUDA SVD is added back
+//    float *Sigma_d; // Removed until CUDA SVD is added back
+//    float *VT_d; // Removed until CUDA SVD is added back
+//    float *lengths_d; // Removed until CUDA SVD is added back
+//    float *newWarpedPos_d; // Removed until CUDA SVD is added back
+
+};
diff --git a/reg-lib/cuda/CUDAResampleImageKernel.cpp b/reg-lib/cuda/CudaResampleImageKernel.cpp
similarity index 75%
rename from reg-lib/cuda/CUDAResampleImageKernel.cpp
rename to reg-lib/cuda/CudaResampleImageKernel.cpp
index 95b1c183..c9049cda 100644
--- a/reg-lib/cuda/CUDAResampleImageKernel.cpp
+++ b/reg-lib/cuda/CudaResampleImageKernel.cpp
@@ -1,21 +1,21 @@
-#include "CUDAResampleImageKernel.h"
+#include "CudaResampleImageKernel.h"
 #include "resampleKernel.h"
 
 /* *************************************************************** */
-CUDAResampleImageKernel::CUDAResampleImageKernel(AladinContent *conIn, std::string name) :
+CudaResampleImageKernel::CudaResampleImageKernel(AladinContent *conIn, std::string name) :
         ResampleImageKernel(name)
 {
     con = static_cast<CudaAladinContent*>(conIn);
 
-    floatingImage = con->AladinContent::getCurrentFloating();
-    warpedImage = con->AladinContent::getCurrentWarped();
+    floatingImage = con->AladinContent::GetCurrentFloating();
+    warpedImage = con->AladinContent::GetCurrentWarped();
 
     //cuda ptrs
-    floatingImageArray_d = con->getFloatingImageArray_d();
-    warpedImageArray_d = con->getWarpedImageArray_d();
-    deformationFieldImageArray_d = con->getDeformationFieldArray_d();
-    mask_d = con->getMask_d();
-    floIJKMat_d = con->getFloIJKMat_d();
+    floatingImageArray_d = con->GetFloatingImageArray_d();
+    warpedImageArray_d = con->GetWarpedImageArray_d();
+    deformationFieldImageArray_d = con->GetDeformationFieldArray_d();
+    mask_d = con->GetMask_d();
+    floIJKMat_d = con->GetFloIJKMat_d();
 
     if (floatingImage->datatype != warpedImage->datatype) {
         reg_print_fct_error("CudaResampleImageKernel::CudaResampleImageKernel");
@@ -30,7 +30,7 @@ CUDAResampleImageKernel::CUDAResampleImageKernel(AladinContent *conIn, std::stri
     }
 }
 /* *************************************************************** */
-void CUDAResampleImageKernel::calculate(int interp,
+void CudaResampleImageKernel::Calculate(int interp,
                                                      float paddingValue,
                                                      bool *dti_timepoint,
                                                      mat33 * jacMat)
diff --git a/reg-lib/cuda/CUDAResampleImageKernel.h b/reg-lib/cuda/CudaResampleImageKernel.h
similarity index 50%
rename from reg-lib/cuda/CUDAResampleImageKernel.h
rename to reg-lib/cuda/CudaResampleImageKernel.h
index e2e0a8e3..9aa978f8 100644
--- a/reg-lib/cuda/CUDAResampleImageKernel.h
+++ b/reg-lib/cuda/CudaResampleImageKernel.h
@@ -1,19 +1,18 @@
-#ifndef CUDARESAMPLEIMAGEKERNEL_H
-#define CUDARESAMPLEIMAGEKERNEL_H
+#pragma once
 
 #include "ResampleImageKernel.h"
-#include "CUDAAladinContent.h"
+#include "CudaAladinContent.h"
 
 /*
  * kernel functions for image resampling with three interpolation variations
  * */
-class CUDAResampleImageKernel: public ResampleImageKernel {
+class CudaResampleImageKernel: public ResampleImageKernel {
 public:
-    CUDAResampleImageKernel(AladinContent *conIn, std::string name);
-    void calculate(int interp,
+    CudaResampleImageKernel(AladinContent *conIn, std::string name);
+    void Calculate(int interp,
                         float paddingValue,
-                        bool *dti_timepoint = NULL,
-                        mat33 *jacMat = NULL);
+                        bool *dti_timepoint = nullptr,
+                        mat33 *jacMat = nullptr);
 private:
     nifti_image *floatingImage;
     nifti_image *warpedImage;
@@ -25,8 +24,6 @@ class CUDAResampleImageKernel: public ResampleImageKernel {
     float* deformationFieldImageArray_d;
     int *mask_d;
 
-    //CUDAContextSingletton *cudaSContext;
+    //CudaContextSingleton *cudaSContext;
     CudaAladinContent *con;
 };
-
-#endif // CUDARESAMPLEIMAGEKERNEL_H
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.cu b/reg-lib/cuda/_reg_blocksize_gpu.cu
index 76066f43..63be0e5c 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.cu
+++ b/reg-lib/cuda/_reg_blocksize_gpu.cu
@@ -7,14 +7,11 @@
  * See the LICENSE.txt file in the nifty_reg root folder
  */
 
-#ifndef _REG_BLOCKSIZE_GPU_CU
-#define _REG_BLOCKSIZE_GPU_CU
-
 #include "_reg_blocksize_gpu.h"
 
 /* ******************************** */
 /* ******************************** */
-NiftyReg_CudaBlock100 * NiftyReg_CudaBlock::instance = NULL;
+NiftyReg_CudaBlock100 * NiftyReg_CudaBlock::instance = nullptr;
 /* ******************************** */
 /* ******************************** */
 NiftyReg_CudaBlock100::NiftyReg_CudaBlock100()
@@ -223,5 +220,3 @@ NiftyReg_CudaBlock300::NiftyReg_CudaBlock300()
     printf("[NiftyReg DEBUG] NiftyReg_CudaBlock300 constructor called\n");
 #endif
 }
-
-#endif
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h
index 11f98204..e04510cf 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.h
+++ b/reg-lib/cuda/_reg_blocksize_gpu.h
@@ -7,8 +7,7 @@
  * See the LICENSE.txt file in the nifty_reg root folder
  */
 
-#ifndef _REG_BLOCKSIZE_GPU_H
-#define _REG_BLOCKSIZE_GPU_H
+#pragma once
 
 #include "nifti1_io.h"
 #include "cuda_runtime.h"
@@ -168,7 +167,7 @@ class NiftyReg_CudaBlock300 : public NiftyReg_CudaBlock100
 class NiftyReg_CudaBlock
 {
 public:
-   static NiftyReg_CudaBlock100 * getInstance(int major)
+   static NiftyReg_CudaBlock100 * GetInstance(int major)
    {
      if (instance) return instance;
      else
@@ -193,5 +192,3 @@ class NiftyReg_CudaBlock
 };
 /* ******************************** */
 /* ******************************** */
-
-#endif
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 806f3765..dec42d33 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -9,9 +9,6 @@
  *
  */
 
-#ifndef _REG_COMMON_CUDA_CU
-#define _REG_COMMON_CUDA_CU
-
 #include "_reg_common_cuda.h"
 #include "_reg_tools.h"
 #include "_reg_blocksize_gpu.h"
@@ -75,7 +72,7 @@ int cudaCommon_setCUDACard(CUcontext *ctx, bool verbose) {
 			printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n",
 				   deviceProp.multiProcessorCount);
 		}
-		NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(deviceProp.major);
+		NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(deviceProp.major);
 	}
 	return EXIT_SUCCESS;
 }
@@ -637,8 +634,8 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE **array_d, DTYP
 			return EXIT_FAILURE;
 		}
 		const int voxelNumber = img->nx*img->ny*img->nz;
-		float4 *array_h=NULL;
-		float4 *array2_h=NULL;
+		float4 *array_h=nullptr;
+		float4 *array2_h=nullptr;
 		NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber*sizeof(float4)));
 		NR_CUDA_SAFE_CALL(cudaMallocHost(&array2_h, voxelNumber*sizeof(float4)));
 		NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array_h, (const void *)*array_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToHost));
@@ -767,5 +764,3 @@ int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, c
 template int cudaCommon_transferArrayFromDeviceToCpu<int>(int *array_cpu, int **array_d, const unsigned int nElements);
 template int cudaCommon_transferArrayFromDeviceToCpu<float>(float *array_cpu, float **array_d, const unsigned int nElements);
 template int cudaCommon_transferArrayFromDeviceToCpu<double>(double *array_cpu, double **array_d, const unsigned int nElements);
-
-#endif
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index d1f5d776..851bc03d 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -7,8 +7,7 @@
  * See the LICENSE.txt file in the nifty_reg root folder
  */
 
-#ifndef _REG_COMMON_CUDA_H
-#define _REG_COMMON_CUDA_H
+#pragma once
 
 #include "nifti1_io.h"
 #include "cuda_runtime.h"
@@ -171,4 +170,3 @@ template <class DTYPE>
 int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, const unsigned int nElements);
 /* ******************************** */
 /* ******************************** */
-#endif
diff --git a/reg-lib/cuda/_reg_cudainfo.h b/reg-lib/cuda/_reg_cudainfo.h
index 1e2b4486..889b396e 100644
--- a/reg-lib/cuda/_reg_cudainfo.h
+++ b/reg-lib/cuda/_reg_cudainfo.h
@@ -1,6 +1,3 @@
-#ifndef _REG_CUDAINFO_H
-#define _REG_CUDAINFO_H
+#pragma once
 
 void showCUDAInfo(void);
-
-#endif
\ No newline at end of file
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index 75f19eff..dbbc286f 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_F3D_GPU_CPP
-#define _REG_F3D_GPU_CPP
-
 #include "_reg_f3d_gpu.h"
 
  /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -20,26 +17,26 @@
 reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint)
     : reg_f3d<float>::reg_f3d(refTimePoint, floTimePoint) {
     this->executableName = (char *)"NiftyReg F3D GPU";
-    this->currentReference_gpu = NULL;
-    this->currentFloating_gpu = NULL;
-    this->currentMask_gpu = NULL;
-    this->warped_gpu = NULL;
-    this->controlPointGrid_gpu = NULL;
-    this->deformationFieldImage_gpu = NULL;
-    this->warpedGradientImage_gpu = NULL;
-    this->voxelBasedMeasureGradientImage_gpu = NULL;
-    this->transformationGradient_gpu = NULL;
-
-    this->measure_gpu_ssd = NULL;
-    this->measure_gpu_kld = NULL;
-    this->measure_gpu_dti = NULL;
-    this->measure_gpu_lncc = NULL;
-    this->measure_gpu_nmi = NULL;
-
-    this->currentReference2_gpu = NULL;
-    this->currentFloating2_gpu = NULL;
-    this->warped2_gpu = NULL;
-    this->warpedGradientImage2_gpu = NULL;
+    this->currentReference_gpu = nullptr;
+    this->currentFloating_gpu = nullptr;
+    this->currentMask_gpu = nullptr;
+    this->warped_gpu = nullptr;
+    this->controlPointGrid_gpu = nullptr;
+    this->deformationFieldImage_gpu = nullptr;
+    this->warpedGradientImage_gpu = nullptr;
+    this->voxelBasedMeasureGradientImage_gpu = nullptr;
+    this->transformationGradient_gpu = nullptr;
+
+    this->measure_gpu_ssd = nullptr;
+    this->measure_gpu_kld = nullptr;
+    this->measure_gpu_dti = nullptr;
+    this->measure_gpu_lncc = nullptr;
+    this->measure_gpu_nmi = nullptr;
+
+    this->currentReference2_gpu = nullptr;
+    this->currentFloating2_gpu = nullptr;
+    this->warped2_gpu = nullptr;
+    this->warpedGradientImage2_gpu = nullptr;
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::reg_f3d_gpu");
@@ -48,63 +45,63 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint)
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 reg_f3d_gpu::~reg_f3d_gpu() {
-    if (this->currentReference_gpu != NULL)
+    if (this->currentReference_gpu != nullptr)
         cudaCommon_free(&this->currentReference_gpu);
-    if (this->currentFloating_gpu != NULL)
+    if (this->currentFloating_gpu != nullptr)
         cudaCommon_free(&this->currentFloating_gpu);
-    if (this->currentMask_gpu != NULL)
+    if (this->currentMask_gpu != nullptr)
         cudaCommon_free<int>(&this->currentMask_gpu);
-    if (this->warped_gpu != NULL)
+    if (this->warped_gpu != nullptr)
         cudaCommon_free<float>(&this->warped_gpu);
-    if (this->controlPointGrid_gpu != NULL)
+    if (this->controlPointGrid_gpu != nullptr)
         cudaCommon_free<float4>(&this->controlPointGrid_gpu);
-    if (this->deformationFieldImage_gpu != NULL)
+    if (this->deformationFieldImage_gpu != nullptr)
         cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
-    if (this->warpedGradientImage_gpu != NULL)
+    if (this->warpedGradientImage_gpu != nullptr)
         cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
-    if (this->voxelBasedMeasureGradientImage_gpu != NULL)
+    if (this->voxelBasedMeasureGradientImage_gpu != nullptr)
         cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
-    if (this->transformationGradient_gpu != NULL)
+    if (this->transformationGradient_gpu != nullptr)
         cudaCommon_free<float4>(&this->transformationGradient_gpu);
 
-    if (this->currentReference2_gpu != NULL)
+    if (this->currentReference2_gpu != nullptr)
         cudaCommon_free(&this->currentReference2_gpu);
-    if (this->currentFloating2_gpu != NULL)
+    if (this->currentFloating2_gpu != nullptr)
         cudaCommon_free(&this->currentFloating2_gpu);
-    if (this->warped2_gpu != NULL)
+    if (this->warped2_gpu != nullptr)
         cudaCommon_free<float>(&this->warped2_gpu);
-    if (this->warpedGradientImage2_gpu != NULL)
+    if (this->warpedGradientImage2_gpu != nullptr)
         cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);
 
-    if (this->optimiser != NULL) {
+    if (this->optimiser != nullptr) {
         delete this->optimiser;
-        this->optimiser = NULL;
+        this->optimiser = nullptr;
     }
 
-    if (this->measure_gpu_nmi != NULL) {
+    if (this->measure_gpu_nmi != nullptr) {
         delete this->measure_gpu_nmi;
-        this->measure_gpu_nmi = NULL;
-        this->measure_nmi = NULL;
+        this->measure_gpu_nmi = nullptr;
+        this->measure_nmi = nullptr;
     }
-    if (this->measure_gpu_ssd != NULL) {
+    if (this->measure_gpu_ssd != nullptr) {
         delete this->measure_gpu_ssd;
-        this->measure_gpu_ssd = NULL;
-        this->measure_ssd = NULL;
+        this->measure_gpu_ssd = nullptr;
+        this->measure_ssd = nullptr;
     }
-    if (this->measure_gpu_kld != NULL) {
+    if (this->measure_gpu_kld != nullptr) {
         delete this->measure_gpu_kld;
-        this->measure_gpu_kld = NULL;
-        this->measure_kld = NULL;
+        this->measure_gpu_kld = nullptr;
+        this->measure_kld = nullptr;
     }
-    if (this->measure_gpu_dti != NULL) {
+    if (this->measure_gpu_dti != nullptr) {
         delete this->measure_gpu_dti;
-        this->measure_gpu_dti = NULL;
-        this->measure_dti = NULL;
+        this->measure_gpu_dti = nullptr;
+        this->measure_dti = nullptr;
     }
-    if (this->measure_gpu_lncc != NULL) {
+    if (this->measure_gpu_lncc != nullptr) {
         delete this->measure_gpu_lncc;
-        this->measure_gpu_lncc = NULL;
-        this->measure_lncc = NULL;
+        this->measure_gpu_lncc = nullptr;
+        this->measure_lncc = nullptr;
     }
 
 #ifndef NDEBUG
@@ -142,13 +139,13 @@ void reg_f3d_gpu::AllocateWarped() {
 void reg_f3d_gpu::ClearWarped() {
     reg_f3d::ClearWarped();
 
-    if (this->warped_gpu != NULL) {
+    if (this->warped_gpu != nullptr) {
         cudaCommon_free<float>(&this->warped_gpu);
-        this->warped_gpu = NULL;
+        this->warped_gpu = nullptr;
     }
-    if (this->warped2_gpu != NULL) {
+    if (this->warped2_gpu != nullptr) {
         cudaCommon_free<float>(&this->warped2_gpu);
-        this->warped2_gpu = NULL;
+        this->warped2_gpu = nullptr;
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::ClearWarped");
@@ -167,9 +164,9 @@ void reg_f3d_gpu::AllocateDeformationField() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearDeformationField() {
-    if (this->deformationFieldImage_gpu != NULL) {
+    if (this->deformationFieldImage_gpu != nullptr) {
         cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
-        this->deformationFieldImage_gpu = NULL;
+        this->deformationFieldImage_gpu = nullptr;
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::ClearDeformationField");
@@ -199,13 +196,13 @@ void reg_f3d_gpu::AllocateWarpedGradient() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearWarpedGradient() {
-    if (this->warpedGradientImage_gpu != NULL) {
+    if (this->warpedGradientImage_gpu != nullptr) {
         cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
-        this->warpedGradientImage_gpu = NULL;
+        this->warpedGradientImage_gpu = nullptr;
     }
-    if (this->warpedGradientImage2_gpu != NULL) {
+    if (this->warpedGradientImage2_gpu != nullptr) {
         cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);
-        this->warpedGradientImage2_gpu = NULL;
+        this->warpedGradientImage2_gpu = nullptr;
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::ClearWarpedGradient");
@@ -227,9 +224,9 @@ void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() {
-    if (this->voxelBasedMeasureGradientImage_gpu != NULL) {
+    if (this->voxelBasedMeasureGradientImage_gpu != nullptr) {
         cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
-        this->voxelBasedMeasureGradientImage_gpu = NULL;
+        this->voxelBasedMeasureGradientImage_gpu = nullptr;
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::ClearVoxelBasedMeasureGradient");
@@ -251,9 +248,9 @@ void reg_f3d_gpu::AllocateTransformationGradient() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::ClearTransformationGradient() {
-    if (this->transformationGradient_gpu != NULL) {
+    if (this->transformationGradient_gpu != nullptr) {
         cudaCommon_free<float4>(&this->transformationGradient_gpu);
-        this->transformationGradient_gpu = NULL;
+        this->transformationGradient_gpu = nullptr;
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::ClearTransformationGradient");
@@ -309,7 +306,6 @@ double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) {
 double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() {
     if (this->bendingEnergyWeight <= 0) return 0;
 
-    // CHECKED: Similar output
     double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid,
                                                       &this->controlPointGrid_gpu);
 #ifndef NDEBUG
@@ -342,7 +338,7 @@ double reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::GetDeformationField() {
-    if (this->controlPointGrid_gpu == NULL) {
+    if (this->controlPointGrid_gpu == nullptr) {
         reg_f3d::GetDeformationField();
     } else {
         // Compute the deformation field
@@ -415,19 +411,19 @@ void reg_f3d_gpu::GetVoxelBasedGradient() {
                              this->warpedPaddingValue);
 
     // The gradient of the various measures of similarity are computed
-    if (this->measure_gpu_nmi != NULL)
+    if (this->measure_gpu_nmi != nullptr)
         this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient();
 
-    if (this->measure_gpu_ssd != NULL)
+    if (this->measure_gpu_ssd != nullptr)
         this->measure_gpu_ssd->GetVoxelBasedSimilarityMeasureGradient();
 
-    if (this->measure_gpu_kld != NULL)
+    if (this->measure_gpu_kld != nullptr)
         this->measure_gpu_kld->GetVoxelBasedSimilarityMeasureGradient();
 
-    if (this->measure_gpu_lncc != NULL)
+    if (this->measure_gpu_lncc != nullptr)
         this->measure_gpu_lncc->GetVoxelBasedSimilarityMeasureGradient();
 
-    if (this->measure_gpu_dti != NULL)
+    if (this->measure_gpu_dti != nullptr)
         this->measure_gpu_dti->GetVoxelBasedSimilarityMeasureGradient();
 
 #ifndef NDEBUG
@@ -457,7 +453,7 @@ void reg_f3d_gpu::GetSimilarityMeasureGradient() {
                                      this->similarityWeight);
 
     /* The similarity measure gradient is converted from voxel space to real space */
-    mat44 *floatingMatrix_xyz = NULL;
+    mat44 *floatingMatrix_xyz = nullptr;
     if (this->currentFloating->sform_code > 0)
         floatingMatrix_xyz = &(this->currentFloating->sto_xyz);
     else floatingMatrix_xyz = &(this->currentFloating->qto_xyz);
@@ -469,7 +465,7 @@ void reg_f3d_gpu::GetSimilarityMeasureGradient() {
         reg_gaussianSmoothing_gpu(this->controlPointGrid,
                                   &this->transformationGradient_gpu,
                                   this->gradientSmoothingSigma,
-                                  NULL);
+                                  nullptr);
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::GetSimilarityMeasureGradient");
@@ -622,7 +618,7 @@ void reg_f3d_gpu::GetApproximatedGradient() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 nifti_image** reg_f3d_gpu::GetWarpedImage() {
     // The initial images are used
-    if (this->inputReference == NULL || this->inputFloating == NULL || this->controlPointGrid == NULL) {
+    if (this->inputReference == nullptr || this->inputFloating == nullptr || this->controlPointGrid == nullptr) {
         reg_print_fct_error("reg_f3d_gpu::GetWarpedImage()");
         reg_print_msg_error("The reference, floating and control point grid images have to be defined");
         reg_exit();
@@ -666,8 +662,8 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() {
 float reg_f3d_gpu::InitialiseCurrentLevel() {
     float maxStepSize = reg_f3d::InitialiseCurrentLevel();
 
-    if (this->currentReference_gpu != NULL) cudaCommon_free(&this->currentReference_gpu);
-    if (this->currentReference2_gpu != NULL) cudaCommon_free(&this->currentReference2_gpu);
+    if (this->currentReference_gpu != nullptr) cudaCommon_free(&this->currentReference_gpu);
+    if (this->currentReference2_gpu != nullptr) cudaCommon_free(&this->currentReference2_gpu);
     if (this->currentReference->nt == 1) {
         if (cudaCommon_allocateArrayToDevice<float>(&this->currentReference_gpu, this->currentReference->dim)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
@@ -694,8 +690,8 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
         }
     }
 
-    if (this->currentFloating_gpu != NULL) cudaCommon_free(&this->currentFloating_gpu);
-    if (this->currentFloating2_gpu != NULL) cudaCommon_free(&this->currentFloating2_gpu);
+    if (this->currentFloating_gpu != nullptr) cudaCommon_free(&this->currentFloating_gpu);
+    if (this->currentFloating2_gpu != nullptr) cudaCommon_free(&this->currentFloating2_gpu);
     if (this->currentReference->nt == 1) {
         if (cudaCommon_allocateArrayToDevice<float>(&this->currentFloating_gpu, this->currentFloating->dim)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
@@ -722,7 +718,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
         }
     }
 
-    if (this->controlPointGrid_gpu != NULL) cudaCommon_free<float4>(&this->controlPointGrid_gpu);
+    if (this->controlPointGrid_gpu != nullptr) cudaCommon_free<float4>(&this->controlPointGrid_gpu);
     if (cudaCommon_allocateArrayToDevice<float4>(&this->controlPointGrid_gpu, this->controlPointGrid->dim)) {
         reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
         reg_print_msg_error("Error when allocating the control point image");
@@ -762,20 +758,20 @@ void reg_f3d_gpu::ClearCurrentInputImage() {
         reg_exit();
     }
     cudaCommon_free<float4>(&this->controlPointGrid_gpu);
-    this->controlPointGrid_gpu = NULL;
+    this->controlPointGrid_gpu = nullptr;
     cudaCommon_free(&this->currentReference_gpu);
-    this->currentReference_gpu = NULL;
+    this->currentReference_gpu = nullptr;
     cudaCommon_free(&this->currentFloating_gpu);
-    this->currentFloating_gpu = NULL;
+    this->currentFloating_gpu = nullptr;
     NR_CUDA_SAFE_CALL(cudaFree(this->currentMask_gpu));
-    this->currentMask_gpu = NULL;
+    this->currentMask_gpu = nullptr;
 
-    if (this->currentReference2_gpu != NULL)
+    if (this->currentReference2_gpu != nullptr)
         cudaCommon_free(&this->currentReference2_gpu);
-    this->currentReference2_gpu = NULL;
-    if (this->currentFloating2_gpu != NULL)
+    this->currentReference2_gpu = nullptr;
+    if (this->currentFloating2_gpu != nullptr)
         cudaCommon_free(&this->currentFloating2_gpu);
-    this->currentFloating2_gpu = NULL;
+    this->currentFloating2_gpu = nullptr;
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::ClearCurrentInputImage");
@@ -794,7 +790,7 @@ void reg_f3d_gpu::SetOptimiser() {
                                 this->optimiseX,
                                 this->optimiseY,
                                 this->optimiseZ,
-                                this->maxiterationNumber,
+                                this->maxIterationNumber,
                                 0, // currentIterationNumber,
                                 this,
                                 reinterpret_cast<float*>(this->controlPointGrid_gpu),
@@ -898,7 +894,7 @@ int reg_f3d_gpu::CheckMemoryMB() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
-    if (this->measure_gpu_nmi == NULL)
+    if (this->measure_gpu_nmi == nullptr)
         this->measure_gpu_nmi = new reg_nmi_gpu;
     this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0);
     // I am here adding 4 to the specified bin number to accomodate for
@@ -910,7 +906,7 @@ void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
-    if (this->measure_gpu_nmi == NULL)
+    if (this->measure_gpu_nmi == nullptr)
         this->measure_gpu_nmi = new reg_nmi_gpu;
     this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0);
     // I am here adding 4 to the specified bin number to accomodate for
@@ -922,7 +918,7 @@ void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseSSD(int timepoint) {
-    if (this->measure_gpu_ssd == NULL)
+    if (this->measure_gpu_ssd == nullptr)
         this->measure_gpu_ssd = new reg_ssd_gpu;
     this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0);
 #ifndef NDEBUG
@@ -931,7 +927,7 @@ void reg_f3d_gpu::UseSSD(int timepoint) {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseKLDivergence(int timepoint) {
-    if (this->measure_gpu_kld == NULL)
+    if (this->measure_gpu_kld == nullptr)
         this->measure_gpu_kld = new reg_kld_gpu;
     this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0);
 #ifndef NDEBUG
@@ -940,7 +936,7 @@ void reg_f3d_gpu::UseKLDivergence(int timepoint) {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) {
-    if (this->measure_gpu_lncc == NULL)
+    if (this->measure_gpu_lncc == nullptr)
         this->measure_gpu_lncc = new reg_lncc_gpu;
     this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0);
     this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint, stddev);
@@ -953,7 +949,7 @@ void reg_f3d_gpu::UseDTI(int timepoint[6]) {
     reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring");
     reg_exit();
 
-    // if(this->measure_gpu_dti==NULL)
+    // if(this->measure_gpu_dti==nullptr)
     //    this->measure_gpu_dti=new reg_dti_gpu;
     // for(int i=0; i<6; ++i)
     //    this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]);
@@ -962,16 +958,16 @@ void reg_f3d_gpu::UseDTI(int timepoint[6]) {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::InitialiseSimilarity() {
     // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
-    if (this->measure_gpu_nmi == NULL &&
-        this->measure_gpu_ssd == NULL &&
-        this->measure_gpu_dti == NULL &&
-        this->measure_gpu_kld == NULL &&
-        this->measure_gpu_lncc == NULL) {
+    if (this->measure_gpu_nmi == nullptr &&
+        this->measure_gpu_ssd == nullptr &&
+        this->measure_gpu_dti == nullptr &&
+        this->measure_gpu_kld == nullptr &&
+        this->measure_gpu_lncc == nullptr) {
         measure_gpu_nmi = new reg_nmi_gpu;
         for (int i = 0; i < this->inputReference->nt; ++i)
             measure_gpu_nmi->SetTimepointWeight(i, 1.0);
     }
-    if (this->measure_gpu_nmi != NULL) {
+    if (this->measure_gpu_nmi != nullptr) {
         this->measure_gpu_nmi->InitialiseMeasure(this->currentReference,
                                                  this->currentFloating,
                                                  this->currentMask,
@@ -988,7 +984,7 @@ void reg_f3d_gpu::InitialiseSimilarity() {
         this->measure_nmi = this->measure_gpu_nmi;
     }
 
-    if (this->measure_gpu_ssd != NULL) {
+    if (this->measure_gpu_ssd != nullptr) {
         this->measure_gpu_ssd->InitialiseMeasure(this->currentReference,
                                                  this->currentFloating,
                                                  this->currentMask,
@@ -1006,7 +1002,7 @@ void reg_f3d_gpu::InitialiseSimilarity() {
         this->measure_ssd = this->measure_gpu_ssd;
     }
 
-    if (this->measure_gpu_kld != NULL) {
+    if (this->measure_gpu_kld != nullptr) {
         this->measure_gpu_kld->InitialiseMeasure(this->currentReference,
                                                  this->currentFloating,
                                                  this->currentMask,
@@ -1023,7 +1019,7 @@ void reg_f3d_gpu::InitialiseSimilarity() {
         this->measure_kld = this->measure_gpu_kld;
     }
 
-    if (this->measure_gpu_lncc != NULL) {
+    if (this->measure_gpu_lncc != nullptr) {
         this->measure_gpu_lncc->InitialiseMeasure(this->currentReference,
                                                   this->currentFloating,
                                                   this->currentMask,
@@ -1040,7 +1036,7 @@ void reg_f3d_gpu::InitialiseSimilarity() {
         this->measure_lncc = this->measure_gpu_lncc;
     }
 
-    if (this->measure_gpu_dti != NULL) {
+    if (this->measure_gpu_dti != nullptr) {
         this->measure_gpu_dti->InitialiseMeasure(this->currentReference,
                                                  this->currentFloating,
                                                  this->currentMask,
@@ -1062,4 +1058,3 @@ void reg_f3d_gpu::InitialiseSimilarity() {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-#endif
diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h
index 3fefb0e8..b982236d 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.h
+++ b/reg-lib/cuda/_reg_f3d_gpu.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_F3D_GPU_H
-#define _REG_F3D_GPU_H
+#pragma once
 
 #include "_reg_resampling_gpu.h"
 #include "_reg_globalTransformation_gpu.h"
@@ -97,5 +96,3 @@ class reg_f3d_gpu: public reg_f3d<float> {
 };
 
 #include "_reg_f3d_gpu.cpp"
-
-#endif
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index d8dd6a24..38d42a89 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_AFFINETRANSFORMATION_GPU_CU
-#define _REG_AFFINETRANSFORMATION_GPU_CU
-
 #include "_reg_globalTransformation_gpu.h"
 #include "_reg_globalTransformation_kernels.cu"
 
@@ -23,7 +20,7 @@ void reg_affine_positionField_gpu(	mat44 *affineMatrix,
 					float4 **array_d)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     int3 imageSize = make_int3(targetImage->nx,targetImage->ny,targetImage->nz);
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize,&imageSize,sizeof(int3)));
@@ -70,5 +67,3 @@ void reg_affine_positionField_gpu(	mat44 *affineMatrix,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-
-#endif
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h
index 7779358e..68db157c 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_AFFINETRANSFORMATION_GPU_H
-#define _REG_AFFINETRANSFORMATION_GPU_H
+#pragma once
 
 #include "_reg_common_cuda.h"
 // #include "_reg_globalTransformation.h"
@@ -20,5 +19,3 @@ extern "C++"
 void reg_affine_positionField_gpu(mat44 *,
                                   nifti_image *,
                                   float4 **);
-
-#endif
diff --git a/reg-lib/cuda/_reg_globalTransformation_kernels.cu b/reg-lib/cuda/_reg_globalTransformation_kernels.cu
index acd92d24..fcf00af6 100755
--- a/reg-lib/cuda/_reg_globalTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_kernels.cu
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_AFFINETRANSFORMATION_KERNELS_CU
-#define _REG_AFFINETRANSFORMATION_KERNELS_CU
-
 #include "_reg_common_cuda.h"
 
 /* *************************************************************** */
@@ -55,5 +52,3 @@ void reg_affine_deformationField_kernel(float4 *PositionFieldArray)
 }
 /* *************************************************************** */
 /* *************************************************************** */
-
-#endif
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 36e064bd..5d191f30 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _reg_spline_GPU_CU
-#define _reg_spline_GPU_CU
-
 #include "_reg_localTransformation_gpu.h"
 #include "_reg_localTransformation_kernels.cu"
 
@@ -27,7 +24,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 										bool bspline)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int voxelNumber = reference->nx * reference->ny * reference->nz;
 	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
@@ -83,7 +80,7 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage,
 										  float4 **controlPointImageArray_d)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
@@ -158,7 +155,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 												float bendingEnergyWeight)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
@@ -228,7 +225,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 											 float **jacobianDet_d)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Need to reorient the Jacobian matrix using the header information - real to voxel conversion
 	mat33 reorientation;
@@ -279,7 +276,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 									   float **jacobianDet_d)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Need to reorient the Jacobian matrix using the header information - real to voxel conversion
 	mat33 reorientation;
@@ -344,7 +341,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 											 )
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// The Jacobian matrices and determinants are computed
 	float *jacobianMatrices_d;
@@ -411,7 +408,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 												   bool approx)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// The Jacobian matrices and determinants are computed
 	float *jacobianMatrices_d;
@@ -530,7 +527,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 									  bool approx)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// The Jacobian matrices and determinants are computed
 	float *jacobianMatrices_d;
@@ -645,7 +642,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageArray_d)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Bind the qform or sform
 	mat44 temp_mat=image->qto_xyz;
@@ -675,7 +672,7 @@ void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageA
 void reg_getDisplacementFromDeformation_gpu( nifti_image *image, float4 **imageArray_d)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Bind the qform or sform
 	mat44 temp_mat=image->qto_xyz;
@@ -710,12 +707,12 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
 	const int voxelNumber = def_h->nx * def_h->ny * def_h->nz;
 
 	// Create a mask array where no voxel are excluded
-	int *mask_gpu=NULL;
+	int *mask_gpu=nullptr;
 	NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber*sizeof(int)))
 	reg_fillMaskArray_gpu(voxelNumber,&mask_gpu);
 
 	// Define some variables for the deformation fields
-	float4 *tempDef_gpu=NULL;
+	float4 *tempDef_gpu=nullptr;
 	NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu,voxelNumber*sizeof(float4)))
 
 	// The deformation field is computed
@@ -774,7 +771,7 @@ void reg_defField_compose_gpu(nifti_image *def,
 							  int activeVoxel)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int voxelNumber=def->nx*def->ny*def->nz;
 
@@ -833,7 +830,7 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
 										float **jacobianMatrices_gpu)
 {
 	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz);
 	const float3 referenceSpacing=make_float3(deformationField->dx,deformationField->dy,deformationField->dz);
@@ -866,4 +863,3 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index 3e86da50..621f6ff0 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_LOCALTRANSFORMATION_GPU_H
-#define _REG_LOCALTRANSFORMATION_GPU_H
+#pragma once
 
 #include "_reg_common_cuda.h"
 #include "_reg_maths.h"
@@ -83,4 +82,3 @@ extern "C++"
 void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
                                         float4 **deformationField_gpu,
                                         float **jacobianMatrices_gpu);
-#endif //_REG_LOCALTRANSFORMATION_GPU_H
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 450b1747..2c34df01 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _reg_spline_KERNELS_CU
-#define _reg_spline_KERNELS_CU
-
 #include "_reg_common_cuda.h"
 
 __device__ __constant__ int c_UseBSpline;
@@ -2027,4 +2024,3 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices)
 /* *************************************************************** */
 /* *************************************************************** */
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 16089c27..29c084ab 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -5,8 +5,7 @@
  * Also contains an interface class between reg_base and the measure class
  */
 
-#ifndef _REG_MEASURE_GPU_H
-#define _REG_MEASURE_GPU_H
+#pragma once
 
 #include "_reg_lncc.h"
 #include "_reg_dti.h"
@@ -158,4 +157,3 @@ class reg_dti_gpu : public reg_dti , public reg_measure_gpu
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-#endif // _REG_MEASURE_GPU_H
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index b0dac95a..dd9b1bde 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_NMI_GPU_CU
-#define _REG_NMI_GPU_CU
-
 #include "_reg_nmi.h"
 #include "_reg_nmi_gpu.h"
 #include "_reg_nmi_kernels.cu"
@@ -22,8 +19,8 @@
 reg_nmi_gpu::reg_nmi_gpu():
 	reg_nmi::reg_nmi()
 {
-	this->forwardJointHistogramLog_device=NULL;
-//	this->backwardJointHistogramLog_device=NULL;
+	this->forwardJointHistogramLog_device=nullptr;
+//	this->backwardJointHistogramLog_device=nullptr;
 
 #ifndef NDEBUG
 		printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n");
@@ -41,10 +38,10 @@ reg_nmi_gpu::~reg_nmi_gpu()
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_nmi_gpu::ClearHistogram()
 {
-	if(this->forwardJointHistogramLog_device!=NULL){
+	if(this->forwardJointHistogramLog_device!=nullptr){
 		cudaFree(this->forwardJointHistogramLog_device);
 	}
-	this->forwardJointHistogramLog_device=NULL;
+	this->forwardJointHistogramLog_device=nullptr;
 #ifndef NDEBUG
 		printf("[NiftyReg DEBUG] reg_nmi_gpu::ClearHistogram() called\n");
 #endif
@@ -172,7 +169,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
 									  int floBinning)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz;
 	const int3 imageSize=make_int3(referenceImage->nx,referenceImage->ny,referenceImage->nz);
@@ -258,5 +255,3 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient()
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-
-#endif
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index 99525856..2e4dbac7 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_NMI_GPU_H
-#define _REG_NMI_GPU_H
+#pragma once
 
 #include "_reg_nmi.h"
 #include "_reg_measure_gpu.h"
@@ -103,5 +102,3 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
                                       double *entropies,
                                       int refBinning,
                                       int floBinning);
-
-#endif
diff --git a/reg-lib/cuda/_reg_nmi_kernels.cu b/reg-lib/cuda/_reg_nmi_kernels.cu
index d4736dc0..939b5253 100755
--- a/reg-lib/cuda/_reg_nmi_kernels.cu
+++ b/reg-lib/cuda/_reg_nmi_kernels.cu
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_MUTUALINFORMATION_kernels_CU
-#define _REG_MUTUALINFORMATION_kernels_CU
-
 #include <stdio.h>
 
 #define COEFF_L 0.16666666f
@@ -585,5 +582,3 @@ __global__ void reg_marginaliseResultXY_kernel(float *babyHisto)
         babyHisto[tid]=sum;
     }
 }
-
-#endif
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 6ea2736d..45f2baeb 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -1,6 +1,3 @@
-#ifndef _reg_optimiser_GPU_CU
-#define _reg_optimiser_GPU_CU
-
 #include "_reg_optimiser_gpu.h"
 #include "_reg_optimiser_kernels.cu"
 
@@ -9,9 +6,9 @@
 reg_optimiser_gpu::reg_optimiser_gpu()
     :reg_optimiser<float>::reg_optimiser()
 {
-    this->currentDOF_gpu=NULL;
-    this->bestDOF_gpu=NULL;
-    this->gradient_gpu=NULL;
+    this->currentDOF_gpu=nullptr;
+    this->bestDOF_gpu=nullptr;
+    this->gradient_gpu=nullptr;
 
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_optimiser_gpu::reg_optimiser_gpu() called\n");
@@ -21,9 +18,9 @@ reg_optimiser_gpu::reg_optimiser_gpu()
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 reg_optimiser_gpu::~reg_optimiser_gpu()
 {
-    if(this->bestDOF_gpu!=NULL)
+    if(this->bestDOF_gpu!=nullptr)
         cudaCommon_free<float4>(&this->bestDOF_gpu);;
-    this->bestDOF_gpu=NULL;
+    this->bestDOF_gpu=nullptr;
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_optimiser_gpu::~reg_optimiser_gpu() called\n");
 #endif
@@ -56,10 +53,10 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
 	// Arrays are converted from float to float4
     this->currentDOF_gpu=reinterpret_cast<float4 *>(cppData);
 
-    if(gradData!=NULL)
+    if(gradData!=nullptr)
         this->gradient_gpu=reinterpret_cast<float4 *>(gradData);
 
-    if(this->bestDOF_gpu!=NULL)
+    if(this->bestDOF_gpu!=nullptr)
         cudaCommon_free<float4>(&this->bestDOF_gpu);
 
     if(cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu,
@@ -113,8 +110,8 @@ void reg_optimiser_gpu::Perturbation(float length)
 reg_conjugateGradient_gpu::reg_conjugateGradient_gpu()
     :reg_optimiser_gpu::reg_optimiser_gpu()
 {
-    this->array1=NULL;
-    this->array2=NULL;
+    this->array1=nullptr;
+    this->array2=nullptr;
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called\n");
 #endif
@@ -123,13 +120,13 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu()
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu()
 {
-    if(this->array1!=NULL)
+    if(this->array1!=nullptr)
         cudaCommon_free<float4>(&this->array1);
-    this->array1=NULL;
+    this->array1=nullptr;
 
-    if(this->array2!=NULL)
+    if(this->array2!=nullptr)
         cudaCommon_free<float4>(&this->array2);
-    this->array2=NULL;
+    this->array2=nullptr;
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called\n");
 #endif
@@ -228,7 +225,7 @@ void reg_initialiseConjugateGradient_gpu(float4 **gradientArray_d,
                                          int nodeNumber)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
 	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, nodeNumber*sizeof(float4)))
@@ -251,7 +248,7 @@ void reg_GetConjugateGradient_gpu(float4 **gradientArray_d,
                                   int nodeNumber)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
     NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, *conjugateG_d, nodeNumber*sizeof(float4)))
@@ -297,13 +294,13 @@ float reg_getMaximalLength_gpu(float4 **gradientArray_d,
                                int nodeNumber)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     // Copy constant memory value and bind texture
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
 	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, nodeNumber*sizeof(float4)))
 
-    float *dist_d=NULL;
+    float *dist_d=nullptr;
     NR_CUDA_SAFE_CALL(cudaMalloc(&dist_d,nodeNumber*sizeof(float)))
 
     const unsigned int Grid_reg_getEuclideanDistance = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_getEuclideanDistance));
@@ -329,7 +326,7 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
 
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
@@ -355,5 +352,3 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-
-#endif // _reg_optimiser_GPU_CU
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index 2e8c9eec..2655294d 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -1,11 +1,9 @@
-#ifndef _REG_OPTIMISER_GPU_H
-#define _REG_OPTIMISER_GPU_H
+#pragma once
 
 #include "_reg_common_cuda.h"
 #include "_reg_optimiser.h"
 #include "_reg_tools_gpu.h"
 
-
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /** @class reg_optimiser_gpu
@@ -48,10 +46,10 @@ class reg_optimiser_gpu : public reg_optimiser<float>
                            size_t start,
                            InterfaceOptimiser *o,
                            float *cppData,
-                           float *gradData=NULL,
+                           float *gradData=nullptr,
                            size_t a=0,
-                           float *b=NULL,
-                           float *c=NULL);
+                           float *b=nullptr,
+                           float *c=nullptr);
    virtual void Perturbation(float length);
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -80,10 +78,10 @@ class reg_conjugateGradient_gpu : public reg_optimiser_gpu
                            size_t start,
                            InterfaceOptimiser *o,
                            float *cppData,
-                           float *gradData=NULL,
+                           float *gradData=nullptr,
                            size_t a=0,
-                           float *b=NULL,
-                           float *c=NULL);
+                           float *b=nullptr,
+                           float *c=nullptr);
    virtual void Optimise(float maxLength,
                          float smallLength,
                          float &startLength);
@@ -124,5 +122,3 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
                                         float4 **bestControlPointPosition_d,
                                         float4 **gradientArray_d,
                                         float currentLength);
-
-#endif // _REG_OPTIMISER_GPU_H
diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu
index 763b85ce..5889d42d 100755
--- a/reg-lib/cuda/_reg_optimiser_kernels.cu
+++ b/reg-lib/cuda/_reg_optimiser_kernels.cu
@@ -1,6 +1,3 @@
-#ifndef _REG_OPTIMISER_KERNELS_CU
-#define _REG_OPTIMISER_KERNELS_CU
-
 __device__ __constant__ int c_NodeNumber;
 __device__ __constant__ float c_ScalingFactor;
 
@@ -83,5 +80,3 @@ __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageA
     }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-
-#endif // _REG_OPTIMISER_KERNELS_CU
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index f8a40dbf..0f241094 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_RESAMPLING_GPU_CU
-#define _REG_RESAMPLING_GPU_CU
-
 #include "_reg_resampling_gpu.h"
 #include "_reg_resampling_kernels.cu"
 
@@ -27,7 +24,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
                            float paddingValue)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
@@ -104,7 +101,7 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
                               float paddingValue)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
@@ -164,5 +161,3 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-
-#endif
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h
index 49f60cc5..b9b90dda 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/_reg_resampling_gpu.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_RESAMPLING_GPU_H
-#define _REG_RESAMPLING_GPU_H
+#pragma once
 
 #include "_reg_common_cuda.h"
 #include "_reg_blocksize_gpu.h"
@@ -32,4 +31,3 @@ void reg_getImageGradient_gpu(nifti_image *sourceImage,
                               float4 **resultGradientArray_d,
                               int activeVoxelNumber,
                               float paddingValue);
-#endif
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 73a0ba73..dbcb5055 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_RESAMPLING_KERNELS_CU
-#define _REG_RESAMPLING_KERNELS_CU
-
 texture<float, 3, cudaReadModeElementType> floatingTexture;
 texture<float4, 1, cudaReadModeElementType> floatingMatrixTexture;
 texture<float4, 1, cudaReadModeElementType> deformationFieldTexture;
@@ -227,4 +224,3 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray)
 }
 /* *************************************************************** */
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index b6a4b42c..a34ed7e9 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_SSD_GPU_CU
-#define _REG_SSD_GPU_CU
-
 #include "_reg_ssd_gpu.h"
 #include "_reg_ssd_kernels.cu"
 
@@ -89,7 +86,7 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage,
 						  )
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Copy the constant memory variables
 	int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
@@ -154,7 +151,7 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
 									  )
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Copy the constant memory variables
 	int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
@@ -206,4 +203,3 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient()
 }
 /* *************************************************************** */
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 6cc8fac2..3f45d19b 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -10,12 +10,12 @@
  *
  */
 
-#ifndef _REG_SSD_GPU_H
-#define _REG_SSD_GPU_H
+#pragma once
 
 #include "_reg_tools_gpu.h"
 #include "_reg_measure_gpu.h"
 #include "_reg_ssd.h"
+
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /// @brief SSD measure of similarity class on the device
@@ -67,4 +67,3 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
                                       int **mask_d,
                                       int activeVoxelNumber
                                      );
-#endif
diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu
index 8d775a92..24b8fd10 100755
--- a/reg-lib/cuda/_reg_ssd_kernels.cu
+++ b/reg-lib/cuda/_reg_ssd_kernels.cu
@@ -10,11 +10,11 @@
  *
  */
 
-#ifndef _REG_SSD_KERNELS_CU
-#define _REG_SSD_KERNELS_CU
+#pragma once
 
 #include "_reg_ssd_gpu.h"
 #include "_reg_ssd_kernels.cu"
+
 /* *************************************************************** */
 texture<float, 3, cudaReadModeElementType> referenceTexture;
 texture<float, 1, cudaReadModeElementType> warpedTexture;
@@ -147,5 +147,3 @@ __global__ void reg_getSSDGradient3D_kernel(float4 *ssdGradient)
     }
 }
 /* *************************************************************** */
-#endif
-
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index cdc9fc4c..8e4d3ab8 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -10,9 +10,6 @@
  *
  */
 
-#ifndef _REG_TOOLS_GPU_CU
-#define _REG_TOOLS_GPU_CU
-
 #include "_reg_common_cuda.h"
 #include "_reg_tools_gpu.h"
 #include "_reg_tools_kernels.cu"
@@ -27,7 +24,7 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
                                       float weight)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
     const int voxelNumber = targetImage->nx * targetImage->ny * targetImage->nz;
@@ -63,7 +60,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(	mat44 *sourceMatrix_xyz,
                             float4 **nodeNMIGradientArray_d)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
@@ -97,7 +94,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
 
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const unsigned int voxelNumber = image->nx * image->ny * image->nz;
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
@@ -106,7 +103,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int3)))
 
     bool axisToSmooth[8];
-    if(smoothXYZ==NULL){
+    if(smoothXYZ==nullptr){
         for(int i=0; i<8; i++) axisToSmooth[i]=true;
     }
     else{
@@ -187,7 +184,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
 										float *spacingVoxel)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     const int voxelNumber = image->nx * image->ny * image->nz;
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
@@ -264,7 +261,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
 void reg_multiplyValue_gpu(int num, float4 **array_d, float value)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float)))
@@ -279,7 +276,7 @@ void reg_multiplyValue_gpu(int num, float4 **array_d, float value)
 void reg_addValue_gpu(int num, float4 **array_d, float value)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float)))
@@ -294,7 +291,7 @@ void reg_addValue_gpu(int num, float4 **array_d, float value)
 void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
 
@@ -308,7 +305,7 @@ void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
 void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
 
@@ -322,7 +319,7 @@ void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
 void reg_fillMaskArray_gpu(int num, int **array1_d)
 {
     // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0);
+    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
 
@@ -351,4 +348,3 @@ float reg_minReduction_gpu(float *array_d,int size)
     return thrust::reduce(dptr, dptr+size, 0.f, thrust::minimum<float>());
 }
 /* *************************************************************** */
-#endif
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index a486fd7d..300f6870 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_TOOLS_GPU_H
-#define _REG_TOOLS_GPU_H
+#pragma once
 
 #include "_reg_common_cuda.h"
 #include "_reg_tools.h"
@@ -84,5 +83,3 @@ float reg_minReduction_gpu(float *array_d,
                            int size);
 /* ******************************** */
 /* ******************************** */
-
-#endif
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index 45933c8e..584e274a 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -8,8 +8,6 @@
  *  See the LICENSE.txt file in the nifty_reg root folder
  */
 
-#ifndef _REG_TOOLS_KERNELS_CU
-#define _REG_TOOLS_KERNELS_CU
 /* *************************************************************** */
 __device__ __constant__ int c_NodeNumber;
 __device__ __constant__ int c_VoxelNumber;
@@ -286,6 +284,3 @@ __global__ void reg_fillMaskArray_kernel(int *array1_d)
 		array1_d[tid] = tid;
 }
 /* *************************************************************** */
-
-#endif
-
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 04d428b0..52aec362 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -9,9 +9,6 @@
  *
  */
 
-#ifndef _REG_BLOCKMATCHING_GPU_CU
-#define _REG_BLOCKMATCHING_GPU_CU
-
 #include "blockMatchingKernel.h"
 
 #include "_reg_ReadWriteImage.h"
@@ -643,4 +640,3 @@ void block_matching_method_gpu(nifti_image *targetImage,
 
 }
 /* *************************************************************** */
-#endif //_REG_BLOCKMATCHING_GPU_CU
diff --git a/reg-lib/cuda/blockMatchingKernel.h b/reg-lib/cuda/blockMatchingKernel.h
index 4841b2a8..dcf1452a 100644
--- a/reg-lib/cuda/blockMatchingKernel.h
+++ b/reg-lib/cuda/blockMatchingKernel.h
@@ -10,8 +10,7 @@
  *
  */
 
-#ifndef _REG_BLOCKMATCHING_GPU_H
-#define _REG_BLOCKMATCHING_GPU_H
+#pragma once
 
 #include "_reg_common_cuda.h"
 #include "_reg_blockMatching.h"
@@ -26,7 +25,3 @@
 
 extern "C++"
 void block_matching_method_gpu(nifti_image *targetImage, _reg_blockMatchingParam *params, float **targetImageArray_d, float **resultImageArray_d, float **targetPosition_d, float **resultPosition_d, int **activeBlock_d, int **mask_d, float** targetMat_d);
-
-
-#endif
-
diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu
index 7778affe..9282047c 100644
--- a/reg-lib/cuda/optimizeKernel.cu
+++ b/reg-lib/cuda/optimizeKernel.cu
@@ -230,7 +230,7 @@ void cusolverSVD(float* A_d, unsigned int m, unsigned int n, float* S_d, float*
     */
     const char jobvt = 'A';
 
-    cusolverDnHandle_t gH = NULL;
+    cusolverDnHandle_t gH = nullptr;
     int Lwork;
     //device ptrs
     float *Work;
@@ -245,7 +245,7 @@ void cusolverSVD(float* A_d, unsigned int m, unsigned int n, float* S_d, float*
     cudaMalloc(&rwork, Lwork * sizeof(float));
     cudaMalloc(&devInfo, sizeof(int));
 
-    checkCUSOLVERStatus(cusolverDnSgesvd(gH, jobu, jobvt, m, n, A_d, lda, S_d, U_d, ldu, VT_d, ldvt, Work, Lwork, NULL, devInfo), "cusolverDnSgesvd");
+    checkCUSOLVERStatus(cusolverDnSgesvd(gH, jobu, jobvt, m, n, A_d, lda, S_d, U_d, ldu, VT_d, ldvt, Work, Lwork, nullptr, devInfo), "cusolverDnSgesvd");
     checkCUSOLVERStatus(cusolverDnDestroy(gH), "cusolverDnDestroy");
 
     //free vars
diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h
index d76b8cf6..19879dcc 100644
--- a/reg-lib/cuda/optimizeKernel.h
+++ b/reg-lib/cuda/optimizeKernel.h
@@ -1,5 +1,4 @@
-#ifndef _REG_OPTIMIZE_GPU_H
-#define _REG_OPTIMIZE_GPU_H
+#pragma once
 
 #include "nifti1_io.h"
 
@@ -29,4 +28,3 @@ void downloadMat44(mat44 *lastTransformation, float* transform_d);
 extern "C++"
 void uploadMat44(mat44 lastTransformation, float* transform_d);
 */
-#endif
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index 9fdb69c3..50a97ee0 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -391,7 +391,7 @@ void launchResample(nifti_image *floatingImage,
 						  float **sourceIJKMatrix_d) {
 
 	// Define the DTI indices if required
-	if(dti_timepoint!=NULL || jacMat!=NULL){
+	if(dti_timepoint!=nullptr || jacMat!=nullptr){
 		reg_print_fct_error("launchResample");
 		reg_print_msg_error("The DTI resampling has not yet been implemented with the CUDA platform. Exit.");
 		reg_exit();
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp
index cdfd4d28..94a245e3 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affine_deformation_field.cpp
@@ -9,7 +9,7 @@
 
 #include "AladinContent.h"
 #ifdef _USE_CUDA
-#include "CUDAAladinContent.h"
+#include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
 #include "CLAladinContent.h"
@@ -66,7 +66,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
             nullptr)
     );
     // Identity use case - 3D
-    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] 
+    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
     float identity_result_3x[8] = {0, 1, 0, 1, 0, 1, 0, 1};
     float identity_result_3y[8] = {0, 0, 1, 1, 0, 0, 1, 1};
     float identity_result_3z[8] = {0, 0, 0, 0, 1, 1, 1, 1};
@@ -98,7 +98,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
     );
 
     // Translation - 3D
-    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] 
+    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
     float translation_result_3x[8] = {-0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5};
     float translation_result_3y[8] = {1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5};
     float translation_result_3z[8] = {.75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75};
@@ -111,7 +111,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
             translation_result_3z)
     );
 
-    
+
     // Full affine - 2D
     // Test order [0,0] [1,0] [0,1] [1,1]
     auto *affine = new mat44;
@@ -131,7 +131,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         auto y = identity_result_2y[i];
         affine_result_2x[i] = affine->m[0][3] + affine->m[0][0]*x + affine->m[0][1]*y;
         affine_result_2y[i] = affine->m[1][3] + affine->m[1][0]*x + affine->m[1][1]*y;
-        
+
     }
     test_use_cases.emplace_back(test_data(
             "full affine 2D",
@@ -155,7 +155,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         affine_result_3y[i] = affine->m[1][3] +
             affine->m[1][0]*x + affine->m[1][1]*y + affine->m[1][2]*z;
         affine_result_3z[i] = affine->m[2][3] +
-            affine->m[2][0]*x + affine->m[2][1]*y + affine->m[2][2]*z;        
+            affine->m[2][0]*x + affine->m[2][1]*y + affine->m[2][2]*z;
     }
     test_use_cases.emplace_back(test_data(
             "affine 3D",
@@ -176,7 +176,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         float *test_res_x;
         float *test_res_y;
         float *test_res_z;
-        std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) = 
+        std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) =
             test_use_case;
 
         // Accumate all required contents with a vector
@@ -222,12 +222,12 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
             SECTION(test_name + " " + desc){
                 // Initialise the platform to run current content and retrieve deformation field
                 auto *platform = new Platform(plat_value);
-                Kernel *affineDeformKernel = platform->createKernel(
-                        AffineDeformationFieldKernel::getName(),
+                Kernel *affineDeformKernel = platform->CreateKernel(
+                        AffineDeformationFieldKernel::GetName(),
                         con);
-                affineDeformKernel->castTo<AffineDeformationFieldKernel>()->calculate();
+                affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
                 nifti_image *defField =
-                        con->getCurrentDeformationField();
+                        con->GetCurrentDeformationField();
 
                 // Check all values
                 auto *defFieldPtrX = static_cast<float *>(defField->data);
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index 4d18790b..2dd56ee0 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -9,7 +9,7 @@
 
 #include "AladinContent.h"
 #ifdef _USE_CUDA
-#include "CUDAAladinContent.h"
+#include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
 #include "CLAladinContent.h"
@@ -83,8 +83,8 @@ void test(AladinContent *con, int platformCode) {
 
    Platform *platform = new Platform(platformCode);
 
-   Kernel *blockMatchingKernel = platform->createKernel(BlockMatchingKernel::getName(), con);
-   blockMatchingKernel->castTo<BlockMatchingKernel>()->calculate();
+   Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con);
+   blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
 
    delete blockMatchingKernel;
    delete platform;
@@ -105,7 +105,7 @@ int main(int argc, char **argv)
 
    // Read the input reference image
    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if (referenceImage == NULL){
+   if (referenceImage == nullptr){
       reg_print_msg_error("The input reference image could not be read");
       return EXIT_FAILURE;
    }
@@ -115,7 +115,7 @@ int main(int argc, char **argv)
 
    // Read the input floating image
    nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName);
-   if (warpedImage == NULL){
+   if (warpedImage == nullptr){
       reg_print_msg_error("The input warped image could not be read");
       return EXIT_FAILURE;
    }
@@ -136,28 +136,28 @@ int main(int argc, char **argv)
    _reg_blockMatchingParam* blockMatchingParams;
 
    // Platforms
-   AladinContent *con = NULL;
+   AladinContent *con = nullptr;
    if (platformCode == NR_PLATFORM_CPU) {
-      con = new AladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1);
+      con = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #ifdef _USE_CUDA
    else if (platformCode == NR_PLATFORM_CUDA) {
-      con = new CudaAladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1);
+      con = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #endif
 #ifdef _USE_OPENCL
    else if (platformCode == NR_PLATFORM_CL) {
-      con = new ClAladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1);
+      con = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #endif
    else {
       reg_print_msg_error("The platform code is not suppoted");
       return EXIT_FAILURE;
    }
-   con->setCurrentWarped(warpedImage);
-   //con->setCurrentWarped(referenceImage);
+   con->SetCurrentWarped(warpedImage);
+   //con->SetCurrentWarped(referenceImage);
    test(con, platformCode);
-   blockMatchingParams = con->getBlockMatchingParams();
+   blockMatchingParams = con->GetBlockMatchingParams();
 
 #ifndef NDEBUG
    std::cout << "blockMatchingParams->definedActiveBlock = " << blockMatchingParams->definedActiveBlockNumber << std::endl;
@@ -216,4 +216,3 @@ int main(int argc, char **argv)
 #endif
    return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp
index f7102c03..2c234cfa 100644
--- a/reg-test/reg_test_bspline_deformation_field.cpp
+++ b/reg-test/reg_test_bspline_deformation_field.cpp
@@ -22,19 +22,19 @@ int main(int argc, char **argv)
 
     // Read the input reference image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == NULL) {
+    if (referenceImage == nullptr) {
         reg_print_msg_error("The input reference image could not be read");
         return EXIT_FAILURE;
     }
     nifti_image *cppImage = reg_io_ReadImageFile(inputCPPFileName);
-    if (cppImage == NULL) {
+    if (cppImage == nullptr) {
         reg_print_msg_error("The control point grid image could not be read");
         return EXIT_FAILURE;
     }
 
     // Read the input deformation field image image
     nifti_image *expectedDefField = reg_io_ReadImageFile(inputDefImageName);
-    if (expectedDefField == NULL){
+    if (expectedDefField == nullptr){
         reg_print_msg_error("The input deformation field image could not be read");
         return EXIT_FAILURE;
     }
@@ -61,7 +61,7 @@ int main(int argc, char **argv)
        // Compute the deformation field throught composition
        reg_spline_getDeformationField(cppImage,
                                       test_field,
-                                      NULL,
+                                      nullptr,
                                       true,
                                       true);
     }
@@ -69,7 +69,7 @@ int main(int argc, char **argv)
        // Compute the deformation field from scratch
        reg_spline_getDeformationField(cppImage,
                                       test_field,
-                                      NULL,
+                                      nullptr,
                                       false,
                                       true);
     }
@@ -104,4 +104,3 @@ int main(int argc, char **argv)
     // return on a successful test
     return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_changeDataType.cpp b/reg-test/reg_test_changeDataType.cpp
index 0812fde3..cc17aec9 100644
--- a/reg-test/reg_test_changeDataType.cpp
+++ b/reg-test/reg_test_changeDataType.cpp
@@ -19,7 +19,7 @@ int main(int argc, char **argv)
     char *inputImageName = argv[1];
     // Read the input image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputImageName);
-    if (referenceImage == NULL) {
+    if (referenceImage == nullptr) {
         reg_print_msg_error("The input reference image could not be read");
         return EXIT_FAILURE;
     }
@@ -33,7 +33,7 @@ int main(int argc, char **argv)
     char *expectedImageName = argv[3];
     // Read the input image
     nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName);
-    if (expectedImage == NULL) {
+    if (expectedImage == nullptr) {
         reg_print_msg_error("The expected image could not be read");
         return EXIT_FAILURE;
     }
diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp
index 68a36c1e..e567292e 100644
--- a/reg-test/reg_test_coherence_affine_deformation_field.cpp
+++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp
@@ -9,7 +9,7 @@
 
 #include "AladinContent.h"
 #ifdef _USE_CUDA
-#include "CUDAAladinContent.h"
+#include "CudaAladinContent.h"
 #endif
 
 #ifdef _USE_OPENCL
@@ -23,8 +23,8 @@ void test(AladinContent *con, int platformCode) {
 
     Platform *platform = new Platform(platformCode);
 
-    Kernel *affineDeformKernel = platform->createKernel(AffineDeformationFieldKernel::getName(), con);
-    affineDeformKernel->castTo<AffineDeformationFieldKernel>()->calculate();
+    Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con);
+    affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
 
     delete affineDeformKernel;
     delete platform;
@@ -44,7 +44,7 @@ int main(int argc, char **argv)
 
     // Read the input reference image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == NULL) {
+    if (referenceImage == nullptr) {
         reg_print_msg_error("The input reference image could not be read");
         return EXIT_FAILURE;
     }
@@ -54,7 +54,7 @@ int main(int argc, char **argv)
 
     // Read the input deformation field image image
     nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName);
-    if (inputDeformationField == NULL){
+    if (inputDeformationField == nullptr){
         reg_print_msg_error("The input deformation field image could not be read");
         return EXIT_FAILURE;
     }
@@ -75,16 +75,16 @@ int main(int argc, char **argv)
     test_field_gpu->data = (void *) malloc(test_field_gpu->nvox*test_field_gpu->nbyper);
 
     // Compute the affine deformation field
-    AladinContent *con_cpu = new AladinContent(referenceImage, NULL, NULL, inputMatrix, sizeof(float));
-    AladinContent *con_gpu = NULL;
+    AladinContent *con_cpu = new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float));
+    AladinContent *con_gpu = nullptr;
 #ifdef _USE_CUDA
     if (platformCode == NR_PLATFORM_CUDA) {
-        con_gpu = new CudaAladinContent(referenceImage, NULL, NULL, inputMatrix, sizeof(float));
+        con_gpu = new CudaAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float));
     }
 #endif
 #ifdef _USE_OPENCL
     if (platformCode == NR_PLATFORM_CL) {
-        con_gpu = new ClAladinContent(referenceImage, NULL, NULL, inputMatrix, sizeof(float));
+        con_gpu = new ClAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float));
     }
 #endif
     if(platformCode!=NR_PLATFORM_CUDA && platformCode!=NR_PLATFORM_CL){
@@ -92,7 +92,7 @@ int main(int argc, char **argv)
        return EXIT_FAILURE;
     }
     //Check if the platform used is double capable
-    bool isDouble = con_gpu->isCurrentComputationDoubleCapable();
+    bool isDouble = con_gpu->IsCurrentComputationDoubleCapable();
     double proper_eps = EPS;
     if(isDouble == 0) {
         proper_eps = EPS_SINGLE;
@@ -101,17 +101,17 @@ int main(int argc, char **argv)
     //CPU or GPU code
     reg_tools_changeDatatype<float>(referenceImage);
     test(con_cpu, NR_PLATFORM_CPU);
-    test_field_cpu = con_cpu->getCurrentDeformationField();
+    test_field_cpu = con_cpu->GetCurrentDeformationField();
 
     test(con_gpu, NR_PLATFORM_CPU);
-    test_field_gpu = con_gpu->getCurrentDeformationField();
+    test_field_gpu = con_gpu->GetCurrentDeformationField();
 
     // Compute the difference between the computed and inputed deformation field
     nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField);
     diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper);
     reg_tools_substractImageToImage(inputDeformationField, test_field_cpu, diff_field);
     reg_tools_abs_image(diff_field);
-    double max_difference = reg_tools_getMaxValue(diff_field, -1);
+    double max_difference = reg_tools_GetMaxValue(diff_field, -1);
 
     nifti_image_free(referenceImage);
     nifti_image_free(inputDeformationField);
@@ -132,5 +132,3 @@ int main(int argc, char **argv)
 
     return EXIT_SUCCESS;
 }
-
-
diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp
index 4fa3ffeb..3c5f5acc 100644
--- a/reg-test/reg_test_coherence_blockMatching.cpp
+++ b/reg-test/reg_test_coherence_blockMatching.cpp
@@ -9,7 +9,7 @@
 
 #include "AladinContent.h"
 #ifdef _USE_CUDA
-#include "CUDAAladinContent.h"
+#include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
 #include "CLAladinContent.h"
@@ -93,8 +93,8 @@ void test(AladinContent *con, int platformCode) {
 
    Platform *platform = new Platform(platformCode);
 
-   Kernel *blockMatchingKernel = platform->createKernel(BlockMatchingKernel::getName(), con);
-   blockMatchingKernel->castTo<BlockMatchingKernel>()->calculate();
+   Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con);
+   blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
 
    delete blockMatchingKernel;
    delete platform;
@@ -131,7 +131,7 @@ int main(int argc, char **argv)
 
    // Read the input reference image
    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if (referenceImage == NULL){
+   if (referenceImage == nullptr){
       reg_print_msg_error("The input reference image could not be read");
       return EXIT_FAILURE;
    }
@@ -141,7 +141,7 @@ int main(int argc, char **argv)
 
    // Read the input floating image
    nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName);
-   if (warpedImage == NULL){
+   if (warpedImage == nullptr){
       reg_print_msg_error("The input warped image could not be read");
       return EXIT_FAILURE;
    }
@@ -152,12 +152,12 @@ int main(int argc, char **argv)
    for (size_t i = 0; i < referenceImage->nvox; ++i) mask[i] = i;
 
    // CPU Platform
-   _reg_blockMatchingParam* blockMatchingParams_cpu = NULL;
-   AladinContent *con_cpu = NULL;
-   con_cpu = new AladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1);
-   con_cpu->setCurrentWarped(warpedImage);
+   _reg_blockMatchingParam* blockMatchingParams_cpu = nullptr;
+   AladinContent *con_cpu = nullptr;
+   con_cpu = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
+   con_cpu->SetCurrentWarped(warpedImage);
    test(con_cpu, NR_PLATFORM_CPU);
-   blockMatchingParams_cpu = con_cpu->getBlockMatchingParams();
+   blockMatchingParams_cpu = con_cpu->GetBlockMatchingParams();
 
 #ifndef NDEBUG
    std::cout << "blockMatchingParams_cpu->activeBlockNumber = " << blockMatchingParams_cpu->activeBlockNumber << std::endl;
@@ -165,21 +165,21 @@ int main(int argc, char **argv)
 #endif
 
    // GPU Platform
-   AladinContent *con_gpu = NULL;
-   _reg_blockMatchingParam* blockMatchingParams_gpu = NULL;
+   AladinContent *con_gpu = nullptr;
+   _reg_blockMatchingParam* blockMatchingParams_gpu = nullptr;
 #ifdef _USE_CUDA
    if (platformCode == NR_PLATFORM_CUDA) {
-      con_gpu = new CudaAladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1);
+      con_gpu = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #endif
 #ifdef _USE_OPENCL
    if (platformCode == NR_PLATFORM_CL) {
-      con_gpu = new ClAladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1);
+      con_gpu = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #endif
-   con_gpu->setCurrentWarped(warpedImage);
+   con_gpu->SetCurrentWarped(warpedImage);
    test(con_gpu, platformCode);
-   blockMatchingParams_gpu = con_gpu->getBlockMatchingParams();
+   blockMatchingParams_gpu = con_gpu->GetBlockMatchingParams();
 
 #ifndef NDEBUG
    std::cout << "blockMatchingParams_gpu->activeBlockNumber = " << blockMatchingParams_gpu->activeBlockNumber << std::endl;
@@ -235,4 +235,3 @@ int main(int argc, char **argv)
 
    return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
index 14a550e9..37dee12f 100644
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ b/reg-test/reg_test_coherence_interpolation.cpp
@@ -6,7 +6,7 @@
 #include "Platform.h"
 #include "AladinContent.h"
 #ifdef _USE_CUDA
-#include "CUDAAladinContent.h"
+#include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
 #include "CLAladinContent.h"
@@ -45,14 +45,14 @@ int main(int argc, char **argv)
 
     // Read the input reference image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if(referenceImage==NULL){
+    if(referenceImage==nullptr){
         reg_print_msg_error("The input reference image could not be read");
         return EXIT_FAILURE;
     }
     reg_tools_changeDatatype<float>(referenceImage);
     // Read the input deformation field image image
     nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName);
-    if(inputDeformationField==NULL){
+    if(inputDeformationField==nullptr){
         reg_print_msg_error("The input deformation field image could not be read");
         return EXIT_FAILURE;
     }
@@ -76,34 +76,34 @@ int main(int argc, char **argv)
     int *tempMask = (int *)calloc(referenceImage->nvox, sizeof(int));
 
     // CPU platform
-    AladinContent *con_cpu = new AladinContent(NULL, referenceImage, NULL, sizeof(float));
-    con_cpu->setCurrentWarped(cpu_warped);
-    con_cpu->setCurrentDeformationField(inputDeformationField);
-    con_cpu->setCurrentReferenceMask(tempMask, cpu_warped->nvox);
+    AladinContent *con_cpu = new AladinContent(nullptr, referenceImage, nullptr, sizeof(float));
+    con_cpu->SetCurrentWarped(cpu_warped);
+    con_cpu->SetCurrentDeformationField(inputDeformationField);
+    con_cpu->SetCurrentReferenceMask(tempMask, cpu_warped->nvox);
     Platform *platform_cpu = new Platform(NR_PLATFORM_CPU);
-    Kernel *resampleImageKernel_cpu = platform_cpu->createKernel(ResampleImageKernel::getName(), con_cpu);
-    resampleImageKernel_cpu->castTo<ResampleImageKernel>()->calculate(interpolation,
+    Kernel *resampleImageKernel_cpu = platform_cpu->CreateKernel(ResampleImageKernel::GetName(), con_cpu);
+    resampleImageKernel_cpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
                                                                       std::numeric_limits<float>::quiet_NaN());
     delete resampleImageKernel_cpu;
     delete platform_cpu;
-    cpu_warped = con_cpu->getCurrentWarped(referenceImage->datatype);
+    cpu_warped = con_cpu->GetCurrentWarped(referenceImage->datatype);
 
     // GPU platform
-    AladinContent *con_gpu = NULL;
+    AladinContent *con_gpu = nullptr;
 #ifdef _USE_CUDA
     if (platformCode == NR_PLATFORM_CUDA) {
-        con_gpu = new CudaAladinContent(NULL, referenceImage, NULL, sizeof(float));
+        con_gpu = new CudaAladinContent(nullptr, referenceImage, nullptr, sizeof(float));
     }
 #endif
 #ifdef _USE_OPENCL
     if (platformCode == NR_PLATFORM_CL) {
-        con_gpu = new ClAladinContent(NULL, referenceImage, NULL, sizeof(float));
+        con_gpu = new ClAladinContent(nullptr, referenceImage, nullptr, sizeof(float));
     }
 #endif
-    con_gpu->setCurrentWarped(gpu_warped);
-    con_gpu->setCurrentDeformationField(inputDeformationField);
-    con_gpu->setCurrentReferenceMask(tempMask, gpu_warped->nvox);
-    Platform *platform_gpu = NULL;
+    con_gpu->SetCurrentWarped(gpu_warped);
+    con_gpu->SetCurrentDeformationField(inputDeformationField);
+    con_gpu->SetCurrentReferenceMask(tempMask, gpu_warped->nvox);
+    Platform *platform_gpu = nullptr;
 #ifdef _USE_CUDA
     if (platformCode == NR_PLATFORM_CUDA)
        platform_gpu = new Platform(NR_PLATFORM_CUDA);
@@ -113,16 +113,16 @@ int main(int argc, char **argv)
        platform_gpu = new Platform(NR_PLATFORM_CL);
     }
 #endif
-    Kernel *resampleImageKernel_gpu = platform_gpu->createKernel(ResampleImageKernel::getName(), con_gpu);
-    resampleImageKernel_gpu->castTo<ResampleImageKernel>()->calculate(interpolation,
+    Kernel *resampleImageKernel_gpu = platform_gpu->CreateKernel(ResampleImageKernel::GetName(), con_gpu);
+    resampleImageKernel_gpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
                                                                       std::numeric_limits<float>::quiet_NaN());
     delete resampleImageKernel_gpu;
     delete platform_gpu;
-    gpu_warped = con_gpu->getCurrentWarped(referenceImage->datatype);
+    gpu_warped = con_gpu->GetCurrentWarped(referenceImage->datatype);
 
     //Check if the platform used is double capable
     double proper_eps = EPS;
-    if(con_gpu->isCurrentComputationDoubleCapable() == 0) {
+    if(con_gpu->IsCurrentComputationDoubleCapable() == 0) {
         proper_eps = EPS_SINGLE;
     }
 
@@ -133,7 +133,7 @@ int main(int argc, char **argv)
     // Compute the difference between the computed and inputed warped image
     reg_tools_substractImageToImage(cpu_warped, gpu_warped, diff_field);
     reg_tools_abs_image(diff_field);
-    double max_difference = reg_tools_getMaxValue(diff_field, -1);
+    double max_difference = reg_tools_GetMaxValue(diff_field, -1);
 
     // free the allocated images
     nifti_image_free(referenceImage);
diff --git a/reg-test/reg_test_compose_deformation_field.cpp b/reg-test/reg_test_compose_deformation_field.cpp
index 865e7f09..26349806 100644
--- a/reg-test/reg_test_compose_deformation_field.cpp
+++ b/reg-test/reg_test_compose_deformation_field.cpp
@@ -17,12 +17,12 @@ int main(int argc, char **argv)
 
    // Read the input deformation field image image
    nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefFieldImageName);
-   if(inputDeformationField==NULL){
+   if(inputDeformationField==nullptr){
       reg_print_msg_error("The input deformation field image could not be read");
       return EXIT_FAILURE;
    }
    nifti_image *inputComFieldImage = reg_io_ReadImageFile(inputComFieldImageName);
-   if(inputComFieldImage==NULL){
+   if(inputComFieldImage==nullptr){
       reg_print_msg_error("The input composed deformation field image could not be read");
       return EXIT_FAILURE;
    }
@@ -43,7 +43,7 @@ int main(int argc, char **argv)
    // Compute the non-linear deformation field
    reg_defField_compose(inputDeformationField,
                         test_field,
-                        NULL);
+                        nullptr);
 
    // Compute the difference between the computed and inputed deformation field
    reg_tools_substractImageToImage(inputComFieldImage,test_field,test_field);
diff --git a/reg-test/reg_test_computation_time.cpp b/reg-test/reg_test_computation_time.cpp
index 3f7dafa3..f6306499 100644
--- a/reg-test/reg_test_computation_time.cpp
+++ b/reg-test/reg_test_computation_time.cpp
@@ -28,13 +28,13 @@ int main(int argc, char **argv)
 
     // Read the input reference image
     nifti_image *inputImageOne = reg_io_ReadImageFile(inputImageOneName);
-    if (inputImageOne == NULL) {
+    if (inputImageOne == nullptr) {
         reg_print_msg_error("The first input image could not be read");
         return EXIT_FAILURE;
     }
     reg_tools_changeDatatype<float>(inputImageOne);
     nifti_image *inputImageTwo = reg_io_ReadImageFile(inputImageTwoName);
-    if (inputImageTwo == NULL) {
+    if (inputImageTwo == nullptr) {
         reg_print_msg_error("The second input image could not be read");
         return EXIT_FAILURE;
     }
@@ -70,7 +70,7 @@ int main(int argc, char **argv)
 
 
     // Generate a control point grids
-    nifti_image *splineGridOne = NULL;
+    nifti_image *splineGridOne = nullptr;
     float spacing[3] = {
         inputImageOne->dx * 5.f,
         inputImageOne->dz * 5.f,
@@ -335,8 +335,8 @@ int main(int argc, char **argv)
        reg_tools_kernelConvolution(defFieldThr,
                                    currentNodeSpacing,
                                    kernel_type,
-                                   NULL, // mask
-                                   NULL, // all volumes are considered as active
+                                   nullptr, // mask
+                                   nullptr, // all volumes are considered as active
                                    activeAxis
                                    );
        // Convolution along the y axis
@@ -346,8 +346,8 @@ int main(int argc, char **argv)
        reg_tools_kernelConvolution(defFieldThr,
                                    currentNodeSpacing,
                                    kernel_type,
-                                   NULL, // mask
-                                   NULL, // all volumes are considered as active
+                                   nullptr, // mask
+                                   nullptr, // all volumes are considered as active
                                    activeAxis
                                    );
        // Convolution along the z axis if required
@@ -359,8 +359,8 @@ int main(int argc, char **argv)
           reg_tools_kernelConvolution(defFieldThr,
                                       currentNodeSpacing,
                                       kernel_type,
-                                      NULL, // mask
-                                      NULL, // all volumes are considered as active
+                                      nullptr, // mask
+                                      nullptr, // all volumes are considered as active
                                       activeAxis
                                       );
        }
@@ -396,4 +396,3 @@ int main(int argc, char **argv)
 
     return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_convolution.cpp b/reg-test/reg_test_convolution.cpp
index 6263cfa1..4c2a509f 100644
--- a/reg-test/reg_test_convolution.cpp
+++ b/reg-test/reg_test_convolution.cpp
@@ -16,7 +16,7 @@ int main(int argc, char **argv)
 
     // Read the input reference image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputImageName);
-    if (referenceImage == NULL) {
+    if (referenceImage == nullptr) {
         reg_print_msg_error("The input reference image could not be read");
         return EXIT_FAILURE;
     }
@@ -31,7 +31,7 @@ int main(int argc, char **argv)
 
     // Read the input reference image
     nifti_image *expectedFile = reg_io_ReadImageFile(expectedFileName);
-    if (expectedFile == NULL) {
+    if (expectedFile == nullptr) {
         reg_print_msg_error("The expected result image could not be read");
         return EXIT_FAILURE;
     }
@@ -61,4 +61,3 @@ int main(int argc, char **argv)
 
     return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_fullAffine.cpp b/reg-test/reg_test_fullAffine.cpp
index c54b62b9..2e4609fe 100644
--- a/reg-test/reg_test_fullAffine.cpp
+++ b/reg-test/reg_test_fullAffine.cpp
@@ -19,14 +19,14 @@ int main(int argc, char **argv)
 
    // Read the input reference image
    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if(referenceImage==NULL){
+   if(referenceImage==nullptr){
       reg_print_msg_error("The input reference image could not be read");
       return EXIT_FAILURE;
    }
    reg_tools_changeDatatype<float>(referenceImage);
    // Read the input reference image
    nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName);
-   if(floatingImage==NULL){
+   if(floatingImage==nullptr){
       reg_print_msg_error("The input floating image could not be read");
       return EXIT_FAILURE;
    }
@@ -40,7 +40,7 @@ int main(int argc, char **argv)
    reg_aladin_sym<float> *affine=new reg_aladin_sym<float>();
    affine->SetInputReference(referenceImage);
    affine->SetInputFloating(floatingImage);
-   affine->setPlatformCode(NR_PLATFORM_CPU);
+   affine->SetPlatformCode(NR_PLATFORM_CPU);
    affine->Run();
    mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix());
 
@@ -67,4 +67,3 @@ int main(int argc, char **argv)
 
    return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_fullAffine_cl.cpp b/reg-test/reg_test_fullAffine_cl.cpp
index bc75104d..f4360541 100755
--- a/reg-test/reg_test_fullAffine_cl.cpp
+++ b/reg-test/reg_test_fullAffine_cl.cpp
@@ -19,14 +19,14 @@ int main(int argc, char **argv)
 
    // Read the input reference image
    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if(referenceImage==NULL){
+   if(referenceImage==nullptr){
       reg_print_msg_error("The input reference image could not be read");
       return EXIT_FAILURE;
    }
    reg_tools_changeDatatype<float>(referenceImage);
    // Read the input reference image
    nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName);
-   if(floatingImage==NULL){
+   if(floatingImage==nullptr){
       reg_print_msg_error("The input floating image could not be read");
       return EXIT_FAILURE;
    }
@@ -40,8 +40,8 @@ int main(int argc, char **argv)
    reg_aladin<float> *affine=new reg_aladin_sym<float>();
    affine->SetInputReference(referenceImage);
    affine->SetInputFloating(floatingImage);
-   affine->setPlatformCode(NR_PLATFORM_CL);
-   affine->setClIdx(1);
+   affine->SetPlatformCode(NR_PLATFORM_CL);
+   affine->SetClIdx(1);
    affine->Run();
    mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix());
 
@@ -63,4 +63,3 @@ int main(int argc, char **argv)
 
    return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_fullAffine_cuda.cpp b/reg-test/reg_test_fullAffine_cuda.cpp
index 63eea4e3..65e874fd 100755
--- a/reg-test/reg_test_fullAffine_cuda.cpp
+++ b/reg-test/reg_test_fullAffine_cuda.cpp
@@ -19,14 +19,14 @@ int main(int argc, char **argv)
 
    // Read the input reference image
    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if(referenceImage==NULL){
+   if(referenceImage==nullptr){
       reg_print_msg_error("The input reference image could not be read");
       return EXIT_FAILURE;
    }
    reg_tools_changeDatatype<float>(referenceImage);
    // Read the input reference image
    nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName);
-   if(floatingImage==NULL){
+   if(floatingImage==nullptr){
       reg_print_msg_error("The input floating image could not be read");
       return EXIT_FAILURE;
    }
@@ -40,7 +40,7 @@ int main(int argc, char **argv)
    reg_aladin_sym<float> *affine=new reg_aladin_sym<float>();
    affine->SetInputReference(referenceImage);
    affine->SetInputFloating(floatingImage);
-   affine->setPlatformCode(NR_PLATFORM_CUDA);
+   affine->SetPlatformCode(NR_PLATFORM_CUDA);
    affine->Run();
    mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix());
 
@@ -62,4 +62,3 @@ int main(int argc, char **argv)
 
    return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_fullNonlinear.cpp b/reg-test/reg_test_fullNonlinear.cpp
index c99e0ad4..3910fd8a 100644
--- a/reg-test/reg_test_fullNonlinear.cpp
+++ b/reg-test/reg_test_fullNonlinear.cpp
@@ -21,14 +21,14 @@ int main(int argc, char **argv)
 
    // Read the input reference image
    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if(referenceImage==NULL){
+   if(referenceImage==nullptr){
       reg_print_msg_error("The input reference image could not be read");
       return EXIT_FAILURE;
    }
    reg_tools_changeDatatype<float>(referenceImage);
    // Read the input reference image
    nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName);
-   if(floatingImage==NULL){
+   if(floatingImage==nullptr){
       reg_print_msg_error("The input floating image could not be read");
       return EXIT_FAILURE;
    }
@@ -38,7 +38,7 @@ int main(int argc, char **argv)
    reg_tool_ReadAffineFile(inputMatrix, inputMatFileName);
    // Read the input control point grid image
    nifti_image *inputControlPointGridImage = reg_io_ReadImageFile(inputControlPointGridFileName);
-   if(inputControlPointGridImage==NULL){
+   if(inputControlPointGridImage==nullptr){
       reg_print_msg_error("The input control point grid image could not be read");
       return EXIT_FAILURE;
    }
@@ -83,4 +83,3 @@ int main(int argc, char **argv)
 
    return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_fullSymNonlinear.cpp b/reg-test/reg_test_fullSymNonlinear.cpp
index 07d90e97..3f86334a 100644
--- a/reg-test/reg_test_fullSymNonlinear.cpp
+++ b/reg-test/reg_test_fullSymNonlinear.cpp
@@ -21,14 +21,14 @@ int main(int argc, char **argv)
 
    // Read the input reference image
    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if(referenceImage==NULL){
+   if(referenceImage==nullptr){
       reg_print_msg_error("The input reference image could not be read");
       return EXIT_FAILURE;
    }
    reg_tools_changeDatatype<float>(referenceImage);
    // Read the input reference image
    nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName);
-   if(floatingImage==NULL){
+   if(floatingImage==nullptr){
       reg_print_msg_error("The input floating image could not be read");
       return EXIT_FAILURE;
    }
@@ -38,7 +38,7 @@ int main(int argc, char **argv)
    reg_tool_ReadAffineFile(inputMatrix, inputMatFileName);
    // Read the input control point grid image
    nifti_image *inputControlPointGridImage = reg_io_ReadImageFile(inputControlPointGridFileName);
-   if(inputControlPointGridImage==NULL){
+   if(inputControlPointGridImage==nullptr){
       reg_print_msg_error("The input control point grid image could not be read");
       return EXIT_FAILURE;
    }
@@ -83,4 +83,3 @@ int main(int argc, char **argv)
 
    return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index fc7f9f48..ad732158 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -14,7 +14,7 @@ int main(int argc, char **argv)
     char *inputImageName = argv[1];
     // Read the input image
     nifti_image *inputImage = reg_io_ReadImageFile(inputImageName);
-    if (inputImage == NULL) {
+    if (inputImage == nullptr) {
         reg_print_msg_error("The input image could not be read");
         return EXIT_FAILURE;
     }
@@ -24,7 +24,7 @@ int main(int argc, char **argv)
     char *expectedImageName = argv[2];
     // Read the expected image
     nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName);
-    if (expectedImage == NULL) {
+    if (expectedImage == nullptr) {
         reg_print_msg_error("The expected image could not be read");
         return EXIT_FAILURE;
     }
@@ -55,7 +55,7 @@ int main(int argc, char **argv)
     tempGradImage->data=(void *)malloc(tempGradImage->nvox*tempGradImage->nbyper);
 
     // Declare a deformation field image
-    nifti_image *defFieldImage = NULL;
+    nifti_image *defFieldImage = nullptr;
     // Allocate a deformation field image if required
     if(usedMethod > 0)
     {
@@ -139,7 +139,7 @@ int main(int argc, char **argv)
     }
 
     // Free the allocated arrays and images
-    if(defFieldImage!=NULL)
+    if(defFieldImage!=nullptr)
         nifti_image_free(defFieldImage);
     nifti_image_free(tempGradImage);
     free(mask);
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 35bef058..d448176d 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -10,7 +10,7 @@
 
 #include "AladinContent.h"
 #ifdef _USE_CUDA
-#include "CUDAAladinContent.h"
+#include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
 #include "CLAladinContent.h"
@@ -33,18 +33,15 @@ typedef std::tuple<AladinContent*, std::string, int> content_desc;
 
 TEST_CASE("Resampling", "[resampling]") {
     // Create a reference 2D image
-    int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 };
-    nifti_image* reference2D = nifti_make_new_nim(
-        dim,
-        NIFTI_TYPE_FLOAT32,
-        true);
+    int dim[8] = {2, 2, 2, 1, 1, 1, 1, 1};
+    nifti_image *reference2D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference2D);
 
     // Fill image with distance from identity
     auto* ref2dPrt = static_cast<float*>(reference2D->data);
-    for (float y = 0; y<reference2D->ny; ++y) {
+    for (float y = 0; y < reference2D->ny; ++y) {
         for (float x = 0; x < reference2D->nx; ++x) {
-            *ref2dPrt = sqrtf(x*x + y*y);
+            *ref2dPrt = sqrtf(x * x + y * y);
             ref2dPrt++;
         }
     }
@@ -53,14 +50,11 @@ TEST_CASE("Resampling", "[resampling]") {
 
     // Create a reference 3D image
     dim[0] = 3; dim[3] = 2;
-    nifti_image* reference3D = nifti_make_new_nim(
-        dim,
-        NIFTI_TYPE_FLOAT32,
-        true);
+    nifti_image *reference3D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference3D);
 
     // Fill image with distance from identity
-    auto* ref3dPrt = static_cast<float*>(reference3D->data);
+    auto *ref3dPrt = static_cast<float*>(reference3D->data);
     for (float z = 0; z < reference3D->nz; ++z) {
         for (float y = 0; y < reference3D->ny; ++y) {
             for (float x = 0; x < reference3D->nx; ++x) {
@@ -75,14 +69,14 @@ TEST_CASE("Resampling", "[resampling]") {
 
     // Identity use case - 2D
     // First create an identity displacement field and then convert it into a deformation
-    nifti_image* id_field_2D = nifti_copy_nim_info(reference2D);
+    nifti_image *id_field_2D = nifti_copy_nim_info(reference2D);
     id_field_2D->ndim = id_field_2D->dim[0] = 5;
     id_field_2D->nu = id_field_2D->dim[5] = 2;
     id_field_2D->nvox = id_field_2D->nx * id_field_2D->ny * id_field_2D->nu;
     id_field_2D->data = (void *)calloc(id_field_2D->nvox, id_field_2D->nbyper);
     reg_getDeformationFromDisplacement(id_field_2D);
     float res2[4];
-    memcpy(res2, reference2D->data, reference2D->nvox*sizeof(float));
+    memcpy(res2, reference2D->data, reference2D->nvox * sizeof(float));
     // create the test case
     test_use_cases.emplace_back(test_data(
         "identity 2D",
@@ -92,7 +86,7 @@ TEST_CASE("Resampling", "[resampling]") {
     );
 
     // Identity use case - 3D
-    nifti_image* id_field_3D = nifti_copy_nim_info(reference3D);
+    nifti_image *id_field_3D = nifti_copy_nim_info(reference3D);
     id_field_3D->ndim = id_field_3D->dim[0] = 5;
     id_field_3D->nu = id_field_3D->dim[5] = 3;
     id_field_3D->nvox = id_field_3D->nx * id_field_3D->ny * id_field_3D->nz * id_field_3D->nu;
@@ -110,14 +104,12 @@ TEST_CASE("Resampling", "[resampling]") {
 
     // Loop over all generated test cases to create all content and run all tests
     for (auto&& test_use_case : test_use_cases) {
-
         // Retrieve test information
         std::string test_name;
         nifti_image *reference;
         nifti_image *def_field;
         float *test_res;
-        std::tie(test_name, reference, def_field, test_res) =
-            test_use_case;
+        std::tie(test_name, reference, def_field, test_res) = test_use_case;
 
         // Accumate all required contents with a vector
         std::vector<content_desc> listContent;
@@ -151,42 +143,35 @@ TEST_CASE("Resampling", "[resampling]") {
 #endif
         // Loop over all possibles contents for each test
         for (auto&& content : listContent) {
-
-            AladinContent* con;
+            AladinContent *con;
             std::string desc;
             int plat_value;
             std::tie(con, desc, plat_value) = content;
 
             SECTION(test_name + " " + desc) {
                 // Create and set a warped image to host the computation
-                nifti_image* warped = nifti_copy_nim_info(reference);
+                nifti_image *warped = nifti_copy_nim_info(reference);
                 warped->data = (void*)malloc(warped->nvox * warped->nbyper);
-                con->setCurrentWarped(warped);
+                con->SetCurrentWarped(warped);
                 // Set the deformation field
-                con->setCurrentDeformationField(def_field);
+                con->SetCurrentDeformationField(def_field);
                 // Set an empty mask to consider all voxels
-                int* tempMask = (int*)calloc(reference->nvox, sizeof(int));
-                con->setCurrentReferenceMask(tempMask, warped->nvox);
+                int *tempMask = (int*)calloc(reference->nvox, sizeof(int));
+                con->SetCurrentReferenceMask(tempMask, warped->nvox);
                 // Initialise the platform to run current content and retrieve deformation field
-                auto* platform = new Platform(plat_value);
-                Kernel* resampleKernel = platform->createKernel(
-                    ResampleImageKernel::getName(),
-                    con);
+                auto *platform = new Platform(plat_value);
+                Kernel *resampleKernel = platform->CreateKernel(ResampleImageKernel::GetName(), con);
                 // args = interpolation and padding
-                std::list<int> interp = { 0, 1, 3 };
+                std::list<int> interp = {0, 1, 3};
                 for (auto it : interp) {
-                    resampleKernel->castTo<ResampleImageKernel>()->calculate(
-                        it,
-                        0);
-                    warped = con->getCurrentWarped(reference->datatype);
+                    resampleKernel->castTo<ResampleImageKernel>()->Calculate(it, 0);
+                    warped = con->GetCurrentWarped(reference->datatype);
 
                     // Check all values
-                    auto* warpedPtr = static_cast<float*>(warped->data);
+                    auto *warpedPtr = static_cast<float*>(warped->data);
                     for (int i = 0; i < warped->nx * warped->ny * warped->nz; ++i) {
                         std::cout << i << " " << static_cast<float*>(reference->data)[i] << " " << warpedPtr[i] << " " << test_res[i] << std::endl;
-                        REQUIRE(fabs(
-                            warpedPtr[i] - test_res[i]) <
-                            EPS_SINGLE);
+                        REQUIRE(fabs(warpedPtr[i] - test_res[i]) < EPS_SINGLE);
                     }
                 }
                 delete resampleKernel;
diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp
index 8e0b6264..715be017 100644
--- a/reg-test/reg_test_leastTrimmedSquares.cpp
+++ b/reg-test/reg_test_leastTrimmedSquares.cpp
@@ -10,7 +10,7 @@
 
 #include "AladinContent.h"
 #ifdef _USE_CUDA
-#include "CUDAAladinContent.h"
+#include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
 #include "CLAladinContent.h"
@@ -37,8 +37,8 @@ void test(AladinContent *con, int platformCode, bool isAffine) {
 
    Platform *platform = new Platform(platformCode);
 
-   Kernel *optimiseKernel = platform->createKernel(OptimiseKernel::getName(), con);
-   optimiseKernel->castTo<OptimiseKernel>()->calculate(isAffine);
+   Kernel *optimiseKernel = platform->CreateKernel(OptimiseKernel::GetName(), con);
+   optimiseKernel->castTo<OptimiseKernel>()->Calculate(isAffine);
 
    delete optimiseKernel;
    delete platform;
@@ -76,7 +76,7 @@ int main(int argc, char **argv)
    mat44 *expectedLSMatrix = reg_tool_ReadMat44File(expectedLTSMatrixFilename);
    ////////////////////////
    // Platforms
-   AladinContent *con = NULL;
+   AladinContent *con = nullptr;
    if (platformCode == NR_PLATFORM_CPU) {
       con = new AladinContent();
    }
@@ -110,7 +110,7 @@ int main(int argc, char **argv)
 
    mat44* test_LTS = (mat44 *)malloc(sizeof(mat44));
    reg_mat44_eye(test_LTS);
-   con->setTransformationMatrix(test_LTS);
+   con->SetTransformationMatrix(test_LTS);
 
    //2-D
    if (n1 == 2) {
@@ -151,21 +151,21 @@ int main(int argc, char **argv)
       return EXIT_FAILURE;
    }
 
-   con->setBlockMatchingParams(blockMatchingParams);
+   con->SetBlockMatchingParams(blockMatchingParams);
    test(con, platformCode, isAffine);
 
 #ifndef NDEBUG
    if (n1 == 2)
-      reg_mat44_disp(con->getTransformationMatrix(), (char *) "test_optimize_2D");
-   else reg_mat44_disp(con->getTransformationMatrix(), (char *) "test_optimize_3D");
+      reg_mat44_disp(con->GetTransformationMatrix(), (char *) "test_optimize_2D");
+   else reg_mat44_disp(con->GetTransformationMatrix(), (char *) "test_optimize_3D");
 #endif
 
    if (n1 == 2){
-      if (check_matrix_difference(*expectedLSMatrix, *con->getTransformationMatrix(), (char *) "LTS matrices 2D affine - rigid", max_difference))
+      if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *) "LTS matrices 2D affine - rigid", max_difference))
          return EXIT_FAILURE;
    }
    else{
-      if (check_matrix_difference(*expectedLSMatrix, *con->getTransformationMatrix(), (char *) "LTS matrices 3D affine - rigid", max_difference))
+      if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *) "LTS matrices 3D affine - rigid", max_difference))
          return EXIT_FAILURE;
    }
 
@@ -182,4 +182,3 @@ int main(int argc, char **argv)
 #endif
    return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_linearElasticity.cpp b/reg-test/reg_test_linearElasticity.cpp
index 58cbcd49..b339ac1a 100644
--- a/reg-test/reg_test_linearElasticity.cpp
+++ b/reg-test/reg_test_linearElasticity.cpp
@@ -21,13 +21,13 @@ int main(int argc, char **argv)
 
     // Read the input reference image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == NULL) {
+    if (referenceImage == nullptr) {
         reg_print_msg_error("The input reference image could not be read");
         return EXIT_FAILURE;
     }
     // Read the transformation file
     nifti_image *transImage = reg_io_ReadImageFile(inputTransFileName);
-    if (transImage == NULL) {
+    if (transImage == nullptr) {
         reg_print_msg_error("The transformation image could not be read");
         return EXIT_FAILURE;
     }
@@ -80,4 +80,3 @@ int main(int argc, char **argv)
 
     return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_linearElasticityGradient.cpp b/reg-test/reg_test_linearElasticityGradient.cpp
index 2c730f8e..9a10a005 100644
--- a/reg-test/reg_test_linearElasticityGradient.cpp
+++ b/reg-test/reg_test_linearElasticityGradient.cpp
@@ -17,19 +17,19 @@ int main(int argc, char **argv)
 
     // Read the input reference image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == NULL) {
+    if (referenceImage == nullptr) {
         reg_print_msg_error("The input reference image could not be read");
         return EXIT_FAILURE;
     }
     // Read the transformation file
     nifti_image *transImage = reg_io_ReadImageFile(inputTransFileName);
-    if (transImage == NULL) {
+    if (transImage == nullptr) {
         reg_print_msg_error("The transformation image could not be read");
         return EXIT_FAILURE;
     }
     // Read the expected gradient file
     nifti_image *expectedGradientImage = reg_io_ReadImageFile(expectedGradFileName);
-    if (expectedGradientImage == NULL) {
+    if (expectedGradientImage == nullptr) {
         reg_print_msg_error("The expected gradient image could not be read");
         return EXIT_FAILURE;
     }
@@ -84,4 +84,3 @@ int main(int argc, char **argv)
 
     return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_measure.cpp b/reg-test/reg_test_measure.cpp
index 16262333..f46467f9 100644
--- a/reg-test/reg_test_measure.cpp
+++ b/reg-test/reg_test_measure.cpp
@@ -26,7 +26,7 @@ int main(int argc, char **argv)
 
    /* Read the reference image */
    nifti_image *refImage = reg_io_ReadImageFile(inputRefImageName);
-   if(refImage == NULL)
+   if(refImage == nullptr)
    {
       fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n",
               inputRefImageName);
@@ -36,7 +36,7 @@ int main(int argc, char **argv)
 
    /* Read the warped image */
    nifti_image *warImage = reg_io_ReadImageFile(inputWarImageName);
-   if(warImage == NULL)
+   if(warImage == nullptr)
    {
       fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n",
               inputWarImageName);
@@ -79,9 +79,9 @@ int main(int argc, char **argv)
                                         warImage,
                                         mask_image,
                                         warImage,
-                                        NULL,
-                                        NULL,
-                                        NULL);
+                                        nullptr,
+                                        nullptr,
+                                        nullptr);
       double measure=measure_object->GetSimilarityMeasureValue();
 
 #ifndef NDEBUG
@@ -110,8 +110,8 @@ int main(int argc, char **argv)
                                         warImage,
                                         mask_image,
                                         warImage,
-                                        NULL,
-                                        NULL);
+                                        nullptr,
+                                        nullptr);
       double measure=measure_object->GetSimilarityMeasureValue();
 #ifndef NDEBUG
       printf("reg_test_measure: MIND value %iD = %.7g\n",
diff --git a/reg-test/reg_test_mindDescriptor.cpp b/reg-test/reg_test_mindDescriptor.cpp
index d46eee6c..b848f16d 100644
--- a/reg-test/reg_test_mindDescriptor.cpp
+++ b/reg-test/reg_test_mindDescriptor.cpp
@@ -15,7 +15,7 @@ int main(int argc, char **argv)
     char *inputImageName = argv[1];
     // Read the input image
     nifti_image *inputImage = reg_io_ReadImageFile(inputImageName);
-    if (inputImage == NULL) {
+    if (inputImage == nullptr) {
         reg_print_msg_error("The input image could not be read");
         return EXIT_FAILURE;
     }
@@ -25,7 +25,7 @@ int main(int argc, char **argv)
     char *expectedImageName = argv[2];
     // Read the expected image
     nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName);
-    if (expectedImage == NULL) {
+    if (expectedImage == nullptr) {
         reg_print_msg_error("The expected image could not be read");
         return EXIT_FAILURE;
     }
@@ -67,4 +67,3 @@ int main(int argc, char **argv)
 #endif
     return EXIT_SUCCESS;
 }
-
diff --git a/reg-test/reg_test_mindsscDescriptor.cpp b/reg-test/reg_test_mindsscDescriptor.cpp
index c6eddb6d..c2090567 100644
--- a/reg-test/reg_test_mindsscDescriptor.cpp
+++ b/reg-test/reg_test_mindsscDescriptor.cpp
@@ -15,7 +15,7 @@ int main(int argc, char **argv)
     char *inputImageName = argv[1];
     // Read the input image
     nifti_image *inputImage = reg_io_ReadImageFile(inputImageName);
-    if (inputImage == NULL) {
+    if (inputImage == nullptr) {
         reg_print_msg_error("The input image could not be read");
         return EXIT_FAILURE;
     }
@@ -25,7 +25,7 @@ int main(int argc, char **argv)
     char *expectedImageName = argv[2];
     // Read the expected image
     nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName);
-    if (expectedImage == NULL) {
+    if (expectedImage == nullptr) {
         reg_print_msg_error("The expected image could not be read");
         return EXIT_FAILURE;
     }
diff --git a/reg-test/reg_test_nonlinear_deformation_field.cpp b/reg-test/reg_test_nonlinear_deformation_field.cpp
index 40fc04a3..18f80687 100644
--- a/reg-test/reg_test_nonlinear_deformation_field.cpp
+++ b/reg-test/reg_test_nonlinear_deformation_field.cpp
@@ -18,19 +18,19 @@ int main(int argc, char **argv)
 
    // Read the input reference image
    nifti_image *referenceImage = reg_io_ReadImageHeader(inputRefImageName);
-   if(referenceImage==NULL){
+   if(referenceImage==nullptr){
       reg_print_msg_error("The input reference image could not be read");
       return EXIT_FAILURE;
    }
    // Read the input deformation field image image
    nifti_image *controlPointGridImage = reg_io_ReadImageFile(inputCPPImageName);
-   if(controlPointGridImage==NULL){
+   if(controlPointGridImage==nullptr){
       reg_print_msg_error("The input control point grid image could not be read");
       return EXIT_FAILURE;
    }
    // Read the input deformation field image image
    nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName);
-   if(inputDeformationField==NULL){
+   if(inputDeformationField==nullptr){
       reg_print_msg_error("The input deformation field image could not be read");
       return EXIT_FAILURE;
    }
@@ -52,7 +52,7 @@ int main(int argc, char **argv)
    reg_getDeformationFromDisplacement(test_field);
    reg_spline_getDeformationField(controlPointGridImage,
                                   test_field,
-                                  NULL,
+                                  nullptr,
                                   true,
                                   true);
 
diff --git a/reg-test/reg_test_svd_cuda.cpp b/reg-test/reg_test_svd_cuda.cpp
index 7e824359..2f4b38b8 100644
--- a/reg-test/reg_test_svd_cuda.cpp
+++ b/reg-test/reg_test_svd_cuda.cpp
@@ -149,8 +149,8 @@ int main(int argc, char **argv)
             gpuErrchk(cudaMalloc(&work_d, Lwork * sizeof(double)));
 
             // --- CUDA SVD execution
-            stat = cusolverDnDgesvd(solver_handle, 'A', 'A', m, n, inputSVDMatrix_d, m, Sigma_d, U_d, max_size, VT_d, min_size, work_d, Lwork, NULL, devInfo);
-            //stat = cusolverDnSgesvd(solver_handle, 'N', 'N', M, N, d_A, M, d_S, d_U, M, d_V, N, work, work_size, NULL, devInfo);
+            stat = cusolverDnDgesvd(solver_handle, 'A', 'A', m, n, inputSVDMatrix_d, m, Sigma_d, U_d, max_size, VT_d, min_size, work_d, Lwork, nullptr, devInfo);
+            //stat = cusolverDnSgesvd(solver_handle, 'N', 'N', M, N, d_A, M, d_S, d_U, M, d_V, N, work, work_size, nullptr, devInfo);
             cudaDeviceSynchronize();
 
             int devInfo_h = 0;

From 0129fd3e5b97e452af03582a5cb02d88b45a589a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 25 Nov 2022 15:12:25 +0000
Subject: [PATCH 018/314] Create Content class

* Inherit AladinContent from this class
* Use this class in the kernels and the KernelFactory
* Get rid of unnecessary constructors
* Eliminate unnecessary InitAladinContent functions
* Remove the name property from the kernels
---
 reg-lib/AffineDeformationFieldKernel.h        |   9 +-
 reg-lib/AladinContent.cpp                     | 191 +---------
 reg-lib/AladinContent.h                       | 114 ++----
 reg-lib/BlockMatchingKernel.h                 |  10 +-
 reg-lib/CMakeLists.txt                        |  36 +-
 reg-lib/Content.cpp                           |  89 +++++
 reg-lib/Content.h                             |  61 ++++
 reg-lib/ConvolutionKernel.h                   |   7 +-
 reg-lib/Kernel.h                              |   5 +-
 reg-lib/KernelFactory.h                       |   5 +-
 reg-lib/OptimiseKernel.h                      |   7 +-
 reg-lib/Platform.cpp                          |   2 +-
 reg-lib/Platform.h                            |   4 +-
 reg-lib/ResampleImageKernel.h                 |   9 +-
 reg-lib/_reg_aladin.cpp                       |  19 -
 reg-lib/_reg_aladin.h                         |  11 +-
 reg-lib/_reg_aladin_sym.cpp                   |  26 --
 reg-lib/_reg_aladin_sym.h                     |  11 +-
 reg-lib/cl/ClAffineDeformationFieldKernel.cpp |  86 +++--
 reg-lib/cl/ClAffineDeformationFieldKernel.h   |  29 +-
 reg-lib/cl/ClAladinContent.cpp                | 325 ++++++++----------
 reg-lib/cl/ClAladinContent.h                  |  54 +--
 reg-lib/cl/ClBlockMatchingKernel.cpp          | 137 ++++----
 reg-lib/cl/ClBlockMatchingKernel.h            |  41 ++-
 reg-lib/cl/ClConvolutionKernel.cpp            |   7 -
 reg-lib/cl/ClConvolutionKernel.h              |  13 +-
 reg-lib/cl/ClKernelFactory.cpp                |  13 +-
 reg-lib/cl/ClKernelFactory.h                  |   3 +-
 reg-lib/cl/ClOptimiseKernel.cpp               |  15 +-
 reg-lib/cl/ClOptimiseKernel.h                 |  19 +-
 reg-lib/cl/ClResampleImageKernel.cpp          |  58 ++--
 reg-lib/cl/ClResampleImageKernel.h            |  42 +--
 .../cpu/CpuAffineDeformationFieldKernel.cpp   |  21 +-
 reg-lib/cpu/CpuAffineDeformationFieldKernel.h |  14 +-
 reg-lib/cpu/CpuBlockMatchingKernel.cpp        |  10 +-
 reg-lib/cpu/CpuBlockMatchingKernel.h          |   8 +-
 reg-lib/cpu/CpuConvolutionKernel.cpp          |   5 +-
 reg-lib/cpu/CpuConvolutionKernel.h            |   5 +-
 reg-lib/cpu/CpuKernelFactory.cpp              |  12 +-
 reg-lib/cpu/CpuKernelFactory.h                |   4 +-
 reg-lib/cpu/CpuOptimiseKernel.cpp             |   9 +-
 reg-lib/cpu/CpuOptimiseKernel.h               |   9 +-
 reg-lib/cpu/CpuResampleImageKernel.cpp        |  34 +-
 reg-lib/cpu/CpuResampleImageKernel.h          |  19 +-
 .../cuda/CudaAffineDeformationFieldKernel.cpp |  12 +-
 .../cuda/CudaAffineDeformationFieldKernel.h   |  10 +-
 reg-lib/cuda/CudaAladinContent.cpp            | 283 ++++++---------
 reg-lib/cuda/CudaAladinContent.h              |  54 +--
 reg-lib/cuda/CudaBlockMatchingKernel.cpp      |  51 ++-
 reg-lib/cuda/CudaBlockMatchingKernel.h        |  16 +-
 reg-lib/cuda/CudaConvolutionKernel.cpp        |  15 +-
 reg-lib/cuda/CudaConvolutionKernel.h          |  22 +-
 reg-lib/cuda/CudaKernelFactory.cpp            |  12 +-
 reg-lib/cuda/CudaKernelFactory.h              |   3 +-
 reg-lib/cuda/CudaOptimiseKernel.cpp           | 126 ++++---
 reg-lib/cuda/CudaOptimiseKernel.h             |   8 +-
 reg-lib/cuda/CudaResampleImageKernel.cpp      |  35 +-
 reg-lib/cuda/CudaResampleImageKernel.h        |  12 +-
 .../reg_test_affine_deformation_field.cpp     | 205 +++++------
 reg-test/reg_test_coherence_interpolation.cpp |   4 +-
 reg-test/reg_test_interpolation.cpp           |  22 +-
 61 files changed, 1039 insertions(+), 1459 deletions(-)
 create mode 100644 reg-lib/Content.cpp
 create mode 100644 reg-lib/Content.h

diff --git a/reg-lib/AffineDeformationFieldKernel.h b/reg-lib/AffineDeformationFieldKernel.h
index 25f7acdd..979fcc5c 100644
--- a/reg-lib/AffineDeformationFieldKernel.h
+++ b/reg-lib/AffineDeformationFieldKernel.h
@@ -2,15 +2,12 @@
 
 #include "Kernel.h"
 
-class AffineDeformationFieldKernel : public Kernel {
+class AffineDeformationFieldKernel: public Kernel {
 public:
     static std::string GetName() {
         return "AffineDeformationFieldKernel";
     }
-
-    AffineDeformationFieldKernel( std::string name) : Kernel(name) {
-    }
-
-    virtual ~AffineDeformationFieldKernel(){}
+    AffineDeformationFieldKernel() : Kernel() {}
+    virtual ~AffineDeformationFieldKernel() {}
     virtual void Calculate(bool compose = false) = 0;
 };
diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp
index b1787b27..cfc0fe45 100755
--- a/reg-lib/AladinContent.cpp
+++ b/reg-lib/AladinContent.cpp
@@ -2,117 +2,21 @@
 
 using namespace std;
 
-/* *************************************************************** */
-AladinContent::AladinContent() {
-    //int dim[8] = { 2, 20, 20, 1, 1, 1, 1, 1 };
-    //this->currentFloating = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-    //this->currentReference = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-    //this->currentReferenceMask = nullptr;
-
-    this->currentReference = nullptr;
-    this->currentReferenceMask = nullptr;
-    this->currentFloating = nullptr;
-    this->transformationMatrix = nullptr;
-    this->blockMatchingParams = nullptr;
-    this->bytes = sizeof(float);  // Default
-
-    InitVars();
-}
 /* *************************************************************** */
 AladinContent::AladinContent(nifti_image *currentReferenceIn,
                              nifti_image *currentFloatingIn,
                              int *currentReferenceMaskIn,
-                             mat44 *transMat,
+                             mat44 *transformationMatrixIn,
                              size_t bytesIn,
                              const unsigned int currentPercentageOfBlockToUseIn,
                              const unsigned int inlierLtsIn,
                              int stepSizeBlockIn) :
-    currentReference(currentReferenceIn),
-    currentFloating(currentFloatingIn),
-    currentReferenceMask(currentReferenceMaskIn),
-    transformationMatrix(transMat),
-    bytes(bytesIn),
+    Content(currentReferenceIn, currentFloatingIn, currentReferenceMaskIn, transformationMatrixIn, bytesIn),
     currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn),
     inlierLts(inlierLtsIn),
     stepSizeBlock(stepSizeBlockIn) {
-    this->blockMatchingParams = new _reg_blockMatchingParam();
-    InitVars();
-}
-/* *************************************************************** */
-AladinContent::AladinContent(nifti_image *currentReferenceIn,
-                             nifti_image *currentFloatingIn,
-                             int *currentReferenceMaskIn,
-                             mat44 *transMat,
-                             size_t bytesIn) :
-    currentReference(currentReferenceIn),
-    currentFloating(currentFloatingIn),
-    currentReferenceMask(currentReferenceMaskIn),
-    transformationMatrix(transMat),
-    bytes(bytesIn) {
-    this->blockMatchingParams = nullptr;
-    InitVars();
-}
-/* *************************************************************** */
-AladinContent::AladinContent(nifti_image *currentReferenceIn,
-                             nifti_image *currentFloatingIn,
-                             int *currentReferenceMaskIn,
-                             size_t bytesIn,
-                             const unsigned int currentPercentageOfBlockToUseIn,
-                             const unsigned int inlierLtsIn,
-                             int stepSizeBlockIn) :
-    currentReference(currentReferenceIn),
-    currentFloating(currentFloatingIn),
-    currentReferenceMask(currentReferenceMaskIn),
-    bytes(bytesIn),
-    currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn),
-    inlierLts(inlierLtsIn),
-    stepSizeBlock(stepSizeBlockIn) {
-    this->transformationMatrix = nullptr;
-    this->blockMatchingParams = new _reg_blockMatchingParam();
-    InitVars();
-}
-/* *************************************************************** */
-AladinContent::AladinContent(nifti_image *currentReferenceIn,
-                             nifti_image *currentFloatingIn,
-                             int *currentReferenceMaskIn,
-                             size_t bytesIn) :
-    currentReference(currentReferenceIn),
-    currentFloating(currentFloatingIn),
-    currentReferenceMask(currentReferenceMaskIn),
-    bytes(bytesIn) {
-    this->transformationMatrix = nullptr;
-    this->blockMatchingParams = nullptr;
-    InitVars();
-}
-/* *************************************************************** */
-AladinContent::~AladinContent() {
-    ClearWarpedImage();
-    ClearDeformationField();
-    if (this->blockMatchingParams != nullptr)
-        delete this->blockMatchingParams;
-}
-/* *************************************************************** */
-void AladinContent::InitVars() {
-    if (this->currentFloating != nullptr && this->currentReference != nullptr) {
-        this->AllocateWarpedImage();
-    } else {
-        this->currentWarped = nullptr;
-    }
-
-    if (this->currentReference != nullptr) {
-        this->AllocateDeformationField(bytes);
-        refMatrix_xyz = (currentReference->sform_code > 0) ? (currentReference->sto_xyz) : (currentReference->qto_xyz);
-    } else {
-        this->currentDeformationField = nullptr;
-    }
-
-    if (this->currentReferenceMask == nullptr && this->currentReference != nullptr)
-        this->currentReferenceMask = (int *)calloc(this->currentReference->nx * this->currentReference->ny * this->currentReference->nz, sizeof(int));
-
-    if (this->currentFloating != nullptr) {
-        floMatrix_ijk = (currentFloating->sform_code > 0) ? (currentFloating->sto_ijk) : (currentFloating->qto_ijk);
-    }
-    if (blockMatchingParams != nullptr) {
+    if (currentPercentageOfBlockToUseIn || inlierLtsIn || stepSizeBlockIn) {
+        blockMatchingParams = new _reg_blockMatchingParam();
         initialise_block_matching_method(currentReference,
                                          blockMatchingParams,
                                          currentPercentageOfBlockToUse,
@@ -120,92 +24,17 @@ void AladinContent::InitVars() {
                                          stepSizeBlock,
                                          currentReferenceMask,
                                          false);
+    } else {
+        blockMatchingParams = nullptr;
     }
-#ifndef NDEBUG
-    if (this->currentReference == nullptr) reg_print_msg_debug("currentReference image is nullptr");
-    if (this->currentFloating == nullptr) reg_print_msg_debug("currentFloating image is nullptr");
-    if (this->currentDeformationField == nullptr) reg_print_msg_debug("currentDeformationField image is nullptr");
-    if (this->currentWarped == nullptr) reg_print_msg_debug("currentWarped image is nullptr");
-    if (this->currentReferenceMask == nullptr) reg_print_msg_debug("currentReferenceMask image is nullptr");
-    if (this->blockMatchingParams == nullptr) reg_print_msg_debug("blockMatchingParams image is nullptr");
-#endif
-}
-/* *************************************************************** */
-void AladinContent::AllocateWarpedImage() {
-    if (this->currentReference == nullptr || this->currentFloating == nullptr) {
-        reg_print_fct_error("AladinContent::AllocateWarpedImage()");
-        reg_print_msg_error(" Reference and floating images are not defined. Exit.");
-        reg_exit();
-    }
-
-    this->currentWarped = nifti_copy_nim_info(this->currentReference);
-    this->currentWarped->dim[0] = this->currentWarped->ndim = this->currentFloating->ndim;
-    this->currentWarped->dim[4] = this->currentWarped->nt = this->currentFloating->nt;
-    this->currentWarped->pixdim[4] = this->currentWarped->dt = 1.0;
-    this->currentWarped->nvox = (size_t)(this->currentWarped->nx * this->currentWarped->ny * this->currentWarped->nz * this->currentWarped->nt);
-    this->currentWarped->datatype = this->currentFloating->datatype;
-    this->currentWarped->nbyper = this->currentFloating->nbyper;
-    this->currentWarped->data = (void*)calloc(this->currentWarped->nvox, this->currentWarped->nbyper);
-    //this->floatingDatatype = this->currentFloating->datatype;
 }
 /* *************************************************************** */
-void AladinContent::AllocateDeformationField(size_t bytes) {
-    if (this->currentReference == nullptr) {
-        reg_print_fct_error("AladinContent::AllocateDeformationField()");
-        reg_print_msg_error("Reference image is not defined. Exit.");
-        reg_exit();
-    }
-    //ClearDeformationField();
-
-    this->currentDeformationField = nifti_copy_nim_info(this->currentReference);
-    this->currentDeformationField->dim[0] = this->currentDeformationField->ndim = 5;
-    if (this->currentReference->dim[0] == 2)
-        this->currentDeformationField->dim[3] = this->currentDeformationField->nz = 1;
-    this->currentDeformationField->dim[4] = this->currentDeformationField->nt = 1;
-    this->currentDeformationField->pixdim[4] = this->currentDeformationField->dt = 1.0;
-    if (this->currentReference->nz == 1)
-        this->currentDeformationField->dim[5] = this->currentDeformationField->nu = 2;
-    else
-        this->currentDeformationField->dim[5] = this->currentDeformationField->nu = 3;
-    this->currentDeformationField->pixdim[5] = this->currentDeformationField->du = 1.0;
-    this->currentDeformationField->dim[6] = this->currentDeformationField->nv = 1;
-    this->currentDeformationField->pixdim[6] = this->currentDeformationField->dv = 1.0;
-    this->currentDeformationField->dim[7] = this->currentDeformationField->nw = 1;
-    this->currentDeformationField->pixdim[7] = this->currentDeformationField->dw = 1.0;
-    this->currentDeformationField->nvox = (size_t)this->currentDeformationField->nx *
-        this->currentDeformationField->ny * this->currentDeformationField->nz *
-        this->currentDeformationField->nt * this->currentDeformationField->nu;
-    this->currentDeformationField->nbyper = bytes;
-    if (bytes == 4)
-        this->currentDeformationField->datatype = NIFTI_TYPE_FLOAT32;
-    else if (bytes == 8)
-        this->currentDeformationField->datatype = NIFTI_TYPE_FLOAT64;
-    else {
-        reg_print_fct_error("AladinContent::AllocateDeformationField()");
-        reg_print_msg_error("Only float or double are expected for the deformation field. Exit.");
-        reg_exit();
-    }
-    this->currentDeformationField->scl_slope = 1;
-    this->currentDeformationField->scl_inter = 0;
-    this->currentDeformationField->data = (void*)calloc(this->currentDeformationField->nvox, this->currentDeformationField->nbyper);
+AladinContent::~AladinContent() {
+    if (blockMatchingParams != nullptr)
+        delete blockMatchingParams;
 }
 /* *************************************************************** */
 void AladinContent::SetCaptureRange(const int voxelCaptureRangeIn) {
-    this->blockMatchingParams->voxelCaptureRange = voxelCaptureRangeIn;
-}
-/* *************************************************************** */
-void AladinContent::ClearDeformationField() {
-    if (this->currentDeformationField != nullptr)
-        nifti_image_free(this->currentDeformationField);
-    this->currentDeformationField = nullptr;
+    blockMatchingParams->voxelCaptureRange = voxelCaptureRangeIn;
 }
 /* *************************************************************** */
-void AladinContent::ClearWarpedImage() {
-    if (this->currentWarped != nullptr)
-        nifti_image_free(this->currentWarped);
-    this->currentWarped = nullptr;
-}
-/* *************************************************************** */
-bool AladinContent::IsCurrentComputationDoubleCapable() {
-    return true;
-}
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index c5276a2c..21b407f6 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -6,106 +6,32 @@
 #include <string>
 #include <vector>
 #include "Kernel.h"
+#include "Content.h"
 #include "_reg_blockMatching.h"
 
-class AladinContent {
+class AladinContent: public Content {
 public:
-	AladinContent();
-	AladinContent(nifti_image *CurrentReferenceIn,
-				  nifti_image *CurrentFloatingIn,
-				  int *CurrentReferenceMaskIn,
-				  size_t byte,
-				  const unsigned int percentageOfBlocks,
-				  const unsigned int inlierLts,
-				  int blockStepSize);
-	AladinContent(nifti_image *CurrentReferenceIn,
-				  nifti_image *CurrentFloatingIn,
-				  int *CurrentReferenceMaskIn,
-				  size_t byte);
-	AladinContent(nifti_image *CurrentReferenceIn,
-				  nifti_image *CurrentFloatingIn,
-				  int *CurrentReferenceMaskIn,
-				  mat44 *transMat,
-				  size_t byte,
-				  const unsigned int percentageOfBlocks,
-				  const unsigned int inlierLts,
-				  int blockStepSize);
-	AladinContent(nifti_image *CurrentReferenceIn,
-				  nifti_image *CurrentFloatingIn,
-				  int *CurrentReferenceMaskIn,
-				  mat44 *transMat,
-				  size_t byte);
+    AladinContent(nifti_image *currentReferenceIn,
+                  nifti_image *currentFloatingIn,
+                  int *currentReferenceMaskIn = nullptr,
+                  mat44 *transformationMatrixIn = nullptr,
+                  size_t bytesIn = sizeof(float),
+                  const unsigned int percentageOfBlocks = 0,
+                  const unsigned int inlierLts = 0,
+                  int blockStepSize = 0);
 
-	virtual ~AladinContent();
+    virtual ~AladinContent();
 
-	/* *************************************************************** */
-	void AllocateWarpedImage();
-	void ClearWarpedImage();
-	/* *************************************************************** */
-	void AllocateDeformationField(size_t bytes);
-	void ClearDeformationField();
-	virtual void InitVars();
+    // Getters
+    virtual _reg_blockMatchingParam* GetBlockMatchingParams() { return blockMatchingParams; }
 
-	unsigned int floatingVoxels, referenceVoxels;
-
-	//getters
-	virtual nifti_image* GetCurrentDeformationField() {
-		return this->currentDeformationField;
-	}
-	nifti_image* GetCurrentReference() {
-		return this->currentReference;
-	}
-	nifti_image* GetCurrentFloating() {
-		return this->currentFloating;
-	}
-	virtual nifti_image* GetCurrentWarped(int = 0) {
-		return this->currentWarped;
-	}
-	int* GetCurrentReferenceMask() {
-		return this->currentReferenceMask;
-	}
-	mat44* GetTransformationMatrix() {
-		return this->transformationMatrix;
-	}
-	virtual _reg_blockMatchingParam* GetBlockMatchingParams() {
-		return this->blockMatchingParams;
-	}
-	//setters
-	virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) {
-		this->transformationMatrix = transformationMatrixIn;
-	}
-	virtual void SetCurrentDeformationField(nifti_image *CurrentDeformationFieldIn) {
-		this->currentDeformationField = CurrentDeformationFieldIn;
-	}
-	virtual void SetCurrentWarped(nifti_image *CurrentWarpedImageIn) {
-		this->currentWarped = CurrentWarpedImageIn;
-	}
-
-	virtual void SetCurrentReferenceMask(int *, size_t) {}
-	void SetCaptureRange(const int captureRangeIn);
-	//
-	virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
-		blockMatchingParams = bmp;
-	}
-
-	virtual bool IsCurrentComputationDoubleCapable();
+    // Setters
+    void SetCaptureRange(const int captureRangeIn);
+    virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; }
 
 protected:
-	nifti_image *currentReference;
-	nifti_image *currentFloating;
-	int *currentReferenceMask;
-
-	nifti_image *currentDeformationField;
-	nifti_image *currentWarped;
-
-	mat44 *transformationMatrix;
-	mat44 refMatrix_xyz;
-	mat44 floMatrix_ijk;
-	_reg_blockMatchingParam* blockMatchingParams;
-
-	//int floatingDatatype;
-	size_t bytes;
-	unsigned int currentPercentageOfBlockToUse;
-	unsigned int inlierLts;
-	int stepSizeBlock;
+    _reg_blockMatchingParam* blockMatchingParams;
+    unsigned int currentPercentageOfBlockToUse;
+    unsigned int inlierLts;
+    int stepSizeBlock;
 };
diff --git a/reg-lib/BlockMatchingKernel.h b/reg-lib/BlockMatchingKernel.h
index b8271521..b78b05ab 100644
--- a/reg-lib/BlockMatchingKernel.h
+++ b/reg-lib/BlockMatchingKernel.h
@@ -2,14 +2,12 @@
 
 #include "Kernel.h"
 
-class BlockMatchingKernel : public Kernel {
+class BlockMatchingKernel: public Kernel {
 public:
     static std::string GetName() {
-        return "blockMatchingKernel";
+        return "BlockMatchingKernel";
     }
-    BlockMatchingKernel(std::string name) : Kernel(name) {
-
-    }
-    virtual ~BlockMatchingKernel(){}
+    BlockMatchingKernel() : Kernel() {}
+    virtual ~BlockMatchingKernel() {}
     virtual void Calculate() = 0;
 };
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 5b5505d9..7187ad7b 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -139,6 +139,8 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_femTrans")
 #-----------------------------------------------------------------------------
 ## BUILD THE ALADIN LIBRARY
 set(_reg_aladin_files
+  Content.cpp
+  Content.h
   AladinContent.cpp
   AladinContent.h
   Platform.cpp
@@ -196,16 +198,34 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_aladin")
 #-----------------------------------------------------------------------------
 ## BUILD THE F3D LIBRARY
 set(_reg_f3d_files
-    _reg_base.h
-    _reg_base.cpp
-    _reg_f3d.h
-    _reg_f3d.cpp
-    _reg_f3d2.h
-    _reg_f3d2.cpp
-    _reg_f3d_sym.h
-    _reg_f3d_sym.cpp
+  Content.cpp
+  Content.h
+  Platform.cpp
+  Platform.h
+  Kernel.h
+  _reg_base.h
+  _reg_base.cpp
+  _reg_f3d.h
+  _reg_f3d.cpp
+  _reg_f3d2.h
+  _reg_f3d2.cpp
+  _reg_f3d_sym.h
+  _reg_f3d_sym.cpp
+  cpu/CpuAffineDeformationFieldKernel.h
+  cpu/CpuAffineDeformationFieldKernel.cpp
+  cpu/CpuBlockMatchingKernel.h
+  cpu/CpuBlockMatchingKernel.cpp
+  cpu/CpuConvolutionKernel.h
+  cpu/CpuConvolutionKernel.cpp
+  cpu/CpuOptimiseKernel.h
+  cpu/CpuOptimiseKernel.cpp
+  cpu/CpuResampleImageKernel.h
+  cpu/CpuResampleImageKernel.cpp
+  cpu/CpuKernelFactory.h
+  cpu/CpuKernelFactory.cpp
 )
 set(_reg_f3d_libraries
+  _reg_blockMatching
   _reg_localTrans
   _reg_globalTrans
   _reg_resampling
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
new file mode 100644
index 00000000..bf426b99
--- /dev/null
+++ b/reg-lib/Content.cpp
@@ -0,0 +1,89 @@
+#include "Content.h"
+#include "_reg_maths.h"
+
+/* *************************************************************** */
+Content::Content(nifti_image *currentReferenceIn,
+                 nifti_image *currentFloatingIn,
+                 int *currentReferenceMaskIn,
+                 mat44 *transformationMatrixIn,
+                 size_t bytesIn) :
+    currentReference(currentReferenceIn),
+    currentFloating(currentFloatingIn),
+    currentReferenceMask(currentReferenceMaskIn),
+    transformationMatrix(transformationMatrixIn) {
+    if (!currentReferenceIn || !currentFloatingIn) {
+        reg_print_fct_error("Content::Content()");
+        reg_print_msg_error("currentReferenceIn or currentFloatingIn can't be nullptr");
+        reg_exit();
+    }
+    AllocateWarpedImage();
+    AllocateDeformationField(bytesIn);
+    if (currentReferenceMask == nullptr)
+        currentReferenceMask = (int*)calloc(currentReference->nvox, sizeof(int));
+}
+/* *************************************************************** */
+Content::~Content() {
+    ClearWarpedImage();
+    ClearDeformationField();
+}
+/* *************************************************************** */
+void Content::AllocateWarpedImage() {
+    currentWarped = nifti_copy_nim_info(currentReference);
+    currentWarped->dim[0] = currentWarped->ndim = currentFloating->ndim;
+    currentWarped->dim[4] = currentWarped->nt = currentFloating->nt;
+    currentWarped->pixdim[4] = currentWarped->dt = 1.0;
+    currentWarped->nvox = (size_t)(currentWarped->nx * currentWarped->ny * currentWarped->nz * currentWarped->nt);
+    currentWarped->datatype = currentFloating->datatype;
+    currentWarped->nbyper = currentFloating->nbyper;
+    currentWarped->data = (void*)calloc(currentWarped->nvox, currentWarped->nbyper);
+}
+/* *************************************************************** */
+void Content::ClearWarpedImage() {
+    if (currentWarped)
+        nifti_image_free(currentWarped);
+    currentWarped = nullptr;
+}
+/* *************************************************************** */
+void Content::AllocateDeformationField(size_t bytes) {
+    currentDeformationField = nifti_copy_nim_info(currentReference);
+    currentDeformationField->dim[0] = currentDeformationField->ndim = 5;
+    if (currentReference->dim[0] == 2)
+        currentDeformationField->dim[3] = currentDeformationField->nz = 1;
+    currentDeformationField->dim[4] = currentDeformationField->nt = 1;
+    currentDeformationField->pixdim[4] = currentDeformationField->dt = 1;
+    if (currentReference->nz == 1)
+        currentDeformationField->dim[5] = currentDeformationField->nu = 2;
+    else
+        currentDeformationField->dim[5] = currentDeformationField->nu = 3;
+    currentDeformationField->pixdim[5] = currentDeformationField->du = 1;
+    currentDeformationField->dim[6] = currentDeformationField->nv = 1;
+    currentDeformationField->pixdim[6] = currentDeformationField->dv = 1;
+    currentDeformationField->dim[7] = currentDeformationField->nw = 1;
+    currentDeformationField->pixdim[7] = currentDeformationField->dw = 1;
+    currentDeformationField->nvox = (size_t)(currentDeformationField->nx * currentDeformationField->ny * currentDeformationField->nz *
+                                             currentDeformationField->nt * currentDeformationField->nu);
+    currentDeformationField->nbyper = (int)bytes;
+    if (bytes == 4)
+        currentDeformationField->datatype = NIFTI_TYPE_FLOAT32;
+    else if (bytes == 8)
+        currentDeformationField->datatype = NIFTI_TYPE_FLOAT64;
+    else {
+        reg_print_fct_error("Content::AllocateDeformationField()");
+        reg_print_msg_error("Only float or double are expected for the deformation field");
+        reg_exit();
+    }
+    currentDeformationField->intent_code = NIFTI_INTENT_VECTOR;
+    memset(currentDeformationField->intent_name, 0, sizeof(currentDeformationField->intent_name));
+    strcpy(currentDeformationField->intent_name, "NREG_TRANS");
+    currentDeformationField->intent_p1 = DEF_FIELD;
+    currentDeformationField->scl_slope = 1;
+    currentDeformationField->scl_inter = 0;
+    currentDeformationField->data = (void*)calloc(currentDeformationField->nvox, currentDeformationField->nbyper);
+}
+/* *************************************************************** */
+void Content::ClearDeformationField() {
+    if (currentDeformationField)
+        nifti_image_free(currentDeformationField);
+    currentDeformationField = nullptr;
+}
+/* *************************************************************** */
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
new file mode 100644
index 00000000..4530acd9
--- /dev/null
+++ b/reg-lib/Content.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include "nifti1_io.h"
+
+class Content {
+public:
+    Content() = delete; // Can't be initialised without reference and floating images
+    Content(nifti_image *currentReferenceIn,
+            nifti_image *currentFloatingIn,
+            int *currentReferenceMaskIn = nullptr,
+            mat44 *transformationMatrixIn = nullptr,
+            size_t bytesIn = sizeof(float));
+    virtual ~Content();
+
+    // Getters
+    virtual nifti_image* GetCurrentDeformationField() { return currentDeformationField; }
+    virtual nifti_image* GetCurrentReference() { return currentReference; }
+    virtual nifti_image* GetCurrentFloating() { return currentFloating; }
+    virtual nifti_image* GetCurrentWarped(int = 0) { return currentWarped; }
+    virtual int* GetCurrentReferenceMask() { return currentReferenceMask; }
+    virtual mat44* GetTransformationMatrix() { return transformationMatrix; }
+
+    // Setters
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) {
+        transformationMatrix = transformationMatrixIn;
+    }
+    virtual void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) {
+        ClearDeformationField();
+        currentDeformationField = currentDeformationFieldIn;
+    }
+    virtual void SetCurrentWarped(nifti_image *currentWarpedImageIn) {
+        ClearWarpedImage();
+        currentWarped = currentWarpedImageIn;
+    }
+    virtual void SetCurrentReferenceMask(int *currentReferenceMaskIn) {
+        free(currentReferenceMask);
+        currentReferenceMask = currentReferenceMaskIn;
+    }
+
+    virtual bool IsCurrentComputationDoubleCapable() { return true; }
+
+    static mat44* GetXYZMatrix(nifti_image *image) {
+        return image->sform_code > 0 ? &image->sto_xyz : &image->qto_xyz;
+    }
+    static mat44* GetIJKMatrix(nifti_image *image) {
+        return image->sform_code > 0 ? &image->sto_ijk : &image->qto_ijk;
+    }
+
+protected:
+    virtual void AllocateWarpedImage();
+    virtual void ClearWarpedImage();
+    virtual void AllocateDeformationField(size_t bytes);
+    virtual void ClearDeformationField();
+
+    nifti_image *currentReference;
+    nifti_image *currentFloating;
+    int *currentReferenceMask;
+    nifti_image *currentDeformationField;
+    nifti_image *currentWarped;
+    mat44 *transformationMatrix;
+};
diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h
index 5c7d113c..bc1be24b 100644
--- a/reg-lib/ConvolutionKernel.h
+++ b/reg-lib/ConvolutionKernel.h
@@ -3,13 +3,12 @@
 #include "Kernel.h"
 #include "nifti1_io.h"
 
-class ConvolutionKernel : public Kernel {
+class ConvolutionKernel: public Kernel {
 public:
     static std::string GetName() {
         return "ConvolutionKernel";
     }
-    ConvolutionKernel(std::string name) : Kernel(name) {
-    }
-    virtual ~ConvolutionKernel(){}
+    ConvolutionKernel() : Kernel() {}
+    virtual ~ConvolutionKernel() {}
     virtual void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr) = 0;
 };
diff --git a/reg-lib/Kernel.h b/reg-lib/Kernel.h
index e5b7b031..4d3a16f1 100755
--- a/reg-lib/Kernel.h
+++ b/reg-lib/Kernel.h
@@ -5,14 +5,11 @@
 
 class Kernel {
 public:
-    Kernel(std::string nameIn) { name = nameIn; }
+    Kernel() {}
     virtual ~Kernel() {}
 
     std::string GetName() const;
 
     template <class T>
     T* castTo() { return dynamic_cast<T*>(this); }
-
-private:
-    std::string name;
 };
diff --git a/reg-lib/KernelFactory.h b/reg-lib/KernelFactory.h
index f7d99de2..c5348c9e 100755
--- a/reg-lib/KernelFactory.h
+++ b/reg-lib/KernelFactory.h
@@ -1,9 +1,10 @@
 #pragma once
 
-#include "AladinContent.h"
+#include "Kernel.h"
+#include "Content.h"
 
 class KernelFactory {
 public:
-    virtual Kernel* ProduceKernel(std::string name, AladinContent* con) const = 0;
+    virtual Kernel* ProduceKernel(std::string name, Content *con) const = 0;
     virtual ~KernelFactory() {}
 };
diff --git a/reg-lib/OptimiseKernel.h b/reg-lib/OptimiseKernel.h
index d0066298..8c65d5de 100644
--- a/reg-lib/OptimiseKernel.h
+++ b/reg-lib/OptimiseKernel.h
@@ -2,13 +2,12 @@
 
 #include "Kernel.h"
 
-class OptimiseKernel : public Kernel{
+class OptimiseKernel: public Kernel {
 public:
     static std::string GetName() {
         return "OptimiseKernel";
     }
-    OptimiseKernel(std::string name) : Kernel(name) {
-    }
-    virtual ~OptimiseKernel(){}
+    OptimiseKernel() : Kernel() {}
+    virtual ~OptimiseKernel() {}
     virtual void Calculate(bool affine) = 0;
 };
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 13aa6e64..ebc7bdcb 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -34,7 +34,7 @@ Platform::Platform(int platformCode) {
 #endif
 }
 /* *************************************************************** */
-Kernel* Platform::CreateKernel(const string& name, AladinContent *con) const {
+Kernel* Platform::CreateKernel(const string& name, Content *con) const {
     return this->factory->ProduceKernel(name, con);
 }
 /* *************************************************************** */
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 66ef2be1..ce75c9b3 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -10,14 +10,14 @@
 
 class Kernel;
 class KernelFactory;
-class AladinContent;
+class Content;
 
 class Platform {
 public:
     Platform(int platformCode);
     virtual ~Platform();
 
-    Kernel* CreateKernel(const std::string& name, AladinContent *con) const;
+    Kernel* CreateKernel(const std::string& name, Content *con) const;
     std::string GetName();
 
     int GetPlatformCode();
diff --git a/reg-lib/ResampleImageKernel.h b/reg-lib/ResampleImageKernel.h
index 9ac7bfb5..16e3c133 100644
--- a/reg-lib/ResampleImageKernel.h
+++ b/reg-lib/ResampleImageKernel.h
@@ -3,15 +3,12 @@
 #include "Kernel.h"
 #include "nifti1_io.h"
 
-class ResampleImageKernel : public Kernel {
+class ResampleImageKernel: public Kernel {
 public:
     static std::string GetName() {
         return "ResampleImageKernel";
     }
-    ResampleImageKernel( std::string name) : Kernel(name) {
-    }
-
-    virtual ~ResampleImageKernel(){}
-
+    ResampleImageKernel() : Kernel() {}
+    virtual ~ResampleImageKernel() {}
     virtual void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr) = 0;
 };
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 2ea21ec7..7b3599b4 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -486,25 +486,6 @@ void reg_aladin<T>::InitAladinContent(nifti_image *ref,
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::InitAladinContent(nifti_image *ref,
-                                      nifti_image *flo,
-                                      int *mask,
-                                      mat44 *transMat,
-                                      size_t bytes) {
-    if (this->platformCode == NR_PLATFORM_CPU)
-        this->con = new AladinContent(ref, flo, mask, transMat, bytes);
-#ifdef _USE_CUDA
-    else if (platformCode == NR_PLATFORM_CUDA)
-        this->con = new CudaAladinContent(ref, flo, mask, transMat, bytes);
-#endif
-#ifdef _USE_OPENCL
-    else if (platformCode == NR_PLATFORM_CL)
-        this->con = new ClAladinContent(ref, flo, mask, transMat, bytes);
-#endif
-    this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams();
-}
-/* *************************************************************** */
-template<class T>
 void reg_aladin<T>::ClearAladinContent() {
     delete this->con;
 }
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 3485a303..9995303f 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -124,14 +124,9 @@ class reg_aladin {
                                    int *mask,
                                    mat44 *transMat,
                                    size_t bytes,
-                                   unsigned int blockPercentage,
-                                   unsigned int inlierLts,
-                                   unsigned int blockStepSize);
-    virtual void InitAladinContent(nifti_image *ref,
-                                   nifti_image *flo,
-                                   int *mask,
-                                   mat44 *transMat,
-                                   size_t bytes);
+                                   unsigned int blockPercentage = 0,
+                                   unsigned int inlierLts = 0,
+                                   unsigned int blockStepSize = 0);
     virtual void ClearAladinContent();
     virtual void CreateKernels();
     virtual void ClearKernels();
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index 88b68d3b..32857cd9 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -257,32 +257,6 @@ void reg_aladin_sym<T>::UpdateTransformationMatrix(int type){
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
-                        nifti_image *flo,
-                        int *mask,
-                        mat44 *transMat,
-                        size_t bytes)
-{
-   reg_aladin<T>::InitAladinContent(ref,
-                               flo,
-                               mask,
-                               transMat,
-                               bytes);
-
-  if (this->platformCode == NR_PLATFORM_CPU)
-  this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes);
-#ifdef _USE_CUDA
-  else if (this->platformCode == NR_PLATFORM_CUDA)
-  this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes);
-#endif
-#ifdef _USE_OPENCL
-  else if (this->platformCode == NR_PLATFORM_CL)
-  this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes);
-#endif
-  this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams();
-}
-/* *************************************************************** */
-template <class T>
 void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
                         nifti_image *flo,
                         int *mask,
diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h
index fc1d11b2..1ddfe436 100644
--- a/reg-lib/_reg_aladin_sym.h
+++ b/reg-lib/_reg_aladin_sym.h
@@ -22,19 +22,14 @@ class reg_aladin_sym : public reg_aladin<T>
   AladinContent *backCon;
   Kernel *bAffineTransformation3DKernel, *bConvolutionKernel, *bBlockMatchingKernel, *bOptimiseKernel, *bResamplingKernel;
 
-  virtual void InitAladinContent(nifti_image *ref,
-                                 nifti_image *flo,
-                                 int *mask,
-                                 mat44 *transMat,
-                                 size_t bytes);
   virtual void InitAladinContent(nifti_image *ref,
                                  nifti_image *flo,
                                  int *mask,
                                  mat44 *transMat,
                                  size_t bytes,
-                                 unsigned int blockPercentage,
-                                 unsigned int inlierLts,
-                                 unsigned int blockStepSize);
+                                 unsigned int blockPercentage = 0,
+                                 unsigned int inlierLts = 0,
+                                 unsigned int blockStepSize = 0);
   virtual void ClearAladinContent();
   virtual void CreateKernels();
   virtual void ClearKernels();
diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
index 20449a55..b71f1f04 100644
--- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
@@ -1,12 +1,11 @@
 #include "ClAffineDeformationFieldKernel.h"
 #include "config.h"
-
 #include "_reg_tools.h"
 
-ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) :
-    AffineDeformationFieldKernel(nameIn) {
+/* *************************************************************** */
+ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) : AffineDeformationFieldKernel() {
     //populate the CLAladinContent object ptr
-    con = static_cast<ClAladinContent*>(conIn);
+    ClAladinContent *con = static_cast<ClAladinContent*>(conIn);
 
     //path to kernel files
     const char* niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR");
@@ -15,19 +14,17 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *co
     std::string clInstallPath;
     std::string clSrcPath;
     //src dir
-    if (niftyreg_src_dir != nullptr){
+    if (niftyreg_src_dir != nullptr) {
         char opencl_kernel_path[255];
         sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir);
         clSrcPath = opencl_kernel_path;
-    }
-    else clSrcPath = CL_KERNELS_SRC_PATH;
+    } else clSrcPath = CL_KERNELS_SRC_PATH;
     //install dir
-    if(niftyreg_install_dir!=nullptr){
+    if (niftyreg_install_dir != nullptr) {
         char opencl_kernel_path[255];
         sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir);
         clInstallPath = opencl_kernel_path;
-    }
-    else clInstallPath = CL_KERNELS_PATH;
+    } else clInstallPath = CL_KERNELS_PATH;
 
     std::string clKernel("affineDeformationKernel.cl");
 
@@ -35,7 +32,7 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *co
     std::string clKernelPath = (clInstallPath + clKernel);
     std::ifstream kernelFile(clKernelPath.c_str(), std::ios::in);
     if (kernelFile.is_open() == 0) {
-        //"affineDeformationKernel.cl propbably not installed - let's use the src location"
+        //"affineDeformationKernel.cl probably not installed - let's use the src location"
         clKernelPath = (clSrcPath + clKernel);
     }
 
@@ -46,13 +43,13 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *co
     program = sContext->CreateProgram(clKernelPath.c_str());
 
     //get cpu ptrs
-    this->deformationFieldImage = con->AladinContent::GetCurrentDeformationField();
-    this->affineTransformation = con->AladinContent::GetTransformationMatrix();
-    this->ReferenceMatrix = (this->deformationFieldImage->sform_code > 0) ? &(this->deformationFieldImage->sto_xyz) : &(this->deformationFieldImage->qto_xyz);
+    deformationFieldImage = con->AladinContent::GetCurrentDeformationField();
+    affineTransformation = con->AladinContent::GetTransformationMatrix();
+    referenceMatrix = AladinContent::GetXYZMatrix(deformationFieldImage);
 
     cl_int errNum;
     // Create OpenCL kernel
-    if(this->deformationFieldImage->nz>1)
+    if (deformationFieldImage->nz > 1)
         kernel = clCreateKernel(program, "affineKernel3D", &errNum);
     else kernel = clCreateKernel(program, "affineKernel2D", &errNum);
     sContext->checkErrNum(errNum, "Error setting kernel ClAffineDeformationFieldKernel.");
@@ -62,7 +59,7 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *co
     clMask = con->GetMaskClmem();
 
     //set some final kernel args
-    errNum = clSetKernelArg(this->kernel, 2, sizeof(cl_mem), &this->clMask);
+    errNum = clSetKernelArg(kernel, 2, sizeof(cl_mem), &clMask);
     sContext->checkErrNum(errNum, "Error setting clMask.");
 
 }
@@ -74,7 +71,7 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) {
     std::size_t paramValueSize;
     errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, &paramValueSize);
     sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info ");
-    cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize);
+    cl_uint * info = (cl_uint *)alloca(sizeof(cl_uint) * paramValueSize);
     errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr);
     sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info ");
     maxWG = *info;
@@ -84,51 +81,51 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) {
     unsigned int yThreads = 8;
     unsigned int zThreads = 8;
 
-    while(xThreads*yThreads*zThreads > maxWG) {
-        xThreads = xThreads/2;
-        yThreads = yThreads/2;
-        zThreads = zThreads/2;
+    while (xThreads * yThreads * zThreads > maxWG) {
+        xThreads = xThreads / 2;
+        yThreads = yThreads / 2;
+        zThreads = zThreads / 2;
     }
 
-    const unsigned int xBlocks = ((this->deformationFieldImage->nx % xThreads) == 0) ?
-                (this->deformationFieldImage->nx / xThreads) : (this->deformationFieldImage->nx / xThreads) + 1;
-    const unsigned int yBlocks = ((this->deformationFieldImage->ny % yThreads) == 0) ?
-                (this->deformationFieldImage->ny / yThreads) : (this->deformationFieldImage->ny / yThreads) + 1;
-    const unsigned int zBlocks = ((this->deformationFieldImage->nz % zThreads) == 0) ?
-                (this->deformationFieldImage->nz / zThreads) : (this->deformationFieldImage->nz / zThreads) + 1;
-    //const cl_uint dims = this->deformationFieldImage->nz>1?3:2;
+    const unsigned int xBlocks = ((deformationFieldImage->nx % xThreads) == 0) ?
+        (deformationFieldImage->nx / xThreads) : (deformationFieldImage->nx / xThreads) + 1;
+    const unsigned int yBlocks = ((deformationFieldImage->ny % yThreads) == 0) ?
+        (deformationFieldImage->ny / yThreads) : (deformationFieldImage->ny / yThreads) + 1;
+    const unsigned int zBlocks = ((deformationFieldImage->nz % zThreads) == 0) ?
+        (deformationFieldImage->nz / zThreads) : (deformationFieldImage->nz / zThreads) + 1;
+    //const cl_uint dims = deformationFieldImage->nz>1?3:2;
     //Back to the old version... at least I could compile
     const cl_uint dims = 3;
-    const size_t globalWorkSize[dims] = { xBlocks * xThreads, yBlocks * yThreads, zBlocks * zThreads };
-    const size_t localWorkSize[dims] = { xThreads, yThreads, zThreads };
+    const size_t globalWorkSize[dims] = {xBlocks * xThreads, yBlocks * yThreads, zBlocks * zThreads};
+    const size_t localWorkSize[dims] = {xThreads, yThreads, zThreads};
 
     mat44 transformationMatrix = (compose == true) ?
-                *this->affineTransformation : reg_mat44_mul(this->affineTransformation, ReferenceMatrix);
+        *affineTransformation : reg_mat44_mul(affineTransformation, referenceMatrix);
 
-    float* trans = (float *) malloc(16 * sizeof(float));
+    float* trans = (float *)malloc(16 * sizeof(float));
     mat44ToCptr(transformationMatrix, trans);
 
-    cl_uint3 pms_d = {{ (cl_uint)this->deformationFieldImage->nx,
-                        (cl_uint)this->deformationFieldImage->ny,
-                        (cl_uint)this->deformationFieldImage->nz,
-                        (cl_uint)0 }};
+    cl_uint3 pms_d = {{(cl_uint)deformationFieldImage->nx,
+        (cl_uint)deformationFieldImage->ny,
+        (cl_uint)deformationFieldImage->nz,
+        (cl_uint)0}};
 
-    cl_mem cltransMat = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+    cl_mem cltransMat = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                        sizeof(float) * 16, trans, &errNum);
-    this->sContext->checkErrNum(errNum,
-                                "ClAffineDeformationFieldKernel::calculate failed to allocate memory (cltransMat): ");
+    sContext->checkErrNum(errNum,
+                          "ClAffineDeformationFieldKernel::calculate failed to allocate memory (cltransMat): ");
 
     cl_uint composition = compose;
-    errNum = clSetKernelArg(this->kernel, 0, sizeof(cl_mem), &cltransMat);
+    errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cltransMat);
     sContext->checkErrNum(errNum, "Error setting cltransMat.");
-    errNum |= clSetKernelArg(this->kernel, 1, sizeof(cl_mem), &this->clDeformationField);
+    errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &clDeformationField);
     sContext->checkErrNum(errNum, "Error setting clDeformationField.");
-    errNum |= clSetKernelArg(this->kernel, 3, sizeof(cl_uint3), &pms_d);
+    errNum |= clSetKernelArg(kernel, 3, sizeof(cl_uint3), &pms_d);
     sContext->checkErrNum(errNum, "Error setting kernel arguments.");
-    errNum |= clSetKernelArg(this->kernel, 4, sizeof(cl_uint), &composition);
+    errNum |= clSetKernelArg(kernel, 4, sizeof(cl_uint), &composition);
     sContext->checkErrNum(errNum, "Error setting kernel arguments.");
 
-    errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
+    errNum = clEnqueueNDRangeKernel(commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
     sContext->checkErrNum(errNum, "Error queuing ClAffineDeformationFieldKernel for execution");
     clFinish(commandQueue);
 
@@ -143,3 +140,4 @@ ClAffineDeformationFieldKernel::~ClAffineDeformationFieldKernel() {
     if (program != 0)
         clReleaseProgram(program);
 }
+/* *************************************************************** */
diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.h b/reg-lib/cl/ClAffineDeformationFieldKernel.h
index c4897caa..fb2c408d 100644
--- a/reg-lib/cl/ClAffineDeformationFieldKernel.h
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.h
@@ -3,20 +3,19 @@
 #include "AffineDeformationFieldKernel.h"
 #include "CLAladinContent.h"
 
-class ClAffineDeformationFieldKernel : public AffineDeformationFieldKernel {
-    public:
-       ClAffineDeformationFieldKernel(AladinContent * conIn, std::string nameIn);
-       ~ClAffineDeformationFieldKernel();
+class ClAffineDeformationFieldKernel: public AffineDeformationFieldKernel {
+public:
+    ClAffineDeformationFieldKernel(Content *conIn);
+    ~ClAffineDeformationFieldKernel();
+    void Calculate(bool compose = false);
 
-       void Calculate(bool compose = false);
-    private:
-       mat44 *affineTransformation, *ReferenceMatrix;
-       nifti_image *deformationFieldImage;
-       ClAladinContent *con;
-       cl_command_queue commandQueue;
-       cl_kernel kernel;
-       cl_context clContext;
-       cl_program program;
-       cl_mem clDeformationField, clMask;
-       ClContextSingleton *sContext;
+private:
+    mat44 *affineTransformation, *referenceMatrix;
+    nifti_image *deformationFieldImage;
+    cl_command_queue commandQueue;
+    cl_kernel kernel;
+    cl_context clContext;
+    cl_program program;
+    cl_mem clDeformationField, clMask;
+    ClContextSingleton *sContext;
 };
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index efa2c127..1788160c 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -1,71 +1,23 @@
 #include "CLAladinContent.h"
 #include "_reg_tools.h"
 
-/* *************************************************************** */
-ClAladinContent::ClAladinContent() {
-    InitVars();
-    AllocateClPtrs();
-}
-/* *************************************************************** */
-ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn,
-                                 nifti_image *currentFloatingIn,
-                                 int *currentReferenceMaskIn,
-                                 size_t byte,
-                                 const unsigned int blockPercentage,
-                                 const unsigned int inlierLts,
-                                 int blockStep) :
-    AladinContent(currentReferenceIn,
-                  currentFloatingIn,
-                  currentReferenceMaskIn,
-                  byte, blockPercentage,
-                  inlierLts,
-                  blockStep) {
-    InitVars();
-    AllocateClPtrs();
-}
-/* *************************************************************** */
-ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn,
-                                 nifti_image *currentFloatingIn,
-                                 int *currentReferenceMaskIn,
-                                 size_t byte) :
-    AladinContent(currentReferenceIn,
-                  currentFloatingIn,
-                  currentReferenceMaskIn,
-                  byte) {
-    InitVars();
-    AllocateClPtrs();
-}
 /* *************************************************************** */
 ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn,
                                  nifti_image *currentFloatingIn,
                                  int *currentReferenceMaskIn,
-                                 mat44 *transMat,
-                                 size_t byte,
-                                 const unsigned int blockPercentage,
+                                 mat44 *transformationMatrixIn,
+                                 size_t bytesIn,
+                                 const unsigned int percentageOfBlocks,
                                  const unsigned int inlierLts,
-                                 int blockStep) :
+                                 int blockStepSize) :
     AladinContent(currentReferenceIn,
                   currentFloatingIn,
                   currentReferenceMaskIn,
-                  transMat,
-                  byte,
-                  blockPercentage,
+                  transformationMatrixIn,
+                  bytesIn,
+                  percentageOfBlocks,
                   inlierLts,
-                  blockStep) {
-    InitVars();
-    AllocateClPtrs();
-}
-/* *************************************************************** */
-ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn,
-                                 nifti_image *currentFloatingIn,
-                                 int *currentReferenceMaskIn,
-                                 mat44 *transMat,
-                                 size_t byte) :
-    AladinContent(currentReferenceIn,
-                  currentFloatingIn,
-                  currentReferenceMaskIn,
-                  transMat,
-                  byte) {
+                  blockStepSize) {
     InitVars();
     AllocateClPtrs();
 }
@@ -75,112 +27,107 @@ ClAladinContent::~ClAladinContent() {
 }
 /* *************************************************************** */
 void ClAladinContent::InitVars() {
-    this->referenceImageClmem = 0;
-    this->floatingImageClmem = 0;
-    this->warpedImageClmem = 0;
-    this->deformationFieldClmem = 0;
-    this->referencePositionClmem = 0;
-    this->warpedPositionClmem = 0;
-    this->totalBlockClmem = 0;
-    this->maskClmem = 0;
+    referenceImageClmem = nullptr;
+    floatingImageClmem = nullptr;
+    warpedImageClmem = nullptr;
+    deformationFieldClmem = nullptr;
+    referencePositionClmem = nullptr;
+    warpedPositionClmem = nullptr;
+    totalBlockClmem = nullptr;
+    maskClmem = nullptr;
 
-    if (this->currentReference != nullptr && this->currentReference->nbyper != NIFTI_TYPE_FLOAT32)
-        reg_tools_changeDatatype<float>(this->currentReference);
-    if (this->currentFloating != nullptr && this->currentFloating->nbyper != NIFTI_TYPE_FLOAT32) {
-        reg_tools_changeDatatype<float>(this->currentFloating);
-        if (this->currentWarped != nullptr)
-            reg_tools_changeDatatype<float>(this->currentWarped);
+    if (currentReference != nullptr && currentReference->nbyper != NIFTI_TYPE_FLOAT32)
+        reg_tools_changeDatatype<float>(currentReference);
+    if (currentFloating != nullptr && currentFloating->nbyper != NIFTI_TYPE_FLOAT32) {
+        reg_tools_changeDatatype<float>(currentFloating);
+        if (currentWarped != nullptr)
+            reg_tools_changeDatatype<float>(currentWarped);
     }
-    this->sContext = &ClContextSingleton::Instance();
-    this->clContext = this->sContext->GetContext();
-    this->commandQueue = this->sContext->GetCommandQueue();
-    //this->numBlocks = (this->blockMatchingParams != nullptr) ? this->blockMatchingParams->blockNumber[0] * this->blockMatchingParams->blockNumber[1] * this->blockMatchingParams->blockNumber[2] : 0;
+    sContext = &ClContextSingleton::Instance();
+    clContext = sContext->GetContext();
+    commandQueue = sContext->GetCommandQueue();
+    //numBlocks = (blockMatchingParams != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0;
 }
 /* *************************************************************** */
 void ClAladinContent::AllocateClPtrs() {
-
-    if (this->currentWarped != nullptr) {
-        this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->currentWarped->nvox * sizeof(float), this->currentWarped->data, &this->errNum);
-        this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): ");
+    if (currentWarped != nullptr) {
+        warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, currentWarped->nvox * sizeof(float), currentWarped->data, &errNum);
+        sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): ");
     }
-    if (this->currentDeformationField != nullptr) {
-        this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->currentDeformationField->nvox, this->currentDeformationField->data, &this->errNum);
-        this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): ");
+    if (currentDeformationField != nullptr) {
+        deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * currentDeformationField->nvox, currentDeformationField->data, &errNum);
+        sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): ");
     }
-    if (this->currentFloating != nullptr) {
-        this->floatingImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->currentFloating->nvox, this->currentFloating->data, &this->errNum);
-        this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (currentFloating): ");
+    if (currentFloating != nullptr) {
+        floatingImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * currentFloating->nvox, currentFloating->data, &errNum);
+        sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (currentFloating): ");
 
-        float *sourceIJKMatrix_h = (float*)malloc(16 * sizeof(float));
-        mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h);
-        this->floMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                           16 * sizeof(float),
-                                           sourceIJKMatrix_h, &this->errNum);
-        this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): ");
+        float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44));
+        mat44ToCptr(*GetIJKMatrix(currentFloating), sourceIJKMatrix_h);
+        floMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), sourceIJKMatrix_h, &errNum);
+        sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): ");
         free(sourceIJKMatrix_h);
     }
-    if (this->currentReference != nullptr) {
-        this->referenceImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                                   sizeof(float) * this->currentReference->nvox,
-                                                   this->currentReference->data, &this->errNum);
-        this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): ");
+    if (currentReference != nullptr) {
+        referenceImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                                             sizeof(float) * currentReference->nvox,
+                                             currentReference->data, &errNum);
+        sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): ");
 
-        float* targetMat = (float *)malloc(16 * sizeof(float)); //freed
-        mat44ToCptr(this->refMatrix_xyz, targetMat);
-        this->refMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                           16 * sizeof(float),
-                                           targetMat, &this->errNum);
-        this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): ");
+        float* targetMat = (float *)malloc(sizeof(mat44)); //freed
+        mat44ToCptr(*GetXYZMatrix(currentReference), targetMat);
+        refMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), targetMat, &errNum);
+        sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): ");
         free(targetMat);
     }
-    if (this->blockMatchingParams != nullptr) {
-        if (this->blockMatchingParams->referencePosition != nullptr) {
+    if (blockMatchingParams != nullptr) {
+        if (blockMatchingParams->referencePosition != nullptr) {
             //targetPositionClmem
-            this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                                                          this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float),
-                                                          this->blockMatchingParams->referencePosition, &this->errNum);
-            this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): ");
+            referencePositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                                                    blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float),
+                                                    blockMatchingParams->referencePosition, &errNum);
+            sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): ");
         }
-        if (this->blockMatchingParams->warpedPosition != nullptr) {
+        if (blockMatchingParams->warpedPosition != nullptr) {
             //resultPositionClmem
-            this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                                                       this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float),
-                                                       this->blockMatchingParams->warpedPosition, &this->errNum);
-            this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): ");
+            warpedPositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                                                 blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float),
+                                                 blockMatchingParams->warpedPosition, &errNum);
+            sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): ");
         }
-        if (this->blockMatchingParams->totalBlock != nullptr) {
+        if (blockMatchingParams->totalBlock != nullptr) {
             //totalBlockClmem
-            this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                                   this->blockMatchingParams->totalBlockNumber * sizeof(int),
-                                                   this->blockMatchingParams->totalBlock, &this->errNum);
-            this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): ");
+            totalBlockClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                                             blockMatchingParams->totalBlockNumber * sizeof(int),
+                                             blockMatchingParams->totalBlock, &errNum);
+            sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): ");
         }
     }
-    if (this->currentReferenceMask != nullptr && this->currentReference != nullptr) {
-        this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                         this->currentReference->nx * this->currentReference->ny * this->currentReference->nz * sizeof(int),
-                                         this->currentReferenceMask, &this->errNum);
-        this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): ");
+    if (currentReferenceMask != nullptr && currentReference != nullptr) {
+        maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                                   currentReference->nx * currentReference->ny * currentReference->nz * sizeof(int),
+                                   currentReferenceMask, &errNum);
+        sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): ");
     }
 }
 /* *************************************************************** */
 nifti_image* ClAladinContent::GetCurrentWarped(int datatype) {
-    DownloadImage(this->currentWarped, this->warpedImageClmem, datatype);
-    return this->currentWarped;
+    DownloadImage(currentWarped, warpedImageClmem, datatype);
+    return currentWarped;
 }
 /* *************************************************************** */
 nifti_image* ClAladinContent::GetCurrentDeformationField() {
-    this->errNum = clEnqueueReadBuffer(this->commandQueue, this->deformationFieldClmem, CL_TRUE, 0, this->currentDeformationField->nvox * sizeof(float), this->currentDeformationField->data, 0, nullptr, nullptr); //CLCONTEXT
-    this->sContext->checkErrNum(errNum, "Get: failed currentDeformationField: ");
-    return this->currentDeformationField;
+    errNum = clEnqueueReadBuffer(commandQueue, deformationFieldClmem, CL_TRUE, 0, currentDeformationField->nvox * sizeof(float), currentDeformationField->data, 0, nullptr, nullptr); //CLCONTEXT
+    sContext->checkErrNum(errNum, "Get: failed currentDeformationField: ");
+    return currentDeformationField;
 }
 /* *************************************************************** */
 _reg_blockMatchingParam* ClAladinContent::GetBlockMatchingParams() {
-    this->errNum = clEnqueueReadBuffer(this->commandQueue, this->warpedPositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->warpedPosition, 0, nullptr, nullptr); //CLCONTEXT
-    this->sContext->checkErrNum(this->errNum, "CLContext: failed result position: ");
-    this->errNum = clEnqueueReadBuffer(this->commandQueue, this->referencePositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->referencePosition, 0, nullptr, nullptr); //CLCONTEXT
-    this->sContext->checkErrNum(this->errNum, "CLContext: failed target position: ");
-    return this->blockMatchingParams;
+    errNum = clEnqueueReadBuffer(commandQueue, warpedPositionClmem, CL_TRUE, 0, sizeof(float) * blockMatchingParams->activeBlockNumber * blockMatchingParams->dim, blockMatchingParams->warpedPosition, 0, nullptr, nullptr); //CLCONTEXT
+    sContext->checkErrNum(errNum, "CLContext: failed result position: ");
+    errNum = clEnqueueReadBuffer(commandQueue, referencePositionClmem, CL_TRUE, 0, sizeof(float) * blockMatchingParams->activeBlockNumber * blockMatchingParams->dim, blockMatchingParams->referencePosition, 0, nullptr, nullptr); //CLCONTEXT
+    sContext->checkErrNum(errNum, "CLContext: failed target position: ");
+    return blockMatchingParams;
 }
 /* *************************************************************** */
 void ClAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
@@ -188,103 +135,103 @@ void ClAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
 }
 /* *************************************************************** */
 void ClAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) {
-    if (this->currentDeformationField != nullptr)
-        clReleaseMemObject(this->deformationFieldClmem);
+    if (currentDeformationField != nullptr)
+        clReleaseMemObject(deformationFieldClmem);
 
     AladinContent::SetCurrentDeformationField(currentDeformationFieldIn);
-    this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->currentDeformationField->nvox * sizeof(float), this->currentDeformationField->data, &this->errNum);
-    this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentDeformationField failed to allocate memory (deformationFieldClmem): ");
+    deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, currentDeformationField->nvox * sizeof(float), currentDeformationField->data, &errNum);
+    sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentDeformationField failed to allocate memory (deformationFieldClmem): ");
 }
 /* *************************************************************** */
-void ClAladinContent::SetCurrentReferenceMask(int *maskIn, size_t nvox) {
-    if (this->currentReferenceMask != nullptr)
+void ClAladinContent::SetCurrentReferenceMask(int *currentReferenceMaskIn) {
+    if (currentReferenceMask != nullptr)
         clReleaseMemObject(maskClmem);
-    this->currentReferenceMask = maskIn;
-    this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, nvox * sizeof(int), this->currentReferenceMask, &this->errNum);
-    this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentReferenceMask failed to allocate memory (maskClmem): ");
+    AladinContent::SetCurrentReferenceMask(currentReferenceMaskIn);
+    maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, currentReference->nvox * sizeof(int), currentReferenceMask, &errNum);
+    sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentReferenceMask failed to allocate memory (maskClmem): ");
 }
 /* *************************************************************** */
 void ClAladinContent::SetCurrentWarped(nifti_image *currentWarped) {
-    if (this->currentWarped != nullptr) {
-        clReleaseMemObject(this->warpedImageClmem);
+    if (currentWarped != nullptr) {
+        clReleaseMemObject(warpedImageClmem);
     }
     if (currentWarped->nbyper != NIFTI_TYPE_FLOAT32) {
         reg_tools_changeDatatype<float>(currentWarped);
     }
     AladinContent::SetCurrentWarped(currentWarped);
-    this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, this->currentWarped->nvox * sizeof(float), this->currentWarped->data, &this->errNum);
-    this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentWarped failed to allocate memory (warpedImageClmem): ");
+    warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, currentWarped->nvox * sizeof(float), currentWarped->data, &errNum);
+    sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentWarped failed to allocate memory (warpedImageClmem): ");
 }
 /* *************************************************************** */
 void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
 
     AladinContent::SetBlockMatchingParams(bmp);
-    if (this->blockMatchingParams->referencePosition != nullptr) {
-        clReleaseMemObject(this->referencePositionClmem);
+    if (blockMatchingParams->referencePosition != nullptr) {
+        clReleaseMemObject(referencePositionClmem);
         //referencePositionClmem
-        this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->referencePosition, &this->errNum);
-        this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): ");
+        referencePositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->referencePosition, &errNum);
+        sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): ");
     }
-    if (this->blockMatchingParams->warpedPosition != nullptr) {
-        clReleaseMemObject(this->warpedPositionClmem);
+    if (blockMatchingParams->warpedPosition != nullptr) {
+        clReleaseMemObject(warpedPositionClmem);
         //warpedPositionClmem
-        this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->warpedPosition, &this->errNum);
-        this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): ");
+        warpedPositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->warpedPosition, &errNum);
+        sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): ");
     }
-    if (this->blockMatchingParams->totalBlock != nullptr) {
-        clReleaseMemObject(this->totalBlockClmem);
+    if (blockMatchingParams->totalBlock != nullptr) {
+        clReleaseMemObject(totalBlockClmem);
         //totalBlockClmem
-        this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->totalBlockNumber * sizeof(int), this->blockMatchingParams->totalBlock, &this->errNum);
-        this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (activeBlockClmem): ");
+        totalBlockClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, blockMatchingParams->totalBlockNumber * sizeof(int), blockMatchingParams->totalBlock, &errNum);
+        sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (activeBlockClmem): ");
     }
 }
 /* *************************************************************** */
 cl_mem ClAladinContent::GetReferenceImageArrayClmem() {
-    return this->referenceImageClmem;
+    return referenceImageClmem;
 }
 /* *************************************************************** */
 cl_mem ClAladinContent::GetFloatingImageArrayClmem() {
-    return this->floatingImageClmem;
+    return floatingImageClmem;
 }
 /* *************************************************************** */
 cl_mem ClAladinContent::GetWarpedImageClmem() {
-    return this->warpedImageClmem;
+    return warpedImageClmem;
 }
 /* *************************************************************** */
 cl_mem ClAladinContent::GetReferencePositionClmem() {
-    return this->referencePositionClmem;
+    return referencePositionClmem;
 }
 /* *************************************************************** */
 cl_mem ClAladinContent::GetWarpedPositionClmem() {
-    return this->warpedPositionClmem;
+    return warpedPositionClmem;
 }
 /* *************************************************************** */
 cl_mem ClAladinContent::GetDeformationFieldArrayClmem() {
-    return this->deformationFieldClmem;
+    return deformationFieldClmem;
 }
 /* *************************************************************** */
 cl_mem ClAladinContent::GetTotalBlockClmem() {
-    return this->totalBlockClmem;
+    return totalBlockClmem;
 }
 /* *************************************************************** */
 cl_mem ClAladinContent::GetMaskClmem() {
-    return this->maskClmem;
+    return maskClmem;
 }
 /* *************************************************************** */
 cl_mem ClAladinContent::GetRefMatClmem() {
-    return this->refMatClmem;
+    return refMatClmem;
 }
 /* *************************************************************** */
 cl_mem ClAladinContent::GetFloMatClmem() {
-    return this->floMatClmem;
+    return floMatClmem;
 }
 /* *************************************************************** */
 int *ClAladinContent::GetReferenceDims() {
-    return this->referenceDims;
+    return referenceDims;
 }
 /* *************************************************************** */
 int *ClAladinContent::GetFloatingDims() {
-    return this->floatingDims;
+    return floatingDims;
 }
 /* *************************************************************** */
 template<class DataType>
@@ -335,9 +282,9 @@ void ClAladinContent::FillImageData(nifti_image *image,
         reg_exit();
     }
 
-    this->errNum = clEnqueueReadBuffer(this->commandQueue, memoryObject, CL_TRUE, 0,
-                                       size * sizeof(float), buffer, 0, nullptr, nullptr);
-    this->sContext->checkErrNum(this->errNum, "Error reading warped buffer.");
+    errNum = clEnqueueReadBuffer(commandQueue, memoryObject, CL_TRUE, 0,
+                                 size * sizeof(float), buffer, 0, nullptr, nullptr);
+    sContext->checkErrNum(errNum, "Error reading warped buffer.");
 
     free(image->data);
     image->datatype = type;
@@ -386,28 +333,28 @@ void ClAladinContent::DownloadImage(nifti_image *image,
 }
 /* *************************************************************** */
 void ClAladinContent::FreeClPtrs() {
-    if (this->currentReference != nullptr) {
-        clReleaseMemObject(this->referenceImageClmem);
-        clReleaseMemObject(this->refMatClmem);
+    if (currentReference != nullptr) {
+        clReleaseMemObject(referenceImageClmem);
+        clReleaseMemObject(refMatClmem);
     }
-    if (this->currentFloating != nullptr) {
-        clReleaseMemObject(this->floatingImageClmem);
-        clReleaseMemObject(this->floMatClmem);
+    if (currentFloating != nullptr) {
+        clReleaseMemObject(floatingImageClmem);
+        clReleaseMemObject(floMatClmem);
     }
-    if (this->currentWarped != nullptr)
-        clReleaseMemObject(this->warpedImageClmem);
-    if (this->currentDeformationField != nullptr)
-        clReleaseMemObject(this->deformationFieldClmem);
-    if (this->currentReferenceMask != nullptr)
-        clReleaseMemObject(this->maskClmem);
-    if (this->blockMatchingParams != nullptr) {
-        clReleaseMemObject(this->totalBlockClmem);
-        clReleaseMemObject(this->referencePositionClmem);
-        clReleaseMemObject(this->warpedPositionClmem);
+    if (currentWarped != nullptr)
+        clReleaseMemObject(warpedImageClmem);
+    if (currentDeformationField != nullptr)
+        clReleaseMemObject(deformationFieldClmem);
+    if (currentReferenceMask != nullptr)
+        clReleaseMemObject(maskClmem);
+    if (blockMatchingParams != nullptr) {
+        clReleaseMemObject(totalBlockClmem);
+        clReleaseMemObject(referencePositionClmem);
+        clReleaseMemObject(warpedPositionClmem);
     }
 }
 /* *************************************************************** */
 bool ClAladinContent::IsCurrentComputationDoubleCapable() {
-    return this->sContext->GetIsCardDoubleCapable();
+    return sContext->GetIsCardDoubleCapable();
 }
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
index 00a94545..b4650549 100644
--- a/reg-lib/cl/ClAladinContent.h
+++ b/reg-lib/cl/ClAladinContent.h
@@ -12,36 +12,19 @@
 class ClAladinContent: public AladinContent {
 public:
     //constructors
-    ClAladinContent();
     ClAladinContent(nifti_image *currentReferenceIn,
                     nifti_image *currentFloatingIn,
-                    int *currentReferenceMaskIn,
-                    size_t byte,
-                    const unsigned int blockPercentage,
-                    const unsigned int inlierLts,
-                    int blockStep);
-    ClAladinContent(nifti_image *currentReferenceIn,
-                    nifti_image *currentFloatingIn,
-                    int *currentReferenceMaskIn,
-                    size_t byte);
-    ClAladinContent(nifti_image *currentReferenceIn,
-                    nifti_image *currentFloatingIn,
-                    int *currentReferenceMaskIn,
-                    mat44 *transMat,
-                    size_t byte,
-                    const unsigned int blockPercentage,
-                    const unsigned int inlierLts,
-                    int blockStep);
-    ClAladinContent(nifti_image *currentReferenceIn,
-                    nifti_image *currentFloatingIn,
-                    int *currentReferenceMaskIn,
-                    mat44 *transMat,
-                    size_t byte);
+                    int *currentReferenceMaskIn = nullptr,
+                    mat44 *transformationMatrixIn = nullptr,
+                    size_t bytesIn = sizeof(float),
+                    const unsigned int percentageOfBlocks = 0,
+                    const unsigned int inlierLts = 0,
+                    int blockStepSize = 0);
     ~ClAladinContent();
 
-    bool IsCurrentComputationDoubleCapable();
+    bool IsCurrentComputationDoubleCapable() override;
 
-    //opencl getters
+    // OpenCL getters
     cl_mem GetReferenceImageArrayClmem();
     cl_mem GetFloatingImageArrayClmem();
     cl_mem GetWarpedImageClmem();
@@ -55,18 +38,17 @@ class ClAladinContent: public AladinContent {
     int* GetReferenceDims();
     int* GetFloatingDims();
 
-    //cpu getters with data downloaded from device
-    _reg_blockMatchingParam* GetBlockMatchingParams();
-    nifti_image* GetCurrentDeformationField();
-    nifti_image* GetCurrentWarped(int typ);
-
-    //setters
-    void SetTransformationMatrix(mat44 *transformationMatrixIn);
-    void SetCurrentWarped(nifti_image *warpedImageIn);
-    void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn);
-    void SetCurrentReferenceMask(int *maskIn, size_t size);
-    void SetBlockMatchingParams(_reg_blockMatchingParam* bmp);
+    // CPU getters with data downloaded from device
+    _reg_blockMatchingParam* GetBlockMatchingParams() override;
+    nifti_image* GetCurrentDeformationField() override;
+    nifti_image* GetCurrentWarped(int typ) override;
 
+    // Setters
+    void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    void SetCurrentWarped(nifti_image *warpedImageIn) override;
+    void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) override;
+    void SetCurrentReferenceMask(int *currentReferenceMaskIn) override;
+    void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
 
 private:
     void InitVars();
diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp
index 157b6214..a26e3c70 100644
--- a/reg-lib/cl/ClBlockMatchingKernel.cpp
+++ b/reg-lib/cl/ClBlockMatchingKernel.cpp
@@ -2,132 +2,128 @@
 #include "config.h"
 #include <fstream>
 
-ClBlockMatchingKernel::ClBlockMatchingKernel(AladinContent *conIn, std::string name) :
-   BlockMatchingKernel(name) {
+/* *************************************************************** */
+ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKernel() {
    //populate the CLAladinContent object ptr
-   this->con = static_cast<ClAladinContent*>(conIn);
+   ClAladinContent *con = static_cast<ClAladinContent*>(conIn);
 
    //path to kernel file
-   const char* niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR");
-   const char* niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR");
+   const char *niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR");
+   const char *niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR");
 
    std::string clInstallPath;
    std::string clSrcPath;
    //src dir
-   if (niftyreg_src_dir != nullptr){
+   if (niftyreg_src_dir != nullptr) {
       char opencl_kernel_path[255];
       sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir);
       clSrcPath = opencl_kernel_path;
-   }
-   else clSrcPath = CL_KERNELS_SRC_PATH;
+   } else clSrcPath = CL_KERNELS_SRC_PATH;
    //install dir
-   if(niftyreg_install_dir!=nullptr){
+   if (niftyreg_install_dir != nullptr) {
       char opencl_kernel_path[255];
       sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir);
       clInstallPath = opencl_kernel_path;
-   }
-   else clInstallPath = CL_KERNELS_PATH;
+   } else clInstallPath = CL_KERNELS_PATH;
    std::string clKernel("blockMatchingKernel.cl");
    //Let's check if we did an install
    std::string clKernelPath = (clInstallPath + clKernel);
    std::ifstream kernelFile(clKernelPath.c_str(), std::ios::in);
    if (kernelFile.is_open() == 0) {
-      //"clKernel.cl propbably not installed - let's use the src location"
+      //"clKernel.cl probably not installed - let's use the src location"
       clKernelPath = (clSrcPath + clKernel);
    }
 
    //get opencl context params
-   this->sContext = &ClContextSingleton::Instance();
-   this->clContext = this->sContext->GetContext();
-   this->commandQueue = this->sContext->GetCommandQueue();
-   this->program = this->sContext->CreateProgram(clKernelPath.c_str());
+   sContext = &ClContextSingleton::Instance();
+   clContext = sContext->GetContext();
+   commandQueue = sContext->GetCommandQueue();
+   program = sContext->CreateProgram(clKernelPath.c_str());
 
    // Create OpenCL kernel
    cl_int errNum;
-   if (this->con->GetBlockMatchingParams()->dim == 3) {
-      this->kernel = clCreateKernel(program, "blockMatchingKernel3D", &errNum);
+   if (con->GetBlockMatchingParams()->dim == 3) {
+      kernel = clCreateKernel(program, "blockMatchingKernel3D", &errNum);
+   } else {
+      kernel = clCreateKernel(program, "blockMatchingKernel2D", &errNum);
    }
-   else {
-      this->kernel = clCreateKernel(program, "blockMatchingKernel2D", &errNum);
-   }
-   this->sContext->checkErrNum(errNum, "Error setting bm kernel.");
+   sContext->checkErrNum(errNum, "Error setting bm kernel.");
 
    //get cl ptrs
-   this->clTotalBlock = this->con->GetTotalBlockClmem();
-   this->clReferenceImageArray = this->con->GetReferenceImageArrayClmem();
-   this->clWarpedImageArray = this->con->GetWarpedImageClmem();
-   this->clWarpedPosition = this->con->GetWarpedPositionClmem();
-   this->clReferencePosition = this->con->GetReferencePositionClmem();
-   this->clMask = this->con->GetMaskClmem();
-   this->clReferenceMat = this->con->GetRefMatClmem();
+   clTotalBlock = con->GetTotalBlockClmem();
+   clReferenceImageArray = con->GetReferenceImageArrayClmem();
+   clWarpedImageArray = con->GetWarpedImageClmem();
+   clWarpedPosition = con->GetWarpedPositionClmem();
+   clReferencePosition = con->GetReferencePositionClmem();
+   clMask = con->GetMaskClmem();
+   clReferenceMat = con->GetRefMatClmem();
 
    //get cpu ptrs
-   this->reference = this->con->AladinContent::GetCurrentReference();
-   this->params = this->con->AladinContent::GetBlockMatchingParams();
+   reference = con->AladinContent::GetCurrentReference();
+   params = con->AladinContent::GetBlockMatchingParams();
 
 }
 /* *************************************************************** */
-void ClBlockMatchingKernel::Calculate()
-{
-   if (this->params->stepSize!=1 || this->params->voxelCaptureRange!=3){
+void ClBlockMatchingKernel::Calculate() {
+   if (params->stepSize != 1 || params->voxelCaptureRange != 3) {
       reg_print_msg_error("The block Mathching OpenCL kernel supports only a stepsize of 1");
       reg_exit();
    }
    cl_int errNum;
-   this->params->definedActiveBlockNumber = 0;
-   cl_mem cldefinedBlock = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                                          sizeof(int), &(this->params->definedActiveBlockNumber), &errNum);
-   this->sContext->checkErrNum(errNum, "ClBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) ");
+   params->definedActiveBlockNumber = 0;
+   cl_mem cldefinedBlock = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                                          sizeof(int), &(params->definedActiveBlockNumber), &errNum);
+   sContext->checkErrNum(errNum, "ClBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) ");
 
-   const cl_uint4 imageSize ={{(cl_uint)this->reference->nx,
-                               (cl_uint)this->reference->ny,
-                               (cl_uint)this->reference->nz,
-                               (cl_uint)0}};
+   const cl_uint4 imageSize = {{(cl_uint)reference->nx,
+      (cl_uint)reference->ny,
+      (cl_uint)reference->nz,
+      (cl_uint)0}};
 
-   size_t globalWorkSize[3] = { (size_t)params->blockNumber[0] * 4,
-                                (size_t)params->blockNumber[1] * 4,
-                                (size_t)params->blockNumber[2] * 4};
+   size_t globalWorkSize[3] = {(size_t)params->blockNumber[0] * 4,
+      (size_t)params->blockNumber[1] * 4,
+      (size_t)params->blockNumber[2] * 4};
    size_t localWorkSize[3] = {4, 4, 4};
    unsigned int sMemSize = 1728; // (3*4)^3
-   if(this->reference->nz==1){
+   if (reference->nz == 1) {
       globalWorkSize[2] = 1;
       localWorkSize[2] = 1;
       sMemSize = 144; // (3*4)^2
    }
 
    errNum = clSetKernelArg(kernel, 0, sMemSize * sizeof(cl_float), nullptr);
-   this->sContext->checkErrNum(errNum, "Error setting shared memory.");
-   errNum = clSetKernelArg(kernel, 1, sizeof(cl_mem), &this->clWarpedImageArray);
-   this->sContext->checkErrNum(errNum, "Error setting resultImageArray.");
-   errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &this->clReferenceImageArray);
-   this->sContext->checkErrNum(errNum, "Error setting targetImageArray.");
-   errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &this->clWarpedPosition);
-   this->sContext->checkErrNum(errNum, "Error setting resultPosition.");
-   errNum |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &this->clReferencePosition);
-   this->sContext->checkErrNum(errNum, "Error setting targetPosition.");
-   errNum |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &this->clTotalBlock);
-   this->sContext->checkErrNum(errNum, "Error setting mask.");
-   errNum |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &this->clMask);
-   this->sContext->checkErrNum(errNum, "Error setting mask.");
-   errNum |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &this->clReferenceMat);
-   this->sContext->checkErrNum(errNum, "Error setting targetMatrix_xyz.");
+   sContext->checkErrNum(errNum, "Error setting shared memory.");
+   errNum = clSetKernelArg(kernel, 1, sizeof(cl_mem), &clWarpedImageArray);
+   sContext->checkErrNum(errNum, "Error setting resultImageArray.");
+   errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &clReferenceImageArray);
+   sContext->checkErrNum(errNum, "Error setting targetImageArray.");
+   errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &clWarpedPosition);
+   sContext->checkErrNum(errNum, "Error setting resultPosition.");
+   errNum |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &clReferencePosition);
+   sContext->checkErrNum(errNum, "Error setting targetPosition.");
+   errNum |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &clTotalBlock);
+   sContext->checkErrNum(errNum, "Error setting mask.");
+   errNum |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &clMask);
+   sContext->checkErrNum(errNum, "Error setting mask.");
+   errNum |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &clReferenceMat);
+   sContext->checkErrNum(errNum, "Error setting targetMatrix_xyz.");
    errNum |= clSetKernelArg(kernel, 8, sizeof(cl_mem), &cldefinedBlock);
-   this->sContext->checkErrNum(errNum, "Error setting cldefinedBlock.");
+   sContext->checkErrNum(errNum, "Error setting cldefinedBlock.");
    errNum |= clSetKernelArg(kernel, 9, sizeof(cl_uint4), &imageSize);
-   this->sContext->checkErrNum(errNum, "Error setting image size.");
+   sContext->checkErrNum(errNum, "Error setting image size.");
 
-   errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, params->dim, nullptr,
+   errNum = clEnqueueNDRangeKernel(commandQueue, kernel, params->dim, nullptr,
                                    globalWorkSize, localWorkSize, 0, nullptr, nullptr);
-   this->sContext->checkErrNum(errNum, "Error queuing blockmatching kernel for execution ");
+   sContext->checkErrNum(errNum, "Error queuing blockmatching kernel for execution ");
 
-   errNum = clFinish(this->commandQueue);
-   this->sContext->checkErrNum(errNum, "Error after clFinish ClBlockMatchingKernel");
+   errNum = clFinish(commandQueue);
+   sContext->checkErrNum(errNum, "Error after clFinish ClBlockMatchingKernel");
 
-   errNum = clEnqueueReadBuffer(this->commandQueue, cldefinedBlock, CL_TRUE, 0, sizeof(int),
-                                &(this->params->definedActiveBlockNumber), 0, nullptr, nullptr);
+   errNum = clEnqueueReadBuffer(commandQueue, cldefinedBlock, CL_TRUE, 0, sizeof(int),
+                                &(params->definedActiveBlockNumber), 0, nullptr, nullptr);
    sContext->checkErrNum(errNum, "Error reading  var after ClBlockMatchingKernel execution ");
 
-   if(this->params->definedActiveBlockNumber == 0) {
+   if (params->definedActiveBlockNumber == 0) {
       reg_print_msg_error("Unexpected error in the ClBlockMatchingKernel execution");
       reg_exit();
    }
@@ -140,3 +136,4 @@ ClBlockMatchingKernel::~ClBlockMatchingKernel() {
    if (program != 0)
       clReleaseProgram(program);
 }
+/* *************************************************************** */
diff --git a/reg-lib/cl/ClBlockMatchingKernel.h b/reg-lib/cl/ClBlockMatchingKernel.h
index 5474c578..9a01ea61 100644
--- a/reg-lib/cl/ClBlockMatchingKernel.h
+++ b/reg-lib/cl/ClBlockMatchingKernel.h
@@ -3,26 +3,25 @@
 #include "BlockMatchingKernel.h"
 #include "CLAladinContent.h"
 
-class ClBlockMatchingKernel : public BlockMatchingKernel {
-    public:
-       ClBlockMatchingKernel(AladinContent * conIn, std::string name);
-       ~ClBlockMatchingKernel();
-       void Calculate();
+class ClBlockMatchingKernel: public BlockMatchingKernel {
+public:
+    ClBlockMatchingKernel(Content *conIn);
+    ~ClBlockMatchingKernel();
+    void Calculate();
 
-    private:
-       ClContextSingleton *sContext;
-       ClAladinContent *con;
-       nifti_image *reference;
-       _reg_blockMatchingParam *params;
-       cl_kernel kernel;
-       cl_context clContext;
-       cl_program program;
-       cl_command_queue commandQueue;
-       cl_mem clTotalBlock;
-       cl_mem clReferenceImageArray;
-       cl_mem clWarpedImageArray;
-       cl_mem clReferencePosition;
-       cl_mem clWarpedPosition;
-       cl_mem clMask;
-       cl_mem clReferenceMat;
+private:
+    ClContextSingleton *sContext;
+    nifti_image *reference;
+    _reg_blockMatchingParam *params;
+    cl_kernel kernel;
+    cl_context clContext;
+    cl_program program;
+    cl_command_queue commandQueue;
+    cl_mem clTotalBlock;
+    cl_mem clReferenceImageArray;
+    cl_mem clWarpedImageArray;
+    cl_mem clReferencePosition;
+    cl_mem clWarpedPosition;
+    cl_mem clMask;
+    cl_mem clReferenceMat;
 };
diff --git a/reg-lib/cl/ClConvolutionKernel.cpp b/reg-lib/cl/ClConvolutionKernel.cpp
index 7c30a2d9..299cef9c 100644
--- a/reg-lib/cl/ClConvolutionKernel.cpp
+++ b/reg-lib/cl/ClConvolutionKernel.cpp
@@ -1,15 +1,8 @@
 #include "ClConvolutionKernel.h"
 #include "_reg_tools.h"
 
-/* *************************************************************** */
-ClConvolutionKernel::ClConvolutionKernel(std::string name) : ConvolutionKernel(name) {
-    sContext = &ClContextSingleton::Instance();
-}
 /* *************************************************************** */
 void ClConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) {
-    //cpu atm
     reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis);
 }
 /* *************************************************************** */
-ClConvolutionKernel::~ClConvolutionKernel() {}
-/* *************************************************************** */
diff --git a/reg-lib/cl/ClConvolutionKernel.h b/reg-lib/cl/ClConvolutionKernel.h
index 79ddbc2e..4d1b31d1 100644
--- a/reg-lib/cl/ClConvolutionKernel.h
+++ b/reg-lib/cl/ClConvolutionKernel.h
@@ -3,12 +3,9 @@
 #include "ConvolutionKernel.h"
 #include "ClContextSingleton.h"
 
-class ClConvolutionKernel : public ConvolutionKernel
-{
-    public:
-       ClConvolutionKernel(std::string name);
-       ~ClConvolutionKernel();
-       void Calculate(nifti_image * image, float *sigma, int kernelType, int *mask = nullptr, bool * timePoints = nullptr, bool * axis = nullptr);
-    private:
-       ClContextSingleton * sContext;
+class ClConvolutionKernel: public ConvolutionKernel {
+public:
+    ClConvolutionKernel() : ConvolutionKernel() {}
+    ~ClConvolutionKernel() {}
+    void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr);
 };
diff --git a/reg-lib/cl/ClKernelFactory.cpp b/reg-lib/cl/ClKernelFactory.cpp
index 0c969b1e..f092e562 100644
--- a/reg-lib/cl/ClKernelFactory.cpp
+++ b/reg-lib/cl/ClKernelFactory.cpp
@@ -6,12 +6,11 @@
 #include "ClOptimiseKernel.h"
 #include "AladinContent.h"
 
-Kernel* ClKernelFactory::ProduceKernel(std::string name, AladinContent *con) const {
-
-	if (name == AffineDeformationFieldKernel::GetName()) return new ClAffineDeformationFieldKernel(con, name);
-	else if (name == ConvolutionKernel::GetName()) return new ClConvolutionKernel(name);
-	else if (name == BlockMatchingKernel::GetName()) return new ClBlockMatchingKernel(con, name);
-	else if (name == ResampleImageKernel::GetName()) return new ClResampleImageKernel(con, name);
-	else if (name == OptimiseKernel::GetName()) return new ClOptimiseKernel(con, name);
+Kernel* ClKernelFactory::ProduceKernel(std::string name, Content *con) const {
+	if (name == AffineDeformationFieldKernel::GetName()) return new ClAffineDeformationFieldKernel(con);
+	else if (name == ConvolutionKernel::GetName()) return new ClConvolutionKernel();
+	else if (name == BlockMatchingKernel::GetName()) return new ClBlockMatchingKernel(con);
+	else if (name == ResampleImageKernel::GetName()) return new ClResampleImageKernel(con);
+	else if (name == OptimiseKernel::GetName()) return new ClOptimiseKernel(con);
 	else return nullptr;
 }
diff --git a/reg-lib/cl/ClKernelFactory.h b/reg-lib/cl/ClKernelFactory.h
index 113907e3..4175569b 100644
--- a/reg-lib/cl/ClKernelFactory.h
+++ b/reg-lib/cl/ClKernelFactory.h
@@ -1,9 +1,8 @@
 #pragma once
 
 #include "KernelFactory.h"
-#include "AladinContent.h"
 
 class ClKernelFactory: public KernelFactory {
 public:
-   Kernel* ProduceKernel(std::string name, AladinContent *con) const;
+   Kernel* ProduceKernel(std::string name, Content *con) const;
 };
diff --git a/reg-lib/cl/ClOptimiseKernel.cpp b/reg-lib/cl/ClOptimiseKernel.cpp
index c46d65c7..d38eaad0 100644
--- a/reg-lib/cl/ClOptimiseKernel.cpp
+++ b/reg-lib/cl/ClOptimiseKernel.cpp
@@ -1,14 +1,9 @@
 #include "ClOptimiseKernel.h"
 
 /* *************************************************************** */
-ClOptimiseKernel::ClOptimiseKernel(AladinContent *conIn, std::string name) : OptimiseKernel(name) {
+ClOptimiseKernel::ClOptimiseKernel(Content *conIn) : OptimiseKernel() {
     //populate the CLAladinContent object ptr
-    con = static_cast<ClAladinContent*>(conIn);
-
-    //get opencl context params
-    sContext = &ClContextSingleton::Instance();
-    /*clContext = sContext->GetContext();*/
-    /*commandQueue = sContext->GetCommandQueue();*/
+    ClAladinContent *con = static_cast<ClAladinContent*>(conIn);
 
     //get necessary cpu ptrs
     transformationMatrix = con->AladinContent::GetTransformationMatrix();
@@ -16,10 +11,6 @@ ClOptimiseKernel::ClOptimiseKernel(AladinContent *conIn, std::string name) : Opt
 }
 /* *************************************************************** */
 void ClOptimiseKernel::Calculate(bool affine) {
-    //cpu atm
-    this->blockMatchingParams = con->GetBlockMatchingParams();
-    optimize(this->blockMatchingParams, this->transformationMatrix, affine);
+    optimize(blockMatchingParams, transformationMatrix, affine);
 }
 /* *************************************************************** */
-ClOptimiseKernel::~ClOptimiseKernel() {}
-/* *************************************************************** */
diff --git a/reg-lib/cl/ClOptimiseKernel.h b/reg-lib/cl/ClOptimiseKernel.h
index f369f592..e34f89c6 100644
--- a/reg-lib/cl/ClOptimiseKernel.h
+++ b/reg-lib/cl/ClOptimiseKernel.h
@@ -3,16 +3,13 @@
 #include "OptimiseKernel.h"
 #include "CLAladinContent.h"
 
-class ClOptimiseKernel : public OptimiseKernel
-{
-    public:
+class ClOptimiseKernel: public OptimiseKernel {
+public:
+    ClOptimiseKernel(Content *con);
+    ~ClOptimiseKernel() {}
+    void Calculate(bool affine);
 
-       ClOptimiseKernel(AladinContent * con, std::string name);
-       ~ClOptimiseKernel();
-       void Calculate(bool affine);
-    private:
-       _reg_blockMatchingParam * blockMatchingParams;
-       mat44 *transformationMatrix;
-       ClContextSingleton *sContext;
-       ClAladinContent  *con;
+private:
+    _reg_blockMatchingParam * blockMatchingParams;
+    mat44 *transformationMatrix;
 };
diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp
index 7d73cc7b..1e8019d1 100644
--- a/reg-lib/cl/ClResampleImageKernel.cpp
+++ b/reg-lib/cl/ClResampleImageKernel.cpp
@@ -4,32 +4,30 @@
 #include <algorithm>
 
 /* *************************************************************** */
-ClResampleImageKernel::ClResampleImageKernel(AladinContent *conIn, std::string name) : ResampleImageKernel(name) {
+ClResampleImageKernel::ClResampleImageKernel(Content *conIn) : ResampleImageKernel() {
     //populate the CLContext object ptr
-    con = static_cast<ClAladinContent*>(conIn);
+    ClAladinContent *con = static_cast<ClAladinContent*>(conIn);
 
     //path to kernel file
-    const char* niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR");
-	const char* niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR");
+    const char *niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR");
+    const char *niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR");
 
-	std::string clInstallPath;
+    std::string clInstallPath;
     std::string clSrcPath;
     //src dir
-    if (niftyreg_src_dir != nullptr){
+    if (niftyreg_src_dir != nullptr) {
         char opencl_kernel_path[255];
         sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir);
         clSrcPath = opencl_kernel_path;
-    }
-    else clSrcPath = CL_KERNELS_SRC_PATH;
+    } else clSrcPath = CL_KERNELS_SRC_PATH;
     //install dir
-    if(niftyreg_install_dir!=nullptr){
+    if (niftyreg_install_dir != nullptr) {
         char opencl_kernel_path[255];
         sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir);
         clInstallPath = opencl_kernel_path;
-    }
-    else clInstallPath = CL_KERNELS_PATH;
+    } else clInstallPath = CL_KERNELS_PATH;
     std::string clKernel("resampleKernel.cl");
-	//Let's check if we did an install
+    //Let's check if we did an install
     std::string clKernelPath = (clInstallPath + clKernel);
     std::ifstream kernelFile(clKernelPath.c_str(), std::ios::in);
     if (kernelFile.is_open() == 0) {
@@ -60,12 +58,12 @@ ClResampleImageKernel::ClResampleImageKernel(AladinContent *conIn, std::string n
 }
 /* *************************************************************** */
 void ClResampleImageKernel::Calculate(int interp,
-                                                  float paddingValue,
-                                                  bool *dti_timepoint,
-                                                  mat33 *jacMat) {
+                                      float paddingValue,
+                                      bool *dti_timepoint,
+                                      mat33 *jacMat) {
     cl_int errNum;
     // Define the DTI indices if required
-    if(dti_timepoint!=nullptr || jacMat!=nullptr){
+    if (dti_timepoint != nullptr || jacMat != nullptr) {
         reg_print_fct_error("ClResampleImageKernel::calculate");
         reg_print_msg_error("The DTI resampling has not yet been implemented with the OpenCL platform. Exit.");
         reg_exit();
@@ -73,19 +71,17 @@ void ClResampleImageKernel::Calculate(int interp,
 
     if (this->floatingImage->nz > 1) {
         this->kernel = clCreateKernel(program, "ResampleImage3D", &errNum);
-    }
-    else if (this->floatingImage->nz == 1) {
+    } else if (this->floatingImage->nz == 1) {
         //2D case
         this->kernel = clCreateKernel(program, "ResampleImage2D", &errNum);
-    }
-    else {
-		reg_print_fct_error("ClResampleImageKernel::calculate");
+    } else {
+        reg_print_fct_error("ClResampleImageKernel::calculate");
         reg_print_msg_error("The image dimension is not supported. Exit.");
         reg_exit();
     }
     sContext->checkErrNum(errNum, "Error setting kernel ResampleImage.");
 
-    long targetVoxelNumber = (long) this->warpedImage->nx * this->warpedImage->ny * this->warpedImage->nz;
+    long targetVoxelNumber = (long)this->warpedImage->nx * this->warpedImage->ny * this->warpedImage->nz;
     const unsigned int maxThreads = sContext->GetMaxThreads();
     const unsigned int maxBlocks = sContext->GetMaxBlocks();
 
@@ -93,18 +89,18 @@ void ClResampleImageKernel::Calculate(int interp,
     blocks = std::min(blocks, maxBlocks);
 
     const cl_uint dims = 1;
-    const size_t globalWorkSize[dims] = { blocks * maxThreads };
-    const size_t localWorkSize[dims] = { maxThreads };
+    const size_t globalWorkSize[dims] = {blocks * maxThreads};
+    const size_t localWorkSize[dims] = {maxThreads};
 
-//    int numMats = 0; //needs to be a parameter
-//    float* jacMat_h = (float*) malloc(9 * numMats * sizeof(float));
+    //    int numMats = 0; //needs to be a parameter
+    //    float* jacMat_h = (float*) malloc(9 * numMats * sizeof(float));
 
-    cl_long2 voxelNumber = { {(cl_long)warpedImage->nx * warpedImage->ny * warpedImage->nz, (cl_long) this->floatingImage->nx * floatingImage->ny * this->floatingImage->nz} };
-    cl_uint3 fi_xyz = { {(cl_uint)floatingImage->nx, (cl_uint)floatingImage->ny, (cl_uint)floatingImage->nz} };
-    cl_uint2 wi_tu = { {(cl_uint)warpedImage->nt, (cl_uint)warpedImage->nu} };
+    cl_long2 voxelNumber = {{(cl_long)warpedImage->nx * warpedImage->ny * warpedImage->nz, (cl_long)this->floatingImage->nx * floatingImage->ny * this->floatingImage->nz}};
+    cl_uint3 fi_xyz = {{(cl_uint)floatingImage->nx, (cl_uint)floatingImage->ny, (cl_uint)floatingImage->nz}};
+    cl_uint2 wi_tu = {{(cl_uint)warpedImage->nt, (cl_uint)warpedImage->nu}};
 
-//    if (numMats)
-//        mat33ToCptr(jacMat, jacMat_h, numMats);
+    //    if (numMats)
+    //        mat33ToCptr(jacMat, jacMat_h, numMats);
 
     int datatype = this->floatingImage->datatype;
 
diff --git a/reg-lib/cl/ClResampleImageKernel.h b/reg-lib/cl/ClResampleImageKernel.h
index 5f10d203..d0deddf5 100644
--- a/reg-lib/cl/ClResampleImageKernel.h
+++ b/reg-lib/cl/ClResampleImageKernel.h
@@ -3,28 +3,24 @@
 #include "ResampleImageKernel.h"
 #include "CLAladinContent.h"
 
-class ClResampleImageKernel : public ResampleImageKernel
-{
-    public:
+class ClResampleImageKernel: public ResampleImageKernel {
+public:
+    ClResampleImageKernel(Content *conIn);
+    ~ClResampleImageKernel();
+    void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr);
 
-       ClResampleImageKernel(AladinContent * conIn, std::string name);
-       ~ClResampleImageKernel();
-
-       void Calculate(int interp, float paddingValue, bool * dti_timepoint = nullptr, mat33 * jacMat = nullptr);
-    private:
-
-       nifti_image *floatingImage;
-       nifti_image *warpedImage;
-       int *mask;
-       ClContextSingleton *sContext;
-       ClAladinContent *con;
-       cl_command_queue commandQueue;
-       cl_kernel kernel;
-       cl_context clContext;
-       cl_program program;
-       cl_mem clCurrentFloating;
-       cl_mem clCurrentDeformationField;
-       cl_mem clCurrentWarped;
-       cl_mem clMask;
-       cl_mem floMat;
+private:
+    nifti_image *floatingImage;
+    nifti_image *warpedImage;
+    int *mask;
+    ClContextSingleton *sContext;
+    cl_command_queue commandQueue;
+    cl_kernel kernel;
+    cl_context clContext;
+    cl_program program;
+    cl_mem clCurrentFloating;
+    cl_mem clCurrentDeformationField;
+    cl_mem clCurrentWarped;
+    cl_mem clMask;
+    cl_mem floMat;
 };
diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp
index 9cd44608..d21cda6a 100644
--- a/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp
@@ -1,15 +1,18 @@
 #include "CpuAffineDeformationFieldKernel.h"
 #include "_reg_globalTrans.h"
 
-CpuAffineDeformationFieldKernel::CpuAffineDeformationFieldKernel(AladinContent *con, std::string nameIn) : AffineDeformationFieldKernel(nameIn) {
-    this->deformationFieldImage = con->GetCurrentDeformationField();
-    this->affineTransformation = con->GetTransformationMatrix();
-    this->mask = con->GetCurrentReferenceMask();
+/* *************************************************************** */
+CpuAffineDeformationFieldKernel::CpuAffineDeformationFieldKernel(Content *conIn) : AffineDeformationFieldKernel() {
+    AladinContent *con = static_cast<AladinContent*>(conIn);
+    deformationFieldImage = con->GetCurrentDeformationField();
+    affineTransformation = con->GetTransformationMatrix();
+    mask = con->GetCurrentReferenceMask();
 }
-
+/* *************************************************************** */
 void CpuAffineDeformationFieldKernel::Calculate(bool compose) {
-   reg_affine_getDeformationField(this->affineTransformation,
-                                  this->deformationFieldImage,
-                                  compose,
-                                  this->mask);
+    reg_affine_getDeformationField(affineTransformation,
+                                   deformationFieldImage,
+                                   compose,
+                                   mask);
 }
+/* *************************************************************** */
diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.h b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h
index 7f850256..47c16c17 100644
--- a/reg-lib/cpu/CpuAffineDeformationFieldKernel.h
+++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h
@@ -4,13 +4,13 @@
 #include "AladinContent.h"
 #include <string>
 
-class CpuAffineDeformationFieldKernel : public AffineDeformationFieldKernel {
+class CpuAffineDeformationFieldKernel: public AffineDeformationFieldKernel {
 public:
-        CpuAffineDeformationFieldKernel(AladinContent *con, std::string nameIn);
+    CpuAffineDeformationFieldKernel(Content *conIn);
+    void Calculate(bool compose = false);
 
-        void Calculate(bool compose = false);
-
-        mat44 *affineTransformation;
-        nifti_image *deformationFieldImage;
-        int *mask;
+private:
+    mat44 *affineTransformation;
+    nifti_image *deformationFieldImage;
+    int *mask;
 };
diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.cpp b/reg-lib/cpu/CpuBlockMatchingKernel.cpp
index 0626a136..4e4bd57e 100644
--- a/reg-lib/cpu/CpuBlockMatchingKernel.cpp
+++ b/reg-lib/cpu/CpuBlockMatchingKernel.cpp
@@ -1,13 +1,15 @@
 #include "CpuBlockMatchingKernel.h"
 
-CpuBlockMatchingKernel::CpuBlockMatchingKernel(AladinContent *con, std::string name) : BlockMatchingKernel(name) {
+/* *************************************************************** */
+CpuBlockMatchingKernel::CpuBlockMatchingKernel(Content *conIn) : BlockMatchingKernel() {
+    AladinContent *con = static_cast<AladinContent*>(conIn);
     reference = con->GetCurrentReference();
     warped = con->GetCurrentWarped();
     params = con->GetBlockMatchingParams();
     mask = con->GetCurrentReferenceMask();
 }
-
+/* *************************************************************** */
 void CpuBlockMatchingKernel::Calculate() {
-    block_matching_method(this->reference, this->warped, this->params, this->mask);
+    block_matching_method(reference, warped, params, mask);
 }
-//
+/* *************************************************************** */
diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.h b/reg-lib/cpu/CpuBlockMatchingKernel.h
index 9ff19e01..60686878 100644
--- a/reg-lib/cpu/CpuBlockMatchingKernel.h
+++ b/reg-lib/cpu/CpuBlockMatchingKernel.h
@@ -5,16 +5,14 @@
 #include "nifti1_io.h"
 #include "AladinContent.h"
 
-class CpuBlockMatchingKernel : public BlockMatchingKernel {
+class CpuBlockMatchingKernel: public BlockMatchingKernel {
 public:
-
-    CpuBlockMatchingKernel(AladinContent *con, std::string name);
-
+    CpuBlockMatchingKernel(Content *con);
     void Calculate();
 
+private:
     nifti_image *reference;
     nifti_image *warped;
     _reg_blockMatchingParam* params;
     int *mask;
-
 };
diff --git a/reg-lib/cpu/CpuConvolutionKernel.cpp b/reg-lib/cpu/CpuConvolutionKernel.cpp
index f511b332..f91b3133 100644
--- a/reg-lib/cpu/CpuConvolutionKernel.cpp
+++ b/reg-lib/cpu/CpuConvolutionKernel.cpp
@@ -1,9 +1,8 @@
 #include "CpuConvolutionKernel.h"
 #include "_reg_globalTrans.h"
 
-CpuConvolutionKernel::CpuConvolutionKernel(std::string name) : ConvolutionKernel(name) {
-}
-
+/* *************************************************************** */
 void CpuConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) {
     reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis);
 }
+/* *************************************************************** */
diff --git a/reg-lib/cpu/CpuConvolutionKernel.h b/reg-lib/cpu/CpuConvolutionKernel.h
index bba25ee4..49e2b333 100644
--- a/reg-lib/cpu/CpuConvolutionKernel.h
+++ b/reg-lib/cpu/CpuConvolutionKernel.h
@@ -3,9 +3,8 @@
 #include "ConvolutionKernel.h"
 #include <string>
 
-class CpuConvolutionKernel : public ConvolutionKernel {
+class CpuConvolutionKernel: public ConvolutionKernel {
 public:
-    CpuConvolutionKernel(std::string name);
-
+    CpuConvolutionKernel() : ConvolutionKernel() {}
     void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr);
 };
diff --git a/reg-lib/cpu/CpuKernelFactory.cpp b/reg-lib/cpu/CpuKernelFactory.cpp
index 5e0b8926..a0932709 100644
--- a/reg-lib/cpu/CpuKernelFactory.cpp
+++ b/reg-lib/cpu/CpuKernelFactory.cpp
@@ -6,11 +6,11 @@
 #include "CpuOptimiseKernel.h"
 #include "AladinContent.h"
 
-Kernel* CpuKernelFactory::ProduceKernel(std::string name, AladinContent *con) const {
-	if (name == AffineDeformationFieldKernel::GetName()) return new CpuAffineDeformationFieldKernel(con, name);
-	else if (name == ConvolutionKernel::GetName()) return new CpuConvolutionKernel(name);
-	else if (name == BlockMatchingKernel::GetName()) return new CpuBlockMatchingKernel(con, name);
-	else if (name == ResampleImageKernel::GetName()) return new CpuResampleImageKernel(con, name);
-	else if (name == OptimiseKernel::GetName()) return new CpuOptimiseKernel(con, name);
+Kernel* CpuKernelFactory::ProduceKernel(std::string name, Content *con) const {
+	if (name == AffineDeformationFieldKernel::GetName()) return new CpuAffineDeformationFieldKernel(con);
+	else if (name == ConvolutionKernel::GetName()) return new CpuConvolutionKernel();
+	else if (name == BlockMatchingKernel::GetName()) return new CpuBlockMatchingKernel(con);
+	else if (name == ResampleImageKernel::GetName()) return new CpuResampleImageKernel(con);
+	else if (name == OptimiseKernel::GetName()) return new CpuOptimiseKernel(con);
 	else return nullptr;
 }
diff --git a/reg-lib/cpu/CpuKernelFactory.h b/reg-lib/cpu/CpuKernelFactory.h
index fca556ff..d3cbaa6a 100644
--- a/reg-lib/cpu/CpuKernelFactory.h
+++ b/reg-lib/cpu/CpuKernelFactory.h
@@ -2,9 +2,7 @@
 
 #include "KernelFactory.h"
 
-class AladinContent;
-
 class CpuKernelFactory: public KernelFactory {
 public:
-   Kernel* ProduceKernel(std::string name, AladinContent *con) const;
+   Kernel* ProduceKernel(std::string name, Content *con) const;
 };
diff --git a/reg-lib/cpu/CpuOptimiseKernel.cpp b/reg-lib/cpu/CpuOptimiseKernel.cpp
index 52af770e..f7874795 100644
--- a/reg-lib/cpu/CpuOptimiseKernel.cpp
+++ b/reg-lib/cpu/CpuOptimiseKernel.cpp
@@ -1,10 +1,13 @@
 #include "CpuOptimiseKernel.h"
 
-CpuOptimiseKernel::CpuOptimiseKernel(AladinContent *con, std::string name) : OptimiseKernel(name) {
+/* *************************************************************** */
+CpuOptimiseKernel::CpuOptimiseKernel(Content *conIn) : OptimiseKernel() {
+    AladinContent *con = static_cast<AladinContent*>(conIn);
     transformationMatrix = con->GetTransformationMatrix();
     blockMatchingParams = con->GetBlockMatchingParams();
 }
-
+/* *************************************************************** */
 void CpuOptimiseKernel::Calculate(bool affine) {
-    optimize(this->blockMatchingParams, this->transformationMatrix, affine);
+    optimize(blockMatchingParams, transformationMatrix, affine);
 }
+/* *************************************************************** */
diff --git a/reg-lib/cpu/CpuOptimiseKernel.h b/reg-lib/cpu/CpuOptimiseKernel.h
index 00914971..e8b27959 100644
--- a/reg-lib/cpu/CpuOptimiseKernel.h
+++ b/reg-lib/cpu/CpuOptimiseKernel.h
@@ -5,13 +5,12 @@
 #include "nifti1_io.h"
 #include "AladinContent.h"
 
-class CpuOptimiseKernel : public OptimiseKernel {
+class CpuOptimiseKernel: public OptimiseKernel {
 public:
-    CpuOptimiseKernel(AladinContent *con, std::string name);
+    CpuOptimiseKernel(Content *con);
+    void Calculate(bool affine);
 
+private:
     _reg_blockMatchingParam *blockMatchingParams;
     mat44 *transformationMatrix;
-
-    void Calculate(bool affine);
-
 };
diff --git a/reg-lib/cpu/CpuResampleImageKernel.cpp b/reg-lib/cpu/CpuResampleImageKernel.cpp
index 60121ce5..827e1058 100644
--- a/reg-lib/cpu/CpuResampleImageKernel.cpp
+++ b/reg-lib/cpu/CpuResampleImageKernel.cpp
@@ -1,24 +1,26 @@
 #include "CpuResampleImageKernel.h"
 #include "_reg_resampling.h"
 
-CpuResampleImageKernel::CpuResampleImageKernel(AladinContent *con, std::string name) : ResampleImageKernel( name) {
-   floatingImage = con->GetCurrentFloating();
-   warpedImage = con->GetCurrentWarped();
-   deformationField = con->GetCurrentDeformationField();
-   mask = con->GetCurrentReferenceMask();
+/* *************************************************************** */
+CpuResampleImageKernel::CpuResampleImageKernel(Content *conIn) : ResampleImageKernel() {
+    AladinContent *con = static_cast<AladinContent*>(conIn);
+    floatingImage = con->GetCurrentFloating();
+    warpedImage = con->GetCurrentWarped();
+    deformationField = con->GetCurrentDeformationField();
+    mask = con->GetCurrentReferenceMask();
 }
-
+/* *************************************************************** */
 void CpuResampleImageKernel::Calculate(int interp,
                                        float paddingValue,
                                        bool *dti_timepoint,
-                                       mat33 * jacMat)
-{
-   reg_resampleImage(this->floatingImage,
-                     this->warpedImage,
-                     this->deformationField,
-                     this->mask,
-                     interp,
-                     paddingValue,
-                     dti_timepoint,
-                     jacMat);
+                                       mat33 * jacMat) {
+    reg_resampleImage(floatingImage,
+                      warpedImage,
+                      deformationField,
+                      mask,
+                      interp,
+                      paddingValue,
+                      dti_timepoint,
+                      jacMat);
 }
+/* *************************************************************** */
diff --git a/reg-lib/cpu/CpuResampleImageKernel.h b/reg-lib/cpu/CpuResampleImageKernel.h
index 5e787a16..81982fba 100644
--- a/reg-lib/cpu/CpuResampleImageKernel.h
+++ b/reg-lib/cpu/CpuResampleImageKernel.h
@@ -3,15 +3,14 @@
 #include "ResampleImageKernel.h"
 #include "AladinContent.h"
 
-class CpuResampleImageKernel : public ResampleImageKernel
-{
-    public:
-        CpuResampleImageKernel(AladinContent *con, std::string name);
+class CpuResampleImageKernel: public ResampleImageKernel {
+public:
+    CpuResampleImageKernel(Content *con);
+    void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr);
 
-        nifti_image *floatingImage;
-        nifti_image *warpedImage;
-        nifti_image *deformationField;
-        int *mask;
-
-        void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr);
+private:
+    nifti_image *floatingImage;
+    nifti_image *warpedImage;
+    nifti_image *deformationField;
+    int *mask;
 };
diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
index 3b3a572c..652e098f 100644
--- a/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
@@ -2,10 +2,8 @@
 #include "affineDeformationKernel.h"
 
 /* *************************************************************** */
-CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) :
-   AffineDeformationFieldKernel(nameIn)
-{
-   con = static_cast<CudaAladinContent*>(conIn);
+CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(Content *conIn) : AffineDeformationFieldKernel() {
+   CudaAladinContent *con = static_cast<CudaAladinContent*>(conIn);
 
    //get necessary cpu ptrs
    this->deformationFieldImage = con->AladinContent::GetCurrentDeformationField();
@@ -15,13 +13,9 @@ CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(AladinContent
    mask_d = con->GetMask_d();
    deformationFieldArray_d = con->GetDeformationFieldArray_d();
    transformationMatrix_d = con->GetTransformationMatrix_d();
-
-   //cudaSContext = &CudaContextSingleton::Instance();
-   //cudaContext = cudaSContext->GetContext();
 }
 /* *************************************************************** */
-void CudaAffineDeformationFieldKernel::Calculate(bool compose)
-{
+void CudaAffineDeformationFieldKernel::Calculate(bool compose) {
    launchAffine(this->affineTransformation,
                 this->deformationFieldImage,
                 &deformationFieldArray_d,
diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.h b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h
index 01614cff..327e7d71 100644
--- a/reg-lib/cuda/CudaAffineDeformationFieldKernel.h
+++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h
@@ -4,10 +4,9 @@
 #include "CudaAladinContent.h"
 
 //Kernel functions for affine deformation field
-class CudaAffineDeformationFieldKernel: public AffineDeformationFieldKernel
-{
+class CudaAffineDeformationFieldKernel: public AffineDeformationFieldKernel {
 public:
-    CudaAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn);
+    CudaAffineDeformationFieldKernel(Content *conIn);
     void Calculate(bool compose = false);
 private:
     mat44 *affineTransformation;
@@ -15,9 +14,4 @@ class CudaAffineDeformationFieldKernel: public AffineDeformationFieldKernel
 
     float *deformationFieldArray_d, *transformationMatrix_d;
     int *mask_d;
-
-    CudaAladinContent *con;
-
-    //CudaContextSingleton *cudaSContext;
-    //CUContext cudaContext;
 };
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index b5ddab6c..01193a1c 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -3,88 +3,24 @@
 #include "_reg_tools.h"
 #include <algorithm>
 
-/* *************************************************************** */
-CudaAladinContent::CudaAladinContent() {
-    InitVars();
-    AllocateCuPtrs();
-}
-/* *************************************************************** */
-CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn,
-                                     nifti_image *currentFloatingIn,
-                                     int *currentReferenceMaskIn,
-                                     size_t byte,
-                                     const unsigned int blockPercentage,
-                                     const unsigned int inlierLts,
-                                     int blockStep) :
-    AladinContent(currentReferenceIn,
-                  currentFloatingIn,
-                  currentReferenceMaskIn,
-                  sizeof(float), // forcing float for CUDA
-                  blockPercentage,
-                  inlierLts,
-                  blockStep) {
-    if (byte != sizeof(float)) {
-        reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
-        reg_print_msg_warn("Datatype has been forced to float");
-    }
-    InitVars();
-    AllocateCuPtrs();
-
-}
-/* *************************************************************** */
-CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn,
-                                     nifti_image *currentFloatingIn,
-                                     int *currentReferenceMaskIn,
-                                     size_t byte) :
-    AladinContent(currentReferenceIn,
-                  currentFloatingIn,
-                  currentReferenceMaskIn,
-                  sizeof(float)) // forcing float for CUDA
-{
-    if (byte != sizeof(float)) {
-        reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
-        reg_print_msg_warn("Datatype has been forced to float");
-    }
-    InitVars();
-    AllocateCuPtrs();
-}
 /* *************************************************************** */
 CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn,
                                      nifti_image *currentFloatingIn,
                                      int *currentReferenceMaskIn,
-                                     mat44 *transMat,
-                                     size_t byte,
-                                     const unsigned int blockPercentage,
+                                     mat44 *transformationMatrixIn,
+                                     size_t bytesIn,
+                                     const unsigned int percentageOfBlocks,
                                      const unsigned int inlierLts,
-                                     int blockStep) :
+                                     int blockStepSize) :
     AladinContent(currentReferenceIn,
                   currentFloatingIn,
                   currentReferenceMaskIn,
-                  transMat,
+                  transformationMatrixIn,
                   sizeof(float), // forcing float for CUDA
-                  blockPercentage,
+                  percentageOfBlocks,
                   inlierLts,
-                  blockStep) {
-    if (byte != sizeof(float)) {
-        reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
-        reg_print_msg_warn("Datatype has been forced to float");
-    }
-    InitVars();
-    AllocateCuPtrs();
-}
-/* *************************************************************** */
-CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn,
-                                     nifti_image *currentFloatingIn,
-                                     int *currentReferenceMaskIn,
-                                     mat44 *transMat,
-                                     size_t byte) :
-    AladinContent(currentReferenceIn,
-                  currentFloatingIn,
-                  currentReferenceMaskIn,
-                  transMat,
-                  sizeof(float)) // forcing float for CUDA
-{
-    if (byte != sizeof(float)) {
+                  blockStepSize) {
+    if (bytesIn != sizeof(float)) {
         reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
         reg_print_msg_warn("Datatype has been forced to float");
     }
@@ -97,94 +33,94 @@ CudaAladinContent::~CudaAladinContent() {
 }
 /* *************************************************************** */
 void CudaAladinContent::InitVars() {
-    this->referenceImageArray_d = 0;
-    this->floatingImageArray_d = 0;
-    this->warpedImageArray_d = 0;
-    this->deformationFieldArray_d = 0;
-    this->referencePosition_d = 0;
-    this->warpedPosition_d = 0;
-    this->totalBlock_d = 0;
-    this->mask_d = 0;
-    this->floIJKMat_d = 0;
+    referenceImageArray_d = nullptr;
+    floatingImageArray_d = nullptr;
+    warpedImageArray_d = nullptr;
+    deformationFieldArray_d = nullptr;
+    referencePosition_d = nullptr;
+    warpedPosition_d = nullptr;
+    totalBlock_d = nullptr;
+    mask_d = nullptr;
+    floIJKMat_d = nullptr;
 
-    if (this->currentReference != nullptr && this->currentReference->nbyper != NIFTI_TYPE_FLOAT32)
-        reg_tools_changeDatatype<float>(this->currentReference);
-    if (this->currentFloating != nullptr && this->currentFloating->nbyper != NIFTI_TYPE_FLOAT32) {
-        reg_tools_changeDatatype<float>(this->currentFloating);
-        if (this->currentWarped != nullptr)
-            reg_tools_changeDatatype<float>(this->currentWarped);
+    if (currentReference != nullptr && currentReference->nbyper != NIFTI_TYPE_FLOAT32)
+        reg_tools_changeDatatype<float>(currentReference);
+    if (currentFloating != nullptr && currentFloating->nbyper != NIFTI_TYPE_FLOAT32) {
+        reg_tools_changeDatatype<float>(currentFloating);
+        if (currentWarped != nullptr)
+            reg_tools_changeDatatype<float>(currentWarped);
     }
 
-    this->cudaSContext = &CudaContextSingleton::Instance();
-    this->cudaContext = this->cudaSContext->GetContext();
+    cudaSContext = &CudaContextSingleton::Instance();
+    cudaContext = cudaSContext->GetContext();
 
-    //this->numBlocks = (this->blockMatchingParams->activeBlock != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0;
+    //numBlocks = (blockMatchingParams->activeBlock != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0;
 }
 /* *************************************************************** */
 void CudaAladinContent::AllocateCuPtrs() {
-    if (this->transformationMatrix != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&transformationMatrix_d, 16);
+    if (transformationMatrix != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&transformationMatrix_d, sizeof(mat44) / sizeof(float));
 
-        float *tmpMat_h = (float*)malloc(16 * sizeof(float));
-        mat44ToCptr(*(this->transformationMatrix), tmpMat_h);
-        NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice));
+        float *tmpMat_h = (float*)malloc(sizeof(mat44));
+        mat44ToCptr(*(transformationMatrix), tmpMat_h);
+        NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, tmpMat_h, sizeof(mat44), cudaMemcpyHostToDevice));
 
         free(tmpMat_h);
     }
-    if (this->currentReferenceMask != nullptr) {
+    if (currentReferenceMask != nullptr) {
         cudaCommon_allocateArrayToDevice<int>(&mask_d, currentReference->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, this->currentReferenceMask, currentReference->nvox);
+        cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, currentReferenceMask, currentReference->nvox);
     }
-    if (this->currentReference != nullptr) {
+    if (currentReference != nullptr) {
         cudaCommon_allocateArrayToDevice<float>(&referenceImageArray_d, currentReference->nvox);
-        cudaCommon_allocateArrayToDevice<float>(&referenceMat_d, 16);
+        cudaCommon_allocateArrayToDevice<float>(&referenceMat_d, sizeof(mat44) / sizeof(float));
 
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&referenceImageArray_d, this->currentReference);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&referenceImageArray_d, currentReference);
 
-        float* targetMat = (float *)malloc(16 * sizeof(float)); //freed
-        mat44ToCptr(this->refMatrix_xyz, targetMat);
-        cudaCommon_transferFromDeviceToNiftiSimple1<float>(&referenceMat_d, targetMat, 16);
+        float* targetMat = (float *)malloc(sizeof(mat44)); //freed
+        mat44ToCptr(*GetXYZMatrix(currentReference), targetMat);
+        cudaCommon_transferFromDeviceToNiftiSimple1<float>(&referenceMat_d, targetMat, sizeof(mat44) / sizeof(float));
         free(targetMat);
     }
-    if (this->currentWarped != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, this->currentWarped->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, this->currentWarped);
+    if (currentWarped != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, currentWarped->nvox);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, currentWarped);
     }
-    if (this->currentDeformationField != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, this->currentDeformationField->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, this->currentDeformationField);
+    if (currentDeformationField != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, currentDeformationField->nvox);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, currentDeformationField);
     }
-    if (this->currentFloating != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&floatingImageArray_d, this->currentFloating->nvox);
-        cudaCommon_allocateArrayToDevice<float>(&floIJKMat_d, 16);
+    if (currentFloating != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&floatingImageArray_d, currentFloating->nvox);
+        cudaCommon_allocateArrayToDevice<float>(&floIJKMat_d, sizeof(mat44) / sizeof(float));
 
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&floatingImageArray_d, this->currentFloating);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&floatingImageArray_d, currentFloating);
 
-        float *sourceIJKMatrix_h = (float*)malloc(16 * sizeof(float));
-        mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h);
-        NR_CUDA_SAFE_CALL(cudaMemcpy(floIJKMat_d, sourceIJKMatrix_h, 16 * sizeof(float), cudaMemcpyHostToDevice));
+        float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44));
+        mat44ToCptr(*GetIJKMatrix(currentFloating), sourceIJKMatrix_h);
+        NR_CUDA_SAFE_CALL(cudaMemcpy(floIJKMat_d, sourceIJKMatrix_h, sizeof(mat44), cudaMemcpyHostToDevice));
         free(sourceIJKMatrix_h);
     }
 
-    if (this->blockMatchingParams != nullptr) {
-        if (this->blockMatchingParams->referencePosition != nullptr) {
-            cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-            cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+    if (blockMatchingParams != nullptr) {
+        if (blockMatchingParams->referencePosition != nullptr) {
+            cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+            cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         }
-        if (this->blockMatchingParams->warpedPosition != nullptr) {
-            cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-            cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+        if (blockMatchingParams->warpedPosition != nullptr) {
+            cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+            cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         }
-        if (this->blockMatchingParams->totalBlock != nullptr) {
+        if (blockMatchingParams->totalBlock != nullptr) {
             cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
             cudaCommon_transferFromDeviceToNiftiSimple1<int>(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
         }
         /* // Removed until CUDA SVD is added back
-        if (this->blockMatchingParams->activeBlockNumber > 0 ) {
-           unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim;
+        if (blockMatchingParams->activeBlockNumber > 0 ) {
+           unsigned int m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim;
            unsigned int n = 0;
 
-           if (this->blockMatchingParams->dim == 2) {
+           if (blockMatchingParams->dim == 2) {
               n = 6;
            }
            else {
@@ -196,7 +132,7 @@ void CudaAladinContent::AllocateCuPtrs() {
            cudaCommon_allocateArrayToDevice<float>(&VT_d, n * n);
            cudaCommon_allocateArrayToDevice<float>(&Sigma_d, std::min(m, n));
            cudaCommon_allocateArrayToDevice<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
-           cudaCommon_allocateArrayToDevice<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+           cudaCommon_allocateArrayToDevice<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         }
         */
     }
@@ -208,84 +144,82 @@ nifti_image* CudaAladinContent::GetCurrentWarped(int type) {
 }
 /* *************************************************************** */
 nifti_image* CudaAladinContent::GetCurrentDeformationField() {
-
     cudaCommon_transferFromDeviceToCpu<float>((float*)currentDeformationField->data, &deformationFieldArray_d, currentDeformationField->nvox);
     return currentDeformationField;
 }
 /* *************************************************************** */
 _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() {
-
-    cudaCommon_transferFromDeviceToCpu<float>(this->blockMatchingParams->warpedPosition, &warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-    cudaCommon_transferFromDeviceToCpu<float>(this->blockMatchingParams->referencePosition, &referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-    return this->blockMatchingParams;
+    cudaCommon_transferFromDeviceToCpu<float>(blockMatchingParams->warpedPosition, &warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+    cudaCommon_transferFromDeviceToCpu<float>(blockMatchingParams->referencePosition, &referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+    return blockMatchingParams;
 }
 /* *************************************************************** */
 void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
-    if (this->transformationMatrix != nullptr)
+    if (transformationMatrix != nullptr)
         cudaCommon_free<float>(&transformationMatrix_d);
 
     AladinContent::SetTransformationMatrix(transformationMatrixIn);
-    float *tmpMat_h = (float*)malloc(16 * sizeof(float));
-    mat44ToCptr(*(this->transformationMatrix), tmpMat_h);
+    float *tmpMat_h = (float*)malloc(sizeof(mat44));
+    mat44ToCptr(*transformationMatrix, tmpMat_h);
 
-    cudaCommon_allocateArrayToDevice<float>(&transformationMatrix_d, 16);
-    NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice));
+    cudaCommon_allocateArrayToDevice<float>(&transformationMatrix_d, sizeof(mat44) / sizeof(float));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, tmpMat_h, sizeof(mat44), cudaMemcpyHostToDevice));
     free(tmpMat_h);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) {
-    if (this->currentDeformationField != nullptr)
+    if (currentDeformationField != nullptr)
         cudaCommon_free<float>(&deformationFieldArray_d);
     AladinContent::SetCurrentDeformationField(currentDeformationFieldIn);
 
-    cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, this->currentDeformationField->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, this->currentDeformationField);
+    cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, currentDeformationField->nvox);
+    cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, currentDeformationField);
 }
 /* *************************************************************** */
-void CudaAladinContent::SetCurrentReferenceMask(int *maskIn, size_t nvox) {
-    if (this->currentReferenceMask != nullptr)
+void CudaAladinContent::SetCurrentReferenceMask(int *currentReferenceMaskIn) {
+    if (currentReferenceMask != nullptr)
         cudaCommon_free<int>(&mask_d);
-    this->currentReferenceMask = maskIn;
-    cudaCommon_allocateArrayToDevice<int>(&mask_d, nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, maskIn, nvox);
+    AladinContent::SetCurrentReferenceMask(currentReferenceMaskIn);
+    cudaCommon_allocateArrayToDevice<int>(&mask_d, currentReference->nvox);
+    cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, currentReferenceMaskIn, currentReference->nvox);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetCurrentWarped(nifti_image *currentWarped) {
-    if (this->currentWarped != nullptr)
+    if (currentWarped != nullptr)
         cudaCommon_free<float>(&warpedImageArray_d);
     AladinContent::SetCurrentWarped(currentWarped);
-    reg_tools_changeDatatype<float>(this->currentWarped);
+    reg_tools_changeDatatype<float>(currentWarped);
 
     cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, currentWarped->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, this->currentWarped);
+    cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, currentWarped);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
     AladinContent::SetBlockMatchingParams(bmp);
-    if (this->blockMatchingParams->referencePosition != nullptr) {
+    if (blockMatchingParams->referencePosition != nullptr) {
         cudaCommon_free<float>(&referencePosition_d);
         //referencePosition
-        cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-        cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+        cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+        cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     }
-    if (this->blockMatchingParams->warpedPosition != nullptr) {
+    if (blockMatchingParams->warpedPosition != nullptr) {
         cudaCommon_free<float>(&warpedPosition_d);
         //warpedPosition
-        cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
-        cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+        cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+        cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     }
-    if (this->blockMatchingParams->totalBlock != nullptr) {
+    if (blockMatchingParams->totalBlock != nullptr) {
         cudaCommon_free<int>(&totalBlock_d);
         //activeBlock
-        cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, this->blockMatchingParams->totalBlockNumber);
-        cudaCommon_transferArrayFromCpuToDevice<int>(&totalBlock_d, this->blockMatchingParams->totalBlock, this->blockMatchingParams->totalBlockNumber);
+        cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
+        cudaCommon_transferArrayFromCpuToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
     }
     /* // Removed until CUDA SVD is added back
-     if (this->blockMatchingParams->activeBlockNumber > 0) {
-         unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim;
+     if (blockMatchingParams->activeBlockNumber > 0) {
+         unsigned int m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim;
          unsigned int n = 0;
 
-         if (this->blockMatchingParams->dim == 2) {
+         if (blockMatchingParams->dim == 2) {
              n = 6;
          }
          else {
@@ -297,7 +231,7 @@ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
          cudaCommon_allocateArrayToDevice<float>(&VT_d, n * n);
          cudaCommon_allocateArrayToDevice<float>(&Sigma_d, std::min(m, n));
          cudaCommon_allocateArrayToDevice<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
-         cudaCommon_allocateArrayToDevice<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim);
+         cudaCommon_allocateArrayToDevice<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
      }
      */
 }
@@ -331,15 +265,10 @@ DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) {
 /* *************************************************************** */
 template<class T>
 void CudaAladinContent::FillImageData(nifti_image *image,
-                                      float* memoryObject,
+                                      float *memoryObject,
                                       int type) {
     size_t size = image->nvox;
-    float* buffer = nullptr;
-    buffer = (float*)malloc(size * sizeof(float));
-
-    if (buffer == nullptr) {
-        reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!");
-    }
+    float *buffer = (float*)malloc(size * sizeof(float));
 
     cudaCommon_transferFromDeviceToCpu<float>(buffer, &memoryObject, size);
 
@@ -482,29 +411,29 @@ int* CudaAladinContent::GetFloatingDims() {
 }
 /* *************************************************************** */
 void CudaAladinContent::FreeCuPtrs() {
-    if (this->transformationMatrix != nullptr)
+    if (transformationMatrix != nullptr)
         cudaCommon_free<float>(&transformationMatrix_d);
 
-    if (this->currentReference != nullptr) {
+    if (currentReference != nullptr) {
         cudaCommon_free<float>(&referenceImageArray_d);
         cudaCommon_free<float>(&referenceMat_d);
     }
 
-    if (this->currentFloating != nullptr) {
+    if (currentFloating != nullptr) {
         cudaCommon_free<float>(&floatingImageArray_d);
         cudaCommon_free<float>(&floIJKMat_d);
     }
 
-    if (this->currentWarped != nullptr)
+    if (currentWarped != nullptr)
         cudaCommon_free<float>(&warpedImageArray_d);
 
-    if (this->currentDeformationField != nullptr)
+    if (currentDeformationField != nullptr)
         cudaCommon_free<float>(&deformationFieldArray_d);
 
-    if (this->currentReferenceMask != nullptr)
+    if (currentReferenceMask != nullptr)
         cudaCommon_free<int>(&mask_d);
 
-    if (this->blockMatchingParams != nullptr) {
+    if (blockMatchingParams != nullptr) {
         cudaCommon_free<int>(&totalBlock_d);
         cudaCommon_free<float>(&referencePosition_d);
         cudaCommon_free<float>(&warpedPosition_d);
@@ -520,6 +449,6 @@ void CudaAladinContent::FreeCuPtrs() {
 }
 /* *************************************************************** */
 bool CudaAladinContent::IsCurrentComputationDoubleCapable() {
-    return this->cudaSContext->GetIsCardDoubleCapable();
+    return cudaSContext->GetIsCardDoubleCapable();
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
index e3d76732..1c0eb0de 100644
--- a/reg-lib/cuda/CudaAladinContent.h
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -2,41 +2,23 @@
 
 #include "AladinContent.h"
 #include "CudaContextSingleton.h"
-
 #include "_reg_tools.h"
 
 class CudaAladinContent: public AladinContent {
 public:
-    CudaAladinContent();
-    CudaAladinContent(nifti_image *currentReferenceIn,
-                      nifti_image *currentFloatingIn,
-                      int *currentReferenceMaskIn,
-                      size_t byte,
-                      const unsigned int blockPercentage,
-                      const unsigned int inlierLts,
-                      int blockStep);
-    CudaAladinContent(nifti_image *currentReferenceIn,
-                      nifti_image *currentFloatingIn,
-                      int *currentReferenceMaskIn,
-                      size_t byte);
-    CudaAladinContent(nifti_image *currentReferenceIn,
-                      nifti_image *currentFloatingIn,
-                      int *currentReferenceMaskIn,
-                      mat44 *transMat,
-                      size_t byte,
-                      const unsigned int blockPercentage,
-                      const unsigned int inlierLts,
-                      int blockStep);
     CudaAladinContent(nifti_image *currentReferenceIn,
                       nifti_image *currentFloatingIn,
-                      int *currentReferenceMaskIn,
-                      mat44 *transMat,
-                      size_t byte);
+                      int *currentReferenceMaskIn = nullptr,
+                      mat44 *transformationMatrixIn = nullptr,
+                      size_t bytesIn = sizeof(float),
+                      const unsigned int percentageOfBlocks = 0,
+                      const unsigned int inlierLts = 0,
+                      int blockStepSize = 0);
     ~CudaAladinContent();
 
-    bool IsCurrentComputationDoubleCapable();
+    bool IsCurrentComputationDoubleCapable() override;
 
-    //device getters
+    // Device getters
     float* GetReferenceImageArray_d();
     float* GetFloatingImageArray_d();
     float* GetWarpedImageArray_d();
@@ -60,20 +42,20 @@ class CudaAladinContent: public AladinContent {
     int* GetReferenceDims();
     int* GetFloatingDims();
 
-    //cpu getters and setters
-    _reg_blockMatchingParam* GetBlockMatchingParams();
-    nifti_image* GetCurrentDeformationField();
-    nifti_image* GetCurrentWarped(int typ);
+    // CPU getters with data downloaded from device
+    _reg_blockMatchingParam* GetBlockMatchingParams() override;
+    nifti_image* GetCurrentDeformationField() override;
+    nifti_image* GetCurrentWarped(int typ) override;
 
-    void SetTransformationMatrix(mat44 *transformationMatrixIn);
-    void SetCurrentWarped(nifti_image *warpedImageIn);
-    void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn);
-    void SetCurrentReferenceMask(int *maskIn, size_t size);
-    void SetBlockMatchingParams(_reg_blockMatchingParam* bmp);
+    // Setters
+    void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    void SetCurrentWarped(nifti_image *warpedImageIn) override;
+    void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) override;
+    void SetCurrentReferenceMask(int *currentReferenceMaskIn) override;
+    void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
 
 private:
     void InitVars();
-
     void AllocateCuPtrs();
     void FreeCuPtrs();
 
diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.cpp b/reg-lib/cuda/CudaBlockMatchingKernel.cpp
index 2ef0a629..45bae174 100644
--- a/reg-lib/cuda/CudaBlockMatchingKernel.cpp
+++ b/reg-lib/cuda/CudaBlockMatchingKernel.cpp
@@ -2,36 +2,33 @@
 #include "blockMatchingKernel.h"
 
 /* *************************************************************** */
-CudaBlockMatchingKernel::CudaBlockMatchingKernel(AladinContent *conIn, std::string name) :
-   BlockMatchingKernel(name)
-{
-   //get CudaAladinContent ptr
-   con = static_cast<CudaAladinContent*>(conIn);
+CudaBlockMatchingKernel::CudaBlockMatchingKernel(Content *conIn) : BlockMatchingKernel() {
+    //get CudaAladinContent ptr
+    CudaAladinContent *con = static_cast<CudaAladinContent*>(conIn);
 
-   //get cpu ptrs
-   reference = con->AladinContent::GetCurrentReference();
-   params = con->AladinContent::GetBlockMatchingParams();
+    //get cpu ptrs
+    reference = con->AladinContent::GetCurrentReference();
+    params = con->AladinContent::GetBlockMatchingParams();
 
-   //get cuda ptrs
-   referenceImageArray_d = con->GetReferenceImageArray_d();
-   warpedImageArray_d = con->GetWarpedImageArray_d();
-   referencePosition_d = con->GetReferencePosition_d();
-   warpedPosition_d = con->GetWarpedPosition_d();
-   totalBlock_d = con->GetTotalBlock_d();
-   mask_d = con->GetMask_d();
-   referenceMat_d = con->GetReferenceMat_d();
+    //get cuda ptrs
+    referenceImageArray_d = con->GetReferenceImageArray_d();
+    warpedImageArray_d = con->GetWarpedImageArray_d();
+    referencePosition_d = con->GetReferencePosition_d();
+    warpedPosition_d = con->GetWarpedPosition_d();
+    totalBlock_d = con->GetTotalBlock_d();
+    mask_d = con->GetMask_d();
+    referenceMat_d = con->GetReferenceMat_d();
 }
 /* *************************************************************** */
-void CudaBlockMatchingKernel::Calculate()
-{
-   block_matching_method_gpu(reference,
-                             params,
-                             &referenceImageArray_d,
-                             &warpedImageArray_d,
-                             &referencePosition_d,
-                             &warpedPosition_d,
-                             &totalBlock_d,
-                             &mask_d,
-                             &referenceMat_d);
+void CudaBlockMatchingKernel::Calculate() {
+    block_matching_method_gpu(reference,
+                              params,
+                              &referenceImageArray_d,
+                              &warpedImageArray_d,
+                              &referencePosition_d,
+                              &warpedPosition_d,
+                              &totalBlock_d,
+                              &mask_d,
+                              &referenceMat_d);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.h b/reg-lib/cuda/CudaBlockMatchingKernel.h
index 797c499a..643d96f7 100644
--- a/reg-lib/cuda/CudaBlockMatchingKernel.h
+++ b/reg-lib/cuda/CudaBlockMatchingKernel.h
@@ -4,22 +4,16 @@
 #include "CudaAladinContent.h"
 
 //Kernel functions for block matching
-class CudaBlockMatchingKernel : public BlockMatchingKernel {
+class CudaBlockMatchingKernel: public BlockMatchingKernel {
 public:
-
-    CudaBlockMatchingKernel(AladinContent *conIn, std::string name);
+    CudaBlockMatchingKernel(Content *conIn);
     void Calculate();
+
 private:
     nifti_image *reference;
-    _reg_blockMatchingParam* params;
-
-    //CudaContextSingleton *cudaSContext;
-    //CUContext *cudaContext;
-
-    CudaAladinContent *con;
+    _reg_blockMatchingParam *params;
 
     float *referenceImageArray_d, *warpedImageArray_d, *referencePosition_d;
     float *warpedPosition_d, *referenceMat_d;
-    int   *totalBlock_d, *mask_d;
-
+    int *totalBlock_d, *mask_d;
 };
diff --git a/reg-lib/cuda/CudaConvolutionKernel.cpp b/reg-lib/cuda/CudaConvolutionKernel.cpp
index 78d51165..60d7b9cd 100644
--- a/reg-lib/cuda/CudaConvolutionKernel.cpp
+++ b/reg-lib/cuda/CudaConvolutionKernel.cpp
@@ -1,18 +1,13 @@
 #include "CudaConvolutionKernel.h"
 #include "_reg_tools.h"
 
-/* *************************************************************** */
-CudaConvolutionKernel::CudaConvolutionKernel(std::string name) : ConvolutionKernel(name) {
-    //cudaSContext = &CudaContextSingleton::Instance();
-}
 /* *************************************************************** */
 void CudaConvolutionKernel::Calculate(nifti_image *image,
-                                                  float *sigma,
-                                                  int kernelType,
-                                                  int *mask,
-                                                  bool *timePoint,
-                                                  bool *axis)
-{
+                                      float *sigma,
+                                      int kernelType,
+                                      int *mask,
+                                      bool *timePoint,
+                                      bool *axis) {
     //cpu cheat
     reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoint, axis);
 }
diff --git a/reg-lib/cuda/CudaConvolutionKernel.h b/reg-lib/cuda/CudaConvolutionKernel.h
index 81697a96..1fa5be8e 100644
--- a/reg-lib/cuda/CudaConvolutionKernel.h
+++ b/reg-lib/cuda/CudaConvolutionKernel.h
@@ -3,20 +3,14 @@
 #include "ConvolutionKernel.h"
 #include "CudaContextSingleton.h"
 
-//a kernel function for convolution (gaussian smoothing?)
-class CudaConvolutionKernel: public ConvolutionKernel
-{
+// A kernel function for convolution (gaussian smoothing?)
+class CudaConvolutionKernel: public ConvolutionKernel {
 public:
-
-    CudaConvolutionKernel(std::string name);
+    CudaConvolutionKernel() : ConvolutionKernel() {}
     void Calculate(nifti_image *image,
-                        float *sigma,
-                        int kernelType,
-                        int *mask = nullptr,
-                        bool *timePoints = nullptr,
-                        bool *axis = nullptr);
-
-    private:
-       //CudaContextSingleton * cudaSContext;
-
+                   float *sigma,
+                   int kernelType,
+                   int *mask = nullptr,
+                   bool *timePoints = nullptr,
+                   bool *axis = nullptr);
 };
diff --git a/reg-lib/cuda/CudaKernelFactory.cpp b/reg-lib/cuda/CudaKernelFactory.cpp
index a8b3e3ec..12045fa2 100644
--- a/reg-lib/cuda/CudaKernelFactory.cpp
+++ b/reg-lib/cuda/CudaKernelFactory.cpp
@@ -6,11 +6,11 @@
 #include "CudaOptimiseKernel.h"
 #include "AladinContent.h"
 
-Kernel* CudaKernelFactory::ProduceKernel(std::string name, AladinContent *con) const {
-    if (name == AffineDeformationFieldKernel::GetName()) return new CudaAffineDeformationFieldKernel(con, name);
-    else if (name == ConvolutionKernel::GetName()) return new CudaConvolutionKernel(name);
-    else if (name == BlockMatchingKernel::GetName()) return new CudaBlockMatchingKernel(con, name);
-    else if (name == ResampleImageKernel::GetName()) return new CudaResampleImageKernel(con, name);
-    else if (name == OptimiseKernel::GetName()) return new CudaOptimiseKernel(con, name);
+Kernel* CudaKernelFactory::ProduceKernel(std::string name, Content *con) const {
+    if (name == AffineDeformationFieldKernel::GetName()) return new CudaAffineDeformationFieldKernel(con);
+    else if (name == ConvolutionKernel::GetName()) return new CudaConvolutionKernel();
+    else if (name == BlockMatchingKernel::GetName()) return new CudaBlockMatchingKernel(con);
+    else if (name == ResampleImageKernel::GetName()) return new CudaResampleImageKernel(con);
+    else if (name == OptimiseKernel::GetName()) return new CudaOptimiseKernel(con);
     else return nullptr;
 }
diff --git a/reg-lib/cuda/CudaKernelFactory.h b/reg-lib/cuda/CudaKernelFactory.h
index f2b6af17..c9727ec9 100644
--- a/reg-lib/cuda/CudaKernelFactory.h
+++ b/reg-lib/cuda/CudaKernelFactory.h
@@ -1,9 +1,8 @@
 #pragma once
 
 #include "KernelFactory.h"
-#include "AladinContent.h"
 
 class CudaKernelFactory: public KernelFactory {
 public:
-	Kernel* ProduceKernel(std::string name, AladinContent *con) const;
+	Kernel* ProduceKernel(std::string name, Content *con) const;
 };
diff --git a/reg-lib/cuda/CudaOptimiseKernel.cpp b/reg-lib/cuda/CudaOptimiseKernel.cpp
index b4ae8eab..c28f00cd 100644
--- a/reg-lib/cuda/CudaOptimiseKernel.cpp
+++ b/reg-lib/cuda/CudaOptimiseKernel.cpp
@@ -4,79 +4,75 @@
 #include "optimizeKernel.h"
 
 /* *************************************************************** */
-CudaOptimiseKernel::CudaOptimiseKernel(AladinContent *conIn, std::string name) :
-   OptimiseKernel(name)
-{
-   //get CudaAladinContent ptr
-   con = static_cast<CudaAladinContent*>(conIn);
+CudaOptimiseKernel::CudaOptimiseKernel(Content *conIn) : OptimiseKernel() {
+    //get CudaAladinContent ptr
+    con = static_cast<CudaAladinContent*>(conIn);
 
-   //cudaSContext = &CudaContextSingleton::Instance();
+    //get cpu ptrs
+    transformationMatrix = con->AladinContent::GetTransformationMatrix();
+    blockMatchingParams = con->AladinContent::GetBlockMatchingParams();
 
-   //get cpu ptrs
-   transformationMatrix = con->AladinContent::GetTransformationMatrix();
-   blockMatchingParams = con->AladinContent::GetBlockMatchingParams();
-
-//   transformationMatrix_d = con->GetTransformationMatrix_d();
-//   AR_d = con->GetAR_d(); // Removed until CUDA SVD is added back
-//   U_d = con->GetU_d(); // Removed until CUDA SVD is added back
-//   Sigma_d = con->GetSigma_d(); // Removed until CUDA SVD is added back
-//   VT_d = con->GetVT_d(); // Removed until CUDA SVD is added back
-//   lengths_d = con->GetLengths_d(); // Removed until CUDA SVD is added back
-//   referencePos_d = con->GetReferencePosition_d();
-//   warpedPos_d = con->GetWarpedPosition_d();
-//   newWarpedPos_d = con->GetNewWarpedPos_d(); // Removed until CUDA SVD is added back
+    //   transformationMatrix_d = con->GetTransformationMatrix_d();
+    //   AR_d = con->GetAR_d(); // Removed until CUDA SVD is added back
+    //   U_d = con->GetU_d(); // Removed until CUDA SVD is added back
+    //   Sigma_d = con->GetSigma_d(); // Removed until CUDA SVD is added back
+    //   VT_d = con->GetVT_d(); // Removed until CUDA SVD is added back
+    //   lengths_d = con->GetLengths_d(); // Removed until CUDA SVD is added back
+    //   referencePos_d = con->GetReferencePosition_d();
+    //   warpedPos_d = con->GetWarpedPosition_d();
+    //   newWarpedPos_d = con->GetNewWarpedPos_d(); // Removed until CUDA SVD is added back
 
 }
 /* *************************************************************** */
 void CudaOptimiseKernel::Calculate(bool affine) {
-   /* // Removed until CUDA SVD is added back
-#if _WIN64 || __x86_64__ || __ppc64__
+    /* // Removed until CUDA SVD is added back
+ #if _WIN64 || __x86_64__ || __ppc64__
 
-    //for now. Soon we will have a GPU version of it
-    int* cudaRunTimeVersion = (int*)malloc(sizeof(int));
-    int* cudaDriverVersion = (int*)malloc(sizeof(int));
-    cudaRuntimeGetVersion(cudaRunTimeVersion);
-    cudaDriverGetVersion(cudaDriverVersion);
+     //for now. Soon we will have a GPU version of it
+     int* cudaRunTimeVersion = (int*)malloc(sizeof(int));
+     int* cudaDriverVersion = (int*)malloc(sizeof(int));
+     cudaRuntimeGetVersion(cudaRunTimeVersion);
+     cudaDriverGetVersion(cudaDriverVersion);
 
-#ifndef DEBUG
-    printf("CUDA RUNTIME VERSION=%i\n", *cudaRunTimeVersion);
-    printf("CUDA DRIVER VERSION=%i\n", *cudaDriverVersion);
-#endif
+ #ifndef DEBUG
+     printf("CUDA RUNTIME VERSION=%i\n", *cudaRunTimeVersion);
+     printf("CUDA DRIVER VERSION=%i\n", *cudaDriverVersion);
+ #endif
 
-    if (*cudaRunTimeVersion < 7050) {
-        this->blockMatchingParams = con->GetBlockMatchingParams();
-        optimize(this->blockMatchingParams, transformationMatrix, affine);
-    }
-    else {
-        //HAVE TO DO THE RIGID AND 2D VERSION
-        if(affine && this->blockMatchingParams->dim == 3) {
-            const unsigned long num_to_keep = (unsigned long)(blockMatchingParams->activeBlockNumber *(blockMatchingParams->percent_to_keep / 100.0f));
-            optimize_affine3D_cuda(transformationMatrix,
-                                   transformationMatrix_d,
-                                   AR_d,
-                                   U_d,
-                                   Sigma_d,
-                                   VT_d,
-                                   lengths_d,
-                                   referencePos_d,
-                                   warpedPos_d,
-                                   newWarpedPos_d,
-                                   blockMatchingParams->activeBlockNumber * 3,
-                                   12,
-                                   num_to_keep,
-                                   ils,
-                                   affine);
-        } else {
-            this->blockMatchingParams = con->GetBlockMatchingParams();
-            optimize(this->blockMatchingParams, transformationMatrix, affine);
-        }
-    }
-#else
-    this->blockMatchingParams = con->GetBlockMatchingParams();
-    optimize(this->blockMatchingParams, transformationMatrix, affine);
-#endif
-*/
-   this->blockMatchingParams = con->GetBlockMatchingParams();
-   optimize(this->blockMatchingParams, transformationMatrix, affine);
+     if (*cudaRunTimeVersion < 7050) {
+         blockMatchingParams = con->GetBlockMatchingParams();
+         optimize(blockMatchingParams, transformationMatrix, affine);
+     }
+     else {
+         //HAVE TO DO THE RIGID AND 2D VERSION
+         if(affine && blockMatchingParams->dim == 3) {
+             const unsigned long num_to_keep = (unsigned long)(blockMatchingParams->activeBlockNumber *(blockMatchingParams->percent_to_keep / 100.0f));
+             optimize_affine3D_cuda(transformationMatrix,
+                                    transformationMatrix_d,
+                                    AR_d,
+                                    U_d,
+                                    Sigma_d,
+                                    VT_d,
+                                    lengths_d,
+                                    referencePos_d,
+                                    warpedPos_d,
+                                    newWarpedPos_d,
+                                    blockMatchingParams->activeBlockNumber * 3,
+                                    12,
+                                    num_to_keep,
+                                    ils,
+                                    affine);
+         } else {
+             blockMatchingParams = con->GetBlockMatchingParams();
+             optimize(blockMatchingParams, transformationMatrix, affine);
+         }
+     }
+ #else
+     blockMatchingParams = con->GetBlockMatchingParams();
+     optimize(blockMatchingParams, transformationMatrix, affine);
+ #endif
+ */
+    blockMatchingParams = con->GetBlockMatchingParams();
+    optimize(blockMatchingParams, transformationMatrix, affine);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaOptimiseKernel.h b/reg-lib/cuda/CudaOptimiseKernel.h
index 29d31b92..62356876 100644
--- a/reg-lib/cuda/CudaOptimiseKernel.h
+++ b/reg-lib/cuda/CudaOptimiseKernel.h
@@ -3,11 +3,10 @@
 #include "OptimiseKernel.h"
 #include "CudaAladinContent.h"
 
-//kernel functions for numerical optimisation
-class CudaOptimiseKernel: public OptimiseKernel
-{
+// Kernel functions for numerical optimisation
+class CudaOptimiseKernel: public OptimiseKernel {
 public:
-    CudaOptimiseKernel(AladinContent *conIn, std::string name);
+    CudaOptimiseKernel(Content *conIn);
     void Calculate(bool affine);
 
 private:
@@ -21,5 +20,4 @@ class CudaOptimiseKernel: public OptimiseKernel
 //    float *VT_d; // Removed until CUDA SVD is added back
 //    float *lengths_d; // Removed until CUDA SVD is added back
 //    float *newWarpedPos_d; // Removed until CUDA SVD is added back
-
 };
diff --git a/reg-lib/cuda/CudaResampleImageKernel.cpp b/reg-lib/cuda/CudaResampleImageKernel.cpp
index c9049cda..a6e81267 100644
--- a/reg-lib/cuda/CudaResampleImageKernel.cpp
+++ b/reg-lib/cuda/CudaResampleImageKernel.cpp
@@ -2,10 +2,8 @@
 #include "resampleKernel.h"
 
 /* *************************************************************** */
-CudaResampleImageKernel::CudaResampleImageKernel(AladinContent *conIn, std::string name) :
-        ResampleImageKernel(name)
-{
-    con = static_cast<CudaAladinContent*>(conIn);
+CudaResampleImageKernel::CudaResampleImageKernel(Content *conIn) : ResampleImageKernel() {
+    CudaAladinContent *con = static_cast<CudaAladinContent*>(conIn);
 
     floatingImage = con->AladinContent::GetCurrentFloating();
     warpedImage = con->AladinContent::GetCurrentWarped();
@@ -31,20 +29,19 @@ CudaResampleImageKernel::CudaResampleImageKernel(AladinContent *conIn, std::stri
 }
 /* *************************************************************** */
 void CudaResampleImageKernel::Calculate(int interp,
-                                                     float paddingValue,
-                                                     bool *dti_timepoint,
-                                                     mat33 * jacMat)
-{
-    launchResample(this->floatingImage,
-                        this->warpedImage,
-                        interp,
-                        paddingValue,
-                        dti_timepoint,
-                        jacMat,
-                        &this->floatingImageArray_d,
-                        &this->warpedImageArray_d,
-                        &this->deformationFieldImageArray_d,
-                        &this->mask_d,
-                        &this->floIJKMat_d);
+                                        float paddingValue,
+                                        bool *dti_timepoint,
+                                        mat33 * jacMat) {
+    launchResample(floatingImage,
+                   warpedImage,
+                   interp,
+                   paddingValue,
+                   dti_timepoint,
+                   jacMat,
+                   &floatingImageArray_d,
+                   &warpedImageArray_d,
+                   &deformationFieldImageArray_d,
+                   &mask_d,
+                   &floIJKMat_d);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaResampleImageKernel.h b/reg-lib/cuda/CudaResampleImageKernel.h
index 9aa978f8..216ae432 100644
--- a/reg-lib/cuda/CudaResampleImageKernel.h
+++ b/reg-lib/cuda/CudaResampleImageKernel.h
@@ -8,11 +8,12 @@
  * */
 class CudaResampleImageKernel: public ResampleImageKernel {
 public:
-    CudaResampleImageKernel(AladinContent *conIn, std::string name);
+    CudaResampleImageKernel(Content *conIn);
     void Calculate(int interp,
-                        float paddingValue,
-                        bool *dti_timepoint = nullptr,
-                        mat33 *jacMat = nullptr);
+                   float paddingValue,
+                   bool *dti_timepoint = nullptr,
+                   mat33 *jacMat = nullptr);
+
 private:
     nifti_image *floatingImage;
     nifti_image *warpedImage;
@@ -23,7 +24,4 @@ class CudaResampleImageKernel: public ResampleImageKernel {
     float* warpedImageArray_d;
     float* deformationFieldImageArray_d;
     int *mask_d;
-
-    //CudaContextSingleton *cudaSContext;
-    CudaAladinContent *con;
 };
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp
index 94a245e3..38fa95a0 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affine_deformation_field.cpp
@@ -27,25 +27,19 @@
 */
 
 
-typedef std::tuple<std::string, nifti_image *, mat44 *, float *, float *, float *> test_data;
-typedef std::tuple<AladinContent *, std::string, int> content_desc;
+typedef std::tuple<std::string, nifti_image*, mat44*, float*, float*, float*> test_data;
+typedef std::tuple<AladinContent*, std::string, int> content_desc;
 
 TEST_CASE("Affine deformation field", "[AffineDefField]") {
     // Create a reference 2D image
-    int dim[8]= {2, 2, 2, 1, 1, 1, 1, 1};
-    nifti_image *reference2D = nifti_make_new_nim(
-            dim,
-            NIFTI_TYPE_FLOAT32,
-            true);
+    int dim[8] = {2, 2, 2, 1, 1, 1, 1, 1};
+    nifti_image *reference2D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference2D);
 
     // Create a reference 3D image
-    dim[0]= 3;
-    dim[3]= 2;
-    nifti_image *reference3D = nifti_make_new_nim(
-            dim,
-            NIFTI_TYPE_FLOAT32,
-            true);
+    dim[0] = 3;
+    dim[3] = 2;
+    nifti_image *reference3D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference3D);
 
     // Generate the different use cases
@@ -58,12 +52,12 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
     float identity_result_2x[4] = {0, 1, 0, 1};
     float identity_result_2y[4] = {0, 0, 1, 1};
     test_use_cases.emplace_back(test_data(
-            "identity 2D",
-            reference2D,
-            identity,
-            identity_result_2x,
-            identity_result_2y,
-            nullptr)
+        "identity 2D",
+        reference2D,
+        identity,
+        identity_result_2x,
+        identity_result_2y,
+        nullptr)
     );
     // Identity use case - 3D
     // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
@@ -71,12 +65,12 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
     float identity_result_3y[8] = {0, 0, 1, 1, 0, 0, 1, 1};
     float identity_result_3z[8] = {0, 0, 0, 0, 1, 1, 1, 1};
     test_use_cases.emplace_back(test_data(
-            "identity 3D",
-            reference3D,
-            identity,
-            identity_result_3x,
-            identity_result_3y,
-            identity_result_3z)
+        "identity 3D",
+        reference3D,
+        identity,
+        identity_result_3x,
+        identity_result_3y,
+        identity_result_3z)
     );
 
     // Translation - 2D
@@ -89,12 +83,12 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
     float translation_result_2x[4] = {-0.5, .5, -0.5, .5};
     float translation_result_2y[4] = {1.5, 1.5, 2.5, 2.5};
     test_use_cases.emplace_back(test_data(
-            "translation 2D",
-            reference2D,
-            translation,
-            translation_result_2x,
-            translation_result_2y,
-            nullptr)
+        "translation 2D",
+        reference2D,
+        translation,
+        translation_result_2x,
+        translation_result_2y,
+        nullptr)
     );
 
     // Translation - 3D
@@ -103,12 +97,12 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
     float translation_result_3y[8] = {1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5};
     float translation_result_3z[8] = {.75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75};
     test_use_cases.emplace_back(test_data(
-            "translation 3D",
-            reference3D,
-            translation,
-            translation_result_3x,
-            translation_result_3y,
-            translation_result_3z)
+        "translation 3D",
+        reference3D,
+        translation,
+        translation_result_3x,
+        translation_result_3y,
+        translation_result_3z)
     );
 
 
@@ -119,56 +113,52 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
     affine->m[0][3] = -0.5;
     affine->m[1][3] = 1.5;
     affine->m[2][3] = 0.75;
-    for (auto i=0; i<4; ++i){
-        for (auto j=0; j<4; ++j){
-            affine->m[i][j] +=  static_cast<float>((((float) rand() / (RAND_MAX))-.5)/10.);
+    for (auto i = 0; i < 4; ++i) {
+        for (auto j = 0; j < 4; ++j) {
+            affine->m[i][j] += static_cast<float>((((float)rand() / (RAND_MAX)) - .5) / 10.);
         }
     }
     float affine_result_2x[4];
     float affine_result_2y[4];
-    for (auto i=0; i<4;++i){
+    for (auto i = 0; i < 4; ++i) {
         auto x = identity_result_2x[i];
         auto y = identity_result_2y[i];
-        affine_result_2x[i] = affine->m[0][3] + affine->m[0][0]*x + affine->m[0][1]*y;
-        affine_result_2y[i] = affine->m[1][3] + affine->m[1][0]*x + affine->m[1][1]*y;
+        affine_result_2x[i] = affine->m[0][3] + affine->m[0][0] * x + affine->m[0][1] * y;
+        affine_result_2y[i] = affine->m[1][3] + affine->m[1][0] * x + affine->m[1][1] * y;
 
     }
     test_use_cases.emplace_back(test_data(
-            "full affine 2D",
-            reference2D,
-            affine,
-            affine_result_2x,
-            affine_result_2y,
-            nullptr)
+        "full affine 2D",
+        reference2D,
+        affine,
+        affine_result_2x,
+        affine_result_2y,
+        nullptr)
     );
     // Full affine - 3D
     // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
     float affine_result_3x[8];
     float affine_result_3y[8];
     float affine_result_3z[8];
-    for (auto i=0; i<8;++i){
+    for (auto i = 0; i < 8; ++i) {
         auto x = identity_result_3x[i];
         auto y = identity_result_3y[i];
         auto z = identity_result_3z[i];
-        affine_result_3x[i] = affine->m[0][3] +
-            affine->m[0][0]*x + affine->m[0][1]*y + affine->m[0][2]*z;
-        affine_result_3y[i] = affine->m[1][3] +
-            affine->m[1][0]*x + affine->m[1][1]*y + affine->m[1][2]*z;
-        affine_result_3z[i] = affine->m[2][3] +
-            affine->m[2][0]*x + affine->m[2][1]*y + affine->m[2][2]*z;
+        affine_result_3x[i] = affine->m[0][3] + affine->m[0][0] * x + affine->m[0][1] * y + affine->m[0][2] * z;
+        affine_result_3y[i] = affine->m[1][3] + affine->m[1][0] * x + affine->m[1][1] * y + affine->m[1][2] * z;
+        affine_result_3z[i] = affine->m[2][3] + affine->m[2][0] * x + affine->m[2][1] * y + affine->m[2][2] * z;
     }
     test_use_cases.emplace_back(test_data(
-            "affine 3D",
-            reference3D,
-            affine,
-            affine_result_3x,
-            affine_result_3y,
-            affine_result_3z)
+        "affine 3D",
+        reference3D,
+        affine,
+        affine_result_3x,
+        affine_result_3y,
+        affine_result_3z)
     );
 
     // Loop over all generated test cases to create all content and run all tests
-    for(auto && test_use_case: test_use_cases) {
-
+    for (auto&& test_use_case : test_use_cases) {
         // Retrieve test information
         std::string test_name;
         nifti_image *reference;
@@ -176,78 +166,63 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         float *test_res_x;
         float *test_res_y;
         float *test_res_z;
-        std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) =
-            test_use_case;
+        std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) = test_use_case;
 
         // Accumate all required contents with a vector
         std::vector<content_desc> listContent;
         listContent.push_back(content_desc(
-                new AladinContent(
-                        reference,
-                        nullptr,
-                        nullptr,
-                        test_mat,
-                        sizeof(float)),
-                "CPU",
-                0));
+            new AladinContent(
+                reference,
+                reference,
+                nullptr,
+                test_mat,
+                sizeof(float)),
+            "CPU",
+            0));
 #ifdef _USE_CUDA
         listContent.push_back(content_desc(
-                new CudaAladinContent(
-                        reference,
-                        nullptr,
-                        nullptr,
-                        test_mat,
-                        sizeof(float)),
-                "CUDA",
-                1));
+            new CudaAladinContent(
+                reference,
+                reference,
+                nullptr,
+                test_mat,
+                sizeof(float)),
+            "CUDA",
+            1));
 #endif
 #ifdef _USE_OPENCL
         listContent.push_back(content_desc(
-                new ClAladinContent(
-                        reference,
-                        nullptr,
-                        nullptr,
-                        test_mat,
-                        sizeof(float)),
-                "OpenCL",
-                2));
+            new ClAladinContent(
+                reference,
+                reference,
+                nullptr,
+                test_mat,
+                sizeof(float)),
+            "OpenCL",
+            2));
 #endif
         // Loop over all possibles contents for each test
-        for (auto &&content: listContent) {
-
+        for (auto &&content : listContent) {
             AladinContent *con;
             std::string desc;
             int plat_value;
             std::tie(con, desc, plat_value) = content;
-            SECTION(test_name + " " + desc){
+            SECTION(test_name + " " + desc) {
                 // Initialise the platform to run current content and retrieve deformation field
                 auto *platform = new Platform(plat_value);
-                Kernel *affineDeformKernel = platform->CreateKernel(
-                        AffineDeformationFieldKernel::GetName(),
-                        con);
+                Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con);
                 affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
-                nifti_image *defField =
-                        con->GetCurrentDeformationField();
+                nifti_image *defField = con->GetCurrentDeformationField();
 
                 // Check all values
                 auto *defFieldPtrX = static_cast<float *>(defField->data);
-                auto *defFieldPtrY = &defFieldPtrX[defField->nx *
-                                                    defField->ny *
-                                                    defField->nz];
-                auto *defFieldPtrZ = &defFieldPtrY[defField->nx *
-                                                    defField->ny *
-                                                    defField->nz];
-                for (int i = 0; i < defField->nx*defField->ny*defField->nz; ++i) {
-                    REQUIRE(fabs(
-                            defFieldPtrX[i] - test_res_x[i]) <
-                            EPS_SINGLE);
-                    REQUIRE(fabs(
-                            defFieldPtrY[i] - test_res_y[i]) <
-                            EPS_SINGLE);
-                    if(test_res_z != nullptr){
-                        REQUIRE(fabs(
-                                defFieldPtrZ[i] - test_res_z[i]) <
-                                EPS_SINGLE);
+                auto *defFieldPtrY = &defFieldPtrX[defField->nx * defField->ny * defField->nz];
+                auto *defFieldPtrZ = &defFieldPtrY[defField->nx * defField->ny * defField->nz];
+                for (int i = 0; i < defField->nx * defField->ny * defField->nz; ++i) {
+                    REQUIRE(fabs(defFieldPtrX[i] - test_res_x[i]) < EPS_SINGLE);
+                    REQUIRE(fabs(defFieldPtrY[i] - test_res_y[i]) < EPS_SINGLE);
+                    if (test_res_z != nullptr) {
+                        REQUIRE(fabs(defFieldPtrZ[i] - test_res_z[i]) < EPS_SINGLE);
                     }
                 }
                 delete affineDeformKernel;
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
index 37dee12f..ea16dbd1 100644
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ b/reg-test/reg_test_coherence_interpolation.cpp
@@ -79,7 +79,7 @@ int main(int argc, char **argv)
     AladinContent *con_cpu = new AladinContent(nullptr, referenceImage, nullptr, sizeof(float));
     con_cpu->SetCurrentWarped(cpu_warped);
     con_cpu->SetCurrentDeformationField(inputDeformationField);
-    con_cpu->SetCurrentReferenceMask(tempMask, cpu_warped->nvox);
+    con_cpu->SetCurrentReferenceMask(tempMask);
     Platform *platform_cpu = new Platform(NR_PLATFORM_CPU);
     Kernel *resampleImageKernel_cpu = platform_cpu->CreateKernel(ResampleImageKernel::GetName(), con_cpu);
     resampleImageKernel_cpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
@@ -102,7 +102,7 @@ int main(int argc, char **argv)
 #endif
     con_gpu->SetCurrentWarped(gpu_warped);
     con_gpu->SetCurrentDeformationField(inputDeformationField);
-    con_gpu->SetCurrentReferenceMask(tempMask, gpu_warped->nvox);
+    con_gpu->SetCurrentReferenceMask(tempMask);
     Platform *platform_gpu = nullptr;
 #ifdef _USE_CUDA
     if (platformCode == NR_PLATFORM_CUDA)
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index d448176d..f75c4a81 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -111,33 +111,21 @@ TEST_CASE("Resampling", "[resampling]") {
         float *test_res;
         std::tie(test_name, reference, def_field, test_res) = test_use_case;
 
-        // Accumate all required contents with a vector
+        // Accumulate all required contents with a vector
         std::vector<content_desc> listContent;
         listContent.push_back(content_desc(
-            new AladinContent(
-                reference,
-                reference,
-                nullptr,
-                sizeof(float)),
+            new AladinContent(reference, reference),
             "CPU",
             NR_PLATFORM_CPU));
 #ifdef _USE_CUDA
         listContent.push_back(content_desc(
-            new CudaAladinContent(
-                reference,
-                reference,
-                nullptr,
-                sizeof(float)),
+            new CudaAladinContent(reference, reference),
             "CUDA",
             NR_PLATFORM_CUDA));
 #endif
 #ifdef _USE_OPENCL
         listContent.push_back(content_desc(
-            new ClAladinContent(
-                reference,
-                reference,
-                nullptr,
-                sizeof(float)),
+            new ClAladinContent(reference, reference),
             "OpenCL",
             NR_PLATFORM_CL));
 #endif
@@ -157,7 +145,7 @@ TEST_CASE("Resampling", "[resampling]") {
                 con->SetCurrentDeformationField(def_field);
                 // Set an empty mask to consider all voxels
                 int *tempMask = (int*)calloc(reference->nvox, sizeof(int));
-                con->SetCurrentReferenceMask(tempMask, warped->nvox);
+                con->SetCurrentReferenceMask(tempMask);
                 // Initialise the platform to run current content and retrieve deformation field
                 auto *platform = new Platform(plat_value);
                 Kernel *resampleKernel = platform->CreateKernel(ResampleImageKernel::GetName(), con);

From f4c1da1618c0dd4af8234820daa8418264652e74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Sat, 3 Dec 2022 01:09:55 +0000
Subject: [PATCH 019/314] Several refactorisations

---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_aladin.cpp                       |   2 +-
 reg-apps/reg_average.cpp                      |   6 +-
 reg-apps/reg_resample.cpp                     |   2 +-
 reg-apps/reg_transform.cpp                    |   4 +-
 reg-lib/AladinContent.cpp                     |  12 +-
 reg-lib/AladinContent.h                       |   6 +-
 reg-lib/Content.cpp                           | 116 +++----
 reg-lib/Content.h                             |  71 ++--
 reg-lib/KernelFactory.h                       |   2 +-
 reg-lib/Platform.cpp                          |  18 +-
 reg-lib/_reg_aladin.cpp                       |  60 ++--
 reg-lib/_reg_aladin.h                         |   6 +-
 reg-lib/_reg_aladin_sym.cpp                   |  38 +--
 reg-lib/_reg_aladin_sym.h                     |   6 +-
 reg-lib/_reg_f3d2.cpp                         |  14 +-
 reg-lib/_reg_f3d_sym.cpp                      | 238 ++++++-------
 reg-lib/_reg_f3d_sym.h                        |  14 +-
 reg-lib/_reg_polyAffine.cpp                   |   2 +-
 reg-lib/_reg_polyAffine.h                     |   2 +-
 reg-lib/cl/ClAffineDeformationFieldKernel.cpp |   7 +-
 reg-lib/cl/ClAladinContent.cpp                | 121 ++++---
 reg-lib/cl/ClAladinContent.h                  |  16 +-
 reg-lib/cl/ClBlockMatchingKernel.cpp          |   2 +-
 reg-lib/cl/ClKernelFactory.cpp                |   2 +-
 reg-lib/cl/ClKernelFactory.h                  |   2 +-
 reg-lib/cl/ClResampleImageKernel.cpp          |  18 +-
 reg-lib/cl/ClResampleImageKernel.h            |   6 +-
 reg-lib/cl/blockMatchingKernel.cl             |  20 +-
 .../cpu/CpuAffineDeformationFieldKernel.cpp   |   4 +-
 reg-lib/cpu/CpuBlockMatchingKernel.cpp        |   6 +-
 reg-lib/cpu/CpuKernelFactory.cpp              |   2 +-
 reg-lib/cpu/CpuKernelFactory.h                |   2 +-
 reg-lib/cpu/CpuResampleImageKernel.cpp        |   8 +-
 reg-lib/cpu/_reg_dti.cpp                      |   6 +-
 reg-lib/cpu/_reg_dti.h                        |   2 +-
 reg-lib/cpu/_reg_globalTrans.cpp              |   8 +-
 reg-lib/cpu/_reg_kld.cpp                      |   4 +-
 reg-lib/cpu/_reg_lncc.cpp                     |   4 +-
 reg-lib/cpu/_reg_lncc.h                       |   2 +-
 reg-lib/cpu/_reg_localTrans.cpp               |   8 +-
 reg-lib/cpu/_reg_localTrans_regul.cpp         |   2 +-
 reg-lib/cpu/_reg_localTrans_regul.h           |   2 +-
 reg-lib/cpu/_reg_maths_eigen.cpp              |   2 +-
 reg-lib/cpu/_reg_nmi.cpp                      |  54 ++-
 reg-lib/cpu/_reg_nmi.h                        |   6 +-
 reg-lib/cpu/_reg_optimiser.cpp                |   4 +-
 reg-lib/cpu/_reg_polyAffine.cpp               |   2 +-
 reg-lib/cpu/_reg_polyAffine.h                 |   2 +-
 reg-lib/cpu/_reg_resampling.cpp               |  86 ++---
 reg-lib/cpu/_reg_resampling.h                 |   4 +-
 reg-lib/cpu/_reg_splineBasis.cpp              |   2 +-
 reg-lib/cpu/_reg_ssd.cpp                      |   4 +-
 reg-lib/cpu/_reg_tools.cpp                    |  12 +-
 reg-lib/cpu/_reg_tools.h                      |   2 +-
 .../cuda/CudaAffineDeformationFieldKernel.cpp |   2 +-
 reg-lib/cuda/CudaAladinContent.cpp            | 175 +++++-----
 reg-lib/cuda/CudaAladinContent.h              |  19 +-
 reg-lib/cuda/CudaBlockMatchingKernel.cpp      |   2 +-
 reg-lib/cuda/CudaKernelFactory.cpp            |   2 +-
 reg-lib/cuda/CudaKernelFactory.h              |   2 +-
 reg-lib/cuda/CudaResampleImageKernel.cpp      |   4 +-
 reg-lib/cuda/_reg_blocksize_gpu.cu            |  11 +-
 reg-lib/cuda/_reg_blocksize_gpu.h             | 202 +++++------
 reg-lib/cuda/_reg_common_cuda.cu              |   4 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp                 | 315 +++++++++---------
 reg-lib/cuda/_reg_f3d_gpu.h                   |  22 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.cu |   6 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.h  |   2 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  | 162 +++++----
 reg-lib/cuda/_reg_localTransformation_gpu.h   |  59 ++--
 reg-lib/cuda/_reg_nmi_gpu.cu                  |  52 +--
 reg-lib/cuda/_reg_nmi_gpu.h                   |  14 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu            |  79 +++--
 reg-lib/cuda/_reg_optimiser_gpu.h             | 143 ++++----
 reg-lib/cuda/_reg_optimiser_kernels.cu        |   8 +-
 reg-lib/cuda/_reg_resampling_gpu.cu           |  36 +-
 reg-lib/cuda/_reg_resampling_gpu.h            |  14 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |  41 ++-
 reg-lib/cuda/_reg_ssd_gpu.h                   |  13 +-
 reg-lib/cuda/_reg_tools_gpu.cu                |  68 ++--
 reg-lib/cuda/_reg_tools_gpu.h                 |  77 ++---
 reg-lib/cuda/affineDeformationKernel.cu       |   2 +-
 reg-lib/cuda/blockMatchingKernel.cu           |  38 +--
 .../reg_test_affine_deformation_field.cpp     |   2 +-
 reg-test/reg_test_blockMatching.cpp           |   4 +-
 ...est_coherence_affine_deformation_field.cpp |   6 +-
 reg-test/reg_test_coherence_blockMatching.cpp |   4 +-
 reg-test/reg_test_coherence_interpolation.cpp |  16 +-
 reg-test/reg_test_interpolation.cpp           |   8 +-
 90 files changed, 1298 insertions(+), 1371 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 0a3e7b04..c75acbe2 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-126
+127
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 9b6d8984..02022454 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -173,7 +173,7 @@ int main(int argc, char **argv)
    bool iso=false;
    bool verbose=true;
    int captureRangeVox = 3;
-   unsigned int platformFlag = NR_PLATFORM_CPU;
+   int platformFlag = NR_PLATFORM_CPU;
    unsigned gpuIdx = 999;
 
 #if defined (_OPENMP)
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index e4b88244..a74076c4 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -492,7 +492,7 @@ int compute_average_image(nifti_image *averageImage,
       remove_nan_and_add(averageImage, warpedImage, definedValue);
       nifti_image_free(warpedImage);
    }
-   // Clear the allocated demeanField if needed
+   // Deallocate the allocated demeanField if needed
    if(demeanField!=nullptr) nifti_image_free(demeanField);
    // Normalised the average image
    reg_tools_divideImageToImage(averageImage,definedValue, averageImage);
@@ -702,7 +702,7 @@ int main(int argc, char **argv)
    if(operation!=AVG_INPUT || trans_is_affine==false){
       input_image_names = (char **)malloc(image_number*sizeof(char *));
    }
-   if((operation==AVG_INPUT && trans_is_affine==true) || trans_is_affine || operation==AVG_IMG_TRANS_NOAFF){
+   if((operation==AVG_INPUT && trans_is_affine) || trans_is_affine || operation==AVG_IMG_TRANS_NOAFF){
       input_affine_names = (char **)malloc(image_number*sizeof(char *));
    }
    if((operation==AVG_IMG_TRANS && trans_is_affine==false) || operation==AVG_IMG_TRANS_NOAFF){
@@ -746,7 +746,7 @@ int main(int argc, char **argv)
    nifti_image *avg_output_image=nullptr;
 
    // Go over the different operations
-   if(operation==AVG_INPUT && trans_is_affine==true){
+   if(operation==AVG_INPUT && trans_is_affine){
       // compute the average matrix from the input provided
       avg_output_matrix = compute_average_matrices(image_number, input_affine_names);
    }
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index c5bd8772..ac6b3840 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -455,7 +455,7 @@ int main(int argc, char **argv)
             warpedImage->dim[3] * warpedImage->dim[4] * warpedImage->dim[5];
       warpedImage->data = (void *)calloc(warpedImage->nvox, warpedImage->nbyper);
 
-      if((floatingImage->dim[4]==6 || floatingImage->dim[4]==7) && flag->isTensor==true)
+      if((floatingImage->dim[4]==6 || floatingImage->dim[4]==7) && flag->isTensor)
       {
 #ifndef NDEBUG
          reg_print_msg_debug("DTI-based resampling\n");
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index cdddf4ab..0388e0cc 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -669,7 +669,7 @@ int main(int argc, char **argv)
             }
          }
          // Read the second reference image if specified
-         if(flag->referenceImage2Flag==true)
+         if(flag->referenceImage2Flag)
          {
             referenceImage2=reg_io_ReadImageHeader(param->referenceImage2Name);
             if(referenceImage2==nullptr)
@@ -1244,7 +1244,7 @@ int main(int argc, char **argv)
          // Save the image
          reg_io_WriteImageFile(inputTransImage,param->outputTransName);
       }
-      // Clear the allocated arrays
+      // Deallocate the allocated arrays
       if(affineTrans!=nullptr) free(affineTrans);
    }
    /* ******************************************** */
diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp
index cfc0fe45..84070fed 100755
--- a/reg-lib/AladinContent.cpp
+++ b/reg-lib/AladinContent.cpp
@@ -3,26 +3,26 @@
 using namespace std;
 
 /* *************************************************************** */
-AladinContent::AladinContent(nifti_image *currentReferenceIn,
-                             nifti_image *currentFloatingIn,
-                             int *currentReferenceMaskIn,
+AladinContent::AladinContent(nifti_image *referenceIn,
+                             nifti_image *floatingIn,
+                             int *referenceMaskIn,
                              mat44 *transformationMatrixIn,
                              size_t bytesIn,
                              const unsigned int currentPercentageOfBlockToUseIn,
                              const unsigned int inlierLtsIn,
                              int stepSizeBlockIn) :
-    Content(currentReferenceIn, currentFloatingIn, currentReferenceMaskIn, transformationMatrixIn, bytesIn),
+    Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn),
     currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn),
     inlierLts(inlierLtsIn),
     stepSizeBlock(stepSizeBlockIn) {
     if (currentPercentageOfBlockToUseIn || inlierLtsIn || stepSizeBlockIn) {
         blockMatchingParams = new _reg_blockMatchingParam();
-        initialise_block_matching_method(currentReference,
+        initialise_block_matching_method(reference,
                                          blockMatchingParams,
                                          currentPercentageOfBlockToUse,
                                          inlierLts,
                                          stepSizeBlock,
-                                         currentReferenceMask,
+                                         referenceMask,
                                          false);
     } else {
         blockMatchingParams = nullptr;
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index 21b407f6..51a9acb9 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -11,9 +11,9 @@
 
 class AladinContent: public Content {
 public:
-    AladinContent(nifti_image *currentReferenceIn,
-                  nifti_image *currentFloatingIn,
-                  int *currentReferenceMaskIn = nullptr,
+    AladinContent(nifti_image *referenceIn,
+                  nifti_image *floatingIn,
+                  int *referenceMaskIn = nullptr,
                   mat44 *transformationMatrixIn = nullptr,
                   size_t bytesIn = sizeof(float),
                   const unsigned int percentageOfBlocks = 0,
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index bf426b99..b88897df 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -1,89 +1,91 @@
 #include "Content.h"
-#include "_reg_maths.h"
 
 /* *************************************************************** */
-Content::Content(nifti_image *currentReferenceIn,
-                 nifti_image *currentFloatingIn,
-                 int *currentReferenceMaskIn,
+Content::Content(nifti_image *referenceIn,
+                 nifti_image *floatingIn,
+                 int *referenceMaskIn,
                  mat44 *transformationMatrixIn,
                  size_t bytesIn) :
-    currentReference(currentReferenceIn),
-    currentFloating(currentFloatingIn),
-    currentReferenceMask(currentReferenceMaskIn),
+    reference(referenceIn),
+    floating(floatingIn),
+    referenceMask(referenceMaskIn),
     transformationMatrix(transformationMatrixIn) {
-    if (!currentReferenceIn || !currentFloatingIn) {
+    if (!referenceIn || !floatingIn) {
         reg_print_fct_error("Content::Content()");
-        reg_print_msg_error("currentReferenceIn or currentFloatingIn can't be nullptr");
+        reg_print_msg_error("referenceIn or floatingIn can't be nullptr");
         reg_exit();
     }
-    AllocateWarpedImage();
+    AllocateWarped();
     AllocateDeformationField(bytesIn);
-    if (currentReferenceMask == nullptr)
-        currentReferenceMask = (int*)calloc(currentReference->nvox, sizeof(int));
+    if (!referenceMask)
+        referenceMask = (int*)calloc(reference->nvox, sizeof(int));
 }
 /* *************************************************************** */
 Content::~Content() {
-    ClearWarpedImage();
-    ClearDeformationField();
+    DeallocateWarped();
+    DeallocateDeformationField();
+    // free(referenceMask); // TODO Fix this with smart pointers
 }
 /* *************************************************************** */
-void Content::AllocateWarpedImage() {
-    currentWarped = nifti_copy_nim_info(currentReference);
-    currentWarped->dim[0] = currentWarped->ndim = currentFloating->ndim;
-    currentWarped->dim[4] = currentWarped->nt = currentFloating->nt;
-    currentWarped->pixdim[4] = currentWarped->dt = 1.0;
-    currentWarped->nvox = (size_t)(currentWarped->nx * currentWarped->ny * currentWarped->nz * currentWarped->nt);
-    currentWarped->datatype = currentFloating->datatype;
-    currentWarped->nbyper = currentFloating->nbyper;
-    currentWarped->data = (void*)calloc(currentWarped->nvox, currentWarped->nbyper);
+void Content::AllocateWarped() {
+    warped = nifti_copy_nim_info(reference);
+    warped->dim[0] = warped->ndim = floating->ndim;
+    warped->dim[4] = warped->nt = floating->nt;
+    warped->pixdim[4] = warped->dt = 1.0;
+    warped->nvox = (size_t)(warped->nx * warped->ny * warped->nz * warped->nt);
+    warped->datatype = floating->datatype;
+    warped->nbyper = floating->nbyper;
+    warped->data = (void*)calloc(warped->nvox, warped->nbyper);
 }
 /* *************************************************************** */
-void Content::ClearWarpedImage() {
-    if (currentWarped)
-        nifti_image_free(currentWarped);
-    currentWarped = nullptr;
+void Content::DeallocateWarped() {
+    if (warped) {
+        nifti_image_free(warped);
+        warped = nullptr;
+    }
 }
 /* *************************************************************** */
 void Content::AllocateDeformationField(size_t bytes) {
-    currentDeformationField = nifti_copy_nim_info(currentReference);
-    currentDeformationField->dim[0] = currentDeformationField->ndim = 5;
-    if (currentReference->dim[0] == 2)
-        currentDeformationField->dim[3] = currentDeformationField->nz = 1;
-    currentDeformationField->dim[4] = currentDeformationField->nt = 1;
-    currentDeformationField->pixdim[4] = currentDeformationField->dt = 1;
-    if (currentReference->nz == 1)
-        currentDeformationField->dim[5] = currentDeformationField->nu = 2;
+    deformationField = nifti_copy_nim_info(reference);
+    deformationField->dim[0] = deformationField->ndim = 5;
+    if (reference->dim[0] == 2)
+        deformationField->dim[3] = deformationField->nz = 1;
+    deformationField->dim[4] = deformationField->nt = 1;
+    deformationField->pixdim[4] = deformationField->dt = 1;
+    if (reference->nz == 1)
+        deformationField->dim[5] = deformationField->nu = 2;
     else
-        currentDeformationField->dim[5] = currentDeformationField->nu = 3;
-    currentDeformationField->pixdim[5] = currentDeformationField->du = 1;
-    currentDeformationField->dim[6] = currentDeformationField->nv = 1;
-    currentDeformationField->pixdim[6] = currentDeformationField->dv = 1;
-    currentDeformationField->dim[7] = currentDeformationField->nw = 1;
-    currentDeformationField->pixdim[7] = currentDeformationField->dw = 1;
-    currentDeformationField->nvox = (size_t)(currentDeformationField->nx * currentDeformationField->ny * currentDeformationField->nz *
-                                             currentDeformationField->nt * currentDeformationField->nu);
-    currentDeformationField->nbyper = (int)bytes;
+        deformationField->dim[5] = deformationField->nu = 3;
+    deformationField->pixdim[5] = deformationField->du = 1;
+    deformationField->dim[6] = deformationField->nv = 1;
+    deformationField->pixdim[6] = deformationField->dv = 1;
+    deformationField->dim[7] = deformationField->nw = 1;
+    deformationField->pixdim[7] = deformationField->dw = 1;
+    deformationField->nvox = (size_t)(deformationField->nx * deformationField->ny * deformationField->nz *
+                                             deformationField->nt * deformationField->nu);
+    deformationField->nbyper = (int)bytes;
     if (bytes == 4)
-        currentDeformationField->datatype = NIFTI_TYPE_FLOAT32;
+        deformationField->datatype = NIFTI_TYPE_FLOAT32;
     else if (bytes == 8)
-        currentDeformationField->datatype = NIFTI_TYPE_FLOAT64;
+        deformationField->datatype = NIFTI_TYPE_FLOAT64;
     else {
         reg_print_fct_error("Content::AllocateDeformationField()");
         reg_print_msg_error("Only float or double are expected for the deformation field");
         reg_exit();
     }
-    currentDeformationField->intent_code = NIFTI_INTENT_VECTOR;
-    memset(currentDeformationField->intent_name, 0, sizeof(currentDeformationField->intent_name));
-    strcpy(currentDeformationField->intent_name, "NREG_TRANS");
-    currentDeformationField->intent_p1 = DEF_FIELD;
-    currentDeformationField->scl_slope = 1;
-    currentDeformationField->scl_inter = 0;
-    currentDeformationField->data = (void*)calloc(currentDeformationField->nvox, currentDeformationField->nbyper);
+    deformationField->intent_code = NIFTI_INTENT_VECTOR;
+    memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name));
+    strcpy(deformationField->intent_name, "NREG_TRANS");
+    deformationField->intent_p1 = DEF_FIELD;
+    deformationField->scl_slope = 1;
+    deformationField->scl_inter = 0;
+    deformationField->data = (void*)calloc(deformationField->nvox, deformationField->nbyper);
 }
 /* *************************************************************** */
-void Content::ClearDeformationField() {
-    if (currentDeformationField)
-        nifti_image_free(currentDeformationField);
-    currentDeformationField = nullptr;
+void Content::DeallocateDeformationField() {
+    if (deformationField) {
+        nifti_image_free(deformationField);
+        deformationField = nullptr;
+    }
 }
 /* *************************************************************** */
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index 4530acd9..506820c7 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -1,61 +1,60 @@
 #pragma once
 
-#include "nifti1_io.h"
+#include "_reg_maths.h"
 
 class Content {
 public:
     Content() = delete; // Can't be initialised without reference and floating images
-    Content(nifti_image *currentReferenceIn,
-            nifti_image *currentFloatingIn,
-            int *currentReferenceMaskIn = nullptr,
+    Content(nifti_image *referenceIn,
+            nifti_image *floatingIn,
+            int *referenceMaskIn = nullptr,
             mat44 *transformationMatrixIn = nullptr,
             size_t bytesIn = sizeof(float));
     virtual ~Content();
 
+    virtual bool IsCurrentComputationDoubleCapable() { return true; }
+
     // Getters
-    virtual nifti_image* GetCurrentDeformationField() { return currentDeformationField; }
-    virtual nifti_image* GetCurrentReference() { return currentReference; }
-    virtual nifti_image* GetCurrentFloating() { return currentFloating; }
-    virtual nifti_image* GetCurrentWarped(int = 0) { return currentWarped; }
-    virtual int* GetCurrentReferenceMask() { return currentReferenceMask; }
+    virtual nifti_image* GetReference() { return reference; }
+    virtual nifti_image* GetFloating() { return floating; }
+    virtual nifti_image* GetDeformationField() { return deformationField; }
+    virtual int* GetReferenceMask() { return referenceMask; }
     virtual mat44* GetTransformationMatrix() { return transformationMatrix; }
+    virtual nifti_image* GetWarped(int datatype = 0, int index = 0) { return warped; }
 
     // Setters
-    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) {
-        transformationMatrix = transformationMatrixIn;
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) {
+        deformationField = deformationFieldIn;
     }
-    virtual void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) {
-        ClearDeformationField();
-        currentDeformationField = currentDeformationFieldIn;
+    virtual void SetReferenceMask(int *referenceMaskIn) {
+        referenceMask = referenceMaskIn;
     }
-    virtual void SetCurrentWarped(nifti_image *currentWarpedImageIn) {
-        ClearWarpedImage();
-        currentWarped = currentWarpedImageIn;
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) {
+        transformationMatrix = transformationMatrixIn;
     }
-    virtual void SetCurrentReferenceMask(int *currentReferenceMaskIn) {
-        free(currentReferenceMask);
-        currentReferenceMask = currentReferenceMaskIn;
+    virtual void SetWarped(nifti_image *warpedIn) {
+        warped = warpedIn;
     }
 
-    virtual bool IsCurrentComputationDoubleCapable() { return true; }
-
-    static mat44* GetXYZMatrix(nifti_image *image) {
-        return image->sform_code > 0 ? &image->sto_xyz : &image->qto_xyz;
+    // Auxiliary methods
+    static mat44* GetXYZMatrix(nifti_image& image) {
+        return image.sform_code > 0 ? &image.sto_xyz : &image.qto_xyz;
     }
-    static mat44* GetIJKMatrix(nifti_image *image) {
-        return image->sform_code > 0 ? &image->sto_ijk : &image->qto_ijk;
+    static mat44* GetIJKMatrix(nifti_image& image) {
+        return image.sform_code > 0 ? &image.sto_ijk : &image.qto_ijk;
     }
 
 protected:
-    virtual void AllocateWarpedImage();
-    virtual void ClearWarpedImage();
-    virtual void AllocateDeformationField(size_t bytes);
-    virtual void ClearDeformationField();
-
-    nifti_image *currentReference;
-    nifti_image *currentFloating;
-    int *currentReferenceMask;
-    nifti_image *currentDeformationField;
-    nifti_image *currentWarped;
+    nifti_image *reference;
+    nifti_image *floating;
+    nifti_image *deformationField;
+    int *referenceMask;
     mat44 *transformationMatrix;
+    nifti_image *warped;
+
+private:
+    void AllocateWarped();
+    void DeallocateWarped();
+    void AllocateDeformationField(size_t bytes);
+    void DeallocateDeformationField();
 };
diff --git a/reg-lib/KernelFactory.h b/reg-lib/KernelFactory.h
index c5348c9e..613ace44 100755
--- a/reg-lib/KernelFactory.h
+++ b/reg-lib/KernelFactory.h
@@ -5,6 +5,6 @@
 
 class KernelFactory {
 public:
-    virtual Kernel* ProduceKernel(std::string name, Content *con) const = 0;
+    virtual Kernel* Produce(std::string name, Content *con) const = 0;
     virtual ~KernelFactory() {}
 };
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index ebc7bdcb..a46cb0fc 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -39,31 +39,31 @@ Kernel* Platform::CreateKernel(const string& name, Content *con) const {
 }
 /* *************************************************************** */
 std::string Platform::GetName() {
-    return this->platformName;
+    return platformName;
 }
 /* *************************************************************** */
 unsigned Platform::GetGpuIdx() {
-    return this->gpuIdx;
+    return gpuIdx;
 }
 /* *************************************************************** */
 void Platform::SetGpuIdx(unsigned gpuIdxIn) {
-    if (this->platformCode == NR_PLATFORM_CPU) {
-        this->gpuIdx = 999;
+    if (platformCode == NR_PLATFORM_CPU) {
+        gpuIdx = 999;
     }
 #ifdef _USE_CUDA
-    else if (this->platformCode == NR_PLATFORM_CUDA) {
+    else if (platformCode == NR_PLATFORM_CUDA) {
         CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance();
         if (gpuIdxIn != 999) {
-            this->gpuIdx = gpuIdxIn;
+            gpuIdx = gpuIdxIn;
             cudaContext->SetCudaIdx(gpuIdxIn);
         }
     }
 #endif
 #ifdef _USE_OPENCL
-    else if (this->platformCode == NR_PLATFORM_CL) {
+    else if (platformCode == NR_PLATFORM_CL) {
         ClContextSingleton *sContext = &ClContextSingleton::Instance();
         if (gpuIdxIn != 999) {
-            this->gpuIdx = gpuIdxIn;
+            gpuIdx = gpuIdxIn;
             sContext->SetClIdx(gpuIdxIn);
         }
 
@@ -81,7 +81,7 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) {
 }
 /* *************************************************************** */
 int Platform::GetPlatformCode() {
-    return this->platformCode;
+    return platformCode;
 }
 /* *************************************************************** */
 //void Platform::SetPlatformCode(const int platformCodeIn) {
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 7b3599b4..49a8f011 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -408,7 +408,7 @@ void reg_aladin<T>::InitialiseRegistration() {
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::ClearCurrentInputImage() {
+void reg_aladin<T>::DeallocateCurrentInputImage() {
     nifti_image_free(this->referencePyramid[this->currentLevel]);
     this->referencePyramid[this->currentLevel] = nullptr;
 
@@ -433,7 +433,7 @@ void reg_aladin<T>::CreateKernels() {
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::ClearKernels() {
+void reg_aladin<T>::DeallocateKernels() {
     delete this->affineTransformation3DKernel;
     delete this->resamplingKernel;
     if (this->blockMatchingKernel != nullptr)
@@ -486,7 +486,7 @@ void reg_aladin<T>::InitAladinContent(nifti_image *ref,
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::ClearAladinContent() {
+void reg_aladin<T>::DeinitAladinContent() {
     delete this->con;
 }
 /* *************************************************************** */
@@ -532,14 +532,14 @@ void reg_aladin<T>::Run() {
 #endif
 
 #ifndef NDEBUG
-        if (this->con->GetCurrentReference()->sform_code > 0)
-            reg_mat44_disp(&this->con->GetCurrentReference()->sto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)");
+        if (this->con->GetReference()->sform_code > 0)
+            reg_mat44_disp(&this->con->GetReference()->sto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)");
         else
-            reg_mat44_disp(&this->con->GetCurrentReference()->qto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)");
-        if (this->con->GetCurrentFloating()->sform_code > 0)
-            reg_mat44_disp(&this->con->GetCurrentFloating()->sto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)");
+            reg_mat44_disp(&this->con->GetReference()->qto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)");
+        if (this->con->GetFloating()->sform_code > 0)
+            reg_mat44_disp(&this->con->GetFloating()->sto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)");
         else
-            reg_mat44_disp(&this->con->GetCurrentFloating()->qto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)");
+            reg_mat44_disp(&this->con->GetFloating()->qto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)");
 #endif
 
         /* ****************** */
@@ -557,9 +557,9 @@ void reg_aladin<T>::Run() {
             ResolveMatrix(maxNumberOfIterationToPerform, AFFINE);
 
         // SOME CLEANING IS PERFORMED
-        this->ClearKernels();
-        this->ClearAladinContent();
-        this->ClearCurrentInputImage();
+        this->DeallocateKernels();
+        this->DeinitAladinContent();
+        this->DeallocateCurrentInputImage();
 
 #ifdef NDEBUG
         if (this->verbose) {
@@ -599,19 +599,19 @@ nifti_image* reg_aladin<T>::GetFinalWarpedImage() {
     reg_aladin<T>::CreateKernels();
 
     reg_aladin<T>::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation
-    nifti_image *currentWarped = this->con->GetCurrentWarped(floatingType);
+    nifti_image *warped = this->con->GetWarped(floatingType);
 
     free(mask);
-    nifti_image *resultImage = nifti_copy_nim_info(currentWarped);
+    nifti_image *resultImage = nifti_copy_nim_info(warped);
     resultImage->cal_min = this->inputFloating->cal_min;
     resultImage->cal_max = this->inputFloating->cal_max;
     resultImage->scl_slope = this->inputFloating->scl_slope;
     resultImage->scl_inter = this->inputFloating->scl_inter;
     resultImage->data = (void *)malloc(resultImage->nvox * resultImage->nbyper);
-    memcpy(resultImage->data, currentWarped->data, resultImage->nvox * resultImage->nbyper);
+    memcpy(resultImage->data, warped->data, resultImage->nvox * resultImage->nbyper);
 
-    reg_aladin<T>::ClearKernels();
-    reg_aladin<T>::ClearAladinContent();
+    reg_aladin<T>::DeallocateKernels();
+    reg_aladin<T>::DeinitAladinContent();
     return resultImage;
 }
 /* *************************************************************** */
@@ -622,22 +622,22 @@ void reg_aladin<T>::DebugPrintLevelInfoStart() {
     sprintf(text, "Current level %i / %i", this->currentLevel + 1, this->numberOfLevels);
     reg_print_info(this->executableName, text);
     sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-            this->con->GetCurrentReference()->nx,
-            this->con->GetCurrentReference()->ny,
-            this->con->GetCurrentReference()->nz,
-            this->con->GetCurrentReference()->dx,
-            this->con->GetCurrentReference()->dy,
-            this->con->GetCurrentReference()->dz);
+            this->con->GetReference()->nx,
+            this->con->GetReference()->ny,
+            this->con->GetReference()->nz,
+            this->con->GetReference()->dx,
+            this->con->GetReference()->dy,
+            this->con->GetReference()->dz);
     reg_print_info(this->executableName, text);
     sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-            this->con->GetCurrentFloating()->nx,
-            this->con->GetCurrentFloating()->ny,
-            this->con->GetCurrentFloating()->nz,
-            this->con->GetCurrentFloating()->dx,
-            this->con->GetCurrentFloating()->dy,
-            this->con->GetCurrentFloating()->dz);
+            this->con->GetFloating()->nx,
+            this->con->GetFloating()->ny,
+            this->con->GetFloating()->nz,
+            this->con->GetFloating()->dx,
+            this->con->GetFloating()->dy,
+            this->con->GetFloating()->dz);
     reg_print_info(this->executableName, text);
-    if (this->con->GetCurrentReference()->nz == 1) {
+    if (this->con->GetReference()->nz == 1) {
         reg_print_info(this->executableName, "Block size = [4 4 1]");
     } else reg_print_info(this->executableName, "Block size = [4 4 4]");
     reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 9995303f..016681cc 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -109,7 +109,7 @@ class reg_aladin {
     bool TestMatrixConvergence(mat44 *mat);
 
     virtual void InitialiseRegistration();
-    virtual void ClearCurrentInputImage();
+    virtual void DeallocateCurrentInputImage();
 
     virtual void GetDeformationField();
     virtual void GetWarpedImage(int, float padding);
@@ -127,9 +127,9 @@ class reg_aladin {
                                    unsigned int blockPercentage = 0,
                                    unsigned int inlierLts = 0,
                                    unsigned int blockStepSize = 0);
-    virtual void ClearAladinContent();
+    virtual void DeinitAladinContent();
     virtual void CreateKernels();
-    virtual void ClearKernels();
+    virtual void DeallocateKernels();
 
 public:
     reg_aladin();
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index 32857cd9..d2164a58 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -289,9 +289,9 @@ void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::ClearCurrentInputImage()
+void reg_aladin_sym<T>::DeallocateCurrentInputImage()
 {
-   reg_aladin<T>::ClearCurrentInputImage();
+   reg_aladin<T>::DeallocateCurrentInputImage();
    if(this->FloatingMaskPyramid[this->currentLevel]!=nullptr)
       free(this->FloatingMaskPyramid[this->currentLevel]);
    this->FloatingMaskPyramid[this->currentLevel]=nullptr;
@@ -308,16 +308,16 @@ void reg_aladin_sym<T>::CreateKernels()
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::ClearAladinContent()
+void reg_aladin_sym<T>::DeinitAladinContent()
 {
-  reg_aladin<T>::ClearAladinContent();
+  reg_aladin<T>::DeinitAladinContent();
   delete this->backCon;
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::ClearKernels()
+void reg_aladin_sym<T>::DeallocateKernels()
 {
-  reg_aladin<T>::ClearKernels();
+  reg_aladin<T>::DeallocateKernels();
   delete this->bResamplingKernel;
   delete this->bAffineTransformation3DKernel;
   delete this->bBlockMatchingKernel;
@@ -331,22 +331,22 @@ void reg_aladin_sym<T>::DebugPrintLevelInfoStart()
    sprintf(text, "Current level %i / %i", this->currentLevel+1, this->numberOfLevels);
    reg_print_info(this->executableName,text);
    sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-           this->con->GetCurrentReference()->nx,
-           this->con->GetCurrentReference()->ny,
-           this->con->GetCurrentReference()->nz,
-           this->con->GetCurrentReference()->dx,
-           this->con->GetCurrentReference()->dy,
-           this->con->GetCurrentReference()->dz);
+           this->con->GetReference()->nx,
+           this->con->GetReference()->ny,
+           this->con->GetReference()->nz,
+           this->con->GetReference()->dx,
+           this->con->GetReference()->dy,
+           this->con->GetReference()->dz);
    reg_print_info(this->executableName,text);
    sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-           this->con->GetCurrentFloating()->nx,
-           this->con->GetCurrentFloating()->ny,
-           this->con->GetCurrentFloating()->nz,
-           this->con->GetCurrentFloating()->dx,
-           this->con->GetCurrentFloating()->dy,
-           this->con->GetCurrentFloating()->dz);
+           this->con->GetFloating()->nx,
+           this->con->GetFloating()->ny,
+           this->con->GetFloating()->nz,
+           this->con->GetFloating()->dx,
+           this->con->GetFloating()->dy,
+           this->con->GetFloating()->dz);
    reg_print_info(this->executableName,text);
-   if(this->con->GetCurrentReference()->nz==1){
+   if(this->con->GetReference()->nz==1){
       reg_print_info(this->executableName, "Block size = [4 4 1]");
    }
    else reg_print_info(this->executableName, "Block size = [4 4 4]");
diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h
index 1ddfe436..5f724e35 100644
--- a/reg-lib/_reg_aladin_sym.h
+++ b/reg-lib/_reg_aladin_sym.h
@@ -30,9 +30,9 @@ class reg_aladin_sym : public reg_aladin<T>
                                  unsigned int blockPercentage = 0,
                                  unsigned int inlierLts = 0,
                                  unsigned int blockStepSize = 0);
-  virtual void ClearAladinContent();
+  virtual void DeinitAladinContent();
   virtual void CreateKernels();
-  virtual void ClearKernels();
+  virtual void DeallocateKernels();
 
 protected:
   nifti_image *InputFloatingMask;
@@ -43,7 +43,7 @@ class reg_aladin_sym : public reg_aladin<T>
 
   mat44 *BackwardTransformationMatrix;
 
-  virtual void ClearCurrentInputImage();
+  virtual void DeallocateCurrentInputImage();
   virtual void GetBackwardDeformationField();
   virtual void UpdateTransformationMatrix(int);
 
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index bcf3710a..6fc83deb 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -446,11 +446,11 @@ nifti_image **reg_f3d2<T>::GetWarpedImage()
    }
 
    // Set the input images
-   reg_f3d2<T>::currentReference = this->inputReference;
-   reg_f3d2<T>::currentFloating = this->inputFloating;
+   reg_f3d2<T>::reference = this->inputReference;
+   reg_f3d2<T>::floating = this->inputFloating;
    // No mask is used to perform the final resampling
    reg_f3d2<T>::currentMask = nullptr;
-   reg_f3d2<T>::currentFloatingMask = nullptr;
+   reg_f3d2<T>::floatingMask = nullptr;
 
    // Allocate the forward and backward warped images
    reg_f3d2<T>::AllocateWarped();
@@ -460,8 +460,8 @@ nifti_image **reg_f3d2<T>::GetWarpedImage()
    // Warp the floating images into the reference spaces using a cubic spline interpolation
    reg_f3d2<T>::WarpFloatingImage(3); // cubic spline interpolation
 
-   // Clear the deformation field
-   reg_f3d2<T>::ClearDeformationField();
+   // Deallocate the deformation field
+   reg_f3d2<T>::DeallocateDeformationField();
 
    // Allocate and save the forward transformation warped image
    nifti_image **warpedImage=(nifti_image **)malloc(2*sizeof(nifti_image *));
@@ -482,8 +482,8 @@ nifti_image **reg_f3d2<T>::GetWarpedImage()
    warpedImage[1]->data=(void *)malloc(warpedImage[1]->nvox*warpedImage[1]->nbyper);
    memcpy(warpedImage[1]->data, this->backwardWarped->data, warpedImage[1]->nvox*warpedImage[1]->nbyper);
 
-   // Clear the warped images
-   reg_f3d2<T>::ClearWarped();
+   // Deallocate the warped images
+   reg_f3d2<T>::DeallocateWarped();
 
    // Return the two final warped images
    return warpedImage;
diff --git a/reg-lib/_reg_f3d_sym.cpp b/reg-lib/_reg_f3d_sym.cpp
index 3874e12b..ae00600c 100644
--- a/reg-lib/_reg_f3d_sym.cpp
+++ b/reg-lib/_reg_f3d_sym.cpp
@@ -31,7 +31,7 @@ reg_f3d_sym<T>::reg_f3d_sym(int refTimePoint,int floTimePoint)
    this->backwardLogJointHistogram=nullptr;
 
    this->floatingMaskImage=nullptr;
-   this->currentFloatingMask=nullptr;
+   this->floatingMask=nullptr;
    this->floatingMaskPyramid=nullptr;
    this->backwardActiveVoxelNumber=nullptr;
 
@@ -117,7 +117,7 @@ template <class T>
 T reg_f3d_sym<T>::InitialiseCurrentLevel()
 {
    // Refine the control point grids if required
-   if(this->gridRefinement==true)
+   if(this->gridRefinement)
    {
       if(this->currentLevel==0){
          this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast<T>(powf(16.0f, this->levelNumber-1));
@@ -136,23 +136,23 @@ T reg_f3d_sym<T>::InitialiseCurrentLevel()
    if(this->usePyramid)
    {
       this->currentMask = this->maskPyramid[this->currentLevel];
-      this->currentFloatingMask = this->floatingMaskPyramid[this->currentLevel];
+      this->floatingMask = this->floatingMaskPyramid[this->currentLevel];
    }
    else
    {
       this->currentMask = this->maskPyramid[0];
-      this->currentFloatingMask = this->floatingMaskPyramid[0];
+      this->floatingMask = this->floatingMaskPyramid[0];
    }
 
    // Define the initial step size for the gradient ascent optimisation
-   T maxStepSize = this->currentReference->dx;
-   maxStepSize = this->currentReference->dy>maxStepSize?this->currentReference->dy:maxStepSize;
-   maxStepSize = this->currentFloating->dx>maxStepSize?this->currentFloating->dx:maxStepSize;
-   maxStepSize = this->currentFloating->dy>maxStepSize?this->currentFloating->dy:maxStepSize;
-   if(this->currentReference->ndim>2)
+   T maxStepSize = this->reference->dx;
+   maxStepSize = this->reference->dy>maxStepSize?this->reference->dy:maxStepSize;
+   maxStepSize = this->floating->dx>maxStepSize?this->floating->dx:maxStepSize;
+   maxStepSize = this->floating->dy>maxStepSize?this->floating->dy:maxStepSize;
+   if(this->reference->ndim>2)
    {
-      maxStepSize = (this->currentReference->dz>maxStepSize)?this->currentReference->dz:maxStepSize;
-      maxStepSize = (this->currentFloating->dz>maxStepSize)?this->currentFloating->dz:maxStepSize;
+      maxStepSize = (this->reference->dz>maxStepSize)?this->reference->dz:maxStepSize;
+      maxStepSize = (this->floating->dz>maxStepSize)?this->floating->dz:maxStepSize;
    }
 #ifndef NDEBUG
    reg_print_fct_debug("reg_f3d_sym<T>::InitialiseCurrentLevel");
@@ -161,11 +161,11 @@ T reg_f3d_sym<T>::InitialiseCurrentLevel()
 }
 /* *************************************************************** */
 template <class T>
-void reg_f3d_sym<T>::ClearCurrentInputImage()
+void reg_f3d_sym<T>::DeallocateCurrentInputImage()
 {
-   reg_f3d<T>::ClearCurrentInputImage();
+   reg_f3d<T>::DeallocateCurrentInputImage();
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::ClearCurrentInputImage");
+   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateCurrentInputImage");
 #endif
    return;
 }
@@ -174,26 +174,26 @@ void reg_f3d_sym<T>::ClearCurrentInputImage()
 template <class T>
 void reg_f3d_sym<T>::AllocateWarped()
 {
-   this->ClearWarped();
+   this->DeallocateWarped();
 
    reg_f3d<T>::AllocateWarped();
-   if(this->currentFloating==nullptr)
+   if(this->floating==nullptr)
    {
       reg_print_fct_error("reg_f3d_sym<T>::AllocateWarped()");
       reg_print_msg_error("The floating image is not defined");
       reg_exit();
    }
-   this->backwardWarped = nifti_copy_nim_info(this->currentFloating);
-   this->backwardWarped->dim[0]=this->backwardWarped->ndim=this->currentReference->ndim;
-   this->backwardWarped->dim[4]=this->backwardWarped->nt=this->currentReference->nt;
+   this->backwardWarped = nifti_copy_nim_info(this->floating);
+   this->backwardWarped->dim[0]=this->backwardWarped->ndim=this->reference->ndim;
+   this->backwardWarped->dim[4]=this->backwardWarped->nt=this->reference->nt;
    this->backwardWarped->pixdim[4]=this->backwardWarped->dt=1.0;
    this->backwardWarped->nvox =
          (size_t)this->backwardWarped->nx *
          (size_t)this->backwardWarped->ny *
          (size_t)this->backwardWarped->nz *
          (size_t)this->backwardWarped->nt;
-   this->backwardWarped->datatype = this->currentReference->datatype;
-   this->backwardWarped->nbyper = this->currentReference->nbyper;
+   this->backwardWarped->datatype = this->reference->datatype;
+   this->backwardWarped->nbyper = this->reference->nbyper;
    this->backwardWarped->data = (void *)calloc(this->backwardWarped->nvox, this->backwardWarped->nbyper);
 #ifndef NDEBUG
    reg_print_fct_debug("reg_f3d_sym<T>::AllocateWarped");
@@ -202,16 +202,16 @@ void reg_f3d_sym<T>::AllocateWarped()
 }
 /* *************************************************************** */
 template <class T>
-void reg_f3d_sym<T>::ClearWarped()
+void reg_f3d_sym<T>::DeallocateWarped()
 {
-   reg_f3d<T>::ClearWarped();
+   reg_f3d<T>::DeallocateWarped();
    if(this->backwardWarped!=nullptr)
    {
       nifti_image_free(this->backwardWarped);
       this->backwardWarped=nullptr;
    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::ClearWarped");
+   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateWarped");
 #endif
    return;
 }
@@ -220,10 +220,10 @@ void reg_f3d_sym<T>::ClearWarped()
 template <class T>
 void reg_f3d_sym<T>::AllocateDeformationField()
 {
-   this->ClearDeformationField();
+   this->DeallocateDeformationField();
 
    reg_f3d<T>::AllocateDeformationField();
-   if(this->currentFloating==nullptr)
+   if(this->floating==nullptr)
    {
       reg_print_fct_error("reg_f3d_sym<T>::AllocateDeformationField()");
       reg_print_msg_error("The floating image is not defined");
@@ -235,14 +235,14 @@ void reg_f3d_sym<T>::AllocateDeformationField()
       reg_print_msg_error("The backward control point image is not defined");
       reg_exit();
    }
-   this->backwardDeformationFieldImage = nifti_copy_nim_info(this->currentFloating);
+   this->backwardDeformationFieldImage = nifti_copy_nim_info(this->floating);
    this->backwardDeformationFieldImage->dim[0]=this->backwardDeformationFieldImage->ndim=5;
-   this->backwardDeformationFieldImage->dim[1]=this->backwardDeformationFieldImage->nx=this->currentFloating->nx;
-   this->backwardDeformationFieldImage->dim[2]=this->backwardDeformationFieldImage->ny=this->currentFloating->ny;
-   this->backwardDeformationFieldImage->dim[3]=this->backwardDeformationFieldImage->nz=this->currentFloating->nz;
+   this->backwardDeformationFieldImage->dim[1]=this->backwardDeformationFieldImage->nx=this->floating->nx;
+   this->backwardDeformationFieldImage->dim[2]=this->backwardDeformationFieldImage->ny=this->floating->ny;
+   this->backwardDeformationFieldImage->dim[3]=this->backwardDeformationFieldImage->nz=this->floating->nz;
    this->backwardDeformationFieldImage->dim[4]=this->backwardDeformationFieldImage->nt=1;
    this->backwardDeformationFieldImage->pixdim[4]=this->backwardDeformationFieldImage->dt=1.0;
-   if(this->currentFloating->nz==1)
+   if(this->floating->nz==1)
       this->backwardDeformationFieldImage->dim[5]=this->backwardDeformationFieldImage->nu=2;
    else this->backwardDeformationFieldImage->dim[5]=this->backwardDeformationFieldImage->nu=3;
    this->backwardDeformationFieldImage->pixdim[5]=this->backwardDeformationFieldImage->du=1.0;
@@ -281,9 +281,9 @@ void reg_f3d_sym<T>::AllocateDeformationField()
 }
 /* *************************************************************** */
 template <class T>
-void reg_f3d_sym<T>::ClearDeformationField()
+void reg_f3d_sym<T>::DeallocateDeformationField()
 {
-   reg_f3d<T>::ClearDeformationField();
+   reg_f3d<T>::DeallocateDeformationField();
    if(this->backwardDeformationFieldImage!=nullptr)
    {
       nifti_image_free(this->backwardDeformationFieldImage);
@@ -295,7 +295,7 @@ void reg_f3d_sym<T>::ClearDeformationField()
       this->backwardJacobianMatrix=nullptr;
    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::ClearDeformationField");
+   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateDeformationField");
 #endif
    return;
 }
@@ -304,7 +304,7 @@ void reg_f3d_sym<T>::ClearDeformationField()
 template <class T>
 void reg_f3d_sym<T>::AllocateWarpedGradient()
 {
-   this->ClearWarpedGradient();
+   this->DeallocateWarpedGradient();
 
    reg_f3d<T>::AllocateWarpedGradient();
    if(this->backwardDeformationFieldImage==nullptr)
@@ -323,16 +323,16 @@ void reg_f3d_sym<T>::AllocateWarpedGradient()
 }
 /* *************************************************************** */
 template <class T>
-void reg_f3d_sym<T>::ClearWarpedGradient()
+void reg_f3d_sym<T>::DeallocateWarpedGradient()
 {
-   reg_f3d<T>::ClearWarpedGradient();
+   reg_f3d<T>::DeallocateWarpedGradient();
    if(this->backwardWarpedGradientImage!=nullptr)
    {
       nifti_image_free(this->backwardWarpedGradientImage);
       this->backwardWarpedGradientImage=nullptr;
    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::ClearWarpedGradient");
+   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateWarpedGradient");
 #endif
    return;
 }
@@ -341,7 +341,7 @@ void reg_f3d_sym<T>::ClearWarpedGradient()
 template <class T>
 void reg_f3d_sym<T>::AllocateVoxelBasedMeasureGradient()
 {
-   this->ClearVoxelBasedMeasureGradient();
+   this->DeallocateVoxelBasedMeasureGradient();
 
    reg_f3d<T>::AllocateVoxelBasedMeasureGradient();
    if(this->backwardDeformationFieldImage==nullptr)
@@ -361,16 +361,16 @@ void reg_f3d_sym<T>::AllocateVoxelBasedMeasureGradient()
 }
 /* *************************************************************** */
 template <class T>
-void reg_f3d_sym<T>::ClearVoxelBasedMeasureGradient()
+void reg_f3d_sym<T>::DeallocateVoxelBasedMeasureGradient()
 {
-   reg_f3d<T>::ClearVoxelBasedMeasureGradient();
+   reg_f3d<T>::DeallocateVoxelBasedMeasureGradient();
    if(this->backwardVoxelBasedMeasureGradientImage!=nullptr)
    {
       nifti_image_free(this->backwardVoxelBasedMeasureGradientImage);
       this->backwardVoxelBasedMeasureGradientImage=nullptr;
    }
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::ClearVoxelBasedMeasureGradient");
+   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateVoxelBasedMeasureGradient");
 #endif
    return;
 }
@@ -379,7 +379,7 @@ void reg_f3d_sym<T>::ClearVoxelBasedMeasureGradient()
 template <class T>
 void reg_f3d_sym<T>::AllocateTransformationGradient()
 {
-   this->ClearTransformationGradient();
+   this->DeallocateTransformationGradient();
 
    reg_f3d<T>::AllocateTransformationGradient();
    if(this->backwardControlPointGrid==nullptr)
@@ -399,14 +399,14 @@ void reg_f3d_sym<T>::AllocateTransformationGradient()
 }
 /* *************************************************************** */
 template <class T>
-void reg_f3d_sym<T>::ClearTransformationGradient()
+void reg_f3d_sym<T>::DeallocateTransformationGradient()
 {
-   reg_f3d<T>::ClearTransformationGradient();
+   reg_f3d<T>::DeallocateTransformationGradient();
    if(this->backwardTransformationGradient!=nullptr)
       nifti_image_free(this->backwardTransformationGradient);
    this->backwardTransformationGradient=nullptr;
 #ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::ClearTransformationGradient");
+   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateTransformationGradient");
 #endif
    return;
 }
@@ -588,7 +588,7 @@ void reg_f3d_sym<T>::GetDeformationField()
                                   );
    reg_spline_getDeformationField(this->backwardControlPointGrid,
                                   this->backwardDeformationFieldImage,
-                                  this->currentFloatingMask,
+                                  this->floatingMask,
                                   false, //composition
                                   true // bspline
                                   );
@@ -608,7 +608,7 @@ void reg_f3d_sym<T>::WarpFloatingImage(int inter)
    // Resample the floating image
    if(this->measure_dti==nullptr)
    {
-      reg_resampleImage(this->currentFloating,
+      reg_resampleImage(this->floating,
                         this->warped,
                         this->deformationFieldImage,
                         this->currentMask,
@@ -620,7 +620,7 @@ void reg_f3d_sym<T>::WarpFloatingImage(int inter)
       reg_defField_getJacobianMatrix(this->deformationFieldImage,
                                      this->forwardJacobianMatrix);
       /*DTI needs fixing!
-     reg_resampleImage(this->currentFloating,
+     reg_resampleImage(this->floating,
                         this->warped,
                         this->deformationFieldImage,
                         this->currentMask,
@@ -633,10 +633,10 @@ void reg_f3d_sym<T>::WarpFloatingImage(int inter)
    // Resample the reference image
    if(this->measure_dti==nullptr)
    {
-      reg_resampleImage(this->currentReference, // input image
+      reg_resampleImage(this->reference, // input image
                         this->backwardWarped, // warped input image
                         this->backwardDeformationFieldImage, // deformation field
-                        this->currentFloatingMask, // mask
+                        this->floatingMask, // mask
                         inter, // interpolation type
                         this->warpedPaddingValue); // padding value
    }
@@ -645,10 +645,10 @@ void reg_f3d_sym<T>::WarpFloatingImage(int inter)
       reg_defField_getJacobianMatrix(this->backwardDeformationFieldImage,
                                      this->backwardJacobianMatrix);
      /* DTI needs fixing
-    reg_resampleImage(this->currentReference, // input image
+    reg_resampleImage(this->reference, // input image
                         this->backwardWarped, // warped input image
                         this->backwardDeformationFieldImage, // deformation field
-                        this->currentFloatingMask, // mask
+                        this->floatingMask, // mask
                         inter, // interpolation type
                         this->warpedPaddingValue, // padding value
                         this->measure_dti->GetActiveTimepoints(),
@@ -673,13 +673,13 @@ double reg_f3d_sym<T>::ComputeJacobianBasedPenaltyTerm(int type)
    if(type==2)
    {
       backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(this->backwardControlPointGrid,
-                                                              this->currentFloating,
+                                                              this->floating,
                                                               false);
    }
    else
    {
       backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(this->backwardControlPointGrid,
-                                                              this->currentFloating,
+                                                              this->floating,
                                                               this->jacobianLogApproximation);
    }
    unsigned int maxit=5;
@@ -690,13 +690,13 @@ double reg_f3d_sym<T>::ComputeJacobianBasedPenaltyTerm(int type)
       if(type==2)
       {
          backwardPenaltyTerm = reg_spline_correctFolding(this->backwardControlPointGrid,
-                                                         this->currentFloating,
+                                                         this->floating,
                                                          false);
       }
       else
       {
          backwardPenaltyTerm = reg_spline_correctFolding(this->backwardControlPointGrid,
-                                                         this->currentFloating,
+                                                         this->floating,
                                                          this->jacobianLogApproximation);
       }
 #ifndef NDEBUG
@@ -799,8 +799,8 @@ void reg_f3d_sym<T>::GetVoxelBasedGradient()
                                   0.f);
    // The intensity gradient is first computed
    //    if(this->measure_dti!=nullptr){
-   //        reg_getImageGradient(this->currentFloating,
-   //                             this->warImgGradient,
+   //        reg_getImageGradient(this->floating,
+   //                             this->warpedGradient,
    //                             this->deformationFieldImage,
    //                             this->currentMask,
    //                             this->interpolation,
@@ -809,10 +809,10 @@ void reg_f3d_sym<T>::GetVoxelBasedGradient()
    //                             this->forwardJacobianMatrix,
    //                             this->warped);
 
-   //        reg_getImageGradient(this->currentReference,
+   //        reg_getImageGradient(this->reference,
    //                             this->backwardWarpedGradientImage,
    //                             this->backwardDeformationFieldImage,
-   //                             this->currentFloatingMask,
+   //                             this->floatingMask,
    //                             this->interpolation,
    //                             this->warpedPaddingValue,
    //                             this->measure_dti->GetActiveTimepoints(),
@@ -825,19 +825,19 @@ void reg_f3d_sym<T>::GetVoxelBasedGradient()
    //    }
 
 
-   for(int t=0; t<this->currentReference->nt; ++t){
-      reg_getImageGradient(this->currentFloating,
-                           this->warImgGradient,
+   for(int t=0; t<this->reference->nt; ++t){
+      reg_getImageGradient(this->floating,
+                           this->warpedGradient,
                            this->deformationFieldImage,
                            this->currentMask,
                            this->interpolation,
                            this->warpedPaddingValue,
                            t);
 
-      reg_getImageGradient(this->currentReference,
+      reg_getImageGradient(this->reference,
                            this->backwardWarpedGradientImage,
                            this->backwardDeformationFieldImage,
-                           this->currentFloatingMask,
+                           this->floatingMask,
                            this->interpolation,
                            this->warpedPaddingValue,
                            t);
@@ -913,9 +913,9 @@ void reg_f3d_sym<T>::GetSimilarityMeasureGradient()
    }
    // The backward node based sim measure gradient is extracted
    mat44 reorientation;
-   if(this->currentReference->sform_code>0)
-      reorientation = this->currentReference->sto_ijk;
-   else reorientation = this->currentReference->qto_ijk;
+   if(this->reference->sform_code>0)
+      reorientation = this->reference->sto_ijk;
+   else reorientation = this->reference->qto_ijk;
    reg_voxelCentric2NodeCentric(this->backwardTransformationGradient,
                                 this->backwardVoxelBasedMeasureGradientImage,
                                 this->similarityWeight,
@@ -937,7 +937,7 @@ void reg_f3d_sym<T>::GetJacobianBasedGradient()
    reg_f3d<T>::GetJacobianBasedGradient();
 
    reg_spline_getJacobianPenaltyTermGradient(this->backwardControlPointGrid,
-                                             this->currentFloating,
+                                             this->floating,
                                              this->backwardTransformationGradient,
                                              this->jacobianLogWeight,
                                              this->jacobianLogApproximation);
@@ -1042,7 +1042,7 @@ void reg_f3d_sym<T>::GetApproximatedGradient()
    // Loop over every control points
    T *gridPtr = static_cast<T *>(this->backwardControlPointGrid->data);
    T *gradPtr = static_cast<T *>(this->backwardTransformationGradient->data);
-   T eps = this->currentFloating->dx/1000.f;
+   T eps = this->floating->dx/1000.f;
    for(size_t i=0; i<this->backwardControlPointGrid->nvox; i++)
    {
       T currentValue = this->optimiser->GetBestDOF_b()[i];
@@ -1078,11 +1078,11 @@ T reg_f3d_sym<T>::NormaliseGradient()
       for(size_t i=0; i<voxNumber; i++)
       {
          T valX=0,valY=0,valZ=0;
-         if(this->optimiseX==true)
+         if(this->optimiseX)
             valX = *bckPtrX++;
-         if(this->optimiseY==true)
+         if(this->optimiseY)
             valY = *bckPtrY++;
-         if(this->optimiseZ==true)
+         if(this->optimiseZ)
             valZ = *bckPtrZ++;
          T length = (T)(sqrt(valX*valX + valY*valY + valZ*valZ));
          maxGradValue = (length>maxGradValue)?length:maxGradValue;
@@ -1093,9 +1093,9 @@ T reg_f3d_sym<T>::NormaliseGradient()
       for(size_t i=0; i<voxNumber; i++)
       {
          T valX=0,valY=0;
-         if(this->optimiseX==true)
+         if(this->optimiseX)
             valX = *bckPtrX++;
-         if(this->optimiseY==true)
+         if(this->optimiseY)
             valY = *bckPtrY++;
          T length = (T)(sqrt(valX*valX + valY*valY));
          maxGradValue = (length>maxGradValue)?length:maxGradValue;
@@ -1222,7 +1222,7 @@ void reg_f3d_sym<T>::GetInverseConsistencyErrorField(bool forceAll)
                                   );
    reg_spline_getDeformationField(this->controlPointGrid,
                                   this->backwardDeformationFieldImage,
-                                  this->currentFloatingMask,
+                                  this->floatingMask,
                                   true, // composition
                                   true // use B-Spline
                                   );
@@ -1283,7 +1283,7 @@ double reg_f3d_sym<T>::GetInverseConsistencyPenaltyTerm()
       T *dispPtrZ=&dispPtrY[voxelNumber];
       for(size_t i=0; i<voxelNumber; ++i)
       {
-         if(this->currentFloatingMask[i]>-1)
+         if(this->floatingMask[i]>-1)
          {
             double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]);
             berror += dist;
@@ -1294,7 +1294,7 @@ double reg_f3d_sym<T>::GetInverseConsistencyPenaltyTerm()
    {
       for(size_t i=0; i<voxelNumber; ++i)
       {
-         if(this->currentFloatingMask[i]>-1)
+         if(this->floatingMask[i]>-1)
          {
             double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]);
             berror += dist;
@@ -1350,7 +1350,7 @@ void reg_f3d_sym<T>::GetInverseConsistencyGradient()
    defPtrZ=&defPtrY[backwardVoxelNumber];
    for(size_t i=0; i<backwardVoxelNumber; ++i)
    {
-      if(this->currentFloatingMask[i]<0)
+      if(this->floatingMask[i]<0)
       {
          defPtrX[i]=0;
          defPtrY[i]=0;
@@ -1463,9 +1463,9 @@ void reg_f3d_sym<T>::UpdateParameters(float scale)
    T *gradient_b=this->optimiser->GetGradient_b();
 
    // Update the control point position
-   if(this->optimiser->GetOptimiseX()==true &&
-         this->optimiser->GetOptimiseY()==true &&
-         this->optimiser->GetOptimiseZ()==true)
+   if(this->optimiser->GetOptimiseX() &&
+         this->optimiser->GetOptimiseY() &&
+         this->optimiser->GetOptimiseZ())
    {
       // Update the values for all axis displacement
       for(size_t i=0; i<this->optimiser->GetDOFNumber_b(); ++i)
@@ -1477,7 +1477,7 @@ void reg_f3d_sym<T>::UpdateParameters(float scale)
    {
       size_t voxNumber_b = this->optimiser->GetVoxNumber_b();
       // Update the values for the x-axis displacement
-      if(this->optimiser->GetOptimiseX()==true)
+      if(this->optimiser->GetOptimiseX())
       {
          for(size_t i=0; i<voxNumber_b; ++i)
          {
@@ -1485,7 +1485,7 @@ void reg_f3d_sym<T>::UpdateParameters(float scale)
          }
       }
       // Update the values for the y-axis displacement
-      if(this->optimiser->GetOptimiseY()==true)
+      if(this->optimiser->GetOptimiseY())
       {
          T *currentDOFY_b=&currentDOF_b[voxNumber_b];
          T *bestDOFY_b=&bestDOF_b[voxNumber_b];
@@ -1496,7 +1496,7 @@ void reg_f3d_sym<T>::UpdateParameters(float scale)
          }
       }
       // Update the values for the z-axis displacement
-      if(this->optimiser->GetOptimiseZ()==true && this->optimiser->GetNDim()>2)
+      if(this->optimiser->GetOptimiseZ() && this->optimiser->GetNDim()>2)
       {
          T *currentDOFZ_b=&currentDOF_b[2*voxNumber_b];
          T *bestDOFZ_b=&bestDOF_b[2*voxNumber_b];
@@ -1649,98 +1649,98 @@ void reg_f3d_sym<T>::InitialiseSimilarity()
          this->measure_nmi->SetTimepointWeight(i,1.0);
    }
    if(this->measure_nmi!=nullptr)
-      this->measure_nmi->InitialiseMeasure(this->currentReference,
-                                           this->currentFloating,
+      this->measure_nmi->InitialiseMeasure(this->reference,
+                                           this->floating,
                                            this->currentMask,
                                            this->warped,
-                                           this->warImgGradient,
+                                           this->warpedGradient,
                                            this->voxelBasedMeasureGradient,
                                            this->localWeightSimCurrent,
-                                           this->currentFloatingMask,
+                                           this->floatingMask,
                                            this->backwardWarped,
                                            this->backwardWarpedGradientImage,
                                            this->backwardVoxelBasedMeasureGradientImage
                                            );
 
    if(this->measure_ssd!=nullptr)
-      this->measure_ssd->InitialiseMeasure(this->currentReference,
-                                           this->currentFloating,
+      this->measure_ssd->InitialiseMeasure(this->reference,
+                                           this->floating,
                                            this->currentMask,
                                            this->warped,
-                                           this->warImgGradient,
+                                           this->warpedGradient,
                                            this->voxelBasedMeasureGradient,
                                            this->localWeightSimCurrent,
-                                           this->currentFloatingMask,
+                                           this->floatingMask,
                                            this->backwardWarped,
                                            this->backwardWarpedGradientImage,
                                            this->backwardVoxelBasedMeasureGradientImage
                                            );
 
    if(this->measure_kld!=nullptr)
-      this->measure_kld->InitialiseMeasure(this->currentReference,
-                                           this->currentFloating,
+      this->measure_kld->InitialiseMeasure(this->reference,
+                                           this->floating,
                                            this->currentMask,
                                            this->warped,
-                                           this->warImgGradient,
+                                           this->warpedGradient,
                                            this->voxelBasedMeasureGradient,
                                            this->localWeightSimCurrent,
-                                           this->currentFloatingMask,
+                                           this->floatingMask,
                                            this->backwardWarped,
                                            this->backwardWarpedGradientImage,
                                            this->backwardVoxelBasedMeasureGradientImage
                                            );
 
    if(this->measure_lncc!=nullptr)
-      this->measure_lncc->InitialiseMeasure(this->currentReference,
-                                            this->currentFloating,
+      this->measure_lncc->InitialiseMeasure(this->reference,
+                                            this->floating,
                                             this->currentMask,
                                             this->warped,
-                                            this->warImgGradient,
+                                            this->warpedGradient,
                                             this->voxelBasedMeasureGradient,
                                             this->localWeightSimCurrent,
-                                            this->currentFloatingMask,
+                                            this->floatingMask,
                                             this->backwardWarped,
                                             this->backwardWarpedGradientImage,
                                             this->backwardVoxelBasedMeasureGradientImage
                                             );
 
    if(this->measure_dti!=nullptr)
-      this->measure_dti->InitialiseMeasure(this->currentReference,
-                                           this->currentFloating,
+      this->measure_dti->InitialiseMeasure(this->reference,
+                                           this->floating,
                                            this->currentMask,
                                            this->warped,
-                                           this->warImgGradient,
+                                           this->warpedGradient,
                                            this->voxelBasedMeasureGradient,
                                            this->localWeightSimCurrent,
-                                           this->currentFloatingMask,
+                                           this->floatingMask,
                                            this->backwardWarped,
                                            this->backwardWarpedGradientImage,
                                            this->backwardVoxelBasedMeasureGradientImage
                                            );
 
    if(this->measure_mind!=nullptr)
-      this->measure_mind->InitialiseMeasure(this->currentReference,
-                                            this->currentFloating,
+      this->measure_mind->InitialiseMeasure(this->reference,
+                                            this->floating,
                                             this->currentMask,
                                             this->warped,
-                                            this->warImgGradient,
+                                            this->warpedGradient,
                                             this->voxelBasedMeasureGradient,
                                             this->localWeightSimCurrent,
-                                            this->currentFloatingMask,
+                                            this->floatingMask,
                                             this->backwardWarped,
                                             this->backwardWarpedGradientImage,
                                             this->backwardVoxelBasedMeasureGradientImage
                                             );
 
    if(this->measure_mindssc!=nullptr)
-      this->measure_mindssc->InitialiseMeasure(this->currentReference,
-                                               this->currentFloating,
+      this->measure_mindssc->InitialiseMeasure(this->reference,
+                                               this->floating,
                                                this->currentMask,
                                                this->warped,
-                                               this->warImgGradient,
+                                               this->warpedGradient,
                                                this->voxelBasedMeasureGradient,
                                                this->localWeightSimCurrent,
-                                               this->currentFloatingMask,
+                                               this->floatingMask,
                                                this->backwardWarped,
                                                this->backwardWarpedGradientImage,
                                                this->backwardVoxelBasedMeasureGradientImage
@@ -1766,17 +1766,17 @@ nifti_image **reg_f3d_sym<T>::GetWarpedImage()
       reg_exit();
    }
 
-   reg_f3d_sym<T>::currentReference = this->inputReference;
-   reg_f3d_sym<T>::currentFloating = this->inputFloating;
+   reg_f3d_sym<T>::reference = this->inputReference;
+   reg_f3d_sym<T>::floating = this->inputFloating;
    reg_f3d_sym<T>::currentMask = nullptr;
-   reg_f3d_sym<T>::currentFloatingMask = nullptr;
+   reg_f3d_sym<T>::floatingMask = nullptr;
 
    reg_f3d_sym<T>::AllocateWarped();
    reg_f3d_sym<T>::AllocateDeformationField();
 
    reg_f3d_sym<T>::WarpFloatingImage(3); // cubic spline interpolation
 
-   reg_f3d_sym<T>::ClearDeformationField();
+   reg_f3d_sym<T>::DeallocateDeformationField();
 
    nifti_image **warpedImage=(nifti_image **)malloc(2*sizeof(nifti_image *));
    warpedImage[0] = nifti_copy_nim_info(this->warped);
@@ -1795,7 +1795,7 @@ nifti_image **reg_f3d_sym<T>::GetWarpedImage()
    warpedImage[1]->data=(void *)malloc(warpedImage[1]->nvox*warpedImage[1]->nbyper);
    memcpy(warpedImage[1]->data, this->backwardWarped->data, warpedImage[1]->nvox*warpedImage[1]->nbyper);
 
-   reg_f3d_sym<T>::ClearWarped();
+   reg_f3d_sym<T>::DeallocateWarped();
 #ifndef NDEBUG
    reg_print_fct_debug("reg_f3d_sym<T>::GetWarpedImage");
 #endif
diff --git a/reg-lib/_reg_f3d_sym.h b/reg-lib/_reg_f3d_sym.h
index 691bb966..6e09a0c6 100644
--- a/reg-lib/_reg_f3d_sym.h
+++ b/reg-lib/_reg_f3d_sym.h
@@ -24,7 +24,7 @@ class reg_f3d_sym : public reg_f3d<T>
 
    nifti_image *floatingMaskImage;
    int **floatingMaskPyramid;
-   int *currentFloatingMask;
+   int *floatingMask;
    int *backwardActiveVoxelNumber;
 
    nifti_image *backwardControlPointGrid;
@@ -45,17 +45,17 @@ class reg_f3d_sym : public reg_f3d<T>
    double bestIC;
 
    virtual void AllocateWarped();
-   virtual void ClearWarped();
+   virtual void DeallocateWarped();
    virtual void AllocateDeformationField();
-   virtual void ClearDeformationField();
+   virtual void DeallocateDeformationField();
    virtual void AllocateWarpedGradient();
-   virtual void ClearWarpedGradient();
+   virtual void DeallocateWarpedGradient();
    virtual void AllocateVoxelBasedMeasureGradient();
-   virtual void ClearVoxelBasedMeasureGradient();
+   virtual void DeallocateVoxelBasedMeasureGradient();
    virtual void AllocateTransformationGradient();
-   virtual void ClearTransformationGradient();
+   virtual void DeallocateTransformationGradient();
    virtual T InitialiseCurrentLevel();
-   virtual void ClearCurrentInputImage();
+   virtual void DeallocateCurrentInputImage();
 
    virtual double ComputeBendingEnergyPenaltyTerm();
    virtual double ComputeLinearEnergyPenaltyTerm();
diff --git a/reg-lib/_reg_polyAffine.cpp b/reg-lib/_reg_polyAffine.cpp
index 9c059c0b..dd01abca 100644
--- a/reg-lib/_reg_polyAffine.cpp
+++ b/reg-lib/_reg_polyAffine.cpp
@@ -128,7 +128,7 @@ void reg_polyAffine<T>::AllocateTransformationGradient()
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_polyAffine<T>::ClearTransformationGradient()
+void reg_polyAffine<T>::DeallocateTransformationGradient()
 {
 
 }
diff --git a/reg-lib/_reg_polyAffine.h b/reg-lib/_reg_polyAffine.h
index 661fa050..dbbc831a 100644
--- a/reg-lib/_reg_polyAffine.h
+++ b/reg-lib/_reg_polyAffine.h
@@ -31,7 +31,7 @@ class reg_polyAffine : public reg_base<T>
    void PrintCurrentObjFunctionValue(T);
    void PrintInitialObjFunctionValue();
    void AllocateTransformationGradient();
-   void ClearTransformationGradient();
+   void DeallocateTransformationGradient();
 
 public:
    reg_polyAffine(int refTimePoint,int floTimePoint);
diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
index b71f1f04..e5e12bbb 100644
--- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
@@ -43,9 +43,9 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) :
     program = sContext->CreateProgram(clKernelPath.c_str());
 
     //get cpu ptrs
-    deformationFieldImage = con->AladinContent::GetCurrentDeformationField();
+    deformationFieldImage = con->AladinContent::GetDeformationField();
     affineTransformation = con->AladinContent::GetTransformationMatrix();
-    referenceMatrix = AladinContent::GetXYZMatrix(deformationFieldImage);
+    referenceMatrix = AladinContent::GetXYZMatrix(*deformationFieldImage);
 
     cl_int errNum;
     // Create OpenCL kernel
@@ -99,8 +99,7 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) {
     const size_t globalWorkSize[dims] = {xBlocks * xThreads, yBlocks * yThreads, zBlocks * zThreads};
     const size_t localWorkSize[dims] = {xThreads, yThreads, zThreads};
 
-    mat44 transformationMatrix = (compose == true) ?
-        *affineTransformation : reg_mat44_mul(affineTransformation, referenceMatrix);
+    mat44 transformationMatrix = compose ? *affineTransformation : reg_mat44_mul(affineTransformation, referenceMatrix);
 
     float* trans = (float *)malloc(16 * sizeof(float));
     mat44ToCptr(transformationMatrix, trans);
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index 1788160c..90153818 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -2,17 +2,17 @@
 #include "_reg_tools.h"
 
 /* *************************************************************** */
-ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn,
-                                 nifti_image *currentFloatingIn,
-                                 int *currentReferenceMaskIn,
+ClAladinContent::ClAladinContent(nifti_image *referenceIn,
+                                 nifti_image *floatingIn,
+                                 int *referenceMaskIn,
                                  mat44 *transformationMatrixIn,
                                  size_t bytesIn,
                                  const unsigned int percentageOfBlocks,
                                  const unsigned int inlierLts,
                                  int blockStepSize) :
-    AladinContent(currentReferenceIn,
-                  currentFloatingIn,
-                  currentReferenceMaskIn,
+    AladinContent(referenceIn,
+                  floatingIn,
+                  referenceMaskIn,
                   transformationMatrixIn,
                   bytesIn,
                   percentageOfBlocks,
@@ -36,12 +36,12 @@ void ClAladinContent::InitVars() {
     totalBlockClmem = nullptr;
     maskClmem = nullptr;
 
-    if (currentReference != nullptr && currentReference->nbyper != NIFTI_TYPE_FLOAT32)
-        reg_tools_changeDatatype<float>(currentReference);
-    if (currentFloating != nullptr && currentFloating->nbyper != NIFTI_TYPE_FLOAT32) {
-        reg_tools_changeDatatype<float>(currentFloating);
-        if (currentWarped != nullptr)
-            reg_tools_changeDatatype<float>(currentWarped);
+    if (reference != nullptr && reference->nbyper != NIFTI_TYPE_FLOAT32)
+        reg_tools_changeDatatype<float>(reference);
+    if (floating != nullptr && floating->nbyper != NIFTI_TYPE_FLOAT32) {
+        reg_tools_changeDatatype<float>(floating);
+        if (warped != nullptr)
+            reg_tools_changeDatatype<float>(warped);
     }
     sContext = &ClContextSingleton::Instance();
     clContext = sContext->GetContext();
@@ -50,32 +50,32 @@ void ClAladinContent::InitVars() {
 }
 /* *************************************************************** */
 void ClAladinContent::AllocateClPtrs() {
-    if (currentWarped != nullptr) {
-        warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, currentWarped->nvox * sizeof(float), currentWarped->data, &errNum);
+    if (warped != nullptr) {
+        warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum);
         sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): ");
     }
-    if (currentDeformationField != nullptr) {
-        deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * currentDeformationField->nvox, currentDeformationField->data, &errNum);
+    if (deformationField != nullptr) {
+        deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * deformationField->nvox, deformationField->data, &errNum);
         sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): ");
     }
-    if (currentFloating != nullptr) {
-        floatingImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * currentFloating->nvox, currentFloating->data, &errNum);
-        sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (currentFloating): ");
+    if (floating != nullptr) {
+        floatingImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * floating->nvox, floating->data, &errNum);
+        sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (floating): ");
 
         float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44));
-        mat44ToCptr(*GetIJKMatrix(currentFloating), sourceIJKMatrix_h);
+        mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h);
         floMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), sourceIJKMatrix_h, &errNum);
         sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): ");
         free(sourceIJKMatrix_h);
     }
-    if (currentReference != nullptr) {
+    if (reference != nullptr) {
         referenceImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                             sizeof(float) * currentReference->nvox,
-                                             currentReference->data, &errNum);
+                                             sizeof(float) * reference->nvox,
+                                             reference->data, &errNum);
         sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): ");
 
         float* targetMat = (float *)malloc(sizeof(mat44)); //freed
-        mat44ToCptr(*GetXYZMatrix(currentReference), targetMat);
+        mat44ToCptr(*GetXYZMatrix(*reference), targetMat);
         refMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), targetMat, &errNum);
         sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): ");
         free(targetMat);
@@ -103,23 +103,23 @@ void ClAladinContent::AllocateClPtrs() {
             sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): ");
         }
     }
-    if (currentReferenceMask != nullptr && currentReference != nullptr) {
+    if (referenceMask != nullptr && reference != nullptr) {
         maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                   currentReference->nx * currentReference->ny * currentReference->nz * sizeof(int),
-                                   currentReferenceMask, &errNum);
+                                   reference->nx * reference->ny * reference->nz * sizeof(int),
+                                   referenceMask, &errNum);
         sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): ");
     }
 }
 /* *************************************************************** */
-nifti_image* ClAladinContent::GetCurrentWarped(int datatype) {
-    DownloadImage(currentWarped, warpedImageClmem, datatype);
-    return currentWarped;
+nifti_image* ClAladinContent::GetWarped(int datatype, int index) {
+    DownloadImage(warped, warpedImageClmem, datatype);
+    return warped;
 }
 /* *************************************************************** */
-nifti_image* ClAladinContent::GetCurrentDeformationField() {
-    errNum = clEnqueueReadBuffer(commandQueue, deformationFieldClmem, CL_TRUE, 0, currentDeformationField->nvox * sizeof(float), currentDeformationField->data, 0, nullptr, nullptr); //CLCONTEXT
-    sContext->checkErrNum(errNum, "Get: failed currentDeformationField: ");
-    return currentDeformationField;
+nifti_image* ClAladinContent::GetDeformationField() {
+    errNum = clEnqueueReadBuffer(commandQueue, deformationFieldClmem, CL_TRUE, 0, deformationField->nvox * sizeof(float), deformationField->data, 0, nullptr, nullptr); //CLCONTEXT
+    sContext->checkErrNum(errNum, "Get: failed deformationField: ");
+    return deformationField;
 }
 /* *************************************************************** */
 _reg_blockMatchingParam* ClAladinContent::GetBlockMatchingParams() {
@@ -134,37 +134,36 @@ void ClAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     AladinContent::SetTransformationMatrix(transformationMatrixIn);
 }
 /* *************************************************************** */
-void ClAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) {
-    if (currentDeformationField != nullptr)
+void ClAladinContent::SetDeformationField(nifti_image *deformationFieldIn) {
+    if (deformationField != nullptr)
         clReleaseMemObject(deformationFieldClmem);
 
-    AladinContent::SetCurrentDeformationField(currentDeformationFieldIn);
-    deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, currentDeformationField->nvox * sizeof(float), currentDeformationField->data, &errNum);
-    sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentDeformationField failed to allocate memory (deformationFieldClmem): ");
+    AladinContent::SetDeformationField(deformationFieldIn);
+    deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, deformationField->nvox * sizeof(float), deformationField->data, &errNum);
+    sContext->checkErrNum(errNum, "ClAladinContent::SetDeformationField failed to allocate memory (deformationFieldClmem): ");
 }
 /* *************************************************************** */
-void ClAladinContent::SetCurrentReferenceMask(int *currentReferenceMaskIn) {
-    if (currentReferenceMask != nullptr)
+void ClAladinContent::SetReferenceMask(int *referenceMaskIn) {
+    if (referenceMask != nullptr)
         clReleaseMemObject(maskClmem);
-    AladinContent::SetCurrentReferenceMask(currentReferenceMaskIn);
-    maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, currentReference->nvox * sizeof(int), currentReferenceMask, &errNum);
-    sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentReferenceMask failed to allocate memory (maskClmem): ");
+    AladinContent::SetReferenceMask(referenceMaskIn);
+    maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, reference->nvox * sizeof(int), referenceMask, &errNum);
+    sContext->checkErrNum(errNum, "ClAladinContent::SetReferenceMask failed to allocate memory (maskClmem): ");
 }
 /* *************************************************************** */
-void ClAladinContent::SetCurrentWarped(nifti_image *currentWarped) {
-    if (currentWarped != nullptr) {
+void ClAladinContent::SetWarped(nifti_image *warped) {
+    if (warped != nullptr) {
         clReleaseMemObject(warpedImageClmem);
     }
-    if (currentWarped->nbyper != NIFTI_TYPE_FLOAT32) {
-        reg_tools_changeDatatype<float>(currentWarped);
+    if (warped->nbyper != NIFTI_TYPE_FLOAT32) {
+        reg_tools_changeDatatype<float>(warped);
     }
-    AladinContent::SetCurrentWarped(currentWarped);
-    warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, currentWarped->nvox * sizeof(float), currentWarped->data, &errNum);
-    sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentWarped failed to allocate memory (warpedImageClmem): ");
+    AladinContent::SetWarped(warped);
+    warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum);
+    sContext->checkErrNum(errNum, "ClAladinContent::SetWarped failed to allocate memory (warpedImageClmem): ");
 }
 /* *************************************************************** */
 void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
-
     AladinContent::SetBlockMatchingParams(bmp);
     if (blockMatchingParams->referencePosition != nullptr) {
         clReleaseMemObject(referencePositionClmem);
@@ -270,9 +269,7 @@ DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) {
 }
 /* *************************************************************** */
 template<class T>
-void ClAladinContent::FillImageData(nifti_image *image,
-                                    cl_mem memoryObject,
-                                    int type) {
+void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int type) {
     size_t size = image->nvox;
     float* buffer = nullptr;
     buffer = (float*)malloc(size * sizeof(float));
@@ -296,9 +293,7 @@ void ClAladinContent::FillImageData(nifti_image *image,
     free(buffer);
 }
 /* *************************************************************** */
-void ClAladinContent::DownloadImage(nifti_image *image,
-                                    cl_mem memoryObject,
-                                    int datatype) {
+void ClAladinContent::DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype) {
     switch (datatype) {
     case NIFTI_TYPE_FLOAT32:
         FillImageData<float>(image, memoryObject, datatype);
@@ -333,19 +328,19 @@ void ClAladinContent::DownloadImage(nifti_image *image,
 }
 /* *************************************************************** */
 void ClAladinContent::FreeClPtrs() {
-    if (currentReference != nullptr) {
+    if (reference != nullptr) {
         clReleaseMemObject(referenceImageClmem);
         clReleaseMemObject(refMatClmem);
     }
-    if (currentFloating != nullptr) {
+    if (floating != nullptr) {
         clReleaseMemObject(floatingImageClmem);
         clReleaseMemObject(floMatClmem);
     }
-    if (currentWarped != nullptr)
+    if (warped != nullptr)
         clReleaseMemObject(warpedImageClmem);
-    if (currentDeformationField != nullptr)
+    if (deformationField != nullptr)
         clReleaseMemObject(deformationFieldClmem);
-    if (currentReferenceMask != nullptr)
+    if (referenceMask != nullptr)
         clReleaseMemObject(maskClmem);
     if (blockMatchingParams != nullptr) {
         clReleaseMemObject(totalBlockClmem);
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
index b4650549..d7a8646a 100644
--- a/reg-lib/cl/ClAladinContent.h
+++ b/reg-lib/cl/ClAladinContent.h
@@ -12,9 +12,9 @@
 class ClAladinContent: public AladinContent {
 public:
     //constructors
-    ClAladinContent(nifti_image *currentReferenceIn,
-                    nifti_image *currentFloatingIn,
-                    int *currentReferenceMaskIn = nullptr,
+    ClAladinContent(nifti_image *referenceIn,
+                    nifti_image *floatingIn,
+                    int *referenceMaskIn = nullptr,
                     mat44 *transformationMatrixIn = nullptr,
                     size_t bytesIn = sizeof(float),
                     const unsigned int percentageOfBlocks = 0,
@@ -40,14 +40,14 @@ class ClAladinContent: public AladinContent {
 
     // CPU getters with data downloaded from device
     _reg_blockMatchingParam* GetBlockMatchingParams() override;
-    nifti_image* GetCurrentDeformationField() override;
-    nifti_image* GetCurrentWarped(int typ) override;
+    nifti_image* GetDeformationField() override;
+    nifti_image* GetWarped(int datatype, int index = 0) override;
 
     // Setters
     void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    void SetCurrentWarped(nifti_image *warpedImageIn) override;
-    void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) override;
-    void SetCurrentReferenceMask(int *currentReferenceMaskIn) override;
+    void SetWarped(nifti_image *warpedImageIn) override;
+    void SetDeformationField(nifti_image *deformationFieldIn) override;
+    void SetReferenceMask(int *referenceMaskIn) override;
     void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
 
 private:
diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp
index a26e3c70..9522a465 100644
--- a/reg-lib/cl/ClBlockMatchingKernel.cpp
+++ b/reg-lib/cl/ClBlockMatchingKernel.cpp
@@ -59,7 +59,7 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern
    clReferenceMat = con->GetRefMatClmem();
 
    //get cpu ptrs
-   reference = con->AladinContent::GetCurrentReference();
+   reference = con->AladinContent::GetReference();
    params = con->AladinContent::GetBlockMatchingParams();
 
 }
diff --git a/reg-lib/cl/ClKernelFactory.cpp b/reg-lib/cl/ClKernelFactory.cpp
index f092e562..d7d4fdd7 100644
--- a/reg-lib/cl/ClKernelFactory.cpp
+++ b/reg-lib/cl/ClKernelFactory.cpp
@@ -6,7 +6,7 @@
 #include "ClOptimiseKernel.h"
 #include "AladinContent.h"
 
-Kernel* ClKernelFactory::ProduceKernel(std::string name, Content *con) const {
+Kernel* ClKernelFactory::Produce(std::string name, Content *con) const {
 	if (name == AffineDeformationFieldKernel::GetName()) return new ClAffineDeformationFieldKernel(con);
 	else if (name == ConvolutionKernel::GetName()) return new ClConvolutionKernel();
 	else if (name == BlockMatchingKernel::GetName()) return new ClBlockMatchingKernel(con);
diff --git a/reg-lib/cl/ClKernelFactory.h b/reg-lib/cl/ClKernelFactory.h
index 4175569b..ef4791e6 100644
--- a/reg-lib/cl/ClKernelFactory.h
+++ b/reg-lib/cl/ClKernelFactory.h
@@ -4,5 +4,5 @@
 
 class ClKernelFactory: public KernelFactory {
 public:
-   Kernel* ProduceKernel(std::string name, Content *con) const;
+   Kernel* Produce(std::string name, Content *con) const;
 };
diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp
index 1e8019d1..d21a4782 100644
--- a/reg-lib/cl/ClResampleImageKernel.cpp
+++ b/reg-lib/cl/ClResampleImageKernel.cpp
@@ -42,14 +42,14 @@ ClResampleImageKernel::ClResampleImageKernel(Content *conIn) : ResampleImageKern
     program = sContext->CreateProgram(clKernelPath.c_str());
 
     //get cpu ptrs
-    floatingImage = con->AladinContent::GetCurrentFloating();
-    warpedImage = con->AladinContent::GetCurrentWarped();
-    mask = con->AladinContent::GetCurrentReferenceMask();
+    floatingImage = con->AladinContent::GetFloating();
+    warpedImage = con->AladinContent::GetWarped();
+    mask = con->AladinContent::GetReferenceMask();
 
     //get cl ptrs
-    clCurrentFloating = con->GetFloatingImageArrayClmem();
-    clCurrentDeformationField = con->GetDeformationFieldArrayClmem();
-    clCurrentWarped = con->GetWarpedImageClmem();
+    clFloating = con->GetFloatingImageArrayClmem();
+    clDeformationField = con->GetDeformationFieldArrayClmem();
+    clWarped = con->GetWarpedImageClmem();
     clMask = con->GetMaskClmem();
     floMat = con->GetFloMatClmem();
 
@@ -104,11 +104,11 @@ void ClResampleImageKernel::Calculate(int interp,
 
     int datatype = this->floatingImage->datatype;
 
-    errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &this->clCurrentFloating);
+    errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &this->clFloating);
     sContext->checkErrNum(errNum, "Error setting interp kernel arguments 0.");
-    errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &this->clCurrentDeformationField);
+    errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &this->clDeformationField);
     sContext->checkErrNum(errNum, "Error setting interp kernel arguments 1.");
-    errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &this->clCurrentWarped);
+    errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &this->clWarped);
     sContext->checkErrNum(errNum, "Error setting interp kernel arguments 2.");
     errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &this->clMask);
     sContext->checkErrNum(errNum, "Error setting interp kernel arguments 3.");
diff --git a/reg-lib/cl/ClResampleImageKernel.h b/reg-lib/cl/ClResampleImageKernel.h
index d0deddf5..4bdfde91 100644
--- a/reg-lib/cl/ClResampleImageKernel.h
+++ b/reg-lib/cl/ClResampleImageKernel.h
@@ -18,9 +18,9 @@ class ClResampleImageKernel: public ResampleImageKernel {
     cl_kernel kernel;
     cl_context clContext;
     cl_program program;
-    cl_mem clCurrentFloating;
-    cl_mem clCurrentDeformationField;
-    cl_mem clCurrentWarped;
+    cl_mem clFloating;
+    cl_mem clDeformationField;
+    cl_mem clWarped;
     cl_mem clMask;
     cl_mem floMat;
 };
diff --git a/reg-lib/cl/blockMatchingKernel.cl b/reg-lib/cl/blockMatchingKernel.cl
index dee7b13a..adf1955f 100755
--- a/reg-lib/cl/blockMatchingKernel.cl
+++ b/reg-lib/cl/blockMatchingKernel.cl
@@ -199,24 +199,24 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues,
 					// Check if the warped and reference are defined
 					const bool overlap = isfinite(rWarpedValue) && finiteReference;
 					// Compute the number of defined value in the block
-					const unsigned int currentWarpedSize = REDUCE2D(sData, overlap ? 1.0f : 0.0f, tid);
+					const unsigned int warpedSize = REDUCE2D(sData, overlap ? 1.0f : 0.0f, tid);
 
 					// Subsequent computation is performed if the more than half the voxel are defined
-					if (currentWarpedSize > 8){
+					if (warpedSize > 8){
 
 						// Store the reference variance and reference difference to the mean
 						float newReferenceTemp = referenceTemp;
 						float newReferenceVar = referenceVar;
 						// If the defined voxels are different the reference mean and variance are recomputed
-						if (currentWarpedSize != referenceSize){
+						if (warpedSize != referenceSize){
 							const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
-							const float newReferenceMean = REDUCE2D(sData, newReferenceValue, tid) / (float)currentWarpedSize;
+							const float newReferenceMean = REDUCE2D(sData, newReferenceValue, tid) / (float)warpedSize;
 							newReferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f;
 							newReferenceVar = REDUCE2D(sData, newReferenceTemp*newReferenceTemp, tid);
 						}
 
 						const float rChecked = overlap ? rWarpedValue : 0.0f;
-						const float warpedMean = REDUCE2D(sData, rChecked, tid) / (float)currentWarpedSize;
+						const float warpedMean = REDUCE2D(sData, rChecked, tid) / (float)warpedSize;
 						const float warpedTemp = overlap ? rWarpedValue - warpedMean : 0.0f;
 						const float warpedVar = REDUCE2D(sData, warpedTemp*warpedTemp, tid);
 
@@ -362,24 +362,24 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 						// Check if the warped and reference are defined
 						const bool overlap = isfinite(rWarpedValue) && finiteReference;
 						// Compute the number of defined value in the block
-						const unsigned int currentWarpedSize = REDUCE(sData, overlap ? 1.0f : 0.0f, tid);
+						const unsigned int warpedSize = REDUCE(sData, overlap ? 1.0f : 0.0f, tid);
 
 						// Subsequent computation is performed if the more than half the voxel are defined
-						if (currentWarpedSize > 32){
+						if (warpedSize > 32){
 
 							// Store the reference variance and reference difference to the mean
 							float newReferenceTemp = referenceTemp;
 							float newReferenceVar = referenceVar;
 							// If the defined voxels are different the reference mean and variance are recomputed
-							if (currentWarpedSize != referenceSize){
+							if (warpedSize != referenceSize){
 								const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
-								const float newReferenceMean = REDUCE(sData, newReferenceValue, tid) / currentWarpedSize;
+								const float newReferenceMean = REDUCE(sData, newReferenceValue, tid) / warpedSize;
 								newReferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f;
 								newReferenceVar = REDUCE(sData, newReferenceTemp*newReferenceTemp, tid);
 							}
 
 							const float rChecked = overlap ? rWarpedValue : 0.0f;
-							const float warpedMean = REDUCE(sData, rChecked, tid) / currentWarpedSize;
+							const float warpedMean = REDUCE(sData, rChecked, tid) / warpedSize;
 							const float warpedTemp = overlap ? rWarpedValue - warpedMean : 0.0f;
 							const float warpedVar = REDUCE(sData, warpedTemp*warpedTemp, tid);
 
diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp
index d21cda6a..d8916dac 100644
--- a/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp
@@ -4,9 +4,9 @@
 /* *************************************************************** */
 CpuAffineDeformationFieldKernel::CpuAffineDeformationFieldKernel(Content *conIn) : AffineDeformationFieldKernel() {
     AladinContent *con = static_cast<AladinContent*>(conIn);
-    deformationFieldImage = con->GetCurrentDeformationField();
+    deformationFieldImage = con->GetDeformationField();
     affineTransformation = con->GetTransformationMatrix();
-    mask = con->GetCurrentReferenceMask();
+    mask = con->GetReferenceMask();
 }
 /* *************************************************************** */
 void CpuAffineDeformationFieldKernel::Calculate(bool compose) {
diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.cpp b/reg-lib/cpu/CpuBlockMatchingKernel.cpp
index 4e4bd57e..51498554 100644
--- a/reg-lib/cpu/CpuBlockMatchingKernel.cpp
+++ b/reg-lib/cpu/CpuBlockMatchingKernel.cpp
@@ -3,10 +3,10 @@
 /* *************************************************************** */
 CpuBlockMatchingKernel::CpuBlockMatchingKernel(Content *conIn) : BlockMatchingKernel() {
     AladinContent *con = static_cast<AladinContent*>(conIn);
-    reference = con->GetCurrentReference();
-    warped = con->GetCurrentWarped();
+    reference = con->GetReference();
+    warped = con->GetWarped();
     params = con->GetBlockMatchingParams();
-    mask = con->GetCurrentReferenceMask();
+    mask = con->GetReferenceMask();
 }
 /* *************************************************************** */
 void CpuBlockMatchingKernel::Calculate() {
diff --git a/reg-lib/cpu/CpuKernelFactory.cpp b/reg-lib/cpu/CpuKernelFactory.cpp
index a0932709..4ef1612c 100644
--- a/reg-lib/cpu/CpuKernelFactory.cpp
+++ b/reg-lib/cpu/CpuKernelFactory.cpp
@@ -6,7 +6,7 @@
 #include "CpuOptimiseKernel.h"
 #include "AladinContent.h"
 
-Kernel* CpuKernelFactory::ProduceKernel(std::string name, Content *con) const {
+Kernel* CpuKernelFactory::Produce(std::string name, Content *con) const {
 	if (name == AffineDeformationFieldKernel::GetName()) return new CpuAffineDeformationFieldKernel(con);
 	else if (name == ConvolutionKernel::GetName()) return new CpuConvolutionKernel();
 	else if (name == BlockMatchingKernel::GetName()) return new CpuBlockMatchingKernel(con);
diff --git a/reg-lib/cpu/CpuKernelFactory.h b/reg-lib/cpu/CpuKernelFactory.h
index d3cbaa6a..d47a3461 100644
--- a/reg-lib/cpu/CpuKernelFactory.h
+++ b/reg-lib/cpu/CpuKernelFactory.h
@@ -4,5 +4,5 @@
 
 class CpuKernelFactory: public KernelFactory {
 public:
-   Kernel* ProduceKernel(std::string name, Content *con) const;
+   Kernel* Produce(std::string name, Content *con) const;
 };
diff --git a/reg-lib/cpu/CpuResampleImageKernel.cpp b/reg-lib/cpu/CpuResampleImageKernel.cpp
index 827e1058..a5791b13 100644
--- a/reg-lib/cpu/CpuResampleImageKernel.cpp
+++ b/reg-lib/cpu/CpuResampleImageKernel.cpp
@@ -4,10 +4,10 @@
 /* *************************************************************** */
 CpuResampleImageKernel::CpuResampleImageKernel(Content *conIn) : ResampleImageKernel() {
     AladinContent *con = static_cast<AladinContent*>(conIn);
-    floatingImage = con->GetCurrentFloating();
-    warpedImage = con->GetCurrentWarped();
-    deformationField = con->GetCurrentDeformationField();
-    mask = con->GetCurrentReferenceMask();
+    floatingImage = con->GetFloating();
+    warpedImage = con->GetWarped();
+    deformationField = con->GetDeformationField();
+    mask = con->GetReferenceMask();
 }
 /* *************************************************************** */
 void CpuResampleImageKernel::Calculate(int interp,
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index 7a4acbc0..ef3bf832 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -231,7 +231,7 @@ double reg_dti::GetSimilarityMeasureValue()
 template <class DTYPE>
 void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
       nifti_image *warpedImage,
-      nifti_image *warImgGradient,
+      nifti_image *warpedGradient,
       nifti_image *dtiMeasureGradientImage,
       int *mask,
       unsigned int * dtIndicies)
@@ -266,8 +266,8 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
    // THE FOLLOWING IS WRONG
    reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX");
    reg_exit();
-   unsigned int gradientVoxels = warImgGradient->nu*voxelNumber;
-   DTYPE *firstGradVox = static_cast<DTYPE *>(warImgGradient->data);
+   unsigned int gradientVoxels = warpedGradient->nu*voxelNumber;
+   DTYPE *firstGradVox = static_cast<DTYPE *>(warpedGradient->data);
    DTYPE *spatialGradXX = &firstGradVox[gradientVoxels*dtIndicies[0]];
    DTYPE *spatialGradXY = &firstGradVox[gradientVoxels*dtIndicies[1]];
    DTYPE *spatialGradYY = &firstGradVox[gradientVoxels*dtIndicies[2]];
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 5738783c..c3327ce2 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -78,7 +78,7 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
 extern "C++" template <class DTYPE>
 void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
       nifti_image *warpedImage,
-      nifti_image *warImgGradient,
+      nifti_image *warpedGradient,
       nifti_image *dtiMeasureGradientImage,
       int *mask,
       unsigned int * dtIndicies);
diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp
index 1be923f0..01bad1f0 100755
--- a/reg-lib/cpu/_reg_globalTrans.cpp
+++ b/reg-lib/cpu/_reg_globalTrans.cpp
@@ -34,7 +34,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation,
    else referenceMatrix=&(deformationFieldImage->qto_xyz);
 
    mat44 transformationMatrix;
-   if(composition==true)
+   if(composition)
       transformationMatrix = *affineTransformation;
    else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix);
 
@@ -61,7 +61,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation,
          voxel[0]=(double)x;
          if(mask[index]>-1)
          {
-            if(composition==true)
+            if(composition)
             {
                voxel[0] = (double) deformationFieldPtrX[index];
                voxel[1] = (double) deformationFieldPtrY[index];
@@ -97,7 +97,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation,
    else referenceMatrix=&(deformationFieldImage->qto_xyz);
 
    mat44 transformationMatrix;
-   if(composition==true)
+   if(composition)
       transformationMatrix = *affineTransformation;
    else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix);
 
@@ -126,7 +126,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation,
             voxel[0]=(double) x;
             if(mask[index]>-1)
             {
-               if(composition==true)
+               if(composition)
                {
                   voxel[0]= (double) deformationFieldPtrX[index];
                   voxel[1]= (double) deformationFieldPtrY[index];
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index 4acb641e..af0c8f8e 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -152,7 +152,7 @@ double reg_getKLDivergence(nifti_image *referenceImage,
        measure += measure_tp * timePointWeight[time] / num;
       }
    }
-   if(MrClean==true) free(maskPtr);
+   if(MrClean) free(maskPtr);
    return measure;
 }
 template double reg_getKLDivergence<float>
@@ -352,7 +352,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
          }
       }
    }
-   if(MrClean==true) free(maskPtr);
+   if(MrClean) free(maskPtr);
 }
 template void reg_getKLDivergenceVoxelBasedGradient<float>
 (nifti_image *,nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, int, double);
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index ca2a897b..89dac79b 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -522,7 +522,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    int *combinedMask,
                                    float *kernelStandardDeviation,
                                    nifti_image *correlationImage,
-                                   nifti_image *warImgGradient,
+                                   nifti_image *warpedGradient,
                                    nifti_image *measureGradientImage,
                                    int kernelType,
                                    int current_timepoint,
@@ -624,7 +624,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
       measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
    // Create pointers to the spatial gradient of the warped image
-   DTYPE *warpGradPtrX = static_cast<DTYPE *>(warImgGradient->data);
+   DTYPE *warpGradPtrX = static_cast<DTYPE *>(warpedGradient->data);
    DTYPE *warpGradPtrY = &warpGradPtrX[voxelNumber];
    DTYPE *warpGradPtrZ = nullptr;
    if(referenceImage->nz>1)
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index 3de0713a..ad86a044 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -123,7 +123,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    int *combinedMask,
                                    float *kernelStdDev,
                                    nifti_image *correlationImage,
-                                   nifti_image *warImgGradient,
+                                   nifti_image *warpedGradient,
                                    nifti_image *lnccGradientImage,
                                    int kernelType,
                                    int current_timepoint,
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 755f6893..35aaa0b5 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -1731,7 +1731,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
                mask);
       }
    }
-   if(MrPropre==true)
+   if(MrPropre)
    {
       free(mask);
       mask=nullptr;
@@ -2851,7 +2851,7 @@ void reg_defField_compose(nifti_image *deformationField,
       }
    }
 
-   if(freeMask==true) free(mask);
+   if(freeMask) free(mask);
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -4164,7 +4164,7 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                                     updateStepNumber);
       // Update the number of step required. No action otherwise
       velocityFieldGrid->intent_p2=flowField->intent_p2;
-      // Clear the allocated flow field
+      // Deallocate the allocated flow field
       nifti_image_free(flowField);
    }
    else
@@ -4230,7 +4230,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
                                       deformationFieldImage[0],
                                       scalingValue); // (/scalingValue)
 
-      // Clear the allocated flow field
+      // Deallocate the allocated flow field
       nifti_image_free(flowFieldImage);
       flowFieldImage=nullptr;
 
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index f4b41325..fc8c4c70 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -2192,7 +2192,7 @@ void reg_spline_getLandmarkDistanceGradient(nifti_image *controlPointImage,
                                             float weight)
 {
    if(controlPointImage->intent_p1!=CUB_SPLINE_GRID){
-      reg_print_fct_error("reg_spline_getLandmarkDistance");
+      reg_print_fct_error("reg_spline_getLandmarkDistanceGradient");
       reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now");
       reg_exit();
    }
diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h
index 27a49dec..107d896d 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.h
+++ b/reg-lib/cpu/_reg_localTrans_regul.h
@@ -145,7 +145,7 @@ void reg_spline_getLandmarkDistanceGradient(nifti_image *controlPointImage,
 /** @brief Compute and return a pairwise energy.
  * @param controlPointGridImage Image that contains the transformation
  * parametrisation
- * @return The normalised pariwise energy. Normalised by the number of voxel
+ * @return The normalised pairwise energy. Normalised by the number of voxel
  */
 extern "C++"
 void reg_spline_approxLinearPairwiseGradient(nifti_image *controlPointGridImage,
diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp
index 07965a5e..6872b5fb 100644
--- a/reg-lib/cpu/_reg_maths_eigen.cpp
+++ b/reg-lib/cpu/_reg_maths_eigen.cpp
@@ -298,7 +298,7 @@ void reg_mat33_logm(mat33 *in_tensor)
    // is a general eigensolver and the logarithm function should
    // suceed unless convergence just isn't happening.
    det = tensor.determinant();
-   if(all_zeros==true || det == 0){
+   if(all_zeros || det == 0){
       reg_mat33_to_nan(in_tensor);
       return;
    }
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 6af365da..69fa6050 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -37,13 +37,13 @@ reg_nmi::reg_nmi()
 /* *************************************************************** */
 reg_nmi::~reg_nmi()
 {
-   this->ClearHistogram();
+   this->DeallocateHistogram();
 #ifndef NDEBUG
    reg_print_msg_debug("reg_nmi destructor called");
 #endif
 }
 /* *************************************************************** */
-void reg_nmi::ClearHistogram()
+void reg_nmi::DeallocateHistogram()
 {
    int timepoint=this->referenceTimePoint;
    // Free the joint histograms and the entropy arrays
@@ -116,7 +116,7 @@ void reg_nmi::ClearHistogram()
    }
    this->backwardEntropyValues=nullptr;
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_nmi::ClearHistogram called");
+   reg_print_msg_debug("reg_nmi::DeallocateHistogram called");
 #endif
 }
 /* *************************************************************** */
@@ -146,8 +146,8 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
                                   warRefGraPtr,
                                   bckVoxBasedGraPtr);
 
-   // Clear all allocated arrays
-   this->ClearHistogram();
+   // Deallocate all allocated arrays
+   this->DeallocateHistogram();
    // Extract the number of time point
    int timepoint=this->referenceTimePoint;
    // Reference and floating are resampled between 2 and bin-3
@@ -567,12 +567,11 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
                                     unsigned short *floatingBinNumber,
                                     double **jointHistogramLog,
                                     double **entropyValues,
-                                    nifti_image *warImgGradient,
+                                    nifti_image *warpedGradient,
                                     nifti_image *measureGradientImage,
                                     int *referenceMask,
                                     int current_timepoint,
-                           double timepoint_weight
-                                    )
+                                    double timepoint_weight)
 {
    if(current_timepoint<0 || current_timepoint>=referenceImage->nt){
       reg_print_fct_error("reg_getVoxelBasedNMIGradient2D");
@@ -588,7 +587,7 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
    DTYPE *warPtr = &warImagePtr[current_timepoint*voxelNumber];
 
    // Pointers to the spatial gradient of the warped image
-   DTYPE *warGradPtrX = static_cast<DTYPE *>(warImgGradient->data);
+   DTYPE *warGradPtrX = static_cast<DTYPE *>(warpedGradient->data);
    DTYPE *warGradPtrY = &warGradPtrX[voxelNumber];
 
    // Pointers to the measure of similarity gradient
@@ -667,7 +666,7 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
                                     unsigned short *floatingBinNumber,
                                     double **jointHistogramLog,
                                     double **entropyValues,
-                                    nifti_image *warImgGradient,
+                                    nifti_image *warpedGradient,
                                     nifti_image *measureGradientImage,
                                     int *referenceMask,
                                     int current_timepoint,
@@ -694,7 +693,7 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
    DTYPE *warPtr = &warImagePtr[current_timepoint*voxelNumber];
 
    // Pointers to the spatial gradient of the warped image
-   DTYPE *warGradPtrX = static_cast<DTYPE *>(warImgGradient->data);
+   DTYPE *warGradPtrX = static_cast<DTYPE *>(warpedGradient->data);
    DTYPE *warGradPtrY = &warGradPtrX[voxelNumber];
    DTYPE *warGradPtrZ = &warGradPtrY[voxelNumber];
 
@@ -824,7 +823,7 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                                                this->forwardVoxelBasedGradientImagePointer,
                                                this->referenceMaskPointer,
                                                current_timepoint,
-                                    this->timePointWeight[current_timepoint]);
+                                               this->timePointWeight[current_timepoint]);
          break;
       case NIFTI_TYPE_FLOAT64:
          reg_getVoxelBasedNMIGradient3D<double>(this->referenceImagePointer,
@@ -836,8 +835,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                                                 this->warpedFloatingGradientImagePointer,
                                                 this->forwardVoxelBasedGradientImagePointer,
                                                 this->referenceMaskPointer,
-                                    current_timepoint,
-                                    this->timePointWeight[current_timepoint]);
+                                                current_timepoint,
+                                                this->timePointWeight[current_timepoint]);
          break;
       default:
          reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
@@ -859,8 +858,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                                                this->warpedFloatingGradientImagePointer,
                                                this->forwardVoxelBasedGradientImagePointer,
                                                this->referenceMaskPointer,
-                                    current_timepoint,
-                                    this->timePointWeight[current_timepoint]);
+                                               current_timepoint,
+                                               this->timePointWeight[current_timepoint]);
          break;
       case NIFTI_TYPE_FLOAT64:
          reg_getVoxelBasedNMIGradient2D<double>(this->referenceImagePointer,
@@ -872,8 +871,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                                                 this->warpedFloatingGradientImagePointer,
                                                 this->forwardVoxelBasedGradientImagePointer,
                                                 this->referenceMaskPointer,
-                                    current_timepoint,
-                                    this->timePointWeight[current_timepoint]);
+                                                current_timepoint,
+                                                this->timePointWeight[current_timepoint]);
          break;
       default:
          reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
@@ -887,8 +886,7 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
       dtype = this->floatingImagePointer->datatype;
       if(this->warpedReferenceImagePointer->datatype != dtype ||
             this->warpedReferenceGradientImagePointer->datatype != dtype ||
-            this->backwardVoxelBasedGradientImagePointer->datatype != dtype
-            )
+            this->backwardVoxelBasedGradientImagePointer->datatype != dtype)
       {
          reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
          reg_print_msg_error("Input images are exepected to be of the same type");
@@ -909,8 +907,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                                                   this->warpedReferenceGradientImagePointer,
                                                   this->backwardVoxelBasedGradientImagePointer,
                                                   this->floatingMaskPointer,
-                                      current_timepoint,
-                                      this->timePointWeight[current_timepoint]);
+                                                  current_timepoint,
+                                                  this->timePointWeight[current_timepoint]);
             break;
          case NIFTI_TYPE_FLOAT64:
             reg_getVoxelBasedNMIGradient3D<double>(this->floatingImagePointer,
@@ -922,8 +920,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                                                    this->warpedReferenceGradientImagePointer,
                                                    this->backwardVoxelBasedGradientImagePointer,
                                                    this->floatingMaskPointer,
-                                       current_timepoint,
-                                       this->timePointWeight[current_timepoint]);
+                                                   current_timepoint,
+                                                   this->timePointWeight[current_timepoint]);
             break;
          default:
             reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
@@ -945,8 +943,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                                                   this->warpedReferenceGradientImagePointer,
                                                   this->backwardVoxelBasedGradientImagePointer,
                                                   this->floatingMaskPointer,
-                                      current_timepoint,
-                                      this->timePointWeight[current_timepoint]);
+                                                  current_timepoint,
+                                                  this->timePointWeight[current_timepoint]);
             break;
          case NIFTI_TYPE_FLOAT64:
             reg_getVoxelBasedNMIGradient2D<double>(this->floatingImagePointer,
@@ -958,8 +956,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                                                    this->warpedReferenceGradientImagePointer,
                                                    this->backwardVoxelBasedGradientImagePointer,
                                                    this->floatingMaskPointer,
-                                       current_timepoint,
-                                       this->timePointWeight[current_timepoint]);
+                                                   current_timepoint,
+                                                   this->timePointWeight[current_timepoint]);
             break;
          default:
             reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 413ff46b..e49b1724 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -78,7 +78,7 @@ class reg_nmi : public reg_measure
    double **backwardJointHistogramLog;
    double **backwardEntropyValues;
 
-   void ClearHistogram();
+   void DeallocateHistogram();
 };
 /* *************************************************************** */
 /* *************************************************************** */
@@ -102,7 +102,7 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
                                     unsigned short *floatingBinNumber,
                                     double **jointHistogramLog,
                                     double **entropyValues,
-                                    nifti_image *warImgGradient,
+                                    nifti_image *warpedGradient,
                                     nifti_image *nmiGradientImage,
                                     int *referenceMask,
                                     int current_timepoint,
@@ -116,7 +116,7 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
                                     unsigned short *floatingBinNumber,
                                     double **jointHistogramLog,
                                     double **entropyValues,
-                                    nifti_image *warImgGradient,
+                                    nifti_image *warpedGradient,
                                     nifti_image *nmiGradientImage,
                                     int *referenceMask,
                                     int current_timepoint,
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index 7a91c114..90cd64c9 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -137,7 +137,7 @@ void reg_optimiser<T>::Perturbation(float length)
    {
       this->currentDOF[i]=this->bestDOF[i] + length * (float)(rand() - RAND_MAX/2) / ((float)RAND_MAX/2.0f);
    }
-   if(this->backward==true)
+   if(this->backward)
    {
       for(size_t i=0; i<this->dofNumber_b; ++i)
       {
@@ -337,7 +337,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues()
    T *array1Ptr_b = this->array1_b;
    T *array2Ptr_b = this->array2_b;
 
-   if(this->firstcall==true)
+   if(this->firstcall)
    {
 #ifndef NDEBUG
       reg_print_msg_debug("Conjugate gradient initialisation");
diff --git a/reg-lib/cpu/_reg_polyAffine.cpp b/reg-lib/cpu/_reg_polyAffine.cpp
index 95d4a2f8..4560f990 100644
--- a/reg-lib/cpu/_reg_polyAffine.cpp
+++ b/reg-lib/cpu/_reg_polyAffine.cpp
@@ -128,7 +128,7 @@ void reg_polyAffine<T>::AllocateTransformationGradient()
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 template <class T>
-void reg_polyAffine<T>::ClearTransformationGradient()
+void reg_polyAffine<T>::DeallocateTransformationGradient()
 {
 
 }
diff --git a/reg-lib/cpu/_reg_polyAffine.h b/reg-lib/cpu/_reg_polyAffine.h
index 661fa050..dbbc831a 100644
--- a/reg-lib/cpu/_reg_polyAffine.h
+++ b/reg-lib/cpu/_reg_polyAffine.h
@@ -31,7 +31,7 @@ class reg_polyAffine : public reg_base<T>
    void PrintCurrentObjFunctionValue(T);
    void PrintInitialObjFunctionValue();
    void AllocateTransformationGradient();
-   void ClearTransformationGradient();
+   void DeallocateTransformationGradient();
 
 public:
    reg_polyAffine(int refTimePoint,int floTimePoint);
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 6b0e645e..954fde54 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -816,7 +816,7 @@ void reg_resampleImage(nifti_image *floatingImage,
         int j=0;
         for(int i=0; i<floatingImage->nt; ++i)
         {
-            if(dti_timepoint[i]==true)
+            if(dti_timepoint[i])
                 dtIndicies[j++]=i;
         }
         if((floatingImage->nz>1 && j!=6) && (floatingImage->nz==1 && j!=3))
@@ -1018,7 +1018,7 @@ void reg_resampleImage(nifti_image *floatingImage,
         printf("Deformation field pixel type unsupported.");
         break;
     }
-    if(MrPropreRules==true)
+    if(MrPropreRules)
     {
         free(mask);
         mask=nullptr;
@@ -2028,7 +2028,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage,
         printf("Deformation field pixel type unsupported.");
         break;
     }
-    if(MrPropreRules==true)
+    if(MrPropreRules)
     {
         free(mask);
         mask=nullptr;
@@ -2525,7 +2525,7 @@ void reg_resampleGradient(nifti_image *floatingImage,
 template<class FloatingTYPE, class GradientTYPE, class FieldTYPE>
 void TrilinearImageGradient(nifti_image *floatingImage,
                             nifti_image *deformationField,
-                            nifti_image *warImgGradient,
+                            nifti_image *warpedGradient,
                             int *mask,
                             float paddingValue,
                             int active_timepoint)
@@ -2537,11 +2537,11 @@ void TrilinearImageGradient(nifti_image *floatingImage,
     }
 #ifdef _WIN32
     long index;
-    long referenceVoxelNumber = (long)warImgGradient->nx*warImgGradient->ny*warImgGradient->nz;
+    long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz;
     long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz;
 #else
     size_t index;
-    size_t referenceVoxelNumber = (size_t)warImgGradient->nx*warImgGradient->ny*warImgGradient->nz;
+    size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz;
     size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz;
 #endif
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
@@ -2551,7 +2551,7 @@ void TrilinearImageGradient(nifti_image *floatingImage,
     FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
     FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber];
 
-    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warImgGradient->data);
+    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warpedGradient->data);
     GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
     GradientTYPE *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber];
 
@@ -2721,7 +2721,7 @@ void TrilinearImageGradient(nifti_image *floatingImage,
 template<class FloatingTYPE, class GradientTYPE, class FieldTYPE>
 void BilinearImageGradient(nifti_image *floatingImage,
                            nifti_image *deformationField,
-                           nifti_image *warImgGradient,
+                           nifti_image *warpedGradient,
                            int *mask,
                            float paddingValue,
                            int active_timepoint)
@@ -2733,11 +2733,11 @@ void BilinearImageGradient(nifti_image *floatingImage,
     }
 #ifdef _WIN32
     long index;
-    long referenceVoxelNumber = (long)warImgGradient->nx*warImgGradient->ny;
+    long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny;
     long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny;
 #else
     size_t index;
-    size_t referenceVoxelNumber = (size_t)warImgGradient->nx*warImgGradient->ny;
+    size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny;
     size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny;
 #endif
 
@@ -2747,7 +2747,7 @@ void BilinearImageGradient(nifti_image *floatingImage,
     FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationField->data);
     FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
 
-    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warImgGradient->data);
+    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warpedGradient->data);
     GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
 
     int *maskPtr = &mask[0];
@@ -2855,7 +2855,7 @@ void BilinearImageGradient(nifti_image *floatingImage,
 template<class FloatingTYPE, class GradientTYPE, class FieldTYPE>
 void CubicSplineImageGradient3D(nifti_image *floatingImage,
                                 nifti_image *deformationField,
-                                nifti_image *warImgGradient,
+                                nifti_image *warpedGradient,
                                 int *mask,
                                 float paddingValue,
                                 int active_timepoint)
@@ -2867,11 +2867,11 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage,
     }
 #ifdef _WIN32
     long index;
-    long referenceVoxelNumber = (long)warImgGradient->nx*warImgGradient->ny*warImgGradient->nz;
+    long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz;
     long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz;
 #else
     size_t index;
-    size_t referenceVoxelNumber = (size_t)warImgGradient->nx*warImgGradient->ny*warImgGradient->nz;
+    size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz;
     size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz;
 #endif
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
@@ -2881,7 +2881,7 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage,
     FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
     FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber];
 
-    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warImgGradient->data);
+    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warpedGradient->data);
     GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
     GradientTYPE *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber];
 
@@ -3019,7 +3019,7 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage,
 template<class FloatingTYPE, class GradientTYPE, class FieldTYPE>
 void CubicSplineImageGradient2D(nifti_image *floatingImage,
                                 nifti_image *deformationField,
-                                nifti_image *warImgGradient,
+                                nifti_image *warpedGradient,
                                 int *mask,
                                 float paddingValue,
                                 int active_timepoint)
@@ -3031,11 +3031,11 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage,
     }
 #ifdef _WIN32
     long index;
-    long referenceVoxelNumber = (long)warImgGradient->nx*warImgGradient->ny;
+    long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny;
     long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny;
 #else
     size_t index;
-    size_t referenceVoxelNumber = (size_t)warImgGradient->nx*warImgGradient->ny;
+    size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny;
     size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny;
 #endif
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
@@ -3044,7 +3044,7 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage,
     FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationField->data);
     FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
 
-    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warImgGradient->data);
+    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warpedGradient->data);
     GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
 
     int *maskPtr = &mask[0];
@@ -3148,7 +3148,7 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage,
 /* *************************************************************** */
 template <class FieldTYPE, class FloatingTYPE, class GradientTYPE>
 void reg_getImageGradient3(nifti_image *floatingImage,
-                           nifti_image *warImgGradient,
+                           nifti_image *warpedGradient,
                            nifti_image *deformationField,
                            int *mask,
                            int interp,
@@ -3173,7 +3173,7 @@ void reg_getImageGradient3(nifti_image *floatingImage,
             CubicSplineImageGradient3D
                     <FloatingTYPE,GradientTYPE,FieldTYPE>(floatingImage,
                                                           deformationField,
-                                                          warImgGradient,
+                                                          warpedGradient,
                                                           mask,
                                                           paddingValue,
                                                           active_timepoint);
@@ -3183,7 +3183,7 @@ void reg_getImageGradient3(nifti_image *floatingImage,
             CubicSplineImageGradient2D
                     <FloatingTYPE,GradientTYPE,FieldTYPE>(floatingImage,
                                                           deformationField,
-                                                          warImgGradient,
+                                                          warpedGradient,
                                                           mask,
                                                           paddingValue,
                                                           active_timepoint);
@@ -3196,7 +3196,7 @@ void reg_getImageGradient3(nifti_image *floatingImage,
             TrilinearImageGradient
                     <FloatingTYPE,GradientTYPE,FieldTYPE>(floatingImage,
                                                           deformationField,
-                                                          warImgGradient,
+                                                          warpedGradient,
                                                           mask,
                                                           paddingValue,
                                                           active_timepoint);
@@ -3206,7 +3206,7 @@ void reg_getImageGradient3(nifti_image *floatingImage,
             BilinearImageGradient
                     <FloatingTYPE,GradientTYPE,FieldTYPE>(floatingImage,
                                                           deformationField,
-                                                          warImgGradient,
+                                                          warpedGradient,
                                                           mask,
                                                           paddingValue,
                                                           active_timepoint);
@@ -3220,7 +3220,7 @@ void reg_getImageGradient3(nifti_image *floatingImage,
         originalFloatingData=nullptr;
     }
     // The interpolated tensors are reoriented and exponentiated
-    reg_dti_resampling_postprocessing<FloatingTYPE>(warImgGradient,
+    reg_dti_resampling_postprocessing<FloatingTYPE>(warpedGradient,
                                                     mask,
                                                     jacMat,
                                                     dtIndicies,
@@ -3230,7 +3230,7 @@ void reg_getImageGradient3(nifti_image *floatingImage,
 /* *************************************************************** */
 template <class FieldTYPE, class FloatingTYPE>
 void reg_getImageGradient2(nifti_image *floatingImage,
-                           nifti_image *warImgGradient,
+                           nifti_image *warpedGradient,
                            nifti_image *deformationField,
                            int *mask,
                            int interp,
@@ -3241,15 +3241,15 @@ void reg_getImageGradient2(nifti_image *floatingImage,
                            nifti_image *warpedImage
                            )
 {
-    switch(warImgGradient->datatype)
+    switch(warpedGradient->datatype)
     {
     case NIFTI_TYPE_FLOAT32:
         reg_getImageGradient3<FieldTYPE,FloatingTYPE,float>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_FLOAT64:
         reg_getImageGradient3<FieldTYPE,FloatingTYPE,double>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     default:
         reg_print_fct_error("reg_getImageGradient2");
@@ -3260,7 +3260,7 @@ void reg_getImageGradient2(nifti_image *floatingImage,
 /* *************************************************************** */
 template <class FieldTYPE>
 void reg_getImageGradient1(nifti_image *floatingImage,
-                           nifti_image *warImgGradient,
+                           nifti_image *warpedGradient,
                            nifti_image *deformationField,
                            int *mask,
                            int interp,
@@ -3275,35 +3275,35 @@ void reg_getImageGradient1(nifti_image *floatingImage,
     {
     case NIFTI_TYPE_UINT8:
         reg_getImageGradient2<FieldTYPE,unsigned char>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_INT8:
         reg_getImageGradient2<FieldTYPE,char>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_UINT16:
         reg_getImageGradient2<FieldTYPE,unsigned short>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_INT16:
         reg_getImageGradient2<FieldTYPE,short>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_UINT32:
         reg_getImageGradient2<FieldTYPE,unsigned int>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_INT32:
         reg_getImageGradient2<FieldTYPE,int>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_FLOAT32:
         reg_getImageGradient2<FieldTYPE,float>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_FLOAT64:
         reg_getImageGradient2<FieldTYPE,double>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     default:
         reg_print_fct_error("reg_getImageGradient1");
@@ -3313,7 +3313,7 @@ void reg_getImageGradient1(nifti_image *floatingImage,
 }
 /* *************************************************************** */
 void reg_getImageGradient(nifti_image *floatingImage,
-                          nifti_image *warImgGradient,
+                          nifti_image *warpedGradient,
                           nifti_image *deformationField,
                           int *mask,
                           int interp,
@@ -3348,7 +3348,7 @@ void reg_getImageGradient(nifti_image *floatingImage,
         int j=0;
         for(int i=0; i<floatingImage->nt; ++i)
         {
-            if(dti_timepoint[i]==true)
+            if(dti_timepoint[i])
                 dtIndicies[j++]=i;
         }
         if((floatingImage->nz>1 && j!=6) && (floatingImage->nz==1 && j!=3))
@@ -3363,11 +3363,11 @@ void reg_getImageGradient(nifti_image *floatingImage,
     {
     case NIFTI_TYPE_FLOAT32:
         reg_getImageGradient1<float>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_FLOAT64:
         reg_getImageGradient1<double>
-                (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
+                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     default:
         reg_print_fct_error("reg_getImageGradient");
@@ -3375,7 +3375,7 @@ void reg_getImageGradient(nifti_image *floatingImage,
         reg_exit();
         break;
     }
-    if(MrPropreRule==true) free(mask);
+    if(MrPropreRule) free(mask);
 }
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h
index 26c4c319..f2945c33 100755
--- a/reg-lib/cpu/_reg_resampling.h
+++ b/reg-lib/cpu/_reg_resampling.h
@@ -55,14 +55,14 @@ void reg_resampleImage_PSF(nifti_image *floatingImage,
 
 extern "C++"
 void reg_resampleGradient(nifti_image *gradientImage,
-                          nifti_image *warImgGradient,
+                          nifti_image *warpedGradient,
                           nifti_image *deformationField,
                           int interp,
                           float paddingValue);
 
 extern "C++"
 void reg_getImageGradient(nifti_image *floatingImage,
-                          nifti_image *warImgGradient,
+                          nifti_image *warpedGradient,
                           nifti_image *deformationField,
                           int *mask,
                           int interp,
diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp
index 20639e32..ea63b3b4 100755
--- a/reg-lib/cpu/_reg_splineBasis.cpp
+++ b/reg-lib/cpu/_reg_splineBasis.cpp
@@ -682,7 +682,7 @@ void get_GridValues(int startX,
                     bool displacement)
 {
    int range=4;
-   if(approx==true)
+   if(approx)
       range=3;
 
    size_t index;
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index cd7a62ed..031d8f0e 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -292,7 +292,7 @@ double reg_ssd::GetSimilarityMeasureValue()
 template <class DTYPE>
 void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                                   nifti_image *warpedImage,
-                                  nifti_image *warImgGradient,
+                                  nifti_image *warpedGradient,
                                   nifti_image *measureGradientImage,
                                   nifti_image *jacobianDetImage,
                                   int *mask,
@@ -321,7 +321,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
    DTYPE *currentWarPtr=&warImagePtr[current_timepoint*voxelNumber];
 
    // Pointers to the spatial gradient of the warped image
-   DTYPE *spatialGradPtrX = static_cast<DTYPE *>(warImgGradient->data);
+   DTYPE *spatialGradPtrX = static_cast<DTYPE *>(warpedGradient->data);
    DTYPE *spatialGradPtrY = &spatialGradPtrX[voxelNumber];
    DTYPE *spatialGradPtrZ = nullptr;
    if(referenceImage->nz>1)
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index c2ef723f..71d71b10 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -1767,8 +1767,8 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis)
    for(int i=1; i<4; i++)
    {
       oldDim[i]=image->dim[i];
-      if(image->dim[i]>1 && downsampleAxis[i]==true) image->dim[i]=static_cast<int>(reg_ceil(image->dim[i]/2.0));
-      if(image->pixdim[i]>0 && downsampleAxis[i]==true) image->pixdim[i]=image->pixdim[i]*2.0f;
+      if(image->dim[i]>1 && downsampleAxis[i]) image->dim[i]=static_cast<int>(reg_ceil(image->dim[i]/2.0));
+      if(image->pixdim[i]>0 && downsampleAxis[i]) image->pixdim[i]=image->pixdim[i]*2.0f;
    }
    image->nx=image->dim[1];
    image->ny=image->dim[2];
@@ -3049,17 +3049,17 @@ void reg_setGradientToZero_core(nifti_image *image,
 {
    size_t voxel_number = (size_t)image->nx*image->ny*image->nz;
    DTYPE *ptr = static_cast<DTYPE *>(image->data);
-   if(x_axis==true){
+   if(x_axis){
       for(size_t i=0; i<voxel_number; ++i)
          *ptr++=0;
    }
    else ptr += voxel_number;
-   if(y_axis==true){
+   if(y_axis){
       for(size_t i=0; i<voxel_number; ++i)
          *ptr++=0;
    }
    else ptr += voxel_number;
-   if(z_axis==true && image->nu>2){
+   if(z_axis && image->nu>2){
       for(size_t i=0; i<voxel_number; ++i)
          *ptr++=0;
    }
@@ -3232,7 +3232,7 @@ void reg_tools_abs_image(nifti_image *img)
    }
 }
 /* *************************************************************** */
-void mat44ToCptr(mat44 mat, float* cMat)
+void mat44ToCptr(const mat44& mat, float* cMat)
 {
 	for (int i = 0; i < 4; i++)
 	{
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index d1253a07..d8ee8391 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -444,7 +444,7 @@ extern "C++"
 void reg_tools_abs_image(nifti_image *img);
 /* *************************************************************** */
 extern "C++"
-void mat44ToCptr(mat44 mat, float* cMat);
+void mat44ToCptr(const mat44& mat, float* cMat);
 /* *************************************************************** */
 extern "C++"
 void cPtrToMat44(mat44 *mat, float* cMat);
diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
index 652e098f..5912fc96 100644
--- a/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
@@ -6,7 +6,7 @@ CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(Content *conI
    CudaAladinContent *con = static_cast<CudaAladinContent*>(conIn);
 
    //get necessary cpu ptrs
-   this->deformationFieldImage = con->AladinContent::GetCurrentDeformationField();
+   this->deformationFieldImage = con->AladinContent::GetDeformationField();
    this->affineTransformation = con->AladinContent::GetTransformationMatrix();
 
    //get necessary cuda ptrs
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index 01193a1c..608cf634 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -4,17 +4,17 @@
 #include <algorithm>
 
 /* *************************************************************** */
-CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn,
-                                     nifti_image *currentFloatingIn,
-                                     int *currentReferenceMaskIn,
+CudaAladinContent::CudaAladinContent(nifti_image *referenceIn,
+                                     nifti_image *floatingIn,
+                                     int *referenceMaskIn,
                                      mat44 *transformationMatrixIn,
                                      size_t bytesIn,
                                      const unsigned int percentageOfBlocks,
                                      const unsigned int inlierLts,
                                      int blockStepSize) :
-    AladinContent(currentReferenceIn,
-                  currentFloatingIn,
-                  currentReferenceMaskIn,
+    AladinContent(referenceIn,
+                  floatingIn,
+                  referenceMaskIn,
                   transformationMatrixIn,
                   sizeof(float), // forcing float for CUDA
                   percentageOfBlocks,
@@ -43,17 +43,14 @@ void CudaAladinContent::InitVars() {
     mask_d = nullptr;
     floIJKMat_d = nullptr;
 
-    if (currentReference != nullptr && currentReference->nbyper != NIFTI_TYPE_FLOAT32)
-        reg_tools_changeDatatype<float>(currentReference);
-    if (currentFloating != nullptr && currentFloating->nbyper != NIFTI_TYPE_FLOAT32) {
-        reg_tools_changeDatatype<float>(currentFloating);
-        if (currentWarped != nullptr)
-            reg_tools_changeDatatype<float>(currentWarped);
+    if (reference != nullptr && reference->nbyper != NIFTI_TYPE_FLOAT32)
+        reg_tools_changeDatatype<float>(reference);
+    if (floating != nullptr && floating->nbyper != NIFTI_TYPE_FLOAT32) {
+        reg_tools_changeDatatype<float>(floating);
+        if (warped != nullptr)
+            reg_tools_changeDatatype<float>(warped);
     }
 
-    cudaSContext = &CudaContextSingleton::Instance();
-    cudaContext = cudaSContext->GetContext();
-
     //numBlocks = (blockMatchingParams->activeBlock != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0;
 }
 /* *************************************************************** */
@@ -67,37 +64,37 @@ void CudaAladinContent::AllocateCuPtrs() {
 
         free(tmpMat_h);
     }
-    if (currentReferenceMask != nullptr) {
-        cudaCommon_allocateArrayToDevice<int>(&mask_d, currentReference->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, currentReferenceMask, currentReference->nvox);
+    if (referenceMask != nullptr) {
+        cudaCommon_allocateArrayToDevice<int>(&mask_d, reference->nvox);
+        cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, referenceMask, reference->nvox);
     }
-    if (currentReference != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&referenceImageArray_d, currentReference->nvox);
+    if (reference != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&referenceImageArray_d, reference->nvox);
         cudaCommon_allocateArrayToDevice<float>(&referenceMat_d, sizeof(mat44) / sizeof(float));
 
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&referenceImageArray_d, currentReference);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&referenceImageArray_d, reference);
 
         float* targetMat = (float *)malloc(sizeof(mat44)); //freed
-        mat44ToCptr(*GetXYZMatrix(currentReference), targetMat);
+        mat44ToCptr(*GetXYZMatrix(*reference), targetMat);
         cudaCommon_transferFromDeviceToNiftiSimple1<float>(&referenceMat_d, targetMat, sizeof(mat44) / sizeof(float));
         free(targetMat);
     }
-    if (currentWarped != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, currentWarped->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, currentWarped);
+    if (warped != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, warped->nvox);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, warped);
     }
-    if (currentDeformationField != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, currentDeformationField->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, currentDeformationField);
+    if (deformationField != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, deformationField->nvox);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, deformationField);
     }
-    if (currentFloating != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&floatingImageArray_d, currentFloating->nvox);
+    if (floating != nullptr) {
+        cudaCommon_allocateArrayToDevice<float>(&floatingImageArray_d, floating->nvox);
         cudaCommon_allocateArrayToDevice<float>(&floIJKMat_d, sizeof(mat44) / sizeof(float));
 
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&floatingImageArray_d, currentFloating);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(&floatingImageArray_d, floating);
 
         float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44));
-        mat44ToCptr(*GetIJKMatrix(currentFloating), sourceIJKMatrix_h);
+        mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h);
         NR_CUDA_SAFE_CALL(cudaMemcpy(floIJKMat_d, sourceIJKMatrix_h, sizeof(mat44), cudaMemcpyHostToDevice));
         free(sourceIJKMatrix_h);
     }
@@ -138,14 +135,14 @@ void CudaAladinContent::AllocateCuPtrs() {
     }
 }
 /* *************************************************************** */
-nifti_image* CudaAladinContent::GetCurrentWarped(int type) {
-    DownloadImage(currentWarped, warpedImageArray_d, type);
-    return currentWarped;
+nifti_image* CudaAladinContent::GetWarped(int datatype, int index) {
+    DownloadImage(warped, warpedImageArray_d, datatype);
+    return warped;
 }
 /* *************************************************************** */
-nifti_image* CudaAladinContent::GetCurrentDeformationField() {
-    cudaCommon_transferFromDeviceToCpu<float>((float*)currentDeformationField->data, &deformationFieldArray_d, currentDeformationField->nvox);
-    return currentDeformationField;
+nifti_image* CudaAladinContent::GetDeformationField() {
+    cudaCommon_transferFromDeviceToCpu<float>((float*)deformationField->data, &deformationFieldArray_d, deformationField->nvox);
+    return deformationField;
 }
 /* *************************************************************** */
 _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() {
@@ -156,7 +153,7 @@ _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() {
 /* *************************************************************** */
 void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     if (transformationMatrix != nullptr)
-        cudaCommon_free<float>(&transformationMatrix_d);
+        cudaCommon_free(&transformationMatrix_d);
 
     AladinContent::SetTransformationMatrix(transformationMatrixIn);
     float *tmpMat_h = (float*)malloc(sizeof(mat44));
@@ -167,49 +164,49 @@ void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     free(tmpMat_h);
 }
 /* *************************************************************** */
-void CudaAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) {
-    if (currentDeformationField != nullptr)
-        cudaCommon_free<float>(&deformationFieldArray_d);
-    AladinContent::SetCurrentDeformationField(currentDeformationFieldIn);
+void CudaAladinContent::SetDeformationField(nifti_image *deformationFieldIn) {
+    if (deformationField != nullptr)
+        cudaCommon_free(&deformationFieldArray_d);
+    AladinContent::SetDeformationField(deformationFieldIn);
 
-    cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, currentDeformationField->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, currentDeformationField);
+    cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, deformationField->nvox);
+    cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, deformationField);
 }
 /* *************************************************************** */
-void CudaAladinContent::SetCurrentReferenceMask(int *currentReferenceMaskIn) {
-    if (currentReferenceMask != nullptr)
-        cudaCommon_free<int>(&mask_d);
-    AladinContent::SetCurrentReferenceMask(currentReferenceMaskIn);
-    cudaCommon_allocateArrayToDevice<int>(&mask_d, currentReference->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, currentReferenceMaskIn, currentReference->nvox);
+void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) {
+    if (referenceMask != nullptr)
+        cudaCommon_free(&mask_d);
+    AladinContent::SetReferenceMask(referenceMaskIn);
+    cudaCommon_allocateArrayToDevice<int>(&mask_d, reference->nvox);
+    cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, referenceMaskIn, reference->nvox);
 }
 /* *************************************************************** */
-void CudaAladinContent::SetCurrentWarped(nifti_image *currentWarped) {
-    if (currentWarped != nullptr)
-        cudaCommon_free<float>(&warpedImageArray_d);
-    AladinContent::SetCurrentWarped(currentWarped);
-    reg_tools_changeDatatype<float>(currentWarped);
+void CudaAladinContent::SetWarped(nifti_image *warped) {
+    if (warped != nullptr)
+        cudaCommon_free(&warpedImageArray_d);
+    AladinContent::SetWarped(warped);
+    reg_tools_changeDatatype<float>(warped);
 
-    cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, currentWarped->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, currentWarped);
+    cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, warped->nvox);
+    cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, warped);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
     AladinContent::SetBlockMatchingParams(bmp);
     if (blockMatchingParams->referencePosition != nullptr) {
-        cudaCommon_free<float>(&referencePosition_d);
+        cudaCommon_free(&referencePosition_d);
         //referencePosition
         cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     }
     if (blockMatchingParams->warpedPosition != nullptr) {
-        cudaCommon_free<float>(&warpedPosition_d);
+        cudaCommon_free(&warpedPosition_d);
         //warpedPosition
         cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     }
     if (blockMatchingParams->totalBlock != nullptr) {
-        cudaCommon_free<int>(&totalBlock_d);
+        cudaCommon_free(&totalBlock_d);
         //activeBlock
         cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
         cudaCommon_transferArrayFromCpuToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
@@ -264,9 +261,7 @@ DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) {
 }
 /* *************************************************************** */
 template<class T>
-void CudaAladinContent::FillImageData(nifti_image *image,
-                                      float *memoryObject,
-                                      int type) {
+void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, int type) {
     size_t size = image->nvox;
     float *buffer = (float*)malloc(size * sizeof(float));
 
@@ -275,16 +270,14 @@ void CudaAladinContent::FillImageData(nifti_image *image,
     free(image->data);
     image->datatype = type;
     image->nbyper = sizeof(T);
-    image->data = (void *)malloc(image->nvox * image->nbyper);
+    image->data = (void*)malloc(image->nvox * image->nbyper);
     T* dataT = static_cast<T*>(image->data);
     for (size_t i = 0; i < size; ++i)
         dataT[i] = FillWarpedImageData<T>(buffer[i], type);
     free(buffer);
 }
 /* *************************************************************** */
-void CudaAladinContent::DownloadImage(nifti_image *image,
-                                      float* memoryObject,
-                                      int datatype) {
+void CudaAladinContent::DownloadImage(nifti_image *image, float *memoryObject, int datatype) {
     switch (datatype) {
     case NIFTI_TYPE_FLOAT32:
         FillImageData<float>(image, memoryObject, datatype);
@@ -412,43 +405,43 @@ int* CudaAladinContent::GetFloatingDims() {
 /* *************************************************************** */
 void CudaAladinContent::FreeCuPtrs() {
     if (transformationMatrix != nullptr)
-        cudaCommon_free<float>(&transformationMatrix_d);
+        cudaCommon_free(&transformationMatrix_d);
 
-    if (currentReference != nullptr) {
-        cudaCommon_free<float>(&referenceImageArray_d);
-        cudaCommon_free<float>(&referenceMat_d);
+    if (reference != nullptr) {
+        cudaCommon_free(&referenceImageArray_d);
+        cudaCommon_free(&referenceMat_d);
     }
 
-    if (currentFloating != nullptr) {
-        cudaCommon_free<float>(&floatingImageArray_d);
-        cudaCommon_free<float>(&floIJKMat_d);
+    if (floating != nullptr) {
+        cudaCommon_free(&floatingImageArray_d);
+        cudaCommon_free(&floIJKMat_d);
     }
 
-    if (currentWarped != nullptr)
-        cudaCommon_free<float>(&warpedImageArray_d);
+    if (warped != nullptr)
+        cudaCommon_free(&warpedImageArray_d);
 
-    if (currentDeformationField != nullptr)
-        cudaCommon_free<float>(&deformationFieldArray_d);
+    if (deformationField != nullptr)
+        cudaCommon_free(&deformationFieldArray_d);
 
-    if (currentReferenceMask != nullptr)
-        cudaCommon_free<int>(&mask_d);
+    if (referenceMask != nullptr)
+        cudaCommon_free(&mask_d);
 
     if (blockMatchingParams != nullptr) {
-        cudaCommon_free<int>(&totalBlock_d);
-        cudaCommon_free<float>(&referencePosition_d);
-        cudaCommon_free<float>(&warpedPosition_d);
+        cudaCommon_free(&totalBlock_d);
+        cudaCommon_free(&referencePosition_d);
+        cudaCommon_free(&warpedPosition_d);
         /*
-        cudaCommon_free<float>(&AR_d);
-        cudaCommon_free<float>(&U_d);
-        cudaCommon_free<float>(&VT_d);
-        cudaCommon_free<float>(&Sigma_d);
-        cudaCommon_free<float>(&lengths_d);
-        cudaCommon_free<float>(&newWarpedPos_d);
+        cudaCommon_free(&AR_d);
+        cudaCommon_free(&U_d);
+        cudaCommon_free(&VT_d);
+        cudaCommon_free(&Sigma_d);
+        cudaCommon_free(&lengths_d);
+        cudaCommon_free(&newWarpedPos_d);
         */
     }
 }
 /* *************************************************************** */
 bool CudaAladinContent::IsCurrentComputationDoubleCapable() {
-    return cudaSContext->GetIsCardDoubleCapable();
+    return CudaContextSingleton::Instance().GetIsCardDoubleCapable();
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
index 1c0eb0de..ab27d449 100644
--- a/reg-lib/cuda/CudaAladinContent.h
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -6,9 +6,9 @@
 
 class CudaAladinContent: public AladinContent {
 public:
-    CudaAladinContent(nifti_image *currentReferenceIn,
-                      nifti_image *currentFloatingIn,
-                      int *currentReferenceMaskIn = nullptr,
+    CudaAladinContent(nifti_image *referenceIn,
+                      nifti_image *floatingIn,
+                      int *referenceMaskIn = nullptr,
                       mat44 *transformationMatrixIn = nullptr,
                       size_t bytesIn = sizeof(float),
                       const unsigned int percentageOfBlocks = 0,
@@ -44,14 +44,14 @@ class CudaAladinContent: public AladinContent {
 
     // CPU getters with data downloaded from device
     _reg_blockMatchingParam* GetBlockMatchingParams() override;
-    nifti_image* GetCurrentDeformationField() override;
-    nifti_image* GetCurrentWarped(int typ) override;
+    nifti_image* GetDeformationField() override;
+    nifti_image* GetWarped(int datatype, int index = 0) override;
 
     // Setters
     void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    void SetCurrentWarped(nifti_image *warpedImageIn) override;
-    void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) override;
-    void SetCurrentReferenceMask(int *currentReferenceMaskIn) override;
+    void SetWarped(nifti_image *warpedImageIn) override;
+    void SetDeformationField(nifti_image *deformationFieldIn) override;
+    void SetReferenceMask(int *referenceMaskIn) override;
     void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
 
 private:
@@ -59,9 +59,6 @@ class CudaAladinContent: public AladinContent {
     void AllocateCuPtrs();
     void FreeCuPtrs();
 
-    CudaContextSingleton *cudaSContext;
-    CUcontext cudaContext;
-
     float *referenceImageArray_d;
     float *floatingImageArray_d;
     float *warpedImageArray_d;
diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.cpp b/reg-lib/cuda/CudaBlockMatchingKernel.cpp
index 45bae174..4cc7fe18 100644
--- a/reg-lib/cuda/CudaBlockMatchingKernel.cpp
+++ b/reg-lib/cuda/CudaBlockMatchingKernel.cpp
@@ -7,7 +7,7 @@ CudaBlockMatchingKernel::CudaBlockMatchingKernel(Content *conIn) : BlockMatching
     CudaAladinContent *con = static_cast<CudaAladinContent*>(conIn);
 
     //get cpu ptrs
-    reference = con->AladinContent::GetCurrentReference();
+    reference = con->AladinContent::GetReference();
     params = con->AladinContent::GetBlockMatchingParams();
 
     //get cuda ptrs
diff --git a/reg-lib/cuda/CudaKernelFactory.cpp b/reg-lib/cuda/CudaKernelFactory.cpp
index 12045fa2..57af4ae0 100644
--- a/reg-lib/cuda/CudaKernelFactory.cpp
+++ b/reg-lib/cuda/CudaKernelFactory.cpp
@@ -6,7 +6,7 @@
 #include "CudaOptimiseKernel.h"
 #include "AladinContent.h"
 
-Kernel* CudaKernelFactory::ProduceKernel(std::string name, Content *con) const {
+Kernel* CudaKernelFactory::Produce(std::string name, Content *con) const {
     if (name == AffineDeformationFieldKernel::GetName()) return new CudaAffineDeformationFieldKernel(con);
     else if (name == ConvolutionKernel::GetName()) return new CudaConvolutionKernel();
     else if (name == BlockMatchingKernel::GetName()) return new CudaBlockMatchingKernel(con);
diff --git a/reg-lib/cuda/CudaKernelFactory.h b/reg-lib/cuda/CudaKernelFactory.h
index c9727ec9..cc473958 100644
--- a/reg-lib/cuda/CudaKernelFactory.h
+++ b/reg-lib/cuda/CudaKernelFactory.h
@@ -4,5 +4,5 @@
 
 class CudaKernelFactory: public KernelFactory {
 public:
-	Kernel* ProduceKernel(std::string name, Content *con) const;
+	Kernel* Produce(std::string name, Content *con) const;
 };
diff --git a/reg-lib/cuda/CudaResampleImageKernel.cpp b/reg-lib/cuda/CudaResampleImageKernel.cpp
index a6e81267..8f28948f 100644
--- a/reg-lib/cuda/CudaResampleImageKernel.cpp
+++ b/reg-lib/cuda/CudaResampleImageKernel.cpp
@@ -5,8 +5,8 @@
 CudaResampleImageKernel::CudaResampleImageKernel(Content *conIn) : ResampleImageKernel() {
     CudaAladinContent *con = static_cast<CudaAladinContent*>(conIn);
 
-    floatingImage = con->AladinContent::GetCurrentFloating();
-    warpedImage = con->AladinContent::GetCurrentWarped();
+    floatingImage = con->AladinContent::GetFloating();
+    warpedImage = con->AladinContent::GetWarped();
 
     //cuda ptrs
     floatingImageArray_d = con->GetFloatingImageArray_d();
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.cu b/reg-lib/cuda/_reg_blocksize_gpu.cu
index 63be0e5c..cea4c212 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.cu
+++ b/reg-lib/cuda/_reg_blocksize_gpu.cu
@@ -11,11 +11,10 @@
 
 /* ******************************** */
 /* ******************************** */
-NiftyReg_CudaBlock100 * NiftyReg_CudaBlock::instance = nullptr;
+NiftyReg_CudaBlock100 *NiftyReg_CudaBlock::instance = nullptr;
 /* ******************************** */
 /* ******************************** */
-NiftyReg_CudaBlock100::NiftyReg_CudaBlock100()
-{
+NiftyReg_CudaBlock100::NiftyReg_CudaBlock100() {
     Block_target_block = 512; // 15 reg - 32 smem - 24 cmem
     Block_result_block = 384; // 21 reg - 11048 smem - 24 cmem
     /* _reg_mutualinformation_gpu */
@@ -83,8 +82,7 @@ NiftyReg_CudaBlock100::NiftyReg_CudaBlock100()
 #endif
 }
 /* ******************************** */
-NiftyReg_CudaBlock200::NiftyReg_CudaBlock200()
-{
+NiftyReg_CudaBlock200::NiftyReg_CudaBlock200() {
 //    Block_target_block = ; //
 //    Block_result_block = ; //
 //    /* _reg_mutualinformation_gpu */
@@ -152,8 +150,7 @@ NiftyReg_CudaBlock200::NiftyReg_CudaBlock200()
 #endif
 }
 /* ******************************** */
-NiftyReg_CudaBlock300::NiftyReg_CudaBlock300()
-{
+NiftyReg_CudaBlock300::NiftyReg_CudaBlock300() {
     Block_target_block = 640; // 45 reg
     Block_result_block = 640; // 47 reg - ????? smem
     /* _reg_mutualinformation_gpu */
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h
index e04510cf..019a3e58 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.h
+++ b/reg-lib/cuda/_reg_blocksize_gpu.h
@@ -13,13 +13,12 @@
 #include "cuda_runtime.h"
 #include "cuda.h"
 
-/* ******************************** */
-/* ******************************** */
+ /* ******************************** */
+ /* ******************************** */
 #ifndef __VECTOR_TYPES_H__
 #define __VECTOR_TYPES_H__
-struct __attribute__((aligned(4))) float4
-{
-	float x,y,z,w;
+struct __attribute__((aligned(4))) float4 {
+    float x, y, z, w;
 };
 #endif
 /* ******************************** */
@@ -71,124 +70,105 @@ struct __attribute__((aligned(4))) float4
 #endif //CUDART_VERSION >= 3200
 /* ******************************** */
 /* ******************************** */
-class NiftyReg_CudaBlock100
-{
+class NiftyReg_CudaBlock100 {
 public:    /* _reg_blockMatching_gpu */
-   size_t Block_target_block;
-   size_t Block_result_block;
-   /* _reg_mutualinformation_gpu */
-   size_t Block_reg_smoothJointHistogramX;
-   size_t Block_reg_smoothJointHistogramY;
-   size_t Block_reg_smoothJointHistogramZ;
-   size_t Block_reg_smoothJointHistogramW;
-   size_t Block_reg_marginaliseTargetX;
-   size_t Block_reg_marginaliseTargetXY;
-   size_t Block_reg_marginaliseResultX;
-   size_t Block_reg_marginaliseResultXY;
-   size_t Block_reg_getVoxelBasedNMIGradientUsingPW2D;
-   size_t Block_reg_getVoxelBasedNMIGradientUsingPW3D;
-   size_t Block_reg_getVoxelBasedNMIGradientUsingPW2x2;
-   /* _reg_globalTransformation_gpu */
-   size_t Block_reg_affine_deformationField;
-   /* _reg_localTransformation_gpu */
-   size_t Block_reg_spline_getDeformationField2D;
-   size_t Block_reg_spline_getDeformationField3D;
-   size_t Block_reg_spline_getApproxSecondDerivatives2D;
-   size_t Block_reg_spline_getApproxSecondDerivatives3D;
-   size_t Block_reg_spline_getApproxBendingEnergy2D;
-   size_t Block_reg_spline_getApproxBendingEnergy3D;
-   size_t Block_reg_spline_getApproxBendingEnergyGradient2D;
-   size_t Block_reg_spline_getApproxBendingEnergyGradient3D;
-   size_t Block_reg_spline_getApproxJacobianValues2D;
-   size_t Block_reg_spline_getApproxJacobianValues3D;
-   size_t Block_reg_spline_getJacobianValues2D;
-   size_t Block_reg_spline_getJacobianValues3D;
-   size_t Block_reg_spline_logSquaredValues;
-   size_t Block_reg_spline_computeApproxJacGradient2D;
-   size_t Block_reg_spline_computeApproxJacGradient3D;
-   size_t Block_reg_spline_computeJacGradient2D;
-   size_t Block_reg_spline_computeJacGradient3D;
-   size_t Block_reg_spline_approxCorrectFolding3D;
-   size_t Block_reg_spline_correctFolding3D;
-   size_t Block_reg_getDeformationFromDisplacement;
-   size_t Block_reg_getDisplacementFromDeformation;
-   size_t Block_reg_defField_compose2D;
-   size_t Block_reg_defField_compose3D;
-   size_t Block_reg_defField_getJacobianMatrix;
-   /* _reg_optimiser_gpu */
-   size_t Block_reg_initialiseConjugateGradient;
-   size_t Block_reg_GetConjugateGradient1;
-   size_t Block_reg_GetConjugateGradient2;
-   size_t Block_reg_getEuclideanDistance;
-   size_t Block_reg_updateControlPointPosition;
-   /* _reg_ssd_gpu */
-   size_t Block_reg_getSquaredDifference;
-   size_t Block_reg_getSSDGradient;
-   /* _reg_tools_gpu */
-   size_t Block_reg_voxelCentric2NodeCentric;
-   size_t Block_reg_convertNMIGradientFromVoxelToRealSpace;
-   size_t Block_reg_ApplyConvolutionWindowAlongX;
-   size_t Block_reg_ApplyConvolutionWindowAlongY;
-   size_t Block_reg_ApplyConvolutionWindowAlongZ;
-   size_t Block_reg_arithmetic;
-   /* _reg_resampling_gpu */
-   size_t Block_reg_resampleImage2D;
-   size_t Block_reg_resampleImage3D;
-   size_t Block_reg_getImageGradient2D;
-   size_t Block_reg_getImageGradient3D;
+    size_t Block_target_block;
+    size_t Block_result_block;
+    /* _reg_mutualinformation_gpu */
+    size_t Block_reg_smoothJointHistogramX;
+    size_t Block_reg_smoothJointHistogramY;
+    size_t Block_reg_smoothJointHistogramZ;
+    size_t Block_reg_smoothJointHistogramW;
+    size_t Block_reg_marginaliseTargetX;
+    size_t Block_reg_marginaliseTargetXY;
+    size_t Block_reg_marginaliseResultX;
+    size_t Block_reg_marginaliseResultXY;
+    size_t Block_reg_getVoxelBasedNMIGradientUsingPW2D;
+    size_t Block_reg_getVoxelBasedNMIGradientUsingPW3D;
+    size_t Block_reg_getVoxelBasedNMIGradientUsingPW2x2;
+    /* _reg_globalTransformation_gpu */
+    size_t Block_reg_affine_deformationField;
+    /* _reg_localTransformation_gpu */
+    size_t Block_reg_spline_getDeformationField2D;
+    size_t Block_reg_spline_getDeformationField3D;
+    size_t Block_reg_spline_getApproxSecondDerivatives2D;
+    size_t Block_reg_spline_getApproxSecondDerivatives3D;
+    size_t Block_reg_spline_getApproxBendingEnergy2D;
+    size_t Block_reg_spline_getApproxBendingEnergy3D;
+    size_t Block_reg_spline_getApproxBendingEnergyGradient2D;
+    size_t Block_reg_spline_getApproxBendingEnergyGradient3D;
+    size_t Block_reg_spline_getApproxJacobianValues2D;
+    size_t Block_reg_spline_getApproxJacobianValues3D;
+    size_t Block_reg_spline_getJacobianValues2D;
+    size_t Block_reg_spline_getJacobianValues3D;
+    size_t Block_reg_spline_logSquaredValues;
+    size_t Block_reg_spline_computeApproxJacGradient2D;
+    size_t Block_reg_spline_computeApproxJacGradient3D;
+    size_t Block_reg_spline_computeJacGradient2D;
+    size_t Block_reg_spline_computeJacGradient3D;
+    size_t Block_reg_spline_approxCorrectFolding3D;
+    size_t Block_reg_spline_correctFolding3D;
+    size_t Block_reg_getDeformationFromDisplacement;
+    size_t Block_reg_getDisplacementFromDeformation;
+    size_t Block_reg_defField_compose2D;
+    size_t Block_reg_defField_compose3D;
+    size_t Block_reg_defField_getJacobianMatrix;
+    /* _reg_optimiser_gpu */
+    size_t Block_reg_initialiseConjugateGradient;
+    size_t Block_reg_GetConjugateGradient1;
+    size_t Block_reg_GetConjugateGradient2;
+    size_t Block_reg_getEuclideanDistance;
+    size_t Block_reg_updateControlPointPosition;
+    /* _reg_ssd_gpu */
+    size_t Block_reg_getSquaredDifference;
+    size_t Block_reg_getSSDGradient;
+    /* _reg_tools_gpu */
+    size_t Block_reg_voxelCentric2NodeCentric;
+    size_t Block_reg_convertNMIGradientFromVoxelToRealSpace;
+    size_t Block_reg_ApplyConvolutionWindowAlongX;
+    size_t Block_reg_ApplyConvolutionWindowAlongY;
+    size_t Block_reg_ApplyConvolutionWindowAlongZ;
+    size_t Block_reg_arithmetic;
+    /* _reg_resampling_gpu */
+    size_t Block_reg_resampleImage2D;
+    size_t Block_reg_resampleImage3D;
+    size_t Block_reg_getImageGradient2D;
+    size_t Block_reg_getImageGradient3D;
 
-   NiftyReg_CudaBlock100();
-   ~NiftyReg_CudaBlock100()
-   {
-     ;
-   }
+    NiftyReg_CudaBlock100();
 };
 /* ******************************** */
-class NiftyReg_CudaBlock200 : public NiftyReg_CudaBlock100
-{
+class NiftyReg_CudaBlock200: public NiftyReg_CudaBlock100 {
 public:
-   NiftyReg_CudaBlock200();
-   ~NiftyReg_CudaBlock200()
-   {
-     ;
-   }
+    NiftyReg_CudaBlock200();
 };
 /* ******************************** */
-class NiftyReg_CudaBlock300 : public NiftyReg_CudaBlock100
-{
+class NiftyReg_CudaBlock300: public NiftyReg_CudaBlock100 {
 public:
-   NiftyReg_CudaBlock300();
-   ~NiftyReg_CudaBlock300()
-   {
-     ;
-   }
+    NiftyReg_CudaBlock300();
 };
 /* ******************************** */
-class NiftyReg_CudaBlock
-{
+class NiftyReg_CudaBlock {
 public:
-   static NiftyReg_CudaBlock100 * GetInstance(int major)
-   {
-     if (instance) return instance;
-     else
-     {
-       switch(major)
-       {
-       case 3:
-         instance = new NiftyReg_CudaBlock300();
-         break;
-       case 2:
-         instance = new NiftyReg_CudaBlock200();
-         break;
-       default:
-         instance = new NiftyReg_CudaBlock100();
-         break;
-       }
-     }
-     return instance;
-   }
+    static NiftyReg_CudaBlock100* GetInstance(int major) {
+        if (instance) return instance;
+        else {
+            switch (major) {
+            case 3:
+                instance = new NiftyReg_CudaBlock300();
+                break;
+            case 2:
+                instance = new NiftyReg_CudaBlock200();
+                break;
+            default:
+                instance = new NiftyReg_CudaBlock100();
+                break;
+            }
+        }
+        return instance;
+    }
 private:
-   static NiftyReg_CudaBlock100 * instance;
+    static NiftyReg_CudaBlock100 *instance;
 };
 /* ******************************** */
 /* ******************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index dec42d33..2c7c294f 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -38,7 +38,7 @@ int cudaCommon_setCUDACard(CUcontext *ctx, bool verbose) {
 	}
 	NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device));
 	NR_CUDA_SAFE_CALL(cuCtxCreate(ctx, CU_CTX_SCHED_SPIN, max_gflops_device))
-		NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
+	NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
 
 	if (deviceProp.major < 1) {
 		fprintf(stderr, "[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n");
@@ -72,7 +72,7 @@ int cudaCommon_setCUDACard(CUcontext *ctx, bool verbose) {
 			printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n",
 				   deviceProp.multiProcessorCount);
 		}
-		NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(deviceProp.major);
+		NiftyReg_CudaBlock::GetInstance(deviceProp.major);
 	}
 	return EXIT_SUCCESS;
 }
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
index dbbc286f..1a142083 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ b/reg-lib/cuda/_reg_f3d_gpu.cpp
@@ -17,8 +17,8 @@
 reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint)
     : reg_f3d<float>::reg_f3d(refTimePoint, floTimePoint) {
     this->executableName = (char *)"NiftyReg F3D GPU";
-    this->currentReference_gpu = nullptr;
-    this->currentFloating_gpu = nullptr;
+    this->reference_gpu = nullptr;
+    this->floating_gpu = nullptr;
     this->currentMask_gpu = nullptr;
     this->warped_gpu = nullptr;
     this->controlPointGrid_gpu = nullptr;
@@ -33,8 +33,8 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint)
     this->measure_gpu_lncc = nullptr;
     this->measure_gpu_nmi = nullptr;
 
-    this->currentReference2_gpu = nullptr;
-    this->currentFloating2_gpu = nullptr;
+    this->reference2_gpu = nullptr;
+    this->floating2_gpu = nullptr;
     this->warped2_gpu = nullptr;
     this->warpedGradientImage2_gpu = nullptr;
 
@@ -45,33 +45,33 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint)
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 reg_f3d_gpu::~reg_f3d_gpu() {
-    if (this->currentReference_gpu != nullptr)
-        cudaCommon_free(&this->currentReference_gpu);
-    if (this->currentFloating_gpu != nullptr)
-        cudaCommon_free(&this->currentFloating_gpu);
+    if (this->reference_gpu != nullptr)
+        cudaCommon_free(&this->reference_gpu);
+    if (this->floating_gpu != nullptr)
+        cudaCommon_free(&this->floating_gpu);
     if (this->currentMask_gpu != nullptr)
-        cudaCommon_free<int>(&this->currentMask_gpu);
+        cudaCommon_free(&this->currentMask_gpu);
     if (this->warped_gpu != nullptr)
-        cudaCommon_free<float>(&this->warped_gpu);
+        cudaCommon_free(&this->warped_gpu);
     if (this->controlPointGrid_gpu != nullptr)
-        cudaCommon_free<float4>(&this->controlPointGrid_gpu);
+        cudaCommon_free(&this->controlPointGrid_gpu);
     if (this->deformationFieldImage_gpu != nullptr)
-        cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
+        cudaCommon_free(&this->deformationFieldImage_gpu);
     if (this->warpedGradientImage_gpu != nullptr)
-        cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
+        cudaCommon_free(&this->warpedGradientImage_gpu);
     if (this->voxelBasedMeasureGradientImage_gpu != nullptr)
-        cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
+        cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu);
     if (this->transformationGradient_gpu != nullptr)
-        cudaCommon_free<float4>(&this->transformationGradient_gpu);
+        cudaCommon_free(&this->transformationGradient_gpu);
 
-    if (this->currentReference2_gpu != nullptr)
-        cudaCommon_free(&this->currentReference2_gpu);
-    if (this->currentFloating2_gpu != nullptr)
-        cudaCommon_free(&this->currentFloating2_gpu);
+    if (this->reference2_gpu != nullptr)
+        cudaCommon_free(&this->reference2_gpu);
+    if (this->floating2_gpu != nullptr)
+        cudaCommon_free(&this->floating2_gpu);
     if (this->warped2_gpu != nullptr)
-        cudaCommon_free<float>(&this->warped2_gpu);
+        cudaCommon_free(&this->warped2_gpu);
     if (this->warpedGradientImage2_gpu != nullptr)
-        cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);
+        cudaCommon_free(&this->warpedGradientImage2_gpu);
 
     if (this->optimiser != nullptr) {
         delete this->optimiser;
@@ -136,25 +136,25 @@ void reg_f3d_gpu::AllocateWarped() {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearWarped() {
-    reg_f3d::ClearWarped();
+void reg_f3d_gpu::DeallocateWarped() {
+    reg_f3d::DeallocateWarped();
 
     if (this->warped_gpu != nullptr) {
-        cudaCommon_free<float>(&this->warped_gpu);
+        cudaCommon_free(&this->warped_gpu);
         this->warped_gpu = nullptr;
     }
     if (this->warped2_gpu != nullptr) {
-        cudaCommon_free<float>(&this->warped2_gpu);
+        cudaCommon_free(&this->warped2_gpu);
         this->warped2_gpu = nullptr;
     }
 #ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::ClearWarped");
+    reg_print_fct_debug("reg_f3d_gpu::DeallocateWarped");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::AllocateDeformationField() {
-    this->ClearDeformationField();
+    this->DeallocateDeformationField();
     NR_CUDA_SAFE_CALL(cudaMalloc(&this->deformationFieldImage_gpu,
                                  this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
 #ifndef NDEBUG
@@ -163,19 +163,19 @@ void reg_f3d_gpu::AllocateDeformationField() {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearDeformationField() {
+void reg_f3d_gpu::DeallocateDeformationField() {
     if (this->deformationFieldImage_gpu != nullptr) {
-        cudaCommon_free<float4>(&this->deformationFieldImage_gpu);
+        cudaCommon_free(&this->deformationFieldImage_gpu);
         this->deformationFieldImage_gpu = nullptr;
     }
 #ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::ClearDeformationField");
+    reg_print_fct_debug("reg_f3d_gpu::DeallocateDeformationField");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::AllocateWarpedGradient() {
-    this->ClearWarpedGradient();
+    this->DeallocateWarpedGradient();
     if (this->inputFloating->nt == 1) {
         NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu,
                                      this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
@@ -195,24 +195,24 @@ void reg_f3d_gpu::AllocateWarpedGradient() {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearWarpedGradient() {
+void reg_f3d_gpu::DeallocateWarpedGradient() {
     if (this->warpedGradientImage_gpu != nullptr) {
-        cudaCommon_free<float4>(&this->warpedGradientImage_gpu);
+        cudaCommon_free(&this->warpedGradientImage_gpu);
         this->warpedGradientImage_gpu = nullptr;
     }
     if (this->warpedGradientImage2_gpu != nullptr) {
-        cudaCommon_free<float4>(&this->warpedGradientImage2_gpu);
+        cudaCommon_free(&this->warpedGradientImage2_gpu);
         this->warpedGradientImage2_gpu = nullptr;
     }
 #ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::ClearWarpedGradient");
+    reg_print_fct_debug("reg_f3d_gpu::DeallocateWarpedGradient");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() {
-    this->ClearVoxelBasedMeasureGradient();
-    if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, this->currentReference->dim)) {
+    this->DeallocateVoxelBasedMeasureGradient();
+    if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, this->reference->dim)) {
         reg_print_fct_error("reg_f3d_gpu::AllocateVoxelBasedMeasureGradient()");
         reg_print_msg_error("Error when allocating the voxel based measure gradient image");
         reg_exit();
@@ -223,19 +223,19 @@ void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() {
+void reg_f3d_gpu::DeallocateVoxelBasedMeasureGradient() {
     if (this->voxelBasedMeasureGradientImage_gpu != nullptr) {
-        cudaCommon_free<float4>(&this->voxelBasedMeasureGradientImage_gpu);
+        cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu);
         this->voxelBasedMeasureGradientImage_gpu = nullptr;
     }
 #ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::ClearVoxelBasedMeasureGradient");
+    reg_print_fct_debug("reg_f3d_gpu::DeallocateVoxelBasedMeasureGradient");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_f3d_gpu::AllocateTransformationGradient() {
-    this->ClearTransformationGradient();
+    this->DeallocateTransformationGradient();
     if (cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu, this->controlPointGrid->dim)) {
         reg_print_fct_error("reg_f3d_gpu::AllocateTransformationGradient()");
         reg_print_msg_error("Error when allocating the node based gradient image");
@@ -247,13 +247,13 @@ void reg_f3d_gpu::AllocateTransformationGradient() {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearTransformationGradient() {
+void reg_f3d_gpu::DeallocateTransformationGradient() {
     if (this->transformationGradient_gpu != nullptr) {
-        cudaCommon_free<float4>(&this->transformationGradient_gpu);
+        cudaCommon_free(&this->transformationGradient_gpu);
         this->transformationGradient_gpu = nullptr;
     }
 #ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::ClearTransformationGradient");
+    reg_print_fct_debug("reg_f3d_gpu::DeallocateTransformationGradient");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -263,18 +263,18 @@ double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) {
 
     bool approx = type == 2 ? false : this->jacobianLogApproximation;
 
-    double value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference,
+    double value = reg_spline_getJacobianPenaltyTerm_gpu(this->reference,
                                                          this->controlPointGrid,
-                                                         &this->controlPointGrid_gpu,
+                                                         this->controlPointGrid_gpu,
                                                          approx);
 
     unsigned int maxit = 5;
     if (type > 0) maxit = 20;
     unsigned int it = 0;
     while (value != value && it < maxit) {
-        value = reg_spline_correctFolding_gpu(this->currentReference,
+        value = reg_spline_correctFolding_gpu(this->reference,
                                               this->controlPointGrid,
-                                              &this->controlPointGrid_gpu,
+                                              this->controlPointGrid_gpu,
                                               approx);
 #ifndef NDEBUG
         reg_print_msg_debug("Folding correction");
@@ -307,7 +307,7 @@ double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() {
     if (this->bendingEnergyWeight <= 0) return 0;
 
     double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid,
-                                                      &this->controlPointGrid_gpu);
+                                                      this->controlPointGrid_gpu);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm");
 #endif
@@ -343,10 +343,10 @@ void reg_f3d_gpu::GetDeformationField() {
     } else {
         // Compute the deformation field
         reg_spline_getDeformationField_gpu(this->controlPointGrid,
-                                           this->currentReference,
-                                           &this->controlPointGrid_gpu,
-                                           &this->deformationFieldImage_gpu,
-                                           &this->currentMask_gpu,
+                                           this->reference,
+                                           this->controlPointGrid_gpu,
+                                           this->deformationFieldImage_gpu,
+                                           this->currentMask_gpu,
                                            this->activeVoxelNumber[this->currentLevel],
                                            true); // use B-splines
     }
@@ -364,20 +364,20 @@ void reg_f3d_gpu::WarpFloatingImage(int inter) {
     this->GetDeformationField();
 
     // Resample the floating image
-    reg_resampleImage_gpu(this->currentFloating,
-                          &this->warped_gpu,
-                          &this->currentFloating_gpu,
-                          &this->deformationFieldImage_gpu,
-                          &this->currentMask_gpu,
+    reg_resampleImage_gpu(this->floating,
+                          this->warped_gpu,
+                          this->floating_gpu,
+                          this->deformationFieldImage_gpu,
+                          this->currentMask_gpu,
                           this->activeVoxelNumber[this->currentLevel],
                           this->warpedPaddingValue);
 
-    if (this->currentFloating->nt == 2) {
-        reg_resampleImage_gpu(this->currentFloating,
-                              &this->warped2_gpu,
-                              &this->currentFloating2_gpu,
-                              &this->deformationFieldImage_gpu,
-                              &this->currentMask_gpu,
+    if (this->floating->nt == 2) {
+        reg_resampleImage_gpu(this->floating,
+                              this->warped2_gpu,
+                              this->floating2_gpu,
+                              this->deformationFieldImage_gpu,
+                              this->currentMask_gpu,
                               this->activeVoxelNumber[this->currentLevel],
                               this->warpedPaddingValue);
     }
@@ -399,14 +399,14 @@ void reg_f3d_gpu::SetGradientImageToZero() {
 void reg_f3d_gpu::GetVoxelBasedGradient() {
     // The voxel based gradient image is filled with zeros
     cudaMemset(this->voxelBasedMeasureGradientImage_gpu, 0,
-               this->currentReference->nx * this->currentReference->ny * this->currentReference->nz *
+               this->reference->nx * this->reference->ny * this->reference->nz *
                sizeof(float4));
 
     // The intensity gradient is first computed
-    reg_getImageGradient_gpu(this->currentFloating,
-                             &this->currentFloating_gpu,
-                             &this->deformationFieldImage_gpu,
-                             &this->warpedGradientImage_gpu,
+    reg_getImageGradient_gpu(this->floating,
+                             this->floating_gpu,
+                             this->deformationFieldImage_gpu,
+                             this->warpedGradientImage_gpu,
                              this->activeVoxelNumber[this->currentLevel],
                              this->warpedPaddingValue);
 
@@ -437,33 +437,33 @@ void reg_f3d_gpu::GetSimilarityMeasureGradient() {
 
     // The voxel based gradient is smoothed
     float smoothingRadius[3] = {
-        this->controlPointGrid->dx / this->currentReference->dx,
-        this->controlPointGrid->dy / this->currentReference->dy,
-        this->controlPointGrid->dz / this->currentReference->dz
+        this->controlPointGrid->dx / this->reference->dx,
+        this->controlPointGrid->dy / this->reference->dy,
+        this->controlPointGrid->dz / this->reference->dz
     };
     reg_smoothImageForCubicSpline_gpu(this->warped,
-                                      &this->voxelBasedMeasureGradientImage_gpu,
+                                      this->voxelBasedMeasureGradientImage_gpu,
                                       smoothingRadius);
 
     // The node gradient is extracted
     reg_voxelCentric2NodeCentric_gpu(this->warped,
                                      this->controlPointGrid,
-                                     &this->voxelBasedMeasureGradientImage_gpu,
-                                     &this->transformationGradient_gpu,
+                                     this->voxelBasedMeasureGradientImage_gpu,
+                                     this->transformationGradient_gpu,
                                      this->similarityWeight);
 
     /* The similarity measure gradient is converted from voxel space to real space */
     mat44 *floatingMatrix_xyz = nullptr;
-    if (this->currentFloating->sform_code > 0)
-        floatingMatrix_xyz = &(this->currentFloating->sto_xyz);
-    else floatingMatrix_xyz = &(this->currentFloating->qto_xyz);
+    if (this->floating->sform_code > 0)
+        floatingMatrix_xyz = &(this->floating->sto_xyz);
+    else floatingMatrix_xyz = &(this->floating->qto_xyz);
     reg_convertNMIGradientFromVoxelToRealSpace_gpu(floatingMatrix_xyz,
                                                    this->controlPointGrid,
-                                                   &this->transformationGradient_gpu);
+                                                   this->transformationGradient_gpu);
     // The gradient is smoothed using a Gaussian kernel if it is required
     if (this->gradientSmoothingSigma != 0) {
         reg_gaussianSmoothing_gpu(this->controlPointGrid,
-                                  &this->transformationGradient_gpu,
+                                  this->transformationGradient_gpu,
                                   this->gradientSmoothingSigma,
                                   nullptr);
     }
@@ -477,8 +477,8 @@ void reg_f3d_gpu::GetBendingEnergyGradient() {
     if (this->bendingEnergyWeight <= 0) return;
 
     reg_spline_approxBendingEnergyGradient_gpu(this->controlPointGrid,
-                                               &this->controlPointGrid_gpu,
-                                               &this->transformationGradient_gpu,
+                                               this->controlPointGrid_gpu,
+                                               this->transformationGradient_gpu,
                                                this->bendingEnergyWeight);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::GetBendingEnergyGradient");
@@ -499,10 +499,10 @@ void reg_f3d_gpu::GetLinearEnergyGradient() {
 void reg_f3d_gpu::GetJacobianBasedGradient() {
     if (this->jacobianLogWeight <= 0) return;
 
-    reg_spline_getJacobianPenaltyTermGradient_gpu(this->currentReference,
+    reg_spline_getJacobianPenaltyTermGradient_gpu(this->reference,
                                                   this->controlPointGrid,
-                                                  &this->controlPointGrid_gpu,
-                                                  &this->transformationGradient_gpu,
+                                                  this->controlPointGrid_gpu,
+                                                  this->transformationGradient_gpu,
                                                   this->jacobianLogWeight,
                                                   this->jacobianLogApproximation);
 #ifndef NDEBUG
@@ -526,8 +526,7 @@ void reg_f3d_gpu::UpdateParameters(float scale) {
     float4 *bestDOF = reinterpret_cast<float4*>(this->optimiser->GetBestDOF());
     float4 *gradient = reinterpret_cast<float4*>(this->optimiser->GetGradient());
 
-    reg_updateControlPointPosition_gpu(this->controlPointGrid, &currentDOF, &bestDOF, &gradient, scale);
-
+    reg_updateControlPointPosition_gpu(this->controlPointGrid, currentDOF, bestDOF, gradient, scale);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::UpdateParameters");
 #endif
@@ -624,18 +623,18 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() {
         reg_exit();
     }
 
-    this->currentReference = this->inputReference;
-    this->currentFloating = this->inputFloating;
+    this->reference = this->inputReference;
+    this->floating = this->inputFloating;
     this->currentMask = (int*)calloc(this->activeVoxelNumber[this->currentLevel], sizeof(int));
 
-    reg_tools_changeDatatype<float>(this->currentReference);
-    reg_tools_changeDatatype<float>(this->currentFloating);
+    reg_tools_changeDatatype<float>(this->reference);
+    reg_tools_changeDatatype<float>(this->floating);
 
     this->AllocateWarped();
     this->AllocateDeformationField();
     this->InitialiseCurrentLevel();
     this->WarpFloatingImage(3); // cubic spline interpolation
-    this->ClearDeformationField();
+    this->DeallocateDeformationField();
 
     nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
     warpedImage[0] = nifti_copy_nim_info(this->warped);
@@ -645,13 +644,13 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() {
     warpedImage[0]->scl_inter = this->inputFloating->scl_inter;
     warpedImage[0]->data = (void*)malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper);
     cudaCommon_transferFromDeviceToNifti(warpedImage[0], &this->warped_gpu);
-    if (this->currentFloating->nt == 2) {
+    if (this->floating->nt == 2) {
         warpedImage[1] = warpedImage[0];
         warpedImage[1]->data = (void*)malloc(warpedImage[1]->nvox * warpedImage[1]->nbyper);
         cudaCommon_transferFromDeviceToNifti(warpedImage[1], &this->warped2_gpu);
     }
 
-    this->ClearWarped();
+    this->DeallocateWarped();
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d_gpu::GetWarpedImage");
 #endif
@@ -662,63 +661,63 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() {
 float reg_f3d_gpu::InitialiseCurrentLevel() {
     float maxStepSize = reg_f3d::InitialiseCurrentLevel();
 
-    if (this->currentReference_gpu != nullptr) cudaCommon_free(&this->currentReference_gpu);
-    if (this->currentReference2_gpu != nullptr) cudaCommon_free(&this->currentReference2_gpu);
-    if (this->currentReference->nt == 1) {
-        if (cudaCommon_allocateArrayToDevice<float>(&this->currentReference_gpu, this->currentReference->dim)) {
+    if (this->reference_gpu != nullptr) cudaCommon_free(&this->reference_gpu);
+    if (this->reference2_gpu != nullptr) cudaCommon_free(&this->reference2_gpu);
+    if (this->reference->nt == 1) {
+        if (cudaCommon_allocateArrayToDevice<float>(&this->reference_gpu, this->reference->dim)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
             reg_print_msg_error("Error when allocating the reference image");
             reg_exit();
         }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentReference_gpu, this->currentReference)) {
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->reference_gpu, this->reference)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
             reg_print_msg_error("Error when transferring the reference image");
             reg_exit();
         }
-    } else if (this->currentReference->nt == 2) {
-        if (cudaCommon_allocateArrayToDevice<float>(&this->currentReference_gpu,
-                                                    &this->currentReference2_gpu, this->currentReference->dim)) {
+    } else if (this->reference->nt == 2) {
+        if (cudaCommon_allocateArrayToDevice<float>(&this->reference_gpu,
+                                                    &this->reference2_gpu, this->reference->dim)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
             reg_print_msg_error("Error when allocating the reference image");
             reg_exit();
         }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentReference_gpu,
-                                                           &this->currentReference2_gpu, this->currentReference)) {
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->reference_gpu,
+                                                           &this->reference2_gpu, this->reference)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
             reg_print_msg_error("Error when transferring the reference image");
             reg_exit();
         }
     }
 
-    if (this->currentFloating_gpu != nullptr) cudaCommon_free(&this->currentFloating_gpu);
-    if (this->currentFloating2_gpu != nullptr) cudaCommon_free(&this->currentFloating2_gpu);
-    if (this->currentReference->nt == 1) {
-        if (cudaCommon_allocateArrayToDevice<float>(&this->currentFloating_gpu, this->currentFloating->dim)) {
+    if (this->floating_gpu != nullptr) cudaCommon_free(&this->floating_gpu);
+    if (this->floating2_gpu != nullptr) cudaCommon_free(&this->floating2_gpu);
+    if (this->reference->nt == 1) {
+        if (cudaCommon_allocateArrayToDevice<float>(&this->floating_gpu, this->floating->dim)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
             reg_print_msg_error("Error when allocating the floating image");
             reg_exit();
         }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentFloating_gpu, this->currentFloating)) {
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->floating_gpu, this->floating)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
             reg_print_msg_error("Error when transferring the floating image");
             reg_exit();
         }
-    } else if (this->currentReference->nt == 2) {
-        if (cudaCommon_allocateArrayToDevice<float>(&this->currentFloating_gpu,
-                                                    &this->currentFloating2_gpu, this->currentFloating->dim)) {
+    } else if (this->reference->nt == 2) {
+        if (cudaCommon_allocateArrayToDevice<float>(&this->floating_gpu,
+                                                    &this->floating2_gpu, this->floating->dim)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
             reg_print_msg_error("Error when allocating the floating image");
             reg_exit();
         }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->currentFloating_gpu,
-                                                           &this->currentFloating2_gpu, this->currentFloating)) {
+        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->floating_gpu,
+                                                           &this->floating2_gpu, this->floating)) {
             reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
             reg_print_msg_error("Error when transferring the floating image");
             reg_exit();
         }
     }
 
-    if (this->controlPointGrid_gpu != nullptr) cudaCommon_free<float4>(&this->controlPointGrid_gpu);
+    if (this->controlPointGrid_gpu != nullptr) cudaCommon_free(&this->controlPointGrid_gpu);
     if (cudaCommon_allocateArrayToDevice<float4>(&this->controlPointGrid_gpu, this->controlPointGrid->dim)) {
         reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
         reg_print_msg_error("Error when allocating the control point image");
@@ -733,7 +732,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
     int *targetMask_h;
     NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask_h, this->activeVoxelNumber[this->currentLevel] * sizeof(int)));
     int *targetMask_h_ptr = &targetMask_h[0];
-    for (int i = 0; i < this->currentReference->nx * this->currentReference->ny * this->currentReference->nz; i++) {
+    for (int i = 0; i < this->reference->nx * this->reference->ny * this->reference->nz; i++) {
         if (this->currentMask[i] != -1)
             *targetMask_h_ptr++ = i;
     }
@@ -749,32 +748,32 @@ float reg_f3d_gpu::InitialiseCurrentLevel() {
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::ClearCurrentInputImage() {
-    reg_f3d::ClearCurrentInputImage();
+void reg_f3d_gpu::DeallocateCurrentInputImage() {
+    reg_f3d::DeallocateCurrentInputImage();
 
     if (cudaCommon_transferFromDeviceToNifti<float4>(this->controlPointGrid, &this->controlPointGrid_gpu)) {
-        reg_print_fct_error("reg_f3d_gpu::ClearCurrentInputImage()");
+        reg_print_fct_error("reg_f3d_gpu::DeallocateCurrentInputImage()");
         reg_print_msg_error("Error when transferring back the control point image");
         reg_exit();
     }
-    cudaCommon_free<float4>(&this->controlPointGrid_gpu);
+    cudaCommon_free(&this->controlPointGrid_gpu);
     this->controlPointGrid_gpu = nullptr;
-    cudaCommon_free(&this->currentReference_gpu);
-    this->currentReference_gpu = nullptr;
-    cudaCommon_free(&this->currentFloating_gpu);
-    this->currentFloating_gpu = nullptr;
+    cudaCommon_free(&this->reference_gpu);
+    this->reference_gpu = nullptr;
+    cudaCommon_free(&this->floating_gpu);
+    this->floating_gpu = nullptr;
     NR_CUDA_SAFE_CALL(cudaFree(this->currentMask_gpu));
     this->currentMask_gpu = nullptr;
 
-    if (this->currentReference2_gpu != nullptr)
-        cudaCommon_free(&this->currentReference2_gpu);
-    this->currentReference2_gpu = nullptr;
-    if (this->currentFloating2_gpu != nullptr)
-        cudaCommon_free(&this->currentFloating2_gpu);
-    this->currentFloating2_gpu = nullptr;
+    if (this->reference2_gpu != nullptr)
+        cudaCommon_free(&this->reference2_gpu);
+    this->reference2_gpu = nullptr;
+    if (this->floating2_gpu != nullptr)
+        cudaCommon_free(&this->floating2_gpu);
+    this->floating2_gpu = nullptr;
 
 #ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::ClearCurrentInputImage");
+    reg_print_fct_debug("reg_f3d_gpu::DeallocateCurrentInputImage");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -803,7 +802,7 @@ void reg_f3d_gpu::SetOptimiser() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 float reg_f3d_gpu::NormaliseGradient() {
     // First compute the gradient max length for normalisation purpose
-    float length = reg_getMaximalLength_gpu(&this->transformationGradient_gpu, this->optimiser->GetVoxNumber());
+    float length = reg_getMaximalLength_gpu(this->transformationGradient_gpu, this->optimiser->GetVoxNumber());
 
     if (strcmp(this->executableName, "NiftyReg F3D GPU") == 0) {
         // The gradient is normalised if we are running F3D
@@ -813,7 +812,7 @@ float reg_f3d_gpu::NormaliseGradient() {
         sprintf(text, "Objective function gradient maximal length: %g", length);
         reg_print_msg_debug(text);
 #endif
-        reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), &this->transformationGradient_gpu, 1.f / length);
+        reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), this->transformationGradient_gpu, 1.f / length);
     }
 
 #ifndef NDEBUG
@@ -968,15 +967,15 @@ void reg_f3d_gpu::InitialiseSimilarity() {
             measure_gpu_nmi->SetTimepointWeight(i, 1.0);
     }
     if (this->measure_gpu_nmi != nullptr) {
-        this->measure_gpu_nmi->InitialiseMeasure(this->currentReference,
-                                                 this->currentFloating,
+        this->measure_gpu_nmi->InitialiseMeasure(this->reference,
+                                                 this->floating,
                                                  this->currentMask,
                                                  this->activeVoxelNumber[this->currentLevel],
                                                  this->warped,
-                                                 this->warImgGradient,
+                                                 this->warpedGradient,
                                                  this->voxelBasedMeasureGradient,
-                                                 &this->currentReference_gpu,
-                                                 &this->currentFloating_gpu,
+                                                 &this->reference_gpu,
+                                                 &this->floating_gpu,
                                                  &this->currentMask_gpu,
                                                  &this->warped_gpu,
                                                  &this->warpedGradientImage_gpu,
@@ -985,16 +984,16 @@ void reg_f3d_gpu::InitialiseSimilarity() {
     }
 
     if (this->measure_gpu_ssd != nullptr) {
-        this->measure_gpu_ssd->InitialiseMeasure(this->currentReference,
-                                                 this->currentFloating,
+        this->measure_gpu_ssd->InitialiseMeasure(this->reference,
+                                                 this->floating,
                                                  this->currentMask,
                                                  this->activeVoxelNumber[this->currentLevel],
                                                  this->warped,
-                                                 this->warImgGradient,
+                                                 this->warpedGradient,
                                                  this->voxelBasedMeasureGradient,
                                                  this->localWeightSimCurrent,
-                                                 &this->currentReference_gpu,
-                                                 &this->currentFloating_gpu,
+                                                 &this->reference_gpu,
+                                                 &this->floating_gpu,
                                                  &this->currentMask_gpu,
                                                  &this->warped_gpu,
                                                  &this->warpedGradientImage_gpu,
@@ -1003,15 +1002,15 @@ void reg_f3d_gpu::InitialiseSimilarity() {
     }
 
     if (this->measure_gpu_kld != nullptr) {
-        this->measure_gpu_kld->InitialiseMeasure(this->currentReference,
-                                                 this->currentFloating,
+        this->measure_gpu_kld->InitialiseMeasure(this->reference,
+                                                 this->floating,
                                                  this->currentMask,
                                                  this->activeVoxelNumber[this->currentLevel],
                                                  this->warped,
-                                                 this->warImgGradient,
+                                                 this->warpedGradient,
                                                  this->voxelBasedMeasureGradient,
-                                                 &this->currentReference_gpu,
-                                                 &this->currentFloating_gpu,
+                                                 &this->reference_gpu,
+                                                 &this->floating_gpu,
                                                  &this->currentMask_gpu,
                                                  &this->warped_gpu,
                                                  &this->warpedGradientImage_gpu,
@@ -1020,15 +1019,15 @@ void reg_f3d_gpu::InitialiseSimilarity() {
     }
 
     if (this->measure_gpu_lncc != nullptr) {
-        this->measure_gpu_lncc->InitialiseMeasure(this->currentReference,
-                                                  this->currentFloating,
+        this->measure_gpu_lncc->InitialiseMeasure(this->reference,
+                                                  this->floating,
                                                   this->currentMask,
                                                   this->activeVoxelNumber[this->currentLevel],
                                                   this->warped,
-                                                  this->warImgGradient,
+                                                  this->warpedGradient,
                                                   this->voxelBasedMeasureGradient,
-                                                  &this->currentReference_gpu,
-                                                  &this->currentFloating_gpu,
+                                                  &this->reference_gpu,
+                                                  &this->floating_gpu,
                                                   &this->currentMask_gpu,
                                                   &this->warped_gpu,
                                                   &this->warpedGradientImage_gpu,
@@ -1037,15 +1036,15 @@ void reg_f3d_gpu::InitialiseSimilarity() {
     }
 
     if (this->measure_gpu_dti != nullptr) {
-        this->measure_gpu_dti->InitialiseMeasure(this->currentReference,
-                                                 this->currentFloating,
+        this->measure_gpu_dti->InitialiseMeasure(this->reference,
+                                                 this->floating,
                                                  this->currentMask,
                                                  this->activeVoxelNumber[this->currentLevel],
                                                  this->warped,
-                                                 this->warImgGradient,
+                                                 this->warpedGradient,
                                                  this->voxelBasedMeasureGradient,
-                                                 &this->currentReference_gpu,
-                                                 &this->currentFloating_gpu,
+                                                 &this->reference_gpu,
+                                                 &this->floating_gpu,
                                                  &this->currentMask_gpu,
                                                  &this->warped_gpu,
                                                  &this->warpedGradientImage_gpu,
diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h
index b982236d..94167eba 100755
--- a/reg-lib/cuda/_reg_f3d_gpu.h
+++ b/reg-lib/cuda/_reg_f3d_gpu.h
@@ -25,8 +25,8 @@
 class reg_f3d_gpu: public reg_f3d<float> {
 protected:
     // cuda variables
-    cudaArray *currentReference_gpu;
-    cudaArray *currentFloating_gpu;
+    cudaArray *reference_gpu;
+    cudaArray *floating_gpu;
     int *currentMask_gpu;
     float *warped_gpu;
     float4 *controlPointGrid_gpu;
@@ -36,8 +36,8 @@ class reg_f3d_gpu: public reg_f3d<float> {
     float4 *transformationGradient_gpu;
 
     // cuda variable for multispectral registration
-    cudaArray *currentReference2_gpu;
-    cudaArray *currentFloating2_gpu;
+    cudaArray *reference2_gpu;
+    cudaArray *floating2_gpu;
     float *warped2_gpu;
     float4 *warpedGradientImage2_gpu;
 
@@ -49,17 +49,17 @@ class reg_f3d_gpu: public reg_f3d<float> {
     reg_nmi_gpu *measure_gpu_nmi;
 
     float InitialiseCurrentLevel();
-    void ClearCurrentInputImage();
+    void DeallocateCurrentInputImage();
     void AllocateWarped();
-    void ClearWarped();
+    void DeallocateWarped();
     void AllocateDeformationField();
-    void ClearDeformationField();
+    void DeallocateDeformationField();
     void AllocateWarpedGradient();
-    void ClearWarpedGradient();
+    void DeallocateWarpedGradient();
     void AllocateVoxelBasedMeasureGradient();
-    void ClearVoxelBasedMeasureGradient();
+    void DeallocateVoxelBasedMeasureGradient();
     void AllocateTransformationGradient();
-    void ClearTransformationGradient();
+    void DeallocateTransformationGradient();
 
     double ComputeJacobianBasedPenaltyTerm(int);
     double ComputeBendingEnergyPenaltyTerm();
@@ -77,7 +77,7 @@ class reg_f3d_gpu: public reg_f3d<float> {
     void GetApproximatedGradient();
     void UpdateParameters(float);
     void SetOptimiser();
-    void SetGradientImageToZero();
+    // void SetGradientImageToZero();
     float NormaliseGradient();
     void InitialiseSimilarity();
 
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index 38d42a89..644f4fdd 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -17,9 +17,9 @@
 /* *************************************************************** */
 void reg_affine_positionField_gpu(	mat44 *affineMatrix,
 					nifti_image *targetImage,
-					float4 **array_d)
+					float4 *array_d)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     int3 imageSize = make_int3(targetImage->nx,targetImage->ny,targetImage->nz);
@@ -55,7 +55,7 @@ void reg_affine_positionField_gpu(	mat44 *affineMatrix,
     dim3 B1(NR_BLOCK->Block_reg_affine_deformationField,1,1);
         dim3 G1(Grid_reg_affine_deformationField,Grid_reg_affine_deformationField,1);
 
-    reg_affine_deformationField_kernel <<< G1, B1 >>> (*array_d);
+    reg_affine_deformationField_kernel <<< G1, B1 >>> (array_d);
         NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
 #ifndef NDEBUG
     printf("[NiftyReg CUDA DEBUG] reg_affine_deformationField_kernel kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n",
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h
index 68db157c..754f10e4 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h
@@ -18,4 +18,4 @@
 extern "C++"
 void reg_affine_positionField_gpu(mat44 *,
                                   nifti_image *,
-                                  float4 **);
+                                  float4 *);
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 5d191f30..fde32ebc 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -17,13 +17,13 @@
 /* *************************************************************** */
 void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 										nifti_image *reference,
-										float4 **controlPointImageArray_d,
-										float4 **positionFieldImageArray_d,
-										int **mask_d,
+										float4 *controlPointImageArray_d,
+										float4 *positionFieldImageArray_d,
+										int *mask_d,
 										int activeVoxelNumber,
 										bool bspline)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int voxelNumber = reference->nx * reference->ny * reference->nz;
@@ -44,8 +44,8 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)))
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int)))
 
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, *controlPointImageArray_d, controlPointNumber*sizeof(float4)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber*sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)))
 
 	if(reference->nz>1){
 		const unsigned int Grid_reg_spline_getDeformationField3D =
@@ -54,8 +54,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 		dim3 B1(NR_BLOCK->Block_reg_spline_getDeformationField3D,1,1);
 		// 8 floats of shared memory are allocated per thread
 		reg_spline_getDeformationField3D
-				<<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField3D*8*sizeof(float) >>>
-				(*positionFieldImageArray_d);
+				<<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField3D*8*sizeof(float) >>>(positionFieldImageArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	else{
@@ -65,8 +64,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 		dim3 B1(NR_BLOCK->Block_reg_spline_getDeformationField2D,1,1);
 		// 4 floats of shared memory are allocated per thread
 		reg_spline_getDeformationField2D
-				<<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField2D*4*sizeof(float) >>>
-				   (*positionFieldImageArray_d);
+				<<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField2D*4*sizeof(float) >>>(positionFieldImageArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 
@@ -76,10 +74,9 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage,
-										  float4 **controlPointImageArray_d)
+float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
@@ -88,7 +85,7 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage,
 
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, *controlPointImageArray_d, controlPointGridMem))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem))
 
 	// First compute all the second derivatives
 	float4 *secondDerivativeValues_d;
@@ -150,11 +147,11 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage,
 /* *************************************************************** */
 /* *************************************************************** */
 void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
-												float4 **controlPointImageArray_d,
-												float4 **nodeGradientArray_d,
+												float4 *controlPointImageArray_d,
+												float4 *nodeGradientArray_d,
 												float bendingEnergyWeight)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
@@ -163,7 +160,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, *controlPointImageArray_d, controlPointGridMem))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem))
 
 	// First compute all the second derivatives
 	float4 *secondDerivativeValues_d;
@@ -198,7 +195,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D)));
 		dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1);
 		dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D,1,1);
-		reg_spline_getApproxBendingEnergyGradient3D_kernel <<< G2, B2 >>>(*nodeGradientArray_d);
+		reg_spline_getApproxBendingEnergyGradient3D_kernel <<< G2, B2 >>>(nodeGradientArray_d);
 		NR_CUDA_CHECK_KERNEL(G2,B2)
 	}
 	else{
@@ -209,7 +206,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D)));
 		dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1);
 		dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D,1,1);
-		reg_spline_getApproxBendingEnergyGradient2D_kernel <<< G2, B2 >>>(*nodeGradientArray_d);
+		reg_spline_getApproxBendingEnergyGradient2D_kernel <<< G2, B2 >>>(nodeGradientArray_d);
 		NR_CUDA_CHECK_KERNEL(G2,B2)
 	}
 	NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture))
@@ -220,11 +217,11 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 /* *************************************************************** */
 /* *************************************************************** */
 void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
-											 float4 **controlPointImageArray_d,
-											 float **jacobianMatrices_d,
-											 float **jacobianDet_d)
+											float4 *controlPointImageArray_d,
+											float *jacobianMatrices_d,
+											float *jacobianDet_d)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Need to reorient the Jacobian matrix using the header information - real to voxel conversion
@@ -247,7 +244,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, *controlPointImageArray_d, controlPointGridMem))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem))
 
 	// The Jacobian matrix is computed for every control point
 	if(controlPointImage->nz>1){
@@ -255,7 +252,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxJacobianValues3D)));
 		dim3 G1(Grid_reg_spline_getApproxJacobianValues3D,Grid_reg_spline_getApproxJacobianValues3D,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues3D,1,1);
-		reg_spline_getApproxJacobianValues3D_kernel<<< G1, B1>>>(*jacobianMatrices_d, *jacobianDet_d);
+		reg_spline_getApproxJacobianValues3D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	else{
@@ -263,7 +260,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxJacobianValues2D)));
 		dim3 G1(Grid_reg_spline_getApproxJacobianValues2D,Grid_reg_spline_getApproxJacobianValues2D,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues2D,1,1);
-		reg_spline_getApproxJacobianValues2D_kernel<<< G1, B1>>>(*jacobianMatrices_d, *jacobianDet_d);
+		reg_spline_getApproxJacobianValues2D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture))
@@ -271,11 +268,11 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 /* *************************************************************** */
 void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 									   nifti_image *referenceImage,
-									   float4 **controlPointImageArray_d,
-									   float **jacobianMatrices_d,
-									   float **jacobianDet_d)
+									   float4 *controlPointImageArray_d,
+									   float *jacobianMatrices_d,
+									   float *jacobianDet_d)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Need to reorient the Jacobian matrix using the header information - real to voxel conversion
@@ -306,7 +303,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)))
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, *controlPointImageArray_d, controlPointNumber*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4)))
 
 	// The Jacobian matrix is computed for every voxel
 	if(controlPointImage->nz>1){
@@ -317,7 +314,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 		// 8 floats of shared memory are allocated per thread
 		reg_spline_getJacobianValues3D_kernel
 				<<< G1, B1, NR_BLOCK->Block_reg_spline_getJacobianValues3D*8*sizeof(float)>>>
-				(*jacobianMatrices_d, *jacobianDet_d);
+				(jacobianMatrices_d, jacobianDet_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	else{
@@ -327,7 +324,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 		dim3 B1(NR_BLOCK->Block_reg_spline_getJacobianValues2D,1,1);
 		reg_spline_getJacobianValues2D_kernel
 				<<< G1, B1>>>
-				(*jacobianMatrices_d, *jacobianDet_d);
+				(jacobianMatrices_d, jacobianDet_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture))
@@ -336,11 +333,10 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 /* *************************************************************** */
 double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 											 nifti_image *controlPointImage,
-											 float4 **controlPointImageArray_d,
-											 bool approx
-											 )
+											 float4 *controlPointImageArray_d,
+											 bool approx)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// The Jacobian matrices and determinants are computed
@@ -363,8 +359,8 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
 		reg_spline_ComputeApproxJacobianValues(controlPointImage,
 											   controlPointImageArray_d,
-											   &jacobianMatrices_d,
-											   &jacobianDet_d);
+											   jacobianMatrices_d,
+											   jacobianDet_d);
 	}
 	else{
 		jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz;
@@ -381,8 +377,8 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 		reg_spline_ComputeJacobianValues(controlPointImage,
 										 referenceImage,
 										 controlPointImageArray_d,
-										 &jacobianMatrices_d,
-										 &jacobianDet_d);
+										 jacobianMatrices_d,
+										 jacobianDet_d);
 	}
 	NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d))
 
@@ -402,12 +398,12 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 /* *************************************************************** */
 void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 												   nifti_image *controlPointImage,
-												   float4 **controlPointImageArray_d,
-												   float4 **nodeGradientArray_d,
+												   float4 *controlPointImageArray_d,
+												   float4 *nodeGradientArray_d,
 												   float jacobianWeight,
 												   bool approx)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// The Jacobian matrices and determinants are computed
@@ -422,8 +418,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
 		reg_spline_ComputeApproxJacobianValues(controlPointImage,
 												controlPointImageArray_d,
-												&jacobianMatrices_d,
-												&jacobianDet_d);
+												jacobianMatrices_d,
+												jacobianDet_d);
 	}
 	else{
 		jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz;
@@ -434,8 +430,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 		reg_spline_ComputeJacobianValues(controlPointImage,
 										  referenceImage,
 										  controlPointImageArray_d,
-										  &jacobianMatrices_d,
-										  &jacobianDet_d);
+										  jacobianMatrices_d,
+										  jacobianDet_d);
 	}
 
 	// Need to desorient the Jacobian matrix using the header information - voxel to real conversion
@@ -476,7 +472,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 				(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeApproxJacGradient3D)));
 			dim3 G1(Grid_reg_spline_computeApproxJacGradient3D,Grid_reg_spline_computeApproxJacGradient3D,1);
 			dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient3D,1,1);
-			reg_spline_computeApproxJacGradient3D_kernel<<< G1, B1>>>(*nodeGradientArray_d);
+			reg_spline_computeApproxJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d);
 			NR_CUDA_CHECK_KERNEL(G1,B1)
 		}
 		else{
@@ -484,7 +480,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 				(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeApproxJacGradient2D)));
 			dim3 G1(Grid_reg_spline_computeApproxJacGradient2D,Grid_reg_spline_computeApproxJacGradient2D,1);
 			dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient2D,1,1);
-			reg_spline_computeApproxJacGradient2D_kernel<<< G1, B1>>>(*nodeGradientArray_d);
+			reg_spline_computeApproxJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d);
 			NR_CUDA_CHECK_KERNEL(G1,B1)
 		}
 	}
@@ -503,7 +499,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 				(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeJacGradient3D)));
 			dim3 G1(Grid_reg_spline_computeJacGradient3D,Grid_reg_spline_computeJacGradient3D,1);
 			dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient3D,1,1);
-			reg_spline_computeJacGradient3D_kernel<<< G1, B1>>>(*nodeGradientArray_d);
+			reg_spline_computeJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d);
 			NR_CUDA_CHECK_KERNEL(G1,B1)
 		}
 		else{
@@ -511,7 +507,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 				(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeJacGradient2D)));
 			dim3 G1(Grid_reg_spline_computeJacGradient2D,Grid_reg_spline_computeJacGradient2D,1);
 			dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient2D,1,1);
-			reg_spline_computeJacGradient2D_kernel<<< G1, B1>>>(*nodeGradientArray_d);
+			reg_spline_computeJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d);
 			NR_CUDA_CHECK_KERNEL(G1,B1)
 		}
 	}
@@ -523,10 +519,10 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 /* *************************************************************** */
 double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 									  nifti_image *controlPointImage,
-									  float4 **controlPointImageArray_d,
+									  float4 *controlPointImageArray_d,
 									  bool approx)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// The Jacobian matrices and determinants are computed
@@ -541,8 +537,8 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
 		reg_spline_ComputeApproxJacobianValues(controlPointImage,
 												controlPointImageArray_d,
-												&jacobianMatrices_d,
-												&jacobianDet_d);
+												jacobianMatrices_d,
+												jacobianDet_d);
 	}
 	else{
 		jacSum=jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz;
@@ -551,8 +547,8 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 		reg_spline_ComputeJacobianValues(controlPointImage,
 										  referenceImage,
 										  controlPointImageArray_d,
-										  &jacobianMatrices_d,
-										  &jacobianDet_d);
+										  jacobianMatrices_d,
+										  jacobianDet_d);
 	}
 
 	// Check if the Jacobian determinant average
@@ -611,7 +607,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D)));
 		dim3 G1(Grid_reg_spline_approxCorrectFolding,Grid_reg_spline_approxCorrectFolding,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D,1,1);
-		reg_spline_approxCorrectFolding3D_kernel<<< G1, B1>>>(*controlPointImageArray_d);
+		reg_spline_approxCorrectFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	else{
@@ -628,7 +624,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 		(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_correctFolding3D)));
 		dim3 G1(Grid_reg_spline_correctFolding,Grid_reg_spline_correctFolding,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_correctFolding3D,1,1);
-		reg_spline_correctFolding3D_kernel<<< G1, B1>>>(*controlPointImageArray_d);
+		reg_spline_correctFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture))
@@ -639,9 +635,9 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageArray_d)
+void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Bind the qform or sform
@@ -664,14 +660,14 @@ void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageA
 	(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDeformationFromDisplacement)));
 	dim3 G1(Grid_reg_getDeformationFromDisplacement,Grid_reg_getDeformationFromDisplacement,1);
 	dim3 B1(NR_BLOCK->Block_reg_getDeformationFromDisplacement,1,1);
-	reg_getDeformationFromDisplacement3D_kernel<<< G1, B1>>>(*imageArray_d);
+	reg_getDeformationFromDisplacement3D_kernel<<< G1, B1>>>(imageArray_d);
 	NR_CUDA_CHECK_KERNEL(G1,B1)
 }
 /* *************************************************************** */
 /* *************************************************************** */
-void reg_getDisplacementFromDeformation_gpu( nifti_image *image, float4 **imageArray_d)
+void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Bind the qform or sform
@@ -694,22 +690,22 @@ void reg_getDisplacementFromDeformation_gpu( nifti_image *image, float4 **imageA
 		(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDisplacementFromDeformation)));
 	dim3 G1(Grid_reg_getDisplacementFromDeformation,Grid_reg_getDisplacementFromDeformation,1);
 	dim3 B1(NR_BLOCK->Block_reg_getDisplacementFromDeformation,1,1);
-	reg_getDisplacementFromDeformation3D_kernel<<< G1, B1>>>(*imageArray_d);
+	reg_getDisplacementFromDeformation3D_kernel<<< G1, B1>>>(imageArray_d);
 	NR_CUDA_CHECK_KERNEL(G1,B1)
 }
 /* *************************************************************** */
 /* *************************************************************** */
 void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
 												 nifti_image *def_h,
-												 float4 **cpp_gpu,
-												 float4 **def_gpu)
+												 float4 *cpp_gpu,
+												 float4 *def_gpu)
 {
 	const int voxelNumber = def_h->nx * def_h->ny * def_h->nz;
 
 	// Create a mask array where no voxel are excluded
 	int *mask_gpu=nullptr;
 	NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber*sizeof(int)))
-	reg_fillMaskArray_gpu(voxelNumber,&mask_gpu);
+	reg_fillMaskArray_gpu(voxelNumber,mask_gpu);
 
 	// Define some variables for the deformation fields
 	float4 *tempDef_gpu=nullptr;
@@ -720,7 +716,7 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
 									   def_h,
 									   cpp_gpu,
 									   def_gpu,
-									   &mask_gpu,
+									   mask_gpu,
 									   voxelNumber,
 									   true); // non-interpolant spline are used
 
@@ -749,13 +745,13 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
 	for(unsigned int i=0;i<squaringNumber;++i){
 
 		// The deformation field arrays are updated
-		NR_CUDA_SAFE_CALL(cudaMemcpy(tempDef_gpu,*def_gpu,voxelNumber*sizeof(float4),cudaMemcpyDeviceToDevice))
+		NR_CUDA_SAFE_CALL(cudaMemcpy(tempDef_gpu,def_gpu,voxelNumber*sizeof(float4),cudaMemcpyDeviceToDevice))
 
 		// The deformation fields are composed
 		reg_defField_compose_gpu(def_h,
-								 &tempDef_gpu,
+								 tempDef_gpu,
 								 def_gpu,
-								 &mask_gpu,
+								 mask_gpu,
 								 voxelNumber);
 	}
 
@@ -765,12 +761,12 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
 /* *************************************************************** */
 /* *************************************************************** */
 void reg_defField_compose_gpu(nifti_image *def,
-							  float4 **def_gpu,
-							  float4 **defOut_gpu,
-							  int **mask_gpu,
+							  float4 *def_gpu,
+							  float4 *defOut_gpu,
+							  int *mask_gpu,
 							  int activeVoxel)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int voxelNumber=def->nx*def->ny*def->nz;
@@ -797,8 +793,8 @@ void reg_defField_compose_gpu(nifti_image *def,
 
 	const int3 referenceImageDim=make_int3(def->nx,def->ny,def->nz);
 
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,*def_gpu,activeVoxel*sizeof(float4)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,maskTexture,*mask_gpu,activeVoxel*sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,def_gpu,activeVoxel*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,maskTexture,mask_gpu,activeVoxel*sizeof(int)))
 
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)))
@@ -808,7 +804,7 @@ void reg_defField_compose_gpu(nifti_image *def,
 			(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_compose3D)));
 		dim3 G1(Grid_reg_defField_compose3D,Grid_reg_defField_compose3D,1);
 		dim3 B1(NR_BLOCK->Block_reg_defField_compose3D,1,1);
-		reg_defField_compose3D_kernel<<< G1, B1>>>(*defOut_gpu);
+		reg_defField_compose3D_kernel<<< G1, B1>>>(defOut_gpu);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	else{
@@ -816,7 +812,7 @@ void reg_defField_compose_gpu(nifti_image *def,
 			(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_compose2D)));
 		dim3 G1(Grid_reg_defField_compose2D,Grid_reg_defField_compose2D,1);
 		dim3 B1(NR_BLOCK->Block_reg_defField_compose2D,1,1);
-		reg_defField_compose2D_kernel<<< G1, B1>>>(*defOut_gpu);
+		reg_defField_compose2D_kernel<<< G1, B1>>>(defOut_gpu);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 
@@ -829,7 +825,7 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
 										float4 **deformationField_gpu,
 										float **jacobianMatrices_gpu)
 {
-	// Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz);
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index 621f6ff0..167a1bc4 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -20,65 +20,64 @@
 extern "C++"
 void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
                                         nifti_image *targetImage,
-                                        float4 **controlPointImageArray_d,
-                                        float4 **positionFieldImageArray_d,
-                                        int **mask,
+                                        float4 *controlPointImageArray_d,
+                                        float4 *positionFieldImageArray_d,
+                                        int *mask,
                                         int activeVoxelNumber,
                                         bool bspline);
 
 /* BE */
 extern "C++"
-float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage,
-      float4 **controlPointImageArray_d);
+float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d);
 
 extern "C++"
 void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
-      float4 **controlPointImageArray_d,
-      float4 **nodeGradientArray_d,
-      float bendingEnergyWeight);
+                                                float4 *controlPointImageArray_d,
+                                                float4 *nodeGradientArray_d,
+                                                float bendingEnergyWeight);
 
 /** Jacobian
  *
  */
 extern "C++"
 double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
-      nifti_image *controlPointImage,
-      float4 **controlPointImageArray_d,
-      bool approx);
+                                             nifti_image *controlPointImage,
+                                             float4 *controlPointImageArray_d,
+                                             bool approx);
 
 extern "C++"
 void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
-      nifti_image *controlPointImage,
-      float4 **controlPointImageArray_d,
-      float4 **nodeGradientArray_d,
-      float jacobianWeight,
-      bool approx);
+                                                   nifti_image *controlPointImage,
+                                                   float4 *controlPointImageArray_d,
+                                                   float4 *nodeGradientArray_d,
+                                                   float jacobianWeight,
+                                                   bool approx);
 
 extern "C++"
-double reg_spline_correctFolding_gpu(  nifti_image *targetImage,
-                                       nifti_image *controlPointImage,
-                                       float4 **controlPointImageArray_d,
-                                       bool approx);
+double reg_spline_correctFolding_gpu(nifti_image *targetImage,
+                                     nifti_image *controlPointImage,
+                                     float4 *controlPointImageArray_d,
+                                     bool approx);
 
 extern "C++"
 void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
-      nifti_image *def_h,
-      float4 **cpp_gpu,
-      float4 **def_gpu);
+                                                 nifti_image *def_h,
+                                                 float4 *cpp_gpu,
+                                                 float4 *def_gpu);
 
 extern "C++"
 void reg_defField_compose_gpu(nifti_image *def,
-                              float4 **def_gpu,
-                              float4 **defOut_gpu,
-                              int **mask_gpu,
+                              float4 *def_gpu,
+                              float4 *defOut_gpu,
+                              int *mask_gpu,
                               int activeVoxel);
 
 extern "C++"
-void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageArray_d);
+void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d);
 extern "C++"
-void reg_getDisplacementFromDeformation_gpu( nifti_image *image, float4 **imageArray_d);
+void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d);
 
 extern "C++"
 void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
-                                        float4 **deformationField_gpu,
-                                        float **jacobianMatrices_gpu);
+                                        float4 *deformationField_gpu,
+                                        float *jacobianMatrices_gpu);
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index dd9b1bde..f690f492 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -30,20 +30,20 @@ reg_nmi_gpu::reg_nmi_gpu():
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 reg_nmi_gpu::~reg_nmi_gpu()
 {
-	this->ClearHistogram();
+	this->DeallocateHistogram();
 #ifndef NDEBUG
 		printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_nmi_gpu::ClearHistogram()
+void reg_nmi_gpu::DeallocateHistogram()
 {
 	if(this->forwardJointHistogramLog_device!=nullptr){
 		cudaFree(this->forwardJointHistogramLog_device);
 	}
 	this->forwardJointHistogramLog_device=nullptr;
 #ifndef NDEBUG
-		printf("[NiftyReg DEBUG] reg_nmi_gpu::ClearHistogram() called\n");
+		printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -62,7 +62,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
                                     float4 **warFloGradDevicePtr,
                                     float4 **forVoxBasedGraDevicePtr)
 {
-	this->ClearHistogram();
+	this->DeallocateHistogram();
     reg_nmi::InitialiseMeasure(refImgPtr,
                                floImgPtr,
                                maskRefPtr,
@@ -157,18 +157,18 @@ double reg_nmi_gpu::GetSimilarityMeasureValue()
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /// Called when we only have one target and one source image
 void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
-									  cudaArray **referenceImageArray_d,
-									  float **warpedImageArray_d,
-									  float4 **warpedGradientArray_d,
-									  float **logJointHistogram_d,
-									  float4 **voxelNMIGradientArray_d,
-									  int **mask_d,
+									  cudaArray *referenceImageArray_d,
+									  float *warpedImageArray_d,
+									  float4 *warpedGradientArray_d,
+									  float *logJointHistogram_d,
+									  float4 *voxelNMIGradientArray_d,
+									  int *mask_d,
 									  int activeVoxelNumber,
 									  double *entropies,
 									  int refBinning,
 									  int floBinning)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz;
@@ -186,7 +186,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NMI,&NMI,sizeof(float)));
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int)));
 
-    // Texture bindingcurrentFloating
+    // Texture binding floating
     //Bind target image array to a 3D texture
 	firstreferenceImageTexture.normalized = true;
 	firstreferenceImageTexture.filterMode = cudaFilterModeLinear;
@@ -194,19 +194,19 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
 	firstreferenceImageTexture.addressMode[1] = cudaAddressModeWrap;
 	firstreferenceImageTexture.addressMode[2] = cudaAddressModeWrap;
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-	NR_CUDA_SAFE_CALL(cudaBindTextureToArray(firstreferenceImageTexture, *referenceImageArray_d, channelDesc))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageTexture, *warpedImageArray_d, voxelNumber*sizeof(float)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageGradientTexture, *warpedGradientArray_d, voxelNumber*sizeof(float4)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, histogramTexture, *logJointHistogram_d, binNumber*sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber*sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemset(*voxelNMIGradientArray_d, 0, voxelNumber*sizeof(float4)));
+	NR_CUDA_SAFE_CALL(cudaBindTextureToArray(firstreferenceImageTexture, referenceImageArray_d, channelDesc))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageTexture, warpedImageArray_d, voxelNumber*sizeof(float)));
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageGradientTexture, warpedGradientArray_d, voxelNumber*sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, histogramTexture, logJointHistogram_d, binNumber*sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemset(voxelNMIGradientArray_d, 0, voxelNumber*sizeof(float4)));
 
 	if(referenceImage->nz>1){
 		const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW3D =
             (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D));
         dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D,1,1);
 		dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW3D,Grid_reg_getVoxelBasedNMIGradientUsingPW3D,1);
-		reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (*voxelNMIGradientArray_d);
+		reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	else{
@@ -214,7 +214,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
             (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D));
         dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D,1,1);
 		dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW2D,Grid_reg_getVoxelBasedNMIGradientUsingPW2D,1);
-		reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (*voxelNMIGradientArray_d);
+		reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstreferenceImageTexture));
@@ -239,12 +239,12 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient()
 
     // THe gradient of the NMI is computed on the GPU
     reg_getVoxelBasedNMIGradient_gpu(this->referenceImagePointer,
-									 &this->referenceDevicePointer,
-									 &this->warpedFloatingDevicePointer,
-									 &this->warpedFloatingGradientDevicePointer,
-									 &this->forwardJointHistogramLog_device,
-									 &this->forwardVoxelBasedGradientDevicePointer,
-									 &this->referenceMaskDevicePointer,
+									 this->referenceDevicePointer,
+									 this->warpedFloatingDevicePointer,
+									 this->warpedFloatingGradientDevicePointer,
+									 this->forwardJointHistogramLog_device,
+									 this->forwardVoxelBasedGradientDevicePointer,
+									 this->referenceMaskDevicePointer,
                                      this->activeVoxeNumber,
 									 this->forwardEntropyValues[0],
 									 this->referenceBinNumber[0],
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index 2e4dbac7..aed9cd46 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -48,7 +48,7 @@ class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu
 protected:
    float *forwardJointHistogramLog_device;
 //	float **backwardJointHistogramLog_device;
-   void ClearHistogram();
+   void DeallocateHistogram();
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -92,12 +92,12 @@ class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measur
 
 extern "C++"
 void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
-                                      cudaArray **referenceImageArray_d,
-                                      float **warpedImageArray_d,
-                                      float4 **resultGradientArray_d,
-                                      float **logJointHistogram_d,
-                                      float4 **voxelNMIGradientArray_d,
-                                      int **targetMask_d,
+                                      cudaArray *referenceImageArray_d,
+                                      float *warpedImageArray_d,
+                                      float4 *resultGradientArray_d,
+                                      float *logJointHistogram_d,
+                                      float4 *voxelNMIGradientArray_d,
+                                      int *targetMask_d,
                                       int activeVoxelNumber,
                                       double *entropies,
                                       int refBinning,
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 45f2baeb..7a17a1ab 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -19,7 +19,7 @@ reg_optimiser_gpu::reg_optimiser_gpu()
 reg_optimiser_gpu::~reg_optimiser_gpu()
 {
     if(this->bestDOF_gpu!=nullptr)
-        cudaCommon_free<float4>(&this->bestDOF_gpu);;
+        cudaCommon_free(&this->bestDOF_gpu);;
     this->bestDOF_gpu=nullptr;
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_optimiser_gpu::~reg_optimiser_gpu() called\n");
@@ -57,7 +57,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
         this->gradient_gpu=reinterpret_cast<float4 *>(gradData);
 
     if(this->bestDOF_gpu!=nullptr)
-        cudaCommon_free<float4>(&this->bestDOF_gpu);
+        cudaCommon_free(&this->bestDOF_gpu);
 
     if(cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu,
 									   (int)(this->GetVoxNumber()))){
@@ -121,11 +121,11 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu()
 reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu()
 {
     if(this->array1!=nullptr)
-        cudaCommon_free<float4>(&this->array1);
+        cudaCommon_free(&this->array1);
     this->array1=nullptr;
 
     if(this->array2!=nullptr)
-        cudaCommon_free<float4>(&this->array2);
+        cudaCommon_free(&this->array2);
     this->array2=nullptr;
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called\n");
@@ -177,17 +177,17 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox,
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_conjugateGradient_gpu::UpdateGradientValues()
 {
-    if(this->firstcall==true){
-        reg_initialiseConjugateGradient_gpu(&(this->gradient_gpu),
-                                            &(this->array1),
-                                            &(this->array2),
+    if(this->firstcall){
+        reg_initialiseConjugateGradient_gpu(this->gradient_gpu,
+                                            this->array1,
+                                            this->array2,
                                             (int)(this->GetVoxNumber()));
         this->firstcall=false;
     }
     else{
-        reg_GetConjugateGradient_gpu(&this->gradient_gpu,
-                                     &this->array1,
-                                     &this->array2,
+        reg_GetConjugateGradient_gpu(this->gradient_gpu,
+                                     this->array1,
+                                     this->array2,
                                      (int)(this->GetVoxNumber()));
     }
     return;
@@ -219,41 +219,41 @@ void reg_conjugateGradient_gpu::reg_test_optimiser()
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_initialiseConjugateGradient_gpu(float4 **gradientArray_d,
-                                         float4 **conjugateG_d,
-                                         float4 **conjugateH_d,
+void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d,
+                                         float4 *conjugateG_d,
+                                         float4 *conjugateH_d,
                                          int nodeNumber)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, nodeNumber*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4)))
 
     const unsigned int Grid_reg_initialiseConjugateGradient =
     (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_initialiseConjugateGradient));
     dim3 G1(Grid_reg_initialiseConjugateGradient,Grid_reg_initialiseConjugateGradient,1);
     dim3 B1(NR_BLOCK->Block_reg_initialiseConjugateGradient,1,1);
 
-    reg_initialiseConjugateGradient_kernel <<< G1, B1 >>> (*conjugateG_d);
+    reg_initialiseConjugateGradient_kernel <<< G1, B1 >>> (conjugateG_d);
     NR_CUDA_CHECK_KERNEL(G1,B1)
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
-    NR_CUDA_SAFE_CALL(cudaMemcpy(*conjugateH_d, *conjugateG_d, nodeNumber*sizeof(float4), cudaMemcpyDeviceToDevice))
+    NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateH_d, conjugateG_d, nodeNumber*sizeof(float4), cudaMemcpyDeviceToDevice))
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_GetConjugateGradient_gpu(float4 **gradientArray_d,
-                                  float4 **conjugateG_d,
-                                  float4 **conjugateH_d,
+void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
+                                  float4 *conjugateG_d,
+                                  float4 *conjugateH_d,
                                   int nodeNumber)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, *conjugateG_d, nodeNumber*sizeof(float4)))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateHTexture, *conjugateH_d, nodeNumber*sizeof(float4)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, nodeNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, conjugateG_d, nodeNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateHTexture, conjugateH_d, nodeNumber*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4)))
 
     // gam = sum((grad+g)*grad)/sum(HxG);
     const unsigned int Grid_reg_GetConjugateGradient1 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_GetConjugateGradient1));
@@ -280,7 +280,7 @@ void reg_GetConjugateGradient_gpu(float4 **gradientArray_d,
     const unsigned int Grid_reg_GetConjugateGradient2 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_GetConjugateGradient2));
     dim3 B2(NR_BLOCK->Block_reg_GetConjugateGradient2,1,1);
     dim3 G2(Grid_reg_GetConjugateGradient2,Grid_reg_GetConjugateGradient2,1);
-	reg_GetConjugateGradient2_kernel <<< G2, B2 >>> (*gradientArray_d, *conjugateG_d, *conjugateH_d);
+	reg_GetConjugateGradient2_kernel <<< G2, B2 >>> (gradientArray_d, conjugateG_d, conjugateH_d);
     NR_CUDA_CHECK_KERNEL(G1,B1)
 
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateGTexture))
@@ -290,15 +290,14 @@ void reg_GetConjugateGradient_gpu(float4 **gradientArray_d,
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-float reg_getMaximalLength_gpu(float4 **gradientArray_d,
-                               int nodeNumber)
+float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     // Copy constant memory value and bind texture
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, nodeNumber*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4)))
 
     float *dist_d=nullptr;
     NR_CUDA_SAFE_CALL(cudaMalloc(&dist_d,nodeNumber*sizeof(float)))
@@ -319,29 +318,27 @@ float reg_getMaximalLength_gpu(float4 **gradientArray_d,
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
-                                        float4 **controlPointImageArray_d,
-                                        float4 **bestControlPointPosition_d,
-										float4 **gradientArray_d,
+                                        float4 *controlPointImageArray_d,
+                                        float4 *bestControlPointPosition_d,
+										float4 *gradientArray_d,
                                         float currentLength)
 
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor,&currentLength,sizeof(float)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &currentLength, sizeof(float)))
 
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, *bestControlPointPosition_d,
-									  nodeNumber*sizeof(float4)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d,
-									  nodeNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, bestControlPointPosition_d, nodeNumber * sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4)))
 
     const unsigned int Grid_reg_updateControlPointPosition =
             (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_updateControlPointPosition));
     dim3 B1(NR_BLOCK->Block_reg_updateControlPointPosition,1,1);
     dim3 G1(Grid_reg_updateControlPointPosition,Grid_reg_updateControlPointPosition,1);
-    reg_updateControlPointPosition_kernel <<< G1, B1 >>> (*controlPointImageArray_d);
+    reg_updateControlPointPosition_kernel <<< G1, B1 >>> (controlPointImageArray_d);
     NR_CUDA_CHECK_KERNEL(G1,B1)
 	// Unbind the textures
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture))
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index 2655294d..d325554d 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -9,116 +9,111 @@
 /** @class reg_optimiser_gpu
  * @brief Standard gradient acent optimisation for GPU
  */
-class reg_optimiser_gpu : public reg_optimiser<float>
-{
+class reg_optimiser_gpu: public reg_optimiser<float> {
 protected:
-   float4 *currentDOF_gpu; // pointers
-   float4 *gradient_gpu; // pointers
-   float4 *bestDOF_gpu; // allocated here
+    float4 *currentDOF_gpu; // pointers
+    float4 *gradient_gpu; // pointers
+    float4 *bestDOF_gpu; // allocated here
 
 public:
-   reg_optimiser_gpu();
-   ~reg_optimiser_gpu();
+    reg_optimiser_gpu();
+    ~reg_optimiser_gpu();
 
-   // Float4 are casted to float for compatibility with the cpu class
-   virtual float* GetCurrentDOF()
-   {
-      return reinterpret_cast<float *>(this->currentDOF_gpu);
-   }
-   virtual float* GetBestDOF()
-   {
-      return reinterpret_cast<float *>(this->bestDOF_gpu);
-   }
-   virtual float* GetGradient()
-   {
-      return reinterpret_cast<float *>(this->gradient_gpu);
-   }
+    // Float4 are casted to float for compatibility with the cpu class
+    virtual float* GetCurrentDOF() {
+        return reinterpret_cast<float *>(this->currentDOF_gpu);
+    }
+    virtual float* GetBestDOF() {
+        return reinterpret_cast<float *>(this->bestDOF_gpu);
+    }
+    virtual float* GetGradient() {
+        return reinterpret_cast<float *>(this->gradient_gpu);
+    }
 
-   virtual void RestoreBestDOF();
-   virtual void StoreCurrentDOF();
+    virtual void RestoreBestDOF();
+    virtual void StoreCurrentDOF();
 
-   virtual void Initialise(size_t nvox,
-                           int dim,
-                           bool optX,
-                           bool optY,
-                           bool optZ,
-                           size_t maxit,
-                           size_t start,
-                           InterfaceOptimiser *o,
-                           float *cppData,
-                           float *gradData=nullptr,
-                           size_t a=0,
-                           float *b=nullptr,
-                           float *c=nullptr);
-   virtual void Perturbation(float length);
+    virtual void Initialise(size_t nvox,
+                            int dim,
+                            bool optX,
+                            bool optY,
+                            bool optZ,
+                            size_t maxit,
+                            size_t start,
+                            InterfaceOptimiser *o,
+                            float *cppData,
+                            float *gradData = nullptr,
+                            size_t a = 0,
+                            float *b = nullptr,
+                            float *c = nullptr);
+    virtual void Perturbation(float length);
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /** @class reg_conjugateGradient_gpu
  * @brief Conjugate gradient acent optimisation for GPU
  */
-class reg_conjugateGradient_gpu : public reg_optimiser_gpu
-{
+class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
 protected:
-   float4 *array1;
-   float4 *array2;
-   bool firstcall;
-   void UpdateGradientValues(); /// @brief Update the gradient array
+    float4 *array1;
+    float4 *array2;
+    bool firstcall;
+    void UpdateGradientValues(); /// @brief Update the gradient array
 
 public:
-   reg_conjugateGradient_gpu();
-   ~reg_conjugateGradient_gpu();
+    reg_conjugateGradient_gpu();
+    ~reg_conjugateGradient_gpu();
 
-   virtual void Initialise(size_t nvox,
-                           int dim,
-                           bool optX,
-                           bool optY,
-                           bool optZ,
-                           size_t maxit,
-                           size_t start,
-                           InterfaceOptimiser *o,
-                           float *cppData,
-                           float *gradData=nullptr,
-                           size_t a=0,
-                           float *b=nullptr,
-                           float *c=nullptr);
-   virtual void Optimise(float maxLength,
-                         float smallLength,
-                         float &startLength);
-   virtual void Perturbation(float length);
+    virtual void Initialise(size_t nvox,
+                            int dim,
+                            bool optX,
+                            bool optY,
+                            bool optZ,
+                            size_t maxit,
+                            size_t start,
+                            InterfaceOptimiser *o,
+                            float *cppData,
+                            float *gradData = nullptr,
+                            size_t a = 0,
+                            float *b = nullptr,
+                            float *c = nullptr);
+    virtual void Optimise(float maxLength,
+                          float smallLength,
+                          float &startLength);
+    virtual void Perturbation(float length);
 
-   // Function used for testing
-   virtual void reg_test_optimiser();
+    // Function used for testing
+    virtual void reg_test_optimiser();
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /** @brief
  */
 extern "C++"
-void reg_initialiseConjugateGradient_gpu(float4 **gradientArray_d,
-      float4 **conjugateG_d,
-      float4 **conjugateH_d,
-      int nodeNumber);
+void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d,
+                                         float4 *conjugateG_d,
+                                         float4 *conjugateH_d,
+                                         int nodeNumber);
 
 /** @brief
  */
 extern "C++"
-void reg_GetConjugateGradient_gpu(float4 **gradientArray_d,
-                                  float4 **conjugateG_d,
-                                  float4 **conjugateH_d,
+void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
+                                  float4 *conjugateG_d,
+                                  float4 *conjugateH_d,
                                   int nodeNumber);
 
 /** @brief
  */
 extern "C++"
-float reg_getMaximalLength_gpu(float4 **gradientArray_d,
+float reg_getMaximalLength_gpu(float4 *gradientArray_d,
                                int nodeNumber);
 
 /** @brief
  */
 extern "C++"
 void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
-                                        float4 **controlPointImageArray_d,
-                                        float4 **bestControlPointPosition_d,
-                                        float4 **gradientArray_d,
+                                        float4 *controlPointImageArray_d,
+                                        float4 *bestControlPointPosition_d,
+                                        float4 *gradientArray_d,
                                         float currentLength);
diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu
index 5889d42d..fdabd803 100755
--- a/reg-lib/cuda/_reg_optimiser_kernels.cu
+++ b/reg-lib/cuda/_reg_optimiser_kernels.cu
@@ -6,7 +6,7 @@ texture<float4, 1, cudaReadModeElementType> conjugateGTexture;
 texture<float4, 1, cudaReadModeElementType> conjugateHTexture;
 texture<float4, 1, cudaReadModeElementType> controlPointTexture;
 
-__global__ void reg_initialiseConjugateGradient_kernel(	float4 *conjugateG_d)
+__global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateG_d)
 {
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
     if(tid < c_NodeNumber){
@@ -31,9 +31,9 @@ __global__ void reg_GetConjugateGradient1_kernel(float2 *sum)
     }
 }
 
-__global__ void reg_GetConjugateGradient2_kernel(	float4 *nodeNMIGradientArray_d,
-                                                        float4 *conjugateG_d,
-                                                        float4 *conjugateH_d)
+__global__ void reg_GetConjugateGradient2_kernel(float4 *nodeNMIGradientArray_d,
+                                                 float4 *conjugateG_d,
+                                                 float4 *conjugateH_d)
 {
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
     if(tid < c_NodeNumber){
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index 0f241094..e4d68d9c 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -16,14 +16,14 @@
 /* *************************************************************** */
 /* *************************************************************** */
 void reg_resampleImage_gpu(nifti_image *floatingImage,
-                           float **warpedImageArray_d,
-                           cudaArray **floatingImageArray_d,
-                           float4 **deformationFieldImageArray_d,
-                           int **mask_d,
+                           float *warpedImageArray_d,
+                           cudaArray *floatingImageArray_d,
+                           float4 *deformationFieldImageArray_d,
+                           int *mask_d,
                            int activeVoxelNumber,
                            float paddingValue)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
@@ -40,13 +40,13 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
     floatingTexture.addressMode[2] = cudaAddressModeWrap;
 
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, *floatingImageArray_d, channelDesc))
+    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc))
 
     //Bind deformationField to texture
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, *deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4)))
 
     //Bind deformationField to texture
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber*sizeof(int)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)))
 
     // Bind the real to voxel matrix to texture
     mat44 *floatingMatrix;
@@ -71,7 +71,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
                 (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_resampleImage3D));
         dim3 B1(NR_BLOCK->Block_reg_resampleImage3D,1,1);
         dim3 G1(Grid_reg_resamplefloatingImage3D,Grid_reg_resamplefloatingImage3D,1);
-        reg_resampleImage3D_kernel <<< G1, B1 >>> (*warpedImageArray_d);
+        reg_resampleImage3D_kernel <<< G1, B1 >>> (warpedImageArray_d);
 		cudaDeviceSynchronize();
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
@@ -80,7 +80,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
                 (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_resampleImage2D));
         dim3 B1(NR_BLOCK->Block_reg_resampleImage2D,1,1);
         dim3 G1(Grid_reg_resamplefloatingImage2D,Grid_reg_resamplefloatingImage2D,1);
-        reg_resampleImage2D_kernel <<< G1, B1 >>> (*warpedImageArray_d);
+        reg_resampleImage2D_kernel <<< G1, B1 >>> (warpedImageArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 
@@ -94,13 +94,13 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
 /* *************************************************************** */
 /* *************************************************************** */
 void reg_getImageGradient_gpu(nifti_image *floatingImage,
-                              cudaArray **floatingImageArray_d,
-                              float4 **deformationFieldImageArray_d,
-                              float4 **warpedGradientArray_d,
+                              cudaArray *floatingImageArray_d,
+                              float4 *deformationFieldImageArray_d,
+                              float4 *warpedGradientArray_d,
                               int activeVoxelNumber,
                               float paddingValue)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
@@ -117,10 +117,10 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
     floatingTexture.addressMode[2] = cudaAddressModeWrap;
 
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, *floatingImageArray_d, channelDesc))
+    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc))
 
     //Bind deformationField to texture
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, *deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4)))
 
     // Bind the real to voxel matrix to texture
     mat44 *floatingMatrix;
@@ -143,14 +143,14 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
         const unsigned int Grid_reg_getImageGradient3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getImageGradient3D));
         dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D,1,1);
 		dim3 G1(Grid_reg_getImageGradient3D,Grid_reg_getImageGradient3D,1);
-        reg_getImageGradient3D_kernel <<< G1, B1 >>> (*warpedGradientArray_d);
+        reg_getImageGradient3D_kernel <<< G1, B1 >>> (warpedGradientArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	else{
         const unsigned int Grid_reg_getImageGradient2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getImageGradient2D));
         dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D,1,1);
 		dim3 G1(Grid_reg_getImageGradient2D,Grid_reg_getImageGradient2D,1);
-        reg_getImageGradient2D_kernel <<< G1, B1 >>> (*warpedGradientArray_d);
+        reg_getImageGradient2D_kernel <<< G1, B1 >>> (warpedGradientArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture))
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h
index b9b90dda..af540f68 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/_reg_resampling_gpu.h
@@ -17,17 +17,17 @@
 
 extern "C++"
 void reg_resampleImage_gpu(nifti_image *sourceImage,
-                           float **resultImageArray_d,
-                           cudaArray **sourceImageArray_d,
-                           float4 **positionFieldImageArray_d,
-                           int **mask_d,
+                           float *resultImageArray_d,
+                           cudaArray *sourceImageArray_d,
+                           float4 *positionFieldImageArray_d,
+                           int *mask_d,
                            int activeVoxelNumber,
                            float paddingValue);
 
 extern "C++"
 void reg_getImageGradient_gpu(nifti_image *sourceImage,
-                              cudaArray **sourceImageArray_d,
-                              float4 **positionFieldImageArray_d,
-                              float4 **resultGradientArray_d,
+                              cudaArray *sourceImageArray_d,
+                              float4 *positionFieldImageArray_d,
+                              float4 *resultGradientArray_d,
                               int activeVoxelNumber,
                               float paddingValue);
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index a34ed7e9..bfb9a2fe 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -85,7 +85,7 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage,
 						  int activeVoxelNumber
 						  )
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Copy the constant memory variables
@@ -141,16 +141,15 @@ double reg_ssd_gpu::GetSimilarityMeasureValue()
 /* *************************************************************** */
 /* *************************************************************** */
 void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
-									  cudaArray **reference_d,
-									  float **warped_d,
-									  float4 **spaGradient_d,
-									  float4 **ssdGradient_d,
+									  cudaArray *reference_d,
+									  float *warped_d,
+									  float4 *spaGradient_d,
+									  float4 *ssdGradient_d,
 									  float maxSD,
-									  int **mask_d,
-									  int activeVoxelNumber
-									  )
+									  int *mask_d,
+									  int activeVoxelNumber)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	// Copy the constant memory variables
@@ -166,19 +165,19 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
 	referenceTexture.addressMode[1] = cudaAddressModeWrap;
 	referenceTexture.addressMode[2] = cudaAddressModeWrap;
 	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-	NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, *reference_d, channelDesc))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, *warped_d, voxelNumber*sizeof(float)))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber*sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, *spaGradient_d, voxelNumber*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, reference_d, channelDesc))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, warped_d, voxelNumber*sizeof(float)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, spaGradient_d, voxelNumber*sizeof(float4)))
 	// Set the gradient image to zero
-	NR_CUDA_SAFE_CALL(cudaMemset(*ssdGradient_d,0,voxelNumber*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemset(ssdGradient_d,0,voxelNumber*sizeof(float4)))
 	const unsigned int Grid_reg_getSSDGradient =
             (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getSSDGradient));
     dim3 B1(NR_BLOCK->Block_reg_getSSDGradient,1,1);
 	dim3 G1(Grid_reg_getSSDGradient,Grid_reg_getSSDGradient,1);
 	if(referenceDim.z>1)
-		reg_getSSDGradient3D_kernel <<< G1, B1 >>> (*ssdGradient_d);
-	else reg_getSSDGradient2D_kernel <<< G1, B1 >>> (*ssdGradient_d);
+		reg_getSSDGradient3D_kernel <<< G1, B1 >>> (ssdGradient_d);
+	else reg_getSSDGradient2D_kernel <<< G1, B1 >>> (ssdGradient_d);
 	NR_CUDA_CHECK_KERNEL(G1,B1)
 	// Unbind the textures
 	NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture))
@@ -191,12 +190,12 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
 void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient()
 {
 	reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer,
-									 &this->referenceDevicePointer,
-									 &this->warpedFloatingDevicePointer,
-									 &this->warpedFloatingGradientDevicePointer,
-									 &this->forwardVoxelBasedGradientDevicePointer,
+									 this->referenceDevicePointer,
+									 this->warpedFloatingDevicePointer,
+									 this->warpedFloatingGradientDevicePointer,
+									 this->forwardVoxelBasedGradientDevicePointer,
                                      1.0f,
-									 &this->referenceMaskDevicePointer,
+									 this->referenceMaskDevicePointer,
 									 this->activeVoxeNumber
 									 );
 	return;
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 3f45d19b..33cc16ef 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -59,11 +59,10 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage,
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 extern "C++"
 void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
-                                      cudaArray **reference_d,
-                                      float **warped_d,
-                                      float4 **spaGradient_d,
-                                      float4 **ssdGradient_d,
+                                      cudaArray *reference_d,
+                                      float *warped_d,
+                                      float4 *spaGradient_d,
+                                      float4 *ssdGradient_d,
                                       float maxSD,
-                                      int **mask_d,
-                                      int activeVoxelNumber
-                                     );
+                                      int *mask_d,
+                                      int activeVoxelNumber);
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 8e4d3ab8..d14b75e6 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -19,11 +19,11 @@
 /* *************************************************************** */
 void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
                                       nifti_image *controlPointImage,
-                                      float4 **voxelNMIGradientArray_d,
-                                      float4 **nodeNMIGradientArray_d,
+                                      float4 *voxelNMIGradientArray_d,
+                                      float4 *nodeNMIGradientArray_d,
                                       float weight)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
@@ -43,23 +43,23 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio,&voxelNodeRatio_h,sizeof(float3)))
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&weight,sizeof(float)))
 
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *voxelNMIGradientArray_d, voxelNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber*sizeof(float4)))
 
     const unsigned int Grid_reg_voxelCentric2NodeCentric = (unsigned int)ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_voxelCentric2NodeCentric));
     dim3 B1(NR_BLOCK->Block_reg_voxelCentric2NodeCentric,1,1);
 	dim3 G1(Grid_reg_voxelCentric2NodeCentric,Grid_reg_voxelCentric2NodeCentric,1);
-    reg_voxelCentric2NodeCentric_kernel <<< G1, B1 >>> (*nodeNMIGradientArray_d);
+    reg_voxelCentric2NodeCentric_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d);
 	NR_CUDA_CHECK_KERNEL(G1,B1)
 
 	NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
 }
 /* *************************************************************** */
 /* *************************************************************** */
-void reg_convertNMIGradientFromVoxelToRealSpace_gpu(	mat44 *sourceMatrix_xyz,
-                            nifti_image *controlPointImage,
-                            float4 **nodeNMIGradientArray_d)
+void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
+                                                    nifti_image *controlPointImage,
+                                                    float4 *nodeNMIGradientArray_d)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
@@ -80,7 +80,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(	mat44 *sourceMatrix_xyz,
     dim3 G1(Grid_reg_convertNMIGradientFromVoxelToRealSpace,Grid_reg_convertNMIGradientFromVoxelToRealSpace,1);
     dim3 B1(NR_BLOCK->Block_reg_convertNMIGradientFromVoxelToRealSpace,1,1);
 
-    _reg_convertNMIGradientFromVoxelToRealSpace_kernel <<< G1, B1 >>> (*nodeNMIGradientArray_d);
+    _reg_convertNMIGradientFromVoxelToRealSpace_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d);
     NR_CUDA_CHECK_KERNEL(G1,B1)
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(matrixTexture))
     NR_CUDA_SAFE_CALL(cudaFree(matrix_d))
@@ -88,12 +88,12 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(	mat44 *sourceMatrix_xyz,
 /* *************************************************************** */
 /* *************************************************************** */
 void reg_gaussianSmoothing_gpu( nifti_image *image,
-                                float4 **imageArray_d,
+                                float4 *imageArray_d,
                                 float sigma,
                                 bool smoothXYZ[8])
 
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
 	const unsigned int voxelNumber = image->nx * image->ny * image->nz;
@@ -111,7 +111,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
     }
 
 	for(int n=1; n<4; n++){
-		if(axisToSmooth[n]==true && image->dim[n]>1){
+		if(axisToSmooth[n] && image->dim[n]>1){
             float currentSigma;
             if(sigma>0) currentSigma=sigma/image->pixdim[n];
             else currentSigma=fabs(sigma); // voxel based if negative value
@@ -139,7 +139,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
                 NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage,voxelNumber*sizeof(float4)))
 
                 NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float)))
-                NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *imageArray_d, voxelNumber*sizeof(float4)))
+                NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4)))
 
 				unsigned int Grid_reg_ApplyConvolutionWindow;
                 dim3 B,G;
@@ -172,7 +172,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
                 NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture))
                 NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
                 NR_CUDA_SAFE_CALL(cudaFree(kernel_d))
-                NR_CUDA_SAFE_CALL(cudaMemcpy(*imageArray_d, smoothedImage, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice))
+                NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice))
                 NR_CUDA_SAFE_CALL(cudaFree(smoothedImage))
             }
 		}
@@ -180,10 +180,10 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
 }
 /* *************************************************************** */
 void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
-                                        float4 **imageArray_d,
+                                        float4 *imageArray_d,
 										float *spacingVoxel)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     const int voxelNumber = image->nx * image->ny * image->nz;
@@ -219,7 +219,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
             float4 *smoothedImage_d;
             NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage_d,voxelNumber*sizeof(float4)))
 
-            NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *imageArray_d, voxelNumber*sizeof(float4)))
+            NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4)))
 
             unsigned int Grid_reg_ApplyConvolutionWindow;
             dim3 B,G;
@@ -252,15 +252,15 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
             NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture))
             NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
             NR_CUDA_SAFE_CALL(cudaFree(kernel_d))
-            NR_CUDA_SAFE_CALL(cudaMemcpy(*imageArray_d, smoothedImage_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice))
+            NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice))
             NR_CUDA_SAFE_CALL(cudaFree(smoothedImage_d))
         }
     }
 }
 /* *************************************************************** */
-void reg_multiplyValue_gpu(int num, float4 **array_d, float value)
+void reg_multiplyValue_gpu(int num, float4 *array_d, float value)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
@@ -269,13 +269,13 @@ void reg_multiplyValue_gpu(int num, float4 **array_d, float value)
     const unsigned int Grid_reg_multiplyValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
     dim3 G=dim3(Grid_reg_multiplyValues,Grid_reg_multiplyValues,1);
     dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
-    reg_multiplyValue_kernel_float4<<<G,B>>>(*array_d);
+    reg_multiplyValue_kernel_float4<<<G,B>>>(array_d);
     NR_CUDA_CHECK_KERNEL(G,B)
 }
 /* *************************************************************** */
-void reg_addValue_gpu(int num, float4 **array_d, float value)
+void reg_addValue_gpu(int num, float4 *array_d, float value)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
@@ -284,13 +284,13 @@ void reg_addValue_gpu(int num, float4 **array_d, float value)
     const unsigned int Grid_reg_addValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
     dim3 G=dim3(Grid_reg_addValues,Grid_reg_addValues,1);
     dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
-    reg_addValue_kernel_float4<<<G,B>>>(*array_d);
+    reg_addValue_kernel_float4<<<G,B>>>(array_d);
     NR_CUDA_CHECK_KERNEL(G,B)
 }
 /* *************************************************************** */
-void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
+void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
@@ -298,13 +298,13 @@ void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
     const unsigned int Grid_reg_multiplyArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
     dim3 G=dim3(Grid_reg_multiplyArrays,Grid_reg_multiplyArrays,1);
     dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
-    reg_multiplyArrays_kernel_float4<<<G,B>>>(*array1_d,*array2_d);
+    reg_multiplyArrays_kernel_float4<<<G,B>>>(array1_d,array2_d);
     NR_CUDA_CHECK_KERNEL(G,B)
 }
 /* *************************************************************** */
-void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
+void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
@@ -312,13 +312,13 @@ void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d)
     const unsigned int Grid_reg_addArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
     dim3 G=dim3(Grid_reg_addArrays,Grid_reg_addArrays,1);
     dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
-    reg_addArrays_kernel_float4<<<G,B>>>(*array1_d,*array2_d);
+    reg_addArrays_kernel_float4<<<G,B>>>(array1_d,array2_d);
     NR_CUDA_CHECK_KERNEL(G,B)
 }
 /* *************************************************************** */
-void reg_fillMaskArray_gpu(int num, int **array1_d)
+void reg_fillMaskArray_gpu(int num, int *array1_d)
 {
-    // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard
+    // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
@@ -326,7 +326,7 @@ void reg_fillMaskArray_gpu(int num, int **array1_d)
     const unsigned int Grid_reg_fillMaskArray = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
     dim3 G=dim3(Grid_reg_fillMaskArray,Grid_reg_fillMaskArray,1);
     dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
-    reg_fillMaskArray_kernel<<<G,B>>>(*array1_d);
+    reg_fillMaskArray_kernel<<<G,B>>>(array1_d);
     NR_CUDA_CHECK_KERNEL(G,B)
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 300f6870..97d454c2 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -18,68 +18,51 @@
 #include <thrust/device_ptr.h>
 #include <thrust/reduce.h>
 
-/* ******************************** */
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
                                       nifti_image *controlPointImage,
-                                      float4 **voxelNMIGradientArray_d,
-                                      float4 **nodeNMIGradientArray_d,
+                                      float4 *voxelNMIGradientArray_d,
+                                      float4 *nodeNMIGradientArray_d,
                                       float weight);
-/* ******************************** */
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
-      nifti_image *controlPointImage,
-      float4 **nodeNMIGradientArray_d);
-/* ******************************** */
-/* ******************************** */
+                                                    nifti_image *controlPointImage,
+                                                    float4 *nodeNMIGradientArray_d);
+/* *************************************************************** */
 extern "C++"
-void reg_gaussianSmoothing_gpu( nifti_image *image,
-                                float4 **imageArray_d,
-                                float sigma,
-                                bool axisToSmooth[8]);
-/* ******************************** */
-/* ******************************** */
-
+void reg_gaussianSmoothing_gpu(nifti_image *image,
+                               float4 *imageArray_d,
+                               float sigma,
+                               bool axisToSmooth[8]);
+/* *************************************************************** */
 extern "C++"
 void reg_smoothImageForCubicSpline_gpu(nifti_image *resultImage,
-                                       float4 **voxelNMIGradientArray_d,
+                                       float4 *voxelNMIGradientArray_d,
                                        float *smoothingRadius);
-/* ******************************** */
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
-void reg_multiplyValue_gpu(int num, float4 **array_d, float value);
-/* ******************************** */
-/* ******************************** */
+void reg_multiplyValue_gpu(int num, float4 *array_d, float value);
+/* *************************************************************** */
 extern "C++"
-void reg_addValue_gpu(int num, float4 **array_d, float value);
-/* ******************************** */
-/* ******************************** */
+void reg_addValue_gpu(int num, float4 *array_d, float value);
+/* *************************************************************** */
 extern "C++"
-void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d);
-/* ******************************** */
-/* ******************************** */
+void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d);
+/* *************************************************************** */
 extern "C++"
-void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d);
-/* ******************************** */
-/* ******************************** */
+void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d);
+/* *************************************************************** */
 extern "C++"
-void reg_fillMaskArray_gpu(int num, int **array1_d);
-/* ******************************** */
-/* ******************************** */
+void reg_fillMaskArray_gpu(int num, int *array1_d);
+/* *************************************************************** */
 extern "C++"
-float reg_sumReduction_gpu(float *array_d,
-                           int size);
-/* ******************************** */
-/* ******************************** */
+float reg_sumReduction_gpu(float *array_d, int size);
+/* *************************************************************** */
 extern "C++"
-float reg_maxReduction_gpu(float *array_d,
-                           int size);
-/* ******************************** */
-/* ******************************** */
+float reg_maxReduction_gpu(float *array_d, int size);
+/* *************************************************************** */
 extern "C++"
-float reg_minReduction_gpu(float *array_d,
-                           int size);
-/* ******************************** */
-/* ******************************** */
+float reg_minReduction_gpu(float *array_d, int size);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index ad225837..3c9e0074 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -82,7 +82,7 @@ void launchAffine(mat44 *affineTransformation,
 
    float* trans = (float *)malloc(16 * sizeof(float));
    const mat44 *targetMatrix = (deformationField->sform_code > 0) ? &(deformationField->sto_xyz) : &(deformationField->qto_xyz);
-   mat44 transformationMatrix = (compose == true) ? *affineTransformation : reg_mat44_mul(affineTransformation, targetMatrix);
+   mat44 transformationMatrix = compose ? *affineTransformation : reg_mat44_mul(affineTransformation, targetMatrix);
    mat44ToCptr(transformationMatrix, trans);
    NR_CUDA_SAFE_CALL(cudaMemcpy(*trans_d, trans, 16 * sizeof(float), cudaMemcpyHostToDevice));
    free(trans);
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 52aec362..762d0972 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -177,21 +177,21 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
 					const unsigned int sharedIndex = ( y + idy ) * 12 + x + idx;
 					const float rWarpedValue = sWarpedValues[sharedIndex];
 					const bool overlap = isfinite(rWarpedValue) && finiteReference;
-					const unsigned int currentWarpedSize = __syncthreads_count(overlap);
+					const unsigned int warpedSize = __syncthreads_count(overlap);
 
-                    if (currentWarpedSize > 8) {
+                    if (warpedSize > 8) {
                         //the reference values must remain intact at each loop, so please do not touch this!
 						float newreferenceTemp = referenceTemp;
 						float newreferenceVar = referenceVar;
-						if (currentWarpedSize != referenceSize){
+						if (warpedSize != referenceSize){
 							const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
-							const float newReferenceMean = __fdividef(blockReduce2DSum(newReferenceValue, tid), currentWarpedSize);
+							const float newReferenceMean = __fdividef(blockReduce2DSum(newReferenceValue, tid), warpedSize);
 							newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f;
 							newreferenceVar = blockReduce2DSum(newreferenceTemp * newreferenceTemp, tid);
 						}
 
 						const float rChecked = overlap ? rWarpedValue : 0.0f;
-						const float warpedMean = __fdividef(blockReduce2DSum(rChecked, tid), currentWarpedSize);
+						const float warpedMean = __fdividef(blockReduce2DSum(rChecked, tid), warpedSize);
 						const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f;
 						const float warpedVar = blockReduce2DSum(warpedTemp * warpedTemp, tid);
 
@@ -329,17 +329,17 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
                   const float rWarpedValue = sWarpedValues[sharedIndex];
                   const bool overlap = isfinite(rWarpedValue) && finiteReference;
                   tempVal = REDUCE_TEST(sData, overlap ? 1.0f : 0.0f, tid);
-                  const uint2 currentWarpedSize = make_uint2((uint)tempVal.x, (uint)tempVal.y);
+                  const uint2 warpedSize = make_uint2((uint)tempVal.x, (uint)tempVal.y);
 
-                  if (currentWarpedSize.x > 32 || currentWarpedSize.y > 32) {
+                  if (warpedSize.x > 32 || warpedSize.y > 32) {
 
                      float newreferenceTemp = referenceTemp;
                      float2 newreferenceVar = referenceVar;
-                     if (currentWarpedSize.x!=referenceSize.x || currentWarpedSize.y!=referenceSize.y){
+                     if (warpedSize.x!=referenceSize.x || warpedSize.y!=referenceSize.y){
                         const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
                         float2 newReferenceMean = REDUCE_TEST(sData, newReferenceValue, tid);
-                        newReferenceMean.x /= (float)currentWarpedSize.x;
-                        newReferenceMean.y /= (float)currentWarpedSize.y;
+                        newReferenceMean.x /= (float)warpedSize.x;
+                        newReferenceMean.y /= (float)warpedSize.y;
                         if(tid>63)
                            referenceTemp = overlap ? newReferenceValue - newReferenceMean.y : 0.f;
                         else referenceTemp = overlap ? newReferenceValue - newReferenceMean.x : 0.f;
@@ -347,8 +347,8 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
                      }
                      const float rChecked = overlap ? rWarpedValue : 0.0f;
                      float2 warpedMean = REDUCE_TEST(sData, rChecked, tid);
-                     warpedMean.x /= (float)currentWarpedSize.x;
-                     warpedMean.y /= (float)currentWarpedSize.y;
+                     warpedMean.x /= (float)warpedSize.x;
+                     warpedMean.y /= (float)warpedSize.y;
                      float warpedTemp;
                      if(tid>63)
                         warpedTemp = overlap ? rChecked - warpedMean.y : 0.f;
@@ -356,7 +356,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
                      const float2 warpedVar = REDUCE_TEST(sData, warpedTemp*warpedTemp, tid);
                      const float2 sumTargetResult = REDUCE_TEST(sData, newreferenceTemp*warpedTemp, tid);
 
-                     if (tid==0 && currentWarpedSize.x > 32 ){
+                     if (tid==0 && warpedSize.x > 32 ){
                         const float localCC = fabs(sumTargetResult.x *
                                                    rsqrtf(newreferenceVar.x * warpedVar.x));
                         if(localCC > bestValue.x) {
@@ -366,7 +366,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
                            bestDisp[0][2] = z - 4.f;
                         }
                      }
-                     if (tid==64 && currentWarpedSize.y > 32 ){
+                     if (tid==64 && warpedSize.y > 32 ){
                         const float localCC = fabs(sumTargetResult.y *
                                                    rsqrtf(newreferenceVar.y * warpedVar.y));
                         if(localCC > bestValue.y) {
@@ -500,22 +500,22 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
 						const unsigned int sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx;
 						const float rWarpedValue = sWarpedValues[sharedIndex];
 						const bool overlap = isfinite(rWarpedValue) && finiteReference;
-						const unsigned int currentWarpedSize = __syncthreads_count(overlap);
+						const unsigned int warpedSize = __syncthreads_count(overlap);
 
-						if (currentWarpedSize > 32) {
+						if (warpedSize > 32) {
 
 							//the target values must remain intact at each loop, so please do not touch this!
 							float newreferenceTemp = referenceTemp;
 							float newreferenceVar = referenceVar;
-							if (currentWarpedSize != referenceSize){
+							if (warpedSize != referenceSize){
 								const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
-								const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), currentWarpedSize);
+								const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), warpedSize);
 								newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f;
 								newreferenceVar = blockReduceSum(newreferenceTemp * newreferenceTemp, tid);
 							}
 
 							const float rChecked = overlap ? rWarpedValue : 0.0f;
-							const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), currentWarpedSize);
+							const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), warpedSize);
 							const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f;
 							const float warpedVar = blockReduceSum(warpedTemp * warpedTemp, tid);
 
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp
index 38fa95a0..b2895d6b 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affine_deformation_field.cpp
@@ -212,7 +212,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
                 auto *platform = new Platform(plat_value);
                 Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con);
                 affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
-                nifti_image *defField = con->GetCurrentDeformationField();
+                nifti_image *defField = con->GetDeformationField();
 
                 // Check all values
                 auto *defFieldPtrX = static_cast<float *>(defField->data);
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index 2dd56ee0..a391831c 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -154,8 +154,8 @@ int main(int argc, char **argv)
       reg_print_msg_error("The platform code is not suppoted");
       return EXIT_FAILURE;
    }
-   con->SetCurrentWarped(warpedImage);
-   //con->SetCurrentWarped(referenceImage);
+   con->SetWarped(warpedImage);
+   //con->SetWarped(referenceImage);
    test(con, platformCode);
    blockMatchingParams = con->GetBlockMatchingParams();
 
diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp
index e567292e..301f8734 100644
--- a/reg-test/reg_test_coherence_affine_deformation_field.cpp
+++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp
@@ -101,12 +101,12 @@ int main(int argc, char **argv)
     //CPU or GPU code
     reg_tools_changeDatatype<float>(referenceImage);
     test(con_cpu, NR_PLATFORM_CPU);
-    test_field_cpu = con_cpu->GetCurrentDeformationField();
+    test_field_cpu = con_cpu->GetDeformationField();
 
     test(con_gpu, NR_PLATFORM_CPU);
-    test_field_gpu = con_gpu->GetCurrentDeformationField();
+    test_field_gpu = con_gpu->GetDeformationField();
 
-    // Compute the difference between the computed and inputed deformation field
+    // Compute the difference between the computed and inputted deformation field
     nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField);
     diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper);
     reg_tools_substractImageToImage(inputDeformationField, test_field_cpu, diff_field);
diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp
index 3c5f5acc..1dc80d81 100644
--- a/reg-test/reg_test_coherence_blockMatching.cpp
+++ b/reg-test/reg_test_coherence_blockMatching.cpp
@@ -155,7 +155,7 @@ int main(int argc, char **argv)
    _reg_blockMatchingParam* blockMatchingParams_cpu = nullptr;
    AladinContent *con_cpu = nullptr;
    con_cpu = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
-   con_cpu->SetCurrentWarped(warpedImage);
+   con_cpu->SetWarped(warpedImage);
    test(con_cpu, NR_PLATFORM_CPU);
    blockMatchingParams_cpu = con_cpu->GetBlockMatchingParams();
 
@@ -177,7 +177,7 @@ int main(int argc, char **argv)
       con_gpu = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #endif
-   con_gpu->SetCurrentWarped(warpedImage);
+   con_gpu->SetWarped(warpedImage);
    test(con_gpu, platformCode);
    blockMatchingParams_gpu = con_gpu->GetBlockMatchingParams();
 
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
index ea16dbd1..be731d9f 100644
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ b/reg-test/reg_test_coherence_interpolation.cpp
@@ -77,16 +77,16 @@ int main(int argc, char **argv)
 
     // CPU platform
     AladinContent *con_cpu = new AladinContent(nullptr, referenceImage, nullptr, sizeof(float));
-    con_cpu->SetCurrentWarped(cpu_warped);
-    con_cpu->SetCurrentDeformationField(inputDeformationField);
-    con_cpu->SetCurrentReferenceMask(tempMask);
+    con_cpu->SetWarped(cpu_warped);
+    con_cpu->SetDeformationField(inputDeformationField);
+    con_cpu->SetReferenceMask(tempMask);
     Platform *platform_cpu = new Platform(NR_PLATFORM_CPU);
     Kernel *resampleImageKernel_cpu = platform_cpu->CreateKernel(ResampleImageKernel::GetName(), con_cpu);
     resampleImageKernel_cpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
                                                                       std::numeric_limits<float>::quiet_NaN());
     delete resampleImageKernel_cpu;
     delete platform_cpu;
-    cpu_warped = con_cpu->GetCurrentWarped(referenceImage->datatype);
+    cpu_warped = con_cpu->GetWarped(referenceImage->datatype);
 
     // GPU platform
     AladinContent *con_gpu = nullptr;
@@ -100,9 +100,9 @@ int main(int argc, char **argv)
         con_gpu = new ClAladinContent(nullptr, referenceImage, nullptr, sizeof(float));
     }
 #endif
-    con_gpu->SetCurrentWarped(gpu_warped);
-    con_gpu->SetCurrentDeformationField(inputDeformationField);
-    con_gpu->SetCurrentReferenceMask(tempMask);
+    con_gpu->SetWarped(gpu_warped);
+    con_gpu->SetDeformationField(inputDeformationField);
+    con_gpu->SetReferenceMask(tempMask);
     Platform *platform_gpu = nullptr;
 #ifdef _USE_CUDA
     if (platformCode == NR_PLATFORM_CUDA)
@@ -118,7 +118,7 @@ int main(int argc, char **argv)
                                                                       std::numeric_limits<float>::quiet_NaN());
     delete resampleImageKernel_gpu;
     delete platform_gpu;
-    gpu_warped = con_gpu->GetCurrentWarped(referenceImage->datatype);
+    gpu_warped = con_gpu->GetWarped(referenceImage->datatype);
 
     //Check if the platform used is double capable
     double proper_eps = EPS;
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index f75c4a81..3487aba3 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -140,12 +140,12 @@ TEST_CASE("Resampling", "[resampling]") {
                 // Create and set a warped image to host the computation
                 nifti_image *warped = nifti_copy_nim_info(reference);
                 warped->data = (void*)malloc(warped->nvox * warped->nbyper);
-                con->SetCurrentWarped(warped);
+                con->SetWarped(warped);
                 // Set the deformation field
-                con->SetCurrentDeformationField(def_field);
+                con->SetDeformationField(def_field);
                 // Set an empty mask to consider all voxels
                 int *tempMask = (int*)calloc(reference->nvox, sizeof(int));
-                con->SetCurrentReferenceMask(tempMask);
+                con->SetReferenceMask(tempMask);
                 // Initialise the platform to run current content and retrieve deformation field
                 auto *platform = new Platform(plat_value);
                 Kernel *resampleKernel = platform->CreateKernel(ResampleImageKernel::GetName(), con);
@@ -153,7 +153,7 @@ TEST_CASE("Resampling", "[resampling]") {
                 std::list<int> interp = {0, 1, 3};
                 for (auto it : interp) {
                     resampleKernel->castTo<ResampleImageKernel>()->Calculate(it, 0);
-                    warped = con->GetCurrentWarped(reference->datatype);
+                    warped = con->GetWarped(reference->datatype);
 
                     // Check all values
                     auto *warpedPtr = static_cast<float*>(warped->data);

From cdec171a9d72e0950c8dc13b7c8d41539b858bbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Sat, 3 Dec 2022 01:15:54 +0000
Subject: [PATCH 020/314] Add Compute class to handle computations among
 platforms

---
 niftyreg_build_version.txt        |   2 +-
 reg-lib/CMakeLists.txt            |  14 ++-
 reg-lib/Compute.cpp               | 163 ++++++++++++++++++++++++++++++
 reg-lib/Compute.h                 |  30 ++++++
 reg-lib/ComputeFactory.h          |   9 ++
 reg-lib/Platform.cpp              |  46 ++++-----
 reg-lib/Platform.h                |  30 ++++--
 reg-lib/cl/CMakeLists.txt         |   3 +-
 reg-lib/cl/ClCompute.cpp          |   7 ++
 reg-lib/cl/ClCompute.h            |  10 ++
 reg-lib/cl/ClComputeFactory.h     |   9 ++
 reg-lib/cuda/CMakeLists.txt       |   1 +
 reg-lib/cuda/CudaCompute.cpp      | 142 ++++++++++++++++++++++++++
 reg-lib/cuda/CudaCompute.h        |  25 +++++
 reg-lib/cuda/CudaComputeFactory.h |   9 ++
 15 files changed, 459 insertions(+), 41 deletions(-)
 create mode 100644 reg-lib/Compute.cpp
 create mode 100644 reg-lib/Compute.h
 create mode 100644 reg-lib/ComputeFactory.h
 create mode 100644 reg-lib/cl/ClCompute.cpp
 create mode 100644 reg-lib/cl/ClCompute.h
 create mode 100644 reg-lib/cl/ClComputeFactory.h
 create mode 100644 reg-lib/cuda/CudaCompute.cpp
 create mode 100644 reg-lib/cuda/CudaCompute.h
 create mode 100644 reg-lib/cuda/CudaComputeFactory.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c75acbe2..a949a93d 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-127
+128
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 7187ad7b..3cea8942 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -139,13 +139,14 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_femTrans")
 #-----------------------------------------------------------------------------
 ## BUILD THE ALADIN LIBRARY
 set(_reg_aladin_files
-  Content.cpp
-  Content.h
   AladinContent.cpp
   AladinContent.h
+  Compute.cpp
+  Compute.h
+  Content.cpp
+  Content.h
   Platform.cpp
   Platform.h
-  Kernel.h
   cpu/CpuAffineDeformationFieldKernel.h
   cpu/CpuAffineDeformationFieldKernel.cpp
   cpu/CpuBlockMatchingKernel.h
@@ -181,10 +182,12 @@ install(FILES _reg_aladin.h _reg_aladin_sym.h DESTINATION include)
 install(FILES _reg_aladin.cpp _reg_aladin_sym.cpp DESTINATION include)
 install(FILES AladinContent.h Platform.h  DESTINATION include)
 install(FILES
-        Kernel.h
         AffineDeformationFieldKernel.h
         BlockMatchingKernel.h
+        Compute.h
+        ComputeFactory.h
         ConvolutionKernel.h
+        Kernel.h
         OptimiseKernel.h
         ResampleImageKernel.h
         cpu/CpuAffineDeformationFieldKernel.h
@@ -198,11 +201,12 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_aladin")
 #-----------------------------------------------------------------------------
 ## BUILD THE F3D LIBRARY
 set(_reg_f3d_files
+  Compute.cpp
+  Compute.h
   Content.cpp
   Content.h
   Platform.cpp
   Platform.h
-  Kernel.h
   _reg_base.h
   _reg_base.cpp
   _reg_f3d.h
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
new file mode 100644
index 00000000..04342219
--- /dev/null
+++ b/reg-lib/Compute.cpp
@@ -0,0 +1,163 @@
+#include "Compute.h"
+#include "F3dContent.h"
+#include "_reg_resampling.h"
+#include "_reg_localTrans_jac.h"
+#include "_reg_localTrans_regul.h"
+
+/* *************************************************************** */
+void Compute::ResampleImage(int inter, float paddingValue) {
+    reg_resampleImage(con->GetFloating(),
+                      con->GetWarped(),
+                      con->GetDeformationField(),
+                      con->GetReferenceMask(),
+                      inter,
+                      paddingValue);
+}
+/* *************************************************************** */
+double Compute::GetJacobianPenaltyTerm(bool approx) {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    return reg_spline_getJacobianPenaltyTerm(con->GetControlPointGrid(),
+                                             con->GetReference(),
+                                             approx);
+}
+/* *************************************************************** */
+void Compute::JacobianPenaltyTermGradient(float weight, bool approx) {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    reg_spline_getJacobianPenaltyTermGradient(con->GetControlPointGrid(),
+                                              con->GetReference(),
+                                              con->GetTransformationGradient(),
+                                              weight,
+                                              approx);
+}
+/* *************************************************************** */
+double Compute::CorrectFolding(bool approx) {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    return reg_spline_correctFolding(con->GetControlPointGrid(),
+                                     con->GetReference(),
+                                     approx);
+}
+/* *************************************************************** */
+double Compute::ApproxBendingEnergy() {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    return reg_spline_approxBendingEnergy(con->GetControlPointGrid());
+}
+/* *************************************************************** */
+void Compute::ApproxBendingEnergyGradient(float weight) {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    reg_spline_approxBendingEnergyGradient(con->GetControlPointGrid(),
+                                           con->GetTransformationGradient(),
+                                           weight);
+}
+/* *************************************************************** */
+double Compute::ApproxLinearEnergy() {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    return reg_spline_approxLinearEnergy(con->GetControlPointGrid());
+}
+/* *************************************************************** */
+void Compute::ApproxLinearEnergyGradient(float weight) {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    reg_spline_approxLinearEnergyGradient(con->GetControlPointGrid(),
+                                          con->GetTransformationGradient(),
+                                          weight);
+}
+/* *************************************************************** */
+double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    return reg_spline_getLandmarkDistance(con->GetControlPointGrid(),
+                                          landmarkNumber,
+                                          landmarkReference,
+                                          landmarkFloating);
+}
+/* *************************************************************** */
+void Compute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    reg_spline_getLandmarkDistanceGradient(con->GetControlPointGrid(),
+                                           con->GetTransformationGradient(),
+                                           landmarkNumber,
+                                           landmarkReference,
+                                           landmarkFloating,
+                                           weight);
+}
+/* *************************************************************** */
+void Compute::GetDeformationField(bool composition, bool bspline) {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    reg_spline_getDeformationField(con->GetControlPointGrid(),
+                                   con->GetDeformationField(),
+                                   con->GetReferenceMask(),
+                                   composition,
+                                   bspline);
+}
+/* *************************************************************** */
+void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
+    nifti_image *controlPointGrid = dynamic_cast<F3dContent*>(con)->GetControlPointGrid();
+    if (optimiseX && optimiseY && optimiseZ) {
+        // Update the values for all axis displacement
+        for (size_t i = 0; i < controlPointGrid->nvox; ++i)
+            currentDOF[i] = bestDOF[i] + scale * gradient[i];
+    } else {
+        size_t voxNumber = controlPointGrid->nvox / controlPointGrid->ndim;
+        // Update the values for the x-axis displacement
+        if (optimiseX) {
+            for (size_t i = 0; i < voxNumber; ++i)
+                currentDOF[i] = bestDOF[i] + scale * gradient[i];
+        }
+        // Update the values for the y-axis displacement
+        if (optimiseY && controlPointGrid->ndim > 1) {
+            float *currentDOFY = &currentDOF[voxNumber];
+            float *bestDOFY = &bestDOF[voxNumber];
+            float *gradientY = &gradient[voxNumber];
+            for (size_t i = 0; i < voxNumber; ++i)
+                currentDOFY[i] = bestDOFY[i] + scale * gradientY[i];
+        }
+        // Update the values for the z-axis displacement
+        if (optimiseZ && controlPointGrid->ndim > 2) {
+            float *currentDOFZ = &currentDOF[2 * voxNumber];
+            float *bestDOFZ = &bestDOF[2 * voxNumber];
+            float *gradientZ = &gradient[2 * voxNumber];
+            for (size_t i = 0; i < voxNumber; ++i)
+                currentDOFZ[i] = bestDOFZ[i] + scale * gradientZ[i];
+        }
+    }
+}
+/* *************************************************************** */
+void Compute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    reg_getImageGradient(con->GetFloating(),
+                         con->GetWarpedGradient(),
+                         con->GetDeformationField(),
+                         con->GetReferenceMask(),
+                         interpolation,
+                         paddingValue,
+                         activeTimepoint);
+}
+/* *************************************************************** */
+void Compute::VoxelCentricToNodeCentric(float weight) {
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    mat44 *reorientation = Content::GetIJKMatrix(*con->GetFloating());
+    reg_voxelCentric2NodeCentric(con->GetTransformationGradient(),
+                                 con->GetVoxelBasedMeasureGradient(),
+                                 weight,
+                                 false, // no update
+                                 reorientation);
+}
+/* *************************************************************** */
+double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
+    // TODO Fix reg_getMaximalLength to accept optimiseX, optimiseY, optimiseZ
+    nifti_image *transformationGradient = dynamic_cast<F3dContent*>(con)->GetTransformationGradient();
+    switch (transformationGradient->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        return reg_getMaximalLength<float>(transformationGradient);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        return reg_getMaximalLength<double>(transformationGradient);
+        break;
+    }
+    return 0;
+}
+/* *************************************************************** */
+void Compute::NormaliseGradient(double maxGradLength) {
+    // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ
+    nifti_image *transformationGradient = dynamic_cast<F3dContent*>(con)->GetTransformationGradient();
+    reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength);
+}
+/* *************************************************************** */
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
new file mode 100644
index 00000000..be1bbdd8
--- /dev/null
+++ b/reg-lib/Compute.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "Content.h"
+
+class Compute {
+public:
+    Compute() = delete;
+    Compute(Content *conIn) : con(conIn) {}
+    virtual ~Compute() {}
+
+    virtual void ResampleImage(int inter, float paddingValue);
+    virtual double GetJacobianPenaltyTerm(bool approx);
+    virtual void JacobianPenaltyTermGradient(float weight, bool approx);
+    virtual double CorrectFolding(bool approx);
+    virtual double ApproxBendingEnergy();
+    virtual void ApproxBendingEnergyGradient(float weight);
+    virtual double ApproxLinearEnergy();
+    virtual void ApproxLinearEnergyGradient(float weight);
+    virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating);
+    virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight);
+    virtual void GetDeformationField(bool composition, bool bspline);
+    virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ);
+    virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint);
+    virtual void VoxelCentricToNodeCentric(float weight);
+    virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ);
+    virtual void NormaliseGradient(double maxGradLength);
+
+protected:
+    Content *con;
+};
diff --git a/reg-lib/ComputeFactory.h b/reg-lib/ComputeFactory.h
new file mode 100644
index 00000000..e2c2de1e
--- /dev/null
+++ b/reg-lib/ComputeFactory.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include "Compute.h"
+
+class ComputeFactory {
+public:
+    virtual Compute* Produce(Content *con) { return new Compute(con); }
+    virtual ~ComputeFactory() {}
+};
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index a46cb0fc..555d1b59 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -1,41 +1,36 @@
 #include "Platform.h"
-#include "AladinContent.h"
-#include "KernelFactory.h"
-#include "CpuKernelFactory.h"
-#ifdef _USE_CUDA
-#include "CudaKernelFactory.h"
-#include "CudaContextSingleton.h"
-#endif
-#ifdef _USE_OPENCL
-#include "ClKernelFactory.h"
-#include "ClContextSingleton.h"
-#endif
-
-using namespace std;
 
 /* *************************************************************** */
-Platform::Platform(int platformCode) {
-    this->platformCode = platformCode;
+Platform::Platform(int platformCodeIn) {
+    platformCode = platformCodeIn;
     if (platformCode == NR_PLATFORM_CPU) {
-        this->factory = new CpuKernelFactory();
-        this->platformName = "cpu_platform";
+        kernelFactory = new CpuKernelFactory();
+        computeFactory = new ComputeFactory();
+        platformName = "cpu_platform";
     }
 #ifdef _USE_CUDA
     else if (platformCode == NR_PLATFORM_CUDA) {
-        this->factory = new CudaKernelFactory();
-        this->platformName = "cuda_platform";
+        kernelFactory = new CudaKernelFactory();
+        computeFactory = new CudaComputeFactory();
+        platformName = "cuda_platform";
     }
 #endif
 #ifdef _USE_OPENCL
     else if (platformCode == NR_PLATFORM_CL) {
-        this->factory = new ClKernelFactory();
-        this->platformName = "cl_platform";
+        kernelFactory = new ClKernelFactory();
+        computeFactory = new ClComputeFactory();
+        platformName = "cl_platform";
     }
 #endif
 }
 /* *************************************************************** */
-Kernel* Platform::CreateKernel(const string& name, Content *con) const {
-    return this->factory->ProduceKernel(name, con);
+Compute* Platform::CreateCompute(Content *con) const {
+    return computeFactory->Produce(con);
+}
+/* *************************************************************** */
+Kernel* Platform::CreateKernel(const std::string& name, Content *con) const {
+    return kernelFactory->Produce(name, con);
+}
 }
 /* *************************************************************** */
 std::string Platform::GetName() {
@@ -85,10 +80,11 @@ int Platform::GetPlatformCode() {
 }
 /* *************************************************************** */
 //void Platform::SetPlatformCode(const int platformCodeIn) {
-//    this->platformCode = platformCodeIn;
+//    platformCode = platformCodeIn;
 //}
 /* *************************************************************** */
 Platform::~Platform() {
-    delete this->factory;
+    delete kernelFactory;
+    delete computeFactory;
 }
 /* *************************************************************** */
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index ce75c9b3..6d752afb 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -1,22 +1,33 @@
 #pragma once
 
-#include <map>
-#include <string>
-#include <vector>
+#include "F3dContent.h"
+#include "KernelFactory.h"
+#include "CpuKernelFactory.h"
+#include "ComputeFactory.h"
+#include "_reg_optimiser.h"
+#ifdef _USE_CUDA
+#include "CudaF3dContent.h"
+#include "CudaKernelFactory.h"
+#include "CudaComputeFactory.h"
+#include "CudaContextSingleton.h"
+#include "_reg_optimiser_gpu.h"
+#endif
+#ifdef _USE_OPENCL
+#include "ClKernelFactory.h"
+#include "ClComputeFactory.h"
+#include "ClContextSingleton.h"
+#endif
 
 #define NR_PLATFORM_CPU  0
 #define NR_PLATFORM_CUDA 1
 #define NR_PLATFORM_CL   2
 
-class Kernel;
-class KernelFactory;
-class Content;
-
 class Platform {
 public:
-    Platform(int platformCode);
+    Platform(int platformCodeIn);
     virtual ~Platform();
 
+    Compute* CreateCompute(Content *con) const;
     Kernel* CreateKernel(const std::string& name, Content *con) const;
     std::string GetName();
 
@@ -26,7 +37,8 @@ class Platform {
     unsigned GetGpuIdx();
 
 private:
-    KernelFactory *factory;
+    KernelFactory *kernelFactory;
+    ComputeFactory *computeFactory;
     std::string platformName;
     int platformCode;
     unsigned gpuIdx;
diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt
index b0589955..aa6a7771 100755
--- a/reg-lib/cl/CMakeLists.txt
+++ b/reg-lib/cl/CMakeLists.txt
@@ -21,6 +21,7 @@ include_directories(${OpenCL_INCLUDE_DIRS})
 # Build the _reg_opencl_kernels library
 set(NAME _reg_opencl_kernels)
 add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
+  ClCompute.cpp
   ClContextSingleton.cpp
   CLAladinContent.cpp
   ClKernelFactory.cpp
@@ -40,7 +41,7 @@ install(TARGETS ${NAME}
 )
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
-install(FILES ClContextSingleton.h CLAladinContent.h ClKernelFactory.h
+install(FILES ClCompute.h ClContextSingleton.h CLAladinContent.h ClKernelFactory.h
         ClAffineDeformationFieldKernel.h
         ClBlockMatchingKernel.h
         ClConvolutionKernel.h
diff --git a/reg-lib/cl/ClCompute.cpp b/reg-lib/cl/ClCompute.cpp
new file mode 100644
index 00000000..1a8b137b
--- /dev/null
+++ b/reg-lib/cl/ClCompute.cpp
@@ -0,0 +1,7 @@
+#include "ClCompute.h"
+
+/* *************************************************************** */
+void ClCompute::ResampleImage(int inter, float paddingValue) {
+
+}
+/* *************************************************************** */
diff --git a/reg-lib/cl/ClCompute.h b/reg-lib/cl/ClCompute.h
new file mode 100644
index 00000000..ba4690d5
--- /dev/null
+++ b/reg-lib/cl/ClCompute.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "Compute.h"
+
+class ClCompute: public Compute {
+public:
+    ClCompute(Content *con) : Compute(con) {}
+
+    virtual void ResampleImage(int inter, float paddingValue) override;
+};
diff --git a/reg-lib/cl/ClComputeFactory.h b/reg-lib/cl/ClComputeFactory.h
new file mode 100644
index 00000000..7a2fd18d
--- /dev/null
+++ b/reg-lib/cl/ClComputeFactory.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include "ComputeFactory.h"
+#include "ClCompute.h"
+
+class ClComputeFactory: public ComputeFactory {
+public:
+    virtual Compute* Produce(Content *con) override { return new ClCompute(con); }
+};
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 204c9ab6..19abc9b6 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -74,6 +74,7 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
 set(NAME _reg_cuda_kernels)
 cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
+    CudaCompute.cpp
     CudaContextSingleton.cpp
     CudaAladinContent.cpp
     CudaKernelFactory.cpp
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
new file mode 100644
index 00000000..b31f3152
--- /dev/null
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -0,0 +1,142 @@
+#include "CudaCompute.h"
+#include "CudaF3dContent.h"
+#include "_reg_resampling_gpu.h"
+#include "_reg_localTransformation_gpu.h"
+#include "_reg_optimiser_gpu.h"
+
+/* *************************************************************** */
+void CudaCompute::ResampleImage(int inter, float paddingValue) {
+    CudaContent *con = dynamic_cast<CudaContent*>(this->con);
+    reg_resampleImage_gpu(con->Content::GetFloating(),
+                          con->GetWarpedCuda()[0],
+                          con->GetFloatingCuda()[0],
+                          con->GetDeformationFieldCuda(),
+                          con->GetReferenceMaskCuda(),
+                          con->Content::GetReference()->nvox,
+                          paddingValue);
+}
+/* *************************************************************** */
+double CudaCompute::GetJacobianPenaltyTerm(bool approx) {
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    return reg_spline_getJacobianPenaltyTerm_gpu(con->F3dContent::GetReference(),
+                                                 con->F3dContent::GetControlPointGrid(),
+                                                 con->GetControlPointGridCuda(),
+                                                 approx);
+}
+/* *************************************************************** */
+void CudaCompute::JacobianPenaltyTermGradient(float weight, bool approx) {
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    reg_spline_getJacobianPenaltyTermGradient_gpu(con->F3dContent::GetReference(),
+                                                  con->F3dContent::GetControlPointGrid(),
+                                                  con->GetControlPointGridCuda(),
+                                                  con->GetTransformationGradientCuda(),
+                                                  weight,
+                                                  approx);
+}
+/* *************************************************************** */
+double CudaCompute::CorrectFolding(bool approx) {
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    return reg_spline_correctFolding_gpu(con->F3dContent::GetReference(),
+                                         con->F3dContent::GetControlPointGrid(),
+                                         con->GetControlPointGridCuda(),
+                                         approx);
+}
+/* *************************************************************** */
+double CudaCompute::ApproxBendingEnergy() {
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    return reg_spline_approxBendingEnergy_gpu(con->F3dContent::GetControlPointGrid(), con->GetControlPointGridCuda());
+}
+/* *************************************************************** */
+void CudaCompute::ApproxBendingEnergyGradient(float weight) {
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    reg_spline_approxBendingEnergyGradient_gpu(con->F3dContent::GetControlPointGrid(),
+                                               con->GetControlPointGridCuda(),
+                                               con->GetTransformationGradientCuda(),
+                                               weight);
+}
+/* *************************************************************** */
+double CudaCompute::ApproxLinearEnergy() {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    return Compute::ApproxLinearEnergy();
+}
+/* *************************************************************** */
+void CudaCompute::ApproxLinearEnergyGradient(float weight) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    Compute::ApproxLinearEnergyGradient(weight);
+    // Transfer the data back to the CUDA device
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    con->SetTransformationGradient(con->F3dContent::GetTransformationGradient());
+}
+/* *************************************************************** */
+double CudaCompute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    return Compute::GetLandmarkDistance(landmarkNumber, landmarkReference, landmarkFloating);
+}
+/* *************************************************************** */
+void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    Compute::LandmarkDistanceGradient(landmarkNumber, landmarkReference, landmarkFloating, weight);
+    // Transfer the data back to the CUDA device
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    con->SetTransformationGradient(con->F3dContent::GetTransformationGradient());
+}
+/* *************************************************************** */
+void CudaCompute::GetDeformationField(bool composition, bool bspline) {
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    reg_spline_getDeformationField_gpu(con->F3dContent::GetControlPointGrid(),
+                                       con->F3dContent::GetReference(),
+                                       con->GetControlPointGridCuda(),
+                                       con->GetDeformationFieldCuda(),
+                                       con->GetReferenceMaskCuda(),
+                                       con->F3dContent::GetReference()->nvox,
+                                       bspline);
+}
+/* *************************************************************** */
+void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
+    // TODO Fix reg_updateControlPointPosition_gpu to accept optimiseX, optimiseY, optimiseZ
+    reg_updateControlPointPosition_gpu(dynamic_cast<CudaF3dContent*>(con)->F3dContent::GetControlPointGrid(),
+                                       reinterpret_cast<float4*>(currentDOF),
+                                       reinterpret_cast<float4*>(bestDOF),
+                                       reinterpret_cast<float4*>(gradient),
+                                       scale);
+}
+/* *************************************************************** */
+void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    reg_getImageGradient_gpu(con->F3dContent::GetFloating(),
+                             con->GetFloatingCuda()[0],
+                             con->GetDeformationFieldCuda(),
+                             con->GetWarpedGradientCuda()[0],
+                             con->F3dContent::GetReference()->nvox,
+                             paddingValue);
+}
+/* *************************************************************** */
+void CudaCompute::VoxelCentricToNodeCentric(float weight) {
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    reg_voxelCentric2NodeCentric_gpu(con->F3dContent::GetWarped(),
+                                     con->F3dContent::GetControlPointGrid(),
+                                     con->GetVoxelBasedMeasureGradientCuda(),
+                                     con->GetTransformationGradientCuda(),
+                                     weight);
+}
+/* *************************************************************** */
+double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
+    // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    nifti_image *transformationGradient = con->F3dContent::GetTransformationGradient();
+    int nodeNumber = transformationGradient->nvox / transformationGradient->ndim;
+    return reg_getMaximalLength_gpu(con->GetTransformationGradientCuda(), nodeNumber);
+}
+/* *************************************************************** */
+void CudaCompute::NormaliseGradient(double maxGradLength) {
+    // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ
+    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
+    nifti_image *transformationGradient = con->F3dContent::GetTransformationGradient();
+    int nodeNumber = transformationGradient->nvox / transformationGradient->ndim;
+    reg_multiplyValue_gpu(nodeNumber, con->GetTransformationGradientCuda(), 1 / (float)maxGradLength);
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
new file mode 100644
index 00000000..1ca941ab
--- /dev/null
+++ b/reg-lib/cuda/CudaCompute.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "Compute.h"
+
+class CudaCompute: public Compute {
+public:
+    CudaCompute(Content *con) : Compute(con) {}
+
+    virtual void ResampleImage(int inter, float paddingValue) override;
+    virtual double GetJacobianPenaltyTerm(bool approx) override;
+    virtual void JacobianPenaltyTermGradient(float weight, bool approx) override;
+    virtual double CorrectFolding(bool approx) override;
+    virtual double ApproxBendingEnergy() override;
+    virtual void ApproxBendingEnergyGradient(float weight) override;
+    virtual double ApproxLinearEnergy() override;
+    virtual void ApproxLinearEnergyGradient(float weight) override;
+    virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) override;
+    virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) override;
+    virtual void GetDeformationField(bool composition, bool bspline) override;
+    virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override;
+    virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override;
+    virtual void VoxelCentricToNodeCentric(float weight) override;
+    virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override;
+    virtual void NormaliseGradient(double maxGradLength) override;
+};
diff --git a/reg-lib/cuda/CudaComputeFactory.h b/reg-lib/cuda/CudaComputeFactory.h
new file mode 100644
index 00000000..d14fd425
--- /dev/null
+++ b/reg-lib/cuda/CudaComputeFactory.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include "ComputeFactory.h"
+#include "CudaCompute.h"
+
+class CudaComputeFactory: public ComputeFactory {
+public:
+    virtual Compute* Produce(Content *con) override { return new CudaCompute(con); }
+};

From f4d9b2eacc0c2110430fb995d11823624ce18712 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Sat, 3 Dec 2022 01:20:12 +0000
Subject: [PATCH 021/314] Add Content subclasses to handle CPU and CUDA
 contents for reg_f3d

---
 niftyreg_build_version.txt      |   2 +-
 reg-lib/CMakeLists.txt          |   6 +-
 reg-lib/F3dContent.cpp          |  97 +++++++++++++
 reg-lib/F3dContent.h            |  58 ++++++++
 reg-lib/cuda/CMakeLists.txt     |   4 +-
 reg-lib/cuda/CudaContent.cpp    | 239 ++++++++++++++++++++++++++++++++
 reg-lib/cuda/CudaContent.h      |  54 ++++++++
 reg-lib/cuda/CudaF3dContent.cpp | 137 ++++++++++++++++++
 reg-lib/cuda/CudaF3dContent.h   |  52 +++++++
 9 files changed, 645 insertions(+), 4 deletions(-)
 create mode 100644 reg-lib/F3dContent.cpp
 create mode 100644 reg-lib/F3dContent.h
 create mode 100644 reg-lib/cuda/CudaContent.cpp
 create mode 100644 reg-lib/cuda/CudaContent.h
 create mode 100644 reg-lib/cuda/CudaF3dContent.cpp
 create mode 100644 reg-lib/cuda/CudaF3dContent.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a949a93d..b0d73241 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-128
+129
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 3cea8942..a9b006d6 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -205,6 +205,8 @@ set(_reg_f3d_files
   Compute.h
   Content.cpp
   Content.h
+  F3dContent.cpp
+  F3dContent.h
   Platform.cpp
   Platform.h
   _reg_base.h
@@ -246,8 +248,8 @@ install(TARGETS _reg_f3d
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
   )
-install(FILES _reg_base.h DESTINATION include)
-install(FILES _reg_f3d.h DESTINATION include)
+install(FILES _reg_base.h Content.h DESTINATION include)
+install(FILES _reg_f3d.h F3dContent.h DESTINATION include)
 install(FILES _reg_f3d2.h DESTINATION include)
 install(FILES _reg_f3d_sym.h DESTINATION include)
 install(FILES cpu/_reg_optimiser.cpp cpu/_reg_optimiser.h DESTINATION include)
diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp
new file mode 100644
index 00000000..442e52cd
--- /dev/null
+++ b/reg-lib/F3dContent.cpp
@@ -0,0 +1,97 @@
+#include "F3dContent.h"
+#include "_reg_tools.h"
+#include "_reg_resampling.h"
+
+/* *************************************************************** */
+F3dContent::F3dContent(nifti_image *referenceIn,
+                       nifti_image *floatingIn,
+                       nifti_image *controlPointGridIn,
+                       nifti_image *localWeightSimIn,
+                       int *referenceMaskIn,
+                       mat44 *transformationMatrixIn,
+                       size_t bytesIn) :
+    Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn),
+    controlPointGrid(controlPointGridIn) {
+    if (!controlPointGridIn) {
+        reg_print_fct_error("F3dContent::F3dContent()");
+        reg_print_msg_error("controlPointGridIn can't be nullptr");
+        reg_exit();
+    }
+    AllocateLocalWeightSim(localWeightSimIn);
+    AllocateWarpedGradient();
+    AllocateTransformationGradient();
+    AllocateVoxelBasedMeasureGradient();
+}
+/* *************************************************************** */
+F3dContent::~F3dContent() {
+    DeallocateLocalWeightSim();
+    DeallocateWarpedGradient();
+    DeallocateTransformationGradient();
+    DeallocateVoxelBasedMeasureGradient();
+}
+/* *************************************************************** */
+void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) {
+    if (!localWeightSimIn) return;
+    localWeightSim = nifti_copy_nim_info(reference);
+    localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0];
+    localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4];
+    localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5];
+    localWeightSim->nvox = size_t(localWeightSim->nx * localWeightSim->ny * localWeightSim->nz *
+                                  localWeightSim->nt * localWeightSim->nu);
+    localWeightSim->data = (void*)malloc(localWeightSim->nvox * localWeightSim->nbyper);
+    F3dContent::ZeroVoxelBasedMeasureGradient();
+    reg_getDeformationFromDisplacement(voxelBasedMeasureGradient);
+    reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0);
+}
+/* *************************************************************** */
+void F3dContent::DeallocateLocalWeightSim() {
+    if (localWeightSim) {
+        nifti_image_free(localWeightSim);
+        localWeightSim = nullptr;
+    }
+}
+/* *************************************************************** */
+void F3dContent::AllocateWarpedGradient() {
+    warpedGradient = nifti_copy_nim_info(deformationField);
+    warpedGradient->data = (void*)calloc(warpedGradient->nvox, warpedGradient->nbyper);
+}
+/* *************************************************************** */
+void F3dContent::DeallocateWarpedGradient() {
+    if (warpedGradient) {
+        nifti_image_free(warpedGradient);
+        warpedGradient = nullptr;
+    }
+}
+/* *************************************************************** */
+void F3dContent::AllocateTransformationGradient() {
+    transformationGradient = nifti_copy_nim_info(controlPointGrid);
+    transformationGradient->data = (void*)calloc(transformationGradient->nvox, transformationGradient->nbyper);
+}
+/* *************************************************************** */
+void F3dContent::DeallocateTransformationGradient() {
+    if (transformationGradient != nullptr) {
+        nifti_image_free(transformationGradient);
+        transformationGradient = nullptr;
+    }
+}
+/* *************************************************************** */
+void F3dContent::AllocateVoxelBasedMeasureGradient() {
+    voxelBasedMeasureGradient = nifti_copy_nim_info(deformationField);
+    voxelBasedMeasureGradient->data = (void*)calloc(voxelBasedMeasureGradient->nvox, voxelBasedMeasureGradient->nbyper);
+}
+/* *************************************************************** */
+void F3dContent::DeallocateVoxelBasedMeasureGradient() {
+    if (voxelBasedMeasureGradient) {
+        nifti_image_free(voxelBasedMeasureGradient);
+        voxelBasedMeasureGradient = nullptr;
+    }
+}
+/* *************************************************************** */
+void F3dContent::ZeroTransformationGradient() {
+    memset(transformationGradient->data, 0, transformationGradient->nvox * transformationGradient->nbyper);
+}
+/* *************************************************************** */
+void F3dContent::ZeroVoxelBasedMeasureGradient() {
+    memset(voxelBasedMeasureGradient->data, 0, voxelBasedMeasureGradient->nvox * voxelBasedMeasureGradient->nbyper);
+}
+/* *************************************************************** */
diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h
new file mode 100644
index 00000000..091e4da9
--- /dev/null
+++ b/reg-lib/F3dContent.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include "Content.h"
+
+class F3dContent: public virtual Content {
+public:
+    F3dContent() = delete;
+    F3dContent(nifti_image *referenceIn,
+               nifti_image *floatingIn,
+               nifti_image *controlPointGridIn,
+               nifti_image *localWeightSimIn,
+               int *referenceMaskIn = nullptr,
+               mat44 *transformationMatrixIn = nullptr,
+               size_t bytesIn = sizeof(float));
+    virtual ~F3dContent();
+
+    // Getters
+    virtual nifti_image* GetControlPointGrid() { return controlPointGrid; }
+    virtual nifti_image* GetLocalWeightSim() { return localWeightSim; }
+    virtual nifti_image* GetTransformationGradient() { return transformationGradient; }
+    virtual nifti_image* GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; }
+    virtual nifti_image* GetWarpedGradient() { return warpedGradient; }
+
+    // Setters
+    virtual void SetControlPointGrid(nifti_image *controlPointGridIn) {
+        controlPointGrid = controlPointGridIn;
+    }
+    virtual void SetTransformationGradient(nifti_image *transformationGradientIn) {
+        transformationGradient = transformationGradientIn;
+    }
+    virtual void SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) {
+        voxelBasedMeasureGradient = voxelBasedMeasureGradientIn;
+    }
+    virtual void SetWarpedGradient(nifti_image *warpedGradientIn) {
+        warpedGradient = warpedGradientIn;
+    }
+
+    // Auxiliary methods
+    virtual void ZeroTransformationGradient();
+    virtual void ZeroVoxelBasedMeasureGradient();
+
+protected:
+    nifti_image *controlPointGrid;
+    nifti_image *localWeightSim = nullptr;
+    nifti_image *transformationGradient = nullptr;
+    nifti_image *voxelBasedMeasureGradient = nullptr;
+    nifti_image *warpedGradient = nullptr;
+
+private:
+    void AllocateLocalWeightSim(nifti_image*);
+    void DeallocateLocalWeightSim();
+    void AllocateWarpedGradient();
+    void DeallocateWarpedGradient();
+    void AllocateTransformationGradient();
+    void DeallocateTransformationGradient();
+    void AllocateVoxelBasedMeasureGradient();
+    void DeallocateVoxelBasedMeasureGradient();
+};
\ No newline at end of file
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 19abc9b6..452829d2 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -75,6 +75,8 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 set(NAME _reg_cuda_kernels)
 cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaCompute.cpp
+    CudaContent.cpp
+    CudaF3dContent.cpp
     CudaContextSingleton.cpp
     CudaAladinContent.cpp
     CudaKernelFactory.cpp
@@ -102,7 +104,7 @@ install(TARGETS ${NAME}
     LIBRARY DESTINATION lib
     ARCHIVE DESTINATION lib
     )
-install(FILES blockMatchingKernel.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda)
+install(FILES blockMatchingKernel.h CudaCompute.h CudaContent.h CudaF3dContent.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda)
 install(FILES CudaKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CudaAffineDeformationFieldKernel.h CudaBlockMatchingKernel.h CudaConvolutionKernel.h CudaOptimiseKernel.h CudaResampleImageKernel.h DESTINATION include/cuda)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
new file mode 100644
index 00000000..20c1b12d
--- /dev/null
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -0,0 +1,239 @@
+#include "CudaContent.h"
+
+/* *************************************************************** */
+CudaContent::CudaContent(nifti_image *referenceIn,
+                         nifti_image *floatingIn,
+                         int *referenceMaskIn,
+                         mat44 *transformationMatrixIn,
+                         size_t bytesIn) :
+    Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) {
+    AllocateImages();
+    AllocateWarped();
+    AllocateDeformationField();
+    SetReferenceMask(referenceMask);
+    SetTransformationMatrix(transformationMatrix);
+}
+/* *************************************************************** */
+CudaContent::~CudaContent() {
+    DeallocateImages();
+    DeallocateWarped();
+    DeallocateDeformationField();
+    SetReferenceMask(nullptr);
+    SetTransformationMatrix(nullptr);
+}
+/* *************************************************************** */
+void CudaContent::AllocateImages() {
+    if (reference->nbyper != NIFTI_TYPE_FLOAT32)
+        reg_tools_changeDatatype<float>(reference);
+    if (floating->nbyper != NIFTI_TYPE_FLOAT32)
+        reg_tools_changeDatatype<float>(floating);
+    if (reference->nt == 1) {
+        cudaCommon_allocateArrayToDevice<float>(&referenceCuda[0], reference->dim);
+        cudaCommon_transferNiftiToArrayOnDevice<float>(&referenceCuda[0], reference);
+        cudaCommon_allocateArrayToDevice<float>(&floatingCuda[0], floating->dim);
+        cudaCommon_transferNiftiToArrayOnDevice<float>(&floatingCuda[0], floating);
+    } else if (reference->nt == 2) {
+        cudaCommon_allocateArrayToDevice<float>(&referenceCuda[0], &referenceCuda[1], reference->dim);
+        cudaCommon_transferNiftiToArrayOnDevice<float>(&referenceCuda[0], &referenceCuda[1], reference);
+        cudaCommon_allocateArrayToDevice<float>(&floatingCuda[0], &floatingCuda[1], floating->dim);
+        cudaCommon_transferNiftiToArrayOnDevice<float>(&floatingCuda[0], &floatingCuda[1], floating);
+    }
+}
+/* *************************************************************** */
+void CudaContent::DeallocateImages() {
+    if (referenceCuda[0]) {
+        cudaCommon_free(&referenceCuda[0]);
+        referenceCuda[0] = nullptr;
+    }
+    if (referenceCuda[1]) {
+        cudaCommon_free(&referenceCuda[1]);
+        referenceCuda[1] = nullptr;
+    }
+    if (floatingCuda[0]) {
+        cudaCommon_free(&floatingCuda[0]);
+        floatingCuda[0] = nullptr;
+    }
+    if (floatingCuda[1]) {
+        cudaCommon_free(&floatingCuda[1]);
+        floatingCuda[1] = nullptr;
+    }
+}
+/* *************************************************************** */
+void CudaContent::AllocateDeformationField() {
+    NR_CUDA_SAFE_CALL(cudaMalloc(&deformationFieldCuda, deformationField->nvox * sizeof(float4)));
+}
+/* *************************************************************** */
+void CudaContent::DeallocateDeformationField() {
+    if (deformationFieldCuda) {
+        cudaCommon_free(&deformationFieldCuda);
+        deformationFieldCuda = nullptr;
+    }
+}
+/* *************************************************************** */
+void CudaContent::AllocateWarped() {
+    if (warped->nt == 1) {
+        cudaCommon_allocateArrayToDevice<float>(&warpedCuda[0], warped->dim);
+    } else if (warped->nt == 2) {
+        cudaCommon_allocateArrayToDevice<float>(&warpedCuda[0], &warpedCuda[1], warped->dim);
+    } else {
+        reg_print_fct_error("CudaContent::AllocateWarped()");
+        reg_print_msg_error("More than 2 time points aren't handled in the floating image");
+        reg_exit();
+    }
+}
+/* *************************************************************** */
+void CudaContent::DeallocateWarped() {
+    if (warpedCuda[0]) {
+        cudaCommon_free(&warpedCuda[0]);
+        warpedCuda[0] = nullptr;
+    }
+    if (warpedCuda[1]) {
+        cudaCommon_free(&warpedCuda[1]);
+        warpedCuda[1] = nullptr;
+    }
+}
+/* *************************************************************** */
+bool CudaContent::IsCurrentComputationDoubleCapable() {
+    return CudaContextSingleton::Instance().GetIsCardDoubleCapable();
+}
+/* *************************************************************** */
+nifti_image* CudaContent::GetDeformationField() {
+    cudaCommon_transferFromDeviceToNifti(deformationField, &deformationFieldCuda);
+    return deformationField;
+}
+/* *************************************************************** */
+void CudaContent::SetDeformationField(nifti_image *deformationFieldIn) {
+    Content::SetDeformationField(deformationFieldIn);
+    DeallocateDeformationField();
+    if (!deformationField) return;
+
+    AllocateDeformationField();
+    cudaCommon_transferNiftiToArrayOnDevice(&deformationFieldCuda, deformationField);
+}
+/* *************************************************************** */
+void CudaContent::SetReferenceMask(int *referenceMaskIn) {
+    Content::SetReferenceMask(referenceMaskIn);
+
+    if (referenceMaskCuda) {
+        cudaCommon_free(&referenceMaskCuda);
+        referenceMaskCuda = nullptr;
+    }
+
+    if (!referenceMask) return;
+
+    NR_CUDA_SAFE_CALL(cudaMalloc(&referenceMaskCuda, reference->nvox * sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, referenceMask,
+                                 reference->nvox * sizeof(int), cudaMemcpyHostToDevice));
+}
+/* *************************************************************** */
+void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
+    Content::SetTransformationMatrix(transformationMatrixIn);
+
+    if (transformationMatrixCuda) {
+        cudaCommon_free(&transformationMatrixCuda);
+        transformationMatrixCuda = nullptr;
+    }
+
+    if (!transformationMatrix) return;
+
+    float *transformationMatrixCptr = (float*)malloc(sizeof(mat44));
+    mat44ToCptr(*transformationMatrix, transformationMatrixCptr);
+    cudaCommon_allocateArrayToDevice(&transformationMatrixCuda, sizeof(mat44) / sizeof(float));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrixCuda, transformationMatrixCptr, sizeof(mat44), cudaMemcpyHostToDevice));
+    free(transformationMatrixCptr);
+}
+/* *************************************************************** */
+nifti_image* CudaContent::GetWarped(int datatype, int index) {
+    DownloadImage(warped, warpedCuda[index], datatype);
+    return warped;
+}
+/* *************************************************************** */
+void CudaContent::SetWarped(nifti_image *warpedIn) {
+    Content::SetWarped(warpedIn);
+    DeallocateWarped();
+    if (!warped) return;
+
+    reg_tools_changeDatatype<float>(warped);
+    AllocateWarped();
+    cudaCommon_transferNiftiToArrayOnDevice(&warpedCuda[0], warped);
+    if (warpedCuda[1])
+        cudaCommon_transferNiftiToArrayOnDevice(&warpedCuda[1], warped);
+}
+/* *************************************************************** */
+template<class DataType>
+DataType CudaContent::CastImageData(float intensity, int datatype) {
+    switch (datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        return static_cast<float>(intensity);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        return static_cast<double>(intensity);
+        break;
+    case NIFTI_TYPE_UINT8:
+        intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
+        return static_cast<unsigned char>(intensity > 0 ? reg_round(intensity) : 0);
+        break;
+    case NIFTI_TYPE_UINT16:
+        intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
+        return static_cast<unsigned short>(intensity > 0 ? reg_round(intensity) : 0);
+        break;
+    case NIFTI_TYPE_UINT32:
+        intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
+        return static_cast<unsigned int>(intensity > 0 ? reg_round(intensity) : 0);
+        break;
+    default:
+        return static_cast<DataType>(reg_round(intensity));
+        break;
+    }
+}
+/* *************************************************************** */
+template<class DataType>
+void CudaContent::FillImageData(nifti_image *image, float *memoryObject, int datatype) {
+    size_t size = image->nvox;
+    float *buffer = (float*)malloc(size * sizeof(float));
+
+    cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size);
+
+    free(image->data);
+    image->datatype = datatype;
+    image->nbyper = sizeof(DataType);
+    image->data = (void*)malloc(size * image->nbyper);
+    DataType* data = static_cast<DataType*>(image->data);
+    for (size_t i = 0; i < size; ++i)
+        data[i] = CastImageData<DataType>(buffer[i], datatype);
+    free(buffer);
+}
+/* *************************************************************** */
+void CudaContent::DownloadImage(nifti_image *image, float *memoryObject, int datatype) {
+    switch (datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        FillImageData<float>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        FillImageData<double>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_UINT8:
+        FillImageData<unsigned char>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_INT8:
+        FillImageData<char>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_UINT16:
+        FillImageData<unsigned short>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_INT16:
+        FillImageData<short>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_UINT32:
+        FillImageData<unsigned int>(image, memoryObject, datatype);
+        break;
+    case NIFTI_TYPE_INT32:
+        FillImageData<int>(image, memoryObject, datatype);
+        break;
+    default:
+        reg_print_fct_error("CudaContent::DownloadImage()");
+        reg_print_msg_error("Unsupported type");
+        break;
+    }
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
new file mode 100644
index 00000000..8f7161e0
--- /dev/null
+++ b/reg-lib/cuda/CudaContent.h
@@ -0,0 +1,54 @@
+#pragma once
+
+#include "Content.h"
+#include "CudaContextSingleton.h"
+#include "_reg_common_cuda.h"
+#include "_reg_tools.h"
+
+class CudaContent: public virtual Content {
+public:
+    CudaContent() = delete;
+    CudaContent(nifti_image *referenceIn,
+                nifti_image *floatingIn,
+                int *referenceMaskIn = nullptr,
+                mat44 *transformationMatrixIn = nullptr,
+                size_t bytesIn = sizeof(float));
+    virtual ~CudaContent();
+
+    virtual bool IsCurrentComputationDoubleCapable() override;
+
+    // Getters
+    virtual nifti_image* GetDeformationField() override;
+    virtual nifti_image* GetWarped(int datatype = 0, int index = 0) override;
+    virtual cudaArray** GetReferenceCuda() { return referenceCuda; }
+    virtual cudaArray** GetFloatingCuda() { return floatingCuda; }
+    virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; }
+    virtual int* GetReferenceMaskCuda() { return referenceMaskCuda; }
+    virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; }
+    virtual float** GetWarpedCuda() { return warpedCuda; }
+
+    // Setters
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetReferenceMask(int *referenceMaskIn) override;
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    virtual void SetWarped(nifti_image *warpedIn) override;
+
+protected:
+    cudaArray *referenceCuda[2] = {nullptr};
+    cudaArray *floatingCuda[2] = {nullptr};
+    float4 *deformationFieldCuda = nullptr;
+    int *referenceMaskCuda = nullptr;
+    float *transformationMatrixCuda = nullptr;
+    float *warpedCuda[2] = {nullptr};
+
+private:
+    void AllocateImages();
+    void DeallocateImages();
+    void AllocateDeformationField();
+    void DeallocateDeformationField();
+    void AllocateWarped();
+    void DeallocateWarped();
+    template<class DataType> DataType CastImageData(float intensity, int datatype);
+    template<class DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
+    void DownloadImage(nifti_image *image, float *memoryObject, int datatype);
+};
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
new file mode 100644
index 00000000..499a670d
--- /dev/null
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -0,0 +1,137 @@
+#include "CudaF3dContent.h"
+
+/* *************************************************************** */
+CudaF3dContent::CudaF3dContent(nifti_image *referenceIn,
+                               nifti_image *floatingIn,
+                               nifti_image *controlPointGridIn,
+                               nifti_image *localWeightSimIn,
+                               int *referenceMaskIn,
+                               mat44 *transformationMatrixIn,
+                               size_t bytesIn) :
+    F3dContent(referenceIn, floatingIn, controlPointGridIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
+    CudaContent(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
+    Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) {
+    SetControlPointGrid(controlPointGrid);
+    AllocateWarpedGradient();
+    AllocateTransformationGradient();
+    AllocateVoxelBasedMeasureGradient();
+}
+/* *************************************************************** */
+CudaF3dContent::~CudaF3dContent() {
+    SetControlPointGrid(nullptr);
+    DeallocateWarpedGradient();
+    DeallocateTransformationGradient();
+    DeallocateVoxelBasedMeasureGradient();
+}
+/* *************************************************************** */
+void CudaF3dContent::AllocateWarpedGradient() {
+    if (floating->nt >= 1)
+        NR_CUDA_SAFE_CALL(cudaMalloc(&warpedGradientCuda[0], warpedGradient->nvox * sizeof(float4)));
+    if (floating->nt == 2)
+        NR_CUDA_SAFE_CALL(cudaMalloc(&warpedGradientCuda[1], warpedGradient->nvox * sizeof(float4)));
+}
+/* *************************************************************** */
+void CudaF3dContent::DeallocateWarpedGradient() {
+    if (warpedGradientCuda[0] != nullptr) {
+        cudaCommon_free(&warpedGradientCuda[0]);
+        warpedGradientCuda[0] = nullptr;
+    }
+    if (warpedGradientCuda[1] != nullptr) {
+        cudaCommon_free(&warpedGradientCuda[1]);
+        warpedGradientCuda[1] = nullptr;
+    }
+}
+/* *************************************************************** */
+void CudaF3dContent::AllocateTransformationGradient() {
+    cudaCommon_allocateArrayToDevice(&transformationGradientCuda, controlPointGrid->dim);
+}
+/* *************************************************************** */
+void CudaF3dContent::DeallocateTransformationGradient() {
+    if (transformationGradientCuda) {
+        cudaCommon_free(&transformationGradientCuda);
+        transformationGradientCuda = nullptr;
+    }
+}
+/* *************************************************************** */
+void CudaF3dContent::AllocateVoxelBasedMeasureGradient() {
+    cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, reference->dim);
+}
+/* *************************************************************** */
+void CudaF3dContent::DeallocateVoxelBasedMeasureGradient() {
+    if (voxelBasedMeasureGradientCuda) {
+        cudaCommon_free(&voxelBasedMeasureGradientCuda);
+        voxelBasedMeasureGradientCuda = nullptr;
+    }
+}
+/* *************************************************************** */
+nifti_image* CudaF3dContent::GetControlPointGrid() {
+    cudaCommon_transferFromDeviceToNifti(controlPointGrid, &controlPointGridCuda);
+    return controlPointGrid;
+}
+/* *************************************************************** */
+void CudaF3dContent::SetControlPointGrid(nifti_image *controlPointGridIn) {
+    F3dContent::SetControlPointGrid(controlPointGridIn);
+
+    if (controlPointGridCuda) {
+        cudaCommon_free(&controlPointGridCuda);
+        controlPointGridCuda = nullptr;
+    }
+
+    if (!controlPointGrid) return;
+
+    cudaCommon_allocateArrayToDevice(&controlPointGridCuda, controlPointGrid->dim);
+    cudaCommon_transferNiftiToArrayOnDevice(&controlPointGridCuda, controlPointGrid);
+}
+/* *************************************************************** */
+nifti_image* CudaF3dContent::GetTransformationGradient() {
+    cudaCommon_transferFromDeviceToNifti(transformationGradient, &transformationGradientCuda);
+    return transformationGradient;
+}
+/* *************************************************************** */
+void CudaF3dContent::SetTransformationGradient(nifti_image *transformationGradientIn) {
+    F3dContent::SetTransformationGradient(transformationGradientIn);
+    DeallocateTransformationGradient();
+    if (!transformationGradient) return;
+
+    AllocateTransformationGradient();
+    cudaCommon_transferNiftiToArrayOnDevice(&transformationGradientCuda, transformationGradient);
+}
+/* *************************************************************** */
+nifti_image* CudaF3dContent::GetVoxelBasedMeasureGradient() {
+    cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, &voxelBasedMeasureGradientCuda);
+    return voxelBasedMeasureGradient;
+}
+/* *************************************************************** */
+void CudaF3dContent::SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) {
+    F3dContent::SetVoxelBasedMeasureGradient(voxelBasedMeasureGradientIn);
+    DeallocateVoxelBasedMeasureGradient();
+    if (!voxelBasedMeasureGradient) return;
+
+    AllocateVoxelBasedMeasureGradient();
+    cudaCommon_transferNiftiToArrayOnDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient);
+}
+/* *************************************************************** */
+nifti_image* CudaF3dContent::GetWarpedGradient() {
+    cudaCommon_transferFromDeviceToNifti(warpedGradient, &warpedGradientCuda[0]);
+    return warpedGradient;
+}
+/* *************************************************************** */
+void CudaF3dContent::SetWarpedGradient(nifti_image *warpedGradientIn) {
+    F3dContent::SetWarpedGradient(warpedGradientIn);
+    DeallocateWarpedGradient();
+    if (!warpedGradient) return;
+
+    AllocateWarpedGradient();
+    cudaCommon_transferNiftiToArrayOnDevice(&warpedGradientCuda[0], warpedGradient);
+    if (warpedGradientCuda[1])
+        cudaCommon_transferNiftiToArrayOnDevice(&warpedGradientCuda[1], warpedGradient);
+}
+/* *************************************************************** */
+void CudaF3dContent::ZeroTransformationGradient() {
+    cudaMemset(transformationGradientCuda, 0, transformationGradient->nvox * sizeof(float4));
+}
+/* *************************************************************** */
+void CudaF3dContent::ZeroVoxelBasedMeasureGradient() {
+    cudaMemset(voxelBasedMeasureGradientCuda, 0, voxelBasedMeasureGradient->nvox * sizeof(float4));
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h
new file mode 100644
index 00000000..dfa6d222
--- /dev/null
+++ b/reg-lib/cuda/CudaF3dContent.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "F3dContent.h"
+#include "CudaContent.h"
+#include "_reg_blocksize_gpu.h"
+
+class CudaF3dContent: public F3dContent, public CudaContent {
+public:
+    CudaF3dContent() = delete;
+    CudaF3dContent(nifti_image *referenceIn,
+                   nifti_image *floatingIn,
+                   nifti_image *controlPointGridIn,
+                   nifti_image *localWeightSimIn,
+                   int *referenceMaskIn = nullptr,
+                   mat44 *transformationMatrixIn = nullptr,
+                   size_t bytesIn = sizeof(float));
+    virtual ~CudaF3dContent();
+
+    // Getters
+    virtual nifti_image* GetControlPointGrid() override;
+    virtual nifti_image* GetTransformationGradient() override;
+    virtual nifti_image* GetVoxelBasedMeasureGradient() override;
+    virtual nifti_image* GetWarpedGradient() override;
+    virtual float4* GetControlPointGridCuda() { return controlPointGridCuda; }
+    virtual float4* GetTransformationGradientCuda() { return transformationGradientCuda; }
+    virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; }
+    virtual float4** GetWarpedGradientCuda() { return warpedGradientCuda; }
+
+    // Setters
+    virtual void SetControlPointGrid(nifti_image *controlPointGridIn) override;
+    virtual void SetTransformationGradient(nifti_image *transformationGradientIn) override;
+    virtual void SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) override;
+    virtual void SetWarpedGradient(nifti_image *warpedGradientIn) override;
+
+    // Auxiliary methods
+    virtual void ZeroTransformationGradient() override;
+    virtual void ZeroVoxelBasedMeasureGradient() override;
+
+protected:
+    float4 *controlPointGridCuda = nullptr;
+    float4 *transformationGradientCuda = nullptr;
+    float4 *voxelBasedMeasureGradientCuda = nullptr;
+    float4 *warpedGradientCuda[2] = {nullptr};
+
+private:
+    void AllocateWarpedGradient();
+    void DeallocateWarpedGradient();
+    void AllocateTransformationGradient();
+    void DeallocateTransformationGradient();
+    void AllocateVoxelBasedMeasureGradient();
+    void DeallocateVoxelBasedMeasureGradient();
+};

From 0c1958717bd67cc6136b358e6a90c2fe20930e80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Sat, 3 Dec 2022 01:22:22 +0000
Subject: [PATCH 022/314] Initialise NiftyReg_CudaBlock in CudaContextSingleton

---
 niftyreg_build_version.txt            |  2 +-
 reg-lib/cuda/CudaContextSingleton.cpp | 38 ++++++++++-----------------
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index b0d73241..fd03ab2a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-129
+130
diff --git a/reg-lib/cuda/CudaContextSingleton.cpp b/reg-lib/cuda/CudaContextSingleton.cpp
index d3c0c165..ec968e6d 100644
--- a/reg-lib/cuda/CudaContextSingleton.cpp
+++ b/reg-lib/cuda/CudaContextSingleton.cpp
@@ -1,5 +1,6 @@
 #include "CudaContextSingleton.h"
 #include "_reg_common_cuda.h"
+#include "_reg_blocksize_gpu.h"
 
 /* *************************************************************** */
 CudaContextSingleton::CudaContextSingleton() {
@@ -25,8 +26,7 @@ void CudaContextSingleton::SetCudaIdx(unsigned int cudaIdxIn) {
         reg_exit();
     }
     this->cudaIdx = cudaIdxIn;
-    NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx));
-    NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx))
+    PickCard(this->cudaIdx);
 }
 /* *************************************************************** */
 CUcontext CudaContextSingleton::GetContext() {
@@ -37,10 +37,9 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) {
     struct cudaDeviceProp deviceProp;
     if (deviceId < this->numDevices) {
         this->cudaIdx = deviceId;
-        //
         NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx));
         NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx));
-        //
+
         cudaGetDeviceProperties(&deviceProp, this->cudaIdx);
         if (deviceProp.major > 1) {
             this->isCardDoubleCapable = true;
@@ -49,7 +48,7 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) {
         } else {
             this->isCardDoubleCapable = false;
         }
-        //
+        NiftyReg_CudaBlock::GetInstance(deviceProp.major);
         return;
     }
 
@@ -67,8 +66,8 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) {
         ++current_device;
     }
     NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device));
-    NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device))
-        NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
+    NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device));
+    NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
 
     if (deviceProp.major < 1) {
         reg_print_msg_error("[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n");
@@ -85,25 +84,16 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) {
             reg_exit();
         }
 #ifndef NDEBUG
-        printf("[NiftyReg CUDA] The following device is used: %s\n",
-               deviceProp.name);
+        printf("[NiftyReg CUDA] The following device is used: %s\n", deviceProp.name);
         printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n",
-               (unsigned long int)(free / (1024 * 1024)),
-               (unsigned long int)(total / (1024 * 1024)));
-        printf("[NiftyReg CUDA] Card compute capability: %i.%i\n",
-               deviceProp.major,
-               deviceProp.minor);
-        printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n",
-               deviceProp.sharedMemPerBlock);
-        printf("[NiftyReg CUDA] CUDA version %i\n",
-               CUDART_VERSION);
-        printf("[NiftyReg CUDA] Card clock rate: %i MHz\n",
-               deviceProp.clockRate / 1000);
-        printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n",
-               deviceProp.multiProcessorCount);
+               (unsigned long)(free / (1024 * 1024)), (unsigned long)(total / (1024 * 1024)));
+        printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", deviceProp.major, deviceProp.minor);
+        printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n", deviceProp.sharedMemPerBlock);
+        printf("[NiftyReg CUDA] CUDA version %i\n", CUDART_VERSION);
+        printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", deviceProp.clockRate / 1000);
+        printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", deviceProp.multiProcessorCount);
 #endif
         this->cudaIdx = max_gflops_device;
-        //
         cudaGetDeviceProperties(&deviceProp, this->cudaIdx);
         if (deviceProp.major > 1) {
             this->isCardDoubleCapable = true;
@@ -112,7 +102,7 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) {
         } else {
             this->isCardDoubleCapable = false;
         }
-        //
+        NiftyReg_CudaBlock::GetInstance(deviceProp.major);
     }
 }
 /* *************************************************************** */

From d4966ab89d3aaf1503378ae106c7a77e1562a4fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Sat, 3 Dec 2022 01:23:27 +0000
Subject: [PATCH 023/314] Disable reg_f3d2 and reg_f3d_sym temporarily

---
 niftyreg_build_version.txt | 2 +-
 reg-lib/CMakeLists.txt     | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index fd03ab2a..a57f6ce7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-130
+131
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index a9b006d6..f927f247 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -213,10 +213,10 @@ set(_reg_f3d_files
   _reg_base.cpp
   _reg_f3d.h
   _reg_f3d.cpp
-  _reg_f3d2.h
-  _reg_f3d2.cpp
-  _reg_f3d_sym.h
-  _reg_f3d_sym.cpp
+  # _reg_f3d2.h
+  # _reg_f3d2.cpp
+  # _reg_f3d_sym.h
+  # _reg_f3d_sym.cpp
   cpu/CpuAffineDeformationFieldKernel.h
   cpu/CpuAffineDeformationFieldKernel.cpp
   cpu/CpuBlockMatchingKernel.h

From 9be440d38bbad1d31179725b689a7cff9c8b111e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Sat, 3 Dec 2022 01:32:09 +0000
Subject: [PATCH 024/314] Make use of new Content and Compute classes to handle
 CPU and CUDA platforms on the same source code for reg_base and reg_f3d

---
 niftyreg_build_version.txt |    2 +-
 reg-apps/reg_f3d.cpp       | 1632 ++++++++++++++++--------------------
 reg-lib/Platform.cpp       |   40 +
 reg-lib/Platform.h         |    9 +
 reg-lib/_reg_base.cpp      | 1425 +++++++++++++++----------------
 reg-lib/_reg_base.h        |  111 +--
 reg-lib/_reg_f3d.cpp       |  978 ++++++++++-----------
 reg-lib/_reg_f3d.h         |   47 +-
 8 files changed, 1967 insertions(+), 2277 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a57f6ce7..94361d49 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-131
+132
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 7593edab..ddb74d4e 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -12,960 +12,796 @@
 
 #include "_reg_ReadWriteImage.h"
 #include "_reg_ReadWriteMatrix.h"
-#include "_reg_f3d2.h"
+#include "_reg_f3d.h"
 #include "reg_f3d.h"
 #include <float.h>
-//#include <libgen.h> //DOES NOT WORK ON WINDOWS !
-
-#ifdef _USE_CUDA
-#   include "_reg_f3d_gpu.h"
-#endif
+ //#include <libgen.h> //DOES NOT WORK ON WINDOWS !
 
 #ifdef _WIN32
 #   include <time.h>
 #endif
 
-void PetitUsage(char *exec)
-{
-   char text[255];
-   reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   reg_print_msg_error("Fast Free-Form Deformation algorithm for non-rigid registration");
-   sprintf(text,"Usage:\t%s -ref <referenceImageName> -flo <floatingImageName> [OPTIONS]",exec);
-   reg_print_msg_error(text);
-   reg_print_msg_error("\tSee the help for more details (-h)");
-   reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   return;
+// OpenCL isn't supported!
+#undef _USE_OPENCL
+
+void PetitUsage(char *exec) {
+    char text[255];
+    reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    reg_print_msg_error("Fast Free-Form Deformation algorithm for non-rigid registration");
+    sprintf(text, "Usage:\t%s -ref <referenceImageName> -flo <floatingImageName> [OPTIONS]", exec);
+    reg_print_msg_error(text);
+    reg_print_msg_error("\tSee the help for more details (-h)");
+    reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    return;
 }
-void Usage(char *exec)
-{
-   char text[255];
-   reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   reg_print_info(exec, "Fast Free-Form Deformation (F3D) algorithm for non-rigid registration.");
-   reg_print_info(exec, "Based on Modat et al., \"Fast Free-Form Deformation using");
-   reg_print_info(exec, "graphics processing units\", CMPB, 2010");
-   reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)");
-   reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   sprintf(text, "Usage:\t%s -ref <filename> -flo <filename> [OPTIONS].",exec);
-   reg_print_info(exec, text);
-   reg_print_info(exec, "\t-ref <filename>\tFilename of the reference image (mandatory)");
-   reg_print_info(exec, "\t-flo <filename>\tFilename of the floating image (mandatory)");
-   reg_print_info(exec, "***************");
-   reg_print_info(exec, "*** OPTIONS ***");
-   reg_print_info(exec, "***************");
-   reg_print_info(exec, "*** Initial transformation options (One option will be considered):");
-   reg_print_info(exec, "\t-aff <filename>\t\tFilename which contains an affine transformation (Affine*Reference=Floating)");
-   reg_print_info(exec, "\t-incpp <filename>\tFilename ofloatf control point grid input");
-   reg_print_info(exec, "\t\t\t\tThe coarse spacing is defined by this file.");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "*** Output options:");
-   reg_print_info(exec, "\t-cpp <filename>\t\tFilename of control point grid [outputCPP.nii]");
-   reg_print_info(exec, "\t-res <filename> \tFilename of the resampled image [outputResult.nii]");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "*** Input image options:");
-   reg_print_info(exec, "\t-rmask <filename>\t\tFilename of a mask image in the reference space");
-   reg_print_info(exec, "\t-smooR <float>\t\t\tSmooth the reference image using the specified sigma (mm) [0]");
-   reg_print_info(exec, "\t-smooF <float>\t\t\tSmooth the floating image using the specified sigma (mm) [0]");
-   reg_print_info(exec, "\t--rLwTh <float>\t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*");
-   reg_print_info(exec, "\t--rUpTh <float>\t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*");
-   reg_print_info(exec, "\t--fLwTh <float>\t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*");
-   reg_print_info(exec, "\t--fUpTh <float>\t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*");
-   reg_print_info(exec, "\t-rLwTh <timepoint> <float>\tLower threshold to apply to the reference image intensities [none]*");
-   reg_print_info(exec, "\t-rUpTh <timepoint> <float>\tUpper threshold to apply to the reference image intensities [none]*");
-   reg_print_info(exec, "\t-fLwTh <timepoint> <float>\tLower threshold to apply to the floating image intensities [none]*");
-   reg_print_info(exec, "\t-fUpTh <timepoint> <float>\tUpper threshold to apply to the floating image intensities [none]*");
-   reg_print_info(exec, "\t* The scl_slope and scl_inter from the nifti header are taken into account for the thresholds");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "*** Spline options (All defined at full resolution):");
-   reg_print_info(exec, "\t-sx <float>\t\tFinal grid spacing along the x axis in mm (in voxel if negative value) [5 voxels]");
-   reg_print_info(exec, "\t-sy <float>\t\tFinal grid spacing along the y axis in mm (in voxel if negative value) [sx value]");
-   reg_print_info(exec, "\t-sz <float>\t\tFinal grid spacing along the z axis in mm (in voxel if negative value) [sx value]");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "*** Regularisation options:");
-   reg_print_info(exec, "\t-be <float>\t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]");
-   reg_print_info(exec, "\t-le <float>\t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.00]");
-   reg_print_info(exec, "\t-jl <float>\t\tWeight of log of the Jacobian determinant penalty term [0.0]");
-   reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position");
-   reg_print_info(exec, "\t-land <float> <file>\tUse of a set of landmarks which distance should be minimised");
-   reg_print_info(exec, "\t\t\t\tThe first argument corresponds to the weight given to this regularisation (between 0 and 1)");
-   reg_print_info(exec, "\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimeter as");
-   reg_print_info(exec, "\t\t\t\t<refX> <refY> <refZ> <floX> <floY> <floZ>\\n for 3D images and");
-   reg_print_info(exec, "\t\t\t\t<refX> <refY> <floX> <floY>\\n for 2D images");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "*** Measure of similarity options:");
-   reg_print_info(exec, "*** NMI with 64 bins is used except if specified otherwise");
-   reg_print_info(exec, "\t--nmi\t\t\tNMI. Used NMI even when one or several other measures are specified");
-   reg_print_info(exec, "\t--rbn <int>\t\tNMI. Number of bin to use for the reference image histogram. Identical value for every timepoint");
-   reg_print_info(exec, "\t--fbn <int>\t\tNMI. Number of bin to use for the floating image histogram. Identical value for every timepoint");
-   reg_print_info(exec, "\t-rbn <tp> <int>\t\tNMI. Number of bin to use for the reference image histogram for the specified time point");
-   reg_print_info(exec, "\t-fbn <tp> <int>\t\tNMI. Number of bin to use for the floating image histogram for the specified time point");
-   reg_print_info(exec, "\t--lncc <float>\t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every timepoint");
-   reg_print_info(exec, "\t-lncc <tp> <float>\tLNCC. Standard deviation of the Gaussian kernel for the specified timepoint");
-   reg_print_info(exec, "\t--ssd \t\t\tSSD. Used for all time points - images are normalized between 0 and 1 before computing the measure");
-   reg_print_info(exec, "\t-ssd <tp> \t\tSSD. Used for the specified timepoint - images are normalized between 0 and 1 before computing the measure");
-   reg_print_info(exec, "\t--ssdn \t\t\tSSD. Used for all time points - images are NOT normalized between 0 and 1 before computing the measure");
-   reg_print_info(exec, "\t-ssdn <tp> \t\tSSD. Used for the specified timepoint - images are NOT normalized between 0 and 1 before computing the measure");
-   reg_print_info(exec, "\t--mind <offset>\t\tMIND and the offset to use to compute the descriptor");
-   reg_print_info(exec, "\t--mindssc <offset>\tMIND-SCC and the offset to use to compute the descriptor");
-   reg_print_info(exec, "\t--kld\t\t\tKLD. Used for all time points");
-   reg_print_info(exec, "\t-kld <tp>\t\tKLD. Used for the specified timepoint");
-   reg_print_info(exec, "\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities");
-   reg_print_info(exec, "\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile");
-   reg_print_info(exec, "*** Options for setting the weights for each timepoint for each similarity");
-   reg_print_info(exec, "*** Note, the options above should be used first and will set a default weight of 1");
-   reg_print_info(exec, "*** The options below should be used afterwards to set the desired weight if different to 1");
-   reg_print_info(exec, "\t-nmiw <tp> <float>\tNMI Weight. Weight to use for the NMI similarity measure for the specified timepoint");
-   reg_print_info(exec, "\t-lnccw <tp> <float>\tLNCC Weight. Weight to use for the LNCC similarity measure for the specified timepoint");
-   reg_print_info(exec, "\t-ssdw <tp> <float>\tSSD Weight. Weight to use for the SSD similarity measure for the specified timepoint");
-   reg_print_info(exec, "\t-kldw <tp> <float>\tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint");
-   reg_print_info(exec, "\t-wSim <filename>\tWeight to apply to the measure of simillarity at each voxel position");
+void Usage(char *exec) {
+    char text[255];
+    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    reg_print_info(exec, "Fast Free-Form Deformation (F3D) algorithm for non-rigid registration.");
+    reg_print_info(exec, "Based on Modat et al., \"Fast Free-Form Deformation using");
+    reg_print_info(exec, "graphics processing units\", CMPB, 2010");
+    reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)");
+    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    sprintf(text, "Usage:\t%s -ref <filename> -flo <filename> [OPTIONS].", exec);
+    reg_print_info(exec, text);
+    reg_print_info(exec, "\t-ref <filename>\tFilename of the reference image (mandatory)");
+    reg_print_info(exec, "\t-flo <filename>\tFilename of the floating image (mandatory)");
+    reg_print_info(exec, "***************");
+    reg_print_info(exec, "*** OPTIONS ***");
+    reg_print_info(exec, "***************");
+    reg_print_info(exec, "*** Initial transformation options (One option will be considered):");
+    reg_print_info(exec, "\t-aff <filename>\t\tFilename which contains an affine transformation (Affine*Reference=Floating)");
+    reg_print_info(exec, "\t-incpp <filename>\tFilename ofloatf control point grid input");
+    reg_print_info(exec, "\t\t\t\tThe coarse spacing is defined by this file.");
+    reg_print_info(exec, "");
+    reg_print_info(exec, "*** Output options:");
+    reg_print_info(exec, "\t-cpp <filename>\t\tFilename of control point grid [outputCPP.nii]");
+    reg_print_info(exec, "\t-res <filename> \tFilename of the resampled image [outputResult.nii]");
+    reg_print_info(exec, "");
+    reg_print_info(exec, "*** Input image options:");
+    reg_print_info(exec, "\t-rmask <filename>\t\tFilename of a mask image in the reference space");
+    reg_print_info(exec, "\t-smooR <float>\t\t\tSmooth the reference image using the specified sigma (mm) [0]");
+    reg_print_info(exec, "\t-smooF <float>\t\t\tSmooth the floating image using the specified sigma (mm) [0]");
+    reg_print_info(exec, "\t--rLwTh <float>\t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*");
+    reg_print_info(exec, "\t--rUpTh <float>\t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*");
+    reg_print_info(exec, "\t--fLwTh <float>\t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*");
+    reg_print_info(exec, "\t--fUpTh <float>\t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*");
+    reg_print_info(exec, "\t-rLwTh <timepoint> <float>\tLower threshold to apply to the reference image intensities [none]*");
+    reg_print_info(exec, "\t-rUpTh <timepoint> <float>\tUpper threshold to apply to the reference image intensities [none]*");
+    reg_print_info(exec, "\t-fLwTh <timepoint> <float>\tLower threshold to apply to the floating image intensities [none]*");
+    reg_print_info(exec, "\t-fUpTh <timepoint> <float>\tUpper threshold to apply to the floating image intensities [none]*");
+    reg_print_info(exec, "\t* The scl_slope and scl_inter from the nifti header are taken into account for the thresholds");
+    reg_print_info(exec, "");
+    reg_print_info(exec, "*** Spline options (All defined at full resolution):");
+    reg_print_info(exec, "\t-sx <float>\t\tFinal grid spacing along the x axis in mm (in voxel if negative value) [5 voxels]");
+    reg_print_info(exec, "\t-sy <float>\t\tFinal grid spacing along the y axis in mm (in voxel if negative value) [sx value]");
+    reg_print_info(exec, "\t-sz <float>\t\tFinal grid spacing along the z axis in mm (in voxel if negative value) [sx value]");
+    reg_print_info(exec, "");
+    reg_print_info(exec, "*** Regularisation options:");
+    reg_print_info(exec, "\t-be <float>\t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]");
+    reg_print_info(exec, "\t-le <float>\t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.00]");
+    reg_print_info(exec, "\t-jl <float>\t\tWeight of log of the Jacobian determinant penalty term [0.0]");
+    reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position");
+    reg_print_info(exec, "\t-land <float> <file>\tUse of a set of landmarks which distance should be minimised");
+    reg_print_info(exec, "\t\t\t\tThe first argument corresponds to the weight given to this regularisation (between 0 and 1)");
+    reg_print_info(exec, "\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimeter as");
+    reg_print_info(exec, "\t\t\t\t<refX> <refY> <refZ> <floX> <floY> <floZ>\\n for 3D images and");
+    reg_print_info(exec, "\t\t\t\t<refX> <refY> <floX> <floY>\\n for 2D images");
+    reg_print_info(exec, "");
+    reg_print_info(exec, "*** Measure of similarity options:");
+    reg_print_info(exec, "*** NMI with 64 bins is used except if specified otherwise");
+    reg_print_info(exec, "\t--nmi\t\t\tNMI. Used NMI even when one or several other measures are specified");
+    reg_print_info(exec, "\t--rbn <int>\t\tNMI. Number of bin to use for the reference image histogram. Identical value for every timepoint");
+    reg_print_info(exec, "\t--fbn <int>\t\tNMI. Number of bin to use for the floating image histogram. Identical value for every timepoint");
+    reg_print_info(exec, "\t-rbn <tp> <int>\t\tNMI. Number of bin to use for the reference image histogram for the specified time point");
+    reg_print_info(exec, "\t-fbn <tp> <int>\t\tNMI. Number of bin to use for the floating image histogram for the specified time point");
+    reg_print_info(exec, "\t--lncc <float>\t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every timepoint");
+    reg_print_info(exec, "\t-lncc <tp> <float>\tLNCC. Standard deviation of the Gaussian kernel for the specified timepoint");
+    reg_print_info(exec, "\t--ssd \t\t\tSSD. Used for all time points - images are normalized between 0 and 1 before computing the measure");
+    reg_print_info(exec, "\t-ssd <tp> \t\tSSD. Used for the specified timepoint - images are normalized between 0 and 1 before computing the measure");
+    reg_print_info(exec, "\t--ssdn \t\t\tSSD. Used for all time points - images are NOT normalized between 0 and 1 before computing the measure");
+    reg_print_info(exec, "\t-ssdn <tp> \t\tSSD. Used for the specified timepoint - images are NOT normalized between 0 and 1 before computing the measure");
+    reg_print_info(exec, "\t--mind <offset>\t\tMIND and the offset to use to compute the descriptor");
+    reg_print_info(exec, "\t--mindssc <offset>\tMIND-SCC and the offset to use to compute the descriptor");
+    reg_print_info(exec, "\t--kld\t\t\tKLD. Used for all time points");
+    reg_print_info(exec, "\t-kld <tp>\t\tKLD. Used for the specified timepoint");
+    reg_print_info(exec, "\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities");
+    reg_print_info(exec, "\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile");
+    reg_print_info(exec, "*** Options for setting the weights for each timepoint for each similarity");
+    reg_print_info(exec, "*** Note, the options above should be used first and will set a default weight of 1");
+    reg_print_info(exec, "*** The options below should be used afterwards to set the desired weight if different to 1");
+    reg_print_info(exec, "\t-nmiw <tp> <float>\tNMI Weight. Weight to use for the NMI similarity measure for the specified timepoint");
+    reg_print_info(exec, "\t-lnccw <tp> <float>\tLNCC Weight. Weight to use for the LNCC similarity measure for the specified timepoint");
+    reg_print_info(exec, "\t-ssdw <tp> <float>\tSSD Weight. Weight to use for the SSD similarity measure for the specified timepoint");
+    reg_print_info(exec, "\t-kldw <tp> <float>\tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint");
+    reg_print_info(exec, "\t-wSim <filename>\tWeight to apply to the measure of similarity at each voxel position");
 
 
-   //   reg_print_info(exec, "\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "*** Optimisation options:");
-   reg_print_info(exec, "\t-maxit <int>\t\tMaximal number of iteration at the final level [150]");
-   reg_print_info(exec, "\t-ln <int>\t\tNumber of level to perform [3]");
-   reg_print_info(exec, "\t-lp <int>\t\tOnly perform the first levels [ln]");
-   reg_print_info(exec, "\t-nopy\t\t\tDo not use a pyramidal approach");
-   reg_print_info(exec, "\t-noConj\t\t\tTo not use the conjuage gradient optimisation but a simple gradient ascent");
-   reg_print_info(exec, "\t-pert <int>\t\tTo add perturbation step(s) after each optimisation scheme");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "*** F3D2 options:");
-   reg_print_info(exec, "\t-vel \t\t\tUse a velocity field integration to generate the deformation");
-   reg_print_info(exec, "\t-nogce \t\t\tDo not use the gradient accumulation through exponentiation");
-   reg_print_info(exec, "\t-fmask <filename>\tFilename of a mask image in the floating space");
-   reg_print_info(exec, "");
+    //   reg_print_info(exec, "\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)");
+    reg_print_info(exec, "");
+    reg_print_info(exec, "*** Optimisation options:");
+    reg_print_info(exec, "\t-maxit <int>\t\tMaximal number of iteration at the final level [150]");
+    reg_print_info(exec, "\t-ln <int>\t\tNumber of level to perform [3]");
+    reg_print_info(exec, "\t-lp <int>\t\tOnly perform the first levels [ln]");
+    reg_print_info(exec, "\t-nopy\t\t\tDo not use a pyramidal approach");
+    reg_print_info(exec, "\t-noConj\t\t\tTo not use the conjuage gradient optimisation but a simple gradient ascent");
+    reg_print_info(exec, "\t-pert <int>\t\tTo add perturbation step(s) after each optimisation scheme");
+    reg_print_info(exec, "");
+    reg_print_info(exec, "*** F3D2 options:");
+    reg_print_info(exec, "\t-vel \t\t\tUse a velocity field integration to generate the deformation");
+    reg_print_info(exec, "\t-nogce \t\t\tDo not use the gradient accumulation through exponentiation");
+    reg_print_info(exec, "\t-fmask <filename>\tFilename of a mask image in the floating space");
+    reg_print_info(exec, "");
 
-   reg_print_info(exec, "*** Platform options:");
-//#if defined(_USE_CUDA) && defined(_USE_OPENCL)
-//   reg_print_info(exec, "\t-platf <uint>\t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]");
-//#else
+    reg_print_info(exec, "*** Platform options:");
+#if defined(_USE_CUDA) && defined(_USE_OPENCL)
+    reg_print_info(exec, "\t-platf <uint>\t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]");
+#else
 #ifdef _USE_CUDA
-   reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]");
+    reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]");
+#endif
+#ifdef _USE_OPENCL
+    reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | OpenCL=2 [0]");
+#endif
+#endif
+#if defined(_USE_CUDA) || defined(_USE_OPENCL)
+    reg_print_info(exec, "\t-gpuid <uint>\t\tChoose a custom gpu.");
+    reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
 #endif
-//#ifdef _USE_OPENCL
-//   reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | OpenCL=2 [0]");
-//#endif
-//#endif
-//#if defined(_USE_CUDA) || defined(_USE_OPENCL)
-//   reg_print_info(exec, "\t-gpuid <uint>\t\tChoose a custom gpu.");
-//   reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
-//#endif
 
 #if defined (_OPENMP)
-   reg_print_info(exec, "");
-   reg_print_info(exec, "*** OpenMP-related options:");
-   int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=nullptr)
-      defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   sprintf(text,"\t-omp <int>\t\tNumber of thread to use with OpenMP. [%i/%i]",
-           defaultOpenMPValue, omp_get_num_procs());
-   reg_print_info(exec, text);
+    reg_print_info(exec, "");
+    reg_print_info(exec, "*** OpenMP-related options:");
+    int defaultOpenMPValue = omp_get_num_procs();
+    if (getenv("OMP_NUM_THREADS") != nullptr)
+        defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
+    sprintf(text, "\t-omp <int>\t\tNumber of thread to use with OpenMP. [%i/%i]",
+            defaultOpenMPValue, omp_get_num_procs());
+    reg_print_info(exec, text);
 #endif
-   reg_print_info(exec, "");
-   reg_print_info(exec, "*** Other options:");
-   reg_print_info(exec, "\t-smoothGrad <float>\tTo smooth the metric derivative (in mm) [0]");
-   reg_print_info(exec, "\t-pad <float>\t\tPadding value [nan]");
-   reg_print_info(exec, "\t-voff\t\t\tTo turn verbose off");
-   reg_print_info(exec, "\t--version\t\tPrint current version and exit");
-   sprintf(text, "\t\t\t\t(%s)",NR_VERSION);
-   reg_print_info(exec, text);
-   reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   return;
+    reg_print_info(exec, "");
+    reg_print_info(exec, "*** Other options:");
+    reg_print_info(exec, "\t-smoothGrad <float>\tTo smooth the metric derivative (in mm) [0]");
+    reg_print_info(exec, "\t-pad <float>\t\tPadding value [nan]");
+    reg_print_info(exec, "\t-voff\t\t\tTo turn verbose off");
+    reg_print_info(exec, "\t--version\t\tPrint current version and exit");
+    sprintf(text, "\t\t\t\t(%s)", NR_VERSION);
+    reg_print_info(exec, text);
+    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    return;
 }
 
-int main(int argc, char **argv)
-{
-   if(argc==1)
-   {
-      PetitUsage((argv[0]));
-      return EXIT_FAILURE;
-   }
-   time_t start;
-   time(&start);
-   int verbose=true;
+int main(int argc, char **argv) {
+    if (argc == 1) {
+        PetitUsage((argv[0]));
+        return EXIT_FAILURE;
+    }
+    time_t start;
+    time(&start);
+    int verbose = true;
 
 #if defined (_OPENMP)
-   // Set the default number of thread
-   int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=nullptr)
-      defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   omp_set_num_threads(defaultOpenMPValue);
+    // Set the default number of thread
+    int defaultOpenMPValue = omp_get_num_procs();
+    if (getenv("OMP_NUM_THREADS") != nullptr)
+        defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
+    omp_set_num_threads(defaultOpenMPValue);
 #endif
 
-   std::string text;
-   //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
-   // Check if any information is required
-   for(int i=1; i<argc; i++)
-   {
-      if(strcmp(argv[i],"-h")==0 ||
-            strcmp(argv[i],"-H")==0 ||
-            strcmp(argv[i],"-help")==0 ||
-            strcmp(argv[i],"--help")==0 ||
-            strcmp(argv[i],"-HELP")==0 ||
-            strcmp(argv[i],"--HELP")==0 ||
-            strcmp(argv[i],"-Help")==0 ||
-            strcmp(argv[i],"--Help")==0
-        )
-      {
-         Usage((argv[0]));
-         return EXIT_SUCCESS;
-      }
-      if(strcmp(argv[i], "--xml")==0)
-      {
-         printf("%s",xml_f3d);
-         return EXIT_SUCCESS;
-      }
-      if(strcmp(argv[i], "-voff")==0)
-      {
+    std::string text;
+    //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
+    // Check if any information is required
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-h") == 0 ||
+            strcmp(argv[i], "-H") == 0 ||
+            strcmp(argv[i], "-help") == 0 ||
+            strcmp(argv[i], "--help") == 0 ||
+            strcmp(argv[i], "-HELP") == 0 ||
+            strcmp(argv[i], "--HELP") == 0 ||
+            strcmp(argv[i], "-Help") == 0 ||
+            strcmp(argv[i], "--Help") == 0
+            ) {
+            Usage((argv[0]));
+            return EXIT_SUCCESS;
+        }
+        if (strcmp(argv[i], "--xml") == 0) {
+            printf("%s", xml_f3d);
+            return EXIT_SUCCESS;
+        }
+        if (strcmp(argv[i], "-voff") == 0) {
 #ifndef NDEBUG
-         reg_print_msg_debug("The verbose cannot be switch off in debug");
+            reg_print_msg_debug("The verbose cannot be switch off in debug");
 #else
-         verbose=false;
+            verbose = false;
 #endif
-      }
-      if( strcmp(argv[i], "-version")==0 ||
-          strcmp(argv[i], "-Version")==0 ||
-          strcmp(argv[i], "-V")==0 ||
-          strcmp(argv[i], "-v")==0 ||
-          strcmp(argv[i], "--v")==0 ||
-          strcmp(argv[i], "--version")==0)
-      {
-         printf("%s\n",NR_VERSION);
-         return EXIT_SUCCESS;
-      }
-   }
-   //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
-   // Output the command line
+        }
+        if (strcmp(argv[i], "-version") == 0 ||
+            strcmp(argv[i], "-Version") == 0 ||
+            strcmp(argv[i], "-V") == 0 ||
+            strcmp(argv[i], "-v") == 0 ||
+            strcmp(argv[i], "--v") == 0 ||
+            strcmp(argv[i], "--version") == 0) {
+            printf("%s\n", NR_VERSION);
+            return EXIT_SUCCESS;
+        }
+    }
+    //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
+    // Output the command line
 #ifdef NDEBUG
-   if(verbose)
-   {
+    if (verbose) {
 #endif
-      reg_print_info((argv[0]), "");
-      reg_print_info((argv[0]), "Command line:");
-      text = "\t";
-      for(int i=0; i<argc; i++) {
-        text = stringFormat("%s %s", text.c_str(), argv[i]);
-      }
-      reg_print_info((argv[0]), text.c_str());
-      reg_print_info((argv[0]), "");
+        reg_print_info((argv[0]), "");
+        reg_print_info((argv[0]), "Command line:");
+        text = "\t";
+        for (int i = 0; i < argc; i++) {
+            text = stringFormat("%s %s", text.c_str(), argv[i]);
+        }
+        reg_print_info((argv[0]), text.c_str());
+        reg_print_info((argv[0]), "");
 #ifdef NDEBUG
-   }
+    }
 #endif
 
-   //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
-   // Read the reference and floating image
-   nifti_image *referenceImage=nullptr;
-   nifti_image *floatingImage=nullptr;
-   for(int i=1; i<argc; i++)
-   {
-      if((strcmp(argv[i],"-ref")==0) || (strcmp(argv[i],"-target")==0) || (strcmp(argv[i],"--ref")==0))
-      {
-         referenceImage=reg_io_ReadImageFile(argv[++i]);
-         if(referenceImage==nullptr)
-         {
-            reg_print_msg_error("Error when reading the reference image:");
-            reg_print_msg_error(argv[i-1]);
-            return EXIT_FAILURE;
-         }
-      }
-      if((strcmp(argv[i],"-flo")==0) || (strcmp(argv[i],"-source")==0) || (strcmp(argv[i],"--flo")==0))
-      {
-         floatingImage=reg_io_ReadImageFile(argv[++i]);
-         if(floatingImage==nullptr)
-         {
-            reg_print_msg_error("Error when reading the floating image:");
-            reg_print_msg_error(argv[i-1]);
-            return EXIT_FAILURE;
-         }
-      }
-   }
-   // Check that both reference and floating image have been defined
-   if(referenceImage==nullptr)
-   {
-      reg_print_msg_error("Error. No reference image has been defined");
-      PetitUsage((argv[0]));
-      return EXIT_FAILURE;
-   }
-   // Read the floating image
-   if(floatingImage==nullptr)
-   {
-      reg_print_msg_error("Error. No floating image has been defined");
-      PetitUsage((argv[0]));
-      return EXIT_FAILURE;
-   }
-   //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
-   // Check the type of registration object to create
-#ifdef _USE_CUDA
-   CUcontext ctx;
-#endif // _USE_CUDA
-   reg_f3d<float> *REG=nullptr;
-   float *referenceLandmark=nullptr;
-   float *floatingLandmark=nullptr;
-   for(int i=1; i<argc; i++)
-   {
-      if(strcmp(argv[i], "-vel")==0 || strcmp(argv[i], "--vel")==0)
-      {
-         REG=new reg_f3d2<float>(referenceImage->nt,floatingImage->nt);
-         break;
-      }
-      if(strcmp(argv[i], "-sym")==0 || strcmp(argv[i], "--sym")==0)
-      {
-         REG=new reg_f3d_sym<float>(referenceImage->nt,floatingImage->nt);
-         break;
-      }
-#ifdef _USE_CUDA
-      if (strcmp(argv[i], "-gpu") == 0 || strcmp(argv[i], "-mem") == 0) {
-         // Set up the cuda card and display some relevant information and check if the card is suitable
-         if (cudaCommon_setCUDACard(&ctx, true)) {
-            fprintf(stderr, "\n[NiftyReg CUDA ERROR] Error while detecting a CUDA card\n");
-            fprintf(stderr, "[NiftyReg CUDA WARNING] GPU implementation has been turned off.\n");
-         } else
-            REG = new reg_f3d_gpu(referenceImage->nt, floatingImage->nt);
-         break;
-      }
-#endif // _USE_CUDA
-   }
-   if(REG==nullptr)
-      REG=new reg_f3d<float>(referenceImage->nt,floatingImage->nt);
-   REG->SetReferenceImage(referenceImage);
-   REG->SetFloatingImage(floatingImage);
+    //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
+    // Read the reference and floating image
+    nifti_image *referenceImage = nullptr;
+    nifti_image *floatingImage = nullptr;
+    for (int i = 1; i < argc; i++) {
+        if ((strcmp(argv[i], "-ref") == 0) || (strcmp(argv[i], "-target") == 0) || (strcmp(argv[i], "--ref") == 0)) {
+            referenceImage = reg_io_ReadImageFile(argv[++i]);
+            if (referenceImage == nullptr) {
+                reg_print_msg_error("Error when reading the reference image:");
+                reg_print_msg_error(argv[i - 1]);
+                return EXIT_FAILURE;
+            }
+        }
+        if ((strcmp(argv[i], "-flo") == 0) || (strcmp(argv[i], "-source") == 0) || (strcmp(argv[i], "--flo") == 0)) {
+            floatingImage = reg_io_ReadImageFile(argv[++i]);
+            if (floatingImage == nullptr) {
+                reg_print_msg_error("Error when reading the floating image:");
+                reg_print_msg_error(argv[i - 1]);
+                return EXIT_FAILURE;
+            }
+        }
+    }
+    // Check that both reference and floating image have been defined
+    if (referenceImage == nullptr) {
+        reg_print_msg_error("Error. No reference image has been defined");
+        PetitUsage((argv[0]));
+        return EXIT_FAILURE;
+    }
+    // Read the floating image
+    if (floatingImage == nullptr) {
+        reg_print_msg_error("Error. No floating image has been defined");
+        PetitUsage((argv[0]));
+        return EXIT_FAILURE;
+    }
+    //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
+    // Check the type of registration object to create
+    reg_f3d<float> *reg = nullptr;
+    float *referenceLandmark = nullptr;
+    float *floatingLandmark = nullptr;
+    int platformFlag = NR_PLATFORM_CPU;
+    unsigned gpuIdx = 999;
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) {
+            // reg = new reg_f3d2<float>(referenceImage->nt, floatingImage->nt);
+            break;
+        }
+        if (strcmp(argv[i], "-sym") == 0 || strcmp(argv[i], "--sym") == 0) {
+            // reg = new reg_f3d_sym<float>(referenceImage->nt, floatingImage->nt);
+            break;
+        } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
+            int value = atoi(argv[++i]);
+            if (value < NR_PLATFORM_CPU || value > NR_PLATFORM_CL) {
+                reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL");
+                return EXIT_FAILURE;
+            }
+#ifndef _USE_CUDA
+            if (value == NR_PLATFORM_CUDA) {
+                reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA");
+                reg_print_msg_warn("The CPU platform is used");
+                value = 0;
+            }
+#endif
+#ifndef _USE_OPENCL
+            if (value == NR_PLATFORM_CL) {
+                reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL");
+                reg_print_msg_warn("The CPU platform is used");
+                value = 0;
+            }
+#endif
+            platformFlag = value;
+        } else if (strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) {
+            gpuIdx = unsigned(atoi(argv[++i]));
+        }
+    }
+    if (reg == nullptr)
+        reg = new reg_f3d<float>(referenceImage->nt, floatingImage->nt);
+    reg->SetReferenceImage(referenceImage);
+    reg->SetFloatingImage(floatingImage);
+    reg->SetPlatformCode(platformFlag);
+    reg->SetGpuIdx(gpuIdx);
 
-   // Create some pointers that could be used
-   mat44 affineMatrix;
-   nifti_image *inputCCPImage=nullptr;
-   nifti_image *referenceMaskImage=nullptr;
-   nifti_image *floatingMaskImage=nullptr;
-   nifti_image *refLocalWeightSim=nullptr;
-   char *outputWarpedImageName=nullptr;
-   char *outputCPPImageName=nullptr;
-   bool useMeanLNCC=false;
-   int refBinNumber=0;
-   int floBinNumber=0;
+    // Create some pointers that could be used
+    mat44 affineMatrix;
+    nifti_image *inputCCPImage = nullptr;
+    nifti_image *referenceMaskImage = nullptr;
+    nifti_image *floatingMaskImage = nullptr;
+    nifti_image *refLocalWeightSim = nullptr;
+    char *outputWarpedImageName = nullptr;
+    char *outputCPPImageName = nullptr;
+    bool useMeanLNCC = false;
+    int refBinNumber = 0;
+    int floBinNumber = 0;
 
-   /* read the input parameter */
-   for(int i=1; i<argc; i++)
-   {
-      if(strcmp(argv[i],"-ref")==0 || strcmp(argv[i],"-target")==0 ||
-            strcmp(argv[i],"--ref")==0 || strcmp(argv[i],"-flo")==0 ||
-            strcmp(argv[i],"-source")==0 || strcmp(argv[i],"--flo")==0 )
-      {
-         // argument has already been parsed
-         ++i;
-      }
-      else if(strcmp(argv[i], "-voff")==0)
-      {
-         verbose=false;
-         REG->DoNotPrintOutInformation();
-      }
-      else if(strcmp(argv[i], "-aff")==0 || (strcmp(argv[i],"--aff")==0))
-      {
-         // Check first if the specified affine file exist
-         char *affineTransformationName=argv[++i];
-         if(FILE *aff=fopen(affineTransformationName, "r"))
-         {
-            fclose(aff);
-         }
-         else
-         {
-            reg_print_msg_error("The specified input affine file can not be read:");
-            reg_print_msg_error(affineTransformationName);
-            return EXIT_FAILURE;
-         }
-         // Read the affine matrix
-         reg_tool_ReadAffineFile(&affineMatrix,
-                                 affineTransformationName);
-         // Send the transformation to the registration object
-         REG->SetAffineTransformation(&affineMatrix);
-      }
-      else if(strcmp(argv[i], "-incpp")==0 || (strcmp(argv[i],"--incpp")==0))
-      {
-         inputCCPImage=reg_io_ReadImageFile(argv[++i]);
-         if(inputCCPImage==nullptr)
-         {
-            reg_print_msg_error("Error when reading the input control point grid image:");
-            reg_print_msg_error(argv[i-1]);
-            return EXIT_FAILURE;
-         }
-         REG->SetControlPointGridImage(inputCCPImage);
-      }
-      else if((strcmp(argv[i],"-rmask")==0) || (strcmp(argv[i],"-tmask")==0) || (strcmp(argv[i],"--rmask")==0))
-      {
-         referenceMaskImage=reg_io_ReadImageFile(argv[++i]);
-         if(referenceMaskImage==nullptr)
-         {
-            reg_print_msg_error("Error when reading the reference mask image:");
-            reg_print_msg_error(argv[i-1]);
-            return EXIT_FAILURE;
-         }
-         REG->SetReferenceMask(referenceMaskImage);
-      }
-      else if((strcmp(argv[i],"-res")==0) || (strcmp(argv[i],"-result")==0) || (strcmp(argv[i],"--res")==0))
-      {
-         outputWarpedImageName=argv[++i];
-      }
-      else if(strcmp(argv[i], "-cpp")==0 || (strcmp(argv[i],"--cpp")==0))
-      {
-         outputCPPImageName=argv[++i];
-      }
-      else if(strcmp(argv[i], "-maxit")==0 || strcmp(argv[i], "--maxit")==0)
-      {
-         REG->SetMaximalIterationNumber(atoi(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-sx")==0 || strcmp(argv[i], "--sx")==0)
-      {
-         REG->SetSpacing(0,(float)atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-sy")==0 || strcmp(argv[i], "--sy")==0)
-      {
-         REG->SetSpacing(1,(float)atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-sz")==0 || strcmp(argv[i], "--sz")==0)
-      {
-         REG->SetSpacing(2,(float)atof(argv[++i]));
-      }
-      else if((strcmp(argv[i],"--nmi")==0) )
-      {
-         int bin=64;
-         if(refBinNumber!=0)
-            bin=refBinNumber;
-         for(int t=0; t<referenceImage->nt; ++t)
-            REG->UseNMISetReferenceBinNumber(t,bin);
-         bin=64;
-         if(floBinNumber!=0)
-            bin=floBinNumber;
-         for(int t=0; t<floatingImage->nt; ++t)
-            REG->UseNMISetFloatingBinNumber(t,bin);
-      }
-      else if((strcmp(argv[i],"-rbn")==0) || (strcmp(argv[i],"-tbn")==0))
-      {
-         int tp=atoi(argv[++i]);
-         int bin=atoi(argv[++i]);
-         refBinNumber=bin;
-         REG->UseNMISetReferenceBinNumber(tp,bin);
-      }
-      else if((strcmp(argv[i],"--rbn")==0) )
-      {
-         int bin = atoi(argv[++i]);
-         refBinNumber=bin;
-         for(int t=0; t<referenceImage->nt; ++t)
-            REG->UseNMISetReferenceBinNumber(t,bin);
-      }
-      else if((strcmp(argv[i],"-fbn")==0) || (strcmp(argv[i],"-sbn")==0))
-      {
-         int tp=atoi(argv[++i]);
-         int bin=atoi(argv[++i]);
-         floBinNumber=bin;
-         REG->UseNMISetFloatingBinNumber(tp,bin);
-      }
-      else if((strcmp(argv[i],"--fbn")==0) )
-      {
-         int bin = atoi(argv[++i]);
-         floBinNumber=bin;
-         for(int t=0; t<floatingImage->nt; ++t)
-            REG->UseNMISetFloatingBinNumber(t,bin);
-      }
-      else if(strcmp(argv[i], "-ln")==0 || strcmp(argv[i], "--ln")==0)
-      {
-         REG->SetLevelNumber(atoi(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-lp")==0 || strcmp(argv[i], "--lp")==0)
-      {
-         REG->SetLevelToPerform(atoi(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-be")==0 || strcmp(argv[i], "--be")==0)
-      {
-         REG->SetBendingEnergyWeight(atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-le")==0 || strcmp(argv[i], "--le")==0)
-      {
-         REG->SetLinearEnergyWeight(atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-jl")==0 || strcmp(argv[i], "--jl")==0)
-      {
-         REG->SetJacobianLogWeight(atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-noAppJL")==0 || strcmp(argv[i], "--noAppJL")==0)
-      {
-         REG->DoNotApproximateJacobianLog();
-      }
-      else if(strcmp(argv[i], "-land")==0 ||strcmp(argv[i], "--land")==0)
-      {
-         float weight = atof(argv[++i]);
-         char *filename = argv[++i];
-         std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(filename);
-         size_t landmarkNumber = inputMatrixSize.first;
-         size_t n = inputMatrixSize.second;
-         if(n==4 && referenceImage->nz>1){
-            reg_print_msg_error("4 values per line are expected for 2D images");
-            return EXIT_FAILURE;
-         }
-         else if(n==6 && referenceImage->nz<2){
-            reg_print_msg_error("6 values per line are expected for 3D images");
-            return EXIT_FAILURE;
-         }
-         else if(n!=4 && n!=6){
-            reg_print_msg_error("4 or 6 values are expected per line");
-            return EXIT_FAILURE;
-         }
-         float **allLandmarks = reg_tool_ReadMatrixFile<float>(filename, landmarkNumber, n);
-         referenceLandmark=(float *)malloc(landmarkNumber * n/2 * sizeof(float));
-         floatingLandmark=(float *)malloc(landmarkNumber * n/2 * sizeof(float));
-         for(size_t l=0, index=0;l<landmarkNumber;++l){
-            referenceLandmark[index]=allLandmarks[l][0];
-            referenceLandmark[index+1]=allLandmarks[l][1];
-            if(n==4){
-               floatingLandmark[index]=allLandmarks[l][2];
-               floatingLandmark[index+1]=allLandmarks[l][3];
-               index+=2;
+    /* read the input parameter */
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 ||
+            strcmp(argv[i], "--ref") == 0 || strcmp(argv[i], "-flo") == 0 ||
+            strcmp(argv[i], "-source") == 0 || strcmp(argv[i], "--flo") == 0 ||
+            strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
+            // argument has already been parsed
+            ++i;
+        } else if (strcmp(argv[i], "-voff") == 0) {
+            verbose = false;
+            reg->DoNotPrintOutInformation();
+        } else if (strcmp(argv[i], "-aff") == 0 || (strcmp(argv[i], "--aff") == 0)) {
+            // Check first if the specified affine file exist
+            char *affineTransformationName = argv[++i];
+            if (FILE *aff = fopen(affineTransformationName, "r")) {
+                fclose(aff);
+            } else {
+                reg_print_msg_error("The specified input affine file can not be read:");
+                reg_print_msg_error(affineTransformationName);
+                return EXIT_FAILURE;
             }
-            else{
-               referenceLandmark[index+2]=allLandmarks[l][2];
-               floatingLandmark[index]=allLandmarks[l][3];
-               floatingLandmark[index+1]=allLandmarks[l][4];
-               floatingLandmark[index+2]=allLandmarks[l][5];
-               index+=3;
+            // Read the affine matrix
+            reg_tool_ReadAffineFile(&affineMatrix,
+                                    affineTransformationName);
+            // Send the transformation to the registration object
+            reg->SetAffineTransformation(&affineMatrix);
+        } else if (strcmp(argv[i], "-incpp") == 0 || (strcmp(argv[i], "--incpp") == 0)) {
+            inputCCPImage = reg_io_ReadImageFile(argv[++i]);
+            if (inputCCPImage == nullptr) {
+                reg_print_msg_error("Error when reading the input control point grid image:");
+                reg_print_msg_error(argv[i - 1]);
+                return EXIT_FAILURE;
             }
-         }
-         REG->SetLandmarkRegularisationParam(landmarkNumber,
-                                             referenceLandmark,
-                                             floatingLandmark,
-                                             weight);
-         for(size_t l=0; l<landmarkNumber; ++l)
-            free(allLandmarks[l]);
-         free(allLandmarks);
-      }
-      else if((strcmp(argv[i],"-smooR")==0) || (strcmp(argv[i],"-smooT")==0) || strcmp(argv[i], "--smooR")==0)
-      {
-         REG->SetReferenceSmoothingSigma(atof(argv[++i]));
-      }
-      else if((strcmp(argv[i],"-smooF")==0) || (strcmp(argv[i],"-smooS")==0) || strcmp(argv[i], "--smooF")==0)
-      {
-         REG->SetFloatingSmoothingSigma(atof(argv[++i]));
-      }
-      else if((strcmp(argv[i],"-rLwTh")==0) || (strcmp(argv[i],"-tLwTh")==0))
-      {
-         int tp=atoi(argv[++i]);
-         float val=atof(argv[++i]);
-         REG->SetReferenceThresholdLow(tp,val);
-      }
-      else if((strcmp(argv[i],"-rUpTh")==0) || strcmp(argv[i],"-tUpTh")==0)
-      {
-         int tp=atoi(argv[++i]);
-         float val=atof(argv[++i]);
-         REG->SetReferenceThresholdUp(tp,val);
-      }
-      else if((strcmp(argv[i],"-fLwTh")==0) || (strcmp(argv[i],"-sLwTh")==0))
-      {
-         int tp=atoi(argv[++i]);
-         float val=atof(argv[++i]);
-         REG->SetFloatingThresholdLow(tp,val);
-      }
-      else if((strcmp(argv[i],"-fUpTh")==0) || (strcmp(argv[i],"-sUpTh")==0))
-      {
-         int tp=atoi(argv[++i]);
-         float val=atof(argv[++i]);
-         REG->SetFloatingThresholdUp(tp,val);
-      }
-      else if((strcmp(argv[i],"--rLwTh")==0) )
-      {
-         float threshold = atof(argv[++i]);
-         for(int t=0; t<referenceImage->nt; ++t)
-            REG->SetReferenceThresholdLow(t,threshold);
-      }
-      else if((strcmp(argv[i],"--rUpTh")==0) )
-      {
-         float threshold = atof(argv[++i]);
-         for(int t=0; t<referenceImage->nt; ++t)
-            REG->SetReferenceThresholdUp(t,threshold);
-      }
-      else if((strcmp(argv[i],"--fLwTh")==0) )
-      {
-         float threshold = atof(argv[++i]);
-         for(int t=0; t<floatingImage->nt; ++t)
-            REG->SetFloatingThresholdLow(t,threshold);
-      }
-      else if((strcmp(argv[i],"--fUpTh")==0) )
-      {
-         float threshold = atof(argv[++i]);
-         for(int t=0; t<floatingImage->nt; ++t)
-            REG->SetFloatingThresholdUp(t,threshold);
-      }
-      else if(strcmp(argv[i], "-smoothGrad")==0)
-      {
-         REG->SetGradientSmoothingSigma(atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "--smoothGrad")==0)
-      {
-         REG->SetGradientSmoothingSigma(atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-ssd")==0)
-      {
-         int timepoint = atoi(argv[++i]);
-         bool normalise = 1;
-         REG->UseSSD(timepoint, normalise);
-      }
-      else if(strcmp(argv[i], "--ssd")==0)
-      {
-         bool normalise = 1;
-         for(int t=0; t<floatingImage->nt; ++t)
-            REG->UseSSD(t, normalise);
-      }
-      else if(strcmp(argv[i], "-ssdn")==0)
-      {
-         int timepoint = atoi(argv[++i]);
-         bool normalise = 0;
-         REG->UseSSD(timepoint, normalise);
-      }
-      else if(strcmp(argv[i], "--ssdn")==0)
-      {
-         bool normalise = 0;
-         for(int t=0; t<floatingImage->nt; ++t)
-            REG->UseSSD(t, normalise);
-      }
-      else if(strcmp(argv[i], "--mind")==0)
-      {
-         int offset = atoi(argv[++i]);
-         if(offset!=-999999){ // Value specified by the CLI - to be ignored
-            if(referenceImage->nt>1 || floatingImage->nt>1){
-               reg_print_msg_error("reg_mind does not support multiple time point image");
-               reg_exit();
+            reg->SetControlPointGridImage(inputCCPImage);
+        } else if ((strcmp(argv[i], "-rmask") == 0) || (strcmp(argv[i], "-tmask") == 0) || (strcmp(argv[i], "--rmask") == 0)) {
+            referenceMaskImage = reg_io_ReadImageFile(argv[++i]);
+            if (referenceMaskImage == nullptr) {
+                reg_print_msg_error("Error when reading the reference mask image:");
+                reg_print_msg_error(argv[i - 1]);
+                return EXIT_FAILURE;
             }
-            REG->UseMIND(0, offset);
-         }
-      }
-      else if(strcmp(argv[i], "--mindssc")==0)
-      {
-         int offset = atoi(argv[++i]);
-         if(offset!=-999999){ // Value specified by the CLI - to be ignored
-            if(referenceImage->nt>1 || floatingImage->nt>1){
-               reg_print_msg_error("reg_mindssc does not support multiple time point image");
-               reg_exit();
+            reg->SetReferenceMask(referenceMaskImage);
+        } else if ((strcmp(argv[i], "-res") == 0) || (strcmp(argv[i], "-result") == 0) || (strcmp(argv[i], "--res") == 0)) {
+            outputWarpedImageName = argv[++i];
+        } else if (strcmp(argv[i], "-cpp") == 0 || (strcmp(argv[i], "--cpp") == 0)) {
+            outputCPPImageName = argv[++i];
+        } else if (strcmp(argv[i], "-maxit") == 0 || strcmp(argv[i], "--maxit") == 0) {
+            reg->SetMaximalIterationNumber(atoi(argv[++i]));
+        } else if (strcmp(argv[i], "-sx") == 0 || strcmp(argv[i], "--sx") == 0) {
+            reg->SetSpacing(0, (float)atof(argv[++i]));
+        } else if (strcmp(argv[i], "-sy") == 0 || strcmp(argv[i], "--sy") == 0) {
+            reg->SetSpacing(1, (float)atof(argv[++i]));
+        } else if (strcmp(argv[i], "-sz") == 0 || strcmp(argv[i], "--sz") == 0) {
+            reg->SetSpacing(2, (float)atof(argv[++i]));
+        } else if ((strcmp(argv[i], "--nmi") == 0)) {
+            int bin = 64;
+            if (refBinNumber != 0)
+                bin = refBinNumber;
+            for (int t = 0; t < referenceImage->nt; ++t)
+                reg->UseNMISetReferenceBinNumber(t, bin);
+            bin = 64;
+            if (floBinNumber != 0)
+                bin = floBinNumber;
+            for (int t = 0; t < floatingImage->nt; ++t)
+                reg->UseNMISetFloatingBinNumber(t, bin);
+        } else if ((strcmp(argv[i], "-rbn") == 0) || (strcmp(argv[i], "-tbn") == 0)) {
+            int tp = atoi(argv[++i]);
+            int bin = atoi(argv[++i]);
+            refBinNumber = bin;
+            reg->UseNMISetReferenceBinNumber(tp, bin);
+        } else if ((strcmp(argv[i], "--rbn") == 0)) {
+            int bin = atoi(argv[++i]);
+            refBinNumber = bin;
+            for (int t = 0; t < referenceImage->nt; ++t)
+                reg->UseNMISetReferenceBinNumber(t, bin);
+        } else if ((strcmp(argv[i], "-fbn") == 0) || (strcmp(argv[i], "-sbn") == 0)) {
+            int tp = atoi(argv[++i]);
+            int bin = atoi(argv[++i]);
+            floBinNumber = bin;
+            reg->UseNMISetFloatingBinNumber(tp, bin);
+        } else if ((strcmp(argv[i], "--fbn") == 0)) {
+            int bin = atoi(argv[++i]);
+            floBinNumber = bin;
+            for (int t = 0; t < floatingImage->nt; ++t)
+                reg->UseNMISetFloatingBinNumber(t, bin);
+        } else if (strcmp(argv[i], "-ln") == 0 || strcmp(argv[i], "--ln") == 0) {
+            reg->SetLevelNumber(atoi(argv[++i]));
+        } else if (strcmp(argv[i], "-lp") == 0 || strcmp(argv[i], "--lp") == 0) {
+            reg->SetLevelToPerform(atoi(argv[++i]));
+        } else if (strcmp(argv[i], "-be") == 0 || strcmp(argv[i], "--be") == 0) {
+            reg->SetBendingEnergyWeight(atof(argv[++i]));
+        } else if (strcmp(argv[i], "-le") == 0 || strcmp(argv[i], "--le") == 0) {
+            reg->SetLinearEnergyWeight(atof(argv[++i]));
+        } else if (strcmp(argv[i], "-jl") == 0 || strcmp(argv[i], "--jl") == 0) {
+            reg->SetJacobianLogWeight(atof(argv[++i]));
+        } else if (strcmp(argv[i], "-noAppJL") == 0 || strcmp(argv[i], "--noAppJL") == 0) {
+            reg->DoNotApproximateJacobianLog();
+        } else if (strcmp(argv[i], "-land") == 0 || strcmp(argv[i], "--land") == 0) {
+            float weight = atof(argv[++i]);
+            char *filename = argv[++i];
+            std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(filename);
+            size_t landmarkNumber = inputMatrixSize.first;
+            size_t n = inputMatrixSize.second;
+            if (n == 4 && referenceImage->nz > 1) {
+                reg_print_msg_error("4 values per line are expected for 2D images");
+                return EXIT_FAILURE;
+            } else if (n == 6 && referenceImage->nz < 2) {
+                reg_print_msg_error("6 values per line are expected for 3D images");
+                return EXIT_FAILURE;
+            } else if (n != 4 && n != 6) {
+                reg_print_msg_error("4 or 6 values are expected per line");
+                return EXIT_FAILURE;
             }
-            REG->UseMINDSSC(0, offset);
-         }
-      }
-      else if(strcmp(argv[i], "-kld")==0)
-      {
-         REG->UseKLDivergence(atoi(argv[++i]));
-      }
-      else if(strcmp(argv[i], "--kld")==0)
-      {
-         for(int t=0; t<floatingImage->nt; ++t)
-            REG->UseKLDivergence(t);
-      }
-      else if(strcmp(argv[i], "-rr")==0 || strcmp(argv[i], "--rr")==0)
-      {
-         REG->UseRobustRange();
-      }
-      else if(strcmp(argv[i], "-lncc")==0)
-      {
-         int tp=atoi(argv[++i]);
-         float stdev = atof(argv[++i]);
-         REG->UseLNCC(tp,stdev);
-      }
-      else if(strcmp(argv[i], "--lncc")==0)
-      {
-         float stdev = (float)atof(argv[++i]);
-         if(stdev!=-999999){ // Value specified by the CLI - to be ignored
-            for(int t=0; t<referenceImage->nt; ++t)
-               REG->UseLNCC(t,stdev);
-         }
-      }
-      else if(strcmp(argv[i], "-lnccMean")==0)
-      {
-         useMeanLNCC=true;
-      }
-      else if(strcmp(argv[i], "-dti")==0 || strcmp(argv[i], "--dti")==0)
-      {
-         bool *timePoint = new bool[referenceImage->nt];
-         for(int t=0; t<referenceImage->nt; ++t)
-            timePoint[t]=false;
-         timePoint[atoi(argv[++i])]=true;
-         timePoint[atoi(argv[++i])]=true;
-         timePoint[atoi(argv[++i])]=true;
-         if(referenceImage->nz>1)
-         {
-            timePoint[atoi(argv[++i])]=true;
-            timePoint[atoi(argv[++i])]=true;
-            timePoint[atoi(argv[++i])]=true;
-         }
-         REG->UseDTI(timePoint);
-         delete []timePoint;
-      }
-      else if (strcmp(argv[i], "-nmiw") == 0)
-      {
-         int tp = atoi(argv[++i]);
-         double w = atof(argv[++i]);
-         REG->SetNMIWeight(tp, w);
-      }
-      else if (strcmp(argv[i], "-lnccw") == 0)
-      {
-         int tp = atoi(argv[++i]);
-         double w = atof(argv[++i]);
-         REG->SetLNCCWeight(tp, w);
-      }
-      else if (strcmp(argv[i], "-ssdw") == 0)
-      {
-         int tp = atoi(argv[++i]);
-         double w = atof(argv[++i]);
-         REG->SetSSDWeight(tp, w);
-      }
-      else if (strcmp(argv[i], "-kldw") == 0)
-      {
-         int tp = atoi(argv[++i]);
-         double w = atof(argv[++i]);
-         REG->SetKLDWeight(tp, w);
-      }
-      else if(strcmp(argv[i], "-wSim") == 0 || strcmp(argv[i], "--wSim") == 0)
-      {
-         refLocalWeightSim = reg_io_ReadImageFile(argv[++i]);
-         REG->SetLocalWeightSim(refLocalWeightSim);
-      }
-      else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0)
-      {
-         REG->SetWarpedPaddingValue(atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-nopy")==0 || strcmp(argv[i], "--nopy")==0)
-      {
-         REG->DoNotUsePyramidalApproach();
-      }
-      else if(strcmp(argv[i], "-noConj")==0 || strcmp(argv[i], "--noConj")==0)
-      {
-         REG->DoNotUseConjugateGradient();
-      }
-      else if(strcmp(argv[i], "-approxGrad")==0 || strcmp(argv[i], "--approxGrad")==0)
-      {
-         REG->UseApproximatedGradient();
-      }
-      else if(strcmp(argv[i], "-interp")==0 || strcmp(argv[i], "--interp")==0)
-      {
-         int interp=atoi(argv[++i]);
-         switch(interp)
-         {
-         case 0:
-            REG->UseNearestNeighborInterpolation();
-            break;
-         case 1:
-            REG->UseLinearInterpolation();
-            break;
-         default:
-            REG->UseCubicSplineInterpolation();
-            break;
-         }
-      }
-      else if((strcmp(argv[i],"-fmask")==0) || (strcmp(argv[i],"-smask")==0) ||
-              (strcmp(argv[i],"--fmask")==0) || (strcmp(argv[i],"--smask")==0))
-      {
-         floatingMaskImage=reg_io_ReadImageFile(argv[++i]);
-         if(floatingMaskImage==nullptr)
-         {
-            reg_print_msg_error("Error when reading the floating mask image:");
-            reg_print_msg_error(argv[i-1]);
-            return EXIT_FAILURE;
-         }
-         REG->SetFloatingMask(floatingMaskImage);
-      }
-      else if(strcmp(argv[i], "-ic")==0 || strcmp(argv[i], "--ic")==0)
-      {
-         REG->SetInverseConsistencyWeight(atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-nox") ==0)
-      {
-         REG->NoOptimisationAlongX();
-      }
-      else if(strcmp(argv[i], "-noy") ==0)
-      {
-         REG->NoOptimisationAlongY();
-      }
-      else if(strcmp(argv[i], "-noz") ==0)
-      {
-         REG->NoOptimisationAlongZ();
-      }
-      else if(strcmp(argv[i],"-pert")==0 || strcmp(argv[i],"--pert")==0)
-      {
-         REG->SetPerturbationNumber((size_t)atoi(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-nogr") ==0)
-      {
-         REG->NoGridRefinement();
-      }
-      else if(strcmp(argv[i], "-nogce")==0 || strcmp(argv[i], "--nogce")==0)
-      {
-         REG->DoNotUseGradientCumulativeExp();
-      }
-      else if(strcmp(argv[i], "-bch")==0 || strcmp(argv[i], "--bch")==0)
-      {
-         REG->UseBCHUpdate(atoi(argv[++i]));
-      }
+            float **allLandmarks = reg_tool_ReadMatrixFile<float>(filename, landmarkNumber, n);
+            referenceLandmark = (float *)malloc(landmarkNumber * n / 2 * sizeof(float));
+            floatingLandmark = (float *)malloc(landmarkNumber * n / 2 * sizeof(float));
+            for (size_t l = 0, index = 0; l < landmarkNumber; ++l) {
+                referenceLandmark[index] = allLandmarks[l][0];
+                referenceLandmark[index + 1] = allLandmarks[l][1];
+                if (n == 4) {
+                    floatingLandmark[index] = allLandmarks[l][2];
+                    floatingLandmark[index + 1] = allLandmarks[l][3];
+                    index += 2;
+                } else {
+                    referenceLandmark[index + 2] = allLandmarks[l][2];
+                    floatingLandmark[index] = allLandmarks[l][3];
+                    floatingLandmark[index + 1] = allLandmarks[l][4];
+                    floatingLandmark[index + 2] = allLandmarks[l][5];
+                    index += 3;
+                }
+            }
+            reg->SetLandmarkRegularisationParam(landmarkNumber,
+                                                referenceLandmark,
+                                                floatingLandmark,
+                                                weight);
+            for (size_t l = 0; l < landmarkNumber; ++l)
+                free(allLandmarks[l]);
+            free(allLandmarks);
+        } else if ((strcmp(argv[i], "-smooR") == 0) || (strcmp(argv[i], "-smooT") == 0) || strcmp(argv[i], "--smooR") == 0) {
+            reg->SetReferenceSmoothingSigma(atof(argv[++i]));
+        } else if ((strcmp(argv[i], "-smooF") == 0) || (strcmp(argv[i], "-smooS") == 0) || strcmp(argv[i], "--smooF") == 0) {
+            reg->SetFloatingSmoothingSigma(atof(argv[++i]));
+        } else if ((strcmp(argv[i], "-rLwTh") == 0) || (strcmp(argv[i], "-tLwTh") == 0)) {
+            int tp = atoi(argv[++i]);
+            float val = atof(argv[++i]);
+            reg->SetReferenceThresholdLow(tp, val);
+        } else if ((strcmp(argv[i], "-rUpTh") == 0) || strcmp(argv[i], "-tUpTh") == 0) {
+            int tp = atoi(argv[++i]);
+            float val = atof(argv[++i]);
+            reg->SetReferenceThresholdUp(tp, val);
+        } else if ((strcmp(argv[i], "-fLwTh") == 0) || (strcmp(argv[i], "-sLwTh") == 0)) {
+            int tp = atoi(argv[++i]);
+            float val = atof(argv[++i]);
+            reg->SetFloatingThresholdLow(tp, val);
+        } else if ((strcmp(argv[i], "-fUpTh") == 0) || (strcmp(argv[i], "-sUpTh") == 0)) {
+            int tp = atoi(argv[++i]);
+            float val = atof(argv[++i]);
+            reg->SetFloatingThresholdUp(tp, val);
+        } else if ((strcmp(argv[i], "--rLwTh") == 0)) {
+            float threshold = atof(argv[++i]);
+            for (int t = 0; t < referenceImage->nt; ++t)
+                reg->SetReferenceThresholdLow(t, threshold);
+        } else if ((strcmp(argv[i], "--rUpTh") == 0)) {
+            float threshold = atof(argv[++i]);
+            for (int t = 0; t < referenceImage->nt; ++t)
+                reg->SetReferenceThresholdUp(t, threshold);
+        } else if ((strcmp(argv[i], "--fLwTh") == 0)) {
+            float threshold = atof(argv[++i]);
+            for (int t = 0; t < floatingImage->nt; ++t)
+                reg->SetFloatingThresholdLow(t, threshold);
+        } else if ((strcmp(argv[i], "--fUpTh") == 0)) {
+            float threshold = atof(argv[++i]);
+            for (int t = 0; t < floatingImage->nt; ++t)
+                reg->SetFloatingThresholdUp(t, threshold);
+        } else if (strcmp(argv[i], "-smoothGrad") == 0) {
+            reg->SetGradientSmoothingSigma(atof(argv[++i]));
+        } else if (strcmp(argv[i], "--smoothGrad") == 0) {
+            reg->SetGradientSmoothingSigma(atof(argv[++i]));
+        } else if (strcmp(argv[i], "-ssd") == 0) {
+            int timepoint = atoi(argv[++i]);
+            bool normalise = 1;
+            reg->UseSSD(timepoint, normalise);
+        } else if (strcmp(argv[i], "--ssd") == 0) {
+            bool normalise = 1;
+            for (int t = 0; t < floatingImage->nt; ++t)
+                reg->UseSSD(t, normalise);
+        } else if (strcmp(argv[i], "-ssdn") == 0) {
+            int timepoint = atoi(argv[++i]);
+            bool normalise = 0;
+            reg->UseSSD(timepoint, normalise);
+        } else if (strcmp(argv[i], "--ssdn") == 0) {
+            bool normalise = 0;
+            for (int t = 0; t < floatingImage->nt; ++t)
+                reg->UseSSD(t, normalise);
+        } else if (strcmp(argv[i], "--mind") == 0) {
+            int offset = atoi(argv[++i]);
+            if (offset != -999999) { // Value specified by the CLI - to be ignored
+                if (referenceImage->nt > 1 || floatingImage->nt > 1) {
+                    reg_print_msg_error("reg_mind does not support multiple time point image");
+                    reg_exit();
+                }
+                reg->UseMIND(0, offset);
+            }
+        } else if (strcmp(argv[i], "--mindssc") == 0) {
+            int offset = atoi(argv[++i]);
+            if (offset != -999999) { // Value specified by the CLI - to be ignored
+                if (referenceImage->nt > 1 || floatingImage->nt > 1) {
+                    reg_print_msg_error("reg_mindssc does not support multiple time point image");
+                    reg_exit();
+                }
+                reg->UseMINDSSC(0, offset);
+            }
+        } else if (strcmp(argv[i], "-kld") == 0) {
+            reg->UseKLDivergence(atoi(argv[++i]));
+        } else if (strcmp(argv[i], "--kld") == 0) {
+            for (int t = 0; t < floatingImage->nt; ++t)
+                reg->UseKLDivergence(t);
+        } else if (strcmp(argv[i], "-rr") == 0 || strcmp(argv[i], "--rr") == 0) {
+            reg->UseRobustRange();
+        } else if (strcmp(argv[i], "-lncc") == 0) {
+            int tp = atoi(argv[++i]);
+            float stdev = atof(argv[++i]);
+            reg->UseLNCC(tp, stdev);
+        } else if (strcmp(argv[i], "--lncc") == 0) {
+            float stdev = (float)atof(argv[++i]);
+            if (stdev != -999999) { // Value specified by the CLI - to be ignored
+                for (int t = 0; t < referenceImage->nt; ++t)
+                    reg->UseLNCC(t, stdev);
+            }
+        } else if (strcmp(argv[i], "-lnccMean") == 0) {
+            useMeanLNCC = true;
+        } else if (strcmp(argv[i], "-dti") == 0 || strcmp(argv[i], "--dti") == 0) {
+            bool *timePoint = new bool[referenceImage->nt];
+            for (int t = 0; t < referenceImage->nt; ++t)
+                timePoint[t] = false;
+            timePoint[atoi(argv[++i])] = true;
+            timePoint[atoi(argv[++i])] = true;
+            timePoint[atoi(argv[++i])] = true;
+            if (referenceImage->nz > 1) {
+                timePoint[atoi(argv[++i])] = true;
+                timePoint[atoi(argv[++i])] = true;
+                timePoint[atoi(argv[++i])] = true;
+            }
+            reg->UseDTI(timePoint);
+            delete[]timePoint;
+        } else if (strcmp(argv[i], "-nmiw") == 0) {
+            int tp = atoi(argv[++i]);
+            double w = atof(argv[++i]);
+            reg->SetNMIWeight(tp, w);
+        } else if (strcmp(argv[i], "-lnccw") == 0) {
+            int tp = atoi(argv[++i]);
+            double w = atof(argv[++i]);
+            reg->SetLNCCWeight(tp, w);
+        } else if (strcmp(argv[i], "-ssdw") == 0) {
+            int tp = atoi(argv[++i]);
+            double w = atof(argv[++i]);
+            reg->SetSSDWeight(tp, w);
+        } else if (strcmp(argv[i], "-kldw") == 0) {
+            int tp = atoi(argv[++i]);
+            double w = atof(argv[++i]);
+            reg->SetKLDWeight(tp, w);
+        } else if (strcmp(argv[i], "-wSim") == 0 || strcmp(argv[i], "--wSim") == 0) {
+            refLocalWeightSim = reg_io_ReadImageFile(argv[++i]);
+            reg->SetLocalWeightSim(refLocalWeightSim);
+        } else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) {
+            reg->SetWarpedPaddingValue(atof(argv[++i]));
+        } else if (strcmp(argv[i], "-nopy") == 0 || strcmp(argv[i], "--nopy") == 0) {
+            reg->DoNotUsePyramidalApproach();
+        } else if (strcmp(argv[i], "-noConj") == 0 || strcmp(argv[i], "--noConj") == 0) {
+            reg->DoNotUseConjugateGradient();
+        } else if (strcmp(argv[i], "-approxGrad") == 0 || strcmp(argv[i], "--approxGrad") == 0) {
+            reg->UseApproximatedGradient();
+        } else if (strcmp(argv[i], "-interp") == 0 || strcmp(argv[i], "--interp") == 0) {
+            int interp = atoi(argv[++i]);
+            switch (interp) {
+            case 0:
+                reg->UseNearestNeighborInterpolation();
+                break;
+            case 1:
+                reg->UseLinearInterpolation();
+                break;
+            default:
+                reg->UseCubicSplineInterpolation();
+                break;
+            }
+        } else if ((strcmp(argv[i], "-fmask") == 0) || (strcmp(argv[i], "-smask") == 0) ||
+                 (strcmp(argv[i], "--fmask") == 0) || (strcmp(argv[i], "--smask") == 0)) {
+            floatingMaskImage = reg_io_ReadImageFile(argv[++i]);
+            if (floatingMaskImage == nullptr) {
+                reg_print_msg_error("Error when reading the floating mask image:");
+                reg_print_msg_error(argv[i - 1]);
+                return EXIT_FAILURE;
+            }
+            reg->SetFloatingMask(floatingMaskImage);
+        } else if (strcmp(argv[i], "-ic") == 0 || strcmp(argv[i], "--ic") == 0) {
+            reg->SetInverseConsistencyWeight(atof(argv[++i]));
+        } else if (strcmp(argv[i], "-nox") == 0) {
+            reg->NoOptimisationAlongX();
+        } else if (strcmp(argv[i], "-noy") == 0) {
+            reg->NoOptimisationAlongY();
+        } else if (strcmp(argv[i], "-noz") == 0) {
+            reg->NoOptimisationAlongZ();
+        } else if (strcmp(argv[i], "-pert") == 0 || strcmp(argv[i], "--pert") == 0) {
+            reg->SetPerturbationNumber((size_t)atoi(argv[++i]));
+        } else if (strcmp(argv[i], "-nogr") == 0) {
+            reg->NoGridRefinement();
+        } else if (strcmp(argv[i], "-nogce") == 0 || strcmp(argv[i], "--nogce") == 0) {
+            reg->DoNotUseGradientCumulativeExp();
+        } else if (strcmp(argv[i], "-bch") == 0 || strcmp(argv[i], "--bch") == 0) {
+            reg->UseBCHUpdate(atoi(argv[++i]));
+        }
 
-      else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
-      {
+        else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) {
 #if defined (_OPENMP)
-         omp_set_num_threads(atoi(argv[++i]));
+            omp_set_num_threads(atoi(argv[++i]));
 #else
-         reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
-         ++i;
+            reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
+            ++i;
 #endif
-      }
-      /* All the following arguments should have already been parsed */
-      else if(strcmp(argv[i], "-help")!=0 && strcmp(argv[i], "-Help")!=0 &&
-              strcmp(argv[i], "-HELP")!=0 && strcmp(argv[i], "-h")!=0 &&
-              strcmp(argv[i], "--h")!=0 && strcmp(argv[i], "--help")!=0 &&
-              strcmp(argv[i], "--xml")!=0 && strcmp(argv[i], "-version")!=0 &&
-              strcmp(argv[i], "-Version")!=0 && strcmp(argv[i], "-V")!=0 &&
-              strcmp(argv[i], "-v")!=0 && strcmp(argv[i], "--v")!=0 &&
-              strcmp(argv[i], "-gpu")!=0 && strcmp(argv[i], "--gpu")!=0 &&
-              strcmp(argv[i], "-vel")!=0 && strcmp(argv[i], "-sym")!=0)
-      {
-         reg_print_msg_error("\tParameter unknown:");
-         reg_print_msg_error(argv[i]);
-         PetitUsage((argv[0]));
-         return EXIT_FAILURE;
-      }
-   }
-   if(useMeanLNCC)
-      REG->SetLNCCKernelType(2);
+        }
+        /* All the following arguments should have already been parsed */
+        else if (strcmp(argv[i], "-help") != 0 && strcmp(argv[i], "-Help") != 0 &&
+                 strcmp(argv[i], "-HELP") != 0 && strcmp(argv[i], "-h") != 0 &&
+                 strcmp(argv[i], "--h") != 0 && strcmp(argv[i], "--help") != 0 &&
+                 strcmp(argv[i], "--xml") != 0 && strcmp(argv[i], "-version") != 0 &&
+                 strcmp(argv[i], "-Version") != 0 && strcmp(argv[i], "-V") != 0 &&
+                 strcmp(argv[i], "-v") != 0 && strcmp(argv[i], "--v") != 0 &&
+                 strcmp(argv[i], "-platf") != 0 && strcmp(argv[i], "--platf") != 0 &&
+                 strcmp(argv[i], "-vel") != 0 && strcmp(argv[i], "-sym") != 0) {
+            reg_print_msg_error("\tParameter unknown:");
+            reg_print_msg_error(argv[i]);
+            PetitUsage((argv[0]));
+            return EXIT_FAILURE;
+        }
+    }
+    if (useMeanLNCC)
+        reg->SetLNCCKernelType(2);
 
 #ifndef NDEBUG
-   reg_print_msg_debug("*******************************************");
-   reg_print_msg_debug("*******************************************");
-   reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode");
-   reg_print_msg_debug("Please re-run cmake to set the variable");
-   reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required");
-   reg_print_msg_debug("*******************************************");
-   reg_print_msg_debug("*******************************************");
+    reg_print_msg_debug("*******************************************");
+    reg_print_msg_debug("*******************************************");
+    reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode");
+    reg_print_msg_debug("Please re-run cmake to set the variable");
+    reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required");
+    reg_print_msg_debug("*******************************************");
+    reg_print_msg_debug("*******************************************");
 #endif
 
 #if defined (_OPENMP)
-   if(verbose)
-   {
-      int maxThreadNumber = omp_get_max_threads();
-      text = stringFormat("OpenMP is used with %i thread(s)", maxThreadNumber);
-      reg_print_info((argv[0]), text.c_str());
-   }
+    if (verbose) {
+        int maxThreadNumber = omp_get_max_threads();
+        text = stringFormat("OpenMP is used with %i thread(s)", maxThreadNumber);
+        reg_print_info((argv[0]), text.c_str());
+    }
 #endif // _OPENMP
 
-   // Run the registration
-   REG->Run();
-
-   // Save the control point image
-   nifti_image *outputControlPointGridImage = REG->GetControlPointPositionImage();
-   if(outputCPPImageName==nullptr) outputCPPImageName=(char *)"outputCPP.nii";
-   memset(outputControlPointGridImage->descrip, 0, 80);
-   strcpy (outputControlPointGridImage->descrip,"Control point position from NiftyReg (reg_f3d)");
-   if(strcmp("NiftyReg F3D2", REG->GetExecutableName())==0)
-      strcpy (outputControlPointGridImage->descrip,"Velocity field grid from NiftyReg (reg_f3d2)");
-   reg_io_WriteImageFile(outputControlPointGridImage,outputCPPImageName);
-   nifti_image_free(outputControlPointGridImage);
-   outputControlPointGridImage=nullptr;
+    // Run the registration
+    reg->Run();
 
-   // Save the backward control point image
-   if(REG->GetSymmetricStatus())
-   {
-      // _backward is added to the forward control point grid image name
-      std::string b(outputCPPImageName);
-      if(b.find( ".nii.gz") != std::string::npos)
-         b.replace(b.find( ".nii.gz"),7,"_backward.nii.gz");
-      else if(b.find( ".nii") != std::string::npos)
-         b.replace(b.find( ".nii"),4,"_backward.nii");
-      else if(b.find( ".hdr") != std::string::npos)
-         b.replace(b.find( ".hdr"),4,"_backward.hdr");
-      else if(b.find( ".img.gz") != std::string::npos)
-         b.replace(b.find( ".img.gz"),7,"_backward.img.gz");
-      else if(b.find( ".img") != std::string::npos)
-         b.replace(b.find( ".img"),4,"_backward.img");
-      else if(b.find( ".png") != std::string::npos)
-         b.replace(b.find( ".png"),4,"_backward.png");
-      else if(b.find( ".nrrd") != std::string::npos)
-         b.replace(b.find( ".nrrd"),5,"_backward.nrrd");
-      else b.append("_backward.nii");
-      nifti_image *outputBackwardControlPointGridImage = REG->GetBackwardControlPointPositionImage();
-      memset(outputBackwardControlPointGridImage->descrip, 0, 80);
-      strcpy (outputBackwardControlPointGridImage->descrip,"Backward Control point position from NiftyReg (reg_f3d)");
-      if(strcmp("NiftyReg F3D2", REG->GetExecutableName())==0)
-         strcpy (outputBackwardControlPointGridImage->descrip,"Backward velocity field grid from NiftyReg (reg_f3d2)");
-      reg_io_WriteImageFile(outputBackwardControlPointGridImage,b.c_str());
-      nifti_image_free(outputBackwardControlPointGridImage);
-      outputBackwardControlPointGridImage=nullptr;
-   }
+    // Save the control point image
+    nifti_image *outputControlPointGridImage = reg->GetControlPointPositionImage();
+    if (outputCPPImageName == nullptr) outputCPPImageName = (char *)"outputCPP.nii";
+    memset(outputControlPointGridImage->descrip, 0, 80);
+    strcpy(outputControlPointGridImage->descrip, "Control point position from NiftyReg (reg_f3d)");
+    if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0)
+        strcpy(outputControlPointGridImage->descrip, "Velocity field grid from NiftyReg (reg_f3d2)");
+    reg_io_WriteImageFile(outputControlPointGridImage, outputCPPImageName);
+    nifti_image_free(outputControlPointGridImage);
+    outputControlPointGridImage = nullptr;
 
-   // Save the warped image(s)
-   nifti_image **outputWarpedImage = REG->GetWarpedImage();
-   if(outputWarpedImageName==nullptr)
-      outputWarpedImageName=(char *)"outputResult.nii";
-   memset(outputWarpedImage[0]->descrip, 0, 80);
-   strcpy (outputWarpedImage[0]->descrip,"Warped image using NiftyReg (reg_f3d)");
-   if(strcmp("NiftyReg F3D2", REG->GetExecutableName())==0)
-   {
-      strcpy (outputWarpedImage[0]->descrip,"Warped image using NiftyReg (reg_f3d2)");
-      strcpy (outputWarpedImage[1]->descrip,"Warped image using NiftyReg (reg_f3d2)");
-   }
-   if(REG->GetSymmetricStatus())
-   {
-      if(outputWarpedImage[1]!=nullptr)
-      {
-         std::string b(outputWarpedImageName);
-         if(b.find( ".nii.gz") != std::string::npos)
-            b.replace(b.find( ".nii.gz"),7,"_backward.nii.gz");
-         else if(b.find( ".nii") != std::string::npos)
-            b.replace(b.find( ".nii"),4,"_backward.nii");
-         else if(b.find( ".hdr") != std::string::npos)
-            b.replace(b.find( ".hdr"),4,"_backward.hdr");
-         else if(b.find( ".img.gz") != std::string::npos)
-            b.replace(b.find( ".img.gz"),7,"_backward.img.gz");
-         else if(b.find( ".img") != std::string::npos)
-            b.replace(b.find( ".img"),4,"_backward.img");
-         else if(b.find( ".png") != std::string::npos)
-            b.replace(b.find( ".png"),4,"_backward.png");
-         else if(b.find( ".nrrd") != std::string::npos)
-            b.replace(b.find( ".nrrd"),5,"_backward.nrrd");
-         else b.append("_backward.nii");
-         reg_io_WriteImageFile(outputWarpedImage[1],b.c_str());
-      }
-   }
-   reg_io_WriteImageFile(outputWarpedImage[0],outputWarpedImageName);
-   if(outputWarpedImage[0]!=nullptr)
-      nifti_image_free(outputWarpedImage[0]);
-   outputWarpedImage[0]=nullptr;
-   if(outputWarpedImage[1]!=nullptr)
-      nifti_image_free(outputWarpedImage[1]);
-   outputWarpedImage[1]=nullptr;
-   free(outputWarpedImage);
-   outputWarpedImage=nullptr;
-   // Free the allocated landmarks if used
-   free(referenceLandmark);
-   free(floatingLandmark);
+    // Save the backward control point image
+    if (reg->GetSymmetricStatus()) {
+        // _backward is added to the forward control point grid image name
+        std::string b(outputCPPImageName);
+        if (b.find(".nii.gz") != std::string::npos)
+            b.replace(b.find(".nii.gz"), 7, "_backward.nii.gz");
+        else if (b.find(".nii") != std::string::npos)
+            b.replace(b.find(".nii"), 4, "_backward.nii");
+        else if (b.find(".hdr") != std::string::npos)
+            b.replace(b.find(".hdr"), 4, "_backward.hdr");
+        else if (b.find(".img.gz") != std::string::npos)
+            b.replace(b.find(".img.gz"), 7, "_backward.img.gz");
+        else if (b.find(".img") != std::string::npos)
+            b.replace(b.find(".img"), 4, "_backward.img");
+        else if (b.find(".png") != std::string::npos)
+            b.replace(b.find(".png"), 4, "_backward.png");
+        else if (b.find(".nrrd") != std::string::npos)
+            b.replace(b.find(".nrrd"), 5, "_backward.nrrd");
+        else b.append("_backward.nii");
+        nifti_image *outputBackwardControlPointGridImage = reg->GetBackwardControlPointPositionImage();
+        memset(outputBackwardControlPointGridImage->descrip, 0, 80);
+        strcpy(outputBackwardControlPointGridImage->descrip, "Backward Control point position from NiftyReg (reg_f3d)");
+        if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0)
+            strcpy(outputBackwardControlPointGridImage->descrip, "Backward velocity field grid from NiftyReg (reg_f3d2)");
+        reg_io_WriteImageFile(outputBackwardControlPointGridImage, b.c_str());
+        nifti_image_free(outputBackwardControlPointGridImage);
+        outputBackwardControlPointGridImage = nullptr;
+    }
 
-   // Erase the registration object
-   delete REG;
+    // Save the warped image(s)
+    nifti_image **outputWarpedImage = reg->GetWarpedImage();
+    if (outputWarpedImageName == nullptr)
+        outputWarpedImageName = (char *)"outputResult.nii";
+    memset(outputWarpedImage[0]->descrip, 0, 80);
+    strcpy(outputWarpedImage[0]->descrip, "Warped image using NiftyReg (reg_f3d)");
+    if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0) {
+        strcpy(outputWarpedImage[0]->descrip, "Warped image using NiftyReg (reg_f3d2)");
+        strcpy(outputWarpedImage[1]->descrip, "Warped image using NiftyReg (reg_f3d2)");
+    }
+    if (reg->GetSymmetricStatus()) {
+        if (outputWarpedImage[1] != nullptr) {
+            std::string b(outputWarpedImageName);
+            if (b.find(".nii.gz") != std::string::npos)
+                b.replace(b.find(".nii.gz"), 7, "_backward.nii.gz");
+            else if (b.find(".nii") != std::string::npos)
+                b.replace(b.find(".nii"), 4, "_backward.nii");
+            else if (b.find(".hdr") != std::string::npos)
+                b.replace(b.find(".hdr"), 4, "_backward.hdr");
+            else if (b.find(".img.gz") != std::string::npos)
+                b.replace(b.find(".img.gz"), 7, "_backward.img.gz");
+            else if (b.find(".img") != std::string::npos)
+                b.replace(b.find(".img"), 4, "_backward.img");
+            else if (b.find(".png") != std::string::npos)
+                b.replace(b.find(".png"), 4, "_backward.png");
+            else if (b.find(".nrrd") != std::string::npos)
+                b.replace(b.find(".nrrd"), 5, "_backward.nrrd");
+            else b.append("_backward.nii");
+            reg_io_WriteImageFile(outputWarpedImage[1], b.c_str());
+        }
+    }
+    reg_io_WriteImageFile(outputWarpedImage[0], outputWarpedImageName);
+    if (outputWarpedImage[0] != nullptr)
+        nifti_image_free(outputWarpedImage[0]);
+    outputWarpedImage[0] = nullptr;
+    if (outputWarpedImage[1] != nullptr)
+        nifti_image_free(outputWarpedImage[1]);
+    outputWarpedImage[1] = nullptr;
+    free(outputWarpedImage);
+    outputWarpedImage = nullptr;
+    // Free the allocated landmarks if used
+    free(referenceLandmark);
+    free(floatingLandmark);
 
-#ifdef _USE_CUDA
-   cudaCommon_unsetCUDACard(&ctx);
-#endif
+    // Erase the registration object
+    delete reg;
 
-   // Clean the allocated images
-   if(refLocalWeightSim!=nullptr) nifti_image_free(refLocalWeightSim);
-   if(referenceImage!=nullptr) nifti_image_free(referenceImage);
-   if(floatingImage!=nullptr) nifti_image_free(floatingImage);
-   if(inputCCPImage!=nullptr) nifti_image_free(inputCCPImage);
-   if(referenceMaskImage!=nullptr) nifti_image_free(referenceMaskImage);
-   if(floatingMaskImage!=nullptr) nifti_image_free(floatingMaskImage);
+    // Clean the allocated images
+    if (refLocalWeightSim != nullptr) nifti_image_free(refLocalWeightSim);
+    if (referenceImage != nullptr) nifti_image_free(referenceImage);
+    if (floatingImage != nullptr) nifti_image_free(floatingImage);
+    if (inputCCPImage != nullptr) nifti_image_free(inputCCPImage);
+    if (referenceMaskImage != nullptr) nifti_image_free(referenceMaskImage);
+    if (floatingMaskImage != nullptr) nifti_image_free(floatingMaskImage);
 
 #ifdef NDEBUG
-   if(verbose)
-   {
+    if (verbose) {
 #endif
-      time_t end;
-      time(&end);
-      int minutes=(int)floorf((end-start)/60.0f);
-      int seconds=(int)(end-start - 60*minutes);
-      text = stringFormat("Registration performed in %i min %i sec", minutes, seconds);
-      reg_print_info((argv[0]), text.c_str());
-      reg_print_info((argv[0]), "Have a good day !");
+        time_t end;
+        time(&end);
+        int minutes = (int)floorf((end - start) / 60.0f);
+        int seconds = (int)(end - start - 60 * minutes);
+        text = stringFormat("Registration performed in %i min %i sec", minutes, seconds);
+        reg_print_info((argv[0]), text.c_str());
+        reg_print_info((argv[0]), "Have a good day !");
 #ifdef NDEBUG
-   }
+    }
 #endif
 
-   return EXIT_SUCCESS;
+    return EXIT_SUCCESS;
 }
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 555d1b59..365ed542 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -31,7 +31,47 @@ Compute* Platform::CreateCompute(Content *con) const {
 Kernel* Platform::CreateKernel(const std::string& name, Content *con) const {
     return kernelFactory->Produce(name, con);
 }
+/* *************************************************************** */
+template<typename Type>
+reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent *con,
+                                               InterfaceOptimiser *opt,
+                                               size_t maxIterationNumber,
+                                               bool useConjGradient,
+                                               bool optimiseX,
+                                               bool optimiseY,
+                                               bool optimiseZ) {
+    reg_optimiser<Type> *optimiser;
+    nifti_image *controlPointGrid = con->F3dContent::GetControlPointGrid();
+    Type *controlPointGridData, *transformationGradientData;
+
+    if (platformCode == NR_PLATFORM_CPU) {
+        optimiser = useConjGradient ? new reg_conjugateGradient<Type>() : new reg_optimiser<Type>();
+        controlPointGridData = (Type*)controlPointGrid->data;
+        transformationGradientData = (Type*)con->F3dContent::GetTransformationGradient()->data;
+    }
+#ifdef _USE_CUDA
+    else if (platformCode == NR_PLATFORM_CUDA) {
+        optimiser = dynamic_cast<reg_optimiser<Type>*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu());
+        controlPointGridData = (Type*)dynamic_cast<CudaF3dContent*>(con)->GetControlPointGridCuda();
+        transformationGradientData = (Type*)dynamic_cast<CudaF3dContent*>(con)->GetTransformationGradientCuda();
+    }
+#endif
+
+    optimiser->Initialise(controlPointGrid->nvox,
+                          controlPointGrid->nz > 1 ? 3 : 2,
+                          optimiseX,
+                          optimiseY,
+                          optimiseZ,
+                          maxIterationNumber,
+                          0, // currentIterationNumber,
+                          opt,
+                          controlPointGridData,
+                          transformationGradientData);
+
+    return optimiser;
 }
+template reg_optimiser<float>* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool);
+template reg_optimiser<double>* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool);
 /* *************************************************************** */
 std::string Platform::GetName() {
     return platformName;
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 6d752afb..47b9f697 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -29,6 +29,15 @@ class Platform {
 
     Compute* CreateCompute(Content *con) const;
     Kernel* CreateKernel(const std::string& name, Content *con) const;
+    template<typename Type>
+    reg_optimiser<Type>* CreateOptimiser(F3dContent *con,
+                                         InterfaceOptimiser *opt,
+                                         size_t maxIterationNumber,
+                                         bool useConjGradient,
+                                         bool optimiseX,
+                                         bool optimiseY,
+                                         bool optimiseZ);
+
     std::string GetName();
 
     int GetPlatformCode();
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 8b086faf..db23e75a 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -11,87 +11,87 @@
  */
 
 #include "_reg_base.h"
+#include "F3dContent.h" // TODO Temporary fix! Remove this line!
 
  /* *************************************************************** */
  /* *************************************************************** */
 template <class T>
-   //Platform
-//   this->platform = nullptr;
-//   this->platformCode = NR_PLATFORM_CPU;
-//   this->gpuIdx = 999;
 reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
-
-    this->optimiser = nullptr;
-    this->maxIterationNumber = 150;
-    this->optimiseX = true;
-    this->optimiseY = true;
-    this->optimiseZ = true;
-    this->perturbationNumber = 0;
-    this->useConjGradient = true;
-    this->useApproxGradient = false;
-
-    this->measure_ssd = nullptr;
-    this->measure_kld = nullptr;
-    this->measure_dti = nullptr;
-    this->measure_lncc = nullptr;
-    this->measure_nmi = nullptr;
-    this->measure_mind = nullptr;
-    this->measure_mindssc = nullptr;
-    this->localWeightSimInput = nullptr;
-    this->localWeightSimCurrent = nullptr;
-
-    this->similarityWeight = 0; // automatically set depending of the penalty term weights
-
-    this->executableName = (char*)"NiftyReg BASE";
-    this->referenceTimePoint = refTimePoint;
-    this->floatingTimePoint = floTimePoint;
-    this->inputReference = nullptr; // pointer to external
-    this->inputFloating = nullptr; // pointer to external
-    this->maskImage = nullptr; // pointer to external
-    this->affineTransformation = nullptr;  // pointer to external
-    this->referenceMask = nullptr;
-    this->referenceSmoothingSigma = 0;
-    this->floatingSmoothingSigma = 0;
-    this->referenceThresholdUp = new float[this->referenceTimePoint];
-    this->referenceThresholdLow = new float[this->referenceTimePoint];
-    this->floatingThresholdUp = new float[this->floatingTimePoint];
-    this->floatingThresholdLow = new float[this->floatingTimePoint];
-    for (int i = 0; i < this->referenceTimePoint; i++) {
-        this->referenceThresholdUp[i] = std::numeric_limits<T>::max();
-        this->referenceThresholdLow[i] = -std::numeric_limits<T>::max();
+    platform = nullptr;
+    platformCode = NR_PLATFORM_CPU;
+    gpuIdx = 999;
+
+    optimiser = nullptr;
+    maxIterationNumber = 150;
+    optimiseX = true;
+    optimiseY = true;
+    optimiseZ = true;
+    perturbationNumber = 0;
+    useConjGradient = true;
+    useApproxGradient = false;
+
+    measure_ssd = nullptr;
+    measure_kld = nullptr;
+    measure_dti = nullptr;
+    measure_lncc = nullptr;
+    measure_nmi = nullptr;
+    measure_mind = nullptr;
+    measure_mindssc = nullptr;
+    localWeightSimInput = nullptr;
+    // localWeightSimCurrent = nullptr;
+
+    similarityWeight = 0; // automatically set depending of the penalty term weights
+
+    executableName = (char*)"NiftyReg BASE";
+    referenceTimePoint = refTimePoint;
+    floatingTimePoint = floTimePoint;
+    inputReference = nullptr; // pointer to external
+    inputFloating = nullptr; // pointer to external
+    maskImage = nullptr; // pointer to external
+    affineTransformation = nullptr;  // pointer to external
+    referenceMask = nullptr;
+    referenceSmoothingSigma = 0;
+    floatingSmoothingSigma = 0;
+    referenceThresholdUp = new float[referenceTimePoint];
+    referenceThresholdLow = new float[referenceTimePoint];
+    floatingThresholdUp = new float[floatingTimePoint];
+    floatingThresholdLow = new float[floatingTimePoint];
+    for (int i = 0; i < referenceTimePoint; i++) {
+        referenceThresholdUp[i] = std::numeric_limits<T>::max();
+        referenceThresholdLow[i] = -std::numeric_limits<T>::max();
     }
-    for (int i = 0; i < this->floatingTimePoint; i++) {
-        this->floatingThresholdUp[i] = std::numeric_limits<T>::max();
-        this->floatingThresholdLow[i] = -std::numeric_limits<T>::max();
+    for (int i = 0; i < floatingTimePoint; i++) {
+        floatingThresholdUp[i] = std::numeric_limits<T>::max();
+        floatingThresholdLow[i] = -std::numeric_limits<T>::max();
     }
-    this->robustRange = false;
-    this->warpedPaddingValue = std::numeric_limits<T>::quiet_NaN();
-    this->levelNumber = 3;
-    this->levelToPerform = 0;
-    this->gradientSmoothingSigma = 0;
-    this->verbose = true;
-    this->usePyramid = true;
-    this->forwardJacobianMatrix = nullptr;
-
-    this->initialised = false;
-    this->referencePyramid = nullptr;
-    this->floatingPyramid = nullptr;
-    this->maskPyramid = nullptr;
-    this->activeVoxelNumber = nullptr;
-    this->currentReference = nullptr;
-    this->currentFloating = nullptr;
-    this->currentMask = nullptr;
-    this->warped = nullptr;
-    this->deformationFieldImage = nullptr;
-    this->warImgGradient = nullptr;
-    this->voxelBasedMeasureGradient = nullptr;
-
-    this->interpolation = 1;
-
-    this->landmarkRegWeight = 0;
-    this->landmarkRegNumber = 0;
-    this->landmarkReference = nullptr;
-    this->landmarkFloating = nullptr;
+    robustRange = false;
+    warpedPaddingValue = std::numeric_limits<T>::quiet_NaN();
+    levelNumber = 3;
+    levelToPerform = 0;
+    gradientSmoothingSigma = 0;
+    verbose = true;
+    usePyramid = true;
+    forwardJacobianMatrix = nullptr;
+
+    initialised = false;
+    referencePyramid = nullptr;
+    floatingPyramid = nullptr;
+    maskPyramid = nullptr;
+    activeVoxelNumber = nullptr;
+    // reference = nullptr;
+    // floating = nullptr;
+    // currentMask = nullptr;
+    // warped = nullptr;
+    // deformationFieldImage = nullptr;
+    // warpedGradient = nullptr;
+    // voxelBasedMeasureGradient = nullptr;
+
+    interpolation = 1;
+
+    landmarkRegWeight = 0;
+    landmarkRegNumber = 0;
+    landmarkReference = nullptr;
+    landmarkFloating = nullptr;
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::reg_base");
@@ -100,13 +100,13 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
 /* *************************************************************** */
 template <class T>
 reg_base<T>::~reg_base() {
-    this->ClearWarped();
-    this->ClearWarpedGradient();
-    this->ClearDeformationField();
-    this->ClearVoxelBasedMeasureGradient();
-    if (this->referencePyramid != nullptr) {
-        if (this->usePyramid) {
-            for (unsigned int i = 0; i < this->levelToPerform; i++) {
+    // DeallocateWarped();
+    // DeallocateWarpedGradient();
+    // DeallocateDeformationField();
+    // DeallocateVoxelBasedMeasureGradient();
+    if (referencePyramid != nullptr) {
+        if (usePyramid) {
+            for (unsigned int i = 0; i < levelToPerform; i++) {
                 if (referencePyramid[i] != nullptr) {
                     nifti_image_free(referencePyramid[i]);
                     referencePyramid[i] = nullptr;
@@ -121,33 +121,31 @@ reg_base<T>::~reg_base() {
         free(referencePyramid);
         referencePyramid = nullptr;
     }
-    if (this->maskPyramid != nullptr) {
-        if (this->usePyramid) {
-            for (unsigned int i = 0; i < this->levelToPerform; i++) {
-                if (this->maskPyramid[i] != nullptr) {
-                    free(this->maskPyramid[i]);
-                    this->maskPyramid[i] = nullptr;
+    if (maskPyramid != nullptr) {
+        if (usePyramid) {
+            for (unsigned int i = 0; i < levelToPerform; i++) {
+                if (maskPyramid[i] != nullptr) {
+                    free(maskPyramid[i]);
+                    maskPyramid[i] = nullptr;
                 }
             }
         } else {
-            if (this->maskPyramid[0] != nullptr) {
-                free(this->maskPyramid[0]);
-                this->maskPyramid[0] = nullptr;
+            if (maskPyramid[0] != nullptr) {
+                free(maskPyramid[0]);
+                maskPyramid[0] = nullptr;
             }
         }
-        free(this->maskPyramid);
+        free(maskPyramid);
         maskPyramid = nullptr;
     }
-    if (this->floatingPyramid != nullptr) {
-        if (this->usePyramid) {
-            for (unsigned int i = 0; i < this->levelToPerform; i++) {
+    if (floatingPyramid != nullptr) {
+        if (usePyramid) {
+            for (unsigned int i = 0; i < levelToPerform; i++) {
                 if (floatingPyramid[i] != nullptr) {
                     nifti_image_free(floatingPyramid[i]);
                     floatingPyramid[i] = nullptr;
                 }
             }
-   //Platform
-//   delete this->platform;
         } else {
             if (floatingPyramid[0] != nullptr) {
                 nifti_image_free(floatingPyramid[0]);
@@ -157,78 +155,56 @@ reg_base<T>::~reg_base() {
         free(floatingPyramid);
         floatingPyramid = nullptr;
     }
-    if (this->activeVoxelNumber != nullptr) {
+    if (activeVoxelNumber != nullptr) {
         free(activeVoxelNumber);
-        this->activeVoxelNumber = nullptr;
+        activeVoxelNumber = nullptr;
     }
-    if (this->referenceThresholdUp != nullptr) {
-        delete[]this->referenceThresholdUp;
-        this->referenceThresholdUp = nullptr;
+    if (referenceThresholdUp != nullptr) {
+        delete[]referenceThresholdUp;
+        referenceThresholdUp = nullptr;
     }
-    if (this->referenceThresholdLow != nullptr) {
-        delete[]this->referenceThresholdLow;
-        this->referenceThresholdLow = nullptr;
+    if (referenceThresholdLow != nullptr) {
+        delete[]referenceThresholdLow;
+        referenceThresholdLow = nullptr;
     }
-    if (this->floatingThresholdUp != nullptr) {
-        delete[]this->floatingThresholdUp;
-        this->floatingThresholdUp = nullptr;
+    if (floatingThresholdUp != nullptr) {
+        delete[]floatingThresholdUp;
+        floatingThresholdUp = nullptr;
     }
-    if (this->floatingThresholdLow != nullptr) {
-        delete[]this->floatingThresholdLow;
-        this->floatingThresholdLow = nullptr;
+    if (floatingThresholdLow != nullptr) {
+        delete[]floatingThresholdLow;
+        floatingThresholdLow = nullptr;
     }
-    if (this->optimiser != nullptr) {
-        delete this->optimiser;
-        this->optimiser = nullptr;
+    if (optimiser != nullptr) {
+        delete optimiser;
+        optimiser = nullptr;
     }
 
-    if (this->measure_nmi != nullptr)
-        delete this->measure_nmi;
-    if (this->measure_ssd != nullptr)
-        delete this->measure_ssd;
-    if (this->measure_kld != nullptr)
-        delete this->measure_kld;
-    if (this->measure_dti != nullptr)
-        delete this->measure_dti;
-    if (this->measure_lncc != nullptr)
-        delete this->measure_lncc;
-    if (this->measure_mind != nullptr)
-        delete this->measure_mind;
-    if (this->measure_mindssc != nullptr)
-        delete this->measure_mindssc;
-
+    if (measure_nmi != nullptr)
+        delete measure_nmi;
+    if (measure_ssd != nullptr)
+        delete measure_ssd;
+    if (measure_kld != nullptr)
+        delete measure_kld;
+    if (measure_dti != nullptr)
+        delete measure_dti;
+    if (measure_lncc != nullptr)
+        delete measure_lncc;
+    if (measure_mind != nullptr)
+        delete measure_mind;
+    if (measure_mindssc != nullptr)
+        delete measure_mindssc;
+
+    delete platform;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::~reg_base");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-//template<class T>
-//void reg_base<T>::setPlaform(Platform* inputPlatform)
-//{
-//    this->platform = inputPlatform;
-//}
-/* *************************************************************** */
-//template<class T>
-//Platform* reg_base<T>::getPlaform()
-//{
-//    return this->platform;
-//}
-/* *************************************************************** */
-//template<class T>
-//void reg_base<T>::setPlatformCode(int inputPlatformCode) {
-//    this->platformCode = inputPlatformCode;
-//}
-/* *************************************************************** */
-//template<class T>
-//void reg_base<T>::setGpuIdx(unsigned inputGPUIdx) {
-//    this->gpuIdx = inputGPUIdx;
-//}
-/* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceImage(nifti_image *r) {
-    this->inputReference = r;
+    inputReference = r;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetReferenceImage");
 #endif
@@ -236,7 +212,7 @@ void reg_base<T>::SetReferenceImage(nifti_image *r) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetFloatingImage(nifti_image *f) {
-    this->inputFloating = f;
+    inputFloating = f;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetFloatingImage");
 #endif
@@ -244,7 +220,7 @@ void reg_base<T>::SetFloatingImage(nifti_image *f) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetMaximalIterationNumber(unsigned int iter) {
-    this->maxIterationNumber = iter;
+    maxIterationNumber = iter;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetMaximalIterationNumber");
 #endif
@@ -252,7 +228,7 @@ void reg_base<T>::SetMaximalIterationNumber(unsigned int iter) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceMask(nifti_image *m) {
-    this->maskImage = m;
+    maskImage = m;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetReferenceMask");
 #endif
@@ -260,7 +236,7 @@ void reg_base<T>::SetReferenceMask(nifti_image *m) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetAffineTransformation(mat44 *a) {
-    this->affineTransformation = a;
+    affineTransformation = a;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetAffineTransformation");
 #endif
@@ -268,7 +244,7 @@ void reg_base<T>::SetAffineTransformation(mat44 *a) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceSmoothingSigma(T s) {
-    this->referenceSmoothingSigma = s;
+    referenceSmoothingSigma = s;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetReferenceSmoothingSigma");
 #endif
@@ -276,7 +252,7 @@ void reg_base<T>::SetReferenceSmoothingSigma(T s) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetFloatingSmoothingSigma(T s) {
-    this->floatingSmoothingSigma = s;
+    floatingSmoothingSigma = s;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetFloatingSmoothingSigma");
 #endif
@@ -284,7 +260,7 @@ void reg_base<T>::SetFloatingSmoothingSigma(T s) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceThresholdUp(unsigned int i, T t) {
-    this->referenceThresholdUp[i] = t;
+    referenceThresholdUp[i] = t;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetReferenceThresholdUp");
 #endif
@@ -292,7 +268,7 @@ void reg_base<T>::SetReferenceThresholdUp(unsigned int i, T t) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceThresholdLow(unsigned int i, T t) {
-    this->referenceThresholdLow[i] = t;
+    referenceThresholdLow[i] = t;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetReferenceThresholdLow");
 #endif
@@ -300,7 +276,7 @@ void reg_base<T>::SetReferenceThresholdLow(unsigned int i, T t) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetFloatingThresholdUp(unsigned int i, T t) {
-    this->floatingThresholdUp[i] = t;
+    floatingThresholdUp[i] = t;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetFloatingThresholdUp");
 #endif
@@ -308,7 +284,7 @@ void reg_base<T>::SetFloatingThresholdUp(unsigned int i, T t) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetFloatingThresholdLow(unsigned int i, T t) {
-    this->floatingThresholdLow[i] = t;
+    floatingThresholdLow[i] = t;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetFloatingThresholdLow");
 #endif
@@ -316,7 +292,7 @@ void reg_base<T>::SetFloatingThresholdLow(unsigned int i, T t) {
 /* *************************************************************** */
 template <class T>
 void reg_base<T>::UseRobustRange() {
-    this->robustRange = true;
+    robustRange = true;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseRobustRange");
 #endif
@@ -324,15 +300,15 @@ void reg_base<T>::UseRobustRange() {
 /* *************************************************************** */
 template <class T>
 void reg_base<T>::DoNotUseRobustRange() {
-    this->robustRange = false;
+    robustRange = false;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseRobustRange");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetWarpedPaddingValue(T p) {
-    this->warpedPaddingValue = p;
+void reg_base<T>::SetWarpedPaddingValue(float p) {
+    warpedPaddingValue = p;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetWarpedPaddingValue");
 #endif
@@ -340,7 +316,7 @@ void reg_base<T>::SetWarpedPaddingValue(T p) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLevelNumber(unsigned int l) {
-    this->levelNumber = l;
+    levelNumber = l;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetLevelNumber");
 #endif
@@ -348,7 +324,7 @@ void reg_base<T>::SetLevelNumber(unsigned int l) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLevelToPerform(unsigned int l) {
-    this->levelToPerform = l;
+    levelToPerform = l;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetLevelToPerform");
 #endif
@@ -356,7 +332,7 @@ void reg_base<T>::SetLevelToPerform(unsigned int l) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetGradientSmoothingSigma(T g) {
-    this->gradientSmoothingSigma = g;
+    gradientSmoothingSigma = g;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetGradientSmoothingSigma");
 #endif
@@ -364,7 +340,7 @@ void reg_base<T>::SetGradientSmoothingSigma(T g) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseConjugateGradient() {
-    this->useConjGradient = true;
+    useConjGradient = true;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseConjugateGradient");
 #endif
@@ -372,7 +348,7 @@ void reg_base<T>::UseConjugateGradient() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::DoNotUseConjugateGradient() {
-    this->useConjGradient = false;
+    useConjGradient = false;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::DoNotUseConjugateGradient");
 #endif
@@ -380,7 +356,7 @@ void reg_base<T>::DoNotUseConjugateGradient() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseApproximatedGradient() {
-    this->useApproxGradient = true;
+    useApproxGradient = true;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseApproximatedGradient");
 #endif
@@ -388,7 +364,7 @@ void reg_base<T>::UseApproximatedGradient() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::DoNotUseApproximatedGradient() {
-    this->useApproxGradient = false;
+    useApproxGradient = false;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::DoNotUseApproximatedGradient");
 #endif
@@ -396,7 +372,7 @@ void reg_base<T>::DoNotUseApproximatedGradient() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::PrintOutInformation() {
-    this->verbose = true;
+    verbose = true;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::PrintOutInformation");
 #endif
@@ -404,7 +380,7 @@ void reg_base<T>::PrintOutInformation() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::DoNotPrintOutInformation() {
-    this->verbose = false;
+    verbose = false;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::DoNotPrintOutInformation");
 #endif
@@ -412,7 +388,7 @@ void reg_base<T>::DoNotPrintOutInformation() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::DoNotUsePyramidalApproach() {
-    this->usePyramid = false;
+    usePyramid = false;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::DoNotUsePyramidalApproach");
 #endif
@@ -420,7 +396,7 @@ void reg_base<T>::DoNotUsePyramidalApproach() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseNearestNeighborInterpolation() {
-    this->interpolation = 0;
+    interpolation = 0;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseNearestNeighborInterpolation");
 #endif
@@ -428,7 +404,7 @@ void reg_base<T>::UseNearestNeighborInterpolation() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseLinearInterpolation() {
-    this->interpolation = 1;
+    interpolation = 1;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseLinearInterpolation");
 #endif
@@ -436,7 +412,7 @@ void reg_base<T>::UseLinearInterpolation() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseCubicSplineInterpolation() {
-    this->interpolation = 3;
+    interpolation = 3;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseCubicSplineInterpolation");
 #endif
@@ -444,201 +420,201 @@ void reg_base<T>::UseCubicSplineInterpolation() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, float w) {
-    this->landmarkRegNumber = n;
-    this->landmarkReference = r;
-    this->landmarkFloating = f;
-    this->landmarkRegWeight = w;
+    landmarkRegNumber = n;
+    landmarkReference = r;
+    landmarkFloating = f;
+    landmarkRegWeight = w;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetLandmarkRegularisationParam");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
-void reg_base<T>::ClearCurrentInputImage() {
-    this->currentReference = nullptr;
-    this->currentMask = nullptr;
-    this->currentFloating = nullptr;
-    if (this->localWeightSimCurrent != nullptr)
-        nifti_image_free(this->localWeightSimCurrent);
-    this->localWeightSimCurrent = nullptr;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::ClearCurrentInputImage");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_base<T>::AllocateWarped() {
-    if (this->currentReference == nullptr) {
-        reg_print_fct_error("reg_base::AllocateWarped()");
-        reg_print_msg_error("The reference image is not defined");
-        reg_exit();
-    }
-    reg_base<T>::ClearWarped();
-    this->warped = nifti_copy_nim_info(this->currentReference);
-    this->warped->dim[0] = this->warped->ndim = this->currentFloating->ndim;
-    this->warped->dim[4] = this->warped->nt = this->currentFloating->nt;
-    this->warped->pixdim[4] = this->warped->dt = 1;
-    this->warped->nvox = (size_t)(this->warped->nx * this->warped->ny * this->warped->nz * this->warped->nt);
-    this->warped->scl_slope = 1;
-    this->warped->scl_inter = 0;
-    this->warped->datatype = this->currentFloating->datatype;
-    this->warped->nbyper = this->currentFloating->nbyper;
-    this->warped->data = (void*)calloc(this->warped->nvox, this->warped->nbyper);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::AllocateWarped");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_base<T>::ClearWarped() {
-    if (this->warped != nullptr)
-        nifti_image_free(this->warped);
-    this->warped = nullptr;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::ClearWarped");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_base<T>::AllocateDeformationField() {
-    if (this->currentReference == nullptr) {
-        reg_print_fct_error("reg_base::AllocateDeformationField()");
-        reg_print_msg_error("The reference image is not defined");
-        reg_exit();
-    }
-    reg_base<T>::ClearDeformationField();
-    this->deformationFieldImage = nifti_copy_nim_info(this->currentReference);
-    this->deformationFieldImage->dim[0] = this->deformationFieldImage->ndim = 5;
-    this->deformationFieldImage->dim[1] = this->deformationFieldImage->nx = this->currentReference->nx;
-    this->deformationFieldImage->dim[2] = this->deformationFieldImage->ny = this->currentReference->ny;
-    this->deformationFieldImage->dim[3] = this->deformationFieldImage->nz = this->currentReference->nz;
-    this->deformationFieldImage->dim[4] = this->deformationFieldImage->nt = 1;
-    this->deformationFieldImage->pixdim[4] = this->deformationFieldImage->dt = 1.0;
-    if (this->currentReference->nz == 1)
-        this->deformationFieldImage->dim[5] = this->deformationFieldImage->nu = 2;
-    else this->deformationFieldImage->dim[5] = this->deformationFieldImage->nu = 3;
-    this->deformationFieldImage->pixdim[5] = this->deformationFieldImage->du = 1.0;
-    this->deformationFieldImage->dim[6] = this->deformationFieldImage->nv = 1;
-    this->deformationFieldImage->pixdim[6] = this->deformationFieldImage->dv = 1.0;
-    this->deformationFieldImage->dim[7] = this->deformationFieldImage->nw = 1;
-    this->deformationFieldImage->pixdim[7] = this->deformationFieldImage->dw = 1.0;
-    this->deformationFieldImage->nvox =
-        (size_t)this->deformationFieldImage->nx *
-        (size_t)this->deformationFieldImage->ny *
-        (size_t)this->deformationFieldImage->nz *
-        (size_t)this->deformationFieldImage->nt *
-        (size_t)this->deformationFieldImage->nu;
-    this->deformationFieldImage->nbyper = sizeof(T);
-    if (sizeof(T) == sizeof(float))
-        this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32;
-    else this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT64;
-    this->deformationFieldImage->data = (void*)calloc(this->deformationFieldImage->nvox,
-                                                      this->deformationFieldImage->nbyper);
-    this->deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR;
-    memset(this->deformationFieldImage->intent_name, 0, 16);
-    strcpy(this->deformationFieldImage->intent_name, "NREG_TRANS");
-    this->deformationFieldImage->intent_p1 = DEF_FIELD;
-    this->deformationFieldImage->scl_slope = 1;
-    this->deformationFieldImage->scl_inter = 0;
-
-    if (this->measure_dti != nullptr)
-        this->forwardJacobianMatrix = (mat33*)malloc(this->deformationFieldImage->nx * this->deformationFieldImage->ny *
-                                                     this->deformationFieldImage->nz * sizeof(mat33));
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::AllocateDeformationField");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_base<T>::ClearDeformationField() {
-    if (this->deformationFieldImage != nullptr) {
-        nifti_image_free(this->deformationFieldImage);
-        this->deformationFieldImage = nullptr;
-    }
-    if (this->forwardJacobianMatrix != nullptr)
-        free(this->forwardJacobianMatrix);
-    this->forwardJacobianMatrix = nullptr;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::ClearDeformationField");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_base<T>::AllocateWarpedGradient() {
-    if (this->deformationFieldImage == nullptr) {
-        reg_print_fct_error("reg_base::AllocateWarpedGradient()");
-        reg_print_msg_error("The deformation field image is not defined");
-        reg_exit();
-    }
-    reg_base<T>::ClearWarpedGradient();
-    this->warImgGradient = nifti_copy_nim_info(this->deformationFieldImage);
-    this->warImgGradient->data = (void*)calloc(this->warImgGradient->nvox,
-                                                this->warImgGradient->nbyper);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::AllocateWarpedGradient");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_base<T>::ClearWarpedGradient() {
-    if (this->warImgGradient != nullptr) {
-        nifti_image_free(this->warImgGradient);
-        this->warImgGradient = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::ClearWarpedGradient");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_base<T>::AllocateVoxelBasedMeasureGradient() {
-    if (this->deformationFieldImage == nullptr) {
-        reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()");
-        reg_print_msg_error("The deformation field image is not defined");
-        reg_exit();
-    }
-    reg_base<T>::ClearVoxelBasedMeasureGradient();
-    this->voxelBasedMeasureGradient = nifti_copy_nim_info(this->deformationFieldImage);
-    this->voxelBasedMeasureGradient->data = (void*)calloc(this->voxelBasedMeasureGradient->nvox,
-                                                           this->voxelBasedMeasureGradient->nbyper);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::AllocateVoxelBasedMeasureGradient");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_base<T>::ClearVoxelBasedMeasureGradient() {
-    if (this->voxelBasedMeasureGradient != nullptr) {
-        nifti_image_free(this->voxelBasedMeasureGradient);
-        this->voxelBasedMeasureGradient = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::ClearVoxelBasedMeasureGradient");
-#endif
-}
+// template <class T>
+// void reg_base<T>::DeallocateCurrentInputImage() {
+//     reference = nullptr;
+//     currentMask = nullptr;
+//     floating = nullptr;
+//     if (localWeightSimCurrent != nullptr)
+//         nifti_image_free(localWeightSimCurrent);
+//     localWeightSimCurrent = nullptr;
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_base<T>::DeallocateCurrentInputImage");
+// #endif
+// }
+/* *************************************************************** */
+/* *************************************************************** */
+// template <class T>
+// void reg_base<T>::AllocateWarped() {
+//     if (reference == nullptr) {
+//         reg_print_fct_error("reg_base::AllocateWarped()");
+//         reg_print_msg_error("The reference image is not defined");
+//         reg_exit();
+//     }
+//     reg_base<T>::DeallocateWarped();
+//     warped = nifti_copy_nim_info(reference);
+//     warped->dim[0] = warped->ndim = floating->ndim;
+//     warped->dim[4] = warped->nt = floating->nt;
+//     warped->pixdim[4] = warped->dt = 1;
+//     warped->nvox = (size_t)(warped->nx * warped->ny * warped->nz * warped->nt);
+//     warped->scl_slope = 1;
+//     warped->scl_inter = 0;
+//     warped->datatype = floating->datatype;
+//     warped->nbyper = floating->nbyper;
+//     warped->data = (void*)calloc(warped->nvox, warped->nbyper);
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_base<T>::AllocateWarped");
+// #endif
+// }
+/* *************************************************************** */
+// template <class T>
+// void reg_base<T>::DeallocateWarped() {
+//     if (warped != nullptr)
+//         nifti_image_free(warped);
+//     warped = nullptr;
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_base<T>::DeallocateWarped");
+// #endif
+// }
+/* *************************************************************** */
+/* *************************************************************** */
+// template <class T>
+// void reg_base<T>::AllocateDeformationField() {
+//     if (reference == nullptr) {
+//         reg_print_fct_error("reg_base::AllocateDeformationField()");
+//         reg_print_msg_error("The reference image is not defined");
+//         reg_exit();
+//     }
+//     reg_base<T>::DeallocateDeformationField();
+//     deformationFieldImage = nifti_copy_nim_info(reference);
+//     deformationFieldImage->dim[0] = deformationFieldImage->ndim = 5;
+//     // deformationFieldImage->dim[1] = deformationFieldImage->nx;
+//     // deformationFieldImage->dim[2] = deformationFieldImage->ny;
+//     // deformationFieldImage->dim[3] = deformationFieldImage->nz;
+//     deformationFieldImage->dim[4] = deformationFieldImage->nt = 1;
+//     deformationFieldImage->pixdim[4] = deformationFieldImage->dt = 1.0;
+//     if (reference->nz == 1)
+//         deformationFieldImage->dim[5] = deformationFieldImage->nu = 2;
+//     else deformationFieldImage->dim[5] = deformationFieldImage->nu = 3;
+//     deformationFieldImage->pixdim[5] = deformationFieldImage->du = 1.0;
+//     deformationFieldImage->dim[6] = deformationFieldImage->nv = 1;
+//     deformationFieldImage->pixdim[6] = deformationFieldImage->dv = 1.0;
+//     deformationFieldImage->dim[7] = deformationFieldImage->nw = 1;
+//     deformationFieldImage->pixdim[7] = deformationFieldImage->dw = 1.0;
+//     deformationFieldImage->nvox =
+//         (size_t)deformationFieldImage->nx *
+//         (size_t)deformationFieldImage->ny *
+//         (size_t)deformationFieldImage->nz *
+//         (size_t)deformationFieldImage->nt *
+//         (size_t)deformationFieldImage->nu;
+//     deformationFieldImage->nbyper = sizeof(T);
+//     if (sizeof(T) == sizeof(float))
+//         deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32;
+//     else deformationFieldImage->datatype = NIFTI_TYPE_FLOAT64;
+//     deformationFieldImage->data = (void*)calloc(deformationFieldImage->nvox,
+//                                                 deformationFieldImage->nbyper);
+//     deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR;
+//     memset(deformationFieldImage->intent_name, 0, 16);
+//     strcpy(deformationFieldImage->intent_name, "NREG_TRANS");
+//     deformationFieldImage->intent_p1 = DEF_FIELD;
+//     deformationFieldImage->scl_slope = 1;
+//     deformationFieldImage->scl_inter = 0;
+
+//     if (measure_dti != nullptr)
+//         forwardJacobianMatrix = (mat33*)malloc(deformationFieldImage->nx * deformationFieldImage->ny *
+//                                                deformationFieldImage->nz * sizeof(mat33));
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_base<T>::AllocateDeformationField");
+// #endif
+// }
+/* *************************************************************** */
+// template <class T>
+// void reg_base<T>::DeallocateDeformationField() {
+//     if (deformationFieldImage != nullptr) {
+//         nifti_image_free(deformationFieldImage);
+//         deformationFieldImage = nullptr;
+//     }
+//     if (forwardJacobianMatrix != nullptr)
+//         free(forwardJacobianMatrix);
+//     forwardJacobianMatrix = nullptr;
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_base<T>::DeallocateDeformationField");
+// #endif
+// }
+/* *************************************************************** */
+// template <class T>
+// void reg_base<T>::AllocateWarpedGradient() {
+//     if (deformationFieldImage == nullptr) {
+//         reg_print_fct_error("reg_base::AllocateWarpedGradient()");
+//         reg_print_msg_error("The deformation field image is not defined");
+//         reg_exit();
+//     }
+//     reg_base<T>::DeallocateWarpedGradient();
+//     warpedGradient = nifti_copy_nim_info(deformationFieldImage);
+//     warpedGradient->data = (void*)calloc(warpedGradient->nvox,
+//                                          warpedGradient->nbyper);
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_base<T>::AllocateWarpedGradient");
+// #endif
+// }
+/* *************************************************************** */
+// template <class T>
+// void reg_base<T>::DeallocateWarpedGradient() {
+//     if (warpedGradient != nullptr) {
+//         nifti_image_free(warpedGradient);
+//         warpedGradient = nullptr;
+//     }
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_base<T>::DeallocateWarpedGradient");
+// #endif
+// }
+/* *************************************************************** */
+// template <class T>
+// void reg_base<T>::AllocateVoxelBasedMeasureGradient() {
+//     if (deformationFieldImage == nullptr) {
+//         reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()");
+//         reg_print_msg_error("The deformation field image is not defined");
+//         reg_exit();
+//     }
+//     reg_base<T>::DeallocateVoxelBasedMeasureGradient();
+//     voxelBasedMeasureGradient = nifti_copy_nim_info(deformationFieldImage);
+//     voxelBasedMeasureGradient->data = (void*)calloc(voxelBasedMeasureGradient->nvox,
+//                                                     voxelBasedMeasureGradient->nbyper);
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_base<T>::AllocateVoxelBasedMeasureGradient");
+// #endif
+// }
+/* *************************************************************** */
+// template <class T>
+// void reg_base<T>::DeallocateVoxelBasedMeasureGradient() {
+//     if (voxelBasedMeasureGradient != nullptr) {
+//         nifti_image_free(voxelBasedMeasureGradient);
+//         voxelBasedMeasureGradient = nullptr;
+//     }
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_base<T>::DeallocateVoxelBasedMeasureGradient");
+// #endif
+// }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::CheckParameters() {
     // CHECK THAT BOTH INPUT IMAGES ARE DEFINED
-    if (this->inputReference == nullptr) {
+    if (inputReference == nullptr) {
         reg_print_fct_error("reg_base::CheckParameters()");
         reg_print_msg_error("The reference image is not defined");
         reg_exit();
     }
-    if (this->inputFloating == nullptr) {
+    if (inputFloating == nullptr) {
         reg_print_fct_error("reg_base::CheckParameters()");
         reg_print_msg_error("The floating image is not defined");
         reg_exit();
     }
 
     // CHECK THE MASK DIMENSION IF IT IS DEFINED
-    if (this->maskImage != nullptr) {
-        if (this->inputReference->nx != this->maskImage->nx ||
-            this->inputReference->ny != this->maskImage->ny ||
-            this->inputReference->nz != this->maskImage->nz) {
+    if (maskImage != nullptr) {
+        if (inputReference->nx != maskImage->nx ||
+            inputReference->ny != maskImage->ny ||
+            inputReference->nz != maskImage->nz) {
             reg_print_fct_error("reg_base::CheckParameters()");
             reg_print_msg_error("The reference and mask images have different dimension");
             reg_exit();
@@ -646,24 +622,24 @@ void reg_base<T>::CheckParameters() {
     }
 
     // CHECK THE NUMBER OF LEVEL TO PERFORM
-    if (this->levelToPerform > 0) {
-        this->levelToPerform = this->levelToPerform < this->levelNumber ? this->levelToPerform : this->levelNumber;
-    } else this->levelToPerform = this->levelNumber;
-    if (this->levelToPerform == 0 || this->levelToPerform > this->levelNumber)
-        this->levelToPerform = this->levelNumber;
+    if (levelToPerform > 0) {
+        levelToPerform = levelToPerform < levelNumber ? levelToPerform : levelNumber;
+    } else levelToPerform = levelNumber;
+    if (levelToPerform == 0 || levelToPerform > levelNumber)
+        levelToPerform = levelNumber;
 
     // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
-    if (this->measure_nmi == nullptr &&
-        this->measure_ssd == nullptr &&
-        this->measure_dti == nullptr &&
-        this->measure_lncc == nullptr &&
-        this->measure_lncc == nullptr &&
-        this->measure_kld == nullptr &&
-        this->measure_mind == nullptr &&
-        this->measure_mindssc == nullptr) {
-        this->measure_nmi = new reg_nmi;
-        for (int i = 0; i < this->inputReference->nt; ++i)
-            this->measure_nmi->SetTimepointWeight(i, 1.0);
+    if (measure_nmi == nullptr &&
+        measure_ssd == nullptr &&
+        measure_dti == nullptr &&
+        measure_lncc == nullptr &&
+        measure_lncc == nullptr &&
+        measure_kld == nullptr &&
+        measure_mind == nullptr &&
+        measure_mindssc == nullptr) {
+        measure_nmi = new reg_nmi;
+        for (int i = 0; i < inputReference->nt; ++i)
+            measure_nmi->SetTimepointWeight(i, 1.0);
     }
 
     // CHECK THAT IMAGES HAVE SAME NUMBER OF CHANNELS (TIMEPOINTS)
@@ -674,19 +650,19 @@ void reg_base<T>::CheckParameters() {
     // NOTE - DTI currently ignored as needs fixing
     //
     // tests ignored if using MIND or MINDSSD as they are not implemented for multi-channel or weighting
-    if (this->measure_mind == nullptr && this->measure_mindssc == nullptr) {
-        if (this->inputFloating->nt != this->inputReference->nt) {
+    if (measure_mind == nullptr && measure_mindssc == nullptr) {
+        if (inputFloating->nt != inputReference->nt) {
             reg_print_fct_error("reg_base::CheckParameters()");
             reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)");
             reg_exit();
         }
-        double *chanWeightSum = new double[this->inputReference->nt]();
+        double *chanWeightSum = new double[inputReference->nt]();
         double simWeightSum, totWeightSum = 0.;
         double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr;
-        if (this->measure_nmi != nullptr) {
-            nmiWeights = this->measure_nmi->GetTimepointsWeights();
+        if (measure_nmi != nullptr) {
+            nmiWeights = measure_nmi->GetTimepointsWeights();
             simWeightSum = 0.0;
-            for (int n = 0; n < this->inputReference->nt; n++) {
+            for (int n = 0; n < inputReference->nt; n++) {
                 if (nmiWeights[n] < 0) {
                     char text[255];
                     sprintf(text, "The NMI weight for timepoint %d has a negative value - weights must be positive", n);
@@ -703,10 +679,10 @@ void reg_base<T>::CheckParameters() {
                 reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored");
             }
         }
-        if (this->measure_ssd != nullptr) {
-            ssdWeights = this->measure_ssd->GetTimepointsWeights();
+        if (measure_ssd != nullptr) {
+            ssdWeights = measure_ssd->GetTimepointsWeights();
             simWeightSum = 0.0;
-            for (int n = 0; n < this->inputReference->nt; n++) {
+            for (int n = 0; n < inputReference->nt; n++) {
                 if (ssdWeights[n] < 0) {
                     char text[255];
                     sprintf(text, "The SSD weight for timepoint %d has a negative value - weights must be positive", n);
@@ -723,10 +699,10 @@ void reg_base<T>::CheckParameters() {
                 reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored");
             }
         }
-        if (this->measure_kld != nullptr) {
-            kldWeights = this->measure_kld->GetTimepointsWeights();
+        if (measure_kld != nullptr) {
+            kldWeights = measure_kld->GetTimepointsWeights();
             simWeightSum = 0.0;
-            for (int n = 0; n < this->inputReference->nt; n++) {
+            for (int n = 0; n < inputReference->nt; n++) {
                 if (kldWeights[n] < 0) {
                     char text[255];
                     sprintf(text, "The KLD weight for timepoint %d has a negative value - weights must be positive", n);
@@ -743,10 +719,10 @@ void reg_base<T>::CheckParameters() {
                 reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored");
             }
         }
-        if (this->measure_lncc != nullptr) {
-            lnccWeights = this->measure_lncc->GetTimepointsWeights();
+        if (measure_lncc != nullptr) {
+            lnccWeights = measure_lncc->GetTimepointsWeights();
             simWeightSum = 0.0;
-            for (int n = 0; n < this->inputReference->nt; n++) {
+            for (int n = 0; n < inputReference->nt; n++) {
                 if (lnccWeights[n] < 0) {
                     char text[255];
                     sprintf(text, "The LNCC weight for timepoint %d has a negative value - weights must be positive", n);
@@ -763,21 +739,21 @@ void reg_base<T>::CheckParameters() {
                 reg_print_msg_warn("The LNCC similarity measure has a weight of 0 for all channels so will be ignored");
             }
         }
-        for (int n = 0; n < this->inputReference->nt; n++) {
+        for (int n = 0; n < inputReference->nt; n++) {
             if (chanWeightSum[n] == 0) {
                 char text[255];
                 sprintf(text, "Channel %d has a weight of 0 for all similarity measures so will be ignored", n);
                 reg_print_fct_warn("reg_base::CheckParameters()");
                 reg_print_msg_warn(text);
             }
-            if (this->measure_nmi != nullptr)
-                this->measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum);
-            if (this->measure_ssd != nullptr)
-                this->measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum);
-            if (this->measure_kld != nullptr)
-                this->measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum);
-            if (this->measure_lncc != nullptr)
-                this->measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum);
+            if (measure_nmi != nullptr)
+                measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum);
+            if (measure_ssd != nullptr)
+                measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum);
+            if (measure_kld != nullptr)
+                measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum);
+            if (measure_lncc != nullptr)
+                measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum);
         }
         delete[] chanWeightSum;
     }
@@ -789,92 +765,70 @@ void reg_base<T>::CheckParameters() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::InitialiseSimilarity() {
-
-    if (this->localWeightSimInput != nullptr) {
-        if (this->localWeightSimCurrent != nullptr)
-            nifti_image_free(this->localWeightSimCurrent);
-        this->localWeightSimCurrent = nifti_copy_nim_info(this->currentReference);
-        this->localWeightSimCurrent->dim[0] = this->localWeightSimCurrent->ndim = this->localWeightSimInput->dim[0];
-        this->localWeightSimCurrent->dim[4] = this->localWeightSimCurrent->nt = this->localWeightSimInput->dim[4];
-        this->localWeightSimCurrent->dim[5] = this->localWeightSimCurrent->nu = this->localWeightSimInput->dim[5];
-        this->localWeightSimCurrent->nvox = (size_t)this->localWeightSimCurrent->nx *
-            this->localWeightSimCurrent->ny * this->localWeightSimCurrent->nz *
-            this->localWeightSimCurrent->nt * this->localWeightSimCurrent->nu;
-        this->localWeightSimCurrent->data = (void*)malloc(this->localWeightSimCurrent->nvox *
-                                                           this->localWeightSimCurrent->nbyper);
-        reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, this->voxelBasedMeasureGradient, 0);
-        reg_getDeformationFromDisplacement(this->voxelBasedMeasureGradient);
-        reg_tools_changeDatatype<T>(localWeightSimInput);
-        reg_resampleImage(this->localWeightSimInput,
-                          this->localWeightSimCurrent,
-                          this->voxelBasedMeasureGradient,
-                          nullptr,
-                          1,
-                          0);
-    } else this->localWeightSimCurrent = nullptr;
-
-    if (this->measure_nmi != nullptr)
-        this->measure_nmi->InitialiseMeasure(this->currentReference,
-                                             this->currentFloating,
-                                             this->currentMask,
-                                             this->warped,
-                                             this->warImgGradient,
-                                             this->voxelBasedMeasureGradient,
-                                             this->localWeightSimCurrent);
-
-    if (this->measure_ssd != nullptr)
-        this->measure_ssd->InitialiseMeasure(this->currentReference,
-                                             this->currentFloating,
-                                             this->currentMask,
-                                             this->warped,
-                                             this->warImgGradient,
-                                             this->voxelBasedMeasureGradient,
-                                             this->localWeightSimCurrent);
-
-    if (this->measure_kld != nullptr)
-        this->measure_kld->InitialiseMeasure(this->currentReference,
-                                             this->currentFloating,
-                                             this->currentMask,
-                                             this->warped,
-                                             this->warImgGradient,
-                                             this->voxelBasedMeasureGradient,
-                                             this->localWeightSimCurrent);
-
-    if (this->measure_lncc != nullptr)
-        this->measure_lncc->InitialiseMeasure(this->currentReference,
-                                              this->currentFloating,
-                                              this->currentMask,
-                                              this->warped,
-                                              this->warImgGradient,
-                                              this->voxelBasedMeasureGradient,
-                                              this->localWeightSimCurrent);
-
-    if (this->measure_dti != nullptr)
-        this->measure_dti->InitialiseMeasure(this->currentReference,
-                                             this->currentFloating,
-                                             this->currentMask,
-                                             this->warped,
-                                             this->warImgGradient,
-                                             this->voxelBasedMeasureGradient,
-                                             this->localWeightSimCurrent);
-
-    if (this->measure_mind != nullptr)
-        this->measure_mind->InitialiseMeasure(this->currentReference,
-                                              this->currentFloating,
-                                              this->currentMask,
-                                              this->warped,
-                                              this->warImgGradient,
-                                              this->voxelBasedMeasureGradient,
-                                              this->localWeightSimCurrent);
-
-    if (this->measure_mindssc != nullptr)
-        this->measure_mindssc->InitialiseMeasure(this->currentReference,
-                                                 this->currentFloating,
-                                                 this->currentMask,
-                                                 this->warped,
-                                                 this->warImgGradient,
-                                                 this->voxelBasedMeasureGradient,
-                                                 this->localWeightSimCurrent);
+    // TODO Update this section to handle CUDA
+    // TODO Move this function to reg_f3d
+    if (measure_nmi != nullptr)
+        measure_nmi->InitialiseMeasure(con->GetReference(),
+                                       con->GetFloating(),
+                                       con->GetReferenceMask(),
+                                       con->GetWarped(),
+                                       dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
+                                       dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
+                                       dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+
+    if (measure_ssd != nullptr)
+        measure_ssd->InitialiseMeasure(con->GetReference(),
+                                       con->GetFloating(),
+                                       con->GetReferenceMask(),
+                                       con->GetWarped(),
+                                       dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
+                                       dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
+                                       dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+
+    if (measure_kld != nullptr)
+        measure_kld->InitialiseMeasure(con->GetReference(),
+                                       con->GetFloating(),
+                                       con->GetReferenceMask(),
+                                       con->GetWarped(),
+                                       dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
+                                       dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
+                                       dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+
+    if (measure_lncc != nullptr)
+        measure_lncc->InitialiseMeasure(con->GetReference(),
+                                        con->GetFloating(),
+                                        con->GetReferenceMask(),
+                                        con->GetWarped(),
+                                        dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
+                                        dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
+                                        dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+
+    if (measure_dti != nullptr)
+        measure_dti->InitialiseMeasure(con->GetReference(),
+                                       con->GetFloating(),
+                                       con->GetReferenceMask(),
+                                       con->GetWarped(),
+                                       dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
+                                       dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
+                                       dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+
+    if (measure_mind != nullptr)
+        measure_mind->InitialiseMeasure(con->GetReference(),
+                                        con->GetFloating(),
+                                        con->GetReferenceMask(),
+                                        con->GetWarped(),
+                                        dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
+                                        dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
+                                        dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+
+    if (measure_mindssc != nullptr)
+        measure_mindssc->InitialiseMeasure(con->GetReference(),
+                                           con->GetFloating(),
+                                           con->GetReferenceMask(),
+                                           con->GetWarped(),
+                                           dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
+                                           dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
+                                           dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::InitialiseSimilarity");
@@ -882,111 +836,110 @@ void reg_base<T>::InitialiseSimilarity() {
 }
 /* *************************************************************** */
 template<class T>
-   //PLATFORM
-//   this->platform = new Platform(this->platformCode);
-//   this->platform->setGpuIdx(this->gpuIdx);
 void reg_base<T>::Initialise() {
-    if (this->initialised) return;
+    if (initialised) return;
 
-    this->CheckParameters();
+    CheckParameters();
 
+    platform = new Platform(platformCode);
+    platform->SetGpuIdx(gpuIdx);
 
     // CREATE THE PYRAMIDE IMAGES
-    if (this->usePyramid) {
-        this->referencePyramid = (nifti_image**)malloc(this->levelToPerform * sizeof(nifti_image*));
-        this->floatingPyramid = (nifti_image**)malloc(this->levelToPerform * sizeof(nifti_image*));
-        this->maskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*));
-        this->activeVoxelNumber = (int*)malloc(this->levelToPerform * sizeof(int));
+    if (usePyramid) {
+        referencePyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*));
+        floatingPyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*));
+        maskPyramid = (int**)malloc(levelToPerform * sizeof(int*));
+        activeVoxelNumber = (int*)malloc(levelToPerform * sizeof(int));
     } else {
-        this->referencePyramid = (nifti_image**)malloc(sizeof(nifti_image*));
-        this->floatingPyramid = (nifti_image**)malloc(sizeof(nifti_image*));
-        this->maskPyramid = (int**)malloc(sizeof(int*));
-        this->activeVoxelNumber = (int*)malloc(sizeof(int));
+        referencePyramid = (nifti_image**)malloc(sizeof(nifti_image*));
+        floatingPyramid = (nifti_image**)malloc(sizeof(nifti_image*));
+        maskPyramid = (int**)malloc(sizeof(int*));
+        activeVoxelNumber = (int*)malloc(sizeof(int));
     }
 
     // Update the input images threshold if required
-    if (this->robustRange == true) {
+    if (robustRange) {
         // Create a copy of the reference image to extract the robust range
-        nifti_image *temp_reference = nifti_copy_nim_info(this->inputReference);
+        nifti_image *temp_reference = nifti_copy_nim_info(inputReference);
         temp_reference->data = (void*)malloc(temp_reference->nvox * temp_reference->nbyper);
-        memcpy(temp_reference->data, this->inputReference->data, temp_reference->nvox * temp_reference->nbyper);
+        memcpy(temp_reference->data, inputReference->data, temp_reference->nvox * temp_reference->nbyper);
         reg_tools_changeDatatype<T>(temp_reference);
         // Extract the robust range of the reference image
         T *refDataPtr = static_cast<T *>(temp_reference->data);
         reg_heapSort(refDataPtr, temp_reference->nvox);
         // Update the reference threshold values if no value has been setup by the user
-        if (this->referenceThresholdLow[0] == -std::numeric_limits<T>::max())
-            this->referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.02f)];
-        if (this->referenceThresholdUp[0] == std::numeric_limits<T>::max())
-            this->referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.98f)];
-        // Free the temporarly allocated image
+        if (referenceThresholdLow[0] == -std::numeric_limits<T>::max())
+            referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.02f)];
+        if (referenceThresholdUp[0] == std::numeric_limits<T>::max())
+            referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.98f)];
+        // Free the temporarily allocated image
         nifti_image_free(temp_reference);
 
         // Create a copy of the floating image to extract the robust range
-        nifti_image *temp_floating = nifti_copy_nim_info(this->inputFloating);
+        nifti_image *temp_floating = nifti_copy_nim_info(inputFloating);
         temp_floating->data = (void*)malloc(temp_floating->nvox * temp_floating->nbyper);
-        memcpy(temp_floating->data, this->inputFloating->data, temp_floating->nvox * temp_floating->nbyper);
+        memcpy(temp_floating->data, inputFloating->data, temp_floating->nvox * temp_floating->nbyper);
         reg_tools_changeDatatype<T>(temp_floating);
         // Extract the robust range of the floating image
         T *floDataPtr = static_cast<T *>(temp_floating->data);
         reg_heapSort(floDataPtr, temp_floating->nvox);
         // Update the floating threshold values if no value has been setup by the user
-        if (this->floatingThresholdLow[0] == -std::numeric_limits<T>::max())
-            this->floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.02f)];
-        if (this->floatingThresholdUp[0] == std::numeric_limits<T>::max())
-            this->floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.98f)];
-        // Free the temporarly allocated image
+        if (floatingThresholdLow[0] == -std::numeric_limits<T>::max())
+            floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.02f)];
+        if (floatingThresholdUp[0] == std::numeric_limits<T>::max())
+            floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.98f)];
+        // Free the temporarily allocated image
         nifti_image_free(temp_floating);
     }
 
     // FINEST LEVEL OF REGISTRATION
-    if (this->usePyramid) {
-        reg_createImagePyramid<T>(this->inputReference, this->referencePyramid, this->levelNumber, this->levelToPerform);
-        reg_createImagePyramid<T>(this->inputFloating, this->floatingPyramid, this->levelNumber, this->levelToPerform);
-        if (this->maskImage != nullptr)
-            reg_createMaskPyramid<T>(this->maskImage, this->maskPyramid, this->levelNumber, this->levelToPerform, this->activeVoxelNumber);
+    if (usePyramid) {
+        reg_createImagePyramid<T>(inputReference, referencePyramid, levelNumber, levelToPerform);
+        reg_createImagePyramid<T>(inputFloating, floatingPyramid, levelNumber, levelToPerform);
+        if (maskImage != nullptr)
+            reg_createMaskPyramid<T>(maskImage, maskPyramid, levelNumber, levelToPerform, activeVoxelNumber);
         else {
-            for (unsigned int l = 0; l < this->levelToPerform; ++l) {
-                this->activeVoxelNumber[l] = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz;
-                this->maskPyramid[l] = (int*)calloc(activeVoxelNumber[l], sizeof(int));
+            for (unsigned int l = 0; l < levelToPerform; ++l) {
+                activeVoxelNumber[l] = referencePyramid[l]->nx * referencePyramid[l]->ny * referencePyramid[l]->nz;
+                maskPyramid[l] = (int*)calloc(activeVoxelNumber[l], sizeof(int));
             }
         }
     } else {
-        reg_createImagePyramid<T>(this->inputReference, this->referencePyramid, 1, 1);
-        reg_createImagePyramid<T>(this->inputFloating, this->floatingPyramid, 1, 1);
-        if (this->maskImage != nullptr)
-            reg_createMaskPyramid<T>(this->maskImage, this->maskPyramid, 1, 1, this->activeVoxelNumber);
+        reg_createImagePyramid<T>(inputReference, referencePyramid, 1, 1);
+        reg_createImagePyramid<T>(inputFloating, floatingPyramid, 1, 1);
+        if (maskImage != nullptr)
+            reg_createMaskPyramid<T>(maskImage, maskPyramid, 1, 1, activeVoxelNumber);
         else {
-            this->activeVoxelNumber[0] = this->referencePyramid[0]->nx * this->referencePyramid[0]->ny * this->referencePyramid[0]->nz;
-            this->maskPyramid[0] = (int*)calloc(activeVoxelNumber[0], sizeof(int));
+            activeVoxelNumber[0] = referencePyramid[0]->nx * referencePyramid[0]->ny * referencePyramid[0]->nz;
+            maskPyramid[0] = (int*)calloc(activeVoxelNumber[0], sizeof(int));
         }
     }
 
     unsigned int pyramidalLevelNumber = 1;
-    if (this->usePyramid) pyramidalLevelNumber = this->levelToPerform;
+    if (usePyramid) pyramidalLevelNumber = levelToPerform;
 
     // SMOOTH THE INPUT IMAGES IF REQUIRED
-    for (unsigned int l = 0; l < this->levelToPerform; l++) {
-        if (this->referenceSmoothingSigma != 0.0) {
-            bool *active = new bool[this->referencePyramid[l]->nt];
-            float *sigma = new float[this->referencePyramid[l]->nt];
+    for (unsigned int l = 0; l < levelToPerform; l++) {
+        if (referenceSmoothingSigma != 0.0) {
+            bool *active = new bool[referencePyramid[l]->nt];
+            float *sigma = new float[referencePyramid[l]->nt];
             active[0] = true;
-            for (int i = 1; i < this->referencePyramid[l]->nt; ++i)
+            for (int i = 1; i < referencePyramid[l]->nt; ++i)
                 active[i] = false;
-            sigma[0] = this->referenceSmoothingSigma;
-            reg_tools_kernelConvolution(this->referencePyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active);
+            sigma[0] = referenceSmoothingSigma;
+            reg_tools_kernelConvolution(referencePyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active);
             delete[]active;
             delete[]sigma;
         }
-        if (this->floatingSmoothingSigma != 0.0) {
+        if (floatingSmoothingSigma != 0.0) {
             // Only the first image is smoothed
-            bool *active = new bool[this->floatingPyramid[l]->nt];
-            float *sigma = new float[this->floatingPyramid[l]->nt];
+            bool *active = new bool[floatingPyramid[l]->nt];
+            float *sigma = new float[floatingPyramid[l]->nt];
             active[0] = true;
-            for (int i = 1; i < this->floatingPyramid[l]->nt; ++i)
+            for (int i = 1; i < floatingPyramid[l]->nt; ++i)
                 active[i] = false;
-            sigma[0] = this->floatingSmoothingSigma;
-            reg_tools_kernelConvolution(this->floatingPyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active);
+            sigma[0] = floatingSmoothingSigma;
+            reg_tools_kernelConvolution(floatingPyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active);
             delete[]active;
             delete[]sigma;
         }
@@ -994,11 +947,11 @@ void reg_base<T>::Initialise() {
 
     // THRESHOLD THE INPUT IMAGES IF REQUIRED
     for (unsigned int l = 0; l < pyramidalLevelNumber; l++) {
-        reg_thresholdImage<T>(this->referencePyramid[l], this->referenceThresholdLow[0], this->referenceThresholdUp[0]);
-        reg_thresholdImage<T>(this->floatingPyramid[l], this->referenceThresholdLow[0], this->referenceThresholdUp[0]);
+        reg_thresholdImage<T>(referencePyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]);
+        reg_thresholdImage<T>(floatingPyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]);
     }
 
-    this->initialised = true;
+    initialised = true;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::Initialise");
 #endif
@@ -1006,104 +959,86 @@ void reg_base<T>::Initialise() {
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_base<T>::SetOptimiser() {
-    if (this->useConjGradient)
-        this->optimiser = new reg_conjugateGradient<T>();
-    else this->optimiser = new reg_optimiser<T>();
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetOptimiser");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
 double reg_base<T>::ComputeSimilarityMeasure() {
-    double measure = 0.;
-    if (this->measure_nmi != nullptr)
-        measure += this->measure_nmi->GetSimilarityMeasureValue();
+    double measure = 0;
+    if (measure_nmi != nullptr)
+        measure += measure_nmi->GetSimilarityMeasureValue();
 
-    if (this->measure_ssd != nullptr)
-        measure += this->measure_ssd->GetSimilarityMeasureValue();
+    if (measure_ssd != nullptr)
+        measure += measure_ssd->GetSimilarityMeasureValue();
 
-    if (this->measure_kld != nullptr)
-        measure += this->measure_kld->GetSimilarityMeasureValue();
+    if (measure_kld != nullptr)
+        measure += measure_kld->GetSimilarityMeasureValue();
 
-    if (this->measure_lncc != nullptr)
-        measure += this->measure_lncc->GetSimilarityMeasureValue();
+    if (measure_lncc != nullptr)
+        measure += measure_lncc->GetSimilarityMeasureValue();
 
-    if (this->measure_dti != nullptr)
-        measure += this->measure_dti->GetSimilarityMeasureValue();
+    if (measure_dti != nullptr)
+        measure += measure_dti->GetSimilarityMeasureValue();
 
-    if (this->measure_mind != nullptr)
-        measure += this->measure_mind->GetSimilarityMeasureValue();
+    if (measure_mind != nullptr)
+        measure += measure_mind->GetSimilarityMeasureValue();
 
-    if (this->measure_mindssc != nullptr)
-        measure += this->measure_mindssc->GetSimilarityMeasureValue();
+    if (measure_mindssc != nullptr)
+        measure += measure_mindssc->GetSimilarityMeasureValue();
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::ComputeSimilarityMeasure");
 #endif
-    return double(this->similarityWeight) * measure;
+    return double(similarityWeight) * measure;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 void reg_base<T>::GetVoxelBasedGradient() {
     // The voxel based gradient image is filled with zeros
-    reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient,
-                                   this->voxelBasedMeasureGradient,
-                                   0.f);
+    // TODO Temporarily call F3dContent. This function will be moved to reg_f3d.
+    dynamic_cast<F3dContent*>(con)->ZeroVoxelBasedMeasureGradient();
 
     // The intensity gradient is first computed
-    //   if(this->measure_nmi!=nullptr || this->measure_ssd!=nullptr ||
-    //         this->measure_kld!=nullptr || this->measure_lncc!=nullptr ||
-    //         this->measure_dti!=nullptr)
+    //   if(measure_nmi!=nullptr || measure_ssd!=nullptr ||
+    //         measure_kld!=nullptr || measure_lncc!=nullptr ||
+    //         measure_dti!=nullptr)
     //   {
-    //    if(this->measure_dti!=nullptr){
-    //        reg_getImageGradient(this->currentFloating,
-    //                             this->warImgGradient,
-    //                             this->deformationFieldImage,
-    //                             this->currentMask,
-    //                             this->interpolation,
-    //                             this->warpedPaddingValue,
-    //                             this->measure_dti->GetActiveTimepoints(),
-    //		 					   this->forwardJacobianMatrix,
-    //							   this->warped);
+    //    if(measure_dti!=nullptr){
+    //        reg_getImageGradient(floating,
+    //                             warpedGradient,
+    //                             deformationFieldImage,
+    //                             currentMask,
+    //                             interpolation,
+    //                             warpedPaddingValue,
+    //                             measure_dti->GetActiveTimepoints(),
+    //		 					   forwardJacobianMatrix,
+    //							   warped);
     //    }
     //    else{
     //    }
     //   }
 
-    //   if(this->measure_dti!=nullptr)
-    //      this->measure_dti->GetVoxelBasedSimilarityMeasureGradient();
+    //   if(measure_dti!=nullptr)
+    //      measure_dti->GetVoxelBasedSimilarityMeasureGradient();
 
-    for (int t = 0; t < this->currentReference->nt; ++t) {
-        reg_getImageGradient(this->currentFloating,
-                             this->warImgGradient,
-                             this->deformationFieldImage,
-                             this->currentMask,
-                             this->interpolation,
-                             this->warpedPaddingValue,
-                             t);
+    for (int t = 0; t < con->Content::GetReference()->nt; ++t) {
+        compute->GetImageGradient(interpolation, warpedPaddingValue, t);
 
         // The gradient of the various measures of similarity are computed
-        if (this->measure_nmi != nullptr)
-            this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t);
+        if (measure_nmi != nullptr)
+            measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t);
 
-        if (this->measure_ssd != nullptr)
-            this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t);
+        if (measure_ssd != nullptr)
+            measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t);
 
-        if (this->measure_kld != nullptr)
-            this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t);
+        if (measure_kld != nullptr)
+            measure_kld->GetVoxelBasedSimilarityMeasureGradient(t);
 
-        if (this->measure_lncc != nullptr)
-            this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t);
+        if (measure_lncc != nullptr)
+            measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t);
 
-        if (this->measure_mind != nullptr)
-            this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t);
+        if (measure_mind != nullptr)
+            measure_mind->GetVoxelBasedSimilarityMeasureGradient(t);
 
-        if (this->measure_mindssc != nullptr)
-            this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t);
+        if (measure_mindssc != nullptr)
+            measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t);
     }
 
 #ifndef NDEBUG
@@ -1115,28 +1050,28 @@ void reg_base<T>::GetVoxelBasedGradient() {
 //template<class T>
 //void reg_base<T>::ApproximateParzenWindow()
 //{
-//    if(this->measure_nmi==nullptr)
-//        this->measure_nmi=new reg_nmi;
-//    this->measure_nmi=approxParzenWindow = true;
+//    if(measure_nmi==nullptr)
+//        measure_nmi=new reg_nmi;
+//    measure_nmi=approxParzenWindow = true;
 //}
 ///* *************************************************************** */
 //template<class T>
 //void reg_base<T>::DoNotApproximateParzenWindow()
 //{
-//    if(this->measure_nmi==nullptr)
-//        this->measure_nmi=new reg_nmi;
-//    this->measure_nmi=approxParzenWindow = false;
+//    if(measure_nmi==nullptr)
+//        measure_nmi=new reg_nmi;
+//    measure_nmi=approxParzenWindow = false;
 //}
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
-    if (this->measure_nmi == nullptr)
-        this->measure_nmi = new reg_nmi;
-    this->measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
-    // I am here adding 4 to the specified bin number to accomodate for
+    if (measure_nmi == nullptr)
+        measure_nmi = new reg_nmi;
+    measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
-    this->measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint);
+    measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseNMISetReferenceBinNumber");
 #endif
@@ -1144,12 +1079,12 @@ void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
-    if (this->measure_nmi == nullptr)
-        this->measure_nmi = new reg_nmi;
-    this->measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
-    // I am here adding 4 to the specified bin number to accomodate for
+    if (measure_nmi == nullptr)
+        measure_nmi = new reg_nmi;
+    measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
-    this->measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint);
+    measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseNMISetFloatingBinNumber");
 #endif
@@ -1157,10 +1092,10 @@ void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseSSD(int timepoint, bool normalise) {
-    if (this->measure_ssd == nullptr)
-        this->measure_ssd = new reg_ssd();
-    this->measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
-    this->measure_ssd->SetNormaliseTimepoint(timepoint, normalise);
+    if (measure_ssd == nullptr)
+        measure_ssd = new reg_ssd();
+    measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    measure_ssd->SetNormaliseTimepoint(timepoint, normalise);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseSSD");
 #endif
@@ -1168,10 +1103,10 @@ void reg_base<T>::UseSSD(int timepoint, bool normalise) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseMIND(int timepoint, int offset) {
-    if (this->measure_mind == nullptr)
-        this->measure_mind = new reg_mind;
-    this->measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
-    this->measure_mind->SetDescriptorOffset(offset);
+    if (measure_mind == nullptr)
+        measure_mind = new reg_mind;
+    measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
+    measure_mind->SetDescriptorOffset(offset);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseMIND");
 #endif
@@ -1179,10 +1114,10 @@ void reg_base<T>::UseMIND(int timepoint, int offset) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
-    if (this->measure_mindssc == nullptr)
-        this->measure_mindssc = new reg_mindssc;
-    this->measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
-    this->measure_mindssc->SetDescriptorOffset(offset);
+    if (measure_mindssc == nullptr)
+        measure_mindssc = new reg_mindssc;
+    measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
+    measure_mindssc->SetDescriptorOffset(offset);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseMINDSSC");
 #endif
@@ -1190,9 +1125,9 @@ void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseKLDivergence(int timepoint) {
-    if (this->measure_kld == nullptr)
-        this->measure_kld = new reg_kld;
-    this->measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    if (measure_kld == nullptr)
+        measure_kld = new reg_kld;
+    measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseKLDivergence");
 #endif
@@ -1200,10 +1135,10 @@ void reg_base<T>::UseKLDivergence(int timepoint) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseLNCC(int timepoint, float stddev) {
-    if (this->measure_lncc == nullptr)
-        this->measure_lncc = new reg_lncc;
-    this->measure_lncc->SetKernelStandardDeviation(timepoint, stddev);
-    this->measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0
+    if (measure_lncc == nullptr)
+        measure_lncc = new reg_lncc;
+    measure_lncc->SetKernelStandardDeviation(timepoint, stddev);
+    measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseLNCC");
 #endif
@@ -1211,12 +1146,12 @@ void reg_base<T>::UseLNCC(int timepoint, float stddev) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLNCCKernelType(int type) {
-    if (this->measure_lncc == nullptr) {
+    if (measure_lncc == nullptr) {
         reg_print_fct_error("reg_base<T>::SetLNCCKernelType");
         reg_print_msg_error("The LNCC object has to be created first");
         reg_exit();
     }
-    this->measure_lncc->SetKernelType(type);
+    measure_lncc->SetKernelType(type);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetLNCCKernelType");
 #endif
@@ -1227,11 +1162,11 @@ void reg_base<T>::UseDTI(bool *timepoint) {
     reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring");
     reg_exit();
 
-    if (this->measure_dti == nullptr)
-        this->measure_dti = new reg_dti;
-    for (int i = 0; i < this->inputReference->nt; ++i) {
-        if (timepoint[i] == true)
-            this->measure_dti->SetTimepointWeight(i, 1.0);  // weight set to 1.0 to indicate timepoint is active
+    if (measure_dti == nullptr)
+        measure_dti = new reg_dti;
+    for (int i = 0; i < inputReference->nt; ++i) {
+        if (timepoint[i])
+            measure_dti->SetTimepointWeight(i, 1.0);  // weight set to 1.0 to indicate timepoint is active
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseDTI");
@@ -1240,76 +1175,71 @@ void reg_base<T>::UseDTI(bool *timepoint) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetNMIWeight(int timepoint, double weight) {
-    if (this->measure_nmi == nullptr) {
+    if (measure_nmi == nullptr) {
         reg_print_fct_error("reg_base<T>::SetNMIWeight");
         reg_print_msg_error("The NMI object has to be created before the timepoint weights can be set");
         reg_exit();
     }
-    this->measure_nmi->SetTimepointWeight(timepoint, weight);
+    measure_nmi->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLNCCWeight(int timepoint, double weight) {
-    if (this->measure_lncc == nullptr) {
+    if (measure_lncc == nullptr) {
         reg_print_fct_error("reg_base<T>::SetLNCCWeight");
         reg_print_msg_error("The LNCC object has to be created before the timepoint weights can be set");
         reg_exit();
     }
-    this->measure_lncc->SetTimepointWeight(timepoint, weight);
+    measure_lncc->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetSSDWeight(int timepoint, double weight) {
-    if (this->measure_ssd == nullptr) {
+    if (measure_ssd == nullptr) {
         reg_print_fct_error("reg_base<T>::SetSSDWeight");
         reg_print_msg_error("The SSD object has to be created before the timepoint weights can be set");
         reg_exit();
     }
-    this->measure_ssd->SetTimepointWeight(timepoint, weight);
+    measure_ssd->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetKLDWeight(int timepoint, double weight) {
-    if (this->measure_kld == nullptr) {
+    if (measure_kld == nullptr) {
         reg_print_fct_error("reg_base<T>::SetKLDWeight");
         reg_print_msg_error("The KLD object has to be created before the timepoint weights can be set");
         reg_exit();
     }
-    this->measure_kld->SetTimepointWeight(timepoint, weight);
+    measure_kld->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLocalWeightSim(nifti_image *i) {
-    this->localWeightSimInput = i;
+    localWeightSimInput = i;
+    reg_tools_changeDatatype<T>(localWeightSimInput);
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 void reg_base<T>::WarpFloatingImage(int inter) {
     // Compute the deformation field
-    this->GetDeformationField();
+    GetDeformationField();
 
-    if (this->measure_dti == nullptr) {
+    if (measure_dti == nullptr) {
         // Resample the floating image
-        reg_resampleImage(this->currentFloating,
-                          this->warped,
-                          this->deformationFieldImage,
-                          this->currentMask,
-                          inter,
-                          this->warpedPaddingValue);
+        compute->ResampleImage(inter, warpedPaddingValue);
     } else {
-        reg_defField_getJacobianMatrix(this->deformationFieldImage,
-                                       this->forwardJacobianMatrix);
+        // reg_defField_getJacobianMatrix(deformationFieldImage, forwardJacobianMatrix);
         /*DTI needs fixing!
-       reg_resampleImage(this->currentFloating,
-                          this->warped,
-                          this->deformationFieldImage,
-                          this->currentMask,
+       reg_resampleImage(floating,
+                          warped,
+                          deformationFieldImage,
+                          currentMask,
                           inter,
-                          this->warpedPaddingValue,
-                          this->measure_dti->GetActiveTimepoints(),
-                          this->forwardJacobianMatrix);*/
+                          warpedPaddingValue,
+                          measure_dti->GetActiveTimepoints(),
+                          forwardJacobianMatrix);*/
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::WarpFloatingImage");
@@ -1321,67 +1251,67 @@ template <class T>
 void reg_base<T>::Run() {
 #ifndef NDEBUG
     char text[255];
-    sprintf(text, "%s::Run() called", this->executableName);
+    sprintf(text, "%s::Run() called", executableName);
     reg_print_msg_debug(text);
 #endif
 
-    if (!this->initialised) this->Initialise();
+    Initialise();
 #ifdef NDEBUG
-    if (this->verbose) {
+    if (verbose) {
 #endif
-        reg_print_info(this->executableName, "***********************************************************");
+        reg_print_info(executableName, "***********************************************************");
 #ifdef NDEBUG
     }
 #endif
 
     // Update the maximal number of iteration to perform per level
-    this->maxIterationNumber = this->maxIterationNumber * pow(2, this->levelToPerform - 1);
+    maxIterationNumber = maxIterationNumber * pow(2, levelToPerform - 1);
 
     // Loop over the different resolution level to perform
-    for (this->currentLevel = 0;
-         this->currentLevel < this->levelToPerform;
-         this->currentLevel++) {
-
+    for (currentLevel = 0; currentLevel < levelToPerform; currentLevel++) {
         // Set the current input images
-        if (this->usePyramid) {
-            this->currentReference = this->referencePyramid[this->currentLevel];
-            this->currentFloating = this->floatingPyramid[this->currentLevel];
-            this->currentMask = this->maskPyramid[this->currentLevel];
+        nifti_image *reference;
+        nifti_image *floating;
+        int *mask;
+        if (usePyramid) {
+            reference = referencePyramid[currentLevel];
+            floating = floatingPyramid[currentLevel];
+            mask = maskPyramid[currentLevel];
         } else {
-            this->currentReference = this->referencePyramid[0];
-            this->currentFloating = this->floatingPyramid[0];
-            this->currentMask = this->maskPyramid[0];
+            reference = referencePyramid[0];
+            floating = floatingPyramid[0];
+            mask = maskPyramid[0];
         }
 
         // Allocate image that depends on the reference image
-        this->AllocateWarped();
-        this->AllocateDeformationField();
-        this->AllocateWarpedGradient();
+        // AllocateWarped();
+        // AllocateDeformationField();
+        // AllocateWarpedGradient();
 
         // The grid is refined if necessary
-        T maxStepSize = this->InitialiseCurrentLevel();
+        T maxStepSize = InitialiseCurrentLevel(reference);
         T currentSize = maxStepSize;
         T smallestSize = maxStepSize / (T)100.0;
 
-        this->DisplayCurrentLevelParameters();
-
         // Allocate image that are required to compute the gradient
-        this->AllocateVoxelBasedMeasureGradient();
-        this->AllocateTransformationGradient();
+        // AllocateVoxelBasedMeasureGradient();
+        // AllocateTransformationGradient();
+
+        InitContent(reference, floating, mask);
+
+        DisplayCurrentLevelParameters();
 
         // Initialise the measures of similarity
-        this->InitialiseSimilarity();
+        InitialiseSimilarity();
 
         // initialise the optimiser
-        this->SetOptimiser();
+        SetOptimiser();
 
         // Loop over the number of perturbation to do
-        for (size_t perturbation = 0;
-             perturbation <= this->perturbationNumber;
-             ++perturbation) {
-            // Evalulate the objective function value
-            this->UpdateBestObjFunctionValue();
-            this->PrintInitialObjFunctionValue();
+        for (size_t perturbation = 0; perturbation <= perturbationNumber; ++perturbation) {
+            // Evaluate the objective function value
+            UpdateBestObjFunctionValue();
+            PrintInitialObjFunctionValue();
 
             // Iterate until convergence or until the max number of iteration is reach
             while (true) {
@@ -1389,39 +1319,39 @@ void reg_base<T>::Run() {
                 if (currentSize == 0)
                     break;
 
-                if (this->optimiser->GetCurrentIterationNumber() >= this->optimiser->GetMaxIterationNumber()) {
+                if (optimiser->GetCurrentIterationNumber() >= optimiser->GetMaxIterationNumber()) {
                     reg_print_msg_warn("The current level reached the maximum number of iteration");
                     break;
                 }
 
                 // Compute the objective function gradient
-                this->GetObjectiveFunctionGradient();
+                GetObjectiveFunctionGradient();
 
                 // Normalise the gradient
-                this->NormaliseGradient();
+                NormaliseGradient();
 
                 // Initialise the line search initial step size
                 currentSize = currentSize > maxStepSize ? maxStepSize : currentSize;
 
                 // A line search is performed
-                this->optimiser->Optimise(maxStepSize, smallestSize, currentSize);
+                optimiser->Optimise(maxStepSize, smallestSize, currentSize);
 
-                // Update the obecjtive function variables and print some information
-                this->PrintCurrentObjFunctionValue(currentSize);
+                // Update the objective function variables and print some information
+                PrintCurrentObjFunctionValue(currentSize);
 
             } // while
-            if (perturbation < this->perturbationNumber) {
+            if (perturbation < perturbationNumber) {
 
-                this->optimiser->Perturbation(smallestSize);
+                optimiser->Perturbation(smallestSize);
                 currentSize = maxStepSize;
 #ifdef NDEBUG
-                if (this->verbose) {
+                if (verbose) {
 #endif
                     char text[255];
-                    reg_print_info(this->executableName, "Perturbation Step - The number of iteration is reset to 0");
+                    reg_print_info(executableName, "Perturbation Step - The number of iteration is reset to 0");
                     sprintf(text, "Perturbation Step - Every control point positions is altered by [-%g %g]",
                             smallestSize, smallestSize);
-                    reg_print_info(this->executableName, text);
+                    reg_print_info(executableName, text);
 
 #ifdef NDEBUG
                 }
@@ -1430,46 +1360,49 @@ void reg_base<T>::Run() {
         } // perturbation loop
 
         // Final folding correction
-        this->CorrectTransformation();
+        CorrectTransformation();
 
         // Some cleaning is performed
-        delete this->optimiser;
-        this->optimiser = nullptr;
-        this->ClearWarped();
-        this->ClearDeformationField();
-        this->ClearWarpedGradient();
-        this->ClearVoxelBasedMeasureGradient();
-        this->ClearTransformationGradient();
-        if (this->usePyramid) {
-            nifti_image_free(this->referencePyramid[this->currentLevel]);
-            this->referencePyramid[this->currentLevel] = nullptr;
-            nifti_image_free(this->floatingPyramid[this->currentLevel]);
-            this->floatingPyramid[this->currentLevel] = nullptr;
-            free(this->maskPyramid[this->currentLevel]);
-            this->maskPyramid[this->currentLevel] = nullptr;
-        } else if (this->currentLevel == this->levelToPerform - 1) {
-            nifti_image_free(this->referencePyramid[0]);
-            this->referencePyramid[0] = nullptr;
-            nifti_image_free(this->floatingPyramid[0]);
-            this->floatingPyramid[0] = nullptr;
-            free(this->maskPyramid[0]);
-            this->maskPyramid[0] = nullptr;
+        DeinitContent();
+        delete optimiser;
+        optimiser = nullptr;
+        // if (localWeightSimCurrent) {
+        //     nifti_image_free(localWeightSimCurrent);
+        //     localWeightSimCurrent = nullptr;
+        // }
+        // DeallocateCurrentInputImage();
+        // DeallocateWarped();
+        // DeallocateDeformationField();
+        // DeallocateWarpedGradient();
+        // DeallocateVoxelBasedMeasureGradient();
+        // DeallocateTransformationGradient();
+        if (usePyramid) {
+            nifti_image_free(referencePyramid[currentLevel]);
+            referencePyramid[currentLevel] = nullptr;
+            nifti_image_free(floatingPyramid[currentLevel]);
+            floatingPyramid[currentLevel] = nullptr;
+            free(maskPyramid[currentLevel]);
+            maskPyramid[currentLevel] = nullptr;
+        } else if (currentLevel == levelToPerform - 1) {
+            nifti_image_free(referencePyramid[0]);
+            referencePyramid[0] = nullptr;
+            nifti_image_free(floatingPyramid[0]);
+            floatingPyramid[0] = nullptr;
+            free(maskPyramid[0]);
+            maskPyramid[0] = nullptr;
         }
-        this->ClearCurrentInputImage();
 
 #ifdef NDEBUG
-        if (this->verbose) {
+        if (verbose) {
 #endif
-            reg_print_info(this->executableName, "Current registration level done");
-            reg_print_info(this->executableName, "***********************************************************");
+            reg_print_info(executableName, "Current registration level done");
+            reg_print_info(executableName, "***********************************************************");
 #ifdef NDEBUG
         }
 #endif
         // Update the number of level for the next level
-        this->maxIterationNumber /= 2;
-    } // level this->levelToPerform
-    // Set this to the last value since it's used somewhere else
-    this->currentLevel--;
+        maxIterationNumber /= 2;
+    } // level levelToPerform
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::Run");
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 0333d0d2..f44a25b5 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -28,16 +28,22 @@
 #include "_reg_stringFormat.h"
 #include "_reg_optimiser.h"
 #include "float.h"
-//#include "Platform.h"
+#include "Platform.h"
 
-/// @brief Base registration class
+ /// @brief Base registration class
 template <class T>
 class reg_base: public InterfaceOptimiser {
 protected:
-   // Platform !!!
-//   Platform *platform;
-//   int platformCode;
-//   unsigned gpuIdx;
+    // Platform
+    Platform *platform;
+    int platformCode;
+    unsigned gpuIdx;
+
+    // Content
+    Content *con = nullptr;
+
+    // Compute
+    Compute *compute = nullptr;
 
     // Optimiser related variables
     reg_optimiser<T> *optimiser;
@@ -48,7 +54,7 @@ class reg_base: public InterfaceOptimiser {
     bool optimiseZ;
 
     // Optimiser related function
-    virtual void SetOptimiser();
+    virtual void SetOptimiser() = 0;
 
     // Measure related variables
     reg_ssd *measure_ssd;
@@ -59,7 +65,7 @@ class reg_base: public InterfaceOptimiser {
     reg_mind *measure_mind;
     reg_mindssc *measure_mindssc;
     nifti_image *localWeightSimInput;
-    nifti_image *localWeightSimCurrent;
+    // nifti_image *localWeightSimCurrent;
 
     char *executableName;
     int referenceTimePoint;
@@ -76,7 +82,7 @@ class reg_base: public InterfaceOptimiser {
     float *floatingThresholdUp;
     float *floatingThresholdLow;
     bool robustRange;
-    T warpedPaddingValue;
+    float warpedPaddingValue;
     unsigned int levelNumber;
     unsigned int levelToPerform;
     T gradientSmoothingSigma;
@@ -93,13 +99,13 @@ class reg_base: public InterfaceOptimiser {
     nifti_image **floatingPyramid;
     int **maskPyramid;
     int *activeVoxelNumber;
-    nifti_image *currentReference;
-    nifti_image *currentFloating;
-    int *currentMask;
-    nifti_image *warped;
-    nifti_image *deformationFieldImage;
-    nifti_image *warImgGradient;
-    nifti_image *voxelBasedMeasureGradient;
+    // nifti_image *reference;
+    // nifti_image *floating;
+    // int *currentMask;
+    // nifti_image *warped;
+    // nifti_image *deformationFieldImage;
+    // nifti_image *warpedGradient;
+    // nifti_image *voxelBasedMeasureGradient;
     unsigned int currentLevel;
 
     mat33 *forwardJacobianMatrix;
@@ -115,53 +121,52 @@ class reg_base: public InterfaceOptimiser {
     float *landmarkReference;
     float *landmarkFloating;
 
-    virtual void AllocateWarped();
-    virtual void ClearWarped();
-    virtual void AllocateDeformationField();
-    virtual void ClearDeformationField();
-    virtual void AllocateWarpedGradient();
-    virtual void ClearWarpedGradient();
-    virtual void AllocateVoxelBasedMeasureGradient();
-    virtual void ClearVoxelBasedMeasureGradient();
-    virtual T InitialiseCurrentLevel() { return 0; }
-    virtual void ClearCurrentInputImage();
+    // virtual void AllocateWarped();
+    // virtual void DeallocateWarped();
+    // virtual void AllocateDeformationField();
+    // virtual void DeallocateDeformationField();
+    // virtual void AllocateWarpedGradient();
+    // virtual void DeallocateWarpedGradient();
+    // virtual void AllocateVoxelBasedMeasureGradient();
+    // virtual void DeallocateVoxelBasedMeasureGradient();
+    // virtual void DeallocateCurrentInputImage();
 
     virtual void WarpFloatingImage(int);
     virtual double ComputeSimilarityMeasure();
     virtual void GetVoxelBasedGradient();
-    virtual void SmoothGradient() {}
     virtual void InitialiseSimilarity();
 
     // Virtual empty functions that have to be filled
-    virtual void GetDeformationField() {}
-    virtual void SetGradientImageToZero() {}
-    virtual void GetApproximatedGradient() {}
-    virtual double GetObjectiveFunctionValue() { return std::numeric_limits<float>::quiet_NaN(); }
-    virtual void UpdateParameters(float) {}
-    virtual T NormaliseGradient() { return std::numeric_limits<float>::quiet_NaN(); }
-    virtual void GetSimilarityMeasureGradient() {}
-    virtual void GetObjectiveFunctionGradient() {}
-    virtual void DisplayCurrentLevelParameters() {}
-    virtual void UpdateBestObjFunctionValue() {}
-    virtual void PrintCurrentObjFunctionValue(T) {}
-    virtual void PrintInitialObjFunctionValue() {}
-    virtual void AllocateTransformationGradient() {}
-    virtual void ClearTransformationGradient() {}
-    virtual void CorrectTransformation() {}
+    virtual T InitialiseCurrentLevel(nifti_image *reference) = 0;
+    virtual void SmoothGradient() = 0;
+    virtual void GetDeformationField() = 0;
+    // virtual void SetGradientImageToZero() = 0;
+    virtual void GetApproximatedGradient() = 0;
+    virtual double GetObjectiveFunctionValue() = 0;
+    virtual void UpdateParameters(float) = 0;
+    virtual T NormaliseGradient() = 0;
+    virtual void GetSimilarityMeasureGradient() = 0;
+    virtual void GetObjectiveFunctionGradient() = 0;
+    virtual void DisplayCurrentLevelParameters() = 0;
+    virtual void UpdateBestObjFunctionValue() = 0;
+    virtual void PrintCurrentObjFunctionValue(T) = 0;
+    virtual void PrintInitialObjFunctionValue() = 0;
+    // virtual void AllocateTransformationGradient() = 0;
+    // virtual void DeallocateTransformationGradient() = 0;
+    virtual void CorrectTransformation() = 0;
 
     void (*funcProgressCallback)(float pcntProgress, void *params);
     void* paramsProgressCallback;
 
 public:
-
-   //PLATFORM
-//   void setPlaform(Platform* inputPlatform);
-//   Platform* getPlaform();
-//   void setPlatformCode(int inputPlatformCode);
-//   void setGpuIdx(unsigned inputGPUIdx);
-
     reg_base(int refTimePoint, int floTimePoint);
     virtual ~reg_base();
+
+    // Platform
+    Platform* GetPlatform();
+    void SetPlatformCode(const int platformCodeIn) { platformCode = platformCodeIn; }
+    void SetGpuIdx(unsigned gpuIdxIn) { gpuIdx = gpuIdxIn; }
+
     // Optimisation related functions
     void SetMaximalIterationNumber(unsigned int);
     void NoOptimisationAlongX() { optimiseX = false; }
@@ -204,7 +209,7 @@ class reg_base: public InterfaceOptimiser {
     void SetFloatingThresholdLow(unsigned int, T);
     void UseRobustRange();
     void DoNotUseRobustRange();
-    void SetWarpedPaddingValue(T);
+    void SetWarpedPaddingValue(float);
     void SetLevelNumber(unsigned int);
     void SetLevelToPerform(unsigned int);
     void PrintOutInformation();
@@ -218,8 +223,10 @@ class reg_base: public InterfaceOptimiser {
     virtual void CheckParameters();
     void Run();
     virtual void Initialise();
-    nifti_image** GetWarpedImage() { return nullptr; } // Need to be filled
-    virtual char* GetExecutableName() { return this->executableName; }
+    virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) = 0;
+    virtual void DeinitContent() = 0;
+    virtual nifti_image** GetWarpedImage() = 0;
+    virtual char* GetExecutableName() { return executableName; }
     virtual bool GetSymmetricStatus() { return false; }
 
     // Function required for the NiftyReg plugin in NiftyView
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 21b2fd6d..86247243 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -11,6 +11,11 @@
  */
 
 #include "_reg_f3d.h"
+#include "F3dContent.h"
+
+#ifdef _USE_CUDA
+#include "CudaF3dContent.h"
+#endif
 
  /* *************************************************************** */
  /* *************************************************************** */
@@ -18,24 +23,24 @@ template <class T>
 reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint)
     : reg_base<T>::reg_base(refTimePoint, floTimePoint) {
 
-    this->executableName = (char *)"NiftyReg F3D";
-    this->inputControlPointGrid = nullptr; // pointer to external
-    this->controlPointGrid = nullptr;
-    this->bendingEnergyWeight = 0.001;
-    this->linearEnergyWeight = 0.00;
-    this->jacobianLogWeight = 0.;
-    this->jacobianLogApproximation = true;
-    this->spacing[0] = -5;
-    this->spacing[1] = std::numeric_limits<T>::quiet_NaN();
-    this->spacing[2] = std::numeric_limits<T>::quiet_NaN();
-    this->useConjGradient = true;
-    this->useApproxGradient = false;
+    executableName = (char *)"NiftyReg F3D";
+    inputControlPointGrid = nullptr; // pointer to external
+    controlPointGrid = nullptr;
+    bendingEnergyWeight = 0.001;
+    linearEnergyWeight = 0.00;
+    jacobianLogWeight = 0.;
+    jacobianLogApproximation = true;
+    spacing[0] = -5;
+    spacing[1] = std::numeric_limits<T>::quiet_NaN();
+    spacing[2] = std::numeric_limits<T>::quiet_NaN();
+    useConjGradient = true;
+    useApproxGradient = false;
 
-    //    this->approxParzenWindow=true;
+    // approxParzenWindow=true;
 
-    this->transformationGradient = nullptr;
+    // transformationGradient = nullptr;
 
-    this->gridRefinement = true;
+    gridRefinement = true;
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::reg_f3d");
@@ -45,10 +50,10 @@ reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint)
 /* *************************************************************** */
 template <class T>
 reg_f3d<T>::~reg_f3d() {
-    this->ClearTransformationGradient();
-    if (this->controlPointGrid != nullptr) {
-        nifti_image_free(this->controlPointGrid);
-        this->controlPointGrid = nullptr;
+    // DeallocateTransformationGradient();
+    if (controlPointGrid != nullptr) {
+        nifti_image_free(controlPointGrid);
+        controlPointGrid = nullptr;
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::~reg_f3d");
@@ -58,7 +63,7 @@ reg_f3d<T>::~reg_f3d() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetControlPointGridImage(nifti_image *cp) {
-    this->inputControlPointGrid = cp;
+    inputControlPointGrid = cp;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SetControlPointGridImage");
 #endif
@@ -66,7 +71,7 @@ void reg_f3d<T>::SetControlPointGridImage(nifti_image *cp) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetBendingEnergyWeight(T be) {
-    this->bendingEnergyWeight = be;
+    bendingEnergyWeight = be;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SetBendingEnergyWeight");
 #endif
@@ -74,7 +79,7 @@ void reg_f3d<T>::SetBendingEnergyWeight(T be) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetLinearEnergyWeight(T le) {
-    this->linearEnergyWeight = le;
+    linearEnergyWeight = le;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SetLinearEnergyWeight");
 #endif
@@ -82,7 +87,7 @@ void reg_f3d<T>::SetLinearEnergyWeight(T le) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetJacobianLogWeight(T j) {
-    this->jacobianLogWeight = j;
+    jacobianLogWeight = j;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SetJacobianLogWeight");
 #endif
@@ -90,7 +95,7 @@ void reg_f3d<T>::SetJacobianLogWeight(T j) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::ApproximateJacobianLog() {
-    this->jacobianLogApproximation = true;
+    jacobianLogApproximation = true;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::ApproximateJacobianLog");
 #endif
@@ -98,7 +103,7 @@ void reg_f3d<T>::ApproximateJacobianLog() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::DoNotApproximateJacobianLog() {
-    this->jacobianLogApproximation = false;
+    jacobianLogApproximation = false;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::DoNotApproximateJacobianLog");
 #endif
@@ -106,28 +111,28 @@ void reg_f3d<T>::DoNotApproximateJacobianLog() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetSpacing(unsigned int i, T s) {
-    this->spacing[i] = s;
+    spacing[i] = s;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SetSpacing");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-T reg_f3d<T>::InitialiseCurrentLevel() {
+T reg_f3d<T>::InitialiseCurrentLevel(nifti_image *reference) {
     // Set the initial step size for the gradient ascent
-    T maxStepSize = this->currentReference->dx > this->currentReference->dy ? this->currentReference->dx : this->currentReference->dy;
-    if (this->currentReference->ndim > 2)
-        maxStepSize = (this->currentReference->dz > maxStepSize) ? this->currentReference->dz : maxStepSize;
+    T maxStepSize = reference->dx > reference->dy ? reference->dx : reference->dy;
+    if (reference->ndim > 2)
+        maxStepSize = (reference->dz > maxStepSize) ? reference->dz : maxStepSize;
 
     // Refine the control point grid if required
-    if (this->gridRefinement == true) {
-        if (this->currentLevel == 0) {
-            this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast<T>(powf(16.0f, this->levelNumber - 1));
-            this->linearEnergyWeight = this->linearEnergyWeight / static_cast<T>(powf(3.0f, this->levelNumber - 1));
+    if (gridRefinement) {
+        if (currentLevel == 0) {
+            bendingEnergyWeight = bendingEnergyWeight / static_cast<T>(powf(16.0f, levelNumber - 1));
+            linearEnergyWeight = linearEnergyWeight / static_cast<T>(powf(3.0f, levelNumber - 1));
         } else {
-            reg_spline_refineControlPointGrid(this->controlPointGrid, this->currentReference);
-            this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast<T>(16);
-            this->linearEnergyWeight = this->linearEnergyWeight * static_cast<T>(3);
+            bendingEnergyWeight = bendingEnergyWeight * static_cast<T>(16);
+            linearEnergyWeight = linearEnergyWeight * static_cast<T>(3);
+            reg_spline_refineControlPointGrid(controlPointGrid, reference);
         }
     }
 
@@ -137,51 +142,51 @@ T reg_f3d<T>::InitialiseCurrentLevel() {
     return maxStepSize;
 }
 /* *************************************************************** */
-template <class T>
-void reg_f3d<T>::AllocateTransformationGradient() {
-    if (this->controlPointGrid == nullptr) {
-        reg_print_fct_error("reg_f3d<T>::AllocateTransformationGradient()");
-        reg_print_msg_error("The control point image is not defined");
-        reg_exit();
-    }
-    reg_f3d<T>::ClearTransformationGradient();
-    this->transformationGradient = nifti_copy_nim_info(this->controlPointGrid);
-    this->transformationGradient->data = (void *)calloc(this->transformationGradient->nvox,
-                                                        this->transformationGradient->nbyper);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::AllocateTransformationGradient");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d<T>::ClearTransformationGradient() {
-    if (this->transformationGradient != nullptr) {
-        nifti_image_free(this->transformationGradient);
-        this->transformationGradient = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::ClearTransformationGradient");
-#endif
-}
+// template <class T>
+// void reg_f3d<T>::AllocateTransformationGradient() {
+//     if (controlPointGrid == nullptr) {
+//         reg_print_fct_error("reg_f3d<T>::AllocateTransformationGradient()");
+//         reg_print_msg_error("The control point image is not defined");
+//         reg_exit();
+//     }
+//     reg_f3d<T>::DeallocateTransformationGradient();
+//     transformationGradient = nifti_copy_nim_info(controlPointGrid);
+//     transformationGradient->data = (void*)calloc(transformationGradient->nvox,
+//                                                        transformationGradient->nbyper);
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_f3d<T>::AllocateTransformationGradient");
+// #endif
+// }
+/* *************************************************************** */
+// template <class T>
+// void reg_f3d<T>::DeallocateTransformationGradient() {
+//     if (transformationGradient != nullptr) {
+//         nifti_image_free(transformationGradient);
+//         transformationGradient = nullptr;
+//     }
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_f3d<T>::DeallocateTransformationGradient");
+// #endif
+// }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::CheckParameters() {
     reg_base<T>::CheckParameters();
     // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
-    if (strcmp(this->executableName, "NiftyReg F3D") == 0 ||
-        strcmp(this->executableName, "NiftyReg F3D GPU") == 0) {
-        T penaltySum = this->bendingEnergyWeight +
-            this->linearEnergyWeight +
-            this->jacobianLogWeight +
-            this->landmarkRegWeight;
+    if (strcmp(executableName, "NiftyReg F3D") == 0 ||
+        strcmp(executableName, "NiftyReg F3D GPU") == 0) {
+        T penaltySum = bendingEnergyWeight +
+            linearEnergyWeight +
+            jacobianLogWeight +
+            landmarkRegWeight;
         if (penaltySum >= 1.0) {
-            this->similarityWeight = 0;
-            this->similarityWeight /= penaltySum;
-            this->bendingEnergyWeight /= penaltySum;
-            this->linearEnergyWeight /= penaltySum;
-            this->jacobianLogWeight /= penaltySum;
-            this->landmarkRegWeight /= penaltySum;
-        } else this->similarityWeight = 1.0 - penaltySum;
+            similarityWeight = 0;
+            similarityWeight /= penaltySum;
+            bendingEnergyWeight /= penaltySum;
+            linearEnergyWeight /= penaltySum;
+            jacobianLogWeight /= penaltySum;
+            landmarkRegWeight /= penaltySum;
+        } else similarityWeight = 1.0 - penaltySum;
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::CheckParameters");
@@ -191,177 +196,177 @@ void reg_f3d<T>::CheckParameters() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::Initialise() {
-    if (this->initialised) return;
+    if (initialised) return;
 
     reg_base<T>::Initialise();
 
     // DETERMINE THE GRID SPACING AND CREATE THE GRID
-    if (this->inputControlPointGrid == nullptr) {
+    if (inputControlPointGrid == nullptr) {
         // Set the spacing along y and z if undefined. Their values are set to match
         // the spacing along the x axis
-        if (this->spacing[1] != this->spacing[1]) this->spacing[1] = this->spacing[0];
-        if (this->spacing[2] != this->spacing[2]) this->spacing[2] = this->spacing[0];
+        if (spacing[1] != spacing[1]) spacing[1] = spacing[0];
+        if (spacing[2] != spacing[2]) spacing[2] = spacing[0];
 
         /* Convert the spacing from voxel to mm if necessary */
-        float spacingInMillimeter[3] = {this->spacing[0], this->spacing[1], this->spacing[2]};
-        if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * this->inputReference->dx;
-        if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * this->inputReference->dy;
-        if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * this->inputReference->dz;
+        float spacingInMillimeter[3] = {spacing[0], spacing[1], spacing[2]};
+        if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * inputReference->dx;
+        if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * inputReference->dy;
+        if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * inputReference->dz;
 
         // Define the spacing for the first level
         float gridSpacing[3];
-        gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(this->levelNumber - 1));
-        gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(this->levelNumber - 1));
+        gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(levelNumber - 1));
+        gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(levelNumber - 1));
         gridSpacing[2] = 1.0f;
-        if (this->referencePyramid[0]->nz > 1)
-            gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber - 1));
+        if (referencePyramid[0]->nz > 1)
+            gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(levelNumber - 1));
 
         // Create and allocate the control point image
-        reg_createControlPointGrid<T>(&this->controlPointGrid, this->referencePyramid[0], gridSpacing);
+        reg_createControlPointGrid<T>(&controlPointGrid, referencePyramid[0], gridSpacing);
 
         // The control point position image is initialised with the affine transformation
-        if (this->affineTransformation == nullptr) {
-            memset(this->controlPointGrid->data, 0, this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
-            reg_tools_multiplyValueToImage(this->controlPointGrid, this->controlPointGrid, 0.f);
-            reg_getDeformationFromDisplacement(this->controlPointGrid);
-        } else reg_affine_getDeformationField(this->affineTransformation, this->controlPointGrid);
+        if (affineTransformation == nullptr) {
+            memset(controlPointGrid->data, 0, controlPointGrid->nvox * controlPointGrid->nbyper);
+            reg_tools_multiplyValueToImage(controlPointGrid, controlPointGrid, 0.f);
+            reg_getDeformationFromDisplacement(controlPointGrid);
+        } else reg_affine_getDeformationField(affineTransformation, controlPointGrid);
     } else {
         // The control point grid image is initialised with the provided grid
-        this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid);
-        this->controlPointGrid->data = (void *)malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
-        memcpy(this->controlPointGrid->data, this->inputControlPointGrid->data,
-               this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
+        controlPointGrid = nifti_copy_nim_info(inputControlPointGrid);
+        controlPointGrid->data = (void *)malloc(controlPointGrid->nvox * controlPointGrid->nbyper);
+        memcpy(controlPointGrid->data, inputControlPointGrid->data,
+               controlPointGrid->nvox * controlPointGrid->nbyper);
         // The final grid spacing is computed
-        this->spacing[0] = this->controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber - 1));
-        this->spacing[1] = this->controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber - 1));
-        if (this->controlPointGrid->nz > 1)
-            this->spacing[2] = this->controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber - 1));
+        spacing[0] = controlPointGrid->dx / powf(2.0f, (float)(levelNumber - 1));
+        spacing[1] = controlPointGrid->dy / powf(2.0f, (float)(levelNumber - 1));
+        if (controlPointGrid->nz > 1)
+            spacing[2] = controlPointGrid->dz / powf(2.0f, (float)(levelNumber - 1));
     }
 #ifdef NDEBUG
-    if (this->verbose) {
+    if (verbose) {
 #endif
         std::string text;
         // Print out some global information about the registration
-        reg_print_info(this->executableName, "***********************************************************");
-        reg_print_info(this->executableName, "INPUT PARAMETERS");
-        reg_print_info(this->executableName, "***********************************************************");
-        reg_print_info(this->executableName, "Reference image:");
-        text = stringFormat("\t* name: %s", this->inputReference->fname);
-        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(executableName, "***********************************************************");
+        reg_print_info(executableName, "INPUT PARAMETERS");
+        reg_print_info(executableName, "***********************************************************");
+        reg_print_info(executableName, "Reference image:");
+        text = stringFormat("\t* name: %s", inputReference->fname);
+        reg_print_info(executableName, text.c_str());
         text = stringFormat("\t* image dimension: %i x %i x %i x %i",
-                            this->inputReference->nx, this->inputReference->ny,
-                            this->inputReference->nz, this->inputReference->nt);
-        reg_print_info(this->executableName, text.c_str());
+                            inputReference->nx, inputReference->ny,
+                            inputReference->nz, inputReference->nt);
+        reg_print_info(executableName, text.c_str());
         text = stringFormat("\t* image spacing: %g x %g x %g mm",
-                            this->inputReference->dx, this->inputReference->dy, this->inputReference->dz);
-        reg_print_info(this->executableName, text.c_str());
-        for (int i = 0; i < this->inputReference->nt; i++) {
+                            inputReference->dx, inputReference->dy, inputReference->dz);
+        reg_print_info(executableName, text.c_str());
+        for (int i = 0; i < inputReference->nt; i++) {
             text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
-                                i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]);
-            reg_print_info(this->executableName, text.c_str());
-            if (this->measure_nmi != nullptr) {
-                if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) {
+                                i, inputReference->nt - 1, referenceThresholdLow[i], referenceThresholdUp[i]);
+            reg_print_info(executableName, text.c_str());
+            if (measure_nmi != nullptr) {
+                if (measure_nmi->GetTimepointsWeights()[i] > 0.0) {
                     text = stringFormat("\t* binnining size for timepoint %i/%i: %i",
-                                        i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4);
-                    reg_print_info(this->executableName, text.c_str());
+                                        i, inputFloating->nt - 1, measure_nmi->GetReferenceBinNumber()[i] - 4);
+                    reg_print_info(executableName, text.c_str());
                 }
             }
         }
-        text = stringFormat("\t* gaussian smoothing sigma: %g", this->referenceSmoothingSigma);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-        reg_print_info(this->executableName, "Floating image:");
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t* name: %s", this->inputFloating->fname);
-        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t* gaussian smoothing sigma: %g", referenceSmoothingSigma);
+        reg_print_info(executableName, text.c_str());
+        reg_print_info(executableName, "");
+        reg_print_info(executableName, "Floating image:");
+        reg_print_info(executableName, text.c_str());
+        text = stringFormat("\t* name: %s", inputFloating->fname);
+        reg_print_info(executableName, text.c_str());
         text = stringFormat("\t* image dimension: %i x %i x %i x %i",
-                            this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz, this->inputFloating->nt);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx,
-                            this->inputFloating->dy, this->inputFloating->dz);
-        reg_print_info(this->executableName, text.c_str());
-        for (int i = 0; i < this->inputFloating->nt; i++) {
+                            inputFloating->nx, inputFloating->ny, inputFloating->nz, inputFloating->nt);
+        reg_print_info(executableName, text.c_str());
+        text = stringFormat("\t* image spacing: %g x %g x %g mm", inputFloating->dx,
+                            inputFloating->dy, inputFloating->dz);
+        reg_print_info(executableName, text.c_str());
+        for (int i = 0; i < inputFloating->nt; i++) {
             text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
-                                i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]);
-            reg_print_info(this->executableName, text.c_str());
-            if (this->measure_nmi != nullptr) {
-                if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) {
-                    text = stringFormat("\t* binnining size for timepoint %i/%i: %i",
-                                        i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4);
-                    reg_print_info(this->executableName, text.c_str());
+                                i, inputFloating->nt - 1, floatingThresholdLow[i], floatingThresholdUp[i]);
+            reg_print_info(executableName, text.c_str());
+            if (measure_nmi != nullptr) {
+                if (measure_nmi->GetTimepointsWeights()[i] > 0.0) {
+                    text = stringFormat("\t* binning size for timepoint %i/%i: %i",
+                                        i, inputFloating->nt - 1, measure_nmi->GetFloatingBinNumber()[i] - 4);
+                    reg_print_info(executableName, text.c_str());
                 }
             }
         }
-        text = stringFormat("\t* gaussian smoothing sigma: %g", this->floatingSmoothingSigma);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-        text = stringFormat("Warped image padding value: %g", this->warpedPaddingValue);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-        text = stringFormat("Level number: %i", this->levelNumber);
-        reg_print_info(this->executableName, text.c_str());
-        if (this->levelNumber != this->levelToPerform) {
-            text = stringFormat("\t* Level to perform: %i", this->levelToPerform);
-            reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t* gaussian smoothing sigma: %g", floatingSmoothingSigma);
+        reg_print_info(executableName, text.c_str());
+        reg_print_info(executableName, "");
+        text = stringFormat("Warped image padding value: %g", warpedPaddingValue);
+        reg_print_info(executableName, text.c_str());
+        reg_print_info(executableName, "");
+        text = stringFormat("Level number: %i", levelNumber);
+        reg_print_info(executableName, text.c_str());
+        if (levelNumber != levelToPerform) {
+            text = stringFormat("\t* Level to perform: %i", levelToPerform);
+            reg_print_info(executableName, text.c_str());
         }
-        reg_print_info(this->executableName, "");
-        text = stringFormat("Maximum iteration number during the last level: %i", (int)this->maxIterationNumber);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-
-        text = stringFormat("Final spacing in mm: %g %g %g", this->spacing[0], this->spacing[1], this->spacing[2]);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-        if (this->measure_ssd != nullptr)
-            reg_print_info(this->executableName, "The SSD is used as a similarity measure.");
-        if (this->measure_kld != nullptr)
-            reg_print_info(this->executableName, "The KL divergence is used as a similarity measure.");
-        if (this->measure_lncc != nullptr)
-            reg_print_info(this->executableName, "The LNCC is used as a similarity measure.");
-        if (this->measure_dti != nullptr)
-            reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure.");
-        if (this->measure_mind != nullptr)
-            reg_print_info(this->executableName, "MIND is used as a similarity measure.");
-        if (this->measure_mindssc != nullptr)
-            reg_print_info(this->executableName, "MINDSSC is used as a similarity measure.");
-        if (this->measure_nmi != nullptr || (this->measure_dti == nullptr && this->measure_kld == nullptr &&
-                                          this->measure_lncc == nullptr && this->measure_nmi == nullptr &&
-                                          this->measure_ssd == nullptr && this->measure_mind == nullptr &&
-                                          this->measure_mindssc == nullptr))
-            reg_print_info(this->executableName, "The NMI is used as a similarity measure.");
-        text = stringFormat("Similarity measure term weight: %g", this->similarityWeight);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-        if (this->bendingEnergyWeight > 0) {
-            text = stringFormat("Bending energy penalty term weight: %g", this->bendingEnergyWeight);
-            reg_print_info(this->executableName, text.c_str());
-            reg_print_info(this->executableName, "");
+        reg_print_info(executableName, "");
+        text = stringFormat("Maximum iteration number during the last level: %i", (int)maxIterationNumber);
+        reg_print_info(executableName, text.c_str());
+        reg_print_info(executableName, "");
+
+        text = stringFormat("Final spacing in mm: %g %g %g", spacing[0], spacing[1], spacing[2]);
+        reg_print_info(executableName, text.c_str());
+        reg_print_info(executableName, "");
+        if (measure_ssd != nullptr)
+            reg_print_info(executableName, "The SSD is used as a similarity measure.");
+        if (measure_kld != nullptr)
+            reg_print_info(executableName, "The KL divergence is used as a similarity measure.");
+        if (measure_lncc != nullptr)
+            reg_print_info(executableName, "The LNCC is used as a similarity measure.");
+        if (measure_dti != nullptr)
+            reg_print_info(executableName, "A DTI based measure is used as a similarity measure.");
+        if (measure_mind != nullptr)
+            reg_print_info(executableName, "MIND is used as a similarity measure.");
+        if (measure_mindssc != nullptr)
+            reg_print_info(executableName, "MINDSSC is used as a similarity measure.");
+        if (measure_nmi != nullptr || (measure_dti == nullptr && measure_kld == nullptr &&
+                                       measure_lncc == nullptr && measure_nmi == nullptr &&
+                                       measure_ssd == nullptr && measure_mind == nullptr &&
+                                       measure_mindssc == nullptr))
+            reg_print_info(executableName, "The NMI is used as a similarity measure.");
+        text = stringFormat("Similarity measure term weight: %g", similarityWeight);
+        reg_print_info(executableName, text.c_str());
+        reg_print_info(executableName, "");
+        if (bendingEnergyWeight > 0) {
+            text = stringFormat("Bending energy penalty term weight: %g", bendingEnergyWeight);
+            reg_print_info(executableName, text.c_str());
+            reg_print_info(executableName, "");
         }
-        if ((this->linearEnergyWeight) > 0) {
-            text = stringFormat("Linear energy penalty term weight: %g", this->linearEnergyWeight);
-            reg_print_info(this->executableName, text.c_str());
-            reg_print_info(this->executableName, "");
+        if ((linearEnergyWeight) > 0) {
+            text = stringFormat("Linear energy penalty term weight: %g", linearEnergyWeight);
+            reg_print_info(executableName, text.c_str());
+            reg_print_info(executableName, "");
         }
-        if (this->jacobianLogWeight > 0) {
-            text = stringFormat("Jacobian-based penalty term weight: %g", this->jacobianLogWeight);
-            reg_print_info(this->executableName, text.c_str());
-            if (this->jacobianLogApproximation) {
-                reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated");
+        if (jacobianLogWeight > 0) {
+            text = stringFormat("Jacobian-based penalty term weight: %g", jacobianLogWeight);
+            reg_print_info(executableName, text.c_str());
+            if (jacobianLogApproximation) {
+                reg_print_info(executableName, "\t* Jacobian-based penalty term is approximated");
             } else {
-                reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated");
+                reg_print_info(executableName, "\t* Jacobian-based penalty term is not approximated");
             }
-            reg_print_info(this->executableName, "");
+            reg_print_info(executableName, "");
         }
-        if ((this->landmarkRegWeight) > 0) {
-            text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight);
-            reg_print_info(this->executableName, text.c_str());
-            reg_print_info(this->executableName, "");
+        if ((landmarkRegWeight) > 0) {
+            text = stringFormat("Landmark distance regularisation term weight: %g", landmarkRegWeight);
+            reg_print_info(executableName, text.c_str());
+            reg_print_info(executableName, "");
         }
 #ifdef NDEBUG
     }
 #endif
 
-    this->initialised = true;
+    initialised = true;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::Initialise");
 #endif
@@ -369,13 +374,30 @@ void reg_f3d<T>::Initialise() {
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
+void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
+    if (platformCode == NR_PLATFORM_CPU)
+        con = new F3dContent(reference, floating, controlPointGrid, localWeightSimInput, mask, affineTransformation, sizeof(T));
+#ifdef _USE_CUDA
+    else if (platformCode == NR_PLATFORM_CUDA)
+        con = new CudaF3dContent(reference, floating, controlPointGrid, localWeightSimInput, mask, affineTransformation, sizeof(T));
+#endif
+    compute = platform->CreateCompute(con);
+}
+/* *************************************************************** */
+/* *************************************************************** */
+template <class T>
+void reg_f3d<T>::DeinitContent() {
+    delete compute;
+    compute = nullptr;
+    delete con;
+    con = nullptr;
+}
+/* *************************************************************** */
+/* *************************************************************** */
+template <class T>
 void reg_f3d<T>::GetDeformationField() {
-    reg_spline_getDeformationField(this->controlPointGrid,
-                                   this->deformationFieldImage,
-                                   this->currentMask,
-                                   false, //composition
-                                   true // bspline
-    );
+    compute->GetDeformationField(false, // Composition
+                                 true); // bspline
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetDeformationField");
 #endif
@@ -384,31 +406,17 @@ void reg_f3d<T>::GetDeformationField() {
 /* *************************************************************** */
 template <class T>
 double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
-    if (this->jacobianLogWeight <= 0) return 0;
+    if (jacobianLogWeight <= 0) return 0;
+
+    bool approx = type == 2 ? false : jacobianLogApproximation;
+
+    double value = compute->GetJacobianPenaltyTerm(approx);
 
-    double value;
-    if (type == 2) {
-        value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid,
-                                                  this->currentReference,
-                                                  false);
-    } else {
-        value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid,
-                                                  this->currentReference,
-                                                  this->jacobianLogApproximation);
-    }
     unsigned int maxit = 5;
     if (type > 0) maxit = 20;
     unsigned int it = 0;
     while (value != value && it < maxit) {
-        if (type == 2) {
-            value = reg_spline_correctFolding(this->controlPointGrid,
-                                              this->currentReference,
-                                              false);
-        } else {
-            value = reg_spline_correctFolding(this->controlPointGrid,
-                                              this->currentReference,
-                                              this->jacobianLogApproximation);
-        }
+        value = compute->CorrectFolding(approx);
 #ifndef NDEBUG
         reg_print_msg_debug("Folding correction");
 #endif
@@ -416,7 +424,7 @@ double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
     }
     if (type > 0) {
         if (value != value) {
-            this->optimiser->RestoreBestDOF();
+            optimiser->RestoreBestDOF();
             reg_print_fct_warn("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm()");
             reg_print_msg_warn("The folding correction scheme failed");
         } else {
@@ -432,120 +440,105 @@ double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm");
 #endif
-    return this->jacobianLogWeight * value;
+    return jacobianLogWeight * value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 double reg_f3d<T>::ComputeBendingEnergyPenaltyTerm() {
-    if (this->bendingEnergyWeight <= 0) return 0;
+    if (bendingEnergyWeight <= 0) return 0;
 
-    double value = reg_spline_approxBendingEnergy(this->controlPointGrid);
+    double value = compute->ApproxBendingEnergy();
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::ComputeBendingEnergyPenaltyTerm");
 #endif
-    return this->bendingEnergyWeight * value;
+    return bendingEnergyWeight * value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 double reg_f3d<T>::ComputeLinearEnergyPenaltyTerm() {
-    if (this->linearEnergyWeight <= 0)
+    if (linearEnergyWeight <= 0)
         return 0;
 
-    double value = reg_spline_approxLinearEnergy(this->controlPointGrid);
-
+    double value = compute->ApproxLinearEnergy();
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::ComputeLinearEnergyPenaltyTerm");
 #endif
-    return this->linearEnergyWeight * value;
+    return linearEnergyWeight * value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 double reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm() {
-    if (this->landmarkRegWeight <= 0)
+    if (landmarkRegWeight <= 0)
         return 0;
 
-    double value = reg_spline_getLandmarkDistance(this->controlPointGrid,
-                                                  this->landmarkRegNumber,
-                                                  this->landmarkReference,
-                                                  this->landmarkFloating);
-
+    double value = compute->GetLandmarkDistance(landmarkRegNumber, landmarkReference, landmarkFloating);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm");
 #endif
-    return this->landmarkRegWeight * value;
+    return landmarkRegWeight * value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 void reg_f3d<T>::GetSimilarityMeasureGradient() {
-    this->GetVoxelBasedGradient();
+    GetVoxelBasedGradient();
 
-    int kernel_type = CUBIC_SPLINE_KERNEL;
+    nifti_image *voxelBasedMeasureGradient = dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient();
+    const int kernel_type = CUBIC_SPLINE_KERNEL;
     // The voxel based NMI gradient is convolved with a spline kernel
     // Convolution along the x axis
     float currentNodeSpacing[3];
-    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dx;
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
     bool activeAxis[3] = {1, 0, 0};
-    reg_tools_kernelConvolution(this->voxelBasedMeasureGradient,
+    reg_tools_kernelConvolution(voxelBasedMeasureGradient,
                                 currentNodeSpacing,
                                 kernel_type,
                                 nullptr, // mask
                                 nullptr, // all volumes are considered as active
-                                activeAxis
-    );
+                                activeAxis);
     // Convolution along the y axis
-    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dy;
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dy;
     activeAxis[0] = 0;
     activeAxis[1] = 1;
-    reg_tools_kernelConvolution(this->voxelBasedMeasureGradient,
+    reg_tools_kernelConvolution(voxelBasedMeasureGradient,
                                 currentNodeSpacing,
                                 kernel_type,
                                 nullptr, // mask
                                 nullptr, // all volumes are considered as active
-                                activeAxis
-    );
+                                activeAxis);
     // Convolution along the z axis if required
-    if (this->voxelBasedMeasureGradient->nz > 1) {
-        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dz;
+    if (voxelBasedMeasureGradient->nz > 1) {
+        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dz;
         activeAxis[1] = 0;
         activeAxis[2] = 1;
-        reg_tools_kernelConvolution(this->voxelBasedMeasureGradient,
+        reg_tools_kernelConvolution(voxelBasedMeasureGradient,
                                     currentNodeSpacing,
                                     kernel_type,
                                     nullptr, // mask
                                     nullptr, // all volumes are considered as active
-                                    activeAxis
-        );
+                                    activeAxis);
     }
 
+    // Update the changes of voxelBasedMeasureGradient
+    dynamic_cast<F3dContent*>(con)->SetVoxelBasedMeasureGradient(voxelBasedMeasureGradient);
+
     // The node based NMI gradient is extracted
-    mat44 reorientation;
-    if (this->currentFloating->sform_code > 0)
-        reorientation = this->currentFloating->sto_ijk;
-    else reorientation = this->currentFloating->qto_ijk;
-    reg_voxelCentric2NodeCentric(this->transformationGradient,
-                                 this->voxelBasedMeasureGradient,
-                                 this->similarityWeight,
-                                 false, // no update
-                                 &reorientation
-    );
+    compute->VoxelCentricToNodeCentric(similarityWeight);
+
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetSimilarityMeasureGradient");
 #endif
-    return;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 void reg_f3d<T>::GetBendingEnergyGradient() {
-    if (this->bendingEnergyWeight <= 0) return;
+    if (bendingEnergyWeight <= 0) return;
 
-    reg_spline_approxBendingEnergyGradient(this->controlPointGrid,
-                                           this->transformationGradient,
-                                           this->bendingEnergyWeight);
+    compute->ApproxBendingEnergyGradient(bendingEnergyWeight);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetBendingEnergyGradient");
 #endif
@@ -554,11 +547,9 @@ void reg_f3d<T>::GetBendingEnergyGradient() {
 /* *************************************************************** */
 template <class T>
 void reg_f3d<T>::GetLinearEnergyGradient() {
-    if (this->linearEnergyWeight <= 0) return;
+    if (linearEnergyWeight <= 0) return;
 
-    reg_spline_approxLinearEnergyGradient(this->controlPointGrid,
-                                          this->transformationGradient,
-                                          this->linearEnergyWeight);
+    compute->ApproxLinearEnergyGradient(linearEnergyWeight);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetLinearEnergyGradient");
 #endif
@@ -567,13 +558,9 @@ void reg_f3d<T>::GetLinearEnergyGradient() {
 /* *************************************************************** */
 template <class T>
 void reg_f3d<T>::GetJacobianBasedGradient() {
-    if (this->jacobianLogWeight <= 0) return;
+    if (jacobianLogWeight <= 0) return;
 
-    reg_spline_getJacobianPenaltyTermGradient(this->controlPointGrid,
-                                              this->currentReference,
-                                              this->transformationGradient,
-                                              this->jacobianLogWeight,
-                                              this->jacobianLogApproximation);
+    compute->JacobianPenaltyTermGradient(jacobianLogWeight, jacobianLogApproximation);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetJacobianBasedGradient");
 #endif
@@ -582,190 +569,100 @@ void reg_f3d<T>::GetJacobianBasedGradient() {
 /* *************************************************************** */
 template <class T>
 void reg_f3d<T>::GetLandmarkDistanceGradient() {
-    if (this->landmarkRegWeight <= 0) return;
+    if (landmarkRegWeight <= 0) return;
 
-    reg_spline_getLandmarkDistanceGradient(this->controlPointGrid,
-                                           this->transformationGradient,
-                                           this->landmarkRegNumber,
-                                           this->landmarkReference,
-                                           this->landmarkFloating,
-                                           this->landmarkRegWeight);
+    compute->LandmarkDistanceGradient(landmarkRegNumber,
+                                      landmarkReference,
+                                      landmarkFloating,
+                                      landmarkRegWeight);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetLandmarkDistanceGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
-void reg_f3d<T>::SetGradientImageToZero() {
-    T* nodeGradPtr = static_cast<T *>(this->transformationGradient->data);
-    for (size_t i = 0; i < this->transformationGradient->nvox; ++i)
-        *nodeGradPtr++ = 0;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::SetGradientImageToZero");
-#endif
-}
+// template <class T>
+// void reg_f3d<T>::SetGradientImageToZero() {
+//     T* nodeGradPtr = static_cast<T*>(transformationGradient->data);
+//     for (size_t i = 0; i < transformationGradient->nvox; ++i)
+//         *nodeGradPtr++ = 0;
+// #ifndef NDEBUG
+//     reg_print_fct_debug("reg_f3d<T>::SetGradientImageToZero");
+// #endif
+// }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 T reg_f3d<T>::NormaliseGradient() {
     // First compute the gradient max length for normalisation purpose
-    //	T maxGradValue=0;
-    size_t voxNumber = this->transformationGradient->nx *
-        this->transformationGradient->ny *
-        this->transformationGradient->nz;
-    T *ptrX = static_cast<T *>(this->transformationGradient->data);
-    T *ptrY = &ptrX[voxNumber];
-    T *ptrZ = nullptr;
-    T maxGradValue = 0;
-    //	float *length=(float *)calloc(voxNumber,sizeof(float));
-    if (this->transformationGradient->nz > 1) {
-        ptrZ = &ptrY[voxNumber];
-        for (size_t i = 0; i < voxNumber; i++) {
-            T valX = 0, valY = 0, valZ = 0;
-            if (this->optimiseX == true)
-                valX = *ptrX++;
-            if (this->optimiseY == true)
-                valY = *ptrY++;
-            if (this->optimiseZ == true)
-                valZ = *ptrZ++;
-            //			length[i] = (float)(sqrt(valX*valX + valY*valY + valZ*valZ));
-            T length = (T)(sqrt(valX * valX + valY * valY + valZ * valZ));
-            maxGradValue = (length > maxGradValue) ? length : maxGradValue;
-        }
-    } else {
-        for (size_t i = 0; i < voxNumber; i++) {
-            T valX = 0, valY = 0;
-            if (this->optimiseX == true)
-                valX = *ptrX++;
-            if (this->optimiseY == true)
-                valY = *ptrY++;
-            //			length[i] = (float)(sqrt(valX*valX + valY*valY));
-            T length = (T)(sqrt(valX * valX + valY * valY));
-            maxGradValue = (length > maxGradValue) ? length : maxGradValue;
-        }
-    }
-    //	reg_heapSort(length,voxNumber);
-    //	T maxGradValue = (T)(length[90*voxNumber/100 - 1]);
-    //	free(length);
-
+    T maxGradLength = (T)compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ);
 
-    if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
+    if (strcmp(executableName, "NiftyReg F3D") == 0) {
         // The gradient is normalised if we are running f3d
         // It will be normalised later when running f3d_sym or f3d2
+        compute->NormaliseGradient(maxGradLength);
 #ifndef NDEBUG
         char text[255];
-        sprintf(text, "Objective function gradient maximal length: %g", maxGradValue);
+        sprintf(text, "Objective function gradient maximal length: %g", maxGradLength);
         reg_print_msg_debug(text);
 #endif
-        ptrX = static_cast<T *>(this->transformationGradient->data);
-        if (this->transformationGradient->nz > 1) {
-            ptrX = static_cast<T *>(this->transformationGradient->data);
-            ptrY = &ptrX[voxNumber];
-            ptrZ = &ptrY[voxNumber];
-            for (size_t i = 0; i < voxNumber; ++i) {
-                T valX = 0, valY = 0, valZ = 0;
-                if (this->optimiseX == true)
-                    valX = *ptrX;
-                if (this->optimiseY == true)
-                    valY = *ptrY;
-                if (this->optimiseZ == true)
-                    valZ = *ptrZ;
-                //				T tempLength = (float)(sqrt(valX*valX + valY*valY + valZ*valZ));
-                //				if(tempLength>maxGradValue){
-                //					*ptrX *= maxGradValue / tempLength;
-                //					*ptrY *= maxGradValue / tempLength;
-                //					*ptrZ *= maxGradValue / tempLength;
-                //				}
-                *ptrX++ = valX / maxGradValue;
-                *ptrY++ = valY / maxGradValue;
-                *ptrZ++ = valZ / maxGradValue;
-            }
-        } else {
-            ptrX = static_cast<T *>(this->transformationGradient->data);
-            ptrY = &ptrX[voxNumber];
-            for (size_t i = 0; i < voxNumber; ++i) {
-                T valX = 0, valY = 0;
-                if (this->optimiseX == true)
-                    valX = *ptrX;
-                if (this->optimiseY == true)
-                    valY = *ptrY;
-                //				T tempLength = (float)(sqrt(valX*valX + valY*valY));
-                //				if(tempLength>maxGradValue){
-                //					*ptrX *= maxGradValue / tempLength;
-                //					*ptrY *= maxGradValue / tempLength;
-                //				}
-                *ptrX++ = valX / maxGradValue;
-                *ptrY++ = valY / maxGradValue;
-            }
-        }
     }
-    // Returns the largest gradient distance
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::NormaliseGradient");
 #endif
 
-    //   reg_io_WriteImageFile(transformationGradient,
-    //                         "gradient.nii");
-    //   reg_exit();
-
-    return maxGradValue;
+    // Returns the largest gradient distance
+    return maxGradLength;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 void reg_f3d<T>::DisplayCurrentLevelParameters() {
 #ifdef NDEBUG
-    if (this->verbose) {
+    if (verbose) {
 #endif
+        nifti_image *reference = con->Content::GetReference();
+        nifti_image *floating = con->Content::GetFloating();
         char text[255];
-        sprintf(text, "Current level: %i / %i", this->currentLevel + 1, this->levelNumber);
-        reg_print_info(this->executableName, text);
-        sprintf(text, "Maximum iteration number: %i", (int)this->maxIterationNumber);
-        reg_print_info(this->executableName, text);
-        reg_print_info(this->executableName, "Current reference image");
-        sprintf(text, "\t* image dimension: %i x %i x %i x %i",
-                this->currentReference->nx, this->currentReference->ny,
-                this->currentReference->nz, this->currentReference->nt);
-        reg_print_info(this->executableName, text);
-        sprintf(text, "\t* image spacing: %g x %g x %g mm",
-                this->currentReference->dx, this->currentReference->dy,
-                this->currentReference->dz);
-        reg_print_info(this->executableName, text);
-        reg_print_info(this->executableName, "Current floating image");
-        sprintf(text, "\t* image dimension: %i x %i x %i x %i",
-                this->currentFloating->nx, this->currentFloating->ny,
-                this->currentFloating->nz, this->currentFloating->nt);
-        reg_print_info(this->executableName, text);
-        sprintf(text, "\t* image spacing: %g x %g x %g mm",
-                this->currentFloating->dx, this->currentFloating->dy,
-                this->currentFloating->dz);
-        reg_print_info(this->executableName, text);
-        reg_print_info(this->executableName, "Current control point image");
+        sprintf(text, "Current level: %i / %i", currentLevel + 1, levelNumber);
+        reg_print_info(executableName, text);
+        sprintf(text, "Maximum iteration number: %i", (int)maxIterationNumber);
+        reg_print_info(executableName, text);
+        reg_print_info(executableName, "Current reference image");
+        sprintf(text, "\t* image dimension: %i x %i x %i x %i", reference->nx, reference->ny, reference->nz, reference->nt);
+        reg_print_info(executableName, text);
+        sprintf(text, "\t* image spacing: %g x %g x %g mm", reference->dx, reference->dy, reference->dz);
+        reg_print_info(executableName, text);
+        reg_print_info(executableName, "Current floating image");
+        sprintf(text, "\t* image dimension: %i x %i x %i x %i", floating->nx, floating->ny, floating->nz, floating->nt);
+        reg_print_info(executableName, text);
+        sprintf(text, "\t* image spacing: %g x %g x %g mm", floating->dx, floating->dy, floating->dz);
+        reg_print_info(executableName, text);
+        reg_print_info(executableName, "Current control point image");
         sprintf(text, "\t* image dimension: %i x %i x %i",
-                this->controlPointGrid->nx, this->controlPointGrid->ny,
-                this->controlPointGrid->nz);
-        reg_print_info(this->executableName, text);
+                controlPointGrid->nx, controlPointGrid->ny,
+                controlPointGrid->nz);
+        reg_print_info(executableName, text);
         sprintf(text, "\t* image spacing: %g x %g x %g mm",
-                this->controlPointGrid->dx, this->controlPointGrid->dy,
-                this->controlPointGrid->dz);
-        reg_print_info(this->executableName, text);
+                controlPointGrid->dx, controlPointGrid->dy,
+                controlPointGrid->dz);
+        reg_print_info(executableName, text);
 #ifdef NDEBUG
     }
 #endif
 
 #ifndef NDEBUG
-    if (this->currentReference->sform_code > 0)
-        reg_mat44_disp(&(this->currentReference->sto_xyz), (char *)"[NiftyReg DEBUG] Reference sform");
-    else reg_mat44_disp(&(this->currentReference->qto_xyz), (char *)"[NiftyReg DEBUG] Reference qform");
+    if (reference->sform_code > 0)
+        reg_mat44_disp(&(reference->sto_xyz), (char *)"[NiftyReg DEBUG] Reference sform");
+    else reg_mat44_disp(&(reference->qto_xyz), (char *)"[NiftyReg DEBUG] Reference qform");
 
-    if (this->currentFloating->sform_code > 0)
-        reg_mat44_disp(&(this->currentFloating->sto_xyz), (char *)"[NiftyReg DEBUG] Floating sform");
-    else reg_mat44_disp(&(this->currentFloating->qto_xyz), (char *)"[NiftyReg DEBUG] Floating qform");
+    if (floating->sform_code > 0)
+        reg_mat44_disp(&(floating->sto_xyz), (char *)"[NiftyReg DEBUG] Floating sform");
+    else reg_mat44_disp(&(floating->qto_xyz), (char *)"[NiftyReg DEBUG] Floating qform");
 
-    if (this->controlPointGrid->sform_code > 0)
-        reg_mat44_disp(&(this->controlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] CPP sform");
-    else reg_mat44_disp(&(this->controlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] CPP qform");
+    if (controlPointGrid->sform_code > 0)
+        reg_mat44_disp(&(controlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] CPP sform");
+    else reg_mat44_disp(&(controlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] CPP qform");
 #endif
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::DisplayCurrentLevelParameters");
@@ -775,24 +672,24 @@ void reg_f3d<T>::DisplayCurrentLevelParameters() {
 /* *************************************************************** */
 template <class T>
 double reg_f3d<T>::GetObjectiveFunctionValue() {
-    this->currentWJac = this->ComputeJacobianBasedPenaltyTerm(1); // 20 iterations
+    currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations
 
-    this->currentWBE = this->ComputeBendingEnergyPenaltyTerm();
+    currentWBE = ComputeBendingEnergyPenaltyTerm();
 
-    this->currentWLE = this->ComputeLinearEnergyPenaltyTerm();
+    currentWLE = ComputeLinearEnergyPenaltyTerm();
 
-    this->currentWLand = this->ComputeLandmarkDistancePenaltyTerm();
+    currentWLand = ComputeLandmarkDistancePenaltyTerm();
 
     // Compute initial similarity measure
-    this->currentWMeasure = 0.0;
-    if (this->similarityWeight > 0) {
-        this->WarpFloatingImage(this->interpolation);
-        this->currentWMeasure = this->ComputeSimilarityMeasure();
+    currentWMeasure = 0.0;
+    if (similarityWeight > 0) {
+        WarpFloatingImage(interpolation);
+        currentWMeasure = ComputeSimilarityMeasure();
     }
 #ifndef NDEBUG
     char text[255];
     sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g",
-            this->currentWMeasure, this->currentWBE, this->currentWLE, this->currentWJac, this->currentWLand);
+            currentWMeasure, currentWBE, currentWLE, currentWJac, currentWLand);
     reg_print_msg_debug(text);
 #endif
 
@@ -801,51 +698,17 @@ double reg_f3d<T>::GetObjectiveFunctionValue() {
 #endif
     // Store the global objective function value
 
-    return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - this->currentWLand;
+    return currentWMeasure - currentWBE - currentWLE - currentWJac - currentWLand;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 void reg_f3d<T>::UpdateParameters(float scale) {
-    T *currentDOF = this->optimiser->GetCurrentDOF();
-    T *bestDOF = this->optimiser->GetBestDOF();
-    T *gradient = this->optimiser->GetGradient();
+    T *currentDOF = optimiser->GetCurrentDOF();
+    T *bestDOF = optimiser->GetBestDOF();
+    T *gradient = optimiser->GetGradient();
 
-    // Update the control point position
-    if (this->optimiser->GetOptimiseX() == true &&
-        this->optimiser->GetOptimiseY() == true &&
-        this->optimiser->GetOptimiseZ() == true) {
-        // Update the values for all axis displacement
-        for (size_t i = 0; i < this->optimiser->GetDOFNumber(); ++i) {
-            currentDOF[i] = bestDOF[i] + scale * gradient[i];
-        }
-    } else {
-        size_t voxNumber = this->optimiser->GetVoxNumber();
-        // Update the values for the x-axis displacement
-        if (this->optimiser->GetOptimiseX() == true) {
-            for (size_t i = 0; i < voxNumber; ++i) {
-                currentDOF[i] = bestDOF[i] + scale * gradient[i];
-            }
-        }
-        // Update the values for the y-axis displacement
-        if (this->optimiser->GetOptimiseY() == true) {
-            T *currentDOFY = &currentDOF[voxNumber];
-            T *bestDOFY = &bestDOF[voxNumber];
-            T *gradientY = &gradient[voxNumber];
-            for (size_t i = 0; i < voxNumber; ++i) {
-                currentDOFY[i] = bestDOFY[i] + scale * gradientY[i];
-            }
-        }
-        // Update the values for the z-axis displacement
-        if (this->optimiser->GetOptimiseZ() == true && this->optimiser->GetNDim() > 2) {
-            T *currentDOFZ = &currentDOF[2 * voxNumber];
-            T *bestDOFZ = &bestDOF[2 * voxNumber];
-            T *gradientZ = &gradient[2 * voxNumber];
-            for (size_t i = 0; i < voxNumber; ++i) {
-                currentDOFZ[i] = bestDOFZ[i] + scale * gradientZ[i];
-            }
-        }
-    }
+    compute->UpdateControlPointPosition(currentDOF, bestDOF, gradient, scale, optimiseX, optimiseY, optimiseZ);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::UpdateParameters");
 #endif
@@ -854,18 +717,13 @@ void reg_f3d<T>::UpdateParameters(float scale) {
 /* *************************************************************** */
 template <class T>
 void reg_f3d<T>::SetOptimiser() {
-    reg_base<T>::SetOptimiser();
-    this->optimiser->Initialise(this->controlPointGrid->nvox,
-                                this->controlPointGrid->nz > 1 ? 3 : 2,
-                                this->optimiseX,
-                                this->optimiseY,
-                                this->optimiseZ,
-                                this->maxIterationNumber,
-                                0, // currentIterationNumber,
-                                this,
-                                static_cast<T *>(this->controlPointGrid->data),
-                                static_cast<T *>(this->transformationGradient->data)
-    );
+    optimiser = platform->CreateOptimiser<T>(dynamic_cast<F3dContent*>(con),
+                                             this,
+                                             maxIterationNumber,
+                                             useConjGradient,
+                                             optimiseX,
+                                             optimiseY,
+                                             optimiseZ);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SetOptimiser");
 #endif
@@ -874,12 +732,15 @@ void reg_f3d<T>::SetOptimiser() {
 /* *************************************************************** */
 template <class T>
 void reg_f3d<T>::SmoothGradient() {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
     // The gradient is smoothed using a Gaussian kernel if it is required
-    if (this->gradientSmoothingSigma != 0) {
-        float kernel = fabs(this->gradientSmoothingSigma);
-        reg_tools_kernelConvolution(this->transformationGradient,
-                                    &kernel,
-                                    GAUSSIAN_KERNEL);
+    if (gradientSmoothingSigma != 0) {
+        float kernel = fabs(gradientSmoothingSigma);
+        F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+        reg_tools_kernelConvolution(con->GetTransformationGradient(), &kernel, GAUSSIAN_KERNEL);
+        // Update the changes of transformationGradient
+        con->SetTransformationGradient(con->F3dContent::GetTransformationGradient());
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SmoothGradient");
@@ -889,19 +750,34 @@ void reg_f3d<T>::SmoothGradient() {
 /* *************************************************************** */
 template <class T>
 void reg_f3d<T>::GetApproximatedGradient() {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    nifti_image *controlPointGrid = con->GetControlPointGrid();
+    nifti_image *transformationGradient = con->GetTransformationGradient();
+
     // Loop over every control point
-    T *gridPtr = static_cast<T*>(this->controlPointGrid->data);
-    T *gradPtr = static_cast<T*>(this->transformationGradient->data);
-    T eps = this->controlPointGrid->dx / 100.f;
-    for (size_t i = 0; i < this->controlPointGrid->nvox; ++i) {
-        T currentValue = this->optimiser->GetBestDOF()[i];
+    T *gridPtr = static_cast<T*>(controlPointGrid->data);
+    T *gradPtr = static_cast<T*>(transformationGradient->data);
+    T eps = controlPointGrid->dx / 100.f;
+    for (size_t i = 0; i < controlPointGrid->nvox; ++i) {
+        T currentValue = optimiser->GetBestDOF()[i];
         gridPtr[i] = currentValue + eps;
-        double valPlus = this->GetObjectiveFunctionValue();
+        // Update the changes. Bad hack, fix that!
+        con->SetControlPointGrid(controlPointGrid);
+        double valPlus = GetObjectiveFunctionValue();
         gridPtr[i] = currentValue - eps;
-        double valMinus = this->GetObjectiveFunctionValue();
+        // Update the changes. Bad hack, fix that!
+        con->SetControlPointGrid(controlPointGrid);
+        double valMinus = GetObjectiveFunctionValue();
         gridPtr[i] = currentValue;
+        // Update the changes. Bad hack, fix that!
+        con->SetControlPointGrid(controlPointGrid);
         gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps));
     }
+
+    // Update the changes
+    con->SetTransformationGradient(transformationGradient);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetApproximatedGradient");
 #endif
@@ -911,35 +787,25 @@ void reg_f3d<T>::GetApproximatedGradient() {
 template<class T>
 nifti_image** reg_f3d<T>::GetWarpedImage() {
     // The initial images are used
-    if (this->inputReference == nullptr ||
-        this->inputFloating == nullptr ||
-        this->controlPointGrid == nullptr) {
+    if (!inputReference || !inputFloating || !controlPointGrid) {
         reg_print_fct_error("reg_f3d<T>::GetWarpedImage()");
         reg_print_msg_error("The reference, floating and control point grid images have to be defined");
         reg_exit();
     }
 
-    this->currentReference = this->inputReference;
-    this->currentFloating = this->inputFloating;
-    this->currentMask = nullptr;
+    const int datatype = inputFloating->datatype;
 
-    reg_base<T>::AllocateWarped();
-    reg_base<T>::AllocateDeformationField();
-    reg_base<T>::WarpFloatingImage(3); // cubic spline interpolation
-    reg_base<T>::ClearDeformationField();
+    InitContent(inputReference, inputFloating, nullptr);
 
-    nifti_image **warpedImage = (nifti_image **)malloc(2 * sizeof(nifti_image *));
-    warpedImage[0] = nifti_copy_nim_info(this->warped);
-    warpedImage[0]->cal_min = this->inputFloating->cal_min;
-    warpedImage[0]->cal_max = this->inputFloating->cal_max;
-    warpedImage[0]->scl_slope = this->inputFloating->scl_slope;
-    warpedImage[0]->scl_inter = this->inputFloating->scl_inter;
-    warpedImage[0]->data = (void *)malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper);
-    memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox * warpedImage[0]->nbyper);
+    WarpFloatingImage(3); // cubic spline interpolation
 
-    warpedImage[1] = nullptr;
+    nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
+    warpedImage[0] = con->GetWarped(datatype, 0);
+    if (inputFloating->nt == 2)
+        warpedImage[1] = con->GetWarped(datatype, 1);
 
-    reg_f3d<T>::ClearWarped();
+    con->SetWarped(nullptr); // Prevent deallocating of warpedImage
+    DeinitContent();
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetWarpedImage");
 #endif
@@ -949,9 +815,9 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
 /* *************************************************************** */
 template<class T>
 nifti_image* reg_f3d<T>::GetControlPointPositionImage() {
-    nifti_image *returnedControlPointGrid = nifti_copy_nim_info(this->controlPointGrid);
-    returnedControlPointGrid->data = (void *)malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
-    memcpy(returnedControlPointGrid->data, this->controlPointGrid->data,
+    nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGrid);
+    returnedControlPointGrid->data = (void*)malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
+    memcpy(returnedControlPointGrid->data, controlPointGrid->data,
            returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
     return returnedControlPointGrid;
 #ifndef NDEBUG
@@ -962,11 +828,11 @@ nifti_image* reg_f3d<T>::GetControlPointPositionImage() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::UpdateBestObjFunctionValue() {
-    this->bestWMeasure = this->currentWMeasure;
-    this->bestWBE = this->currentWBE;
-    this->bestWLE = this->currentWLE;
-    this->bestWJac = this->currentWJac;
-    this->bestWLand = this->currentWLand;
+    bestWMeasure = currentWMeasure;
+    bestWBE = currentWBE;
+    bestWLE = currentWLE;
+    bestWJac = currentWJac;
+    bestWLand = currentWLand;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::UpdateBestObjFunctionValue");
 #endif
@@ -975,14 +841,14 @@ void reg_f3d<T>::UpdateBestObjFunctionValue() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::PrintInitialObjFunctionValue() {
-    if (!this->verbose) return;
+    if (!verbose) return;
 
-    double bestValue = this->optimiser->GetBestObjFunctionValue();
+    double bestValue = optimiser->GetBestObjFunctionValue();
 
     char text[255];
     sprintf(text, "Initial objective function: %g = (wSIM)%g - (wBE)%g - (wLE)%g - (wJAC)%g - (wLAN)%g",
-            bestValue, this->bestWMeasure, this->bestWBE, this->bestWLE, this->bestWJac, this->bestWLand);
-    reg_print_info(this->executableName, text);
+            bestValue, bestWMeasure, bestWBE, bestWLE, bestWJac, bestWLand);
+    reg_print_info(executableName, text);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::PrintInitialObjFunctionValue");
 #endif
@@ -991,23 +857,23 @@ void reg_f3d<T>::PrintInitialObjFunctionValue() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::PrintCurrentObjFunctionValue(T currentSize) {
-    if (!this->verbose) return;
+    if (!verbose) return;
 
     char text[255];
     sprintf(text, "[%i] Current objective function: %g",
-            (int)this->optimiser->GetCurrentIterationNumber(),
-            this->optimiser->GetBestObjFunctionValue());
-    sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure);
-    if (this->bendingEnergyWeight > 0)
-        sprintf(text + strlen(text), " - (wBE)%.2e", this->bestWBE);
-    if (this->linearEnergyWeight > 0)
-        sprintf(text + strlen(text), " - (wLE)%.2e", this->bestWLE);
-    if (this->jacobianLogWeight > 0)
-        sprintf(text + strlen(text), " - (wJAC)%.2e", this->bestWJac);
-    if (this->landmarkRegWeight > 0)
-        sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand);
+            (int)optimiser->GetCurrentIterationNumber(),
+            optimiser->GetBestObjFunctionValue());
+    sprintf(text + strlen(text), " = (wSIM)%g", bestWMeasure);
+    if (bendingEnergyWeight > 0)
+        sprintf(text + strlen(text), " - (wBE)%.2e", bestWBE);
+    if (linearEnergyWeight > 0)
+        sprintf(text + strlen(text), " - (wLE)%.2e", bestWLE);
+    if (jacobianLogWeight > 0)
+        sprintf(text + strlen(text), " - (wJAC)%.2e", bestWJac);
+    if (landmarkRegWeight > 0)
+        sprintf(text + strlen(text), " - (wLAN)%.2e", bestWLand);
     sprintf(text + strlen(text), " [+ %g mm]", currentSize);
-    reg_print_info(this->executableName, text);
+    reg_print_info(executableName, text);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::PrintCurrentObjFunctionValue");
 #endif
@@ -1016,27 +882,27 @@ void reg_f3d<T>::PrintCurrentObjFunctionValue(T currentSize) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetObjectiveFunctionGradient() {
-    if (!this->useApproxGradient) {
+    if (!useApproxGradient) {
         // Compute the gradient of the similarity measure
-        if (this->similarityWeight > 0) {
-            this->WarpFloatingImage(this->interpolation);
-            this->GetSimilarityMeasureGradient();
+        if (similarityWeight > 0) {
+            WarpFloatingImage(interpolation);
+            GetSimilarityMeasureGradient();
         } else {
-            this->SetGradientImageToZero();
+            dynamic_cast<F3dContent*>(con)->ZeroTransformationGradient();
         }
         // Compute the penalty term gradients if required
-        this->GetBendingEnergyGradient();
-        this->GetJacobianBasedGradient();
-        this->GetLinearEnergyGradient();
-        this->GetLandmarkDistanceGradient();
+        GetBendingEnergyGradient();
+        GetJacobianBasedGradient();
+        GetLinearEnergyGradient();
+        GetLandmarkDistanceGradient();
     } else {
-        this->GetApproximatedGradient();
+        GetApproximatedGradient();
     }
 
-    this->optimiser->IncrementCurrentIterationNumber();
+    optimiser->IncrementCurrentIterationNumber();
 
     // Smooth the gradient if require
-    this->SmoothGradient();
+    SmoothGradient();
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetObjectiveFunctionGradient");
 #endif
@@ -1045,8 +911,8 @@ void reg_f3d<T>::GetObjectiveFunctionGradient() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::CorrectTransformation() {
-    if (this->jacobianLogWeight > 0 && this->jacobianLogApproximation == true)
-        this->ComputeJacobianBasedPenaltyTerm(2); // 20 iterations without approximation
+    if (jacobianLogWeight > 0 && jacobianLogApproximation)
+        ComputeJacobianBasedPenaltyTerm(2); // 20 iterations without approximation
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::CorrectTransformation");
 #endif
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index 537a9bdc..86135bda 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -26,7 +26,7 @@ class reg_f3d: public reg_base<T> {
     bool jacobianLogApproximation;
     T spacing[3];
 
-    nifti_image *transformationGradient;
+    // nifti_image *transformationGradient;
     bool gridRefinement;
 
     double currentWJac;
@@ -36,9 +36,9 @@ class reg_f3d: public reg_base<T> {
     double bestWBE;
     double bestWLE;
 
-    virtual void AllocateTransformationGradient();
-    virtual void ClearTransformationGradient();
-    virtual T InitialiseCurrentLevel();
+    // virtual void AllocateTransformationGradient() override;
+    // virtual void DeallocateTransformationGradient() override;
+    virtual T InitialiseCurrentLevel(nifti_image *reference) override;
 
     virtual double ComputeBendingEnergyPenaltyTerm();
     virtual double ComputeLinearEnergyPenaltyTerm();
@@ -49,25 +49,25 @@ class reg_f3d: public reg_base<T> {
     virtual void GetLinearEnergyGradient();
     virtual void GetJacobianBasedGradient();
     virtual void GetLandmarkDistanceGradient();
-    virtual void SetGradientImageToZero();
-    virtual T NormaliseGradient();
-    virtual void SmoothGradient();
-    virtual void GetObjectiveFunctionGradient();
-    virtual void GetApproximatedGradient();
+    // virtual void SetGradientImageToZero() override;
+    virtual T NormaliseGradient() override;
+    virtual void SmoothGradient() override;
+    virtual void GetObjectiveFunctionGradient() override;
+    virtual void GetApproximatedGradient() override;
     void GetSimilarityMeasureGradient();
 
-    virtual void GetDeformationField();
-    virtual void DisplayCurrentLevelParameters();
+    virtual void GetDeformationField() override;
+    virtual void DisplayCurrentLevelParameters() override;
 
-    virtual double GetObjectiveFunctionValue();
-    virtual void UpdateBestObjFunctionValue();
-    virtual void UpdateParameters(float);
-    virtual void SetOptimiser();
+    virtual double GetObjectiveFunctionValue() override;
+    virtual void UpdateBestObjFunctionValue() override;
+    virtual void UpdateParameters(float) override;
+    virtual void SetOptimiser() override;
 
-    virtual void PrintInitialObjFunctionValue();
-    virtual void PrintCurrentObjFunctionValue(T);
+    virtual void PrintInitialObjFunctionValue() override;
+    virtual void PrintCurrentObjFunctionValue(T) override;
 
-    virtual void CorrectTransformation();
+    virtual void CorrectTransformation() override;
 
     void (*funcProgressCallback)(float pcntProgress, void *params);
     void *paramsProgressCallback;
@@ -101,11 +101,10 @@ class reg_f3d: public reg_base<T> {
     // f3d_gpu specific option
     virtual int CheckMemoryMB() { return EXIT_SUCCESS; }
 
-    virtual void CheckParameters();
-    virtual void Initialise();
+    virtual void CheckParameters() override;
+    virtual void Initialise() override;
+    virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) override;
+    virtual void DeinitContent() override;
     virtual nifti_image* GetControlPointPositionImage();
-    virtual nifti_image** GetWarpedImage();
-
-    // Function used for testing
-    virtual void reg_test_setControlPointGrid(nifti_image *cpp) { controlPointGrid = cpp; }
+    virtual nifti_image** GetWarpedImage() override;
 };

From fdbb3a3c083cfa31e3ddca22732c50ecefdfbe04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Sat, 3 Dec 2022 01:33:41 +0000
Subject: [PATCH 025/314] Disable OpenCL for reg_test_interpolation

---
 niftyreg_build_version.txt          | 2 +-
 reg-test/reg_test_interpolation.cpp | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 94361d49..6a4573e8 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-132
+133
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 3487aba3..ad04279d 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -124,10 +124,10 @@ TEST_CASE("Resampling", "[resampling]") {
             NR_PLATFORM_CUDA));
 #endif
 #ifdef _USE_OPENCL
-        listContent.push_back(content_desc(
-            new ClAladinContent(reference, reference),
-            "OpenCL",
-            NR_PLATFORM_CL));
+        // listContent.push_back(content_desc(
+        //     new ClAladinContent(reference, reference),
+        //     "OpenCL",
+        //     NR_PLATFORM_CL));
 #endif
         // Loop over all possibles contents for each test
         for (auto&& content : listContent) {

From 520d795015fc3eef7cfd258960869c054818d249 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 4 Jan 2023 15:54:43 +0000
Subject: [PATCH 026/314] Fix compilation errors

---
 .gitignore                                    |   2 +-
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/CMakeLists.txt                       |   4 +-
 reg-lib/CMakeLists.txt                        | 183 ++++---
 reg-lib/Platform.cpp                          |  13 +
 reg-lib/Platform.h                            |  13 -
 reg-lib/_reg_aladin.cpp                       |  18 +-
 reg-lib/_reg_aladin.h                         |  32 +-
 reg-lib/_reg_aladin_sym.cpp                   |   2 +-
 reg-lib/_reg_aladin_sym.h                     |   7 +-
 reg-lib/_reg_base.cpp                         |  34 +-
 reg-lib/_reg_base.h                           |   4 +-
 reg-lib/_reg_f3d.cpp                          | 483 +++++++++---------
 reg-lib/cl/CMakeLists.txt                     |  25 +-
 reg-lib/cl/ClAffineDeformationFieldKernel.cpp |   2 +-
 reg-lib/cl/ClAffineDeformationFieldKernel.h   |   2 +-
 reg-lib/cl/ClAladinContent.cpp                |   2 +-
 reg-lib/cl/ClBlockMatchingKernel.cpp          |   2 +-
 reg-lib/cl/ClBlockMatchingKernel.h            |   2 +-
 reg-lib/cl/ClOptimiseKernel.cpp               |   2 +-
 reg-lib/cl/ClOptimiseKernel.h                 |   2 +-
 reg-lib/cl/ClResampleImageKernel.h            |   2 +-
 reg-lib/cpu/_reg_tools.cpp                    |   2 +-
 reg-lib/cuda/CMakeLists.txt                   |   9 +-
 .../reg_test_affine_deformation_field.cpp     |   2 +-
 reg-test/reg_test_blockMatching.cpp           |   2 +-
 ...est_coherence_affine_deformation_field.cpp |   2 +-
 reg-test/reg_test_coherence_blockMatching.cpp |   2 +-
 reg-test/reg_test_coherence_interpolation.cpp |   2 +-
 reg-test/reg_test_interpolation.cpp           |   2 +-
 reg-test/reg_test_leastTrimmedSquares.cpp     |   2 +-
 31 files changed, 436 insertions(+), 427 deletions(-)

diff --git a/.gitignore b/.gitignore
index d96bb96f..158e90bb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,4 +41,4 @@ CMakeSettings.json
 .DS_Store
 
 # Build
-build
+build*
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6a4573e8..405e2afe 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-133
+134
diff --git a/reg-apps/CMakeLists.txt b/reg-apps/CMakeLists.txt
index 73e6d0e0..c9a9e955 100755
--- a/reg-apps/CMakeLists.txt
+++ b/reg-apps/CMakeLists.txt
@@ -49,7 +49,7 @@ set(MODULE_LIST
   reg_jacobian
   reg_aladin
   reg_f3d
-  )
+)
 #-----------------------------------------------------------------------------
 if(USE_CUDA OR USE_OPENCL)
   set(gpuinfo_libraries "")
@@ -69,7 +69,7 @@ foreach(MODULE_NAME ${MODULE_LIST})
         RUNTIME DESTINATION bin COMPONENT Runtime
         LIBRARY DESTINATION lib COMPONENT Runtime
         ARCHIVE DESTINATION lib COMPONENT Runtime
-        )
+    )
 endforeach(MODULE_NAME)
 #-----------------------------------------------------------------------------
 install(PROGRAMS groupwise_niftyreg_params.sh DESTINATION bin COMPONENT Runtime)
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index f927f247..0e0ec358 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -26,9 +26,7 @@ install(TARGETS _reg_maths
 install(FILES cpu/_reg_maths.h cpu/_reg_maths_eigen.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_maths")
 #-----------------------------------------------------------------------------
-add_library(_reg_tools ${NIFTYREG_LIBRARY_TYPE}
-  cpu/_reg_tools.cpp
-)
+add_library(_reg_tools ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_tools.cpp)
 target_link_libraries(_reg_tools
   _reg_maths
   reg_nifti
@@ -41,13 +39,8 @@ install(TARGETS _reg_tools
 install(FILES cpu/_reg_tools.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_tools")
 #-----------------------------------------------------------------------------
-add_library(_reg_globalTrans
-  ${NIFTYREG_LIBRARY_TYPE}
-  cpu/_reg_globalTrans.cpp
-)
-target_link_libraries(_reg_globalTrans
-  _reg_tools
-)
+add_library(_reg_globalTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_globalTrans.cpp)
+target_link_libraries(_reg_globalTrans _reg_tools)
 install(TARGETS _reg_globalTrans
   RUNTIME DESTINATION bin
   LIBRARY DESTINATION lib
@@ -56,8 +49,7 @@ install(TARGETS _reg_globalTrans
 install(FILES cpu/_reg_globalTrans.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_globalTrans")
 #-----------------------------------------------------------------------------
-add_library(_reg_localTrans
-  ${NIFTYREG_LIBRARY_TYPE}
+add_library(_reg_localTrans ${NIFTYREG_LIBRARY_TYPE}
   cpu/_reg_splineBasis.h
   cpu/_reg_splineBasis.cpp
   cpu/_reg_localTrans.h
@@ -101,68 +93,120 @@ install(TARGETS _reg_measure
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES cpu/_reg_measure.h cpu/_reg_nmi.h cpu/_reg_ssd.h cpu/_reg_kld.h cpu/_reg_lncc.h cpu/_reg_dti.h cpu/_reg_mind.h DESTINATION include)
+install(FILES
+  cpu/_reg_measure.h
+  cpu/_reg_nmi.h
+  cpu/_reg_ssd.h
+  cpu/_reg_kld.h
+  cpu/_reg_lncc.h
+  cpu/_reg_dti.h
+  cpu/_reg_mind.h DESTINATION include
+)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_measure")
 #-----------------------------------------------------------------------------
 add_library(_reg_resampling ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_resampling.cpp)
-target_link_libraries(_reg_resampling _reg_globalTrans
-)
+target_link_libraries(_reg_resampling _reg_globalTrans)
 install(TARGETS _reg_resampling
   RUNTIME DESTINATION bin
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
-  )
+)
 install(FILES cpu/_reg_resampling.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_resampling")
 #-----------------------------------------------------------------------------
 add_library(_reg_blockMatching ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_blockMatching.cpp)
-target_link_libraries(_reg_blockMatching _reg_globalTrans
-)
+target_link_libraries(_reg_blockMatching _reg_globalTrans)
 install(TARGETS _reg_blockMatching
   RUNTIME DESTINATION bin
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
-  )
+)
 install(FILES cpu/_reg_blockMatching.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_blockMatching")
 #-----------------------------------------------------------------------------
 add_library(_reg_femTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_femTrans.cpp)
-target_link_libraries(_reg_femTrans _reg_globalTrans
-)
+target_link_libraries(_reg_femTrans _reg_globalTrans)
 install(TARGETS _reg_femTrans
   RUNTIME DESTINATION bin
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
-  )
+)
 install(FILES cpu/_reg_femTrans.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_femTrans")
 #-----------------------------------------------------------------------------
-## BUILD THE ALADIN LIBRARY
-set(_reg_aladin_files
-  AladinContent.cpp
-  AladinContent.h
+add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE}
   Compute.cpp
   Compute.h
+  AladinContent.cpp
+  AladinContent.h
   Content.cpp
   Content.h
+  F3dContent.cpp
+  F3dContent.h
   Platform.cpp
   Platform.h
-  cpu/CpuAffineDeformationFieldKernel.h
+)
+install(TARGETS _reg_compute
+  RUNTIME DESTINATION lib
+  LIBRARY DESTINATION lib
+  ARCHIVE DESTINATION lib
+)
+install(FILES
+  Compute.h
+  ComputeFactory.h
+  AladinContent.h
+  Content.h
+  F3dContent.h
+  Platform.h DESTINATION include
+)
+set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_compute")
+#-----------------------------------------------------------------------------
+add_library(_reg_kernels ${NIFTYREG_LIBRARY_TYPE}
+  cpu/CpuKernelFactory.cpp
+  cpu/CpuKernelFactory.h
   cpu/CpuAffineDeformationFieldKernel.cpp
-  cpu/CpuBlockMatchingKernel.h
+  cpu/CpuAffineDeformationFieldKernel.h
   cpu/CpuBlockMatchingKernel.cpp
-  cpu/CpuConvolutionKernel.h
+  cpu/CpuBlockMatchingKernel.h
   cpu/CpuConvolutionKernel.cpp
-  cpu/CpuOptimiseKernel.h
+  cpu/CpuConvolutionKernel.h
   cpu/CpuOptimiseKernel.cpp
-  cpu/CpuResampleImageKernel.h
+  cpu/CpuOptimiseKernel.h
   cpu/CpuResampleImageKernel.cpp
-  cpu/CpuKernelFactory.cpp
+  cpu/CpuResampleImageKernel.h
+)
+target_link_libraries(_reg_kernels
+  _reg_blockMatching
+)
+install(TARGETS _reg_kernels
+  RUNTIME DESTINATION lib
+  LIBRARY DESTINATION lib
+  ARCHIVE DESTINATION lib
+)
+install(FILES
+  KernelFactory.h
+  AffineDeformationFieldKernel.h
+  BlockMatchingKernel.h
+  ConvolutionKernel.h
+  Kernel.h
+  OptimiseKernel.h
+  ResampleImageKernel.h
   cpu/CpuKernelFactory.h
+  cpu/CpuAffineDeformationFieldKernel.h
+  cpu/CpuBlockMatchingKernel.h
+  cpu/CpuConvolutionKernel.h
+  cpu/CpuOptimiseKernel.h
+  cpu/CpuResampleImageKernel.h DESTINATION include
 )
-set(_reg_aladin_libraries
-  _reg_localTrans
+set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_kernels")
+#-----------------------------------------------------------------------------
+## BUILD THE ALADIN LIBRARY
+add_library(_reg_aladin ${NIFTYREG_LIBRARY_TYPE} _reg_aladin.cpp _reg_aladin_sym.cpp)
+target_link_libraries(_reg_aladin
   _reg_blockMatching
+  _reg_compute
+  _reg_kernels
+  _reg_localTrans
   _reg_resampling
   _reg_globalTrans
   _reg_tools
@@ -170,68 +214,34 @@ set(_reg_aladin_libraries
   ${NR_OPENCL_LIBRARIES}
   ${NR_CUDA_LIBRARIES}
 )
-add_library(_reg_aladin ${NIFTYREG_LIBRARY_TYPE} ${_reg_aladin_files})
-target_link_libraries(_reg_aladin ${_reg_aladin_libraries})
 install(TARGETS _reg_aladin
   RUNTIME DESTINATION lib
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
-  )
-install(FILES cpu/_reg_macros.h DESTINATION include)
-install(FILES _reg_aladin.h _reg_aladin_sym.h DESTINATION include)
-install(FILES _reg_aladin.cpp _reg_aladin_sym.cpp DESTINATION include)
-install(FILES AladinContent.h Platform.h  DESTINATION include)
+)
 install(FILES
-        AffineDeformationFieldKernel.h
-        BlockMatchingKernel.h
-        Compute.h
-        ComputeFactory.h
-        ConvolutionKernel.h
-        Kernel.h
-        OptimiseKernel.h
-        ResampleImageKernel.h
-        cpu/CpuAffineDeformationFieldKernel.h
-        cpu/CpuBlockMatchingKernel.h
-        cpu/CpuConvolutionKernel.h
-        cpu/CpuOptimiseKernel.h
-        cpu/CpuResampleImageKernel.h
-        KernelFactory.h cpu/CpuKernelFactory.h DESTINATION include)
+  _reg_aladin.h
+  _reg_aladin_sym.h
+  cpu/_reg_macros.h DESTINATION include
+)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_aladin")
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
 ## BUILD THE F3D LIBRARY
 set(_reg_f3d_files
-  Compute.cpp
-  Compute.h
-  Content.cpp
-  Content.h
-  F3dContent.cpp
-  F3dContent.h
-  Platform.cpp
-  Platform.h
-  _reg_base.h
   _reg_base.cpp
-  _reg_f3d.h
+  _reg_base.h
   _reg_f3d.cpp
-  # _reg_f3d2.h
+  _reg_f3d.h
   # _reg_f3d2.cpp
-  # _reg_f3d_sym.h
+  # _reg_f3d2.h
   # _reg_f3d_sym.cpp
-  cpu/CpuAffineDeformationFieldKernel.h
-  cpu/CpuAffineDeformationFieldKernel.cpp
-  cpu/CpuBlockMatchingKernel.h
-  cpu/CpuBlockMatchingKernel.cpp
-  cpu/CpuConvolutionKernel.h
-  cpu/CpuConvolutionKernel.cpp
-  cpu/CpuOptimiseKernel.h
-  cpu/CpuOptimiseKernel.cpp
-  cpu/CpuResampleImageKernel.h
-  cpu/CpuResampleImageKernel.cpp
-  cpu/CpuKernelFactory.h
-  cpu/CpuKernelFactory.cpp
+  # _reg_f3d_sym.h
 )
 set(_reg_f3d_libraries
   _reg_blockMatching
+  _reg_compute
+  _reg_kernels
   _reg_localTrans
   _reg_globalTrans
   _reg_resampling
@@ -247,12 +257,15 @@ install(TARGETS _reg_f3d
   RUNTIME DESTINATION bin
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
-  )
-install(FILES _reg_base.h Content.h DESTINATION include)
-install(FILES _reg_f3d.h F3dContent.h DESTINATION include)
-install(FILES _reg_f3d2.h DESTINATION include)
-install(FILES _reg_f3d_sym.h DESTINATION include)
-install(FILES cpu/_reg_optimiser.cpp cpu/_reg_optimiser.h DESTINATION include)
+)
+install(FILES
+  _reg_base.h
+  _reg_f3d.h
+  _reg_f3d2.h
+  _reg_f3d_sym.h
+  cpu/_reg_optimiser.cpp
+  cpu/_reg_optimiser.h DESTINATION include
+)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_f3d")
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 365ed542..ab20b0be 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -1,4 +1,17 @@
 #include "Platform.h"
+#include "CpuKernelFactory.h"
+#ifdef _USE_CUDA
+#include "CudaKernelFactory.h"
+#include "CudaF3dContent.h"
+#include "CudaComputeFactory.h"
+#include "CudaContextSingleton.h"
+#include "_reg_optimiser_gpu.h"
+#endif
+#ifdef _USE_OPENCL
+#include "ClKernelFactory.h"
+#include "ClComputeFactory.h"
+#include "ClContextSingleton.h"
+#endif
 
 /* *************************************************************** */
 Platform::Platform(int platformCodeIn) {
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 47b9f697..d1e02f83 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -2,21 +2,8 @@
 
 #include "F3dContent.h"
 #include "KernelFactory.h"
-#include "CpuKernelFactory.h"
 #include "ComputeFactory.h"
 #include "_reg_optimiser.h"
-#ifdef _USE_CUDA
-#include "CudaF3dContent.h"
-#include "CudaKernelFactory.h"
-#include "CudaComputeFactory.h"
-#include "CudaContextSingleton.h"
-#include "_reg_optimiser_gpu.h"
-#endif
-#ifdef _USE_OPENCL
-#include "ClKernelFactory.h"
-#include "ClComputeFactory.h"
-#include "ClContextSingleton.h"
-#endif
 
 #define NR_PLATFORM_CPU  0
 #define NR_PLATFORM_CUDA 1
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 49a8f011..534d643b 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -1,21 +1,4 @@
-#include "_reg_ReadWriteMatrix.h"
 #include "_reg_aladin.h"
-#include "_reg_stringFormat.h"
-#include "Platform.h"
-#include "AffineDeformationFieldKernel.h"
-#include "ResampleImageKernel.h"
-#include "BlockMatchingKernel.h"
-#include "OptimiseKernel.h"
-#include "ConvolutionKernel.h"
-#include "AladinContent.h"
-
-#ifdef _USE_CUDA
-#include "CudaAladinContent.h"
-#endif
-#ifdef _USE_OPENCL
-#include "CLAladinContent.h"
-#include "InfoDevice.h"
-#endif
 
 /* *************************************************************** */
 template<class T>
@@ -652,3 +635,4 @@ void reg_aladin<T>::DebugPrintLevelInfoEnd() {
     reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin] Final transformation matrix:");
 }
 /* *************************************************************** */
+template class reg_aladin<float>;
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 016681cc..72cd0988 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -23,12 +23,23 @@
 #include "_reg_nmi.h"
 #include "_reg_ssd.h"
 #include "_reg_tools.h"
-#include "float.h"
-#include <limits>
-
-class AladinContent;
-class Platform;
-class Kernel;
+#include "_reg_ReadWriteMatrix.h"
+#include "_reg_stringFormat.h"
+#include "Platform.h"
+#include "AffineDeformationFieldKernel.h"
+#include "ResampleImageKernel.h"
+#include "BlockMatchingKernel.h"
+#include "OptimiseKernel.h"
+#include "ConvolutionKernel.h"
+#include "AladinContent.h"
+
+#ifdef _USE_CUDA
+#include "CudaAladinContent.h"
+#endif
+#ifdef _USE_OPENCL
+#include "ClAladinContent.h"
+#include "InfoDevice.h"
+#endif
 
 /**
  * @brief Block matching registration class
@@ -158,8 +169,8 @@ class reg_aladin {
     }
 
     void SetInputTransform(const char *filename);
-    mat44* GetInputTransform() {
-        return this->InputTransform;
+    char* GetInputTransform() {
+        return this->inputTransformName;
     }
 
     mat44* GetTransformationMatrix() {
@@ -263,8 +274,5 @@ class reg_aladin {
 private:
     Kernel *affineTransformation3DKernel, *blockMatchingKernel;
     Kernel *optimiseKernel, *resamplingKernel;
-    void ResolveMatrix(unsigned int iterations,
-                       const unsigned int optimizationFlag);
+    void ResolveMatrix(unsigned int iterations, const unsigned int optimizationFlag);
 };
-
-#include "_reg_aladin.cpp"
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index d2164a58..cf0b8b60 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -29,7 +29,6 @@ reg_aladin_sym<T>::reg_aladin_sym ()
 #ifndef NDEBUG
    reg_print_msg_debug("reg_aladin_sym constructor called");
 #endif
-
 }
 /* *************************************************************** */
 template <class T>
@@ -372,3 +371,4 @@ void reg_aladin_sym<T>::DebugPrintLevelInfoEnd()
    reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[reg_aladin_sym] Final backward transformation matrix:");
 }
 /* *************************************************************** */
+template class reg_aladin_sym<float>;
diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h
index 5f724e35..0cd4ec5f 100644
--- a/reg-lib/_reg_aladin_sym.h
+++ b/reg-lib/_reg_aladin_sym.h
@@ -16,8 +16,7 @@
 
 /// @brief Symmetric Block matching registration class
 template <class T>
-class reg_aladin_sym : public reg_aladin<T>
-{
+class reg_aladin_sym : public reg_aladin<T> {
 private:
   AladinContent *backCon;
   Kernel *bAffineTransformation3DKernel, *bConvolutionKernel, *bBlockMatchingKernel, *bOptimiseKernel, *bResamplingKernel;
@@ -55,7 +54,5 @@ class reg_aladin_sym : public reg_aladin<T>
 public:
   reg_aladin_sym();
   virtual ~reg_aladin_sym();
-  virtual void SetInputFloatingMask(nifti_image *);
+  virtual void SetInputFloatingMask(nifti_image*);
 };
-
-#include "_reg_aladin_sym.cpp"
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index db23e75a..895b417d 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -15,7 +15,7 @@
 
  /* *************************************************************** */
  /* *************************************************************** */
-template <class T>
+template<class T>
 reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     platform = nullptr;
     platformCode = NR_PLATFORM_CPU;
@@ -98,7 +98,7 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
 #endif
 }
 /* *************************************************************** */
-template <class T>
+template<class T>
 reg_base<T>::~reg_base() {
     // DeallocateWarped();
     // DeallocateWarpedGradient();
@@ -290,7 +290,7 @@ void reg_base<T>::SetFloatingThresholdLow(unsigned int i, T t) {
 #endif
 }
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_base<T>::UseRobustRange() {
     robustRange = true;
 #ifndef NDEBUG
@@ -298,7 +298,7 @@ void reg_base<T>::UseRobustRange() {
 #endif
 }
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_base<T>::DoNotUseRobustRange() {
     robustRange = false;
 #ifndef NDEBUG
@@ -430,7 +430,7 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 }
 /* *************************************************************** */
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_base<T>::DeallocateCurrentInputImage() {
 //     reference = nullptr;
 //     currentMask = nullptr;
@@ -444,7 +444,7 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 // }
 /* *************************************************************** */
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_base<T>::AllocateWarped() {
 //     if (reference == nullptr) {
 //         reg_print_fct_error("reg_base::AllocateWarped()");
@@ -467,7 +467,7 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 // #endif
 // }
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_base<T>::DeallocateWarped() {
 //     if (warped != nullptr)
 //         nifti_image_free(warped);
@@ -478,7 +478,7 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 // }
 /* *************************************************************** */
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_base<T>::AllocateDeformationField() {
 //     if (reference == nullptr) {
 //         reg_print_fct_error("reg_base::AllocateDeformationField()");
@@ -528,7 +528,7 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 // #endif
 // }
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_base<T>::DeallocateDeformationField() {
 //     if (deformationFieldImage != nullptr) {
 //         nifti_image_free(deformationFieldImage);
@@ -542,7 +542,7 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 // #endif
 // }
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_base<T>::AllocateWarpedGradient() {
 //     if (deformationFieldImage == nullptr) {
 //         reg_print_fct_error("reg_base::AllocateWarpedGradient()");
@@ -558,7 +558,7 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 // #endif
 // }
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_base<T>::DeallocateWarpedGradient() {
 //     if (warpedGradient != nullptr) {
 //         nifti_image_free(warpedGradient);
@@ -569,7 +569,7 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 // #endif
 // }
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_base<T>::AllocateVoxelBasedMeasureGradient() {
 //     if (deformationFieldImage == nullptr) {
 //         reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()");
@@ -585,7 +585,7 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 // #endif
 // }
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_base<T>::DeallocateVoxelBasedMeasureGradient() {
 //     if (voxelBasedMeasureGradient != nullptr) {
 //         nifti_image_free(voxelBasedMeasureGradient);
@@ -958,7 +958,7 @@ void reg_base<T>::Initialise() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 double reg_base<T>::ComputeSimilarityMeasure() {
     double measure = 0;
     if (measure_nmi != nullptr)
@@ -989,7 +989,7 @@ double reg_base<T>::ComputeSimilarityMeasure() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_base<T>::GetVoxelBasedGradient() {
     // The voxel based gradient image is filled with zeros
     // TODO Temporarily call F3dContent. This function will be moved to reg_f3d.
@@ -1221,7 +1221,7 @@ void reg_base<T>::SetLocalWeightSim(nifti_image *i) {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_base<T>::WarpFloatingImage(int inter) {
     // Compute the deformation field
     GetDeformationField();
@@ -1247,7 +1247,7 @@ void reg_base<T>::WarpFloatingImage(int inter) {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_base<T>::Run() {
 #ifndef NDEBUG
     char text[255];
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index f44a25b5..4f361076 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -30,8 +30,8 @@
 #include "float.h"
 #include "Platform.h"
 
- /// @brief Base registration class
-template <class T>
+/// @brief Base registration class
+template<class T>
 class reg_base: public InterfaceOptimiser {
 protected:
     // Platform
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 86247243..a997d308 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -19,11 +19,11 @@
 
  /* *************************************************************** */
  /* *************************************************************** */
-template <class T>
+template<class T>
 reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint)
     : reg_base<T>::reg_base(refTimePoint, floTimePoint) {
 
-    executableName = (char *)"NiftyReg F3D";
+    this->executableName = (char*)"NiftyReg F3D";
     inputControlPointGrid = nullptr; // pointer to external
     controlPointGrid = nullptr;
     bendingEnergyWeight = 0.001;
@@ -33,8 +33,8 @@ reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint)
     spacing[0] = -5;
     spacing[1] = std::numeric_limits<T>::quiet_NaN();
     spacing[2] = std::numeric_limits<T>::quiet_NaN();
-    useConjGradient = true;
-    useApproxGradient = false;
+    this->useConjGradient = true;
+    this->useApproxGradient = false;
 
     // approxParzenWindow=true;
 
@@ -48,7 +48,7 @@ reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint)
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 reg_f3d<T>::~reg_f3d() {
     // DeallocateTransformationGradient();
     if (controlPointGrid != nullptr) {
@@ -117,7 +117,7 @@ void reg_f3d<T>::SetSpacing(unsigned int i, T s) {
 #endif
 }
 /* *************************************************************** */
-template <class T>
+template<class T>
 T reg_f3d<T>::InitialiseCurrentLevel(nifti_image *reference) {
     // Set the initial step size for the gradient ascent
     T maxStepSize = reference->dx > reference->dy ? reference->dx : reference->dy;
@@ -126,9 +126,9 @@ T reg_f3d<T>::InitialiseCurrentLevel(nifti_image *reference) {
 
     // Refine the control point grid if required
     if (gridRefinement) {
-        if (currentLevel == 0) {
-            bendingEnergyWeight = bendingEnergyWeight / static_cast<T>(powf(16.0f, levelNumber - 1));
-            linearEnergyWeight = linearEnergyWeight / static_cast<T>(powf(3.0f, levelNumber - 1));
+        if (this->currentLevel == 0) {
+            bendingEnergyWeight = bendingEnergyWeight / static_cast<T>(powf(16.0f, this->levelNumber - 1));
+            linearEnergyWeight = linearEnergyWeight / static_cast<T>(powf(3.0f, this->levelNumber - 1));
         } else {
             bendingEnergyWeight = bendingEnergyWeight * static_cast<T>(16);
             linearEnergyWeight = linearEnergyWeight * static_cast<T>(3);
@@ -142,7 +142,7 @@ T reg_f3d<T>::InitialiseCurrentLevel(nifti_image *reference) {
     return maxStepSize;
 }
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_f3d<T>::AllocateTransformationGradient() {
 //     if (controlPointGrid == nullptr) {
 //         reg_print_fct_error("reg_f3d<T>::AllocateTransformationGradient()");
@@ -158,7 +158,7 @@ T reg_f3d<T>::InitialiseCurrentLevel(nifti_image *reference) {
 // #endif
 // }
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_f3d<T>::DeallocateTransformationGradient() {
 //     if (transformationGradient != nullptr) {
 //         nifti_image_free(transformationGradient);
@@ -173,20 +173,19 @@ template<class T>
 void reg_f3d<T>::CheckParameters() {
     reg_base<T>::CheckParameters();
     // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
-    if (strcmp(executableName, "NiftyReg F3D") == 0 ||
-        strcmp(executableName, "NiftyReg F3D GPU") == 0) {
+    if (strcmp(this->executableName, "NiftyReg F3D") == 0 ||
+        strcmp(this->executableName, "NiftyReg F3D GPU") == 0) {
         T penaltySum = bendingEnergyWeight +
             linearEnergyWeight +
             jacobianLogWeight +
-            landmarkRegWeight;
+            this->landmarkRegWeight;
         if (penaltySum >= 1.0) {
-            similarityWeight = 0;
-            similarityWeight /= penaltySum;
+            this->similarityWeight = 0;
             bendingEnergyWeight /= penaltySum;
             linearEnergyWeight /= penaltySum;
             jacobianLogWeight /= penaltySum;
-            landmarkRegWeight /= penaltySum;
-        } else similarityWeight = 1.0 - penaltySum;
+            this->landmarkRegWeight /= penaltySum;
+        } else this->similarityWeight = 1.0 - penaltySum;
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::CheckParameters");
@@ -196,7 +195,7 @@ void reg_f3d<T>::CheckParameters() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::Initialise() {
-    if (initialised) return;
+    if (this->initialised) return;
 
     reg_base<T>::Initialise();
 
@@ -209,27 +208,27 @@ void reg_f3d<T>::Initialise() {
 
         /* Convert the spacing from voxel to mm if necessary */
         float spacingInMillimeter[3] = {spacing[0], spacing[1], spacing[2]};
-        if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * inputReference->dx;
-        if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * inputReference->dy;
-        if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * inputReference->dz;
+        if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * this->inputReference->dx;
+        if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * this->inputReference->dy;
+        if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * this->inputReference->dz;
 
         // Define the spacing for the first level
         float gridSpacing[3];
-        gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(levelNumber - 1));
-        gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(levelNumber - 1));
+        gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(this->levelNumber - 1));
+        gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(this->levelNumber - 1));
         gridSpacing[2] = 1.0f;
-        if (referencePyramid[0]->nz > 1)
-            gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(levelNumber - 1));
+        if (this->referencePyramid[0]->nz > 1)
+            gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber - 1));
 
         // Create and allocate the control point image
-        reg_createControlPointGrid<T>(&controlPointGrid, referencePyramid[0], gridSpacing);
+        reg_createControlPointGrid<T>(&controlPointGrid, this->referencePyramid[0], gridSpacing);
 
         // The control point position image is initialised with the affine transformation
-        if (affineTransformation == nullptr) {
+        if (this->affineTransformation == nullptr) {
             memset(controlPointGrid->data, 0, controlPointGrid->nvox * controlPointGrid->nbyper);
             reg_tools_multiplyValueToImage(controlPointGrid, controlPointGrid, 0.f);
             reg_getDeformationFromDisplacement(controlPointGrid);
-        } else reg_affine_getDeformationField(affineTransformation, controlPointGrid);
+        } else reg_affine_getDeformationField(this->affineTransformation, controlPointGrid);
     } else {
         // The control point grid image is initialised with the provided grid
         controlPointGrid = nifti_copy_nim_info(inputControlPointGrid);
@@ -237,186 +236,187 @@ void reg_f3d<T>::Initialise() {
         memcpy(controlPointGrid->data, inputControlPointGrid->data,
                controlPointGrid->nvox * controlPointGrid->nbyper);
         // The final grid spacing is computed
-        spacing[0] = controlPointGrid->dx / powf(2.0f, (float)(levelNumber - 1));
-        spacing[1] = controlPointGrid->dy / powf(2.0f, (float)(levelNumber - 1));
+        spacing[0] = controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber - 1));
+        spacing[1] = controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber - 1));
         if (controlPointGrid->nz > 1)
-            spacing[2] = controlPointGrid->dz / powf(2.0f, (float)(levelNumber - 1));
+            spacing[2] = controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber - 1));
     }
 #ifdef NDEBUG
-    if (verbose) {
+    if (this->verbose) {
 #endif
         std::string text;
         // Print out some global information about the registration
-        reg_print_info(executableName, "***********************************************************");
-        reg_print_info(executableName, "INPUT PARAMETERS");
-        reg_print_info(executableName, "***********************************************************");
-        reg_print_info(executableName, "Reference image:");
-        text = stringFormat("\t* name: %s", inputReference->fname);
-        reg_print_info(executableName, text.c_str());
+        reg_print_info(this->executableName, "***********************************************************");
+        reg_print_info(this->executableName, "INPUT PARAMETERS");
+        reg_print_info(this->executableName, "***********************************************************");
+        reg_print_info(this->executableName, "Reference image:");
+        text = stringFormat("\t* name: %s", this->inputReference->fname);
+        reg_print_info(this->executableName, text.c_str());
         text = stringFormat("\t* image dimension: %i x %i x %i x %i",
-                            inputReference->nx, inputReference->ny,
-                            inputReference->nz, inputReference->nt);
-        reg_print_info(executableName, text.c_str());
+                            this->inputReference->nx, this->inputReference->ny,
+                            this->inputReference->nz, this->inputReference->nt);
+        reg_print_info(this->executableName, text.c_str());
         text = stringFormat("\t* image spacing: %g x %g x %g mm",
-                            inputReference->dx, inputReference->dy, inputReference->dz);
-        reg_print_info(executableName, text.c_str());
-        for (int i = 0; i < inputReference->nt; i++) {
+                            this->inputReference->dx, this->inputReference->dy, this->inputReference->dz);
+        reg_print_info(this->executableName, text.c_str());
+        for (int i = 0; i < this->inputReference->nt; i++) {
             text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
-                                i, inputReference->nt - 1, referenceThresholdLow[i], referenceThresholdUp[i]);
-            reg_print_info(executableName, text.c_str());
-            if (measure_nmi != nullptr) {
-                if (measure_nmi->GetTimepointsWeights()[i] > 0.0) {
-                    text = stringFormat("\t* binnining size for timepoint %i/%i: %i",
-                                        i, inputFloating->nt - 1, measure_nmi->GetReferenceBinNumber()[i] - 4);
-                    reg_print_info(executableName, text.c_str());
+                                i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]);
+            reg_print_info(this->executableName, text.c_str());
+            if (this->measure_nmi != nullptr) {
+                if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) {
+                    text = stringFormat("\t* binning size for timepoint %i/%i: %i",
+                                        i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4);
+                    reg_print_info(this->executableName, text.c_str());
                 }
             }
         }
-        text = stringFormat("\t* gaussian smoothing sigma: %g", referenceSmoothingSigma);
-        reg_print_info(executableName, text.c_str());
-        reg_print_info(executableName, "");
-        reg_print_info(executableName, "Floating image:");
-        reg_print_info(executableName, text.c_str());
-        text = stringFormat("\t* name: %s", inputFloating->fname);
-        reg_print_info(executableName, text.c_str());
+        text = stringFormat("\t* gaussian smoothing sigma: %g", this->referenceSmoothingSigma);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
+        reg_print_info(this->executableName, "Floating image:");
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t* name: %s", this->inputFloating->fname);
+        reg_print_info(this->executableName, text.c_str());
         text = stringFormat("\t* image dimension: %i x %i x %i x %i",
-                            inputFloating->nx, inputFloating->ny, inputFloating->nz, inputFloating->nt);
-        reg_print_info(executableName, text.c_str());
-        text = stringFormat("\t* image spacing: %g x %g x %g mm", inputFloating->dx,
-                            inputFloating->dy, inputFloating->dz);
-        reg_print_info(executableName, text.c_str());
-        for (int i = 0; i < inputFloating->nt; i++) {
+                            this->inputFloating->nx, this->inputFloating->ny,
+                            this->inputFloating->nz, this->inputFloating->nt);
+        reg_print_info(this->executableName, text.c_str());
+        text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx,
+                            this->inputFloating->dy, this->inputFloating->dz);
+        reg_print_info(this->executableName, text.c_str());
+        for (int i = 0; i < this->inputFloating->nt; i++) {
             text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
-                                i, inputFloating->nt - 1, floatingThresholdLow[i], floatingThresholdUp[i]);
-            reg_print_info(executableName, text.c_str());
-            if (measure_nmi != nullptr) {
-                if (measure_nmi->GetTimepointsWeights()[i] > 0.0) {
+                                i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]);
+            reg_print_info(this->executableName, text.c_str());
+            if (this->measure_nmi != nullptr) {
+                if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) {
                     text = stringFormat("\t* binning size for timepoint %i/%i: %i",
-                                        i, inputFloating->nt - 1, measure_nmi->GetFloatingBinNumber()[i] - 4);
-                    reg_print_info(executableName, text.c_str());
+                                        i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4);
+                    reg_print_info(this->executableName, text.c_str());
                 }
             }
         }
-        text = stringFormat("\t* gaussian smoothing sigma: %g", floatingSmoothingSigma);
-        reg_print_info(executableName, text.c_str());
-        reg_print_info(executableName, "");
-        text = stringFormat("Warped image padding value: %g", warpedPaddingValue);
-        reg_print_info(executableName, text.c_str());
-        reg_print_info(executableName, "");
-        text = stringFormat("Level number: %i", levelNumber);
-        reg_print_info(executableName, text.c_str());
-        if (levelNumber != levelToPerform) {
-            text = stringFormat("\t* Level to perform: %i", levelToPerform);
-            reg_print_info(executableName, text.c_str());
+        text = stringFormat("\t* gaussian smoothing sigma: %g", this->floatingSmoothingSigma);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
+        text = stringFormat("Warped image padding value: %g", this->warpedPaddingValue);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
+        text = stringFormat("Level number: %i", this->levelNumber);
+        reg_print_info(this->executableName, text.c_str());
+        if (this->levelNumber != this->levelToPerform) {
+            text = stringFormat("\t* Level to perform: %i", this->levelToPerform);
+            reg_print_info(this->executableName, text.c_str());
         }
-        reg_print_info(executableName, "");
-        text = stringFormat("Maximum iteration number during the last level: %i", (int)maxIterationNumber);
-        reg_print_info(executableName, text.c_str());
-        reg_print_info(executableName, "");
+        reg_print_info(this->executableName, "");
+        text = stringFormat("Maximum iteration number during the last level: %i", (int)this->maxIterationNumber);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
 
         text = stringFormat("Final spacing in mm: %g %g %g", spacing[0], spacing[1], spacing[2]);
-        reg_print_info(executableName, text.c_str());
-        reg_print_info(executableName, "");
-        if (measure_ssd != nullptr)
-            reg_print_info(executableName, "The SSD is used as a similarity measure.");
-        if (measure_kld != nullptr)
-            reg_print_info(executableName, "The KL divergence is used as a similarity measure.");
-        if (measure_lncc != nullptr)
-            reg_print_info(executableName, "The LNCC is used as a similarity measure.");
-        if (measure_dti != nullptr)
-            reg_print_info(executableName, "A DTI based measure is used as a similarity measure.");
-        if (measure_mind != nullptr)
-            reg_print_info(executableName, "MIND is used as a similarity measure.");
-        if (measure_mindssc != nullptr)
-            reg_print_info(executableName, "MINDSSC is used as a similarity measure.");
-        if (measure_nmi != nullptr || (measure_dti == nullptr && measure_kld == nullptr &&
-                                       measure_lncc == nullptr && measure_nmi == nullptr &&
-                                       measure_ssd == nullptr && measure_mind == nullptr &&
-                                       measure_mindssc == nullptr))
-            reg_print_info(executableName, "The NMI is used as a similarity measure.");
-        text = stringFormat("Similarity measure term weight: %g", similarityWeight);
-        reg_print_info(executableName, text.c_str());
-        reg_print_info(executableName, "");
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
+        if (this->measure_ssd != nullptr)
+            reg_print_info(this->executableName, "The SSD is used as a similarity measure.");
+        if (this->measure_kld != nullptr)
+            reg_print_info(this->executableName, "The KL divergence is used as a similarity measure.");
+        if (this->measure_lncc != nullptr)
+            reg_print_info(this->executableName, "The LNCC is used as a similarity measure.");
+        if (this->measure_dti != nullptr)
+            reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure.");
+        if (this->measure_mind != nullptr)
+            reg_print_info(this->executableName, "MIND is used as a similarity measure.");
+        if (this->measure_mindssc != nullptr)
+            reg_print_info(this->executableName, "MINDSSC is used as a similarity measure.");
+        if (this->measure_nmi != nullptr || (this->measure_dti == nullptr && this->measure_kld == nullptr &&
+                                             this->measure_lncc == nullptr && this->measure_nmi == nullptr &&
+                                             this->measure_ssd == nullptr && this->measure_mind == nullptr &&
+                                             this->measure_mindssc == nullptr))
+            reg_print_info(this->executableName, "The NMI is used as a similarity measure.");
+        text = stringFormat("Similarity measure term weight: %g", this->similarityWeight);
+        reg_print_info(this->executableName, text.c_str());
+        reg_print_info(this->executableName, "");
         if (bendingEnergyWeight > 0) {
             text = stringFormat("Bending energy penalty term weight: %g", bendingEnergyWeight);
-            reg_print_info(executableName, text.c_str());
-            reg_print_info(executableName, "");
+            reg_print_info(this->executableName, text.c_str());
+            reg_print_info(this->executableName, "");
         }
         if ((linearEnergyWeight) > 0) {
             text = stringFormat("Linear energy penalty term weight: %g", linearEnergyWeight);
-            reg_print_info(executableName, text.c_str());
-            reg_print_info(executableName, "");
+            reg_print_info(this->executableName, text.c_str());
+            reg_print_info(this->executableName, "");
         }
         if (jacobianLogWeight > 0) {
             text = stringFormat("Jacobian-based penalty term weight: %g", jacobianLogWeight);
-            reg_print_info(executableName, text.c_str());
+            reg_print_info(this->executableName, text.c_str());
             if (jacobianLogApproximation) {
-                reg_print_info(executableName, "\t* Jacobian-based penalty term is approximated");
+                reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated");
             } else {
-                reg_print_info(executableName, "\t* Jacobian-based penalty term is not approximated");
+                reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated");
             }
-            reg_print_info(executableName, "");
+            reg_print_info(this->executableName, "");
         }
-        if ((landmarkRegWeight) > 0) {
-            text = stringFormat("Landmark distance regularisation term weight: %g", landmarkRegWeight);
-            reg_print_info(executableName, text.c_str());
-            reg_print_info(executableName, "");
+        if (this->landmarkRegWeight > 0) {
+            text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight);
+            reg_print_info(this->executableName, text.c_str());
+            reg_print_info(this->executableName, "");
         }
 #ifdef NDEBUG
     }
 #endif
 
-    initialised = true;
+    this->initialised = true;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::Initialise");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
-    if (platformCode == NR_PLATFORM_CPU)
-        con = new F3dContent(reference, floating, controlPointGrid, localWeightSimInput, mask, affineTransformation, sizeof(T));
+    if (this->platformCode == NR_PLATFORM_CPU)
+        this->con = new F3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
 #ifdef _USE_CUDA
-    else if (platformCode == NR_PLATFORM_CUDA)
-        con = new CudaF3dContent(reference, floating, controlPointGrid, localWeightSimInput, mask, affineTransformation, sizeof(T));
+    else if (this->platformCode == NR_PLATFORM_CUDA)
+        this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
 #endif
-    compute = platform->CreateCompute(con);
+    this->compute = this->platform->CreateCompute(this->con);
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::DeinitContent() {
-    delete compute;
-    compute = nullptr;
-    delete con;
-    con = nullptr;
+    delete this->compute;
+    this->compute = nullptr;
+    delete this->con;
+    this->con = nullptr;
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::GetDeformationField() {
-    compute->GetDeformationField(false, // Composition
-                                 true); // bspline
+    this->compute->GetDeformationField(false, // Composition
+                                       true); // bspline
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetDeformationField");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
     if (jacobianLogWeight <= 0) return 0;
 
     bool approx = type == 2 ? false : jacobianLogApproximation;
 
-    double value = compute->GetJacobianPenaltyTerm(approx);
+    double value = this->compute->GetJacobianPenaltyTerm(approx);
 
     unsigned int maxit = 5;
     if (type > 0) maxit = 20;
     unsigned int it = 0;
     while (value != value && it < maxit) {
-        value = compute->CorrectFolding(approx);
+        value = this->compute->CorrectFolding(approx);
 #ifndef NDEBUG
         reg_print_msg_debug("Folding correction");
 #endif
@@ -424,7 +424,7 @@ double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
     }
     if (type > 0) {
         if (value != value) {
-            optimiser->RestoreBestDOF();
+            this->optimiser->RestoreBestDOF();
             reg_print_fct_warn("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm()");
             reg_print_msg_warn("The folding correction scheme failed");
         } else {
@@ -444,11 +444,11 @@ double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 double reg_f3d<T>::ComputeBendingEnergyPenaltyTerm() {
     if (bendingEnergyWeight <= 0) return 0;
 
-    double value = compute->ApproxBendingEnergy();
+    double value = this->compute->ApproxBendingEnergy();
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::ComputeBendingEnergyPenaltyTerm");
 #endif
@@ -456,12 +456,12 @@ double reg_f3d<T>::ComputeBendingEnergyPenaltyTerm() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 double reg_f3d<T>::ComputeLinearEnergyPenaltyTerm() {
     if (linearEnergyWeight <= 0)
         return 0;
 
-    double value = compute->ApproxLinearEnergy();
+    double value = this->compute->ApproxLinearEnergy();
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::ComputeLinearEnergyPenaltyTerm");
 #endif
@@ -469,24 +469,26 @@ double reg_f3d<T>::ComputeLinearEnergyPenaltyTerm() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 double reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm() {
-    if (landmarkRegWeight <= 0)
+    if (this->landmarkRegWeight <= 0)
         return 0;
 
-    double value = compute->GetLandmarkDistance(landmarkRegNumber, landmarkReference, landmarkFloating);
+    double value = this->compute->GetLandmarkDistance(this->landmarkRegNumber,
+                                                      this->landmarkReference,
+                                                      this->landmarkFloating);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm");
 #endif
-    return landmarkRegWeight * value;
+    return this->landmarkRegWeight * value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::GetSimilarityMeasureGradient() {
-    GetVoxelBasedGradient();
+    this->GetVoxelBasedGradient();
 
-    nifti_image *voxelBasedMeasureGradient = dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient();
+    nifti_image *voxelBasedMeasureGradient = dynamic_cast<F3dContent*>(this->con)->GetVoxelBasedMeasureGradient();
     const int kernel_type = CUBIC_SPLINE_KERNEL;
     // The voxel based NMI gradient is convolved with a spline kernel
     // Convolution along the x axis
@@ -523,10 +525,10 @@ void reg_f3d<T>::GetSimilarityMeasureGradient() {
     }
 
     // Update the changes of voxelBasedMeasureGradient
-    dynamic_cast<F3dContent*>(con)->SetVoxelBasedMeasureGradient(voxelBasedMeasureGradient);
+    dynamic_cast<F3dContent*>(this->con)->SetVoxelBasedMeasureGradient(voxelBasedMeasureGradient);
 
     // The node based NMI gradient is extracted
-    compute->VoxelCentricToNodeCentric(similarityWeight);
+    this->compute->VoxelCentricToNodeCentric(this->similarityWeight);
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetSimilarityMeasureGradient");
@@ -534,54 +536,54 @@ void reg_f3d<T>::GetSimilarityMeasureGradient() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::GetBendingEnergyGradient() {
     if (bendingEnergyWeight <= 0) return;
 
-    compute->ApproxBendingEnergyGradient(bendingEnergyWeight);
+    this->compute->ApproxBendingEnergyGradient(bendingEnergyWeight);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetBendingEnergyGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::GetLinearEnergyGradient() {
     if (linearEnergyWeight <= 0) return;
 
-    compute->ApproxLinearEnergyGradient(linearEnergyWeight);
+    this->compute->ApproxLinearEnergyGradient(linearEnergyWeight);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetLinearEnergyGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::GetJacobianBasedGradient() {
     if (jacobianLogWeight <= 0) return;
 
-    compute->JacobianPenaltyTermGradient(jacobianLogWeight, jacobianLogApproximation);
+    this->compute->JacobianPenaltyTermGradient(jacobianLogWeight, jacobianLogApproximation);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetJacobianBasedGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::GetLandmarkDistanceGradient() {
-    if (landmarkRegWeight <= 0) return;
+    if (this->landmarkRegWeight <= 0) return;
 
-    compute->LandmarkDistanceGradient(landmarkRegNumber,
-                                      landmarkReference,
-                                      landmarkFloating,
-                                      landmarkRegWeight);
+    this->compute->LandmarkDistanceGradient(this->landmarkRegNumber,
+                                            this->landmarkReference,
+                                            this->landmarkFloating,
+                                            this->landmarkRegWeight);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetLandmarkDistanceGradient");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-// template <class T>
+// template<class T>
 // void reg_f3d<T>::SetGradientImageToZero() {
 //     T* nodeGradPtr = static_cast<T*>(transformationGradient->data);
 //     for (size_t i = 0; i < transformationGradient->nvox; ++i)
@@ -592,15 +594,15 @@ void reg_f3d<T>::GetLandmarkDistanceGradient() {
 // }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 T reg_f3d<T>::NormaliseGradient() {
     // First compute the gradient max length for normalisation purpose
-    T maxGradLength = (T)compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ);
+    T maxGradLength = (T)this->compute->GetMaximalLength(this->optimiseX, this->optimiseY, this->optimiseZ);
 
-    if (strcmp(executableName, "NiftyReg F3D") == 0) {
+    if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
         // The gradient is normalised if we are running f3d
         // It will be normalised later when running f3d_sym or f3d2
-        compute->NormaliseGradient(maxGradLength);
+        this->compute->NormaliseGradient(maxGradLength);
 #ifndef NDEBUG
         char text[255];
         sprintf(text, "Objective function gradient maximal length: %g", maxGradLength);
@@ -616,37 +618,37 @@ T reg_f3d<T>::NormaliseGradient() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::DisplayCurrentLevelParameters() {
 #ifdef NDEBUG
-    if (verbose) {
+    if (this->verbose) {
 #endif
-        nifti_image *reference = con->Content::GetReference();
-        nifti_image *floating = con->Content::GetFloating();
+        nifti_image *reference = this->con->Content::GetReference();
+        nifti_image *floating = this->con->Content::GetFloating();
         char text[255];
-        sprintf(text, "Current level: %i / %i", currentLevel + 1, levelNumber);
-        reg_print_info(executableName, text);
-        sprintf(text, "Maximum iteration number: %i", (int)maxIterationNumber);
-        reg_print_info(executableName, text);
-        reg_print_info(executableName, "Current reference image");
+        sprintf(text, "Current level: %i / %i", this->currentLevel + 1, this->levelNumber);
+        reg_print_info(this->executableName, text);
+        sprintf(text, "Maximum iteration number: %i", (int)this->maxIterationNumber);
+        reg_print_info(this->executableName, text);
+        reg_print_info(this->executableName, "Current reference image");
         sprintf(text, "\t* image dimension: %i x %i x %i x %i", reference->nx, reference->ny, reference->nz, reference->nt);
-        reg_print_info(executableName, text);
+        reg_print_info(this->executableName, text);
         sprintf(text, "\t* image spacing: %g x %g x %g mm", reference->dx, reference->dy, reference->dz);
-        reg_print_info(executableName, text);
-        reg_print_info(executableName, "Current floating image");
+        reg_print_info(this->executableName, text);
+        reg_print_info(this->executableName, "Current floating image");
         sprintf(text, "\t* image dimension: %i x %i x %i x %i", floating->nx, floating->ny, floating->nz, floating->nt);
-        reg_print_info(executableName, text);
+        reg_print_info(this->executableName, text);
         sprintf(text, "\t* image spacing: %g x %g x %g mm", floating->dx, floating->dy, floating->dz);
-        reg_print_info(executableName, text);
-        reg_print_info(executableName, "Current control point image");
+        reg_print_info(this->executableName, text);
+        reg_print_info(this->executableName, "Current control point image");
         sprintf(text, "\t* image dimension: %i x %i x %i",
                 controlPointGrid->nx, controlPointGrid->ny,
                 controlPointGrid->nz);
-        reg_print_info(executableName, text);
+        reg_print_info(this->executableName, text);
         sprintf(text, "\t* image spacing: %g x %g x %g mm",
                 controlPointGrid->dx, controlPointGrid->dy,
                 controlPointGrid->dz);
-        reg_print_info(executableName, text);
+        reg_print_info(this->executableName, text);
 #ifdef NDEBUG
     }
 #endif
@@ -670,7 +672,7 @@ void reg_f3d<T>::DisplayCurrentLevelParameters() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 double reg_f3d<T>::GetObjectiveFunctionValue() {
     currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations
 
@@ -678,18 +680,18 @@ double reg_f3d<T>::GetObjectiveFunctionValue() {
 
     currentWLE = ComputeLinearEnergyPenaltyTerm();
 
-    currentWLand = ComputeLandmarkDistancePenaltyTerm();
+    this->currentWLand = ComputeLandmarkDistancePenaltyTerm();
 
     // Compute initial similarity measure
-    currentWMeasure = 0.0;
-    if (similarityWeight > 0) {
-        WarpFloatingImage(interpolation);
-        currentWMeasure = ComputeSimilarityMeasure();
+    this->currentWMeasure = 0.0;
+    if (this->similarityWeight > 0) {
+        this->WarpFloatingImage(this->interpolation);
+        this->currentWMeasure = this->ComputeSimilarityMeasure();
     }
 #ifndef NDEBUG
     char text[255];
     sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g",
-            currentWMeasure, currentWBE, currentWLE, currentWJac, currentWLand);
+            this->currentWMeasure, currentWBE, currentWLE, currentWJac, this->currentWLand);
     reg_print_msg_debug(text);
 #endif
 
@@ -698,45 +700,47 @@ double reg_f3d<T>::GetObjectiveFunctionValue() {
 #endif
     // Store the global objective function value
 
-    return currentWMeasure - currentWBE - currentWLE - currentWJac - currentWLand;
+    return this->currentWMeasure - currentWBE - currentWLE - currentWJac - this->currentWLand;
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::UpdateParameters(float scale) {
-    T *currentDOF = optimiser->GetCurrentDOF();
-    T *bestDOF = optimiser->GetBestDOF();
-    T *gradient = optimiser->GetGradient();
-
-    compute->UpdateControlPointPosition(currentDOF, bestDOF, gradient, scale, optimiseX, optimiseY, optimiseZ);
+    this->compute->UpdateControlPointPosition(this->optimiser->GetCurrentDOF(),
+                                              this->optimiser->GetBestDOF(),
+                                              this->optimiser->GetGradient(),
+                                              scale,
+                                              this->optimiseX,
+                                              this->optimiseY,
+                                              this->optimiseZ);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::UpdateParameters");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::SetOptimiser() {
-    optimiser = platform->CreateOptimiser<T>(dynamic_cast<F3dContent*>(con),
-                                             this,
-                                             maxIterationNumber,
-                                             useConjGradient,
-                                             optimiseX,
-                                             optimiseY,
-                                             optimiseZ);
+    this->optimiser = this->platform->template CreateOptimiser<T>(dynamic_cast<F3dContent*>(this->con),
+                                                                  this,
+                                                                  this->maxIterationNumber,
+                                                                  this->useConjGradient,
+                                                                  this->optimiseX,
+                                                                  this->optimiseY,
+                                                                  this->optimiseZ);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SetOptimiser");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::SmoothGradient() {
     // TODO Implement this for CUDA
     // Use CPU temporarily
     // The gradient is smoothed using a Gaussian kernel if it is required
-    if (gradientSmoothingSigma != 0) {
-        float kernel = fabs(gradientSmoothingSigma);
+    if (this->gradientSmoothingSigma != 0) {
+        float kernel = fabs(this->gradientSmoothingSigma);
         F3dContent *con = dynamic_cast<F3dContent*>(this->con);
         reg_tools_kernelConvolution(con->GetTransformationGradient(), &kernel, GAUSSIAN_KERNEL);
         // Update the changes of transformationGradient
@@ -748,7 +752,7 @@ void reg_f3d<T>::SmoothGradient() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class T>
+template<class T>
 void reg_f3d<T>::GetApproximatedGradient() {
     // TODO Implement this for CUDA
     // Use CPU temporarily
@@ -761,7 +765,7 @@ void reg_f3d<T>::GetApproximatedGradient() {
     T *gradPtr = static_cast<T*>(transformationGradient->data);
     T eps = controlPointGrid->dx / 100.f;
     for (size_t i = 0; i < controlPointGrid->nvox; ++i) {
-        T currentValue = optimiser->GetBestDOF()[i];
+        T currentValue = this->optimiser->GetBestDOF()[i];
         gridPtr[i] = currentValue + eps;
         // Update the changes. Bad hack, fix that!
         con->SetControlPointGrid(controlPointGrid);
@@ -787,24 +791,24 @@ void reg_f3d<T>::GetApproximatedGradient() {
 template<class T>
 nifti_image** reg_f3d<T>::GetWarpedImage() {
     // The initial images are used
-    if (!inputReference || !inputFloating || !controlPointGrid) {
+    if (!this->inputReference || !this->inputFloating || !controlPointGrid) {
         reg_print_fct_error("reg_f3d<T>::GetWarpedImage()");
         reg_print_msg_error("The reference, floating and control point grid images have to be defined");
         reg_exit();
     }
 
-    const int datatype = inputFloating->datatype;
+    const int datatype = this->inputFloating->datatype;
 
-    InitContent(inputReference, inputFloating, nullptr);
+    InitContent(this->inputReference, this->inputFloating, nullptr);
 
-    WarpFloatingImage(3); // cubic spline interpolation
+    this->WarpFloatingImage(3); // cubic spline interpolation
 
     nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
-    warpedImage[0] = con->GetWarped(datatype, 0);
-    if (inputFloating->nt == 2)
-        warpedImage[1] = con->GetWarped(datatype, 1);
+    warpedImage[0] = this->con->GetWarped(datatype, 0);
+    if (this->inputFloating->nt == 2)
+        warpedImage[1] = this->con->GetWarped(datatype, 1);
 
-    con->SetWarped(nullptr); // Prevent deallocating of warpedImage
+    this->con->SetWarped(nullptr); // Prevent deallocating of warpedImage
     DeinitContent();
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetWarpedImage");
@@ -828,11 +832,11 @@ nifti_image* reg_f3d<T>::GetControlPointPositionImage() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::UpdateBestObjFunctionValue() {
-    bestWMeasure = currentWMeasure;
+    this->bestWMeasure = this->currentWMeasure;
     bestWBE = currentWBE;
     bestWLE = currentWLE;
     bestWJac = currentWJac;
-    bestWLand = currentWLand;
+    this->bestWLand = this->currentWLand;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::UpdateBestObjFunctionValue");
 #endif
@@ -841,14 +845,14 @@ void reg_f3d<T>::UpdateBestObjFunctionValue() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::PrintInitialObjFunctionValue() {
-    if (!verbose) return;
+    if (!this->verbose) return;
 
-    double bestValue = optimiser->GetBestObjFunctionValue();
+    double bestValue = this->optimiser->GetBestObjFunctionValue();
 
     char text[255];
     sprintf(text, "Initial objective function: %g = (wSIM)%g - (wBE)%g - (wLE)%g - (wJAC)%g - (wLAN)%g",
-            bestValue, bestWMeasure, bestWBE, bestWLE, bestWJac, bestWLand);
-    reg_print_info(executableName, text);
+            bestValue, this->bestWMeasure, bestWBE, bestWLE, bestWJac, this->bestWLand);
+    reg_print_info(this->executableName, text);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::PrintInitialObjFunctionValue");
 #endif
@@ -857,23 +861,23 @@ void reg_f3d<T>::PrintInitialObjFunctionValue() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::PrintCurrentObjFunctionValue(T currentSize) {
-    if (!verbose) return;
+    if (!this->verbose) return;
 
     char text[255];
     sprintf(text, "[%i] Current objective function: %g",
-            (int)optimiser->GetCurrentIterationNumber(),
-            optimiser->GetBestObjFunctionValue());
-    sprintf(text + strlen(text), " = (wSIM)%g", bestWMeasure);
+            (int)this->optimiser->GetCurrentIterationNumber(),
+            this->optimiser->GetBestObjFunctionValue());
+    sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure);
     if (bendingEnergyWeight > 0)
         sprintf(text + strlen(text), " - (wBE)%.2e", bestWBE);
     if (linearEnergyWeight > 0)
         sprintf(text + strlen(text), " - (wLE)%.2e", bestWLE);
     if (jacobianLogWeight > 0)
         sprintf(text + strlen(text), " - (wJAC)%.2e", bestWJac);
-    if (landmarkRegWeight > 0)
-        sprintf(text + strlen(text), " - (wLAN)%.2e", bestWLand);
+    if (this->landmarkRegWeight > 0)
+        sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand);
     sprintf(text + strlen(text), " [+ %g mm]", currentSize);
-    reg_print_info(executableName, text);
+    reg_print_info(this->executableName, text);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::PrintCurrentObjFunctionValue");
 #endif
@@ -882,13 +886,13 @@ void reg_f3d<T>::PrintCurrentObjFunctionValue(T currentSize) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetObjectiveFunctionGradient() {
-    if (!useApproxGradient) {
+    if (!this->useApproxGradient) {
         // Compute the gradient of the similarity measure
-        if (similarityWeight > 0) {
-            WarpFloatingImage(interpolation);
+        if (this->similarityWeight > 0) {
+            this->WarpFloatingImage(this->interpolation);
             GetSimilarityMeasureGradient();
         } else {
-            dynamic_cast<F3dContent*>(con)->ZeroTransformationGradient();
+            dynamic_cast<F3dContent*>(this->con)->ZeroTransformationGradient();
         }
         // Compute the penalty term gradients if required
         GetBendingEnergyGradient();
@@ -899,7 +903,7 @@ void reg_f3d<T>::GetObjectiveFunctionGradient() {
         GetApproximatedGradient();
     }
 
-    optimiser->IncrementCurrentIterationNumber();
+    this->optimiser->IncrementCurrentIterationNumber();
 
     // Smooth the gradient if require
     SmoothGradient();
@@ -919,5 +923,4 @@ void reg_f3d<T>::CorrectTransformation() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-
 template class reg_f3d<float>;
diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt
index aa6a7771..2dde87f3 100755
--- a/reg-lib/cl/CMakeLists.txt
+++ b/reg-lib/cl/CMakeLists.txt
@@ -23,7 +23,7 @@ set(NAME _reg_opencl_kernels)
 add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
   ClCompute.cpp
   ClContextSingleton.cpp
-  CLAladinContent.cpp
+  ClAladinContent.cpp
   ClKernelFactory.cpp
   ClAffineDeformationFieldKernel.cpp
   ClBlockMatchingKernel.cpp
@@ -31,7 +31,6 @@ add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
   ClOptimiseKernel.cpp
   ClResampleImageKernel.cpp
   ../AladinContent.cpp
-  ../Platform.cpp
 )
 target_link_libraries(${NAME} ${OpenCL_LIBRARIES})
 install(TARGETS ${NAME}
@@ -41,14 +40,20 @@ install(TARGETS ${NAME}
 )
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
-install(FILES ClCompute.h ClContextSingleton.h CLAladinContent.h ClKernelFactory.h
-        ClAffineDeformationFieldKernel.h
-        ClBlockMatchingKernel.h
-        ClConvolutionKernel.h
-        ClOptimiseKernel.h
-        ClResampleImageKernel.h
-        DESTINATION include/cl)
-install(FILES resampleKernel.cl affineDeformationKernel.cl blockMatchingKernel.cl DESTINATION include/cl)
+install(FILES
+  ClCompute.h
+  ClContextSingleton.h
+  ClAladinContent.h
+  ClKernelFactory.h
+  ClAffineDeformationFieldKernel.h
+  ClBlockMatchingKernel.h
+  ClConvolutionKernel.h
+  ClOptimiseKernel.h
+  ClResampleImageKernel.h
+  resampleKernel.cl
+  affineDeformationKernel.cl
+  blockMatchingKernel.cl DESTINATION include/cl
+)
 #-----------------------------------------------------------------------------
 set(NAME _reg_openclinfo)
 add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h InfoDevice.h ClContextSingleton.cpp)
diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
index e5e12bbb..511b877e 100644
--- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
@@ -4,7 +4,7 @@
 
 /* *************************************************************** */
 ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) : AffineDeformationFieldKernel() {
-    //populate the CLAladinContent object ptr
+    //populate the ClAladinContent object ptr
     ClAladinContent *con = static_cast<ClAladinContent*>(conIn);
 
     //path to kernel files
diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.h b/reg-lib/cl/ClAffineDeformationFieldKernel.h
index fb2c408d..ad3a092b 100644
--- a/reg-lib/cl/ClAffineDeformationFieldKernel.h
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "AffineDeformationFieldKernel.h"
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 
 class ClAffineDeformationFieldKernel: public AffineDeformationFieldKernel {
 public:
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index 90153818..aee0df8f 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -1,4 +1,4 @@
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 #include "_reg_tools.h"
 
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp
index 9522a465..4dd1bb8d 100644
--- a/reg-lib/cl/ClBlockMatchingKernel.cpp
+++ b/reg-lib/cl/ClBlockMatchingKernel.cpp
@@ -4,7 +4,7 @@
 
 /* *************************************************************** */
 ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKernel() {
-   //populate the CLAladinContent object ptr
+   //populate the ClAladinContent object ptr
    ClAladinContent *con = static_cast<ClAladinContent*>(conIn);
 
    //path to kernel file
diff --git a/reg-lib/cl/ClBlockMatchingKernel.h b/reg-lib/cl/ClBlockMatchingKernel.h
index 9a01ea61..acecafe3 100644
--- a/reg-lib/cl/ClBlockMatchingKernel.h
+++ b/reg-lib/cl/ClBlockMatchingKernel.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "BlockMatchingKernel.h"
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 
 class ClBlockMatchingKernel: public BlockMatchingKernel {
 public:
diff --git a/reg-lib/cl/ClOptimiseKernel.cpp b/reg-lib/cl/ClOptimiseKernel.cpp
index d38eaad0..a040e0f1 100644
--- a/reg-lib/cl/ClOptimiseKernel.cpp
+++ b/reg-lib/cl/ClOptimiseKernel.cpp
@@ -2,7 +2,7 @@
 
 /* *************************************************************** */
 ClOptimiseKernel::ClOptimiseKernel(Content *conIn) : OptimiseKernel() {
-    //populate the CLAladinContent object ptr
+    //populate the ClAladinContent object ptr
     ClAladinContent *con = static_cast<ClAladinContent*>(conIn);
 
     //get necessary cpu ptrs
diff --git a/reg-lib/cl/ClOptimiseKernel.h b/reg-lib/cl/ClOptimiseKernel.h
index e34f89c6..527a5bee 100644
--- a/reg-lib/cl/ClOptimiseKernel.h
+++ b/reg-lib/cl/ClOptimiseKernel.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "OptimiseKernel.h"
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 
 class ClOptimiseKernel: public OptimiseKernel {
 public:
diff --git a/reg-lib/cl/ClResampleImageKernel.h b/reg-lib/cl/ClResampleImageKernel.h
index 4bdfde91..c6db7d23 100644
--- a/reg-lib/cl/ClResampleImageKernel.h
+++ b/reg-lib/cl/ClResampleImageKernel.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "ResampleImageKernel.h"
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 
 class ClResampleImageKernel: public ResampleImageKernel {
 public:
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 71d71b10..d584b86a 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -108,7 +108,7 @@ void reg_intensityRescale_core(nifti_image *image,
    DTYPE *imagePtr = static_cast<DTYPE *>(image->data);
    unsigned int voxelNumber = image->nx*image->ny*image->nz;
 
-   // The rescasling is done for each volume independtly
+   // The rescaling is done for each volume independently
    DTYPE *volumePtr = &imagePtr[timePoint*voxelNumber];
    DTYPE currentMin=0;
    DTYPE currentMax=0;
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 452829d2..03c1515a 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -68,7 +68,7 @@ install(TARGETS ${NAME}
     RUNTIME DESTINATION bin
     LIBRARY DESTINATION lib
     ARCHIVE DESTINATION lib
-    )
+)
 install(FILES ${NAME}.h DESTINATION include/cuda)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
@@ -89,7 +89,6 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaOptimiseKernel.cpp
     CudaResampleImageKernel.cpp
     ../AladinContent.cpp
-    ../Platform.cpp
     _reg_resampling_gpu.cu
     _reg_blocksize_gpu.cu
     _reg_tools_gpu.cu
@@ -97,13 +96,13 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     _reg_nmi_gpu.cu
     _reg_ssd_gpu.cu
     _reg_optimiser_gpu.cu
-    )
+)
 target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda)
 install(TARGETS ${NAME}
     RUNTIME DESTINATION lib
     LIBRARY DESTINATION lib
     ARCHIVE DESTINATION lib
-    )
+)
 install(FILES blockMatchingKernel.h CudaCompute.h CudaContent.h CudaF3dContent.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda)
 install(FILES CudaKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CudaAffineDeformationFieldKernel.h CudaBlockMatchingKernel.h CudaConvolutionKernel.h CudaOptimiseKernel.h CudaResampleImageKernel.h DESTINATION include/cuda)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
@@ -115,7 +114,7 @@ install(TARGETS ${NAME}
     RUNTIME DESTINATION lib
     LIBRARY DESTINATION lib
     ARCHIVE DESTINATION lib
-    )
+)
 install(FILES ${NAME}.h DESTINATION include/cuda)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp
index b2895d6b..c8b19dea 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affine_deformation_field.cpp
@@ -12,7 +12,7 @@
 #include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 #endif
 
 #define EPS_SINGLE 0.0001
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index a391831c..baa794d4 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -12,7 +12,7 @@
 #include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 #endif
 
 #include <algorithm>
diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp
index 301f8734..44b022b8 100644
--- a/reg-test/reg_test_coherence_affine_deformation_field.cpp
+++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp
@@ -13,7 +13,7 @@
 #endif
 
 #ifdef _USE_OPENCL
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 #endif
 
 #define EPS 0.000001
diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp
index 1dc80d81..b625175a 100644
--- a/reg-test/reg_test_coherence_blockMatching.cpp
+++ b/reg-test/reg_test_coherence_blockMatching.cpp
@@ -12,7 +12,7 @@
 #include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 #endif
 
 #include <algorithm>
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
index be731d9f..6f0a9601 100644
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ b/reg-test/reg_test_coherence_interpolation.cpp
@@ -9,7 +9,7 @@
 #include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 #endif
 #define EPS 0.000001
 #define EPS_SINGLE 0.0001
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index ad04279d..b3ef9a38 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -13,7 +13,7 @@
 #include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 #endif
 
 #define EPS_SINGLE 0.0001
diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp
index 715be017..bd144401 100644
--- a/reg-test/reg_test_leastTrimmedSquares.cpp
+++ b/reg-test/reg_test_leastTrimmedSquares.cpp
@@ -13,7 +13,7 @@
 #include "CudaAladinContent.h"
 #endif
 #ifdef _USE_OPENCL
-#include "CLAladinContent.h"
+#include "ClAladinContent.h"
 #endif
 
 #define EPS 0.000001

From 177b62b451b32501812c1b123380163aafd194cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 4 Jan 2023 18:01:10 +0000
Subject: [PATCH 027/314] Upgrade C++ standard version to C++17

---
 CMakeLists.txt             | 3 +++
 niftyreg_build_version.txt | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 612ab3a6..0bec2caf 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,6 +7,9 @@ else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATC
  mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY)
 endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
 #-----------------------------------------------------------------------------
+# Set C++ standard version
+set(CMAKE_CXX_STANDARD 17)
+#-----------------------------------------------------------------------------
 if(APPLE)
   set(CMAKE_MACOSX_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
 endif(APPLE)
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 405e2afe..c8b255fc 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-134
+135

From 33038b15d398171c976758c750c5045a6b832eed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 5 Jan 2023 14:22:00 +0000
Subject: [PATCH 028/314] Infer datatype for Content::GetWarped()

---
 niftyreg_build_version.txt                    | 2 +-
 reg-lib/Content.h                             | 2 +-
 reg-lib/_reg_aladin.cpp                       | 3 +--
 reg-lib/_reg_f3d.cpp                          | 6 ++----
 reg-lib/cl/ClAladinContent.cpp                | 4 ++--
 reg-lib/cl/ClAladinContent.h                  | 2 +-
 reg-lib/cuda/CudaAladinContent.cpp            | 4 ++--
 reg-lib/cuda/CudaAladinContent.h              | 2 +-
 reg-lib/cuda/CudaContent.cpp                  | 4 ++--
 reg-lib/cuda/CudaContent.h                    | 2 +-
 reg-lib/cuda/_reg_resampling_gpu.cu           | 2 +-
 reg-test/reg_test_coherence_interpolation.cpp | 4 ++--
 reg-test/reg_test_interpolation.cpp           | 2 +-
 reg-test/reg_test_leastTrimmedSquares.cpp     | 2 +-
 14 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c8b255fc..065fd3e7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-135
+137
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index 506820c7..c27c147c 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -20,7 +20,7 @@ class Content {
     virtual nifti_image* GetDeformationField() { return deformationField; }
     virtual int* GetReferenceMask() { return referenceMask; }
     virtual mat44* GetTransformationMatrix() { return transformationMatrix; }
-    virtual nifti_image* GetWarped(int datatype = 0, int index = 0) { return warped; }
+    virtual nifti_image* GetWarped(int index = 0) { return warped; }
 
     // Setters
     virtual void SetDeformationField(nifti_image *deformationFieldIn) {
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 534d643b..66e4a118 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -563,7 +563,6 @@ void reg_aladin<T>::Run() {
 /* *************************************************************** */
 template<class T>
 nifti_image* reg_aladin<T>::GetFinalWarpedImage() {
-    int floatingType = this->inputFloating->datatype; //t_dev ask before touching this!
     // The initial images are used
     if (this->inputReference == nullptr || this->inputFloating == nullptr || this->transformationMatrix == nullptr) {
         reg_print_fct_error("reg_aladin::GetFinalWarpedImage()");
@@ -582,7 +581,7 @@ nifti_image* reg_aladin<T>::GetFinalWarpedImage() {
     reg_aladin<T>::CreateKernels();
 
     reg_aladin<T>::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation
-    nifti_image *warped = this->con->GetWarped(floatingType);
+    nifti_image *warped = this->con->GetWarped();
 
     free(mask);
     nifti_image *resultImage = nifti_copy_nim_info(warped);
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index a997d308..4d90fe8e 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -797,16 +797,14 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
         reg_exit();
     }
 
-    const int datatype = this->inputFloating->datatype;
-
     InitContent(this->inputReference, this->inputFloating, nullptr);
 
     this->WarpFloatingImage(3); // cubic spline interpolation
 
     nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
-    warpedImage[0] = this->con->GetWarped(datatype, 0);
+    warpedImage[0] = this->con->GetWarped(0);
     if (this->inputFloating->nt == 2)
-        warpedImage[1] = this->con->GetWarped(datatype, 1);
+        warpedImage[1] = this->con->GetWarped(1);
 
     this->con->SetWarped(nullptr); // Prevent deallocating of warpedImage
     DeinitContent();
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index aee0df8f..7206c9ee 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -111,8 +111,8 @@ void ClAladinContent::AllocateClPtrs() {
     }
 }
 /* *************************************************************** */
-nifti_image* ClAladinContent::GetWarped(int datatype, int index) {
-    DownloadImage(warped, warpedImageClmem, datatype);
+nifti_image* ClAladinContent::GetWarped(int index) {
+    DownloadImage(warped, warpedImageClmem, warped->datatype);
     return warped;
 }
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
index d7a8646a..97405730 100644
--- a/reg-lib/cl/ClAladinContent.h
+++ b/reg-lib/cl/ClAladinContent.h
@@ -41,7 +41,7 @@ class ClAladinContent: public AladinContent {
     // CPU getters with data downloaded from device
     _reg_blockMatchingParam* GetBlockMatchingParams() override;
     nifti_image* GetDeformationField() override;
-    nifti_image* GetWarped(int datatype, int index = 0) override;
+    nifti_image* GetWarped(int index = 0) override;
 
     // Setters
     void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index 608cf634..e382c950 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -135,8 +135,8 @@ void CudaAladinContent::AllocateCuPtrs() {
     }
 }
 /* *************************************************************** */
-nifti_image* CudaAladinContent::GetWarped(int datatype, int index) {
-    DownloadImage(warped, warpedImageArray_d, datatype);
+nifti_image* CudaAladinContent::GetWarped(int index) {
+    DownloadImage(warped, warpedImageArray_d, warped->datatype);
     return warped;
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
index ab27d449..a7679ea8 100644
--- a/reg-lib/cuda/CudaAladinContent.h
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -45,7 +45,7 @@ class CudaAladinContent: public AladinContent {
     // CPU getters with data downloaded from device
     _reg_blockMatchingParam* GetBlockMatchingParams() override;
     nifti_image* GetDeformationField() override;
-    nifti_image* GetWarped(int datatype, int index = 0) override;
+    nifti_image* GetWarped(int index = 0) override;
 
     // Setters
     void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 20c1b12d..fe758c02 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -143,8 +143,8 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     free(transformationMatrixCptr);
 }
 /* *************************************************************** */
-nifti_image* CudaContent::GetWarped(int datatype, int index) {
-    DownloadImage(warped, warpedCuda[index], datatype);
+nifti_image* CudaContent::GetWarped(int index) {
+    DownloadImage(warped, warpedCuda[index], warped->datatype);
     return warped;
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index 8f7161e0..8a632ad2 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -19,7 +19,7 @@ class CudaContent: public virtual Content {
 
     // Getters
     virtual nifti_image* GetDeformationField() override;
-    virtual nifti_image* GetWarped(int datatype = 0, int index = 0) override;
+    virtual nifti_image* GetWarped(int index = 0) override;
     virtual cudaArray** GetReferenceCuda() { return referenceCuda; }
     virtual cudaArray** GetFloatingCuda() { return floatingCuda; }
     virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; }
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index e4d68d9c..520dc7f7 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -45,7 +45,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
     //Bind deformationField to texture
     NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4)))
 
-    //Bind deformationField to texture
+    //Bind mask to texture
     NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)))
 
     // Bind the real to voxel matrix to texture
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
index 6f0a9601..75cd9c23 100644
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ b/reg-test/reg_test_coherence_interpolation.cpp
@@ -86,7 +86,7 @@ int main(int argc, char **argv)
                                                                       std::numeric_limits<float>::quiet_NaN());
     delete resampleImageKernel_cpu;
     delete platform_cpu;
-    cpu_warped = con_cpu->GetWarped(referenceImage->datatype);
+    cpu_warped = con_cpu->GetWarped();
 
     // GPU platform
     AladinContent *con_gpu = nullptr;
@@ -118,7 +118,7 @@ int main(int argc, char **argv)
                                                                       std::numeric_limits<float>::quiet_NaN());
     delete resampleImageKernel_gpu;
     delete platform_gpu;
-    gpu_warped = con_gpu->GetWarped(referenceImage->datatype);
+    gpu_warped = con_gpu->GetWarped();
 
     //Check if the platform used is double capable
     double proper_eps = EPS;
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index b3ef9a38..69998293 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -153,7 +153,7 @@ TEST_CASE("Resampling", "[resampling]") {
                 std::list<int> interp = {0, 1, 3};
                 for (auto it : interp) {
                     resampleKernel->castTo<ResampleImageKernel>()->Calculate(it, 0);
-                    warped = con->GetWarped(reference->datatype);
+                    warped = con->GetWarped();
 
                     // Check all values
                     auto *warpedPtr = static_cast<float*>(warped->data);
diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp
index bd144401..b98e39de 100644
--- a/reg-test/reg_test_leastTrimmedSquares.cpp
+++ b/reg-test/reg_test_leastTrimmedSquares.cpp
@@ -97,7 +97,7 @@ int main(int argc, char **argv)
    ////////////////////////
    float max_difference = 0;
    unsigned int num_points = m1;
-   //I think it is a bit durty... what I am going to do
+   //I think it is a bit dirty what I am going to do
    _reg_blockMatchingParam* blockMatchingParams = new _reg_blockMatchingParam();
 
    blockMatchingParams->blockNumber[0] = 1;

From e8963e52dd1da78416b59a1f4e7f58162d3eeefa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 11 Jan 2023 13:07:25 +0000
Subject: [PATCH 029/314] Remove unnecessary double pointers

---
 niftyreg_build_version.txt         |    2 +-
 reg-apps/reg_benchmark.cpp         |   32 +-
 reg-lib/cuda/CudaAladinContent.cpp |   86 +-
 reg-lib/cuda/CudaContent.cpp       |   36 +-
 reg-lib/cuda/CudaF3dContent.cpp    |   28 +-
 reg-lib/cuda/_reg_blocksize_gpu.h  |    4 +-
 reg-lib/cuda/_reg_common_cuda.cu   | 1164 ++++++++++++----------------
 reg-lib/cuda/_reg_common_cuda.h    |   78 +-
 reg-lib/cuda/_reg_measure_gpu.h    |   36 +-
 reg-lib/cuda/_reg_nmi_gpu.cu       |   57 +-
 reg-lib/cuda/_reg_nmi_gpu.h        |   26 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu |   31 +-
 reg-lib/cuda/_reg_ssd_gpu.cu       |   24 +-
 reg-lib/cuda/_reg_ssd_gpu.h        |   12 +-
 reg-test/reg_test_svd_cuda.cpp     |    2 +-
 15 files changed, 721 insertions(+), 897 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 065fd3e7..93e78032 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-137
+138
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index e217775f..ec09cc3c 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -187,9 +187,9 @@ int main(int argc, char **argv)
    if(runGPU)
    {
       if(cudaCommon_allocateArrayToDevice<float>(&targetImageArray_d, targetImage->dim)) return 1;
-      if(cudaCommon_transferNiftiToArrayOnDevice<float>(&targetImageArray_d, targetImage)) return 1;
+      if(cudaCommon_transferNiftiToArrayOnDevice<float>(targetImageArray_d, targetImage)) return 1;
       if(cudaCommon_allocateArrayToDevice<float>(&sourceImageArray_d, sourceImage->dim)) return 1;
-      if(cudaCommon_transferNiftiToArrayOnDevice<float>(&sourceImageArray_d,sourceImage)) return 1;
+      if(cudaCommon_transferNiftiToArrayOnDevice<float>(sourceImageArray_d,sourceImage)) return 1;
       CUDA_SAFE_CALL(cudaMalloc((void **)&targetMask_d, targetImage->nvox*sizeof(int)));
       CUDA_SAFE_CALL(cudaMemcpy(targetMask_d, maskImage, targetImage->nvox*sizeof(int), cudaMemcpyHostToDevice));
       CUDA_SAFE_CALL(cudaMalloc((void **)&deformationFieldImageArray_d, targetImage->nvox*sizeof(float4)));
@@ -278,7 +278,7 @@ int main(int argc, char **argv)
    if(runGPU)
    {
       if(cudaCommon_allocateArrayToDevice<float4>(&controlPointImageArray_d, controlPointImage->dim)) return 1;
-      if(cudaCommon_transferNiftiToArrayOnDevice<float4>(&controlPointImageArray_d,controlPointImage)) return 1;
+      if(cudaCommon_transferNiftiToArrayOnDevice<float4>(controlPointImageArray_d,controlPointImage)) return 1;
    }
 #endif
    {
@@ -331,7 +331,7 @@ int main(int argc, char **argv)
    if(runGPU)
    {
       if(cudaCommon_allocateArrayToDevice<float4>(&velocityFieldImageArray_d, velocityFieldImage->dim)) return 1;
-      if(cudaCommon_transferNiftiToArrayOnDevice<float4>(&velocityFieldImageArray_d,velocityFieldImage)) return 1;
+      if(cudaCommon_transferNiftiToArrayOnDevice<float4>(velocityFieldImageArray_d,velocityFieldImage)) return 1;
    }
 #endif
    {
@@ -472,7 +472,7 @@ int main(int argc, char **argv)
          fprintf(outputFile, "GPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          printf("Spatial gradient ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime);
          fprintf(outputFile, "Spatial gradient ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime);
-         cudaCommon_free( &sourceImageArray_d );
+         cudaCommon_free(sourceImageArray_d);
       }
 #endif
       printf("Spatial gradient done\n\n");
@@ -482,7 +482,7 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    if(runGPU)
    {
-      cudaCommon_free( (void **)&deformationFieldImageArray_d );
+      cudaCommon_free(deformationFieldImageArray_d);
    }
 #endif
 
@@ -566,7 +566,7 @@ int main(int argc, char **argv)
          fprintf(outputFile, "GPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          printf("Voxel-based NMI gradient ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime);
          fprintf(outputFile, "Voxel-based NMI gradient ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime);
-         cudaCommon_free((void **)&logJointHistogram_d);
+         cudaCommon_free(logJointHistogram_d);
       }
       CUDA_SAFE_CALL(cudaFree(targetMask_d));
 #endif
@@ -576,7 +576,7 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    if(runGPU)
    {
-      cudaCommon_free((void **)&resultGradientArray_d);
+      cudaCommon_free(resultGradientArray_d);
    }
 #endif
 
@@ -638,8 +638,8 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    if(runGPU)
    {
-      cudaCommon_free((void **)&voxelNMIGradientArray_d);
-      cudaCommon_free((void **)&nodeNMIGradientArray_d);
+      cudaCommon_free(voxelNMIGradientArray_d);
+      cudaCommon_free(nodeNMIGradientArray_d);
    }
 #endif
 
@@ -796,7 +796,7 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    if(runGPU)
    {
-      cudaCommon_free( (void **)&controlPointImageArray_d );
+      cudaCommon_free(controlPointImageArray_d );
    }
 #endif
 
@@ -862,9 +862,9 @@ int main(int argc, char **argv)
          fprintf(outputFile, "GPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
          printf("Block-Matching ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime);
          fprintf(outputFile, "Block-Matching ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime);
-         cudaCommon_free((void **)&targetPosition_d);
-         cudaCommon_free((void **)&resultPosition_d);
-         cudaCommon_free((void **)&activeBlock_d);
+         cudaCommon_free(targetPosition_d);
+         cudaCommon_free(resultPosition_d);
+         cudaCommon_free(activeBlock_d);
       }
 #endif
       printf("Block-matching done\n");
@@ -887,8 +887,8 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    if(runGPU)
    {
-      cudaCommon_free( (void **)&targetImageArray_d );
-      cudaCommon_free( (void **)&resultImageArray_d );
+      cudaCommon_free(targetImageArray_d);
+      cudaCommon_free(resultImageArray_d);
    }
 #endif
 
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index e382c950..c25004ea 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -66,32 +66,32 @@ void CudaAladinContent::AllocateCuPtrs() {
     }
     if (referenceMask != nullptr) {
         cudaCommon_allocateArrayToDevice<int>(&mask_d, reference->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, referenceMask, reference->nvox);
+        cudaCommon_transferFromDeviceToNiftiSimple1<int>(mask_d, referenceMask, reference->nvox);
     }
     if (reference != nullptr) {
         cudaCommon_allocateArrayToDevice<float>(&referenceImageArray_d, reference->nvox);
         cudaCommon_allocateArrayToDevice<float>(&referenceMat_d, sizeof(mat44) / sizeof(float));
 
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&referenceImageArray_d, reference);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(referenceImageArray_d, reference);
 
         float* targetMat = (float *)malloc(sizeof(mat44)); //freed
         mat44ToCptr(*GetXYZMatrix(*reference), targetMat);
-        cudaCommon_transferFromDeviceToNiftiSimple1<float>(&referenceMat_d, targetMat, sizeof(mat44) / sizeof(float));
+        cudaCommon_transferFromDeviceToNiftiSimple1<float>(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float));
         free(targetMat);
     }
     if (warped != nullptr) {
         cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, warped->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, warped);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(warpedImageArray_d, warped);
     }
     if (deformationField != nullptr) {
         cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, deformationField->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, deformationField);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(deformationFieldArray_d, deformationField);
     }
     if (floating != nullptr) {
         cudaCommon_allocateArrayToDevice<float>(&floatingImageArray_d, floating->nvox);
         cudaCommon_allocateArrayToDevice<float>(&floIJKMat_d, sizeof(mat44) / sizeof(float));
 
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(&floatingImageArray_d, floating);
+        cudaCommon_transferFromDeviceToNiftiSimple<float>(floatingImageArray_d, floating);
 
         float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44));
         mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h);
@@ -102,15 +102,15 @@ void CudaAladinContent::AllocateCuPtrs() {
     if (blockMatchingParams != nullptr) {
         if (blockMatchingParams->referencePosition != nullptr) {
             cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-            cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+            cudaCommon_transferArrayFromCpuToDevice<float>(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         }
         if (blockMatchingParams->warpedPosition != nullptr) {
             cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-            cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+            cudaCommon_transferArrayFromCpuToDevice<float>(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         }
         if (blockMatchingParams->totalBlock != nullptr) {
             cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
-            cudaCommon_transferFromDeviceToNiftiSimple1<int>(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
+            cudaCommon_transferFromDeviceToNiftiSimple1<int>(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
         }
         /* // Removed until CUDA SVD is added back
         if (blockMatchingParams->activeBlockNumber > 0 ) {
@@ -141,19 +141,19 @@ nifti_image* CudaAladinContent::GetWarped(int index) {
 }
 /* *************************************************************** */
 nifti_image* CudaAladinContent::GetDeformationField() {
-    cudaCommon_transferFromDeviceToCpu<float>((float*)deformationField->data, &deformationFieldArray_d, deformationField->nvox);
+    cudaCommon_transferFromDeviceToCpu<float>((float*)deformationField->data, deformationFieldArray_d, deformationField->nvox);
     return deformationField;
 }
 /* *************************************************************** */
 _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() {
-    cudaCommon_transferFromDeviceToCpu<float>(blockMatchingParams->warpedPosition, &warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-    cudaCommon_transferFromDeviceToCpu<float>(blockMatchingParams->referencePosition, &referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+    cudaCommon_transferFromDeviceToCpu<float>(blockMatchingParams->warpedPosition, warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+    cudaCommon_transferFromDeviceToCpu<float>(blockMatchingParams->referencePosition, referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     return blockMatchingParams;
 }
 /* *************************************************************** */
 void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     if (transformationMatrix != nullptr)
-        cudaCommon_free(&transformationMatrix_d);
+        cudaCommon_free(transformationMatrix_d);
 
     AladinContent::SetTransformationMatrix(transformationMatrixIn);
     float *tmpMat_h = (float*)malloc(sizeof(mat44));
@@ -166,50 +166,50 @@ void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
 /* *************************************************************** */
 void CudaAladinContent::SetDeformationField(nifti_image *deformationFieldIn) {
     if (deformationField != nullptr)
-        cudaCommon_free(&deformationFieldArray_d);
+        cudaCommon_free(deformationFieldArray_d);
     AladinContent::SetDeformationField(deformationFieldIn);
 
     cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, deformationField->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple<float>(&deformationFieldArray_d, deformationField);
+    cudaCommon_transferFromDeviceToNiftiSimple<float>(deformationFieldArray_d, deformationField);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) {
     if (referenceMask != nullptr)
-        cudaCommon_free(&mask_d);
+        cudaCommon_free(mask_d);
     AladinContent::SetReferenceMask(referenceMaskIn);
     cudaCommon_allocateArrayToDevice<int>(&mask_d, reference->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple1<int>(&mask_d, referenceMaskIn, reference->nvox);
+    cudaCommon_transferFromDeviceToNiftiSimple1<int>(mask_d, referenceMaskIn, reference->nvox);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetWarped(nifti_image *warped) {
     if (warped != nullptr)
-        cudaCommon_free(&warpedImageArray_d);
+        cudaCommon_free(warpedImageArray_d);
     AladinContent::SetWarped(warped);
     reg_tools_changeDatatype<float>(warped);
 
     cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, warped->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple<float>(&warpedImageArray_d, warped);
+    cudaCommon_transferFromDeviceToNiftiSimple<float>(warpedImageArray_d, warped);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
     AladinContent::SetBlockMatchingParams(bmp);
     if (blockMatchingParams->referencePosition != nullptr) {
-        cudaCommon_free(&referencePosition_d);
+        cudaCommon_free(referencePosition_d);
         //referencePosition
         cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-        cudaCommon_transferArrayFromCpuToDevice<float>(&referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+        cudaCommon_transferArrayFromCpuToDevice<float>(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     }
     if (blockMatchingParams->warpedPosition != nullptr) {
-        cudaCommon_free(&warpedPosition_d);
+        cudaCommon_free(warpedPosition_d);
         //warpedPosition
         cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-        cudaCommon_transferArrayFromCpuToDevice<float>(&warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+        cudaCommon_transferArrayFromCpuToDevice<float>(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     }
     if (blockMatchingParams->totalBlock != nullptr) {
-        cudaCommon_free(&totalBlock_d);
+        cudaCommon_free(totalBlock_d);
         //activeBlock
         cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
-        cudaCommon_transferArrayFromCpuToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
+        cudaCommon_transferArrayFromCpuToDevice<int>(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
     }
     /* // Removed until CUDA SVD is added back
      if (blockMatchingParams->activeBlockNumber > 0) {
@@ -265,7 +265,7 @@ void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, i
     size_t size = image->nvox;
     float *buffer = (float*)malloc(size * sizeof(float));
 
-    cudaCommon_transferFromDeviceToCpu<float>(buffer, &memoryObject, size);
+    cudaCommon_transferFromDeviceToCpu<float>(buffer, memoryObject, size);
 
     free(image->data);
     image->datatype = type;
@@ -405,38 +405,38 @@ int* CudaAladinContent::GetFloatingDims() {
 /* *************************************************************** */
 void CudaAladinContent::FreeCuPtrs() {
     if (transformationMatrix != nullptr)
-        cudaCommon_free(&transformationMatrix_d);
+        cudaCommon_free(transformationMatrix_d);
 
     if (reference != nullptr) {
-        cudaCommon_free(&referenceImageArray_d);
-        cudaCommon_free(&referenceMat_d);
+        cudaCommon_free(referenceImageArray_d);
+        cudaCommon_free(referenceMat_d);
     }
 
     if (floating != nullptr) {
-        cudaCommon_free(&floatingImageArray_d);
-        cudaCommon_free(&floIJKMat_d);
+        cudaCommon_free(floatingImageArray_d);
+        cudaCommon_free(floIJKMat_d);
     }
 
     if (warped != nullptr)
-        cudaCommon_free(&warpedImageArray_d);
+        cudaCommon_free(warpedImageArray_d);
 
     if (deformationField != nullptr)
-        cudaCommon_free(&deformationFieldArray_d);
+        cudaCommon_free(deformationFieldArray_d);
 
     if (referenceMask != nullptr)
-        cudaCommon_free(&mask_d);
+        cudaCommon_free(mask_d);
 
     if (blockMatchingParams != nullptr) {
-        cudaCommon_free(&totalBlock_d);
-        cudaCommon_free(&referencePosition_d);
-        cudaCommon_free(&warpedPosition_d);
+        cudaCommon_free(totalBlock_d);
+        cudaCommon_free(referencePosition_d);
+        cudaCommon_free(warpedPosition_d);
         /*
-        cudaCommon_free(&AR_d);
-        cudaCommon_free(&U_d);
-        cudaCommon_free(&VT_d);
-        cudaCommon_free(&Sigma_d);
-        cudaCommon_free(&lengths_d);
-        cudaCommon_free(&newWarpedPos_d);
+        cudaCommon_free(AR_d);
+        cudaCommon_free(U_d);
+        cudaCommon_free(VT_d);
+        cudaCommon_free(Sigma_d);
+        cudaCommon_free(lengths_d);
+        cudaCommon_free(newWarpedPos_d);
         */
     }
 }
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index fe758c02..08b56279 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -29,32 +29,32 @@ void CudaContent::AllocateImages() {
         reg_tools_changeDatatype<float>(floating);
     if (reference->nt == 1) {
         cudaCommon_allocateArrayToDevice<float>(&referenceCuda[0], reference->dim);
-        cudaCommon_transferNiftiToArrayOnDevice<float>(&referenceCuda[0], reference);
+        cudaCommon_transferNiftiToArrayOnDevice<float>(referenceCuda[0], reference);
         cudaCommon_allocateArrayToDevice<float>(&floatingCuda[0], floating->dim);
-        cudaCommon_transferNiftiToArrayOnDevice<float>(&floatingCuda[0], floating);
+        cudaCommon_transferNiftiToArrayOnDevice<float>(floatingCuda[0], floating);
     } else if (reference->nt == 2) {
         cudaCommon_allocateArrayToDevice<float>(&referenceCuda[0], &referenceCuda[1], reference->dim);
-        cudaCommon_transferNiftiToArrayOnDevice<float>(&referenceCuda[0], &referenceCuda[1], reference);
+        cudaCommon_transferNiftiToArrayOnDevice<float>(referenceCuda[0], referenceCuda[1], reference);
         cudaCommon_allocateArrayToDevice<float>(&floatingCuda[0], &floatingCuda[1], floating->dim);
-        cudaCommon_transferNiftiToArrayOnDevice<float>(&floatingCuda[0], &floatingCuda[1], floating);
+        cudaCommon_transferNiftiToArrayOnDevice<float>(floatingCuda[0], floatingCuda[1], floating);
     }
 }
 /* *************************************************************** */
 void CudaContent::DeallocateImages() {
     if (referenceCuda[0]) {
-        cudaCommon_free(&referenceCuda[0]);
+        cudaCommon_free(referenceCuda[0]);
         referenceCuda[0] = nullptr;
     }
     if (referenceCuda[1]) {
-        cudaCommon_free(&referenceCuda[1]);
+        cudaCommon_free(referenceCuda[1]);
         referenceCuda[1] = nullptr;
     }
     if (floatingCuda[0]) {
-        cudaCommon_free(&floatingCuda[0]);
+        cudaCommon_free(floatingCuda[0]);
         floatingCuda[0] = nullptr;
     }
     if (floatingCuda[1]) {
-        cudaCommon_free(&floatingCuda[1]);
+        cudaCommon_free(floatingCuda[1]);
         floatingCuda[1] = nullptr;
     }
 }
@@ -65,7 +65,7 @@ void CudaContent::AllocateDeformationField() {
 /* *************************************************************** */
 void CudaContent::DeallocateDeformationField() {
     if (deformationFieldCuda) {
-        cudaCommon_free(&deformationFieldCuda);
+        cudaCommon_free(deformationFieldCuda);
         deformationFieldCuda = nullptr;
     }
 }
@@ -84,11 +84,11 @@ void CudaContent::AllocateWarped() {
 /* *************************************************************** */
 void CudaContent::DeallocateWarped() {
     if (warpedCuda[0]) {
-        cudaCommon_free(&warpedCuda[0]);
+        cudaCommon_free(warpedCuda[0]);
         warpedCuda[0] = nullptr;
     }
     if (warpedCuda[1]) {
-        cudaCommon_free(&warpedCuda[1]);
+        cudaCommon_free(warpedCuda[1]);
         warpedCuda[1] = nullptr;
     }
 }
@@ -98,7 +98,7 @@ bool CudaContent::IsCurrentComputationDoubleCapable() {
 }
 /* *************************************************************** */
 nifti_image* CudaContent::GetDeformationField() {
-    cudaCommon_transferFromDeviceToNifti(deformationField, &deformationFieldCuda);
+    cudaCommon_transferFromDeviceToNifti(deformationField, deformationFieldCuda);
     return deformationField;
 }
 /* *************************************************************** */
@@ -108,14 +108,14 @@ void CudaContent::SetDeformationField(nifti_image *deformationFieldIn) {
     if (!deformationField) return;
 
     AllocateDeformationField();
-    cudaCommon_transferNiftiToArrayOnDevice(&deformationFieldCuda, deformationField);
+    cudaCommon_transferNiftiToArrayOnDevice(deformationFieldCuda, deformationField);
 }
 /* *************************************************************** */
 void CudaContent::SetReferenceMask(int *referenceMaskIn) {
     Content::SetReferenceMask(referenceMaskIn);
 
     if (referenceMaskCuda) {
-        cudaCommon_free(&referenceMaskCuda);
+        cudaCommon_free(referenceMaskCuda);
         referenceMaskCuda = nullptr;
     }
 
@@ -130,7 +130,7 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     Content::SetTransformationMatrix(transformationMatrixIn);
 
     if (transformationMatrixCuda) {
-        cudaCommon_free(&transformationMatrixCuda);
+        cudaCommon_free(transformationMatrixCuda);
         transformationMatrixCuda = nullptr;
     }
 
@@ -155,9 +155,9 @@ void CudaContent::SetWarped(nifti_image *warpedIn) {
 
     reg_tools_changeDatatype<float>(warped);
     AllocateWarped();
-    cudaCommon_transferNiftiToArrayOnDevice(&warpedCuda[0], warped);
+    cudaCommon_transferNiftiToArrayOnDevice(warpedCuda[0], warped);
     if (warpedCuda[1])
-        cudaCommon_transferNiftiToArrayOnDevice(&warpedCuda[1], warped);
+        cudaCommon_transferNiftiToArrayOnDevice(warpedCuda[1], warped);
 }
 /* *************************************************************** */
 template<class DataType>
@@ -192,7 +192,7 @@ void CudaContent::FillImageData(nifti_image *image, float *memoryObject, int dat
     size_t size = image->nvox;
     float *buffer = (float*)malloc(size * sizeof(float));
 
-    cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size);
+    cudaCommon_transferFromDeviceToCpu(buffer, memoryObject, size);
 
     free(image->data);
     image->datatype = datatype;
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index 499a670d..afb0f34e 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -33,11 +33,11 @@ void CudaF3dContent::AllocateWarpedGradient() {
 /* *************************************************************** */
 void CudaF3dContent::DeallocateWarpedGradient() {
     if (warpedGradientCuda[0] != nullptr) {
-        cudaCommon_free(&warpedGradientCuda[0]);
+        cudaCommon_free(warpedGradientCuda[0]);
         warpedGradientCuda[0] = nullptr;
     }
     if (warpedGradientCuda[1] != nullptr) {
-        cudaCommon_free(&warpedGradientCuda[1]);
+        cudaCommon_free(warpedGradientCuda[1]);
         warpedGradientCuda[1] = nullptr;
     }
 }
@@ -48,7 +48,7 @@ void CudaF3dContent::AllocateTransformationGradient() {
 /* *************************************************************** */
 void CudaF3dContent::DeallocateTransformationGradient() {
     if (transformationGradientCuda) {
-        cudaCommon_free(&transformationGradientCuda);
+        cudaCommon_free(transformationGradientCuda);
         transformationGradientCuda = nullptr;
     }
 }
@@ -59,13 +59,13 @@ void CudaF3dContent::AllocateVoxelBasedMeasureGradient() {
 /* *************************************************************** */
 void CudaF3dContent::DeallocateVoxelBasedMeasureGradient() {
     if (voxelBasedMeasureGradientCuda) {
-        cudaCommon_free(&voxelBasedMeasureGradientCuda);
+        cudaCommon_free(voxelBasedMeasureGradientCuda);
         voxelBasedMeasureGradientCuda = nullptr;
     }
 }
 /* *************************************************************** */
 nifti_image* CudaF3dContent::GetControlPointGrid() {
-    cudaCommon_transferFromDeviceToNifti(controlPointGrid, &controlPointGridCuda);
+    cudaCommon_transferFromDeviceToNifti(controlPointGrid, controlPointGridCuda);
     return controlPointGrid;
 }
 /* *************************************************************** */
@@ -73,18 +73,18 @@ void CudaF3dContent::SetControlPointGrid(nifti_image *controlPointGridIn) {
     F3dContent::SetControlPointGrid(controlPointGridIn);
 
     if (controlPointGridCuda) {
-        cudaCommon_free(&controlPointGridCuda);
+        cudaCommon_free(controlPointGridCuda);
         controlPointGridCuda = nullptr;
     }
 
     if (!controlPointGrid) return;
 
     cudaCommon_allocateArrayToDevice(&controlPointGridCuda, controlPointGrid->dim);
-    cudaCommon_transferNiftiToArrayOnDevice(&controlPointGridCuda, controlPointGrid);
+    cudaCommon_transferNiftiToArrayOnDevice(controlPointGridCuda, controlPointGrid);
 }
 /* *************************************************************** */
 nifti_image* CudaF3dContent::GetTransformationGradient() {
-    cudaCommon_transferFromDeviceToNifti(transformationGradient, &transformationGradientCuda);
+    cudaCommon_transferFromDeviceToNifti(transformationGradient, transformationGradientCuda);
     return transformationGradient;
 }
 /* *************************************************************** */
@@ -94,11 +94,11 @@ void CudaF3dContent::SetTransformationGradient(nifti_image *transformationGradie
     if (!transformationGradient) return;
 
     AllocateTransformationGradient();
-    cudaCommon_transferNiftiToArrayOnDevice(&transformationGradientCuda, transformationGradient);
+    cudaCommon_transferNiftiToArrayOnDevice(transformationGradientCuda, transformationGradient);
 }
 /* *************************************************************** */
 nifti_image* CudaF3dContent::GetVoxelBasedMeasureGradient() {
-    cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, &voxelBasedMeasureGradientCuda);
+    cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda);
     return voxelBasedMeasureGradient;
 }
 /* *************************************************************** */
@@ -108,11 +108,11 @@ void CudaF3dContent::SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasure
     if (!voxelBasedMeasureGradient) return;
 
     AllocateVoxelBasedMeasureGradient();
-    cudaCommon_transferNiftiToArrayOnDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient);
+    cudaCommon_transferNiftiToArrayOnDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient);
 }
 /* *************************************************************** */
 nifti_image* CudaF3dContent::GetWarpedGradient() {
-    cudaCommon_transferFromDeviceToNifti(warpedGradient, &warpedGradientCuda[0]);
+    cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda[0]);
     return warpedGradient;
 }
 /* *************************************************************** */
@@ -122,9 +122,9 @@ void CudaF3dContent::SetWarpedGradient(nifti_image *warpedGradientIn) {
     if (!warpedGradient) return;
 
     AllocateWarpedGradient();
-    cudaCommon_transferNiftiToArrayOnDevice(&warpedGradientCuda[0], warpedGradient);
+    cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[0], warpedGradient);
     if (warpedGradientCuda[1])
-        cudaCommon_transferNiftiToArrayOnDevice(&warpedGradientCuda[1], warpedGradient);
+        cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[1], warpedGradient);
 }
 /* *************************************************************** */
 void CudaF3dContent::ZeroTransformationGradient() {
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h
index 019a3e58..06ee1359 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.h
+++ b/reg-lib/cuda/_reg_blocksize_gpu.h
@@ -13,8 +13,8 @@
 #include "cuda_runtime.h"
 #include "cuda.h"
 
- /* ******************************** */
- /* ******************************** */
+/* ******************************** */
+/* ******************************** */
 #ifndef __VECTOR_TYPES_H__
 #define __VECTOR_TYPES_H__
 struct __attribute__((aligned(4))) float4 {
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 2c7c294f..a1fcfa7b 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -14,753 +14,611 @@
 #include "_reg_blocksize_gpu.h"
 
  /* ******************************** */
- /* ******************************** */
-int cudaCommon_setCUDACard(CUcontext *ctx, bool verbose) {
-	// The CUDA card is setup
-	cuInit(0);
-	struct cudaDeviceProp deviceProp;
-	int device_count = 0;
-	cudaGetDeviceCount(&device_count);
-	if (verbose)
-		printf("[NiftyReg CUDA] %i card(s) detected\n", device_count);
-	// following code is from cutGetMaxGflopsDeviceId()
-	int max_gflops_device = 0;
-	int max_gflops = 0;
-	int current_device = 0;
-	while (current_device < device_count) {
-		cudaGetDeviceProperties(&deviceProp, current_device);
-		int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate;
-		if (gflops > max_gflops) {
-			max_gflops = gflops;
-			max_gflops_device = current_device;
-		}
-		++current_device;
-	}
-	NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device));
-	NR_CUDA_SAFE_CALL(cuCtxCreate(ctx, CU_CTX_SCHED_SPIN, max_gflops_device))
-	NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
-
-	if (deviceProp.major < 1) {
-		fprintf(stderr, "[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n");
-		return EXIT_FAILURE;
-	} else {
-		size_t free = 0;
-		size_t total = 0;
-		cuMemGetInfo(&free, &total);
-		if (deviceProp.totalGlobalMem != total) {
-			fprintf(stderr, "[NiftyReg CUDA ERROR] The CUDA card %s does not seem to be available\n",
-					deviceProp.name);
-			fprintf(stderr, "[NiftyReg CUDA ERROR] Expected total memory: %lu Mb - Recovered total memory: %lu Mb\n",
-					deviceProp.totalGlobalMem / (1024 * 1024), total / (1024 * 1024));
-			return EXIT_FAILURE;
-		}
-		if (verbose) {
-			printf("[NiftyReg CUDA] The following device is used: %s\n",
-				   deviceProp.name);
-			printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n",
-				   (unsigned long int)(free / (1024 * 1024)),
-				   (unsigned long int)(total / (1024 * 1024)));
-			printf("[NiftyReg CUDA] Card compute capability: %i.%i\n",
-				   deviceProp.major,
-				   deviceProp.minor);
-			printf("[NiftyReg CUDA] Shared memory size in bytes: %lu\n",
-				   deviceProp.sharedMemPerBlock);
-			printf("[NiftyReg CUDA] CUDA version %i\n",
-				   CUDART_VERSION);
-			printf("[NiftyReg CUDA] Card clock rate: %i MHz\n",
-				   deviceProp.clockRate / 1000);
-			printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n",
-				   deviceProp.multiProcessorCount);
-		}
-		NiftyReg_CudaBlock::GetInstance(deviceProp.major);
-	}
-	return EXIT_SUCCESS;
-}
-/* ******************************** */
-void cudaCommon_unsetCUDACard(CUcontext *ctx) {
-	//    cuCtxDetach(*ctx);
-	cuCtxDestroy(*ctx);
-}
-/* ******************************** */
-/* ******************************** */
 template <class NIFTI_TYPE>
-int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image **image_d, nifti_image *img) {
-
-	const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NIFTI_TYPE);
+int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *img) {
+    const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NIFTI_TYPE);
 
-	int *g_dim;
-	float* g_pixdim;
-	NIFTI_TYPE* g_data;
+    int *g_dim;
+    float* g_pixdim;
+    NIFTI_TYPE* g_data;
 
-	NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_dim, 8 * sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_pixdim, 8 * sizeof(float)));
-	NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_data, memSize));
+    NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_dim, 8 * sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_pixdim, 8 * sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_data, memSize));
 
-	NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE *>( img->data );
-	NR_CUDA_SAFE_CALL(cudaMemcpy(( *image_d ), img, sizeof(nifti_image), cudaMemcpyHostToDevice));
+    NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
+    NR_CUDA_SAFE_CALL(cudaMemcpy(image_d, img, sizeof(nifti_image), cudaMemcpyHostToDevice));
 
-	NR_CUDA_SAFE_CALL(cudaMemcpy((*image_d)->data, array_h, memSize, cudaMemcpyHostToDevice));
-	NR_CUDA_SAFE_CALL(cudaMemcpy(( *image_d )->dim, img->dim, 8 * sizeof(int), cudaMemcpyHostToDevice));
-	NR_CUDA_SAFE_CALL(cudaMemcpy(( *image_d )->pixdim, img->pixdim, 8 * sizeof(float), cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->data, array_h, memSize, cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->dim, img->dim, 8 * sizeof(int), cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->pixdim, img->pixdim, 8 * sizeof(float), cudaMemcpyHostToDevice));
 
-	return EXIT_SUCCESS;
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToNiftiOnDevice1<float>(nifti_image **image_d, nifti_image *img);
-template int cudaCommon_transferNiftiToNiftiOnDevice1<double>(nifti_image **image_d, nifti_image *img);
-/* ******************************** */
+template int cudaCommon_transferNiftiToNiftiOnDevice1<float>(nifti_image*, nifti_image*);
+template int cudaCommon_transferNiftiToNiftiOnDevice1<double>(nifti_image*, nifti_image*);
 /* ******************************** */
 template <class DTYPE, class NIFTI_TYPE>
-int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE **array_d, nifti_image *img)
-{
-	if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){
-		reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-		reg_print_msg_error("The host and device arrays are of different types");
-		return EXIT_FAILURE;
-	}
-	else{
-		const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE);
-		NIFTI_TYPE *array_h=static_cast<NIFTI_TYPE *>(img->data);
-		NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, array_h, memSize, cudaMemcpyHostToDevice));
-	}
-	return EXIT_SUCCESS;
+int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, nifti_image *img) {
+    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+        reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
+        reg_print_msg_error("The host and device arrays are of different types");
+        return EXIT_FAILURE;
+    } else {
+        const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE);
+        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice));
+    }
+    return EXIT_SUCCESS;
 }
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(DTYPE **array_d, nifti_image *img)
-{
-	if( sizeof(DTYPE)==sizeof(float4) ){
-		if( (img->datatype!=NIFTI_TYPE_FLOAT32) || (img->dim[5]<2) || (img->dim[4]>1)){
-			reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-			reg_print_msg_error("The specified image is not a single precision deformation field image");
-			return EXIT_FAILURE;
-		}
-		float *niftiImgValues = static_cast<float *>(img->data);
-		float4 *array_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4));
-		const int voxelNumber = img->nx*img->ny*img->nz;
-		for(int i=0; i<voxelNumber; i++)
-			array_h[i].x= *niftiImgValues++;
-		if(img->dim[5]>=2){
-			for(int i=0; i<voxelNumber; i++)
-				array_h[i].y= *niftiImgValues++;
-		}
-		if(img->dim[5]>=3){
-			for(int i=0; i<voxelNumber; i++)
-				array_h[i].z= *niftiImgValues++;
-		}
-		if(img->dim[5]>=4){
-			for(int i=0; i<voxelNumber; i++)
-				array_h[i].w= *niftiImgValues++;
-		}
-		NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, array_h, img->nx*img->ny*img->nz*sizeof(float4), cudaMemcpyHostToDevice));
-		free(array_h);
-	}
-	else{ // All these else could be removed but the nvcc compiler would warn for unreachable statement
-		switch(img->datatype){
-		case NIFTI_TYPE_FLOAT32:
-			return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE,float>(array_d, img);
-		default:
-			reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-			reg_print_msg_error("The image data type is not supported");
-			return EXIT_FAILURE;
-		}
-	}
-	return EXIT_SUCCESS;
+int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, nifti_image *img) {
+    if (sizeof(DTYPE) == sizeof(float4)) {
+        if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
+            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
+            reg_print_msg_error("The specified image is not a single precision deformation field image");
+            return EXIT_FAILURE;
+        }
+        float *niftiImgValues = static_cast<float*>(img->data);
+        float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
+        const int voxelNumber = img->nx * img->ny * img->nz;
+        for (int i = 0; i < voxelNumber; i++)
+            array_h[i].x = *niftiImgValues++;
+        if (img->dim[5] >= 2) {
+            for (int i = 0; i < voxelNumber; i++)
+                array_h[i].y = *niftiImgValues++;
+        }
+        if (img->dim[5] >= 3) {
+            for (int i = 0; i < voxelNumber; i++)
+                array_h[i].z = *niftiImgValues++;
+        }
+        if (img->dim[5] >= 4) {
+            for (int i = 0; i < voxelNumber; i++)
+                array_h[i].w = *niftiImgValues++;
+        }
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice));
+        free(array_h);
+    } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
+        switch (img->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE, float>(array_d, img);
+        default:
+            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
+            reg_print_msg_error("The image data type is not supported");
+            return EXIT_FAILURE;
+        }
+    }
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(double **, nifti_image *);
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(float **, nifti_image *);
-template int cudaCommon_transferNiftiToArrayOnDevice<int>(int **, nifti_image *);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4 **, nifti_image *);
+template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<int>(int*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, nifti_image*);
 /* ******************************** */
-
 template <class DTYPE, class NIFTI_TYPE>
-int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE **array_d, DTYPE **array2_d, nifti_image *img)
-{
-	if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){
-		reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-		reg_print_msg_error("The host and device arrays are of different types");
-		return EXIT_FAILURE;
-	}
-	else{
-		const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE);
-		NIFTI_TYPE *array_h=static_cast<NIFTI_TYPE *>(img->data);
-		NIFTI_TYPE *array2_h=&array_h[img->dim[1] * img->dim[2] * img->dim[3]];
-		NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, array_h, memSize, cudaMemcpyHostToDevice));
-		NR_CUDA_SAFE_CALL(cudaMemcpy(*array2_d, array2_h, memSize, cudaMemcpyHostToDevice));
-	}
-	return EXIT_SUCCESS;
+int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) {
+    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+        reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
+        reg_print_msg_error("The host and device arrays are of different types");
+        return EXIT_FAILURE;
+    } else {
+        const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE);
+        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
+        NIFTI_TYPE *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]];
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, memSize, cudaMemcpyHostToDevice));
+    }
+    return EXIT_SUCCESS;
 }
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(DTYPE **array_d, DTYPE **array2_d, nifti_image *img)
-{
-	if(sizeof(DTYPE)==sizeof(float4) ){
-		if( (img->datatype!=NIFTI_TYPE_FLOAT32) || (img->dim[5]<2) || (img->dim[4]>1)){
-			reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-			reg_print_msg_error("The specified image is not a single precision deformation field image");
-			return EXIT_FAILURE;
-		}
-		float *niftiImgValues = static_cast<float *>(img->data);
-		float4 *array_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4));
-		float4 *array2_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4));
-		const int voxelNumber = img->nx*img->ny*img->nz;
-		for(int i=0; i<voxelNumber; i++)
-			array_h[i].x= *niftiImgValues++;
-		for(int i=0; i<voxelNumber; i++)
-			array2_h[i].x= *niftiImgValues++;
-		if(img->dim[5]>=2){
-			for(int i=0; i<voxelNumber; i++)
-				array_h[i].y= *niftiImgValues++;
-			for(int i=0; i<voxelNumber; i++)
-				array2_h[i].y= *niftiImgValues++;
-		}
-		if(img->dim[5]>=3){
-			for(int i=0; i<voxelNumber; i++)
-				array_h[i].z= *niftiImgValues++;
-			for(int i=0; i<voxelNumber; i++)
-				array2_h[i].z= *niftiImgValues++;
-		}
-		if(img->dim[5]>=4){
-			for(int i=0; i<voxelNumber; i++)
-				array_h[i].w= *niftiImgValues++;
-			for(int i=0; i<voxelNumber; i++)
-				array2_h[i].w= *niftiImgValues++;
-		}
-		NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, array_h, img->nx*img->ny*img->nz*sizeof(float4), cudaMemcpyHostToDevice));
-		NR_CUDA_SAFE_CALL(cudaMemcpy(*array2_d, array2_h, img->nx*img->ny*img->nz*sizeof(float4), cudaMemcpyHostToDevice));
-		free(array_h);
-		free(array2_h);
-	}
-	else{ // All these else could be removed but the nvcc compiler would warn for unreachable statement
-		switch(img->datatype){
-		case NIFTI_TYPE_FLOAT32:
-			return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE,float>(array_d, array2_d, img);
-		default:
-			reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-			reg_print_msg_error("The image data type is not supported");
-			return EXIT_FAILURE;
-		}
-	}
-	return EXIT_SUCCESS;
+int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) {
+    if (sizeof(DTYPE) == sizeof(float4)) {
+        if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
+            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
+            reg_print_msg_error("The specified image is not a single precision deformation field image");
+            return EXIT_FAILURE;
+        }
+        float *niftiImgValues = static_cast<float *>(img->data);
+        float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
+        float4 *array2_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
+        const int voxelNumber = img->nx * img->ny * img->nz;
+        for (int i = 0; i < voxelNumber; i++)
+            array_h[i].x = *niftiImgValues++;
+        for (int i = 0; i < voxelNumber; i++)
+            array2_h[i].x = *niftiImgValues++;
+        if (img->dim[5] >= 2) {
+            for (int i = 0; i < voxelNumber; i++)
+                array_h[i].y = *niftiImgValues++;
+            for (int i = 0; i < voxelNumber; i++)
+                array2_h[i].y = *niftiImgValues++;
+        }
+        if (img->dim[5] >= 3) {
+            for (int i = 0; i < voxelNumber; i++)
+                array_h[i].z = *niftiImgValues++;
+            for (int i = 0; i < voxelNumber; i++)
+                array2_h[i].z = *niftiImgValues++;
+        }
+        if (img->dim[5] >= 4) {
+            for (int i = 0; i < voxelNumber; i++)
+                array_h[i].w = *niftiImgValues++;
+            for (int i = 0; i < voxelNumber; i++)
+                array2_h[i].w = *niftiImgValues++;
+        }
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice));
+        free(array_h);
+        free(array2_h);
+    } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
+        switch (img->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE, float>(array_d, array2_d, img);
+        default:
+            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
+            reg_print_msg_error("The image data type is not supported");
+            return EXIT_FAILURE;
+        }
+    }
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(float **,float **, nifti_image *);
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(double **,double **, nifti_image *);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4 **,float4 **, nifti_image *); // for deformation field
-/* ******************************** */
+template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, float*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, double*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, float4*, nifti_image*); // for deformation field
 /* ******************************** */
 template <class DTYPE, class NIFTI_TYPE>
-int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray **cuArray_d, nifti_image *img)
-{
-	if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){
-		reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-		reg_print_msg_error("The host and device arrays are of different types");
-		return EXIT_FAILURE;
-	}
-	else{
-		NIFTI_TYPE *array_h=static_cast<NIFTI_TYPE *>(img->data);
+int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *img) {
+    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+        reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
+        reg_print_msg_error("The host and device arrays are of different types");
+        return EXIT_FAILURE;
+    } else {
+        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
 
-		cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
-		copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
-		copyParams.srcPtr = make_cudaPitchedPtr((void *) array_h,
-												copyParams.extent.width*sizeof(DTYPE),
-												copyParams.extent.width,
-												copyParams.extent.height);
-		copyParams.dstArray = *cuArray_d;
-		copyParams.kind = cudaMemcpyHostToDevice;
-		NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-	}
-	return EXIT_SUCCESS;
+        cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
+        copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
+        copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
+                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = cuArray_d;
+        copyParams.kind = cudaMemcpyHostToDevice;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+    }
+    return EXIT_SUCCESS;
 }
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **cuArray_d, nifti_image *img)
-{
-	if( sizeof(DTYPE)==sizeof(float4) ){
-		if( (img->datatype!=NIFTI_TYPE_FLOAT32) || (img->dim[5]<2) || (img->dim[4]>1) ){
-			reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-			reg_print_msg_error("The specified image is not a single precision deformation field image");
-			return EXIT_FAILURE;
-		}
-		float *niftiImgValues = static_cast<float *>(img->data);
-		float4 *array_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4));
+int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *img) {
+    if (sizeof(DTYPE) == sizeof(float4)) {
+        if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
+            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
+            reg_print_msg_error("The specified image is not a single precision deformation field image");
+            return EXIT_FAILURE;
+        }
+        float *niftiImgValues = static_cast<float *>(img->data);
+        float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
 
-		for(int i=0; i<img->nx*img->ny*img->nz; i++)
-			array_h[i].x= *niftiImgValues++;
-		if(img->dim[5]>=2)
-		{
-			for(int i=0; i<img->nx*img->ny*img->nz; i++)
-				array_h[i].y= *niftiImgValues++;
-		}
-		if(img->dim[5]>=3)
-		{
-			for(int i=0; i<img->nx*img->ny*img->nz; i++)
-				array_h[i].z= *niftiImgValues++;
-		}
-		if(img->dim[5]==3)
-		{
-			for(int i=0; i<img->nx*img->ny*img->nz; i++)
-				array_h[i].w= *niftiImgValues++;
-		}
-		cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
-		copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
-		copyParams.srcPtr = make_cudaPitchedPtr((void *) array_h,
-												copyParams.extent.width*sizeof(DTYPE),
-												copyParams.extent.width,
-												copyParams.extent.height);
-		copyParams.dstArray = *cuArray_d;
-		copyParams.kind = cudaMemcpyHostToDevice;
-		NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams))
-		free(array_h);
-	}
-	else{ // All these else could be removed but the nvcc compiler would warn for unreachable statement
-		switch(img->datatype){
-		case NIFTI_TYPE_FLOAT32:
-			return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE,float>(cuArray_d, img);
-		default:
-			reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-			reg_print_msg_error("The image data type is not supported");
-			return EXIT_FAILURE;
-		}
-	}
-	return EXIT_SUCCESS;
+        for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            array_h[i].x = *niftiImgValues++;
+        if (img->dim[5] >= 2) {
+            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+                array_h[i].y = *niftiImgValues++;
+        }
+        if (img->dim[5] >= 3) {
+            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+                array_h[i].z = *niftiImgValues++;
+        }
+        if (img->dim[5] == 3) {
+            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+                array_h[i].w = *niftiImgValues++;
+        }
+        cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
+        copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
+        copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
+                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = cuArray_d;
+        copyParams.kind = cudaMemcpyHostToDevice;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+        free(array_h);
+    } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
+        switch (img->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE, float>(cuArray_d, img);
+        default:
+            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
+            reg_print_msg_error("The image data type is not supported");
+            return EXIT_FAILURE;
+        }
+    }
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray **, nifti_image *);
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray **, nifti_image *);
-template int cudaCommon_transferNiftiToArrayOnDevice<int>(cudaArray **, nifti_image *);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray **, nifti_image *); // for deformation field
-/* ******************************** */
+template int cudaCommon_transferNiftiToArrayOnDevice<int>(cudaArray*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, nifti_image*); // for deformation field
 /* ******************************** */
 template <class DTYPE, class NIFTI_TYPE>
-int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray **cuArray_d, cudaArray **cuArray2_d, nifti_image *img)
-{
-	if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){
-		reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-		reg_print_msg_error("The host and device arrays are of different types");
-		return EXIT_FAILURE;
-	}
-	else{
-		NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE *>(img->data);
-		NIFTI_TYPE *array2_h = &array_h[img->dim[1]*img->dim[2]*img->dim[3]];
+int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) {
+    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+        reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
+        reg_print_msg_error("The host and device arrays are of different types");
+        return EXIT_FAILURE;
+    } else {
+        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
+        NIFTI_TYPE *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]];
 
-		cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
-		copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
-		copyParams.kind = cudaMemcpyHostToDevice;
-		// First timepoint
-		copyParams.srcPtr = make_cudaPitchedPtr((void *) array_h,
-												copyParams.extent.width*sizeof(DTYPE),
-												copyParams.extent.width,
-												copyParams.extent.height);
-		copyParams.dstArray = *cuArray_d;
-		NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-		// Second timepoint
-		copyParams.srcPtr = make_cudaPitchedPtr((void *) array2_h,
-												copyParams.extent.width*sizeof(DTYPE),
-												copyParams.extent.width,
-												copyParams.extent.height);
-		copyParams.dstArray = *cuArray2_d;
-		NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-	}
-	return EXIT_SUCCESS;
+        cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
+        copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
+        copyParams.kind = cudaMemcpyHostToDevice;
+        // First timepoint
+        copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
+                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = cuArray_d;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+        // Second timepoint
+        copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h,
+                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = cuArray2_d;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+    }
+    return EXIT_SUCCESS;
 }
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, nifti_image *img)
-{
-	if( sizeof(DTYPE)==sizeof(float4) ){
-		if( (img->datatype!=NIFTI_TYPE_FLOAT32) || (img->dim[5]<2) || (img->dim[4]>1) )
-		{
-			reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-			reg_print_msg_error("The specified image is not a single precision deformation field image");
-			return EXIT_FAILURE;
-		}
-		float *niftiImgValues = static_cast<float *>(img->data);
-		float4 *array_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4));
-		float4 *array2_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4));
+int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) {
+    if (sizeof(DTYPE) == sizeof(float4)) {
+        if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
+            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
+            reg_print_msg_error("The specified image is not a single precision deformation field image");
+            return EXIT_FAILURE;
+        }
+        float *niftiImgValues = static_cast<float*>(img->data);
+        float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
+        float4 *array2_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
 
-		for(int i=0; i<img->nx*img->ny*img->nz; i++)
-			array_h[i].x= *niftiImgValues++;
-		for(int i=0; i<img->nx*img->ny*img->nz; i++)
-			array2_h[i].x= *niftiImgValues++;
+        for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            array_h[i].x = *niftiImgValues++;
+        for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            array2_h[i].x = *niftiImgValues++;
 
-		if(img->dim[5]>=2){
-			for(int i=0; i<img->nx*img->ny*img->nz; i++)
-				array_h[i].y= *niftiImgValues++;
-			for(int i=0; i<img->nx*img->ny*img->nz; i++)
-				array2_h[i].y= *niftiImgValues++;
-		}
+        if (img->dim[5] >= 2) {
+            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+                array_h[i].y = *niftiImgValues++;
+            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+                array2_h[i].y = *niftiImgValues++;
+        }
 
-		if(img->dim[5]>=3){
-			for(int i=0; i<img->nx*img->ny*img->nz; i++)
-				array_h[i].z= *niftiImgValues++;
-			for(int i=0; i<img->nx*img->ny*img->nz; i++)
-				array2_h[i].z= *niftiImgValues++;
-		}
+        if (img->dim[5] >= 3) {
+            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+                array_h[i].z = *niftiImgValues++;
+            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+                array2_h[i].z = *niftiImgValues++;
+        }
 
-		if(img->dim[5]==3){
-			for(int i=0; i<img->nx*img->ny*img->nz; i++)
-				array_h[i].w= *niftiImgValues++;
-			for(int i=0; i<img->nx*img->ny*img->nz; i++)
-				array2_h[i].w= *niftiImgValues++;
-		}
+        if (img->dim[5] == 3) {
+            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+                array_h[i].w = *niftiImgValues++;
+            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+                array2_h[i].w = *niftiImgValues++;
+        }
 
-		cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
-		copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
-		copyParams.kind = cudaMemcpyHostToDevice;
-		// First timepoint
-		copyParams.srcPtr = make_cudaPitchedPtr((void *) array_h,
-												copyParams.extent.width*sizeof(DTYPE),
-												copyParams.extent.width,
-												copyParams.extent.height);
-		copyParams.dstArray = *cuArray_d;
-		NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-		free(array_h);
-		// Second timepoint
-		copyParams.srcPtr = make_cudaPitchedPtr((void *) array2_h,
-												copyParams.extent.width*sizeof(DTYPE),
-												copyParams.extent.width,
-												copyParams.extent.height);
-		copyParams.dstArray = *cuArray2_d;
-		NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-		free(array2_h);
-	}
-	else{ // All these else could be removed but the nvcc compiler would warn for unreachable statement
-		switch(img->datatype){
-		case NIFTI_TYPE_FLOAT32:
-			return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE,float>(cuArray_d, cuArray2_d, img);
-		default:
-			reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-			reg_print_msg_error("The image data type is not supported");
-			return EXIT_FAILURE;
-		}
-	}
-	return EXIT_SUCCESS;
+        cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
+        copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
+        copyParams.kind = cudaMemcpyHostToDevice;
+        // First timepoint
+        copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
+                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = cuArray_d;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+        free(array_h);
+        // Second timepoint
+        copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h,
+                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = cuArray2_d;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+        free(array2_h);
+    } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
+        switch (img->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE, float>(cuArray_d, cuArray2_d, img);
+        default:
+            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
+            reg_print_msg_error("The image data type is not supported");
+            return EXIT_FAILURE;
+        }
+    }
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray **, cudaArray **, nifti_image *);
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray **, cudaArray **, nifti_image *);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray **, cudaArray **, nifti_image *); // for deformation field
-/* ******************************** */
+template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, cudaArray*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, cudaArray*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, cudaArray*, nifti_image*); // for deformation field
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim)
-{
-	const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]);
-	cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DTYPE>();
-	NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize));
-	return EXIT_SUCCESS;
-}template int cudaCommon_allocateArrayToDevice<float>(cudaArray **, int *);
-template int cudaCommon_allocateArrayToDevice<double>(cudaArray **, int *);
-template int cudaCommon_allocateArrayToDevice<float4>(cudaArray **, int *); // for deformation field
-/* ******************************** */
+int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) {
+    const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]);
+    cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DTYPE>();
+    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize));
+    return EXIT_SUCCESS;
+}
+template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, int*);
+template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, int*);
+template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, int*); // for deformation field
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, int *dim)
-{
-	const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]);
-	cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DTYPE>();
-	NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize));
-	NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray2_d, &texDesc, volumeSize));
-	return EXIT_SUCCESS;
+int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, int *dim) {
+    const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]);
+    cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DTYPE>();
+    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize));
+    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray2_d, &texDesc, volumeSize));
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_allocateArrayToDevice<float>(cudaArray **,cudaArray **, int *);
-template int cudaCommon_allocateArrayToDevice<double>(cudaArray **,cudaArray **, int *);
-template int cudaCommon_allocateArrayToDevice<float4>(cudaArray **,cudaArray **, int *); // for deformation field
-/* ******************************** */
+template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, cudaArray**, int*);
+template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, cudaArray**, int*);
+template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, cudaArray**, int*); // for deformation field
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int *dim)
-{
-	const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE);
-	NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
-	return EXIT_SUCCESS;
+int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int *dim) {
+    const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE);
+    NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_allocateArrayToDevice<float>(float **, int *);
-template int cudaCommon_allocateArrayToDevice<double>(double **, int *);
-template int cudaCommon_allocateArrayToDevice<int>(int **, int *);
-template int cudaCommon_allocateArrayToDevice<float4>(float4 **, int *); // for deformation field
+template int cudaCommon_allocateArrayToDevice<float>(float**, int*);
+template int cudaCommon_allocateArrayToDevice<double>(double**, int*);
+template int cudaCommon_allocateArrayToDevice<int>(int**, int*);
+template int cudaCommon_allocateArrayToDevice<float4>(float4**, int*); // for deformation field
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int vox)
-{
-	const unsigned int memSize = vox * sizeof(DTYPE);
-	NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
-	return EXIT_SUCCESS;
+int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int vox) {
+    const unsigned int memSize = vox * sizeof(DTYPE);
+    NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_allocateArrayToDevice<float>(float **, int);
-template int cudaCommon_allocateArrayToDevice<double>(double **, int);
-template int cudaCommon_allocateArrayToDevice<int>(int **, int);
-template int cudaCommon_allocateArrayToDevice<float4>(float4 **, int); // for deformation field
-/* ******************************** */
+template int cudaCommon_allocateArrayToDevice<float>(float**, int);
+template int cudaCommon_allocateArrayToDevice<double>(double**, int);
+template int cudaCommon_allocateArrayToDevice<int>(int**, int);
+template int cudaCommon_allocateArrayToDevice<float4>(float4**, int); // for deformation field
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE **array_d, DTYPE **array2_d, int *dim)
-{
-	const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE);
-	NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
-	NR_CUDA_SAFE_CALL(cudaMalloc(array2_d, memSize));
-	return EXIT_SUCCESS;
+int cudaCommon_allocateArrayToDevice(DTYPE **array_d, DTYPE **array2_d, int *dim) {
+    const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE);
+    NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
+    NR_CUDA_SAFE_CALL(cudaMalloc(array2_d, memSize));
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_allocateArrayToDevice<float>(float **, float **, int *);
-template int cudaCommon_allocateArrayToDevice<double>(double **, double **, int *);
-template int  cudaCommon_allocateArrayToDevice<float4>(float4 **, float4 **, int *); // for deformation field
-/* ******************************** */
+template int cudaCommon_allocateArrayToDevice<float>(float**, float**, int*);
+template int cudaCommon_allocateArrayToDevice<double>(double**, double**, int*);
+template int  cudaCommon_allocateArrayToDevice<float4>(float4**, float4**, int*); // for deformation field
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE **cuPtr, const unsigned int nElements)
-{
-
-	NR_CUDA_SAFE_CALL(cudaMemcpy((void *)cpuPtr, (void *)*cuPtr, nElements*sizeof(DTYPE), cudaMemcpyDeviceToHost));
-	//NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
-	return EXIT_SUCCESS;
+int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE *cuPtr, const unsigned int nElements) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DTYPE), cudaMemcpyDeviceToHost));
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferFromDeviceToCpu<float>(float *cpuPtr, float **cuPtr, const unsigned int nElements);
-template int cudaCommon_transferFromDeviceToCpu<double>(double *cpuPtr, double **cuPtr, const unsigned int nElements);
-
-/* ******************************** */
-/* ******************************** */
+template int cudaCommon_transferFromDeviceToCpu<float>(float *cpuPtr, float *cuPtr, const unsigned int nElements);
+template int cudaCommon_transferFromDeviceToCpu<double>(double *cpuPtr, double *cuPtr, const unsigned int nElements);
 /* ******************************** */
 template <class DTYPE, class NIFTI_TYPE>
-int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE **array_d)
-{
-	if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){
-		reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1");
-		reg_print_msg_error("The host and device arrays are of different types");
-		return EXIT_FAILURE;
-	}
-	else
-	{
-		NIFTI_TYPE *array_h=static_cast<NIFTI_TYPE *>(img->data);
-		NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array_h, (void *)*array_d, img->nvox*sizeof(DTYPE), cudaMemcpyDeviceToHost));
-	}
-	return EXIT_SUCCESS;
+int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d) {
+    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+        reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1");
+        reg_print_msg_error("The host and device arrays are of different types");
+        return EXIT_FAILURE;
+    } else {
+        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
+        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, img->nvox * sizeof(DTYPE), cudaMemcpyDeviceToHost));
+    }
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferFromDeviceToNifti1<float, float>(nifti_image *img, float **array_d);
-template int cudaCommon_transferFromDeviceToNifti1<double, double>(nifti_image *img, double **array_d);
+template int cudaCommon_transferFromDeviceToNifti1<float, float>(nifti_image *img, float *array_d);
+template int cudaCommon_transferFromDeviceToNifti1<double, double>(nifti_image *img, double *array_d);
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE **array_d)
-{
-	if(sizeof(DTYPE)==sizeof(float4)){
-		// A nifti 5D volume is expected
-		if(img->dim[0]<5 || img->dim[4]>1 || img->dim[5]<2 || img->datatype!=NIFTI_TYPE_FLOAT32){
-			reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
-			reg_print_msg_error("The nifti image is not a 5D volume");
-			return EXIT_FAILURE;
-		}
-		const int voxelNumber = img->nx*img->ny*img->nz;
+int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) {
+    if (sizeof(DTYPE) == sizeof(float4)) {
+        // A nifti 5D volume is expected
+        if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) {
+            reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
+            reg_print_msg_error("The nifti image is not a 5D volume");
+            return EXIT_FAILURE;
+        }
+        const int voxelNumber = img->nx * img->ny * img->nz;
 
-		float4 *array_h;
-		NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber*sizeof(float4)));
-		NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array_h, (const void *)*array_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToHost));
-		float *niftiImgValues = static_cast<float *>(img->data);
+        float4 *array_h;
+        NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4)));
+        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost));
+        float *niftiImgValues = static_cast<float*>(img->data);
 
-		for(int i=0; i<voxelNumber; i++)
-			*niftiImgValues++ = array_h[i].x;
-		if(img->dim[5]>=2){
-			for(int i=0; i<voxelNumber; i++)
-				*niftiImgValues++ = array_h[i].y;
-		}
-		if(img->dim[5]>=3){
-			for(int i=0; i<voxelNumber; i++)
-				*niftiImgValues++ = array_h[i].z;
-		}
-		if(img->dim[5]>=4){
-			for(int i=0; i<voxelNumber; i++)
-				*niftiImgValues++ = array_h[i].w;
-		}
-		NR_CUDA_SAFE_CALL(cudaFreeHost(array_h));
+        for (int i = 0; i < voxelNumber; i++)
+            *niftiImgValues++ = array_h[i].x;
+        if (img->dim[5] >= 2) {
+            for (int i = 0; i < voxelNumber; i++)
+                *niftiImgValues++ = array_h[i].y;
+        }
+        if (img->dim[5] >= 3) {
+            for (int i = 0; i < voxelNumber; i++)
+                *niftiImgValues++ = array_h[i].z;
+        }
+        if (img->dim[5] >= 4) {
+            for (int i = 0; i < voxelNumber; i++)
+                *niftiImgValues++ = array_h[i].w;
+        }
+        NR_CUDA_SAFE_CALL(cudaFreeHost(array_h));
 
-		return EXIT_SUCCESS;
-	}
-	else{
-		switch(img->datatype){
-		case NIFTI_TYPE_FLOAT32:
-			return cudaCommon_transferFromDeviceToNifti1<DTYPE,float>(img, array_d);
-		default:
-			reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
-			reg_print_msg_error("The image data type is not supported");
-			return EXIT_FAILURE;
-		}
-	}
+        return EXIT_SUCCESS;
+    } else {
+        switch (img->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return cudaCommon_transferFromDeviceToNifti1<DTYPE, float>(img, array_d);
+        default:
+            reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
+            reg_print_msg_error("The image data type is not supported");
+            return EXIT_FAILURE;
+        }
+    }
 }
-template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image *, float **);
-template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image *, double **);
-template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image *, float4 **); // for deformation field
-/* ******************************** */
+template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image*, float*);
+template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image*, double*);
+template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, float4*); // for deformation field
 /* ******************************** */
 template<>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray **cuArray_d) {
-	if (img->datatype != NIFTI_TYPE_FLOAT32) {
-		reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
-		reg_print_msg_error("The image data type is not supported");
-		return EXIT_FAILURE;
-	}
+int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray *cuArray_d) {
+    if (img->datatype != NIFTI_TYPE_FLOAT32) {
+        reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
+        reg_print_msg_error("The image data type is not supported");
+        return EXIT_FAILURE;
+    }
 
-	cudaMemcpy3DParms copyParams = {0};
-	copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
-	copyParams.srcArray = *cuArray_d;
-	copyParams.dstPtr = make_cudaPitchedPtr((void*)(img->data), copyParams.extent.width * sizeof(float),
-											copyParams.extent.width, copyParams.extent.height);
-	copyParams.kind = cudaMemcpyDeviceToHost;
-	NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-	return EXIT_SUCCESS;
+    cudaMemcpy3DParms copyParams = {0};
+    copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
+    copyParams.srcArray = cuArray_d;
+    copyParams.dstPtr = make_cudaPitchedPtr((void*)(img->data), copyParams.extent.width * sizeof(float),
+                                            copyParams.extent.width, copyParams.extent.height);
+    copyParams.kind = cudaMemcpyDeviceToHost;
+    NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+    return EXIT_SUCCESS;
 }
 /* ******************************** */
-/* ******************************** */
 template <class DTYPE, class NIFTI_TYPE>
-int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE **array_d, DTYPE **array2_d)
-{
-	if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){
-		reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1");
-		reg_print_msg_error("The host and device arrays are of different types");
-		return EXIT_FAILURE;
-	}
-	else{
-		unsigned int voxelNumber=img->nx*img->ny*img->nz;
-		NIFTI_TYPE *array_h=static_cast<NIFTI_TYPE *>(img->data);
-		NIFTI_TYPE *array2_h=&array_h[voxelNumber];
-		NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array_h, (void *)*array_d, voxelNumber*sizeof(DTYPE), cudaMemcpyDeviceToHost));
-		NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array2_h, (void *)*array2_d, voxelNumber*sizeof(DTYPE), cudaMemcpyDeviceToHost));
-	}
-	return EXIT_SUCCESS;
+int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) {
+    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+        reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1");
+        reg_print_msg_error("The host and device arrays are of different types");
+        return EXIT_FAILURE;
+    } else {
+        unsigned int voxelNumber = img->nx * img->ny * img->nz;
+        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
+        NIFTI_TYPE *array2_h = &array_h[voxelNumber];
+        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, voxelNumber * sizeof(DTYPE), cudaMemcpyDeviceToHost));
+        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (void*)array2_d, voxelNumber * sizeof(DTYPE), cudaMemcpyDeviceToHost));
+    }
+    return EXIT_SUCCESS;
 }
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE **array_d, DTYPE **array2_d)
-{
-	if(sizeof(DTYPE)==sizeof(float4)){
-		// A nifti 5D volume is expected
-		if(img->dim[0]<5 || img->dim[4]>1 || img->dim[5]<2 || img->datatype!=NIFTI_TYPE_FLOAT32){
-			reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
-			reg_print_msg_error("The nifti image is not a 5D volume");
-			return EXIT_FAILURE;
-		}
-		const int voxelNumber = img->nx*img->ny*img->nz;
-		float4 *array_h=nullptr;
-		float4 *array2_h=nullptr;
-		NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber*sizeof(float4)));
-		NR_CUDA_SAFE_CALL(cudaMallocHost(&array2_h, voxelNumber*sizeof(float4)));
-		NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array_h, (const void *)*array_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToHost));
-		NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array2_h, (const void *)*array2_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToHost));
-		float *niftiImgValues = static_cast<float *>(img->data);
-		for(int i=0; i<voxelNumber; i++){
-			*niftiImgValues++ = array_h[i].x;
-		}
-		for(int i=0; i<voxelNumber; i++){
-			*niftiImgValues++ = array2_h[i].x;
-		}
-		if(img->dim[5]>=2){
-			for(int i=0; i<voxelNumber; i++){
-				*niftiImgValues++ = array_h[i].y;
-			}
-			for(int i=0; i<voxelNumber; i++){
-				*niftiImgValues++ = array2_h[i].y;
-			}
-		}
-		if(img->dim[5]>=3){
-			for(int i=0; i<voxelNumber; i++){
-				*niftiImgValues++ = array_h[i].z;
-			}
-			for(int i=0; i<voxelNumber; i++){
-				*niftiImgValues++ = array2_h[i].z;
-			}
-		}
-		if(img->dim[5]>=4){
-			for(int i=0; i<voxelNumber; i++){
-				*niftiImgValues++ = array_h[i].w;
-			}
-			for(int i=0; i<voxelNumber; i++){
-				*niftiImgValues++ = array2_h[i].w;
-			}
-		}
-		NR_CUDA_SAFE_CALL(cudaFreeHost(array_h));
-		NR_CUDA_SAFE_CALL(cudaFreeHost(array2_h));
+int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) {
+    if (sizeof(DTYPE) == sizeof(float4)) {
+        // A nifti 5D volume is expected
+        if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) {
+            reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
+            reg_print_msg_error("The nifti image is not a 5D volume");
+            return EXIT_FAILURE;
+        }
+        const int voxelNumber = img->nx * img->ny * img->nz;
+        float4 *array_h = nullptr;
+        float4 *array2_h = nullptr;
+        NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4)));
+        NR_CUDA_SAFE_CALL(cudaMallocHost(&array2_h, voxelNumber * sizeof(float4)));
+        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost));
+        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (const void*)array2_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost));
+        float *niftiImgValues = static_cast<float *>(img->data);
+        for (int i = 0; i < voxelNumber; i++) {
+            *niftiImgValues++ = array_h[i].x;
+        }
+        for (int i = 0; i < voxelNumber; i++) {
+            *niftiImgValues++ = array2_h[i].x;
+        }
+        if (img->dim[5] >= 2) {
+            for (int i = 0; i < voxelNumber; i++) {
+                *niftiImgValues++ = array_h[i].y;
+            }
+            for (int i = 0; i < voxelNumber; i++) {
+                *niftiImgValues++ = array2_h[i].y;
+            }
+        }
+        if (img->dim[5] >= 3) {
+            for (int i = 0; i < voxelNumber; i++) {
+                *niftiImgValues++ = array_h[i].z;
+            }
+            for (int i = 0; i < voxelNumber; i++) {
+                *niftiImgValues++ = array2_h[i].z;
+            }
+        }
+        if (img->dim[5] >= 4) {
+            for (int i = 0; i < voxelNumber; i++) {
+                *niftiImgValues++ = array_h[i].w;
+            }
+            for (int i = 0; i < voxelNumber; i++) {
+                *niftiImgValues++ = array2_h[i].w;
+            }
+        }
+        NR_CUDA_SAFE_CALL(cudaFreeHost(array_h));
+        NR_CUDA_SAFE_CALL(cudaFreeHost(array2_h));
 
-		return EXIT_SUCCESS;
-	}
-	else{
-		switch(img->datatype){
-		case NIFTI_TYPE_FLOAT32:
-			return cudaCommon_transferFromDeviceToNifti1<DTYPE,float>(img, array_d, array2_d);
-		default:
-			reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
-			reg_print_msg_error("The image data type is not supported");
-			return EXIT_FAILURE;
-		}
-	}
+        return EXIT_SUCCESS;
+    } else {
+        switch (img->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return cudaCommon_transferFromDeviceToNifti1<DTYPE, float>(img, array_d, array2_d);
+        default:
+            reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
+            reg_print_msg_error("The image data type is not supported");
+            return EXIT_FAILURE;
+        }
+    }
 }
-template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image *, float **, float **);
-template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image *, double **, double **);
-template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image *, float4 **, float4 **); // for deformation field
-/* ******************************** */
+template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image*, float*, float*);
+template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image*, double*, double*);
+template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, float4*, float4*); // for deformation field
 /* ******************************** */
-void cudaCommon_free(cudaArray **cuArray_d)
-{
-	NR_CUDA_SAFE_CALL(cudaFreeArray(*cuArray_d));
-	return;
+void cudaCommon_free(cudaArray *cuArray_d) {
+    NR_CUDA_SAFE_CALL(cudaFreeArray(cuArray_d));
 }
 /* ******************************** */
-/* ******************************** */
 template <class DTYPE>
-void cudaCommon_free(DTYPE **array_d)
-{
-	NR_CUDA_SAFE_CALL(cudaFree(*array_d));
-	return;
+void cudaCommon_free(DTYPE *array_d) {
+    NR_CUDA_SAFE_CALL(cudaFree(array_d));
 }
-template void cudaCommon_free<int>(int **);
-template void cudaCommon_free<float>(float **);
-template void cudaCommon_free<double>(double **);
-template void cudaCommon_free<float4>(float4 **);
-/* ******************************** */
+template void cudaCommon_free<int>(int*);
+template void cudaCommon_free<float>(float*);
+template void cudaCommon_free<double>(double*);
+template void cudaCommon_free<float4>(float4*);
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE **array_d, nifti_image *img)
-{
-	NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, img->data, img->nvox * sizeof(DTYPE), cudaMemcpyHostToDevice));
-
-	return EXIT_SUCCESS;
+int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE *array_d, nifti_image *img) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img->data, img->nvox * sizeof(DTYPE), cudaMemcpyHostToDevice));
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferFromDeviceToNiftiSimple<int>(int **array_d, nifti_image *img);
-template int cudaCommon_transferFromDeviceToNiftiSimple<float>(float **array_d, nifti_image *img);
-template int cudaCommon_transferFromDeviceToNiftiSimple<double>(double **array_d, nifti_image *img);
-/* ******************************** */
+template int cudaCommon_transferFromDeviceToNiftiSimple<int>(int*, nifti_image*);
+template int cudaCommon_transferFromDeviceToNiftiSimple<float>(float*, nifti_image*);
+template int cudaCommon_transferFromDeviceToNiftiSimple<double>(double*, nifti_image*);
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE **array_d, DTYPE *img, const unsigned int nvox)
-{
-	NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, img, nvox * sizeof(DTYPE), cudaMemcpyHostToDevice));
-	return EXIT_SUCCESS;
+int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE *array_d, DTYPE *img, const unsigned int nvox) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DTYPE), cudaMemcpyHostToDevice));
+    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferFromDeviceToNiftiSimple1<int>(int **array_d, int *img, const unsigned);
-template int cudaCommon_transferFromDeviceToNiftiSimple1<float>(float **array_d, float *img, const unsigned);
-template int cudaCommon_transferFromDeviceToNiftiSimple1<double>(double **array_d, double *img, const unsigned);
-/* ******************************** */
-/* ******************************** */
-/* ******************************** */
+template int cudaCommon_transferFromDeviceToNiftiSimple1<int>(int*, int*, const unsigned);
+template int cudaCommon_transferFromDeviceToNiftiSimple1<float>(float*, float*, const unsigned);
+template int cudaCommon_transferFromDeviceToNiftiSimple1<double>(double*, double*, const unsigned);
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferArrayFromCpuToDevice(DTYPE **array_d, DTYPE *array_cpu, const unsigned int nElements) {
-
+int cudaCommon_transferArrayFromCpuToDevice(DTYPE *array_d, DTYPE *array_cpu, const unsigned int nElements) {
     const unsigned int memSize = nElements * sizeof(DTYPE);
-    //copyData
-    NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, array_cpu, memSize, cudaMemcpyHostToDevice));
-    //
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_cpu, memSize, cudaMemcpyHostToDevice));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferArrayFromCpuToDevice<int>(int **array_d, int *array_cpu, const unsigned int nElements);
-template int cudaCommon_transferArrayFromCpuToDevice<float>(float **array_d, float *array_cpu, const unsigned int nElements);
-template int cudaCommon_transferArrayFromCpuToDevice<double>(double **array_d, double *array_cpu, const unsigned int nElements);
-/* ******************************** */
-/* ******************************** */
-/* ******************************** */
+template int cudaCommon_transferArrayFromCpuToDevice<int>(int*, int*, const unsigned int);
+template int cudaCommon_transferArrayFromCpuToDevice<float>(float*, float*, const unsigned int);
+template int cudaCommon_transferArrayFromCpuToDevice<double>(double*, double*, const unsigned int);
 /* ******************************** */
 template <class DTYPE>
-int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, const unsigned int nElements) {
-
+int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE *array_d, const unsigned int nElements) {
     const unsigned int memSize = nElements * sizeof(DTYPE);
-    //copyData
-    NR_CUDA_SAFE_CALL(cudaMemcpy(array_cpu, *array_d, memSize, cudaMemcpyDeviceToHost));
-    //
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array_cpu, array_d, memSize, cudaMemcpyDeviceToHost));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferArrayFromDeviceToCpu<int>(int *array_cpu, int **array_d, const unsigned int nElements);
-template int cudaCommon_transferArrayFromDeviceToCpu<float>(float *array_cpu, float **array_d, const unsigned int nElements);
-template int cudaCommon_transferArrayFromDeviceToCpu<double>(double *array_cpu, double **array_d, const unsigned int nElements);
+template int cudaCommon_transferArrayFromDeviceToCpu<int>(int*, int*, const unsigned int);
+template int cudaCommon_transferArrayFromDeviceToCpu<float>(float*, float*, const unsigned int);
+template int cudaCommon_transferArrayFromDeviceToCpu<double>(double*, double*, const unsigned int);
+/* ******************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 851bc03d..961dc148 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -13,17 +13,14 @@
 #include "cuda_runtime.h"
 #include "cuda.h"
 
-/* ******************************** */
-/* ******************************** */
+ /* ******************************** */
 #ifndef __VECTOR_TYPES_H__
 #define __VECTOR_TYPES_H__
-struct __attribute__((aligned(4))) float4
-{
-	float x,y,z,w;
+struct __attribute__((aligned(4))) float4 {
+    float x, y, z, w;
 };
 #endif
 /* ******************************** */
-/* ******************************** */
 #if CUDART_VERSION >= 3200
 #   define NR_CUDA_SAFE_CALL(call) { \
 		call; \
@@ -71,102 +68,73 @@ struct __attribute__((aligned(4))) float4
 	}
 #endif //CUDART_VERSION >= 3200
 /* ******************************** */
-/* ******************************** */
-int cudaCommon_setCUDACard(CUcontext *ctx,
-									bool verbose);
-/* ******************************** */
-void cudaCommon_unsetCUDACard(CUcontext *ctx);
-/* ******************************** */
-/* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(cudaArray **, int *);
+int cudaCommon_allocateArrayToDevice(cudaArray**, int*);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(cudaArray **, cudaArray **, int *);
+int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE **, int);
+int cudaCommon_allocateArrayToDevice(DTYPE**, int);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE **, int *);
+int cudaCommon_allocateArrayToDevice(DTYPE**, int*);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE **, DTYPE **, int *);
-/* ******************************** */
+int cudaCommon_allocateArrayToDevice(DTYPE**, DTYPE**, int*);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, nifti_image *);
+int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, cudaArray **, nifti_image *);
+int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(DTYPE **, nifti_image *);
+int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, nifti_image*);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(DTYPE **, DTYPE **, nifti_image *);
-/* ******************************** */
-/* ******************************** */
-extern "C++"
-template <class DTYPE, class DTYPE2>
-int cudaCommon_transferFromDeviceToNifti1(nifti_image *, DTYPE **);
+int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, DTYPE*, nifti_image*);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *, DTYPE **);
+int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *, DTYPE **, DTYPE **);
-/* ******************************** */
+int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*, DTYPE*);
 /* ******************************** */
 extern "C++"
-void cudaCommon_free(cudaArray **);
+void cudaCommon_free(cudaArray*);
 /* ******************************** */
 extern "C++" template <class DTYPE>
-void cudaCommon_free(DTYPE **);
-/* ******************************** */
-/* ******************************** */
-extern "C++" template <class DTYPE>
-int cudaCommon_allocateNiftiToDevice(nifti_image **image_d, int *dim);
-
-template <class DTYPE>
-int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image **image_d, nifti_image *img);
-
-
-/* ******************************** */
+void cudaCommon_free(DTYPE*);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE **, nifti_image * );
-
+int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE*, nifti_image*);
+/* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE **array_d, DTYPE *img, const unsigned  nvox);
-
+int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE*, DTYPE*, const unsigned);
+/* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE **cuPtr, const unsigned int nElements);
-/* ******************************** */
-/* ******************************** */
-/* ******************************** */
+int cudaCommon_transferFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferArrayFromCpuToDevice(DTYPE **array_d, DTYPE *array_cpu, const unsigned int nElements);
-/* ******************************** */
+int cudaCommon_transferArrayFromCpuToDevice(DTYPE*, DTYPE*, const unsigned int);
 /* ******************************** */
 extern "C++"
 template <class DTYPE>
-int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, const unsigned int nElements);
-/* ******************************** */
+int cudaCommon_transferArrayFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int);
 /* ******************************** */
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 29c084ab..343634c5 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -47,12 +47,12 @@ class reg_lncc_gpu : public reg_lncc , public reg_measure_gpu
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          cudaArray **refDevicePtr,
-                          cudaArray **floDevicePtr,
-                          int **refMskDevicePtr,
-                          float **warFloDevicePtr,
-                          float4 **warFloGradDevicePtr,
-                          float4 **forVoxBasedGraDevicePtr)
+                          cudaArray *refDevicePtr,
+                          cudaArray *floDevicePtr,
+                          int *refMskDevicePtr,
+                          float *warFloDevicePtr,
+                          float4 *warFloGradDevicePtr,
+                          float4 *forVoxBasedGraDevicePtr)
    {
       ;
    }
@@ -87,12 +87,12 @@ class reg_kld_gpu : public reg_kld , public reg_measure_gpu
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          cudaArray **refDevicePtr,
-                          cudaArray **floDevicePtr,
-                          int **refMskDevicePtr,
-                          float **warFloDevicePtr,
-                          float4 **warFloGradDevicePtr,
-                          float4 **forVoxBasedGraDevicePtr)
+                          cudaArray *refDevicePtr,
+                          cudaArray *floDevicePtr,
+                          int *refMskDevicePtr,
+                          float *warFloDevicePtr,
+                          float4 *warFloGradDevicePtr,
+                          float4 *forVoxBasedGraDevicePtr)
    {
       ;
    }
@@ -127,12 +127,12 @@ class reg_dti_gpu : public reg_dti , public reg_measure_gpu
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          cudaArray **refDevicePtr,
-                          cudaArray **floDevicePtr,
-                          int **refMskDevicePtr,
-                          float **warFloDevicePtr,
-                          float4 **warFloGradDevicePtr,
-                          float4 **forVoxBasedGraDevicePtr)
+                          cudaArray *refDevicePtr,
+                          cudaArray *floDevicePtr,
+                          int *refMskDevicePtr,
+                          float *warFloDevicePtr,
+                          float4 *warFloGradDevicePtr,
+                          float4 *forVoxBasedGraDevicePtr)
    {
       ;
    }
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index f690f492..a847594f 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -55,12 +55,12 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
                                     nifti_image *warFloImgPtr,
                                     nifti_image *warFloGraPtr,
 									nifti_image *forVoxBasedGraPtr,
-									cudaArray **refDevicePtr,
-									cudaArray **floDevicePtr,
-                                    int **refMskDevicePtr,
-                                    float **warFloDevicePtr,
-                                    float4 **warFloGradDevicePtr,
-                                    float4 **forVoxBasedGraDevicePtr)
+									cudaArray *refDevicePtr,
+									cudaArray *floDevicePtr,
+                                    int *refMskDevicePtr,
+                                    float *warFloDevicePtr,
+                                    float4 *warFloGradDevicePtr,
+                                    float4 *forVoxBasedGraDevicePtr)
 {
 	this->DeallocateHistogram();
     reg_nmi::InitialiseMeasure(refImgPtr,
@@ -89,30 +89,27 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
         fprintf(stderr,"[NiftyReg ERROR] This class can only be \n");
         reg_exit();
     }
-	// Bind the required pointers
-	this->referenceDevicePointer = *refDevicePtr;
-	this->floatingDevicePointer = *floDevicePtr;
-    this->referenceMaskDevicePointer = *refMskDevicePtr;
-	this->activeVoxeNumber = activeVoxNum;
-    this->warpedFloatingDevicePointer = *warFloDevicePtr;
-    this->warpedFloatingGradientDevicePointer = *warFloGradDevicePtr;
-    this->forwardVoxelBasedGradientDevicePointer = *forVoxBasedGraDevicePtr;
-	// The reference and floating images have to be updated on the device
-	if(cudaCommon_transferNiftiToArrayOnDevice<float>
-			(&this->referenceDevicePointer, this->referenceImagePointer)){
-		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-		printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
-		reg_exit();
-	}
-	if(cudaCommon_transferNiftiToArrayOnDevice<float>
-			(&this->floatingDevicePointer, this->floatingImagePointer)){
-		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-		printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
-		reg_exit();
-	}
-	// Allocate the required joint histogram on the GPU
-	cudaMalloc(&this->forwardJointHistogramLog_device,
-			   this->totalBinNumber[0]*sizeof(float));
+    // Bind the required pointers
+    this->referenceDevicePointer = refDevicePtr;
+    this->floatingDevicePointer = floDevicePtr;
+    this->referenceMaskDevicePointer = refMskDevicePtr;
+    this->activeVoxeNumber = activeVoxNum;
+    this->warpedFloatingDevicePointer = warFloDevicePtr;
+    this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr;
+    this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr;
+    // The reference and floating images have to be updated on the device
+    if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->referenceDevicePointer, this->referenceImagePointer)) {
+        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
+        printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
+        reg_exit();
+    }
+    if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->floatingDevicePointer, this->floatingImagePointer)) {
+        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
+        printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
+        reg_exit();
+    }
+    // Allocate the required joint histogram on the GPU
+    cudaMalloc(&this->forwardJointHistogramLog_device, this->totalBinNumber[0] * sizeof(float));
 
 #ifndef NDEBUG
 		printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n");
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index aed9cd46..395e1bdb 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -32,13 +32,13 @@ class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          cudaArray **refDevicePtr,
-                          cudaArray **floDevicePtr,
-                          int **refMskDevicePtr,
-                          float **warFloDevicePtr,
-                          float4 **warFloGradDevicePtr,
-                          float4 **forVoxBasedGraDevicePtr);
-   /// @brief Returns the nmi value
+                          cudaArray *refDevicePtr,
+                          cudaArray *floDevicePtr,
+                          int *refMskDevicePtr,
+                          float *warFloDevicePtr,
+                          float4 *warFloGradDevicePtr,
+                          float4 *forVoxBasedGraDevicePtr);
+   /// @brief Returns the nmi valu
    double GetSimilarityMeasureValue();
    /// @brief Compute the voxel based nmi gradient
    void GetVoxelBasedSimilarityMeasureGradient();
@@ -63,12 +63,12 @@ class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measur
                           nifti_image *warFloImgPtr,
                           nifti_image *warFloGraPtr,
                           nifti_image *forVoxBasedGraPtr,
-                          cudaArray **refDevicePtr,
-                          cudaArray **floDevicePtr,
-                          int **refMskDevicePtr,
-                          float **warFloDevicePtr,
-                          float4 **warFloGradDevicePtr,
-                          float4 **forVoxBasedGraDevicePtr)
+                          cudaArray *refDevicePtr,
+                          cudaArray *floDevicePtr,
+                          int *refMskDevicePtr,
+                          float *warFloDevicePtr,
+                          float4 *warFloGradDevicePtr,
+                          float4 *forVoxBasedGraDevicePtr)
    {
       ;
    }
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 7a17a1ab..f394a187 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -16,11 +16,11 @@ reg_optimiser_gpu::reg_optimiser_gpu()
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-reg_optimiser_gpu::~reg_optimiser_gpu()
-{
-    if(this->bestDOF_gpu!=nullptr)
-        cudaCommon_free(&this->bestDOF_gpu);;
-    this->bestDOF_gpu=nullptr;
+reg_optimiser_gpu::~reg_optimiser_gpu() {
+    if (this->bestDOF_gpu != nullptr) {
+        cudaCommon_free(this->bestDOF_gpu);
+        this->bestDOF_gpu = nullptr;
+    }
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_optimiser_gpu::~reg_optimiser_gpu() called\n");
 #endif
@@ -56,8 +56,8 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
     if(gradData!=nullptr)
         this->gradient_gpu=reinterpret_cast<float4 *>(gradData);
 
-    if(this->bestDOF_gpu!=nullptr)
-        cudaCommon_free(&this->bestDOF_gpu);
+    if (this->bestDOF_gpu != nullptr)
+        cudaCommon_free(this->bestDOF_gpu);
 
     if(cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu,
 									   (int)(this->GetVoxNumber()))){
@@ -118,15 +118,16 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu()
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu()
-{
-    if(this->array1!=nullptr)
-        cudaCommon_free(&this->array1);
-    this->array1=nullptr;
+reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() {
+    if (this->array1 != nullptr) {
+        cudaCommon_free(this->array1);
+        this->array1 = nullptr;
+    }
 
-    if(this->array2!=nullptr)
-        cudaCommon_free(&this->array2);
-    this->array2=nullptr;
+    if (this->array2 != nullptr) {
+        cudaCommon_free(this->array2);
+        this->array2 = nullptr;
+    }
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called\n");
 #endif
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index bfb9a2fe..f997a05c 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -32,12 +32,12 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr,
 									nifti_image *warFloGraPtr,
 									nifti_image *forVoxBasedGraPtr,
 									nifti_image *localWeightSimPtr,
-									cudaArray **refDevicePtr,
-									cudaArray **floDevicePtr,
-									int **refMskDevicePtr,
-									float **warFloDevicePtr,
-									float4 **warFloGradDevicePtr,
-									float4 **forVoxBasedGraDevicePtr)
+									cudaArray *refDevicePtr,
+									cudaArray *floDevicePtr,
+									int *refMskDevicePtr,
+									float *warFloDevicePtr,
+									float4 *warFloGradDevicePtr,
+									float4 *forVoxBasedGraDevicePtr)
 {
 	reg_ssd::InitialiseMeasure(refImgPtr,
 							   floImgPtr,
@@ -66,13 +66,13 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr,
 		reg_exit();
 	}
 	// Bind the required pointers
-	this->referenceDevicePointer = *refDevicePtr;
-	this->floatingDevicePointer = *floDevicePtr;
-	this->referenceMaskDevicePointer = *refMskDevicePtr;
+	this->referenceDevicePointer = refDevicePtr;
+	this->floatingDevicePointer = floDevicePtr;
+	this->referenceMaskDevicePointer = refMskDevicePtr;
 	this->activeVoxeNumber=activeVoxNum;
-	this->warpedFloatingDevicePointer = *warFloDevicePtr;
-	this->warpedFloatingGradientDevicePointer = *warFloGradDevicePtr;
-	this->forwardVoxelBasedGradientDevicePointer = *forVoxBasedGraDevicePtr;
+	this->warpedFloatingDevicePointer = warFloDevicePtr;
+	this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr;
+	this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr;
 #ifndef NDEBUG
 		printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n");
 #endif
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 33cc16ef..6f01d847 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -33,12 +33,12 @@ class reg_ssd_gpu : public reg_ssd , public reg_measure_gpu
                                   nifti_image *warFloGraPtr,
                                   nifti_image *forVoxBasedGraPtr,
                                   nifti_image *localWeightSimPtr,
-                                  cudaArray **refDevicePtr,
-                                  cudaArray **floDevicePtr,
-                                  int **refMskDevicePtr,
-                                  float **warFloDevicePtr,
-                                  float4 **warFloGradDevicePtr,
-                                  float4 **forVoxBasedGraDevicePtr);
+                                  cudaArray *refDevicePtr,
+                                  cudaArray *floDevicePtr,
+                                  int *refMskDevicePtr,
+                                  float *warFloDevicePtr,
+                                  float4 *warFloGradDevicePtr,
+                                  float4 *forVoxBasedGraDevicePtr);
    /// @brief Returns the ssd value
    double GetSimilarityMeasureValue();
    /// @brief Compute the voxel based ssd gradient
diff --git a/reg-test/reg_test_svd_cuda.cpp b/reg-test/reg_test_svd_cuda.cpp
index 2f4b38b8..10c85404 100644
--- a/reg-test/reg_test_svd_cuda.cpp
+++ b/reg-test/reg_test_svd_cuda.cpp
@@ -178,7 +178,7 @@ int main(int argc, char **argv)
     /*
             //RETRIEVE THE RESULTS FROM THE GPU
             float **test_UMatrixCUDA = reg_matrix2DAllocate<float>(m, m);
-            cudaCommon_transferArrayFromDeviceToCpu<float>(test_SVect, &Sigma_d, min_size);
+            cudaCommon_transferArrayFromDeviceToCpu<float>(test_SVect, Sigma_d, min_size);
             cudaCommon_transferFromDeviceTo2DMatrixCpu<float>(VT_d, test_VMatrix, min_size, min_size);
             test_VMatrix = reg_matrix2DTranspose<float>(test_VMatrix, min_size, min_size);
             cudaCommon_transferFromDeviceTo2DMatrixCpu<float>(U_d, test_UMatrixCUDA, m, m);

From e9e32adad328d6c4eca2575e9b8e926f2ed62e82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 11 Jan 2023 13:33:11 +0000
Subject: [PATCH 030/314] Bug fixes

---
 niftyreg_build_version.txt        |   2 +-
 reg-lib/Compute.cpp               |   4 +-
 reg-lib/Compute.h                 |   4 +-
 reg-lib/F3dContent.h              |  18 ++---
 reg-lib/_reg_base.cpp             |   2 +-
 reg-lib/_reg_f3d.cpp              |  25 +++----
 reg-lib/cpu/_reg_dti.h            |  42 +++++------
 reg-lib/cpu/_reg_kld.h            |   9 +--
 reg-lib/cpu/_reg_lncc.h           |  60 ++++++++--------
 reg-lib/cpu/_reg_measure.h        |  21 +++---
 reg-lib/cpu/_reg_mind.h           |  56 ++++++++-------
 reg-lib/cpu/_reg_nmi.h            | 116 +++++++++++++++---------------
 reg-lib/cpu/_reg_optimiser.h      |  88 +++++++++++------------
 reg-lib/cpu/_reg_ssd.h            |  55 +++++++-------
 reg-lib/cuda/CudaCompute.cpp      |  20 ++----
 reg-lib/cuda/CudaCompute.h        |   4 +-
 reg-lib/cuda/CudaContent.cpp      |  18 +++--
 reg-lib/cuda/CudaF3dContent.cpp   |  66 ++++++++---------
 reg-lib/cuda/CudaF3dContent.h     |  12 ++--
 reg-lib/cuda/_reg_measure_gpu.h   |  40 +++--------
 reg-lib/cuda/_reg_nmi_gpu.cu      |   2 +-
 reg-lib/cuda/_reg_nmi_gpu.h       |  25 +++----
 reg-lib/cuda/_reg_optimiser_gpu.h |  32 ++++-----
 reg-lib/cuda/_reg_ssd_gpu.cu      |   2 +-
 reg-lib/cuda/_reg_ssd_gpu.h       |  46 ++++++------
 25 files changed, 366 insertions(+), 403 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 93e78032..dee261df 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-138
+140
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 04342219..381956f3 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -141,7 +141,7 @@ void Compute::VoxelCentricToNodeCentric(float weight) {
                                  reorientation);
 }
 /* *************************************************************** */
-double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
+double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_getMaximalLength to accept optimiseX, optimiseY, optimiseZ
     nifti_image *transformationGradient = dynamic_cast<F3dContent*>(con)->GetTransformationGradient();
     switch (transformationGradient->datatype) {
@@ -155,7 +155,7 @@ double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ)
     return 0;
 }
 /* *************************************************************** */
-void Compute::NormaliseGradient(double maxGradLength) {
+void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
     // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ
     nifti_image *transformationGradient = dynamic_cast<F3dContent*>(con)->GetTransformationGradient();
     reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength);
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index be1bbdd8..4bdd1544 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -22,8 +22,8 @@ class Compute {
     virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint);
     virtual void VoxelCentricToNodeCentric(float weight);
-    virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ);
-    virtual void NormaliseGradient(double maxGradLength);
+    virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ);
+    virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength);
 
 protected:
     Content *con;
diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h
index 091e4da9..0df0f4d8 100644
--- a/reg-lib/F3dContent.h
+++ b/reg-lib/F3dContent.h
@@ -21,19 +21,11 @@ class F3dContent: public virtual Content {
     virtual nifti_image* GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; }
     virtual nifti_image* GetWarpedGradient() { return warpedGradient; }
 
-    // Setters
-    virtual void SetControlPointGrid(nifti_image *controlPointGridIn) {
-        controlPointGrid = controlPointGridIn;
-    }
-    virtual void SetTransformationGradient(nifti_image *transformationGradientIn) {
-        transformationGradient = transformationGradientIn;
-    }
-    virtual void SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) {
-        voxelBasedMeasureGradient = voxelBasedMeasureGradientIn;
-    }
-    virtual void SetWarpedGradient(nifti_image *warpedGradientIn) {
-        warpedGradient = warpedGradientIn;
-    }
+    // Methods for transferring data from nifti to device
+    virtual void UpdateControlPointGrid() {}
+    virtual void UpdateTransformationGradient() {}
+    virtual void UpdateVoxelBasedMeasureGradient() {}
+    virtual void UpdateWarpedGradient() {}
 
     // Auxiliary methods
     virtual void ZeroTransformationGradient();
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 895b417d..be3fee51 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -1363,9 +1363,9 @@ void reg_base<T>::Run() {
         CorrectTransformation();
 
         // Some cleaning is performed
-        DeinitContent();
         delete optimiser;
         optimiser = nullptr;
+        DeinitContent();
         // if (localWeightSimCurrent) {
         //     nifti_image_free(localWeightSimCurrent);
         //     localWeightSimCurrent = nullptr;
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 4d90fe8e..273f9b25 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -525,7 +525,7 @@ void reg_f3d<T>::GetSimilarityMeasureGradient() {
     }
 
     // Update the changes of voxelBasedMeasureGradient
-    dynamic_cast<F3dContent*>(this->con)->SetVoxelBasedMeasureGradient(voxelBasedMeasureGradient);
+    dynamic_cast<F3dContent*>(this->con)->UpdateVoxelBasedMeasureGradient();
 
     // The node based NMI gradient is extracted
     this->compute->VoxelCentricToNodeCentric(this->similarityWeight);
@@ -597,12 +597,12 @@ void reg_f3d<T>::GetLandmarkDistanceGradient() {
 template<class T>
 T reg_f3d<T>::NormaliseGradient() {
     // First compute the gradient max length for normalisation purpose
-    T maxGradLength = (T)this->compute->GetMaximalLength(this->optimiseX, this->optimiseY, this->optimiseZ);
+    T maxGradLength = (T)this->compute->GetMaximalLength(this->optimiser->GetVoxNumber(), this->optimiseX, this->optimiseY, this->optimiseZ);
 
     if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
         // The gradient is normalised if we are running f3d
         // It will be normalised later when running f3d_sym or f3d2
-        this->compute->NormaliseGradient(maxGradLength);
+        this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength);
 #ifndef NDEBUG
         char text[255];
         sprintf(text, "Objective function gradient maximal length: %g", maxGradLength);
@@ -641,13 +641,9 @@ void reg_f3d<T>::DisplayCurrentLevelParameters() {
         sprintf(text, "\t* image spacing: %g x %g x %g mm", floating->dx, floating->dy, floating->dz);
         reg_print_info(this->executableName, text);
         reg_print_info(this->executableName, "Current control point image");
-        sprintf(text, "\t* image dimension: %i x %i x %i",
-                controlPointGrid->nx, controlPointGrid->ny,
-                controlPointGrid->nz);
+        sprintf(text, "\t* image dimension: %i x %i x %i", controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz);
         reg_print_info(this->executableName, text);
-        sprintf(text, "\t* image spacing: %g x %g x %g mm",
-                controlPointGrid->dx, controlPointGrid->dy,
-                controlPointGrid->dz);
+        sprintf(text, "\t* image spacing: %g x %g x %g mm", controlPointGrid->dx, controlPointGrid->dy, controlPointGrid->dz);
         reg_print_info(this->executableName, text);
 #ifdef NDEBUG
     }
@@ -744,7 +740,7 @@ void reg_f3d<T>::SmoothGradient() {
         F3dContent *con = dynamic_cast<F3dContent*>(this->con);
         reg_tools_kernelConvolution(con->GetTransformationGradient(), &kernel, GAUSSIAN_KERNEL);
         // Update the changes of transformationGradient
-        con->SetTransformationGradient(con->F3dContent::GetTransformationGradient());
+        con->UpdateTransformationGradient();
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SmoothGradient");
@@ -768,20 +764,20 @@ void reg_f3d<T>::GetApproximatedGradient() {
         T currentValue = this->optimiser->GetBestDOF()[i];
         gridPtr[i] = currentValue + eps;
         // Update the changes. Bad hack, fix that!
-        con->SetControlPointGrid(controlPointGrid);
+        con->UpdateControlPointGrid();
         double valPlus = GetObjectiveFunctionValue();
         gridPtr[i] = currentValue - eps;
         // Update the changes. Bad hack, fix that!
-        con->SetControlPointGrid(controlPointGrid);
+        con->UpdateControlPointGrid();
         double valMinus = GetObjectiveFunctionValue();
         gridPtr[i] = currentValue;
         // Update the changes. Bad hack, fix that!
-        con->SetControlPointGrid(controlPointGrid);
+        con->UpdateControlPointGrid();
         gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps));
     }
 
     // Update the changes
-    con->SetTransformationGradient(transformationGradient);
+    con->UpdateTransformationGradient();
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetApproximatedGradient");
 #endif
@@ -797,6 +793,7 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
         reg_exit();
     }
 
+    InitialiseCurrentLevel(this->inputReference);
     InitContent(this->inputReference, this->inputFloating, nullptr);
 
     this->WarpFloatingImage(3); // cubic spline interpolation
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index c3327ce2..0cecebc2 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -23,26 +23,28 @@
 class reg_dti : public reg_measure
 {
 public:
-   /// @brief reg_dti class constructor
-   reg_dti();
-//    /// @brief Initialise the reg_dti object
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          nifti_image *forwardLocalWeightPtr = nullptr,
-                          int *maskFloPtr = nullptr,
-                          nifti_image *warRefImgPtr = nullptr,
-                          nifti_image *warRefGraPtr = nullptr,
-                          nifti_image *bckVoxBasedGraPtr = nullptr);
-//    /// @brief Returns the value
-   virtual double GetSimilarityMeasureValue();
-//    /// @brief Compute the voxel based gradient for DTI images
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint);
-   /// @brief reg_dti class destructor
-   ~reg_dti() {}
+    /// @brief reg_dti class constructor
+    reg_dti();
+    /// @brief reg_dti class destructor
+    virtual ~reg_dti() {}
+
+    /// @brief Initialise the reg_dti object
+    void InitialiseMeasure(nifti_image *refImgPtr,
+                           nifti_image *floImgPtr,
+                           int *maskRefPtr,
+                           nifti_image *warFloImgPtr,
+                           nifti_image *warFloGraPtr,
+                           nifti_image *forVoxBasedGraPtr,
+                           nifti_image *forwardLocalWeightPtr = nullptr,
+                           int *maskFloPtr = nullptr,
+                           nifti_image *warRefImgPtr = nullptr,
+                           nifti_image *warRefGraPtr = nullptr,
+                           nifti_image *bckVoxBasedGraPtr = nullptr);
+    /// @brief Returns the value
+    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Compute the voxel based gradient for DTI images
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+
 protected:
    // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ
    unsigned int dtIndicies[6];
diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h
index 40094be3..71efcaef 100755
--- a/reg-lib/cpu/_reg_kld.h
+++ b/reg-lib/cpu/_reg_kld.h
@@ -20,6 +20,9 @@ class reg_kld : public reg_measure
 public:
    /// @brief reg_kld class constructor
    reg_kld();
+   /// @brief reg_kld class destructor
+   virtual ~reg_kld() {}
+
    /// @brief Initialise the reg_kld object
    void InitialiseMeasure(nifti_image *refImgPtr,
                           nifti_image *floImgPtr,
@@ -33,11 +36,9 @@ class reg_kld : public reg_measure
                           nifti_image *warRefGraPtr = nullptr,
                           nifti_image *bckVoxBasedGraPtr = nullptr);
    /// @brief Returns the kld value
-   virtual double GetSimilarityMeasureValue();
+   virtual double GetSimilarityMeasureValue() override;
    /// @brief Compute the voxel based kld gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint);
-   /// @brief reg_kld class destructor
-   ~reg_kld() {}
+   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
 };
 /* *************************************************************** */
 
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index ad86a044..d626c113 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -19,36 +19,36 @@
 class reg_lncc : public reg_measure
 {
 public:
-   /// @brief reg_lncc class constructor
-   reg_lncc();
-   /// @brief reg_lncc class destructor
-   ~reg_lncc();
-   /// @brief Initialise the reg_lncc object
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          nifti_image *forwardLocalWeightPtr = nullptr,
-                          int *maskFloPtr = nullptr,
-                          nifti_image *warRefImgPtr = nullptr,
-                          nifti_image *warRefGraPtr = nullptr,
-                          nifti_image *bckVoxBasedGraPtr = nullptr);
-   /// @brief Returns the lncc value
-   double GetSimilarityMeasureValue();
-   /// @brief Compute the voxel based lncc gradient
-   void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint);
-   /// @brief Stuff
-   void SetKernelStandardDeviation(int t, float stddev)
-   {
-      this->kernelStandardDeviation[t]=stddev;
-   }
-   /// @brief Stuff
-   void SetKernelType(int t)
-   {
-      this->kernelType=t;
-   }
+    /// @brief reg_lncc class constructor
+    reg_lncc();
+    /// @brief reg_lncc class destructor
+    virtual ~reg_lncc();
+
+    /// @brief Initialise the reg_lncc object
+    void InitialiseMeasure(nifti_image *refImgPtr,
+                           nifti_image *floImgPtr,
+                           int *maskRefPtr,
+                           nifti_image *warFloImgPtr,
+                           nifti_image *warFloGraPtr,
+                           nifti_image *forVoxBasedGraPtr,
+                           nifti_image *forwardLocalWeightPtr = nullptr,
+                           int *maskFloPtr = nullptr,
+                           nifti_image *warRefImgPtr = nullptr,
+                           nifti_image *warRefGraPtr = nullptr,
+                           nifti_image *bckVoxBasedGraPtr = nullptr);
+    /// @brief Returns the lncc value
+    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Compute the voxel based lncc gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    /// @brief Stuff
+    virtual void SetKernelStandardDeviation(int t, float stddev) {
+        this->kernelStandardDeviation[t] = stddev;
+    }
+    /// @brief Stuff
+    virtual void SetKernelType(int t) {
+        this->kernelType = t;
+    }
+
 protected:
    float kernelStandardDeviation[255];
    nifti_image *forwardCorrelationImage;
diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h
index 2c036243..a4cf2291 100755
--- a/reg-lib/cpu/_reg_measure.h
+++ b/reg-lib/cpu/_reg_measure.h
@@ -65,22 +65,21 @@ class reg_measure
       }
    }
    /// @brief Here
-   virtual void GetDiscretisedValue(nifti_image *, float *, int , int) {}
-   void SetTimepointWeight(int timepoint, double weight)
-   {
-      this->timePointWeight[timepoint]=weight;
+   virtual void GetDiscretisedValue(nifti_image *, float *, int, int) {}
+
+   virtual void SetTimepointWeight(int timepoint, double weight) {
+      this->timePointWeight[timepoint] = weight;
    }
-   double *GetTimepointsWeights(void)
-   {
+
+   virtual double* GetTimepointsWeights(void) {
       return this->timePointWeight;
    }
-/************************************************************************/
-   nifti_image* GetReferenceImage(void)
-   {
+
+   virtual nifti_image* GetReferenceImage(void) {
       return this->referenceImagePointer;
    }
-   int* GetReferenceMask(void)
-   {
+
+   virtual int* GetReferenceMask(void) {
       return this->referenceMaskPointer;
    }
 /************************************************************************/
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index 04404904..6d2aafa8 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -29,29 +29,31 @@
 class reg_mind : public reg_ssd
 {
 public:
-   /// @brief reg_mind class constructor
-   reg_mind();
-   /// @brief Initialise the reg_mind object
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          nifti_image *forwardLocalWeightPtr = nullptr,
-                          int *maskFloPtr = nullptr,
-                          nifti_image *warRefImgPtr = nullptr,
-                          nifti_image *warRefGraPtr = nullptr,
-                          nifti_image *bckVoxBasedGraPtr = nullptr);
-   /// @brief Returns the mind based measure of similarity value
-   virtual double GetSimilarityMeasureValue();
-   /// @brief Compute the voxel based gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint);
-   /// @brief
-   void SetDescriptorOffset(int);
-   int GetDescriptorOffset();
-   /// @brief Measure class desstructor
-   ~reg_mind();
+    /// @brief reg_mind class constructor
+    reg_mind();
+    /// @brief Measure class destructor
+    virtual ~reg_mind();
+
+    /// @brief Initialise the reg_mind object
+    void InitialiseMeasure(nifti_image *refImgPtr,
+                           nifti_image *floImgPtr,
+                           int *maskRefPtr,
+                           nifti_image *warFloImgPtr,
+                           nifti_image *warFloGraPtr,
+                           nifti_image *forVoxBasedGraPtr,
+                           nifti_image *forwardLocalWeightPtr = nullptr,
+                           int *maskFloPtr = nullptr,
+                           nifti_image *warRefImgPtr = nullptr,
+                           nifti_image *warRefGraPtr = nullptr,
+                           nifti_image *bckVoxBasedGraPtr = nullptr);
+
+    /// @brief Returns the mind based measure of similarity value
+    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Compute the voxel based gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+
+    virtual void SetDescriptorOffset(int);
+    virtual int GetDescriptorOffset();
 
 protected:
    nifti_image *referenceImageDescriptor;
@@ -69,10 +71,10 @@ class reg_mind : public reg_ssd
 class reg_mindssc : public reg_mind
 {
 public:
-   /// @brief reg_mind class constructor
-   reg_mindssc();
-   /// @brief Measure class desstructor
-   ~reg_mindssc();
+    /// @brief reg_mind class constructor
+    reg_mindssc();
+    /// @brief Measure class destructor
+    virtual ~reg_mindssc();
 };
 /* *************************************************************** */
 
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index e49b1724..c3177443 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -24,48 +24,47 @@
 class reg_nmi : public reg_measure
 {
 public:
-   /// @brief reg_nmi class constructor
-   reg_nmi();
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          nifti_image *forwardLocalWeightPtr = nullptr,
-                          int *maskFloPtr = nullptr,
-                          nifti_image *warRefImgPtr = nullptr,
-                          nifti_image *warRefGraPtr = nullptr,
-                          nifti_image *bckVoxBasedGraPtr = nullptr);
-   /// @brief Returns the nmi value
-   double GetSimilarityMeasureValue();
-   /// @brief Compute the voxel based nmi gradient
-   void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint);
-   void SetRefAndFloatBinNumbers(unsigned short refBinNumber,
-                                 unsigned short floBinNumber,
-                                 int timepoint)
-   {
-      this->referenceBinNumber[timepoint] = refBinNumber;
-      this->floatingBinNumber[timepoint] = floBinNumber;
-   }
-   void SetReferenceBinNumber(int b, int t)
-   {
-      this->referenceBinNumber[t]=b;
-   }
-   void SetFloatingBinNumber(int b, int t)
-   {
-      this->floatingBinNumber[t]=b;
-   }
-   unsigned short *GetReferenceBinNumber()
-   {
-      return this->referenceBinNumber;
-   }
-   unsigned short *GetFloatingBinNumber()
-   {
-      return this->floatingBinNumber;
-   }
-   /// @brief reg_nmi class destructor
-   ~reg_nmi();
+    /// @brief reg_nmi class constructor
+    reg_nmi();
+    /// @brief reg_nmi class destructor
+    virtual ~reg_nmi();
+
+    void InitialiseMeasure(nifti_image *refImgPtr,
+                           nifti_image *floImgPtr,
+                           int *maskRefPtr,
+                           nifti_image *warFloImgPtr,
+                           nifti_image *warFloGraPtr,
+                           nifti_image *forVoxBasedGraPtr,
+                           nifti_image *forwardLocalWeightPtr = nullptr,
+                           int *maskFloPtr = nullptr,
+                           nifti_image *warRefImgPtr = nullptr,
+                           nifti_image *warRefGraPtr = nullptr,
+                           nifti_image *bckVoxBasedGraPtr = nullptr);
+
+    /// @brief Returns the nmi value
+    virtual double GetSimilarityMeasureValue() override;
+
+    /// @brief Compute the voxel based nmi gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+
+    virtual void SetRefAndFloatBinNumbers(unsigned short refBinNumber,
+                                          unsigned short floBinNumber,
+                                          int timepoint) {
+        this->referenceBinNumber[timepoint] = refBinNumber;
+        this->floatingBinNumber[timepoint] = floBinNumber;
+    }
+    virtual void SetReferenceBinNumber(int b, int t) {
+        this->referenceBinNumber[t] = b;
+    }
+    virtual void SetFloatingBinNumber(int b, int t) {
+        this->floatingBinNumber[t] = b;
+    }
+    virtual unsigned short* GetReferenceBinNumber() {
+        return this->referenceBinNumber;
+    }
+    virtual unsigned short* GetFloatingBinNumber() {
+        return this->floatingBinNumber;
+    }
 
 protected:
    unsigned short referenceBinNumber[255];
@@ -262,23 +261,22 @@ inline int previous(int current, int num_dims)
 class reg_multichannel_nmi : public reg_measure
 {
 public:
-   /// @brief reg_nmi class constructor
-   reg_multichannel_nmi() {}
-   /// @brief Returns the nmi value
-   double GetSimilarityMeasureValue()
-   {
-      return 0.;
-   }
-   /// @brief Compute the voxel based nmi gradient
-   void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
-   {
-      // Check if the specified time point exists and is active
-      reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-      if(this->timePointWeight[current_timepoint]==0.0)
-         return;;
-   }
-   /// @brief reg_nmi class destructor
-   ~reg_multichannel_nmi() {}
+    /// @brief reg_nmi class constructor
+    reg_multichannel_nmi() {}
+    /// @brief reg_nmi class destructor
+    virtual ~reg_multichannel_nmi() {}
+
+    /// @brief Returns the nmi value
+    virtual double GetSimilarityMeasureValue() override { return 0; }
+
+    /// @brief Compute the voxel based nmi gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {
+        // Check if the specified time point exists and is active
+        reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
+        if (this->timePointWeight[current_timepoint] == 0)
+            return;
+    }
+
 protected:
    unsigned short referenceBinNumber[255];
    unsigned short floatingBinNumber[255];
diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h
index 806ef167..d7bbee6e 100644
--- a/reg-lib/cpu/_reg_optimiser.h
+++ b/reg-lib/cpu/_reg_optimiser.h
@@ -25,10 +25,10 @@ class InterfaceOptimiser
    virtual void UpdateBestObjFunctionValue() = 0;
 
 protected:
-   /// @brief Interface constructor
-   InterfaceOptimiser() {}
-   /// @brief Interface destructor
-   ~InterfaceOptimiser() {}
+    /// @brief Interface constructor
+    InterfaceOptimiser() {}
+    /// @brief Interface destructor
+    virtual ~InterfaceOptimiser() {}
 };
 /* *************************************************************** */
 /* *************************************************************** */
@@ -186,28 +186,28 @@ class reg_conjugateGradient : public reg_optimiser<T>
    void UpdateGradientValues(); /// @brief Update the gradient array
 
 public:
-   reg_conjugateGradient();
-   ~reg_conjugateGradient();
-   virtual void Initialise(size_t nvox,
-                           int dim,
-                           bool optX,
-                           bool optY,
-                           bool optZ,
-                           size_t maxit,
-                           size_t start,
-                           InterfaceOptimiser *o,
-                           T *cppData=nullptr,
-                           T *gradData=nullptr,
-                           size_t nvox_b=0,
-                           T *cppData_b=nullptr,
-                           T *gradData_b=nullptr);
-   virtual void Optimise(T maxLength,
-                         T smallLength,
-                         T &startLength);
-   virtual void Perturbation(float length);
+    reg_conjugateGradient();
+    virtual ~reg_conjugateGradient();
+    virtual void Initialise(size_t nvox,
+                            int dim,
+                            bool optX,
+                            bool optY,
+                            bool optZ,
+                            size_t maxit,
+                            size_t start,
+                            InterfaceOptimiser *o,
+                            T *cppData = nullptr,
+                            T *gradData = nullptr,
+                            size_t nvox_b = 0,
+                            T *cppData_b = nullptr,
+                            T *gradData_b = nullptr) override;
+    virtual void Optimise(T maxLength,
+                          T smallLength,
+                          T &startLength) override;
+    virtual void Perturbation(float length) override;
 
-   // Function used for testing
-   virtual void reg_test_optimiser();
+    // Function used for testing
+    virtual void reg_test_optimiser() override;
 };
 /* *************************************************************** */
 /* *************************************************************** */
@@ -225,25 +225,25 @@ class reg_lbfgs : public reg_optimiser<T>
    T **diffGrad;
 
 public:
-   reg_lbfgs();
-   ~reg_lbfgs();
-   virtual void Initialise(size_t nvox,
-                           int dim,
-                           bool optX,
-                           bool optY,
-                           bool optZ,
-                           size_t maxit,
-                           size_t start,
-                           InterfaceOptimiser *o,
-                           T *cppData=nullptr,
-                           T *gradData=nullptr,
-                           size_t nvox_b=0,
-                           T *cppData_b=nullptr,
-                           T *gradData_b=nullptr);
-   virtual void Optimise(T maxLength,
-                         T smallLength,
-                         T &startLength);
-   virtual void UpdateGradientValues();
+    reg_lbfgs();
+    virtual ~reg_lbfgs();
+    virtual void Initialise(size_t nvox,
+                            int dim,
+                            bool optX,
+                            bool optY,
+                            bool optZ,
+                            size_t maxit,
+                            size_t start,
+                            InterfaceOptimiser *o,
+                            T *cppData = nullptr,
+                            T *gradData = nullptr,
+                            size_t nvox_b = 0,
+                            T *cppData_b = nullptr,
+                            T *gradData_b = nullptr) override;
+    virtual void Optimise(T maxLength,
+                          T smallLength,
+                          T &startLength) override;
+    virtual void UpdateGradientValues() override;
 };
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index 0401c4d2..37514e43 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -22,34 +22,35 @@
 class reg_ssd : public reg_measure
 {
 public:
-   /// @brief reg_ssd class constructor
-   reg_ssd();
-   /// @brief Initialise the reg_ssd object
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          nifti_image *localWeightSimPtr,
-                          int *maskFloPtr = nullptr,
-                          nifti_image *warRefImgPtr = nullptr,
-                          nifti_image *warRefGraPtr = nullptr,
-                          nifti_image *bckVoxBasedGraPtr = nullptr);
+    /// @brief reg_ssd class constructor
+    reg_ssd();
+    /// @brief reg_ssd class destructor
+    virtual ~reg_ssd() {}
 
-   /// @brief Define if the specified time point should be normalised
-   void SetNormaliseTimepoint(int timepoint, bool normalise);
-   /// @brief Returns the ssd value
-   virtual double GetSimilarityMeasureValue();
-   /// @brief Compute the voxel based ssd gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint);
-   /// @brief Here
-   virtual void GetDiscretisedValue(nifti_image *controlPointGridImage,
-                                    float *discretisedValue,
-                                    int discretise_radius,
-                                    int discretise_step);
-   /// @brief reg_ssd class desstructor
-   ~reg_ssd() {}
+    /// @brief Initialise the reg_ssd object
+    void InitialiseMeasure(nifti_image *refImgPtr,
+                           nifti_image *floImgPtr,
+                           int *maskRefPtr,
+                           nifti_image *warFloImgPtr,
+                           nifti_image *warFloGraPtr,
+                           nifti_image *forVoxBasedGraPtr,
+                           nifti_image *localWeightSimPtr,
+                           int *maskFloPtr = nullptr,
+                           nifti_image *warRefImgPtr = nullptr,
+                           nifti_image *warRefGraPtr = nullptr,
+                           nifti_image *bckVoxBasedGraPtr = nullptr);
+
+    /// @brief Define if the specified time point should be normalised
+    void SetNormaliseTimepoint(int timepoint, bool normalise);
+    /// @brief Returns the ssd value
+    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Compute the voxel based ssd gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    /// @brief Here
+    virtual void GetDiscretisedValue(nifti_image *controlPointGridImage,
+                                     float *discretisedValue,
+                                     int discretise_radius,
+                                     int discretise_step);
 protected:
    float currentValue[255];
 
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index b31f3152..69c053b8 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -66,8 +66,7 @@ void CudaCompute::ApproxLinearEnergyGradient(float weight) {
     // Use CPU temporarily
     Compute::ApproxLinearEnergyGradient(weight);
     // Transfer the data back to the CUDA device
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    con->SetTransformationGradient(con->F3dContent::GetTransformationGradient());
+    dynamic_cast<CudaF3dContent*>(con)->UpdateTransformationGradient();
 }
 /* *************************************************************** */
 double CudaCompute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) {
@@ -81,8 +80,7 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar
     // Use CPU temporarily
     Compute::LandmarkDistanceGradient(landmarkNumber, landmarkReference, landmarkFloating, weight);
     // Transfer the data back to the CUDA device
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    con->SetTransformationGradient(con->F3dContent::GetTransformationGradient());
+    dynamic_cast<CudaF3dContent*>(con)->UpdateTransformationGradient();
 }
 /* *************************************************************** */
 void CudaCompute::GetDeformationField(bool composition, bool bspline) {
@@ -124,19 +122,13 @@ void CudaCompute::VoxelCentricToNodeCentric(float weight) {
                                      weight);
 }
 /* *************************************************************** */
-double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
+double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    nifti_image *transformationGradient = con->F3dContent::GetTransformationGradient();
-    int nodeNumber = transformationGradient->nvox / transformationGradient->ndim;
-    return reg_getMaximalLength_gpu(con->GetTransformationGradientCuda(), nodeNumber);
+    return reg_getMaximalLength_gpu(dynamic_cast<CudaF3dContent*>(con)->GetTransformationGradientCuda(), nodeNumber);
 }
 /* *************************************************************** */
-void CudaCompute::NormaliseGradient(double maxGradLength) {
+void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
     // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    nifti_image *transformationGradient = con->F3dContent::GetTransformationGradient();
-    int nodeNumber = transformationGradient->nvox / transformationGradient->ndim;
-    reg_multiplyValue_gpu(nodeNumber, con->GetTransformationGradientCuda(), 1 / (float)maxGradLength);
+    reg_multiplyValue_gpu(nodeNumber, dynamic_cast<CudaF3dContent*>(con)->GetTransformationGradientCuda(), 1 / (float)maxGradLength);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 1ca941ab..c0451c0f 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -20,6 +20,6 @@ class CudaCompute: public Compute {
     virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override;
     virtual void VoxelCentricToNodeCentric(float weight) override;
-    virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override;
-    virtual void NormaliseGradient(double maxGradLength) override;
+    virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) override;
+    virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength) override;
 };
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 08b56279..4746230e 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -60,7 +60,7 @@ void CudaContent::DeallocateImages() {
 }
 /* *************************************************************** */
 void CudaContent::AllocateDeformationField() {
-    NR_CUDA_SAFE_CALL(cudaMalloc(&deformationFieldCuda, deformationField->nvox * sizeof(float4)));
+    cudaCommon_allocateArrayToDevice(&deformationFieldCuda, deformationField->dim);
 }
 /* *************************************************************** */
 void CudaContent::DeallocateDeformationField() {
@@ -121,9 +121,17 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) {
 
     if (!referenceMask) return;
 
-    NR_CUDA_SAFE_CALL(cudaMalloc(&referenceMaskCuda, reference->nvox * sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, referenceMask,
-                                 reference->nvox * sizeof(int), cudaMemcpyHostToDevice));
+    int *targetMask;
+    NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask, reference->nvox * sizeof(int)));
+    int *targetMaskPtr = targetMask;
+    for (int i = 0; i < reference->nvox; i++) {
+        if (referenceMask[i] != -1)
+            *targetMaskPtr++ = i;
+    }
+
+    cudaCommon_allocateArrayToDevice(&referenceMaskCuda, reference->nvox);
+    NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, targetMask, reference->nvox * sizeof(int),  cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask));
 }
 /* *************************************************************** */
 void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
@@ -138,7 +146,7 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
 
     float *transformationMatrixCptr = (float*)malloc(sizeof(mat44));
     mat44ToCptr(*transformationMatrix, transformationMatrixCptr);
-    cudaCommon_allocateArrayToDevice(&transformationMatrixCuda, sizeof(mat44) / sizeof(float));
+    NR_CUDA_SAFE_CALL(cudaMalloc(&transformationMatrixCuda, sizeof(mat44)));
     NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrixCuda, transformationMatrixCptr, sizeof(mat44), cudaMemcpyHostToDevice));
     free(transformationMatrixCptr);
 }
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index afb0f34e..dfc0cbfa 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -11,24 +11,37 @@ CudaF3dContent::CudaF3dContent(nifti_image *referenceIn,
     F3dContent(referenceIn, floatingIn, controlPointGridIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
     CudaContent(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) {
-    SetControlPointGrid(controlPointGrid);
+    AllocateControlPointGrid();
     AllocateWarpedGradient();
     AllocateTransformationGradient();
     AllocateVoxelBasedMeasureGradient();
 }
 /* *************************************************************** */
 CudaF3dContent::~CudaF3dContent() {
-    SetControlPointGrid(nullptr);
+    GetControlPointGrid();  // Transfer device data back to nifti
+    DeallocateControlPointGrid();
     DeallocateWarpedGradient();
     DeallocateTransformationGradient();
     DeallocateVoxelBasedMeasureGradient();
 }
 /* *************************************************************** */
+void CudaF3dContent::AllocateControlPointGrid() {
+    cudaCommon_allocateArrayToDevice(&controlPointGridCuda, controlPointGrid->dim);
+    cudaCommon_transferNiftiToArrayOnDevice(controlPointGridCuda, controlPointGrid);
+}
+/* *************************************************************** */
+void CudaF3dContent::DeallocateControlPointGrid() {
+    if (controlPointGridCuda) {
+        cudaCommon_free(controlPointGridCuda);
+        controlPointGridCuda = nullptr;
+    }
+}
+/* *************************************************************** */
 void CudaF3dContent::AllocateWarpedGradient() {
     if (floating->nt >= 1)
-        NR_CUDA_SAFE_CALL(cudaMalloc(&warpedGradientCuda[0], warpedGradient->nvox * sizeof(float4)));
+        cudaCommon_allocateArrayToDevice(&warpedGradientCuda[0], warpedGradient->dim);
     if (floating->nt == 2)
-        NR_CUDA_SAFE_CALL(cudaMalloc(&warpedGradientCuda[1], warpedGradient->nvox * sizeof(float4)));
+        cudaCommon_allocateArrayToDevice(&warpedGradientCuda[1], warpedGradient->dim);
 }
 /* *************************************************************** */
 void CudaF3dContent::DeallocateWarpedGradient() {
@@ -43,7 +56,7 @@ void CudaF3dContent::DeallocateWarpedGradient() {
 }
 /* *************************************************************** */
 void CudaF3dContent::AllocateTransformationGradient() {
-    cudaCommon_allocateArrayToDevice(&transformationGradientCuda, controlPointGrid->dim);
+    cudaCommon_allocateArrayToDevice(&transformationGradientCuda, transformationGradient->dim);
 }
 /* *************************************************************** */
 void CudaF3dContent::DeallocateTransformationGradient() {
@@ -54,7 +67,7 @@ void CudaF3dContent::DeallocateTransformationGradient() {
 }
 /* *************************************************************** */
 void CudaF3dContent::AllocateVoxelBasedMeasureGradient() {
-    cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, reference->dim);
+    cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient->dim);
 }
 /* *************************************************************** */
 void CudaF3dContent::DeallocateVoxelBasedMeasureGradient() {
@@ -69,17 +82,7 @@ nifti_image* CudaF3dContent::GetControlPointGrid() {
     return controlPointGrid;
 }
 /* *************************************************************** */
-void CudaF3dContent::SetControlPointGrid(nifti_image *controlPointGridIn) {
-    F3dContent::SetControlPointGrid(controlPointGridIn);
-
-    if (controlPointGridCuda) {
-        cudaCommon_free(controlPointGridCuda);
-        controlPointGridCuda = nullptr;
-    }
-
-    if (!controlPointGrid) return;
-
-    cudaCommon_allocateArrayToDevice(&controlPointGridCuda, controlPointGrid->dim);
+void CudaF3dContent::UpdateControlPointGrid() {
     cudaCommon_transferNiftiToArrayOnDevice(controlPointGridCuda, controlPointGrid);
 }
 /* *************************************************************** */
@@ -88,12 +91,7 @@ nifti_image* CudaF3dContent::GetTransformationGradient() {
     return transformationGradient;
 }
 /* *************************************************************** */
-void CudaF3dContent::SetTransformationGradient(nifti_image *transformationGradientIn) {
-    F3dContent::SetTransformationGradient(transformationGradientIn);
-    DeallocateTransformationGradient();
-    if (!transformationGradient) return;
-
-    AllocateTransformationGradient();
+void CudaF3dContent::UpdateTransformationGradient() {
     cudaCommon_transferNiftiToArrayOnDevice(transformationGradientCuda, transformationGradient);
 }
 /* *************************************************************** */
@@ -102,12 +100,7 @@ nifti_image* CudaF3dContent::GetVoxelBasedMeasureGradient() {
     return voxelBasedMeasureGradient;
 }
 /* *************************************************************** */
-void CudaF3dContent::SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) {
-    F3dContent::SetVoxelBasedMeasureGradient(voxelBasedMeasureGradientIn);
-    DeallocateVoxelBasedMeasureGradient();
-    if (!voxelBasedMeasureGradient) return;
-
-    AllocateVoxelBasedMeasureGradient();
+void CudaF3dContent::UpdateVoxelBasedMeasureGradient() {
     cudaCommon_transferNiftiToArrayOnDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient);
 }
 /* *************************************************************** */
@@ -116,22 +109,21 @@ nifti_image* CudaF3dContent::GetWarpedGradient() {
     return warpedGradient;
 }
 /* *************************************************************** */
-void CudaF3dContent::SetWarpedGradient(nifti_image *warpedGradientIn) {
-    F3dContent::SetWarpedGradient(warpedGradientIn);
-    DeallocateWarpedGradient();
-    if (!warpedGradient) return;
-
-    AllocateWarpedGradient();
+void CudaF3dContent::UpdateWarpedGradient() {
     cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[0], warpedGradient);
     if (warpedGradientCuda[1])
         cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[1], warpedGradient);
 }
 /* *************************************************************** */
 void CudaF3dContent::ZeroTransformationGradient() {
-    cudaMemset(transformationGradientCuda, 0, transformationGradient->nvox * sizeof(float4));
+    cudaMemset(transformationGradientCuda, 0,
+               transformationGradient->nx * transformationGradient->ny * transformationGradient->nz *
+               sizeof(float4));
 }
 /* *************************************************************** */
 void CudaF3dContent::ZeroVoxelBasedMeasureGradient() {
-    cudaMemset(voxelBasedMeasureGradientCuda, 0, voxelBasedMeasureGradient->nvox * sizeof(float4));
+    cudaMemset(voxelBasedMeasureGradientCuda, 0,
+               voxelBasedMeasureGradient->nx * voxelBasedMeasureGradient->ny * voxelBasedMeasureGradient->nz *
+               sizeof(float4));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h
index dfa6d222..68c6a651 100644
--- a/reg-lib/cuda/CudaF3dContent.h
+++ b/reg-lib/cuda/CudaF3dContent.h
@@ -26,11 +26,11 @@ class CudaF3dContent: public F3dContent, public CudaContent {
     virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; }
     virtual float4** GetWarpedGradientCuda() { return warpedGradientCuda; }
 
-    // Setters
-    virtual void SetControlPointGrid(nifti_image *controlPointGridIn) override;
-    virtual void SetTransformationGradient(nifti_image *transformationGradientIn) override;
-    virtual void SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) override;
-    virtual void SetWarpedGradient(nifti_image *warpedGradientIn) override;
+    // Methods for transferring data from nifti to device
+    virtual void UpdateControlPointGrid() override;
+    virtual void UpdateTransformationGradient() override;
+    virtual void UpdateVoxelBasedMeasureGradient() override;
+    virtual void UpdateWarpedGradient() override;
 
     // Auxiliary methods
     virtual void ZeroTransformationGradient() override;
@@ -43,6 +43,8 @@ class CudaF3dContent: public F3dContent, public CudaContent {
     float4 *warpedGradientCuda[2] = {nullptr};
 
 private:
+    void AllocateControlPointGrid();
+    void DeallocateControlPointGrid();
     void AllocateWarpedGradient();
     void DeallocateWarpedGradient();
     void AllocateTransformationGradient();
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 343634c5..56fb2af8 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -24,8 +24,8 @@ class reg_measure_gpu
 protected:
    /// @brief Measure class constructor
    reg_measure_gpu() {}
-   /// @brief Measure class desstructor
-   ~reg_measure_gpu() {}
+   /// @brief Measure class destructor
+   virtual ~reg_measure_gpu() {}
 
    cudaArray *referenceDevicePointer;
    cudaArray *floatingDevicePointer;
@@ -63,17 +63,11 @@ class reg_lncc_gpu : public reg_lncc , public reg_measure_gpu
       reg_exit();
    }
    /// @brief reg_lncc class destructor
-   ~reg_lncc_gpu() {}
+   virtual ~reg_lncc_gpu() {}
    /// @brief Returns the lncc value
-   double GetSimilarityMeasureValue()
-   {
-      return 0.;
-   }
+   virtual double GetSimilarityMeasureValue() override { return 0; }
    /// @brief Compute the voxel based lncc gradient
-   void GetVoxelBasedSimilarityMeasureGradient()
-   {
-      ;
-   }
+   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -103,17 +97,11 @@ class reg_kld_gpu : public reg_kld , public reg_measure_gpu
       reg_exit();
    }
    /// @brief reg_kld_gpu class destructor
-   ~reg_kld_gpu() {}
+   virtual ~reg_kld_gpu() {}
    /// @brief Returns the kld value
-   double GetSimilarityMeasureValue()
-   {
-      return 0.;
-   }
+   virtual double GetSimilarityMeasureValue() override { return 0; }
    /// @brief Compute the voxel based kld gradient
-   void GetVoxelBasedSimilarityMeasureGradient()
-   {
-      ;
-   }
+   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -143,17 +131,11 @@ class reg_dti_gpu : public reg_dti , public reg_measure_gpu
       reg_exit();
    }
    /// @brief reg_dti_gpu class destructor
-   ~reg_dti_gpu() {}
+   virtual ~reg_dti_gpu() {}
    /// @brief Returns the dti value
-   double GetSimilarityMeasureValue()
-   {
-      return 0.;
-   }
+   virtual double GetSimilarityMeasureValue() override { return 0; }
    /// @brief Compute the voxel based dti gradient
-   void GetVoxelBasedSimilarityMeasureGradient()
-   {
-      ;
-   }
+   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index a847594f..ee4d38e2 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -222,7 +222,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient()
+void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
 {
     // The latest joint histogram is transfered onto the GPU
     float *temp=(float *)malloc(this->totalBinNumber[0]*sizeof(float));
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index 395e1bdb..db549c28 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -24,6 +24,9 @@ class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu
 public:
    /// @brief reg_nmi class constructor
    reg_nmi_gpu();
+   /// @brief reg_nmi class destructor
+   virtual ~reg_nmi_gpu();
+
    /// @brief Initialise the reg_nmi_gpu object
    void InitialiseMeasure(nifti_image *refImgPtr,
                           nifti_image *floImgPtr,
@@ -38,12 +41,10 @@ class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu
                           float *warFloDevicePtr,
                           float4 *warFloGradDevicePtr,
                           float4 *forVoxBasedGraDevicePtr);
-   /// @brief Returns the nmi valu
-   double GetSimilarityMeasureValue();
+   /// @brief Returns the nmi value
+   virtual double GetSimilarityMeasureValue() override;
    /// @brief Compute the voxel based nmi gradient
-   void GetVoxelBasedSimilarityMeasureGradient();
-   /// @brief reg_nmi class destructor
-   ~reg_nmi_gpu();
+   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
 
 protected:
    float *forwardJointHistogramLog_device;
@@ -74,18 +75,12 @@ class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measur
    }
    /// @brief reg_nmi class constructor
    reg_multichannel_nmi_gpu() {}
+   /// @brief reg_nmi class destructor
+   virtual ~reg_multichannel_nmi_gpu() {}
    /// @brief Returns the nmi value
-   double GetSimilarityMeasureValue()
-   {
-      return 0.;
-   }
+   virtual double GetSimilarityMeasureValue() override { return 0; }
    /// @brief Compute the voxel based nmi gradient
-   void GetVoxelBasedSimilarityMeasureGradient()
-   {
-      ;
-   }
-   /// @brief reg_nmi class destructor
-   ~reg_multichannel_nmi_gpu() {}
+   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index d325554d..9af5eb7f 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -17,21 +17,21 @@ class reg_optimiser_gpu: public reg_optimiser<float> {
 
 public:
     reg_optimiser_gpu();
-    ~reg_optimiser_gpu();
+    virtual ~reg_optimiser_gpu();
 
     // Float4 are casted to float for compatibility with the cpu class
-    virtual float* GetCurrentDOF() {
-        return reinterpret_cast<float *>(this->currentDOF_gpu);
+    virtual float* GetCurrentDOF() override {
+        return reinterpret_cast<float*>(this->currentDOF_gpu);
     }
-    virtual float* GetBestDOF() {
-        return reinterpret_cast<float *>(this->bestDOF_gpu);
+    virtual float* GetBestDOF() override {
+        return reinterpret_cast<float*>(this->bestDOF_gpu);
     }
-    virtual float* GetGradient() {
-        return reinterpret_cast<float *>(this->gradient_gpu);
+    virtual float* GetGradient() override {
+        return reinterpret_cast<float*>(this->gradient_gpu);
     }
 
-    virtual void RestoreBestDOF();
-    virtual void StoreCurrentDOF();
+    virtual void RestoreBestDOF() override;
+    virtual void StoreCurrentDOF() override;
 
     virtual void Initialise(size_t nvox,
                             int dim,
@@ -45,8 +45,8 @@ class reg_optimiser_gpu: public reg_optimiser<float> {
                             float *gradData = nullptr,
                             size_t a = 0,
                             float *b = nullptr,
-                            float *c = nullptr);
-    virtual void Perturbation(float length);
+                            float *c = nullptr) override;
+    virtual void Perturbation(float length) override;
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -62,7 +62,7 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
 
 public:
     reg_conjugateGradient_gpu();
-    ~reg_conjugateGradient_gpu();
+    virtual ~reg_conjugateGradient_gpu();
 
     virtual void Initialise(size_t nvox,
                             int dim,
@@ -76,14 +76,14 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
                             float *gradData = nullptr,
                             size_t a = 0,
                             float *b = nullptr,
-                            float *c = nullptr);
+                            float *c = nullptr) override;
     virtual void Optimise(float maxLength,
                           float smallLength,
-                          float &startLength);
-    virtual void Perturbation(float length);
+                          float &startLength) override;
+    virtual void Perturbation(float length) override;
 
     // Function used for testing
-    virtual void reg_test_optimiser();
+    virtual void reg_test_optimiser() override;
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index f997a05c..5a1e6e62 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -187,7 +187,7 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient()
+void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
 {
 	reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer,
 									 this->referenceDevicePointer,
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 6f01d847..91e8b05f 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -22,29 +22,29 @@
 class reg_ssd_gpu : public reg_ssd , public reg_measure_gpu
 {
 public:
-   /// @brief reg_ssd class constructor
-   reg_ssd_gpu();
-   /// @brief Initialise the reg_ssd object
-   virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                  nifti_image *floImgPtr,
-                                  int *maskRefPtr,
-                                  int activeVoxNum,
-                                  nifti_image *warFloImgPtr,
-                                  nifti_image *warFloGraPtr,
-                                  nifti_image *forVoxBasedGraPtr,
-                                  nifti_image *localWeightSimPtr,
-                                  cudaArray *refDevicePtr,
-                                  cudaArray *floDevicePtr,
-                                  int *refMskDevicePtr,
-                                  float *warFloDevicePtr,
-                                  float4 *warFloGradDevicePtr,
-                                  float4 *forVoxBasedGraDevicePtr);
-   /// @brief Returns the ssd value
-   double GetSimilarityMeasureValue();
-   /// @brief Compute the voxel based ssd gradient
-   void GetVoxelBasedSimilarityMeasureGradient();
-   /// @brief Measure class desstructor
-   ~reg_ssd_gpu() {}
+    /// @brief reg_ssd class constructor
+    reg_ssd_gpu();
+    /// @brief Measure class destructor
+    virtual ~reg_ssd_gpu() {}
+    /// @brief Initialise the reg_ssd object
+    void InitialiseMeasure(nifti_image *refImgPtr,
+                           nifti_image *floImgPtr,
+                           int *maskRefPtr,
+                           int activeVoxNum,
+                           nifti_image *warFloImgPtr,
+                           nifti_image *warFloGraPtr,
+                           nifti_image *forVoxBasedGraPtr,
+                           nifti_image *localWeightSimPtr,
+                           cudaArray *refDevicePtr,
+                           cudaArray *floDevicePtr,
+                           int *refMskDevicePtr,
+                           float *warFloDevicePtr,
+                           float4 *warFloGradDevicePtr,
+                           float4 *forVoxBasedGraDevicePtr);
+    /// @brief Returns the ssd value
+    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Compute the voxel based ssd gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */

From eed3c3aaef6a9fe66dcc8242c71456ad0422bcbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 11 Jan 2023 15:22:06 +0000
Subject: [PATCH 031/314] Several refactorisations

---
 niftyreg_build_version.txt                    |    2 +-
 reg-apps/reg_aladin.cpp                       |    4 +-
 reg-apps/reg_f3d.cpp                          |    2 +-
 reg-apps/reg_ppcnr.cpp                        |   34 +-
 reg-apps/reg_tools.cpp                        |    4 +-
 reg-io/_reg_ReadWriteMatrix.cpp               |    2 +-
 reg-lib/Compute.h                             |    2 +-
 reg-lib/Content.cpp                           |    8 +-
 reg-lib/F3dContent.cpp                        |    2 +-
 reg-lib/_reg_aladin.cpp                       |   12 +-
 reg-lib/_reg_base.cpp                         |   30 +-
 reg-lib/_reg_f3d.cpp                          |    6 +-
 reg-lib/_reg_f3d_sym.cpp                      |    2 +-
 reg-lib/cl/ClCompute.h                        |    2 +-
 reg-lib/cl/blockMatchingKernel.cl             |    4 +-
 reg-lib/cpu/_reg_blockMatching.cpp            |   32 +-
 reg-lib/cpu/_reg_discrete_init.cpp            |   12 +-
 reg-lib/cpu/_reg_dti.cpp                      |   12 +-
 reg-lib/cpu/_reg_dti.h                        |   27 +-
 reg-lib/cpu/_reg_globalTrans.cpp              |   32 +-
 reg-lib/cpu/_reg_kld.cpp                      |  681 +++----
 reg-lib/cpu/_reg_kld.h                        |    7 +-
 reg-lib/cpu/_reg_lncc.cpp                     | 1273 ++++++------
 reg-lib/cpu/_reg_lncc.h                       |   57 +-
 reg-lib/cpu/_reg_localTrans.cpp               |  136 +-
 reg-lib/cpu/_reg_localTrans_jac.cpp           |   82 +-
 reg-lib/cpu/_reg_localTrans_regul.cpp         |   60 +-
 reg-lib/cpu/_reg_maths.cpp                    |   14 +-
 reg-lib/cpu/_reg_measure.h                    |   71 +-
 reg-lib/cpu/_reg_mind.cpp                     | 1361 ++++++-------
 reg-lib/cpu/_reg_mind.h                       |   34 +-
 reg-lib/cpu/_reg_mrf.cpp                      |   10 +-
 reg-lib/cpu/_reg_nmi.cpp                      | 1498 +++++++-------
 reg-lib/cpu/_reg_nmi.h                        |  280 ++-
 reg-lib/cpu/_reg_optimiser.cpp                |  747 ++++---
 reg-lib/cpu/_reg_optimiser.h                  |  279 ++-
 reg-lib/cpu/_reg_resampling.cpp               |  116 +-
 reg-lib/cpu/_reg_ssd.cpp                      | 1802 ++++++++---------
 reg-lib/cpu/_reg_ssd.h                        |   29 +-
 reg-lib/cpu/_reg_tools.cpp                    |   10 +-
 reg-lib/cuda/CudaCompute.h                    |    2 +-
 reg-lib/cuda/CudaContent.cpp                  |    2 +-
 reg-lib/cuda/CudaF3dContent.cpp               |    2 +-
 .../cuda/_reg_localTransformation_kernels.cu  |   18 +-
 reg-lib/cuda/_reg_measure_gpu.h               |   48 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                  |  263 ++-
 reg-lib/cuda/_reg_nmi_gpu.h                   |   16 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu            |  284 ++-
 reg-lib/cuda/_reg_optimiser_gpu.h             |    4 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |  297 ++-
 reg-lib/cuda/_reg_ssd_gpu.h                   |    6 +-
 reg-lib/cuda/blockMatchingKernel.cu           |    6 +-
 reg-lib/cuda/resampleKernel.cu                |   34 +-
 reg-test/reg_test_mindDescriptor.cpp          |    2 +-
 reg-test/reg_test_mindsscDescriptor.cpp       |    2 +-
 55 files changed, 4595 insertions(+), 5169 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index dee261df..b4f334f2 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-140
+141
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 02022454..739b539f 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -161,8 +161,8 @@ int main(int argc, char **argv)
    int alignCentre=1;
    int alignCentreOfMass=0;
    int interpolation=1;
-   float floatingSigma=0.0;
-   float referenceSigma=0.0;
+   float floatingSigma=0;
+   float referenceSigma=0;
 
    float referenceLowerThr=-std::numeric_limits<PrecisionTYPE>::max();
    float referenceUpperThr=std::numeric_limits<PrecisionTYPE>::max();
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index ddb74d4e..46eabf25 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -737,7 +737,7 @@ int main(int argc, char **argv) {
     // Save the warped image(s)
     nifti_image **outputWarpedImage = reg->GetWarpedImage();
     if (outputWarpedImageName == nullptr)
-        outputWarpedImageName = (char *)"outputResult.nii";
+        outputWarpedImageName = (char*)"outputResult.nii";
     memset(outputWarpedImage[0]->descrip, 0, 80);
     strcpy(outputWarpedImage[0]->descrip, "Warped image using NiftyReg (reg_f3d)");
     if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0) {
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index b4dbc4ee..08629c4b 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -409,7 +409,7 @@ int main(int argc, char **argv)
       PrecisionTYPE *intensityPtrM = static_cast<PrecisionTYPE *>(mask->data);
       for(size_t i=0; i<mask->nvox; i++) intensityPtrM[i]=1.0;
    }
-   PrecisionTYPE masksum=0.0;
+   PrecisionTYPE masksum=0;
    PrecisionTYPE *intensityPtrM = static_cast<PrecisionTYPE *>(mask->data);
    for(size_t i=0; i<mask->nvox; i++)
    {
@@ -581,12 +581,12 @@ int main(int argc, char **argv)
       for (i=n-1; i>0; i--)
       {
          l=i-1;
-         h=scale=0.0;
+         h=scale=0;
          if(l>0)
          {
             for(k=0; k<i; k++)
                scale+=std::abs(z[i+n*k]);
-            if (scale==0.0)
+            if (scale==0)
                e[i]=z[i+n*l];
             else
             {
@@ -596,15 +596,15 @@ int main(int argc, char **argv)
                   h+=z[i+n*k]*z[i+n*k];
                }
                f=z[i+n*l];
-               g=(f>=0.0 ? -sqrt(h) : sqrt(h));
+               g=(f>=0 ? -sqrt(h) : sqrt(h));
                e[i]=scale*g;
                h-=f*g;
                z[i+n*l]=f-g;
-               f=0.0;
+               f=0;
                for (j=0; j<i; j++)
                {
                   z[j+n*i]=z[i+n*j]/h;
-                  g=0.0;
+                  g=0;
                   for (k=0; k<j+1; k++)
                      g+=z[j+n*k]*z[i+n*k];
                   for (k=j+1; k<i; k++)
@@ -626,15 +626,15 @@ int main(int argc, char **argv)
             e[i]=z[i+n*l];
          d[i]=h;
       }
-      d[0]=0.0;
-      e[0]=0.0;
+      d[0]=0;
+      e[0]=0;
       for (i=0; i<n; i++)
       {
-         if(d[i]!=0.0)
+         if(d[i]!=0)
          {
             for (j=0; j<i; j++)
             {
-               g=0.0;
+               g=0;
                for (k=0; k<i; k++)
                   g+=z[i+n*k]*z[k+n*j];
                for (k=0; k<i; k++)
@@ -643,7 +643,7 @@ int main(int argc, char **argv)
          }
          d[i]=z[i+n*i];
          z[i+n*i]=1.0;
-         for (j=0; j<i; j++) z[j+n*i]=z[i+n*j]=0.0;
+         for (j=0; j<i; j++) z[j+n*i]=z[i+n*j]=0;
       }
 
       printf("Image Means=[%g",Mean[0]);
@@ -666,7 +666,7 @@ int main(int argc, char **argv)
       int m,iter;
       float s,r,p,dd,c,b;
       for (i=1; i<n; i++) e[i-1]=e[i];
-      e[n-1]=0.0;
+      e[n-1]=0;
       for (l=0; l<n; l++)
       {
          iter=0;
@@ -684,7 +684,7 @@ int main(int argc, char **argv)
                r=sqrt(g*g+1.0);
                g=d[m]-d[l]+e[l]/(g+std::abs(r)*g/std::abs(g));
                s=c=1.0;
-               p=0.0;
+               p=0;
                for (i=m-1; i>=l; i--)
                {
                   f=s*e[i];
@@ -693,7 +693,7 @@ int main(int argc, char **argv)
                   if(r<EPS)
                   {
                      d[i+1]-=p;
-                     e[m]=0.0;
+                     e[m]=0;
                      break;
                   }
                   s=f/r;
@@ -712,7 +712,7 @@ int main(int argc, char **argv)
                if(r<EPS && i>=l) continue;
                d[l]-=p;
                e[l]=g;
-               e[m]=0.0;
+               e[m]=0;
             }
             // printf("Iterations=%i\n",iter);
          }
@@ -796,7 +796,7 @@ int main(int argc, char **argv)
          {
             for(int t=0; t<image->nt; t++)
             {
-               dotty=0.0;
+               dotty=0;
                sum=0;
                for(int tt=max(t-param->locality,0); tt<=min(t+param->locality,image->nt); tt++)
                {
@@ -827,7 +827,7 @@ int main(int argc, char **argv)
          {
             for(int c=0; c<prinCompNumber; c++) // Add up component contributions
             {
-               dotty=0.0;
+               dotty=0;
                for(int t=0; t<image->nt; t++) // 1) Multiply each element by eigenvector and add (I.e. dot product)
                {
                   dotty += intensityPtr1[t*voxelNumber+i] * z[t+image->nt*c];
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 2a98658b..49b139ee 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -1051,7 +1051,7 @@ int main(int argc, char **argv)
         outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper);
         // Compute the MIND descriptor
         int *mask = (int *)calloc(image->nvox, sizeof(int));
-        GetMINDImageDesciptor(image, outputImage, mask, 1, 0);
+        GetMINDImageDescriptor(image, outputImage, mask, 1, 0);
         free(mask);
         // Save the MIND descriptor image
         if(flag->outputImageFlag)
@@ -1078,7 +1078,7 @@ int main(int argc, char **argv)
         outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper);
         // Compute the MIND-SSC descriptor
         int *mask = (int *)calloc(image->nvox, sizeof(int));
-        GetMINDSSCImageDesciptor(image, outputImage, mask, 1, 0);
+        GetMINDSSCImageDescriptor(image, outputImage, mask, 1, 0);
         free(mask);
         // Save the MIND descriptor image
         if(flag->outputImageFlag)
diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp
index da0d2c78..524abc72 100644
--- a/reg-io/_reg_ReadWriteMatrix.cpp
+++ b/reg-io/_reg_ReadWriteMatrix.cpp
@@ -48,7 +48,7 @@ void reg_tool_ReadAffineFile(mat44 *mat,
         {
             for(int j=0; j<4; j++)
             {
-                absoluteReference.m[i][j]=absoluteFloating.m[i][j]=0.0;
+                absoluteReference.m[i][j]=absoluteFloating.m[i][j]=0;
             }
         }
         //If the reference sform is defined, it is used; qform otherwise;
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index 4bdd1544..22f99c89 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -5,7 +5,7 @@
 class Compute {
 public:
     Compute() = delete;
-    Compute(Content *conIn) : con(conIn) {}
+    Compute(Content *conIn): con(conIn) {}
     virtual ~Compute() {}
 
     virtual void ResampleImage(int inter, float paddingValue);
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index b88897df..5a72dccc 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -5,7 +5,7 @@ Content::Content(nifti_image *referenceIn,
                  nifti_image *floatingIn,
                  int *referenceMaskIn,
                  mat44 *transformationMatrixIn,
-                 size_t bytesIn) :
+                 size_t bytesIn):
     reference(referenceIn),
     floating(floatingIn),
     referenceMask(referenceMaskIn),
@@ -32,7 +32,7 @@ void Content::AllocateWarped() {
     warped->dim[0] = warped->ndim = floating->ndim;
     warped->dim[4] = warped->nt = floating->nt;
     warped->pixdim[4] = warped->dt = 1.0;
-    warped->nvox = (size_t)(warped->nx * warped->ny * warped->nz * warped->nt);
+    warped->nvox = size_t(warped->nx * warped->ny * warped->nz * warped->nt);
     warped->datatype = floating->datatype;
     warped->nbyper = floating->nbyper;
     warped->data = (void*)calloc(warped->nvox, warped->nbyper);
@@ -61,8 +61,8 @@ void Content::AllocateDeformationField(size_t bytes) {
     deformationField->pixdim[6] = deformationField->dv = 1;
     deformationField->dim[7] = deformationField->nw = 1;
     deformationField->pixdim[7] = deformationField->dw = 1;
-    deformationField->nvox = (size_t)(deformationField->nx * deformationField->ny * deformationField->nz *
-                                             deformationField->nt * deformationField->nu);
+    deformationField->nvox = size_t(deformationField->nx * deformationField->ny * deformationField->nz *
+                                    deformationField->nt * deformationField->nu);
     deformationField->nbyper = (int)bytes;
     if (bytes == 4)
         deformationField->datatype = NIFTI_TYPE_FLOAT32;
diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp
index 442e52cd..4e650c04 100644
--- a/reg-lib/F3dContent.cpp
+++ b/reg-lib/F3dContent.cpp
@@ -9,7 +9,7 @@ F3dContent::F3dContent(nifti_image *referenceIn,
                        nifti_image *localWeightSimIn,
                        int *referenceMaskIn,
                        mat44 *transformationMatrixIn,
-                       size_t bytesIn) :
+                       size_t bytesIn):
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn),
     controlPointGrid(controlPointGridIn) {
     if (!controlPointGridIn) {
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 66e4a118..5430663d 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -43,8 +43,8 @@ reg_aladin<T>::reg_aladin() {
 
     this->interpolation = 1;
 
-    this->floatingSigma = 0.0;
-    this->referenceSigma = 0.0;
+    this->floatingSigma = 0;
+    this->referenceSigma = 0;
 
     this->referenceUpperThreshold = std::numeric_limits<T>::max();
     this->referenceLowerThreshold = -std::numeric_limits<T>::max();
@@ -261,7 +261,7 @@ void reg_aladin<T>::InitialiseRegistration() {
     Kernel *convolutionKernel = this->platform->CreateKernel(ConvolutionKernel::GetName(), nullptr);
     // SMOOTH THE INPUT IMAGES IF REQUIRED
     for (unsigned int l = 0; l < this->levelsToPerform; l++) {
-        if (this->referenceSigma != 0.0) {
+        if (this->referenceSigma != 0) {
             // Only the first image is smoothed
             bool *active = new bool[this->referencePyramid[l]->nt];
             float *sigma = new float[this->referencePyramid[l]->nt];
@@ -273,7 +273,7 @@ void reg_aladin<T>::InitialiseRegistration() {
             delete[] active;
             delete[] sigma;
         }
-        if (this->floatingSigma != 0.0) {
+        if (this->floatingSigma != 0) {
             // Only the first image is smoothed
             bool *active = new bool[this->floatingPyramid[l]->nt];
             float *sigma = new float[this->floatingPyramid[l]->nt];
@@ -309,9 +309,9 @@ void reg_aladin<T>::InitialiseRegistration() {
     } else { // No input affine transformation
         for (int i = 0; i < 4; i++) {
             for (int j = 0; j < 4; j++) {
-                this->transformationMatrix->m[i][j] = 0.0;
+                this->transformationMatrix->m[i][j] = 0;
             }
-            this->transformationMatrix->m[i][i] = 1.0;
+            this->transformationMatrix->m[i][i] = 1;
         }
         if (this->alignCentre && this->alignCentreMass == 0) {
             const mat44 *floatingMatrix = (this->inputFloating->sform_code > 0) ? &(this->inputFloating->sto_xyz) : &(this->inputFloating->qto_xyz);
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index be3fee51..cb973174 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -13,8 +13,8 @@
 #include "_reg_base.h"
 #include "F3dContent.h" // TODO Temporary fix! Remove this line!
 
- /* *************************************************************** */
- /* *************************************************************** */
+/* *************************************************************** */
+/* *************************************************************** */
 template<class T>
 reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     platform = nullptr;
@@ -661,7 +661,7 @@ void reg_base<T>::CheckParameters() {
         double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr;
         if (measure_nmi != nullptr) {
             nmiWeights = measure_nmi->GetTimepointsWeights();
-            simWeightSum = 0.0;
+            simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (nmiWeights[n] < 0) {
                     char text[255];
@@ -674,14 +674,14 @@ void reg_base<T>::CheckParameters() {
                 simWeightSum += nmiWeights[n];
                 totWeightSum += nmiWeights[n];
             }
-            if (simWeightSum == 0.0) {
+            if (simWeightSum == 0) {
                 reg_print_fct_warn("reg_base::CheckParameters()");
                 reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored");
             }
         }
         if (measure_ssd != nullptr) {
             ssdWeights = measure_ssd->GetTimepointsWeights();
-            simWeightSum = 0.0;
+            simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (ssdWeights[n] < 0) {
                     char text[255];
@@ -694,14 +694,14 @@ void reg_base<T>::CheckParameters() {
                 simWeightSum += ssdWeights[n];
                 totWeightSum += ssdWeights[n];
             }
-            if (simWeightSum == 0.0) {
+            if (simWeightSum == 0) {
                 reg_print_fct_warn("reg_base::CheckParameters()");
                 reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored");
             }
         }
         if (measure_kld != nullptr) {
             kldWeights = measure_kld->GetTimepointsWeights();
-            simWeightSum = 0.0;
+            simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (kldWeights[n] < 0) {
                     char text[255];
@@ -714,14 +714,14 @@ void reg_base<T>::CheckParameters() {
                 simWeightSum += kldWeights[n];
                 totWeightSum += kldWeights[n];
             }
-            if (simWeightSum == 0.0) {
+            if (simWeightSum == 0) {
                 reg_print_fct_warn("reg_base::CheckParameters()");
                 reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored");
             }
         }
         if (measure_lncc != nullptr) {
             lnccWeights = measure_lncc->GetTimepointsWeights();
-            simWeightSum = 0.0;
+            simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (lnccWeights[n] < 0) {
                     char text[255];
@@ -734,7 +734,7 @@ void reg_base<T>::CheckParameters() {
                 simWeightSum += lnccWeights[n];
                 totWeightSum += lnccWeights[n];
             }
-            if (simWeightSum == 0.0) {
+            if (simWeightSum == 0) {
                 reg_print_fct_warn("reg_base::CheckParameters()");
                 reg_print_msg_warn("The LNCC similarity measure has a weight of 0 for all channels so will be ignored");
             }
@@ -844,7 +844,7 @@ void reg_base<T>::Initialise() {
     platform = new Platform(platformCode);
     platform->SetGpuIdx(gpuIdx);
 
-    // CREATE THE PYRAMIDE IMAGES
+    // CREATE THE PYRAMID IMAGES
     if (usePyramid) {
         referencePyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*));
         floatingPyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*));
@@ -920,7 +920,7 @@ void reg_base<T>::Initialise() {
 
     // SMOOTH THE INPUT IMAGES IF REQUIRED
     for (unsigned int l = 0; l < levelToPerform; l++) {
-        if (referenceSmoothingSigma != 0.0) {
+        if (referenceSmoothingSigma != 0) {
             bool *active = new bool[referencePyramid[l]->nt];
             float *sigma = new float[referencePyramid[l]->nt];
             active[0] = true;
@@ -931,7 +931,7 @@ void reg_base<T>::Initialise() {
             delete[]active;
             delete[]sigma;
         }
-        if (floatingSmoothingSigma != 0.0) {
+        if (floatingSmoothingSigma != 0) {
             // Only the first image is smoothed
             bool *active = new bool[floatingPyramid[l]->nt];
             float *sigma = new float[floatingPyramid[l]->nt];
@@ -985,7 +985,7 @@ double reg_base<T>::ComputeSimilarityMeasure() {
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::ComputeSimilarityMeasure");
 #endif
-    return double(similarityWeight) * measure;
+    return similarityWeight * measure;
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -1304,7 +1304,7 @@ void reg_base<T>::Run() {
         // Initialise the measures of similarity
         InitialiseSimilarity();
 
-        // initialise the optimiser
+        // Initialise the optimiser
         SetOptimiser();
 
         // Loop over the number of perturbation to do
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 273f9b25..df94a742 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -264,7 +264,7 @@ void reg_f3d<T>::Initialise() {
                                 i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]);
             reg_print_info(this->executableName, text.c_str());
             if (this->measure_nmi != nullptr) {
-                if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) {
+                if (this->measure_nmi->GetTimepointsWeights()[i] > 0) {
                     text = stringFormat("\t* binning size for timepoint %i/%i: %i",
                                         i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4);
                     reg_print_info(this->executableName, text.c_str());
@@ -290,7 +290,7 @@ void reg_f3d<T>::Initialise() {
                                 i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]);
             reg_print_info(this->executableName, text.c_str());
             if (this->measure_nmi != nullptr) {
-                if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) {
+                if (this->measure_nmi->GetTimepointsWeights()[i] > 0) {
                     text = stringFormat("\t* binning size for timepoint %i/%i: %i",
                                         i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4);
                     reg_print_info(this->executableName, text.c_str());
@@ -679,7 +679,7 @@ double reg_f3d<T>::GetObjectiveFunctionValue() {
     this->currentWLand = ComputeLandmarkDistancePenaltyTerm();
 
     // Compute initial similarity measure
-    this->currentWMeasure = 0.0;
+    this->currentWMeasure = 0;
     if (this->similarityWeight > 0) {
         this->WarpFloatingImage(this->interpolation);
         this->currentWMeasure = this->ComputeSimilarityMeasure();
diff --git a/reg-lib/_reg_f3d_sym.cpp b/reg-lib/_reg_f3d_sym.cpp
index ae00600c..2fec42ce 100644
--- a/reg-lib/_reg_f3d_sym.cpp
+++ b/reg-lib/_reg_f3d_sym.cpp
@@ -1603,7 +1603,7 @@ double reg_f3d_sym<T>::GetObjectiveFunctionValue()
    this->currentWLand = this->ComputeLandmarkDistancePenaltyTerm();
 
    // Compute initial similarity measure
-   this->currentWMeasure = 0.0;
+   this->currentWMeasure = 0;
    if(this->similarityWeight>0)
    {
       this->WarpFloatingImage(this->interpolation);
diff --git a/reg-lib/cl/ClCompute.h b/reg-lib/cl/ClCompute.h
index ba4690d5..b93d3b04 100644
--- a/reg-lib/cl/ClCompute.h
+++ b/reg-lib/cl/ClCompute.h
@@ -4,7 +4,7 @@
 
 class ClCompute: public Compute {
 public:
-    ClCompute(Content *con) : Compute(con) {}
+    ClCompute(Content *con): Compute(con) {}
 
     virtual void ResampleImage(int inter, float paddingValue) override;
 };
diff --git a/reg-lib/cl/blockMatchingKernel.cl b/reg-lib/cl/blockMatchingKernel.cl
index adf1955f..6e17deb9 100755
--- a/reg-lib/cl/blockMatchingKernel.cl
+++ b/reg-lib/cl/blockMatchingKernel.cl
@@ -221,7 +221,7 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues,
 						const float warpedVar = REDUCE2D(sData, warpedTemp*warpedTemp, tid);
 
 						const float sumReferenceWarped = REDUCE2D(sData, (newReferenceTemp)*(warpedTemp), tid);
-                                                const float localCC = (newReferenceVar * warpedVar) > 0.0 ? fabs(sumReferenceWarped / sqrt(newReferenceVar*warpedVar)) : 0.0;
+                                                const float localCC = (newReferenceVar * warpedVar) > 0 ? fabs(sumReferenceWarped / sqrt(newReferenceVar*warpedVar)) : 0;
 
                   // Only the first thread of the block can update the final value
                   if (tid == 0 && localCC > bestCC) {
@@ -384,7 +384,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 							const float warpedVar = REDUCE(sData, warpedTemp*warpedTemp, tid);
 
 							const float sumReferenceWarped = REDUCE(sData, (newReferenceTemp)*(warpedTemp), tid);
-                                                        const float localCC = (newReferenceVar * warpedVar) > 0.0 ? fabs((sumReferenceWarped) / sqrt(newReferenceVar*warpedVar)) : 0.0;
+                                                        const float localCC = (newReferenceVar * warpedVar) > 0 ? fabs((sumReferenceWarped) / sqrt(newReferenceVar*warpedVar)) : 0;
 
 							// Only the first thread of the block can update the final value
                                                         if (tid == 0 && localCC > bestCC) {
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index 65ce83b9..64f0f49d 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -329,7 +329,7 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
                else
                   referenceIndex += BLOCK_WIDTH;
             }
-            bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0.0;
+            bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0;
             bestDisplacement[0] = std::numeric_limits<float>::quiet_NaN();
             bestDisplacement[1] = 0.f;
             bestDisplacement[2] = 0.f;
@@ -365,9 +365,9 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
                      else
                         warpedIndex += BLOCK_WIDTH;
                   }
-                  referenceMean = 0.0;
-                  warpedMean = 0.0;
-                  voxelNumber = 0.0;
+                  referenceMean = 0;
+                  warpedMean = 0;
+                  voxelNumber = 0;
                   for (int a = 0; a < BLOCK_2D_SIZE; a++) {
                      if (referenceOverlap[a] && warpedOverlap[a]) {
                         referenceMean += referenceValues[a];
@@ -380,9 +380,9 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
                      referenceMean /= voxelNumber;
                      warpedMean /= voxelNumber;
 
-                     referenceVar = 0.0;
-                     warpedVar = 0.0;
-                     localCC = 0.0;
+                     referenceVar = 0;
+                     warpedVar = 0;
+                     localCC = 0;
 
                      for (int a = 0; a < BLOCK_2D_SIZE; a++) {
                         if (referenceOverlap[a] && warpedOverlap[a]) {
@@ -394,7 +394,7 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
                         }
                      }
 
-                     localCC = (referenceVar * warpedVar) > 0.0 ? fabs(localCC / sqrt(referenceVar * warpedVar)) : 0.0;
+                     localCC = (referenceVar * warpedVar) > 0 ? fabs(localCC / sqrt(referenceVar * warpedVar)) : 0;
                      //localCC = fabs(localCC / sqrt(referenceVar * warpedVar));
 
                      if (localCC > bestCC) {
@@ -549,7 +549,7 @@ void block_matching_method3D(nifti_image * reference,
                   else
                      referenceIndex += BLOCK_WIDTH * BLOCK_WIDTH;
                }
-               bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0.0; //only when misaligned images are registered
+               bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0; //only when misaligned images are registered
                bestDisplacement[0] = std::numeric_limits<float>::quiet_NaN();
                bestDisplacement[1] = 0.f;
                bestDisplacement[2] = 0.f;
@@ -596,9 +596,9 @@ void block_matching_method3D(nifti_image * reference,
                            else
                               warpedIndex += BLOCK_WIDTH * BLOCK_WIDTH;
                         }
-                        referenceMean = 0.0;
-                        warpedMean = 0.0;
-                        voxelNumber = 0.0;
+                        referenceMean = 0;
+                        warpedMean = 0;
+                        voxelNumber = 0;
                         for (int a = 0; a < BLOCK_3D_SIZE; a++) {
                            if (referenceOverlap[tid][a] && warpedOverlap[tid][a]) {
                               referenceMean += referenceValues[tid][a];
@@ -611,9 +611,9 @@ void block_matching_method3D(nifti_image * reference,
                            referenceMean /= voxelNumber;
                            warpedMean /= voxelNumber;
 
-                           referenceVar = 0.0;
-                           warpedVar = 0.0;
-                           localCC = 0.0;
+                           referenceVar = 0;
+                           warpedVar = 0;
+                           localCC = 0;
 
                            for (int a = 0; a < BLOCK_3D_SIZE; a++) {
                               if (referenceOverlap[tid][a] && warpedOverlap[tid][a]) {
@@ -624,7 +624,7 @@ void block_matching_method3D(nifti_image * reference,
                                  localCC += (referenceTemp)* (warpedTemp);
                               }
                            }
-                           localCC = (referenceVar * warpedVar) > 0.0 ? fabs(localCC / sqrt(referenceVar * warpedVar)) : 0.0;
+                           localCC = (referenceVar * warpedVar) > 0 ? fabs(localCC / sqrt(referenceVar * warpedVar)) : 0;
 
                            if (localCC > bestCC) {
                               bestCC = localCC + 1.0e-7f;
diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp
index ef2c121d..9072556d 100644
--- a/reg-lib/cpu/_reg_discrete_init.cpp
+++ b/reg-lib/cpu/_reg_discrete_init.cpp
@@ -297,12 +297,12 @@ void reg_discrete_init::GetRegularisedMeasure()
             splineCoeffY[13] = 0.f;
             splineCoeffZ[13] = 0.f;
             // Compute the second derivative without the central control point
-            float XX_x=0.0, YY_x=0.0, ZZ_x=0.0;
-            float XY_x=0.0, YZ_x=0.0, XZ_x=0.0;
-            float XX_y=0.0, YY_y=0.0, ZZ_y=0.0;
-            float XY_y=0.0, YZ_y=0.0, XZ_y=0.0;
-            float XX_z=0.0, YY_z=0.0, ZZ_z=0.0;
-            float XY_z=0.0, YZ_z=0.0, XZ_z=0.0;
+            float XX_x=0, YY_x=0, ZZ_x=0;
+            float XY_x=0, YZ_x=0, XZ_x=0;
+            float XX_y=0, YY_y=0, ZZ_y=0;
+            float XY_y=0, YZ_y=0, XZ_y=0;
+            float XX_z=0, YY_z=0, ZZ_z=0;
+            float XY_z=0, YZ_z=0, XZ_z=0;
             for(i=0; i<27; i++){
                XX_x += basisXX[i]*splineCoeffX[i];
                YY_x += basisYY[i]*splineCoeffX[i];
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index ef3bf832..6db7716e 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -116,7 +116,7 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
    DTYPE *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]];
    DTYPE *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]];
 
-   double DTI_cost=0.0, n=0.0;
+   double DTI_cost=0, n=0;
    const double twoThirds = (2.0/3.0);
    DTYPE rXX, rXY, rYY, rXZ, rYZ, rZZ;
 #if defined (_OPENMP)
@@ -162,7 +162,7 @@ double reg_dti::GetSimilarityMeasureValue()
    if(this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype)
    {
       reg_print_fct_error("reg_dti::GetSimilarityMeasureValue");
-      reg_print_msg_error("Both input images are exepected to have the same type");
+      reg_print_msg_error("Both input images are expected to have the same type");
       reg_exit();
    }
    double DTIMeasureValue;
@@ -197,7 +197,7 @@ double reg_dti::GetSimilarityMeasureValue()
       if(this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype)
       {
          reg_print_fct_error("reg_dti::GetSimilarityMeasureValue");
-         reg_print_msg_error("Both input images are exepected to have the same type");
+         reg_print_msg_error("Both input images are expected to have the same type");
          reg_exit();
       }
       switch(this->floatingImagePointer->datatype)
@@ -337,7 +337,7 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
 {
    // Check if the specified time point exists and is active
    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-   if(this->timePointWeight[current_timepoint]==0.0)
+   if(this->timePointWeight[current_timepoint]==0)
       return;
 
    // Check if all required input images are of the same data type
@@ -348,7 +348,7 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
      )
    {
       reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
-      reg_print_msg_error("Input images are exepected to be of the same type");
+      reg_print_msg_error("Input images are expected to be of the same type");
       reg_exit();
    }
    // Compute the gradient of the ssd for the forward transformation
@@ -389,7 +389,7 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
         )
       {
          reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
-         reg_print_msg_error("Input images are exepected to be of the same type");
+         reg_print_msg_error("Input images are expected to be of the same type");
          reg_exit();
       }
       // Compute the gradient of the nmi for the backward transformation
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 0cecebc2..f2dcce22 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -1,5 +1,5 @@
 /**
- * @file _reg_ssd.h
+ * @file _reg_dti.h
  * @brief File that contains sum squared difference related function
  * @author Marc Modat
  * @date 19/05/2009
@@ -14,14 +14,12 @@
 
 #pragma once
 
-//#include "_reg_measure.h"
-#include "_reg_ssd.h" // HERE
+#include "_reg_ssd.h"
 
 /* *************************************************************** */
 /* *************************************************************** */
 /// @brief DTI related measure of similarity class
-class reg_dti : public reg_measure
-{
+class reg_dti: public reg_measure {
 public:
     /// @brief reg_dti class constructor
     reg_dti();
@@ -46,9 +44,9 @@ class reg_dti : public reg_measure
     virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
 
 protected:
-   // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ
-   unsigned int dtIndicies[6];
-   float currentValue;
+    // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ
+    unsigned int dtIndicies[6];
+    float currentValue;
 };
 /* *************************************************************** */
 
@@ -63,8 +61,7 @@ extern "C++" template <class DTYPE>
 double reg_getDTIMeasureValue(nifti_image *referenceImage,
                               nifti_image *warpedImage,
                               int *mask,
-                              unsigned int * dtIndicies
-                             );
+                              unsigned int *dtIndicies);
 
 /** @brief Compute a voxel based gradient of the sum squared difference.
  * @param referenceImage First input image to use to compute the metric
@@ -79,8 +76,8 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
  */
 extern "C++" template <class DTYPE>
 void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
-      nifti_image *warpedImage,
-      nifti_image *warpedGradient,
-      nifti_image *dtiMeasureGradientImage,
-      int *mask,
-      unsigned int * dtIndicies);
+                                         nifti_image *warpedImage,
+                                         nifti_image *warpedGradient,
+                                         nifti_image *dtiMeasureGradientImage,
+                                         int *mask,
+                                         unsigned int *dtIndicies);
diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp
index 01bad1f0..2e479761 100755
--- a/reg-lib/cpu/_reg_globalTrans.cpp
+++ b/reg-lib/cpu/_reg_globalTrans.cpp
@@ -197,11 +197,11 @@ void reg_affine_getDeformationField(mat44 *affineTransformation,
 void estimate_rigid_transformation2D(float** points1, float** points2, int num_points, mat44 * transformation)
 {
 
-   double centroid_reference[2] = { 0.0 };
-   double centroid_warped[2] = { 0.0 };
+   double centroid_reference[2] = { 0 };
+   double centroid_warped[2] = { 0 };
 
-   float centroid_referenceFloat[2] = { 0.0 };
-   float centroid_warpedFloat[2] = { 0.0 };
+   float centroid_referenceFloat[2] = { 0 };
+   float centroid_warpedFloat[2] = { 0 };
 
    for (int j = 0; j < num_points; ++j) {
       centroid_reference[0] += (double) points1[j][0];
@@ -249,7 +249,7 @@ void estimate_rigid_transformation2D(float** points1, float** points2, int num_p
    float det = reg_matrix2DDet<float>(r, 2, 2);
 
    // Take care of possible reflection
-   if (det < 0.0) {
+   if (det < 0) {
       v[0][1] = -v[0][1];
       v[1][1] = -v[1][1];
       reg_matrix2DMultiply<float>(v, 2, 2, ut, 2, 2, r, false);
@@ -316,11 +316,11 @@ void estimate_rigid_transformation2D(std::vector<_reg_sorted_point2D> &points, m
 void estimate_rigid_transformation3D(float** points1, float** points2, int num_points, mat44 * transformation)
 {
 
-   double centroid_reference[3] = { 0.0 };
-   double centroid_warped[3] = { 0.0 };
+   double centroid_reference[3] = { 0 };
+   double centroid_warped[3] = { 0 };
 
-   float centroid_referenceFloat[3] = { 0.0 };
-   float centroid_warpedFloat[3] = { 0.0 };
+   float centroid_referenceFloat[3] = { 0 };
+   float centroid_warpedFloat[3] = { 0 };
 
 
    for (int j = 0; j < num_points; ++j)
@@ -380,7 +380,7 @@ void estimate_rigid_transformation3D(float** points1, float** points2, int num_p
    float det = reg_matrix2DDet<float>(r, 3, 3);
 
    // Take care of possible reflection
-   if (det < 0.0) {
+   if (det < 0) {
       v[0][2] = -v[0][2];
       v[1][2] = -v[1][2];
       v[2][2] = -v[2][2];
@@ -694,14 +694,14 @@ void optimize_2D(float* referencePosition, float* warpedPosition,
    std::multimap<double, _reg_sorted_point2D> queue;
    std::vector<_reg_sorted_point2D> top_points;
 
-   double distance = 0.0;
+   double distance = 0;
    double lastDistance = std::numeric_limits<double>::max();
    unsigned long i;
 
    // The initial vector with all the input points
    for (unsigned j = 0; j < num_equations; j += 2)
    {
-      top_points.push_back(_reg_sorted_point2D(&referencePosition[j], &warpedPosition[j], 0.0));
+      top_points.push_back(_reg_sorted_point2D(&referencePosition[j], &warpedPosition[j], 0));
    }
    if (affine) {
       estimate_affine_transformation2D(top_points, final);
@@ -731,7 +731,7 @@ void optimize_2D(float* referencePosition, float* warpedPosition,
                                                              _reg_sorted_point2D(&referencePosition[j], &warpedPosition[j], distance)));
       }
 
-      distance = 0.0;
+      distance = 0;
       i = 0;
       top_points.clear();
 
@@ -776,7 +776,7 @@ void optimize_3D(float *referencePosition, float *warpedPosition,
    // Keep a sorted list of the distance measure
    std::multimap<double, _reg_sorted_point3D> queue;
    std::vector<_reg_sorted_point3D> top_points;
-   double distance = 0.0;
+   double distance = 0;
    double lastDistance = std::numeric_limits<double>::max();
    unsigned long i;
 
@@ -784,7 +784,7 @@ void optimize_3D(float *referencePosition, float *warpedPosition,
    for (unsigned j = 0; j < num_equations; j+=3) {
       top_points.push_back(_reg_sorted_point3D(&referencePosition[j],
                                                &warpedPosition[j],
-                                               0.0));
+                                               0));
    }
    if (affine) {
       estimate_affine_transformation3D(top_points, final);
@@ -814,7 +814,7 @@ void optimize_3D(float *referencePosition, float *warpedPosition,
                                                                distance)));
       }
 
-      distance = 0.0;
+      distance = 0;
       i = 0;
       top_points.clear();
       for (std::multimap<double, _reg_sorted_point3D>::iterator it = queue.begin();it != queue.end(); ++it, ++i)
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index af0c8f8e..3adc497b 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -14,11 +14,9 @@
 
 /* *************************************************************** */
 /* *************************************************************** */
-reg_kld::reg_kld()
-   : reg_measure()
-{
+reg_kld::reg_kld(): reg_measure() {
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_kld constructor called");
+    reg_print_msg_debug("reg_kld constructor called");
 #endif
 }
 /* *************************************************************** */
@@ -33,50 +31,47 @@ void reg_kld::InitialiseMeasure(nifti_image *refImgPtr,
                                 int *maskFloPtr,
                                 nifti_image *warRefImgPtr,
                                 nifti_image *warRefGraPtr,
-                                nifti_image *bckVoxBasedGraPtr)
-{
-   // Set the pointers using the parent class function
-   reg_measure::InitialiseMeasure(refImgPtr,
-                                  floImgPtr,
-                                  maskRefPtr,
-                                  warFloImgPtr,
-                                  warFloGraPtr,
-                                  forVoxBasedGraPtr,
-                                  forwardLocalWeightPtr,
-                                  maskFloPtr,
-                                  warRefImgPtr,
-                                  warRefGraPtr,
-                                  bckVoxBasedGraPtr);
+                                nifti_image *bckVoxBasedGraPtr) {
+    // Set the pointers using the parent class function
+    reg_measure::InitialiseMeasure(refImgPtr,
+                                   floImgPtr,
+                                   maskRefPtr,
+                                   warFloImgPtr,
+                                   warFloGraPtr,
+                                   forVoxBasedGraPtr,
+                                   forwardLocalWeightPtr,
+                                   maskFloPtr,
+                                   warRefImgPtr,
+                                   warRefGraPtr,
+                                   bckVoxBasedGraPtr);
 
-   // Check that the input images have the same number of time point
-   if(this->referenceImagePointer->nt != this->floatingImagePointer->nt)
-   {
-      reg_print_fct_error("reg_kld::InitialiseMeasure");
-      reg_print_msg_error("This number of time point should be the same for both input images");
-      reg_exit();
-   }
-   // Input images are expected to be bounded between 0 and 1 as they
-   // are meant to be probabilities
-   for(int t=0; t<this->referenceImagePointer->nt; ++t){
-      if(this->timePointWeight[t]>0){
-         float min_ref = reg_tools_getMinValue(this->referenceImagePointer, t);
-         float max_ref = reg_tools_getMaxValue(this->referenceImagePointer, t);
-         float min_flo = reg_tools_getMinValue(this->floatingImagePointer, t);
-         float max_flo = reg_tools_getMaxValue(this->floatingImagePointer, t);
-         if(min_ref<0.f || min_flo<0.f || max_ref>1.f || max_flo>1.f){
-            reg_print_msg_error("The input images are expected to be probabilities to use the kld measure");
-            reg_exit();
-         }
-      }
-   }
+    // Check that the input images have the same number of time point
+    if (this->referenceImagePointer->nt != this->floatingImagePointer->nt) {
+        reg_print_fct_error("reg_kld::InitialiseMeasure");
+        reg_print_msg_error("This number of time point should be the same for both input images");
+        reg_exit();
+    }
+    // Input images are expected to be bounded between 0 and 1 as they
+    // are meant to be probabilities
+    for (int t = 0; t < this->referenceImagePointer->nt; ++t) {
+        if (this->timePointWeight[t] > 0) {
+            float min_ref = reg_tools_getMinValue(this->referenceImagePointer, t);
+            float max_ref = reg_tools_getMaxValue(this->referenceImagePointer, t);
+            float min_flo = reg_tools_getMinValue(this->floatingImagePointer, t);
+            float max_flo = reg_tools_getMaxValue(this->floatingImagePointer, t);
+            if (min_ref < 0.f || min_flo < 0.f || max_ref>1.f || max_flo>1.f) {
+                reg_print_msg_error("The input images are expected to be probabilities to use the kld measure");
+                reg_exit();
+            }
+        }
+    }
 #ifndef NDEBUG
-   char text[255];
-   reg_print_msg_debug("reg_kld::InitialiseMeasure().");
-   for(int i=0; i<this->referenceImagePointer->nt; ++i)
-   {
-      sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
-      reg_print_msg_debug(text);
-   }
+    char text[255];
+    reg_print_msg_debug("reg_kld::InitialiseMeasure().");
+    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+        sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
+        reg_print_msg_debug(text);
+    }
 #endif
 }
 /* *************************************************************** */
@@ -86,153 +81,127 @@ double reg_getKLDivergence(nifti_image *referenceImage,
                            nifti_image *warpedImage,
                            double *timePointWeight,
                            nifti_image *jacobianDetImg,
-                           int *mask)
-{
+                           int *mask) {
 #ifdef _WIN32
-   long voxel;
-   long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    long voxel;
+    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #else
-   size_t voxel;
-   size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    size_t voxel;
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #endif
 
-   DTYPE *refPtr=static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *warPtr=static_cast<DTYPE *>(warpedImage->data);
-   int *maskPtr=nullptr;
-   bool MrClean=false;
-   if(mask==nullptr)
-   {
-      maskPtr=(int *)calloc(voxelNumber,sizeof(int));
-      MrClean=true;
-   }
-   else maskPtr = &mask[0];
+    DTYPE *refPtr = static_cast<DTYPE*>(referenceImage->data);
+    DTYPE *warPtr = static_cast<DTYPE*>(warpedImage->data);
+    int *maskPtr = nullptr;
+    bool MrClean = false;
+    if (mask == nullptr) {
+        maskPtr = (int*)calloc(voxelNumber, sizeof(int));
+        MrClean = true;
+    } else maskPtr = &mask[0];
 
-   DTYPE *jacPtr=nullptr;
-   if(jacobianDetImg!=nullptr)
-      jacPtr=static_cast<DTYPE *>(jacobianDetImg->data);
-   double measure = 0., measure_tp = 0., num = 0., tempRefValue, tempWarValue, tempValue;
+    DTYPE *jacPtr = nullptr;
+    if (jacobianDetImg != nullptr)
+        jacPtr = static_cast<DTYPE*>(jacobianDetImg->data);
+    double measure = 0, measure_tp = 0, num = 0, tempRefValue, tempWarValue, tempValue;
 
-   for(int time=0; time<referenceImage->nt; ++time)
-   {
-      if(timePointWeight[time]>0)
-      {
-         DTYPE *currentRefPtr=&refPtr[time*voxelNumber];
-         DTYPE *currentWarPtr=&warPtr[time*voxelNumber];
+    for (int time = 0; time < referenceImage->nt; ++time) {
+        if (timePointWeight[time] > 0) {
+            DTYPE *currentRefPtr = &refPtr[time * voxelNumber];
+            DTYPE *currentWarPtr = &warPtr[time * voxelNumber];
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(voxelNumber,currentRefPtr, currentWarPtr, \
-   maskPtr, jacobianDetImg, jacPtr) \
-   private(voxel, tempRefValue, tempWarValue, tempValue) \
-   reduction(+:measure_tp) \
-   reduction(+:num)
+    shared(voxelNumber,currentRefPtr, currentWarPtr, \
+    maskPtr, jacobianDetImg, jacPtr) \
+    private(voxel, tempRefValue, tempWarValue, tempValue) \
+    reduction(+:measure_tp) \
+    reduction(+:num)
 #endif
-         for(voxel=0; voxel<voxelNumber; ++voxel)
-         {
-            if(maskPtr[voxel]>-1)
-            {
-               tempRefValue = currentRefPtr[voxel]+1e-16;
-               tempWarValue = currentWarPtr[voxel]+1e-16;
-               tempValue=tempRefValue*log(tempRefValue/tempWarValue);
-               if(tempValue==tempValue &&
-                     tempValue!=std::numeric_limits<double>::infinity())
-               {
-                  if(jacobianDetImg==nullptr)
-                  {
-                     measure_tp -= tempValue;
-                     num++;
-                  }
-                  else
-                  {
-                     measure_tp -= tempValue * jacPtr[voxel];
-                     num+=jacPtr[voxel];
-                  }
-               }
+            for (voxel = 0; voxel < voxelNumber; ++voxel) {
+                if (maskPtr[voxel] > -1) {
+                    tempRefValue = currentRefPtr[voxel] + 1e-16;
+                    tempWarValue = currentWarPtr[voxel] + 1e-16;
+                    tempValue = tempRefValue * log(tempRefValue / tempWarValue);
+                    if (tempValue == tempValue &&
+                        tempValue != std::numeric_limits<double>::infinity()) {
+                        if (jacobianDetImg == nullptr) {
+                            measure_tp -= tempValue;
+                            num++;
+                        } else {
+                            measure_tp -= tempValue * jacPtr[voxel];
+                            num += jacPtr[voxel];
+                        }
+                    }
+                }
             }
-         }
-       measure += measure_tp * timePointWeight[time] / num;
-      }
-   }
-   if(MrClean) free(maskPtr);
-   return measure;
+            measure += measure_tp * timePointWeight[time] / num;
+        }
+    }
+    if (MrClean) free(maskPtr);
+    return measure;
 }
-template double reg_getKLDivergence<float>
-(nifti_image *,nifti_image *,double *,nifti_image *,int *);
-template double reg_getKLDivergence<double>
-(nifti_image *,nifti_image *,double *,nifti_image *,int *);
+template double reg_getKLDivergence<float>(nifti_image*, nifti_image*, double*, nifti_image*, int*);
+template double reg_getKLDivergence<double>(nifti_image*, nifti_image*, double*, nifti_image*, int*);
 /* *************************************************************** */
-double reg_kld::GetSimilarityMeasureValue()
-{
-   // Check that all the specified image are of the same datatype
-   if(this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype)
-   {
-      reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
-      reg_print_msg_error("Both input images are exepected to have the same type");
-      reg_exit();
-   }
-   double KLDValue;
-   switch(this->referenceImagePointer->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      KLDValue = reg_getKLDivergence<float>
-            (this->referenceImagePointer,
-             this->warpedFloatingImagePointer,
-             this->timePointWeight,
-             nullptr, // HERE TODO this->forwardJacDetImagePointer,
-             this->referenceMaskPointer
-             );
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      KLDValue = reg_getKLDivergence<double>
-            (this->referenceImagePointer,
-             this->warpedFloatingImagePointer,
-             this->timePointWeight,
-             nullptr, // HERE TODO this->forwardJacDetImagePointer,
-             this->referenceMaskPointer
-             );
-      break;
-   default:
-      reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
-      reg_print_msg_error("Warped pixel type unsupported");
-      reg_exit();
-   }
+/* *************************************************************** */
+double reg_kld::GetSimilarityMeasureValue() {
+    // Check that all the specified image are of the same datatype
+    if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) {
+        reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
+        reg_print_msg_error("Both input images are expected to have the same type");
+        reg_exit();
+    }
+    double KLDValue;
+    switch (this->referenceImagePointer->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        KLDValue = reg_getKLDivergence<float>(this->referenceImagePointer,
+                                              this->warpedFloatingImagePointer,
+                                              this->timePointWeight,
+                                              nullptr, // TODO this->forwardJacDetImagePointer,
+                                              this->referenceMaskPointer);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        KLDValue = reg_getKLDivergence<double>(this->referenceImagePointer,
+                                               this->warpedFloatingImagePointer,
+                                               this->timePointWeight,
+                                               nullptr, // TODO this->forwardJacDetImagePointer,
+                                               this->referenceMaskPointer);
+        break;
+    default:
+        reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
+        reg_print_msg_error("Warped pixel type unsupported");
+        reg_exit();
+    }
 
-   // Backward computation
-   if(this->isSymmetric)
-   {
-      // Check that all the specified image are of the same datatype
-      if(this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype)
-      {
-         reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
-         reg_print_msg_error("Both input images are exepected to have the same type");
-         reg_exit();
-      }
-      switch(this->floatingImagePointer->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         KLDValue += reg_getKLDivergence<float>
-               (this->floatingImagePointer,
-                this->warpedReferenceImagePointer,
-                this->timePointWeight,
-                nullptr, // HERE TODO this->backwardJacDetImagePointer,
-                this->floatingMaskPointer
-                );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         KLDValue += reg_getKLDivergence<double>
-               (this->floatingImagePointer,
-                this->warpedReferenceImagePointer,
-                this->timePointWeight,
-                nullptr, // HERE TODO this->backwardJacDetImagePointer,
-                this->floatingMaskPointer
-                );
-         break;
-      default:
-         reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
-         reg_print_msg_error("Warped pixel type unsupported");
-         reg_exit();
-      }
-   }
-   return KLDValue;
+    // Backward computation
+    if (this->isSymmetric) {
+        // Check that all the specified image are of the same datatype
+        if (this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) {
+            reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
+            reg_print_msg_error("Both input images are expected to have the same type");
+            reg_exit();
+        }
+        switch (this->floatingImagePointer->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            KLDValue += reg_getKLDivergence<float>(this->floatingImagePointer,
+                                                   this->warpedReferenceImagePointer,
+                                                   this->timePointWeight,
+                                                   nullptr, // TODO this->backwardJacDetImagePointer,
+                                                   this->floatingMaskPointer);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            KLDValue += reg_getKLDivergence<double>(this->floatingImagePointer,
+                                                    this->warpedReferenceImagePointer,
+                                                    this->timePointWeight,
+                                                    nullptr, // TODO this->backwardJacDetImagePointer,
+                                                    this->floatingMaskPointer);
+            break;
+        default:
+            reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
+            reg_print_msg_error("Warped pixel type unsupported");
+            reg_exit();
+        }
+    }
+    return KLDValue;
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -244,217 +213,193 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
                                            nifti_image *jacobianDetImg,
                                            int *mask,
                                            int current_timepoint,
-                                 double timepoint_weight)
-{
+                                           double timepoint_weight) {
 #ifdef _WIN32
-   long voxel;
-   long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    long voxel;
+    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #else
-   size_t  voxel;
-   size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    size_t voxel;
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #endif
 
-   DTYPE *refImagePtr=static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *warImagePtr=static_cast<DTYPE *>(warpedImage->data);
-   DTYPE *currentRefPtr = &refImagePtr[current_timepoint*voxelNumber];
-   DTYPE *currentWarPtr = &warImagePtr[current_timepoint*voxelNumber];
-   int *maskPtr=nullptr;
-   bool MrClean=false;
-   if(mask==nullptr)
-   {
-      maskPtr=(int *)calloc(voxelNumber,sizeof(int));
-      MrClean=true;
-   }
-   else maskPtr = &mask[0];
+    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
+    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
+    DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
+    int *maskPtr = nullptr;
+    bool MrClean = false;
+    if (mask == nullptr) {
+        maskPtr = (int*)calloc(voxelNumber, sizeof(int));
+        MrClean = true;
+    } else maskPtr = &mask[0];
 
-   DTYPE *jacPtr=nullptr;
-   if(jacobianDetImg!=nullptr)
-      jacPtr=static_cast<DTYPE *>(jacobianDetImg->data);
-   double tempValue, tempGradX, tempGradY, tempGradZ, tempRefValue, tempWarValue;
+    DTYPE *jacPtr = nullptr;
+    if (jacobianDetImg != nullptr)
+        jacPtr = static_cast<DTYPE*>(jacobianDetImg->data);
+    double tempValue, tempGradX, tempGradY, tempGradZ, tempRefValue, tempWarValue;
 
-   // Create pointers to the spatial gradient of the current warped volume
-   DTYPE *currentGradPtrX=static_cast<DTYPE *>(warpedImageGradient->data);
-   DTYPE *currentGradPtrY=&currentGradPtrX[voxelNumber];
-   DTYPE *currentGradPtrZ=nullptr;
-   if(referenceImage->nz>1)
-      currentGradPtrZ=&currentGradPtrY[voxelNumber];
+    // Create pointers to the spatial gradient of the current warped volume
+    DTYPE *currentGradPtrX = static_cast<DTYPE*>(warpedImageGradient->data);
+    DTYPE *currentGradPtrY = &currentGradPtrX[voxelNumber];
+    DTYPE *currentGradPtrZ = nullptr;
+    if (referenceImage->nz > 1)
+        currentGradPtrZ = &currentGradPtrY[voxelNumber];
 
-   // Create pointers to the kld gradient image
-   DTYPE *measureGradPtrX = static_cast<DTYPE *>(measureGradient->data);
-   DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-   DTYPE *measureGradPtrZ = nullptr;
-   if(referenceImage->nz>1)
-      measureGradPtrZ = &measureGradPtrY[voxelNumber];
+    // Create pointers to the kld gradient image
+    DTYPE *measureGradPtrX = static_cast<DTYPE*>(measureGradient->data);
+    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
+    DTYPE *measureGradPtrZ = nullptr;
+    if (referenceImage->nz > 1)
+        measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
-   // find number of active voxels and correct weight
-   double activeVoxel_num = 0.0;
-   for (voxel = 0; voxel < voxelNumber; voxel++)
-   {
-      if (mask[voxel]>-1)
-      {
-         if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel])
-            activeVoxel_num += 1.0;
-      }
-   }
-   double adjusted_weight = timepoint_weight / activeVoxel_num;
+    // find number of active voxels and correct weight
+    double activeVoxel_num = 0;
+    for (voxel = 0; voxel < voxelNumber; voxel++) {
+        if (mask[voxel] > -1) {
+            if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel])
+                activeVoxel_num += 1.0;
+        }
+    }
+    double adjusted_weight = timepoint_weight / activeVoxel_num;
 
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(voxelNumber,currentRefPtr, currentWarPtr, \
-   maskPtr, jacobianDetImg, jacPtr, referenceImage, \
-   measureGradPtrX, measureGradPtrY, measureGradPtrZ, \
-   currentGradPtrX, currentGradPtrY, currentGradPtrZ, adjusted_weight) \
-   private(voxel, tempValue, tempGradX, tempGradY, tempGradZ, \
-   tempRefValue, tempWarValue)
+    shared(voxelNumber,currentRefPtr, currentWarPtr, \
+    maskPtr, jacobianDetImg, jacPtr, referenceImage, \
+    measureGradPtrX, measureGradPtrY, measureGradPtrZ, \
+    currentGradPtrX, currentGradPtrY, currentGradPtrZ, adjusted_weight) \
+    private(voxel, tempValue, tempGradX, tempGradY, tempGradZ, \
+    tempRefValue, tempWarValue)
 #endif
-   for(voxel=0; voxel<voxelNumber; ++voxel)
-   {
-      // Check if the current voxel is in the mask
-      if(maskPtr[voxel]>-1)
-      {
-         // Read referenceImage and warpedImage probabilities and compute the ratio
-         tempRefValue = currentRefPtr[voxel]+1e-16;
-         tempWarValue = currentWarPtr[voxel]+1e-16;
-         tempValue=(currentRefPtr[voxel]+1e-16)/(currentWarPtr[voxel]+1e-16);
-         // Check if the intensity ratio is defined and different from zero
-         if(tempValue==tempValue &&
-               tempValue!=std::numeric_limits<double>::infinity() &&
-               tempValue>0)
-         {
-            tempValue = tempRefValue / tempWarValue;
-            tempValue *= adjusted_weight;
+    for (voxel = 0; voxel < voxelNumber; ++voxel) {
+        // Check if the current voxel is in the mask
+        if (maskPtr[voxel] > -1) {
+            // Read referenceImage and warpedImage probabilities and compute the ratio
+            tempRefValue = currentRefPtr[voxel] + 1e-16;
+            tempWarValue = currentWarPtr[voxel] + 1e-16;
+            tempValue = (currentRefPtr[voxel] + 1e-16) / (currentWarPtr[voxel] + 1e-16);
+            // Check if the intensity ratio is defined and different from zero
+            if (tempValue == tempValue &&
+                tempValue != std::numeric_limits<double>::infinity() &&
+                tempValue > 0) {
+                tempValue = tempRefValue / tempWarValue;
+                tempValue *= adjusted_weight;
 
-            // Jacobian modulation if the Jacobian determinant image is defined
-            if(jacobianDetImg!=nullptr)
-               tempValue *= jacPtr[voxel];
+                // Jacobian modulation if the Jacobian determinant image is defined
+                if (jacobianDetImg != nullptr)
+                    tempValue *= jacPtr[voxel];
 
-            // Ensure that gradient of the warpedImage image along x-axis is not NaN
-            tempGradX=currentGradPtrX[voxel];
-            if(tempGradX==tempGradX)
-               // Update the gradient along the x-axis
-               measureGradPtrX[voxel] -= (DTYPE)(tempValue * tempGradX);
+                // Ensure that gradient of the warpedImage image along x-axis is not NaN
+                tempGradX = currentGradPtrX[voxel];
+                if (tempGradX == tempGradX)
+                    // Update the gradient along the x-axis
+                    measureGradPtrX[voxel] -= (DTYPE)(tempValue * tempGradX);
 
-            // Ensure that gradient of the warpedImage image along y-axis is not NaN
-            tempGradY=currentGradPtrY[voxel];
-            if(tempGradY==tempGradY)
-               // Update the gradient along the y-axis
-               measureGradPtrY[voxel] -= (DTYPE)(tempValue * tempGradY);
+                // Ensure that gradient of the warpedImage image along y-axis is not NaN
+                tempGradY = currentGradPtrY[voxel];
+                if (tempGradY == tempGradY)
+                    // Update the gradient along the y-axis
+                    measureGradPtrY[voxel] -= (DTYPE)(tempValue * tempGradY);
 
-            // Check if the current images are 3D
-            if(referenceImage->nz>1)
-            {
-               // Ensure that gradient of the warpedImage image along z-axis is not NaN
-               tempGradZ=currentGradPtrZ[voxel];
-               if(tempGradZ==tempGradZ)
-                  // Update the gradient along the z-axis
-                  measureGradPtrZ[voxel] -= (DTYPE)(tempValue * tempGradZ);
+                // Check if the current images are 3D
+                if (referenceImage->nz > 1) {
+                    // Ensure that gradient of the warpedImage image along z-axis is not NaN
+                    tempGradZ = currentGradPtrZ[voxel];
+                    if (tempGradZ == tempGradZ)
+                        // Update the gradient along the z-axis
+                        measureGradPtrZ[voxel] -= (DTYPE)(tempValue * tempGradZ);
+                }
             }
-         }
-      }
-   }
-   if(MrClean) free(maskPtr);
+        }
+    }
+    if (MrClean) free(maskPtr);
 }
 template void reg_getKLDivergenceVoxelBasedGradient<float>
-(nifti_image *,nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, int, double);
+(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double);
 template void reg_getKLDivergenceVoxelBasedGradient<double>
-(nifti_image *,nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, int, double);
+(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double);
+/* *************************************************************** */
 /* *************************************************************** */
-void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
-{
-   // Check if the specified time point exists and is active
-   reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-   if(this->timePointWeight[current_timepoint]==0.0)
-      return;
+void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+    // Check if the specified time point exists and is active
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
+    if (this->timePointWeight[current_timepoint] == 0)
+        return;
 
-   // Check if all required input images are of the same data type
-   int dtype = this->referenceImagePointer->datatype;
-   if(this->warpedFloatingImagePointer->datatype != dtype ||
-         this->warpedFloatingGradientImagePointer->datatype != dtype ||
-         this->forwardVoxelBasedGradientImagePointer->datatype != dtype
-         )
-   {
-      reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
-      reg_print_msg_error("Input images are exepected to be of the same type");
-      reg_exit();
-   }
-   // Compute the gradient of the kld for the forward transformation
-   switch(dtype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      reg_getKLDivergenceVoxelBasedGradient<float>
-            (this->referenceImagePointer,
-             this->warpedFloatingImagePointer,
-             this->warpedFloatingGradientImagePointer,
-             this->forwardVoxelBasedGradientImagePointer,
-             nullptr, // HERE TODO this->forwardJacDetImagePointer,
-             this->referenceMaskPointer,
-             current_timepoint,
-          this->timePointWeight[current_timepoint]
-             );
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_getKLDivergenceVoxelBasedGradient<double>
-            (this->referenceImagePointer,
-             this->warpedFloatingImagePointer,
-             this->warpedFloatingGradientImagePointer,
-             this->forwardVoxelBasedGradientImagePointer,
-             nullptr, // HERE TODO this->forwardJacDetImagePointer,
-             this->referenceMaskPointer,
-          current_timepoint,
-          this->timePointWeight[current_timepoint]
-             );
-      break;
-   default:
-      reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
-      reg_print_msg_error("Unsupported datatype");
-      reg_exit();
-   }
-   // Compute the gradient of the kld for the backward transformation
-   if(this->isSymmetric)
-   {
-      dtype = this->floatingImagePointer->datatype;
-      if(this->warpedReferenceImagePointer->datatype != dtype ||
+    // Check if all required input images are of the same data type
+    int dtype = this->referenceImagePointer->datatype;
+    if (this->warpedFloatingImagePointer->datatype != dtype ||
+        this->warpedFloatingGradientImagePointer->datatype != dtype ||
+        this->forwardVoxelBasedGradientImagePointer->datatype != dtype) {
+        reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
+        reg_print_msg_error("Input images are expected to be of the same type");
+        reg_exit();
+    }
+    // Compute the gradient of the kld for the forward transformation
+    switch (dtype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_getKLDivergenceVoxelBasedGradient<float>(this->referenceImagePointer,
+                                                     this->warpedFloatingImagePointer,
+                                                     this->warpedFloatingGradientImagePointer,
+                                                     this->forwardVoxelBasedGradientImagePointer,
+                                                     nullptr, // TODO this->forwardJacDetImagePointer,
+                                                     this->referenceMaskPointer,
+                                                     current_timepoint,
+                                                     this->timePointWeight[current_timepoint]);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_getKLDivergenceVoxelBasedGradient<double>(this->referenceImagePointer,
+                                                      this->warpedFloatingImagePointer,
+                                                      this->warpedFloatingGradientImagePointer,
+                                                      this->forwardVoxelBasedGradientImagePointer,
+                                                      nullptr, // TODO this->forwardJacDetImagePointer,
+                                                      this->referenceMaskPointer,
+                                                      current_timepoint,
+                                                      this->timePointWeight[current_timepoint]);
+        break;
+    default:
+        reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
+        reg_print_msg_error("Unsupported datatype");
+        reg_exit();
+    }
+    // Compute the gradient of the kld for the backward transformation
+    if (this->isSymmetric) {
+        dtype = this->floatingImagePointer->datatype;
+        if (this->warpedReferenceImagePointer->datatype != dtype ||
             this->warpedReferenceGradientImagePointer->datatype != dtype ||
-            this->backwardVoxelBasedGradientImagePointer->datatype != dtype
-            )
-      {
-         reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
-         reg_print_msg_error("Input images are exepected to be of the same type");
-         reg_exit();
-      }
-      // Compute the gradient of the nmi for the backward transformation
-      switch(dtype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_getKLDivergenceVoxelBasedGradient<float>
-               (this->floatingImagePointer,
-                this->warpedReferenceImagePointer,
-                this->warpedReferenceGradientImagePointer,
-                this->backwardVoxelBasedGradientImagePointer,
-                nullptr, // HERE TODO this->backwardJacDetImagePointer,
-                this->floatingMaskPointer,
-            current_timepoint,
-            this->timePointWeight[current_timepoint]
-                );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_getKLDivergenceVoxelBasedGradient<double>
-               (this->floatingImagePointer,
-                this->warpedReferenceImagePointer,
-                this->warpedReferenceGradientImagePointer,
-                this->backwardVoxelBasedGradientImagePointer,
-                nullptr, // HERE TODO this->backwardJacDetImagePointer,
-                this->floatingMaskPointer,
-            current_timepoint,
-            this->timePointWeight[current_timepoint]
-                );
-         break;
-      default:
-         reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
-         reg_print_msg_error("Unsupported datatype");
-         reg_exit();
-      }
-   }
+            this->backwardVoxelBasedGradientImagePointer->datatype != dtype) {
+            reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
+            reg_print_msg_error("Input images are expected to be of the same type");
+            reg_exit();
+        }
+        // Compute the gradient of the nmi for the backward transformation
+        switch (dtype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_getKLDivergenceVoxelBasedGradient<float>(this->floatingImagePointer,
+                                                         this->warpedReferenceImagePointer,
+                                                         this->warpedReferenceGradientImagePointer,
+                                                         this->backwardVoxelBasedGradientImagePointer,
+                                                         nullptr, // TODO this->backwardJacDetImagePointer,
+                                                         this->floatingMaskPointer,
+                                                         current_timepoint,
+                                                         this->timePointWeight[current_timepoint]);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_getKLDivergenceVoxelBasedGradient<double>(this->floatingImagePointer,
+                                                          this->warpedReferenceImagePointer,
+                                                          this->warpedReferenceGradientImagePointer,
+                                                          this->backwardVoxelBasedGradientImagePointer,
+                                                          nullptr, // TODO this->backwardJacDetImagePointer,
+                                                          this->floatingMaskPointer,
+                                                          current_timepoint,
+                                                          this->timePointWeight[current_timepoint]);
+            break;
+        default:
+            reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
+            reg_print_msg_error("Unsupported datatype");
+            reg_exit();
+        }
+    }
 }
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h
index 71efcaef..22f34a21 100755
--- a/reg-lib/cpu/_reg_kld.h
+++ b/reg-lib/cpu/_reg_kld.h
@@ -14,9 +14,8 @@
 
 #include "_reg_measure.h"
 
-/* *************************************************************** */
-class reg_kld : public reg_measure
-{
+ /* *************************************************************** */
+class reg_kld: public reg_measure {
 public:
    /// @brief reg_kld class constructor
    reg_kld();
@@ -84,5 +83,5 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference,
                                            nifti_image *jacobianDeterminantImage,
                                            int *mask,
                                            int current_timepoint,
-                                 double timepoint_weight);
+                                           double timepoint_weight);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 89dac79b..cbdd88c1 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -12,75 +12,72 @@
 
 #include "_reg_lncc.h"
 
-/* *************************************************************** */
-/* *************************************************************** */
-reg_lncc::reg_lncc()
-   : reg_measure()
-{
-   this->forwardCorrelationImage=nullptr;
-   this->referenceMeanImage=nullptr;
-   this->referenceSdevImage=nullptr;
-   this->warpedFloatingMeanImage=nullptr;
-   this->warpedFloatingSdevImage=nullptr;
-   this->forwardMask = nullptr;
-
-   this->backwardCorrelationImage=nullptr;
-   this->floatingMeanImage=nullptr;
-   this->floatingSdevImage=nullptr;
-   this->warpedReferenceMeanImage=nullptr;
-   this->warpedReferenceSdevImage=nullptr;
-   this->backwardMask = nullptr;
-
-   // Gaussian kernel is used by default
-   this->kernelType=GAUSSIAN_KERNEL;
-
-   for(int i=0; i<255; ++i)
-      kernelStandardDeviation[i]=-5.f;
+ /* *************************************************************** */
+ /* *************************************************************** */
+reg_lncc::reg_lncc(): reg_measure() {
+    this->forwardCorrelationImage = nullptr;
+    this->referenceMeanImage = nullptr;
+    this->referenceSdevImage = nullptr;
+    this->warpedFloatingMeanImage = nullptr;
+    this->warpedFloatingSdevImage = nullptr;
+    this->forwardMask = nullptr;
+
+    this->backwardCorrelationImage = nullptr;
+    this->floatingMeanImage = nullptr;
+    this->floatingSdevImage = nullptr;
+    this->warpedReferenceMeanImage = nullptr;
+    this->warpedReferenceSdevImage = nullptr;
+    this->backwardMask = nullptr;
+
+    // Gaussian kernel is used by default
+    this->kernelType = GAUSSIAN_KERNEL;
+
+    for (int i = 0; i < 255; ++i)
+        kernelStandardDeviation[i] = -5.f;
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_lncc constructor called");
+    reg_print_msg_debug("reg_lncc constructor called");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-reg_lncc::~reg_lncc()
-{
-   if(this->forwardCorrelationImage!=nullptr)
-      nifti_image_free(this->forwardCorrelationImage);
-   this->forwardCorrelationImage=nullptr;
-   if(this->referenceMeanImage!=nullptr)
-      nifti_image_free(this->referenceMeanImage);
-   this->referenceMeanImage=nullptr;
-   if(this->referenceSdevImage!=nullptr)
-      nifti_image_free(this->referenceSdevImage);
-   this->referenceSdevImage=nullptr;
-   if(this->warpedFloatingMeanImage!=nullptr)
-      nifti_image_free(this->warpedFloatingMeanImage);
-   this->warpedFloatingMeanImage=nullptr;
-   if(this->warpedFloatingSdevImage!=nullptr)
-      nifti_image_free(this->warpedFloatingSdevImage);
-   this->warpedFloatingSdevImage=nullptr;
-   if(this->forwardMask!=nullptr)
-      free(this->forwardMask);
-   this->forwardMask=nullptr;
-
-   if(this->backwardCorrelationImage!=nullptr)
-      nifti_image_free(this->backwardCorrelationImage);
-   this->backwardCorrelationImage=nullptr;
-   if(this->floatingMeanImage!=nullptr)
-      nifti_image_free(this->floatingMeanImage);
-   this->floatingMeanImage=nullptr;
-   if(this->floatingSdevImage!=nullptr)
-      nifti_image_free(this->floatingSdevImage);
-   this->floatingSdevImage=nullptr;
-   if(this->warpedReferenceMeanImage!=nullptr)
-      nifti_image_free(this->warpedReferenceMeanImage);
-   this->warpedReferenceMeanImage=nullptr;
-   if(this->warpedReferenceSdevImage!=nullptr)
-      nifti_image_free(this->warpedReferenceSdevImage);
-   this->warpedReferenceSdevImage=nullptr;
-   if(this->backwardMask!=nullptr)
-      free(this->backwardMask);
-   this->backwardMask=nullptr;
+reg_lncc::~reg_lncc() {
+    if (this->forwardCorrelationImage != nullptr)
+        nifti_image_free(this->forwardCorrelationImage);
+    this->forwardCorrelationImage = nullptr;
+    if (this->referenceMeanImage != nullptr)
+        nifti_image_free(this->referenceMeanImage);
+    this->referenceMeanImage = nullptr;
+    if (this->referenceSdevImage != nullptr)
+        nifti_image_free(this->referenceSdevImage);
+    this->referenceSdevImage = nullptr;
+    if (this->warpedFloatingMeanImage != nullptr)
+        nifti_image_free(this->warpedFloatingMeanImage);
+    this->warpedFloatingMeanImage = nullptr;
+    if (this->warpedFloatingSdevImage != nullptr)
+        nifti_image_free(this->warpedFloatingSdevImage);
+    this->warpedFloatingSdevImage = nullptr;
+    if (this->forwardMask != nullptr)
+        free(this->forwardMask);
+    this->forwardMask = nullptr;
+
+    if (this->backwardCorrelationImage != nullptr)
+        nifti_image_free(this->backwardCorrelationImage);
+    this->backwardCorrelationImage = nullptr;
+    if (this->floatingMeanImage != nullptr)
+        nifti_image_free(this->floatingMeanImage);
+    this->floatingMeanImage = nullptr;
+    if (this->floatingSdevImage != nullptr)
+        nifti_image_free(this->floatingSdevImage);
+    this->floatingSdevImage = nullptr;
+    if (this->warpedReferenceMeanImage != nullptr)
+        nifti_image_free(this->warpedReferenceMeanImage);
+    this->warpedReferenceMeanImage = nullptr;
+    if (this->warpedReferenceSdevImage != nullptr)
+        nifti_image_free(this->warpedReferenceSdevImage);
+    this->warpedReferenceSdevImage = nullptr;
+    if (this->backwardMask != nullptr)
+        free(this->backwardMask);
+    this->backwardMask = nullptr;
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -93,61 +90,51 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
                                      nifti_image *stdDevWarImage,
                                      int *refMask,
                                      int *combinedMask,
-                                     int current_timepoint)
-{
-   // Generate the foward mask to ignore all NaN values
+                                     int current_timepoint) {
+    // Generate the forward mask to ignore all NaN values
 #ifdef _WIN32
-   long voxel;
-   long voxelNumber = (long)refImage->nx*refImage->ny*refImage->nz;
+    long voxel;
+    long voxelNumber = long(refImage->nx * refImage->ny * refImage->nz);
 #else
-   size_t voxel;
-   size_t voxelNumber = (size_t)refImage->nx*refImage->ny*refImage->nz;
+    size_t voxel;
+    size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz);
 #endif
-   memcpy(combinedMask, refMask, voxelNumber*sizeof(int));
-   reg_tools_removeNanFromMask(refImage, combinedMask);
-   reg_tools_removeNanFromMask(warImage, combinedMask);
-
-   DTYPE *origRefPtr = static_cast<DTYPE *>(refImage->data);
-   DTYPE *meanRefPtr = static_cast<DTYPE *>(meanRefImage->data);
-   DTYPE *sdevRefPtr = static_cast<DTYPE *>(stdDevRefImage->data);
-   memcpy(meanRefPtr, &origRefPtr[current_timepoint*voxelNumber],
-         voxelNumber*refImage->nbyper);
-   memcpy(sdevRefPtr, &origRefPtr[current_timepoint*voxelNumber],
-         voxelNumber*refImage->nbyper);
-
-   reg_tools_multiplyImageToImage(stdDevRefImage, stdDevRefImage, stdDevRefImage);
-   reg_tools_kernelConvolution(meanRefImage, this->kernelStandardDeviation,
-                               this->kernelType, combinedMask);
-   reg_tools_kernelConvolution(stdDevRefImage, this->kernelStandardDeviation,
-                               this->kernelType, combinedMask);
-
-   DTYPE *origWarPtr = static_cast<DTYPE *>(warImage->data);
-   DTYPE *meanWarPtr = static_cast<DTYPE *>(meanWarImage->data);
-   DTYPE *sdevWarPtr = static_cast<DTYPE *>(stdDevWarImage->data);
-   memcpy(meanWarPtr, &origWarPtr[current_timepoint*voxelNumber],
-         voxelNumber*warImage->nbyper);
-   memcpy(sdevWarPtr, &origWarPtr[current_timepoint*voxelNumber],
-         voxelNumber*warImage->nbyper);
-
-   reg_tools_multiplyImageToImage(stdDevWarImage, stdDevWarImage, stdDevWarImage);
-   reg_tools_kernelConvolution(meanWarImage, this->kernelStandardDeviation,
-                               this->kernelType, combinedMask);
-   reg_tools_kernelConvolution(stdDevWarImage, this->kernelStandardDeviation,
-                               this->kernelType, combinedMask);
+    memcpy(combinedMask, refMask, voxelNumber * sizeof(int));
+    reg_tools_removeNanFromMask(refImage, combinedMask);
+    reg_tools_removeNanFromMask(warImage, combinedMask);
+
+    DTYPE *origRefPtr = static_cast<DTYPE*>(refImage->data);
+    DTYPE *meanRefPtr = static_cast<DTYPE*>(meanRefImage->data);
+    DTYPE *sdevRefPtr = static_cast<DTYPE*>(stdDevRefImage->data);
+    memcpy(meanRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper);
+    memcpy(sdevRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper);
+
+    reg_tools_multiplyImageToImage(stdDevRefImage, stdDevRefImage, stdDevRefImage);
+    reg_tools_kernelConvolution(meanRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
+    reg_tools_kernelConvolution(stdDevRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
+
+    DTYPE *origWarPtr = static_cast<DTYPE*>(warImage->data);
+    DTYPE *meanWarPtr = static_cast<DTYPE*>(meanWarImage->data);
+    DTYPE *sdevWarPtr = static_cast<DTYPE*>(stdDevWarImage->data);
+    memcpy(meanWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper);
+    memcpy(sdevWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper);
+
+    reg_tools_multiplyImageToImage(stdDevWarImage, stdDevWarImage, stdDevWarImage);
+    reg_tools_kernelConvolution(meanWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
+    reg_tools_kernelConvolution(stdDevWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr) \
-   private(voxel)
+    shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr) \
+    private(voxel)
 #endif
-   for(voxel=0; voxel<voxelNumber; ++voxel)
-   {
-      // G*(I^2) - (G*I)^2
-      sdevRefPtr[voxel] = sqrt(sdevRefPtr[voxel] - reg_pow2(meanRefPtr[voxel]));
-      sdevWarPtr[voxel] = sqrt(sdevWarPtr[voxel] - reg_pow2(meanWarPtr[voxel]));
-      // Stabilise the computation
-      if(sdevRefPtr[voxel]<1.e-06) sdevRefPtr[voxel]=static_cast<DTYPE>(0);
-      if(sdevWarPtr[voxel]<1.e-06) sdevWarPtr[voxel]=static_cast<DTYPE>(0);
-   }
+    for (voxel = 0; voxel < voxelNumber; ++voxel) {
+        // G*(I^2) - (G*I)^2
+        sdevRefPtr[voxel] = sqrt(sdevRefPtr[voxel] - reg_pow2(meanRefPtr[voxel]));
+        sdevWarPtr[voxel] = sqrt(sdevWarPtr[voxel] - reg_pow2(meanWarPtr[voxel]));
+        // Stabilise the computation
+        if (sdevRefPtr[voxel] < 1.e-06) sdevRefPtr[voxel] = static_cast<DTYPE>(0);
+        if (sdevWarPtr[voxel] < 1.e-06) sdevWarPtr[voxel] = static_cast<DTYPE>(0);
+    }
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -161,146 +148,125 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
                                  int *maskFloPtr,
                                  nifti_image *warRefImgPtr,
                                  nifti_image *warRefGraPtr,
-                                 nifti_image *bckVoxBasedGraPtr)
-{
-   reg_measure::InitialiseMeasure(refImgPtr,
-                                  floImgPtr,
-                                  maskRefPtr,
-                                  warFloImgPtr,
-                                  warFloGraPtr,
-                                  forVoxBasedGraPtr,
-                                  forwardLocalWeightPtr,
-                                  maskFloPtr,
-                                  warRefImgPtr,
-                                  warRefGraPtr,
-                                  bckVoxBasedGraPtr);
-
-   for(int i=0; i<this->referenceImagePointer->nt; ++i)
-   {
-      if(this->timePointWeight[i]>0.0)
-      {
-         reg_intensityRescale(this->referenceImagePointer,
-                              i,
-                              0.f,
-                              1.f);
-         reg_intensityRescale(this->floatingImagePointer,
-                              i,
-                              0.f,
-                              1.f);
-      }
-   }
-
-   // Check that no images are already allocated
-   if(this->forwardCorrelationImage!=nullptr)
-      nifti_image_free(this->forwardCorrelationImage);
-   this->forwardCorrelationImage=nullptr;
-   if(this->referenceMeanImage!=nullptr)
-      nifti_image_free(this->referenceMeanImage);
-   this->referenceMeanImage=nullptr;
-   if(this->referenceSdevImage!=nullptr)
-      nifti_image_free(this->referenceSdevImage);
-   this->referenceSdevImage=nullptr;
-   if(this->warpedFloatingMeanImage!=nullptr)
-      nifti_image_free(this->warpedFloatingMeanImage);
-   this->warpedFloatingMeanImage=nullptr;
-   if(this->warpedFloatingSdevImage!=nullptr)
-      nifti_image_free(this->warpedFloatingSdevImage);
-   this->warpedFloatingSdevImage=nullptr;
-   if(this->backwardCorrelationImage!=nullptr)
-      nifti_image_free(this->backwardCorrelationImage);
-   this->backwardCorrelationImage=nullptr;
-   if(this->floatingMeanImage!=nullptr)
-      nifti_image_free(this->floatingMeanImage);
-   this->floatingMeanImage=nullptr;
-   if(this->floatingSdevImage!=nullptr)
-      nifti_image_free(this->floatingSdevImage);
-   this->floatingSdevImage=nullptr;
-   if(this->warpedReferenceMeanImage!=nullptr)
-      nifti_image_free(this->warpedReferenceMeanImage);
-   this->warpedReferenceMeanImage=nullptr;
-   if(this->warpedReferenceSdevImage!=nullptr)
-      nifti_image_free(this->warpedReferenceSdevImage);
-   this->warpedReferenceSdevImage=nullptr;
-   if(this->forwardMask!=nullptr)
-      free(this->forwardMask);
-   this->forwardMask=nullptr;
-   if(this->backwardMask!=nullptr)
-      free(this->backwardMask);
-   this->backwardMask=nullptr;
-
-   //
-   size_t voxelNumber = (size_t)this->referenceImagePointer->nx *
-         this->referenceImagePointer->ny * this->referenceImagePointer->nz;
-
-   // Allocate the required image to store the correlation of the forward transformation
-   this->forwardCorrelationImage=nifti_copy_nim_info(this->referenceImagePointer);
-   this->forwardCorrelationImage->ndim=this->forwardCorrelationImage->dim[0]=this->referenceImagePointer->nz>1?3:2;
-   this->forwardCorrelationImage->nt=this->forwardCorrelationImage->dim[4]=1;
-   this->forwardCorrelationImage->nvox=voxelNumber;
-   this->forwardCorrelationImage->data=(void *)malloc(voxelNumber *
-                                                      this->forwardCorrelationImage->nbyper);
-
-   // Allocate the required images to store mean and stdev of the reference image
-   this->referenceMeanImage=nifti_copy_nim_info(this->forwardCorrelationImage);
-   this->referenceMeanImage->data=(void *)malloc(this->referenceMeanImage->nvox *
-                                                 this->referenceMeanImage->nbyper);
-
-   this->referenceSdevImage=nifti_copy_nim_info(this->forwardCorrelationImage);
-   this->referenceSdevImage->data=(void *)malloc(this->referenceSdevImage->nvox *
-                                                 this->referenceSdevImage->nbyper);
-
-   // Allocate the required images to store mean and stdev of the warped floating image
-   this->warpedFloatingMeanImage=nifti_copy_nim_info(this->forwardCorrelationImage);
-   this->warpedFloatingMeanImage->data=(void *)malloc(this->warpedFloatingMeanImage->nvox *
-                                                      this->warpedFloatingMeanImage->nbyper);
-
-   this->warpedFloatingSdevImage=nifti_copy_nim_info(this->forwardCorrelationImage);
-   this->warpedFloatingSdevImage->data=(void *)malloc(this->warpedFloatingSdevImage->nvox *
-                                                      this->warpedFloatingSdevImage->nbyper);
-
-   // Allocate the array to store the mask of the forward image
-   this->forwardMask=(int *)malloc(voxelNumber*sizeof(int));
-   if(this->isSymmetric)
-   {
-      voxelNumber = (size_t)floatingImagePointer->nx *
-            floatingImagePointer->ny * floatingImagePointer->nz;
-      // Allocate the required image to store the correlation of the backward transformation
-      this->backwardCorrelationImage=nifti_copy_nim_info(this->floatingImagePointer);
-      this->backwardCorrelationImage->ndim=this->backwardCorrelationImage->dim[0]=this->floatingImagePointer->nz>1?3:2;
-      this->backwardCorrelationImage->nt=this->backwardCorrelationImage->dim[4]=1;
-      this->backwardCorrelationImage->nvox=voxelNumber;
-      this->backwardCorrelationImage->data=(void *)malloc(voxelNumber *
-                                                          this->backwardCorrelationImage->nbyper);
-
-      // Allocate the required images to store mean and stdev of the floating image
-      this->floatingMeanImage=nifti_copy_nim_info(this->backwardCorrelationImage);
-      this->floatingMeanImage->data=(void *)malloc(this->floatingMeanImage->nvox *
-                                                   this->floatingMeanImage->nbyper);
-
-      this->floatingSdevImage=nifti_copy_nim_info(this->backwardCorrelationImage);
-      this->floatingSdevImage->data=(void *)malloc(this->floatingSdevImage->nvox *
-                                                   this->floatingSdevImage->nbyper);
-
-      // Allocate the required images to store mean and stdev of the warped reference image
-      this->warpedReferenceMeanImage=nifti_copy_nim_info(this->backwardCorrelationImage);
-      this->warpedReferenceMeanImage->data=(void *)malloc(this->warpedReferenceMeanImage->nvox *
-                                                          this->warpedReferenceMeanImage->nbyper);
-
-      this->warpedReferenceSdevImage=nifti_copy_nim_info(this->backwardCorrelationImage);
-      this->warpedReferenceSdevImage->data=(void *)malloc(this->warpedReferenceSdevImage->nvox *
-                                                          this->warpedReferenceSdevImage->nbyper);
-
-      // Allocate the array to store the mask of the backward image
-      this->backwardMask=(int *)malloc(voxelNumber*sizeof(int));
-   }
+                                 nifti_image *bckVoxBasedGraPtr) {
+    reg_measure::InitialiseMeasure(refImgPtr,
+                                   floImgPtr,
+                                   maskRefPtr,
+                                   warFloImgPtr,
+                                   warFloGraPtr,
+                                   forVoxBasedGraPtr,
+                                   forwardLocalWeightPtr,
+                                   maskFloPtr,
+                                   warRefImgPtr,
+                                   warRefGraPtr,
+                                   bckVoxBasedGraPtr);
+
+    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+        if (this->timePointWeight[i] > 0) {
+            reg_intensityRescale(this->referenceImagePointer, i, 0.f, 1.f);
+            reg_intensityRescale(this->floatingImagePointer, i, 0.f, 1.f);
+        }
+    }
+
+    // Check that no images are already allocated
+    if (this->forwardCorrelationImage != nullptr)
+        nifti_image_free(this->forwardCorrelationImage);
+    this->forwardCorrelationImage = nullptr;
+    if (this->referenceMeanImage != nullptr)
+        nifti_image_free(this->referenceMeanImage);
+    this->referenceMeanImage = nullptr;
+    if (this->referenceSdevImage != nullptr)
+        nifti_image_free(this->referenceSdevImage);
+    this->referenceSdevImage = nullptr;
+    if (this->warpedFloatingMeanImage != nullptr)
+        nifti_image_free(this->warpedFloatingMeanImage);
+    this->warpedFloatingMeanImage = nullptr;
+    if (this->warpedFloatingSdevImage != nullptr)
+        nifti_image_free(this->warpedFloatingSdevImage);
+    this->warpedFloatingSdevImage = nullptr;
+    if (this->backwardCorrelationImage != nullptr)
+        nifti_image_free(this->backwardCorrelationImage);
+    this->backwardCorrelationImage = nullptr;
+    if (this->floatingMeanImage != nullptr)
+        nifti_image_free(this->floatingMeanImage);
+    this->floatingMeanImage = nullptr;
+    if (this->floatingSdevImage != nullptr)
+        nifti_image_free(this->floatingSdevImage);
+    this->floatingSdevImage = nullptr;
+    if (this->warpedReferenceMeanImage != nullptr)
+        nifti_image_free(this->warpedReferenceMeanImage);
+    this->warpedReferenceMeanImage = nullptr;
+    if (this->warpedReferenceSdevImage != nullptr)
+        nifti_image_free(this->warpedReferenceSdevImage);
+    this->warpedReferenceSdevImage = nullptr;
+    if (this->forwardMask != nullptr)
+        free(this->forwardMask);
+    this->forwardMask = nullptr;
+    if (this->backwardMask != nullptr)
+        free(this->backwardMask);
+    this->backwardMask = nullptr;
+
+    size_t voxelNumber = size_t(this->referenceImagePointer->nx *
+                                this->referenceImagePointer->ny *
+                                this->referenceImagePointer->nz);
+
+    // Allocate the required image to store the correlation of the forward transformation
+    this->forwardCorrelationImage = nifti_copy_nim_info(this->referenceImagePointer);
+    this->forwardCorrelationImage->ndim = this->forwardCorrelationImage->dim[0] = this->referenceImagePointer->nz > 1 ? 3 : 2;
+    this->forwardCorrelationImage->nt = this->forwardCorrelationImage->dim[4] = 1;
+    this->forwardCorrelationImage->nvox = voxelNumber;
+    this->forwardCorrelationImage->data = malloc(voxelNumber * this->forwardCorrelationImage->nbyper);
+
+    // Allocate the required images to store mean and stdev of the reference image
+    this->referenceMeanImage = nifti_copy_nim_info(this->forwardCorrelationImage);
+    this->referenceMeanImage->data = malloc(this->referenceMeanImage->nvox * this->referenceMeanImage->nbyper);
+
+    this->referenceSdevImage = nifti_copy_nim_info(this->forwardCorrelationImage);
+    this->referenceSdevImage->data = malloc(this->referenceSdevImage->nvox * this->referenceSdevImage->nbyper);
+
+    // Allocate the required images to store mean and stdev of the warped floating image
+    this->warpedFloatingMeanImage = nifti_copy_nim_info(this->forwardCorrelationImage);
+    this->warpedFloatingMeanImage->data = malloc(this->warpedFloatingMeanImage->nvox * this->warpedFloatingMeanImage->nbyper);
+
+    this->warpedFloatingSdevImage = nifti_copy_nim_info(this->forwardCorrelationImage);
+    this->warpedFloatingSdevImage->data = malloc(this->warpedFloatingSdevImage->nvox * this->warpedFloatingSdevImage->nbyper);
+
+    // Allocate the array to store the mask of the forward image
+    this->forwardMask = (int*)malloc(voxelNumber * sizeof(int));
+    if (this->isSymmetric) {
+        voxelNumber = size_t(floatingImagePointer->nx * floatingImagePointer->ny * floatingImagePointer->nz);
+
+        // Allocate the required image to store the correlation of the backward transformation
+        this->backwardCorrelationImage = nifti_copy_nim_info(this->floatingImagePointer);
+        this->backwardCorrelationImage->ndim = this->backwardCorrelationImage->dim[0] = this->floatingImagePointer->nz > 1 ? 3 : 2;
+        this->backwardCorrelationImage->nt = this->backwardCorrelationImage->dim[4] = 1;
+        this->backwardCorrelationImage->nvox = voxelNumber;
+        this->backwardCorrelationImage->data = malloc(voxelNumber * this->backwardCorrelationImage->nbyper);
+
+        // Allocate the required images to store mean and stdev of the floating image
+        this->floatingMeanImage = nifti_copy_nim_info(this->backwardCorrelationImage);
+        this->floatingMeanImage->data = malloc(this->floatingMeanImage->nvox * this->floatingMeanImage->nbyper);
+
+        this->floatingSdevImage = nifti_copy_nim_info(this->backwardCorrelationImage);
+        this->floatingSdevImage->data = malloc(this->floatingSdevImage->nvox * this->floatingSdevImage->nbyper);
+
+        // Allocate the required images to store mean and stdev of the warped reference image
+        this->warpedReferenceMeanImage = nifti_copy_nim_info(this->backwardCorrelationImage);
+        this->warpedReferenceMeanImage->data = malloc(this->warpedReferenceMeanImage->nvox * this->warpedReferenceMeanImage->nbyper);
+
+        this->warpedReferenceSdevImage = nifti_copy_nim_info(this->backwardCorrelationImage);
+        this->warpedReferenceSdevImage->data = malloc(this->warpedReferenceSdevImage->nvox * this->warpedReferenceSdevImage->nbyper);
+
+        // Allocate the array to store the mask of the backward image
+        this->backwardMask = (int*)malloc(voxelNumber * sizeof(int));
+    }
 #ifndef NDEBUG
-   char text[255];
-   reg_print_msg_debug("reg_lncc::InitialiseMeasure().");
-   for(int i=0; i<this->referenceImagePointer->nt; ++i)
-   {
-      sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
-      reg_print_msg_debug(text);
-   }
+    char text[255];
+    reg_print_msg_debug("reg_lncc::InitialiseMeasure().");
+    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+        sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
+        reg_print_msg_debug(text);
+    }
 #endif
 }
 /* *************************************************************** */
@@ -316,199 +282,180 @@ double reg_getLNCCValue(nifti_image *referenceImage,
                         float *kernelStandardDeviation,
                         nifti_image *correlationImage,
                         int kernelType,
-                        int current_timepoint)
-{
+                        int current_timepoint) {
 #ifdef _WIN32
-   long voxel;
-   long voxelNumber=(long)referenceImage->nx*
-         referenceImage->ny*referenceImage->nz;
+    long voxel;
+    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #else
-   size_t voxel;
-   size_t voxelNumber=(size_t)referenceImage->nx*
-         referenceImage->ny*referenceImage->nz;
+    size_t voxel;
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #endif
 
-   // Compute the local correlation
-   DTYPE *refImagePtr=static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *currentRefPtr = &refImagePtr[current_timepoint*voxelNumber];
+    // Compute the local correlation
+    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
+    DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
 
-   DTYPE *warImagePtr=static_cast<DTYPE *>(warpedImage->data);
-   DTYPE *currentWarPtr = &warImagePtr[current_timepoint*voxelNumber];
+    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
+    DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
 
-   DTYPE *refMeanPtr=static_cast<DTYPE *>(referenceMeanImage->data);
-   DTYPE *warMeanPtr=static_cast<DTYPE *>(warpedMeanImage->data);
-   DTYPE *refSdevPtr=static_cast<DTYPE *>(referenceSdevImage->data);
-   DTYPE *warSdevPtr=static_cast<DTYPE *>(warpedSdevImage->data);
-   DTYPE *correlaPtr=static_cast<DTYPE *>(correlationImage->data);
+    DTYPE *refMeanPtr = static_cast<DTYPE*>(referenceMeanImage->data);
+    DTYPE *warMeanPtr = static_cast<DTYPE*>(warpedMeanImage->data);
+    DTYPE *refSdevPtr = static_cast<DTYPE*>(referenceSdevImage->data);
+    DTYPE *warSdevPtr = static_cast<DTYPE*>(warpedSdevImage->data);
+    DTYPE *correlaPtr = static_cast<DTYPE*>(correlationImage->data);
 
-   for(size_t i=0; i<voxelNumber; ++i)
-      correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i];
+    for (size_t i = 0; i < voxelNumber; ++i)
+        correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i];
 
-   reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
+    reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
 
-   double lncc_value_sum  = 0., lncc_value;
-   double activeVoxel_num = 0.;
+    double lncc_value_sum = 0., lncc_value;
+    double activeVoxel_num = 0.;
 
-   // Iteration over all voxels
+    // Iteration over all voxels
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \
-   refSdevPtr,warSdevPtr,correlaPtr) \
-   private(voxel,lncc_value) \
-   reduction(+:lncc_value_sum) \
-   reduction(+:activeVoxel_num)
+    shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \
+    refSdevPtr,warSdevPtr,correlaPtr) \
+    private(voxel,lncc_value) \
+    reduction(+:lncc_value_sum) \
+    reduction(+:activeVoxel_num)
 #endif
-   for(voxel=0; voxel<voxelNumber; ++voxel)
-   {
-      // Check if the current voxel belongs to the mask
-      if(combinedMask[voxel]>-1)
-      {
-         lncc_value = (
-                  correlaPtr[voxel] -
-                  (refMeanPtr[voxel]*warMeanPtr[voxel])
-                  ) /
-               (refSdevPtr[voxel]*warSdevPtr[voxel]);
-
-         if(lncc_value==lncc_value && isinf(lncc_value)==0)
-         {
-            lncc_value_sum += fabs(lncc_value);
-            ++activeVoxel_num;
-         }
-      }
-   }
-   return lncc_value_sum/activeVoxel_num;
+    for (voxel = 0; voxel < voxelNumber; ++voxel) {
+        // Check if the current voxel belongs to the mask
+        if (combinedMask[voxel] > -1) {
+            lncc_value = (correlaPtr[voxel] - (refMeanPtr[voxel] * warMeanPtr[voxel])) / (refSdevPtr[voxel] * warSdevPtr[voxel]);
+            if (lncc_value == lncc_value && isinf(lncc_value) == 0) {
+                lncc_value_sum += fabs(lncc_value);
+                ++activeVoxel_num;
+            }
+        }
+    }
+    return lncc_value_sum / activeVoxel_num;
 }
 /* *************************************************************** */
 /* *************************************************************** */
-double reg_lncc::GetSimilarityMeasureValue()
-{
-   double lncc_value=0.f;
-
-   for(int current_timepoint=0; current_timepoint<this->referenceImagePointer->nt; ++current_timepoint)
-   {
-      if (this->timePointWeight[current_timepoint] > 0.0)
-      {
-         double tp_value = 0.0;
-         // Compute the mean and variance of the reference and warped floating
-         switch (this->referenceImagePointer->datatype)
-         {
-         case NIFTI_TYPE_FLOAT32:
-            this->UpdateLocalStatImages<float>(this->referenceImagePointer,
-               this->warpedFloatingImagePointer,
-               this->referenceMeanImage,
-               this->warpedFloatingMeanImage,
-               this->referenceSdevImage,
-               this->warpedFloatingSdevImage,
-               this->referenceMaskPointer,
-               this->forwardMask,
-               current_timepoint);
-            break;
-         case NIFTI_TYPE_FLOAT64:
-            this->UpdateLocalStatImages<double>(this->referenceImagePointer,
-               this->warpedFloatingImagePointer,
-               this->referenceMeanImage,
-               this->warpedFloatingMeanImage,
-               this->referenceSdevImage,
-               this->warpedFloatingSdevImage,
-               this->referenceMaskPointer,
-               this->forwardMask,
-               current_timepoint);
-            break;
-         }
-
-			// Compute the LNCC - Forward
-			switch (this->referenceImagePointer->datatype)
-			{
-			case NIFTI_TYPE_FLOAT32:
-				tp_value += reg_getLNCCValue<float>(this->referenceImagePointer,
-					this->referenceMeanImage,
-					this->referenceSdevImage,
-					this->warpedFloatingImagePointer,
-					this->warpedFloatingMeanImage,
-					this->warpedFloatingSdevImage,
-					this->forwardMask,
-					this->kernelStandardDeviation,
-					this->forwardCorrelationImage,
-					this->kernelType,
-					current_timepoint);
-				break;
-			case NIFTI_TYPE_FLOAT64:
-				tp_value += reg_getLNCCValue<double>(this->referenceImagePointer,
-					this->referenceMeanImage,
-					this->referenceSdevImage,
-					this->warpedFloatingImagePointer,
-					this->warpedFloatingMeanImage,
-					this->warpedFloatingSdevImage,
-					this->forwardMask,
-					this->kernelStandardDeviation,
-					this->forwardCorrelationImage,
-					this->kernelType,
-					current_timepoint);
-				break;
-			}
-			if (this->isSymmetric)
-			{
-				// Compute the mean and variance of the floating and warped reference
-				switch (this->floatingImagePointer->datatype)
-				{
-				case NIFTI_TYPE_FLOAT32:
-					this->UpdateLocalStatImages<float>(this->floatingImagePointer,
-						this->warpedReferenceImagePointer,
-						this->floatingMeanImage,
-						this->warpedReferenceMeanImage,
-						this->floatingSdevImage,
-						this->warpedReferenceSdevImage,
-						this->floatingMaskPointer,
-						this->backwardMask,
-						current_timepoint);
-					break;
-				case NIFTI_TYPE_FLOAT64:
-					this->UpdateLocalStatImages<double>(this->floatingImagePointer,
-						this->warpedReferenceImagePointer,
-						this->floatingMeanImage,
-						this->warpedReferenceMeanImage,
-						this->floatingSdevImage,
-						this->warpedReferenceSdevImage,
-						this->floatingMaskPointer,
-						this->backwardMask,
-						current_timepoint);
-					break;
-				}
-				// Compute the LNCC - Backward
-				switch (this->floatingImagePointer->datatype)
-				{
-				case NIFTI_TYPE_FLOAT32:
-					tp_value += reg_getLNCCValue<float>(this->floatingImagePointer,
-						this->floatingMeanImage,
-						this->floatingSdevImage,
-						this->warpedReferenceImagePointer,
-						this->warpedReferenceMeanImage,
-						this->warpedReferenceSdevImage,
-						this->backwardMask,
-						this->kernelStandardDeviation,
-						this->backwardCorrelationImage,
-						this->kernelType,
-						current_timepoint);
-					break;
-				case NIFTI_TYPE_FLOAT64:
-					tp_value += reg_getLNCCValue<double>(this->floatingImagePointer,
-						this->floatingMeanImage,
-						this->floatingSdevImage,
-						this->warpedReferenceImagePointer,
-						this->warpedReferenceMeanImage,
-						this->warpedReferenceSdevImage,
-						this->backwardMask,
-						this->kernelStandardDeviation,
-						this->backwardCorrelationImage,
-						this->kernelType,
-						current_timepoint);
-					break;
-				}
-			}
-			lncc_value += tp_value * this->timePointWeight[current_timepoint];
-		}
-	}
-	return lncc_value;
+double reg_lncc::GetSimilarityMeasureValue() {
+    double lncc_value = 0;
+
+    for (int current_timepoint = 0; current_timepoint < this->referenceImagePointer->nt; ++current_timepoint) {
+        if (this->timePointWeight[current_timepoint] > 0) {
+            double tp_value = 0;
+            // Compute the mean and variance of the reference and warped floating
+            switch (this->referenceImagePointer->datatype) {
+            case NIFTI_TYPE_FLOAT32:
+                this->UpdateLocalStatImages<float>(this->referenceImagePointer,
+                                                   this->warpedFloatingImagePointer,
+                                                   this->referenceMeanImage,
+                                                   this->warpedFloatingMeanImage,
+                                                   this->referenceSdevImage,
+                                                   this->warpedFloatingSdevImage,
+                                                   this->referenceMaskPointer,
+                                                   this->forwardMask,
+                                                   current_timepoint);
+                break;
+            case NIFTI_TYPE_FLOAT64:
+                this->UpdateLocalStatImages<double>(this->referenceImagePointer,
+                                                    this->warpedFloatingImagePointer,
+                                                    this->referenceMeanImage,
+                                                    this->warpedFloatingMeanImage,
+                                                    this->referenceSdevImage,
+                                                    this->warpedFloatingSdevImage,
+                                                    this->referenceMaskPointer,
+                                                    this->forwardMask,
+                                                    current_timepoint);
+                break;
+            }
+
+            // Compute the LNCC - Forward
+            switch (this->referenceImagePointer->datatype) {
+            case NIFTI_TYPE_FLOAT32:
+                tp_value += reg_getLNCCValue<float>(this->referenceImagePointer,
+                                                    this->referenceMeanImage,
+                                                    this->referenceSdevImage,
+                                                    this->warpedFloatingImagePointer,
+                                                    this->warpedFloatingMeanImage,
+                                                    this->warpedFloatingSdevImage,
+                                                    this->forwardMask,
+                                                    this->kernelStandardDeviation,
+                                                    this->forwardCorrelationImage,
+                                                    this->kernelType,
+                                                    current_timepoint);
+                break;
+            case NIFTI_TYPE_FLOAT64:
+                tp_value += reg_getLNCCValue<double>(this->referenceImagePointer,
+                                                     this->referenceMeanImage,
+                                                     this->referenceSdevImage,
+                                                     this->warpedFloatingImagePointer,
+                                                     this->warpedFloatingMeanImage,
+                                                     this->warpedFloatingSdevImage,
+                                                     this->forwardMask,
+                                                     this->kernelStandardDeviation,
+                                                     this->forwardCorrelationImage,
+                                                     this->kernelType,
+                                                     current_timepoint);
+                break;
+            }
+            if (this->isSymmetric) {
+                // Compute the mean and variance of the floating and warped reference
+                switch (this->floatingImagePointer->datatype) {
+                case NIFTI_TYPE_FLOAT32:
+                    this->UpdateLocalStatImages<float>(this->floatingImagePointer,
+                                                       this->warpedReferenceImagePointer,
+                                                       this->floatingMeanImage,
+                                                       this->warpedReferenceMeanImage,
+                                                       this->floatingSdevImage,
+                                                       this->warpedReferenceSdevImage,
+                                                       this->floatingMaskPointer,
+                                                       this->backwardMask,
+                                                       current_timepoint);
+                    break;
+                case NIFTI_TYPE_FLOAT64:
+                    this->UpdateLocalStatImages<double>(this->floatingImagePointer,
+                                                        this->warpedReferenceImagePointer,
+                                                        this->floatingMeanImage,
+                                                        this->warpedReferenceMeanImage,
+                                                        this->floatingSdevImage,
+                                                        this->warpedReferenceSdevImage,
+                                                        this->floatingMaskPointer,
+                                                        this->backwardMask,
+                                                        current_timepoint);
+                    break;
+                }
+                // Compute the LNCC - Backward
+                switch (this->floatingImagePointer->datatype) {
+                case NIFTI_TYPE_FLOAT32:
+                    tp_value += reg_getLNCCValue<float>(this->floatingImagePointer,
+                                                        this->floatingMeanImage,
+                                                        this->floatingSdevImage,
+                                                        this->warpedReferenceImagePointer,
+                                                        this->warpedReferenceMeanImage,
+                                                        this->warpedReferenceSdevImage,
+                                                        this->backwardMask,
+                                                        this->kernelStandardDeviation,
+                                                        this->backwardCorrelationImage,
+                                                        this->kernelType,
+                                                        current_timepoint);
+                    break;
+                case NIFTI_TYPE_FLOAT64:
+                    tp_value += reg_getLNCCValue<double>(this->floatingImagePointer,
+                                                         this->floatingMeanImage,
+                                                         this->floatingSdevImage,
+                                                         this->warpedReferenceImagePointer,
+                                                         this->warpedReferenceMeanImage,
+                                                         this->warpedReferenceSdevImage,
+                                                         this->backwardMask,
+                                                         this->kernelStandardDeviation,
+                                                         this->backwardCorrelationImage,
+                                                         this->kernelType,
+                                                         current_timepoint);
+                    break;
+                }
+            }
+            lncc_value += tp_value * this->timePointWeight[current_timepoint];
+        }
+    }
+    return lncc_value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -526,291 +473,269 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    nifti_image *measureGradientImage,
                                    int kernelType,
                                    int current_timepoint,
-                           double timepoint_weight)
-{
+                                   double timepoint_weight) {
 #ifdef _WIN32
-   long voxel;
-   long voxelNumber=(long)referenceImage->nx*
-         referenceImage->ny*referenceImage->nz;
+    long voxel;
+    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #else
-   size_t voxel;
-   size_t voxelNumber=(size_t)referenceImage->nx*
-         referenceImage->ny*referenceImage->nz;
+    size_t voxel;
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #endif
 
-   // Compute the local correlation
-   DTYPE *refImagePtr=static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *currentRefPtr = &refImagePtr[current_timepoint*voxelNumber];
+    // Compute the local correlation
+    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
+    DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
 
-   DTYPE *warImagePtr=static_cast<DTYPE *>(warpedImage->data);
-   DTYPE *currentWarPtr = &warImagePtr[current_timepoint*voxelNumber];
+    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
+    DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
 
-   DTYPE *refMeanPtr=static_cast<DTYPE *>(referenceMeanImage->data);
-   DTYPE *warMeanPtr=static_cast<DTYPE *>(warpedMeanImage->data);
-   DTYPE *refSdevPtr=static_cast<DTYPE *>(referenceSdevImage->data);
-   DTYPE *warSdevPtr=static_cast<DTYPE *>(warpedSdevImage->data);
-   DTYPE *correlaPtr=static_cast<DTYPE *>(correlationImage->data);
+    DTYPE *refMeanPtr = static_cast<DTYPE*>(referenceMeanImage->data);
+    DTYPE *warMeanPtr = static_cast<DTYPE*>(warpedMeanImage->data);
+    DTYPE *refSdevPtr = static_cast<DTYPE*>(referenceSdevImage->data);
+    DTYPE *warSdevPtr = static_cast<DTYPE*>(warpedSdevImage->data);
+    DTYPE *correlaPtr = static_cast<DTYPE*>(correlationImage->data);
 
-   for(size_t i=0; i<voxelNumber; ++i)
-      correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i];
+    for (size_t i = 0; i < voxelNumber; ++i)
+        correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i];
 
-   reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
+    reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
 
-   double refMeanValue, warMeanValue, refSdevValue,
-         warSdevValue, correlaValue;
-   double temp1, temp2, temp3;
-   double activeVoxel_num = 0.;
+    double refMeanValue, warMeanValue, refSdevValue, warSdevValue, correlaValue;
+    double temp1, temp2, temp3;
+    double activeVoxel_num = 0;
 
-   // Iteration over all voxels
+    // Iteration over all voxels
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \
-   refSdevPtr,warSdevPtr,correlaPtr) \
-   private(voxel,refMeanValue,warMeanValue,refSdevValue, \
-   warSdevValue, correlaValue, temp1, temp2, temp3) \
-   reduction(+:activeVoxel_num)
+    shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \
+    refSdevPtr,warSdevPtr,correlaPtr) \
+    private(voxel,refMeanValue,warMeanValue,refSdevValue, \
+    warSdevValue, correlaValue, temp1, temp2, temp3) \
+    reduction(+:activeVoxel_num)
 #endif
-   for(voxel=0; voxel<voxelNumber; ++voxel)
-   {
-      // Check if the current voxel belongs to the mask
-      if(combinedMask[voxel]>-1)
-      {
-
-         refMeanValue = refMeanPtr[voxel];
-         warMeanValue = warMeanPtr[voxel];
-         refSdevValue = refSdevPtr[voxel];
-         warSdevValue = warSdevPtr[voxel];
-         correlaValue = correlaPtr[voxel] - (refMeanValue*warMeanValue);
-
-         temp1 = 1.0 / (refSdevValue * warSdevValue);
-         temp2 = correlaValue /
-               (refSdevValue*warSdevValue*warSdevValue*warSdevValue);
-         temp3 = (correlaValue * warMeanValue) /
-               (refSdevValue*warSdevValue*warSdevValue*warSdevValue)
-               -
-               refMeanValue / (refSdevValue * warSdevValue);
-         if(temp1==temp1 && isinf(temp1)==0 &&
-               temp2==temp2 && isinf(temp2)==0 &&
-               temp3==temp3 && isinf(temp3)==0)
-         {
-            // Derivative of the absolute function
-            if(correlaValue<0)
-            {
-               temp1 *= -1.;
-               temp2 *= -1.;
-               temp3 *= -1.;
-            }
-            warMeanPtr[voxel]=temp1;
-            warSdevPtr[voxel]=temp2;
-            correlaPtr[voxel]=temp3;
-         activeVoxel_num++;
-         }
-         else warMeanPtr[voxel]=warSdevPtr[voxel]=correlaPtr[voxel]=0.;
-      }
-      else warMeanPtr[voxel]=warSdevPtr[voxel]=correlaPtr[voxel]=0.;
-   }
-
-   //adjust weight for number of voxels
-   double adjusted_weight = timepoint_weight / activeVoxel_num;
-
-   // Smooth the newly computed values
-   reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
-   reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask);
-   reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
-   DTYPE *measureGradPtrX = static_cast<DTYPE *>(measureGradientImage->data);
-   DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-   DTYPE *measureGradPtrZ = nullptr;
-   if(referenceImage->nz>1)
-      measureGradPtrZ = &measureGradPtrY[voxelNumber];
-
-   // Create pointers to the spatial gradient of the warped image
-   DTYPE *warpGradPtrX = static_cast<DTYPE *>(warpedGradient->data);
-   DTYPE *warpGradPtrY = &warpGradPtrX[voxelNumber];
-   DTYPE *warpGradPtrZ = nullptr;
-   if(referenceImage->nz>1)
-      warpGradPtrZ=&warpGradPtrY[voxelNumber];
-
-   double common;
-   // Iteration over all voxels
+    for (voxel = 0; voxel < voxelNumber; ++voxel) {
+        // Check if the current voxel belongs to the mask
+        if (combinedMask[voxel] > -1) {
+
+            refMeanValue = refMeanPtr[voxel];
+            warMeanValue = warMeanPtr[voxel];
+            refSdevValue = refSdevPtr[voxel];
+            warSdevValue = warSdevPtr[voxel];
+            correlaValue = correlaPtr[voxel] - (refMeanValue * warMeanValue);
+
+            temp1 = 1.0 / (refSdevValue * warSdevValue);
+            temp2 = correlaValue /
+                (refSdevValue * warSdevValue * warSdevValue * warSdevValue);
+            temp3 = (correlaValue * warMeanValue) /
+                (refSdevValue * warSdevValue * warSdevValue * warSdevValue)
+                -
+                refMeanValue / (refSdevValue * warSdevValue);
+            if (temp1 == temp1 && isinf(temp1) == 0 &&
+                temp2 == temp2 && isinf(temp2) == 0 &&
+                temp3 == temp3 && isinf(temp3) == 0) {
+                // Derivative of the absolute function
+                if (correlaValue < 0) {
+                    temp1 *= -1;
+                    temp2 *= -1;
+                    temp3 *= -1;
+                }
+                warMeanPtr[voxel] = temp1;
+                warSdevPtr[voxel] = temp2;
+                correlaPtr[voxel] = temp3;
+                activeVoxel_num++;
+            } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0;
+        } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0;
+    }
+
+    //adjust weight for number of voxels
+    double adjusted_weight = timepoint_weight / activeVoxel_num;
+
+    // Smooth the newly computed values
+    reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
+    reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask);
+    reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
+    DTYPE *measureGradPtrX = static_cast<DTYPE*>(measureGradientImage->data);
+    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
+    DTYPE *measureGradPtrZ = nullptr;
+    if (referenceImage->nz > 1)
+        measureGradPtrZ = &measureGradPtrY[voxelNumber];
+
+    // Create pointers to the spatial gradient of the warped image
+    DTYPE *warpGradPtrX = static_cast<DTYPE*>(warpedGradient->data);
+    DTYPE *warpGradPtrY = &warpGradPtrX[voxelNumber];
+    DTYPE *warpGradPtrZ = nullptr;
+    if (referenceImage->nz > 1)
+        warpGradPtrZ = &warpGradPtrY[voxelNumber];
+
+    double common;
+    // Iteration over all voxels
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \
-   warMeanPtr,warSdevPtr,correlaPtr,measureGradPtrX,measureGradPtrY, \
-   measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjusted_weight) \
-   private(voxel, common)
+    shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \
+    warMeanPtr,warSdevPtr,correlaPtr,measureGradPtrX,measureGradPtrY, \
+    measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjusted_weight) \
+    private(voxel, common)
 #endif
-   for(voxel=0; voxel<voxelNumber; ++voxel)
-   {
-      // Check if the current voxel belongs to the mask
-      if(combinedMask[voxel]>-1)
-      {
-         common = warMeanPtr[voxel] * currentRefPtr[voxel] -
-               warSdevPtr[voxel] * currentWarPtr[voxel] +
-               correlaPtr[voxel];
-       common *= adjusted_weight;
-         measureGradPtrX[voxel] -= warpGradPtrX[voxel] * common;
-         measureGradPtrY[voxel] -= warpGradPtrY[voxel] * common;
-         if(warpGradPtrZ!=nullptr)
-            measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * common;
-      }
-   }
-   // Check for NaN
-   DTYPE val;
+    for (voxel = 0; voxel < voxelNumber; ++voxel) {
+        // Check if the current voxel belongs to the mask
+        if (combinedMask[voxel] > -1) {
+            common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlaPtr[voxel];
+            common *= adjusted_weight;
+            measureGradPtrX[voxel] -= warpGradPtrX[voxel] * common;
+            measureGradPtrY[voxel] -= warpGradPtrY[voxel] * common;
+            if (warpGradPtrZ != nullptr)
+                measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * common;
+        }
+    }
+    // Check for NaN
+    DTYPE val;
 #ifdef _WIN32
-   voxelNumber = (long)measureGradientImage->nvox;
+    voxelNumber = (long)measureGradientImage->nvox;
 #else
-   voxelNumber=measureGradientImage->nvox;
+    voxelNumber = measureGradientImage->nvox;
 #endif
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(voxelNumber,measureGradPtrX) \
-   private(voxel, val)
+    shared(voxelNumber,measureGradPtrX) \
+    private(voxel, val)
 #endif
-   for(voxel=0; voxel<voxelNumber; ++voxel)
-   {
-      val=measureGradPtrX[voxel];
-      if(val!=val || isinf(val)!=0)
-         measureGradPtrX[voxel]=static_cast<DTYPE>(0);
-   }
+    for (voxel = 0; voxel < voxelNumber; ++voxel) {
+        val = measureGradPtrX[voxel];
+        if (val != val || isinf(val) != 0)
+            measureGradPtrX[voxel] = static_cast<DTYPE>(0);
+    }
 }
 /* *************************************************************** */
 /* *************************************************************** */
-void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
-{
-   // Check if the specified time point exists and is active
-   reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-   if(this->timePointWeight[current_timepoint]==0.0)
-      return;
-
-   // Compute the mean and variance of the reference and warped floating
-   switch(this->referenceImagePointer->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      this->UpdateLocalStatImages<float>(this->referenceImagePointer,
-                                         this->warpedFloatingImagePointer,
-                                         this->referenceMeanImage,
-                                         this->warpedFloatingMeanImage,
-                                         this->referenceSdevImage,
-                                         this->warpedFloatingSdevImage,
-                                         this->referenceMaskPointer,
-                                         this->forwardMask,
-                                         current_timepoint);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      this->UpdateLocalStatImages<double>(this->referenceImagePointer,
-                                          this->warpedFloatingImagePointer,
-                                          this->referenceMeanImage,
-                                          this->warpedFloatingMeanImage,
-                                          this->referenceSdevImage,
-                                          this->warpedFloatingSdevImage,
-                                          this->referenceMaskPointer,
-                                          this->forwardMask,
-                                          current_timepoint);
-      break;
-   }
-
-   // Compute the LNCC gradient - Forward
-   switch(this->referenceImagePointer->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      reg_getVoxelBasedLNCCGradient<float>(this->referenceImagePointer,
-                                           this->referenceMeanImage,
-                                           this->referenceSdevImage,
+void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+    // Check if the specified time point exists and is active
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
+    if (this->timePointWeight[current_timepoint] == 0)
+        return;
+
+    // Compute the mean and variance of the reference and warped floating
+    switch (this->referenceImagePointer->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        this->UpdateLocalStatImages<float>(this->referenceImagePointer,
                                            this->warpedFloatingImagePointer,
+                                           this->referenceMeanImage,
                                            this->warpedFloatingMeanImage,
+                                           this->referenceSdevImage,
                                            this->warpedFloatingSdevImage,
+                                           this->referenceMaskPointer,
                                            this->forwardMask,
-                                           this->kernelStandardDeviation,
-                                           this->forwardCorrelationImage,
-                                           this->warpedFloatingGradientImagePointer,
-                                           this->forwardVoxelBasedGradientImagePointer,
-                                           this->kernelType,
-                                           current_timepoint,
-                                 this->timePointWeight[current_timepoint]);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_getVoxelBasedLNCCGradient<double>(this->referenceImagePointer,
-                                            this->referenceMeanImage,
-                                            this->referenceSdevImage,
+                                           current_timepoint);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        this->UpdateLocalStatImages<double>(this->referenceImagePointer,
                                             this->warpedFloatingImagePointer,
+                                            this->referenceMeanImage,
                                             this->warpedFloatingMeanImage,
+                                            this->referenceSdevImage,
                                             this->warpedFloatingSdevImage,
+                                            this->referenceMaskPointer,
                                             this->forwardMask,
-                                            this->kernelStandardDeviation,
-                                            this->forwardCorrelationImage,
-                                            this->warpedFloatingGradientImagePointer,
-                                            this->forwardVoxelBasedGradientImagePointer,
-                                            this->kernelType,
-                                 current_timepoint,
-                                 this->timePointWeight[current_timepoint]);
-      break;
-   }
-   if(this->isSymmetric)
-   {
-      // Compute the mean and variance of the floating and warped reference
-      switch(this->floatingImagePointer->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         this->UpdateLocalStatImages<float>(this->floatingImagePointer,
-                                            this->warpedReferenceImagePointer,
-                                            this->floatingMeanImage,
-                                            this->warpedReferenceMeanImage,
-                                            this->floatingSdevImage,
-                                            this->warpedReferenceSdevImage,
-                                            this->floatingMaskPointer,
-                                            this->backwardMask,
                                             current_timepoint);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         this->UpdateLocalStatImages<double>(this->floatingImagePointer,
-                                             this->warpedReferenceImagePointer,
-                                             this->floatingMeanImage,
-                                             this->warpedReferenceMeanImage,
-                                             this->floatingSdevImage,
-                                             this->warpedReferenceSdevImage,
-                                             this->floatingMaskPointer,
-                                             this->backwardMask,
-                                             current_timepoint);
-         break;
-      }
-      // Compute the LNCC gradient - Backward
-      switch(this->floatingImagePointer->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_getVoxelBasedLNCCGradient<float>(this->floatingImagePointer,
-                                              this->floatingMeanImage,
-                                              this->floatingSdevImage,
-                                              this->warpedReferenceImagePointer,
-                                              this->warpedReferenceMeanImage,
-                                              this->warpedReferenceSdevImage,
-                                              this->backwardMask,
+        break;
+    }
+
+    // Compute the LNCC gradient - Forward
+    switch (this->referenceImagePointer->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_getVoxelBasedLNCCGradient<float>(this->referenceImagePointer,
+                                             this->referenceMeanImage,
+                                             this->referenceSdevImage,
+                                             this->warpedFloatingImagePointer,
+                                             this->warpedFloatingMeanImage,
+                                             this->warpedFloatingSdevImage,
+                                             this->forwardMask,
+                                             this->kernelStandardDeviation,
+                                             this->forwardCorrelationImage,
+                                             this->warpedFloatingGradientImagePointer,
+                                             this->forwardVoxelBasedGradientImagePointer,
+                                             this->kernelType,
+                                             current_timepoint,
+                                             this->timePointWeight[current_timepoint]);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_getVoxelBasedLNCCGradient<double>(this->referenceImagePointer,
+                                              this->referenceMeanImage,
+                                              this->referenceSdevImage,
+                                              this->warpedFloatingImagePointer,
+                                              this->warpedFloatingMeanImage,
+                                              this->warpedFloatingSdevImage,
+                                              this->forwardMask,
                                               this->kernelStandardDeviation,
-                                              this->backwardCorrelationImage,
-                                              this->warpedReferenceGradientImagePointer,
-                                              this->backwardVoxelBasedGradientImagePointer,
+                                              this->forwardCorrelationImage,
+                                              this->warpedFloatingGradientImagePointer,
+                                              this->forwardVoxelBasedGradientImagePointer,
                                               this->kernelType,
-                                   current_timepoint,
-                                   this->timePointWeight[current_timepoint]);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_getVoxelBasedLNCCGradient<double>(this->floatingImagePointer,
-                                               this->floatingMeanImage,
-                                               this->floatingSdevImage,
+                                              current_timepoint,
+                                              this->timePointWeight[current_timepoint]);
+        break;
+    }
+    if (this->isSymmetric) {
+        // Compute the mean and variance of the floating and warped reference
+        switch (this->floatingImagePointer->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            this->UpdateLocalStatImages<float>(this->floatingImagePointer,
                                                this->warpedReferenceImagePointer,
+                                               this->floatingMeanImage,
                                                this->warpedReferenceMeanImage,
+                                               this->floatingSdevImage,
                                                this->warpedReferenceSdevImage,
+                                               this->floatingMaskPointer,
                                                this->backwardMask,
-                                               this->kernelStandardDeviation,
-                                               this->backwardCorrelationImage,
-                                               this->warpedReferenceGradientImagePointer,
-                                               this->backwardVoxelBasedGradientImagePointer,
-                                               this->kernelType,
-                                    current_timepoint,
-                                    this->timePointWeight[current_timepoint]);
-         break;
-      }
-   }
-   return;
+                                               current_timepoint);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            this->UpdateLocalStatImages<double>(this->floatingImagePointer,
+                                                this->warpedReferenceImagePointer,
+                                                this->floatingMeanImage,
+                                                this->warpedReferenceMeanImage,
+                                                this->floatingSdevImage,
+                                                this->warpedReferenceSdevImage,
+                                                this->floatingMaskPointer,
+                                                this->backwardMask,
+                                                current_timepoint);
+            break;
+        }
+        // Compute the LNCC gradient - Backward
+        switch (this->floatingImagePointer->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_getVoxelBasedLNCCGradient<float>(this->floatingImagePointer,
+                                                 this->floatingMeanImage,
+                                                 this->floatingSdevImage,
+                                                 this->warpedReferenceImagePointer,
+                                                 this->warpedReferenceMeanImage,
+                                                 this->warpedReferenceSdevImage,
+                                                 this->backwardMask,
+                                                 this->kernelStandardDeviation,
+                                                 this->backwardCorrelationImage,
+                                                 this->warpedReferenceGradientImagePointer,
+                                                 this->backwardVoxelBasedGradientImagePointer,
+                                                 this->kernelType,
+                                                 current_timepoint,
+                                                 this->timePointWeight[current_timepoint]);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_getVoxelBasedLNCCGradient<double>(this->floatingImagePointer,
+                                                  this->floatingMeanImage,
+                                                  this->floatingSdevImage,
+                                                  this->warpedReferenceImagePointer,
+                                                  this->warpedReferenceMeanImage,
+                                                  this->warpedReferenceSdevImage,
+                                                  this->backwardMask,
+                                                  this->kernelStandardDeviation,
+                                                  this->backwardCorrelationImage,
+                                                  this->warpedReferenceGradientImagePointer,
+                                                  this->backwardVoxelBasedGradientImagePointer,
+                                                  this->kernelType,
+                                                  current_timepoint,
+                                                  this->timePointWeight[current_timepoint]);
+            break;
+        }
+    }
 }
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index d626c113..ed286ca5 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -14,10 +14,9 @@
 
 #include "_reg_measure.h"
 
-/* *************************************************************** */
-/* *************************************************************** */
-class reg_lncc : public reg_measure
-{
+ /* *************************************************************** */
+ /* *************************************************************** */
+class reg_lncc: public reg_measure {
 public:
     /// @brief reg_lncc class constructor
     reg_lncc();
@@ -50,33 +49,33 @@ class reg_lncc : public reg_measure
     }
 
 protected:
-   float kernelStandardDeviation[255];
-   nifti_image *forwardCorrelationImage;
-   nifti_image *referenceMeanImage;
-   nifti_image *referenceSdevImage;
-   nifti_image *warpedFloatingMeanImage;
-   nifti_image *warpedFloatingSdevImage;
-   int *forwardMask;
+    float kernelStandardDeviation[255];
+    nifti_image *forwardCorrelationImage;
+    nifti_image *referenceMeanImage;
+    nifti_image *referenceSdevImage;
+    nifti_image *warpedFloatingMeanImage;
+    nifti_image *warpedFloatingSdevImage;
+    int *forwardMask;
 
-   nifti_image *backwardCorrelationImage;
-   nifti_image *floatingMeanImage;
-   nifti_image *floatingSdevImage;
-   nifti_image *warpedReferenceMeanImage;
-   nifti_image *warpedReferenceSdevImage;
-   int *backwardMask;
+    nifti_image *backwardCorrelationImage;
+    nifti_image *floatingMeanImage;
+    nifti_image *floatingSdevImage;
+    nifti_image *warpedReferenceMeanImage;
+    nifti_image *warpedReferenceSdevImage;
+    int *backwardMask;
 
-   int kernelType;
+    int kernelType;
 
-   template <class DTYPE>
-   void UpdateLocalStatImages(nifti_image *refImage,
-                              nifti_image *warImage,
-                              nifti_image *meanRefImage,
-                              nifti_image *meanWarImage,
-                              nifti_image *stdDevRefImage,
-                              nifti_image *stdDevWarImage,
-                              int *refMask,
-                              int *mask,
-                              int current_timepoint);
+    template <class DTYPE>
+    void UpdateLocalStatImages(nifti_image *refImage,
+                               nifti_image *warImage,
+                               nifti_image *meanRefImage,
+                               nifti_image *meanWarImage,
+                               nifti_image *stdDevRefImage,
+                               nifti_image *stdDevWarImage,
+                               int *refMask,
+                               int *mask,
+                               int current_timepoint);
 };
 /* *************************************************************** */
 /* *************************************************************** */
@@ -127,4 +126,4 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    nifti_image *lnccGradientImage,
                                    int kernelType,
                                    int current_timepoint,
-                           double timepoint_weight);
+                                   double timepoint_weight);
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 35aaa0b5..a0fee955 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -486,22 +486,22 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
                   // The spline coefficients are computed
                   xPre=(int)reg_floor(voxel[0]);
                   xBasis[1]=voxel[0]-static_cast<DTYPE>(xPre);
-                  if(xBasis[1]<0.0) xBasis[1]=0.0; //rounding error
+                  if(xBasis[1]<0) xBasis[1]=0; //rounding error
                   xBasis[0]=1.-xBasis[1];
 
                   yPre=(int)reg_floor(voxel[1]);
                   yBasis[1]=voxel[1]-static_cast<DTYPE>(yPre);
-                  if(yBasis[1]<0.0) yBasis[1]=0.0; //rounding error
+                  if(yBasis[1]<0) yBasis[1]=0; //rounding error
                   yBasis[0]=1.-yBasis[1];
 
                   zPre=(int)reg_floor(voxel[2]);
                   zBasis[1]=voxel[2]-static_cast<DTYPE>(zPre);
-                  if(zBasis[1]<0.0) zBasis[1]=0.0; //rounding error
+                  if(zBasis[1]<0) zBasis[1]=0; //rounding error
                   zBasis[0]=1.-zBasis[1];
 
-                  real[0]=0.0;
-                  real[1]=0.0;
-                  real[2]=0.0;
+                  real[0]=0;
+                  real[1]=0;
+                  real[2]=0;
                   for(c=0; c<2; c++){
                      for(b=0; b<2; b++){
                         for(a=0; a<2; a++){
@@ -541,7 +541,7 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
 
          zPre=static_cast<int>(static_cast<DTYPE>(z)/gridVoxelSpacing[2]);
          zBasis[1]=static_cast<DTYPE>(z)/gridVoxelSpacing[2]-static_cast<DTYPE>(zPre);
-         if(zBasis[1]<0.0) zBasis[1]=0.0; //rounding error
+         if(zBasis[1]<0) zBasis[1]=0; //rounding error
          zBasis[0]=1.-zBasis[1];
          zPre++;
 
@@ -550,26 +550,26 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
 
             yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
             yBasis[1]=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
-            if(yBasis[1]<0.0) yBasis[1]=0.0; //rounding error
+            if(yBasis[1]<0) yBasis[1]=0; //rounding error
             yBasis[0]=1.-yBasis[1];
             yPre++;
 
             for(x=0; x<deformationField->nx; x++)
             {
-               real[0]=0.0;
-               real[1]=0.0;
-               real[2]=0.0;
+               real[0]=0;
+               real[1]=0;
+               real[2]=0;
 
                if(mask[index]>-1)
                {
                   xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
                   xBasis[1]=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
-                  if(xBasis[1]<0.0) xBasis[1]=0.0; //rounding error
+                  if(xBasis[1]<0) xBasis[1]=0; //rounding error
                   xBasis[0]=1.-xBasis[1];
                   xPre++;
-                  real[0]=0.0;
-                  real[1]=0.0;
-                  real[2]=0.0;
+                  real[0]=0;
+                  real[1]=0;
+                  real[2]=0;
                   for(c=0; c<2; c++){
                      for(b=0; b<2; b++){
                         for(a=0; a<2; a++){
@@ -702,14 +702,14 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
             xPre=(int)reg_floor(xVoxel);
             basis=xVoxel-(DTYPE)xPre;
             --xPre;
-            if(basis<0.0) basis=0.0; //rounding error
+            if(basis<0) basis=0; //rounding error
             if(bspline) get_BSplineBasisValues<DTYPE>(basis, temp);
             else get_SplineBasisValues<DTYPE>(basis, temp);
 
             yPre=(int)reg_floor(yVoxel);
             basis=yVoxel-(DTYPE)yPre;
             --yPre;
-            if(basis<0.0) basis=0.0; //rounding error
+            if(basis<0) basis=0; //rounding error
             if(bspline) get_BSplineBasisValues<DTYPE>(basis, yBasis);
             else get_SplineBasisValues<DTYPE>(basis, yBasis);
 
@@ -747,8 +747,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                   oldXpre=xPre;
                   oldYpre=yPre;
                }
-               xReal=0.0;
-               yReal=0.0;
+               xReal=0;
+               yReal=0;
 
                if(mask[index]>-1)
                {
@@ -762,8 +762,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                      }
                   }
 
-                  tempX =  _mm_set_ps1(0.0);
-                  tempY =  _mm_set_ps1(0.0);
+                  tempX =  _mm_set_ps1(0);
+                  tempY =  _mm_set_ps1(0);
                   //addition and multiplication of the 16 basis value and CP position for each axis
                   for(a=0; a<4; a++)
                   {
@@ -821,7 +821,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 
          yPre=(int)((DTYPE)y/gridVoxelSpacing[1]);
          basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)yPre;
-         if(basis<0.0) basis=0.0; //rounding error
+         if(basis<0) basis=0; //rounding error
          if(bspline) get_BSplineBasisValues<DTYPE>(basis, yBasis);
          else get_SplineBasisValues<DTYPE>(basis, yBasis);
 
@@ -830,7 +830,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 
             xPre=(int)((DTYPE)x/gridVoxelSpacing[0]);
             basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)xPre;
-            if(basis<0.0) basis=0.0; //rounding error
+            if(basis<0) basis=0; //rounding error
             if(bspline) get_BSplineBasisValues<DTYPE>(basis, temp);
             else get_SplineBasisValues<DTYPE>(basis, temp);
 #if _USE_SSE
@@ -883,14 +883,14 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                oldYpre=yPre;
             }
 
-            xReal=0.0;
-            yReal=0.0;
+            xReal=0;
+            yReal=0;
 
             if(mask[index]>-1)
             {
 #if _USE_SSE
-               tempX =  _mm_set_ps1(0.0);
-               tempY =  _mm_set_ps1(0.0);
+               tempX =  _mm_set_ps1(0);
+               tempY =  _mm_set_ps1(0);
                //addition and multiplication of the 64 basis value and CP displacement for each axis
                for(a=0; a<4; a++)
                {
@@ -1079,21 +1079,21 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                   xPre=(int)reg_floor(voxel[0]);
                   basis=voxel[0]-static_cast<DTYPE>(xPre);
                   --xPre;
-                  if(basis<0.0) basis=0.0; //rounding error
+                  if(basis<0) basis=0; //rounding error
                   if(bspline) get_BSplineBasisValues<DTYPE>(basis, xBasis);
                   else get_SplineBasisValues<DTYPE>(basis, xBasis);
 
                   yPre=(int)reg_floor(voxel[1]);
                   basis=voxel[1]-static_cast<DTYPE>(yPre);
                   --yPre;
-                  if(basis<0.0) basis=0.0; //rounding error
+                  if(basis<0) basis=0; //rounding error
                   if(bspline) get_BSplineBasisValues<DTYPE>(basis, yBasis);
                   else get_SplineBasisValues<DTYPE>(basis, yBasis);
 
                   zPre=(int)reg_floor(voxel[2]);
                   basis=voxel[2]-static_cast<DTYPE>(zPre);
                   --zPre;
-                  if(basis<0.0) basis=0.0; //rounding error
+                  if(basis<0) basis=0; //rounding error
                   if(bspline) get_BSplineBasisValues<DTYPE>(basis, zBasis);
                   else get_SplineBasisValues<DTYPE>(basis, zBasis);
 
@@ -1135,9 +1135,9 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                   }
 
 #if _USE_SSE
-                  tempX =  _mm_set_ps1(0.0);
-                  tempY =  _mm_set_ps1(0.0);
-                  tempZ =  _mm_set_ps1(0.0);
+                  tempX =  _mm_set_ps1(0);
+                  tempY =  _mm_set_ps1(0);
+                  tempZ =  _mm_set_ps1(0);
                   val.f[0] = xBasis[0];
                   val.f[1] = xBasis[1];
                   val.f[2] = xBasis[2];
@@ -1167,9 +1167,9 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                   val.m = tempZ;
                   real[2] = val.f[0]+val.f[1]+val.f[2]+val.f[3];
 #else
-                  real[0]=0.0;
-                  real[1]=0.0;
-                  real[2]=0.0;
+                  real[0]=0;
+                  real[1]=0;
+                  real[2]=0;
                   coord=0;
                   for(c=0; c<4; c++)
                   {
@@ -1384,9 +1384,9 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                           x = xPre*5+a;
                                           if(x<deformationField->nx && mask[index]>-1){
 #if _USE_SSE
-                                              tempX =  _mm_set_ps1(0.0);
-                                              tempY =  _mm_set_ps1(0.0);
-                                              tempZ =  _mm_set_ps1(0.0);
+                                              tempX =  _mm_set_ps1(0);
+                                              tempY =  _mm_set_ps1(0);
+                                              tempZ =  _mm_set_ps1(0);
                                               for(coord=0;coord<16;++coord){
                                                   val.m = _mm_set_ps(coefficients[coeff_index+3],
                                                         coefficients[coeff_index+2],
@@ -1474,7 +1474,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 
               zPre=static_cast<int>(static_cast<DTYPE>(z)/gridVoxelSpacing[2]);
               basis=static_cast<DTYPE>(z)/gridVoxelSpacing[2]-static_cast<DTYPE>(zPre);
-              if(basis<0.0) basis=0.0; //rounding error
+              if(basis<0) basis=0; //rounding error
               if(bspline) get_BSplineBasisValues<DTYPE>(basis, zBasis);
               else get_SplineBasisValues<DTYPE>(basis, zBasis);
 
@@ -1483,7 +1483,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 
                   yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
                   basis=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
-                  if(basis<0.0) basis=0.0; //rounding error
+                  if(basis<0) basis=0; //rounding error
                   if(bspline) get_BSplineBasisValues<DTYPE>(basis, temp);
                   else get_SplineBasisValues<DTYPE>(basis, temp);
 #if _USE_SSE
@@ -1513,7 +1513,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 
                       xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
                       basis=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
-                      if(basis<0.0) basis=0.0; //rounding error
+                      if(basis<0) basis=0; //rounding error
                       if(bspline) get_BSplineBasisValues<DTYPE>(basis, temp);
                       else get_SplineBasisValues<DTYPE>(basis, temp);
 #if _USE_SSE
@@ -1572,16 +1572,16 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                       }
                       oldBasis=basis;
 
-                      real[0]=0.0;
-                      real[1]=0.0;
-                      real[2]=0.0;
+                      real[0]=0;
+                      real[1]=0;
+                      real[2]=0;
 
                       if(mask[index]>-1)
                       {
 #if _USE_SSE
-                          tempX =  _mm_set_ps1(0.0);
-                          tempY =  _mm_set_ps1(0.0);
-                          tempZ =  _mm_set_ps1(0.0);
+                          tempX =  _mm_set_ps1(0);
+                          tempY =  _mm_set_ps1(0);
+                          tempZ =  _mm_set_ps1(0);
                           //addition and multiplication of the 64 basis value and CP displacement for each axis
                           for(a=0; a<16; a++)
                           {
@@ -1952,7 +1952,7 @@ template<class SplineTYPE>
 SplineTYPE GetValue(SplineTYPE *array, int *dim, int x, int y, int z)
 {
    if(x<0 || x>= dim[1] || y<0 || y>= dim[2] || z<0 || z>= dim[3])
-      return 0.0;
+      return 0;
    return array[(z*dim[2]+y)*dim[1]+x];
 }
 /* *************************************************************** */
@@ -3058,7 +3058,7 @@ nmsimplex_move_corner (const double coeff, nmsimplex_state_t *state,
 
    for (j = 0; j < (size_t)state->nvec; j++)
    {
-      mp = 0.0;
+      mp = 0;
       for (i = 0; i < (size_t)state->nsimplex; i++)
       {
          if (i != corner)
@@ -3124,7 +3124,7 @@ nmsimplex_calc_center (const nmsimplex_state_t *state, double *mp)
 
    for (j = 0; j < (size_t)state->nvec; j++)
    {
-      val = 0.0;
+      val = 0;
       for (i = 0; i < (size_t)state->nsimplex; i++)
       {
          val += x1[i*state->nvec + j];
@@ -3149,7 +3149,7 @@ nmsimplex_size (nmsimplex_state_t *state)
 
    size_t i, j;
 
-   double t, ss = 0.0;
+   double t, ss = 0;
 
    /* Calculate middle point */
    nmsimplex_calc_center (state, mp);
@@ -3560,14 +3560,14 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
          int xPre=(int)(reg_floor(xVoxel));
          basis=(DTYPE)xVoxel-(DTYPE)xPre;
          xPre--;
-         if(basis<0.0) basis=0.0; //rounding error
+         if(basis<0) basis=0; //rounding error
          if(bspline) get_BSplineBasisValues<DTYPE>(basis, xBasis);
          else get_SplineBasisValues<DTYPE>(basis, xBasis);
 
          int yPre=(int)(reg_floor(yVoxel));
          basis=(DTYPE)yVoxel-(DTYPE)yPre;
          yPre--;
-         if(basis<0.0) basis=0.0; //rounding error
+         if(basis<0) basis=0; //rounding error
          if(bspline) get_BSplineBasisValues<DTYPE>(basis, yBasis);
          else get_SplineBasisValues<DTYPE>(basis, yBasis);
 
@@ -3582,8 +3582,8 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
                                false, // no approximation
                                displacement1 // displacement field?
                                );
-         xReal=0.0;
-         yReal=0.0;
+         xReal=0;
+         yReal=0;
  #if _USE_SSE
          coord=0;
          for(unsigned int b=0; b<4; b++)
@@ -3594,8 +3594,8 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
             }
          }
 
-         __m128 tempX =  _mm_set_ps1(0.0);
-         __m128 tempY =  _mm_set_ps1(0.0);
+         __m128 tempX =  _mm_set_ps1(0);
+         __m128 tempY =  _mm_set_ps1(0);
          __m128 *ptrX = (__m128 *) &xControlPointCoordinates[0];
          __m128 *ptrY = (__m128 *) &yControlPointCoordinates[0];
          __m128 *ptrBasis   = (__m128 *) &xyBasis[0];
@@ -3788,19 +3788,19 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
             // The spline coefficients are computed
             xPre=(int)(reg_floor(xVoxel));
             basis=(DTYPE)xVoxel-(DTYPE)xPre;
-            if(basis<0.0) basis=0.0; //rounding error
+            if(basis<0) basis=0; //rounding error
             if(bspline) get_BSplineBasisValues<DTYPE>(basis, xBasis);
             else get_SplineBasisValues<DTYPE>(basis, xBasis);
 
             yPre=(int)(reg_floor(yVoxel));
             basis=(DTYPE)yVoxel-(DTYPE)yPre;
-            if(basis<0.0) basis=0.0; //rounding error
+            if(basis<0) basis=0; //rounding error
             if(bspline) get_BSplineBasisValues<DTYPE>(basis, yBasis);
             else get_SplineBasisValues<DTYPE>(basis, yBasis);
 
             zPre=(int)(reg_floor(zVoxel));
             basis=(DTYPE)zVoxel-(DTYPE)zPre;
-            if(basis<0.0) basis=0.0; //rounding error
+            if(basis<0) basis=0; //rounding error
             if(bspline) get_BSplineBasisValues<DTYPE>(basis, zBasis);
             else get_SplineBasisValues<DTYPE>(basis, zBasis);
 
@@ -3828,9 +3828,9 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
                yPreOld=yPre;
                zPreOld=zPre;
             }
-            xReal=0.0;
-            yReal=0.0;
-            zReal=0.0;
+            xReal=0;
+            yReal=0;
+            zReal=0;
  #if _USE_SSE
             val.f[0] = xBasis[0];
             val.f[1] = xBasis[1];
@@ -3838,9 +3838,9 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
             val.f[3] = xBasis[3];
             _xBasis_sse = val.m;
 
-            tempX =  _mm_set_ps1(0.0);
-            tempY =  _mm_set_ps1(0.0);
-            tempZ =  _mm_set_ps1(0.0);
+            tempX =  _mm_set_ps1(0);
+            tempY =  _mm_set_ps1(0);
+            tempZ =  _mm_set_ps1(0);
             ptrX = (__m128 *) &xControlPointCoordinates[0];
             ptrY = (__m128 *) &yControlPointCoordinates[0];
             ptrZ = (__m128 *) &zControlPointCoordinates[0];
@@ -4593,7 +4593,7 @@ void intensitiesToSplineCoefficients(DTYPE *values, int number)
    DTYPE pole = sqrt(3.0) - 2.0;
    DTYPE currentPole = pole;
    DTYPE currentOpposite = pow(pole,(DTYPE)(2.0*(DTYPE)number-1.0));
-   DTYPE sum=0.0;
+   DTYPE sum=0;
    for(int i=1; i<number; i++)
    {
       sum += (currentPole - currentOpposite) * values[i];
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index d98f471d..bfb86338 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -477,7 +477,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
 
             pre[1]=(int)((DTYPE)y/gridVoxelSpacing[1]);
             basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)pre[1];
-            if(basis<0.0) basis=0.0; //rounding error
+            if(basis<0) basis=0; //rounding error
             get_BSplineBasisValues<DTYPE>(basis, yBasis, yFirst);
 
             for(x=0; x<referenceImage->nx; x++)
@@ -485,7 +485,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
 
                pre[0]=(int)((DTYPE)x/gridVoxelSpacing[0]);
                basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)pre[0];
-               if(basis<0.0) basis=0.0; //rounding error
+               if(basis<0) basis=0; //rounding error
                get_BSplineBasisValues<DTYPE>(basis, xBasis, xFirst);
 
                coord=0;
@@ -929,15 +929,15 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   }
                   // Compute the Jacobian matrix
 #if _USE_SSE
-                  tempX_x =  _mm_set_ps1(0.0);
-                  tempX_y =  _mm_set_ps1(0.0);
-                  tempX_z =  _mm_set_ps1(0.0);
-                  tempY_x =  _mm_set_ps1(0.0);
-                  tempY_y =  _mm_set_ps1(0.0);
-                  tempY_z =  _mm_set_ps1(0.0);
-                  tempZ_x =  _mm_set_ps1(0.0);
-                  tempZ_y =  _mm_set_ps1(0.0);
-                  tempZ_z =  _mm_set_ps1(0.0);
+                  tempX_x =  _mm_set_ps1(0);
+                  tempX_y =  _mm_set_ps1(0);
+                  tempX_z =  _mm_set_ps1(0);
+                  tempY_x =  _mm_set_ps1(0);
+                  tempY_y =  _mm_set_ps1(0);
+                  tempY_z =  _mm_set_ps1(0);
+                  tempZ_x =  _mm_set_ps1(0);
+                  tempZ_y =  _mm_set_ps1(0);
+                  tempZ_z =  _mm_set_ps1(0);
                   //addition and multiplication of the 16 basis value and CP position for each axis
                   for(incr0=0; incr0<16; ++incr0)
                   {
@@ -1036,7 +1036,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
 
             pre[2]=(int)((DTYPE)z/gridVoxelSpacing[2]);
             basis=(DTYPE)z/gridVoxelSpacing[2]-(DTYPE)pre[2];
-            if(basis<0.0) basis=0.0; //rounding error
+            if(basis<0) basis=0; //rounding error
             get_BSplineBasisValues<DTYPE>(basis, zBasis, zFirst);
 
             for(y=0; y<referenceImage->ny; y++)
@@ -1044,7 +1044,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
 
                pre[1]=(int)((DTYPE)y/gridVoxelSpacing[1]);
                basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)pre[1];
-               if(basis<0.0) basis=0.0; //rounding error
+               if(basis<0) basis=0; //rounding error
                get_BSplineBasisValues<DTYPE>(basis, yBasis, yFirst);
 
 #if _USE_SSE
@@ -1084,7 +1084,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
 
                   pre[0]=(int)((DTYPE)x/gridVoxelSpacing[0]);
                   basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)pre[0];
-                  if(basis<0.0) basis=0.0; //rounding error
+                  if(basis<0) basis=0; //rounding error
                   get_BSplineBasisValues<DTYPE>(basis, xBasis, xFirst);
 
 #if _USE_SSE
@@ -1157,15 +1157,15 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                      oldPre[2]=pre[2];
                   }
 #if _USE_SSE
-                  tempX_x =  _mm_set_ps1(0.0);
-                  tempX_y =  _mm_set_ps1(0.0);
-                  tempX_z =  _mm_set_ps1(0.0);
-                  tempY_x =  _mm_set_ps1(0.0);
-                  tempY_y =  _mm_set_ps1(0.0);
-                  tempY_z =  _mm_set_ps1(0.0);
-                  tempZ_x =  _mm_set_ps1(0.0);
-                  tempZ_y =  _mm_set_ps1(0.0);
-                  tempZ_z =  _mm_set_ps1(0.0);
+                  tempX_x =  _mm_set_ps1(0);
+                  tempX_y =  _mm_set_ps1(0);
+                  tempX_z =  _mm_set_ps1(0);
+                  tempY_x =  _mm_set_ps1(0);
+                  tempY_y =  _mm_set_ps1(0);
+                  tempY_z =  _mm_set_ps1(0);
+                  tempZ_x =  _mm_set_ps1(0);
+                  tempZ_y =  _mm_set_ps1(0);
+                  tempZ_z =  _mm_set_ps1(0);
                   //addition and multiplication of the 16 basis value and CP position for each axis
                   for(incr0=0; incr0<16; ++incr0)
                   {
@@ -1447,7 +1447,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
                               (splineControlPoint->nx-2)+pixelX-1;
                         detJac = (double)jacobianDeterminant[jacIndex];
 
-                        if(detJac>0.0)
+                        if(detJac>0)
                         {
                            jacobianMatrix = jacobianMatrices[jacIndex];
 #ifdef _USE_SQUARE_LOG_JAC
@@ -1545,7 +1545,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
                            basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre;
                            get_BSplineBasisValue<DTYPE>(basis,x-xPre,xBasis,xFirst);
 
-                           if(detJac>0.0 && (xBasis!=0 ||xFirst!=0))
+                           if(detJac>0 && (xBasis!=0 ||xFirst!=0))
                            {
 
                               jacobianMatrix = jacobianMatrices[jacIndex];
@@ -1699,7 +1699,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                                        (splineControlPoint->nx-2)+pixelX-1;
                                  detJac = (double)jacobianDeterminant[jacIndex];
 
-                                 if(detJac>0.0)
+                                 if(detJac>0)
                                  {
                                     jacobianMatrix = jacobianMatrices[jacIndex];
 #ifdef _USE_SQUARE_LOG_JAC
@@ -1819,7 +1819,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                                     basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre;
                                     get_BSplineBasisValue<DTYPE>(basis,x-xPre,xBasis,xFirst);
 
-                                    if(detJac>0.0 && (xBasis!=0 ||xFirst!=0))
+                                    if(detJac>0 && (xBasis!=0 ||xFirst!=0))
                                     {
 
                                        jacobianMatrix = jacobianMatrices[jacIndex];
@@ -2047,7 +2047,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
                               (splineControlPoint->nx-2)+pixelX-1;
                         detJac = jacobianDeterminant[jacIndex];
 
-                        if(detJac<=0.0)
+                        if(detJac<=0)
                         {
                            get_BSplineBasisValue<DTYPE>(0, y-pixelY+1, yBasis, yFirst);
                            get_BSplineBasisValue<DTYPE>(0, x-pixelX+1, xBasis, xFirst);
@@ -2063,7 +2063,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
                                                             basisValues[0],
                                  basisValues[1],
                                  foldingCorrection);
-                        } // detJac<0.0
+                        } // detJac<0
                      } // if x
                   }// x
                }// if y
@@ -2077,7 +2077,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
                norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0]
                      + gradient[1]*gradient[1]));
 
-               if(norm>(DTYPE)0.0)
+               if(norm>(DTYPE)0)
                {
                   id = y*splineControlPoint->nx+x;
                   controlPointPtrX[id] += (DTYPE)(gradient[0]/norm);
@@ -2140,7 +2140,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
                            jacIndex = pixelY*referenceImage->nx+pixelX;
                            detJac = jacobianDeterminant[jacIndex];
 
-                           if(detJac<=0.0)
+                           if(detJac<=0)
                            {
 
                               jacobianMatrix = jacobianMatrices[jacIndex];
@@ -2162,7 +2162,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
                                                                basisValues[0],
                                     basisValues[1],
                                     foldingCorrection);
-                           } // detJac<0.0
+                           } // detJac<0
                         } // if x
                      }// x
                   }// if y
@@ -2177,7 +2177,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
                   norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0] +
                         gradient[1]*gradient[1]));
 
-                  if(norm>0.0)
+                  if(norm>0)
                   {
                      id = y*splineControlPoint->nx+x;
                      controlPointPtrX[id] += (DTYPE)(gradient[0]/norm);
@@ -2305,7 +2305,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                                        (splineControlPoint->nx-2)+pixelX-1;
                                  detJac = jacobianDeterminant[jacIndex];
 
-                                 if(detJac<=0.0)
+                                 if(detJac<=0)
                                  {
                                     get_BSplineBasisValue<DTYPE>(0, z-pixelZ+1, zBasis, zFirst);
                                     get_BSplineBasisValue<DTYPE>(0, y-pixelY+1, yBasis, yFirst);
@@ -2324,7 +2324,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                                           basisValues[1],
                                           basisValues[2],
                                           foldingCorrection);
-                                 } // detJac<0.0
+                                 } // detJac<0
                               } // if x
                            }// x
                         }// if y
@@ -2346,7 +2346,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                         + gradient[1]*gradient[1]
                         + gradient[2]*gradient[2]));
 
-                  if(norm>(DTYPE)0.0)
+                  if(norm>(DTYPE)0)
                   {
                      id = (z*splineControlPoint->ny+y)*splineControlPoint->nx+x;
                      controlPointPtrX[id] += (DTYPE)(gradient[0]/norm);
@@ -2418,7 +2418,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                                     jacIndex = (pixelZ*referenceImage->ny+pixelY)*referenceImage->nx+pixelX;
                                     detJac = jacobianDeterminant[jacIndex];
 
-                                    if(detJac<=0.0)
+                                    if(detJac<=0)
                                     {
 
                                        jacobianMatrix = jacobianMatrices[jacIndex];
@@ -2446,7 +2446,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                                              basisValues[1],
                                              basisValues[2],
                                              foldingCorrection);
-                                    } // detJac<0.0
+                                    } // detJac<0
                                  } // if x
                               }// x
                            }// if y
@@ -2469,7 +2469,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                            gradient[1]*gradient[1] +
                            gradient[2]*gradient[2]));
 
-                     if(norm>0.0)
+                     if(norm>0)
                      {
                         id = (z*splineControlPoint->ny+y)*splineControlPoint->nx+x;
                         controlPointPtrX[id] += (DTYPE)(gradient[0]/norm);
@@ -2714,7 +2714,7 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField,
    DTYPE *deformationPtrX = static_cast<DTYPE *>(deformationField->data);
    DTYPE *deformationPtrY = &deformationPtrX[voxelNumber];
 
-   DTYPE basis[2]= {1.0,0.0};
+   DTYPE basis[2]= {1.0,0};
    DTYPE first[2]= {-1.0,1.0};
    DTYPE firstX, firstY, defX, defY;
 
@@ -2826,7 +2826,7 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField,
    DTYPE *deformationPtrY = &deformationPtrX[voxelNumber];
    DTYPE *deformationPtrZ = &deformationPtrY[voxelNumber];
 
-   DTYPE basis[2]= {1.0,0.0};
+   DTYPE basis[2]= {1.0,0};
    DTYPE first[2]= {-1.0,1.0};
    DTYPE firstX, firstY, firstZ, defX, defY, defZ;
 
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index fc8c4c70..3746b844 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -28,7 +28,7 @@ double reg_spline_approxBendingEnergyValue2D(nifti_image *splineControlPoint)
    DTYPE basisXX[9], basisYY[9], basisXY[9];
    set_second_order_bspline_basis_values(basisXX, basisYY, basisXY);
 
-   double constraintValue=0.0;
+   double constraintValue=0;
 
    DTYPE splineCoeffX, splineCoeffY;
    DTYPE XX_x, YY_x, XY_x;
@@ -47,8 +47,8 @@ double reg_spline_approxBendingEnergyValue2D(nifti_image *splineControlPoint)
    {
       for(x=1; x<splineControlPoint->nx-1; ++x)
       {
-         XX_x=0.0, YY_x=0.0, XY_x=0.0;
-         XX_y=0.0, YY_y=0.0, XY_y=0.0;
+         XX_x=0, YY_x=0, XY_x=0;
+         XX_y=0, YY_y=0, XY_y=0;
 
          i=0;
          for(b=-1; b<2; b++){
@@ -91,7 +91,7 @@ double reg_spline_approxBendingEnergyValue3D(nifti_image *splineControlPoint)
    DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27];
    set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ);
 
-   double constraintValue=0.0;
+   double constraintValue=0;
 
    DTYPE splineCoeffX, splineCoeffY, splineCoeffZ;
    DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x;
@@ -113,12 +113,12 @@ double reg_spline_approxBendingEnergyValue3D(nifti_image *splineControlPoint)
       {
          for(x=1; x<splineControlPoint->nx-1; ++x)
          {
-            XX_x=0.0, YY_x=0.0, ZZ_x=0.0;
-            XY_x=0.0, YZ_x=0.0, XZ_x=0.0;
-            XX_y=0.0, YY_y=0.0, ZZ_y=0.0;
-            XY_y=0.0, YZ_y=0.0, XZ_y=0.0;
-            XX_z=0.0, YY_z=0.0, ZZ_z=0.0;
-            XY_z=0.0, YZ_z=0.0, XZ_z=0.0;
+            XX_x=0, YY_x=0, ZZ_x=0;
+            XY_x=0, YZ_x=0, XZ_x=0;
+            XX_y=0, YY_y=0, ZZ_y=0;
+            XY_y=0, YZ_y=0, XZ_y=0;
+            XX_z=0, YY_z=0, ZZ_z=0;
+            XY_z=0, YZ_z=0, XZ_z=0;
 
             i=0;
             for(c=-1; c<2; c++){
@@ -236,8 +236,8 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
       derivativeValuesPtr = &derivativeValues[6*y*splineControlPoint->nx];
       for(x=0; x<splineControlPoint->nx; x++)
       {
-         XX_x=0.0, YY_x=0.0, XY_x=0.0;
-         XX_y=0.0, YY_y=0.0, XY_y=0.0;
+         XX_x=0, YY_x=0, XY_x=0;
+         XX_y=0, YY_y=0, XY_y=0;
 
          i=0;
          for(b=-1; b<2; b++){
@@ -283,7 +283,7 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
       index=y*splineControlPoint->nx;
       for(x=0; x<splineControlPoint->nx; x++)
       {
-         gradientValue[0]=gradientValue[1]=0.0;
+         gradientValue[0]=gradientValue[1]=0;
          a=0;
          for(Y=y-1; Y<y+2; Y++)
          {
@@ -358,12 +358,12 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
       {
          for(x=0; x<splineControlPoint->nx; x++)
          {
-            XX_x=0.0, YY_x=0.0, ZZ_x=0.0;
-            XY_x=0.0, YZ_x=0.0, XZ_x=0.0;
-            XX_y=0.0, YY_y=0.0, ZZ_y=0.0;
-            XY_y=0.0, YZ_y=0.0, XZ_y=0.0;
-            XX_z=0.0, YY_z=0.0, ZZ_z=0.0;
-            XY_z=0.0, YZ_z=0.0, XZ_z=0.0;
+            XX_x=0, YY_x=0, ZZ_x=0;
+            XY_x=0, YZ_x=0, XZ_x=0;
+            XX_y=0, YY_y=0, ZZ_y=0;
+            XY_y=0, YZ_y=0, XZ_y=0;
+            XX_z=0, YY_z=0, ZZ_z=0;
+            XY_z=0, YZ_z=0, XZ_z=0;
 
             i=0;
             for(c=-1; c<2; c++){
@@ -441,7 +441,7 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
       {
          for(x=0; x<splineControlPoint->nx; x++)
          {
-            gradientValue[0]=gradientValue[1]=gradientValue[2]=0.0;
+            gradientValue[0]=gradientValue[1]=gradientValue[2]=0;
             a=0;
             for(Z=z-1; Z<z+2; Z++)
             {
@@ -786,14 +786,14 @@ double reg_spline_linearEnergyValue2D(nifti_image *referenceImage,
 
       yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
       basis=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
-      if(basis<0.0) basis=0.0; //rounding error
+      if(basis<0) basis=0; //rounding error
       get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
 
       for(x=0; x<referenceImage->nx; ++x){
 
          xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
          basis=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
-         if(basis<0.0) basis=0.0; //rounding error
+         if(basis<0) basis=0; //rounding error
          get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
 
          memset(&matrix, 0, sizeof(mat33));
@@ -876,21 +876,21 @@ double reg_spline_linearEnergyValue3D(nifti_image *referenceImage,
 
       zPre=static_cast<int>(static_cast<DTYPE>(z)/gridVoxelSpacing[2]);
       basis=static_cast<DTYPE>(z)/gridVoxelSpacing[2]-static_cast<DTYPE>(zPre);
-      if(basis<0.0) basis=0.0; //rounding error
+      if(basis<0) basis=0; //rounding error
       get_BSplineBasisValues<DTYPE>(basis, basisZ, firstZ);
 
       for(y=0; y<referenceImage->ny; ++y){
 
          yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
          basis=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
-         if(basis<0.0) basis=0.0; //rounding error
+         if(basis<0) basis=0; //rounding error
          get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
 
          for(x=0; x<referenceImage->nx; ++x){
 
             xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
             basis=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
-            if(basis<0.0) basis=0.0; //rounding error
+            if(basis<0) basis=0; //rounding error
             get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
 
             memset(&matrix, 0, sizeof(mat33));
@@ -1021,14 +1021,14 @@ void reg_spline_linearEnergyGradient2D(nifti_image *referenceImage,
 
       yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
       basis=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
-      if(basis<0.0) basis=0.0; //rounding error
+      if(basis<0) basis=0; //rounding error
       get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
 
       for(x=0; x<referenceImage->nx; ++x){
 
          xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
          basis=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
-         if(basis<0.0) basis=0.0; //rounding error
+         if(basis<0) basis=0; //rounding error
          get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
 
          memset(&matrix, 0, sizeof(mat33));
@@ -1127,21 +1127,21 @@ void reg_spline_linearEnergyGradient3D(nifti_image *referenceImage,
 
       zPre=static_cast<int>(static_cast<DTYPE>(z)/gridVoxelSpacing[2]);
       basis=static_cast<DTYPE>(z)/gridVoxelSpacing[2]-static_cast<DTYPE>(zPre);
-      if(basis<0.0) basis=0.0; //rounding error
+      if(basis<0) basis=0; //rounding error
       get_BSplineBasisValues<DTYPE>(basis, basisZ, firstZ);
 
       for(y=0; y<referenceImage->ny; ++y){
 
          yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
          basis=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
-         if(basis<0.0) basis=0.0; //rounding error
+         if(basis<0) basis=0; //rounding error
          get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
 
          for(x=0; x<referenceImage->nx; ++x){
 
             xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
             basis=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
-            if(basis<0.0) basis=0.0; //rounding error
+            if(basis<0) basis=0; //rounding error
             get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
 
             memset(&matrix, 0, sizeof(mat33));
diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp
index b587175e..16df2f9f 100644
--- a/reg-lib/cpu/_reg_maths.cpp
+++ b/reg-lib/cpu/_reg_maths.cpp
@@ -137,7 +137,7 @@ void reg_matrixMultiply(T *mat1,
     {
         for (size_t i = 0; i < resDim[0]; ++i)
         {
-            double sum = 0.0;
+            double sum = 0;
             for (size_t k = 0; k < dim1[1]; ++k)
             {
                 sum += mat1[k * dim1[0] + i] * mat2[j * dim2[0] + k];
@@ -715,7 +715,7 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D)
     const int maxsteps = 24;  // certainly wont need that many.
     int k0, k1, k2;
     float o[3], m[3];
-    float q[4] = { 0.0, 0.0, 0.0, 1.0 };
+    float q[4] = { 0, 0, 0, 1 };
     float jr[4];
     float sqw, sqx, sqy, sqz;
     float tmp1, tmp2, mq;
@@ -774,12 +774,12 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D)
         k0 = (m[0] > m[1] && m[0] > m[2]) ? 0 : (m[1] > m[2]) ? 1 : 2; // index of largest element of offdiag
         k1 = (k0 + 1) % 3;
         k2 = (k0 + 2) % 3;
-        if (o[k0] == 0.0)
+        if (o[k0] == 0)
         {
             break;                          // diagonal already
         }
         thet = (D->m[k2][k2] - D->m[k1][k1]) / (2.0*o[k0]);
-        sgn = (thet > 0.0) ? 1.0 : -1.0;
+        sgn = (thet > 0) ? 1 : -1;
         thet *= sgn;                      // make it positive
         t = sgn / (thet + ((thet < 1.E6) ? sqrt(thet*thet + 1.0) : thet)); // sign(T)/(|T|+sqrt(T^2+1))
         c = 1.0 / sqrt(t*t + 1.0);        //  c= 1/(t^2+1) , t=s/c
@@ -787,7 +787,7 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D)
         {
             break;                          // no room for improvement - reached machine precision.
         }
-        jr[0] = jr[1] = jr[2] = jr[3] = 0.0;
+        jr[0] = jr[1] = jr[2] = jr[3] = 0;
         jr[k0] = sgn*sqrt((1.0 - c) / 2.0);    // using 1/2 angle identity sin(a/2) = sqrt((1-cos(a))/2)
         jr[k0] *= -1.0;                     // since our quat-to-matrix convention was for v*M instead of M*v
         jr[3] = sqrt(1.0f - jr[k0] * jr[k0]);
@@ -863,8 +863,8 @@ void reg_mat44_eye(mat44 *mat)
 /* *************************************************************** */
 float reg_mat44_norm_inf(mat44 const* mat)
 {
-    float maxval = 0.0;
-    float newval = 0.0;
+    float maxval = 0;
+    float newval = 0;
     for (int i = 0; i < 4; i++)
     {
         for (int j = 0; j < 4; j++)
diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h
index a4cf2291..0282b157 100755
--- a/reg-lib/cpu/_reg_measure.h
+++ b/reg-lib/cpu/_reg_measure.h
@@ -9,11 +9,9 @@
 
 #include "_reg_tools.h"
 #include <time.h>
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+
 /// @brief Class common to all measure of similarity classes
-class reg_measure
-{
+class reg_measure {
 public:
    /// @brief Set the pointers to be ussed by the measure object
    void InitialiseMeasure(nifti_image *refImgPtr,
@@ -26,44 +24,45 @@ class reg_measure
                           int *maskFloPtr = nullptr,
                           nifti_image *warRefImgPtr = nullptr,
                           nifti_image *warRefGraPtr = nullptr,
-                          nifti_image *bckVoxBasedGraPtr = nullptr)
-   {
-      this->isSymmetric=false;
-      this->referenceImagePointer=refImgPtr;
-      this->referenceTimePoint=this->referenceImagePointer->nt;
-      this->floatingImagePointer=floImgPtr;
-      this->referenceMaskPointer=maskRefPtr;
-      this->warpedFloatingImagePointer=warFloImgPtr;
-      this->warpedFloatingGradientImagePointer=warFloGraPtr;
-      this->forwardVoxelBasedGradientImagePointer=forVoxBasedGraPtr;
-      this->forwardLocalWeightSimImagePointer=localWeightSimPtr;
-      if(maskFloPtr != nullptr && warRefImgPtr!=nullptr && warRefGraPtr!=nullptr && bckVoxBasedGraPtr!=nullptr) {
-         this->isSymmetric=true;
-         this->floatingMaskPointer=maskFloPtr;
-         this->warpedReferenceImagePointer=warRefImgPtr;
-         this->warpedReferenceGradientImagePointer=warRefGraPtr;
-         this->backwardVoxelBasedGradientImagePointer=bckVoxBasedGraPtr;
-      }
-      else {
-          this->floatingMaskPointer=nullptr;
-          this->warpedReferenceImagePointer=nullptr;
-          this->warpedReferenceGradientImagePointer=nullptr;
-          this->backwardVoxelBasedGradientImagePointer=nullptr;
+                          nifti_image *bckVoxBasedGraPtr = nullptr) {
+      this->isSymmetric = false;
+      this->referenceImagePointer = refImgPtr;
+      this->referenceTimePoint = this->referenceImagePointer->nt;
+      this->floatingImagePointer = floImgPtr;
+      this->referenceMaskPointer = maskRefPtr;
+      this->warpedFloatingImagePointer = warFloImgPtr;
+      this->warpedFloatingGradientImagePointer = warFloGraPtr;
+      this->forwardVoxelBasedGradientImagePointer = forVoxBasedGraPtr;
+      this->forwardLocalWeightSimImagePointer = localWeightSimPtr;
+      if (maskFloPtr != nullptr && warRefImgPtr != nullptr && warRefGraPtr != nullptr && bckVoxBasedGraPtr != nullptr) {
+         this->isSymmetric = true;
+         this->floatingMaskPointer = maskFloPtr;
+         this->warpedReferenceImagePointer = warRefImgPtr;
+         this->warpedReferenceGradientImagePointer = warRefGraPtr;
+         this->backwardVoxelBasedGradientImagePointer = bckVoxBasedGraPtr;
+      } else {
+         this->floatingMaskPointer = nullptr;
+         this->warpedReferenceImagePointer = nullptr;
+         this->warpedReferenceGradientImagePointer = nullptr;
+         this->backwardVoxelBasedGradientImagePointer = nullptr;
       }
 #ifndef NDEBUG
       printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n");
 #endif
    }
+
    /// @brief Returns the registration measure of similarity value
    virtual double GetSimilarityMeasureValue() = 0;
+
    /// @brief Compute the voxel based measure of similarity gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint){
-      if(current_timepoint<0 || current_timepoint>=this->referenceImagePointer->nt){
+   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+      if (current_timepoint < 0 || current_timepoint >= this->referenceImagePointer->nt) {
          reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient");
          reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
          reg_exit();
       }
    }
+
    /// @brief Here
    virtual void GetDiscretisedValue(nifti_image *, float *, int, int) {}
 
@@ -82,7 +81,7 @@ class reg_measure
    virtual int* GetReferenceMask(void) {
       return this->referenceMaskPointer;
    }
-/************************************************************************/
+
 protected:
    nifti_image *referenceImagePointer;
    int *referenceMaskPointer;
@@ -98,18 +97,16 @@ class reg_measure
    nifti_image *warpedReferenceGradientImagePointer;
    nifti_image *backwardVoxelBasedGradientImagePointer;
 
-   double timePointWeight[255];
+   double timePointWeight[255] = {0};
    int referenceTimePoint;
+
    /// @brief Measure class constructor
-   reg_measure()
-   {
-      memset(this->timePointWeight,0,255*sizeof(double) );
+   reg_measure() {
 #ifndef NDEBUG
       printf("[NiftyReg DEBUG] reg_measure constructor called\n");
 #endif
    }
-   /// @brief Measure class desstructor
+
+   /// @brief Measure class destructor
    virtual ~reg_measure() {}
 };
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 0601cdea..f5feaec0 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -12,400 +12,386 @@
 
 #include "_reg_mind.h"
 
-/* *************************************************************** */
+ /* *************************************************************** */
 template <class DTYPE>
 void ShiftImage(nifti_image* inputImgPtr,
                 nifti_image* shiftedImgPtr,
                 int *maskPtr,
                 int tx,
                 int ty,
-                int tz)
-{
-   DTYPE* inputData = static_cast<DTYPE*> (inputImgPtr->data);
-   DTYPE* shiftImageData = static_cast<DTYPE*> (shiftedImgPtr->data);
+                int tz) {
+    DTYPE* inputData = static_cast<DTYPE*>(inputImgPtr->data);
+    DTYPE* shiftImageData = static_cast<DTYPE*>(shiftedImgPtr->data);
 
-   int currentIndex;
-   int shiftedIndex;
+    int currentIndex;
+    int shiftedIndex;
 
-   int x, y, z, old_x, old_y, old_z;
+    int x, y, z, old_x, old_y, old_z;
 
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(inputData, shiftImageData, shiftedImgPtr, inputImgPtr, \
-   maskPtr, tx, ty, tz) \
-   private(x, y, z, old_x, old_y, old_z, shiftedIndex, \
-   currentIndex)
+    shared(inputData, shiftImageData, shiftedImgPtr, inputImgPtr, \
+    maskPtr, tx, ty, tz) \
+    private(x, y, z, old_x, old_y, old_z, shiftedIndex, \
+    currentIndex)
 #endif
-   for (z=0;z<shiftedImgPtr->nz;z++) {
-      currentIndex = z * shiftedImgPtr->nx * shiftedImgPtr->ny;
-      old_z = z-tz;
-      for (y=0;y<shiftedImgPtr->ny;y++) {
-         old_y = y-ty;
-         for (x=0;x<shiftedImgPtr->nx;x++) {
-            old_x = x-tx;
-            if(old_x>-1 && old_x<inputImgPtr->nx &&
-                  old_y>-1 && old_y<inputImgPtr->ny &&
-                  old_z>-1 && old_z<inputImgPtr->nz){
-               shiftedIndex = (old_z*inputImgPtr->ny+old_y)*inputImgPtr->nx+old_x;
-               if(maskPtr[shiftedIndex]>-1) {
-                  shiftImageData[currentIndex]=inputData[shiftedIndex];
-               } // mask is not defined
-               else{
-                  //shiftImageData[currentIndex]=std::numeric_limits<DTYPE>::quiet_NaN();
-                   shiftImageData[currentIndex]=0.0;
-               }
-            } // outside of the image
-            else{
-               //shiftImageData[currentIndex]=std::numeric_limits<DTYPE>::quiet_NaN();
-                shiftImageData[currentIndex]=0.0;
+    for (z = 0; z < shiftedImgPtr->nz; z++) {
+        currentIndex = z * shiftedImgPtr->nx * shiftedImgPtr->ny;
+        old_z = z - tz;
+        for (y = 0; y < shiftedImgPtr->ny; y++) {
+            old_y = y - ty;
+            for (x = 0; x < shiftedImgPtr->nx; x++) {
+                old_x = x - tx;
+                if (old_x > -1 && old_x<inputImgPtr->nx &&
+                    old_y>-1 && old_y<inputImgPtr->ny &&
+                    old_z>-1 && old_z < inputImgPtr->nz) {
+                    shiftedIndex = (old_z * inputImgPtr->ny + old_y) * inputImgPtr->nx + old_x;
+                    if (maskPtr[shiftedIndex] > -1) {
+                        shiftImageData[currentIndex] = inputData[shiftedIndex];
+                    } // mask is not defined
+                    else {
+                        //shiftImageData[currentIndex]=std::numeric_limits<DTYPE>::quiet_NaN();
+                        shiftImageData[currentIndex] = 0;
+                    }
+                } // outside of the image
+                else {
+                    //shiftImageData[currentIndex]=std::numeric_limits<DTYPE>::quiet_NaN();
+                    shiftImageData[currentIndex] = 0;
+                }
+                currentIndex++;
             }
-            currentIndex++;
-         }
-      }
-   }
+        }
+    }
 }
 /* *************************************************************** */
 template <class DTYPE>
-void GetMINDImageDesciptor_core(nifti_image* inputImage,
+void GetMINDImageDescriptor_core(nifti_image* inputImage,
                                 nifti_image* MINDImage,
                                 int *maskPtr,
                                 int descriptorOffset,
-                                int current_timepoint)
-{
+                                int current_timepoint) {
 #ifdef WIN32
-   long voxelNumber = (long)inputImage->nx *
-         inputImage->ny * inputImage->nz;
-   long voxelIndex;
+    long voxelNumber = long(inputImage->nx * inputImage->ny * inputImage->nz);
+    long voxelIndex;
 #else
-   size_t voxelNumber = (size_t)inputImage->nx *
-         inputImage->ny * inputImage->nz;
-   size_t voxelIndex;
+    size_t voxelNumber = size_t(inputImage->nx * inputImage->ny * inputImage->nz);
+    size_t voxelIndex;
 #endif
 
-   // Create a pointer to the descriptor image
-   DTYPE* MINDImgDataPtr = static_cast<DTYPE *>(MINDImage->data);
-
-   // Allocate an image to store the current timepoint reference image
-   nifti_image *currentInputImage = nifti_copy_nim_info(inputImage);
-   currentInputImage->ndim=currentInputImage->dim[0]=inputImage->nz>1?3:2;
-   currentInputImage->nt=currentInputImage->dim[4]=1;
-   currentInputImage->nvox=voxelNumber;
-   DTYPE *inputImagePtr = static_cast<DTYPE *>(inputImage->data);
-   currentInputImage->data = static_cast<void *>(&inputImagePtr[current_timepoint*voxelNumber]);
-
-   // Allocate an image to store the mean image
-   nifti_image *meanImage = nifti_copy_nim_info(currentInputImage);
-   meanImage->data=(void *)calloc(meanImage->nvox,meanImage->nbyper);
-   DTYPE* meanImgDataPtr = static_cast<DTYPE *>(meanImage->data);
-
-   // Allocate an image to store the shifted image
-   nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage);
-   shiftedImage->data = (void *)malloc(shiftedImage->nvox*shiftedImage->nbyper);
-
-   // Allocation of the difference image
-   nifti_image *diff_image = nifti_copy_nim_info(currentInputImage);
-   diff_image->data = (void *) malloc(diff_image->nvox*diff_image->nbyper);
-
-   // Define the sigma for the convolution
-   float sigma = -0.5;// negative value denotes voxel width
-
-   //2D version
-   int samplingNbr = (currentInputImage->nz > 1) ? 6 : 4;
-   int RSampling3D_x[6] = {-descriptorOffset, descriptorOffset,  0, 0,  0, 0};
-   int RSampling3D_y[6] = {0,  0, -descriptorOffset, descriptorOffset,  0, 0};
-   int RSampling3D_z[6] = {0,  0,  0, 0, -descriptorOffset, descriptorOffset};
-
-   for(int i=0;i<samplingNbr;i++) {
-      ShiftImage<DTYPE>(currentInputImage, shiftedImage, maskPtr,
-                        RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
-      reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image);
-      reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
-      reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr);
-      reg_tools_addImageToImage(meanImage, diff_image, meanImage);
-
-      // Store the current descriptor
-      unsigned int index = i * diff_image->nvox;
-      memcpy(&MINDImgDataPtr[index], diff_image->data,
-             diff_image->nbyper * diff_image->nvox);
-   }
-   // Compute the mean over the number of sample
-   reg_tools_divideValueToImage(meanImage, meanImage, samplingNbr);
-
-   // Compute the MIND desccriptor
-   int mindIndex;
-   DTYPE meanValue, max_desc, descValue;
+    // Create a pointer to the descriptor image
+    DTYPE* MINDImgDataPtr = static_cast<DTYPE*>(MINDImage->data);
+
+    // Allocate an image to store the current timepoint reference image
+    nifti_image *currentInputImage = nifti_copy_nim_info(inputImage);
+    currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2;
+    currentInputImage->nt = currentInputImage->dim[4] = 1;
+    currentInputImage->nvox = voxelNumber;
+    DTYPE *inputImagePtr = static_cast<DTYPE*>(inputImage->data);
+    currentInputImage->data = static_cast<void*>(&inputImagePtr[current_timepoint * voxelNumber]);
+
+    // Allocate an image to store the mean image
+    nifti_image *meanImage = nifti_copy_nim_info(currentInputImage);
+    meanImage->data = (void*)calloc(meanImage->nvox, meanImage->nbyper);
+    DTYPE* meanImgDataPtr = static_cast<DTYPE*>(meanImage->data);
+
+    // Allocate an image to store the shifted image
+    nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage);
+    shiftedImage->data = (void*)malloc(shiftedImage->nvox * shiftedImage->nbyper);
+
+    // Allocation of the difference image
+    nifti_image *diff_image = nifti_copy_nim_info(currentInputImage);
+    diff_image->data = (void*)malloc(diff_image->nvox * diff_image->nbyper);
+
+    // Define the sigma for the convolution
+    float sigma = -0.5;// negative value denotes voxel width
+
+    //2D version
+    int samplingNbr = (currentInputImage->nz > 1) ? 6 : 4;
+    int RSampling3D_x[6] = {-descriptorOffset, descriptorOffset, 0, 0, 0, 0};
+    int RSampling3D_y[6] = {0, 0, -descriptorOffset, descriptorOffset, 0, 0};
+    int RSampling3D_z[6] = {0, 0, 0, 0, -descriptorOffset, descriptorOffset};
+
+    for (int i = 0; i < samplingNbr; i++) {
+        ShiftImage<DTYPE>(currentInputImage, shiftedImage, maskPtr,
+                          RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
+        reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image);
+        reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
+        reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr);
+        reg_tools_addImageToImage(meanImage, diff_image, meanImage);
+
+        // Store the current descriptor
+        unsigned int index = i * diff_image->nvox;
+        memcpy(&MINDImgDataPtr[index], diff_image->data, diff_image->nbyper * diff_image->nvox);
+    }
+    // Compute the mean over the number of sample
+    reg_tools_divideValueToImage(meanImage, meanImage, samplingNbr);
+
+    // Compute the MIND descriptor
+    int mindIndex;
+    DTYPE meanValue, max_desc, descValue;
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(voxelNumber, samplingNbr, maskPtr, meanImgDataPtr, \
-   MINDImgDataPtr) \
-   private(voxelIndex, meanValue, max_desc, descValue, mindIndex)
+    shared(voxelNumber, samplingNbr, maskPtr, meanImgDataPtr, \
+    MINDImgDataPtr) \
+    private(voxelIndex, meanValue, max_desc, descValue, mindIndex)
 #endif
-   for(voxelIndex=0;voxelIndex<voxelNumber;voxelIndex++) {
-
-      if(maskPtr[voxelIndex]>-1){
-         // Get the mean value for the current voxel
-         meanValue = meanImgDataPtr[voxelIndex];
-         if(meanValue == 0) {
-            meanValue = std::numeric_limits<DTYPE>::epsilon();
-         }
-         max_desc = 0;
-         mindIndex=voxelIndex;
-         for(int t=0;t<samplingNbr;t++) {
-            descValue = (DTYPE)exp(-MINDImgDataPtr[mindIndex]/meanValue);
-            MINDImgDataPtr[mindIndex] = descValue;
-            max_desc = (std::max)(max_desc, descValue);
-            mindIndex+=voxelNumber;
-         }
-
-         mindIndex=voxelIndex;
-         for(int t=0;t<samplingNbr;t++) {
-            descValue = MINDImgDataPtr[mindIndex];
-            MINDImgDataPtr[mindIndex] = descValue/max_desc;
-            mindIndex+=voxelNumber;
-         }
-      } // mask
-   } // voxIndex
-   // Mr Propre
-   nifti_image_free(diff_image);
-   nifti_image_free(shiftedImage);
-   nifti_image_free(meanImage);
-   currentInputImage->data=nullptr;
-   nifti_image_free(currentInputImage);
+    for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) {
+
+        if (maskPtr[voxelIndex] > -1) {
+            // Get the mean value for the current voxel
+            meanValue = meanImgDataPtr[voxelIndex];
+            if (meanValue == 0) {
+                meanValue = std::numeric_limits<DTYPE>::epsilon();
+            }
+            max_desc = 0;
+            mindIndex = voxelIndex;
+            for (int t = 0; t < samplingNbr; t++) {
+                descValue = (DTYPE)exp(-MINDImgDataPtr[mindIndex] / meanValue);
+                MINDImgDataPtr[mindIndex] = descValue;
+                max_desc = (std::max)(max_desc, descValue);
+                mindIndex += voxelNumber;
+            }
+
+            mindIndex = voxelIndex;
+            for (int t = 0; t < samplingNbr; t++) {
+                descValue = MINDImgDataPtr[mindIndex];
+                MINDImgDataPtr[mindIndex] = descValue / max_desc;
+                mindIndex += voxelNumber;
+            }
+        } // mask
+    } // voxIndex
+    // Mr Propre
+    nifti_image_free(diff_image);
+    nifti_image_free(shiftedImage);
+    nifti_image_free(meanImage);
+    currentInputImage->data = nullptr;
+    nifti_image_free(currentInputImage);
 }
 /* *************************************************************** */
-void GetMINDImageDesciptor(nifti_image* inputImgPtr,
+void GetMINDImageDescriptor(nifti_image* inputImgPtr,
                            nifti_image* MINDImgPtr,
                            int *maskPtr,
                            int descriptorOffset,
                            int current_timepoint) {
 #ifndef NDEBUG
-   reg_print_fct_debug("GetMINDImageDesciptor()");
+    reg_print_fct_debug("GetMINDImageDescriptor()");
 #endif
-   if(inputImgPtr->datatype != MINDImgPtr->datatype) {
-      reg_print_fct_error("reg_mind -- GetMINDImageDesciptor");
-      reg_print_msg_error("The input image and the MIND image must have the same datatype !");
-      reg_exit();
-   }
-
-   switch (inputImgPtr->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      GetMINDImageDesciptor_core<float>(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      GetMINDImageDesciptor_core<double>(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint);
-      break;
-   default:
-      reg_print_fct_error("GetMINDImageDesciptor");
-      reg_print_msg_error("Input image datatype not supported");
-      reg_exit();
-      break;
-   }
+    if (inputImgPtr->datatype != MINDImgPtr->datatype) {
+        reg_print_fct_error("reg_mind -- GetMINDImageDescriptor");
+        reg_print_msg_error("The input image and the MIND image must have the same datatype !");
+        reg_exit();
+    }
+
+    switch (inputImgPtr->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        GetMINDImageDescriptor_core<float>(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        GetMINDImageDescriptor_core<double>(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint);
+        break;
+    default:
+        reg_print_fct_error("GetMINDImageDescriptor");
+        reg_print_msg_error("Input image datatype not supported");
+        reg_exit();
+        break;
+    }
 }
 /* *************************************************************** */
 template <class DTYPE>
-void GetMINDSSCImageDesciptor_core(nifti_image* inputImage,
+void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
                                    nifti_image* MINDSSCImage,
                                    int *maskPtr,
                                    int descriptorOffset,
-                                   int current_timepoint)
-{
+                                   int current_timepoint) {
 
 #ifdef WIN32
-   long voxelNumber = (long)inputImage->nx *
-         inputImage->ny * inputImage->nz;
-   long voxelIndex;
+    long voxelNumber = long(inputImage->nx * inputImage->ny * inputImage->nz);
+    long voxelIndex;
 #else
-   size_t voxelNumber = (size_t)inputImage->nx *
-         inputImage->ny * inputImage->nz;
-   size_t voxelIndex;
+    size_t voxelNumber = size_t(inputImage->nx * inputImage->ny * inputImage->nz);
+    size_t voxelIndex;
 #endif
 
-   // Create a pointer to the descriptor image
-   DTYPE* MINDSSCImgDataPtr = static_cast<DTYPE *>(MINDSSCImage->data);
-
-   // Allocate an image to store the current timepoint reference image
-   nifti_image *currentInputImage = nifti_copy_nim_info(inputImage);
-   currentInputImage->ndim=currentInputImage->dim[0]=inputImage->nz>1?3:2;
-   currentInputImage->nt=currentInputImage->dim[4]=1;
-   currentInputImage->nvox=voxelNumber;
-   DTYPE *inputImagePtr = static_cast<DTYPE *>(inputImage->data);
-   currentInputImage->data = static_cast<void *>(&inputImagePtr[current_timepoint*voxelNumber]);
-
-   // Allocate an image to store the mean image
-   nifti_image *mean_img = nifti_copy_nim_info(currentInputImage);
-   mean_img->data=(void *)calloc(mean_img->nvox,mean_img->nbyper);
-   DTYPE* meanImgDataPtr = static_cast<DTYPE *>(mean_img->data);
-
-   // Allocate an image to store the warped image
-   nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage);
-   shiftedImage->data = (void *)malloc(shiftedImage->nvox*shiftedImage->nbyper);
-
-   // Define the sigma for the convolution
-   float sigma = -0.5;// negative value denotes voxel width
-   //float sigma = -1.0;// negative value denotes voxel width
-
-   //2D version
-   int samplingNbr = (currentInputImage->nz > 1) ? 6 : 2;
-   int lengthDescriptor = (currentInputImage->nz > 1) ? 12 : 4;
-
-   // Allocation of the difference image
-   //std::vector<nifti_image *> vectNiftiImage;
-   //for(int i=0;i<samplingNbr;i++) {
-   nifti_image *diff_image = nifti_copy_nim_info(currentInputImage);
-   diff_image->data = (void *) malloc(diff_image->nvox*diff_image->nbyper);
-   int *mask_diff_image = (int *)calloc(diff_image->nvox, sizeof(int));
-
-   nifti_image *diff_imageShifted = nifti_copy_nim_info(currentInputImage);
-   diff_imageShifted->data = (void *) malloc(diff_imageShifted->nvox*diff_imageShifted->nbyper);
-
-   int RSampling3D_x[6] = {+descriptorOffset,+descriptorOffset,-descriptorOffset,+0,+descriptorOffset,+0};
-   int RSampling3D_y[6] = {+descriptorOffset,-descriptorOffset,+0,-descriptorOffset,+0,+descriptorOffset};
-   int RSampling3D_z[6] = {+0,+0,+descriptorOffset,+descriptorOffset,+descriptorOffset,+descriptorOffset};
-
-   int tx[12]={-descriptorOffset,+0,-descriptorOffset,+0,+0,+descriptorOffset,+0,+0,+0,-descriptorOffset,+0,+0};
-   int ty[12]={+0,-descriptorOffset,+0,+descriptorOffset,+0,+0,+0,+descriptorOffset,+0,+0,+0,-descriptorOffset};
-   int tz[12]={+0,+0,+0,+0,-descriptorOffset,+0,-descriptorOffset,+0,-descriptorOffset,+0,-descriptorOffset,+0};
-   int compteurId = 0;
-
-   for(int i=0;i<samplingNbr;i++) {
-      ShiftImage<DTYPE>(currentInputImage, shiftedImage, maskPtr,
-                        RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
-      reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image);
-      reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
-      reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr);
-
-      for(int j=0;j<2;j++){
-
-         ShiftImage<DTYPE>(diff_image, diff_imageShifted, mask_diff_image,
-                           tx[compteurId], ty[compteurId], tz[compteurId]);
-
-         reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img);
-         // Store the current descriptor
-         unsigned int index = compteurId * diff_imageShifted->nvox;
-         memcpy(&MINDSSCImgDataPtr[index], diff_imageShifted->data,
-                diff_imageShifted->nbyper * diff_imageShifted->nvox);
-         compteurId++;
-      }
-   }
-   // Compute the mean over the number of sample
-   reg_tools_divideValueToImage(mean_img, mean_img, lengthDescriptor);
-
-   // Compute the MINDSSC desccriptor
-   int mindIndex;
-   DTYPE meanValue, max_desc, descValue;
+    // Create a pointer to the descriptor image
+    DTYPE* MINDSSCImgDataPtr = static_cast<DTYPE*>(MINDSSCImage->data);
+
+    // Allocate an image to store the current timepoint reference image
+    nifti_image *currentInputImage = nifti_copy_nim_info(inputImage);
+    currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2;
+    currentInputImage->nt = currentInputImage->dim[4] = 1;
+    currentInputImage->nvox = voxelNumber;
+    DTYPE *inputImagePtr = static_cast<DTYPE*>(inputImage->data);
+    currentInputImage->data = static_cast<void*>(&inputImagePtr[current_timepoint * voxelNumber]);
+
+    // Allocate an image to store the mean image
+    nifti_image *mean_img = nifti_copy_nim_info(currentInputImage);
+    mean_img->data = (void*)calloc(mean_img->nvox, mean_img->nbyper);
+    DTYPE* meanImgDataPtr = static_cast<DTYPE*>(mean_img->data);
+
+    // Allocate an image to store the warped image
+    nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage);
+    shiftedImage->data = (void*)malloc(shiftedImage->nvox * shiftedImage->nbyper);
+
+    // Define the sigma for the convolution
+    float sigma = -0.5;// negative value denotes voxel width
+    //float sigma = -1.0;// negative value denotes voxel width
+
+    //2D version
+    int samplingNbr = (currentInputImage->nz > 1) ? 6 : 2;
+    int lengthDescriptor = (currentInputImage->nz > 1) ? 12 : 4;
+
+    // Allocation of the difference image
+    //std::vector<nifti_image *> vectNiftiImage;
+    //for(int i=0;i<samplingNbr;i++) {
+    nifti_image *diff_image = nifti_copy_nim_info(currentInputImage);
+    diff_image->data = (void*)malloc(diff_image->nvox * diff_image->nbyper);
+    int *mask_diff_image = (int*)calloc(diff_image->nvox, sizeof(int));
+
+    nifti_image *diff_imageShifted = nifti_copy_nim_info(currentInputImage);
+    diff_imageShifted->data = (void*)malloc(diff_imageShifted->nvox * diff_imageShifted->nbyper);
+
+    int RSampling3D_x[6] = {+descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0};
+    int RSampling3D_y[6] = {+descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset};
+    int RSampling3D_z[6] = {+0, +0, +descriptorOffset, +descriptorOffset, +descriptorOffset, +descriptorOffset};
+
+    int tx[12] = {-descriptorOffset, +0, -descriptorOffset, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset, +0, +0};
+    int ty[12] = {+0, -descriptorOffset, +0, +descriptorOffset, +0, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset};
+    int tz[12] = {+0, +0, +0, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0};
+    int compteurId = 0;
+
+    for (int i = 0; i < samplingNbr; i++) {
+        ShiftImage<DTYPE>(currentInputImage, shiftedImage, maskPtr,
+                          RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
+        reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image);
+        reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
+        reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr);
+
+        for (int j = 0; j < 2; j++) {
+
+            ShiftImage<DTYPE>(diff_image, diff_imageShifted, mask_diff_image,
+                              tx[compteurId], ty[compteurId], tz[compteurId]);
+
+            reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img);
+            // Store the current descriptor
+            unsigned int index = compteurId * diff_imageShifted->nvox;
+            memcpy(&MINDSSCImgDataPtr[index], diff_imageShifted->data,
+                   diff_imageShifted->nbyper * diff_imageShifted->nvox);
+            compteurId++;
+        }
+    }
+    // Compute the mean over the number of sample
+    reg_tools_divideValueToImage(mean_img, mean_img, lengthDescriptor);
+
+    // Compute the MINDSSC descriptor
+    int mindIndex;
+    DTYPE meanValue, max_desc, descValue;
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(voxelNumber, lengthDescriptor, samplingNbr, maskPtr, meanImgDataPtr, \
-   MINDSSCImgDataPtr) \
-   private(voxelIndex, meanValue, max_desc, descValue, mindIndex)
+    shared(voxelNumber, lengthDescriptor, samplingNbr, maskPtr, meanImgDataPtr, \
+    MINDSSCImgDataPtr) \
+    private(voxelIndex, meanValue, max_desc, descValue, mindIndex)
 #endif
-   for(voxelIndex=0;voxelIndex<voxelNumber;voxelIndex++) {
-
-      if(maskPtr[voxelIndex]>-1){
-         // Get the mean value for the current voxel
-         meanValue = meanImgDataPtr[voxelIndex];
-         if(meanValue == 0) {
-            meanValue = std::numeric_limits<DTYPE>::epsilon();
-         }
-         max_desc = 0;
-         mindIndex=voxelIndex;
-         for(int t=0;t<lengthDescriptor;t++) {
-            descValue = (DTYPE)exp(-MINDSSCImgDataPtr[mindIndex]/meanValue);
-            MINDSSCImgDataPtr[mindIndex] = descValue;
-            max_desc = std::max(max_desc, descValue);
-            mindIndex+=voxelNumber;
-         }
-
-         mindIndex=voxelIndex;
-         for(int t=0;t<lengthDescriptor;t++) {
-            descValue = MINDSSCImgDataPtr[mindIndex];
-            MINDSSCImgDataPtr[mindIndex] = descValue/max_desc;
-            mindIndex+=voxelNumber;
-         }
-      } // mask
-   } // voxIndex
-   // Mr Propre
-   nifti_image_free(diff_imageShifted);
-   free(mask_diff_image);
-   nifti_image_free(diff_image);
-   nifti_image_free(shiftedImage);
-   nifti_image_free(mean_img);
-   currentInputImage->data=nullptr;
-   nifti_image_free(currentInputImage);
+    for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) {
+
+        if (maskPtr[voxelIndex] > -1) {
+            // Get the mean value for the current voxel
+            meanValue = meanImgDataPtr[voxelIndex];
+            if (meanValue == 0) {
+                meanValue = std::numeric_limits<DTYPE>::epsilon();
+            }
+            max_desc = 0;
+            mindIndex = voxelIndex;
+            for (int t = 0; t < lengthDescriptor; t++) {
+                descValue = (DTYPE)exp(-MINDSSCImgDataPtr[mindIndex] / meanValue);
+                MINDSSCImgDataPtr[mindIndex] = descValue;
+                max_desc = std::max(max_desc, descValue);
+                mindIndex += voxelNumber;
+            }
+
+            mindIndex = voxelIndex;
+            for (int t = 0; t < lengthDescriptor; t++) {
+                descValue = MINDSSCImgDataPtr[mindIndex];
+                MINDSSCImgDataPtr[mindIndex] = descValue / max_desc;
+                mindIndex += voxelNumber;
+            }
+        } // mask
+    } // voxIndex
+    // Mr Propre
+    nifti_image_free(diff_imageShifted);
+    free(mask_diff_image);
+    nifti_image_free(diff_image);
+    nifti_image_free(shiftedImage);
+    nifti_image_free(mean_img);
+    currentInputImage->data = nullptr;
+    nifti_image_free(currentInputImage);
 }
 /* *************************************************************** */
-void GetMINDSSCImageDesciptor(nifti_image* inputImgPtr,
+void GetMINDSSCImageDescriptor(nifti_image* inputImgPtr,
                               nifti_image* MINDSSCImgPtr,
                               int *maskPtr,
                               int descriptorOffset,
                               int current_timepoint) {
 #ifndef NDEBUG
-   reg_print_fct_debug("GetMINDSSCImageDesciptor()");
+    reg_print_fct_debug("GetMINDSSCImageDescriptor()");
 #endif
-   if(inputImgPtr->datatype != MINDSSCImgPtr->datatype) {
-      reg_print_fct_error("reg_mindssc -- GetMINDSSCImageDesciptor");
-      reg_print_msg_error("The input image and the MINDSSC image must have the same datatype !");
-      reg_exit();
-   }
-
-   switch (inputImgPtr->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      GetMINDSSCImageDesciptor_core<float>(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      GetMINDSSCImageDesciptor_core<double>(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint);
-      break;
-   default:
-      reg_print_fct_error("GetMINDSSCImageDesciptor");
-      reg_print_msg_error("Input image datatype not supported");
-      reg_exit();
-      break;
-   }
+    if (inputImgPtr->datatype != MINDSSCImgPtr->datatype) {
+        reg_print_fct_error("reg_mindssc -- GetMINDSSCImageDescriptor");
+        reg_print_msg_error("The input image and the MINDSSC image must have the same datatype !");
+        reg_exit();
+    }
+
+    switch (inputImgPtr->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        GetMINDSSCImageDescriptor_core<float>(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        GetMINDSSCImageDescriptor_core<double>(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint);
+        break;
+    default:
+        reg_print_fct_error("GetMINDSSCImageDescriptor");
+        reg_print_msg_error("Input image datatype not supported");
+        reg_exit();
+        break;
+    }
 }
 /* *************************************************************** */
-reg_mind::reg_mind()
-   : reg_ssd()
-{
-   memset(this->timePointWeightDescriptor,0,255*sizeof(double) );
-   this->referenceImageDescriptor=nullptr;
-   this->floatingImageDescriptor=nullptr;
-   this->warpedFloatingImageDescriptor=nullptr;
-   this->warpedReferenceImageDescriptor=nullptr;
-   this->mind_type=MIND_TYPE;
-   this->descriptorOffset=1;
+reg_mind::reg_mind(): reg_ssd() {
+    this->referenceImageDescriptor = nullptr;
+    this->floatingImageDescriptor = nullptr;
+    this->warpedFloatingImageDescriptor = nullptr;
+    this->warpedReferenceImageDescriptor = nullptr;
+    this->mind_type = MIND_TYPE;
+    this->descriptorOffset = 1;
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_mind constructor called");
+    reg_print_msg_debug("reg_mind constructor called");
 #endif
 }
 /* *************************************************************** */
-void reg_mind::SetDescriptorOffset(int val)
-{
-   this->descriptorOffset = val;
+void reg_mind::SetDescriptorOffset(int val) {
+    this->descriptorOffset = val;
 }
 /* *************************************************************** */
-int reg_mind::GetDescriptorOffset()
-{
-   return this->descriptorOffset;
+int reg_mind::GetDescriptorOffset() {
+    return this->descriptorOffset;
 }
 /* *************************************************************** */
 reg_mind::~reg_mind() {
-   if(this->referenceImageDescriptor != nullptr)
-      nifti_image_free(this->referenceImageDescriptor);
-   this->referenceImageDescriptor = nullptr;
-
-   if(this->warpedFloatingImageDescriptor != nullptr)
-      nifti_image_free(this->warpedFloatingImageDescriptor);
-   this->warpedFloatingImageDescriptor = nullptr;
-
-   if(this->floatingImageDescriptor != nullptr)
-      nifti_image_free(this->floatingImageDescriptor);
-   this->floatingImageDescriptor = nullptr;
-
-   if(this->warpedReferenceImageDescriptor != nullptr)
-      nifti_image_free(this->warpedReferenceImageDescriptor);
-   this->warpedReferenceImageDescriptor = nullptr;
+    if (this->referenceImageDescriptor != nullptr) {
+        nifti_image_free(this->referenceImageDescriptor);
+        this->referenceImageDescriptor = nullptr;
+    }
+    if (this->warpedFloatingImageDescriptor != nullptr) {
+        nifti_image_free(this->warpedFloatingImageDescriptor);
+        this->warpedFloatingImageDescriptor = nullptr;
+    }
+    if (this->floatingImageDescriptor != nullptr) {
+        nifti_image_free(this->floatingImageDescriptor);
+        this->floatingImageDescriptor = nullptr;
+    }
+    if (this->warpedReferenceImageDescriptor != nullptr) {
+        nifti_image_free(this->warpedReferenceImageDescriptor);
+        this->warpedReferenceImageDescriptor = nullptr;
+    }
 }
 /* *************************************************************** */
 void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
@@ -418,417 +404,384 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
                                  int *maskFloPtr,
                                  nifti_image *warRefImgPtr,
                                  nifti_image *warRefGraPtr,
-                                 nifti_image *bckVoxBasedGraPtr)
-{
-   // Set the pointers using the parent class function
-   reg_ssd::InitialiseMeasure(refImgPtr,
-                              floImgPtr,
-                              maskRefPtr,
-                              warFloImgPtr,
-                              warFloGraPtr,
-                              forVoxBasedGraPtr,
-                              forwardLocalWeightPtr,
-                              maskFloPtr,
-                              warRefImgPtr,
-                              warRefGraPtr,
-                              bckVoxBasedGraPtr);
-
-   this->discriptor_number = 0;
-   if(this->mind_type==MIND_TYPE){
-      discriptor_number=this->referenceImagePointer->nz>1?6:4;
-   }
-   else if(this->mind_type==MINDSSC_TYPE){
-      discriptor_number=this->referenceImagePointer->nz>1?12:4;
-
-   }
-   // Initialise the reference descriptor
-   this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer);
-   this->referenceImageDescriptor->dim[0]=this->referenceImageDescriptor->ndim=4;
-   this->referenceImageDescriptor->dim[4]=this->referenceImageDescriptor->nt=this->discriptor_number;
-   this->referenceImageDescriptor->nvox = (size_t)this->referenceImageDescriptor->nx*
-         this->referenceImageDescriptor->ny*
-         this->referenceImageDescriptor->nz*
-         this->referenceImageDescriptor->nt;
-   this->referenceImageDescriptor->data=(void *)malloc(this->referenceImageDescriptor->nvox*
-                                                       this->referenceImageDescriptor->nbyper);
-   // Initialise the warped floating descriptor
-   this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer);
-   this->warpedFloatingImageDescriptor->dim[0]=this->warpedFloatingImageDescriptor->ndim=4;
-   this->warpedFloatingImageDescriptor->dim[4]=this->warpedFloatingImageDescriptor->nt=this->discriptor_number;
-   this->warpedFloatingImageDescriptor->nvox = (size_t)this->warpedFloatingImageDescriptor->nx*
-         this->warpedFloatingImageDescriptor->ny*
-         this->warpedFloatingImageDescriptor->nz*
-         this->warpedFloatingImageDescriptor->nt;
-   this->warpedFloatingImageDescriptor->data=(void *)malloc(this->warpedFloatingImageDescriptor->nvox*
-                                                            this->warpedFloatingImageDescriptor->nbyper);
-
-   if(this->isSymmetric) {
-      if(this->floatingImagePointer->nt>1 || this->warpedReferenceImagePointer->nt>1){
-         reg_print_msg_error("reg_mind does not support multiple time point image");
-         reg_exit();
-      }
-      // Initialise the floating descriptor
-      this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer);
-      this->floatingImageDescriptor->dim[0]=this->floatingImageDescriptor->ndim=4;
-      this->floatingImageDescriptor->dim[4]=this->floatingImageDescriptor->nt=this->discriptor_number;
-      this->floatingImageDescriptor->nvox = (size_t)this->floatingImageDescriptor->nx*
-            this->floatingImageDescriptor->ny*
-            this->floatingImageDescriptor->nz*
+                                 nifti_image *bckVoxBasedGraPtr) {
+    // Set the pointers using the parent class function
+    reg_ssd::InitialiseMeasure(refImgPtr,
+                               floImgPtr,
+                               maskRefPtr,
+                               warFloImgPtr,
+                               warFloGraPtr,
+                               forVoxBasedGraPtr,
+                               forwardLocalWeightPtr,
+                               maskFloPtr,
+                               warRefImgPtr,
+                               warRefGraPtr,
+                               bckVoxBasedGraPtr);
+
+    this->descriptor_number = 0;
+    if (this->mind_type == MIND_TYPE) {
+        descriptor_number = this->referenceImagePointer->nz > 1 ? 6 : 4;
+    } else if (this->mind_type == MINDSSC_TYPE) {
+        descriptor_number = this->referenceImagePointer->nz > 1 ? 12 : 4;
+
+    }
+    // Initialise the reference descriptor
+    this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer);
+    this->referenceImageDescriptor->dim[0] = this->referenceImageDescriptor->ndim = 4;
+    this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptor_number;
+    this->referenceImageDescriptor->nvox = (size_t)this->referenceImageDescriptor->nx *
+        this->referenceImageDescriptor->ny *
+        this->referenceImageDescriptor->nz *
+        this->referenceImageDescriptor->nt;
+    this->referenceImageDescriptor->data = (void*)malloc(this->referenceImageDescriptor->nvox *
+                                                         this->referenceImageDescriptor->nbyper);
+    // Initialise the warped floating descriptor
+    this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer);
+    this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4;
+    this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptor_number;
+    this->warpedFloatingImageDescriptor->nvox = (size_t)this->warpedFloatingImageDescriptor->nx *
+        this->warpedFloatingImageDescriptor->ny *
+        this->warpedFloatingImageDescriptor->nz *
+        this->warpedFloatingImageDescriptor->nt;
+    this->warpedFloatingImageDescriptor->data = (void*)malloc(this->warpedFloatingImageDescriptor->nvox *
+                                                              this->warpedFloatingImageDescriptor->nbyper);
+
+    if (this->isSymmetric) {
+        if (this->floatingImagePointer->nt > 1 || this->warpedReferenceImagePointer->nt > 1) {
+            reg_print_msg_error("reg_mind does not support multiple time point image");
+            reg_exit();
+        }
+        // Initialise the floating descriptor
+        this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer);
+        this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4;
+        this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptor_number;
+        this->floatingImageDescriptor->nvox = (size_t)this->floatingImageDescriptor->nx *
+            this->floatingImageDescriptor->ny *
+            this->floatingImageDescriptor->nz *
             this->floatingImageDescriptor->nt;
-      this->floatingImageDescriptor->data=(void *)malloc(this->floatingImageDescriptor->nvox*
-                                                         this->floatingImageDescriptor->nbyper);
-      // Initialise the warped floating descriptor
-      this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer);
-      this->warpedReferenceImageDescriptor->dim[0]=this->warpedReferenceImageDescriptor->ndim=4;
-      this->warpedReferenceImageDescriptor->dim[4]=this->warpedReferenceImageDescriptor->nt=this->discriptor_number;
-      this->warpedReferenceImageDescriptor->nvox = (size_t)this->warpedReferenceImageDescriptor->nx*
-            this->warpedReferenceImageDescriptor->ny*
-            this->warpedReferenceImageDescriptor->nz*
+        this->floatingImageDescriptor->data = (void*)malloc(this->floatingImageDescriptor->nvox *
+                                                            this->floatingImageDescriptor->nbyper);
+        // Initialise the warped floating descriptor
+        this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer);
+        this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4;
+        this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptor_number;
+        this->warpedReferenceImageDescriptor->nvox = (size_t)this->warpedReferenceImageDescriptor->nx *
+            this->warpedReferenceImageDescriptor->ny *
+            this->warpedReferenceImageDescriptor->nz *
             this->warpedReferenceImageDescriptor->nt;
-      this->warpedReferenceImageDescriptor->data=(void *)malloc(this->warpedReferenceImageDescriptor->nvox*
-                                                                this->warpedReferenceImageDescriptor->nbyper);
-   }
+        this->warpedReferenceImageDescriptor->data = (void*)malloc(this->warpedReferenceImageDescriptor->nvox *
+                                                                   this->warpedReferenceImageDescriptor->nbyper);
+    }
 
-   for(int i=0;i<referenceImageDescriptor->nt;++i) {
-      this->timePointWeightDescriptor[i]=1.0;
-   }
+    for (int i = 0; i < referenceImageDescriptor->nt; ++i) {
+        this->timePointWeightDescriptor[i] = 1.0;
+    }
 
 #ifndef NDEBUG
-   char text[255];
-   reg_print_msg_debug("reg_mind::InitialiseMeasure().");
-   sprintf(text, "Active time point:");
-   for(int i=0; i<this->referenceImageDescriptor->nt; ++i)
-      if(this->timePointWeightDescriptor[i]>0.0)
-         sprintf(text, "%s %i", text, i);
-   reg_print_msg_debug(text);
+    char text[255];
+    reg_print_msg_debug("reg_mind::InitialiseMeasure().");
+    sprintf(text, "Active time point:");
+    for (int i = 0; i < this->referenceImageDescriptor->nt; ++i)
+        if (this->timePointWeightDescriptor[i] > 0)
+            sprintf(text, "%s %i", text, i);
+    reg_print_msg_debug(text);
 #endif
 }
 /* *************************************************************** */
-double reg_mind::GetSimilarityMeasureValue()
-{
-   double MINDValue=0.;
-   for(int t=0; t<this->referenceImagePointer->nt; ++t){
-      if(this->timePointWeight[t]>0.0){
-         size_t voxelNumber = (size_t)referenceImagePointer->nx *
-               referenceImagePointer->ny * referenceImagePointer->nz;
-         int *combinedMask = (int *)malloc(voxelNumber*sizeof(int));
-         memcpy(combinedMask, this->referenceMaskPointer, voxelNumber*sizeof(int));
-         reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask);
-         reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask);
-
-         if(this->mind_type==MIND_TYPE){
-            GetMINDImageDesciptor(this->referenceImagePointer,
-                                  this->referenceImageDescriptor,
-                                  combinedMask,
-                                  this->descriptorOffset,
-                                  t);
-            GetMINDImageDesciptor(this->warpedFloatingImagePointer,
-                                  this->warpedFloatingImageDescriptor,
-                                  combinedMask,
-                                  this->descriptorOffset,
-                                  t);
-         }
-         else if(this->mind_type==MINDSSC_TYPE){
-            GetMINDSSCImageDesciptor(this->referenceImagePointer,
-                                     this->referenceImageDescriptor,
-                                     combinedMask,
-                                     this->descriptorOffset,
-                                     t);
-            GetMINDSSCImageDesciptor(this->warpedFloatingImagePointer,
-                                     this->warpedFloatingImageDescriptor,
-                                     combinedMask,
-                                     this->descriptorOffset,
-                                     t);
-         }
-
-         switch(this->referenceImageDescriptor->datatype)
-         {
-         case NIFTI_TYPE_FLOAT32:
-            MINDValue += reg_getSSDValue<float>
-                  (this->referenceImageDescriptor,
-                   this->warpedFloatingImageDescriptor,
-                   this->timePointWeightDescriptor,
-                   nullptr, // HERE TODO this->forwardJacDetImagePointer,
-                   combinedMask,
-                   this->currentValue,
-                   nullptr
-                   );
-            break;
-         case NIFTI_TYPE_FLOAT64:
-            MINDValue += reg_getSSDValue<double>
-                  (this->referenceImageDescriptor,
-                   this->warpedFloatingImageDescriptor,
-                   this->timePointWeightDescriptor,
-                   nullptr, // HERE TODO this->forwardJacDetImagePointer,
-                   combinedMask,
-                   this->currentValue,
-                   nullptr
-                   );
-            break;
-         default:
-            reg_print_fct_error("reg_mind::GetSimilarityMeasureValue");
-            reg_print_msg_error("Warped pixel type unsupported");
-            reg_exit();
-         }
-         free(combinedMask);
-
-         // Backward computation
-         if(this->isSymmetric)
-         {
-            voxelNumber = (size_t)floatingImagePointer->nx *
-                  floatingImagePointer->ny * floatingImagePointer->nz;
-            combinedMask = (int *)malloc(voxelNumber*sizeof(int));
-            memcpy(combinedMask, this->floatingMaskPointer, voxelNumber*sizeof(int));
-            reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask);
-            reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask);
-
-            if(this->mind_type==MIND_TYPE){
-               GetMINDImageDesciptor(this->floatingImagePointer,
-                                     this->floatingImageDescriptor,
-                                     combinedMask,
-                                     this->descriptorOffset,
-                                     t);
-               GetMINDImageDesciptor(this->warpedReferenceImagePointer,
-                                     this->warpedReferenceImageDescriptor,
-                                     combinedMask,
-                                     this->descriptorOffset,
-                                     t);
-            }
-            else if(this->mind_type==MINDSSC_TYPE){
-               GetMINDSSCImageDesciptor(this->floatingImagePointer,
-                                        this->floatingImageDescriptor,
-                                        combinedMask,
-                                        this->descriptorOffset,
-                                        t);
-               GetMINDSSCImageDesciptor(this->warpedReferenceImagePointer,
-                                        this->warpedReferenceImageDescriptor,
-                                        combinedMask,
-                                        this->descriptorOffset,
-                                        t);
+double reg_mind::GetSimilarityMeasureValue() {
+    double MINDValue = 0.;
+    for (int t = 0; t < this->referenceImagePointer->nt; ++t) {
+        if (this->timePointWeight[t] > 0) {
+            size_t voxelNumber = (size_t)referenceImagePointer->nx *
+                referenceImagePointer->ny * referenceImagePointer->nz;
+            int *combinedMask = (int*)malloc(voxelNumber * sizeof(int));
+            memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int));
+            reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask);
+            reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask);
+
+            if (this->mind_type == MIND_TYPE) {
+                GetMINDImageDescriptor(this->referenceImagePointer,
+                                      this->referenceImageDescriptor,
+                                      combinedMask,
+                                      this->descriptorOffset,
+                                      t);
+                GetMINDImageDescriptor(this->warpedFloatingImagePointer,
+                                      this->warpedFloatingImageDescriptor,
+                                      combinedMask,
+                                      this->descriptorOffset,
+                                      t);
+            } else if (this->mind_type == MINDSSC_TYPE) {
+                GetMINDSSCImageDescriptor(this->referenceImagePointer,
+                                         this->referenceImageDescriptor,
+                                         combinedMask,
+                                         this->descriptorOffset,
+                                         t);
+                GetMINDSSCImageDescriptor(this->warpedFloatingImagePointer,
+                                         this->warpedFloatingImageDescriptor,
+                                         combinedMask,
+                                         this->descriptorOffset,
+                                         t);
             }
 
-            switch(this->floatingImageDescriptor->datatype)
-            {
+            switch (this->referenceImageDescriptor->datatype) {
             case NIFTI_TYPE_FLOAT32:
-               MINDValue += reg_getSSDValue<float>
-                     (this->floatingImageDescriptor,
-                      this->warpedReferenceImageDescriptor,
-                      this->timePointWeightDescriptor,
-                      nullptr, // HERE TODO this->backwardJacDetImagePointer,
-                      combinedMask,
-                      this->currentValue,
-                      nullptr
-                      );
-               break;
+                MINDValue += reg_getSSDValue<float>(this->referenceImageDescriptor,
+                                                    this->warpedFloatingImageDescriptor,
+                                                    this->timePointWeightDescriptor,
+                                                    nullptr, // TODO this->forwardJacDetImagePointer,
+                                                    combinedMask,
+                                                    this->currentValue,
+                                                    nullptr);
+                break;
             case NIFTI_TYPE_FLOAT64:
-               MINDValue += reg_getSSDValue<double>
-                     (this->floatingImageDescriptor,
-                      this->warpedReferenceImageDescriptor,
-                      this->timePointWeightDescriptor,
-                      nullptr, // HERE TODO this->backwardJacDetImagePointer,
-                      combinedMask,
-                      this->currentValue,
-                      nullptr
-                      );
-               break;
+                MINDValue += reg_getSSDValue<double>(this->referenceImageDescriptor,
+                                                     this->warpedFloatingImageDescriptor,
+                                                     this->timePointWeightDescriptor,
+                                                     nullptr, // TODO this->forwardJacDetImagePointer,
+                                                     combinedMask,
+                                                     this->currentValue,
+                                                     nullptr);
+                break;
             default:
-               reg_print_fct_error("reg_mind::GetSimilarityMeasureValue");
-               reg_print_msg_error("Warped pixel type unsupported");
-               reg_exit();
+                reg_print_fct_error("reg_mind::GetSimilarityMeasureValue");
+                reg_print_msg_error("Warped pixel type unsupported");
+                reg_exit();
             }
             free(combinedMask);
-         }
-      }
-   }
-   return MINDValue;// /(double) this->referenceImageDescriptor->nt;
+
+            // Backward computation
+            if (this->isSymmetric) {
+                voxelNumber = (size_t)floatingImagePointer->nx *
+                    floatingImagePointer->ny * floatingImagePointer->nz;
+                combinedMask = (int*)malloc(voxelNumber * sizeof(int));
+                memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int));
+                reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask);
+                reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask);
+
+                if (this->mind_type == MIND_TYPE) {
+                    GetMINDImageDescriptor(this->floatingImagePointer,
+                                          this->floatingImageDescriptor,
+                                          combinedMask,
+                                          this->descriptorOffset,
+                                          t);
+                    GetMINDImageDescriptor(this->warpedReferenceImagePointer,
+                                          this->warpedReferenceImageDescriptor,
+                                          combinedMask,
+                                          this->descriptorOffset,
+                                          t);
+                } else if (this->mind_type == MINDSSC_TYPE) {
+                    GetMINDSSCImageDescriptor(this->floatingImagePointer,
+                                             this->floatingImageDescriptor,
+                                             combinedMask,
+                                             this->descriptorOffset,
+                                             t);
+                    GetMINDSSCImageDescriptor(this->warpedReferenceImagePointer,
+                                             this->warpedReferenceImageDescriptor,
+                                             combinedMask,
+                                             this->descriptorOffset,
+                                             t);
+                }
+
+                switch (this->floatingImageDescriptor->datatype) {
+                case NIFTI_TYPE_FLOAT32:
+                    MINDValue += reg_getSSDValue<float>(this->floatingImageDescriptor,
+                                                        this->warpedReferenceImageDescriptor,
+                                                        this->timePointWeightDescriptor,
+                                                        nullptr, // TODO this->backwardJacDetImagePointer,
+                                                        combinedMask,
+                                                        this->currentValue,
+                                                        nullptr);
+                    break;
+                case NIFTI_TYPE_FLOAT64:
+                    MINDValue += reg_getSSDValue<double>(this->floatingImageDescriptor,
+                                                         this->warpedReferenceImageDescriptor,
+                                                         this->timePointWeightDescriptor,
+                                                         nullptr, // TODO this->backwardJacDetImagePointer,
+                                                         combinedMask,
+                                                         this->currentValue,
+                                                         nullptr);
+                    break;
+                default:
+                    reg_print_fct_error("reg_mind::GetSimilarityMeasureValue");
+                    reg_print_msg_error("Warped pixel type unsupported");
+                    reg_exit();
+                }
+                free(combinedMask);
+            }
+        }
+    }
+    return MINDValue;   // (double) this->referenceImageDescriptor->nt;
 }
 /* *************************************************************** */
-void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
-{
-   // Check if the specified time point exists and is active
-   reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-   if(this->timePointWeight[current_timepoint]==0.0)
-      return;
-
-   // Create a combined mask to ignore masked and undefined values
-   size_t voxelNumber = (size_t)this->referenceImagePointer->nx *
-         this->referenceImagePointer->ny *
-         this->referenceImagePointer->nz;
-   int *combinedMask = (int *)malloc(voxelNumber*sizeof(int));
-   memcpy(combinedMask, this->referenceMaskPointer, voxelNumber*sizeof(int));
-   reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask);
-   reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask);
-
-   if(this->mind_type==MIND_TYPE){
-      // Compute the reference image descriptors
-      GetMINDImageDesciptor(this->referenceImagePointer,
-                            this->referenceImageDescriptor,
-                            combinedMask,
-                            this->descriptorOffset,
-                            current_timepoint);
-      // Compute the warped floating image descriptors
-      GetMINDImageDesciptor(this->warpedFloatingImagePointer,
-                            this->warpedFloatingImageDescriptor,
-                            combinedMask,
-                            this->descriptorOffset,
-                            current_timepoint);
-   }
-   else if(this->mind_type==MINDSSC_TYPE){
-      // Compute the reference image descriptors
-      GetMINDSSCImageDesciptor(this->referenceImagePointer,
-                               this->referenceImageDescriptor,
-                               combinedMask,
-                               this->descriptorOffset,
-                               current_timepoint);
-      // Compute the warped floating image descriptors
-      GetMINDSSCImageDesciptor(this->warpedFloatingImagePointer,
-                               this->warpedFloatingImageDescriptor,
-                               combinedMask,
-                               this->descriptorOffset,
-                               current_timepoint);
-   }
-
-
-   for(int desc_index=0; desc_index<this->discriptor_number; ++desc_index){
-      // Compute the warped image descriptors gradient
-       reg_getImageGradient_symDiff(this->warpedFloatingImageDescriptor,
-                                    this->warpedFloatingGradientImagePointer,
-                                    combinedMask,
-                                    std::numeric_limits<float>::quiet_NaN(),
-                                    desc_index);
-
-      // Compute the gradient of the ssd for the forward transformation
-      switch(referenceImageDescriptor->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_getVoxelBasedSSDGradient<float>
-               (this->referenceImageDescriptor,
-                this->warpedFloatingImageDescriptor,
-                this->warpedFloatingGradientImagePointer,
-                this->forwardVoxelBasedGradientImagePointer,
-                nullptr, // no Jacobian required here,
-                combinedMask,
-                desc_index,
-                1.0, //all discriptors given weight of 1
-                nullptr
-                );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_getVoxelBasedSSDGradient<double>
-               (this->referenceImageDescriptor,
-                this->warpedFloatingImageDescriptor,
-                this->warpedFloatingGradientImagePointer,
-                this->forwardVoxelBasedGradientImagePointer,
-                nullptr, // no Jacobian required here,
-                combinedMask,
-                desc_index,
-                1.0, //all discriptors given weight of 1
-                nullptr
-                );
-         break;
-      default:
-         reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient");
-         reg_print_msg_error("Unsupported datatype");
-         reg_exit();
-      }
-   }
-   free(combinedMask);
-
-   // Compute the gradient of the ssd for the backward transformation
-   if(this->isSymmetric)
-   {
-      voxelNumber = (size_t)floatingImagePointer->nx *
+void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+    // Check if the specified time point exists and is active
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
+    if (this->timePointWeight[current_timepoint] == 0)
+        return;
+
+    // Create a combined mask to ignore masked and undefined values
+    size_t voxelNumber = (size_t)this->referenceImagePointer->nx *
+        this->referenceImagePointer->ny *
+        this->referenceImagePointer->nz;
+    int *combinedMask = (int*)malloc(voxelNumber * sizeof(int));
+    memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int));
+    reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask);
+    reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask);
+
+    if (this->mind_type == MIND_TYPE) {
+        // Compute the reference image descriptors
+        GetMINDImageDescriptor(this->referenceImagePointer,
+                              this->referenceImageDescriptor,
+                              combinedMask,
+                              this->descriptorOffset,
+                              current_timepoint);
+        // Compute the warped floating image descriptors
+        GetMINDImageDescriptor(this->warpedFloatingImagePointer,
+                              this->warpedFloatingImageDescriptor,
+                              combinedMask,
+                              this->descriptorOffset,
+                              current_timepoint);
+    } else if (this->mind_type == MINDSSC_TYPE) {
+        // Compute the reference image descriptors
+        GetMINDSSCImageDescriptor(this->referenceImagePointer,
+                                 this->referenceImageDescriptor,
+                                 combinedMask,
+                                 this->descriptorOffset,
+                                 current_timepoint);
+        // Compute the warped floating image descriptors
+        GetMINDSSCImageDescriptor(this->warpedFloatingImagePointer,
+                                 this->warpedFloatingImageDescriptor,
+                                 combinedMask,
+                                 this->descriptorOffset,
+                                 current_timepoint);
+    }
+
+
+    for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) {
+        // Compute the warped image descriptors gradient
+        reg_getImageGradient_symDiff(this->warpedFloatingImageDescriptor,
+                                     this->warpedFloatingGradientImagePointer,
+                                     combinedMask,
+                                     std::numeric_limits<float>::quiet_NaN(),
+                                     desc_index);
+
+        // Compute the gradient of the ssd for the forward transformation
+        switch (referenceImageDescriptor->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_getVoxelBasedSSDGradient<float>(this->referenceImageDescriptor,
+                                                this->warpedFloatingImageDescriptor,
+                                                this->warpedFloatingGradientImagePointer,
+                                                this->forwardVoxelBasedGradientImagePointer,
+                                                nullptr, // no Jacobian required here,
+                                                combinedMask,
+                                                desc_index,
+                                                1.0, //all descriptors given weight of 1
+                                                nullptr);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_getVoxelBasedSSDGradient<double>(this->referenceImageDescriptor,
+                                                 this->warpedFloatingImageDescriptor,
+                                                 this->warpedFloatingGradientImagePointer,
+                                                 this->forwardVoxelBasedGradientImagePointer,
+                                                 nullptr, // no Jacobian required here,
+                                                 combinedMask,
+                                                 desc_index,
+                                                 1.0, //all descriptors given weight of 1
+                                                 nullptr);
+            break;
+        default:
+            reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient");
+            reg_print_msg_error("Unsupported datatype");
+            reg_exit();
+        }
+    }
+    free(combinedMask);
+
+    // Compute the gradient of the ssd for the backward transformation
+    if (this->isSymmetric) {
+        voxelNumber = (size_t)floatingImagePointer->nx *
             floatingImagePointer->ny * floatingImagePointer->nz;
-      combinedMask = (int *)malloc(voxelNumber*sizeof(int));
-      memcpy(combinedMask, this->floatingMaskPointer, voxelNumber*sizeof(int));
-      reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask);
-      reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask);
-
-      if(this->mind_type==MIND_TYPE){
-         GetMINDImageDesciptor(this->floatingImagePointer,
-                               this->floatingImageDescriptor,
-                               combinedMask,
-                               this->descriptorOffset,
-                               current_timepoint);
-         GetMINDImageDesciptor(this->warpedReferenceImagePointer,
-                               this->warpedReferenceImageDescriptor,
-                               combinedMask,
-                               this->descriptorOffset,
-                               current_timepoint);
-      }
-      else if(this->mind_type==MINDSSC_TYPE){
-         GetMINDSSCImageDesciptor(this->floatingImagePointer,
+        combinedMask = (int*)malloc(voxelNumber * sizeof(int));
+        memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int));
+        reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask);
+        reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask);
+
+        if (this->mind_type == MIND_TYPE) {
+            GetMINDImageDescriptor(this->floatingImagePointer,
                                   this->floatingImageDescriptor,
                                   combinedMask,
                                   this->descriptorOffset,
                                   current_timepoint);
-         GetMINDSSCImageDesciptor(this->warpedReferenceImagePointer,
+            GetMINDImageDescriptor(this->warpedReferenceImagePointer,
                                   this->warpedReferenceImageDescriptor,
                                   combinedMask,
                                   this->descriptorOffset,
                                   current_timepoint);
-      }
-
-      for(int desc_index=0; desc_index<this->discriptor_number; ++desc_index){
-          reg_getImageGradient_symDiff(this->warpedReferenceImageDescriptor,
-                                       this->warpedReferenceGradientImagePointer,
-                                       combinedMask,
-                                       std::numeric_limits<float>::quiet_NaN(),
-                                       desc_index);
-
-         // Compute the gradient of the nmi for the backward transformation
-         switch(floatingImagePointer->datatype)
-         {
-         case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedSSDGradient<float>
-                  (this->floatingImageDescriptor,
-                   this->warpedReferenceImageDescriptor,
-                   this->warpedReferenceGradientImagePointer,
-                   this->backwardVoxelBasedGradientImagePointer,
-                   nullptr, // no Jacobian required here,
-                   combinedMask,
-                   desc_index,
-                   1.0, //all discriptors given weight of 1
-                   nullptr
-                   );
-            break;
-         case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedSSDGradient<double>
-                  (this->floatingImageDescriptor,
-                   this->warpedReferenceImageDescriptor,
-                   this->warpedReferenceGradientImagePointer,
-                   this->backwardVoxelBasedGradientImagePointer,
-                   nullptr, // no Jacobian required here,
-                   combinedMask,
-                   desc_index,
-                   1.0, //all discriptors given weight of 1
-                   nullptr
-                   );
-            break;
-         default:
-            reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient");
-            reg_print_msg_error("Unsupported datatype");
-            reg_exit();
-         }
-      }
-      free(combinedMask);
-   }
+        } else if (this->mind_type == MINDSSC_TYPE) {
+            GetMINDSSCImageDescriptor(this->floatingImagePointer,
+                                     this->floatingImageDescriptor,
+                                     combinedMask,
+                                     this->descriptorOffset,
+                                     current_timepoint);
+            GetMINDSSCImageDescriptor(this->warpedReferenceImagePointer,
+                                     this->warpedReferenceImageDescriptor,
+                                     combinedMask,
+                                     this->descriptorOffset,
+                                     current_timepoint);
+        }
+
+        for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) {
+            reg_getImageGradient_symDiff(this->warpedReferenceImageDescriptor,
+                                         this->warpedReferenceGradientImagePointer,
+                                         combinedMask,
+                                         std::numeric_limits<float>::quiet_NaN(),
+                                         desc_index);
+
+            // Compute the gradient of the nmi for the backward transformation
+            switch (floatingImagePointer->datatype) {
+            case NIFTI_TYPE_FLOAT32:
+                reg_getVoxelBasedSSDGradient<float>(this->floatingImageDescriptor,
+                                                    this->warpedReferenceImageDescriptor,
+                                                    this->warpedReferenceGradientImagePointer,
+                                                    this->backwardVoxelBasedGradientImagePointer,
+                                                    nullptr, // no Jacobian required here,
+                                                    combinedMask,
+                                                    desc_index,
+                                                    1.0, //all descriptors given weight of 1
+                                                    nullptr);
+                break;
+            case NIFTI_TYPE_FLOAT64:
+                reg_getVoxelBasedSSDGradient<double>(this->floatingImageDescriptor,
+                                                     this->warpedReferenceImageDescriptor,
+                                                     this->warpedReferenceGradientImagePointer,
+                                                     this->backwardVoxelBasedGradientImagePointer,
+                                                     nullptr, // no Jacobian required here,
+                                                     combinedMask,
+                                                     desc_index,
+                                                     1.0, //all descriptors given weight of 1
+                                                     nullptr);
+                break;
+            default:
+                reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient");
+                reg_print_msg_error("Unsupported datatype");
+                reg_exit();
+            }
+        }
+        free(combinedMask);
+    }
 }
 /* *************************************************************** */
 /* *************************************************************** */
-reg_mindssc::reg_mindssc()
-   : reg_mind()
-{
-   this->mind_type=MINDSSC_TYPE;
+reg_mindssc::reg_mindssc(): reg_mind() {
+    this->mind_type = MINDSSC_TYPE;
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_mindssc constructor called");
+    reg_print_msg_debug("reg_mindssc constructor called");
 #endif
 }
 /* *************************************************************** */
-reg_mindssc::~reg_mindssc()
-{
+reg_mindssc::~reg_mindssc() {
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_mindssc desctructor called");
+    reg_print_msg_debug("reg_mindssc destructor called");
 #endif
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index 6d2aafa8..8c1c7d7f 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -13,12 +13,8 @@
 #pragma once
 
 #include "_reg_ssd.h"
-//#include "ConvolutionKernel.h"
-//#include "Platform.h"
-#include <math.h>
 #include "_reg_globalTrans.h"
 #include "_reg_resampling.h"
-#include <algorithm>
 
 #define MIND_TYPE 0
 #define MINDSSC_TYPE 1
@@ -26,8 +22,7 @@
 /* *************************************************************** */
 /* *************************************************************** */
 /// @brief MIND measure of similarity class
-class reg_mind : public reg_ssd
-{
+class reg_mind: public reg_ssd {
 public:
     /// @brief reg_mind class constructor
     reg_mind();
@@ -56,20 +51,19 @@ class reg_mind : public reg_ssd
     virtual int GetDescriptorOffset();
 
 protected:
-   nifti_image *referenceImageDescriptor;
-   nifti_image *floatingImageDescriptor;
-   nifti_image *warpedReferenceImageDescriptor;
-   nifti_image *warpedFloatingImageDescriptor;
-   double timePointWeightDescriptor[255];
+    nifti_image *referenceImageDescriptor;
+    nifti_image *floatingImageDescriptor;
+    nifti_image *warpedReferenceImageDescriptor;
+    nifti_image *warpedFloatingImageDescriptor;
+    double timePointWeightDescriptor[255] = {0};
 
-   int descriptorOffset;
-   int mind_type;
-   int discriptor_number;
+    int descriptorOffset;
+    int mind_type;
+    int descriptor_number;
 };
 /* *************************************************************** */
 /// @brief MIND-SSC measure of similarity class
-class reg_mindssc : public reg_mind
-{
+class reg_mindssc: public reg_mind {
 public:
     /// @brief reg_mind class constructor
     reg_mindssc();
@@ -79,14 +73,14 @@ class reg_mindssc : public reg_mind
 /* *************************************************************** */
 
 extern "C++"
-void GetMINDImageDesciptor(nifti_image* inputImgPtr,
-                           nifti_image* MINDImgPtr,
+void GetMINDImageDescriptor(nifti_image *inputImgPtr,
+                           nifti_image *MINDImgPtr,
                            int *mask,
                            int descriptorOffset,
                            int current_timepoint);
 extern "C++"
-void GetMINDSSCImageDesciptor(nifti_image* inputImgPtr,
-                              nifti_image* MINDSSCImgPtr,
+void GetMINDSSCImageDescriptor(nifti_image *inputImgPtr,
+                              nifti_image *MINDSSCImgPtr,
                               int *mask,
                               int descriptorOffset,
                               int current_timepoint);
diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp
index 349eee33..ebce7f4b 100644
--- a/reg-lib/cpu/_reg_mrf.cpp
+++ b/reg-lib/cpu/_reg_mrf.cpp
@@ -417,7 +417,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
                         }
                      } else {
                         for(t=0; t<refImage->nt; ++t){
-                           refBlockValue[blockIndex] = 0.0;
+                           refBlockValue[blockIndex] = 0;
                            blockIndex++;
                         }
                      }
@@ -477,7 +477,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
                                  }
                               }else {
                                  for(t=0; t<refImage->nt; ++t){
-                                    neighbourBlockValue[blockIndex] = 0.0;
+                                    neighbourBlockValue[blockIndex] = 0;
                                     blockIndex++;
                                  } //t
                               }
@@ -521,7 +521,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
 
                      edgeWeightMatrix[cpx+cpy*controlPointGridImage->nx+
                            cpz*controlPointGridImage->nx*controlPointGridImage->ny+
-                           ngh_index*node_number]=0.0;
+                           ngh_index*node_number]=0;
                      //DEBUG
                      //index_neighbours[cpx+cpy*m1+
                      //        cpz*m1*n1+
@@ -530,7 +530,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
                      //        (cpz+dz[ngh_index])*m1*n1;
                      //edgeWeightMatrix[cpx+cpy*m1+
                      //        cpz*m1*n1+
-                     //        ngh_index*num_vertices]=0.0;
+                     //        ngh_index*num_vertices]=0;
                      //DEBUG
                   }
                }
@@ -740,7 +740,7 @@ void reg_mrf::GetRegularisation()
    for(size_t i=0;i<this->node_number*this->label_nD_num;i++){
       //matrix = discretisedValue (first dimension displacement label, second dim. control point)
       this->regularised_cost[i]=this->discretised_measures[i];
-      message[i]=0.0;
+      message[i]=0;
    }
 
    for(int i=0;i<this->label_nD_num;i++){
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 69fa6050..5c8979a7 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -1,5 +1,5 @@
 /*
- *  _reg_mutualinformation.cpp
+ *  _reg_nmi.cpp
  *
  *
  *  Created by Marc Modat on 25/03/2009.
@@ -12,111 +12,94 @@
 
 #include "_reg_nmi.h"
 
-/* *************************************************************** */
-/* *************************************************************** */
-reg_nmi::reg_nmi()
-   : reg_measure()
-{
-   this->forwardJointHistogramPro=nullptr;
-   this->forwardJointHistogramLog=nullptr;
-   this->forwardEntropyValues=nullptr;
-   this->backwardJointHistogramPro=nullptr;
-   this->backwardJointHistogramLog=nullptr;
-   this->backwardEntropyValues=nullptr;
+ /* *************************************************************** */
+ /* *************************************************************** */
+reg_nmi::reg_nmi(): reg_measure() {
+    this->forwardJointHistogramPro = nullptr;
+    this->forwardJointHistogramLog = nullptr;
+    this->forwardEntropyValues = nullptr;
+    this->backwardJointHistogramPro = nullptr;
+    this->backwardJointHistogramLog = nullptr;
+    this->backwardEntropyValues = nullptr;
 
-   for(int i=0; i<255; ++i)
-   {
-      this->referenceBinNumber[i]=68;
-      this->floatingBinNumber[i]=68;
-   }
+    for (int i = 0; i < 255; ++i) {
+        this->referenceBinNumber[i] = 68;
+        this->floatingBinNumber[i] = 68;
+    }
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_nmi constructor called");
+    reg_print_msg_debug("reg_nmi constructor called");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-reg_nmi::~reg_nmi()
-{
-   this->DeallocateHistogram();
+reg_nmi::~reg_nmi() {
+    this->DeallocateHistogram();
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_nmi destructor called");
+    reg_print_msg_debug("reg_nmi destructor called");
 #endif
 }
 /* *************************************************************** */
-void reg_nmi::DeallocateHistogram()
-{
-   int timepoint=this->referenceTimePoint;
-   // Free the joint histograms and the entropy arrays
-   if(this->forwardJointHistogramPro!=nullptr)
-   {
-      for(int i=0; i<timepoint; ++i)
-      {
-         if(this->forwardJointHistogramPro[i]!=nullptr)
-            free(this->forwardJointHistogramPro[i]);
-         this->forwardJointHistogramPro[i]=nullptr;
-      }
-      free(this->forwardJointHistogramPro);
-   }
-   this->forwardJointHistogramPro=nullptr;
-   if(this->backwardJointHistogramPro!=nullptr)
-   {
-      for(int i=0; i<timepoint; ++i)
-      {
-         if(this->backwardJointHistogramPro[i]!=nullptr)
-            free(this->backwardJointHistogramPro[i]);
-         this->backwardJointHistogramPro[i]=nullptr;
-      }
-      free(this->backwardJointHistogramPro);
-   }
-   this->backwardJointHistogramPro=nullptr;
+void reg_nmi::DeallocateHistogram() {
+    int timepoint = this->referenceTimePoint;
+    // Free the joint histograms and the entropy arrays
+    if (this->forwardJointHistogramPro != nullptr) {
+        for (int i = 0; i < timepoint; ++i) {
+            if (this->forwardJointHistogramPro[i] != nullptr)
+                free(this->forwardJointHistogramPro[i]);
+            this->forwardJointHistogramPro[i] = nullptr;
+        }
+        free(this->forwardJointHistogramPro);
+    }
+    this->forwardJointHistogramPro = nullptr;
+    if (this->backwardJointHistogramPro != nullptr) {
+        for (int i = 0; i < timepoint; ++i) {
+            if (this->backwardJointHistogramPro[i] != nullptr)
+                free(this->backwardJointHistogramPro[i]);
+            this->backwardJointHistogramPro[i] = nullptr;
+        }
+        free(this->backwardJointHistogramPro);
+    }
+    this->backwardJointHistogramPro = nullptr;
 
-   if(this->forwardJointHistogramLog!=nullptr)
-   {
-      for(int i=0; i<timepoint; ++i)
-      {
-         if(this->forwardJointHistogramLog[i]!=nullptr)
-            free(this->forwardJointHistogramLog[i]);
-         this->forwardJointHistogramLog[i]=nullptr;
-      }
-      free(this->forwardJointHistogramLog);
-   }
-   this->forwardJointHistogramLog=nullptr;
-   if(this->backwardJointHistogramLog!=nullptr)
-   {
-      for(int i=0; i<timepoint; ++i)
-      {
-         if(this->backwardJointHistogramLog[i]!=nullptr)
-            free(this->backwardJointHistogramLog[i]);
-         this->backwardJointHistogramLog[i]=nullptr;
-      }
-      free(this->backwardJointHistogramLog);
-   }
-   this->backwardJointHistogramLog=nullptr;
+    if (this->forwardJointHistogramLog != nullptr) {
+        for (int i = 0; i < timepoint; ++i) {
+            if (this->forwardJointHistogramLog[i] != nullptr)
+                free(this->forwardJointHistogramLog[i]);
+            this->forwardJointHistogramLog[i] = nullptr;
+        }
+        free(this->forwardJointHistogramLog);
+    }
+    this->forwardJointHistogramLog = nullptr;
+    if (this->backwardJointHistogramLog != nullptr) {
+        for (int i = 0; i < timepoint; ++i) {
+            if (this->backwardJointHistogramLog[i] != nullptr)
+                free(this->backwardJointHistogramLog[i]);
+            this->backwardJointHistogramLog[i] = nullptr;
+        }
+        free(this->backwardJointHistogramLog);
+    }
+    this->backwardJointHistogramLog = nullptr;
 
-   if(this->forwardEntropyValues!=nullptr)
-   {
-      for(int i=0; i<timepoint; ++i)
-      {
-         if(this->forwardEntropyValues[i]!=nullptr)
-            free(this->forwardEntropyValues[i]);
-         this->forwardEntropyValues[i]=nullptr;
-      }
-      free(this->forwardEntropyValues);
-   }
-   this->forwardEntropyValues=nullptr;
-   if(this->backwardEntropyValues!=nullptr)
-   {
-      for(int i=0; i<timepoint; ++i)
-      {
-         if(this->backwardEntropyValues[i]!=nullptr)
-            free(this->backwardEntropyValues[i]);
-         this->backwardEntropyValues[i]=nullptr;
-      }
-      free(this->backwardEntropyValues);
-   }
-   this->backwardEntropyValues=nullptr;
+    if (this->forwardEntropyValues != nullptr) {
+        for (int i = 0; i < timepoint; ++i) {
+            if (this->forwardEntropyValues[i] != nullptr)
+                free(this->forwardEntropyValues[i]);
+            this->forwardEntropyValues[i] = nullptr;
+        }
+        free(this->forwardEntropyValues);
+    }
+    this->forwardEntropyValues = nullptr;
+    if (this->backwardEntropyValues != nullptr) {
+        for (int i = 0; i < timepoint; ++i) {
+            if (this->backwardEntropyValues[i] != nullptr)
+                free(this->backwardEntropyValues[i]);
+            this->backwardEntropyValues[i] = nullptr;
+        }
+        free(this->backwardEntropyValues);
+    }
+    this->backwardEntropyValues = nullptr;
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_nmi::DeallocateHistogram called");
+    reg_print_msg_debug("reg_nmi::DeallocateHistogram called");
 #endif
 }
 /* *************************************************************** */
@@ -131,133 +114,110 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
                                 int *maskFloPtr,
                                 nifti_image *warRefImgPtr,
                                 nifti_image *warRefGraPtr,
-                                nifti_image *bckVoxBasedGraPtr)
-{
-   // Set the pointers using the parent class function
-   reg_measure::InitialiseMeasure(refImgPtr,
-                                  floImgPtr,
-                                  maskRefPtr,
-                                  warFloImgPtr,
-                                  warFloGraPtr,
-                                  forVoxBasedGraPtr,
-                                  forwardLocalWeightPtr,
-                                  maskFloPtr,
-                                  warRefImgPtr,
-                                  warRefGraPtr,
-                                  bckVoxBasedGraPtr);
+                                nifti_image *bckVoxBasedGraPtr) {
+    // Set the pointers using the parent class function
+    reg_measure::InitialiseMeasure(refImgPtr,
+                                   floImgPtr,
+                                   maskRefPtr,
+                                   warFloImgPtr,
+                                   warFloGraPtr,
+                                   forVoxBasedGraPtr,
+                                   forwardLocalWeightPtr,
+                                   maskFloPtr,
+                                   warRefImgPtr,
+                                   warRefGraPtr,
+                                   bckVoxBasedGraPtr);
 
-   // Deallocate all allocated arrays
-   this->DeallocateHistogram();
-   // Extract the number of time point
-   int timepoint=this->referenceTimePoint;
-   // Reference and floating are resampled between 2 and bin-3
-   for(int i=0; i<timepoint; ++i)
-   {
-      if(this->timePointWeight[i] > 0.0)
-      {
-         reg_intensityRescale(this->referenceImagePointer,
-                              i,
-                              2.f,
-                              this->referenceBinNumber[i]-3);
-         reg_intensityRescale(this->floatingImagePointer,
-                              i,
-                              2.f,
-                              this->floatingBinNumber[i]-3);
-      }
-   }
-   // Create the joint histograms
-   this->forwardJointHistogramPro=(double**)malloc(255*sizeof(double *));
-   this->forwardJointHistogramLog=(double**)malloc(255*sizeof(double *));
-   this->forwardEntropyValues=(double**)malloc(255*sizeof(double *));
-   if(this->isSymmetric)
-   {
-      this->backwardJointHistogramPro=(double**)malloc(255*sizeof(double *));
-      this->backwardJointHistogramLog=(double**)malloc(255*sizeof(double *));
-      this->backwardEntropyValues=(double**)malloc(255*sizeof(double *));
-   }
-   for(int i=0; i<timepoint; ++i)
-   {
-      if(this->timePointWeight[i] > 0.0)
-      {
-         // Compute the total number of bin
-         this->totalBinNumber[i]=this->referenceBinNumber[i]*this->floatingBinNumber[i] +
-               this->referenceBinNumber[i] + this->floatingBinNumber[i];
-         this->forwardJointHistogramLog[i]=(double *)
-               calloc(this->totalBinNumber[i],sizeof(double));
-         this->forwardJointHistogramPro[i]=(double *)
-               calloc(this->totalBinNumber[i],sizeof(double));
-         this->forwardEntropyValues[i]=(double *)
-               calloc(4,sizeof(double));
-         if(this->isSymmetric)
-         {
-            this->backwardJointHistogramLog[i]=(double *)
-                  calloc(this->totalBinNumber[i],sizeof(double));
-            this->backwardJointHistogramPro[i]=(double *)
-                  calloc(this->totalBinNumber[i],sizeof(double));
-            this->backwardEntropyValues[i]=(double *)
-                  calloc(4,sizeof(double));
-         }
-      }
-      else
-      {
-         this->forwardJointHistogramLog[i]=nullptr;
-         this->forwardJointHistogramPro[i]=nullptr;
-         this->forwardEntropyValues[i]=nullptr;
-         if(this->isSymmetric)
-         {
-            this->backwardJointHistogramLog[i]=nullptr;
-            this->backwardJointHistogramPro[i]=nullptr;
-            this->backwardEntropyValues[i]=nullptr;
-         }
-      }
-   }
+    // Deallocate all allocated arrays
+    this->DeallocateHistogram();
+    // Extract the number of time point
+    int timepoint = this->referenceTimePoint;
+    // Reference and floating are resampled between 2 and bin-3
+    for (int i = 0; i < timepoint; ++i) {
+        if (this->timePointWeight[i] > 0) {
+            reg_intensityRescale(this->referenceImagePointer,
+                                 i,
+                                 2.f,
+                                 this->referenceBinNumber[i] - 3);
+            reg_intensityRescale(this->floatingImagePointer,
+                                 i,
+                                 2.f,
+                                 this->floatingBinNumber[i] - 3);
+        }
+    }
+    // Create the joint histograms
+    this->forwardJointHistogramPro = (double**)malloc(255 * sizeof(double*));
+    this->forwardJointHistogramLog = (double**)malloc(255 * sizeof(double*));
+    this->forwardEntropyValues = (double**)malloc(255 * sizeof(double*));
+    if (this->isSymmetric) {
+        this->backwardJointHistogramPro = (double**)malloc(255 * sizeof(double*));
+        this->backwardJointHistogramLog = (double**)malloc(255 * sizeof(double*));
+        this->backwardEntropyValues = (double**)malloc(255 * sizeof(double*));
+    }
+    for (int i = 0; i < timepoint; ++i) {
+        if (this->timePointWeight[i] > 0) {
+            // Compute the total number of bin
+            this->totalBinNumber[i] = this->referenceBinNumber[i] * this->floatingBinNumber[i] +
+                this->referenceBinNumber[i] + this->floatingBinNumber[i];
+            this->forwardJointHistogramLog[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
+            this->forwardJointHistogramPro[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
+            this->forwardEntropyValues[i] = (double*)calloc(4, sizeof(double));
+            if (this->isSymmetric) {
+                this->backwardJointHistogramLog[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
+                this->backwardJointHistogramPro[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
+                this->backwardEntropyValues[i] = (double*)calloc(4, sizeof(double));
+            }
+        } else {
+            this->forwardJointHistogramLog[i] = nullptr;
+            this->forwardJointHistogramPro[i] = nullptr;
+            this->forwardEntropyValues[i] = nullptr;
+            if (this->isSymmetric) {
+                this->backwardJointHistogramLog[i] = nullptr;
+                this->backwardJointHistogramPro[i] = nullptr;
+                this->backwardEntropyValues[i] = nullptr;
+            }
+        }
+    }
 #ifndef NDEBUG
-   char text[255];
-   reg_print_msg_debug("reg_nmi::InitialiseMeasure().");
-   for (int i = 0; i<this->referenceImagePointer->nt; ++i)
-   {
-      sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
-      reg_print_msg_debug(text);
-   }
+    char text[255];
+    reg_print_msg_debug("reg_nmi::InitialiseMeasure().");
+    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+        sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
+        reg_print_msg_debug(text);
+    }
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class PrecisionTYPE>
-PrecisionTYPE GetBasisSplineValue(PrecisionTYPE x)
-{
-   x=fabs(x);
-   PrecisionTYPE value=0.0;
-   if(x<2.0)
-   {
-      if(x<1.0)
-         value = (PrecisionTYPE)(2.0f/3.0f + (0.5f*x-1.0)*x*x);
-      else
-      {
-         x-=2.0f;
-         value = -x*x*x/6.0f;
-      }
-   }
-   return value;
+PrecisionTYPE GetBasisSplineValue(PrecisionTYPE x) {
+    x = fabs(x);
+    PrecisionTYPE value = 0;
+    if (x < 2.0) {
+        if (x < 1.0)
+            value = (PrecisionTYPE)(2.0f / 3.0f + (0.5f * x - 1.0) * x * x);
+        else {
+            x -= 2.0f;
+            value = -x * x * x / 6.0f;
+        }
+    }
+    return value;
 }
 /* *************************************************************** */
 template<class PrecisionTYPE>
-PrecisionTYPE GetBasisSplineDerivativeValue(PrecisionTYPE ori)
-{
-   PrecisionTYPE x=fabs(ori);
-   PrecisionTYPE value=0.0;
-   if(x<2.0)
-   {
-      if(x<1.0)
-         value = (PrecisionTYPE)((1.5f*x-2.0)*ori);
-      else
-      {
-         x-=2.0f;
-         value = -0.5f * x * x;
-         if(ori<0.0f) value =-value;
-      }
-   }
-   return value;
+PrecisionTYPE GetBasisSplineDerivativeValue(PrecisionTYPE ori) {
+    PrecisionTYPE x = fabs(ori);
+    PrecisionTYPE value = 0;
+    if (x < 2.0) {
+        if (x < 1.0)
+            value = (PrecisionTYPE)((1.5f * x - 2.0) * ori);
+        else {
+            x -= 2.0f;
+            value = -0.5f * x * x;
+            if (ori < 0.0f) value = -value;
+        }
+    }
+    return value;
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -271,293 +231,247 @@ void reg_getNMIValue(nifti_image *referenceImage,
                      double **jointHistogramLog,
                      double **jointhistogramPro,
                      double **entropyValues,
-                     int *referenceMask
-                     )
-{
-   // Create pointers to the image data arrays
-   DTYPE *refImagePtr = static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *warImagePtr = static_cast<DTYPE *>(warpedImage->data);
-   // Useful variable
-   size_t voxelNumber = (size_t)referenceImage->nx *
-         referenceImage->ny *
-         referenceImage->nz;
-   // Iterate over all active time points
-   for(int t=0; t<referenceImage->nt; ++t)
-   {
-      if(timePointWeight[t] > 0.0)
-      {
+                     int *referenceMask) {
+    // Create pointers to the image data arrays
+    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
+    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
+    // Useful variable
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    // Iterate over all active time points
+    for (int t = 0; t < referenceImage->nt; ++t) {
+        if (timePointWeight[t] > 0) {
 #ifndef NDEBUG
-         char text[255];
-         sprintf(text, "Computing NMI for time point %i",t);
-         reg_print_msg_debug(text);
+            char text[255];
+            sprintf(text, "Computing NMI for time point %i", t);
+            reg_print_msg_debug(text);
 #endif
-         // Define some pointers to the current histograms
-         double *jointHistoProPtr = jointhistogramPro[t];
-         double *jointHistoLogPtr = jointHistogramLog[t];
-         // Empty the joint histogram
-         memset(jointHistoProPtr,0,totalBinNumber[t]*sizeof(double));
-         // Fill the joint histograms using an approximation
-         DTYPE *refPtr = &refImagePtr[t*voxelNumber];
-         DTYPE *warPtr = &warImagePtr[t*voxelNumber];
-         for(size_t voxel=0; voxel<voxelNumber; ++voxel)
-         {
-            if(referenceMask[voxel]>-1)
-            {
-               DTYPE refValue=refPtr[voxel];
-               DTYPE warValue=warPtr[voxel];
-               if(refValue==refValue && warValue==warValue &&
-                     refValue>=0 && warValue>=0 &&
-                     refValue<referenceBinNumber[t] &&
-                     warValue<floatingBinNumber[t])
-               {
-                  ++jointHistoProPtr[static_cast<int>(refValue) +
-                        static_cast<int>(warValue) * referenceBinNumber[t]];
-               }
+            // Define some pointers to the current histograms
+            double *jointHistoProPtr = jointhistogramPro[t];
+            double *jointHistoLogPtr = jointHistogramLog[t];
+            // Empty the joint histogram
+            memset(jointHistoProPtr, 0, totalBinNumber[t] * sizeof(double));
+            // Fill the joint histograms using an approximation
+            DTYPE *refPtr = &refImagePtr[t * voxelNumber];
+            DTYPE *warPtr = &warImagePtr[t * voxelNumber];
+            for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
+                if (referenceMask[voxel] > -1) {
+                    DTYPE refValue = refPtr[voxel];
+                    DTYPE warValue = warPtr[voxel];
+                    if (refValue == refValue && warValue == warValue &&
+                        refValue >= 0 && warValue >= 0 &&
+                        refValue < referenceBinNumber[t] &&
+                        warValue < floatingBinNumber[t]) {
+                        ++jointHistoProPtr[static_cast<int>(refValue) + static_cast<int>(warValue) * referenceBinNumber[t]];
+                    }
+                }
             }
-         }
-         // Convolve the histogram with a cubic B-spline kernel
-         double kernel[3];
-         kernel[0]=kernel[2]=GetBasisSplineValue(-1.);
-         kernel[1]=GetBasisSplineValue(0.);
-         // Histogram is first smooth along the reference axis
-         memset(jointHistoLogPtr,0,totalBinNumber[t]*sizeof(double));
-         for(int f=0; f<floatingBinNumber[t]; ++f)
-         {
-            for(int r=0; r<referenceBinNumber[t]; ++r)
-            {
-               double value=0.0;
-               int index = r-1;
-               double *ptrHisto = &jointHistoProPtr[index+referenceBinNumber[t]*f];
+            // Convolve the histogram with a cubic B-spline kernel
+            double kernel[3];
+            kernel[0] = kernel[2] = GetBasisSplineValue(-1.);
+            kernel[1] = GetBasisSplineValue(0.);
+            // Histogram is first smooth along the reference axis
+            memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double));
+            for (int f = 0; f < floatingBinNumber[t]; ++f) {
+                for (int r = 0; r < referenceBinNumber[t]; ++r) {
+                    double value = 0;
+                    int index = r - 1;
+                    double *ptrHisto = &jointHistoProPtr[index + referenceBinNumber[t] * f];
 
-               for(int it=0; it<3; it++)
-               {
-                  if(-1<index && index<referenceBinNumber[t])
-                  {
-                     value += *ptrHisto * kernel[it];
-                  }
-                  ++ptrHisto;
-                  ++index;
-               }
-               jointHistoLogPtr[r+referenceBinNumber[t]*f] = value;
+                    for (int it = 0; it < 3; it++) {
+                        if (-1 < index && index < referenceBinNumber[t]) {
+                            value += *ptrHisto * kernel[it];
+                        }
+                        ++ptrHisto;
+                        ++index;
+                    }
+                    jointHistoLogPtr[r + referenceBinNumber[t] * f] = value;
+                }
             }
-         }
-         // Histogram is then smooth along the warped floating axis
-         for(int r=0; r<referenceBinNumber[t]; ++r)
-         {
-            for(int f=0; f<floatingBinNumber[t]; ++f)
-            {
-               double value=0.;
-               int index = f-1;
-               double *ptrHisto = &jointHistoLogPtr[r+referenceBinNumber[t]*index];
+            // Histogram is then smooth along the warped floating axis
+            for (int r = 0; r < referenceBinNumber[t]; ++r) {
+                for (int f = 0; f < floatingBinNumber[t]; ++f) {
+                    double value = 0.;
+                    int index = f - 1;
+                    double *ptrHisto = &jointHistoLogPtr[r + referenceBinNumber[t] * index];
 
-               for(int it=0; it<3; it++)
-               {
-                  if(-1<index && index<floatingBinNumber[t])
-                  {
-                     value += *ptrHisto * kernel[it];
-                  }
-                  ptrHisto+=referenceBinNumber[t];
-                  ++index;
-               }
-               jointHistoProPtr[r+referenceBinNumber[t]*f] = value;
+                    for (int it = 0; it < 3; it++) {
+                        if (-1 < index && index < floatingBinNumber[t]) {
+                            value += *ptrHisto * kernel[it];
+                        }
+                        ptrHisto += referenceBinNumber[t];
+                        ++index;
+                    }
+                    jointHistoProPtr[r + referenceBinNumber[t] * f] = value;
+                }
             }
-         }
-         // Normalise the histogram
-         double activeVoxel=0.f;
-         for(int i=0; i<totalBinNumber[t]; ++i)
-            activeVoxel+=jointHistoProPtr[i];
-         entropyValues[t][3]=activeVoxel;
-         for(int i=0; i<totalBinNumber[t]; ++i)
-            jointHistoProPtr[i]/=activeVoxel;
-         // Marginalise over the reference axis
-         for(int r=0; r<referenceBinNumber[t]; ++r)
-         {
-            double sum=0.;
-            int index=r;
-            for(int f=0; f<floatingBinNumber[t]; ++f)
-            {
-               sum+=jointHistoProPtr[index];
-               index+=referenceBinNumber[t];
+            // Normalise the histogram
+            double activeVoxel = 0.f;
+            for (int i = 0; i < totalBinNumber[t]; ++i)
+                activeVoxel += jointHistoProPtr[i];
+            entropyValues[t][3] = activeVoxel;
+            for (int i = 0; i < totalBinNumber[t]; ++i)
+                jointHistoProPtr[i] /= activeVoxel;
+            // Marginalise over the reference axis
+            for (int r = 0; r < referenceBinNumber[t]; ++r) {
+                double sum = 0.;
+                int index = r;
+                for (int f = 0; f < floatingBinNumber[t]; ++f) {
+                    sum += jointHistoProPtr[index];
+                    index += referenceBinNumber[t];
+                }
+                jointHistoProPtr[referenceBinNumber[t] *
+                    floatingBinNumber[t] + r] = sum;
             }
-            jointHistoProPtr[referenceBinNumber[t]*
-                  floatingBinNumber[t]+r]=sum;
-         }
-         // Marginalise over the warped floating axis
-         for(int f=0; f<floatingBinNumber[t]; ++f)
-         {
-            double sum=0.;
-            int index=referenceBinNumber[t]*f;
-            for(int r=0; r<referenceBinNumber[t]; ++r)
-            {
-               sum+=jointHistoProPtr[index];
-               ++index;
+            // Marginalise over the warped floating axis
+            for (int f = 0; f < floatingBinNumber[t]; ++f) {
+                double sum = 0.;
+                int index = referenceBinNumber[t] * f;
+                for (int r = 0; r < referenceBinNumber[t]; ++r) {
+                    sum += jointHistoProPtr[index];
+                    ++index;
+                }
+                jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f] = sum;
             }
-            jointHistoProPtr[referenceBinNumber[t]*
-                  floatingBinNumber[t]+referenceBinNumber[t]+f]=sum;
-         }
-         // Set the log values to zero
-         memset(jointHistoLogPtr,0,totalBinNumber[t]*sizeof(double));
-         // Compute the entropy of the reference image
-         double referenceEntropy=0.;
-         for(int r=0; r<referenceBinNumber[t]; ++r)
-         {
-            double valPro=jointHistoProPtr[referenceBinNumber[t]*floatingBinNumber[t]+r];
-            if(valPro>0)
-            {
-               double valLog=log(valPro);
-               referenceEntropy -= valPro * valLog;
-               jointHistoLogPtr[referenceBinNumber[t]*floatingBinNumber[t]+r]=valLog;
+            // Set the log values to zero
+            memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double));
+            // Compute the entropy of the reference image
+            double referenceEntropy = 0.;
+            for (int r = 0; r < referenceBinNumber[t]; ++r) {
+                double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r];
+                if (valPro > 0) {
+                    double valLog = log(valPro);
+                    referenceEntropy -= valPro * valLog;
+                    jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + r] = valLog;
+                }
             }
-         }
-         entropyValues[t][0]=referenceEntropy;
-         // Compute the entropy of the warped floating image
-         double warpedEntropy=0.;
-         for(int f=0; f<floatingBinNumber[t]; ++f)
-         {
-            double valPro=jointHistoProPtr[referenceBinNumber[t]*floatingBinNumber[t]+
-                  referenceBinNumber[t]+f];
-            if(valPro>0)
-            {
-               double valLog=log(valPro);
-               warpedEntropy -= valPro * valLog;
-               jointHistoLogPtr[referenceBinNumber[t]*floatingBinNumber[t]+
-                     referenceBinNumber[t]+f]=valLog;
+            entropyValues[t][0] = referenceEntropy;
+            // Compute the entropy of the warped floating image
+            double warpedEntropy = 0.;
+            for (int f = 0; f < floatingBinNumber[t]; ++f) {
+                double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] +
+                    referenceBinNumber[t] + f];
+                if (valPro > 0) {
+                    double valLog = log(valPro);
+                    warpedEntropy -= valPro * valLog;
+                    jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f] = valLog;
+                }
             }
-         }
-         entropyValues[t][1]=warpedEntropy;
-         // Compute the joint entropy
-         double jointEntropy=0.;
-         for(int i=0; i<referenceBinNumber[t]*floatingBinNumber[t]; ++i)
-         {
-            double valPro=jointHistoProPtr[i];
-            if(valPro>0)
-            {
-               double valLog=log(valPro);
-               jointEntropy -= valPro * valLog;
-               jointHistoLogPtr[i]=valLog;
+            entropyValues[t][1] = warpedEntropy;
+            // Compute the joint entropy
+            double jointEntropy = 0.;
+            for (int i = 0; i < referenceBinNumber[t] * floatingBinNumber[t]; ++i) {
+                double valPro = jointHistoProPtr[i];
+                if (valPro > 0) {
+                    double valLog = log(valPro);
+                    jointEntropy -= valPro * valLog;
+                    jointHistoLogPtr[i] = valLog;
+                }
             }
-         }
-         entropyValues[t][2]=jointEntropy;
-      } // if active time point
-   } // iterate over all time point in the reference image
+            entropyValues[t][2] = jointEntropy;
+        } // if active time point
+    } // iterate over all time point in the reference image
 }
 /* *************************************************************** */
-template void reg_getNMIValue<float>(nifti_image *,nifti_image *,double *,unsigned short *,unsigned short *,unsigned short *,double **,double **,double **,int *);
-template void reg_getNMIValue<double>(nifti_image *,nifti_image *,double *,unsigned short *,unsigned short *,unsigned short *,double **,double **,double **,int *);
+template void reg_getNMIValue<float>(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*);
+template void reg_getNMIValue<double>(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*);
 /* *************************************************************** */
 /* *************************************************************** */
-double reg_nmi::GetSimilarityMeasureValue()
-{
-   // Check that all the specified image are of the same datatype
-   if(this->warpedFloatingImagePointer->datatype !=this->referenceImagePointer->datatype)
-   {
-      reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-      reg_print_msg_error("Both input images are exepected to have the same type");
-      reg_exit();
-   }
-   switch(this->referenceImagePointer->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      reg_getNMIValue<float>
-            (this->referenceImagePointer,
-             this->warpedFloatingImagePointer,
-             this->timePointWeight,
-             this->referenceBinNumber,
-             this->floatingBinNumber,
-             this->totalBinNumber,
-             this->forwardJointHistogramLog,
-             this->forwardJointHistogramPro,
-             this->forwardEntropyValues,
-             this->referenceMaskPointer
-             );
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_getNMIValue<double>
-            (this->referenceImagePointer,
-             this->warpedFloatingImagePointer,
-             this->timePointWeight,
-             this->referenceBinNumber,
-             this->floatingBinNumber,
-             this->totalBinNumber,
-             this->forwardJointHistogramLog,
-             this->forwardJointHistogramPro,
-             this->forwardEntropyValues,
-             this->referenceMaskPointer
-             );
-      break;
-   default:
-      reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-      reg_print_msg_error("Unsupported datatype");
-      reg_exit();
-   }
+double reg_nmi::GetSimilarityMeasureValue() {
+    // Check that all the specified image are of the same datatype
+    if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) {
+        reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
+        reg_print_msg_error("Both input images are expected to have the same type");
+        reg_exit();
+    }
+    switch (this->referenceImagePointer->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_getNMIValue<float>(this->referenceImagePointer,
+                               this->warpedFloatingImagePointer,
+                               this->timePointWeight,
+                               this->referenceBinNumber,
+                               this->floatingBinNumber,
+                               this->totalBinNumber,
+                               this->forwardJointHistogramLog,
+                               this->forwardJointHistogramPro,
+                               this->forwardEntropyValues,
+                               this->referenceMaskPointer);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_getNMIValue<double>(this->referenceImagePointer,
+                                this->warpedFloatingImagePointer,
+                                this->timePointWeight,
+                                this->referenceBinNumber,
+                                this->floatingBinNumber,
+                                this->totalBinNumber,
+                                this->forwardJointHistogramLog,
+                                this->forwardJointHistogramPro,
+                                this->forwardEntropyValues,
+                                this->referenceMaskPointer);
+        break;
+    default:
+        reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
+        reg_print_msg_error("Unsupported datatype");
+        reg_exit();
+    }
 
-   if(this->isSymmetric)
-   {
-      // Check that all the specified image are of the same datatype
-      if(this->floatingImagePointer->datatype !=this->warpedReferenceImagePointer->datatype)
-      {
-         reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-         reg_print_msg_error("Both input images are exepected to have the same type");
-         reg_exit();
-      }
-      switch(this->floatingImagePointer->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_getNMIValue<float>
-               (this->floatingImagePointer,
-                this->warpedReferenceImagePointer,
-                this->timePointWeight,
-                this->floatingBinNumber,
-                this->referenceBinNumber,
-                this->totalBinNumber,
-                this->backwardJointHistogramLog,
-                this->backwardJointHistogramPro,
-                this->backwardEntropyValues,
-                this->floatingMaskPointer
-                );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_getNMIValue<double>
-               (this->floatingImagePointer,
-                this->warpedReferenceImagePointer,
-                this->timePointWeight,
-                this->floatingBinNumber,
-                this->referenceBinNumber,
-                this->totalBinNumber,
-                this->backwardJointHistogramLog,
-                this->backwardJointHistogramPro,
-                this->backwardEntropyValues,
-                this->floatingMaskPointer
-                );
-         break;
-      default:
-         reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-         reg_print_msg_error("Unsupported datatype");
-         reg_exit();
-      }
-   }
+    if (this->isSymmetric) {
+        // Check that all the specified image are of the same datatype
+        if (this->floatingImagePointer->datatype != this->warpedReferenceImagePointer->datatype) {
+            reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
+            reg_print_msg_error("Both input images are expected to have the same type");
+            reg_exit();
+        }
+        switch (this->floatingImagePointer->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_getNMIValue<float>(this->floatingImagePointer,
+                                   this->warpedReferenceImagePointer,
+                                   this->timePointWeight,
+                                   this->floatingBinNumber,
+                                   this->referenceBinNumber,
+                                   this->totalBinNumber,
+                                   this->backwardJointHistogramLog,
+                                   this->backwardJointHistogramPro,
+                                   this->backwardEntropyValues,
+                                   this->floatingMaskPointer);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_getNMIValue<double>(this->floatingImagePointer,
+                                    this->warpedReferenceImagePointer,
+                                    this->timePointWeight,
+                                    this->floatingBinNumber,
+                                    this->referenceBinNumber,
+                                    this->totalBinNumber,
+                                    this->backwardJointHistogramLog,
+                                    this->backwardJointHistogramPro,
+                                    this->backwardEntropyValues,
+                                    this->floatingMaskPointer);
+            break;
+        default:
+            reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
+            reg_print_msg_error("Unsupported datatype");
+            reg_exit();
+        }
+    }
 
-   double nmi_value_forward=0.;
-   double nmi_value_backward=0.;
-   for(int t=0; t<this->referenceTimePoint; ++t)
-   {
-      if(this->timePointWeight[t]>0.0)
-      {
-        nmi_value_forward += timePointWeight[t] *
-           (this->forwardEntropyValues[t][0] +
-               this->forwardEntropyValues[t][1] ) /
-               this->forwardEntropyValues[t][2];
-         if(this->isSymmetric)
-          nmi_value_backward += timePointWeight[t] *
-             (this->backwardEntropyValues[t][0] +
-                  this->backwardEntropyValues[t][1] ) /
-                  this->backwardEntropyValues[t][2];
-      }
-   }
+    double nmi_value_forward = 0.;
+    double nmi_value_backward = 0.;
+    for (int t = 0; t < this->referenceTimePoint; ++t) {
+        if (this->timePointWeight[t] > 0) {
+            nmi_value_forward += timePointWeight[t] *
+                (this->forwardEntropyValues[t][0] +
+                 this->forwardEntropyValues[t][1]) /
+                this->forwardEntropyValues[t][2];
+            if (this->isSymmetric)
+                nmi_value_backward += timePointWeight[t] *
+                (this->backwardEntropyValues[t][0] +
+                 this->backwardEntropyValues[t][1]) /
+                this->backwardEntropyValues[t][2];
+        }
+    }
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_nmi::GetSimilarityMeasureValue called");
+    reg_print_msg_debug("reg_nmi::GetSimilarityMeasureValue called");
 #endif
-   return nmi_value_forward+nmi_value_backward;
+    return nmi_value_forward + nmi_value_backward;
 }
 /* *************************************************************** */
 template <class DTYPE>
@@ -571,93 +485,85 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
                                     nifti_image *measureGradientImage,
                                     int *referenceMask,
                                     int current_timepoint,
-                                    double timepoint_weight)
-{
-   if(current_timepoint<0 || current_timepoint>=referenceImage->nt){
-      reg_print_fct_error("reg_getVoxelBasedNMIGradient2D");
-      reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
-      reg_exit();
-   }
-   size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+                                    double timepoint_weight) {
+    if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) {
+        reg_print_fct_error("reg_getVoxelBasedNMIGradient2D");
+        reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
+        reg_exit();
+    }
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 
-   // Pointers to the image data
-   DTYPE *refImagePtr = static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *refPtr = &refImagePtr[current_timepoint*voxelNumber];
-   DTYPE *warImagePtr = static_cast<DTYPE *>(warpedImage->data);
-   DTYPE *warPtr = &warImagePtr[current_timepoint*voxelNumber];
+    // Pointers to the image data
+    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
+    DTYPE *refPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
+    DTYPE *warPtr = &warImagePtr[current_timepoint * voxelNumber];
 
-   // Pointers to the spatial gradient of the warped image
-   DTYPE *warGradPtrX = static_cast<DTYPE *>(warpedGradient->data);
-   DTYPE *warGradPtrY = &warGradPtrX[voxelNumber];
+    // Pointers to the spatial gradient of the warped image
+    DTYPE *warGradPtrX = static_cast<DTYPE*>(warpedGradient->data);
+    DTYPE *warGradPtrY = &warGradPtrX[voxelNumber];
 
-   // Pointers to the measure of similarity gradient
-   DTYPE *measureGradPtrX = static_cast<DTYPE *>(measureGradientImage->data);
-   DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
+    // Pointers to the measure of similarity gradient
+    DTYPE *measureGradPtrX = static_cast<DTYPE*>(measureGradientImage->data);
+    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
 
-   // Create pointers to the current joint histogram
-   double *logHistoPtr = jointHistogramLog[current_timepoint];
-   double *entropyPtr = entropyValues[current_timepoint];
-   double nmi = (entropyPtr[0]+entropyPtr[1])/entropyPtr[2];
-   size_t referenceOffset=referenceBinNumber[current_timepoint]*floatingBinNumber[current_timepoint];
-   size_t floatingOffset=referenceOffset+referenceBinNumber[current_timepoint];
-   // Iterate over all voxel
-   for(size_t i=0; i<voxelNumber; ++i)
-   {
-      // Check if the voxel belongs to the image mask
-      if(referenceMask[i]>-1)
-      {
-         DTYPE refValue = refPtr[i];
-         DTYPE warValue = warPtr[i];
-         if(refValue==refValue && warValue==warValue)
-         {
-            DTYPE gradX = warGradPtrX[i];
-            DTYPE gradY = warGradPtrY[i];
+    // Create pointers to the current joint histogram
+    double *logHistoPtr = jointHistogramLog[current_timepoint];
+    double *entropyPtr = entropyValues[current_timepoint];
+    double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
+    size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint];
+    size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint];
+    // Iterate over all voxel
+    for (size_t i = 0; i < voxelNumber; ++i) {
+        // Check if the voxel belongs to the image mask
+        if (referenceMask[i] > -1) {
+            DTYPE refValue = refPtr[i];
+            DTYPE warValue = warPtr[i];
+            if (refValue == refValue && warValue == warValue) {
+                DTYPE gradX = warGradPtrX[i];
+                DTYPE gradY = warGradPtrY[i];
 
-            double jointDeriv[2]= {0.};
-            double refDeriv[2]= {0.};
-            double warDeriv[2]= {0.};
+                double jointDeriv[2] = {0};
+                double refDeriv[2] = {0};
+                double warDeriv[2] = {0};
 
-            for(int r=(int)(refValue-1.0); r<(int)(refValue+3.0); ++r)
-            {
-               if(-1<r && r<referenceBinNumber[current_timepoint])
-               {
-                  for(int w=(int)(warValue-1.0); w<(int)(warValue+3.0); ++w)
-                  {
-                     if(-1<w && w<floatingBinNumber[current_timepoint])
-                     {
-                        double commun =
-                              GetBasisSplineValue((double)refValue - (double)r) *
-                              GetBasisSplineDerivativeValue((double)warValue - (double)w);
-                        double jointLog = logHistoPtr[r+w*referenceBinNumber[current_timepoint]];
-                        double refLog = logHistoPtr[r+referenceOffset];
-                        double warLog = logHistoPtr[w+floatingOffset];
-                        if(gradX==gradX){
-                           jointDeriv[0] += commun * gradX * jointLog;
-                           refDeriv[0] += commun * gradX * refLog;
-                           warDeriv[0] += commun * gradX * warLog;
+                for (int r = (int)(refValue - 1.0); r < (int)(refValue + 3.0); ++r) {
+                    if (-1 < r && r < referenceBinNumber[current_timepoint]) {
+                        for (int w = (int)(warValue - 1.0); w < (int)(warValue + 3.0); ++w) {
+                            if (-1 < w && w < floatingBinNumber[current_timepoint]) {
+                                double commun =
+                                    GetBasisSplineValue((double)refValue - (double)r) *
+                                    GetBasisSplineDerivativeValue((double)warValue - (double)w);
+                                double jointLog = logHistoPtr[r + w * referenceBinNumber[current_timepoint]];
+                                double refLog = logHistoPtr[r + referenceOffset];
+                                double warLog = logHistoPtr[w + floatingOffset];
+                                if (gradX == gradX) {
+                                    jointDeriv[0] += commun * gradX * jointLog;
+                                    refDeriv[0] += commun * gradX * refLog;
+                                    warDeriv[0] += commun * gradX * warLog;
+                                }
+                                if (gradY == gradY) {
+                                    jointDeriv[1] += commun * gradY * jointLog;
+                                    refDeriv[1] += commun * gradY * refLog;
+                                    warDeriv[1] += commun * gradY * warLog;
+                                }
+                            }
                         }
-                        if(gradY==gradY){
-                           jointDeriv[1] += commun * gradY * jointLog;
-                           refDeriv[1] += commun * gradY * refLog;
-                           warDeriv[1] += commun * gradY * warLog;
-                        }
-                     }
-                  }
-               }
-            }
-         measureGradPtrX[i] += (DTYPE)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
-                  nmi * jointDeriv[0]) / (entropyPtr[2]*entropyPtr[3]));
-         measureGradPtrY[i] += (DTYPE)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
-                  nmi * jointDeriv[1]) / (entropyPtr[2]*entropyPtr[3]));
-         }// Check that the values are defined
-      } // mask
-   } // loop over all voxel
+                    }
+                }
+                measureGradPtrX[i] += (DTYPE)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
+                                                                  nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
+                measureGradPtrY[i] += (DTYPE)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
+                                                                  nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
+            }// Check that the values are defined
+        } // mask
+    } // loop over all voxel
 }
 /* *************************************************************** */
 template void reg_getVoxelBasedNMIGradient2D<float>
-(nifti_image *,nifti_image *,unsigned short *,unsigned short *,double **,double **,nifti_image *,nifti_image *,int *, int, double);
+(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double);
 template void reg_getVoxelBasedNMIGradient2D<double>
-(nifti_image *,nifti_image *,unsigned short *,unsigned short *,double **,double **,nifti_image *,nifti_image *,int *, int, double);
+(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double);
 /* *************************************************************** */
 template <class DTYPE>
 void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
@@ -670,304 +576,280 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
                                     nifti_image *measureGradientImage,
                                     int *referenceMask,
                                     int current_timepoint,
-                           double timepoint_weight
-                                    )
-{
-   if(current_timepoint<0 || current_timepoint>=referenceImage->nt){
-      reg_print_fct_error("reg_getVoxelBasedNMIGradient3D");
-      reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
-      reg_exit();
-   }
-   //
+                                    double timepoint_weight) {
+    if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) {
+        reg_print_fct_error("reg_getVoxelBasedNMIGradient3D");
+        reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
+        reg_exit();
+    }
+
 #ifdef WIN32
-   long i;
-   long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    long i;
+    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #else
-   size_t i;
-   size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    size_t i;
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #endif
-   // Pointers to the image data
-   DTYPE *refImagePtr = static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *refPtr = &refImagePtr[current_timepoint*voxelNumber];
-   DTYPE *warImagePtr = static_cast<DTYPE *>(warpedImage->data);
-   DTYPE *warPtr = &warImagePtr[current_timepoint*voxelNumber];
+    // Pointers to the image data
+    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
+    DTYPE *refPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
+    DTYPE *warPtr = &warImagePtr[current_timepoint * voxelNumber];
 
-   // Pointers to the spatial gradient of the warped image
-   DTYPE *warGradPtrX = static_cast<DTYPE *>(warpedGradient->data);
-   DTYPE *warGradPtrY = &warGradPtrX[voxelNumber];
-   DTYPE *warGradPtrZ = &warGradPtrY[voxelNumber];
+    // Pointers to the spatial gradient of the warped image
+    DTYPE *warGradPtrX = static_cast<DTYPE*>(warpedGradient->data);
+    DTYPE *warGradPtrY = &warGradPtrX[voxelNumber];
+    DTYPE *warGradPtrZ = &warGradPtrY[voxelNumber];
 
-   // Pointers to the measure of similarity gradient
-   DTYPE *measureGradPtrX = static_cast<DTYPE *>(measureGradientImage->data);
-   DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-   DTYPE *measureGradPtrZ = &measureGradPtrY[voxelNumber];
+    // Pointers to the measure of similarity gradient
+    DTYPE *measureGradPtrX = static_cast<DTYPE*>(measureGradientImage->data);
+    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
+    DTYPE *measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
-   // Create pointers to the current joint histogram
-   double *logHistoPtr = jointHistogramLog[current_timepoint];
-   double *entropyPtr = entropyValues[current_timepoint];
-   double nmi = (entropyPtr[0]+entropyPtr[1])/entropyPtr[2];
-   size_t referenceOffset=referenceBinNumber[current_timepoint]*floatingBinNumber[current_timepoint];
-   size_t floatingOffset=referenceOffset+referenceBinNumber[current_timepoint];
-   int r,w;
-   DTYPE refValue,warValue,gradX,gradY,gradZ;
-   double jointDeriv[3],refDeriv[3],warDeriv[3],commun,jointLog,refLog,warLog;
-   // Iterate over all voxel
+    // Create pointers to the current joint histogram
+    double *logHistoPtr = jointHistogramLog[current_timepoint];
+    double *entropyPtr = entropyValues[current_timepoint];
+    double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
+    size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint];
+    size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint];
+    int r, w;
+    DTYPE refValue, warValue, gradX, gradY, gradZ;
+    double jointDeriv[3], refDeriv[3], warDeriv[3], commun, jointLog, refLog, warLog;
+    // Iterate over all voxel
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   private(i,r,w,refValue,warValue,gradX,gradY,gradZ, \
-   jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \
-   shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \
-   logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \
-   warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,current_timepoint,timepoint_weight)
+    private(i,r,w,refValue,warValue,gradX,gradY,gradZ, \
+    jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \
+    shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \
+    logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \
+    warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,current_timepoint,timepoint_weight)
 #endif // _OPENMP
-   for(i=0; i<voxelNumber; ++i)
-   {
-      // Check if the voxel belongs to the image mask
-      if(referenceMask[i]>-1)
-      {
-         refValue = refPtr[i];
-         warValue = warPtr[i];
-         if(refValue==refValue && warValue==warValue)
-         {
-            gradX = warGradPtrX[i];
-            gradY = warGradPtrY[i];
-            gradZ = warGradPtrZ[i];
+    for (i = 0; i < voxelNumber; ++i) {
+        // Check if the voxel belongs to the image mask
+        if (referenceMask[i] > -1) {
+            refValue = refPtr[i];
+            warValue = warPtr[i];
+            if (refValue == refValue && warValue == warValue) {
+                gradX = warGradPtrX[i];
+                gradY = warGradPtrY[i];
+                gradZ = warGradPtrZ[i];
 
-            jointDeriv[0]=jointDeriv[1]=jointDeriv[2]=0.f;
-            refDeriv[0]=refDeriv[1]=refDeriv[2]=0.f;
-            warDeriv[0]=warDeriv[1]=warDeriv[2]=0.f;
+                jointDeriv[0] = jointDeriv[1] = jointDeriv[2] = 0.f;
+                refDeriv[0] = refDeriv[1] = refDeriv[2] = 0.f;
+                warDeriv[0] = warDeriv[1] = warDeriv[2] = 0.f;
 
-            for(r=(int)(refValue-1.0); r<(int)(refValue+3.0); ++r)
-            {
-               if(-1<r && r<referenceBinNumber[current_timepoint])
-               {
-                  for(w=(int)(warValue-1.0); w<(int)(warValue+3.0); ++w)
-                  {
-                     if(-1<w && w<floatingBinNumber[current_timepoint])
-                     {
-                        commun= GetBasisSplineValue((double)refValue - (double)r) *
-                              GetBasisSplineDerivativeValue((double)warValue - (double)w);
-                        jointLog = logHistoPtr[r+w*referenceBinNumber[current_timepoint]];
-                        refLog = logHistoPtr[r+referenceOffset];
-                        warLog = logHistoPtr[w+floatingOffset];
-                        if(gradX==gradX){
-                           refDeriv[0] += commun * gradX * refLog;
-                           warDeriv[0] += commun * gradX * warLog;
-                           jointDeriv[0] += commun * gradX * jointLog;
-                        }
-                        if(gradY==gradY){
-                           refDeriv[1] += commun * gradY * refLog;
-                           warDeriv[1] += commun * gradY * warLog;
-                           jointDeriv[1] += commun * gradY * jointLog;
+                for (r = (int)(refValue - 1.0); r < (int)(refValue + 3.0); ++r) {
+                    if (-1 < r && r < referenceBinNumber[current_timepoint]) {
+                        for (w = (int)(warValue - 1.0); w < (int)(warValue + 3.0); ++w) {
+                            if (-1 < w && w < floatingBinNumber[current_timepoint]) {
+                                commun = GetBasisSplineValue((double)refValue - (double)r) *
+                                    GetBasisSplineDerivativeValue((double)warValue - (double)w);
+                                jointLog = logHistoPtr[r + w * referenceBinNumber[current_timepoint]];
+                                refLog = logHistoPtr[r + referenceOffset];
+                                warLog = logHistoPtr[w + floatingOffset];
+                                if (gradX == gradX) {
+                                    refDeriv[0] += commun * gradX * refLog;
+                                    warDeriv[0] += commun * gradX * warLog;
+                                    jointDeriv[0] += commun * gradX * jointLog;
+                                }
+                                if (gradY == gradY) {
+                                    refDeriv[1] += commun * gradY * refLog;
+                                    warDeriv[1] += commun * gradY * warLog;
+                                    jointDeriv[1] += commun * gradY * jointLog;
+                                }
+                                if (gradZ == gradZ) {
+                                    refDeriv[2] += commun * gradZ * refLog;
+                                    warDeriv[2] += commun * gradZ * warLog;
+                                    jointDeriv[2] += commun * gradZ * jointLog;
+                                }
+                            }
                         }
-                        if(gradZ==gradZ){
-                           refDeriv[2] += commun * gradZ * refLog;
-                           warDeriv[2] += commun * gradZ * warLog;
-                           jointDeriv[2] += commun * gradZ * jointLog;
-                        }
-                     }
-                  }
-               }
-            }
-         measureGradPtrX[i] += (DTYPE)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
-                  nmi * jointDeriv[0]) / (entropyPtr[2]*entropyPtr[3]));
-         measureGradPtrY[i] += (DTYPE)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
-                  nmi * jointDeriv[1]) / (entropyPtr[2]*entropyPtr[3]));
-         measureGradPtrZ[i] += (DTYPE)(timepoint_weight * (refDeriv[2] + warDeriv[2] -
-                  nmi * jointDeriv[2]) / (entropyPtr[2]*entropyPtr[3]));
-         }// Check that the values are defined
-      } // mask
-   } // loop over all voxel
+                    }
+                }
+                measureGradPtrX[i] += (DTYPE)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
+                                                                  nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
+                measureGradPtrY[i] += (DTYPE)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
+                                                                  nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
+                measureGradPtrZ[i] += (DTYPE)(timepoint_weight * (refDeriv[2] + warDeriv[2] -
+                                                                  nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3]));
+            }// Check that the values are defined
+        } // mask
+    } // loop over all voxel
 }
 /* *************************************************************** */
 template void reg_getVoxelBasedNMIGradient3D<float>
-(nifti_image *,nifti_image *,unsigned short *,unsigned short *,double **,double **,nifti_image *,nifti_image *,int *, int, double);
+(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double);
 template void reg_getVoxelBasedNMIGradient3D<double>
-(nifti_image *,nifti_image *,unsigned short *,unsigned short *,double **,double **,nifti_image *,nifti_image *,int *, int, double);
+(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double);
 /* *************************************************************** */
-void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
-{
-   // Check if the specified time point exists and is active
-   reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-   if(this->timePointWeight[current_timepoint]==0.0)
-      return;
+void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+    // Check if the specified time point exists and is active
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
+    if (this->timePointWeight[current_timepoint] == 0)
+        return;
 
-   // Check if all required input images are of the same data type
-   int dtype = this->referenceImagePointer->datatype;
-   if(this->warpedFloatingImagePointer->datatype != dtype ||
-         this->warpedFloatingGradientImagePointer->datatype != dtype ||
-         this->forwardVoxelBasedGradientImagePointer->datatype != dtype
-         )
-   {
-      reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-      reg_print_msg_error("Input images are exepected to be of the same type");
-      reg_exit();
-   }
+    // Check if all required input images are of the same data type
+    int dtype = this->referenceImagePointer->datatype;
+    if (this->warpedFloatingImagePointer->datatype != dtype ||
+        this->warpedFloatingGradientImagePointer->datatype != dtype ||
+        this->forwardVoxelBasedGradientImagePointer->datatype != dtype) {
+        reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
+        reg_print_msg_error("Input images are expected to be of the same type");
+        reg_exit();
+    }
 
-   // Call compute similarity measure to calculate joint histogram
-   this->GetSimilarityMeasureValue();
+    // Call compute similarity measure to calculate joint histogram
+    this->GetSimilarityMeasureValue();
 
-   // Compute the gradient of the nmi for the forward transformation
-   if(this->referenceImagePointer->nz>1)  // 3D input images
-   {
-      switch(dtype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_getVoxelBasedNMIGradient3D<float>(this->referenceImagePointer,
-                                               this->warpedFloatingImagePointer,
-                                               this->referenceBinNumber,
-                                               this->floatingBinNumber,
-                                               this->forwardJointHistogramLog,
-                                               this->forwardEntropyValues,
-                                               this->warpedFloatingGradientImagePointer,
-                                               this->forwardVoxelBasedGradientImagePointer,
-                                               this->referenceMaskPointer,
-                                               current_timepoint,
-                                               this->timePointWeight[current_timepoint]);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_getVoxelBasedNMIGradient3D<double>(this->referenceImagePointer,
-                                                this->warpedFloatingImagePointer,
-                                                this->referenceBinNumber,
-                                                this->floatingBinNumber,
-                                                this->forwardJointHistogramLog,
-                                                this->forwardEntropyValues,
-                                                this->warpedFloatingGradientImagePointer,
-                                                this->forwardVoxelBasedGradientImagePointer,
-                                                this->referenceMaskPointer,
-                                                current_timepoint,
-                                                this->timePointWeight[current_timepoint]);
-         break;
-      default:
-         reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-         reg_print_msg_error("Unsupported datatype");
-         reg_exit();
-      }
-   }
-   else  // 2D input images
-   {
-      switch(dtype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_getVoxelBasedNMIGradient2D<float>(this->referenceImagePointer,
-                                               this->warpedFloatingImagePointer,
-                                               this->referenceBinNumber,
-                                               this->floatingBinNumber,
-                                               this->forwardJointHistogramLog,
-                                               this->forwardEntropyValues,
-                                               this->warpedFloatingGradientImagePointer,
-                                               this->forwardVoxelBasedGradientImagePointer,
-                                               this->referenceMaskPointer,
-                                               current_timepoint,
-                                               this->timePointWeight[current_timepoint]);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_getVoxelBasedNMIGradient2D<double>(this->referenceImagePointer,
-                                                this->warpedFloatingImagePointer,
-                                                this->referenceBinNumber,
-                                                this->floatingBinNumber,
-                                                this->forwardJointHistogramLog,
-                                                this->forwardEntropyValues,
-                                                this->warpedFloatingGradientImagePointer,
-                                                this->forwardVoxelBasedGradientImagePointer,
-                                                this->referenceMaskPointer,
-                                                current_timepoint,
-                                                this->timePointWeight[current_timepoint]);
-         break;
-      default:
-         reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-         reg_print_msg_error("Unsupported datatype");
-         reg_exit();
-      }
-   }
-
-   if(this->isSymmetric)
-   {
-      dtype = this->floatingImagePointer->datatype;
-      if(this->warpedReferenceImagePointer->datatype != dtype ||
-            this->warpedReferenceGradientImagePointer->datatype != dtype ||
-            this->backwardVoxelBasedGradientImagePointer->datatype != dtype)
-      {
-         reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-         reg_print_msg_error("Input images are exepected to be of the same type");
-         reg_exit();
-      }
-      // Compute the gradient of the nmi for the backward transformation
-      if(this->floatingImagePointer->nz>1)  // 3D input images
-      {
-         switch(dtype)
-         {
-         case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedNMIGradient3D<float>(this->floatingImagePointer,
-                                                  this->warpedReferenceImagePointer,
-                                                  this->floatingBinNumber,
+    // Compute the gradient of the nmi for the forward transformation
+    if (this->referenceImagePointer->nz > 1) {  // 3D input images
+        switch (dtype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_getVoxelBasedNMIGradient3D<float>(this->referenceImagePointer,
+                                                  this->warpedFloatingImagePointer,
                                                   this->referenceBinNumber,
-                                                  this->backwardJointHistogramLog,
-                                                  this->backwardEntropyValues,
-                                                  this->warpedReferenceGradientImagePointer,
-                                                  this->backwardVoxelBasedGradientImagePointer,
-                                                  this->floatingMaskPointer,
+                                                  this->floatingBinNumber,
+                                                  this->forwardJointHistogramLog,
+                                                  this->forwardEntropyValues,
+                                                  this->warpedFloatingGradientImagePointer,
+                                                  this->forwardVoxelBasedGradientImagePointer,
+                                                  this->referenceMaskPointer,
                                                   current_timepoint,
                                                   this->timePointWeight[current_timepoint]);
             break;
-         case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedNMIGradient3D<double>(this->floatingImagePointer,
-                                                   this->warpedReferenceImagePointer,
-                                                   this->floatingBinNumber,
+        case NIFTI_TYPE_FLOAT64:
+            reg_getVoxelBasedNMIGradient3D<double>(this->referenceImagePointer,
+                                                   this->warpedFloatingImagePointer,
                                                    this->referenceBinNumber,
-                                                   this->backwardJointHistogramLog,
-                                                   this->backwardEntropyValues,
-                                                   this->warpedReferenceGradientImagePointer,
-                                                   this->backwardVoxelBasedGradientImagePointer,
-                                                   this->floatingMaskPointer,
+                                                   this->floatingBinNumber,
+                                                   this->forwardJointHistogramLog,
+                                                   this->forwardEntropyValues,
+                                                   this->warpedFloatingGradientImagePointer,
+                                                   this->forwardVoxelBasedGradientImagePointer,
+                                                   this->referenceMaskPointer,
                                                    current_timepoint,
                                                    this->timePointWeight[current_timepoint]);
             break;
-         default:
+        default:
             reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
             reg_print_msg_error("Unsupported datatype");
             reg_exit();
-         }
-      }
-      else  // 2D input images
-      {
-         switch(dtype)
-         {
-         case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedNMIGradient2D<float>(this->floatingImagePointer,
-                                                  this->warpedReferenceImagePointer,
-                                                  this->floatingBinNumber,
+        }
+    } else { // 2D input images
+        switch (dtype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_getVoxelBasedNMIGradient2D<float>(this->referenceImagePointer,
+                                                  this->warpedFloatingImagePointer,
                                                   this->referenceBinNumber,
-                                                  this->backwardJointHistogramLog,
-                                                  this->backwardEntropyValues,
-                                                  this->warpedReferenceGradientImagePointer,
-                                                  this->backwardVoxelBasedGradientImagePointer,
-                                                  this->floatingMaskPointer,
+                                                  this->floatingBinNumber,
+                                                  this->forwardJointHistogramLog,
+                                                  this->forwardEntropyValues,
+                                                  this->warpedFloatingGradientImagePointer,
+                                                  this->forwardVoxelBasedGradientImagePointer,
+                                                  this->referenceMaskPointer,
                                                   current_timepoint,
                                                   this->timePointWeight[current_timepoint]);
             break;
-         case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedNMIGradient2D<double>(this->floatingImagePointer,
-                                                   this->warpedReferenceImagePointer,
-                                                   this->floatingBinNumber,
+        case NIFTI_TYPE_FLOAT64:
+            reg_getVoxelBasedNMIGradient2D<double>(this->referenceImagePointer,
+                                                   this->warpedFloatingImagePointer,
                                                    this->referenceBinNumber,
-                                                   this->backwardJointHistogramLog,
-                                                   this->backwardEntropyValues,
-                                                   this->warpedReferenceGradientImagePointer,
-                                                   this->backwardVoxelBasedGradientImagePointer,
-                                                   this->floatingMaskPointer,
+                                                   this->floatingBinNumber,
+                                                   this->forwardJointHistogramLog,
+                                                   this->forwardEntropyValues,
+                                                   this->warpedFloatingGradientImagePointer,
+                                                   this->forwardVoxelBasedGradientImagePointer,
+                                                   this->referenceMaskPointer,
                                                    current_timepoint,
                                                    this->timePointWeight[current_timepoint]);
             break;
-         default:
+        default:
             reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
             reg_print_msg_error("Unsupported datatype");
             reg_exit();
-         }
-      }
-   }
+        }
+    }
+
+    if (this->isSymmetric) {
+        dtype = this->floatingImagePointer->datatype;
+        if (this->warpedReferenceImagePointer->datatype != dtype ||
+            this->warpedReferenceGradientImagePointer->datatype != dtype ||
+            this->backwardVoxelBasedGradientImagePointer->datatype != dtype) {
+            reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
+            reg_print_msg_error("Input images are expected to be of the same type");
+            reg_exit();
+        }
+        // Compute the gradient of the nmi for the backward transformation
+        if (this->floatingImagePointer->nz > 1) {  // 3D input images
+            switch (dtype) {
+            case NIFTI_TYPE_FLOAT32:
+                reg_getVoxelBasedNMIGradient3D<float>(this->floatingImagePointer,
+                                                      this->warpedReferenceImagePointer,
+                                                      this->floatingBinNumber,
+                                                      this->referenceBinNumber,
+                                                      this->backwardJointHistogramLog,
+                                                      this->backwardEntropyValues,
+                                                      this->warpedReferenceGradientImagePointer,
+                                                      this->backwardVoxelBasedGradientImagePointer,
+                                                      this->floatingMaskPointer,
+                                                      current_timepoint,
+                                                      this->timePointWeight[current_timepoint]);
+                break;
+            case NIFTI_TYPE_FLOAT64:
+                reg_getVoxelBasedNMIGradient3D<double>(this->floatingImagePointer,
+                                                       this->warpedReferenceImagePointer,
+                                                       this->floatingBinNumber,
+                                                       this->referenceBinNumber,
+                                                       this->backwardJointHistogramLog,
+                                                       this->backwardEntropyValues,
+                                                       this->warpedReferenceGradientImagePointer,
+                                                       this->backwardVoxelBasedGradientImagePointer,
+                                                       this->floatingMaskPointer,
+                                                       current_timepoint,
+                                                       this->timePointWeight[current_timepoint]);
+                break;
+            default:
+                reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
+                reg_print_msg_error("Unsupported datatype");
+                reg_exit();
+            }
+        } else { // 2D input images
+            switch (dtype) {
+            case NIFTI_TYPE_FLOAT32:
+                reg_getVoxelBasedNMIGradient2D<float>(this->floatingImagePointer,
+                                                      this->warpedReferenceImagePointer,
+                                                      this->floatingBinNumber,
+                                                      this->referenceBinNumber,
+                                                      this->backwardJointHistogramLog,
+                                                      this->backwardEntropyValues,
+                                                      this->warpedReferenceGradientImagePointer,
+                                                      this->backwardVoxelBasedGradientImagePointer,
+                                                      this->floatingMaskPointer,
+                                                      current_timepoint,
+                                                      this->timePointWeight[current_timepoint]);
+                break;
+            case NIFTI_TYPE_FLOAT64:
+                reg_getVoxelBasedNMIGradient2D<double>(this->floatingImagePointer,
+                                                       this->warpedReferenceImagePointer,
+                                                       this->floatingBinNumber,
+                                                       this->referenceBinNumber,
+                                                       this->backwardJointHistogramLog,
+                                                       this->backwardEntropyValues,
+                                                       this->warpedReferenceGradientImagePointer,
+                                                       this->backwardVoxelBasedGradientImagePointer,
+                                                       this->floatingMaskPointer,
+                                                       current_timepoint,
+                                                       this->timePointWeight[current_timepoint]);
+                break;
+            default:
+                reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
+                reg_print_msg_error("Unsupported datatype");
+                reg_exit();
+            }
+        }
+    }
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_nmi::GetVoxelBasedSimilarityMeasureGradient called");
+    reg_print_msg_debug("reg_nmi::GetVoxelBasedSimilarityMeasureGradient called");
 #endif
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index c3177443..e58b58e7 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -1,5 +1,5 @@
 /*
- *  _reg_mutualinformation.h
+ *  _reg_nmi.h
  *
  *
  *  Created by Marc Modat on 25/03/2009.
@@ -20,9 +20,8 @@
 
 /* *************************************************************** */
 /* *************************************************************** */
-/// @brief NMI measure of similarity classe
-class reg_nmi : public reg_measure
-{
+/// @brief NMI measure of similarity class
+class reg_nmi: public reg_measure {
 public:
     /// @brief reg_nmi class constructor
     reg_nmi();
@@ -67,17 +66,17 @@ class reg_nmi : public reg_measure
     }
 
 protected:
-   unsigned short referenceBinNumber[255];
-   unsigned short floatingBinNumber[255];
-   unsigned short totalBinNumber[255];
-   double **forwardJointHistogramPro;
-   double **forwardJointHistogramLog;
-   double **forwardEntropyValues;
-   double **backwardJointHistogramPro;
-   double **backwardJointHistogramLog;
-   double **backwardEntropyValues;
-
-   void DeallocateHistogram();
+    unsigned short referenceBinNumber[255];
+    unsigned short floatingBinNumber[255];
+    unsigned short totalBinNumber[255];
+    double **forwardJointHistogramPro;
+    double **forwardJointHistogramLog;
+    double **forwardEntropyValues;
+    double **backwardJointHistogramPro;
+    double **backwardJointHistogramLog;
+    double **backwardEntropyValues;
+
+    void DeallocateHistogram();
 };
 /* *************************************************************** */
 /* *************************************************************** */
@@ -92,7 +91,7 @@ void reg_getNMIValue(nifti_image *referenceImage,
                      double **jointhistogramPro,
                      double **entropyValues,
                      int *referenceMask
-                    );
+);
 /* *************************************************************** */
 extern "C++" template <class DTYPE>
 void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
@@ -106,7 +105,7 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
                                     int *referenceMask,
                                     int current_timepoint,
                                     double timepoint_weight
-                                   );
+);
 /* *************************************************************** */
 extern "C++" template <class DTYPE>
 void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
@@ -120,38 +119,34 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
                                     int *referenceMask,
                                     int current_timepoint,
                                     double timepoint_weight
-                                   );
+);
 /* *************************************************************** */
 /* *************************************************************** */
 // Simple class to dynamically manage an array of pointers
 // Needed for multi channel NMI
 template<class DataTYPE>
-class SafeArray
-{
+class SafeArray {
 public:
-   /// Constructor
-   SafeArray(int items)
-   {
-      data = new DataTYPE[items];
-   }
-
-   /// Destructor
-   ~SafeArray()
-   {
-      delete[] data;
-   }
-
-   /// Implicit conversion
-   operator DataTYPE *()
-   {
-      return data;
-   }
+    /// Constructor
+    SafeArray(int items) {
+        data = new DataTYPE[items];
+    }
+
+    /// Destructor
+    ~SafeArray() {
+        delete[] data;
+    }
+
+    /// Implicit conversion
+    operator DataTYPE *() {
+        return data;
+    }
 
 private:
-   void operator=(const SafeArray &) {};
-   SafeArray(const SafeArray &) {};
+    void operator=(const SafeArray &) {};
+    SafeArray(const SafeArray &) {};
 
-   DataTYPE *data;
+    DataTYPE *data;
 };
 
 //-----------------------------------------------------------------------------
@@ -161,105 +156,92 @@ class SafeArray
 // 'end' values are like the STL ranges, where they signify one past the last value.
 //-----------------------------------------------------------------------------
 template<typename T>
-class Multi_Loop
-{
+class Multi_Loop {
 public:
-   /// Add a for loop to the list
-   void Add(T begin_value, T end_value)
-   {
-      begin.push_back(begin_value);
-      end.push_back(end_value);
-   }
-
-   // Initialises the loops before use.
-   void Initialise()
-   {
-      current.resize(Count());
-      std::copy(begin.begin(), begin.end(), current.begin());
-   }
-
-   /// Gets the index or iterator for the specified loop.
-   T Index(int index) const
-   {
-      return (current[index]);
-   }
-
-   /// Gets the index or iterator for the specified loop.
-   const T &operator [](int index) const
-   {
-      return (current[index]);
-   }
-
-   /// Tests to see if the loops continue.
-   bool Continue() const
-   {
-      return (current[0] != end[0]);
-   }
-
-   /// Compute the next set of indexes or iterators in the sequence.
-   void Next()
-   {
-      int position = begin.size() - 1;
-      bool finished = false;
-
-      while (!finished)
-      {
-         ++current[position];
-         // Finished incrementing?
-         if ((current[position] != end[position]) || (position == 0))
-         {
-            finished = true;
-         }
-         else
-         {
-            // Reset this index, and move on to the previous one.
-            current[position] = begin[position];
-            --position;
-         }
-      }
-   }
-
-   /// Returns the number of 'for' loops added.
-   int Count() const
-   {
-      return (static_cast<int>(begin.size()));
-   }
+    /// Add a for loop to the list
+    void Add(T begin_value, T end_value) {
+        begin.push_back(begin_value);
+        end.push_back(end_value);
+    }
+
+    // Initialises the loops before use.
+    void Initialise() {
+        current.resize(Count());
+        std::copy(begin.begin(), begin.end(), current.begin());
+    }
+
+    /// Gets the index or iterator for the specified loop.
+    T Index(int index) const {
+        return (current[index]);
+    }
+
+    /// Gets the index or iterator for the specified loop.
+    const T &operator [](int index) const {
+        return (current[index]);
+    }
+
+    /// Tests to see if the loops continue.
+    bool Continue() const {
+        return (current[0] != end[0]);
+    }
+
+    /// Compute the next set of indexes or iterators in the sequence.
+    void Next() {
+        int position = begin.size() - 1;
+        bool finished = false;
+
+        while (!finished) {
+            ++current[position];
+            // Finished incrementing?
+            if ((current[position] != end[position]) || (position == 0)) {
+                finished = true;
+            } else {
+                // Reset this index, and move on to the previous one.
+                current[position] = begin[position];
+                --position;
+            }
+        }
+    }
+
+    /// Returns the number of 'for' loops added.
+    int Count() const {
+        return (static_cast<int>(begin.size()));
+    }
 
 private:
-   std::vector<T> begin;   // Start for each loop.
-   std::vector<T> end;     // End for each loop.
-   std::vector<T> current; // Current position of each loop
+    std::vector<T> begin;   // Start for each loop.
+    std::vector<T> end;     // End for each loop.
+    std::vector<T> current; // Current position of each loop
 };
 
 /// Some methods that will be needed for generating the multi-channel histogram
 /// Needed for multi channel NMI
-inline int calculate_product(int dim, int *dimensions)
-{
-   int product = 1;
-   for(int i = 0; i < dim; ++i) product *= dimensions[i];
+inline int calculate_product(int dim, int *dimensions) {
+    int product = 1;
+    for (int i = 0; i < dim; ++i)
+        product *= dimensions[i];
 
-   return product;
+    return product;
 }
 
-inline int calculate_index(int num_dims, int *dimensions, int *indices)
-{
-   int index = 0;
-   for(int i = 0; i < num_dims; ++i) index += indices[i] * calculate_product(i, dimensions);
+inline int calculate_index(int num_dims, int *dimensions, int *indices) {
+    int index = 0;
+    for (int i = 0; i < num_dims; ++i)
+        index += indices[i] * calculate_product(i, dimensions);
 
-   return index;
+    return index;
 }
 
-inline int previous(int current, int num_dims)
-{
-   if(current > 0) return current - 1;
+inline int previous(int current, int num_dims) {
+    if (current > 0)
+        return current - 1;
 
-   return num_dims - 1;
+    return num_dims - 1;
 }
 /* *************************************************************** */
 /* *************************************************************** */
-/// @brief NMI measure of similarity classe
-class reg_multichannel_nmi : public reg_measure
-{
+/// @brief NMI measure of similarity class
+class reg_multichannel_nmi: public reg_measure {
 public:
     /// @brief reg_nmi class constructor
     reg_multichannel_nmi() {}
@@ -278,15 +260,15 @@ class reg_multichannel_nmi : public reg_measure
     }
 
 protected:
-   unsigned short referenceBinNumber[255];
-   unsigned short floatingBinNumber[255];
-   unsigned short totalBinNumber[255];
-   double *forwardJointHistogramProp;
-   double *forwardJointHistogramLog;
-   double *forwardEntropyValues;
-   double *backwardJointHistogramProp;
-   double *backwardJointHistogramLog;
-   double *backwardEntropyValues;
+    unsigned short referenceBinNumber[255];
+    unsigned short floatingBinNumber[255];
+    unsigned short totalBinNumber[255];
+    double *forwardJointHistogramProp;
+    double *forwardJointHistogramLog;
+    double *forwardEntropyValues;
+    double *backwardJointHistogramProp;
+    double *backwardJointHistogramLog;
+    double *backwardEntropyValues;
 };
 /* *************************************************************** */
 /// Multi channel NMI version - Entropy
@@ -304,26 +286,26 @@ void reg_getMultiChannelNMIValue(nifti_image *referenceImages,
 /// Multi channel NMI version - Gradient
 extern "C++"
 void reg_getVoxelBasedMultiChannelNMIGradient2D(nifti_image *referenceImages,
-      nifti_image *warpedImages,
-      nifti_image *warpedImageGradient,
-      unsigned int *reference_bins,
-      unsigned int *warped_bins,
-      double *logJointHistogram,
-      double *entropies,
-      nifti_image *nmiGradientImage,
-      int *mask,
-      bool approx);
+                                                nifti_image *warpedImages,
+                                                nifti_image *warpedImageGradient,
+                                                unsigned int *reference_bins,
+                                                unsigned int *warped_bins,
+                                                double *logJointHistogram,
+                                                double *entropies,
+                                                nifti_image *nmiGradientImage,
+                                                int *mask,
+                                                bool approx);
 /// Multi channel NMI version - Gradient
 extern "C++"
 void reg_getVoxelBasedMultiChannelNMIGradient3D(nifti_image *referenceImages,
-      nifti_image *warpedImages,
-      nifti_image *warpedImageGradient,
-      unsigned int *reference_bins,
-      unsigned int *warped_bins,
-      double *logJointHistogram,
-      double *entropies,
-      nifti_image *nmiGradientImage,
-      int *mask,
-      bool approx);
+                                                nifti_image *warpedImages,
+                                                nifti_image *warpedImageGradient,
+                                                unsigned int *reference_bins,
+                                                unsigned int *warped_bins,
+                                                double *logJointHistogram,
+                                                double *entropies,
+                                                nifti_image *nmiGradientImage,
+                                                int *mask,
+                                                bool approx);
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index 90cd64c9..0788efb6 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -5,47 +5,45 @@
 
 #include "_reg_optimiser.h"
 
-/* *************************************************************** */
-/* *************************************************************** */
+ /* *************************************************************** */
+ /* *************************************************************** */
 template <class T>
-reg_optimiser<T>::reg_optimiser()
-{
-   this->dofNumber=0;
-   this->dofNumber_b=0;
-   this->ndim=3;
-   this->optimiseX=true;
-   this->optimiseY=true;
-   this->optimiseZ=true;
-   this->currentDOF=nullptr;
-   this->currentDOF_b=nullptr;
-   this->bestDOF=nullptr;
-   this->bestDOF_b=nullptr;
-   this->backward=false;
-   this->gradient=nullptr;
-   this->currentIterationNumber=0;
-   this->currentObjFunctionValue=0.0;
-   this->maxIterationNumber=0.0;
-   this->bestObjFunctionValue=0.0;
-   this->objFunc=nullptr;
-   this->gradient_b=nullptr;
+reg_optimiser<T>::reg_optimiser() {
+    this->dofNumber = 0;
+    this->dofNumber_b = 0;
+    this->ndim = 3;
+    this->optimiseX = true;
+    this->optimiseY = true;
+    this->optimiseZ = true;
+    this->currentDOF = nullptr;
+    this->currentDOF_b = nullptr;
+    this->bestDOF = nullptr;
+    this->bestDOF_b = nullptr;
+    this->backward = false;
+    this->gradient = nullptr;
+    this->currentIterationNumber = 0;
+    this->currentObjFunctionValue = 0;
+    this->maxIterationNumber = 0;
+    this->bestObjFunctionValue = 0;
+    this->objFunc = nullptr;
+    this->gradient_b = nullptr;
 
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_optimiser<T>::reg_optimiser() called");
+    reg_print_msg_debug("reg_optimiser<T>::reg_optimiser() called");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-reg_optimiser<T>::~reg_optimiser()
-{
-   if(this->bestDOF!=nullptr)
-      free(this->bestDOF);
-   this->bestDOF=nullptr;
-   if(this->bestDOF_b!=nullptr)
-      free(this->bestDOF_b);
-   this->bestDOF_b=nullptr;
+reg_optimiser<T>::~reg_optimiser() {
+    if (this->bestDOF != nullptr)
+        free(this->bestDOF);
+    this->bestDOF = nullptr;
+    if (this->bestDOF_b != nullptr)
+        free(this->bestDOF_b);
+    this->bestDOF_b = nullptr;
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_optimiser<T>::~reg_optimiser() called");
+    reg_print_msg_debug("reg_optimiser<T>::~reg_optimiser() called");
 #endif
 }
 /* *************************************************************** */
@@ -63,440 +61,396 @@ void reg_optimiser<T>::Initialise(size_t nvox,
                                   T *gradData,
                                   size_t nvox_b,
                                   T *cppData_b,
-                                  T *gradData_b
-                                 )
-{
-   this->dofNumber=nvox;
-   this->ndim=dim;
-   this->optimiseX=optX;
-   this->optimiseY=optY;
-   this->optimiseZ=optZ;
-   this->maxIterationNumber=maxit;
-   this->currentIterationNumber=start;
-   this->currentDOF=cppData;
-   if(this->bestDOF!=nullptr) free(this->bestDOF);
-   this->bestDOF=(T *)malloc(this->dofNumber*sizeof(T));
-   memcpy(this->bestDOF,this->currentDOF,this->dofNumber*sizeof(T));
-   if( gradData!=nullptr)
-      this->gradient=gradData;
+                                  T *gradData_b) {
+    this->dofNumber = nvox;
+    this->ndim = dim;
+    this->optimiseX = optX;
+    this->optimiseY = optY;
+    this->optimiseZ = optZ;
+    this->maxIterationNumber = maxit;
+    this->currentIterationNumber = start;
+    this->currentDOF = cppData;
+    if (this->bestDOF != nullptr) free(this->bestDOF);
+    this->bestDOF = (T*)malloc(this->dofNumber * sizeof(T));
+    memcpy(this->bestDOF, this->currentDOF, this->dofNumber * sizeof(T));
+    if (gradData != nullptr)
+        this->gradient = gradData;
 
-   if(nvox_b>0)
-      this->dofNumber_b=nvox_b;
-   if(cppData_b!=nullptr)
-   {
-      this->currentDOF_b=cppData_b;
-      this->backward=true;
-      if(this->bestDOF_b!=nullptr) free(this->bestDOF_b);
-      this->bestDOF_b=(T *)malloc(this->dofNumber_b*sizeof(T));
-      memcpy(this->bestDOF_b,this->currentDOF_b,this->dofNumber_b*sizeof(T));
-   }
-   if(gradData_b!=nullptr)
-      this->gradient_b=gradData_b;
+    if (nvox_b > 0)
+        this->dofNumber_b = nvox_b;
+    if (cppData_b != nullptr) {
+        this->currentDOF_b = cppData_b;
+        this->backward = true;
+        if (this->bestDOF_b != nullptr) free(this->bestDOF_b);
+        this->bestDOF_b = (T*)malloc(this->dofNumber_b * sizeof(T));
+        memcpy(this->bestDOF_b, this->currentDOF_b, this->dofNumber_b * sizeof(T));
+    }
+    if (gradData_b != nullptr)
+        this->gradient_b = gradData_b;
 
-   this->objFunc=obj;
-   this->bestObjFunctionValue = this->currentObjFunctionValue =
-                                   this->objFunc->GetObjectiveFunctionValue();
+    this->objFunc = obj;
+    this->bestObjFunctionValue = this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue();
 
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_optimiser<T>::Initialise called");
+    reg_print_msg_debug("reg_optimiser<T>::Initialise called");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::RestoreBestDOF()
-{
-   // restore forward transformation
-   memcpy(this->currentDOF,this->bestDOF,this->dofNumber*sizeof(T));
-   // restore backward transformation if required
-   if(this->currentDOF_b!=nullptr && this->bestDOF_b!=nullptr && this->dofNumber_b>0)
-      memcpy(this->currentDOF_b,this->bestDOF_b,this->dofNumber_b*sizeof(T));
+void reg_optimiser<T>::RestoreBestDOF() {
+    // restore forward transformation
+    memcpy(this->currentDOF, this->bestDOF, this->dofNumber * sizeof(T));
+    // restore backward transformation if required
+    if (this->currentDOF_b != nullptr && this->bestDOF_b != nullptr && this->dofNumber_b > 0)
+        memcpy(this->currentDOF_b, this->bestDOF_b, this->dofNumber_b * sizeof(T));
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::StoreCurrentDOF()
-{
-   // save forward transformation
-   memcpy(this->bestDOF,this->currentDOF,this->dofNumber*sizeof(T));
-   // save backward transformation if required
-   if(this->currentDOF_b!=nullptr && this->bestDOF_b!=nullptr && this->dofNumber_b>0)
-      memcpy(this->bestDOF_b,this->currentDOF_b,this->dofNumber_b*sizeof(T));
+void reg_optimiser<T>::StoreCurrentDOF() {
+    // save forward transformation
+    memcpy(this->bestDOF, this->currentDOF, this->dofNumber * sizeof(T));
+    // save backward transformation if required
+    if (this->currentDOF_b != nullptr && this->bestDOF_b != nullptr && this->dofNumber_b > 0)
+        memcpy(this->bestDOF_b, this->currentDOF_b, this->dofNumber_b * sizeof(T));
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::Perturbation(float length)
-{
-   // initialise the randomiser
-   srand(time(nullptr));
-   // Reset the number of iteration
-   this->currentIterationNumber=0;
-   // Create some perturbation for degree of freedom
-   for(size_t i=0; i<this->dofNumber; ++i)
-   {
-      this->currentDOF[i]=this->bestDOF[i] + length * (float)(rand() - RAND_MAX/2) / ((float)RAND_MAX/2.0f);
-   }
-   if(this->backward)
-   {
-      for(size_t i=0; i<this->dofNumber_b; ++i)
-      {
-         this->currentDOF_b[i]=this->bestDOF_b[i] + length * (float)(rand() % 2001 - 1000) / 1000.f;
-      }
-   }
-   this->StoreCurrentDOF();
-   this->currentObjFunctionValue=this->bestObjFunctionValue=
-                                    this->objFunc->GetObjectiveFunctionValue();
+void reg_optimiser<T>::Perturbation(float length) {
+    // initialise the randomiser
+    srand(time(nullptr));
+    // Reset the number of iteration
+    this->currentIterationNumber = 0;
+    // Create some perturbation for degree of freedom
+    for (size_t i = 0; i < this->dofNumber; ++i) {
+        this->currentDOF[i] = this->bestDOF[i] + length * (float)(rand() - RAND_MAX / 2) / ((float)RAND_MAX / 2.0f);
+    }
+    if (this->backward) {
+        for (size_t i = 0; i < this->dofNumber_b; ++i) {
+            this->currentDOF_b[i] = this->bestDOF_b[i] + length * (float)(rand() % 2001 - 1000) / 1000.f;
+        }
+    }
+    this->StoreCurrentDOF();
+    this->currentObjFunctionValue = this->bestObjFunctionValue = this->objFunc->GetObjectiveFunctionValue();
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::Optimise(T maxLength,
                                 T smallLength,
-                                T &startLength)
-{
-   size_t lineIteration=0;
-   float addedLength=0;
-   float currentLength=startLength;
+                                T &startLength) {
+    size_t lineIteration = 0;
+    float addedLength = 0;
+    float currentLength = startLength;
 
-   // Start performing the line search
-   while(currentLength>smallLength &&
-         lineIteration<12 &&
-         this->currentIterationNumber<this->maxIterationNumber)
-   {
+    // Start performing the line search
+    while (currentLength > smallLength &&
+           lineIteration < 12 &&
+           this->currentIterationNumber < this->maxIterationNumber) {
 
-      // Compute the gradient normalisation value
-      float normValue = -currentLength;
+        // Compute the gradient normalisation value
+        float normValue = -currentLength;
 
-      this->objFunc->UpdateParameters(normValue);
+        this->objFunc->UpdateParameters(normValue);
 
-      // Compute the new value
-      this->currentObjFunctionValue=this->objFunc->GetObjectiveFunctionValue();
+        // Compute the new value
+        this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue();
 
-      // Check if the update lead to an improvement of the objective function
-      if(this->currentObjFunctionValue > this->bestObjFunctionValue)
-      {
+        // Check if the update lead to an improvement of the objective function
+        if (this->currentObjFunctionValue > this->bestObjFunctionValue) {
 #ifndef NDEBUG
-         char text[255];
-         sprintf(text, "[%i] objective function: %g | Increment %g | ACCEPTED",
-                 (int)this->currentIterationNumber,
-                 this->currentObjFunctionValue,
-                 currentLength);
-         reg_print_msg_debug(text);
+            char text[255];
+            sprintf(text, "[%i] objective function: %g | Increment %g | ACCEPTED",
+                    (int)this->currentIterationNumber,
+                    this->currentObjFunctionValue,
+                    currentLength);
+            reg_print_msg_debug(text);
 #endif
-         // Improvement - Save the new objective function value
-         this->objFunc->UpdateBestObjFunctionValue();
-         this->bestObjFunctionValue=this->currentObjFunctionValue;
-         // Update the total added length
-         addedLength += currentLength;
-         // Increase the step size
-         currentLength *= 1.1f;
-         currentLength = (currentLength<maxLength)?currentLength:maxLength;
-         // Save the current deformation parametrisation
-         this->StoreCurrentDOF();
-      }
-      else
-      {
+            // Improvement - Save the new objective function value
+            this->objFunc->UpdateBestObjFunctionValue();
+            this->bestObjFunctionValue = this->currentObjFunctionValue;
+            // Update the total added length
+            addedLength += currentLength;
+            // Increase the step size
+            currentLength *= 1.1f;
+            currentLength = (currentLength < maxLength) ? currentLength : maxLength;
+            // Save the current deformation parametrisation
+            this->StoreCurrentDOF();
+        } else {
 #ifndef NDEBUG
-         char text[255];
-         sprintf(text, "[%i] objective function: %g | Increment %g | REJECTED",
-                 (int)this->currentIterationNumber,
-                 this->currentObjFunctionValue,
-                 currentLength);
-         reg_print_msg_debug(text);
+            char text[255];
+            sprintf(text, "[%i] objective function: %g | Increment %g | REJECTED",
+                    (int)this->currentIterationNumber,
+                    this->currentObjFunctionValue,
+                    currentLength);
+            reg_print_msg_debug(text);
 #endif
-         // No improvement - Decrease the step size
-         currentLength*=0.5;
-      }
-      this->IncrementCurrentIterationNumber();
-      ++lineIteration;
-   }
-   // update the current size for the next iteration
-   startLength=addedLength;
-   // Restore the last best deformation parametrisation
-   this->RestoreBestDOF();
+            // No improvement - Decrease the step size
+            currentLength *= 0.5;
+        }
+        this->IncrementCurrentIterationNumber();
+        ++lineIteration;
+    }
+    // update the current size for the next iteration
+    startLength = addedLength;
+    // Restore the last best deformation parametrisation
+    this->RestoreBestDOF();
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::reg_test_optimiser()
-{
-   this->objFunc->UpdateParameters(1.f);
+void reg_optimiser<T>::reg_test_optimiser() {
+    this->objFunc->UpdateParameters(1.f);
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-reg_conjugateGradient<T>::reg_conjugateGradient()
-   :reg_optimiser<T>::reg_optimiser()
-{
-   this->array1=nullptr;
-   this->array2=nullptr;
-   this->array1_b=nullptr;
-   this->array2_b=nullptr;
+reg_conjugateGradient<T>::reg_conjugateGradient(): reg_optimiser<T>::reg_optimiser() {
+    this->array1 = nullptr;
+    this->array2 = nullptr;
+    this->array1_b = nullptr;
+    this->array2_b = nullptr;
 
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_conjugateGradient<T>::reg_conjugateGradient() called");
+    reg_print_msg_debug("reg_conjugateGradient<T>::reg_conjugateGradient() called");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-reg_conjugateGradient<T>::~reg_conjugateGradient()
-{
-   if(this->array1!=nullptr)
-      free(this->array1);
-   this->array1=nullptr;
+reg_conjugateGradient<T>::~reg_conjugateGradient() {
+    if (this->array1 != nullptr)
+        free(this->array1);
+    this->array1 = nullptr;
 
-   if(this->array2!=nullptr)
-      free(this->array2);
-   this->array2=nullptr;
+    if (this->array2 != nullptr)
+        free(this->array2);
+    this->array2 = nullptr;
 
-   if(this->array1_b!=nullptr)
-      free(this->array1_b);
-   this->array1_b=nullptr;
+    if (this->array1_b != nullptr)
+        free(this->array1_b);
+    this->array1_b = nullptr;
 
-   if(this->array2_b!=nullptr)
-      free(this->array2_b);
-   this->array2_b=nullptr;
+    if (this->array2_b != nullptr)
+        free(this->array2_b);
+    this->array2_b = nullptr;
 
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_conjugateGradient<T>::~reg_conjugateGradient() called");
+    reg_print_msg_debug("reg_conjugateGradient<T>::~reg_conjugateGradient() called");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 void reg_conjugateGradient<T>::Initialise(size_t nvox,
-      int dim,
-      bool optX,
-      bool optY,
-      bool optZ,
-      size_t maxit,
-      size_t start,
-      InterfaceOptimiser *o,
-      T *cppData,
-      T *gradData,
-      size_t nvox_b,
-      T *cppData_b,
-      T *gradData_b
-                                         )
-{
-   reg_optimiser<T>::Initialise(nvox,
-                                dim,
-                                optX,
-                                optY,
-                                optZ,
-                                maxit,
-                                start,
-                                o,
-                                cppData,
-                                gradData,
-                                nvox_b,
-                                cppData_b,
-                                gradData_b
-                               );
-   this->firstcall=true;
-   if(this->array1!=nullptr) free(this->array1);
-   if(this->array2!=nullptr) free(this->array2);
-   this->array1=(T *)malloc(this->dofNumber*sizeof(T));
-   this->array2=(T *)malloc(this->dofNumber*sizeof(T));
+                                          int dim,
+                                          bool optX,
+                                          bool optY,
+                                          bool optZ,
+                                          size_t maxit,
+                                          size_t start,
+                                          InterfaceOptimiser *o,
+                                          T *cppData,
+                                          T *gradData,
+                                          size_t nvox_b,
+                                          T *cppData_b,
+                                          T *gradData_b) {
+    reg_optimiser<T>::Initialise(nvox,
+                                 dim,
+                                 optX,
+                                 optY,
+                                 optZ,
+                                 maxit,
+                                 start,
+                                 o,
+                                 cppData,
+                                 gradData,
+                                 nvox_b,
+                                 cppData_b,
+                                 gradData_b);
+    this->firstcall = true;
+    if (this->array1 != nullptr) free(this->array1);
+    if (this->array2 != nullptr) free(this->array2);
+    this->array1 = (T*)malloc(this->dofNumber * sizeof(T));
+    this->array2 = (T*)malloc(this->dofNumber * sizeof(T));
 
-   if(cppData_b!=nullptr && gradData_b!=nullptr && nvox_b>0)
-   {
-      if(this->array1_b!=nullptr) free(this->array1_b);
-      if(this->array2_b!=nullptr) free(this->array2_b);
-      this->array1_b=(T *)malloc(this->dofNumber_b*sizeof(T));
-      this->array2_b=(T *)malloc(this->dofNumber_b*sizeof(T));
-   }
+    if (cppData_b != nullptr && gradData_b != nullptr && nvox_b > 0) {
+        if (this->array1_b != nullptr) free(this->array1_b);
+        if (this->array2_b != nullptr) free(this->array2_b);
+        this->array1_b = (T*)malloc(this->dofNumber_b * sizeof(T));
+        this->array2_b = (T*)malloc(this->dofNumber_b * sizeof(T));
+    }
 
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_conjugateGradient<T>::Initialise called");
+    reg_print_msg_debug("reg_conjugateGradient<T>::Initialise called");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_conjugateGradient<T>::UpdateGradientValues()
-{
-
+void reg_conjugateGradient<T>::UpdateGradientValues() {
 #ifdef WIN32
-   long i;
-   long num = (long)this->dofNumber;
-   long num_b = (long)this->dofNumber_b;
+    long i;
+    long num = (long)this->dofNumber;
+    long num_b = (long)this->dofNumber_b;
 #else
-   size_t i;
-   size_t num = (size_t)this->dofNumber;
-   size_t num_b = (size_t)this->dofNumber_b;
+    size_t i;
+    size_t num = (size_t)this->dofNumber;
+    size_t num_b = (size_t)this->dofNumber_b;
 #endif
 
-   T *gradientPtr = this->gradient;
-   T *array1Ptr = this->array1;
-   T *array2Ptr = this->array2;
+    T *gradientPtr = this->gradient;
+    T *array1Ptr = this->array1;
+    T *array2Ptr = this->array2;
 
-   T *gradientPtr_b = this->gradient_b;
-   T *array1Ptr_b = this->array1_b;
-   T *array2Ptr_b = this->array2_b;
+    T *gradientPtr_b = this->gradient_b;
+    T *array1Ptr_b = this->array1_b;
+    T *array2Ptr_b = this->array2_b;
 
-   if(this->firstcall)
-   {
+    if (this->firstcall) {
 #ifndef NDEBUG
-      reg_print_msg_debug("Conjugate gradient initialisation");
+        reg_print_msg_debug("Conjugate gradient initialisation");
 #endif
-      // first conjugate gradient iteration
+        // first conjugate gradient iteration
 #if defined (_OPENMP)
-      #pragma omp parallel for default(none) \
-      shared(num,array1Ptr,array2Ptr,gradientPtr) \
-      private(i)
+#pragma omp parallel for default(none) \
+    shared(num,array1Ptr,array2Ptr,gradientPtr) \
+    private(i)
 #endif
-      for(i=0; i<num; i++)
-      {
-         array2Ptr[i] = array1Ptr[i] = - gradientPtr[i];
-      }
-      if(this->dofNumber_b>0)
-      {
+        for (i = 0; i < num; i++) {
+            array2Ptr[i] = array1Ptr[i] = -gradientPtr[i];
+        }
+        if (this->dofNumber_b > 0) {
 #if defined (_OPENMP)
-         #pragma omp parallel for default(none) \
-         shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \
-         private(i)
+#pragma omp parallel for default(none) \
+    shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \
+    private(i)
 #endif
-         for(i=0; i<num_b; i++)
-         {
-            array2Ptr_b[i] = array1Ptr_b[i] = - gradientPtr_b[i];
-         }
-      }
-      this->firstcall=false;
-   }
-   else
-   {
+            for (i = 0; i < num_b; i++) {
+                array2Ptr_b[i] = array1Ptr_b[i] = -gradientPtr_b[i];
+            }
+        }
+        this->firstcall = false;
+    } else {
 #ifndef NDEBUG
-      reg_print_msg_debug("Conjugate gradient update");
+        reg_print_msg_debug("Conjugate gradient update");
 #endif
-      double dgg=0.0, gg=0.0;
+        double dgg = 0, gg = 0;
 #if defined (_OPENMP)
-      #pragma omp parallel for default(none) \
-      shared(num,array1Ptr,array2Ptr,gradientPtr) \
-      private(i) \
-reduction(+:gg) \
-reduction(+:dgg)
+#pragma omp parallel for default(none) \
+    shared(num,array1Ptr,array2Ptr,gradientPtr) \
+    private(i) \
+    reduction(+:gg) \
+    reduction(+:dgg)
 #endif
-      for(i=0; i<num; i++)
-      {
-         gg += array2Ptr[i] * array1Ptr[i];
-         dgg += (gradientPtr[i] + array1Ptr[i]) * gradientPtr[i];
-      }
-      double gam = dgg/gg;
+        for (i = 0; i < num; i++) {
+            gg += array2Ptr[i] * array1Ptr[i];
+            dgg += (gradientPtr[i] + array1Ptr[i]) * gradientPtr[i];
+        }
+        double gam = dgg / gg;
 
-      if(this->dofNumber_b>0)
-      {
-         double dgg_b=0.0, gg_b=0.0;
+        if (this->dofNumber_b > 0) {
+            double dgg_b = 0, gg_b = 0;
 #if defined (_OPENMP)
-         #pragma omp parallel for default(none) \
-         shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \
-         private(i) \
-reduction(+:gg_b) \
-reduction(+:dgg_b)
+#pragma omp parallel for default(none) \
+    shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \
+    private(i) \
+    reduction(+:gg_b) \
+    reduction(+:dgg_b)
 #endif
-         for(i=0; i<num_b; i++)
-         {
-            gg_b += array2Ptr_b[i] * array1Ptr_b[i];
-            dgg_b += (gradientPtr_b[i] + array1Ptr_b[i]) * gradientPtr_b[i];
-         }
-         gam = (dgg+dgg_b)/(gg+gg_b);
-      }
+            for (i = 0; i < num_b; i++) {
+                gg_b += array2Ptr_b[i] * array1Ptr_b[i];
+                dgg_b += (gradientPtr_b[i] + array1Ptr_b[i]) * gradientPtr_b[i];
+            }
+            gam = (dgg + dgg_b) / (gg + gg_b);
+        }
 #if defined (_OPENMP)
-      #pragma omp parallel for default(none) \
-      shared(num,array1Ptr,array2Ptr,gradientPtr,gam) \
-      private(i)
+#pragma omp parallel for default(none) \
+    shared(num,array1Ptr,array2Ptr,gradientPtr,gam) \
+    private(i)
 #endif
-      for(i=0; i<num; i++)
-      {
-         array1Ptr[i] = - gradientPtr[i];
-         array2Ptr[i] = (array1Ptr[i] + gam * array2Ptr[i]);
-         gradientPtr[i] = - array2Ptr[i];
-      }
-      if(this->dofNumber_b>0)
-      {
+        for (i = 0; i < num; i++) {
+            array1Ptr[i] = -gradientPtr[i];
+            array2Ptr[i] = (array1Ptr[i] + gam * array2Ptr[i]);
+            gradientPtr[i] = -array2Ptr[i];
+        }
+        if (this->dofNumber_b > 0) {
 #if defined (_OPENMP)
-         #pragma omp parallel for default(none) \
-         shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b,gam) \
-         private(i)
+#pragma omp parallel for default(none) \
+    shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b,gam) \
+    private(i)
 #endif
-         for(i=0; i<num_b; i++)
-         {
-            array1Ptr_b[i] = - gradientPtr_b[i];
-            array2Ptr_b[i] = (array1Ptr_b[i] + gam * array2Ptr_b[i]);
-            gradientPtr_b[i] = - array2Ptr_b[i];
-         }
-      }
-   }
-   return;
+            for (i = 0; i < num_b; i++) {
+                array1Ptr_b[i] = -gradientPtr_b[i];
+                array2Ptr_b[i] = (array1Ptr_b[i] + gam * array2Ptr_b[i]);
+                gradientPtr_b[i] = -array2Ptr_b[i];
+            }
+        }
+    }
+    return;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 void reg_conjugateGradient<T>::Optimise(T maxLength,
                                         T smallLength,
-                                        T &startLength)
-{
-   this->UpdateGradientValues();
-   reg_optimiser<T>::Optimise(maxLength,
-                              smallLength,
-                              startLength);
+                                        T &startLength) {
+    this->UpdateGradientValues();
+    reg_optimiser<T>::Optimise(maxLength,
+                               smallLength,
+                               startLength);
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_conjugateGradient<T>::Perturbation(float length)
-{
-   reg_optimiser<T>::Perturbation(length);
-   this->firstcall=true;
+void reg_conjugateGradient<T>::Perturbation(float length) {
+    reg_optimiser<T>::Perturbation(length);
+    this->firstcall = true;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_conjugateGradient<T>::reg_test_optimiser()
-{
-   this->UpdateGradientValues();
-   reg_optimiser<T>::reg_test_optimiser();
+void reg_conjugateGradient<T>::reg_test_optimiser() {
+    this->UpdateGradientValues();
+    reg_optimiser<T>::reg_test_optimiser();
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 reg_lbfgs<T>::reg_lbfgs()
-   :reg_optimiser<T>::reg_optimiser()
-{
-   this->stepToKeep=5;
-   this->oldDOF=nullptr;
-   this->oldGrad=nullptr;
-   this->diffDOF=nullptr;
-   this->diffGrad=nullptr;
+    :reg_optimiser<T>::reg_optimiser() {
+    this->stepToKeep = 5;
+    this->oldDOF = nullptr;
+    this->oldGrad = nullptr;
+    this->diffDOF = nullptr;
+    this->diffGrad = nullptr;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-reg_lbfgs<T>::~reg_lbfgs()
-{
-   if(this->oldDOF!=nullptr)
-      free(this->oldDOF);
-   this->oldDOF=nullptr;
-   if(this->oldGrad!=nullptr)
-      free(this->oldGrad);
-   this->oldGrad=nullptr;
-   for(size_t i=0; i<this->stepToKeep; ++i)
-   {
-      if(this->diffDOF[i]!=nullptr)
-         free(this->diffDOF[i]);
-      this->diffDOF[i]=nullptr;
-      if(this->diffGrad[i]!=nullptr)
-         free(this->diffGrad[i]);
-      this->diffGrad[i]=nullptr;
-   }
-   if(this->diffDOF!=nullptr)
-      free(this->diffDOF);
-   this->diffDOF=nullptr;
-   if(this->diffGrad!=nullptr)
-      free(this->diffGrad);
-   this->diffGrad=nullptr;
+reg_lbfgs<T>::~reg_lbfgs() {
+    if (this->oldDOF != nullptr)
+        free(this->oldDOF);
+    this->oldDOF = nullptr;
+    if (this->oldGrad != nullptr)
+        free(this->oldGrad);
+    this->oldGrad = nullptr;
+    for (size_t i = 0; i < this->stepToKeep; ++i) {
+        if (this->diffDOF[i] != nullptr)
+            free(this->diffDOF[i]);
+        this->diffDOF[i] = nullptr;
+        if (this->diffGrad[i] != nullptr)
+            free(this->diffGrad[i]);
+        this->diffGrad[i] = nullptr;
+    }
+    if (this->diffDOF != nullptr)
+        free(this->diffDOF);
+    this->diffDOF = nullptr;
+    if (this->diffGrad != nullptr)
+        free(this->diffGrad);
+    this->diffGrad = nullptr;
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -513,49 +467,44 @@ void reg_lbfgs<T>::Initialise(size_t nvox,
                               T *gradData,
                               size_t nvox_b,
                               T *cppData_b,
-                              T *gradData_b)
-{
-   reg_optimiser<T>::Initialise(nvox,
-                                dim,
-                                optX,
-                                optY,
-                                optZ,
-                                maxit,
-                                start,
-                                o,
-                                cppData,
-                                gradData,
-                                nvox_b,
-                                cppData_b,
-                                gradData_b);
-   this->stepToKeep=5;
-   this->diffDOF=(T **)malloc(this->stepToKeep*sizeof(T *));
-   this->diffGrad=(T **)malloc(this->stepToKeep*sizeof(T *));
-   for(size_t i=0; i<this->stepToKeep; ++i)
-   {
-      this->diffDOF[i]=(T *)malloc(this->dofNumber*sizeof(T));
-      this->diffGrad[i]=(T *)malloc(this->dofNumber*sizeof(T));
-      if(this->diffDOF[i]==nullptr || this->diffGrad[i]==nullptr)
-      {
-         reg_print_fct_error("reg_lbfgs<T>::Initialise");
-         reg_print_msg_error("Out of memory");
-         reg_exit();
-      }
-   }
-   this->oldDOF=(T *)malloc(this->dofNumber*sizeof(T));
-   this->oldGrad=(T *)malloc(this->dofNumber*sizeof(T));
-   if(this->oldDOF==nullptr || this->oldGrad==nullptr)
-   {
-      reg_print_fct_error("reg_lbfgs<T>::Initialise");
-      reg_print_msg_error("Out of memory");
-      reg_exit();
-   }
+                              T *gradData_b) {
+    reg_optimiser<T>::Initialise(nvox,
+                                 dim,
+                                 optX,
+                                 optY,
+                                 optZ,
+                                 maxit,
+                                 start,
+                                 o,
+                                 cppData,
+                                 gradData,
+                                 nvox_b,
+                                 cppData_b,
+                                 gradData_b);
+    this->stepToKeep = 5;
+    this->diffDOF = (T**)malloc(this->stepToKeep * sizeof(T*));
+    this->diffGrad = (T**)malloc(this->stepToKeep * sizeof(T*));
+    for (size_t i = 0; i < this->stepToKeep; ++i) {
+        this->diffDOF[i] = (T*)malloc(this->dofNumber * sizeof(T));
+        this->diffGrad[i] = (T*)malloc(this->dofNumber * sizeof(T));
+        if (this->diffDOF[i] == nullptr || this->diffGrad[i] == nullptr) {
+            reg_print_fct_error("reg_lbfgs<T>::Initialise");
+            reg_print_msg_error("Out of memory");
+            reg_exit();
+        }
+    }
+    this->oldDOF = (T*)malloc(this->dofNumber * sizeof(T));
+    this->oldGrad = (T*)malloc(this->dofNumber * sizeof(T));
+    if (this->oldDOF == nullptr || this->oldGrad == nullptr) {
+        reg_print_fct_error("reg_lbfgs<T>::Initialise");
+        reg_print_msg_error("Out of memory");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-void reg_lbfgs<T>::UpdateGradientValues()
-{
+void reg_lbfgs<T>::UpdateGradientValues() {
 
 }
 /* *************************************************************** */
@@ -563,13 +512,11 @@ void reg_lbfgs<T>::UpdateGradientValues()
 template <class T>
 void reg_lbfgs<T>::Optimise(T maxLength,
                             T smallLength,
-                            T &startLength)
-{
-
-   this->UpdateGradientValues();
-   reg_optimiser<T>::Optimise(maxLength,
-                              smallLength,
-                              startLength);
+                            T &startLength) {
+    this->UpdateGradientValues();
+    reg_optimiser<T>::Optimise(maxLength,
+                               smallLength,
+                               startLength);
 }
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h
index d7bbee6e..c0b7092e 100644
--- a/reg-lib/cpu/_reg_optimiser.h
+++ b/reg-lib/cpu/_reg_optimiser.h
@@ -14,15 +14,14 @@
 /* *************************************************************** */
 /** @brief Interface between the registration class and the optimiser
  */
-class InterfaceOptimiser
-{
+class InterfaceOptimiser {
 public:
-   /// @brief Returns the registration current objective function value
-   virtual double GetObjectiveFunctionValue() = 0;
-   /// @brief The transformation parameters are optimised
-   virtual void UpdateParameters(float) = 0;
-   /// @brief The best objective function values are stored
-   virtual void UpdateBestObjFunctionValue() = 0;
+    /// @brief Returns the registration current objective function value
+    virtual double GetObjectiveFunctionValue() = 0;
+    /// @brief The transformation parameters are optimised
+    virtual void UpdateParameters(float) = 0;
+    /// @brief The best objective function values are stored
+    virtual void UpdateBestObjFunctionValue() = 0;
 
 protected:
     /// @brief Interface constructor
@@ -33,157 +32,134 @@ class InterfaceOptimiser
 /* *************************************************************** */
 /* *************************************************************** */
 /** @class reg_optimiser
- * @brief Standard gradient acent optimisation
+ * @brief Standard gradient ascent optimisation
  */
 template <class T>
-class reg_optimiser
-{
+class reg_optimiser {
 protected:
-   bool backward;
-   size_t dofNumber;
-   size_t dofNumber_b;
-   size_t ndim;
-   T *currentDOF; // pointer to the cpp nifti image array
-   T *currentDOF_b; // pointer to the cpp nifti image array (backward)
-   T *bestDOF;
-   T *bestDOF_b;
-   T *gradient;
-   T *gradient_b;
-   bool optimiseX;
-   bool optimiseY;
-   bool optimiseZ;
-   size_t maxIterationNumber;
-   size_t currentIterationNumber;
-   double bestObjFunctionValue;
-   double currentObjFunctionValue;
-   InterfaceOptimiser *objFunc;
+    bool backward;
+    size_t dofNumber;
+    size_t dofNumber_b;
+    size_t ndim;
+    T *currentDOF; // pointer to the cpp nifti image array
+    T *currentDOF_b; // pointer to the cpp nifti image array (backward)
+    T *bestDOF;
+    T *bestDOF_b;
+    T *gradient;
+    T *gradient_b;
+    bool optimiseX;
+    bool optimiseY;
+    bool optimiseZ;
+    size_t maxIterationNumber;
+    size_t currentIterationNumber;
+    double bestObjFunctionValue;
+    double currentObjFunctionValue;
+    InterfaceOptimiser *objFunc;
 
 public:
-   reg_optimiser();
-   virtual ~reg_optimiser();
-   virtual void StoreCurrentDOF();
-   virtual void RestoreBestDOF();
-   virtual size_t GetDOFNumber()
-   {
-      return this->dofNumber;
-   }
-   virtual size_t GetDOFNumber_b()
-   {
-      return this->dofNumber_b;
-   }
-   virtual size_t GetNDim()
-   {
-      return this->ndim;
-   }
-   virtual size_t GetVoxNumber()
-   {
-      return this->dofNumber/this->ndim;
-   }
-   virtual size_t GetVoxNumber_b()
-   {
-      return this->dofNumber_b/this->ndim;
-   }
-   virtual T* GetBestDOF()
-   {
-      return this->bestDOF;
-   }
-   virtual T* GetBestDOF_b()
-   {
-      return this->bestDOF_b;
-   }
-   virtual T* GetCurrentDOF()
-   {
-      return this->currentDOF;
-   }
-   virtual T* GetCurrentDOF_b()
-   {
-      return this->currentDOF_b;
-   }
-   virtual T* GetGradient()
-   {
-      return this->gradient;
-   }
-   virtual T* GetGradient_b()
-   {
-      return this->gradient_b;
-   }
-   virtual bool GetOptimiseX()
-   {
-      return this->optimiseX;
-   }
-   virtual bool GetOptimiseY()
-   {
-      return this->optimiseY;
-   }
-   virtual bool GetOptimiseZ()
-   {
-      return this->optimiseZ;
-   }
-   virtual size_t GetMaxIterationNumber()
-   {
-      return this->maxIterationNumber;
-   }
-   virtual size_t GetCurrentIterationNumber()
-   {
-      return this->currentIterationNumber;
-   }
-   virtual size_t ResetCurrentIterationNumber()
-   {
-      return this->currentIterationNumber=0;
-   }
-   virtual double GetBestObjFunctionValue()
-   {
-      return this->bestObjFunctionValue;
-   }
-   virtual void SetBestObjFunctionValue(double i)
-   {
-      this->bestObjFunctionValue=i;
-   }
-   virtual double GetCurrentObjFunctionValue()
-   {
-      return this->currentObjFunctionValue;
-   }
-   virtual void IncrementCurrentIterationNumber()
-   {
-      this->currentIterationNumber++;
-   }
-   virtual void Initialise(size_t nvox,
-                           int dim,
-                           bool optX,
-                           bool optY,
-                           bool optZ,
-                           size_t maxit,
-                           size_t start,
-                           InterfaceOptimiser *o,
-                           T *cppData,
-                           T *gradData=nullptr,
-                           size_t nvox_b=0,
-                           T *cppData_b=nullptr,
-                           T *gradData_b=nullptr);
-   virtual void Optimise(T maxLength,
-                         T smallLength,
-                         T &startLength);
-   virtual void Perturbation(float length);
+    reg_optimiser();
+    virtual ~reg_optimiser();
+    virtual void StoreCurrentDOF();
+    virtual void RestoreBestDOF();
+    virtual size_t GetDOFNumber() {
+        return this->dofNumber;
+    }
+    virtual size_t GetDOFNumber_b() {
+        return this->dofNumber_b;
+    }
+    virtual size_t GetNDim() {
+        return this->ndim;
+    }
+    virtual size_t GetVoxNumber() {
+        return this->dofNumber / this->ndim;
+    }
+    virtual size_t GetVoxNumber_b() {
+        return this->dofNumber_b / this->ndim;
+    }
+    virtual T* GetBestDOF() {
+        return this->bestDOF;
+    }
+    virtual T* GetBestDOF_b() {
+        return this->bestDOF_b;
+    }
+    virtual T* GetCurrentDOF() {
+        return this->currentDOF;
+    }
+    virtual T* GetCurrentDOF_b() {
+        return this->currentDOF_b;
+    }
+    virtual T* GetGradient() {
+        return this->gradient;
+    }
+    virtual T* GetGradient_b() {
+        return this->gradient_b;
+    }
+    virtual bool GetOptimiseX() {
+        return this->optimiseX;
+    }
+    virtual bool GetOptimiseY() {
+        return this->optimiseY;
+    }
+    virtual bool GetOptimiseZ() {
+        return this->optimiseZ;
+    }
+    virtual size_t GetMaxIterationNumber() {
+        return this->maxIterationNumber;
+    }
+    virtual size_t GetCurrentIterationNumber() {
+        return this->currentIterationNumber;
+    }
+    virtual size_t ResetCurrentIterationNumber() {
+        return this->currentIterationNumber = 0;
+    }
+    virtual double GetBestObjFunctionValue() {
+        return this->bestObjFunctionValue;
+    }
+    virtual void SetBestObjFunctionValue(double i) {
+        this->bestObjFunctionValue = i;
+    }
+    virtual double GetCurrentObjFunctionValue() {
+        return this->currentObjFunctionValue;
+    }
+    virtual void IncrementCurrentIterationNumber() {
+        this->currentIterationNumber++;
+    }
+    virtual void Initialise(size_t nvox,
+                            int dim,
+                            bool optX,
+                            bool optY,
+                            bool optZ,
+                            size_t maxit,
+                            size_t start,
+                            InterfaceOptimiser *o,
+                            T *cppData,
+                            T *gradData = nullptr,
+                            size_t nvox_b = 0,
+                            T *cppData_b = nullptr,
+                            T *gradData_b = nullptr);
+    virtual void Optimise(T maxLength,
+                          T smallLength,
+                          T &startLength);
+    virtual void Perturbation(float length);
 
-   // Function used for testing
-   virtual void reg_test_optimiser();
+    // Function used for testing
+    virtual void reg_test_optimiser();
 };
 /* *************************************************************** */
 /* *************************************************************** */
 /** @class reg_conjugateGradient
- * @brief Conjugate gradient acent optimisation
+ * @brief Conjugate gradient ascent optimisation
  */
 template <class T>
-class reg_conjugateGradient : public reg_optimiser<T>
-{
+class reg_conjugateGradient: public reg_optimiser<T> {
 protected:
-   T *array1;
-   T *array1_b;
-   T *array2;
-   T *array2_b;
-   bool firstcall;
+    T *array1;
+    T *array1_b;
+    T *array2;
+    T *array2_b;
+    bool firstcall;
 
-   void UpdateGradientValues(); /// @brief Update the gradient array
+    void UpdateGradientValues(); /// @brief Update the gradient array
 
 public:
     reg_conjugateGradient();
@@ -215,14 +191,13 @@ class reg_conjugateGradient : public reg_optimiser<T>
  * @brief
  */
 template <class T>
-class reg_lbfgs : public reg_optimiser<T>
-{
+class reg_lbfgs: public reg_optimiser<T> {
 protected:
-   size_t stepToKeep;
-   T *oldDOF;
-   T *oldGrad;
-   T **diffDOF;
-   T **diffGrad;
+    size_t stepToKeep;
+    T *oldDOF;
+    T *oldGrad;
+    T **diffDOF;
+    T **diffGrad;
 
 public:
     reg_lbfgs();
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 954fde54..6637f857 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -21,13 +21,13 @@
 /* *************************************************************** */
 void interpWindowedSincKernel(double relative, double *basis)
 {
-    if(relative<0.0) relative=0.0; //reg_rounding error
+    if(relative<0) relative=0; //reg_rounding error
     int j=0;
     double sum=0.;
     for(int i=-SINC_KERNEL_RADIUS; i<SINC_KERNEL_RADIUS; ++i)
     {
         double x=relative-static_cast<double>(i);
-        if(x==0.0)
+        if(x==0)
             basis[j]=1.0;
         else if(fabs(x)>=static_cast<double>(SINC_KERNEL_RADIUS))
             basis[j]=0;
@@ -49,7 +49,7 @@ void interpWindowedSincKernel(double relative, double *basis)
 /* *************************************************************** */
 double interpWindowedSincKernel_Samp(double x, double kernelsize)
 {
-    if(x==0.0)
+    if(x==0)
         return 1.0;
     else if(fabs(x)>=static_cast<double>(kernelsize))
         return 0;
@@ -65,7 +65,7 @@ double interpWindowedSincKernel_Samp(double x, double kernelsize)
 /* *************************************************************** */
 void interpCubicSplineKernel(double relative, double *basis)
 {
-    if(relative<0.0) relative=0.0; //reg_rounding error
+    if(relative<0) relative=0; //reg_rounding error
     double FF= relative*relative;
     basis[0] = (relative * ((2.0-relative)*relative - 1.0))/2.0;
     basis[1] = (FF * (3.0*relative-5.0) + 2.0)/2.0;
@@ -76,7 +76,7 @@ void interpCubicSplineKernel(double relative, double *basis)
 void interpCubicSplineKernel(double relative, double *basis, double *derivative)
 {
     interpCubicSplineKernel(relative,basis);
-    if(relative<0.0) relative=0.0; //reg_rounding error
+    if(relative<0) relative=0; //reg_rounding error
     double FF= relative*relative;
     derivative[0] = (4.0*relative - 3.0*FF - 1.0)/2.0;
     derivative[1] = (9.0*relative - 10.0) * relative/2.0;
@@ -87,7 +87,7 @@ void interpCubicSplineKernel(double relative, double *basis, double *derivative)
 /* *************************************************************** */
 void interpLinearKernel(double relative, double *basis)
 {
-    if(relative<0.0) relative=0.0; //reg_rounding error
+    if(relative<0) relative=0; //reg_rounding error
     basis[1]=relative;
     basis[0]=1.0-relative;
 }
@@ -95,14 +95,14 @@ void interpLinearKernel(double relative, double *basis)
 void interpLinearKernel(double relative, double *basis, double *derivative)
 {
     interpLinearKernel(relative,basis);
-    derivative[1]=1.0;
-    derivative[0]=0.0;
+    derivative[1]=1;
+    derivative[0]=0;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 void interpNearestNeighKernel(double relative, double *basis)
 {
-    if(relative<0.0) relative=0.0; //reg_rounding error
+    if(relative<0) relative=0; //reg_rounding error
     basis[0]=basis[1]=0;
     if(relative>=0.5)
         basis[1]=1;
@@ -465,7 +465,7 @@ void ResampleImage3D(nifti_image *floatingImage,
                 previous[1]-=kernel_offset;
                 previous[2]-=kernel_offset;
 
-                intensity=0.0;
+                intensity=0;
                 if(-1<(previous[0]) && (previous[0]+kernel_size-1)<floatingImage->nx &&
                    -1<(previous[1]) && (previous[1]+kernel_size-1)<floatingImage->ny &&
                    -1<(previous[2]) && (previous[2]+kernel_size-1)<floatingImage->nz){
@@ -473,12 +473,12 @@ void ResampleImage3D(nifti_image *floatingImage,
                    {
                       Z= previous[2]+c;
                       zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                      yTempNewValue=0.0;
+                      yTempNewValue=0;
                       for(b=0; b<kernel_size; b++)
                       {
                          Y= previous[1]+b;
                          xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                         xTempNewValue=0.0;
+                         xTempNewValue=0;
                          for(a=0; a<kernel_size; a++)
                          {
                             xTempNewValue +=  static_cast<double>(*xyzPointer++) * xBasis[a];
@@ -493,12 +493,12 @@ void ResampleImage3D(nifti_image *floatingImage,
                    {
                       Z= previous[2]+c;
                       zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                      yTempNewValue=0.0;
+                      yTempNewValue=0;
                       for(b=0; b<kernel_size; b++)
                       {
                          Y= previous[1]+b;
                          xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                         xTempNewValue=0.0;
+                         xTempNewValue=0;
                          for(a=0; a<kernel_size; a++)
                          {
                             if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx &&
@@ -628,8 +628,8 @@ void ResampleImage2D(nifti_image *floatingImage,
         FloatingTYPE *xyzPointer;
         double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], relative[2];
         double xTempNewValue, intensity;
-        float world[3] = {0.0, 0.0, 0.0};
-        float position[3] = {0.0, 0.0, 0.0};
+        float world[3] = {0, 0, 0};
+        float position[3] = {0, 0, 0};
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
     private(index, intensity, world, position, previous, xBasis, yBasis, relative, \
@@ -662,12 +662,12 @@ void ResampleImage2D(nifti_image *floatingImage,
                 previous[0]-=kernel_offset;
                 previous[1]-=kernel_offset;
 
-                intensity=0.0;
+                intensity=0;
                 for(b=0; b<kernel_size; b++)
                 {
                     Y= previous[1]+b;
                     xyzPointer = &floatingIntensity[Y*floatingImage->nx+previous[0]];
-                    xTempNewValue=0.0;
+                    xTempNewValue=0;
                     for(a=0; a<kernel_size; a++)
                     {
                         if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx &&
@@ -1235,17 +1235,17 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage,
                                     previous[1]-=kernel_offset;
                                     previous[2]-=kernel_offset;
 
-                                    psfIntensity=0.0;
+                                    psfIntensity=0;
                                     for(c=0; c<kernel_size; c++)
                                     {
                                         Z= previous[2]+c;
                                         zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                                        yTempNewValue=0.0;
+                                        yTempNewValue=0;
                                         for(b=0; b<kernel_size; b++)
                                         {
                                             Y= previous[1]+b;
                                             xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                                            xTempNewValue=0.0;
+                                            xTempNewValue=0;
                                             for(a=0; a<kernel_size; a++)
                                             {
                                                 if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx &&
@@ -1666,17 +1666,17 @@ void ResampleImage3D_PSF(nifti_image *floatingImage,
                                         previous[1]-=kernel_offset;
                                         previous[2]-=kernel_offset;
 
-                                        psfIntensity=0.0;
+                                        psfIntensity=0;
                                         for(int c=0; c<kernel_size; c++)
                                         {
                                             Z= previous[2]+c;
                                             zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                                            yTempNewValue=0.0;
+                                            yTempNewValue=0;
                                             for(int b=0; b<kernel_size; b++)
                                             {
                                                 Y= previous[1]+b;
                                                 xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                                                xTempNewValue=0.0;
+                                                xTempNewValue=0;
                                                 for(int a=0; a<kernel_size; a++)
                                                 {
                                                     if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx &&
@@ -2587,9 +2587,9 @@ void TrilinearImageGradient(nifti_image *floatingImage,
     for(index=0; index<referenceVoxelNumber; index++)
     {
 
-        grad[0]=0.0;
-        grad[1]=0.0;
-        grad[2]=0.0;
+        grad[0]=0;
+        grad[1]=0;
+        grad[2]=0;
 
         if(maskPtr[index]>-1)
         {
@@ -2625,17 +2625,17 @@ void TrilinearImageGradient(nifti_image *floatingImage,
                     if(Z>-1 && Z<floatingImage->nz)
                     {
                         zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                        xxTempNewValue=0.0;
-                        yyTempNewValue=0.0;
-                        zzTempNewValue=0.0;
+                        xxTempNewValue=0;
+                        yyTempNewValue=0;
+                        zzTempNewValue=0;
                         for(b=0; b<2; b++)
                         {
                             Y=previous[1]+b;
                             if(Y>-1 && Y<floatingImage->ny)
                             {
                                 xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                                xTempNewValue=0.0;
-                                yTempNewValue=0.0;
+                                xTempNewValue=0;
+                                yTempNewValue=0;
                                 for(a=0; a<2; a++)
                                 {
                                     X=previous[0]+a;
@@ -2683,15 +2683,15 @@ void TrilinearImageGradient(nifti_image *floatingImage,
                 {
                     Z=previous[2]+c;
                     zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                    xxTempNewValue=0.0;
-                    yyTempNewValue=0.0;
-                    zzTempNewValue=0.0;
+                    xxTempNewValue=0;
+                    yyTempNewValue=0;
+                    zzTempNewValue=0;
                     for(b=0; b<2; b++)
                     {
                         Y=previous[1]+b;
                         xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                        xTempNewValue=0.0;
-                        yTempNewValue=0.0;
+                        xTempNewValue=0;
+                        yTempNewValue=0;
                         for(a=0; a<2; a++)
                         {
                             X=previous[0]+a;
@@ -2783,8 +2783,8 @@ void BilinearImageGradient(nifti_image *floatingImage,
     for(index=0; index<referenceVoxelNumber; index++)
     {
 
-        grad[0]=0.0;
-        grad[1]=0.0;
+        grad[0]=0;
+        grad[1]=0;
 
         if(maskPtr[index]>-1)
         {
@@ -2816,8 +2816,8 @@ void BilinearImageGradient(nifti_image *floatingImage,
                 if(Y>-1 && Y<floatingImage->ny)
                 {
                     xyPointer = &floatingIntensity[Y*floatingImage->nx+previous[0]];
-                    xTempNewValue=0.0;
-                    yTempNewValue=0.0;
+                    xTempNewValue=0;
+                    yTempNewValue=0;
                     for(a=0; a<2; a++)
                     {
                         X= previous[0]+a;
@@ -2915,9 +2915,9 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage,
     for(index=0; index<referenceVoxelNumber; index++)
     {
 
-        grad[0]=0.0;
-        grad[1]=0.0;
-        grad[2]=0.0;
+        grad[0]=0;
+        grad[1]=0;
+        grad[2]=0;
 
         if((*maskPtr++)>-1)
         {
@@ -2955,9 +2955,9 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage,
                 if(-1<Z && Z<floatingImage->nz)
                 {
                     zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                    xxTempNewValue=0.0;
-                    yyTempNewValue=0.0;
-                    zzTempNewValue=0.0;
+                    xxTempNewValue=0;
+                    yyTempNewValue=0;
+                    zzTempNewValue=0;
                     for(b=0; b<4; b++)
                     {
                         Y= previous[1]+b;
@@ -2965,8 +2965,8 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage,
                         if(-1<Y && Y<floatingImage->ny)
                         {
                             xyzPointer = &yzPointer[previous[0]];
-                            xTempNewValue=0.0;
-                            yTempNewValue=0.0;
+                            xTempNewValue=0;
+                            yTempNewValue=0;
                             for(a=0; a<4; a++)
                             {
                                 if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx)
@@ -3005,9 +3005,9 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage,
                 }
             } // c
 
-            grad[0]=grad[0]==grad[0]?grad[0]:0.0;
-            grad[1]=grad[1]==grad[1]?grad[1]:0.0;
-            grad[2]=grad[2]==grad[2]?grad[2]:0.0;
+            grad[0]=grad[0]==grad[0]?grad[0]:0;
+            grad[1]=grad[1]==grad[1]?grad[1]:0;
+            grad[2]=grad[2]==grad[2]?grad[2]:0;
         } // outside of the mask
 
         warpedGradientPtrX[index] = (GradientTYPE)grad[0];
@@ -3075,8 +3075,8 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage,
     for(index=0; index<referenceVoxelNumber; index++)
     {
 
-        grad[0]=0.0;
-        grad[1]=0.0;
+        grad[0]=0;
+        grad[1]=0;
 
         if(maskPtr[index]>-1)
         {
@@ -3110,8 +3110,8 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage,
                 if(-1<Y && Y<floatingImage->ny)
                 {
                     xyPointer = &yPointer[previous[0]];
-                    xTempNewValue=0.0;
-                    yTempNewValue=0.0;
+                    xTempNewValue=0;
+                    yTempNewValue=0;
                     for(a=0; a<4; a++)
                     {
                         if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx)
@@ -3137,8 +3137,8 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage,
                 }
             } // b
 
-            grad[0]=grad[0]==grad[0]?grad[0]:0.0;
-            grad[1]=grad[1]==grad[1]?grad[1]:0.0;
+            grad[0]=grad[0]==grad[0]?grad[0]:0;
+            grad[1]=grad[1]==grad[1]?grad[1]:0;
         } // outside of the mask
 
         warpedGradientPtrX[index] = (GradientTYPE)grad[0];
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 031d8f0e..6004b9f6 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -12,17 +12,15 @@
 
 #include "_reg_ssd.h"
 
-//#define USE_LOG_SSD
-//#define MRF_USE_SAD
+ //#define USE_LOG_SSD
+ //#define MRF_USE_SAD
 
-/* *************************************************************** */
-/* *************************************************************** */
-reg_ssd::reg_ssd()
-   : reg_measure()
-{
-   memset(this->normaliseTimePoint,0,255*sizeof(bool) );
+ /* *************************************************************** */
+ /* *************************************************************** */
+reg_ssd::reg_ssd(): reg_measure() {
+    memset(this->normaliseTimePoint, 0, 255 * sizeof(bool));
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_ssd constructor called");
+    reg_print_msg_debug("reg_ssd constructor called");
 #endif
 }
 /* *************************************************************** */
@@ -37,255 +35,223 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImgPtr,
                                 int *maskFloPtr,
                                 nifti_image *warRefImgPtr,
                                 nifti_image *warRefGraPtr,
-                                nifti_image *bckVoxBasedGraPtr)
-{
-   // Set the pointers using the parent class function
-   reg_measure::InitialiseMeasure(refImgPtr,
-                                  floImgPtr,
-                                  maskRefPtr,
-                                  warFloImgPtr,
-                                  warFloGraPtr,
-                                  forVoxBasedGraPtr,
-                                  localWeightSimPtr,
-                                  maskFloPtr,
-                                  warRefImgPtr,
-                                  warRefGraPtr,
-                                  bckVoxBasedGraPtr);
-
-   // Check that the input images have the same number of time point
-   if(this->referenceImagePointer->nt != this->floatingImagePointer->nt)
-   {
-      reg_print_fct_error("reg_ssd::InitialiseMeasure");
-      reg_print_msg_error("This number of time point should be the same for both input images");
-      reg_exit();
-   }
-   // Input images are normalised between 0 and 1
-   for(int i=0; i<this->referenceImagePointer->nt; ++i)
-   {if(this->timePointWeight[i] > 0.0 && normaliseTimePoint[i])
-      {
-         //sets max value over both images to be 1 and min value over both images to be 0
-         //scales values such that identical values in the images are still identical after scaling
-         float maxF = reg_tools_getMaxValue(this->floatingImagePointer,i);
-         float maxR = reg_tools_getMaxValue(this->referenceImagePointer, i);
-         float minF = reg_tools_getMinValue(this->floatingImagePointer, i);
-         float minR = reg_tools_getMinValue(this->referenceImagePointer,i);
-         float maxFR = fmax(maxF, maxR);
-         float minFR = fmin(minF, minR);
-         float rangeFR = maxFR - minFR;
-         reg_intensityRescale(this->referenceImagePointer,
-                              i,
-                              (minR - minFR)/rangeFR,
-                              1 - ((maxFR - maxR) / rangeFR));
-         reg_intensityRescale(this->floatingImagePointer,
-                              i,
-                              (minF - minFR) / rangeFR,
-                              1 - ((maxFR - maxF) / rangeFR));
-      }
-   }
+                                nifti_image *bckVoxBasedGraPtr) {
+    // Set the pointers using the parent class function
+    reg_measure::InitialiseMeasure(refImgPtr,
+                                   floImgPtr,
+                                   maskRefPtr,
+                                   warFloImgPtr,
+                                   warFloGraPtr,
+                                   forVoxBasedGraPtr,
+                                   localWeightSimPtr,
+                                   maskFloPtr,
+                                   warRefImgPtr,
+                                   warRefGraPtr,
+                                   bckVoxBasedGraPtr);
+
+    // Check that the input images have the same number of time point
+    if (this->referenceImagePointer->nt != this->floatingImagePointer->nt) {
+        reg_print_fct_error("reg_ssd::InitialiseMeasure");
+        reg_print_msg_error("This number of time point should be the same for both input images");
+        reg_exit();
+    }
+    // Input images are normalised between 0 and 1
+    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+        if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) {
+            //sets max value over both images to be 1 and min value over both images to be 0
+            //scales values such that identical values in the images are still identical after scaling
+            float maxF = reg_tools_getMaxValue(this->floatingImagePointer, i);
+            float maxR = reg_tools_getMaxValue(this->referenceImagePointer, i);
+            float minF = reg_tools_getMinValue(this->floatingImagePointer, i);
+            float minR = reg_tools_getMinValue(this->referenceImagePointer, i);
+            float maxFR = fmax(maxF, maxR);
+            float minFR = fmin(minF, minR);
+            float rangeFR = maxFR - minFR;
+            reg_intensityRescale(this->referenceImagePointer,
+                                 i,
+                                 (minR - minFR) / rangeFR,
+                                 1 - ((maxFR - maxR) / rangeFR));
+            reg_intensityRescale(this->floatingImagePointer,
+                                 i,
+                                 (minF - minFR) / rangeFR,
+                                 1 - ((maxFR - maxF) / rangeFR));
+        }
+    }
 #ifdef MRF_USE_SAD
-   reg_print_msg_warn("SAD is used instead of SSD");
+    reg_print_msg_warn("SAD is used instead of SSD");
 #endif
 #ifndef NDEBUG
-	char text[255];
-	reg_print_msg_debug("reg_ssd::InitialiseMeasure().");
-	for(int i=0; i<this->referenceImagePointer->nt; ++i)
-	{
-		sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
-		reg_print_msg_debug(text);
-	}
-	sprintf(text, "Normalize time point:");
-	for(int i=0; i<this->referenceImagePointer->nt; ++i)
-		if(this->normaliseTimePoint[i])
-			sprintf(text, "%s %i", text, i);
-	reg_print_msg_debug(text);
+    char text[255];
+    reg_print_msg_debug("reg_ssd::InitialiseMeasure().");
+    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+        sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
+        reg_print_msg_debug(text);
+    }
+    sprintf(text, "Normalize time point:");
+    for (int i = 0; i < this->referenceImagePointer->nt; ++i)
+        if (this->normaliseTimePoint[i])
+            sprintf(text, "%s %i", text, i);
+    reg_print_msg_debug(text);
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
-void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise)
-{
-	this->normaliseTimePoint[timepoint]=normalise;
+void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) {
+    this->normaliseTimePoint[timepoint] = normalise;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template<class DTYPE>
 double reg_getSSDValue(nifti_image *referenceImage,
-							  nifti_image *warpedImage,
-							  double *timePointWeight,
-							  nifti_image *jacobianDetImage,
-							  int *mask,
-							  float *currentValue,
-							  nifti_image *localWeightSimImage)
-{
+                       nifti_image *warpedImage,
+                       double *timePointWeight,
+                       nifti_image *jacobianDetImage,
+                       int *mask,
+                       float *currentValue,
+                       nifti_image *localWeightSimImage) {
 #ifdef _WIN32
-   long voxel;
-   long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    long voxel;
+    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #else
-   size_t voxel;
-   size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    size_t voxel;
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #endif
-   // Create pointers to the reference and warped image data
-   DTYPE *referencePtr=static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *warpedPtr=static_cast<DTYPE *>(warpedImage->data);
-   // Create a pointer to the Jacobian determinant image if defined
-   DTYPE *jacDetPtr=nullptr;
-   if(jacobianDetImage!=nullptr)
-      jacDetPtr=static_cast<DTYPE *>(jacobianDetImage->data);
-   // Create a pointer to the local weight image if defined
-   DTYPE *localWeightPtr=nullptr;
-   if(localWeightSimImage!=nullptr)
-      localWeightPtr=static_cast<DTYPE *>(localWeightSimImage->data);
-
-   double SSD_global=0.0;
-   double refValue, warValue, diff;
-
-   // Loop over the different time points
-   for(int time=0; time<referenceImage->nt; ++time)
-   {
-      if(timePointWeight[time] > 0.0)
-      {
-         // Create pointers to the current time point of the reference and warped images
-         DTYPE *currentRefPtr=&referencePtr[time*voxelNumber];
-         DTYPE *currentWarPtr=&warpedPtr[time*voxelNumber];
-
-         double SSD_local=0., n=0.;
+    // Create pointers to the reference and warped image data
+    DTYPE *referencePtr = static_cast<DTYPE*>(referenceImage->data);
+    DTYPE *warpedPtr = static_cast<DTYPE*>(warpedImage->data);
+    // Create a pointer to the Jacobian determinant image if defined
+    DTYPE *jacDetPtr = nullptr;
+    if (jacobianDetImage != nullptr)
+        jacDetPtr = static_cast<DTYPE*>(jacobianDetImage->data);
+    // Create a pointer to the local weight image if defined
+    DTYPE *localWeightPtr = nullptr;
+    if (localWeightSimImage != nullptr)
+        localWeightPtr = static_cast<DTYPE*>(localWeightSimImage->data);
+
+    double SSD_global = 0;
+    double refValue, warValue, diff;
+
+    // Loop over the different time points
+    for (int time = 0; time < referenceImage->nt; ++time) {
+        if (timePointWeight[time] > 0) {
+            // Create pointers to the current time point of the reference and warped images
+            DTYPE *currentRefPtr = &referencePtr[time * voxelNumber];
+            DTYPE *currentWarPtr = &warpedPtr[time * voxelNumber];
+
+            double SSD_local = 0., n = 0.;
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \
-   jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \
-   private(voxel, refValue, warValue, diff) \
-   reduction(+:SSD_local) \
-   reduction(+:n)
+    shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \
+    jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \
+    private(voxel, refValue, warValue, diff) \
+    reduction(+:SSD_local) \
+    reduction(+:n)
 #endif
-         for(voxel=0; voxel<voxelNumber; ++voxel)
-         {
-            // Check if the current voxel belongs to the mask
-            if(mask[voxel]>-1)
-            {
-               // Ensure that both ref and warped values are defined
-               refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope +
-                                   referenceImage->scl_inter);
-               warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope +
-                                   warpedImage->scl_inter);
-
-               if(refValue==refValue && warValue==warValue)
-               {
+            for (voxel = 0; voxel < voxelNumber; ++voxel) {
+                // Check if the current voxel belongs to the mask
+                if (mask[voxel] > -1) {
+                    // Ensure that both ref and warped values are defined
+                    refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter);
+                    warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter);
+
+                    if (refValue == refValue && warValue == warValue) {
 #ifdef MRF_USE_SAD
-                  diff = fabs(refValue-warValue);
+                        diff = fabs(refValue - warValue);
 #else
-                  diff = reg_pow2(refValue-warValue);
+                        diff = reg_pow2(refValue - warValue);
 #endif
-                  // Jacobian determinant modulation of the ssd if required
-                  if(jacDetPtr!=nullptr)
-                  {
-                     SSD_local += diff * jacDetPtr[voxel];
-                     n += jacDetPtr[voxel];
-                  }
-                  else if(localWeightPtr!=nullptr)
-                  {
-                     SSD_local += diff * localWeightPtr[voxel];
-                     n += localWeightPtr[voxel];
-                  }
-                  else
-                  {
-                     SSD_local += diff;
-                     n += 1.0;
-                  }
-               }
+                        // Jacobian determinant modulation of the ssd if required
+                        if (jacDetPtr != nullptr) {
+                            SSD_local += diff * jacDetPtr[voxel];
+                            n += jacDetPtr[voxel];
+                        } else if (localWeightPtr != nullptr) {
+                            SSD_local += diff * localWeightPtr[voxel];
+                            n += localWeightPtr[voxel];
+                        } else {
+                            SSD_local += diff;
+                            n += 1.0;
+                        }
+                    }
+                }
             }
-         }
 
-         SSD_local *= timePointWeight[time];
-         currentValue[time]=-SSD_local;
-         SSD_global -= SSD_local/n;
-      }
-   }
-   return SSD_global;
+            SSD_local *= timePointWeight[time];
+            currentValue[time] = -SSD_local;
+            SSD_global -= SSD_local / n;
+        }
+    }
+    return SSD_global;
 }
-template double reg_getSSDValue<float>(nifti_image *,nifti_image *,double *,nifti_image *,int *, float *, nifti_image *);
-template double reg_getSSDValue<double>(nifti_image *,nifti_image *,double *,nifti_image *,int *, float *, nifti_image *);
+template double reg_getSSDValue<float>(nifti_image*, nifti_image*, double*, nifti_image*, int*, float*, nifti_image*);
+template double reg_getSSDValue<double>(nifti_image*, nifti_image*, double*, nifti_image*, int*, float*, nifti_image*);
 /* *************************************************************** */
-double reg_ssd::GetSimilarityMeasureValue()
-{
-   // Check that all the specified image are of the same datatype
-   if(this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype)
-   {
-      reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
-      reg_print_msg_error("Both input images are exepected to have the same type");
-      reg_exit();
-   }
-   double SSDValue=0;
-   switch(this->referenceImagePointer->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      SSDValue = reg_getSSDValue<float>
-            (this->referenceImagePointer,
-             this->warpedFloatingImagePointer,
-             this->timePointWeight,
-             nullptr, // HERE TODO this->forwardJacDetImagePointer,
-             this->referenceMaskPointer,
-             this->currentValue,
-             this->forwardLocalWeightSimImagePointer
-             );
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      SSDValue = reg_getSSDValue<double>
-            (this->referenceImagePointer,
-             this->warpedFloatingImagePointer,
-             this->timePointWeight,
-             nullptr, // HERE TODO this->forwardJacDetImagePointer,
-             this->referenceMaskPointer,
-             this->currentValue,
-             this->forwardLocalWeightSimImagePointer
-             );
-      break;
-   default:
-      reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
-      reg_print_msg_error("Warped pixel type unsupported");
-      reg_exit();
-   }
-
-   // Backward computation
-   if(this->isSymmetric)
-   {
-      // Check that all the specified image are of the same datatype
-      if(this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype)
-      {
-         reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
-         reg_print_msg_error("Both input images are exepected to have the same type");
-         reg_exit();
-      }
-      switch(this->floatingImagePointer->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         SSDValue += reg_getSSDValue<float>
-               (this->floatingImagePointer,
-                this->warpedReferenceImagePointer,
-                this->timePointWeight,
-                nullptr, // HERE TODO this->backwardJacDetImagePointer,
-                this->floatingMaskPointer,
-                this->currentValue,
-                nullptr
-                );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         SSDValue += reg_getSSDValue<double>
-               (this->floatingImagePointer,
-                this->warpedReferenceImagePointer,
-                this->timePointWeight,
-                nullptr, // HERE TODO this->backwardJacDetImagePointer,
-                this->floatingMaskPointer,
-                this->currentValue,
-                nullptr
-                );
-         break;
-      default:
-         reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
-         reg_print_msg_error("Warped pixel type unsupported");
-         reg_exit();
-      }
-   }
-   return SSDValue;
+double reg_ssd::GetSimilarityMeasureValue() {
+    // Check that all the specified image are of the same datatype
+    if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) {
+        reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
+        reg_print_msg_error("Both input images are expected to have the same type");
+        reg_exit();
+    }
+    double SSDValue = 0;
+    switch (this->referenceImagePointer->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        SSDValue = reg_getSSDValue<float>(this->referenceImagePointer,
+                                          this->warpedFloatingImagePointer,
+                                          this->timePointWeight,
+                                          nullptr, // TODO this->forwardJacDetImagePointer,
+                                          this->referenceMaskPointer,
+                                          this->currentValue,
+                                          this->forwardLocalWeightSimImagePointer);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        SSDValue = reg_getSSDValue<double>(this->referenceImagePointer,
+                                           this->warpedFloatingImagePointer,
+                                           this->timePointWeight,
+                                           nullptr, // TODO this->forwardJacDetImagePointer,
+                                           this->referenceMaskPointer,
+                                           this->currentValue,
+                                           this->forwardLocalWeightSimImagePointer);
+        break;
+    default:
+        reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
+        reg_print_msg_error("Warped pixel type unsupported");
+        reg_exit();
+    }
+
+    // Backward computation
+    if (this->isSymmetric) {
+        // Check that all the specified image are of the same datatype
+        if (this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) {
+            reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
+            reg_print_msg_error("Both input images are expected to have the same type");
+            reg_exit();
+        }
+        switch (this->floatingImagePointer->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            SSDValue += reg_getSSDValue<float>(this->floatingImagePointer,
+                                               this->warpedReferenceImagePointer,
+                                               this->timePointWeight,
+                                               nullptr, // TODO this->backwardJacDetImagePointer,
+                                               this->floatingMaskPointer,
+                                               this->currentValue,
+                                               nullptr);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            SSDValue += reg_getSSDValue<double>(this->floatingImagePointer,
+                                                this->warpedReferenceImagePointer,
+                                                this->timePointWeight,
+                                                nullptr, // TODO this->backwardJacDetImagePointer,
+                                                this->floatingMaskPointer,
+                                                this->currentValue,
+                                                nullptr);
+            break;
+        default:
+            reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
+            reg_print_msg_error("Warped pixel type unsupported");
+            reg_exit();
+        }
+    }
+    return SSDValue;
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -298,216 +264,190 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                                   int *mask,
                                   int current_timepoint,
                                   double timepoint_weight,
-                                  nifti_image *localWeightSimImage
-                                  )
-{
-   if(current_timepoint<0 || current_timepoint>=referenceImage->nt){
-      reg_print_fct_error("reg_getVoxelBasedNMIGradient2D");
-      reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
-      reg_exit();
-   }
-   // Create pointers to the reference and warped images
+                                  nifti_image *localWeightSimImage) {
+    if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) {
+        reg_print_fct_error("reg_getVoxelBasedNMIGradient2D");
+        reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
+        reg_exit();
+    }
+    // Create pointers to the reference and warped images
 #ifdef _WIN32
-   long voxel;
-   long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    long voxel;
+    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #else
-   size_t voxel;
-   size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    size_t voxel;
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
 #endif
-   // Pointers to the image data
-   DTYPE *refImagePtr = static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *currentRefPtr=&refImagePtr[current_timepoint*voxelNumber];
-   DTYPE *warImagePtr = static_cast<DTYPE *>(warpedImage->data);
-   DTYPE *currentWarPtr=&warImagePtr[current_timepoint*voxelNumber];
-
-   // Pointers to the spatial gradient of the warped image
-   DTYPE *spatialGradPtrX = static_cast<DTYPE *>(warpedGradient->data);
-   DTYPE *spatialGradPtrY = &spatialGradPtrX[voxelNumber];
-   DTYPE *spatialGradPtrZ = nullptr;
-   if(referenceImage->nz>1)
-      spatialGradPtrZ=&spatialGradPtrY[voxelNumber];
-
-   // Pointers to the measure of similarity gradient
-   DTYPE *measureGradPtrX = static_cast<DTYPE *>(measureGradientImage->data);
-   DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-   DTYPE *measureGradPtrZ = nullptr;
-   if(referenceImage->nz>1)
-      measureGradPtrZ=&measureGradPtrY[voxelNumber];
-
-   // Create a pointer to the Jacobian determinant values if defined
-   DTYPE *jacDetPtr=nullptr;
-   if(jacobianDetImage!=nullptr)
-      jacDetPtr=static_cast<DTYPE *>(jacobianDetImage->data);
-   // Create a pointer to the local weight image if defined
-   DTYPE *localWeightPtr=nullptr;
-   if(localWeightSimImage!=nullptr)
-      localWeightPtr=static_cast<DTYPE *>(localWeightSimImage->data);
-
-   // find number of active voxels and correct weight
-   double activeVoxel_num = 0.0;
-   for (voxel = 0; voxel < voxelNumber; voxel++)
-   {
-      if (mask[voxel]>-1)
-      {
-         if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel])
-            activeVoxel_num += 1.0;
-      }
-   }
-   double adjusted_weight = timepoint_weight / activeVoxel_num;
-
-   double refValue, warValue, common;
+    // Pointers to the image data
+    DTYPE *refImagePtr = static_cast<DTYPE *>(referenceImage->data);
+    DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DTYPE *warImagePtr = static_cast<DTYPE *>(warpedImage->data);
+    DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
+
+    // Pointers to the spatial gradient of the warped image
+    DTYPE *spatialGradPtrX = static_cast<DTYPE *>(warpedGradient->data);
+    DTYPE *spatialGradPtrY = &spatialGradPtrX[voxelNumber];
+    DTYPE *spatialGradPtrZ = nullptr;
+    if (referenceImage->nz > 1)
+        spatialGradPtrZ = &spatialGradPtrY[voxelNumber];
+
+    // Pointers to the measure of similarity gradient
+    DTYPE *measureGradPtrX = static_cast<DTYPE *>(measureGradientImage->data);
+    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
+    DTYPE *measureGradPtrZ = nullptr;
+    if (referenceImage->nz > 1)
+        measureGradPtrZ = &measureGradPtrY[voxelNumber];
+
+    // Create a pointer to the Jacobian determinant values if defined
+    DTYPE *jacDetPtr = nullptr;
+    if (jacobianDetImage != nullptr)
+        jacDetPtr = static_cast<DTYPE *>(jacobianDetImage->data);
+    // Create a pointer to the local weight image if defined
+    DTYPE *localWeightPtr = nullptr;
+    if (localWeightSimImage != nullptr)
+        localWeightPtr = static_cast<DTYPE *>(localWeightSimImage->data);
+
+    // find number of active voxels and correct weight
+    double activeVoxel_num = 0;
+    for (voxel = 0; voxel < voxelNumber; voxel++) {
+        if (mask[voxel] > -1) {
+            if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel])
+                activeVoxel_num += 1.0;
+        }
+    }
+    double adjusted_weight = timepoint_weight / activeVoxel_num;
+
+    double refValue, warValue, common;
 
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, \
-   mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \
-   measureGradPtrX, measureGradPtrY, measureGradPtrZ, voxelNumber, \
-   localWeightPtr, adjusted_weight) \
-   private(voxel, refValue, warValue, common)
+    shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, \
+    mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \
+    measureGradPtrX, measureGradPtrY, measureGradPtrZ, voxelNumber, \
+    localWeightPtr, adjusted_weight) \
+    private(voxel, refValue, warValue, common)
 #endif
-   for(voxel=0; voxel<voxelNumber; voxel++)
-   {
-      if(mask[voxel]>-1)
-      {
-         refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope +
-                             referenceImage->scl_inter);
-         warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope +
-                             warpedImage->scl_inter);
-         if(refValue==refValue && warValue==warValue)
-         {
+    for (voxel = 0; voxel < voxelNumber; voxel++) {
+        if (mask[voxel] > -1) {
+            refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter);
+            warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter);
+            if (refValue == refValue && warValue == warValue) {
 #ifdef MRF_USE_SAD
-            common = refValue>warValue?-1.f:1.f;
-            common *= (refValue - warValue);
+                common = refValue > warValue ? -1.f : 1.f;
+                common *= (refValue - warValue);
 #else
-            common = -2.0 * (refValue - warValue);
+                common = -2.0 * (refValue - warValue);
 #endif
-            if(jacDetPtr!=nullptr)
-               common *= jacDetPtr[voxel];
-            else if(localWeightPtr!=nullptr)
-               common *= localWeightPtr[voxel];
-
-            common *= adjusted_weight;
-
-            if(spatialGradPtrX[voxel]==spatialGradPtrX[voxel])
-               measureGradPtrX[voxel] += (DTYPE)(common * spatialGradPtrX[voxel]);
-            if(spatialGradPtrY[voxel]==spatialGradPtrY[voxel])
-               measureGradPtrY[voxel] += (DTYPE)(common * spatialGradPtrY[voxel]);
-
-            if(measureGradPtrZ!=nullptr)
-            {
-               if(spatialGradPtrZ[voxel]==spatialGradPtrZ[voxel])
-                  measureGradPtrZ[voxel] += (DTYPE)(common * spatialGradPtrZ[voxel]);
+                if (jacDetPtr != nullptr)
+                    common *= jacDetPtr[voxel];
+                else if (localWeightPtr != nullptr)
+                    common *= localWeightPtr[voxel];
+
+                common *= adjusted_weight;
+
+                if (spatialGradPtrX[voxel] == spatialGradPtrX[voxel])
+                    measureGradPtrX[voxel] += (DTYPE)(common * spatialGradPtrX[voxel]);
+                if (spatialGradPtrY[voxel] == spatialGradPtrY[voxel])
+                    measureGradPtrY[voxel] += (DTYPE)(common * spatialGradPtrY[voxel]);
+
+                if (measureGradPtrZ != nullptr) {
+                    if (spatialGradPtrZ[voxel] == spatialGradPtrZ[voxel])
+                        measureGradPtrZ[voxel] += (DTYPE)(common * spatialGradPtrZ[voxel]);
+                }
             }
-         }
-      }
-   }
+        }
+    }
 }
 /* *************************************************************** */
 template void reg_getVoxelBasedSSDGradient<float>
-(nifti_image *,nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, int, double, nifti_image *);
+(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*);
 template void reg_getVoxelBasedSSDGradient<double>
-(nifti_image *,nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, int, double, nifti_image *);
+(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*);
 /* *************************************************************** */
-void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
-{
-   // Check if the specified time point exists and is active
-   reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-   if(this->timePointWeight[current_timepoint]==0.0)
-      return;
-
-   // Check if all required input images are of the same data type
-   int dtype = this->referenceImagePointer->datatype;
-   if(this->warpedFloatingImagePointer->datatype != dtype ||
-         this->warpedFloatingGradientImagePointer->datatype != dtype ||
-         this->forwardVoxelBasedGradientImagePointer->datatype != dtype
-         )
-   {
-      reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
-      reg_print_msg_error("Input images are exepected to be of the same type");
-      reg_exit();
-   }
-   // Compute the gradient of the ssd for the forward transformation
-   switch(dtype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      reg_getVoxelBasedSSDGradient<float>
-            (this->referenceImagePointer,
-             this->warpedFloatingImagePointer,
-             this->warpedFloatingGradientImagePointer,
-             this->forwardVoxelBasedGradientImagePointer,
-             nullptr, // HERE TODO this->forwardJacDetImagePointer,
-             this->referenceMaskPointer,
-             current_timepoint,
-             this->timePointWeight[current_timepoint],
-             this->forwardLocalWeightSimImagePointer
-             );
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_getVoxelBasedSSDGradient<double>
-            (this->referenceImagePointer,
-             this->warpedFloatingImagePointer,
-             this->warpedFloatingGradientImagePointer,
-             this->forwardVoxelBasedGradientImagePointer,
-             nullptr, // HERE TODO this->forwardJacDetImagePointer,
-             this->referenceMaskPointer,
-             current_timepoint,
-             this->timePointWeight[current_timepoint],
-             this->forwardLocalWeightSimImagePointer
-             );
-      break;
-   default:
-      reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
-      reg_print_msg_error("Unsupported datatype");
-      reg_exit();
-   }
-   // Compute the gradient of the ssd for the backward transformation
-   if(this->isSymmetric)
-   {
-      dtype = this->floatingImagePointer->datatype;
-      if(this->warpedReferenceImagePointer->datatype != dtype ||
+void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+    // Check if the specified time point exists and is active
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
+    if (this->timePointWeight[current_timepoint] == 0)
+        return;
+
+    // Check if all required input images are of the same data type
+    int dtype = this->referenceImagePointer->datatype;
+    if (this->warpedFloatingImagePointer->datatype != dtype ||
+        this->warpedFloatingGradientImagePointer->datatype != dtype ||
+        this->forwardVoxelBasedGradientImagePointer->datatype != dtype) {
+        reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
+        reg_print_msg_error("Input images are expected to be of the same type");
+        reg_exit();
+    }
+    // Compute the gradient of the ssd for the forward transformation
+    switch (dtype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_getVoxelBasedSSDGradient<float>(this->referenceImagePointer,
+                                            this->warpedFloatingImagePointer,
+                                            this->warpedFloatingGradientImagePointer,
+                                            this->forwardVoxelBasedGradientImagePointer,
+                                            nullptr, // TODO this->forwardJacDetImagePointer,
+                                            this->referenceMaskPointer,
+                                            current_timepoint,
+                                            this->timePointWeight[current_timepoint],
+                                            this->forwardLocalWeightSimImagePointer);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_getVoxelBasedSSDGradient<double>(this->referenceImagePointer,
+                                             this->warpedFloatingImagePointer,
+                                             this->warpedFloatingGradientImagePointer,
+                                             this->forwardVoxelBasedGradientImagePointer,
+                                             nullptr, // TODO this->forwardJacDetImagePointer,
+                                             this->referenceMaskPointer,
+                                             current_timepoint,
+                                             this->timePointWeight[current_timepoint],
+                                             this->forwardLocalWeightSimImagePointer);
+        break;
+    default:
+        reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
+        reg_print_msg_error("Unsupported datatype");
+        reg_exit();
+    }
+    // Compute the gradient of the ssd for the backward transformation
+    if (this->isSymmetric) {
+        dtype = this->floatingImagePointer->datatype;
+        if (this->warpedReferenceImagePointer->datatype != dtype ||
             this->warpedReferenceGradientImagePointer->datatype != dtype ||
-            this->backwardVoxelBasedGradientImagePointer->datatype != dtype
-            )
-      {
-         reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
-         reg_print_msg_error("Input images are exepected to be of the same type");
-         reg_exit();
-      }
-      // Compute the gradient of the nmi for the backward transformation
-      switch(dtype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_getVoxelBasedSSDGradient<float>
-               (this->floatingImagePointer,
-                this->warpedReferenceImagePointer,
-                this->warpedReferenceGradientImagePointer,
-                this->backwardVoxelBasedGradientImagePointer,
-                nullptr, // HERE TODO this->backwardJacDetImagePointer,
-                this->floatingMaskPointer,
-                current_timepoint,
-                this->timePointWeight[current_timepoint],
-                nullptr
-                );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_getVoxelBasedSSDGradient<double>
-               (this->floatingImagePointer,
-                this->warpedReferenceImagePointer,
-                this->warpedReferenceGradientImagePointer,
-                this->backwardVoxelBasedGradientImagePointer,
-                nullptr, // HERE TODO this->backwardJacDetImagePointer,
-                this->floatingMaskPointer,
-                current_timepoint,
-                this->timePointWeight[current_timepoint],
-                nullptr
-                );
-         break;
-      default:
-         reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
-         reg_print_msg_error("Unsupported datatype");
-         reg_exit();
-      }
-   }
+            this->backwardVoxelBasedGradientImagePointer->datatype != dtype) {
+            reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
+            reg_print_msg_error("Input images are expected to be of the same type");
+            reg_exit();
+        }
+        // Compute the gradient of the nmi for the backward transformation
+        switch (dtype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_getVoxelBasedSSDGradient<float>(this->floatingImagePointer,
+                                                this->warpedReferenceImagePointer,
+                                                this->warpedReferenceGradientImagePointer,
+                                                this->backwardVoxelBasedGradientImagePointer,
+                                                nullptr, // TODO this->backwardJacDetImagePointer,
+                                                this->floatingMaskPointer,
+                                                current_timepoint,
+                                                this->timePointWeight[current_timepoint],
+                                                nullptr);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_getVoxelBasedSSDGradient<double>(this->floatingImagePointer,
+                                                 this->warpedReferenceImagePointer,
+                                                 this->warpedReferenceGradientImagePointer,
+                                                 this->backwardVoxelBasedGradientImagePointer,
+                                                 nullptr, // TODO this->backwardJacDetImagePointer,
+                                                 this->floatingMaskPointer,
+                                                 current_timepoint,
+                                                 this->timePointWeight[current_timepoint],
+                                                 nullptr);
+            break;
+        default:
+            reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
+            reg_print_msg_error("Unsupported datatype");
+            reg_exit();
+        }
+    }
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -518,245 +458,240 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                                    int discretise_step,
                                    nifti_image *refImage,
                                    nifti_image *warImage,
-                                   int *mask)
-{
-   int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, discretisedIndex;
-   size_t voxIndex, voxIndex_t;
-   int label_1D_number = (discretise_radius / discretise_step) * 2 + 1;
-   int label_2D_number = label_1D_number*label_1D_number;
-   int label_nD_number = label_2D_number*label_1D_number;
-   //output matrix = discretisedValue (first dimension displacement label, second dim. control point)
-   float gridVox[3], imageVox[3];
-   float currentValue;
-   // Define the transformation matrices
-   mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz;
-   if(controlPointGridImage->sform_code>0)
-      grid_vox2mm = &controlPointGridImage->sto_xyz;
-   mat44 *image_mm2vox = &refImage->qto_ijk;
-   if(refImage->sform_code>0)
-      image_mm2vox = &refImage->sto_ijk;
-   mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm);
-
-   // Compute the block size
-   int blockSize[3]={
-      (int)reg_ceil(controlPointGridImage->dx / refImage->dx),
-      (int)reg_ceil(controlPointGridImage->dy / refImage->dy),
-      (int)reg_ceil(controlPointGridImage->dz / refImage->dz),
-   };
-   int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt;
-   int currentControlPoint = 0;
-
-   // Allocate some static memory
-   float* refBlockValue = (float *) malloc(voxelBlockNumber*sizeof(float));
-
-   // Pointers to the input image
-   size_t voxelNumber = (size_t)refImage->nx*
-         refImage->ny*refImage->nz;
-   DTYPE *refImgPtr = static_cast<DTYPE *>(refImage->data);
-   DTYPE *warImgPtr = static_cast<DTYPE *>(warImage->data);
-
-   // Create a padded version of the warped image to avoid doundary condition check
-   int warPaddedOffset [3] = {
-      discretise_radius + blockSize[0],
-      discretise_radius + blockSize[1],
-      discretise_radius + blockSize[2],
-   };
-   int warPaddedDim[4] = {
-      warImage->nx + 2 * warPaddedOffset[0] + blockSize[0],
-      warImage->ny + 2 * warPaddedOffset[1] + blockSize[1],
-      warImage->nz + 2 * warPaddedOffset[2] + blockSize[2],
-      warImage->nt
-   };
-
-   //DTYPE padding_value = std::numeric_limits<DTYPE>::quiet_NaN();
-   DTYPE padding_value = 0.0;
-
-   size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] *
-         warPaddedDim[1] * warPaddedDim[2];
-   DTYPE *paddedWarImgPtr = (DTYPE *)calloc(warPaddedVoxelNumber*warPaddedDim[3], sizeof(DTYPE));
-   for(voxIndex=0; voxIndex<warPaddedVoxelNumber*warPaddedDim[3]; ++voxIndex)
-      paddedWarImgPtr[voxIndex]=padding_value;
-   voxIndex=0;
-   voxIndex_t=0;
-   for(t=0; t<warImage->nt; ++t){
-      for(z=warPaddedOffset[2]; z<warPaddedDim[2]-warPaddedOffset[2]-blockSize[2]; ++z){
-         for(y=warPaddedOffset[1]; y<warPaddedDim[1]-warPaddedOffset[1]-blockSize[1]; ++y){
-            voxIndex= t * warPaddedVoxelNumber + (z*warPaddedDim[1]+y)*warPaddedDim[0]+warPaddedOffset[0];
-            for(x=warPaddedOffset[0]; x<warPaddedDim[0]-warPaddedOffset[0]-blockSize[0]; ++x){
-               paddedWarImgPtr[voxIndex]=warImgPtr[voxIndex_t];
-               ++voxIndex;
-               ++voxIndex_t;
+                                   int *mask) {
+    int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, discretisedIndex;
+    size_t voxIndex, voxIndex_t;
+    int label_1D_number = (discretise_radius / discretise_step) * 2 + 1;
+    int label_2D_number = label_1D_number * label_1D_number;
+    int label_nD_number = label_2D_number * label_1D_number;
+    //output matrix = discretisedValue (first dimension displacement label, second dim. control point)
+    float gridVox[3], imageVox[3];
+    float currentValue;
+    // Define the transformation matrices
+    mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz;
+    if (controlPointGridImage->sform_code > 0)
+        grid_vox2mm = &controlPointGridImage->sto_xyz;
+    mat44 *image_mm2vox = &refImage->qto_ijk;
+    if (refImage->sform_code > 0)
+        image_mm2vox = &refImage->sto_ijk;
+    mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm);
+
+    // Compute the block size
+    int blockSize[3] = {
+        (int)reg_ceil(controlPointGridImage->dx / refImage->dx),
+        (int)reg_ceil(controlPointGridImage->dy / refImage->dy),
+        (int)reg_ceil(controlPointGridImage->dz / refImage->dz),
+    };
+    int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt;
+    int currentControlPoint = 0;
+
+    // Allocate some static memory
+    float *refBlockValue = (float*)malloc(voxelBlockNumber * sizeof(float));
+
+    // Pointers to the input image
+    size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz);
+    DTYPE *refImgPtr = static_cast<DTYPE*>(refImage->data);
+    DTYPE *warImgPtr = static_cast<DTYPE*>(warImage->data);
+
+    // Create a padded version of the warped image to avoid boundary condition check
+    int warPaddedOffset[3] = {
+        discretise_radius + blockSize[0],
+        discretise_radius + blockSize[1],
+        discretise_radius + blockSize[2],
+    };
+    int warPaddedDim[4] = {
+        warImage->nx + 2 * warPaddedOffset[0] + blockSize[0],
+        warImage->ny + 2 * warPaddedOffset[1] + blockSize[1],
+        warImage->nz + 2 * warPaddedOffset[2] + blockSize[2],
+        warImage->nt
+    };
+
+    //DTYPE padding_value = std::numeric_limits<DTYPE>::quiet_NaN();
+    DTYPE padding_value = 0;
+
+    size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] *
+        warPaddedDim[1] * warPaddedDim[2];
+    DTYPE *paddedWarImgPtr = (DTYPE*)calloc(warPaddedVoxelNumber * warPaddedDim[3], sizeof(DTYPE));
+    for (voxIndex = 0; voxIndex < warPaddedVoxelNumber * warPaddedDim[3]; ++voxIndex)
+        paddedWarImgPtr[voxIndex] = padding_value;
+    voxIndex = 0;
+    voxIndex_t = 0;
+    for (t = 0; t < warImage->nt; ++t) {
+        for (z = warPaddedOffset[2]; z < warPaddedDim[2] - warPaddedOffset[2] - blockSize[2]; ++z) {
+            for (y = warPaddedOffset[1]; y < warPaddedDim[1] - warPaddedOffset[1] - blockSize[1]; ++y) {
+                voxIndex = t * warPaddedVoxelNumber + (z * warPaddedDim[1] + y) * warPaddedDim[0] + warPaddedOffset[0];
+                for (x = warPaddedOffset[0]; x < warPaddedDim[0] - warPaddedOffset[0] - blockSize[0]; ++x) {
+                    paddedWarImgPtr[voxIndex] = warImgPtr[voxIndex_t];
+                    ++voxIndex;
+                    ++voxIndex_t;
+                }
             }
-         }
-      }
-   }
-
-   int definedValueNumber;
-
-   // Loop over all control points
-   for(cpz=1; cpz<controlPointGridImage->nz-1; ++cpz){
-      gridVox[2] = cpz;
-      for(cpy=1; cpy<controlPointGridImage->ny-1; ++cpy){
-         gridVox[1] = cpy;
-         currentControlPoint=(cpz*controlPointGridImage->ny+cpy)*controlPointGridImage->nx+1;
-         for(cpx=1; cpx<controlPointGridImage->nx-1; ++cpx){
-            gridVox[0] = cpx;
-            // Compute the corresponding image voxel position
-            reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
-            imageVox[0]=reg_round(imageVox[0]);
-            imageVox[1]=reg_round(imageVox[1]);
-            imageVox[2]=reg_round(imageVox[2]);
-
-            // Extract the block in the reference image
-            blockIndex = 0;
-            definedValueNumber = 0;
-            for(z=imageVox[2]-blockSize[2]/2; z<imageVox[2]+blockSize[2]/2; ++z){
-               for(y=imageVox[1]-blockSize[1]/2; y<imageVox[1]+blockSize[1]/2; ++y){
-                  for(x=imageVox[0]-blockSize[0]/2; x<imageVox[0]+blockSize[0]/2; ++x){
-                     if(x>-1 && x<refImage->nx && y>-1 && y<refImage->ny && z>-1 && z<refImage->nz) {
-                        voxIndex = (z*refImage->ny+y)*refImage->nx+x;
-                        if(mask[voxIndex]>-1){
-                           for(t=0; t<refImage->nt; ++t){
-                              voxIndex_t = t*voxelNumber + voxIndex;
-                              refBlockValue[blockIndex] = refImgPtr[voxIndex_t];
-                              if(refBlockValue[blockIndex]==refBlockValue[blockIndex])
-                                 ++definedValueNumber;
-                              blockIndex++;
-                           } //t
-                        }
-                        else{
-                           for(t=0; t<refImage->nt; ++t){
-                              refBlockValue[blockIndex] = padding_value;
-                              blockIndex++;
-                           } // t
-                        }
-                     }
-                     else{
-                        for(t=0; t<refImage->nt; ++t){
-                           refBlockValue[blockIndex] = padding_value;
-                           blockIndex++;
-                        } // t
-                     } // mask
-                  } // x
-               } // y
-            } // z
-            // Loop over the discretised value
-            if(definedValueNumber>0){
-
-               DTYPE warpedValue;
-               int paddedImageVox[3] = {
-                  static_cast<int>(imageVox[0]+warPaddedOffset[0]),
-                  static_cast<int>(imageVox[1]+warPaddedOffset[1]),
-                  static_cast<int>(imageVox[2]+warPaddedOffset[2])
-               };
-               int cc;
-               double currentSum;
+        }
+    }
+
+    int definedValueNumber;
+
+    // Loop over all control points
+    for (cpz = 1; cpz < controlPointGridImage->nz - 1; ++cpz) {
+        gridVox[2] = cpz;
+        for (cpy = 1; cpy < controlPointGridImage->ny - 1; ++cpy) {
+            gridVox[1] = cpy;
+            currentControlPoint = (cpz * controlPointGridImage->ny + cpy) * controlPointGridImage->nx + 1;
+            for (cpx = 1; cpx < controlPointGridImage->nx - 1; ++cpx) {
+                gridVox[0] = cpx;
+                // Compute the corresponding image voxel position
+                reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
+                imageVox[0] = reg_round(imageVox[0]);
+                imageVox[1] = reg_round(imageVox[1]);
+                imageVox[2] = reg_round(imageVox[2]);
+
+                // Extract the block in the reference image
+                blockIndex = 0;
+                definedValueNumber = 0;
+                for (z = imageVox[2] - blockSize[2] / 2; z < imageVox[2] + blockSize[2] / 2; ++z) {
+                    for (y = imageVox[1] - blockSize[1] / 2; y < imageVox[1] + blockSize[1] / 2; ++y) {
+                        for (x = imageVox[0] - blockSize[0] / 2; x < imageVox[0] + blockSize[0] / 2; ++x) {
+                            if (x > -1 && x<refImage->nx && y>-1 && y<refImage->ny && z>-1 && z < refImage->nz) {
+                                voxIndex = (z * refImage->ny + y) * refImage->nx + x;
+                                if (mask[voxIndex] > -1) {
+                                    for (t = 0; t < refImage->nt; ++t) {
+                                        voxIndex_t = t * voxelNumber + voxIndex;
+                                        refBlockValue[blockIndex] = refImgPtr[voxIndex_t];
+                                        if (refBlockValue[blockIndex] == refBlockValue[blockIndex])
+                                            ++definedValueNumber;
+                                        blockIndex++;
+                                    } //t
+                                } else {
+                                    for (t = 0; t < refImage->nt; ++t) {
+                                        refBlockValue[blockIndex] = padding_value;
+                                        blockIndex++;
+                                    } // t
+                                }
+                            } else {
+                                for (t = 0; t < refImage->nt; ++t) {
+                                    refBlockValue[blockIndex] = padding_value;
+                                    blockIndex++;
+                                } // t
+                            } // mask
+                        } // x
+                    } // y
+                } // z
+                // Loop over the discretised value
+                if (definedValueNumber > 0) {
+
+                    DTYPE warpedValue;
+                    int paddedImageVox[3] = {
+                        static_cast<int>(imageVox[0] + warPaddedOffset[0]),
+                        static_cast<int>(imageVox[1] + warPaddedOffset[1]),
+                        static_cast<int>(imageVox[2] + warPaddedOffset[2])
+                    };
+                    int cc;
+                    double currentSum;
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(label_1D_number, label_2D_number, label_nD_number, discretise_step, discretise_radius, \
-   paddedImageVox, blockSize, warPaddedDim, paddedWarImgPtr, refBlockValue, warPaddedVoxelNumber, \
-   discretisedValue, currentControlPoint, voxelBlockNumber) \
-   private(a, b, c, cc, x, y, z, t, discretisedIndex, blockIndex, \
-   currentValue, warpedValue, voxIndex, voxIndex_t, definedValueNumber, currentSum)
+    shared(label_1D_number, label_2D_number, label_nD_number, discretise_step, discretise_radius, \
+    paddedImageVox, blockSize, warPaddedDim, paddedWarImgPtr, refBlockValue, warPaddedVoxelNumber, \
+    discretisedValue, currentControlPoint, voxelBlockNumber) \
+    private(a, b, c, cc, x, y, z, t, discretisedIndex, blockIndex, \
+    currentValue, warpedValue, voxIndex, voxIndex_t, definedValueNumber, currentSum)
 #endif
-               for(cc=0; cc<label_1D_number; ++cc){
-                  discretisedIndex = cc * label_2D_number;
-                  c = paddedImageVox[2]-discretise_radius + cc*discretise_step;
-                  for(b=paddedImageVox[1]-discretise_radius; b<=paddedImageVox[1]+discretise_radius; b+=discretise_step){
-                     for(a=paddedImageVox[0]-discretise_radius; a<=paddedImageVox[0]+discretise_radius; a+=discretise_step){
-
-                        blockIndex = 0;
-                        currentSum = 0.;
-                        definedValueNumber = 0;
-
-                        for(z=c-blockSize[2]/2; z<c+blockSize[2]/2; ++z){
-                           for(y=b-blockSize[1]/2; y<b+blockSize[1]/2; ++y){
-                              for(x=a-blockSize[0]/2; x<a+blockSize[0]/2; ++x){
-                                 voxIndex = (z*warPaddedDim[1]+y)*warPaddedDim[0]+x;
-                                 for(t=0; t<warPaddedDim[3]; ++t){
-                                    voxIndex_t = t*warPaddedVoxelNumber + voxIndex;
-                                    warpedValue = paddedWarImgPtr[voxIndex_t];
+                    for (cc = 0; cc < label_1D_number; ++cc) {
+                        discretisedIndex = cc * label_2D_number;
+                        c = paddedImageVox[2] - discretise_radius + cc * discretise_step;
+                        for (b = paddedImageVox[1] - discretise_radius; b <= paddedImageVox[1] + discretise_radius; b += discretise_step) {
+                            for (a = paddedImageVox[0] - discretise_radius; a <= paddedImageVox[0] + discretise_radius; a += discretise_step) {
+
+                                blockIndex = 0;
+                                currentSum = 0.;
+                                definedValueNumber = 0;
+
+                                for (z = c - blockSize[2] / 2; z < c + blockSize[2] / 2; ++z) {
+                                    for (y = b - blockSize[1] / 2; y < b + blockSize[1] / 2; ++y) {
+                                        for (x = a - blockSize[0] / 2; x < a + blockSize[0] / 2; ++x) {
+                                            voxIndex = (z * warPaddedDim[1] + y) * warPaddedDim[0] + x;
+                                            for (t = 0; t < warPaddedDim[3]; ++t) {
+                                                voxIndex_t = t * warPaddedVoxelNumber + voxIndex;
+                                                warpedValue = paddedWarImgPtr[voxIndex_t];
 #ifdef MRF_USE_SAD
-                                    currentValue = fabs(warpedValue-refBlockValue[blockIndex]);
+                                                currentValue = fabs(warpedValue - refBlockValue[blockIndex]);
 #else
-                                    currentValue = reg_pow2(warpedValue-refBlockValue[blockIndex]);
+                                                currentValue = reg_pow2(warpedValue - refBlockValue[blockIndex]);
 #endif
-                                    if(currentValue==currentValue){
-                                       currentSum -= currentValue;
-                                       ++definedValueNumber;
-                                    }
-                                    blockIndex++;
-                                 }
-                              } // x
-                           } // y
-                        } // z
-                        discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] =
-                              currentSum / static_cast<float>(definedValueNumber);
-                        ++discretisedIndex;
-                     } // a
-                  } // b
-               } // cc
-            } // defined value in the reference block
-            ++currentControlPoint;
-         } // cpx
-      } // cpy
-   } // cpz
-   free(paddedWarImgPtr);
-   free(refBlockValue);
-   // Deal with the labels that contains NaN values
-   for(int node=0; node<controlPointGridImage->nx*controlPointGridImage->ny*controlPointGridImage->nz; ++node){
-      int definedValueNumber=0;
-      float *discretisedValuePtr = &discretisedValue[node * label_nD_number];
-      float meanValue=0;
-      for(int label=0; label<label_nD_number;++label){
-         if(discretisedValuePtr[label]==discretisedValuePtr[label]){
-            ++definedValueNumber;
-            meanValue+=discretisedValuePtr[label];
-         }
-      }
-      if(definedValueNumber==0){
-         for(int label=0; label<label_nD_number;++label){
-            discretisedValuePtr[label]=0;
-         }
-      }
-      else if(definedValueNumber<label_nD_number){
-         // Needs to be altered for efficiency
-         int label=0;
-         // Loop over all labels
-         int label_x, label2_x, label_y, label2_y, label_z, label2_z, label2;
-         float min_distance, current_distance;
-         for(label_z=0; label_z<label_1D_number;++label_z){
-            for(label_y=0; label_y<label_1D_number;++label_y){
-               for(label_x=0; label_x<label_1D_number;++label_x){
-                  // check if the current label is defined
-                  if(discretisedValuePtr[label]!=discretisedValuePtr[label]){
-                     label2=0;
-                     min_distance=std::numeric_limits<float>::max();
-                     // Loop again over all label to detect the defined values
-                     for(label2_z=0; label2_z<label_1D_number;++label2_z){
-                        for(label2_y=0; label2_y<label_1D_number;++label2_y){
-                           for(label2_x=0; label2_x<label_1D_number;++label2_x){
-                              // Check if the value is defined
-                              if(discretisedValuePtr[label2]==discretisedValuePtr[label2]){
-                                 // compute the distance between label and label2
-                                 current_distance = reg_pow2(label_x-label2_x)+reg_pow2(label_y-label2_y)+reg_pow2(label_z-label2_z);
-                                 if(current_distance<min_distance){
-                                    min_distance=current_distance;
-                                    discretisedValuePtr[label] = discretisedValuePtr[label2];
-                                 }
-                              } // Check if label2 is defined
-                              ++label2;
-                           } // x
-                        } // y
-                     } // z
-                  } // check if undefined label
-                  ++label;
-               } //x
-            } // y
-         } // z
+                                                if (currentValue == currentValue) {
+                                                    currentSum -= currentValue;
+                                                    ++definedValueNumber;
+                                                }
+                                                blockIndex++;
+                                            }
+                                        } // x
+                                    } // y
+                                } // z
+                                discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] =
+                                    currentSum / static_cast<float>(definedValueNumber);
+                                ++discretisedIndex;
+                            } // a
+                        } // b
+                    } // cc
+                } // defined value in the reference block
+                ++currentControlPoint;
+            } // cpx
+        } // cpy
+    } // cpz
+    free(paddedWarImgPtr);
+    free(refBlockValue);
+    // Deal with the labels that contains NaN values
+    for (int node = 0; node < controlPointGridImage->nx * controlPointGridImage->ny * controlPointGridImage->nz; ++node) {
+        int definedValueNumber = 0;
+        float *discretisedValuePtr = &discretisedValue[node * label_nD_number];
+        float meanValue = 0;
+        for (int label = 0; label < label_nD_number; ++label) {
+            if (discretisedValuePtr[label] == discretisedValuePtr[label]) {
+                ++definedValueNumber;
+                meanValue += discretisedValuePtr[label];
+            }
+        }
+        if (definedValueNumber == 0) {
+            for (int label = 0; label < label_nD_number; ++label) {
+                discretisedValuePtr[label] = 0;
+            }
+        } else if (definedValueNumber < label_nD_number) {
+            // Needs to be altered for efficiency
+            int label = 0;
+            // Loop over all labels
+            int label_x, label2_x, label_y, label2_y, label_z, label2_z, label2;
+            float min_distance, current_distance;
+            for (label_z = 0; label_z < label_1D_number; ++label_z) {
+                for (label_y = 0; label_y < label_1D_number; ++label_y) {
+                    for (label_x = 0; label_x < label_1D_number; ++label_x) {
+                        // check if the current label is defined
+                        if (discretisedValuePtr[label] != discretisedValuePtr[label]) {
+                            label2 = 0;
+                            min_distance = std::numeric_limits<float>::max();
+                            // Loop again over all label to detect the defined values
+                            for (label2_z = 0; label2_z < label_1D_number; ++label2_z) {
+                                for (label2_y = 0; label2_y < label_1D_number; ++label2_y) {
+                                    for (label2_x = 0; label2_x < label_1D_number; ++label2_x) {
+                                        // Check if the value is defined
+                                        if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) {
+                                            // compute the distance between label and label2
+                                            current_distance = reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z);
+                                            if (current_distance < min_distance) {
+                                                min_distance = current_distance;
+                                                discretisedValuePtr[label] = discretisedValuePtr[label2];
+                                            }
+                                        } // Check if label2 is defined
+                                        ++label2;
+                                    } // x
+                                } // y
+                            } // z
+                        } // check if undefined label
+                        ++label;
+                    } //x
+                } // y
+            } // z
 
-      } // node with undefined label
-   } // node
+        } // node with undefined label
+    } // node
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -767,242 +702,238 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
                                      int discretise_step,
                                      nifti_image *refImage,
                                      nifti_image *warImage,
-                                     int *mask)
-{
-   //
-   int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, blockIndex_t, discretisedIndex;
-   size_t voxIndex, voxIndex_t;
-   const int label_1D_number = (discretise_radius / discretise_step) * 2 + 1;
-   const int label_2D_number = label_1D_number*label_1D_number;
-   int label_nD_number = label_2D_number*label_1D_number;
-   //output matrix = discretisedValue (first dimension displacement label, second dim. control point)
-   float gridVox[3], imageVox[3];
-   float currentValue;
-   double currentSum;
-   // Define the transformation matrices
-   mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz;
-   if(controlPointGridImage->sform_code>0)
-      grid_vox2mm = &controlPointGridImage->sto_xyz;
-   mat44 *image_mm2vox = &refImage->qto_ijk;
-   if(refImage->sform_code>0)
-      image_mm2vox = &refImage->sto_ijk;
-   mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm);
-
-   // Compute the block size
-   const int blockSize[3]={
-      (int)reg_ceil(controlPointGridImage->dx / refImage->dx),
-      (int)reg_ceil(controlPointGridImage->dy / refImage->dy),
-      (int)reg_ceil(controlPointGridImage->dz / refImage->dz),
-   };
-   int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2];
-   int voxelBlockNumber_t = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt;
-   int currentControlPoint = 0;
-
-   // Pointers to the input image
-   size_t voxelNumber = (size_t)refImage->nx*
-         refImage->ny*refImage->nz;
-   DTYPE *refImgPtr = static_cast<DTYPE *>(refImage->data);
-   DTYPE *warImgPtr = static_cast<DTYPE *>(warImage->data);
-
-   DTYPE padding_value = 0.0;
-
-   int definedValueNumber, idBlock, timeV;
-
-   int threadNumber = 1;
-   int tid = 0;
+                                     int *mask) {
+
+    int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, blockIndex_t, discretisedIndex;
+    size_t voxIndex, voxIndex_t;
+    const int label_1D_number = (discretise_radius / discretise_step) * 2 + 1;
+    const int label_2D_number = label_1D_number * label_1D_number;
+    int label_nD_number = label_2D_number * label_1D_number;
+    //output matrix = discretisedValue (first dimension displacement label, second dim. control point)
+    float gridVox[3], imageVox[3];
+    float currentValue;
+    double currentSum;
+    // Define the transformation matrices
+    mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz;
+    if (controlPointGridImage->sform_code > 0)
+        grid_vox2mm = &controlPointGridImage->sto_xyz;
+    mat44 *image_mm2vox = &refImage->qto_ijk;
+    if (refImage->sform_code > 0)
+        image_mm2vox = &refImage->sto_ijk;
+    mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm);
+
+    // Compute the block size
+    const int blockSize[3] = {
+        (int)reg_ceil(controlPointGridImage->dx / refImage->dx),
+        (int)reg_ceil(controlPointGridImage->dy / refImage->dy),
+        (int)reg_ceil(controlPointGridImage->dz / refImage->dz),
+    };
+    int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2];
+    int voxelBlockNumber_t = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt;
+    int currentControlPoint = 0;
+
+    // Pointers to the input image
+    size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz);
+    DTYPE *refImgPtr = static_cast<DTYPE*>(refImage->data);
+    DTYPE *warImgPtr = static_cast<DTYPE*>(warImage->data);
+
+    DTYPE padding_value = 0;
+
+    int definedValueNumber, idBlock, timeV;
+
+    int threadNumber = 1;
+    int tid = 0;
 #if defined (_OPENMP)
-   threadNumber=omp_get_max_threads();
+    threadNumber = omp_get_max_threads();
 #endif
 
-   // Allocate some static memory
-   float** refBlockValue = (float **) malloc(threadNumber*sizeof(float *));
-   for(a=0;a<threadNumber;++a)
-      refBlockValue[a] = (float *) malloc(voxelBlockNumber_t*sizeof(float));
+    // Allocate some static memory
+    float **refBlockValue = (float**)malloc(threadNumber * sizeof(float*));
+    for (a = 0; a < threadNumber; ++a)
+        refBlockValue[a] = (float*)malloc(voxelBlockNumber_t * sizeof(float));
 
-   // Loop over all control points
+    // Loop over all control points
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   shared(voxelBlockNumber_t, voxelNumber, voxelBlockNumber, label_nD_number, controlPointGridImage, refImage, warImage, grid2img_vox, blockSize, \
-   padding_value, refBlockValue, mask, refImgPtr, warImgPtr, discretise_radius, \
-   discretise_step, discretisedValue) \
-   private(cpx, cpy, cpz, x, y, z, a, b, c, t, currentControlPoint, gridVox, imageVox, \
-   voxIndex, idBlock, blockIndex, definedValueNumber, tid, \
-   timeV, voxIndex_t, blockIndex_t, discretisedIndex, currentSum, currentValue)
+    shared(voxelBlockNumber_t, voxelNumber, voxelBlockNumber, label_nD_number, controlPointGridImage, refImage, warImage, grid2img_vox, blockSize, \
+    padding_value, refBlockValue, mask, refImgPtr, warImgPtr, discretise_radius, \
+    discretise_step, discretisedValue) \
+    private(cpx, cpy, cpz, x, y, z, a, b, c, t, currentControlPoint, gridVox, imageVox, \
+    voxIndex, idBlock, blockIndex, definedValueNumber, tid, \
+    timeV, voxIndex_t, blockIndex_t, discretisedIndex, currentSum, currentValue)
 #endif
-   for(cpz=0; cpz<controlPointGridImage->nz; ++cpz){
+    for (cpz = 0; cpz < controlPointGridImage->nz; ++cpz) {
 #if defined (_OPENMP)
-      tid=omp_get_thread_num();
+        tid = omp_get_thread_num();
 #endif
-      gridVox[2] = cpz;
-      for(cpy=0; cpy<controlPointGridImage->ny; ++cpy){
-         gridVox[1] = cpy;
-         for(cpx=0; cpx<controlPointGridImage->nx; ++cpx){
-            gridVox[0] = cpx;
-            currentControlPoint=controlPointGridImage->ny*controlPointGridImage->nx*cpz +
-                  controlPointGridImage->nx*cpy+cpx;
-
-            // Compute the corresponding image voxel position
-            reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
-            imageVox[0]=reg_round(imageVox[0]);
-            imageVox[1]=reg_round(imageVox[1]);
-            imageVox[2]=reg_round(imageVox[2]);
-
-            //INIT
-            for(idBlock=0;idBlock<voxelBlockNumber_t;idBlock++) {
-               refBlockValue[tid][idBlock]=padding_value;
-            }
-
-            // Extract the block in the reference image
-            blockIndex = 0;
-            definedValueNumber = 0;
-            for(z=imageVox[2]-blockSize[2]/2; z<imageVox[2]+blockSize[2]/2; ++z) {
-               for(y=imageVox[1]-blockSize[1]/2; y<imageVox[1]+blockSize[1]/2; ++y) {
-                  for(x=imageVox[0]-blockSize[0]/2; x<imageVox[0]+blockSize[0]/2; ++x) {
-                     if(x>-1 && x<refImage->nx && y>-1 && y<refImage->ny && z>-1 && z<refImage->nz) {
-                        voxIndex = refImage->ny*refImage->nx*z+refImage->nx*y+x;
-                        if(mask[voxIndex]>-1){
-                           for(timeV=0; timeV<refImage->nt; ++timeV){
-                              voxIndex_t = timeV*voxelNumber + voxIndex;
-                              blockIndex_t = timeV*voxelBlockNumber + blockIndex;
-                              refBlockValue[tid][blockIndex_t] = refImgPtr[voxIndex_t];
-                              if(refBlockValue[tid][blockIndex_t]==refBlockValue[tid][blockIndex_t]) {
-                                 ++definedValueNumber;
-                              }
-                              else refBlockValue[tid][blockIndex_t] = 0;
-                           } // timeV
-                        } //inside mask
-                     } //inside image
-                     blockIndex++;
-                  } // x
-               } // y
-            } // z
-            // Loop over the discretised value
-            if(definedValueNumber>0){
-
-               discretisedIndex=0;
-               for(c=imageVox[2]-discretise_radius; c<=imageVox[2]+discretise_radius; c+=discretise_step){
-                  for(b=imageVox[1]-discretise_radius; b<=imageVox[1]+discretise_radius; b+=discretise_step){
-                     for(a=imageVox[0]-discretise_radius; a<=imageVox[0]+discretise_radius; a+=discretise_step){
-
-                        blockIndex = 0;
-                        currentSum = 0.;
-                        definedValueNumber = 0;
-
-                        for(z=c-blockSize[2]/2; z<c+blockSize[2]/2; ++z){
-                           for(y=b-blockSize[1]/2; y<b+blockSize[1]/2; ++y){
-                              for(x=a-blockSize[0]/2; x<a+blockSize[0]/2; ++x){
-
-                                 if(x>-1 && x<warImage->nx && y>-1 && y<warImage->ny && z>-1 && z<warImage->nz) {
-                                    voxIndex = warImage->ny*warImage->nx*z+warImage->nx*y+x;
-                                    for(t=0; t<warImage->nt; ++t){
-                                       voxIndex_t = t*voxelNumber + voxIndex;
-                                       blockIndex_t = t*voxelBlockNumber + blockIndex;
-                                       if(warImgPtr[voxIndex_t]==warImgPtr[voxIndex_t]) {
+        gridVox[2] = cpz;
+        for (cpy = 0; cpy < controlPointGridImage->ny; ++cpy) {
+            gridVox[1] = cpy;
+            for (cpx = 0; cpx < controlPointGridImage->nx; ++cpx) {
+                gridVox[0] = cpx;
+                currentControlPoint = controlPointGridImage->ny * controlPointGridImage->nx * cpz +
+                    controlPointGridImage->nx * cpy + cpx;
+
+                // Compute the corresponding image voxel position
+                reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
+                imageVox[0] = reg_round(imageVox[0]);
+                imageVox[1] = reg_round(imageVox[1]);
+                imageVox[2] = reg_round(imageVox[2]);
+
+                //INIT
+                for (idBlock = 0; idBlock < voxelBlockNumber_t; idBlock++) {
+                    refBlockValue[tid][idBlock] = padding_value;
+                }
+
+                // Extract the block in the reference image
+                blockIndex = 0;
+                definedValueNumber = 0;
+                for (z = imageVox[2] - blockSize[2] / 2; z < imageVox[2] + blockSize[2] / 2; ++z) {
+                    for (y = imageVox[1] - blockSize[1] / 2; y < imageVox[1] + blockSize[1] / 2; ++y) {
+                        for (x = imageVox[0] - blockSize[0] / 2; x < imageVox[0] + blockSize[0] / 2; ++x) {
+                            if (x > -1 && x<refImage->nx && y>-1 && y<refImage->ny && z>-1 && z < refImage->nz) {
+                                voxIndex = refImage->ny * refImage->nx * z + refImage->nx * y + x;
+                                if (mask[voxIndex] > -1) {
+                                    for (timeV = 0; timeV < refImage->nt; ++timeV) {
+                                        voxIndex_t = timeV * voxelNumber + voxIndex;
+                                        blockIndex_t = timeV * voxelBlockNumber + blockIndex;
+                                        refBlockValue[tid][blockIndex_t] = refImgPtr[voxIndex_t];
+                                        if (refBlockValue[tid][blockIndex_t] == refBlockValue[tid][blockIndex_t]) {
+                                            ++definedValueNumber;
+                                        } else refBlockValue[tid][blockIndex_t] = 0;
+                                    } // timeV
+                                } //inside mask
+                            } //inside image
+                            blockIndex++;
+                        } // x
+                    } // y
+                } // z
+                // Loop over the discretised value
+                if (definedValueNumber > 0) {
+
+                    discretisedIndex = 0;
+                    for (c = imageVox[2] - discretise_radius; c <= imageVox[2] + discretise_radius; c += discretise_step) {
+                        for (b = imageVox[1] - discretise_radius; b <= imageVox[1] + discretise_radius; b += discretise_step) {
+                            for (a = imageVox[0] - discretise_radius; a <= imageVox[0] + discretise_radius; a += discretise_step) {
+
+                                blockIndex = 0;
+                                currentSum = 0.;
+                                definedValueNumber = 0;
+
+                                for (z = c - blockSize[2] / 2; z < c + blockSize[2] / 2; ++z) {
+                                    for (y = b - blockSize[1] / 2; y < b + blockSize[1] / 2; ++y) {
+                                        for (x = a - blockSize[0] / 2; x < a + blockSize[0] / 2; ++x) {
+
+                                            if (x > -1 && x<warImage->nx && y>-1 && y<warImage->ny && z>-1 && z < warImage->nz) {
+                                                voxIndex = warImage->ny * warImage->nx * z + warImage->nx * y + x;
+                                                for (t = 0; t < warImage->nt; ++t) {
+                                                    voxIndex_t = t * voxelNumber + voxIndex;
+                                                    blockIndex_t = t * voxelBlockNumber + blockIndex;
+                                                    if (warImgPtr[voxIndex_t] == warImgPtr[voxIndex_t]) {
 #ifdef MRF_USE_SAD
-                                          currentValue = fabs(warImgPtr[voxIndex_t]-refBlockValue[tid][blockIndex_t]);
+                                                        currentValue = fabs(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]);
 #else
-                                          currentValue = reg_pow2(warImgPtr[voxIndex_t]-refBlockValue[tid][blockIndex_t]);
+                                                        currentValue = reg_pow2(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]);
 #endif
-                                       } else {
+                                                    } else {
 #ifdef MRF_USE_SAD
-                                          currentValue = fabs(0-refBlockValue[tid][blockIndex_t]);
+                                                        currentValue = fabs(0 - refBlockValue[tid][blockIndex_t]);
 #else
-                                          currentValue = reg_pow2(0-refBlockValue[tid][blockIndex_t]);
+                                                        currentValue = reg_pow2(0 - refBlockValue[tid][blockIndex_t]);
 #endif
-                                       }
-
-                                       if(currentValue==currentValue){
-                                          currentSum -= currentValue;
-                                          ++definedValueNumber;
-                                       }
-                                    }
-                                 } //inside image
-                                 else {
-                                    for(t=0; t<warImage->nt; ++t){
-                                       blockIndex_t = t*voxelBlockNumber + blockIndex;
+                                                    }
+
+                                                    if (currentValue == currentValue) {
+                                                        currentSum -= currentValue;
+                                                        ++definedValueNumber;
+                                                    }
+                                                }
+                                            } //inside image
+                                            else {
+                                                for (t = 0; t < warImage->nt; ++t) {
+                                                    blockIndex_t = t * voxelBlockNumber + blockIndex;
 #ifdef MRF_USE_SAD
-                                       currentValue = fabs(0-refBlockValue[tid][blockIndex_t]);
+                                                    currentValue = fabs(0 - refBlockValue[tid][blockIndex_t]);
 #else
-                                       currentValue = reg_pow2(0-refBlockValue[tid][blockIndex_t]);
+                                                    currentValue = reg_pow2(0 - refBlockValue[tid][blockIndex_t]);
 #endif
-                                       if(currentValue==currentValue){
-                                          currentSum -= currentValue;
-                                          ++definedValueNumber;
-                                       }
-                                    }
-                                 }
-                                 blockIndex++;
-                              } // x
-                           } // y
-                        } // z
-                        discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] = currentSum;
-                        ++discretisedIndex;
-                     } // a
-                  } // b
-               } // cc
-            } // defined value in the reference block
-            ++currentControlPoint;
-         } // cpx
-      } // cpy
-   } // cpz
-   for(a=0;a<threadNumber;++a)
-      free(refBlockValue[a]);
-   free(refBlockValue);
-
-   // Deal with the labels that contains NaN values
-   for(int node=0; node<controlPointGridImage->nx*controlPointGridImage->ny*controlPointGridImage->nz; ++node){
-      int definedValueNumber=0;
-      float *discretisedValuePtr = &discretisedValue[node * label_nD_number];
-      float meanValue=0;
-      for(int label=0; label<label_nD_number;++label){
-         if(discretisedValuePtr[label]==discretisedValuePtr[label]){
-            ++definedValueNumber;
-            meanValue+=discretisedValuePtr[label];
-         }
-      }
-      if(definedValueNumber==0){
-         for(int label=0; label<label_nD_number;++label){
-            discretisedValuePtr[label]=0;
-         }
-      }
-      else if(definedValueNumber<label_nD_number){
-         // Needs to be altered for efficiency
-         int label=0;
-         // Loop over all labels
-         int label_x, label2_x, label_y, label2_y, label_z, label2_z, label2;
-         float min_distance, current_distance;
-         for(label_z=0; label_z<label_1D_number;++label_z){
-            for(label_y=0; label_y<label_1D_number;++label_y){
-               for(label_x=0; label_x<label_1D_number;++label_x){
-                  // check if the current label is defined
-                  if(discretisedValuePtr[label]!=discretisedValuePtr[label]){
-                     label2=0;
-                     min_distance=std::numeric_limits<float>::max();
-                     // Loop again over all label to detect the defined values
-                     for(label2_z=0; label2_z<label_1D_number;++label2_z){
-                        for(label2_y=0; label2_y<label_1D_number;++label2_y){
-                           for(label2_x=0; label2_x<label_1D_number;++label2_x){
-                              // Check if the value is defined
-                              if(discretisedValuePtr[label2]==discretisedValuePtr[label2]){
-                                 // compute the distance between label and label2
-                                 current_distance = reg_pow2(label_x-label2_x)+reg_pow2(label_y-label2_y)+reg_pow2(label_z-label2_z);
-                                 if(current_distance<min_distance){
-                                    min_distance=current_distance;
-                                    discretisedValuePtr[label] = discretisedValuePtr[label2];
-                                 }
-                              } // Check if label2 is defined
-                              ++label2;
-                           } // x
-                        } // y
-                     } // z
-                  } // check if undefined label
-                  ++label;
-               } //x
-            } // y
-         } // z
+                                                    if (currentValue == currentValue) {
+                                                        currentSum -= currentValue;
+                                                        ++definedValueNumber;
+                                                    }
+                                                }
+                                            }
+                                            blockIndex++;
+                                        } // x
+                                    } // y
+                                } // z
+                                discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] = currentSum;
+                                ++discretisedIndex;
+                            } // a
+                        } // b
+                    } // cc
+                } // defined value in the reference block
+                ++currentControlPoint;
+            } // cpx
+        } // cpy
+    } // cpz
+    for (a = 0; a < threadNumber; ++a)
+        free(refBlockValue[a]);
+    free(refBlockValue);
+
+    // Deal with the labels that contains NaN values
+    for (int node = 0; node < controlPointGridImage->nx * controlPointGridImage->ny * controlPointGridImage->nz; ++node) {
+        int definedValueNumber = 0;
+        float *discretisedValuePtr = &discretisedValue[node * label_nD_number];
+        float meanValue = 0;
+        for (int label = 0; label < label_nD_number; ++label) {
+            if (discretisedValuePtr[label] == discretisedValuePtr[label]) {
+                ++definedValueNumber;
+                meanValue += discretisedValuePtr[label];
+            }
+        }
+        if (definedValueNumber == 0) {
+            for (int label = 0; label < label_nD_number; ++label) {
+                discretisedValuePtr[label] = 0;
+            }
+        } else if (definedValueNumber < label_nD_number) {
+            // Needs to be altered for efficiency
+            int label = 0;
+            // Loop over all labels
+            int label_x, label2_x, label_y, label2_y, label_z, label2_z, label2;
+            float min_distance, current_distance;
+            for (label_z = 0; label_z < label_1D_number; ++label_z) {
+                for (label_y = 0; label_y < label_1D_number; ++label_y) {
+                    for (label_x = 0; label_x < label_1D_number; ++label_x) {
+                        // check if the current label is defined
+                        if (discretisedValuePtr[label] != discretisedValuePtr[label]) {
+                            label2 = 0;
+                            min_distance = std::numeric_limits<float>::max();
+                            // Loop again over all label to detect the defined values
+                            for (label2_z = 0; label2_z < label_1D_number; ++label2_z) {
+                                for (label2_y = 0; label2_y < label_1D_number; ++label2_y) {
+                                    for (label2_x = 0; label2_x < label_1D_number; ++label2_x) {
+                                        // Check if the value is defined
+                                        if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) {
+                                            // compute the distance between label and label2
+                                            current_distance = reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z);
+                                            if (current_distance < min_distance) {
+                                                min_distance = current_distance;
+                                                discretisedValuePtr[label] = discretisedValuePtr[label2];
+                                            }
+                                        } // Check if label2 is defined
+                                        ++label2;
+                                    } // x
+                                } // y
+                            } // z
+                        } // check if undefined label
+                        ++label;
+                    } //x
+                } // y
+            } // z
 
-      } // node with undefined label
-   } // node
+        } // node with undefined label
+    } // node
 }
 /* *************************************************************** */
 //template <class DTYPE>
@@ -1022,73 +953,60 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
 void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage,
                                   float *discretisedValue,
                                   int discretise_radius,
-                                  int discretise_step)
-{
-   if(referenceImagePointer->nz > 1) {
-      switch(this->referenceImagePointer->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         GetDiscretisedValueSSD_core3D_2<float>
-               (controlPointGridImage,
-                discretisedValue,
-                discretise_radius,
-                discretise_step,
-                this->referenceImagePointer,
-                this->warpedFloatingImagePointer,
-                this->referenceMaskPointer
-                );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         GetDiscretisedValueSSD_core3D_2<double>
-               (controlPointGridImage,
-                discretisedValue,
-                discretise_radius,
-                discretise_step,
-                this->referenceImagePointer,
-                this->warpedFloatingImagePointer,
-                this->referenceMaskPointer
-                );
-         break;
-      default:
-         reg_print_fct_error("reg_ssd::GetDiscretisedValue");
-         reg_print_msg_error("Unsupported datatype");
-         reg_exit();
-      }
-   }
-   else
-   {
-      reg_print_fct_error("reg_ssd::GetDiscretisedValue");
-      reg_print_msg_error("Not implemented in 2D yet");
-      reg_exit();
-      //        switch(this->referenceImagePointer->datatype)
-      //        {
-      //        case NIFTI_TYPE_FLOAT32:
-      //            GetDiscretisedValueSSD_core2D<float>
-      //                    (controlPointGridImage,
-      //                     discretisedValue,
-      //                     discretise_radius,
-      //                     discretise_step,
-      //                     this->referenceImagePointer,
-      //                     this->warpedFloatingImagePointer,
-      //                     this->referenceMaskPointer
-      //                     );
-      //            break;
-      //        case NIFTI_TYPE_FLOAT64:
-      //            GetDiscretisedValueSSD_core2D<double>
-      //                    (controlPointGridImage,
-      //                     discretisedValue,
-      //                     discretise_radius,
-      //                     discretise_step,
-      //                     this->referenceImagePointer,
-      //                     this->warpedFloatingImagePointer,
-      //                     this->referenceMaskPointer
-      //                     );
-      //            break;
-      //        default:
-      //            reg_print_fct_error("reg_ssd::GetDiscretisedValue");
-      //            reg_print_msg_error("Unsupported datatype");
-      //            reg_exit();
-      //        }
-   }
+                                  int discretise_step) {
+    if (referenceImagePointer->nz > 1) {
+        switch (this->referenceImagePointer->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            GetDiscretisedValueSSD_core3D_2<float>(controlPointGridImage,
+                                                   discretisedValue,
+                                                   discretise_radius,
+                                                   discretise_step,
+                                                   this->referenceImagePointer,
+                                                   this->warpedFloatingImagePointer,
+                                                   this->referenceMaskPointer);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            GetDiscretisedValueSSD_core3D_2<double>(controlPointGridImage,
+                                                    discretisedValue,
+                                                    discretise_radius,
+                                                    discretise_step,
+                                                    this->referenceImagePointer,
+                                                    this->warpedFloatingImagePointer,
+                                                    this->referenceMaskPointer);
+            break;
+        default:
+            reg_print_fct_error("reg_ssd::GetDiscretisedValue");
+            reg_print_msg_error("Unsupported datatype");
+            reg_exit();
+        }
+    } else {
+        reg_print_fct_error("reg_ssd::GetDiscretisedValue");
+        reg_print_msg_error("Not implemented in 2D yet");
+        reg_exit();
+        // switch (this->referenceImagePointer->datatype) {
+        // case NIFTI_TYPE_FLOAT32:
+        //     GetDiscretisedValueSSD_core2D<float>(controlPointGridImage,
+        //                                          discretisedValue,
+        //                                          discretise_radius,
+        //                                          discretise_step,
+        //                                          this->referenceImagePointer,
+        //                                          this->warpedFloatingImagePointer,
+        //                                          this->referenceMaskPointer);
+        //     break;
+        // case NIFTI_TYPE_FLOAT64:
+        //     GetDiscretisedValueSSD_core2D<double>(controlPointGridImage,
+        //                                           discretisedValue,
+        //                                           discretise_radius,
+        //                                           discretise_step,
+        //                                           this->referenceImagePointer,
+        //                                           this->warpedFloatingImagePointer,
+        //                                           this->referenceMaskPointer);
+        //     break;
+        // default:
+        //     reg_print_fct_error("reg_ssd::GetDiscretisedValue");
+        //     reg_print_msg_error("Unsupported datatype");
+        //     reg_exit();
+        // }
+    }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index 37514e43..e415dece 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -18,9 +18,8 @@
 
 /* *************************************************************** */
 /* *************************************************************** */
-/// @brief SSD measure of similarity classe
-class reg_ssd : public reg_measure
-{
+/// @brief SSD measure of similarity class
+class reg_ssd: public reg_measure {
 public:
     /// @brief reg_ssd class constructor
     reg_ssd();
@@ -52,14 +51,14 @@ class reg_ssd : public reg_measure
                                      int discretise_radius,
                                      int discretise_step);
 protected:
-   float currentValue[255];
+    float currentValue[255];
 
 private:
-   bool normaliseTimePoint[255];
+    bool normaliseTimePoint[255];
 };
 /* *************************************************************** */
 
-/** @brief Copmutes and returns the SSD between two input images
+/** @brief Computes and returns the SSD between two input images
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
  * @param activeTimePoint Specified which time point volumes have to be considered
@@ -73,20 +72,19 @@ class reg_ssd : public reg_measure
  */
 extern "C++" template <class DTYPE>
 double reg_getSSDValue(nifti_image *referenceImage,
-							  nifti_image *warpedImage,
-							  double *timePointWeight,
-							  nifti_image *jacobianDeterminantImage,
-							  int *mask,
-							  float *currentValue,
-							  nifti_image *localWeightImage
-							 );
+                       nifti_image *warpedImage,
+                       double *timePointWeight,
+                       nifti_image *jacobianDeterminantImage,
+                       int *mask,
+                       float *currentValue,
+                       nifti_image *localWeightImage);
 
 /** @brief Compute a voxel based gradient of the sum squared difference.
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
  * @param activeTimePoint Specified which time point volumes have to be considered
  * @param warpedImageGradient Spatial gradient of the input warped image
- * @param ssdGradientImage Output image htat will be updated with the
+ * @param ssdGradientImage Output image that will be updated with the
  * value of the SSD gradient
  * @param jacobianDeterminantImage Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
@@ -104,5 +102,4 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                                   int *mask,
                                   int current_timepoint,
                                   double timepoint_weight,
-                                  nifti_image *localWeightImage
-                                 );
+                                  nifti_image *localWeightImage);
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index d584b86a..4f14dea8 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -393,7 +393,7 @@ PrecisionTYPE reg_getMaximalLength2D(nifti_image *image)
    DTYPE *dataPtrX = static_cast<DTYPE *>(image->data);
    DTYPE *dataPtrY = &dataPtrX[image->nx*image->ny*image->nz];
 
-   PrecisionTYPE max=0.0;
+   PrecisionTYPE max=0;
 
    for(int i=0; i<image->nx*image->ny*image->nz; i++)
    {
@@ -412,7 +412,7 @@ PrecisionTYPE reg_getMaximalLength3D(nifti_image *image)
    DTYPE *dataPtrY = &dataPtrX[image->nx*image->ny*image->nz];
    DTYPE *dataPtrZ = &dataPtrY[image->nx*image->ny*image->nz];
 
-   PrecisionTYPE max=0.0;
+   PrecisionTYPE max=0;
 
    for(int i=0; i<image->nx*image->ny*image->nz; i++)
    {
@@ -1332,8 +1332,8 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
                            // Set the current values to zero
                            // Increment the current value by performing the weighted sum
 #ifdef _USE_SSE
-                           intensity_sum_sse.m = _mm_set_ps1(0.0);
-                           density_sum_sse.m = _mm_set_ps1(0.0);
+                           intensity_sum_sse.m = _mm_set_ps1(0);
+                           density_sum_sse.m = _mm_set_ps1(0);
                            k=shiftPre;
                            while(k<shiftPst-3)
                            {
@@ -3099,7 +3099,7 @@ double reg_test_compare_arrays(DTYPE *ptrA,
                               DTYPE *ptrB,
                               size_t nvox)
 {
-   double maxDifference=0.0;
+   double maxDifference=0;
 
    for(size_t i=0; i<nvox; ++i)
    {
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index c0451c0f..5cf53720 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -4,7 +4,7 @@
 
 class CudaCompute: public Compute {
 public:
-    CudaCompute(Content *con) : Compute(con) {}
+    CudaCompute(Content *con): Compute(con) {}
 
     virtual void ResampleImage(int inter, float paddingValue) override;
     virtual double GetJacobianPenaltyTerm(bool approx) override;
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 4746230e..3a6bd8c1 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -5,7 +5,7 @@ CudaContent::CudaContent(nifti_image *referenceIn,
                          nifti_image *floatingIn,
                          int *referenceMaskIn,
                          mat44 *transformationMatrixIn,
-                         size_t bytesIn) :
+                         size_t bytesIn):
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) {
     AllocateImages();
     AllocateWarped();
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index dfc0cbfa..3b6bd53b 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -7,7 +7,7 @@ CudaF3dContent::CudaF3dContent(nifti_image *referenceIn,
                                nifti_image *localWeightSimIn,
                                int *referenceMaskIn,
                                mat44 *transformationMatrixIn,
-                               size_t bytesIn) :
+                               size_t bytesIn):
     F3dContent(referenceIn, floatingIn, controlPointGridIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
     CudaContent(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) {
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 2c34df01..30a93e54 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -1970,15 +1970,15 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices)
 		   y==imageSize.y-1 ||
 		   z==imageSize.z-1 ){
 			int index=tid*9;
-			jacobianMatrices[index++]=1.0;
-			jacobianMatrices[index++]=0.0;
-			jacobianMatrices[index++]=0.0;
-			jacobianMatrices[index++]=0.0;
-			jacobianMatrices[index++]=1.0;
-			jacobianMatrices[index++]=0.0;
-			jacobianMatrices[index++]=0.0;
-			jacobianMatrices[index++]=0.0;
-			jacobianMatrices[index]=1.0;
+			jacobianMatrices[index++]=1;
+			jacobianMatrices[index++]=0;
+			jacobianMatrices[index++]=0;
+			jacobianMatrices[index++]=0;
+			jacobianMatrices[index++]=1;
+			jacobianMatrices[index++]=0;
+			jacobianMatrices[index++]=0;
+			jacobianMatrices[index++]=0;
+			jacobianMatrices[index]=1;
 			return;
 		}
 
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 56fb2af8..70bfb4c2 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -9,18 +9,13 @@
 
 #include "_reg_lncc.h"
 #include "_reg_dti.h"
-
 #include "_reg_common_cuda.h"
 #include "_reg_kld.h"
 
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /// @brief Class that contains the GPU device pointers
-class reg_measure_gpu
-{
+class reg_measure_gpu {
 protected:
    /// @brief Measure class constructor
    reg_measure_gpu() {}
@@ -30,15 +25,14 @@ class reg_measure_gpu
    cudaArray *referenceDevicePointer;
    cudaArray *floatingDevicePointer;
    int *referenceMaskDevicePointer;
-   int activeVoxeNumber;
+   int activeVoxelNumber;
    float *warpedFloatingDevicePointer;
    float4 *warpedFloatingGradientDevicePointer;
    float4 *forwardVoxelBasedGradientDevicePointer;
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-class reg_lncc_gpu : public reg_lncc , public reg_measure_gpu
-{
+class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
 public:
    void InitialiseMeasure(nifti_image *refImgPtr,
                           nifti_image *floImgPtr,
@@ -52,14 +46,10 @@ class reg_lncc_gpu : public reg_lncc , public reg_measure_gpu
                           int *refMskDevicePtr,
                           float *warFloDevicePtr,
                           float4 *warFloGradDevicePtr,
-                          float4 *forVoxBasedGraDevicePtr)
-   {
-      ;
-   }
+                          float4 *forVoxBasedGraDevicePtr) {}
    /// @brief reg_lncc class constructor
-   reg_lncc_gpu()
-   {
-      fprintf(stderr,"[ERROR] CUDA CANNOT BE USED WITH LNCC YET\n");
+   reg_lncc_gpu() {
+      fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH LNCC YET\n");
       reg_exit();
    }
    /// @brief reg_lncc class destructor
@@ -71,8 +61,7 @@ class reg_lncc_gpu : public reg_lncc , public reg_measure_gpu
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-class reg_kld_gpu : public reg_kld , public reg_measure_gpu
-{
+class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
 public:
    void InitialiseMeasure(nifti_image *refImgPtr,
                           nifti_image *floImgPtr,
@@ -86,14 +75,10 @@ class reg_kld_gpu : public reg_kld , public reg_measure_gpu
                           int *refMskDevicePtr,
                           float *warFloDevicePtr,
                           float4 *warFloGradDevicePtr,
-                          float4 *forVoxBasedGraDevicePtr)
-   {
-      ;
-   }
+                          float4 *forVoxBasedGraDevicePtr) {}
    /// @brief reg_kld_gpu class constructor
-   reg_kld_gpu()
-   {
-      fprintf(stderr,"[ERROR] CUDA CANNOT BE USED WITH KLD YET\n");
+   reg_kld_gpu() {
+      fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH KLD YET\n");
       reg_exit();
    }
    /// @brief reg_kld_gpu class destructor
@@ -105,8 +90,7 @@ class reg_kld_gpu : public reg_kld , public reg_measure_gpu
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-class reg_dti_gpu : public reg_dti , public reg_measure_gpu
-{
+class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
 public:
    void InitialiseMeasure(nifti_image *refImgPtr,
                           nifti_image *floImgPtr,
@@ -120,14 +104,10 @@ class reg_dti_gpu : public reg_dti , public reg_measure_gpu
                           int *refMskDevicePtr,
                           float *warFloDevicePtr,
                           float4 *warFloGradDevicePtr,
-                          float4 *forVoxBasedGraDevicePtr)
-   {
-      ;
-   }
+                          float4 *forVoxBasedGraDevicePtr) {}
    /// @brief reg_dti_gpu class constructor
-   reg_dti_gpu()
-   {
-      fprintf(stderr,"[ERROR] CUDA CANNOT BE USED WITH DTI YET\n");
+   reg_dti_gpu() {
+      fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH DTI YET\n");
       reg_exit();
    }
    /// @brief reg_dti_gpu class destructor
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index ee4d38e2..41960409 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -16,34 +16,30 @@
 
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-reg_nmi_gpu::reg_nmi_gpu():
-	reg_nmi::reg_nmi()
-{
-	this->forwardJointHistogramLog_device=nullptr;
-//	this->backwardJointHistogramLog_device=nullptr;
+reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() {
+    this->forwardJointHistogramLog_device = nullptr;
+    //	this->backwardJointHistogramLog_device=nullptr;
 
 #ifndef NDEBUG
-		printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n");
+    printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-reg_nmi_gpu::~reg_nmi_gpu()
-{
-	this->DeallocateHistogram();
+reg_nmi_gpu::~reg_nmi_gpu() {
+    this->DeallocateHistogram();
 #ifndef NDEBUG
-		printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n");
+    printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_nmi_gpu::DeallocateHistogram()
-{
-	if(this->forwardJointHistogramLog_device!=nullptr){
-		cudaFree(this->forwardJointHistogramLog_device);
-	}
-	this->forwardJointHistogramLog_device=nullptr;
+void reg_nmi_gpu::DeallocateHistogram() {
+    if (this->forwardJointHistogramLog_device != nullptr) {
+        cudaFree(this->forwardJointHistogramLog_device);
+        this->forwardJointHistogramLog_device = nullptr;
+    }
 #ifndef NDEBUG
-		printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n");
+    printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -54,46 +50,44 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
                                     int activeVoxNum,
                                     nifti_image *warFloImgPtr,
                                     nifti_image *warFloGraPtr,
-									nifti_image *forVoxBasedGraPtr,
-									cudaArray *refDevicePtr,
-									cudaArray *floDevicePtr,
+                                    nifti_image *forVoxBasedGraPtr,
+                                    cudaArray *refDevicePtr,
+                                    cudaArray *floDevicePtr,
                                     int *refMskDevicePtr,
                                     float *warFloDevicePtr,
                                     float4 *warFloGradDevicePtr,
-                                    float4 *forVoxBasedGraDevicePtr)
-{
-	this->DeallocateHistogram();
+                                    float4 *forVoxBasedGraDevicePtr) {
+    this->DeallocateHistogram();
     reg_nmi::InitialiseMeasure(refImgPtr,
                                floImgPtr,
                                maskRefPtr,
                                warFloImgPtr,
                                warFloGraPtr,
-							   forVoxBasedGraPtr);
-	// Check if a symmetric measure is required
-	if(this->isSymmetric){
-		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-		fprintf(stderr,"[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n");
-		reg_exit();
-	}
-	// Check if the input images have multiple timepoints
-	if(this->referenceTimePoint>1 ||
-       this->floatingImagePointer->nt>1){
-		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-		fprintf(stderr,"[NiftyReg ERROR] This class can only be \n");
-		reg_exit();
+                               forVoxBasedGraPtr);
+    // Check if a symmetric measure is required
+    if (this->isSymmetric) {
+        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
+        fprintf(stderr, "[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n");
+        reg_exit();
+    }
+    // Check if the input images have multiple timepoints
+    if (this->referenceTimePoint > 1 || this->floatingImagePointer->nt > 1) {
+        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
+        fprintf(stderr, "[NiftyReg ERROR] This class can only be \n");
+        reg_exit();
     }
     // Check that the input image are of type float
-    if(this->referenceImagePointer->datatype!=NIFTI_TYPE_FLOAT32 ||
-       this->warpedFloatingImagePointer->datatype!=NIFTI_TYPE_FLOAT32){
-        fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        fprintf(stderr,"[NiftyReg ERROR] This class can only be \n");
+    if (this->referenceImagePointer->datatype != NIFTI_TYPE_FLOAT32 ||
+        this->warpedFloatingImagePointer->datatype != NIFTI_TYPE_FLOAT32) {
+        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
+        fprintf(stderr, "[NiftyReg ERROR] This class can only be \n");
         reg_exit();
     }
     // Bind the required pointers
     this->referenceDevicePointer = refDevicePtr;
     this->floatingDevicePointer = floDevicePtr;
     this->referenceMaskDevicePointer = refMskDevicePtr;
-    this->activeVoxeNumber = activeVoxNum;
+    this->activeVoxelNumber = activeVoxNum;
     this->warpedFloatingDevicePointer = warFloDevicePtr;
     this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr;
     this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr;
@@ -112,142 +106,133 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
     cudaMalloc(&this->forwardJointHistogramLog_device, this->totalBinNumber[0] * sizeof(float));
 
 #ifndef NDEBUG
-		printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n");
+    printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-double reg_nmi_gpu::GetSimilarityMeasureValue()
-{
-	// The NMI computation is performed into the host for now
-	// The relevant images have to be transfered from the device to the host
-	cudaMemcpy(this->warpedFloatingImagePointer->data,
-			   this->warpedFloatingDevicePointer,
-			   this->warpedFloatingImagePointer->nvox *
-			   this->warpedFloatingImagePointer->nbyper,
-			   cudaMemcpyDeviceToHost
-               );
+double reg_nmi_gpu::GetSimilarityMeasureValue() {
+    // The NMI computation is performed into the host for now
+    // The relevant images have to be transfered from the device to the host
+    cudaMemcpy(this->warpedFloatingImagePointer->data,
+               this->warpedFloatingDevicePointer,
+               this->warpedFloatingImagePointer->nvox *
+               this->warpedFloatingImagePointer->nbyper,
+               cudaMemcpyDeviceToHost);
 
-    reg_getNMIValue<float>
-            (this->referenceImagePointer,
-			 this->warpedFloatingImagePointer,
-			 this->timePointWeight,
-             this->referenceBinNumber,
-             this->floatingBinNumber,
-             this->totalBinNumber,
-             this->forwardJointHistogramLog,
-             this->forwardJointHistogramPro,
-             this->forwardEntropyValues,
-             this->referenceMaskPointer
-             );
+    reg_getNMIValue<float>(this->referenceImagePointer,
+                           this->warpedFloatingImagePointer,
+                           this->timePointWeight,
+                           this->referenceBinNumber,
+                           this->floatingBinNumber,
+                           this->totalBinNumber,
+                           this->forwardJointHistogramLog,
+                           this->forwardJointHistogramPro,
+                           this->forwardEntropyValues,
+                           this->referenceMaskPointer);
 
-    double nmi_value=0.;
-    nmi_value += (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1] ) /
-            this->forwardEntropyValues[0][2];
+    double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2];
 
 #ifndef NDEBUG
-		printf("[NiftyReg DEBUG] reg_nmi_gpu::GetSimilarityMeasureValue called\n");
+    printf("[NiftyReg DEBUG] reg_nmi_gpu::GetSimilarityMeasureValue called\n");
 #endif
-	return nmi_value;
+    return nmi_value;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /// Called when we only have one target and one source image
 void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
-									  cudaArray *referenceImageArray_d,
-									  float *warpedImageArray_d,
-									  float4 *warpedGradientArray_d,
-									  float *logJointHistogram_d,
-									  float4 *voxelNMIGradientArray_d,
-									  int *mask_d,
-									  int activeVoxelNumber,
-									  double *entropies,
-									  int refBinning,
-									  int floBinning)
-{
+                                      cudaArray *referenceImageArray_d,
+                                      float *warpedImageArray_d,
+                                      float4 *warpedGradientArray_d,
+                                      float *logJointHistogram_d,
+                                      float4 *voxelNMIGradientArray_d,
+                                      int *mask_d,
+                                      int activeVoxelNumber,
+                                      double *entropies,
+                                      int refBinning,
+                                      int floBinning) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-	const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz;
-	const int3 imageSize=make_int3(referenceImage->nx,referenceImage->ny,referenceImage->nz);
-    const int binNumber = refBinning*floBinning+refBinning+floBinning;
-	const float normalisedJE=(float)(entropies[2]*entropies[3]);
-    const float NMI = (float)((entropies[0]+entropies[1])/entropies[2]);
+    const int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz;
+    const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const int binNumber = refBinning * floBinning + refBinning + floBinning;
+    const float normalisedJE = (float)(entropies[2] * entropies[3]);
+    const float NMI = (float)((entropies[0] + entropies[1]) / entropies[2]);
 
     // Bind Symbols
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize,&imageSize,sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstTargetBin,&refBinning,sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstResultBin,&floBinning,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisedJE,&normalisedJE,sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NMI,&NMI,sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize, &imageSize, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstTargetBin, &refBinning, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstResultBin, &floBinning, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisedJE, &normalisedJE, sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NMI, &NMI, sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
 
     // Texture binding floating
     //Bind target image array to a 3D texture
-	firstreferenceImageTexture.normalized = true;
-	firstreferenceImageTexture.filterMode = cudaFilterModeLinear;
-	firstreferenceImageTexture.addressMode[0] = cudaAddressModeWrap;
-	firstreferenceImageTexture.addressMode[1] = cudaAddressModeWrap;
-	firstreferenceImageTexture.addressMode[2] = cudaAddressModeWrap;
+    firstreferenceImageTexture.normalized = true;
+    firstreferenceImageTexture.filterMode = cudaFilterModeLinear;
+    firstreferenceImageTexture.addressMode[0] = cudaAddressModeWrap;
+    firstreferenceImageTexture.addressMode[1] = cudaAddressModeWrap;
+    firstreferenceImageTexture.addressMode[2] = cudaAddressModeWrap;
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-	NR_CUDA_SAFE_CALL(cudaBindTextureToArray(firstreferenceImageTexture, referenceImageArray_d, channelDesc))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageTexture, warpedImageArray_d, voxelNumber*sizeof(float)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageGradientTexture, warpedGradientArray_d, voxelNumber*sizeof(float4)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, histogramTexture, logJointHistogram_d, binNumber*sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemset(voxelNMIGradientArray_d, 0, voxelNumber*sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(firstreferenceImageTexture, referenceImageArray_d, channelDesc));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageTexture, warpedImageArray_d, voxelNumber * sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageGradientTexture, warpedGradientArray_d, voxelNumber * sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, histogramTexture, logJointHistogram_d, binNumber * sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemset(voxelNMIGradientArray_d, 0, voxelNumber * sizeof(float4)));
 
-	if(referenceImage->nz>1){
-		const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW3D =
-            (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D));
-        dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D,1,1);
-		dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW3D,Grid_reg_getVoxelBasedNMIGradientUsingPW3D,1);
-		reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
-	}
-	else{
-		const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW2D =
-            (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D));
-        dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D,1,1);
-		dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW2D,Grid_reg_getVoxelBasedNMIGradientUsingPW2D,1);
-		reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
-	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstreferenceImageTexture));
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageTexture));
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageGradientTexture));
+    if (referenceImage->nz > 1) {
+        const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW3D =
+            (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D));
+        dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D, 1, 1);
+        dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW3D, Grid_reg_getVoxelBasedNMIGradientUsingPW3D, 1);
+        reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    } else {
+        const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW2D =
+            (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D));
+        dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D, 1, 1);
+        dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW2D, Grid_reg_getVoxelBasedNMIGradientUsingPW2D, 1);
+        reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    }
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstreferenceImageTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageGradientTexture));
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(histogramTexture));
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
-{
+void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     // The latest joint histogram is transfered onto the GPU
-    float *temp=(float *)malloc(this->totalBinNumber[0]*sizeof(float));
-    for(unsigned short i=0;i<this->totalBinNumber[0]; ++i)
-		temp[i]=static_cast<float>(this->forwardJointHistogramLog[0][i]);
+    float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float));
+    for (unsigned short i = 0; i < this->totalBinNumber[0]; ++i)
+        temp[i] = static_cast<float>(this->forwardJointHistogramLog[0][i]);
     cudaMemcpy(this->forwardJointHistogramLog_device,
                temp,
-               this->totalBinNumber[0]*sizeof(float),
+               this->totalBinNumber[0] * sizeof(float),
                cudaMemcpyHostToDevice);
     free(temp);
 
     // THe gradient of the NMI is computed on the GPU
     reg_getVoxelBasedNMIGradient_gpu(this->referenceImagePointer,
-									 this->referenceDevicePointer,
-									 this->warpedFloatingDevicePointer,
-									 this->warpedFloatingGradientDevicePointer,
-									 this->forwardJointHistogramLog_device,
-									 this->forwardVoxelBasedGradientDevicePointer,
-									 this->referenceMaskDevicePointer,
-                                     this->activeVoxeNumber,
-									 this->forwardEntropyValues[0],
-									 this->referenceBinNumber[0],
-									 this->floatingBinNumber[0]);
+                                     this->referenceDevicePointer,
+                                     this->warpedFloatingDevicePointer,
+                                     this->warpedFloatingGradientDevicePointer,
+                                     this->forwardJointHistogramLog_device,
+                                     this->forwardVoxelBasedGradientDevicePointer,
+                                     this->referenceMaskDevicePointer,
+                                     this->activeVoxelNumber,
+                                     this->forwardEntropyValues[0],
+                                     this->referenceBinNumber[0],
+                                     this->floatingBinNumber[0]);
 #ifndef NDEBUG
-		printf("[NiftyReg DEBUG] reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n");
+    printf("[NiftyReg DEBUG] reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index db549c28..c8e1c198 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -19,8 +19,7 @@
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /// @brief NMI measure of similarity class - GPU based
-class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu
-{
+class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
 public:
    /// @brief reg_nmi class constructor
    reg_nmi_gpu();
@@ -48,14 +47,13 @@ class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu
 
 protected:
    float *forwardJointHistogramLog_device;
-//	float **backwardJointHistogramLog_device;
+	// float **backwardJointHistogramLog_device;
    void DeallocateHistogram();
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/// @brief NMI measure of similarity classe
-class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measure_gpu
-{
+/// @brief NMI measure of similarity class
+class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu {
 public:
    void InitialiseMeasure(nifti_image *refImgPtr,
                           nifti_image *floImgPtr,
@@ -69,10 +67,7 @@ class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measur
                           int *refMskDevicePtr,
                           float *warFloDevicePtr,
                           float4 *warFloGradDevicePtr,
-                          float4 *forVoxBasedGraDevicePtr)
-   {
-      ;
-   }
+                          float4 *forVoxBasedGraDevicePtr) {}
    /// @brief reg_nmi class constructor
    reg_multichannel_nmi_gpu() {}
    /// @brief reg_nmi class destructor
@@ -84,7 +79,6 @@ class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measur
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-
 extern "C++"
 void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
                                       cudaArray *referenceImageArray_d,
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index f394a187..acda88f3 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -3,12 +3,10 @@
 
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-reg_optimiser_gpu::reg_optimiser_gpu()
-    :reg_optimiser<float>::reg_optimiser()
-{
-    this->currentDOF_gpu=nullptr;
-    this->bestDOF_gpu=nullptr;
-    this->gradient_gpu=nullptr;
+reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser<float>::reg_optimiser() {
+    this->currentDOF_gpu = nullptr;
+    this->bestDOF_gpu = nullptr;
+    this->gradient_gpu = nullptr;
 
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_optimiser_gpu::reg_optimiser_gpu() called\n");
@@ -39,79 +37,66 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
                                    float *gradData,
                                    size_t a,
                                    float *b,
-                                   float *c
-                                   )
-{
-	this->dofNumber=nvox;
-	this->ndim=dim;
-    this->optimiseX=optX;
-    this->optimiseY=optY;
-    this->optimiseZ=optZ;
-    this->maxIterationNumber=maxit;
-    this->currentIterationNumber=start;
+                                   float *c) {
+    this->dofNumber = nvox;
+    this->ndim = dim;
+    this->optimiseX = optX;
+    this->optimiseY = optY;
+    this->optimiseZ = optZ;
+    this->maxIterationNumber = maxit;
+    this->currentIterationNumber = start;
 
-	// Arrays are converted from float to float4
-    this->currentDOF_gpu=reinterpret_cast<float4 *>(cppData);
+    // Arrays are converted from float to float4
+    this->currentDOF_gpu = reinterpret_cast<float4*>(cppData);
 
-    if(gradData!=nullptr)
-        this->gradient_gpu=reinterpret_cast<float4 *>(gradData);
+    if (gradData != nullptr)
+        this->gradient_gpu = reinterpret_cast<float4*>(gradData);
 
     if (this->bestDOF_gpu != nullptr)
         cudaCommon_free(this->bestDOF_gpu);
 
-    if(cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu,
-									   (int)(this->GetVoxNumber()))){
+    if (cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu, (int)(this->GetVoxNumber()))) {
         printf("[NiftyReg ERROR] Error when allocating the best control point array on the GPU.\n");
         reg_exit();
     }
 
-	this->StoreCurrentDOF();
-
-    this->objFunc=obj;
-	this->bestObjFunctionValue =
-			this->currentObjFunctionValue =
-			this->objFunc->GetObjectiveFunctionValue();
+    this->StoreCurrentDOF();
 
+    this->objFunc = obj;
+    this->bestObjFunctionValue = this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue();
 
 #ifndef NDEBUG
-	printf("[NiftyReg DEBUG] reg_optimiser_gpu::Initialise() called\n");
+    printf("[NiftyReg DEBUG] reg_optimiser_gpu::Initialise() called\n");
 #endif
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_optimiser_gpu::RestoreBestDOF()
-{
-	// restore forward transformation
-    NR_CUDA_SAFE_CALL(
-        cudaMemcpy(this->currentDOF_gpu,
-                   this->bestDOF_gpu,
-                   this->GetVoxNumber()*sizeof(float4),
-                   cudaMemcpyDeviceToDevice))
+void reg_optimiser_gpu::RestoreBestDOF() {
+    // restore forward transformation
+    NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDOF_gpu,
+                                 this->bestDOF_gpu,
+                                 this->GetVoxNumber() * sizeof(float4),
+                                 cudaMemcpyDeviceToDevice));
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_optimiser_gpu::StoreCurrentDOF()
-{
-	// Store forward transformation
-    NR_CUDA_SAFE_CALL(
-        cudaMemcpy(this->bestDOF_gpu,
-                   this->currentDOF_gpu,
-                   this->GetVoxNumber()*sizeof(float4),
-                   cudaMemcpyDeviceToDevice))
+void reg_optimiser_gpu::StoreCurrentDOF() {
+    // Store forward transformation
+    NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDOF_gpu,
+                                 this->currentDOF_gpu,
+                                 this->GetVoxNumber() * sizeof(float4),
+                                 cudaMemcpyDeviceToDevice));
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_optimiser_gpu::Perturbation(float length)
-{
+void reg_optimiser_gpu::Perturbation(float length) {
     /// @todo
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-reg_conjugateGradient_gpu::reg_conjugateGradient_gpu()
-    :reg_optimiser_gpu::reg_optimiser_gpu()
-{
-    this->array1=nullptr;
-    this->array2=nullptr;
+reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_optimiser_gpu() {
+    this->array1 = nullptr;
+    this->array2 = nullptr;
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called\n");
 #endif
@@ -135,19 +120,18 @@ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_conjugateGradient_gpu::Initialise(size_t nvox,
-                                               int dim,
-                                               bool optX,
-                                               bool optY,
-                                               bool optZ,
-                                               size_t maxit,
-                                               size_t start,
-                                               InterfaceOptimiser *obj,
-                                               float *cppData,
-                                               float *gradData,
-                                               size_t a,
-                                               float *b,
-                                               float *c)
-{
+                                           int dim,
+                                           bool optX,
+                                           bool optY,
+                                           bool optZ,
+                                           size_t maxit,
+                                           size_t start,
+                                           InterfaceOptimiser *obj,
+                                           float *cppData,
+                                           float *gradData,
+                                           size_t a,
+                                           float *b,
+                                           float *c) {
     reg_optimiser_gpu::Initialise(nvox,
                                   dim,
                                   optX,
@@ -157,16 +141,13 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox,
                                   start,
                                   obj,
                                   cppData,
-                                  gradData
-                                  );
-	this->firstcall=true;
-	if(cudaCommon_allocateArrayToDevice<float4>(&this->array1,
-												(int)(this->GetVoxNumber()))){
+                                  gradData);
+    this->firstcall = true;
+    if (cudaCommon_allocateArrayToDevice<float4>(&this->array1, (int)(this->GetVoxNumber()))) {
         printf("[NiftyReg ERROR] Error when allocating the first conjugate gradient_gpu array on the GPU.\n");
         reg_exit();
     }
-	if(cudaCommon_allocateArrayToDevice<float4>(&this->array2,
-												(int)(this->GetVoxNumber()))){
+    if (cudaCommon_allocateArrayToDevice<float4>(&this->array2, (int)(this->GetVoxNumber()))) {
         printf("[NiftyReg ERROR] Error when allocating the second conjugate gradient_gpu array on the GPU.\n");
         reg_exit();
     }
@@ -176,45 +157,39 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox,
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_conjugateGradient_gpu::UpdateGradientValues()
-{
-    if(this->firstcall){
+void reg_conjugateGradient_gpu::UpdateGradientValues() {
+    if (this->firstcall) {
         reg_initialiseConjugateGradient_gpu(this->gradient_gpu,
                                             this->array1,
                                             this->array2,
                                             (int)(this->GetVoxNumber()));
-        this->firstcall=false;
-    }
-    else{
+        this->firstcall = false;
+    } else {
         reg_GetConjugateGradient_gpu(this->gradient_gpu,
                                      this->array1,
                                      this->array2,
                                      (int)(this->GetVoxNumber()));
     }
-    return;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_conjugateGradient_gpu::Optimise(float maxLength,
                                          float smallLength,
-                                         float &startLength)
-{
+                                         float &startLength) {
     this->UpdateGradientValues();
-	reg_optimiser::Optimise(maxLength,
-							smallLength,
-							startLength);
+    reg_optimiser::Optimise(maxLength,
+                            smallLength,
+                            startLength);
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_conjugateGradient_gpu::Perturbation(float length)
-{
+void reg_conjugateGradient_gpu::Perturbation(float length) {
     reg_optimiser_gpu::Perturbation(length);
-    this->firstcall=true;
+    this->firstcall = true;
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_conjugateGradient_gpu::reg_test_optimiser()
-{
+void reg_conjugateGradient_gpu::reg_test_optimiser() {
     this->UpdateGradientValues();
     reg_optimiser_gpu::reg_test_optimiser();
 }
@@ -223,96 +198,93 @@ void reg_conjugateGradient_gpu::reg_test_optimiser()
 void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d,
                                          float4 *conjugateG_d,
                                          float4 *conjugateH_d,
-                                         int nodeNumber)
-{
+                                         int nodeNumber) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4)));
 
     const unsigned int Grid_reg_initialiseConjugateGradient =
-    (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_initialiseConjugateGradient));
-    dim3 G1(Grid_reg_initialiseConjugateGradient,Grid_reg_initialiseConjugateGradient,1);
-    dim3 B1(NR_BLOCK->Block_reg_initialiseConjugateGradient,1,1);
+        (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_initialiseConjugateGradient));
+    dim3 G1(Grid_reg_initialiseConjugateGradient, Grid_reg_initialiseConjugateGradient, 1);
+    dim3 B1(NR_BLOCK->Block_reg_initialiseConjugateGradient, 1, 1);
 
     reg_initialiseConjugateGradient_kernel <<< G1, B1 >>> (conjugateG_d);
-    NR_CUDA_CHECK_KERNEL(G1,B1)
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
-    NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateH_d, conjugateG_d, nodeNumber*sizeof(float4), cudaMemcpyDeviceToDevice))
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateH_d, conjugateG_d, nodeNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
                                   float4 *conjugateG_d,
                                   float4 *conjugateH_d,
-                                  int nodeNumber)
-{
+                                  int nodeNumber) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, conjugateG_d, nodeNumber*sizeof(float4)))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateHTexture, conjugateH_d, nodeNumber*sizeof(float4)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, conjugateG_d, nodeNumber * sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateHTexture, conjugateH_d, nodeNumber * sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4)));
 
     // gam = sum((grad+g)*grad)/sum(HxG);
-    const unsigned int Grid_reg_GetConjugateGradient1 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_GetConjugateGradient1));
-    dim3 B1(NR_BLOCK->Block_reg_GetConjugateGradient1,1,1);
-    dim3 G1(Grid_reg_GetConjugateGradient1,Grid_reg_GetConjugateGradient1,1);
+    const unsigned int Grid_reg_GetConjugateGradient1 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_GetConjugateGradient1));
+    dim3 B1(NR_BLOCK->Block_reg_GetConjugateGradient1, 1, 1);
+    dim3 G1(Grid_reg_GetConjugateGradient1, Grid_reg_GetConjugateGradient1, 1);
 
     float2 *sum_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&sum_d, nodeNumber*sizeof(float2)))
+    NR_CUDA_SAFE_CALL(cudaMalloc(&sum_d, nodeNumber * sizeof(float2)));
     reg_GetConjugateGradient1_kernel <<< G1, B1 >>> (sum_d);
-    NR_CUDA_CHECK_KERNEL(G1,B1)
-    float2 *sum_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&sum_h, nodeNumber*sizeof(float2)))
-    NR_CUDA_SAFE_CALL(cudaMemcpy(sum_h,sum_d, nodeNumber*sizeof(float2),cudaMemcpyDeviceToHost))
-    NR_CUDA_SAFE_CALL(cudaFree(sum_d))
-    double dgg = 0.0;
-    double gg = 0.0;
-    for(int i=0; i<nodeNumber; i++){
-    dgg += sum_h[i].x;
-    gg += sum_h[i].y;
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+    float2 *sum_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&sum_h, nodeNumber * sizeof(float2)))
+        NR_CUDA_SAFE_CALL(cudaMemcpy(sum_h, sum_d, nodeNumber * sizeof(float2), cudaMemcpyDeviceToHost))
+        NR_CUDA_SAFE_CALL(cudaFree(sum_d))
+        double dgg = 0;
+    double gg = 0;
+    for (int i = 0; i < nodeNumber; i++) {
+        dgg += sum_h[i].x;
+        gg += sum_h[i].y;
     }
     float gam = (float)(dgg / gg);
-    NR_CUDA_SAFE_CALL(cudaFreeHost((void *)sum_h))
+    NR_CUDA_SAFE_CALL(cudaFreeHost((void *)sum_h));
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor,&gam,sizeof(float)))
-    const unsigned int Grid_reg_GetConjugateGradient2 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_GetConjugateGradient2));
-    dim3 B2(NR_BLOCK->Block_reg_GetConjugateGradient2,1,1);
-    dim3 G2(Grid_reg_GetConjugateGradient2,Grid_reg_GetConjugateGradient2,1);
-	reg_GetConjugateGradient2_kernel <<< G2, B2 >>> (gradientArray_d, conjugateG_d, conjugateH_d);
-    NR_CUDA_CHECK_KERNEL(G1,B1)
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &gam, sizeof(float)));
+    const unsigned int Grid_reg_GetConjugateGradient2 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_GetConjugateGradient2));
+    dim3 B2(NR_BLOCK->Block_reg_GetConjugateGradient2, 1, 1);
+    dim3 G2(Grid_reg_GetConjugateGradient2, Grid_reg_GetConjugateGradient2, 1);
+    reg_GetConjugateGradient2_kernel <<< G2, B2 >>> (gradientArray_d, conjugateG_d, conjugateH_d);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
 
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateGTexture))
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateHTexture))
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateGTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateHTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
 
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber)
-{
+float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     // Copy constant memory value and bind texture
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4)));
 
-    float *dist_d=nullptr;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&dist_d,nodeNumber*sizeof(float)))
+    float *dist_d = nullptr;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&dist_d, nodeNumber * sizeof(float)));
 
-    const unsigned int Grid_reg_getEuclideanDistance = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_getEuclideanDistance));
-    dim3 B1(NR_BLOCK->Block_reg_getEuclideanDistance,1,1);
-    dim3 G1(Grid_reg_getEuclideanDistance,Grid_reg_getEuclideanDistance,1);
+    const unsigned int Grid_reg_getEuclideanDistance = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_getEuclideanDistance));
+    dim3 B1(NR_BLOCK->Block_reg_getEuclideanDistance, 1, 1);
+    dim3 G1(Grid_reg_getEuclideanDistance, Grid_reg_getEuclideanDistance, 1);
     reg_getEuclideanDistance_kernel <<< G1, B1 >>> (dist_d);
-    NR_CUDA_CHECK_KERNEL(G1,B1)
-	// Unbind the textures
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+    // Unbind the textures
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
 
-    float maxDistance = reg_maxReduction_gpu(dist_d,nodeNumber);
-    NR_CUDA_SAFE_CALL(cudaFree(dist_d))
+    float maxDistance = reg_maxReduction_gpu(dist_d, nodeNumber);
+    NR_CUDA_SAFE_CALL(cudaFree(dist_d));
 
     return maxDistance;
 }
@@ -321,29 +293,27 @@ float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber)
 void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
                                         float4 *controlPointImageArray_d,
                                         float4 *bestControlPointPosition_d,
-										float4 *gradientArray_d,
-                                        float currentLength)
-
-{
+                                        float4 *gradientArray_d,
+                                        float currentLength) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &currentLength, sizeof(float)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &currentLength, sizeof(float)));
 
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, bestControlPointPosition_d, nodeNumber * sizeof(float4)))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, bestControlPointPosition_d, nodeNumber * sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4)));
 
     const unsigned int Grid_reg_updateControlPointPosition =
-            (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_updateControlPointPosition));
-    dim3 B1(NR_BLOCK->Block_reg_updateControlPointPosition,1,1);
-    dim3 G1(Grid_reg_updateControlPointPosition,Grid_reg_updateControlPointPosition,1);
+        (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_updateControlPointPosition));
+    dim3 B1(NR_BLOCK->Block_reg_updateControlPointPosition, 1, 1);
+    dim3 G1(Grid_reg_updateControlPointPosition, Grid_reg_updateControlPointPosition, 1);
     reg_updateControlPointPosition_kernel <<< G1, B1 >>> (controlPointImageArray_d);
-    NR_CUDA_CHECK_KERNEL(G1,B1)
-	// Unbind the textures
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture))
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+    // Unbind the textures
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_updateControlPointPosition_gpu() called\n");
 #endif
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index 9af5eb7f..44659e65 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -7,7 +7,7 @@
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /** @class reg_optimiser_gpu
- * @brief Standard gradient acent optimisation for GPU
+ * @brief Standard gradient ascent optimisation for GPU
  */
 class reg_optimiser_gpu: public reg_optimiser<float> {
 protected:
@@ -51,7 +51,7 @@ class reg_optimiser_gpu: public reg_optimiser<float> {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /** @class reg_conjugateGradient_gpu
- * @brief Conjugate gradient acent optimisation for GPU
+ * @brief Conjugate gradient ascent optimisation for GPU
  */
 class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
 protected:
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 5a1e6e62..af204451 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -15,190 +15,179 @@
 
 /* *************************************************************** */
 /* *************************************************************** */
-reg_ssd_gpu::reg_ssd_gpu()
-	: reg_ssd::reg_ssd()
-{
+reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() {
 #ifndef NDEBUG
-		printf("[NiftyReg DEBUG] reg_ssd_gpu constructor called\n");
+    printf("[NiftyReg DEBUG] reg_ssd_gpu constructor called\n");
 #endif
 }
 /* *************************************************************** */
 /* *************************************************************** */
 void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr,
-									nifti_image *floImgPtr,
-									int *maskRefPtr,
-									int activeVoxNum,
-									nifti_image *warFloImgPtr,
-									nifti_image *warFloGraPtr,
-									nifti_image *forVoxBasedGraPtr,
-									nifti_image *localWeightSimPtr,
-									cudaArray *refDevicePtr,
-									cudaArray *floDevicePtr,
-									int *refMskDevicePtr,
-									float *warFloDevicePtr,
-									float4 *warFloGradDevicePtr,
-									float4 *forVoxBasedGraDevicePtr)
-{
-	reg_ssd::InitialiseMeasure(refImgPtr,
-							   floImgPtr,
-							   maskRefPtr,
-							   warFloImgPtr,
-							   warFloGraPtr,
-							   forVoxBasedGraPtr,
-							   localWeightSimPtr);
-	// Check if a symmetric measure is required
-	if(this->isSymmetric){
-		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-		fprintf(stderr,"[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n");
-		reg_exit();
-	}
-	// Check that the input image are of type float
-	if(this->referenceImagePointer->datatype!=NIFTI_TYPE_FLOAT32 ||
-	   this->warpedFloatingImagePointer->datatype!=NIFTI_TYPE_FLOAT32){
-		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-		fprintf(stderr,"[NiftyReg ERROR] The input images are expected to be float\n");
-		reg_exit();
-	}
-	// Check that the input images have only one time point
-	if(this->referenceImagePointer->nt>1 || this->floatingImagePointer->nt>1){
-		fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-		fprintf(stderr,"[NiftyReg ERROR] Both input images should have only one time point\n");
-		reg_exit();
-	}
-	// Bind the required pointers
-	this->referenceDevicePointer = refDevicePtr;
-	this->floatingDevicePointer = floDevicePtr;
-	this->referenceMaskDevicePointer = refMskDevicePtr;
-	this->activeVoxeNumber=activeVoxNum;
-	this->warpedFloatingDevicePointer = warFloDevicePtr;
-	this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr;
-	this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr;
+                                    nifti_image *floImgPtr,
+                                    int *maskRefPtr,
+                                    int activeVoxNum,
+                                    nifti_image *warFloImgPtr,
+                                    nifti_image *warFloGraPtr,
+                                    nifti_image *forVoxBasedGraPtr,
+                                    nifti_image *localWeightSimPtr,
+                                    cudaArray *refDevicePtr,
+                                    cudaArray *floDevicePtr,
+                                    int *refMskDevicePtr,
+                                    float *warFloDevicePtr,
+                                    float4 *warFloGradDevicePtr,
+                                    float4 *forVoxBasedGraDevicePtr) {
+    reg_ssd::InitialiseMeasure(refImgPtr,
+                               floImgPtr,
+                               maskRefPtr,
+                               warFloImgPtr,
+                               warFloGraPtr,
+                               forVoxBasedGraPtr,
+                               localWeightSimPtr);
+    // Check if a symmetric measure is required
+    if (this->isSymmetric) {
+        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
+        fprintf(stderr, "[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n");
+        reg_exit();
+    }
+    // Check that the input image are of type float
+    if (this->referenceImagePointer->datatype != NIFTI_TYPE_FLOAT32 ||
+        this->warpedFloatingImagePointer->datatype != NIFTI_TYPE_FLOAT32) {
+        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
+        fprintf(stderr, "[NiftyReg ERROR] The input images are expected to be float\n");
+        reg_exit();
+    }
+    // Check that the input images have only one time point
+    if (this->referenceImagePointer->nt > 1 || this->floatingImagePointer->nt > 1) {
+        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
+        fprintf(stderr, "[NiftyReg ERROR] Both input images should have only one time point\n");
+        reg_exit();
+    }
+    // Bind the required pointers
+    this->referenceDevicePointer = refDevicePtr;
+    this->floatingDevicePointer = floDevicePtr;
+    this->referenceMaskDevicePointer = refMskDevicePtr;
+    this->activeVoxelNumber = activeVoxNum;
+    this->warpedFloatingDevicePointer = warFloDevicePtr;
+    this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr;
+    this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr;
 #ifndef NDEBUG
-		printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n");
+    printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n");
 #endif
 }
 /* *************************************************************** */
 float reg_getSSDValue_gpu(nifti_image *referenceImage,
-						  cudaArray **reference_d,
-						  float **warped_d,
-						  int **mask_d,
-						  int activeVoxelNumber
-						  )
-{
+                          cudaArray **reference_d,
+                          float **warped_d,
+                          int **mask_d,
+                          int activeVoxelNumber) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-	// Copy the constant memory variables
-	int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-	int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz;
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceDim,sizeof(int3)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int)))
-	// Bind the required textures
-	referenceTexture.normalized = true;
-	referenceTexture.filterMode = cudaFilterModeLinear;
-	referenceTexture.addressMode[0] = cudaAddressModeWrap;
-	referenceTexture.addressMode[1] = cudaAddressModeWrap;
-	referenceTexture.addressMode[2] = cudaAddressModeWrap;
-	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-	NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, *reference_d, channelDesc))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, *warped_d, voxelNumber*sizeof(float)))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber*sizeof(int)))
-	// Create an array on the device to store the absolute difference values
-	float *absoluteValues_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValues_d, activeVoxelNumber*sizeof(float)))
-	// Compute the absolute values
-	const unsigned int Grid_reg_getSquaredDifference =
-            (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getSquaredDifference));
-    dim3 B1(NR_BLOCK->Block_reg_getSquaredDifference,1,1);
-	dim3 G1(Grid_reg_getSquaredDifference,Grid_reg_getSquaredDifference,1);
-	if(referenceDim.z>1)
-		reg_getSquaredDifference3D_kernel <<< G1, B1 >>> (absoluteValues_d);
-	else reg_getSquaredDifference2D_kernel <<< G1, B1 >>> (absoluteValues_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1)
-	// Unbind the textures
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture))
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture))
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture))
-	// Perform a reduction on the absolute values
-    float ssd = (float)((double)reg_sumReduction_gpu(absoluteValues_d,activeVoxelNumber) / (double)activeVoxelNumber);
-	// Free the absolute value array
-	NR_CUDA_SAFE_CALL(cudaFree(absoluteValues_d))
+    // Copy the constant memory variables
+    int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz;
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
+    // Bind the required textures
+    referenceTexture.normalized = true;
+    referenceTexture.filterMode = cudaFilterModeLinear;
+    referenceTexture.addressMode[0] = cudaAddressModeWrap;
+    referenceTexture.addressMode[1] = cudaAddressModeWrap;
+    referenceTexture.addressMode[2] = cudaAddressModeWrap;
+    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
+    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, *reference_d, channelDesc));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, *warped_d, voxelNumber * sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber * sizeof(int)));
+    // Create an array on the device to store the absolute difference values
+    float *absoluteValues_d;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValues_d, activeVoxelNumber * sizeof(float)));
+    // Compute the absolute values
+    const unsigned int Grid_reg_getSquaredDifference =
+        (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getSquaredDifference));
+    dim3 B1(NR_BLOCK->Block_reg_getSquaredDifference, 1, 1);
+    dim3 G1(Grid_reg_getSquaredDifference, Grid_reg_getSquaredDifference, 1);
+    if (referenceDim.z > 1)
+        reg_getSquaredDifference3D_kernel <<< G1, B1 >>> (absoluteValues_d);
+    else reg_getSquaredDifference2D_kernel <<< G1, B1 >>> (absoluteValues_d);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+    // Unbind the textures
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
+    // Perform a reduction on the absolute values
+    float ssd = (float)((double)reg_sumReduction_gpu(absoluteValues_d, activeVoxelNumber) / (double)activeVoxelNumber);
+    // Free the absolute value array
+    NR_CUDA_SAFE_CALL(cudaFree(absoluteValues_d));
 
-	return ssd;
+    return ssd;
 }
 /* *************************************************************** */
 /* *************************************************************** */
-double reg_ssd_gpu::GetSimilarityMeasureValue()
-{
-	double SSDValue = reg_getSSDValue_gpu(this->referenceImagePointer,
-										  &this->referenceDevicePointer,
-										  &this->warpedFloatingDevicePointer,
-										  &this->referenceMaskDevicePointer,
-										  this->activeVoxeNumber
-										  );
+double reg_ssd_gpu::GetSimilarityMeasureValue() {
+    double SSDValue = reg_getSSDValue_gpu(this->referenceImagePointer,
+                                          &this->referenceDevicePointer,
+                                          &this->warpedFloatingDevicePointer,
+                                          &this->referenceMaskDevicePointer,
+                                          this->activeVoxelNumber);
     return -SSDValue;
 }
 /* *************************************************************** */
 /* *************************************************************** */
 void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
-									  cudaArray *reference_d,
-									  float *warped_d,
-									  float4 *spaGradient_d,
-									  float4 *ssdGradient_d,
-									  float maxSD,
-									  int *mask_d,
-									  int activeVoxelNumber)
-{
+                                      cudaArray *reference_d,
+                                      float *warped_d,
+                                      float4 *spaGradient_d,
+                                      float4 *ssdGradient_d,
+                                      float maxSD,
+                                      int *mask_d,
+                                      int activeVoxelNumber) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-	// Copy the constant memory variables
-	int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-	int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz;
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceDim,sizeof(int3)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisationNumber,&maxSD,sizeof(float)))
-	// Bind the required textures
-	referenceTexture.normalized = true;
-	referenceTexture.filterMode = cudaFilterModeLinear;
-	referenceTexture.addressMode[0] = cudaAddressModeWrap;
-	referenceTexture.addressMode[1] = cudaAddressModeWrap;
-	referenceTexture.addressMode[2] = cudaAddressModeWrap;
-	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-	NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, reference_d, channelDesc))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, warped_d, voxelNumber*sizeof(float)))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, spaGradient_d, voxelNumber*sizeof(float4)))
-	// Set the gradient image to zero
-	NR_CUDA_SAFE_CALL(cudaMemset(ssdGradient_d,0,voxelNumber*sizeof(float4)))
-	const unsigned int Grid_reg_getSSDGradient =
-            (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getSSDGradient));
-    dim3 B1(NR_BLOCK->Block_reg_getSSDGradient,1,1);
-	dim3 G1(Grid_reg_getSSDGradient,Grid_reg_getSSDGradient,1);
-	if(referenceDim.z>1)
-		reg_getSSDGradient3D_kernel <<< G1, B1 >>> (ssdGradient_d);
-	else reg_getSSDGradient2D_kernel <<< G1, B1 >>> (ssdGradient_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1)
-	// Unbind the textures
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture))
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture))
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture))
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(spaGradientTexture))
+    // Copy the constant memory variables
+    int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz;
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisationNumber, &maxSD, sizeof(float)));
+    // Bind the required textures
+    referenceTexture.normalized = true;
+    referenceTexture.filterMode = cudaFilterModeLinear;
+    referenceTexture.addressMode[0] = cudaAddressModeWrap;
+    referenceTexture.addressMode[1] = cudaAddressModeWrap;
+    referenceTexture.addressMode[2] = cudaAddressModeWrap;
+    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
+    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, reference_d, channelDesc));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, warped_d, voxelNumber * sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, spaGradient_d, voxelNumber * sizeof(float4)));
+    // Set the gradient image to zero
+    NR_CUDA_SAFE_CALL(cudaMemset(ssdGradient_d, 0, voxelNumber * sizeof(float4)))
+        const unsigned int Grid_reg_getSSDGradient =
+        (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getSSDGradient));
+    dim3 B1(NR_BLOCK->Block_reg_getSSDGradient, 1, 1);
+    dim3 G1(Grid_reg_getSSDGradient, Grid_reg_getSSDGradient, 1);
+    if (referenceDim.z > 1)
+        reg_getSSDGradient3D_kernel <<< G1, B1 >>> (ssdGradient_d);
+    else reg_getSSDGradient2D_kernel <<< G1, B1 >>> (ssdGradient_d);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+    // Unbind the textures
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(spaGradientTexture));
 }
 /* *************************************************************** */
 /* *************************************************************** */
-void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
-{
-	reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer,
-									 this->referenceDevicePointer,
-									 this->warpedFloatingDevicePointer,
-									 this->warpedFloatingGradientDevicePointer,
-									 this->forwardVoxelBasedGradientDevicePointer,
+void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+    reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer,
+                                     this->referenceDevicePointer,
+                                     this->warpedFloatingDevicePointer,
+                                     this->warpedFloatingGradientDevicePointer,
+                                     this->forwardVoxelBasedGradientDevicePointer,
                                      1.0f,
-									 this->referenceMaskDevicePointer,
-									 this->activeVoxeNumber
-									 );
-	return;
+                                     this->referenceMaskDevicePointer,
+                                     this->activeVoxelNumber);
 }
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 91e8b05f..f7b7f96b 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -19,8 +19,7 @@
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /// @brief SSD measure of similarity class on the device
-class reg_ssd_gpu : public reg_ssd , public reg_measure_gpu
-{
+class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
 public:
     /// @brief reg_ssd class constructor
     reg_ssd_gpu();
@@ -53,8 +52,7 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage,
                           cudaArray **reference_d,
                           float **warped_d,
                           int **mask_d,
-                          int activeVoxelNumber
-                         );
+                          int activeVoxelNumber);
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 extern "C++"
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 762d0972..8f7fd210 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -163,7 +163,7 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
 		const unsigned int referenceSize = __syncthreads_count(finiteReference);
 
         float bestDisplacement[2] = {nanf("sNaN"), 0.0f};
-        float bestCC = 0.0;
+        float bestCC = 0;
 
 		if (referenceSize > 8) {
 			//the target values must remain constant throughout the block matching process
@@ -196,7 +196,7 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
 						const float warpedVar = blockReduce2DSum(warpedTemp * warpedTemp, tid);
 
 						const float sumTargetResult = blockReduce2DSum((newreferenceTemp)* (warpedTemp), tid);
-                        const float localCC = (newreferenceVar * warpedVar) > 0.0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0.0;
+                        const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0;
 
                         if (tid == 0 && localCC > bestCC) {
                             bestCC = localCC + 1.0e-7f;
@@ -520,7 +520,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
 							const float warpedVar = blockReduceSum(warpedTemp * warpedTemp, tid);
 
 							const float sumTargetResult = blockReduceSum((newreferenceTemp)* (warpedTemp), tid);
-                            const float localCC = (newreferenceVar * warpedVar) > 0.0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0.0;
+                            const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0;
 
                             if (tid == 0 && localCC > bestCC) {
                                 bestCC = localCC + 1.0e-7f;
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index 50a97ee0..be20a80b 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -55,8 +55,8 @@ __device__ __inline__ int cuda_reg_floor(double a)
 template<class FieldTYPE>
 __device__ __inline__ void interpolantCubicSpline(FieldTYPE ratio, FieldTYPE *basis)
 {
-    if (ratio < 0.0)
-        ratio = 0.0; //reg_rounding error
+    if (ratio < 0)
+        ratio = 0; //reg_rounding error
     double FF = (double) ratio * ratio;
     basis[0] = (FieldTYPE) ((ratio * (((double)2.0 - ratio) * ratio - (double)1.0)) / (double)2.0);
     basis[1] = (FieldTYPE) ((FF * ((double)3.0 * ratio - 5.0) + 2.0) / (double)2.0);
@@ -78,13 +78,13 @@ void reg_mat44_eye(float *mat) {
 /* *************************************************************** */
 __inline__ __device__ void interpWindowedSincKernel(double relative, double *basis)
 {
-	if (relative < 0.0)
-		relative = 0.0; //reg_rounding error
+	if (relative < 0)
+		relative = 0; //reg_rounding error
 	int j = 0;
 	double sum = 0.;
 	for (int i = -SINC_KERNEL_RADIUS; i < SINC_KERNEL_RADIUS; ++i) {
 		double x = relative - (double) (i);
-		if (x == 0.0)
+		if (x == 0)
 			basis[j] = 1.0;
 		else if (abs(x) >= (double) (SINC_KERNEL_RADIUS))
 			basis[j] = 0;
@@ -101,8 +101,8 @@ __inline__ __device__ void interpWindowedSincKernel(double relative, double *bas
 /* *************************************************************** */
 __inline__ __device__ void interpCubicSplineKernel(double relative, double *basis)
 {
-	if (relative < 0.0)
-		relative = 0.0; //reg_rounding error
+	if (relative < 0)
+		relative = 0; //reg_rounding error
 	double FF = relative * relative;
 	basis[0] = (relative * ((2.0 - relative) * relative - 1.0)) / 2.0;
 	basis[1] = (FF * (3.0 * relative - 5.0) + 2.0) / 2.0;
@@ -112,17 +112,17 @@ __inline__ __device__ void interpCubicSplineKernel(double relative, double *basi
 /* *************************************************************** */
 __inline__ __device__ void interpLinearKernel(double relative, double *basis)
 {
-	if (relative < 0.0)
-		relative = 0.0; //reg_rounding error
+	if (relative < 0)
+		relative = 0; //reg_rounding error
 	basis[1] = relative;
 	basis[0] = 1.0 - relative;
 }
 /* *************************************************************** */
 __inline__ __device__ void interpNearestNeighKernel(double relative, double *basis)
 {
-	if (relative < 0.0)
-		relative = 0.0; //reg_rounding error
-	basis[0] = basis[1] = 0.0;
+	if (relative < 0)
+		relative = 0; //reg_rounding error
+	basis[0] = basis[1] = 0;
     if (relative >= 0.5)
 		basis[1] = 1;
 	else
@@ -138,12 +138,12 @@ __inline__ __device__ double interpLoop2D(float* floatingIntensity,
     float paddingValue,
     unsigned int kernel_size)
 {
-    double intensity = (double)(0.0);
+    double intensity = 0;
 
         for (int b = 0; b < kernel_size; b++) {
             int Y = previous[1] + b;
             bool yInBounds = -1 < Y && Y < fi_xyz.y;
-            double xTempNewValue = 0.0;
+            double xTempNewValue = 0;
 
             for (int a = 0; a < kernel_size; a++) {
                 int X = previous[0] + a;
@@ -167,15 +167,15 @@ __inline__ __device__ double interpLoop3D(float* floatingIntensity,
                                           float paddingValue,
                                           unsigned int kernel_size)
 {
-	double intensity = (double)(0.0);
+	double intensity = 0;
 	for (int c = 0; c < kernel_size; c++) {
 		int Z = previous[2] + c;
 		bool zInBounds = -1 < Z && Z < fi_xyz.z;
-		double yTempNewValue = 0.0;
+		double yTempNewValue = 0;
 		for (int b = 0; b < kernel_size; b++) {
 			int Y = previous[1] + b;
 			bool yInBounds = -1 < Y && Y < fi_xyz.y;
-			double xTempNewValue = 0.0;
+			double xTempNewValue = 0;
 			for (int a = 0; a < kernel_size; a++) {
 				int X = previous[0] + a;
 				bool xInBounds = -1 < X && X < fi_xyz.x;
diff --git a/reg-test/reg_test_mindDescriptor.cpp b/reg-test/reg_test_mindDescriptor.cpp
index b848f16d..37c90641 100644
--- a/reg-test/reg_test_mindDescriptor.cpp
+++ b/reg-test/reg_test_mindDescriptor.cpp
@@ -44,7 +44,7 @@ int main(int argc, char **argv)
 
     // Compute the MIND descriptor
     int *mask = (int *)calloc(inputImage->nvox, sizeof(int));
-    GetMINDImageDesciptor(inputImage,MIND_img, mask, 1, 0);
+    GetMINDImageDescriptor(inputImage,MIND_img, mask, 1, 0);
     free(mask);
     //
     //Compute the difference between the computed and expected image
diff --git a/reg-test/reg_test_mindsscDescriptor.cpp b/reg-test/reg_test_mindsscDescriptor.cpp
index c2090567..11d9a81c 100644
--- a/reg-test/reg_test_mindsscDescriptor.cpp
+++ b/reg-test/reg_test_mindsscDescriptor.cpp
@@ -48,7 +48,7 @@ int main(int argc, char **argv)
 
     // Compute the MIND descriptor
     int *mask = (int *)calloc(inputImage->nvox, sizeof(int));
-    GetMINDSSCImageDesciptor(inputImage,MINDSSC_img, mask, 1, 0);
+    GetMINDSSCImageDescriptor(inputImage,MINDSSC_img, mask, 1, 0);
     free(mask);
     //
     //Compute the difference between the computed and expected image

From 053c4200ffbfc507daf228842a620c2bc0a25b67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 11 Jan 2023 16:34:50 +0000
Subject: [PATCH 032/314] Introduce PlatformType

---
 niftyreg_build_version.txt                    |  2 +-
 reg-apps/reg_aladin.cpp                       | 18 ++++++------
 reg-apps/reg_f3d.cpp                          | 18 ++++++------
 reg-lib/Platform.cpp                          | 28 +++++++++----------
 reg-lib/Platform.h                            | 12 ++++----
 reg-lib/_reg_aladin.cpp                       | 10 +++----
 reg-lib/_reg_aladin.h                         |  6 ++--
 reg-lib/_reg_aladin_sym.cpp                   |  6 ++--
 reg-lib/_reg_base.cpp                         |  4 +--
 reg-lib/_reg_base.h                           |  4 +--
 reg-lib/_reg_f3d.cpp                          |  4 +--
 .../reg_test_affine_deformation_field.cpp     | 12 ++++----
 reg-test/reg_test_blockMatching.cpp           | 18 ++++++------
 .../reg_test_bspline_deformation_field.cpp    |  4 +--
 ...est_coherence_affine_deformation_field.cpp | 18 ++++++------
 reg-test/reg_test_coherence_blockMatching.cpp | 22 +++++++--------
 reg-test/reg_test_coherence_interpolation.cpp | 24 ++++++++--------
 reg-test/reg_test_fullAffine.cpp              |  2 +-
 reg-test/reg_test_fullAffine_cl.cpp           |  2 +-
 reg-test/reg_test_fullAffine_cuda.cpp         |  2 +-
 reg-test/reg_test_interpolation.cpp           | 10 +++----
 reg-test/reg_test_leastTrimmedSquares.cpp     | 16 +++++------
 reg-test/reg_test_svd_cuda.cpp                |  4 +--
 23 files changed, 122 insertions(+), 124 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index b4f334f2..fba7ed52 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-141
+143
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 739b539f..24cc3ac5 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -173,7 +173,7 @@ int main(int argc, char **argv)
    bool iso=false;
    bool verbose=true;
    int captureRangeVox = 3;
-   int platformFlag = NR_PLATFORM_CPU;
+   PlatformType platformType(PlatformType::Cpu);
    unsigned gpuIdx = 999;
 
 #if defined (_OPENMP)
@@ -352,26 +352,26 @@ int main(int argc, char **argv)
       }
       else if(strcmp(argv[i], "-platf")==0 || strcmp(argv[i], "--platf")==0)
       {
-         int value=atoi(argv[++i]);
-         if(value<NR_PLATFORM_CPU || value>NR_PLATFORM_CL){
+         PlatformType value{atoi(argv[++i])};
+         if(int(value)<int(PlatformType::Cpu) || int(value)>int(PlatformType::OpenCl)){
             reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL");
             return EXIT_FAILURE;
          }
 #ifndef _USE_CUDA
-            if(value==NR_PLATFORM_CUDA){
+         if (value == PlatformType::Cuda) {
                reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA");
                reg_print_msg_warn("The CPU platform is used");
-               value=0;
+               value=PlatformType::Cpu;
             }
 #endif
 #ifndef _USE_OPENCL
-            if(value==NR_PLATFORM_CL){
+            if(value==PlatformType::OpenCl){
                reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL");
                reg_print_msg_warn("The CPU platform is used");
-               value=0;
+               value=PlatformType::Cpu;
             }
 #endif
-         platformFlag=value;
+         platformType=value;
       }
       else if(strcmp(argv[i], "-gpuid")==0 || strcmp(argv[i], "--gpuid")==0)
       {
@@ -551,7 +551,7 @@ int main(int argc, char **argv)
    REG->SetInlierLts(inlierLts);
    REG->SetInterpolation(interpolation);
    REG->SetCaptureRangeVox(captureRangeVox);
-   REG->SetPlatformCode(platformFlag);
+   REG->SetPlatformType(platformType);
    REG->SetGpuIdx(gpuIdx);
 
    if (referenceLowerThr != referenceUpperThr)
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 46eabf25..722e0c7f 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -281,7 +281,7 @@ int main(int argc, char **argv) {
     reg_f3d<float> *reg = nullptr;
     float *referenceLandmark = nullptr;
     float *floatingLandmark = nullptr;
-    int platformFlag = NR_PLATFORM_CPU;
+    PlatformType platformType(PlatformType::Cpu);
     unsigned gpuIdx = 999;
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) {
@@ -292,26 +292,26 @@ int main(int argc, char **argv) {
             // reg = new reg_f3d_sym<float>(referenceImage->nt, floatingImage->nt);
             break;
         } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
-            int value = atoi(argv[++i]);
-            if (value < NR_PLATFORM_CPU || value > NR_PLATFORM_CL) {
+            PlatformType value{atoi(argv[++i])};
+            if (int(value) < int(PlatformType::Cpu) || int(value) > int(PlatformType::OpenCl)) {
                 reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL");
                 return EXIT_FAILURE;
             }
 #ifndef _USE_CUDA
-            if (value == NR_PLATFORM_CUDA) {
+            if (value == PlatformType::Cuda) {
                 reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA");
                 reg_print_msg_warn("The CPU platform is used");
-                value = 0;
+                value = PlatformType::Cpu;
             }
 #endif
 #ifndef _USE_OPENCL
-            if (value == NR_PLATFORM_CL) {
+            if (value == PlatformType::OpenCl) {
                 reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL");
                 reg_print_msg_warn("The CPU platform is used");
-                value = 0;
+                value = PlatformType::Cpu;
             }
 #endif
-            platformFlag = value;
+            platformType = value;
         } else if (strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) {
             gpuIdx = unsigned(atoi(argv[++i]));
         }
@@ -320,7 +320,7 @@ int main(int argc, char **argv) {
         reg = new reg_f3d<float>(referenceImage->nt, floatingImage->nt);
     reg->SetReferenceImage(referenceImage);
     reg->SetFloatingImage(floatingImage);
-    reg->SetPlatformCode(platformFlag);
+    reg->SetPlatformType(platformType);
     reg->SetGpuIdx(gpuIdx);
 
     // Create some pointers that could be used
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index ab20b0be..74865e27 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -14,22 +14,22 @@
 #endif
 
 /* *************************************************************** */
-Platform::Platform(int platformCodeIn) {
-    platformCode = platformCodeIn;
-    if (platformCode == NR_PLATFORM_CPU) {
+Platform::Platform(const PlatformType& platformTypeIn) {
+    platformType = platformTypeIn;
+    if (platformType == PlatformType::Cpu) {
         kernelFactory = new CpuKernelFactory();
         computeFactory = new ComputeFactory();
         platformName = "cpu_platform";
     }
 #ifdef _USE_CUDA
-    else if (platformCode == NR_PLATFORM_CUDA) {
+    else if (platformType == PlatformType::Cuda) {
         kernelFactory = new CudaKernelFactory();
         computeFactory = new CudaComputeFactory();
         platformName = "cuda_platform";
     }
 #endif
 #ifdef _USE_OPENCL
-    else if (platformCode == NR_PLATFORM_CL) {
+    else if (platformType == PlatformType::OpenCl) {
         kernelFactory = new ClKernelFactory();
         computeFactory = new ClComputeFactory();
         platformName = "cl_platform";
@@ -57,13 +57,13 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent *con,
     nifti_image *controlPointGrid = con->F3dContent::GetControlPointGrid();
     Type *controlPointGridData, *transformationGradientData;
 
-    if (platformCode == NR_PLATFORM_CPU) {
+    if (platformType == PlatformType::Cpu) {
         optimiser = useConjGradient ? new reg_conjugateGradient<Type>() : new reg_optimiser<Type>();
         controlPointGridData = (Type*)controlPointGrid->data;
         transformationGradientData = (Type*)con->F3dContent::GetTransformationGradient()->data;
     }
 #ifdef _USE_CUDA
-    else if (platformCode == NR_PLATFORM_CUDA) {
+    else if (platformType == PlatformType::Cuda) {
         optimiser = dynamic_cast<reg_optimiser<Type>*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu());
         controlPointGridData = (Type*)dynamic_cast<CudaF3dContent*>(con)->GetControlPointGridCuda();
         transformationGradientData = (Type*)dynamic_cast<CudaF3dContent*>(con)->GetTransformationGradientCuda();
@@ -95,11 +95,11 @@ unsigned Platform::GetGpuIdx() {
 }
 /* *************************************************************** */
 void Platform::SetGpuIdx(unsigned gpuIdxIn) {
-    if (platformCode == NR_PLATFORM_CPU) {
+    if (platformType == PlatformType::Cpu) {
         gpuIdx = 999;
     }
 #ifdef _USE_CUDA
-    else if (platformCode == NR_PLATFORM_CUDA) {
+    else if (platformType == PlatformType::Cuda) {
         CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance();
         if (gpuIdxIn != 999) {
             gpuIdx = gpuIdxIn;
@@ -108,7 +108,7 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) {
     }
 #endif
 #ifdef _USE_OPENCL
-    else if (platformCode == NR_PLATFORM_CL) {
+    else if (platformType == PlatformType::OpenCl) {
         ClContextSingleton *sContext = &ClContextSingleton::Instance();
         if (gpuIdxIn != 999) {
             gpuIdx = gpuIdxIn;
@@ -128,12 +128,12 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) {
 #endif
 }
 /* *************************************************************** */
-int Platform::GetPlatformCode() {
-    return platformCode;
+PlatformType Platform::GetPlatformType() {
+    return platformType;
 }
 /* *************************************************************** */
-//void Platform::SetPlatformCode(const int platformCodeIn) {
-//    platformCode = platformCodeIn;
+//void Platform::SetPlatformType(const PlatformType& platformTypeIn) {
+//    platformType = platformTypeIn;
 //}
 /* *************************************************************** */
 Platform::~Platform() {
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index d1e02f83..9d030bca 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -5,13 +5,11 @@
 #include "ComputeFactory.h"
 #include "_reg_optimiser.h"
 
-#define NR_PLATFORM_CPU  0
-#define NR_PLATFORM_CUDA 1
-#define NR_PLATFORM_CL   2
+enum class PlatformType { Cpu, Cuda, OpenCl };
 
 class Platform {
 public:
-    Platform(int platformCodeIn);
+    Platform(const PlatformType& platformTypeIn);
     virtual ~Platform();
 
     Compute* CreateCompute(Content *con) const;
@@ -27,8 +25,8 @@ class Platform {
 
     std::string GetName();
 
-    int GetPlatformCode();
-    //void SetPlatformCode(const int platformCodeIn);
+    PlatformType GetPlatformType();
+    //void SetPlatformType(const PlatformType& platformTypeIn);
     void SetGpuIdx(unsigned gpuIdxIn);
     unsigned GetGpuIdx();
 
@@ -36,6 +34,6 @@ class Platform {
     KernelFactory *kernelFactory;
     ComputeFactory *computeFactory;
     std::string platformName;
-    int platformCode;
+    PlatformType platformType;
     unsigned gpuIdx;
 };
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 5430663d..daa21fbb 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -57,7 +57,7 @@ reg_aladin<T>::reg_aladin() {
     this->funcProgressCallback = nullptr;
     this->paramsProgressCallback = nullptr;
 
-    this->platformCode = NR_PLATFORM_CPU;
+    this->platformType = PlatformType::Cpu;
     this->currentLevel = 0;
     this->gpuIdx = 999;
 
@@ -224,7 +224,7 @@ void reg_aladin<T>::InitialiseRegistration() {
     reg_print_fct_debug("reg_aladin::InitialiseRegistration()");
 #endif
 
-    this->platform = new Platform(this->platformCode);
+    this->platform = new Platform(this->platformType);
     this->platform->SetGpuIdx(this->gpuIdx);
 
     this->Print();
@@ -455,14 +455,14 @@ void reg_aladin<T>::InitAladinContent(nifti_image *ref,
                                       unsigned int blockPercentage,
                                       unsigned int inlierLts,
                                       unsigned int blockStepSize) {
-    if (this->platformCode == NR_PLATFORM_CPU)
+    if (this->platformType == PlatformType::Cpu)
         this->con = new AladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
 #ifdef _USE_CUDA
-    else if (platformCode == NR_PLATFORM_CUDA)
+    else if (platformType == PlatformType::Cuda)
         this->con = new CudaAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
 #endif
 #ifdef _USE_OPENCL
-    else if (platformCode == NR_PLATFORM_CL)
+    else if (platformType == PlatformType::OpenCl)
         this->con = new ClAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
 #endif
     this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams();
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 72cd0988..59864741 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -114,7 +114,7 @@ class reg_aladin {
     float warpedPaddingValue;
 
     Platform *platform;
-    int platformCode;
+    PlatformType platformType;
     unsigned gpuIdx;
 
     bool TestMatrixConvergence(mat44 *mat);
@@ -178,8 +178,8 @@ class reg_aladin {
     }
     nifti_image* GetFinalWarpedImage();
 
-    void SetPlatformCode(const int platformCodeIn) {
-        this->platformCode = platformCodeIn;
+    void SetPlatformType(const PlatformType& platformTypeIn) {
+        this->platformType = platformTypeIn;
     }
     void SetGpuIdx(unsigned gpuIdxIn) {
         this->gpuIdx = gpuIdxIn;
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index cf0b8b60..bb89632b 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -274,14 +274,14 @@ void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
                                inlierLts,
                                blockStepSize);
 
-  if (this->platformCode == NR_PLATFORM_CPU)
+  if (this->platformType == PlatformType::Cpu)
   this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
 #ifdef _USE_CUDA
-  else if (this->platformCode == NR_PLATFORM_CUDA)
+  else if (this->platformType == PlatformType::Cuda)
   this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
 #endif
 #ifdef _USE_OPENCL
-  else if (this->platformCode == NR_PLATFORM_CL)
+  else if (this->platformType == PlatformType::OpenCl)
   this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
 #endif
   this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams();
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index cb973174..3a55e3c9 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -18,7 +18,7 @@
 template<class T>
 reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     platform = nullptr;
-    platformCode = NR_PLATFORM_CPU;
+    platformType = PlatformType::Cpu;
     gpuIdx = 999;
 
     optimiser = nullptr;
@@ -841,7 +841,7 @@ void reg_base<T>::Initialise() {
 
     CheckParameters();
 
-    platform = new Platform(platformCode);
+    platform = new Platform(platformType);
     platform->SetGpuIdx(gpuIdx);
 
     // CREATE THE PYRAMID IMAGES
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 4f361076..7a945bc8 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -36,7 +36,7 @@ class reg_base: public InterfaceOptimiser {
 protected:
     // Platform
     Platform *platform;
-    int platformCode;
+    PlatformType platformType;
     unsigned gpuIdx;
 
     // Content
@@ -164,7 +164,7 @@ class reg_base: public InterfaceOptimiser {
 
     // Platform
     Platform* GetPlatform();
-    void SetPlatformCode(const int platformCodeIn) { platformCode = platformCodeIn; }
+    void SetPlatformType(const PlatformType& platformTypeIn) { platformType = platformTypeIn; }
     void SetGpuIdx(unsigned gpuIdxIn) { gpuIdx = gpuIdxIn; }
 
     // Optimisation related functions
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index df94a742..3a03502f 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -375,10 +375,10 @@ void reg_f3d<T>::Initialise() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
-    if (this->platformCode == NR_PLATFORM_CPU)
+    if (this->platformType == PlatformType::Cpu)
         this->con = new F3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
 #ifdef _USE_CUDA
-    else if (this->platformCode == NR_PLATFORM_CUDA)
+    else if (this->platformType == PlatformType::Cuda)
         this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
 #endif
     this->compute = this->platform->CreateCompute(this->con);
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp
index c8b19dea..e526f511 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affine_deformation_field.cpp
@@ -28,7 +28,7 @@
 
 
 typedef std::tuple<std::string, nifti_image*, mat44*, float*, float*, float*> test_data;
-typedef std::tuple<AladinContent*, std::string, int> content_desc;
+typedef std::tuple<AladinContent*, std::string, PlatformType> content_desc;
 
 TEST_CASE("Affine deformation field", "[AffineDefField]") {
     // Create a reference 2D image
@@ -168,7 +168,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         float *test_res_z;
         std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) = test_use_case;
 
-        // Accumate all required contents with a vector
+        // Accumulate all required contents with a vector
         std::vector<content_desc> listContent;
         listContent.push_back(content_desc(
             new AladinContent(
@@ -178,7 +178,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
                 test_mat,
                 sizeof(float)),
             "CPU",
-            0));
+            PlatformType::Cpu));
 #ifdef _USE_CUDA
         listContent.push_back(content_desc(
             new CudaAladinContent(
@@ -188,7 +188,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
                 test_mat,
                 sizeof(float)),
             "CUDA",
-            1));
+            PlatformType::Cuda));
 #endif
 #ifdef _USE_OPENCL
         listContent.push_back(content_desc(
@@ -199,13 +199,13 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
                 test_mat,
                 sizeof(float)),
             "OpenCL",
-            2));
+            PlatformType::OpenCl));
 #endif
         // Loop over all possibles contents for each test
         for (auto &&content : listContent) {
             AladinContent *con;
             std::string desc;
-            int plat_value;
+            PlatformType plat_value;
             std::tie(con, desc, plat_value) = content;
             SECTION(test_name + " " + desc) {
                 // Initialise the platform to run current content and retrieve deformation field
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index baa794d4..cab1b6c6 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -79,9 +79,9 @@ void check_matching_difference(int dim,
    }
 }
 
-void test(AladinContent *con, int platformCode) {
+void test(AladinContent *con, PlatformType platformType) {
 
-   Platform *platform = new Platform(platformCode);
+   Platform *platform = new Platform(platformType);
 
    Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con);
    blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
@@ -94,14 +94,14 @@ int main(int argc, char **argv)
 {
 
    if (argc != 5) {
-      fprintf(stderr, "Usage: %s <refImage> <warpedImage> <expectedBlockMatchingMatrix> <platformCode>\n", argv[0]);
+      fprintf(stderr, "Usage: %s <refImage> <warpedImage> <expectedBlockMatchingMatrix> <platformType>\n", argv[0]);
       return EXIT_FAILURE;
    }
 
    char *inputRefImageName = argv[1];
    char *inputWarpedImageName = argv[2];
-   char* expectedBlockMatchingMatrixName = argv[3];
-   int   platformCode = atoi(argv[4]);
+   char *expectedBlockMatchingMatrixName = argv[3];
+   PlatformType platformType{atoi(argv[4])};
 
    // Read the input reference image
    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
@@ -137,16 +137,16 @@ int main(int argc, char **argv)
 
    // Platforms
    AladinContent *con = nullptr;
-   if (platformCode == NR_PLATFORM_CPU) {
+   if (platformType == PlatformType::Cpu) {
       con = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #ifdef _USE_CUDA
-   else if (platformCode == NR_PLATFORM_CUDA) {
+   else if (platformType == PlatformType::Cuda) {
       con = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #endif
 #ifdef _USE_OPENCL
-   else if (platformCode == NR_PLATFORM_CL) {
+   else if (platformType == PlatformType::OpenCl) {
       con = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #endif
@@ -156,7 +156,7 @@ int main(int argc, char **argv)
    }
    con->SetWarped(warpedImage);
    //con->SetWarped(referenceImage);
-   test(con, platformCode);
+   test(con, platformType);
    blockMatchingParams = con->GetBlockMatchingParams();
 
 #ifndef NDEBUG
diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp
index 2c234cfa..38aef179 100644
--- a/reg-test/reg_test_bspline_deformation_field.cpp
+++ b/reg-test/reg_test_bspline_deformation_field.cpp
@@ -10,7 +10,7 @@
 int main(int argc, char **argv)
 {
     if (argc != 6) {
-        fprintf(stderr, "Usage: %s <refImage> <inputGrid> <expectedField> <useComp> <platformCode>\n", argv[0]);
+        fprintf(stderr, "Usage: %s <refImage> <inputGrid> <expectedField> <useComp> <platformType>\n", argv[0]);
         return EXIT_FAILURE;
     }
 
@@ -18,7 +18,7 @@ int main(int argc, char **argv)
     char *inputCPPFileName = argv[2];
     char *inputDefImageName = argv[3];
     bool useComposition = atoi(argv[4]);
-//    int platformCode = atoi(argv[5]);
+    // PlatformType platformType{atoi(argv[5])};
 
     // Read the input reference image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp
index 44b022b8..78793df5 100644
--- a/reg-test/reg_test_coherence_affine_deformation_field.cpp
+++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp
@@ -19,9 +19,9 @@
 #define EPS 0.000001
 #define EPS_SINGLE 0.0001
 
-void test(AladinContent *con, int platformCode) {
+void test(AladinContent *con, int platformType) {
 
-    Platform *platform = new Platform(platformCode);
+    Platform *platform = new Platform(platformType);
 
     Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con);
     affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
@@ -33,14 +33,14 @@ void test(AladinContent *con, int platformCode) {
 int main(int argc, char **argv)
 {
     if (argc != 5) {
-        fprintf(stderr, "Usage: %s <refImage> <inputMatrix> <expectedField> <platformCode>\n", argv[0]);
+        fprintf(stderr, "Usage: %s <refImage> <inputMatrix> <expectedField> <platformType>\n", argv[0]);
         return EXIT_FAILURE;
     }
 
     char *inputRefImageName = argv[1];
     char *inputMatFileName = argv[2];
     char *inputDefImageName = argv[3];
-    int platformCode = atoi(argv[4]);
+    PlatformType platformType{atoi(argv[4])};
 
     // Read the input reference image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
@@ -78,16 +78,16 @@ int main(int argc, char **argv)
     AladinContent *con_cpu = new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float));
     AladinContent *con_gpu = nullptr;
 #ifdef _USE_CUDA
-    if (platformCode == NR_PLATFORM_CUDA) {
+    if (platformType == PlatformType::Cuda) {
         con_gpu = new CudaAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float));
     }
 #endif
 #ifdef _USE_OPENCL
-    if (platformCode == NR_PLATFORM_CL) {
+    if (platformType == PlatformType::OpenCl) {
         con_gpu = new ClAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float));
     }
 #endif
-    if(platformCode!=NR_PLATFORM_CUDA && platformCode!=NR_PLATFORM_CL){
+    if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){
        reg_print_msg_error("Unexpected platform code");
        return EXIT_FAILURE;
     }
@@ -100,10 +100,10 @@ int main(int argc, char **argv)
 
     //CPU or GPU code
     reg_tools_changeDatatype<float>(referenceImage);
-    test(con_cpu, NR_PLATFORM_CPU);
+    test(con_cpu, PlatformType::Cpu);
     test_field_cpu = con_cpu->GetDeformationField();
 
-    test(con_gpu, NR_PLATFORM_CPU);
+    test(con_gpu, PlatformType::Cpu);
     test_field_gpu = con_gpu->GetDeformationField();
 
     // Compute the difference between the computed and inputted deformation field
diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp
index b625175a..f58556a7 100644
--- a/reg-test/reg_test_coherence_blockMatching.cpp
+++ b/reg-test/reg_test_coherence_blockMatching.cpp
@@ -89,9 +89,9 @@ void check_matching_difference(int dim,
    }
 }
 
-void test(AladinContent *con, int platformCode) {
+void test(AladinContent *con, int platformType) {
 
-   Platform *platform = new Platform(platformCode);
+   Platform *platform = new Platform(platformType);
 
    Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con);
    blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
@@ -104,27 +104,27 @@ int main(int argc, char **argv)
 {
 
    if (argc != 4) {
-      fprintf(stderr, "Usage: %s <refImage> <warpedImage> <platformCode>\n", argv[0]);
+      fprintf(stderr, "Usage: %s <refImage> <warpedImage> <platformType>\n", argv[0]);
       return EXIT_FAILURE;
    }
 
    char *inputRefImageName = argv[1];
    char *inputWarpedImageName = argv[2];
-   int   platformCode = atoi(argv[3]);
+   PlatformType platformType{atoi(argv[3])};
 #ifndef _USE_CUDA
-   if(platformCode == NR_PLATFORM_CUDA){
+   if(platformType == PlatformType::Cuda){
       reg_print_msg_error("NiftyReg has not been compiled with CUDA");
       return EXIT_FAILURE;
    }
 #endif
 #ifndef _USE_OPENCL
-   if(platformCode == NR_PLATFORM_CL){
+   if(platformType == PlatformType::OpenCl){
       reg_print_msg_error("NiftyReg has not been compiled with OpenCL");
       return EXIT_FAILURE;
    }
 #endif
 
-   if(platformCode!=NR_PLATFORM_CUDA && platformCode!=NR_PLATFORM_CL){
+   if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){
       reg_print_msg_error("Unexpected platform code");
       return EXIT_FAILURE;
    }
@@ -156,7 +156,7 @@ int main(int argc, char **argv)
    AladinContent *con_cpu = nullptr;
    con_cpu = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    con_cpu->SetWarped(warpedImage);
-   test(con_cpu, NR_PLATFORM_CPU);
+   test(con_cpu, PlatformType::Cpu);
    blockMatchingParams_cpu = con_cpu->GetBlockMatchingParams();
 
 #ifndef NDEBUG
@@ -168,17 +168,17 @@ int main(int argc, char **argv)
    AladinContent *con_gpu = nullptr;
    _reg_blockMatchingParam* blockMatchingParams_gpu = nullptr;
 #ifdef _USE_CUDA
-   if (platformCode == NR_PLATFORM_CUDA) {
+   if (platformType == PlatformType::Cuda) {
       con_gpu = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #endif
 #ifdef _USE_OPENCL
-   if (platformCode == NR_PLATFORM_CL) {
+   if (platformType == PlatformType::OpenCl) {
       con_gpu = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
    }
 #endif
    con_gpu->SetWarped(warpedImage);
-   test(con_gpu, platformCode);
+   test(con_gpu, platformType);
    blockMatchingParams_gpu = con_gpu->GetBlockMatchingParams();
 
 #ifndef NDEBUG
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
index 75cd9c23..9b03bc8c 100644
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ b/reg-test/reg_test_coherence_interpolation.cpp
@@ -18,27 +18,27 @@ int main(int argc, char **argv)
 {
     if(argc!=5)
     {
-        fprintf(stderr, "Usage: %s <refImage> <inputDefField> <order> <platformCode>\n", argv[0]);
+        fprintf(stderr, "Usage: %s <refImage> <inputDefField> <order> <platformType>\n", argv[0]);
         return EXIT_FAILURE;
     }
 
     char *inputRefImageName=argv[1];
     char *inputDefImageName=argv[2];
     int interpolation=atoi(argv[3]);
-    int platformCode = atoi(argv[4]);
+    PlatformType platformType{atoi(argv[4])};
 #ifndef _USE_CUDA
-   if(platformCode == NR_PLATFORM_CUDA){
+   if(platformType == PlatformType::Cuda){
       reg_print_msg_error("NiftyReg has not been compiled with CUDA");
       return EXIT_FAILURE;
    }
 #endif
 #ifndef _USE_OPENCL
-   if(platformCode == NR_PLATFORM_CL){
+   if(platformType == PlatformType::OpenCl){
       reg_print_msg_error("NiftyReg has not been compiled with OpenCL");
       return EXIT_FAILURE;
    }
 #endif
-   if(platformCode!=NR_PLATFORM_CUDA && platformCode!=NR_PLATFORM_CL){
+   if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){
       reg_print_msg_error("Unexpected platform code");
       return EXIT_FAILURE;
    }
@@ -80,7 +80,7 @@ int main(int argc, char **argv)
     con_cpu->SetWarped(cpu_warped);
     con_cpu->SetDeformationField(inputDeformationField);
     con_cpu->SetReferenceMask(tempMask);
-    Platform *platform_cpu = new Platform(NR_PLATFORM_CPU);
+    Platform *platform_cpu = new Platform(PlatformType::Cpu);
     Kernel *resampleImageKernel_cpu = platform_cpu->CreateKernel(ResampleImageKernel::GetName(), con_cpu);
     resampleImageKernel_cpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
                                                                       std::numeric_limits<float>::quiet_NaN());
@@ -91,12 +91,12 @@ int main(int argc, char **argv)
     // GPU platform
     AladinContent *con_gpu = nullptr;
 #ifdef _USE_CUDA
-    if (platformCode == NR_PLATFORM_CUDA) {
+    if (platformType == PlatformType::Cuda) {
         con_gpu = new CudaAladinContent(nullptr, referenceImage, nullptr, sizeof(float));
     }
 #endif
 #ifdef _USE_OPENCL
-    if (platformCode == NR_PLATFORM_CL) {
+    if (platformType == PlatformType::OpenCl) {
         con_gpu = new ClAladinContent(nullptr, referenceImage, nullptr, sizeof(float));
     }
 #endif
@@ -105,12 +105,12 @@ int main(int argc, char **argv)
     con_gpu->SetReferenceMask(tempMask);
     Platform *platform_gpu = nullptr;
 #ifdef _USE_CUDA
-    if (platformCode == NR_PLATFORM_CUDA)
-       platform_gpu = new Platform(NR_PLATFORM_CUDA);
+    if (platformType == PlatformType::Cuda)
+       platform_gpu = new Platform(PlatformType::Cuda);
 #endif
 #ifdef _USE_OPENCL
-    if (platformCode == NR_PLATFORM_CL) {
-       platform_gpu = new Platform(NR_PLATFORM_CL);
+    if (platformType == PlatformType::OpenCl) {
+       platform_gpu = new Platform(PlatformType::OpenCl);
     }
 #endif
     Kernel *resampleImageKernel_gpu = platform_gpu->CreateKernel(ResampleImageKernel::GetName(), con_gpu);
diff --git a/reg-test/reg_test_fullAffine.cpp b/reg-test/reg_test_fullAffine.cpp
index 2e4609fe..d3424b26 100644
--- a/reg-test/reg_test_fullAffine.cpp
+++ b/reg-test/reg_test_fullAffine.cpp
@@ -40,7 +40,7 @@ int main(int argc, char **argv)
    reg_aladin_sym<float> *affine=new reg_aladin_sym<float>();
    affine->SetInputReference(referenceImage);
    affine->SetInputFloating(floatingImage);
-   affine->SetPlatformCode(NR_PLATFORM_CPU);
+   affine->SetPlatformType(PlatformType::Cpu);
    affine->Run();
    mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix());
 
diff --git a/reg-test/reg_test_fullAffine_cl.cpp b/reg-test/reg_test_fullAffine_cl.cpp
index f4360541..af19c7c8 100755
--- a/reg-test/reg_test_fullAffine_cl.cpp
+++ b/reg-test/reg_test_fullAffine_cl.cpp
@@ -40,7 +40,7 @@ int main(int argc, char **argv)
    reg_aladin<float> *affine=new reg_aladin_sym<float>();
    affine->SetInputReference(referenceImage);
    affine->SetInputFloating(floatingImage);
-   affine->SetPlatformCode(NR_PLATFORM_CL);
+   affine->SetPlatformType(PlatformType::OpenCl);
    affine->SetClIdx(1);
    affine->Run();
    mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix());
diff --git a/reg-test/reg_test_fullAffine_cuda.cpp b/reg-test/reg_test_fullAffine_cuda.cpp
index 65e874fd..ffe5e942 100755
--- a/reg-test/reg_test_fullAffine_cuda.cpp
+++ b/reg-test/reg_test_fullAffine_cuda.cpp
@@ -40,7 +40,7 @@ int main(int argc, char **argv)
    reg_aladin_sym<float> *affine=new reg_aladin_sym<float>();
    affine->SetInputReference(referenceImage);
    affine->SetInputFloating(floatingImage);
-   affine->SetPlatformCode(NR_PLATFORM_CUDA);
+   affine->SetPlatformType(PlatformType::Cuda);
    affine->Run();
    mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix());
 
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 69998293..dcfed114 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -29,7 +29,7 @@
 
 
 typedef std::tuple<std::string, nifti_image*, nifti_image*, float*> test_data;
-typedef std::tuple<AladinContent*, std::string, int> content_desc;
+typedef std::tuple<AladinContent*, std::string, PlatformType> content_desc;
 
 TEST_CASE("Resampling", "[resampling]") {
     // Create a reference 2D image
@@ -116,24 +116,24 @@ TEST_CASE("Resampling", "[resampling]") {
         listContent.push_back(content_desc(
             new AladinContent(reference, reference),
             "CPU",
-            NR_PLATFORM_CPU));
+            PlatformType::Cpu));
 #ifdef _USE_CUDA
         listContent.push_back(content_desc(
             new CudaAladinContent(reference, reference),
             "CUDA",
-            NR_PLATFORM_CUDA));
+            PlatformType::Cuda));
 #endif
 #ifdef _USE_OPENCL
         // listContent.push_back(content_desc(
         //     new ClAladinContent(reference, reference),
         //     "OpenCL",
-        //     NR_PLATFORM_CL));
+        //     PlatformType::OpenCl));
 #endif
         // Loop over all possibles contents for each test
         for (auto&& content : listContent) {
             AladinContent *con;
             std::string desc;
-            int plat_value;
+            PlatformType plat_value;
             std::tie(con, desc, plat_value) = content;
 
             SECTION(test_name + " " + desc) {
diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp
index b98e39de..adb263c7 100644
--- a/reg-test/reg_test_leastTrimmedSquares.cpp
+++ b/reg-test/reg_test_leastTrimmedSquares.cpp
@@ -33,9 +33,9 @@ int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max
    }
    return EXIT_SUCCESS;
 }
-void test(AladinContent *con, int platformCode, bool isAffine) {
+void test(AladinContent *con, PlatformType platformType, bool isAffine) {
 
-   Platform *platform = new Platform(platformCode);
+   Platform *platform = new Platform(platformType);
 
    Kernel *optimiseKernel = platform->CreateKernel(OptimiseKernel::GetName(), con);
    optimiseKernel->castTo<OptimiseKernel>()->Calculate(isAffine);
@@ -48,7 +48,7 @@ int main(int argc, char **argv)
 {
 
    if (argc != 7) {
-      fprintf(stderr, "Usage: %s <inputPoints1> <inputPoints2> <percentToKeep> <isAffine> <expectedLTSMatrix> <platformCode> \n", argv[0]);
+      fprintf(stderr, "Usage: %s <inputPoints1> <inputPoints2> <percentToKeep> <isAffine> <expectedLTSMatrix> <platformType> \n", argv[0]);
       return EXIT_FAILURE;
    }
 
@@ -57,7 +57,7 @@ int main(int argc, char **argv)
    unsigned int percentToKeep = atoi(argv[3]);
    bool isAffine = atoi(argv[4]);
    char *expectedLTSMatrixFilename = argv[5];
-   int platformCode = atoi(argv[6]);
+   PlatformType platformType{atoi(argv[6])};
 
    std::pair<size_t, size_t> inputMatrix1Size = reg_tool_sizeInputMatrixFile(inputMatrix1Filename);
    size_t m1 = inputMatrix1Size.first;
@@ -77,16 +77,16 @@ int main(int argc, char **argv)
    ////////////////////////
    // Platforms
    AladinContent *con = nullptr;
-   if (platformCode == NR_PLATFORM_CPU) {
+   if (platformType == PlatformType::Cpu) {
       con = new AladinContent();
    }
 #ifdef _USE_CUDA
-   else if (platformCode == NR_PLATFORM_CUDA) {
+   else if (platformType == PlatformType::Cuda) {
       con = new CudaAladinContent();
    }
 #endif
 #ifdef _USE_OPENCL
-   else if (platformCode == NR_PLATFORM_CL) {
+   else if (platformType == PlatformType::OpenCl) {
       con = new ClAladinContent();
    }
 #endif
@@ -152,7 +152,7 @@ int main(int argc, char **argv)
    }
 
    con->SetBlockMatchingParams(blockMatchingParams);
-   test(con, platformCode, isAffine);
+   test(con, platformType, isAffine);
 
 #ifndef NDEBUG
    if (n1 == 2)
diff --git a/reg-test/reg_test_svd_cuda.cpp b/reg-test/reg_test_svd_cuda.cpp
index 10c85404..009b3db7 100644
--- a/reg-test/reg_test_svd_cuda.cpp
+++ b/reg-test/reg_test_svd_cuda.cpp
@@ -66,7 +66,7 @@ int main(int argc, char **argv)
     char *expectedUMatrixFilename = argv[2];
     char *expectedSMatrixFilename = argv[3];
     char *expectedVMatrixFilename = argv[4];
-    int platformCode = atoi(argv[5]);
+    PlatformType platformType{atoi(argv[5])};
 
     std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(inputSVDMatrixFilename);
     size_t m = inputMatrixSize.first;
@@ -106,7 +106,7 @@ int main(int argc, char **argv)
         double *test_SVect = (double*)malloc(min_size*sizeof(double));
         //SVD
 #ifdef _USE_CUDA
-        if(platformCode != 1) {
+        if(platformType != PlatformType::Cuda) {
 #endif
             //svd<float>(inputSVDMatrix, m, n, test_SVect, test_VMatrix);
             //U

From 253736bda5cda0b9b32121f7e7a80bc3dc985290 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 16 Jan 2023 11:09:31 +0000
Subject: [PATCH 033/314] Use Platform to handle different measure types

---
 niftyreg_build_version.txt        |   2 +-
 reg-lib/CMakeLists.txt            |   6 +-
 reg-lib/Measure.cpp               |  41 +++++++
 reg-lib/Measure.h                 |  12 ++
 reg-lib/MeasureFactory.h          |   8 ++
 reg-lib/Platform.cpp              |   7 ++
 reg-lib/Platform.h                |   8 +-
 reg-lib/_reg_base.cpp             | 111 +++++------------
 reg-lib/_reg_base.h               |   3 +
 reg-lib/cpu/_reg_dti.cpp          |   4 +-
 reg-lib/cpu/_reg_dti.h            |  22 ++--
 reg-lib/cpu/_reg_kld.cpp          |   4 +-
 reg-lib/cpu/_reg_kld.h            |  44 +++----
 reg-lib/cpu/_reg_lncc.cpp         |   4 +-
 reg-lib/cpu/_reg_lncc.h           |  22 ++--
 reg-lib/cpu/_reg_measure.h        | 169 +++++++++++++-------------
 reg-lib/cpu/_reg_mind.cpp         |   4 +-
 reg-lib/cpu/_reg_mind.h           |   8 +-
 reg-lib/cpu/_reg_nmi.cpp          |   4 +-
 reg-lib/cpu/_reg_nmi.h            |  24 ++--
 reg-lib/cpu/_reg_ssd.h            |  23 ++--
 reg-lib/cuda/CMakeLists.txt       |   7 +-
 reg-lib/cuda/CudaMeasure.cpp      |  49 ++++++++
 reg-lib/cuda/CudaMeasure.h        |   9 ++
 reg-lib/cuda/CudaMeasureFactory.h |   8 ++
 reg-lib/cuda/_reg_measure_gpu.h   | 192 +++++++++++++++++-------------
 reg-lib/cuda/_reg_nmi_gpu.cu      |   1 +
 reg-lib/cuda/_reg_nmi_gpu.h       |  94 ++++++++-------
 reg-lib/cuda/_reg_ssd_gpu.h       |  29 ++---
 29 files changed, 515 insertions(+), 404 deletions(-)
 create mode 100644 reg-lib/Measure.cpp
 create mode 100644 reg-lib/Measure.h
 create mode 100644 reg-lib/MeasureFactory.h
 create mode 100644 reg-lib/cuda/CudaMeasure.cpp
 create mode 100644 reg-lib/cuda/CudaMeasure.h
 create mode 100644 reg-lib/cuda/CudaMeasureFactory.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index fba7ed52..878d5a02 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-143
+146
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 0e0ec358..8a8f80ff 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -145,7 +145,10 @@ add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE}
   F3dContent.h
   Platform.cpp
   Platform.h
+  Measure.cpp
+  Measure.h
 )
+target_link_libraries(_reg_compute _reg_measure)
 install(TARGETS _reg_compute
   RUNTIME DESTINATION lib
   LIBRARY DESTINATION lib
@@ -157,7 +160,8 @@ install(FILES
   AladinContent.h
   Content.h
   F3dContent.h
-  Platform.h DESTINATION include
+  Platform.h
+  Measure.h DESTINATION include
 )
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_compute")
 #-----------------------------------------------------------------------------
diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp
new file mode 100644
index 00000000..6e4419a7
--- /dev/null
+++ b/reg-lib/Measure.cpp
@@ -0,0 +1,41 @@
+#include "Measure.h"
+#include "_reg_nmi.h"
+#include "_reg_ssd.h"
+#include "_reg_dti.h"
+#include "_reg_lncc.h"
+#include "_reg_kld.h"
+#include "_reg_mind.h"
+
+/* *************************************************************** */
+reg_measure* Measure::Create(const MeasureType& measureType) {
+    switch (measureType) {
+    case MeasureType::Nmi:
+        return new reg_nmi();
+    case MeasureType::Ssd:
+        return new reg_ssd();
+    case MeasureType::Dti:
+        return new reg_dti();
+    case MeasureType::Lncc:
+        return new reg_lncc();
+    case MeasureType::Kld:
+        return new reg_kld();
+    case MeasureType::Mind:
+        return new reg_mind();
+    case MeasureType::Mindssc:
+        return new reg_mindssc();
+    }
+    reg_print_msg_error("Unsupported measure type");
+    reg_exit();
+    return nullptr;
+}
+/* *************************************************************** */
+void Measure::Initialise(reg_measure& measure, F3dContent& con) {
+    measure.InitialiseMeasure(con.GetReference(),
+                              con.GetFloating(),
+                              con.GetReferenceMask(),
+                              con.GetWarped(),
+                              con.GetWarpedGradient(),
+                              con.GetVoxelBasedMeasureGradient(),
+                              con.GetLocalWeightSim());
+}
+/* *************************************************************** */
diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h
new file mode 100644
index 00000000..afa593b3
--- /dev/null
+++ b/reg-lib/Measure.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "F3dContent.h"
+#include "_reg_measure.h"
+
+enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, Mindssc };
+
+class Measure {
+public:
+    virtual reg_measure* Create(const MeasureType& measureType);
+    virtual void Initialise(reg_measure& measure, F3dContent& con);
+};
diff --git a/reg-lib/MeasureFactory.h b/reg-lib/MeasureFactory.h
new file mode 100644
index 00000000..f256794e
--- /dev/null
+++ b/reg-lib/MeasureFactory.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "Measure.h"
+
+class MeasureFactory {
+public:
+    virtual Measure* Produce() { return new Measure(); }
+};
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 74865e27..abe57f5c 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -5,6 +5,7 @@
 #include "CudaF3dContent.h"
 #include "CudaComputeFactory.h"
 #include "CudaContextSingleton.h"
+#include "CudaMeasureFactory.h"
 #include "_reg_optimiser_gpu.h"
 #endif
 #ifdef _USE_OPENCL
@@ -19,12 +20,14 @@ Platform::Platform(const PlatformType& platformTypeIn) {
     if (platformType == PlatformType::Cpu) {
         kernelFactory = new CpuKernelFactory();
         computeFactory = new ComputeFactory();
+        measureFactory = new MeasureFactory();
         platformName = "cpu_platform";
     }
 #ifdef _USE_CUDA
     else if (platformType == PlatformType::Cuda) {
         kernelFactory = new CudaKernelFactory();
         computeFactory = new CudaComputeFactory();
+        measureFactory = new CudaMeasureFactory();
         platformName = "cuda_platform";
     }
 #endif
@@ -86,6 +89,10 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent *con,
 template reg_optimiser<float>* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool);
 template reg_optimiser<double>* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool);
 /* *************************************************************** */
+Measure* Platform::CreateMeasure() {
+    return measureFactory->Produce();
+}
+/* *************************************************************** */
 std::string Platform::GetName() {
     return platformName;
 }
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 9d030bca..478dd2db 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -3,6 +3,7 @@
 #include "F3dContent.h"
 #include "KernelFactory.h"
 #include "ComputeFactory.h"
+#include "MeasureFactory.h"
 #include "_reg_optimiser.h"
 
 enum class PlatformType { Cpu, Cuda, OpenCl };
@@ -22,17 +23,18 @@ class Platform {
                                          bool optimiseX,
                                          bool optimiseY,
                                          bool optimiseZ);
+    Measure* CreateMeasure();
 
     std::string GetName();
-
     PlatformType GetPlatformType();
     //void SetPlatformType(const PlatformType& platformTypeIn);
     void SetGpuIdx(unsigned gpuIdxIn);
     unsigned GetGpuIdx();
 
 private:
-    KernelFactory *kernelFactory;
-    ComputeFactory *computeFactory;
+    KernelFactory *kernelFactory = nullptr;
+    ComputeFactory *computeFactory = nullptr;
+    MeasureFactory *measureFactory = nullptr;
     std::string platformName;
     PlatformType platformType;
     unsigned gpuIdx;
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 3a55e3c9..e5e290f9 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -11,7 +11,6 @@
  */
 
 #include "_reg_base.h"
-#include "F3dContent.h" // TODO Temporary fix! Remove this line!
 
 /* *************************************************************** */
 /* *************************************************************** */
@@ -629,15 +628,9 @@ void reg_base<T>::CheckParameters() {
         levelToPerform = levelNumber;
 
     // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
-    if (measure_nmi == nullptr &&
-        measure_ssd == nullptr &&
-        measure_dti == nullptr &&
-        measure_lncc == nullptr &&
-        measure_lncc == nullptr &&
-        measure_kld == nullptr &&
-        measure_mind == nullptr &&
-        measure_mindssc == nullptr) {
-        measure_nmi = new reg_nmi;
+    if (!measure_nmi && !measure_ssd && !measure_dti && !measure_lncc &&
+        !measure_kld && !measure_mind && !measure_mindssc) {
+        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
         for (int i = 0; i < inputReference->nt; ++i)
             measure_nmi->SetTimepointWeight(i, 1.0);
     }
@@ -765,70 +758,29 @@ void reg_base<T>::CheckParameters() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::InitialiseSimilarity() {
-    // TODO Update this section to handle CUDA
     // TODO Move this function to reg_f3d
-    if (measure_nmi != nullptr)
-        measure_nmi->InitialiseMeasure(con->GetReference(),
-                                       con->GetFloating(),
-                                       con->GetReferenceMask(),
-                                       con->GetWarped(),
-                                       dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
-                                       dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
-                                       dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+    F3dContent& con = *dynamic_cast<F3dContent*>(this->con);
 
-    if (measure_ssd != nullptr)
-        measure_ssd->InitialiseMeasure(con->GetReference(),
-                                       con->GetFloating(),
-                                       con->GetReferenceMask(),
-                                       con->GetWarped(),
-                                       dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
-                                       dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
-                                       dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+    if (measure_nmi)
+        measure->Initialise(*measure_nmi, con);
 
-    if (measure_kld != nullptr)
-        measure_kld->InitialiseMeasure(con->GetReference(),
-                                       con->GetFloating(),
-                                       con->GetReferenceMask(),
-                                       con->GetWarped(),
-                                       dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
-                                       dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
-                                       dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+    if (measure_ssd)
+        measure->Initialise(*measure_ssd, con);
 
-    if (measure_lncc != nullptr)
-        measure_lncc->InitialiseMeasure(con->GetReference(),
-                                        con->GetFloating(),
-                                        con->GetReferenceMask(),
-                                        con->GetWarped(),
-                                        dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
-                                        dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
-                                        dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+    if (measure_kld)
+        measure->Initialise(*measure_kld, con);
 
-    if (measure_dti != nullptr)
-        measure_dti->InitialiseMeasure(con->GetReference(),
-                                       con->GetFloating(),
-                                       con->GetReferenceMask(),
-                                       con->GetWarped(),
-                                       dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
-                                       dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
-                                       dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+    if (measure_lncc)
+        measure->Initialise(*measure_lncc, con);
 
-    if (measure_mind != nullptr)
-        measure_mind->InitialiseMeasure(con->GetReference(),
-                                        con->GetFloating(),
-                                        con->GetReferenceMask(),
-                                        con->GetWarped(),
-                                        dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
-                                        dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
-                                        dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+    if (measure_dti)
+        measure->Initialise(*measure_dti, con);
 
-    if (measure_mindssc != nullptr)
-        measure_mindssc->InitialiseMeasure(con->GetReference(),
-                                           con->GetFloating(),
-                                           con->GetReferenceMask(),
-                                           con->GetWarped(),
-                                           dynamic_cast<F3dContent*>(con)->GetWarpedGradient(),
-                                           dynamic_cast<F3dContent*>(con)->GetVoxelBasedMeasureGradient(),
-                                           dynamic_cast<F3dContent*>(con)->GetLocalWeightSim());
+    if (measure_mind)
+        measure->Initialise(*measure_mind, con);
+
+    if (measure_mindssc)
+        measure->Initialise(*measure_mindssc, con);
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::InitialiseSimilarity");
@@ -839,10 +791,11 @@ template<class T>
 void reg_base<T>::Initialise() {
     if (initialised) return;
 
-    CheckParameters();
-
     platform = new Platform(platformType);
     platform->SetGpuIdx(gpuIdx);
+    measure = platform->CreateMeasure();
+
+    CheckParameters();
 
     // CREATE THE PYRAMID IMAGES
     if (usePyramid) {
@@ -1051,7 +1004,7 @@ void reg_base<T>::GetVoxelBasedGradient() {
 //void reg_base<T>::ApproximateParzenWindow()
 //{
 //    if(measure_nmi==nullptr)
-//        measure_nmi=new reg_nmi;
+//        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
 //    measure_nmi=approxParzenWindow = true;
 //}
 ///* *************************************************************** */
@@ -1059,7 +1012,7 @@ void reg_base<T>::GetVoxelBasedGradient() {
 //void reg_base<T>::DoNotApproximateParzenWindow()
 //{
 //    if(measure_nmi==nullptr)
-//        measure_nmi=new reg_nmi;
+//        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
 //    measure_nmi=approxParzenWindow = false;
 //}
 /* *************************************************************** */
@@ -1067,7 +1020,7 @@ void reg_base<T>::GetVoxelBasedGradient() {
 template<class T>
 void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
     if (measure_nmi == nullptr)
-        measure_nmi = new reg_nmi;
+        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
     measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
@@ -1080,7 +1033,7 @@ void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
 template<class T>
 void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
     if (measure_nmi == nullptr)
-        measure_nmi = new reg_nmi;
+        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
     measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
@@ -1093,7 +1046,7 @@ void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
 template<class T>
 void reg_base<T>::UseSSD(int timepoint, bool normalise) {
     if (measure_ssd == nullptr)
-        measure_ssd = new reg_ssd();
+        measure_ssd = dynamic_cast<reg_ssd*>(measure->Create(MeasureType::Ssd));
     measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     measure_ssd->SetNormaliseTimepoint(timepoint, normalise);
 #ifndef NDEBUG
@@ -1104,7 +1057,7 @@ void reg_base<T>::UseSSD(int timepoint, bool normalise) {
 template<class T>
 void reg_base<T>::UseMIND(int timepoint, int offset) {
     if (measure_mind == nullptr)
-        measure_mind = new reg_mind;
+        measure_mind = dynamic_cast<reg_mind*>(measure->Create(MeasureType::Mind));
     measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mind->SetDescriptorOffset(offset);
 #ifndef NDEBUG
@@ -1115,7 +1068,7 @@ void reg_base<T>::UseMIND(int timepoint, int offset) {
 template<class T>
 void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
     if (measure_mindssc == nullptr)
-        measure_mindssc = new reg_mindssc;
+        measure_mindssc = dynamic_cast<reg_mindssc*>(measure->Create(MeasureType::Mindssc));
     measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mindssc->SetDescriptorOffset(offset);
 #ifndef NDEBUG
@@ -1126,7 +1079,7 @@ void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
 template<class T>
 void reg_base<T>::UseKLDivergence(int timepoint) {
     if (measure_kld == nullptr)
-        measure_kld = new reg_kld;
+        measure_kld = dynamic_cast<reg_kld*>(measure->Create(MeasureType::Kld));
     measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseKLDivergence");
@@ -1136,7 +1089,7 @@ void reg_base<T>::UseKLDivergence(int timepoint) {
 template<class T>
 void reg_base<T>::UseLNCC(int timepoint, float stddev) {
     if (measure_lncc == nullptr)
-        measure_lncc = new reg_lncc;
+        measure_lncc = dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc));
     measure_lncc->SetKernelStandardDeviation(timepoint, stddev);
     measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0
 #ifndef NDEBUG
@@ -1163,7 +1116,7 @@ void reg_base<T>::UseDTI(bool *timepoint) {
     reg_exit();
 
     if (measure_dti == nullptr)
-        measure_dti = new reg_dti;
+        measure_dti = dynamic_cast<reg_dti*>(measure->Create(MeasureType::Dti));
     for (int i = 0; i < inputReference->nt; ++i) {
         if (timepoint[i])
             measure_dti->SetTimepointWeight(i, 1.0);  // weight set to 1.0 to indicate timepoint is active
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 7a945bc8..4f966a4c 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -45,6 +45,9 @@ class reg_base: public InterfaceOptimiser {
     // Compute
     Compute *compute = nullptr;
 
+    // Measure
+    Measure *measure = nullptr;
+
     // Optimiser related variables
     reg_optimiser<T> *optimiser;
     size_t maxIterationNumber;
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index 6db7716e..b70656a0 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -30,7 +30,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImgPtr,
                                 nifti_image *warFloImgPtr,
                                 nifti_image *warFloGraPtr,
                                 nifti_image *forVoxBasedGraPtr,
-                                nifti_image *forwardLocalWeightPtr,
+                                nifti_image *localWeightSimPtr,
                                 int *maskFloPtr,
                                 nifti_image *warRefImgPtr,
                                 nifti_image *warRefGraPtr,
@@ -43,7 +43,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImgPtr,
                                   warFloImgPtr,
                                   warFloGraPtr,
                                   forVoxBasedGraPtr,
-                                  forwardLocalWeightPtr,
+                                  localWeightSimPtr,
                                   maskFloPtr,
                                   warRefImgPtr,
                                   warRefGraPtr,
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index f2dcce22..1c0ed6ff 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -27,17 +27,17 @@ class reg_dti: public reg_measure {
     virtual ~reg_dti() {}
 
     /// @brief Initialise the reg_dti object
-    void InitialiseMeasure(nifti_image *refImgPtr,
-                           nifti_image *floImgPtr,
-                           int *maskRefPtr,
-                           nifti_image *warFloImgPtr,
-                           nifti_image *warFloGraPtr,
-                           nifti_image *forVoxBasedGraPtr,
-                           nifti_image *forwardLocalWeightPtr = nullptr,
-                           int *maskFloPtr = nullptr,
-                           nifti_image *warRefImgPtr = nullptr,
-                           nifti_image *warRefGraPtr = nullptr,
-                           nifti_image *bckVoxBasedGraPtr = nullptr);
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr = nullptr,
+                                   int *maskFloPtr = nullptr,
+                                   nifti_image *warRefImgPtr = nullptr,
+                                   nifti_image *warRefGraPtr = nullptr,
+                                   nifti_image *bckVoxBasedGraPtr = nullptr) override;
     /// @brief Returns the value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based gradient for DTI images
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index 3adc497b..26359d52 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -27,7 +27,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImgPtr,
                                 nifti_image *warFloImgPtr,
                                 nifti_image *warFloGraPtr,
                                 nifti_image *forVoxBasedGraPtr,
-                                nifti_image *forwardLocalWeightPtr,
+                                nifti_image *localWeightSimPtr,
                                 int *maskFloPtr,
                                 nifti_image *warRefImgPtr,
                                 nifti_image *warRefGraPtr,
@@ -39,7 +39,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImgPtr,
                                    warFloImgPtr,
                                    warFloGraPtr,
                                    forVoxBasedGraPtr,
-                                   forwardLocalWeightPtr,
+                                   localWeightSimPtr,
                                    maskFloPtr,
                                    warRefImgPtr,
                                    warRefGraPtr,
diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h
index 22f34a21..ca5a553f 100755
--- a/reg-lib/cpu/_reg_kld.h
+++ b/reg-lib/cpu/_reg_kld.h
@@ -14,30 +14,30 @@
 
 #include "_reg_measure.h"
 
- /* *************************************************************** */
+/* *************************************************************** */
 class reg_kld: public reg_measure {
 public:
-   /// @brief reg_kld class constructor
-   reg_kld();
-   /// @brief reg_kld class destructor
-   virtual ~reg_kld() {}
+    /// @brief reg_kld class constructor
+    reg_kld();
+    /// @brief reg_kld class destructor
+    virtual ~reg_kld() {}
 
-   /// @brief Initialise the reg_kld object
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          nifti_image *forwardLocalWeightPtr = nullptr,
-                          int *maskFloPtr = nullptr,
-                          nifti_image *warRefImgPtr = nullptr,
-                          nifti_image *warRefGraPtr = nullptr,
-                          nifti_image *bckVoxBasedGraPtr = nullptr);
-   /// @brief Returns the kld value
-   virtual double GetSimilarityMeasureValue() override;
-   /// @brief Compute the voxel based kld gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    /// @brief Initialise the reg_kld object
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr = nullptr,
+                                   int *maskFloPtr = nullptr,
+                                   nifti_image *warRefImgPtr = nullptr,
+                                   nifti_image *warRefGraPtr = nullptr,
+                                   nifti_image *bckVoxBasedGraPtr = nullptr) override;
+    /// @brief Returns the kld value
+    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Compute the voxel based kld gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
 };
 /* *************************************************************** */
 
@@ -66,7 +66,7 @@ double reg_getKLDivergence(nifti_image *reference,
  * @param warped Second input image to use to compute the metric
  * @param activeTimePoint Specified which time point volumes have to be considered
  * @param warpedGradient Spatial gradient of the input result image
- * @param KLdivGradient Output image htat will be updated with the
+ * @param KLdivGradient Output image that will be updated with the
  * value of the KLD gradient
  * @param jacobianDeterminantImage Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index cbdd88c1..8c80eb8c 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -144,7 +144,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
                                  nifti_image *warFloImgPtr,
                                  nifti_image *warFloGraPtr,
                                  nifti_image *forVoxBasedGraPtr,
-                                 nifti_image *forwardLocalWeightPtr,
+                                 nifti_image *localWeightSimPtr,
                                  int *maskFloPtr,
                                  nifti_image *warRefImgPtr,
                                  nifti_image *warRefGraPtr,
@@ -155,7 +155,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
                                    warFloImgPtr,
                                    warFloGraPtr,
                                    forVoxBasedGraPtr,
-                                   forwardLocalWeightPtr,
+                                   localWeightSimPtr,
                                    maskFloPtr,
                                    warRefImgPtr,
                                    warRefGraPtr,
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index ed286ca5..e9cd0146 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -24,17 +24,17 @@ class reg_lncc: public reg_measure {
     virtual ~reg_lncc();
 
     /// @brief Initialise the reg_lncc object
-    void InitialiseMeasure(nifti_image *refImgPtr,
-                           nifti_image *floImgPtr,
-                           int *maskRefPtr,
-                           nifti_image *warFloImgPtr,
-                           nifti_image *warFloGraPtr,
-                           nifti_image *forVoxBasedGraPtr,
-                           nifti_image *forwardLocalWeightPtr = nullptr,
-                           int *maskFloPtr = nullptr,
-                           nifti_image *warRefImgPtr = nullptr,
-                           nifti_image *warRefGraPtr = nullptr,
-                           nifti_image *bckVoxBasedGraPtr = nullptr);
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr = nullptr,
+                                   int *maskFloPtr = nullptr,
+                                   nifti_image *warRefImgPtr = nullptr,
+                                   nifti_image *warRefGraPtr = nullptr,
+                                   nifti_image *bckVoxBasedGraPtr = nullptr) override;
     /// @brief Returns the lncc value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based lncc gradient
diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h
index 0282b157..dbe7a87d 100755
--- a/reg-lib/cpu/_reg_measure.h
+++ b/reg-lib/cpu/_reg_measure.h
@@ -1,7 +1,7 @@
 /** @file _reg_measure.h
  * @author Marc Modat
  * @date 25/06/2013
- * @brief Contains a measure class to embbed all measures of similarity classes
+ * @brief Contains a measure class to embed all measures of similarity classes
  * Also contains an interface class between reg_base and the measure class
  */
 
@@ -13,100 +13,93 @@
 /// @brief Class common to all measure of similarity classes
 class reg_measure {
 public:
-   /// @brief Set the pointers to be ussed by the measure object
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          nifti_image *localWeightSimPtr = nullptr,
-                          int *maskFloPtr = nullptr,
-                          nifti_image *warRefImgPtr = nullptr,
-                          nifti_image *warRefGraPtr = nullptr,
-                          nifti_image *bckVoxBasedGraPtr = nullptr) {
-      this->isSymmetric = false;
-      this->referenceImagePointer = refImgPtr;
-      this->referenceTimePoint = this->referenceImagePointer->nt;
-      this->floatingImagePointer = floImgPtr;
-      this->referenceMaskPointer = maskRefPtr;
-      this->warpedFloatingImagePointer = warFloImgPtr;
-      this->warpedFloatingGradientImagePointer = warFloGraPtr;
-      this->forwardVoxelBasedGradientImagePointer = forVoxBasedGraPtr;
-      this->forwardLocalWeightSimImagePointer = localWeightSimPtr;
-      if (maskFloPtr != nullptr && warRefImgPtr != nullptr && warRefGraPtr != nullptr && bckVoxBasedGraPtr != nullptr) {
-         this->isSymmetric = true;
-         this->floatingMaskPointer = maskFloPtr;
-         this->warpedReferenceImagePointer = warRefImgPtr;
-         this->warpedReferenceGradientImagePointer = warRefGraPtr;
-         this->backwardVoxelBasedGradientImagePointer = bckVoxBasedGraPtr;
-      } else {
-         this->floatingMaskPointer = nullptr;
-         this->warpedReferenceImagePointer = nullptr;
-         this->warpedReferenceGradientImagePointer = nullptr;
-         this->backwardVoxelBasedGradientImagePointer = nullptr;
-      }
+    /// @brief Measure class constructor
+    reg_measure() {
 #ifndef NDEBUG
-      printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n");
+        printf("[NiftyReg DEBUG] reg_measure constructor called\n");
 #endif
-   }
+    }
+    /// @brief Measure class destructor
+    virtual ~reg_measure() {}
 
-   /// @brief Returns the registration measure of similarity value
-   virtual double GetSimilarityMeasureValue() = 0;
-
-   /// @brief Compute the voxel based measure of similarity gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
-      if (current_timepoint < 0 || current_timepoint >= this->referenceImagePointer->nt) {
-         reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient");
-         reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
-         reg_exit();
-      }
-   }
-
-   /// @brief Here
-   virtual void GetDiscretisedValue(nifti_image *, float *, int, int) {}
-
-   virtual void SetTimepointWeight(int timepoint, double weight) {
-      this->timePointWeight[timepoint] = weight;
-   }
-
-   virtual double* GetTimepointsWeights(void) {
-      return this->timePointWeight;
-   }
+    /// @brief Set the pointers to be used by the measure object
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr = nullptr,
+                                   int *maskFloPtr = nullptr,
+                                   nifti_image *warRefImgPtr = nullptr,
+                                   nifti_image *warRefGraPtr = nullptr,
+                                   nifti_image *bckVoxBasedGraPtr = nullptr) {
+        this->isSymmetric = false;
+        this->referenceImagePointer = refImgPtr;
+        this->referenceTimePoint = this->referenceImagePointer->nt;
+        this->floatingImagePointer = floImgPtr;
+        this->referenceMaskPointer = maskRefPtr;
+        this->warpedFloatingImagePointer = warFloImgPtr;
+        this->warpedFloatingGradientImagePointer = warFloGraPtr;
+        this->forwardVoxelBasedGradientImagePointer = forVoxBasedGraPtr;
+        this->forwardLocalWeightSimImagePointer = localWeightSimPtr;
+        if (maskFloPtr != nullptr && warRefImgPtr != nullptr && warRefGraPtr != nullptr && bckVoxBasedGraPtr != nullptr) {
+            this->isSymmetric = true;
+            this->floatingMaskPointer = maskFloPtr;
+            this->warpedReferenceImagePointer = warRefImgPtr;
+            this->warpedReferenceGradientImagePointer = warRefGraPtr;
+            this->backwardVoxelBasedGradientImagePointer = bckVoxBasedGraPtr;
+        } else {
+            this->floatingMaskPointer = nullptr;
+            this->warpedReferenceImagePointer = nullptr;
+            this->warpedReferenceGradientImagePointer = nullptr;
+            this->backwardVoxelBasedGradientImagePointer = nullptr;
+        }
+#ifndef NDEBUG
+        printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n");
+#endif
+    }
 
-   virtual nifti_image* GetReferenceImage(void) {
-      return this->referenceImagePointer;
-   }
+    /// @brief Returns the registration measure of similarity value
+    virtual double GetSimilarityMeasureValue() = 0;
 
-   virtual int* GetReferenceMask(void) {
-      return this->referenceMaskPointer;
-   }
+    /// @brief Compute the voxel based measure of similarity gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+        if (current_timepoint < 0 || current_timepoint >= this->referenceImagePointer->nt) {
+            reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient");
+            reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
+            reg_exit();
+        }
+    }
+    virtual void GetDiscretisedValue(nifti_image *, float *, int, int) {}
+    virtual void SetTimepointWeight(int timepoint, double weight) {
+        this->timePointWeight[timepoint] = weight;
+    }
+    virtual double* GetTimepointsWeights(void) {
+        return this->timePointWeight;
+    }
+    virtual nifti_image* GetReferenceImage(void) {
+        return this->referenceImagePointer;
+    }
+    virtual int* GetReferenceMask(void) {
+        return this->referenceMaskPointer;
+    }
 
 protected:
-   nifti_image *referenceImagePointer;
-   int *referenceMaskPointer;
-   nifti_image *warpedFloatingImagePointer;
-   nifti_image *warpedFloatingGradientImagePointer;
-   nifti_image *forwardVoxelBasedGradientImagePointer;
-   nifti_image *forwardLocalWeightSimImagePointer;
-
-   bool isSymmetric;
-   nifti_image *floatingImagePointer;
-   int *floatingMaskPointer;
-   nifti_image *warpedReferenceImagePointer;
-   nifti_image *warpedReferenceGradientImagePointer;
-   nifti_image *backwardVoxelBasedGradientImagePointer;
-
-   double timePointWeight[255] = {0};
-   int referenceTimePoint;
+    nifti_image *referenceImagePointer;
+    int *referenceMaskPointer;
+    nifti_image *warpedFloatingImagePointer;
+    nifti_image *warpedFloatingGradientImagePointer;
+    nifti_image *forwardVoxelBasedGradientImagePointer;
+    nifti_image *forwardLocalWeightSimImagePointer;
 
-   /// @brief Measure class constructor
-   reg_measure() {
-#ifndef NDEBUG
-      printf("[NiftyReg DEBUG] reg_measure constructor called\n");
-#endif
-   }
+    bool isSymmetric;
+    nifti_image *floatingImagePointer;
+    int *floatingMaskPointer;
+    nifti_image *warpedReferenceImagePointer;
+    nifti_image *warpedReferenceGradientImagePointer;
+    nifti_image *backwardVoxelBasedGradientImagePointer;
 
-   /// @brief Measure class destructor
-   virtual ~reg_measure() {}
+    double timePointWeight[255] = {0};
+    int referenceTimePoint;
 };
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index f5feaec0..7522eb98 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -400,7 +400,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
                                  nifti_image *warFloImgPtr,
                                  nifti_image *warFloGraPtr,
                                  nifti_image *forVoxBasedGraPtr,
-                                 nifti_image *forwardLocalWeightPtr,
+                                 nifti_image *localWeightSimPtr,
                                  int *maskFloPtr,
                                  nifti_image *warRefImgPtr,
                                  nifti_image *warRefGraPtr,
@@ -412,7 +412,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
                                warFloImgPtr,
                                warFloGraPtr,
                                forVoxBasedGraPtr,
-                               forwardLocalWeightPtr,
+                               localWeightSimPtr,
                                maskFloPtr,
                                warRefImgPtr,
                                warRefGraPtr,
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index 8c1c7d7f..771cfd45 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -30,23 +30,21 @@ class reg_mind: public reg_ssd {
     virtual ~reg_mind();
 
     /// @brief Initialise the reg_mind object
-    void InitialiseMeasure(nifti_image *refImgPtr,
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
                            nifti_image *floImgPtr,
                            int *maskRefPtr,
                            nifti_image *warFloImgPtr,
                            nifti_image *warFloGraPtr,
                            nifti_image *forVoxBasedGraPtr,
-                           nifti_image *forwardLocalWeightPtr = nullptr,
+                           nifti_image *localWeightSimPtr = nullptr,
                            int *maskFloPtr = nullptr,
                            nifti_image *warRefImgPtr = nullptr,
                            nifti_image *warRefGraPtr = nullptr,
-                           nifti_image *bckVoxBasedGraPtr = nullptr);
-
+                           nifti_image *bckVoxBasedGraPtr = nullptr) override;
     /// @brief Returns the mind based measure of similarity value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
-
     virtual void SetDescriptorOffset(int);
     virtual int GetDescriptorOffset();
 
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 5c8979a7..dfecd74e 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -110,7 +110,7 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
                                 nifti_image *warFloImgPtr,
                                 nifti_image *warFloGraPtr,
                                 nifti_image *forVoxBasedGraPtr,
-                                nifti_image *forwardLocalWeightPtr,
+                                nifti_image *localWeightSimPtr,
                                 int *maskFloPtr,
                                 nifti_image *warRefImgPtr,
                                 nifti_image *warRefGraPtr,
@@ -122,7 +122,7 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
                                    warFloImgPtr,
                                    warFloGraPtr,
                                    forVoxBasedGraPtr,
-                                   forwardLocalWeightPtr,
+                                   localWeightSimPtr,
                                    maskFloPtr,
                                    warRefImgPtr,
                                    warRefGraPtr,
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index e58b58e7..d1199822 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -28,21 +28,19 @@ class reg_nmi: public reg_measure {
     /// @brief reg_nmi class destructor
     virtual ~reg_nmi();
 
-    void InitialiseMeasure(nifti_image *refImgPtr,
-                           nifti_image *floImgPtr,
-                           int *maskRefPtr,
-                           nifti_image *warFloImgPtr,
-                           nifti_image *warFloGraPtr,
-                           nifti_image *forVoxBasedGraPtr,
-                           nifti_image *forwardLocalWeightPtr = nullptr,
-                           int *maskFloPtr = nullptr,
-                           nifti_image *warRefImgPtr = nullptr,
-                           nifti_image *warRefGraPtr = nullptr,
-                           nifti_image *bckVoxBasedGraPtr = nullptr);
-
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr = nullptr,
+                                   int *maskFloPtr = nullptr,
+                                   nifti_image *warRefImgPtr = nullptr,
+                                   nifti_image *warRefGraPtr = nullptr,
+                                   nifti_image *bckVoxBasedGraPtr = nullptr) override;
     /// @brief Returns the nmi value
     virtual double GetSimilarityMeasureValue() override;
-
     /// @brief Compute the voxel based nmi gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
 
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index e415dece..c2d248bc 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -27,18 +27,17 @@ class reg_ssd: public reg_measure {
     virtual ~reg_ssd() {}
 
     /// @brief Initialise the reg_ssd object
-    void InitialiseMeasure(nifti_image *refImgPtr,
-                           nifti_image *floImgPtr,
-                           int *maskRefPtr,
-                           nifti_image *warFloImgPtr,
-                           nifti_image *warFloGraPtr,
-                           nifti_image *forVoxBasedGraPtr,
-                           nifti_image *localWeightSimPtr,
-                           int *maskFloPtr = nullptr,
-                           nifti_image *warRefImgPtr = nullptr,
-                           nifti_image *warRefGraPtr = nullptr,
-                           nifti_image *bckVoxBasedGraPtr = nullptr);
-
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr = nullptr,
+                                   int *maskFloPtr = nullptr,
+                                   nifti_image *warRefImgPtr = nullptr,
+                                   nifti_image *warRefGraPtr = nullptr,
+                                   nifti_image *bckVoxBasedGraPtr = nullptr) override;
     /// @brief Define if the specified time point should be normalised
     void SetNormaliseTimepoint(int timepoint, bool normalise);
     /// @brief Returns the ssd value
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 03c1515a..8d63ab53 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -74,12 +74,13 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
 set(NAME _reg_cuda_kernels)
 cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
+    CudaAladinContent.cpp
     CudaCompute.cpp
     CudaContent.cpp
-    CudaF3dContent.cpp
     CudaContextSingleton.cpp
-    CudaAladinContent.cpp
+    CudaF3dContent.cpp
     CudaKernelFactory.cpp
+    CudaMeasure.cpp
     affineDeformationKernel.cu
     blockMatchingKernel.cu
     resampleKernel.cu
@@ -103,7 +104,7 @@ install(TARGETS ${NAME}
     LIBRARY DESTINATION lib
     ARCHIVE DESTINATION lib
 )
-install(FILES blockMatchingKernel.h CudaCompute.h CudaContent.h CudaF3dContent.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda)
+install(FILES blockMatchingKernel.h CudaCompute.h CudaContent.h CudaF3dContent.h CudaMeasure.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda)
 install(FILES CudaKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CudaAffineDeformationFieldKernel.h CudaBlockMatchingKernel.h CudaConvolutionKernel.h CudaOptimiseKernel.h CudaResampleImageKernel.h DESTINATION include/cuda)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
new file mode 100644
index 00000000..9ae5d7d2
--- /dev/null
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -0,0 +1,49 @@
+#include "CudaMeasure.h"
+#include "CudaF3dContent.h"
+#include "_reg_nmi_gpu.h"
+#include "_reg_ssd_gpu.h"
+
+/* *************************************************************** */
+reg_measure* CudaMeasure::Create(const MeasureType& measureType) {
+    switch (measureType) {
+    case MeasureType::Nmi:
+        return new reg_nmi_gpu();
+    case MeasureType::Ssd:
+        return new reg_ssd_gpu();
+    case MeasureType::Dti:
+        return new reg_dti_gpu();
+    case MeasureType::Lncc:
+        return new reg_lncc_gpu();
+    case MeasureType::Kld:
+        return new reg_kld_gpu();
+    case MeasureType::Mind:
+        reg_print_msg_error("MIND measure type isn't implemented for GPU");
+        reg_exit();
+    case MeasureType::Mindssc:
+        reg_print_msg_error("MIND-SSC measure type isn't implemented for GPU");
+        reg_exit();
+    }
+    reg_print_msg_error("Unsupported measure type");
+    reg_exit();
+    return nullptr;
+}
+/* *************************************************************** */
+void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con) {
+    reg_measure_gpu *measureGpu = dynamic_cast<reg_measure_gpu*>(&measure);
+    CudaF3dContent *cudaCon = dynamic_cast<CudaF3dContent*>(&con);
+    measureGpu->InitialiseMeasure(cudaCon->Content::GetReference(),
+                                  cudaCon->Content::GetFloating(),
+                                  cudaCon->Content::GetReferenceMask(),
+                                  cudaCon->Content::GetReference()->nvox,
+                                  cudaCon->Content::GetWarped(),
+                                  cudaCon->F3dContent::GetWarpedGradient(),
+                                  cudaCon->F3dContent::GetVoxelBasedMeasureGradient(),
+                                  cudaCon->F3dContent::GetLocalWeightSim(),
+                                  cudaCon->GetReferenceCuda()[0],
+                                  cudaCon->GetFloatingCuda()[0],
+                                  cudaCon->GetReferenceMaskCuda(),
+                                  cudaCon->GetWarpedCuda()[0],
+                                  cudaCon->GetWarpedGradientCuda()[0],
+                                  cudaCon->GetVoxelBasedMeasureGradientCuda());
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h
new file mode 100644
index 00000000..c9c7f510
--- /dev/null
+++ b/reg-lib/cuda/CudaMeasure.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include "Measure.h"
+
+class CudaMeasure: public Measure {
+public:
+    virtual reg_measure* Create(const MeasureType& measureType) override;
+    virtual void Initialise(reg_measure& measure, F3dContent& con) override;
+};
diff --git a/reg-lib/cuda/CudaMeasureFactory.h b/reg-lib/cuda/CudaMeasureFactory.h
new file mode 100644
index 00000000..2f597e43
--- /dev/null
+++ b/reg-lib/cuda/CudaMeasureFactory.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "CudaMeasure.h"
+
+class CudaMeasureFactory: public MeasureFactory {
+public:
+    virtual Measure* Produce() override { return new CudaMeasure(); }
+};
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 70bfb4c2..fc82d88f 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -1,7 +1,7 @@
 /** @file _reg_measure_gpu.h
  * @author Marc Modat
  * @date 25/06/2013
- * @brief Contains a measure class to embbed all gpu measures of similarity classes
+ * @brief Contains a measure class to embed all gpu measures of similarity classes
  * Also contains an interface class between reg_base and the measure class
  */
 
@@ -16,106 +16,128 @@
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /// @brief Class that contains the GPU device pointers
 class reg_measure_gpu {
-protected:
-   /// @brief Measure class constructor
-   reg_measure_gpu() {}
-   /// @brief Measure class destructor
-   virtual ~reg_measure_gpu() {}
+public:
+    /// @brief Measure class constructor
+    reg_measure_gpu() {}
+    /// @brief Measure class destructor
+    virtual ~reg_measure_gpu() {}
 
-   cudaArray *referenceDevicePointer;
-   cudaArray *floatingDevicePointer;
-   int *referenceMaskDevicePointer;
-   int activeVoxelNumber;
-   float *warpedFloatingDevicePointer;
-   float4 *warpedFloatingGradientDevicePointer;
-   float4 *forwardVoxelBasedGradientDevicePointer;
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   int activeVoxNum,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr,
+                                   cudaArray *refDevicePtr,
+                                   cudaArray *floDevicePtr,
+                                   int *refMskDevicePtr,
+                                   float *warFloDevicePtr,
+                                   float4 *warFloGradDevicePtr,
+                                   float4 *forVoxBasedGraDevicePtr) = 0;
+
+protected:
+    cudaArray *referenceDevicePointer;
+    cudaArray *floatingDevicePointer;
+    int *referenceMaskDevicePointer;
+    int activeVoxelNumber;
+    float *warpedFloatingDevicePointer;
+    float4 *warpedFloatingGradientDevicePointer;
+    float4 *forwardVoxelBasedGradientDevicePointer;
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
 public:
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          int activeVoxNum,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          cudaArray *refDevicePtr,
-                          cudaArray *floDevicePtr,
-                          int *refMskDevicePtr,
-                          float *warFloDevicePtr,
-                          float4 *warFloGradDevicePtr,
-                          float4 *forVoxBasedGraDevicePtr) {}
-   /// @brief reg_lncc class constructor
-   reg_lncc_gpu() {
-      fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH LNCC YET\n");
-      reg_exit();
-   }
-   /// @brief reg_lncc class destructor
-   virtual ~reg_lncc_gpu() {}
-   /// @brief Returns the lncc value
-   virtual double GetSimilarityMeasureValue() override { return 0; }
-   /// @brief Compute the voxel based lncc gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
+    /// @brief reg_lncc class constructor
+    reg_lncc_gpu() {
+        fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH LNCC YET\n");
+        reg_exit();
+    }
+    /// @brief reg_lncc class destructor
+    virtual ~reg_lncc_gpu() {}
+
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   int activeVoxNum,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr,
+                                   cudaArray *refDevicePtr,
+                                   cudaArray *floDevicePtr,
+                                   int *refMskDevicePtr,
+                                   float *warFloDevicePtr,
+                                   float4 *warFloGradDevicePtr,
+                                   float4 *forVoxBasedGraDevicePtr) override {}
+    /// @brief Returns the lncc value
+    virtual double GetSimilarityMeasureValue() override { return 0; }
+    /// @brief Compute the voxel based lncc gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
 public:
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          int activeVoxNum,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          cudaArray *refDevicePtr,
-                          cudaArray *floDevicePtr,
-                          int *refMskDevicePtr,
-                          float *warFloDevicePtr,
-                          float4 *warFloGradDevicePtr,
-                          float4 *forVoxBasedGraDevicePtr) {}
-   /// @brief reg_kld_gpu class constructor
-   reg_kld_gpu() {
-      fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH KLD YET\n");
-      reg_exit();
-   }
-   /// @brief reg_kld_gpu class destructor
-   virtual ~reg_kld_gpu() {}
-   /// @brief Returns the kld value
-   virtual double GetSimilarityMeasureValue() override { return 0; }
-   /// @brief Compute the voxel based kld gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
+    /// @brief reg_kld_gpu class constructor
+    reg_kld_gpu() {
+        fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH KLD YET\n");
+        reg_exit();
+    }
+    /// @brief reg_kld_gpu class destructor
+    virtual ~reg_kld_gpu() {}
+
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   int activeVoxNum,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr,
+                                   cudaArray *refDevicePtr,
+                                   cudaArray *floDevicePtr,
+                                   int *refMskDevicePtr,
+                                   float *warFloDevicePtr,
+                                   float4 *warFloGradDevicePtr,
+                                   float4 *forVoxBasedGraDevicePtr) override {}
+    /// @brief Returns the kld value
+    virtual double GetSimilarityMeasureValue() override { return 0; }
+    /// @brief Compute the voxel based kld gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
 public:
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          int activeVoxNum,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          cudaArray *refDevicePtr,
-                          cudaArray *floDevicePtr,
-                          int *refMskDevicePtr,
-                          float *warFloDevicePtr,
-                          float4 *warFloGradDevicePtr,
-                          float4 *forVoxBasedGraDevicePtr) {}
-   /// @brief reg_dti_gpu class constructor
-   reg_dti_gpu() {
-      fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH DTI YET\n");
-      reg_exit();
-   }
-   /// @brief reg_dti_gpu class destructor
-   virtual ~reg_dti_gpu() {}
-   /// @brief Returns the dti value
-   virtual double GetSimilarityMeasureValue() override { return 0; }
-   /// @brief Compute the voxel based dti gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
+    /// @brief reg_dti_gpu class constructor
+    reg_dti_gpu() {
+        fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH DTI YET\n");
+        reg_exit();
+    }
+    /// @brief reg_dti_gpu class destructor
+    virtual ~reg_dti_gpu() {}
+
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   int activeVoxNum,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr,
+                                   cudaArray *refDevicePtr,
+                                   cudaArray *floDevicePtr,
+                                   int *refMskDevicePtr,
+                                   float *warFloDevicePtr,
+                                   float4 *warFloGradDevicePtr,
+                                   float4 *forVoxBasedGraDevicePtr) override {}
+    /// @brief Returns the dti value
+    virtual double GetSimilarityMeasureValue() override { return 0; }
+    /// @brief Compute the voxel based dti gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 41960409..4d1e430e 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -51,6 +51,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
                                     nifti_image *warFloImgPtr,
                                     nifti_image *warFloGraPtr,
                                     nifti_image *forVoxBasedGraPtr,
+                                    nifti_image *localWeightSimPtr,
                                     cudaArray *refDevicePtr,
                                     cudaArray *floDevicePtr,
                                     int *refMskDevicePtr,
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index c8e1c198..77b78ebd 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -21,61 +21,63 @@
 /// @brief NMI measure of similarity class - GPU based
 class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
 public:
-   /// @brief reg_nmi class constructor
-   reg_nmi_gpu();
-   /// @brief reg_nmi class destructor
-   virtual ~reg_nmi_gpu();
+    /// @brief reg_nmi class constructor
+    reg_nmi_gpu();
+    /// @brief reg_nmi class destructor
+    virtual ~reg_nmi_gpu();
 
-   /// @brief Initialise the reg_nmi_gpu object
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          int activeVoxNum,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          cudaArray *refDevicePtr,
-                          cudaArray *floDevicePtr,
-                          int *refMskDevicePtr,
-                          float *warFloDevicePtr,
-                          float4 *warFloGradDevicePtr,
-                          float4 *forVoxBasedGraDevicePtr);
-   /// @brief Returns the nmi value
-   virtual double GetSimilarityMeasureValue() override;
-   /// @brief Compute the voxel based nmi gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    /// @brief Initialise the reg_nmi_gpu object
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   int activeVoxNum,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr,
+                                   cudaArray *refDevicePtr,
+                                   cudaArray *floDevicePtr,
+                                   int *refMskDevicePtr,
+                                   float *warFloDevicePtr,
+                                   float4 *warFloGradDevicePtr,
+                                   float4 *forVoxBasedGraDevicePtr) override;
+    /// @brief Returns the nmi value
+    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Compute the voxel based nmi gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
 
 protected:
-   float *forwardJointHistogramLog_device;
-	// float **backwardJointHistogramLog_device;
-   void DeallocateHistogram();
+    float *forwardJointHistogramLog_device;
+    // float **backwardJointHistogramLog_device;
+    void DeallocateHistogram();
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /// @brief NMI measure of similarity class
 class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu {
 public:
-   void InitialiseMeasure(nifti_image *refImgPtr,
-                          nifti_image *floImgPtr,
-                          int *maskRefPtr,
-                          int activeVoxNum,
-                          nifti_image *warFloImgPtr,
-                          nifti_image *warFloGraPtr,
-                          nifti_image *forVoxBasedGraPtr,
-                          cudaArray *refDevicePtr,
-                          cudaArray *floDevicePtr,
-                          int *refMskDevicePtr,
-                          float *warFloDevicePtr,
-                          float4 *warFloGradDevicePtr,
-                          float4 *forVoxBasedGraDevicePtr) {}
-   /// @brief reg_nmi class constructor
-   reg_multichannel_nmi_gpu() {}
-   /// @brief reg_nmi class destructor
-   virtual ~reg_multichannel_nmi_gpu() {}
-   /// @brief Returns the nmi value
-   virtual double GetSimilarityMeasureValue() override { return 0; }
-   /// @brief Compute the voxel based nmi gradient
-   virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
+    void InitialiseMeasure(nifti_image *refImgPtr,
+                           nifti_image *floImgPtr,
+                           int *maskRefPtr,
+                           int activeVoxNum,
+                           nifti_image *warFloImgPtr,
+                           nifti_image *warFloGraPtr,
+                           nifti_image *forVoxBasedGraPtr,
+                           nifti_image *localWeightSimPtr,
+                           cudaArray *refDevicePtr,
+                           cudaArray *floDevicePtr,
+                           int *refMskDevicePtr,
+                           float *warFloDevicePtr,
+                           float4 *warFloGradDevicePtr,
+                           float4 *forVoxBasedGraDevicePtr) {}
+    /// @brief reg_nmi class constructor
+    reg_multichannel_nmi_gpu() {}
+    /// @brief reg_nmi class destructor
+    virtual ~reg_multichannel_nmi_gpu() {}
+    /// @brief Returns the nmi value
+    virtual double GetSimilarityMeasureValue() override { return 0; }
+    /// @brief Compute the voxel based nmi gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index f7b7f96b..2f55dd21 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -25,21 +25,22 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
     reg_ssd_gpu();
     /// @brief Measure class destructor
     virtual ~reg_ssd_gpu() {}
+
     /// @brief Initialise the reg_ssd object
-    void InitialiseMeasure(nifti_image *refImgPtr,
-                           nifti_image *floImgPtr,
-                           int *maskRefPtr,
-                           int activeVoxNum,
-                           nifti_image *warFloImgPtr,
-                           nifti_image *warFloGraPtr,
-                           nifti_image *forVoxBasedGraPtr,
-                           nifti_image *localWeightSimPtr,
-                           cudaArray *refDevicePtr,
-                           cudaArray *floDevicePtr,
-                           int *refMskDevicePtr,
-                           float *warFloDevicePtr,
-                           float4 *warFloGradDevicePtr,
-                           float4 *forVoxBasedGraDevicePtr);
+    virtual void InitialiseMeasure(nifti_image *refImgPtr,
+                                   nifti_image *floImgPtr,
+                                   int *maskRefPtr,
+                                   int activeVoxNum,
+                                   nifti_image *warFloImgPtr,
+                                   nifti_image *warFloGraPtr,
+                                   nifti_image *forVoxBasedGraPtr,
+                                   nifti_image *localWeightSimPtr,
+                                   cudaArray *refDevicePtr,
+                                   cudaArray *floDevicePtr,
+                                   int *refMskDevicePtr,
+                                   float *warFloDevicePtr,
+                                   float4 *warFloGradDevicePtr,
+                                   float4 *forVoxBasedGraDevicePtr) override;
     /// @brief Returns the ssd value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based ssd gradient

From a9b79c4a0e8d3ed4d0b0d38118781fa0d0e8d2cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 16 Jan 2023 15:09:55 +0000
Subject: [PATCH 034/314] Convert pointers to references

---
 niftyreg_build_version.txt        |  2 +-
 reg-lib/Compute.cpp               | 92 ++++++++++++++---------------
 reg-lib/Compute.h                 |  4 +-
 reg-lib/ComputeFactory.h          |  2 +-
 reg-lib/Platform.cpp              | 22 +++----
 reg-lib/Platform.h                |  8 +--
 reg-lib/_reg_f3d.cpp              |  6 +-
 reg-lib/cl/ClCompute.h            |  2 +-
 reg-lib/cl/ClComputeFactory.h     |  2 +-
 reg-lib/cuda/CudaCompute.cpp      | 98 +++++++++++++++----------------
 reg-lib/cuda/CudaCompute.h        |  2 +-
 reg-lib/cuda/CudaComputeFactory.h |  2 +-
 12 files changed, 120 insertions(+), 122 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 878d5a02..0d667b5e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-146
+148
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 381956f3..982ba18b 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -6,73 +6,73 @@
 
 /* *************************************************************** */
 void Compute::ResampleImage(int inter, float paddingValue) {
-    reg_resampleImage(con->GetFloating(),
-                      con->GetWarped(),
-                      con->GetDeformationField(),
-                      con->GetReferenceMask(),
+    reg_resampleImage(con.GetFloating(),
+                      con.GetWarped(),
+                      con.GetDeformationField(),
+                      con.GetReferenceMask(),
                       inter,
                       paddingValue);
 }
 /* *************************************************************** */
 double Compute::GetJacobianPenaltyTerm(bool approx) {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    return reg_spline_getJacobianPenaltyTerm(con->GetControlPointGrid(),
-                                             con->GetReference(),
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    return reg_spline_getJacobianPenaltyTerm(con.GetControlPointGrid(),
+                                             con.GetReference(),
                                              approx);
 }
 /* *************************************************************** */
 void Compute::JacobianPenaltyTermGradient(float weight, bool approx) {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    reg_spline_getJacobianPenaltyTermGradient(con->GetControlPointGrid(),
-                                              con->GetReference(),
-                                              con->GetTransformationGradient(),
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    reg_spline_getJacobianPenaltyTermGradient(con.GetControlPointGrid(),
+                                              con.GetReference(),
+                                              con.GetTransformationGradient(),
                                               weight,
                                               approx);
 }
 /* *************************************************************** */
 double Compute::CorrectFolding(bool approx) {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    return reg_spline_correctFolding(con->GetControlPointGrid(),
-                                     con->GetReference(),
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    return reg_spline_correctFolding(con.GetControlPointGrid(),
+                                     con.GetReference(),
                                      approx);
 }
 /* *************************************************************** */
 double Compute::ApproxBendingEnergy() {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    return reg_spline_approxBendingEnergy(con->GetControlPointGrid());
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    return reg_spline_approxBendingEnergy(con.GetControlPointGrid());
 }
 /* *************************************************************** */
 void Compute::ApproxBendingEnergyGradient(float weight) {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    reg_spline_approxBendingEnergyGradient(con->GetControlPointGrid(),
-                                           con->GetTransformationGradient(),
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    reg_spline_approxBendingEnergyGradient(con.GetControlPointGrid(),
+                                           con.GetTransformationGradient(),
                                            weight);
 }
 /* *************************************************************** */
 double Compute::ApproxLinearEnergy() {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    return reg_spline_approxLinearEnergy(con->GetControlPointGrid());
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    return reg_spline_approxLinearEnergy(con.GetControlPointGrid());
 }
 /* *************************************************************** */
 void Compute::ApproxLinearEnergyGradient(float weight) {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    reg_spline_approxLinearEnergyGradient(con->GetControlPointGrid(),
-                                          con->GetTransformationGradient(),
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    reg_spline_approxLinearEnergyGradient(con.GetControlPointGrid(),
+                                          con.GetTransformationGradient(),
                                           weight);
 }
 /* *************************************************************** */
 double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    return reg_spline_getLandmarkDistance(con->GetControlPointGrid(),
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    return reg_spline_getLandmarkDistance(con.GetControlPointGrid(),
                                           landmarkNumber,
                                           landmarkReference,
                                           landmarkFloating);
 }
 /* *************************************************************** */
 void Compute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    reg_spline_getLandmarkDistanceGradient(con->GetControlPointGrid(),
-                                           con->GetTransformationGradient(),
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    reg_spline_getLandmarkDistanceGradient(con.GetControlPointGrid(),
+                                           con.GetTransformationGradient(),
                                            landmarkNumber,
                                            landmarkReference,
                                            landmarkFloating,
@@ -80,16 +80,16 @@ void Compute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkRef
 }
 /* *************************************************************** */
 void Compute::GetDeformationField(bool composition, bool bspline) {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    reg_spline_getDeformationField(con->GetControlPointGrid(),
-                                   con->GetDeformationField(),
-                                   con->GetReferenceMask(),
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    reg_spline_getDeformationField(con.GetControlPointGrid(),
+                                   con.GetDeformationField(),
+                                   con.GetReferenceMask(),
                                    composition,
                                    bspline);
 }
 /* *************************************************************** */
 void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
-    nifti_image *controlPointGrid = dynamic_cast<F3dContent*>(con)->GetControlPointGrid();
+    nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).GetControlPointGrid();
     if (optimiseX && optimiseY && optimiseZ) {
         // Update the values for all axis displacement
         for (size_t i = 0; i < controlPointGrid->nvox; ++i)
@@ -121,21 +121,21 @@ void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, floa
 }
 /* *************************************************************** */
 void Compute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    reg_getImageGradient(con->GetFloating(),
-                         con->GetWarpedGradient(),
-                         con->GetDeformationField(),
-                         con->GetReferenceMask(),
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    reg_getImageGradient(con.GetFloating(),
+                         con.GetWarpedGradient(),
+                         con.GetDeformationField(),
+                         con.GetReferenceMask(),
                          interpolation,
                          paddingValue,
                          activeTimepoint);
 }
 /* *************************************************************** */
 void Compute::VoxelCentricToNodeCentric(float weight) {
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    mat44 *reorientation = Content::GetIJKMatrix(*con->GetFloating());
-    reg_voxelCentric2NodeCentric(con->GetTransformationGradient(),
-                                 con->GetVoxelBasedMeasureGradient(),
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    mat44 *reorientation = Content::GetIJKMatrix(*con.GetFloating());
+    reg_voxelCentric2NodeCentric(con.GetTransformationGradient(),
+                                 con.GetVoxelBasedMeasureGradient(),
                                  weight,
                                  false, // no update
                                  reorientation);
@@ -143,21 +143,19 @@ void Compute::VoxelCentricToNodeCentric(float weight) {
 /* *************************************************************** */
 double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_getMaximalLength to accept optimiseX, optimiseY, optimiseZ
-    nifti_image *transformationGradient = dynamic_cast<F3dContent*>(con)->GetTransformationGradient();
+    nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
     switch (transformationGradient->datatype) {
     case NIFTI_TYPE_FLOAT32:
         return reg_getMaximalLength<float>(transformationGradient);
-        break;
     case NIFTI_TYPE_FLOAT64:
         return reg_getMaximalLength<double>(transformationGradient);
-        break;
     }
     return 0;
 }
 /* *************************************************************** */
 void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
     // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ
-    nifti_image *transformationGradient = dynamic_cast<F3dContent*>(con)->GetTransformationGradient();
+    nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
     reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength);
 }
 /* *************************************************************** */
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index 22f99c89..caedc34b 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -5,7 +5,7 @@
 class Compute {
 public:
     Compute() = delete;
-    Compute(Content *conIn): con(conIn) {}
+    Compute(Content& conIn): con(conIn) {}
     virtual ~Compute() {}
 
     virtual void ResampleImage(int inter, float paddingValue);
@@ -26,5 +26,5 @@ class Compute {
     virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength);
 
 protected:
-    Content *con;
+    Content& con;
 };
diff --git a/reg-lib/ComputeFactory.h b/reg-lib/ComputeFactory.h
index e2c2de1e..d2f4e0fd 100644
--- a/reg-lib/ComputeFactory.h
+++ b/reg-lib/ComputeFactory.h
@@ -4,6 +4,6 @@
 
 class ComputeFactory {
 public:
-    virtual Compute* Produce(Content *con) { return new Compute(con); }
     virtual ~ComputeFactory() {}
+    virtual Compute* Produce(Content& con) { return new Compute(con); }
 };
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index abe57f5c..6a77db4c 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -40,7 +40,7 @@ Platform::Platform(const PlatformType& platformTypeIn) {
 #endif
 }
 /* *************************************************************** */
-Compute* Platform::CreateCompute(Content *con) const {
+Compute* Platform::CreateCompute(Content& con) const {
     return computeFactory->Produce(con);
 }
 /* *************************************************************** */
@@ -49,27 +49,27 @@ Kernel* Platform::CreateKernel(const std::string& name, Content *con) const {
 }
 /* *************************************************************** */
 template<typename Type>
-reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent *con,
-                                               InterfaceOptimiser *opt,
+reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
+                                               InterfaceOptimiser& opt,
                                                size_t maxIterationNumber,
                                                bool useConjGradient,
                                                bool optimiseX,
                                                bool optimiseY,
-                                               bool optimiseZ) {
+                                               bool optimiseZ) const {
     reg_optimiser<Type> *optimiser;
-    nifti_image *controlPointGrid = con->F3dContent::GetControlPointGrid();
+    nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
     Type *controlPointGridData, *transformationGradientData;
 
     if (platformType == PlatformType::Cpu) {
         optimiser = useConjGradient ? new reg_conjugateGradient<Type>() : new reg_optimiser<Type>();
         controlPointGridData = (Type*)controlPointGrid->data;
-        transformationGradientData = (Type*)con->F3dContent::GetTransformationGradient()->data;
+        transformationGradientData = (Type*)con.F3dContent::GetTransformationGradient()->data;
     }
 #ifdef _USE_CUDA
     else if (platformType == PlatformType::Cuda) {
         optimiser = dynamic_cast<reg_optimiser<Type>*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu());
-        controlPointGridData = (Type*)dynamic_cast<CudaF3dContent*>(con)->GetControlPointGridCuda();
-        transformationGradientData = (Type*)dynamic_cast<CudaF3dContent*>(con)->GetTransformationGradientCuda();
+        controlPointGridData = (Type*)dynamic_cast<CudaF3dContent&>(con).GetControlPointGridCuda();
+        transformationGradientData = (Type*)dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda();
     }
 #endif
 
@@ -80,14 +80,14 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent *con,
                           optimiseZ,
                           maxIterationNumber,
                           0, // currentIterationNumber,
-                          opt,
+                          &opt,
                           controlPointGridData,
                           transformationGradientData);
 
     return optimiser;
 }
-template reg_optimiser<float>* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool);
-template reg_optimiser<double>* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool);
+template reg_optimiser<float>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const;
+template reg_optimiser<double>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const;
 /* *************************************************************** */
 Measure* Platform::CreateMeasure() {
     return measureFactory->Produce();
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 478dd2db..a51e9202 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -13,16 +13,16 @@ class Platform {
     Platform(const PlatformType& platformTypeIn);
     virtual ~Platform();
 
-    Compute* CreateCompute(Content *con) const;
+    Compute* CreateCompute(Content& con) const;
     Kernel* CreateKernel(const std::string& name, Content *con) const;
     template<typename Type>
-    reg_optimiser<Type>* CreateOptimiser(F3dContent *con,
-                                         InterfaceOptimiser *opt,
+    reg_optimiser<Type>* CreateOptimiser(F3dContent& con,
+                                         InterfaceOptimiser& opt,
                                          size_t maxIterationNumber,
                                          bool useConjGradient,
                                          bool optimiseX,
                                          bool optimiseY,
-                                         bool optimiseZ);
+                                         bool optimiseZ) const;
     Measure* CreateMeasure();
 
     std::string GetName();
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 3a03502f..e386a856 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -381,7 +381,7 @@ void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int
     else if (this->platformType == PlatformType::Cuda)
         this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
 #endif
-    this->compute = this->platform->CreateCompute(this->con);
+    this->compute = this->platform->CreateCompute(*this->con);
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -717,8 +717,8 @@ void reg_f3d<T>::UpdateParameters(float scale) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetOptimiser() {
-    this->optimiser = this->platform->template CreateOptimiser<T>(dynamic_cast<F3dContent*>(this->con),
-                                                                  this,
+    this->optimiser = this->platform->template CreateOptimiser<T>(*dynamic_cast<F3dContent*>(this->con),
+                                                                  *this,
                                                                   this->maxIterationNumber,
                                                                   this->useConjGradient,
                                                                   this->optimiseX,
diff --git a/reg-lib/cl/ClCompute.h b/reg-lib/cl/ClCompute.h
index b93d3b04..9c8dc009 100644
--- a/reg-lib/cl/ClCompute.h
+++ b/reg-lib/cl/ClCompute.h
@@ -4,7 +4,7 @@
 
 class ClCompute: public Compute {
 public:
-    ClCompute(Content *con): Compute(con) {}
+    ClCompute(Content& con): Compute(con) {}
 
     virtual void ResampleImage(int inter, float paddingValue) override;
 };
diff --git a/reg-lib/cl/ClComputeFactory.h b/reg-lib/cl/ClComputeFactory.h
index 7a2fd18d..7673704a 100644
--- a/reg-lib/cl/ClComputeFactory.h
+++ b/reg-lib/cl/ClComputeFactory.h
@@ -5,5 +5,5 @@
 
 class ClComputeFactory: public ComputeFactory {
 public:
-    virtual Compute* Produce(Content *con) override { return new ClCompute(con); }
+    virtual Compute* Produce(Content& con) override { return new ClCompute(con); }
 };
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 69c053b8..694e1586 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -6,52 +6,52 @@
 
 /* *************************************************************** */
 void CudaCompute::ResampleImage(int inter, float paddingValue) {
-    CudaContent *con = dynamic_cast<CudaContent*>(this->con);
-    reg_resampleImage_gpu(con->Content::GetFloating(),
-                          con->GetWarpedCuda()[0],
-                          con->GetFloatingCuda()[0],
-                          con->GetDeformationFieldCuda(),
-                          con->GetReferenceMaskCuda(),
-                          con->Content::GetReference()->nvox,
+    CudaContent& con = dynamic_cast<CudaContent&>(this->con);
+    reg_resampleImage_gpu(con.Content::GetFloating(),
+                          con.GetWarpedCuda()[0],
+                          con.GetFloatingCuda()[0],
+                          con.GetDeformationFieldCuda(),
+                          con.GetReferenceMaskCuda(),
+                          con.Content::GetReference()->nvox,
                           paddingValue);
 }
 /* *************************************************************** */
 double CudaCompute::GetJacobianPenaltyTerm(bool approx) {
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    return reg_spline_getJacobianPenaltyTerm_gpu(con->F3dContent::GetReference(),
-                                                 con->F3dContent::GetControlPointGrid(),
-                                                 con->GetControlPointGridCuda(),
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    return reg_spline_getJacobianPenaltyTerm_gpu(con.F3dContent::GetReference(),
+                                                 con.F3dContent::GetControlPointGrid(),
+                                                 con.GetControlPointGridCuda(),
                                                  approx);
 }
 /* *************************************************************** */
 void CudaCompute::JacobianPenaltyTermGradient(float weight, bool approx) {
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    reg_spline_getJacobianPenaltyTermGradient_gpu(con->F3dContent::GetReference(),
-                                                  con->F3dContent::GetControlPointGrid(),
-                                                  con->GetControlPointGridCuda(),
-                                                  con->GetTransformationGradientCuda(),
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    reg_spline_getJacobianPenaltyTermGradient_gpu(con.F3dContent::GetReference(),
+                                                  con.F3dContent::GetControlPointGrid(),
+                                                  con.GetControlPointGridCuda(),
+                                                  con.GetTransformationGradientCuda(),
                                                   weight,
                                                   approx);
 }
 /* *************************************************************** */
 double CudaCompute::CorrectFolding(bool approx) {
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    return reg_spline_correctFolding_gpu(con->F3dContent::GetReference(),
-                                         con->F3dContent::GetControlPointGrid(),
-                                         con->GetControlPointGridCuda(),
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    return reg_spline_correctFolding_gpu(con.F3dContent::GetReference(),
+                                         con.F3dContent::GetControlPointGrid(),
+                                         con.GetControlPointGridCuda(),
                                          approx);
 }
 /* *************************************************************** */
 double CudaCompute::ApproxBendingEnergy() {
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    return reg_spline_approxBendingEnergy_gpu(con->F3dContent::GetControlPointGrid(), con->GetControlPointGridCuda());
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    return reg_spline_approxBendingEnergy_gpu(con.F3dContent::GetControlPointGrid(), con.GetControlPointGridCuda());
 }
 /* *************************************************************** */
 void CudaCompute::ApproxBendingEnergyGradient(float weight) {
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    reg_spline_approxBendingEnergyGradient_gpu(con->F3dContent::GetControlPointGrid(),
-                                               con->GetControlPointGridCuda(),
-                                               con->GetTransformationGradientCuda(),
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    reg_spline_approxBendingEnergyGradient_gpu(con.F3dContent::GetControlPointGrid(),
+                                               con.GetControlPointGridCuda(),
+                                               con.GetTransformationGradientCuda(),
                                                weight);
 }
 /* *************************************************************** */
@@ -66,7 +66,7 @@ void CudaCompute::ApproxLinearEnergyGradient(float weight) {
     // Use CPU temporarily
     Compute::ApproxLinearEnergyGradient(weight);
     // Transfer the data back to the CUDA device
-    dynamic_cast<CudaF3dContent*>(con)->UpdateTransformationGradient();
+    dynamic_cast<CudaF3dContent&>(con).UpdateTransformationGradient();
 }
 /* *************************************************************** */
 double CudaCompute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) {
@@ -80,23 +80,23 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar
     // Use CPU temporarily
     Compute::LandmarkDistanceGradient(landmarkNumber, landmarkReference, landmarkFloating, weight);
     // Transfer the data back to the CUDA device
-    dynamic_cast<CudaF3dContent*>(con)->UpdateTransformationGradient();
+    dynamic_cast<CudaF3dContent&>(con).UpdateTransformationGradient();
 }
 /* *************************************************************** */
 void CudaCompute::GetDeformationField(bool composition, bool bspline) {
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    reg_spline_getDeformationField_gpu(con->F3dContent::GetControlPointGrid(),
-                                       con->F3dContent::GetReference(),
-                                       con->GetControlPointGridCuda(),
-                                       con->GetDeformationFieldCuda(),
-                                       con->GetReferenceMaskCuda(),
-                                       con->F3dContent::GetReference()->nvox,
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    reg_spline_getDeformationField_gpu(con.F3dContent::GetControlPointGrid(),
+                                       con.F3dContent::GetReference(),
+                                       con.GetControlPointGridCuda(),
+                                       con.GetDeformationFieldCuda(),
+                                       con.GetReferenceMaskCuda(),
+                                       con.F3dContent::GetReference()->nvox,
                                        bspline);
 }
 /* *************************************************************** */
 void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_updateControlPointPosition_gpu to accept optimiseX, optimiseY, optimiseZ
-    reg_updateControlPointPosition_gpu(dynamic_cast<CudaF3dContent*>(con)->F3dContent::GetControlPointGrid(),
+    reg_updateControlPointPosition_gpu(dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid(),
                                        reinterpret_cast<float4*>(currentDOF),
                                        reinterpret_cast<float4*>(bestDOF),
                                        reinterpret_cast<float4*>(gradient),
@@ -104,31 +104,31 @@ void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF,
 }
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    reg_getImageGradient_gpu(con->F3dContent::GetFloating(),
-                             con->GetFloatingCuda()[0],
-                             con->GetDeformationFieldCuda(),
-                             con->GetWarpedGradientCuda()[0],
-                             con->F3dContent::GetReference()->nvox,
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    reg_getImageGradient_gpu(con.F3dContent::GetFloating(),
+                             con.GetFloatingCuda()[0],
+                             con.GetDeformationFieldCuda(),
+                             con.GetWarpedGradientCuda()[0],
+                             con.F3dContent::GetReference()->nvox,
                              paddingValue);
 }
 /* *************************************************************** */
 void CudaCompute::VoxelCentricToNodeCentric(float weight) {
-    CudaF3dContent *con = dynamic_cast<CudaF3dContent*>(this->con);
-    reg_voxelCentric2NodeCentric_gpu(con->F3dContent::GetWarped(),
-                                     con->F3dContent::GetControlPointGrid(),
-                                     con->GetVoxelBasedMeasureGradientCuda(),
-                                     con->GetTransformationGradientCuda(),
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(),
+                                     con.F3dContent::GetControlPointGrid(),
+                                     con.GetVoxelBasedMeasureGradientCuda(),
+                                     con.GetTransformationGradientCuda(),
                                      weight);
 }
 /* *************************************************************** */
 double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ
-    return reg_getMaximalLength_gpu(dynamic_cast<CudaF3dContent*>(con)->GetTransformationGradientCuda(), nodeNumber);
+    return reg_getMaximalLength_gpu(dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), nodeNumber);
 }
 /* *************************************************************** */
 void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
     // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ
-    reg_multiplyValue_gpu(nodeNumber, dynamic_cast<CudaF3dContent*>(con)->GetTransformationGradientCuda(), 1 / (float)maxGradLength);
+    reg_multiplyValue_gpu(nodeNumber, dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), 1 / (float)maxGradLength);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 5cf53720..49a22181 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -4,7 +4,7 @@
 
 class CudaCompute: public Compute {
 public:
-    CudaCompute(Content *con): Compute(con) {}
+    CudaCompute(Content& con): Compute(con) {}
 
     virtual void ResampleImage(int inter, float paddingValue) override;
     virtual double GetJacobianPenaltyTerm(bool approx) override;
diff --git a/reg-lib/cuda/CudaComputeFactory.h b/reg-lib/cuda/CudaComputeFactory.h
index d14fd425..8a5f4084 100644
--- a/reg-lib/cuda/CudaComputeFactory.h
+++ b/reg-lib/cuda/CudaComputeFactory.h
@@ -5,5 +5,5 @@
 
 class CudaComputeFactory: public ComputeFactory {
 public:
-    virtual Compute* Produce(Content *con) override { return new CudaCompute(con); }
+    virtual Compute* Produce(Content& con) override { return new CudaCompute(con); }
 };

From ccc0266a14f4f405cf5595fbadb5f96944fe0648 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 17 Jan 2023 13:39:01 +0000
Subject: [PATCH 035/314] Refactorisations

---
 niftyreg_build_version.txt            |    2 +-
 reg-apps/reg_f3d.cpp                  |    2 +-
 reg-lib/Compute.cpp                   |    6 +-
 reg-lib/ComputeFactory.h              |    1 -
 reg-lib/F3dContent.h                  |    2 +-
 reg-lib/Platform.cpp                  |   12 +-
 reg-lib/Platform.h                    |    9 +-
 reg-lib/_reg_base.cpp                 |  345 +--
 reg-lib/_reg_f3d.cpp                  |  132 +-
 reg-lib/_reg_f3d.h                    |    4 -
 reg-lib/cpu/_reg_localTrans_regul.cpp | 4117 ++++++++++++-------------
 reg-lib/cpu/_reg_localTrans_regul.h   |   36 +-
 reg-lib/cuda/CudaF3dContent.h         |    2 +-
 reg-lib/cuda/_reg_f3d_gpu.cpp         | 1059 -------
 reg-lib/cuda/_reg_f3d_gpu.h           |   98 -
 15 files changed, 2078 insertions(+), 3749 deletions(-)
 delete mode 100755 reg-lib/cuda/_reg_f3d_gpu.cpp
 delete mode 100755 reg-lib/cuda/_reg_f3d_gpu.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 0d667b5e..15c44e93 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-148
+149
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 722e0c7f..69da6cc2 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -84,7 +84,7 @@ void Usage(char *exec) {
     reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position");
     reg_print_info(exec, "\t-land <float> <file>\tUse of a set of landmarks which distance should be minimised");
     reg_print_info(exec, "\t\t\t\tThe first argument corresponds to the weight given to this regularisation (between 0 and 1)");
-    reg_print_info(exec, "\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimeter as");
+    reg_print_info(exec, "\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimetre as");
     reg_print_info(exec, "\t\t\t\t<refX> <refY> <refZ> <floX> <floY> <floZ>\\n for 3D images and");
     reg_print_info(exec, "\t\t\t\t<refX> <refY> <floX> <floY>\\n for 2D images");
     reg_print_info(exec, "");
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 982ba18b..02938046 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -56,14 +56,14 @@ double Compute::ApproxLinearEnergy() {
 /* *************************************************************** */
 void Compute::ApproxLinearEnergyGradient(float weight) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    reg_spline_approxLinearEnergyGradient(con.GetControlPointGrid(),
+    reg_spline_approxLinearEnergyGradient(con.F3dContent::GetControlPointGrid(),
                                           con.GetTransformationGradient(),
                                           weight);
 }
 /* *************************************************************** */
 double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    return reg_spline_getLandmarkDistance(con.GetControlPointGrid(),
+    return reg_spline_getLandmarkDistance(con.F3dContent::GetControlPointGrid(),
                                           landmarkNumber,
                                           landmarkReference,
                                           landmarkFloating);
@@ -71,7 +71,7 @@ double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkRefere
 /* *************************************************************** */
 void Compute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    reg_spline_getLandmarkDistanceGradient(con.GetControlPointGrid(),
+    reg_spline_getLandmarkDistanceGradient(con.F3dContent::GetControlPointGrid(),
                                            con.GetTransformationGradient(),
                                            landmarkNumber,
                                            landmarkReference,
diff --git a/reg-lib/ComputeFactory.h b/reg-lib/ComputeFactory.h
index d2f4e0fd..426e1d1f 100644
--- a/reg-lib/ComputeFactory.h
+++ b/reg-lib/ComputeFactory.h
@@ -4,6 +4,5 @@
 
 class ComputeFactory {
 public:
-    virtual ~ComputeFactory() {}
     virtual Compute* Produce(Content& con) { return new Compute(con); }
 };
diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h
index 0df0f4d8..5c6b65d9 100644
--- a/reg-lib/F3dContent.h
+++ b/reg-lib/F3dContent.h
@@ -8,7 +8,7 @@ class F3dContent: public virtual Content {
     F3dContent(nifti_image *referenceIn,
                nifti_image *floatingIn,
                nifti_image *controlPointGridIn,
-               nifti_image *localWeightSimIn,
+               nifti_image *localWeightSimIn = nullptr,
                int *referenceMaskIn = nullptr,
                mat44 *transformationMatrixIn = nullptr,
                size_t bytesIn = sizeof(float));
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 6a77db4c..9acd9681 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -89,15 +89,15 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
 template reg_optimiser<float>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const;
 template reg_optimiser<double>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const;
 /* *************************************************************** */
-Measure* Platform::CreateMeasure() {
+Measure* Platform::CreateMeasure() const {
     return measureFactory->Produce();
 }
 /* *************************************************************** */
-std::string Platform::GetName() {
+std::string Platform::GetName() const {
     return platformName;
 }
 /* *************************************************************** */
-unsigned Platform::GetGpuIdx() {
+unsigned int Platform::GetGpuIdx() const {
     return gpuIdx;
 }
 /* *************************************************************** */
@@ -135,14 +135,10 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) {
 #endif
 }
 /* *************************************************************** */
-PlatformType Platform::GetPlatformType() {
+PlatformType Platform::GetPlatformType() const {
     return platformType;
 }
 /* *************************************************************** */
-//void Platform::SetPlatformType(const PlatformType& platformTypeIn) {
-//    platformType = platformTypeIn;
-//}
-/* *************************************************************** */
 Platform::~Platform() {
     delete kernelFactory;
     delete computeFactory;
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index a51e9202..76b650ab 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -23,13 +23,12 @@ class Platform {
                                          bool optimiseX,
                                          bool optimiseY,
                                          bool optimiseZ) const;
-    Measure* CreateMeasure();
+    Measure* CreateMeasure() const;
 
-    std::string GetName();
-    PlatformType GetPlatformType();
-    //void SetPlatformType(const PlatformType& platformTypeIn);
+    std::string GetName() const;
+    PlatformType GetPlatformType() const;
     void SetGpuIdx(unsigned gpuIdxIn);
-    unsigned GetGpuIdx();
+    unsigned int GetGpuIdx() const;
 
 private:
     KernelFactory *kernelFactory = nullptr;
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index e5e290f9..0c3f5235 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -37,7 +37,6 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     measure_mind = nullptr;
     measure_mindssc = nullptr;
     localWeightSimInput = nullptr;
-    // localWeightSimCurrent = nullptr;
 
     similarityWeight = 0; // automatically set depending of the penalty term weights
 
@@ -77,13 +76,6 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     floatingPyramid = nullptr;
     maskPyramid = nullptr;
     activeVoxelNumber = nullptr;
-    // reference = nullptr;
-    // floating = nullptr;
-    // currentMask = nullptr;
-    // warped = nullptr;
-    // deformationFieldImage = nullptr;
-    // warpedGradient = nullptr;
-    // voxelBasedMeasureGradient = nullptr;
 
     interpolation = 1;
 
@@ -99,20 +91,16 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
 /* *************************************************************** */
 template<class T>
 reg_base<T>::~reg_base() {
-    // DeallocateWarped();
-    // DeallocateWarpedGradient();
-    // DeallocateDeformationField();
-    // DeallocateVoxelBasedMeasureGradient();
-    if (referencePyramid != nullptr) {
+    if (referencePyramid) {
         if (usePyramid) {
             for (unsigned int i = 0; i < levelToPerform; i++) {
-                if (referencePyramid[i] != nullptr) {
+                if (referencePyramid[i]) {
                     nifti_image_free(referencePyramid[i]);
                     referencePyramid[i] = nullptr;
                 }
             }
         } else {
-            if (referencePyramid[0] != nullptr) {
+            if (referencePyramid[0]) {
                 nifti_image_free(referencePyramid[0]);
                 referencePyramid[0] = nullptr;
             }
@@ -120,16 +108,16 @@ reg_base<T>::~reg_base() {
         free(referencePyramid);
         referencePyramid = nullptr;
     }
-    if (maskPyramid != nullptr) {
+    if (maskPyramid) {
         if (usePyramid) {
             for (unsigned int i = 0; i < levelToPerform; i++) {
-                if (maskPyramid[i] != nullptr) {
+                if (maskPyramid[i]) {
                     free(maskPyramid[i]);
                     maskPyramid[i] = nullptr;
                 }
             }
         } else {
-            if (maskPyramid[0] != nullptr) {
+            if (maskPyramid[0]) {
                 free(maskPyramid[0]);
                 maskPyramid[0] = nullptr;
             }
@@ -137,16 +125,16 @@ reg_base<T>::~reg_base() {
         free(maskPyramid);
         maskPyramid = nullptr;
     }
-    if (floatingPyramid != nullptr) {
+    if (floatingPyramid) {
         if (usePyramid) {
             for (unsigned int i = 0; i < levelToPerform; i++) {
-                if (floatingPyramid[i] != nullptr) {
+                if (floatingPyramid[i]) {
                     nifti_image_free(floatingPyramid[i]);
                     floatingPyramid[i] = nullptr;
                 }
             }
         } else {
-            if (floatingPyramid[0] != nullptr) {
+            if (floatingPyramid[0]) {
                 nifti_image_free(floatingPyramid[0]);
                 floatingPyramid[0] = nullptr;
             }
@@ -154,46 +142,47 @@ reg_base<T>::~reg_base() {
         free(floatingPyramid);
         floatingPyramid = nullptr;
     }
-    if (activeVoxelNumber != nullptr) {
+    if (activeVoxelNumber) {
         free(activeVoxelNumber);
         activeVoxelNumber = nullptr;
     }
-    if (referenceThresholdUp != nullptr) {
+    if (referenceThresholdUp) {
         delete[]referenceThresholdUp;
         referenceThresholdUp = nullptr;
     }
-    if (referenceThresholdLow != nullptr) {
+    if (referenceThresholdLow) {
         delete[]referenceThresholdLow;
         referenceThresholdLow = nullptr;
     }
-    if (floatingThresholdUp != nullptr) {
+    if (floatingThresholdUp) {
         delete[]floatingThresholdUp;
         floatingThresholdUp = nullptr;
     }
-    if (floatingThresholdLow != nullptr) {
+    if (floatingThresholdLow) {
         delete[]floatingThresholdLow;
         floatingThresholdLow = nullptr;
     }
-    if (optimiser != nullptr) {
+    if (optimiser) {
         delete optimiser;
         optimiser = nullptr;
     }
 
-    if (measure_nmi != nullptr)
+    if (measure_nmi)
         delete measure_nmi;
-    if (measure_ssd != nullptr)
+    if (measure_ssd)
         delete measure_ssd;
-    if (measure_kld != nullptr)
+    if (measure_kld)
         delete measure_kld;
-    if (measure_dti != nullptr)
+    if (measure_dti)
         delete measure_dti;
-    if (measure_lncc != nullptr)
+    if (measure_lncc)
         delete measure_lncc;
-    if (measure_mind != nullptr)
+    if (measure_mind)
         delete measure_mind;
-    if (measure_mindssc != nullptr)
+    if (measure_mindssc)
         delete measure_mindssc;
 
+    delete measure;
     delete platform;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::~reg_base");
@@ -428,189 +417,22 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
-// template<class T>
-// void reg_base<T>::DeallocateCurrentInputImage() {
-//     reference = nullptr;
-//     currentMask = nullptr;
-//     floating = nullptr;
-//     if (localWeightSimCurrent != nullptr)
-//         nifti_image_free(localWeightSimCurrent);
-//     localWeightSimCurrent = nullptr;
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_base<T>::DeallocateCurrentInputImage");
-// #endif
-// }
-/* *************************************************************** */
-/* *************************************************************** */
-// template<class T>
-// void reg_base<T>::AllocateWarped() {
-//     if (reference == nullptr) {
-//         reg_print_fct_error("reg_base::AllocateWarped()");
-//         reg_print_msg_error("The reference image is not defined");
-//         reg_exit();
-//     }
-//     reg_base<T>::DeallocateWarped();
-//     warped = nifti_copy_nim_info(reference);
-//     warped->dim[0] = warped->ndim = floating->ndim;
-//     warped->dim[4] = warped->nt = floating->nt;
-//     warped->pixdim[4] = warped->dt = 1;
-//     warped->nvox = (size_t)(warped->nx * warped->ny * warped->nz * warped->nt);
-//     warped->scl_slope = 1;
-//     warped->scl_inter = 0;
-//     warped->datatype = floating->datatype;
-//     warped->nbyper = floating->nbyper;
-//     warped->data = (void*)calloc(warped->nvox, warped->nbyper);
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_base<T>::AllocateWarped");
-// #endif
-// }
-/* *************************************************************** */
-// template<class T>
-// void reg_base<T>::DeallocateWarped() {
-//     if (warped != nullptr)
-//         nifti_image_free(warped);
-//     warped = nullptr;
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_base<T>::DeallocateWarped");
-// #endif
-// }
-/* *************************************************************** */
-/* *************************************************************** */
-// template<class T>
-// void reg_base<T>::AllocateDeformationField() {
-//     if (reference == nullptr) {
-//         reg_print_fct_error("reg_base::AllocateDeformationField()");
-//         reg_print_msg_error("The reference image is not defined");
-//         reg_exit();
-//     }
-//     reg_base<T>::DeallocateDeformationField();
-//     deformationFieldImage = nifti_copy_nim_info(reference);
-//     deformationFieldImage->dim[0] = deformationFieldImage->ndim = 5;
-//     // deformationFieldImage->dim[1] = deformationFieldImage->nx;
-//     // deformationFieldImage->dim[2] = deformationFieldImage->ny;
-//     // deformationFieldImage->dim[3] = deformationFieldImage->nz;
-//     deformationFieldImage->dim[4] = deformationFieldImage->nt = 1;
-//     deformationFieldImage->pixdim[4] = deformationFieldImage->dt = 1.0;
-//     if (reference->nz == 1)
-//         deformationFieldImage->dim[5] = deformationFieldImage->nu = 2;
-//     else deformationFieldImage->dim[5] = deformationFieldImage->nu = 3;
-//     deformationFieldImage->pixdim[5] = deformationFieldImage->du = 1.0;
-//     deformationFieldImage->dim[6] = deformationFieldImage->nv = 1;
-//     deformationFieldImage->pixdim[6] = deformationFieldImage->dv = 1.0;
-//     deformationFieldImage->dim[7] = deformationFieldImage->nw = 1;
-//     deformationFieldImage->pixdim[7] = deformationFieldImage->dw = 1.0;
-//     deformationFieldImage->nvox =
-//         (size_t)deformationFieldImage->nx *
-//         (size_t)deformationFieldImage->ny *
-//         (size_t)deformationFieldImage->nz *
-//         (size_t)deformationFieldImage->nt *
-//         (size_t)deformationFieldImage->nu;
-//     deformationFieldImage->nbyper = sizeof(T);
-//     if (sizeof(T) == sizeof(float))
-//         deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32;
-//     else deformationFieldImage->datatype = NIFTI_TYPE_FLOAT64;
-//     deformationFieldImage->data = (void*)calloc(deformationFieldImage->nvox,
-//                                                 deformationFieldImage->nbyper);
-//     deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR;
-//     memset(deformationFieldImage->intent_name, 0, 16);
-//     strcpy(deformationFieldImage->intent_name, "NREG_TRANS");
-//     deformationFieldImage->intent_p1 = DEF_FIELD;
-//     deformationFieldImage->scl_slope = 1;
-//     deformationFieldImage->scl_inter = 0;
-
-//     if (measure_dti != nullptr)
-//         forwardJacobianMatrix = (mat33*)malloc(deformationFieldImage->nx * deformationFieldImage->ny *
-//                                                deformationFieldImage->nz * sizeof(mat33));
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_base<T>::AllocateDeformationField");
-// #endif
-// }
-/* *************************************************************** */
-// template<class T>
-// void reg_base<T>::DeallocateDeformationField() {
-//     if (deformationFieldImage != nullptr) {
-//         nifti_image_free(deformationFieldImage);
-//         deformationFieldImage = nullptr;
-//     }
-//     if (forwardJacobianMatrix != nullptr)
-//         free(forwardJacobianMatrix);
-//     forwardJacobianMatrix = nullptr;
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_base<T>::DeallocateDeformationField");
-// #endif
-// }
-/* *************************************************************** */
-// template<class T>
-// void reg_base<T>::AllocateWarpedGradient() {
-//     if (deformationFieldImage == nullptr) {
-//         reg_print_fct_error("reg_base::AllocateWarpedGradient()");
-//         reg_print_msg_error("The deformation field image is not defined");
-//         reg_exit();
-//     }
-//     reg_base<T>::DeallocateWarpedGradient();
-//     warpedGradient = nifti_copy_nim_info(deformationFieldImage);
-//     warpedGradient->data = (void*)calloc(warpedGradient->nvox,
-//                                          warpedGradient->nbyper);
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_base<T>::AllocateWarpedGradient");
-// #endif
-// }
-/* *************************************************************** */
-// template<class T>
-// void reg_base<T>::DeallocateWarpedGradient() {
-//     if (warpedGradient != nullptr) {
-//         nifti_image_free(warpedGradient);
-//         warpedGradient = nullptr;
-//     }
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_base<T>::DeallocateWarpedGradient");
-// #endif
-// }
-/* *************************************************************** */
-// template<class T>
-// void reg_base<T>::AllocateVoxelBasedMeasureGradient() {
-//     if (deformationFieldImage == nullptr) {
-//         reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()");
-//         reg_print_msg_error("The deformation field image is not defined");
-//         reg_exit();
-//     }
-//     reg_base<T>::DeallocateVoxelBasedMeasureGradient();
-//     voxelBasedMeasureGradient = nifti_copy_nim_info(deformationFieldImage);
-//     voxelBasedMeasureGradient->data = (void*)calloc(voxelBasedMeasureGradient->nvox,
-//                                                     voxelBasedMeasureGradient->nbyper);
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_base<T>::AllocateVoxelBasedMeasureGradient");
-// #endif
-// }
-/* *************************************************************** */
-// template<class T>
-// void reg_base<T>::DeallocateVoxelBasedMeasureGradient() {
-//     if (voxelBasedMeasureGradient != nullptr) {
-//         nifti_image_free(voxelBasedMeasureGradient);
-//         voxelBasedMeasureGradient = nullptr;
-//     }
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_base<T>::DeallocateVoxelBasedMeasureGradient");
-// #endif
-// }
-/* *************************************************************** */
 template<class T>
 void reg_base<T>::CheckParameters() {
     // CHECK THAT BOTH INPUT IMAGES ARE DEFINED
-    if (inputReference == nullptr) {
+    if (!inputReference) {
         reg_print_fct_error("reg_base::CheckParameters()");
         reg_print_msg_error("The reference image is not defined");
         reg_exit();
     }
-    if (inputFloating == nullptr) {
+    if (!inputFloating) {
         reg_print_fct_error("reg_base::CheckParameters()");
         reg_print_msg_error("The floating image is not defined");
         reg_exit();
     }
 
     // CHECK THE MASK DIMENSION IF IT IS DEFINED
-    if (maskImage != nullptr) {
+    if (maskImage) {
         if (inputReference->nx != maskImage->nx ||
             inputReference->ny != maskImage->ny ||
             inputReference->nz != maskImage->nz) {
@@ -642,8 +464,8 @@ void reg_base<T>::CheckParameters() {
     //
     // NOTE - DTI currently ignored as needs fixing
     //
-    // tests ignored if using MIND or MINDSSD as they are not implemented for multi-channel or weighting
-    if (measure_mind == nullptr && measure_mindssc == nullptr) {
+    // tests ignored if using MIND or MINDSSC as they are not implemented for multi-channel or weighting
+    if (!measure_mind && !measure_mindssc) {
         if (inputFloating->nt != inputReference->nt) {
             reg_print_fct_error("reg_base::CheckParameters()");
             reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)");
@@ -652,7 +474,7 @@ void reg_base<T>::CheckParameters() {
         double *chanWeightSum = new double[inputReference->nt]();
         double simWeightSum, totWeightSum = 0.;
         double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr;
-        if (measure_nmi != nullptr) {
+        if (measure_nmi) {
             nmiWeights = measure_nmi->GetTimepointsWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
@@ -672,7 +494,7 @@ void reg_base<T>::CheckParameters() {
                 reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored");
             }
         }
-        if (measure_ssd != nullptr) {
+        if (measure_ssd) {
             ssdWeights = measure_ssd->GetTimepointsWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
@@ -692,7 +514,7 @@ void reg_base<T>::CheckParameters() {
                 reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored");
             }
         }
-        if (measure_kld != nullptr) {
+        if (measure_kld) {
             kldWeights = measure_kld->GetTimepointsWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
@@ -712,7 +534,7 @@ void reg_base<T>::CheckParameters() {
                 reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored");
             }
         }
-        if (measure_lncc != nullptr) {
+        if (measure_lncc) {
             lnccWeights = measure_lncc->GetTimepointsWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
@@ -739,13 +561,13 @@ void reg_base<T>::CheckParameters() {
                 reg_print_fct_warn("reg_base::CheckParameters()");
                 reg_print_msg_warn(text);
             }
-            if (measure_nmi != nullptr)
+            if (measure_nmi)
                 measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum);
-            if (measure_ssd != nullptr)
+            if (measure_ssd)
                 measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum);
-            if (measure_kld != nullptr)
+            if (measure_kld)
                 measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum);
-            if (measure_lncc != nullptr)
+            if (measure_lncc)
                 measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum);
         }
         delete[] chanWeightSum;
@@ -814,7 +636,7 @@ void reg_base<T>::Initialise() {
     if (robustRange) {
         // Create a copy of the reference image to extract the robust range
         nifti_image *temp_reference = nifti_copy_nim_info(inputReference);
-        temp_reference->data = (void*)malloc(temp_reference->nvox * temp_reference->nbyper);
+        temp_reference->data = malloc(temp_reference->nvox * temp_reference->nbyper);
         memcpy(temp_reference->data, inputReference->data, temp_reference->nvox * temp_reference->nbyper);
         reg_tools_changeDatatype<T>(temp_reference);
         // Extract the robust range of the reference image
@@ -830,7 +652,7 @@ void reg_base<T>::Initialise() {
 
         // Create a copy of the floating image to extract the robust range
         nifti_image *temp_floating = nifti_copy_nim_info(inputFloating);
-        temp_floating->data = (void*)malloc(temp_floating->nvox * temp_floating->nbyper);
+        temp_floating->data = malloc(temp_floating->nvox * temp_floating->nbyper);
         memcpy(temp_floating->data, inputFloating->data, temp_floating->nvox * temp_floating->nbyper);
         reg_tools_changeDatatype<T>(temp_floating);
         // Extract the robust range of the floating image
@@ -849,7 +671,7 @@ void reg_base<T>::Initialise() {
     if (usePyramid) {
         reg_createImagePyramid<T>(inputReference, referencePyramid, levelNumber, levelToPerform);
         reg_createImagePyramid<T>(inputFloating, floatingPyramid, levelNumber, levelToPerform);
-        if (maskImage != nullptr)
+        if (maskImage)
             reg_createMaskPyramid<T>(maskImage, maskPyramid, levelNumber, levelToPerform, activeVoxelNumber);
         else {
             for (unsigned int l = 0; l < levelToPerform; ++l) {
@@ -860,7 +682,7 @@ void reg_base<T>::Initialise() {
     } else {
         reg_createImagePyramid<T>(inputReference, referencePyramid, 1, 1);
         reg_createImagePyramid<T>(inputFloating, floatingPyramid, 1, 1);
-        if (maskImage != nullptr)
+        if (maskImage)
             reg_createMaskPyramid<T>(maskImage, maskPyramid, 1, 1, activeVoxelNumber);
         else {
             activeVoxelNumber[0] = referencePyramid[0]->nx * referencePyramid[0]->ny * referencePyramid[0]->nz;
@@ -914,25 +736,25 @@ void reg_base<T>::Initialise() {
 template<class T>
 double reg_base<T>::ComputeSimilarityMeasure() {
     double measure = 0;
-    if (measure_nmi != nullptr)
+    if (measure_nmi)
         measure += measure_nmi->GetSimilarityMeasureValue();
 
-    if (measure_ssd != nullptr)
+    if (measure_ssd)
         measure += measure_ssd->GetSimilarityMeasureValue();
 
-    if (measure_kld != nullptr)
+    if (measure_kld)
         measure += measure_kld->GetSimilarityMeasureValue();
 
-    if (measure_lncc != nullptr)
+    if (measure_lncc)
         measure += measure_lncc->GetSimilarityMeasureValue();
 
-    if (measure_dti != nullptr)
+    if (measure_dti)
         measure += measure_dti->GetSimilarityMeasureValue();
 
-    if (measure_mind != nullptr)
+    if (measure_mind)
         measure += measure_mind->GetSimilarityMeasureValue();
 
-    if (measure_mindssc != nullptr)
+    if (measure_mindssc)
         measure += measure_mindssc->GetSimilarityMeasureValue();
 
 #ifndef NDEBUG
@@ -975,22 +797,22 @@ void reg_base<T>::GetVoxelBasedGradient() {
         compute->GetImageGradient(interpolation, warpedPaddingValue, t);
 
         // The gradient of the various measures of similarity are computed
-        if (measure_nmi != nullptr)
+        if (measure_nmi)
             measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t);
 
-        if (measure_ssd != nullptr)
+        if (measure_ssd)
             measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t);
 
-        if (measure_kld != nullptr)
+        if (measure_kld)
             measure_kld->GetVoxelBasedSimilarityMeasureGradient(t);
 
-        if (measure_lncc != nullptr)
+        if (measure_lncc)
             measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t);
 
-        if (measure_mind != nullptr)
+        if (measure_mind)
             measure_mind->GetVoxelBasedSimilarityMeasureGradient(t);
 
-        if (measure_mindssc != nullptr)
+        if (measure_mindssc)
             measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t);
     }
 
@@ -999,11 +821,10 @@ void reg_base<T>::GetVoxelBasedGradient() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 //template<class T>
 //void reg_base<T>::ApproximateParzenWindow()
 //{
-//    if(measure_nmi==nullptr)
+//    if(!measure_nmi)
 //        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
 //    measure_nmi=approxParzenWindow = true;
 //}
@@ -1011,15 +832,14 @@ void reg_base<T>::GetVoxelBasedGradient() {
 //template<class T>
 //void reg_base<T>::DoNotApproximateParzenWindow()
 //{
-//    if(measure_nmi==nullptr)
+//    if(!measure_nmi)
 //        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
 //    measure_nmi=approxParzenWindow = false;
 //}
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
-    if (measure_nmi == nullptr)
+    if (!measure_nmi)
         measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
     measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
@@ -1032,7 +852,7 @@ void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
-    if (measure_nmi == nullptr)
+    if (!measure_nmi)
         measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
     measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
@@ -1045,7 +865,7 @@ void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseSSD(int timepoint, bool normalise) {
-    if (measure_ssd == nullptr)
+    if (!measure_ssd)
         measure_ssd = dynamic_cast<reg_ssd*>(measure->Create(MeasureType::Ssd));
     measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     measure_ssd->SetNormaliseTimepoint(timepoint, normalise);
@@ -1056,7 +876,7 @@ void reg_base<T>::UseSSD(int timepoint, bool normalise) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseMIND(int timepoint, int offset) {
-    if (measure_mind == nullptr)
+    if (!measure_mind)
         measure_mind = dynamic_cast<reg_mind*>(measure->Create(MeasureType::Mind));
     measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mind->SetDescriptorOffset(offset);
@@ -1067,7 +887,7 @@ void reg_base<T>::UseMIND(int timepoint, int offset) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
-    if (measure_mindssc == nullptr)
+    if (!measure_mindssc)
         measure_mindssc = dynamic_cast<reg_mindssc*>(measure->Create(MeasureType::Mindssc));
     measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mindssc->SetDescriptorOffset(offset);
@@ -1078,7 +898,7 @@ void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseKLDivergence(int timepoint) {
-    if (measure_kld == nullptr)
+    if (!measure_kld)
         measure_kld = dynamic_cast<reg_kld*>(measure->Create(MeasureType::Kld));
     measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
 #ifndef NDEBUG
@@ -1088,7 +908,7 @@ void reg_base<T>::UseKLDivergence(int timepoint) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseLNCC(int timepoint, float stddev) {
-    if (measure_lncc == nullptr)
+    if (!measure_lncc)
         measure_lncc = dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc));
     measure_lncc->SetKernelStandardDeviation(timepoint, stddev);
     measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0
@@ -1099,7 +919,7 @@ void reg_base<T>::UseLNCC(int timepoint, float stddev) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLNCCKernelType(int type) {
-    if (measure_lncc == nullptr) {
+    if (!measure_lncc) {
         reg_print_fct_error("reg_base<T>::SetLNCCKernelType");
         reg_print_msg_error("The LNCC object has to be created first");
         reg_exit();
@@ -1115,7 +935,7 @@ void reg_base<T>::UseDTI(bool *timepoint) {
     reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring");
     reg_exit();
 
-    if (measure_dti == nullptr)
+    if (!measure_dti)
         measure_dti = dynamic_cast<reg_dti*>(measure->Create(MeasureType::Dti));
     for (int i = 0; i < inputReference->nt; ++i) {
         if (timepoint[i])
@@ -1128,7 +948,7 @@ void reg_base<T>::UseDTI(bool *timepoint) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetNMIWeight(int timepoint, double weight) {
-    if (measure_nmi == nullptr) {
+    if (!measure_nmi) {
         reg_print_fct_error("reg_base<T>::SetNMIWeight");
         reg_print_msg_error("The NMI object has to be created before the timepoint weights can be set");
         reg_exit();
@@ -1138,7 +958,7 @@ void reg_base<T>::SetNMIWeight(int timepoint, double weight) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLNCCWeight(int timepoint, double weight) {
-    if (measure_lncc == nullptr) {
+    if (!measure_lncc) {
         reg_print_fct_error("reg_base<T>::SetLNCCWeight");
         reg_print_msg_error("The LNCC object has to be created before the timepoint weights can be set");
         reg_exit();
@@ -1148,7 +968,7 @@ void reg_base<T>::SetLNCCWeight(int timepoint, double weight) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetSSDWeight(int timepoint, double weight) {
-    if (measure_ssd == nullptr) {
+    if (!measure_ssd) {
         reg_print_fct_error("reg_base<T>::SetSSDWeight");
         reg_print_msg_error("The SSD object has to be created before the timepoint weights can be set");
         reg_exit();
@@ -1158,7 +978,7 @@ void reg_base<T>::SetSSDWeight(int timepoint, double weight) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetKLDWeight(int timepoint, double weight) {
-    if (measure_kld == nullptr) {
+    if (!measure_kld) {
         reg_print_fct_error("reg_base<T>::SetKLDWeight");
         reg_print_msg_error("The KLD object has to be created before the timepoint weights can be set");
         reg_exit();
@@ -1166,20 +986,18 @@ void reg_base<T>::SetKLDWeight(int timepoint, double weight) {
     measure_kld->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLocalWeightSim(nifti_image *i) {
     localWeightSimInput = i;
     reg_tools_changeDatatype<T>(localWeightSimInput);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_base<T>::WarpFloatingImage(int inter) {
     // Compute the deformation field
     GetDeformationField();
 
-    if (measure_dti == nullptr) {
+    if (!measure_dti) {
         // Resample the floating image
         compute->ResampleImage(inter, warpedPaddingValue);
     } else {
@@ -1236,20 +1054,11 @@ void reg_base<T>::Run() {
             mask = maskPyramid[0];
         }
 
-        // Allocate image that depends on the reference image
-        // AllocateWarped();
-        // AllocateDeformationField();
-        // AllocateWarpedGradient();
-
         // The grid is refined if necessary
         T maxStepSize = InitialiseCurrentLevel(reference);
         T currentSize = maxStepSize;
         T smallestSize = maxStepSize / (T)100.0;
 
-        // Allocate image that are required to compute the gradient
-        // AllocateVoxelBasedMeasureGradient();
-        // AllocateTransformationGradient();
-
         InitContent(reference, floating, mask);
 
         DisplayCurrentLevelParameters();
@@ -1267,11 +1076,7 @@ void reg_base<T>::Run() {
             PrintInitialObjFunctionValue();
 
             // Iterate until convergence or until the max number of iteration is reach
-            while (true) {
-
-                if (currentSize == 0)
-                    break;
-
+            while (currentSize) {
                 if (optimiser->GetCurrentIterationNumber() >= optimiser->GetMaxIterationNumber()) {
                     reg_print_msg_warn("The current level reached the maximum number of iteration");
                     break;
@@ -1319,16 +1124,6 @@ void reg_base<T>::Run() {
         delete optimiser;
         optimiser = nullptr;
         DeinitContent();
-        // if (localWeightSimCurrent) {
-        //     nifti_image_free(localWeightSimCurrent);
-        //     localWeightSimCurrent = nullptr;
-        // }
-        // DeallocateCurrentInputImage();
-        // DeallocateWarped();
-        // DeallocateDeformationField();
-        // DeallocateWarpedGradient();
-        // DeallocateVoxelBasedMeasureGradient();
-        // DeallocateTransformationGradient();
         if (usePyramid) {
             nifti_image_free(referencePyramid[currentLevel]);
             referencePyramid[currentLevel] = nullptr;
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index e386a856..607a13f2 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -18,28 +18,22 @@
 #endif
 
  /* *************************************************************** */
- /* *************************************************************** */
 template<class T>
-reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint)
-    : reg_base<T>::reg_base(refTimePoint, floTimePoint) {
+reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint):
+    reg_base<T>::reg_base(refTimePoint, floTimePoint) {
 
     this->executableName = (char*)"NiftyReg F3D";
     inputControlPointGrid = nullptr; // pointer to external
     controlPointGrid = nullptr;
     bendingEnergyWeight = 0.001;
     linearEnergyWeight = 0.00;
-    jacobianLogWeight = 0.;
+    jacobianLogWeight = 0;
     jacobianLogApproximation = true;
     spacing[0] = -5;
     spacing[1] = std::numeric_limits<T>::quiet_NaN();
     spacing[2] = std::numeric_limits<T>::quiet_NaN();
     this->useConjGradient = true;
     this->useApproxGradient = false;
-
-    // approxParzenWindow=true;
-
-    // transformationGradient = nullptr;
-
     gridRefinement = true;
 
 #ifndef NDEBUG
@@ -47,11 +41,9 @@ reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint)
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 reg_f3d<T>::~reg_f3d() {
-    // DeallocateTransformationGradient();
-    if (controlPointGrid != nullptr) {
+    if (controlPointGrid) {
         nifti_image_free(controlPointGrid);
         controlPointGrid = nullptr;
     }
@@ -60,7 +52,6 @@ reg_f3d<T>::~reg_f3d() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetControlPointGridImage(nifti_image *cp) {
     inputControlPointGrid = cp;
@@ -142,39 +133,11 @@ T reg_f3d<T>::InitialiseCurrentLevel(nifti_image *reference) {
     return maxStepSize;
 }
 /* *************************************************************** */
-// template<class T>
-// void reg_f3d<T>::AllocateTransformationGradient() {
-//     if (controlPointGrid == nullptr) {
-//         reg_print_fct_error("reg_f3d<T>::AllocateTransformationGradient()");
-//         reg_print_msg_error("The control point image is not defined");
-//         reg_exit();
-//     }
-//     reg_f3d<T>::DeallocateTransformationGradient();
-//     transformationGradient = nifti_copy_nim_info(controlPointGrid);
-//     transformationGradient->data = (void*)calloc(transformationGradient->nvox,
-//                                                        transformationGradient->nbyper);
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_f3d<T>::AllocateTransformationGradient");
-// #endif
-// }
-/* *************************************************************** */
-// template<class T>
-// void reg_f3d<T>::DeallocateTransformationGradient() {
-//     if (transformationGradient != nullptr) {
-//         nifti_image_free(transformationGradient);
-//         transformationGradient = nullptr;
-//     }
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_f3d<T>::DeallocateTransformationGradient");
-// #endif
-// }
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::CheckParameters() {
     reg_base<T>::CheckParameters();
     // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
-    if (strcmp(this->executableName, "NiftyReg F3D") == 0 ||
-        strcmp(this->executableName, "NiftyReg F3D GPU") == 0) {
+    if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
         T penaltySum = bendingEnergyWeight +
             linearEnergyWeight +
             jacobianLogWeight +
@@ -192,7 +155,6 @@ void reg_f3d<T>::CheckParameters() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::Initialise() {
     if (this->initialised) return;
@@ -200,7 +162,7 @@ void reg_f3d<T>::Initialise() {
     reg_base<T>::Initialise();
 
     // DETERMINE THE GRID SPACING AND CREATE THE GRID
-    if (inputControlPointGrid == nullptr) {
+    if (!inputControlPointGrid) {
         // Set the spacing along y and z if undefined. Their values are set to match
         // the spacing along the x axis
         if (spacing[1] != spacing[1]) spacing[1] = spacing[0];
@@ -224,7 +186,7 @@ void reg_f3d<T>::Initialise() {
         reg_createControlPointGrid<T>(&controlPointGrid, this->referencePyramid[0], gridSpacing);
 
         // The control point position image is initialised with the affine transformation
-        if (this->affineTransformation == nullptr) {
+        if (!this->affineTransformation) {
             memset(controlPointGrid->data, 0, controlPointGrid->nvox * controlPointGrid->nbyper);
             reg_tools_multiplyValueToImage(controlPointGrid, controlPointGrid, 0.f);
             reg_getDeformationFromDisplacement(controlPointGrid);
@@ -263,7 +225,7 @@ void reg_f3d<T>::Initialise() {
             text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
                                 i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]);
             reg_print_info(this->executableName, text.c_str());
-            if (this->measure_nmi != nullptr) {
+            if (this->measure_nmi) {
                 if (this->measure_nmi->GetTimepointsWeights()[i] > 0) {
                     text = stringFormat("\t* binning size for timepoint %i/%i: %i",
                                         i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4);
@@ -289,7 +251,7 @@ void reg_f3d<T>::Initialise() {
             text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
                                 i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]);
             reg_print_info(this->executableName, text.c_str());
-            if (this->measure_nmi != nullptr) {
+            if (this->measure_nmi) {
                 if (this->measure_nmi->GetTimepointsWeights()[i] > 0) {
                     text = stringFormat("\t* binning size for timepoint %i/%i: %i",
                                         i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4);
@@ -317,22 +279,20 @@ void reg_f3d<T>::Initialise() {
         text = stringFormat("Final spacing in mm: %g %g %g", spacing[0], spacing[1], spacing[2]);
         reg_print_info(this->executableName, text.c_str());
         reg_print_info(this->executableName, "");
-        if (this->measure_ssd != nullptr)
+        if (this->measure_ssd)
             reg_print_info(this->executableName, "The SSD is used as a similarity measure.");
-        if (this->measure_kld != nullptr)
+        if (this->measure_kld)
             reg_print_info(this->executableName, "The KL divergence is used as a similarity measure.");
-        if (this->measure_lncc != nullptr)
+        if (this->measure_lncc)
             reg_print_info(this->executableName, "The LNCC is used as a similarity measure.");
-        if (this->measure_dti != nullptr)
+        if (this->measure_dti)
             reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure.");
-        if (this->measure_mind != nullptr)
+        if (this->measure_mind)
             reg_print_info(this->executableName, "MIND is used as a similarity measure.");
-        if (this->measure_mindssc != nullptr)
+        if (this->measure_mindssc)
             reg_print_info(this->executableName, "MINDSSC is used as a similarity measure.");
-        if (this->measure_nmi != nullptr || (this->measure_dti == nullptr && this->measure_kld == nullptr &&
-                                             this->measure_lncc == nullptr && this->measure_nmi == nullptr &&
-                                             this->measure_ssd == nullptr && this->measure_mind == nullptr &&
-                                             this->measure_mindssc == nullptr))
+        if (this->measure_nmi || (!this->measure_dti && !this->measure_kld && !this->measure_lncc &&
+                                  !this->measure_nmi && !this->measure_ssd && !this->measure_mind && !this->measure_mindssc))
             reg_print_info(this->executableName, "The NMI is used as a similarity measure.");
         text = stringFormat("Similarity measure term weight: %g", this->similarityWeight);
         reg_print_info(this->executableName, text.c_str());
@@ -372,7 +332,6 @@ void reg_f3d<T>::Initialise() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
     if (this->platformType == PlatformType::Cpu)
@@ -384,7 +343,6 @@ void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int
     this->compute = this->platform->CreateCompute(*this->con);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::DeinitContent() {
     delete this->compute;
@@ -393,7 +351,6 @@ void reg_f3d<T>::DeinitContent() {
     this->con = nullptr;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetDeformationField() {
     this->compute->GetDeformationField(false, // Composition
@@ -403,7 +360,6 @@ void reg_f3d<T>::GetDeformationField() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
     if (jacobianLogWeight <= 0) return 0;
@@ -443,7 +399,6 @@ double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
     return jacobianLogWeight * value;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 double reg_f3d<T>::ComputeBendingEnergyPenaltyTerm() {
     if (bendingEnergyWeight <= 0) return 0;
@@ -455,11 +410,9 @@ double reg_f3d<T>::ComputeBendingEnergyPenaltyTerm() {
     return bendingEnergyWeight * value;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 double reg_f3d<T>::ComputeLinearEnergyPenaltyTerm() {
-    if (linearEnergyWeight <= 0)
-        return 0;
+    if (linearEnergyWeight <= 0) return 0;
 
     double value = this->compute->ApproxLinearEnergy();
 #ifndef NDEBUG
@@ -468,11 +421,9 @@ double reg_f3d<T>::ComputeLinearEnergyPenaltyTerm() {
     return linearEnergyWeight * value;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 double reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm() {
-    if (this->landmarkRegWeight <= 0)
-        return 0;
+    if (this->landmarkRegWeight <= 0) return 0;
 
     double value = this->compute->GetLandmarkDistance(this->landmarkRegNumber,
                                                       this->landmarkReference,
@@ -483,9 +434,10 @@ double reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm() {
     return this->landmarkRegWeight * value;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetSimilarityMeasureGradient() {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
     this->GetVoxelBasedGradient();
 
     nifti_image *voxelBasedMeasureGradient = dynamic_cast<F3dContent*>(this->con)->GetVoxelBasedMeasureGradient();
@@ -524,7 +476,7 @@ void reg_f3d<T>::GetSimilarityMeasureGradient() {
                                     activeAxis);
     }
 
-    // Update the changes of voxelBasedMeasureGradient
+    // Update the changes for GPU
     dynamic_cast<F3dContent*>(this->con)->UpdateVoxelBasedMeasureGradient();
 
     // The node based NMI gradient is extracted
@@ -535,7 +487,6 @@ void reg_f3d<T>::GetSimilarityMeasureGradient() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetBendingEnergyGradient() {
     if (bendingEnergyWeight <= 0) return;
@@ -546,7 +497,6 @@ void reg_f3d<T>::GetBendingEnergyGradient() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetLinearEnergyGradient() {
     if (linearEnergyWeight <= 0) return;
@@ -557,7 +507,6 @@ void reg_f3d<T>::GetLinearEnergyGradient() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetJacobianBasedGradient() {
     if (jacobianLogWeight <= 0) return;
@@ -568,7 +517,6 @@ void reg_f3d<T>::GetJacobianBasedGradient() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetLandmarkDistanceGradient() {
     if (this->landmarkRegWeight <= 0) return;
@@ -582,18 +530,6 @@ void reg_f3d<T>::GetLandmarkDistanceGradient() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
-// template<class T>
-// void reg_f3d<T>::SetGradientImageToZero() {
-//     T* nodeGradPtr = static_cast<T*>(transformationGradient->data);
-//     for (size_t i = 0; i < transformationGradient->nvox; ++i)
-//         *nodeGradPtr++ = 0;
-// #ifndef NDEBUG
-//     reg_print_fct_debug("reg_f3d<T>::SetGradientImageToZero");
-// #endif
-// }
-/* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 T reg_f3d<T>::NormaliseGradient() {
     // First compute the gradient max length for normalisation purpose
@@ -617,7 +553,6 @@ T reg_f3d<T>::NormaliseGradient() {
     return maxGradLength;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::DisplayCurrentLevelParameters() {
 #ifdef NDEBUG
@@ -667,7 +602,6 @@ void reg_f3d<T>::DisplayCurrentLevelParameters() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 double reg_f3d<T>::GetObjectiveFunctionValue() {
     currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations
@@ -699,7 +633,6 @@ double reg_f3d<T>::GetObjectiveFunctionValue() {
     return this->currentWMeasure - currentWBE - currentWLE - currentWJac - this->currentWLand;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::UpdateParameters(float scale) {
     this->compute->UpdateControlPointPosition(this->optimiser->GetCurrentDOF(),
@@ -714,7 +647,6 @@ void reg_f3d<T>::UpdateParameters(float scale) {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetOptimiser() {
     this->optimiser = this->platform->template CreateOptimiser<T>(*dynamic_cast<F3dContent*>(this->con),
@@ -729,7 +661,6 @@ void reg_f3d<T>::SetOptimiser() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SmoothGradient() {
     // TODO Implement this for CUDA
@@ -739,7 +670,7 @@ void reg_f3d<T>::SmoothGradient() {
         float kernel = fabs(this->gradientSmoothingSigma);
         F3dContent *con = dynamic_cast<F3dContent*>(this->con);
         reg_tools_kernelConvolution(con->GetTransformationGradient(), &kernel, GAUSSIAN_KERNEL);
-        // Update the changes of transformationGradient
+        // Update the changes for GPU
         con->UpdateTransformationGradient();
     }
 #ifndef NDEBUG
@@ -747,7 +678,6 @@ void reg_f3d<T>::SmoothGradient() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetApproximatedGradient() {
     // TODO Implement this for CUDA
@@ -763,27 +693,26 @@ void reg_f3d<T>::GetApproximatedGradient() {
     for (size_t i = 0; i < controlPointGrid->nvox; ++i) {
         T currentValue = this->optimiser->GetBestDOF()[i];
         gridPtr[i] = currentValue + eps;
-        // Update the changes. Bad hack, fix that!
+        // Update the changes for GPU
         con->UpdateControlPointGrid();
         double valPlus = GetObjectiveFunctionValue();
         gridPtr[i] = currentValue - eps;
-        // Update the changes. Bad hack, fix that!
+        // Update the changes for GPU
         con->UpdateControlPointGrid();
         double valMinus = GetObjectiveFunctionValue();
         gridPtr[i] = currentValue;
-        // Update the changes. Bad hack, fix that!
+        // Update the changes for GPU
         con->UpdateControlPointGrid();
         gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps));
     }
 
-    // Update the changes
+    // Update the changes for GPU
     con->UpdateTransformationGradient();
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetApproximatedGradient");
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 nifti_image** reg_f3d<T>::GetWarpedImage() {
     // The initial images are used
@@ -811,7 +740,6 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
     return warpedImage;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 nifti_image* reg_f3d<T>::GetControlPointPositionImage() {
     nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGrid);
@@ -824,7 +752,6 @@ nifti_image* reg_f3d<T>::GetControlPointPositionImage() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::UpdateBestObjFunctionValue() {
     this->bestWMeasure = this->currentWMeasure;
@@ -837,7 +764,6 @@ void reg_f3d<T>::UpdateBestObjFunctionValue() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::PrintInitialObjFunctionValue() {
     if (!this->verbose) return;
@@ -853,7 +779,6 @@ void reg_f3d<T>::PrintInitialObjFunctionValue() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::PrintCurrentObjFunctionValue(T currentSize) {
     if (!this->verbose) return;
@@ -878,7 +803,6 @@ void reg_f3d<T>::PrintCurrentObjFunctionValue(T currentSize) {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetObjectiveFunctionGradient() {
     if (!this->useApproxGradient) {
@@ -907,7 +831,6 @@ void reg_f3d<T>::GetObjectiveFunctionGradient() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_f3d<T>::CorrectTransformation() {
     if (jacobianLogWeight > 0 && jacobianLogApproximation)
@@ -917,5 +840,4 @@ void reg_f3d<T>::CorrectTransformation() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template class reg_f3d<float>;
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index 86135bda..b986237e 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -26,7 +26,6 @@ class reg_f3d: public reg_base<T> {
     bool jacobianLogApproximation;
     T spacing[3];
 
-    // nifti_image *transformationGradient;
     bool gridRefinement;
 
     double currentWJac;
@@ -36,8 +35,6 @@ class reg_f3d: public reg_base<T> {
     double bestWBE;
     double bestWLE;
 
-    // virtual void AllocateTransformationGradient() override;
-    // virtual void DeallocateTransformationGradient() override;
     virtual T InitialiseCurrentLevel(nifti_image *reference) override;
 
     virtual double ComputeBendingEnergyPenaltyTerm();
@@ -49,7 +46,6 @@ class reg_f3d: public reg_base<T> {
     virtual void GetLinearEnergyGradient();
     virtual void GetJacobianBasedGradient();
     virtual void GetLandmarkDistanceGradient();
-    // virtual void SetGradientImageToZero() override;
     virtual T NormaliseGradient() override;
     virtual void SmoothGradient() override;
     virtual void GetObjectiveFunctionGradient() override;
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 3746b844..550105ab 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -12,682 +12,539 @@
 
 #include "_reg_localTrans_regul.h"
 
-/* *************************************************************** */
 /* *************************************************************** */
 template<class DTYPE>
-double reg_spline_approxBendingEnergyValue2D(nifti_image *splineControlPoint)
-{
-   size_t nodeNumber = (size_t)splineControlPoint->nx * splineControlPoint->ny;
-   int a, b, x, y, index, i;
+double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoint) {
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny);
+    int a, b, x, y, index, i;
 
-   // Create pointers to the spline coefficients
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    // Create pointers to the spline coefficients
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
 
-   // get the constant basis values
-   DTYPE basisXX[9], basisYY[9], basisXY[9];
-   set_second_order_bspline_basis_values(basisXX, basisYY, basisXY);
+    // get the constant basis values
+    DTYPE basisXX[9], basisYY[9], basisXY[9];
+    set_second_order_bspline_basis_values(basisXX, basisYY, basisXY);
 
-   double constraintValue=0;
+    double constraintValue = 0;
 
-   DTYPE splineCoeffX, splineCoeffY;
-   DTYPE XX_x, YY_x, XY_x;
-   DTYPE XX_y, YY_y, XY_y;
+    DTYPE splineCoeffX, splineCoeffY;
+    DTYPE XX_x, YY_x, XY_x;
+    DTYPE XX_y, YY_y, XY_y;
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(splineControlPoint, splinePtrX, splinePtrY, \
-   basisXX, basisYY, basisXY) \
-   private(XX_x, YY_x, XY_x, XX_y, YY_y, XY_y, \
-   x, y, a, b, index, i, \
-   splineCoeffX, splineCoeffY) \
-   reduction(+:constraintValue)
+    shared(splineControlPoint, splinePtrX, splinePtrY, \
+    basisXX, basisYY, basisXY) \
+    private(XX_x, YY_x, XY_x, XX_y, YY_y, XY_y, \
+    x, y, a, b, index, i, \
+    splineCoeffX, splineCoeffY) \
+    reduction(+:constraintValue)
 #endif
-   for(y=1; y<splineControlPoint->ny-1; ++y)
-   {
-      for(x=1; x<splineControlPoint->nx-1; ++x)
-      {
-         XX_x=0, YY_x=0, XY_x=0;
-         XX_y=0, YY_y=0, XY_y=0;
-
-         i=0;
-         for(b=-1; b<2; b++){
-            for(a=-1; a<2; a++){
-               index = (y+b)*splineControlPoint->nx+x+a;
-               splineCoeffX = splinePtrX[index];
-               splineCoeffY = splinePtrY[index];
-               XX_x += basisXX[i]*splineCoeffX;
-               YY_x += basisYY[i]*splineCoeffX;
-               XY_x += basisXY[i]*splineCoeffX;
-
-               XX_y += basisXX[i]*splineCoeffY;
-               YY_y += basisYY[i]*splineCoeffY;
-               XY_y += basisXY[i]*splineCoeffY;
-               ++i;
+    for (y = 1; y < splineControlPoint->ny - 1; ++y) {
+        for (x = 1; x < splineControlPoint->nx - 1; ++x) {
+            XX_x = 0, YY_x = 0, XY_x = 0;
+            XX_y = 0, YY_y = 0, XY_y = 0;
+
+            i = 0;
+            for (b = -1; b < 2; b++) {
+                for (a = -1; a < 2; a++) {
+                    index = (y + b) * splineControlPoint->nx + x + a;
+                    splineCoeffX = splinePtrX[index];
+                    splineCoeffY = splinePtrY[index];
+                    XX_x += basisXX[i] * splineCoeffX;
+                    YY_x += basisYY[i] * splineCoeffX;
+                    XY_x += basisXY[i] * splineCoeffX;
+
+                    XX_y += basisXX[i] * splineCoeffY;
+                    YY_y += basisYY[i] * splineCoeffY;
+                    XY_y += basisXY[i] * splineCoeffY;
+                    ++i;
+                }
             }
-         }
-
-         constraintValue += double(
-                  XX_x*XX_x + YY_x*YY_x + 2.0*XY_x*XY_x +
-                  XX_y*XX_y + YY_y*YY_y + 2.0*XY_y*XY_y );
-      }
-   }
-   return constraintValue / (double)splineControlPoint->nvox;
+
+            constraintValue += double(XX_x * XX_x + YY_x * YY_x + 2.0 * XY_x * XY_x +
+                                      XX_y * XX_y + YY_y * YY_y + 2.0 * XY_y * XY_y);
+        }
+    }
+    return constraintValue / (double)splineControlPoint->nvox;
 }
 /* *************************************************************** */
 template<class DTYPE>
-double reg_spline_approxBendingEnergyValue3D(nifti_image *splineControlPoint)
-{
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny * splineControlPoint->nz;
-   int a, b, c, x, y, z, index, i;
+double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoint) {
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    int a, b, c, x, y, z, index, i;
 
-   // Create pointers to the spline coefficients
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-   DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    // Create pointers to the spline coefficients
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
 
-   // get the constant basis values
-   DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27];
-   set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ);
+    // get the constant basis values
+    DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27];
+    set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ);
 
-   double constraintValue=0;
+    double constraintValue = 0;
 
-   DTYPE splineCoeffX, splineCoeffY, splineCoeffZ;
-   DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x;
-   DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y;
-   DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z;
+    DTYPE splineCoeffX, splineCoeffY, splineCoeffZ;
+    DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x;
+    DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y;
+    DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z;
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, \
-   basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \
-   private(XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y, \
-   XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z, x, y, z, a, b, c, index, i, \
-   splineCoeffX, splineCoeffY, splineCoeffZ) \
-   reduction(+:constraintValue)
+    shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, \
+    basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \
+    private(XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y, \
+    XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z, x, y, z, a, b, c, index, i, \
+    splineCoeffX, splineCoeffY, splineCoeffZ) \
+    reduction(+:constraintValue)
 #endif
-   for(z=1; z<splineControlPoint->nz-1; ++z)
-   {
-      for(y=1; y<splineControlPoint->ny-1; ++y)
-      {
-         for(x=1; x<splineControlPoint->nx-1; ++x)
-         {
-            XX_x=0, YY_x=0, ZZ_x=0;
-            XY_x=0, YZ_x=0, XZ_x=0;
-            XX_y=0, YY_y=0, ZZ_y=0;
-            XY_y=0, YZ_y=0, XZ_y=0;
-            XX_z=0, YY_z=0, ZZ_z=0;
-            XY_z=0, YZ_z=0, XZ_z=0;
-
-            i=0;
-            for(c=-1; c<2; c++){
-               for(b=-1; b<2; b++){
-                  for(a=-1; a<2; a++){
-                     index = ((z+c)*splineControlPoint->ny+y+b)*splineControlPoint->nx+x+a;
-                     splineCoeffX = splinePtrX[index];
-                     splineCoeffY = splinePtrY[index];
-                     splineCoeffZ = splinePtrZ[index];
-                     XX_x += basisXX[i]*splineCoeffX;
-                     YY_x += basisYY[i]*splineCoeffX;
-                     ZZ_x += basisZZ[i]*splineCoeffX;
-                     XY_x += basisXY[i]*splineCoeffX;
-                     YZ_x += basisYZ[i]*splineCoeffX;
-                     XZ_x += basisXZ[i]*splineCoeffX;
-
-                     XX_y += basisXX[i]*splineCoeffY;
-                     YY_y += basisYY[i]*splineCoeffY;
-                     ZZ_y += basisZZ[i]*splineCoeffY;
-                     XY_y += basisXY[i]*splineCoeffY;
-                     YZ_y += basisYZ[i]*splineCoeffY;
-                     XZ_y += basisXZ[i]*splineCoeffY;
-
-                     XX_z += basisXX[i]*splineCoeffZ;
-                     YY_z += basisYY[i]*splineCoeffZ;
-                     ZZ_z += basisZZ[i]*splineCoeffZ;
-                     XY_z += basisXY[i]*splineCoeffZ;
-                     YZ_z += basisYZ[i]*splineCoeffZ;
-                     XZ_z += basisXZ[i]*splineCoeffZ;
-                     ++i;
-                  }
-               }
+    for (z = 1; z < splineControlPoint->nz - 1; ++z) {
+        for (y = 1; y < splineControlPoint->ny - 1; ++y) {
+            for (x = 1; x < splineControlPoint->nx - 1; ++x) {
+                XX_x = 0, YY_x = 0, ZZ_x = 0;
+                XY_x = 0, YZ_x = 0, XZ_x = 0;
+                XX_y = 0, YY_y = 0, ZZ_y = 0;
+                XY_y = 0, YZ_y = 0, XZ_y = 0;
+                XX_z = 0, YY_z = 0, ZZ_z = 0;
+                XY_z = 0, YZ_z = 0, XZ_z = 0;
+
+                i = 0;
+                for (c = -1; c < 2; c++) {
+                    for (b = -1; b < 2; b++) {
+                        for (a = -1; a < 2; a++) {
+                            index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
+                            splineCoeffX = splinePtrX[index];
+                            splineCoeffY = splinePtrY[index];
+                            splineCoeffZ = splinePtrZ[index];
+                            XX_x += basisXX[i] * splineCoeffX;
+                            YY_x += basisYY[i] * splineCoeffX;
+                            ZZ_x += basisZZ[i] * splineCoeffX;
+                            XY_x += basisXY[i] * splineCoeffX;
+                            YZ_x += basisYZ[i] * splineCoeffX;
+                            XZ_x += basisXZ[i] * splineCoeffX;
+
+                            XX_y += basisXX[i] * splineCoeffY;
+                            YY_y += basisYY[i] * splineCoeffY;
+                            ZZ_y += basisZZ[i] * splineCoeffY;
+                            XY_y += basisXY[i] * splineCoeffY;
+                            YZ_y += basisYZ[i] * splineCoeffY;
+                            XZ_y += basisXZ[i] * splineCoeffY;
+
+                            XX_z += basisXX[i] * splineCoeffZ;
+                            YY_z += basisYY[i] * splineCoeffZ;
+                            ZZ_z += basisZZ[i] * splineCoeffZ;
+                            XY_z += basisXY[i] * splineCoeffZ;
+                            YZ_z += basisYZ[i] * splineCoeffZ;
+                            XZ_z += basisXZ[i] * splineCoeffZ;
+                            ++i;
+                        }
+                    }
+                }
+
+                constraintValue += double(
+                    XX_x * XX_x + YY_x * YY_x + ZZ_x * ZZ_x + 2.0 * (XY_x * XY_x + YZ_x * YZ_x + XZ_x * XZ_x) +
+                    XX_y * XX_y + YY_y * YY_y + ZZ_y * ZZ_y + 2.0 * (XY_y * XY_y + YZ_y * YZ_y + XZ_y * XZ_y) +
+                    XX_z * XX_z + YY_z * YY_z + ZZ_z * ZZ_z + 2.0 * (XY_z * XY_z + YZ_z * YZ_z + XZ_z * XZ_z));
             }
-
-            constraintValue += double(
-                     XX_x*XX_x + YY_x*YY_x + ZZ_x*ZZ_x + 2.0*(XY_x*XY_x + YZ_x*YZ_x + XZ_x*XZ_x) +
-                     XX_y*XX_y + YY_y*YY_y + ZZ_y*ZZ_y + 2.0*(XY_y*XY_y + YZ_y*YZ_y + XZ_y*XZ_y) +
-                     XX_z*XX_z + YY_z*YY_z + ZZ_z*ZZ_z + 2.0*(XY_z*XY_z + YZ_z*YZ_z + XZ_z*XZ_z) );
-         }
-      }
-   }
-   return constraintValue / (double)splineControlPoint->nvox;
+        }
+    }
+    return constraintValue / (double)splineControlPoint->nvox;
 }
 /* *************************************************************** */
 extern "C++"
-double reg_spline_approxBendingEnergy(nifti_image *splineControlPoint)
-{
-   if(splineControlPoint->nz==1)
-   {
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_spline_approxBendingEnergyValue2D<float>(splineControlPoint);
-      case NIFTI_TYPE_FLOAT64:
-         return reg_spline_approxBendingEnergyValue2D<double>(splineControlPoint);
-      default:
-         reg_print_fct_error("reg_spline_approxBendingEnergy");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-   else
-   {
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_spline_approxBendingEnergyValue3D<float>(splineControlPoint);
-      case NIFTI_TYPE_FLOAT64:
-         return reg_spline_approxBendingEnergyValue3D<double>(splineControlPoint);
-      default:
-         reg_print_fct_error("reg_spline_approxBendingEnergy");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
+double reg_spline_approxBendingEnergy(const nifti_image *splineControlPoint) {
+    if (splineControlPoint->nz == 1) {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_spline_approxBendingEnergyValue2D<float>(splineControlPoint);
+        case NIFTI_TYPE_FLOAT64:
+            return reg_spline_approxBendingEnergyValue2D<double>(splineControlPoint);
+        default:
+            reg_print_fct_error("reg_spline_approxBendingEnergy");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    } else {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_spline_approxBendingEnergyValue3D<float>(splineControlPoint);
+        case NIFTI_TYPE_FLOAT64:
+            return reg_spline_approxBendingEnergyValue3D<double>(splineControlPoint);
+        default:
+            reg_print_fct_error("reg_spline_approxBendingEnergy");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DTYPE>
 void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
                                               nifti_image *gradientImage,
-                                              float weight)
-{
-   size_t nodeNumber = (size_t)splineControlPoint->nx*splineControlPoint->ny;
-   int a, b, x, y, X, Y, index, i;
+                                              float weight) {
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny);
+    int a, b, x, y, X, Y, index, i;
 
-   // Create pointers to the spline coefficients
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    // Create pointers to the spline coefficients
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
 
-   // get the constant basis values
-   DTYPE basisXX[9], basisYY[9], basisXY[9];
-   set_second_order_bspline_basis_values(basisXX, basisYY, basisXY);
+    // get the constant basis values
+    DTYPE basisXX[9], basisYY[9], basisXY[9];
+    set_second_order_bspline_basis_values(basisXX, basisYY, basisXY);
 
-   DTYPE splineCoeffX;
-   DTYPE splineCoeffY;
-   DTYPE XX_x, YY_x, XY_x;
-   DTYPE XX_y, YY_y, XY_y;
+    DTYPE splineCoeffX;
+    DTYPE splineCoeffY;
+    DTYPE XX_x, YY_x, XY_x;
+    DTYPE XX_y, YY_y, XY_y;
 
-   DTYPE *derivativeValues = (DTYPE *)calloc(6*nodeNumber, sizeof(DTYPE));
-   DTYPE *derivativeValuesPtr;
+    DTYPE *derivativeValues = (DTYPE*)calloc(6 * nodeNumber, sizeof(DTYPE));
+    DTYPE *derivativeValuesPtr;
 
-   reg_getDisplacementFromDeformation(splineControlPoint);
+    reg_getDisplacementFromDeformation(splineControlPoint);
 
-   // Compute the bending energy values everywhere but at the boundary
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   shared(splineControlPoint,splinePtrX,splinePtrY, derivativeValues, \
-   basisXX, basisYY, basisXY) \
-   private(a, b, i, index, x, y, derivativeValuesPtr, splineCoeffX, splineCoeffY, \
-   XX_x, YY_x, XY_x, XX_y, YY_y, XY_y)
-#endif
-   for(y=0; y<splineControlPoint->ny; y++)
-   {
-      derivativeValuesPtr = &derivativeValues[6*y*splineControlPoint->nx];
-      for(x=0; x<splineControlPoint->nx; x++)
-      {
-         XX_x=0, YY_x=0, XY_x=0;
-         XX_y=0, YY_y=0, XY_y=0;
-
-         i=0;
-         for(b=-1; b<2; b++){
-            for(a=-1; a<2; a++){
-               if(-1<(x+a) && -1<(y+b) && (x+a)<splineControlPoint->nx && (y+b)<splineControlPoint->ny)
-               {
-                  index = (y+b)*splineControlPoint->nx+x+a;
-                  splineCoeffX = splinePtrX[index];
-                  splineCoeffY = splinePtrY[index];
-                  XX_x += basisXX[i]*splineCoeffX;
-                  YY_x += basisYY[i]*splineCoeffX;
-                  XY_x += basisXY[i]*splineCoeffX;
-
-                  XX_y += basisXX[i]*splineCoeffY;
-                  YY_y += basisYY[i]*splineCoeffY;
-                  XY_y += basisXY[i]*splineCoeffY;
-               }
-               ++i;
-            }
-         }
-         *derivativeValuesPtr++ = XX_x;
-         *derivativeValuesPtr++ = XX_y;
-         *derivativeValuesPtr++ = YY_x;
-         *derivativeValuesPtr++ = YY_y;
-         *derivativeValuesPtr++ = (DTYPE)(2.0*XY_x);
-         *derivativeValuesPtr++ = (DTYPE)(2.0*XY_y);
-      }
-   }
-
-   DTYPE *gradientXPtr = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
-
-   DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber;
-   DTYPE gradientValue[2];
+    // Compute the bending energy values everywhere but at the boundary
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, \
-   basisXX, basisYY, basisXY, approxRatio) \
-   private(index, a, X, Y, x, y, derivativeValuesPtr, gradientValue)
+    shared(splineControlPoint,splinePtrX,splinePtrY, derivativeValues, \
+    basisXX, basisYY, basisXY) \
+    private(a, b, i, index, x, y, derivativeValuesPtr, splineCoeffX, splineCoeffY, \
+    XX_x, YY_x, XY_x, XX_y, YY_y, XY_y)
 #endif
-   for(y=0; y<splineControlPoint->ny; y++)
-   {
-      index=y*splineControlPoint->nx;
-      for(x=0; x<splineControlPoint->nx; x++)
-      {
-         gradientValue[0]=gradientValue[1]=0;
-         a=0;
-         for(Y=y-1; Y<y+2; Y++)
-         {
-            for(X=x-1; X<x+2; X++)
-            {
-               if(-1<X && -1<Y && X<splineControlPoint->nx && Y<splineControlPoint->ny)
-               {
-                  derivativeValuesPtr = &derivativeValues[6 * (Y*splineControlPoint->nx + X)];
-                  gradientValue[0] += (*derivativeValuesPtr++) * basisXX[a];
-                  gradientValue[1] += (*derivativeValuesPtr++) * basisXX[a];
-
-                  gradientValue[0] += (*derivativeValuesPtr++) * basisYY[a];
-                  gradientValue[1] += (*derivativeValuesPtr++) * basisYY[a];
-
-                  gradientValue[0] += (*derivativeValuesPtr++) * basisXY[a];
-                  gradientValue[1] += (*derivativeValuesPtr++) * basisXY[a];
-               }
-               a++;
-            }
-         }
-         gradientXPtr[index] += approxRatio*gradientValue[0];
-         gradientYPtr[index] += approxRatio*gradientValue[1];
-         index++;
-      }
-   }
-   reg_getDeformationFromDisplacement(splineControlPoint);
-   free(derivativeValues);
-}
-/* *************************************************************** */
-template<class DTYPE>
-void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
-                                              nifti_image *gradientImage,
-                                              float weight)
-{
-   size_t nodeNumber = (size_t)splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz;
-   int a, b, c, x, y, z, X, Y, Z, index, i;
-
-   // Create pointers to the spline coefficients
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-   DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
-
-   // get the constant basis values
-   DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27];
-   set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ);
-
-   DTYPE splineCoeffX;
-   DTYPE splineCoeffY;
-   DTYPE splineCoeffZ;
-   DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x;
-   DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y;
-   DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z;
-
-   DTYPE *derivativeValues = (DTYPE *)calloc(18*nodeNumber, sizeof(DTYPE));
-   DTYPE *derivativeValuesPtr;
-
-   reg_getDisplacementFromDeformation(splineControlPoint);
-
-   // Compute the bending energy values everywhere but at the boundary
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   shared(splineControlPoint,splinePtrX,splinePtrY,splinePtrZ, derivativeValues, \
-   basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \
-   private(a, b, c, i, index, x, y, z, derivativeValuesPtr, splineCoeffX, splineCoeffY, \
-   splineCoeffZ, XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, \
-   ZZ_y, XY_y, YZ_y, XZ_y, XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z)
-#endif
-   for(z=0; z<splineControlPoint->nz; z++)
-   {
-      derivativeValuesPtr = &derivativeValues[18*z*splineControlPoint->ny*splineControlPoint->nx];
-      for(y=0; y<splineControlPoint->ny; y++)
-      {
-         for(x=0; x<splineControlPoint->nx; x++)
-         {
-            XX_x=0, YY_x=0, ZZ_x=0;
-            XY_x=0, YZ_x=0, XZ_x=0;
-            XX_y=0, YY_y=0, ZZ_y=0;
-            XY_y=0, YZ_y=0, XZ_y=0;
-            XX_z=0, YY_z=0, ZZ_z=0;
-            XY_z=0, YZ_z=0, XZ_z=0;
-
-            i=0;
-            for(c=-1; c<2; c++){
-               for(b=-1; b<2; b++){
-                  for(a=-1; a<2; a++){
-                     if(-1<(x+a) && -1<(y+b) && -1<(z+c) && (x+a)<splineControlPoint->nx && (y+b)<splineControlPoint->ny && (z+c)<splineControlPoint->nz)
-                     {
-                        index = ((z+c)*splineControlPoint->ny+y+b)*splineControlPoint->nx+x+a;
+    for (y = 0; y < splineControlPoint->ny; y++) {
+        derivativeValuesPtr = &derivativeValues[6 * y * splineControlPoint->nx];
+        for (x = 0; x < splineControlPoint->nx; x++) {
+            XX_x = 0, YY_x = 0, XY_x = 0;
+            XX_y = 0, YY_y = 0, XY_y = 0;
+
+            i = 0;
+            for (b = -1; b < 2; b++) {
+                for (a = -1; a < 2; a++) {
+                    if (-1 < (x + a) && -1 < (y + b) && (x + a) < splineControlPoint->nx && (y + b) < splineControlPoint->ny) {
+                        index = (y + b) * splineControlPoint->nx + x + a;
                         splineCoeffX = splinePtrX[index];
                         splineCoeffY = splinePtrY[index];
-                        splineCoeffZ = splinePtrZ[index];
-                        XX_x += basisXX[i]*splineCoeffX;
-                        YY_x += basisYY[i]*splineCoeffX;
-                        ZZ_x += basisZZ[i]*splineCoeffX;
-                        XY_x += basisXY[i]*splineCoeffX;
-                        YZ_x += basisYZ[i]*splineCoeffX;
-                        XZ_x += basisXZ[i]*splineCoeffX;
-
-                        XX_y += basisXX[i]*splineCoeffY;
-                        YY_y += basisYY[i]*splineCoeffY;
-                        ZZ_y += basisZZ[i]*splineCoeffY;
-                        XY_y += basisXY[i]*splineCoeffY;
-                        YZ_y += basisYZ[i]*splineCoeffY;
-                        XZ_y += basisXZ[i]*splineCoeffY;
-
-                        XX_z += basisXX[i]*splineCoeffZ;
-                        YY_z += basisYY[i]*splineCoeffZ;
-                        ZZ_z += basisZZ[i]*splineCoeffZ;
-                        XY_z += basisXY[i]*splineCoeffZ;
-                        YZ_z += basisYZ[i]*splineCoeffZ;
-                        XZ_z += basisXZ[i]*splineCoeffZ;
-                     }
-                     ++i;
-                  }
-               }
+                        XX_x += basisXX[i] * splineCoeffX;
+                        YY_x += basisYY[i] * splineCoeffX;
+                        XY_x += basisXY[i] * splineCoeffX;
+
+                        XX_y += basisXX[i] * splineCoeffY;
+                        YY_y += basisYY[i] * splineCoeffY;
+                        XY_y += basisXY[i] * splineCoeffY;
+                    }
+                    ++i;
+                }
             }
             *derivativeValuesPtr++ = XX_x;
             *derivativeValuesPtr++ = XX_y;
-            *derivativeValuesPtr++ = XX_z;
             *derivativeValuesPtr++ = YY_x;
             *derivativeValuesPtr++ = YY_y;
-            *derivativeValuesPtr++ = YY_z;
-            *derivativeValuesPtr++ = ZZ_x;
-            *derivativeValuesPtr++ = ZZ_y;
-            *derivativeValuesPtr++ = ZZ_z;
-            *derivativeValuesPtr++ = (DTYPE)(2.0*XY_x);
-            *derivativeValuesPtr++ = (DTYPE)(2.0*XY_y);
-            *derivativeValuesPtr++ = (DTYPE)(2.0*XY_z);
-            *derivativeValuesPtr++ = (DTYPE)(2.0*YZ_x);
-            *derivativeValuesPtr++ = (DTYPE)(2.0*YZ_y);
-            *derivativeValuesPtr++ = (DTYPE)(2.0*YZ_z);
-            *derivativeValuesPtr++ = (DTYPE)(2.0*XZ_x);
-            *derivativeValuesPtr++ = (DTYPE)(2.0*XZ_y);
-            *derivativeValuesPtr++ = (DTYPE)(2.0*XZ_z);
-         }
-      }
-   }
-
-   DTYPE *gradientXPtr = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
-   DTYPE *gradientZPtr = &gradientYPtr[nodeNumber];
-
-   DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber;
-   DTYPE gradientValue[3];
+            *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_x);
+            *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_y);
+        }
+    }
+
+    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
+    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
+
+    DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber;
+    DTYPE gradientValue[2];
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, gradientZPtr, \
-   basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ, approxRatio) \
-   private(index, a, X, Y, Z, x, y, z, derivativeValuesPtr, gradientValue)
+    shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, \
+    basisXX, basisYY, basisXY, approxRatio) \
+    private(index, a, X, Y, x, y, derivativeValuesPtr, gradientValue)
 #endif
-   for(z=0; z<splineControlPoint->nz; z++)
-   {
-      index=z*splineControlPoint->nx*splineControlPoint->ny;
-      for(y=0; y<splineControlPoint->ny; y++)
-      {
-         for(x=0; x<splineControlPoint->nx; x++)
-         {
-            gradientValue[0]=gradientValue[1]=gradientValue[2]=0;
-            a=0;
-            for(Z=z-1; Z<z+2; Z++)
-            {
-               for(Y=y-1; Y<y+2; Y++)
-               {
-                  for(X=x-1; X<x+2; X++)
-                  {
-                     if(-1<X && -1<Y && -1<Z && X<splineControlPoint->nx && Y<splineControlPoint->ny && Z<splineControlPoint->nz)
-                     {
-                        derivativeValuesPtr = &derivativeValues[18 * ((Z*splineControlPoint->ny + Y)*splineControlPoint->nx + X)];
+    for (y = 0; y < splineControlPoint->ny; y++) {
+        index = y * splineControlPoint->nx;
+        for (x = 0; x < splineControlPoint->nx; x++) {
+            gradientValue[0] = gradientValue[1] = 0;
+            a = 0;
+            for (Y = y - 1; Y < y + 2; Y++) {
+                for (X = x - 1; X < x + 2; X++) {
+                    if (-1 < X && -1 < Y && X < splineControlPoint->nx && Y < splineControlPoint->ny) {
+                        derivativeValuesPtr = &derivativeValues[6 * (Y * splineControlPoint->nx + X)];
                         gradientValue[0] += (*derivativeValuesPtr++) * basisXX[a];
                         gradientValue[1] += (*derivativeValuesPtr++) * basisXX[a];
-                        gradientValue[2] += (*derivativeValuesPtr++) * basisXX[a];
 
                         gradientValue[0] += (*derivativeValuesPtr++) * basisYY[a];
                         gradientValue[1] += (*derivativeValuesPtr++) * basisYY[a];
-                        gradientValue[2] += (*derivativeValuesPtr++) * basisYY[a];
-
-                        gradientValue[0] += (*derivativeValuesPtr++) * basisZZ[a];
-                        gradientValue[1] += (*derivativeValuesPtr++) * basisZZ[a];
-                        gradientValue[2] += (*derivativeValuesPtr++) * basisZZ[a];
 
                         gradientValue[0] += (*derivativeValuesPtr++) * basisXY[a];
                         gradientValue[1] += (*derivativeValuesPtr++) * basisXY[a];
-                        gradientValue[2] += (*derivativeValuesPtr++) * basisXY[a];
-
-                        gradientValue[0] += (*derivativeValuesPtr++) * basisYZ[a];
-                        gradientValue[1] += (*derivativeValuesPtr++) * basisYZ[a];
-                        gradientValue[2] += (*derivativeValuesPtr++) * basisYZ[a];
-
-                        gradientValue[0] += (*derivativeValuesPtr++) * basisXZ[a];
-                        gradientValue[1] += (*derivativeValuesPtr++) * basisXZ[a];
-                        gradientValue[2] += (*derivativeValuesPtr++) * basisXZ[a];
-                     }
-                     a++;
-                  }
-               }
+                    }
+                    a++;
+                }
             }
-            gradientXPtr[index] += approxRatio*gradientValue[0];
-            gradientYPtr[index] += approxRatio*gradientValue[1];
-            gradientZPtr[index] += approxRatio*gradientValue[2];
+            gradientXPtr[index] += approxRatio * gradientValue[0];
+            gradientYPtr[index] += approxRatio * gradientValue[1];
             index++;
-         }
-      }
-   }
-   free(derivativeValues);
-   reg_getDeformationFromDisplacement(splineControlPoint);
+        }
+    }
+    reg_getDeformationFromDisplacement(splineControlPoint);
+    free(derivativeValues);
 }
 /* *************************************************************** */
-extern "C++"
-void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint,
-                                            nifti_image *gradientImage,
-                                            float weight)
-{
-   if(splineControlPoint->datatype != gradientImage->datatype)
-   {
-      reg_print_fct_error("reg_spline_approxBendingEnergyGradient");
-      reg_print_msg_error("The input images are expected to have the same type");
-      reg_exit();
-   }
-   if(splineControlPoint->nz==1)
-   {
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_approxBendingEnergyGradient2D<float>
-               (splineControlPoint, gradientImage, weight);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_approxBendingEnergyGradient2D<double>
-               (splineControlPoint, gradientImage, weight);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_approxBendingEnergyGradient");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-   else
-   {
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_approxBendingEnergyGradient3D<float>
-               (splineControlPoint, gradientImage, weight);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_approxBendingEnergyGradient3D<double>
-               (splineControlPoint, gradientImage, weight);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_approxBendingEnergyGradient");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class DTYPE>
-double reg_spline_approxLinearEnergyValue2D(nifti_image *splineControlPoint)
-{
-   size_t nodeNumber = (size_t)splineControlPoint->nx*
-         splineControlPoint->ny;
-   int a, b, x, y, i, index;
-
-   double constraintValue = 0.;
-   double currentValue;
+template<class DTYPE>
+void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
+                                              nifti_image *gradientImage,
+                                              float weight) {
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    int a, b, c, x, y, z, X, Y, Z, index, i;
 
-   // Create pointers to the spline coefficients
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    // Create pointers to the spline coefficients
+    DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
 
-   // Store the basis values since they are constant as the value is approximated
-   // at the control point positions only
-   DTYPE basisX[9], basisY[9];
-   set_first_order_basis_values(basisX, basisY);
+    // get the constant basis values
+    DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27];
+    set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ);
 
-   DTYPE splineCoeffX;
-   DTYPE splineCoeffY;
+    DTYPE splineCoeffX;
+    DTYPE splineCoeffY;
+    DTYPE splineCoeffZ;
+    DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x;
+    DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y;
+    DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z;
 
-   mat33 matrix, R;
+    DTYPE *derivativeValues = (DTYPE*)calloc(18 * nodeNumber, sizeof(DTYPE));
+    DTYPE *derivativeValuesPtr;
 
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+    reg_getDisplacementFromDeformation(splineControlPoint);
 
+    // Compute the bending energy values everywhere but at the boundary
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(splinePtrX, splinePtrY, splineControlPoint, \
-   basisX, basisY, reorientation) \
-   private(x, y, a, b, i, index, matrix, R, \
-   splineCoeffX, splineCoeffY, currentValue) \
-   reduction(+:constraintValue)
+    shared(splineControlPoint,splinePtrX,splinePtrY,splinePtrZ, derivativeValues, \
+    basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \
+    private(a, b, c, i, index, x, y, z, derivativeValuesPtr, splineCoeffX, splineCoeffY, \
+    splineCoeffZ, XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, \
+    ZZ_y, XY_y, YZ_y, XZ_y, XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z)
 #endif
-   for(y=1; y<splineControlPoint->ny-1; ++y){
-      for(x=1; x<splineControlPoint->nx-1; ++x){
-
-         memset(&matrix, 0, sizeof(mat33));
-         matrix.m[2][2] = 1.f;
-
-         i=0;
-         for(b=-1; b<2; b++){
-            for(a=-1; a<2; a++){
-               index = (y+b)*splineControlPoint->nx+x+a;
-               splineCoeffX = splinePtrX[index];
-               splineCoeffY = splinePtrY[index];
-               matrix.m[0][0] += basisX[i]*splineCoeffX;
-               matrix.m[1][0] += basisY[i]*splineCoeffX;
-               matrix.m[0][1] += basisX[i]*splineCoeffY;
-               matrix.m[1][1] += basisY[i]*splineCoeffY;
-               ++i;
+    for (z = 0; z < splineControlPoint->nz; z++) {
+        derivativeValuesPtr = &derivativeValues[18 * z * splineControlPoint->ny * splineControlPoint->nx];
+        for (y = 0; y < splineControlPoint->ny; y++) {
+            for (x = 0; x < splineControlPoint->nx; x++) {
+                XX_x = 0, YY_x = 0, ZZ_x = 0;
+                XY_x = 0, YZ_x = 0, XZ_x = 0;
+                XX_y = 0, YY_y = 0, ZZ_y = 0;
+                XY_y = 0, YZ_y = 0, XZ_y = 0;
+                XX_z = 0, YY_z = 0, ZZ_z = 0;
+                XY_z = 0, YZ_z = 0, XZ_z = 0;
+
+                i = 0;
+                for (c = -1; c < 2; c++) {
+                    for (b = -1; b < 2; b++) {
+                        for (a = -1; a < 2; a++) {
+                            if (-1 < (x + a) && -1 < (y + b) && -1 < (z + c) && (x + a) < splineControlPoint->nx &&
+                                (y + b) < splineControlPoint->ny && (z + c) < splineControlPoint->nz) {
+                                index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
+                                splineCoeffX = splinePtrX[index];
+                                splineCoeffY = splinePtrY[index];
+                                splineCoeffZ = splinePtrZ[index];
+                                XX_x += basisXX[i] * splineCoeffX;
+                                YY_x += basisYY[i] * splineCoeffX;
+                                ZZ_x += basisZZ[i] * splineCoeffX;
+                                XY_x += basisXY[i] * splineCoeffX;
+                                YZ_x += basisYZ[i] * splineCoeffX;
+                                XZ_x += basisXZ[i] * splineCoeffX;
+
+                                XX_y += basisXX[i] * splineCoeffY;
+                                YY_y += basisYY[i] * splineCoeffY;
+                                ZZ_y += basisZZ[i] * splineCoeffY;
+                                XY_y += basisXY[i] * splineCoeffY;
+                                YZ_y += basisYZ[i] * splineCoeffY;
+                                XZ_y += basisXZ[i] * splineCoeffY;
+
+                                XX_z += basisXX[i] * splineCoeffZ;
+                                YY_z += basisYY[i] * splineCoeffZ;
+                                ZZ_z += basisZZ[i] * splineCoeffZ;
+                                XY_z += basisXY[i] * splineCoeffZ;
+                                YZ_z += basisYZ[i] * splineCoeffZ;
+                                XZ_z += basisXZ[i] * splineCoeffZ;
+                            }
+                            ++i;
+                        }
+                    }
+                }
+                *derivativeValuesPtr++ = XX_x;
+                *derivativeValuesPtr++ = XX_y;
+                *derivativeValuesPtr++ = XX_z;
+                *derivativeValuesPtr++ = YY_x;
+                *derivativeValuesPtr++ = YY_y;
+                *derivativeValuesPtr++ = YY_z;
+                *derivativeValuesPtr++ = ZZ_x;
+                *derivativeValuesPtr++ = ZZ_y;
+                *derivativeValuesPtr++ = ZZ_z;
+                *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_x);
+                *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_y);
+                *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_z);
+                *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_x);
+                *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_y);
+                *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_z);
+                *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_x);
+                *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_y);
+                *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_z);
             }
-         }
-         // Convert from mm to voxel
-         matrix = nifti_mat33_mul(reorientation, matrix);
-         // Removing the rotation component
-         R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-         matrix = nifti_mat33_mul(R, matrix);
-         // Convert to displacement
-         --matrix.m[0][0];
-         --matrix.m[1][1];
-
-         currentValue = 0.;
-         for(b=0; b<2; b++){
-            for(a=0; a<2; a++){
-               currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part
+        }
+    }
+
+    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
+    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
+    DTYPE *gradientZPtr = &gradientYPtr[nodeNumber];
+
+    DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber;
+    DTYPE gradientValue[3];
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+    shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, gradientZPtr, \
+    basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ, approxRatio) \
+    private(index, a, X, Y, Z, x, y, z, derivativeValuesPtr, gradientValue)
+#endif
+    for (z = 0; z < splineControlPoint->nz; z++) {
+        index = z * splineControlPoint->nx * splineControlPoint->ny;
+        for (y = 0; y < splineControlPoint->ny; y++) {
+            for (x = 0; x < splineControlPoint->nx; x++) {
+                gradientValue[0] = gradientValue[1] = gradientValue[2] = 0;
+                a = 0;
+                for (Z = z - 1; Z < z + 2; Z++) {
+                    for (Y = y - 1; Y < y + 2; Y++) {
+                        for (X = x - 1; X < x + 2; X++) {
+                            if (-1 < X && -1 < Y && -1 < Z && X < splineControlPoint->nx && Y < splineControlPoint->ny && Z < splineControlPoint->nz) {
+                                derivativeValuesPtr = &derivativeValues[18 * ((Z * splineControlPoint->ny + Y) * splineControlPoint->nx + X)];
+                                gradientValue[0] += (*derivativeValuesPtr++) * basisXX[a];
+                                gradientValue[1] += (*derivativeValuesPtr++) * basisXX[a];
+                                gradientValue[2] += (*derivativeValuesPtr++) * basisXX[a];
+
+                                gradientValue[0] += (*derivativeValuesPtr++) * basisYY[a];
+                                gradientValue[1] += (*derivativeValuesPtr++) * basisYY[a];
+                                gradientValue[2] += (*derivativeValuesPtr++) * basisYY[a];
+
+                                gradientValue[0] += (*derivativeValuesPtr++) * basisZZ[a];
+                                gradientValue[1] += (*derivativeValuesPtr++) * basisZZ[a];
+                                gradientValue[2] += (*derivativeValuesPtr++) * basisZZ[a];
+
+                                gradientValue[0] += (*derivativeValuesPtr++) * basisXY[a];
+                                gradientValue[1] += (*derivativeValuesPtr++) * basisXY[a];
+                                gradientValue[2] += (*derivativeValuesPtr++) * basisXY[a];
+
+                                gradientValue[0] += (*derivativeValuesPtr++) * basisYZ[a];
+                                gradientValue[1] += (*derivativeValuesPtr++) * basisYZ[a];
+                                gradientValue[2] += (*derivativeValuesPtr++) * basisYZ[a];
+
+                                gradientValue[0] += (*derivativeValuesPtr++) * basisXZ[a];
+                                gradientValue[1] += (*derivativeValuesPtr++) * basisXZ[a];
+                                gradientValue[2] += (*derivativeValuesPtr++) * basisXZ[a];
+                            }
+                            a++;
+                        }
+                    }
+                }
+                gradientXPtr[index] += approxRatio * gradientValue[0];
+                gradientYPtr[index] += approxRatio * gradientValue[1];
+                gradientZPtr[index] += approxRatio * gradientValue[2];
+                index++;
             }
-         }
-         constraintValue += currentValue;
-      }
-   }
-   return constraintValue / static_cast<double>(splineControlPoint->nvox);
+        }
+    }
+    free(derivativeValues);
+    reg_getDeformationFromDisplacement(splineControlPoint);
+}
+/* *************************************************************** */
+extern "C++"
+void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint,
+                                            nifti_image *gradientImage,
+                                            float weight) {
+    if (splineControlPoint->datatype != gradientImage->datatype) {
+        reg_print_fct_error("reg_spline_approxBendingEnergyGradient");
+        reg_print_msg_error("The input images are expected to have the same type");
+        reg_exit();
+    }
+    if (splineControlPoint->nz == 1) {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_approxBendingEnergyGradient2D<float>(splineControlPoint, gradientImage, weight);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_approxBendingEnergyGradient2D<double>(splineControlPoint, gradientImage, weight);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_approxBendingEnergyGradient");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    } else {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_approxBendingEnergyGradient3D<float>(splineControlPoint, gradientImage, weight);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_approxBendingEnergyGradient3D<double>(splineControlPoint, gradientImage, weight);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_approxBendingEnergyGradient");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    }
 }
 /* *************************************************************** */
 template <class DTYPE>
-double reg_spline_approxLinearEnergyValue3D(nifti_image *splineControlPoint)
-{
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny * splineControlPoint->nz;
-   int a, b, c, x, y, z, i, index;
+double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoint) {
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny);
+    int a, b, x, y, i, index;
 
-   double constraintValue = 0.;
-   double currentValue;
+    double constraintValue = 0;
+    double currentValue;
 
-   // Create pointers to the spline coefficients
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-   DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    // Create pointers to the spline coefficients
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
 
-   // Store the basis values since they are constant as the value is approximated
-   // at the control point positions only
-   DTYPE basisX[27], basisY[27], basisZ[27];
-   set_first_order_basis_values(basisX, basisY, basisZ);
+    // Store the basis values since they are constant as the value is approximated
+    // at the control point positions only
+    DTYPE basisX[9], basisY[9];
+    set_first_order_basis_values(basisX, basisY);
 
-   DTYPE splineCoeffX;
-   DTYPE splineCoeffY;
-   DTYPE splineCoeffZ;
+    DTYPE splineCoeffX;
+    DTYPE splineCoeffY;
 
-   mat33 matrix, R;
+    mat33 matrix, R;
 
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (splineControlPoint->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(splinePtrX, splinePtrY, splinePtrZ, splineControlPoint, \
-   basisX, basisY, basisZ, reorientation) \
-   private(x, y, z, a, b, c, i, index, matrix, R, \
-   splineCoeffX, splineCoeffY, splineCoeffZ, currentValue) \
-   reduction(+:constraintValue)
+    shared(splinePtrX, splinePtrY, splineControlPoint, \
+    basisX, basisY, reorientation) \
+    private(x, y, a, b, i, index, matrix, R, \
+    splineCoeffX, splineCoeffY, currentValue) \
+    reduction(+:constraintValue)
 #endif
-   for(z=1; z<splineControlPoint->nz-1; ++z){
-      for(y=1; y<splineControlPoint->ny-1; ++y){
-         for(x=1; x<splineControlPoint->nx-1; ++x){
-
+    for (y = 1; y < splineControlPoint->ny - 1; ++y) {
+        for (x = 1; x < splineControlPoint->nx - 1; ++x) {
             memset(&matrix, 0, sizeof(mat33));
-
-            i=0;
-            for(c=-1; c<2; c++){
-               for(b=-1; b<2; b++){
-                  for(a=-1; a<2; a++){
-                     index = ((z+c)*splineControlPoint->ny+y+b)*splineControlPoint->nx+x+a;
-                     splineCoeffX = splinePtrX[index];
-                     splineCoeffY = splinePtrY[index];
-                     splineCoeffZ = splinePtrZ[index];
-
-                     matrix.m[0][0] += basisX[i]*splineCoeffX;
-                     matrix.m[1][0] += basisY[i]*splineCoeffX;
-                     matrix.m[2][0] += basisZ[i]*splineCoeffX;
-
-                     matrix.m[0][1] += basisX[i]*splineCoeffY;
-                     matrix.m[1][1] += basisY[i]*splineCoeffY;
-                     matrix.m[2][1] += basisZ[i]*splineCoeffY;
-
-                     matrix.m[0][2] += basisX[i]*splineCoeffZ;
-                     matrix.m[1][2] += basisY[i]*splineCoeffZ;
-                     matrix.m[2][2] += basisZ[i]*splineCoeffZ;
-                     ++i;
-                  }
-               }
+            matrix.m[2][2] = 1;
+
+            i = 0;
+            for (b = -1; b < 2; b++) {
+                for (a = -1; a < 2; a++) {
+                    index = (y + b) * splineControlPoint->nx + x + a;
+                    splineCoeffX = splinePtrX[index];
+                    splineCoeffY = splinePtrY[index];
+                    matrix.m[0][0] += basisX[i] * splineCoeffX;
+                    matrix.m[1][0] += basisY[i] * splineCoeffX;
+                    matrix.m[0][1] += basisX[i] * splineCoeffY;
+                    matrix.m[1][1] += basisY[i] * splineCoeffY;
+                    ++i;
+                }
             }
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
@@ -697,225 +554,196 @@ double reg_spline_approxLinearEnergyValue3D(nifti_image *splineControlPoint)
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
-            --matrix.m[2][2];
 
-            currentValue = 0.;
-            for(b=0; b<3; b++){
-               for(a=0; a<3; a++){
-                  currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part
-               }
+            currentValue = 0;
+            for (b = 0; b < 2; b++) {
+                for (a = 0; a < 2; a++) {
+                    currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                }
             }
             constraintValue += currentValue;
-         }
-      }
-   }
-   return constraintValue / static_cast<double>(splineControlPoint->nvox);
+        }
+    }
+    return constraintValue / static_cast<double>(splineControlPoint->nvox);
 }
 /* *************************************************************** */
-double reg_spline_approxLinearEnergy(nifti_image *splineControlPoint)
-{
-   if(splineControlPoint->nz>1){
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_spline_approxLinearEnergyValue3D<float>(splineControlPoint);
-      case NIFTI_TYPE_FLOAT64:
-         return reg_spline_approxLinearEnergyValue3D<double>(splineControlPoint);
-      default:
-         reg_print_fct_error("reg_spline_approxLinearEnergyValue3D");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-   else{
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_spline_approxLinearEnergyValue2D<float>(splineControlPoint);
-      case NIFTI_TYPE_FLOAT64:
-         return reg_spline_approxLinearEnergyValue2D<double>(splineControlPoint);
-      default:
-         reg_print_fct_error("reg_spline_approxLinearEnergyValue2D");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-}
-/* *************************************************************** */
-/* *************************************************************** */
 template <class DTYPE>
-double reg_spline_linearEnergyValue2D(nifti_image *referenceImage,
-                                      nifti_image *splineControlPoint)
-{
-   size_t voxelNumber = (size_t)referenceImage->nx *
-         referenceImage->ny;
-   int a, b, x, y, index, xPre, yPre;
-   DTYPE basis;
-
-
-   DTYPE gridVoxelSpacing[2] ={
-      gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx,
-      gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy
-   };
-
-   double constraintValue = 0.;
-   double currentValue;
-
-   // Create pointers to the spline coefficients
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny * splineControlPoint->nz;
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-   DTYPE splineCoeffX, splineCoeffY;
-
-   // Store the basis values since they are constant as the value is approximated
-   // at the control point positions only
-   DTYPE basisX[4], basisY[4];
-   DTYPE firstX[4], firstY[4];
-
-   mat33 matrix, R;
-
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoint) {
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    int a, b, c, x, y, z, i, index;
 
+    double constraintValue = 0;
+    double currentValue;
 
-   for(y=0; y<referenceImage->ny; ++y){
+    // Create pointers to the spline coefficients
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
 
-      yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
-      basis=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
-      if(basis<0) basis=0; //rounding error
-      get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
+    // Store the basis values since they are constant as the value is approximated
+    // at the control point positions only
+    DTYPE basisX[27], basisY[27], basisZ[27];
+    set_first_order_basis_values(basisX, basisY, basisZ);
 
-      for(x=0; x<referenceImage->nx; ++x){
+    DTYPE splineCoeffX;
+    DTYPE splineCoeffY;
+    DTYPE splineCoeffZ;
 
-         xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
-         basis=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
-         if(basis<0) basis=0; //rounding error
-         get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
+    mat33 matrix, R;
 
-         memset(&matrix, 0, sizeof(mat33));
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (splineControlPoint->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
 
-         for(b=0; b<4; b++){
-            for(a=0; a<4; a++){
-               index = (yPre+b)*splineControlPoint->nx+xPre+a;
-               splineCoeffX = splinePtrX[index];
-               splineCoeffY = splinePtrY[index];
-
-               matrix.m[0][0] += firstX[a]*basisY[b]*splineCoeffX;
-               matrix.m[1][0] += basisX[a]*firstY[b]*splineCoeffX;
-
-               matrix.m[0][1] += firstX[a]*basisY[b]*splineCoeffY;
-               matrix.m[1][1] += basisX[a]*firstY[b]*splineCoeffY;
-            }
-         }
-         // Convert from mm to voxel
-         matrix = nifti_mat33_mul(reorientation, matrix);
-         // Removing the rotation component
-         R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-         matrix = nifti_mat33_mul(R, matrix);
-         // Convert to displacement
-         --matrix.m[0][0];
-         --matrix.m[1][1];
-
-         currentValue = 0.;
-         for(b=0; b<2; b++){
-            for(a=0; a<2; a++){
-               currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+    shared(splinePtrX, splinePtrY, splinePtrZ, splineControlPoint, \
+    basisX, basisY, basisZ, reorientation) \
+    private(x, y, z, a, b, c, i, index, matrix, R, \
+    splineCoeffX, splineCoeffY, splineCoeffZ, currentValue) \
+    reduction(+:constraintValue)
+#endif
+    for (z = 1; z < splineControlPoint->nz - 1; ++z) {
+        for (y = 1; y < splineControlPoint->ny - 1; ++y) {
+            for (x = 1; x < splineControlPoint->nx - 1; ++x) {
+                memset(&matrix, 0, sizeof(mat33));
+
+                i = 0;
+                for (c = -1; c < 2; c++) {
+                    for (b = -1; b < 2; b++) {
+                        for (a = -1; a < 2; a++) {
+                            index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
+                            splineCoeffX = splinePtrX[index];
+                            splineCoeffY = splinePtrY[index];
+                            splineCoeffZ = splinePtrZ[index];
+
+                            matrix.m[0][0] += basisX[i] * splineCoeffX;
+                            matrix.m[1][0] += basisY[i] * splineCoeffX;
+                            matrix.m[2][0] += basisZ[i] * splineCoeffX;
+
+                            matrix.m[0][1] += basisX[i] * splineCoeffY;
+                            matrix.m[1][1] += basisY[i] * splineCoeffY;
+                            matrix.m[2][1] += basisZ[i] * splineCoeffY;
+
+                            matrix.m[0][2] += basisX[i] * splineCoeffZ;
+                            matrix.m[1][2] += basisY[i] * splineCoeffZ;
+                            matrix.m[2][2] += basisZ[i] * splineCoeffZ;
+                            ++i;
+                        }
+                    }
+                }
+                // Convert from mm to voxel
+                matrix = nifti_mat33_mul(reorientation, matrix);
+                // Removing the rotation component
+                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(R, matrix);
+                // Convert to displacement
+                --matrix.m[0][0];
+                --matrix.m[1][1];
+                --matrix.m[2][2];
+
+                currentValue = 0;
+                for (b = 0; b < 3; b++) {
+                    for (a = 0; a < 3; a++) {
+                        currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                    }
+                }
+                constraintValue += currentValue;
             }
-         }
-         constraintValue += currentValue;
-      }
-   }
-   return constraintValue / static_cast<double>(voxelNumber*2);
+        }
+    }
+    return constraintValue / static_cast<double>(splineControlPoint->nvox);
+}
+/* *************************************************************** */
+double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) {
+    if (splineControlPoint->nz > 1) {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_spline_approxLinearEnergyValue3D<float>(splineControlPoint);
+        case NIFTI_TYPE_FLOAT64:
+            return reg_spline_approxLinearEnergyValue3D<double>(splineControlPoint);
+        default:
+            reg_print_fct_error("reg_spline_approxLinearEnergyValue3D");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    } else {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_spline_approxLinearEnergyValue2D<float>(splineControlPoint);
+        case NIFTI_TYPE_FLOAT64:
+            return reg_spline_approxLinearEnergyValue2D<double>(splineControlPoint);
+        default:
+            reg_print_fct_error("reg_spline_approxLinearEnergyValue2D");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    }
 }
 /* *************************************************************** */
 template <class DTYPE>
-double reg_spline_linearEnergyValue3D(nifti_image *referenceImage,
-                                      nifti_image *splineControlPoint)
-{
-   size_t voxelNumber = (size_t)referenceImage->nx *
-         referenceImage->ny * referenceImage->nz;
-   int a, b, c, x, y, z, index, xPre, yPre, zPre;
-   DTYPE basis;
-
-
-   DTYPE gridVoxelSpacing[3] ={
-      gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx,
-      gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy,
-      gridVoxelSpacing[2] = splineControlPoint->dz / referenceImage->dz
-   };
-
-   double constraintValue = 0.;
-   double currentValue;
-
-   // Create pointers to the spline coefficients
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny * splineControlPoint->nz;
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-   DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
-   DTYPE splineCoeffX, splineCoeffY, splineCoeffZ;
-
-   // Store the basis values since they are constant as the value is approximated
-   // at the control point positions only
-   DTYPE basisX[4], basisY[4], basisZ[4];
-   DTYPE firstX[4], firstY[4], firstZ[4];
-
-   mat33 matrix, R;
-
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-
-   for(z=0; z<referenceImage->nz; ++z){
-
-      zPre=static_cast<int>(static_cast<DTYPE>(z)/gridVoxelSpacing[2]);
-      basis=static_cast<DTYPE>(z)/gridVoxelSpacing[2]-static_cast<DTYPE>(zPre);
-      if(basis<0) basis=0; //rounding error
-      get_BSplineBasisValues<DTYPE>(basis, basisZ, firstZ);
-
-      for(y=0; y<referenceImage->ny; ++y){
-
-         yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
-         basis=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
-         if(basis<0) basis=0; //rounding error
-         get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
-
-         for(x=0; x<referenceImage->nx; ++x){
-
-            xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
-            basis=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
-            if(basis<0) basis=0; //rounding error
+double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
+                                      const nifti_image *splineControlPoint) {
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny);
+    int a, b, x, y, index, xPre, yPre;
+    DTYPE basis;
+
+    const DTYPE gridVoxelSpacing[2] = {
+        splineControlPoint->dx / referenceImage->dx,
+        splineControlPoint->dy / referenceImage->dy
+    };
+
+    double constraintValue = 0;
+    double currentValue;
+
+    // Create pointers to the spline coefficients
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    DTYPE splineCoeffX, splineCoeffY;
+
+    // Store the basis values since they are constant as the value is approximated
+    // at the control point positions only
+    DTYPE basisX[4], basisY[4];
+    DTYPE firstX[4], firstY[4];
+
+    mat33 matrix, R;
+
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (splineControlPoint->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+
+
+    for (y = 0; y < referenceImage->ny; ++y) {
+        yPre = static_cast<int>(static_cast<DTYPE>(y) / gridVoxelSpacing[1]);
+        basis = static_cast<DTYPE>(y) / gridVoxelSpacing[1] - static_cast<DTYPE>(yPre);
+        if (basis < 0) basis = 0; //rounding error
+        get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
+
+        for (x = 0; x < referenceImage->nx; ++x) {
+            xPre = static_cast<int>(static_cast<DTYPE>(x) / gridVoxelSpacing[0]);
+            basis = static_cast<DTYPE>(x) / gridVoxelSpacing[0] - static_cast<DTYPE>(xPre);
+            if (basis < 0) basis = 0; //rounding error
             get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
 
             memset(&matrix, 0, sizeof(mat33));
 
-            for(c=0; c<4; c++){
-               for(b=0; b<4; b++){
-                  for(a=0; a<4; a++){
-                     index = ((zPre+c)*splineControlPoint->ny+yPre+b)*splineControlPoint->nx+xPre+a;
-                     splineCoeffX = splinePtrX[index];
-                     splineCoeffY = splinePtrY[index];
-                     splineCoeffZ = splinePtrZ[index];
-
-                     matrix.m[0][0] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffX;
-                     matrix.m[1][0] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffX;
-                     matrix.m[2][0] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffX;
-
-                     matrix.m[0][1] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffY;
-                     matrix.m[1][1] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffY;
-                     matrix.m[2][1] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffY;
-
-                     matrix.m[0][2] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffZ;
-                     matrix.m[1][2] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffZ;
-                     matrix.m[2][2] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffZ;
-                  }
-               }
+            for (b = 0; b < 4; b++) {
+                for (a = 0; a < 4; a++) {
+                    index = (yPre + b) * splineControlPoint->nx + xPre + a;
+                    splineCoeffX = splinePtrX[index];
+                    splineCoeffY = splinePtrY[index];
+
+                    matrix.m[0][0] += firstX[a] * basisY[b] * splineCoeffX;
+                    matrix.m[1][0] += basisX[a] * firstY[b] * splineCoeffX;
+
+                    matrix.m[0][1] += firstX[a] * basisY[b] * splineCoeffY;
+                    matrix.m[1][1] += basisX[a] * firstY[b] * splineCoeffY;
+                }
             }
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
@@ -925,249 +753,214 @@ double reg_spline_linearEnergyValue3D(nifti_image *referenceImage,
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
-            --matrix.m[2][2];
 
-            currentValue = 0.;
-            for(b=0; b<3; b++){
-               for(a=0; a<3; a++){
-                  currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part
-               }
+            currentValue = 0;
+            for (b = 0; b < 2; b++) {
+                for (a = 0; a < 2; a++) {
+                    currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                }
             }
             constraintValue += currentValue;
-         }
-      }
-   }
-   return constraintValue / static_cast<double>(voxelNumber*3);
+        }
+    }
+    return constraintValue / static_cast<double>(voxelNumber * 2);
 }
 /* *************************************************************** */
-double reg_spline_linearEnergy(nifti_image *referenceImage,
-                               nifti_image *splineControlPoint)
-{
-   if(splineControlPoint->nz>1){
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_spline_linearEnergyValue3D<float>(referenceImage, splineControlPoint);
-      case NIFTI_TYPE_FLOAT64:
-         return reg_spline_linearEnergyValue3D<double>(referenceImage, splineControlPoint);
-      default:
-         reg_print_fct_error("reg_spline_linearEnergyValue3D");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-   else{
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_spline_linearEnergyValue2D<float>(referenceImage, splineControlPoint);
-      case NIFTI_TYPE_FLOAT64:
-         return reg_spline_linearEnergyValue2D<double>(referenceImage, splineControlPoint);
-      default:
-         reg_print_fct_error("reg_spline_approxLinearEnergyValue2D");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-}
-/* *************************************************************** */
-/* *************************************************************** */
 template <class DTYPE>
-void reg_spline_linearEnergyGradient2D(nifti_image *referenceImage,
-                                       nifti_image *splineControlPoint,
-                                       nifti_image *gradientImage,
-                                       float weight
-                                       )
-{
-   size_t voxelNumber = (size_t)referenceImage->nx *
-         referenceImage->ny;
-   int a, b, x, y, index, xPre, yPre;
-   DTYPE basis;
-
-   DTYPE gridVoxelSpacing[2] ={
-      gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx,
-      gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy
-   };
-
-   // Create pointers to the spline coefficients
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny * splineControlPoint->nz;
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-   DTYPE splineCoeffX, splineCoeffY;
-
-   // Store the basis values since they are constant as the value is approximated
-   // at the control point positions only
-   DTYPE basisX[4], basisY[4];
-   DTYPE firstX[4], firstY[4];
-
-   mat33 matrix, R;
-
-   DTYPE *gradientXPtr = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
-
-   DTYPE approxRatio = (DTYPE)weight / (DTYPE)(voxelNumber);
-   DTYPE gradValues[2];
-
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-   mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
-
-   // Loop over all voxels
-   for(y=0; y<referenceImage->ny; ++y){
-
-      yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
-      basis=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
-      if(basis<0) basis=0; //rounding error
-      get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
-
-      for(x=0; x<referenceImage->nx; ++x){
-
-         xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
-         basis=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
-         if(basis<0) basis=0; //rounding error
-         get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
-
-         memset(&matrix, 0, sizeof(mat33));
-
-         for(b=0; b<4; b++){
-            for(a=0; a<4; a++){
-               index = (yPre+b)*splineControlPoint->nx+xPre+a;
-               splineCoeffX = splinePtrX[index];
-               splineCoeffY = splinePtrY[index];
-
-               matrix.m[0][0] += firstX[a]*basisY[b]*splineCoeffX;
-               matrix.m[1][0] += basisX[a]*firstY[b]*splineCoeffX;
-
-               matrix.m[0][1] += firstX[a]*basisY[b]*splineCoeffY;
-               matrix.m[1][1] += basisX[a]*firstY[b]*splineCoeffY;
+double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
+                                      const nifti_image *splineControlPoint) {
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    int a, b, c, x, y, z, index, xPre, yPre, zPre;
+    DTYPE basis;
+
+    const DTYPE gridVoxelSpacing[3] = {
+        splineControlPoint->dx / referenceImage->dx,
+        splineControlPoint->dy / referenceImage->dy,
+        splineControlPoint->dz / referenceImage->dz
+    };
+
+    double constraintValue = 0;
+    double currentValue;
+
+    // Create pointers to the spline coefficients
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    DTYPE splineCoeffX, splineCoeffY, splineCoeffZ;
+
+    // Store the basis values since they are constant as the value is approximated
+    // at the control point positions only
+    DTYPE basisX[4], basisY[4], basisZ[4];
+    DTYPE firstX[4], firstY[4], firstZ[4];
+
+    mat33 matrix, R;
+
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (splineControlPoint->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+
+    for (z = 0; z < referenceImage->nz; ++z) {
+        zPre = static_cast<int>(static_cast<DTYPE>(z) / gridVoxelSpacing[2]);
+        basis = static_cast<DTYPE>(z) / gridVoxelSpacing[2] - static_cast<DTYPE>(zPre);
+        if (basis < 0) basis = 0; //rounding error
+        get_BSplineBasisValues<DTYPE>(basis, basisZ, firstZ);
+
+        for (y = 0; y < referenceImage->ny; ++y) {
+            yPre = static_cast<int>(static_cast<DTYPE>(y) / gridVoxelSpacing[1]);
+            basis = static_cast<DTYPE>(y) / gridVoxelSpacing[1] - static_cast<DTYPE>(yPre);
+            if (basis < 0) basis = 0; //rounding error
+            get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
+
+            for (x = 0; x < referenceImage->nx; ++x) {
+                xPre = static_cast<int>(static_cast<DTYPE>(x) / gridVoxelSpacing[0]);
+                basis = static_cast<DTYPE>(x) / gridVoxelSpacing[0] - static_cast<DTYPE>(xPre);
+                if (basis < 0) basis = 0; //rounding error
+                get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
+
+                memset(&matrix, 0, sizeof(mat33));
+
+                for (c = 0; c < 4; c++) {
+                    for (b = 0; b < 4; b++) {
+                        for (a = 0; a < 4; a++) {
+                            index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a;
+                            splineCoeffX = splinePtrX[index];
+                            splineCoeffY = splinePtrY[index];
+                            splineCoeffZ = splinePtrZ[index];
+
+                            matrix.m[0][0] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffX;
+                            matrix.m[1][0] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffX;
+                            matrix.m[2][0] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffX;
+
+                            matrix.m[0][1] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffY;
+                            matrix.m[1][1] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffY;
+                            matrix.m[2][1] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffY;
+
+                            matrix.m[0][2] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ;
+                            matrix.m[1][2] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ;
+                            matrix.m[2][2] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ;
+                        }
+                    }
+                }
+                // Convert from mm to voxel
+                matrix = nifti_mat33_mul(reorientation, matrix);
+                // Removing the rotation component
+                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(R, matrix);
+                // Convert to displacement
+                --matrix.m[0][0];
+                --matrix.m[1][1];
+                --matrix.m[2][2];
+
+                currentValue = 0;
+                for (b = 0; b < 3; b++) {
+                    for (a = 0; a < 3; a++) {
+                        currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                    }
+                }
+                constraintValue += currentValue;
             }
-         }
-         // Convert from mm to voxel
-         matrix = nifti_mat33_mul(reorientation, matrix);
-         // Removing the rotation component
-         R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-         matrix = nifti_mat33_mul(R, matrix);
-         // Convert to displacement
-         --matrix.m[0][0];
-         --matrix.m[1][1];
-         for(b=0; b<4; b++){
-            for(a=0; a<4; a++){
-               index = (yPre+b)*splineControlPoint->nx+xPre+a;
-               gradValues[0] = -2.0*matrix.m[0][0] *
-                     firstX[3-a]*basisY[3-b];
-               gradValues[1] = -2.0*matrix.m[1][1] *
-                     basisX[3-a]*firstY[3-b];
-               gradientXPtr[index] += approxRatio *
-                     ( inv_reorientation.m[0][0]*gradValues[0]
-                     + inv_reorientation.m[0][1]*gradValues[1]);
-               gradientYPtr[index] += approxRatio *
-                     ( inv_reorientation.m[1][0]*gradValues[0]
-                     + inv_reorientation.m[1][1]*gradValues[1]);
-            } // a
-         } // b
-      }
-   }
-   return;
+        }
+    }
+    return constraintValue / static_cast<double>(voxelNumber * 3);
+}
+/* *************************************************************** */
+double reg_spline_linearEnergy(const nifti_image *referenceImage,
+                               const nifti_image *splineControlPoint) {
+    if (splineControlPoint->nz > 1) {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_spline_linearEnergyValue3D<float>(referenceImage, splineControlPoint);
+        case NIFTI_TYPE_FLOAT64:
+            return reg_spline_linearEnergyValue3D<double>(referenceImage, splineControlPoint);
+        default:
+            reg_print_fct_error("reg_spline_linearEnergyValue3D");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    } else {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_spline_linearEnergyValue2D<float>(referenceImage, splineControlPoint);
+        case NIFTI_TYPE_FLOAT64:
+            return reg_spline_linearEnergyValue2D<double>(referenceImage, splineControlPoint);
+        default:
+            reg_print_fct_error("reg_spline_approxLinearEnergyValue2D");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    }
 }
 /* *************************************************************** */
 template <class DTYPE>
-void reg_spline_linearEnergyGradient3D(nifti_image *referenceImage,
-                                       nifti_image *splineControlPoint,
+void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
+                                       const nifti_image *splineControlPoint,
                                        nifti_image *gradientImage,
-                                       float weight
-                                       )
-{
-   size_t voxelNumber = (size_t)referenceImage->nx *
-         referenceImage->ny * referenceImage->nz;
-   int a, b, c, x, y, z, index, xPre, yPre, zPre;
-   DTYPE basis;
-
-
-   DTYPE gridVoxelSpacing[3] ={
-      gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx,
-      gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy,
-      gridVoxelSpacing[2] = splineControlPoint->dz / referenceImage->dz
-   };
-
-   // Create pointers to the spline coefficients
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny * splineControlPoint->nz;
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-   DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
-   DTYPE splineCoeffX, splineCoeffY, splineCoeffZ;
-
-   // Store the basis values since they are constant as the value is approximated
-   // at the control point positions only
-   DTYPE basisX[4], basisY[4], basisZ[4];
-   DTYPE firstX[4], firstY[4], firstZ[4];
-
-   mat33 matrix, R;
-
-   DTYPE *gradientXPtr = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
-   DTYPE *gradientZPtr = &gradientYPtr[nodeNumber];
-
-   DTYPE approxRatio = (DTYPE)weight / (DTYPE)(voxelNumber);
-   DTYPE gradValues[3];
-
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-   mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
-
-   // Loop over all voxels
-   for(z=0; z<referenceImage->nz; ++z){
-
-      zPre=static_cast<int>(static_cast<DTYPE>(z)/gridVoxelSpacing[2]);
-      basis=static_cast<DTYPE>(z)/gridVoxelSpacing[2]-static_cast<DTYPE>(zPre);
-      if(basis<0) basis=0; //rounding error
-      get_BSplineBasisValues<DTYPE>(basis, basisZ, firstZ);
-
-      for(y=0; y<referenceImage->ny; ++y){
-
-         yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
-         basis=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
-         if(basis<0) basis=0; //rounding error
-         get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
-
-         for(x=0; x<referenceImage->nx; ++x){
-
-            xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
-            basis=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
-            if(basis<0) basis=0; //rounding error
+                                       float weight) {
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny);
+    int a, b, x, y, index, xPre, yPre;
+    DTYPE basis;
+
+    const DTYPE gridVoxelSpacing[2] = {
+        splineControlPoint->dx / referenceImage->dx,
+        splineControlPoint->dy / referenceImage->dy
+    };
+
+    // Create pointers to the spline coefficients
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    DTYPE splineCoeffX, splineCoeffY;
+
+    // Store the basis values since they are constant as the value is approximated
+    // at the control point positions only
+    DTYPE basisX[4], basisY[4];
+    DTYPE firstX[4], firstY[4];
+
+    mat33 matrix, R;
+
+    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
+    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
+
+    DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber;
+    DTYPE gradValues[2];
+
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (splineControlPoint->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
+
+    // Loop over all voxels
+    for (y = 0; y < referenceImage->ny; ++y) {
+        yPre = static_cast<int>(static_cast<DTYPE>(y) / gridVoxelSpacing[1]);
+        basis = static_cast<DTYPE>(y) / gridVoxelSpacing[1] - static_cast<DTYPE>(yPre);
+        if (basis < 0) basis = 0; //rounding error
+        get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
+
+        for (x = 0; x < referenceImage->nx; ++x) {
+            xPre = static_cast<int>(static_cast<DTYPE>(x) / gridVoxelSpacing[0]);
+            basis = static_cast<DTYPE>(x) / gridVoxelSpacing[0] - static_cast<DTYPE>(xPre);
+            if (basis < 0) basis = 0; //rounding error
             get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
 
             memset(&matrix, 0, sizeof(mat33));
 
-            for(c=0; c<4; c++){
-               for(b=0; b<4; b++){
-                  for(a=0; a<4; a++){
-                     index = ((zPre+c)*splineControlPoint->ny+yPre+b) *
-                           splineControlPoint->nx+xPre+a;
-                     splineCoeffX = splinePtrX[index];
-                     splineCoeffY = splinePtrY[index];
-                     splineCoeffZ = splinePtrZ[index];
-
-                     matrix.m[0][0] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffX;
-                     matrix.m[1][0] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffX;
-                     matrix.m[2][0] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffX;
-
-                     matrix.m[0][1] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffY;
-                     matrix.m[1][1] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffY;
-                     matrix.m[2][1] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffY;
-
-                     matrix.m[0][2] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffZ;
-                     matrix.m[1][2] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffZ;
-                     matrix.m[2][2] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffZ;
-                  }
-               }
+            for (b = 0; b < 4; b++) {
+                for (a = 0; a < 4; a++) {
+                    index = (yPre + b) * splineControlPoint->nx + xPre + a;
+                    splineCoeffX = splinePtrX[index];
+                    splineCoeffY = splinePtrY[index];
+
+                    matrix.m[0][0] += firstX[a] * basisY[b] * splineCoeffX;
+                    matrix.m[1][0] += basisX[a] * firstY[b] * splineCoeffX;
+
+                    matrix.m[0][1] += firstX[a] * basisY[b] * splineCoeffY;
+                    matrix.m[1][1] += basisX[a] * firstY[b] * splineCoeffY;
+                }
             }
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
@@ -1177,266 +970,241 @@ void reg_spline_linearEnergyGradient3D(nifti_image *referenceImage,
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
-            --matrix.m[2][2];
-            for(c=0; c<4; c++){
-               for(b=0; b<4; b++){
-                  for(a=0; a<4; a++){
-                     index = ((zPre+c)*splineControlPoint->ny+yPre+b) *
-                           splineControlPoint->nx+xPre+a;
-                     gradValues[0] = -2.0*matrix.m[0][0] *
-                           firstX[3-a]*basisY[3-b]*basisZ[3-c];
-                     gradValues[1] = -2.0*matrix.m[1][1] *
-                           basisX[3-a]*firstY[3-b]*basisZ[3-c];
-                     gradValues[2] = -2.0*matrix.m[2][2] *
-                           basisX[3-a]*basisY[3-b]*firstZ[3-c];
-                     gradientXPtr[index] += approxRatio *
-                           ( inv_reorientation.m[0][0]*gradValues[0]
-                           + inv_reorientation.m[0][1]*gradValues[1]
-                           + inv_reorientation.m[0][2]*gradValues[2]);
-                     gradientYPtr[index] += approxRatio *
-                           ( inv_reorientation.m[1][0]*gradValues[0]
-                           + inv_reorientation.m[1][1]*gradValues[1]
-                           + inv_reorientation.m[1][2]*gradValues[2]);
-                     gradientZPtr[index] += approxRatio *
-                           ( inv_reorientation.m[2][0]*gradValues[0]
-                           + inv_reorientation.m[2][1]*gradValues[1]
-                           + inv_reorientation.m[2][2]*gradValues[2]);
-                  } // a
-               } // b
-            } // c
-         } // x
-      } // y
-   } // z
-   return;
+            for (b = 0; b < 4; b++) {
+                for (a = 0; a < 4; a++) {
+                    index = (yPre + b) * splineControlPoint->nx + xPre + a;
+                    gradValues[0] = -2.0 * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b];
+                    gradValues[1] = -2.0 * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b];
+                    gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
+                                                          inv_reorientation.m[0][1] * gradValues[1]);
+                    gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
+                                                          inv_reorientation.m[1][1] * gradValues[1]);
+                } // a
+            } // b
+        }
+    }
 }
 /* *************************************************************** */
-void reg_spline_linearEnergyGradient(nifti_image *referenceImage,
-                                     nifti_image *splineControlPoint,
-                                     nifti_image *gradientImage,
-                                     float weight
-                                     )
-{
-   if(splineControlPoint->datatype != gradientImage->datatype)
-   {
-      reg_print_fct_error("reg_spline_linearEnergyGradient");
-      reg_print_msg_error("Input images are expected to have the same datatype");
-      reg_exit();
-   }
-   if(splineControlPoint->nz>1){
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_linearEnergyGradient3D<float>
-               (referenceImage, splineControlPoint, gradientImage, weight);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_linearEnergyGradient3D<double>
-               (referenceImage, splineControlPoint, gradientImage, weight);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_linearEnergyGradient3D");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-   else{
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_linearEnergyGradient2D<float>
-               (referenceImage, splineControlPoint, gradientImage, weight);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_linearEnergyGradient2D<double>
-               (referenceImage, splineControlPoint, gradientImage, weight);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_linearEnergyGradient2D");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
+template <class DTYPE>
+void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
+                                       const nifti_image *splineControlPoint,
+                                       nifti_image *gradientImage,
+                                       float weight) {
+    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    int a, b, c, x, y, z, index, xPre, yPre, zPre;
+    DTYPE basis;
+
+    const DTYPE gridVoxelSpacing[3] = {
+        splineControlPoint->dx / referenceImage->dx,
+        splineControlPoint->dy / referenceImage->dy,
+        splineControlPoint->dz / referenceImage->dz
+    };
+
+    // Create pointers to the spline coefficients
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    DTYPE splineCoeffX, splineCoeffY, splineCoeffZ;
+
+    // Store the basis values since they are constant as the value is approximated
+    // at the control point positions only
+    DTYPE basisX[4], basisY[4], basisZ[4];
+    DTYPE firstX[4], firstY[4], firstZ[4];
+
+    mat33 matrix, R;
+
+    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
+    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
+    DTYPE *gradientZPtr = &gradientYPtr[nodeNumber];
+
+    DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber;
+    DTYPE gradValues[3];
+
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (splineControlPoint->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
+
+    // Loop over all voxels
+    for (z = 0; z < referenceImage->nz; ++z) {
+        zPre = static_cast<int>(static_cast<DTYPE>(z) / gridVoxelSpacing[2]);
+        basis = static_cast<DTYPE>(z) / gridVoxelSpacing[2] - static_cast<DTYPE>(zPre);
+        if (basis < 0) basis = 0; //rounding error
+        get_BSplineBasisValues<DTYPE>(basis, basisZ, firstZ);
+
+        for (y = 0; y < referenceImage->ny; ++y) {
+            yPre = static_cast<int>(static_cast<DTYPE>(y) / gridVoxelSpacing[1]);
+            basis = static_cast<DTYPE>(y) / gridVoxelSpacing[1] - static_cast<DTYPE>(yPre);
+            if (basis < 0) basis = 0; //rounding error
+            get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
+
+            for (x = 0; x < referenceImage->nx; ++x) {
+                xPre = static_cast<int>(static_cast<DTYPE>(x) / gridVoxelSpacing[0]);
+                basis = static_cast<DTYPE>(x) / gridVoxelSpacing[0] - static_cast<DTYPE>(xPre);
+                if (basis < 0) basis = 0; //rounding error
+                get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
+
+                memset(&matrix, 0, sizeof(mat33));
+
+                for (c = 0; c < 4; c++) {
+                    for (b = 0; b < 4; b++) {
+                        for (a = 0; a < 4; a++) {
+                            index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a;
+                            splineCoeffX = splinePtrX[index];
+                            splineCoeffY = splinePtrY[index];
+                            splineCoeffZ = splinePtrZ[index];
+
+                            matrix.m[0][0] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffX;
+                            matrix.m[1][0] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffX;
+                            matrix.m[2][0] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffX;
+
+                            matrix.m[0][1] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffY;
+                            matrix.m[1][1] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffY;
+                            matrix.m[2][1] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffY;
+
+                            matrix.m[0][2] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ;
+                            matrix.m[1][2] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ;
+                            matrix.m[2][2] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ;
+                        }
+                    }
+                }
+                // Convert from mm to voxel
+                matrix = nifti_mat33_mul(reorientation, matrix);
+                // Removing the rotation component
+                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(R, matrix);
+                // Convert to displacement
+                --matrix.m[0][0];
+                --matrix.m[1][1];
+                --matrix.m[2][2];
+                for (c = 0; c < 4; c++) {
+                    for (b = 0; b < 4; b++) {
+                        for (a = 0; a < 4; a++) {
+                            index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a;
+                            gradValues[0] = -2.0 * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b] * basisZ[3 - c];
+                            gradValues[1] = -2.0 * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b] * basisZ[3 - c];
+                            gradValues[2] = -2.0 * matrix.m[2][2] * basisX[3 - a] * basisY[3 - b] * firstZ[3 - c];
+                            gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
+                                                                  inv_reorientation.m[0][1] * gradValues[1] +
+                                                                  inv_reorientation.m[0][2] * gradValues[2]);
+                            gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
+                                                                  inv_reorientation.m[1][1] * gradValues[1] +
+                                                                  inv_reorientation.m[1][2] * gradValues[2]);
+                            gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] +
+                                                                  inv_reorientation.m[2][1] * gradValues[1] +
+                                                                  inv_reorientation.m[2][2] * gradValues[2]);
+                        } // a
+                    } // b
+                } // c
+            } // x
+        } // y
+    } // z
 }
 /* *************************************************************** */
+void reg_spline_linearEnergyGradient(const nifti_image *referenceImage,
+                                     const nifti_image *splineControlPoint,
+                                     nifti_image *gradientImage,
+                                     float weight) {
+    if (splineControlPoint->datatype != gradientImage->datatype) {
+        reg_print_fct_error("reg_spline_linearEnergyGradient");
+        reg_print_msg_error("Input images are expected to have the same datatype");
+        reg_exit();
+    }
+    if (splineControlPoint->nz > 1) {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_linearEnergyGradient3D<float>(referenceImage, splineControlPoint, gradientImage, weight);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_linearEnergyGradient3D<double>(referenceImage, splineControlPoint, gradientImage, weight);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_linearEnergyGradient3D");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    } else {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_linearEnergyGradient2D<float>(referenceImage, splineControlPoint, gradientImage, weight);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_linearEnergyGradient2D<double>(referenceImage, splineControlPoint, gradientImage, weight);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_linearEnergyGradient2D");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    }
+}
 /* *************************************************************** */
 template <class DTYPE>
-void reg_spline_approxLinearEnergyGradient2D(nifti_image *splineControlPoint,
+void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint,
                                              nifti_image *gradientImage,
-                                             float weight
-                                             )
-{
-   size_t nodeNumber = (size_t)splineControlPoint->nx*
-         splineControlPoint->ny;
-   int x, y, a, b, i, index;
+                                             float weight) {
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny);
+    int x, y, a, b, i, index;
 
-   // Create pointers to the spline coefficients
-   DTYPE * splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE * splinePtrY = &splinePtrX[nodeNumber];
+    // Create pointers to the spline coefficients
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
 
-   // Store the basis values since they are constant as the value is approximated
-   // at the control point positions only
-   DTYPE basisX[9];
-   DTYPE basisY[9];
-   set_first_order_basis_values(basisX, basisY);
+    // Store the basis values since they are constant as the value is approximated
+    // at the control point positions only
+    DTYPE basisX[9];
+    DTYPE basisY[9];
+    set_first_order_basis_values(basisX, basisY);
 
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-   mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (splineControlPoint->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
 
-   DTYPE splineCoeffX;
-   DTYPE splineCoeffY;
+    DTYPE splineCoeffX;
+    DTYPE splineCoeffY;
 
-   mat33 matrix, R;
+    mat33 matrix, R;
 
-   DTYPE *gradientXPtr = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
+    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
+    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
 
-   DTYPE approxRatio = (DTYPE)weight / (DTYPE)(nodeNumber);
-   DTYPE gradValues[2];
+    DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber;
+    DTYPE gradValues[2];
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(splineControlPoint, splinePtrX, splinePtrY, \
-   basisX, basisY, reorientation, inv_reorientation, \
-   gradientXPtr, gradientYPtr, approxRatio) \
-   private(x, y, a, b, i, index, gradValues, \
-   splineCoeffX, splineCoeffY, matrix, R)
-#endif
-   for(y=1; y<splineControlPoint->ny-1; y++)
-   {
-      for(x=1; x<splineControlPoint->nx-1; x++)
-      {
-         memset(&matrix, 0, sizeof(mat33));
-         matrix.m[2][2]=1.f;
-
-         i=0;
-         for(b=-1; b<2; b++){
-            for(a=-1; a<2; a++){
-               index = (y+b)*splineControlPoint->nx+x+a;
-               splineCoeffX = splinePtrX[index];
-               splineCoeffY = splinePtrY[index];
-
-               matrix.m[0][0] += basisX[i]*splineCoeffX;
-               matrix.m[1][0] += basisY[i]*splineCoeffX;
-
-               matrix.m[0][1] += basisX[i]*splineCoeffY;
-               matrix.m[1][1] += basisY[i]*splineCoeffY;
-               ++i;
-            } // a
-         } // b
-         // Convert from mm to voxel
-         matrix = nifti_mat33_mul(reorientation, matrix);
-         // Removing the rotation component
-         R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-         matrix = nifti_mat33_mul(R, matrix);
-         // Convert to displacement
-         --matrix.m[0][0];
-         --matrix.m[1][1];
-         i=8;
-         for(b=-1; b<2; b++){
-            for(a=-1; a<2; a++){
-               index=(y+b)*splineControlPoint->nx+x+a;
-               gradValues[0] = -2.0*matrix.m[0][0]*basisX[i];
-               gradValues[1] = -2.0*matrix.m[1][1]*basisY[i];
-
-#ifdef _OPENMP
-               #pragma omp atomic
+    shared(splineControlPoint, splinePtrX, splinePtrY, \
+    basisX, basisY, reorientation, inv_reorientation, \
+    gradientXPtr, gradientYPtr, approxRatio) \
+    private(x, y, a, b, i, index, gradValues, \
+    splineCoeffX, splineCoeffY, matrix, R)
 #endif
-               gradientXPtr[index] += approxRatio *
-                     ( inv_reorientation.m[0][0]*gradValues[0]
-                     + inv_reorientation.m[0][1]*gradValues[1]);
-#ifdef _OPENMP
-               #pragma omp atomic
-#endif
-               gradientYPtr[index] += approxRatio *
-                     ( inv_reorientation.m[1][0]*gradValues[0]
-                     + inv_reorientation.m[1][1]*gradValues[1]);
-               --i;
-            } // a
-         } // b
-      } // x
-   } // y
-
-   return;
-}
-/* *************************************************************** */
-template <class DTYPE>
-void reg_spline_approxLinearEnergyGradient3D(nifti_image *splineControlPoint,
-                                             nifti_image *gradientImage,
-                                             float weight
-                                             )
-{
-   size_t nodeNumber = (size_t)splineControlPoint->nx*
-         splineControlPoint->ny*splineControlPoint->nz;
-   int x, y, z, a, b, c, i, index;
-
-   // Create pointers to the spline coefficients
-   DTYPE * splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE * splinePtrY = &splinePtrX[nodeNumber];
-   DTYPE * splinePtrZ = &splinePtrY[nodeNumber];
-
-   // Store the basis values since they are constant as the value is approximated
-   // at the control point positions only
-   DTYPE basisX[27];
-   DTYPE basisY[27];
-   DTYPE basisZ[27];
-   set_first_order_basis_values(basisX, basisY, basisZ);
-
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-   mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
-
-   DTYPE splineCoeffX;
-   DTYPE splineCoeffY;
-   DTYPE splineCoeffZ;
-
-   mat33 matrix, R;
-
-   DTYPE *gradientXPtr = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
-   DTYPE *gradientZPtr = &gradientYPtr[nodeNumber];
-
-   DTYPE approxRatio = (DTYPE)weight / (DTYPE)(nodeNumber);
-   DTYPE gradValues[3];
-
-   for(z=1; z<splineControlPoint->nz-1; z++)
-   {
-      for(y=1; y<splineControlPoint->ny-1; y++)
-      {
-         for(x=1; x<splineControlPoint->nx-1; x++)
-         {
+    for (y = 1; y < splineControlPoint->ny - 1; y++) {
+        for (x = 1; x < splineControlPoint->nx - 1; x++) {
             memset(&matrix, 0, sizeof(mat33));
-
-            i=0;
-            for(c=-1; c<2; c++){
-               for(b=-1; b<2; b++){
-                  for(a=-1; a<2; a++){
-                     index = ((z+c)*splineControlPoint->ny+y+b)*splineControlPoint->nx+x+a;
-                     splineCoeffX = splinePtrX[index];
-                     splineCoeffY = splinePtrY[index];
-                     splineCoeffZ = splinePtrZ[index];
-
-                     matrix.m[0][0] += basisX[i]*splineCoeffX;
-                     matrix.m[1][0] += basisY[i]*splineCoeffX;
-                     matrix.m[2][0] += basisZ[i]*splineCoeffX;
-
-                     matrix.m[0][1] += basisX[i]*splineCoeffY;
-                     matrix.m[1][1] += basisY[i]*splineCoeffY;
-                     matrix.m[2][1] += basisZ[i]*splineCoeffY;
-
-                     matrix.m[0][2] += basisX[i]*splineCoeffZ;
-                     matrix.m[1][2] += basisY[i]*splineCoeffZ;
-                     matrix.m[2][2] += basisZ[i]*splineCoeffZ;
-                     ++i;
-                  }
-               }
-            }
+            matrix.m[2][2] = 1;
+
+            i = 0;
+            for (b = -1; b < 2; b++) {
+                for (a = -1; a < 2; a++) {
+                    index = (y + b) * splineControlPoint->nx + x + a;
+                    splineCoeffX = splinePtrX[index];
+                    splineCoeffY = splinePtrY[index];
+
+                    matrix.m[0][0] += basisX[i] * splineCoeffX;
+                    matrix.m[1][0] += basisY[i] * splineCoeffX;
+
+                    matrix.m[0][1] += basisX[i] * splineCoeffY;
+                    matrix.m[1][1] += basisY[i] * splineCoeffY;
+                    ++i;
+                } // a
+            } // b
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
             // Removing the rotation component
@@ -1445,209 +1213,214 @@ void reg_spline_approxLinearEnergyGradient3D(nifti_image *splineControlPoint,
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
-            --matrix.m[2][2];
-            i=26;
-            for(c=-1; c<2; c++){
-               for(b=-1; b<2; b++){
-                  for(a=-1; a<2; a++){
-                     index=((z+c)*splineControlPoint->ny+y+b)*splineControlPoint->nx+x+a;
-                     gradValues[0] = -2.0*matrix.m[0][0]*basisX[i];
-                     gradValues[1] = -2.0*matrix.m[1][1]*basisY[i];
-                     gradValues[2] = -2.0*matrix.m[2][2]*basisZ[i];
-
-                     gradientXPtr[index] += approxRatio *
-                           ( inv_reorientation.m[0][0]*gradValues[0]
-                           + inv_reorientation.m[0][1]*gradValues[1]
-                           + inv_reorientation.m[0][2]*gradValues[2]);
-
-                     gradientYPtr[index] += approxRatio *
-                           ( inv_reorientation.m[1][0]*gradValues[0]
-                           + inv_reorientation.m[1][1]*gradValues[1]
-                           + inv_reorientation.m[1][2]*gradValues[2]);
-
-                     gradientZPtr[index] += approxRatio *
-                           ( inv_reorientation.m[2][0]*gradValues[0]
-                           + inv_reorientation.m[2][1]*gradValues[1]
-                           + inv_reorientation.m[2][2]*gradValues[2]);
-                     --i;
-                  } // a
-               } // b
-            } // c
-         } // x
-      } // y
-   } // z
-   return;
+            i = 8;
+            for (b = -1; b < 2; b++) {
+                for (a = -1; a < 2; a++) {
+                    index = (y + b) * splineControlPoint->nx + x + a;
+                    gradValues[0] = -2.0 * matrix.m[0][0] * basisX[i];
+                    gradValues[1] = -2.0 * matrix.m[1][1] * basisY[i];
+
+#ifdef _OPENMP
+#pragma omp atomic
+#endif
+                    gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
+                                                          inv_reorientation.m[0][1] * gradValues[1]);
+#ifdef _OPENMP
+#pragma omp atomic
+#endif
+                    gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
+                                                          inv_reorientation.m[1][1] * gradValues[1]);
+                    --i;
+                } // a
+            } // b
+        } // x
+    } // y
 }
 /* *************************************************************** */
-void reg_spline_approxLinearEnergyGradient(nifti_image *splineControlPoint,
-                                           nifti_image *gradientImage,
-                                           float weight
-                                           )
-{
-   if(splineControlPoint->datatype != gradientImage->datatype)
-   {
-      reg_print_fct_error("reg_spline_linearEnergyGradient");
-      reg_print_msg_error("Input images are expected to have the same datatype");
-      reg_exit();
-   }
-   if(splineControlPoint->nz>1){
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_approxLinearEnergyGradient3D<float>
-               (splineControlPoint, gradientImage, weight);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_approxLinearEnergyGradient3D<double>
-               (splineControlPoint, gradientImage, weight);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_linearEnergyGradient");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-   else{
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_approxLinearEnergyGradient2D<float>
-               (splineControlPoint, gradientImage, weight);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_approxLinearEnergyGradient2D<double>
-               (splineControlPoint, gradientImage, weight);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_linearEnergyGradient");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
+template <class DTYPE>
+void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint,
+                                             nifti_image *gradientImage,
+                                             float weight) {
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    int x, y, z, a, b, c, i, index;
+
+    // Create pointers to the spline coefficients
+    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+
+    // Store the basis values since they are constant as the value is approximated
+    // at the control point positions only
+    DTYPE basisX[27];
+    DTYPE basisY[27];
+    DTYPE basisZ[27];
+    set_first_order_basis_values(basisX, basisY, basisZ);
+
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (splineControlPoint->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
+
+    DTYPE splineCoeffX;
+    DTYPE splineCoeffY;
+    DTYPE splineCoeffZ;
+
+    mat33 matrix, R;
+
+    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
+    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
+    DTYPE *gradientZPtr = &gradientYPtr[nodeNumber];
+
+    DTYPE approxRatio = (DTYPE)weight / (DTYPE)(nodeNumber);
+    DTYPE gradValues[3];
+
+    for (z = 1; z < splineControlPoint->nz - 1; z++) {
+        for (y = 1; y < splineControlPoint->ny - 1; y++) {
+            for (x = 1; x < splineControlPoint->nx - 1; x++) {
+                memset(&matrix, 0, sizeof(mat33));
+
+                i = 0;
+                for (c = -1; c < 2; c++) {
+                    for (b = -1; b < 2; b++) {
+                        for (a = -1; a < 2; a++) {
+                            index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
+                            splineCoeffX = splinePtrX[index];
+                            splineCoeffY = splinePtrY[index];
+                            splineCoeffZ = splinePtrZ[index];
+
+                            matrix.m[0][0] += basisX[i] * splineCoeffX;
+                            matrix.m[1][0] += basisY[i] * splineCoeffX;
+                            matrix.m[2][0] += basisZ[i] * splineCoeffX;
+
+                            matrix.m[0][1] += basisX[i] * splineCoeffY;
+                            matrix.m[1][1] += basisY[i] * splineCoeffY;
+                            matrix.m[2][1] += basisZ[i] * splineCoeffY;
+
+                            matrix.m[0][2] += basisX[i] * splineCoeffZ;
+                            matrix.m[1][2] += basisY[i] * splineCoeffZ;
+                            matrix.m[2][2] += basisZ[i] * splineCoeffZ;
+                            ++i;
+                        }
+                    }
+                }
+                // Convert from mm to voxel
+                matrix = nifti_mat33_mul(reorientation, matrix);
+                // Removing the rotation component
+                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(R, matrix);
+                // Convert to displacement
+                --matrix.m[0][0];
+                --matrix.m[1][1];
+                --matrix.m[2][2];
+                i = 26;
+                for (c = -1; c < 2; c++) {
+                    for (b = -1; b < 2; b++) {
+                        for (a = -1; a < 2; a++) {
+                            index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
+                            gradValues[0] = -2.0 * matrix.m[0][0] * basisX[i];
+                            gradValues[1] = -2.0 * matrix.m[1][1] * basisY[i];
+                            gradValues[2] = -2.0 * matrix.m[2][2] * basisZ[i];
+
+                            gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
+                                                                  inv_reorientation.m[0][1] * gradValues[1] +
+                                                                  inv_reorientation.m[0][2] * gradValues[2]);
+
+                            gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
+                                                                  inv_reorientation.m[1][1] * gradValues[1] +
+                                                                  inv_reorientation.m[1][2] * gradValues[2]);
+
+                            gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] +
+                                                                  inv_reorientation.m[2][1] * gradValues[1] +
+                                                                  inv_reorientation.m[2][2] * gradValues[2]);
+                            --i;
+                        } // a
+                    } // b
+                } // c
+            } // x
+        } // y
+    } // z
 }
 /* *************************************************************** */
-/* *************************************************************** */
-template <class DTYPE>
-double reg_defField_linearEnergyValue2D(nifti_image *deformationField)
-{
-   size_t voxelNumber = (size_t)deformationField->nx *
-         deformationField->ny;
-   int a, b, x, y, X, Y, index;
-   DTYPE basis[2]={1,0};
-   DTYPE first[2]={-1,1};
-
-   double constraintValue = 0.;
-   double currentValue;
-
-   // Create pointers to the deformation field
-   DTYPE *defPtrX = static_cast<DTYPE *>(deformationField->data);
-   DTYPE *defPtrY = &defPtrX[voxelNumber];
-   DTYPE defX, defY;
-
-   mat33 matrix, R;
-
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(deformationField->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
-
-   for(y=0; y<deformationField->ny; ++y){
-      Y=(y!=deformationField->ny-1)?y:y-1;
-      for(x=0; x<deformationField->nx; ++x){
-         X=(x!=deformationField->nx-1)?x:x-1;
-
-         memset(&matrix, 0, sizeof(mat33));
-
-         for(b=0; b<2; b++){
-            for(a=0; a<2; a++){
-               index = (Y+b)*deformationField->nx+X+a;
-               defX = defPtrX[index];
-               defY = defPtrY[index];
-
-               matrix.m[0][0] += first[a]*basis[b]*defX;
-               matrix.m[1][0] += basis[a]*first[b]*defX;
-               matrix.m[0][1] += first[a]*basis[b]*defY;
-               matrix.m[1][1] += basis[a]*first[b]*defY;
-            }
-         }
-         // Convert from mm to voxel
-         matrix = nifti_mat33_mul(reorientation, matrix);
-         // Removing the rotation component
-         R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-         matrix = nifti_mat33_mul(R, matrix);
-         // Convert to displacement
-         --matrix.m[0][0];
-         --matrix.m[1][1];
-
-         currentValue = 0.;
-         for(b=0; b<2; b++){
-            for(a=0; a<2; a++){
-               currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part
-            }
-         }
-         constraintValue += currentValue;
-      }
-   }
-   return constraintValue / static_cast<double>(deformationField->nvox);
+void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint,
+                                           nifti_image *gradientImage,
+                                           float weight) {
+    if (splineControlPoint->datatype != gradientImage->datatype) {
+        reg_print_fct_error("reg_spline_linearEnergyGradient");
+        reg_print_msg_error("Input images are expected to have the same datatype");
+        reg_exit();
+    }
+    if (splineControlPoint->nz > 1) {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_approxLinearEnergyGradient3D<float>(splineControlPoint, gradientImage, weight);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_approxLinearEnergyGradient3D<double>(splineControlPoint, gradientImage, weight);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_linearEnergyGradient");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    } else {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_approxLinearEnergyGradient2D<float>(splineControlPoint, gradientImage, weight);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_approxLinearEnergyGradient2D<double>(splineControlPoint, gradientImage, weight);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_linearEnergyGradient");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    }
 }
 /* *************************************************************** */
 template <class DTYPE>
-double reg_defField_linearEnergyValue3D(nifti_image *deformationField)
-{
-   size_t voxelNumber = (size_t)deformationField->nx *
-         deformationField->ny * deformationField->nz;
-   int a, b, c, x, y, z, X, Y, Z, index;
-   DTYPE basis[2]={1,0};
-   DTYPE first[2]={-1,1};
-
-   double constraintValue = 0.;
-   double currentValue;
-
-   // Create pointers to the deformation field
-   DTYPE *defPtrX = static_cast<DTYPE *>(deformationField->data);
-   DTYPE *defPtrY = &defPtrX[voxelNumber];
-   DTYPE *defPtrZ = &defPtrY[voxelNumber];
-   DTYPE defX, defY, defZ;
-
-   mat33 matrix, R;
-
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(deformationField->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
-
-   for(z=0; z<deformationField->nz; ++z){
-      Z=(z!=deformationField->nz-1)?z:z-1;
-      for(y=0; y<deformationField->ny; ++y){
-         Y=(y!=deformationField->ny-1)?y:y-1;
-         for(x=0; x<deformationField->nx; ++x){
-            X=(x!=deformationField->nx-1)?x:x-1;
+double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
+    size_t voxelNumber = size_t(deformationField->nx * deformationField->ny);
+    int a, b, x, y, X, Y, index;
+    DTYPE basis[2] = {1, 0};
+    DTYPE first[2] = {-1, 1};
+
+    double constraintValue = 0;
+    double currentValue;
+
+    // Create pointers to the deformation field
+    const DTYPE *defPtrX = static_cast<DTYPE*>(deformationField->data);
+    const DTYPE *defPtrY = &defPtrX[voxelNumber];
+    DTYPE defX, defY;
+
+    mat33 matrix, R;
+
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (deformationField->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
+
+    for (y = 0; y < deformationField->ny; ++y) {
+        Y = (y != deformationField->ny - 1) ? y : y - 1;
+        for (x = 0; x < deformationField->nx; ++x) {
+            X = (x != deformationField->nx - 1) ? x : x - 1;
 
             memset(&matrix, 0, sizeof(mat33));
 
-            for(c=0; c<2; c++){
-               for(b=0; b<2; b++){
-                  for(a=0; a<2; a++){
-                     index = ((Z+c)*deformationField->ny+Y+b)*deformationField->nx+X+a;
-                     defX = defPtrX[index];
-                     defY = defPtrY[index];
-                     defZ = defPtrZ[index];
-
-                     matrix.m[0][0] += first[a]*basis[b]*basis[c]*defX;
-                     matrix.m[1][0] += basis[a]*first[b]*basis[c]*defX;
-                     matrix.m[2][0] += basis[a]*basis[b]*first[c]*defX;
-
-                     matrix.m[0][1] += first[a]*basis[b]*basis[c]*defY;
-                     matrix.m[1][1] += basis[a]*first[b]*basis[c]*defY;
-                     matrix.m[2][1] += basis[a]*basis[b]*first[c]*defY;
-
-                     matrix.m[0][2] += first[a]*basis[b]*basis[c]*defZ;
-                     matrix.m[1][2] += basis[a]*first[b]*basis[c]*defZ;
-                     matrix.m[2][2] += basis[a]*basis[b]*first[c]*defZ;
-                  }
-               }
+            for (b = 0; b < 2; b++) {
+                for (a = 0; a < 2; a++) {
+                    index = (Y + b) * deformationField->nx + X + a;
+                    defX = defPtrX[index];
+                    defY = defPtrY[index];
+
+                    matrix.m[0][0] += first[a] * basis[b] * defX;
+                    matrix.m[1][0] += basis[a] * first[b] * defX;
+                    matrix.m[0][1] += first[a] * basis[b] * defY;
+                    matrix.m[1][1] += basis[a] * first[b] * defY;
+                }
             }
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
@@ -1657,193 +1430,170 @@ double reg_defField_linearEnergyValue3D(nifti_image *deformationField)
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
-            --matrix.m[2][2];
 
-            currentValue = 0.;
-            for(b=0; b<3; b++){
-               for(a=0; a<3; a++){
-                  currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part
-               }
+            currentValue = 0;
+            for (b = 0; b < 2; b++) {
+                for (a = 0; a < 2; a++) {
+                    currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                }
             }
             constraintValue += currentValue;
-         }
-      }
-   }
-   return constraintValue / static_cast<double>(deformationField->nvox);
-}
-/* *************************************************************** */
-double reg_defField_linearEnergy(nifti_image *deformationField)
-{
-   if(deformationField->nz>1){
-      switch(deformationField->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_defField_linearEnergyValue3D<float>(deformationField);
-      case NIFTI_TYPE_FLOAT64:
-         return reg_defField_linearEnergyValue3D<double>(deformationField);
-      default:
-         reg_print_fct_error("reg_defField_linearEnergyValue3D");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-   else{
-      switch(deformationField->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_defField_linearEnergyValue2D<float>(deformationField);
-      case NIFTI_TYPE_FLOAT64:
-         return reg_defField_linearEnergyValue2D<double>(deformationField);
-      default:
-         reg_print_fct_error("reg_defField_linearEnergyValue2D");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
+        }
+    }
+    return constraintValue / static_cast<double>(deformationField->nvox);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DTYPE>
-void reg_defField_linearEnergyGradient2D(nifti_image *deformationField,
-                                         nifti_image *gradientImage,
-                                         float weight)
-{
-   size_t voxelNumber = (size_t)deformationField->nx *
-         deformationField->ny;
-   int a, b, x, y, X, Y, index;
-   DTYPE basis[2]={1,0};
-   DTYPE first[2]={-1,1};
-
-   // Create pointers to the deformation field
-   DTYPE *defPtrX = static_cast<DTYPE *>(deformationField->data);
-   DTYPE *defPtrY = &defPtrX[voxelNumber];
-   DTYPE defX, defY;
-
-   mat33 matrix, R;
-
-   DTYPE *gradientXPtr = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientYPtr = &gradientXPtr[voxelNumber];
-
-   DTYPE approxRatio = (DTYPE)weight / (DTYPE)(voxelNumber);
-   DTYPE gradValues[2];
-
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(deformationField->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
-   mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
-
-   for(y=0; y<deformationField->ny; ++y){
-      Y=(y!=deformationField->ny-1)?y:y-1;
-      for(x=0; x<deformationField->nx; ++x){
-         X=(x!=deformationField->nx-1)?x:x-1;
-
-         memset(&matrix, 0, sizeof(mat33));
-
-         for(b=0; b<2; b++){
-            for(a=0; a<2; a++){
-               index = (Y+b)*deformationField->nx+X+a;
-               defX = defPtrX[index];
-               defY = defPtrY[index];
-
-               matrix.m[0][0] += first[a]*basis[b]*defX;
-               matrix.m[1][0] += basis[a]*first[b]*defX;
-               matrix.m[0][1] += first[a]*basis[b]*defY;
-               matrix.m[1][1] += basis[a]*first[b]*defY;
+double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) {
+    size_t voxelNumber = size_t(deformationField->nx * deformationField->ny * deformationField->nz);
+    int a, b, c, x, y, z, X, Y, Z, index;
+    DTYPE basis[2] = {1, 0};
+    DTYPE first[2] = {-1, 1};
+
+    double constraintValue = 0;
+    double currentValue;
+
+    // Create pointers to the deformation field
+    const DTYPE *defPtrX = static_cast<DTYPE*>(deformationField->data);
+    const DTYPE *defPtrY = &defPtrX[voxelNumber];
+    const DTYPE *defPtrZ = &defPtrY[voxelNumber];
+    DTYPE defX, defY, defZ;
+
+    mat33 matrix, R;
+
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (deformationField->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
+
+    for (z = 0; z < deformationField->nz; ++z) {
+        Z = (z != deformationField->nz - 1) ? z : z - 1;
+        for (y = 0; y < deformationField->ny; ++y) {
+            Y = (y != deformationField->ny - 1) ? y : y - 1;
+            for (x = 0; x < deformationField->nx; ++x) {
+                X = (x != deformationField->nx - 1) ? x : x - 1;
+
+                memset(&matrix, 0, sizeof(mat33));
+
+                for (c = 0; c < 2; c++) {
+                    for (b = 0; b < 2; b++) {
+                        for (a = 0; a < 2; a++) {
+                            index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a;
+                            defX = defPtrX[index];
+                            defY = defPtrY[index];
+                            defZ = defPtrZ[index];
+
+                            matrix.m[0][0] += first[a] * basis[b] * basis[c] * defX;
+                            matrix.m[1][0] += basis[a] * first[b] * basis[c] * defX;
+                            matrix.m[2][0] += basis[a] * basis[b] * first[c] * defX;
+
+                            matrix.m[0][1] += first[a] * basis[b] * basis[c] * defY;
+                            matrix.m[1][1] += basis[a] * first[b] * basis[c] * defY;
+                            matrix.m[2][1] += basis[a] * basis[b] * first[c] * defY;
+
+                            matrix.m[0][2] += first[a] * basis[b] * basis[c] * defZ;
+                            matrix.m[1][2] += basis[a] * first[b] * basis[c] * defZ;
+                            matrix.m[2][2] += basis[a] * basis[b] * first[c] * defZ;
+                        }
+                    }
+                }
+                // Convert from mm to voxel
+                matrix = nifti_mat33_mul(reorientation, matrix);
+                // Removing the rotation component
+                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(R, matrix);
+                // Convert to displacement
+                --matrix.m[0][0];
+                --matrix.m[1][1];
+                --matrix.m[2][2];
+
+                currentValue = 0;
+                for (b = 0; b < 3; b++) {
+                    for (a = 0; a < 3; a++) {
+                        currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                    }
+                }
+                constraintValue += currentValue;
             }
-         }
-         // Convert from mm to voxel
-         matrix = nifti_mat33_mul(reorientation, matrix);
-         // Removing the rotation component
-         R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-         matrix = nifti_mat33_mul(R, matrix);
-         // Convert to displacement
-         --matrix.m[0][0];
-         --matrix.m[1][1];
-
-         for(b=0; b<2; b++){
-            for(a=0; a<2; a++){
-               index = (Y+b)*deformationField->nx+X+a;
-               gradValues[0] = -2.0*matrix.m[0][0] *
-                     first[1-a]*basis[1-b];
-               gradValues[1] = -2.0*matrix.m[1][1] *
-                     basis[1-a]*first[1-b];
-               gradientXPtr[index] += approxRatio *
-                     ( inv_reorientation.m[0][0]*gradValues[0]
-                     + inv_reorientation.m[0][1]*gradValues[1]);
-               gradientYPtr[index] += approxRatio *
-                     ( inv_reorientation.m[1][0]*gradValues[0]
-                     + inv_reorientation.m[1][1]*gradValues[1]);
-            } // a
-         } // b
-      }
-   }
+        }
+    }
+    return constraintValue / static_cast<double>(deformationField->nvox);
+}
+/* *************************************************************** */
+double reg_defField_linearEnergy(const nifti_image *deformationField) {
+    if (deformationField->nz > 1) {
+        switch (deformationField->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_defField_linearEnergyValue3D<float>(deformationField);
+        case NIFTI_TYPE_FLOAT64:
+            return reg_defField_linearEnergyValue3D<double>(deformationField);
+        default:
+            reg_print_fct_error("reg_defField_linearEnergyValue3D");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    } else {
+        switch (deformationField->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_defField_linearEnergyValue2D<float>(deformationField);
+        case NIFTI_TYPE_FLOAT64:
+            return reg_defField_linearEnergyValue2D<double>(deformationField);
+        default:
+            reg_print_fct_error("reg_defField_linearEnergyValue2D");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    }
 }
 /* *************************************************************** */
 template <class DTYPE>
-void reg_defField_linearEnergyGradient3D(nifti_image *deformationField,
+void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField,
                                          nifti_image *gradientImage,
-                                         float weight)
-{
-   size_t voxelNumber = (size_t)deformationField->nx *
-         deformationField->ny * deformationField->nz;
-   int a, b, c, x, y, z, X, Y, Z, index;
-   DTYPE basis[2]={1,0};
-   DTYPE first[2]={-1,1};
-
-   // Create pointers to the deformation field
-   DTYPE *defPtrX = static_cast<DTYPE *>(deformationField->data);
-   DTYPE *defPtrY = &defPtrX[voxelNumber];
-   DTYPE *defPtrZ = &defPtrY[voxelNumber];
-   DTYPE defX, defY, defZ;
-
-   mat33 matrix, R;
-
-   DTYPE *gradientXPtr = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientYPtr = &gradientXPtr[voxelNumber];
-   DTYPE *gradientZPtr = &gradientYPtr[voxelNumber];
-
-   DTYPE approxRatio = (DTYPE)weight / (DTYPE)(voxelNumber);
-   DTYPE gradValues[3];
-
-   // Matrix to use to convert the gradient from mm to voxel
-   mat33 reorientation;
-   if(deformationField->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
-   mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
-
-   for(z=0; z<deformationField->nz; ++z){
-      Z=(z!=deformationField->nz-1)?z:z-1;
-      for(y=0; y<deformationField->ny; ++y){
-         Y=(y!=deformationField->ny-1)?y:y-1;
-         for(x=0; x<deformationField->nx; ++x){
-            X=(x!=deformationField->nx-1)?x:x-1;
+                                         float weight) {
+    size_t voxelNumber = size_t(deformationField->nx * deformationField->ny);
+    int a, b, x, y, X, Y, index;
+    DTYPE basis[2] = {1, 0};
+    DTYPE first[2] = {-1, 1};
+
+    // Create pointers to the deformation field
+    const DTYPE *defPtrX = static_cast<DTYPE*>(deformationField->data);
+    const DTYPE *defPtrY = &defPtrX[voxelNumber];
+    DTYPE defX, defY;
+
+    mat33 matrix, R;
+
+    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
+    DTYPE *gradientYPtr = &gradientXPtr[voxelNumber];
+
+    DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber;
+    DTYPE gradValues[2];
+
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (deformationField->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
+    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
+
+    for (y = 0; y < deformationField->ny; ++y) {
+        Y = (y != deformationField->ny - 1) ? y : y - 1;
+        for (x = 0; x < deformationField->nx; ++x) {
+            X = (x != deformationField->nx - 1) ? x : x - 1;
 
             memset(&matrix, 0, sizeof(mat33));
 
-            for(c=0; c<2; c++){
-               for(b=0; b<2; b++){
-                  for(a=0; a<2; a++){
-                     index = ((Z+c)*deformationField->ny+Y+b)*deformationField->nx+X+a;
-                     defX = defPtrX[index];
-                     defY = defPtrY[index];
-                     defZ = defPtrZ[index];
-
-                     matrix.m[0][0] += first[a]*basis[b]*basis[c]*defX;
-                     matrix.m[1][0] += basis[a]*first[b]*basis[c]*defX;
-                     matrix.m[2][0] += basis[a]*basis[b]*first[c]*defX;
-
-                     matrix.m[0][1] += first[a]*basis[b]*basis[c]*defY;
-                     matrix.m[1][1] += basis[a]*first[b]*basis[c]*defY;
-                     matrix.m[2][1] += basis[a]*basis[b]*first[c]*defY;
-
-                     matrix.m[0][2] += first[a]*basis[b]*basis[c]*defZ;
-                     matrix.m[1][2] += basis[a]*first[b]*basis[c]*defZ;
-                     matrix.m[2][2] += basis[a]*basis[b]*first[c]*defZ;
-                  }
-               }
+            for (b = 0; b < 2; b++) {
+                for (a = 0; a < 2; a++) {
+                    index = (Y + b) * deformationField->nx + X + a;
+                    defX = defPtrX[index];
+                    defY = defPtrY[index];
+
+                    matrix.m[0][0] += first[a] * basis[b] * defX;
+                    matrix.m[1][0] += basis[a] * first[b] * defX;
+                    matrix.m[0][1] += first[a] * basis[b] * defY;
+                    matrix.m[1][1] += basis[a] * first[b] * defY;
+                }
             }
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
@@ -1853,608 +1603,641 @@ void reg_defField_linearEnergyGradient3D(nifti_image *deformationField,
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
-            --matrix.m[2][2];
-            for(c=0; c<2; c++){
-               for(b=0; b<2; b++){
-                  for(a=0; a<2; a++){
-                     index = ((Z+c)*deformationField->ny+Y+b) *
-                           deformationField->nx+X+a;
-                     gradValues[0] = -2.0*matrix.m[0][0] *
-                           first[1-a]*basis[1-b]*basis[1-c];
-                     gradValues[1] = -2.0*matrix.m[1][1] *
-                           basis[1-a]*first[1-b]*basis[1-c];
-                     gradValues[2] = -2.0*matrix.m[2][2] *
-                           basis[1-a]*basis[1-b]*first[1-c];
-                     gradientXPtr[index] += approxRatio *
-                           ( inv_reorientation.m[0][0]*gradValues[0]
-                           + inv_reorientation.m[0][1]*gradValues[1]
-                           + inv_reorientation.m[0][2]*gradValues[2]);
-                     gradientYPtr[index] += approxRatio *
-                           ( inv_reorientation.m[1][0]*gradValues[0]
-                           + inv_reorientation.m[1][1]*gradValues[1]
-                           + inv_reorientation.m[1][2]*gradValues[2]);
-                     gradientZPtr[index] += approxRatio *
-                           ( inv_reorientation.m[2][0]*gradValues[0]
-                           + inv_reorientation.m[2][1]*gradValues[1]
-                           + inv_reorientation.m[2][2]*gradValues[2]);
-                  } // a
-               } // b
-            } // c
-         }
-      }
-   }
+
+            for (b = 0; b < 2; b++) {
+                for (a = 0; a < 2; a++) {
+                    index = (Y + b) * deformationField->nx + X + a;
+                    gradValues[0] = -2.0 * matrix.m[0][0] * first[1 - a] * basis[1 - b];
+                    gradValues[1] = -2.0 * matrix.m[1][1] * basis[1 - a] * first[1 - b];
+                    gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
+                                                          inv_reorientation.m[0][1] * gradValues[1]);
+                    gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
+                                                          inv_reorientation.m[1][1] * gradValues[1]);
+                } // a
+            } // b
+        }
+    }
 }
 /* *************************************************************** */
-void reg_defField_linearEnergyGradient(nifti_image *deformationField,
-                                       nifti_image *gradientImage,
-                                       float weight)
-{
-   if(deformationField->nz>1){
-      switch(deformationField->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_defField_linearEnergyGradient3D<float>
-               (deformationField, gradientImage, weight);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_defField_linearEnergyGradient3D<double>
-               (deformationField, gradientImage, weight);
-         break;
-      default:
-         reg_print_fct_error("reg_defField_linearEnergyGradient3D");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-   else{
-      switch(deformationField->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_defField_linearEnergyGradient2D<float>
-               (deformationField, gradientImage, weight);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_defField_linearEnergyGradient2D<double>
-               (deformationField, gradientImage, weight);
-         break;
-      default:
-         reg_print_fct_error("reg_defField_linearEnergyGradient2D");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
+template <class DTYPE>
+void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField,
+                                         nifti_image *gradientImage,
+                                         float weight) {
+    size_t voxelNumber = size_t(deformationField->nx * deformationField->ny * deformationField->nz);
+    int a, b, c, x, y, z, X, Y, Z, index;
+    DTYPE basis[2] = {1, 0};
+    DTYPE first[2] = {-1, 1};
+
+    // Create pointers to the deformation field
+    const DTYPE *defPtrX = static_cast<DTYPE*>(deformationField->data);
+    const DTYPE *defPtrY = &defPtrX[voxelNumber];
+    const DTYPE *defPtrZ = &defPtrY[voxelNumber];
+    DTYPE defX, defY, defZ;
+
+    mat33 matrix, R;
+
+    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
+    DTYPE *gradientYPtr = &gradientXPtr[voxelNumber];
+    DTYPE *gradientZPtr = &gradientYPtr[voxelNumber];
+
+    DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber;
+    DTYPE gradValues[3];
+
+    // Matrix to use to convert the gradient from mm to voxel
+    mat33 reorientation;
+    if (deformationField->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
+    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
+
+    for (z = 0; z < deformationField->nz; ++z) {
+        Z = (z != deformationField->nz - 1) ? z : z - 1;
+        for (y = 0; y < deformationField->ny; ++y) {
+            Y = (y != deformationField->ny - 1) ? y : y - 1;
+            for (x = 0; x < deformationField->nx; ++x) {
+                X = (x != deformationField->nx - 1) ? x : x - 1;
+
+                memset(&matrix, 0, sizeof(mat33));
+
+                for (c = 0; c < 2; c++) {
+                    for (b = 0; b < 2; b++) {
+                        for (a = 0; a < 2; a++) {
+                            index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a;
+                            defX = defPtrX[index];
+                            defY = defPtrY[index];
+                            defZ = defPtrZ[index];
+
+                            matrix.m[0][0] += first[a] * basis[b] * basis[c] * defX;
+                            matrix.m[1][0] += basis[a] * first[b] * basis[c] * defX;
+                            matrix.m[2][0] += basis[a] * basis[b] * first[c] * defX;
+
+                            matrix.m[0][1] += first[a] * basis[b] * basis[c] * defY;
+                            matrix.m[1][1] += basis[a] * first[b] * basis[c] * defY;
+                            matrix.m[2][1] += basis[a] * basis[b] * first[c] * defY;
+
+                            matrix.m[0][2] += first[a] * basis[b] * basis[c] * defZ;
+                            matrix.m[1][2] += basis[a] * first[b] * basis[c] * defZ;
+                            matrix.m[2][2] += basis[a] * basis[b] * first[c] * defZ;
+                        }
+                    }
+                }
+                // Convert from mm to voxel
+                matrix = nifti_mat33_mul(reorientation, matrix);
+                // Removing the rotation component
+                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(R, matrix);
+                // Convert to displacement
+                --matrix.m[0][0];
+                --matrix.m[1][1];
+                --matrix.m[2][2];
+                for (c = 0; c < 2; c++) {
+                    for (b = 0; b < 2; b++) {
+                        for (a = 0; a < 2; a++) {
+                            index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a;
+                            gradValues[0] = -2.0 * matrix.m[0][0] * first[1 - a] * basis[1 - b] * basis[1 - c];
+                            gradValues[1] = -2.0 * matrix.m[1][1] * basis[1 - a] * first[1 - b] * basis[1 - c];
+                            gradValues[2] = -2.0 * matrix.m[2][2] * basis[1 - a] * basis[1 - b] * first[1 - c];
+                            gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
+                                                                  inv_reorientation.m[0][1] * gradValues[1] +
+                                                                  inv_reorientation.m[0][2] * gradValues[2]);
+                            gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
+                                                                  inv_reorientation.m[1][1] * gradValues[1] +
+                                                                  inv_reorientation.m[1][2] * gradValues[2]);
+                            gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] +
+                                                                  inv_reorientation.m[2][1] * gradValues[1] +
+                                                                  inv_reorientation.m[2][2] * gradValues[2]);
+                        } // a
+                    } // b
+                } // c
+            }
+        }
+    }
 }
 /* *************************************************************** */
+void reg_defField_linearEnergyGradient(const nifti_image *deformationField,
+                                       nifti_image *gradientImage,
+                                       float weight) {
+    if (deformationField->nz > 1) {
+        switch (deformationField->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_defField_linearEnergyGradient3D<float>(deformationField, gradientImage, weight);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_defField_linearEnergyGradient3D<double>(deformationField, gradientImage, weight);
+            break;
+        default:
+            reg_print_fct_error("reg_defField_linearEnergyGradient3D");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    } else {
+        switch (deformationField->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_defField_linearEnergyGradient2D<float>(deformationField, gradientImage, weight);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_defField_linearEnergyGradient2D<double>(deformationField, gradientImage, weight);
+            break;
+        default:
+            reg_print_fct_error("reg_defField_linearEnergyGradient2D");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    }
+}
 /* *************************************************************** */
 template <class DTYPE>
-double reg_spline_getLandmarkDistance_core(nifti_image *controlPointImage,
+double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
                                            size_t landmarkNumber,
                                            float *landmarkReference,
-                                           float *landmarkFloating)
-{
-   int imageDim=controlPointImage->nz>1?3:2;
-   size_t controlPointNumber = (size_t)controlPointImage->nx *
-         controlPointImage->ny * controlPointImage->nz;
-   double constraintValue=0.;
-   size_t l, index;
-   float ref_position[4];
-   float def_position[4];
-   float flo_position[4];
-   int previous[3], a, b, c;
-   DTYPE basisX[4], basisY[4], basisZ[4], basis;
-   mat44 *gridRealToVox = &(controlPointImage->qto_ijk);
-   if(controlPointImage->sform_code>0)
-      gridRealToVox = &(controlPointImage->sto_ijk);
-   DTYPE *gridPtrX = static_cast<DTYPE *>(controlPointImage->data);
-   DTYPE *gridPtrY = &gridPtrX[controlPointNumber];
-   DTYPE *gridPtrZ=nullptr;
-   if(imageDim>2)
-      gridPtrZ = &gridPtrY[controlPointNumber];
-
-   // Loop over all landmarks
-   for(l=0;l<landmarkNumber;++l){
-      // fetch the initial positions
-      ref_position[0]=landmarkReference[l*imageDim];
-      flo_position[0]=landmarkFloating[l*imageDim];
-      ref_position[1]=landmarkReference[l*imageDim+1];
-      flo_position[1]=landmarkFloating[l*imageDim+1];
-      if(imageDim>2){
-         ref_position[2]=landmarkReference[l*imageDim+2];
-         flo_position[2]=landmarkFloating[l*imageDim+2];
-      }
-      else ref_position[2]=flo_position[2]=0.f;
-      ref_position[3]=flo_position[3]=1.f;
-      // Convert the reference position to voxel in the control point grid space
-      reg_mat44_mul(gridRealToVox, ref_position, def_position);
-
-
-
-      // Extract the corresponding nodes
-      previous[0]=static_cast<int>(reg_floor(def_position[0]))-1;
-      previous[1]=static_cast<int>(reg_floor(def_position[1]))-1;
-      previous[2]=static_cast<int>(reg_floor(def_position[2]))-1;
-      // Check that the specified landmark belongs to the input image
-      if(previous[0]>-1 && previous[0]+3<controlPointImage->nx &&
-         previous[1]>-1 && previous[1]+3<controlPointImage->ny &&
-         ((previous[2]>-1 && previous[2]+3<controlPointImage->nz) || imageDim==2)){
-         // Extract the corresponding basis values
-         get_BSplineBasisValues<DTYPE>(def_position[0] - 1.f -(DTYPE)previous[0], basisX);
-         get_BSplineBasisValues<DTYPE>(def_position[1] - 1.f -(DTYPE)previous[1], basisY);
-         get_BSplineBasisValues<DTYPE>(def_position[2] - 1.f -(DTYPE)previous[2], basisZ);
-         def_position[0]=0.f;
-         def_position[1]=0.f;
-         def_position[2]=0.f;
-         if(imageDim>2){
-            for(c=0;c<4;++c){
-               for(b=0;b<4;++b){
-                  for(a=0;a<4;++a){
-                     index = ((previous[2]+c)*controlPointImage->ny+previous[1]+b) *
-                           controlPointImage->nx+previous[0]+a;
-                     basis = basisX[a] * basisY[b] * basisZ[c];
-                     def_position[0] += gridPtrX[index] * basis;
-                     def_position[1] += gridPtrY[index] * basis;
-                     def_position[2] += gridPtrZ[index] * basis;
-                  }
-               }
+                                           float *landmarkFloating) {
+    int imageDim = controlPointImage->nz > 1 ? 3 : 2;
+    size_t controlPointNumber = size_t(controlPointImage->nx * controlPointImage->ny * controlPointImage->nz);
+    double constraintValue = 0;
+    size_t l, index;
+    float ref_position[4];
+    float def_position[4];
+    float flo_position[4];
+    int previous[3], a, b, c;
+    DTYPE basisX[4], basisY[4], basisZ[4], basis;
+    const mat44 *gridRealToVox = &(controlPointImage->qto_ijk);
+    if (controlPointImage->sform_code > 0)
+        gridRealToVox = &(controlPointImage->sto_ijk);
+    const DTYPE *gridPtrX = static_cast<DTYPE*>(controlPointImage->data);
+    const DTYPE *gridPtrY = &gridPtrX[controlPointNumber];
+    const DTYPE *gridPtrZ = nullptr;
+    if (imageDim > 2)
+        gridPtrZ = &gridPtrY[controlPointNumber];
+
+    // Loop over all landmarks
+    for (l = 0; l < landmarkNumber; ++l) {
+        // fetch the initial positions
+        ref_position[0] = landmarkReference[l * imageDim];
+        flo_position[0] = landmarkFloating[l * imageDim];
+        ref_position[1] = landmarkReference[l * imageDim + 1];
+        flo_position[1] = landmarkFloating[l * imageDim + 1];
+        if (imageDim > 2) {
+            ref_position[2] = landmarkReference[l * imageDim + 2];
+            flo_position[2] = landmarkFloating[l * imageDim + 2];
+        } else ref_position[2] = flo_position[2] = 0;
+        ref_position[3] = flo_position[3] = 1;
+        // Convert the reference position to voxel in the control point grid space
+        reg_mat44_mul(gridRealToVox, ref_position, def_position);
+
+        // Extract the corresponding nodes
+        previous[0] = static_cast<int>(reg_floor(def_position[0])) - 1;
+        previous[1] = static_cast<int>(reg_floor(def_position[1])) - 1;
+        previous[2] = static_cast<int>(reg_floor(def_position[2])) - 1;
+        // Check that the specified landmark belongs to the input image
+        if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx &&
+            previous[1] > -1 && previous[1] + 3 < controlPointImage->ny &&
+            ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) {
+            // Extract the corresponding basis values
+            get_BSplineBasisValues<DTYPE>(def_position[0] - 1 - (DTYPE)previous[0], basisX);
+            get_BSplineBasisValues<DTYPE>(def_position[1] - 1 - (DTYPE)previous[1], basisY);
+            get_BSplineBasisValues<DTYPE>(def_position[2] - 1 - (DTYPE)previous[2], basisZ);
+            def_position[0] = 0;
+            def_position[1] = 0;
+            def_position[2] = 0;
+            if (imageDim > 2) {
+                for (c = 0; c < 4; ++c) {
+                    for (b = 0; b < 4; ++b) {
+                        for (a = 0; a < 4; ++a) {
+                            index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) *
+                                controlPointImage->nx + previous[0] + a;
+                            basis = basisX[a] * basisY[b] * basisZ[c];
+                            def_position[0] += gridPtrX[index] * basis;
+                            def_position[1] += gridPtrY[index] * basis;
+                            def_position[2] += gridPtrZ[index] * basis;
+                        }
+                    }
+                }
+            } else {
+                for (b = 0; b < 4; ++b) {
+                    for (a = 0; a < 4; ++a) {
+                        index = (previous[1] + b) * controlPointImage->nx + previous[0] + a;
+                        basis = basisX[a] * basisY[b];
+                        def_position[0] += gridPtrX[index] * basis;
+                        def_position[1] += gridPtrY[index] * basis;
+                    }
+                }
             }
-         }
-         else{
-            for(b=0;b<4;++b){
-               for(a=0;a<4;++a){
-                  index = (previous[1]+b)*controlPointImage->nx+previous[0]+a;
-                  basis = basisX[a] * basisY[b];
-                  def_position[0] += gridPtrX[index] * basis;
-                  def_position[1] += gridPtrY[index] * basis;
-               }
-            }
-         }
-         constraintValue += reg_pow2(flo_position[0]-def_position[0]);
-         constraintValue += reg_pow2(flo_position[1]-def_position[1]);
-         if(imageDim>2)
-            constraintValue += reg_pow2(flo_position[2]-def_position[2]);
-      }
-      else{
-         char warning_text[255];
-         if(imageDim>2)
-            sprintf(warning_text, "The current landmark at position %g %g %g is ignored",
-                    ref_position[0], ref_position[1], ref_position[2]);
-         else
-            sprintf(warning_text, "The current landmark at position %g %g is ignored",
-                    ref_position[0], ref_position[1]);
-         reg_print_msg_warn(warning_text);
-         reg_print_msg_warn("as it is not in the space of the reference image");
-      }
-   }
-   return constraintValue;
+            constraintValue += reg_pow2(flo_position[0] - def_position[0]);
+            constraintValue += reg_pow2(flo_position[1] - def_position[1]);
+            if (imageDim > 2)
+                constraintValue += reg_pow2(flo_position[2] - def_position[2]);
+        } else {
+            char warning_text[255];
+            if (imageDim > 2)
+                sprintf(warning_text, "The current landmark at position %g %g %g is ignored",
+                        ref_position[0], ref_position[1], ref_position[2]);
+            else
+                sprintf(warning_text, "The current landmark at position %g %g is ignored",
+                        ref_position[0], ref_position[1]);
+            reg_print_msg_warn(warning_text);
+            reg_print_msg_warn("as it is not in the space of the reference image");
+        }
+    }
+    return constraintValue;
 }
 /* *************************************************************** */
-double reg_spline_getLandmarkDistance(nifti_image *controlPointImage,
+double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage,
                                       size_t landmarkNumber,
                                       float *landmarkReference,
-                                      float *landmarkFloating)
-{
-   if(controlPointImage->intent_p1!=CUB_SPLINE_GRID){
-      reg_print_fct_error("reg_spline_getLandmarkDistance");
-      reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now");
-      reg_exit();
-   }
-   switch(controlPointImage->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      return reg_spline_getLandmarkDistance_core<float>
-            (controlPointImage, landmarkNumber, landmarkReference, landmarkFloating);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      return reg_spline_getLandmarkDistance_core<double>
-            (controlPointImage, landmarkNumber, landmarkReference, landmarkFloating);
-      break;
-   default:
-      reg_print_fct_error("reg_spline_getLandmarkDistance_core");
-      reg_print_msg_error("Only implemented for single or double precision images");
-      reg_exit();
-   }
+                                      float *landmarkFloating) {
+    if (controlPointImage->intent_p1 != CUB_SPLINE_GRID) {
+        reg_print_fct_error("reg_spline_getLandmarkDistance");
+        reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now");
+        reg_exit();
+    }
+    switch (controlPointImage->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        return reg_spline_getLandmarkDistance_core<float>(controlPointImage, landmarkNumber, landmarkReference, landmarkFloating);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        return reg_spline_getLandmarkDistance_core<double>(controlPointImage, landmarkNumber, landmarkReference, landmarkFloating);
+        break;
+    default:
+        reg_print_fct_error("reg_spline_getLandmarkDistance_core");
+        reg_print_msg_error("Only implemented for single or double precision images");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DTYPE>
-void reg_spline_getLandmarkDistanceGradient_core(nifti_image *controlPointImage,
+void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPointImage,
                                                  nifti_image *gradientImage,
                                                  size_t landmarkNumber,
                                                  float *landmarkReference,
                                                  float *landmarkFloating,
-                                                 float weight)
-{
-   int imageDim=controlPointImage->nz>1?3:2;
-   size_t controlPointNumber = (size_t)controlPointImage->nx *
-         controlPointImage->ny * controlPointImage->nz;
-   size_t l, index;
-   float ref_position[3];
-   float def_position[3];
-   float flo_position[3];
-   int previous[3], a, b, c;
-   DTYPE basisX[4], basisY[4], basisZ[4], basis;
-   mat44 *gridRealToVox = &(controlPointImage->qto_ijk);
-   if(controlPointImage->sform_code>0)
-      gridRealToVox = &(controlPointImage->sto_ijk);
-   DTYPE *gridPtrX = static_cast<DTYPE *>(controlPointImage->data);
-   DTYPE *gradPtrX = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gridPtrY = &gridPtrX[controlPointNumber];
-   DTYPE *gradPtrY = &gradPtrX[controlPointNumber];
-   DTYPE *gridPtrZ=nullptr;
-   DTYPE *gradPtrZ=nullptr;
-   if(imageDim>2){
-      gridPtrZ = &gridPtrY[controlPointNumber];
-      gradPtrZ = &gradPtrY[controlPointNumber];
-   }
-
-   // Loop over all landmarks
-   for(l=0;l<landmarkNumber;++l){
-      // fetch the initial positions
-      ref_position[0]=landmarkReference[l*imageDim];
-      flo_position[0]=landmarkFloating[l*imageDim];
-      ref_position[1]=landmarkReference[l*imageDim+1];
-      flo_position[1]=landmarkFloating[l*imageDim+1];
-      if(imageDim>2){
-         ref_position[2]=landmarkReference[l*imageDim+2];
-         flo_position[2]=landmarkFloating[l*imageDim+2];
-      }
-      else ref_position[2]=flo_position[2]=0.f;
-      // Convert the reference position to voxel in the control point grid space
-      reg_mat44_mul(gridRealToVox, ref_position, def_position);
-      if(imageDim==2) def_position[2]=0.f;
-      // Extract the corresponding nodes
-      previous[0]=static_cast<int>(reg_floor(def_position[0]))-1;
-      previous[1]=static_cast<int>(reg_floor(def_position[1]))-1;
-      previous[2]=static_cast<int>(reg_floor(def_position[2]))-1;
-      // Check that the specified landmark belongs to the input image
-      if(previous[0]>-1 && previous[0]+3<controlPointImage->nx &&
-         previous[1]>-1 && previous[1]+3<controlPointImage->ny &&
-         ((previous[2]>-1 && previous[2]+3<controlPointImage->nz) || imageDim==2)){
-         // Extract the corresponding basis values
-         get_BSplineBasisValues<DTYPE>(def_position[0] - 1.f -(DTYPE)previous[0], basisX);
-         get_BSplineBasisValues<DTYPE>(def_position[1] - 1.f -(DTYPE)previous[1], basisY);
-         get_BSplineBasisValues<DTYPE>(def_position[2] - 1.f -(DTYPE)previous[2], basisZ);
-         def_position[0]=0.f;
-         def_position[1]=0.f;
-         def_position[2]=0.f;
-         if(imageDim>2){
-            for(c=0;c<4;++c){
-               for(b=0;b<4;++b){
-                  for(a=0;a<4;++a){
-                     index = ((previous[2]+c)*controlPointImage->ny+previous[1]+b) *
-                           controlPointImage->nx+previous[0]+a;
-                     basis = basisX[a] * basisY[b] * basisZ[c];
-                     def_position[0] += gridPtrX[index] * basis;
-                     def_position[1] += gridPtrY[index] * basis;
-                     def_position[2] += gridPtrZ[index] * basis;
-                  }
-               }
+                                                 float weight) {
+    int imageDim = controlPointImage->nz > 1 ? 3 : 2;
+    size_t controlPointNumber = size_t(controlPointImage->nx * controlPointImage->ny * controlPointImage->nz);
+    size_t l, index;
+    float ref_position[3];
+    float def_position[3];
+    float flo_position[3];
+    int previous[3], a, b, c;
+    DTYPE basisX[4], basisY[4], basisZ[4], basis;
+    const mat44 *gridRealToVox = &(controlPointImage->qto_ijk);
+    if (controlPointImage->sform_code > 0)
+        gridRealToVox = &(controlPointImage->sto_ijk);
+    const DTYPE *gridPtrX = static_cast<DTYPE*>(controlPointImage->data);
+    DTYPE *gradPtrX = static_cast<DTYPE*>(gradientImage->data);
+    const DTYPE *gridPtrY = &gridPtrX[controlPointNumber];
+    DTYPE *gradPtrY = &gradPtrX[controlPointNumber];
+    const DTYPE *gridPtrZ = nullptr;
+    DTYPE *gradPtrZ = nullptr;
+    if (imageDim > 2) {
+        gridPtrZ = &gridPtrY[controlPointNumber];
+        gradPtrZ = &gradPtrY[controlPointNumber];
+    }
+
+    // Loop over all landmarks
+    for (l = 0; l < landmarkNumber; ++l) {
+        // fetch the initial positions
+        ref_position[0] = landmarkReference[l * imageDim];
+        flo_position[0] = landmarkFloating[l * imageDim];
+        ref_position[1] = landmarkReference[l * imageDim + 1];
+        flo_position[1] = landmarkFloating[l * imageDim + 1];
+        if (imageDim > 2) {
+            ref_position[2] = landmarkReference[l * imageDim + 2];
+            flo_position[2] = landmarkFloating[l * imageDim + 2];
+        } else ref_position[2] = flo_position[2] = 0;
+        // Convert the reference position to voxel in the control point grid space
+        reg_mat44_mul(gridRealToVox, ref_position, def_position);
+        if (imageDim == 2) def_position[2] = 0;
+        // Extract the corresponding nodes
+        previous[0] = static_cast<int>(reg_floor(def_position[0])) - 1;
+        previous[1] = static_cast<int>(reg_floor(def_position[1])) - 1;
+        previous[2] = static_cast<int>(reg_floor(def_position[2])) - 1;
+        // Check that the specified landmark belongs to the input image
+        if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx &&
+            previous[1] > -1 && previous[1] + 3 < controlPointImage->ny &&
+            ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) {
+            // Extract the corresponding basis values
+            get_BSplineBasisValues<DTYPE>(def_position[0] - 1 - (DTYPE)previous[0], basisX);
+            get_BSplineBasisValues<DTYPE>(def_position[1] - 1 - (DTYPE)previous[1], basisY);
+            get_BSplineBasisValues<DTYPE>(def_position[2] - 1 - (DTYPE)previous[2], basisZ);
+            def_position[0] = 0;
+            def_position[1] = 0;
+            def_position[2] = 0;
+            if (imageDim > 2) {
+                for (c = 0; c < 4; ++c) {
+                    for (b = 0; b < 4; ++b) {
+                        for (a = 0; a < 4; ++a) {
+                            index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) *
+                                controlPointImage->nx + previous[0] + a;
+                            basis = basisX[a] * basisY[b] * basisZ[c];
+                            def_position[0] += gridPtrX[index] * basis;
+                            def_position[1] += gridPtrY[index] * basis;
+                            def_position[2] += gridPtrZ[index] * basis;
+                        }
+                    }
+                }
+            } else {
+                for (b = 0; b < 4; ++b) {
+                    for (a = 0; a < 4; ++a) {
+                        index = (previous[1] + b) * controlPointImage->nx + previous[0] + a;
+                        basis = basisX[a] * basisY[b];
+                        def_position[0] += gridPtrX[index] * basis;
+                        def_position[1] += gridPtrY[index] * basis;
+                    }
+                }
             }
-         }
-         else{
-            for(b=0;b<4;++b){
-               for(a=0;a<4;++a){
-                  index = (previous[1]+b)*controlPointImage->nx+previous[0]+a;
-                  basis = basisX[a] * basisY[b];
-                  def_position[0] += gridPtrX[index] * basis;
-                  def_position[1] += gridPtrY[index] * basis;
-               }
+            def_position[0] = flo_position[0] - def_position[0];
+            def_position[1] = flo_position[1] - def_position[1];
+            if (imageDim > 2)
+                def_position[2] = flo_position[2] - def_position[2];
+            if (imageDim > 2) {
+                for (c = 0; c < 4; ++c) {
+                    for (b = 0; b < 4; ++b) {
+                        for (a = 0; a < 4; ++a) {
+                            index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) *
+                                controlPointImage->nx + previous[0] + a;
+                            basis = basisX[a] * basisY[b] * basisZ[c] * weight;
+                            gradPtrX[index] -= def_position[0] * basis;
+                            gradPtrY[index] -= def_position[1] * basis;
+                            gradPtrZ[index] -= def_position[2] * basis;
+                        }
+                    }
+                }
+            } else {
+                for (b = 0; b < 4; ++b) {
+                    for (a = 0; a < 4; ++a) {
+                        index = (previous[1] + b) * controlPointImage->nx + previous[0] + a;
+                        basis = basisX[a] * basisY[b] * weight;
+                        gradPtrX[index] -= def_position[0] * basis;
+                        gradPtrY[index] -= def_position[1] * basis;
+                    }
+                }
             }
-         }
-         def_position[0]=flo_position[0]-def_position[0];
-         def_position[1]=flo_position[1]-def_position[1];
-         if(imageDim>2)
-            def_position[2]=flo_position[2]-def_position[2];
-         if(imageDim>2){
-            for(c=0;c<4;++c){
-               for(b=0;b<4;++b){
-                  for(a=0;a<4;++a){
-                     index = ((previous[2]+c)*controlPointImage->ny+previous[1]+b) *
-                           controlPointImage->nx+previous[0]+a;
-                     basis = basisX[a] * basisY[b] * basisZ[c] * weight;
-                     gradPtrX[index] -= def_position[0] * basis;
-                     gradPtrY[index] -= def_position[1] * basis;
-                     gradPtrZ[index] -= def_position[2] * basis;
-                  }
-               }
-            }
-         }
-         else{
-            for(b=0;b<4;++b){
-               for(a=0;a<4;++a){
-                  index = (previous[1]+b)*controlPointImage->nx+previous[0]+a;
-                  basis = basisX[a] * basisY[b] * weight;
-                  gradPtrX[index] -= def_position[0] * basis;
-                  gradPtrY[index] -= def_position[1] * basis;
-               }
-            }
-         }
-      }
-      else{
-         char warning_text[255];
-         if(imageDim>2)
-            sprintf(warning_text, "The current landmark at position %g %g %g is ignored",
-                    ref_position[0], ref_position[1], ref_position[2]);
-         else
-            sprintf(warning_text, "The current landmark at position %g %g is ignored",
-                    ref_position[0], ref_position[1]);
-         reg_print_msg_warn(warning_text);
-         reg_print_msg_warn("as it is not in the space of the reference image");
-      }
-   }
+        } else {
+            char warning_text[255];
+            if (imageDim > 2)
+                sprintf(warning_text, "The current landmark at position %g %g %g is ignored",
+                        ref_position[0], ref_position[1], ref_position[2]);
+            else
+                sprintf(warning_text, "The current landmark at position %g %g is ignored",
+                        ref_position[0], ref_position[1]);
+            reg_print_msg_warn(warning_text);
+            reg_print_msg_warn("as it is not in the space of the reference image");
+        }
+    }
 }
 /* *************************************************************** */
-void reg_spline_getLandmarkDistanceGradient(nifti_image *controlPointImage,
+void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage,
                                             nifti_image *gradientImage,
                                             size_t landmarkNumber,
                                             float *landmarkReference,
                                             float *landmarkFloating,
-                                            float weight)
-{
-   if(controlPointImage->intent_p1!=CUB_SPLINE_GRID){
-      reg_print_fct_error("reg_spline_getLandmarkDistanceGradient");
-      reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now");
-      reg_exit();
-   }
-   switch(controlPointImage->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      reg_spline_getLandmarkDistanceGradient_core<float>
+                                            float weight) {
+    if (controlPointImage->intent_p1 != CUB_SPLINE_GRID) {
+        reg_print_fct_error("reg_spline_getLandmarkDistanceGradient");
+        reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now");
+        reg_exit();
+    }
+    switch (controlPointImage->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_spline_getLandmarkDistanceGradient_core<float>
             (controlPointImage, gradientImage, landmarkNumber, landmarkReference, landmarkFloating, weight);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_spline_getLandmarkDistanceGradient_core<double>
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_spline_getLandmarkDistanceGradient_core<double>
             (controlPointImage, gradientImage, landmarkNumber, landmarkReference, landmarkFloating, weight);
-      break;
-   default:
-      reg_print_fct_error("reg_spline_getLandmarkDistanceGradient_core");
-      reg_print_msg_error("Only implemented for single or double precision images");
-      reg_exit();
-   }
+        break;
+    default:
+        reg_print_fct_error("reg_spline_getLandmarkDistanceGradient_core");
+        reg_print_msg_error("Only implemented for single or double precision images");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DTYPE>
-double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint)
-{
-   size_t nodeNumber = (size_t)splineControlPoint->nx*
-         splineControlPoint->ny*splineControlPoint->nz;
-   int x, y, z, index;
+double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) {
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    int x, y, z, index;
 
-   // Create pointers to the spline coefficients
-   reg_getDisplacementFromDeformation(splineControlPoint);
-   DTYPE * splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE * splinePtrY = &splinePtrX[nodeNumber];
-   DTYPE * splinePtrZ = &splinePtrY[nodeNumber];
+    // Create pointers to the spline coefficients
+    reg_getDisplacementFromDeformation(splineControlPoint);
+    DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
 
-   DTYPE centralCP[3], neigbCP[3];
+    DTYPE centralCP[3], neigbCP[3];
 
-   double constraintValue=0;
+    double constraintValue = 0;
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   private(index, x, y, z, centralCP, neigbCP) \
-   shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ) \
-   reduction(+:constraintValue)
+    private(index, x, y, z, centralCP, neigbCP) \
+    shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ) \
+    reduction(+:constraintValue)
 #endif // _OPENMP
-   for(z=0; z<splineControlPoint->nz;++z){
-      index=z*splineControlPoint->nx*splineControlPoint->ny;
-      for(y=0; y<splineControlPoint->ny;++y){
-         for(x=0; x<splineControlPoint->nx;++x){
-            centralCP[0]=splinePtrX[index];
-            centralCP[1]=splinePtrY[index];
-            centralCP[2]=splinePtrZ[index];
-
-            if(x>0){
-               neigbCP[0]=splinePtrX[index-1];
-               neigbCP[1]=splinePtrY[index-1];
-               neigbCP[2]=splinePtrZ[index-1];
-               constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+
-                     reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dx;
-            }
-            if(x<splineControlPoint->nx-1){
-               neigbCP[0]=splinePtrX[index+1];
-               neigbCP[1]=splinePtrY[index+1];
-               neigbCP[2]=splinePtrZ[index+1];
-               constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+
-                     reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dx;
-            }
-
-            if(y>0){
-               neigbCP[0]=splinePtrX[index-splineControlPoint->nx];
-               neigbCP[1]=splinePtrY[index-splineControlPoint->nx];
-               neigbCP[2]=splinePtrZ[index-splineControlPoint->nx];
-               constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+
-                     reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dy;
-            }
-            if(y<splineControlPoint->ny-1){
-               neigbCP[0]=splinePtrX[index+splineControlPoint->nx];
-               neigbCP[1]=splinePtrY[index+splineControlPoint->nx];
-               neigbCP[2]=splinePtrZ[index+splineControlPoint->nx];
-               constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+
-                     reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dy;
-            }
-
-            if(z>0){
-               neigbCP[0]=splinePtrX[index-splineControlPoint->nx*splineControlPoint->ny];
-               neigbCP[1]=splinePtrY[index-splineControlPoint->nx*splineControlPoint->ny];
-               neigbCP[2]=splinePtrZ[index-splineControlPoint->nx*splineControlPoint->ny];
-               constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+
-                     reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dz;
-            }
-            if(z<splineControlPoint->nz-1){
-               neigbCP[0]=splinePtrX[index+splineControlPoint->nx*splineControlPoint->ny];
-               neigbCP[1]=splinePtrY[index+splineControlPoint->nx*splineControlPoint->ny];
-               neigbCP[2]=splinePtrZ[index+splineControlPoint->nx*splineControlPoint->ny];
-               constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+
-                     reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dz;
-            }
-            index++;
-         } // x
-      } // y
-   } // z
-   reg_getDeformationFromDisplacement(splineControlPoint);
-   return constraintValue/static_cast<double>(nodeNumber);
+    for (z = 0; z < splineControlPoint->nz; ++z) {
+        index = z * splineControlPoint->nx * splineControlPoint->ny;
+        for (y = 0; y < splineControlPoint->ny; ++y) {
+            for (x = 0; x < splineControlPoint->nx; ++x) {
+                centralCP[0] = splinePtrX[index];
+                centralCP[1] = splinePtrY[index];
+                centralCP[2] = splinePtrZ[index];
+
+                if (x > 0) {
+                    neigbCP[0] = splinePtrX[index - 1];
+                    neigbCP[1] = splinePtrY[index - 1];
+                    neigbCP[2] = splinePtrZ[index - 1];
+                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
+                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dx;
+                }
+                if (x < splineControlPoint->nx - 1) {
+                    neigbCP[0] = splinePtrX[index + 1];
+                    neigbCP[1] = splinePtrY[index + 1];
+                    neigbCP[2] = splinePtrZ[index + 1];
+                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
+                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dx;
+                }
+
+                if (y > 0) {
+                    neigbCP[0] = splinePtrX[index - splineControlPoint->nx];
+                    neigbCP[1] = splinePtrY[index - splineControlPoint->nx];
+                    neigbCP[2] = splinePtrZ[index - splineControlPoint->nx];
+                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
+                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dy;
+                }
+                if (y < splineControlPoint->ny - 1) {
+                    neigbCP[0] = splinePtrX[index + splineControlPoint->nx];
+                    neigbCP[1] = splinePtrY[index + splineControlPoint->nx];
+                    neigbCP[2] = splinePtrZ[index + splineControlPoint->nx];
+                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
+                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dy;
+                }
+
+                if (z > 0) {
+                    neigbCP[0] = splinePtrX[index - splineControlPoint->nx * splineControlPoint->ny];
+                    neigbCP[1] = splinePtrY[index - splineControlPoint->nx * splineControlPoint->ny];
+                    neigbCP[2] = splinePtrZ[index - splineControlPoint->nx * splineControlPoint->ny];
+                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
+                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dz;
+                }
+                if (z < splineControlPoint->nz - 1) {
+                    neigbCP[0] = splinePtrX[index + splineControlPoint->nx * splineControlPoint->ny];
+                    neigbCP[1] = splinePtrY[index + splineControlPoint->nx * splineControlPoint->ny];
+                    neigbCP[2] = splinePtrZ[index + splineControlPoint->nx * splineControlPoint->ny];
+                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
+                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dz;
+                }
+                index++;
+            } // x
+        } // y
+    } // z
+    reg_getDeformationFromDisplacement(splineControlPoint);
+    return constraintValue / nodeNumber;
 }
 /* *************************************************************** */
-double reg_spline_approxLinearPairwise(nifti_image *splineControlPoint)
-{
-   if(splineControlPoint->nz>1){
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_spline_approxLinearPairwise3D<float>(splineControlPoint);
-      case NIFTI_TYPE_FLOAT64:
-         return reg_spline_approxLinearPairwise3D<double>(splineControlPoint);
-      default:
-         reg_print_fct_error("reg_spline_approxLinearPairwise");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-   else{
-      reg_print_fct_error("reg_spline_approxLinearPairwise");
-      reg_print_msg_error("Not implemented in 2D yet");
-      reg_exit();
-   }
+double reg_spline_approxLinearPairwise(nifti_image *splineControlPoint) {
+    if (splineControlPoint->nz > 1) {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_spline_approxLinearPairwise3D<float>(splineControlPoint);
+        case NIFTI_TYPE_FLOAT64:
+            return reg_spline_approxLinearPairwise3D<double>(splineControlPoint);
+        default:
+            reg_print_fct_error("reg_spline_approxLinearPairwise");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    } else {
+        reg_print_fct_error("reg_spline_approxLinearPairwise");
+        reg_print_msg_error("Not implemented in 2D yet");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DTYPE>
 void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint,
                                                nifti_image *gradientImage,
-                                               float weight
-                                               )
-{
-   size_t nodeNumber = (size_t)splineControlPoint->nx*
-         splineControlPoint->ny*splineControlPoint->nz;
-   int x, y, z, index;
+                                               float weight) {
+    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    int x, y, z, index;
 
-   // Create pointers to the spline coefficients
-   reg_getDisplacementFromDeformation(splineControlPoint);
-   DTYPE *splinePtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-   DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    // Create pointers to the spline coefficients
+    reg_getDisplacementFromDeformation(splineControlPoint);
+    DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
+    DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
 
-   // Pointers to the gradient image
-   DTYPE *gradPtrX = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradPtrY = &gradPtrX[nodeNumber];
-   DTYPE *gradPtrZ = &gradPtrY[nodeNumber];
+    // Pointers to the gradient image
+    DTYPE *gradPtrX = static_cast<DTYPE*>(gradientImage->data);
+    DTYPE *gradPtrY = &gradPtrX[nodeNumber];
+    DTYPE *gradPtrZ = &gradPtrY[nodeNumber];
 
-   DTYPE centralCP[3], neigbCP[3];
+    DTYPE centralCP[3], neigbCP[3];
 
-   double grad_values[3];
+    double grad_values[3];
 
-   DTYPE approxRatio = (DTYPE)weight / (DTYPE)(nodeNumber);
+    DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber;
 #if defined (_OPENMP)
 #pragma omp parallel for default(none) \
-   private(index, x, y, z, centralCP, neigbCP, grad_values) \
-   shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, approxRatio, \
-   gradPtrX, gradPtrY, gradPtrZ)
+    private(index, x, y, z, centralCP, neigbCP, grad_values) \
+    shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, approxRatio, \
+    gradPtrX, gradPtrY, gradPtrZ)
 #endif // _OPENMP
-   for(z=0; z<splineControlPoint->nz;++z){
-      index=z*splineControlPoint->nx*splineControlPoint->ny;
-      for(y=0; y<splineControlPoint->ny;++y){
-         for(x=0; x<splineControlPoint->nx;++x){
-            centralCP[0]=splinePtrX[index];
-            centralCP[1]=splinePtrY[index];
-            centralCP[2]=splinePtrZ[index];
-            grad_values[0]=0;
-            grad_values[1]=0;
-            grad_values[2]=0;
-
-            if(x>0){
-               neigbCP[0]=splinePtrX[index-1];
-               neigbCP[1]=splinePtrY[index-1];
-               neigbCP[2]=splinePtrZ[index-1];
-               grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dx;
-               grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dx;
-               grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dx;
-            }
-            if(x<splineControlPoint->nx-1){
-               neigbCP[0]=splinePtrX[index+1];
-               neigbCP[1]=splinePtrY[index+1];
-               neigbCP[2]=splinePtrZ[index+1];
-               grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dx;
-               grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dx;
-               grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dx;
-            }
-
-            if(y>0){
-               neigbCP[0]=splinePtrX[index-splineControlPoint->nx];
-               neigbCP[1]=splinePtrY[index-splineControlPoint->nx];
-               neigbCP[2]=splinePtrZ[index-splineControlPoint->nx];
-               grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dy;
-               grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dy;
-               grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dy;
-            }
-            if(y<splineControlPoint->ny-1){
-               neigbCP[0]=splinePtrX[index+splineControlPoint->nx];
-               neigbCP[1]=splinePtrY[index+splineControlPoint->nx];
-               neigbCP[2]=splinePtrZ[index+splineControlPoint->nx];
-               grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dy;
-               grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dy;
-               grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dy;
-            }
-
-            if(z>0){
-               neigbCP[0]=splinePtrX[index-splineControlPoint->nx*splineControlPoint->ny];
-               neigbCP[1]=splinePtrY[index-splineControlPoint->nx*splineControlPoint->ny];
-               neigbCP[2]=splinePtrZ[index-splineControlPoint->nx*splineControlPoint->ny];
-               grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dz;
-               grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dz;
-               grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dz;
-            }
-            if(z<splineControlPoint->nz-1){
-               neigbCP[0]=splinePtrX[index+splineControlPoint->nx*splineControlPoint->ny];
-               neigbCP[1]=splinePtrY[index+splineControlPoint->nx*splineControlPoint->ny];
-               neigbCP[2]=splinePtrZ[index+splineControlPoint->nx*splineControlPoint->ny];
-               grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dz;
-               grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dz;
-               grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dz;
-            }
-            gradPtrX[index] += approxRatio * static_cast<DTYPE>(grad_values[0]);
-            gradPtrY[index] += approxRatio * static_cast<DTYPE>(grad_values[1]);
-            gradPtrZ[index] += approxRatio * static_cast<DTYPE>(grad_values[2]);
-
-            index++;
-         } // x
-      } // y
-   } // z
-   reg_getDeformationFromDisplacement(splineControlPoint);
+    for (z = 0; z < splineControlPoint->nz; ++z) {
+        index = z * splineControlPoint->nx * splineControlPoint->ny;
+        for (y = 0; y < splineControlPoint->ny; ++y) {
+            for (x = 0; x < splineControlPoint->nx; ++x) {
+                centralCP[0] = splinePtrX[index];
+                centralCP[1] = splinePtrY[index];
+                centralCP[2] = splinePtrZ[index];
+                grad_values[0] = 0;
+                grad_values[1] = 0;
+                grad_values[2] = 0;
+
+                if (x > 0) {
+                    neigbCP[0] = splinePtrX[index - 1];
+                    neigbCP[1] = splinePtrY[index - 1];
+                    neigbCP[2] = splinePtrZ[index - 1];
+                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dx;
+                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dx;
+                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dx;
+                }
+                if (x < splineControlPoint->nx - 1) {
+                    neigbCP[0] = splinePtrX[index + 1];
+                    neigbCP[1] = splinePtrY[index + 1];
+                    neigbCP[2] = splinePtrZ[index + 1];
+                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dx;
+                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dx;
+                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dx;
+                }
+
+                if (y > 0) {
+                    neigbCP[0] = splinePtrX[index - splineControlPoint->nx];
+                    neigbCP[1] = splinePtrY[index - splineControlPoint->nx];
+                    neigbCP[2] = splinePtrZ[index - splineControlPoint->nx];
+                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dy;
+                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dy;
+                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dy;
+                }
+                if (y < splineControlPoint->ny - 1) {
+                    neigbCP[0] = splinePtrX[index + splineControlPoint->nx];
+                    neigbCP[1] = splinePtrY[index + splineControlPoint->nx];
+                    neigbCP[2] = splinePtrZ[index + splineControlPoint->nx];
+                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dy;
+                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dy;
+                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dy;
+                }
+
+                if (z > 0) {
+                    neigbCP[0] = splinePtrX[index - splineControlPoint->nx * splineControlPoint->ny];
+                    neigbCP[1] = splinePtrY[index - splineControlPoint->nx * splineControlPoint->ny];
+                    neigbCP[2] = splinePtrZ[index - splineControlPoint->nx * splineControlPoint->ny];
+                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dz;
+                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dz;
+                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dz;
+                }
+                if (z < splineControlPoint->nz - 1) {
+                    neigbCP[0] = splinePtrX[index + splineControlPoint->nx * splineControlPoint->ny];
+                    neigbCP[1] = splinePtrY[index + splineControlPoint->nx * splineControlPoint->ny];
+                    neigbCP[2] = splinePtrZ[index + splineControlPoint->nx * splineControlPoint->ny];
+                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dz;
+                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dz;
+                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dz;
+                }
+                gradPtrX[index] += approxRatio * static_cast<DTYPE>(grad_values[0]);
+                gradPtrY[index] += approxRatio * static_cast<DTYPE>(grad_values[1]);
+                gradPtrZ[index] += approxRatio * static_cast<DTYPE>(grad_values[2]);
+
+                index++;
+            } // x
+        } // y
+    } // z
+    reg_getDeformationFromDisplacement(splineControlPoint);
 }
 /* *************************************************************** */
 void reg_spline_approxLinearPairwiseGradient(nifti_image *splineControlPoint,
                                              nifti_image *gradientImage,
-                                             float weight
-                                             )
-{
-   if(splineControlPoint->datatype != gradientImage->datatype)
-   {
-      reg_print_fct_error("reg_spline_approxLinearPairwiseGradient");
-      reg_print_msg_error("Input images are expected to have the same datatype");
-      reg_exit();
-   }
-   if(splineControlPoint->nz>1){
-      switch(splineControlPoint->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_approxLinearPairwiseGradient3D<float>
-               (splineControlPoint, gradientImage, weight);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_approxLinearPairwiseGradient3D<double>
-               (splineControlPoint, gradientImage, weight);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_linearEnergyGradient");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
-      }
-   }
-   else{
-      reg_print_fct_error("reg_spline_approxLinearPairwiseGradient");
-      reg_print_msg_error("Not implemented for 2D images yet");
-      reg_exit();
-   }
+                                             float weight) {
+    if (splineControlPoint->datatype != gradientImage->datatype) {
+        reg_print_fct_error("reg_spline_approxLinearPairwiseGradient");
+        reg_print_msg_error("Input images are expected to have the same datatype");
+        reg_exit();
+    }
+    if (splineControlPoint->nz > 1) {
+        switch (splineControlPoint->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_approxLinearPairwiseGradient3D<float>(splineControlPoint, gradientImage, weight);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_approxLinearPairwiseGradient3D<double>(splineControlPoint, gradientImage, weight);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_linearEnergyGradient");
+            reg_print_msg_error("Only implemented for single or double precision images");
+            reg_exit();
+        }
+    } else {
+        reg_print_fct_error("reg_spline_approxLinearPairwiseGradient");
+        reg_print_msg_error("Not implemented for 2D images yet");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h
index 107d896d..237a06c1 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.h
+++ b/reg-lib/cpu/_reg_localTrans_regul.h
@@ -24,7 +24,7 @@
  * @return The normalised bending energy. Normalised by the number of voxel
  */
 extern "C++"
-double reg_spline_approxBendingEnergy(nifti_image *controlPointGridImage);
+double reg_spline_approxBendingEnergy(const nifti_image *controlPointGridImage);
 /* *************************************************************** */
 /** @brief Compute and return the approximated (at the control point position)
  * bending energy gradient for each control point
@@ -38,8 +38,7 @@ double reg_spline_approxBendingEnergy(nifti_image *controlPointGridImage);
 extern "C++"
 void reg_spline_approxBendingEnergyGradient(nifti_image *controlPointGridImage,
                                             nifti_image *gradientImage,
-                                            float weight
-                                            );
+                                            float weight);
 /* *************************************************************** */
 /** @brief Compute and return the linear elastic energy terms.
  * @param controlPointGridImage Image that contains the transformation
@@ -47,8 +46,8 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *controlPointGridImage,
  * @return The normalised linear energy. Normalised by the number of voxel
  */
 extern "C++"
-double reg_spline_linearEnergy(nifti_image *referenceImage,
-                               nifti_image *controlPointGridImage);
+double reg_spline_linearEnergy(const nifti_image *referenceImage,
+                               const nifti_image *controlPointGridImage);
 /* *************************************************************** */
 /** @brief Compute and return the linear elastic energy terms approximated
  * at the control point positions only.
@@ -57,7 +56,7 @@ double reg_spline_linearEnergy(nifti_image *referenceImage,
  * @return The normalised linear energy. Normalised by the number of voxel
  */
 extern "C++"
-double reg_spline_approxLinearEnergy(nifti_image *controlPointGridImage);
+double reg_spline_approxLinearEnergy(const nifti_image *controlPointGridImage);
 /* *************************************************************** */
 /** @brief Compute the gradient of the linear elastic energy terms
  * computed at all voxel position.
@@ -71,11 +70,10 @@ double reg_spline_approxLinearEnergy(nifti_image *controlPointGridImage);
  * @param weight Weight to apply to the term of the penalty
  */
 extern "C++"
-void reg_spline_linearEnergyGradient(nifti_image *referenceImage,
-                                     nifti_image *controlPointGridImage,
+void reg_spline_linearEnergyGradient(const nifti_image *referenceImage,
+                                     const nifti_image *controlPointGridImage,
                                      nifti_image *gradientImage,
-                                     float weight
-                                     );
+                                     float weight);
 /* *************************************************************** */
 /** @brief Compute the gradient of the linear elastic energy terms
  * approximated at the control point positions only.
@@ -83,29 +81,28 @@ void reg_spline_linearEnergyGradient(nifti_image *referenceImage,
  * parametrisation
  * @param gradientImage Image of similar size than the control point
  * grid and that contains the gradient of the objective function.
- * The gradient of the linear elasticily terms are added to the
+ * The gradient of the linear elasticity terms are added to the
  * current values
  * @param weight Weight to apply to the term of the penalty
  */
 extern "C++"
-void reg_spline_approxLinearEnergyGradient(nifti_image *controlPointGridImage,
+void reg_spline_approxLinearEnergyGradient(const nifti_image *controlPointGridImage,
                                            nifti_image *gradientImage,
-                                           float weight
-                                           );
+                                           float weight);
 /* *************************************************************** */
 /** @brief Compute and return the linear elastic energy terms.
  * @param deformationField Image that contains the transformation.
  * @return The normalised linear energy. Normalised by the number of voxel
  */
 extern "C++"
-double reg_defField_linearEnergy(nifti_image *deformationField);
+double reg_defField_linearEnergy(const nifti_image *deformationField);
 /* *************************************************************** */
 /** @brief Compute and return the linear elastic energy terms.
  * @param deformationField Image that contains the transformation.
  * @param weight Weight to apply to the term of the penalty
  */
 extern "C++"
-void reg_defField_linearEnergyGradient(nifti_image *deformationField,
+void reg_defField_linearEnergyGradient(const nifti_image *deformationField,
                                        nifti_image *gradientImage,
                                        float weight);
 /* *************************************************************** */
@@ -118,7 +115,7 @@ void reg_defField_linearEnergyGradient(nifti_image *deformationField,
  * @param landmarkFloating Landmark in the floating image
  */
 extern "C++"
-double reg_spline_getLandmarkDistance(nifti_image *controlPointImage,
+double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage,
                                       size_t landmarkNumber,
                                       float *landmarkReference,
                                       float *landmarkFloating);
@@ -135,7 +132,7 @@ double reg_spline_getLandmarkDistance(nifti_image *controlPointImage,
  * @param weight weight to apply to the gradient
  */
 extern "C++"
-void reg_spline_getLandmarkDistanceGradient(nifti_image *controlPointImage,
+void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage,
                                             nifti_image *gradientImage,
                                             size_t landmarkNumber,
                                             float *landmarkReference,
@@ -150,8 +147,7 @@ void reg_spline_getLandmarkDistanceGradient(nifti_image *controlPointImage,
 extern "C++"
 void reg_spline_approxLinearPairwiseGradient(nifti_image *controlPointGridImage,
                                              nifti_image *gradientImage,
-                                             float weight
-                                             );
+                                             float weight);
 /* *************************************************************** */
 extern "C++"
 double reg_spline_approxLinearPairwise(nifti_image *controlPointGridImage);
diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h
index 68c6a651..dc19ebbd 100644
--- a/reg-lib/cuda/CudaF3dContent.h
+++ b/reg-lib/cuda/CudaF3dContent.h
@@ -10,7 +10,7 @@ class CudaF3dContent: public F3dContent, public CudaContent {
     CudaF3dContent(nifti_image *referenceIn,
                    nifti_image *floatingIn,
                    nifti_image *controlPointGridIn,
-                   nifti_image *localWeightSimIn,
+                   nifti_image *localWeightSimIn = nullptr,
                    int *referenceMaskIn = nullptr,
                    mat44 *transformationMatrixIn = nullptr,
                    size_t bytesIn = sizeof(float));
diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp
deleted file mode 100755
index 1a142083..00000000
--- a/reg-lib/cuda/_reg_f3d_gpu.cpp
+++ /dev/null
@@ -1,1059 +0,0 @@
-/*
- *  _reg_f3d_gpu.cpp
- *
- *
- *  Created by Marc Modat on 19/11/2010.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#include "_reg_f3d_gpu.h"
-
- /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
- /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint)
-    : reg_f3d<float>::reg_f3d(refTimePoint, floTimePoint) {
-    this->executableName = (char *)"NiftyReg F3D GPU";
-    this->reference_gpu = nullptr;
-    this->floating_gpu = nullptr;
-    this->currentMask_gpu = nullptr;
-    this->warped_gpu = nullptr;
-    this->controlPointGrid_gpu = nullptr;
-    this->deformationFieldImage_gpu = nullptr;
-    this->warpedGradientImage_gpu = nullptr;
-    this->voxelBasedMeasureGradientImage_gpu = nullptr;
-    this->transformationGradient_gpu = nullptr;
-
-    this->measure_gpu_ssd = nullptr;
-    this->measure_gpu_kld = nullptr;
-    this->measure_gpu_dti = nullptr;
-    this->measure_gpu_lncc = nullptr;
-    this->measure_gpu_nmi = nullptr;
-
-    this->reference2_gpu = nullptr;
-    this->floating2_gpu = nullptr;
-    this->warped2_gpu = nullptr;
-    this->warpedGradientImage2_gpu = nullptr;
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::reg_f3d_gpu");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-reg_f3d_gpu::~reg_f3d_gpu() {
-    if (this->reference_gpu != nullptr)
-        cudaCommon_free(&this->reference_gpu);
-    if (this->floating_gpu != nullptr)
-        cudaCommon_free(&this->floating_gpu);
-    if (this->currentMask_gpu != nullptr)
-        cudaCommon_free(&this->currentMask_gpu);
-    if (this->warped_gpu != nullptr)
-        cudaCommon_free(&this->warped_gpu);
-    if (this->controlPointGrid_gpu != nullptr)
-        cudaCommon_free(&this->controlPointGrid_gpu);
-    if (this->deformationFieldImage_gpu != nullptr)
-        cudaCommon_free(&this->deformationFieldImage_gpu);
-    if (this->warpedGradientImage_gpu != nullptr)
-        cudaCommon_free(&this->warpedGradientImage_gpu);
-    if (this->voxelBasedMeasureGradientImage_gpu != nullptr)
-        cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu);
-    if (this->transformationGradient_gpu != nullptr)
-        cudaCommon_free(&this->transformationGradient_gpu);
-
-    if (this->reference2_gpu != nullptr)
-        cudaCommon_free(&this->reference2_gpu);
-    if (this->floating2_gpu != nullptr)
-        cudaCommon_free(&this->floating2_gpu);
-    if (this->warped2_gpu != nullptr)
-        cudaCommon_free(&this->warped2_gpu);
-    if (this->warpedGradientImage2_gpu != nullptr)
-        cudaCommon_free(&this->warpedGradientImage2_gpu);
-
-    if (this->optimiser != nullptr) {
-        delete this->optimiser;
-        this->optimiser = nullptr;
-    }
-
-    if (this->measure_gpu_nmi != nullptr) {
-        delete this->measure_gpu_nmi;
-        this->measure_gpu_nmi = nullptr;
-        this->measure_nmi = nullptr;
-    }
-    if (this->measure_gpu_ssd != nullptr) {
-        delete this->measure_gpu_ssd;
-        this->measure_gpu_ssd = nullptr;
-        this->measure_ssd = nullptr;
-    }
-    if (this->measure_gpu_kld != nullptr) {
-        delete this->measure_gpu_kld;
-        this->measure_gpu_kld = nullptr;
-        this->measure_kld = nullptr;
-    }
-    if (this->measure_gpu_dti != nullptr) {
-        delete this->measure_gpu_dti;
-        this->measure_gpu_dti = nullptr;
-        this->measure_dti = nullptr;
-    }
-    if (this->measure_gpu_lncc != nullptr) {
-        delete this->measure_gpu_lncc;
-        this->measure_gpu_lncc = nullptr;
-        this->measure_lncc = nullptr;
-    }
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::~reg_f3d_gpu");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::AllocateWarped() {
-    reg_f3d::AllocateWarped();
-
-    if (this->warped->nt == 1) {
-        if (cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, this->warped->dim)) {
-            reg_print_fct_error("reg_f3d_gpu::AllocateWarped()");
-            reg_print_msg_error("Error when allocating the warped image");
-            reg_exit();
-        }
-    } else if (this->warped->nt == 2) {
-        if (cudaCommon_allocateArrayToDevice<float>(&this->warped_gpu, &this->warped2_gpu, this->warped->dim)) {
-            reg_print_fct_error("reg_f3d_gpu::AllocateWarped()");
-            reg_print_msg_error("Error when allocating the warped image");
-            reg_exit();
-        }
-    } else {
-        reg_print_fct_error("reg_f3d_gpu::AllocateWarped()");
-        reg_print_msg_error("reg_f3d_gpu does not handle more than 2 time points in the floating image");
-        reg_exit();
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::AllocateWarped");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::DeallocateWarped() {
-    reg_f3d::DeallocateWarped();
-
-    if (this->warped_gpu != nullptr) {
-        cudaCommon_free(&this->warped_gpu);
-        this->warped_gpu = nullptr;
-    }
-    if (this->warped2_gpu != nullptr) {
-        cudaCommon_free(&this->warped2_gpu);
-        this->warped2_gpu = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::DeallocateWarped");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::AllocateDeformationField() {
-    this->DeallocateDeformationField();
-    NR_CUDA_SAFE_CALL(cudaMalloc(&this->deformationFieldImage_gpu,
-                                 this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::AllocateDeformationField");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::DeallocateDeformationField() {
-    if (this->deformationFieldImage_gpu != nullptr) {
-        cudaCommon_free(&this->deformationFieldImage_gpu);
-        this->deformationFieldImage_gpu = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::DeallocateDeformationField");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::AllocateWarpedGradient() {
-    this->DeallocateWarpedGradient();
-    if (this->inputFloating->nt == 1) {
-        NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu,
-                                     this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
-    } else if (this->inputFloating->nt == 2) {
-        NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu,
-                                     this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
-        NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage2_gpu,
-                                     this->activeVoxelNumber[this->currentLevel] * sizeof(float4)));
-    } else {
-        reg_print_fct_error("reg_f3d_gpu::AllocateWarpedGradient()");
-        reg_print_msg_error("reg_f3d_gpu does not handle more than 2 time points in the floating image");
-        reg_exit();
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::AllocateWarpedGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::DeallocateWarpedGradient() {
-    if (this->warpedGradientImage_gpu != nullptr) {
-        cudaCommon_free(&this->warpedGradientImage_gpu);
-        this->warpedGradientImage_gpu = nullptr;
-    }
-    if (this->warpedGradientImage2_gpu != nullptr) {
-        cudaCommon_free(&this->warpedGradientImage2_gpu);
-        this->warpedGradientImage2_gpu = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::DeallocateWarpedGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() {
-    this->DeallocateVoxelBasedMeasureGradient();
-    if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, this->reference->dim)) {
-        reg_print_fct_error("reg_f3d_gpu::AllocateVoxelBasedMeasureGradient()");
-        reg_print_msg_error("Error when allocating the voxel based measure gradient image");
-        reg_exit();
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::AllocateVoxelBasedMeasureGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::DeallocateVoxelBasedMeasureGradient() {
-    if (this->voxelBasedMeasureGradientImage_gpu != nullptr) {
-        cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu);
-        this->voxelBasedMeasureGradientImage_gpu = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::DeallocateVoxelBasedMeasureGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::AllocateTransformationGradient() {
-    this->DeallocateTransformationGradient();
-    if (cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu, this->controlPointGrid->dim)) {
-        reg_print_fct_error("reg_f3d_gpu::AllocateTransformationGradient()");
-        reg_print_msg_error("Error when allocating the node based gradient image");
-        reg_exit();
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::AllocateNodeBasedGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::DeallocateTransformationGradient() {
-    if (this->transformationGradient_gpu != nullptr) {
-        cudaCommon_free(&this->transformationGradient_gpu);
-        this->transformationGradient_gpu = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::DeallocateTransformationGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) {
-    if (this->jacobianLogWeight <= 0) return 0;
-
-    bool approx = type == 2 ? false : this->jacobianLogApproximation;
-
-    double value = reg_spline_getJacobianPenaltyTerm_gpu(this->reference,
-                                                         this->controlPointGrid,
-                                                         this->controlPointGrid_gpu,
-                                                         approx);
-
-    unsigned int maxit = 5;
-    if (type > 0) maxit = 20;
-    unsigned int it = 0;
-    while (value != value && it < maxit) {
-        value = reg_spline_correctFolding_gpu(this->reference,
-                                              this->controlPointGrid,
-                                              this->controlPointGrid_gpu,
-                                              approx);
-#ifndef NDEBUG
-        reg_print_msg_debug("Folding correction");
-#endif
-        it++;
-    }
-    if (type > 0) {
-        if (value != value) {
-            this->optimiser->RestoreBestDOF();
-            reg_print_fct_error("reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm()");
-            reg_print_msg_error("The folding correction scheme failed");
-        } else {
-#ifndef NDEBUG
-            if (it > 0) {
-                char text[255];
-                sprintf(text, "Folding correction, %i step(s)", it);
-                reg_print_msg_debug(text);
-            }
-#endif
-        }
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm");
-#endif
-    return this->jacobianLogWeight * value;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() {
-    if (this->bendingEnergyWeight <= 0) return 0;
-
-    double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid,
-                                                      this->controlPointGrid_gpu);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm");
-#endif
-    return this->bendingEnergyWeight * value;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-double reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm() {
-    if (this->linearEnergyWeight <= 0)
-        return 0;
-
-    reg_print_fct_error("reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm()");
-    reg_print_msg_error("Option not supported!");
-    reg_exit();
-    return 0;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-double reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm() {
-    if (this->landmarkRegWeight <= 0)
-        return 0;
-
-    reg_print_fct_error("reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm()");
-    reg_print_msg_error("Option not supported!");
-    reg_exit();
-    return 0;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetDeformationField() {
-    if (this->controlPointGrid_gpu == nullptr) {
-        reg_f3d::GetDeformationField();
-    } else {
-        // Compute the deformation field
-        reg_spline_getDeformationField_gpu(this->controlPointGrid,
-                                           this->reference,
-                                           this->controlPointGrid_gpu,
-                                           this->deformationFieldImage_gpu,
-                                           this->currentMask_gpu,
-                                           this->activeVoxelNumber[this->currentLevel],
-                                           true); // use B-splines
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::GetDeformationField");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::WarpFloatingImage(int inter) {
-    // Interpolation is linear by default when using GPU, the inter variable is not used.
-    inter = inter; // just to avoid a compiler warning
-
-    // Compute the deformation field
-    this->GetDeformationField();
-
-    // Resample the floating image
-    reg_resampleImage_gpu(this->floating,
-                          this->warped_gpu,
-                          this->floating_gpu,
-                          this->deformationFieldImage_gpu,
-                          this->currentMask_gpu,
-                          this->activeVoxelNumber[this->currentLevel],
-                          this->warpedPaddingValue);
-
-    if (this->floating->nt == 2) {
-        reg_resampleImage_gpu(this->floating,
-                              this->warped2_gpu,
-                              this->floating2_gpu,
-                              this->deformationFieldImage_gpu,
-                              this->currentMask_gpu,
-                              this->activeVoxelNumber[this->currentLevel],
-                              this->warpedPaddingValue);
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::WarpFloatingImage");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::SetGradientImageToZero() {
-    cudaMemset(this->transformationGradient_gpu, 0,
-               this->controlPointGrid->nx * this->controlPointGrid->ny * this->controlPointGrid->nz * sizeof(float4));
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::SetGradientImageToZero");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetVoxelBasedGradient() {
-    // The voxel based gradient image is filled with zeros
-    cudaMemset(this->voxelBasedMeasureGradientImage_gpu, 0,
-               this->reference->nx * this->reference->ny * this->reference->nz *
-               sizeof(float4));
-
-    // The intensity gradient is first computed
-    reg_getImageGradient_gpu(this->floating,
-                             this->floating_gpu,
-                             this->deformationFieldImage_gpu,
-                             this->warpedGradientImage_gpu,
-                             this->activeVoxelNumber[this->currentLevel],
-                             this->warpedPaddingValue);
-
-    // The gradient of the various measures of similarity are computed
-    if (this->measure_gpu_nmi != nullptr)
-        this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient();
-
-    if (this->measure_gpu_ssd != nullptr)
-        this->measure_gpu_ssd->GetVoxelBasedSimilarityMeasureGradient();
-
-    if (this->measure_gpu_kld != nullptr)
-        this->measure_gpu_kld->GetVoxelBasedSimilarityMeasureGradient();
-
-    if (this->measure_gpu_lncc != nullptr)
-        this->measure_gpu_lncc->GetVoxelBasedSimilarityMeasureGradient();
-
-    if (this->measure_gpu_dti != nullptr)
-        this->measure_gpu_dti->GetVoxelBasedSimilarityMeasureGradient();
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::GetVoxelBasedGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetSimilarityMeasureGradient() {
-    this->GetVoxelBasedGradient();
-
-    // The voxel based gradient is smoothed
-    float smoothingRadius[3] = {
-        this->controlPointGrid->dx / this->reference->dx,
-        this->controlPointGrid->dy / this->reference->dy,
-        this->controlPointGrid->dz / this->reference->dz
-    };
-    reg_smoothImageForCubicSpline_gpu(this->warped,
-                                      this->voxelBasedMeasureGradientImage_gpu,
-                                      smoothingRadius);
-
-    // The node gradient is extracted
-    reg_voxelCentric2NodeCentric_gpu(this->warped,
-                                     this->controlPointGrid,
-                                     this->voxelBasedMeasureGradientImage_gpu,
-                                     this->transformationGradient_gpu,
-                                     this->similarityWeight);
-
-    /* The similarity measure gradient is converted from voxel space to real space */
-    mat44 *floatingMatrix_xyz = nullptr;
-    if (this->floating->sform_code > 0)
-        floatingMatrix_xyz = &(this->floating->sto_xyz);
-    else floatingMatrix_xyz = &(this->floating->qto_xyz);
-    reg_convertNMIGradientFromVoxelToRealSpace_gpu(floatingMatrix_xyz,
-                                                   this->controlPointGrid,
-                                                   this->transformationGradient_gpu);
-    // The gradient is smoothed using a Gaussian kernel if it is required
-    if (this->gradientSmoothingSigma != 0) {
-        reg_gaussianSmoothing_gpu(this->controlPointGrid,
-                                  this->transformationGradient_gpu,
-                                  this->gradientSmoothingSigma,
-                                  nullptr);
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::GetSimilarityMeasureGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetBendingEnergyGradient() {
-    if (this->bendingEnergyWeight <= 0) return;
-
-    reg_spline_approxBendingEnergyGradient_gpu(this->controlPointGrid,
-                                               this->controlPointGrid_gpu,
-                                               this->transformationGradient_gpu,
-                                               this->bendingEnergyWeight);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::GetBendingEnergyGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetLinearEnergyGradient() {
-    if (this->linearEnergyWeight <= 0)
-        return;
-
-    reg_print_fct_error("reg_f3d_gpu::GetLinearEnergyGradient()");
-    reg_print_msg_error("Option not supported!");
-    reg_exit();
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetJacobianBasedGradient() {
-    if (this->jacobianLogWeight <= 0) return;
-
-    reg_spline_getJacobianPenaltyTermGradient_gpu(this->reference,
-                                                  this->controlPointGrid,
-                                                  this->controlPointGrid_gpu,
-                                                  this->transformationGradient_gpu,
-                                                  this->jacobianLogWeight,
-                                                  this->jacobianLogApproximation);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::GetJacobianBasedGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetLandmarkDistanceGradient() {
-    if (this->landmarkRegWeight <= 0)
-        return;
-
-    reg_print_fct_error("reg_f3d_gpu::GetLandmarkDistanceGradient()");
-    reg_print_msg_error("Option not supported!");
-    reg_exit();
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UpdateParameters(float scale) {
-    float4 *currentDOF = reinterpret_cast<float4*>(this->optimiser->GetCurrentDOF());
-    float4 *bestDOF = reinterpret_cast<float4*>(this->optimiser->GetBestDOF());
-    float4 *gradient = reinterpret_cast<float4*>(this->optimiser->GetGradient());
-
-    reg_updateControlPointPosition_gpu(this->controlPointGrid, currentDOF, bestDOF, gradient, scale);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::UpdateParameters");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::SmoothGradient() {
-    if (this->gradientSmoothingSigma != 0) {
-        reg_print_fct_error("reg_f3d_gpu::SmoothGradient()");
-        reg_print_msg_error("Option not supported!");
-        reg_exit();
-    }
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::GetApproximatedGradient() {
-    float4 *gridValue, *currentValue, *gradientValue;
-    cudaMallocHost(&gridValue, sizeof(float4));
-    cudaMallocHost(&currentValue, sizeof(float4));
-    cudaMallocHost(&gradientValue, sizeof(float4));
-
-    float eps = this->controlPointGrid->dx / 100.f;
-
-    for (size_t i = 0; i < this->optimiser->GetVoxNumber(); ++i) {
-        // Extract the grid value
-        cudaMemcpy(gridValue, &this->controlPointGrid_gpu[i], sizeof(float4), cudaMemcpyDeviceToHost);
-        cudaMemcpy(currentValue, &(reinterpret_cast<float4*>(this->optimiser->GetBestDOF()))[i], sizeof(float4), cudaMemcpyDeviceToHost);
-
-        // -- X axis
-        // Modify the grid value along the x axis
-        gridValue->x = currentValue->x + eps;
-        cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
-        // Evaluate the objective function value
-        gradientValue->x = this->GetObjectiveFunctionValue();
-        // Modify the grid value along the x axis
-        gridValue->x = currentValue->x - eps;
-        cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
-        // Evaluate the objective function value
-        gradientValue->x -= this->GetObjectiveFunctionValue();
-        gradientValue->x /= 2.f * eps;
-        gridValue->x = currentValue->x;
-
-        // -- Y axis
-        // Modify the grid value along the y axis
-        gridValue->y = currentValue->y + eps;
-        cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
-        // Evaluate the objective function value
-        gradientValue->y = this->GetObjectiveFunctionValue();
-        // Modify the grid value the y axis
-        gridValue->y = currentValue->y - eps;
-        cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
-        // Evaluate the objective function value
-        gradientValue->y -= this->GetObjectiveFunctionValue();
-        gradientValue->y /= 2.f * eps;
-        gridValue->y = currentValue->y;
-
-        if (this->optimiser->GetNDim() > 2) {
-            // -- Z axis
-            // Modify the grid value along the y axis
-            gridValue->z = currentValue->z + eps;
-            cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
-            // Evaluate the objective function value
-            gradientValue->z = this->GetObjectiveFunctionValue();
-            // Modify the grid value the y axis
-            gridValue->z = currentValue->z - eps;
-            cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
-            // Evaluate the objective function value
-            gradientValue->z -= this->GetObjectiveFunctionValue();
-            gradientValue->z /= 2.f * eps;
-        }
-
-        // Restore the initial parametrisation
-        cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice);
-
-        // Save the assessed gradient
-        cudaMemcpy(&this->transformationGradient_gpu[i], gradientValue, sizeof(float4), cudaMemcpyHostToDevice);
-    }
-
-    cudaFreeHost(gridValue);
-    cudaFreeHost(currentValue);
-    cudaFreeHost(gradientValue);
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::GetApproximatedGradient");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-nifti_image** reg_f3d_gpu::GetWarpedImage() {
-    // The initial images are used
-    if (this->inputReference == nullptr || this->inputFloating == nullptr || this->controlPointGrid == nullptr) {
-        reg_print_fct_error("reg_f3d_gpu::GetWarpedImage()");
-        reg_print_msg_error("The reference, floating and control point grid images have to be defined");
-        reg_exit();
-    }
-
-    this->reference = this->inputReference;
-    this->floating = this->inputFloating;
-    this->currentMask = (int*)calloc(this->activeVoxelNumber[this->currentLevel], sizeof(int));
-
-    reg_tools_changeDatatype<float>(this->reference);
-    reg_tools_changeDatatype<float>(this->floating);
-
-    this->AllocateWarped();
-    this->AllocateDeformationField();
-    this->InitialiseCurrentLevel();
-    this->WarpFloatingImage(3); // cubic spline interpolation
-    this->DeallocateDeformationField();
-
-    nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
-    warpedImage[0] = nifti_copy_nim_info(this->warped);
-    warpedImage[0]->cal_min = this->inputFloating->cal_min;
-    warpedImage[0]->cal_max = this->inputFloating->cal_max;
-    warpedImage[0]->scl_slope = this->inputFloating->scl_slope;
-    warpedImage[0]->scl_inter = this->inputFloating->scl_inter;
-    warpedImage[0]->data = (void*)malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper);
-    cudaCommon_transferFromDeviceToNifti(warpedImage[0], &this->warped_gpu);
-    if (this->floating->nt == 2) {
-        warpedImage[1] = warpedImage[0];
-        warpedImage[1]->data = (void*)malloc(warpedImage[1]->nvox * warpedImage[1]->nbyper);
-        cudaCommon_transferFromDeviceToNifti(warpedImage[1], &this->warped2_gpu);
-    }
-
-    this->DeallocateWarped();
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::GetWarpedImage");
-#endif
-    return warpedImage;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-float reg_f3d_gpu::InitialiseCurrentLevel() {
-    float maxStepSize = reg_f3d::InitialiseCurrentLevel();
-
-    if (this->reference_gpu != nullptr) cudaCommon_free(&this->reference_gpu);
-    if (this->reference2_gpu != nullptr) cudaCommon_free(&this->reference2_gpu);
-    if (this->reference->nt == 1) {
-        if (cudaCommon_allocateArrayToDevice<float>(&this->reference_gpu, this->reference->dim)) {
-            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when allocating the reference image");
-            reg_exit();
-        }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->reference_gpu, this->reference)) {
-            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when transferring the reference image");
-            reg_exit();
-        }
-    } else if (this->reference->nt == 2) {
-        if (cudaCommon_allocateArrayToDevice<float>(&this->reference_gpu,
-                                                    &this->reference2_gpu, this->reference->dim)) {
-            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when allocating the reference image");
-            reg_exit();
-        }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->reference_gpu,
-                                                           &this->reference2_gpu, this->reference)) {
-            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when transferring the reference image");
-            reg_exit();
-        }
-    }
-
-    if (this->floating_gpu != nullptr) cudaCommon_free(&this->floating_gpu);
-    if (this->floating2_gpu != nullptr) cudaCommon_free(&this->floating2_gpu);
-    if (this->reference->nt == 1) {
-        if (cudaCommon_allocateArrayToDevice<float>(&this->floating_gpu, this->floating->dim)) {
-            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when allocating the floating image");
-            reg_exit();
-        }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->floating_gpu, this->floating)) {
-            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when transferring the floating image");
-            reg_exit();
-        }
-    } else if (this->reference->nt == 2) {
-        if (cudaCommon_allocateArrayToDevice<float>(&this->floating_gpu,
-                                                    &this->floating2_gpu, this->floating->dim)) {
-            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when allocating the floating image");
-            reg_exit();
-        }
-        if (cudaCommon_transferNiftiToArrayOnDevice<float>(&this->floating_gpu,
-                                                           &this->floating2_gpu, this->floating)) {
-            reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-            reg_print_msg_error("Error when transferring the floating image");
-            reg_exit();
-        }
-    }
-
-    if (this->controlPointGrid_gpu != nullptr) cudaCommon_free(&this->controlPointGrid_gpu);
-    if (cudaCommon_allocateArrayToDevice<float4>(&this->controlPointGrid_gpu, this->controlPointGrid->dim)) {
-        reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-        reg_print_msg_error("Error when allocating the control point image");
-        reg_exit();
-    }
-    if (cudaCommon_transferNiftiToArrayOnDevice<float4>(&this->controlPointGrid_gpu, this->controlPointGrid)) {
-        reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()");
-        reg_print_msg_error("Error when transferring the control point image");
-        reg_exit();
-    }
-
-    int *targetMask_h;
-    NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask_h, this->activeVoxelNumber[this->currentLevel] * sizeof(int)));
-    int *targetMask_h_ptr = &targetMask_h[0];
-    for (int i = 0; i < this->reference->nx * this->reference->ny * this->reference->nz; i++) {
-        if (this->currentMask[i] != -1)
-            *targetMask_h_ptr++ = i;
-    }
-    NR_CUDA_SAFE_CALL(cudaMalloc(&this->currentMask_gpu, this->activeVoxelNumber[this->currentLevel] * sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentMask_gpu, targetMask_h,
-                                 this->activeVoxelNumber[this->currentLevel] * sizeof(int), cudaMemcpyHostToDevice));
-    NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask_h));
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::InitialiseCurrentLevel");
-#endif
-    return maxStepSize;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::DeallocateCurrentInputImage() {
-    reg_f3d::DeallocateCurrentInputImage();
-
-    if (cudaCommon_transferFromDeviceToNifti<float4>(this->controlPointGrid, &this->controlPointGrid_gpu)) {
-        reg_print_fct_error("reg_f3d_gpu::DeallocateCurrentInputImage()");
-        reg_print_msg_error("Error when transferring back the control point image");
-        reg_exit();
-    }
-    cudaCommon_free(&this->controlPointGrid_gpu);
-    this->controlPointGrid_gpu = nullptr;
-    cudaCommon_free(&this->reference_gpu);
-    this->reference_gpu = nullptr;
-    cudaCommon_free(&this->floating_gpu);
-    this->floating_gpu = nullptr;
-    NR_CUDA_SAFE_CALL(cudaFree(this->currentMask_gpu));
-    this->currentMask_gpu = nullptr;
-
-    if (this->reference2_gpu != nullptr)
-        cudaCommon_free(&this->reference2_gpu);
-    this->reference2_gpu = nullptr;
-    if (this->floating2_gpu != nullptr)
-        cudaCommon_free(&this->floating2_gpu);
-    this->floating2_gpu = nullptr;
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::DeallocateCurrentInputImage");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::SetOptimiser() {
-    if (this->useConjGradient)
-        this->optimiser = new reg_conjugateGradient_gpu();
-    else this->optimiser = new reg_optimiser_gpu();
-    // The cpp and grad images are converted to float* instead of float4
-    // to enable compatibility with the CPU class
-    this->optimiser->Initialise(this->controlPointGrid->nvox,
-                                this->controlPointGrid->nz > 1 ? 3 : 2,
-                                this->optimiseX,
-                                this->optimiseY,
-                                this->optimiseZ,
-                                this->maxIterationNumber,
-                                0, // currentIterationNumber,
-                                this,
-                                reinterpret_cast<float*>(this->controlPointGrid_gpu),
-                                reinterpret_cast<float*>(this->transformationGradient_gpu));
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::SetOptimiser");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-float reg_f3d_gpu::NormaliseGradient() {
-    // First compute the gradient max length for normalisation purpose
-    float length = reg_getMaximalLength_gpu(this->transformationGradient_gpu, this->optimiser->GetVoxNumber());
-
-    if (strcmp(this->executableName, "NiftyReg F3D GPU") == 0) {
-        // The gradient is normalised if we are running F3D
-        // It will be normalised later when running symmetric or F3D2
-#ifndef NDEBUG
-        char text[255];
-        sprintf(text, "Objective function gradient maximal length: %g", length);
-        reg_print_msg_debug(text);
-#endif
-        reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), this->transformationGradient_gpu, 1.f / length);
-    }
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::NormaliseGradient");
-#endif
-    // Returns the largest gradient distance
-    return length;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-int reg_f3d_gpu::CheckMemoryMB() {
-    if (!this->initialised)
-        reg_f3d::Initialise();
-
-    size_t referenceVoxelNumber = this->referencePyramid[this->levelToPerform - 1]->nx *
-        this->referencePyramid[this->levelToPerform - 1]->ny *
-        this->referencePyramid[this->levelToPerform - 1]->nz;
-
-    size_t warpedVoxelNumber = this->referencePyramid[this->levelToPerform - 1]->nx *
-        this->referencePyramid[this->levelToPerform - 1]->ny *
-        this->referencePyramid[this->levelToPerform - 1]->nz *
-        this->floatingPyramid[this->levelToPerform - 1]->nt;
-
-    size_t totalMemoryRequiered = 0;
-    // reference image
-    totalMemoryRequiered += this->referencePyramid[this->levelToPerform - 1]->nvox * sizeof(float);
-
-    // floating image
-    totalMemoryRequiered += this->floatingPyramid[this->levelToPerform - 1]->nvox * sizeof(float);
-
-    // warped image
-    totalMemoryRequiered += warpedVoxelNumber * sizeof(float);
-
-    // mask image
-    totalMemoryRequiered += this->activeVoxelNumber[this->levelToPerform - 1] * sizeof(int);
-
-    // deformation field
-    totalMemoryRequiered += referenceVoxelNumber * sizeof(float4);
-
-    // voxel based intensity gradient
-    totalMemoryRequiered += referenceVoxelNumber * sizeof(float4);
-
-    // voxel based NMI gradient + smoothing
-    totalMemoryRequiered += 2 * referenceVoxelNumber * sizeof(float4);
-
-    // control point grid
-    size_t cp = 1;
-    cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->nx *
-                     this->referencePyramid[this->levelToPerform - 1]->dx /
-                     this->spacing[0]) + 5;
-    cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->ny *
-                     this->referencePyramid[this->levelToPerform - 1]->dy /
-                     this->spacing[1]) + 5;
-    if (this->referencePyramid[this->levelToPerform - 1]->nz > 1)
-        cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->nz *
-                         this->referencePyramid[this->levelToPerform - 1]->dz /
-                         this->spacing[2]) + 5;
-    totalMemoryRequiered += cp * sizeof(float4);
-
-    // node based NMI gradient
-    totalMemoryRequiered += cp * sizeof(float4);
-
-    // conjugate gradient
-    totalMemoryRequiered += 2 * cp * sizeof(float4);
-
-
-    // HERE TODO
-
-    // jacobian array
-    if (this->jacobianLogWeight > 0)
-        totalMemoryRequiered += 10 * referenceVoxelNumber * sizeof(float);
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::CheckMemoryMB");
-#endif
-    return (int)(ceil((float)totalMemoryRequiered / float(1024 * 1024)));
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
-    if (this->measure_gpu_nmi == nullptr)
-        this->measure_gpu_nmi = new reg_nmi_gpu;
-    this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0);
-    // I am here adding 4 to the specified bin number to accomodate for
-    // the spline support
-    this->measure_gpu_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::UseNMISetFloatingBinNumber");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
-    if (this->measure_gpu_nmi == nullptr)
-        this->measure_gpu_nmi = new reg_nmi_gpu;
-    this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0);
-    // I am here adding 4 to the specified bin number to accomodate for
-    // the spline support
-    this->measure_gpu_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::UseNMISetReferenceBinNumber");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseSSD(int timepoint) {
-    if (this->measure_gpu_ssd == nullptr)
-        this->measure_gpu_ssd = new reg_ssd_gpu;
-    this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::UseSSD");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseKLDivergence(int timepoint) {
-    if (this->measure_gpu_kld == nullptr)
-        this->measure_gpu_kld = new reg_kld_gpu;
-    this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::UseKLDivergence");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) {
-    if (this->measure_gpu_lncc == nullptr)
-        this->measure_gpu_lncc = new reg_lncc_gpu;
-    this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0);
-    this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint, stddev);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::UseLNCC");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::UseDTI(int timepoint[6]) {
-    reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring");
-    reg_exit();
-
-    // if(this->measure_gpu_dti==nullptr)
-    //    this->measure_gpu_dti=new reg_dti_gpu;
-    // for(int i=0; i<6; ++i)
-    //    this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]);
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_f3d_gpu::InitialiseSimilarity() {
-    // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
-    if (this->measure_gpu_nmi == nullptr &&
-        this->measure_gpu_ssd == nullptr &&
-        this->measure_gpu_dti == nullptr &&
-        this->measure_gpu_kld == nullptr &&
-        this->measure_gpu_lncc == nullptr) {
-        measure_gpu_nmi = new reg_nmi_gpu;
-        for (int i = 0; i < this->inputReference->nt; ++i)
-            measure_gpu_nmi->SetTimepointWeight(i, 1.0);
-    }
-    if (this->measure_gpu_nmi != nullptr) {
-        this->measure_gpu_nmi->InitialiseMeasure(this->reference,
-                                                 this->floating,
-                                                 this->currentMask,
-                                                 this->activeVoxelNumber[this->currentLevel],
-                                                 this->warped,
-                                                 this->warpedGradient,
-                                                 this->voxelBasedMeasureGradient,
-                                                 &this->reference_gpu,
-                                                 &this->floating_gpu,
-                                                 &this->currentMask_gpu,
-                                                 &this->warped_gpu,
-                                                 &this->warpedGradientImage_gpu,
-                                                 &this->voxelBasedMeasureGradientImage_gpu);
-        this->measure_nmi = this->measure_gpu_nmi;
-    }
-
-    if (this->measure_gpu_ssd != nullptr) {
-        this->measure_gpu_ssd->InitialiseMeasure(this->reference,
-                                                 this->floating,
-                                                 this->currentMask,
-                                                 this->activeVoxelNumber[this->currentLevel],
-                                                 this->warped,
-                                                 this->warpedGradient,
-                                                 this->voxelBasedMeasureGradient,
-                                                 this->localWeightSimCurrent,
-                                                 &this->reference_gpu,
-                                                 &this->floating_gpu,
-                                                 &this->currentMask_gpu,
-                                                 &this->warped_gpu,
-                                                 &this->warpedGradientImage_gpu,
-                                                 &this->voxelBasedMeasureGradientImage_gpu);
-        this->measure_ssd = this->measure_gpu_ssd;
-    }
-
-    if (this->measure_gpu_kld != nullptr) {
-        this->measure_gpu_kld->InitialiseMeasure(this->reference,
-                                                 this->floating,
-                                                 this->currentMask,
-                                                 this->activeVoxelNumber[this->currentLevel],
-                                                 this->warped,
-                                                 this->warpedGradient,
-                                                 this->voxelBasedMeasureGradient,
-                                                 &this->reference_gpu,
-                                                 &this->floating_gpu,
-                                                 &this->currentMask_gpu,
-                                                 &this->warped_gpu,
-                                                 &this->warpedGradientImage_gpu,
-                                                 &this->voxelBasedMeasureGradientImage_gpu);
-        this->measure_kld = this->measure_gpu_kld;
-    }
-
-    if (this->measure_gpu_lncc != nullptr) {
-        this->measure_gpu_lncc->InitialiseMeasure(this->reference,
-                                                  this->floating,
-                                                  this->currentMask,
-                                                  this->activeVoxelNumber[this->currentLevel],
-                                                  this->warped,
-                                                  this->warpedGradient,
-                                                  this->voxelBasedMeasureGradient,
-                                                  &this->reference_gpu,
-                                                  &this->floating_gpu,
-                                                  &this->currentMask_gpu,
-                                                  &this->warped_gpu,
-                                                  &this->warpedGradientImage_gpu,
-                                                  &this->voxelBasedMeasureGradientImage_gpu);
-        this->measure_lncc = this->measure_gpu_lncc;
-    }
-
-    if (this->measure_gpu_dti != nullptr) {
-        this->measure_gpu_dti->InitialiseMeasure(this->reference,
-                                                 this->floating,
-                                                 this->currentMask,
-                                                 this->activeVoxelNumber[this->currentLevel],
-                                                 this->warped,
-                                                 this->warpedGradient,
-                                                 this->voxelBasedMeasureGradient,
-                                                 &this->reference_gpu,
-                                                 &this->floating_gpu,
-                                                 &this->currentMask_gpu,
-                                                 &this->warped_gpu,
-                                                 &this->warpedGradientImage_gpu,
-                                                 &this->voxelBasedMeasureGradientImage_gpu);
-        this->measure_dti = this->measure_gpu_dti;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d_gpu::InitialiseSimilarity()");
-#endif
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h
deleted file mode 100755
index 94167eba..00000000
--- a/reg-lib/cuda/_reg_f3d_gpu.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- *  _reg_f3d_gpu.h
- *
- *
- *  Created by Marc Modat on 19/11/2010.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "_reg_resampling_gpu.h"
-#include "_reg_globalTransformation_gpu.h"
-#include "_reg_localTransformation_gpu.h"
-#include "_reg_nmi_gpu.h"
-#include "_reg_ssd_gpu.h"
-#include "_reg_tools_gpu.h"
-#include "_reg_common_cuda.h"
-#include "_reg_optimiser_gpu.h"
-#include "_reg_f3d.h"
-
-class reg_f3d_gpu: public reg_f3d<float> {
-protected:
-    // cuda variables
-    cudaArray *reference_gpu;
-    cudaArray *floating_gpu;
-    int *currentMask_gpu;
-    float *warped_gpu;
-    float4 *controlPointGrid_gpu;
-    float4 *deformationFieldImage_gpu;
-    float4 *warpedGradientImage_gpu;
-    float4 *voxelBasedMeasureGradientImage_gpu;
-    float4 *transformationGradient_gpu;
-
-    // cuda variable for multispectral registration
-    cudaArray *reference2_gpu;
-    cudaArray *floating2_gpu;
-    float *warped2_gpu;
-    float4 *warpedGradientImage2_gpu;
-
-    // Measure related variables
-    reg_ssd_gpu *measure_gpu_ssd;
-    reg_kld_gpu *measure_gpu_kld;
-    reg_dti_gpu *measure_gpu_dti;
-    reg_lncc_gpu *measure_gpu_lncc;
-    reg_nmi_gpu *measure_gpu_nmi;
-
-    float InitialiseCurrentLevel();
-    void DeallocateCurrentInputImage();
-    void AllocateWarped();
-    void DeallocateWarped();
-    void AllocateDeformationField();
-    void DeallocateDeformationField();
-    void AllocateWarpedGradient();
-    void DeallocateWarpedGradient();
-    void AllocateVoxelBasedMeasureGradient();
-    void DeallocateVoxelBasedMeasureGradient();
-    void AllocateTransformationGradient();
-    void DeallocateTransformationGradient();
-
-    double ComputeJacobianBasedPenaltyTerm(int);
-    double ComputeBendingEnergyPenaltyTerm();
-    double ComputeLinearEnergyPenaltyTerm();
-    double ComputeLandmarkDistancePenaltyTerm();
-    void GetDeformationField();
-    void WarpFloatingImage(int);
-    void GetVoxelBasedGradient();
-    void GetSimilarityMeasureGradient();
-    void GetBendingEnergyGradient();
-    void GetLinearEnergyGradient();
-    void GetJacobianBasedGradient();
-    void GetLandmarkDistanceGradient();
-    void SmoothGradient();
-    void GetApproximatedGradient();
-    void UpdateParameters(float);
-    void SetOptimiser();
-    // void SetGradientImageToZero();
-    float NormaliseGradient();
-    void InitialiseSimilarity();
-
-public:
-    void UseNMISetReferenceBinNumber(int, int);
-    void UseNMISetFloatingBinNumber(int, int);
-    void UseSSD(int timepoint);
-    void UseKLDivergence(int timepoint);
-    void UseDTI(int timepoint[6]);
-    void UseLNCC(int timepoint, float stdDevKernel);
-    nifti_image** GetWarpedImage();
-
-    reg_f3d_gpu(int refTimePoint, int floTimePoint);
-    ~reg_f3d_gpu();
-    int CheckMemoryMB();
-};
-
-#include "_reg_f3d_gpu.cpp"

From 57405d63b2d6c1f3adf9b00401a267194738c2be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 18 Jan 2023 15:02:08 +0000
Subject: [PATCH 036/314] Combine _reg_f3d2 and _reg_f3d_sym

---
 niftyreg_build_version.txt |    2 +-
 reg-lib/CMakeLists.txt     |    2 -
 reg-lib/_reg_f3d.h         |   28 +-
 reg-lib/_reg_f3d2.cpp      | 2123 ++++++++++++++++++++++++++++--------
 reg-lib/_reg_f3d2.h        |  104 +-
 reg-lib/_reg_f3d_sym.cpp   | 1824 -------------------------------
 reg-lib/_reg_f3d_sym.h     |  104 --
 7 files changed, 1792 insertions(+), 2395 deletions(-)
 delete mode 100644 reg-lib/_reg_f3d_sym.cpp
 delete mode 100644 reg-lib/_reg_f3d_sym.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 15c44e93..fa8f08cb 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-149
+150
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 8a8f80ff..0d688304 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -239,8 +239,6 @@ set(_reg_f3d_files
   _reg_f3d.h
   # _reg_f3d2.cpp
   # _reg_f3d2.h
-  # _reg_f3d_sym.cpp
-  # _reg_f3d_sym.h
 )
 set(_reg_f3d_libraries
   _reg_blockMatching
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index b986237e..6a0251f3 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -50,7 +50,7 @@ class reg_f3d: public reg_base<T> {
     virtual void SmoothGradient() override;
     virtual void GetObjectiveFunctionGradient() override;
     virtual void GetApproximatedGradient() override;
-    void GetSimilarityMeasureGradient();
+    virtual void GetSimilarityMeasureGradient() override;
 
     virtual void GetDeformationField() override;
     virtual void DisplayCurrentLevelParameters() override;
@@ -72,30 +72,22 @@ class reg_f3d: public reg_base<T> {
     reg_f3d(int refTimePoint, int floTimePoint);
     virtual ~reg_f3d();
 
-    void SetControlPointGridImage(nifti_image*);
-    void SetBendingEnergyWeight(T);
-    void SetLinearEnergyWeight(T);
-    void SetJacobianLogWeight(T);
-    void ApproximateJacobianLog();
-    void DoNotApproximateJacobianLog();
-    void SetSpacing(unsigned int, T);
+    virtual void SetControlPointGridImage(nifti_image*);
+    virtual void SetBendingEnergyWeight(T);
+    virtual void SetLinearEnergyWeight(T);
+    virtual void SetJacobianLogWeight(T);
+    virtual void ApproximateJacobianLog();
+    virtual void DoNotApproximateJacobianLog();
+    virtual void SetSpacing(unsigned int, T);
+    virtual void NoGridRefinement() { gridRefinement = false; }
 
-    void NoGridRefinement() { gridRefinement = false; }
     // F3D2 specific options
-    virtual void SetCompositionStepNumber(int) {}
-    virtual void ApproximateComposition() {}
-    virtual void UseSimilaritySymmetry() {}
     virtual void UseBCHUpdate(int) {}
     virtual void UseGradientCumulativeExp() {}
     virtual void DoNotUseGradientCumulativeExp() {}
-
-    // f3d_sym specific options
     virtual void SetFloatingMask(nifti_image*) {}
     virtual void SetInverseConsistencyWeight(T) {}
-    virtual nifti_image *GetBackwardControlPointPositionImage() { return nullptr; }
-
-    // f3d_gpu specific option
-    virtual int CheckMemoryMB() { return EXIT_SUCCESS; }
+    virtual nifti_image* GetBackwardControlPointPositionImage() { return nullptr; }
 
     virtual void CheckParameters() override;
     virtual void Initialise() override;
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 6fc83deb..d4e2cc2b 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -13,481 +13,1752 @@
 #include "_reg_f3d2.h"
 
 /* *************************************************************** */
+template <class T>
+reg_f3d2<T>::reg_f3d2(int refTimePoint, int floTimePoint):
+    reg_f3d<T>::reg_f3d(refTimePoint, floTimePoint) {
+    this->executableName = (char*)"NiftyReg F3D2";
+    backwardControlPointGrid = nullptr;
+    backwardWarped = nullptr;
+    backwardWarpedGradientImage = nullptr;
+    backwardDeformationFieldImage = nullptr;
+    backwardVoxelBasedMeasureGradientImage = nullptr;
+    backwardTransformationGradient = nullptr;
+    floatingMaskImage = nullptr;
+    floatingMask = nullptr;
+    floatingMaskPyramid = nullptr;
+    backwardActiveVoxelNumber = nullptr;
+    backwardJacobianMatrix = nullptr;
+    inverseConsistencyWeight = 0;
+    bchUpdate = false;
+    useGradientCumulativeExp = true;
+    bchUpdateValue = 0;
+
+#ifndef NDEBUG
+    reg_print_msg_debug("reg_f3d2 constructor called");
+#endif
+}
 /* *************************************************************** */
 template <class T>
-reg_f3d2<T>::reg_f3d2(int refTimePoint,int floTimePoint)
-   :reg_f3d_sym<T>::reg_f3d_sym(refTimePoint,floTimePoint)
-{
-   this->executableName=(char *)"NiftyReg F3D2";
-   this->inverseConsistencyWeight=0;
-   this->BCHUpdate=false;
-   this->useGradientCumulativeExp=true;
-   this->BCHUpdateValue=0;
+reg_f3d2<T>::~reg_f3d2() {
+    if (backwardControlPointGrid) {
+        nifti_image_free(backwardControlPointGrid);
+        backwardControlPointGrid = nullptr;
+    }
+
+    if (floatingMaskPyramid) {
+        if (this->usePyramid) {
+            for (unsigned int i = 0; i < this->levelToPerform; i++) {
+                if (floatingMaskPyramid[i]) {
+                    free(floatingMaskPyramid[i]);
+                    floatingMaskPyramid[i] = nullptr;
+                }
+            }
+        } else {
+            if (floatingMaskPyramid[0]) {
+                free(floatingMaskPyramid[0]);
+                floatingMaskPyramid[0] = nullptr;
+            }
+        }
+        free(floatingMaskPyramid);
+        floatingMaskPyramid = nullptr;
+    }
 
+    if (backwardActiveVoxelNumber) {
+        free(backwardActiveVoxelNumber);
+        backwardActiveVoxelNumber = nullptr;
+    }
+#ifndef NDEBUG
+    reg_print_msg_debug("reg_f3d2 destructor called");
+#endif
+}
+/* *************************************************************** */
+template<class T>
+void reg_f3d2<T>::SetFloatingMask(nifti_image *m) {
+    floatingMaskImage = m;
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_f3d2 constructor called");
+    reg_print_fct_debug("reg_f3d2<T>::~SetFloatingMask");
 #endif
 }
 /* *************************************************************** */
+template<class T>
+void reg_f3d2<T>::SetInverseConsistencyWeight(T w) {
+    inverseConsistencyWeight = w;
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::SetInverseConsistencyWeight");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+T reg_f3d2<T>::InitialiseCurrentLevel() {
+    // Refine the control point grids if required
+    if (this->gridRefinement) {
+        if (this->currentLevel == 0) {
+            this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast<T>(powf(16, this->levelNumber - 1));
+            this->linearEnergyWeight = this->linearEnergyWeight / static_cast<T>(powf(3, this->levelNumber - 1));
+        } else {
+            reg_spline_refineControlPointGrid(this->controlPointGrid);
+            reg_spline_refineControlPointGrid(backwardControlPointGrid);
+            this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast<T>(16);
+            this->linearEnergyWeight = this->linearEnergyWeight * static_cast<T>(3);
+        }
+    }
+
+    // Set the mask images
+    if (this->usePyramid) {
+        this->currentMask = this->maskPyramid[this->currentLevel];
+        floatingMask = floatingMaskPyramid[this->currentLevel];
+    } else {
+        this->currentMask = this->maskPyramid[0];
+        floatingMask = floatingMaskPyramid[0];
+    }
+
+    // Define the initial step size for the gradient ascent optimisation
+    T maxStepSize = this->reference->dx;
+    maxStepSize = this->reference->dy > maxStepSize ? this->reference->dy : maxStepSize;
+    maxStepSize = this->floating->dx > maxStepSize ? this->floating->dx : maxStepSize;
+    maxStepSize = this->floating->dy > maxStepSize ? this->floating->dy : maxStepSize;
+    if (this->reference->ndim > 2) {
+        maxStepSize = (this->reference->dz > maxStepSize) ? this->reference->dz : maxStepSize;
+        maxStepSize = (this->floating->dz > maxStepSize) ? this->floating->dz : maxStepSize;
+    }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::InitialiseCurrentLevel");
+#endif
+    return maxStepSize;
+}
 /* *************************************************************** */
 template <class T>
-reg_f3d2<T>::~reg_f3d2()
-{
+void reg_f3d2<T>::DeallocateCurrentInputImage() {
+    reg_f3d<T>::DeallocateCurrentInputImage();
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_f3d2 destructor called");
+    reg_print_fct_debug("reg_f3d2<T>::DeallocateCurrentInputImage");
 #endif
 }
 /* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::AllocateWarped() {
+    DeallocateWarped();
+
+    reg_f3d<T>::AllocateWarped();
+    if (!this->floating) {
+        reg_print_fct_error("reg_f3d2<T>::AllocateWarped()");
+        reg_print_msg_error("The floating image is not defined");
+        reg_exit();
+    }
+    backwardWarped = nifti_copy_nim_info(this->floating);
+    backwardWarped->dim[0] = backwardWarped->ndim = this->reference->ndim;
+    backwardWarped->dim[4] = backwardWarped->nt = this->reference->nt;
+    backwardWarped->pixdim[4] = backwardWarped->dt = 1;
+    backwardWarped->nvox = size_t(backwardWarped->nx * backwardWarped->ny * backwardWarped->nz * backwardWarped->nt);
+    backwardWarped->datatype = this->reference->datatype;
+    backwardWarped->nbyper = this->reference->nbyper;
+    backwardWarped->data = calloc(backwardWarped->nvox, backwardWarped->nbyper);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::AllocateWarped");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::DeallocateWarped() {
+    reg_f3d<T>::DeallocateWarped();
+    if (backwardWarped) {
+        nifti_image_free(backwardWarped);
+        backwardWarped = nullptr;
+    }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::DeallocateWarped");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::AllocateDeformationField() {
+    DeallocateDeformationField();
+
+    reg_f3d<T>::AllocateDeformationField();
+    if (!this->floating) {
+        reg_print_fct_error("reg_f3d2<T>::AllocateDeformationField()");
+        reg_print_msg_error("The floating image is not defined");
+        reg_exit();
+    }
+    if (!backwardControlPointGrid) {
+        reg_print_fct_error("reg_f3d2<T>::AllocateDeformationField()");
+        reg_print_msg_error("The backward control point image is not defined");
+        reg_exit();
+    }
+    backwardDeformationFieldImage = nifti_copy_nim_info(this->floating);
+    backwardDeformationFieldImage->dim[0] = backwardDeformationFieldImage->ndim = 5;
+    backwardDeformationFieldImage->dim[1] = backwardDeformationFieldImage->nx = this->floating->nx;
+    backwardDeformationFieldImage->dim[2] = backwardDeformationFieldImage->ny = this->floating->ny;
+    backwardDeformationFieldImage->dim[3] = backwardDeformationFieldImage->nz = this->floating->nz;
+    backwardDeformationFieldImage->dim[4] = backwardDeformationFieldImage->nt = 1;
+    backwardDeformationFieldImage->pixdim[4] = backwardDeformationFieldImage->dt = 1;
+    if (this->floating->nz == 1)
+        backwardDeformationFieldImage->dim[5] = backwardDeformationFieldImage->nu = 2;
+    else backwardDeformationFieldImage->dim[5] = backwardDeformationFieldImage->nu = 3;
+    backwardDeformationFieldImage->pixdim[5] = backwardDeformationFieldImage->du = 1;
+    backwardDeformationFieldImage->dim[6] = backwardDeformationFieldImage->nv = 1;
+    backwardDeformationFieldImage->pixdim[6] = backwardDeformationFieldImage->dv = 1;
+    backwardDeformationFieldImage->dim[7] = backwardDeformationFieldImage->nw = 1;
+    backwardDeformationFieldImage->pixdim[7] = backwardDeformationFieldImage->dw = 1;
+    backwardDeformationFieldImage->nvox = size_t(backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny *
+                                                 backwardDeformationFieldImage->nz * backwardDeformationFieldImage->nt *
+                                                 backwardDeformationFieldImage->nu);
+    backwardDeformationFieldImage->nbyper = backwardControlPointGrid->nbyper;
+    backwardDeformationFieldImage->datatype = backwardControlPointGrid->datatype;
+    backwardDeformationFieldImage->data = calloc(backwardDeformationFieldImage->nvox,
+                                                 backwardDeformationFieldImage->nbyper);
+    backwardDeformationFieldImage->intent_code = NIFTI_INTENT_VECTOR;
+    memset(backwardDeformationFieldImage->intent_name, 0, 16);
+    strcpy(backwardDeformationFieldImage->intent_name, "NREG_TRANS");
+    backwardDeformationFieldImage->intent_p1 = DEF_FIELD;
+    backwardDeformationFieldImage->scl_slope = 1;
+    backwardDeformationFieldImage->scl_inter = 0;
+
+    if (this->measure_dti)
+        backwardJacobianMatrix = (mat33*)malloc(backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny *
+                                                backwardDeformationFieldImage->nz * sizeof(mat33));
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::AllocateDeformationField");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::DeallocateDeformationField() {
+    reg_f3d<T>::DeallocateDeformationField();
+    if (backwardDeformationFieldImage) {
+        nifti_image_free(backwardDeformationFieldImage);
+        backwardDeformationFieldImage = nullptr;
+    }
+    if (backwardJacobianMatrix) {
+        free(backwardJacobianMatrix);
+        backwardJacobianMatrix = nullptr;
+    }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::DeallocateDeformationField");
+#endif
+}
 /* *************************************************************** */
 template <class T>
-void reg_f3d2<T>::UseBCHUpdate(int v)
-{
-   this->BCHUpdate = true;
-   this->useGradientCumulativeExp = false;
-   this->BCHUpdateValue=v;
-   return;
+void reg_f3d2<T>::AllocateWarpedGradient() {
+    DeallocateWarpedGradient();
+
+    reg_f3d<T>::AllocateWarpedGradient();
+    if (!backwardDeformationFieldImage) {
+        reg_print_fct_error("reg_f3d2<T>::AllocateWarpedGradient()");
+        reg_print_msg_error("The backward control point image is not defined");
+        reg_exit();
+    }
+    backwardWarpedGradientImage = nifti_copy_nim_info(backwardDeformationFieldImage);
+    backwardWarpedGradientImage->data = calloc(backwardWarpedGradientImage->nvox,
+                                               backwardWarpedGradientImage->nbyper);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::AllocateWarpedGradient");
+#endif
 }
 /* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::DeallocateWarpedGradient() {
+    reg_f3d<T>::DeallocateWarpedGradient();
+    if (backwardWarpedGradientImage) {
+        nifti_image_free(backwardWarpedGradientImage);
+        backwardWarpedGradientImage = nullptr;
+    }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::DeallocateWarpedGradient");
+#endif
+}
 /* *************************************************************** */
 template <class T>
-void reg_f3d2<T>::UseGradientCumulativeExp()
-{
-   this->BCHUpdate = false;
-   this->useGradientCumulativeExp = true;
+void reg_f3d2<T>::AllocateVoxelBasedMeasureGradient() {
+    DeallocateVoxelBasedMeasureGradient();
+
+    reg_f3d<T>::AllocateVoxelBasedMeasureGradient();
+    if (!backwardDeformationFieldImage) {
+        reg_print_fct_error("reg_f3d2<T>::AllocateVoxelBasedMeasureGradient()");
+        reg_print_msg_error("The backward control point image is not defined");
+        reg_exit();
+    }
+    backwardVoxelBasedMeasureGradientImage = nifti_copy_nim_info(backwardDeformationFieldImage);
+    backwardVoxelBasedMeasureGradientImage->data = calloc(backwardVoxelBasedMeasureGradientImage->nvox,
+                                                          backwardVoxelBasedMeasureGradientImage->nbyper);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::AllocateVoxelBasedMeasureGradient");
+#endif
 }
 /* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::DeallocateVoxelBasedMeasureGradient() {
+    reg_f3d<T>::DeallocateVoxelBasedMeasureGradient();
+    if (backwardVoxelBasedMeasureGradientImage) {
+        nifti_image_free(backwardVoxelBasedMeasureGradientImage);
+        backwardVoxelBasedMeasureGradientImage = nullptr;
+    }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::DeallocateVoxelBasedMeasureGradient");
+#endif
+}
 /* *************************************************************** */
 template <class T>
-void reg_f3d2<T>::DoNotUseGradientCumulativeExp()
-{
-   this->useGradientCumulativeExp = false;
+void reg_f3d2<T>::AllocateTransformationGradient() {
+    DeallocateTransformationGradient();
+
+    reg_f3d<T>::AllocateTransformationGradient();
+    if (!backwardControlPointGrid) {
+        reg_print_fct_error("reg_f3d2<T>::AllocateTransformationGradient()");
+        reg_print_msg_error("The backward control point image is not defined");
+        reg_exit();
+    }
+    backwardTransformationGradient = nifti_copy_nim_info(backwardControlPointGrid);
+    backwardTransformationGradient->data = calloc(backwardTransformationGradient->nvox,
+                                                  backwardTransformationGradient->nbyper);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::AllocateTransformationGradient");
+#endif
 }
 /* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::DeallocateTransformationGradient() {
+    reg_f3d<T>::DeallocateTransformationGradient();
+    if (backwardTransformationGradient) {
+        nifti_image_free(backwardTransformationGradient);
+        backwardTransformationGradient = nullptr;
+    }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::DeallocateTransformationGradient");
+#endif
+}
 /* *************************************************************** */
 template<class T>
-void reg_f3d2<T>::Initialise()
-{
-   reg_f3d_sym<T>::Initialise();
+void reg_f3d2<T>::CheckParameters() {
+    reg_f3d<T>::CheckParameters();
+
+    // CHECK THE FLOATING MASK DIMENSION IF IT IS DEFINED
+    if (floatingMaskImage) {
+        if (this->inputFloating->nx != floatingMaskImage->nx ||
+            this->inputFloating->ny != floatingMaskImage->ny ||
+            this->inputFloating->nz != floatingMaskImage->nz) {
+            reg_print_fct_error("reg_f3d2<T>::CheckParameters()");
+            reg_print_msg_error("The floating image and its mask have different dimension");
+            reg_exit();
+        }
+    }
 
-   // Convert the control point grid into velocity field parametrisation
-   this->controlPointGrid->intent_p1=SPLINE_VEL_GRID;
-   this->backwardControlPointGrid->intent_p1=SPLINE_VEL_GRID;
-   // Set the number of composition to 6 by default
-   this->controlPointGrid->intent_p2=6;
-   this->backwardControlPointGrid->intent_p2=6;
+    // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
+    T penaltySum = (this->bendingEnergyWeight + this->linearEnergyWeight + this->jacobianLogWeight +
+                    inverseConsistencyWeight + this->landmarkRegWeight);
+    if (penaltySum >= 1) {
+        this->similarityWeight = 0;
+        this->bendingEnergyWeight /= penaltySum;
+        this->linearEnergyWeight /= penaltySum;
+        this->jacobianLogWeight /= penaltySum;
+        inverseConsistencyWeight /= penaltySum;
+        this->landmarkRegWeight /= penaltySum;
+    } else this->similarityWeight = 1 - penaltySum;
 
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_f3d2::Initialise_f3d() done");
+    reg_print_fct_debug("reg_f3d2<T>::CheckParameters");
 #endif
 }
 /* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::GetDeformationField() {
+    reg_spline_getDeformationField(this->controlPointGrid,
+                                   this->deformationFieldImage,
+                                   this->currentMask,
+                                   false, //composition
+                                   true); // bspline
+    reg_spline_getDeformationField(backwardControlPointGrid,
+                                   backwardDeformationFieldImage,
+                                   floatingMask,
+                                   false, //composition
+                                   true); // bspline
+
+    // By default the number of steps is automatically updated
+    bool updateStepNumber = true;
+    // The provided step number is used for the final resampling
+    if (!this->optimiser)
+        updateStepNumber = false;
+#ifndef NDEBUG
+    char text[255];
+    sprintf(text, "Velocity integration forward. Step number update=%i", updateStepNumber);
+    reg_print_msg_debug(text);
+#endif
+    // The forward transformation is computed using the scaling-and-squaring approach
+    reg_spline_getDefFieldFromVelocityGrid(this->controlPointGrid,
+                                           this->deformationFieldImage,
+                                           updateStepNumber);
+#ifndef NDEBUG
+    sprintf(text, "Velocity integration backward. Step number update=%i", updateStepNumber);
+    reg_print_msg_debug(text);
+#endif
+    // The number of step number is copied over from the forward transformation
+    backwardControlPointGrid->intent_p2 = this->controlPointGrid->intent_p2;
+    // The backward transformation is computed using the scaling-and-squaring approach
+    reg_spline_getDefFieldFromVelocityGrid(backwardControlPointGrid,
+                                           backwardDeformationFieldImage,
+                                           false);
+}
 /* *************************************************************** */
 template <class T>
-void reg_f3d2<T>::GetDeformationField()
-{
-   // By default the number of steps is automatically updated
-   bool updateStepNumber=true;
-   // The provided step number is used for the final resampling
-   if(this->optimiser==nullptr)
-      updateStepNumber=false;
+void reg_f3d2<T>::WarpFloatingImage(int inter) {
+    // Compute the deformation fields
+    GetDeformationField();
+
+    // Resample the floating image
+    if (!this->measure_dti) {
+        reg_resampleImage(this->floating,
+                          this->warped,
+                          this->deformationFieldImage,
+                          this->currentMask,
+                          inter,
+                          this->warpedPaddingValue);
+    } else {
+        reg_defField_getJacobianMatrix(this->deformationFieldImage,
+                                       this->forwardJacobianMatrix);
+        /*DTI needs fixing!
+        reg_resampleImage(this->floating,
+                          this->warped,
+                          this->deformationFieldImage,
+                          this->currentMask,
+                          inter,
+                          this->warpedPaddingValue,
+                          this->measure_dti->GetActiveTimepoints(),
+                          this->forwardJacobianMatrix);*/
+    }
+
+    // Resample the reference image
+    if (!this->measure_dti) {
+        reg_resampleImage(this->reference, // input image
+                          backwardWarped, // warped input image
+                          backwardDeformationFieldImage, // deformation field
+                          floatingMask, // mask
+                          inter, // interpolation type
+                          this->warpedPaddingValue); // padding value
+    } else {
+        reg_defField_getJacobianMatrix(backwardDeformationFieldImage,
+                                       backwardJacobianMatrix);
+        /* DTI needs fixing
+       reg_resampleImage(this->reference, // input image
+                           backwardWarped, // warped input image
+                           backwardDeformationFieldImage, // deformation field
+                           floatingMask, // mask
+                           inter, // interpolation type
+                           this->warpedPaddingValue, // padding value
+                           this->measure_dti->GetActiveTimepoints(),
+                           backwardJacobianMatrix);*/
+    }
 #ifndef NDEBUG
-   char text[255];
-   sprintf(text, "Velocity integration forward. Step number update=%i",updateStepNumber);
-   reg_print_msg_debug(text);
+    reg_print_fct_debug("reg_f3d2<T>::WarpFloatingImage");
 #endif
-   // The forward transformation is computed using the scaling-and-squaring approach
-   reg_spline_getDefFieldFromVelocityGrid(this->controlPointGrid,
-                                          this->deformationFieldImage,
-                                          updateStepNumber
-                                          );
+}
+/* *************************************************************** */
+template <class T>
+double reg_f3d2<T>::ComputeJacobianBasedPenaltyTerm(int type) {
+    if (this->jacobianLogWeight <= 0) return 0;
+
+    double forwardPenaltyTerm = reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(type);
+
+    bool approx = type == 2 ? false : this->jacobianLogApproximation;
+
+    double backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(backwardControlPointGrid,
+                                                                   this->floating,
+                                                                   approx);
+
+    unsigned int maxit = 5;
+    if (type > 0) maxit = 20;
+    unsigned int it = 0;
+    while (backwardPenaltyTerm != backwardPenaltyTerm && it < maxit) {
+        backwardPenaltyTerm = reg_spline_correctFolding(backwardControlPointGrid,
+                                                        this->floating,
+                                                        approx);
 #ifndef NDEBUG
-   sprintf(text, "Velocity integration backward. Step number update=%i",updateStepNumber);
-   reg_print_msg_debug(text);
+        reg_print_msg_debug("Folding correction - Backward transformation");
+#endif
+        it++;
+    }
+    if (type > 0 && it > 0) {
+        if (backwardPenaltyTerm != backwardPenaltyTerm) {
+            this->optimiser->RestoreBestDOF();
+#ifndef NDEBUG
+            reg_print_fct_warn("reg_f3d2<T>::ComputeJacobianBasedPenaltyTerm()");
+            reg_print_msg_warn("The backward transformation folding correction scheme failed");
+#endif
+        } else {
+#ifdef NDEBUG
+            if (this->verbose) {
 #endif
-   // The number of step number is copied over from the forward transformation
-   this->backwardControlPointGrid->intent_p2=this->controlPointGrid->intent_p2;
-   // The backward transformation is computed using the scaling-and-squaring approach
-   reg_spline_getDefFieldFromVelocityGrid(this->backwardControlPointGrid,
-                                          this->backwardDeformationFieldImage,
-                                          false
-                                          );
-   return;
+                char text[255];
+                sprintf(text, "Backward transformation folding correction, %i step(s)", it);
+                reg_print_msg_debug(text);
+#ifdef NDEBUG
+            }
+#endif
+        }
+    }
+    backwardPenaltyTerm *= this->jacobianLogWeight;
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::ComputeJacobianBasedPenaltyTerm");
+#endif
+    return forwardPenaltyTerm + backwardPenaltyTerm;
 }
 /* *************************************************************** */
+template <class T>
+double reg_f3d2<T>::ComputeBendingEnergyPenaltyTerm() {
+    if (this->bendingEnergyWeight <= 0) return 0;
+
+    double forwardPenaltyTerm = reg_f3d<T>::ComputeBendingEnergyPenaltyTerm();
+
+    double value = reg_spline_approxBendingEnergy(backwardControlPointGrid);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::ComputeBendingEnergyPenaltyTerm");
+#endif
+    return forwardPenaltyTerm + this->bendingEnergyWeight * value;
+}
 /* *************************************************************** */
 template <class T>
-void reg_f3d2<T>::GetInverseConsistencyErrorField(bool forceAll)
-{
-   if(this->inverseConsistencyWeight<=0) return;
+double reg_f3d2<T>::ComputeLinearEnergyPenaltyTerm() {
+    if (this->linearEnergyWeight <= 0) return 0;
+
+    double forwardPenaltyTerm = reg_f3d<T>::ComputeLinearEnergyPenaltyTerm();
 
-   if(forceAll)
-   {
-      reg_print_fct_error("reg_f3d2<T>::GetInverseConsistencyErrorField()");
-      reg_print_msg_error("Option not supported in F3D2");
-   }
-   else
-   {
-      reg_print_fct_error("reg_f3d2<T>::GetInverseConsistencyErrorField()");
-      reg_print_msg_error("Option not supported in F3D2");
-   }
-   reg_exit();
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::GetInverseConsistencyGradient()
-{
-   if(this->inverseConsistencyWeight<=0) return;
-
-   reg_print_fct_error("reg_f3d2<T>::GetInverseConsistencyGradient()");
-   reg_print_msg_error("Option not supported in F3D2");
-   reg_exit();
-
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::GetVoxelBasedGradient()
-{
-   reg_f3d_sym<T>::GetVoxelBasedGradient();
-
-   // Exponentiate the gradients if required
-   this->ExponentiateGradient();
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::ExponentiateGradient()
-{
-   if(!this->useGradientCumulativeExp) return;
-
-   /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-   // Exponentiate the forward gradient using the backward transformation
-#ifndef NDEBUG
-   reg_print_msg_debug("Update the forward measure gradient using a Dartel like approach");
-#endif
-   // Create all deformation field images needed for resampling
-   nifti_image **tempDef=(nifti_image **)malloc(
-                            (unsigned int)(fabs(this->backwardControlPointGrid->intent_p2)+1) *
-                            sizeof(nifti_image *));
-   for(unsigned int i=0; i<=(unsigned int)fabs(this->backwardControlPointGrid->intent_p2); ++i)
-   {
-      tempDef[i]=nifti_copy_nim_info(this->deformationFieldImage);
-      tempDef[i]->data=(void *)malloc(tempDef[i]->nvox*tempDef[i]->nbyper);
-   }
-   // Generate all intermediate deformation fields
-   reg_spline_getIntermediateDefFieldFromVelGrid(this->backwardControlPointGrid,
-         tempDef);
-
-   // Remove the affine component
-   nifti_image *affine_disp=nullptr;
-   if(this->affineTransformation!=nullptr){
-      affine_disp=nifti_copy_nim_info(this->deformationFieldImage);
-      affine_disp->data=(void *)malloc(affine_disp->nvox*affine_disp->nbyper);
-      mat44 backwardAffineTransformation=nifti_mat44_inverse(*this->affineTransformation);
-      reg_affine_getDeformationField(&backwardAffineTransformation,
-                                     affine_disp);
-      reg_getDisplacementFromDeformation(affine_disp);
-   }
-
-   /* Allocate a temporary gradient image to store the backward gradient */
-   nifti_image *tempGrad=nifti_copy_nim_info(this->voxelBasedMeasureGradient);
-
-   tempGrad->data=(void *)malloc(tempGrad->nvox*tempGrad->nbyper);
-   for(int i=0; i<(int)fabsf(this->backwardControlPointGrid->intent_p2); ++i)
-   {
-      if(affine_disp!=nullptr)
-         reg_tools_substractImageToImage(tempDef[i],
-                                         affine_disp,
-                                         tempDef[i]);
-      reg_resampleGradient(this->voxelBasedMeasureGradient, // floating
-                           tempGrad, // warped - out
-                           tempDef[i], // deformation field
-                           1, // interpolation type - linear
-                           0.f); // padding value
-      reg_tools_addImageToImage(tempGrad, // in1
-                                this->voxelBasedMeasureGradient, // in2
-                                this->voxelBasedMeasureGradient); // out
-   }
-
-   // Free the temporary deformation fields
-   for(int i=0; i<=(int)fabsf(this->backwardControlPointGrid->intent_p2); ++i)
-   {
-      nifti_image_free(tempDef[i]);
-      tempDef[i]=nullptr;
-   }
-   free(tempDef);
-   tempDef=nullptr;
-   // Free the temporary gradient image
-   nifti_image_free(tempGrad);
-   tempGrad=nullptr;
-   // Free the temporary affine displacement field
-   if(affine_disp!=nullptr)
-      nifti_image_free(affine_disp);
-   affine_disp=nullptr;
-   // Normalise the forward gradient
-   reg_tools_divideValueToImage(this->voxelBasedMeasureGradient, // in
-                                this->voxelBasedMeasureGradient, // out
-                                powf(2.f,fabsf(this->backwardControlPointGrid->intent_p2))); // value
-
-   /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
-   /* Exponentiate the backward gradient using the forward transformation */
-#ifndef NDEBUG
-   reg_print_msg_debug("Update the backward measure gradient using a Dartel like approach");
-#endif
-   // Allocate a temporary gradient image to store the backward gradient
-   tempGrad=nifti_copy_nim_info(this->backwardVoxelBasedMeasureGradientImage);
-   tempGrad->data=(void *)malloc(tempGrad->nvox*tempGrad->nbyper);
-   // Create all deformation field images needed for resampling
-   tempDef=(nifti_image **)malloc((unsigned int)(fabs(this->controlPointGrid->intent_p2)+1) * sizeof(nifti_image *));
-   for(unsigned int i=0; i<=(unsigned int)fabs(this->controlPointGrid->intent_p2); ++i)
-   {
-      tempDef[i]=nifti_copy_nim_info(this->backwardDeformationFieldImage);
-      tempDef[i]->data=(void *)malloc(tempDef[i]->nvox*tempDef[i]->nbyper);
-   }
-   // Generate all intermediate deformation fields
-   reg_spline_getIntermediateDefFieldFromVelGrid(this->controlPointGrid,
-         tempDef);
-
-   // Remove the affine component
-   if(this->affineTransformation!=nullptr){
-      affine_disp=nifti_copy_nim_info(this->backwardDeformationFieldImage);
-      affine_disp->data=(void *)malloc(affine_disp->nvox*affine_disp->nbyper);
-      reg_affine_getDeformationField(this->affineTransformation,
-                                     affine_disp);
-      reg_getDisplacementFromDeformation(affine_disp);
-   }
-
-   for(int i=0; i<(int)fabsf(this->controlPointGrid->intent_p2); ++i)
-   {
-      if(affine_disp!=nullptr)
-         reg_tools_substractImageToImage(tempDef[i],
-                                         affine_disp,
-                                         tempDef[i]);
-      reg_resampleGradient(this->backwardVoxelBasedMeasureGradientImage, // floating
-                           tempGrad, // warped - out
-                           tempDef[i], // deformation field
-                           1, // interpolation type - linear
-                           0.f); // padding value
-      reg_tools_addImageToImage(tempGrad, // in1
-                                this->backwardVoxelBasedMeasureGradientImage, // in2
-                                this->backwardVoxelBasedMeasureGradientImage); // out
-   }
-
-   // Free the temporary deformation field
-   for(int i=0; i<=(int)fabsf(this->controlPointGrid->intent_p2); ++i)
-   {
-      nifti_image_free(tempDef[i]);
-      tempDef[i]=nullptr;
-   }
-   free(tempDef);
-   tempDef=nullptr;
-   // Free the temporary gradient image
-   nifti_image_free(tempGrad);
-   tempGrad=nullptr;
-   // Free the temporary affine displacement field
-   if(affine_disp!=nullptr)
-      nifti_image_free(affine_disp);
-   affine_disp=nullptr;
-   // Normalise the backward gradient
-   reg_tools_divideValueToImage(this->backwardVoxelBasedMeasureGradientImage, // in
-                                this->backwardVoxelBasedMeasureGradientImage, // out
-                                powf(2.f,fabsf(this->controlPointGrid->intent_p2))); // value
-
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::UpdateParameters(float scale)
-{
-   // Restore the last successfull control point grids
-   this->optimiser->RestoreBestDOF();
-
-   /************************/
-   /**** Forward update ****/
-   /************************/
-   // Scale the gradient image
-   nifti_image *forwardScaledGradient=nifti_copy_nim_info(this->transformationGradient);
-   forwardScaledGradient->data=(void *)malloc(forwardScaledGradient->nvox*forwardScaledGradient->nbyper);
-   reg_tools_multiplyValueToImage(this->transformationGradient,
-                                  forwardScaledGradient,
-                                  scale); // *(scale)
-   // The scaled gradient image is added to the current estimate of the transformation using
-   // a simple addition or by computing the BCH update
-   // Note that the gradient has been integrated over the path of transformation previously
-   if(this->BCHUpdate)
-   {
-      // Compute the BCH update
-      reg_print_msg_warn("USING BCH FORWARD - TESTING ONLY");
-#ifndef NDEBUG
-      reg_print_msg_debug("Update the forward control point grid using BCH approximation");
-#endif
-      compute_BCH_update(this->controlPointGrid,
-                         forwardScaledGradient,
-                         this->BCHUpdateValue);
-   }
-   else
-   {
-      // Reset the gradient along the axes if appropriate
-      reg_setGradientToZero(forwardScaledGradient,
-                            !this->optimiser->GetOptimiseX(),
-                            !this->optimiser->GetOptimiseY(),
-                            !this->optimiser->GetOptimiseZ());
-      // Update the velocity field
-      reg_tools_addImageToImage(this->controlPointGrid, // in1
-                                forwardScaledGradient, // in2
-                                this->controlPointGrid); // out
-   }
-   // Clean the temporary nifti_images
-   nifti_image_free(forwardScaledGradient);
-   forwardScaledGradient=nullptr;
-
-   /************************/
-   /**** Backward update ***/
-   /************************/
-   // Scale the gradient image
-   nifti_image *backwardScaledGradient=nifti_copy_nim_info(this->backwardTransformationGradient);
-   backwardScaledGradient->data=(void *)malloc(backwardScaledGradient->nvox*backwardScaledGradient->nbyper);
-   reg_tools_multiplyValueToImage(this->backwardTransformationGradient,
-                                  backwardScaledGradient,
-                                  scale); // *(scale)
-   // The scaled gradient image is added to the current estimate of the transformation using
-   // a simple addition or by computing the BCH update
-   // Note that the gradient has been integrated over the path of transformation previously
-   if(this->BCHUpdate)
-   {
-      // Compute the BCH update
-      reg_print_msg_warn("USING BCH BACKWARD - TESTING ONLY");
-#ifndef NDEBUG
-      reg_print_msg_debug("Update the backward control point grid using BCH approximation");
-#endif
-      compute_BCH_update(this->backwardControlPointGrid,
-                         backwardScaledGradient,
-                         this->BCHUpdateValue);
-   }
-   else
-   {
-      // Reset the gradient along the axes if appropriate
-      reg_setGradientToZero(backwardScaledGradient,
-                            !this->optimiser->GetOptimiseX(),
-                            !this->optimiser->GetOptimiseY(),
-                            !this->optimiser->GetOptimiseZ());
-      // Update the velocity field
-      reg_tools_addImageToImage(this->backwardControlPointGrid, // in1
-                                backwardScaledGradient, // in2
-                                this->backwardControlPointGrid); // out
-   }
-   // Clean the temporary nifti_images
-   nifti_image_free(backwardScaledGradient);
-   backwardScaledGradient=nullptr;
-
-   /****************************/
-   /******** Symmetrise ********/
-   /****************************/
-
-   // In order to ensure symmetry the forward and backward velocity fields
-   // are averaged in both image spaces: reference and floating
-   /****************************/
-   nifti_image *warpedForwardTrans = nifti_copy_nim_info(this->backwardControlPointGrid);
-   warpedForwardTrans->data=(void *)malloc(warpedForwardTrans->nvox*warpedForwardTrans->nbyper);
-   nifti_image *warpedBackwardTrans = nifti_copy_nim_info(this->controlPointGrid);
-   warpedBackwardTrans->data=(void *)malloc(warpedBackwardTrans->nvox*warpedBackwardTrans->nbyper);
-
-   // Both parametrisations are converted into displacement
-   reg_getDisplacementFromDeformation(this->controlPointGrid);
-   reg_getDisplacementFromDeformation(this->backwardControlPointGrid);
-
-   // Both parametrisations are copied over
-   memcpy(warpedBackwardTrans->data,this->backwardControlPointGrid->data,warpedBackwardTrans->nvox*warpedBackwardTrans->nbyper);
-   memcpy(warpedForwardTrans->data,this->controlPointGrid->data,warpedForwardTrans->nvox*warpedForwardTrans->nbyper);
-
-   // and substracted (sum and negation)
-   reg_tools_substractImageToImage(this->backwardControlPointGrid, // displacement
-                                   warpedForwardTrans, // displacement
-                                   this->backwardControlPointGrid); // displacement output
-   reg_tools_substractImageToImage(this->controlPointGrid, // displacement
-                                   warpedBackwardTrans, // displacement
-                                   this->controlPointGrid); // displacement output
-   // Division by 2
-   reg_tools_multiplyValueToImage(this->backwardControlPointGrid, // displacement
-                                  this->backwardControlPointGrid, // displacement
-                                  0.5f); // *(0.5)
-   reg_tools_multiplyValueToImage(this->controlPointGrid, // displacement
-                                  this->controlPointGrid, // displacement
-                                  0.5f); // *(0.5)
-   // Clean the temporary allocated velocity fields
-   nifti_image_free(warpedForwardTrans);
-   warpedForwardTrans=nullptr;
-   nifti_image_free(warpedBackwardTrans);
-   warpedBackwardTrans=nullptr;
-
-   // Convert the velocity field from displacement to deformation
-   reg_getDeformationFromDisplacement(this->controlPointGrid);
-   reg_getDeformationFromDisplacement(this->backwardControlPointGrid);
-
-   return;
+    double backwardPenaltyTerm = this->linearEnergyWeight * reg_spline_approxLinearEnergy(backwardControlPointGrid);
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::ComputeLinearEnergyPenaltyTerm");
+#endif
+    return forwardPenaltyTerm + backwardPenaltyTerm;
 }
 /* *************************************************************** */
+template <class T>
+double reg_f3d2<T>::ComputeLandmarkDistancePenaltyTerm() {
+    if (this->landmarkRegWeight <= 0) return 0;
+
+    double forwardPenaltyTerm = reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm();
+
+    double backwardPenaltyTerm = this->landmarkRegWeight * reg_spline_getLandmarkDistance(backwardControlPointGrid,
+                                                                                          this->landmarkRegNumber,
+                                                                                          this->landmarkFloating,
+                                                                                          this->landmarkReference);
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::ComputeLandmarkDistancePenaltyTerm");
+#endif
+    return forwardPenaltyTerm + backwardPenaltyTerm;
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::GetVoxelBasedGradient() {
+    // The voxel based gradient image is initialised with zeros
+    reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient,
+                                   this->voxelBasedMeasureGradient,
+                                   0);
+    reg_tools_multiplyValueToImage(backwardVoxelBasedMeasureGradientImage,
+                                   backwardVoxelBasedMeasureGradientImage,
+                                   0);
+    // The intensity gradient is first computed
+    //    if(this->measure_dti!=nullptr){
+    //        reg_getImageGradient(this->floating,
+    //                             this->warpedGradient,
+    //                             this->deformationFieldImage,
+    //                             this->currentMask,
+    //                             this->interpolation,
+    //                             this->warpedPaddingValue,
+    //                             this->measure_dti->GetActiveTimepoints(),
+    //                             this->forwardJacobianMatrix,
+    //                             this->warped);
+
+    //        reg_getImageGradient(this->reference,
+    //                             backwardWarpedGradientImage,
+    //                             backwardDeformationFieldImage,
+    //                             floatingMask,
+    //                             this->interpolation,
+    //                             this->warpedPaddingValue,
+    //                             this->measure_dti->GetActiveTimepoints(),
+    //                             backwardJacobianMatrix,
+    //                             backwardWarped);
+    //   if(this->measure_dti!=nullptr)
+    //      this->measure_dti->GetVoxelBasedSimilarityMeasureGradient();
+    //    }
+    //    else{
+    //    }
+
+
+    for (int t = 0; t < this->reference->nt; ++t) {
+        reg_getImageGradient(this->floating,
+                             this->warpedGradient,
+                             this->deformationFieldImage,
+                             this->currentMask,
+                             this->interpolation,
+                             this->warpedPaddingValue,
+                             t);
+
+        reg_getImageGradient(this->reference,
+                             backwardWarpedGradientImage,
+                             backwardDeformationFieldImage,
+                             floatingMask,
+                             this->interpolation,
+                             this->warpedPaddingValue,
+                             t);
+
+        // The gradient of the various measures of similarity are computed
+        if (this->measure_nmi)
+            this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t);
+
+        if (this->measure_ssd)
+            this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t);
+
+        if (this->measure_kld)
+            this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t);
+
+        if (this->measure_lncc)
+            this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t);
+
+        if (this->measure_mind)
+            this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t);
+
+        if (this->measure_mindssc)
+            this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t);
+    } // timepoint
+
+    // Exponentiate the gradients if required
+    ExponentiateGradient();
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetVoxelBasedGradient");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::GetSimilarityMeasureGradient() {
+    reg_f3d<T>::GetSimilarityMeasureGradient();
+
+    // The voxel based sim measure gradient is convolved with a spline kernel
+    // Convolution along the x axis
+    float currentNodeSpacing[3];
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dx;
+    bool activeAxis[3] = {1, 0, 0};
+    reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage,
+                                currentNodeSpacing,
+                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
+                                nullptr, // mask
+                                nullptr, // all volumes are active
+                                activeAxis);
+    // Convolution along the y axis
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dy;
+    activeAxis[0] = 0;
+    activeAxis[1] = 1;
+    reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage,
+                                currentNodeSpacing,
+                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
+                                nullptr, // mask
+                                nullptr, // all volumes are active
+                                activeAxis);
+    // Convolution along the z axis if required
+    if (this->voxelBasedMeasureGradient->nz > 1) {
+        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dz;
+        activeAxis[1] = 0;
+        activeAxis[2] = 1;
+        reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage,
+                                    currentNodeSpacing,
+                                    CUBIC_SPLINE_KERNEL, // cubic spline kernel
+                                    nullptr, // mask
+                                    nullptr, // all volumes are active
+                                    activeAxis);
+    }
+    // The backward node based sim measure gradient is extracted
+    mat44 reorientation;
+    if (this->reference->sform_code > 0)
+        reorientation = this->reference->sto_ijk;
+    else reorientation = this->reference->qto_ijk;
+    reg_voxelCentric2NodeCentric(backwardTransformationGradient,
+                                 backwardVoxelBasedMeasureGradientImage,
+                                 this->similarityWeight,
+                                 false, // no update
+                                 &reorientation); // voxel to mm conversion
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetSimilarityMeasureGradient");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::GetJacobianBasedGradient() {
+    if (this->jacobianLogWeight <= 0) return;
+
+    reg_f3d<T>::GetJacobianBasedGradient();
+
+    reg_spline_getJacobianPenaltyTermGradient(backwardControlPointGrid,
+                                              this->floating,
+                                              backwardTransformationGradient,
+                                              this->jacobianLogWeight,
+                                              this->jacobianLogApproximation);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetJacobianBasedGradient");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::GetBendingEnergyGradient() {
+    if (this->bendingEnergyWeight <= 0) return;
+
+    reg_f3d<T>::GetBendingEnergyGradient();
+
+    reg_spline_approxBendingEnergyGradient(backwardControlPointGrid,
+                                           backwardTransformationGradient,
+                                           this->bendingEnergyWeight);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetBendingEnergyGradient");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::GetLinearEnergyGradient() {
+    if (this->linearEnergyWeight <= 0) return;
+
+    reg_f3d<T>::GetLinearEnergyGradient();
+
+    reg_spline_approxLinearEnergyGradient(backwardControlPointGrid,
+                                          backwardTransformationGradient,
+                                          this->linearEnergyWeight);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetLinearEnergyGradient");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::GetLandmarkDistanceGradient() {
+    if (this->landmarkRegWeight <= 0) return;
+
+    reg_f3d<T>::GetLandmarkDistanceGradient();
+
+    reg_spline_getLandmarkDistanceGradient(backwardControlPointGrid,
+                                           backwardTransformationGradient,
+                                           this->landmarkRegNumber,
+                                           this->landmarkFloating,
+                                           this->landmarkReference,
+                                           this->landmarkRegWeight);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetLandmarkDistanceGradient");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::SetGradientImageToZero() {
+    reg_f3d<T>::SetGradientImageToZero();
+
+    T *nodeGradPtr = static_cast<T*>(backwardTransformationGradient->data);
+    for (size_t i = 0; i < backwardTransformationGradient->nvox; ++i)
+        *nodeGradPtr++ = 0;
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::SetGradientImageToZero");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::SmoothGradient() {
+    if (this->gradientSmoothingSigma != 0) {
+        reg_f3d<T>::SmoothGradient();
+        // The gradient is smoothed using a Gaussian kernel if it is required
+        float kernel = fabs(this->gradientSmoothingSigma);
+        reg_tools_kernelConvolution(backwardTransformationGradient,
+                                    &kernel,
+                                    GAUSSIAN_KERNEL);
+    }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::SmoothGradient");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::GetApproximatedGradient() {
+    reg_f3d<T>::GetApproximatedGradient();
+
+    // Loop over every control points
+    T *gridPtr = static_cast<T*>(backwardControlPointGrid->data);
+    T *gradPtr = static_cast<T*>(backwardTransformationGradient->data);
+    T eps = this->floating->dx / 1000.f;
+    for (size_t i = 0; i < backwardControlPointGrid->nvox; i++) {
+        T currentValue = this->optimiser->GetBestDOF_b()[i];
+        gridPtr[i] = currentValue + eps;
+        double valPlus = GetObjectiveFunctionValue();
+        gridPtr[i] = currentValue - eps;
+        double valMinus = GetObjectiveFunctionValue();
+        gridPtr[i] = currentValue;
+        gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps));
+    }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetApproximatedGradient");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+T reg_f3d2<T>::NormaliseGradient() {
+    // The forward gradient max length is computed
+    T forwardMaxValue = reg_f3d<T>::NormaliseGradient();
+
+    // The backward gradient max length is computed
+    T maxGradValue = 0;
+    size_t voxNumber = backwardTransformationGradient->nx * backwardTransformationGradient->ny * backwardTransformationGradient->nz;
+    T *bckPtrX = static_cast<T*>(backwardTransformationGradient->data);
+    T *bckPtrY = &bckPtrX[voxNumber];
+    if (backwardTransformationGradient->nz > 1) {
+        T *bckPtrZ = &bckPtrY[voxNumber];
+        for (size_t i = 0; i < voxNumber; i++) {
+            T valX = 0, valY = 0, valZ = 0;
+            if (this->optimiseX)
+                valX = *bckPtrX++;
+            if (this->optimiseY)
+                valY = *bckPtrY++;
+            if (this->optimiseZ)
+                valZ = *bckPtrZ++;
+            T length = (T)(sqrt(valX * valX + valY * valY + valZ * valZ));
+            maxGradValue = (length > maxGradValue) ? length : maxGradValue;
+        }
+    } else {
+        for (size_t i = 0; i < voxNumber; i++) {
+            T valX = 0, valY = 0;
+            if (this->optimiseX)
+                valX = *bckPtrX++;
+            if (this->optimiseY)
+                valY = *bckPtrY++;
+            T length = (T)(sqrt(valX * valX + valY * valY));
+            maxGradValue = (length > maxGradValue) ? length : maxGradValue;
+        }
+    }
+
+    // The largest value between the forward and backward gradient is kept
+    maxGradValue = maxGradValue > forwardMaxValue ? maxGradValue : forwardMaxValue;
+#ifndef NDEBUG
+    char text[255];
+    sprintf(text, "Objective function gradient maximal length: %g", maxGradValue);
+    reg_print_msg_debug(text);
+#endif
+
+    // The forward gradient is normalised
+    T *forPtrX = static_cast<T*>(this->transformationGradient->data);
+    for (size_t i = 0; i < this->transformationGradient->nvox; ++i) {
+        *forPtrX++ /= maxGradValue;
+    }
+    // The backward gradient is normalised
+    bckPtrX = static_cast<T*>(backwardTransformationGradient->data);
+    for (size_t i = 0; i < backwardTransformationGradient->nvox; ++i) {
+        *bckPtrX++ /= maxGradValue;
+    }
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::NormaliseGradient");
+#endif
+    // Returns the largest gradient distance
+    return maxGradValue;
+}
+/* *************************************************************** */
+template<class T>
+void reg_f3d2<T>::GetObjectiveFunctionGradient() {
+    if (!this->useApproxGradient) {
+        // Compute the gradient of the similarity measure
+        if (this->similarityWeight > 0) {
+            this->WarpFloatingImage(this->interpolation);
+            GetSimilarityMeasureGradient();
+        } else {
+            SetGradientImageToZero();
+        }
+    } else GetApproximatedGradient();
+    this->optimiser->IncrementCurrentIterationNumber();
+
+    // Smooth the gradient if require
+    SmoothGradient();
+
+    if (!this->useApproxGradient) {
+        // Compute the penalty term gradients if required
+        GetBendingEnergyGradient();
+        GetJacobianBasedGradient();
+        GetLinearEnergyGradient();
+        GetLandmarkDistanceGradient();
+        GetInverseConsistencyGradient();
+    }
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetObjectiveFunctionGradient");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::DisplayCurrentLevelParameters() {
+    reg_f3d<T>::DisplayCurrentLevelParameters();
+#ifdef NDEBUG
+    if (this->verbose) {
+#endif
+        char text[255];
+        reg_print_info(this->executableName, "Current backward control point image");
+        sprintf(text, "\t* image dimension: %i x %i x %i",
+                backwardControlPointGrid->nx, backwardControlPointGrid->ny, backwardControlPointGrid->nz);
+        reg_print_info(this->executableName, text);
+        sprintf(text, "\t* image spacing: %g x %g x %g mm",
+                backwardControlPointGrid->dx, backwardControlPointGrid->dy, backwardControlPointGrid->dz);
+        reg_print_info(this->executableName, text);
+#ifdef NDEBUG
+    }
+#endif
+
+#ifndef NDEBUG
+
+    if (backwardControlPointGrid->sform_code > 0)
+        reg_mat44_disp(&(backwardControlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP sform");
+    else reg_mat44_disp(&(backwardControlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP qform");
+#endif
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::DisplayCurrentLevelParameters");
+#endif
+}
+/* *************************************************************** */
+template<class T>
+void reg_f3d2<T>::GetInverseConsistencyErrorField(bool forceAll) {
+    if (inverseConsistencyWeight <= 0) return;
+
+    // Compute both deformation fields
+    if (this->similarityWeight <= 0 || forceAll)
+        GetDeformationField();
+    // Compose the obtained deformation fields by the inverse transformations
+    reg_spline_getDeformationField(backwardControlPointGrid,
+                                   this->deformationFieldImage,
+                                   this->currentMask,
+                                   true, // composition
+                                   true); // use B-Spline
+    reg_spline_getDeformationField(this->controlPointGrid,
+                                   backwardDeformationFieldImage,
+                                   floatingMask,
+                                   true, // composition
+                                   true); // use B-Spline
+    // Convert the deformation fields into displacement
+    reg_getDisplacementFromDeformation(this->deformationFieldImage);
+    reg_getDisplacementFromDeformation(backwardDeformationFieldImage);
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetInverseConsistencyErrorField");
+#endif
+}
+/* *************************************************************** */
+template<class T>
+double reg_f3d2<T>::GetInverseConsistencyPenaltyTerm() {
+    if (inverseConsistencyWeight <= 0) return 0;
+
+    GetInverseConsistencyErrorField(false);
+
+    double ferror = 0;
+    size_t voxelNumber = this->deformationFieldImage->nx * this->deformationFieldImage->ny * this->deformationFieldImage->nz;
+    T *dispPtrX = static_cast<T*>(this->deformationFieldImage->data);
+    T *dispPtrY = &dispPtrX[voxelNumber];
+    if (this->deformationFieldImage->nz > 1) {
+        T *dispPtrZ = &dispPtrY[voxelNumber];
+        for (size_t i = 0; i < voxelNumber; ++i) {
+            if (this->currentMask[i] > -1) {
+                double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]);
+                ferror += dist;
+            }
+        }
+    } else {
+        for (size_t i = 0; i < voxelNumber; ++i) {
+            if (this->currentMask[i] > -1) {
+                double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]);
+                ferror += dist;
+            }
+        }
+    }
+
+    double berror = 0;
+    voxelNumber = backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * backwardDeformationFieldImage->nz;
+    dispPtrX = static_cast<T*>(backwardDeformationFieldImage->data);
+    dispPtrY = &dispPtrX[voxelNumber];
+    if (backwardDeformationFieldImage->nz > 1) {
+        T *dispPtrZ = &dispPtrY[voxelNumber];
+        for (size_t i = 0; i < voxelNumber; ++i) {
+            if (floatingMask[i] > -1) {
+                double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]);
+                berror += dist;
+            }
+        }
+    } else {
+        for (size_t i = 0; i < voxelNumber; ++i) {
+            if (floatingMask[i] > -1) {
+                double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]);
+                berror += dist;
+            }
+        }
+    }
+    double error = (ferror / double(this->activeVoxelNumber[this->currentLevel]) +
+                    berror / double(backwardActiveVoxelNumber[this->currentLevel]));
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetInverseConsistencyPenaltyTerm");
+#endif
+    return double(inverseConsistencyWeight) * error;
+}
+/* *************************************************************** */
+template<class T>
+void reg_f3d2<T>::GetInverseConsistencyGradient() {
+    if (inverseConsistencyWeight <= 0) return;
+
+    // Note: I simplified the gradient computation in order to include
+    // only d(B(F(x)))/d(forwardNode) and d(F(B(x)))/d(backwardNode)
+    // I ignored d(F(B(x)))/d(forwardNode) and d(B(F(x)))/d(backwardNode)
+    // cause it would only be an approximation since I don't have the
+    // real inverses
+    GetInverseConsistencyErrorField(true);
+
+    // The forward inverse consistency field is masked
+    size_t forwardVoxelNumber = this->deformationFieldImage->nx * this->deformationFieldImage->ny * this->deformationFieldImage->nz;
+    T *defPtrX = static_cast<T*>(this->deformationFieldImage->data);
+    T *defPtrY = &defPtrX[forwardVoxelNumber];
+    T *defPtrZ = &defPtrY[forwardVoxelNumber];
+    for (size_t i = 0; i < forwardVoxelNumber; ++i) {
+        if (this->currentMask[i] < 0) {
+            defPtrX[i] = 0;
+            defPtrY[i] = 0;
+            if (this->deformationFieldImage->nz > 1)
+                defPtrZ[i] = 0;
+        }
+    }
+    // The backward inverse consistency field is masked
+    size_t backwardVoxelNumber = backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * backwardDeformationFieldImage->nz;
+    defPtrX = static_cast<T*>(backwardDeformationFieldImage->data);
+    defPtrY = &defPtrX[backwardVoxelNumber];
+    defPtrZ = &defPtrY[backwardVoxelNumber];
+    for (size_t i = 0; i < backwardVoxelNumber; ++i) {
+        if (floatingMask[i] < 0) {
+            defPtrX[i] = 0;
+            defPtrY[i] = 0;
+            if (backwardDeformationFieldImage->nz > 1)
+                defPtrZ[i] = 0;
+        }
+    }
+
+    // We convolve the inverse consistency map with a cubic B-Spline kernel
+    // Convolution along the x axis
+    float currentNodeSpacing[3];
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dx;
+    bool activeAxis[3] = {1, 0, 0};
+    reg_tools_kernelConvolution(this->deformationFieldImage,
+                                currentNodeSpacing,
+                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
+                                nullptr, // all volumes are active
+                                activeAxis);
+    // Convolution along the y axis
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dy;
+    activeAxis[0] = 0;
+    activeAxis[1] = 1;
+    reg_tools_kernelConvolution(this->deformationFieldImage,
+                                currentNodeSpacing,
+                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
+                                nullptr, // all volumes are active
+                                activeAxis);
+    // Convolution along the z axis if required
+    if (this->voxelBasedMeasureGradient->nz > 1) {
+        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dz;
+        activeAxis[1] = 0;
+        activeAxis[2] = 1;
+        reg_tools_kernelConvolution(this->deformationFieldImage,
+                                    currentNodeSpacing,
+                                    CUBIC_SPLINE_KERNEL, // cubic spline kernel
+                                    nullptr, // all volumes are active
+                                    activeAxis);
+    }
+    // The forward inverse consistency gradient is extracted at the node position
+    reg_voxelCentric2NodeCentric(this->transformationGradient,
+                                 this->deformationFieldImage,
+                                 2.f * inverseConsistencyWeight,
+                                 true, // update the current value
+                                 nullptr); // no voxel to mm conversion
+
+    // We convolve the inverse consistency map with a cubic B-Spline kernel
+    // Convolution along the x axis
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dx;
+    activeAxis[0] = 1;
+    activeAxis[1] = 0;
+    activeAxis[2] = 0;
+    reg_tools_kernelConvolution(backwardDeformationFieldImage,
+                                currentNodeSpacing,
+                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
+                                nullptr, // all volumes are active
+                                activeAxis);
+    // Convolution along the y axis
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dy;
+    activeAxis[0] = 0;
+    activeAxis[1] = 1;
+    reg_tools_kernelConvolution(backwardDeformationFieldImage,
+                                currentNodeSpacing,
+                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
+                                nullptr, // all volumes are active
+                                activeAxis);
+    // Convolution along the z axis if required
+    if (this->voxelBasedMeasureGradient->nz > 1) {
+        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dz;
+        activeAxis[1] = 0;
+        activeAxis[2] = 1;
+        reg_tools_kernelConvolution(backwardDeformationFieldImage,
+                                    currentNodeSpacing,
+                                    CUBIC_SPLINE_KERNEL, // cubic spline kernel
+                                    nullptr, // all volumes are active
+                                    activeAxis);
+    }
+    // The backward inverse consistency gradient is extracted at the node position
+    reg_voxelCentric2NodeCentric(backwardTransformationGradient,
+                                 backwardDeformationFieldImage,
+                                 2.f * inverseConsistencyWeight,
+                                 true, // update the current value
+                                 nullptr); // no voxel to mm conversion
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetInverseConsistencyGradient");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::SetOptimiser() {
+    if (this->useConjGradient)
+        this->optimiser = new reg_conjugateGradient<T>();
+    else this->optimiser = new reg_optimiser<T>();
+    this->optimiser->Initialise(this->controlPointGrid->nvox,
+                                this->controlPointGrid->nz > 1 ? 3 : 2,
+                                this->optimiseX,
+                                this->optimiseY,
+                                this->optimiseZ,
+                                this->maxIterationNumber,
+                                0, // currentIterationNumber
+                                this,
+                                static_cast<T*>(this->controlPointGrid->data),
+                                static_cast<T*>(this->transformationGradient->data),
+                                backwardControlPointGrid->nvox,
+                                static_cast<T*>(backwardControlPointGrid->data),
+                                static_cast<T*>(backwardTransformationGradient->data));
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::SetOptimiser");
+#endif
+}
+/* *************************************************************** */
+template<class T>
+void reg_f3d2<T>::PrintCurrentObjFunctionValue(T currentSize) {
+    if (!this->verbose) return;
+
+    char text[255];
+    sprintf(text, "[%i] Current objective function: %g",
+            (int)this->optimiser->GetCurrentIterationNumber(),
+            this->optimiser->GetBestObjFunctionValue());
+    sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure);
+    if (this->bendingEnergyWeight > 0)
+        sprintf(text + strlen(text), " - (wBE)%.2e", this->bestWBE);
+    if (this->linearEnergyWeight)
+        sprintf(text + strlen(text), " - (wLE)%.2e", this->bestWLE);
+    if (this->jacobianLogWeight > 0)
+        sprintf(text + strlen(text), " - (wJAC)%.2e", this->bestWJac);
+    if (this->landmarkRegWeight > 0)
+        sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand);
+    if (inverseConsistencyWeight > 0)
+        sprintf(text + strlen(text), " - (wIC)%.2e", bestIC);
+    sprintf(text + strlen(text), " [+ %g mm]", currentSize);
+    reg_print_info(this->executableName, text);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::PrintCurrentObjFunctionValue");
+#endif
+}
+/* *************************************************************** */
+template<class T>
+void reg_f3d2<T>::UpdateBestObjFunctionValue() {
+    reg_f3d<T>::UpdateBestObjFunctionValue();
+    bestIC = currentIC;
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::UpdateBestObjFunctionValue");
+#endif
+}
+/* *************************************************************** */
+template<class T>
+void reg_f3d2<T>::PrintInitialObjFunctionValue() {
+    if (!this->verbose) return;
+    reg_f3d<T>::PrintInitialObjFunctionValue();
+    //   char text[255];
+    //   sprintf(text, "Initial Inverse consistency value: %g", bestIC);
+    //   reg_print_info(this->executableName, text);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::PrintInitialObjFunctionValue");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+double reg_f3d2<T>::GetObjectiveFunctionValue() {
+    this->currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations
+
+    this->currentWBE = ComputeBendingEnergyPenaltyTerm();
+
+    this->currentWLE = ComputeLinearEnergyPenaltyTerm();
+
+    this->currentWLand = ComputeLandmarkDistancePenaltyTerm();
+
+    // Compute initial similarity measure
+    this->currentWMeasure = 0;
+    if (this->similarityWeight > 0) {
+        this->WarpFloatingImage(this->interpolation);
+        this->currentWMeasure = this->ComputeSimilarityMeasure();
+    }
+
+    // Compute the Inverse consistency penalty term if required
+    currentIC = GetInverseConsistencyPenaltyTerm();
+
+#ifndef NDEBUG
+    char text[255];
+    sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g | (wIC) %g",
+            this->currentWMeasure, this->currentWBE, this->currentWLE,
+            this->currentWJac, this->currentWLand, currentIC);
+    reg_print_msg_debug(text);
+#endif
+
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetObjectiveFunctionValue");
+#endif
+    // Store the global objective function value
+    return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - currentIC;
+}
+/* *************************************************************** */
+template<class T>
+void reg_f3d2<T>::InitialiseSimilarity() {
+    // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
+    if (!this->measure_nmi && !this->measure_ssd && !this->measure_dti && !this->measure_lncc &&
+        !this->measure_kld && !this->measure_mind && !this->measure_mindssc) {
+        this->measure_nmi = new reg_nmi;
+        for (int i = 0; i < this->inputReference->nt; ++i)
+            this->measure_nmi->SetTimepointWeight(i, 1);
+    }
+    if (this->measure_nmi)
+        this->measure_nmi->InitialiseMeasure(this->reference,
+                                             this->floating,
+                                             this->currentMask,
+                                             this->warped,
+                                             this->warpedGradient,
+                                             this->voxelBasedMeasureGradient,
+                                             this->localWeightSimCurrent,
+                                             floatingMask,
+                                             backwardWarped,
+                                             backwardWarpedGradientImage,
+                                             backwardVoxelBasedMeasureGradientImage);
+
+    if (this->measure_ssd)
+        this->measure_ssd->InitialiseMeasure(this->reference,
+                                             this->floating,
+                                             this->currentMask,
+                                             this->warped,
+                                             this->warpedGradient,
+                                             this->voxelBasedMeasureGradient,
+                                             this->localWeightSimCurrent,
+                                             floatingMask,
+                                             backwardWarped,
+                                             backwardWarpedGradientImage,
+                                             backwardVoxelBasedMeasureGradientImage);
+
+    if (this->measure_kld)
+        this->measure_kld->InitialiseMeasure(this->reference,
+                                             this->floating,
+                                             this->currentMask,
+                                             this->warped,
+                                             this->warpedGradient,
+                                             this->voxelBasedMeasureGradient,
+                                             this->localWeightSimCurrent,
+                                             floatingMask,
+                                             backwardWarped,
+                                             backwardWarpedGradientImage,
+                                             backwardVoxelBasedMeasureGradientImage);
+
+    if (this->measure_lncc)
+        this->measure_lncc->InitialiseMeasure(this->reference,
+                                              this->floating,
+                                              this->currentMask,
+                                              this->warped,
+                                              this->warpedGradient,
+                                              this->voxelBasedMeasureGradient,
+                                              this->localWeightSimCurrent,
+                                              floatingMask,
+                                              backwardWarped,
+                                              backwardWarpedGradientImage,
+                                              backwardVoxelBasedMeasureGradientImage);
+
+    if (this->measure_dti)
+        this->measure_dti->InitialiseMeasure(this->reference,
+                                             this->floating,
+                                             this->currentMask,
+                                             this->warped,
+                                             this->warpedGradient,
+                                             this->voxelBasedMeasureGradient,
+                                             this->localWeightSimCurrent,
+                                             floatingMask,
+                                             backwardWarped,
+                                             backwardWarpedGradientImage,
+                                             backwardVoxelBasedMeasureGradientImage);
+
+    if (this->measure_mind)
+        this->measure_mind->InitialiseMeasure(this->reference,
+                                              this->floating,
+                                              this->currentMask,
+                                              this->warped,
+                                              this->warpedGradient,
+                                              this->voxelBasedMeasureGradient,
+                                              this->localWeightSimCurrent,
+                                              floatingMask,
+                                              backwardWarped,
+                                              backwardWarpedGradientImage,
+                                              backwardVoxelBasedMeasureGradientImage);
+
+    if (this->measure_mindssc)
+        this->measure_mindssc->InitialiseMeasure(this->reference,
+                                                 this->floating,
+                                                 this->currentMask,
+                                                 this->warped,
+                                                 this->warpedGradient,
+                                                 this->voxelBasedMeasureGradient,
+                                                 this->localWeightSimCurrent,
+                                                 floatingMask,
+                                                 backwardWarped,
+                                                 backwardWarpedGradientImage,
+                                                 backwardVoxelBasedMeasureGradientImage);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::InitialiseSimilarity");
+#endif
+}
 /* *************************************************************** */
 template<class T>
-nifti_image **reg_f3d2<T>::GetWarpedImage()
-{
-   // The initial images are used
-   if(this->inputReference==nullptr ||
-         this->inputFloating==nullptr ||
-         this->controlPointGrid==nullptr ||
-         this->backwardControlPointGrid==nullptr)
-   {
-      reg_print_fct_error("reg_f3d2<T>::GetWarpedImage()");
-      reg_print_msg_error("The reference, floating and control point grid images have to be defined");
-      reg_exit();
-   }
-
-   // Set the input images
-   reg_f3d2<T>::reference = this->inputReference;
-   reg_f3d2<T>::floating = this->inputFloating;
-   // No mask is used to perform the final resampling
-   reg_f3d2<T>::currentMask = nullptr;
-   reg_f3d2<T>::floatingMask = nullptr;
-
-   // Allocate the forward and backward warped images
-   reg_f3d2<T>::AllocateWarped();
-   // Allocate the forward and backward dense deformation field
-   reg_f3d2<T>::AllocateDeformationField();
-
-   // Warp the floating images into the reference spaces using a cubic spline interpolation
-   reg_f3d2<T>::WarpFloatingImage(3); // cubic spline interpolation
-
-   // Deallocate the deformation field
-   reg_f3d2<T>::DeallocateDeformationField();
-
-   // Allocate and save the forward transformation warped image
-   nifti_image **warpedImage=(nifti_image **)malloc(2*sizeof(nifti_image *));
-   warpedImage[0] = nifti_copy_nim_info(this->warped);
-   warpedImage[0]->cal_min=this->inputFloating->cal_min;
-   warpedImage[0]->cal_max=this->inputFloating->cal_max;
-   warpedImage[0]->scl_slope=this->inputFloating->scl_slope;
-   warpedImage[0]->scl_inter=this->inputFloating->scl_inter;
-   warpedImage[0]->data=(void *)malloc(warpedImage[0]->nvox*warpedImage[0]->nbyper);
-   memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox*warpedImage[0]->nbyper);
-
-   // Allocate and save the backward transformation warped image
-   warpedImage[1] = nifti_copy_nim_info(this->backwardWarped);
-   warpedImage[1]->cal_min=this->inputReference->cal_min;
-   warpedImage[1]->cal_max=this->inputReference->cal_max;
-   warpedImage[1]->scl_slope=this->inputReference->scl_slope;
-   warpedImage[1]->scl_inter=this->inputReference->scl_inter;
-   warpedImage[1]->data=(void *)malloc(warpedImage[1]->nvox*warpedImage[1]->nbyper);
-   memcpy(warpedImage[1]->data, this->backwardWarped->data, warpedImage[1]->nvox*warpedImage[1]->nbyper);
-
-   // Deallocate the warped images
-   reg_f3d2<T>::DeallocateWarped();
-
-   // Return the two final warped images
-   return warpedImage;
+nifti_image* reg_f3d2<T>::GetBackwardControlPointPositionImage() {
+    // Create a control point grid nifti image
+    nifti_image *returnedControlPointGrid = nifti_copy_nim_info(backwardControlPointGrid);
+    // Allocate the new image data array
+    returnedControlPointGrid->data = malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
+    // Copy the final backward control point grid image
+    memcpy(returnedControlPointGrid->data, backwardControlPointGrid->data,
+           returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
+    // Return the new control point grid
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetBackwardControlPointPositionImage");
+#endif
+    return returnedControlPointGrid;
 }
 /* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::UseBCHUpdate(int v) {
+    bchUpdate = true;
+    useGradientCumulativeExp = false;
+    bchUpdateValue = v;
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::UseGradientCumulativeExp() {
+    bchUpdate = false;
+    useGradientCumulativeExp = true;
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::DoNotUseGradientCumulativeExp() {
+    useGradientCumulativeExp = false;
+}
+/* *************************************************************** */
+template<class T>
+void reg_f3d2<T>::Initialise() {
+    reg_f3d<T>::Initialise();
+
+    if (!this->inputControlPointGrid) {
+        // Define the spacing for the first level
+        float gridSpacing[3] = {this->spacing[0], this->spacing[1], this->spacing[2]};
+        if (this->spacing[0] < 0)
+            gridSpacing[0] *= -(this->inputReference->dx + this->inputFloating->dx) / 2.f;
+        if (this->spacing[1] < 0)
+            gridSpacing[1] *= -(this->inputReference->dy + this->inputFloating->dy) / 2.f;
+        if (this->spacing[2] < 0)
+            gridSpacing[2] *= -(this->inputReference->dz + this->inputFloating->dz) / 2.f;
+        gridSpacing[0] *= powf(2, this->levelNumber - 1);
+        gridSpacing[1] *= powf(2, this->levelNumber - 1);
+        gridSpacing[2] *= powf(2, this->levelNumber - 1);
+
+        // Create the forward and backward control point grids
+        reg_createSymmetricControlPointGrids<T>(&this->controlPointGrid,
+                                                &backwardControlPointGrid,
+                                                this->referencePyramid[0],
+                                                this->floatingPyramid[0],
+                                                this->affineTransformation,
+                                                gridSpacing);
+    } else {
+        // The control point grid image is initialised with the provided grid
+        this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid);
+        this->controlPointGrid->data = malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
+        if (this->inputControlPointGrid->num_ext > 0)
+            nifti_copy_extensions(this->controlPointGrid, this->inputControlPointGrid);
+        memcpy(this->controlPointGrid->data, this->inputControlPointGrid->data,
+               this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
+        // The final grid spacing is computed
+        this->spacing[0] = this->controlPointGrid->dx / powf(2, this->levelNumber - 1);
+        this->spacing[1] = this->controlPointGrid->dy / powf(2, this->levelNumber - 1);
+        if (this->controlPointGrid->nz > 1)
+            this->spacing[2] = this->controlPointGrid->dz / powf(2, this->levelNumber - 1);
+        // The backward grid is derived from the forward
+        backwardControlPointGrid = nifti_copy_nim_info(this->controlPointGrid);
+        backwardControlPointGrid->data = malloc(backwardControlPointGrid->nvox * backwardControlPointGrid->nbyper);
+        if (this->controlPointGrid->num_ext > 0)
+            nifti_copy_extensions(backwardControlPointGrid, this->controlPointGrid);
+        memcpy(backwardControlPointGrid->data, this->controlPointGrid->data,
+               backwardControlPointGrid->nvox * backwardControlPointGrid->nbyper);
+        reg_getDisplacementFromDeformation(backwardControlPointGrid);
+        reg_tools_multiplyValueToImage(backwardControlPointGrid, backwardControlPointGrid, -1);
+        reg_getDeformationFromDisplacement(backwardControlPointGrid);
+        for (int i = 0; i < backwardControlPointGrid->num_ext; ++i) {
+            mat44 tempMatrix = nifti_mat44_inverse(*reinterpret_cast<mat44 *>(backwardControlPointGrid->ext_list[i].edata));
+            memcpy(backwardControlPointGrid->ext_list[i].edata, &tempMatrix, sizeof(mat44));
+        }
+    }
+
+    // Set the floating mask image pyramid
+    if (this->usePyramid) {
+        floatingMaskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*));
+        backwardActiveVoxelNumber = (int*)malloc(this->levelToPerform * sizeof(int));
+    } else {
+        floatingMaskPyramid = (int**)malloc(sizeof(int*));
+        backwardActiveVoxelNumber = (int*)malloc(sizeof(int));
+    }
+
+    if (this->usePyramid) {
+        if (floatingMaskImage)
+            reg_createMaskPyramid<T>(floatingMaskImage,
+                                     floatingMaskPyramid,
+                                     this->levelNumber,
+                                     this->levelToPerform,
+                                     backwardActiveVoxelNumber);
+        else {
+            for (unsigned int l = 0; l < this->levelToPerform; ++l) {
+                backwardActiveVoxelNumber[l] = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz;
+                floatingMaskPyramid[l] = (int*)calloc(backwardActiveVoxelNumber[l], sizeof(int));
+            }
+        }
+    } else  // no pyramid
+    {
+        if (floatingMaskImage)
+            reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, 1, 1, backwardActiveVoxelNumber);
+        else {
+            backwardActiveVoxelNumber[0] = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz;
+            floatingMaskPyramid[0] = (int*)calloc(backwardActiveVoxelNumber[0], sizeof(int));
+        }
+    }
+
+#ifdef NDEBUG
+    if (this->verbose) {
+#endif
+        if (inverseConsistencyWeight > 0) {
+            char text[255];
+            sprintf(text, "Inverse consistency error penalty term weight: %g",
+                    inverseConsistencyWeight);
+            reg_print_info(this->executableName, text);
+        }
+#ifdef NDEBUG
+    }
+#endif
+
+    // Convert the control point grid into velocity field parametrisation
+    this->controlPointGrid->intent_p1 = SPLINE_VEL_GRID;
+    backwardControlPointGrid->intent_p1 = SPLINE_VEL_GRID;
+    // Set the number of composition to 6 by default
+    this->controlPointGrid->intent_p2 = 6;
+    backwardControlPointGrid->intent_p2 = 6;
+
+#ifndef NDEBUG
+    reg_print_msg_debug("reg_f3d2::Initialise() done");
+#endif
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::ExponentiateGradient() {
+    if (!useGradientCumulativeExp) return;
+
+    /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
+    // Exponentiate the forward gradient using the backward transformation
+#ifndef NDEBUG
+    reg_print_msg_debug("Update the forward measure gradient using a Dartel like approach");
+#endif
+    // Create all deformation field images needed for resampling
+    nifti_image **tempDef = (nifti_image**)malloc(size_t(fabs(backwardControlPointGrid->intent_p2) + 1) * sizeof(nifti_image*));
+    for (int i = 0; i <= (int)fabs(backwardControlPointGrid->intent_p2); ++i) {
+        tempDef[i] = nifti_copy_nim_info(this->deformationFieldImage);
+        tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper);
+    }
+    // Generate all intermediate deformation fields
+    reg_spline_getIntermediateDefFieldFromVelGrid(backwardControlPointGrid, tempDef);
+
+    // Remove the affine component
+    nifti_image *affine_disp = nullptr;
+    if (this->affineTransformation) {
+        affine_disp = nifti_copy_nim_info(this->deformationFieldImage);
+        affine_disp->data = malloc(affine_disp->nvox * affine_disp->nbyper);
+        mat44 backwardAffineTransformation = nifti_mat44_inverse(*this->affineTransformation);
+        reg_affine_getDeformationField(&backwardAffineTransformation, affine_disp);
+        reg_getDisplacementFromDeformation(affine_disp);
+    }
+
+    /* Allocate a temporary gradient image to store the backward gradient */
+    nifti_image *tempGrad = nifti_copy_nim_info(this->voxelBasedMeasureGradient);
+
+    tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper);
+    for (int i = 0; i < (int)fabsf(backwardControlPointGrid->intent_p2); ++i) {
+        if (affine_disp)
+            reg_tools_substractImageToImage(tempDef[i], affine_disp, tempDef[i]);
+        reg_resampleGradient(this->voxelBasedMeasureGradient, // floating
+                             tempGrad, // warped - out
+                             tempDef[i], // deformation field
+                             1, // interpolation type - linear
+                             0); // padding value
+        reg_tools_addImageToImage(tempGrad, // in1
+                                  this->voxelBasedMeasureGradient, // in2
+                                  this->voxelBasedMeasureGradient); // out
+    }
+
+    // Free the temporary deformation fields
+    for (int i = 0; i <= (int)fabsf(backwardControlPointGrid->intent_p2); ++i) {
+        nifti_image_free(tempDef[i]);
+        tempDef[i] = nullptr;
+    }
+    free(tempDef);
+    tempDef = nullptr;
+    // Free the temporary gradient image
+    nifti_image_free(tempGrad);
+    tempGrad = nullptr;
+    // Free the temporary affine displacement field
+    if (affine_disp)
+        nifti_image_free(affine_disp);
+    affine_disp = nullptr;
+    // Normalise the forward gradient
+    reg_tools_divideValueToImage(this->voxelBasedMeasureGradient, // in
+                                 this->voxelBasedMeasureGradient, // out
+                                 powf(2, fabsf(backwardControlPointGrid->intent_p2))); // value
+
+    /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
+    /* Exponentiate the backward gradient using the forward transformation */
+#ifndef NDEBUG
+    reg_print_msg_debug("Update the backward measure gradient using a Dartel like approach");
+#endif
+    // Allocate a temporary gradient image to store the backward gradient
+    tempGrad = nifti_copy_nim_info(backwardVoxelBasedMeasureGradientImage);
+    tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper);
+    // Create all deformation field images needed for resampling
+    tempDef = (nifti_image**)malloc(size_t(fabs(this->controlPointGrid->intent_p2) + 1) * sizeof(nifti_image*));
+    for (int i = 0; i <= (int)fabs(this->controlPointGrid->intent_p2); ++i) {
+        tempDef[i] = nifti_copy_nim_info(backwardDeformationFieldImage);
+        tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper);
+    }
+    // Generate all intermediate deformation fields
+    reg_spline_getIntermediateDefFieldFromVelGrid(this->controlPointGrid, tempDef);
+
+    // Remove the affine component
+    if (this->affineTransformation) {
+        affine_disp = nifti_copy_nim_info(backwardDeformationFieldImage);
+        affine_disp->data = malloc(affine_disp->nvox * affine_disp->nbyper);
+        reg_affine_getDeformationField(this->affineTransformation, affine_disp);
+        reg_getDisplacementFromDeformation(affine_disp);
+    }
+
+    for (int i = 0; i < (int)fabsf(this->controlPointGrid->intent_p2); ++i) {
+        if (affine_disp)
+            reg_tools_substractImageToImage(tempDef[i], affine_disp, tempDef[i]);
+        reg_resampleGradient(backwardVoxelBasedMeasureGradientImage, // floating
+                             tempGrad, // warped - out
+                             tempDef[i], // deformation field
+                             1, // interpolation type - linear
+                             0); // padding value
+        reg_tools_addImageToImage(tempGrad, // in1
+                                  backwardVoxelBasedMeasureGradientImage, // in2
+                                  backwardVoxelBasedMeasureGradientImage); // out
+    }
+
+    // Free the temporary deformation field
+    for (int i = 0; i <= (int)fabsf(this->controlPointGrid->intent_p2); ++i) {
+        nifti_image_free(tempDef[i]);
+        tempDef[i] = nullptr;
+    }
+    free(tempDef);
+    tempDef = nullptr;
+    // Free the temporary gradient image
+    nifti_image_free(tempGrad);
+    tempGrad = nullptr;
+    // Free the temporary affine displacement field
+    if (affine_disp)
+        nifti_image_free(affine_disp);
+    affine_disp = nullptr;
+    // Normalise the backward gradient
+    reg_tools_divideValueToImage(backwardVoxelBasedMeasureGradientImage, // in
+                                 backwardVoxelBasedMeasureGradientImage, // out
+                                 powf(2, fabsf(this->controlPointGrid->intent_p2))); // value
+}
+/* *************************************************************** */
+template <class T>
+void reg_f3d2<T>::UpdateParameters(float scale) {
+    // Restore the last successful control point grids
+    this->optimiser->RestoreBestDOF();
+
+    /************************/
+    /**** Forward update ****/
+    /************************/
+    // Scale the gradient image
+    nifti_image *forwardScaledGradient = nifti_copy_nim_info(this->transformationGradient);
+    forwardScaledGradient->data = malloc(forwardScaledGradient->nvox * forwardScaledGradient->nbyper);
+    reg_tools_multiplyValueToImage(this->transformationGradient,
+                                   forwardScaledGradient,
+                                   scale);
+    // The scaled gradient image is added to the current estimate of the transformation using
+    // a simple addition or by computing the BCH update
+    // Note that the gradient has been integrated over the path of transformation previously
+    if (bchUpdate) {
+        // Compute the BCH update
+        reg_print_msg_warn("USING BCH FORWARD - TESTING ONLY");
+#ifndef NDEBUG
+        reg_print_msg_debug("Update the forward control point grid using BCH approximation");
+#endif
+        compute_BCH_update(this->controlPointGrid,
+                           forwardScaledGradient,
+                           bchUpdateValue);
+    } else {
+        // Reset the gradient along the axes if appropriate
+        reg_setGradientToZero(forwardScaledGradient,
+                              !this->optimiser->GetOptimiseX(),
+                              !this->optimiser->GetOptimiseY(),
+                              !this->optimiser->GetOptimiseZ());
+        // Update the velocity field
+        reg_tools_addImageToImage(this->controlPointGrid, // in1
+                                  forwardScaledGradient, // in2
+                                  this->controlPointGrid); // out
+    }
+    // Clean the temporary nifti_images
+    nifti_image_free(forwardScaledGradient);
+    forwardScaledGradient = nullptr;
+
+    /************************/
+    /**** Backward update ***/
+    /************************/
+    // Scale the gradient image
+    nifti_image *backwardScaledGradient = nifti_copy_nim_info(backwardTransformationGradient);
+    backwardScaledGradient->data = malloc(backwardScaledGradient->nvox * backwardScaledGradient->nbyper);
+    reg_tools_multiplyValueToImage(backwardTransformationGradient,
+                                   backwardScaledGradient,
+                                   scale);
+    // The scaled gradient image is added to the current estimate of the transformation using
+    // a simple addition or by computing the BCH update
+    // Note that the gradient has been integrated over the path of transformation previously
+    if (bchUpdate) {
+        // Compute the BCH update
+        reg_print_msg_warn("USING BCH BACKWARD - TESTING ONLY");
+#ifndef NDEBUG
+        reg_print_msg_debug("Update the backward control point grid using BCH approximation");
+#endif
+        compute_BCH_update(backwardControlPointGrid,
+                           backwardScaledGradient,
+                           bchUpdateValue);
+    } else {
+        // Reset the gradient along the axes if appropriate
+        reg_setGradientToZero(backwardScaledGradient,
+                              !this->optimiser->GetOptimiseX(),
+                              !this->optimiser->GetOptimiseY(),
+                              !this->optimiser->GetOptimiseZ());
+        // Update the velocity field
+        reg_tools_addImageToImage(backwardControlPointGrid, // in1
+                                  backwardScaledGradient, // in2
+                                  backwardControlPointGrid); // out
+    }
+    // Clean the temporary nifti_images
+    nifti_image_free(backwardScaledGradient);
+    backwardScaledGradient = nullptr;
+
+    /****************************/
+    /******** Symmetrise ********/
+    /****************************/
+
+    // In order to ensure symmetry the forward and backward velocity fields
+    // are averaged in both image spaces: reference and floating
+    /****************************/
+    nifti_image *warpedForwardTrans = nifti_copy_nim_info(backwardControlPointGrid);
+    warpedForwardTrans->data = malloc(warpedForwardTrans->nvox * warpedForwardTrans->nbyper);
+    nifti_image *warpedBackwardTrans = nifti_copy_nim_info(this->controlPointGrid);
+    warpedBackwardTrans->data = malloc(warpedBackwardTrans->nvox * warpedBackwardTrans->nbyper);
+
+    // Both parametrisations are converted into displacement
+    reg_getDisplacementFromDeformation(this->controlPointGrid);
+    reg_getDisplacementFromDeformation(backwardControlPointGrid);
+
+    // Both parametrisations are copied over
+    memcpy(warpedBackwardTrans->data, backwardControlPointGrid->data, warpedBackwardTrans->nvox * warpedBackwardTrans->nbyper);
+    memcpy(warpedForwardTrans->data, this->controlPointGrid->data, warpedForwardTrans->nvox * warpedForwardTrans->nbyper);
+
+    // and subtracted (sum and negation)
+    reg_tools_substractImageToImage(backwardControlPointGrid, // displacement
+                                    warpedForwardTrans, // displacement
+                                    backwardControlPointGrid); // displacement output
+    reg_tools_substractImageToImage(this->controlPointGrid, // displacement
+                                    warpedBackwardTrans, // displacement
+                                    this->controlPointGrid); // displacement output
+    // Division by 2
+    reg_tools_multiplyValueToImage(backwardControlPointGrid, // displacement
+                                   backwardControlPointGrid, // displacement
+                                   0.5f);
+    reg_tools_multiplyValueToImage(this->controlPointGrid, // displacement
+                                   this->controlPointGrid, // displacement
+                                   0.5f);
+    // Clean the temporary allocated velocity fields
+    nifti_image_free(warpedForwardTrans);
+    warpedForwardTrans = nullptr;
+    nifti_image_free(warpedBackwardTrans);
+    warpedBackwardTrans = nullptr;
+
+    // Convert the velocity field from displacement to deformation
+    reg_getDeformationFromDisplacement(this->controlPointGrid);
+    reg_getDeformationFromDisplacement(backwardControlPointGrid);
+}
+/* *************************************************************** */
+template<class T>
+nifti_image** reg_f3d2<T>::GetWarpedImage() {
+    // The initial images are used
+    if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !backwardControlPointGrid) {
+        reg_print_fct_error("reg_f3d2<T>::GetWarpedImage()");
+        reg_print_msg_error("The reference, floating and control point grid images have to be defined");
+        reg_exit();
+    }
+
+    // Set the input images
+    reg_f3d2<T>::reference = this->inputReference;
+    reg_f3d2<T>::floating = this->inputFloating;
+    // No mask is used to perform the final resampling
+    reg_f3d2<T>::currentMask = nullptr;
+    reg_f3d2<T>::floatingMask = nullptr;
+
+    // Allocate the forward and backward warped images
+    AllocateWarped();
+    // Allocate the forward and backward dense deformation field
+    AllocateDeformationField();
+
+    // Warp the floating images into the reference spaces using a cubic spline interpolation
+    reg_f3d2<T>::WarpFloatingImage(3); // cubic spline interpolation
+
+    // Deallocate the deformation field
+    DeallocateDeformationField();
+
+    // Allocate and save the forward transformation warped image
+    nifti_image **warpedImage = (nifti_image**)malloc(2 * sizeof(nifti_image*));
+    warpedImage[0] = nifti_copy_nim_info(this->warped);
+    warpedImage[0]->cal_min = this->inputFloating->cal_min;
+    warpedImage[0]->cal_max = this->inputFloating->cal_max;
+    warpedImage[0]->scl_slope = this->inputFloating->scl_slope;
+    warpedImage[0]->scl_inter = this->inputFloating->scl_inter;
+    warpedImage[0]->data = malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper);
+    memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox * warpedImage[0]->nbyper);
+
+    // Allocate and save the backward transformation warped image
+    warpedImage[1] = nifti_copy_nim_info(backwardWarped);
+    warpedImage[1]->cal_min = this->inputReference->cal_min;
+    warpedImage[1]->cal_max = this->inputReference->cal_max;
+    warpedImage[1]->scl_slope = this->inputReference->scl_slope;
+    warpedImage[1]->scl_inter = this->inputReference->scl_inter;
+    warpedImage[1]->data = malloc(warpedImage[1]->nvox * warpedImage[1]->nbyper);
+    memcpy(warpedImage[1]->data, backwardWarped->data, warpedImage[1]->nvox * warpedImage[1]->nbyper);
+
+    // Deallocate the warped images
+    DeallocateWarped();
+
+    // Return the two final warped images
+    return warpedImage;
+}
 /* *************************************************************** */
 template class reg_f3d2<float>;
diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h
index 8e86bcb1..19d5e4ab 100644
--- a/reg-lib/_reg_f3d2.h
+++ b/reg-lib/_reg_f3d2.h
@@ -12,30 +12,94 @@
 
 #pragma once
 
-#include "_reg_f3d_sym.h"
+#include "_reg_f3d.h"
 
 /// @brief Fast Free Form Diffeomorphic Deformation registration class
 template <class T>
-class reg_f3d2 : public reg_f3d_sym<T>
-{
+class reg_f3d2: public reg_f3d<T> {
 protected:
-   bool BCHUpdate;
-   bool useGradientCumulativeExp;
-   int BCHUpdateValue;
-
-   virtual void GetDeformationField();
-   virtual void GetInverseConsistencyErrorField(bool forceAll);
-   virtual void GetInverseConsistencyGradient();
-   virtual void GetVoxelBasedGradient();
-   virtual void UpdateParameters(float);
-   virtual void ExponentiateGradient();
-   virtual void UseBCHUpdate(int);
-   virtual void UseGradientCumulativeExp();
-   virtual void DoNotUseGradientCumulativeExp();
+    nifti_image *floatingMaskImage;
+    int **floatingMaskPyramid;
+    int *floatingMask;
+    int *backwardActiveVoxelNumber;
+
+    nifti_image *backwardControlPointGrid;
+    nifti_image *backwardDeformationFieldImage;
+    nifti_image *backwardWarped;
+    nifti_image *backwardWarpedGradientImage;
+    nifti_image *backwardVoxelBasedMeasureGradientImage;
+    nifti_image *backwardTransformationGradient;
+
+    mat33 *backwardJacobianMatrix;
+
+    T inverseConsistencyWeight;
+    double currentIC;
+    double bestIC;
+
+    bool bchUpdate;
+    bool useGradientCumulativeExp;
+    int bchUpdateValue;
+
+    // Optimiser-related function
+    virtual void SetOptimiser() override;
+
+    virtual void AllocateWarped();
+    virtual void DeallocateWarped();
+    virtual void AllocateDeformationField();
+    virtual void DeallocateDeformationField();
+    virtual void AllocateWarpedGradient();
+    virtual void DeallocateWarpedGradient();
+    virtual void AllocateVoxelBasedMeasureGradient();
+    virtual void DeallocateVoxelBasedMeasureGradient();
+    virtual void AllocateTransformationGradient();
+    virtual void DeallocateTransformationGradient();
+    virtual void DeallocateCurrentInputImage();
+
+    virtual double ComputeBendingEnergyPenaltyTerm() override;
+    virtual double ComputeLinearEnergyPenaltyTerm() override;
+    virtual double ComputeJacobianBasedPenaltyTerm(int) override;
+    virtual double ComputeLandmarkDistancePenaltyTerm() override;
+    virtual void GetDeformationField() override;
+    virtual void WarpFloatingImage(int) override;
+    virtual void GetVoxelBasedGradient() override;
+    virtual void GetSimilarityMeasureGradient() override;
+    virtual void GetObjectiveFunctionGradient() override;
+    virtual void GetBendingEnergyGradient() override;
+    virtual void GetLinearEnergyGradient() override;
+    virtual void GetJacobianBasedGradient() override;
+    virtual void GetLandmarkDistanceGradient() override;
+    virtual void SetGradientImageToZero() override;
+    virtual T NormaliseGradient() override;
+    virtual void SmoothGradient() override;
+    virtual void GetApproximatedGradient() override;
+    virtual void DisplayCurrentLevelParameters() override;
+    virtual void PrintInitialObjFunctionValue() override;
+    virtual void PrintCurrentObjFunctionValue(T) override;
+    virtual void UpdateBestObjFunctionValue() override;
+    virtual double GetObjectiveFunctionValue() override;
+
+    virtual T InitialiseCurrentLevel() override;
+    virtual void UpdateParameters(float) override;
+    virtual void InitialiseSimilarity() override;
+
+    virtual void GetInverseConsistencyErrorField(bool forceAll);
+    virtual double GetInverseConsistencyPenaltyTerm();
+    virtual void GetInverseConsistencyGradient();
+    virtual void ExponentiateGradient();
 
 public:
-   reg_f3d2(int refTimePoint,int floTimePoint);
-   ~reg_f3d2();
-   virtual void Initialise();
-   virtual nifti_image **GetWarpedImage();
+    reg_f3d2(int refTimePoint, int floTimePoint);
+    virtual ~reg_f3d2();
+
+    virtual void SetFloatingMask(nifti_image*) override;
+    virtual void SetInverseConsistencyWeight(T) override;
+    virtual void CheckParameters() override;
+    virtual void Initialise() override;
+    virtual nifti_image** GetWarpedImage() override;
+    virtual nifti_image* GetBackwardControlPointPositionImage() override;
+    virtual bool GetSymmetricStatus() { return true; }
+
+    virtual void UseBCHUpdate(int) override;
+    virtual void UseGradientCumulativeExp() override;
+    virtual void DoNotUseGradientCumulativeExp() override;
 };
diff --git a/reg-lib/_reg_f3d_sym.cpp b/reg-lib/_reg_f3d_sym.cpp
deleted file mode 100644
index 2fec42ce..00000000
--- a/reg-lib/_reg_f3d_sym.cpp
+++ /dev/null
@@ -1,1824 +0,0 @@
-/*
- *  _reg_f3_symd.cpp
- *
- *
- *  Created by Marc Modat on 10/11/2011.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#include "_reg_f3d_sym.h"
-
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-reg_f3d_sym<T>::reg_f3d_sym(int refTimePoint,int floTimePoint)
-   :reg_f3d<T>::reg_f3d(refTimePoint,floTimePoint)
-{
-   this->executableName=(char *)"NiftyReg F3D SYM";
-
-   this->backwardControlPointGrid=nullptr;
-   this->backwardWarped=nullptr;
-   this->backwardWarpedGradientImage=nullptr;
-   this->backwardDeformationFieldImage=nullptr;
-   this->backwardVoxelBasedMeasureGradientImage=nullptr;
-   this->backwardTransformationGradient=nullptr;
-
-   this->backwardProbaJointHistogram=nullptr;
-   this->backwardLogJointHistogram=nullptr;
-
-   this->floatingMaskImage=nullptr;
-   this->floatingMask=nullptr;
-   this->floatingMaskPyramid=nullptr;
-   this->backwardActiveVoxelNumber=nullptr;
-
-   this->backwardJacobianMatrix=nullptr;
-
-   this->inverseConsistencyWeight=0.1;
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::reg_f3d_sym");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-reg_f3d_sym<T>::~reg_f3d_sym()
-{
-   if(this->backwardControlPointGrid!=nullptr)
-   {
-      nifti_image_free(this->backwardControlPointGrid);
-      this->backwardControlPointGrid=nullptr;
-   }
-
-   if(this->floatingMaskPyramid!=nullptr)
-   {
-      if(this->usePyramid)
-      {
-         for(unsigned int i=0; i<this->levelToPerform; i++)
-         {
-            if(this->floatingMaskPyramid[i]!=nullptr)
-            {
-               free(this->floatingMaskPyramid[i]);
-               this->floatingMaskPyramid[i]=nullptr;
-            }
-         }
-      }
-      else
-      {
-         if(this->floatingMaskPyramid[0]!=nullptr)
-         {
-            free(this->floatingMaskPyramid[0]);
-            this->floatingMaskPyramid[0]=nullptr;
-         }
-      }
-      free(this->floatingMaskPyramid);
-      floatingMaskPyramid=nullptr;
-   }
-
-   if(this->backwardActiveVoxelNumber!=nullptr)
-   {
-      free(this->backwardActiveVoxelNumber);
-      this->backwardActiveVoxelNumber=nullptr;
-   }
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::~reg_f3d_sym");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::SetFloatingMask(nifti_image *m)
-{
-   this->floatingMaskImage = m;
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::~SetFloatingMask");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::SetInverseConsistencyWeight(T w)
-{
-   this->inverseConsistencyWeight = w;
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::SetInverseConsistencyWeight");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-T reg_f3d_sym<T>::InitialiseCurrentLevel()
-{
-   // Refine the control point grids if required
-   if(this->gridRefinement)
-   {
-      if(this->currentLevel==0){
-         this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast<T>(powf(16.0f, this->levelNumber-1));
-         this->linearEnergyWeight = this->linearEnergyWeight / static_cast<T>(powf(3.0f, this->levelNumber-1));
-      }
-      else
-      {
-         reg_spline_refineControlPointGrid(this->controlPointGrid);
-         reg_spline_refineControlPointGrid(this->backwardControlPointGrid);
-         this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast<T>(16);
-         this->linearEnergyWeight = this->linearEnergyWeight * static_cast<T>(3);
-      }
-   }
-
-   // Set the mask images
-   if(this->usePyramid)
-   {
-      this->currentMask = this->maskPyramid[this->currentLevel];
-      this->floatingMask = this->floatingMaskPyramid[this->currentLevel];
-   }
-   else
-   {
-      this->currentMask = this->maskPyramid[0];
-      this->floatingMask = this->floatingMaskPyramid[0];
-   }
-
-   // Define the initial step size for the gradient ascent optimisation
-   T maxStepSize = this->reference->dx;
-   maxStepSize = this->reference->dy>maxStepSize?this->reference->dy:maxStepSize;
-   maxStepSize = this->floating->dx>maxStepSize?this->floating->dx:maxStepSize;
-   maxStepSize = this->floating->dy>maxStepSize?this->floating->dy:maxStepSize;
-   if(this->reference->ndim>2)
-   {
-      maxStepSize = (this->reference->dz>maxStepSize)?this->reference->dz:maxStepSize;
-      maxStepSize = (this->floating->dz>maxStepSize)?this->floating->dz:maxStepSize;
-   }
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::InitialiseCurrentLevel");
-#endif
-   return maxStepSize;
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::DeallocateCurrentInputImage()
-{
-   reg_f3d<T>::DeallocateCurrentInputImage();
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateCurrentInputImage");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::AllocateWarped()
-{
-   this->DeallocateWarped();
-
-   reg_f3d<T>::AllocateWarped();
-   if(this->floating==nullptr)
-   {
-      reg_print_fct_error("reg_f3d_sym<T>::AllocateWarped()");
-      reg_print_msg_error("The floating image is not defined");
-      reg_exit();
-   }
-   this->backwardWarped = nifti_copy_nim_info(this->floating);
-   this->backwardWarped->dim[0]=this->backwardWarped->ndim=this->reference->ndim;
-   this->backwardWarped->dim[4]=this->backwardWarped->nt=this->reference->nt;
-   this->backwardWarped->pixdim[4]=this->backwardWarped->dt=1.0;
-   this->backwardWarped->nvox =
-         (size_t)this->backwardWarped->nx *
-         (size_t)this->backwardWarped->ny *
-         (size_t)this->backwardWarped->nz *
-         (size_t)this->backwardWarped->nt;
-   this->backwardWarped->datatype = this->reference->datatype;
-   this->backwardWarped->nbyper = this->reference->nbyper;
-   this->backwardWarped->data = (void *)calloc(this->backwardWarped->nvox, this->backwardWarped->nbyper);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::AllocateWarped");
-#endif
-   return;
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::DeallocateWarped()
-{
-   reg_f3d<T>::DeallocateWarped();
-   if(this->backwardWarped!=nullptr)
-   {
-      nifti_image_free(this->backwardWarped);
-      this->backwardWarped=nullptr;
-   }
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateWarped");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::AllocateDeformationField()
-{
-   this->DeallocateDeformationField();
-
-   reg_f3d<T>::AllocateDeformationField();
-   if(this->floating==nullptr)
-   {
-      reg_print_fct_error("reg_f3d_sym<T>::AllocateDeformationField()");
-      reg_print_msg_error("The floating image is not defined");
-      reg_exit();
-   }
-   if(this->backwardControlPointGrid==nullptr)
-   {
-      reg_print_fct_error("reg_f3d_sym<T>::AllocateDeformationField()");
-      reg_print_msg_error("The backward control point image is not defined");
-      reg_exit();
-   }
-   this->backwardDeformationFieldImage = nifti_copy_nim_info(this->floating);
-   this->backwardDeformationFieldImage->dim[0]=this->backwardDeformationFieldImage->ndim=5;
-   this->backwardDeformationFieldImage->dim[1]=this->backwardDeformationFieldImage->nx=this->floating->nx;
-   this->backwardDeformationFieldImage->dim[2]=this->backwardDeformationFieldImage->ny=this->floating->ny;
-   this->backwardDeformationFieldImage->dim[3]=this->backwardDeformationFieldImage->nz=this->floating->nz;
-   this->backwardDeformationFieldImage->dim[4]=this->backwardDeformationFieldImage->nt=1;
-   this->backwardDeformationFieldImage->pixdim[4]=this->backwardDeformationFieldImage->dt=1.0;
-   if(this->floating->nz==1)
-      this->backwardDeformationFieldImage->dim[5]=this->backwardDeformationFieldImage->nu=2;
-   else this->backwardDeformationFieldImage->dim[5]=this->backwardDeformationFieldImage->nu=3;
-   this->backwardDeformationFieldImage->pixdim[5]=this->backwardDeformationFieldImage->du=1.0;
-   this->backwardDeformationFieldImage->dim[6]=this->backwardDeformationFieldImage->nv=1;
-   this->backwardDeformationFieldImage->pixdim[6]=this->backwardDeformationFieldImage->dv=1.0;
-   this->backwardDeformationFieldImage->dim[7]=this->backwardDeformationFieldImage->nw=1;
-   this->backwardDeformationFieldImage->pixdim[7]=this->backwardDeformationFieldImage->dw=1.0;
-   this->backwardDeformationFieldImage->nvox =
-         (size_t)this->backwardDeformationFieldImage->nx *
-         (size_t)this->backwardDeformationFieldImage->ny *
-         (size_t)this->backwardDeformationFieldImage->nz *
-         (size_t)this->backwardDeformationFieldImage->nt *
-         (size_t)this->backwardDeformationFieldImage->nu;
-   this->backwardDeformationFieldImage->nbyper = this->backwardControlPointGrid->nbyper;
-   this->backwardDeformationFieldImage->datatype = this->backwardControlPointGrid->datatype;
-   this->backwardDeformationFieldImage->data = (void *)calloc(this->backwardDeformationFieldImage->nvox,
-                                                              this->backwardDeformationFieldImage->nbyper);
-   this->backwardDeformationFieldImage->intent_code=NIFTI_INTENT_VECTOR;
-   memset(this->backwardDeformationFieldImage->intent_name, 0, 16);
-   strcpy(this->backwardDeformationFieldImage->intent_name,"NREG_TRANS");
-   this->backwardDeformationFieldImage->intent_p1=DEF_FIELD;
-   this->backwardDeformationFieldImage->scl_slope=1.f;
-   this->backwardDeformationFieldImage->scl_inter=0.f;
-
-   if(this->measure_dti!=nullptr)
-      this->backwardJacobianMatrix=(mat33 *)malloc(
-            this->backwardDeformationFieldImage->nx *
-            this->backwardDeformationFieldImage->ny *
-            this->backwardDeformationFieldImage->nz *
-            sizeof(mat33));
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::AllocateDeformationField");
-#endif
-   return;
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::DeallocateDeformationField()
-{
-   reg_f3d<T>::DeallocateDeformationField();
-   if(this->backwardDeformationFieldImage!=nullptr)
-   {
-      nifti_image_free(this->backwardDeformationFieldImage);
-      this->backwardDeformationFieldImage=nullptr;
-   }
-   if(this->backwardJacobianMatrix!=nullptr)
-   {
-      free(this->backwardJacobianMatrix);
-      this->backwardJacobianMatrix=nullptr;
-   }
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateDeformationField");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::AllocateWarpedGradient()
-{
-   this->DeallocateWarpedGradient();
-
-   reg_f3d<T>::AllocateWarpedGradient();
-   if(this->backwardDeformationFieldImage==nullptr)
-   {
-      reg_print_fct_error("reg_f3d_sym<T>::AllocateWarpedGradient()");
-      reg_print_msg_error("The backward control point image is not defined");
-      reg_exit();
-   }
-   this->backwardWarpedGradientImage = nifti_copy_nim_info(this->backwardDeformationFieldImage);
-   this->backwardWarpedGradientImage->data = (void *)calloc(this->backwardWarpedGradientImage->nvox,
-                                                            this->backwardWarpedGradientImage->nbyper);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::AllocateWarpedGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::DeallocateWarpedGradient()
-{
-   reg_f3d<T>::DeallocateWarpedGradient();
-   if(this->backwardWarpedGradientImage!=nullptr)
-   {
-      nifti_image_free(this->backwardWarpedGradientImage);
-      this->backwardWarpedGradientImage=nullptr;
-   }
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateWarpedGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::AllocateVoxelBasedMeasureGradient()
-{
-   this->DeallocateVoxelBasedMeasureGradient();
-
-   reg_f3d<T>::AllocateVoxelBasedMeasureGradient();
-   if(this->backwardDeformationFieldImage==nullptr)
-   {
-      reg_print_fct_error("reg_f3d_sym<T>::AllocateVoxelBasedMeasureGradient()");
-      reg_print_msg_error("The backward control point image is not defined");
-      reg_exit();
-   }
-   this->backwardVoxelBasedMeasureGradientImage = nifti_copy_nim_info(this->backwardDeformationFieldImage);
-   this->backwardVoxelBasedMeasureGradientImage->data =
-         (void *)calloc(this->backwardVoxelBasedMeasureGradientImage->nvox,
-                        this->backwardVoxelBasedMeasureGradientImage->nbyper);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::AllocateVoxelBasedMeasureGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::DeallocateVoxelBasedMeasureGradient()
-{
-   reg_f3d<T>::DeallocateVoxelBasedMeasureGradient();
-   if(this->backwardVoxelBasedMeasureGradientImage!=nullptr)
-   {
-      nifti_image_free(this->backwardVoxelBasedMeasureGradientImage);
-      this->backwardVoxelBasedMeasureGradientImage=nullptr;
-   }
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateVoxelBasedMeasureGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::AllocateTransformationGradient()
-{
-   this->DeallocateTransformationGradient();
-
-   reg_f3d<T>::AllocateTransformationGradient();
-   if(this->backwardControlPointGrid==nullptr)
-   {
-      reg_print_fct_error("reg_f3d_sym<T>::AllocateTransformationGradient()");
-      reg_print_msg_error("The backward control point image is not defined");
-      reg_exit();
-   }
-   this->backwardTransformationGradient = nifti_copy_nim_info(this->backwardControlPointGrid);
-   this->backwardTransformationGradient->data =
-         (void *)calloc(this->backwardTransformationGradient->nvox,
-                        this->backwardTransformationGradient->nbyper);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::AllocateTransformationGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::DeallocateTransformationGradient()
-{
-   reg_f3d<T>::DeallocateTransformationGradient();
-   if(this->backwardTransformationGradient!=nullptr)
-      nifti_image_free(this->backwardTransformationGradient);
-   this->backwardTransformationGradient=nullptr;
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::DeallocateTransformationGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::CheckParameters()
-{
-
-   reg_f3d<T>::CheckParameters();
-
-   // CHECK THE FLOATING MASK DIMENSION IF IT IS DEFINED
-   if(this->floatingMaskImage!=nullptr)
-   {
-      if(this->inputFloating->nx != this->floatingMaskImage->nx ||
-            this->inputFloating->ny != this->floatingMaskImage->ny ||
-            this->inputFloating->nz != this->floatingMaskImage->nz)
-      {
-         reg_print_fct_error("reg_f3d_sym<T>::CheckParameters()");
-         reg_print_msg_error("The floating image and its mask have different dimension");
-         reg_exit();
-      }
-   }
-
-   // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
-   T penaltySum=
-         this->bendingEnergyWeight
-         +this->linearEnergyWeight
-         +this->jacobianLogWeight
-         +this->inverseConsistencyWeight
-         +this->landmarkRegWeight;
-   if(penaltySum>=1)
-   {
-      this->similarityWeight=0;
-      this->bendingEnergyWeight /= penaltySum;
-      this->linearEnergyWeight /= penaltySum;
-      this->jacobianLogWeight /= penaltySum;
-      this->inverseConsistencyWeight /= penaltySum;
-      this->landmarkRegWeight /= penaltySum;
-   }
-   else this->similarityWeight = 1.0 - penaltySum;
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::CheckParameters");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::Initialise()
-{
-   reg_f3d<T>::Initialise();
-
-   if(this->inputControlPointGrid==nullptr){
-      // Define the spacing for the first level
-      float gridSpacing[3] = {this->spacing[0],this->spacing[1],this->spacing[2]};
-      if(this->spacing[0]<0)
-         gridSpacing[0] *= -(this->inputReference->dx+this->inputFloating->dx)/2.f;
-      if(this->spacing[1]<0)
-         gridSpacing[1] *= -(this->inputReference->dy+this->inputFloating->dy)/2.f;
-      if(this->spacing[2]<0)
-         gridSpacing[2] *= -(this->inputReference->dz+this->inputFloating->dz)/2.f;
-      gridSpacing[0] *= powf(2.0f, (float)(this->levelNumber-1));
-      gridSpacing[1] *= powf(2.0f, (float)(this->levelNumber-1));
-      gridSpacing[2] *= powf(2.0f, (float)(this->levelNumber-1));
-
-      // Create the forward and backward control point grids
-      reg_createSymmetricControlPointGrids<T>(&this->controlPointGrid,
-                                              &this->backwardControlPointGrid,
-                                              this->referencePyramid[0],
-            this->floatingPyramid[0],
-            this->affineTransformation,
-            gridSpacing);
-   }
-   else{
-      // The control point grid image is initialised with the provided grid
-      this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid);
-      this->controlPointGrid->data = (void *)malloc( this->controlPointGrid->nvox *
-                                                     this->controlPointGrid->nbyper);
-      if(this->inputControlPointGrid->num_ext>0)
-         nifti_copy_extensions(this->controlPointGrid,this->inputControlPointGrid);
-      memcpy( this->controlPointGrid->data, this->inputControlPointGrid->data,
-              this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
-      // The final grid spacing is computed
-      this->spacing[0] = this->controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber-1));
-      this->spacing[1] = this->controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber-1));
-      if(this->controlPointGrid->nz>1)
-         this->spacing[2] = this->controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber-1));
-      // The backward grid is derived from the forward
-      this->backwardControlPointGrid=nifti_copy_nim_info(this->controlPointGrid);
-      this->backwardControlPointGrid->data = (void *)malloc(this->backwardControlPointGrid->nvox *
-                                                            this->backwardControlPointGrid->nbyper);
-      if(this->controlPointGrid->num_ext>0)
-         nifti_copy_extensions(this->backwardControlPointGrid,this->controlPointGrid);
-      memcpy(this->backwardControlPointGrid->data,
-             this->controlPointGrid->data,
-             this->backwardControlPointGrid->nvox*this->backwardControlPointGrid->nbyper);
-      reg_getDisplacementFromDeformation(this->backwardControlPointGrid);
-      reg_tools_multiplyValueToImage(this->backwardControlPointGrid,this->backwardControlPointGrid,-1.f);
-      reg_getDeformationFromDisplacement(this->backwardControlPointGrid);
-      for(int i=0; i<this->backwardControlPointGrid->num_ext; ++i){
-          mat44 tempMatrix = nifti_mat44_inverse(*reinterpret_cast<mat44 *>(this->backwardControlPointGrid->ext_list[i].edata));
-         memcpy(this->backwardControlPointGrid->ext_list[i].edata,
-                &tempMatrix,
-                sizeof(mat44));
-      }
-   }
-
-   // Set the floating mask image pyramid
-   if(this->usePyramid)
-   {
-      this->floatingMaskPyramid = (int **)malloc(this->levelToPerform*sizeof(int *));
-      this->backwardActiveVoxelNumber= (int *)malloc(this->levelToPerform*sizeof(int));
-   }
-   else
-   {
-      this->floatingMaskPyramid = (int **)malloc(sizeof(int *));
-      this->backwardActiveVoxelNumber= (int *)malloc(sizeof(int));
-   }
-
-   if(this->usePyramid)
-   {
-      if (this->floatingMaskImage!=nullptr)
-         reg_createMaskPyramid<T>(this->floatingMaskImage,
-                                  this->floatingMaskPyramid,
-                                  this->levelNumber,
-                                  this->levelToPerform,
-                                  this->backwardActiveVoxelNumber);
-      else
-      {
-         for(unsigned int l=0; l<this->levelToPerform; ++l)
-         {
-            this->backwardActiveVoxelNumber[l]=this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz;
-            this->floatingMaskPyramid[l]=(int *)calloc(backwardActiveVoxelNumber[l],sizeof(int));
-         }
-      }
-   }
-   else  // no pyramid
-   {
-      if (this->floatingMaskImage!=nullptr)
-         reg_createMaskPyramid<T>(this->floatingMaskImage, this->floatingMaskPyramid, 1, 1, this->backwardActiveVoxelNumber);
-      else
-      {
-         this->backwardActiveVoxelNumber[0]=this->floatingPyramid[0]->nx*this->floatingPyramid[0]->ny*this->floatingPyramid[0]->nz;
-         this->floatingMaskPyramid[0]=(int *)calloc(backwardActiveVoxelNumber[0],sizeof(int));
-      }
-   }
-
-#ifdef NDEBUG
-   if(this->verbose)
-   {
-#endif
-      if(this->inverseConsistencyWeight>0){
-         char text[255];
-         sprintf(text, "Inverse consistency error penalty term weight: %g",
-                 this->inverseConsistencyWeight);
-         reg_print_info(this->executableName, text);
-      }
-#ifdef NDEBUG
-   }
-#endif
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::Initialise");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::GetDeformationField()
-{
-   reg_spline_getDeformationField(this->controlPointGrid,
-                                  this->deformationFieldImage,
-                                  this->currentMask,
-                                  false, //composition
-                                  true // bspline
-                                  );
-   reg_spline_getDeformationField(this->backwardControlPointGrid,
-                                  this->backwardDeformationFieldImage,
-                                  this->floatingMask,
-                                  false, //composition
-                                  true // bspline
-                                  );
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetDeformationField");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::WarpFloatingImage(int inter)
-{
-   // Compute the deformation fields
-   this->GetDeformationField();
-
-   // Resample the floating image
-   if(this->measure_dti==nullptr)
-   {
-      reg_resampleImage(this->floating,
-                        this->warped,
-                        this->deformationFieldImage,
-                        this->currentMask,
-                        inter,
-                        this->warpedPaddingValue);
-   }
-   else
-   {
-      reg_defField_getJacobianMatrix(this->deformationFieldImage,
-                                     this->forwardJacobianMatrix);
-      /*DTI needs fixing!
-     reg_resampleImage(this->floating,
-                        this->warped,
-                        this->deformationFieldImage,
-                        this->currentMask,
-                        inter,
-                        this->warpedPaddingValue,
-                        this->measure_dti->GetActiveTimepoints(),
-                        this->forwardJacobianMatrix);*/
-   }
-
-   // Resample the reference image
-   if(this->measure_dti==nullptr)
-   {
-      reg_resampleImage(this->reference, // input image
-                        this->backwardWarped, // warped input image
-                        this->backwardDeformationFieldImage, // deformation field
-                        this->floatingMask, // mask
-                        inter, // interpolation type
-                        this->warpedPaddingValue); // padding value
-   }
-   else
-   {
-      reg_defField_getJacobianMatrix(this->backwardDeformationFieldImage,
-                                     this->backwardJacobianMatrix);
-     /* DTI needs fixing
-    reg_resampleImage(this->reference, // input image
-                        this->backwardWarped, // warped input image
-                        this->backwardDeformationFieldImage, // deformation field
-                        this->floatingMask, // mask
-                        inter, // interpolation type
-                        this->warpedPaddingValue, // padding value
-                        this->measure_dti->GetActiveTimepoints(),
-                        this->backwardJacobianMatrix);*/
-   }
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::WarpFloatingImage");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-double reg_f3d_sym<T>::ComputeJacobianBasedPenaltyTerm(int type)
-{
-   if (this->jacobianLogWeight<=0) return 0.;
-
-   double forwardPenaltyTerm=reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(type);
-
-   double backwardPenaltyTerm=0.;
-
-   if(type==2)
-   {
-      backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(this->backwardControlPointGrid,
-                                                              this->floating,
-                                                              false);
-   }
-   else
-   {
-      backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(this->backwardControlPointGrid,
-                                                              this->floating,
-                                                              this->jacobianLogApproximation);
-   }
-   unsigned int maxit=5;
-   if(type>0) maxit=20;
-   unsigned int it=0;
-   while(backwardPenaltyTerm!=backwardPenaltyTerm && it<maxit)
-   {
-      if(type==2)
-      {
-         backwardPenaltyTerm = reg_spline_correctFolding(this->backwardControlPointGrid,
-                                                         this->floating,
-                                                         false);
-      }
-      else
-      {
-         backwardPenaltyTerm = reg_spline_correctFolding(this->backwardControlPointGrid,
-                                                         this->floating,
-                                                         this->jacobianLogApproximation);
-      }
-#ifndef NDEBUG
-      reg_print_msg_debug("Folding correction - Backward transformation");
-#endif
-      it++;
-   }
-   if(type>0 && it>0)
-   {
-      if(backwardPenaltyTerm!=backwardPenaltyTerm)
-      {
-         this->optimiser->RestoreBestDOF();
-#ifndef NDEBUG
-         reg_print_fct_warn("reg_f3d_sym<T>::ComputeJacobianBasedPenaltyTerm()");
-         reg_print_msg_warn("The backward transformation folding correction scheme failed");
-#endif
-      }
-      else
-      {
-#ifdef NDEBUG
-         if(this->verbose)
-         {
-#endif
-            char text[255];
-            sprintf(text, "Backward transformation folding correction, %i step(s)", it);
-            reg_print_msg_debug(text);
-#ifdef NDEBUG
-         }
-#endif
-      }
-   }
-   backwardPenaltyTerm *= (double)this->jacobianLogWeight;
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::ComputeJacobianBasedPenaltyTerm");
-#endif
-   return forwardPenaltyTerm+backwardPenaltyTerm;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-double reg_f3d_sym<T>::ComputeBendingEnergyPenaltyTerm()
-{
-   if (this->bendingEnergyWeight<=0) return 0.;
-
-   double forwardPenaltyTerm=reg_f3d<T>::ComputeBendingEnergyPenaltyTerm();
-
-   double value = reg_spline_approxBendingEnergy(this->backwardControlPointGrid);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::ComputeBendingEnergyPenaltyTerm");
-#endif
-   return forwardPenaltyTerm + this->bendingEnergyWeight * value;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-double reg_f3d_sym<T>::ComputeLinearEnergyPenaltyTerm()
-{
-   if(this->linearEnergyWeight<=0) return 0.;
-
-   double forwardPenaltyTerm=reg_f3d<T>::ComputeLinearEnergyPenaltyTerm();
-
-   double backwardPenaltyTerm = this->linearEnergyWeight*reg_spline_approxLinearEnergy(this->backwardControlPointGrid);
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::ComputeLinearEnergyPenaltyTerm");
-#endif
-   return forwardPenaltyTerm+backwardPenaltyTerm;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-double reg_f3d_sym<T>::ComputeLandmarkDistancePenaltyTerm()
-{
-   if(this->landmarkRegWeight<=0) return 0.;
-
-   double forwardPenaltyTerm=reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm();
-
-   double backwardPenaltyTerm = this->landmarkRegWeight*reg_spline_getLandmarkDistance(this->backwardControlPointGrid,
-                                                                                       this->landmarkRegNumber,
-                                                                                       this->landmarkFloating,
-                                                                                       this->landmarkReference);
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::ComputeLandmarkDistancePenaltyTerm");
-#endif
-   return forwardPenaltyTerm+backwardPenaltyTerm;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::GetVoxelBasedGradient()
-{
-   // The voxel based gradient image is initialised with zeros
-   reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient,
-                                  this->voxelBasedMeasureGradient,
-                                  0.f);
-   reg_tools_multiplyValueToImage(this->backwardVoxelBasedMeasureGradientImage,
-                                  this->backwardVoxelBasedMeasureGradientImage,
-                                  0.f);
-   // The intensity gradient is first computed
-   //    if(this->measure_dti!=nullptr){
-   //        reg_getImageGradient(this->floating,
-   //                             this->warpedGradient,
-   //                             this->deformationFieldImage,
-   //                             this->currentMask,
-   //                             this->interpolation,
-   //                             this->warpedPaddingValue,
-   //                             this->measure_dti->GetActiveTimepoints(),
-   //                             this->forwardJacobianMatrix,
-   //                             this->warped);
-
-   //        reg_getImageGradient(this->reference,
-   //                             this->backwardWarpedGradientImage,
-   //                             this->backwardDeformationFieldImage,
-   //                             this->floatingMask,
-   //                             this->interpolation,
-   //                             this->warpedPaddingValue,
-   //                             this->measure_dti->GetActiveTimepoints(),
-   //                             this->backwardJacobianMatrix,
-   //                             this->backwardWarped);
-   //   if(this->measure_dti!=nullptr)
-   //      this->measure_dti->GetVoxelBasedSimilarityMeasureGradient();
-   //    }
-   //    else{
-   //    }
-
-
-   for(int t=0; t<this->reference->nt; ++t){
-      reg_getImageGradient(this->floating,
-                           this->warpedGradient,
-                           this->deformationFieldImage,
-                           this->currentMask,
-                           this->interpolation,
-                           this->warpedPaddingValue,
-                           t);
-
-      reg_getImageGradient(this->reference,
-                           this->backwardWarpedGradientImage,
-                           this->backwardDeformationFieldImage,
-                           this->floatingMask,
-                           this->interpolation,
-                           this->warpedPaddingValue,
-                           t);
-
-      // The gradient of the various measures of similarity are computed
-      if(this->measure_nmi!=nullptr)
-         this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t);
-
-      if(this->measure_ssd!=nullptr)
-         this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t);
-
-      if(this->measure_kld!=nullptr)
-         this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t);
-
-      if(this->measure_lncc!=nullptr)
-         this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t);
-
-      if(this->measure_mind!=nullptr)
-         this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t);
-
-      if(this->measure_mindssc!=nullptr)
-         this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t);
-   } // timepoint
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetVoxelBasedGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::GetSimilarityMeasureGradient()
-{
-   reg_f3d<T>::GetSimilarityMeasureGradient();
-
-   // The voxel based sim measure gradient is convolved with a spline kernel
-   // Convolution along the x axis
-   float currentNodeSpacing[3];
-   currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dx;
-   bool activeAxis[3]= {1,0,0};
-   reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage,
-                               currentNodeSpacing,
-                               CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               nullptr, // mask
-                               nullptr, // all volumes are active
-                               activeAxis
-                               );
-   // Convolution along the y axis
-   currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dy;
-   activeAxis[0]=0;
-   activeAxis[1]=1;
-   reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage,
-                               currentNodeSpacing,
-                               CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               nullptr, // mask
-                               nullptr, // all volumes are active
-                               activeAxis
-                               );
-   // Convolution along the z axis if required
-   if(this->voxelBasedMeasureGradient->nz>1)
-   {
-      currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dz;
-      activeAxis[1]=0;
-      activeAxis[2]=1;
-      reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage,
-                                  currentNodeSpacing,
-                                  CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                  nullptr, // mask
-                                  nullptr, // all volumes are active
-                                  activeAxis
-                                  );
-   }
-   // The backward node based sim measure gradient is extracted
-   mat44 reorientation;
-   if(this->reference->sform_code>0)
-      reorientation = this->reference->sto_ijk;
-   else reorientation = this->reference->qto_ijk;
-   reg_voxelCentric2NodeCentric(this->backwardTransformationGradient,
-                                this->backwardVoxelBasedMeasureGradientImage,
-                                this->similarityWeight,
-                                false, // no update
-                                &reorientation // voxel to mm conversion
-                                );
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetSimilarityMeasureGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::GetJacobianBasedGradient()
-{
-   if(this->jacobianLogWeight<=0) return;
-
-   reg_f3d<T>::GetJacobianBasedGradient();
-
-   reg_spline_getJacobianPenaltyTermGradient(this->backwardControlPointGrid,
-                                             this->floating,
-                                             this->backwardTransformationGradient,
-                                             this->jacobianLogWeight,
-                                             this->jacobianLogApproximation);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetJacobianBasedGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::GetBendingEnergyGradient()
-{
-   if(this->bendingEnergyWeight<=0) return;
-
-   reg_f3d<T>::GetBendingEnergyGradient();
-   reg_spline_approxBendingEnergyGradient(this->backwardControlPointGrid,
-                                          this->backwardTransformationGradient,
-                                          this->bendingEnergyWeight);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetBendingEnergyGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::GetLinearEnergyGradient()
-{
-   if(this->linearEnergyWeight<=0) return;
-
-   reg_f3d<T>::GetLinearEnergyGradient();
-
-   reg_spline_approxLinearEnergyGradient(this->backwardControlPointGrid,
-                                         this->backwardTransformationGradient,
-                                         this->linearEnergyWeight);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetLinearEnergyGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::GetLandmarkDistanceGradient()
-{
-   if(this->landmarkRegWeight<=0) return;
-
-   reg_f3d<T>::GetLandmarkDistanceGradient();
-
-   reg_spline_getLandmarkDistanceGradient(this->backwardControlPointGrid,
-                                          this->backwardTransformationGradient,
-                                          this->landmarkRegNumber,
-                                          this->landmarkFloating,
-                                          this->landmarkReference,
-                                          this->landmarkRegWeight);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetLandmarkDistanceGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::SetGradientImageToZero()
-{
-   reg_f3d<T>::SetGradientImageToZero();
-
-   T* nodeGradPtr = static_cast<T *>(this->backwardTransformationGradient->data);
-   for(size_t i=0; i<this->backwardTransformationGradient->nvox; ++i)
-      *nodeGradPtr++=0;
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::SetGradientImageToZero");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::SmoothGradient()
-{
-   if(this->gradientSmoothingSigma!=0)
-   {
-      reg_f3d<T>::SmoothGradient();
-      // The gradient is smoothed using a Gaussian kernel if it is required
-      float kernel = fabs(this->gradientSmoothingSigma);
-      reg_tools_kernelConvolution(this->backwardTransformationGradient,
-                                  &kernel,
-                                  GAUSSIAN_KERNEL);
-   }
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::SmoothGradient");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::GetApproximatedGradient()
-{
-   reg_f3d<T>::GetApproximatedGradient();
-
-   // Loop over every control points
-   T *gridPtr = static_cast<T *>(this->backwardControlPointGrid->data);
-   T *gradPtr = static_cast<T *>(this->backwardTransformationGradient->data);
-   T eps = this->floating->dx/1000.f;
-   for(size_t i=0; i<this->backwardControlPointGrid->nvox; i++)
-   {
-      T currentValue = this->optimiser->GetBestDOF_b()[i];
-      gridPtr[i] = currentValue+eps;
-      double valPlus = this->GetObjectiveFunctionValue();
-      gridPtr[i] = currentValue-eps;
-      double valMinus = this->GetObjectiveFunctionValue();
-      gridPtr[i] = currentValue;
-      gradPtr[i] = -(T)((valPlus - valMinus ) / (2.0*eps));
-   }
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetApproximatedGradient");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-T reg_f3d_sym<T>::NormaliseGradient()
-{
-   // The forward gradient max length is computed
-   T forwardMaxValue = reg_f3d<T>::NormaliseGradient();
-
-   // The backward gradient max length is computed
-   T maxGradValue=0;
-   size_t voxNumber = this->backwardTransformationGradient->nx *
-         this->backwardTransformationGradient->ny *
-         this->backwardTransformationGradient->nz;
-   T *bckPtrX = static_cast<T *>(this->backwardTransformationGradient->data);
-   T *bckPtrY = &bckPtrX[voxNumber];
-   if(this->backwardTransformationGradient->nz>1)
-   {
-      T *bckPtrZ = &bckPtrY[voxNumber];
-      for(size_t i=0; i<voxNumber; i++)
-      {
-         T valX=0,valY=0,valZ=0;
-         if(this->optimiseX)
-            valX = *bckPtrX++;
-         if(this->optimiseY)
-            valY = *bckPtrY++;
-         if(this->optimiseZ)
-            valZ = *bckPtrZ++;
-         T length = (T)(sqrt(valX*valX + valY*valY + valZ*valZ));
-         maxGradValue = (length>maxGradValue)?length:maxGradValue;
-      }
-   }
-   else
-   {
-      for(size_t i=0; i<voxNumber; i++)
-      {
-         T valX=0,valY=0;
-         if(this->optimiseX)
-            valX = *bckPtrX++;
-         if(this->optimiseY)
-            valY = *bckPtrY++;
-         T length = (T)(sqrt(valX*valX + valY*valY));
-         maxGradValue = (length>maxGradValue)?length:maxGradValue;
-      }
-   }
-
-   // The largest value between the forward and backward gradient is kept
-   maxGradValue = maxGradValue>forwardMaxValue?maxGradValue:forwardMaxValue;
-#ifndef NDEBUG
-   char text[255];
-   sprintf(text, "Objective function gradient maximal length: %g", maxGradValue);
-   reg_print_msg_debug(text);
-#endif
-
-   // The forward gradient is normalised
-   T *forPtrX = static_cast<T *>(this->transformationGradient->data);
-   for(size_t i=0; i<this->transformationGradient->nvox; ++i)
-   {
-      *forPtrX++ /= maxGradValue;
-   }
-   // The backward gradient is normalised
-   bckPtrX = static_cast<T *>(this->backwardTransformationGradient->data);
-   for(size_t i=0; i<this->backwardTransformationGradient->nvox; ++i)
-   {
-      *bckPtrX++ /= maxGradValue;
-   }
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::NormaliseGradient");
-#endif
-   // Returns the largest gradient distance
-   return maxGradValue;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::GetObjectiveFunctionGradient()
-{
-   if(!this->useApproxGradient)
-   {
-      // Compute the gradient of the similarity measure
-      if(this->similarityWeight>0)
-      {
-         this->WarpFloatingImage(this->interpolation);
-         this->GetSimilarityMeasureGradient();
-      }
-      else
-      {
-         this->SetGradientImageToZero();
-      }
-   }
-   else this->GetApproximatedGradient();
-   this->optimiser->IncrementCurrentIterationNumber();
-
-   // Smooth the gradient if require
-   this->SmoothGradient();
-
-   if(!this->useApproxGradient)
-   {
-      // Compute the penalty term gradients if required
-      this->GetBendingEnergyGradient();
-      this->GetJacobianBasedGradient();
-      this->GetLinearEnergyGradient();
-      this->GetLandmarkDistanceGradient();
-      this->GetInverseConsistencyGradient();
-   }
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetObjectiveFunctionGradient");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::DisplayCurrentLevelParameters()
-{
-   reg_f3d<T>::DisplayCurrentLevelParameters();
-#ifdef NDEBUG
-   if(this->verbose)
-   {
-#endif
-      char text[255];
-      reg_print_info(this->executableName, "Current backward control point image");
-      sprintf(text, "\t* image dimension: %i x %i x %i",
-             this->backwardControlPointGrid->nx, this->backwardControlPointGrid->ny,
-             this->backwardControlPointGrid->nz);
-      reg_print_info(this->executableName, text);
-      sprintf(text, "\t* image spacing: %g x %g x %g mm",
-             this->backwardControlPointGrid->dx, this->backwardControlPointGrid->dy,
-             this->backwardControlPointGrid->dz);
-      reg_print_info(this->executableName, text);
-#ifdef NDEBUG
-   }
-#endif
-
-#ifndef NDEBUG
-
-   if(this->backwardControlPointGrid->sform_code>0)
-      reg_mat44_disp(&(this->backwardControlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP sform");
-   else reg_mat44_disp(&(this->backwardControlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP qform");
-#endif
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::DisplayCurrentLevelParameters");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::GetInverseConsistencyErrorField(bool forceAll)
-{
-   if (this->inverseConsistencyWeight<=0) return;
-
-   // Compute both deformation fields
-   if(this->similarityWeight<=0 || forceAll)
-   {
-      this->GetDeformationField();
-   }
-   // Compose the obtained deformation fields by the inverse transformations
-   reg_spline_getDeformationField(this->backwardControlPointGrid,
-                                  this->deformationFieldImage,
-                                  this->currentMask,
-                                  true, // composition
-                                  true // use B-Spline
-                                  );
-   reg_spline_getDeformationField(this->controlPointGrid,
-                                  this->backwardDeformationFieldImage,
-                                  this->floatingMask,
-                                  true, // composition
-                                  true // use B-Spline
-                                  );
-   // Convert the deformation fields into displacement
-   reg_getDisplacementFromDeformation(this->deformationFieldImage);
-   reg_getDisplacementFromDeformation(this->backwardDeformationFieldImage);
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetInverseConsistencyErrorField");
-#endif
-}
-/* *************************************************************** */
-template<class T>
-double reg_f3d_sym<T>::GetInverseConsistencyPenaltyTerm()
-{
-   if (this->inverseConsistencyWeight<=0) return 0.;
-
-   this->GetInverseConsistencyErrorField(false);
-
-   double ferror=0.;
-   size_t voxelNumber=this->deformationFieldImage->nx *
-         this->deformationFieldImage->ny *
-         this->deformationFieldImage->nz;
-   T *dispPtrX=static_cast<T *>(this->deformationFieldImage->data);
-   T *dispPtrY=&dispPtrX[voxelNumber];
-   if(this->deformationFieldImage->nz>1)
-   {
-      T *dispPtrZ=&dispPtrY[voxelNumber];
-      for(size_t i=0; i<voxelNumber; ++i)
-      {
-         if(this->currentMask[i]>-1)
-         {
-            double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]);
-            ferror += dist;
-         }
-      }
-   }
-   else
-   {
-      for(size_t i=0; i<voxelNumber; ++i)
-      {
-         if(this->currentMask[i]>-1)
-         {
-            double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]);
-            ferror += dist;
-         }
-      }
-   }
-
-   double berror=0.;
-   voxelNumber=this->backwardDeformationFieldImage->nx *
-         this->backwardDeformationFieldImage->ny *
-         this->backwardDeformationFieldImage->nz;
-   dispPtrX=static_cast<T *>(this->backwardDeformationFieldImage->data);
-   dispPtrY=&dispPtrX[voxelNumber];
-   if(this->backwardDeformationFieldImage->nz>1)
-   {
-      T *dispPtrZ=&dispPtrY[voxelNumber];
-      for(size_t i=0; i<voxelNumber; ++i)
-      {
-         if(this->floatingMask[i]>-1)
-         {
-            double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]);
-            berror += dist;
-         }
-      }
-   }
-   else
-   {
-      for(size_t i=0; i<voxelNumber; ++i)
-      {
-         if(this->floatingMask[i]>-1)
-         {
-            double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]);
-            berror += dist;
-         }
-      }
-   }
-   double error = ferror/double(this->activeVoxelNumber[this->currentLevel])
-         + berror / (double)(this->backwardActiveVoxelNumber[this->currentLevel]);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetInverseConsistencyPenaltyTerm");
-#endif
-   return double(this->inverseConsistencyWeight) * error;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::GetInverseConsistencyGradient()
-{
-   if(this->inverseConsistencyWeight<=0) return;
-
-   // Note: I simplified the gradient computation in order to include
-   // only d(B(F(x)))/d(forwardNode) and d(F(B(x)))/d(backwardNode)
-   // I ignored d(F(B(x)))/d(forwardNode) and d(B(F(x)))/d(backwardNode)
-   // cause it would only be an approximation since I don't have the
-   // real inverses
-   this->GetInverseConsistencyErrorField(true);
-
-   // The forward inverse consistency field is masked
-   size_t forwardVoxelNumber=
-         this->deformationFieldImage->nx *
-         this->deformationFieldImage->ny *
-         this->deformationFieldImage->nz ;
-   T *defPtrX=static_cast<T* >(this->deformationFieldImage->data);
-   T *defPtrY=&defPtrX[forwardVoxelNumber];
-   T *defPtrZ=&defPtrY[forwardVoxelNumber];
-   for(size_t i=0; i<forwardVoxelNumber; ++i)
-   {
-      if(this->currentMask[i]<0)
-      {
-         defPtrX[i]=0;
-         defPtrY[i]=0;
-         if(this->deformationFieldImage->nz>1)
-            defPtrZ[i]=0;
-      }
-   }
-   // The backward inverse consistency field is masked
-   size_t backwardVoxelNumber =
-         this->backwardDeformationFieldImage->nx *
-         this->backwardDeformationFieldImage->ny *
-         this->backwardDeformationFieldImage->nz ;
-   defPtrX=static_cast<T* >(this->backwardDeformationFieldImage->data);
-   defPtrY=&defPtrX[backwardVoxelNumber];
-   defPtrZ=&defPtrY[backwardVoxelNumber];
-   for(size_t i=0; i<backwardVoxelNumber; ++i)
-   {
-      if(this->floatingMask[i]<0)
-      {
-         defPtrX[i]=0;
-         defPtrY[i]=0;
-         if(this->backwardDeformationFieldImage->nz>1)
-            defPtrZ[i]=0;
-      }
-   }
-
-   // We convolve the inverse consistency map with a cubic B-Spline kernel
-   // Convolution along the x axis
-   float currentNodeSpacing[3];
-   currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dx;
-   bool activeAxis[3]= {1,0,0};
-   reg_tools_kernelConvolution(this->deformationFieldImage,
-                               currentNodeSpacing,
-                               CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               nullptr, // all volumes are active
-                               activeAxis
-                               );
-   // Convolution along the y axis
-   currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dy;
-   activeAxis[0]=0;
-   activeAxis[1]=1;
-   reg_tools_kernelConvolution(this->deformationFieldImage,
-                               currentNodeSpacing,
-                               CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               nullptr, // all volumes are active
-                               activeAxis
-                               );
-   // Convolution along the z axis if required
-   if(this->voxelBasedMeasureGradient->nz>1)
-   {
-      currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dz;
-      activeAxis[1]=0;
-      activeAxis[2]=1;
-      reg_tools_kernelConvolution(this->deformationFieldImage,
-                                  currentNodeSpacing,
-                                  CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                  nullptr, // all volumes are active
-                                  activeAxis
-                                  );
-   }
-   // The forward inverse consistency gradient is extracted at the node position
-   reg_voxelCentric2NodeCentric(this->transformationGradient,
-                                this->deformationFieldImage,
-                                2.f * this->inverseConsistencyWeight,
-                                true, // update the current value
-                                nullptr // no voxel to mm conversion
-                                );
-
-   // We convolve the inverse consistency map with a cubic B-Spline kernel
-   // Convolution along the x axis
-   currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dx;
-   activeAxis[0]=1;
-   activeAxis[1]=0;
-   activeAxis[2]=0;
-   reg_tools_kernelConvolution(this->backwardDeformationFieldImage,
-                               currentNodeSpacing,
-                               CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               nullptr, // all volumes are active
-                               activeAxis
-                               );
-   // Convolution along the y axis
-   currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dy;
-   activeAxis[0]=0;
-   activeAxis[1]=1;
-   reg_tools_kernelConvolution(this->backwardDeformationFieldImage,
-                               currentNodeSpacing,
-                               CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                               nullptr, // all volumes are active
-                               activeAxis
-                               );
-   // Convolution along the z axis if required
-   if(this->voxelBasedMeasureGradient->nz>1)
-   {
-      currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dz;
-      activeAxis[1]=0;
-      activeAxis[2]=1;
-      reg_tools_kernelConvolution(this->backwardDeformationFieldImage,
-                                  currentNodeSpacing,
-                                  CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                  nullptr, // all volumes are active
-                                  activeAxis
-                                  );
-   }
-   // The backward inverse consistency gradient is extracted at the node position
-   reg_voxelCentric2NodeCentric(this->backwardTransformationGradient,
-                                this->backwardDeformationFieldImage,
-                                2.f * this->inverseConsistencyWeight,
-                                true, // update the current value
-                                nullptr // no voxel to mm conversion
-                                );
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetInverseConsistencyGradient");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::UpdateParameters(float scale)
-{
-   // Update first the forward transformation
-   reg_f3d<T>::UpdateParameters(scale);
-
-   // Create some pointers to the relevant arrays
-   T *currentDOF_b=this->optimiser->GetCurrentDOF_b();
-   T *bestDOF_b=this->optimiser->GetBestDOF_b();
-   T *gradient_b=this->optimiser->GetGradient_b();
-
-   // Update the control point position
-   if(this->optimiser->GetOptimiseX() &&
-         this->optimiser->GetOptimiseY() &&
-         this->optimiser->GetOptimiseZ())
-   {
-      // Update the values for all axis displacement
-      for(size_t i=0; i<this->optimiser->GetDOFNumber_b(); ++i)
-      {
-         currentDOF_b[i] = bestDOF_b[i] + scale * gradient_b[i];
-      }
-   }
-   else
-   {
-      size_t voxNumber_b = this->optimiser->GetVoxNumber_b();
-      // Update the values for the x-axis displacement
-      if(this->optimiser->GetOptimiseX())
-      {
-         for(size_t i=0; i<voxNumber_b; ++i)
-         {
-            currentDOF_b[i] =bestDOF_b[i] + scale * gradient_b[i];
-         }
-      }
-      // Update the values for the y-axis displacement
-      if(this->optimiser->GetOptimiseY())
-      {
-         T *currentDOFY_b=&currentDOF_b[voxNumber_b];
-         T *bestDOFY_b=&bestDOF_b[voxNumber_b];
-         T *gradientY_b=&gradient_b[voxNumber_b];
-         for(size_t i=0; i<voxNumber_b; ++i)
-         {
-            currentDOFY_b[i] = bestDOFY_b[i] + scale * gradientY_b[i];
-         }
-      }
-      // Update the values for the z-axis displacement
-      if(this->optimiser->GetOptimiseZ() && this->optimiser->GetNDim()>2)
-      {
-         T *currentDOFZ_b=&currentDOF_b[2*voxNumber_b];
-         T *bestDOFZ_b=&bestDOF_b[2*voxNumber_b];
-         T *gradientZ_b=&gradient_b[2*voxNumber_b];
-         for(size_t i=0; i<voxNumber_b; ++i)
-         {
-            currentDOFZ_b[i] = bestDOFZ_b[i] + scale * gradientZ_b[i];
-         }
-      }
-   }
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::UpdateParameters");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_f3d_sym<T>::SetOptimiser()
-{
-   if(this->useConjGradient)
-      this->optimiser=new reg_conjugateGradient<T>();
-   else this->optimiser=new reg_optimiser<T>();
-   this->optimiser->Initialise(this->controlPointGrid->nvox,
-                               this->controlPointGrid->nz>1?3:2,
-                               this->optimiseX,
-                               this->optimiseY,
-                               this->optimiseZ,
-                               this->maxIterationNumber,
-                               0, // currentIterationNumber
-                               this,
-                               static_cast<T *>(this->controlPointGrid->data),
-                               static_cast<T *>(this->transformationGradient->data),
-                               this->backwardControlPointGrid->nvox,
-                               static_cast<T *>(this->backwardControlPointGrid->data),
-                               static_cast<T *>(this->backwardTransformationGradient->data));
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::SetOptimiser");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::PrintCurrentObjFunctionValue(T currentSize)
-{
-   if(!this->verbose) return;
-
-   char text[255];
-   sprintf(text, "[%i] Current objective function: %g",
-          (int)this->optimiser->GetCurrentIterationNumber(),
-          this->optimiser->GetBestObjFunctionValue());
-   sprintf(text+strlen(text), " = (wSIM)%g", this->bestWMeasure);
-   if(this->bendingEnergyWeight>0)
-      sprintf(text+strlen(text), " - (wBE)%.2e", this->bestWBE);
-   if(this->linearEnergyWeight)
-      sprintf(text+strlen(text), " - (wLE)%.2e", this->bestWLE);
-   if(this->jacobianLogWeight>0)
-      sprintf(text+strlen(text), " - (wJAC)%.2e", this->bestWJac);
-   if(this->landmarkRegWeight>0)
-      sprintf(text+strlen(text), " - (wLAN)%.2e", this->bestWLand);
-   if(this->inverseConsistencyWeight>0)
-      sprintf(text+strlen(text), " - (wIC)%.2e", this->bestIC);
-   sprintf(text+strlen(text), " [+ %g mm]", currentSize);
-   reg_print_info(this->executableName, text);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::PrintCurrentObjFunctionValue");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::UpdateBestObjFunctionValue()
-{
-   reg_f3d<T>::UpdateBestObjFunctionValue();
-   this->bestIC=this->currentIC;
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::UpdateBestObjFunctionValue");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::PrintInitialObjFunctionValue()
-{
-   if(!this->verbose) return;
-   reg_f3d<T>::PrintInitialObjFunctionValue();
-//   char text[255];
-//   sprintf(text, "Initial Inverse consistency value: %g", this->bestIC);
-//   reg_print_info(this->executableName, text);
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::PrintInitialObjFunctionValue");
-#endif
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-double reg_f3d_sym<T>::GetObjectiveFunctionValue()
-{
-   this->currentWJac = this->ComputeJacobianBasedPenaltyTerm(1); // 20 iterations
-
-   this->currentWBE = this->ComputeBendingEnergyPenaltyTerm();
-
-   this->currentWLE = this->ComputeLinearEnergyPenaltyTerm();
-
-   this->currentWLand = this->ComputeLandmarkDistancePenaltyTerm();
-
-   // Compute initial similarity measure
-   this->currentWMeasure = 0;
-   if(this->similarityWeight>0)
-   {
-      this->WarpFloatingImage(this->interpolation);
-      this->currentWMeasure = this->ComputeSimilarityMeasure();
-   }
-
-   // Compute the Inverse consistency penalty term if required
-   this->currentIC = this->GetInverseConsistencyPenaltyTerm();
-
-#ifndef NDEBUG
-   char text[255];
-   sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g | (wIC) %g",
-           this->currentWMeasure, this->currentWBE,
-           this->currentWLE,
-           this->currentWJac,
-           this->currentWLand,
-           this->currentIC);
-   reg_print_msg_debug(text);
-#endif
-
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetObjectiveFunctionValue");
-#endif
-   // Store the global objective function value
-   return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - this->currentIC;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_f3d_sym<T>::InitialiseSimilarity()
-{
-   // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
-   if(this->measure_nmi==nullptr &&
-         this->measure_ssd==nullptr &&
-         this->measure_dti==nullptr &&
-         this->measure_lncc==nullptr &&
-         this->measure_kld==nullptr &&
-         this->measure_mind==nullptr &&
-         this->measure_mindssc==nullptr)
-   {
-      this->measure_nmi=new reg_nmi;
-      for(int i=0; i<this->inputReference->nt; ++i)
-         this->measure_nmi->SetTimepointWeight(i,1.0);
-   }
-   if(this->measure_nmi!=nullptr)
-      this->measure_nmi->InitialiseMeasure(this->reference,
-                                           this->floating,
-                                           this->currentMask,
-                                           this->warped,
-                                           this->warpedGradient,
-                                           this->voxelBasedMeasureGradient,
-                                           this->localWeightSimCurrent,
-                                           this->floatingMask,
-                                           this->backwardWarped,
-                                           this->backwardWarpedGradientImage,
-                                           this->backwardVoxelBasedMeasureGradientImage
-                                           );
-
-   if(this->measure_ssd!=nullptr)
-      this->measure_ssd->InitialiseMeasure(this->reference,
-                                           this->floating,
-                                           this->currentMask,
-                                           this->warped,
-                                           this->warpedGradient,
-                                           this->voxelBasedMeasureGradient,
-                                           this->localWeightSimCurrent,
-                                           this->floatingMask,
-                                           this->backwardWarped,
-                                           this->backwardWarpedGradientImage,
-                                           this->backwardVoxelBasedMeasureGradientImage
-                                           );
-
-   if(this->measure_kld!=nullptr)
-      this->measure_kld->InitialiseMeasure(this->reference,
-                                           this->floating,
-                                           this->currentMask,
-                                           this->warped,
-                                           this->warpedGradient,
-                                           this->voxelBasedMeasureGradient,
-                                           this->localWeightSimCurrent,
-                                           this->floatingMask,
-                                           this->backwardWarped,
-                                           this->backwardWarpedGradientImage,
-                                           this->backwardVoxelBasedMeasureGradientImage
-                                           );
-
-   if(this->measure_lncc!=nullptr)
-      this->measure_lncc->InitialiseMeasure(this->reference,
-                                            this->floating,
-                                            this->currentMask,
-                                            this->warped,
-                                            this->warpedGradient,
-                                            this->voxelBasedMeasureGradient,
-                                            this->localWeightSimCurrent,
-                                            this->floatingMask,
-                                            this->backwardWarped,
-                                            this->backwardWarpedGradientImage,
-                                            this->backwardVoxelBasedMeasureGradientImage
-                                            );
-
-   if(this->measure_dti!=nullptr)
-      this->measure_dti->InitialiseMeasure(this->reference,
-                                           this->floating,
-                                           this->currentMask,
-                                           this->warped,
-                                           this->warpedGradient,
-                                           this->voxelBasedMeasureGradient,
-                                           this->localWeightSimCurrent,
-                                           this->floatingMask,
-                                           this->backwardWarped,
-                                           this->backwardWarpedGradientImage,
-                                           this->backwardVoxelBasedMeasureGradientImage
-                                           );
-
-   if(this->measure_mind!=nullptr)
-      this->measure_mind->InitialiseMeasure(this->reference,
-                                            this->floating,
-                                            this->currentMask,
-                                            this->warped,
-                                            this->warpedGradient,
-                                            this->voxelBasedMeasureGradient,
-                                            this->localWeightSimCurrent,
-                                            this->floatingMask,
-                                            this->backwardWarped,
-                                            this->backwardWarpedGradientImage,
-                                            this->backwardVoxelBasedMeasureGradientImage
-                                            );
-
-   if(this->measure_mindssc!=nullptr)
-      this->measure_mindssc->InitialiseMeasure(this->reference,
-                                               this->floating,
-                                               this->currentMask,
-                                               this->warped,
-                                               this->warpedGradient,
-                                               this->voxelBasedMeasureGradient,
-                                               this->localWeightSimCurrent,
-                                               this->floatingMask,
-                                               this->backwardWarped,
-                                               this->backwardWarpedGradientImage,
-                                               this->backwardVoxelBasedMeasureGradientImage
-                                               );
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::InitialiseSimilarity");
-#endif
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-nifti_image **reg_f3d_sym<T>::GetWarpedImage()
-{
-   // The initial images are used
-   if(this->inputReference==nullptr ||
-         this->inputFloating==nullptr ||
-         this->controlPointGrid==nullptr ||
-         this->backwardControlPointGrid==nullptr)
-   {
-      reg_print_fct_error("reg_f3d_sym<T>::GetWarpedImage()");
-      reg_print_msg_error("The reference, floating and both control point grid images have to be defined");
-      reg_exit();
-   }
-
-   reg_f3d_sym<T>::reference = this->inputReference;
-   reg_f3d_sym<T>::floating = this->inputFloating;
-   reg_f3d_sym<T>::currentMask = nullptr;
-   reg_f3d_sym<T>::floatingMask = nullptr;
-
-   reg_f3d_sym<T>::AllocateWarped();
-   reg_f3d_sym<T>::AllocateDeformationField();
-
-   reg_f3d_sym<T>::WarpFloatingImage(3); // cubic spline interpolation
-
-   reg_f3d_sym<T>::DeallocateDeformationField();
-
-   nifti_image **warpedImage=(nifti_image **)malloc(2*sizeof(nifti_image *));
-   warpedImage[0] = nifti_copy_nim_info(this->warped);
-   warpedImage[0]->cal_min=this->inputFloating->cal_min;
-   warpedImage[0]->cal_max=this->inputFloating->cal_max;
-   warpedImage[0]->scl_slope=this->inputFloating->scl_slope;
-   warpedImage[0]->scl_inter=this->inputFloating->scl_inter;
-   warpedImage[0]->data=(void *)malloc(warpedImage[0]->nvox*warpedImage[0]->nbyper);
-   memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox*warpedImage[0]->nbyper);
-
-   warpedImage[1] = nifti_copy_nim_info(this->backwardWarped);
-   warpedImage[1]->cal_min=this->inputReference->cal_min;
-   warpedImage[1]->cal_max=this->inputReference->cal_max;
-   warpedImage[1]->scl_slope=this->inputReference->scl_slope;
-   warpedImage[1]->scl_inter=this->inputReference->scl_inter;
-   warpedImage[1]->data=(void *)malloc(warpedImage[1]->nvox*warpedImage[1]->nbyper);
-   memcpy(warpedImage[1]->data, this->backwardWarped->data, warpedImage[1]->nvox*warpedImage[1]->nbyper);
-
-   reg_f3d_sym<T>::DeallocateWarped();
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetWarpedImage");
-#endif
-   return warpedImage;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-nifti_image * reg_f3d_sym<T>::GetBackwardControlPointPositionImage()
-{
-   // Create a control point grid nifti image
-   nifti_image *returnedControlPointGrid = nifti_copy_nim_info(this->backwardControlPointGrid);
-   // Allocate the new image data array
-   returnedControlPointGrid->data=(void *)malloc(returnedControlPointGrid->nvox*returnedControlPointGrid->nbyper);
-   // Copy the final backward control point grid image
-   memcpy(returnedControlPointGrid->data, this->backwardControlPointGrid->data,
-          returnedControlPointGrid->nvox*returnedControlPointGrid->nbyper);
-   // Return the new control point grid
-#ifndef NDEBUG
-   reg_print_fct_debug("reg_f3d_sym<T>::GetBackwardControlPointPositionImage");
-#endif
-   return returnedControlPointGrid;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template class reg_f3d_sym<float>;
diff --git a/reg-lib/_reg_f3d_sym.h b/reg-lib/_reg_f3d_sym.h
deleted file mode 100644
index 6e09a0c6..00000000
--- a/reg-lib/_reg_f3d_sym.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * @file _reg_f3d_sym.h
- * @author Marc Modat
- * @date 10/11/2011
- *
- *  Copyright (c) 2011-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "_reg_f3d.h"
-
-/// @brief Symmetric Fast Free Form Deformation registration class
-template <class T>
-class reg_f3d_sym : public reg_f3d<T>
-{
-protected:
-   // Optimiser related function
-   virtual void SetOptimiser();
-
-   nifti_image *floatingMaskImage;
-   int **floatingMaskPyramid;
-   int *floatingMask;
-   int *backwardActiveVoxelNumber;
-
-   nifti_image *backwardControlPointGrid;
-   nifti_image *backwardDeformationFieldImage;
-   nifti_image *backwardWarped;
-   nifti_image *backwardWarpedGradientImage;
-   nifti_image *backwardVoxelBasedMeasureGradientImage;
-   nifti_image *backwardTransformationGradient;
-
-   double *backwardProbaJointHistogram;
-   double *backwardLogJointHistogram;
-   double backwardEntropies[4];
-
-   mat33 *backwardJacobianMatrix;
-
-   T inverseConsistencyWeight;
-   double currentIC;
-   double bestIC;
-
-   virtual void AllocateWarped();
-   virtual void DeallocateWarped();
-   virtual void AllocateDeformationField();
-   virtual void DeallocateDeformationField();
-   virtual void AllocateWarpedGradient();
-   virtual void DeallocateWarpedGradient();
-   virtual void AllocateVoxelBasedMeasureGradient();
-   virtual void DeallocateVoxelBasedMeasureGradient();
-   virtual void AllocateTransformationGradient();
-   virtual void DeallocateTransformationGradient();
-   virtual T InitialiseCurrentLevel();
-   virtual void DeallocateCurrentInputImage();
-
-   virtual double ComputeBendingEnergyPenaltyTerm();
-   virtual double ComputeLinearEnergyPenaltyTerm();
-   virtual double ComputeJacobianBasedPenaltyTerm(int);
-   virtual double ComputeLandmarkDistancePenaltyTerm();
-   virtual void GetDeformationField();
-   virtual void WarpFloatingImage(int);
-   virtual void GetVoxelBasedGradient();
-   virtual void GetSimilarityMeasureGradient();
-   virtual void GetObjectiveFunctionGradient();
-   virtual void GetBendingEnergyGradient();
-   virtual void GetLinearEnergyGradient();
-   virtual void GetJacobianBasedGradient();
-   virtual void GetLandmarkDistanceGradient();
-   virtual void SetGradientImageToZero();
-   virtual T NormaliseGradient();
-   virtual void SmoothGradient();
-   virtual void GetApproximatedGradient();
-   virtual void DisplayCurrentLevelParameters();
-   virtual void PrintInitialObjFunctionValue();
-   virtual void PrintCurrentObjFunctionValue(T);
-   virtual void UpdateBestObjFunctionValue();
-   virtual double GetObjectiveFunctionValue();
-
-   virtual void GetInverseConsistencyErrorField(bool forceAll);
-   virtual double GetInverseConsistencyPenaltyTerm();
-   virtual void GetInverseConsistencyGradient();
-
-   virtual void UpdateParameters(float);
-   virtual void InitialiseSimilarity();
-
-public:
-   virtual void SetFloatingMask(nifti_image *);
-   virtual void SetInverseConsistencyWeight(T);
-
-   reg_f3d_sym(int refTimePoint,int floTimePoint);
-   ~reg_f3d_sym();
-   void CheckParameters();
-   void Initialise();
-   nifti_image *GetBackwardControlPointPositionImage();
-   nifti_image **GetWarpedImage();
-   bool GetSymmetricStatus()
-   {
-      return true;
-   }
-};

From 1de2b6dcf10c0fb8a694aa5cc0ddf108ded55115 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 18 Jan 2023 15:03:05 +0000
Subject: [PATCH 037/314] Refactorisations

---
 niftyreg_build_version.txt          |   2 +-
 reg-lib/Compute.h                   |   1 -
 reg-lib/Content.cpp                 |   4 +-
 reg-lib/Content.h                   |  12 +--
 reg-lib/F3dContent.cpp              |   8 +-
 reg-lib/F3dContent.h                |   2 +-
 reg-lib/_reg_base.cpp               |   9 +-
 reg-lib/_reg_base.h                 | 160 ++++++++++++----------------
 reg-lib/_reg_f3d.cpp                |  39 ++++---
 reg-lib/_reg_f3d.h                  |  42 +++-----
 reg-lib/cpu/_reg_maths.h            |   2 +-
 reg-lib/cpu/_reg_mind.cpp           |  30 +++---
 reg-lib/cpu/_reg_tools.h            |  81 ++++++--------
 reg-lib/cuda/CudaAladinContent.cpp  |   2 +-
 reg-lib/cuda/CudaContent.cpp        |   2 +-
 reg-lib/cuda/_reg_blocksize_gpu.h   |  47 --------
 reg-lib/cuda/_reg_common_cuda.h     |   3 +-
 reg-test/reg_test_interpolation.cpp |   4 +-
 18 files changed, 169 insertions(+), 281 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index fa8f08cb..1b9cba4a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-150
+151
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index caedc34b..c4fc6b42 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -6,7 +6,6 @@ class Compute {
 public:
     Compute() = delete;
     Compute(Content& conIn): con(conIn) {}
-    virtual ~Compute() {}
 
     virtual void ResampleImage(int inter, float paddingValue);
     virtual double GetJacobianPenaltyTerm(bool approx);
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index 5a72dccc..fb80d50c 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -35,7 +35,7 @@ void Content::AllocateWarped() {
     warped->nvox = size_t(warped->nx * warped->ny * warped->nz * warped->nt);
     warped->datatype = floating->datatype;
     warped->nbyper = floating->nbyper;
-    warped->data = (void*)calloc(warped->nvox, warped->nbyper);
+    warped->data = calloc(warped->nvox, warped->nbyper);
 }
 /* *************************************************************** */
 void Content::DeallocateWarped() {
@@ -79,7 +79,7 @@ void Content::AllocateDeformationField(size_t bytes) {
     deformationField->intent_p1 = DEF_FIELD;
     deformationField->scl_slope = 1;
     deformationField->scl_inter = 0;
-    deformationField->data = (void*)calloc(deformationField->nvox, deformationField->nbyper);
+    deformationField->data = calloc(deformationField->nvox, deformationField->nbyper);
 }
 /* *************************************************************** */
 void Content::DeallocateDeformationField() {
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index c27c147c..adf2b36b 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -45,12 +45,12 @@ class Content {
     }
 
 protected:
-    nifti_image *reference;
-    nifti_image *floating;
-    nifti_image *deformationField;
-    int *referenceMask;
-    mat44 *transformationMatrix;
-    nifti_image *warped;
+    nifti_image *reference = nullptr;
+    nifti_image *floating = nullptr;
+    nifti_image *deformationField = nullptr;
+    int *referenceMask = nullptr;
+    mat44 *transformationMatrix = nullptr;
+    nifti_image *warped = nullptr;
 
 private:
     void AllocateWarped();
diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp
index 4e650c04..27a767da 100644
--- a/reg-lib/F3dContent.cpp
+++ b/reg-lib/F3dContent.cpp
@@ -38,7 +38,7 @@ void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) {
     localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5];
     localWeightSim->nvox = size_t(localWeightSim->nx * localWeightSim->ny * localWeightSim->nz *
                                   localWeightSim->nt * localWeightSim->nu);
-    localWeightSim->data = (void*)malloc(localWeightSim->nvox * localWeightSim->nbyper);
+    localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper);
     F3dContent::ZeroVoxelBasedMeasureGradient();
     reg_getDeformationFromDisplacement(voxelBasedMeasureGradient);
     reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0);
@@ -53,7 +53,7 @@ void F3dContent::DeallocateLocalWeightSim() {
 /* *************************************************************** */
 void F3dContent::AllocateWarpedGradient() {
     warpedGradient = nifti_copy_nim_info(deformationField);
-    warpedGradient->data = (void*)calloc(warpedGradient->nvox, warpedGradient->nbyper);
+    warpedGradient->data = calloc(warpedGradient->nvox, warpedGradient->nbyper);
 }
 /* *************************************************************** */
 void F3dContent::DeallocateWarpedGradient() {
@@ -65,7 +65,7 @@ void F3dContent::DeallocateWarpedGradient() {
 /* *************************************************************** */
 void F3dContent::AllocateTransformationGradient() {
     transformationGradient = nifti_copy_nim_info(controlPointGrid);
-    transformationGradient->data = (void*)calloc(transformationGradient->nvox, transformationGradient->nbyper);
+    transformationGradient->data = calloc(transformationGradient->nvox, transformationGradient->nbyper);
 }
 /* *************************************************************** */
 void F3dContent::DeallocateTransformationGradient() {
@@ -77,7 +77,7 @@ void F3dContent::DeallocateTransformationGradient() {
 /* *************************************************************** */
 void F3dContent::AllocateVoxelBasedMeasureGradient() {
     voxelBasedMeasureGradient = nifti_copy_nim_info(deformationField);
-    voxelBasedMeasureGradient->data = (void*)calloc(voxelBasedMeasureGradient->nvox, voxelBasedMeasureGradient->nbyper);
+    voxelBasedMeasureGradient->data = calloc(voxelBasedMeasureGradient->nvox, voxelBasedMeasureGradient->nbyper);
 }
 /* *************************************************************** */
 void F3dContent::DeallocateVoxelBasedMeasureGradient() {
diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h
index 5c6b65d9..46d232a6 100644
--- a/reg-lib/F3dContent.h
+++ b/reg-lib/F3dContent.h
@@ -32,7 +32,7 @@ class F3dContent: public virtual Content {
     virtual void ZeroVoxelBasedMeasureGradient();
 
 protected:
-    nifti_image *controlPointGrid;
+    nifti_image *controlPointGrid = nullptr;
     nifti_image *localWeightSim = nullptr;
     nifti_image *transformationGradient = nullptr;
     nifti_image *voxelBasedMeasureGradient = nullptr;
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 0c3f5235..20d83c46 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -12,7 +12,6 @@
 
 #include "_reg_base.h"
 
-/* *************************************************************** */
 /* *************************************************************** */
 template<class T>
 reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
@@ -69,7 +68,6 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     gradientSmoothingSigma = 0;
     verbose = true;
     usePyramid = true;
-    forwardJacobianMatrix = nullptr;
 
     initialised = false;
     referencePyramid = nullptr;
@@ -189,7 +187,6 @@ reg_base<T>::~reg_base() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceImage(nifti_image *r) {
     inputReference = r;
@@ -732,7 +729,6 @@ void reg_base<T>::Initialise() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 double reg_base<T>::ComputeSimilarityMeasure() {
     double measure = 0;
@@ -763,11 +759,10 @@ double reg_base<T>::ComputeSimilarityMeasure() {
     return similarityWeight * measure;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_base<T>::GetVoxelBasedGradient() {
     // The voxel based gradient image is filled with zeros
-    // TODO Temporarily call F3dContent. This function will be moved to reg_f3d.
+    // TODO Temporarily call F3dContent. This function will be moved to reg_f3d
     dynamic_cast<F3dContent*>(con)->ZeroVoxelBasedMeasureGradient();
 
     // The intensity gradient is first computed
@@ -1017,7 +1012,6 @@ void reg_base<T>::WarpFloatingImage(int inter) {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
 void reg_base<T>::Run() {
 #ifndef NDEBUG
@@ -1157,5 +1151,4 @@ void reg_base<T>::Run() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template class reg_base<float>;
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 4f966a4c..6bd92be9 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -27,7 +27,6 @@
 #include "_reg_ReadWriteImage.h"
 #include "_reg_stringFormat.h"
 #include "_reg_optimiser.h"
-#include "float.h"
 #include "Platform.h"
 
 /// @brief Base registration class
@@ -48,7 +47,7 @@ class reg_base: public InterfaceOptimiser {
     // Measure
     Measure *measure = nullptr;
 
-    // Optimiser related variables
+    // Optimiser-related variables
     reg_optimiser<T> *optimiser;
     size_t maxIterationNumber;
     size_t perturbationNumber;
@@ -56,10 +55,7 @@ class reg_base: public InterfaceOptimiser {
     bool optimiseY;
     bool optimiseZ;
 
-    // Optimiser related function
-    virtual void SetOptimiser() = 0;
-
-    // Measure related variables
+    // Measure-related variables
     reg_ssd *measure_ssd;
     reg_kld *measure_kld;
     reg_dti *measure_dti;
@@ -68,7 +64,6 @@ class reg_base: public InterfaceOptimiser {
     reg_mind *measure_mind;
     reg_mindssc *measure_mindssc;
     nifti_image *localWeightSimInput;
-    // nifti_image *localWeightSimCurrent;
 
     char *executableName;
     int referenceTimePoint;
@@ -102,17 +97,8 @@ class reg_base: public InterfaceOptimiser {
     nifti_image **floatingPyramid;
     int **maskPyramid;
     int *activeVoxelNumber;
-    // nifti_image *reference;
-    // nifti_image *floating;
-    // int *currentMask;
-    // nifti_image *warped;
-    // nifti_image *deformationFieldImage;
-    // nifti_image *warpedGradient;
-    // nifti_image *voxelBasedMeasureGradient;
     unsigned int currentLevel;
 
-    mat33 *forwardJacobianMatrix;
-
     double bestWMeasure;
     double currentWMeasure;
 
@@ -124,26 +110,22 @@ class reg_base: public InterfaceOptimiser {
     float *landmarkReference;
     float *landmarkFloating;
 
-    // virtual void AllocateWarped();
-    // virtual void DeallocateWarped();
-    // virtual void AllocateDeformationField();
-    // virtual void DeallocateDeformationField();
-    // virtual void AllocateWarpedGradient();
-    // virtual void DeallocateWarpedGradient();
-    // virtual void AllocateVoxelBasedMeasureGradient();
-    // virtual void DeallocateVoxelBasedMeasureGradient();
-    // virtual void DeallocateCurrentInputImage();
+    // For the NiftyReg plugin in NiftyView
+    void (*funcProgressCallback)(float pcntProgress, void *params);
+    void* paramsProgressCallback;
 
     virtual void WarpFloatingImage(int);
     virtual double ComputeSimilarityMeasure();
     virtual void GetVoxelBasedGradient();
     virtual void InitialiseSimilarity();
+    virtual void CheckParameters();
+    virtual void Initialise();
 
-    // Virtual empty functions that have to be filled
+    // Pure virtual functions
+    virtual void SetOptimiser() = 0;
     virtual T InitialiseCurrentLevel(nifti_image *reference) = 0;
     virtual void SmoothGradient() = 0;
     virtual void GetDeformationField() = 0;
-    // virtual void SetGradientImageToZero() = 0;
     virtual void GetApproximatedGradient() = 0;
     virtual double GetObjectiveFunctionValue() = 0;
     virtual void UpdateParameters(float) = 0;
@@ -154,35 +136,36 @@ class reg_base: public InterfaceOptimiser {
     virtual void UpdateBestObjFunctionValue() = 0;
     virtual void PrintCurrentObjFunctionValue(T) = 0;
     virtual void PrintInitialObjFunctionValue() = 0;
-    // virtual void AllocateTransformationGradient() = 0;
-    // virtual void DeallocateTransformationGradient() = 0;
     virtual void CorrectTransformation() = 0;
-
-    void (*funcProgressCallback)(float pcntProgress, void *params);
-    void* paramsProgressCallback;
+    virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) = 0;
+    virtual void DeinitContent() = 0;
 
 public:
     reg_base(int refTimePoint, int floTimePoint);
     virtual ~reg_base();
 
+    virtual void Run();
+    virtual nifti_image** GetWarpedImage() = 0;
+    virtual char* GetExecutableName() { return executableName; }
+    virtual bool GetSymmetricStatus() { return false; }
+
     // Platform
-    Platform* GetPlatform();
-    void SetPlatformType(const PlatformType& platformTypeIn) { platformType = platformTypeIn; }
-    void SetGpuIdx(unsigned gpuIdxIn) { gpuIdx = gpuIdxIn; }
-
-    // Optimisation related functions
-    void SetMaximalIterationNumber(unsigned int);
-    void NoOptimisationAlongX() { optimiseX = false; }
-    void NoOptimisationAlongY() { optimiseY = false; }
-    void NoOptimisationAlongZ() { optimiseZ = false; }
-    void SetPerturbationNumber(size_t v) { perturbationNumber = v; }
-    void UseConjugateGradient();
-    void DoNotUseConjugateGradient();
-    void UseApproximatedGradient();
-    void DoNotUseApproximatedGradient();
-    // Measure of similarity related functions
- //    void ApproximateParzenWindow();
- //    void DoNotApproximateParzenWindow();
+    virtual void SetPlatformType(const PlatformType& platformTypeIn) { platformType = platformTypeIn; }
+    virtual void SetGpuIdx(unsigned gpuIdxIn) { gpuIdx = gpuIdxIn; }
+
+    // Optimisation-related functions
+    virtual void SetMaximalIterationNumber(unsigned int);
+    virtual void NoOptimisationAlongX() { optimiseX = false; }
+    virtual void NoOptimisationAlongY() { optimiseY = false; }
+    virtual void NoOptimisationAlongZ() { optimiseZ = false; }
+    virtual void SetPerturbationNumber(size_t v) { perturbationNumber = v; }
+    virtual void UseConjugateGradient();
+    virtual void DoNotUseConjugateGradient();
+    virtual void UseApproximatedGradient();
+    virtual void DoNotUseApproximatedGradient();
+    // Measure of similarity-related functions
+    // virtual void ApproximateParzenWindow();
+    // virtual void DoNotApproximateParzenWindow();
     virtual void UseNMISetReferenceBinNumber(int, int);
     virtual void UseNMISetFloatingBinNumber(int, int);
     virtual void UseSSD(int timepoint, bool normalize);
@@ -192,53 +175,44 @@ class reg_base: public InterfaceOptimiser {
     virtual void UseDTI(bool *timepoint);
     virtual void UseLNCC(int timepoint, float stdDevKernel);
     virtual void SetLNCCKernelType(int type);
-    void SetLocalWeightSim(nifti_image*);
-
-    void SetNMIWeight(int, double);
-    void SetSSDWeight(int, double);
-    void SetKLDWeight(int, double);
-    void SetLNCCWeight(int, double);
-
-    void SetReferenceImage(nifti_image*);
-    void SetFloatingImage(nifti_image*);
-    void SetReferenceMask(nifti_image*);
-    void SetAffineTransformation(mat44*);
-    void SetReferenceSmoothingSigma(T);
-    void SetFloatingSmoothingSigma(T);
-    void SetGradientSmoothingSigma(T);
-    void SetReferenceThresholdUp(unsigned int, T);
-    void SetReferenceThresholdLow(unsigned int, T);
-    void SetFloatingThresholdUp(unsigned int, T);
-    void SetFloatingThresholdLow(unsigned int, T);
-    void UseRobustRange();
-    void DoNotUseRobustRange();
-    void SetWarpedPaddingValue(float);
-    void SetLevelNumber(unsigned int);
-    void SetLevelToPerform(unsigned int);
-    void PrintOutInformation();
-    void DoNotPrintOutInformation();
-    void DoNotUsePyramidalApproach();
-    void UseNearestNeighborInterpolation();
-    void UseLinearInterpolation();
-    void UseCubicSplineInterpolation();
-    void SetLandmarkRegularisationParam(size_t, float*, float*, float);
-
-    virtual void CheckParameters();
-    void Run();
-    virtual void Initialise();
-    virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) = 0;
-    virtual void DeinitContent() = 0;
-    virtual nifti_image** GetWarpedImage() = 0;
-    virtual char* GetExecutableName() { return executableName; }
-    virtual bool GetSymmetricStatus() { return false; }
-
-    // Function required for the NiftyReg plugin in NiftyView
-    void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress, void *params),
-                                     void *paramsProgCallback) {
+    virtual void SetLocalWeightSim(nifti_image*);
+
+    virtual void SetNMIWeight(int, double);
+    virtual void SetSSDWeight(int, double);
+    virtual void SetKLDWeight(int, double);
+    virtual void SetLNCCWeight(int, double);
+
+    virtual void SetReferenceImage(nifti_image*);
+    virtual void SetFloatingImage(nifti_image*);
+    virtual void SetReferenceMask(nifti_image*);
+    virtual void SetAffineTransformation(mat44*);
+    virtual void SetReferenceSmoothingSigma(T);
+    virtual void SetFloatingSmoothingSigma(T);
+    virtual void SetGradientSmoothingSigma(T);
+    virtual void SetReferenceThresholdUp(unsigned int, T);
+    virtual void SetReferenceThresholdLow(unsigned int, T);
+    virtual void SetFloatingThresholdUp(unsigned int, T);
+    virtual void SetFloatingThresholdLow(unsigned int, T);
+    virtual void UseRobustRange();
+    virtual void DoNotUseRobustRange();
+    virtual void SetWarpedPaddingValue(float);
+    virtual void SetLevelNumber(unsigned int);
+    virtual void SetLevelToPerform(unsigned int);
+    virtual void PrintOutInformation();
+    virtual void DoNotPrintOutInformation();
+    virtual void DoNotUsePyramidalApproach();
+    virtual void UseNearestNeighborInterpolation();
+    virtual void UseLinearInterpolation();
+    virtual void UseCubicSplineInterpolation();
+    virtual void SetLandmarkRegularisationParam(size_t, float*, float*, float);
+
+    // For the NiftyReg plugin in NiftyView
+    virtual void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress, void *params),
+                                             void *paramsProgCallback) {
         funcProgressCallback = funcProgCallback;
         paramsProgressCallback = paramsProgCallback;
     }
 
-    // Function used for testing
+    // For testing
     virtual void reg_test_setOptimiser(reg_optimiser<T> *opt) { optimiser = opt; }
 };
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 607a13f2..29263433 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -113,13 +113,13 @@ T reg_f3d<T>::InitialiseCurrentLevel(nifti_image *reference) {
     // Set the initial step size for the gradient ascent
     T maxStepSize = reference->dx > reference->dy ? reference->dx : reference->dy;
     if (reference->ndim > 2)
-        maxStepSize = (reference->dz > maxStepSize) ? reference->dz : maxStepSize;
+        maxStepSize = reference->dz > maxStepSize ? reference->dz : maxStepSize;
 
     // Refine the control point grid if required
     if (gridRefinement) {
         if (this->currentLevel == 0) {
-            bendingEnergyWeight = bendingEnergyWeight / static_cast<T>(powf(16.0f, this->levelNumber - 1));
-            linearEnergyWeight = linearEnergyWeight / static_cast<T>(powf(3.0f, this->levelNumber - 1));
+            bendingEnergyWeight = bendingEnergyWeight / static_cast<T>(powf(16, this->levelNumber - 1));
+            linearEnergyWeight = linearEnergyWeight / static_cast<T>(powf(3, this->levelNumber - 1));
         } else {
             bendingEnergyWeight = bendingEnergyWeight * static_cast<T>(16);
             linearEnergyWeight = linearEnergyWeight * static_cast<T>(3);
@@ -138,17 +138,14 @@ void reg_f3d<T>::CheckParameters() {
     reg_base<T>::CheckParameters();
     // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
     if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
-        T penaltySum = bendingEnergyWeight +
-            linearEnergyWeight +
-            jacobianLogWeight +
-            this->landmarkRegWeight;
-        if (penaltySum >= 1.0) {
+        T penaltySum = bendingEnergyWeight + linearEnergyWeight + jacobianLogWeight + this->landmarkRegWeight;
+        if (penaltySum >= 1) {
             this->similarityWeight = 0;
             bendingEnergyWeight /= penaltySum;
             linearEnergyWeight /= penaltySum;
             jacobianLogWeight /= penaltySum;
             this->landmarkRegWeight /= penaltySum;
-        } else this->similarityWeight = 1.0 - penaltySum;
+        } else this->similarityWeight = 1 - penaltySum;
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::CheckParameters");
@@ -170,17 +167,17 @@ void reg_f3d<T>::Initialise() {
 
         /* Convert the spacing from voxel to mm if necessary */
         float spacingInMillimeter[3] = {spacing[0], spacing[1], spacing[2]};
-        if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * this->inputReference->dx;
-        if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * this->inputReference->dy;
-        if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * this->inputReference->dz;
+        if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -this->inputReference->dx;
+        if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -this->inputReference->dy;
+        if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -this->inputReference->dz;
 
         // Define the spacing for the first level
         float gridSpacing[3];
-        gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(this->levelNumber - 1));
-        gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(this->levelNumber - 1));
-        gridSpacing[2] = 1.0f;
+        gridSpacing[0] = spacingInMillimeter[0] * powf(2, this->levelNumber - 1);
+        gridSpacing[1] = spacingInMillimeter[1] * powf(2, this->levelNumber - 1);
+        gridSpacing[2] = 1;
         if (this->referencePyramid[0]->nz > 1)
-            gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber - 1));
+            gridSpacing[2] = spacingInMillimeter[2] * powf(2, this->levelNumber - 1);
 
         // Create and allocate the control point image
         reg_createControlPointGrid<T>(&controlPointGrid, this->referencePyramid[0], gridSpacing);
@@ -194,14 +191,14 @@ void reg_f3d<T>::Initialise() {
     } else {
         // The control point grid image is initialised with the provided grid
         controlPointGrid = nifti_copy_nim_info(inputControlPointGrid);
-        controlPointGrid->data = (void *)malloc(controlPointGrid->nvox * controlPointGrid->nbyper);
+        controlPointGrid->data = malloc(controlPointGrid->nvox * controlPointGrid->nbyper);
         memcpy(controlPointGrid->data, inputControlPointGrid->data,
                controlPointGrid->nvox * controlPointGrid->nbyper);
         // The final grid spacing is computed
-        spacing[0] = controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber - 1));
-        spacing[1] = controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber - 1));
+        spacing[0] = controlPointGrid->dx / powf(2, this->levelNumber - 1);
+        spacing[1] = controlPointGrid->dy / powf(2, this->levelNumber - 1);
         if (controlPointGrid->nz > 1)
-            spacing[2] = controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber - 1));
+            spacing[2] = controlPointGrid->dz / powf(2, this->levelNumber - 1);
     }
 #ifdef NDEBUG
     if (this->verbose) {
@@ -743,7 +740,7 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
 template<class T>
 nifti_image* reg_f3d<T>::GetControlPointPositionImage() {
     nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGrid);
-    returnedControlPointGrid->data = (void*)malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
+    returnedControlPointGrid->data = malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
     memcpy(returnedControlPointGrid->data, controlPointGrid->data,
            returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
     return returnedControlPointGrid;
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index 6a0251f3..00d16a03 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -25,9 +25,7 @@ class reg_f3d: public reg_base<T> {
     T jacobianLogWeight;
     bool jacobianLogApproximation;
     T spacing[3];
-
     bool gridRefinement;
-
     double currentWJac;
     double currentWBE;
     double currentWLE;
@@ -36,42 +34,41 @@ class reg_f3d: public reg_base<T> {
     double bestWLE;
 
     virtual T InitialiseCurrentLevel(nifti_image *reference) override;
-
-    virtual double ComputeBendingEnergyPenaltyTerm();
-    virtual double ComputeLinearEnergyPenaltyTerm();
-    virtual double ComputeJacobianBasedPenaltyTerm(int);
-    virtual double ComputeLandmarkDistancePenaltyTerm();
-
-    virtual void GetBendingEnergyGradient();
-    virtual void GetLinearEnergyGradient();
-    virtual void GetJacobianBasedGradient();
-    virtual void GetLandmarkDistanceGradient();
     virtual T NormaliseGradient() override;
     virtual void SmoothGradient() override;
     virtual void GetObjectiveFunctionGradient() override;
     virtual void GetApproximatedGradient() override;
     virtual void GetSimilarityMeasureGradient() override;
-
     virtual void GetDeformationField() override;
     virtual void DisplayCurrentLevelParameters() override;
-
     virtual double GetObjectiveFunctionValue() override;
     virtual void UpdateBestObjFunctionValue() override;
     virtual void UpdateParameters(float) override;
     virtual void SetOptimiser() override;
-
     virtual void PrintInitialObjFunctionValue() override;
     virtual void PrintCurrentObjFunctionValue(T) override;
-
     virtual void CorrectTransformation() override;
+    virtual void CheckParameters() override;
+    virtual void Initialise() override;
+    virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) override;
+    virtual void DeinitContent() override;
 
-    void (*funcProgressCallback)(float pcntProgress, void *params);
-    void *paramsProgressCallback;
+    virtual double ComputeBendingEnergyPenaltyTerm();
+    virtual double ComputeLinearEnergyPenaltyTerm();
+    virtual double ComputeJacobianBasedPenaltyTerm(int);
+    virtual double ComputeLandmarkDistancePenaltyTerm();
+    virtual void GetBendingEnergyGradient();
+    virtual void GetLinearEnergyGradient();
+    virtual void GetJacobianBasedGradient();
+    virtual void GetLandmarkDistanceGradient();
 
 public:
     reg_f3d(int refTimePoint, int floTimePoint);
     virtual ~reg_f3d();
 
+    virtual nifti_image* GetControlPointPositionImage();
+    virtual nifti_image** GetWarpedImage() override;
+
     virtual void SetControlPointGridImage(nifti_image*);
     virtual void SetBendingEnergyWeight(T);
     virtual void SetLinearEnergyWeight(T);
@@ -82,17 +79,10 @@ class reg_f3d: public reg_base<T> {
     virtual void NoGridRefinement() { gridRefinement = false; }
 
     // F3D2 specific options
+    virtual nifti_image* GetBackwardControlPointPositionImage() { return nullptr; }
     virtual void UseBCHUpdate(int) {}
     virtual void UseGradientCumulativeExp() {}
     virtual void DoNotUseGradientCumulativeExp() {}
     virtual void SetFloatingMask(nifti_image*) {}
     virtual void SetInverseConsistencyWeight(T) {}
-    virtual nifti_image* GetBackwardControlPointPositionImage() { return nullptr; }
-
-    virtual void CheckParameters() override;
-    virtual void Initialise() override;
-    virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) override;
-    virtual void DeinitContent() override;
-    virtual nifti_image* GetControlPointPositionImage();
-    virtual nifti_image** GetWarpedImage() override;
 };
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index e6feead6..726144c7 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -19,8 +19,8 @@
 #include <math.h>
 #include <iostream>
 #include <vector>
-#include "nifti1_io.h"
 #include <stdexcept>
+#include "nifti1_io.h"
 
 #if defined (_OPENMP)
 #include <omp.h>
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 7522eb98..2cd53fd9 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -91,16 +91,16 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
 
     // Allocate an image to store the mean image
     nifti_image *meanImage = nifti_copy_nim_info(currentInputImage);
-    meanImage->data = (void*)calloc(meanImage->nvox, meanImage->nbyper);
+    meanImage->data = calloc(meanImage->nvox, meanImage->nbyper);
     DTYPE* meanImgDataPtr = static_cast<DTYPE*>(meanImage->data);
 
     // Allocate an image to store the shifted image
     nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage);
-    shiftedImage->data = (void*)malloc(shiftedImage->nvox * shiftedImage->nbyper);
+    shiftedImage->data = malloc(shiftedImage->nvox * shiftedImage->nbyper);
 
     // Allocation of the difference image
     nifti_image *diff_image = nifti_copy_nim_info(currentInputImage);
-    diff_image->data = (void*)malloc(diff_image->nvox * diff_image->nbyper);
+    diff_image->data = malloc(diff_image->nvox * diff_image->nbyper);
 
     // Define the sigma for the convolution
     float sigma = -0.5;// negative value denotes voxel width
@@ -225,12 +225,12 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
 
     // Allocate an image to store the mean image
     nifti_image *mean_img = nifti_copy_nim_info(currentInputImage);
-    mean_img->data = (void*)calloc(mean_img->nvox, mean_img->nbyper);
+    mean_img->data = calloc(mean_img->nvox, mean_img->nbyper);
     DTYPE* meanImgDataPtr = static_cast<DTYPE*>(mean_img->data);
 
     // Allocate an image to store the warped image
     nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage);
-    shiftedImage->data = (void*)malloc(shiftedImage->nvox * shiftedImage->nbyper);
+    shiftedImage->data = malloc(shiftedImage->nvox * shiftedImage->nbyper);
 
     // Define the sigma for the convolution
     float sigma = -0.5;// negative value denotes voxel width
@@ -244,11 +244,11 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
     //std::vector<nifti_image *> vectNiftiImage;
     //for(int i=0;i<samplingNbr;i++) {
     nifti_image *diff_image = nifti_copy_nim_info(currentInputImage);
-    diff_image->data = (void*)malloc(diff_image->nvox * diff_image->nbyper);
+    diff_image->data = malloc(diff_image->nvox * diff_image->nbyper);
     int *mask_diff_image = (int*)calloc(diff_image->nvox, sizeof(int));
 
     nifti_image *diff_imageShifted = nifti_copy_nim_info(currentInputImage);
-    diff_imageShifted->data = (void*)malloc(diff_imageShifted->nvox * diff_imageShifted->nbyper);
+    diff_imageShifted->data = malloc(diff_imageShifted->nvox * diff_imageShifted->nbyper);
 
     int RSampling3D_x[6] = {+descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0};
     int RSampling3D_y[6] = {+descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset};
@@ -433,8 +433,8 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
         this->referenceImageDescriptor->ny *
         this->referenceImageDescriptor->nz *
         this->referenceImageDescriptor->nt;
-    this->referenceImageDescriptor->data = (void*)malloc(this->referenceImageDescriptor->nvox *
-                                                         this->referenceImageDescriptor->nbyper);
+    this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox *
+                                                  this->referenceImageDescriptor->nbyper);
     // Initialise the warped floating descriptor
     this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer);
     this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4;
@@ -443,8 +443,8 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
         this->warpedFloatingImageDescriptor->ny *
         this->warpedFloatingImageDescriptor->nz *
         this->warpedFloatingImageDescriptor->nt;
-    this->warpedFloatingImageDescriptor->data = (void*)malloc(this->warpedFloatingImageDescriptor->nvox *
-                                                              this->warpedFloatingImageDescriptor->nbyper);
+    this->warpedFloatingImageDescriptor->data = malloc(this->warpedFloatingImageDescriptor->nvox *
+                                                       this->warpedFloatingImageDescriptor->nbyper);
 
     if (this->isSymmetric) {
         if (this->floatingImagePointer->nt > 1 || this->warpedReferenceImagePointer->nt > 1) {
@@ -459,8 +459,8 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
             this->floatingImageDescriptor->ny *
             this->floatingImageDescriptor->nz *
             this->floatingImageDescriptor->nt;
-        this->floatingImageDescriptor->data = (void*)malloc(this->floatingImageDescriptor->nvox *
-                                                            this->floatingImageDescriptor->nbyper);
+        this->floatingImageDescriptor->data = malloc(this->floatingImageDescriptor->nvox *
+                                                     this->floatingImageDescriptor->nbyper);
         // Initialise the warped floating descriptor
         this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer);
         this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4;
@@ -469,8 +469,8 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
             this->warpedReferenceImageDescriptor->ny *
             this->warpedReferenceImageDescriptor->nz *
             this->warpedReferenceImageDescriptor->nt;
-        this->warpedReferenceImageDescriptor->data = (void*)malloc(this->warpedReferenceImageDescriptor->nvox *
-                                                                   this->warpedReferenceImageDescriptor->nbyper);
+        this->warpedReferenceImageDescriptor->data = malloc(this->warpedReferenceImageDescriptor->nvox *
+                                                            this->warpedReferenceImageDescriptor->nbyper);
     }
 
     for (int i = 0; i < referenceImageDescriptor->nt; ++i) {
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index d8ee8391..5e3228f6 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -18,24 +18,22 @@
 #include <map>
 #include "_reg_maths.h"
 
-typedef enum
-{
-   MEAN_KERNEL,
-   LINEAR_KERNEL,
-   GAUSSIAN_KERNEL,
-   CUBIC_SPLINE_KERNEL
+typedef enum {
+    MEAN_KERNEL,
+    LINEAR_KERNEL,
+    GAUSSIAN_KERNEL,
+    CUBIC_SPLINE_KERNEL
 } NREG_CONV_KERNEL_TYPE;
 
 /* *************************************************************** */
 /** @brief This function check some header parameters and correct them in
- * case of error. For example no dimension is lower than one. The scl_sclope
+ * case of error. For example no dimension is lower than one. The scl_slope
  * can not be equal to zero. The qto_xyz and qto_ijk are populated if
  * both qform_code and sform_code are set to zero.
  * @param image Input image to check and correct if necessary
  */
 extern "C++"
 void reg_checkAndCorrectDimension(nifti_image *image);
-
 /* *************************************************************** */
 /** @brief Check if the specified filename corresponds to an image.
  * @param name Input filename
@@ -44,10 +42,9 @@ void reg_checkAndCorrectDimension(nifti_image *image);
  */
 extern "C++"
 bool reg_isAnImageFileName(char *name);
-
 /* *************************************************************** */
 /** @brief Rescale an input image between two user-defined values.
- * Some threshold can also be applied concurrenlty
+ * Some threshold can also be applied concurrently
  * @param image Image to be rescaled
  * @param newMin Intensity lower bound after rescaling
  * @param newMax Intensity higher bound after rescaling
@@ -58,10 +55,7 @@ extern "C++"
 void reg_intensityRescale(nifti_image *image,
                           int timepoint,
                           float newMin,
-                          float newMax
-                         );
-
-
+                          float newMax);
 /* *************************************************************** */
 /** @brief Set the scl_slope to 1 and the scl_inter to 0 and rescale
  * the intensity values
@@ -69,7 +63,6 @@ void reg_intensityRescale(nifti_image *image,
  */
 extern "C++"
 void reg_tools_removeSCLInfo(nifti_image *img);
-
 /* *************************************************************** */
 /** @brief reg_getRealImageSpacing
  * @param image image
@@ -93,14 +86,13 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                  int *mask = nullptr,
                                  bool *timePoints = nullptr,
                                  bool *axis = nullptr);
-
 /* *************************************************************** */
 /** @brief Smooth a label image using a Gaussian kernel
  * @param image Image to be smoothed
  * @param varianceX The variance of the Gaussian kernel in X
  * @param varianceY The variance of the Gaussian kernel in Y
  * @param varianceZ The variance of the Gaussian kernel in Z
- * @param mask An integer mask over which the Gaussian smoothing should occour
+ * @param mask An integer mask over which the Gaussian smoothing should occur
  * @param timePoint Boolean array to specify which timepoints have to be
  * smoothed.
  */
@@ -109,10 +101,8 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
                                       float varianceX,
                                       float varianceY,
                                       float varianceZ,
-                                      int *mask=nullptr,
-                                      bool *timePoint=nullptr);
-
-
+                                      int *mask = nullptr,
+                                      bool *timePoint = nullptr);
 /* *************************************************************** */
 /** @brief Downsample an image by a ratio of two
  * @param image Image to be downsampled
@@ -125,8 +115,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
 extern "C++" template <class PrecisionTYPE>
 void reg_downsampleImage(nifti_image *image,
                          int type,
-                         bool *axis
-                        );
+                         bool *axis);
 /* *************************************************************** */
 /** @brief Returns the maximal euclidean distance from a
  * deformation field image
@@ -142,7 +131,7 @@ PrecisionTYPE reg_getMaximalLength(nifti_image *image);
  */
 extern "C++" template <class NewTYPE>
 void reg_tools_changeDatatype(nifti_image *image,
-                              int type=-1);
+                              int type = -1);
 /* *************************************************************** */
 /** @brief Add two images.
  * @param img1 First image to consider
@@ -187,7 +176,6 @@ extern "C++"
 void reg_tools_divideImageToImage(nifti_image *img1,
                                   nifti_image *img2,
                                   nifti_image *out);
-
 /* *************************************************************** */
 /** @brief Add a scalar to all image intensity
  * @param img1 Input image
@@ -228,7 +216,6 @@ extern "C++"
 void reg_tools_divideValueToImage(nifti_image *img1,
                                   nifti_image *out,
                                   float val);
-
 /* *************************************************************** */
 /** @brief Binarise an input image. All values different
  * from 0 are set to 1, 0 otherwise.
@@ -236,7 +223,6 @@ void reg_tools_divideValueToImage(nifti_image *img1,
  */
 extern "C++"
 void reg_tools_binarise_image(nifti_image *img);
-
 /* *************************************************************** */
 /** @brief Binarise an input image. The binarisation is
  * performed according to a threshold value that is
@@ -249,7 +235,6 @@ void reg_tools_binarise_image(nifti_image *img);
 extern "C++"
 void reg_tools_binarise_image(nifti_image *img,
                               float thr);
-
 /* *************************************************************** */
 /** @brief Convert a binary image into an array of int.
  * This is used to define a mask within the registration
@@ -264,14 +249,13 @@ void reg_tools_binarise_image(nifti_image *img,
 extern "C++"
 void reg_tools_binaryImage2int(nifti_image *img,
                                int *array,
-                               int &activeVoxelNumber);
-
+                               int& activeVoxelNumber);
 /* *************************************************************** */
 /** @brief Compute the mean root mean squared error between
  * two vector images
  * @param imgA Input vector image
  * @param imgB Input vector image
- * @return Mean rsoot mean squared error values returned
+ * @return Mean root mean squared error values returned
  */
 extern "C++"
 double reg_tools_getMeanRMS(nifti_image *imgA,
@@ -295,8 +279,7 @@ int reg_tools_nanMask_image(nifti_image *img,
  * @param mask Input mask which is updated in place
  */
 extern "C++"
-int reg_tools_removeNanFromMask(nifti_image *image,
-                                int *mask);
+int reg_tools_removeNanFromMask(nifti_image *image, int *mask);
 /* *************************************************************** */
 /** @brief Get the minimal value of an image
  * @param img Input image
@@ -338,7 +321,7 @@ float reg_tools_getSTDValue(nifti_image *img);
  * the registration.
  */
 extern "C++" template<class DTYPE>
-int reg_createImagePyramid(nifti_image * input,
+int reg_createImagePyramid(nifti_image *input,
                            nifti_image **pyramid,
                            unsigned int levelNumber,
                            unsigned int levelToPerform);
@@ -374,8 +357,7 @@ int reg_createMaskPyramid(nifti_image *input,
 extern "C++" template<class T>
 void reg_thresholdImage(nifti_image *image,
                         T lowThr,
-                        T upThr
-                       );
+                        T upThr);
 /* *************************************************************** */
 /** @brief This function flipp the specified axis
  * @param image Input image to be flipped
@@ -387,8 +369,7 @@ void reg_thresholdImage(nifti_image *image,
 extern "C++"
 void reg_flippAxis(nifti_image *image,
                    void *array,
-                   std::string cmd
-                  );
+                   std::string cmd);
 /* *************************************************************** */
 /** @brief This function converts an image containing deformation
  * field into a displacement field
@@ -416,9 +397,9 @@ int reg_getDeformationFromDisplacement(nifti_image *image);
  */
 extern "C++"
 void reg_setGradientToZero(nifti_image *image,
-                          bool x_axis,
-                          bool y_axis,
-                          bool z_axis);
+                           bool x_axis,
+                           bool y_axis,
+                           bool z_axis);
 /* *************************************************************** */
 /* *************************************************************** */
 /** @brief The functions returns the largest ratio between two arrays
@@ -427,8 +408,8 @@ void reg_setGradientToZero(nifti_image *image,
  */
 extern "C++" template<class DTYPE>
 double reg_test_compare_arrays(DTYPE *ptrA,
-                              DTYPE *ptrB,
-                              size_t nvox);
+                               DTYPE *ptrB,
+                               size_t nvox);
 /* *************************************************************** */
 /** @brief The functions returns the largest ratio between input image intensities
  * The returned value is the largest value computed as ((A/B)-1)
@@ -436,7 +417,7 @@ double reg_test_compare_arrays(DTYPE *ptrA,
  */
 extern "C++"
 double reg_test_compare_images(nifti_image *imgA,
-                              nifti_image *imgB);
+                               nifti_image *imgB);
 /* *************************************************************** */
 /** @brief The absolute operator is applied to the input image
  */
@@ -444,22 +425,22 @@ extern "C++"
 void reg_tools_abs_image(nifti_image *img);
 /* *************************************************************** */
 extern "C++"
-void mat44ToCptr(const mat44& mat, float* cMat);
+void mat44ToCptr(const mat44& mat, float *cMat);
 /* *************************************************************** */
 extern "C++"
-void cPtrToMat44(mat44 *mat, float* cMat);
+void cPtrToMat44(mat44 *mat, float *cMat);
 /* *************************************************************** */
 extern "C++"
-void mat33ToCptr(mat33* mat, float* cMat, const unsigned int numMats);
+void mat33ToCptr(mat33 *mat, float *cMat, const unsigned int numMats);
 /* *************************************************************** */
 extern "C++"
-void cPtrToMat33(mat33 *mat, float* cMat);
+void cPtrToMat33(mat33 *mat, float *cMat);
 /* *************************************************************** */
 extern "C++" template<typename T>
-void matmnToCptr(T** mat, T* cMat, unsigned int m, unsigned int n);
+void matmnToCptr(T **mat, T *cMat, unsigned int m, unsigned int n);
 /* *************************************************************** */
 extern "C++" template<typename T>
-void cPtrToMatmn(T** mat, T* cMat, unsigned int m, unsigned int n);
+void cPtrToMatmn(T **mat, T *cMat, unsigned int m, unsigned int n);
 /* *************************************************************** */
 void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int &x, int &y, int &z);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index c25004ea..c768cc50 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -270,7 +270,7 @@ void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, i
     free(image->data);
     image->datatype = type;
     image->nbyper = sizeof(T);
-    image->data = (void*)malloc(image->nvox * image->nbyper);
+    image->data = malloc(image->nvox * image->nbyper);
     T* dataT = static_cast<T*>(image->data);
     for (size_t i = 0; i < size; ++i)
         dataT[i] = FillWarpedImageData<T>(buffer[i], type);
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 3a6bd8c1..11688116 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -205,7 +205,7 @@ void CudaContent::FillImageData(nifti_image *image, float *memoryObject, int dat
     free(image->data);
     image->datatype = datatype;
     image->nbyper = sizeof(DataType);
-    image->data = (void*)malloc(size * image->nbyper);
+    image->data = malloc(size * image->nbyper);
     DataType* data = static_cast<DataType*>(image->data);
     for (size_t i = 0; i < size; ++i)
         data[i] = CastImageData<DataType>(buffer[i], datatype);
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h
index 06ee1359..4eebd833 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.h
+++ b/reg-lib/cuda/_reg_blocksize_gpu.h
@@ -23,53 +23,6 @@ struct __attribute__((aligned(4))) float4 {
 #endif
 /* ******************************** */
 /* ******************************** */
-#if CUDART_VERSION >= 3200
-#   define NR_CUDA_SAFE_CALL(call) { \
-		call; \
-		cudaError err = cudaPeekAtLastError(); \
-		if( cudaSuccess != err) { \
-			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
-			__FILE__, __LINE__, cudaGetErrorString(err)); \
-			exit(EXIT_FAILURE); \
-		} \
-	}
-#   define NR_CUDA_CHECK_KERNEL(grid,block) { \
-		cudaDeviceSynchronize(); \
-		cudaError err = cudaPeekAtLastError(); \
-		if( err != cudaSuccess) { \
-			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
-			__FILE__, __LINE__, cudaGetErrorString(err)); \
-			fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", \
-			grid.x,grid.y,grid.z,block.x,block.y,block.z); \
-			exit(EXIT_FAILURE); \
-		} \
-		else{\
-		printf("[NiftyReg CUDA DEBUG] kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n", cudaGetErrorString(cudaGetLastError()), grid.x, grid.y, grid.z, block.x, block.y, block.z);\
-		}\
-	}
-#else //CUDART_VERSION >= 3200
-#   define NR_CUDA_SAFE_CALL(call) { \
-		call; \
-		cudaError err = cudaDeviceSynchronize(); \
-		if( cudaSuccess != err) { \
-			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
-			__FILE__, __LINE__, cudaGetErrorString(err)); \
-			exit(EXIT_FAILURE); \
-		} \
-	}
-#   define NR_CUDA_CHECK_KERNEL(grid,block) { \
-		cudaError err = cudaDeviceSynchronize(); \
-		if( err != cudaSuccess) { \
-			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
-			__FILE__, __LINE__, cudaGetErrorString(err)); \
-			fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", \
-			grid.x,grid.y,grid.z,block.x,block.y,block.z); \
-			exit(EXIT_FAILURE); \
-		} \
-	}
-#endif //CUDART_VERSION >= 3200
-/* ******************************** */
-/* ******************************** */
 class NiftyReg_CudaBlock100 {
 public:    /* _reg_blockMatching_gpu */
     size_t Block_target_block;
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 961dc148..113aa619 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -12,8 +12,9 @@
 #include "nifti1_io.h"
 #include "cuda_runtime.h"
 #include "cuda.h"
+#include "_reg_maths.h"
 
- /* ******************************** */
+/* ******************************** */
 #ifndef __VECTOR_TYPES_H__
 #define __VECTOR_TYPES_H__
 struct __attribute__((aligned(4))) float4 {
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index dcfed114..1a9b2193 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -90,7 +90,7 @@ TEST_CASE("Resampling", "[resampling]") {
     id_field_3D->ndim = id_field_3D->dim[0] = 5;
     id_field_3D->nu = id_field_3D->dim[5] = 3;
     id_field_3D->nvox = id_field_3D->nx * id_field_3D->ny * id_field_3D->nz * id_field_3D->nu;
-    id_field_3D->data = (void*)calloc(id_field_3D->nvox, id_field_3D->nbyper);
+    id_field_3D->data = calloc(id_field_3D->nvox, id_field_3D->nbyper);
     reg_getDeformationFromDisplacement(id_field_3D);
     float res3[8];
     memcpy(res3, reference3D->data, reference3D->nvox * sizeof(float));
@@ -139,7 +139,7 @@ TEST_CASE("Resampling", "[resampling]") {
             SECTION(test_name + " " + desc) {
                 // Create and set a warped image to host the computation
                 nifti_image *warped = nifti_copy_nim_info(reference);
-                warped->data = (void*)malloc(warped->nvox * warped->nbyper);
+                warped->data = malloc(warped->nvox * warped->nbyper);
                 con->SetWarped(warped);
                 // Set the deformation field
                 con->SetDeformationField(def_field);

From 7478317f81c8b837e4f462d2411cb5452dc23427 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 19 Jan 2023 13:38:12 +0000
Subject: [PATCH 038/314] Remove unnecessary CUDA variables

---
 niftyreg_build_version.txt         |  2 +-
 reg-lib/Content.h                  |  2 +-
 reg-lib/_reg_f3d.cpp               |  4 +-
 reg-lib/cl/ClAladinContent.cpp     |  2 +-
 reg-lib/cl/ClAladinContent.h       |  2 +-
 reg-lib/cuda/CudaAladinContent.cpp |  2 +-
 reg-lib/cuda/CudaAladinContent.h   |  2 +-
 reg-lib/cuda/CudaCompute.cpp       |  8 ++--
 reg-lib/cuda/CudaContent.cpp       | 63 ++++++++----------------------
 reg-lib/cuda/CudaContent.h         | 14 +++----
 reg-lib/cuda/CudaF3dContent.cpp    | 21 +++-------
 reg-lib/cuda/CudaF3dContent.h      |  4 +-
 reg-lib/cuda/CudaMeasure.cpp       |  8 ++--
 13 files changed, 47 insertions(+), 87 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 1b9cba4a..492dff08 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-151
+152
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index adf2b36b..c3e53a1e 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -20,7 +20,7 @@ class Content {
     virtual nifti_image* GetDeformationField() { return deformationField; }
     virtual int* GetReferenceMask() { return referenceMask; }
     virtual mat44* GetTransformationMatrix() { return transformationMatrix; }
-    virtual nifti_image* GetWarped(int index = 0) { return warped; }
+    virtual nifti_image* GetWarped() { return warped; }
 
     // Setters
     virtual void SetDeformationField(nifti_image *deformationFieldIn) {
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 29263433..0fe6c244 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -725,9 +725,7 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
     this->WarpFloatingImage(3); // cubic spline interpolation
 
     nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
-    warpedImage[0] = this->con->GetWarped(0);
-    if (this->inputFloating->nt == 2)
-        warpedImage[1] = this->con->GetWarped(1);
+    warpedImage[0] = this->con->GetWarped();
 
     this->con->SetWarped(nullptr); // Prevent deallocating of warpedImage
     DeinitContent();
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index 7206c9ee..171ffcf6 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -111,7 +111,7 @@ void ClAladinContent::AllocateClPtrs() {
     }
 }
 /* *************************************************************** */
-nifti_image* ClAladinContent::GetWarped(int index) {
+nifti_image* ClAladinContent::GetWarped() {
     DownloadImage(warped, warpedImageClmem, warped->datatype);
     return warped;
 }
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
index 97405730..8331f0e7 100644
--- a/reg-lib/cl/ClAladinContent.h
+++ b/reg-lib/cl/ClAladinContent.h
@@ -41,7 +41,7 @@ class ClAladinContent: public AladinContent {
     // CPU getters with data downloaded from device
     _reg_blockMatchingParam* GetBlockMatchingParams() override;
     nifti_image* GetDeformationField() override;
-    nifti_image* GetWarped(int index = 0) override;
+    nifti_image* GetWarped() override;
 
     // Setters
     void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index c768cc50..14850439 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -135,7 +135,7 @@ void CudaAladinContent::AllocateCuPtrs() {
     }
 }
 /* *************************************************************** */
-nifti_image* CudaAladinContent::GetWarped(int index) {
+nifti_image* CudaAladinContent::GetWarped() {
     DownloadImage(warped, warpedImageArray_d, warped->datatype);
     return warped;
 }
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
index a7679ea8..26d68d4f 100644
--- a/reg-lib/cuda/CudaAladinContent.h
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -45,7 +45,7 @@ class CudaAladinContent: public AladinContent {
     // CPU getters with data downloaded from device
     _reg_blockMatchingParam* GetBlockMatchingParams() override;
     nifti_image* GetDeformationField() override;
-    nifti_image* GetWarped(int index = 0) override;
+    nifti_image* GetWarped() override;
 
     // Setters
     void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 694e1586..8a57d35d 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -8,8 +8,8 @@
 void CudaCompute::ResampleImage(int inter, float paddingValue) {
     CudaContent& con = dynamic_cast<CudaContent&>(this->con);
     reg_resampleImage_gpu(con.Content::GetFloating(),
-                          con.GetWarpedCuda()[0],
-                          con.GetFloatingCuda()[0],
+                          con.GetWarpedCuda(),
+                          con.GetFloatingCuda(),
                           con.GetDeformationFieldCuda(),
                           con.GetReferenceMaskCuda(),
                           con.Content::GetReference()->nvox,
@@ -106,9 +106,9 @@ void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF,
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     reg_getImageGradient_gpu(con.F3dContent::GetFloating(),
-                             con.GetFloatingCuda()[0],
+                             con.GetFloatingCuda(),
                              con.GetDeformationFieldCuda(),
-                             con.GetWarpedGradientCuda()[0],
+                             con.GetWarpedGradientCuda(),
                              con.F3dContent::GetReference()->nvox,
                              paddingValue);
 }
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 11688116..83b2fc6c 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -27,35 +27,20 @@ void CudaContent::AllocateImages() {
         reg_tools_changeDatatype<float>(reference);
     if (floating->nbyper != NIFTI_TYPE_FLOAT32)
         reg_tools_changeDatatype<float>(floating);
-    if (reference->nt == 1) {
-        cudaCommon_allocateArrayToDevice<float>(&referenceCuda[0], reference->dim);
-        cudaCommon_transferNiftiToArrayOnDevice<float>(referenceCuda[0], reference);
-        cudaCommon_allocateArrayToDevice<float>(&floatingCuda[0], floating->dim);
-        cudaCommon_transferNiftiToArrayOnDevice<float>(floatingCuda[0], floating);
-    } else if (reference->nt == 2) {
-        cudaCommon_allocateArrayToDevice<float>(&referenceCuda[0], &referenceCuda[1], reference->dim);
-        cudaCommon_transferNiftiToArrayOnDevice<float>(referenceCuda[0], referenceCuda[1], reference);
-        cudaCommon_allocateArrayToDevice<float>(&floatingCuda[0], &floatingCuda[1], floating->dim);
-        cudaCommon_transferNiftiToArrayOnDevice<float>(floatingCuda[0], floatingCuda[1], floating);
-    }
+    cudaCommon_allocateArrayToDevice<float>(&referenceCuda, reference->dim);
+    cudaCommon_transferNiftiToArrayOnDevice<float>(referenceCuda, reference);
+    cudaCommon_allocateArrayToDevice<float>(&floatingCuda, floating->dim);
+    cudaCommon_transferNiftiToArrayOnDevice<float>(floatingCuda, floating);
 }
 /* *************************************************************** */
 void CudaContent::DeallocateImages() {
-    if (referenceCuda[0]) {
-        cudaCommon_free(referenceCuda[0]);
-        referenceCuda[0] = nullptr;
-    }
-    if (referenceCuda[1]) {
-        cudaCommon_free(referenceCuda[1]);
-        referenceCuda[1] = nullptr;
-    }
-    if (floatingCuda[0]) {
-        cudaCommon_free(floatingCuda[0]);
-        floatingCuda[0] = nullptr;
+    if (referenceCuda) {
+        cudaCommon_free(referenceCuda);
+        referenceCuda = nullptr;
     }
-    if (floatingCuda[1]) {
-        cudaCommon_free(floatingCuda[1]);
-        floatingCuda[1] = nullptr;
+    if (floatingCuda) {
+        cudaCommon_free(floatingCuda);
+        floatingCuda = nullptr;
     }
 }
 /* *************************************************************** */
@@ -71,25 +56,13 @@ void CudaContent::DeallocateDeformationField() {
 }
 /* *************************************************************** */
 void CudaContent::AllocateWarped() {
-    if (warped->nt == 1) {
-        cudaCommon_allocateArrayToDevice<float>(&warpedCuda[0], warped->dim);
-    } else if (warped->nt == 2) {
-        cudaCommon_allocateArrayToDevice<float>(&warpedCuda[0], &warpedCuda[1], warped->dim);
-    } else {
-        reg_print_fct_error("CudaContent::AllocateWarped()");
-        reg_print_msg_error("More than 2 time points aren't handled in the floating image");
-        reg_exit();
-    }
+    cudaCommon_allocateArrayToDevice<float>(&warpedCuda, warped->dim);
 }
 /* *************************************************************** */
 void CudaContent::DeallocateWarped() {
-    if (warpedCuda[0]) {
-        cudaCommon_free(warpedCuda[0]);
-        warpedCuda[0] = nullptr;
-    }
-    if (warpedCuda[1]) {
-        cudaCommon_free(warpedCuda[1]);
-        warpedCuda[1] = nullptr;
+    if (warpedCuda) {
+        cudaCommon_free(warpedCuda);
+        warpedCuda = nullptr;
     }
 }
 /* *************************************************************** */
@@ -151,8 +124,8 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     free(transformationMatrixCptr);
 }
 /* *************************************************************** */
-nifti_image* CudaContent::GetWarped(int index) {
-    DownloadImage(warped, warpedCuda[index], warped->datatype);
+nifti_image* CudaContent::GetWarped() {
+    DownloadImage(warped, warpedCuda, warped->datatype);
     return warped;
 }
 /* *************************************************************** */
@@ -163,9 +136,7 @@ void CudaContent::SetWarped(nifti_image *warpedIn) {
 
     reg_tools_changeDatatype<float>(warped);
     AllocateWarped();
-    cudaCommon_transferNiftiToArrayOnDevice(warpedCuda[0], warped);
-    if (warpedCuda[1])
-        cudaCommon_transferNiftiToArrayOnDevice(warpedCuda[1], warped);
+    cudaCommon_transferNiftiToArrayOnDevice(warpedCuda, warped);
 }
 /* *************************************************************** */
 template<class DataType>
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index 8a632ad2..e1c7a8b4 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -19,13 +19,13 @@ class CudaContent: public virtual Content {
 
     // Getters
     virtual nifti_image* GetDeformationField() override;
-    virtual nifti_image* GetWarped(int index = 0) override;
-    virtual cudaArray** GetReferenceCuda() { return referenceCuda; }
-    virtual cudaArray** GetFloatingCuda() { return floatingCuda; }
+    virtual nifti_image* GetWarped() override;
+    virtual cudaArray* GetReferenceCuda() { return referenceCuda; }
+    virtual cudaArray* GetFloatingCuda() { return floatingCuda; }
     virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; }
     virtual int* GetReferenceMaskCuda() { return referenceMaskCuda; }
     virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; }
-    virtual float** GetWarpedCuda() { return warpedCuda; }
+    virtual float* GetWarpedCuda() { return warpedCuda; }
 
     // Setters
     virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
@@ -34,12 +34,12 @@ class CudaContent: public virtual Content {
     virtual void SetWarped(nifti_image *warpedIn) override;
 
 protected:
-    cudaArray *referenceCuda[2] = {nullptr};
-    cudaArray *floatingCuda[2] = {nullptr};
+    cudaArray *referenceCuda = nullptr;
+    cudaArray *floatingCuda = nullptr;
     float4 *deformationFieldCuda = nullptr;
     int *referenceMaskCuda = nullptr;
     float *transformationMatrixCuda = nullptr;
-    float *warpedCuda[2] = {nullptr};
+    float *warpedCuda = nullptr;
 
 private:
     void AllocateImages();
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index 3b6bd53b..ec393047 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -38,20 +38,13 @@ void CudaF3dContent::DeallocateControlPointGrid() {
 }
 /* *************************************************************** */
 void CudaF3dContent::AllocateWarpedGradient() {
-    if (floating->nt >= 1)
-        cudaCommon_allocateArrayToDevice(&warpedGradientCuda[0], warpedGradient->dim);
-    if (floating->nt == 2)
-        cudaCommon_allocateArrayToDevice(&warpedGradientCuda[1], warpedGradient->dim);
+    cudaCommon_allocateArrayToDevice(&warpedGradientCuda, warpedGradient->dim);
 }
 /* *************************************************************** */
 void CudaF3dContent::DeallocateWarpedGradient() {
-    if (warpedGradientCuda[0] != nullptr) {
-        cudaCommon_free(warpedGradientCuda[0]);
-        warpedGradientCuda[0] = nullptr;
-    }
-    if (warpedGradientCuda[1] != nullptr) {
-        cudaCommon_free(warpedGradientCuda[1]);
-        warpedGradientCuda[1] = nullptr;
+    if (warpedGradientCuda != nullptr) {
+        cudaCommon_free(warpedGradientCuda);
+        warpedGradientCuda = nullptr;
     }
 }
 /* *************************************************************** */
@@ -105,14 +98,12 @@ void CudaF3dContent::UpdateVoxelBasedMeasureGradient() {
 }
 /* *************************************************************** */
 nifti_image* CudaF3dContent::GetWarpedGradient() {
-    cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda[0]);
+    cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda);
     return warpedGradient;
 }
 /* *************************************************************** */
 void CudaF3dContent::UpdateWarpedGradient() {
-    cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[0], warpedGradient);
-    if (warpedGradientCuda[1])
-        cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[1], warpedGradient);
+    cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda, warpedGradient);
 }
 /* *************************************************************** */
 void CudaF3dContent::ZeroTransformationGradient() {
diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h
index dc19ebbd..770a501c 100644
--- a/reg-lib/cuda/CudaF3dContent.h
+++ b/reg-lib/cuda/CudaF3dContent.h
@@ -24,7 +24,7 @@ class CudaF3dContent: public F3dContent, public CudaContent {
     virtual float4* GetControlPointGridCuda() { return controlPointGridCuda; }
     virtual float4* GetTransformationGradientCuda() { return transformationGradientCuda; }
     virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; }
-    virtual float4** GetWarpedGradientCuda() { return warpedGradientCuda; }
+    virtual float4* GetWarpedGradientCuda() { return warpedGradientCuda; }
 
     // Methods for transferring data from nifti to device
     virtual void UpdateControlPointGrid() override;
@@ -40,7 +40,7 @@ class CudaF3dContent: public F3dContent, public CudaContent {
     float4 *controlPointGridCuda = nullptr;
     float4 *transformationGradientCuda = nullptr;
     float4 *voxelBasedMeasureGradientCuda = nullptr;
-    float4 *warpedGradientCuda[2] = {nullptr};
+    float4 *warpedGradientCuda = nullptr;
 
 private:
     void AllocateControlPointGrid();
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
index 9ae5d7d2..d6b8176c 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -39,11 +39,11 @@ void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con) {
                                   cudaCon->F3dContent::GetWarpedGradient(),
                                   cudaCon->F3dContent::GetVoxelBasedMeasureGradient(),
                                   cudaCon->F3dContent::GetLocalWeightSim(),
-                                  cudaCon->GetReferenceCuda()[0],
-                                  cudaCon->GetFloatingCuda()[0],
+                                  cudaCon->GetReferenceCuda(),
+                                  cudaCon->GetFloatingCuda(),
                                   cudaCon->GetReferenceMaskCuda(),
-                                  cudaCon->GetWarpedCuda()[0],
-                                  cudaCon->GetWarpedGradientCuda()[0],
+                                  cudaCon->GetWarpedCuda(),
+                                  cudaCon->GetWarpedGradientCuda(),
                                   cudaCon->GetVoxelBasedMeasureGradientCuda());
 }
 /* *************************************************************** */

From 579f9b44338fc85cf92ca0527d3b596cd0b03947 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 24 Jan 2023 17:35:56 +0000
Subject: [PATCH 039/314] Fix GetApproximatedGradient() for CUDA

---
 niftyreg_build_version.txt   |  2 +-
 reg-lib/Compute.cpp          | 40 ++++++++++++++++++++++++++++++++++++
 reg-lib/Compute.h            |  5 +++++
 reg-lib/_reg_f3d.cpp         | 29 +-------------------------
 reg-lib/cuda/CudaCompute.cpp |  6 ++++++
 reg-lib/cuda/CudaCompute.h   |  1 +
 6 files changed, 54 insertions(+), 29 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 492dff08..7f1ddd53 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-152
+153
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 02938046..be7fb254 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -159,3 +159,43 @@ void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
     reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength);
 }
 /* *************************************************************** */
+template<typename Type>
+void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) {
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    nifti_image *controlPointGrid = con.GetControlPointGrid();
+    nifti_image *transformationGradient = con.GetTransformationGradient();
+
+    // Loop over every control point
+    Type *gridPtr = static_cast<Type*>(controlPointGrid->data);
+    Type *gradPtr = static_cast<Type*>(transformationGradient->data);
+    const Type eps = controlPointGrid->dx / Type(100);
+    for (size_t i = 0; i < controlPointGrid->nvox; ++i) {
+        const Type currentValue = gridPtr[i];
+        gridPtr[i] = currentValue + eps;
+        // Update the changes for GPU
+        con.UpdateControlPointGrid();
+        double valPlus = opt.GetObjectiveFunctionValue();
+        gridPtr[i] = currentValue - eps;
+        // Update the changes for GPU
+        con.UpdateControlPointGrid();
+        double valMinus = opt.GetObjectiveFunctionValue();
+        gridPtr[i] = currentValue;
+        gradPtr[i] = -Type((valPlus - valMinus) / (2 * eps));
+    }
+
+    // Update the changes for GPU
+    con.UpdateControlPointGrid();
+    con.UpdateTransformationGradient();
+}
+/* *************************************************************** */
+void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) {
+    switch (dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid()->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        GetApproximatedGradient<float>(opt);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        GetApproximatedGradient<double>(opt);
+        break;
+    }
+}
+/* *************************************************************** */
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index c4fc6b42..58821641 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "Content.h"
+#include "_reg_optimiser.h"
 
 class Compute {
 public:
@@ -23,7 +24,11 @@ class Compute {
     virtual void VoxelCentricToNodeCentric(float weight);
     virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength);
+    virtual void GetApproximatedGradient(InterfaceOptimiser& opt);
 
 protected:
     Content& con;
+
+private:
+    template<typename Type> void GetApproximatedGradient(InterfaceOptimiser&);
 };
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 0fe6c244..4816aee4 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -677,34 +677,7 @@ void reg_f3d<T>::SmoothGradient() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetApproximatedGradient() {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    nifti_image *controlPointGrid = con->GetControlPointGrid();
-    nifti_image *transformationGradient = con->GetTransformationGradient();
-
-    // Loop over every control point
-    T *gridPtr = static_cast<T*>(controlPointGrid->data);
-    T *gradPtr = static_cast<T*>(transformationGradient->data);
-    T eps = controlPointGrid->dx / 100.f;
-    for (size_t i = 0; i < controlPointGrid->nvox; ++i) {
-        T currentValue = this->optimiser->GetBestDOF()[i];
-        gridPtr[i] = currentValue + eps;
-        // Update the changes for GPU
-        con->UpdateControlPointGrid();
-        double valPlus = GetObjectiveFunctionValue();
-        gridPtr[i] = currentValue - eps;
-        // Update the changes for GPU
-        con->UpdateControlPointGrid();
-        double valMinus = GetObjectiveFunctionValue();
-        gridPtr[i] = currentValue;
-        // Update the changes for GPU
-        con->UpdateControlPointGrid();
-        gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps));
-    }
-
-    // Update the changes for GPU
-    con->UpdateTransformationGradient();
+    this->compute->GetApproximatedGradient(*this);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetApproximatedGradient");
 #endif
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 8a57d35d..784f7b84 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -132,3 +132,9 @@ void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
     reg_multiplyValue_gpu(nodeNumber, dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), 1 / (float)maxGradLength);
 }
 /* *************************************************************** */
+void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    Compute::GetApproximatedGradient(opt);
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 49a22181..284dd0d8 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -22,4 +22,5 @@ class CudaCompute: public Compute {
     virtual void VoxelCentricToNodeCentric(float weight) override;
     virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength) override;
+    virtual void GetApproximatedGradient(InterfaceOptimiser& opt) override;
 };

From fc673a1f0eafa9f6e113da9e620366ac58376db3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 26 Jan 2023 17:52:27 +0000
Subject: [PATCH 040/314] Refactorise reg_tools

---
 niftyreg_build_version.txt                    |    2 +-
 reg-apps/reg_average.cpp                      |    8 +-
 reg-apps/reg_tools.cpp                        |    8 +-
 reg-lib/cpu/_reg_localTrans.cpp               |    4 +-
 reg-lib/cpu/_reg_localTrans_jac.cpp           |    2 +-
 reg-lib/cpu/_reg_mind.cpp                     |    4 +-
 reg-lib/cpu/_reg_tools.cpp                    | 5527 ++++++++---------
 reg-lib/cpu/_reg_tools.h                      |  116 +-
 .../reg_test_bspline_deformation_field.cpp    |    2 +-
 reg-test/reg_test_changeDataType.cpp          |    2 +-
 ...est_coherence_affine_deformation_field.cpp |    2 +-
 reg-test/reg_test_coherence_interpolation.cpp |    2 +-
 .../reg_test_compose_deformation_field.cpp    |    2 +-
 reg-test/reg_test_convolution.cpp             |    2 +-
 reg-test/reg_test_fullNonlinear.cpp           |    2 +-
 reg-test/reg_test_fullSymNonlinear.cpp        |    2 +-
 reg-test/reg_test_imageGradient.cpp           |    2 +-
 .../reg_test_linearElasticityGradient.cpp     |    2 +-
 reg-test/reg_test_mindDescriptor.cpp          |    2 +-
 reg-test/reg_test_mindsscDescriptor.cpp       |    2 +-
 .../reg_test_nonlinear_deformation_field.cpp  |    2 +-
 21 files changed, 2618 insertions(+), 3079 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 7f1ddd53..a2ecc456 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-153
+154
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index a74076c4..68ef8c11 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -81,7 +81,7 @@ void average_norm_intensity(nifti_image *image)
    reg_heapSort(rankedIntensities,static_cast<int>(image->nvox));
    PrecisionTYPE lowerValue=rankedIntensities[static_cast<unsigned int>(static_cast<float>(image->nvox)*0.03f)];
    PrecisionTYPE higherValue=rankedIntensities[static_cast<unsigned int>(static_cast<float>(image->nvox)*0.97f)];
-   reg_tools_substractValueToImage(image,image,lowerValue);
+   reg_tools_subtractValueFromImage(image,image,lowerValue);
    reg_tools_multiplyValueToImage(image,image,255.f/(higherValue-lowerValue));
    free(rankedIntensities);
    return;
@@ -329,7 +329,7 @@ int compute_nrr_demean(nifti_image *demean_field,
          tempField->scl_slope=1.f;
          tempField->scl_inter=0.f;
          reg_affine_getDeformationField(&affineTransformation, tempField);
-         reg_tools_substractImageToImage(deformationField,tempField,deformationField);
+         reg_tools_subtractImageFromImage(deformationField,tempField,deformationField);
          nifti_image_free(tempField);
          if(deformationField->intent_p1==DEF_FIELD)
             deformationField->intent_p1=DISP_FIELD;
@@ -443,7 +443,7 @@ int compute_average_image(nifti_image *averageImage,
          nifti_image_free(current_transformation);
          if(demeanField!=nullptr){
             if(deformationField->intent_p1==DEF_VEL_FIELD){
-               reg_tools_substractImageToImage(deformationField,demeanField,deformationField);
+               reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField);
                nifti_image *tempDef = nifti_copy_nim_info(deformationField);
                tempDef->data = (void *)malloc(tempDef->nvox*tempDef->nbyper);
                memcpy(tempDef->data,deformationField->data,tempDef->nvox*tempDef->nbyper);
@@ -454,7 +454,7 @@ int compute_average_image(nifti_image *averageImage,
                nifti_free_extensions(deformationField);
                nifti_image_free(tempDef);
             }
-            else reg_tools_substractImageToImage(deformationField,demeanField,deformationField);
+            else reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField);
 #ifndef NDEBUG
             reg_print_msg_debug("Input non-linear transformation has been demeaned");
 #endif
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 49b139ee..105afac5 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -503,7 +503,7 @@ int main(int argc, char **argv)
         reg_heapSort(static_cast<float *>(normImage->data), normImage->nvox);
         float minValue = static_cast<float *>(normImage->data)[static_cast<int>(reg_floor(03*(int)normImage->nvox/100))];
         float maxValue = static_cast<float *>(normImage->data)[static_cast<int>(reg_floor(97*(int)normImage->nvox/100))];
-        reg_tools_substractValueToImage(image,normImage,minValue);
+        reg_tools_subtractValueFromImage(image,normImage,minValue);
         reg_tools_divideValueToImage(normImage,normImage,maxValue-minValue);
         if(flag->outputImageFlag)
             reg_io_WriteImageFile(normImage, param->outputImageName);
@@ -642,7 +642,7 @@ int main(int argc, char **argv)
                 reg_tools_addImageToImage(image, image2, outputImage);
                 break;
             case 1:
-                reg_tools_substractImageToImage(image, image2, outputImage);
+                reg_tools_subtractImageFromImage(image, image2, outputImage);
                 break;
             case 2:
                 reg_tools_multiplyImageToImage(image, image2, outputImage);
@@ -660,7 +660,7 @@ int main(int argc, char **argv)
                 reg_tools_addValueToImage(image, outputImage, param->operationValue);
                 break;
             case 1:
-                reg_tools_substractValueToImage(image, outputImage, param->operationValue);
+                reg_tools_subtractValueFromImage(image, outputImage, param->operationValue);
                 break;
             case 2:
                 reg_tools_multiplyValueToImage(image, outputImage, param->operationValue);
@@ -956,7 +956,7 @@ int main(int argc, char **argv)
         // Rescale the input image
         float min_value = reg_tools_getMinValue(image, -1);
         float max_value = reg_tools_getMaxValue(image, -1);
-        reg_tools_substractValueToImage(image, scaledImage, min_value);
+        reg_tools_subtractValueFromImage(image, scaledImage, min_value);
         reg_tools_multiplyValueToImage(scaledImage, scaledImage, 255.f/(max_value-min_value));
         // Create the rgb image
         nifti_image *outputImage = nifti_copy_nim_info(image);
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index a0fee955..865d17a1 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -4024,7 +4024,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
          reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata),
                affineOnly,
                false);
-         reg_tools_substractImageToImage(flowFieldImage,affineOnly,flowFieldImage);
+         reg_tools_subtractImageFromImage(flowFieldImage,affineOnly,flowFieldImage);
       }
    }
    else reg_getDisplacementFromDeformation(flowFieldImage);
@@ -4209,7 +4209,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
             reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata),
                   affineOnly,
                   false);
-            reg_tools_substractImageToImage(flowFieldImage,affineOnly,flowFieldImage);
+            reg_tools_subtractImageFromImage(flowFieldImage,affineOnly,flowFieldImage);
          }
       }
       else reg_getDisplacementFromDeformation(flowFieldImage);
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index bfb86338..9dad9ffc 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -3001,7 +3001,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
          reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata),
                defFieldImage,
                false);
-         reg_tools_substractImageToImage(flowFieldImage,defFieldImage,flowFieldImage);
+         reg_tools_subtractImageFromImage(flowFieldImage,defFieldImage,flowFieldImage);
       }
    }
    else reg_getDisplacementFromDeformation(flowFieldImage);
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 2cd53fd9..a9ea0401 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -114,7 +114,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
     for (int i = 0; i < samplingNbr; i++) {
         ShiftImage<DTYPE>(currentInputImage, shiftedImage, maskPtr,
                           RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
-        reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image);
+        reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image);
         reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
         reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr);
         reg_tools_addImageToImage(meanImage, diff_image, meanImage);
@@ -262,7 +262,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
     for (int i = 0; i < samplingNbr; i++) {
         ShiftImage<DTYPE>(currentInputImage, shiftedImage, maskPtr,
                           RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
-        reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image);
+        reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image);
         reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
         reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr);
 
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 4f14dea8..eb4d247b 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -15,1088 +15,972 @@
 #include "_reg_tools.h"
 
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_checkAndCorrectDimension(nifti_image *image)
-{
-   // Ensure that no dimension is set to zero
-   if(image->nx<1 || image->dim[1]<1) image->dim[1]=image->nx=1;
-   if(image->ny<1 || image->dim[2]<1) image->dim[2]=image->ny=1;
-   if(image->nz<1 || image->dim[3]<1) image->dim[3]=image->nz=1;
-   if(image->nt<1 || image->dim[4]<1) image->dim[4]=image->nt=1;
-   if(image->nu<1 || image->dim[5]<1) image->dim[5]=image->nu=1;
-   if(image->nv<1 || image->dim[6]<1) image->dim[6]=image->nv=1;
-   if(image->nw<1 || image->dim[7]<1) image->dim[7]=image->nw=1;
-   //Correcting the dim of the images
-   for(int i=1;i<8;++i) {
-       if(image->dim[i]>1) {
-            image->dim[0]=image->ndim=i;
-       }
-   }
-   // Set the slope to 1 if undefined
-   if(image->scl_slope==0) image->scl_slope=1.f;
-   // Ensure that no spacing is set to zero
-   if(image->ny==1 && (image->dy==0 || image->pixdim[2]==0))
-      image->dy=image->pixdim[2]=1;
-   if(image->nz==1 && (image->dz==0 || image->pixdim[3]==0))
-      image->dz=image->pixdim[3]=1;
-   // Create the qform matrix if required
-   if(image->qform_code==0 && image->sform_code==0)
-   {
-      image->qto_xyz=nifti_quatern_to_mat44(image->quatern_b,
-                                            image->quatern_c,
-                                            image->quatern_d,
-                                            image->qoffset_x,
-                                            image->qoffset_y,
-                                            image->qoffset_z,
-                                            image->dx,
-                                            image->dy,
-                                            image->dz,
-                                            image->qfac);
-      image->qto_ijk=nifti_mat44_inverse(image->qto_xyz);
-   }
-   // Set the voxel spacing to millimeters
-   if(image->xyz_units==NIFTI_UNITS_MICRON)
-   {
-      for(int d=1; d<=image->ndim; ++d)
-         image->pixdim[d] /= 1000.f;
-      image->xyz_units=NIFTI_UNITS_MM;
-   }
-   if(image->xyz_units==NIFTI_UNITS_METER)
-   {
-      for(int d=1; d<=image->ndim; ++d)
-         image->pixdim[d] *= 1000.f;
-      image->xyz_units=NIFTI_UNITS_MM;
-   }
-   image->dx=image->pixdim[1];
-   image->dy=image->pixdim[2];
-   image->dz=image->pixdim[3];
-   image->dt=image->pixdim[4];
-   image->du=image->pixdim[5];
-   image->dv=image->pixdim[6];
-   image->dw=image->pixdim[7];
-}
-/* *************************************************************** */
-/* *************************************************************** */
-bool reg_isAnImageFileName(char *name)
-{
-   std::string n(name);
-   if(n.find( ".nii") != std::string::npos)
-      return true;
-   if(n.find( ".nii.gz") != std::string::npos)
-      return true;
-   if(n.find( ".hdr") != std::string::npos)
-      return true;
-   if(n.find( ".img") != std::string::npos)
-      return true;
-   if(n.find( ".img.gz") != std::string::npos)
-      return true;
-   if(n.find( ".nrrd") != std::string::npos)
-      return true;
-   if(n.find( ".png") != std::string::npos)
-      return true;
-   return false;
+void reg_checkAndCorrectDimension(nifti_image *image) {
+    // Ensure that no dimension is set to zero
+    if (image->nx < 1 || image->dim[1] < 1) image->dim[1] = image->nx = 1;
+    if (image->ny < 1 || image->dim[2] < 1) image->dim[2] = image->ny = 1;
+    if (image->nz < 1 || image->dim[3] < 1) image->dim[3] = image->nz = 1;
+    if (image->nt < 1 || image->dim[4] < 1) image->dim[4] = image->nt = 1;
+    if (image->nu < 1 || image->dim[5] < 1) image->dim[5] = image->nu = 1;
+    if (image->nv < 1 || image->dim[6] < 1) image->dim[6] = image->nv = 1;
+    if (image->nw < 1 || image->dim[7] < 1) image->dim[7] = image->nw = 1;
+    //Correcting the dim of the images
+    for (int i = 1; i < 8; ++i) {
+        if (image->dim[i] > 1) {
+            image->dim[0] = image->ndim = i;
+        }
+    }
+    // Set the slope to 1 if undefined
+    if (image->scl_slope == 0) image->scl_slope = 1.f;
+    // Ensure that no spacing is set to zero
+    if (image->ny == 1 && (image->dy == 0 || image->pixdim[2] == 0))
+        image->dy = image->pixdim[2] = 1;
+    if (image->nz == 1 && (image->dz == 0 || image->pixdim[3] == 0))
+        image->dz = image->pixdim[3] = 1;
+    // Create the qform matrix if required
+    if (image->qform_code == 0 && image->sform_code == 0) {
+        image->qto_xyz = nifti_quatern_to_mat44(image->quatern_b,
+                                                image->quatern_c,
+                                                image->quatern_d,
+                                                image->qoffset_x,
+                                                image->qoffset_y,
+                                                image->qoffset_z,
+                                                image->dx,
+                                                image->dy,
+                                                image->dz,
+                                                image->qfac);
+        image->qto_ijk = nifti_mat44_inverse(image->qto_xyz);
+    }
+    // Set the voxel spacing to millimeters
+    if (image->xyz_units == NIFTI_UNITS_MICRON) {
+        for (int d = 1; d <= image->ndim; ++d)
+            image->pixdim[d] /= 1000.f;
+        image->xyz_units = NIFTI_UNITS_MM;
+    }
+    if (image->xyz_units == NIFTI_UNITS_METER) {
+        for (int d = 1; d <= image->ndim; ++d)
+            image->pixdim[d] *= 1000.f;
+        image->xyz_units = NIFTI_UNITS_MM;
+    }
+    image->dx = image->pixdim[1];
+    image->dy = image->pixdim[2];
+    image->dz = image->pixdim[3];
+    image->dt = image->pixdim[4];
+    image->du = image->pixdim[5];
+    image->dv = image->pixdim[6];
+    image->dw = image->pixdim[7];
+}
+/* *************************************************************** */
+bool reg_isAnImageFileName(const char *name) {
+    const std::string n(name);
+    if (n.find(".nii") != std::string::npos)
+        return true;
+    if (n.find(".nii.gz") != std::string::npos)
+        return true;
+    if (n.find(".hdr") != std::string::npos)
+        return true;
+    if (n.find(".img") != std::string::npos)
+        return true;
+    if (n.find(".img.gz") != std::string::npos)
+        return true;
+    if (n.find(".nrrd") != std::string::npos)
+        return true;
+    if (n.find(".png") != std::string::npos)
+        return true;
+    return false;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DTYPE>
 void reg_intensityRescale_core(nifti_image *image,
                                int timePoint,
                                float newMin,
-                               float newMax
-                               )
-{
-   DTYPE *imagePtr = static_cast<DTYPE *>(image->data);
-   unsigned int voxelNumber = image->nx*image->ny*image->nz;
-
-   // The rescaling is done for each volume independently
-   DTYPE *volumePtr = &imagePtr[timePoint*voxelNumber];
-   DTYPE currentMin=0;
-   DTYPE currentMax=0;
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      currentMin=(DTYPE)std::numeric_limits<unsigned char>::max();
-      currentMax=0;
-      break;
-   case NIFTI_TYPE_INT8:
-      currentMin=(DTYPE)std::numeric_limits<char>::max();
-      currentMax=-(DTYPE)std::numeric_limits<char>::max();
-      break;
-   case NIFTI_TYPE_UINT16:
-      currentMin=(DTYPE)std::numeric_limits<unsigned short>::max();
-      currentMax=0;
-      break;
-   case NIFTI_TYPE_INT16:
-      currentMin=(DTYPE)std::numeric_limits<short>::max();
-      currentMax=-(DTYPE)std::numeric_limits<short>::max();
-      break;
-   case NIFTI_TYPE_UINT32:
-      currentMin=(DTYPE)std::numeric_limits<unsigned int>::max();
-      currentMax=0;
-      break;
-   case NIFTI_TYPE_INT32:
-      currentMin=(DTYPE)std::numeric_limits<int>::max();
-      currentMax=-(DTYPE)std::numeric_limits<int>::max();
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      currentMin=(DTYPE)std::numeric_limits<float>::max();
-      currentMax=-(DTYPE)std::numeric_limits<float>::max();
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      currentMin=(DTYPE)std::numeric_limits<double>::max();
-      currentMax=-(DTYPE)std::numeric_limits<double>::max();
-      break;
-   }
-
-   // Extract the minimal and maximal values from the current volume
-   if(image->scl_slope==0) image->scl_slope=1.0f;
-   for(unsigned int index=0; index<voxelNumber; index++)
-   {
-      DTYPE value = (DTYPE)(*volumePtr++ * image->scl_slope + image->scl_inter);
-      if(value==value)
-      {
-         currentMin=(currentMin<value)?currentMin:value;
-         currentMax=(currentMax>value)?currentMax:value;
-      }
-   }
-
-   // Compute constant values to rescale image intensities
-   double currentDiff = (double)(currentMax-currentMin);
-   double newDiff = (double)(newMax-newMin);
-
-   // Set the image header information for appropriate display
-   image->cal_min=newMin;
-   image->cal_max=newMax;
-
-   // Reset the volume pointer to the start of the current volume
-   volumePtr = &imagePtr[timePoint*voxelNumber];
-
-   // Iterates over all voxels in the current volume
-   for(unsigned int index=0; index<voxelNumber; index++)
-   {
-      double value = (double)*volumePtr * image->scl_slope + image->scl_inter;
-      // Check if the value is defined
-      if(value==value)
-      {
-         // Normalise the value between 0 and 1
-         value = (value-(double)currentMin)/currentDiff;
-         // Rescale the value using the specified range
-         value = value * newDiff + newMin;
-      }
-      *volumePtr++=(DTYPE)value;
-   }
-   image->scl_slope=1.f;
-   image->scl_inter=0.f;
+                               float newMax) {
+    DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
+    unsigned int voxelNumber = image->nx * image->ny * image->nz;
+
+    // The rescaling is done for each volume independently
+    DTYPE *volumePtr = &imagePtr[timePoint * voxelNumber];
+    DTYPE currentMin = 0;
+    DTYPE currentMax = 0;
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        currentMin = (DTYPE)std::numeric_limits<unsigned char>::max();
+        currentMax = 0;
+        break;
+    case NIFTI_TYPE_INT8:
+        currentMin = (DTYPE)std::numeric_limits<char>::max();
+        currentMax = (DTYPE)std::numeric_limits<char>::min();
+        break;
+    case NIFTI_TYPE_UINT16:
+        currentMin = (DTYPE)std::numeric_limits<unsigned short>::max();
+        currentMax = (DTYPE)std::numeric_limits<unsigned short>::min();
+        break;
+    case NIFTI_TYPE_INT16:
+        currentMin = (DTYPE)std::numeric_limits<short>::max();
+        currentMax = (DTYPE)std::numeric_limits<short>::min();
+        break;
+    case NIFTI_TYPE_UINT32:
+        currentMin = (DTYPE)std::numeric_limits<unsigned int>::max();
+        currentMax = (DTYPE)std::numeric_limits<unsigned int>::min();
+        break;
+    case NIFTI_TYPE_INT32:
+        currentMin = (DTYPE)std::numeric_limits<int>::max();
+        currentMax = (DTYPE)std::numeric_limits<int>::min();
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        currentMin = (DTYPE)std::numeric_limits<float>::max();
+        currentMax = (DTYPE)std::numeric_limits<float>::min();
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        currentMin = (DTYPE)std::numeric_limits<double>::max();
+        currentMax = (DTYPE)std::numeric_limits<double>::min();
+        break;
+    }
+
+    // Extract the minimal and maximal values from the current volume
+    if (image->scl_slope == 0) image->scl_slope = 1.0f;
+    for (unsigned int index = 0; index < voxelNumber; index++) {
+        DTYPE value = (DTYPE)(*volumePtr++ * image->scl_slope + image->scl_inter);
+        if (value == value) {
+            currentMin = (currentMin < value) ? currentMin : value;
+            currentMax = (currentMax > value) ? currentMax : value;
+        }
+    }
+
+    // Compute constant values to rescale image intensities
+    double currentDiff = (double)(currentMax - currentMin);
+    double newDiff = (double)(newMax - newMin);
+
+    // Set the image header information for appropriate display
+    image->cal_min = newMin;
+    image->cal_max = newMax;
+
+    // Reset the volume pointer to the start of the current volume
+    volumePtr = &imagePtr[timePoint * voxelNumber];
+
+    // Iterates over all voxels in the current volume
+    for (unsigned int index = 0; index < voxelNumber; index++) {
+        double value = (double)*volumePtr * image->scl_slope + image->scl_inter;
+        // Check if the value is defined
+        if (value == value) {
+            // Normalise the value between 0 and 1
+            value = (value - (double)currentMin) / currentDiff;
+            // Rescale the value using the specified range
+            value = value * newDiff + newMin;
+        }
+        *volumePtr++ = (DTYPE)value;
+    }
+    image->scl_slope = 1.f;
+    image->scl_inter = 0.f;
 }
 /* *************************************************************** */
 void reg_intensityRescale(nifti_image *image,
                           int timepoint,
                           float newMin,
-                          float newMax
-                          )
-{
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_intensityRescale_core<unsigned char>(image, timepoint, newMin, newMax);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_intensityRescale_core<char>(image, timepoint, newMin, newMax);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_intensityRescale_core<unsigned short>(image, timepoint, newMin, newMax);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_intensityRescale_core<short>(image, timepoint, newMin, newMax);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_intensityRescale_core<unsigned int>(image, timepoint, newMin, newMax);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_intensityRescale_core<int>(image, timepoint, newMin, newMax);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_intensityRescale_core<float>(image, timepoint, newMin, newMax);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_intensityRescale_core<double>(image, timepoint, newMin, newMax);
-      break;
-   default:
-      reg_print_fct_error("reg_intensityRescale");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
+                          float newMax) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_intensityRescale_core<unsigned char>(image, timepoint, newMin, newMax);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_intensityRescale_core<char>(image, timepoint, newMin, newMax);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_intensityRescale_core<unsigned short>(image, timepoint, newMin, newMax);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_intensityRescale_core<short>(image, timepoint, newMin, newMax);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_intensityRescale_core<unsigned int>(image, timepoint, newMin, newMax);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_intensityRescale_core<int>(image, timepoint, newMin, newMax);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_intensityRescale_core<float>(image, timepoint, newMin, newMax);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_intensityRescale_core<double>(image, timepoint, newMin, newMax);
+        break;
+    default:
+        reg_print_fct_error("reg_intensityRescale");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DTYPE>
-void reg_tools_removeSCLInfo_core(nifti_image *image)
-{
-   if(image->scl_slope==1.f && image->scl_inter==0.f)
-      return;
-   DTYPE *imgPtr = static_cast<DTYPE *>(image->data);
-   for(size_t i=0;i<image->nvox; ++i){
-      *imgPtr=*imgPtr*(DTYPE)image->scl_slope+(DTYPE)image->scl_inter;
-      imgPtr++;
-   }
-   image->scl_slope=1.f;
-   image->scl_inter=0.f;
-}
-/* *************************************************************** */
-void reg_tools_removeSCLInfo(nifti_image *image)
-{
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_removeSCLInfo_core<unsigned char>(image);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_removeSCLInfo_core<char>(image);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_removeSCLInfo_core<unsigned short>(image);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_removeSCLInfo_core<short>(image);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_removeSCLInfo_core<unsigned int>(image);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_removeSCLInfo_core<int>(image);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_removeSCLInfo_core<float>(image);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_removeSCLInfo_core<double>(image);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_removeSCLInfo");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-void reg_getRealImageSpacing(nifti_image *image,
-                             float *spacingValues)
-{
-   float indexVoxel1[3]= {0,0,0};
-   float indexVoxel2[3], realVoxel1[3], realVoxel2[3];
-   reg_mat44_mul(&(image->sto_xyz), indexVoxel1, realVoxel1);
-
-   indexVoxel2[1]=indexVoxel2[2]=0;
-   indexVoxel2[0]=1;
-   reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
-   spacingValues[0]=sqrtf(reg_pow2(realVoxel1[0]-realVoxel2[0])+reg_pow2(realVoxel1[1]-realVoxel2[1])+reg_pow2(realVoxel1[2]-realVoxel2[2]));
-
-   indexVoxel2[0]=indexVoxel2[2]=0;
-   indexVoxel2[1]=1;
-   reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
-   spacingValues[1]=sqrtf(reg_pow2(realVoxel1[0]-realVoxel2[0])+reg_pow2(realVoxel1[1]-realVoxel2[1])+reg_pow2(realVoxel1[2]-realVoxel2[2]));
-
-   if(image->nz>1)
-   {
-      indexVoxel2[0]=indexVoxel2[1]=0;
-      indexVoxel2[2]=1;
-      reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
-      spacingValues[2]=sqrtf(reg_pow2(realVoxel1[0]-realVoxel2[0])+reg_pow2(realVoxel1[1]-realVoxel2[1])+reg_pow2(realVoxel1[2]-realVoxel2[2]));
-   }
+void reg_tools_removeSCLInfo_core(nifti_image *image) {
+    if (image->scl_slope == 1.f && image->scl_inter == 0.f)
+        return;
+    DTYPE *imgPtr = static_cast<DTYPE*>(image->data);
+    for (size_t i = 0; i < image->nvox; ++i) {
+        *imgPtr = *imgPtr * (DTYPE)image->scl_slope + (DTYPE)image->scl_inter;
+        imgPtr++;
+    }
+    image->scl_slope = 1.f;
+    image->scl_inter = 0.f;
+}
+/* *************************************************************** */
+void reg_tools_removeSCLInfo(nifti_image *image) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_removeSCLInfo_core<unsigned char>(image);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_removeSCLInfo_core<char>(image);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_removeSCLInfo_core<unsigned short>(image);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_removeSCLInfo_core<short>(image);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_removeSCLInfo_core<unsigned int>(image);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_removeSCLInfo_core<int>(image);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_removeSCLInfo_core<float>(image);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_removeSCLInfo_core<double>(image);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_removeSCLInfo");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
+void reg_getRealImageSpacing(nifti_image *image, float *spacingValues) {
+    float indexVoxel1[3] = {0, 0, 0};
+    float indexVoxel2[3], realVoxel1[3], realVoxel2[3];
+    reg_mat44_mul(&(image->sto_xyz), indexVoxel1, realVoxel1);
+
+    indexVoxel2[1] = indexVoxel2[2] = 0;
+    indexVoxel2[0] = 1;
+    reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
+    spacingValues[0] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2]));
+
+    indexVoxel2[0] = indexVoxel2[2] = 0;
+    indexVoxel2[1] = 1;
+    reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
+    spacingValues[1] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2]));
+
+    if (image->nz > 1) {
+        indexVoxel2[0] = indexVoxel2[1] = 0;
+        indexVoxel2[2] = 1;
+        reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
+        spacingValues[2] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2]));
+    }
+}
 /* *************************************************************** */
 //this function will threshold an image to the values provided,
 //set the scl_slope and sct_inter of the image to 1 and 0 (SSD uses actual image data values),
 //and sets cal_min and cal_max to have the min/max image data values
-template<class T,class DTYPE>
-void reg_thresholdImage2(nifti_image *image,
-                         T lowThr,
-                         T upThr
-                         )
-{
-   DTYPE *imagePtr = static_cast<DTYPE *>(image->data);
-   T currentMin=std::numeric_limits<T>::max();
-   T currentMax=-std::numeric_limits<T>::max();
-
-   if(image->scl_slope==0)image->scl_slope=1.0;
-
-   for(unsigned int index=0; index<image->nvox; index++)
-   {
-      T value = (T)(*imagePtr * image->scl_slope + image->scl_inter);
-      if(value==value)
-      {
-         if(value<lowThr)
-         {
-            value = lowThr;
-         }
-         else if(value>upThr)
-         {
-            value = upThr;
-         }
-         currentMin=(currentMin<value)?currentMin:value;
-         currentMax=(currentMax>value)?currentMax:value;
-      }
-      *imagePtr++=(DTYPE)value;
-   }
-
-   image->cal_min = currentMin;
-   image->cal_max = currentMax;
+template<class T, class DTYPE>
+void reg_thresholdImage2(nifti_image *image, T lowThr, T upThr) {
+    DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
+    T currentMin = std::numeric_limits<T>::max();
+    T currentMax = -std::numeric_limits<T>::max();
+
+    if (image->scl_slope == 0)image->scl_slope = 1.0;
+
+    for (unsigned int index = 0; index < image->nvox; index++) {
+        T value = (T)(*imagePtr * image->scl_slope + image->scl_inter);
+        if (value == value) {
+            if (value < lowThr) {
+                value = lowThr;
+            } else if (value > upThr) {
+                value = upThr;
+            }
+            currentMin = (currentMin < value) ? currentMin : value;
+            currentMax = (currentMax > value) ? currentMax : value;
+        }
+        *imagePtr++ = (DTYPE)value;
+    }
+
+    image->cal_min = currentMin;
+    image->cal_max = currentMax;
 }
 /* *************************************************************** */
 template<class T>
-void reg_thresholdImage(nifti_image *image,
-                        T lowThr,
-                        T upThr
-                        )
-{
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_thresholdImage2<T,unsigned char>(image, lowThr, upThr);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_thresholdImage2<T,char>(image, lowThr, upThr);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_thresholdImage2<T,unsigned short>(image, lowThr, upThr);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_thresholdImage2<T,short>(image, lowThr, upThr);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_thresholdImage2<T,unsigned int>(image, lowThr, upThr);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_thresholdImage2<T,int>(image, lowThr, upThr);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_thresholdImage2<T,float>(image, lowThr, upThr);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_thresholdImage2<T,double>(image, lowThr, upThr);
-      break;
-   default:
-      reg_print_fct_error("reg_thresholdImage");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
-}
-template void reg_thresholdImage<float>(nifti_image *, float, float);
-template void reg_thresholdImage<double>(nifti_image *, double, double);
-/* *************************************************************** */
+void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_thresholdImage2<T, unsigned char>(image, lowThr, upThr);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_thresholdImage2<T, char>(image, lowThr, upThr);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_thresholdImage2<T, unsigned short>(image, lowThr, upThr);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_thresholdImage2<T, short>(image, lowThr, upThr);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_thresholdImage2<T, unsigned int>(image, lowThr, upThr);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_thresholdImage2<T, int>(image, lowThr, upThr);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_thresholdImage2<T, float>(image, lowThr, upThr);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_thresholdImage2<T, double>(image, lowThr, upThr);
+        break;
+    default:
+        reg_print_fct_error("reg_thresholdImage");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
+}
+template void reg_thresholdImage<float>(nifti_image*, float, float);
+template void reg_thresholdImage<double>(nifti_image*, double, double);
 /* *************************************************************** */
 template <class PrecisionTYPE, class DTYPE>
-PrecisionTYPE reg_getMaximalLength2D(nifti_image *image)
-{
-   DTYPE *dataPtrX = static_cast<DTYPE *>(image->data);
-   DTYPE *dataPtrY = &dataPtrX[image->nx*image->ny*image->nz];
-
-   PrecisionTYPE max=0;
-
-   for(int i=0; i<image->nx*image->ny*image->nz; i++)
-   {
-      PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++);
-      PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++);
-      PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX*valX + valY*valY));
-      max = (length>max)?length:max;
-   }
-   return max;
+PrecisionTYPE reg_getMaximalLength2D(const nifti_image *image) {
+    const DTYPE *dataPtrX = static_cast<DTYPE*>(image->data);
+    const DTYPE *dataPtrY = &dataPtrX[image->nx * image->ny * image->nz];
+    PrecisionTYPE max = 0;
+    for (int i = 0; i < image->nx * image->ny * image->nz; i++) {
+        PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++);
+        PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++);
+        PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX * valX + valY * valY));
+        max = (length > max) ? length : max;
+    }
+    return max;
 }
 /* *************************************************************** */
 template <class PrecisionTYPE, class DTYPE>
-PrecisionTYPE reg_getMaximalLength3D(nifti_image *image)
-{
-   DTYPE *dataPtrX = static_cast<DTYPE *>(image->data);
-   DTYPE *dataPtrY = &dataPtrX[image->nx*image->ny*image->nz];
-   DTYPE *dataPtrZ = &dataPtrY[image->nx*image->ny*image->nz];
-
-   PrecisionTYPE max=0;
-
-   for(int i=0; i<image->nx*image->ny*image->nz; i++)
-   {
-      PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++);
-      PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++);
-      PrecisionTYPE valZ = (PrecisionTYPE)(*dataPtrZ++);
-      PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX*valX + valY*valY + valZ*valZ));
-      max = (length>max)?length:max;
-   }
-   return max;
+PrecisionTYPE reg_getMaximalLength3D(const nifti_image *image) {
+    const DTYPE *dataPtrX = static_cast<DTYPE*>(image->data);
+    const DTYPE *dataPtrY = &dataPtrX[image->nx * image->ny * image->nz];
+    const DTYPE *dataPtrZ = &dataPtrY[image->nx * image->ny * image->nz];
+    PrecisionTYPE max = 0;
+    for (int i = 0; i < image->nx * image->ny * image->nz; i++) {
+        PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++);
+        PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++);
+        PrecisionTYPE valZ = (PrecisionTYPE)(*dataPtrZ++);
+        PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX * valX + valY * valY + valZ * valZ));
+        max = (length > max) ? length : max;
+    }
+    return max;
 }
 /* *************************************************************** */
 template <class PrecisionTYPE>
-PrecisionTYPE reg_getMaximalLength(nifti_image *image)
-{
-   if(image->nz==1)
-   {
-      switch(image->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_getMaximalLength2D<PrecisionTYPE,float>(image);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         return reg_getMaximalLength2D<PrecisionTYPE,double>(image);
-         break;
-      }
-   }
-   else
-   {
-      switch(image->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         return reg_getMaximalLength3D<PrecisionTYPE,float>(image);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         return reg_getMaximalLength3D<PrecisionTYPE,double>(image);
-         break;
-      }
-   }
-   return EXIT_SUCCESS;
-}
-/* *************************************************************** */
-template float reg_getMaximalLength<float>(nifti_image *);
-template double reg_getMaximalLength<double>(nifti_image *);
-/* *************************************************************** */
+PrecisionTYPE reg_getMaximalLength(const nifti_image *image) {
+    if (image->nz == 1) {
+        switch (image->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_getMaximalLength2D<PrecisionTYPE, float>(image);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            return reg_getMaximalLength2D<PrecisionTYPE, double>(image);
+            break;
+        }
+    } else {
+        switch (image->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            return reg_getMaximalLength3D<PrecisionTYPE, float>(image);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            return reg_getMaximalLength3D<PrecisionTYPE, double>(image);
+            break;
+        }
+    }
+    return EXIT_SUCCESS;
+}
+template float reg_getMaximalLength<float>(const nifti_image*);
+template double reg_getMaximalLength<double>(const nifti_image*);
 /* *************************************************************** */
 template <class NewTYPE, class DTYPE>
-void reg_tools_changeDatatype1(nifti_image *image,int type)
-{
-   // the initial array is saved and freeed
-   DTYPE *initialValue = (DTYPE *)malloc(image->nvox*sizeof(DTYPE));
-   memcpy(initialValue, image->data, image->nvox*sizeof(DTYPE));
-
-   // the new array is allocated and then filled
-   if(type>-1){
-      image->datatype=type;
-   }
-   else{
-      if(sizeof(NewTYPE)==sizeof(unsigned char)) {
-          image->datatype = NIFTI_TYPE_UINT8;
+void reg_tools_changeDatatype1(nifti_image *image, int type) {
+    // the initial array is saved and freed
+    DTYPE *initialValue = (DTYPE*)malloc(image->nvox * sizeof(DTYPE));
+    memcpy(initialValue, image->data, image->nvox * sizeof(DTYPE));
+
+    // the new array is allocated and then filled
+    if (type > -1) {
+        image->datatype = type;
+    } else {
+        if (sizeof(NewTYPE) == sizeof(unsigned char)) {
+            image->datatype = NIFTI_TYPE_UINT8;
 #ifndef NDEBUG
-    reg_print_msg_debug("new datatype is NIFTI_TYPE_UINT8");
+            reg_print_msg_debug("new datatype is NIFTI_TYPE_UINT8");
 #endif
-      }
-      else if(sizeof(NewTYPE)==sizeof(float)) {
-          image->datatype = NIFTI_TYPE_FLOAT32;
+        } else if (sizeof(NewTYPE) == sizeof(float)) {
+            image->datatype = NIFTI_TYPE_FLOAT32;
 #ifndef NDEBUG
-    reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT32");
+            reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT32");
 #endif
-      }
-      else if(sizeof(NewTYPE)==sizeof(double)) {
-          image->datatype = NIFTI_TYPE_FLOAT64;
+        } else if (sizeof(NewTYPE) == sizeof(double)) {
+            image->datatype = NIFTI_TYPE_FLOAT64;
 #ifndef NDEBUG
-    reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT64");
+            reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT64");
 #endif
-      }
-      else {
-         reg_print_fct_error("reg_tools_changeDatatype1");
-         reg_print_msg_error("Only change to unsigned char, float or double are supported");
-         reg_exit();
-      }
-   }
-   free(image->data);
-   image->nbyper = sizeof(NewTYPE);
-   image->data = (void *)calloc(image->nvox,sizeof(NewTYPE));
-   NewTYPE *dataPtr = static_cast<NewTYPE *>(image->data);
-   for (size_t i = 0; i < image->nvox; i++) {
-       dataPtr[i] = (NewTYPE)(initialValue[i]);
-   }
-
-   free(initialValue);
-   return;
+        } else {
+            reg_print_fct_error("reg_tools_changeDatatype1");
+            reg_print_msg_error("Only change to unsigned char, float or double are supported");
+            reg_exit();
+        }
+    }
+    free(image->data);
+    image->nbyper = sizeof(NewTYPE);
+    image->data = calloc(image->nvox, sizeof(NewTYPE));
+    NewTYPE *dataPtr = static_cast<NewTYPE *>(image->data);
+    for (size_t i = 0; i < image->nvox; i++) {
+        dataPtr[i] = (NewTYPE)(initialValue[i]);
+    }
+
+    free(initialValue);
 }
 /* *************************************************************** */
 template <class NewTYPE>
-void reg_tools_changeDatatype(nifti_image *image, int type)
-{
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_changeDatatype1<NewTYPE,unsigned char>(image,type);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_changeDatatype1<NewTYPE,char>(image,type);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_changeDatatype1<NewTYPE,unsigned short>(image,type);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_changeDatatype1<NewTYPE,short>(image,type);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_changeDatatype1<NewTYPE,unsigned int>(image,type);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_changeDatatype1<NewTYPE,int>(image,type);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_changeDatatype1<NewTYPE,float>(image,type);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_changeDatatype1<NewTYPE,double>(image,type);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_changeDatatype");
-      reg_print_msg_error("Unsupported datatype");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-template void reg_tools_changeDatatype<unsigned char>(nifti_image *, int);
-template void reg_tools_changeDatatype<unsigned short>(nifti_image *, int);
-template void reg_tools_changeDatatype<unsigned int>(nifti_image *, int);
-template void reg_tools_changeDatatype<char>(nifti_image *, int);
-template void reg_tools_changeDatatype<short>(nifti_image *, int);
-template void reg_tools_changeDatatype<int>(nifti_image *, int);
-template void reg_tools_changeDatatype<float>(nifti_image *, int);
-template void reg_tools_changeDatatype<double>(nifti_image *, int);
-/* *************************************************************** */
+void reg_tools_changeDatatype(nifti_image *image, int type) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_changeDatatype1<NewTYPE, unsigned char>(image, type);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_changeDatatype1<NewTYPE, char>(image, type);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_changeDatatype1<NewTYPE, unsigned short>(image, type);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_changeDatatype1<NewTYPE, short>(image, type);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_changeDatatype1<NewTYPE, unsigned int>(image, type);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_changeDatatype1<NewTYPE, int>(image, type);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_changeDatatype1<NewTYPE, float>(image, type);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_changeDatatype1<NewTYPE, double>(image, type);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_changeDatatype");
+        reg_print_msg_error("Unsupported datatype");
+        reg_exit();
+    }
+}
+template void reg_tools_changeDatatype<unsigned char>(nifti_image*, int);
+template void reg_tools_changeDatatype<unsigned short>(nifti_image*, int);
+template void reg_tools_changeDatatype<unsigned int>(nifti_image*, int);
+template void reg_tools_changeDatatype<char>(nifti_image*, int);
+template void reg_tools_changeDatatype<short>(nifti_image*, int);
+template void reg_tools_changeDatatype<int>(nifti_image*, int);
+template void reg_tools_changeDatatype<float>(nifti_image*, int);
+template void reg_tools_changeDatatype<double>(nifti_image*, int);
 /* *************************************************************** */
 template <class TYPE1>
-void reg_tools_operationImageToImage(nifti_image *img1,
-                                     nifti_image *img2,
+void reg_tools_operationImageToImage(const nifti_image *img1,
+                                     const nifti_image *img2,
                                      nifti_image *res,
-                                     int type)
-{
-   TYPE1 *img1Ptr = static_cast<TYPE1 *>(img1->data);
-   TYPE1 *resPtr = static_cast<TYPE1 *>(res->data);
-   TYPE1 *img2Ptr = static_cast<TYPE1 *>(img2->data);
+                                     int type) {
+    const TYPE1 *img1Ptr = static_cast<TYPE1*>(img1->data);
+    const TYPE1 *img2Ptr = static_cast<TYPE1*>(img2->data);
+    TYPE1 *resPtr = static_cast<TYPE1*>(res->data);
 
+    const float sclSlope1 = img1->scl_slope == 0 ? 1 : img1->scl_slope;
+    const float sclSlope2 = img2->scl_slope == 0 ? 1 : img2->scl_slope;
 
-   if(img1->scl_slope==0) {
-      img1->scl_slope=1.f;
-   }
-   if(img2->scl_slope==0) {
-       img2->scl_slope=1.f;
-   }
-
-   res->scl_slope=img1->scl_slope;
-   res->scl_inter=img1->scl_inter;
-
+    res->scl_slope = sclSlope1;
+    res->scl_inter = img1->scl_inter;
 
 #ifdef _WIN32
-   long i;
-   long voxelNumber=(long)res->nvox;
+    long i;
+    const long voxelNumber = (long)res->nvox;
 #else
-   size_t i;
-   size_t voxelNumber=res->nvox;
+    size_t i;
+    const size_t voxelNumber = res->nvox;
 #endif
 
-   switch(type)
-   {
-   case 0:
-#if defined (_OPENMP)
+    switch (type) {
+    case 0:
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2)
+   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2)
 #endif // _OPENMP
-      for(i=0; i<voxelNumber; i++)
-         resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)img1->scl_slope + (double)img1->scl_inter) +
-                              ((double)img2Ptr[i] * (double)img2->scl_slope + (double)img2->scl_inter) -
-                              (double)img1->scl_inter)/(double)img1->scl_slope);
-      break;
-   case 1:
-#if defined (_OPENMP)
+        for (i = 0; i < voxelNumber; i++)
+            resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) +
+                                 ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) -
+                                 (double)img1->scl_inter) / (double)sclSlope1);
+        break;
+    case 1:
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2)
+   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2)
 #endif // _OPENMP
-       for (i = 0; i < voxelNumber; i++) {
-               resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)img1->scl_slope + (double)img1->scl_inter) -
-                                 ((double)img2Ptr[i] * (double)img2->scl_slope + (double)img2->scl_inter) -
-                                   (double)img1->scl_inter) / (double)img1->scl_slope);
-       }
-      break;
-   case 2:
-#if defined (_OPENMP)
+        for (i = 0; i < voxelNumber; i++) {
+            resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) -
+                                 ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) -
+                                 (double)img1->scl_inter) / (double)sclSlope1);
+        }
+        break;
+    case 2:
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2)
+   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2)
 #endif // _OPENMP
-       for (i = 0; i < voxelNumber; i++) {
-           resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)img1->scl_slope + (double)img1->scl_inter) *
-               ((double)img2Ptr[i] * (double)img2->scl_slope + (double)img2->scl_inter) -
-               (double)img1->scl_inter) / (double)img1->scl_slope);
-       }
-      break;
-   case 3:
-#if defined (_OPENMP)
+        for (i = 0; i < voxelNumber; i++) {
+            resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) *
+                                 ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) -
+                                 (double)img1->scl_inter) / (double)sclSlope1);
+        }
+        break;
+    case 3:
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2)
+   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2)
 #endif // _OPENMP
-      for(i=0; i<voxelNumber; i++)
-         resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)img1->scl_slope + (double)img1->scl_inter) /
-                              ((double)img2Ptr[i] * (double)img2->scl_slope + (double)img2->scl_inter) -
-                              (double)img1->scl_inter)/(double)img1->scl_slope);
-      break;
-   }
-}
-/* *************************************************************** */
-void reg_tools_addImageToImage(nifti_image *img1,
-                               nifti_image *img2,
-                               nifti_image *res)
-{
-   if(img1->datatype != res->datatype || img2->datatype != res->datatype)
-   {
-      reg_print_fct_error("reg_tools_addImageToImage");
-      reg_print_msg_error("Input images are expected to be of the same type");
-      reg_exit();
-   }
-   if(img1->nvox != res->nvox || img2->nvox != res->nvox)
-   {
-      reg_print_fct_error("reg_tools_addImageToImage");
-      reg_print_msg_error("Input images are expected to have the same size");
-      reg_exit();
-   }
-   switch(img1->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 0);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_operationImageToImage<char>(img1, img2, res, 0);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 0);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_operationImageToImage<short>(img1, img2, res, 0);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 0);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_operationImageToImage<int>(img1, img2, res, 0);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_operationImageToImage<float>(img1, img2, res, 0);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_operationImageToImage<double>(img1, img2, res, 0);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_addImageToImage");
-      reg_print_msg_error("Unsupported datatype");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-void reg_tools_substractImageToImage(nifti_image *img1,
-                                     nifti_image *img2,
-                                     nifti_image *res)
-{
-   if(img1->datatype != res->datatype || img2->datatype != res->datatype)
-   {
-      reg_print_fct_error("reg_tools_substractImageToImage");
-      reg_print_msg_error("Input images are expected to be of the same type");
-      reg_exit();
-   }
-   if(img1->nvox != res->nvox || img2->nvox != res->nvox)
-   {
-      reg_print_fct_error("reg_tools_substractImageToImage");
-      reg_print_msg_error("Input images are expected to have the same size");
-      reg_exit();
-   }
-   switch(img1->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 1);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_operationImageToImage<char>(img1, img2, res, 1);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 1);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_operationImageToImage<short>(img1, img2, res, 1);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 1);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_operationImageToImage<int>(img1, img2, res, 1);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_operationImageToImage<float>(img1, img2, res, 1);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_operationImageToImage<double>(img1, img2, res, 1);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_substractImageToImage");
-      reg_print_msg_error("Unsupported datatype");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-void reg_tools_multiplyImageToImage(nifti_image *img1,
-                                    nifti_image *img2,
-                                    nifti_image *res)
-{
-   if(img1->datatype != res->datatype || img2->datatype != res->datatype)
-   {
-      reg_print_fct_error("reg_tools_multiplyImageToImage");
-      reg_print_msg_error("Input images are expected to have the same size");
-      reg_exit();
-   }
-   if(img1->nvox != res->nvox || img2->nvox != res->nvox)
-   {
-      reg_print_fct_error("reg_tools_multiplyImageToImage");
-      reg_print_msg_error("Input images are expected to have the same size");
-      reg_exit();
-   }
-   switch(img1->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 2);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_operationImageToImage<char>(img1, img2, res, 2);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 2);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_operationImageToImage<short>(img1, img2, res, 2);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 2);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_operationImageToImage<int>(img1, img2, res, 2);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_operationImageToImage<float>(img1, img2, res, 2);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_operationImageToImage<double>(img1, img2, res, 2);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_multiplyImageToImage");
-      reg_print_msg_error("Unsupported datatype");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-void reg_tools_divideImageToImage(nifti_image *img1,
-                                  nifti_image *img2,
-                                  nifti_image *res)
-{
-   if(img1->datatype != res->datatype || img2->datatype != res->datatype)
-   {
-      reg_print_fct_error("reg_tools_divideImageToImage");
-      reg_print_msg_error("Input images are expected to have the same size");
-      reg_exit();
-   }
-   if(img1->nvox != res->nvox || img2->nvox != res->nvox)
-   {
-      reg_print_fct_error("reg_tools_divideImageToImage");
-      reg_print_msg_error("Input images are expected to have the same size");
-      reg_exit();
-   }
-   switch(img1->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 3);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_operationImageToImage<char>(img1, img2, res, 3);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 3);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_operationImageToImage<short>(img1, img2, res, 3);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 3);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_operationImageToImage<int>(img1, img2, res, 3);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_operationImageToImage<float>(img1, img2, res, 3);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_operationImageToImage<double>(img1, img2, res, 3);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_divideImageToImage");
-      reg_print_msg_error("Unsupported datatype");
-      reg_exit();
-   }
+        for (i = 0; i < voxelNumber; i++)
+            resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) /
+                                 ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) -
+                                 (double)img1->scl_inter) / (double)sclSlope1);
+        break;
+    }
 }
 /* *************************************************************** */
+void reg_tools_addImageToImage(const nifti_image *img1,
+                               const nifti_image *img2,
+                               nifti_image *res) {
+    if (img1->datatype != res->datatype || img2->datatype != res->datatype) {
+        reg_print_fct_error("reg_tools_addImageToImage");
+        reg_print_msg_error("Input images are expected to be of the same type");
+        reg_exit();
+    }
+    if (img1->nvox != res->nvox || img2->nvox != res->nvox) {
+        reg_print_fct_error("reg_tools_addImageToImage");
+        reg_print_msg_error("Input images are expected to have the same size");
+        reg_exit();
+    }
+    switch (img1->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 0);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_operationImageToImage<char>(img1, img2, res, 0);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 0);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_operationImageToImage<short>(img1, img2, res, 0);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 0);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_operationImageToImage<int>(img1, img2, res, 0);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_operationImageToImage<float>(img1, img2, res, 0);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_operationImageToImage<double>(img1, img2, res, 0);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_addImageToImage");
+        reg_print_msg_error("Unsupported datatype");
+        reg_exit();
+    }
+}
+/* *************************************************************** */
+void reg_tools_subtractImageFromImage(const nifti_image *img1,
+                                      const nifti_image *img2,
+                                      nifti_image *res) {
+    if (img1->datatype != res->datatype || img2->datatype != res->datatype) {
+        reg_print_fct_error("reg_tools_subtractImageFromImage");
+        reg_print_msg_error("Input images are expected to be of the same type");
+        reg_exit();
+    }
+    if (img1->nvox != res->nvox || img2->nvox != res->nvox) {
+        reg_print_fct_error("reg_tools_subtractImageFromImage");
+        reg_print_msg_error("Input images are expected to have the same size");
+        reg_exit();
+    }
+    switch (img1->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 1);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_operationImageToImage<char>(img1, img2, res, 1);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 1);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_operationImageToImage<short>(img1, img2, res, 1);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 1);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_operationImageToImage<int>(img1, img2, res, 1);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_operationImageToImage<float>(img1, img2, res, 1);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_operationImageToImage<double>(img1, img2, res, 1);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_subtractImageFromImage");
+        reg_print_msg_error("Unsupported datatype");
+        reg_exit();
+    }
+}
+/* *************************************************************** */
+void reg_tools_multiplyImageToImage(const nifti_image *img1,
+                                    const nifti_image *img2,
+                                    nifti_image *res) {
+    if (img1->datatype != res->datatype || img2->datatype != res->datatype) {
+        reg_print_fct_error("reg_tools_multiplyImageToImage");
+        reg_print_msg_error("Input images are expected to have the same size");
+        reg_exit();
+    }
+    if (img1->nvox != res->nvox || img2->nvox != res->nvox) {
+        reg_print_fct_error("reg_tools_multiplyImageToImage");
+        reg_print_msg_error("Input images are expected to have the same size");
+        reg_exit();
+    }
+    switch (img1->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 2);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_operationImageToImage<char>(img1, img2, res, 2);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 2);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_operationImageToImage<short>(img1, img2, res, 2);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 2);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_operationImageToImage<int>(img1, img2, res, 2);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_operationImageToImage<float>(img1, img2, res, 2);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_operationImageToImage<double>(img1, img2, res, 2);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_multiplyImageToImage");
+        reg_print_msg_error("Unsupported datatype");
+        reg_exit();
+    }
+}
+/* *************************************************************** */
+void reg_tools_divideImageToImage(const nifti_image *img1,
+                                  const nifti_image *img2,
+                                  nifti_image *res) {
+    if (img1->datatype != res->datatype || img2->datatype != res->datatype) {
+        reg_print_fct_error("reg_tools_divideImageToImage");
+        reg_print_msg_error("Input images are expected to have the same size");
+        reg_exit();
+    }
+    if (img1->nvox != res->nvox || img2->nvox != res->nvox) {
+        reg_print_fct_error("reg_tools_divideImageToImage");
+        reg_print_msg_error("Input images are expected to have the same size");
+        reg_exit();
+    }
+    switch (img1->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 3);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_operationImageToImage<char>(img1, img2, res, 3);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 3);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_operationImageToImage<short>(img1, img2, res, 3);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 3);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_operationImageToImage<int>(img1, img2, res, 3);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_operationImageToImage<float>(img1, img2, res, 3);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_operationImageToImage<double>(img1, img2, res, 3);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_divideImageToImage");
+        reg_print_msg_error("Unsupported datatype");
+        reg_exit();
+    }
+}
 /* *************************************************************** */
 template <class TYPE1>
-void reg_tools_operationValueToImage(nifti_image *img1,
+void reg_tools_operationValueToImage(const nifti_image *img,
                                      nifti_image *res,
                                      float val,
-                                     int type)
-{
-   TYPE1 *img1Ptr = static_cast<TYPE1 *>(img1->data);
-   TYPE1 *resPtr = static_cast<TYPE1 *>(res->data);
+                                     int type) {
+    const TYPE1 *imgPtr = static_cast<TYPE1*>(img->data);
+    TYPE1 *resPtr = static_cast<TYPE1*>(res->data);
 
-   if(img1->scl_slope==0)
-   {
-      img1->scl_slope=1.f;
-   }
+    const float sclSlope = img->scl_slope == 0 ? 1 : img->scl_slope;
 
-   res->scl_slope=img1->scl_slope;
-   res->scl_inter=img1->scl_inter;
+    res->scl_slope = sclSlope;
+    res->scl_inter = img->scl_inter;
 
 #ifdef _WIN32
-   long i;
-   long voxelNumber=(long)res->nvox;
+    long i;
+    const long voxelNumber = (long)res->nvox;
 #else
-   size_t i;
-   size_t voxelNumber=res->nvox;
+    size_t i;
+    const size_t voxelNumber = res->nvox;
 #endif
 
-   switch(type)
-   {
-   case 0:
-#if defined (_OPENMP)
+    switch (type) {
+    case 0:
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img1,val)
+   shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope)
 #endif // _OPENMP
-      for(i=0; i<voxelNumber; i++)
-         resPtr[i] = (TYPE1)(((((double)img1Ptr[i] * (double)img1->scl_slope + (double)img1->scl_inter) +
-                               (double)val) - (double)img1->scl_inter)/(double)img1->scl_slope);
-      break;
-   case 1:
-#if defined (_OPENMP)
+        for (i = 0; i < voxelNumber; i++)
+            resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) +
+                                  (double)val) - (double)img->scl_inter) / (double)sclSlope);
+        break;
+    case 1:
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img1,val)
+   shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope)
 #endif // _OPENMP
-      for(i=0; i<voxelNumber; i++)
-         resPtr[i] = (TYPE1)(((((double)img1Ptr[i] * (double)img1->scl_slope + (double)img1->scl_inter) -
-                               (double)val) - (double)img1->scl_inter)/(double)img1->scl_slope);
-      break;
-   case 2:
-#if defined (_OPENMP)
+        for (i = 0; i < voxelNumber; i++)
+            resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) -
+                                  (double)val) - (double)img->scl_inter) / (double)sclSlope);
+        break;
+    case 2:
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img1,val)
+   shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope)
 #endif // _OPENMP
-      for(i=0; i<voxelNumber; i++)
-         resPtr[i] = (TYPE1)(((((double)img1Ptr[i] * (double)img1->scl_slope + (double)img1->scl_inter) *
-                               (double)val) - (double)img1->scl_inter)/(double)img1->scl_slope);
-      break;
-   case 3:
-#if defined (_OPENMP)
+        for (i = 0; i < voxelNumber; i++)
+            resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) *
+                                  (double)val) - (double)img->scl_inter) / (double)sclSlope);
+        break;
+    case 3:
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img1,val)
+   shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope)
 #endif // _OPENMP
-      for(i=0; i<voxelNumber; i++)
-         resPtr[i] = (TYPE1)(((((double)img1Ptr[i] * (double)img1->scl_slope + (double)img1->scl_inter) /
-                               (double)val) - (double)img1->scl_inter)/(double)img1->scl_slope);
-      break;
-   }
+        for (i = 0; i < voxelNumber; i++)
+            resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) /
+                                  (double)val) - (double)img->scl_inter) / (double)sclSlope);
+        break;
+    }
 }
 /* *************************************************************** */
-void reg_tools_addValueToImage(nifti_image *img1,
+void reg_tools_addValueToImage(const nifti_image *img,
                                nifti_image *res,
-                               float val)
-{
-   if(img1->datatype != res->datatype)
-   {
-      reg_print_fct_error("reg_tools_addValueToImage");
-      reg_print_msg_error("Input and output image do not have the same data type");
-      reg_exit();
-   }
-   if(img1->nvox != res->nvox)
-   {
-      reg_print_fct_error("reg_tools_addValueToImage");
-      reg_print_msg_error("Input images are expected to have the same size");
-      reg_exit();
-   }
-   switch(img1->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_operationValueToImage<unsigned char>(img1, res, val, 0);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_operationValueToImage<char>(img1, res, val, 0);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_operationValueToImage<unsigned short>(img1, res, val, 0);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_operationValueToImage<short>(img1, res, val, 0);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_operationValueToImage<unsigned int>(img1, res, val, 0);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_operationValueToImage<int>(img1, res, val, 0);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_operationValueToImage<float>(img1, res, val, 0);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_operationValueToImage<double>(img1, res, val, 0);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_addValueToImage");
-      reg_print_msg_error("Image data type is not supported");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-void reg_tools_substractValueToImage(nifti_image *img1,
-                                     nifti_image *res,
-                                     float val)
-{
-   if(img1->datatype != res->datatype)
-   {
-      reg_print_fct_error("reg_tools_substractValueToImage");
-      reg_print_msg_error("Input and output image do not have the same data type");
-      reg_exit();
-   }
-   if(img1->nvox != res->nvox)
-   {
-      reg_print_fct_error("reg_tools_substractValueToImage");
-      reg_print_msg_error("Input images are expected to have the same size");
-      reg_exit();
-   }
-   switch(img1->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_operationValueToImage<unsigned char>(img1, res, val, 1);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_operationValueToImage<char>(img1, res, val, 1);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_operationValueToImage<unsigned short>(img1, res, val, 1);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_operationValueToImage<short>(img1, res, val, 1);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_operationValueToImage<unsigned int>(img1, res, val, 1);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_operationValueToImage<int>(img1, res, val, 1);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_operationValueToImage<float>(img1, res, val, 1);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_operationValueToImage<double>(img1, res, val, 1);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_substractValueToImage");
-      reg_print_msg_error("Image data type is not supported");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-void reg_tools_multiplyValueToImage(nifti_image *img1,
+                               float val) {
+    if (img->datatype != res->datatype) {
+        reg_print_fct_error("reg_tools_addValueToImage");
+        reg_print_msg_error("Input and output image do not have the same data type");
+        reg_exit();
+    }
+    if (img->nvox != res->nvox) {
+        reg_print_fct_error("reg_tools_addValueToImage");
+        reg_print_msg_error("Input images are expected to have the same size");
+        reg_exit();
+    }
+    switch (img->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_operationValueToImage<unsigned char>(img, res, val, 0);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_operationValueToImage<char>(img, res, val, 0);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_operationValueToImage<unsigned short>(img, res, val, 0);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_operationValueToImage<short>(img, res, val, 0);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_operationValueToImage<unsigned int>(img, res, val, 0);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_operationValueToImage<int>(img, res, val, 0);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_operationValueToImage<float>(img, res, val, 0);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_operationValueToImage<double>(img, res, val, 0);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_addValueToImage");
+        reg_print_msg_error("Image data type is not supported");
+        reg_exit();
+    }
+}
+/* *************************************************************** */
+void reg_tools_subtractValueFromImage(const nifti_image *img,
+                                      nifti_image *res,
+                                      float val) {
+    if (img->datatype != res->datatype) {
+        reg_print_fct_error("reg_tools_subtractValueFromImage");
+        reg_print_msg_error("Input and output image do not have the same data type");
+        reg_exit();
+    }
+    if (img->nvox != res->nvox) {
+        reg_print_fct_error("reg_tools_subtractValueFromImage");
+        reg_print_msg_error("Input images are expected to have the same size");
+        reg_exit();
+    }
+    switch (img->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_operationValueToImage<unsigned char>(img, res, val, 1);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_operationValueToImage<char>(img, res, val, 1);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_operationValueToImage<unsigned short>(img, res, val, 1);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_operationValueToImage<short>(img, res, val, 1);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_operationValueToImage<unsigned int>(img, res, val, 1);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_operationValueToImage<int>(img, res, val, 1);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_operationValueToImage<float>(img, res, val, 1);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_operationValueToImage<double>(img, res, val, 1);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_subtractValueFromImage");
+        reg_print_msg_error("Image data type is not supported");
+        reg_exit();
+    }
+}
+/* *************************************************************** */
+void reg_tools_multiplyValueToImage(const nifti_image *img,
                                     nifti_image *res,
-                                    float val)
-{
-   if(img1->datatype != res->datatype)
-   {
-      reg_print_fct_error("reg_tools_multiplyValueToImage");
-      reg_print_msg_error("Input and output image do not have the same data type");
-      reg_exit();
-   }
-   if(img1->nvox != res->nvox)
-   {
-      reg_print_fct_error("reg_tools_multiplyValueToImage");
-      reg_print_msg_error("Input images are expected to have the same size");
-      reg_exit();
-   }
-   switch(img1->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_operationValueToImage<unsigned char>(img1, res, val, 2);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_operationValueToImage<char>(img1, res, val, 2);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_operationValueToImage<unsigned short>(img1, res, val, 2);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_operationValueToImage<short>(img1, res, val, 2);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_operationValueToImage<unsigned int>(img1, res, val, 2);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_operationValueToImage<int>(img1, res, val, 2);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_operationValueToImage<float>(img1, res, val, 2);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_operationValueToImage<double>(img1, res, val, 2);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_multiplyValueToImage");
-      reg_print_msg_error("Image data type is not supported");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-void reg_tools_divideValueToImage(nifti_image *img1,
-                                  nifti_image *res,
-                                  float val)
-{
-   if(img1->datatype != res->datatype)
-   {
-      reg_print_fct_error("reg_tools_divideValueToImage");
-      reg_print_msg_error("Input and output image do not have the same data type");
-      reg_exit();
-   }
-   if(img1->nvox != res->nvox)
-   {
-      reg_print_fct_error("reg_tools_divideValueToImage");
-      reg_print_msg_error("Input images are expected to have the same size");
-      reg_exit();
-   }
-   switch(img1->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_operationValueToImage<unsigned char>(img1, res, val, 3);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_operationValueToImage<char>(img1, res, val, 3);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_operationValueToImage<unsigned short>(img1, res, val, 3);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_operationValueToImage<short>(img1, res, val, 3);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_operationValueToImage<unsigned int>(img1, res, val, 3);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_operationValueToImage<int>(img1, res, val, 3);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_operationValueToImage<float>(img1, res, val, 3);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_operationValueToImage<double>(img1, res, val, 3);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_divideValueToImage");
-      reg_print_msg_error("Image data type is not supported");
-      reg_exit();
-   }
+                                    float val) {
+    if (img->datatype != res->datatype) {
+        reg_print_fct_error("reg_tools_multiplyValueToImage");
+        reg_print_msg_error("Input and output image do not have the same data type");
+        reg_exit();
+    }
+    if (img->nvox != res->nvox) {
+        reg_print_fct_error("reg_tools_multiplyValueToImage");
+        reg_print_msg_error("Input images are expected to have the same size");
+        reg_exit();
+    }
+    switch (img->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_operationValueToImage<unsigned char>(img, res, val, 2);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_operationValueToImage<char>(img, res, val, 2);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_operationValueToImage<unsigned short>(img, res, val, 2);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_operationValueToImage<short>(img, res, val, 2);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_operationValueToImage<unsigned int>(img, res, val, 2);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_operationValueToImage<int>(img, res, val, 2);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_operationValueToImage<float>(img, res, val, 2);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_operationValueToImage<double>(img, res, val, 2);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_multiplyValueToImage");
+        reg_print_msg_error("Image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
+void reg_tools_divideValueToImage(const nifti_image *img,
+                                  nifti_image *res,
+                                  float val) {
+    if (img->datatype != res->datatype) {
+        reg_print_fct_error("reg_tools_divideValueToImage");
+        reg_print_msg_error("Input and output image do not have the same data type");
+        reg_exit();
+    }
+    if (img->nvox != res->nvox) {
+        reg_print_fct_error("reg_tools_divideValueToImage");
+        reg_print_msg_error("Input images are expected to have the same size");
+        reg_exit();
+    }
+    switch (img->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_operationValueToImage<unsigned char>(img, res, val, 3);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_operationValueToImage<char>(img, res, val, 3);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_operationValueToImage<unsigned short>(img, res, val, 3);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_operationValueToImage<short>(img, res, val, 3);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_operationValueToImage<unsigned int>(img, res, val, 3);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_operationValueToImage<int>(img, res, val, 3);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_operationValueToImage<float>(img, res, val, 3);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_operationValueToImage<double>(img, res, val, 3);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_divideValueToImage");
+        reg_print_msg_error("Image data type is not supported");
+        reg_exit();
+    }
+}
 /* *************************************************************** */
 template <class DTYPE>
 void reg_tools_kernelConvolution_core(nifti_image *image,
@@ -1104,169 +988,142 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
                                       int kernelType,
                                       int *mask,
                                       bool *timePoint,
-                                      bool *axis)
-{
-   if(image->nx>2048 || image->ny>2048 || image->nz>2048){
-      reg_print_fct_error("reg_tools_kernelConvolution_core");
-      reg_print_msg_error("This function does not support images with dimension > 2048");
-      reg_exit();
-   }
+                                      bool *axis) {
+    if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) {
+        reg_print_fct_error("reg_tools_kernelConvolution_core");
+        reg_print_msg_error("This function does not support images with dimension > 2048");
+        reg_exit();
+    }
 #ifdef WIN32
-   long index;
-   long voxelNumber = (long)image->nx*image->ny*image->nz;
+    long index;
+    const long voxelNumber = long(image->nx * image->ny * image->nz);
 #else
-   size_t index;
-   size_t voxelNumber = (size_t)image->nx*image->ny*image->nz;
+    size_t index;
+    const size_t voxelNumber = size_t(image->nx * image->ny * image->nz);
 #endif
-   DTYPE *imagePtr = static_cast<DTYPE *>(image->data);
-   int imageDim[3]= {image->nx,image->ny,image->nz};
-
-   bool *nanImagePtr = (bool *)calloc(voxelNumber, sizeof(bool));
-   float *densityPtr = (float *)calloc(voxelNumber, sizeof(float));
-
-   // Loop over the dimension higher than 3
-   for(int t=0; t<image->nt*image->nu; t++)
-   {
-      if(timePoint[t])
-      {
-         DTYPE *intensityPtr = &imagePtr[t * voxelNumber];
-#if defined (_OPENMP)
+    DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
+    int imageDim[3] = {image->nx, image->ny, image->nz};
+
+    bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool));
+    float *densityPtr = (float*)calloc(voxelNumber, sizeof(float));
+
+    // Loop over the dimension higher than 3
+    for (int t = 0; t < image->nt * image->nu; t++) {
+        if (timePoint[t]) {
+            DTYPE *intensityPtr = &imagePtr[t * voxelNumber];
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber) \
    private(index)
 #endif
-         for(index=0; index<voxelNumber; index++)
-         {
-            densityPtr[index] = (intensityPtr[index]==intensityPtr[index])?1:0;
-            densityPtr[index] *= (mask[index]>=0)?1:0;
-            nanImagePtr[index] = static_cast<bool>(densityPtr[index]);
-            if(nanImagePtr[index]==0)
-               intensityPtr[index]=static_cast<DTYPE>(0);
-         }
-         // Loop over the x, y and z dimensions
-         for(int n=0; n<3; n++)
-         {
-            if(axis[n] && image->dim[n]>1)
-            {
-               double temp;
-               if(sigma[t]>0) temp=sigma[t]/image->pixdim[n+1]; // mm to voxel
-               else temp=fabs(sigma[t]); // voxel based if negative value
-               int radius=0;
-               // Define the kernel size
-               if(kernelType==MEAN_KERNEL || kernelType==LINEAR_KERNEL)
-               {
-                  // Mean  or linear filtering
-                  radius = static_cast<int>(temp);
-               }
-               else if(kernelType==GAUSSIAN_KERNEL)
-               {
-                  // Gaussian kernel
-                  radius=static_cast<int>(temp*3.0f);
-               }
-               else if(kernelType==CUBIC_SPLINE_KERNEL)
-               {
-                  // Spline kernel
-                  radius=static_cast<int>(temp*2.0f);
-               }
-               else{
-                  reg_print_fct_error("reg_tools_kernelConvolution_core");
-                  reg_print_msg_error("Unknown kernel type");
-                  reg_exit();
-               }
-               if(radius>0)
-               {
-                  // Allocate the kernel
-                  float kernel[4096];
-                  double kernelSum=0;
-                  // Fill the kernel
-                  if(kernelType==CUBIC_SPLINE_KERNEL)
-                  {
-                     // Compute the Cubic Spline kernel
-                     for(int i=-radius; i<=radius; i++)
-                     {
-                        // temp contains the kernel node spacing
-                        double relative = (double)(fabs((double)(double)i/(double)temp));
-                        if(relative<1.0) kernel[i+radius] = (float)(2.0/3.0 - relative*relative + 0.5*relative*relative*relative);
-                        else if (relative<2.0) kernel[i+radius] = (float)(-(relative-2.0)*(relative-2.0)*(relative-2.0)/6.0);
-                        else kernel[i+radius]=0;
-                        kernelSum += kernel[i+radius];
-                     }
-                  }
-                  else if(kernelType==GAUSSIAN_KERNEL)
-                  {
-                     // Compute the Gaussian kernel
-                     for(int i=-radius; i<=radius; i++)
-                     {
-                        // 2.506... = sqrt(2*pi)
-                        // temp contains the sigma in voxel
-                        kernel[radius+i]=static_cast<float>(exp(-(double)(i*i)/(2.0*reg_pow2(temp))) /
-                                                            (temp*2.506628274631));
-                        kernelSum += kernel[radius+i];
-                     }
-                  }
-                  else if(kernelType==LINEAR_KERNEL)
-                  {
-                     // Compute the linear kernel
-                     for(int i=-radius; i<=radius; i++)
-                     {
-                        kernel[radius+i]= 1.f - fabs(static_cast<float>(i)/static_cast<float>(radius));
-                        kernelSum += kernel[radius+i];
-                     }
-                  }
-                  else if(kernelType==MEAN_KERNEL && imageDim[2]==1)
-                  {
-                     // Compute the mean kernel
-                     for(int i=-radius; i<=radius; i++)
-                     {
-                        kernel[radius+i]= 1.f;
-                        kernelSum += kernel[radius+i];
-                     }
-                  }
-                  // No kernel is required for the mean filtering
-                  // No need for kernel normalisation as this is handle by the density function
+            for (index = 0; index < voxelNumber; index++) {
+                densityPtr[index] = (intensityPtr[index] == intensityPtr[index]) ? 1 : 0;
+                densityPtr[index] *= (mask[index] >= 0) ? 1 : 0;
+                nanImagePtr[index] = static_cast<bool>(densityPtr[index]);
+                if (nanImagePtr[index] == 0)
+                    intensityPtr[index] = static_cast<DTYPE>(0);
+            }
+            // Loop over the x, y and z dimensions
+            for (int n = 0; n < 3; n++) {
+                if (axis[n] && image->dim[n] > 1) {
+                    double temp;
+                    if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel
+                    else temp = fabs(sigma[t]); // voxel based if negative value
+                    int radius = 0;
+                    // Define the kernel size
+                    if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) {
+                        // Mean  or linear filtering
+                        radius = static_cast<int>(temp);
+                    } else if (kernelType == GAUSSIAN_KERNEL) {
+                        // Gaussian kernel
+                        radius = static_cast<int>(temp * 3.0f);
+                    } else if (kernelType == CUBIC_SPLINE_KERNEL) {
+                        // Spline kernel
+                        radius = static_cast<int>(temp * 2.0f);
+                    } else {
+                        reg_print_fct_error("reg_tools_kernelConvolution_core");
+                        reg_print_msg_error("Unknown kernel type");
+                        reg_exit();
+                    }
+                    if (radius > 0) {
+                        // Allocate the kernel
+                        float kernel[4096];
+                        double kernelSum = 0;
+                        // Fill the kernel
+                        if (kernelType == CUBIC_SPLINE_KERNEL) {
+                            // Compute the Cubic Spline kernel
+                            for (int i = -radius; i <= radius; i++) {
+                                // temp contains the kernel node spacing
+                                double relative = fabs(i / temp);
+                                if (relative < 1.0) kernel[i + radius] = static_cast<float>(2.0 / 3.0 - relative * relative + 0.5 * relative * relative * relative);
+                                else if (relative < 2.0) kernel[i + radius] = static_cast<float>(-(relative - 2.0) * (relative - 2.0) * (relative - 2.0) / 6.0);
+                                else kernel[i + radius] = 0;
+                                kernelSum += kernel[i + radius];
+                            }
+                        } else if (kernelType == GAUSSIAN_KERNEL) {
+                            // Compute the Gaussian kernel
+                            for (int i = -radius; i <= radius; i++) {
+                                // 2.506... = sqrt(2*pi)
+                                // temp contains the sigma in voxel
+                                kernel[radius + i] = static_cast<float>(exp(-(i * i) / (2.0 * reg_pow2(temp))) / (temp * 2.506628274631));
+                                kernelSum += kernel[radius + i];
+                            }
+                        } else if (kernelType == LINEAR_KERNEL) {
+                            // Compute the linear kernel
+                            for (int i = -radius; i <= radius; i++) {
+                                kernel[radius + i] = 1.f - fabs(i / static_cast<float>(radius));
+                                kernelSum += kernel[radius + i];
+                            }
+                        } else if (kernelType == MEAN_KERNEL && imageDim[2] == 1) {
+                            // Compute the mean kernel
+                            for (int i = -radius; i <= radius; i++) {
+                                kernel[radius + i] = 1.f;
+                                kernelSum += kernel[radius + i];
+                            }
+                        }
+                        // No kernel is required for the mean filtering
+                        // No need for kernel normalisation as this is handle by the density function
 #ifndef NDEBUG
-                  char text[255];
-                  sprintf(text, "Convolution type[%i] dim[%i] tp[%i] radius[%i] kernelSum[%g]", kernelType, n, t, radius, kernelSum);
-                  reg_print_msg_debug(text);
+                        char text[255];
+                        sprintf(text, "Convolution type[%i] dim[%i] tp[%i] radius[%i] kernelSum[%g]", kernelType, n, t, radius, kernelSum);
+                        reg_print_msg_debug(text);
 #endif
-                  int planeNumber, planeIndex, lineOffset;
-                  int lineIndex, shiftPre, shiftPst, k;
-                  switch(n)
-                  {
-                  case 0:
-                     planeNumber=imageDim[1]*imageDim[2];
-                     lineOffset  = 1;
-                     break;
-                  case 1:
-                     planeNumber = imageDim[0]*imageDim[2];
-                     lineOffset  = imageDim[0];
-                     break;
-                  case 2:
-                     planeNumber = imageDim[0]*imageDim[1];
-                     lineOffset  = planeNumber;
-                     break;
-                  }
-
-                  size_t realIndex;
-                  float *kernelPtr, kernelValue;
-                  double densitySum, intensitySum;
-                  DTYPE *currentIntensityPtr=nullptr;
-                  float *currentDensityPtr = nullptr;
-                  DTYPE bufferIntensity[2048];
-                  float bufferDensity[2048];
-                  double bufferIntensitycur=0;
-                  double bufferDensitycur=0;
+                        int planeNumber, planeIndex, lineOffset;
+                        int lineIndex, shiftPre, shiftPst, k;
+                        switch (n) {
+                        case 0:
+                            planeNumber = imageDim[1] * imageDim[2];
+                            lineOffset = 1;
+                            break;
+                        case 1:
+                            planeNumber = imageDim[0] * imageDim[2];
+                            lineOffset = imageDim[0];
+                            break;
+                        case 2:
+                            planeNumber = imageDim[0] * imageDim[1];
+                            lineOffset = planeNumber;
+                            break;
+                        }
+
+                        size_t realIndex;
+                        float *kernelPtr, kernelValue;
+                        double densitySum, intensitySum;
+                        DTYPE *currentIntensityPtr = nullptr;
+                        float *currentDensityPtr = nullptr;
+                        DTYPE bufferIntensity[2048];
+                        float bufferDensity[2048];
+                        double bufferIntensitycur = 0;
+                        double bufferDensitycur = 0;
 
 #ifdef _USE_SSE
-                  union
-                  {
-                     __m128 m;
-                     float f[4] ;
-                  } intensity_sum_sse, density_sum_sse;
-                  __m128 kernel_sse, intensity_sse, density_sse;
+                        union {
+                            __m128 m;
+                            float f[4];
+                        } intensity_sum_sse, density_sum_sse;
+                        __m128 kernel_sse, intensity_sse, density_sse;
 #endif
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma omp parallel for default(none) \
    shared(imageDim, intensityPtr, densityPtr, radius, kernel, lineOffset, n, \
@@ -1284,170 +1141,144 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
    k, bufferIntensitycur,bufferDensitycur, planeIndex)
 #endif
 #endif // _OPENMP
-                  // Loop over the different voxel
-                  for(planeIndex=0; planeIndex<planeNumber; ++planeIndex)
-                  {
-
-                     switch(n)
-                     {
-                     case 0:
-                        realIndex = planeIndex * imageDim[0];
-                        break;
-                     case 1:
-                        realIndex = (planeIndex/imageDim[0]) *
-                              imageDim[0]*imageDim[1] +
-                              planeIndex%imageDim[0];
-                        break;
-                     case 2:
-                        realIndex = planeIndex;
-                        break;
-                     default:
-                        realIndex=0;
-                     }
-                     // Fetch the current line into a stack buffer
-                     currentIntensityPtr= &intensityPtr[realIndex];
-                     currentDensityPtr  = &densityPtr[realIndex];
-                     for(lineIndex=0; lineIndex<imageDim[n]; ++lineIndex)
-                     {
-                        bufferIntensity[lineIndex] = *currentIntensityPtr;
-                        bufferDensity[lineIndex]   = *currentDensityPtr;
-                        currentIntensityPtr       += lineOffset;
-                        currentDensityPtr         += lineOffset;
-                     }
-                     if(kernelSum>0)
-                     {
-                        // Perform the kernel convolution along 1 line
-                        for(lineIndex=0; lineIndex<imageDim[n]; ++lineIndex)
-                        {
-                           // Define the kernel boundaries
-                           shiftPre = lineIndex - radius;
-                           shiftPst = lineIndex + radius + 1;
-                           if(shiftPre<0)
-                           {
-                              kernelPtr = &kernel[-shiftPre];
-                              shiftPre=0;
-                           }
-                           else kernelPtr = &kernel[0];
-                           if(shiftPst>imageDim[n]) shiftPst=imageDim[n];
-                           // Set the current values to zero
-                           // Increment the current value by performing the weighted sum
+                        // Loop over the different voxel
+                        for (planeIndex = 0; planeIndex < planeNumber; ++planeIndex) {
+                            switch (n) {
+                            case 0:
+                                realIndex = planeIndex * imageDim[0];
+                                break;
+                            case 1:
+                                realIndex = (planeIndex / imageDim[0]) *
+                                    imageDim[0] * imageDim[1] +
+                                    planeIndex % imageDim[0];
+                                break;
+                            case 2:
+                                realIndex = planeIndex;
+                                break;
+                            default:
+                                realIndex = 0;
+                            }
+                            // Fetch the current line into a stack buffer
+                            currentIntensityPtr = &intensityPtr[realIndex];
+                            currentDensityPtr = &densityPtr[realIndex];
+                            for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex) {
+                                bufferIntensity[lineIndex] = *currentIntensityPtr;
+                                bufferDensity[lineIndex] = *currentDensityPtr;
+                                currentIntensityPtr += lineOffset;
+                                currentDensityPtr += lineOffset;
+                            }
+                            if (kernelSum > 0) {
+                                // Perform the kernel convolution along 1 line
+                                for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex) {
+                                    // Define the kernel boundaries
+                                    shiftPre = lineIndex - radius;
+                                    shiftPst = lineIndex + radius + 1;
+                                    if (shiftPre < 0) {
+                                        kernelPtr = &kernel[-shiftPre];
+                                        shiftPre = 0;
+                                    } else kernelPtr = &kernel[0];
+                                    if (shiftPst > imageDim[n]) shiftPst = imageDim[n];
+                                    // Set the current values to zero
+                                    // Increment the current value by performing the weighted sum
 #ifdef _USE_SSE
-                           intensity_sum_sse.m = _mm_set_ps1(0);
-                           density_sum_sse.m = _mm_set_ps1(0);
-                           k=shiftPre;
-                           while(k<shiftPst-3)
-                           {
-                              kernel_sse = _mm_set_ps(kernelPtr[0], kernelPtr[1], kernelPtr[2], kernelPtr[3]);
-                              kernelPtr+=4;
-                              intensity_sse = _mm_set_ps(static_cast<float>(bufferIntensity[k]),
-                                                         static_cast<float>(bufferIntensity[k+1]),
-                                                         static_cast<float>(bufferIntensity[k+2]),
-                                                         static_cast<float>(bufferIntensity[k+3]));
-                              density_sse = _mm_set_ps(bufferDensity[k],
-                                    bufferDensity[k+1],
-                                    bufferDensity[k+2],
-                                    bufferDensity[k+3]);
-                              k+=4;
-                              intensity_sum_sse.m = _mm_add_ps(_mm_mul_ps(kernel_sse, intensity_sse), intensity_sum_sse.m);
-                              density_sum_sse.m = _mm_add_ps(_mm_mul_ps(kernel_sse, density_sse), density_sum_sse.m);
-                           }
+                                    intensity_sum_sse.m = _mm_set_ps1(0);
+                                    density_sum_sse.m = _mm_set_ps1(0);
+                                    k = shiftPre;
+                                    while (k < shiftPst - 3) {
+                                        kernel_sse = _mm_set_ps(kernelPtr[0], kernelPtr[1], kernelPtr[2], kernelPtr[3]);
+                                        kernelPtr += 4;
+                                        intensity_sse = _mm_set_ps(static_cast<float>(bufferIntensity[k]),
+                                                                   static_cast<float>(bufferIntensity[k + 1]),
+                                                                   static_cast<float>(bufferIntensity[k + 2]),
+                                                                   static_cast<float>(bufferIntensity[k + 3]));
+                                        density_sse = _mm_set_ps(bufferDensity[k],
+                                                                 bufferDensity[k + 1],
+                                                                 bufferDensity[k + 2],
+                                                                 bufferDensity[k + 3]);
+                                        k += 4;
+                                        intensity_sum_sse.m = _mm_add_ps(_mm_mul_ps(kernel_sse, intensity_sse), intensity_sum_sse.m);
+                                        density_sum_sse.m = _mm_add_ps(_mm_mul_ps(kernel_sse, density_sse), density_sum_sse.m);
+                                    }
 #ifdef __SSE3__
-                           intensity_sum_sse.m = _mm_hadd_ps(intensity_sum_sse.m, density_sum_sse.m);
-                           intensity_sum_sse.m = _mm_hadd_ps(intensity_sum_sse.m, intensity_sum_sse.m);
-                           intensitySum = intensity_sum_sse.f[0];
-                           densitySum = intensity_sum_sse.f[1];
+                                    intensity_sum_sse.m = _mm_hadd_ps(intensity_sum_sse.m, density_sum_sse.m);
+                                    intensity_sum_sse.m = _mm_hadd_ps(intensity_sum_sse.m, intensity_sum_sse.m);
+                                    intensitySum = intensity_sum_sse.f[0];
+                                    densitySum = intensity_sum_sse.f[1];
 #else
-                           intensitySum = intensity_sum_sse.f[0] + intensity_sum_sse.f[1] + intensity_sum_sse.f[2] + intensity_sum_sse.f[3];
-                           densitySum = density_sum_sse.f[0] + density_sum_sse.f[1] + density_sum_sse.f[2] + density_sum_sse.f[3];
+                                    intensitySum = intensity_sum_sse.f[0] + intensity_sum_sse.f[1] + intensity_sum_sse.f[2] + intensity_sum_sse.f[3];
+                                    densitySum = density_sum_sse.f[0] + density_sum_sse.f[1] + density_sum_sse.f[2] + density_sum_sse.f[3];
 #endif
-                           while(k<shiftPst)
-                           {
-                              kernelValue   = *kernelPtr++;
-                              intensitySum +=  kernelValue * bufferIntensity[k];
-                              densitySum   +=  kernelValue * bufferDensity[k++];
-                           }
+                                    while (k < shiftPst) {
+                                        kernelValue = *kernelPtr++;
+                                        intensitySum += kernelValue * bufferIntensity[k];
+                                        densitySum += kernelValue * bufferDensity[k++];
+                                    }
 #else
-                           intensitySum=0;
-                           densitySum=0;
-                           for(k=shiftPre; k<shiftPst; ++k)
-                           {
-                              kernelValue   = *kernelPtr++;
-                              intensitySum +=  kernelValue * bufferIntensity[k];
-                              densitySum   +=  kernelValue * bufferDensity[k];
-                           }
+                                    intensitySum = 0;
+                                    densitySum = 0;
+                                    for (k = shiftPre; k < shiftPst; ++k) {
+                                        kernelValue = *kernelPtr++;
+                                        intensitySum += kernelValue * bufferIntensity[k];
+                                        densitySum += kernelValue * bufferDensity[k];
+                                    }
 #endif
-                           // Store the computed value inplace
-                           intensityPtr[realIndex] = static_cast<DTYPE>(intensitySum);
-                           densityPtr[realIndex] = static_cast<float>(densitySum);
-                           realIndex += lineOffset;
-                        } // line convolution
-                     } // kernel sum
-                     else
-                     {
-                        for(lineIndex=1; lineIndex<imageDim[n]; ++lineIndex)
-                        {
-                           bufferIntensity[lineIndex]+=bufferIntensity[lineIndex-1];
-                           bufferDensity[lineIndex]+=bufferDensity[lineIndex-1];
-                        }
-                        shiftPre = -radius - 1;
-                        shiftPst = radius;
-                        for(lineIndex=0; lineIndex<imageDim[n]; ++lineIndex,++shiftPre,++shiftPst)
-                        {
-                           if(shiftPre>-1)
-                           {
-                              if(shiftPst<imageDim[n])
-                              {
-                                 bufferIntensitycur = bufferIntensity[shiftPre]-bufferIntensity[shiftPst];
-                                 bufferDensitycur = bufferDensity[shiftPre]-bufferDensity[shiftPst];
-                              }
-                              else
-                              {
-                                 bufferIntensitycur = bufferIntensity[shiftPre]-bufferIntensity[imageDim[n]-1];
-                                 bufferDensitycur = bufferDensity[shiftPre]-bufferDensity[imageDim[n]-1];
-                              }
-                           }
-                           else
-                           {
-                              if(shiftPst<imageDim[n])
-                              {
-                                 bufferIntensitycur = -bufferIntensity[shiftPst];
-                                 bufferDensitycur = -bufferDensity[shiftPst];
-                              }
-                              else{
-                                 bufferIntensitycur = 0;
-                                 bufferDensitycur = 0;
-                              }
-                           }
-                           intensityPtr[realIndex]=static_cast<DTYPE>(bufferIntensitycur);
-                           densityPtr[realIndex]=static_cast<float>(bufferDensitycur);
-
-                           realIndex += lineOffset;
-                        } // line convolution of mean filter
-                     } // No kernel computation
-                  } // pixel in starting plane
-               } // radius > 0
-            } // active axis
-         } // axes
-         // Normalise per timepoint
-#if defined (_OPENMP)
+                                    // Store the computed value inplace
+                                    intensityPtr[realIndex] = static_cast<DTYPE>(intensitySum);
+                                    densityPtr[realIndex] = static_cast<float>(densitySum);
+                                    realIndex += lineOffset;
+                                } // line convolution
+                            } // kernel sum
+                            else {
+                                for (lineIndex = 1; lineIndex < imageDim[n]; ++lineIndex) {
+                                    bufferIntensity[lineIndex] += bufferIntensity[lineIndex - 1];
+                                    bufferDensity[lineIndex] += bufferDensity[lineIndex - 1];
+                                }
+                                shiftPre = -radius - 1;
+                                shiftPst = radius;
+                                for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex, ++shiftPre, ++shiftPst) {
+                                    if (shiftPre > -1) {
+                                        if (shiftPst < imageDim[n]) {
+                                            bufferIntensitycur = bufferIntensity[shiftPre] - bufferIntensity[shiftPst];
+                                            bufferDensitycur = bufferDensity[shiftPre] - bufferDensity[shiftPst];
+                                        } else {
+                                            bufferIntensitycur = bufferIntensity[shiftPre] - bufferIntensity[imageDim[n] - 1];
+                                            bufferDensitycur = bufferDensity[shiftPre] - bufferDensity[imageDim[n] - 1];
+                                        }
+                                    } else {
+                                        if (shiftPst < imageDim[n]) {
+                                            bufferIntensitycur = -bufferIntensity[shiftPst];
+                                            bufferDensitycur = -bufferDensity[shiftPst];
+                                        } else {
+                                            bufferIntensitycur = 0;
+                                            bufferDensitycur = 0;
+                                        }
+                                    }
+                                    intensityPtr[realIndex] = static_cast<DTYPE>(bufferIntensitycur);
+                                    densityPtr[realIndex] = static_cast<float>(bufferDensitycur);
+
+                                    realIndex += lineOffset;
+                                } // line convolution of mean filter
+                            } // No kernel computation
+                        } // pixel in starting plane
+                    } // radius > 0
+                } // active axis
+            } // axes
+            // Normalise per timepoint
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr) \
    private(index)
 #endif
-         for(index=0; index<voxelNumber; ++index)
-         {
-            if(nanImagePtr[index]!=0)
-               intensityPtr[index] = static_cast<DTYPE>((float)intensityPtr[index]/densityPtr[index]);
-            else intensityPtr[index] = std::numeric_limits<DTYPE>::quiet_NaN();
-         }
-      } // check if the time point is active
-   } // loop over the time points
-   free(nanImagePtr);
-   free(densityPtr);
+            for (index = 0; index < voxelNumber; ++index) {
+                if (nanImagePtr[index] != 0)
+                    intensityPtr[index] = static_cast<DTYPE>((float)intensityPtr[index] / densityPtr[index]);
+                else intensityPtr[index] = std::numeric_limits<DTYPE>::quiet_NaN();
+            }
+        } // check if the time point is active
+    } // loop over the time points
+    free(nanImagePtr);
+    free(densityPtr);
 }
-
-
-/* *************************************************************** */
 /* *************************************************************** */
 template <class DTYPE>
 void reg_tools_labelKernelConvolution_core(nifti_image *image,
@@ -1455,1863 +1286,1571 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
                                            float varianceY,
                                            float varianceZ,
                                            int *mask,
-                                           bool *timePoint)
-{
-   if(image->nx>2048 || image->ny>2048 || image->nz>2048){
-      reg_print_fct_error("reg_tools_labelKernelConvolution_core");
-      reg_print_msg_error("This function does not support images with dimension > 2048");
-      reg_exit();
-   }
+                                           bool *timePoint) {
+    if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) {
+        reg_print_fct_error("reg_tools_labelKernelConvolution_core");
+        reg_print_msg_error("This function does not support images with dimension > 2048");
+        reg_exit();
+    }
 #ifdef WIN32
-   long index;
-   long voxelNumber = (long)image->nx*image->ny*image->nz;
+    long index;
+    const long voxelNumber = long(image->nx * image->ny * image->nz);
 #else
-   size_t index;
-   size_t voxelNumber = (size_t)image->nx*image->ny*image->nz;
+    size_t index;
+    const size_t voxelNumber = size_t(image->nx * image->ny * image->nz);
 #endif
-   DTYPE *imagePtr = static_cast<DTYPE *>(image->data);
-
-   bool * activeTimePoint = (bool *)calloc(image->nt*image->nu,sizeof(bool));
-   // Check if input time points and masks are nullptr
-   if(timePoint==nullptr)
-   {
-      // All time points are considered as active
-      for(int i=0; i<image->nt*image->nu; i++) activeTimePoint[i]=true;
-   }
-   else for(int i=0; i<image->nt*image->nu; i++) activeTimePoint[i]=timePoint[i];
-
-   int *currentMask=nullptr;
-   if(mask==nullptr)
-   {
-      currentMask=(int *)calloc(image->nx*image->ny*image->nz,sizeof(int));
-   }
-   else currentMask=mask;
-
-
-   bool *nanImagePtr = (bool *)calloc(voxelNumber, sizeof(bool));
-   DTYPE *tmpImagePtr = (DTYPE *)calloc(voxelNumber, sizeof(DTYPE));
-
-   typedef std::map <DTYPE, float> DataPointMap;
-   typedef std::pair <DTYPE, float> DataPointPair;
-   typedef typename std::map<DTYPE,float>::iterator DataPointMapIt;
-
-   // Loop over the dimension higher than 3
-   for(int t=0; t<image->nt*image->nu; t++)
-   {
-      if(activeTimePoint[t])
-      {
-         DTYPE *intensityPtr = &imagePtr[t * voxelNumber];
-         for(index=0; index<voxelNumber; index++)
-         {
-            nanImagePtr[index] = (intensityPtr[index]==intensityPtr[index])?true:false;
-            nanImagePtr[index] = (currentMask[index]>=0)?nanImagePtr[index]:false;
-         }
-         float gaussX_var=varianceX;
-         float gaussY_var=varianceY;
-         float gaussZ_var=varianceZ;
-         index=0;
-         int currentXYZposition[3]={0};
-         int dim_array[3]= {image->nx,image->ny,image->nz};
-         int shiftdirection[3]= {1,image->nx,image->nx*image->ny};
-
-         int kernelXsize, kernelXshift, shiftXstart, shiftXstop;
-         int kernelYsize, kernelYshift, shiftYstart, shiftYstop;
-         int kernelZsize, kernelZshift, shiftZstart, shiftZstop;
-         int shiftx, shifty, shiftz;
-         int indexNeighbour;
-         float kernelval;
-         DTYPE maxindex;
-         double maxval;
-         DataPointMapIt location, currIterator;
-         DataPointMap tmp_lab;
-
-         for(int currentZposition=0; currentZposition<dim_array[2]; currentZposition++)
-         {
-            currentXYZposition[2]=currentZposition;
-            for(currentXYZposition[1]=0; currentXYZposition[1]<dim_array[1]; currentXYZposition[1]++)
-            {
-               for(currentXYZposition[0]=0; currentXYZposition[0]<dim_array[0]; currentXYZposition[0]++)
-               {
-
-                  tmp_lab.clear();
-                  index=currentXYZposition[0]+(currentXYZposition[1]+currentXYZposition[2]*dim_array[1])*dim_array[0];
-
-                  // Calculate allowed kernel shifts
-                  kernelXsize=(int)(sqrtf(gaussX_var)*6.0f) % 2 != 0 ?
-                           (int)(sqrtf(gaussX_var)*6.0f) : (int)(sqrtf(gaussX_var)*6.0f)+1;
-                  kernelXshift=(int)(kernelXsize/2.0f);
-                  shiftXstart=((currentXYZposition[0]<kernelXshift)?
-                           -currentXYZposition[0]:-kernelXshift);
-                  shiftXstop=((currentXYZposition[0]>=(dim_array[0]-kernelXshift))?
-                           (int)dim_array[0]-currentXYZposition[0]-1:kernelXshift);
-
-                  kernelYsize=(int)(sqrtf(gaussY_var)*6.0f) % 2 != 0 ?
-                           (int)(sqrtf(gaussY_var)*6.0f) : (int)(sqrtf(gaussY_var)*6.0f)+1;
-                  kernelYshift=(int)(kernelYsize/2.0f);
-                  shiftYstart=((currentXYZposition[1]<kernelYshift)?
-                           -currentXYZposition[1]:-kernelYshift);
-                  shiftYstop=((currentXYZposition[1]>=(dim_array[1]-kernelYshift))?
-                           (int)dim_array[1]-currentXYZposition[1]-1:kernelYshift);
-
-                  kernelZsize=(int)(sqrtf(gaussZ_var)*6.0f) % 2 != 0 ?
-                           (int)(sqrtf(gaussZ_var)*6.0f) : (int)(sqrtf(gaussZ_var)*6.0f)+1;
-                  kernelZshift=(int)(kernelZsize/2.0f);
-                  shiftZstart=((currentXYZposition[2]<kernelZshift)?
-                           -currentXYZposition[2]:-kernelZshift);
-                  shiftZstop=((currentXYZposition[2]>=(dim_array[2]-kernelZshift))?
-                           (int)dim_array[2]-currentXYZposition[2]-1:kernelZshift);
-
-                  if(nanImagePtr[index]!=0){
-                     for(shiftx=shiftXstart; shiftx<=shiftXstop; shiftx++)
-                     {
-                        for(shifty=shiftYstart; shifty<=shiftYstop; shifty++)
-                        {
-                           for(shiftz=shiftZstart; shiftz<=shiftZstop; shiftz++)
-                           {
-
-                              // Data Blur
-                              indexNeighbour=index+(shiftx*shiftdirection[0])+
-                                    (shifty*shiftdirection[1])+(shiftz*shiftdirection[2]);
-                              if(nanImagePtr[indexNeighbour]!=0){
-                                 kernelval=expf((float)(-0.5f *(powf(shiftx,2)/gaussX_var
-                                                                +powf(shifty,2)/gaussY_var
-                                                                +powf(shiftz,2)/gaussZ_var
-                                                                )))/
-                                       (sqrtf(2.0f*3.14159265*powf(gaussX_var*gaussY_var*gaussZ_var, 2)));
-
-                                 location=tmp_lab.find(intensityPtr[indexNeighbour]);
-                                 if(location!=tmp_lab.end())
-                                 {
-                                    location->second=location->second+kernelval;
-                                 }
-                                 else
-                                 {
-                                    tmp_lab.insert(DataPointPair(intensityPtr[indexNeighbour],kernelval));
-                                 }
-                              }
-                           }
-                        }
-                     }
-                     currIterator = tmp_lab.begin();
-                     maxindex=0;
-                     maxval=-std::numeric_limits<float>::max();;
-                     while(currIterator != tmp_lab.end())
-                     {
-                        if(currIterator->second>maxval)
-                        {
-                           maxindex=currIterator->first;
-                           maxval=currIterator->second;
+    DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
+
+    bool *activeTimePoint = (bool*)calloc(image->nt * image->nu, sizeof(bool));
+    // Check if input time points and masks are nullptr
+    if (timePoint == nullptr) {
+        // All time points are considered as active
+        for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = true;
+    } else for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = timePoint[i];
+
+    int *currentMask = nullptr;
+    if (mask == nullptr) {
+        currentMask = (int*)calloc(image->nx * image->ny * image->nz, sizeof(int));
+    } else currentMask = mask;
+
+
+    bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool));
+    DTYPE *tmpImagePtr = (DTYPE*)calloc(voxelNumber, sizeof(DTYPE));
+
+    typedef std::map<DTYPE, float> DataPointMap;
+    typedef std::pair<DTYPE, float> DataPointPair;
+    typedef typename std::map<DTYPE, float>::iterator DataPointMapIt;
+
+    // Loop over the dimension higher than 3
+    for (int t = 0; t < image->nt * image->nu; t++) {
+        if (activeTimePoint[t]) {
+            DTYPE *intensityPtr = &imagePtr[t * voxelNumber];
+            for (index = 0; index < voxelNumber; index++) {
+                nanImagePtr[index] = (intensityPtr[index] == intensityPtr[index]) ? true : false;
+                nanImagePtr[index] = (currentMask[index] >= 0) ? nanImagePtr[index] : false;
+            }
+            float gaussX_var = varianceX;
+            float gaussY_var = varianceY;
+            float gaussZ_var = varianceZ;
+            index = 0;
+            int currentXYZposition[3] = {0};
+            int dim_array[3] = {image->nx, image->ny, image->nz};
+            int shiftdirection[3] = {1, image->nx, image->nx * image->ny};
+
+            int kernelXsize, kernelXshift, shiftXstart, shiftXstop;
+            int kernelYsize, kernelYshift, shiftYstart, shiftYstop;
+            int kernelZsize, kernelZshift, shiftZstart, shiftZstop;
+            int shiftx, shifty, shiftz;
+            int indexNeighbour;
+            float kernelval;
+            DTYPE maxindex;
+            double maxval;
+            DataPointMapIt location, currIterator;
+            DataPointMap tmp_lab;
+
+            for (int currentZposition = 0; currentZposition < dim_array[2]; currentZposition++) {
+                currentXYZposition[2] = currentZposition;
+                for (currentXYZposition[1] = 0; currentXYZposition[1] < dim_array[1]; currentXYZposition[1]++) {
+                    for (currentXYZposition[0] = 0; currentXYZposition[0] < dim_array[0]; currentXYZposition[0]++) {
+
+                        tmp_lab.clear();
+                        index = currentXYZposition[0] + (currentXYZposition[1] + currentXYZposition[2] * dim_array[1]) * dim_array[0];
+
+                        // Calculate allowed kernel shifts
+                        kernelXsize = (int)(sqrtf(gaussX_var) * 6.0f) % 2 != 0 ?
+                            (int)(sqrtf(gaussX_var) * 6.0f) : (int)(sqrtf(gaussX_var) * 6.0f) + 1;
+                        kernelXshift = (int)(kernelXsize / 2.0f);
+                        shiftXstart = ((currentXYZposition[0] < kernelXshift) ?
+                                       -currentXYZposition[0] : -kernelXshift);
+                        shiftXstop = ((currentXYZposition[0] >= (dim_array[0] - kernelXshift)) ?
+                                      (int)dim_array[0] - currentXYZposition[0] - 1 : kernelXshift);
+
+                        kernelYsize = (int)(sqrtf(gaussY_var) * 6.0f) % 2 != 0 ?
+                            (int)(sqrtf(gaussY_var) * 6.0f) : (int)(sqrtf(gaussY_var) * 6.0f) + 1;
+                        kernelYshift = (int)(kernelYsize / 2.0f);
+                        shiftYstart = ((currentXYZposition[1] < kernelYshift) ?
+                                       -currentXYZposition[1] : -kernelYshift);
+                        shiftYstop = ((currentXYZposition[1] >= (dim_array[1] - kernelYshift)) ?
+                                      (int)dim_array[1] - currentXYZposition[1] - 1 : kernelYshift);
+
+                        kernelZsize = (int)(sqrtf(gaussZ_var) * 6.0f) % 2 != 0 ?
+                            (int)(sqrtf(gaussZ_var) * 6.0f) : (int)(sqrtf(gaussZ_var) * 6.0f) + 1;
+                        kernelZshift = (int)(kernelZsize / 2.0f);
+                        shiftZstart = ((currentXYZposition[2] < kernelZshift) ?
+                                       -currentXYZposition[2] : -kernelZshift);
+                        shiftZstop = ((currentXYZposition[2] >= (dim_array[2] - kernelZshift)) ?
+                                      (int)dim_array[2] - currentXYZposition[2] - 1 : kernelZshift);
+
+                        if (nanImagePtr[index] != 0) {
+                            for (shiftx = shiftXstart; shiftx <= shiftXstop; shiftx++) {
+                                for (shifty = shiftYstart; shifty <= shiftYstop; shifty++) {
+                                    for (shiftz = shiftZstart; shiftz <= shiftZstop; shiftz++) {
+
+                                        // Data Blur
+                                        indexNeighbour = index + (shiftx * shiftdirection[0]) +
+                                            (shifty * shiftdirection[1]) + (shiftz * shiftdirection[2]);
+                                        if (nanImagePtr[indexNeighbour] != 0) {
+                                            kernelval = expf((float)(-0.5f * (powf(shiftx, 2) / gaussX_var
+                                                                              + powf(shifty, 2) / gaussY_var
+                                                                              + powf(shiftz, 2) / gaussZ_var))) /
+                                                (sqrtf(2.0f * 3.14159265 * powf(gaussX_var * gaussY_var * gaussZ_var, 2)));
+
+                                            location = tmp_lab.find(intensityPtr[indexNeighbour]);
+                                            if (location != tmp_lab.end()) {
+                                                location->second = location->second + kernelval;
+                                            } else {
+                                                tmp_lab.insert(DataPointPair(intensityPtr[indexNeighbour], kernelval));
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                            currIterator = tmp_lab.begin();
+                            maxindex = 0;
+                            maxval = -std::numeric_limits<float>::max();;
+                            while (currIterator != tmp_lab.end()) {
+                                if (currIterator->second > maxval) {
+                                    maxindex = currIterator->first;
+                                    maxval = currIterator->second;
+                                }
+                                currIterator++;
+                            }
+                            tmpImagePtr[index] = maxindex;
+                        } else {
+                            tmpImagePtr[index] = std::numeric_limits<DTYPE>::quiet_NaN();
                         }
-                        currIterator++;
-                     }
-                     tmpImagePtr[index]=maxindex;
-                  }
-                  else{
-                     tmpImagePtr[index]=std::numeric_limits<DTYPE>::quiet_NaN();
-                  }
-               }
+                    }
+                }
+            }
+            // Normalise per timepoint
+            for (index = 0; index < voxelNumber; ++index) {
+                if (nanImagePtr[index] == 0)
+                    intensityPtr[index] = std::numeric_limits<DTYPE>::quiet_NaN();
+                else
+                    intensityPtr[index] = tmpImagePtr[index];
             }
-         }
-         // Normalise per timepoint
-         for(index=0; index<voxelNumber; ++index)
-         {
-            if(nanImagePtr[index]==0)
-               intensityPtr[index] = std::numeric_limits<DTYPE>::quiet_NaN();
-            else
-               intensityPtr[index]=tmpImagePtr[index];
-         }
-      } // check if the time point is active
-   } // loop over the time points
+        } // check if the time point is active
+    } // loop over the time points
 
-   free(tmpImagePtr);
-   free(currentMask);
-   free(activeTimePoint);
-   free(nanImagePtr);
+    free(tmpImagePtr);
+    free(currentMask);
+    free(activeTimePoint);
+    free(nanImagePtr);
 }
 /* *************************************************************** */
-
 void reg_tools_labelKernelConvolution(nifti_image *image,
                                       float varianceX,
                                       float varianceY,
                                       float varianceZ,
                                       int *mask,
-                                      bool *timePoint){
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_labelKernelConvolution_core<unsigned char>
-            (image,varianceX,varianceY,varianceZ,mask,timePoint);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_labelKernelConvolution_core<char>
-            (image,varianceX,varianceY,varianceZ,mask,timePoint);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_labelKernelConvolution_core<unsigned short>
-            (image,varianceX,varianceY,varianceZ,mask,timePoint);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_labelKernelConvolution_core<short>
-            (image,varianceX,varianceY,varianceZ,mask,timePoint);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_labelKernelConvolution_core<unsigned int>
-            (image,varianceX,varianceY,varianceZ,mask,timePoint);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_labelKernelConvolution_core<int>
-            (image,varianceX,varianceY,varianceZ,mask,timePoint);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_labelKernelConvolution_core<float>
-            (image,varianceX,varianceY,varianceZ,mask,timePoint);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_labelKernelConvolution_core<double>
-            (image,varianceX,varianceY,varianceZ,mask,timePoint);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_labelKernelConvolution");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
-   return;
+                                      bool *timePoint) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_labelKernelConvolution_core<unsigned char>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_labelKernelConvolution_core<char>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_labelKernelConvolution_core<unsigned short>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_labelKernelConvolution_core<short>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_labelKernelConvolution_core<unsigned int>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_labelKernelConvolution_core<int>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_labelKernelConvolution_core<float>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_labelKernelConvolution_core<double>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_labelKernelConvolution");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-
 void reg_tools_kernelConvolution(nifti_image *image,
                                  float *sigma,
                                  int kernelType,
                                  int *mask,
                                  bool *timePoint,
-                                 bool *axis)
-{
-
-
-   if(image->nt<=0) image->nt=image->dim[4]=1;
-   if(image->nu<=0) image->nu=image->dim[5]=1;
-
-   bool *axisToSmooth = new bool[3];
-   bool *activeTimePoint = new bool[image->nt*image->nu];
-   if(axis==nullptr)
-   {
-      // All axis are smoothed by default
-      for(int i=0; i<3; i++) axisToSmooth[i]=true;
-   }
-   else for(int i=0; i<3; i++) axisToSmooth[i]=axis[i];
-
-   if(timePoint==nullptr)
-   {
-      // All time points are considered as active
-      for(int i=0; i<image->nt*image->nu; i++) activeTimePoint[i]=true;
-   }
-   else for(int i=0; i<image->nt*image->nu; i++) activeTimePoint[i]=timePoint[i];
-
-   int *currentMask=nullptr;
-   if(mask==nullptr)
-   {
-      currentMask=(int *)calloc(image->nx*image->ny*image->nz,sizeof(int));
-   }
-   else currentMask=mask;
-
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_kernelConvolution_core<float>(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_kernelConvolution_core<double>(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_kernelConvolution");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
-
-   if(mask==nullptr) free(currentMask);
-   delete []axisToSmooth;
-   delete []activeTimePoint;
+                                 bool *axis) {
+    if (image->nt <= 0) image->nt = image->dim[4] = 1;
+    if (image->nu <= 0) image->nu = image->dim[5] = 1;
+
+    bool *axisToSmooth = new bool[3];
+    bool *activeTimePoint = new bool[image->nt * image->nu];
+    if (axis == nullptr) {
+        // All axis are smoothed by default
+        for (int i = 0; i < 3; i++) axisToSmooth[i] = true;
+    } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i];
+
+    if (timePoint == nullptr) {
+        // All time points are considered as active
+        for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = true;
+    } else for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = timePoint[i];
+
+    int *currentMask = nullptr;
+    if (mask == nullptr) {
+        currentMask = (int*)calloc(image->nx * image->ny * image->nz, sizeof(int));
+    } else currentMask = mask;
+
+    switch (image->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_kernelConvolution_core<float>(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_kernelConvolution_core<double>(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_kernelConvolution");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
+
+    if (mask == nullptr) free(currentMask);
+    delete[] axisToSmooth;
+    delete[] activeTimePoint;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class PrecisionTYPE, class ImageTYPE>
-void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis)
-{
-   if(type==1)
-   {
-      /* the input image is first smooth */
-      float *sigma=new float[image->nt];
-      for(int i=0; i<image->nt; ++i) sigma[i]=-0.7355f;
-      reg_tools_kernelConvolution(image,sigma,GAUSSIAN_KERNEL);
-      delete []sigma;
-   }
-
-   /* the values are copied */
-   ImageTYPE *oldValues = (ImageTYPE *)malloc(image->nvox * image->nbyper);
-   ImageTYPE *imagePtr = static_cast<ImageTYPE *>(image->data);
-   memcpy(oldValues, imagePtr, image->nvox*image->nbyper);
-   free(image->data);
-
-   // Keep the previous real to voxel qform
-   mat44 real2Voxel_qform;
-   for(int i=0; i<4; i++)
-   {
-      for(int j=0; j<4; j++)
-      {
-         real2Voxel_qform.m[i][j]=image->qto_ijk.m[i][j];
-      }
-   }
-
-   // Update the axis dimension
-   int oldDim[4];
-   for(int i=1; i<4; i++)
-   {
-      oldDim[i]=image->dim[i];
-      if(image->dim[i]>1 && downsampleAxis[i]) image->dim[i]=static_cast<int>(reg_ceil(image->dim[i]/2.0));
-      if(image->pixdim[i]>0 && downsampleAxis[i]) image->pixdim[i]=image->pixdim[i]*2.0f;
-   }
-   image->nx=image->dim[1];
-   image->ny=image->dim[2];
-   image->nz=image->dim[3];
-   image->dx=image->pixdim[1];
-   image->dy=image->pixdim[2];
-   image->dz=image->pixdim[3];
-   if(image->nt<1 || image->dim[4]<1) image->nt=image->dim[4]=1;
-   if(image->nu<1 || image->dim[5]<1) image->nu=image->dim[5]=1;
-   if(image->nv<1 || image->dim[6]<1) image->nv=image->dim[6]=1;
-   if(image->nw<1 || image->dim[7]<1) image->nw=image->dim[7]=1;
-
-   // update the qform matrix
-   image->qto_xyz=nifti_quatern_to_mat44(image->quatern_b,
-                                         image->quatern_c,
-                                         image->quatern_d,
-                                         image->qoffset_x,
-                                         image->qoffset_y,
-                                         image->qoffset_z,
-                                         image->dx,
-                                         image->dy,
-                                         image->dz,
-                                         image->qfac);
-   image->qto_ijk = nifti_mat44_inverse(image->qto_xyz);
-
-   // update the sform matrix
-   if(downsampleAxis[1])
-   {
-      image->sto_xyz.m[0][0] *= 2.f;
-      image->sto_xyz.m[1][0] *= 2.f;
-      image->sto_xyz.m[2][0] *= 2.f;
-   }
-   if(downsampleAxis[2])
-   {
-      image->sto_xyz.m[0][1] *= 2.f;
-      image->sto_xyz.m[1][1] *= 2.f;
-      image->sto_xyz.m[2][1] *= 2.f;
-   }
-   if(downsampleAxis[3])
-   {
-      image->sto_xyz.m[0][2] *= 2.f;
-      image->sto_xyz.m[1][2] *= 2.f;
-      image->sto_xyz.m[2][2] *= 2.f;
-   }
-   float origin_sform[3]= {image->sto_xyz.m[0][3], image->sto_xyz.m[1][3], image->sto_xyz.m[2][3]};
-   image->sto_xyz.m[0][3]=origin_sform[0];
-   image->sto_xyz.m[1][3]=origin_sform[1];
-   image->sto_xyz.m[2][3]=origin_sform[2];
-   image->sto_ijk = nifti_mat44_inverse(image->sto_xyz);
-
-   // Reallocate the image
-   image->nvox =
-         (size_t)image->nx*
-         (size_t)image->ny*
-         (size_t)image->nz*
-         (size_t)image->nt*
-         (size_t)image->nu*
-         (size_t)image->nv*
-         (size_t)image->nw;
-   image->data=(void *)calloc(image->nvox, image->nbyper);
-   imagePtr = static_cast<ImageTYPE *>(image->data);
-
-   PrecisionTYPE real[3];
-   ImageTYPE intensity;
-   int position[3];
-
-   // qform is used for resampling
-   for(size_t tuvw=0; tuvw<(size_t)image->nt*image->nu*image->nv*image->nw; tuvw++)
-   {
-      ImageTYPE *valuesPtrTUVW = &oldValues[tuvw*oldDim[1]*oldDim[2]*oldDim[3]];
-      for(int z=0; z<image->nz; z++)
-      {
-         for(int y=0; y<image->ny; y++)
-         {
-            for(int x=0; x<image->nx; x++)
-            {
-               // Extract the voxel coordinate in mm
-               real[0]=x*image->qto_xyz.m[0][0] +
-                     y*image->qto_xyz.m[0][1] +
-                     z*image->qto_xyz.m[0][2] +
-                     image->qto_xyz.m[0][3];
-               real[1]=x*image->qto_xyz.m[1][0] +
-                     y*image->qto_xyz.m[1][1] +
-                     z*image->qto_xyz.m[1][2] +
-                     image->qto_xyz.m[1][3];
-               real[2]=x*image->qto_xyz.m[2][0] +
-                     y*image->qto_xyz.m[2][1] +
-                     z*image->qto_xyz.m[2][2] +
-                     image->qto_xyz.m[2][3];
-               // Extract the position in voxel in the old image;
-               position[0]=(int)reg_round(real[0]*real2Voxel_qform.m[0][0] + real[1]*real2Voxel_qform.m[0][1] + real[2]*real2Voxel_qform.m[0][2] + real2Voxel_qform.m[0][3]);
-               position[1]=(int)reg_round(real[0]*real2Voxel_qform.m[1][0] + real[1]*real2Voxel_qform.m[1][1] + real[2]*real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]);
-               position[2]=(int)reg_round(real[0]*real2Voxel_qform.m[2][0] + real[1]*real2Voxel_qform.m[2][1] + real[2]*real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]);
-               if(oldDim[3]==1) position[2]=0;
-               // Nearest neighboor is used as downsampling ratio is constant
-               intensity=std::numeric_limits<ImageTYPE>::quiet_NaN();
-               if(-1<position[0] && position[0]<oldDim[1] &&
-                     -1<position[1] && position[1]<oldDim[2] &&
-                     -1<position[2] && position[2]<oldDim[3])
-               {
-                  intensity = valuesPtrTUVW[(position[2]*oldDim[2]+position[1])*oldDim[1]+position[0]];
-               }
-               *imagePtr=intensity;
-               imagePtr++;
+void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) {
+    if (type == 1) {
+        /* the input image is first smooth */
+        float *sigma = new float[image->nt];
+        for (int i = 0; i < image->nt; ++i) sigma[i] = -0.7355f;
+        reg_tools_kernelConvolution(image, sigma, GAUSSIAN_KERNEL);
+        delete[]sigma;
+    }
+
+    /* the values are copied */
+    ImageTYPE *oldValues = (ImageTYPE*)malloc(image->nvox * image->nbyper);
+    ImageTYPE *imagePtr = static_cast<ImageTYPE*>(image->data);
+    memcpy(oldValues, imagePtr, image->nvox * image->nbyper);
+    free(image->data);
+
+    // Keep the previous real to voxel qform
+    mat44 real2Voxel_qform;
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            real2Voxel_qform.m[i][j] = image->qto_ijk.m[i][j];
+        }
+    }
+
+    // Update the axis dimension
+    int oldDim[4];
+    for (int i = 1; i < 4; i++) {
+        oldDim[i] = image->dim[i];
+        if (image->dim[i] > 1 && downsampleAxis[i]) image->dim[i] = static_cast<int>(reg_ceil(image->dim[i] / 2.0));
+        if (image->pixdim[i] > 0 && downsampleAxis[i]) image->pixdim[i] = image->pixdim[i] * 2.0f;
+    }
+    image->nx = image->dim[1];
+    image->ny = image->dim[2];
+    image->nz = image->dim[3];
+    image->dx = image->pixdim[1];
+    image->dy = image->pixdim[2];
+    image->dz = image->pixdim[3];
+    if (image->nt < 1 || image->dim[4] < 1) image->nt = image->dim[4] = 1;
+    if (image->nu < 1 || image->dim[5] < 1) image->nu = image->dim[5] = 1;
+    if (image->nv < 1 || image->dim[6] < 1) image->nv = image->dim[6] = 1;
+    if (image->nw < 1 || image->dim[7] < 1) image->nw = image->dim[7] = 1;
+
+    // update the qform matrix
+    image->qto_xyz = nifti_quatern_to_mat44(image->quatern_b,
+                                            image->quatern_c,
+                                            image->quatern_d,
+                                            image->qoffset_x,
+                                            image->qoffset_y,
+                                            image->qoffset_z,
+                                            image->dx,
+                                            image->dy,
+                                            image->dz,
+                                            image->qfac);
+    image->qto_ijk = nifti_mat44_inverse(image->qto_xyz);
+
+    // update the sform matrix
+    if (downsampleAxis[1]) {
+        image->sto_xyz.m[0][0] *= 2.f;
+        image->sto_xyz.m[1][0] *= 2.f;
+        image->sto_xyz.m[2][0] *= 2.f;
+    }
+    if (downsampleAxis[2]) {
+        image->sto_xyz.m[0][1] *= 2.f;
+        image->sto_xyz.m[1][1] *= 2.f;
+        image->sto_xyz.m[2][1] *= 2.f;
+    }
+    if (downsampleAxis[3]) {
+        image->sto_xyz.m[0][2] *= 2.f;
+        image->sto_xyz.m[1][2] *= 2.f;
+        image->sto_xyz.m[2][2] *= 2.f;
+    }
+    float origin_sform[3] = {image->sto_xyz.m[0][3], image->sto_xyz.m[1][3], image->sto_xyz.m[2][3]};
+    image->sto_xyz.m[0][3] = origin_sform[0];
+    image->sto_xyz.m[1][3] = origin_sform[1];
+    image->sto_xyz.m[2][3] = origin_sform[2];
+    image->sto_ijk = nifti_mat44_inverse(image->sto_xyz);
+
+    // Reallocate the image
+    image->nvox =
+        (size_t)image->nx *
+        (size_t)image->ny *
+        (size_t)image->nz *
+        (size_t)image->nt *
+        (size_t)image->nu *
+        (size_t)image->nv *
+        (size_t)image->nw;
+    image->data = calloc(image->nvox, image->nbyper);
+    imagePtr = static_cast<ImageTYPE*>(image->data);
+
+    PrecisionTYPE real[3];
+    ImageTYPE intensity;
+    int position[3];
+
+    // qform is used for resampling
+    for (size_t tuvw = 0; tuvw < (size_t)image->nt * image->nu * image->nv * image->nw; tuvw++) {
+        ImageTYPE *valuesPtrTUVW = &oldValues[tuvw * oldDim[1] * oldDim[2] * oldDim[3]];
+        for (int z = 0; z < image->nz; z++) {
+            for (int y = 0; y < image->ny; y++) {
+                for (int x = 0; x < image->nx; x++) {
+                    // Extract the voxel coordinate in mm
+                    real[0] = x * image->qto_xyz.m[0][0] +
+                        y * image->qto_xyz.m[0][1] +
+                        z * image->qto_xyz.m[0][2] +
+                        image->qto_xyz.m[0][3];
+                    real[1] = x * image->qto_xyz.m[1][0] +
+                        y * image->qto_xyz.m[1][1] +
+                        z * image->qto_xyz.m[1][2] +
+                        image->qto_xyz.m[1][3];
+                    real[2] = x * image->qto_xyz.m[2][0] +
+                        y * image->qto_xyz.m[2][1] +
+                        z * image->qto_xyz.m[2][2] +
+                        image->qto_xyz.m[2][3];
+                    // Extract the position in voxel in the old image;
+                    position[0] = (int)reg_round(real[0] * real2Voxel_qform.m[0][0] + real[1] * real2Voxel_qform.m[0][1] + real[2] * real2Voxel_qform.m[0][2] + real2Voxel_qform.m[0][3]);
+                    position[1] = (int)reg_round(real[0] * real2Voxel_qform.m[1][0] + real[1] * real2Voxel_qform.m[1][1] + real[2] * real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]);
+                    position[2] = (int)reg_round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]);
+                    if (oldDim[3] == 1) position[2] = 0;
+                    // Nearest neighboor is used as downsampling ratio is constant
+                    intensity = std::numeric_limits<ImageTYPE>::quiet_NaN();
+                    if (-1 < position[0] && position[0] < oldDim[1] &&
+                        -1 < position[1] && position[1] < oldDim[2] &&
+                        -1 < position[2] && position[2] < oldDim[3]) {
+                        intensity = valuesPtrTUVW[(position[2] * oldDim[2] + position[1]) * oldDim[1] + position[0]];
+                    }
+                    *imagePtr = intensity;
+                    imagePtr++;
+                }
             }
-         }
-      }
-   }
-   free(oldValues);
+        }
+    }
+    free(oldValues);
 }
 /* *************************************************************** */
 template <class PrecisionTYPE>
-void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis)
-{
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_downsampleImage1<PrecisionTYPE,unsigned char>(image, type, downsampleAxis);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_downsampleImage1<PrecisionTYPE,char>(image, type, downsampleAxis);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_downsampleImage1<PrecisionTYPE,unsigned short>(image, type, downsampleAxis);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_downsampleImage1<PrecisionTYPE,short>(image, type, downsampleAxis);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_downsampleImage1<PrecisionTYPE,unsigned int>(image, type, downsampleAxis);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_downsampleImage1<PrecisionTYPE,int>(image, type, downsampleAxis);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_downsampleImage1<PrecisionTYPE,float>(image, type, downsampleAxis);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_downsampleImage1<PrecisionTYPE,double>(image, type, downsampleAxis);
-      break;
-   default:
-      reg_print_fct_error("reg_downsampleImage");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
-}
-template void reg_downsampleImage<float>(nifti_image *, int, bool *);
-template void reg_downsampleImage<double>(nifti_image *, int, bool *);
-/* *************************************************************** */
+void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_downsampleImage1<PrecisionTYPE, unsigned char>(image, type, downsampleAxis);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_downsampleImage1<PrecisionTYPE, char>(image, type, downsampleAxis);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_downsampleImage1<PrecisionTYPE, unsigned short>(image, type, downsampleAxis);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_downsampleImage1<PrecisionTYPE, short>(image, type, downsampleAxis);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_downsampleImage1<PrecisionTYPE, unsigned int>(image, type, downsampleAxis);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_downsampleImage1<PrecisionTYPE, int>(image, type, downsampleAxis);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_downsampleImage1<PrecisionTYPE, float>(image, type, downsampleAxis);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_downsampleImage1<PrecisionTYPE, double>(image, type, downsampleAxis);
+        break;
+    default:
+        reg_print_fct_error("reg_downsampleImage");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
+}
+template void reg_downsampleImage<float>(nifti_image*, int, bool*);
+template void reg_downsampleImage<double>(nifti_image*, int, bool*);
 /* *************************************************************** */
 template <class DTYPE>
-void reg_tools_binarise_image1(nifti_image *image)
-{
-   DTYPE *dataPtr=static_cast<DTYPE *>(image->data);
-   image->scl_inter=0.f;
-   image->scl_slope=1.f;
-   for(size_t i=0; i<image->nvox; i++)
-   {
-      *dataPtr = (*dataPtr)!=0?(DTYPE)1:(DTYPE)0;
-      dataPtr++;
-   }
-}
-/* *************************************************************** */
-void reg_tools_binarise_image(nifti_image *image)
-{
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_binarise_image1<unsigned char>(image);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_binarise_image1<char>(image);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_binarise_image1<unsigned short>(image);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_binarise_image1<short>(image);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_binarise_image1<unsigned int>(image);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_binarise_image1<int>(image);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_binarise_image1<float>(image);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_binarise_image1<double>(image);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_binarise_image");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
+void reg_tools_binarise_image1(nifti_image *image) {
+    DTYPE *dataPtr = static_cast<DTYPE*>(image->data);
+    image->scl_inter = 0.f;
+    image->scl_slope = 1.f;
+    for (size_t i = 0; i < image->nvox; i++) {
+        *dataPtr = (*dataPtr) != 0 ? (DTYPE)1 : (DTYPE)0;
+        dataPtr++;
+    }
 }
 /* *************************************************************** */
+void reg_tools_binarise_image(nifti_image *image) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_binarise_image1<unsigned char>(image);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_binarise_image1<char>(image);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_binarise_image1<unsigned short>(image);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_binarise_image1<short>(image);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_binarise_image1<unsigned int>(image);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_binarise_image1<int>(image);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_binarise_image1<float>(image);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_binarise_image1<double>(image);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_binarise_image");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
+}
 /* *************************************************************** */
 template <class DTYPE>
-void reg_tools_binarise_image1(nifti_image *image, float threshold)
-{
-   DTYPE *dataPtr=static_cast<DTYPE *>(image->data);
-   for(size_t i=0; i<image->nvox; i++)
-   {
-      *dataPtr = (*dataPtr)<threshold?(DTYPE)0:(DTYPE)1;
-      dataPtr++;
-   }
-}
-/* *************************************************************** */
-void reg_tools_binarise_image(nifti_image *image, float threshold)
-{
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_binarise_image1<unsigned char>(image, threshold);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_binarise_image1<char>(image, threshold);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_binarise_image1<unsigned short>(image, threshold);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_binarise_image1<short>(image, threshold);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_binarise_image1<unsigned int>(image, threshold);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_binarise_image1<int>(image, threshold);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_binarise_image1<float>(image, threshold);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_binarise_image1<double>(image, threshold);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_binarise_image");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
+void reg_tools_binarise_image1(nifti_image *image, float threshold) {
+    DTYPE *dataPtr = static_cast<DTYPE*>(image->data);
+    for (size_t i = 0; i < image->nvox; i++) {
+        *dataPtr = (*dataPtr) < threshold ? (DTYPE)0 : (DTYPE)1;
+        dataPtr++;
+    }
 }
 /* *************************************************************** */
+void reg_tools_binarise_image(nifti_image *image, float threshold) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_binarise_image1<unsigned char>(image, threshold);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_binarise_image1<char>(image, threshold);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_binarise_image1<unsigned short>(image, threshold);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_binarise_image1<short>(image, threshold);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_binarise_image1<unsigned int>(image, threshold);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_binarise_image1<int>(image, threshold);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_binarise_image1<float>(image, threshold);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_binarise_image1<double>(image, threshold);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_binarise_image");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
+}
 /* *************************************************************** */
 template <class DTYPE>
-void reg_tools_binaryImage2int1(nifti_image *image, int *array, int &activeVoxelNumber)
-{
-   // Active voxel are different from -1
-   activeVoxelNumber=0;
-   DTYPE *dataPtr=static_cast<DTYPE *>(image->data);
-   for(int i=0; i<image->nx*image->ny*image->nz; i++)
-   {
-      if(*dataPtr++ != 0)
-      {
-         array[i]=1;
-         activeVoxelNumber++;
-      }
-      else
-      {
-         array[i]=-1;
-      }
-   }
-}
-/* *************************************************************** */
-void reg_tools_binaryImage2int(nifti_image *image, int *array, int &activeVoxelNumber)
-{
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_binaryImage2int1<unsigned char>(image, array, activeVoxelNumber);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_binaryImage2int1<char>(image, array, activeVoxelNumber);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_binaryImage2int1<unsigned short>(image, array, activeVoxelNumber);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_binaryImage2int1<short>(image, array, activeVoxelNumber);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_binaryImage2int1<unsigned int>(image, array, activeVoxelNumber);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_binaryImage2int1<int>(image, array, activeVoxelNumber);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_binaryImage2int1<float>(image, array, activeVoxelNumber);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_binaryImage2int1<double>(image, array, activeVoxelNumber);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_binaryImage2int");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class ATYPE,class BTYPE>
-double reg_tools_getMeanRMS2(nifti_image *imageA, nifti_image *imageB)
-{
-   ATYPE *imageAPtrX = static_cast<ATYPE *>(imageA->data);
-   BTYPE *imageBPtrX = static_cast<BTYPE *>(imageB->data);
-   ATYPE *imageAPtrY=nullptr;
-   BTYPE *imageBPtrY=nullptr;
-   ATYPE *imageAPtrZ=nullptr;
-   BTYPE *imageBPtrZ=nullptr;
-   if(imageA->dim[5]>1)
-   {
-      imageAPtrY = &imageAPtrX[imageA->nx*imageA->ny*imageA->nz];
-      imageBPtrY = &imageBPtrX[imageA->nx*imageA->ny*imageA->nz];
-   }
-   if(imageA->dim[5]>2)
-   {
-      imageAPtrZ = &imageAPtrY[imageA->nx*imageA->ny*imageA->nz];
-      imageBPtrZ = &imageBPtrY[imageA->nx*imageA->ny*imageA->nz];
-   }
-   double sum=0.0f;
-   double rms;
-   double diff;
-   for(int i=0; i<imageA->nx*imageA->ny*imageA->nz; i++)
-   {
-      diff = (double)*imageAPtrX++ - (double)*imageBPtrX++;
-      rms = diff * diff;
-      if(imageA->dim[5]>1)
-      {
-         diff = (double)*imageAPtrY++ - (double)*imageBPtrY++;
-         rms += diff * diff;
-      }
-      if(imageA->dim[5]>2)
-      {
-         diff = (double)*imageAPtrZ++ - (double)*imageBPtrZ++;
-         rms += diff * diff;
-      }
-      if(rms==rms)
-         sum += sqrt(rms);
-   }
-   return sum/(double)(imageA->nx*imageA->ny*imageA->nz);
+void reg_tools_binaryImage2int1(const nifti_image *image, int *array, int& activeVoxelNumber) {
+    // Active voxel are different from -1
+    activeVoxelNumber = 0;
+    const DTYPE *dataPtr = static_cast<DTYPE*>(image->data);
+    for (int i = 0; i < image->nx * image->ny * image->nz; i++) {
+        if (*dataPtr++ != 0) {
+            array[i] = 1;
+            activeVoxelNumber++;
+        } else {
+            array[i] = -1;
+        }
+    }
+}
+/* *************************************************************** */
+void reg_tools_binaryImage2int(const nifti_image *image, int *array, int& activeVoxelNumber) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_binaryImage2int1<unsigned char>(image, array, activeVoxelNumber);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_binaryImage2int1<char>(image, array, activeVoxelNumber);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_binaryImage2int1<unsigned short>(image, array, activeVoxelNumber);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_binaryImage2int1<short>(image, array, activeVoxelNumber);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_binaryImage2int1<unsigned int>(image, array, activeVoxelNumber);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_binaryImage2int1<int>(image, array, activeVoxelNumber);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_binaryImage2int1<float>(image, array, activeVoxelNumber);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_binaryImage2int1<double>(image, array, activeVoxelNumber);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_binaryImage2int");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
+}
+/* *************************************************************** */
+template <class ATYPE, class BTYPE>
+double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *imageB) {
+    const ATYPE *imageAPtrX = static_cast<ATYPE*>(imageA->data);
+    const BTYPE *imageBPtrX = static_cast<BTYPE*>(imageB->data);
+    const ATYPE *imageAPtrY = nullptr;
+    const BTYPE *imageBPtrY = nullptr;
+    const ATYPE *imageAPtrZ = nullptr;
+    const BTYPE *imageBPtrZ = nullptr;
+    if (imageA->dim[5] > 1) {
+        imageAPtrY = &imageAPtrX[imageA->nx * imageA->ny * imageA->nz];
+        imageBPtrY = &imageBPtrX[imageA->nx * imageA->ny * imageA->nz];
+    }
+    if (imageA->dim[5] > 2) {
+        imageAPtrZ = &imageAPtrY[imageA->nx * imageA->ny * imageA->nz];
+        imageBPtrZ = &imageBPtrY[imageA->nx * imageA->ny * imageA->nz];
+    }
+    double sum = 0;
+    double rms;
+    double diff;
+    for (int i = 0; i < imageA->nx * imageA->ny * imageA->nz; i++) {
+        diff = (double)*imageAPtrX++ - (double)*imageBPtrX++;
+        rms = diff * diff;
+        if (imageA->dim[5] > 1) {
+            diff = (double)*imageAPtrY++ - (double)*imageBPtrY++;
+            rms += diff * diff;
+        }
+        if (imageA->dim[5] > 2) {
+            diff = (double)*imageAPtrZ++ - (double)*imageBPtrZ++;
+            rms += diff * diff;
+        }
+        if (rms == rms)
+            sum += sqrt(rms);
+    }
+    return sum / double(imageA->nx * imageA->ny * imageA->nz);
 }
 /* *************************************************************** */
 template <class ATYPE>
-double reg_tools_getMeanRMS1(nifti_image *imageA, nifti_image *imageB)
-{
-   switch(imageB->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      return reg_tools_getMeanRMS2<ATYPE,unsigned char>(imageA, imageB);
-   case NIFTI_TYPE_INT8:
-      return reg_tools_getMeanRMS2<ATYPE,char>(imageA, imageB);
-   case NIFTI_TYPE_UINT16:
-      return reg_tools_getMeanRMS2<ATYPE,unsigned short>(imageA, imageB);
-   case NIFTI_TYPE_INT16:
-      return reg_tools_getMeanRMS2<ATYPE,short>(imageA, imageB);
-   case NIFTI_TYPE_UINT32:
-      return reg_tools_getMeanRMS2<ATYPE,unsigned int>(imageA, imageB);
-   case NIFTI_TYPE_INT32:
-      return reg_tools_getMeanRMS2<ATYPE,int>(imageA, imageB);
-   case NIFTI_TYPE_FLOAT32:
-      return reg_tools_getMeanRMS2<ATYPE,float>(imageA, imageB);
-   case NIFTI_TYPE_FLOAT64:
-      return reg_tools_getMeanRMS2<ATYPE,double>(imageA, imageB);
-   default:
-      reg_print_fct_error("reg_tools_getMeanRMS1");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-double reg_tools_getMeanRMS(nifti_image *imageA, nifti_image *imageB)
-{
-   switch(imageA->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      return reg_tools_getMeanRMS1<unsigned char>(imageA, imageB);
-   case NIFTI_TYPE_INT8:
-      return reg_tools_getMeanRMS1<char>(imageA, imageB);
-   case NIFTI_TYPE_UINT16:
-      return reg_tools_getMeanRMS1<unsigned short>(imageA, imageB);
-   case NIFTI_TYPE_INT16:
-      return reg_tools_getMeanRMS1<short>(imageA, imageB);
-   case NIFTI_TYPE_UINT32:
-      return reg_tools_getMeanRMS1<unsigned int>(imageA, imageB);
-   case NIFTI_TYPE_INT32:
-      return reg_tools_getMeanRMS1<int>(imageA, imageB);
-   case NIFTI_TYPE_FLOAT32:
-      return reg_tools_getMeanRMS1<float>(imageA, imageB);
-   case NIFTI_TYPE_FLOAT64:
-      return reg_tools_getMeanRMS1<double>(imageA, imageB);
-   default:
-      reg_print_fct_error("reg_tools_getMeanRMS");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
+double reg_tools_getMeanRMS1(const nifti_image *imageA, const nifti_image *imageB) {
+    switch (imageB->datatype) {
+    case NIFTI_TYPE_UINT8:
+        return reg_tools_getMeanRMS2<ATYPE, unsigned char>(imageA, imageB);
+    case NIFTI_TYPE_INT8:
+        return reg_tools_getMeanRMS2<ATYPE, char>(imageA, imageB);
+    case NIFTI_TYPE_UINT16:
+        return reg_tools_getMeanRMS2<ATYPE, unsigned short>(imageA, imageB);
+    case NIFTI_TYPE_INT16:
+        return reg_tools_getMeanRMS2<ATYPE, short>(imageA, imageB);
+    case NIFTI_TYPE_UINT32:
+        return reg_tools_getMeanRMS2<ATYPE, unsigned int>(imageA, imageB);
+    case NIFTI_TYPE_INT32:
+        return reg_tools_getMeanRMS2<ATYPE, int>(imageA, imageB);
+    case NIFTI_TYPE_FLOAT32:
+        return reg_tools_getMeanRMS2<ATYPE, float>(imageA, imageB);
+    case NIFTI_TYPE_FLOAT64:
+        return reg_tools_getMeanRMS2<ATYPE, double>(imageA, imageB);
+    default:
+        reg_print_fct_error("reg_tools_getMeanRMS1");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
+double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB) {
+    switch (imageA->datatype) {
+    case NIFTI_TYPE_UINT8:
+        return reg_tools_getMeanRMS1<unsigned char>(imageA, imageB);
+    case NIFTI_TYPE_INT8:
+        return reg_tools_getMeanRMS1<char>(imageA, imageB);
+    case NIFTI_TYPE_UINT16:
+        return reg_tools_getMeanRMS1<unsigned short>(imageA, imageB);
+    case NIFTI_TYPE_INT16:
+        return reg_tools_getMeanRMS1<short>(imageA, imageB);
+    case NIFTI_TYPE_UINT32:
+        return reg_tools_getMeanRMS1<unsigned int>(imageA, imageB);
+    case NIFTI_TYPE_INT32:
+        return reg_tools_getMeanRMS1<int>(imageA, imageB);
+    case NIFTI_TYPE_FLOAT32:
+        return reg_tools_getMeanRMS1<float>(imageA, imageB);
+    case NIFTI_TYPE_FLOAT64:
+        return reg_tools_getMeanRMS1<double>(imageA, imageB);
+    default:
+        reg_print_fct_error("reg_tools_getMeanRMS");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
+}
 /* *************************************************************** */
 template <class DTYPE>
-int reg_createImagePyramid(nifti_image *inputImage, nifti_image **pyramid, int unsigned levelNumber, int unsigned levelToPerform)
-{
-   // FINEST LEVEL OF REGISTRATION
-   pyramid[levelToPerform-1]=nifti_copy_nim_info(inputImage);
-   pyramid[levelToPerform-1]->data = (void *)calloc(pyramid[levelToPerform-1]->nvox,
-         pyramid[levelToPerform-1]->nbyper);
-   memcpy(pyramid[levelToPerform-1]->data, inputImage->data,
-         pyramid[levelToPerform-1]->nvox* pyramid[levelToPerform-1]->nbyper);
-   reg_tools_changeDatatype<DTYPE>(pyramid[levelToPerform-1]);
-   reg_tools_removeSCLInfo(pyramid[levelToPerform-1]);
-
-   // Images are downsampled if appropriate
-   for(unsigned int l=levelToPerform; l<levelNumber; l++)
-   {
-      bool downsampleAxis[8]= {false,true,true,true,false,false,false,false};
-      if((pyramid[levelToPerform-1]->nx/2) < 32) downsampleAxis[1]=false;
-      if((pyramid[levelToPerform-1]->ny/2) < 32) downsampleAxis[2]=false;
-      if((pyramid[levelToPerform-1]->nz/2) < 32) downsampleAxis[3]=false;
-      reg_downsampleImage<DTYPE>(pyramid[levelToPerform-1], 1, downsampleAxis);
-   }
-
-   // Images for each subsequent levels are allocated and downsampled if appropriate
-   for(int l=levelToPerform-2; l>=0; l--)
-   {
-      // Allocation of the image
-      pyramid[l]=nifti_copy_nim_info(pyramid[l+1]);
-      pyramid[l]->data = (void *)calloc(pyramid[l]->nvox,
-                                        pyramid[l]->nbyper);
-
-      memcpy(pyramid[l]->data, pyramid[l+1]->data,
-            pyramid[l]->nvox* pyramid[l]->nbyper);
-
-      // Downsample the image if appropriate
-      bool downsampleAxis[8]= {false,true,true,true,false,false,false,false};
-      if((pyramid[l]->nx/2) < 32) downsampleAxis[1]=false;
-      if((pyramid[l]->ny/2) < 32) downsampleAxis[2]=false;
-      if((pyramid[l]->nz/2) < 32) downsampleAxis[3]=false;
-      reg_downsampleImage<DTYPE>(pyramid[l], 1, downsampleAxis);
-   }
-   return EXIT_SUCCESS;
-}
-template int reg_createImagePyramid<float>(nifti_image *, nifti_image **, unsigned int , unsigned int);
-template int reg_createImagePyramid<double>(nifti_image *, nifti_image **, unsigned int , unsigned int);
-/* *************************************************************** */
+int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, unsigned int levelNumber, unsigned int levelToPerform) {
+    // FINEST LEVEL OF REGISTRATION
+    pyramid[levelToPerform - 1] = nifti_copy_nim_info(inputImage);
+    pyramid[levelToPerform - 1]->data = calloc(pyramid[levelToPerform - 1]->nvox,
+                                               pyramid[levelToPerform - 1]->nbyper);
+    memcpy(pyramid[levelToPerform - 1]->data, inputImage->data,
+           pyramid[levelToPerform - 1]->nvox * pyramid[levelToPerform - 1]->nbyper);
+    reg_tools_changeDatatype<DTYPE>(pyramid[levelToPerform - 1]);
+    reg_tools_removeSCLInfo(pyramid[levelToPerform - 1]);
+
+    // Images are downsampled if appropriate
+    for (unsigned int l = levelToPerform; l < levelNumber; l++) {
+        bool downsampleAxis[8] = {false, true, true, true, false, false, false, false};
+        if ((pyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false;
+        if ((pyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false;
+        if ((pyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false;
+        reg_downsampleImage<DTYPE>(pyramid[levelToPerform - 1], 1, downsampleAxis);
+    }
+
+    // Images for each subsequent levels are allocated and downsampled if appropriate
+    for (int l = levelToPerform - 2; l >= 0; l--) {
+        // Allocation of the image
+        pyramid[l] = nifti_copy_nim_info(pyramid[l + 1]);
+        pyramid[l]->data = calloc(pyramid[l]->nvox, pyramid[l]->nbyper);
+
+        memcpy(pyramid[l]->data, pyramid[l + 1]->data,
+               pyramid[l]->nvox * pyramid[l]->nbyper);
+
+        // Downsample the image if appropriate
+        bool downsampleAxis[8] = {false, true, true, true, false, false, false, false};
+        if ((pyramid[l]->nx / 2) < 32) downsampleAxis[1] = false;
+        if ((pyramid[l]->ny / 2) < 32) downsampleAxis[2] = false;
+        if ((pyramid[l]->nz / 2) < 32) downsampleAxis[3] = false;
+        reg_downsampleImage<DTYPE>(pyramid[l], 1, downsampleAxis);
+    }
+    return EXIT_SUCCESS;
+}
+template int reg_createImagePyramid<float>(const nifti_image*, nifti_image**, unsigned int, unsigned int);
+template int reg_createImagePyramid<double>(const nifti_image*, nifti_image**, unsigned int, unsigned int);
 /* *************************************************************** */
 template <class DTYPE>
-int reg_createMaskPyramid(nifti_image *inputMaskImage, int **maskPyramid, int unsigned levelNumber, int unsigned levelToPerform, int *activeVoxelNumber)
-{
-   // FINEST LEVEL OF REGISTRATION
-   nifti_image **tempMaskImagePyramid=(nifti_image **)malloc(levelToPerform*sizeof(nifti_image *));
-   tempMaskImagePyramid[levelToPerform-1]=nifti_copy_nim_info(inputMaskImage);
-   tempMaskImagePyramid[levelToPerform-1]->data = (void *)calloc(tempMaskImagePyramid[levelToPerform-1]->nvox,
-         tempMaskImagePyramid[levelToPerform-1]->nbyper);
-   memcpy(tempMaskImagePyramid[levelToPerform-1]->data, inputMaskImage->data,
-         tempMaskImagePyramid[levelToPerform-1]->nvox* tempMaskImagePyramid[levelToPerform-1]->nbyper);
-   reg_tools_binarise_image(tempMaskImagePyramid[levelToPerform-1]);
-   reg_tools_changeDatatype<unsigned char>(tempMaskImagePyramid[levelToPerform-1]);
-
-   // Image is downsampled if appropriate
-   for(unsigned int l=levelToPerform; l<levelNumber; l++)
-   {
-      bool downsampleAxis[8]= {false,true,true,true,false,false,false,false};
-      if((tempMaskImagePyramid[levelToPerform-1]->nx/2) < 32) downsampleAxis[1]=false;
-      if((tempMaskImagePyramid[levelToPerform-1]->ny/2) < 32) downsampleAxis[2]=false;
-      if((tempMaskImagePyramid[levelToPerform-1]->nz/2) < 32) downsampleAxis[3]=false;
-      reg_downsampleImage<DTYPE>(tempMaskImagePyramid[levelToPerform-1], 0, downsampleAxis);
-   }
-   activeVoxelNumber[levelToPerform-1]=tempMaskImagePyramid[levelToPerform-1]->nx *
-         tempMaskImagePyramid[levelToPerform-1]->ny *
-         tempMaskImagePyramid[levelToPerform-1]->nz;
-   maskPyramid[levelToPerform-1]=(int *)malloc(activeVoxelNumber[levelToPerform-1] * sizeof(int));
-   reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform-1],
-         maskPyramid[levelToPerform-1],
-         activeVoxelNumber[levelToPerform-1]);
-
-   // Images for each subsequent levels are allocated and downsampled if appropriate
-   for(int l=levelToPerform-2; l>=0; l--)
-   {
-      // Allocation of the reference image
-      tempMaskImagePyramid[l]=nifti_copy_nim_info(tempMaskImagePyramid[l+1]);
-      tempMaskImagePyramid[l]->data = (void *)calloc(tempMaskImagePyramid[l]->nvox,
-                                                     tempMaskImagePyramid[l]->nbyper);
-      memcpy(tempMaskImagePyramid[l]->data, tempMaskImagePyramid[l+1]->data,
-            tempMaskImagePyramid[l]->nvox* tempMaskImagePyramid[l]->nbyper);
-
-      // Downsample the image if appropriate
-      bool downsampleAxis[8]= {false,true,true,true,false,false,false,false};
-      if((tempMaskImagePyramid[l]->nx/2) < 32) downsampleAxis[1]=false;
-      if((tempMaskImagePyramid[l]->ny/2) < 32) downsampleAxis[2]=false;
-      if((tempMaskImagePyramid[l]->nz/2) < 32) downsampleAxis[3]=false;
-      reg_downsampleImage<DTYPE>(tempMaskImagePyramid[l], 0, downsampleAxis);
-
-      activeVoxelNumber[l]=tempMaskImagePyramid[l]->nx *
+int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform, int *activeVoxelNumber) {
+    // FINEST LEVEL OF REGISTRATION
+    nifti_image **tempMaskImagePyramid = (nifti_image **)malloc(levelToPerform * sizeof(nifti_image *));
+    tempMaskImagePyramid[levelToPerform - 1] = nifti_copy_nim_info(inputMaskImage);
+    tempMaskImagePyramid[levelToPerform - 1]->data = calloc(tempMaskImagePyramid[levelToPerform - 1]->nvox,
+                                                            tempMaskImagePyramid[levelToPerform - 1]->nbyper);
+    memcpy(tempMaskImagePyramid[levelToPerform - 1]->data, inputMaskImage->data,
+           tempMaskImagePyramid[levelToPerform - 1]->nvox * tempMaskImagePyramid[levelToPerform - 1]->nbyper);
+    reg_tools_binarise_image(tempMaskImagePyramid[levelToPerform - 1]);
+    reg_tools_changeDatatype<unsigned char>(tempMaskImagePyramid[levelToPerform - 1]);
+
+    // Image is downsampled if appropriate
+    for (unsigned int l = levelToPerform; l < levelNumber; l++) {
+        bool downsampleAxis[8] = {false, true, true, true, false, false, false, false};
+        if ((tempMaskImagePyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false;
+        if ((tempMaskImagePyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false;
+        if ((tempMaskImagePyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false;
+        reg_downsampleImage<DTYPE>(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis);
+    }
+    activeVoxelNumber[levelToPerform - 1] = (tempMaskImagePyramid[levelToPerform - 1]->nx *
+                                             tempMaskImagePyramid[levelToPerform - 1]->ny *
+                                             tempMaskImagePyramid[levelToPerform - 1]->nz);
+    maskPyramid[levelToPerform - 1] = (int*)malloc(activeVoxelNumber[levelToPerform - 1] * sizeof(int));
+    reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1],
+                              maskPyramid[levelToPerform - 1],
+                              activeVoxelNumber[levelToPerform - 1]);
+
+    // Images for each subsequent levels are allocated and downsampled if appropriate
+    for (int l = levelToPerform - 2; l >= 0; l--) {
+        // Allocation of the reference image
+        tempMaskImagePyramid[l] = nifti_copy_nim_info(tempMaskImagePyramid[l + 1]);
+        tempMaskImagePyramid[l]->data = calloc(tempMaskImagePyramid[l]->nvox, tempMaskImagePyramid[l]->nbyper);
+        memcpy(tempMaskImagePyramid[l]->data, tempMaskImagePyramid[l + 1]->data,
+               tempMaskImagePyramid[l]->nvox * tempMaskImagePyramid[l]->nbyper);
+
+        // Downsample the image if appropriate
+        bool downsampleAxis[8] = {false, true, true, true, false, false, false, false};
+        if ((tempMaskImagePyramid[l]->nx / 2) < 32) downsampleAxis[1] = false;
+        if ((tempMaskImagePyramid[l]->ny / 2) < 32) downsampleAxis[2] = false;
+        if ((tempMaskImagePyramid[l]->nz / 2) < 32) downsampleAxis[3] = false;
+        reg_downsampleImage<DTYPE>(tempMaskImagePyramid[l], 0, downsampleAxis);
+
+        activeVoxelNumber[l] = tempMaskImagePyramid[l]->nx *
             tempMaskImagePyramid[l]->ny *
             tempMaskImagePyramid[l]->nz;
-      maskPyramid[l]=(int *)malloc(activeVoxelNumber[l] * sizeof(int));
-      reg_tools_binaryImage2int(tempMaskImagePyramid[l],
-                                maskPyramid[l],
-                                activeVoxelNumber[l]);
-   }
-   for(unsigned int l=0; l<levelToPerform; ++l)
-      nifti_image_free(tempMaskImagePyramid[l]);
-   free(tempMaskImagePyramid);
-   return EXIT_SUCCESS;
+        maskPyramid[l] = (int*)malloc(activeVoxelNumber[l] * sizeof(int));
+        reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l], activeVoxelNumber[l]);
+    }
+    for (unsigned int l = 0; l < levelToPerform; ++l)
+        nifti_image_free(tempMaskImagePyramid[l]);
+    free(tempMaskImagePyramid);
+    return EXIT_SUCCESS;
 }
-template int reg_createMaskPyramid<float>(nifti_image *, int **, unsigned int , unsigned int , int *);
-template int reg_createMaskPyramid<double>(nifti_image *, int **, unsigned int , unsigned int , int *);
-/* *************************************************************** */
+template int reg_createMaskPyramid<float>(const nifti_image*, int**, unsigned int, unsigned int, int*);
+template int reg_createMaskPyramid<double>(const nifti_image*, int**, unsigned int, unsigned int, int*);
 /* *************************************************************** */
 template <class TYPE1, class TYPE2>
-int reg_tools_nanMask_image2(nifti_image *image, nifti_image *maskImage, nifti_image *outputImage)
-{
-   TYPE1 *imagePtr = static_cast<TYPE1 *>(image->data);
-   TYPE2 *maskPtr = static_cast<TYPE2 *>(maskImage->data);
-   TYPE1 *resPtr = static_cast<TYPE1 *>(outputImage->data);
-   for(size_t i=0; i<image->nvox; ++i)
-   {
-      if(*maskPtr == 0)
-         *resPtr=std::numeric_limits<TYPE1>::quiet_NaN();
-      else *resPtr=*imagePtr;
-      maskPtr++;
-      imagePtr++;
-      resPtr++;
-   }
-   return EXIT_SUCCESS;
+int reg_tools_nanMask_image2(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
+    const TYPE1 *imagePtr = static_cast<TYPE1*>(image->data);
+    const TYPE2 *maskPtr = static_cast<TYPE2*>(maskImage->data);
+    TYPE1 *resPtr = static_cast<TYPE1*>(outputImage->data);
+    for (size_t i = 0; i < image->nvox; ++i) {
+        if (*maskPtr == 0)
+            *resPtr = std::numeric_limits<TYPE1>::quiet_NaN();
+        else *resPtr = *imagePtr;
+        maskPtr++;
+        imagePtr++;
+        resPtr++;
+    }
+    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class TYPE1>
-int reg_tools_nanMask_image1(nifti_image *image, nifti_image *maskImage, nifti_image *outputImage)
-{
-   switch(maskImage->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      return reg_tools_nanMask_image2<TYPE1,unsigned char>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_INT8:
-      return reg_tools_nanMask_image2<TYPE1,char>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_UINT16:
-      return reg_tools_nanMask_image2<TYPE1,unsigned short>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_INT16:
-      return reg_tools_nanMask_image2<TYPE1,short>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_UINT32:
-      return reg_tools_nanMask_image2<TYPE1,unsigned int>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_INT32:
-      return reg_tools_nanMask_image2<TYPE1,int>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_FLOAT32:
-      return reg_tools_nanMask_image2<TYPE1,float>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_FLOAT64:
-      return reg_tools_nanMask_image2<TYPE1,double>
-            (image, maskImage, outputImage);
-   default:
-      reg_print_fct_error("reg_tools_nanMask_image1");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-int reg_tools_nanMask_image(nifti_image *image, nifti_image *maskImage, nifti_image *outputImage)
-{
-   // Check dimension
-   if(image->nvox != maskImage->nvox || image->nvox != outputImage->nvox)
-   {
-      reg_print_fct_error("reg_tools_nanMask_image");
-      reg_print_msg_error("Input images have different size");
-      reg_exit();
-   }
-   // Check output data type
-   if(image->datatype != outputImage->datatype)
-   {
-      reg_print_fct_error("reg_tools_nanMask_image");
-      reg_print_msg_error("Input and output images have different data type");
-      reg_exit();
-   }
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      return reg_tools_nanMask_image1<unsigned char>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_INT8:
-      return reg_tools_nanMask_image1<char>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_UINT16:
-      return reg_tools_nanMask_image1<unsigned short>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_INT16:
-      return reg_tools_nanMask_image1<short>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_UINT32:
-      return reg_tools_nanMask_image1<unsigned int>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_INT32:
-      return reg_tools_nanMask_image1<int>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_FLOAT32:
-      return reg_tools_nanMask_image1<float>
-            (image, maskImage, outputImage);
-   case NIFTI_TYPE_FLOAT64:
-      return reg_tools_nanMask_image1<double>
-            (image, maskImage, outputImage);
-   default:
-      reg_print_fct_error("reg_tools_nanMask_image");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
+int reg_tools_nanMask_image1(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
+    switch (maskImage->datatype) {
+    case NIFTI_TYPE_UINT8:
+        return reg_tools_nanMask_image2<TYPE1, unsigned char>(image, maskImage, outputImage);
+    case NIFTI_TYPE_INT8:
+        return reg_tools_nanMask_image2<TYPE1, char>(image, maskImage, outputImage);
+    case NIFTI_TYPE_UINT16:
+        return reg_tools_nanMask_image2<TYPE1, unsigned short>(image, maskImage, outputImage);
+    case NIFTI_TYPE_INT16:
+        return reg_tools_nanMask_image2<TYPE1, short>(image, maskImage, outputImage);
+    case NIFTI_TYPE_UINT32:
+        return reg_tools_nanMask_image2<TYPE1, unsigned int>(image, maskImage, outputImage);
+    case NIFTI_TYPE_INT32:
+        return reg_tools_nanMask_image2<TYPE1, int>(image, maskImage, outputImage);
+    case NIFTI_TYPE_FLOAT32:
+        return reg_tools_nanMask_image2<TYPE1, float>(image, maskImage, outputImage);
+    case NIFTI_TYPE_FLOAT64:
+        return reg_tools_nanMask_image2<TYPE1, double>(image, maskImage, outputImage);
+    default:
+        reg_print_fct_error("reg_tools_nanMask_image1");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
+int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
+    // Check dimension
+    if (image->nvox != maskImage->nvox || image->nvox != outputImage->nvox) {
+        reg_print_fct_error("reg_tools_nanMask_image");
+        reg_print_msg_error("Input images have different size");
+        reg_exit();
+    }
+    // Check output data type
+    if (image->datatype != outputImage->datatype) {
+        reg_print_fct_error("reg_tools_nanMask_image");
+        reg_print_msg_error("Input and output images have different data type");
+        reg_exit();
+    }
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        return reg_tools_nanMask_image1<unsigned char>(image, maskImage, outputImage);
+    case NIFTI_TYPE_INT8:
+        return reg_tools_nanMask_image1<char>(image, maskImage, outputImage);
+    case NIFTI_TYPE_UINT16:
+        return reg_tools_nanMask_image1<unsigned short>(image, maskImage, outputImage);
+    case NIFTI_TYPE_INT16:
+        return reg_tools_nanMask_image1<short>(image, maskImage, outputImage);
+    case NIFTI_TYPE_UINT32:
+        return reg_tools_nanMask_image1<unsigned int>(image, maskImage, outputImage);
+    case NIFTI_TYPE_INT32:
+        return reg_tools_nanMask_image1<int>(image, maskImage, outputImage);
+    case NIFTI_TYPE_FLOAT32:
+        return reg_tools_nanMask_image1<float>(image, maskImage, outputImage);
+    case NIFTI_TYPE_FLOAT64:
+        return reg_tools_nanMask_image1<double>(image, maskImage, outputImage);
+    default:
+        reg_print_fct_error("reg_tools_nanMask_image");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
+}
 /* *************************************************************** */
 template <class TYPE>
-int reg_tools_removeNanFromMask_core(nifti_image *image, int *mask)
-{
-   size_t voxelNumber = (size_t)image->nx*image->ny*image->nz;
-   TYPE *imagePtr = static_cast<TYPE *>(image->data);
-   for(int t=0; t<image->nt; ++t){
-      for(size_t i=0; i<voxelNumber; ++i){
-         TYPE value=*imagePtr++;
-         if(value!=value)
-            mask[i]=-1;
-      }
-   }
-   return EXIT_SUCCESS;
-}
-/* *************************************************************** */
-int reg_tools_removeNanFromMask(nifti_image *image,
-                                int *mask)
-{
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      return reg_tools_removeNanFromMask_core<float>
-            (image, mask);
-   case NIFTI_TYPE_FLOAT64:
-      return reg_tools_removeNanFromMask_core<double>
-            (image, mask);
-   default:
-      reg_print_fct_error("reg_tools_removeNanFromMask");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
+int reg_tools_removeNanFromMask_core(const nifti_image *image, int *mask) {
+    const size_t voxelNumber = size_t(image->nx * image->ny * image->nz);
+    const TYPE *imagePtr = static_cast<TYPE*>(image->data);
+    for (int t = 0; t < image->nt; ++t) {
+        for (size_t i = 0; i < voxelNumber; ++i) {
+            TYPE value = *imagePtr++;
+            if (value != value)
+                mask[i] = -1;
+        }
+    }
+    return EXIT_SUCCESS;
+}
+/* *************************************************************** */
+int reg_tools_removeNanFromMask(const nifti_image *image, int *mask) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        return reg_tools_removeNanFromMask_core<float>(image, mask);
+    case NIFTI_TYPE_FLOAT64:
+        return reg_tools_removeNanFromMask_core<double>(image, mask);
+    default:
+        reg_print_fct_error("reg_tools_removeNanFromMask");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
-
-/* *************************************************************** */
 /* *************************************************************** */
 template <class DTYPE>
-DTYPE reg_tools_getMinValue_core(nifti_image *image, int timepoint)
-{
-   if(timepoint<-1 || timepoint>=image->nt)
-      reg_print_msg_error("reg_tools_getMinValue_core. The required time point does not exists");
-   // Create a pointer to the image data
-   DTYPE *imgPtr = static_cast<DTYPE *>(image->data);
-   // Set a variable to store the minimal value
-   DTYPE minValue=std::numeric_limits<DTYPE>::max();
-   if(image->scl_slope==0) image->scl_slope=1.f;
-
-   size_t voxelNumber = (size_t)image->nx*
-         image->ny*image->nz;
-   // Loop over all voxel to find the lowest value
-   for(int time=0; time<image->nt; ++time){
-      if(time==timepoint || timepoint==-1){
-         for(int u=0; u<image->nu; ++u){
-            DTYPE *currentVolumePtr = &imgPtr[(u*image->nt+time)*voxelNumber];
-            for(size_t i=0; i<voxelNumber; ++i){
-               DTYPE currentVal = (DTYPE)((float)currentVolumePtr[i] * image->scl_slope + image->scl_inter);
-               minValue=currentVal<minValue?currentVal:minValue;
+DTYPE reg_tools_getMinMaxValue_core(const nifti_image *image, int timepoint, bool calcMin = true) {
+    if (timepoint < -1 || timepoint >= image->nt)
+        reg_print_msg_error("reg_tools_getMinMaxValue_core. The required time point does not exists");
+
+    const DTYPE *imgPtr = static_cast<DTYPE*>(image->data);
+    DTYPE retValue = calcMin ? std::numeric_limits<DTYPE>::max() : std::numeric_limits<DTYPE>::min();
+    const size_t voxelNumber = size_t(image->nx * image->ny * image->nz);
+    const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
+
+    for (int time = 0; time < image->nt; ++time) {
+        if (time == timepoint || timepoint == -1) {
+            for (int u = 0; u < image->nu; ++u) {
+                const DTYPE *currentVolumePtr = &imgPtr[(u * image->nt + time) * voxelNumber];
+                for (size_t i = 0; i < voxelNumber; ++i) {
+                    DTYPE currentVal = (DTYPE)((float)currentVolumePtr[i] * sclSlope + image->scl_inter);
+                    retValue = calcMin ? std::min(currentVal, retValue) : std::max(currentVal, retValue);
+                }
             }
-         }
-      }
-   }
-   // The lowest value is returned
-   return minValue;
-}
-/* *************************************************************** */
-float reg_tools_getMinValue(nifti_image *image, int timepoint)
-{
-   // Check the image data type
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      return reg_tools_getMinValue_core<unsigned char>(image, timepoint);
-   case NIFTI_TYPE_INT8:
-      return reg_tools_getMinValue_core<char>(image, timepoint);
-   case NIFTI_TYPE_UINT16:
-      return reg_tools_getMinValue_core<unsigned short>(image, timepoint);
-   case NIFTI_TYPE_INT16:
-      return reg_tools_getMinValue_core<short>(image, timepoint);
-   case NIFTI_TYPE_UINT32:
-      return reg_tools_getMinValue_core<unsigned int>(image, timepoint);
-   case NIFTI_TYPE_INT32:
-      return reg_tools_getMinValue_core<int>(image, timepoint);
-   case NIFTI_TYPE_FLOAT32:
-      return reg_tools_getMinValue_core<float>(image, timepoint);
-   case NIFTI_TYPE_FLOAT64:
-      return reg_tools_getMinValue_core<double>(image, timepoint);
-   default:
-      reg_print_fct_error("reg_tools_getMinValue");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
+        }
+    }
+    return retValue;
+}
+/* *************************************************************** */
+float reg_tools_getMinValue(const nifti_image *image, int timepoint) {
+    // Check the image data type
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        return reg_tools_getMinMaxValue_core<unsigned char>(image, timepoint);
+    case NIFTI_TYPE_INT8:
+        return reg_tools_getMinMaxValue_core<char>(image, timepoint);
+    case NIFTI_TYPE_UINT16:
+        return reg_tools_getMinMaxValue_core<unsigned short>(image, timepoint);
+    case NIFTI_TYPE_INT16:
+        return reg_tools_getMinMaxValue_core<short>(image, timepoint);
+    case NIFTI_TYPE_UINT32:
+        return reg_tools_getMinMaxValue_core<unsigned int>(image, timepoint);
+    case NIFTI_TYPE_INT32:
+        return reg_tools_getMinMaxValue_core<int>(image, timepoint);
+    case NIFTI_TYPE_FLOAT32:
+        return reg_tools_getMinMaxValue_core<float>(image, timepoint);
+    case NIFTI_TYPE_FLOAT64:
+        return reg_tools_getMinMaxValue_core<double>(image, timepoint);
+    default:
+        reg_print_fct_error("reg_tools_getMinValue");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-template <class DTYPE>
-DTYPE reg_tools_getMaxValue_core(nifti_image *image, int timepoint)
-{
-   if(timepoint<-1 || timepoint>=image->nt)
-      reg_print_msg_error("reg_tools_getMinValue_core. The required time point does not exists");
-   // Create a pointer to the image data
-   DTYPE *imgPtr = static_cast<DTYPE *>(image->data);
-   // Set a variable to store the minimal value
-   DTYPE maxValue=std::numeric_limits<DTYPE>::min();
-   if(image->scl_slope==0) image->scl_slope=1.f;
-
-   size_t voxelNumber = (size_t)image->nx *
-         image->ny * image->nz;
-   // Loop over all voxel to find the lowest value
-   for(int time=0; time<image->nt; ++time){
-      if(time==timepoint || timepoint==-1){
-         for(int u=0; u<image->nu; ++u){
-            DTYPE *currentVolumePtr = &imgPtr[(u*image->nt+time)*voxelNumber];
-            for(size_t i=0; i<voxelNumber; ++i){
-               DTYPE currentVal = (DTYPE)((float)currentVolumePtr[i] * image->scl_slope + image->scl_inter);
-               maxValue=currentVal>maxValue?currentVal:maxValue;
-            } // u
-         } // t
-      } // if time
-   } // time
-   // The lowest value is returned
-   return maxValue;
-}
-/* *************************************************************** */
-float reg_tools_getMaxValue(nifti_image *image, int timepoint)
-{
-   // Check the image data type
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      return reg_tools_getMaxValue_core<unsigned char>(image, timepoint);
-   case NIFTI_TYPE_INT8:
-      return reg_tools_getMaxValue_core<char>(image, timepoint);
-   case NIFTI_TYPE_UINT16:
-      return reg_tools_getMaxValue_core<unsigned short>(image, timepoint);
-   case NIFTI_TYPE_INT16:
-      return reg_tools_getMaxValue_core<short>(image, timepoint);
-   case NIFTI_TYPE_UINT32:
-      return reg_tools_getMaxValue_core<unsigned int>(image, timepoint);
-   case NIFTI_TYPE_INT32:
-      return reg_tools_getMaxValue_core<int>(image, timepoint);
-   case NIFTI_TYPE_FLOAT32:
-      return reg_tools_getMaxValue_core<float>(image, timepoint);
-   case NIFTI_TYPE_FLOAT64:
-      return reg_tools_getMaxValue_core<double>(image, timepoint);
-   default:
-      reg_print_fct_error("reg_tools_getMaxValue");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
+float reg_tools_getMaxValue(const nifti_image *image, int timepoint) {
+    // Check the image data type
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        return reg_tools_getMinMaxValue_core<unsigned char>(image, timepoint, false);
+    case NIFTI_TYPE_INT8:
+        return reg_tools_getMinMaxValue_core<char>(image, timepoint, false);
+    case NIFTI_TYPE_UINT16:
+        return reg_tools_getMinMaxValue_core<unsigned short>(image, timepoint, false);
+    case NIFTI_TYPE_INT16:
+        return reg_tools_getMinMaxValue_core<short>(image, timepoint, false);
+    case NIFTI_TYPE_UINT32:
+        return reg_tools_getMinMaxValue_core<unsigned int>(image, timepoint, false);
+    case NIFTI_TYPE_INT32:
+        return reg_tools_getMinMaxValue_core<int>(image, timepoint, false);
+    case NIFTI_TYPE_FLOAT32:
+        return reg_tools_getMinMaxValue_core<float>(image, timepoint, false);
+    case NIFTI_TYPE_FLOAT64:
+        return reg_tools_getMinMaxValue_core<double>(image, timepoint, false);
+    default:
+        reg_print_fct_error("reg_tools_getMaxValue");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
 template <class DTYPE>
-float reg_tools_getMeanValue_core(nifti_image *image)
-{
-   // Create a pointer to the image data
-   DTYPE *imgPtr = static_cast<DTYPE *>(image->data);
-   // Set a variable to store the minimal value
-   float meanValue=0;
-   if(image->scl_slope==0) image->scl_slope=1.f;
-   // Loop over all voxel to find the lowest value
-   for(size_t i=0; i<image->nvox; ++i)
-   {
-      DTYPE currentVal = (DTYPE)((float)imgPtr[i] * image->scl_slope + image->scl_inter);
-      meanValue+=currentVal;
-   }
-   meanValue=(float)(meanValue/(double) image->nvox);
-   // The lowest value is returned
-   return meanValue;
-}
-/* *************************************************************** */
-float reg_tools_getMeanValue(nifti_image *image)
-{
-   // Check the image data type
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      return reg_tools_getMeanValue_core<unsigned char>(image);
-   case NIFTI_TYPE_INT8:
-      return reg_tools_getMeanValue_core<char>(image);
-   case NIFTI_TYPE_UINT16:
-      return reg_tools_getMeanValue_core<unsigned short>(image);
-   case NIFTI_TYPE_INT16:
-      return reg_tools_getMeanValue_core<short>(image);
-   case NIFTI_TYPE_UINT32:
-      return reg_tools_getMeanValue_core<unsigned int>(image);
-   case NIFTI_TYPE_INT32:
-      return reg_tools_getMeanValue_core<int>(image);
-   case NIFTI_TYPE_FLOAT32:
-      return reg_tools_getMeanValue_core<float>(image);
-   case NIFTI_TYPE_FLOAT64:
-      return reg_tools_getMeanValue_core<double>(image);
-   default:
-      reg_print_fct_error("reg_tools_getMeanValue");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
+float reg_tools_getMeanValue_core(const nifti_image *image) {
+    const DTYPE *imgPtr = static_cast<DTYPE*>(image->data);
+    float meanValue = 0;
+    const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
+    for (size_t i = 0; i < image->nvox; ++i) {
+        DTYPE currentVal = (DTYPE)((float)imgPtr[i] * sclSlope + image->scl_inter);
+        meanValue += currentVal;
+    }
+    meanValue = float(meanValue / image->nvox);
+    return meanValue;
+}
+/* *************************************************************** */
+float reg_tools_getMeanValue(const nifti_image *image) {
+    // Check the image data type
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        return reg_tools_getMeanValue_core<unsigned char>(image);
+    case NIFTI_TYPE_INT8:
+        return reg_tools_getMeanValue_core<char>(image);
+    case NIFTI_TYPE_UINT16:
+        return reg_tools_getMeanValue_core<unsigned short>(image);
+    case NIFTI_TYPE_INT16:
+        return reg_tools_getMeanValue_core<short>(image);
+    case NIFTI_TYPE_UINT32:
+        return reg_tools_getMeanValue_core<unsigned int>(image);
+    case NIFTI_TYPE_INT32:
+        return reg_tools_getMeanValue_core<int>(image);
+    case NIFTI_TYPE_FLOAT32:
+        return reg_tools_getMeanValue_core<float>(image);
+    case NIFTI_TYPE_FLOAT64:
+        return reg_tools_getMeanValue_core<double>(image);
+    default:
+        reg_print_fct_error("reg_tools_getMeanValue");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
 template <class DTYPE>
-float reg_tools_getSTDValue_core(nifti_image *image)
-{
-   // Create a pointer to the image data
-   DTYPE *imgPtr = static_cast<DTYPE *>(image->data);
-   // Set a variable to store the minimal value
-   float meanValue = reg_tools_getMeanValue(image);
-   float stdValue=0;
-   if(image->scl_slope==0) image->scl_slope=1.f;
-   // Loop over all voxel to find the lowest value
-   for(size_t i=0; i<image->nvox; ++i)
-   {
-      DTYPE currentVal = (DTYPE)((float)imgPtr[i] * image->scl_slope + image->scl_inter);
-      stdValue+=(currentVal-meanValue)*(currentVal-meanValue);
-   }
-   stdValue = (float) std::sqrt(stdValue/(double) image->nvox);
-   // The lowest value is returned
-   return stdValue;
-}
-/* *************************************************************** */
-float reg_tools_getSTDValue(nifti_image *image)
-{
-   // Check the image data type
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      return reg_tools_getSTDValue_core<unsigned char>(image);
-   case NIFTI_TYPE_INT8:
-      return reg_tools_getSTDValue_core<char>(image);
-   case NIFTI_TYPE_UINT16:
-      return reg_tools_getSTDValue_core<unsigned short>(image);
-   case NIFTI_TYPE_INT16:
-      return reg_tools_getSTDValue_core<short>(image);
-   case NIFTI_TYPE_UINT32:
-      return reg_tools_getSTDValue_core<unsigned int>(image);
-   case NIFTI_TYPE_INT32:
-      return reg_tools_getSTDValue_core<int>(image);
-   case NIFTI_TYPE_FLOAT32:
-      return reg_tools_getSTDValue_core<float>(image);
-   case NIFTI_TYPE_FLOAT64:
-      return reg_tools_getSTDValue_core<double>(image);
-   default:
-      reg_print_fct_error("reg_tools_getSTDValue");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
+float reg_tools_getSTDValue_core(const nifti_image *image) {
+    const DTYPE *imgPtr = static_cast<DTYPE*>(image->data);
+    const float meanValue = reg_tools_getMeanValue(image);
+    float stdValue = 0;
+    const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
+    for (size_t i = 0; i < image->nvox; ++i) {
+        const DTYPE currentVal = (DTYPE)((float)imgPtr[i] * sclSlope + image->scl_inter);
+        stdValue += (currentVal - meanValue) * (currentVal - meanValue);
+    }
+    stdValue = std::sqrt(stdValue / image->nvox);
+    return stdValue;
+}
+/* *************************************************************** */
+float reg_tools_getSTDValue(const nifti_image *image) {
+    // Check the image data type
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        return reg_tools_getSTDValue_core<unsigned char>(image);
+    case NIFTI_TYPE_INT8:
+        return reg_tools_getSTDValue_core<char>(image);
+    case NIFTI_TYPE_UINT16:
+        return reg_tools_getSTDValue_core<unsigned short>(image);
+    case NIFTI_TYPE_INT16:
+        return reg_tools_getSTDValue_core<short>(image);
+    case NIFTI_TYPE_UINT32:
+        return reg_tools_getSTDValue_core<unsigned int>(image);
+    case NIFTI_TYPE_INT32:
+        return reg_tools_getSTDValue_core<int>(image);
+    case NIFTI_TYPE_FLOAT32:
+        return reg_tools_getSTDValue_core<float>(image);
+    case NIFTI_TYPE_FLOAT64:
+        return reg_tools_getSTDValue_core<double>(image);
+    default:
+        reg_print_fct_error("reg_tools_getSTDValue");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DTYPE>
-void reg_flippAxis_type(int nx,
-                        int ny,
-                        int nz,
-                        int nt,
-                        int nu,
-                        int nv,
-                        int nw,
-                        void *inputArray,
-                        void *outputArray,
-                        std::string cmd
-                        )
-{
-   // Allocate the outputArray if it is not allocated yet
-   if(outputArray==nullptr)
-      outputArray=(void *)malloc(nx*ny*nz*nt*nu*nv*nw*sizeof(DTYPE));
-
-   // Parse the cmd to check which axis have to be flipped
-   char *axisName=(char *)"x\0y\0z\0t\0u\0v\0w\0";
-   int increment[7]= {1,1,1,1,1,1,1};
-   int start[7]= {0,0,0,0,0,0,0};
-   int end[7]= {nx,ny,nz,nt,nu,nv,nw};
-   for(int i=0; i<7; ++i)
-   {
-      if(cmd.find(axisName[i*2])!=std::string::npos)
-      {
-         increment[i]=-1;
-         start[i]=end[i]-1;
-      }
-   }
-
-   // Define the reading and writting pointers
-   DTYPE *inputPtr=static_cast<DTYPE *>(inputArray);
-   DTYPE *outputPtr=static_cast<DTYPE *>(outputArray);
-
-   // Copy the data and flipp axis if required
-   for(int w=0, w2=start[6]; w<nw; ++w, w2+=increment[6])
-   {
-      size_t index_w=w2*nx*ny*nz*nt*nu*nv;
-      for(int v=0, v2=start[5]; v<nv; ++v, v2+=increment[5])
-      {
-         size_t index_v=index_w + v2*nx*ny*nz*nt*nu;
-         for(int u=0, u2=start[4]; u<nu; ++u, u2+=increment[4])
-         {
-            size_t index_u=index_v + u2*nx*ny*nz*nt;
-            for(int t=0, t2=start[3]; t<nt; ++t, t2+=increment[3])
-            {
-               size_t index_t=index_u + t2*nx*ny*nz;
-               for(int z=0, z2=start[2]; z<nz; ++z, z2+=increment[2])
-               {
-                  size_t index_z=index_t + z2*nx*ny;
-                  for(int y=0, y2=start[1]; y<ny; ++y, y2+=increment[1])
-                  {
-                     size_t index_y=index_z + y2*nx;
-                     for(int x=0, x2=start[0]; x<nx; ++x, x2+=increment[0])
-                     {
-                        size_t index=index_y + x2;
-                        *outputPtr++ = inputPtr[index];
-                     }
-                  }
-               }
+void reg_flipAxis_type(int nx,
+                       int ny,
+                       int nz,
+                       int nt,
+                       int nu,
+                       int nv,
+                       int nw,
+                       const void *inputArray,
+                       void **outputArray,
+                       const std::string& cmd) {
+    // Allocate the outputArray if it is not allocated yet
+    if (*outputArray == nullptr)
+        *outputArray = malloc(nx * ny * nz * nt * nu * nv * nw * sizeof(DTYPE));
+
+    // Parse the cmd to check which axis have to be flipped
+    const char *axisName = "x\0y\0z\0t\0u\0v\0w\0";
+    int increment[7] = {1, 1, 1, 1, 1, 1, 1};
+    int start[7] = {0, 0, 0, 0, 0, 0, 0};
+    const int end[7] = {nx, ny, nz, nt, nu, nv, nw};
+    for (int i = 0; i < 7; ++i) {
+        if (cmd.find(axisName[i * 2]) != std::string::npos) {
+            increment[i] = -1;
+            start[i] = end[i] - 1;
+        }
+    }
+
+    // Define the reading and writting pointers
+    const DTYPE *inputPtr = static_cast<const DTYPE*>(inputArray);
+    DTYPE *outputPtr = static_cast<DTYPE*>(*outputArray);
+
+    // Copy the data and flipp axis if required
+    for (int w = 0, w2 = start[6]; w < nw; ++w, w2 += increment[6]) {
+        size_t index_w = w2 * nx * ny * nz * nt * nu * nv;
+        for (int v = 0, v2 = start[5]; v < nv; ++v, v2 += increment[5]) {
+            size_t index_v = index_w + v2 * nx * ny * nz * nt * nu;
+            for (int u = 0, u2 = start[4]; u < nu; ++u, u2 += increment[4]) {
+                size_t index_u = index_v + u2 * nx * ny * nz * nt;
+                for (int t = 0, t2 = start[3]; t < nt; ++t, t2 += increment[3]) {
+                    size_t index_t = index_u + t2 * nx * ny * nz;
+                    for (int z = 0, z2 = start[2]; z < nz; ++z, z2 += increment[2]) {
+                        size_t index_z = index_t + z2 * nx * ny;
+                        for (int y = 0, y2 = start[1]; y < ny; ++y, y2 += increment[1]) {
+                            size_t index_y = index_z + y2 * nx;
+                            for (int x = 0, x2 = start[0]; x < nx; ++x, x2 += increment[0]) {
+                                size_t index = index_y + x2;
+                                *outputPtr++ = inputPtr[index];
+                            }
+                        }
+                    }
+                }
             }
-         }
-      }
-   }
-   return;
-}
-/* *************************************************************** */
-void reg_flippAxis(nifti_image *image,
-                   void *outputArray,
-                   std::string cmd
-                   )
-{
-   // Check the image data type
-   switch(image->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_flippAxis_type<unsigned char>
+        }
+    }
+}
+/* *************************************************************** */
+void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd) {
+    // Check the image data type
+    switch (image->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_flipAxis_type<unsigned char>
             (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
              image->data, outputArray, cmd);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_flippAxis_type<char>
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_flipAxis_type<char>
             (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
              image->data, outputArray, cmd);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_flippAxis_type<unsigned short>
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_flipAxis_type<unsigned short>
             (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
              image->data, outputArray, cmd);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_flippAxis_type<short>
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_flipAxis_type<short>
             (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
              image->data, outputArray, cmd);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_flippAxis_type<unsigned int>
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_flipAxis_type<unsigned int>
             (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
              image->data, outputArray, cmd);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_flippAxis_type<int>
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_flipAxis_type<int>
             (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
              image->data, outputArray, cmd);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_flippAxis_type<float>
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_flipAxis_type<float>
             (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
              image->data, outputArray, cmd);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_flippAxis_type<double>
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_flipAxis_type<double>
             (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
              image->data, outputArray, cmd);
-      break;
-   default:
-      reg_print_fct_error("reg_flippAxis");
-      reg_print_msg_error("The image data type is not supported");
-      reg_exit();
-   }
-   return;
+        break;
+    default:
+        reg_print_fct_error("reg_flipAxis");
+        reg_print_msg_error("The image data type is not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DTYPE>
-void reg_getDisplacementFromDeformation_2D(nifti_image *field)
-{
-   DTYPE *ptrX = static_cast<DTYPE *>(field->data);
-   DTYPE *ptrY = &ptrX[field->nx*field->ny];
-
-   mat44 matrix;
-   if(field->sform_code>0)
-      matrix=field->sto_xyz;
-   else matrix=field->qto_xyz;
-
-   int x, y,  index;
-   DTYPE xInit, yInit;
-#if defined (_OPENMP)
+void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
+    DTYPE *ptrX = static_cast<DTYPE*>(field->data);
+    DTYPE *ptrY = &ptrX[field->nx * field->ny];
+
+    mat44 matrix;
+    if (field->sform_code > 0)
+        matrix = field->sto_xyz;
+    else matrix = field->qto_xyz;
+
+    int x, y, index;
+    DTYPE xInit, yInit;
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(field, matrix, ptrX, ptrY) \
    private(x, y, index, xInit, yInit)
 #endif
-   for(y=0; y<field->ny; y++)
-   {
-      index=y*field->nx;
-      for(x=0; x<field->nx; x++)
-      {
-
-         // Get the initial control point position
-         xInit = matrix.m[0][0]*(DTYPE)x
-               + matrix.m[0][1]*(DTYPE)y
-               + matrix.m[0][3];
-         yInit = matrix.m[1][0]*(DTYPE)x
-               + matrix.m[1][1]*(DTYPE)y
-               + matrix.m[1][3];
-
-         // The initial position is subtracted from every values
-         ptrX[index] -= xInit;
-         ptrY[index] -= yInit;
-         index++;
-      }
-   }
+    for (y = 0; y < field->ny; y++) {
+        index = y * field->nx;
+        for (x = 0; x < field->nx; x++) {
+            // Get the initial control point position
+            xInit = matrix.m[0][0] * (DTYPE)x
+                + matrix.m[0][1] * (DTYPE)y
+                + matrix.m[0][3];
+            yInit = matrix.m[1][0] * (DTYPE)x
+                + matrix.m[1][1] * (DTYPE)y
+                + matrix.m[1][3];
+
+            // The initial position is subtracted from every values
+            ptrX[index] -= xInit;
+            ptrY[index] -= yInit;
+            index++;
+        }
+    }
 }
 /* *************************************************************** */
 template<class DTYPE>
-void reg_getDisplacementFromDeformation_3D(nifti_image *field)
-{
-   DTYPE *ptrX = static_cast<DTYPE *>(field->data);
-   DTYPE *ptrY = &ptrX[field->nx*field->ny*field->nz];
-   DTYPE *ptrZ = &ptrY[field->nx*field->ny*field->nz];
-
-   mat44 matrix;
-   if(field->sform_code>0)
-      matrix=field->sto_xyz;
-   else matrix=field->qto_xyz;
-
-   int x, y, z, index;
-   float xInit, yInit, zInit;
-#if defined (_OPENMP)
+void reg_getDisplacementFromDeformation_3D(nifti_image *field) {
+    DTYPE *ptrX = static_cast<DTYPE*>(field->data);
+    DTYPE *ptrY = &ptrX[field->nx * field->ny * field->nz];
+    DTYPE *ptrZ = &ptrY[field->nx * field->ny * field->nz];
+
+    mat44 matrix;
+    if (field->sform_code > 0)
+        matrix = field->sto_xyz;
+    else matrix = field->qto_xyz;
+
+    int x, y, z, index;
+    float xInit, yInit, zInit;
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(field, matrix, \
    ptrX, ptrY, ptrZ) \
    private(x, y, z, index, xInit, yInit, zInit)
 #endif
-   for(z=0; z<field->nz; z++)
-   {
-      index=z*field->nx*field->ny;
-      for(y=0; y<field->ny; y++)
-      {
-         for(x=0; x<field->nx; x++)
-         {
-            // Get the initial control point position
-            xInit = matrix.m[0][0]*static_cast<float>(x)
-                  + matrix.m[0][1]*static_cast<float>(y)
-                  + matrix.m[0][2]*static_cast<float>(z)
-                  + matrix.m[0][3];
-            yInit = matrix.m[1][0]*static_cast<float>(x)
-                  + matrix.m[1][1]*static_cast<float>(y)
-                  + matrix.m[1][2]*static_cast<float>(z)
-                  + matrix.m[1][3];
-            zInit = matrix.m[2][0]*static_cast<float>(x)
-                  + matrix.m[2][1]*static_cast<float>(y)
-                  + matrix.m[2][2]*static_cast<float>(z)
-                  + matrix.m[2][3];
-
-            // The initial position is subtracted from every values
-            ptrX[index] -= static_cast<DTYPE>(xInit);
-            ptrY[index] -= static_cast<DTYPE>(yInit);
-            ptrZ[index] -= static_cast<DTYPE>(zInit);
-            index++;
-         }
-      }
-   }
-}
-/* *************************************************************** */
-int reg_getDisplacementFromDeformation(nifti_image *field)
-{
-   if(field->datatype==NIFTI_TYPE_FLOAT32)
-   {
-      switch(field->nu)
-      {
-      case 2:
-         reg_getDisplacementFromDeformation_2D<float>(field);
-         break;
-      case 3:
-         reg_getDisplacementFromDeformation_3D<float>(field);
-         break;
-      default:
-         reg_print_fct_error("reg_getDisplacementFromDeformation");
-         reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension");
-         reg_exit();
-      }
-   }
-   else if(field->datatype==NIFTI_TYPE_FLOAT64)
-   {
-      switch(field->nu)
-      {
-      case 2:
-         reg_getDisplacementFromDeformation_2D<double>(field);
-         break;
-      case 3:
-         reg_getDisplacementFromDeformation_3D<double>(field);
-         break;
-      default:
-         reg_print_fct_error("reg_getDisplacementFromDeformation");
-         reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension");
-         reg_exit();
-      }
-   }
-   else
-   {
-      reg_print_fct_error("reg_getDisplacementFromDeformation");
-      reg_print_msg_error("Only single or double floating precision have been implemented");
-      reg_exit();
-   }
-   field->intent_code=NIFTI_INTENT_VECTOR;
-   memset(field->intent_name, 0, 16);
-   strcpy(field->intent_name,"NREG_TRANS");
-   if(field->intent_p1==DEF_FIELD)
-      field->intent_p1=DISP_FIELD;
-   if(field->intent_p1==DEF_VEL_FIELD)
-      field->intent_p1=DISP_VEL_FIELD;
-   return EXIT_SUCCESS;
+    for (z = 0; z < field->nz; z++) {
+        index = z * field->nx * field->ny;
+        for (y = 0; y < field->ny; y++) {
+            for (x = 0; x < field->nx; x++) {
+                // Get the initial control point position
+                xInit = matrix.m[0][0] * static_cast<float>(x)
+                    + matrix.m[0][1] * static_cast<float>(y)
+                    + matrix.m[0][2] * static_cast<float>(z)
+                    + matrix.m[0][3];
+                yInit = matrix.m[1][0] * static_cast<float>(x)
+                    + matrix.m[1][1] * static_cast<float>(y)
+                    + matrix.m[1][2] * static_cast<float>(z)
+                    + matrix.m[1][3];
+                zInit = matrix.m[2][0] * static_cast<float>(x)
+                    + matrix.m[2][1] * static_cast<float>(y)
+                    + matrix.m[2][2] * static_cast<float>(z)
+                    + matrix.m[2][3];
+
+                // The initial position is subtracted from every values
+                ptrX[index] -= static_cast<DTYPE>(xInit);
+                ptrY[index] -= static_cast<DTYPE>(yInit);
+                ptrZ[index] -= static_cast<DTYPE>(zInit);
+                index++;
+            }
+        }
+    }
 }
 /* *************************************************************** */
+int reg_getDisplacementFromDeformation(nifti_image *field) {
+    if (field->datatype == NIFTI_TYPE_FLOAT32) {
+        switch (field->nu) {
+        case 2:
+            reg_getDisplacementFromDeformation_2D<float>(field);
+            break;
+        case 3:
+            reg_getDisplacementFromDeformation_3D<float>(field);
+            break;
+        default:
+            reg_print_fct_error("reg_getDisplacementFromDeformation");
+            reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension");
+            reg_exit();
+        }
+    } else if (field->datatype == NIFTI_TYPE_FLOAT64) {
+        switch (field->nu) {
+        case 2:
+            reg_getDisplacementFromDeformation_2D<double>(field);
+            break;
+        case 3:
+            reg_getDisplacementFromDeformation_3D<double>(field);
+            break;
+        default:
+            reg_print_fct_error("reg_getDisplacementFromDeformation");
+            reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension");
+            reg_exit();
+        }
+    } else {
+        reg_print_fct_error("reg_getDisplacementFromDeformation");
+        reg_print_msg_error("Only single or double floating precision have been implemented");
+        reg_exit();
+    }
+    field->intent_code = NIFTI_INTENT_VECTOR;
+    memset(field->intent_name, 0, 16);
+    strcpy(field->intent_name, "NREG_TRANS");
+    if (field->intent_p1 == DEF_FIELD)
+        field->intent_p1 = DISP_FIELD;
+    if (field->intent_p1 == DEF_VEL_FIELD)
+        field->intent_p1 = DISP_VEL_FIELD;
+    return EXIT_SUCCESS;
+}
 /* *************************************************************** */
 template<class DTYPE>
-void reg_getDeformationFromDisplacement_2D(nifti_image *field)
-{
-   DTYPE *ptrX = static_cast<DTYPE *>(field->data);
-   DTYPE *ptrY = &ptrX[field->nx*field->ny];
-
-   mat44 matrix;
-   if(field->sform_code>0)
-      matrix=field->sto_xyz;
-   else matrix=field->qto_xyz;
-
-   int x, y, index;
-   DTYPE xInit, yInit;
-#if defined (_OPENMP)
+void reg_getDeformationFromDisplacement_2D(nifti_image *field) {
+    DTYPE *ptrX = static_cast<DTYPE*>(field->data);
+    DTYPE *ptrY = &ptrX[field->nx * field->ny];
+
+    mat44 matrix;
+    if (field->sform_code > 0)
+        matrix = field->sto_xyz;
+    else matrix = field->qto_xyz;
+
+    int x, y, index;
+    DTYPE xInit, yInit;
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(field, matrix, \
    ptrX, ptrY) \
    private(x, y, index, xInit, yInit)
 #endif
-   for(y=0; y<field->ny; y++)
-   {
-      index=y*field->nx;
-      for(x=0; x<field->nx; x++)
-      {
-
-         // Get the initial control point position
-         xInit = matrix.m[0][0]*(DTYPE)x
-               + matrix.m[0][1]*(DTYPE)y
-               + matrix.m[0][3];
-         yInit = matrix.m[1][0]*(DTYPE)x
-               + matrix.m[1][1]*(DTYPE)y
-               + matrix.m[1][3];
-
-         // The initial position is added from every values
-         ptrX[index] += xInit;
-         ptrY[index] += yInit;
-         index++;
-      }
-   }
+    for (y = 0; y < field->ny; y++) {
+        index = y * field->nx;
+        for (x = 0; x < field->nx; x++) {
+            // Get the initial control point position
+            xInit = matrix.m[0][0] * (DTYPE)x
+                + matrix.m[0][1] * (DTYPE)y
+                + matrix.m[0][3];
+            yInit = matrix.m[1][0] * (DTYPE)x
+                + matrix.m[1][1] * (DTYPE)y
+                + matrix.m[1][3];
+
+            // The initial position is added from every values
+            ptrX[index] += xInit;
+            ptrY[index] += yInit;
+            index++;
+        }
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DTYPE>
-void reg_getDeformationFromDisplacement_3D(nifti_image *field)
-{
-   DTYPE *ptrX = static_cast<DTYPE *>(field->data);
-   DTYPE *ptrY = &ptrX[field->nx*field->ny*field->nz];
-   DTYPE *ptrZ = &ptrY[field->nx*field->ny*field->nz];
-
-   mat44 matrix;
-   if(field->sform_code>0)
-      matrix=field->sto_xyz;
-   else matrix=field->qto_xyz;
-
-   int x, y, z, index;
-   float xInit, yInit, zInit;
-#if defined (_OPENMP)
+void reg_getDeformationFromDisplacement_3D(nifti_image *field) {
+    DTYPE *ptrX = static_cast<DTYPE*>(field->data);
+    DTYPE *ptrY = &ptrX[field->nx * field->ny * field->nz];
+    DTYPE *ptrZ = &ptrY[field->nx * field->ny * field->nz];
+
+    mat44 matrix;
+    if (field->sform_code > 0)
+        matrix = field->sto_xyz;
+    else matrix = field->qto_xyz;
+
+    int x, y, z, index;
+    float xInit, yInit, zInit;
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(field, matrix, ptrX, ptrY, ptrZ) \
    private(x, y, z, index, xInit, yInit, zInit)
 #endif
-   for(z=0; z<field->nz; z++)
-   {
-      index=z*field->nx*field->ny;
-      for(y=0; y<field->ny; y++)
-      {
-         for(x=0; x<field->nx; x++)
-         {
-
-            // Get the initial control point position
-            xInit = matrix.m[0][0]*static_cast<float>(x)
-                  + matrix.m[0][1]*static_cast<float>(y)
-                  + matrix.m[0][2]*static_cast<float>(z)
-                  + matrix.m[0][3];
-            yInit = matrix.m[1][0]*static_cast<float>(x)
-                  + matrix.m[1][1]*static_cast<float>(y)
-                  + matrix.m[1][2]*static_cast<float>(z)
-                  + matrix.m[1][3];
-            zInit = matrix.m[2][0]*static_cast<float>(x)
-                  + matrix.m[2][1]*static_cast<float>(y)
-                  + matrix.m[2][2]*static_cast<float>(z)
-                  + matrix.m[2][3];
+    for (z = 0; z < field->nz; z++) {
+        index = z * field->nx * field->ny;
+        for (y = 0; y < field->ny; y++) {
+            for (x = 0; x < field->nx; x++) {
+                // Get the initial control point position
+                xInit = matrix.m[0][0] * static_cast<float>(x)
+                    + matrix.m[0][1] * static_cast<float>(y)
+                    + matrix.m[0][2] * static_cast<float>(z)
+                    + matrix.m[0][3];
+                yInit = matrix.m[1][0] * static_cast<float>(x)
+                    + matrix.m[1][1] * static_cast<float>(y)
+                    + matrix.m[1][2] * static_cast<float>(z)
+                    + matrix.m[1][3];
+                zInit = matrix.m[2][0] * static_cast<float>(x)
+                    + matrix.m[2][1] * static_cast<float>(y)
+                    + matrix.m[2][2] * static_cast<float>(z)
+                    + matrix.m[2][3];
+
+                // The initial position is subtracted from every values
+                ptrX[index] += static_cast<DTYPE>(xInit);
+                ptrY[index] += static_cast<DTYPE>(yInit);
+                ptrZ[index] += static_cast<DTYPE>(zInit);
+                index++;
+            }
+        }
+    }
+}
+/* *************************************************************** */
+int reg_getDeformationFromDisplacement(nifti_image *field) {
+    if (field->datatype == NIFTI_TYPE_FLOAT32) {
+        switch (field->nu) {
+        case 2:
+            reg_getDeformationFromDisplacement_2D<float>(field);
+            break;
+        case 3:
+            reg_getDeformationFromDisplacement_3D<float>(field);
+            break;
+        default:
+            reg_print_fct_error("reg_getDeformationFromDisplacement");
+            reg_print_msg_error("Only implemented for 2 or 3D deformation fields");
+            reg_exit();
+        }
+    } else if (field->datatype == NIFTI_TYPE_FLOAT64) {
+        switch (field->nu) {
+        case 2:
+            reg_getDeformationFromDisplacement_2D<double>(field);
+            break;
+        case 3:
+            reg_getDeformationFromDisplacement_3D<double>(field);
+            break;
+        default:
+            reg_print_fct_error("reg_getDeformationFromDisplacement");
+            reg_print_msg_error("Only implemented for 2 or 3D deformation fields");
+            reg_exit();
+        }
+    } else {
+        reg_print_fct_error("reg_getDeformationFromDisplacement");
+        reg_print_msg_error("Only single or double floating precision have been implemented");
+        reg_exit();
+    }
 
-            // The initial position is subtracted from every values
-            ptrX[index] += static_cast<DTYPE>(xInit);
-            ptrY[index] += static_cast<DTYPE>(yInit);
-            ptrZ[index] += static_cast<DTYPE>(zInit);
-            index++;
-         }
-      }
-   }
-}
-/* *************************************************************** */
-/* *************************************************************** */
-int reg_getDeformationFromDisplacement(nifti_image *field)
-{
-   if(field->datatype==NIFTI_TYPE_FLOAT32)
-   {
-      switch(field->nu)
-      {
-      case 2:
-         reg_getDeformationFromDisplacement_2D<float>(field);
-         break;
-      case 3:
-         reg_getDeformationFromDisplacement_3D<float>(field);
-         break;
-      default:
-         reg_print_fct_error("reg_getDeformationFromDisplacement");
-         reg_print_msg_error("Only implemented for 2 or 3D deformation fields");
-         reg_exit();
-      }
-   }
-   else if(field->datatype==NIFTI_TYPE_FLOAT64)
-   {
-      switch(field->nu)
-      {
-      case 2:
-         reg_getDeformationFromDisplacement_2D<double>(field);
-         break;
-      case 3:
-         reg_getDeformationFromDisplacement_3D<double>(field);
-         break;
-      default:
-         reg_print_fct_error("reg_getDeformationFromDisplacement");
-         reg_print_msg_error("Only implemented for 2 or 3D deformation fields");
-         reg_exit();
-      }
-   }
-   else
-   {
-      reg_print_fct_error("reg_getDeformationFromDisplacement");
-      reg_print_msg_error("Only single or double floating precision have been implemented");
-      reg_exit();
-   }
-
-   field->intent_code=NIFTI_INTENT_VECTOR;
-   memset(field->intent_name, 0, 16);
-   strcpy(field->intent_name,"NREG_TRANS");
-   if(field->intent_p1==DISP_FIELD)
-      field->intent_p1=DEF_FIELD;
-   if(field->intent_p1==DISP_VEL_FIELD)
-      field->intent_p1=DEF_VEL_FIELD;
-   return EXIT_SUCCESS;
+    field->intent_code = NIFTI_INTENT_VECTOR;
+    memset(field->intent_name, 0, 16);
+    strcpy(field->intent_name, "NREG_TRANS");
+    if (field->intent_p1 == DISP_FIELD)
+        field->intent_p1 = DEF_FIELD;
+    if (field->intent_p1 == DISP_VEL_FIELD)
+        field->intent_p1 = DEF_VEL_FIELD;
+    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DTYPE>
 void reg_setGradientToZero_core(nifti_image *image,
-                               bool x_axis,
-                               bool y_axis,
-                               bool z_axis)
-{
-   size_t voxel_number = (size_t)image->nx*image->ny*image->nz;
-   DTYPE *ptr = static_cast<DTYPE *>(image->data);
-   if(x_axis){
-      for(size_t i=0; i<voxel_number; ++i)
-         *ptr++=0;
-   }
-   else ptr += voxel_number;
-   if(y_axis){
-      for(size_t i=0; i<voxel_number; ++i)
-         *ptr++=0;
-   }
-   else ptr += voxel_number;
-   if(z_axis && image->nu>2){
-      for(size_t i=0; i<voxel_number; ++i)
-         *ptr++=0;
-   }
-   return;
+                                bool xAxis,
+                                bool yAxis,
+                                bool zAxis) {
+    size_t voxelNumber = size_t(image->nx * image->ny * image->nz);
+    DTYPE *ptr = static_cast<DTYPE*>(image->data);
+    if (xAxis) {
+        for (size_t i = 0; i < voxelNumber; ++i)
+            *ptr++ = 0;
+    } else ptr += voxelNumber;
+    if (yAxis) {
+        for (size_t i = 0; i < voxelNumber; ++i)
+            *ptr++ = 0;
+    } else ptr += voxelNumber;
+    if (zAxis && image->nu > 2) {
+        for (size_t i = 0; i < voxelNumber; ++i)
+            *ptr++ = 0;
+    }
 }
 /* *************************************************************** */
 void reg_setGradientToZero(nifti_image *image,
-                           bool x_axis,
-                           bool y_axis,
-                           bool z_axis=false)
-{
-   // Ensure that the specified image is a 5D image
-   if(image->ndim != 5)
-   {
-      reg_print_fct_error("reg_setGradientToZero");
-      reg_print_msg_error("Input image is expected to be a 5D image");
-      reg_exit();
-   }
-   switch(image->datatype){
-   case NIFTI_TYPE_FLOAT32:
-      reg_setGradientToZero_core<float>(image, x_axis, y_axis, z_axis);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_setGradientToZero_core<double>(image, x_axis, y_axis, z_axis);
-      break;
-   default:
-      reg_print_fct_error("reg_setGradientToZero");
-      reg_print_msg_error("Input image is expected to be float or double");
-      reg_exit();
-   }
-   return;
+                           bool xAxis,
+                           bool yAxis,
+                           bool zAxis = false) {
+    // Ensure that the specified image is a 5D image
+    if (image->ndim != 5) {
+        reg_print_fct_error("reg_setGradientToZero");
+        reg_print_msg_error("Input image is expected to be a 5D image");
+        reg_exit();
+    }
+    switch (image->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_setGradientToZero_core<float>(image, xAxis, yAxis, zAxis);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_setGradientToZero_core<double>(image, xAxis, yAxis, zAxis);
+        break;
+    default:
+        reg_print_fct_error("reg_setGradientToZero");
+        reg_print_msg_error("Input image is expected to be float or double");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DTYPE>
-double reg_test_compare_arrays(DTYPE *ptrA,
-                              DTYPE *ptrB,
-                              size_t nvox)
-{
-   double maxDifference=0;
-
-   for(size_t i=0; i<nvox; ++i)
-   {
-      double valA=(double)ptrA[i];
-      double valB=(double)ptrB[i];
-      if(valA!=valA || valB!=valB)
-      {
-         if(valA==valA || valB==valB)
-         {
-            reg_print_fct_warn("reg_test_compare_arrays");
-            reg_print_msg_warn("Unexpected NaN in only one of the array");
-            return std::numeric_limits<float>::max();
-         }
-      }
-      else
-      {
-         if(valA!=0 && valB!=0)
-         {
-            double diffRatio=valA/valB;
-            if(diffRatio<0)
-            {
-               diffRatio=std::abs(valA-valB);
-               maxDifference=maxDifference>diffRatio?maxDifference:diffRatio;
+double reg_test_compare_arrays(const DTYPE *ptrA,
+                               const DTYPE *ptrB,
+                               size_t nvox) {
+    double maxDifference = 0;
+
+    for (size_t i = 0; i < nvox; ++i) {
+        const double valA = (double)ptrA[i];
+        const double valB = (double)ptrB[i];
+        if (valA != valA || valB != valB) {
+            if (valA == valA || valB == valB) {
+                reg_print_fct_warn("reg_test_compare_arrays");
+                reg_print_msg_warn("Unexpected NaN in only one of the array");
+                return std::numeric_limits<float>::max();
+            }
+        } else {
+            if (valA != 0 && valB != 0) {
+                double diffRatio = valA / valB;
+                if (diffRatio < 0) {
+                    diffRatio = std::abs(valA - valB);
+                    maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio;
+                }
+                diffRatio -= 1.0;
+                maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio;
+            } else {
+                double diffRatio = std::abs(valA - valB);
+                maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio;
             }
-            diffRatio-=1.0;
-            maxDifference=maxDifference>diffRatio?maxDifference:diffRatio;
-         }
-         else
-         {
-            double diffRatio=std::abs(valA-valB);
-            maxDifference=maxDifference>diffRatio?maxDifference:diffRatio;
-         }
-      }
-   }
-   return maxDifference;
-}
-template double reg_test_compare_arrays<float>(float *ptrA, float *ptrB, size_t nvox);
-template double reg_test_compare_arrays<double>(double *ptrA, double *ptrB, size_t nvox);
+        }
+    }
+    return maxDifference;
+}
+template double reg_test_compare_arrays<float>(const float*, const float*, size_t);
+template double reg_test_compare_arrays<double>(const double*, const double*, size_t);
+/* *************************************************************** */
+template <class DTYPE>
+double reg_test_compare_images1(const nifti_image *imgA,
+                                const nifti_image *imgB) {
+    const DTYPE *imgAPtr = static_cast<DTYPE*>(imgA->data);
+    const DTYPE *imgBPtr = static_cast<DTYPE*>(imgB->data);
+    return reg_test_compare_arrays<DTYPE>(imgAPtr, imgBPtr, imgA->nvox);
+}
+/* *************************************************************** */
+double reg_test_compare_images(const nifti_image *imgA,
+                               const nifti_image *imgB) {
+    if (imgA->datatype != imgB->datatype) {
+        reg_print_fct_error("reg_test_compare_images");
+        reg_print_msg_error("Input images have different datatype");
+        reg_exit();
+    }
+    if (imgA->nvox != imgB->nvox) {
+        reg_print_fct_error("reg_test_compare_images");
+        reg_print_msg_error("Input images have different size");
+        reg_exit();
+    }
+    switch (imgA->datatype) {
+    case NIFTI_TYPE_UINT8:
+        return reg_test_compare_images1<unsigned char>(imgA, imgB);
+    case NIFTI_TYPE_UINT16:
+        return reg_test_compare_images1<unsigned short>(imgA, imgB);
+    case NIFTI_TYPE_UINT32:
+        return reg_test_compare_images1<unsigned int>(imgA, imgB);
+    case NIFTI_TYPE_INT8:
+        return reg_test_compare_images1<char>(imgA, imgB);
+    case NIFTI_TYPE_INT16:
+        return reg_test_compare_images1<short>(imgA, imgB);
+    case NIFTI_TYPE_INT32:
+        return reg_test_compare_images1<int>(imgA, imgB);
+    case NIFTI_TYPE_FLOAT32:
+        return reg_test_compare_images1<float>(imgA, imgB);
+    case NIFTI_TYPE_FLOAT64:
+        return reg_test_compare_images1<double>(imgA, imgB);
+    default:
+        reg_print_fct_error("reg_test_compare_images");
+        reg_print_msg_error("Unsupported data type");
+        reg_exit();
+    }
+}
 /* *************************************************************** */
 template <class DTYPE>
-double reg_test_compare_images1(nifti_image *imgA,
-                               nifti_image *imgB)
-{
-   DTYPE *imgAPtr = static_cast<DTYPE *>(imgA->data);
-   DTYPE *imgBPtr = static_cast<DTYPE *>(imgB->data);
-   return reg_test_compare_arrays<DTYPE>(imgAPtr,imgBPtr,imgA->nvox);
-}
-/* *************************************************************** */
-double reg_test_compare_images(nifti_image *imgA,
-                              nifti_image *imgB)
-{
-   if(imgA->datatype!=imgB->datatype)
-   {
-      reg_print_fct_error("reg_test_compare_images");
-      reg_print_msg_error("Input images have different datatype");
-      reg_exit();
-   }
-   if(imgA->nvox!=imgB->nvox)
-   {
-      reg_print_fct_error("reg_test_compare_images");
-      reg_print_msg_error("Input images have different size");
-      reg_exit();
-   }
-   switch(imgA->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      return reg_test_compare_images1<unsigned char>(imgA,imgB);
-   case NIFTI_TYPE_UINT16:
-      return reg_test_compare_images1<unsigned short>(imgA,imgB);
-   case NIFTI_TYPE_UINT32:
-      return reg_test_compare_images1<unsigned int>(imgA,imgB);
-   case NIFTI_TYPE_INT8:
-      return reg_test_compare_images1<char>(imgA,imgB);
-   case NIFTI_TYPE_INT16:
-      return reg_test_compare_images1<short>(imgA,imgB);
-   case NIFTI_TYPE_INT32:
-      return reg_test_compare_images1<int>(imgA,imgB);
-   case NIFTI_TYPE_FLOAT32:
-      return reg_test_compare_images1<float>(imgA,imgB);
-   case NIFTI_TYPE_FLOAT64:
-      return reg_test_compare_images1<double>(imgA,imgB);
-   default:
-      reg_print_fct_error("reg_test_compare_images");
-      reg_print_msg_error("Unsupported data type");
-      reg_exit();
-   }
+void reg_tools_abs_image1(nifti_image *img) {
+    DTYPE *ptr = static_cast<DTYPE*>(img->data);
+    for (size_t i = 0; i < img->nvox; ++i)
+        ptr[i] = static_cast<DTYPE>(fabs(static_cast<double>(ptr[i])));
+}
+/* *************************************************************** */
+void reg_tools_abs_image(nifti_image *img) {
+    switch (img->datatype) {
+    case NIFTI_TYPE_UINT8:
+        reg_tools_abs_image1<unsigned char>(img);
+        break;
+    case NIFTI_TYPE_UINT16:
+        reg_tools_abs_image1<unsigned short>(img);
+        break;
+    case NIFTI_TYPE_UINT32:
+        reg_tools_abs_image1<unsigned int>(img);
+        break;
+    case NIFTI_TYPE_INT8:
+        reg_tools_abs_image1<char>(img);
+        break;
+    case NIFTI_TYPE_INT16:
+        reg_tools_abs_image1<short>(img);
+        break;
+    case NIFTI_TYPE_INT32:
+        reg_tools_abs_image1<int>(img);
+        break;
+    case NIFTI_TYPE_FLOAT32:
+        reg_tools_abs_image1<float>(img);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_tools_abs_image1<double>(img);
+        break;
+    default:
+        reg_print_fct_error("reg_tools_abs_image");
+        reg_print_msg_error("Unsupported data type");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
+void mat44ToCptr(const mat44& mat, float *cMat) {
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            cMat[i * 4 + j] = mat.m[i][j];
+        }
+    }
+}
 /* *************************************************************** */
-template <class DTYPE>
-void reg_tools_abs_image1(nifti_image *img)
-{
-   DTYPE *ptr = static_cast<DTYPE *>(img->data);
-   for(size_t i=0; i<img->nvox; ++i)
-      ptr[i]=static_cast<DTYPE>(fabs(static_cast<double>(ptr[i])));
-}
-/* *************************************************************** */
-void reg_tools_abs_image(nifti_image *img)
-{
-   switch(img->datatype)
-   {
-   case NIFTI_TYPE_UINT8:
-      reg_tools_abs_image1<unsigned char>(img);
-      break;
-   case NIFTI_TYPE_UINT16:
-      reg_tools_abs_image1<unsigned short>(img);
-      break;
-   case NIFTI_TYPE_UINT32:
-      reg_tools_abs_image1<unsigned int>(img);
-      break;
-   case NIFTI_TYPE_INT8:
-      reg_tools_abs_image1<char>(img);
-      break;
-   case NIFTI_TYPE_INT16:
-      reg_tools_abs_image1<short>(img);
-      break;
-   case NIFTI_TYPE_INT32:
-      reg_tools_abs_image1<int>(img);
-      break;
-   case NIFTI_TYPE_FLOAT32:
-      reg_tools_abs_image1<float>(img);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_tools_abs_image1<double>(img);
-      break;
-   default:
-      reg_print_fct_error("reg_tools_abs_image");
-      reg_print_msg_error("Unsupported data type");
-      reg_exit();
-   }
-}
-/* *************************************************************** */
-void mat44ToCptr(const mat44& mat, float* cMat)
-{
-	for (int i = 0; i < 4; i++)
-	{
-		for (int j = 0; j < 4; j++)
-		{
-			cMat[i * 4 + j] = mat.m[i][j];
-		}
-	}
-}
-/* *************************************************************** */
-void cPtrToMat44(mat44 *mat, float* cMat)
-{
-	for (int i = 0; i < 4; i++)
-	{
-		for (int j = 0; j < 4; j++)
-		{
-			 mat->m[i][j]=cMat[i * 4 + j];
-		}
-	}
-}
-/* *************************************************************** */
-void mat33ToCptr(mat33 *mat, float* cMat, const unsigned int numMats)
-{
-	for (size_t k = 0; k < numMats; k++)
-	{
-		for (int i = 0; i < 3; i++)
-		{
-			for (int j = 0; j < 3; j++)
-			{
-				cMat[9*k +i * 3 + j] = mat[k].m[i][j];
-
-			}
-		}
-	}
-}
-/* *************************************************************** */
-void cPtrToMat33(mat33 *mat, float* cMat)
-{
-    for (int i = 0; i < 3; i++)
-    {
-        for (int j = 0; j < 3; j++)
-        {
-             mat->m[i][j]=cMat[i * 3 + j];
+void cPtrToMat44(mat44 *mat, const float *cMat) {
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            mat->m[i][j] = cMat[i * 4 + j];
+        }
+    }
+}
+/* *************************************************************** */
+void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned int numMats) {
+    for (size_t k = 0; k < numMats; k++) {
+        for (int i = 0; i < 3; i++) {
+            for (int j = 0; j < 3; j++) {
+                cMat[9 * k + i * 3 + j] = mat[k].m[i][j];
+            }
+        }
+    }
+}
+/* *************************************************************** */
+void cPtrToMat33(mat33 *mat, const float *cMat) {
+    for (int i = 0; i < 3; i++) {
+        for (int j = 0; j < 3; j++) {
+            mat->m[i][j] = cMat[i * 3 + j];
         }
     }
 }
 /* *************************************************************** */
 template<typename T>
-void matmnToCptr(T** mat, T* cMat, unsigned int m, unsigned int n) {
-    for (unsigned int i = 0; i < m; i++)
-    {
-        for (unsigned int j = 0; j < n; j++)
-        {
+void matmnToCptr(const T **mat, T *cMat, unsigned int m, unsigned int n) {
+    for (unsigned int i = 0; i < m; i++) {
+        for (unsigned int j = 0; j < n; j++) {
             cMat[i * n + j] = mat[i][j];
         }
     }
 }
-template void matmnToCptr<float>(float** mat, float* cMat, unsigned int m, unsigned int n);
-template void matmnToCptr<double>(double** mat, double* cMat, unsigned int m, unsigned int n);
+template void matmnToCptr<float>(const float**, float*, unsigned int, unsigned int);
+template void matmnToCptr<double>(const double**, double*, unsigned int, unsigned int);
 /* *************************************************************** */
 template<typename T>
-void cPtrToMatmn(T** mat, T* cMat, unsigned int m, unsigned int n) {
-    for (unsigned int i = 0; i < m; i++)
-    {
-        for (unsigned int j = 0; j < n; j++)
-        {
-             mat[i][j]=cMat[i * n + j];
+void cPtrToMatmn(T **mat, const T *cMat, unsigned int m, unsigned int n) {
+    for (unsigned int i = 0; i < m; i++) {
+        for (unsigned int j = 0; j < n; j++) {
+            mat[i][j] = cMat[i * n + j];
         }
     }
 }
-template void cPtrToMatmn<float>(float** mat, float* cMat, unsigned int m, unsigned int n);
-template void cPtrToMatmn<double>(double** mat, double* cMat, unsigned int m, unsigned int n);
+template void cPtrToMatmn<float>(float**, const float*, unsigned int, unsigned int);
+template void cPtrToMatmn<double>(double**, const double*, unsigned int, unsigned int);
 /* *************************************************************** */
-void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int &x, int &y, int &z)
-{
-    x =  index % (maxValue_x+1);
-    index /= (maxValue_x+1);
-    y = index % (maxValue_y+1);
-    index /= (maxValue_y+1);
+void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z) {
+    x = index % (maxValue_x + 1);
+    index /= (maxValue_x + 1);
+    y = index % (maxValue_y + 1);
+    index /= (maxValue_y + 1);
     z = index;
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 5e3228f6..b1d40511 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -41,7 +41,7 @@ void reg_checkAndCorrectDimension(nifti_image *image);
  * false otherwise.
  */
 extern "C++"
-bool reg_isAnImageFileName(char *name);
+bool reg_isAnImageFileName(const char *name);
 /* *************************************************************** */
 /** @brief Rescale an input image between two user-defined values.
  * Some threshold can also be applied concurrently
@@ -124,7 +124,7 @@ void reg_downsampleImage(nifti_image *image,
  * euclidean distance
  */
 extern "C++" template <class PrecisionTYPE>
-PrecisionTYPE reg_getMaximalLength(nifti_image *image);
+PrecisionTYPE reg_getMaximalLength(const nifti_image *image);
 /* *************************************************************** */
 /** @brief Change the datatype of a nifti image
  * @param image Image to be updated.
@@ -140,20 +140,20 @@ void reg_tools_changeDatatype(nifti_image *image,
  * between the first and second image.
  */
 extern "C++"
-void reg_tools_addImageToImage(nifti_image *img1,
-                               nifti_image *img2,
+void reg_tools_addImageToImage(const nifti_image *img1,
+                               const nifti_image *img2,
                                nifti_image *out);
 /* *************************************************************** */
-/** @brief Substract two images.
+/** @brief Subtract two images.
  * @param img1 First image to consider
  * @param img2 Second image to consider
  * @param out Result image that contains the result of the operation
  * between the first and second image.
  */
 extern "C++"
-void reg_tools_substractImageToImage(nifti_image *img1,
-                                     nifti_image *img2,
-                                     nifti_image *out);
+void reg_tools_subtractImageFromImage(const nifti_image *img1,
+                                      const nifti_image *img2,
+                                      nifti_image *out);
 /* *************************************************************** */
 /** @brief Multiply two images.
  * @param img1 First image to consider
@@ -162,8 +162,8 @@ void reg_tools_substractImageToImage(nifti_image *img1,
  * between the first and second image.
  */
 extern "C++"
-void reg_tools_multiplyImageToImage(nifti_image *img1,
-                                    nifti_image *img2,
+void reg_tools_multiplyImageToImage(const nifti_image *img1,
+                                    const nifti_image *img2,
                                     nifti_image *out);
 /* *************************************************************** */
 /** @brief Divide two images.
@@ -173,47 +173,47 @@ void reg_tools_multiplyImageToImage(nifti_image *img1,
  * between the first and second image.
  */
 extern "C++"
-void reg_tools_divideImageToImage(nifti_image *img1,
-                                  nifti_image *img2,
+void reg_tools_divideImageToImage(const nifti_image *img1,
+                                  const nifti_image *img2,
                                   nifti_image *out);
 /* *************************************************************** */
 /** @brief Add a scalar to all image intensity
- * @param img1 Input image
+ * @param img Input image
  * @param out Result image that contains the result of the operation.
  * @param val Value to be added to input image
  */
 extern "C++"
-void reg_tools_addValueToImage(nifti_image *img1,
+void reg_tools_addValueToImage(const nifti_image *img,
                                nifti_image *out,
                                float val);
 /* *************************************************************** */
-/** @brief Substract a scalar to all image intensity
- * @param img1 Input image
+/** @brief Subtract a scalar from all image intensity
+ * @param img Input image
  * @param out Result image that contains the result of the operation.
- * @param val Value to be substracted to input image
+ * @param val Value to be subtracted from input image
  */
 extern "C++"
-void reg_tools_substractValueToImage(nifti_image *img1,
-                                     nifti_image *out,
-                                     float val);
+void reg_tools_subtractValueFromImage(const nifti_image *img,
+                                      nifti_image *out,
+                                      float val);
 /* *************************************************************** */
 /** @brief Multiply a scalar to all image intensity
- * @param img1 Input image
+ * @param img Input image
  * @param out Result image that contains the result of the operation.
  * @param val Value to be multiplied to input image
  */
 extern "C++"
-void reg_tools_multiplyValueToImage(nifti_image *img1,
+void reg_tools_multiplyValueToImage(const nifti_image *img,
                                     nifti_image *out,
                                     float val);
 /* *************************************************************** */
-/** @brief Mivide a scalar to all image intensity
- * @param img1 Input image
+/** @brief Divide a scalar to all image intensity
+ * @param img Input image
  * @param out Result image that contains the result of the operation.
  * @param val Value to be divided to input image
  */
 extern "C++"
-void reg_tools_divideValueToImage(nifti_image *img1,
+void reg_tools_divideValueToImage(const nifti_image *img,
                                   nifti_image *out,
                                   float val);
 /* *************************************************************** */
@@ -247,7 +247,7 @@ void reg_tools_binarise_image(nifti_image *img,
  * mask
  */
 extern "C++"
-void reg_tools_binaryImage2int(nifti_image *img,
+void reg_tools_binaryImage2int(const nifti_image *img,
                                int *array,
                                int& activeVoxelNumber);
 /* *************************************************************** */
@@ -258,19 +258,19 @@ void reg_tools_binaryImage2int(nifti_image *img,
  * @return Mean root mean squared error values returned
  */
 extern "C++"
-double reg_tools_getMeanRMS(nifti_image *imgA,
-                            nifti_image *imgB);
+double reg_tools_getMeanRMS(const nifti_image *imgA,
+                            const nifti_image *imgB);
 /* *************************************************************** */
 /** @brief Set all voxels from an image to NaN if the voxel
- * bellong to the mask
+ * belong to the mask
  * @param img Input image to be masked with NaN value
  * @param mask Input mask that defines which voxels
  * have to be set to NaN
  * @param res Output image
  */
 extern "C++"
-int reg_tools_nanMask_image(nifti_image *img,
-                            nifti_image *mask,
+int reg_tools_nanMask_image(const nifti_image *img,
+                            const nifti_image *mask,
                             nifti_image *res);
 /* *************************************************************** */
 /** @brief Set all the voxel with NaN value in the input image to
@@ -279,7 +279,7 @@ int reg_tools_nanMask_image(nifti_image *img,
  * @param mask Input mask which is updated in place
  */
 extern "C++"
-int reg_tools_removeNanFromMask(nifti_image *image, int *mask);
+int reg_tools_removeNanFromMask(const nifti_image *image, int *mask);
 /* *************************************************************** */
 /** @brief Get the minimal value of an image
  * @param img Input image
@@ -287,7 +287,7 @@ int reg_tools_removeNanFromMask(nifti_image *image, int *mask);
  * @return min value
  */
 extern "C++"
-float reg_tools_getMinValue(nifti_image *img, int timepoint);
+float reg_tools_getMinValue(const nifti_image *img, int timepoint);
 /* *************************************************************** */
 /** @brief Get the maximal value of an image
  * @param img Input image
@@ -295,21 +295,21 @@ float reg_tools_getMinValue(nifti_image *img, int timepoint);
  * @return max value
  */
 extern "C++"
-float reg_tools_getMaxValue(nifti_image *img, int timepoint);
+float reg_tools_getMaxValue(const nifti_image *img, int timepoint);
 /* *************************************************************** */
 /** @brief Get the mean value of an image
  * @param img Input image
  * @return mean value
  */
 extern "C++"
-float reg_tools_getMeanValue(nifti_image *img);
+float reg_tools_getMeanValue(const nifti_image *img);
 /* *************************************************************** */
 /** @brief Get the std value of an image
  * @param img Input image
  * @return std value
  */
 extern "C++"
-float reg_tools_getSTDValue(nifti_image *img);
+float reg_tools_getSTDValue(const nifti_image *img);
 /* *************************************************************** */
 /** @brief Generate a pyramid from an input image.
  * @param input Input image to be downsampled to create the pyramid
@@ -321,7 +321,7 @@ float reg_tools_getSTDValue(nifti_image *img);
  * the registration.
  */
 extern "C++" template<class DTYPE>
-int reg_createImagePyramid(nifti_image *input,
+int reg_createImagePyramid(const nifti_image *input,
                            nifti_image **pyramid,
                            unsigned int levelNumber,
                            unsigned int levelToPerform);
@@ -338,7 +338,7 @@ int reg_createImagePyramid(nifti_image *input,
  * voxel for each level of the pyramid
  */
 extern "C++" template<class DTYPE>
-int reg_createMaskPyramid(nifti_image *input,
+int reg_createMaskPyramid(const nifti_image *input,
                           int **pyramid,
                           unsigned int levelNumber,
                           unsigned int levelToPerform,
@@ -359,7 +359,7 @@ void reg_thresholdImage(nifti_image *image,
                         T lowThr,
                         T upThr);
 /* *************************************************************** */
-/** @brief This function flipp the specified axis
+/** @brief This function flip the specified axis
  * @param image Input image to be flipped
  * @param array Array that will contain the flipped
  * input image->data array
@@ -367,9 +367,9 @@ void reg_thresholdImage(nifti_image *image,
  * to flip (xyztuvw)
  */
 extern "C++"
-void reg_flippAxis(nifti_image *image,
-                   void *array,
-                   std::string cmd);
+void reg_flipAxis(const nifti_image *image,
+                  void **outputArray,
+                  const std::string& cmd);
 /* *************************************************************** */
 /** @brief This function converts an image containing deformation
  * field into a displacement field
@@ -391,15 +391,15 @@ int reg_getDeformationFromDisplacement(nifti_image *image);
 /* *************************************************************** */
 /** @brief Set the gradient value along specified direction to zero
  * @param image Input Image that will be modified
- * @param x_axis Boolean to specified if the x-axis has to be zeroed
- * @param y_axis Boolean to specified if the y-axis has to be zeroed
- * @param z_axis Boolean to specified if the z-axis has to be zeroed
+ * @param xAxis Boolean to specified if the x-axis has to be zeroed
+ * @param yAxis Boolean to specified if the y-axis has to be zeroed
+ * @param zAxis Boolean to specified if the z-axis has to be zeroed
  */
 extern "C++"
 void reg_setGradientToZero(nifti_image *image,
-                           bool x_axis,
-                           bool y_axis,
-                           bool z_axis);
+                           bool xAxis,
+                           bool yAxis,
+                           bool zAxis);
 /* *************************************************************** */
 /* *************************************************************** */
 /** @brief The functions returns the largest ratio between two arrays
@@ -407,8 +407,8 @@ void reg_setGradientToZero(nifti_image *image,
  * If A or B are zeros then the (A-B) value is returned.
  */
 extern "C++" template<class DTYPE>
-double reg_test_compare_arrays(DTYPE *ptrA,
-                               DTYPE *ptrB,
+double reg_test_compare_arrays(const DTYPE *ptrA,
+                               const DTYPE *ptrB,
                                size_t nvox);
 /* *************************************************************** */
 /** @brief The functions returns the largest ratio between input image intensities
@@ -416,8 +416,8 @@ double reg_test_compare_arrays(DTYPE *ptrA,
  * If A or B are zeros then the (A-B) value is returned.
  */
 extern "C++"
-double reg_test_compare_images(nifti_image *imgA,
-                               nifti_image *imgB);
+double reg_test_compare_images(const nifti_image *imgA,
+                               const nifti_image *imgB);
 /* *************************************************************** */
 /** @brief The absolute operator is applied to the input image
  */
@@ -428,19 +428,19 @@ extern "C++"
 void mat44ToCptr(const mat44& mat, float *cMat);
 /* *************************************************************** */
 extern "C++"
-void cPtrToMat44(mat44 *mat, float *cMat);
+void cPtrToMat44(mat44 *mat, const float *cMat);
 /* *************************************************************** */
 extern "C++"
-void mat33ToCptr(mat33 *mat, float *cMat, const unsigned int numMats);
+void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned int numMats);
 /* *************************************************************** */
 extern "C++"
-void cPtrToMat33(mat33 *mat, float *cMat);
+void cPtrToMat33(mat33 *mat, const float *cMat);
 /* *************************************************************** */
 extern "C++" template<typename T>
-void matmnToCptr(T **mat, T *cMat, unsigned int m, unsigned int n);
+void matmnToCptr(const T **mat, T *cMat, unsigned int m, unsigned int n);
 /* *************************************************************** */
 extern "C++" template<typename T>
-void cPtrToMatmn(T **mat, T *cMat, unsigned int m, unsigned int n);
+void cPtrToMatmn(T **mat, const T *cMat, unsigned int m, unsigned int n);
 /* *************************************************************** */
-void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int &x, int &y, int &z);
+void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z);
 /* *************************************************************** */
diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp
index 38aef179..2cde3356 100644
--- a/reg-test/reg_test_bspline_deformation_field.cpp
+++ b/reg-test/reg_test_bspline_deformation_field.cpp
@@ -77,7 +77,7 @@ int main(int argc, char **argv)
     // Compute the difference between the computed and expected deformation fields
     nifti_image *diff_field = nifti_copy_nim_info(expectedDefField);
     diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper);
-    reg_tools_substractImageToImage(expectedDefField, test_field, diff_field);
+    reg_tools_subtractImageFromImage(expectedDefField, test_field, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_getMaxValue(diff_field, -1);
 
diff --git a/reg-test/reg_test_changeDataType.cpp b/reg-test/reg_test_changeDataType.cpp
index cc17aec9..1f924e41 100644
--- a/reg-test/reg_test_changeDataType.cpp
+++ b/reg-test/reg_test_changeDataType.cpp
@@ -81,7 +81,7 @@ int main(int argc, char **argv)
     }
     //
     // Compute the difference between the computed and inputed deformation field
-    reg_tools_substractImageToImage(referenceImage, expectedImage, expectedImage);
+    reg_tools_subtractImageFromImage(referenceImage, expectedImage, expectedImage);
     reg_tools_abs_image(expectedImage);
     double max_difference = reg_tools_getMaxValue(expectedImage, -1);
 
diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp
index 78793df5..daddd286 100644
--- a/reg-test/reg_test_coherence_affine_deformation_field.cpp
+++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp
@@ -109,7 +109,7 @@ int main(int argc, char **argv)
     // Compute the difference between the computed and inputted deformation field
     nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField);
     diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper);
-    reg_tools_substractImageToImage(inputDeformationField, test_field_cpu, diff_field);
+    reg_tools_subtractImageFromImage(inputDeformationField, test_field_cpu, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_GetMaxValue(diff_field, -1);
 
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
index 9b03bc8c..dd879f87 100644
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ b/reg-test/reg_test_coherence_interpolation.cpp
@@ -131,7 +131,7 @@ int main(int argc, char **argv)
     diff_field->data = (void *)malloc(diff_field->nvox*diff_field->nbyper);
 
     // Compute the difference between the computed and inputed warped image
-    reg_tools_substractImageToImage(cpu_warped, gpu_warped, diff_field);
+    reg_tools_subtractImageFromImage(cpu_warped, gpu_warped, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_GetMaxValue(diff_field, -1);
 
diff --git a/reg-test/reg_test_compose_deformation_field.cpp b/reg-test/reg_test_compose_deformation_field.cpp
index 26349806..2833f82e 100644
--- a/reg-test/reg_test_compose_deformation_field.cpp
+++ b/reg-test/reg_test_compose_deformation_field.cpp
@@ -46,7 +46,7 @@ int main(int argc, char **argv)
                         nullptr);
 
    // Compute the difference between the computed and inputed deformation field
-   reg_tools_substractImageToImage(inputComFieldImage,test_field,test_field);
+   reg_tools_subtractImageFromImage(inputComFieldImage,test_field,test_field);
    reg_tools_abs_image(test_field);
    double max_difference=reg_tools_getMaxValue(test_field);
 
diff --git a/reg-test/reg_test_convolution.cpp b/reg-test/reg_test_convolution.cpp
index 4c2a509f..065261b6 100644
--- a/reg-test/reg_test_convolution.cpp
+++ b/reg-test/reg_test_convolution.cpp
@@ -40,7 +40,7 @@ int main(int argc, char **argv)
     // Compute the difference between the computed and expected deformation fields
     nifti_image *diff_file = nifti_copy_nim_info(expectedFile);
     diff_file->data = (void *) malloc(diff_file->nvox*diff_file->nbyper);
-    reg_tools_substractImageToImage(expectedFile, referenceImage, diff_file);
+    reg_tools_subtractImageFromImage(expectedFile, referenceImage, diff_file);
     reg_tools_abs_image(diff_file);
     double max_difference = reg_tools_getMaxValue(diff_file, -1);
 
diff --git a/reg-test/reg_test_fullNonlinear.cpp b/reg-test/reg_test_fullNonlinear.cpp
index 3910fd8a..136e3307 100644
--- a/reg-test/reg_test_fullNonlinear.cpp
+++ b/reg-test/reg_test_fullNonlinear.cpp
@@ -62,7 +62,7 @@ int main(int argc, char **argv)
    }
 
    // Compute the difference between the computed and inputed deformation field
-   reg_tools_substractImageToImage(inputControlPointGridImage,
+   reg_tools_subtractImageFromImage(inputControlPointGridImage,
                                    nonlinear->GetControlPointPositionImage(),
                                    inputControlPointGridImage);
    reg_tools_abs_image(inputControlPointGridImage);
diff --git a/reg-test/reg_test_fullSymNonlinear.cpp b/reg-test/reg_test_fullSymNonlinear.cpp
index 3f86334a..1becd432 100644
--- a/reg-test/reg_test_fullSymNonlinear.cpp
+++ b/reg-test/reg_test_fullSymNonlinear.cpp
@@ -62,7 +62,7 @@ int main(int argc, char **argv)
    }
 
    // Compute the difference between the computed and inputed deformation field
-   reg_tools_substractImageToImage(inputControlPointGridImage,
+   reg_tools_subtractImageFromImage(inputControlPointGridImage,
                                    nonlinear->GetControlPointPositionImage(),
                                    inputControlPointGridImage);
    reg_tools_abs_image(inputControlPointGridImage);
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index ad732158..fb72dc65 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -145,7 +145,7 @@ int main(int argc, char **argv)
     free(mask);
 
     //Compute the difference between the computed and expected image
-    reg_tools_substractImageToImage(gradientImage, expectedImage, expectedImage);
+    reg_tools_subtractImageFromImage(gradientImage, expectedImage, expectedImage);
 
     // Extract the maximal absolute value
     reg_tools_abs_image(expectedImage);
diff --git a/reg-test/reg_test_linearElasticityGradient.cpp b/reg-test/reg_test_linearElasticityGradient.cpp
index 9a10a005..17a0d9da 100644
--- a/reg-test/reg_test_linearElasticityGradient.cpp
+++ b/reg-test/reg_test_linearElasticityGradient.cpp
@@ -61,7 +61,7 @@ int main(int argc, char **argv)
     // Compute the difference between the computed and expected gradient
     nifti_image *diff_field = nifti_copy_nim_info(obtainedGradient);
     diff_field->data = (void *)malloc(diff_field->nvox*diff_field->nbyper);
-    reg_tools_substractImageToImage(obtainedGradient, expectedGradientImage, diff_field);
+    reg_tools_subtractImageFromImage(obtainedGradient, expectedGradientImage, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_getMaxValue(diff_field, -1);
 
diff --git a/reg-test/reg_test_mindDescriptor.cpp b/reg-test/reg_test_mindDescriptor.cpp
index 37c90641..bd33496c 100644
--- a/reg-test/reg_test_mindDescriptor.cpp
+++ b/reg-test/reg_test_mindDescriptor.cpp
@@ -49,7 +49,7 @@ int main(int argc, char **argv)
     //
     //Compute the difference between the computed and expected image
     //
-    reg_tools_substractImageToImage(MIND_img, expectedImage, expectedImage);
+    reg_tools_subtractImageFromImage(MIND_img, expectedImage, expectedImage);
     reg_tools_abs_image(expectedImage);
     double max_difference = reg_tools_getMaxValue(expectedImage, -1);
 
diff --git a/reg-test/reg_test_mindsscDescriptor.cpp b/reg-test/reg_test_mindsscDescriptor.cpp
index 11d9a81c..2da9a047 100644
--- a/reg-test/reg_test_mindsscDescriptor.cpp
+++ b/reg-test/reg_test_mindsscDescriptor.cpp
@@ -53,7 +53,7 @@ int main(int argc, char **argv)
     //
     //Compute the difference between the computed and expected image
     //
-    reg_tools_substractImageToImage(MINDSSC_img, expectedImage, expectedImage);
+    reg_tools_subtractImageFromImage(MINDSSC_img, expectedImage, expectedImage);
     reg_tools_abs_image(expectedImage);
     double max_difference = reg_tools_getMaxValue(expectedImage, -1);
 
diff --git a/reg-test/reg_test_nonlinear_deformation_field.cpp b/reg-test/reg_test_nonlinear_deformation_field.cpp
index 18f80687..63e47f20 100644
--- a/reg-test/reg_test_nonlinear_deformation_field.cpp
+++ b/reg-test/reg_test_nonlinear_deformation_field.cpp
@@ -57,7 +57,7 @@ int main(int argc, char **argv)
                                   true);
 
    // Compute the difference between the computed and inputed deformation field
-   reg_tools_substractImageToImage(inputDeformationField,test_field,test_field);
+   reg_tools_subtractImageFromImage(inputDeformationField,test_field,test_field);
    reg_tools_abs_image(test_field);
    double max_difference=reg_tools_getMaxValue(test_field);
 

From 3781bb465721349d511bdc77750d243111d11c9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 31 Jan 2023 13:51:21 +0000
Subject: [PATCH 041/314] Remove reg_base::currentLevel variable

---
 niftyreg_build_version.txt |  2 +-
 reg-lib/_reg_base.cpp      |  6 +++---
 reg-lib/_reg_base.h        |  5 ++---
 reg-lib/_reg_f3d.cpp       | 10 +++++-----
 reg-lib/_reg_f3d.h         |  4 ++--
 5 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a2ecc456..bb793653 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-154
+155
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 20d83c46..5c5f5256 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -1033,7 +1033,7 @@ void reg_base<T>::Run() {
     maxIterationNumber = maxIterationNumber * pow(2, levelToPerform - 1);
 
     // Loop over the different resolution level to perform
-    for (currentLevel = 0; currentLevel < levelToPerform; currentLevel++) {
+    for (int currentLevel = 0; currentLevel < levelToPerform; currentLevel++) {
         // Set the current input images
         nifti_image *reference;
         nifti_image *floating;
@@ -1049,13 +1049,13 @@ void reg_base<T>::Run() {
         }
 
         // The grid is refined if necessary
-        T maxStepSize = InitialiseCurrentLevel(reference);
+        T maxStepSize = InitialiseCurrentLevel(currentLevel, reference);
         T currentSize = maxStepSize;
         T smallestSize = maxStepSize / (T)100.0;
 
         InitContent(reference, floating, mask);
 
-        DisplayCurrentLevelParameters();
+        DisplayCurrentLevelParameters(currentLevel);
 
         // Initialise the measures of similarity
         InitialiseSimilarity();
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 6bd92be9..d20df983 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -97,7 +97,6 @@ class reg_base: public InterfaceOptimiser {
     nifti_image **floatingPyramid;
     int **maskPyramid;
     int *activeVoxelNumber;
-    unsigned int currentLevel;
 
     double bestWMeasure;
     double currentWMeasure;
@@ -123,7 +122,7 @@ class reg_base: public InterfaceOptimiser {
 
     // Pure virtual functions
     virtual void SetOptimiser() = 0;
-    virtual T InitialiseCurrentLevel(nifti_image *reference) = 0;
+    virtual T InitialiseCurrentLevel(int currentLevel, nifti_image *reference) = 0;
     virtual void SmoothGradient() = 0;
     virtual void GetDeformationField() = 0;
     virtual void GetApproximatedGradient() = 0;
@@ -132,7 +131,7 @@ class reg_base: public InterfaceOptimiser {
     virtual T NormaliseGradient() = 0;
     virtual void GetSimilarityMeasureGradient() = 0;
     virtual void GetObjectiveFunctionGradient() = 0;
-    virtual void DisplayCurrentLevelParameters() = 0;
+    virtual void DisplayCurrentLevelParameters(int currentLevel) = 0;
     virtual void UpdateBestObjFunctionValue() = 0;
     virtual void PrintCurrentObjFunctionValue(T) = 0;
     virtual void PrintInitialObjFunctionValue() = 0;
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 4816aee4..1c61e0c8 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -109,7 +109,7 @@ void reg_f3d<T>::SetSpacing(unsigned int i, T s) {
 }
 /* *************************************************************** */
 template<class T>
-T reg_f3d<T>::InitialiseCurrentLevel(nifti_image *reference) {
+T reg_f3d<T>::InitialiseCurrentLevel(int currentLevel, nifti_image *reference) {
     // Set the initial step size for the gradient ascent
     T maxStepSize = reference->dx > reference->dy ? reference->dx : reference->dy;
     if (reference->ndim > 2)
@@ -117,7 +117,7 @@ T reg_f3d<T>::InitialiseCurrentLevel(nifti_image *reference) {
 
     // Refine the control point grid if required
     if (gridRefinement) {
-        if (this->currentLevel == 0) {
+        if (currentLevel == 0) {
             bendingEnergyWeight = bendingEnergyWeight / static_cast<T>(powf(16, this->levelNumber - 1));
             linearEnergyWeight = linearEnergyWeight / static_cast<T>(powf(3, this->levelNumber - 1));
         } else {
@@ -551,14 +551,14 @@ T reg_f3d<T>::NormaliseGradient() {
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::DisplayCurrentLevelParameters() {
+void reg_f3d<T>::DisplayCurrentLevelParameters(int currentLevel) {
 #ifdef NDEBUG
     if (this->verbose) {
 #endif
         nifti_image *reference = this->con->Content::GetReference();
         nifti_image *floating = this->con->Content::GetFloating();
         char text[255];
-        sprintf(text, "Current level: %i / %i", this->currentLevel + 1, this->levelNumber);
+        sprintf(text, "Current level: %i / %i", currentLevel + 1, this->levelNumber);
         reg_print_info(this->executableName, text);
         sprintf(text, "Maximum iteration number: %i", (int)this->maxIterationNumber);
         reg_print_info(this->executableName, text);
@@ -692,7 +692,7 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
         reg_exit();
     }
 
-    InitialiseCurrentLevel(this->inputReference);
+    InitialiseCurrentLevel(-1, this->inputReference);
     InitContent(this->inputReference, this->inputFloating, nullptr);
 
     this->WarpFloatingImage(3); // cubic spline interpolation
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index 00d16a03..a884a2d4 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -33,14 +33,14 @@ class reg_f3d: public reg_base<T> {
     double bestWBE;
     double bestWLE;
 
-    virtual T InitialiseCurrentLevel(nifti_image *reference) override;
+    virtual T InitialiseCurrentLevel(int currentLevel, nifti_image *reference) override;
     virtual T NormaliseGradient() override;
     virtual void SmoothGradient() override;
     virtual void GetObjectiveFunctionGradient() override;
     virtual void GetApproximatedGradient() override;
     virtual void GetSimilarityMeasureGradient() override;
     virtual void GetDeformationField() override;
-    virtual void DisplayCurrentLevelParameters() override;
+    virtual void DisplayCurrentLevelParameters(int currentLevel) override;
     virtual double GetObjectiveFunctionValue() override;
     virtual void UpdateBestObjFunctionValue() override;
     virtual void UpdateParameters(float) override;

From e34829993f12bdfa7499d79628c651aaa9a4b305 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 31 Jan 2023 15:56:41 +0000
Subject: [PATCH 042/314] Make reg_f3d2 platform independent

---
 niftyreg_build_version.txt   |    2 +-
 reg-apps/reg_f3d.cpp         |   26 +-
 reg-lib/CMakeLists.txt       |    4 +-
 reg-lib/Compute.cpp          |  207 +++++-
 reg-lib/Compute.h            |   11 +-
 reg-lib/Content.cpp          |    2 +-
 reg-lib/Content.h            |   16 +-
 reg-lib/Measure.cpp          |    8 +-
 reg-lib/Measure.h            |    2 +-
 reg-lib/Platform.cpp         |   24 +-
 reg-lib/Platform.h           |    3 +-
 reg-lib/_reg_base.cpp        |   60 +-
 reg-lib/_reg_base.h          |   19 +-
 reg-lib/_reg_f3d.cpp         |  128 ++--
 reg-lib/_reg_f3d.h           |    8 +-
 reg-lib/_reg_f3d2.cpp        | 1311 +++++++---------------------------
 reg-lib/_reg_f3d2.h          |   59 +-
 reg-lib/cuda/CudaCompute.cpp |   81 ++-
 reg-lib/cuda/CudaCompute.h   |    8 +-
 reg-lib/cuda/CudaMeasure.cpp |   35 +-
 reg-lib/cuda/CudaMeasure.h   |    2 +-
 21 files changed, 695 insertions(+), 1321 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index bb793653..91b629b0 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-155
+156
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 69da6cc2..d1dd67b2 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -12,10 +12,10 @@
 
 #include "_reg_ReadWriteImage.h"
 #include "_reg_ReadWriteMatrix.h"
-#include "_reg_f3d.h"
+#include "_reg_f3d2.h"
 #include "reg_f3d.h"
 #include <float.h>
- //#include <libgen.h> //DOES NOT WORK ON WINDOWS !
+// #include <libgen.h> //DOES NOT WORK ON WINDOWS !
 
 #ifdef _WIN32
 #   include <time.h>
@@ -51,7 +51,7 @@ void Usage(char *exec) {
     reg_print_info(exec, "***************");
     reg_print_info(exec, "*** Initial transformation options (One option will be considered):");
     reg_print_info(exec, "\t-aff <filename>\t\tFilename which contains an affine transformation (Affine*Reference=Floating)");
-    reg_print_info(exec, "\t-incpp <filename>\tFilename ofloatf control point grid input");
+    reg_print_info(exec, "\t-incpp <filename>\tFilename of the control point grid input");
     reg_print_info(exec, "\t\t\t\tThe coarse spacing is defined by this file.");
     reg_print_info(exec, "");
     reg_print_info(exec, "*** Output options:");
@@ -116,8 +116,7 @@ void Usage(char *exec) {
     reg_print_info(exec, "\t-kldw <tp> <float>\tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint");
     reg_print_info(exec, "\t-wSim <filename>\tWeight to apply to the measure of similarity at each voxel position");
 
-
-    //   reg_print_info(exec, "\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)");
+    // reg_print_info(exec, "\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)");
     reg_print_info(exec, "");
     reg_print_info(exec, "*** Optimisation options:");
     reg_print_info(exec, "\t-maxit <int>\t\tMaximal number of iteration at the final level [150]");
@@ -133,8 +132,8 @@ void Usage(char *exec) {
     reg_print_info(exec, "\t-fmask <filename>\tFilename of a mask image in the floating space");
     reg_print_info(exec, "");
 
-    reg_print_info(exec, "*** Platform options:");
 #if defined(_USE_CUDA) && defined(_USE_OPENCL)
+    reg_print_info(exec, "*** Platform options:");
     reg_print_info(exec, "\t-platf <uint>\t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]");
 #else
 #ifdef _USE_CUDA
@@ -149,7 +148,7 @@ void Usage(char *exec) {
     reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
 #endif
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
     reg_print_info(exec, "");
     reg_print_info(exec, "*** OpenMP-related options:");
     int defaultOpenMPValue = omp_get_num_procs();
@@ -285,16 +284,11 @@ int main(int argc, char **argv) {
     unsigned gpuIdx = 999;
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) {
-            // reg = new reg_f3d2<float>(referenceImage->nt, floatingImage->nt);
-            break;
-        }
-        if (strcmp(argv[i], "-sym") == 0 || strcmp(argv[i], "--sym") == 0) {
-            // reg = new reg_f3d_sym<float>(referenceImage->nt, floatingImage->nt);
-            break;
+            reg = new reg_f3d2<float>(referenceImage->nt, floatingImage->nt);
         } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
             PlatformType value{atoi(argv[++i])};
-            if (int(value) < int(PlatformType::Cpu) || int(value) > int(PlatformType::OpenCl)) {
-                reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL");
+            if (int(value) < int(PlatformType::Cpu) || int(value) > int(PlatformType::Cuda)) {
+                reg_print_msg_error("The platform argument is expected to be 0 or 1 | 0=CPU 1=CUDA");
                 return EXIT_FAILURE;
             }
 #ifndef _USE_CUDA
@@ -663,7 +657,7 @@ int main(int argc, char **argv) {
                  strcmp(argv[i], "-Version") != 0 && strcmp(argv[i], "-V") != 0 &&
                  strcmp(argv[i], "-v") != 0 && strcmp(argv[i], "--v") != 0 &&
                  strcmp(argv[i], "-platf") != 0 && strcmp(argv[i], "--platf") != 0 &&
-                 strcmp(argv[i], "-vel") != 0 && strcmp(argv[i], "-sym") != 0) {
+                 strcmp(argv[i], "-vel") != 0) {
             reg_print_msg_error("\tParameter unknown:");
             reg_print_msg_error(argv[i]);
             PetitUsage((argv[0]));
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 0d688304..73e59e8d 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -237,8 +237,8 @@ set(_reg_f3d_files
   _reg_base.h
   _reg_f3d.cpp
   _reg_f3d.h
-  # _reg_f3d2.cpp
-  # _reg_f3d2.h
+  _reg_f3d2.cpp
+  _reg_f3d2.h
 )
 set(_reg_f3d_libraries
   _reg_blockMatching
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index be7fb254..f37634e8 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -131,16 +131,6 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active
                          activeTimepoint);
 }
 /* *************************************************************** */
-void Compute::VoxelCentricToNodeCentric(float weight) {
-    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    mat44 *reorientation = Content::GetIJKMatrix(*con.GetFloating());
-    reg_voxelCentric2NodeCentric(con.GetTransformationGradient(),
-                                 con.GetVoxelBasedMeasureGradient(),
-                                 weight,
-                                 false, // no update
-                                 reorientation);
-}
-/* *************************************************************** */
 double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_getMaximalLength to accept optimiseX, optimiseY, optimiseZ
     nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
@@ -159,6 +149,13 @@ void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
     reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength);
 }
 /* *************************************************************** */
+void Compute::SmoothGradient(float sigma) {
+    if (sigma != 0) {
+        sigma = fabs(sigma);
+        reg_tools_kernelConvolution(dynamic_cast<F3dContent&>(con).GetTransformationGradient(), &sigma, GAUSSIAN_KERNEL);
+    }
+}
+/* *************************************************************** */
 template<typename Type>
 void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
@@ -199,3 +196,193 @@ void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) {
     }
 }
 /* *************************************************************** */
+void Compute::GetDefFieldFromVelocityGrid(bool updateStepNumber) {
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    reg_spline_getDefFieldFromVelocityGrid(con.GetControlPointGrid(),
+                                           con.GetDeformationField(),
+                                           updateStepNumber);
+}
+/* *************************************************************** */
+void Compute::ConvolveImage(nifti_image *image) {
+    const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
+    const int kernelType = CUBIC_SPLINE_KERNEL;
+    float currentNodeSpacing[3];
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
+    bool activeAxis[3] = {1, 0, 0};
+    reg_tools_kernelConvolution(image,
+                                currentNodeSpacing,
+                                kernelType,
+                                nullptr, // mask
+                                nullptr, // all volumes are considered as active
+                                activeAxis);
+    // Convolution along the y axis
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dy;
+    activeAxis[0] = 0;
+    activeAxis[1] = 1;
+    reg_tools_kernelConvolution(image,
+                                currentNodeSpacing,
+                                kernelType,
+                                nullptr, // mask
+                                nullptr, // all volumes are considered as active
+                                activeAxis);
+    // Convolution along the z axis if required
+    if (image->nz > 1) {
+        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dz;
+        activeAxis[1] = 0;
+        activeAxis[2] = 1;
+        reg_tools_kernelConvolution(image,
+                                    currentNodeSpacing,
+                                    kernelType,
+                                    nullptr, // mask
+                                    nullptr, // all volumes are considered as active
+                                    activeAxis);
+    }
+}
+/* *************************************************************** */
+void Compute::ConvolveVoxelBasedMeasureGradient(float weight) {
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    ConvolveImage(con.GetVoxelBasedMeasureGradient());
+
+    // The node-based NMI gradient is extracted
+    mat44 *reorientation = Content::GetIJKMatrix(*con.GetFloating());
+    reg_voxelCentric2NodeCentric(con.GetTransformationGradient(),
+                                 con.GetVoxelBasedMeasureGradient(),
+                                 weight,
+                                 false, // no update
+                                 reorientation);
+}
+/* *************************************************************** */
+void Compute::ExponentiateGradient(Content& conBwIn) {
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    F3dContent& conBw = dynamic_cast<F3dContent&>(conBwIn);
+    const nifti_image *deformationField = con.Content::GetDeformationField();
+    nifti_image *voxelBasedMeasureGradient = con.GetVoxelBasedMeasureGradient();
+    nifti_image *controlPointGridBw = conBw.GetControlPointGrid();
+    mat44 *affineTransformationBw = conBw.GetTransformationMatrix();
+    const size_t compNum = size_t(fabs(controlPointGridBw->intent_p2)); // The number of composition
+
+    /* Allocate a temporary gradient image to store the backward gradient */
+    nifti_image *tempGrad = nifti_copy_nim_info(voxelBasedMeasureGradient);
+    tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper);
+
+    // Create all deformation field images needed for resampling
+    nifti_image **tempDef = (nifti_image**)malloc((compNum + 1) * sizeof(nifti_image*));
+    for (size_t i = 0; i <= compNum; ++i) {
+        tempDef[i] = nifti_copy_nim_info(deformationField);
+        tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper);
+    }
+
+    // Generate all intermediate deformation fields
+    reg_spline_getIntermediateDefFieldFromVelGrid(controlPointGridBw, tempDef);
+
+    // Remove the affine component
+    nifti_image *affineDisp = nullptr;
+    if (affineTransformationBw) {
+        affineDisp = nifti_copy_nim_info(deformationField);
+        affineDisp->data = malloc(affineDisp->nvox * affineDisp->nbyper);
+        reg_affine_getDeformationField(affineTransformationBw, affineDisp);
+        reg_getDisplacementFromDeformation(affineDisp);
+    }
+
+    for (size_t i = 0; i < compNum; ++i) {
+        if (affineDisp)
+            reg_tools_subtractImageFromImage(tempDef[i], affineDisp, tempDef[i]);
+        reg_resampleGradient(voxelBasedMeasureGradient, // floating
+                             tempGrad,   // warped - out
+                             tempDef[i], // deformation field
+                             1,  // interpolation type - linear
+                             0); // padding value
+        reg_tools_addImageToImage(tempGrad, // in
+                                  voxelBasedMeasureGradient,  // in
+                                  voxelBasedMeasureGradient); // out
+    }
+
+    // Normalise the forward gradient
+    reg_tools_divideValueToImage(voxelBasedMeasureGradient, // in
+                                 voxelBasedMeasureGradient, // out
+                                 powf(2, compNum)); // value
+
+    for (size_t i = 0; i <= compNum; ++i)
+        nifti_image_free(tempDef[i]);
+    free(tempDef);
+    nifti_image_free(tempGrad);
+    if (affineDisp)
+        nifti_image_free(affineDisp);
+}
+/* *************************************************************** */
+nifti_image* Compute::ScaleGradient(const nifti_image& transformationGradient, float scale) {
+    nifti_image *scaledGradient = nifti_copy_nim_info(&transformationGradient);
+    scaledGradient->data = malloc(scaledGradient->nvox * scaledGradient->nbyper);
+    reg_tools_multiplyValueToImage(&transformationGradient, scaledGradient, scale);
+    return scaledGradient;
+}
+/* *************************************************************** */
+void Compute::UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    nifti_image *scaledGradient = ScaleGradient(*con.GetTransformationGradient(), scale);
+    nifti_image *controlPointGrid = con.GetControlPointGrid();
+
+    // Reset the gradient along the axes if appropriate
+    reg_setGradientToZero(scaledGradient, !optimiseX, !optimiseY, !optimiseZ);
+
+    // Update the velocity field
+    reg_tools_addImageToImage(controlPointGrid,  // in
+                              scaledGradient,    // in
+                              controlPointGrid); // out
+
+    nifti_image_free(scaledGradient);
+}
+/* *************************************************************** */
+void Compute::BchUpdate(float scale, int bchUpdateValue) {
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    nifti_image *scaledGradient = ScaleGradient(*con.GetTransformationGradient(), scale);
+    nifti_image *controlPointGrid = con.GetControlPointGrid();
+
+    compute_BCH_update(controlPointGrid, scaledGradient, bchUpdateValue);
+
+    nifti_image_free(scaledGradient);
+}
+/* *************************************************************** */
+void Compute::SymmetriseVelocityFields(Content& conBwIn) {
+    nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(this->con).GetControlPointGrid();
+    nifti_image *controlPointGridBw = dynamic_cast<F3dContent&>(conBwIn).GetControlPointGrid();
+
+    // In order to ensure symmetry, the forward and backward velocity fields
+    // are averaged in both image spaces: reference and floating
+    nifti_image *warpedTrans = nifti_copy_nim_info(controlPointGridBw);
+    warpedTrans->data = malloc(warpedTrans->nvox * warpedTrans->nbyper);
+    nifti_image *warpedTransBw = nifti_copy_nim_info(controlPointGrid);
+    warpedTransBw->data = malloc(warpedTransBw->nvox * warpedTransBw->nbyper);
+
+    // Both parametrisations are converted into displacement
+    reg_getDisplacementFromDeformation(controlPointGrid);
+    reg_getDisplacementFromDeformation(controlPointGridBw);
+
+    // Both parametrisations are copied over
+    memcpy(warpedTransBw->data, controlPointGridBw->data, warpedTransBw->nvox * warpedTransBw->nbyper);
+    memcpy(warpedTrans->data, controlPointGrid->data, warpedTrans->nvox * warpedTrans->nbyper);
+
+    // and subtracted (sum and negation)
+    reg_tools_subtractImageFromImage(controlPointGridBw,  // displacement
+                                   warpedTrans,         // displacement
+                                   controlPointGridBw); // displacement output
+    reg_tools_subtractImageFromImage(controlPointGrid,  // displacement
+                                   warpedTransBw,     // displacement
+                                   controlPointGrid); // displacement output
+
+    // Divide by 2
+    reg_tools_multiplyValueToImage(controlPointGridBw, // displacement
+                                   controlPointGridBw, // displacement output
+                                   0.5f);
+    reg_tools_multiplyValueToImage(controlPointGrid, // displacement
+                                   controlPointGrid, // displacement output
+                                   0.5f);
+
+    // Convert the velocity field from displacement to deformation
+    reg_getDeformationFromDisplacement(controlPointGrid);
+    reg_getDeformationFromDisplacement(controlPointGridBw);
+
+    nifti_image_free(warpedTrans);
+    nifti_image_free(warpedTransBw);
+}
+/* *************************************************************** */
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index 58821641..9b4fded1 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -21,14 +21,23 @@ class Compute {
     virtual void GetDeformationField(bool composition, bool bspline);
     virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint);
-    virtual void VoxelCentricToNodeCentric(float weight);
     virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength);
+    virtual void SmoothGradient(float sigma);
     virtual void GetApproximatedGradient(InterfaceOptimiser& opt);
+    virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber);
+    virtual void ConvolveVoxelBasedMeasureGradient(float weight);
+    virtual void ExponentiateGradient(Content& conBw);
+    virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ);
+    virtual void BchUpdate(float scale, int bchUpdateValue);
+    virtual void SymmetriseVelocityFields(Content& conBw);
 
 protected:
     Content& con;
 
+    void ConvolveImage(nifti_image*);
+
 private:
     template<typename Type> void GetApproximatedGradient(InterfaceOptimiser&);
+    nifti_image* ScaleGradient(const nifti_image&, float);
 };
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index fb80d50c..5df249fb 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -31,7 +31,7 @@ void Content::AllocateWarped() {
     warped = nifti_copy_nim_info(reference);
     warped->dim[0] = warped->ndim = floating->ndim;
     warped->dim[4] = warped->nt = floating->nt;
-    warped->pixdim[4] = warped->dt = 1.0;
+    warped->pixdim[4] = warped->dt = 1;
     warped->nvox = size_t(warped->nx * warped->ny * warped->nz * warped->nt);
     warped->datatype = floating->datatype;
     warped->nbyper = floating->nbyper;
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index c3e53a1e..5b6b8a4c 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -23,18 +23,10 @@ class Content {
     virtual nifti_image* GetWarped() { return warped; }
 
     // Setters
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) {
-        deformationField = deformationFieldIn;
-    }
-    virtual void SetReferenceMask(int *referenceMaskIn) {
-        referenceMask = referenceMaskIn;
-    }
-    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) {
-        transformationMatrix = transformationMatrixIn;
-    }
-    virtual void SetWarped(nifti_image *warpedIn) {
-        warped = warpedIn;
-    }
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) { deformationField = deformationFieldIn; }
+    virtual void SetReferenceMask(int *referenceMaskIn) { referenceMask = referenceMaskIn; }
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { transformationMatrix = transformationMatrixIn; }
+    virtual void SetWarped(nifti_image *warpedIn) { warped = warpedIn; }
 
     // Auxiliary methods
     static mat44* GetXYZMatrix(nifti_image& image) {
diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp
index 6e4419a7..e28b4314 100644
--- a/reg-lib/Measure.cpp
+++ b/reg-lib/Measure.cpp
@@ -29,13 +29,17 @@ reg_measure* Measure::Create(const MeasureType& measureType) {
     return nullptr;
 }
 /* *************************************************************** */
-void Measure::Initialise(reg_measure& measure, F3dContent& con) {
+void Measure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) {
     measure.InitialiseMeasure(con.GetReference(),
                               con.GetFloating(),
                               con.GetReferenceMask(),
                               con.GetWarped(),
                               con.GetWarpedGradient(),
                               con.GetVoxelBasedMeasureGradient(),
-                              con.GetLocalWeightSim());
+                              con.GetLocalWeightSim(),
+                              conBw ? conBw->GetReferenceMask() : nullptr,
+                              conBw ? conBw->GetWarped() : nullptr,
+                              conBw ? conBw->GetWarpedGradient() : nullptr,
+                              conBw ? conBw->GetVoxelBasedMeasureGradient() : nullptr);
 }
 /* *************************************************************** */
diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h
index afa593b3..d33c1757 100644
--- a/reg-lib/Measure.h
+++ b/reg-lib/Measure.h
@@ -8,5 +8,5 @@ enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, Mindssc };
 class Measure {
 public:
     virtual reg_measure* Create(const MeasureType& measureType);
-    virtual void Initialise(reg_measure& measure, F3dContent& con);
+    virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr);
 };
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 9acd9681..06aac408 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -55,21 +55,32 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
                                                bool useConjGradient,
                                                bool optimiseX,
                                                bool optimiseY,
-                                               bool optimiseZ) const {
+                                               bool optimiseZ,
+                                               F3dContent *conBw) const {
     reg_optimiser<Type> *optimiser;
     nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    nifti_image *controlPointGridBw = conBw ? conBw->F3dContent::GetControlPointGrid() : nullptr;
     Type *controlPointGridData, *transformationGradientData;
+    Type *controlPointGridDataBw = nullptr, *transformationGradientDataBw = nullptr;
 
     if (platformType == PlatformType::Cpu) {
         optimiser = useConjGradient ? new reg_conjugateGradient<Type>() : new reg_optimiser<Type>();
         controlPointGridData = (Type*)controlPointGrid->data;
-        transformationGradientData = (Type*)con.F3dContent::GetTransformationGradient()->data;
+        transformationGradientData = (Type*)con.GetTransformationGradient()->data;
+        if (conBw) {
+            controlPointGridDataBw = (Type*)controlPointGridBw->data;
+            transformationGradientDataBw = (Type*)conBw->GetTransformationGradient()->data;
+        }
     }
 #ifdef _USE_CUDA
     else if (platformType == PlatformType::Cuda) {
         optimiser = dynamic_cast<reg_optimiser<Type>*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu());
         controlPointGridData = (Type*)dynamic_cast<CudaF3dContent&>(con).GetControlPointGridCuda();
         transformationGradientData = (Type*)dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda();
+        if (conBw) {
+            controlPointGridDataBw = (Type*)dynamic_cast<CudaF3dContent*>(conBw)->GetControlPointGridCuda();
+            transformationGradientDataBw = (Type*)dynamic_cast<CudaF3dContent*>(conBw)->GetTransformationGradientCuda();
+        }
     }
 #endif
 
@@ -82,12 +93,15 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
                           0, // currentIterationNumber,
                           &opt,
                           controlPointGridData,
-                          transformationGradientData);
+                          transformationGradientData,
+                          controlPointGridBw ? controlPointGridBw->nvox : 0,
+                          controlPointGridDataBw,
+                          transformationGradientDataBw);
 
     return optimiser;
 }
-template reg_optimiser<float>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const;
-template reg_optimiser<double>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const;
+template reg_optimiser<float>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const;
+template reg_optimiser<double>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const;
 /* *************************************************************** */
 Measure* Platform::CreateMeasure() const {
     return measureFactory->Produce();
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 76b650ab..faff5757 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -22,7 +22,8 @@ class Platform {
                                          bool useConjGradient,
                                          bool optimiseX,
                                          bool optimiseY,
-                                         bool optimiseZ) const;
+                                         bool optimiseZ,
+                                         F3dContent *conBw = nullptr) const;
     Measure* CreateMeasure() const;
 
     std::string GetName() const;
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 5c5f5256..5f428ea1 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -1013,6 +1013,29 @@ void reg_base<T>::WarpFloatingImage(int inter) {
 }
 /* *************************************************************** */
 template<class T>
+void reg_base<T>::DeinitCurrentLevel(int currentLevel) {
+    delete optimiser;
+    optimiser = nullptr;
+    if (currentLevel >= 0) {
+        if (usePyramid) {
+            nifti_image_free(referencePyramid[currentLevel]);
+            referencePyramid[currentLevel] = nullptr;
+            nifti_image_free(floatingPyramid[currentLevel]);
+            floatingPyramid[currentLevel] = nullptr;
+            free(maskPyramid[currentLevel]);
+            maskPyramid[currentLevel] = nullptr;
+        } else if (currentLevel == levelToPerform - 1) {
+            nifti_image_free(referencePyramid[0]);
+            referencePyramid[0] = nullptr;
+            nifti_image_free(floatingPyramid[0]);
+            floatingPyramid[0] = nullptr;
+            free(maskPyramid[0]);
+            maskPyramid[0] = nullptr;
+        }
+    }
+}
+/* *************************************************************** */
+template<class T>
 void reg_base<T>::Run() {
 #ifndef NDEBUG
     char text[255];
@@ -1034,27 +1057,11 @@ void reg_base<T>::Run() {
 
     // Loop over the different resolution level to perform
     for (int currentLevel = 0; currentLevel < levelToPerform; currentLevel++) {
-        // Set the current input images
-        nifti_image *reference;
-        nifti_image *floating;
-        int *mask;
-        if (usePyramid) {
-            reference = referencePyramid[currentLevel];
-            floating = floatingPyramid[currentLevel];
-            mask = maskPyramid[currentLevel];
-        } else {
-            reference = referencePyramid[0];
-            floating = floatingPyramid[0];
-            mask = maskPyramid[0];
-        }
-
         // The grid is refined if necessary
-        T maxStepSize = InitialiseCurrentLevel(currentLevel, reference);
+        T maxStepSize = InitCurrentLevel(currentLevel);
         T currentSize = maxStepSize;
         T smallestSize = maxStepSize / (T)100.0;
 
-        InitContent(reference, floating, mask);
-
         DisplayCurrentLevelParameters(currentLevel);
 
         // Initialise the measures of similarity
@@ -1115,24 +1122,7 @@ void reg_base<T>::Run() {
         CorrectTransformation();
 
         // Some cleaning is performed
-        delete optimiser;
-        optimiser = nullptr;
-        DeinitContent();
-        if (usePyramid) {
-            nifti_image_free(referencePyramid[currentLevel]);
-            referencePyramid[currentLevel] = nullptr;
-            nifti_image_free(floatingPyramid[currentLevel]);
-            floatingPyramid[currentLevel] = nullptr;
-            free(maskPyramid[currentLevel]);
-            maskPyramid[currentLevel] = nullptr;
-        } else if (currentLevel == levelToPerform - 1) {
-            nifti_image_free(referencePyramid[0]);
-            referencePyramid[0] = nullptr;
-            nifti_image_free(floatingPyramid[0]);
-            floatingPyramid[0] = nullptr;
-            free(maskPyramid[0]);
-            maskPyramid[0] = nullptr;
-        }
+        DeinitCurrentLevel(currentLevel);
 
 #ifdef NDEBUG
         if (verbose) {
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index d20df983..53b50fad 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -122,7 +122,8 @@ class reg_base: public InterfaceOptimiser {
 
     // Pure virtual functions
     virtual void SetOptimiser() = 0;
-    virtual T InitialiseCurrentLevel(int currentLevel, nifti_image *reference) = 0;
+    virtual T InitCurrentLevel(int) = 0;
+    virtual void DeinitCurrentLevel(int);
     virtual void SmoothGradient() = 0;
     virtual void GetDeformationField() = 0;
     virtual void GetApproximatedGradient() = 0;
@@ -131,13 +132,11 @@ class reg_base: public InterfaceOptimiser {
     virtual T NormaliseGradient() = 0;
     virtual void GetSimilarityMeasureGradient() = 0;
     virtual void GetObjectiveFunctionGradient() = 0;
-    virtual void DisplayCurrentLevelParameters(int currentLevel) = 0;
+    virtual void DisplayCurrentLevelParameters(int) = 0;
     virtual void UpdateBestObjFunctionValue() = 0;
     virtual void PrintCurrentObjFunctionValue(T) = 0;
     virtual void PrintInitialObjFunctionValue() = 0;
     virtual void CorrectTransformation() = 0;
-    virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) = 0;
-    virtual void DeinitContent() = 0;
 
 public:
     reg_base(int refTimePoint, int floTimePoint);
@@ -167,12 +166,12 @@ class reg_base: public InterfaceOptimiser {
     // virtual void DoNotApproximateParzenWindow();
     virtual void UseNMISetReferenceBinNumber(int, int);
     virtual void UseNMISetFloatingBinNumber(int, int);
-    virtual void UseSSD(int timepoint, bool normalize);
-    virtual void UseMIND(int timepoint, int offset);
-    virtual void UseMINDSSC(int timepoint, int offset);
-    virtual void UseKLDivergence(int timepoint);
-    virtual void UseDTI(bool *timepoint);
-    virtual void UseLNCC(int timepoint, float stdDevKernel);
+    virtual void UseSSD(int, bool);
+    virtual void UseMIND(int, int);
+    virtual void UseMINDSSC(int, int);
+    virtual void UseKLDivergence(int);
+    virtual void UseDTI(bool*);
+    virtual void UseLNCC(int, float);
     virtual void SetLNCCKernelType(int type);
     virtual void SetLocalWeightSim(nifti_image*);
 
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 1c61e0c8..55ca713d 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -26,7 +26,7 @@ reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint):
     inputControlPointGrid = nullptr; // pointer to external
     controlPointGrid = nullptr;
     bendingEnergyWeight = 0.001;
-    linearEnergyWeight = 0.00;
+    linearEnergyWeight = 0.01;
     jacobianLogWeight = 0;
     jacobianLogApproximation = true;
     spacing[0] = -5;
@@ -109,7 +109,32 @@ void reg_f3d<T>::SetSpacing(unsigned int i, T s) {
 }
 /* *************************************************************** */
 template<class T>
-T reg_f3d<T>::InitialiseCurrentLevel(int currentLevel, nifti_image *reference) {
+void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
+    if (this->platformType == PlatformType::Cpu)
+        this->con = new F3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
+#ifdef _USE_CUDA
+    else if (this->platformType == PlatformType::Cuda)
+        this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
+#endif
+    this->compute = this->platform->CreateCompute(*this->con);
+}
+/* *************************************************************** */
+template<class T>
+T reg_f3d<T>::InitCurrentLevel(int currentLevel) {
+    // Set the current input images
+    nifti_image *reference, *floating;
+    int *mask;
+    if (currentLevel < 0) {
+        reference = this->inputReference;
+        floating = this->inputFloating;
+        mask = nullptr;
+    } else {
+        const int index = this->usePyramid ? currentLevel : 0;
+        reference = this->referencePyramid[index];
+        floating = this->floatingPyramid[index];
+        mask = this->maskPyramid[index];
+    }
+
     // Set the initial step size for the gradient ascent
     T maxStepSize = reference->dx > reference->dy ? reference->dx : reference->dy;
     if (reference->ndim > 2)
@@ -121,19 +146,30 @@ T reg_f3d<T>::InitialiseCurrentLevel(int currentLevel, nifti_image *reference) {
             bendingEnergyWeight = bendingEnergyWeight / static_cast<T>(powf(16, this->levelNumber - 1));
             linearEnergyWeight = linearEnergyWeight / static_cast<T>(powf(3, this->levelNumber - 1));
         } else {
-            bendingEnergyWeight = bendingEnergyWeight * static_cast<T>(16);
-            linearEnergyWeight = linearEnergyWeight * static_cast<T>(3);
+            bendingEnergyWeight = bendingEnergyWeight * 16;
+            linearEnergyWeight = linearEnergyWeight * 3;
             reg_spline_refineControlPointGrid(controlPointGrid, reference);
         }
     }
 
+    InitContent(reference, floating, mask);
+
 #ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::InitialiseCurrentLevel");
+    reg_print_fct_debug("reg_f3d<T>::InitCurrentLevel");
 #endif
     return maxStepSize;
 }
 /* *************************************************************** */
 template<class T>
+void reg_f3d<T>::DeinitCurrentLevel(int currentLevel) {
+    reg_base<T>::DeinitCurrentLevel(currentLevel);
+    delete this->compute;
+    this->compute = nullptr;
+    delete this->con;
+    this->con = nullptr;
+}
+/* *************************************************************** */
+template<class T>
 void reg_f3d<T>::CheckParameters() {
     reg_base<T>::CheckParameters();
     // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
@@ -330,25 +366,6 @@ void reg_f3d<T>::Initialise() {
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
-    if (this->platformType == PlatformType::Cpu)
-        this->con = new F3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
-#ifdef _USE_CUDA
-    else if (this->platformType == PlatformType::Cuda)
-        this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
-#endif
-    this->compute = this->platform->CreateCompute(*this->con);
-}
-/* *************************************************************** */
-template<class T>
-void reg_f3d<T>::DeinitContent() {
-    delete this->compute;
-    this->compute = nullptr;
-    delete this->con;
-    this->con = nullptr;
-}
-/* *************************************************************** */
-template<class T>
 void reg_f3d<T>::GetDeformationField() {
     this->compute->GetDeformationField(false, // Composition
                                        true); // bspline
@@ -433,51 +450,11 @@ double reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetSimilarityMeasureGradient() {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
     this->GetVoxelBasedGradient();
 
-    nifti_image *voxelBasedMeasureGradient = dynamic_cast<F3dContent*>(this->con)->GetVoxelBasedMeasureGradient();
-    const int kernel_type = CUBIC_SPLINE_KERNEL;
-    // The voxel based NMI gradient is convolved with a spline kernel
-    // Convolution along the x axis
-    float currentNodeSpacing[3];
-    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
-    bool activeAxis[3] = {1, 0, 0};
-    reg_tools_kernelConvolution(voxelBasedMeasureGradient,
-                                currentNodeSpacing,
-                                kernel_type,
-                                nullptr, // mask
-                                nullptr, // all volumes are considered as active
-                                activeAxis);
-    // Convolution along the y axis
-    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dy;
-    activeAxis[0] = 0;
-    activeAxis[1] = 1;
-    reg_tools_kernelConvolution(voxelBasedMeasureGradient,
-                                currentNodeSpacing,
-                                kernel_type,
-                                nullptr, // mask
-                                nullptr, // all volumes are considered as active
-                                activeAxis);
-    // Convolution along the z axis if required
-    if (voxelBasedMeasureGradient->nz > 1) {
-        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dz;
-        activeAxis[1] = 0;
-        activeAxis[2] = 1;
-        reg_tools_kernelConvolution(voxelBasedMeasureGradient,
-                                    currentNodeSpacing,
-                                    kernel_type,
-                                    nullptr, // mask
-                                    nullptr, // all volumes are considered as active
-                                    activeAxis);
-    }
-
-    // Update the changes for GPU
-    dynamic_cast<F3dContent*>(this->con)->UpdateVoxelBasedMeasureGradient();
-
-    // The node based NMI gradient is extracted
-    this->compute->VoxelCentricToNodeCentric(this->similarityWeight);
+    // The voxel-based NMI gradient is convolved with a spline kernel
+    // And the node-based NMI gradient is extracted
+    this->compute->ConvolveVoxelBasedMeasureGradient(this->similarityWeight);
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetSimilarityMeasureGradient");
@@ -534,7 +511,7 @@ T reg_f3d<T>::NormaliseGradient() {
 
     if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
         // The gradient is normalised if we are running f3d
-        // It will be normalised later when running f3d_sym or f3d2
+        // It will be normalised later when running f3d2
         this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength);
 #ifndef NDEBUG
         char text[255];
@@ -660,16 +637,8 @@ void reg_f3d<T>::SetOptimiser() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SmoothGradient() {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
     // The gradient is smoothed using a Gaussian kernel if it is required
-    if (this->gradientSmoothingSigma != 0) {
-        float kernel = fabs(this->gradientSmoothingSigma);
-        F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-        reg_tools_kernelConvolution(con->GetTransformationGradient(), &kernel, GAUSSIAN_KERNEL);
-        // Update the changes for GPU
-        con->UpdateTransformationGradient();
-    }
+    this->compute->SmoothGradient(this->gradientSmoothingSigma);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SmoothGradient");
 #endif
@@ -692,8 +661,7 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
         reg_exit();
     }
 
-    InitialiseCurrentLevel(-1, this->inputReference);
-    InitContent(this->inputReference, this->inputFloating, nullptr);
+    InitCurrentLevel(-1);
 
     this->WarpFloatingImage(3); // cubic spline interpolation
 
@@ -701,7 +669,7 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
     warpedImage[0] = this->con->GetWarped();
 
     this->con->SetWarped(nullptr); // Prevent deallocating of warpedImage
-    DeinitContent();
+    DeinitCurrentLevel(-1);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetWarpedImage");
 #endif
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index a884a2d4..27186c8b 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -33,14 +33,16 @@ class reg_f3d: public reg_base<T> {
     double bestWBE;
     double bestWLE;
 
-    virtual T InitialiseCurrentLevel(int currentLevel, nifti_image *reference) override;
+    void InitContent(nifti_image*, nifti_image*, int*);
+    virtual T InitCurrentLevel(int) override;
+    virtual void DeinitCurrentLevel(int) override;
     virtual T NormaliseGradient() override;
     virtual void SmoothGradient() override;
     virtual void GetObjectiveFunctionGradient() override;
     virtual void GetApproximatedGradient() override;
     virtual void GetSimilarityMeasureGradient() override;
     virtual void GetDeformationField() override;
-    virtual void DisplayCurrentLevelParameters(int currentLevel) override;
+    virtual void DisplayCurrentLevelParameters(int) override;
     virtual double GetObjectiveFunctionValue() override;
     virtual void UpdateBestObjFunctionValue() override;
     virtual void UpdateParameters(float) override;
@@ -50,8 +52,6 @@ class reg_f3d: public reg_base<T> {
     virtual void CorrectTransformation() override;
     virtual void CheckParameters() override;
     virtual void Initialise() override;
-    virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) override;
-    virtual void DeinitContent() override;
 
     virtual double ComputeBendingEnergyPenaltyTerm();
     virtual double ComputeLinearEnergyPenaltyTerm();
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index d4e2cc2b..56a99eab 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -11,23 +11,22 @@
  */
 
 #include "_reg_f3d2.h"
+#include "F3dContent.h"
+
+#ifdef _USE_CUDA
+#include "CudaF3dContent.h"
+#endif
 
 /* *************************************************************** */
 template <class T>
 reg_f3d2<T>::reg_f3d2(int refTimePoint, int floTimePoint):
     reg_f3d<T>::reg_f3d(refTimePoint, floTimePoint) {
     this->executableName = (char*)"NiftyReg F3D2";
-    backwardControlPointGrid = nullptr;
-    backwardWarped = nullptr;
-    backwardWarpedGradientImage = nullptr;
-    backwardDeformationFieldImage = nullptr;
-    backwardVoxelBasedMeasureGradientImage = nullptr;
-    backwardTransformationGradient = nullptr;
+    controlPointGridBw = nullptr;
     floatingMaskImage = nullptr;
-    floatingMask = nullptr;
     floatingMaskPyramid = nullptr;
-    backwardActiveVoxelNumber = nullptr;
-    backwardJacobianMatrix = nullptr;
+    activeVoxelNumberBw = nullptr;
+    affineTransformationBw = nullptr;
     inverseConsistencyWeight = 0;
     bchUpdate = false;
     useGradientCumulativeExp = true;
@@ -40,9 +39,9 @@ reg_f3d2<T>::reg_f3d2(int refTimePoint, int floTimePoint):
 /* *************************************************************** */
 template <class T>
 reg_f3d2<T>::~reg_f3d2() {
-    if (backwardControlPointGrid) {
-        nifti_image_free(backwardControlPointGrid);
-        backwardControlPointGrid = nullptr;
+    if (controlPointGridBw) {
+        nifti_image_free(controlPointGridBw);
+        controlPointGridBw = nullptr;
     }
 
     if (floatingMaskPyramid) {
@@ -63,9 +62,14 @@ reg_f3d2<T>::~reg_f3d2() {
         floatingMaskPyramid = nullptr;
     }
 
-    if (backwardActiveVoxelNumber) {
-        free(backwardActiveVoxelNumber);
-        backwardActiveVoxelNumber = nullptr;
+    if (activeVoxelNumberBw) {
+        free(activeVoxelNumberBw);
+        activeVoxelNumberBw = nullptr;
+    }
+
+    if (affineTransformationBw) {
+        delete affineTransformationBw;
+        affineTransformationBw = nullptr;
     }
 #ifndef NDEBUG
     reg_print_msg_debug("reg_f3d2 destructor called");
@@ -88,245 +92,84 @@ void reg_f3d2<T>::SetInverseConsistencyWeight(T w) {
 #endif
 }
 /* *************************************************************** */
-template <class T>
-T reg_f3d2<T>::InitialiseCurrentLevel() {
+template<class T>
+void reg_f3d2<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
+    if (this->platformType == PlatformType::Cpu)
+        conBw = new F3dContent(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T));
+#ifdef _USE_CUDA
+    else if (this->platformType == PlatformType::Cuda)
+        conBw = new CudaF3dContent(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T));
+#endif
+    computeBw = this->platform->CreateCompute(*conBw);
+}
+/* *************************************************************** */
+template <class T>
+T reg_f3d2<T>::InitCurrentLevel(int currentLevel) {
+    // Set the current input images
+    nifti_image *reference, *floating;
+    int *referenceMask, *floatingMask;
+    if (currentLevel < 0) {
+        reference = this->inputReference;
+        floating = this->inputFloating;
+        referenceMask = nullptr;
+        floatingMask = nullptr;
+    } else {
+        const int index = this->usePyramid ? currentLevel : 0;
+        reference = this->referencePyramid[index];
+        floating = this->floatingPyramid[index];
+        referenceMask = this->maskPyramid[index];
+        floatingMask = floatingMaskPyramid[index];
+    }
+
+    // Define the initial step size for the gradient ascent optimisation
+    T maxStepSize = reference->dx;
+    maxStepSize = reference->dy > maxStepSize ? reference->dy : maxStepSize;
+    maxStepSize = floating->dx > maxStepSize ? floating->dx : maxStepSize;
+    maxStepSize = floating->dy > maxStepSize ? floating->dy : maxStepSize;
+    if (reference->ndim > 2) {
+        maxStepSize = (reference->dz > maxStepSize) ? reference->dz : maxStepSize;
+        maxStepSize = (floating->dz > maxStepSize) ? floating->dz : maxStepSize;
+    }
+
     // Refine the control point grids if required
-    if (this->gridRefinement) {
-        if (this->currentLevel == 0) {
+    // Don't if currentLevel < 0, since it's not required for GetWarpedImage()
+    if (this->gridRefinement && currentLevel >= 0) {
+        if (currentLevel == 0) {
             this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast<T>(powf(16, this->levelNumber - 1));
             this->linearEnergyWeight = this->linearEnergyWeight / static_cast<T>(powf(3, this->levelNumber - 1));
         } else {
+            this->bendingEnergyWeight = this->bendingEnergyWeight * 16;
+            this->linearEnergyWeight = this->linearEnergyWeight * 3;
             reg_spline_refineControlPointGrid(this->controlPointGrid);
-            reg_spline_refineControlPointGrid(backwardControlPointGrid);
-            this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast<T>(16);
-            this->linearEnergyWeight = this->linearEnergyWeight * static_cast<T>(3);
+            reg_spline_refineControlPointGrid(controlPointGridBw);
         }
     }
 
-    // Set the mask images
-    if (this->usePyramid) {
-        this->currentMask = this->maskPyramid[this->currentLevel];
-        floatingMask = floatingMaskPyramid[this->currentLevel];
-    } else {
-        this->currentMask = this->maskPyramid[0];
-        floatingMask = floatingMaskPyramid[0];
-    }
+    reg_f3d<T>::InitContent(reference, floating, referenceMask);
+    InitContent(reference, floating, floatingMask);
 
-    // Define the initial step size for the gradient ascent optimisation
-    T maxStepSize = this->reference->dx;
-    maxStepSize = this->reference->dy > maxStepSize ? this->reference->dy : maxStepSize;
-    maxStepSize = this->floating->dx > maxStepSize ? this->floating->dx : maxStepSize;
-    maxStepSize = this->floating->dy > maxStepSize ? this->floating->dy : maxStepSize;
-    if (this->reference->ndim > 2) {
-        maxStepSize = (this->reference->dz > maxStepSize) ? this->reference->dz : maxStepSize;
-        maxStepSize = (this->floating->dz > maxStepSize) ? this->floating->dz : maxStepSize;
-    }
 #ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::InitialiseCurrentLevel");
+    reg_print_fct_debug("reg_f3d2<T>::InitCurrentLevel");
 #endif
     return maxStepSize;
 }
 /* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::DeallocateCurrentInputImage() {
-    reg_f3d<T>::DeallocateCurrentInputImage();
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::DeallocateCurrentInputImage");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::AllocateWarped() {
-    DeallocateWarped();
-
-    reg_f3d<T>::AllocateWarped();
-    if (!this->floating) {
-        reg_print_fct_error("reg_f3d2<T>::AllocateWarped()");
-        reg_print_msg_error("The floating image is not defined");
-        reg_exit();
-    }
-    backwardWarped = nifti_copy_nim_info(this->floating);
-    backwardWarped->dim[0] = backwardWarped->ndim = this->reference->ndim;
-    backwardWarped->dim[4] = backwardWarped->nt = this->reference->nt;
-    backwardWarped->pixdim[4] = backwardWarped->dt = 1;
-    backwardWarped->nvox = size_t(backwardWarped->nx * backwardWarped->ny * backwardWarped->nz * backwardWarped->nt);
-    backwardWarped->datatype = this->reference->datatype;
-    backwardWarped->nbyper = this->reference->nbyper;
-    backwardWarped->data = calloc(backwardWarped->nvox, backwardWarped->nbyper);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::AllocateWarped");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::DeallocateWarped() {
-    reg_f3d<T>::DeallocateWarped();
-    if (backwardWarped) {
-        nifti_image_free(backwardWarped);
-        backwardWarped = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::DeallocateWarped");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::AllocateDeformationField() {
-    DeallocateDeformationField();
-
-    reg_f3d<T>::AllocateDeformationField();
-    if (!this->floating) {
-        reg_print_fct_error("reg_f3d2<T>::AllocateDeformationField()");
-        reg_print_msg_error("The floating image is not defined");
-        reg_exit();
-    }
-    if (!backwardControlPointGrid) {
-        reg_print_fct_error("reg_f3d2<T>::AllocateDeformationField()");
-        reg_print_msg_error("The backward control point image is not defined");
-        reg_exit();
-    }
-    backwardDeformationFieldImage = nifti_copy_nim_info(this->floating);
-    backwardDeformationFieldImage->dim[0] = backwardDeformationFieldImage->ndim = 5;
-    backwardDeformationFieldImage->dim[1] = backwardDeformationFieldImage->nx = this->floating->nx;
-    backwardDeformationFieldImage->dim[2] = backwardDeformationFieldImage->ny = this->floating->ny;
-    backwardDeformationFieldImage->dim[3] = backwardDeformationFieldImage->nz = this->floating->nz;
-    backwardDeformationFieldImage->dim[4] = backwardDeformationFieldImage->nt = 1;
-    backwardDeformationFieldImage->pixdim[4] = backwardDeformationFieldImage->dt = 1;
-    if (this->floating->nz == 1)
-        backwardDeformationFieldImage->dim[5] = backwardDeformationFieldImage->nu = 2;
-    else backwardDeformationFieldImage->dim[5] = backwardDeformationFieldImage->nu = 3;
-    backwardDeformationFieldImage->pixdim[5] = backwardDeformationFieldImage->du = 1;
-    backwardDeformationFieldImage->dim[6] = backwardDeformationFieldImage->nv = 1;
-    backwardDeformationFieldImage->pixdim[6] = backwardDeformationFieldImage->dv = 1;
-    backwardDeformationFieldImage->dim[7] = backwardDeformationFieldImage->nw = 1;
-    backwardDeformationFieldImage->pixdim[7] = backwardDeformationFieldImage->dw = 1;
-    backwardDeformationFieldImage->nvox = size_t(backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny *
-                                                 backwardDeformationFieldImage->nz * backwardDeformationFieldImage->nt *
-                                                 backwardDeformationFieldImage->nu);
-    backwardDeformationFieldImage->nbyper = backwardControlPointGrid->nbyper;
-    backwardDeformationFieldImage->datatype = backwardControlPointGrid->datatype;
-    backwardDeformationFieldImage->data = calloc(backwardDeformationFieldImage->nvox,
-                                                 backwardDeformationFieldImage->nbyper);
-    backwardDeformationFieldImage->intent_code = NIFTI_INTENT_VECTOR;
-    memset(backwardDeformationFieldImage->intent_name, 0, 16);
-    strcpy(backwardDeformationFieldImage->intent_name, "NREG_TRANS");
-    backwardDeformationFieldImage->intent_p1 = DEF_FIELD;
-    backwardDeformationFieldImage->scl_slope = 1;
-    backwardDeformationFieldImage->scl_inter = 0;
-
-    if (this->measure_dti)
-        backwardJacobianMatrix = (mat33*)malloc(backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny *
-                                                backwardDeformationFieldImage->nz * sizeof(mat33));
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::AllocateDeformationField");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::DeallocateDeformationField() {
-    reg_f3d<T>::DeallocateDeformationField();
-    if (backwardDeformationFieldImage) {
-        nifti_image_free(backwardDeformationFieldImage);
-        backwardDeformationFieldImage = nullptr;
-    }
-    if (backwardJacobianMatrix) {
-        free(backwardJacobianMatrix);
-        backwardJacobianMatrix = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::DeallocateDeformationField");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::AllocateWarpedGradient() {
-    DeallocateWarpedGradient();
-
-    reg_f3d<T>::AllocateWarpedGradient();
-    if (!backwardDeformationFieldImage) {
-        reg_print_fct_error("reg_f3d2<T>::AllocateWarpedGradient()");
-        reg_print_msg_error("The backward control point image is not defined");
-        reg_exit();
-    }
-    backwardWarpedGradientImage = nifti_copy_nim_info(backwardDeformationFieldImage);
-    backwardWarpedGradientImage->data = calloc(backwardWarpedGradientImage->nvox,
-                                               backwardWarpedGradientImage->nbyper);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::AllocateWarpedGradient");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::DeallocateWarpedGradient() {
-    reg_f3d<T>::DeallocateWarpedGradient();
-    if (backwardWarpedGradientImage) {
-        nifti_image_free(backwardWarpedGradientImage);
-        backwardWarpedGradientImage = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::DeallocateWarpedGradient");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::AllocateVoxelBasedMeasureGradient() {
-    DeallocateVoxelBasedMeasureGradient();
-
-    reg_f3d<T>::AllocateVoxelBasedMeasureGradient();
-    if (!backwardDeformationFieldImage) {
-        reg_print_fct_error("reg_f3d2<T>::AllocateVoxelBasedMeasureGradient()");
-        reg_print_msg_error("The backward control point image is not defined");
-        reg_exit();
-    }
-    backwardVoxelBasedMeasureGradientImage = nifti_copy_nim_info(backwardDeformationFieldImage);
-    backwardVoxelBasedMeasureGradientImage->data = calloc(backwardVoxelBasedMeasureGradientImage->nvox,
-                                                          backwardVoxelBasedMeasureGradientImage->nbyper);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::AllocateVoxelBasedMeasureGradient");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::DeallocateVoxelBasedMeasureGradient() {
-    reg_f3d<T>::DeallocateVoxelBasedMeasureGradient();
-    if (backwardVoxelBasedMeasureGradientImage) {
-        nifti_image_free(backwardVoxelBasedMeasureGradientImage);
-        backwardVoxelBasedMeasureGradientImage = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::DeallocateVoxelBasedMeasureGradient");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::AllocateTransformationGradient() {
-    DeallocateTransformationGradient();
-
-    reg_f3d<T>::AllocateTransformationGradient();
-    if (!backwardControlPointGrid) {
-        reg_print_fct_error("reg_f3d2<T>::AllocateTransformationGradient()");
-        reg_print_msg_error("The backward control point image is not defined");
-        reg_exit();
-    }
-    backwardTransformationGradient = nifti_copy_nim_info(backwardControlPointGrid);
-    backwardTransformationGradient->data = calloc(backwardTransformationGradient->nvox,
-                                                  backwardTransformationGradient->nbyper);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::AllocateTransformationGradient");
-#endif
-}
-/* *************************************************************** */
-template <class T>
-void reg_f3d2<T>::DeallocateTransformationGradient() {
-    reg_f3d<T>::DeallocateTransformationGradient();
-    if (backwardTransformationGradient) {
-        nifti_image_free(backwardTransformationGradient);
-        backwardTransformationGradient = nullptr;
+template<class T>
+void reg_f3d2<T>::DeinitCurrentLevel(int currentLevel) {
+    reg_f3d<T>::DeinitCurrentLevel(currentLevel);
+    delete computeBw;
+    computeBw = nullptr;
+    delete conBw;
+    conBw = nullptr;
+    if (currentLevel >= 0) {
+        if (this->usePyramid) {
+            free(floatingMaskPyramid[currentLevel]);
+            floatingMaskPyramid[currentLevel] = nullptr;
+        } else if (currentLevel == this->levelToPerform - 1) {
+            free(floatingMaskPyramid[0]);
+            floatingMaskPyramid[0] = nullptr;
+        }
     }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::DeallocateTransformationGradient");
-#endif
 }
 /* *************************************************************** */
 template<class T>
@@ -363,90 +206,47 @@ void reg_f3d2<T>::CheckParameters() {
 /* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::GetDeformationField() {
-    reg_spline_getDeformationField(this->controlPointGrid,
-                                   this->deformationFieldImage,
-                                   this->currentMask,
-                                   false, //composition
-                                   true); // bspline
-    reg_spline_getDeformationField(backwardControlPointGrid,
-                                   backwardDeformationFieldImage,
-                                   floatingMask,
-                                   false, //composition
-                                   true); // bspline
-
     // By default the number of steps is automatically updated
     bool updateStepNumber = true;
-    // The provided step number is used for the final resampling
     if (!this->optimiser)
         updateStepNumber = false;
+
 #ifndef NDEBUG
     char text[255];
     sprintf(text, "Velocity integration forward. Step number update=%i", updateStepNumber);
     reg_print_msg_debug(text);
 #endif
     // The forward transformation is computed using the scaling-and-squaring approach
-    reg_spline_getDefFieldFromVelocityGrid(this->controlPointGrid,
-                                           this->deformationFieldImage,
-                                           updateStepNumber);
+    this->compute->GetDefFieldFromVelocityGrid(updateStepNumber);
+
 #ifndef NDEBUG
     sprintf(text, "Velocity integration backward. Step number update=%i", updateStepNumber);
     reg_print_msg_debug(text);
 #endif
     // The number of step number is copied over from the forward transformation
-    backwardControlPointGrid->intent_p2 = this->controlPointGrid->intent_p2;
+    controlPointGridBw->intent_p2 = this->controlPointGrid->intent_p2;
     // The backward transformation is computed using the scaling-and-squaring approach
-    reg_spline_getDefFieldFromVelocityGrid(backwardControlPointGrid,
-                                           backwardDeformationFieldImage,
-                                           false);
+    computeBw->GetDefFieldFromVelocityGrid(false);
 }
 /* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::WarpFloatingImage(int inter) {
-    // Compute the deformation fields
-    GetDeformationField();
-
-    // Resample the floating image
-    if (!this->measure_dti) {
-        reg_resampleImage(this->floating,
-                          this->warped,
-                          this->deformationFieldImage,
-                          this->currentMask,
-                          inter,
-                          this->warpedPaddingValue);
-    } else {
-        reg_defField_getJacobianMatrix(this->deformationFieldImage,
-                                       this->forwardJacobianMatrix);
-        /*DTI needs fixing!
-        reg_resampleImage(this->floating,
-                          this->warped,
-                          this->deformationFieldImage,
-                          this->currentMask,
-                          inter,
-                          this->warpedPaddingValue,
-                          this->measure_dti->GetActiveTimepoints(),
-                          this->forwardJacobianMatrix);*/
-    }
+    reg_f3d<T>::WarpFloatingImage(inter);
 
     // Resample the reference image
     if (!this->measure_dti) {
+        computeBw->ResampleImage(inter, this->warpedPaddingValue);
+    } else {
+        // reg_defField_getJacobianMatrix(backwardDeformationFieldImage, backwardJacobianMatrix);
+        /* DTI needs fixing
         reg_resampleImage(this->reference, // input image
                           backwardWarped, // warped input image
                           backwardDeformationFieldImage, // deformation field
                           floatingMask, // mask
                           inter, // interpolation type
-                          this->warpedPaddingValue); // padding value
-    } else {
-        reg_defField_getJacobianMatrix(backwardDeformationFieldImage,
-                                       backwardJacobianMatrix);
-        /* DTI needs fixing
-       reg_resampleImage(this->reference, // input image
-                           backwardWarped, // warped input image
-                           backwardDeformationFieldImage, // deformation field
-                           floatingMask, // mask
-                           inter, // interpolation type
-                           this->warpedPaddingValue, // padding value
-                           this->measure_dti->GetActiveTimepoints(),
-                           backwardJacobianMatrix);*/
+                          this->warpedPaddingValue, // padding value
+                          this->measure_dti->GetActiveTimepoints(),
+                          backwardJacobianMatrix);*/
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::WarpFloatingImage");
@@ -461,17 +261,13 @@ double reg_f3d2<T>::ComputeJacobianBasedPenaltyTerm(int type) {
 
     bool approx = type == 2 ? false : this->jacobianLogApproximation;
 
-    double backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(backwardControlPointGrid,
-                                                                   this->floating,
-                                                                   approx);
+    double backwardPenaltyTerm = computeBw->GetJacobianPenaltyTerm(approx);
 
     unsigned int maxit = 5;
     if (type > 0) maxit = 20;
     unsigned int it = 0;
     while (backwardPenaltyTerm != backwardPenaltyTerm && it < maxit) {
-        backwardPenaltyTerm = reg_spline_correctFolding(backwardControlPointGrid,
-                                                        this->floating,
-                                                        approx);
+        backwardPenaltyTerm = computeBw->CorrectFolding(approx);
 #ifndef NDEBUG
         reg_print_msg_debug("Folding correction - Backward transformation");
 #endif
@@ -509,12 +305,12 @@ double reg_f3d2<T>::ComputeBendingEnergyPenaltyTerm() {
     if (this->bendingEnergyWeight <= 0) return 0;
 
     double forwardPenaltyTerm = reg_f3d<T>::ComputeBendingEnergyPenaltyTerm();
+    double backwardPenaltyTerm = this->bendingEnergyWeight * computeBw->ApproxBendingEnergy();
 
-    double value = reg_spline_approxBendingEnergy(backwardControlPointGrid);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::ComputeBendingEnergyPenaltyTerm");
 #endif
-    return forwardPenaltyTerm + this->bendingEnergyWeight * value;
+    return forwardPenaltyTerm + backwardPenaltyTerm;
 }
 /* *************************************************************** */
 template <class T>
@@ -522,8 +318,7 @@ double reg_f3d2<T>::ComputeLinearEnergyPenaltyTerm() {
     if (this->linearEnergyWeight <= 0) return 0;
 
     double forwardPenaltyTerm = reg_f3d<T>::ComputeLinearEnergyPenaltyTerm();
-
-    double backwardPenaltyTerm = this->linearEnergyWeight * reg_spline_approxLinearEnergy(backwardControlPointGrid);
+    double backwardPenaltyTerm = this->linearEnergyWeight * computeBw->ApproxLinearEnergy();
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::ComputeLinearEnergyPenaltyTerm");
@@ -536,9 +331,7 @@ double reg_f3d2<T>::ComputeLandmarkDistancePenaltyTerm() {
     if (this->landmarkRegWeight <= 0) return 0;
 
     double forwardPenaltyTerm = reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm();
-
-    double backwardPenaltyTerm = this->landmarkRegWeight * reg_spline_getLandmarkDistance(backwardControlPointGrid,
-                                                                                          this->landmarkRegNumber,
+    double backwardPenaltyTerm = this->landmarkRegWeight * computeBw->GetLandmarkDistance(this->landmarkRegNumber,
                                                                                           this->landmarkFloating,
                                                                                           this->landmarkReference);
 
@@ -551,12 +344,9 @@ double reg_f3d2<T>::ComputeLandmarkDistancePenaltyTerm() {
 template <class T>
 void reg_f3d2<T>::GetVoxelBasedGradient() {
     // The voxel based gradient image is initialised with zeros
-    reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient,
-                                   this->voxelBasedMeasureGradient,
-                                   0);
-    reg_tools_multiplyValueToImage(backwardVoxelBasedMeasureGradientImage,
-                                   backwardVoxelBasedMeasureGradientImage,
-                                   0);
+    dynamic_cast<F3dContent*>(this->con)->ZeroVoxelBasedMeasureGradient();
+    conBw->ZeroVoxelBasedMeasureGradient();
+
     // The intensity gradient is first computed
     //    if(this->measure_dti!=nullptr){
     //        reg_getImageGradient(this->floating,
@@ -584,23 +374,9 @@ void reg_f3d2<T>::GetVoxelBasedGradient() {
     //    else{
     //    }
 
-
-    for (int t = 0; t < this->reference->nt; ++t) {
-        reg_getImageGradient(this->floating,
-                             this->warpedGradient,
-                             this->deformationFieldImage,
-                             this->currentMask,
-                             this->interpolation,
-                             this->warpedPaddingValue,
-                             t);
-
-        reg_getImageGradient(this->reference,
-                             backwardWarpedGradientImage,
-                             backwardDeformationFieldImage,
-                             floatingMask,
-                             this->interpolation,
-                             this->warpedPaddingValue,
-                             t);
+    for (int t = 0; t < this->con->Content::GetReference()->nt; ++t) {
+        this->compute->GetImageGradient(this->interpolation, this->warpedPaddingValue, t);
+        computeBw->GetImageGradient(this->interpolation, this->warpedPaddingValue, t);
 
         // The gradient of the various measures of similarity are computed
         if (this->measure_nmi)
@@ -620,7 +396,7 @@ void reg_f3d2<T>::GetVoxelBasedGradient() {
 
         if (this->measure_mindssc)
             this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t);
-    } // timepoint
+    }
 
     // Exponentiate the gradients if required
     ExponentiateGradient();
@@ -634,49 +410,10 @@ template <class T>
 void reg_f3d2<T>::GetSimilarityMeasureGradient() {
     reg_f3d<T>::GetSimilarityMeasureGradient();
 
-    // The voxel based sim measure gradient is convolved with a spline kernel
-    // Convolution along the x axis
-    float currentNodeSpacing[3];
-    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dx;
-    bool activeAxis[3] = {1, 0, 0};
-    reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage,
-                                currentNodeSpacing,
-                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                nullptr, // mask
-                                nullptr, // all volumes are active
-                                activeAxis);
-    // Convolution along the y axis
-    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dy;
-    activeAxis[0] = 0;
-    activeAxis[1] = 1;
-    reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage,
-                                currentNodeSpacing,
-                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                nullptr, // mask
-                                nullptr, // all volumes are active
-                                activeAxis);
-    // Convolution along the z axis if required
-    if (this->voxelBasedMeasureGradient->nz > 1) {
-        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dz;
-        activeAxis[1] = 0;
-        activeAxis[2] = 1;
-        reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage,
-                                    currentNodeSpacing,
-                                    CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                    nullptr, // mask
-                                    nullptr, // all volumes are active
-                                    activeAxis);
-    }
-    // The backward node based sim measure gradient is extracted
-    mat44 reorientation;
-    if (this->reference->sform_code > 0)
-        reorientation = this->reference->sto_ijk;
-    else reorientation = this->reference->qto_ijk;
-    reg_voxelCentric2NodeCentric(backwardTransformationGradient,
-                                 backwardVoxelBasedMeasureGradientImage,
-                                 this->similarityWeight,
-                                 false, // no update
-                                 &reorientation); // voxel to mm conversion
+    // The voxel-based sim-measure-gradient is convolved with a spline kernel
+    // And the backward-node-based NMI gradient is extracted
+    computeBw->ConvolveVoxelBasedMeasureGradient(this->similarityWeight);
+
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::GetSimilarityMeasureGradient");
 #endif
@@ -687,12 +424,8 @@ void reg_f3d2<T>::GetJacobianBasedGradient() {
     if (this->jacobianLogWeight <= 0) return;
 
     reg_f3d<T>::GetJacobianBasedGradient();
+    computeBw->JacobianPenaltyTermGradient(this->jacobianLogWeight, this->jacobianLogApproximation);
 
-    reg_spline_getJacobianPenaltyTermGradient(backwardControlPointGrid,
-                                              this->floating,
-                                              backwardTransformationGradient,
-                                              this->jacobianLogWeight,
-                                              this->jacobianLogApproximation);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::GetJacobianBasedGradient");
 #endif
@@ -703,10 +436,8 @@ void reg_f3d2<T>::GetBendingEnergyGradient() {
     if (this->bendingEnergyWeight <= 0) return;
 
     reg_f3d<T>::GetBendingEnergyGradient();
+    computeBw->ApproxBendingEnergyGradient(this->bendingEnergyWeight);
 
-    reg_spline_approxBendingEnergyGradient(backwardControlPointGrid,
-                                           backwardTransformationGradient,
-                                           this->bendingEnergyWeight);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::GetBendingEnergyGradient");
 #endif
@@ -717,10 +448,8 @@ void reg_f3d2<T>::GetLinearEnergyGradient() {
     if (this->linearEnergyWeight <= 0) return;
 
     reg_f3d<T>::GetLinearEnergyGradient();
+    computeBw->ApproxLinearEnergyGradient(this->linearEnergyWeight);
 
-    reg_spline_approxLinearEnergyGradient(backwardControlPointGrid,
-                                          backwardTransformationGradient,
-                                          this->linearEnergyWeight);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::GetLinearEnergyGradient");
 #endif
@@ -731,40 +460,22 @@ void reg_f3d2<T>::GetLandmarkDistanceGradient() {
     if (this->landmarkRegWeight <= 0) return;
 
     reg_f3d<T>::GetLandmarkDistanceGradient();
-
-    reg_spline_getLandmarkDistanceGradient(backwardControlPointGrid,
-                                           backwardTransformationGradient,
-                                           this->landmarkRegNumber,
-                                           this->landmarkFloating,
-                                           this->landmarkReference,
-                                           this->landmarkRegWeight);
+    computeBw->LandmarkDistanceGradient(this->landmarkRegNumber,
+                                        this->landmarkFloating,
+                                        this->landmarkReference,
+                                        this->landmarkRegWeight);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::GetLandmarkDistanceGradient");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_f3d2<T>::SetGradientImageToZero() {
-    reg_f3d<T>::SetGradientImageToZero();
-
-    T *nodeGradPtr = static_cast<T*>(backwardTransformationGradient->data);
-    for (size_t i = 0; i < backwardTransformationGradient->nvox; ++i)
-        *nodeGradPtr++ = 0;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::SetGradientImageToZero");
-#endif
-}
-/* *************************************************************** */
-template <class T>
 void reg_f3d2<T>::SmoothGradient() {
-    if (this->gradientSmoothingSigma != 0) {
-        reg_f3d<T>::SmoothGradient();
-        // The gradient is smoothed using a Gaussian kernel if it is required
-        float kernel = fabs(this->gradientSmoothingSigma);
-        reg_tools_kernelConvolution(backwardTransformationGradient,
-                                    &kernel,
-                                    GAUSSIAN_KERNEL);
-    }
+    reg_f3d<T>::SmoothGradient();
+
+    // The gradient is smoothed using a Gaussian kernel if it is required
+    computeBw->SmoothGradient(this->gradientSmoothingSigma);
+
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::SmoothGradient");
 #endif
@@ -774,19 +485,8 @@ template <class T>
 void reg_f3d2<T>::GetApproximatedGradient() {
     reg_f3d<T>::GetApproximatedGradient();
 
-    // Loop over every control points
-    T *gridPtr = static_cast<T*>(backwardControlPointGrid->data);
-    T *gradPtr = static_cast<T*>(backwardTransformationGradient->data);
-    T eps = this->floating->dx / 1000.f;
-    for (size_t i = 0; i < backwardControlPointGrid->nvox; i++) {
-        T currentValue = this->optimiser->GetBestDOF_b()[i];
-        gridPtr[i] = currentValue + eps;
-        double valPlus = GetObjectiveFunctionValue();
-        gridPtr[i] = currentValue - eps;
-        double valMinus = GetObjectiveFunctionValue();
-        gridPtr[i] = currentValue;
-        gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps));
-    }
+    computeBw->GetApproximatedGradient(*this);
+
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::GetApproximatedGradient");
 #endif
@@ -795,62 +495,33 @@ void reg_f3d2<T>::GetApproximatedGradient() {
 template <class T>
 T reg_f3d2<T>::NormaliseGradient() {
     // The forward gradient max length is computed
-    T forwardMaxValue = reg_f3d<T>::NormaliseGradient();
+    const T forwardMaxGradLength = reg_f3d<T>::NormaliseGradient();
 
     // The backward gradient max length is computed
-    T maxGradValue = 0;
-    size_t voxNumber = backwardTransformationGradient->nx * backwardTransformationGradient->ny * backwardTransformationGradient->nz;
-    T *bckPtrX = static_cast<T*>(backwardTransformationGradient->data);
-    T *bckPtrY = &bckPtrX[voxNumber];
-    if (backwardTransformationGradient->nz > 1) {
-        T *bckPtrZ = &bckPtrY[voxNumber];
-        for (size_t i = 0; i < voxNumber; i++) {
-            T valX = 0, valY = 0, valZ = 0;
-            if (this->optimiseX)
-                valX = *bckPtrX++;
-            if (this->optimiseY)
-                valY = *bckPtrY++;
-            if (this->optimiseZ)
-                valZ = *bckPtrZ++;
-            T length = (T)(sqrt(valX * valX + valY * valY + valZ * valZ));
-            maxGradValue = (length > maxGradValue) ? length : maxGradValue;
-        }
-    } else {
-        for (size_t i = 0; i < voxNumber; i++) {
-            T valX = 0, valY = 0;
-            if (this->optimiseX)
-                valX = *bckPtrX++;
-            if (this->optimiseY)
-                valY = *bckPtrY++;
-            T length = (T)(sqrt(valX * valX + valY * valY));
-            maxGradValue = (length > maxGradValue) ? length : maxGradValue;
-        }
-    }
+    const T backwardMaxGradLength = (T)computeBw->GetMaximalLength(this->optimiser->GetVoxNumber_b(),
+                                                                   this->optimiseX,
+                                                                   this->optimiseY,
+                                                                   this->optimiseZ);
 
     // The largest value between the forward and backward gradient is kept
-    maxGradValue = maxGradValue > forwardMaxValue ? maxGradValue : forwardMaxValue;
+    const T maxGradLength = std::max(backwardMaxGradLength, forwardMaxGradLength);
+
 #ifndef NDEBUG
     char text[255];
-    sprintf(text, "Objective function gradient maximal length: %g", maxGradValue);
+    sprintf(text, "Objective function gradient maximal length: %g", maxGradLength);
     reg_print_msg_debug(text);
 #endif
 
     // The forward gradient is normalised
-    T *forPtrX = static_cast<T*>(this->transformationGradient->data);
-    for (size_t i = 0; i < this->transformationGradient->nvox; ++i) {
-        *forPtrX++ /= maxGradValue;
-    }
+    this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength);
     // The backward gradient is normalised
-    bckPtrX = static_cast<T*>(backwardTransformationGradient->data);
-    for (size_t i = 0; i < backwardTransformationGradient->nvox; ++i) {
-        *bckPtrX++ /= maxGradValue;
-    }
+    computeBw->NormaliseGradient(this->optimiser->GetVoxNumber_b(), maxGradLength);
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::NormaliseGradient");
 #endif
     // Returns the largest gradient distance
-    return maxGradValue;
+    return maxGradLength;
 }
 /* *************************************************************** */
 template<class T>
@@ -858,10 +529,11 @@ void reg_f3d2<T>::GetObjectiveFunctionGradient() {
     if (!this->useApproxGradient) {
         // Compute the gradient of the similarity measure
         if (this->similarityWeight > 0) {
-            this->WarpFloatingImage(this->interpolation);
+            WarpFloatingImage(this->interpolation);
             GetSimilarityMeasureGradient();
         } else {
-            SetGradientImageToZero();
+            dynamic_cast<F3dContent*>(this->con)->ZeroTransformationGradient();
+            conBw->ZeroTransformationGradient();
         }
     } else GetApproximatedGradient();
     this->optimiser->IncrementCurrentIterationNumber();
@@ -869,13 +541,12 @@ void reg_f3d2<T>::GetObjectiveFunctionGradient() {
     // Smooth the gradient if require
     SmoothGradient();
 
+    // Compute the penalty term gradients if required
     if (!this->useApproxGradient) {
-        // Compute the penalty term gradients if required
         GetBendingEnergyGradient();
         GetJacobianBasedGradient();
         GetLinearEnergyGradient();
         GetLandmarkDistanceGradient();
-        GetInverseConsistencyGradient();
     }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::GetObjectiveFunctionGradient");
@@ -883,252 +554,43 @@ void reg_f3d2<T>::GetObjectiveFunctionGradient() {
 }
 /* *************************************************************** */
 template <class T>
-void reg_f3d2<T>::DisplayCurrentLevelParameters() {
-    reg_f3d<T>::DisplayCurrentLevelParameters();
+void reg_f3d2<T>::DisplayCurrentLevelParameters(int currentLevel) {
+    reg_f3d<T>::DisplayCurrentLevelParameters(currentLevel);
 #ifdef NDEBUG
     if (this->verbose) {
 #endif
         char text[255];
         reg_print_info(this->executableName, "Current backward control point image");
         sprintf(text, "\t* image dimension: %i x %i x %i",
-                backwardControlPointGrid->nx, backwardControlPointGrid->ny, backwardControlPointGrid->nz);
+                controlPointGridBw->nx, controlPointGridBw->ny, controlPointGridBw->nz);
         reg_print_info(this->executableName, text);
         sprintf(text, "\t* image spacing: %g x %g x %g mm",
-                backwardControlPointGrid->dx, backwardControlPointGrid->dy, backwardControlPointGrid->dz);
+                controlPointGridBw->dx, controlPointGridBw->dy, controlPointGridBw->dz);
         reg_print_info(this->executableName, text);
 #ifdef NDEBUG
     }
 #endif
 
 #ifndef NDEBUG
-
-    if (backwardControlPointGrid->sform_code > 0)
-        reg_mat44_disp(&(backwardControlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP sform");
-    else reg_mat44_disp(&(backwardControlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP qform");
+    if (controlPointGridBw->sform_code > 0)
+        reg_mat44_disp(&controlPointGridBw->sto_xyz, (char*)"[NiftyReg DEBUG] Backward CPP sform");
+    else reg_mat44_disp(&controlPointGridBw->qto_xyz, (char*)"[NiftyReg DEBUG] Backward CPP qform");
 #endif
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::DisplayCurrentLevelParameters");
 #endif
 }
 /* *************************************************************** */
-template<class T>
-void reg_f3d2<T>::GetInverseConsistencyErrorField(bool forceAll) {
-    if (inverseConsistencyWeight <= 0) return;
-
-    // Compute both deformation fields
-    if (this->similarityWeight <= 0 || forceAll)
-        GetDeformationField();
-    // Compose the obtained deformation fields by the inverse transformations
-    reg_spline_getDeformationField(backwardControlPointGrid,
-                                   this->deformationFieldImage,
-                                   this->currentMask,
-                                   true, // composition
-                                   true); // use B-Spline
-    reg_spline_getDeformationField(this->controlPointGrid,
-                                   backwardDeformationFieldImage,
-                                   floatingMask,
-                                   true, // composition
-                                   true); // use B-Spline
-    // Convert the deformation fields into displacement
-    reg_getDisplacementFromDeformation(this->deformationFieldImage);
-    reg_getDisplacementFromDeformation(backwardDeformationFieldImage);
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetInverseConsistencyErrorField");
-#endif
-}
-/* *************************************************************** */
-template<class T>
-double reg_f3d2<T>::GetInverseConsistencyPenaltyTerm() {
-    if (inverseConsistencyWeight <= 0) return 0;
-
-    GetInverseConsistencyErrorField(false);
-
-    double ferror = 0;
-    size_t voxelNumber = this->deformationFieldImage->nx * this->deformationFieldImage->ny * this->deformationFieldImage->nz;
-    T *dispPtrX = static_cast<T*>(this->deformationFieldImage->data);
-    T *dispPtrY = &dispPtrX[voxelNumber];
-    if (this->deformationFieldImage->nz > 1) {
-        T *dispPtrZ = &dispPtrY[voxelNumber];
-        for (size_t i = 0; i < voxelNumber; ++i) {
-            if (this->currentMask[i] > -1) {
-                double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]);
-                ferror += dist;
-            }
-        }
-    } else {
-        for (size_t i = 0; i < voxelNumber; ++i) {
-            if (this->currentMask[i] > -1) {
-                double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]);
-                ferror += dist;
-            }
-        }
-    }
-
-    double berror = 0;
-    voxelNumber = backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * backwardDeformationFieldImage->nz;
-    dispPtrX = static_cast<T*>(backwardDeformationFieldImage->data);
-    dispPtrY = &dispPtrX[voxelNumber];
-    if (backwardDeformationFieldImage->nz > 1) {
-        T *dispPtrZ = &dispPtrY[voxelNumber];
-        for (size_t i = 0; i < voxelNumber; ++i) {
-            if (floatingMask[i] > -1) {
-                double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]);
-                berror += dist;
-            }
-        }
-    } else {
-        for (size_t i = 0; i < voxelNumber; ++i) {
-            if (floatingMask[i] > -1) {
-                double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]);
-                berror += dist;
-            }
-        }
-    }
-    double error = (ferror / double(this->activeVoxelNumber[this->currentLevel]) +
-                    berror / double(backwardActiveVoxelNumber[this->currentLevel]));
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetInverseConsistencyPenaltyTerm");
-#endif
-    return double(inverseConsistencyWeight) * error;
-}
-/* *************************************************************** */
-template<class T>
-void reg_f3d2<T>::GetInverseConsistencyGradient() {
-    if (inverseConsistencyWeight <= 0) return;
-
-    // Note: I simplified the gradient computation in order to include
-    // only d(B(F(x)))/d(forwardNode) and d(F(B(x)))/d(backwardNode)
-    // I ignored d(F(B(x)))/d(forwardNode) and d(B(F(x)))/d(backwardNode)
-    // cause it would only be an approximation since I don't have the
-    // real inverses
-    GetInverseConsistencyErrorField(true);
-
-    // The forward inverse consistency field is masked
-    size_t forwardVoxelNumber = this->deformationFieldImage->nx * this->deformationFieldImage->ny * this->deformationFieldImage->nz;
-    T *defPtrX = static_cast<T*>(this->deformationFieldImage->data);
-    T *defPtrY = &defPtrX[forwardVoxelNumber];
-    T *defPtrZ = &defPtrY[forwardVoxelNumber];
-    for (size_t i = 0; i < forwardVoxelNumber; ++i) {
-        if (this->currentMask[i] < 0) {
-            defPtrX[i] = 0;
-            defPtrY[i] = 0;
-            if (this->deformationFieldImage->nz > 1)
-                defPtrZ[i] = 0;
-        }
-    }
-    // The backward inverse consistency field is masked
-    size_t backwardVoxelNumber = backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * backwardDeformationFieldImage->nz;
-    defPtrX = static_cast<T*>(backwardDeformationFieldImage->data);
-    defPtrY = &defPtrX[backwardVoxelNumber];
-    defPtrZ = &defPtrY[backwardVoxelNumber];
-    for (size_t i = 0; i < backwardVoxelNumber; ++i) {
-        if (floatingMask[i] < 0) {
-            defPtrX[i] = 0;
-            defPtrY[i] = 0;
-            if (backwardDeformationFieldImage->nz > 1)
-                defPtrZ[i] = 0;
-        }
-    }
-
-    // We convolve the inverse consistency map with a cubic B-Spline kernel
-    // Convolution along the x axis
-    float currentNodeSpacing[3];
-    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dx;
-    bool activeAxis[3] = {1, 0, 0};
-    reg_tools_kernelConvolution(this->deformationFieldImage,
-                                currentNodeSpacing,
-                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                nullptr, // all volumes are active
-                                activeAxis);
-    // Convolution along the y axis
-    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dy;
-    activeAxis[0] = 0;
-    activeAxis[1] = 1;
-    reg_tools_kernelConvolution(this->deformationFieldImage,
-                                currentNodeSpacing,
-                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                nullptr, // all volumes are active
-                                activeAxis);
-    // Convolution along the z axis if required
-    if (this->voxelBasedMeasureGradient->nz > 1) {
-        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dz;
-        activeAxis[1] = 0;
-        activeAxis[2] = 1;
-        reg_tools_kernelConvolution(this->deformationFieldImage,
-                                    currentNodeSpacing,
-                                    CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                    nullptr, // all volumes are active
-                                    activeAxis);
-    }
-    // The forward inverse consistency gradient is extracted at the node position
-    reg_voxelCentric2NodeCentric(this->transformationGradient,
-                                 this->deformationFieldImage,
-                                 2.f * inverseConsistencyWeight,
-                                 true, // update the current value
-                                 nullptr); // no voxel to mm conversion
-
-    // We convolve the inverse consistency map with a cubic B-Spline kernel
-    // Convolution along the x axis
-    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dx;
-    activeAxis[0] = 1;
-    activeAxis[1] = 0;
-    activeAxis[2] = 0;
-    reg_tools_kernelConvolution(backwardDeformationFieldImage,
-                                currentNodeSpacing,
-                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                nullptr, // all volumes are active
-                                activeAxis);
-    // Convolution along the y axis
-    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dy;
-    activeAxis[0] = 0;
-    activeAxis[1] = 1;
-    reg_tools_kernelConvolution(backwardDeformationFieldImage,
-                                currentNodeSpacing,
-                                CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                nullptr, // all volumes are active
-                                activeAxis);
-    // Convolution along the z axis if required
-    if (this->voxelBasedMeasureGradient->nz > 1) {
-        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dz;
-        activeAxis[1] = 0;
-        activeAxis[2] = 1;
-        reg_tools_kernelConvolution(backwardDeformationFieldImage,
-                                    currentNodeSpacing,
-                                    CUBIC_SPLINE_KERNEL, // cubic spline kernel
-                                    nullptr, // all volumes are active
-                                    activeAxis);
-    }
-    // The backward inverse consistency gradient is extracted at the node position
-    reg_voxelCentric2NodeCentric(backwardTransformationGradient,
-                                 backwardDeformationFieldImage,
-                                 2.f * inverseConsistencyWeight,
-                                 true, // update the current value
-                                 nullptr); // no voxel to mm conversion
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetInverseConsistencyGradient");
-#endif
-}
-/* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::SetOptimiser() {
-    if (this->useConjGradient)
-        this->optimiser = new reg_conjugateGradient<T>();
-    else this->optimiser = new reg_optimiser<T>();
-    this->optimiser->Initialise(this->controlPointGrid->nvox,
-                                this->controlPointGrid->nz > 1 ? 3 : 2,
-                                this->optimiseX,
-                                this->optimiseY,
-                                this->optimiseZ,
-                                this->maxIterationNumber,
-                                0, // currentIterationNumber
-                                this,
-                                static_cast<T*>(this->controlPointGrid->data),
-                                static_cast<T*>(this->transformationGradient->data),
-                                backwardControlPointGrid->nvox,
-                                static_cast<T*>(backwardControlPointGrid->data),
-                                static_cast<T*>(backwardTransformationGradient->data));
+    this->optimiser = this->platform->template CreateOptimiser<T>(*dynamic_cast<F3dContent*>(this->con),
+                                                                  *this,
+                                                                  this->maxIterationNumber,
+                                                                  this->useConjGradient,
+                                                                  this->optimiseX,
+                                                                  this->optimiseY,
+                                                                  this->optimiseZ,
+                                                                  conBw);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::SetOptimiser");
 #endif
@@ -1151,8 +613,6 @@ void reg_f3d2<T>::PrintCurrentObjFunctionValue(T currentSize) {
         sprintf(text + strlen(text), " - (wJAC)%.2e", this->bestWJac);
     if (this->landmarkRegWeight > 0)
         sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand);
-    if (inverseConsistencyWeight > 0)
-        sprintf(text + strlen(text), " - (wIC)%.2e", bestIC);
     sprintf(text + strlen(text), " [+ %g mm]", currentSize);
     reg_print_info(this->executableName, text);
 #ifndef NDEBUG
@@ -1163,7 +623,6 @@ void reg_f3d2<T>::PrintCurrentObjFunctionValue(T currentSize) {
 template<class T>
 void reg_f3d2<T>::UpdateBestObjFunctionValue() {
     reg_f3d<T>::UpdateBestObjFunctionValue();
-    bestIC = currentIC;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::UpdateBestObjFunctionValue");
 #endif
@@ -1173,9 +632,6 @@ template<class T>
 void reg_f3d2<T>::PrintInitialObjFunctionValue() {
     if (!this->verbose) return;
     reg_f3d<T>::PrintInitialObjFunctionValue();
-    //   char text[255];
-    //   sprintf(text, "Initial Inverse consistency value: %g", bestIC);
-    //   reg_print_info(this->executableName, text);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::PrintInitialObjFunctionValue");
 #endif
@@ -1194,18 +650,15 @@ double reg_f3d2<T>::GetObjectiveFunctionValue() {
     // Compute initial similarity measure
     this->currentWMeasure = 0;
     if (this->similarityWeight > 0) {
-        this->WarpFloatingImage(this->interpolation);
+        WarpFloatingImage(this->interpolation);
         this->currentWMeasure = this->ComputeSimilarityMeasure();
     }
 
-    // Compute the Inverse consistency penalty term if required
-    currentIC = GetInverseConsistencyPenaltyTerm();
-
 #ifndef NDEBUG
     char text[255];
-    sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g | (wIC) %g",
+    sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g",
             this->currentWMeasure, this->currentWBE, this->currentWLE,
-            this->currentWJac, this->currentWLand, currentIC);
+            this->currentWJac, this->currentWLand);
     reg_print_msg_debug(text);
 #endif
 
@@ -1213,108 +666,34 @@ double reg_f3d2<T>::GetObjectiveFunctionValue() {
     reg_print_fct_debug("reg_f3d2<T>::GetObjectiveFunctionValue");
 #endif
     // Store the global objective function value
-    return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - currentIC;
+    return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac;
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d2<T>::InitialiseSimilarity() {
-    // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
-    if (!this->measure_nmi && !this->measure_ssd && !this->measure_dti && !this->measure_lncc &&
-        !this->measure_kld && !this->measure_mind && !this->measure_mindssc) {
-        this->measure_nmi = new reg_nmi;
-        for (int i = 0; i < this->inputReference->nt; ++i)
-            this->measure_nmi->SetTimepointWeight(i, 1);
-    }
+    F3dContent& con = *dynamic_cast<F3dContent*>(this->con);
+
     if (this->measure_nmi)
-        this->measure_nmi->InitialiseMeasure(this->reference,
-                                             this->floating,
-                                             this->currentMask,
-                                             this->warped,
-                                             this->warpedGradient,
-                                             this->voxelBasedMeasureGradient,
-                                             this->localWeightSimCurrent,
-                                             floatingMask,
-                                             backwardWarped,
-                                             backwardWarpedGradientImage,
-                                             backwardVoxelBasedMeasureGradientImage);
+        this->measure->Initialise(*this->measure_nmi, con, conBw);
 
     if (this->measure_ssd)
-        this->measure_ssd->InitialiseMeasure(this->reference,
-                                             this->floating,
-                                             this->currentMask,
-                                             this->warped,
-                                             this->warpedGradient,
-                                             this->voxelBasedMeasureGradient,
-                                             this->localWeightSimCurrent,
-                                             floatingMask,
-                                             backwardWarped,
-                                             backwardWarpedGradientImage,
-                                             backwardVoxelBasedMeasureGradientImage);
+        this->measure->Initialise(*this->measure_ssd, con, conBw);
 
     if (this->measure_kld)
-        this->measure_kld->InitialiseMeasure(this->reference,
-                                             this->floating,
-                                             this->currentMask,
-                                             this->warped,
-                                             this->warpedGradient,
-                                             this->voxelBasedMeasureGradient,
-                                             this->localWeightSimCurrent,
-                                             floatingMask,
-                                             backwardWarped,
-                                             backwardWarpedGradientImage,
-                                             backwardVoxelBasedMeasureGradientImage);
+        this->measure->Initialise(*this->measure_kld, con, conBw);
 
     if (this->measure_lncc)
-        this->measure_lncc->InitialiseMeasure(this->reference,
-                                              this->floating,
-                                              this->currentMask,
-                                              this->warped,
-                                              this->warpedGradient,
-                                              this->voxelBasedMeasureGradient,
-                                              this->localWeightSimCurrent,
-                                              floatingMask,
-                                              backwardWarped,
-                                              backwardWarpedGradientImage,
-                                              backwardVoxelBasedMeasureGradientImage);
+        this->measure->Initialise(*this->measure_lncc, con, conBw);
 
     if (this->measure_dti)
-        this->measure_dti->InitialiseMeasure(this->reference,
-                                             this->floating,
-                                             this->currentMask,
-                                             this->warped,
-                                             this->warpedGradient,
-                                             this->voxelBasedMeasureGradient,
-                                             this->localWeightSimCurrent,
-                                             floatingMask,
-                                             backwardWarped,
-                                             backwardWarpedGradientImage,
-                                             backwardVoxelBasedMeasureGradientImage);
+        this->measure->Initialise(*this->measure_dti, con, conBw);
 
     if (this->measure_mind)
-        this->measure_mind->InitialiseMeasure(this->reference,
-                                              this->floating,
-                                              this->currentMask,
-                                              this->warped,
-                                              this->warpedGradient,
-                                              this->voxelBasedMeasureGradient,
-                                              this->localWeightSimCurrent,
-                                              floatingMask,
-                                              backwardWarped,
-                                              backwardWarpedGradientImage,
-                                              backwardVoxelBasedMeasureGradientImage);
+        this->measure->Initialise(*this->measure_mind, con, conBw);
 
     if (this->measure_mindssc)
-        this->measure_mindssc->InitialiseMeasure(this->reference,
-                                                 this->floating,
-                                                 this->currentMask,
-                                                 this->warped,
-                                                 this->warpedGradient,
-                                                 this->voxelBasedMeasureGradient,
-                                                 this->localWeightSimCurrent,
-                                                 floatingMask,
-                                                 backwardWarped,
-                                                 backwardWarpedGradientImage,
-                                                 backwardVoxelBasedMeasureGradientImage);
+        this->measure->Initialise(*this->measure_mindssc, con, conBw);
+
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::InitialiseSimilarity");
 #endif
@@ -1323,11 +702,11 @@ void reg_f3d2<T>::InitialiseSimilarity() {
 template<class T>
 nifti_image* reg_f3d2<T>::GetBackwardControlPointPositionImage() {
     // Create a control point grid nifti image
-    nifti_image *returnedControlPointGrid = nifti_copy_nim_info(backwardControlPointGrid);
+    nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGridBw);
     // Allocate the new image data array
     returnedControlPointGrid->data = malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
     // Copy the final backward control point grid image
-    memcpy(returnedControlPointGrid->data, backwardControlPointGrid->data,
+    memcpy(returnedControlPointGrid->data, controlPointGridBw->data,
            returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
     // Return the new control point grid
 #ifndef NDEBUG
@@ -1373,7 +752,7 @@ void reg_f3d2<T>::Initialise() {
 
         // Create the forward and backward control point grids
         reg_createSymmetricControlPointGrids<T>(&this->controlPointGrid,
-                                                &backwardControlPointGrid,
+                                                &controlPointGridBw,
                                                 this->referencePyramid[0],
                                                 this->floatingPyramid[0],
                                                 this->affineTransformation,
@@ -1392,50 +771,49 @@ void reg_f3d2<T>::Initialise() {
         if (this->controlPointGrid->nz > 1)
             this->spacing[2] = this->controlPointGrid->dz / powf(2, this->levelNumber - 1);
         // The backward grid is derived from the forward
-        backwardControlPointGrid = nifti_copy_nim_info(this->controlPointGrid);
-        backwardControlPointGrid->data = malloc(backwardControlPointGrid->nvox * backwardControlPointGrid->nbyper);
+        controlPointGridBw = nifti_copy_nim_info(this->controlPointGrid);
+        controlPointGridBw->data = malloc(controlPointGridBw->nvox * controlPointGridBw->nbyper);
         if (this->controlPointGrid->num_ext > 0)
-            nifti_copy_extensions(backwardControlPointGrid, this->controlPointGrid);
-        memcpy(backwardControlPointGrid->data, this->controlPointGrid->data,
-               backwardControlPointGrid->nvox * backwardControlPointGrid->nbyper);
-        reg_getDisplacementFromDeformation(backwardControlPointGrid);
-        reg_tools_multiplyValueToImage(backwardControlPointGrid, backwardControlPointGrid, -1);
-        reg_getDeformationFromDisplacement(backwardControlPointGrid);
-        for (int i = 0; i < backwardControlPointGrid->num_ext; ++i) {
-            mat44 tempMatrix = nifti_mat44_inverse(*reinterpret_cast<mat44 *>(backwardControlPointGrid->ext_list[i].edata));
-            memcpy(backwardControlPointGrid->ext_list[i].edata, &tempMatrix, sizeof(mat44));
+            nifti_copy_extensions(controlPointGridBw, this->controlPointGrid);
+        memcpy(controlPointGridBw->data, this->controlPointGrid->data,
+               controlPointGridBw->nvox * controlPointGridBw->nbyper);
+        reg_getDisplacementFromDeformation(controlPointGridBw);
+        reg_tools_multiplyValueToImage(controlPointGridBw, controlPointGridBw, -1);
+        reg_getDeformationFromDisplacement(controlPointGridBw);
+        for (int i = 0; i < controlPointGridBw->num_ext; ++i) {
+            mat44 tempMatrix = nifti_mat44_inverse(*reinterpret_cast<mat44 *>(controlPointGridBw->ext_list[i].edata));
+            memcpy(controlPointGridBw->ext_list[i].edata, &tempMatrix, sizeof(mat44));
         }
     }
 
     // Set the floating mask image pyramid
     if (this->usePyramid) {
         floatingMaskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*));
-        backwardActiveVoxelNumber = (int*)malloc(this->levelToPerform * sizeof(int));
+        activeVoxelNumberBw = (int*)malloc(this->levelToPerform * sizeof(int));
     } else {
         floatingMaskPyramid = (int**)malloc(sizeof(int*));
-        backwardActiveVoxelNumber = (int*)malloc(sizeof(int));
+        activeVoxelNumberBw = (int*)malloc(sizeof(int));
     }
 
     if (this->usePyramid) {
-        if (floatingMaskImage)
+        if (floatingMaskImage) {
             reg_createMaskPyramid<T>(floatingMaskImage,
                                      floatingMaskPyramid,
                                      this->levelNumber,
                                      this->levelToPerform,
-                                     backwardActiveVoxelNumber);
-        else {
+                                     activeVoxelNumberBw);
+        } else {
             for (unsigned int l = 0; l < this->levelToPerform; ++l) {
-                backwardActiveVoxelNumber[l] = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz;
-                floatingMaskPyramid[l] = (int*)calloc(backwardActiveVoxelNumber[l], sizeof(int));
+                activeVoxelNumberBw[l] = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz;
+                floatingMaskPyramid[l] = (int*)calloc(activeVoxelNumberBw[l], sizeof(int));
             }
         }
-    } else  // no pyramid
-    {
+    } else {  // no pyramid
         if (floatingMaskImage)
-            reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, 1, 1, backwardActiveVoxelNumber);
+            reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, 1, 1, activeVoxelNumberBw);
         else {
-            backwardActiveVoxelNumber[0] = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz;
-            floatingMaskPyramid[0] = (int*)calloc(backwardActiveVoxelNumber[0], sizeof(int));
+            activeVoxelNumberBw[0] = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz;
+            floatingMaskPyramid[0] = (int*)calloc(activeVoxelNumberBw[0], sizeof(int));
         }
     }
 
@@ -1444,8 +822,7 @@ void reg_f3d2<T>::Initialise() {
 #endif
         if (inverseConsistencyWeight > 0) {
             char text[255];
-            sprintf(text, "Inverse consistency error penalty term weight: %g",
-                    inverseConsistencyWeight);
+            sprintf(text, "Inverse consistency error penalty term weight: %g", inverseConsistencyWeight);
             reg_print_info(this->executableName, text);
         }
 #ifdef NDEBUG
@@ -1454,10 +831,12 @@ void reg_f3d2<T>::Initialise() {
 
     // Convert the control point grid into velocity field parametrisation
     this->controlPointGrid->intent_p1 = SPLINE_VEL_GRID;
-    backwardControlPointGrid->intent_p1 = SPLINE_VEL_GRID;
+    controlPointGridBw->intent_p1 = SPLINE_VEL_GRID;
     // Set the number of composition to 6 by default
-    this->controlPointGrid->intent_p2 = 6;
-    backwardControlPointGrid->intent_p2 = 6;
+    this->controlPointGrid->intent_p2 = controlPointGridBw->intent_p2 = 6;
+
+    if (this->affineTransformation)
+        affineTransformationBw = new mat44(nifti_mat44_inverse(*this->affineTransformation));
 
 #ifndef NDEBUG
     reg_print_msg_debug("reg_f3d2::Initialise() done");
@@ -1468,122 +847,21 @@ template <class T>
 void reg_f3d2<T>::ExponentiateGradient() {
     if (!useGradientCumulativeExp) return;
 
-    /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
     // Exponentiate the forward gradient using the backward transformation
 #ifndef NDEBUG
     reg_print_msg_debug("Update the forward measure gradient using a Dartel like approach");
 #endif
-    // Create all deformation field images needed for resampling
-    nifti_image **tempDef = (nifti_image**)malloc(size_t(fabs(backwardControlPointGrid->intent_p2) + 1) * sizeof(nifti_image*));
-    for (int i = 0; i <= (int)fabs(backwardControlPointGrid->intent_p2); ++i) {
-        tempDef[i] = nifti_copy_nim_info(this->deformationFieldImage);
-        tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper);
-    }
-    // Generate all intermediate deformation fields
-    reg_spline_getIntermediateDefFieldFromVelGrid(backwardControlPointGrid, tempDef);
-
-    // Remove the affine component
-    nifti_image *affine_disp = nullptr;
-    if (this->affineTransformation) {
-        affine_disp = nifti_copy_nim_info(this->deformationFieldImage);
-        affine_disp->data = malloc(affine_disp->nvox * affine_disp->nbyper);
-        mat44 backwardAffineTransformation = nifti_mat44_inverse(*this->affineTransformation);
-        reg_affine_getDeformationField(&backwardAffineTransformation, affine_disp);
-        reg_getDisplacementFromDeformation(affine_disp);
-    }
+    this->compute->ExponentiateGradient(*conBw);
 
-    /* Allocate a temporary gradient image to store the backward gradient */
-    nifti_image *tempGrad = nifti_copy_nim_info(this->voxelBasedMeasureGradient);
-
-    tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper);
-    for (int i = 0; i < (int)fabsf(backwardControlPointGrid->intent_p2); ++i) {
-        if (affine_disp)
-            reg_tools_substractImageToImage(tempDef[i], affine_disp, tempDef[i]);
-        reg_resampleGradient(this->voxelBasedMeasureGradient, // floating
-                             tempGrad, // warped - out
-                             tempDef[i], // deformation field
-                             1, // interpolation type - linear
-                             0); // padding value
-        reg_tools_addImageToImage(tempGrad, // in1
-                                  this->voxelBasedMeasureGradient, // in2
-                                  this->voxelBasedMeasureGradient); // out
-    }
-
-    // Free the temporary deformation fields
-    for (int i = 0; i <= (int)fabsf(backwardControlPointGrid->intent_p2); ++i) {
-        nifti_image_free(tempDef[i]);
-        tempDef[i] = nullptr;
-    }
-    free(tempDef);
-    tempDef = nullptr;
-    // Free the temporary gradient image
-    nifti_image_free(tempGrad);
-    tempGrad = nullptr;
-    // Free the temporary affine displacement field
-    if (affine_disp)
-        nifti_image_free(affine_disp);
-    affine_disp = nullptr;
-    // Normalise the forward gradient
-    reg_tools_divideValueToImage(this->voxelBasedMeasureGradient, // in
-                                 this->voxelBasedMeasureGradient, // out
-                                 powf(2, fabsf(backwardControlPointGrid->intent_p2))); // value
-
-    /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */
     /* Exponentiate the backward gradient using the forward transformation */
 #ifndef NDEBUG
     reg_print_msg_debug("Update the backward measure gradient using a Dartel like approach");
 #endif
-    // Allocate a temporary gradient image to store the backward gradient
-    tempGrad = nifti_copy_nim_info(backwardVoxelBasedMeasureGradientImage);
-    tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper);
-    // Create all deformation field images needed for resampling
-    tempDef = (nifti_image**)malloc(size_t(fabs(this->controlPointGrid->intent_p2) + 1) * sizeof(nifti_image*));
-    for (int i = 0; i <= (int)fabs(this->controlPointGrid->intent_p2); ++i) {
-        tempDef[i] = nifti_copy_nim_info(backwardDeformationFieldImage);
-        tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper);
-    }
-    // Generate all intermediate deformation fields
-    reg_spline_getIntermediateDefFieldFromVelGrid(this->controlPointGrid, tempDef);
-
-    // Remove the affine component
-    if (this->affineTransformation) {
-        affine_disp = nifti_copy_nim_info(backwardDeformationFieldImage);
-        affine_disp->data = malloc(affine_disp->nvox * affine_disp->nbyper);
-        reg_affine_getDeformationField(this->affineTransformation, affine_disp);
-        reg_getDisplacementFromDeformation(affine_disp);
-    }
-
-    for (int i = 0; i < (int)fabsf(this->controlPointGrid->intent_p2); ++i) {
-        if (affine_disp)
-            reg_tools_substractImageToImage(tempDef[i], affine_disp, tempDef[i]);
-        reg_resampleGradient(backwardVoxelBasedMeasureGradientImage, // floating
-                             tempGrad, // warped - out
-                             tempDef[i], // deformation field
-                             1, // interpolation type - linear
-                             0); // padding value
-        reg_tools_addImageToImage(tempGrad, // in1
-                                  backwardVoxelBasedMeasureGradientImage, // in2
-                                  backwardVoxelBasedMeasureGradientImage); // out
-    }
+    computeBw->ExponentiateGradient(*this->con);
 
-    // Free the temporary deformation field
-    for (int i = 0; i <= (int)fabsf(this->controlPointGrid->intent_p2); ++i) {
-        nifti_image_free(tempDef[i]);
-        tempDef[i] = nullptr;
-    }
-    free(tempDef);
-    tempDef = nullptr;
-    // Free the temporary gradient image
-    nifti_image_free(tempGrad);
-    tempGrad = nullptr;
-    // Free the temporary affine displacement field
-    if (affine_disp)
-        nifti_image_free(affine_disp);
-    affine_disp = nullptr;
-    // Normalise the backward gradient
-    reg_tools_divideValueToImage(backwardVoxelBasedMeasureGradientImage, // in
-                                 backwardVoxelBasedMeasureGradientImage, // out
-                                 powf(2, fabsf(this->controlPointGrid->intent_p2))); // value
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::ExponentiateGradient");
+#endif
 }
 /* *************************************************************** */
 template <class T>
@@ -1591,173 +869,64 @@ void reg_f3d2<T>::UpdateParameters(float scale) {
     // Restore the last successful control point grids
     this->optimiser->RestoreBestDOF();
 
-    /************************/
-    /**** Forward update ****/
-    /************************/
-    // Scale the gradient image
-    nifti_image *forwardScaledGradient = nifti_copy_nim_info(this->transformationGradient);
-    forwardScaledGradient->data = malloc(forwardScaledGradient->nvox * forwardScaledGradient->nbyper);
-    reg_tools_multiplyValueToImage(this->transformationGradient,
-                                   forwardScaledGradient,
-                                   scale);
     // The scaled gradient image is added to the current estimate of the transformation using
     // a simple addition or by computing the BCH update
     // Note that the gradient has been integrated over the path of transformation previously
     if (bchUpdate) {
-        // Compute the BCH update
+        // Forward update
         reg_print_msg_warn("USING BCH FORWARD - TESTING ONLY");
 #ifndef NDEBUG
         reg_print_msg_debug("Update the forward control point grid using BCH approximation");
 #endif
-        compute_BCH_update(this->controlPointGrid,
-                           forwardScaledGradient,
-                           bchUpdateValue);
-    } else {
-        // Reset the gradient along the axes if appropriate
-        reg_setGradientToZero(forwardScaledGradient,
-                              !this->optimiser->GetOptimiseX(),
-                              !this->optimiser->GetOptimiseY(),
-                              !this->optimiser->GetOptimiseZ());
-        // Update the velocity field
-        reg_tools_addImageToImage(this->controlPointGrid, // in1
-                                  forwardScaledGradient, // in2
-                                  this->controlPointGrid); // out
-    }
-    // Clean the temporary nifti_images
-    nifti_image_free(forwardScaledGradient);
-    forwardScaledGradient = nullptr;
-
-    /************************/
-    /**** Backward update ***/
-    /************************/
-    // Scale the gradient image
-    nifti_image *backwardScaledGradient = nifti_copy_nim_info(backwardTransformationGradient);
-    backwardScaledGradient->data = malloc(backwardScaledGradient->nvox * backwardScaledGradient->nbyper);
-    reg_tools_multiplyValueToImage(backwardTransformationGradient,
-                                   backwardScaledGradient,
-                                   scale);
-    // The scaled gradient image is added to the current estimate of the transformation using
-    // a simple addition or by computing the BCH update
-    // Note that the gradient has been integrated over the path of transformation previously
-    if (bchUpdate) {
-        // Compute the BCH update
+        this->compute->BchUpdate(scale, bchUpdateValue);
+
+        // Backward update
         reg_print_msg_warn("USING BCH BACKWARD - TESTING ONLY");
 #ifndef NDEBUG
         reg_print_msg_debug("Update the backward control point grid using BCH approximation");
 #endif
-        compute_BCH_update(backwardControlPointGrid,
-                           backwardScaledGradient,
-                           bchUpdateValue);
+        computeBw->BchUpdate(scale, bchUpdateValue);
     } else {
-        // Reset the gradient along the axes if appropriate
-        reg_setGradientToZero(backwardScaledGradient,
-                              !this->optimiser->GetOptimiseX(),
-                              !this->optimiser->GetOptimiseY(),
-                              !this->optimiser->GetOptimiseZ());
-        // Update the velocity field
-        reg_tools_addImageToImage(backwardControlPointGrid, // in1
-                                  backwardScaledGradient, // in2
-                                  backwardControlPointGrid); // out
+        // Forward update
+        this->compute->UpdateVelocityField(scale,
+                                           this->optimiser->GetOptimiseX(),
+                                           this->optimiser->GetOptimiseY(),
+                                           this->optimiser->GetOptimiseZ());
+        // Backward update
+        computeBw->UpdateVelocityField(scale,
+                                       this->optimiser->GetOptimiseX(),
+                                       this->optimiser->GetOptimiseY(),
+                                       this->optimiser->GetOptimiseZ());
     }
-    // Clean the temporary nifti_images
-    nifti_image_free(backwardScaledGradient);
-    backwardScaledGradient = nullptr;
-
-    /****************************/
-    /******** Symmetrise ********/
-    /****************************/
-
-    // In order to ensure symmetry the forward and backward velocity fields
-    // are averaged in both image spaces: reference and floating
-    /****************************/
-    nifti_image *warpedForwardTrans = nifti_copy_nim_info(backwardControlPointGrid);
-    warpedForwardTrans->data = malloc(warpedForwardTrans->nvox * warpedForwardTrans->nbyper);
-    nifti_image *warpedBackwardTrans = nifti_copy_nim_info(this->controlPointGrid);
-    warpedBackwardTrans->data = malloc(warpedBackwardTrans->nvox * warpedBackwardTrans->nbyper);
-
-    // Both parametrisations are converted into displacement
-    reg_getDisplacementFromDeformation(this->controlPointGrid);
-    reg_getDisplacementFromDeformation(backwardControlPointGrid);
-
-    // Both parametrisations are copied over
-    memcpy(warpedBackwardTrans->data, backwardControlPointGrid->data, warpedBackwardTrans->nvox * warpedBackwardTrans->nbyper);
-    memcpy(warpedForwardTrans->data, this->controlPointGrid->data, warpedForwardTrans->nvox * warpedForwardTrans->nbyper);
-
-    // and subtracted (sum and negation)
-    reg_tools_substractImageToImage(backwardControlPointGrid, // displacement
-                                    warpedForwardTrans, // displacement
-                                    backwardControlPointGrid); // displacement output
-    reg_tools_substractImageToImage(this->controlPointGrid, // displacement
-                                    warpedBackwardTrans, // displacement
-                                    this->controlPointGrid); // displacement output
-    // Division by 2
-    reg_tools_multiplyValueToImage(backwardControlPointGrid, // displacement
-                                   backwardControlPointGrid, // displacement
-                                   0.5f);
-    reg_tools_multiplyValueToImage(this->controlPointGrid, // displacement
-                                   this->controlPointGrid, // displacement
-                                   0.5f);
-    // Clean the temporary allocated velocity fields
-    nifti_image_free(warpedForwardTrans);
-    warpedForwardTrans = nullptr;
-    nifti_image_free(warpedBackwardTrans);
-    warpedBackwardTrans = nullptr;
-
-    // Convert the velocity field from displacement to deformation
-    reg_getDeformationFromDisplacement(this->controlPointGrid);
-    reg_getDeformationFromDisplacement(backwardControlPointGrid);
+
+    // Symmetrise
+    this->compute->SymmetriseVelocityFields(*conBw);
 }
 /* *************************************************************** */
 template<class T>
 nifti_image** reg_f3d2<T>::GetWarpedImage() {
     // The initial images are used
-    if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !backwardControlPointGrid) {
+    if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !controlPointGridBw) {
         reg_print_fct_error("reg_f3d2<T>::GetWarpedImage()");
         reg_print_msg_error("The reference, floating and control point grid images have to be defined");
         reg_exit();
     }
 
-    // Set the input images
-    reg_f3d2<T>::reference = this->inputReference;
-    reg_f3d2<T>::floating = this->inputFloating;
-    // No mask is used to perform the final resampling
-    reg_f3d2<T>::currentMask = nullptr;
-    reg_f3d2<T>::floatingMask = nullptr;
-
-    // Allocate the forward and backward warped images
-    AllocateWarped();
-    // Allocate the forward and backward dense deformation field
-    AllocateDeformationField();
-
-    // Warp the floating images into the reference spaces using a cubic spline interpolation
-    reg_f3d2<T>::WarpFloatingImage(3); // cubic spline interpolation
-
-    // Deallocate the deformation field
-    DeallocateDeformationField();
-
-    // Allocate and save the forward transformation warped image
-    nifti_image **warpedImage = (nifti_image**)malloc(2 * sizeof(nifti_image*));
-    warpedImage[0] = nifti_copy_nim_info(this->warped);
-    warpedImage[0]->cal_min = this->inputFloating->cal_min;
-    warpedImage[0]->cal_max = this->inputFloating->cal_max;
-    warpedImage[0]->scl_slope = this->inputFloating->scl_slope;
-    warpedImage[0]->scl_inter = this->inputFloating->scl_inter;
-    warpedImage[0]->data = malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper);
-    memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox * warpedImage[0]->nbyper);
-
-    // Allocate and save the backward transformation warped image
-    warpedImage[1] = nifti_copy_nim_info(backwardWarped);
-    warpedImage[1]->cal_min = this->inputReference->cal_min;
-    warpedImage[1]->cal_max = this->inputReference->cal_max;
-    warpedImage[1]->scl_slope = this->inputReference->scl_slope;
-    warpedImage[1]->scl_inter = this->inputReference->scl_inter;
-    warpedImage[1]->data = malloc(warpedImage[1]->nvox * warpedImage[1]->nbyper);
-    memcpy(warpedImage[1]->data, backwardWarped->data, warpedImage[1]->nvox * warpedImage[1]->nbyper);
-
-    // Deallocate the warped images
-    DeallocateWarped();
-
-    // Return the two final warped images
+    InitCurrentLevel(-1);
+
+    WarpFloatingImage(3); // cubic spline interpolation
+
+    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
+    nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
+    warpedImage[0] = con->GetWarped();
+    warpedImage[1] = conBw->GetWarped();
+
+    con->SetWarped(nullptr); // Prevent deallocating of warpedImage
+    conBw->SetWarped(nullptr);
+    DeinitCurrentLevel(-1);
+#ifndef NDEBUG
+    reg_print_fct_debug("reg_f3d2<T>::GetWarpedImage");
+#endif
     return warpedImage;
 }
 /* *************************************************************** */
diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h
index 19d5e4ab..f851c2d1 100644
--- a/reg-lib/_reg_f3d2.h
+++ b/reg-lib/_reg_f3d2.h
@@ -20,41 +20,21 @@ class reg_f3d2: public reg_f3d<T> {
 protected:
     nifti_image *floatingMaskImage;
     int **floatingMaskPyramid;
-    int *floatingMask;
-    int *backwardActiveVoxelNumber;
-
-    nifti_image *backwardControlPointGrid;
-    nifti_image *backwardDeformationFieldImage;
-    nifti_image *backwardWarped;
-    nifti_image *backwardWarpedGradientImage;
-    nifti_image *backwardVoxelBasedMeasureGradientImage;
-    nifti_image *backwardTransformationGradient;
-
-    mat33 *backwardJacobianMatrix;
-
+    nifti_image *controlPointGridBw;
+    int *activeVoxelNumberBw;
+    mat44 *affineTransformationBw;
     T inverseConsistencyWeight;
-    double currentIC;
-    double bestIC;
-
     bool bchUpdate;
     bool useGradientCumulativeExp;
     int bchUpdateValue;
 
-    // Optimiser-related function
-    virtual void SetOptimiser() override;
+    // Content backwards
+    F3dContent *conBw = nullptr;
 
-    virtual void AllocateWarped();
-    virtual void DeallocateWarped();
-    virtual void AllocateDeformationField();
-    virtual void DeallocateDeformationField();
-    virtual void AllocateWarpedGradient();
-    virtual void DeallocateWarpedGradient();
-    virtual void AllocateVoxelBasedMeasureGradient();
-    virtual void DeallocateVoxelBasedMeasureGradient();
-    virtual void AllocateTransformationGradient();
-    virtual void DeallocateTransformationGradient();
-    virtual void DeallocateCurrentInputImage();
+    // Compute backwards
+    Compute *computeBw = nullptr;
 
+    virtual void SetOptimiser() override;
     virtual double ComputeBendingEnergyPenaltyTerm() override;
     virtual double ComputeLinearEnergyPenaltyTerm() override;
     virtual double ComputeJacobianBasedPenaltyTerm(int) override;
@@ -68,37 +48,34 @@ class reg_f3d2: public reg_f3d<T> {
     virtual void GetLinearEnergyGradient() override;
     virtual void GetJacobianBasedGradient() override;
     virtual void GetLandmarkDistanceGradient() override;
-    virtual void SetGradientImageToZero() override;
     virtual T NormaliseGradient() override;
     virtual void SmoothGradient() override;
     virtual void GetApproximatedGradient() override;
-    virtual void DisplayCurrentLevelParameters() override;
+    virtual void DisplayCurrentLevelParameters(int) override;
     virtual void PrintInitialObjFunctionValue() override;
     virtual void PrintCurrentObjFunctionValue(T) override;
     virtual void UpdateBestObjFunctionValue() override;
     virtual double GetObjectiveFunctionValue() override;
-
-    virtual T InitialiseCurrentLevel() override;
+    void InitContent(nifti_image*, nifti_image*, int*);
+    virtual T InitCurrentLevel(int) override;
+    virtual void DeinitCurrentLevel(int) override;
     virtual void UpdateParameters(float) override;
     virtual void InitialiseSimilarity() override;
+    virtual void CheckParameters() override;
+    virtual void Initialise() override;
 
-    virtual void GetInverseConsistencyErrorField(bool forceAll);
-    virtual double GetInverseConsistencyPenaltyTerm();
-    virtual void GetInverseConsistencyGradient();
     virtual void ExponentiateGradient();
 
 public:
     reg_f3d2(int refTimePoint, int floTimePoint);
     virtual ~reg_f3d2();
 
-    virtual void SetFloatingMask(nifti_image*) override;
-    virtual void SetInverseConsistencyWeight(T) override;
-    virtual void CheckParameters() override;
-    virtual void Initialise() override;
-    virtual nifti_image** GetWarpedImage() override;
     virtual nifti_image* GetBackwardControlPointPositionImage() override;
-    virtual bool GetSymmetricStatus() { return true; }
+    virtual nifti_image** GetWarpedImage() override;
+    virtual bool GetSymmetricStatus() override { return true; }
 
+    virtual void SetFloatingMask(nifti_image*) override;
+    virtual void SetInverseConsistencyWeight(T) override;
     virtual void UseBCHUpdate(int) override;
     virtual void UseGradientCumulativeExp() override;
     virtual void DoNotUseGradientCumulativeExp() override;
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 784f7b84..615c22e0 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -84,6 +84,7 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar
 }
 /* *************************************************************** */
 void CudaCompute::GetDeformationField(bool composition, bool bspline) {
+    // TODO Fix reg_spline_getDeformationField_gpu to accept composition
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     reg_spline_getDeformationField_gpu(con.F3dContent::GetControlPointGrid(),
                                        con.F3dContent::GetReference(),
@@ -104,6 +105,7 @@ void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF,
 }
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
+    // TODO Fix reg_getImageGradient_gpu to accept interpolation and activeTimepoint
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     reg_getImageGradient_gpu(con.F3dContent::GetFloating(),
                              con.GetFloatingCuda(),
@@ -113,15 +115,6 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac
                              paddingValue);
 }
 /* *************************************************************** */
-void CudaCompute::VoxelCentricToNodeCentric(float weight) {
-    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(),
-                                     con.F3dContent::GetControlPointGrid(),
-                                     con.GetVoxelBasedMeasureGradientCuda(),
-                                     con.GetTransformationGradientCuda(),
-                                     weight);
-}
-/* *************************************************************** */
 double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ
     return reg_getMaximalLength_gpu(dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), nodeNumber);
@@ -132,9 +125,79 @@ void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
     reg_multiplyValue_gpu(nodeNumber, dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), 1 / (float)maxGradLength);
 }
 /* *************************************************************** */
+void CudaCompute::SmoothGradient(float sigma) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    if (sigma != 0) {
+        Compute::SmoothGradient(sigma);
+        // Update the changes for GPU
+        dynamic_cast<CudaF3dContent&>(con).UpdateTransformationGradient();
+    }
+}
+/* *************************************************************** */
 void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) {
     // TODO Implement this for CUDA
     // Use CPU temporarily
     Compute::GetApproximatedGradient(opt);
 }
 /* *************************************************************** */
+void CudaCompute::GetDefFieldFromVelocityGrid(bool updateStepNumber) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    Compute::GetDefFieldFromVelocityGrid(updateStepNumber);
+    // Transfer the data back to the CUDA device
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    // TODO update only the required ones
+    con.UpdateControlPointGrid();
+    con.SetDeformationField(con.F3dContent::GetDeformationField());
+}
+/* *************************************************************** */
+void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    Compute::ConvolveImage(con.GetVoxelBasedMeasureGradient());
+    // Transfer the data back to the CUDA device
+    con.UpdateVoxelBasedMeasureGradient();
+
+    // The node-based NMI gradient is extracted
+    reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(),
+                                     con.F3dContent::GetControlPointGrid(),
+                                     con.GetVoxelBasedMeasureGradientCuda(),
+                                     con.GetTransformationGradientCuda(),
+                                     weight);
+}
+/* *************************************************************** */
+void CudaCompute::ExponentiateGradient(Content& conBwIn) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    Compute::ExponentiateGradient(conBwIn);
+    // Transfer the data back to the CUDA device
+    dynamic_cast<CudaF3dContent&>(con).UpdateVoxelBasedMeasureGradient();
+}
+/* *************************************************************** */
+void CudaCompute::UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    Compute::UpdateVelocityField(scale, optimiseX, optimiseY, optimiseZ);
+    // Transfer the data back to the CUDA device
+    dynamic_cast<CudaF3dContent&>(con).UpdateControlPointGrid();
+}
+/* *************************************************************** */
+void CudaCompute::BchUpdate(float scale, int bchUpdateValue) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    Compute::BchUpdate(scale, bchUpdateValue);
+    // Transfer the data back to the CUDA device
+    dynamic_cast<CudaF3dContent&>(con).UpdateControlPointGrid();
+}
+/* *************************************************************** */
+void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) {
+    // TODO Implement this for CUDA
+    // Use CPU temporarily
+    Compute::SymmetriseVelocityFields(conBwIn);
+    // Transfer the data back to the CUDA device
+    dynamic_cast<CudaF3dContent&>(con).UpdateControlPointGrid();
+    dynamic_cast<CudaF3dContent&>(conBwIn).UpdateControlPointGrid();
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 284dd0d8..e9796408 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -19,8 +19,14 @@ class CudaCompute: public Compute {
     virtual void GetDeformationField(bool composition, bool bspline) override;
     virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override;
-    virtual void VoxelCentricToNodeCentric(float weight) override;
     virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength) override;
+    virtual void SmoothGradient(float sigma) override;
     virtual void GetApproximatedGradient(InterfaceOptimiser& opt) override;
+    virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber) override;
+    virtual void ConvolveVoxelBasedMeasureGradient(float weight) override;
+    virtual void ExponentiateGradient(Content& conBw) override;
+    virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override;
+    virtual void BchUpdate(float scale, int bchUpdateValue) override;
+    virtual void SymmetriseVelocityFields(Content& conBw) override;
 };
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
index d6b8176c..ca57c782 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -28,22 +28,23 @@ reg_measure* CudaMeasure::Create(const MeasureType& measureType) {
     return nullptr;
 }
 /* *************************************************************** */
-void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con) {
-    reg_measure_gpu *measureGpu = dynamic_cast<reg_measure_gpu*>(&measure);
-    CudaF3dContent *cudaCon = dynamic_cast<CudaF3dContent*>(&con);
-    measureGpu->InitialiseMeasure(cudaCon->Content::GetReference(),
-                                  cudaCon->Content::GetFloating(),
-                                  cudaCon->Content::GetReferenceMask(),
-                                  cudaCon->Content::GetReference()->nvox,
-                                  cudaCon->Content::GetWarped(),
-                                  cudaCon->F3dContent::GetWarpedGradient(),
-                                  cudaCon->F3dContent::GetVoxelBasedMeasureGradient(),
-                                  cudaCon->F3dContent::GetLocalWeightSim(),
-                                  cudaCon->GetReferenceCuda(),
-                                  cudaCon->GetFloatingCuda(),
-                                  cudaCon->GetReferenceMaskCuda(),
-                                  cudaCon->GetWarpedCuda(),
-                                  cudaCon->GetWarpedGradientCuda(),
-                                  cudaCon->GetVoxelBasedMeasureGradientCuda());
+void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) {
+    // TODO Implement symmetric scheme for CUDA measure types
+    reg_measure_gpu& measureGpu = dynamic_cast<reg_measure_gpu&>(measure);
+    CudaF3dContent& cudaCon = dynamic_cast<CudaF3dContent&>(con);
+    measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(),
+                                 cudaCon.Content::GetFloating(),
+                                 cudaCon.Content::GetReferenceMask(),
+                                 cudaCon.Content::GetReference()->nvox,
+                                 cudaCon.Content::GetWarped(),
+                                 cudaCon.F3dContent::GetWarpedGradient(),
+                                 cudaCon.F3dContent::GetVoxelBasedMeasureGradient(),
+                                 cudaCon.F3dContent::GetLocalWeightSim(),
+                                 cudaCon.GetReferenceCuda(),
+                                 cudaCon.GetFloatingCuda(),
+                                 cudaCon.GetReferenceMaskCuda(),
+                                 cudaCon.GetWarpedCuda(),
+                                 cudaCon.GetWarpedGradientCuda(),
+                                 cudaCon.GetVoxelBasedMeasureGradientCuda());
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h
index c9c7f510..6b178611 100644
--- a/reg-lib/cuda/CudaMeasure.h
+++ b/reg-lib/cuda/CudaMeasure.h
@@ -5,5 +5,5 @@
 class CudaMeasure: public Measure {
 public:
     virtual reg_measure* Create(const MeasureType& measureType) override;
-    virtual void Initialise(reg_measure& measure, F3dContent& con) override;
+    virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr) override;
 };

From 690f552a1adbae44bef8ece49ee28ff2d916829c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 6 Feb 2023 14:18:40 +0000
Subject: [PATCH 043/314] Move activeVoxelNumber calculation into Content
 classes

---
 CMakeLists.txt                  |  2 +-
 niftyreg_build_version.txt      |  2 +-
 reg-apps/reg_measure.cpp        |  2 +-
 reg-lib/Content.cpp             |  3 +-
 reg-lib/Content.h               |  2 ++
 reg-lib/_reg_aladin.cpp         | 11 ++----
 reg-lib/_reg_aladin.h           |  1 -
 reg-lib/_reg_aladin_sym.cpp     | 14 ++------
 reg-lib/_reg_aladin_sym.h       |  1 -
 reg-lib/_reg_base.cpp           | 19 ++++-------
 reg-lib/_reg_base.h             |  1 -
 reg-lib/_reg_f3d2.cpp           | 24 ++++---------
 reg-lib/_reg_f3d2.h             |  1 -
 reg-lib/cpu/_reg_localTrans.cpp |  2 +-
 reg-lib/cpu/_reg_tools.cpp      | 60 +++++++++++++--------------------
 reg-lib/cpu/_reg_tools.h        | 11 ++----
 reg-lib/cuda/CudaCompute.cpp    |  6 ++--
 reg-lib/cuda/CudaContent.cpp    | 13 ++++---
 reg-lib/cuda/CudaMeasure.cpp    |  2 +-
 19 files changed, 64 insertions(+), 113 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0bec2caf..bea681ba 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -73,7 +73,7 @@ option(USE_OPENCL "To use the OpenCL platform" OFF)
 option(USE_OPENMP "To use openMP for multi-CPU processing" ON)
 option(USE_SSE "To enable SEE computation in some case" ON)
 #-----------------------------------------------------------------------------
-option(USE_THROW_EXCEP "To throw exeception rather than exit" OFF)
+option(USE_THROW_EXCEP "To throw exception rather than exit" OFF)
 mark_as_advanced(USE_THROW_EXCEP)
 #-----------------------------------------------------------------------------
 option(USE_NRRD "To use the NRRD file format" OFF)
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 91b629b0..29e49a01 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-156
+157
diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp
index e7e7fbc1..62442cc4 100755
--- a/reg-apps/reg_measure.cpp
+++ b/reg-apps/reg_measure.cpp
@@ -245,7 +245,7 @@ int main(int argc, char **argv)
                  param->refMaskImageName);
          return EXIT_FAILURE;
       }
-      reg_createMaskPyramid<float>(refMaskImage, &refMask, 1, 1, &refMaskVoxNumber);
+      reg_createMaskPyramid<float>(refMaskImage, &refMask, 1, 1);
    }
    else{
       refMask = (int *)calloc(refMaskVoxNumber,sizeof(int));
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index 5df249fb..04ab478e 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -17,8 +17,9 @@ Content::Content(nifti_image *referenceIn,
     }
     AllocateWarped();
     AllocateDeformationField(bytesIn);
+    activeVoxelNumber = reference->nvox;
     if (!referenceMask)
-        referenceMask = (int*)calloc(reference->nvox, sizeof(int));
+        referenceMask = (int*)calloc(activeVoxelNumber, sizeof(int));
 }
 /* *************************************************************** */
 Content::~Content() {
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index 5b6b8a4c..8da20be2 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -15,6 +15,7 @@ class Content {
     virtual bool IsCurrentComputationDoubleCapable() { return true; }
 
     // Getters
+    virtual size_t GetActiveVoxelNumber() { return activeVoxelNumber; }
     virtual nifti_image* GetReference() { return reference; }
     virtual nifti_image* GetFloating() { return floating; }
     virtual nifti_image* GetDeformationField() { return deformationField; }
@@ -37,6 +38,7 @@ class Content {
     }
 
 protected:
+    size_t activeVoxelNumber = 0;
     nifti_image *reference = nullptr;
     nifti_image *floating = nullptr;
     nifti_image *deformationField = nullptr;
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index daa21fbb..a194064b 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -10,7 +10,6 @@ reg_aladin<T>::reg_aladin() {
     this->referencePyramid = nullptr;
     this->floatingPyramid = nullptr;
     this->referenceMaskPyramid = nullptr;
-    this->activeVoxelNumber = nullptr;
 
     this->transformationMatrix = new mat44;
     this->inputTransformName = nullptr;
@@ -99,8 +98,6 @@ reg_aladin<T>::~reg_aladin() {
         free(this->referenceMaskPyramid);
         this->referenceMaskPyramid = nullptr;
     }
-    if (this->activeVoxelNumber != nullptr)
-        free(this->activeVoxelNumber);
     if (this->platform != nullptr)
         delete this->platform;
 #ifndef NDEBUG
@@ -233,7 +230,6 @@ void reg_aladin<T>::InitialiseRegistration() {
     this->referencePyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *));
     this->floatingPyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *));
     this->referenceMaskPyramid = (int **)malloc(this->levelsToPerform * sizeof(int *));
-    this->activeVoxelNumber = (int *)malloc(this->levelsToPerform * sizeof(int));
 
     // FINEST LEVEL OF REGISTRATION
     reg_createImagePyramid<T>(this->inputReference,
@@ -249,12 +245,11 @@ void reg_aladin<T>::InitialiseRegistration() {
         reg_createMaskPyramid<T>(this->inputReferenceMask,
                                  this->referenceMaskPyramid,
                                  this->numberOfLevels,
-                                 this->levelsToPerform,
-                                 this->activeVoxelNumber);
+                                 this->levelsToPerform);
     else {
         for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
-            this->activeVoxelNumber[l] = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz;
-            this->referenceMaskPyramid[l] = (int *)calloc(activeVoxelNumber[l], sizeof(int));
+            const size_t voxelNumber = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz;
+            this->referenceMaskPyramid[l] = (int *)calloc(voxelNumber, sizeof(int));
         }
     }
 
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 59864741..a07a304e 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -77,7 +77,6 @@ class reg_aladin {
     nifti_image **referencePyramid;
     nifti_image **floatingPyramid;
     int **referenceMaskPyramid;
-    int *activeVoxelNumber; ///TODO Needs to be removed
 
     char *inputTransformName;
     mat44 *transformationMatrix;
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index bb89632b..bab14aaa 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -10,7 +10,6 @@ reg_aladin_sym<T>::reg_aladin_sym ()
 
    this->InputFloatingMask=nullptr;
    this->FloatingMaskPyramid=nullptr;
-   this->BackwardActiveVoxelNumber=nullptr;
 
    this->BackwardTransformationMatrix=new mat44;
 
@@ -52,9 +51,6 @@ reg_aladin_sym<T>::~reg_aladin_sym()
       free(this->FloatingMaskPyramid);
       this->FloatingMaskPyramid=nullptr;
    }
-   if(this->BackwardActiveVoxelNumber!=nullptr)
-     free(this->BackwardActiveVoxelNumber);
-   this->BackwardActiveVoxelNumber=nullptr;
 
 #ifndef NDEBUG
    reg_print_msg_debug("reg_aladin_sym destructor called");
@@ -77,21 +73,19 @@ void reg_aladin_sym<T>::InitialiseRegistration()
 
    reg_aladin<T>::InitialiseRegistration();
    this->FloatingMaskPyramid = (int **) malloc(this->levelsToPerform*sizeof(int *));
-   this->BackwardActiveVoxelNumber= (int *)malloc(this->levelsToPerform*sizeof(int));
    if (this->InputFloatingMask!=nullptr)
    {
       reg_createMaskPyramid<T>(this->InputFloatingMask,
                                this->FloatingMaskPyramid,
                                this->numberOfLevels,
-                               this->levelsToPerform,
-                               this->BackwardActiveVoxelNumber);
+                               this->levelsToPerform);
    }
    else
    {
       for(unsigned int l=0; l<this->levelsToPerform; ++l)
       {
-         this->BackwardActiveVoxelNumber[l]=this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz;
-         this->FloatingMaskPyramid[l]=(int *)calloc(this->BackwardActiveVoxelNumber[l],sizeof(int));
+         const size_t voxelNumberBw = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz;
+         this->FloatingMaskPyramid[l]=(int *)calloc(voxelNumberBw,sizeof(int));
       }
    }
 
@@ -116,7 +110,6 @@ void reg_aladin_sym<T>::InitialiseRegistration()
                }
             }
          }
-         this->BackwardActiveVoxelNumber[l] -= removedVoxel;
       }
    }
    if(this->floatingLowerThreshold!=-std::numeric_limits<T>::max())
@@ -139,7 +132,6 @@ void reg_aladin_sym<T>::InitialiseRegistration()
                }
             }
          }
-         this->BackwardActiveVoxelNumber[l] -= removedVoxel;
       }
    }
 
diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h
index 0cd4ec5f..35434d56 100644
--- a/reg-lib/_reg_aladin_sym.h
+++ b/reg-lib/_reg_aladin_sym.h
@@ -36,7 +36,6 @@ class reg_aladin_sym : public reg_aladin<T> {
 protected:
   nifti_image *InputFloatingMask;
   int **FloatingMaskPyramid;
-  int *BackwardActiveVoxelNumber;
 
   _reg_blockMatchingParam *BackwardBlockMatchingParams;
 
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 5f428ea1..e6e452fd 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -73,7 +73,6 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     referencePyramid = nullptr;
     floatingPyramid = nullptr;
     maskPyramid = nullptr;
-    activeVoxelNumber = nullptr;
 
     interpolation = 1;
 
@@ -140,10 +139,6 @@ reg_base<T>::~reg_base() {
         free(floatingPyramid);
         floatingPyramid = nullptr;
     }
-    if (activeVoxelNumber) {
-        free(activeVoxelNumber);
-        activeVoxelNumber = nullptr;
-    }
     if (referenceThresholdUp) {
         delete[]referenceThresholdUp;
         referenceThresholdUp = nullptr;
@@ -621,12 +616,10 @@ void reg_base<T>::Initialise() {
         referencePyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*));
         floatingPyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*));
         maskPyramid = (int**)malloc(levelToPerform * sizeof(int*));
-        activeVoxelNumber = (int*)malloc(levelToPerform * sizeof(int));
     } else {
         referencePyramid = (nifti_image**)malloc(sizeof(nifti_image*));
         floatingPyramid = (nifti_image**)malloc(sizeof(nifti_image*));
         maskPyramid = (int**)malloc(sizeof(int*));
-        activeVoxelNumber = (int*)malloc(sizeof(int));
     }
 
     // Update the input images threshold if required
@@ -669,21 +662,21 @@ void reg_base<T>::Initialise() {
         reg_createImagePyramid<T>(inputReference, referencePyramid, levelNumber, levelToPerform);
         reg_createImagePyramid<T>(inputFloating, floatingPyramid, levelNumber, levelToPerform);
         if (maskImage)
-            reg_createMaskPyramid<T>(maskImage, maskPyramid, levelNumber, levelToPerform, activeVoxelNumber);
+            reg_createMaskPyramid<T>(maskImage, maskPyramid, levelNumber, levelToPerform);
         else {
             for (unsigned int l = 0; l < levelToPerform; ++l) {
-                activeVoxelNumber[l] = referencePyramid[l]->nx * referencePyramid[l]->ny * referencePyramid[l]->nz;
-                maskPyramid[l] = (int*)calloc(activeVoxelNumber[l], sizeof(int));
+                const size_t voxelNumber = referencePyramid[l]->nx * referencePyramid[l]->ny * referencePyramid[l]->nz;
+                maskPyramid[l] = (int*)calloc(voxelNumber, sizeof(int));
             }
         }
     } else {
         reg_createImagePyramid<T>(inputReference, referencePyramid, 1, 1);
         reg_createImagePyramid<T>(inputFloating, floatingPyramid, 1, 1);
         if (maskImage)
-            reg_createMaskPyramid<T>(maskImage, maskPyramid, 1, 1, activeVoxelNumber);
+            reg_createMaskPyramid<T>(maskImage, maskPyramid, 1, 1);
         else {
-            activeVoxelNumber[0] = referencePyramid[0]->nx * referencePyramid[0]->ny * referencePyramid[0]->nz;
-            maskPyramid[0] = (int*)calloc(activeVoxelNumber[0], sizeof(int));
+            const size_t voxelNumber = referencePyramid[0]->nx * referencePyramid[0]->ny * referencePyramid[0]->nz;
+            maskPyramid[0] = (int*)calloc(voxelNumber, sizeof(int));
         }
     }
 
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 53b50fad..3a5f0146 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -96,7 +96,6 @@ class reg_base: public InterfaceOptimiser {
     nifti_image **referencePyramid;
     nifti_image **floatingPyramid;
     int **maskPyramid;
-    int *activeVoxelNumber;
 
     double bestWMeasure;
     double currentWMeasure;
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 56a99eab..d791707e 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -25,7 +25,6 @@ reg_f3d2<T>::reg_f3d2(int refTimePoint, int floTimePoint):
     controlPointGridBw = nullptr;
     floatingMaskImage = nullptr;
     floatingMaskPyramid = nullptr;
-    activeVoxelNumberBw = nullptr;
     affineTransformationBw = nullptr;
     inverseConsistencyWeight = 0;
     bchUpdate = false;
@@ -62,11 +61,6 @@ reg_f3d2<T>::~reg_f3d2() {
         floatingMaskPyramid = nullptr;
     }
 
-    if (activeVoxelNumberBw) {
-        free(activeVoxelNumberBw);
-        activeVoxelNumberBw = nullptr;
-    }
-
     if (affineTransformationBw) {
         delete affineTransformationBw;
         affineTransformationBw = nullptr;
@@ -789,31 +783,25 @@ void reg_f3d2<T>::Initialise() {
     // Set the floating mask image pyramid
     if (this->usePyramid) {
         floatingMaskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*));
-        activeVoxelNumberBw = (int*)malloc(this->levelToPerform * sizeof(int));
     } else {
         floatingMaskPyramid = (int**)malloc(sizeof(int*));
-        activeVoxelNumberBw = (int*)malloc(sizeof(int));
     }
 
     if (this->usePyramid) {
         if (floatingMaskImage) {
-            reg_createMaskPyramid<T>(floatingMaskImage,
-                                     floatingMaskPyramid,
-                                     this->levelNumber,
-                                     this->levelToPerform,
-                                     activeVoxelNumberBw);
+            reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, this->levelNumber, this->levelToPerform);
         } else {
             for (unsigned int l = 0; l < this->levelToPerform; ++l) {
-                activeVoxelNumberBw[l] = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz;
-                floatingMaskPyramid[l] = (int*)calloc(activeVoxelNumberBw[l], sizeof(int));
+                const size_t voxelNumberBw = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz;
+                floatingMaskPyramid[l] = (int*)calloc(voxelNumberBw, sizeof(int));
             }
         }
     } else {  // no pyramid
         if (floatingMaskImage)
-            reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, 1, 1, activeVoxelNumberBw);
+            reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, 1, 1);
         else {
-            activeVoxelNumberBw[0] = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz;
-            floatingMaskPyramid[0] = (int*)calloc(activeVoxelNumberBw[0], sizeof(int));
+            const size_t voxelNumberBw = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz;
+            floatingMaskPyramid[0] = (int*)calloc(voxelNumberBw, sizeof(int));
         }
     }
 
diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h
index f851c2d1..73124c04 100644
--- a/reg-lib/_reg_f3d2.h
+++ b/reg-lib/_reg_f3d2.h
@@ -21,7 +21,6 @@ class reg_f3d2: public reg_f3d<T> {
     nifti_image *floatingMaskImage;
     int **floatingMaskPyramid;
     nifti_image *controlPointGridBw;
-    int *activeVoxelNumberBw;
     mat44 *affineTransformationBw;
     T inverseConsistencyWeight;
     bool bchUpdate;
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 865d17a1..c6c33ff7 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -770,7 +770,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                      tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX );
                      tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY );
                   }
-                  //the values stored in SSE variables are transfered to normal float
+                  //the values stored in SSE variables are transferred to normal float
                   val.m = tempX;
                   xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
                   val.m = tempY;
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index eb4d247b..0dc1199f 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -1776,45 +1776,37 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) {
 }
 /* *************************************************************** */
 template <class DTYPE>
-void reg_tools_binaryImage2int1(const nifti_image *image, int *array, int& activeVoxelNumber) {
-    // Active voxel are different from -1
-    activeVoxelNumber = 0;
+void reg_tools_binaryImage2int1(const nifti_image *image, int *array) {
     const DTYPE *dataPtr = static_cast<DTYPE*>(image->data);
-    for (int i = 0; i < image->nx * image->ny * image->nz; i++) {
-        if (*dataPtr++ != 0) {
-            array[i] = 1;
-            activeVoxelNumber++;
-        } else {
-            array[i] = -1;
-        }
-    }
+    for (size_t i = 0; i < image->nx * image->ny * image->nz; i++)
+        array[i] = dataPtr[i] != 0 ? 1 : -1;
 }
 /* *************************************************************** */
-void reg_tools_binaryImage2int(const nifti_image *image, int *array, int& activeVoxelNumber) {
+void reg_tools_binaryImage2int(const nifti_image *image, int *array) {
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_binaryImage2int1<unsigned char>(image, array, activeVoxelNumber);
+        reg_tools_binaryImage2int1<unsigned char>(image, array);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_binaryImage2int1<char>(image, array, activeVoxelNumber);
+        reg_tools_binaryImage2int1<char>(image, array);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_binaryImage2int1<unsigned short>(image, array, activeVoxelNumber);
+        reg_tools_binaryImage2int1<unsigned short>(image, array);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_binaryImage2int1<short>(image, array, activeVoxelNumber);
+        reg_tools_binaryImage2int1<short>(image, array);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_binaryImage2int1<unsigned int>(image, array, activeVoxelNumber);
+        reg_tools_binaryImage2int1<unsigned int>(image, array);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_binaryImage2int1<int>(image, array, activeVoxelNumber);
+        reg_tools_binaryImage2int1<int>(image, array);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_binaryImage2int1<float>(image, array, activeVoxelNumber);
+        reg_tools_binaryImage2int1<float>(image, array);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_binaryImage2int1<double>(image, array, activeVoxelNumber);
+        reg_tools_binaryImage2int1<double>(image, array);
         break;
     default:
         reg_print_fct_error("reg_tools_binaryImage2int");
@@ -1952,7 +1944,7 @@ template int reg_createImagePyramid<float>(const nifti_image*, nifti_image**, un
 template int reg_createImagePyramid<double>(const nifti_image*, nifti_image**, unsigned int, unsigned int);
 /* *************************************************************** */
 template <class DTYPE>
-int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform, int *activeVoxelNumber) {
+int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) {
     // FINEST LEVEL OF REGISTRATION
     nifti_image **tempMaskImagePyramid = (nifti_image **)malloc(levelToPerform * sizeof(nifti_image *));
     tempMaskImagePyramid[levelToPerform - 1] = nifti_copy_nim_info(inputMaskImage);
@@ -1971,16 +1963,14 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid,
         if ((tempMaskImagePyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false;
         reg_downsampleImage<DTYPE>(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis);
     }
-    activeVoxelNumber[levelToPerform - 1] = (tempMaskImagePyramid[levelToPerform - 1]->nx *
-                                             tempMaskImagePyramid[levelToPerform - 1]->ny *
-                                             tempMaskImagePyramid[levelToPerform - 1]->nz);
-    maskPyramid[levelToPerform - 1] = (int*)malloc(activeVoxelNumber[levelToPerform - 1] * sizeof(int));
-    reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1],
-                              maskPyramid[levelToPerform - 1],
-                              activeVoxelNumber[levelToPerform - 1]);
+    size_t voxelNumber = (tempMaskImagePyramid[levelToPerform - 1]->nx *
+                          tempMaskImagePyramid[levelToPerform - 1]->ny *
+                          tempMaskImagePyramid[levelToPerform - 1]->nz);
+    maskPyramid[levelToPerform - 1] = (int*)malloc(voxelNumber * sizeof(int));
+    reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1], maskPyramid[levelToPerform - 1]);
 
     // Images for each subsequent levels are allocated and downsampled if appropriate
-    for (int l = levelToPerform - 2; l >= 0; l--) {
+    for (int l = (int)levelToPerform - 2; l >= 0; l--) {
         // Allocation of the reference image
         tempMaskImagePyramid[l] = nifti_copy_nim_info(tempMaskImagePyramid[l + 1]);
         tempMaskImagePyramid[l]->data = calloc(tempMaskImagePyramid[l]->nvox, tempMaskImagePyramid[l]->nbyper);
@@ -1994,19 +1984,17 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid,
         if ((tempMaskImagePyramid[l]->nz / 2) < 32) downsampleAxis[3] = false;
         reg_downsampleImage<DTYPE>(tempMaskImagePyramid[l], 0, downsampleAxis);
 
-        activeVoxelNumber[l] = tempMaskImagePyramid[l]->nx *
-            tempMaskImagePyramid[l]->ny *
-            tempMaskImagePyramid[l]->nz;
-        maskPyramid[l] = (int*)malloc(activeVoxelNumber[l] * sizeof(int));
-        reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l], activeVoxelNumber[l]);
+        voxelNumber = tempMaskImagePyramid[l]->nx * tempMaskImagePyramid[l]->ny * tempMaskImagePyramid[l]->nz;
+        maskPyramid[l] = (int*)malloc(voxelNumber * sizeof(int));
+        reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l]);
     }
     for (unsigned int l = 0; l < levelToPerform; ++l)
         nifti_image_free(tempMaskImagePyramid[l]);
     free(tempMaskImagePyramid);
     return EXIT_SUCCESS;
 }
-template int reg_createMaskPyramid<float>(const nifti_image*, int**, unsigned int, unsigned int, int*);
-template int reg_createMaskPyramid<double>(const nifti_image*, int**, unsigned int, unsigned int, int*);
+template int reg_createMaskPyramid<float>(const nifti_image*, int**, unsigned int, unsigned int);
+template int reg_createMaskPyramid<double>(const nifti_image*, int**, unsigned int, unsigned int);
 /* *************************************************************** */
 template <class TYPE1, class TYPE2>
 int reg_tools_nanMask_image2(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index b1d40511..000ebe76 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -242,14 +242,10 @@ void reg_tools_binarise_image(nifti_image *img,
  * @param img Input image
  * @param array The data array from the input nifti image
  * is binarised and stored in this array.
- * @param activeVoxelNumber This reference is updated
- * with the number of voxel that are included into the
- * mask
  */
 extern "C++"
 void reg_tools_binaryImage2int(const nifti_image *img,
-                               int *array,
-                               int& activeVoxelNumber);
+                               int *array);
 /* *************************************************************** */
 /** @brief Compute the mean root mean squared error between
  * two vector images
@@ -334,15 +330,12 @@ int reg_createImagePyramid(const nifti_image *input,
  * 1 level corresponds to the original image resolution.
  * @param levelToPerform Number to level that will be perform during
  * the registration.
- * @param activeVoxelNumber Array that contains the number of active
- * voxel for each level of the pyramid
  */
 extern "C++" template<class DTYPE>
 int reg_createMaskPyramid(const nifti_image *input,
                           int **pyramid,
                           unsigned int levelNumber,
-                          unsigned int levelToPerform,
-                          int *activeVoxelNumber);
+                          unsigned int levelToPerform);
 /* *************************************************************** */
 /** @brief this function will threshold an image to the values provided,
  * set the scl_slope and sct_inter of the image to 1 and 0
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 615c22e0..e1f5fee8 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -12,7 +12,7 @@ void CudaCompute::ResampleImage(int inter, float paddingValue) {
                           con.GetFloatingCuda(),
                           con.GetDeformationFieldCuda(),
                           con.GetReferenceMaskCuda(),
-                          con.Content::GetReference()->nvox,
+                          con.GetActiveVoxelNumber(),
                           paddingValue);
 }
 /* *************************************************************** */
@@ -91,7 +91,7 @@ void CudaCompute::GetDeformationField(bool composition, bool bspline) {
                                        con.GetControlPointGridCuda(),
                                        con.GetDeformationFieldCuda(),
                                        con.GetReferenceMaskCuda(),
-                                       con.F3dContent::GetReference()->nvox,
+                                       con.GetActiveVoxelNumber(),
                                        bspline);
 }
 /* *************************************************************** */
@@ -111,7 +111,7 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac
                              con.GetFloatingCuda(),
                              con.GetDeformationFieldCuda(),
                              con.GetWarpedGradientCuda(),
-                             con.F3dContent::GetReference()->nvox,
+                             con.GetActiveVoxelNumber(),
                              paddingValue);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 83b2fc6c..83ba5bc3 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -95,15 +95,18 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) {
     if (!referenceMask) return;
 
     int *targetMask;
-    NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask, reference->nvox * sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask, reference->nvox * sizeof(*targetMask)));
     int *targetMaskPtr = targetMask;
-    for (int i = 0; i < reference->nvox; i++) {
-        if (referenceMask[i] != -1)
+    activeVoxelNumber = 0;
+    for (size_t i = 0; i < reference->nvox; i++) {
+        if (referenceMask[i] != -1) {
             *targetMaskPtr++ = i;
+            activeVoxelNumber++;
+        }
     }
 
-    cudaCommon_allocateArrayToDevice(&referenceMaskCuda, reference->nvox);
-    NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, targetMask, reference->nvox * sizeof(int),  cudaMemcpyHostToDevice));
+    cudaCommon_allocateArrayToDevice(&referenceMaskCuda, activeVoxelNumber);
+    NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, targetMask, activeVoxelNumber * sizeof(*targetMask), cudaMemcpyHostToDevice));
     NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
index ca57c782..549290d5 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -35,7 +35,7 @@ void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *
     measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(),
                                  cudaCon.Content::GetFloating(),
                                  cudaCon.Content::GetReferenceMask(),
-                                 cudaCon.Content::GetReference()->nvox,
+                                 cudaCon.GetActiveVoxelNumber(),
                                  cudaCon.Content::GetWarped(),
                                  cudaCon.F3dContent::GetWarpedGradient(),
                                  cudaCon.F3dContent::GetVoxelBasedMeasureGradient(),

From 4a318404783a05dd26cae6985022baba0725c645 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 8 Feb 2023 18:13:40 +0000
Subject: [PATCH 044/314] Add CalcVoxelNumber() function to calculate voxel
 numbers

---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_average.cpp                      |   8 +-
 reg-apps/reg_jacobian.cpp                     |  10 +-
 reg-apps/reg_measure.cpp                      |  10 +-
 reg-apps/reg_ppcnr.cpp                        |  12 +-
 reg-apps/reg_resample.cpp                     |  24 +---
 reg-apps/reg_tools.cpp                        |  25 ++--
 reg-apps/reg_transform.cpp                    |  27 +---
 reg-io/_reg_ReadWriteImage.cpp                |   3 +-
 reg-io/nrrd/reg_nrrd.cpp                      |   2 +-
 reg-lib/Content.cpp                           |   6 +-
 reg-lib/F3dContent.cpp                        |   3 +-
 reg-lib/_reg_aladin.cpp                       |   5 +-
 reg-lib/_reg_aladin_sym.cpp                   |  32 +----
 reg-lib/_reg_base.cpp                         |   4 +-
 reg-lib/_reg_f3d2.cpp                         |   4 +-
 reg-lib/cl/ClAladinContent.cpp                |   3 +-
 reg-lib/cl/ClResampleImageKernel.cpp          |   4 +-
 reg-lib/cpu/_reg_discrete_init.cpp            |   3 +-
 reg-lib/cpu/_reg_dti.cpp                      |  14 +-
 reg-lib/cpu/_reg_femTrans.cpp                 |  22 ++--
 reg-lib/cpu/_reg_globalTrans.cpp              |   9 +-
 reg-lib/cpu/_reg_kld.cpp                      |   8 +-
 reg-lib/cpu/_reg_lncc.cpp                     |  18 ++-
 reg-lib/cpu/_reg_localTrans.cpp               |  92 ++++++-------
 reg-lib/cpu/_reg_localTrans_jac.cpp           |  64 ++++-----
 reg-lib/cpu/_reg_localTrans_regul.cpp         |  52 ++++----
 reg-lib/cpu/_reg_mind.cpp                     |  48 +++----
 reg-lib/cpu/_reg_mrf.cpp                      |  16 +--
 reg-lib/cpu/_reg_nmi.cpp                      |   8 +-
 reg-lib/cpu/_reg_resampling.cpp               | 123 +++++++++---------
 reg-lib/cpu/_reg_ssd.cpp                      |  16 +--
 reg-lib/cpu/_reg_ssd.h                        |   2 +-
 reg-lib/cpu/_reg_thinPlateSpline.cpp          |   2 +-
 reg-lib/cpu/_reg_tools.cpp                    | 110 +++++++++-------
 reg-lib/cpu/_reg_tools.h                      |   7 +
 reg-lib/cuda/CudaF3dContent.cpp               |   8 +-
 reg-lib/cuda/_reg_common_cuda.cu              | 102 ++++++++-------
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |  42 +++---
 reg-lib/cuda/_reg_nmi_gpu.cu                  |   2 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu            |   2 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |   8 +-
 reg-lib/cuda/_reg_tools_gpu.cu                |  12 +-
 reg-lib/cuda/affineDeformationKernel.cu       |   2 +-
 reg-lib/cuda/resampleKernel.cu                |   4 +-
 .../reg_test_affine_deformation_field.cpp     |   7 +-
 reg-test/reg_test_computation_time.cpp        |   3 +-
 reg-test/reg_test_imageGradient.cpp           |   9 +-
 reg-test/reg_test_interpolation.cpp           |   6 +-
 49 files changed, 451 insertions(+), 554 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 29e49a01..4c5c8078 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-157
+158
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index 68ef8c11..79801fa2 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -370,9 +370,7 @@ int compute_average_image(nifti_image *averageImage,
       demeanField->ndim=demeanField->dim[0]=5;
       demeanField->nt=demeanField->dim[4]=1;
       demeanField->nu=demeanField->dim[5]=demeanField->nz>1?3:2;
-      demeanField->nvox=(size_t)demeanField->nx *
-            demeanField->ny * demeanField->nz *
-            demeanField->nt * demeanField->nu;
+      demeanField->nvox=CalcVoxelNumber(*demeanField, demeanField->ndim);
       demeanField->nbyper=sizeof(float);
       demeanField->datatype=NIFTI_TYPE_FLOAT32;
       demeanField->intent_code=NIFTI_INTENT_VECTOR;
@@ -400,9 +398,7 @@ int compute_average_image(nifti_image *averageImage,
       deformationField->ndim=deformationField->dim[0]=5;
       deformationField->nt=deformationField->dim[4]=1;
       deformationField->nu=deformationField->dim[5]=deformationField->nz>1?3:2;
-      deformationField->nvox=(size_t)deformationField->nx *
-            deformationField->ny * deformationField->nz *
-            deformationField->nt * deformationField->nu;
+      deformationField->nvox=CalcVoxelNumber(*deformationField, deformationField->ndim);
       deformationField->nbyper=sizeof(float);
       deformationField->datatype=NIFTI_TYPE_FLOAT32;
       deformationField->intent_code=NIFTI_INTENT_VECTOR;
diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp
index e5adc0d5..23033742 100644
--- a/reg-apps/reg_jacobian.cpp
+++ b/reg-apps/reg_jacobian.cpp
@@ -52,7 +52,7 @@ void reg_jacobian_computeLog(nifti_image *image)
 template <class DTYPE>
 void reg_jacobian_convertMat33ToNii(mat33 *array, nifti_image *image)
 {
-   size_t voxelNumber=image->nx*image->ny*image->nz;
+   const size_t voxelNumber=CalcVoxelNumber(*image);
    DTYPE *ptrXX=static_cast<DTYPE *>(image->data);
    if(image->nz>1)
    {
@@ -285,8 +285,7 @@ int main(int argc, char **argv)
       jacobianImage->ndim=jacobianImage->dim[0]=jacobianImage->nz>1?3:2;
       jacobianImage->nu=jacobianImage->dim[5]=1;
       jacobianImage->nt=jacobianImage->dim[4]=1;
-      jacobianImage->nvox=(size_t)jacobianImage->nx *jacobianImage->ny*
-            jacobianImage->nz*jacobianImage->nt*jacobianImage->nu;
+      jacobianImage->nvox=CalcVoxelNumber(*jacobianImage, jacobianImage->ndim);
       jacobianImage->datatype = inputTransformation->datatype;
       jacobianImage->nbyper = inputTransformation->nbyper;
       jacobianImage->cal_min=0;
@@ -340,8 +339,7 @@ int main(int argc, char **argv)
       jacobianImage->ndim=jacobianImage->dim[0]=5;
       jacobianImage->nu=jacobianImage->dim[5]=jacobianImage->nz>1?9:4;
       jacobianImage->nt=jacobianImage->dim[4]=1;
-      jacobianImage->nvox=(size_t)jacobianImage->nx *jacobianImage->ny*
-            jacobianImage->nz*jacobianImage->nt*jacobianImage->nu;
+      jacobianImage->nvox=CalcVoxelNumber(*jacobianImage, jacobianImage->ndim);
       jacobianImage->datatype = inputTransformation->datatype;
       jacobianImage->nbyper = inputTransformation->nbyper;
       jacobianImage->cal_min=0;
@@ -350,7 +348,7 @@ int main(int argc, char **argv)
       jacobianImage->scl_inter = 0.0f;
       jacobianImage->data = (void *)calloc(jacobianImage->nvox, jacobianImage->nbyper);
 
-      mat33 *jacobianMatriceArray=(mat33 *)malloc(jacobianImage->nx*jacobianImage->ny*jacobianImage->nz*sizeof(mat33));
+      mat33 *jacobianMatriceArray = (mat33 *)malloc(CalcVoxelNumber(*jacobianImage) * sizeof(mat33));
       // Compute the map of Jacobian matrices
       switch((int)inputTransformation->intent_p1){
       case DISP_FIELD:
diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp
index 62442cc4..ab22e717 100755
--- a/reg-apps/reg_measure.cpp
+++ b/reg-apps/reg_measure.cpp
@@ -236,7 +236,7 @@ int main(int argc, char **argv)
 
    /* Read and create the mask array */
    int *refMask=nullptr;
-   int refMaskVoxNumber=refImage->nx*refImage->ny*refImage->nz;
+   size_t refMaskVoxNumber = CalcVoxelNumber(*refImage);
    if(flag->refMaskImageFlag){
       nifti_image *refMaskImage = reg_io_ReadImageFile(param->refMaskImageName);
       if(refMaskImage == nullptr)
@@ -249,7 +249,7 @@ int main(int argc, char **argv)
    }
    else{
       refMask = (int *)calloc(refMaskVoxNumber,sizeof(int));
-      for(int i=0;i<refMaskVoxNumber;++i) refMask[i]=i;
+      for(size_t i=0;i<refMaskVoxNumber;++i) refMask[i]=i;
    }
 
    /* Create the warped floating image */
@@ -257,8 +257,7 @@ int main(int argc, char **argv)
    warpedFloImage->ndim=warpedFloImage->dim[0]=floImage->ndim;
    warpedFloImage->nt=warpedFloImage->dim[4]=floImage->nt;
    warpedFloImage->nu=warpedFloImage->dim[5]=floImage->nu;
-   warpedFloImage->nvox=(size_t)warpedFloImage->nx * warpedFloImage->ny *
-         warpedFloImage->nz * warpedFloImage->nt * warpedFloImage->nu;
+   warpedFloImage->nvox=CalcVoxelNumber(*warpedFloImage, warpedFloImage->ndim);
    warpedFloImage->cal_min=floImage->cal_min;
    warpedFloImage->cal_max=floImage->cal_max;
    warpedFloImage->scl_inter=floImage->scl_inter;
@@ -272,8 +271,7 @@ int main(int argc, char **argv)
    defField->ndim=defField->dim[0]=5;
    defField->nt=defField->dim[4]=1;
    defField->nu=defField->dim[5]=refImage->nz>1?3:2;
-   defField->nvox=(size_t)defField->nx * defField->ny *
-         defField->nz * defField->nt * defField->nu;
+   defField->nvox=CalcVoxelNumber(*defField, defField->ndim);
    defField->datatype=NIFTI_TYPE_FLOAT32;
    defField->nbyper=sizeof(float);
    defField->data=(void *)calloc(defField->nvox,defField->nbyper);
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index 08629c4b..c691266b 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -191,7 +191,7 @@ int main(int argc, char **argv)
          nifti_image_free(source);
          makesource->ndim=makesource->dim[0] = 4;
          makesource->nt = makesource->dim[4] = atoi(argv[++i]);
-         makesource->nvox=makesource->nx*makesource->nz*makesource->ny*makesource->nt;
+         makesource->nvox = CalcVoxelNumber(*makesource->nx, makesource->ndim);
          makesource->data = (void *)malloc(makesource->nvox * makesource->nbyper);
          char *temp_data = reinterpret_cast<char *>(makesource->data);
          for(int ii=0; ii<makesource->nt; ii++) // fill with file data
@@ -214,8 +214,8 @@ int main(int argc, char **argv)
          nifti_image *makesource = nifti_copy_nim_info(source);
          makesource->ndim=makesource->dim[0] = 3;
          makesource->nt = makesource->dim[4] = 1;
-         makesource->nvox=makesource->nx*makesource->ny*makesource->nz;
-         makesource->data = (void *)malloc(makesource->nvox * makesource->nbyper);
+         makesource->nvox = CalcVoxelNumber(*makesource, makesource->ndim);
+         makesource->data = malloc(makesource->nvox * makesource->nbyper);
          char *temp_data = reinterpret_cast<char *>(source->data);
          for(int ii=0; ii<source->nt; ii++) // fill with file data
          {
@@ -404,8 +404,8 @@ int main(int argc, char **argv)
       mask = nifti_copy_nim_info(image);
       mask->ndim=mask->dim[0]=3;
       mask->nt=mask->dim[4]=1;
-      mask->nvox=mask->nx*mask->ny*mask->nz;
-      mask->data = (void *)malloc(mask->nvox*mask->nbyper);
+      mask->nvox = CalcVoxelNumber(*mask, mask->ndim);
+      mask->data = malloc(mask->nvox*mask->nbyper);
       PrecisionTYPE *intensityPtrM = static_cast<PrecisionTYPE *>(mask->data);
       for(size_t i=0; i<mask->nvox; i++) intensityPtrM[i]=1.0;
    }
@@ -863,7 +863,7 @@ int main(int argc, char **argv)
             nifti_image *stores = nifti_copy_nim_info(images);
             stores->ndim=stores->dim[0]=3;
             stores->nt=stores->dim[4]=1;
-            stores->nvox=stores->nx*stores->ny*stores->nz;
+            stores->nvox = CalcVoxelNumber(*stores, stores->ndim);
             stores->data = (void *)calloc(stores->nvox,images->nbyper);
 
             nifti_image *storet = nifti_copy_nim_info(stores);
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index ac6b3840..18ad4863 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -331,9 +331,7 @@ int main(int argc, char **argv)
    deformationFieldImage->dim[5]=deformationFieldImage->nu=referenceImage->nz>1?3:2;
    deformationFieldImage->dim[6]=deformationFieldImage->nv=1;
    deformationFieldImage->dim[7]=deformationFieldImage->nw=1;
-   deformationFieldImage->nvox =(size_t)deformationFieldImage->nx*
-         deformationFieldImage->ny*deformationFieldImage->nz*
-         deformationFieldImage->nt*deformationFieldImage->nu;
+   deformationFieldImage->nvox = CalcVoxelNumber(*deformationFieldImage, deformationFieldImage->ndim);
    deformationFieldImage->scl_slope=1.f;
    deformationFieldImage->scl_inter=0.f;
    if(inputTransformationImage!=nullptr)
@@ -461,12 +459,8 @@ int main(int argc, char **argv)
          reg_print_msg_debug("DTI-based resampling\n");
 #endif
          // Compute first the Jacobian matrices
-         mat33 *jacobian = (mat33 *)malloc(deformationFieldImage->nx *
-                                           deformationFieldImage->ny *
-                                           deformationFieldImage->nz *
-                                           sizeof(mat33));
-         reg_defField_getJacobianMatrix(deformationFieldImage,
-                                        jacobian);
+         mat33 *jacobian = (mat33 *)malloc(CalcVoxelNumber(*deformationFieldImage) * sizeof(mat33));
+         reg_defField_getJacobianMatrix(deformationFieldImage, jacobian);
          // resample the DTI image
          bool timepoints[7];
          for(int i=0; i<7; ++i) timepoints[i]=true;
@@ -484,13 +478,8 @@ int main(int argc, char **argv)
       else{
          if(flag->usePSF){
             // Compute first the Jacobian matrices
-            mat33 *jacobian = (mat33 *)malloc(deformationFieldImage->nx *
-                                              deformationFieldImage->ny *
-                                              deformationFieldImage->nz *
-                                              sizeof(mat33));
-            reg_defField_getJacobianMatrix(deformationFieldImage,
-                                           jacobian);
-
+            mat33 *jacobian = (mat33 *)malloc(CalcVoxelNumber(*deformationFieldImage) * sizeof(mat33));
+            reg_defField_getJacobianMatrix(deformationFieldImage, jacobian);
 
             reg_resampleImage_PSF(floatingImage,
                                   warpedImage,
@@ -544,8 +533,7 @@ int main(int argc, char **argv)
       gridImage->dim[3]=gridImage->nz=floatingImage->nz;
       gridImage->dim[4]=gridImage->nt=1;
       gridImage->dim[5]=gridImage->nu=1;
-      gridImage->nvox=(size_t)gridImage->nx*
-            gridImage->ny*gridImage->nz;
+      gridImage->nvox = CalcVoxelNumber(*gridImage, gridImage->ndim);
       gridImage->datatype = NIFTI_TYPE_UINT8;
       gridImage->nbyper = sizeof(unsigned char);
       gridImage->data = (void *)calloc(gridImage->nvox, gridImage->nbyper);
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 105afac5..fda62a49 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -891,19 +891,16 @@ int main(int argc, char **argv)
         def->pixdim[6]=def->dv=1.f;
         def->dim[7]=def->nw=1;
         def->pixdim[7]=def->dw=1.f;
-        def->nvox = (size_t)def->nx * def->ny *
-                    def->nz * def->nt * def->nu;
+        def->nvox = CalcVoxelNumber(*def, def->ndim);
         def->nbyper = sizeof(float);
         def->datatype = NIFTI_TYPE_FLOAT32;
         def->data = (void *)calloc(def->nvox,def->nbyper);
         // Fill the deformation field with an identity transformation
         reg_getDeformationFromDisplacement(def);
         // Allocate and compute the Jacobian matrices
-        mat33 *jacobian = (mat33 *)malloc(def->nx *
-                                          def->ny *
-                                          def->nz *
-                                          sizeof(mat33));
-        for(size_t i=0;i<(size_t)def->nx*def->ny*def->nz;++i)
+        const size_t jacobianVoxelNumber = CalcVoxelNumber(*def);
+        mat33 *jacobian = (mat33 *)malloc(jacobianVoxelNumber * sizeof(mat33));
+        for (size_t i = 0; i < jacobianVoxelNumber; ++i)
             reg_mat33_eye(&jacobian[i]);
         // resample the original image into the space of the new image
         if(flag->interpFlag == 0){
@@ -962,8 +959,7 @@ int main(int argc, char **argv)
         nifti_image *outputImage = nifti_copy_nim_info(image);
         outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1;
         outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2;
-        outputImage->nvox=(size_t)outputImage->nx*
-                          outputImage->ny*outputImage->nz;
+        outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim);
         outputImage->datatype = NIFTI_TYPE_RGB24;
         outputImage->nbyper = 3 * sizeof(unsigned char);
         outputImage->data = (void *)malloc(outputImage->nbyper*outputImage->nvox);
@@ -1001,8 +997,7 @@ int main(int argc, char **argv)
         nifti_image *outputImage = nifti_copy_nim_info(image);
         outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1;
         outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2;
-        outputImage->nvox=(size_t)outputImage->nx*
-                          outputImage->ny*outputImage->nz;
+        outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim);
         outputImage->datatype = NIFTI_TYPE_RGB24;
         outputImage->nbyper = 3 * sizeof(unsigned char);
         outputImage->scl_slope = 1.f;
@@ -1093,8 +1088,9 @@ int main(int argc, char **argv)
         if(image->datatype!=NIFTI_TYPE_FLOAT32)
             reg_tools_changeDatatype<float>(image);
         // Create a temporary mask
-        int *temp_mask = (int *)malloc(image->nx*image->ny*image->nz*sizeof(int));
-        for(size_t i=0; i<(size_t)image->nx*image->ny*image->nz; ++i)
+        const size_t voxelNumber = CalcVoxelNumber(*image);
+        int *temp_mask = (int *)malloc(voxelNumber * sizeof(int));
+        for (size_t i = 0; i < voxelNumber; ++i)
             temp_mask[i]=i;
         // Initialise the block matching
         _reg_blockMatchingParam bm_param;
@@ -1110,8 +1106,7 @@ int main(int argc, char **argv)
         nifti_image *outputImage = nifti_copy_nim_info(image);
         outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1;
         outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2;
-        outputImage->nvox=(size_t)outputImage->nx*
-                          outputImage->ny*outputImage->nz;
+        outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim);
         outputImage->cal_min=0;
         outputImage->data = (void *)calloc(outputImage->nbyper, outputImage->nvox);
         float *inPtr = static_cast<float *>(image->data);
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 0388e0cc..4c760e3b 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -388,9 +388,7 @@ int main(int argc, char **argv)
          outputTransformationImage->ndim=outputTransformationImage->dim[0]=5;
          outputTransformationImage->nt=outputTransformationImage->dim[4]=1;
          outputTransformationImage->nu=outputTransformationImage->dim[5]=outputTransformationImage->nz>1?3:2;
-         outputTransformationImage->nvox=(size_t)outputTransformationImage->nx *
-                                         outputTransformationImage->ny * outputTransformationImage->nz *
-                                         outputTransformationImage->nt * outputTransformationImage->nu;
+         outputTransformationImage->nvox=CalcVoxelNumber(*outputTransformationImage, outputTransformationImage->ndim);
          outputTransformationImage->nbyper=sizeof(float);
          outputTransformationImage->datatype=NIFTI_TYPE_FLOAT32;
          outputTransformationImage->intent_code=NIFTI_INTENT_VECTOR;
@@ -687,9 +685,7 @@ int main(int argc, char **argv)
             output1TransImage->ndim=output1TransImage->dim[0]=5;
             output1TransImage->nt=output1TransImage->dim[4]=1;
             output1TransImage->nu=output1TransImage->dim[5]=output1TransImage->nz>1?3:2;
-            output1TransImage->nvox=(size_t)output1TransImage->nx *
-                                    output1TransImage->ny * output1TransImage->nz *
-                                    output1TransImage->nt * output1TransImage->nu;
+            output1TransImage->nvox=CalcVoxelNumber(*output1TransImage, output1TransImage->ndim);
             output1TransImage->scl_slope=1.f;
             output1TransImage->scl_inter=0.f;
             if(referenceImage->datatype!=NIFTI_TYPE_FLOAT32)
@@ -831,9 +827,7 @@ int main(int argc, char **argv)
                output2TransImage->ndim=output2TransImage->dim[0]=5;
                output2TransImage->nt=output2TransImage->dim[4]=1;
                output2TransImage->nu=output2TransImage->dim[5]=output2TransImage->nz>1?3:2;
-               output2TransImage->nvox=(size_t)output2TransImage->nx *
-                                       output2TransImage->ny * output2TransImage->nz *
-                                       output2TransImage->nt * output2TransImage->nu;
+               output2TransImage->nvox=CalcVoxelNumber(*output2TransImage, output2TransImage->ndim);
                output2TransImage->nbyper=output1TransImage->nbyper;
                output2TransImage->datatype=output1TransImage->datatype;
                output2TransImage->data=(void *)calloc
@@ -970,9 +964,7 @@ int main(int argc, char **argv)
          deformationFieldImage->ndim=deformationFieldImage->dim[0]=5;
          deformationFieldImage->nt=deformationFieldImage->dim[4]=1;
          deformationFieldImage->nu=deformationFieldImage->dim[5]=deformationFieldImage->nz>1?3:2;
-         deformationFieldImage->nvox=(size_t)deformationFieldImage->nx *
-               deformationFieldImage->ny * deformationFieldImage->nz *
-               deformationFieldImage->nt * deformationFieldImage->nu;
+         deformationFieldImage->nvox=CalcVoxelNumber(*deformationFieldImage, deformationFieldImage->ndim);
          deformationFieldImage->nbyper=sizeof(float);
          deformationFieldImage->datatype=NIFTI_TYPE_FLOAT32;
          deformationFieldImage->intent_code=NIFTI_INTENT_VECTOR;
@@ -1102,9 +1094,7 @@ int main(int argc, char **argv)
       landmarkImage->nx=landmarkImage->dim[1]=1;
       landmarkImage->ny=landmarkImage->dim[2]=1;
       landmarkImage->nz=landmarkImage->dim[3]=1;
-      landmarkImage->nvox=(size_t)landmarkImage->nx *
-            landmarkImage->ny * landmarkImage->nz *
-            landmarkImage->nt * landmarkImage->nu;
+      landmarkImage->nvox=CalcVoxelNumber(*landmarkImage, landmarkImage->ndim);
       landmarkImage->data=(void *)malloc(landmarkImage->nvox*landmarkImage->nbyper);
       float *landmarkImagePtr = static_cast<float *>(landmarkImage->data);
       for(size_t l=0, index=0;l<landmarkNumber;++l){
@@ -1293,8 +1283,7 @@ int main(int argc, char **argv)
          tempField->ndim=tempField->dim[0]=5;
          tempField->nt=tempField->dim[4]=1;
          tempField->nu=tempField->dim[5]=tempField->nz>1?3:2;
-         tempField->nvox=(size_t)tempField->nx * tempField->ny * tempField->nz *
-                         tempField->nt * tempField->nu;
+         tempField->nvox=CalcVoxelNumber(*tempField, tempField->ndim);
          tempField->nbyper=inputTransImage->nbyper;
          tempField->datatype=inputTransImage->datatype;
          tempField->intent_code=NIFTI_INTENT_VECTOR;
@@ -1331,9 +1320,7 @@ int main(int argc, char **argv)
      outputTransImage->ndim = outputTransImage->dim[0] = 5;
      outputTransImage->nt = outputTransImage->dim[4] = 1;
      outputTransImage->nu = outputTransImage->dim[5] = outputTransImage->nz>1 ? 3 : 2;
-     outputTransImage->nvox = (size_t)outputTransImage->nx *
-        outputTransImage->ny * outputTransImage->nz *
-        outputTransImage->nt * outputTransImage->nu;
+     outputTransImage->nvox = CalcVoxelNumber(*outputTransImage, outputTransImage->ndim);
      outputTransImage->nbyper = inputTransImage->nbyper;
      outputTransImage->datatype = inputTransImage->datatype;
      outputTransImage->intent_code = NIFTI_INTENT_VECTOR;
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index d21b0304..d39c290c 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -181,7 +181,6 @@ template <class DTYPE>
 void reg_io_diplayImageData1(nifti_image *image)
 {
     reg_print_msg_debug("image values:");
-    size_t voxelNumber = (size_t)image->nx * image->ny * image->nz;
     DTYPE *data = static_cast<DTYPE *>(image->data);
     std::string text;
 
@@ -195,7 +194,7 @@ void reg_io_diplayImageData1(nifti_image *image)
              text = stringFormat("[%d - %d - %d] = [", x, y, z);
              for(int tu=0;tu<image->nt*image->nu; ++tu){
                 text = stringFormat("%s%g ", text.c_str(),
-                    static_cast<double>(data[voxelIndex + tu*voxelNumber]));
+                    static_cast<double>(data[voxelIndex + tu*CalcVoxelNumber(*image)]));
              }
              text = stringFormat("%s]", text.c_str());
              reg_print_msg_debug(text.c_str());
diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp
index b32a1124..94e37acf 100644
--- a/reg-io/nrrd/reg_nrrd.cpp
+++ b/reg-io/nrrd/reg_nrrd.cpp
@@ -17,7 +17,7 @@ template <class DTYPE>
 void reg_convertVectorField_nifti_to_nrrd(nifti_image *niiImage,
       Nrrd *nrrdImage)
 {
-   size_t voxNumber = niiImage->nx*niiImage->ny*niiImage->nz;
+   const size_t voxNumber = CalcVoxelNumber(*niiImage);
 
    DTYPE *inPtrX=static_cast<DTYPE *>(niiImage->data);
    DTYPE *inPtrY=&inPtrX[voxNumber];
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index 04ab478e..e772f87e 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -1,4 +1,5 @@
 #include "Content.h"
+#include "_reg_tools.h"
 
 /* *************************************************************** */
 Content::Content(nifti_image *referenceIn,
@@ -33,7 +34,7 @@ void Content::AllocateWarped() {
     warped->dim[0] = warped->ndim = floating->ndim;
     warped->dim[4] = warped->nt = floating->nt;
     warped->pixdim[4] = warped->dt = 1;
-    warped->nvox = size_t(warped->nx * warped->ny * warped->nz * warped->nt);
+    warped->nvox = CalcVoxelNumber(*warped, warped->ndim);
     warped->datatype = floating->datatype;
     warped->nbyper = floating->nbyper;
     warped->data = calloc(warped->nvox, warped->nbyper);
@@ -62,8 +63,7 @@ void Content::AllocateDeformationField(size_t bytes) {
     deformationField->pixdim[6] = deformationField->dv = 1;
     deformationField->dim[7] = deformationField->nw = 1;
     deformationField->pixdim[7] = deformationField->dw = 1;
-    deformationField->nvox = size_t(deformationField->nx * deformationField->ny * deformationField->nz *
-                                    deformationField->nt * deformationField->nu);
+    deformationField->nvox = CalcVoxelNumber(*deformationField, deformationField->ndim);
     deformationField->nbyper = (int)bytes;
     if (bytes == 4)
         deformationField->datatype = NIFTI_TYPE_FLOAT32;
diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp
index 27a767da..29b9fc7e 100644
--- a/reg-lib/F3dContent.cpp
+++ b/reg-lib/F3dContent.cpp
@@ -36,8 +36,7 @@ void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) {
     localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0];
     localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4];
     localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5];
-    localWeightSim->nvox = size_t(localWeightSim->nx * localWeightSim->ny * localWeightSim->nz *
-                                  localWeightSim->nt * localWeightSim->nu);
+    localWeightSim->nvox = CalcVoxelNumber(*localWeightSim, localWeightSim->ndim);
     localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper);
     F3dContent::ZeroVoxelBasedMeasureGradient();
     reg_getDeformationFromDisplacement(voxelBasedMeasureGradient);
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index a194064b..7001bb61 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -248,7 +248,7 @@ void reg_aladin<T>::InitialiseRegistration() {
                                  this->levelsToPerform);
     else {
         for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
-            const size_t voxelNumber = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz;
+            const size_t voxelNumber = CalcVoxelNumber(*this->referencePyramid[l]);
             this->referenceMaskPyramid[l] = (int *)calloc(voxelNumber, sizeof(int));
         }
     }
@@ -565,8 +565,7 @@ nifti_image* reg_aladin<T>::GetFinalWarpedImage() {
         reg_exit();
     }
 
-    int *mask = (int *)calloc(this->inputReference->nx * this->inputReference->ny * this->inputReference->nz,
-                              sizeof(int));
+    int *mask = (int *)calloc(CalcVoxelNumber(*this->inputReference), sizeof(int));
 
     reg_aladin<T>::InitAladinContent(this->inputReference,
                                      this->inputFloating,
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index bab14aaa..0aa51218 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -84,7 +84,7 @@ void reg_aladin_sym<T>::InitialiseRegistration()
    {
       for(unsigned int l=0; l<this->levelsToPerform; ++l)
       {
-         const size_t voxelNumberBw = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz;
+         const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[l]);
          this->FloatingMaskPyramid[l]=(int *)calloc(voxelNumberBw,sizeof(int));
       }
    }
@@ -96,19 +96,10 @@ void reg_aladin_sym<T>::InitialiseRegistration()
       {
          T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
          int *mskPtr = this->FloatingMaskPyramid[l];
-         size_t removedVoxel=0;
-         for(size_t i=0;
-               i<(size_t)this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz;
-               ++i)
+         for(size_t i=0; i < CalcVoxelNumber(*this->floatingPyramid[l]); ++i)
          {
-            if(mskPtr[i]>-1)
-            {
-               if(refPtr[i]>this->floatingUpperThreshold)
-               {
-                  ++removedVoxel;
-                  mskPtr[i]=-1;
-               }
-            }
+            if (mskPtr[i] > -1 && refPtr[i] > this->floatingUpperThreshold)
+               mskPtr[i] = -1;
          }
       }
    }
@@ -118,19 +109,10 @@ void reg_aladin_sym<T>::InitialiseRegistration()
       {
          T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
          int *mskPtr = this->FloatingMaskPyramid[l];
-         size_t removedVoxel=0;
-         for(size_t i=0;
-               i<(size_t)this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz;
-               ++i)
+         for (size_t i = 0; i < CalcVoxelNumber(*this->floatingPyramid[l]); ++i)
          {
-            if(mskPtr[i]>-1)
-            {
-               if(refPtr[i]<this->floatingLowerThreshold)
-               {
-                  ++removedVoxel;
-                  mskPtr[i]=-1;
-               }
-            }
+            if (mskPtr[i] > -1 && refPtr[i] < this->floatingLowerThreshold)
+               mskPtr[i] = -1;
          }
       }
    }
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index e6e452fd..dd73a129 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -665,7 +665,7 @@ void reg_base<T>::Initialise() {
             reg_createMaskPyramid<T>(maskImage, maskPyramid, levelNumber, levelToPerform);
         else {
             for (unsigned int l = 0; l < levelToPerform; ++l) {
-                const size_t voxelNumber = referencePyramid[l]->nx * referencePyramid[l]->ny * referencePyramid[l]->nz;
+                const size_t voxelNumber = CalcVoxelNumber(*referencePyramid[l]);
                 maskPyramid[l] = (int*)calloc(voxelNumber, sizeof(int));
             }
         }
@@ -675,7 +675,7 @@ void reg_base<T>::Initialise() {
         if (maskImage)
             reg_createMaskPyramid<T>(maskImage, maskPyramid, 1, 1);
         else {
-            const size_t voxelNumber = referencePyramid[0]->nx * referencePyramid[0]->ny * referencePyramid[0]->nz;
+            const size_t voxelNumber = CalcVoxelNumber(*referencePyramid[0]);
             maskPyramid[0] = (int*)calloc(voxelNumber, sizeof(int));
         }
     }
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index d791707e..7b7a625b 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -792,7 +792,7 @@ void reg_f3d2<T>::Initialise() {
             reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, this->levelNumber, this->levelToPerform);
         } else {
             for (unsigned int l = 0; l < this->levelToPerform; ++l) {
-                const size_t voxelNumberBw = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz;
+                const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[l]);
                 floatingMaskPyramid[l] = (int*)calloc(voxelNumberBw, sizeof(int));
             }
         }
@@ -800,7 +800,7 @@ void reg_f3d2<T>::Initialise() {
         if (floatingMaskImage)
             reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, 1, 1);
         else {
-            const size_t voxelNumberBw = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz;
+            const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[0]);
             floatingMaskPyramid[0] = (int*)calloc(voxelNumberBw, sizeof(int));
         }
     }
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index 171ffcf6..8836c5dc 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -105,8 +105,7 @@ void ClAladinContent::AllocateClPtrs() {
     }
     if (referenceMask != nullptr && reference != nullptr) {
         maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                   reference->nx * reference->ny * reference->nz * sizeof(int),
-                                   referenceMask, &errNum);
+                                   CalcVoxelNumber(*reference) * sizeof(int), referenceMask, &errNum);
         sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): ");
     }
 }
diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp
index d21a4782..82da961e 100644
--- a/reg-lib/cl/ClResampleImageKernel.cpp
+++ b/reg-lib/cl/ClResampleImageKernel.cpp
@@ -81,7 +81,7 @@ void ClResampleImageKernel::Calculate(int interp,
     }
     sContext->checkErrNum(errNum, "Error setting kernel ResampleImage.");
 
-    long targetVoxelNumber = (long)this->warpedImage->nx * this->warpedImage->ny * this->warpedImage->nz;
+    const size_t targetVoxelNumber = CalcVoxelNumber(*this->warpedImage);
     const unsigned int maxThreads = sContext->GetMaxThreads();
     const unsigned int maxBlocks = sContext->GetMaxBlocks();
 
@@ -95,7 +95,7 @@ void ClResampleImageKernel::Calculate(int interp,
     //    int numMats = 0; //needs to be a parameter
     //    float* jacMat_h = (float*) malloc(9 * numMats * sizeof(float));
 
-    cl_long2 voxelNumber = {{(cl_long)warpedImage->nx * warpedImage->ny * warpedImage->nz, (cl_long)this->floatingImage->nx * floatingImage->ny * this->floatingImage->nz}};
+    cl_long2 voxelNumber = {{(cl_long)CalcVoxelNumber(*warpedImage), (cl_long)CalcVoxelNumber(*this->floatingImage)}};
     cl_uint3 fi_xyz = {{(cl_uint)floatingImage->nx, (cl_uint)floatingImage->ny, (cl_uint)floatingImage->nz}};
     cl_uint2 wi_tu = {{(cl_uint)warpedImage->nt, (cl_uint)warpedImage->nu}};
 
diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp
index 9072556d..47d3c365 100644
--- a/reg-lib/cpu/_reg_discrete_init.cpp
+++ b/reg-lib/cpu/_reg_discrete_init.cpp
@@ -26,8 +26,7 @@ reg_discrete_init::reg_discrete_init(reg_measure *_measure,
    this->image_dim = this->referenceImage->nz > 1 ? 3 :2;
    this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1;
    this->label_nD_num = static_cast<int>(std::pow((double) this->label_1D_num,this->image_dim));
-   this->node_number = (size_t)this->controlPointImage->nx *
-         this->controlPointImage->ny * this->controlPointImage->nz;
+   this->node_number = CalcVoxelNumber(*this->controlPointImage);
 
    this->input_transformation=nifti_copy_nim_info(this->controlPointImage);
    this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float));
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index b70656a0..9b2a19fa 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -90,12 +90,10 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
 {
 #ifdef _WIN32
    long voxel;
-   long voxelNumber = (long)referenceImage->nx*
-                        referenceImage->ny*referenceImage->nz;
+   const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
 #else
    size_t voxel;
-   size_t voxelNumber = (size_t)referenceImage->nx*
-                        referenceImage->ny*referenceImage->nz;
+   const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
 
    /* As the tensor has 6 unique components that we need to worry about, read them out
@@ -238,11 +236,11 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
 {
    // Create pointers to the reference and warped images
 #ifdef _WIN32
-   long voxel;
-   long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    long voxel;
+    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
 #else
-   size_t voxel;
-   size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+    size_t voxel;
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
 
    /* As the tensor has 6 unique components that we need to worry about, read them out
diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp
index a6367ed6..63a9839c 100644
--- a/reg-lib/cpu/_reg_femTrans.cpp
+++ b/reg-lib/cpu/_reg_femTrans.cpp
@@ -11,6 +11,7 @@
  */
 
 #include "_reg_femTrans.h"
+#include "_reg_tools.h"
 
 float reg_getTetrahedronVolume(float *node1,float *node2,float *node3,float *node4)
 {
@@ -38,7 +39,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes,
                                      )
 {
    // Set all the closest nodes and coefficients to zero
-   for(int i=0; i<4*deformationFieldImage->nx*deformationFieldImage->ny*deformationFieldImage->nz; ++i)
+   for (int i = 0; i < 4 * CalcVoxelNumber(*deformationFieldImage); ++i)
    {
       closestNodes[i]=0;
       femInterpolationWeight[i]=0.f;
@@ -148,14 +149,13 @@ void reg_fem_getDeformationField(float *nodePositions,
                                 )
 {
 #ifdef _WIN32
-   long voxel;
-   long voxelNumber=(long)deformationFieldImage->nx*
-      deformationFieldImage->ny*deformationFieldImage->nz;
+    long voxel;
+    const long voxelNumber = (long)CalcVoxelNumber(*deformationFieldImage);
 #else
-   size_t voxel;
-   size_t voxelNumber=(size_t)deformationFieldImage->nx*
-      deformationFieldImage->ny*deformationFieldImage->nz;
+    size_t voxel;
+    const size_t voxelNumber = CalcVoxelNumber(*deformationFieldImage);
 #endif
+
    float *defPtrX = static_cast<float *>(deformationFieldImage->data);
    float *defPtrY = &defPtrX[voxelNumber];
    float *defPtrZ = &defPtrY[voxelNumber];
@@ -215,9 +215,7 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
                                  unsigned int nodeNumber,
                                  float *femBasedGradient)
 {
-   unsigned int voxelNumber = voxelBasedGradient->nx *
-                              voxelBasedGradient->ny *
-                              voxelBasedGradient->nz;
+   const size_t voxelNumber = CalcVoxelNumber(*voxelBasedGradient);
    float *voxGradPtrX = static_cast<float *>(voxelBasedGradient->data);
    float *voxGradPtrY = &voxGradPtrX[voxelNumber];
    float *voxGradPtrZ = &voxGradPtrY[voxelNumber];
@@ -225,10 +223,10 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
    for(unsigned int node=0; node<3*nodeNumber; ++node)
       femBasedGradient[node]=0.f;
 
-   unsigned int currentNodes[4], voxel;
+   unsigned int currentNodes[4];
    float currentGradient[3];
    float coefficients[4];
-   for(voxel=0; voxel<voxelNumber; ++voxel)
+   for(size_t voxel=0; voxel<voxelNumber; ++voxel)
    {
       currentNodes[0]=closestNodes[4*voxel];
       currentNodes[1]=closestNodes[4*voxel+1];
diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp
index 2e479761..1d85c61d 100755
--- a/reg-lib/cpu/_reg_globalTrans.cpp
+++ b/reg-lib/cpu/_reg_globalTrans.cpp
@@ -22,7 +22,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation,
                                    bool composition,
                                    int *mask)
 {
-   size_t voxelNumber=deformationFieldImage->nx*deformationFieldImage->ny;
+   const size_t voxelNumber = CalcVoxelNumber(*deformationFieldImage, 2);
    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationFieldImage->data);
    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[voxelNumber];
 
@@ -84,7 +84,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation,
                                    bool composition,
                                    int *mask)
 {
-   size_t voxelNumber=deformationFieldImage->nx*deformationFieldImage->ny*deformationFieldImage->nz;
+   const size_t voxelNumber=CalcVoxelNumber(*deformationFieldImage);
    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationFieldImage->data);
    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[voxelNumber];
    FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[voxelNumber];
@@ -153,10 +153,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation,
    int *tempMask=mask;
    if(mask==nullptr)
    {
-      tempMask=(int *)calloc(deformationField->nx*
-                             deformationField->ny*
-                             deformationField->nz,
-                             sizeof(int));
+      tempMask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int));
    }
    if(deformationField->nz==1)
    {
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index 26359d52..a9a469c0 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -84,10 +84,10 @@ double reg_getKLDivergence(nifti_image *referenceImage,
                            int *mask) {
 #ifdef _WIN32
     long voxel;
-    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
 #else
     size_t voxel;
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
 
     DTYPE *refPtr = static_cast<DTYPE*>(referenceImage->data);
@@ -216,10 +216,10 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
                                            double timepoint_weight) {
 #ifdef _WIN32
     long voxel;
-    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
 #else
     size_t voxel;
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
 
     DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 8c80eb8c..4b91a93f 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -94,10 +94,10 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
     // Generate the forward mask to ignore all NaN values
 #ifdef _WIN32
     long voxel;
-    long voxelNumber = long(refImage->nx * refImage->ny * refImage->nz);
+    const long voxelNumber = (long)CalcVoxelNumber(*refImage);
 #else
     size_t voxel;
-    size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*refImage);
 #endif
     memcpy(combinedMask, refMask, voxelNumber * sizeof(int));
     reg_tools_removeNanFromMask(refImage, combinedMask);
@@ -206,9 +206,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
         free(this->backwardMask);
     this->backwardMask = nullptr;
 
-    size_t voxelNumber = size_t(this->referenceImagePointer->nx *
-                                this->referenceImagePointer->ny *
-                                this->referenceImagePointer->nz);
+    size_t voxelNumber = CalcVoxelNumber(*this->referenceImagePointer);
 
     // Allocate the required image to store the correlation of the forward transformation
     this->forwardCorrelationImage = nifti_copy_nim_info(this->referenceImagePointer);
@@ -234,7 +232,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
     // Allocate the array to store the mask of the forward image
     this->forwardMask = (int*)malloc(voxelNumber * sizeof(int));
     if (this->isSymmetric) {
-        voxelNumber = size_t(floatingImagePointer->nx * floatingImagePointer->ny * floatingImagePointer->nz);
+        voxelNumber = CalcVoxelNumber(*floatingImagePointer);
 
         // Allocate the required image to store the correlation of the backward transformation
         this->backwardCorrelationImage = nifti_copy_nim_info(this->floatingImagePointer);
@@ -285,10 +283,10 @@ double reg_getLNCCValue(nifti_image *referenceImage,
                         int current_timepoint) {
 #ifdef _WIN32
     long voxel;
-    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
 #else
     size_t voxel;
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
 
     // Compute the local correlation
@@ -476,10 +474,10 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    double timepoint_weight) {
 #ifdef _WIN32
     long voxel;
-    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
 #else
     size_t voxel;
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
 
     // Compute the local correlation
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index c6c33ff7..94cbd6de 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -431,13 +431,15 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
 {
    int coord;
 
+   const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint);
    DTYPE *controlPointPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz];
-   DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz];
+   DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber];
+   DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber];
 
+   const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField);
    DTYPE *fieldPtrX=static_cast<DTYPE *>(deformationField->data);
-   DTYPE *fieldPtrY=&fieldPtrX[deformationField->nx*deformationField->ny*deformationField->nz];
-   DTYPE *fieldPtrZ=&fieldPtrY[deformationField->nx*deformationField->ny*deformationField->nz];
+   DTYPE *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber];
+   DTYPE *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber];
 
    int x, y, z, a, b, c, xPre, yPre, zPre, index;
    DTYPE xBasis[2], yBasis[2], zBasis[2], real[3];
@@ -657,10 +659,10 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 
 
    DTYPE *controlPointPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPoint->nx*splineControlPoint->ny];
+   DTYPE *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*splineControlPoint, 2)];
 
    DTYPE *fieldPtrX=static_cast<DTYPE *>(deformationField->data);
-   DTYPE *fieldPtrY=&fieldPtrX[deformationField->nx*deformationField->ny*deformationField->nz];
+   DTYPE *fieldPtrY = &fieldPtrX[CalcVoxelNumber(*deformationField)];
 
    DTYPE gridVoxelSpacing[2];
    gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
@@ -983,13 +985,15 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
    int coord;
 #endif // _USE_SSE
 
+   const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint);
    DTYPE *controlPointPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz];
-   DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz];
+   DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber];
+   DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber];
 
+   const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField);
    DTYPE *fieldPtrX=static_cast<DTYPE *>(deformationField->data);
-   DTYPE *fieldPtrY=&fieldPtrX[deformationField->nx*deformationField->ny*deformationField->nz];
-   DTYPE *fieldPtrZ=&fieldPtrY[deformationField->nx*deformationField->ny*deformationField->nz];
+   DTYPE *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber];
+   DTYPE *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber];
 
    DTYPE basis, oldBasis=(DTYPE)(1.1);
 
@@ -1646,7 +1650,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
    {
       // Active voxel are all superior to -1, 0 thus will do !
       MrPropre=true;
-      mask=(int *)calloc(deformationField->nx*deformationField->ny*deformationField->nz, sizeof(int));
+      mask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int));
    }
 
    // Check if an affine initialisation is required
@@ -1749,8 +1753,8 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
                                        mat44 *voxelToMillimeter
                                        )
 {
-   size_t nodeNumber = (size_t)nodeImage->nx*nodeImage->ny*nodeImage->nz;
-   size_t voxelNumber = (size_t)voxelImage->nx*voxelImage->ny*voxelImage->nz;
+   const size_t nodeNumber = CalcVoxelNumber(*nodeImage);
+   const size_t voxelNumber = CalcVoxelNumber(*voxelImage);
    DTYPE *nodePtrX = static_cast<DTYPE *>(nodeImage->data);
    DTYPE *nodePtrY = &nodePtrX[nodeNumber];
    DTYPE *nodePtrZ = nullptr;
@@ -1994,16 +1998,10 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint,
    }
    splineControlPoint->dim[3]=splineControlPoint->nz=1;
 
-   splineControlPoint->nvox =
-         (size_t)splineControlPoint->nx*
-         (size_t)splineControlPoint->ny*
-         (size_t)splineControlPoint->nz*
-         (size_t)splineControlPoint->nt*
-         (size_t)splineControlPoint->nu;
-
+   splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim);
    splineControlPoint->data = (void *)calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
    gridPtrX = static_cast<SplineTYPE *>(splineControlPoint->data);
-   SplineTYPE *gridPtrY = &gridPtrX[splineControlPoint->nx*splineControlPoint->ny];
+   SplineTYPE *gridPtrY = &gridPtrX[CalcVoxelNumber(*splineControlPoint, 2)];
    SplineTYPE *oldGridPtrX = &oldGrid[0];
    SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1]*oldDim[2]];
 
@@ -2103,22 +2101,17 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_
       splineControlPoint->dim[2]=splineControlPoint->ny=(oldDim[2]-3)*2+3;
       splineControlPoint->dim[3]=splineControlPoint->nz=(oldDim[3]-3)*2+3;
    }
-   splineControlPoint->nvox =
-         (size_t)splineControlPoint->nx*
-         (size_t)splineControlPoint->ny*
-         (size_t)splineControlPoint->nz*
-         (size_t)splineControlPoint->nt*
-         (size_t)splineControlPoint->nu;
+   splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim);
    splineControlPoint->data = (void *)calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
 
+   const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint);
    gridPtrX = static_cast<SplineTYPE *>(splineControlPoint->data);
-   SplineTYPE *gridPtrY = &gridPtrX[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz];
-   SplineTYPE *gridPtrZ = &gridPtrY[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz];
+   SplineTYPE *gridPtrY = &gridPtrX[splineControlPointVoxelNumber];
+   SplineTYPE *gridPtrZ = &gridPtrY[splineControlPointVoxelNumber];
    SplineTYPE *oldGridPtrX = &oldGrid[0];
    SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1]*oldDim[2]*oldDim[3]];
    SplineTYPE *oldGridPtrZ = &oldGridPtrY[oldDim[1]*oldDim[2]*oldDim[3]];
 
-
    for(int z=0; z<oldDim[3]; z++)
    {
       int Z=2*z-1;
@@ -2554,13 +2547,13 @@ void reg_defField_compose2D(nifti_image *deformationField,
                             nifti_image *dfToUpdate,
                             int *mask)
 {
-   size_t DFVoxelNumber=(size_t)deformationField->nx*deformationField->ny;
+   const size_t DFVoxelNumber = CalcVoxelNumber(*deformationField, 2);
 #ifdef _WIN32
    long i;
-   long warVoxelNumber=(size_t)dfToUpdate->nx*dfToUpdate->ny;
+   const long warVoxelNumber = (long)CalcVoxelNumber(*dfToUpdate, 2);
 #else
    size_t i;
-   size_t warVoxelNumber=(size_t)dfToUpdate->nx*dfToUpdate->ny;
+   const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate, 2);
 #endif
    DTYPE *defPtrX = static_cast<DTYPE *>(deformationField->data);
    DTYPE *defPtrY = &defPtrX[DFVoxelNumber];
@@ -2661,12 +2654,10 @@ void reg_defField_compose3D(nifti_image *deformationField,
    const size_t DFVoxelNumber=(size_t)DefFieldDim[0]*DefFieldDim[1]*DefFieldDim[2];
 #ifdef _WIN32
    long i;
-   long warVoxelNumber=(size_t)dfToUpdate->nx*
-         dfToUpdate->ny*dfToUpdate->nz;
+   const long warVoxelNumber = (long)CalcVoxelNumber(*dfToUpdate);
 #else
    size_t i;
-   size_t warVoxelNumber=(size_t)dfToUpdate->nx*
-         dfToUpdate->ny*dfToUpdate->nz;
+   const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate);
 #endif
 
    DTYPE *defPtrX = static_cast<DTYPE *>(deformationField->data);
@@ -2811,10 +2802,7 @@ void reg_defField_compose(nifti_image *deformationField,
    bool freeMask=false;
    if(mask==nullptr)
    {
-      mask=(int *)calloc(dfToUpdate->nx*
-                         dfToUpdate->ny*
-                         dfToUpdate->nz,
-                         sizeof(int));
+      mask = (int *)calloc(CalcVoxelNumber(*dfToUpdate), sizeof(int));
       freeMask=true;
    }
 
@@ -3352,9 +3340,7 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField,
                           nifti_image *outputDeformationField,
                           float tolerance)
 {
-   int outputVoxelNumber = outputDeformationField->nx *
-         outputDeformationField->ny *
-         outputDeformationField->nz;
+   const size_t outputVoxelNumber = CalcVoxelNumber(*outputDeformationField);
 
    mat44 *OutXYZMatrix;
    if(outputDeformationField->sform_code>0)
@@ -3487,10 +3473,10 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
  #endif // _USE_SSE
 
    DTYPE *outCPPPtrX = static_cast<DTYPE *>(grid2->data);
-   DTYPE *outCPPPtrY = &outCPPPtrX[grid2->nx*grid2->ny];
+   DTYPE *outCPPPtrY = &outCPPPtrX[CalcVoxelNumber(*grid2, 2)];
 
    DTYPE *controlPointPtrX = static_cast<DTYPE *>(grid1->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[grid1->nx*grid1->ny];
+   DTYPE *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*grid1, 2)];
 
    DTYPE basis;
 
@@ -3670,13 +3656,15 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
    DTYPE tempValue;
  #endif
 
+   const size_t grid2VoxelNumber = CalcVoxelNumber(*grid2);
    DTYPE *outCPPPtrX = static_cast<DTYPE *>(grid2->data);
-   DTYPE *outCPPPtrY = &outCPPPtrX[grid2->nx*grid2->ny*grid2->nz];
-   DTYPE *outCPPPtrZ = &outCPPPtrY[grid2->nx*grid2->ny*grid2->nz];
+   DTYPE *outCPPPtrY = &outCPPPtrX[grid2VoxelNumber];
+   DTYPE *outCPPPtrZ = &outCPPPtrY[grid2VoxelNumber];
 
+   const size_t grid1VoxelNumber = CalcVoxelNumber(*grid1);
    DTYPE *controlPointPtrX = static_cast<DTYPE *>(grid1->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[grid1->nx*grid1->ny*grid1->nz];
-   DTYPE *controlPointPtrZ = &controlPointPtrY[grid1->nx*grid1->ny*grid1->nz];
+   DTYPE *controlPointPtrY = &controlPointPtrX[grid1VoxelNumber];
+   DTYPE *controlPointPtrZ = &controlPointPtrY[grid1VoxelNumber];
 
    DTYPE basis;
 
@@ -4295,9 +4283,9 @@ void compute_lie_bracket(nifti_image *img1,
    reg_print_msg_error("The compute_lie_bracket function needs updating");
    reg_exit();
  #ifdef _WIN32
-   long voxNumber=(long)img1->nx*img1->ny*img1->nz;
+   long voxNumber=(long)CalcVoxelNumber(*img1);
  #else
-   size_t voxNumber=(size_t)img1->nx*img1->ny*img1->nz;
+   size_t voxNumber=CalcVoxelNumber(*img1);
  #endif
    // Lie bracket using Jacobian for testing
    if(use_jac)
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 9dad9ffc..eefcac8f 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -75,8 +75,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
 
    }
    // Create some pointers towards to control point grid image data
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny * splineControlPoint->nz;
+   const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
    DTYPE *coeffPtrX = static_cast<DTYPE *>(splineControlPoint->data);
    DTYPE *coeffPtrY = &coeffPtrX[nodeNumber];
    DTYPE *coeffPtrZ = &coeffPtrY[nodeNumber];
@@ -134,7 +133,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
       if(splineControlPoint->num_ext>0)
          useHeaderInformation=true;
 
-      // Allocate variables that are used in both scenarii
+      // Allocate variables that are used in both scenario
       DTYPE gridVoxelSpacing[3]=
       {
          splineControlPoint->dx / referenceImage->dx,
@@ -145,7 +144,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
 
       if(useHeaderInformation)
       {
-         // The reference image is not necessarly aligned with the grid
+         // The reference image is not necessarily aligned with the grid
          mat44 transformation;
          // reference: voxel to mm
          if(referenceImage->sform_code>0)
@@ -277,8 +276,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
 
    }
    // Create some pointers towards to control point grid image data
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny;
+   const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
    DTYPE *coeffPtrX = static_cast<DTYPE *>(splineControlPoint->data);
    DTYPE *coeffPtrY = &coeffPtrX[nodeNumber];
 
@@ -561,8 +559,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
 
    }
    // Create some pointers towards to control point grid image data
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny * splineControlPoint->nz;
+   const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
    DTYPE *coeffPtrX = static_cast<DTYPE *>(splineControlPoint->data);
    DTYPE *coeffPtrY = &coeffPtrX[nodeNumber];
    DTYPE *coeffPtrZ = &coeffPtrY[nodeNumber];
@@ -1248,8 +1245,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
       if(splineControlPoint->nz>1)
          detNumber *= (size_t)(splineControlPoint->nz-2);
    }
-   else detNumber = (size_t)referenceImage->nx *
-         referenceImage->ny * referenceImage->nz;
+   else detNumber = CalcVoxelNumber(*referenceImage);
 
    void *JacobianDetermiantArray=(void *)malloc(detNumber*splineControlPoint->nbyper);
 
@@ -1360,8 +1356,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
    if(approximation)
       arraySize = (size_t)(splineControlPoint->nx-2) *
             (splineControlPoint->ny-2);
-   else arraySize = (size_t)referenceImage->nx *
-         referenceImage->ny;
+   else arraySize = CalcVoxelNumber(*referenceImage, 2);
    // Allocate arrays to store determinants and matrices
    mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33));
    DTYPE *jacobianDeterminant=(DTYPE *)malloc(arraySize * sizeof(DTYPE));
@@ -1376,7 +1371,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
 
    // The gradient are now computed for every control point
    DTYPE *gradientImagePtrX = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientImagePtrY = &gradientImagePtrX[gradientImage->nx*gradientImage->ny];
+   DTYPE *gradientImagePtrY = &gradientImagePtrX[CalcVoxelNumber(*gradientImage, 2)];
 
    // Matrices to be used to convert the gradient from voxel to mm
    mat33 jacobianMatrix, reorientation;
@@ -1387,7 +1382,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
    // Ratio to be used for normalisation
    size_t jacobianNumber;
    if(approximation)
-      jacobianNumber = splineControlPoint->nx * splineControlPoint->ny;
+      jacobianNumber = CalcVoxelNumber(*splineControlPoint, 2);
    else jacobianNumber = arraySize;
    DTYPE ratio[2] =
    {
@@ -1599,8 +1594,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
    if(approximation)
       arraySize = (size_t)(splineControlPoint->nx-2) *
             (splineControlPoint->ny-2) * (splineControlPoint->nz-2);
-   else arraySize = (size_t)referenceImage->nx *
-         referenceImage->ny*referenceImage->nz;
+   else arraySize = CalcVoxelNumber(*referenceImage);
    // Allocate arrays to store determinants and matrices
    mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33));
    DTYPE *jacobianDeterminant=(DTYPE *)malloc(arraySize * sizeof(DTYPE));
@@ -1614,9 +1608,10 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                                 useHeaderInformation);
 
    // The gradient are now computed for every control point
+   const size_t voxelNumber = CalcVoxelNumber(*gradientImage);
    DTYPE *gradientImagePtrX = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientImagePtrY = &gradientImagePtrX[gradientImage->nx*gradientImage->ny*gradientImage->nz];
-   DTYPE *gradientImagePtrZ = &gradientImagePtrY[gradientImage->nx*gradientImage->ny*gradientImage->nz];
+   DTYPE *gradientImagePtrY = &gradientImagePtrX[voxelNumber];
+   DTYPE *gradientImagePtrZ = &gradientImagePtrY[voxelNumber];
 
    // Matrices to be used to convert the gradient from voxel to mm
    mat33 jacobianMatrix, reorientation;
@@ -1627,7 +1622,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
    // Ratio to be used for normalisation
    size_t jacobianNumber;
    if(approximation)
-      jacobianNumber = splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz;
+      jacobianNumber = CalcVoxelNumber(*splineControlPoint);
    else jacobianNumber = arraySize;
    DTYPE ratio[3] =
    {
@@ -1954,13 +1949,13 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
    long jacobianNumber;
    if(approximation)
       jacobianNumber = (long)(splineControlPoint->nx-2)*(splineControlPoint->ny-2);
-   else jacobianNumber = (long)referenceImage->nx*referenceImage->ny;
+   else jacobianNumber = (long)CalcVoxelNumber(*referenceImage, 2);
 #else
    size_t i;
    size_t jacobianNumber;
    if(approximation)
       jacobianNumber = (size_t)(splineControlPoint->nx-2)*(splineControlPoint->ny-2);
-   else jacobianNumber = (size_t)referenceImage->nx*referenceImage->ny;
+   else jacobianNumber = CalcVoxelNumber(*referenceImage, 2);
 #endif
    mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33));
    DTYPE *jacobianDeterminant=(DTYPE *)malloc(jacobianNumber*sizeof(DTYPE));
@@ -2001,8 +1996,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
       reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz);
    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz);
 
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny * splineControlPoint->nz;
+   const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
    DTYPE *controlPointPtrX = static_cast<DTYPE *>(splineControlPoint->data);
    DTYPE *controlPointPtrY = &controlPointPtrX[nodeNumber];
 
@@ -2204,13 +2198,13 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
    long jacobianNumber;
    if(approximation)
       jacobianNumber = (long)(splineControlPoint->nx-2)*(splineControlPoint->ny-2)*(splineControlPoint->nz-2);
-   else jacobianNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+   else jacobianNumber = (long)CalcVoxelNumber(*referenceImage);
 #else
    size_t i;
    size_t jacobianNumber;
    if(approximation)
       jacobianNumber = (size_t)(splineControlPoint->nx-2)*(splineControlPoint->ny-2)*(splineControlPoint->nz-2);
-   else jacobianNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz;
+   else jacobianNumber = CalcVoxelNumber(*referenceImage);
 #endif
    mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33));
    DTYPE *jacobianDeterminant=(DTYPE *)malloc(jacobianNumber*sizeof(DTYPE));
@@ -2251,8 +2245,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
       reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz);
    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz);
 
-   size_t nodeNumber = (size_t)splineControlPoint->nx *
-         splineControlPoint->ny * splineControlPoint->nz;
+   const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
    DTYPE *controlPointPtrX = static_cast<DTYPE *>(splineControlPoint->data);
    DTYPE *controlPointPtrY = &controlPointPtrX[nodeNumber];
    DTYPE *controlPointPtrZ = &controlPointPtrY[nodeNumber];
@@ -2690,7 +2683,7 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField,
                                    nifti_image *jacobianDeterminant,
                                    mat33 *jacobianMatrices)
 {
-   size_t voxelNumber=deformationField->nx*deformationField->ny;
+   const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2);
 
    DTYPE *jacDetPtr=nullptr;
    if(jacobianDeterminant!=nullptr)
@@ -2800,7 +2793,7 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField,
                                    nifti_image *jacobianDeterminant,
                                    mat33 *jacobianMatrices)
 {
-   size_t voxelNumber=deformationField->nx*deformationField->ny*deformationField->nz;
+   const size_t voxelNumber = CalcVoxelNumber(*deformationField);
 
    DTYPE *jacDetPtr=nullptr;
    if(jacobianDeterminant!=nullptr)
@@ -3037,8 +3030,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
       }
       else reg_exit();
    }
-   size_t voxelNumber = (size_t)flowFieldImage->nx *
-         flowFieldImage->ny * flowFieldImage->nz ;
+   const size_t voxelNumber = CalcVoxelNumber(*flowFieldImage);
    for(size_t i=0; i<voxelNumber; ++i)
       jacobianMatrices[i]=affineMatrix;
 
@@ -3088,7 +3080,7 @@ void reg_getDetArrayFromMatArray(nifti_image *jacobianDetImage,
                                  mat33 *jacobianMatrices
                                  )
 {
-   size_t voxelNumber=jacobianDetImage->nx*jacobianDetImage->ny*jacobianDetImage->nz;
+   const size_t voxelNumber = CalcVoxelNumber(*jacobianDetImage);
    DTYPE *jacDetPtr=static_cast<DTYPE *>(jacobianDetImage->data);
    if(jacobianDetImage->nz>1){
        for(size_t voxel=0; voxel<voxelNumber; ++voxel)
@@ -3137,8 +3129,7 @@ int reg_spline_GetJacobianMatFromVelocityGrid(mat33* jacobianMatrices,
    flowFieldImage->ndim=flowFieldImage->dim[0]=5;
    flowFieldImage->nt=flowFieldImage->dim[4]=1;
    flowFieldImage->nu=flowFieldImage->dim[5]=referenceImage->nz>1?3:2;
-   flowFieldImage->nvox=(size_t)flowFieldImage->nx*flowFieldImage->ny*
-         flowFieldImage->nz*flowFieldImage->nt*flowFieldImage->nu;
+   flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim);
    flowFieldImage->data=(void *)malloc(flowFieldImage->nvox*flowFieldImage->nbyper);
 
    // The velocity grid image is first converted into a flow field
@@ -3157,7 +3148,7 @@ int reg_defField_GetJacobianDetFromFlowField(nifti_image* jacobianDetImage,
                                              )
 {
    // create an array of mat33
-   size_t voxelNumber=jacobianDetImage->nx*jacobianDetImage->ny*jacobianDetImage->nz;
+   const size_t voxelNumber = CalcVoxelNumber(*jacobianDetImage);
    mat33 *jacobianMatrices=(mat33 *)malloc(voxelNumber*sizeof(mat33));
 
    // Compute the Jacobian matrice array
@@ -3195,8 +3186,7 @@ int reg_spline_GetJacobianDetFromVelocityGrid(nifti_image* jacobianDetImage,
    flowFieldImage->ndim=flowFieldImage->dim[0]=5;
    flowFieldImage->nt=flowFieldImage->dim[4]=1;
    flowFieldImage->nu=flowFieldImage->dim[5]=jacobianDetImage->nz>1?3:2;
-   flowFieldImage->nvox=(size_t)flowFieldImage->nx*flowFieldImage->ny*
-         flowFieldImage->nz*flowFieldImage->nt*flowFieldImage->nu;
+   flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim);
    flowFieldImage->data=(void *)malloc(flowFieldImage->nvox*flowFieldImage->nbyper);
 
    // The velocity grid image is first converted into a flow field
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 550105ab..89babf29 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -15,7 +15,7 @@
 /* *************************************************************** */
 template<class DTYPE>
 double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoint) {
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
     int a, b, x, y, index, i;
 
     // Create pointers to the spline coefficients
@@ -72,7 +72,7 @@ double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoi
 /* *************************************************************** */
 template<class DTYPE>
 double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoint) {
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     int a, b, c, x, y, z, index, i;
 
     // Create pointers to the spline coefficients
@@ -184,7 +184,7 @@ template<class DTYPE>
 void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
                                               nifti_image *gradientImage,
                                               float weight) {
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
     int a, b, x, y, X, Y, index, i;
 
     // Create pointers to the spline coefficients
@@ -291,7 +291,7 @@ template<class DTYPE>
 void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
                                               nifti_image *gradientImage,
                                               float weight) {
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     int a, b, c, x, y, z, X, Y, Z, index, i;
 
     // Create pointers to the spline coefficients
@@ -494,7 +494,7 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint,
 /* *************************************************************** */
 template <class DTYPE>
 double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoint) {
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
     int a, b, x, y, i, index;
 
     double constraintValue = 0;
@@ -569,7 +569,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin
 /* *************************************************************** */
 template <class DTYPE>
 double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoint) {
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     int a, b, c, x, y, z, i, index;
 
     double constraintValue = 0;
@@ -686,7 +686,7 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) {
 template <class DTYPE>
 double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
                                       const nifti_image *splineControlPoint) {
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2);
     int a, b, x, y, index, xPre, yPre;
     DTYPE basis;
 
@@ -699,7 +699,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
     double currentValue;
 
     // Create pointers to the spline coefficients
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
     const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
     DTYPE splineCoeffX, splineCoeffY;
@@ -769,7 +769,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
 template <class DTYPE>
 double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
                                       const nifti_image *splineControlPoint) {
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
     int a, b, c, x, y, z, index, xPre, yPre, zPre;
     DTYPE basis;
 
@@ -783,7 +783,7 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
     double currentValue;
 
     // Create pointers to the spline coefficients
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
     const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
     const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
@@ -899,7 +899,7 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
                                        const nifti_image *splineControlPoint,
                                        nifti_image *gradientImage,
                                        float weight) {
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2);
     int a, b, x, y, index, xPre, yPre;
     DTYPE basis;
 
@@ -909,7 +909,7 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
     };
 
     // Create pointers to the spline coefficients
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
     const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
     DTYPE splineCoeffX, splineCoeffY;
@@ -990,7 +990,7 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
                                        const nifti_image *splineControlPoint,
                                        nifti_image *gradientImage,
                                        float weight) {
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
     int a, b, c, x, y, z, index, xPre, yPre, zPre;
     DTYPE basis;
 
@@ -1001,7 +1001,7 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
     };
 
     // Create pointers to the spline coefficients
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
     const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
     const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
@@ -1146,7 +1146,7 @@ template <class DTYPE>
 void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint,
                                              nifti_image *gradientImage,
                                              float weight) {
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
     int x, y, a, b, i, index;
 
     // Create pointers to the spline coefficients
@@ -1241,7 +1241,7 @@ template <class DTYPE>
 void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint,
                                              nifti_image *gradientImage,
                                              float weight) {
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     int x, y, z, a, b, c, i, index;
 
     // Create pointers to the spline coefficients
@@ -1382,7 +1382,7 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint
 /* *************************************************************** */
 template <class DTYPE>
 double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
-    size_t voxelNumber = size_t(deformationField->nx * deformationField->ny);
+    const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2);
     int a, b, x, y, X, Y, index;
     DTYPE basis[2] = {1, 0};
     DTYPE first[2] = {-1, 1};
@@ -1445,7 +1445,7 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
 /* *************************************************************** */
 template <class DTYPE>
 double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) {
-    size_t voxelNumber = size_t(deformationField->nx * deformationField->ny * deformationField->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*deformationField);
     int a, b, c, x, y, z, X, Y, Z, index;
     DTYPE basis[2] = {1, 0};
     DTYPE first[2] = {-1, 1};
@@ -1551,7 +1551,7 @@ template <class DTYPE>
 void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField,
                                          nifti_image *gradientImage,
                                          float weight) {
-    size_t voxelNumber = size_t(deformationField->nx * deformationField->ny);
+    const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2);
     int a, b, x, y, X, Y, index;
     DTYPE basis[2] = {1, 0};
     DTYPE first[2] = {-1, 1};
@@ -1623,7 +1623,7 @@ template <class DTYPE>
 void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField,
                                          nifti_image *gradientImage,
                                          float weight) {
-    size_t voxelNumber = size_t(deformationField->nx * deformationField->ny * deformationField->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*deformationField);
     int a, b, c, x, y, z, X, Y, Z, index;
     DTYPE basis[2] = {1, 0};
     DTYPE first[2] = {-1, 1};
@@ -1751,8 +1751,8 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
                                            size_t landmarkNumber,
                                            float *landmarkReference,
                                            float *landmarkFloating) {
-    int imageDim = controlPointImage->nz > 1 ? 3 : 2;
-    size_t controlPointNumber = size_t(controlPointImage->nx * controlPointImage->ny * controlPointImage->nz);
+    const int imageDim = controlPointImage->nz > 1 ? 3 : 2;
+    const size_t controlPointNumber = CalcVoxelNumber(*controlPointImage);
     double constraintValue = 0;
     size_t l, index;
     float ref_position[4];
@@ -1871,8 +1871,8 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
                                                  float *landmarkReference,
                                                  float *landmarkFloating,
                                                  float weight) {
-    int imageDim = controlPointImage->nz > 1 ? 3 : 2;
-    size_t controlPointNumber = size_t(controlPointImage->nx * controlPointImage->ny * controlPointImage->nz);
+    const int imageDim = controlPointImage->nz > 1 ? 3 : 2;
+    const size_t controlPointNumber = CalcVoxelNumber(*controlPointImage);
     size_t l, index;
     float ref_position[3];
     float def_position[3];
@@ -2015,7 +2015,7 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage
 /* *************************************************************** */
 template <class DTYPE>
 double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) {
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     int x, y, z, index;
 
     // Create pointers to the spline coefficients
@@ -2116,7 +2116,7 @@ template <class DTYPE>
 void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint,
                                                nifti_image *gradientImage,
                                                float weight) {
-    size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz);
+    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     int x, y, z, index;
 
     // Create pointers to the spline coefficients
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index a9ea0401..e2c424ac 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -71,11 +71,11 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
                                 int descriptorOffset,
                                 int current_timepoint) {
 #ifdef WIN32
-    long voxelNumber = long(inputImage->nx * inputImage->ny * inputImage->nz);
     long voxelIndex;
+    const long voxelNumber = (long)CalcVoxelNumber(*inputImage);
 #else
-    size_t voxelNumber = size_t(inputImage->nx * inputImage->ny * inputImage->nz);
     size_t voxelIndex;
+    const size_t voxelNumber = CalcVoxelNumber(*inputImage);
 #endif
 
     // Create a pointer to the descriptor image
@@ -203,13 +203,12 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
                                    int *maskPtr,
                                    int descriptorOffset,
                                    int current_timepoint) {
-
 #ifdef WIN32
-    long voxelNumber = long(inputImage->nx * inputImage->ny * inputImage->nz);
     long voxelIndex;
+    const long voxelNumber = (long)CalcVoxelNumber(*inputImage);
 #else
-    size_t voxelNumber = size_t(inputImage->nx * inputImage->ny * inputImage->nz);
     size_t voxelIndex;
+    const size_t voxelNumber = CalcVoxelNumber(*inputImage);
 #endif
 
     // Create a pointer to the descriptor image
@@ -429,20 +428,14 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
     this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer);
     this->referenceImageDescriptor->dim[0] = this->referenceImageDescriptor->ndim = 4;
     this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptor_number;
-    this->referenceImageDescriptor->nvox = (size_t)this->referenceImageDescriptor->nx *
-        this->referenceImageDescriptor->ny *
-        this->referenceImageDescriptor->nz *
-        this->referenceImageDescriptor->nt;
-    this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox *
-                                                  this->referenceImageDescriptor->nbyper);
+    this->referenceImageDescriptor->nvox = CalcVoxelNumber(*this->referenceImageDescriptor, this->referenceImageDescriptor->ndim);
+    this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox * this->referenceImageDescriptor->nbyper);
     // Initialise the warped floating descriptor
     this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer);
     this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4;
     this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptor_number;
-    this->warpedFloatingImageDescriptor->nvox = (size_t)this->warpedFloatingImageDescriptor->nx *
-        this->warpedFloatingImageDescriptor->ny *
-        this->warpedFloatingImageDescriptor->nz *
-        this->warpedFloatingImageDescriptor->nt;
+    this->warpedFloatingImageDescriptor->nvox = CalcVoxelNumber(*this->warpedFloatingImageDescriptor,
+                                                                this->warpedFloatingImageDescriptor->ndim);
     this->warpedFloatingImageDescriptor->data = malloc(this->warpedFloatingImageDescriptor->nvox *
                                                        this->warpedFloatingImageDescriptor->nbyper);
 
@@ -455,20 +448,16 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
         this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer);
         this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4;
         this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptor_number;
-        this->floatingImageDescriptor->nvox = (size_t)this->floatingImageDescriptor->nx *
-            this->floatingImageDescriptor->ny *
-            this->floatingImageDescriptor->nz *
-            this->floatingImageDescriptor->nt;
+        this->floatingImageDescriptor->nvox = CalcVoxelNumber(*this->floatingImageDescriptor,
+                                                              this->floatingImageDescriptor->ndim);
         this->floatingImageDescriptor->data = malloc(this->floatingImageDescriptor->nvox *
                                                      this->floatingImageDescriptor->nbyper);
         // Initialise the warped floating descriptor
         this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer);
         this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4;
         this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptor_number;
-        this->warpedReferenceImageDescriptor->nvox = (size_t)this->warpedReferenceImageDescriptor->nx *
-            this->warpedReferenceImageDescriptor->ny *
-            this->warpedReferenceImageDescriptor->nz *
-            this->warpedReferenceImageDescriptor->nt;
+        this->warpedReferenceImageDescriptor->nvox = CalcVoxelNumber(*this->warpedReferenceImageDescriptor,
+                                                                     this->warpedReferenceImageDescriptor->ndim);
         this->warpedReferenceImageDescriptor->data = malloc(this->warpedReferenceImageDescriptor->nvox *
                                                             this->warpedReferenceImageDescriptor->nbyper);
     }
@@ -492,8 +481,7 @@ double reg_mind::GetSimilarityMeasureValue() {
     double MINDValue = 0.;
     for (int t = 0; t < this->referenceImagePointer->nt; ++t) {
         if (this->timePointWeight[t] > 0) {
-            size_t voxelNumber = (size_t)referenceImagePointer->nx *
-                referenceImagePointer->ny * referenceImagePointer->nz;
+            size_t voxelNumber = CalcVoxelNumber(*referenceImagePointer);
             int *combinedMask = (int*)malloc(voxelNumber * sizeof(int));
             memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int));
             reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask);
@@ -551,8 +539,7 @@ double reg_mind::GetSimilarityMeasureValue() {
 
             // Backward computation
             if (this->isSymmetric) {
-                voxelNumber = (size_t)floatingImagePointer->nx *
-                    floatingImagePointer->ny * floatingImagePointer->nz;
+                voxelNumber = CalcVoxelNumber(*floatingImagePointer);
                 combinedMask = (int*)malloc(voxelNumber * sizeof(int));
                 memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int));
                 reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask);
@@ -620,9 +607,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
         return;
 
     // Create a combined mask to ignore masked and undefined values
-    size_t voxelNumber = (size_t)this->referenceImagePointer->nx *
-        this->referenceImagePointer->ny *
-        this->referenceImagePointer->nz;
+    size_t voxelNumber = CalcVoxelNumber(*this->referenceImagePointer);
     int *combinedMask = (int*)malloc(voxelNumber * sizeof(int));
     memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int));
     reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask);
@@ -699,8 +684,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
 
     // Compute the gradient of the ssd for the backward transformation
     if (this->isSymmetric) {
-        voxelNumber = (size_t)floatingImagePointer->nx *
-            floatingImagePointer->ny * floatingImagePointer->nz;
+        voxelNumber = CalcVoxelNumber(*floatingImagePointer);
         combinedMask = (int*)malloc(voxelNumber * sizeof(int));
         memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int));
         reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask);
diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp
index ebce7f4b..a259c052 100644
--- a/reg-lib/cpu/_reg_mrf.cpp
+++ b/reg-lib/cpu/_reg_mrf.cpp
@@ -59,8 +59,7 @@ reg_mrf::reg_mrf(reg_measure *_measure,
    this->image_dim = this->referenceImage->nz > 1 ? 3 :2;
    this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1;
    this->label_nD_num = static_cast<int>(std::pow((double) this->label_1D_num,this->image_dim));
-   this->node_number = (size_t)this->controlPointImage->nx *
-         this->controlPointImage->ny * this->controlPointImage->nz;
+   this->node_number = CalcVoxelNumber(*this->controlPointImage);
 
    this->input_transformation=nifti_copy_nim_info(this->controlPointImage);
    this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float));
@@ -171,9 +170,8 @@ void reg_mrf::Initialise()
    for(int i =0;i<edge_number;i++) {
       index_neighbours[i]=-1;
    }
-   int num_vertices = this->controlPointImage->nx *
-               this->controlPointImage->ny * this->controlPointImage->nz;
-   int num_neighbours=this->controlPointImage->nz > 1 ? 6 : 4;
+   const size_t num_vertices = CalcVoxelNumber(*this->controlPointImage);
+   const int num_neighbours=this->controlPointImage->nz > 1 ? 6 : 4;
 
    this->GetGraph(edgeWeightMatrix, index_neighbours);
    this->GetPrimsMST(edgeWeightMatrix, index_neighbours, num_vertices, num_neighbours, true);
@@ -360,8 +358,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
       image_mm2vox = &refImage->sto_ijk;
    mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm);
 
-   size_t node_number = (size_t)controlPointGridImage->nx *
-         controlPointGridImage->ny * controlPointGridImage->nz;
+   const size_t node_number = CalcVoxelNumber(*controlPointGridImage);
 
    // Compute the block size
    int blockSize[3]={
@@ -636,8 +633,7 @@ void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours)
 void reg_mrf::GetPrimsMST(float *edgeWeightMatrix,
                           int *index_neighbours, int num_vertices, int num_neighbours,bool norm)
 {
-   //int num_vertices = this->controlPointImage->nx *
-   //      this->controlPointImage->ny * this->controlPointImage->nz;
+   //size_t num_vertices = CalcVoxelNumber(*controlPointGridImage);
 
    //DEBUG
    //int blockSize[3]={
@@ -645,7 +641,7 @@ void reg_mrf::GetPrimsMST(float *edgeWeightMatrix,
    //    (int)reg_ceil(controlPointImage->dy / referenceImage->dy),
    //    (int)reg_ceil(controlPointImage->dz / referenceImage->dz),
    //};
-   //int sz=referenceImage->nx * referenceImage->ny * referenceImage->nz;
+   //size_t sz=CalcVoxelNumber(*referenceImage);
    //int m=referenceImage->nx;
    //int n=referenceImage->ny;
    //int o=referenceImage->nz;
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index dfecd74e..35d3dd74 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -236,7 +236,7 @@ void reg_getNMIValue(nifti_image *referenceImage,
     DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
     DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
     // Useful variable
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
     // Iterate over all active time points
     for (int t = 0; t < referenceImage->nt; ++t) {
         if (timePointWeight[t] > 0) {
@@ -491,7 +491,7 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
         reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
         reg_exit();
     }
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 
     // Pointers to the image data
     DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
@@ -585,10 +585,10 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
 
 #ifdef WIN32
     long i;
-    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
 #else
     size_t i;
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
     // Pointers to the image data
     DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 6637f857..6c2ae4ca 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -130,10 +130,10 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
 
 #ifdef WIN32
         long floatingIndex;
-        long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+        const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
 #else
         size_t floatingIndex;
-        size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+        const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
 #endif
 
         *originalFloatingData=(void *)malloc(floatingImage->nvox*sizeof(DTYPE));
@@ -221,10 +221,10 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
     {
 #ifdef WIN32
         long warpedIndex;
-        long voxelNumber = (long)inputImage->nx*inputImage->ny*inputImage->nz;
+        const long voxelNumber = (long)CalcVoxelNumber(*inputImage);
 #else
         size_t warpedIndex;
-        size_t voxelNumber = (size_t)inputImage->nx*inputImage->ny*inputImage->nz;
+        const size_t voxelNumber = CalcVoxelNumber(*inputImage);
 #endif
         DTYPE *warpVox,*warpedXX,*warpedXY,*warpedXZ,*warpedYY,*warpedYZ,*warpedZZ;
         if(warpedImage!=nullptr)
@@ -363,12 +363,12 @@ void ResampleImage3D(nifti_image *floatingImage,
 {
 #ifdef _WIN32
     long  index;
-    long warpedVoxelNumber = (long)warpedImage->nx*warpedImage->ny*warpedImage->nz;
-    long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+    const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage);
+    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
 #else
     size_t  index;
-    size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny*warpedImage->nz;
-    size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
+    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
 #endif
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
     FloatingTYPE *warpedIntensityPtr = static_cast<FloatingTYPE *>(warpedImage->data);
@@ -567,12 +567,12 @@ void ResampleImage2D(nifti_image *floatingImage,
 {
 #ifdef _WIN32
     long  index;
-    long warpedVoxelNumber = (long)warpedImage->nx*warpedImage->ny;
-    long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny;
+    const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage, 2);
+    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2);
 #else
     size_t  index;
-    size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny;
-    size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny;
+    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage, 2);
+    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2);
 #endif
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
     FloatingTYPE *warpedIntensityPtr = static_cast<FloatingTYPE *>(warpedImage->data);
@@ -832,7 +832,7 @@ void reg_resampleImage(nifti_image *floatingImage,
     if(mask==nullptr)
     {
         // voxels in the background are set to negative value so 0 corresponds to active voxel
-        mask=(int *)calloc(warpedImage->nx*warpedImage->ny*warpedImage->nz,sizeof(int));
+        mask = (int *)calloc(CalcVoxelNumber(*warpedImage), sizeof(int));
         MrPropreRules = true;
     }
 
@@ -1036,16 +1036,16 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage,
 {
 #ifdef _WIN32
     long index;
-    long warpedVoxelNumber = (long)warpedImage->nx*warpedImage->ny*warpedImage->nz;
-    long warpedPlaneNumber = (long)warpedImage->nx*warpedImage->ny;
-    long warpedLineNumber = (long)warpedImage->nx;
-    long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+    const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage);
+    const long warpedPlaneNumber = (long)CalcVoxelNumber(*warpedImage, 2);
+    const long warpedLineNumber = (long)warpedImage->nx;
+    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
 #else
     size_t index;
-    size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny*warpedImage->nz;
-    size_t warpedPlaneNumber = (size_t)warpedImage->nx*warpedImage->ny;
-    size_t warpedLineNumber = (size_t)warpedImage->nx;
-    size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
+    const size_t warpedPlaneNumber = CalcVoxelNumber(*warpedImage, 2);
+    const size_t warpedLineNumber = (size_t)warpedImage->nx;
+    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
 #endif
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
     FloatingTYPE *warpedIntensityPtr = static_cast<FloatingTYPE *>(warpedImage->data);
@@ -1331,16 +1331,16 @@ void ResampleImage3D_PSF(nifti_image *floatingImage,
 {
 #ifdef _WIN32
     long index;
-    long warpedVoxelNumber = (long)warpedImage->nx*warpedImage->ny*warpedImage->nz;
-    long warpedPlaneNumber = (long)warpedImage->nx*warpedImage->ny;
-    long warpedLineNumber = (long)warpedImage->nx;
-    long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+    const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage);
+    const long warpedPlaneNumber = (long)CalcVoxelNumber(*warpedImage, 2);
+    const long warpedLineNumber = (long)warpedImage->nx;
+    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
 #else
     size_t index;
-    size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny*warpedImage->nz;
-    size_t warpedPlaneNumber = (size_t)warpedImage->nx*warpedImage->ny;
-    size_t warpedLineNumber = (size_t)warpedImage->nx;
-    size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
+    const size_t warpedPlaneNumber = CalcVoxelNumber(*warpedImage, 2);
+    const size_t warpedLineNumber = (size_t)warpedImage->nx;
+    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
 #endif
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
     FloatingTYPE *warpedIntensityPtr = static_cast<FloatingTYPE *>(warpedImage->data);
@@ -1842,7 +1842,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage,
     if(mask==nullptr)
     {
         // voxels in the background are set to negative value so 0 corresponds to active voxel
-        mask=(int *)calloc(warpedImage->nx*warpedImage->ny*warpedImage->nz,sizeof(int));
+        mask = (int *)calloc(CalcVoxelNumber(*warpedImage), sizeof(int));
         MrPropreRules = true;
     }
 
@@ -2042,14 +2042,14 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage,
                                   nifti_image *deformationField,
                                   float paddingValue)
 {
-    size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz;
-    size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny*warpedImage->nz;
+    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
+    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
     DTYPE *floatingIntensityX = static_cast<DTYPE *>(floatingImage->data);
     DTYPE *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber];
     DTYPE *warpedIntensityX = static_cast<DTYPE *>(warpedImage->data);
     DTYPE *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber];
     DTYPE *deformationFieldPtrX = static_cast<DTYPE *>(deformationField->data);
-    DTYPE *deformationFieldPtrY = &deformationFieldPtrX[deformationField->nx*deformationField->ny*deformationField->nz];
+    DTYPE *deformationFieldPtrY = &deformationFieldPtrX[CalcVoxelNumber(*deformationField)];
 
     // Extract the relevant affine matrix
     mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk;
@@ -2223,8 +2223,9 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage,
                                    nifti_image *deformationField,
                                    float paddingValue)
 {
-    size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz;
-    size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny*warpedImage->nz;
+    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
+    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
+    const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField);
     DTYPE *floatingIntensityX = static_cast<DTYPE *>(floatingImage->data);
     DTYPE *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber];
     DTYPE *floatingIntensityZ = &floatingIntensityY[floatingVoxelNumber];
@@ -2232,8 +2233,8 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage,
     DTYPE *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber];
     DTYPE *warpedIntensityZ = &warpedIntensityY[warpedVoxelNumber];
     DTYPE *deformationFieldPtrX = static_cast<DTYPE *>(deformationField->data);
-    DTYPE *deformationFieldPtrY = &deformationFieldPtrX[deformationField->nx*deformationField->ny*deformationField->nz];
-    DTYPE *deformationFieldPtrZ = &deformationFieldPtrY[deformationField->nx*deformationField->ny*deformationField->nz];
+    DTYPE *deformationFieldPtrY = &deformationFieldPtrX[deformationFieldVoxelNumber];
+    DTYPE *deformationFieldPtrZ = &deformationFieldPtrY[deformationFieldVoxelNumber];
 
     // Extract the relevant affine matrix
     mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk;
@@ -2537,12 +2538,12 @@ void TrilinearImageGradient(nifti_image *floatingImage,
     }
 #ifdef _WIN32
     long index;
-    long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz;
-    long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+    const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient);
+    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
 #else
     size_t index;
-    size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz;
-    size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+    const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient);
+    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
 #endif
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
     FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber];
@@ -2733,12 +2734,12 @@ void BilinearImageGradient(nifti_image *floatingImage,
     }
 #ifdef _WIN32
     long index;
-    long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny;
-    long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny;
+    const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient, 2);
+    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2);
 #else
     size_t index;
-    size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny;
-    size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny;
+    const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient, 2);
+    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2);
 #endif
 
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
@@ -2867,12 +2868,12 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage,
     }
 #ifdef _WIN32
     long index;
-    long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz;
-    long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+    const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient);
+    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
 #else
     size_t index;
-    size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz;
-    size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz;
+    const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient);
+    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
 #endif
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
     FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber];
@@ -3031,12 +3032,12 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage,
     }
 #ifdef _WIN32
     long index;
-    long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny;
-    long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny;
+    const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient, 2);
+    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2);
 #else
     size_t index;
-    size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny;
-    size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny;
+    const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient, 2);
+    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2);
 #endif
     FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
     FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber];
@@ -3329,7 +3330,7 @@ void reg_getImageGradient(nifti_image *floatingImage,
     if(mask==nullptr)
     {
         // voxels in the backgreg_round are set to -1 so 0 will do the job here
-        mask=(int *)calloc(deformationField->nx*deformationField->ny*deformationField->nz,sizeof(int));
+        mask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int));
         MrPropreRule=true;
     }
 
@@ -3386,8 +3387,7 @@ void reg_getImageGradient_symDiff_core(nifti_image *img,
                                        float padding_value,
                                        int timepoint)
 {
-    size_t voxIndex, voxelNumber = (size_t)img->nx *
-            img->ny * img->nz;
+    const size_t voxelNumber = CalcVoxelNumber(*img);
 
     int dimImg = img->nz > 1 ? 3 : 2;
     int x, y, z;
@@ -3407,10 +3407,10 @@ void reg_getImageGradient_symDiff_core(nifti_image *img,
 #pragma omp parallel for default(none) \
     shared(img, currentImgPtr, mask, \
     gradPtrX, gradPtrY, gradPtrZ, padding_value) \
-    private(x, y, z, voxIndex, pre, post, valX, valY, valZ)
+    private(x, y, z, pre, post, valX, valY, valZ)
 #endif
     for(z=0; z<img->nz; ++z){
-        voxIndex=z*img->nx*img->ny;
+        size_t voxIndex=z*img->nx*img->ny;
         for(y=0; y<img->ny; ++y){
             for(x=0; x<img->nx; ++x){
                 valX = valY = valZ = 0;
@@ -3550,12 +3550,7 @@ nifti_image *reg_makeIsotropic(nifti_image *img,
     def->pixdim[6]=def->dv=1.0;
     def->dim[7]=def->nw=1;
     def->pixdim[7]=def->dw=1.0;
-    def->nvox =
-            (size_t)def->nx *
-            (size_t)def->ny *
-            (size_t)def->nz *
-            (size_t)def->nt *
-            (size_t)def->nu;
+    def->nvox = CalcVoxelNumber(*def, def->ndim);
     def->nbyper = sizeof(float);
     def->datatype = NIFTI_TYPE_FLOAT32;
     def->data = (void *)calloc(def->nvox,def->nbyper);
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 6004b9f6..8a5aca1c 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -111,10 +111,10 @@ double reg_getSSDValue(nifti_image *referenceImage,
                        nifti_image *localWeightSimImage) {
 #ifdef _WIN32
     long voxel;
-    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
 #else
     size_t voxel;
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
     // Create pointers to the reference and warped image data
     DTYPE *referencePtr = static_cast<DTYPE*>(referenceImage->data);
@@ -273,10 +273,10 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
     // Create pointers to the reference and warped images
 #ifdef _WIN32
     long voxel;
-    long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
 #else
     size_t voxel;
-    size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
     // Pointers to the image data
     DTYPE *refImagePtr = static_cast<DTYPE *>(referenceImage->data);
@@ -489,7 +489,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
     float *refBlockValue = (float*)malloc(voxelBlockNumber * sizeof(float));
 
     // Pointers to the input image
-    size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*refImage);
     DTYPE *refImgPtr = static_cast<DTYPE*>(refImage->data);
     DTYPE *warImgPtr = static_cast<DTYPE*>(warImage->data);
 
@@ -640,7 +640,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
     free(paddedWarImgPtr);
     free(refBlockValue);
     // Deal with the labels that contains NaN values
-    for (int node = 0; node < controlPointGridImage->nx * controlPointGridImage->ny * controlPointGridImage->nz; ++node) {
+    for (size_t node = 0; node < CalcVoxelNumber(*controlPointGridImage); ++node) {
         int definedValueNumber = 0;
         float *discretisedValuePtr = &discretisedValue[node * label_nD_number];
         float meanValue = 0;
@@ -733,7 +733,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
     int currentControlPoint = 0;
 
     // Pointers to the input image
-    size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*refImage);
     DTYPE *refImgPtr = static_cast<DTYPE*>(refImage->data);
     DTYPE *warImgPtr = static_cast<DTYPE*>(warImage->data);
 
@@ -882,7 +882,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
     free(refBlockValue);
 
     // Deal with the labels that contains NaN values
-    for (int node = 0; node < controlPointGridImage->nx * controlPointGridImage->ny * controlPointGridImage->nz; ++node) {
+    for (size_t node = 0; node < CalcVoxelNumber(*controlPointGridImage); ++node) {
         int definedValueNumber = 0;
         float *discretisedValuePtr = &discretisedValue[node * label_nD_number];
         float meanValue = 0;
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index c2d248bc..41b4c2d9 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -48,7 +48,7 @@ class reg_ssd: public reg_measure {
     virtual void GetDiscretisedValue(nifti_image *controlPointGridImage,
                                      float *discretisedValue,
                                      int discretise_radius,
-                                     int discretise_step);
+                                     int discretise_step) override;
 protected:
     float currentValue[255];
 
diff --git a/reg-lib/cpu/_reg_thinPlateSpline.cpp b/reg-lib/cpu/_reg_thinPlateSpline.cpp
index 4a197266..a6c28188 100644
--- a/reg-lib/cpu/_reg_thinPlateSpline.cpp
+++ b/reg-lib/cpu/_reg_thinPlateSpline.cpp
@@ -214,7 +214,7 @@ void reg_tps<T>::FillDeformationField(nifti_image *deformationField)
    if(this->initialised==false)
       this->InitialiseTPS();
 
-   size_t voxelNumber = deformationField->nx*deformationField->ny*deformationField->nz;
+   const size_t voxelNumber = CalcVoxelNumber(*deformationField);
    T *defX=static_cast<T *>(deformationField->data);
    T *defY=&defX[voxelNumber];
    T *defZ=nullptr;
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 0dc1199f..8671a456 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -96,7 +96,7 @@ void reg_intensityRescale_core(nifti_image *image,
                                float newMin,
                                float newMax) {
     DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
-    unsigned int voxelNumber = image->nx * image->ny * image->nz;
+    const size_t voxelNumber = CalcVoxelNumber(*image);
 
     // The rescaling is done for each volume independently
     DTYPE *volumePtr = &imagePtr[timePoint * voxelNumber];
@@ -139,7 +139,7 @@ void reg_intensityRescale_core(nifti_image *image,
 
     // Extract the minimal and maximal values from the current volume
     if (image->scl_slope == 0) image->scl_slope = 1.0f;
-    for (unsigned int index = 0; index < voxelNumber; index++) {
+    for (size_t index = 0; index < voxelNumber; index++) {
         DTYPE value = (DTYPE)(*volumePtr++ * image->scl_slope + image->scl_inter);
         if (value == value) {
             currentMin = (currentMin < value) ? currentMin : value;
@@ -159,7 +159,7 @@ void reg_intensityRescale_core(nifti_image *image,
     volumePtr = &imagePtr[timePoint * voxelNumber];
 
     // Iterates over all voxels in the current volume
-    for (unsigned int index = 0; index < voxelNumber; index++) {
+    for (size_t index = 0; index < voxelNumber; index++) {
         double value = (double)*volumePtr * image->scl_slope + image->scl_inter;
         // Check if the value is defined
         if (value == value) {
@@ -346,10 +346,11 @@ template void reg_thresholdImage<double>(nifti_image*, double, double);
 /* *************************************************************** */
 template <class PrecisionTYPE, class DTYPE>
 PrecisionTYPE reg_getMaximalLength2D(const nifti_image *image) {
+    const size_t voxelNumber = CalcVoxelNumber(*image);
     const DTYPE *dataPtrX = static_cast<DTYPE*>(image->data);
-    const DTYPE *dataPtrY = &dataPtrX[image->nx * image->ny * image->nz];
+    const DTYPE *dataPtrY = &dataPtrX[voxelNumber];
     PrecisionTYPE max = 0;
-    for (int i = 0; i < image->nx * image->ny * image->nz; i++) {
+    for (size_t i = 0; i < voxelNumber; i++) {
         PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++);
         PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++);
         PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX * valX + valY * valY));
@@ -360,11 +361,12 @@ PrecisionTYPE reg_getMaximalLength2D(const nifti_image *image) {
 /* *************************************************************** */
 template <class PrecisionTYPE, class DTYPE>
 PrecisionTYPE reg_getMaximalLength3D(const nifti_image *image) {
+    const size_t voxelNumber = CalcVoxelNumber(*image);
     const DTYPE *dataPtrX = static_cast<DTYPE*>(image->data);
-    const DTYPE *dataPtrY = &dataPtrX[image->nx * image->ny * image->nz];
-    const DTYPE *dataPtrZ = &dataPtrY[image->nx * image->ny * image->nz];
+    const DTYPE *dataPtrY = &dataPtrX[voxelNumber];
+    const DTYPE *dataPtrZ = &dataPtrY[voxelNumber];
     PrecisionTYPE max = 0;
-    for (int i = 0; i < image->nx * image->ny * image->nz; i++) {
+    for (int i = 0; i < voxelNumber; i++) {
         PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++);
         PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++);
         PrecisionTYPE valZ = (PrecisionTYPE)(*dataPtrZ++);
@@ -996,10 +998,10 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
     }
 #ifdef WIN32
     long index;
-    const long voxelNumber = long(image->nx * image->ny * image->nz);
+    const long voxelNumber = (long)CalcVoxelNumber(*image);
 #else
     size_t index;
-    const size_t voxelNumber = size_t(image->nx * image->ny * image->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*image);
 #endif
     DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
     int imageDim[3] = {image->nx, image->ny, image->nz};
@@ -1294,23 +1296,24 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
     }
 #ifdef WIN32
     long index;
-    const long voxelNumber = long(image->nx * image->ny * image->nz);
+    const long voxelNumber = (long)CalcVoxelNumber(*image);
 #else
     size_t index;
-    const size_t voxelNumber = size_t(image->nx * image->ny * image->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*image);
 #endif
     DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
 
-    bool *activeTimePoint = (bool*)calloc(image->nt * image->nu, sizeof(bool));
+    const int activeTimePointNumber = image->nt * image->nu;
+    bool *activeTimePoint = (bool*)calloc(activeTimePointNumber, sizeof(bool));
     // Check if input time points and masks are nullptr
     if (timePoint == nullptr) {
         // All time points are considered as active
-        for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = true;
-    } else for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = timePoint[i];
+        for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true;
+    } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i];
 
     int *currentMask = nullptr;
     if (mask == nullptr) {
-        currentMask = (int*)calloc(image->nx * image->ny * image->nz, sizeof(int));
+        currentMask = (int*)calloc(voxelNumber, sizeof(int));
     } else currentMask = mask;
 
 
@@ -1322,7 +1325,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
     typedef typename std::map<DTYPE, float>::iterator DataPointMapIt;
 
     // Loop over the dimension higher than 3
-    for (int t = 0; t < image->nt * image->nu; t++) {
+    for (int t = 0; t < activeTimePointNumber; t++) {
         if (activeTimePoint[t]) {
             DTYPE *intensityPtr = &imagePtr[t * voxelNumber];
             for (index = 0; index < voxelNumber; index++) {
@@ -1486,7 +1489,8 @@ void reg_tools_kernelConvolution(nifti_image *image,
     if (image->nu <= 0) image->nu = image->dim[5] = 1;
 
     bool *axisToSmooth = new bool[3];
-    bool *activeTimePoint = new bool[image->nt * image->nu];
+    const int activeTimePointNumber = image->nt * image->nu;
+    bool *activeTimePoint = new bool[activeTimePointNumber];
     if (axis == nullptr) {
         // All axis are smoothed by default
         for (int i = 0; i < 3; i++) axisToSmooth[i] = true;
@@ -1494,12 +1498,12 @@ void reg_tools_kernelConvolution(nifti_image *image,
 
     if (timePoint == nullptr) {
         // All time points are considered as active
-        for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = true;
-    } else for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = timePoint[i];
+        for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true;
+    } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i];
 
     int *currentMask = nullptr;
     if (mask == nullptr) {
-        currentMask = (int*)calloc(image->nx * image->ny * image->nz, sizeof(int));
+        currentMask = (int*)calloc(CalcVoxelNumber(*image), sizeof(int));
     } else currentMask = mask;
 
     switch (image->datatype) {
@@ -1598,14 +1602,7 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) {
     image->sto_ijk = nifti_mat44_inverse(image->sto_xyz);
 
     // Reallocate the image
-    image->nvox =
-        (size_t)image->nx *
-        (size_t)image->ny *
-        (size_t)image->nz *
-        (size_t)image->nt *
-        (size_t)image->nu *
-        (size_t)image->nv *
-        (size_t)image->nw;
+    image->nvox = CalcVoxelNumber(*image, 7);
     image->data = calloc(image->nvox, image->nbyper);
     imagePtr = static_cast<ImageTYPE*>(image->data);
 
@@ -1778,7 +1775,7 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) {
 template <class DTYPE>
 void reg_tools_binaryImage2int1(const nifti_image *image, int *array) {
     const DTYPE *dataPtr = static_cast<DTYPE*>(image->data);
-    for (size_t i = 0; i < image->nx * image->ny * image->nz; i++)
+    for (size_t i = 0; i < CalcVoxelNumber(*image); i++)
         array[i] = dataPtr[i] != 0 ? 1 : -1;
 }
 /* *************************************************************** */
@@ -1817,6 +1814,7 @@ void reg_tools_binaryImage2int(const nifti_image *image, int *array) {
 /* *************************************************************** */
 template <class ATYPE, class BTYPE>
 double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *imageB) {
+    const size_t voxelNumber = CalcVoxelNumber(*imageA);
     const ATYPE *imageAPtrX = static_cast<ATYPE*>(imageA->data);
     const BTYPE *imageBPtrX = static_cast<BTYPE*>(imageB->data);
     const ATYPE *imageAPtrY = nullptr;
@@ -1824,17 +1822,17 @@ double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *image
     const ATYPE *imageAPtrZ = nullptr;
     const BTYPE *imageBPtrZ = nullptr;
     if (imageA->dim[5] > 1) {
-        imageAPtrY = &imageAPtrX[imageA->nx * imageA->ny * imageA->nz];
-        imageBPtrY = &imageBPtrX[imageA->nx * imageA->ny * imageA->nz];
+        imageAPtrY = &imageAPtrX[voxelNumber];
+        imageBPtrY = &imageBPtrX[voxelNumber];
     }
     if (imageA->dim[5] > 2) {
-        imageAPtrZ = &imageAPtrY[imageA->nx * imageA->ny * imageA->nz];
-        imageBPtrZ = &imageBPtrY[imageA->nx * imageA->ny * imageA->nz];
+        imageAPtrZ = &imageAPtrY[voxelNumber];
+        imageBPtrZ = &imageBPtrY[voxelNumber];
     }
     double sum = 0;
     double rms;
     double diff;
-    for (int i = 0; i < imageA->nx * imageA->ny * imageA->nz; i++) {
+    for (size_t i = 0; i < voxelNumber; i++) {
         diff = (double)*imageAPtrX++ - (double)*imageBPtrX++;
         rms = diff * diff;
         if (imageA->dim[5] > 1) {
@@ -1848,7 +1846,7 @@ double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *image
         if (rms == rms)
             sum += sqrt(rms);
     }
-    return sum / double(imageA->nx * imageA->ny * imageA->nz);
+    return sum / static_cast<double>(voxelNumber);
 }
 /* *************************************************************** */
 template <class ATYPE>
@@ -1963,9 +1961,7 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid,
         if ((tempMaskImagePyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false;
         reg_downsampleImage<DTYPE>(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis);
     }
-    size_t voxelNumber = (tempMaskImagePyramid[levelToPerform - 1]->nx *
-                          tempMaskImagePyramid[levelToPerform - 1]->ny *
-                          tempMaskImagePyramid[levelToPerform - 1]->nz);
+    size_t voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[levelToPerform - 1]);
     maskPyramid[levelToPerform - 1] = (int*)malloc(voxelNumber * sizeof(int));
     reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1], maskPyramid[levelToPerform - 1]);
 
@@ -1984,7 +1980,7 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid,
         if ((tempMaskImagePyramid[l]->nz / 2) < 32) downsampleAxis[3] = false;
         reg_downsampleImage<DTYPE>(tempMaskImagePyramid[l], 0, downsampleAxis);
 
-        voxelNumber = tempMaskImagePyramid[l]->nx * tempMaskImagePyramid[l]->ny * tempMaskImagePyramid[l]->nz;
+        voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[l]);
         maskPyramid[l] = (int*)malloc(voxelNumber * sizeof(int));
         reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l]);
     }
@@ -2077,7 +2073,7 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma
 /* *************************************************************** */
 template <class TYPE>
 int reg_tools_removeNanFromMask_core(const nifti_image *image, int *mask) {
-    const size_t voxelNumber = size_t(image->nx * image->ny * image->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*image);
     const TYPE *imagePtr = static_cast<TYPE*>(image->data);
     for (int t = 0; t < image->nt; ++t) {
         for (size_t i = 0; i < voxelNumber; ++i) {
@@ -2109,7 +2105,7 @@ DTYPE reg_tools_getMinMaxValue_core(const nifti_image *image, int timepoint, boo
 
     const DTYPE *imgPtr = static_cast<DTYPE*>(image->data);
     DTYPE retValue = calcMin ? std::numeric_limits<DTYPE>::max() : std::numeric_limits<DTYPE>::min();
-    const size_t voxelNumber = size_t(image->nx * image->ny * image->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*image);
     const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
 
     for (int time = 0; time < image->nt; ++time) {
@@ -2366,7 +2362,7 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin
 template<class DTYPE>
 void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
     DTYPE *ptrX = static_cast<DTYPE*>(field->data);
-    DTYPE *ptrY = &ptrX[field->nx * field->ny];
+    DTYPE *ptrY = &ptrX[CalcVoxelNumber(*field, 2)];
 
     mat44 matrix;
     if (field->sform_code > 0)
@@ -2401,9 +2397,10 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
 /* *************************************************************** */
 template<class DTYPE>
 void reg_getDisplacementFromDeformation_3D(nifti_image *field) {
+    const size_t voxelNumber = CalcVoxelNumber(*field);
     DTYPE *ptrX = static_cast<DTYPE*>(field->data);
-    DTYPE *ptrY = &ptrX[field->nx * field->ny * field->nz];
-    DTYPE *ptrZ = &ptrY[field->nx * field->ny * field->nz];
+    DTYPE *ptrY = &ptrX[voxelNumber];
+    DTYPE *ptrZ = &ptrY[voxelNumber];
 
     mat44 matrix;
     if (field->sform_code > 0)
@@ -2491,7 +2488,7 @@ int reg_getDisplacementFromDeformation(nifti_image *field) {
 template<class DTYPE>
 void reg_getDeformationFromDisplacement_2D(nifti_image *field) {
     DTYPE *ptrX = static_cast<DTYPE*>(field->data);
-    DTYPE *ptrY = &ptrX[field->nx * field->ny];
+    DTYPE *ptrY = &ptrX[CalcVoxelNumber(*field, 2)];
 
     mat44 matrix;
     if (field->sform_code > 0)
@@ -2527,9 +2524,10 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) {
 /* *************************************************************** */
 template<class DTYPE>
 void reg_getDeformationFromDisplacement_3D(nifti_image *field) {
+    const size_t voxelNumber = CalcVoxelNumber(*field);
     DTYPE *ptrX = static_cast<DTYPE*>(field->data);
-    DTYPE *ptrY = &ptrX[field->nx * field->ny * field->nz];
-    DTYPE *ptrZ = &ptrY[field->nx * field->ny * field->nz];
+    DTYPE *ptrY = &ptrX[voxelNumber];
+    DTYPE *ptrZ = &ptrY[voxelNumber];
 
     mat44 matrix;
     if (field->sform_code > 0)
@@ -2619,7 +2617,7 @@ void reg_setGradientToZero_core(nifti_image *image,
                                 bool xAxis,
                                 bool yAxis,
                                 bool zAxis) {
-    size_t voxelNumber = size_t(image->nx * image->ny * image->nz);
+    const size_t voxelNumber = CalcVoxelNumber(*image);
     DTYPE *ptr = static_cast<DTYPE*>(image->data);
     if (xAxis) {
         for (size_t i = 0; i < voxelNumber; ++i)
@@ -2842,3 +2840,17 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x
     z = index;
 }
 /* *************************************************************** */
+size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount) {
+    size_t voxelNumber = static_cast<size_t>(std::abs(image.nx)) * static_cast<size_t>(std::abs(image.ny));
+    if (dimCount > 2)
+        voxelNumber *= static_cast<size_t>(std::abs(image.nz));
+    if (dimCount > 3)
+        voxelNumber *= static_cast<size_t>(std::abs(image.nt));
+    if (dimCount > 4)
+        voxelNumber *= static_cast<size_t>(std::abs(image.nu));
+    if (dimCount > 5)
+        voxelNumber *= static_cast<size_t>(std::abs(image.nv));
+    if (dimCount > 6)
+        voxelNumber *= static_cast<size_t>(std::abs(image.nw));
+    return voxelNumber;
+}
\ No newline at end of file
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 000ebe76..0b0a5c37 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -437,3 +437,10 @@ void cPtrToMatmn(T **mat, const T *cMat, unsigned int m, unsigned int n);
 /* *************************************************************** */
 void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z);
 /* *************************************************************** */
+/** @brief Calculates the number of voxels in the image
+ * @param image Input image
+ * @param dimCount Number of dimensions to consider
+ * @return The number of voxels in the image
+ */
+size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount = 3);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index ec393047..a8ea0241 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -107,14 +107,10 @@ void CudaF3dContent::UpdateWarpedGradient() {
 }
 /* *************************************************************** */
 void CudaF3dContent::ZeroTransformationGradient() {
-    cudaMemset(transformationGradientCuda, 0,
-               transformationGradient->nx * transformationGradient->ny * transformationGradient->nz *
-               sizeof(float4));
+    cudaMemset(transformationGradientCuda, 0, CalcVoxelNumber(*transformationGradient) * sizeof(float4));
 }
 /* *************************************************************** */
 void CudaF3dContent::ZeroVoxelBasedMeasureGradient() {
-    cudaMemset(voxelBasedMeasureGradientCuda, 0,
-               voxelBasedMeasureGradient->nx * voxelBasedMeasureGradient->ny * voxelBasedMeasureGradient->nz *
-               sizeof(float4));
+    cudaMemset(voxelBasedMeasureGradientCuda, 0, CalcVoxelNumber(*voxelBasedMeasureGradient) * sizeof(float4));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index a1fcfa7b..40baab4c 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -61,23 +61,23 @@ int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, nifti_image *img) {
             return EXIT_FAILURE;
         }
         float *niftiImgValues = static_cast<float*>(img->data);
-        float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
-        const int voxelNumber = img->nx * img->ny * img->nz;
-        for (int i = 0; i < voxelNumber; i++)
+        const size_t voxelNumber = CalcVoxelNumber(*img);
+        float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4));
+        for (size_t i = 0; i < voxelNumber; i++)
             array_h[i].x = *niftiImgValues++;
         if (img->dim[5] >= 2) {
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].y = *niftiImgValues++;
         }
         if (img->dim[5] >= 3) {
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].z = *niftiImgValues++;
         }
         if (img->dim[5] >= 4) {
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].w = *niftiImgValues++;
         }
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
         free(array_h);
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
@@ -121,33 +121,33 @@ int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nif
             return EXIT_FAILURE;
         }
         float *niftiImgValues = static_cast<float *>(img->data);
-        float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
-        float4 *array2_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
-        const int voxelNumber = img->nx * img->ny * img->nz;
-        for (int i = 0; i < voxelNumber; i++)
+        const size_t voxelNumber = CalcVoxelNumber(*img);
+        float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4));
+        float4 *array2_h = (float4*)calloc(voxelNumber, sizeof(float4));
+        for (size_t i = 0; i < voxelNumber; i++)
             array_h[i].x = *niftiImgValues++;
-        for (int i = 0; i < voxelNumber; i++)
+        for (size_t i = 0; i < voxelNumber; i++)
             array2_h[i].x = *niftiImgValues++;
         if (img->dim[5] >= 2) {
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].y = *niftiImgValues++;
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array2_h[i].y = *niftiImgValues++;
         }
         if (img->dim[5] >= 3) {
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].z = *niftiImgValues++;
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array2_h[i].z = *niftiImgValues++;
         }
         if (img->dim[5] >= 4) {
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].w = *niftiImgValues++;
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array2_h[i].w = *niftiImgValues++;
         }
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice));
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
         free(array_h);
         free(array2_h);
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
@@ -197,20 +197,21 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *i
             return EXIT_FAILURE;
         }
         float *niftiImgValues = static_cast<float *>(img->data);
-        float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
+        const size_t voxelNumber = CalcVoxelNumber(*img);
+        float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4));
 
-        for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+        for (size_t i = 0; i < voxelNumber; i++)
             array_h[i].x = *niftiImgValues++;
         if (img->dim[5] >= 2) {
-            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].y = *niftiImgValues++;
         }
         if (img->dim[5] >= 3) {
-            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].z = *niftiImgValues++;
         }
         if (img->dim[5] == 3) {
-            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].w = *niftiImgValues++;
         }
         cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
@@ -280,32 +281,33 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA
             return EXIT_FAILURE;
         }
         float *niftiImgValues = static_cast<float*>(img->data);
-        float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
-        float4 *array2_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4));
+        const size_t voxelNumber = CalcVoxelNumber(*img);
+        float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4));
+        float4 *array2_h = (float4*)calloc(voxelNumber, sizeof(float4));
 
-        for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+        for (size_t i = 0; i < voxelNumber; i++)
             array_h[i].x = *niftiImgValues++;
-        for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+        for (size_t i = 0; i < voxelNumber; i++)
             array2_h[i].x = *niftiImgValues++;
 
         if (img->dim[5] >= 2) {
-            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].y = *niftiImgValues++;
-            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array2_h[i].y = *niftiImgValues++;
         }
 
         if (img->dim[5] >= 3) {
-            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].z = *niftiImgValues++;
-            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array2_h[i].z = *niftiImgValues++;
         }
 
         if (img->dim[5] == 3) {
-            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array_h[i].w = *niftiImgValues++;
-            for (int i = 0; i < img->nx * img->ny * img->nz; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 array2_h[i].w = *niftiImgValues++;
         }
 
@@ -432,25 +434,25 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) {
             reg_print_msg_error("The nifti image is not a 5D volume");
             return EXIT_FAILURE;
         }
-        const int voxelNumber = img->nx * img->ny * img->nz;
 
         float4 *array_h;
+        const size_t voxelNumber = CalcVoxelNumber(*img);
         NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4)));
         NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost));
         float *niftiImgValues = static_cast<float*>(img->data);
 
-        for (int i = 0; i < voxelNumber; i++)
+        for (size_t i = 0; i < voxelNumber; i++)
             *niftiImgValues++ = array_h[i].x;
         if (img->dim[5] >= 2) {
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array_h[i].y;
         }
         if (img->dim[5] >= 3) {
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array_h[i].z;
         }
         if (img->dim[5] >= 4) {
-            for (int i = 0; i < voxelNumber; i++)
+            for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array_h[i].w;
         }
         NR_CUDA_SAFE_CALL(cudaFreeHost(array_h));
@@ -496,7 +498,7 @@ int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d, DTYP
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        unsigned int voxelNumber = img->nx * img->ny * img->nz;
+        const size_t voxelNumber = CalcVoxelNumber(*img);
         NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
         NIFTI_TYPE *array2_h = &array_h[voxelNumber];
         NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, voxelNumber * sizeof(DTYPE), cudaMemcpyDeviceToHost));
@@ -514,7 +516,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE
             reg_print_msg_error("The nifti image is not a 5D volume");
             return EXIT_FAILURE;
         }
-        const int voxelNumber = img->nx * img->ny * img->nz;
+        const size_t voxelNumber = CalcVoxelNumber(*img);
         float4 *array_h = nullptr;
         float4 *array2_h = nullptr;
         NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4)));
@@ -522,33 +524,33 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE
         NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost));
         NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (const void*)array2_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost));
         float *niftiImgValues = static_cast<float *>(img->data);
-        for (int i = 0; i < voxelNumber; i++) {
+        for (size_t i = 0; i < voxelNumber; i++) {
             *niftiImgValues++ = array_h[i].x;
         }
-        for (int i = 0; i < voxelNumber; i++) {
+        for (size_t i = 0; i < voxelNumber; i++) {
             *niftiImgValues++ = array2_h[i].x;
         }
         if (img->dim[5] >= 2) {
-            for (int i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++) {
                 *niftiImgValues++ = array_h[i].y;
             }
-            for (int i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++) {
                 *niftiImgValues++ = array2_h[i].y;
             }
         }
         if (img->dim[5] >= 3) {
-            for (int i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++) {
                 *niftiImgValues++ = array_h[i].z;
             }
-            for (int i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++) {
                 *niftiImgValues++ = array2_h[i].z;
             }
         }
         if (img->dim[5] >= 4) {
-            for (int i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++) {
                 *niftiImgValues++ = array_h[i].w;
             }
-            for (int i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++) {
                 *niftiImgValues++ = array2_h[i].w;
             }
         }
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index fde32ebc..1d6a3e0f 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -26,8 +26,8 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-	const int voxelNumber = reference->nx * reference->ny * reference->nz;
-	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
+	const int voxelNumber = CalcVoxelNumber(*reference);
+	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 referenceImageDim = make_int3(reference->nx, reference->ny, reference->nz);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const int useBSpline = static_cast<int>(bspline);
@@ -79,7 +79,7 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4
 	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
+	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const int controlPointGridMem = controlPointNumber*sizeof(float4);
 
@@ -154,7 +154,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
+	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const int controlPointGridMem = controlPointNumber*sizeof(float4);
 
@@ -237,7 +237,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)))
 
 	// Bind some variables
-	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
+	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
 	const int controlPointGridMem = controlPointNumber*sizeof(float4);
@@ -288,8 +288,8 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)))
 
 	// Bind some variables
-	const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz;
-	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
+	const int voxelNumber = CalcVoxelNumber(*referenceImage);
+	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
@@ -345,7 +345,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 	int jacNumber;
 	double jacSum;
 	if(approx){
-		jacNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
+		jacNumber = CalcVoxelNumber(*controlPointImage);
 		jacSum = (controlPointImage->nx-2)*(controlPointImage->ny-2);
 		if(controlPointImage->nz>1){
 			jacSum *= controlPointImage->nz-2;
@@ -363,7 +363,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 											   jacobianDet_d);
 	}
 	else{
-		jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz;
+		jacNumber = CalcVoxelNumber(*referenceImage);
 		jacSum=jacNumber;
 		if(controlPointImage->nz>1){
 			// Allocate array for 3x3 matrices
@@ -411,7 +411,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 	float *jacobianDet_d;
 	int jacNumber;
 	if(approx){
-		jacNumber=controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
+		jacNumber=CalcVoxelNumber(*controlPointImage);
 		if(controlPointImage->nz>1)
 			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
 		else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)))
@@ -422,7 +422,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 												jacobianDet_d);
 	}
 	else{
-		jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz;
+		jacNumber=CalcVoxelNumber(*referenceImage);
 		if(controlPointImage->nz>1)
 			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
 		else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)))
@@ -455,7 +455,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 										   4*jacNumber*sizeof(float)))
 
 	// Bind some variables
-	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
+	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
@@ -485,7 +485,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 		}
 	}
 	else{
-		const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz;
+		const int voxelNumber = CalcVoxelNumber(*referenceImage);
 		const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
 		const float3 controlPointVoxelSpacing = make_float3(
 				controlPointImage->dx / referenceImage->dx,
@@ -531,7 +531,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 	int jacNumber;
 	double jacSum;
 	if(approx){
-		jacNumber=controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
+		jacNumber=CalcVoxelNumber(*controlPointImage);
 		jacSum = (controlPointImage->nx-2)*(controlPointImage->ny-2)*(controlPointImage->nz-2);
 		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
 		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
@@ -541,7 +541,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 												jacobianDet_d);
 	}
 	else{
-		jacSum=jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz;
+		jacSum=jacNumber=CalcVoxelNumber(*referenceImage);
 		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
 		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
 		reg_spline_ComputeJacobianValues(controlPointImage,
@@ -596,7 +596,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 									  9*jacNumber*sizeof(float)))
 
 	// Bind some variables
-	const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz;
+	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
@@ -611,7 +611,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 		NR_CUDA_CHECK_KERNEL(G1,B1)
 	}
 	else{
-		const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz;
+		const int voxelNumber = CalcVoxelNumber(*referenceImage);
 		const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
 		const float3 controlPointVoxelSpacing = make_float3(
 				controlPointImage->dx / referenceImage->dx,
@@ -650,7 +650,7 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArr
 	temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]);
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)))
 
-	const int voxelNumber=image->nx*image->ny*image->nz;
+	const int voxelNumber = CalcVoxelNumber(*image);
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
 
 	const int3 imageDim=make_int3(image->nx,image->ny,image->nz);
@@ -680,7 +680,7 @@ void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArr
 	temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]);
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)))
 
-	const int voxelNumber=image->nx*image->ny*image->nz;
+	const int voxelNumber = CalcVoxelNumber(*image);
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
 
 	const int3 imageDim=make_int3(image->nx,image->ny,image->nz);
@@ -700,7 +700,7 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
 												 float4 *cpp_gpu,
 												 float4 *def_gpu)
 {
-	const int voxelNumber = def_h->nx * def_h->ny * def_h->nz;
+	const int voxelNumber = CalcVoxelNumber(*def_h);
 
 	// Create a mask array where no voxel are excluded
 	int *mask_gpu=nullptr;
@@ -769,7 +769,7 @@ void reg_defField_compose_gpu(nifti_image *def,
 	// Get the BlockSize - The values have been set in CudaContextSingleton
 	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-	const int voxelNumber=def->nx*def->ny*def->nz;
+	const int voxelNumber = CalcVoxelNumber(*def);
 
 	// Bind the qform or sform
 	mat44 temp_mat=def->qto_ijk;
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 4d1e430e..71f2a460 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -156,7 +156,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    const int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz;
+    const int voxelNumber = CalcVoxelNumber(*referenceImage);
     const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const int binNumber = refBinning * floBinning + refBinning + floBinning;
     const float normalisedJE = (float)(entropies[2] * entropies[3]);
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index acda88f3..1d0566de 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -298,7 +298,7 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
+    const int nodeNumber = CalcVoxelNumber(*controlPointImage);
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &currentLength, sizeof(float)));
 
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index af204451..2ce6057e 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -84,8 +84,8 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage,
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     // Copy the constant memory variables
-    int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-    int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz;
+    const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const int voxelNumber = CalcVoxelNumber(*referenceImage);
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3)));
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
     // Bind the required textures
@@ -145,8 +145,8 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     // Copy the constant memory variables
-    int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-    int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz;
+    const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const int voxelNumber = CalcVoxelNumber(*referenceImage);
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3)));
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisationNumber, &maxSD, sizeof(float)));
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index d14b75e6..9459ecbf 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -26,8 +26,8 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
-    const int voxelNumber = targetImage->nx * targetImage->ny * targetImage->nz;
+    const int nodeNumber = CalcVoxelNumber(*controlPointImage);
+    const int voxelNumber = CalcVoxelNumber(*targetImage);
     const int3 targetImageDim = make_int3(targetImage->nx, targetImage->ny, targetImage->nz);
     const int3 gridSize = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	float3 voxelNodeRatio_h = make_float3(
@@ -62,7 +62,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz;
+    const int nodeNumber = CalcVoxelNumber(*controlPointImage);
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
 
     float4 *matrix_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3*sizeof(float4)))
@@ -96,11 +96,11 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-	const unsigned int voxelNumber = image->nx * image->ny * image->nz;
+    const int voxelNumber = CalcVoxelNumber(*image);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int3)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int)))
 
     bool axisToSmooth[8];
     if(smoothXYZ==nullptr){
@@ -186,7 +186,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    const int voxelNumber = image->nx * image->ny * image->nz;
+    const int voxelNumber = CalcVoxelNumber(*image);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3)))
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index 3c9e0074..a37e99d3 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -88,7 +88,7 @@ void launchAffine(mat44 *affineTransformation,
    free(trans);
 
    uint3 dims_d = make_uint3(deformationField->nx, deformationField->ny, deformationField->nz);
-   affineKernel << <G1_b, B1_b >> >(*trans_d, *def_d, *mask_d, dims_d, deformationField->nx* deformationField->ny* deformationField->nz, compose);
+   affineKernel << <G1_b, B1_b >> >(*trans_d, *def_d, *mask_d, dims_d, CalcVoxelNumber(*deformationField), compose);
 
 #ifndef NDEBUG
    NR_CUDA_CHECK_KERNEL(G1_b, B1_b)
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index be20a80b..4423e45c 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -397,7 +397,7 @@ void launchResample(nifti_image *floatingImage,
 		reg_exit();
 	}
 
-	long targetVoxelNumber = (long) warpedImage->nx * warpedImage->ny * warpedImage->nz;
+	const size_t targetVoxelNumber = CalcVoxelNumber(*warpedImage);
 
 	//the below lines need to be moved to cu common
 	cudaDeviceProp prop;
@@ -410,7 +410,7 @@ void launchResample(nifti_image *floatingImage,
 	dim3 mygrid(blocks, 1, 1);
 	dim3 myblocks(maxThreads, 1, 1);
 
-	ulong2 voxelNumber = make_ulong2(warpedImage->nx * warpedImage->ny * warpedImage->nz, floatingImage->nx * floatingImage->ny * floatingImage->nz);
+	ulong2 voxelNumber = make_ulong2(targetVoxelNumber, CalcVoxelNumber(*floatingImage));
 	uint3 fi_xyz = make_uint3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 	uint2 wi_tu = make_uint2(warpedImage->nt, warpedImage->nu);
 	 if (floatingImage->nz > 1) {
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp
index e526f511..af17e015 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affine_deformation_field.cpp
@@ -216,9 +216,10 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
 
                 // Check all values
                 auto *defFieldPtrX = static_cast<float *>(defField->data);
-                auto *defFieldPtrY = &defFieldPtrX[defField->nx * defField->ny * defField->nz];
-                auto *defFieldPtrZ = &defFieldPtrY[defField->nx * defField->ny * defField->nz];
-                for (int i = 0; i < defField->nx * defField->ny * defField->nz; ++i) {
+                const size_t voxelNumber = CalcVoxelNumber(*defField);
+                auto *defFieldPtrY = &defFieldPtrX[voxelNumber];
+                auto *defFieldPtrZ = &defFieldPtrY[voxelNumber];
+                for (size_t i = 0; i < voxelNumber; ++i) {
                     REQUIRE(fabs(defFieldPtrX[i] - test_res_x[i]) < EPS_SINGLE);
                     REQUIRE(fabs(defFieldPtrY[i] - test_res_y[i]) < EPS_SINGLE);
                     if (test_res_z != nullptr) {
diff --git a/reg-test/reg_test_computation_time.cpp b/reg-test/reg_test_computation_time.cpp
index f6306499..ace1f4f3 100644
--- a/reg-test/reg_test_computation_time.cpp
+++ b/reg-test/reg_test_computation_time.cpp
@@ -60,8 +60,7 @@ int main(int argc, char **argv)
     defFieldOne->ndim=defFieldOne->dim[0]=5;
     defFieldOne->nt=defFieldOne->dim[4]=1;
     defFieldOne->nu=defFieldOne->dim[5]=defFieldOne->nz>1?3:2;
-    defFieldOne->nvox = (size_t)defFieldOne->nx * defFieldOne->ny *
-            defFieldOne->nz * defFieldOne->nu;
+    defFieldOne->nvox = CalcVoxelNumber(*defFieldOne, defFieldOne->ndim);
     defFieldOne->data = (void *)malloc(defFieldOne->nvox*defFieldOne->nbyper);
     nifti_image *defFieldTwo=nifti_copy_nim_info(defFieldOne);
     defFieldTwo->data = (void *)malloc(defFieldTwo->nvox*defFieldTwo->nbyper);
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index fb72dc65..f0fb9ced 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -41,8 +41,7 @@ int main(int argc, char **argv)
     nifti_image *gradientImage = nifti_copy_nim_info(inputImage);
     gradientImage->dim[0]=gradientImage->ndim=5;
     gradientImage->dim[5]=gradientImage->nu=dim;
-    gradientImage->nvox = (size_t)gradientImage->nx*gradientImage->ny*
-                      gradientImage->nz*gradientImage->nt*gradientImage->nu;
+    gradientImage->nvox = CalcVoxelNumber(*gradientImage, gradientImage->ndim);
     gradientImage->nbyper=sizeof(float);
     gradientImage->datatype=NIFTI_TYPE_FLOAT32;
     gradientImage->data=(void *)malloc(gradientImage->nvox*gradientImage->nbyper);
@@ -50,8 +49,7 @@ int main(int argc, char **argv)
     // Allocate a temporary file to compute the gradient's timepoint one at the time
     nifti_image *tempGradImage = nifti_copy_nim_info(gradientImage);
     tempGradImage->dim[4]=tempGradImage->nt=1;
-    tempGradImage->nvox = (size_t)tempGradImage->nx*tempGradImage->ny*
-                      tempGradImage->nz*tempGradImage->nt*tempGradImage->nu;
+    tempGradImage->nvox = CalcVoxelNumber(*tempGradImage, tempGradImage->ndim);
     tempGradImage->data=(void *)malloc(tempGradImage->nvox*tempGradImage->nbyper);
 
     // Declare a deformation field image
@@ -63,8 +61,7 @@ int main(int argc, char **argv)
         defFieldImage->dim[0]=defFieldImage->ndim=5;
         defFieldImage->dim[4]=defFieldImage->nt=1;
         defFieldImage->dim[5]=defFieldImage->nu=dim;
-        defFieldImage->nvox = (size_t)defFieldImage->nx*defFieldImage->ny *
-                                 defFieldImage->nz*defFieldImage->nu;
+        defFieldImage->nvox = CalcVoxelNumber(*defFieldImage, defFieldImage->ndim);
         defFieldImage->nbyper=sizeof(float);
         defFieldImage->datatype=NIFTI_TYPE_FLOAT32;
         defFieldImage->intent_code=NIFTI_INTENT_VECTOR;
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 1a9b2193..0c4a8c71 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -72,7 +72,7 @@ TEST_CASE("Resampling", "[resampling]") {
     nifti_image *id_field_2D = nifti_copy_nim_info(reference2D);
     id_field_2D->ndim = id_field_2D->dim[0] = 5;
     id_field_2D->nu = id_field_2D->dim[5] = 2;
-    id_field_2D->nvox = id_field_2D->nx * id_field_2D->ny * id_field_2D->nu;
+    id_field_2D->nvox = CalcVoxelNumber(*id_field_2D, id_field_2D->ndim);
     id_field_2D->data = (void *)calloc(id_field_2D->nvox, id_field_2D->nbyper);
     reg_getDeformationFromDisplacement(id_field_2D);
     float res2[4];
@@ -89,7 +89,7 @@ TEST_CASE("Resampling", "[resampling]") {
     nifti_image *id_field_3D = nifti_copy_nim_info(reference3D);
     id_field_3D->ndim = id_field_3D->dim[0] = 5;
     id_field_3D->nu = id_field_3D->dim[5] = 3;
-    id_field_3D->nvox = id_field_3D->nx * id_field_3D->ny * id_field_3D->nz * id_field_3D->nu;
+    id_field_3D->nvox = CalcVoxelNumber(*id_field_3D, id_field_3D->ndim);
     id_field_3D->data = calloc(id_field_3D->nvox, id_field_3D->nbyper);
     reg_getDeformationFromDisplacement(id_field_3D);
     float res3[8];
@@ -157,7 +157,7 @@ TEST_CASE("Resampling", "[resampling]") {
 
                     // Check all values
                     auto *warpedPtr = static_cast<float*>(warped->data);
-                    for (int i = 0; i < warped->nx * warped->ny * warped->nz; ++i) {
+                    for (size_t i = 0; i < CalcVoxelNumber(*warped); ++i) {
                         std::cout << i << " " << static_cast<float*>(reference->data)[i] << " " << warpedPtr[i] << " " << test_res[i] << std::endl;
                         REQUIRE(fabs(warpedPtr[i] - test_res[i]) < EPS_SINGLE);
                     }

From ba17bf15b73955141bde656bf3c30b8987a25c3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 10 Feb 2023 19:08:28 +0000
Subject: [PATCH 045/314] Add Platform::CreateContentCreator() to handle
 content creation and ditch use of _USE_CUDA and _USE_OPENCL directives

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/AladinContentCreator.h                |  18 +
 reg-lib/ContentCreator.h                      |  14 +
 reg-lib/ContentCreatorFactory.h               |  21 ++
 reg-lib/F3dContentCreator.h                   |  17 +
 reg-lib/Platform.cpp                          | 140 +++----
 reg-lib/Platform.h                            |  21 +-
 reg-lib/_reg_aladin.cpp                       |  12 +-
 reg-lib/_reg_aladin.h                         |   8 -
 reg-lib/_reg_aladin_sym.cpp                   |  30 +-
 reg-lib/_reg_f3d.cpp                          |  14 +-
 reg-lib/_reg_f3d.h                            |   2 +-
 reg-lib/_reg_f3d2.cpp                         |  12 +-
 reg-lib/cl/ClAladinContentCreator.h           |  18 +
 reg-lib/cl/ClContentCreatorFactory.h          |  18 +
 reg-lib/cpu/_reg_localTrans.cpp               |   4 +-
 reg-lib/cpu/_reg_tools.h                      |   1 +
 reg-lib/cuda/CudaAladinContentCreator.h       |  18 +
 reg-lib/cuda/CudaContentCreator.h             |  15 +
 reg-lib/cuda/CudaContentCreatorFactory.h      |  20 +
 reg-lib/cuda/CudaF3dContentCreator.h          |  17 +
 reg-test/reg_test_blockMatching.cpp           | 312 +++++++---------
 ...est_coherence_affine_deformation_field.cpp |  76 ++--
 reg-test/reg_test_coherence_blockMatching.cpp | 351 ++++++++----------
 reg-test/reg_test_coherence_interpolation.cpp | 129 +++----
 reg-test/reg_test_interpolation.cpp           |   4 +-
 reg-test/reg_test_leastTrimmedSquares.cpp     | 286 +++++++-------
 27 files changed, 775 insertions(+), 805 deletions(-)
 create mode 100644 reg-lib/AladinContentCreator.h
 create mode 100644 reg-lib/ContentCreator.h
 create mode 100644 reg-lib/ContentCreatorFactory.h
 create mode 100644 reg-lib/F3dContentCreator.h
 create mode 100644 reg-lib/cl/ClAladinContentCreator.h
 create mode 100644 reg-lib/cl/ClContentCreatorFactory.h
 create mode 100644 reg-lib/cuda/CudaAladinContentCreator.h
 create mode 100644 reg-lib/cuda/CudaContentCreator.h
 create mode 100644 reg-lib/cuda/CudaContentCreatorFactory.h
 create mode 100644 reg-lib/cuda/CudaF3dContentCreator.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 4c5c8078..3f7d1915 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-158
+159
diff --git a/reg-lib/AladinContentCreator.h b/reg-lib/AladinContentCreator.h
new file mode 100644
index 00000000..58d42853
--- /dev/null
+++ b/reg-lib/AladinContentCreator.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "ContentCreator.h"
+#include "AladinContent.h"
+
+class AladinContentCreator: public ContentCreator {
+public:
+    virtual AladinContent* Create(nifti_image *reference,
+                                  nifti_image *floating,
+                                  int *referenceMask = nullptr,
+                                  mat44 *transformationMatrix = nullptr,
+                                  size_t bytes = sizeof(float),
+                                  const unsigned int percentageOfBlocks = 0,
+                                  const unsigned int inlierLts = 0,
+                                  int blockStepSize = 0) {
+        return new AladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize);
+    }
+};
diff --git a/reg-lib/ContentCreator.h b/reg-lib/ContentCreator.h
new file mode 100644
index 00000000..050bdba8
--- /dev/null
+++ b/reg-lib/ContentCreator.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "Content.h"
+
+class ContentCreator {
+public:
+    virtual Content* Create(nifti_image *reference,
+                            nifti_image *floating,
+                            int *referenceMask = nullptr,
+                            mat44 *transformationMatrix = nullptr,
+                            size_t bytes = sizeof(float)) {
+        return new Content(reference, floating, referenceMask, transformationMatrix, bytes);
+    }
+};
diff --git a/reg-lib/ContentCreatorFactory.h b/reg-lib/ContentCreatorFactory.h
new file mode 100644
index 00000000..575eb8c4
--- /dev/null
+++ b/reg-lib/ContentCreatorFactory.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "ContentCreator.h"
+#include "AladinContentCreator.h"
+#include "F3dContentCreator.h"
+
+enum class ContentType { Base, Aladin, F3d };
+
+class ContentCreatorFactory {
+public:
+    virtual ContentCreator* Produce(const ContentType& conType) {
+        switch (conType) {
+        case ContentType::Aladin:
+            return new AladinContentCreator();
+        case ContentType::F3d:
+            return new F3dContentCreator();
+        default:
+            return new ContentCreator();
+        }
+    }
+};
diff --git a/reg-lib/F3dContentCreator.h b/reg-lib/F3dContentCreator.h
new file mode 100644
index 00000000..d57657b0
--- /dev/null
+++ b/reg-lib/F3dContentCreator.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "ContentCreator.h"
+#include "F3dContent.h"
+
+class F3dContentCreator: public ContentCreator {
+public:
+    virtual F3dContent* Create(nifti_image *reference,
+                               nifti_image *floating,
+                               nifti_image *controlPointGrid,
+                               nifti_image *localWeightSim = nullptr,
+                               int *referenceMask = nullptr,
+                               mat44 *transformationMatrix = nullptr,
+                               size_t bytes = sizeof(float)) {
+        return new F3dContent(reference, floating, controlPointGrid, localWeightSim, referenceMask, transformationMatrix, bytes);
+    }
+};
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 06aac408..87e4aece 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -1,42 +1,105 @@
 #include "Platform.h"
 #include "CpuKernelFactory.h"
 #ifdef _USE_CUDA
-#include "CudaKernelFactory.h"
+#include "CudaContextSingleton.h"
 #include "CudaF3dContent.h"
 #include "CudaComputeFactory.h"
-#include "CudaContextSingleton.h"
+#include "CudaContentCreatorFactory.h"
+#include "CudaKernelFactory.h"
 #include "CudaMeasureFactory.h"
 #include "_reg_optimiser_gpu.h"
 #endif
 #ifdef _USE_OPENCL
-#include "ClKernelFactory.h"
-#include "ClComputeFactory.h"
 #include "ClContextSingleton.h"
+#include "ClComputeFactory.h"
+#include "ClContentCreatorFactory.h"
+#include "ClKernelFactory.h"
 #endif
 
 /* *************************************************************** */
 Platform::Platform(const PlatformType& platformTypeIn) {
     platformType = platformTypeIn;
     if (platformType == PlatformType::Cpu) {
-        kernelFactory = new CpuKernelFactory();
         computeFactory = new ComputeFactory();
+        contentCreatorFactory = new ContentCreatorFactory();
+        kernelFactory = new CpuKernelFactory();
         measureFactory = new MeasureFactory();
         platformName = "cpu_platform";
     }
 #ifdef _USE_CUDA
     else if (platformType == PlatformType::Cuda) {
-        kernelFactory = new CudaKernelFactory();
         computeFactory = new CudaComputeFactory();
+        contentCreatorFactory = new CudaContentCreatorFactory();
+        kernelFactory = new CudaKernelFactory();
         measureFactory = new CudaMeasureFactory();
         platformName = "cuda_platform";
     }
 #endif
 #ifdef _USE_OPENCL
     else if (platformType == PlatformType::OpenCl) {
-        kernelFactory = new ClKernelFactory();
         computeFactory = new ClComputeFactory();
+        contentCreatorFactory = new ClContentCreatorFactory();
+        kernelFactory = new ClKernelFactory();
         platformName = "cl_platform";
     }
+#endif
+    else {
+        reg_print_fct_error("Platform::Platform");
+        reg_print_msg_error("Unsupported platform type");
+        reg_exit();
+    }
+}
+/* *************************************************************** */
+Platform::~Platform() {
+    delete computeFactory;
+    delete contentCreatorFactory;
+    delete kernelFactory;
+    delete measureFactory;
+}
+/* *************************************************************** */
+std::string Platform::GetName() const {
+    return platformName;
+}
+/* *************************************************************** */
+PlatformType Platform::GetPlatformType() const {
+    return platformType;
+}
+/* *************************************************************** */
+unsigned int Platform::GetGpuIdx() const {
+    return gpuIdx;
+}
+/* *************************************************************** */
+void Platform::SetGpuIdx(unsigned gpuIdxIn) {
+    if (platformType == PlatformType::Cpu) {
+        gpuIdx = 999;
+    }
+#ifdef _USE_CUDA
+    else if (platformType == PlatformType::Cuda) {
+        CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance();
+        if (gpuIdxIn != 999) {
+            gpuIdx = gpuIdxIn;
+            cudaContext->SetCudaIdx(gpuIdxIn);
+        }
+    }
+#endif
+#ifdef _USE_OPENCL
+    else if (platformType == PlatformType::OpenCl) {
+        ClContextSingleton *sContext = &ClContextSingleton::Instance();
+        if (gpuIdxIn != 999) {
+            gpuIdx = gpuIdxIn;
+            sContext->SetClIdx(gpuIdxIn);
+        }
+
+        std::size_t paramValueSize;
+        sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
+        cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize);
+        sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
+        if (CL_DEVICE_TYPE_CPU == *field) {
+            reg_print_fct_error("Platform::setClIdx");
+            reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit");
+            reg_exit();
+        }
+    }
 #endif
 }
 /* *************************************************************** */
@@ -44,10 +107,18 @@ Compute* Platform::CreateCompute(Content& con) const {
     return computeFactory->Produce(con);
 }
 /* *************************************************************** */
+ContentCreator* Platform::CreateContentCreator(const ContentType& conType) const {
+    return contentCreatorFactory->Produce(conType);
+}
+/* *************************************************************** */
 Kernel* Platform::CreateKernel(const std::string& name, Content *con) const {
     return kernelFactory->Produce(name, con);
 }
 /* *************************************************************** */
+Measure* Platform::CreateMeasure() const {
+    return measureFactory->Produce();
+}
+/* *************************************************************** */
 template<typename Type>
 reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
                                                InterfaceOptimiser& opt,
@@ -103,58 +174,3 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
 template reg_optimiser<float>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const;
 template reg_optimiser<double>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const;
 /* *************************************************************** */
-Measure* Platform::CreateMeasure() const {
-    return measureFactory->Produce();
-}
-/* *************************************************************** */
-std::string Platform::GetName() const {
-    return platformName;
-}
-/* *************************************************************** */
-unsigned int Platform::GetGpuIdx() const {
-    return gpuIdx;
-}
-/* *************************************************************** */
-void Platform::SetGpuIdx(unsigned gpuIdxIn) {
-    if (platformType == PlatformType::Cpu) {
-        gpuIdx = 999;
-    }
-#ifdef _USE_CUDA
-    else if (platformType == PlatformType::Cuda) {
-        CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance();
-        if (gpuIdxIn != 999) {
-            gpuIdx = gpuIdxIn;
-            cudaContext->SetCudaIdx(gpuIdxIn);
-        }
-    }
-#endif
-#ifdef _USE_OPENCL
-    else if (platformType == PlatformType::OpenCl) {
-        ClContextSingleton *sContext = &ClContextSingleton::Instance();
-        if (gpuIdxIn != 999) {
-            gpuIdx = gpuIdxIn;
-            sContext->SetClIdx(gpuIdxIn);
-        }
-
-        std::size_t paramValueSize;
-        sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
-        cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize);
-        sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
-        if (CL_DEVICE_TYPE_CPU == *field) {
-            reg_print_fct_error("Platform::setClIdx");
-            reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit");
-            reg_exit();
-        }
-    }
-#endif
-}
-/* *************************************************************** */
-PlatformType Platform::GetPlatformType() const {
-    return platformType;
-}
-/* *************************************************************** */
-Platform::~Platform() {
-    delete kernelFactory;
-    delete computeFactory;
-}
-/* *************************************************************** */
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index faff5757..7d7f9b37 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -1,8 +1,9 @@
 #pragma once
 
 #include "F3dContent.h"
-#include "KernelFactory.h"
 #include "ComputeFactory.h"
+#include "ContentCreatorFactory.h"
+#include "KernelFactory.h"
 #include "MeasureFactory.h"
 #include "_reg_optimiser.h"
 
@@ -11,10 +12,17 @@ enum class PlatformType { Cpu, Cuda, OpenCl };
 class Platform {
 public:
     Platform(const PlatformType& platformTypeIn);
-    virtual ~Platform();
+    ~Platform();
+
+    std::string GetName() const;
+    PlatformType GetPlatformType() const;
+    unsigned int GetGpuIdx() const;
+    void SetGpuIdx(unsigned gpuIdxIn);
 
     Compute* CreateCompute(Content& con) const;
+    ContentCreator* CreateContentCreator(const ContentType& conType = ContentType::Base) const;
     Kernel* CreateKernel(const std::string& name, Content *con) const;
+    Measure* CreateMeasure() const;
     template<typename Type>
     reg_optimiser<Type>* CreateOptimiser(F3dContent& con,
                                          InterfaceOptimiser& opt,
@@ -24,16 +32,11 @@ class Platform {
                                          bool optimiseY,
                                          bool optimiseZ,
                                          F3dContent *conBw = nullptr) const;
-    Measure* CreateMeasure() const;
-
-    std::string GetName() const;
-    PlatformType GetPlatformType() const;
-    void SetGpuIdx(unsigned gpuIdxIn);
-    unsigned int GetGpuIdx() const;
 
 private:
-    KernelFactory *kernelFactory = nullptr;
     ComputeFactory *computeFactory = nullptr;
+    ContentCreatorFactory *contentCreatorFactory = nullptr;
+    KernelFactory *kernelFactory = nullptr;
     MeasureFactory *measureFactory = nullptr;
     std::string platformName;
     PlatformType platformType;
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 7001bb61..ff73e6c9 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -450,16 +450,8 @@ void reg_aladin<T>::InitAladinContent(nifti_image *ref,
                                       unsigned int blockPercentage,
                                       unsigned int inlierLts,
                                       unsigned int blockStepSize) {
-    if (this->platformType == PlatformType::Cpu)
-        this->con = new AladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
-#ifdef _USE_CUDA
-    else if (platformType == PlatformType::Cuda)
-        this->con = new CudaAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
-#endif
-#ifdef _USE_OPENCL
-    else if (platformType == PlatformType::OpenCl)
-        this->con = new ClAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
-#endif
+    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
+    this->con = contentCreator->Create(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
     this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index a07a304e..4abfcd4a 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -33,14 +33,6 @@
 #include "ConvolutionKernel.h"
 #include "AladinContent.h"
 
-#ifdef _USE_CUDA
-#include "CudaAladinContent.h"
-#endif
-#ifdef _USE_OPENCL
-#include "ClAladinContent.h"
-#include "InfoDevice.h"
-#endif
-
 /**
  * @brief Block matching registration class
  *
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index 0aa51218..7ea18cfa 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -239,26 +239,18 @@ void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
                         unsigned int inlierLts,
                         unsigned int blockStepSize)
 {
-    reg_aladin<T>::InitAladinContent(ref,
-                               flo,
-                               mask,
-                               transMat,
-                               bytes,
-                               blockPercentage,
-                               inlierLts,
-                               blockStepSize);
+   reg_aladin<T>::InitAladinContent(ref,
+                              flo,
+                              mask,
+                              transMat,
+                              bytes,
+                              blockPercentage,
+                              inlierLts,
+                              blockStepSize);
 
-  if (this->platformType == PlatformType::Cpu)
-  this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
-#ifdef _USE_CUDA
-  else if (this->platformType == PlatformType::Cuda)
-  this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
-#endif
-#ifdef _USE_OPENCL
-  else if (this->platformType == PlatformType::OpenCl)
-  this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
-#endif
-  this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams();
+   std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
+   this->backCon = contentCreator->Create(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
+   this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
 template <class T>
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 55ca713d..d5412c5e 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -13,11 +13,7 @@
 #include "_reg_f3d.h"
 #include "F3dContent.h"
 
-#ifdef _USE_CUDA
-#include "CudaF3dContent.h"
-#endif
-
- /* *************************************************************** */
+/* *************************************************************** */
 template<class T>
 reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint):
     reg_base<T>::reg_base(refTimePoint, floTimePoint) {
@@ -110,12 +106,8 @@ void reg_f3d<T>::SetSpacing(unsigned int i, T s) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
-    if (this->platformType == PlatformType::Cpu)
-        this->con = new F3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
-#ifdef _USE_CUDA
-    else if (this->platformType == PlatformType::Cuda)
-        this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
-#endif
+    std::unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
+    this->con = contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
     this->compute = this->platform->CreateCompute(*this->con);
 }
 /* *************************************************************** */
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index 27186c8b..3ef13cd5 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -14,7 +14,7 @@
 
 #include "_reg_base.h"
 
- /// @brief Fast Free Form Deformation registration class
+/// @brief Fast Free Form Deformation registration class
 template <class T>
 class reg_f3d: public reg_base<T> {
 protected:
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 7b7a625b..2128bc23 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -13,10 +13,6 @@
 #include "_reg_f3d2.h"
 #include "F3dContent.h"
 
-#ifdef _USE_CUDA
-#include "CudaF3dContent.h"
-#endif
-
 /* *************************************************************** */
 template <class T>
 reg_f3d2<T>::reg_f3d2(int refTimePoint, int floTimePoint):
@@ -88,12 +84,8 @@ void reg_f3d2<T>::SetInverseConsistencyWeight(T w) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d2<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
-    if (this->platformType == PlatformType::Cpu)
-        conBw = new F3dContent(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T));
-#ifdef _USE_CUDA
-    else if (this->platformType == PlatformType::Cuda)
-        conBw = new CudaF3dContent(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T));
-#endif
+    std::unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
+    conBw = contentCreator->Create(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T));
     computeBw = this->platform->CreateCompute(*conBw);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClAladinContentCreator.h b/reg-lib/cl/ClAladinContentCreator.h
new file mode 100644
index 00000000..a1f2f5fe
--- /dev/null
+++ b/reg-lib/cl/ClAladinContentCreator.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "AladinContentCreator.h"
+#include "ClAladinContent.h"
+
+class ClAladinContentCreator: public AladinContentCreator {
+public:
+    virtual AladinContent* Create(nifti_image *reference,
+                                  nifti_image *floating,
+                                  int *referenceMask = nullptr,
+                                  mat44 *transformationMatrix = nullptr,
+                                  size_t bytes = sizeof(float),
+                                  const unsigned int percentageOfBlocks = 0,
+                                  const unsigned int inlierLts = 0,
+                                  int blockStepSize = 0) override {
+        return new ClAladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize);
+    }
+};
diff --git a/reg-lib/cl/ClContentCreatorFactory.h b/reg-lib/cl/ClContentCreatorFactory.h
new file mode 100644
index 00000000..b80c687e
--- /dev/null
+++ b/reg-lib/cl/ClContentCreatorFactory.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "ContentCreatorFactory.h"
+#include "ClAladinContentCreator.h"
+
+class ClContentCreatorFactory: public ContentCreatorFactory {
+public:
+    virtual ContentCreator* Produce(const ContentType& conType) override {
+        switch (conType) {
+        case ContentType::Aladin:
+            return new ClAladinContentCreator();
+        default:
+            reg_print_fct_error("ClContentFactory::Produce");
+            reg_print_msg_error("Unsupported content type");
+            reg_exit();
+        }
+    }
+};
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 94cbd6de..e5b42432 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -720,7 +720,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                   yVoxel>=0 && yVoxel<=deformationField->ny-1)
             {
 
-               // The control point postions are extracted
+               // The control point positions are extracted
                if(oldXpre!=xPre || oldYpre!=yPre)
                {
 #ifdef _USE_SSE
@@ -899,7 +899,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                   tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX );
                   tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY );
                }
-               //the values stored in SSE variables are transfered to normal float
+               //the values stored in SSE variables are transferred to normal float
                val.m=tempX;
                xReal=val.f[0]+val.f[1]+val.f[2]+val.f[3];
                val.m=tempY;
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 0b0a5c37..aa419d7d 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -16,6 +16,7 @@
 
 #include <fstream>
 #include <map>
+#include <memory>
 #include "_reg_maths.h"
 
 typedef enum {
diff --git a/reg-lib/cuda/CudaAladinContentCreator.h b/reg-lib/cuda/CudaAladinContentCreator.h
new file mode 100644
index 00000000..278e6f1f
--- /dev/null
+++ b/reg-lib/cuda/CudaAladinContentCreator.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "AladinContentCreator.h"
+#include "CudaAladinContent.h"
+
+class CudaAladinContentCreator: public AladinContentCreator {
+public:
+    virtual AladinContent* Create(nifti_image *reference,
+                                  nifti_image *floating,
+                                  int *referenceMask = nullptr,
+                                  mat44 *transformationMatrix = nullptr,
+                                  size_t bytes = sizeof(float),
+                                  const unsigned int percentageOfBlocks = 0,
+                                  const unsigned int inlierLts = 0,
+                                  int blockStepSize = 0) override {
+        return new CudaAladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize);
+    }
+};
diff --git a/reg-lib/cuda/CudaContentCreator.h b/reg-lib/cuda/CudaContentCreator.h
new file mode 100644
index 00000000..2bd82113
--- /dev/null
+++ b/reg-lib/cuda/CudaContentCreator.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "ContentCreator.h"
+#include "CudaContent.h"
+
+class CudaContentCreator: public ContentCreator {
+public:
+    virtual Content* Create(nifti_image *reference,
+                            nifti_image *floating,
+                            int *referenceMask = nullptr,
+                            mat44 *transformationMatrix = nullptr,
+                            size_t bytes = sizeof(float)) override {
+        return new CudaContent(reference, floating, referenceMask, transformationMatrix, bytes);
+    }
+};
diff --git a/reg-lib/cuda/CudaContentCreatorFactory.h b/reg-lib/cuda/CudaContentCreatorFactory.h
new file mode 100644
index 00000000..a70bbe57
--- /dev/null
+++ b/reg-lib/cuda/CudaContentCreatorFactory.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "ContentCreatorFactory.h"
+#include "CudaContentCreator.h"
+#include "CudaAladinContentCreator.h"
+#include "CudaF3dContentCreator.h"
+
+class CudaContentCreatorFactory: public ContentCreatorFactory {
+public:
+    virtual ContentCreator* Produce(const ContentType& conType) override {
+        switch (conType) {
+        case ContentType::Aladin:
+            return new CudaAladinContentCreator();
+        case ContentType::F3d:
+            return new CudaF3dContentCreator();
+        default:
+            return new CudaContentCreator();
+        }
+    }
+};
diff --git a/reg-lib/cuda/CudaF3dContentCreator.h b/reg-lib/cuda/CudaF3dContentCreator.h
new file mode 100644
index 00000000..3e741eb6
--- /dev/null
+++ b/reg-lib/cuda/CudaF3dContentCreator.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "F3dContentCreator.h"
+#include "CudaF3dContent.h"
+
+class CudaF3dContentCreator: public F3dContentCreator {
+public:
+    virtual F3dContent* Create(nifti_image *reference,
+                               nifti_image *floating,
+                               nifti_image *controlPointGrid,
+                               nifti_image *localWeightSim = nullptr,
+                               int *referenceMask = nullptr,
+                               mat44 *transformationMatrix = nullptr,
+                               size_t bytes = sizeof(float)) override {
+        return new CudaF3dContent(reference, floating, controlPointGrid, localWeightSim, referenceMask, transformationMatrix, bytes);
+    }
+};
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index cab1b6c6..ab5a8fef 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -6,16 +6,7 @@
 
 #include "BlockMatchingKernel.h"
 #include "Platform.h"
-
 #include "AladinContent.h"
-#ifdef _USE_CUDA
-#include "CudaAladinContent.h"
-#endif
-#ifdef _USE_OPENCL
-#include "ClAladinContent.h"
-#endif
-
-#include <algorithm>
 
 #define EPS 0.000001
 
@@ -24,195 +15,168 @@ void check_matching_difference(int dim,
                                float* warpedPosition,
                                float* expectedReferencePositions,
                                float* expectedWarpedPosition,
-                               float &max_difference)
-{
-   float difference;
-   for (int i = 0; i < dim; ++i) {
-      difference = fabsf(referencePosition[i] - expectedReferencePositions[i]);
-      max_difference = std::max(difference, max_difference);
-      if (difference > EPS){
+                               float &max_difference) {
+    float difference;
+    for (int i = 0; i < dim; ++i) {
+        difference = fabsf(referencePosition[i] - expectedReferencePositions[i]);
+        max_difference = std::max(difference, max_difference);
+        if (difference > EPS) {
 #ifndef NDEBUG
-         fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS);
-         if(dim==2){
-            fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n",
-                    referencePosition[0], referencePosition[1],
-                  expectedReferencePositions[0], expectedReferencePositions[1]);
-            fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n",
-                    warpedPosition[0], warpedPosition[1],
-                  expectedWarpedPosition[0], expectedWarpedPosition[1]);
-         }
-         else{
-            fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n",
-                    referencePosition[0], referencePosition[1], referencePosition[2],
-                  expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]);
-            fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n",
-                    warpedPosition[0], warpedPosition[1], warpedPosition[2],
-                  expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]);
-         }
-         reg_exit();
+            fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS);
+            if (dim == 2) {
+                fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n",
+                        referencePosition[0], referencePosition[1],
+                        expectedReferencePositions[0], expectedReferencePositions[1]);
+                fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n",
+                        warpedPosition[0], warpedPosition[1],
+                        expectedWarpedPosition[0], expectedWarpedPosition[1]);
+            } else {
+                fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n",
+                        referencePosition[0], referencePosition[1], referencePosition[2],
+                        expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]);
+                fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n",
+                        warpedPosition[0], warpedPosition[1], warpedPosition[2],
+                        expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]);
+            }
+            reg_exit();
 #endif
-      }
-      difference = fabsf(warpedPosition[i] - expectedWarpedPosition[i]);
-      max_difference = std::max(difference, max_difference);
-      if (difference > EPS){
+        }
+        difference = fabsf(warpedPosition[i] - expectedWarpedPosition[i]);
+        max_difference = std::max(difference, max_difference);
+        if (difference > EPS) {
 #ifndef NDEBUG
-         fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS);
-         if(dim==2){
-            fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n",
-                    referencePosition[0], referencePosition[1],
-                  expectedReferencePositions[0], expectedReferencePositions[1]);
-            fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n",
-                    warpedPosition[0], warpedPosition[1],
-                  expectedWarpedPosition[0], expectedWarpedPosition[1]);
-         }
-         else{
-            fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n",
-                    referencePosition[0], referencePosition[1], referencePosition[2],
-                  expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]);
-            fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n",
-                    warpedPosition[0], warpedPosition[1], warpedPosition[2],
-                  expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]);
-         }
-         reg_exit();
+            fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS);
+            if (dim == 2) {
+                fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n",
+                        referencePosition[0], referencePosition[1],
+                        expectedReferencePositions[0], expectedReferencePositions[1]);
+                fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n",
+                        warpedPosition[0], warpedPosition[1],
+                        expectedWarpedPosition[0], expectedWarpedPosition[1]);
+            } else {
+                fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n",
+                        referencePosition[0], referencePosition[1], referencePosition[2],
+                        expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]);
+                fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n",
+                        warpedPosition[0], warpedPosition[1], warpedPosition[2],
+                        expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]);
+            }
+            reg_exit();
 #endif
-      }
-   }
+        }
+    }
 }
 
-void test(AladinContent *con, PlatformType platformType) {
-
-   Platform *platform = new Platform(platformType);
-
-   Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con);
-   blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
-
-   delete blockMatchingKernel;
-   delete platform;
+void test(AladinContent *con, Platform *platform) {
+    std::unique_ptr<Kernel> blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) };
+    blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
 }
 
-int main(int argc, char **argv)
-{
-
-   if (argc != 5) {
-      fprintf(stderr, "Usage: %s <refImage> <warpedImage> <expectedBlockMatchingMatrix> <platformType>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputRefImageName = argv[1];
-   char *inputWarpedImageName = argv[2];
-   char *expectedBlockMatchingMatrixName = argv[3];
-   PlatformType platformType{atoi(argv[4])};
-
-   // Read the input reference image
-   nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if (referenceImage == nullptr){
-      reg_print_msg_error("The input reference image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(referenceImage);
-   //dim
-   int imgDim = referenceImage->dim[0];
-
-   // Read the input floating image
-   nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName);
-   if (warpedImage == nullptr){
-      reg_print_msg_error("The input warped image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(warpedImage);
-
-   // Read the expected block matching matrix
-   std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(expectedBlockMatchingMatrixName);
-   size_t m = inputMatrixSize.first;
-   size_t n = inputMatrixSize.second;
-   float **expectedBlockMatchingMatrix = reg_tool_ReadMatrixFile<float>(expectedBlockMatchingMatrixName, m, n);
-
-   // Create a mask
-   int *mask = (int *)malloc(referenceImage->nvox*sizeof(int));
-   for (size_t i = 0; i < referenceImage->nvox; ++i) {
-      mask[i] = i;
-   }
-
-   _reg_blockMatchingParam* blockMatchingParams;
-
-   // Platforms
-   AladinContent *con = nullptr;
-   if (platformType == PlatformType::Cpu) {
-      con = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
-   }
-#ifdef _USE_CUDA
-   else if (platformType == PlatformType::Cuda) {
-      con = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
-   }
-#endif
-#ifdef _USE_OPENCL
-   else if (platformType == PlatformType::OpenCl) {
-      con = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
-   }
-#endif
-   else {
-      reg_print_msg_error("The platform code is not suppoted");
-      return EXIT_FAILURE;
-   }
-   con->SetWarped(warpedImage);
-   //con->SetWarped(referenceImage);
-   test(con, platformType);
-   blockMatchingParams = con->GetBlockMatchingParams();
+int main(int argc, char **argv) {
+
+    if (argc != 5) {
+        fprintf(stderr, "Usage: %s <refImage> <warpedImage> <expectedBlockMatchingMatrix> <platformType>\n", argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    char *inputRefImageName = argv[1];
+    char *inputWarpedImageName = argv[2];
+    char *expectedBlockMatchingMatrixName = argv[3];
+    PlatformType platformType{ atoi(argv[4]) };
+
+    // Read the input reference image
+    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
+    if (referenceImage == nullptr) {
+        reg_print_msg_error("The input reference image could not be read");
+        return EXIT_FAILURE;
+    }
+    reg_tools_changeDatatype<float>(referenceImage);
+    //dim
+    int imgDim = referenceImage->dim[0];
+
+    // Read the input floating image
+    nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName);
+    if (warpedImage == nullptr) {
+        reg_print_msg_error("The input warped image could not be read");
+        return EXIT_FAILURE;
+    }
+    reg_tools_changeDatatype<float>(warpedImage);
+
+    // Read the expected block matching matrix
+    std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(expectedBlockMatchingMatrixName);
+    size_t m = inputMatrixSize.first;
+    size_t n = inputMatrixSize.second;
+    float **expectedBlockMatchingMatrix = reg_tool_ReadMatrixFile<float>(expectedBlockMatchingMatrixName, m, n);
+
+    // Create a mask
+    int *mask = (int *)malloc(referenceImage->nvox * sizeof(int));
+    for (size_t i = 0; i < referenceImage->nvox; ++i) {
+        mask[i] = i;
+    }
+
+    _reg_blockMatchingParam* blockMatchingParams;
+
+    // Platforms
+    std::unique_ptr<Platform> platform{ new Platform(platformType) };
+    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+    std::unique_ptr<AladinContent> con{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
+    con->SetWarped(warpedImage);
+    //con->SetWarped(referenceImage);
+    test(con.get(), platform.get());
+    blockMatchingParams = con->GetBlockMatchingParams();
 
 #ifndef NDEBUG
-   std::cout << "blockMatchingParams->definedActiveBlock = " << blockMatchingParams->definedActiveBlockNumber << std::endl;
+    std::cout << "blockMatchingParams->definedActiveBlock = " << blockMatchingParams->definedActiveBlockNumber << std::endl;
 #endif
 
-   float max_difference = 0;
+    float max_difference = 0;
 
-   int blockIndex = 0;
-   int positionIndex = 0;
-   int matrixIndex = 0;
+    int blockIndex = 0;
+    int positionIndex = 0;
+    int matrixIndex = 0;
 
-   unsigned int zMax = 2;
-   if (imgDim == 3)
-      zMax = blockMatchingParams->blockNumber[2] - 1;
+    unsigned int zMax = 2;
+    if (imgDim == 3)
+        zMax = blockMatchingParams->blockNumber[2] - 1;
 
 
-   for (unsigned int z = 1; z < zMax; z += 3) {
-      for (unsigned int y = 1; y < blockMatchingParams->blockNumber[1] - 1; y += 3) {
-         for (unsigned int x = 1; x < blockMatchingParams->blockNumber[0] - 1; x += 3) {
+    for (unsigned int z = 1; z < zMax; z += 3) {
+        for (unsigned int y = 1; y < blockMatchingParams->blockNumber[1] - 1; y += 3) {
+            for (unsigned int x = 1; x < blockMatchingParams->blockNumber[0] - 1; x += 3) {
 
-            if (imgDim == 3) {
-               blockIndex = (z * blockMatchingParams->blockNumber[1] + y) * blockMatchingParams->blockNumber[0] + x;
-            }
-            else {
-               blockIndex = y * blockMatchingParams->blockNumber[0] + x;
-            }
+                if (imgDim == 3) {
+                    blockIndex = (z * blockMatchingParams->blockNumber[1] + y) * blockMatchingParams->blockNumber[0] + x;
+                } else {
+                    blockIndex = y * blockMatchingParams->blockNumber[0] + x;
+                }
 
-            positionIndex = imgDim * blockMatchingParams->totalBlock[blockIndex];
+                positionIndex = imgDim * blockMatchingParams->totalBlock[blockIndex];
 
-            if (positionIndex > -1) {
-               check_matching_difference(imgDim,
-                                         &blockMatchingParams->referencePosition[positionIndex],
-                                         &blockMatchingParams->warpedPosition[positionIndex],
-                                         &expectedBlockMatchingMatrix[matrixIndex][0],
-                     &expectedBlockMatchingMatrix[matrixIndex][3],
-                     max_difference);
-               matrixIndex++;
+                if (positionIndex > -1) {
+                    check_matching_difference(imgDim,
+                                              &blockMatchingParams->referencePosition[positionIndex],
+                                              &blockMatchingParams->warpedPosition[positionIndex],
+                                              &expectedBlockMatchingMatrix[matrixIndex][0],
+                                              &expectedBlockMatchingMatrix[matrixIndex][3],
+                                              max_difference);
+                    matrixIndex++;
+                }
             }
-         }
-      }
-   }
+        }
+    }
 
-   delete con;
-   free(mask);
-   reg_matrix2DDeallocate(m, expectedBlockMatchingMatrix);
-   nifti_image_free(referenceImage);
+    free(mask);
+    reg_matrix2DDeallocate(m, expectedBlockMatchingMatrix);
+    nifti_image_free(referenceImage);
 
-   if(max_difference>EPS){
+    if (max_difference > EPS) {
 #ifndef NDEBUG
-      fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS);
+        fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS);
 #endif
-      return EXIT_FAILURE;
-   }
+        return EXIT_FAILURE;
+    }
 #ifndef NDEBUG
-   printf("All good (%g<%g)\n", max_difference, EPS);
+    printf("All good (%g<%g)\n", max_difference, EPS);
 #endif
-   return EXIT_SUCCESS;
+    return EXIT_SUCCESS;
 }
diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp
index daddd286..96b83577 100644
--- a/reg-test/reg_test_coherence_affine_deformation_field.cpp
+++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp
@@ -6,32 +6,17 @@
 #include "Kernel.h"
 #include "AffineDeformationFieldKernel.h"
 #include "Platform.h"
-
 #include "AladinContent.h"
-#ifdef _USE_CUDA
-#include "CudaAladinContent.h"
-#endif
-
-#ifdef _USE_OPENCL
-#include "ClAladinContent.h"
-#endif
 
 #define EPS 0.000001
 #define EPS_SINGLE 0.0001
 
-void test(AladinContent *con, int platformType) {
-
-    Platform *platform = new Platform(platformType);
-
-    Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con);
+void test(AladinContent *con, Platform *platform) {
+    unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con) };
     affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
-
-    delete affineDeformKernel;
-    delete platform;
 }
 
-int main(int argc, char **argv)
-{
+int main(int argc, char **argv) {
     if (argc != 5) {
         fprintf(stderr, "Usage: %s <refImage> <inputMatrix> <expectedField> <platformType>\n", argv[0]);
         return EXIT_FAILURE;
@@ -40,7 +25,7 @@ int main(int argc, char **argv)
     char *inputRefImageName = argv[1];
     char *inputMatFileName = argv[2];
     char *inputDefImageName = argv[3];
-    PlatformType platformType{atoi(argv[4])};
+    PlatformType platformType{ atoi(argv[4]) };
 
     // Read the input reference image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
@@ -54,73 +39,60 @@ int main(int argc, char **argv)
 
     // Read the input deformation field image image
     nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName);
-    if (inputDeformationField == nullptr){
+    if (inputDeformationField == nullptr) {
         reg_print_msg_error("The input deformation field image could not be read");
         return EXIT_FAILURE;
     }
     // Check the dimension of the input images
     if (referenceImage->nx != inputDeformationField->nx ||
-            referenceImage->ny != inputDeformationField->ny ||
-            referenceImage->nz != inputDeformationField->nz ||
-            (referenceImage->nz > 1 ? 3 : 2) != inputDeformationField->nu){
+        referenceImage->ny != inputDeformationField->ny ||
+        referenceImage->nz != inputDeformationField->nz ||
+        (referenceImage->nz > 1 ? 3 : 2) != inputDeformationField->nu) {
         reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes");
         return EXIT_FAILURE;
     }
 
     // Create a deformation field
     nifti_image *test_field_cpu = nifti_copy_nim_info(inputDeformationField);
-    test_field_cpu->data = (void *) malloc(test_field_cpu->nvox*test_field_cpu->nbyper);
+    test_field_cpu->data = (void *)malloc(test_field_cpu->nvox * test_field_cpu->nbyper);
 
     nifti_image *test_field_gpu = nifti_copy_nim_info(inputDeformationField);
-    test_field_gpu->data = (void *) malloc(test_field_gpu->nvox*test_field_gpu->nbyper);
+    test_field_gpu->data = (void *)malloc(test_field_gpu->nvox * test_field_gpu->nbyper);
 
     // Compute the affine deformation field
-    AladinContent *con_cpu = new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float));
-    AladinContent *con_gpu = nullptr;
-#ifdef _USE_CUDA
-    if (platformType == PlatformType::Cuda) {
-        con_gpu = new CudaAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float));
-    }
-#endif
-#ifdef _USE_OPENCL
-    if (platformType == PlatformType::OpenCl) {
-        con_gpu = new ClAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float));
-    }
-#endif
-    if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){
-       reg_print_msg_error("Unexpected platform code");
-       return EXIT_FAILURE;
-    }
+    std::unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
+    std::unique_ptr<AladinContent> conCpu{ new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) };
+    std::unique_ptr<Platform> platformGpu{ new Platform(platformType) };
+    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
+    std::unique_ptr<AladinContent> conGpu{ contentCreator->Create(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) };
+
     //Check if the platform used is double capable
-    bool isDouble = con_gpu->IsCurrentComputationDoubleCapable();
+    bool isDouble = conGpu->IsCurrentComputationDoubleCapable();
     double proper_eps = EPS;
-    if(isDouble == 0) {
+    if (isDouble == 0) {
         proper_eps = EPS_SINGLE;
     }
 
     //CPU or GPU code
     reg_tools_changeDatatype<float>(referenceImage);
-    test(con_cpu, PlatformType::Cpu);
-    test_field_cpu = con_cpu->GetDeformationField();
+    test(conCpu.get(), platformCpu.get());
+    test_field_cpu = conCpu->GetDeformationField();
 
-    test(con_gpu, PlatformType::Cpu);
-    test_field_gpu = con_gpu->GetDeformationField();
+    test(conGpu.get(), platformGpu.get());
+    test_field_gpu = conGpu->GetDeformationField();
 
     // Compute the difference between the computed and inputted deformation field
     nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField);
-    diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper);
+    diff_field->data = (void *)malloc(diff_field->nvox * diff_field->nbyper);
     reg_tools_subtractImageFromImage(inputDeformationField, test_field_cpu, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_GetMaxValue(diff_field, -1);
 
     nifti_image_free(referenceImage);
     nifti_image_free(inputDeformationField);
-
-    delete con_cpu;
-    delete con_gpu;
     free(inputMatrix);
 
-    if (max_difference > proper_eps){
+    if (max_difference > proper_eps) {
         fprintf(stderr, "reg_test_affine_deformation_field error too large: %g (>%g)\n",
                 max_difference, proper_eps);
         return EXIT_FAILURE;
diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp
index f58556a7..3e581b81 100644
--- a/reg-test/reg_test_coherence_blockMatching.cpp
+++ b/reg-test/reg_test_coherence_blockMatching.cpp
@@ -6,16 +6,7 @@
 
 #include "BlockMatchingKernel.h"
 #include "Platform.h"
-
 #include "AladinContent.h"
-#ifdef _USE_CUDA
-#include "CudaAladinContent.h"
-#endif
-#ifdef _USE_OPENCL
-#include "ClAladinContent.h"
-#endif
-
-#include <algorithm>
 
 #define EPS 0.000001
 
@@ -24,214 +15,178 @@ void check_matching_difference(int dim,
                                float* cpuWarPos,
                                float* gpuRefPos,
                                float* gpuWarPos,
-                               float &max_difference)
-{
-   bool cpu_finite = cpuWarPos[0]==cpuWarPos[0] ? true : false;
-   bool gpu_finite = gpuWarPos[0]==gpuWarPos[0] ? true : false;
-
-   if(!cpu_finite && !gpu_finite) return;
-
-   if(cpu_finite!=gpu_finite){
-      max_difference = std::numeric_limits<float>::max();
-      return;
-   }
-
-   float difference;
-   for (int i = 0; i < dim; ++i) {
-      difference = fabsf(cpuRefPos[i] - gpuRefPos[i]);
-      max_difference = std::max(difference, max_difference);
-      if (difference > EPS){
+                               float &max_difference) {
+    bool cpu_finite = cpuWarPos[0] == cpuWarPos[0] ? true : false;
+    bool gpu_finite = gpuWarPos[0] == gpuWarPos[0] ? true : false;
+
+    if (!cpu_finite && !gpu_finite) return;
+
+    if (cpu_finite != gpu_finite) {
+        max_difference = std::numeric_limits<float>::max();
+        return;
+    }
+
+    float difference;
+    for (int i = 0; i < dim; ++i) {
+        difference = fabsf(cpuRefPos[i] - gpuRefPos[i]);
+        max_difference = std::max(difference, max_difference);
+        if (difference > EPS) {
 #ifndef NDEBUG
-         fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS);
-         if(dim==2){
-            fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n",
-                    cpuRefPos[0], cpuRefPos[1],
-                  gpuRefPos[0], gpuRefPos[1]);
-            fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n",
-                    cpuWarPos[0], cpuWarPos[1],
-                  gpuWarPos[0], gpuWarPos[1]);
-         }
-         else{
-            fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n",
-                    cpuRefPos[0], cpuRefPos[1], cpuRefPos[2],
-                  gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]);
-            fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n",
-                    cpuWarPos[0], cpuWarPos[1], cpuWarPos[2],
-                  gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]);
-         }
-         reg_exit();
+            fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS);
+            if (dim == 2) {
+                fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n",
+                        cpuRefPos[0], cpuRefPos[1],
+                        gpuRefPos[0], gpuRefPos[1]);
+                fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n",
+                        cpuWarPos[0], cpuWarPos[1],
+                        gpuWarPos[0], gpuWarPos[1]);
+            } else {
+                fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n",
+                        cpuRefPos[0], cpuRefPos[1], cpuRefPos[2],
+                        gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]);
+                fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n",
+                        cpuWarPos[0], cpuWarPos[1], cpuWarPos[2],
+                        gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]);
+            }
+            reg_exit();
 #endif
-      }
-      difference = fabsf(cpuWarPos[i] - gpuWarPos[i]);
-      max_difference = std::max(difference, max_difference);
-      if (difference > EPS){
+        }
+        difference = fabsf(cpuWarPos[i] - gpuWarPos[i]);
+        max_difference = std::max(difference, max_difference);
+        if (difference > EPS) {
 #ifndef NDEBUG
-         fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS);
-         if(dim==2){
-            fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n",
-                    cpuRefPos[0], cpuRefPos[1],
-                  gpuRefPos[0], gpuRefPos[1]);
-            fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n",
-                    cpuWarPos[0], cpuWarPos[1],
-                  gpuWarPos[0], gpuWarPos[1]);
-         }
-         else{
-            fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n",
-                    cpuRefPos[0], cpuRefPos[1], cpuRefPos[2],
-                  gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]);
-            fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n",
-                    cpuWarPos[0], cpuWarPos[1], cpuWarPos[2],
-                  gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]);
-         }
-         reg_exit();
+            fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS);
+            if (dim == 2) {
+                fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n",
+                        cpuRefPos[0], cpuRefPos[1],
+                        gpuRefPos[0], gpuRefPos[1]);
+                fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n",
+                        cpuWarPos[0], cpuWarPos[1],
+                        gpuWarPos[0], gpuWarPos[1]);
+            } else {
+                fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n",
+                        cpuRefPos[0], cpuRefPos[1], cpuRefPos[2],
+                        gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]);
+                fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n",
+                        cpuWarPos[0], cpuWarPos[1], cpuWarPos[2],
+                        gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]);
+            }
+            reg_exit();
 #endif
-      }
-   }
+        }
+    }
 }
 
-void test(AladinContent *con, int platformType) {
-
-   Platform *platform = new Platform(platformType);
-
-   Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con);
-   blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
-
-   delete blockMatchingKernel;
-   delete platform;
+void test(AladinContent *con, Platform *platform) {
+    std::unique_ptr<Kernel> blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) };
+    blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
 }
 
-int main(int argc, char **argv)
-{
-
-   if (argc != 4) {
-      fprintf(stderr, "Usage: %s <refImage> <warpedImage> <platformType>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputRefImageName = argv[1];
-   char *inputWarpedImageName = argv[2];
-   PlatformType platformType{atoi(argv[3])};
-#ifndef _USE_CUDA
-   if(platformType == PlatformType::Cuda){
-      reg_print_msg_error("NiftyReg has not been compiled with CUDA");
-      return EXIT_FAILURE;
-   }
-#endif
-#ifndef _USE_OPENCL
-   if(platformType == PlatformType::OpenCl){
-      reg_print_msg_error("NiftyReg has not been compiled with OpenCL");
-      return EXIT_FAILURE;
-   }
-#endif
-
-   if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){
-      reg_print_msg_error("Unexpected platform code");
-      return EXIT_FAILURE;
-   }
-
-   // Read the input reference image
-   nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if (referenceImage == nullptr){
-      reg_print_msg_error("The input reference image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(referenceImage);
-   //dim
-   int imgDim = referenceImage->dim[0];
-
-   // Read the input floating image
-   nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName);
-   if (warpedImage == nullptr){
-      reg_print_msg_error("The input warped image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(warpedImage);
-
-   // Create a mask
-   int *mask = (int *)malloc(referenceImage->nvox*sizeof(int));
-   for (size_t i = 0; i < referenceImage->nvox; ++i) mask[i] = i;
-
-   // CPU Platform
-   _reg_blockMatchingParam* blockMatchingParams_cpu = nullptr;
-   AladinContent *con_cpu = nullptr;
-   con_cpu = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
-   con_cpu->SetWarped(warpedImage);
-   test(con_cpu, PlatformType::Cpu);
-   blockMatchingParams_cpu = con_cpu->GetBlockMatchingParams();
+int main(int argc, char **argv) {
+    if (argc != 4) {
+        fprintf(stderr, "Usage: %s <refImage> <warpedImage> <platformType>\n", argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    char *inputRefImageName = argv[1];
+    char *inputWarpedImageName = argv[2];
+    PlatformType platformType{ atoi(argv[3]) };
+
+    if (platformType != PlatformType::Cuda && platformType != PlatformType::OpenCl) {
+        reg_print_msg_error("Unexpected platform code");
+        return EXIT_FAILURE;
+    }
+
+    // Read the input reference image
+    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
+    if (referenceImage == nullptr) {
+        reg_print_msg_error("The input reference image could not be read");
+        return EXIT_FAILURE;
+    }
+    reg_tools_changeDatatype<float>(referenceImage);
+    //dim
+    int imgDim = referenceImage->dim[0];
+
+    // Read the input floating image
+    nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName);
+    if (warpedImage == nullptr) {
+        reg_print_msg_error("The input warped image could not be read");
+        return EXIT_FAILURE;
+    }
+    reg_tools_changeDatatype<float>(warpedImage);
+
+    // Create a mask
+    int *mask = (int *)malloc(referenceImage->nvox * sizeof(int));
+    for (size_t i = 0; i < referenceImage->nvox; ++i) mask[i] = i;
+
+    // CPU Platform
+    std::unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
+    std::unique_ptr<AladinContent> conCpu{ new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
+    conCpu->SetWarped(warpedImage);
+    test(conCpu.get(), platformCpu.get());
+    _reg_blockMatchingParam *blockMatchingParams_cpu = conCpu->GetBlockMatchingParams();
 
 #ifndef NDEBUG
-   std::cout << "blockMatchingParams_cpu->activeBlockNumber = " << blockMatchingParams_cpu->activeBlockNumber << std::endl;
-   std::cout << "blockMatchingParams_cpu->definedActiveBlockNumber = " << blockMatchingParams_cpu->definedActiveBlockNumber << std::endl;
+    std::cout << "blockMatchingParams_cpu->activeBlockNumber = " << blockMatchingParams_cpu->activeBlockNumber << std::endl;
+    std::cout << "blockMatchingParams_cpu->definedActiveBlockNumber = " << blockMatchingParams_cpu->definedActiveBlockNumber << std::endl;
 #endif
 
-   // GPU Platform
-   AladinContent *con_gpu = nullptr;
-   _reg_blockMatchingParam* blockMatchingParams_gpu = nullptr;
-#ifdef _USE_CUDA
-   if (platformType == PlatformType::Cuda) {
-      con_gpu = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
-   }
-#endif
-#ifdef _USE_OPENCL
-   if (platformType == PlatformType::OpenCl) {
-      con_gpu = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1);
-   }
-#endif
-   con_gpu->SetWarped(warpedImage);
-   test(con_gpu, platformType);
-   blockMatchingParams_gpu = con_gpu->GetBlockMatchingParams();
+    // GPU Platform
+    std::unique_ptr<Platform> platformGpu{ new Platform(platformType) };
+    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
+    std::unique_ptr<AladinContent> conGpu{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
+    conGpu->SetWarped(warpedImage);
+    test(conGpu.get(), platformGpu.get());
+    _reg_blockMatchingParam *blockMatchingParams_gpu = conGpu->GetBlockMatchingParams();
 
 #ifndef NDEBUG
-   std::cout << "blockMatchingParams_gpu->activeBlockNumber = " << blockMatchingParams_gpu->activeBlockNumber << std::endl;
-   std::cout << "blockMatchingParams_gpu->definedActiveBlockNumber = " << blockMatchingParams_gpu->definedActiveBlockNumber << std::endl;
+    std::cout << "blockMatchingParams_gpu->activeBlockNumber = " << blockMatchingParams_gpu->activeBlockNumber << std::endl;
+    std::cout << "blockMatchingParams_gpu->definedActiveBlockNumber = " << blockMatchingParams_gpu->definedActiveBlockNumber << std::endl;
 #endif
 
-   float max_difference = 0;
-
-   if(blockMatchingParams_cpu->definedActiveBlockNumber != blockMatchingParams_gpu->definedActiveBlockNumber){
-      reg_print_msg_error("The number of defined active blockNumber blocks vary accros platforms");
-      char out_text[255];
-      sprintf(out_text, "activeBlockNumber CPU: %i", blockMatchingParams_cpu->activeBlockNumber);
-      reg_print_msg_error(out_text);
-      sprintf(out_text, "activeBlockNumber GPU: %i", blockMatchingParams_gpu->activeBlockNumber);
-      reg_print_msg_error(out_text);
-      sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_cpu->definedActiveBlockNumber);
-      reg_print_msg_error(out_text);
-      sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_gpu->definedActiveBlockNumber);
-      reg_print_msg_error(out_text);
-      return EXIT_FAILURE;
-   }
-
-   for(int i=0; i<blockMatchingParams_cpu->activeBlockNumber*imgDim; i+=imgDim){
-      check_matching_difference(imgDim,
-                                &blockMatchingParams_cpu->referencePosition[i],
-                                &blockMatchingParams_cpu->warpedPosition[i],
-                                &blockMatchingParams_gpu->referencePosition[i],
-                                &blockMatchingParams_gpu->warpedPosition[i],
-                                max_difference);
-   }
-   size_t test_cpu=0, test_gpu=0;
-   for(int i=0; i<blockMatchingParams_cpu->activeBlockNumber*imgDim; i+=imgDim){
-       test_cpu = (blockMatchingParams_cpu->warpedPosition[i]==blockMatchingParams_cpu->warpedPosition[i])?test_cpu+1:test_cpu;
-       test_gpu = (blockMatchingParams_gpu->warpedPosition[i]==blockMatchingParams_gpu->warpedPosition[i])?test_gpu+1:test_gpu;
-   }
-   printf("CPU: %zu - GPU: %zu\n", test_cpu, test_gpu);
-
-   delete con_gpu;
-   //delete con_cpu;
-   free(mask);
-   nifti_image_free(referenceImage);
-
-   if(max_difference>EPS){
+    float max_difference = 0;
+
+    if (blockMatchingParams_cpu->definedActiveBlockNumber != blockMatchingParams_gpu->definedActiveBlockNumber) {
+        reg_print_msg_error("The number of defined active blockNumber blocks vary accros platforms");
+        char out_text[255];
+        sprintf(out_text, "activeBlockNumber CPU: %i", blockMatchingParams_cpu->activeBlockNumber);
+        reg_print_msg_error(out_text);
+        sprintf(out_text, "activeBlockNumber GPU: %i", blockMatchingParams_gpu->activeBlockNumber);
+        reg_print_msg_error(out_text);
+        sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_cpu->definedActiveBlockNumber);
+        reg_print_msg_error(out_text);
+        sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_gpu->definedActiveBlockNumber);
+        reg_print_msg_error(out_text);
+        return EXIT_FAILURE;
+    }
+
+    for (int i = 0; i < blockMatchingParams_cpu->activeBlockNumber * imgDim; i += imgDim) {
+        check_matching_difference(imgDim,
+                                  &blockMatchingParams_cpu->referencePosition[i],
+                                  &blockMatchingParams_cpu->warpedPosition[i],
+                                  &blockMatchingParams_gpu->referencePosition[i],
+                                  &blockMatchingParams_gpu->warpedPosition[i],
+                                  max_difference);
+    }
+    size_t test_cpu = 0, test_gpu = 0;
+    for (int i = 0; i < blockMatchingParams_cpu->activeBlockNumber * imgDim; i += imgDim) {
+        test_cpu = (blockMatchingParams_cpu->warpedPosition[i] == blockMatchingParams_cpu->warpedPosition[i]) ? test_cpu + 1 : test_cpu;
+        test_gpu = (blockMatchingParams_gpu->warpedPosition[i] == blockMatchingParams_gpu->warpedPosition[i]) ? test_gpu + 1 : test_gpu;
+    }
+    printf("CPU: %zu - GPU: %zu\n", test_cpu, test_gpu);
+
+    free(mask);
+    nifti_image_free(referenceImage);
+
+    if (max_difference > EPS) {
 #ifndef NDEBUG
-      fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS);
+        fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS);
 #endif
-      return EXIT_FAILURE;
-   }
+        return EXIT_FAILURE;
+    }
 #ifndef NDEBUG
-   printf("All good (%g<%g)\n", max_difference, EPS);
+    printf("All good (%g<%g)\n", max_difference, EPS);
 #endif
 
-
-   return EXIT_SUCCESS;
+    return EXIT_SUCCESS;
 }
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
index dd879f87..07fbc7d5 100644
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ b/reg-test/reg_test_coherence_interpolation.cpp
@@ -5,143 +5,104 @@
 #include "ResampleImageKernel.h"
 #include "Platform.h"
 #include "AladinContent.h"
-#ifdef _USE_CUDA
-#include "CudaAladinContent.h"
-#endif
-#ifdef _USE_OPENCL
-#include "ClAladinContent.h"
-#endif
+
 #define EPS 0.000001
 #define EPS_SINGLE 0.0001
 
-int main(int argc, char **argv)
-{
-    if(argc!=5)
-    {
+int main(int argc, char **argv) {
+    if (argc != 5) {
         fprintf(stderr, "Usage: %s <refImage> <inputDefField> <order> <platformType>\n", argv[0]);
         return EXIT_FAILURE;
     }
 
-    char *inputRefImageName=argv[1];
-    char *inputDefImageName=argv[2];
-    int interpolation=atoi(argv[3]);
-    PlatformType platformType{atoi(argv[4])};
-#ifndef _USE_CUDA
-   if(platformType == PlatformType::Cuda){
-      reg_print_msg_error("NiftyReg has not been compiled with CUDA");
-      return EXIT_FAILURE;
-   }
-#endif
-#ifndef _USE_OPENCL
-   if(platformType == PlatformType::OpenCl){
-      reg_print_msg_error("NiftyReg has not been compiled with OpenCL");
-      return EXIT_FAILURE;
-   }
-#endif
-   if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){
-      reg_print_msg_error("Unexpected platform code");
-      return EXIT_FAILURE;
-   }
+    char *inputRefImageName = argv[1];
+    char *inputDefImageName = argv[2];
+    int interpolation = atoi(argv[3]);
+    PlatformType platformType{ atoi(argv[4]) };
+
+    if (platformType != PlatformType::Cuda && platformType != PlatformType::OpenCl) {
+        reg_print_msg_error("Unexpected platform code");
+        return EXIT_FAILURE;
+    }
 
     // Read the input reference image
     nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if(referenceImage==nullptr){
+    if (referenceImage == nullptr) {
         reg_print_msg_error("The input reference image could not be read");
         return EXIT_FAILURE;
     }
     reg_tools_changeDatatype<float>(referenceImage);
     // Read the input deformation field image image
     nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName);
-    if(inputDeformationField==nullptr){
+    if (inputDeformationField == nullptr) {
         reg_print_msg_error("The input deformation field image could not be read");
         return EXIT_FAILURE;
     }
     reg_tools_changeDatatype<float>(inputDeformationField);
 
     // Check the dimension of the input images
-    if(referenceImage->nx != inputDeformationField->nx ||
-            referenceImage->ny != inputDeformationField->ny ||
-            referenceImage->nz != inputDeformationField->nz ||
-            (referenceImage->nz>1?3:2) != inputDeformationField->nu){
+    if (referenceImage->nx != inputDeformationField->nx ||
+        referenceImage->ny != inputDeformationField->ny ||
+        referenceImage->nz != inputDeformationField->nz ||
+        (referenceImage->nz > 1 ? 3 : 2) != inputDeformationField->nu) {
         reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes");
         return EXIT_FAILURE;
     }
 
     // Initialise warped images
-    nifti_image *cpu_warped=nifti_copy_nim_info(referenceImage);
-    cpu_warped->data=(void *)malloc(cpu_warped->nvox*cpu_warped->nbyper);
-    nifti_image *gpu_warped=nifti_copy_nim_info(referenceImage);
-    gpu_warped->data=(void *)malloc(gpu_warped->nvox*gpu_warped->nbyper);
+    nifti_image *cpuWarped = nifti_copy_nim_info(referenceImage);
+    cpuWarped->data = malloc(cpuWarped->nvox * cpuWarped->nbyper);
+    nifti_image *gpuWarped = nifti_copy_nim_info(referenceImage);
+    gpuWarped->data = malloc(gpuWarped->nvox * gpuWarped->nbyper);
 
     int *tempMask = (int *)calloc(referenceImage->nvox, sizeof(int));
 
     // CPU platform
-    AladinContent *con_cpu = new AladinContent(nullptr, referenceImage, nullptr, sizeof(float));
-    con_cpu->SetWarped(cpu_warped);
-    con_cpu->SetDeformationField(inputDeformationField);
-    con_cpu->SetReferenceMask(tempMask);
-    Platform *platform_cpu = new Platform(PlatformType::Cpu);
-    Kernel *resampleImageKernel_cpu = platform_cpu->CreateKernel(ResampleImageKernel::GetName(), con_cpu);
+    std::unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
+    std::unique_ptr<AladinContent> conCpu{ new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)) };
+    conCpu->SetWarped(cpuWarped);
+    conCpu->SetDeformationField(inputDeformationField);
+    conCpu->SetReferenceMask(tempMask);
+    std::unique_ptr<Kernel> resampleImageKernel_cpu{ platformCpu->CreateKernel(ResampleImageKernel::GetName(), conCpu) };
     resampleImageKernel_cpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
                                                                       std::numeric_limits<float>::quiet_NaN());
-    delete resampleImageKernel_cpu;
-    delete platform_cpu;
-    cpu_warped = con_cpu->GetWarped();
+    cpuWarped = conCpu->GetWarped();
 
     // GPU platform
-    AladinContent *con_gpu = nullptr;
-#ifdef _USE_CUDA
-    if (platformType == PlatformType::Cuda) {
-        con_gpu = new CudaAladinContent(nullptr, referenceImage, nullptr, sizeof(float));
-    }
-#endif
-#ifdef _USE_OPENCL
-    if (platformType == PlatformType::OpenCl) {
-        con_gpu = new ClAladinContent(nullptr, referenceImage, nullptr, sizeof(float));
-    }
-#endif
-    con_gpu->SetWarped(gpu_warped);
-    con_gpu->SetDeformationField(inputDeformationField);
-    con_gpu->SetReferenceMask(tempMask);
-    Platform *platform_gpu = nullptr;
-#ifdef _USE_CUDA
-    if (platformType == PlatformType::Cuda)
-       platform_gpu = new Platform(PlatformType::Cuda);
-#endif
-#ifdef _USE_OPENCL
-    if (platformType == PlatformType::OpenCl) {
-       platform_gpu = new Platform(PlatformType::OpenCl);
-    }
-#endif
-    Kernel *resampleImageKernel_gpu = platform_gpu->CreateKernel(ResampleImageKernel::GetName(), con_gpu);
+    std::unique_ptr<Platform> platformGpu{ new Platform(platformType) };
+    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
+    std::unique_ptr<AladinContent> conGpu{ contentCreator->Create(nullptr, referenceImage, nullptr, sizeof(float)) };
+    conGpu->SetWarped(gpuWarped);
+    conGpu->SetDeformationField(inputDeformationField);
+    conGpu->SetReferenceMask(tempMask);
+
+    std::unique_ptr<Kernel> resampleImageKernel_gpu{ platformGpu->CreateKernel(ResampleImageKernel::GetName(), conGpu) };
     resampleImageKernel_gpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
                                                                       std::numeric_limits<float>::quiet_NaN());
-    delete resampleImageKernel_gpu;
-    delete platform_gpu;
-    gpu_warped = con_gpu->GetWarped();
+    gpuWarped = conGpu->GetWarped();
 
     //Check if the platform used is double capable
     double proper_eps = EPS;
-    if(con_gpu->IsCurrentComputationDoubleCapable() == 0) {
+    if (conGpu->IsCurrentComputationDoubleCapable() == 0) {
         proper_eps = EPS_SINGLE;
     }
 
     // Compute the difference between the warped images
     nifti_image *diff_field = nifti_copy_nim_info(referenceImage);
-    diff_field->data = (void *)malloc(diff_field->nvox*diff_field->nbyper);
+    diff_field->data = (void *)malloc(diff_field->nvox * diff_field->nbyper);
 
-    // Compute the difference between the computed and inputed warped image
-    reg_tools_subtractImageFromImage(cpu_warped, gpu_warped, diff_field);
+    // Compute the difference between the computed and inputted warped image
+    reg_tools_subtractImageFromImage(cpuWarped, gpuWarped, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_GetMaxValue(diff_field, -1);
 
     // free the allocated images
     nifti_image_free(referenceImage);
-    nifti_image_free(cpu_warped);
-    nifti_image_free(gpu_warped);
+    nifti_image_free(cpuWarped);
+    nifti_image_free(gpuWarped);
     nifti_image_free(inputDeformationField);
 
-    if(max_difference>proper_eps){
+    if (max_difference > proper_eps) {
         fprintf(stderr, "reg_test_interpolation error too large: %g (>%g)\n",
                 max_difference, proper_eps);
         return EXIT_FAILURE;
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 0c4a8c71..73100254 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -33,7 +33,7 @@ typedef std::tuple<AladinContent*, std::string, PlatformType> content_desc;
 
 TEST_CASE("Resampling", "[resampling]") {
     // Create a reference 2D image
-    int dim[8] = {2, 2, 2, 1, 1, 1, 1, 1};
+    int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 };
     nifti_image *reference2D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference2D);
 
@@ -150,7 +150,7 @@ TEST_CASE("Resampling", "[resampling]") {
                 auto *platform = new Platform(plat_value);
                 Kernel *resampleKernel = platform->CreateKernel(ResampleImageKernel::GetName(), con);
                 // args = interpolation and padding
-                std::list<int> interp = {0, 1, 3};
+                std::list<int> interp = { 0, 1, 3 };
                 for (auto it : interp) {
                     resampleKernel->castTo<ResampleImageKernel>()->Calculate(it, 0);
                     warped = con->GetWarped();
diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp
index adb263c7..921c1b2f 100644
--- a/reg-test/reg_test_leastTrimmedSquares.cpp
+++ b/reg-test/reg_test_leastTrimmedSquares.cpp
@@ -2,183 +2,145 @@
 #include "_reg_maths.h"
 #include "_reg_ReadWriteMatrix.h"
 #include "_reg_globalTrans.h"
-//STD
-#include <algorithm>
-//
+
 #include "OptimiseKernel.h"
 #include "Platform.h"
-
 #include "AladinContent.h"
-#ifdef _USE_CUDA
-#include "CudaAladinContent.h"
-#endif
-#ifdef _USE_OPENCL
-#include "ClAladinContent.h"
-#endif
 
 #define EPS 0.000001
 
-int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max_difference)
-{
-   for (int i = 0; i < 4; i++) {
-      for (int j = 0; j < 4; j++) {
-         float difference = fabsf(matrix1.m[i][j] - matrix2.m[i][j]);
-         max_difference = std::max(difference, max_difference);
-         if (difference > EPS){
-            fprintf(stderr, "reg_test_leastTrimmedSquares - %s failed %g>%g\n",
-                    name, difference, EPS);
-            return EXIT_FAILURE;
-         }
-      }
-   }
-   return EXIT_SUCCESS;
+int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max_difference) {
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            float difference = fabsf(matrix1.m[i][j] - matrix2.m[i][j]);
+            max_difference = std::max(difference, max_difference);
+            if (difference > EPS) {
+                fprintf(stderr, "reg_test_leastTrimmedSquares - %s failed %g>%g\n",
+                        name, difference, EPS);
+                return EXIT_FAILURE;
+            }
+        }
+    }
+    return EXIT_SUCCESS;
 }
-void test(AladinContent *con, PlatformType platformType, bool isAffine) {
-
-   Platform *platform = new Platform(platformType);
 
-   Kernel *optimiseKernel = platform->CreateKernel(OptimiseKernel::GetName(), con);
-   optimiseKernel->castTo<OptimiseKernel>()->Calculate(isAffine);
-
-   delete optimiseKernel;
-   delete platform;
+void test(AladinContent *con, Platform *platform, bool isAffine) {
+    std::unique_ptr<Kernel> optimiseKernel{ platform->CreateKernel(OptimiseKernel::GetName(), con) };
+    optimiseKernel->castTo<OptimiseKernel>()->Calculate(isAffine);
 }
 
-int main(int argc, char **argv)
-{
-
-   if (argc != 7) {
-      fprintf(stderr, "Usage: %s <inputPoints1> <inputPoints2> <percentToKeep> <isAffine> <expectedLTSMatrix> <platformType> \n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputMatrix1Filename = argv[1];
-   char *inputMatrix2Filename = argv[2];
-   unsigned int percentToKeep = atoi(argv[3]);
-   bool isAffine = atoi(argv[4]);
-   char *expectedLTSMatrixFilename = argv[5];
-   PlatformType platformType{atoi(argv[6])};
-
-   std::pair<size_t, size_t> inputMatrix1Size = reg_tool_sizeInputMatrixFile(inputMatrix1Filename);
-   size_t m1 = inputMatrix1Size.first;
-   size_t n1 = inputMatrix1Size.second;
-   std::pair<size_t, size_t> inputMatrix2Size = reg_tool_sizeInputMatrixFile(inputMatrix2Filename);
-   size_t m2 = inputMatrix2Size.first;
-   size_t n2 = inputMatrix2Size.second;
-
-   if (m1 != m2 || n1 != n2) {
-      fprintf(stderr, "The input matrices must have the same size");
-      return EXIT_FAILURE;
-   }
-
-   float **inputMatrix1 = reg_tool_ReadMatrixFile<float>(inputMatrix1Filename, m1, n1);
-   float **inputMatrix2 = reg_tool_ReadMatrixFile<float>(inputMatrix2Filename, m2, n2);
-   mat44 *expectedLSMatrix = reg_tool_ReadMat44File(expectedLTSMatrixFilename);
-   ////////////////////////
-   // Platforms
-   AladinContent *con = nullptr;
-   if (platformType == PlatformType::Cpu) {
-      con = new AladinContent();
-   }
-#ifdef _USE_CUDA
-   else if (platformType == PlatformType::Cuda) {
-      con = new CudaAladinContent();
-   }
-#endif
-#ifdef _USE_OPENCL
-   else if (platformType == PlatformType::OpenCl) {
-      con = new ClAladinContent();
-   }
-#endif
-   else {
-      reg_print_msg_error("The platform code is not suppoted");
-      return EXIT_FAILURE;
-   }
-   ////////////////////////
-   float max_difference = 0;
-   unsigned int num_points = m1;
-   //I think it is a bit dirty what I am going to do
-   _reg_blockMatchingParam* blockMatchingParams = new _reg_blockMatchingParam();
-
-   blockMatchingParams->blockNumber[0] = 1;
-   blockMatchingParams->blockNumber[1] = 1;
-
-   blockMatchingParams->totalBlockNumber = num_points;
-   blockMatchingParams->activeBlockNumber = num_points;
-   blockMatchingParams->definedActiveBlockNumber = num_points;
-   blockMatchingParams->percent_to_keep = percentToKeep;
-
-   mat44* test_LTS = (mat44 *)malloc(sizeof(mat44));
-   reg_mat44_eye(test_LTS);
-   con->SetTransformationMatrix(test_LTS);
-
-   //2-D
-   if (n1 == 2) {
-
-      blockMatchingParams->dim = n1;
-      blockMatchingParams->blockNumber[2] = 1;
-      blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float));
-      blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float));
-
-      unsigned int compteur = 0;
-      for (unsigned int j = 0; j < num_points; j++) {
-         blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0];
-         blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1];
-         blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0];
-         blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1];
-         compteur +=n1;
-      }
-   }
-   else if (n1 == 3) {
-
-      blockMatchingParams->dim = n1;
-      blockMatchingParams->blockNumber[2] = 2;
-      blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float));
-      blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float));
-      unsigned int compteur = 0;
-      for (unsigned int j = 0; j < num_points; j++) {
-         blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0];
-         blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1];
-         blockMatchingParams->referencePosition[compteur + 2] = inputMatrix1[j][2];
-         blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0];
-         blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1];
-         blockMatchingParams->warpedPosition[compteur + 2] = inputMatrix2[j][2];
-         compteur +=n1;
-      }
-   }
-   else {
-      fprintf(stderr, "The input matrix dimensions are not supported");
-      return EXIT_FAILURE;
-   }
-
-   con->SetBlockMatchingParams(blockMatchingParams);
-   test(con, platformType, isAffine);
+int main(int argc, char **argv) {
+    if (argc != 7) {
+        fprintf(stderr, "Usage: %s <inputPoints1> <inputPoints2> <percentToKeep> <isAffine> <expectedLTSMatrix> <platformType> \n", argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    char *inputMatrix1Filename = argv[1];
+    char *inputMatrix2Filename = argv[2];
+    unsigned int percentToKeep = atoi(argv[3]);
+    bool isAffine = atoi(argv[4]);
+    char *expectedLTSMatrixFilename = argv[5];
+    PlatformType platformType{ atoi(argv[6]) };
+
+    std::pair<size_t, size_t> inputMatrix1Size = reg_tool_sizeInputMatrixFile(inputMatrix1Filename);
+    size_t m1 = inputMatrix1Size.first;
+    size_t n1 = inputMatrix1Size.second;
+    std::pair<size_t, size_t> inputMatrix2Size = reg_tool_sizeInputMatrixFile(inputMatrix2Filename);
+    size_t m2 = inputMatrix2Size.first;
+    size_t n2 = inputMatrix2Size.second;
+
+    if (m1 != m2 || n1 != n2) {
+        fprintf(stderr, "The input matrices must have the same size");
+        return EXIT_FAILURE;
+    }
+
+    float **inputMatrix1 = reg_tool_ReadMatrixFile<float>(inputMatrix1Filename, m1, n1);
+    float **inputMatrix2 = reg_tool_ReadMatrixFile<float>(inputMatrix2Filename, m2, n2);
+    mat44 *expectedLSMatrix = reg_tool_ReadMat44File(expectedLTSMatrixFilename);
+
+    // Platform
+    std::unique_ptr<Platform> platform{ new Platform(platformType) };
+    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+    std::unique_ptr<AladinContent> con{ contentCreator->Create() };
+
+    float max_difference = 0;
+    unsigned int num_points = m1;
+    //I think it is a bit dirty what I am going to do
+    _reg_blockMatchingParam* blockMatchingParams = new _reg_blockMatchingParam();
+
+    blockMatchingParams->blockNumber[0] = 1;
+    blockMatchingParams->blockNumber[1] = 1;
+
+    blockMatchingParams->totalBlockNumber = num_points;
+    blockMatchingParams->activeBlockNumber = num_points;
+    blockMatchingParams->definedActiveBlockNumber = num_points;
+    blockMatchingParams->percent_to_keep = percentToKeep;
+
+    mat44* test_LTS = (mat44 *)malloc(sizeof(mat44));
+    reg_mat44_eye(test_LTS);
+    con->SetTransformationMatrix(test_LTS);
+
+    //2-D
+    if (n1 == 2) {
+
+        blockMatchingParams->dim = n1;
+        blockMatchingParams->blockNumber[2] = 1;
+        blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float));
+        blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float));
+
+        unsigned int compteur = 0;
+        for (unsigned int j = 0; j < num_points; j++) {
+            blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0];
+            blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1];
+            blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0];
+            blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1];
+            compteur += n1;
+        }
+    } else if (n1 == 3) {
+
+        blockMatchingParams->dim = n1;
+        blockMatchingParams->blockNumber[2] = 2;
+        blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float));
+        blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float));
+        unsigned int compteur = 0;
+        for (unsigned int j = 0; j < num_points; j++) {
+            blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0];
+            blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1];
+            blockMatchingParams->referencePosition[compteur + 2] = inputMatrix1[j][2];
+            blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0];
+            blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1];
+            blockMatchingParams->warpedPosition[compteur + 2] = inputMatrix2[j][2];
+            compteur += n1;
+        }
+    } else {
+        fprintf(stderr, "The input matrix dimensions are not supported");
+        return EXIT_FAILURE;
+    }
+
+    con->SetBlockMatchingParams(blockMatchingParams);
+    test(con.get(), platform.get(), isAffine);
 
 #ifndef NDEBUG
-   if (n1 == 2)
-      reg_mat44_disp(con->GetTransformationMatrix(), (char *) "test_optimize_2D");
-   else reg_mat44_disp(con->GetTransformationMatrix(), (char *) "test_optimize_3D");
+    if (n1 == 2)
+        reg_mat44_disp(con->GetTransformationMatrix(), (char *)"test_optimize_2D");
+    else reg_mat44_disp(con->GetTransformationMatrix(), (char *)"test_optimize_3D");
 #endif
 
-   if (n1 == 2){
-      if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *) "LTS matrices 2D affine - rigid", max_difference))
-         return EXIT_FAILURE;
-   }
-   else{
-      if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *) "LTS matrices 3D affine - rigid", max_difference))
-         return EXIT_FAILURE;
-   }
-
-   ////////////////////////
-   // FREE THE MEMORY: ////
-   ////////////////////////
-   delete con;
-   free(expectedLSMatrix);
-   reg_matrix2DDeallocate(m2, inputMatrix2);
-   reg_matrix2DDeallocate(m1, inputMatrix1);
+    if (n1 == 2) {
+        if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *)"LTS matrices 2D affine - rigid", max_difference))
+            return EXIT_FAILURE;
+    } else {
+        if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *)"LTS matrices 3D affine - rigid", max_difference))
+            return EXIT_FAILURE;
+    }
+
+    // Free memory
+    free(expectedLSMatrix);
+    reg_matrix2DDeallocate(m2, inputMatrix2);
+    reg_matrix2DDeallocate(m1, inputMatrix1);
 
 #ifndef NDEBUG
-   fprintf(stdout, "reg_test_leastTrimmedSquares ok: %g (<%g)\n", max_difference, EPS);
+    fprintf(stdout, "reg_test_leastTrimmedSquares ok: %g (<%g)\n", max_difference, EPS);
 #endif
-   return EXIT_SUCCESS;
+    return EXIT_SUCCESS;
 }

From babb5e1b60043b81beaa8f2f989ae871286976a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Feb 2023 13:51:43 +0000
Subject: [PATCH 046/314] Add Platform::IsCudaEnabled() and
 Platform::IsOpenClEnabled() functions to ditch use of _USE_CUDA and
 _USE_OPENCL directives

---
 niftyreg_build_version.txt                    |    2 +-
 reg-apps/reg_aladin.cpp                       | 1084 ++++++++---------
 reg-apps/reg_f3d.cpp                          |   50 +-
 reg-lib/Platform.cpp                          |    6 +-
 reg-lib/Platform.h                            |   22 +
 .../reg_test_affine_deformation_field.cpp     |  251 ++--
 reg-test/reg_test_interpolation.cpp           |  148 +--
 7 files changed, 699 insertions(+), 864 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3f7d1915..a7625603 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-159
+160
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 24cc3ac5..7d1eb92d 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -15,7 +15,7 @@
 #include "_reg_aladin_sym.h"
 #include "_reg_tools.h"
 #include "reg_aladin.h"
-//#include <libgen.h> //DO NOT WORK ON WINDOWS !
+// #include <libgen.h> //DO NOT WORK ON WINDOWS !
 
 #ifdef _WIN32
 #   include <time.h>
@@ -23,628 +23,526 @@
 
 #define PrecisionTYPE float
 
-void PetitUsage(char *exec)
-{
-   char text[255];
-   reg_print_msg_error("");
-   reg_print_msg_error("reg_aladin");
-   sprintf(text, "Usage:\t%s -ref <referenceImageName> -flo <floatingImageName> [OPTIONS]",exec);
-   reg_print_msg_error(text);
-   reg_print_msg_error("\tSee the help for more details (-h).");
-   reg_print_msg_error("");
-   return;
+void PetitUsage(char *exec) {
+    char text[255];
+    reg_print_msg_error("");
+    reg_print_msg_error("reg_aladin");
+    sprintf(text, "Usage:\t%s -ref <referenceImageName> -flo <floatingImageName> [OPTIONS]", exec);
+    reg_print_msg_error(text);
+    reg_print_msg_error("\tSee the help for more details (-h).");
+    reg_print_msg_error("");
+    return;
 }
-void Usage(char *exec)
-{
-   char text[255];
-   reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   reg_print_info(exec, "Block Matching algorithm for global registration.");
-   reg_print_info(exec, "Based on Modat et al., \"Global image registration using a symmetric block-matching approach\"");
-   reg_print_info(exec, "J. Med. Img. 1(2) 024003, 2014, doi: 10.1117/1.JMI.1.2.024003");
-   reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)");
-   reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   sprintf(text, "Usage:\t%s -ref <filename> -flo <filename> [OPTIONS].", exec);
-   reg_print_info(exec, text);
-   reg_print_info(exec, "\t-ref <filename>\tReference image filename (also called Target or Fixed) (mandatory)");
-   reg_print_info(exec, "\t-flo <filename>\tFloating image filename (also called Source or moving) (mandatory)");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "* * OPTIONS * *");
-   reg_print_info(exec, "\t-noSym \t\t\tThe symmetric version of the algorithm is used by default. Use this flag to disable it.");
-   reg_print_info(exec, "\t-rigOnly\t\tTo perform a rigid registration only. (Rigid+affine by default)");
-   reg_print_info(exec, "\t-affDirect\t\tDirectly optimize 12 DoF affine. (Default is rigid initially then affine)");
-
-   reg_print_info(exec, "\t-aff <filename>\t\tFilename which contains the output affine transformation. [outputAffine.txt]");
-   reg_print_info(exec, "\t-inaff <filename>\tFilename which contains an input affine transformation. (Affine*Reference=Floating) [none]");
-
-   reg_print_info(exec, "\t-rmask <filename>\tFilename of a mask image in the reference space.");
-   reg_print_info(exec, "\t-fmask <filename>\tFilename of a mask image in the floating space. (Only used when symmetric turned on)");
-   reg_print_info(exec, "\t-res <filename>\t\tFilename of the resampled image. [outputResult.nii]");
-
-   reg_print_info(exec, "\t-maxit <int>\t\tMaximal number of iterations of the trimmed least square approach to perform per level. [5]");
-   reg_print_info(exec, "\t-ln <int>\t\tNumber of levels to use to generate the pyramids for the coarse-to-fine approach. [3]");
-   reg_print_info(exec, "\t-lp <int>\t\tNumber of levels to use to run the registration once the pyramids have been created. [ln]");
-
-   reg_print_info(exec, "\t-smooR <float>\t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Reference image. [0]");
-   reg_print_info(exec, "\t-smooF <float>\t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Floating image. [0]");
-   reg_print_info(exec, "\t-refLowThr <float>\tLower threshold value applied to the reference image. [0]");
-   reg_print_info(exec, "\t-refUpThr <float>\tUpper threshold value applied to the reference image. [0]");
-   reg_print_info(exec, "\t-floLowThr <float>\tLower threshold value applied to the floating image. [0]");
-   reg_print_info(exec, "\t-floUpThr <float>\tUpper threshold value applied to the floating image. [0]");
-   reg_print_info(exec, "\t-pad <float>\t\tPadding value [nan]");
-
-   reg_print_info(exec, "\t-nac\t\t\tUse the nifti header origin to initialise the transformation. (Image centres are used by default)");
-   reg_print_info(exec, "\t-comm\t\t\tUse the input masks centre of mass to initialise the transformation. (Image centres are used by default)");
-   reg_print_info(exec, "\t-comi\t\t\tUse the input images centre of mass to initialise the transformation. (Image centres are used by default)");
-   reg_print_info(exec, "\t-interp\t\t\tInterpolation order to use internally to warp the floating image.");
-   reg_print_info(exec, "\t-iso\t\t\tMake floating and reference images isotropic if required.");
-
-   reg_print_info(exec, "\t-pv <int>\t\tPercentage of blocks to use in the optimisation scheme. [50]");
-   reg_print_info(exec, "\t-pi <int>\t\tPercentage of blocks to consider as inlier in the optimisation scheme. [50]");
-   reg_print_info(exec, "\t-speeeeed\t\tGo faster");
-#if defined(_USE_CUDA) && defined(_USE_OPENCL)
-   reg_print_info(exec, "\t-platf <uint>\t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]");
-#else
-#ifdef _USE_CUDA
-   reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]");
-#endif
-#ifdef _USE_OPENCL
-   reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | OpenCL=2 [0]");
-#endif
-#endif
-#if defined(_USE_CUDA) || defined(_USE_OPENCL)
-   reg_print_info(exec, "\t-gpuid <uint>\t\tChoose a custom gpu.");
-   reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
-#endif
-//   reg_print_info(exec, "\t-crv\t\t\tChoose custom capture range for the block matching alg");
+
+void Usage(char *exec) {
+    char text[255];
+    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    reg_print_info(exec, "Block Matching algorithm for global registration.");
+    reg_print_info(exec, "Based on Modat et al., \"Global image registration using a symmetric block-matching approach\"");
+    reg_print_info(exec, "J. Med. Img. 1(2) 024003, 2014, doi: 10.1117/1.JMI.1.2.024003");
+    reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)");
+    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    sprintf(text, "Usage:\t%s -ref <filename> -flo <filename> [OPTIONS].", exec);
+    reg_print_info(exec, text);
+    reg_print_info(exec, "\t-ref <filename>\tReference image filename (also called Target or Fixed) (mandatory)");
+    reg_print_info(exec, "\t-flo <filename>\tFloating image filename (also called Source or moving) (mandatory)");
+    reg_print_info(exec, "");
+    reg_print_info(exec, "* * OPTIONS * *");
+    reg_print_info(exec, "\t-noSym \t\t\tThe symmetric version of the algorithm is used by default. Use this flag to disable it.");
+    reg_print_info(exec, "\t-rigOnly\t\tTo perform a rigid registration only. (Rigid+affine by default)");
+    reg_print_info(exec, "\t-affDirect\t\tDirectly optimize 12 DoF affine. (Default is rigid initially then affine)");
+
+    reg_print_info(exec, "\t-aff <filename>\t\tFilename which contains the output affine transformation. [outputAffine.txt]");
+    reg_print_info(exec, "\t-inaff <filename>\tFilename which contains an input affine transformation. (Affine*Reference=Floating) [none]");
+
+    reg_print_info(exec, "\t-rmask <filename>\tFilename of a mask image in the reference space.");
+    reg_print_info(exec, "\t-fmask <filename>\tFilename of a mask image in the floating space. (Only used when symmetric turned on)");
+    reg_print_info(exec, "\t-res <filename>\t\tFilename of the resampled image. [outputResult.nii]");
+
+    reg_print_info(exec, "\t-maxit <int>\t\tMaximal number of iterations of the trimmed least square approach to perform per level. [5]");
+    reg_print_info(exec, "\t-ln <int>\t\tNumber of levels to use to generate the pyramids for the coarse-to-fine approach. [3]");
+    reg_print_info(exec, "\t-lp <int>\t\tNumber of levels to use to run the registration once the pyramids have been created. [ln]");
+
+    reg_print_info(exec, "\t-smooR <float>\t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Reference image. [0]");
+    reg_print_info(exec, "\t-smooF <float>\t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Floating image. [0]");
+    reg_print_info(exec, "\t-refLowThr <float>\tLower threshold value applied to the reference image. [0]");
+    reg_print_info(exec, "\t-refUpThr <float>\tUpper threshold value applied to the reference image. [0]");
+    reg_print_info(exec, "\t-floLowThr <float>\tLower threshold value applied to the floating image. [0]");
+    reg_print_info(exec, "\t-floUpThr <float>\tUpper threshold value applied to the floating image. [0]");
+    reg_print_info(exec, "\t-pad <float>\t\tPadding value [nan]");
+
+    reg_print_info(exec, "\t-nac\t\t\tUse the nifti header origin to initialise the transformation. (Image centres are used by default)");
+    reg_print_info(exec, "\t-comm\t\t\tUse the input masks centre of mass to initialise the transformation. (Image centres are used by default)");
+    reg_print_info(exec, "\t-comi\t\t\tUse the input images centre of mass to initialise the transformation. (Image centres are used by default)");
+    reg_print_info(exec, "\t-interp\t\t\tInterpolation order to use internally to warp the floating image.");
+    reg_print_info(exec, "\t-iso\t\t\tMake floating and reference images isotropic if required.");
+
+    reg_print_info(exec, "\t-pv <int>\t\tPercentage of blocks to use in the optimisation scheme. [50]");
+    reg_print_info(exec, "\t-pi <int>\t\tPercentage of blocks to consider as inlier in the optimisation scheme. [50]");
+    reg_print_info(exec, "\t-speeeeed\t\tGo faster");
+
+    if (Platform::IsCudaEnabled() || Platform::IsOpenClEnabled()) {
+        reg_print_info(exec, "*** Platform options:");
+        std::string platform = "\t-platf <uint>\t\tChoose platform: CPU=0 | ";
+        if (Platform::IsCudaEnabled()) {
+            platform += "Cuda=1";
+            if (Platform::IsOpenClEnabled())
+                platform += " | ";
+        }
+        if (Platform::IsOpenClEnabled())
+            platform += "OpenCL=2";
+        platform += " [0]";
+        reg_print_info(exec, platform.c_str());
+
+        reg_print_info(exec, "\t-gpuid <uint>\t\tChoose a custom gpu.");
+        reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
+    }
+
+    //   reg_print_info(exec, "\t-crv\t\t\tChoose custom capture range for the block matching alg");
 #if defined (_OPENMP)
-   int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=nullptr)
-      defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   sprintf(text,"\t-omp <int>\t\tNumber of thread to use with OpenMP. [%i/%i]",
-          defaultOpenMPValue, omp_get_num_procs());
-   reg_print_info(exec, text);
+    int defaultOpenMPValue = omp_get_num_procs();
+    if (getenv("OMP_NUM_THREADS") != nullptr)
+        defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
+    sprintf(text, "\t-omp <int>\t\tNumber of thread to use with OpenMP. [%i/%i]",
+            defaultOpenMPValue, omp_get_num_procs());
+    reg_print_info(exec, text);
 #endif
-   reg_print_info(exec, "\t-voff\t\t\tTurns verbose off [on]");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "\t--version\t\tPrint current version and exit");
-   sprintf(text, "\t\t\t\t(%s)",NR_VERSION);
-   reg_print_info(exec, text);
-   reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   return;
+    reg_print_info(exec, "\t-voff\t\t\tTurns verbose off [on]");
+    reg_print_info(exec, "");
+    reg_print_info(exec, "\t--version\t\tPrint current version and exit");
+    sprintf(text, "\t\t\t\t(%s)", NR_VERSION);
+    reg_print_info(exec, text);
+    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    return;
 }
 
-int main(int argc, char **argv)
-{
-   if(argc==1)
-   {
-      //PetitUsage(basename(argv[0])); //DO NOT WORK ON WINDOWS !
-      PetitUsage(argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char text[2048];
-
-   time_t start;
-   time(&start);
-
-   int symFlag=1;
-
-   char *referenceImageName=nullptr;
-   int referenceImageFlag=0;
-
-   char *floatingImageName=nullptr;
-   int floatingImageFlag=0;
-
-   char *outputAffineName=nullptr;
-   int outputAffineFlag=0;
-
-   char *inputAffineName=nullptr;
-   int inputAffineFlag=0;
-
-   char *referenceMaskName=nullptr;
-   int referenceMaskFlag=0;
-
-   char *floatingMaskName=nullptr;
-   int floatingMaskFlag=0;
-
-   char *outputResultName=nullptr;
-   int outputResultFlag=0;
-
-   int maxIter=5;
-   int nLevels=3;
-   int levelsToPerform=std::numeric_limits<int>::max();
-   int affineFlag=1;
-   int rigidFlag=1;
-   int blockStepSize=1;
-   int blockPercentage=50;
-   float inlierLts=50.0f;
-   int alignCentre=1;
-   int alignCentreOfMass=0;
-   int interpolation=1;
-   float floatingSigma=0;
-   float referenceSigma=0;
-
-   float referenceLowerThr=-std::numeric_limits<PrecisionTYPE>::max();
-   float referenceUpperThr=std::numeric_limits<PrecisionTYPE>::max();
-   float floatingLowerThr=-std::numeric_limits<PrecisionTYPE>::max();
-   float floatingUpperThr=std::numeric_limits<PrecisionTYPE>::max();
-   float paddingValue=std::numeric_limits<PrecisionTYPE>::quiet_NaN();
-
-   bool iso=false;
-   bool verbose=true;
-   int captureRangeVox = 3;
-   PlatformType platformType(PlatformType::Cpu);
-   unsigned gpuIdx = 999;
+int main(int argc, char **argv) {
+    if (argc == 1) {
+        //PetitUsage(basename(argv[0])); //DO NOT WORK ON WINDOWS !
+        PetitUsage(argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    char text[2048];
+
+    time_t start;
+    time(&start);
+
+    int symFlag = 1;
+
+    char *referenceImageName = nullptr;
+    int referenceImageFlag = 0;
+
+    char *floatingImageName = nullptr;
+    int floatingImageFlag = 0;
+
+    char *outputAffineName = nullptr;
+    int outputAffineFlag = 0;
+
+    char *inputAffineName = nullptr;
+    int inputAffineFlag = 0;
+
+    char *referenceMaskName = nullptr;
+    int referenceMaskFlag = 0;
+
+    char *floatingMaskName = nullptr;
+    int floatingMaskFlag = 0;
+
+    char *outputResultName = nullptr;
+    int outputResultFlag = 0;
+
+    int maxIter = 5;
+    int nLevels = 3;
+    int levelsToPerform = std::numeric_limits<int>::max();
+    int affineFlag = 1;
+    int rigidFlag = 1;
+    int blockStepSize = 1;
+    int blockPercentage = 50;
+    float inlierLts = 50.0f;
+    int alignCentre = 1;
+    int alignCentreOfMass = 0;
+    int interpolation = 1;
+    float floatingSigma = 0;
+    float referenceSigma = 0;
+
+    float referenceLowerThr = -std::numeric_limits<PrecisionTYPE>::max();
+    float referenceUpperThr = std::numeric_limits<PrecisionTYPE>::max();
+    float floatingLowerThr = -std::numeric_limits<PrecisionTYPE>::max();
+    float floatingUpperThr = std::numeric_limits<PrecisionTYPE>::max();
+    float paddingValue = std::numeric_limits<PrecisionTYPE>::quiet_NaN();
+
+    bool iso = false;
+    bool verbose = true;
+    int captureRangeVox = 3;
+    PlatformType platformType(PlatformType::Cpu);
+    unsigned gpuIdx = 999;
 
 #if defined (_OPENMP)
-   // Set the default number of thread
-   int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=nullptr)
-      defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   omp_set_num_threads(defaultOpenMPValue);
+    // Set the default number of thread
+    int defaultOpenMPValue = omp_get_num_procs();
+    if (getenv("OMP_NUM_THREADS") != nullptr)
+        defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
+    omp_set_num_threads(defaultOpenMPValue);
 #endif
 
-   /* read the input parameter */
-   for(int i=1; i<argc; i++)
-   {
-      if(strcmp(argv[i], "-help")==0 || strcmp(argv[i], "-Help")==0 ||
-            strcmp(argv[i], "-HELP")==0 || strcmp(argv[i], "-h")==0 ||
-            strcmp(argv[i], "--h")==0 || strcmp(argv[i], "--help")==0)
-      {
-         Usage(argv[0]);
-         return EXIT_SUCCESS;
-      }
-      else if(strcmp(argv[i], "--xml")==0)
-      {
-         printf("%s",xml_aladin);
-         return EXIT_SUCCESS;
-      }
-      if( strcmp(argv[i], "-version")==0 ||
-            strcmp(argv[i], "-Version")==0 ||
-            strcmp(argv[i], "-V")==0 ||
-            strcmp(argv[i], "-v")==0 ||
-            strcmp(argv[i], "--v")==0 ||
-            strcmp(argv[i], "--version")==0)
-      {
-         printf("%s\n",NR_VERSION);
-         return EXIT_SUCCESS;
-      }
-      else if(strcmp(argv[i], "-ref")==0 || strcmp(argv[i], "-target")==0 || strcmp(argv[i], "--ref")==0)
-      {
-         referenceImageName=argv[++i];
-         referenceImageFlag=1;
-      }
-      else if(strcmp(argv[i], "-flo")==0 || strcmp(argv[i], "-source")==0 || strcmp(argv[i], "--flo")==0)
-      {
-         floatingImageName=argv[++i];
-         floatingImageFlag=1;
-      }
-
-      else if(strcmp(argv[i], "-noSym")==0 || strcmp(argv[i], "--noSym")==0)
-      {
-         symFlag=0;
-      }
-      else if(strcmp(argv[i], "-aff")==0 || strcmp(argv[i], "--aff")==0)
-      {
-         outputAffineName=argv[++i];
-         outputAffineFlag=1;
-      }
-      else if(strcmp(argv[i], "-inaff")==0 || strcmp(argv[i], "--inaff")==0)
-      {
-         inputAffineName=argv[++i];
-         inputAffineFlag=1;
-      }
-      else if(strcmp(argv[i], "-rmask")==0 || strcmp(argv[i], "-tmask")==0 || strcmp(argv[i], "--rmask")==0)
-      {
-         referenceMaskName=argv[++i];
-         referenceMaskFlag=1;
-      }
-      else if(strcmp(argv[i], "-fmask")==0 || strcmp(argv[i], "-smask")==0 || strcmp(argv[i], "--fmask")==0)
-      {
-         floatingMaskName=argv[++i];
-         floatingMaskFlag=1;
-      }
-      else if(strcmp(argv[i], "-res")==0 || strcmp(argv[i], "-result")==0 || strcmp(argv[i], "--res")==0)
-      {
-         outputResultName=argv[++i];
-         outputResultFlag=1;
-      }
-      else if(strcmp(argv[i], "-maxit")==0 || strcmp(argv[i], "--maxit")==0)
-      {
-         maxIter = atoi(argv[++i]);
-      }
-      else if(strcmp(argv[i], "-ln")==0 || strcmp(argv[i], "--ln")==0)
-      {
-         nLevels=atoi(argv[++i]);
-      }
-      else if(strcmp(argv[i], "-lp")==0 || strcmp(argv[i], "--lp")==0)
-      {
-         levelsToPerform=atoi(argv[++i]);
-      }
-
-      else if(strcmp(argv[i], "-smooR")==0 || strcmp(argv[i], "-smooT")==0 || strcmp(argv[i], "--smooR")==0)
-      {
-         referenceSigma = (float)(atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-smooF")==0 || strcmp(argv[i], "-smooS")==0 || strcmp(argv[i], "--smooF")==0)
-      {
-         floatingSigma=(float)(atof(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-rigOnly")==0 || strcmp(argv[i], "--rigOnly")==0)
-      {
-         rigidFlag=1;
-         affineFlag=0;
-      }
-      else if(strcmp(argv[i], "-affDirect")==0 || strcmp(argv[i], "--affDirect")==0)
-      {
-         rigidFlag=0;
-         affineFlag=1;
-      }
-      else if(strcmp(argv[i], "-nac")==0 || strcmp(argv[i], "--nac")==0)
-      {
-         alignCentre=0;
-      }
-	  else if (strcmp(argv[i], "-comm") == 0 || strcmp(argv[i], "--comm") == 0 ||
-		  strcmp(argv[i], "-cog") == 0 || strcmp(argv[i], "--cog") == 0)
-	  {
-		  alignCentre = 0;
-		  alignCentreOfMass=1;
-	  }
-	  else if (strcmp(argv[i], "-comi") == 0 || strcmp(argv[i], "--comi") == 0)
-	  {
-		  alignCentre = 0;
-		  alignCentreOfMass=2;
-	  }
-      else if(strcmp(argv[i], "-%v")==0 || strcmp(argv[i], "-pv")==0 || strcmp(argv[i], "--pv")==0 )
-      {
-         float value=atof(argv[++i]);
-         if(value<0.f || value>100.f){
-            reg_print_msg_error("The variance argument is expected to be between 0 and 100");
-            return EXIT_FAILURE;
-         }
-         blockPercentage=value;
-      }
-      else if(strcmp(argv[i], "-%i")==0 || strcmp(argv[i], "-pi")==0 || strcmp(argv[i], "--pi")==0)
-      {
-         float value=atof(argv[++i]);
-         if(value<0.f || value>100.f){
-            reg_print_msg_error("The inlier argument is expected to be between 0 and 100");
-            return EXIT_FAILURE;
-         }
-         inlierLts=value;
-      }
-      else if(strcmp(argv[i], "-speeeeed")==0 || strcmp(argv[i], "--speeed")==0)
-      {
-         blockStepSize=2;
-      }
-      else if(strcmp(argv[i], "-interp")==0 || strcmp(argv[i], "--interp")==0)
-      {
-         interpolation=atoi(argv[++i]);
-      }
-      else if(strcmp(argv[i], "-refLowThr")==0 || strcmp(argv[i], "--refLowThr")==0)
-      {
-         referenceLowerThr=atof(argv[++i]);
-      }
-      else if(strcmp(argv[i], "-refUpThr")==0 || strcmp(argv[i], "--refUpThr")==0)
-      {
-         referenceUpperThr=atof(argv[++i]);
-      }
-      else if(strcmp(argv[i], "-floLowThr")==0 || strcmp(argv[i], "--floLowThr")==0)
-      {
-         floatingLowerThr=atof(argv[++i]);
-      }
-      else if(strcmp(argv[i], "-floUpThr")==0 || strcmp(argv[i], "--floUpThr")==0)
-      {
-         floatingUpperThr=atof(argv[++i]);
-      }
-
-      else if(strcmp(argv[i], "-pad")==0 || strcmp(argv[i], "--pad")==0)
-      {
-         paddingValue=atof(argv[++i]);
-      }
-      else if(strcmp(argv[i], "-iso")==0 || strcmp(argv[i], "--iso")==0)
-      {
-         iso=true;
-      }
-      else if(strcmp(argv[i], "-voff")==0 || strcmp(argv[i], "--voff")==0)
-      {
-         verbose=false;
-      }
-      else if(strcmp(argv[i], "-platf")==0 || strcmp(argv[i], "--platf")==0)
-      {
-         PlatformType value{atoi(argv[++i])};
-         if(int(value)<int(PlatformType::Cpu) || int(value)>int(PlatformType::OpenCl)){
-            reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL");
-            return EXIT_FAILURE;
-         }
-#ifndef _USE_CUDA
-         if (value == PlatformType::Cuda) {
-               reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA");
-               reg_print_msg_warn("The CPU platform is used");
-               value=PlatformType::Cpu;
+    /* read the input parameter */
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "-Help") == 0 ||
+            strcmp(argv[i], "-HELP") == 0 || strcmp(argv[i], "-h") == 0 ||
+            strcmp(argv[i], "--h") == 0 || strcmp(argv[i], "--help") == 0) {
+            Usage(argv[0]);
+            return EXIT_SUCCESS;
+        } else if (strcmp(argv[i], "--xml") == 0) {
+            printf("%s", xml_aladin);
+            return EXIT_SUCCESS;
+        }
+        if (strcmp(argv[i], "-version") == 0 ||
+            strcmp(argv[i], "-Version") == 0 ||
+            strcmp(argv[i], "-V") == 0 ||
+            strcmp(argv[i], "-v") == 0 ||
+            strcmp(argv[i], "--v") == 0 ||
+            strcmp(argv[i], "--version") == 0) {
+            printf("%s\n", NR_VERSION);
+            return EXIT_SUCCESS;
+        } else if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 || strcmp(argv[i], "--ref") == 0) {
+            referenceImageName = argv[++i];
+            referenceImageFlag = 1;
+        } else if (strcmp(argv[i], "-flo") == 0 || strcmp(argv[i], "-source") == 0 || strcmp(argv[i], "--flo") == 0) {
+            floatingImageName = argv[++i];
+            floatingImageFlag = 1;
+        }
+
+        else if (strcmp(argv[i], "-noSym") == 0 || strcmp(argv[i], "--noSym") == 0) {
+            symFlag = 0;
+        } else if (strcmp(argv[i], "-aff") == 0 || strcmp(argv[i], "--aff") == 0) {
+            outputAffineName = argv[++i];
+            outputAffineFlag = 1;
+        } else if (strcmp(argv[i], "-inaff") == 0 || strcmp(argv[i], "--inaff") == 0) {
+            inputAffineName = argv[++i];
+            inputAffineFlag = 1;
+        } else if (strcmp(argv[i], "-rmask") == 0 || strcmp(argv[i], "-tmask") == 0 || strcmp(argv[i], "--rmask") == 0) {
+            referenceMaskName = argv[++i];
+            referenceMaskFlag = 1;
+        } else if (strcmp(argv[i], "-fmask") == 0 || strcmp(argv[i], "-smask") == 0 || strcmp(argv[i], "--fmask") == 0) {
+            floatingMaskName = argv[++i];
+            floatingMaskFlag = 1;
+        } else if (strcmp(argv[i], "-res") == 0 || strcmp(argv[i], "-result") == 0 || strcmp(argv[i], "--res") == 0) {
+            outputResultName = argv[++i];
+            outputResultFlag = 1;
+        } else if (strcmp(argv[i], "-maxit") == 0 || strcmp(argv[i], "--maxit") == 0) {
+            maxIter = atoi(argv[++i]);
+        } else if (strcmp(argv[i], "-ln") == 0 || strcmp(argv[i], "--ln") == 0) {
+            nLevels = atoi(argv[++i]);
+        } else if (strcmp(argv[i], "-lp") == 0 || strcmp(argv[i], "--lp") == 0) {
+            levelsToPerform = atoi(argv[++i]);
+        }
+
+        else if (strcmp(argv[i], "-smooR") == 0 || strcmp(argv[i], "-smooT") == 0 || strcmp(argv[i], "--smooR") == 0) {
+            referenceSigma = (float)(atof(argv[++i]));
+        } else if (strcmp(argv[i], "-smooF") == 0 || strcmp(argv[i], "-smooS") == 0 || strcmp(argv[i], "--smooF") == 0) {
+            floatingSigma = (float)(atof(argv[++i]));
+        } else if (strcmp(argv[i], "-rigOnly") == 0 || strcmp(argv[i], "--rigOnly") == 0) {
+            rigidFlag = 1;
+            affineFlag = 0;
+        } else if (strcmp(argv[i], "-affDirect") == 0 || strcmp(argv[i], "--affDirect") == 0) {
+            rigidFlag = 0;
+            affineFlag = 1;
+        } else if (strcmp(argv[i], "-nac") == 0 || strcmp(argv[i], "--nac") == 0) {
+            alignCentre = 0;
+        } else if (strcmp(argv[i], "-comm") == 0 || strcmp(argv[i], "--comm") == 0 ||
+                  strcmp(argv[i], "-cog") == 0 || strcmp(argv[i], "--cog") == 0) {
+            alignCentre = 0;
+            alignCentreOfMass = 1;
+        } else if (strcmp(argv[i], "-comi") == 0 || strcmp(argv[i], "--comi") == 0) {
+            alignCentre = 0;
+            alignCentreOfMass = 2;
+        } else if (strcmp(argv[i], "-%v") == 0 || strcmp(argv[i], "-pv") == 0 || strcmp(argv[i], "--pv") == 0) {
+            float value = atof(argv[++i]);
+            if (value < 0.f || value>100.f) {
+                reg_print_msg_error("The variance argument is expected to be between 0 and 100");
+                return EXIT_FAILURE;
             }
-#endif
-#ifndef _USE_OPENCL
-            if(value==PlatformType::OpenCl){
-               reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL");
-               reg_print_msg_warn("The CPU platform is used");
-               value=PlatformType::Cpu;
+            blockPercentage = value;
+        } else if (strcmp(argv[i], "-%i") == 0 || strcmp(argv[i], "-pi") == 0 || strcmp(argv[i], "--pi") == 0) {
+            float value = atof(argv[++i]);
+            if (value < 0.f || value>100.f) {
+                reg_print_msg_error("The inlier argument is expected to be between 0 and 100");
+                return EXIT_FAILURE;
             }
-#endif
-         platformType=value;
-      }
-      else if(strcmp(argv[i], "-gpuid")==0 || strcmp(argv[i], "--gpuid")==0)
-      {
-          gpuIdx = unsigned(atoi(argv[++i]));
-      }
-      else if(strcmp(argv[i], "-crv")==0 || strcmp(argv[i], "--crv")==0)
-      {
-          captureRangeVox=atoi(argv[++i]);
-      }
-      else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
-      {
+            inlierLts = value;
+        } else if (strcmp(argv[i], "-speeeeed") == 0 || strcmp(argv[i], "--speeed") == 0) {
+            blockStepSize = 2;
+        } else if (strcmp(argv[i], "-interp") == 0 || strcmp(argv[i], "--interp") == 0) {
+            interpolation = atoi(argv[++i]);
+        } else if (strcmp(argv[i], "-refLowThr") == 0 || strcmp(argv[i], "--refLowThr") == 0) {
+            referenceLowerThr = atof(argv[++i]);
+        } else if (strcmp(argv[i], "-refUpThr") == 0 || strcmp(argv[i], "--refUpThr") == 0) {
+            referenceUpperThr = atof(argv[++i]);
+        } else if (strcmp(argv[i], "-floLowThr") == 0 || strcmp(argv[i], "--floLowThr") == 0) {
+            floatingLowerThr = atof(argv[++i]);
+        } else if (strcmp(argv[i], "-floUpThr") == 0 || strcmp(argv[i], "--floUpThr") == 0) {
+            floatingUpperThr = atof(argv[++i]);
+        }
+
+        else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) {
+            paddingValue = atof(argv[++i]);
+        } else if (strcmp(argv[i], "-iso") == 0 || strcmp(argv[i], "--iso") == 0) {
+            iso = true;
+        } else if (strcmp(argv[i], "-voff") == 0 || strcmp(argv[i], "--voff") == 0) {
+            verbose = false;
+        } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
+            PlatformType value{ atoi(argv[++i]) };
+            if (value < PlatformType::Cpu || value > PlatformType::OpenCl) {
+                reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL");
+                return EXIT_FAILURE;
+            }
+            if (value == PlatformType::Cuda && !Platform::IsCudaEnabled()) {
+                reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA");
+                reg_print_msg_warn("The CPU platform is used");
+                value = PlatformType::Cpu;
+            }
+            if (value == PlatformType::OpenCl && !Platform::IsOpenClEnabled()) {
+                reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL");
+                reg_print_msg_warn("The CPU platform is used");
+                value = PlatformType::Cpu;
+            }
+            platformType = value;
+        } else if (strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) {
+            gpuIdx = unsigned(atoi(argv[++i]));
+        } else if (strcmp(argv[i], "-crv") == 0 || strcmp(argv[i], "--crv") == 0) {
+            captureRangeVox = atoi(argv[++i]);
+        } else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) {
 #if defined (_OPENMP)
-         omp_set_num_threads(atoi(argv[++i]));
+            omp_set_num_threads(atoi(argv[++i]));
 #else
-         reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
-         ++i;
+            reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
+            ++i;
 #endif
-      }
-      else
-      {
-
-         sprintf(text,"Err:\tParameter %s unknown.",argv[i]);
-         reg_print_msg_error(text);
-         PetitUsage(argv[0]);
-         return EXIT_FAILURE;
-      }
-   }
-
-   if(!referenceImageFlag || !floatingImageFlag)
-   {
-      sprintf(text ,"Err:\tThe reference and the floating image have to be defined.");
-      reg_print_msg_error(text);
-      PetitUsage(argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   // Output the command line
+        } else {
+
+            sprintf(text, "Err:\tParameter %s unknown.", argv[i]);
+            reg_print_msg_error(text);
+            PetitUsage(argv[0]);
+            return EXIT_FAILURE;
+        }
+    }
+
+    if (!referenceImageFlag || !floatingImageFlag) {
+        sprintf(text, "Err:\tThe reference and the floating image have to be defined.");
+        reg_print_msg_error(text);
+        PetitUsage(argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    // Output the command line
 #ifdef NDEBUG
-   if(verbose)
-   {
+    if (verbose) {
 #endif
-      reg_print_info((argv[0]), "");
-      reg_print_info((argv[0]), "Command line:");
-      sprintf(text, "\t");
-      for(int i=0; i<argc; i++)
-         sprintf(text+strlen(text), " %s", argv[i]);
-      reg_print_info((argv[0]), text);
-      reg_print_info((argv[0]), "");
+        reg_print_info((argv[0]), "");
+        reg_print_info((argv[0]), "Command line:");
+        sprintf(text, "\t");
+        for (int i = 0; i < argc; i++)
+            sprintf(text + strlen(text), " %s", argv[i]);
+        reg_print_info((argv[0]), text);
+        reg_print_info((argv[0]), "");
 #ifdef NDEBUG
-   }
+    }
 #endif
 
-   reg_aladin<PrecisionTYPE> *REG;
-   if(symFlag)
-   {
-      REG = new reg_aladin_sym<PrecisionTYPE>;
-      if ( (referenceMaskFlag && !floatingMaskName) || (!referenceMaskFlag && floatingMaskName) )
-      {
-         reg_print_msg_warn("You have one image mask option turned on but not the other.");
-         reg_print_msg_warn("This will affect the degree of symmetry achieved.");
-      }
-   }
-   else
-   {
-      REG = new reg_aladin<PrecisionTYPE>;
-      if (floatingMaskFlag)
-      {
-         reg_print_msg_warn("Note: Floating mask flag only used in symmetric method. Ignoring this option");
-      }
-   }
-
-   /* Read the reference image and check its dimension */
-   nifti_image *referenceHeader = reg_io_ReadImageFile(referenceImageName);
-   if(referenceHeader == nullptr)
-   {
-      sprintf(text,"Error when reading the reference image: %s", referenceImageName);
-      reg_print_msg_error(text);
-      return EXIT_FAILURE;
-   }
-
-   /* Read the floating image and check its dimension */
-   nifti_image *floatingHeader = reg_io_ReadImageFile(floatingImageName);
-   if(floatingHeader == nullptr)
-   {
-      sprintf(text,"Error when reading the floating image: %s", floatingImageName);
-      reg_print_msg_error(text);
-      return EXIT_FAILURE;
-   }
-
-   // Set the reference and floating images
-   nifti_image *isoRefImage=nullptr;
-   nifti_image *isoFloImage=nullptr;
-   if(iso)
-   {
-      // make the images isotropic if required
-      isoRefImage=reg_makeIsotropic(referenceHeader,1);
-      isoFloImage=reg_makeIsotropic(floatingHeader,1);
-      REG->SetInputReference(isoRefImage);
-      REG->SetInputFloating(isoFloImage);
-   }
-   else
-   {
-      REG->SetInputReference(referenceHeader);
-      REG->SetInputFloating(floatingHeader);
-   }
-
-   /* read the reference mask image */
-   nifti_image *referenceMaskImage=nullptr;
-   nifti_image *isoRefMaskImage=nullptr;
-   if(referenceMaskFlag)
-   {
-      referenceMaskImage = reg_io_ReadImageFile(referenceMaskName);
-      if(referenceMaskImage == nullptr)
-      {
-         sprintf(text,"Error when reading the reference mask image: %s", referenceMaskName);
-         reg_print_msg_error(text);
-         return EXIT_FAILURE;
-      }
-      /* check the dimension */
-      for(int i=1; i<=referenceHeader->dim[0]; i++)
-      {
-         if(referenceHeader->dim[i]!=referenceMaskImage->dim[i])
-         {
-            reg_print_msg_error("The reference image and its mask do not have the same dimension");
+    reg_aladin<PrecisionTYPE> *REG;
+    if (symFlag) {
+        REG = new reg_aladin_sym<PrecisionTYPE>;
+        if ((referenceMaskFlag && !floatingMaskName) || (!referenceMaskFlag && floatingMaskName)) {
+            reg_print_msg_warn("You have one image mask option turned on but not the other.");
+            reg_print_msg_warn("This will affect the degree of symmetry achieved.");
+        }
+    } else {
+        REG = new reg_aladin<PrecisionTYPE>;
+        if (floatingMaskFlag) {
+            reg_print_msg_warn("Note: Floating mask flag only used in symmetric method. Ignoring this option");
+        }
+    }
+
+    /* Read the reference image and check its dimension */
+    nifti_image *referenceHeader = reg_io_ReadImageFile(referenceImageName);
+    if (referenceHeader == nullptr) {
+        sprintf(text, "Error when reading the reference image: %s", referenceImageName);
+        reg_print_msg_error(text);
+        return EXIT_FAILURE;
+    }
+
+    /* Read the floating image and check its dimension */
+    nifti_image *floatingHeader = reg_io_ReadImageFile(floatingImageName);
+    if (floatingHeader == nullptr) {
+        sprintf(text, "Error when reading the floating image: %s", floatingImageName);
+        reg_print_msg_error(text);
+        return EXIT_FAILURE;
+    }
+
+    // Set the reference and floating images
+    nifti_image *isoRefImage = nullptr;
+    nifti_image *isoFloImage = nullptr;
+    if (iso) {
+        // make the images isotropic if required
+        isoRefImage = reg_makeIsotropic(referenceHeader, 1);
+        isoFloImage = reg_makeIsotropic(floatingHeader, 1);
+        REG->SetInputReference(isoRefImage);
+        REG->SetInputFloating(isoFloImage);
+    } else {
+        REG->SetInputReference(referenceHeader);
+        REG->SetInputFloating(floatingHeader);
+    }
+
+    /* read the reference mask image */
+    nifti_image *referenceMaskImage = nullptr;
+    nifti_image *isoRefMaskImage = nullptr;
+    if (referenceMaskFlag) {
+        referenceMaskImage = reg_io_ReadImageFile(referenceMaskName);
+        if (referenceMaskImage == nullptr) {
+            sprintf(text, "Error when reading the reference mask image: %s", referenceMaskName);
+            reg_print_msg_error(text);
             return EXIT_FAILURE;
-         }
-      }
-      if(iso)
-      {
-         // make the image isotropic if required
-         isoRefMaskImage=reg_makeIsotropic(referenceMaskImage,0);
-         REG->SetInputMask(isoRefMaskImage);
-      }
-      else REG->SetInputMask(referenceMaskImage);
-   }
-   /* Read the floating mask image */
-   nifti_image *floatingMaskImage=nullptr;
-   nifti_image *isoFloMaskImage=nullptr;
-   if(floatingMaskFlag && symFlag)
-   {
-      floatingMaskImage = reg_io_ReadImageFile(floatingMaskName);
-      if(floatingMaskImage == nullptr)
-      {
-         sprintf(text,"Error when reading the floating mask image: %s", floatingMaskName);
-         reg_print_msg_error(text);
-         return EXIT_FAILURE;
-      }
-      /* check the dimension */
-      for(int i=1; i<=floatingHeader->dim[0]; i++)
-      {
-         if(floatingHeader->dim[i]!=floatingMaskImage->dim[i])
-         {
-            reg_print_msg_error("The floating image and its mask do not have the same dimension");
+        }
+        /* check the dimension */
+        for (int i = 1; i <= referenceHeader->dim[0]; i++) {
+            if (referenceHeader->dim[i] != referenceMaskImage->dim[i]) {
+                reg_print_msg_error("The reference image and its mask do not have the same dimension");
+                return EXIT_FAILURE;
+            }
+        }
+        if (iso) {
+            // make the image isotropic if required
+            isoRefMaskImage = reg_makeIsotropic(referenceMaskImage, 0);
+            REG->SetInputMask(isoRefMaskImage);
+        } else REG->SetInputMask(referenceMaskImage);
+    }
+    /* Read the floating mask image */
+    nifti_image *floatingMaskImage = nullptr;
+    nifti_image *isoFloMaskImage = nullptr;
+    if (floatingMaskFlag && symFlag) {
+        floatingMaskImage = reg_io_ReadImageFile(floatingMaskName);
+        if (floatingMaskImage == nullptr) {
+            sprintf(text, "Error when reading the floating mask image: %s", floatingMaskName);
+            reg_print_msg_error(text);
             return EXIT_FAILURE;
-         }
-      }
-      if(iso)
-      {
-         // make the image isotropic if required
-         isoFloMaskImage=reg_makeIsotropic(floatingMaskImage,0);
-         REG->SetInputFloatingMask(isoFloMaskImage);
-      }
-      else REG->SetInputFloatingMask(floatingMaskImage);
-   }
-
-   REG->SetMaxIterations(maxIter);
-   REG->SetNumberOfLevels(nLevels);
-   REG->SetLevelsToPerform(levelsToPerform);
-   REG->SetReferenceSigma(referenceSigma);
-   REG->SetFloatingSigma(floatingSigma);
-   REG->SetAlignCentre(alignCentre);
-   REG->SetAlignCentreMass(alignCentreOfMass);
-   REG->SetPerformAffine(affineFlag);
-   REG->SetPerformRigid(rigidFlag);
-   REG->SetBlockStepSize(blockStepSize);
-   REG->SetBlockPercentage(blockPercentage);
-   REG->SetInlierLts(inlierLts);
-   REG->SetInterpolation(interpolation);
-   REG->SetCaptureRangeVox(captureRangeVox);
-   REG->SetPlatformType(platformType);
-   REG->SetGpuIdx(gpuIdx);
-
-   if (referenceLowerThr != referenceUpperThr)
-   {
-      REG->SetReferenceLowerThreshold(referenceLowerThr);
-      REG->SetReferenceUpperThreshold(referenceUpperThr);
-   }
-
-   if (floatingLowerThr != floatingUpperThr)
-   {
-      REG->SetFloatingLowerThreshold(floatingLowerThr);
-      REG->SetFloatingUpperThreshold(floatingUpperThr);
-   }
-
-   REG->SetWarpedPaddingValue(paddingValue);
-
-   if(REG->GetLevelsToPerform() > REG->GetNumberOfLevels())
-      REG->SetLevelsToPerform(REG->GetNumberOfLevels());
-
-   // Set the input affine transformation if defined
-   if(inputAffineFlag==1)
-      REG->SetInputTransform(inputAffineName);
-
-   // Set the verbose type
-   REG->SetVerbose(verbose);
+        }
+        /* check the dimension */
+        for (int i = 1; i <= floatingHeader->dim[0]; i++) {
+            if (floatingHeader->dim[i] != floatingMaskImage->dim[i]) {
+                reg_print_msg_error("The floating image and its mask do not have the same dimension");
+                return EXIT_FAILURE;
+            }
+        }
+        if (iso) {
+            // make the image isotropic if required
+            isoFloMaskImage = reg_makeIsotropic(floatingMaskImage, 0);
+            REG->SetInputFloatingMask(isoFloMaskImage);
+        } else REG->SetInputFloatingMask(floatingMaskImage);
+    }
+
+    REG->SetMaxIterations(maxIter);
+    REG->SetNumberOfLevels(nLevels);
+    REG->SetLevelsToPerform(levelsToPerform);
+    REG->SetReferenceSigma(referenceSigma);
+    REG->SetFloatingSigma(floatingSigma);
+    REG->SetAlignCentre(alignCentre);
+    REG->SetAlignCentreMass(alignCentreOfMass);
+    REG->SetPerformAffine(affineFlag);
+    REG->SetPerformRigid(rigidFlag);
+    REG->SetBlockStepSize(blockStepSize);
+    REG->SetBlockPercentage(blockPercentage);
+    REG->SetInlierLts(inlierLts);
+    REG->SetInterpolation(interpolation);
+    REG->SetCaptureRangeVox(captureRangeVox);
+    REG->SetPlatformType(platformType);
+    REG->SetGpuIdx(gpuIdx);
+
+    if (referenceLowerThr != referenceUpperThr) {
+        REG->SetReferenceLowerThreshold(referenceLowerThr);
+        REG->SetReferenceUpperThreshold(referenceUpperThr);
+    }
+
+    if (floatingLowerThr != floatingUpperThr) {
+        REG->SetFloatingLowerThreshold(floatingLowerThr);
+        REG->SetFloatingUpperThreshold(floatingUpperThr);
+    }
+
+    REG->SetWarpedPaddingValue(paddingValue);
+
+    if (REG->GetLevelsToPerform() > REG->GetNumberOfLevels())
+        REG->SetLevelsToPerform(REG->GetNumberOfLevels());
+
+    // Set the input affine transformation if defined
+    if (inputAffineFlag == 1)
+        REG->SetInputTransform(inputAffineName);
+
+    // Set the verbose type
+    REG->SetVerbose(verbose);
 
 #ifndef NDEBUG
-   reg_print_msg_debug("*******************************************");
-   reg_print_msg_debug("*******************************************");
-   reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode");
-   reg_print_msg_debug("Please re-run cmake to set the variable");
-   reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required");
-   reg_print_msg_debug("*******************************************");
-   reg_print_msg_debug("*******************************************");
+    reg_print_msg_debug("*******************************************");
+    reg_print_msg_debug("*******************************************");
+    reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode");
+    reg_print_msg_debug("Please re-run cmake to set the variable");
+    reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required");
+    reg_print_msg_debug("*******************************************");
+    reg_print_msg_debug("*******************************************");
 #endif
 
 #if defined (_OPENMP)
-   if(verbose)
-   {
-      int maxThreadNumber = omp_get_max_threads();
-      sprintf(text, "OpenMP is used with %i thread(s)", maxThreadNumber);
-      reg_print_info((argv[0]), text);
-   }
+    if (verbose) {
+        int maxThreadNumber = omp_get_max_threads();
+        sprintf(text, "OpenMP is used with %i thread(s)", maxThreadNumber);
+        reg_print_info((argv[0]), text);
+    }
 #endif // _OPENMP
 
-   // Run the registration
-   REG->Run();
-
-   // The warped image is saved
-   if(iso)
-   {
-      REG->SetInputReference(referenceHeader);
-      REG->SetInputFloating(floatingHeader);
-   }
-   nifti_image *outputResultImage=REG->GetFinalWarpedImage();
-   if(!outputResultFlag) outputResultName=(char *)"outputResult.nii.gz";
-   reg_io_WriteImageFile(outputResultImage,outputResultName);
-   nifti_image_free(outputResultImage);
-
-   /* The affine transformation is saved */
-   if(outputAffineFlag)
-      reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), outputAffineName);
-   else reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), (char *)"outputAffine.txt");
-
-   nifti_image_free(referenceHeader);
-   nifti_image_free(floatingHeader);
-   if(isoRefImage!=nullptr)
-      nifti_image_free(isoRefImage);
-   if(isoFloImage!=nullptr)
-      nifti_image_free(isoFloImage);
-   if(referenceMaskImage!=nullptr)
-      nifti_image_free(referenceMaskImage);
-   if(floatingMaskImage!=nullptr)
-      nifti_image_free(floatingMaskImage);
-   if(isoRefMaskImage!=nullptr)
-      nifti_image_free(isoRefMaskImage);
-   if(isoFloMaskImage!=nullptr)
-      nifti_image_free(isoFloMaskImage);
-
-   delete REG;
+    // Run the registration
+    REG->Run();
+
+    // The warped image is saved
+    if (iso) {
+        REG->SetInputReference(referenceHeader);
+        REG->SetInputFloating(floatingHeader);
+    }
+    nifti_image *outputResultImage = REG->GetFinalWarpedImage();
+    if (!outputResultFlag) outputResultName = (char *)"outputResult.nii.gz";
+    reg_io_WriteImageFile(outputResultImage, outputResultName);
+    nifti_image_free(outputResultImage);
+
+    /* The affine transformation is saved */
+    if (outputAffineFlag)
+        reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), outputAffineName);
+    else reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), (char *)"outputAffine.txt");
+
+    nifti_image_free(referenceHeader);
+    nifti_image_free(floatingHeader);
+    if (isoRefImage != nullptr)
+        nifti_image_free(isoRefImage);
+    if (isoFloImage != nullptr)
+        nifti_image_free(isoFloImage);
+    if (referenceMaskImage != nullptr)
+        nifti_image_free(referenceMaskImage);
+    if (floatingMaskImage != nullptr)
+        nifti_image_free(floatingMaskImage);
+    if (isoRefMaskImage != nullptr)
+        nifti_image_free(isoRefMaskImage);
+    if (isoFloMaskImage != nullptr)
+        nifti_image_free(isoFloMaskImage);
+
+    delete REG;
 #ifdef NDEBUG
-   if(verbose)
-   {
+    if (verbose) {
 #endif
-      time_t end;
-      time(&end);
-      int minutes=(int)floorf((end-start)/60.0f);
-      int seconds=(int)(end-start - 60*minutes);
-      sprintf(text, "Registration performed in %i min %i sec", minutes, seconds);
-      reg_print_info((argv[0]), text);
-      reg_print_info((argv[0]), "Have a good day !");
+        time_t end;
+        time(&end);
+        int minutes = (int)floorf((end - start) / 60.0f);
+        int seconds = (int)(end - start - 60 * minutes);
+        sprintf(text, "Registration performed in %i min %i sec", minutes, seconds);
+        reg_print_info((argv[0]), text);
+        reg_print_info((argv[0]), "Have a good day !");
 #ifdef NDEBUG
-   }
+    }
 #endif
-   return EXIT_SUCCESS;
+    return EXIT_SUCCESS;
 }
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index d1dd67b2..741083be 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -10,6 +10,9 @@
  *
  */
 
+// OpenCL isn't supported!
+#undef _USE_OPENCL
+
 #include "_reg_ReadWriteImage.h"
 #include "_reg_ReadWriteMatrix.h"
 #include "_reg_f3d2.h"
@@ -21,9 +24,6 @@
 #   include <time.h>
 #endif
 
-// OpenCL isn't supported!
-#undef _USE_OPENCL
-
 void PetitUsage(char *exec) {
     char text[255];
     reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
@@ -34,6 +34,7 @@ void PetitUsage(char *exec) {
     reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
     return;
 }
+
 void Usage(char *exec) {
     char text[255];
     reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
@@ -132,21 +133,22 @@ void Usage(char *exec) {
     reg_print_info(exec, "\t-fmask <filename>\tFilename of a mask image in the floating space");
     reg_print_info(exec, "");
 
-#if defined(_USE_CUDA) && defined(_USE_OPENCL)
-    reg_print_info(exec, "*** Platform options:");
-    reg_print_info(exec, "\t-platf <uint>\t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]");
-#else
-#ifdef _USE_CUDA
-    reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]");
-#endif
-#ifdef _USE_OPENCL
-    reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | OpenCL=2 [0]");
-#endif
-#endif
-#if defined(_USE_CUDA) || defined(_USE_OPENCL)
-    reg_print_info(exec, "\t-gpuid <uint>\t\tChoose a custom gpu.");
-    reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
-#endif
+    if (Platform::IsCudaEnabled() || Platform::IsOpenClEnabled()) {
+        reg_print_info(exec, "*** Platform options:");
+        std::string platform = "\t-platf <uint>\t\tChoose platform: CPU=0 | ";
+        if (Platform::IsCudaEnabled()) {
+            platform += "Cuda=1";
+            if (Platform::IsOpenClEnabled())
+                platform += " | ";
+        }
+        if (Platform::IsOpenClEnabled())
+            platform += "OpenCL=2";
+        platform += " [0]";
+        reg_print_info(exec, platform.c_str());
+
+        reg_print_info(exec, "\t-gpuid <uint>\t\tChoose a custom gpu.");
+        reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
+    }
 
 #ifdef _OPENMP
     reg_print_info(exec, "");
@@ -286,25 +288,21 @@ int main(int argc, char **argv) {
         if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) {
             reg = new reg_f3d2<float>(referenceImage->nt, floatingImage->nt);
         } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
-            PlatformType value{atoi(argv[++i])};
-            if (int(value) < int(PlatformType::Cpu) || int(value) > int(PlatformType::Cuda)) {
+            PlatformType value{ atoi(argv[++i]) };
+            if (value < PlatformType::Cpu || value > PlatformType::Cuda) {
                 reg_print_msg_error("The platform argument is expected to be 0 or 1 | 0=CPU 1=CUDA");
                 return EXIT_FAILURE;
             }
-#ifndef _USE_CUDA
-            if (value == PlatformType::Cuda) {
+            if (value == PlatformType::Cuda && !Platform::IsCudaEnabled()) {
                 reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA");
                 reg_print_msg_warn("The CPU platform is used");
                 value = PlatformType::Cpu;
             }
-#endif
-#ifndef _USE_OPENCL
-            if (value == PlatformType::OpenCl) {
+            if (value == PlatformType::OpenCl && !Platform::IsOpenClEnabled()) {
                 reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL");
                 reg_print_msg_warn("The CPU platform is used");
                 value = PlatformType::Cpu;
             }
-#endif
             platformType = value;
         } else if (strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) {
             gpuIdx = unsigned(atoi(argv[++i]));
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 87e4aece..070dbbf8 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -24,7 +24,7 @@ Platform::Platform(const PlatformType& platformTypeIn) {
         contentCreatorFactory = new ContentCreatorFactory();
         kernelFactory = new CpuKernelFactory();
         measureFactory = new MeasureFactory();
-        platformName = "cpu_platform";
+        platformName = "CPU";
     }
 #ifdef _USE_CUDA
     else if (platformType == PlatformType::Cuda) {
@@ -32,7 +32,7 @@ Platform::Platform(const PlatformType& platformTypeIn) {
         contentCreatorFactory = new CudaContentCreatorFactory();
         kernelFactory = new CudaKernelFactory();
         measureFactory = new CudaMeasureFactory();
-        platformName = "cuda_platform";
+        platformName = "CUDA";
     }
 #endif
 #ifdef _USE_OPENCL
@@ -40,7 +40,7 @@ Platform::Platform(const PlatformType& platformTypeIn) {
         computeFactory = new ClComputeFactory();
         contentCreatorFactory = new ClContentCreatorFactory();
         kernelFactory = new ClKernelFactory();
-        platformName = "cl_platform";
+        platformName = "OpenCL";
     }
 #endif
     else {
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 7d7f9b37..0b195873 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -8,6 +8,15 @@
 #include "_reg_optimiser.h"
 
 enum class PlatformType { Cpu, Cuda, OpenCl };
+constexpr PlatformType PlatformTypes[] = {
+    PlatformType::Cpu,
+#ifdef _USE_CUDA
+    PlatformType::Cuda,
+#endif
+#ifdef _USE_OPENCL
+    PlatformType::OpenCl
+#endif
+};
 
 class Platform {
 public:
@@ -33,6 +42,19 @@ class Platform {
                                          bool optimiseZ,
                                          F3dContent *conBw = nullptr) const;
 
+    static constexpr bool IsCudaEnabled() {
+#ifdef _USE_CUDA
+        return true;
+#endif
+        return false;
+    }
+    static constexpr bool IsOpenClEnabled() {
+#ifdef _USE_OPENCL
+        return true;
+#endif
+        return false;
+    }
+
 private:
     ComputeFactory *computeFactory = nullptr;
     ContentCreatorFactory *contentCreatorFactory = nullptr;
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp
index af17e015..df7b0274 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affine_deformation_field.cpp
@@ -4,17 +4,10 @@
 #include "Kernel.h"
 #include "AffineDeformationFieldKernel.h"
 #include "Platform.h"
+#include "AladinContent.h"
 
 #include <catch2/catch_test_macros.hpp>
 
-#include "AladinContent.h"
-#ifdef _USE_CUDA
-#include "CudaAladinContent.h"
-#endif
-#ifdef _USE_OPENCL
-#include "ClAladinContent.h"
-#endif
-
 #define EPS_SINGLE 0.0001
 
 /*
@@ -27,192 +20,157 @@
 */
 
 
-typedef std::tuple<std::string, nifti_image*, mat44*, float*, float*, float*> test_data;
-typedef std::tuple<AladinContent*, std::string, PlatformType> content_desc;
+typedef std::tuple<std::string, nifti_image*, mat44*, float*, float*, float*> TestData;
+typedef std::tuple<std::unique_ptr<AladinContent>, std::unique_ptr<Platform>> ContentDesc;
 
 TEST_CASE("Affine deformation field", "[AffineDefField]") {
     // Create a reference 2D image
-    int dim[8] = {2, 2, 2, 1, 1, 1, 1, 1};
-    nifti_image *reference2D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference2D);
+    int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 };
+    nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(reference2d);
 
     // Create a reference 3D image
     dim[0] = 3;
     dim[3] = 2;
-    nifti_image *reference3D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference3D);
+    nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(reference3d);
 
     // Generate the different use cases
-    std::vector<test_data> test_use_cases;
+    std::vector<TestData> testCases;
 
     // Identity use case - 2D
-    auto *identity = new mat44;
-    reg_mat44_eye(identity);
+    mat44 identity;
+    reg_mat44_eye(&identity);
     // Test order [0,0] [1,0] [0,1] [1,1]
-    float identity_result_2x[4] = {0, 1, 0, 1};
-    float identity_result_2y[4] = {0, 0, 1, 1};
-    test_use_cases.emplace_back(test_data(
+    float identityResult2x[4] = { 0, 1, 0, 1 };
+    float identityResult2y[4] = { 0, 0, 1, 1 };
+    testCases.emplace_back(TestData(
         "identity 2D",
-        reference2D,
-        identity,
-        identity_result_2x,
-        identity_result_2y,
+        reference2d,
+        &identity,
+        identityResult2x,
+        identityResult2y,
         nullptr)
     );
     // Identity use case - 3D
     // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
-    float identity_result_3x[8] = {0, 1, 0, 1, 0, 1, 0, 1};
-    float identity_result_3y[8] = {0, 0, 1, 1, 0, 0, 1, 1};
-    float identity_result_3z[8] = {0, 0, 0, 0, 1, 1, 1, 1};
-    test_use_cases.emplace_back(test_data(
+    float identityResult3x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 };
+    float identityResult3y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 };
+    float identityResult3z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 };
+    testCases.emplace_back(TestData(
         "identity 3D",
-        reference3D,
-        identity,
-        identity_result_3x,
-        identity_result_3y,
-        identity_result_3z)
+        reference3d,
+        &identity,
+        identityResult3x,
+        identityResult3y,
+        identityResult3z)
     );
 
     // Translation - 2D
-    auto *translation = new mat44;
-    reg_mat44_eye(translation);
-    translation->m[0][3] = -0.5;
-    translation->m[1][3] = 1.5;
-    translation->m[2][3] = 0.75;
+    mat44 translation;
+    reg_mat44_eye(&translation);
+    translation.m[0][3] = -0.5;
+    translation.m[1][3] = 1.5;
+    translation.m[2][3] = 0.75;
     // Test order [0,0] [1,0] [0,1] [1,1]
-    float translation_result_2x[4] = {-0.5, .5, -0.5, .5};
-    float translation_result_2y[4] = {1.5, 1.5, 2.5, 2.5};
-    test_use_cases.emplace_back(test_data(
+    float translationResult2x[4] = { -0.5, .5, -0.5, .5 };
+    float translationResult2y[4] = { 1.5, 1.5, 2.5, 2.5 };
+    testCases.emplace_back(TestData(
         "translation 2D",
-        reference2D,
-        translation,
-        translation_result_2x,
-        translation_result_2y,
+        reference2d,
+        &translation,
+        translationResult2x,
+        translationResult2y,
         nullptr)
     );
 
     // Translation - 3D
     // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
-    float translation_result_3x[8] = {-0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5};
-    float translation_result_3y[8] = {1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5};
-    float translation_result_3z[8] = {.75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75};
-    test_use_cases.emplace_back(test_data(
+    float translationResult3x[8] = { -0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5 };
+    float translationResult3y[8] = { 1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5 };
+    float translationResult3z[8] = { .75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75 };
+    testCases.emplace_back(TestData(
         "translation 3D",
-        reference3D,
-        translation,
-        translation_result_3x,
-        translation_result_3y,
-        translation_result_3z)
+        reference3d,
+        &translation,
+        translationResult3x,
+        translationResult3y,
+        translationResult3z)
     );
 
 
     // Full affine - 2D
     // Test order [0,0] [1,0] [0,1] [1,1]
-    auto *affine = new mat44;
-    reg_mat44_eye(affine);
-    affine->m[0][3] = -0.5;
-    affine->m[1][3] = 1.5;
-    affine->m[2][3] = 0.75;
+    mat44 affine;
+    reg_mat44_eye(&affine);
+    affine.m[0][3] = -0.5;
+    affine.m[1][3] = 1.5;
+    affine.m[2][3] = 0.75;
     for (auto i = 0; i < 4; ++i) {
         for (auto j = 0; j < 4; ++j) {
-            affine->m[i][j] += static_cast<float>((((float)rand() / (RAND_MAX)) - .5) / 10.);
+            affine.m[i][j] += (((float)rand() / (RAND_MAX)) - 0.5f) / 10.f;
         }
     }
-    float affine_result_2x[4];
-    float affine_result_2y[4];
+    float affineResult2x[4];
+    float affineResult2y[4];
     for (auto i = 0; i < 4; ++i) {
-        auto x = identity_result_2x[i];
-        auto y = identity_result_2y[i];
-        affine_result_2x[i] = affine->m[0][3] + affine->m[0][0] * x + affine->m[0][1] * y;
-        affine_result_2y[i] = affine->m[1][3] + affine->m[1][0] * x + affine->m[1][1] * y;
+        auto x = identityResult2x[i];
+        auto y = identityResult2y[i];
+        affineResult2x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y;
+        affineResult2y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y;
 
     }
-    test_use_cases.emplace_back(test_data(
+    testCases.emplace_back(TestData(
         "full affine 2D",
-        reference2D,
-        affine,
-        affine_result_2x,
-        affine_result_2y,
+        reference2d,
+        &affine,
+        affineResult2x,
+        affineResult2y,
         nullptr)
     );
     // Full affine - 3D
     // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
-    float affine_result_3x[8];
-    float affine_result_3y[8];
-    float affine_result_3z[8];
+    float affineResult3x[8];
+    float affineResult3y[8];
+    float affineResult3z[8];
     for (auto i = 0; i < 8; ++i) {
-        auto x = identity_result_3x[i];
-        auto y = identity_result_3y[i];
-        auto z = identity_result_3z[i];
-        affine_result_3x[i] = affine->m[0][3] + affine->m[0][0] * x + affine->m[0][1] * y + affine->m[0][2] * z;
-        affine_result_3y[i] = affine->m[1][3] + affine->m[1][0] * x + affine->m[1][1] * y + affine->m[1][2] * z;
-        affine_result_3z[i] = affine->m[2][3] + affine->m[2][0] * x + affine->m[2][1] * y + affine->m[2][2] * z;
+        auto x = identityResult3x[i];
+        auto y = identityResult3y[i];
+        auto z = identityResult3z[i];
+        affineResult3x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z;
+        affineResult3y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z;
+        affineResult3z[i] = affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z;
     }
-    test_use_cases.emplace_back(test_data(
+    testCases.emplace_back(TestData(
         "affine 3D",
-        reference3D,
-        affine,
-        affine_result_3x,
-        affine_result_3y,
-        affine_result_3z)
+        reference3d,
+        &affine,
+        affineResult3x,
+        affineResult3y,
+        affineResult3z)
     );
 
     // Loop over all generated test cases to create all content and run all tests
-    for (auto&& test_use_case : test_use_cases) {
+    for (auto&& testCase : testCases) {
         // Retrieve test information
-        std::string test_name;
-        nifti_image *reference;
-        mat44 *test_mat;
-        float *test_res_x;
-        float *test_res_y;
-        float *test_res_z;
-        std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) = test_use_case;
+        auto&& [testName, reference, testMat, testResX, testResY, testResZ] = testCase;
 
         // Accumulate all required contents with a vector
-        std::vector<content_desc> listContent;
-        listContent.push_back(content_desc(
-            new AladinContent(
-                reference,
-                reference,
-                nullptr,
-                test_mat,
-                sizeof(float)),
-            "CPU",
-            PlatformType::Cpu));
-#ifdef _USE_CUDA
-        listContent.push_back(content_desc(
-            new CudaAladinContent(
-                reference,
-                reference,
-                nullptr,
-                test_mat,
-                sizeof(float)),
-            "CUDA",
-            PlatformType::Cuda));
-#endif
-#ifdef _USE_OPENCL
-        listContent.push_back(content_desc(
-            new ClAladinContent(
-                reference,
-                reference,
-                nullptr,
-                test_mat,
-                sizeof(float)),
-            "OpenCL",
-            PlatformType::OpenCl));
-#endif
+        std::vector<ContentDesc> contentDescs;
+        for (auto&& platformType : PlatformTypes) {
+            std::unique_ptr<Platform> platform{ new Platform(platformType) };
+            std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+            std::unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) };
+            contentDescs.push_back(ContentDesc(std::move(content), std::move(platform)));
+        }
         // Loop over all possibles contents for each test
-        for (auto &&content : listContent) {
-            AladinContent *con;
-            std::string desc;
-            PlatformType plat_value;
-            std::tie(con, desc, plat_value) = content;
-            SECTION(test_name + " " + desc) {
+        for (auto&& contentDesc : contentDescs) {
+            auto&& [content, platform] = contentDesc;
+            SECTION(testName + " " + platform->GetName()) {
                 // Initialise the platform to run current content and retrieve deformation field
-                auto *platform = new Platform(plat_value);
-                Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con);
+                std::unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) };
                 affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
-                nifti_image *defField = con->GetDeformationField();
+                nifti_image *defField = content->GetDeformationField();
 
                 // Check all values
                 auto *defFieldPtrX = static_cast<float *>(defField->data);
@@ -220,23 +178,14 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
                 auto *defFieldPtrY = &defFieldPtrX[voxelNumber];
                 auto *defFieldPtrZ = &defFieldPtrY[voxelNumber];
                 for (size_t i = 0; i < voxelNumber; ++i) {
-                    REQUIRE(fabs(defFieldPtrX[i] - test_res_x[i]) < EPS_SINGLE);
-                    REQUIRE(fabs(defFieldPtrY[i] - test_res_y[i]) < EPS_SINGLE);
-                    if (test_res_z != nullptr) {
-                        REQUIRE(fabs(defFieldPtrZ[i] - test_res_z[i]) < EPS_SINGLE);
-                    }
+                    REQUIRE(fabs(defFieldPtrX[i] - testResX[i]) < EPS_SINGLE);
+                    REQUIRE(fabs(defFieldPtrY[i] - testResY[i]) < EPS_SINGLE);
+                    if (testResZ)
+                        REQUIRE(fabs(defFieldPtrZ[i] - testResZ[i]) < EPS_SINGLE);
                 }
-                delete affineDeformKernel;
-                delete platform;
-                delete con;
             }
         }
-        listContent.clear();
     }
-    test_use_cases.clear();
-    nifti_image_free(reference2D);
-    nifti_image_free(reference3D);
-    free(identity);
-    free(translation);
-    free(affine);
+    nifti_image_free(reference2d);
+    nifti_image_free(reference3d);
 }
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 73100254..29c56719 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -1,21 +1,17 @@
+// OpenCL is not supported for this test
+#undef _USE_OPENCL
+
 #include "_reg_ReadWriteMatrix.h"
 #include "_reg_tools.h"
 
 #include "Kernel.h"
 #include "ResampleImageKernel.h"
 #include "Platform.h"
+#include "AladinContent.h"
 
 #include <list>
 #include <catch2/catch_test_macros.hpp>
 
-#include "AladinContent.h"
-#ifdef _USE_CUDA
-#include "CudaAladinContent.h"
-#endif
-#ifdef _USE_OPENCL
-#include "ClAladinContent.h"
-#endif
-
 #define EPS_SINGLE 0.0001
 
 /*
@@ -28,19 +24,19 @@
 */
 
 
-typedef std::tuple<std::string, nifti_image*, nifti_image*, float*> test_data;
-typedef std::tuple<AladinContent*, std::string, PlatformType> content_desc;
+typedef std::tuple<std::string, nifti_image*, nifti_image*, float*> TestData;
+typedef std::tuple<std::unique_ptr<AladinContent>, std::unique_ptr<Platform>> ContentDesc;
 
 TEST_CASE("Resampling", "[resampling]") {
     // Create a reference 2D image
     int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 };
-    nifti_image *reference2D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference2D);
+    nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(reference2d);
 
     // Fill image with distance from identity
-    auto* ref2dPrt = static_cast<float*>(reference2D->data);
-    for (float y = 0; y < reference2D->ny; ++y) {
-        for (float x = 0; x < reference2D->nx; ++x) {
+    auto* ref2dPrt = static_cast<float*>(reference2d->data);
+    for (float y = 0; y < reference2d->ny; ++y) {
+        for (float x = 0; x < reference2d->nx; ++x) {
             *ref2dPrt = sqrtf(x * x + y * y);
             ref2dPrt++;
         }
@@ -50,14 +46,14 @@ TEST_CASE("Resampling", "[resampling]") {
 
     // Create a reference 3D image
     dim[0] = 3; dim[3] = 2;
-    nifti_image *reference3D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference3D);
+    nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(reference3d);
 
     // Fill image with distance from identity
-    auto *ref3dPrt = static_cast<float*>(reference3D->data);
-    for (float z = 0; z < reference3D->nz; ++z) {
-        for (float y = 0; y < reference3D->ny; ++y) {
-            for (float x = 0; x < reference3D->nx; ++x) {
+    auto *ref3dPrt = static_cast<float*>(reference3d->data);
+    for (float z = 0; z < reference3d->nz; ++z) {
+        for (float y = 0; y < reference3d->ny; ++y) {
+            for (float x = 0; x < reference3d->nx; ++x) {
                 *ref3dPrt = sqrtf(x * x + y * y + z * z);
                 ref3dPrt++;
             }
@@ -65,113 +61,85 @@ TEST_CASE("Resampling", "[resampling]") {
     }
 
     // Generate the different use cases
-    std::vector<test_data> test_use_cases;
+    std::vector<TestData> testCases;
 
     // Identity use case - 2D
     // First create an identity displacement field and then convert it into a deformation
-    nifti_image *id_field_2D = nifti_copy_nim_info(reference2D);
-    id_field_2D->ndim = id_field_2D->dim[0] = 5;
-    id_field_2D->nu = id_field_2D->dim[5] = 2;
-    id_field_2D->nvox = CalcVoxelNumber(*id_field_2D, id_field_2D->ndim);
-    id_field_2D->data = (void *)calloc(id_field_2D->nvox, id_field_2D->nbyper);
-    reg_getDeformationFromDisplacement(id_field_2D);
+    nifti_image *idField2d = nifti_copy_nim_info(reference2d);
+    idField2d->ndim = idField2d->dim[0] = 5;
+    idField2d->nu = idField2d->dim[5] = 2;
+    idField2d->nvox = CalcVoxelNumber(*idField2d, idField2d->ndim);
+    idField2d->data = (void *)calloc(idField2d->nvox, idField2d->nbyper);
+    reg_getDeformationFromDisplacement(idField2d);
     float res2[4];
-    memcpy(res2, reference2D->data, reference2D->nvox * sizeof(float));
+    memcpy(res2, reference2d->data, reference2d->nvox * sizeof(float));
     // create the test case
-    test_use_cases.emplace_back(test_data(
+    testCases.emplace_back(TestData(
         "identity 2D",
-        reference2D,
-        id_field_2D,
+        reference2d,
+        idField2d,
         res2)
     );
 
     // Identity use case - 3D
-    nifti_image *id_field_3D = nifti_copy_nim_info(reference3D);
-    id_field_3D->ndim = id_field_3D->dim[0] = 5;
-    id_field_3D->nu = id_field_3D->dim[5] = 3;
-    id_field_3D->nvox = CalcVoxelNumber(*id_field_3D, id_field_3D->ndim);
-    id_field_3D->data = calloc(id_field_3D->nvox, id_field_3D->nbyper);
-    reg_getDeformationFromDisplacement(id_field_3D);
+    nifti_image *idField3d = nifti_copy_nim_info(reference3d);
+    idField3d->ndim = idField3d->dim[0] = 5;
+    idField3d->nu = idField3d->dim[5] = 3;
+    idField3d->nvox = CalcVoxelNumber(*idField3d, idField3d->ndim);
+    idField3d->data = calloc(idField3d->nvox, idField3d->nbyper);
+    reg_getDeformationFromDisplacement(idField3d);
     float res3[8];
-    memcpy(res3, reference3D->data, reference3D->nvox * sizeof(float));
+    memcpy(res3, reference3d->data, reference3d->nvox * sizeof(float));
     // create the test case
-    test_use_cases.emplace_back(test_data(
+    testCases.emplace_back(TestData(
         "identity 3D",
-        reference3D,
-        id_field_3D,
+        reference3d,
+        idField3d,
         res3)
     );
 
     // Loop over all generated test cases to create all content and run all tests
-    for (auto&& test_use_case : test_use_cases) {
+    for (auto&& testCase : testCases) {
         // Retrieve test information
-        std::string test_name;
-        nifti_image *reference;
-        nifti_image *def_field;
-        float *test_res;
-        std::tie(test_name, reference, def_field, test_res) = test_use_case;
+        auto&& [testName, reference, defField, testResult] = testCase;
 
         // Accumulate all required contents with a vector
-        std::vector<content_desc> listContent;
-        listContent.push_back(content_desc(
-            new AladinContent(reference, reference),
-            "CPU",
-            PlatformType::Cpu));
-#ifdef _USE_CUDA
-        listContent.push_back(content_desc(
-            new CudaAladinContent(reference, reference),
-            "CUDA",
-            PlatformType::Cuda));
-#endif
-#ifdef _USE_OPENCL
-        // listContent.push_back(content_desc(
-        //     new ClAladinContent(reference, reference),
-        //     "OpenCL",
-        //     PlatformType::OpenCl));
-#endif
+        std::vector<ContentDesc> contentDescs;
+        for (auto&& platformType : PlatformTypes) {
+            std::unique_ptr<Platform> platform{ new Platform(platformType) };
+            std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+            std::unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference) };
+            contentDescs.push_back(ContentDesc(std::move(content), std::move(platform)));
+        }
         // Loop over all possibles contents for each test
-        for (auto&& content : listContent) {
-            AladinContent *con;
-            std::string desc;
-            PlatformType plat_value;
-            std::tie(con, desc, plat_value) = content;
-
-            SECTION(test_name + " " + desc) {
+        for (auto&& contentDesc : contentDescs) {
+            auto&& [content, platform] = contentDesc;
+            SECTION(testName + " " + platform->GetName()) {
                 // Create and set a warped image to host the computation
                 nifti_image *warped = nifti_copy_nim_info(reference);
                 warped->data = malloc(warped->nvox * warped->nbyper);
-                con->SetWarped(warped);
+                content->SetWarped(warped);
                 // Set the deformation field
-                con->SetDeformationField(def_field);
-                // Set an empty mask to consider all voxels
-                int *tempMask = (int*)calloc(reference->nvox, sizeof(int));
-                con->SetReferenceMask(tempMask);
+                content->SetDeformationField(defField);
                 // Initialise the platform to run current content and retrieve deformation field
-                auto *platform = new Platform(plat_value);
-                Kernel *resampleKernel = platform->CreateKernel(ResampleImageKernel::GetName(), con);
+                std::unique_ptr<Kernel> resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) };
                 // args = interpolation and padding
                 std::list<int> interp = { 0, 1, 3 };
                 for (auto it : interp) {
                     resampleKernel->castTo<ResampleImageKernel>()->Calculate(it, 0);
-                    warped = con->GetWarped();
+                    warped = content->GetWarped();
 
                     // Check all values
                     auto *warpedPtr = static_cast<float*>(warped->data);
                     for (size_t i = 0; i < CalcVoxelNumber(*warped); ++i) {
-                        std::cout << i << " " << static_cast<float*>(reference->data)[i] << " " << warpedPtr[i] << " " << test_res[i] << std::endl;
-                        REQUIRE(fabs(warpedPtr[i] - test_res[i]) < EPS_SINGLE);
+                        std::cout << i << " " << static_cast<float*>(reference->data)[i] << " " << warpedPtr[i] << " " << testResult[i] << std::endl;
+                        REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS_SINGLE);
                     }
                 }
-                delete resampleKernel;
-                delete platform;
-                free(tempMask);
-                delete con;
             }
         }
-        listContent.clear();
     }
-    test_use_cases.clear();
     // Only free-ing ref as the rest if cleared by content destructor
-    nifti_image_free(reference2D);
-    nifti_image_free(reference3D);
+    nifti_image_free(reference2d);
+    nifti_image_free(reference3d);
 }

From 52cb0d74754af237fd8d309bb5b5b493cdfc1b57 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@kcl.ac.uk>
Date: Mon, 13 Feb 2023 14:37:07 +0000
Subject: [PATCH 047/314] Added default values for linear interpolation

---
 niftyreg_build_version.txt          |   2 +-
 reg-test/reg_test_interpolation.cpp | 107 +++++++++++++++++-----------
 2 files changed, 67 insertions(+), 42 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a7625603..9386c220 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-160
+161
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 29c56719..a5f64344 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -18,9 +18,8 @@
     This test file contains the following unit tests:
     test function: image resampling
     In 2D and 3D
-    identity
-    translation
-    affine
+    linear
+    cubic
 */
 
 
@@ -29,73 +28,99 @@ typedef std::tuple<std::unique_ptr<AladinContent>, std::unique_ptr<Platform>> Co
 
 TEST_CASE("Resampling", "[resampling]") {
     // Create a reference 2D image
-    int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 };
-    nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
+    int dim_flo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 };
+    nifti_image *reference2d = nifti_make_new_nim(dim_flo, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference2d);
 
     // Fill image with distance from identity
     auto* ref2dPrt = static_cast<float*>(reference2d->data);
-    for (float y = 0; y < reference2d->ny; ++y) {
-        for (float x = 0; x < reference2d->nx; ++x) {
-            *ref2dPrt = sqrtf(x * x + y * y);
+    for (auto y = 0; y < reference2d->ny; ++y) {
+        for (auto x = 0; x < reference2d->nx; ++x) {
+            *ref2dPrt = sqrtf(float(x * x) + float(y * y));
             ref2dPrt++;
         }
     }
 
-    // Create a corresponding deformation field
+    // Create a corresponding 2D deformation field
+    int dim_def[8] = {5, 1, 1, 1, 1, 2, 1, 1};
+    nifti_image *deformationField2D = nifti_make_new_nim(dim_def, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(deformationField2D);
+    auto* def2dPrt = static_cast<float*>(deformationField2D->data);
+    def2dPrt[0] = 1.2;
+    def2dPrt[1] = 1.3;
 
     // Create a reference 3D image
-    dim[0] = 3; dim[3] = 2;
-    nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
+    dim_flo[0] = 3; dim_flo[3] = 4;
+    nifti_image *reference3d = nifti_make_new_nim(dim_flo, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference3d);
 
     // Fill image with distance from identity
     auto *ref3dPrt = static_cast<float*>(reference3d->data);
-    for (float z = 0; z < reference3d->nz; ++z) {
-        for (float y = 0; y < reference3d->ny; ++y) {
-            for (float x = 0; x < reference3d->nx; ++x) {
-                *ref3dPrt = sqrtf(x * x + y * y + z * z);
+    for (auto z = 0; z < reference3d->nz; ++z) {
+        for (auto y = 0; y < reference3d->ny; ++y) {
+            for (auto x = 0; x < reference3d->nx; ++x) {
+                *ref3dPrt = sqrtf(float(x * x) + float(y * y) + float(z * z));
                 ref3dPrt++;
             }
         }
     }
 
+    // Create a corresponding 2D deformation field
+    dim_def[5] = 3;
+    nifti_image *deformationField3D = nifti_make_new_nim(dim_def, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(deformationField3D);
+    auto* def3dPrt = static_cast<float*>(deformationField3D->data);
+    def3dPrt[0] = 1.2;
+    def3dPrt[1] = 1.3;
+    def3dPrt[2] = 1.4;
+
     // Generate the different use cases
     std::vector<TestData> testCases;
 
-    // Identity use case - 2D
-    // First create an identity displacement field and then convert it into a deformation
-    nifti_image *idField2d = nifti_copy_nim_info(reference2d);
-    idField2d->ndim = idField2d->dim[0] = 5;
-    idField2d->nu = idField2d->dim[5] = 2;
-    idField2d->nvox = CalcVoxelNumber(*idField2d, idField2d->ndim);
-    idField2d->data = (void *)calloc(idField2d->nvox, idField2d->nbyper);
-    reg_getDeformationFromDisplacement(idField2d);
-    float res2[4];
-    memcpy(res2, reference2d->data, reference2d->nvox * sizeof(float));
+    // Linear interpolation - 2D
+    // coordinate in image: [1.2, 1.3]
+    auto *res_linear_2d = new float[1];
+    res_linear_2d[0] = 0;
+    for (auto y=1; y<2; ++y){
+        for (auto x=1; x<2; ++x){
+            res_linear_2d[0] += ref2dPrt[y*dim_flo[1]+
+                                         x] *
+                                abs(2.0 - (float)x - 0.2) *
+                                abs(2.0 - (float)y - 0.3);
+        }
+    }
+
     // create the test case
     testCases.emplace_back(TestData(
-        "identity 2D",
+        "Linear 2D",
         reference2d,
-        idField2d,
-        res2)
+        deformationField2D,
+        res_linear_2d)
     );
 
-    // Identity use case - 3D
-    nifti_image *idField3d = nifti_copy_nim_info(reference3d);
-    idField3d->ndim = idField3d->dim[0] = 5;
-    idField3d->nu = idField3d->dim[5] = 3;
-    idField3d->nvox = CalcVoxelNumber(*idField3d, idField3d->ndim);
-    idField3d->data = calloc(idField3d->nvox, idField3d->nbyper);
-    reg_getDeformationFromDisplacement(idField3d);
-    float res3[8];
-    memcpy(res3, reference3d->data, reference3d->nvox * sizeof(float));
+    // Linear interpolation - 23D
+    // coordinate in image: [1.2, 1.3, 1.4]
+    auto *res_linear_3d = new float[1];
+    res_linear_3d[0] = 0;
+    for (auto z=1; z<2; ++z){
+        for (auto y=1; y<2; ++y){
+            for (auto x=1; x<2; ++x) {
+                res_linear_3d[0] += ref2dPrt[z * dim_flo[1]* dim_flo[2] +
+                                             y * dim_flo[1] +
+                                             x] *
+                                    abs(2.0 - (float) x - 0.2) *
+                                    abs(2.0 - (float) y - 0.3) *
+                                    abs(2.0 - (float) z - 0.4);
+            }
+        }
+    }
+
     // create the test case
     testCases.emplace_back(TestData(
-        "identity 3D",
-        reference3d,
-        idField3d,
-        res3)
+            "Linear 3D",
+            reference3d,
+            deformationField3D,
+            res_linear_3d)
     );
 
     // Loop over all generated test cases to create all content and run all tests

From a6d0f9dd883a12dc663b0652e24e54cbb4e8615e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Feb 2023 16:24:29 +0000
Subject: [PATCH 048/314] Fix a bug incorrectly choosing 2D/3D resampling

---
 niftyreg_build_version.txt      |  2 +-
 reg-lib/cpu/_reg_resampling.cpp | 20 +++++++-------------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9386c220..9cc2bc3e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-161
+163
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 6c2ae4ca..fc6a4587 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -392,7 +392,7 @@ void ResampleImage3D(nifti_image *floatingImage,
         kernel_size=2;
         kernelCompFctPtr=&interpNearestNeighKernel;
         kernel_offset=0;
-        break; // nereast-neighboor interpolation
+        break; // nearest-neighbour interpolation
     case 1:
         kernel_size=2;
         kernelCompFctPtr=&interpLinearKernel;
@@ -594,7 +594,7 @@ void ResampleImage2D(nifti_image *floatingImage,
         kernel_size=2;
         kernelCompFctPtr=&interpNearestNeighKernel;
         kernel_offset=0;
-        break; // nereast-neighboor interpolation
+        break; // nearest-neighbour interpolation
     case 1:
         kernel_size=2;
         kernelCompFctPtr=&interpLinearKernel;
@@ -746,7 +746,7 @@ void reg_resampleImage2(nifti_image *floatingImage,
                                                    dtIndicies);
 
     // The deformation field contains the position in the real world
-    if(deformationFieldImage->nz>1)
+    if(deformationFieldImage->nu>2)
     {
         ResampleImage3D<FloatingTYPE,FieldTYPE>(floatingImage,
                                                 deformationFieldImage,
@@ -1071,7 +1071,7 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage,
         kernel_size=2;
         kernelCompFctPtr=&interpNearestNeighKernel;
         kernel_offset=0;
-        break; // nereast-neighboor interpolation
+        break; // nearest-neighbour interpolation
     case 1:
         kernel_size=2;
         kernelCompFctPtr=&interpLinearKernel;
@@ -1391,7 +1391,7 @@ void ResampleImage3D_PSF(nifti_image *floatingImage,
         kernel_size=2;
         kernelCompFctPtr=&interpNearestNeighKernel;
         kernel_offset=0;
-        break; // nereast-neighboor interpolation
+        break; // nearest-neighbour interpolation
     case 1:
         kernel_size=2;
         kernelCompFctPtr=&interpLinearKernel;
@@ -1773,10 +1773,8 @@ void reg_resampleImage2_PSF(nifti_image *floatingImage,
                             mat33 * jacMat,
                             char algorithm)
 {
-
     // The deformation field contains the position in the real world
-
-    if(deformationFieldImage->nz>1)
+    if(deformationFieldImage->nu>2)
     {
         if(algorithm==2){
 #ifndef NDEBUG
@@ -1801,8 +1799,6 @@ void reg_resampleImage2_PSF(nifti_image *floatingImage,
                                                         interp,
                                                         jacMat,
                                                         algorithm);
-
-
         }
     }
     else
@@ -3542,9 +3538,7 @@ nifti_image *reg_makeIsotropic(nifti_image *img,
     def->dim[0]=def->ndim=5;
     def->dim[4]=def->nt=1;
     def->pixdim[4]=def->dt=1.0;
-    if(newImg->nz==1)
-        def->dim[5]=def->nu=2;
-    else def->dim[5]=def->nu=3;
+    def->dim[5]=def->nu=newImg->nz>1?3:2;
     def->pixdim[5]=def->du=1.0;
     def->dim[6]=def->nv=1;
     def->pixdim[6]=def->dv=1.0;

From 2137d0349f6d4cd16d3bf32298af462f55bfebe2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Feb 2023 16:29:06 +0000
Subject: [PATCH 049/314] Add test cases for interpolation

 - Linear interpolation 2D/3D
 - Nearest neighbour interpolation 2D/3D
 - Cubic spline interpolation 2D/3D
---
 niftyreg_build_version.txt          |   2 +-
 reg-test/reg_test_interpolation.cpp | 218 +++++++++++++++++++---------
 2 files changed, 154 insertions(+), 66 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9cc2bc3e..4e9bdff0 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-163
+164
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index a5f64344..0afef586 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -23,110 +23,193 @@
 */
 
 
-typedef std::tuple<std::string, nifti_image*, nifti_image*, float*> TestData;
+typedef std::tuple<std::string, nifti_image*, nifti_image*, int, float*> TestData;
 typedef std::tuple<std::unique_ptr<AladinContent>, std::unique_ptr<Platform>> ContentDesc;
 
+template <typename T>
+void interpCubicSplineKernel(T relative, T (&basis)[4]) {
+    if (relative < 0) relative = 0; //reg_rounding error
+    const T relative2 = relative * relative;
+    basis[0] = (relative * ((2.f - relative) * relative - 1.f)) / 2.f;
+    basis[1] = (relative2 * (3.f * relative - 5.f) + 2.f) / 2.f;
+    basis[2] = (relative * ((4.f - 3.f * relative) * relative + 1.f)) / 2.f;
+    basis[3] = (relative - 1.f) * relative2 / 2.f;
+}
+
 TEST_CASE("Resampling", "[resampling]") {
     // Create a reference 2D image
-    int dim_flo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 };
-    nifti_image *reference2d = nifti_make_new_nim(dim_flo, NIFTI_TYPE_FLOAT32, true);
+    int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 };
+    nifti_image *reference2d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference2d);
 
     // Fill image with distance from identity
-    auto* ref2dPrt = static_cast<float*>(reference2d->data);
+    auto *ref2dPtr = static_cast<float*>(reference2d->data);
     for (auto y = 0; y < reference2d->ny; ++y) {
         for (auto x = 0; x < reference2d->nx; ++x) {
-            *ref2dPrt = sqrtf(float(x * x) + float(y * y));
-            ref2dPrt++;
+            *ref2dPtr = sqrtf(float(x * x) + float(y * y));
+            ref2dPtr++;
         }
     }
 
     // Create a corresponding 2D deformation field
-    int dim_def[8] = {5, 1, 1, 1, 1, 2, 1, 1};
-    nifti_image *deformationField2D = nifti_make_new_nim(dim_def, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(deformationField2D);
-    auto* def2dPrt = static_cast<float*>(deformationField2D->data);
-    def2dPrt[0] = 1.2;
-    def2dPrt[1] = 1.3;
+    int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 };
+    nifti_image *deformationField2d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(deformationField2d);
+    auto *def2dPtr = static_cast<float*>(deformationField2d->data);
+    def2dPtr[0] = 1.2f;
+    def2dPtr[1] = 1.3f;
 
     // Create a reference 3D image
-    dim_flo[0] = 3; dim_flo[3] = 4;
-    nifti_image *reference3d = nifti_make_new_nim(dim_flo, NIFTI_TYPE_FLOAT32, true);
+    dimFlo[0] = 3; dimFlo[3] = 4;
+    nifti_image *reference3d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference3d);
 
     // Fill image with distance from identity
-    auto *ref3dPrt = static_cast<float*>(reference3d->data);
+    auto *ref3dPtr = static_cast<float*>(reference3d->data);
     for (auto z = 0; z < reference3d->nz; ++z) {
         for (auto y = 0; y < reference3d->ny; ++y) {
             for (auto x = 0; x < reference3d->nx; ++x) {
-                *ref3dPrt = sqrtf(float(x * x) + float(y * y) + float(z * z));
-                ref3dPrt++;
+                *ref3dPtr = sqrtf(float(x * x) + float(y * y) + float(z * z));
+                ref3dPtr++;
             }
         }
     }
 
-    // Create a corresponding 2D deformation field
-    dim_def[5] = 3;
-    nifti_image *deformationField3D = nifti_make_new_nim(dim_def, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(deformationField3D);
-    auto* def3dPrt = static_cast<float*>(deformationField3D->data);
-    def3dPrt[0] = 1.2;
-    def3dPrt[1] = 1.3;
-    def3dPrt[2] = 1.4;
+    // Create a corresponding 3D deformation field
+    dimDef[5] = 3;
+    nifti_image *deformationField3d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(deformationField3d);
+    auto *def3dPtr = static_cast<float*>(deformationField3d->data);
+    def3dPtr[0] = 1.2f;
+    def3dPtr[1] = 1.3f;
+    def3dPtr[2] = 1.4f;
 
     // Generate the different use cases
     std::vector<TestData> testCases;
 
     // Linear interpolation - 2D
     // coordinate in image: [1.2, 1.3]
-    auto *res_linear_2d = new float[1];
-    res_linear_2d[0] = 0;
-    for (auto y=1; y<2; ++y){
-        for (auto x=1; x<2; ++x){
-            res_linear_2d[0] += ref2dPrt[y*dim_flo[1]+
-                                         x] *
-                                abs(2.0 - (float)x - 0.2) *
-                                abs(2.0 - (float)y - 0.3);
+    float resLinear2d[1] = {0};
+    ref2dPtr = static_cast<float*>(reference2d->data);
+    for (int y = 1; y <= 2; ++y) {
+        for (int x = 1; x <= 2; ++x) {
+            resLinear2d[0] += ref2dPtr[y * dimFlo[1] + x] *
+                abs(2.0f - (float)x - 0.2f) *
+                abs(2.0f - (float)y - 0.3f);
         }
     }
-
     // create the test case
     testCases.emplace_back(TestData(
         "Linear 2D",
         reference2d,
-        deformationField2D,
-        res_linear_2d)
+        deformationField2d,
+        1,
+        resLinear2d)
+    );
+
+    // Nearest neighbour interpolation - 2D
+    // coordinate in image: [1.2, 1.3]
+    float resNearest2d[1];
+    resNearest2d[0] = ref2dPtr[1 * dimFlo[1] + 1];
+    // create the test case
+    testCases.emplace_back(TestData(
+        "Nearest Neighbour 2D",
+        reference2d,
+        deformationField2d,
+        0,
+        resNearest2d)
+    );
+
+    // Cubic spline interpolation - 2D
+    // coordinate in image: [1.2, 1.3]
+    float resCubic2d[1] = {0};
+    float xBasis[4], yBasis[4];
+    interpCubicSplineKernel(0.2f, xBasis);
+    interpCubicSplineKernel(0.3f, yBasis);
+    for (int y = 0; y <= 3; ++y) {
+        float resX = 0;
+        for (int x = 0; x <= 3; ++x) {
+            resX += ref2dPtr[y * dimFlo[1] + x] * xBasis[x];
+        }
+        resCubic2d[0] += resX * yBasis[y];
+    }
+
+    // create the test case
+    testCases.emplace_back(TestData(
+        "Cubic Spline 2D",
+        reference2d,
+        deformationField2d,
+        3,
+        resCubic2d)
+    );
+
+    // Linear interpolation - 3D
+    // coordinate in image: [1.2, 1.3, 1.4]
+    float resLinear3d[1] = {0};
+    ref3dPtr = static_cast<float*>(reference3d->data);
+    for (int z = 1; z <= 2; ++z) {
+        for (int y = 1; y <= 2; ++y) {
+            for (int x = 1; x <= 2; ++x) {
+                resLinear3d[0] += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] *
+                    abs(2.0f - (float)x - 0.2f) *
+                    abs(2.0f - (float)y - 0.3f) *
+                    abs(2.0f - (float)z - 0.4f);
+            }
+        }
+    }
+
+    // create the test case
+    testCases.emplace_back(TestData(
+        "Linear 3D",
+        reference3d,
+        deformationField3d,
+        1,
+        resLinear3d)
     );
 
-    // Linear interpolation - 23D
+    // Nearest neighbour interpolation - 3D
     // coordinate in image: [1.2, 1.3, 1.4]
-    auto *res_linear_3d = new float[1];
-    res_linear_3d[0] = 0;
-    for (auto z=1; z<2; ++z){
-        for (auto y=1; y<2; ++y){
-            for (auto x=1; x<2; ++x) {
-                res_linear_3d[0] += ref2dPrt[z * dim_flo[1]* dim_flo[2] +
-                                             y * dim_flo[1] +
-                                             x] *
-                                    abs(2.0 - (float) x - 0.2) *
-                                    abs(2.0 - (float) y - 0.3) *
-                                    abs(2.0 - (float) z - 0.4);
+    float resNearest3d[1];
+    resNearest3d[0] = ref3dPtr[1 * dimFlo[2] * dimFlo[1] + 1 * dimFlo[1] + 1];
+    // create the test case
+    testCases.emplace_back(TestData(
+        "Nearest Neighbour 3D",
+        reference3d,
+        deformationField3d,
+        0,
+        resNearest3d)
+    );
+
+    // Cubic spline interpolation - 3D
+    // coordinate in image: [1.2, 1.3, 1.4]
+    float resCubic3d[1] = {0};
+    float zBasis[4];
+    interpCubicSplineKernel(0.4f, zBasis);
+    for (int z = 0; z <= 3; ++z) {
+        float resY = 0;
+        for (int y = 0; y <= 3; ++y) {
+            float resX = 0;
+            for (int x = 0; x <= 3; ++x) {
+                resX += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] * xBasis[x];
             }
+            resY += resX * yBasis[y];
         }
+        resCubic3d[0] += resY * zBasis[z];
     }
 
     // create the test case
     testCases.emplace_back(TestData(
-            "Linear 3D",
-            reference3d,
-            deformationField3D,
-            res_linear_3d)
+        "Cubic Spline 3D",
+        reference3d,
+        deformationField3d,
+        3,
+        resCubic3d)
     );
 
     // Loop over all generated test cases to create all content and run all tests
     for (auto&& testCase : testCases) {
         // Retrieve test information
-        auto&& [testName, reference, defField, testResult] = testCase;
+        auto&& [testName, reference, defField, interp, testResult] = testCase;
 
         // Accumulate all required contents with a vector
         std::vector<ContentDesc> contentDescs;
@@ -136,12 +219,19 @@ TEST_CASE("Resampling", "[resampling]") {
             std::unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference) };
             contentDescs.push_back(ContentDesc(std::move(content), std::move(platform)));
         }
+
         // Loop over all possibles contents for each test
         for (auto&& contentDesc : contentDescs) {
             auto&& [content, platform] = contentDesc;
             SECTION(testName + " " + platform->GetName()) {
                 // Create and set a warped image to host the computation
-                nifti_image *warped = nifti_copy_nim_info(reference);
+                nifti_image *warped = nifti_copy_nim_info(defField);
+                warped->ndim = warped->dim[0] = defField->nu;
+                warped->dim[1] = warped->nx = 1;
+                warped->dim[2] = warped->ny = 1;
+                warped->dim[3] = warped->nz = 1;
+                warped->dim[5] = warped->nu = 1;
+                warped->nvox = CalcVoxelNumber(*warped, warped->ndim);
                 warped->data = malloc(warped->nvox * warped->nbyper);
                 content->SetWarped(warped);
                 // Set the deformation field
@@ -149,17 +239,15 @@ TEST_CASE("Resampling", "[resampling]") {
                 // Initialise the platform to run current content and retrieve deformation field
                 std::unique_ptr<Kernel> resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) };
                 // args = interpolation and padding
-                std::list<int> interp = { 0, 1, 3 };
-                for (auto it : interp) {
-                    resampleKernel->castTo<ResampleImageKernel>()->Calculate(it, 0);
-                    warped = content->GetWarped();
-
-                    // Check all values
-                    auto *warpedPtr = static_cast<float*>(warped->data);
-                    for (size_t i = 0; i < CalcVoxelNumber(*warped); ++i) {
-                        std::cout << i << " " << static_cast<float*>(reference->data)[i] << " " << warpedPtr[i] << " " << testResult[i] << std::endl;
-                        REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS_SINGLE);
-                    }
+
+                resampleKernel->castTo<ResampleImageKernel>()->Calculate(interp, 0);
+                warped = content->GetWarped();
+
+                // Check all values
+                auto *warpedPtr = static_cast<float*>(warped->data);
+                for (size_t i = 0; i < warped->nvox; ++i) {
+                    std::cout << i << " " << warpedPtr[i] << " " << testResult[i] << std::endl;
+                    REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS_SINGLE);
                 }
             }
         }

From aec5c7ebd209d8e3e818ea278483a63c79594b62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 16 Feb 2023 14:28:25 +0000
Subject: [PATCH 050/314] Upgrade C++ standard version to C++17 for CUDA

---
 reg-lib/cuda/CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 8d63ab53..0f8156e3 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -36,7 +36,8 @@ elseif(RUN_RESULT_VAR)
     return()
 else(NOT COMPILE_RESULT_VAR)
     message(STATUS "Found CUDA (v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}) and a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})")
-    set(CUDA_NVCC_FLAGS "")
+    # Set C++ standard version for CUDA
+    set(CUDA_NVCC_FLAGS "-std=c++17")
     #check cuda version and adjust compile flags
     if("${RUN_OUTPUT_VAR}" LESS "30")
         set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)

From c4c71481ff791ca35f926cd97ac305a465be3fe1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Feb 2023 16:30:44 +0000
Subject: [PATCH 051/314] Refactorisations

---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_average.cpp                      |  18 +-
 reg-apps/reg_benchmark.cpp                    |   8 +-
 reg-apps/reg_jacobian.cpp                     |   4 +-
 reg-apps/reg_measure.cpp                      |   4 +-
 reg-apps/reg_ppcnr.cpp                        |   6 +-
 reg-apps/reg_resample.cpp                     |  11 +-
 reg-apps/reg_tools.cpp                        |  24 +--
 reg-apps/reg_transform.cpp                    |  32 ++--
 reg-lib/Content.cpp                           |   5 +-
 reg-lib/_reg_aladin.cpp                       |   4 +-
 reg-lib/_reg_aladin_sym.cpp                   |   2 +-
 reg-lib/_reg_f3d.cpp                          |   2 +-
 reg-lib/_reg_f3d2.cpp                         |   2 +-
 reg-lib/cl/ClAladinContent.cpp                |   2 +-
 reg-lib/cpu/_reg_localTrans.cpp               |  26 +--
 reg-lib/cpu/_reg_localTrans_jac.cpp           |   9 +-
 reg-lib/cpu/_reg_resampling.cpp               |   4 +-
 reg-lib/cpu/_reg_tools.cpp                    |   2 +-
 reg-lib/cpu/_reg_tools.h                      |   2 +
 reg-lib/cuda/_reg_common_cuda.cu              |  55 +++---
 reg-lib/cuda/_reg_common_cuda.h               |  45 +++--
 reg-lib/cuda/_reg_globalTransformation_gpu.cu |   2 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu            |   2 +-
 reg-lib/cuda/_reg_resampling_gpu.cu           | 162 +++++++++---------
 reg-lib/cuda/_reg_tools_gpu.cu                |   2 +-
 .../reg_test_affine_deformation_field.cpp     |  10 +-
 reg-test/reg_test_blockMatching.cpp           |   8 +-
 .../reg_test_bspline_deformation_field.cpp    |   4 +-
 ...est_coherence_affine_deformation_field.cpp |  16 +-
 reg-test/reg_test_coherence_blockMatching.cpp |  12 +-
 reg-test/reg_test_coherence_interpolation.cpp |  16 +-
 .../reg_test_compose_deformation_field.cpp    |   2 +-
 reg-test/reg_test_computation_time.cpp        |  10 +-
 reg-test/reg_test_convolution.cpp             |   2 +-
 reg-test/reg_test_imageGradient.cpp           |   6 +-
 reg-test/reg_test_interpolation.cpp           |  10 +-
 reg-test/reg_test_leastTrimmedSquares.cpp     |   8 +-
 .../reg_test_linearElasticityGradient.cpp     |   4 +-
 reg-test/reg_test_mindDescriptor.cpp          |   2 +-
 reg-test/reg_test_mindsscDescriptor.cpp       |   2 +-
 .../reg_test_nonlinear_deformation_field.cpp  |   2 +-
 42 files changed, 265 insertions(+), 286 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 4e9bdff0..9e42f3ef 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-164
+165
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index 79801fa2..2f337399 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -61,7 +61,7 @@ void usage(char *exec)
    reg_print_info(exec, "\t-demean_noaff <referenceImage> <AffineMat1> <NonRigidTrans1> <floatingImage1> ...  <AffineMatN> <NonRigidTransN> <floatingImageN>");
    reg_print_info(exec, "\t\tSame as -demean expect that the specified affine is removed from the");
    reg_print_info(exec, "\t\tnon-linear (euclidean) transformation.");
-   reg_print_info(exec, "\t--NN\t\tUse nearest neighboor interpolation - cubic is default");
+   reg_print_info(exec, "\t--NN\t\tUse nearest neighbour interpolation - cubic is default");
    reg_print_info(exec, "\t--LIN\t\tUse linear interpolation - cubic is default");
    reg_print_info(exec, "\t--version\t\tPrint current version and exit");
    sprintf(text, "\t\t\t\t(%s)",NR_VERSION);
@@ -278,7 +278,7 @@ int compute_nrr_demean(nifti_image *demean_field,
       nifti_image *transformation = reg_io_ReadImageFile(inputNRRName[t]);
       // Generate the deformation or flow field
       nifti_image *deformationField = nifti_copy_nim_info(demean_field);
-      deformationField->data = (void *)calloc(deformationField->nvox,deformationField->nbyper);
+      deformationField->data = calloc(deformationField->nvox,deformationField->nbyper);
       reg_tools_multiplyValueToImage(deformationField,deformationField,0.f);
       deformationField->scl_slope=1.f;
       deformationField->scl_inter=0.f;
@@ -325,7 +325,7 @@ int compute_nrr_demean(nifti_image *demean_field,
          else reg_tool_ReadAffineFile(&affineTransformation,inputAffName[t]);
          // The affine component is substracted
          nifti_image *tempField = nifti_copy_nim_info(deformationField);
-         tempField->data = (void *)malloc(tempField->nvox*tempField->nbyper);
+         tempField->data = malloc(tempField->nvox*tempField->nbyper);
          tempField->scl_slope=1.f;
          tempField->scl_inter=0.f;
          reg_affine_getDeformationField(&affineTransformation, tempField);
@@ -379,7 +379,7 @@ int compute_average_image(nifti_image *averageImage,
       demeanField->scl_slope=1.f;
       demeanField->scl_inter=0.f;
       demeanField->intent_p1=DISP_FIELD;
-      demeanField->data=(void *)calloc(demeanField->nvox, demeanField->nbyper);
+      demeanField->data=calloc(demeanField->nvox, demeanField->nbyper);
       compute_nrr_demean(demeanField, imageNumber, inputNRRName, inputAffName);
 #ifndef NDEBUG
       reg_print_msg_debug("Displacement field to use for demeaning computed");
@@ -390,7 +390,7 @@ int compute_average_image(nifti_image *averageImage,
    memset(averageImage->data, 0, averageImage->nvox*averageImage->nbyper);
    // Create an image to store the defined value number
    nifti_image *definedValue = nifti_copy_nim_info(averageImage);
-   definedValue->data = (void *)calloc(averageImage->nvox, averageImage->nbyper);
+   definedValue->data = calloc(averageImage->nvox, averageImage->nbyper);
    // Loop over all input images
    for(size_t i=0; i<imageNumber; ++i){
       // Generate a deformation field defined by the average final
@@ -407,7 +407,7 @@ int compute_average_image(nifti_image *averageImage,
       deformationField->scl_slope=1.f;
       deformationField->scl_inter=0.f;
       deformationField->intent_p1=DISP_FIELD;
-      deformationField->data=(void *)calloc(deformationField->nvox, deformationField->nbyper);
+      deformationField->data=calloc(deformationField->nvox, deformationField->nbyper);
       reg_tools_multiplyValueToImage(deformationField,deformationField,0.f);
       // Set the transformation to identity
       reg_getDeformationFromDisplacement(deformationField);
@@ -441,7 +441,7 @@ int compute_average_image(nifti_image *averageImage,
             if(deformationField->intent_p1==DEF_VEL_FIELD){
                reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField);
                nifti_image *tempDef = nifti_copy_nim_info(deformationField);
-               tempDef->data = (void *)malloc(tempDef->nvox*tempDef->nbyper);
+               tempDef->data = malloc(tempDef->nvox*tempDef->nbyper);
                memcpy(tempDef->data,deformationField->data,tempDef->nvox*tempDef->nbyper);
                tempDef->scl_slope=1.f;
                tempDef->scl_inter=0.f;
@@ -471,7 +471,7 @@ int compute_average_image(nifti_image *averageImage,
       nifti_image *warpedImage = nifti_copy_nim_info(averageImage);
       warpedImage->datatype = NIFTI_TYPE_FLOAT32;
       warpedImage->nbyper = sizeof(float);
-      warpedImage->data = (void *)malloc(warpedImage->nvox*warpedImage->nbyper);
+      warpedImage->data = malloc(warpedImage->nvox*warpedImage->nbyper);
       // Read the input image
       nifti_image *current_input_image = reg_io_ReadImageFile(inputImageName[i]);
       reg_tools_changeDatatype<PrecisionTYPE>(current_input_image);
@@ -763,7 +763,7 @@ int main(int argc, char **argv)
       if(sizeof(PrecisionTYPE)==sizeof(double))
          avg_output_image->datatype=NIFTI_TYPE_FLOAT64;
       avg_output_image->nbyper=sizeof(PrecisionTYPE);
-      avg_output_image->data=(void *)calloc(avg_output_image->nvox,avg_output_image->nbyper);
+      avg_output_image->data=calloc(avg_output_image->nvox,avg_output_image->nbyper);
       reg_tools_multiplyValueToImage(avg_output_image, avg_output_image, 0.f);
       // Set the output filename
       nifti_set_filenames(avg_output_image, outputName, 0, 0);
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index ec09cc3c..2bde68ef 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -163,21 +163,21 @@ int main(int argc, char **argv)
    nifti_image *velocityFieldImage = nifti_copy_nim_info(controlPointImage);
    velocityFieldImage->datatype = NIFTI_TYPE_FLOAT32;
    velocityFieldImage->nbyper = sizeof(float);
-   velocityFieldImage->data = (void *)calloc(velocityFieldImage->nvox, velocityFieldImage->nbyper);
+   velocityFieldImage->data = calloc(velocityFieldImage->nvox, velocityFieldImage->nbyper);
 
    // Different gradient images
    nifti_image *resultGradientImage = nifti_copy_nim_info(deformationFieldImage);
    resultGradientImage->datatype = NIFTI_TYPE_FLOAT32;
    resultGradientImage->nbyper = sizeof(float);
-   resultGradientImage->data = (void *)calloc(resultGradientImage->nvox, resultGradientImage->nbyper);
+   resultGradientImage->data = calloc(resultGradientImage->nvox, resultGradientImage->nbyper);
    nifti_image *voxelNMIGradientImage = nifti_copy_nim_info(deformationFieldImage);
    voxelNMIGradientImage->datatype = NIFTI_TYPE_FLOAT32;
    voxelNMIGradientImage->nbyper = sizeof(float);
-   voxelNMIGradientImage->data = (void *)calloc(voxelNMIGradientImage->nvox, voxelNMIGradientImage->nbyper);
+   voxelNMIGradientImage->data = calloc(voxelNMIGradientImage->nvox, voxelNMIGradientImage->nbyper);
    nifti_image *nodeNMIGradientImage = nifti_copy_nim_info(controlPointImage);
    nodeNMIGradientImage->datatype = NIFTI_TYPE_FLOAT32;
    nodeNMIGradientImage->nbyper = sizeof(float);
-   nodeNMIGradientImage->data = (void *)calloc(nodeNMIGradientImage->nvox, nodeNMIGradientImage->nbyper);
+   nodeNMIGradientImage->data = calloc(nodeNMIGradientImage->nvox, nodeNMIGradientImage->nbyper);
 
 #ifdef _USE_CUDA
    float *targetImageArray_d;
diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp
index 23033742..b4a5b8c7 100644
--- a/reg-apps/reg_jacobian.cpp
+++ b/reg-apps/reg_jacobian.cpp
@@ -292,7 +292,7 @@ int main(int argc, char **argv)
       jacobianImage->cal_max=0;
       jacobianImage->scl_slope = 1.0f;
       jacobianImage->scl_inter = 0.0f;
-      jacobianImage->data = (void *)calloc(jacobianImage->nvox, jacobianImage->nbyper);
+      jacobianImage->data = calloc(jacobianImage->nvox, jacobianImage->nbyper);
 
       switch((int)inputTransformation->intent_p1){
       case DISP_FIELD:
@@ -346,7 +346,7 @@ int main(int argc, char **argv)
       jacobianImage->cal_max=0;
       jacobianImage->scl_slope = 1.0f;
       jacobianImage->scl_inter = 0.0f;
-      jacobianImage->data = (void *)calloc(jacobianImage->nvox, jacobianImage->nbyper);
+      jacobianImage->data = calloc(jacobianImage->nvox, jacobianImage->nbyper);
 
       mat33 *jacobianMatriceArray = (mat33 *)malloc(CalcVoxelNumber(*jacobianImage) * sizeof(mat33));
       // Compute the map of Jacobian matrices
diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp
index ab22e717..d1ac54a5 100755
--- a/reg-apps/reg_measure.cpp
+++ b/reg-apps/reg_measure.cpp
@@ -264,7 +264,7 @@ int main(int argc, char **argv)
    warpedFloImage->scl_slope=floImage->scl_slope;
    warpedFloImage->datatype=floImage->datatype;
    warpedFloImage->nbyper=floImage->nbyper;
-   warpedFloImage->data=(void *)malloc(warpedFloImage->nvox*warpedFloImage->nbyper);
+   warpedFloImage->data=malloc(warpedFloImage->nvox*warpedFloImage->nbyper);
 
    /* Create the deformation field */
    nifti_image *defField = nifti_copy_nim_info(refImage);
@@ -274,7 +274,7 @@ int main(int argc, char **argv)
    defField->nvox=CalcVoxelNumber(*defField, defField->ndim);
    defField->datatype=NIFTI_TYPE_FLOAT32;
    defField->nbyper=sizeof(float);
-   defField->data=(void *)calloc(defField->nvox,defField->nbyper);
+   defField->data=calloc(defField->nvox,defField->nbyper);
    defField->scl_slope=1.f;
    defField->scl_inter=0.f;
    reg_tools_multiplyValueToImage(defField,defField,0.f);
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index c691266b..02f4a228 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -192,7 +192,7 @@ int main(int argc, char **argv)
          makesource->ndim=makesource->dim[0] = 4;
          makesource->nt = makesource->dim[4] = atoi(argv[++i]);
          makesource->nvox = CalcVoxelNumber(*makesource->nx, makesource->ndim);
-         makesource->data = (void *)malloc(makesource->nvox * makesource->nbyper);
+         makesource->data = malloc(makesource->nvox * makesource->nbyper);
          char *temp_data = reinterpret_cast<char *>(makesource->data);
          for(int ii=0; ii<makesource->nt; ii++) // fill with file data
          {
@@ -864,10 +864,10 @@ int main(int argc, char **argv)
             stores->ndim=stores->dim[0]=3;
             stores->nt=stores->dim[4]=1;
             stores->nvox = CalcVoxelNumber(*stores, stores->ndim);
-            stores->data = (void *)calloc(stores->nvox,images->nbyper);
+            stores->data = calloc(stores->nvox,images->nbyper);
 
             nifti_image *storet = nifti_copy_nim_info(stores);
-            storet->data = (void *)calloc(storet->nvox, storet->nbyper);
+            storet->data = calloc(storet->nvox, storet->nbyper);
 
             // COPY THE APPROPRIATE VALUES
             PrecisionTYPE *intensityPtrPP = static_cast<PrecisionTYPE *>(storet->data); // 3D real source image (needs current cpp image)
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index 18ad4863..888298c4 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -344,7 +344,7 @@ int main(int argc, char **argv)
       deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32;
       deformationFieldImage->nbyper = sizeof(float);
    }
-   deformationFieldImage->data = (void *)calloc(deformationFieldImage->nvox, deformationFieldImage->nbyper);
+   deformationFieldImage->data = calloc(deformationFieldImage->nvox, deformationFieldImage->nbyper);
 
    // Initialise the deformation field with an identity transformation
    reg_tools_multiplyValueToImage(deformationFieldImage,deformationFieldImage,0.f);
@@ -369,7 +369,7 @@ int main(int argc, char **argv)
       case DEF_VEL_FIELD:
          {
             nifti_image *tempFlowField = nifti_copy_nim_info(deformationFieldImage);
-            tempFlowField->data = (void *)malloc(tempFlowField->nvox*tempFlowField->nbyper);
+            tempFlowField->data = malloc(tempFlowField->nvox*tempFlowField->nbyper);
             memcpy(tempFlowField->data,deformationFieldImage->data,
                    tempFlowField->nvox*tempFlowField->nbyper);
             reg_defField_compose(inputTransformationImage,
@@ -451,7 +451,7 @@ int main(int argc, char **argv)
       warpedImage->nbyper = floatingImage->nbyper;
       warpedImage->nvox = (size_t)warpedImage->dim[1] * warpedImage->dim[2] *
             warpedImage->dim[3] * warpedImage->dim[4] * warpedImage->dim[5];
-      warpedImage->data = (void *)calloc(warpedImage->nvox, warpedImage->nbyper);
+      warpedImage->data = calloc(warpedImage->nvox, warpedImage->nbyper);
 
       if((floatingImage->dim[4]==6 || floatingImage->dim[4]==7) && flag->isTensor)
       {
@@ -536,7 +536,7 @@ int main(int argc, char **argv)
       gridImage->nvox = CalcVoxelNumber(*gridImage, gridImage->ndim);
       gridImage->datatype = NIFTI_TYPE_UINT8;
       gridImage->nbyper = sizeof(unsigned char);
-      gridImage->data = (void *)calloc(gridImage->nvox, gridImage->nbyper);
+      gridImage->data = calloc(gridImage->nvox, gridImage->nbyper);
       unsigned char *gridImageValuePtr = static_cast<unsigned char *>(gridImage->data);
       for(int z=0; z<gridImage->nz; z++)
       {
@@ -590,8 +590,7 @@ int main(int argc, char **argv)
       warpedImage->dim[5]=warpedImage->nu=1;
       warpedImage->datatype =NIFTI_TYPE_UINT8;
       warpedImage->nbyper = sizeof(unsigned char);
-      warpedImage->data = (void *)calloc(warpedImage->nvox,
-                                         warpedImage->nbyper);
+      warpedImage->data = calloc(warpedImage->nvox, warpedImage->nbyper);
       reg_resampleImage(gridImage,
                         warpedImage,
                         deformationFieldImage,
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index fda62a49..02ed8b09 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -498,7 +498,7 @@ int main(int argc, char **argv)
     {
         reg_tools_changeDatatype<float>(image);
         nifti_image *normImage = nifti_copy_nim_info(image);
-        normImage->data = (void *)malloc(normImage->nvox * normImage->nbyper);
+        normImage->data = malloc(normImage->nvox * normImage->nbyper);
         memcpy(normImage->data, image->data, normImage->nvox*normImage->nbyper);
         reg_heapSort(static_cast<float *>(normImage->data), normImage->nvox);
         float minValue = static_cast<float *>(normImage->data)[static_cast<int>(reg_floor(03*(int)normImage->nvox/100))];
@@ -516,7 +516,7 @@ int main(int argc, char **argv)
     if(flag->smoothGaussianFlag || flag->smoothSplineFlag || flag->smoothMeanFlag)
     {
         nifti_image *smoothImg = nifti_copy_nim_info(image);
-        smoothImg->data = (void *)malloc(smoothImg->nvox * smoothImg->nbyper);
+        smoothImg->data = malloc(smoothImg->nvox * smoothImg->nbyper);
         memcpy(smoothImg->data, image->data, smoothImg->nvox*smoothImg->nbyper);
         float *kernelSize = new float[smoothImg->nt*smoothImg->nu];
         bool *timePoint = new bool[smoothImg->nt*smoothImg->nu];
@@ -556,7 +556,7 @@ int main(int argc, char **argv)
     if(flag->smoothLabFlag)
     {
         nifti_image *smoothImg = nifti_copy_nim_info(image);
-        smoothImg->data = (void *)malloc(smoothImg->nvox * smoothImg->nbyper);
+        smoothImg->data = malloc(smoothImg->nvox * smoothImg->nbyper);
         memcpy(smoothImg->data, image->data, smoothImg->nvox*smoothImg->nbyper);
 
         bool *timePoint = new bool[smoothImg->nt*smoothImg->nu];
@@ -632,7 +632,7 @@ int main(int argc, char **argv)
         }
 
         nifti_image *outputImage = nifti_copy_nim_info(image);
-        outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper);
+        outputImage->data = malloc(outputImage->nvox * outputImage->nbyper);
 
         if(image2!=nullptr)
         {
@@ -735,7 +735,7 @@ int main(int argc, char **argv)
         }
 
         nifti_image *outputImage = nifti_copy_nim_info(image);
-        outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper);
+        outputImage->data = malloc(outputImage->nvox * outputImage->nbyper);
 
         reg_tools_nanMask_image(image,maskImage,outputImage);
 
@@ -894,7 +894,7 @@ int main(int argc, char **argv)
         def->nvox = CalcVoxelNumber(*def, def->ndim);
         def->nbyper = sizeof(float);
         def->datatype = NIFTI_TYPE_FLOAT32;
-        def->data = (void *)calloc(def->nvox,def->nbyper);
+        def->data = calloc(def->nvox,def->nbyper);
         // Fill the deformation field with an identity transformation
         reg_getDeformationFromDisplacement(def);
         // Allocate and compute the Jacobian matrices
@@ -949,7 +949,7 @@ int main(int argc, char **argv)
             reg_tools_changeDatatype<float>(image);
         // Create a temporary scaled image
         nifti_image *scaledImage = nifti_copy_nim_info(image);
-        scaledImage->data = (void *)malloc(scaledImage->nvox * scaledImage->nbyper);
+        scaledImage->data = malloc(scaledImage->nvox * scaledImage->nbyper);
         // Rescale the input image
         float min_value = reg_tools_getMinValue(image, -1);
         float max_value = reg_tools_getMaxValue(image, -1);
@@ -962,7 +962,7 @@ int main(int argc, char **argv)
         outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim);
         outputImage->datatype = NIFTI_TYPE_RGB24;
         outputImage->nbyper = 3 * sizeof(unsigned char);
-        outputImage->data = (void *)malloc(outputImage->nbyper*outputImage->nvox);
+        outputImage->data = malloc(outputImage->nbyper*outputImage->nvox);
         // Convert the image
         float *inPtr = static_cast<float *>(scaledImage->data);
         unsigned char *outPtr = static_cast<unsigned char *>(outputImage->data);
@@ -1004,7 +1004,7 @@ int main(int argc, char **argv)
         outputImage->scl_inter = 0.f;
         outputImage->cal_min = 0.f;
         outputImage->cal_max = 255.f;
-        outputImage->data = (void *)malloc(outputImage->nbyper*outputImage->nvox);
+        outputImage->data = malloc(outputImage->nbyper*outputImage->nvox);
         // Convert the image
         float *inPtr = static_cast<float *>(image->data);
         unsigned char *outPtr = static_cast<unsigned char *>(outputImage->data);
@@ -1043,7 +1043,7 @@ int main(int argc, char **argv)
         outputImage->dim[0]=outputImage->ndim=4;
         outputImage->dim[4]=outputImage->nt=image->nz>1?6:4;
         outputImage->nvox=(size_t)image->nvox*outputImage->nt;
-        outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper);
+        outputImage->data = malloc(outputImage->nvox * outputImage->nbyper);
         // Compute the MIND descriptor
         int *mask = (int *)calloc(image->nvox, sizeof(int));
         GetMINDImageDescriptor(image, outputImage, mask, 1, 0);
@@ -1070,7 +1070,7 @@ int main(int argc, char **argv)
         outputImage->dim[0]=outputImage->ndim=4;
         outputImage->dim[4]=outputImage->nt=image->nz>1?12:4;
         outputImage->nvox=(size_t)image->nvox*outputImage->nt;
-        outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper);
+        outputImage->data = malloc(outputImage->nvox * outputImage->nbyper);
         // Compute the MIND-SSC descriptor
         int *mask = (int *)calloc(image->nvox, sizeof(int));
         GetMINDSSCImageDescriptor(image, outputImage, mask, 1, 0);
@@ -1108,7 +1108,7 @@ int main(int argc, char **argv)
         outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2;
         outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim);
         outputImage->cal_min=0;
-        outputImage->data = (void *)calloc(outputImage->nbyper, outputImage->nvox);
+        outputImage->data = calloc(outputImage->nbyper, outputImage->nvox);
         float *inPtr = static_cast<float *>(image->data);
         float *outPtr = static_cast<float *>(outputImage->data);
         // Iterate through the blocks
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 4c760e3b..ec533193 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -403,8 +403,7 @@ int main(int argc, char **argv)
          outputTransformationImage=nifti_copy_nim_info(inputTransformationImage);
       }
       // Allocate the output field data array
-      outputTransformationImage->data=(void *)malloc
-                                      (outputTransformationImage->nvox*outputTransformationImage->nbyper);
+      outputTransformationImage->data=malloc(outputTransformationImage->nvox*outputTransformationImage->nbyper);
       // Create a flow field image
       if(flag->outputFlowFlag)
       {
@@ -705,8 +704,7 @@ int main(int argc, char **argv)
          memset(output1TransImage->intent_name, 0, 16);
          strcpy(output1TransImage->intent_name,"NREG_TRANS");
          output1TransImage->intent_p1=DEF_FIELD;
-         output1TransImage->data=(void *)calloc
-                                 (output1TransImage->nvox,output1TransImage->nbyper);
+         output1TransImage->data=calloc(output1TransImage->nvox,output1TransImage->nbyper);
          if(affine1Trans!=nullptr)
          {
             reg_affine_getDeformationField(affine1Trans,output1TransImage);
@@ -779,8 +777,7 @@ int main(int argc, char **argv)
             memset(output2TransImage->intent_name, 0, 16);
             strcpy(output2TransImage->intent_name,"NREG_TRANS");
             output2TransImage->intent_p1=DEF_FIELD;
-            output2TransImage->data=(void *)calloc
-                                    (output2TransImage->nvox,output2TransImage->nbyper);
+            output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper);
             reg_affine_getDeformationField(affine2Trans,output2TransImage);
             reg_defField_compose(output2TransImage,output1TransImage,nullptr);
          }
@@ -830,8 +827,7 @@ int main(int argc, char **argv)
                output2TransImage->nvox=CalcVoxelNumber(*output2TransImage, output2TransImage->ndim);
                output2TransImage->nbyper=output1TransImage->nbyper;
                output2TransImage->datatype=output1TransImage->datatype;
-               output2TransImage->data=(void *)calloc
-                                       (output2TransImage->nvox,output2TransImage->nbyper);
+               output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper);
                printf("[NiftyReg] Transformation 2 is a spline velocity field parametrisation:\n[NiftyReg] %s\n",
                       input2TransImage->fname);
                reg_spline_getDefFieldFromVelocityGrid(input2TransImage,
@@ -845,8 +841,7 @@ int main(int argc, char **argv)
                       input2TransImage->fname);
                output2TransImage=nifti_copy_nim_info(input2TransImage);
                output2TransImage->intent_p1=DEF_FIELD;
-               output2TransImage->data=(void *)calloc
-                                       (output2TransImage->nvox,output2TransImage->nbyper);
+               output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper);
                reg_defField_getDeformationFieldFromFlowField(input2TransImage,
                      output2TransImage,
                      false // the number of step is not automatically updated
@@ -858,8 +853,7 @@ int main(int argc, char **argv)
                       input2TransImage->fname);
                output2TransImage=nifti_copy_nim_info(input2TransImage);
                output2TransImage->intent_p1=DEF_FIELD;
-               output2TransImage->data=(void *)calloc
-                                       (output2TransImage->nvox,output2TransImage->nbyper);
+               output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper);
                reg_getDeformationFromDisplacement(input2TransImage);
                reg_defField_getDeformationFieldFromFlowField(input2TransImage,
                      output2TransImage,
@@ -979,8 +973,7 @@ int main(int argc, char **argv)
          deformationFieldImage=nifti_copy_nim_info(inputTransformationImage);
       }
       // Allocate the deformation field
-      deformationFieldImage->data=(void *)malloc
-            (deformationFieldImage->nvox*deformationFieldImage->nbyper);
+      deformationFieldImage->data=malloc(deformationFieldImage->nvox*deformationFieldImage->nbyper);
       // Fill the deformation field
       if(affineTransformation!=nullptr)
       {
@@ -1095,7 +1088,7 @@ int main(int argc, char **argv)
       landmarkImage->ny=landmarkImage->dim[2]=1;
       landmarkImage->nz=landmarkImage->dim[3]=1;
       landmarkImage->nvox=CalcVoxelNumber(*landmarkImage, landmarkImage->ndim);
-      landmarkImage->data=(void *)malloc(landmarkImage->nvox*landmarkImage->nbyper);
+      landmarkImage->data=malloc(landmarkImage->nvox*landmarkImage->nbyper);
       float *landmarkImagePtr = static_cast<float *>(landmarkImage->data);
       for(size_t l=0, index=0;l<landmarkNumber;++l){
          for(size_t i=0;i<n;++i){
@@ -1297,7 +1290,7 @@ int main(int argc, char **argv)
          }
          tempField->scl_slope=1.f;
          tempField->scl_inter=0.f;
-         tempField->data=(void *)calloc(tempField->nvox,tempField->nbyper);
+         tempField->data=calloc(tempField->nvox,tempField->nbyper);
          // Compute the dense field
          if(inputTransImage->intent_p1==LIN_SPLINE_GRID ||
                inputTransImage->intent_p1==CUB_SPLINE_GRID)
@@ -1330,8 +1323,7 @@ int main(int argc, char **argv)
      outputTransImage->intent_p2 = inputTransImage->intent_p2;
      outputTransImage->scl_slope = 1.f;
      outputTransImage->scl_inter = 0.f;
-     outputTransImage->data = (void *)malloc
-        (outputTransImage->nvox*outputTransImage->nbyper);
+     outputTransImage->data = malloc(outputTransImage->nvox*outputTransImage->nbyper);
       // Invert the provided
       switch(reg_round(inputTransImage->intent_p1))
       {
@@ -1352,7 +1344,7 @@ int main(int argc, char **argv)
          // create a temp deformation field containing an identity transformation
          nifti_image *tempField=nifti_copy_nim_info(outputTransImage);
          tempField->intent_p1=DEF_FIELD;
-         tempField->data=(void *)calloc(tempField->nvox,tempField->nbyper);
+         tempField->data=calloc(tempField->nvox,tempField->nbyper);
          reg_getDeformationFromDisplacement(tempField);
          reg_getDisplacementFromDeformation(inputTransImage);
          reg_resampleGradient(inputTransImage,
@@ -1372,7 +1364,7 @@ int main(int argc, char **argv)
          // create a temp deformation field containing an identity transformation
          nifti_image *tempField=nifti_copy_nim_info(outputTransImage);
          tempField->intent_p1=DEF_FIELD;
-         tempField->data=(void *)calloc(tempField->nvox,tempField->nbyper);
+         tempField->data=calloc(tempField->nvox,tempField->nbyper);
          reg_getDeformationFromDisplacement(tempField);
          reg_resampleGradient(inputTransImage,
                               outputTransImage,
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index e772f87e..145c9e1e 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -54,10 +54,7 @@ void Content::AllocateDeformationField(size_t bytes) {
         deformationField->dim[3] = deformationField->nz = 1;
     deformationField->dim[4] = deformationField->nt = 1;
     deformationField->pixdim[4] = deformationField->dt = 1;
-    if (reference->nz == 1)
-        deformationField->dim[5] = deformationField->nu = 2;
-    else
-        deformationField->dim[5] = deformationField->nu = 3;
+    deformationField->dim[5] = deformationField->nu = reference->nz > 1 ? 3 : 2;
     deformationField->pixdim[5] = deformationField->du = 1;
     deformationField->dim[6] = deformationField->nv = 1;
     deformationField->pixdim[6] = deformationField->dv = 1;
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index ff73e6c9..f8a812c4 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -450,7 +450,7 @@ void reg_aladin<T>::InitAladinContent(nifti_image *ref,
                                       unsigned int blockPercentage,
                                       unsigned int inlierLts,
                                       unsigned int blockStepSize) {
-    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
+    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
     this->con = contentCreator->Create(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
     this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams();
 }
@@ -575,7 +575,7 @@ nifti_image* reg_aladin<T>::GetFinalWarpedImage() {
     resultImage->cal_max = this->inputFloating->cal_max;
     resultImage->scl_slope = this->inputFloating->scl_slope;
     resultImage->scl_inter = this->inputFloating->scl_inter;
-    resultImage->data = (void *)malloc(resultImage->nvox * resultImage->nbyper);
+    resultImage->data = malloc(resultImage->nvox * resultImage->nbyper);
     memcpy(resultImage->data, warped->data, resultImage->nvox * resultImage->nbyper);
 
     reg_aladin<T>::DeallocateKernels();
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index 7ea18cfa..fd61974d 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -248,7 +248,7 @@ void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
                               inlierLts,
                               blockStepSize);
 
-   std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
+   unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
    this->backCon = contentCreator->Create(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
    this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams();
 }
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index d5412c5e..28f75860 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -106,7 +106,7 @@ void reg_f3d<T>::SetSpacing(unsigned int i, T s) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
-    std::unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
+    unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
     this->con = contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
     this->compute = this->platform->CreateCompute(*this->con);
 }
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 2128bc23..05dca3ac 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -84,7 +84,7 @@ void reg_f3d2<T>::SetInverseConsistencyWeight(T w) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d2<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
-    std::unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
+    unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
     conBw = contentCreator->Create(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T));
     computeBw = this->platform->CreateCompute(*conBw);
 }
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index 8836c5dc..a2d51605 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -285,7 +285,7 @@ void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int
     free(image->data);
     image->datatype = type;
     image->nbyper = sizeof(T);
-    image->data = (void *)malloc(image->nvox * image->nbyper);
+    image->data = malloc(image->nvox * image->nbyper);
     T* dataT = static_cast<T*>(image->data);
     for (size_t i = 0; i < size; ++i)
         dataT[i] = FillWarpedImageData<T>(buffer[i], type);
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index e5b42432..2a5eb57a 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -1999,7 +1999,7 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint,
    splineControlPoint->dim[3]=splineControlPoint->nz=1;
 
    splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim);
-   splineControlPoint->data = (void *)calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
+   splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
    gridPtrX = static_cast<SplineTYPE *>(splineControlPoint->data);
    SplineTYPE *gridPtrY = &gridPtrX[CalcVoxelNumber(*splineControlPoint, 2)];
    SplineTYPE *oldGridPtrX = &oldGrid[0];
@@ -2102,7 +2102,7 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_
       splineControlPoint->dim[3]=splineControlPoint->nz=(oldDim[3]-3)*2+3;
    }
    splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim);
-   splineControlPoint->data = (void *)calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
+   splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
 
    const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint);
    gridPtrX = static_cast<SplineTYPE *>(splineControlPoint->data);
@@ -3406,7 +3406,7 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField,
             pars[2] += delta[2];
             // end added
 
-            optimize(cost_function, pars, (void *)&dat, tolerance);
+            optimize(cost_function, pars, &dat, tolerance);
             // output = (warp-1)(input);
 
             outData[0]        = pars[0];
@@ -4008,7 +4008,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
       {
          // Create a field that contains the affine component only
          affineOnly = nifti_copy_nim_info(deformationFieldImage);
-         affineOnly->data = (void *)calloc(affineOnly->nvox,affineOnly->nbyper);
+         affineOnly->data = calloc(affineOnly->nvox,affineOnly->nbyper);
          reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata),
                affineOnly,
                false);
@@ -4134,7 +4134,7 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
    {
       // Create an image to store the flow field
       nifti_image *flowField = nifti_copy_nim_info(deformationFieldImage);
-      flowField->data = (void *)calloc(flowField->nvox,flowField->nbyper);
+      flowField->data = calloc(flowField->nvox,flowField->nbyper);
       flowField->intent_code=NIFTI_INTENT_VECTOR;
       memset(flowField->intent_name, 0, 16);
       strcpy(flowField->intent_name,"NREG_TRANS");
@@ -4173,7 +4173,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
    {
       // Create an image to store the flow field
       nifti_image *flowFieldImage = nifti_copy_nim_info(deformationFieldImage[0]);
-      flowFieldImage->data = (void *)calloc(flowFieldImage->nvox,flowFieldImage->nbyper);
+      flowFieldImage->data = calloc(flowFieldImage->nvox,flowFieldImage->nbyper);
       flowFieldImage->intent_code=NIFTI_INTENT_VECTOR;
       memset(flowFieldImage->intent_name, 0, 16);
       strcpy(flowFieldImage->intent_name,"NREG_TRANS");
@@ -4193,7 +4193,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
          {
             // Create a field that contains the affine component only
             affineOnly = nifti_copy_nim_info(deformationFieldImage[0]);
-            affineOnly->data = (void *)calloc(affineOnly->nvox,affineOnly->nbyper);
+            affineOnly->data = calloc(affineOnly->nvox,affineOnly->nbyper);
             reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata),
                   affineOnly,
                   false);
@@ -4370,8 +4370,8 @@ void compute_lie_bracket(nifti_image *img1,
    nifti_image *one_two = nifti_copy_nim_info(img2);
    nifti_image *two_one = nifti_copy_nim_info(img1);
    // Set the temporary images to zero displacement
-   one_two->data=(void *)calloc(one_two->nvox, one_two->nbyper);
-   two_one->data=(void *)calloc(two_one->nvox, two_one->nbyper);
+   one_two->data=calloc(one_two->nvox, one_two->nbyper);
+   two_one->data=calloc(two_one->nvox, two_one->nbyper);
    // Compute the displacement from img1
    reg_spline_cppComposition(img1,
                              two_one,
@@ -4465,7 +4465,7 @@ void compute_BCH_update1(nifti_image *img1, // current field
 
       // r <- 2 + 1 + 0.5[2,1]
       nifti_image *lie_bracket_img2_img1=nifti_copy_nim_info(img1);
-      lie_bracket_img2_img1->data=(void *)malloc(lie_bracket_img2_img1->nvox*lie_bracket_img2_img1->nbyper);
+      lie_bracket_img2_img1->data=malloc(lie_bracket_img2_img1->nvox*lie_bracket_img2_img1->nbyper);
       compute_lie_bracket<DTYPE>(img2, img1, lie_bracket_img2_img1, use_jac);
       DTYPE *lie_bracket_img2_img1Ptr=static_cast<DTYPE *>(lie_bracket_img2_img1->data);
  #if defined (_OPENMP)
@@ -4480,7 +4480,7 @@ void compute_BCH_update1(nifti_image *img1, // current field
       {
          // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12
          nifti_image *lie_bracket_img2_lie1=nifti_copy_nim_info(lie_bracket_img2_img1);
-         lie_bracket_img2_lie1->data=(void *)malloc(lie_bracket_img2_lie1->nvox*lie_bracket_img2_lie1->nbyper);
+         lie_bracket_img2_lie1->data=malloc(lie_bracket_img2_lie1->nvox*lie_bracket_img2_lie1->nbyper);
          compute_lie_bracket<DTYPE>(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac);
          DTYPE *lie_bracket_img2_lie1Ptr=static_cast<DTYPE *>(lie_bracket_img2_lie1->data);
  #if defined (_OPENMP)
@@ -4495,7 +4495,7 @@ void compute_BCH_update1(nifti_image *img1, // current field
          {
             // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12
             nifti_image *lie_bracket_img1_lie1=nifti_copy_nim_info(lie_bracket_img2_img1);
-            lie_bracket_img1_lie1->data=(void *)malloc(lie_bracket_img1_lie1->nvox*lie_bracket_img1_lie1->nbyper);
+            lie_bracket_img1_lie1->data=malloc(lie_bracket_img1_lie1->nvox*lie_bracket_img1_lie1->nbyper);
             compute_lie_bracket<DTYPE>(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac);
             DTYPE *lie_bracket_img1_lie1Ptr=static_cast<DTYPE *>(lie_bracket_img1_lie1->data);
  #if defined (_OPENMP)
@@ -4511,7 +4511,7 @@ void compute_BCH_update1(nifti_image *img1, // current field
             {
                // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - [1,[2,[2,1]]]/24
                nifti_image *lie_bracket_img1_lie2=nifti_copy_nim_info(lie_bracket_img2_lie1);
-               lie_bracket_img1_lie2->data=(void *)malloc(lie_bracket_img1_lie2->nvox*lie_bracket_img1_lie2->nbyper);
+               lie_bracket_img1_lie2->data=malloc(lie_bracket_img1_lie2->nvox*lie_bracket_img1_lie2->nbyper);
                compute_lie_bracket<DTYPE>(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac);
                DTYPE *lie_bracket_img1_lie2Ptr=static_cast<DTYPE *>(lie_bracket_img1_lie2->data);
  #if defined (_OPENMP)
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index eefcac8f..7711b0ed 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -1247,7 +1247,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
    }
    else detNumber = CalcVoxelNumber(*referenceImage);
 
-   void *JacobianDetermiantArray=(void *)malloc(detNumber*splineControlPoint->nbyper);
+   void *JacobianDetermiantArray=malloc(detNumber*splineControlPoint->nbyper);
 
    // The jacobian determinants are computed
    if(splineControlPoint->nz==1)
@@ -2982,8 +2982,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
 
    // A second field is allocated to store the deformation
    nifti_image *defFieldImage = nifti_copy_nim_info(flowFieldImage);
-   defFieldImage->data = (void *)malloc(defFieldImage->nvox *
-                                        defFieldImage->nbyper);
+   defFieldImage->data = malloc(defFieldImage->nvox * defFieldImage->nbyper);
 
    // Remove the affine component from the flow field
    if(flowFieldImage->num_ext>0)
@@ -3130,7 +3129,7 @@ int reg_spline_GetJacobianMatFromVelocityGrid(mat33* jacobianMatrices,
    flowFieldImage->nt=flowFieldImage->dim[4]=1;
    flowFieldImage->nu=flowFieldImage->dim[5]=referenceImage->nz>1?3:2;
    flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim);
-   flowFieldImage->data=(void *)malloc(flowFieldImage->nvox*flowFieldImage->nbyper);
+   flowFieldImage->data=malloc(flowFieldImage->nvox*flowFieldImage->nbyper);
 
    // The velocity grid image is first converted into a flow field
    reg_spline_getFlowFieldFromVelocityGrid(velocityGridImage,
@@ -3187,7 +3186,7 @@ int reg_spline_GetJacobianDetFromVelocityGrid(nifti_image* jacobianDetImage,
    flowFieldImage->nt=flowFieldImage->dim[4]=1;
    flowFieldImage->nu=flowFieldImage->dim[5]=jacobianDetImage->nz>1?3:2;
    flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim);
-   flowFieldImage->data=(void *)malloc(flowFieldImage->nvox*flowFieldImage->nbyper);
+   flowFieldImage->data=malloc(flowFieldImage->nvox*flowFieldImage->nbyper);
 
    // The velocity grid image is first converted into a flow field
    reg_spline_getFlowFieldFromVelocityGrid(velocityGridImage,
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index fc6a4587..5835c229 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -136,7 +136,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
         const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
 #endif
 
-        *originalFloatingData=(void *)malloc(floatingImage->nvox*sizeof(DTYPE));
+        *originalFloatingData=malloc(floatingImage->nvox*sizeof(DTYPE));
         memcpy(*originalFloatingData,
                floatingImage->data,
                floatingImage->nvox*sizeof(DTYPE));
@@ -3547,7 +3547,7 @@ nifti_image *reg_makeIsotropic(nifti_image *img,
     def->nvox = CalcVoxelNumber(*def, def->ndim);
     def->nbyper = sizeof(float);
     def->datatype = NIFTI_TYPE_FLOAT32;
-    def->data = (void *)calloc(def->nvox,def->nbyper);
+    def->data = calloc(def->nvox,def->nbyper);
     // Fill the deformation field with an identity transformation
     reg_getDeformationFromDisplacement(def);
     // resample the original image into the space of the new image
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 8671a456..7e723256 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -1634,7 +1634,7 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) {
                     position[1] = (int)reg_round(real[0] * real2Voxel_qform.m[1][0] + real[1] * real2Voxel_qform.m[1][1] + real[2] * real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]);
                     position[2] = (int)reg_round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]);
                     if (oldDim[3] == 1) position[2] = 0;
-                    // Nearest neighboor is used as downsampling ratio is constant
+                    // Nearest neighbour is used as downsampling ratio is constant
                     intensity = std::numeric_limits<ImageTYPE>::quiet_NaN();
                     if (-1 < position[0] && position[0] < oldDim[1] &&
                         -1 < position[1] && position[1] < oldDim[2] &&
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index aa419d7d..59d467c2 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -19,6 +19,8 @@
 #include <memory>
 #include "_reg_maths.h"
 
+using std::unique_ptr;
+
 typedef enum {
     MEAN_KERNEL,
     LINEAR_KERNEL,
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 40baab4c..3178cf40 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -10,10 +10,9 @@
  */
 
 #include "_reg_common_cuda.h"
-#include "_reg_tools.h"
 #include "_reg_blocksize_gpu.h"
 
- /* ******************************** */
+/* *************************************************************** */
 template <class NIFTI_TYPE>
 int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *img) {
     const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NIFTI_TYPE);
@@ -37,7 +36,7 @@ int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *
 }
 template int cudaCommon_transferNiftiToNiftiOnDevice1<float>(nifti_image*, nifti_image*);
 template int cudaCommon_transferNiftiToNiftiOnDevice1<double>(nifti_image*, nifti_image*);
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE, class NIFTI_TYPE>
 int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, nifti_image *img) {
     if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
@@ -51,7 +50,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, nifti_image *img) {
     }
     return EXIT_SUCCESS;
 }
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, nifti_image *img) {
     if (sizeof(DTYPE) == sizeof(float4)) {
@@ -95,7 +94,7 @@ template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, nifti_imag
 template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<int>(int*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, nifti_image*);
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE, class NIFTI_TYPE>
 int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) {
     if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
@@ -111,7 +110,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, DTYPE *array2_d, ni
     }
     return EXIT_SUCCESS;
 }
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) {
     if (sizeof(DTYPE) == sizeof(float4)) {
@@ -165,7 +164,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nif
 template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, float*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, double*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, float4*, nifti_image*); // for deformation field
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE, class NIFTI_TYPE>
 int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *img) {
     if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
@@ -187,7 +186,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *
     }
     return EXIT_SUCCESS;
 }
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *img) {
     if (sizeof(DTYPE) == sizeof(float4)) {
@@ -240,7 +239,7 @@ template int cudaCommon_transferNiftiToArrayOnDevice<int>(cudaArray*, nifti_imag
 template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, nifti_image*); // for deformation field
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE, class NIFTI_TYPE>
 int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) {
     if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
@@ -271,7 +270,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cu
     }
     return EXIT_SUCCESS;
 }
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) {
     if (sizeof(DTYPE) == sizeof(float4)) {
@@ -345,7 +344,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA
 template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, cudaArray*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, cudaArray*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, cudaArray*, nifti_image*); // for deformation field
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) {
     const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]);
@@ -356,7 +355,7 @@ int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) {
 template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, int*);
 template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, int*);
 template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, int*); // for deformation field
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, int *dim) {
     const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]);
@@ -368,7 +367,7 @@ int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2
 template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, cudaArray**, int*);
 template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, cudaArray**, int*);
 template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, cudaArray**, int*); // for deformation field
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int *dim) {
     const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE);
@@ -379,7 +378,7 @@ template int cudaCommon_allocateArrayToDevice<float>(float**, int*);
 template int cudaCommon_allocateArrayToDevice<double>(double**, int*);
 template int cudaCommon_allocateArrayToDevice<int>(int**, int*);
 template int cudaCommon_allocateArrayToDevice<float4>(float4**, int*); // for deformation field
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int vox) {
     const unsigned int memSize = vox * sizeof(DTYPE);
@@ -390,7 +389,7 @@ template int cudaCommon_allocateArrayToDevice<float>(float**, int);
 template int cudaCommon_allocateArrayToDevice<double>(double**, int);
 template int cudaCommon_allocateArrayToDevice<int>(int**, int);
 template int cudaCommon_allocateArrayToDevice<float4>(float4**, int); // for deformation field
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_allocateArrayToDevice(DTYPE **array_d, DTYPE **array2_d, int *dim) {
     const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE);
@@ -401,7 +400,7 @@ int cudaCommon_allocateArrayToDevice(DTYPE **array_d, DTYPE **array2_d, int *dim
 template int cudaCommon_allocateArrayToDevice<float>(float**, float**, int*);
 template int cudaCommon_allocateArrayToDevice<double>(double**, double**, int*);
 template int  cudaCommon_allocateArrayToDevice<float4>(float4**, float4**, int*); // for deformation field
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE *cuPtr, const unsigned int nElements) {
     NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DTYPE), cudaMemcpyDeviceToHost));
@@ -409,7 +408,7 @@ int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE *cuPtr, const unsign
 }
 template int cudaCommon_transferFromDeviceToCpu<float>(float *cpuPtr, float *cuPtr, const unsigned int nElements);
 template int cudaCommon_transferFromDeviceToCpu<double>(double *cpuPtr, double *cuPtr, const unsigned int nElements);
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE, class NIFTI_TYPE>
 int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d) {
     if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
@@ -424,7 +423,7 @@ int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d) {
 }
 template int cudaCommon_transferFromDeviceToNifti1<float, float>(nifti_image *img, float *array_d);
 template int cudaCommon_transferFromDeviceToNifti1<double, double>(nifti_image *img, double *array_d);
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) {
     if (sizeof(DTYPE) == sizeof(float4)) {
@@ -472,7 +471,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) {
 template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image*, float*);
 template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image*, double*);
 template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, float4*); // for deformation field
-/* ******************************** */
+/* *************************************************************** */
 template<>
 int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray *cuArray_d) {
     if (img->datatype != NIFTI_TYPE_FLOAT32) {
@@ -490,7 +489,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray *cuArray_d)
     NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
     return EXIT_SUCCESS;
 }
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE, class NIFTI_TYPE>
 int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) {
     if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
@@ -506,7 +505,7 @@ int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d, DTYP
     }
     return EXIT_SUCCESS;
 }
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) {
     if (sizeof(DTYPE) == sizeof(float4)) {
@@ -572,11 +571,11 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE
 template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image*, float*, float*);
 template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image*, double*, double*);
 template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, float4*, float4*); // for deformation field
-/* ******************************** */
+/* *************************************************************** */
 void cudaCommon_free(cudaArray *cuArray_d) {
     NR_CUDA_SAFE_CALL(cudaFreeArray(cuArray_d));
 }
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 void cudaCommon_free(DTYPE *array_d) {
     NR_CUDA_SAFE_CALL(cudaFree(array_d));
@@ -585,7 +584,7 @@ template void cudaCommon_free<int>(int*);
 template void cudaCommon_free<float>(float*);
 template void cudaCommon_free<double>(double*);
 template void cudaCommon_free<float4>(float4*);
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE *array_d, nifti_image *img) {
     NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img->data, img->nvox * sizeof(DTYPE), cudaMemcpyHostToDevice));
@@ -594,7 +593,7 @@ int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE *array_d, nifti_image *img)
 template int cudaCommon_transferFromDeviceToNiftiSimple<int>(int*, nifti_image*);
 template int cudaCommon_transferFromDeviceToNiftiSimple<float>(float*, nifti_image*);
 template int cudaCommon_transferFromDeviceToNiftiSimple<double>(double*, nifti_image*);
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE *array_d, DTYPE *img, const unsigned int nvox) {
     NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DTYPE), cudaMemcpyHostToDevice));
@@ -603,7 +602,7 @@ int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE *array_d, DTYPE *img, cons
 template int cudaCommon_transferFromDeviceToNiftiSimple1<int>(int*, int*, const unsigned);
 template int cudaCommon_transferFromDeviceToNiftiSimple1<float>(float*, float*, const unsigned);
 template int cudaCommon_transferFromDeviceToNiftiSimple1<double>(double*, double*, const unsigned);
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferArrayFromCpuToDevice(DTYPE *array_d, DTYPE *array_cpu, const unsigned int nElements) {
     const unsigned int memSize = nElements * sizeof(DTYPE);
@@ -613,7 +612,7 @@ int cudaCommon_transferArrayFromCpuToDevice(DTYPE *array_d, DTYPE *array_cpu, co
 template int cudaCommon_transferArrayFromCpuToDevice<int>(int*, int*, const unsigned int);
 template int cudaCommon_transferArrayFromCpuToDevice<float>(float*, float*, const unsigned int);
 template int cudaCommon_transferArrayFromCpuToDevice<double>(double*, double*, const unsigned int);
-/* ******************************** */
+/* *************************************************************** */
 template <class DTYPE>
 int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE *array_d, const unsigned int nElements) {
     const unsigned int memSize = nElements * sizeof(DTYPE);
@@ -623,4 +622,4 @@ int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE *array_d, co
 template int cudaCommon_transferArrayFromDeviceToCpu<int>(int*, int*, const unsigned int);
 template int cudaCommon_transferArrayFromDeviceToCpu<float>(float*, float*, const unsigned int);
 template int cudaCommon_transferArrayFromDeviceToCpu<double>(double*, double*, const unsigned int);
-/* ******************************** */
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 113aa619..ea834349 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -9,19 +9,18 @@
 
 #pragma once
 
-#include "nifti1_io.h"
 #include "cuda_runtime.h"
 #include "cuda.h"
-#include "_reg_maths.h"
+#include "_reg_tools.h"
 
-/* ******************************** */
+/* *************************************************************** */
 #ifndef __VECTOR_TYPES_H__
 #define __VECTOR_TYPES_H__
 struct __attribute__((aligned(4))) float4 {
     float x, y, z, w;
 };
 #endif
-/* ******************************** */
+/* *************************************************************** */
 #if CUDART_VERSION >= 3200
 #   define NR_CUDA_SAFE_CALL(call) { \
 		call; \
@@ -68,74 +67,74 @@ struct __attribute__((aligned(4))) float4 {
 		} \
 	}
 #endif //CUDART_VERSION >= 3200
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_allocateArrayToDevice(cudaArray**, int*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_allocateArrayToDevice(DTYPE**, int);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_allocateArrayToDevice(DTYPE**, int*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_allocateArrayToDevice(DTYPE**, DTYPE**, int*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, nifti_image*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, DTYPE*, nifti_image*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*, DTYPE*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 void cudaCommon_free(cudaArray*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++" template <class DTYPE>
 void cudaCommon_free(DTYPE*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE*, nifti_image*);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE*, DTYPE*, const unsigned);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferArrayFromCpuToDevice(DTYPE*, DTYPE*, const unsigned int);
-/* ******************************** */
+/* *************************************************************** */
 extern "C++"
 template <class DTYPE>
 int cudaCommon_transferArrayFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int);
-/* ******************************** */
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index 644f4fdd..a55d8463 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -49,7 +49,7 @@ void reg_affine_positionField_gpu(	mat44 *affineMatrix,
     }
     NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, transformationMatrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice));
 	cudaBindTexture(0,txAffineTransformation,transformationMatrix_d,3*sizeof(float4));
-    NR_CUDA_SAFE_CALL(cudaFreeHost((void *)transformationMatrix_h));
+    NR_CUDA_SAFE_CALL(cudaFreeHost(transformationMatrix_h));
 
         const unsigned int Grid_reg_affine_deformationField = (unsigned int)ceil(sqrtf((float)targetImage->nvox/(float)NR_BLOCK->Block_reg_affine_deformationField));
     dim3 B1(NR_BLOCK->Block_reg_affine_deformationField,1,1);
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 1d0566de..541bcf66 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -248,7 +248,7 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
         gg += sum_h[i].y;
     }
     float gam = (float)(dgg / gg);
-    NR_CUDA_SAFE_CALL(cudaFreeHost((void *)sum_h));
+    NR_CUDA_SAFE_CALL(cudaFreeHost(sum_h));
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &gam, sizeof(float)));
     const unsigned int Grid_reg_GetConjugateGradient2 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_GetConjugateGradient2));
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index 520dc7f7..df4954ef 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -13,7 +13,6 @@
 #include "_reg_resampling_gpu.h"
 #include "_reg_resampling_kernels.cu"
 
-/* *************************************************************** */
 /* *************************************************************** */
 void reg_resampleImage_gpu(nifti_image *floatingImage,
                            float *warpedImageArray_d,
@@ -21,16 +20,15 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
                            float4 *deformationFieldImageArray_d,
                            int *mask_d,
                            int activeVoxelNumber,
-                           float paddingValue)
-{
+                           float paddingValue) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim,&floatingDim,sizeof(int3)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue,&paddingValue,sizeof(float)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim, &floatingDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue, &paddingValue, sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
 
     //Bind floating image array to a 3D texture
     floatingTexture.normalized = false;
@@ -40,74 +38,69 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
     floatingTexture.addressMode[2] = cudaAddressModeWrap;
 
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc))
+    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc));
 
     //Bind deformationField to texture
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber * sizeof(float4)));
 
     //Bind mask to texture
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int)));
 
     // Bind the real to voxel matrix to texture
     mat44 *floatingMatrix;
-    if(floatingImage->sform_code>0)
-        floatingMatrix=&(floatingImage->sto_ijk);
-    else floatingMatrix=&(floatingImage->qto_ijk);
-    float4 *floatingRealToVoxel_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3*sizeof(float4)))
+    if (floatingImage->sform_code > 0)
+        floatingMatrix = &(floatingImage->sto_ijk);
+    else floatingMatrix = &(floatingImage->qto_ijk);
+    float4 *floatingRealToVoxel_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3 * sizeof(float4)));
     float4 *floatingRealToVoxel_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3*sizeof(float4)))
-    for(int i=0; i<3; i++){
-        floatingRealToVoxel_h[i].x=floatingMatrix->m[i][0];
-        floatingRealToVoxel_h[i].y=floatingMatrix->m[i][1];
-        floatingRealToVoxel_h[i].z=floatingMatrix->m[i][2];
-        floatingRealToVoxel_h[i].w=floatingMatrix->m[i][3];
+    NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3 * sizeof(float4)));
+    for (int i = 0; i < 3; i++) {
+        floatingRealToVoxel_h[i].x = floatingMatrix->m[i][0];
+        floatingRealToVoxel_h[i].y = floatingMatrix->m[i][1];
+        floatingRealToVoxel_h[i].z = floatingMatrix->m[i][2];
+        floatingRealToVoxel_h[i].w = floatingMatrix->m[i][3];
     }
-    NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3*sizeof(float4), cudaMemcpyHostToDevice))
-    NR_CUDA_SAFE_CALL(cudaFreeHost((void *)floatingRealToVoxel_h))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3 * sizeof(float4), cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaFreeHost(floatingRealToVoxel_h));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3 * sizeof(float4)));
 
-    if(floatingImage->nz>1){
+    if (floatingImage->nz > 1) {
         const unsigned int Grid_reg_resamplefloatingImage3D =
-                (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_resampleImage3D));
-        dim3 B1(NR_BLOCK->Block_reg_resampleImage3D,1,1);
-        dim3 G1(Grid_reg_resamplefloatingImage3D,Grid_reg_resamplefloatingImage3D,1);
-        reg_resampleImage3D_kernel <<< G1, B1 >>> (warpedImageArray_d);
-		cudaDeviceSynchronize();
-		NR_CUDA_CHECK_KERNEL(G1,B1)
-	}
-	else{
+            (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage3D));
+        dim3 B1(NR_BLOCK->Block_reg_resampleImage3D, 1, 1);
+        dim3 G1(Grid_reg_resamplefloatingImage3D, Grid_reg_resamplefloatingImage3D, 1);
+        reg_resampleImage3D_kernel<<<G1, B1>>>(warpedImageArray_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    } else {
         const unsigned int Grid_reg_resamplefloatingImage2D =
-                (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_resampleImage2D));
-        dim3 B1(NR_BLOCK->Block_reg_resampleImage2D,1,1);
-        dim3 G1(Grid_reg_resamplefloatingImage2D,Grid_reg_resamplefloatingImage2D,1);
-        reg_resampleImage2D_kernel <<< G1, B1 >>> (warpedImageArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
-	}
-
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture))
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture))
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture))
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture))
-
-    NR_CUDA_SAFE_CALL(cudaFree(floatingRealToVoxel_d))
+            (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage2D));
+        dim3 B1(NR_BLOCK->Block_reg_resampleImage2D, 1, 1);
+        dim3 G1(Grid_reg_resamplefloatingImage2D, Grid_reg_resamplefloatingImage2D, 1);
+        reg_resampleImage2D_kernel<<<G1, B1>>>(warpedImageArray_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    }
+
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture));
+    NR_CUDA_SAFE_CALL(cudaFree(floatingRealToVoxel_d));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_getImageGradient_gpu(nifti_image *floatingImage,
                               cudaArray *floatingImageArray_d,
                               float4 *deformationFieldImageArray_d,
                               float4 *warpedGradientArray_d,
                               int activeVoxelNumber,
-                              float paddingValue)
-{
+                              float paddingValue) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim, &floatingDim, sizeof(int3)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue, &paddingValue, sizeof(float)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim, &floatingDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue, &paddingValue, sizeof(float)));
 
     //Bind floating image array to a 3D texture
     floatingTexture.normalized = true;
@@ -117,47 +110,46 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
     floatingTexture.addressMode[2] = cudaAddressModeWrap;
 
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc))
+    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc));
 
     //Bind deformationField to texture
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber * sizeof(float4)));
 
     // Bind the real to voxel matrix to texture
     mat44 *floatingMatrix;
-    if(floatingImage->sform_code>0)
-        floatingMatrix=&(floatingImage->sto_ijk);
-    else floatingMatrix=&(floatingImage->qto_ijk);
-    float4 *floatingRealToVoxel_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3*sizeof(float4)))
+    if (floatingImage->sform_code > 0)
+        floatingMatrix = &(floatingImage->sto_ijk);
+    else floatingMatrix = &(floatingImage->qto_ijk);
+    float4 *floatingRealToVoxel_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3 * sizeof(float4)));
     float4 *floatingRealToVoxel_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3*sizeof(float4)))
-    for(int i=0; i<3; i++){
-        floatingRealToVoxel_h[i].x=floatingMatrix->m[i][0];
-        floatingRealToVoxel_h[i].y=floatingMatrix->m[i][1];
-        floatingRealToVoxel_h[i].z=floatingMatrix->m[i][2];
-        floatingRealToVoxel_h[i].w=floatingMatrix->m[i][3];
+    NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3 * sizeof(float4)));
+    for (int i = 0; i < 3; i++) {
+        floatingRealToVoxel_h[i].x = floatingMatrix->m[i][0];
+        floatingRealToVoxel_h[i].y = floatingMatrix->m[i][1];
+        floatingRealToVoxel_h[i].z = floatingMatrix->m[i][2];
+        floatingRealToVoxel_h[i].w = floatingMatrix->m[i][3];
     }
-    NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3*sizeof(float4), cudaMemcpyHostToDevice))
-    NR_CUDA_SAFE_CALL(cudaFreeHost((void *)floatingRealToVoxel_h))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3*sizeof(float4)))
-    if(floatingImage->nz>1){
-        const unsigned int Grid_reg_getImageGradient3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getImageGradient3D));
-        dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D,1,1);
-		dim3 G1(Grid_reg_getImageGradient3D,Grid_reg_getImageGradient3D,1);
-        reg_getImageGradient3D_kernel <<< G1, B1 >>> (warpedGradientArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
-	}
-	else{
-        const unsigned int Grid_reg_getImageGradient2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getImageGradient2D));
-        dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D,1,1);
-		dim3 G1(Grid_reg_getImageGradient2D,Grid_reg_getImageGradient2D,1);
-        reg_getImageGradient2D_kernel <<< G1, B1 >>> (warpedGradientArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
-	}
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture))
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture))
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture))
-
-    cudaFree(floatingRealToVoxel_d);
+    NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3 * sizeof(float4), cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaFreeHost(floatingRealToVoxel_h));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3 * sizeof(float4)));
+
+    if (floatingImage->nz > 1) {
+        const unsigned int Grid_reg_getImageGradient3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient3D));
+        dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D, 1, 1);
+        dim3 G1(Grid_reg_getImageGradient3D, Grid_reg_getImageGradient3D, 1);
+        reg_getImageGradient3D_kernel<<<G1, B1>>>(warpedGradientArray_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    } else {
+        const unsigned int Grid_reg_getImageGradient2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient2D));
+        dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D, 1, 1);
+        dim3 G1(Grid_reg_getImageGradient2D, Grid_reg_getImageGradient2D, 1);
+        reg_getImageGradient2D_kernel<<<G1, B1>>>(warpedGradientArray_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    }
+
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture));
+    NR_CUDA_SAFE_CALL(cudaFree(floatingRealToVoxel_d));
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 9459ecbf..0c2c511a 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -72,7 +72,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
     float4 *matrix_d;
     NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3*sizeof(float4)))
     NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice))
-    NR_CUDA_SAFE_CALL(cudaFreeHost((void *)matrix_h))
+    NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h))
     NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3*sizeof(float4)))
 
     const unsigned int Grid_reg_convertNMIGradientFromVoxelToRealSpace =
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp
index df7b0274..78229415 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affine_deformation_field.cpp
@@ -21,7 +21,7 @@
 
 
 typedef std::tuple<std::string, nifti_image*, mat44*, float*, float*, float*> TestData;
-typedef std::tuple<std::unique_ptr<AladinContent>, std::unique_ptr<Platform>> ContentDesc;
+typedef std::tuple<unique_ptr<AladinContent>, unique_ptr<Platform>> ContentDesc;
 
 TEST_CASE("Affine deformation field", "[AffineDefField]") {
     // Create a reference 2D image
@@ -158,9 +158,9 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         // Accumulate all required contents with a vector
         std::vector<ContentDesc> contentDescs;
         for (auto&& platformType : PlatformTypes) {
-            std::unique_ptr<Platform> platform{ new Platform(platformType) };
-            std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
-            std::unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) };
+            unique_ptr<Platform> platform{ new Platform(platformType) };
+            unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+            unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) };
             contentDescs.push_back(ContentDesc(std::move(content), std::move(platform)));
         }
         // Loop over all possibles contents for each test
@@ -168,7 +168,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
             auto&& [content, platform] = contentDesc;
             SECTION(testName + " " + platform->GetName()) {
                 // Initialise the platform to run current content and retrieve deformation field
-                std::unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) };
+                unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) };
                 affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
                 nifti_image *defField = content->GetDeformationField();
 
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index ab5a8fef..a14411df 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -68,7 +68,7 @@ void check_matching_difference(int dim,
 }
 
 void test(AladinContent *con, Platform *platform) {
-    std::unique_ptr<Kernel> blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) };
+    unique_ptr<Kernel> blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) };
     blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
 }
 
@@ -117,9 +117,9 @@ int main(int argc, char **argv) {
     _reg_blockMatchingParam* blockMatchingParams;
 
     // Platforms
-    std::unique_ptr<Platform> platform{ new Platform(platformType) };
-    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
-    std::unique_ptr<AladinContent> con{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
+    unique_ptr<Platform> platform{ new Platform(platformType) };
+    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+    unique_ptr<AladinContent> con{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
     con->SetWarped(warpedImage);
     //con->SetWarped(referenceImage);
     test(con.get(), platform.get());
diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp
index 2cde3356..fa3a888e 100644
--- a/reg-test/reg_test_bspline_deformation_field.cpp
+++ b/reg-test/reg_test_bspline_deformation_field.cpp
@@ -49,7 +49,7 @@ int main(int argc, char **argv)
 
     // Create a deformation field
     nifti_image *test_field = nifti_copy_nim_info(expectedDefField);
-    test_field->data = (void *)malloc(test_field->nvox*test_field->nbyper);
+    test_field->data = malloc(test_field->nvox*test_field->nbyper);
 
     if(useComposition)
     {
@@ -76,7 +76,7 @@ int main(int argc, char **argv)
 
     // Compute the difference between the computed and expected deformation fields
     nifti_image *diff_field = nifti_copy_nim_info(expectedDefField);
-    diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper);
+    diff_field->data = malloc(diff_field->nvox*diff_field->nbyper);
     reg_tools_subtractImageFromImage(expectedDefField, test_field, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_getMaxValue(diff_field, -1);
diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp
index 96b83577..f1960fca 100644
--- a/reg-test/reg_test_coherence_affine_deformation_field.cpp
+++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp
@@ -54,17 +54,17 @@ int main(int argc, char **argv) {
 
     // Create a deformation field
     nifti_image *test_field_cpu = nifti_copy_nim_info(inputDeformationField);
-    test_field_cpu->data = (void *)malloc(test_field_cpu->nvox * test_field_cpu->nbyper);
+    test_field_cpu->data = malloc(test_field_cpu->nvox * test_field_cpu->nbyper);
 
     nifti_image *test_field_gpu = nifti_copy_nim_info(inputDeformationField);
-    test_field_gpu->data = (void *)malloc(test_field_gpu->nvox * test_field_gpu->nbyper);
+    test_field_gpu->data = malloc(test_field_gpu->nvox * test_field_gpu->nbyper);
 
     // Compute the affine deformation field
-    std::unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
-    std::unique_ptr<AladinContent> conCpu{ new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) };
-    std::unique_ptr<Platform> platformGpu{ new Platform(platformType) };
-    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
-    std::unique_ptr<AladinContent> conGpu{ contentCreator->Create(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) };
+    unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
+    unique_ptr<AladinContent> conCpu{ new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) };
+    unique_ptr<Platform> platformGpu{ new Platform(platformType) };
+    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
+    unique_ptr<AladinContent> conGpu{ contentCreator->Create(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) };
 
     //Check if the platform used is double capable
     bool isDouble = conGpu->IsCurrentComputationDoubleCapable();
@@ -83,7 +83,7 @@ int main(int argc, char **argv) {
 
     // Compute the difference between the computed and inputted deformation field
     nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField);
-    diff_field->data = (void *)malloc(diff_field->nvox * diff_field->nbyper);
+    diff_field->data = malloc(diff_field->nvox * diff_field->nbyper);
     reg_tools_subtractImageFromImage(inputDeformationField, test_field_cpu, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_GetMaxValue(diff_field, -1);
diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp
index 3e581b81..7c9ce127 100644
--- a/reg-test/reg_test_coherence_blockMatching.cpp
+++ b/reg-test/reg_test_coherence_blockMatching.cpp
@@ -78,7 +78,7 @@ void check_matching_difference(int dim,
 }
 
 void test(AladinContent *con, Platform *platform) {
-    std::unique_ptr<Kernel> blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) };
+    unique_ptr<Kernel> blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) };
     blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
 }
 
@@ -120,8 +120,8 @@ int main(int argc, char **argv) {
     for (size_t i = 0; i < referenceImage->nvox; ++i) mask[i] = i;
 
     // CPU Platform
-    std::unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
-    std::unique_ptr<AladinContent> conCpu{ new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
+    unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
+    unique_ptr<AladinContent> conCpu{ new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
     conCpu->SetWarped(warpedImage);
     test(conCpu.get(), platformCpu.get());
     _reg_blockMatchingParam *blockMatchingParams_cpu = conCpu->GetBlockMatchingParams();
@@ -132,9 +132,9 @@ int main(int argc, char **argv) {
 #endif
 
     // GPU Platform
-    std::unique_ptr<Platform> platformGpu{ new Platform(platformType) };
-    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
-    std::unique_ptr<AladinContent> conGpu{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
+    unique_ptr<Platform> platformGpu{ new Platform(platformType) };
+    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
+    unique_ptr<AladinContent> conGpu{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
     conGpu->SetWarped(warpedImage);
     test(conGpu.get(), platformGpu.get());
     _reg_blockMatchingParam *blockMatchingParams_gpu = conGpu->GetBlockMatchingParams();
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
index 07fbc7d5..04007080 100644
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ b/reg-test/reg_test_coherence_interpolation.cpp
@@ -58,25 +58,25 @@ int main(int argc, char **argv) {
     int *tempMask = (int *)calloc(referenceImage->nvox, sizeof(int));
 
     // CPU platform
-    std::unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
-    std::unique_ptr<AladinContent> conCpu{ new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)) };
+    unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
+    unique_ptr<AladinContent> conCpu{ new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)) };
     conCpu->SetWarped(cpuWarped);
     conCpu->SetDeformationField(inputDeformationField);
     conCpu->SetReferenceMask(tempMask);
-    std::unique_ptr<Kernel> resampleImageKernel_cpu{ platformCpu->CreateKernel(ResampleImageKernel::GetName(), conCpu) };
+    unique_ptr<Kernel> resampleImageKernel_cpu{ platformCpu->CreateKernel(ResampleImageKernel::GetName(), conCpu) };
     resampleImageKernel_cpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
                                                                       std::numeric_limits<float>::quiet_NaN());
     cpuWarped = conCpu->GetWarped();
 
     // GPU platform
-    std::unique_ptr<Platform> platformGpu{ new Platform(platformType) };
-    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
-    std::unique_ptr<AladinContent> conGpu{ contentCreator->Create(nullptr, referenceImage, nullptr, sizeof(float)) };
+    unique_ptr<Platform> platformGpu{ new Platform(platformType) };
+    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
+    unique_ptr<AladinContent> conGpu{ contentCreator->Create(nullptr, referenceImage, nullptr, sizeof(float)) };
     conGpu->SetWarped(gpuWarped);
     conGpu->SetDeformationField(inputDeformationField);
     conGpu->SetReferenceMask(tempMask);
 
-    std::unique_ptr<Kernel> resampleImageKernel_gpu{ platformGpu->CreateKernel(ResampleImageKernel::GetName(), conGpu) };
+    unique_ptr<Kernel> resampleImageKernel_gpu{ platformGpu->CreateKernel(ResampleImageKernel::GetName(), conGpu) };
     resampleImageKernel_gpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
                                                                       std::numeric_limits<float>::quiet_NaN());
     gpuWarped = conGpu->GetWarped();
@@ -89,7 +89,7 @@ int main(int argc, char **argv) {
 
     // Compute the difference between the warped images
     nifti_image *diff_field = nifti_copy_nim_info(referenceImage);
-    diff_field->data = (void *)malloc(diff_field->nvox * diff_field->nbyper);
+    diff_field->data = malloc(diff_field->nvox * diff_field->nbyper);
 
     // Compute the difference between the computed and inputted warped image
     reg_tools_subtractImageFromImage(cpuWarped, gpuWarped, diff_field);
diff --git a/reg-test/reg_test_compose_deformation_field.cpp b/reg-test/reg_test_compose_deformation_field.cpp
index 2833f82e..d3081015 100644
--- a/reg-test/reg_test_compose_deformation_field.cpp
+++ b/reg-test/reg_test_compose_deformation_field.cpp
@@ -37,7 +37,7 @@ int main(int argc, char **argv)
 
    // Create a deformation field
    nifti_image *test_field=nifti_copy_nim_info(inputDeformationField);
-   test_field->data=(void *)malloc(test_field->nvox*test_field->nbyper);
+   test_field->data=malloc(test_field->nvox*test_field->nbyper);
    memcpy(test_field->data, inputDeformationField->data, test_field->nvox*test_field->nbyper);
 
    // Compute the non-linear deformation field
diff --git a/reg-test/reg_test_computation_time.cpp b/reg-test/reg_test_computation_time.cpp
index ace1f4f3..f883e70f 100644
--- a/reg-test/reg_test_computation_time.cpp
+++ b/reg-test/reg_test_computation_time.cpp
@@ -50,7 +50,7 @@ int main(int argc, char **argv)
 
     // Allocate a warped image
     nifti_image *warpedImage = nifti_copy_nim_info(inputImageOne);
-    warpedImage->data = (void *)malloc(warpedImage->nvox*warpedImage->nbyper);
+    warpedImage->data = malloc(warpedImage->nvox*warpedImage->nbyper);
 
     // Create mask
     int *mask = (int *)calloc(inputImageOne->nvox,sizeof(int));
@@ -61,11 +61,11 @@ int main(int argc, char **argv)
     defFieldOne->nt=defFieldOne->dim[4]=1;
     defFieldOne->nu=defFieldOne->dim[5]=defFieldOne->nz>1?3:2;
     defFieldOne->nvox = CalcVoxelNumber(*defFieldOne, defFieldOne->ndim);
-    defFieldOne->data = (void *)malloc(defFieldOne->nvox*defFieldOne->nbyper);
+    defFieldOne->data = malloc(defFieldOne->nvox*defFieldOne->nbyper);
     nifti_image *defFieldTwo=nifti_copy_nim_info(defFieldOne);
-    defFieldTwo->data = (void *)malloc(defFieldTwo->nvox*defFieldTwo->nbyper);
+    defFieldTwo->data = malloc(defFieldTwo->nvox*defFieldTwo->nbyper);
     nifti_image *defFieldThr=nifti_copy_nim_info(defFieldOne);
-    defFieldThr->data = (void *)malloc(defFieldThr->nvox*defFieldThr->nbyper);
+    defFieldThr->data = malloc(defFieldThr->nvox*defFieldThr->nbyper);
 
 
     // Generate a control point grids
@@ -79,7 +79,7 @@ int main(int argc, char **argv)
                                       inputImageOne,
                                       spacing);
     nifti_image *splineGridTwo = nifti_copy_nim_info(splineGridOne);
-    splineGridTwo->data = (void *)malloc(splineGridTwo->nvox*splineGridTwo->nbyper);
+    splineGridTwo->data = malloc(splineGridTwo->nvox*splineGridTwo->nbyper);
 
     // Generate an affine matrix
     mat44 affine;reg_mat44_eye(&affine);
diff --git a/reg-test/reg_test_convolution.cpp b/reg-test/reg_test_convolution.cpp
index 065261b6..7d0e25b1 100644
--- a/reg-test/reg_test_convolution.cpp
+++ b/reg-test/reg_test_convolution.cpp
@@ -39,7 +39,7 @@ int main(int argc, char **argv)
 
     // Compute the difference between the computed and expected deformation fields
     nifti_image *diff_file = nifti_copy_nim_info(expectedFile);
-    diff_file->data = (void *) malloc(diff_file->nvox*diff_file->nbyper);
+    diff_file->data = malloc(diff_file->nvox*diff_file->nbyper);
     reg_tools_subtractImageFromImage(expectedFile, referenceImage, diff_file);
     reg_tools_abs_image(diff_file);
     double max_difference = reg_tools_getMaxValue(diff_file, -1);
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index f0fb9ced..2254836d 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -44,13 +44,13 @@ int main(int argc, char **argv)
     gradientImage->nvox = CalcVoxelNumber(*gradientImage, gradientImage->ndim);
     gradientImage->nbyper=sizeof(float);
     gradientImage->datatype=NIFTI_TYPE_FLOAT32;
-    gradientImage->data=(void *)malloc(gradientImage->nvox*gradientImage->nbyper);
+    gradientImage->data=malloc(gradientImage->nvox*gradientImage->nbyper);
 
     // Allocate a temporary file to compute the gradient's timepoint one at the time
     nifti_image *tempGradImage = nifti_copy_nim_info(gradientImage);
     tempGradImage->dim[4]=tempGradImage->nt=1;
     tempGradImage->nvox = CalcVoxelNumber(*tempGradImage, tempGradImage->ndim);
-    tempGradImage->data=(void *)malloc(tempGradImage->nvox*tempGradImage->nbyper);
+    tempGradImage->data=malloc(tempGradImage->nvox*tempGradImage->nbyper);
 
     // Declare a deformation field image
     nifti_image *defFieldImage = nullptr;
@@ -69,7 +69,7 @@ int main(int argc, char **argv)
         strcpy(defFieldImage->intent_name,"NREG_TRANS");
         defFieldImage->intent_p1=DISP_FIELD;
         // Set the deformation field to identity
-        defFieldImage->data = (void *)calloc(defFieldImage->nvox, defFieldImage->nbyper);
+        defFieldImage->data = calloc(defFieldImage->nvox, defFieldImage->nbyper);
         reg_getDeformationFromDisplacement(defFieldImage);
     }
 
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 0afef586..eb6e9e5b 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -24,7 +24,7 @@
 
 
 typedef std::tuple<std::string, nifti_image*, nifti_image*, int, float*> TestData;
-typedef std::tuple<std::unique_ptr<AladinContent>, std::unique_ptr<Platform>> ContentDesc;
+typedef std::tuple<unique_ptr<AladinContent>, unique_ptr<Platform>> ContentDesc;
 
 template <typename T>
 void interpCubicSplineKernel(T relative, T (&basis)[4]) {
@@ -214,9 +214,9 @@ TEST_CASE("Resampling", "[resampling]") {
         // Accumulate all required contents with a vector
         std::vector<ContentDesc> contentDescs;
         for (auto&& platformType : PlatformTypes) {
-            std::unique_ptr<Platform> platform{ new Platform(platformType) };
-            std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
-            std::unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference) };
+            unique_ptr<Platform> platform{ new Platform(platformType) };
+            unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+            unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference) };
             contentDescs.push_back(ContentDesc(std::move(content), std::move(platform)));
         }
 
@@ -237,7 +237,7 @@ TEST_CASE("Resampling", "[resampling]") {
                 // Set the deformation field
                 content->SetDeformationField(defField);
                 // Initialise the platform to run current content and retrieve deformation field
-                std::unique_ptr<Kernel> resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) };
+                unique_ptr<Kernel> resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) };
                 // args = interpolation and padding
 
                 resampleKernel->castTo<ResampleImageKernel>()->Calculate(interp, 0);
diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp
index 921c1b2f..b175350d 100644
--- a/reg-test/reg_test_leastTrimmedSquares.cpp
+++ b/reg-test/reg_test_leastTrimmedSquares.cpp
@@ -25,7 +25,7 @@ int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max
 }
 
 void test(AladinContent *con, Platform *platform, bool isAffine) {
-    std::unique_ptr<Kernel> optimiseKernel{ platform->CreateKernel(OptimiseKernel::GetName(), con) };
+    unique_ptr<Kernel> optimiseKernel{ platform->CreateKernel(OptimiseKernel::GetName(), con) };
     optimiseKernel->castTo<OptimiseKernel>()->Calculate(isAffine);
 }
 
@@ -59,9 +59,9 @@ int main(int argc, char **argv) {
     mat44 *expectedLSMatrix = reg_tool_ReadMat44File(expectedLTSMatrixFilename);
 
     // Platform
-    std::unique_ptr<Platform> platform{ new Platform(platformType) };
-    std::unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
-    std::unique_ptr<AladinContent> con{ contentCreator->Create() };
+    unique_ptr<Platform> platform{ new Platform(platformType) };
+    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+    unique_ptr<AladinContent> con{ contentCreator->Create() };
 
     float max_difference = 0;
     unsigned int num_points = m1;
diff --git a/reg-test/reg_test_linearElasticityGradient.cpp b/reg-test/reg_test_linearElasticityGradient.cpp
index 17a0d9da..4265b270 100644
--- a/reg-test/reg_test_linearElasticityGradient.cpp
+++ b/reg-test/reg_test_linearElasticityGradient.cpp
@@ -36,7 +36,7 @@ int main(int argc, char **argv)
 
     // Compute the linear elasticity gradient
     nifti_image *obtainedGradient = nifti_copy_nim_info(expectedGradientImage);
-    obtainedGradient->data=(void *)calloc(obtainedGradient->nvox,obtainedGradient->nbyper);
+    obtainedGradient->data=calloc(obtainedGradient->nvox,obtainedGradient->nbyper);
     switch(computationType){
     case 0: // Approximation based on the control point grid
        reg_spline_approxLinearEnergyGradient(transImage,
@@ -60,7 +60,7 @@ int main(int argc, char **argv)
     }
     // Compute the difference between the computed and expected gradient
     nifti_image *diff_field = nifti_copy_nim_info(obtainedGradient);
-    diff_field->data = (void *)malloc(diff_field->nvox*diff_field->nbyper);
+    diff_field->data = malloc(diff_field->nvox*diff_field->nbyper);
     reg_tools_subtractImageFromImage(obtainedGradient, expectedGradientImage, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_getMaxValue(diff_field, -1);
diff --git a/reg-test/reg_test_mindDescriptor.cpp b/reg-test/reg_test_mindDescriptor.cpp
index bd33496c..09a94729 100644
--- a/reg-test/reg_test_mindDescriptor.cpp
+++ b/reg-test/reg_test_mindDescriptor.cpp
@@ -40,7 +40,7 @@ int main(int argc, char **argv)
     MIND_img->ndim = MIND_img->dim[0] = 4;
     MIND_img->nt = MIND_img->dim[4] = 2*dim;
     MIND_img->nvox = MIND_img->nvox*2*dim;
-    MIND_img->data=(void *)calloc(MIND_img->nvox,MIND_img->nbyper);
+    MIND_img->data=calloc(MIND_img->nvox,MIND_img->nbyper);
 
     // Compute the MIND descriptor
     int *mask = (int *)calloc(inputImage->nvox, sizeof(int));
diff --git a/reg-test/reg_test_mindsscDescriptor.cpp b/reg-test/reg_test_mindsscDescriptor.cpp
index 2da9a047..161b14a1 100644
--- a/reg-test/reg_test_mindsscDescriptor.cpp
+++ b/reg-test/reg_test_mindsscDescriptor.cpp
@@ -44,7 +44,7 @@ int main(int argc, char **argv)
     MINDSSC_img->ndim = MINDSSC_img->dim[0] = 4;
     MINDSSC_img->nt = MINDSSC_img->dim[4] = lengthDescritor;
     MINDSSC_img->nvox = MINDSSC_img->nvox*lengthDescritor;
-    MINDSSC_img->data=(void *)calloc(MINDSSC_img->nvox,MINDSSC_img->nbyper);
+    MINDSSC_img->data=calloc(MINDSSC_img->nvox,MINDSSC_img->nbyper);
 
     // Compute the MIND descriptor
     int *mask = (int *)calloc(inputImage->nvox, sizeof(int));
diff --git a/reg-test/reg_test_nonlinear_deformation_field.cpp b/reg-test/reg_test_nonlinear_deformation_field.cpp
index 63e47f20..d208b353 100644
--- a/reg-test/reg_test_nonlinear_deformation_field.cpp
+++ b/reg-test/reg_test_nonlinear_deformation_field.cpp
@@ -45,7 +45,7 @@ int main(int argc, char **argv)
 
    // Create a deformation field
    nifti_image *test_field=nifti_copy_nim_info(inputDeformationField);
-   test_field->data=(void *)malloc(test_field->nvox*test_field->nbyper);
+   test_field->data=malloc(test_field->nvox*test_field->nbyper);
 
    // Compute the non-linear deformation field
    memset(test_field->data, 0, test_field->nvox*test_field->nbyper);

From 235dece12032b6e01552368c3815055216dd8c03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 16 Feb 2023 15:23:20 +0000
Subject: [PATCH 052/314] Add cudaCommon_createTextureObject() to create
 managed CUDA texture objects

---
 niftyreg_build_version.txt       |  2 +-
 reg-lib/cuda/_reg_common_cuda.cu | 53 ++++++++++++++++++++++++++++++++
 reg-lib/cuda/_reg_common_cuda.h  | 14 +++++++++
 3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9e42f3ef..cdffbbc4 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-165
+166
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 3178cf40..2ae6debd 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -623,3 +623,56 @@ template int cudaCommon_transferArrayFromDeviceToCpu<int>(int*, int*, const unsi
 template int cudaCommon_transferArrayFromDeviceToCpu<float>(float*, float*, const unsigned int);
 template int cudaCommon_transferArrayFromDeviceToCpu<double>(double*, double*, const unsigned int);
 /* *************************************************************** */
+void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) {
+    NR_CUDA_SAFE_CALL(cudaDestroyTextureObject(*texObj));
+    delete texObj;
+}
+/* *************************************************************** */
+UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr,
+                                                      cudaResourceType resType,
+                                                      bool normalizedCoordinates,
+                                                      size_t size,
+                                                      cudaChannelFormatKind channelFormat,
+                                                      unsigned channelCount,
+                                                      cudaTextureFilterMode filterMode) {
+    // Specify texture
+    cudaResourceDesc resDesc{};
+    resDesc.resType = resType;
+    switch (resType) {
+    case cudaResourceTypeLinear:
+        resDesc.res.linear.devPtr = devPtr;
+        resDesc.res.linear.desc.f = channelFormat;
+        resDesc.res.linear.desc.x = 32;
+        if (channelCount > 1)
+            resDesc.res.linear.desc.y = 32;
+        if (channelCount > 2)
+            resDesc.res.linear.desc.z = 32;
+        if (channelCount > 3)
+            resDesc.res.linear.desc.w = 32;
+        resDesc.res.linear.sizeInBytes = size;
+        break;
+    case cudaResourceTypeArray:
+        resDesc.res.array.array = static_cast<cudaArray*>(devPtr);
+        break;
+    default:
+        reg_print_fct_error("reg_createTextureObject");
+        reg_print_msg_error("Unsupported resource type");
+        reg_exit();
+    }
+
+    // Specify texture object parameters
+    cudaTextureDesc texDesc{};
+    texDesc.addressMode[0] = cudaAddressModeWrap;
+    texDesc.addressMode[1] = cudaAddressModeWrap;
+    texDesc.addressMode[2] = cudaAddressModeWrap;
+    texDesc.filterMode = filterMode;
+    texDesc.readMode = cudaReadModeElementType;
+    texDesc.normalizedCoords = normalizedCoordinates;
+
+    // Create texture object
+    UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), &cudaCommon_destroyTextureObject);
+    NR_CUDA_SAFE_CALL(cudaCreateTextureObject(texObj.get(), &resDesc, &texDesc, nullptr));
+
+    return texObj;
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index ea834349..18845c32 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -138,3 +138,17 @@ extern "C++"
 template <class DTYPE>
 int cudaCommon_transferArrayFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int);
 /* *************************************************************** */
+extern "C++"
+void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj);
+/* *************************************************************** */
+using UniqueTextureObjectPtr = std::unique_ptr<cudaTextureObject_t, decltype(&cudaCommon_destroyTextureObject)>;
+/* *************************************************************** */
+extern "C++"
+UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr,
+													  cudaResourceType resType,
+													  bool normalizedCoordinates = false,
+													  size_t size = 0,
+													  cudaChannelFormatKind channelFormat = cudaChannelFormatKindNone,
+													  unsigned channelCount = 1,
+													  cudaTextureFilterMode filterMode = cudaFilterModeLinear);
+/* *************************************************************** */

From abaf91d256851f91b379843f830c4c3c2382c6ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 16 Feb 2023 15:27:55 +0000
Subject: [PATCH 053/314] Modernise CUDA resampling functions

 - Ditch old texture objects and use up-to-date ones
 - Make texture objects managed
 - Ditch CUDA symbols and pass them as kernel function parameters
---
 niftyreg_build_version.txt              |   2 +-
 reg-lib/cuda/_reg_resampling_gpu.cu     | 106 ++------
 reg-lib/cuda/_reg_resampling_gpu.h      |   4 +-
 reg-lib/cuda/_reg_resampling_kernels.cu | 319 ++++++++++++------------
 4 files changed, 187 insertions(+), 244 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index cdffbbc4..f2c1eeeb 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-166
+167
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index df4954ef..877f275e 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -19,137 +19,85 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
                            cudaArray *floatingImageArray_d,
                            float4 *deformationFieldImageArray_d,
                            int *mask_d,
-                           int activeVoxelNumber,
+                           size_t activeVoxelNumber,
                            float paddingValue) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim, &floatingDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue, &paddingValue, sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
+    // Create texture object for the floating image
+    auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray);
 
-    //Bind floating image array to a 3D texture
-    floatingTexture.normalized = false;
-    floatingTexture.filterMode = cudaFilterModeLinear;
-    floatingTexture.addressMode[0] = cudaAddressModeWrap;
-    floatingTexture.addressMode[1] = cudaAddressModeWrap;
-    floatingTexture.addressMode[2] = cudaAddressModeWrap;
+    // Create texture object for the deformation field
+    auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
+                                                                    false, activeVoxelNumber * sizeof(float4),
+                                                                    cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
-    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc));
-
-    //Bind deformationField to texture
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber * sizeof(float4)));
-
-    //Bind mask to texture
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int)));
+    // Create texture object for the mask
+    auto&& maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(int),
+                                                        cudaChannelFormatKindSigned, 1, cudaFilterModePoint);
 
     // Bind the real to voxel matrix to texture
-    mat44 *floatingMatrix;
+    mat44 floatingMatrix;
     if (floatingImage->sform_code > 0)
-        floatingMatrix = &(floatingImage->sto_ijk);
-    else floatingMatrix = &(floatingImage->qto_ijk);
-    float4 *floatingRealToVoxel_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3 * sizeof(float4)));
-    float4 *floatingRealToVoxel_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3 * sizeof(float4)));
-    for (int i = 0; i < 3; i++) {
-        floatingRealToVoxel_h[i].x = floatingMatrix->m[i][0];
-        floatingRealToVoxel_h[i].y = floatingMatrix->m[i][1];
-        floatingRealToVoxel_h[i].z = floatingMatrix->m[i][2];
-        floatingRealToVoxel_h[i].w = floatingMatrix->m[i][3];
-    }
-    NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3 * sizeof(float4), cudaMemcpyHostToDevice));
-    NR_CUDA_SAFE_CALL(cudaFreeHost(floatingRealToVoxel_h));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3 * sizeof(float4)));
+        floatingMatrix = floatingImage->sto_ijk;
+    else floatingMatrix = floatingImage->qto_ijk;
 
     if (floatingImage->nz > 1) {
         const unsigned int Grid_reg_resamplefloatingImage3D =
             (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage3D));
         dim3 B1(NR_BLOCK->Block_reg_resampleImage3D, 1, 1);
         dim3 G1(Grid_reg_resamplefloatingImage3D, Grid_reg_resamplefloatingImage3D, 1);
-        reg_resampleImage3D_kernel<<<G1, B1>>>(warpedImageArray_d);
+        reg_resampleImage3D_kernel<<<G1, B1>>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     } else {
         const unsigned int Grid_reg_resamplefloatingImage2D =
             (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage2D));
         dim3 B1(NR_BLOCK->Block_reg_resampleImage2D, 1, 1);
         dim3 G1(Grid_reg_resamplefloatingImage2D, Grid_reg_resamplefloatingImage2D, 1);
-        reg_resampleImage2D_kernel<<<G1, B1>>>(warpedImageArray_d);
+        reg_resampleImage2D_kernel<<<G1, B1>>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     }
-
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture));
-    NR_CUDA_SAFE_CALL(cudaFree(floatingRealToVoxel_d));
 }
 /* *************************************************************** */
 void reg_getImageGradient_gpu(nifti_image *floatingImage,
                               cudaArray *floatingImageArray_d,
                               float4 *deformationFieldImageArray_d,
                               float4 *warpedGradientArray_d,
-                              int activeVoxelNumber,
+                              size_t activeVoxelNumber,
                               float paddingValue) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim, &floatingDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue, &paddingValue, sizeof(float)));
+    // Create texture object for the floating image
+    auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, true);
 
-    //Bind floating image array to a 3D texture
-    floatingTexture.normalized = true;
-    floatingTexture.filterMode = cudaFilterModeLinear;
-    floatingTexture.addressMode[0] = cudaAddressModeWrap;
-    floatingTexture.addressMode[1] = cudaAddressModeWrap;
-    floatingTexture.addressMode[2] = cudaAddressModeWrap;
-
-    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc));
-
-    //Bind deformationField to texture
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber * sizeof(float4)));
+    // Create texture object for the deformation field
+    auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
+                                                                    false, activeVoxelNumber * sizeof(float4),
+                                                                    cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
     // Bind the real to voxel matrix to texture
-    mat44 *floatingMatrix;
+    mat44 floatingMatrix;
     if (floatingImage->sform_code > 0)
-        floatingMatrix = &(floatingImage->sto_ijk);
-    else floatingMatrix = &(floatingImage->qto_ijk);
-    float4 *floatingRealToVoxel_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3 * sizeof(float4)));
-    float4 *floatingRealToVoxel_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3 * sizeof(float4)));
-    for (int i = 0; i < 3; i++) {
-        floatingRealToVoxel_h[i].x = floatingMatrix->m[i][0];
-        floatingRealToVoxel_h[i].y = floatingMatrix->m[i][1];
-        floatingRealToVoxel_h[i].z = floatingMatrix->m[i][2];
-        floatingRealToVoxel_h[i].w = floatingMatrix->m[i][3];
-    }
-    NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3 * sizeof(float4), cudaMemcpyHostToDevice));
-    NR_CUDA_SAFE_CALL(cudaFreeHost(floatingRealToVoxel_h));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3 * sizeof(float4)));
+        floatingMatrix = floatingImage->sto_ijk;
+    else floatingMatrix = floatingImage->qto_ijk;
 
     if (floatingImage->nz > 1) {
         const unsigned int Grid_reg_getImageGradient3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient3D));
         dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D, 1, 1);
         dim3 G1(Grid_reg_getImageGradient3D, Grid_reg_getImageGradient3D, 1);
-        reg_getImageGradient3D_kernel<<<G1, B1>>>(warpedGradientArray_d);
+        reg_getImageGradient3D_kernel<<<G1, B1>>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     } else {
         const unsigned int Grid_reg_getImageGradient2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient2D));
         dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D, 1, 1);
         dim3 G1(Grid_reg_getImageGradient2D, Grid_reg_getImageGradient2D, 1);
-        reg_getImageGradient2D_kernel<<<G1, B1>>>(warpedGradientArray_d);
+        reg_getImageGradient2D_kernel<<<G1, B1>>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     }
-
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture));
-    NR_CUDA_SAFE_CALL(cudaFree(floatingRealToVoxel_d));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h
index af540f68..4dcf81fe 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/_reg_resampling_gpu.h
@@ -21,7 +21,7 @@ void reg_resampleImage_gpu(nifti_image *sourceImage,
                            cudaArray *sourceImageArray_d,
                            float4 *positionFieldImageArray_d,
                            int *mask_d,
-                           int activeVoxelNumber,
+                           size_t activeVoxelNumber,
                            float paddingValue);
 
 extern "C++"
@@ -29,5 +29,5 @@ void reg_getImageGradient_gpu(nifti_image *sourceImage,
                               cudaArray *sourceImageArray_d,
                               float4 *positionFieldImageArray_d,
                               float4 *resultGradientArray_d,
-                              int activeVoxelNumber,
+                              size_t activeVoxelNumber,
                               float paddingValue);
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index dbcb5055..f37b4528 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -10,206 +10,202 @@
  *
  */
 
-texture<float, 3, cudaReadModeElementType> floatingTexture;
-texture<float4, 1, cudaReadModeElementType> floatingMatrixTexture;
-texture<float4, 1, cudaReadModeElementType> deformationFieldTexture;
-texture<int, 1, cudaReadModeElementType> maskTexture;
 /* *************************************************************** */
-__device__ __constant__ int3 c_FloatingDim;
-__device__ __constant__ int c_VoxelNumber;
-__device__ __constant__ float c_PaddingValue;
-__device__ __constant__ int c_ActiveVoxelNumber;
-/* *************************************************************** */
-/* *************************************************************** */
-__global__ void reg_resampleImage2D_kernel(float *resultArray)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_ActiveVoxelNumber){
-
+__global__ void reg_resampleImage2D_kernel(float *resultArray,
+                                           cudaTextureObject_t floatingTexture,
+                                           cudaTextureObject_t deformationFieldTexture,
+                                           cudaTextureObject_t maskTexture,
+                                           mat44 floatingMatrix,
+                                           int3 floatingDim,
+                                           size_t activeVoxelNumber,
+                                           float paddingValue) {
+    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < activeVoxelNumber) {
         //Get the real world deformation in the floating space
-        const int tid2 = tex1Dfetch(maskTexture,tid);
-        float4 realdeformation = tex1Dfetch(deformationFieldTexture,tid);
+        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
+        float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
         //Get the voxel-based deformation in the floating space
-        float2 voxeldeformation;
-        float4 matrix = tex1Dfetch(floatingMatrixTexture,0);
-        voxeldeformation.x =
-                matrix.x*realdeformation.x +
-                matrix.y*realdeformation.y +
-                matrix.w;
-        matrix = tex1Dfetch(floatingMatrixTexture,1);
-        voxeldeformation.y =
-                matrix.x*realdeformation.x +
-                matrix.y*realdeformation.y +
-                matrix.w;
-
-        int3 floatingImageSize = c_FloatingDim;
-        if( voxeldeformation.x>=0.0f && voxeldeformation.x<=floatingImageSize.x-1 &&
-            voxeldeformation.y>=0.0f && voxeldeformation.y<=floatingImageSize.y-1 ){
-            resultArray[tid2]=tex3D(floatingTexture, voxeldeformation.x+0.5f, voxeldeformation.y+0.5f, 0.5f);
-        }
-        else resultArray[tid2]=c_PaddingValue;
+        float2 voxelDeformation;
+        voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
+                              floatingMatrix.m[0][1] * realDeformation.y +
+                              floatingMatrix.m[0][3]);
+        voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
+                              floatingMatrix.m[1][1] * realDeformation.y +
+                              floatingMatrix.m[1][3]);
+
+        if (voxelDeformation.x >= 0.0f && voxelDeformation.x <= floatingDim.x - 1 &&
+            voxelDeformation.y >= 0.0f && voxelDeformation.y <= floatingDim.y - 1) {
+            resultArray[tid2] = tex3D<float>(floatingTexture, voxelDeformation.x + 0.5f, voxelDeformation.y + 0.5f, 0.5f);
+        } else resultArray[tid2] = paddingValue;
     }
 }
 /* *************************************************************** */
-__global__ void reg_resampleImage3D_kernel(float *resultArray)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_ActiveVoxelNumber){
-        const int tid2 = tex1Dfetch(maskTexture,tid);
+__global__ void reg_resampleImage3D_kernel(float *resultArray,
+                                           cudaTextureObject_t floatingTexture,
+                                           cudaTextureObject_t deformationFieldTexture,
+                                           cudaTextureObject_t maskTexture,
+                                           mat44 floatingMatrix,
+                                           int3 floatingDim,
+                                           size_t activeVoxelNumber,
+                                           float paddingValue) {
+    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < activeVoxelNumber) {
+        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
 
         //Get the real world deformation in the floating space
-        float4 realdeformation = tex1Dfetch(deformationFieldTexture,tid);
+        float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
         //Get the voxel-based deformation in the floating space
-        float3 voxeldeformation;
-        float4 matrix = tex1Dfetch(floatingMatrixTexture,0);
-        voxeldeformation.x =	matrix.x*realdeformation.x + matrix.y*realdeformation.y  +
-                                matrix.z*realdeformation.z  +  matrix.w;
-        matrix = tex1Dfetch(floatingMatrixTexture,1);
-        voxeldeformation.y =	matrix.x*realdeformation.x + matrix.y*realdeformation.y  +
-                                matrix.z*realdeformation.z  +  matrix.w;
-        matrix = tex1Dfetch(floatingMatrixTexture,2);
-        voxeldeformation.z =	matrix.x*realdeformation.x + matrix.y*realdeformation.y  +
-                                matrix.z*realdeformation.z  +  matrix.w;
-
-        int3 floatingImageSize = c_FloatingDim;
-        if( voxeldeformation.x>=0.0f && voxeldeformation.x<=floatingImageSize.x-1 &&
-            voxeldeformation.y>=0.0f && voxeldeformation.y<=floatingImageSize.y-1 &&
-            voxeldeformation.z>=0.0f && voxeldeformation.z<=floatingImageSize.z-1 ){
-            resultArray[tid2]=tex3D(floatingTexture, voxeldeformation.x+0.5f, voxeldeformation.y+0.5f, voxeldeformation.z+0.5f);
-        }
-        else resultArray[tid2]=c_PaddingValue;
+        float3 voxelDeformation;
+        voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
+                              floatingMatrix.m[0][1] * realDeformation.y +
+                              floatingMatrix.m[0][2] * realDeformation.z +
+                              floatingMatrix.m[0][3]);
+        voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
+                              floatingMatrix.m[1][1] * realDeformation.y +
+                              floatingMatrix.m[1][2] * realDeformation.z +
+                              floatingMatrix.m[1][3]);
+        voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x +
+                              floatingMatrix.m[2][1] * realDeformation.y +
+                              floatingMatrix.m[2][2] * realDeformation.z +
+                              floatingMatrix.m[2][3]);
+
+        if (voxelDeformation.x >= 0.0f && voxelDeformation.x <= floatingDim.x - 1 &&
+            voxelDeformation.y >= 0.0f && voxelDeformation.y <= floatingDim.y - 1 &&
+            voxelDeformation.z >= 0.0f && voxelDeformation.z <= floatingDim.z - 1) {
+            resultArray[tid2] = tex3D<float>(floatingTexture, voxelDeformation.x + 0.5f, voxelDeformation.y + 0.5f, voxelDeformation.z + 0.5f);
+        } else resultArray[tid2] = paddingValue;
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__global__ void reg_getImageGradient2D_kernel(float4 *gradientArray)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_ActiveVoxelNumber){
-
+__global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
+                                              cudaTextureObject_t floatingTexture,
+                                              cudaTextureObject_t deformationFieldTexture,
+                                              mat44 floatingMatrix,
+                                              int3 floatingDim,
+                                              size_t activeVoxelNumber,
+                                              float paddingValue) {
+    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < activeVoxelNumber) {
         //Get the real world deformation in the floating space
-        float4 realdeformation = tex1Dfetch(deformationFieldTexture,tid);
+        float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
         //Get the voxel-based deformation in the floating space
-        float3 voxeldeformation;
-        float4 matrix = tex1Dfetch(floatingMatrixTexture,0);
-        voxeldeformation.x =
-                matrix.x*realdeformation.x +
-                matrix.y*realdeformation.y  +
-                matrix.w;
-        matrix = tex1Dfetch(floatingMatrixTexture,1);
-        voxeldeformation.y =
-                matrix.x*realdeformation.x +
-                matrix.y*realdeformation.y  +
-                matrix.w;
+        float3 voxelDeformation;
+        voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
+                              floatingMatrix.m[0][1] * realDeformation.y +
+                              floatingMatrix.m[0][3]);
+        voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
+                              floatingMatrix.m[1][1] * realDeformation.y +
+                              floatingMatrix.m[1][3]);
 
         int2 voxel;
-        voxel.x = (int)(voxeldeformation.x);
-        voxel.y = (int)(voxeldeformation.y);
+        voxel.x = (int)(voxelDeformation.x);
+        voxel.y = (int)(voxelDeformation.y);
 
         float xBasis[2];
-        float relative = fabsf(voxeldeformation.x - (float)voxel.x);
-        xBasis[0]=1.0f-relative;
-        xBasis[1]=relative;
+        float relative = fabsf(voxelDeformation.x - (float)voxel.x);
+        xBasis[0] = 1.0f - relative;
+        xBasis[1] = relative;
         float yBasis[2];
-        relative = fabsf(voxeldeformation.y - (float)voxel.y);
-        yBasis[0]=1.0f-relative;
-        yBasis[1]=relative;
+        relative = fabsf(voxelDeformation.y - (float)voxel.y);
+        yBasis[0] = 1.0f - relative;
+        yBasis[1] = relative;
         float deriv[2];
-        deriv[0]=-1.0f;
-        deriv[1]=1.0f;
-
-        float4 gradientValue=make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-        float2 relativedeformation;
-        for(short b=0; b<2; b++){
-            float2 tempValueX=make_float2(0.0f, 0.0f);
-            relativedeformation.y=((float)voxel.y+(float)b+0.5f)/(float)c_FloatingDim.y;
-            for(short a=0; a<2; a++){
-                relativedeformation.x=((float)voxel.x+(float)a+0.5f)/(float)c_FloatingDim.x;
-                float intensity=c_PaddingValue;
-
-                if(0.f<=relativedeformation.x && relativedeformation.x<=1.f &&
-                   0.f<=relativedeformation.y && relativedeformation.y<=1.f)
-                    intensity=tex3D(floatingTexture,
-                                    relativedeformation.x,
-                                    relativedeformation.y,
-                                    0.5f);
-
-                tempValueX.x +=  intensity * deriv[a];
-                tempValueX.y +=  intensity * xBasis[a];
+        deriv[0] = -1.0f;
+        deriv[1] = 1.0f;
+
+        float4 gradientValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        float2 relativeDeformation;
+        for (short b = 0; b < 2; b++) {
+            float2 tempValueX = make_float2(0.0f, 0.0f);
+            relativeDeformation.y = ((float)voxel.y + (float)b + 0.5f) / (float)floatingDim.y;
+            for (short a = 0; a < 2; a++) {
+                relativeDeformation.x = ((float)voxel.x + (float)a + 0.5f) / (float)floatingDim.x;
+                float intensity = paddingValue;
+
+                if (0.f <= relativeDeformation.x && relativeDeformation.x <= 1.f &&
+                    0.f <= relativeDeformation.y && relativeDeformation.y <= 1.f)
+                    intensity = tex3D<float>(floatingTexture, relativeDeformation.x, relativeDeformation.y, 0.5f);
+
+                tempValueX.x += intensity * deriv[a];
+                tempValueX.y += intensity * xBasis[a];
             }
             gradientValue.x += tempValueX.x * yBasis[b];
             gradientValue.y += tempValueX.y * deriv[b];
         }
-        gradientArray[tid]=gradientValue;
+        gradientArray[tid] = gradientValue;
     }
 }
 /* *************************************************************** */
-__global__ void reg_getImageGradient3D_kernel(float4 *gradientArray)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_ActiveVoxelNumber){
-
+__global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
+                                              cudaTextureObject_t floatingTexture,
+                                              cudaTextureObject_t deformationFieldTexture,
+                                              mat44 floatingMatrix,
+                                              int3 floatingDim,
+                                              size_t activeVoxelNumber,
+                                              float paddingValue) {
+    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < activeVoxelNumber) {
         //Get the real world deformation in the floating space
-        float4 realdeformation = tex1Dfetch(deformationFieldTexture,tid);
+        float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
         //Get the voxel-based deformation in the floating space
-        float3 voxeldeformation;
-        float4 matrix = tex1Dfetch(floatingMatrixTexture,0);
-        voxeldeformation.x =	matrix.x*realdeformation.x + matrix.y*realdeformation.y  +
-                                matrix.z*realdeformation.z  +  matrix.w;
-        matrix = tex1Dfetch(floatingMatrixTexture,1);
-        voxeldeformation.y =	matrix.x*realdeformation.x + matrix.y*realdeformation.y  +
-                                matrix.z*realdeformation.z  +  matrix.w;
-        matrix = tex1Dfetch(floatingMatrixTexture,2);
-        voxeldeformation.z =	matrix.x*realdeformation.x + matrix.y*realdeformation.y  +
-                                matrix.z*realdeformation.z  +  matrix.w;
+        float3 voxelDeformation;
+        voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
+                              floatingMatrix.m[0][1] * realDeformation.y +
+                              floatingMatrix.m[0][2] * realDeformation.z +
+                              floatingMatrix.m[0][3]);
+        voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
+                              floatingMatrix.m[1][1] * realDeformation.y +
+                              floatingMatrix.m[1][2] * realDeformation.z +
+                              floatingMatrix.m[1][3]);
+        voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x +
+                              floatingMatrix.m[2][1] * realDeformation.y +
+                              floatingMatrix.m[2][2] * realDeformation.z +
+                              floatingMatrix.m[2][3]);
 
         int3 voxel;
-        voxel.x = (int)(voxeldeformation.x);
-        voxel.y = (int)(voxeldeformation.y);
-        voxel.z = (int)(voxeldeformation.z);
+        voxel.x = (int)(voxelDeformation.x);
+        voxel.y = (int)(voxelDeformation.y);
+        voxel.z = (int)(voxelDeformation.z);
 
         float xBasis[2];
-        float relative = fabsf(voxeldeformation.x - (float)voxel.x);
-        xBasis[0]=1.0f-relative;
-        xBasis[1]=relative;
+        float relative = fabsf(voxelDeformation.x - (float)voxel.x);
+        xBasis[0] = 1.0f - relative;
+        xBasis[1] = relative;
         float yBasis[2];
-        relative = fabsf(voxeldeformation.y - (float)voxel.y);
-        yBasis[0]=1.0f-relative;
-        yBasis[1]=relative;
+        relative = fabsf(voxelDeformation.y - (float)voxel.y);
+        yBasis[0] = 1.0f - relative;
+        yBasis[1] = relative;
         float zBasis[2];
-        relative = fabsf(voxeldeformation.z - (float)voxel.z);
-        zBasis[0]=1.0f-relative;
-        zBasis[1]=relative;
+        relative = fabsf(voxelDeformation.z - (float)voxel.z);
+        zBasis[0] = 1.0f - relative;
+        zBasis[1] = relative;
         float deriv[2];
-        deriv[0]=-1.0f;
-        deriv[1]=1.0f;
-
-        float4 gradientValue=make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-        float3 relativedeformation;
-        for(short c=0; c<2; c++){
-            relativedeformation.z=((float)voxel.z+(float)c+0.5f)/(float)c_FloatingDim.z;
-            float3 tempValueY=make_float3(0.0f, 0.0f, 0.0f);
-            for(short b=0; b<2; b++){
-                float2 tempValueX=make_float2(0.0f, 0.0f);
-                relativedeformation.y=((float)voxel.y+(float)b+0.5f)/(float)c_FloatingDim.y;
-                for(short a=0; a<2; a++){
-                    relativedeformation.x=((float)voxel.x+(float)a+0.5f)/(float)c_FloatingDim.x;
-                    float intensity=c_PaddingValue;
-
-                    if(0.f<=relativedeformation.x && relativedeformation.x<=1.f &&
-                       0.f<=relativedeformation.y && relativedeformation.y<=1.f &&
-                       0.f<=relativedeformation.z && relativedeformation.z<=1.f)
-                        intensity=tex3D(floatingTexture,
-                                        relativedeformation.x,
-                                        relativedeformation.y,
-                                        relativedeformation.z);
-
-                    tempValueX.x +=  intensity * deriv[a];
-                    tempValueX.y +=  intensity * xBasis[a];
+        deriv[0] = -1.0f;
+        deriv[1] = 1.0f;
+
+        float4 gradientValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        float3 relativeDeformation;
+        for (short c = 0; c < 2; c++) {
+            relativeDeformation.z = ((float)voxel.z + (float)c + 0.5f) / (float)floatingDim.z;
+            float3 tempValueY = make_float3(0.0f, 0.0f, 0.0f);
+            for (short b = 0; b < 2; b++) {
+                float2 tempValueX = make_float2(0.0f, 0.0f);
+                relativeDeformation.y = ((float)voxel.y + (float)b + 0.5f) / (float)floatingDim.y;
+                for (short a = 0; a < 2; a++) {
+                    relativeDeformation.x = ((float)voxel.x + (float)a + 0.5f) / (float)floatingDim.x;
+                    float intensity = paddingValue;
+
+                    if (0.f <= relativeDeformation.x && relativeDeformation.x <= 1.f &&
+                        0.f <= relativeDeformation.y && relativeDeformation.y <= 1.f &&
+                        0.f <= relativeDeformation.z && relativeDeformation.z <= 1.f)
+                        intensity = tex3D<float>(floatingTexture, relativeDeformation.x, relativeDeformation.y, relativeDeformation.z);
+
+                    tempValueX.x += intensity * deriv[a];
+                    tempValueX.y += intensity * xBasis[a];
                 }
                 tempValueY.x += tempValueX.x * yBasis[b];
                 tempValueY.y += tempValueX.y * deriv[b];
@@ -219,8 +215,7 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray)
             gradientValue.y += tempValueY.y * zBasis[c];
             gradientValue.z += tempValueY.z * deriv[c];
         }
-        gradientArray[tid]=gradientValue;
+        gradientArray[tid] = gradientValue;
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */

From d1f78d5f74a88dba8f084ad2d24c8b29c1530568 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 17 Feb 2023 09:57:59 +0000
Subject: [PATCH 054/314] Add tests for *Compute::ResampleImage()

---
 niftyreg_build_version.txt          |  2 +-
 reg-lib/cpu/_reg_tools.h            |  1 +
 reg-test/reg_test_interpolation.cpp | 40 +++++++++++++++++++----------
 3 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f2c1eeeb..de8febe1 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-167
+168
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 59d467c2..d79dda14 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -20,6 +20,7 @@
 #include "_reg_maths.h"
 
 using std::unique_ptr;
+using std::shared_ptr;
 
 typedef enum {
     MEAN_KERNEL,
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index eb6e9e5b..e3183de1 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -12,19 +12,20 @@
 #include <list>
 #include <catch2/catch_test_macros.hpp>
 
-#define EPS_SINGLE 0.0001
+#define EPS_SINGLE 0.001
 
 /*
     This test file contains the following unit tests:
     test function: image resampling
     In 2D and 3D
-    linear
-    cubic
+    Nearest neighbour
+    Linear
+    Cubic spline
 */
 
 
 typedef std::tuple<std::string, nifti_image*, nifti_image*, int, float*> TestData;
-typedef std::tuple<unique_ptr<AladinContent>, unique_ptr<Platform>> ContentDesc;
+typedef std::tuple<unique_ptr<Content>, shared_ptr<Platform>> ContentDesc;
 
 template <typename T>
 void interpCubicSplineKernel(T relative, T (&basis)[4]) {
@@ -214,16 +215,25 @@ TEST_CASE("Resampling", "[resampling]") {
         // Accumulate all required contents with a vector
         std::vector<ContentDesc> contentDescs;
         for (auto&& platformType : PlatformTypes) {
-            unique_ptr<Platform> platform{ new Platform(platformType) };
-            unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
-            unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference) };
-            contentDescs.push_back(ContentDesc(std::move(content), std::move(platform)));
+            shared_ptr<Platform> platform{ new Platform(platformType) };
+            // Add Aladin content
+            unique_ptr<AladinContentCreator> aladinContentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+            unique_ptr<AladinContent> aladinContent{ aladinContentCreator->Create(reference, reference) };
+            contentDescs.push_back(ContentDesc(std::move(aladinContent), platform));
+            // Add content
+            if (platformType == PlatformType::Cuda && interp != 1)
+                continue;   // CUDA platform only supports linear interpolation
+            unique_ptr<ContentCreator> contentCreator{ dynamic_cast<ContentCreator*>(platform->CreateContentCreator()) };
+            unique_ptr<Content> content{ contentCreator->Create(reference, reference) };
+            contentDescs.push_back(ContentDesc(std::move(content), platform));
         }
 
         // Loop over all possibles contents for each test
         for (auto&& contentDesc : contentDescs) {
             auto&& [content, platform] = contentDesc;
-            SECTION(testName + " " + platform->GetName()) {
+            const bool isAladinContent = dynamic_cast<AladinContent*>(content.get());
+            auto contentName = isAladinContent ? "Aladin" : "Base";
+            SECTION(testName + " " + platform->GetName() + " - " + contentName) {
                 // Create and set a warped image to host the computation
                 nifti_image *warped = nifti_copy_nim_info(defField);
                 warped->ndim = warped->dim[0] = defField->nu;
@@ -236,11 +246,15 @@ TEST_CASE("Resampling", "[resampling]") {
                 content->SetWarped(warped);
                 // Set the deformation field
                 content->SetDeformationField(defField);
-                // Initialise the platform to run current content and retrieve deformation field
-                unique_ptr<Kernel> resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) };
-                // args = interpolation and padding
 
-                resampleKernel->castTo<ResampleImageKernel>()->Calculate(interp, 0);
+                if (isAladinContent) {
+                    unique_ptr<Kernel> resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) };
+                    resampleKernel->castTo<ResampleImageKernel>()->Calculate(interp, 0);
+                } else {
+                    unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                    compute->ResampleImage(interp, 0);
+                }
+
                 warped = content->GetWarped();
 
                 // Check all values

From 3203f382bd658235316e7beb8e4d1332d3897c76 Mon Sep 17 00:00:00 2001
From: onurulgen <onur.ulgen@kcl.ac.uk>
Date: Mon, 20 Feb 2023 12:19:48 +0000
Subject: [PATCH 055/314] Refactorisations

---
 niftyreg_build_version.txt                    |  2 +-
 reg-apps/reg_tools.cpp                        |  2 +-
 reg-lib/AladinContent.h                       |  2 +-
 reg-lib/_reg_base.cpp                         |  8 ++--
 reg-lib/cl/ClAladinContent.h                  | 44 +++++++++---------
 reg-lib/cuda/CudaAladinContent.h              | 46 +++++++++----------
 reg-lib/cuda/_reg_nmi_gpu.cu                  | 12 ++---
 reg-lib/cuda/_reg_resampling_gpu.cu           | 10 ++--
 .../reg_test_affine_deformation_field.cpp     |  2 +-
 reg-test/reg_test_interpolation.cpp           |  8 ++--
 10 files changed, 67 insertions(+), 69 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index de8febe1..fb402ef6 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-168
+169
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 02ed8b09..b19c72d5 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -626,7 +626,7 @@ int main(int argc, char **argv)
                 reg_tools_changeDatatype<double>(image2,NIFTI_TYPE_FLOAT64);
                 break;
             default:
-                reg_print_msg_error("Unsurported data type.");
+                reg_print_msg_error("Unsupported data type.");
                 reg_exit();
             }
         }
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index 51a9acb9..2614e57b 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -26,7 +26,7 @@ class AladinContent: public Content {
     virtual _reg_blockMatchingParam* GetBlockMatchingParams() { return blockMatchingParams; }
 
     // Setters
-    void SetCaptureRange(const int captureRangeIn);
+    virtual void SetCaptureRange(const int captureRangeIn);
     virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; }
 
 protected:
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index dd73a129..7b23f115 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -693,8 +693,8 @@ void reg_base<T>::Initialise() {
                 active[i] = false;
             sigma[0] = referenceSmoothingSigma;
             reg_tools_kernelConvolution(referencePyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active);
-            delete[]active;
-            delete[]sigma;
+            delete[] active;
+            delete[] sigma;
         }
         if (floatingSmoothingSigma != 0) {
             // Only the first image is smoothed
@@ -705,8 +705,8 @@ void reg_base<T>::Initialise() {
                 active[i] = false;
             sigma[0] = floatingSmoothingSigma;
             reg_tools_kernelConvolution(floatingPyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active);
-            delete[]active;
-            delete[]sigma;
+            delete[] active;
+            delete[] sigma;
         }
     }
 
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
index 8331f0e7..8be61f1d 100644
--- a/reg-lib/cl/ClAladinContent.h
+++ b/reg-lib/cl/ClAladinContent.h
@@ -20,35 +20,35 @@ class ClAladinContent: public AladinContent {
                     const unsigned int percentageOfBlocks = 0,
                     const unsigned int inlierLts = 0,
                     int blockStepSize = 0);
-    ~ClAladinContent();
+    virtual ~ClAladinContent();
 
-    bool IsCurrentComputationDoubleCapable() override;
+    virtual bool IsCurrentComputationDoubleCapable() override;
 
     // OpenCL getters
-    cl_mem GetReferenceImageArrayClmem();
-    cl_mem GetFloatingImageArrayClmem();
-    cl_mem GetWarpedImageClmem();
-    cl_mem GetReferencePositionClmem();
-    cl_mem GetWarpedPositionClmem();
-    cl_mem GetDeformationFieldArrayClmem();
-    cl_mem GetTotalBlockClmem();
-    cl_mem GetMaskClmem();
-    cl_mem GetRefMatClmem();
-    cl_mem GetFloMatClmem();
-    int* GetReferenceDims();
-    int* GetFloatingDims();
+    virtual cl_mem GetReferenceImageArrayClmem();
+    virtual cl_mem GetFloatingImageArrayClmem();
+    virtual cl_mem GetWarpedImageClmem();
+    virtual cl_mem GetReferencePositionClmem();
+    virtual cl_mem GetWarpedPositionClmem();
+    virtual cl_mem GetDeformationFieldArrayClmem();
+    virtual cl_mem GetTotalBlockClmem();
+    virtual cl_mem GetMaskClmem();
+    virtual cl_mem GetRefMatClmem();
+    virtual cl_mem GetFloMatClmem();
+    virtual int* GetReferenceDims();
+    virtual int* GetFloatingDims();
 
     // CPU getters with data downloaded from device
-    _reg_blockMatchingParam* GetBlockMatchingParams() override;
-    nifti_image* GetDeformationField() override;
-    nifti_image* GetWarped() override;
+    virtual _reg_blockMatchingParam* GetBlockMatchingParams() override;
+    virtual nifti_image* GetDeformationField() override;
+    virtual nifti_image* GetWarped() override;
 
     // Setters
-    void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    void SetWarped(nifti_image *warpedImageIn) override;
-    void SetDeformationField(nifti_image *deformationFieldIn) override;
-    void SetReferenceMask(int *referenceMaskIn) override;
-    void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    virtual void SetWarped(nifti_image *warpedImageIn) override;
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetReferenceMask(int *referenceMaskIn) override;
+    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
 
 private:
     void InitVars();
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
index 26d68d4f..6ff9cc61 100644
--- a/reg-lib/cuda/CudaAladinContent.h
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -14,20 +14,20 @@ class CudaAladinContent: public AladinContent {
                       const unsigned int percentageOfBlocks = 0,
                       const unsigned int inlierLts = 0,
                       int blockStepSize = 0);
-    ~CudaAladinContent();
+    virtual ~CudaAladinContent();
 
-    bool IsCurrentComputationDoubleCapable() override;
+    virtual bool IsCurrentComputationDoubleCapable() override;
 
     // Device getters
-    float* GetReferenceImageArray_d();
-    float* GetFloatingImageArray_d();
-    float* GetWarpedImageArray_d();
-    float* GetTransformationMatrix_d();
-    float* GetReferencePosition_d();
-    float* GetWarpedPosition_d();
-    float* GetDeformationFieldArray_d();
-    float* GetReferenceMat_d();
-    float* GetFloIJKMat_d();
+    virtual float* GetReferenceImageArray_d();
+    virtual float* GetFloatingImageArray_d();
+    virtual float* GetWarpedImageArray_d();
+    virtual float* GetTransformationMatrix_d();
+    virtual float* GetReferencePosition_d();
+    virtual float* GetWarpedPosition_d();
+    virtual float* GetDeformationFieldArray_d();
+    virtual float* GetReferenceMat_d();
+    virtual float* GetFloIJKMat_d();
 
     //	float* GetAR_d(); // Removed until CUDA SVD is added back
     //	float* GetU_d(); // Removed until CUDA SVD is added back
@@ -36,23 +36,23 @@ class CudaAladinContent: public AladinContent {
     //	float* GetLengths_d(); // Removed until CUDA SVD is added back
     //	float* GetNewWarpedPos_d(); // Removed until CUDA SVD is added back
 
-    int* GetTotalBlock_d();
-    int* GetMask_d();
+    virtual int* GetTotalBlock_d();
+    virtual int* GetMask_d();
 
-    int* GetReferenceDims();
-    int* GetFloatingDims();
+    virtual int* GetReferenceDims();
+    virtual int* GetFloatingDims();
 
     // CPU getters with data downloaded from device
-    _reg_blockMatchingParam* GetBlockMatchingParams() override;
-    nifti_image* GetDeformationField() override;
-    nifti_image* GetWarped() override;
+    virtual _reg_blockMatchingParam* GetBlockMatchingParams() override;
+    virtual nifti_image* GetDeformationField() override;
+    virtual nifti_image* GetWarped() override;
 
     // Setters
-    void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    void SetWarped(nifti_image *warpedImageIn) override;
-    void SetDeformationField(nifti_image *deformationFieldIn) override;
-    void SetReferenceMask(int *referenceMaskIn) override;
-    void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    virtual void SetWarped(nifti_image *warpedImageIn) override;
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetReferenceMask(int *referenceMaskIn) override;
+    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
 
 private:
     void InitVars();
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 71f2a460..71eeb05a 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -114,12 +114,12 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 double reg_nmi_gpu::GetSimilarityMeasureValue() {
     // The NMI computation is performed into the host for now
-    // The relevant images have to be transfered from the device to the host
-    cudaMemcpy(this->warpedFloatingImagePointer->data,
-               this->warpedFloatingDevicePointer,
-               this->warpedFloatingImagePointer->nvox *
-               this->warpedFloatingImagePointer->nbyper,
-               cudaMemcpyDeviceToHost);
+    // The relevant images have to be transferred from the device to the host
+    NR_CUDA_SAFE_CALL(cudaMemcpy(this->warpedFloatingImagePointer->data,
+                                 this->warpedFloatingDevicePointer,
+                                 this->warpedFloatingImagePointer->nvox *
+                                 this->warpedFloatingImagePointer->nbyper,
+                                 cudaMemcpyDeviceToHost));
 
     reg_getNMIValue<float>(this->referenceImagePointer,
                            this->warpedFloatingImagePointer,
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index 877f275e..bb86b9cd 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -45,15 +45,13 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
     else floatingMatrix = floatingImage->qto_ijk;
 
     if (floatingImage->nz > 1) {
-        const unsigned int Grid_reg_resamplefloatingImage3D =
-            (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage3D));
+        const unsigned Grid_reg_resamplefloatingImage3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage3D));
         dim3 B1(NR_BLOCK->Block_reg_resampleImage3D, 1, 1);
         dim3 G1(Grid_reg_resamplefloatingImage3D, Grid_reg_resamplefloatingImage3D, 1);
         reg_resampleImage3D_kernel<<<G1, B1>>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     } else {
-        const unsigned int Grid_reg_resamplefloatingImage2D =
-            (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage2D));
+        const unsigned Grid_reg_resamplefloatingImage2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage2D));
         dim3 B1(NR_BLOCK->Block_reg_resampleImage2D, 1, 1);
         dim3 G1(Grid_reg_resamplefloatingImage2D, Grid_reg_resamplefloatingImage2D, 1);
         reg_resampleImage2D_kernel<<<G1, B1>>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
@@ -87,13 +85,13 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
     else floatingMatrix = floatingImage->qto_ijk;
 
     if (floatingImage->nz > 1) {
-        const unsigned int Grid_reg_getImageGradient3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient3D));
+        const unsigned Grid_reg_getImageGradient3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient3D));
         dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D, 1, 1);
         dim3 G1(Grid_reg_getImageGradient3D, Grid_reg_getImageGradient3D, 1);
         reg_getImageGradient3D_kernel<<<G1, B1>>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     } else {
-        const unsigned int Grid_reg_getImageGradient2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient2D));
+        const unsigned Grid_reg_getImageGradient2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient2D));
         dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D, 1, 1);
         dim3 G1(Grid_reg_getImageGradient2D, Grid_reg_getImageGradient2D, 1);
         reg_getImageGradient2D_kernel<<<G1, B1>>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp
index 78229415..3ca5619f 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affine_deformation_field.cpp
@@ -35,7 +35,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
     nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference3d);
 
-    // Generate the different use cases
+    // Generate the different test cases
     std::vector<TestData> testCases;
 
     // Identity use case - 2D
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index e3183de1..ebdabcae 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -51,6 +51,7 @@ TEST_CASE("Resampling", "[resampling]") {
             ref2dPtr++;
         }
     }
+    ref2dPtr = static_cast<float*>(reference2d->data);
 
     // Create a corresponding 2D deformation field
     int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 };
@@ -75,6 +76,7 @@ TEST_CASE("Resampling", "[resampling]") {
             }
         }
     }
+    ref3dPtr = static_cast<float*>(reference3d->data);
 
     // Create a corresponding 3D deformation field
     dimDef[5] = 3;
@@ -85,13 +87,12 @@ TEST_CASE("Resampling", "[resampling]") {
     def3dPtr[1] = 1.3f;
     def3dPtr[2] = 1.4f;
 
-    // Generate the different use cases
+    // Generate the different test cases
     std::vector<TestData> testCases;
 
     // Linear interpolation - 2D
     // coordinate in image: [1.2, 1.3]
     float resLinear2d[1] = {0};
-    ref2dPtr = static_cast<float*>(reference2d->data);
     for (int y = 1; y <= 2; ++y) {
         for (int x = 1; x <= 2; ++x) {
             resLinear2d[0] += ref2dPtr[y * dimFlo[1] + x] *
@@ -147,7 +148,6 @@ TEST_CASE("Resampling", "[resampling]") {
     // Linear interpolation - 3D
     // coordinate in image: [1.2, 1.3, 1.4]
     float resLinear3d[1] = {0};
-    ref3dPtr = static_cast<float*>(reference3d->data);
     for (int z = 1; z <= 2; ++z) {
         for (int y = 1; y <= 2; ++y) {
             for (int x = 1; x <= 2; ++x) {
@@ -242,7 +242,7 @@ TEST_CASE("Resampling", "[resampling]") {
                 warped->dim[3] = warped->nz = 1;
                 warped->dim[5] = warped->nu = 1;
                 warped->nvox = CalcVoxelNumber(*warped, warped->ndim);
-                warped->data = malloc(warped->nvox * warped->nbyper);
+                warped->data = calloc(warped->nvox, warped->nbyper);
                 content->SetWarped(warped);
                 // Set the deformation field
                 content->SetDeformationField(defField);

From 6789f421f953abbbb6722eb7f6f28635d7d656f0 Mon Sep 17 00:00:00 2001
From: onurulgen <onur.ulgen@kcl.ac.uk>
Date: Mon, 20 Feb 2023 13:15:31 +0000
Subject: [PATCH 056/314] Add nifti_dup() to duplicate a nifti image

---
 niftyreg_build_version.txt |  2 +-
 reg-lib/cpu/_reg_tools.cpp | 11 ++++++++++-
 reg-lib/cpu/_reg_tools.h   |  7 +++++++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index fb402ef6..2cd1cfa2 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-169
+170
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 7e723256..0530cfae 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -2853,4 +2853,13 @@ size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount) {
     if (dimCount > 6)
         voxelNumber *= static_cast<size_t>(std::abs(image.nw));
     return voxelNumber;
-}
\ No newline at end of file
+}
+/* *************************************************************** */
+nifti_image* nifti_dup(const nifti_image& image, const bool& copyData) {
+    nifti_image *newImage = nifti_copy_nim_info(&image);
+    newImage->data = calloc(image.nvox, image.nbyper);
+    if (copyData)
+        memcpy(newImage->data, image.data, image.nvox * image.nbyper);
+    return newImage;
+}
+/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index d79dda14..936fdd57 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -448,3 +448,10 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x
  */
 size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount = 3);
 /* *************************************************************** */
+/** @brief Duplicates the nifti image
+ * @param image Input image
+ * @param copyData Boolean to specify if the image data should be copied
+ * @return The duplicated image
+ */
+nifti_image* nifti_dup(const nifti_image& image, const bool& copyData = true);
+/* *************************************************************** */

From 52093ace6acd052af4011684799ac1772b2e128d Mon Sep 17 00:00:00 2001
From: onurulgen <onur.ulgen@kcl.ac.uk>
Date: Mon, 20 Feb 2023 13:27:21 +0000
Subject: [PATCH 057/314] Hide test functions from public

---
 niftyreg_build_version.txt                    |  2 +-
 reg-lib/AladinContent.h                       | 13 +++++++++----
 reg-lib/Content.h                             | 18 +++++++++++++-----
 reg-lib/_reg_f3d.cpp                          |  3 +--
 reg-lib/_reg_f3d2.cpp                         |  6 ++----
 reg-lib/cl/ClAladinContent.h                  | 19 ++++++++++++-------
 reg-lib/cuda/CudaAladinContent.h              | 19 ++++++++++++-------
 reg-lib/cuda/CudaCompute.cpp                  |  2 +-
 reg-lib/cuda/CudaContent.cpp                  |  4 ++++
 reg-lib/cuda/CudaContent.h                    | 18 +++++++++++++-----
 .../reg_test_affine_deformation_field.cpp     |  3 +++
 reg-test/reg_test_interpolation.cpp           |  2 ++
 12 files changed, 73 insertions(+), 36 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 2cd1cfa2..b34c321e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-170
+171
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index 2614e57b..0cc6e16d 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -25,13 +25,18 @@ class AladinContent: public Content {
     // Getters
     virtual _reg_blockMatchingParam* GetBlockMatchingParams() { return blockMatchingParams; }
 
-    // Setters
-    virtual void SetCaptureRange(const int captureRangeIn);
-    virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; }
-
 protected:
     _reg_blockMatchingParam* blockMatchingParams;
     unsigned int currentPercentageOfBlockToUse;
     unsigned int inlierLts;
     int stepSizeBlock;
+
+#ifdef NR_TESTING
+public:
+#else
+protected:
+#endif
+    // Functions for testing
+    virtual void SetCaptureRange(const int captureRangeIn);
+    virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; }
 };
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index 8da20be2..4731b084 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -23,11 +23,8 @@ class Content {
     virtual mat44* GetTransformationMatrix() { return transformationMatrix; }
     virtual nifti_image* GetWarped() { return warped; }
 
-    // Setters
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) { deformationField = deformationFieldIn; }
-    virtual void SetReferenceMask(int *referenceMaskIn) { referenceMask = referenceMaskIn; }
-    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { transformationMatrix = transformationMatrixIn; }
-    virtual void SetWarped(nifti_image *warpedIn) { warped = warpedIn; }
+    // Methods for transferring data from nifti to device
+    virtual void UpdateDeformationField() {}
 
     // Auxiliary methods
     static mat44* GetXYZMatrix(nifti_image& image) {
@@ -51,4 +48,15 @@ class Content {
     void DeallocateWarped();
     void AllocateDeformationField(size_t bytes);
     void DeallocateDeformationField();
+
+#ifdef NR_TESTING
+public:
+#else
+protected:
+#endif
+    // Functions for testing
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) { deformationField = deformationFieldIn; }
+    virtual void SetReferenceMask(int *referenceMaskIn) { referenceMask = referenceMaskIn; }
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { transformationMatrix = transformationMatrixIn; }
+    virtual void SetWarped(nifti_image *warpedIn) { warped = warpedIn; }
 };
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 28f75860..fdab1b81 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -658,9 +658,8 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
     this->WarpFloatingImage(3); // cubic spline interpolation
 
     nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
-    warpedImage[0] = this->con->GetWarped();
+    warpedImage[0] = nifti_dup(*this->con->GetWarped());
 
-    this->con->SetWarped(nullptr); // Prevent deallocating of warpedImage
     DeinitCurrentLevel(-1);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetWarpedImage");
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 05dca3ac..c2058c47 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -898,11 +898,9 @@ nifti_image** reg_f3d2<T>::GetWarpedImage() {
 
     F3dContent *con = dynamic_cast<F3dContent*>(this->con);
     nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
-    warpedImage[0] = con->GetWarped();
-    warpedImage[1] = conBw->GetWarped();
+    warpedImage[0] = nifti_dup(*con->GetWarped());
+    warpedImage[1] = nifti_dup(*conBw->GetWarped());
 
-    con->SetWarped(nullptr); // Prevent deallocating of warpedImage
-    conBw->SetWarped(nullptr);
     DeinitCurrentLevel(-1);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::GetWarpedImage");
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
index 8be61f1d..fa2418f4 100644
--- a/reg-lib/cl/ClAladinContent.h
+++ b/reg-lib/cl/ClAladinContent.h
@@ -43,13 +43,6 @@ class ClAladinContent: public AladinContent {
     virtual nifti_image* GetDeformationField() override;
     virtual nifti_image* GetWarped() override;
 
-    // Setters
-    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedImageIn) override;
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
-    virtual void SetReferenceMask(int *referenceMaskIn) override;
-    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
-
 private:
     void InitVars();
     void AllocateClPtrs();
@@ -81,4 +74,16 @@ class ClAladinContent: public AladinContent {
     void FillImageData(nifti_image *image, cl_mem memoryObject, int type);
     template<class T>
     T FillWarpedImageData(float intensity, int datatype);
+
+#ifdef NR_TESTING
+public:
+#else
+protected:
+#endif
+    // Functions for testing
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    virtual void SetWarped(nifti_image *warpedImageIn) override;
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetReferenceMask(int *referenceMaskIn) override;
+    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
 };
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
index 6ff9cc61..b210e294 100644
--- a/reg-lib/cuda/CudaAladinContent.h
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -47,13 +47,6 @@ class CudaAladinContent: public AladinContent {
     virtual nifti_image* GetDeformationField() override;
     virtual nifti_image* GetWarped() override;
 
-    // Setters
-    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedImageIn) override;
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
-    virtual void SetReferenceMask(int *referenceMaskIn) override;
-    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
-
 private:
     void InitVars();
     void AllocateCuPtrs();
@@ -88,4 +81,16 @@ class CudaAladinContent: public AladinContent {
 
     template<class FloatingTYPE>
     FloatingTYPE FillWarpedImageData(float intensity, int datatype);
+
+#ifdef NR_TESTING
+public:
+#else
+protected:
+#endif
+    // Functions for testing
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    virtual void SetWarped(nifti_image *warpedImageIn) override;
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetReferenceMask(int *referenceMaskIn) override;
+    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
 };
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index e1f5fee8..910c66f5 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -149,7 +149,7 @@ void CudaCompute::GetDefFieldFromVelocityGrid(bool updateStepNumber) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     // TODO update only the required ones
     con.UpdateControlPointGrid();
-    con.SetDeformationField(con.F3dContent::GetDeformationField());
+    con.UpdateDeformationField();
 }
 /* *************************************************************** */
 void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 83ba5bc3..94bd9034 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -84,6 +84,10 @@ void CudaContent::SetDeformationField(nifti_image *deformationFieldIn) {
     cudaCommon_transferNiftiToArrayOnDevice(deformationFieldCuda, deformationField);
 }
 /* *************************************************************** */
+void CudaContent::UpdateDeformationField() {
+    cudaCommon_transferNiftiToArrayOnDevice(deformationFieldCuda, deformationField);
+}
+/* *************************************************************** */
 void CudaContent::SetReferenceMask(int *referenceMaskIn) {
     Content::SetReferenceMask(referenceMaskIn);
 
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index e1c7a8b4..a32316ac 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -27,11 +27,8 @@ class CudaContent: public virtual Content {
     virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; }
     virtual float* GetWarpedCuda() { return warpedCuda; }
 
-    // Setters
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
-    virtual void SetReferenceMask(int *referenceMaskIn) override;
-    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedIn) override;
+    // Methods for transferring data from nifti to device
+    virtual void UpdateDeformationField() override;
 
 protected:
     cudaArray *referenceCuda = nullptr;
@@ -51,4 +48,15 @@ class CudaContent: public virtual Content {
     template<class DataType> DataType CastImageData(float intensity, int datatype);
     template<class DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
     void DownloadImage(nifti_image *image, float *memoryObject, int datatype);
+
+#ifdef NR_TESTING
+public:
+#else
+protected:
+#endif
+    // Functions for testing
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetReferenceMask(int *referenceMaskIn) override;
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    virtual void SetWarped(nifti_image *warpedIn) override;
 };
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp
index 3ca5619f..e3c9f749 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affine_deformation_field.cpp
@@ -1,3 +1,6 @@
+// Enable testing
+#define NR_TESTING
+
 #include "_reg_ReadWriteMatrix.h"
 #include "_reg_tools.h"
 
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index ebdabcae..2fad9b34 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -1,5 +1,7 @@
 // OpenCL is not supported for this test
 #undef _USE_OPENCL
+// Enable testing
+#define NR_TESTING
 
 #include "_reg_ReadWriteMatrix.h"
 #include "_reg_tools.h"

From 4ee7a399f9febda5e78f0df41d6b86893f579569 Mon Sep 17 00:00:00 2001
From: onurulgen <onur.ulgen@kcl.ac.uk>
Date: Mon, 20 Feb 2023 14:22:40 +0000
Subject: [PATCH 058/314] Use nifti_dup()

---
 niftyreg_build_version.txt                    |  2 +-
 reg-apps/reg_average.cpp                      | 13 +++-----
 reg-apps/reg_ppcnr.cpp                        | 10 ++----
 reg-apps/reg_resample.cpp                     |  5 +--
 reg-apps/reg_tools.cpp                        | 21 ++++--------
 reg-apps/reg_transform.cpp                    | 12 +++----
 reg-lib/Compute.cpp                           | 21 ++++--------
 reg-lib/F3dContent.cpp                        |  9 ++---
 reg-lib/_reg_aladin.cpp                       |  4 +--
 reg-lib/_reg_base.cpp                         |  8 ++---
 reg-lib/_reg_f3d.cpp                          | 11 ++-----
 reg-lib/_reg_f3d2.cpp                         | 24 ++------------
 reg-lib/cpu/_reg_lncc.cpp                     | 28 +++++-----------
 reg-lib/cpu/_reg_localTrans.cpp               | 33 +++++++------------
 reg-lib/cpu/_reg_localTrans_jac.cpp           |  3 +-
 reg-lib/cpu/_reg_mind.cpp                     | 21 ++++--------
 reg-lib/cpu/_reg_tools.cpp                    | 23 +++----------
 .../reg_test_bspline_deformation_field.cpp    |  6 ++--
 ...est_coherence_affine_deformation_field.cpp | 10 ++----
 reg-test/reg_test_coherence_interpolation.cpp |  9 ++---
 .../reg_test_compose_deformation_field.cpp    |  4 +--
 reg-test/reg_test_computation_time.cpp        | 13 +++-----
 reg-test/reg_test_convolution.cpp             |  3 +-
 .../reg_test_linearElasticityGradient.cpp     |  6 ++--
 .../reg_test_nonlinear_deformation_field.cpp  |  4 +--
 25 files changed, 85 insertions(+), 218 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index b34c321e..730a054a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-171
+172
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index 2f337399..6e83fe95 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -277,8 +277,7 @@ int compute_nrr_demean(nifti_image *demean_field,
       // read the transformation
       nifti_image *transformation = reg_io_ReadImageFile(inputNRRName[t]);
       // Generate the deformation or flow field
-      nifti_image *deformationField = nifti_copy_nim_info(demean_field);
-      deformationField->data = calloc(deformationField->nvox,deformationField->nbyper);
+      nifti_image *deformationField = nifti_dup(*demean_field, false);
       reg_tools_multiplyValueToImage(deformationField,deformationField,0.f);
       deformationField->scl_slope=1.f;
       deformationField->scl_inter=0.f;
@@ -324,8 +323,7 @@ int compute_nrr_demean(nifti_image *demean_field,
          }
          else reg_tool_ReadAffineFile(&affineTransformation,inputAffName[t]);
          // The affine component is substracted
-         nifti_image *tempField = nifti_copy_nim_info(deformationField);
-         tempField->data = malloc(tempField->nvox*tempField->nbyper);
+         nifti_image *tempField = nifti_dup(*deformationField, false);
          tempField->scl_slope=1.f;
          tempField->scl_inter=0.f;
          reg_affine_getDeformationField(&affineTransformation, tempField);
@@ -389,8 +387,7 @@ int compute_average_image(nifti_image *averageImage,
    // Set the average image to zero
    memset(averageImage->data, 0, averageImage->nvox*averageImage->nbyper);
    // Create an image to store the defined value number
-   nifti_image *definedValue = nifti_copy_nim_info(averageImage);
-   definedValue->data = calloc(averageImage->nvox, averageImage->nbyper);
+   nifti_image *definedValue = nifti_dup(*averageImage, false);
    // Loop over all input images
    for(size_t i=0; i<imageNumber; ++i){
       // Generate a deformation field defined by the average final
@@ -440,9 +437,7 @@ int compute_average_image(nifti_image *averageImage,
          if(demeanField!=nullptr){
             if(deformationField->intent_p1==DEF_VEL_FIELD){
                reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField);
-               nifti_image *tempDef = nifti_copy_nim_info(deformationField);
-               tempDef->data = malloc(tempDef->nvox*tempDef->nbyper);
-               memcpy(tempDef->data,deformationField->data,tempDef->nvox*tempDef->nbyper);
+               nifti_image *tempDef = nifti_dup(*deformationField);
                tempDef->scl_slope=1.f;
                tempDef->scl_inter=0.f;
                reg_defField_getDeformationFieldFromFlowField(tempDef,deformationField,false);
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index 02f4a228..565dc887 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -493,9 +493,7 @@ int main(int argc, char **argv)
    /* START THE REGISTRATION */
    /* ********************** */
    param->outputImageName="anchor.nii";   // NEED TO GET WORKING AND PUT INTERMEDIATE FILES IN SOURCE DIRECTORY.
-   nifti_image *images=nifti_copy_nim_info(image); // Need to make a new image that has the same info as the original.
-   images->data = (PrecisionTYPE *)calloc(images->nvox, image->nbyper);
-   memcpy(images->data, image->data, image->nvox*image->nbyper);
+   nifti_image *images=nifti_dup(*image); // Need to make a new image that has the same info as the original.
 
    /* ************************************/
    /* FOR NUMBER OF PRINCIPAL COMPONENTS */
@@ -785,8 +783,7 @@ int main(int argc, char **argv)
 
 
       // 4. rebuild images
-      nifti_image *imagep=nifti_copy_nim_info(image); // Need to make a new image that has the same info as the original.
-      imagep->data = (PrecisionTYPE *)calloc(imagep->nvox, image->nbyper);
+      nifti_image *imagep=nifti_dup(*image, false); // Need to make a new image that has the same info as the original.
       float dotty,sum;
       if(flag->locality)  // local mean
       {
@@ -866,8 +863,7 @@ int main(int argc, char **argv)
             stores->nvox = CalcVoxelNumber(*stores, stores->ndim);
             stores->data = calloc(stores->nvox,images->nbyper);
 
-            nifti_image *storet = nifti_copy_nim_info(stores);
-            storet->data = calloc(storet->nvox, storet->nbyper);
+            nifti_image *storet = nifti_dup(*stores, false);
 
             // COPY THE APPROPRIATE VALUES
             PrecisionTYPE *intensityPtrPP = static_cast<PrecisionTYPE *>(storet->data); // 3D real source image (needs current cpp image)
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index 888298c4..c7a12e52 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -368,10 +368,7 @@ int main(int argc, char **argv)
          reg_getDeformationFromDisplacement(inputTransformationImage);
       case DEF_VEL_FIELD:
          {
-            nifti_image *tempFlowField = nifti_copy_nim_info(deformationFieldImage);
-            tempFlowField->data = malloc(tempFlowField->nvox*tempFlowField->nbyper);
-            memcpy(tempFlowField->data,deformationFieldImage->data,
-                   tempFlowField->nvox*tempFlowField->nbyper);
+            nifti_image *tempFlowField = nifti_dup(*deformationFieldImage);
             reg_defField_compose(inputTransformationImage,
                                  tempFlowField,
                                  nullptr);
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index b19c72d5..14a6bdfb 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -497,9 +497,7 @@ int main(int argc, char **argv)
     if(flag->normFlag)
     {
         reg_tools_changeDatatype<float>(image);
-        nifti_image *normImage = nifti_copy_nim_info(image);
-        normImage->data = malloc(normImage->nvox * normImage->nbyper);
-        memcpy(normImage->data, image->data, normImage->nvox*normImage->nbyper);
+        nifti_image *normImage = nifti_dup(*image);
         reg_heapSort(static_cast<float *>(normImage->data), normImage->nvox);
         float minValue = static_cast<float *>(normImage->data)[static_cast<int>(reg_floor(03*(int)normImage->nvox/100))];
         float maxValue = static_cast<float *>(normImage->data)[static_cast<int>(reg_floor(97*(int)normImage->nvox/100))];
@@ -515,9 +513,7 @@ int main(int argc, char **argv)
 
     if(flag->smoothGaussianFlag || flag->smoothSplineFlag || flag->smoothMeanFlag)
     {
-        nifti_image *smoothImg = nifti_copy_nim_info(image);
-        smoothImg->data = malloc(smoothImg->nvox * smoothImg->nbyper);
-        memcpy(smoothImg->data, image->data, smoothImg->nvox*smoothImg->nbyper);
+        nifti_image *smoothImg = nifti_dup(*image);
         float *kernelSize = new float[smoothImg->nt*smoothImg->nu];
         bool *timePoint = new bool[smoothImg->nt*smoothImg->nu];
         for(int i=0; i<smoothImg->nt*smoothImg->nu; ++i) timePoint[i]=true;
@@ -555,9 +551,7 @@ int main(int argc, char **argv)
 
     if(flag->smoothLabFlag)
     {
-        nifti_image *smoothImg = nifti_copy_nim_info(image);
-        smoothImg->data = malloc(smoothImg->nvox * smoothImg->nbyper);
-        memcpy(smoothImg->data, image->data, smoothImg->nvox*smoothImg->nbyper);
+        nifti_image *smoothImg = nifti_dup(*image);
 
         bool *timePoint = new bool[smoothImg->nt*smoothImg->nu];
         for(int i=0; i<smoothImg->nt*smoothImg->nu; ++i) timePoint[i]=true;
@@ -631,8 +625,7 @@ int main(int argc, char **argv)
             }
         }
 
-        nifti_image *outputImage = nifti_copy_nim_info(image);
-        outputImage->data = malloc(outputImage->nvox * outputImage->nbyper);
+        nifti_image *outputImage = nifti_dup(*image, false);
 
         if(image2!=nullptr)
         {
@@ -734,8 +727,7 @@ int main(int argc, char **argv)
             return EXIT_FAILURE;
         }
 
-        nifti_image *outputImage = nifti_copy_nim_info(image);
-        outputImage->data = malloc(outputImage->nvox * outputImage->nbyper);
+        nifti_image *outputImage = nifti_dup(*image, false);
 
         reg_tools_nanMask_image(image,maskImage,outputImage);
 
@@ -948,8 +940,7 @@ int main(int argc, char **argv)
         if(image->datatype!=NIFTI_TYPE_FLOAT32)
             reg_tools_changeDatatype<float>(image);
         // Create a temporary scaled image
-        nifti_image *scaledImage = nifti_copy_nim_info(image);
-        scaledImage->data = malloc(scaledImage->nvox * scaledImage->nbyper);
+        nifti_image *scaledImage = nifti_dup(*image, false);
         // Rescale the input image
         float min_value = reg_tools_getMinValue(image, -1);
         float max_value = reg_tools_getMaxValue(image, -1);
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index ec533193..095b0668 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -839,9 +839,8 @@ int main(int argc, char **argv)
             case DEF_VEL_FIELD:
                printf("[NiftyReg] Transformation 2 is a deformation field velocity:\n[NiftyReg] %s\n",
                       input2TransImage->fname);
-               output2TransImage=nifti_copy_nim_info(input2TransImage);
+               output2TransImage = nifti_dup(*input2TransImage, false);
                output2TransImage->intent_p1=DEF_FIELD;
-               output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper);
                reg_defField_getDeformationFieldFromFlowField(input2TransImage,
                      output2TransImage,
                      false // the number of step is not automatically updated
@@ -851,9 +850,8 @@ int main(int argc, char **argv)
             case DISP_VEL_FIELD:
                printf("[NiftyReg] Transformation 2 is a displacement field velocity:\n[NiftyReg] %s\n",
                       input2TransImage->fname);
-               output2TransImage=nifti_copy_nim_info(input2TransImage);
+               output2TransImage = nifti_dup(*input2TransImage, false);
                output2TransImage->intent_p1=DEF_FIELD;
-               output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper);
                reg_getDeformationFromDisplacement(input2TransImage);
                reg_defField_getDeformationFieldFromFlowField(input2TransImage,
                      output2TransImage,
@@ -1342,9 +1340,8 @@ int main(int argc, char **argv)
       case DEF_VEL_FIELD:
       {
          // create a temp deformation field containing an identity transformation
-         nifti_image *tempField=nifti_copy_nim_info(outputTransImage);
+         nifti_image *tempField = nifti_dup(*outputTransImage, false);
          tempField->intent_p1=DEF_FIELD;
-         tempField->data=calloc(tempField->nvox,tempField->nbyper);
          reg_getDeformationFromDisplacement(tempField);
          reg_getDisplacementFromDeformation(inputTransImage);
          reg_resampleGradient(inputTransImage,
@@ -1362,9 +1359,8 @@ int main(int argc, char **argv)
       case DISP_VEL_FIELD:
       {
          // create a temp deformation field containing an identity transformation
-         nifti_image *tempField=nifti_copy_nim_info(outputTransImage);
+         nifti_image *tempField = nifti_dup(*outputTransImage, false);
          tempField->intent_p1=DEF_FIELD;
-         tempField->data=calloc(tempField->nvox,tempField->nbyper);
          reg_getDeformationFromDisplacement(tempField);
          reg_resampleGradient(inputTransImage,
                               outputTransImage,
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index f37634e8..800d821f 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -262,15 +262,12 @@ void Compute::ExponentiateGradient(Content& conBwIn) {
     const size_t compNum = size_t(fabs(controlPointGridBw->intent_p2)); // The number of composition
 
     /* Allocate a temporary gradient image to store the backward gradient */
-    nifti_image *tempGrad = nifti_copy_nim_info(voxelBasedMeasureGradient);
-    tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper);
+    nifti_image *tempGrad = nifti_dup(*voxelBasedMeasureGradient, false);
 
     // Create all deformation field images needed for resampling
     nifti_image **tempDef = (nifti_image**)malloc((compNum + 1) * sizeof(nifti_image*));
-    for (size_t i = 0; i <= compNum; ++i) {
-        tempDef[i] = nifti_copy_nim_info(deformationField);
-        tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper);
-    }
+    for (size_t i = 0; i <= compNum; ++i)
+        tempDef[i] = nifti_dup(*deformationField, false);
 
     // Generate all intermediate deformation fields
     reg_spline_getIntermediateDefFieldFromVelGrid(controlPointGridBw, tempDef);
@@ -278,8 +275,7 @@ void Compute::ExponentiateGradient(Content& conBwIn) {
     // Remove the affine component
     nifti_image *affineDisp = nullptr;
     if (affineTransformationBw) {
-        affineDisp = nifti_copy_nim_info(deformationField);
-        affineDisp->data = malloc(affineDisp->nvox * affineDisp->nbyper);
+        affineDisp = nifti_dup(*deformationField, false);
         reg_affine_getDeformationField(affineTransformationBw, affineDisp);
         reg_getDisplacementFromDeformation(affineDisp);
     }
@@ -311,8 +307,7 @@ void Compute::ExponentiateGradient(Content& conBwIn) {
 }
 /* *************************************************************** */
 nifti_image* Compute::ScaleGradient(const nifti_image& transformationGradient, float scale) {
-    nifti_image *scaledGradient = nifti_copy_nim_info(&transformationGradient);
-    scaledGradient->data = malloc(scaledGradient->nvox * scaledGradient->nbyper);
+    nifti_image *scaledGradient = nifti_dup(transformationGradient, false);
     reg_tools_multiplyValueToImage(&transformationGradient, scaledGradient, scale);
     return scaledGradient;
 }
@@ -349,10 +344,8 @@ void Compute::SymmetriseVelocityFields(Content& conBwIn) {
 
     // In order to ensure symmetry, the forward and backward velocity fields
     // are averaged in both image spaces: reference and floating
-    nifti_image *warpedTrans = nifti_copy_nim_info(controlPointGridBw);
-    warpedTrans->data = malloc(warpedTrans->nvox * warpedTrans->nbyper);
-    nifti_image *warpedTransBw = nifti_copy_nim_info(controlPointGrid);
-    warpedTransBw->data = malloc(warpedTransBw->nvox * warpedTransBw->nbyper);
+    nifti_image *warpedTrans = nifti_dup(*controlPointGridBw, false);
+    nifti_image *warpedTransBw = nifti_dup(*controlPointGrid, false);
 
     // Both parametrisations are converted into displacement
     reg_getDisplacementFromDeformation(controlPointGrid);
diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp
index 29b9fc7e..aaf37975 100644
--- a/reg-lib/F3dContent.cpp
+++ b/reg-lib/F3dContent.cpp
@@ -51,8 +51,7 @@ void F3dContent::DeallocateLocalWeightSim() {
 }
 /* *************************************************************** */
 void F3dContent::AllocateWarpedGradient() {
-    warpedGradient = nifti_copy_nim_info(deformationField);
-    warpedGradient->data = calloc(warpedGradient->nvox, warpedGradient->nbyper);
+    warpedGradient = nifti_dup(*deformationField, false);
 }
 /* *************************************************************** */
 void F3dContent::DeallocateWarpedGradient() {
@@ -63,8 +62,7 @@ void F3dContent::DeallocateWarpedGradient() {
 }
 /* *************************************************************** */
 void F3dContent::AllocateTransformationGradient() {
-    transformationGradient = nifti_copy_nim_info(controlPointGrid);
-    transformationGradient->data = calloc(transformationGradient->nvox, transformationGradient->nbyper);
+    transformationGradient = nifti_dup(*controlPointGrid, false);
 }
 /* *************************************************************** */
 void F3dContent::DeallocateTransformationGradient() {
@@ -75,8 +73,7 @@ void F3dContent::DeallocateTransformationGradient() {
 }
 /* *************************************************************** */
 void F3dContent::AllocateVoxelBasedMeasureGradient() {
-    voxelBasedMeasureGradient = nifti_copy_nim_info(deformationField);
-    voxelBasedMeasureGradient->data = calloc(voxelBasedMeasureGradient->nvox, voxelBasedMeasureGradient->nbyper);
+    voxelBasedMeasureGradient = nifti_dup(*deformationField, false);
 }
 /* *************************************************************** */
 void F3dContent::DeallocateVoxelBasedMeasureGradient() {
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index f8a812c4..dfdae9d7 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -570,13 +570,11 @@ nifti_image* reg_aladin<T>::GetFinalWarpedImage() {
     nifti_image *warped = this->con->GetWarped();
 
     free(mask);
-    nifti_image *resultImage = nifti_copy_nim_info(warped);
+    nifti_image *resultImage = nifti_dup(*warped);
     resultImage->cal_min = this->inputFloating->cal_min;
     resultImage->cal_max = this->inputFloating->cal_max;
     resultImage->scl_slope = this->inputFloating->scl_slope;
     resultImage->scl_inter = this->inputFloating->scl_inter;
-    resultImage->data = malloc(resultImage->nvox * resultImage->nbyper);
-    memcpy(resultImage->data, warped->data, resultImage->nvox * resultImage->nbyper);
 
     reg_aladin<T>::DeallocateKernels();
     reg_aladin<T>::DeinitAladinContent();
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 7b23f115..c82ffd33 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -625,9 +625,7 @@ void reg_base<T>::Initialise() {
     // Update the input images threshold if required
     if (robustRange) {
         // Create a copy of the reference image to extract the robust range
-        nifti_image *temp_reference = nifti_copy_nim_info(inputReference);
-        temp_reference->data = malloc(temp_reference->nvox * temp_reference->nbyper);
-        memcpy(temp_reference->data, inputReference->data, temp_reference->nvox * temp_reference->nbyper);
+        nifti_image *temp_reference = nifti_dup(*inputReference);
         reg_tools_changeDatatype<T>(temp_reference);
         // Extract the robust range of the reference image
         T *refDataPtr = static_cast<T *>(temp_reference->data);
@@ -641,9 +639,7 @@ void reg_base<T>::Initialise() {
         nifti_image_free(temp_reference);
 
         // Create a copy of the floating image to extract the robust range
-        nifti_image *temp_floating = nifti_copy_nim_info(inputFloating);
-        temp_floating->data = malloc(temp_floating->nvox * temp_floating->nbyper);
-        memcpy(temp_floating->data, inputFloating->data, temp_floating->nvox * temp_floating->nbyper);
+        nifti_image *temp_floating = nifti_dup(*inputFloating);
         reg_tools_changeDatatype<T>(temp_floating);
         // Extract the robust range of the floating image
         T *floDataPtr = static_cast<T *>(temp_floating->data);
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index fdab1b81..66207c26 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -218,10 +218,7 @@ void reg_f3d<T>::Initialise() {
         } else reg_affine_getDeformationField(this->affineTransformation, controlPointGrid);
     } else {
         // The control point grid image is initialised with the provided grid
-        controlPointGrid = nifti_copy_nim_info(inputControlPointGrid);
-        controlPointGrid->data = malloc(controlPointGrid->nvox * controlPointGrid->nbyper);
-        memcpy(controlPointGrid->data, inputControlPointGrid->data,
-               controlPointGrid->nvox * controlPointGrid->nbyper);
+        controlPointGrid = nifti_dup(*inputControlPointGrid);
         // The final grid spacing is computed
         spacing[0] = controlPointGrid->dx / powf(2, this->levelNumber - 1);
         spacing[1] = controlPointGrid->dy / powf(2, this->levelNumber - 1);
@@ -669,14 +666,10 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
 /* *************************************************************** */
 template<class T>
 nifti_image* reg_f3d<T>::GetControlPointPositionImage() {
-    nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGrid);
-    returnedControlPointGrid->data = malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
-    memcpy(returnedControlPointGrid->data, controlPointGrid->data,
-           returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
-    return returnedControlPointGrid;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetControlPointPositionImage");
 #endif
+    return nifti_dup(*controlPointGrid);
 }
 /* *************************************************************** */
 template<class T>
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index c2058c47..e4330e0e 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -687,18 +687,10 @@ void reg_f3d2<T>::InitialiseSimilarity() {
 /* *************************************************************** */
 template<class T>
 nifti_image* reg_f3d2<T>::GetBackwardControlPointPositionImage() {
-    // Create a control point grid nifti image
-    nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGridBw);
-    // Allocate the new image data array
-    returnedControlPointGrid->data = malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
-    // Copy the final backward control point grid image
-    memcpy(returnedControlPointGrid->data, controlPointGridBw->data,
-           returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper);
-    // Return the new control point grid
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::GetBackwardControlPointPositionImage");
 #endif
-    return returnedControlPointGrid;
+    return nifti_dup(*controlPointGridBw);
 }
 /* *************************************************************** */
 template <class T>
@@ -745,24 +737,14 @@ void reg_f3d2<T>::Initialise() {
                                                 gridSpacing);
     } else {
         // The control point grid image is initialised with the provided grid
-        this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid);
-        this->controlPointGrid->data = malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
-        if (this->inputControlPointGrid->num_ext > 0)
-            nifti_copy_extensions(this->controlPointGrid, this->inputControlPointGrid);
-        memcpy(this->controlPointGrid->data, this->inputControlPointGrid->data,
-               this->controlPointGrid->nvox * this->controlPointGrid->nbyper);
+        this->controlPointGrid = nifti_dup(*this->inputControlPointGrid);
         // The final grid spacing is computed
         this->spacing[0] = this->controlPointGrid->dx / powf(2, this->levelNumber - 1);
         this->spacing[1] = this->controlPointGrid->dy / powf(2, this->levelNumber - 1);
         if (this->controlPointGrid->nz > 1)
             this->spacing[2] = this->controlPointGrid->dz / powf(2, this->levelNumber - 1);
         // The backward grid is derived from the forward
-        controlPointGridBw = nifti_copy_nim_info(this->controlPointGrid);
-        controlPointGridBw->data = malloc(controlPointGridBw->nvox * controlPointGridBw->nbyper);
-        if (this->controlPointGrid->num_ext > 0)
-            nifti_copy_extensions(controlPointGridBw, this->controlPointGrid);
-        memcpy(controlPointGridBw->data, this->controlPointGrid->data,
-               controlPointGridBw->nvox * controlPointGridBw->nbyper);
+        controlPointGridBw = nifti_dup(*this->controlPointGrid);
         reg_getDisplacementFromDeformation(controlPointGridBw);
         reg_tools_multiplyValueToImage(controlPointGridBw, controlPointGridBw, -1);
         reg_getDeformationFromDisplacement(controlPointGridBw);
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 4b91a93f..7451f1b8 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -216,18 +216,12 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
     this->forwardCorrelationImage->data = malloc(voxelNumber * this->forwardCorrelationImage->nbyper);
 
     // Allocate the required images to store mean and stdev of the reference image
-    this->referenceMeanImage = nifti_copy_nim_info(this->forwardCorrelationImage);
-    this->referenceMeanImage->data = malloc(this->referenceMeanImage->nvox * this->referenceMeanImage->nbyper);
-
-    this->referenceSdevImage = nifti_copy_nim_info(this->forwardCorrelationImage);
-    this->referenceSdevImage->data = malloc(this->referenceSdevImage->nvox * this->referenceSdevImage->nbyper);
+    this->referenceMeanImage = nifti_dup(*this->forwardCorrelationImage, false);
+    this->referenceSdevImage = nifti_dup(*this->forwardCorrelationImage, false);
 
     // Allocate the required images to store mean and stdev of the warped floating image
-    this->warpedFloatingMeanImage = nifti_copy_nim_info(this->forwardCorrelationImage);
-    this->warpedFloatingMeanImage->data = malloc(this->warpedFloatingMeanImage->nvox * this->warpedFloatingMeanImage->nbyper);
-
-    this->warpedFloatingSdevImage = nifti_copy_nim_info(this->forwardCorrelationImage);
-    this->warpedFloatingSdevImage->data = malloc(this->warpedFloatingSdevImage->nvox * this->warpedFloatingSdevImage->nbyper);
+    this->warpedFloatingMeanImage = nifti_dup(*this->forwardCorrelationImage, false);
+    this->warpedFloatingSdevImage = nifti_dup(*this->forwardCorrelationImage, false);
 
     // Allocate the array to store the mask of the forward image
     this->forwardMask = (int*)malloc(voxelNumber * sizeof(int));
@@ -242,18 +236,12 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
         this->backwardCorrelationImage->data = malloc(voxelNumber * this->backwardCorrelationImage->nbyper);
 
         // Allocate the required images to store mean and stdev of the floating image
-        this->floatingMeanImage = nifti_copy_nim_info(this->backwardCorrelationImage);
-        this->floatingMeanImage->data = malloc(this->floatingMeanImage->nvox * this->floatingMeanImage->nbyper);
-
-        this->floatingSdevImage = nifti_copy_nim_info(this->backwardCorrelationImage);
-        this->floatingSdevImage->data = malloc(this->floatingSdevImage->nvox * this->floatingSdevImage->nbyper);
+        this->floatingMeanImage = nifti_dup(*this->backwardCorrelationImage, false);
+        this->floatingSdevImage = nifti_dup(*this->backwardCorrelationImage, false);
 
         // Allocate the required images to store mean and stdev of the warped reference image
-        this->warpedReferenceMeanImage = nifti_copy_nim_info(this->backwardCorrelationImage);
-        this->warpedReferenceMeanImage->data = malloc(this->warpedReferenceMeanImage->nvox * this->warpedReferenceMeanImage->nbyper);
-
-        this->warpedReferenceSdevImage = nifti_copy_nim_info(this->backwardCorrelationImage);
-        this->warpedReferenceSdevImage->data = malloc(this->warpedReferenceSdevImage->nvox * this->warpedReferenceSdevImage->nbyper);
+        this->warpedReferenceMeanImage = nifti_dup(*this->backwardCorrelationImage, false);
+        this->warpedReferenceSdevImage = nifti_dup(*this->backwardCorrelationImage, false);
 
         // Allocate the array to store the mask of the backward image
         this->backwardMask = (int*)malloc(voxelNumber * sizeof(int));
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 2a5eb57a..873d7bf8 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -4007,8 +4007,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
       if(flowFieldImage->ext_list[0].edata!=nullptr)
       {
          // Create a field that contains the affine component only
-         affineOnly = nifti_copy_nim_info(deformationFieldImage);
-         affineOnly->data = calloc(affineOnly->nvox,affineOnly->nbyper);
+         affineOnly = nifti_dup(*deformationFieldImage, false);
          reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata),
                affineOnly,
                false);
@@ -4133,8 +4132,7 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
    else if(velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID)
    {
       // Create an image to store the flow field
-      nifti_image *flowField = nifti_copy_nim_info(deformationFieldImage);
-      flowField->data = calloc(flowField->nvox,flowField->nbyper);
+      nifti_image *flowField = nifti_dup(*deformationFieldImage, false);
       flowField->intent_code=NIFTI_INTENT_VECTOR;
       memset(flowField->intent_name, 0, 16);
       strcpy(flowField->intent_name,"NREG_TRANS");
@@ -4172,8 +4170,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
    if(velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID)
    {
       // Create an image to store the flow field
-      nifti_image *flowFieldImage = nifti_copy_nim_info(deformationFieldImage[0]);
-      flowFieldImage->data = calloc(flowFieldImage->nvox,flowFieldImage->nbyper);
+      nifti_image *flowFieldImage = nifti_dup(*deformationFieldImage[0], false);
       flowFieldImage->intent_code=NIFTI_INTENT_VECTOR;
       memset(flowFieldImage->intent_name, 0, 16);
       strcpy(flowFieldImage->intent_name,"NREG_TRANS");
@@ -4192,8 +4189,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
          if(flowFieldImage->ext_list[0].edata!=nullptr)
          {
             // Create a field that contains the affine component only
-            affineOnly = nifti_copy_nim_info(deformationFieldImage[0]);
-            affineOnly->data = calloc(affineOnly->nvox,affineOnly->nbyper);
+            affineOnly = nifti_dup(*deformationFieldImage[0], false);
             reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata),
                   affineOnly,
                   false);
@@ -4366,12 +4362,9 @@ void compute_lie_bracket(nifti_image *img1,
    }
 
 
-   // Allocate two temporary nifti images
-   nifti_image *one_two = nifti_copy_nim_info(img2);
-   nifti_image *two_one = nifti_copy_nim_info(img1);
-   // Set the temporary images to zero displacement
-   one_two->data=calloc(one_two->nvox, one_two->nbyper);
-   two_one->data=calloc(two_one->nvox, two_one->nbyper);
+   // Allocate two temporary nifti images and set them to zero displacement
+   nifti_image *one_two = nifti_dup(*img2, false);
+   nifti_image *two_one = nifti_dup(*img1, false);
    // Compute the displacement from img1
    reg_spline_cppComposition(img1,
                              two_one,
@@ -4464,8 +4457,7 @@ void compute_BCH_update1(nifti_image *img1, // current field
       reg_getDisplacementFromDeformation(img1);
 
       // r <- 2 + 1 + 0.5[2,1]
-      nifti_image *lie_bracket_img2_img1=nifti_copy_nim_info(img1);
-      lie_bracket_img2_img1->data=malloc(lie_bracket_img2_img1->nvox*lie_bracket_img2_img1->nbyper);
+      nifti_image *lie_bracket_img2_img1 = nifti_dup(*img1, false);
       compute_lie_bracket<DTYPE>(img2, img1, lie_bracket_img2_img1, use_jac);
       DTYPE *lie_bracket_img2_img1Ptr=static_cast<DTYPE *>(lie_bracket_img2_img1->data);
  #if defined (_OPENMP)
@@ -4479,8 +4471,7 @@ void compute_BCH_update1(nifti_image *img1, // current field
       if(type>1)
       {
          // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12
-         nifti_image *lie_bracket_img2_lie1=nifti_copy_nim_info(lie_bracket_img2_img1);
-         lie_bracket_img2_lie1->data=malloc(lie_bracket_img2_lie1->nvox*lie_bracket_img2_lie1->nbyper);
+         nifti_image *lie_bracket_img2_lie1 = nifti_dup(*lie_bracket_img2_img1, false);
          compute_lie_bracket<DTYPE>(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac);
          DTYPE *lie_bracket_img2_lie1Ptr=static_cast<DTYPE *>(lie_bracket_img2_lie1->data);
  #if defined (_OPENMP)
@@ -4494,8 +4485,7 @@ void compute_BCH_update1(nifti_image *img1, // current field
          if(type>2)
          {
             // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12
-            nifti_image *lie_bracket_img1_lie1=nifti_copy_nim_info(lie_bracket_img2_img1);
-            lie_bracket_img1_lie1->data=malloc(lie_bracket_img1_lie1->nvox*lie_bracket_img1_lie1->nbyper);
+            nifti_image *lie_bracket_img1_lie1 = nifti_dup(*lie_bracket_img2_img1, false);
             compute_lie_bracket<DTYPE>(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac);
             DTYPE *lie_bracket_img1_lie1Ptr=static_cast<DTYPE *>(lie_bracket_img1_lie1->data);
  #if defined (_OPENMP)
@@ -4510,8 +4500,7 @@ void compute_BCH_update1(nifti_image *img1, // current field
             if(type>3)
             {
                // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - [1,[2,[2,1]]]/24
-               nifti_image *lie_bracket_img1_lie2=nifti_copy_nim_info(lie_bracket_img2_lie1);
-               lie_bracket_img1_lie2->data=malloc(lie_bracket_img1_lie2->nvox*lie_bracket_img1_lie2->nbyper);
+               nifti_image *lie_bracket_img1_lie2 = nifti_dup(*lie_bracket_img2_lie1, false);
                compute_lie_bracket<DTYPE>(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac);
                DTYPE *lie_bracket_img1_lie2Ptr=static_cast<DTYPE *>(lie_bracket_img1_lie2->data);
  #if defined (_OPENMP)
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 7711b0ed..0869c416 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -2981,8 +2981,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
 {
 
    // A second field is allocated to store the deformation
-   nifti_image *defFieldImage = nifti_copy_nim_info(flowFieldImage);
-   defFieldImage->data = malloc(defFieldImage->nvox * defFieldImage->nbyper);
+   nifti_image *defFieldImage = nifti_dup(*flowFieldImage, false);
 
    // Remove the affine component from the flow field
    if(flowFieldImage->num_ext>0)
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index e2c424ac..fd110cf6 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -90,17 +90,14 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
     currentInputImage->data = static_cast<void*>(&inputImagePtr[current_timepoint * voxelNumber]);
 
     // Allocate an image to store the mean image
-    nifti_image *meanImage = nifti_copy_nim_info(currentInputImage);
-    meanImage->data = calloc(meanImage->nvox, meanImage->nbyper);
+    nifti_image *meanImage = nifti_dup(*currentInputImage, false);
     DTYPE* meanImgDataPtr = static_cast<DTYPE*>(meanImage->data);
 
     // Allocate an image to store the shifted image
-    nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage);
-    shiftedImage->data = malloc(shiftedImage->nvox * shiftedImage->nbyper);
+    nifti_image *shiftedImage = nifti_dup(*currentInputImage, false);
 
     // Allocation of the difference image
-    nifti_image *diff_image = nifti_copy_nim_info(currentInputImage);
-    diff_image->data = malloc(diff_image->nvox * diff_image->nbyper);
+    nifti_image *diff_image = nifti_dup(*currentInputImage, false);
 
     // Define the sigma for the convolution
     float sigma = -0.5;// negative value denotes voxel width
@@ -223,13 +220,11 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
     currentInputImage->data = static_cast<void*>(&inputImagePtr[current_timepoint * voxelNumber]);
 
     // Allocate an image to store the mean image
-    nifti_image *mean_img = nifti_copy_nim_info(currentInputImage);
-    mean_img->data = calloc(mean_img->nvox, mean_img->nbyper);
+    nifti_image *mean_img = nifti_dup(*currentInputImage, false);
     DTYPE* meanImgDataPtr = static_cast<DTYPE*>(mean_img->data);
 
     // Allocate an image to store the warped image
-    nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage);
-    shiftedImage->data = malloc(shiftedImage->nvox * shiftedImage->nbyper);
+    nifti_image *shiftedImage = nifti_dup(*currentInputImage, false);
 
     // Define the sigma for the convolution
     float sigma = -0.5;// negative value denotes voxel width
@@ -242,12 +237,10 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
     // Allocation of the difference image
     //std::vector<nifti_image *> vectNiftiImage;
     //for(int i=0;i<samplingNbr;i++) {
-    nifti_image *diff_image = nifti_copy_nim_info(currentInputImage);
-    diff_image->data = malloc(diff_image->nvox * diff_image->nbyper);
+    nifti_image *diff_image = nifti_dup(*currentInputImage, false);
     int *mask_diff_image = (int*)calloc(diff_image->nvox, sizeof(int));
 
-    nifti_image *diff_imageShifted = nifti_copy_nim_info(currentInputImage);
-    diff_imageShifted->data = malloc(diff_imageShifted->nvox * diff_imageShifted->nbyper);
+    nifti_image *diff_imageShifted = nifti_dup(*currentInputImage, false);
 
     int RSampling3D_x[6] = {+descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0};
     int RSampling3D_y[6] = {+descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset};
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 0530cfae..c2eb5c61 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -1903,11 +1903,7 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB
 template <class DTYPE>
 int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, unsigned int levelNumber, unsigned int levelToPerform) {
     // FINEST LEVEL OF REGISTRATION
-    pyramid[levelToPerform - 1] = nifti_copy_nim_info(inputImage);
-    pyramid[levelToPerform - 1]->data = calloc(pyramid[levelToPerform - 1]->nvox,
-                                               pyramid[levelToPerform - 1]->nbyper);
-    memcpy(pyramid[levelToPerform - 1]->data, inputImage->data,
-           pyramid[levelToPerform - 1]->nvox * pyramid[levelToPerform - 1]->nbyper);
+    pyramid[levelToPerform - 1] = nifti_dup(*inputImage);
     reg_tools_changeDatatype<DTYPE>(pyramid[levelToPerform - 1]);
     reg_tools_removeSCLInfo(pyramid[levelToPerform - 1]);
 
@@ -1923,11 +1919,7 @@ int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid,
     // Images for each subsequent levels are allocated and downsampled if appropriate
     for (int l = levelToPerform - 2; l >= 0; l--) {
         // Allocation of the image
-        pyramid[l] = nifti_copy_nim_info(pyramid[l + 1]);
-        pyramid[l]->data = calloc(pyramid[l]->nvox, pyramid[l]->nbyper);
-
-        memcpy(pyramid[l]->data, pyramid[l + 1]->data,
-               pyramid[l]->nvox * pyramid[l]->nbyper);
+        pyramid[l] = nifti_dup(*pyramid[l + 1]);
 
         // Downsample the image if appropriate
         bool downsampleAxis[8] = {false, true, true, true, false, false, false, false};
@@ -1945,11 +1937,7 @@ template <class DTYPE>
 int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) {
     // FINEST LEVEL OF REGISTRATION
     nifti_image **tempMaskImagePyramid = (nifti_image **)malloc(levelToPerform * sizeof(nifti_image *));
-    tempMaskImagePyramid[levelToPerform - 1] = nifti_copy_nim_info(inputMaskImage);
-    tempMaskImagePyramid[levelToPerform - 1]->data = calloc(tempMaskImagePyramid[levelToPerform - 1]->nvox,
-                                                            tempMaskImagePyramid[levelToPerform - 1]->nbyper);
-    memcpy(tempMaskImagePyramid[levelToPerform - 1]->data, inputMaskImage->data,
-           tempMaskImagePyramid[levelToPerform - 1]->nvox * tempMaskImagePyramid[levelToPerform - 1]->nbyper);
+    tempMaskImagePyramid[levelToPerform - 1] = nifti_dup(*inputMaskImage);
     reg_tools_binarise_image(tempMaskImagePyramid[levelToPerform - 1]);
     reg_tools_changeDatatype<unsigned char>(tempMaskImagePyramid[levelToPerform - 1]);
 
@@ -1968,10 +1956,7 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid,
     // Images for each subsequent levels are allocated and downsampled if appropriate
     for (int l = (int)levelToPerform - 2; l >= 0; l--) {
         // Allocation of the reference image
-        tempMaskImagePyramid[l] = nifti_copy_nim_info(tempMaskImagePyramid[l + 1]);
-        tempMaskImagePyramid[l]->data = calloc(tempMaskImagePyramid[l]->nvox, tempMaskImagePyramid[l]->nbyper);
-        memcpy(tempMaskImagePyramid[l]->data, tempMaskImagePyramid[l + 1]->data,
-               tempMaskImagePyramid[l]->nvox * tempMaskImagePyramid[l]->nbyper);
+        tempMaskImagePyramid[l] = nifti_dup(*tempMaskImagePyramid[l + 1]);
 
         // Downsample the image if appropriate
         bool downsampleAxis[8] = {false, true, true, true, false, false, false, false};
diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp
index fa3a888e..1f16c543 100644
--- a/reg-test/reg_test_bspline_deformation_field.cpp
+++ b/reg-test/reg_test_bspline_deformation_field.cpp
@@ -48,8 +48,7 @@ int main(int argc, char **argv)
     }
 
     // Create a deformation field
-    nifti_image *test_field = nifti_copy_nim_info(expectedDefField);
-    test_field->data = malloc(test_field->nvox*test_field->nbyper);
+    nifti_image *test_field = nifti_dup(*expectedDefField, false);
 
     if(useComposition)
     {
@@ -75,8 +74,7 @@ int main(int argc, char **argv)
     }
 
     // Compute the difference between the computed and expected deformation fields
-    nifti_image *diff_field = nifti_copy_nim_info(expectedDefField);
-    diff_field->data = malloc(diff_field->nvox*diff_field->nbyper);
+    nifti_image *diff_field = nifti_dup(*expectedDefField, false);
     reg_tools_subtractImageFromImage(expectedDefField, test_field, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_getMaxValue(diff_field, -1);
diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp
index f1960fca..905f71af 100644
--- a/reg-test/reg_test_coherence_affine_deformation_field.cpp
+++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp
@@ -53,11 +53,8 @@ int main(int argc, char **argv) {
     }
 
     // Create a deformation field
-    nifti_image *test_field_cpu = nifti_copy_nim_info(inputDeformationField);
-    test_field_cpu->data = malloc(test_field_cpu->nvox * test_field_cpu->nbyper);
-
-    nifti_image *test_field_gpu = nifti_copy_nim_info(inputDeformationField);
-    test_field_gpu->data = malloc(test_field_gpu->nvox * test_field_gpu->nbyper);
+    nifti_image *test_field_cpu = nifti_dup(*inputDeformationField, false);
+    nifti_image *test_field_gpu = nifti_dup(*inputDeformationField, false);
 
     // Compute the affine deformation field
     unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
@@ -82,8 +79,7 @@ int main(int argc, char **argv) {
     test_field_gpu = conGpu->GetDeformationField();
 
     // Compute the difference between the computed and inputted deformation field
-    nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField);
-    diff_field->data = malloc(diff_field->nvox * diff_field->nbyper);
+    nifti_image *diff_field = nifti_dup(*inputDeformationField, false);
     reg_tools_subtractImageFromImage(inputDeformationField, test_field_cpu, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_GetMaxValue(diff_field, -1);
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
index 04007080..3463640e 100644
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ b/reg-test/reg_test_coherence_interpolation.cpp
@@ -50,10 +50,8 @@ int main(int argc, char **argv) {
     }
 
     // Initialise warped images
-    nifti_image *cpuWarped = nifti_copy_nim_info(referenceImage);
-    cpuWarped->data = malloc(cpuWarped->nvox * cpuWarped->nbyper);
-    nifti_image *gpuWarped = nifti_copy_nim_info(referenceImage);
-    gpuWarped->data = malloc(gpuWarped->nvox * gpuWarped->nbyper);
+    nifti_image *cpuWarped = nifti_dup(*referenceImage, false);
+    nifti_image *gpuWarped = nifti_dup(*referenceImage, false);
 
     int *tempMask = (int *)calloc(referenceImage->nvox, sizeof(int));
 
@@ -88,8 +86,7 @@ int main(int argc, char **argv) {
     }
 
     // Compute the difference between the warped images
-    nifti_image *diff_field = nifti_copy_nim_info(referenceImage);
-    diff_field->data = malloc(diff_field->nvox * diff_field->nbyper);
+    nifti_image *diff_field = nifti_dup(*referenceImage, false);
 
     // Compute the difference between the computed and inputted warped image
     reg_tools_subtractImageFromImage(cpuWarped, gpuWarped, diff_field);
diff --git a/reg-test/reg_test_compose_deformation_field.cpp b/reg-test/reg_test_compose_deformation_field.cpp
index d3081015..0d2cdc5e 100644
--- a/reg-test/reg_test_compose_deformation_field.cpp
+++ b/reg-test/reg_test_compose_deformation_field.cpp
@@ -36,9 +36,7 @@ int main(int argc, char **argv)
    }
 
    // Create a deformation field
-   nifti_image *test_field=nifti_copy_nim_info(inputDeformationField);
-   test_field->data=malloc(test_field->nvox*test_field->nbyper);
-   memcpy(test_field->data, inputDeformationField->data, test_field->nvox*test_field->nbyper);
+   nifti_image *test_field = nifti_dup(*inputDeformationField);
 
    // Compute the non-linear deformation field
    reg_defField_compose(inputDeformationField,
diff --git a/reg-test/reg_test_computation_time.cpp b/reg-test/reg_test_computation_time.cpp
index f883e70f..cfe24ad0 100644
--- a/reg-test/reg_test_computation_time.cpp
+++ b/reg-test/reg_test_computation_time.cpp
@@ -49,8 +49,7 @@ int main(int argc, char **argv)
     }
 
     // Allocate a warped image
-    nifti_image *warpedImage = nifti_copy_nim_info(inputImageOne);
-    warpedImage->data = malloc(warpedImage->nvox*warpedImage->nbyper);
+    nifti_image *warpedImage = nifti_dup(*inputImageOne, false);
 
     // Create mask
     int *mask = (int *)calloc(inputImageOne->nvox,sizeof(int));
@@ -62,11 +61,8 @@ int main(int argc, char **argv)
     defFieldOne->nu=defFieldOne->dim[5]=defFieldOne->nz>1?3:2;
     defFieldOne->nvox = CalcVoxelNumber(*defFieldOne, defFieldOne->ndim);
     defFieldOne->data = malloc(defFieldOne->nvox*defFieldOne->nbyper);
-    nifti_image *defFieldTwo=nifti_copy_nim_info(defFieldOne);
-    defFieldTwo->data = malloc(defFieldTwo->nvox*defFieldTwo->nbyper);
-    nifti_image *defFieldThr=nifti_copy_nim_info(defFieldOne);
-    defFieldThr->data = malloc(defFieldThr->nvox*defFieldThr->nbyper);
-
+    nifti_image *defFieldTwo=nifti_dup(*defFieldOne, false);
+    nifti_image *defFieldThr=nifti_dup(*defFieldOne, false);
 
     // Generate a control point grids
     nifti_image *splineGridOne = nullptr;
@@ -78,8 +74,7 @@ int main(int argc, char **argv)
     reg_createControlPointGrid<float>(&splineGridOne,
                                       inputImageOne,
                                       spacing);
-    nifti_image *splineGridTwo = nifti_copy_nim_info(splineGridOne);
-    splineGridTwo->data = malloc(splineGridTwo->nvox*splineGridTwo->nbyper);
+    nifti_image *splineGridTwo = nifti_dup(*splineGridOne, false);
 
     // Generate an affine matrix
     mat44 affine;reg_mat44_eye(&affine);
diff --git a/reg-test/reg_test_convolution.cpp b/reg-test/reg_test_convolution.cpp
index 7d0e25b1..54bd7232 100644
--- a/reg-test/reg_test_convolution.cpp
+++ b/reg-test/reg_test_convolution.cpp
@@ -38,8 +38,7 @@ int main(int argc, char **argv)
     reg_tools_changeDatatype<double>(expectedFile);
 
     // Compute the difference between the computed and expected deformation fields
-    nifti_image *diff_file = nifti_copy_nim_info(expectedFile);
-    diff_file->data = malloc(diff_file->nvox*diff_file->nbyper);
+    nifti_image *diff_file = nifti_dup(*expectedFile, false);
     reg_tools_subtractImageFromImage(expectedFile, referenceImage, diff_file);
     reg_tools_abs_image(diff_file);
     double max_difference = reg_tools_getMaxValue(diff_file, -1);
diff --git a/reg-test/reg_test_linearElasticityGradient.cpp b/reg-test/reg_test_linearElasticityGradient.cpp
index 4265b270..eb55ef43 100644
--- a/reg-test/reg_test_linearElasticityGradient.cpp
+++ b/reg-test/reg_test_linearElasticityGradient.cpp
@@ -35,8 +35,7 @@ int main(int argc, char **argv)
     }
 
     // Compute the linear elasticity gradient
-    nifti_image *obtainedGradient = nifti_copy_nim_info(expectedGradientImage);
-    obtainedGradient->data=calloc(obtainedGradient->nvox,obtainedGradient->nbyper);
+    nifti_image *obtainedGradient = nifti_dup(*expectedGradientImage, false);
     switch(computationType){
     case 0: // Approximation based on the control point grid
        reg_spline_approxLinearEnergyGradient(transImage,
@@ -59,8 +58,7 @@ int main(int argc, char **argv)
        reg_exit();
     }
     // Compute the difference between the computed and expected gradient
-    nifti_image *diff_field = nifti_copy_nim_info(obtainedGradient);
-    diff_field->data = malloc(diff_field->nvox*diff_field->nbyper);
+    nifti_image *diff_field = nifti_dup(*obtainedGradient, false);
     reg_tools_subtractImageFromImage(obtainedGradient, expectedGradientImage, diff_field);
     reg_tools_abs_image(diff_field);
     double max_difference = reg_tools_getMaxValue(diff_field, -1);
diff --git a/reg-test/reg_test_nonlinear_deformation_field.cpp b/reg-test/reg_test_nonlinear_deformation_field.cpp
index d208b353..d697271a 100644
--- a/reg-test/reg_test_nonlinear_deformation_field.cpp
+++ b/reg-test/reg_test_nonlinear_deformation_field.cpp
@@ -44,11 +44,9 @@ int main(int argc, char **argv)
    }
 
    // Create a deformation field
-   nifti_image *test_field=nifti_copy_nim_info(inputDeformationField);
-   test_field->data=malloc(test_field->nvox*test_field->nbyper);
+   nifti_image *test_field = nifti_dup(*inputDeformationField, false);
 
    // Compute the non-linear deformation field
-   memset(test_field->data, 0, test_field->nvox*test_field->nbyper);
    reg_getDeformationFromDisplacement(test_field);
    reg_spline_getDeformationField(controlPointGridImage,
                                   test_field,

From c807b2986c3ff839a8fe0fb956d7fed3dac8b938 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 21 Feb 2023 16:11:24 +0000
Subject: [PATCH 059/314] Refactorise _reg_tools

---
 niftyreg_build_version.txt |    2 +-
 reg-lib/Compute.cpp        |    5 +-
 reg-lib/cpu/_reg_tools.cpp | 1119 ++++++++++++++++--------------------
 reg-lib/cpu/_reg_tools.h   |   24 +-
 4 files changed, 526 insertions(+), 624 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 730a054a..c4597e53 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-172
+173
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 800d821f..138a739f 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -132,13 +132,12 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active
 }
 /* *************************************************************** */
 double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) {
-    // TODO Fix reg_getMaximalLength to accept optimiseX, optimiseY, optimiseZ
     nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
     switch (transformationGradient->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        return reg_getMaximalLength<float>(transformationGradient);
+        return reg_getMaximalLength<float>(transformationGradient, optimiseX, optimiseY, optimiseZ);
     case NIFTI_TYPE_FLOAT64:
-        return reg_getMaximalLength<double>(transformationGradient);
+        return reg_getMaximalLength<double>(transformationGradient, optimiseX, optimiseY, optimiseZ);
     }
     return 0;
 }
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index c2eb5c61..ee023059 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -11,7 +11,6 @@
  *
  */
 
-#include <cmath>
 #include "_reg_tools.h"
 
 /* *************************************************************** */
@@ -90,60 +89,60 @@ bool reg_isAnImageFileName(const char *name) {
     return false;
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_intensityRescale_core(nifti_image *image,
                                int timePoint,
                                float newMin,
                                float newMax) {
-    DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
+    DataType *imagePtr = static_cast<DataType*>(image->data);
     const size_t voxelNumber = CalcVoxelNumber(*image);
 
     // The rescaling is done for each volume independently
-    DTYPE *volumePtr = &imagePtr[timePoint * voxelNumber];
-    DTYPE currentMin = 0;
-    DTYPE currentMax = 0;
+    DataType *volumePtr = &imagePtr[timePoint * voxelNumber];
+    DataType currentMin = 0;
+    DataType currentMax = 0;
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        currentMin = (DTYPE)std::numeric_limits<unsigned char>::max();
+        currentMin = (DataType)std::numeric_limits<unsigned char>::max();
         currentMax = 0;
         break;
     case NIFTI_TYPE_INT8:
-        currentMin = (DTYPE)std::numeric_limits<char>::max();
-        currentMax = (DTYPE)std::numeric_limits<char>::min();
+        currentMin = (DataType)std::numeric_limits<char>::max();
+        currentMax = (DataType)std::numeric_limits<char>::min();
         break;
     case NIFTI_TYPE_UINT16:
-        currentMin = (DTYPE)std::numeric_limits<unsigned short>::max();
-        currentMax = (DTYPE)std::numeric_limits<unsigned short>::min();
+        currentMin = (DataType)std::numeric_limits<unsigned short>::max();
+        currentMax = (DataType)std::numeric_limits<unsigned short>::min();
         break;
     case NIFTI_TYPE_INT16:
-        currentMin = (DTYPE)std::numeric_limits<short>::max();
-        currentMax = (DTYPE)std::numeric_limits<short>::min();
+        currentMin = (DataType)std::numeric_limits<short>::max();
+        currentMax = (DataType)std::numeric_limits<short>::min();
         break;
     case NIFTI_TYPE_UINT32:
-        currentMin = (DTYPE)std::numeric_limits<unsigned int>::max();
-        currentMax = (DTYPE)std::numeric_limits<unsigned int>::min();
+        currentMin = (DataType)std::numeric_limits<unsigned int>::max();
+        currentMax = (DataType)std::numeric_limits<unsigned int>::min();
         break;
     case NIFTI_TYPE_INT32:
-        currentMin = (DTYPE)std::numeric_limits<int>::max();
-        currentMax = (DTYPE)std::numeric_limits<int>::min();
+        currentMin = (DataType)std::numeric_limits<int>::max();
+        currentMax = (DataType)std::numeric_limits<int>::min();
         break;
     case NIFTI_TYPE_FLOAT32:
-        currentMin = (DTYPE)std::numeric_limits<float>::max();
-        currentMax = (DTYPE)std::numeric_limits<float>::min();
+        currentMin = (DataType)std::numeric_limits<float>::max();
+        currentMax = (DataType)std::numeric_limits<float>::min();
         break;
     case NIFTI_TYPE_FLOAT64:
-        currentMin = (DTYPE)std::numeric_limits<double>::max();
-        currentMax = (DTYPE)std::numeric_limits<double>::min();
+        currentMin = (DataType)std::numeric_limits<double>::max();
+        currentMax = (DataType)std::numeric_limits<double>::min();
         break;
     }
 
     // Extract the minimal and maximal values from the current volume
     if (image->scl_slope == 0) image->scl_slope = 1.0f;
     for (size_t index = 0; index < voxelNumber; index++) {
-        DTYPE value = (DTYPE)(*volumePtr++ * image->scl_slope + image->scl_inter);
+        DataType value = (DataType)(*volumePtr++ * image->scl_slope + image->scl_inter);
         if (value == value) {
-            currentMin = (currentMin < value) ? currentMin : value;
-            currentMax = (currentMax > value) ? currentMax : value;
+            currentMin = std::min(currentMin, value);
+            currentMax = std::max(currentMax, value);
         }
     }
 
@@ -168,7 +167,7 @@ void reg_intensityRescale_core(nifti_image *image,
             // Rescale the value using the specified range
             value = value * newDiff + newMin;
         }
-        *volumePtr++ = (DTYPE)value;
+        *volumePtr++ = (DataType)value;
     }
     image->scl_slope = 1.f;
     image->scl_inter = 0.f;
@@ -210,14 +209,13 @@ void reg_intensityRescale(nifti_image *image,
     }
 }
 /* *************************************************************** */
-template<class DTYPE>
-void reg_tools_removeSCLInfo_core(nifti_image *image) {
+template<class DataType>
+void reg_tools_removeSCLInfo(nifti_image *image) {
     if (image->scl_slope == 1.f && image->scl_inter == 0.f)
         return;
-    DTYPE *imgPtr = static_cast<DTYPE*>(image->data);
+    DataType *imgPtr = static_cast<DataType*>(image->data);
     for (size_t i = 0; i < image->nvox; ++i) {
-        *imgPtr = *imgPtr * (DTYPE)image->scl_slope + (DTYPE)image->scl_inter;
-        imgPtr++;
+        imgPtr[i] = imgPtr[i] * (DataType)image->scl_slope + (DataType)image->scl_inter;
     }
     image->scl_slope = 1.f;
     image->scl_inter = 0.f;
@@ -226,28 +224,28 @@ void reg_tools_removeSCLInfo_core(nifti_image *image) {
 void reg_tools_removeSCLInfo(nifti_image *image) {
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_removeSCLInfo_core<unsigned char>(image);
+        reg_tools_removeSCLInfo<unsigned char>(image);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_removeSCLInfo_core<char>(image);
+        reg_tools_removeSCLInfo<char>(image);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_removeSCLInfo_core<unsigned short>(image);
+        reg_tools_removeSCLInfo<unsigned short>(image);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_removeSCLInfo_core<short>(image);
+        reg_tools_removeSCLInfo<short>(image);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_removeSCLInfo_core<unsigned int>(image);
+        reg_tools_removeSCLInfo<unsigned int>(image);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_removeSCLInfo_core<int>(image);
+        reg_tools_removeSCLInfo<int>(image);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_removeSCLInfo_core<float>(image);
+        reg_tools_removeSCLInfo<float>(image);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_removeSCLInfo_core<double>(image);
+        reg_tools_removeSCLInfo<double>(image);
         break;
     default:
         reg_print_fct_error("reg_tools_removeSCLInfo");
@@ -257,7 +255,7 @@ void reg_tools_removeSCLInfo(nifti_image *image) {
 }
 /* *************************************************************** */
 void reg_getRealImageSpacing(nifti_image *image, float *spacingValues) {
-    float indexVoxel1[3] = {0, 0, 0};
+    float indexVoxel1[3] = { 0, 0, 0 };
     float indexVoxel2[3], realVoxel1[3], realVoxel2[3];
     reg_mat44_mul(&(image->sto_xyz), indexVoxel1, realVoxel1);
 
@@ -282,58 +280,54 @@ void reg_getRealImageSpacing(nifti_image *image, float *spacingValues) {
 //this function will threshold an image to the values provided,
 //set the scl_slope and sct_inter of the image to 1 and 0 (SSD uses actual image data values),
 //and sets cal_min and cal_max to have the min/max image data values
-template<class T, class DTYPE>
-void reg_thresholdImage2(nifti_image *image, T lowThr, T upThr) {
-    DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
+template<class T, class DataType>
+void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) {
+    DataType *imagePtr = static_cast<DataType*>(image->data);
     T currentMin = std::numeric_limits<T>::max();
-    T currentMax = -std::numeric_limits<T>::max();
+    T currentMax = std::numeric_limits<T>::min();
 
     if (image->scl_slope == 0)image->scl_slope = 1.0;
 
-    for (unsigned int index = 0; index < image->nvox; index++) {
-        T value = (T)(*imagePtr * image->scl_slope + image->scl_inter);
+    for (size_t i = 0; i < image->nvox; i++) {
+        T value = (T)(imagePtr[i] * image->scl_slope + image->scl_inter);
         if (value == value) {
-            if (value < lowThr) {
-                value = lowThr;
-            } else if (value > upThr) {
-                value = upThr;
-            }
-            currentMin = (currentMin < value) ? currentMin : value;
-            currentMax = (currentMax > value) ? currentMax : value;
+            value = std::clamp(value, lowThr, upThr);
+            currentMin = std::min(currentMin, value);
+            currentMax = std::max(currentMax, value);
         }
-        *imagePtr++ = (DTYPE)value;
+        imagePtr[i] = (DataType)value;
     }
 
-    image->cal_min = currentMin;
-    image->cal_max = currentMax;
+    image->cal_min = static_cast<float>(currentMin);
+    image->cal_max = static_cast<float>(currentMax);
 }
 /* *************************************************************** */
 template<class T>
 void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) {
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_thresholdImage2<T, unsigned char>(image, lowThr, upThr);
+        reg_thresholdImage<T, unsigned char>(image, lowThr, upThr);
         break;
     case NIFTI_TYPE_INT8:
-        reg_thresholdImage2<T, char>(image, lowThr, upThr);
+        reg_thresholdImage<T, char>(image, lowThr, upThr);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_thresholdImage2<T, unsigned short>(image, lowThr, upThr);
+        reg_thresholdImage<T, unsigned short>(image, lowThr, upThr);
         break;
     case NIFTI_TYPE_INT16:
-        reg_thresholdImage2<T, short>(image, lowThr, upThr);
+        reg_thresholdImage<T, short>(image, lowThr, upThr);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_thresholdImage2<T, unsigned int>(image, lowThr, upThr);
+        reg_thresholdImage<T, unsigned int>(image, lowThr, upThr);
         break;
     case NIFTI_TYPE_INT32:
-        reg_thresholdImage2<T, int>(image, lowThr, upThr);
+        reg_thresholdImage<T, int>(image, lowThr, upThr);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_thresholdImage2<T, float>(image, lowThr, upThr);
+        reg_thresholdImage<T, float>(image, lowThr, upThr);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_thresholdImage2<T, double>(image, lowThr, upThr);
+        reg_thresholdImage<T, double>(image, lowThr, upThr);
         break;
     default:
         reg_print_fct_error("reg_thresholdImage");
@@ -344,132 +338,111 @@ void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) {
 template void reg_thresholdImage<float>(nifti_image*, float, float);
 template void reg_thresholdImage<double>(nifti_image*, double, double);
 /* *************************************************************** */
-template <class PrecisionTYPE, class DTYPE>
-PrecisionTYPE reg_getMaximalLength2D(const nifti_image *image) {
+template <class PrecisionType, class DataType>
+PrecisionType reg_getMaximalLength(const nifti_image *image,
+                                   const bool& optimiseX,
+                                   const bool& optimiseY,
+                                   const bool& optimiseZ) {
     const size_t voxelNumber = CalcVoxelNumber(*image);
-    const DTYPE *dataPtrX = static_cast<DTYPE*>(image->data);
-    const DTYPE *dataPtrY = &dataPtrX[voxelNumber];
-    PrecisionTYPE max = 0;
+    const DataType *dataPtrX = static_cast<DataType*>(image->data);
+    const DataType *dataPtrY = &dataPtrX[voxelNumber];
+    const DataType *dataPtrZ = &dataPtrY[voxelNumber];
+    PrecisionType max = 0;
     for (size_t i = 0; i < voxelNumber; i++) {
-        PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++);
-        PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++);
-        PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX * valX + valY * valY));
-        max = (length > max) ? length : max;
+        PrecisionType valX = optimiseX ? static_cast<PrecisionType>(*dataPtrX++) : 0;
+        PrecisionType valY = optimiseY ? static_cast<PrecisionType>(*dataPtrY++) : 0;
+        PrecisionType valZ = optimiseZ ? static_cast<PrecisionType>(*dataPtrZ++) : 0;
+        PrecisionType length = static_cast<PrecisionType>(sqrt(valX * valX + valY * valY + valZ * valZ));
+        max = std::max(length, max);
     }
     return max;
 }
 /* *************************************************************** */
-template <class PrecisionTYPE, class DTYPE>
-PrecisionTYPE reg_getMaximalLength3D(const nifti_image *image) {
-    const size_t voxelNumber = CalcVoxelNumber(*image);
-    const DTYPE *dataPtrX = static_cast<DTYPE*>(image->data);
-    const DTYPE *dataPtrY = &dataPtrX[voxelNumber];
-    const DTYPE *dataPtrZ = &dataPtrY[voxelNumber];
-    PrecisionTYPE max = 0;
-    for (int i = 0; i < voxelNumber; i++) {
-        PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++);
-        PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++);
-        PrecisionTYPE valZ = (PrecisionTYPE)(*dataPtrZ++);
-        PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX * valX + valY * valY + valZ * valZ));
-        max = (length > max) ? length : max;
-    }
-    return max;
-}
-/* *************************************************************** */
-template <class PrecisionTYPE>
-PrecisionTYPE reg_getMaximalLength(const nifti_image *image) {
-    if (image->nz == 1) {
-        switch (image->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            return reg_getMaximalLength2D<PrecisionTYPE, float>(image);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            return reg_getMaximalLength2D<PrecisionTYPE, double>(image);
-            break;
-        }
-    } else {
-        switch (image->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            return reg_getMaximalLength3D<PrecisionTYPE, float>(image);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            return reg_getMaximalLength3D<PrecisionTYPE, double>(image);
-            break;
-        }
+template <class PrecisionType>
+PrecisionType reg_getMaximalLength(const nifti_image *image,
+                                   const bool& optimiseX,
+                                   const bool& optimiseY,
+                                   const bool& optimiseZ) {
+    switch (image->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        return reg_getMaximalLength<PrecisionType, float>(image, optimiseX, optimiseY, image->nz == 1 ? false : optimiseZ);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        return reg_getMaximalLength<PrecisionType, double>(image, optimiseX, optimiseY, image->nz == 1 ? false : optimiseZ);
+        break;
     }
     return EXIT_SUCCESS;
 }
-template float reg_getMaximalLength<float>(const nifti_image*);
-template double reg_getMaximalLength<double>(const nifti_image*);
+template float reg_getMaximalLength<float>(const nifti_image*, const bool&, const bool&, const bool&);
+template double reg_getMaximalLength<double>(const nifti_image*, const bool&, const bool&, const bool&);
 /* *************************************************************** */
-template <class NewTYPE, class DTYPE>
-void reg_tools_changeDatatype1(nifti_image *image, int type) {
+template <class NewType, class DataType>
+void reg_tools_changeDatatype(nifti_image *image, int type) {
     // the initial array is saved and freed
-    DTYPE *initialValue = (DTYPE*)malloc(image->nvox * sizeof(DTYPE));
-    memcpy(initialValue, image->data, image->nvox * sizeof(DTYPE));
+    DataType *initialValue = (DataType*)malloc(image->nvox * sizeof(DataType));
+    memcpy(initialValue, image->data, image->nvox * sizeof(DataType));
 
     // the new array is allocated and then filled
     if (type > -1) {
         image->datatype = type;
     } else {
-        if (sizeof(NewTYPE) == sizeof(unsigned char)) {
+        if (sizeof(NewType) == sizeof(unsigned char)) {
             image->datatype = NIFTI_TYPE_UINT8;
 #ifndef NDEBUG
             reg_print_msg_debug("new datatype is NIFTI_TYPE_UINT8");
 #endif
-        } else if (sizeof(NewTYPE) == sizeof(float)) {
+        } else if (sizeof(NewType) == sizeof(float)) {
             image->datatype = NIFTI_TYPE_FLOAT32;
 #ifndef NDEBUG
             reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT32");
 #endif
-        } else if (sizeof(NewTYPE) == sizeof(double)) {
+        } else if (sizeof(NewType) == sizeof(double)) {
             image->datatype = NIFTI_TYPE_FLOAT64;
 #ifndef NDEBUG
             reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT64");
 #endif
         } else {
-            reg_print_fct_error("reg_tools_changeDatatype1");
+            reg_print_fct_error("reg_tools_changeDatatype");
             reg_print_msg_error("Only change to unsigned char, float or double are supported");
             reg_exit();
         }
     }
     free(image->data);
-    image->nbyper = sizeof(NewTYPE);
-    image->data = calloc(image->nvox, sizeof(NewTYPE));
-    NewTYPE *dataPtr = static_cast<NewTYPE *>(image->data);
-    for (size_t i = 0; i < image->nvox; i++) {
-        dataPtr[i] = (NewTYPE)(initialValue[i]);
-    }
+    image->nbyper = sizeof(NewType);
+    image->data = calloc(image->nvox, sizeof(NewType));
+    NewType *dataPtr = static_cast<NewType*>(image->data);
+    for (size_t i = 0; i < image->nvox; i++)
+        dataPtr[i] = static_cast<NewType>(initialValue[i]);
 
     free(initialValue);
 }
 /* *************************************************************** */
-template <class NewTYPE>
+template <class NewType>
 void reg_tools_changeDatatype(nifti_image *image, int type) {
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_changeDatatype1<NewTYPE, unsigned char>(image, type);
+        reg_tools_changeDatatype<NewType, unsigned char>(image, type);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_changeDatatype1<NewTYPE, char>(image, type);
+        reg_tools_changeDatatype<NewType, char>(image, type);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_changeDatatype1<NewTYPE, unsigned short>(image, type);
+        reg_tools_changeDatatype<NewType, unsigned short>(image, type);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_changeDatatype1<NewTYPE, short>(image, type);
+        reg_tools_changeDatatype<NewType, short>(image, type);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_changeDatatype1<NewTYPE, unsigned int>(image, type);
+        reg_tools_changeDatatype<NewType, unsigned int>(image, type);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_changeDatatype1<NewTYPE, int>(image, type);
+        reg_tools_changeDatatype<NewType, int>(image, type);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_changeDatatype1<NewTYPE, float>(image, type);
+        reg_tools_changeDatatype<NewType, float>(image, type);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_changeDatatype1<NewTYPE, double>(image, type);
+        reg_tools_changeDatatype<NewType, double>(image, type);
         break;
     default:
         reg_print_fct_error("reg_tools_changeDatatype");
@@ -486,14 +459,36 @@ template void reg_tools_changeDatatype<int>(nifti_image*, int);
 template void reg_tools_changeDatatype<float>(nifti_image*, int);
 template void reg_tools_changeDatatype<double>(nifti_image*, int);
 /* *************************************************************** */
-template <class TYPE1>
+struct Operation {
+    enum class Type { Add, Subtract, Multiply, Divide } type;
+    Operation(Type type) : type(type) {}
+    double operator()(const double& lhs, const double& rhs) const {
+        switch (type) {
+        case Type::Add:
+            return lhs + rhs;
+        case Type::Subtract:
+            return lhs - rhs;
+        case Type::Multiply:
+            return lhs * rhs;
+        case Type::Divide:
+            return lhs / rhs;
+        default:
+            reg_print_fct_error("Operation::operator()");
+            reg_print_msg_error("Unsupported operation");
+            reg_exit();
+            return 0;
+        }
+    }
+};
+/* *************************************************************** */
+template <class Type>
 void reg_tools_operationImageToImage(const nifti_image *img1,
                                      const nifti_image *img2,
                                      nifti_image *res,
-                                     int type) {
-    const TYPE1 *img1Ptr = static_cast<TYPE1*>(img1->data);
-    const TYPE1 *img2Ptr = static_cast<TYPE1*>(img2->data);
-    TYPE1 *resPtr = static_cast<TYPE1*>(res->data);
+                                     const Operation& operation) {
+    const Type *img1Ptr = static_cast<Type*>(img1->data);
+    const Type *img2Ptr = static_cast<Type*>(img2->data);
+    Type *resPtr = static_cast<Type*>(res->data);
 
     const float sclSlope1 = img1->scl_slope == 0 ? 1 : img1->scl_slope;
     const float sclSlope2 = img2->scl_slope == 0 ? 1 : img2->scl_slope;
@@ -509,54 +504,14 @@ void reg_tools_operationImageToImage(const nifti_image *img1,
     const size_t voxelNumber = res->nvox;
 #endif
 
-    switch (type) {
-    case 0:
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2)
-#endif // _OPENMP
-        for (i = 0; i < voxelNumber; i++)
-            resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) +
-                                 ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) -
-                                 (double)img1->scl_inter) / (double)sclSlope1);
-        break;
-    case 1:
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2)
-#endif // _OPENMP
-        for (i = 0; i < voxelNumber; i++) {
-            resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) -
-                                 ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) -
-                                 (double)img1->scl_inter) / (double)sclSlope1);
-        }
-        break;
-    case 2:
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2)
-#endif // _OPENMP
-        for (i = 0; i < voxelNumber; i++) {
-            resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) *
-                                 ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) -
-                                 (double)img1->scl_inter) / (double)sclSlope1);
-        }
-        break;
-    case 3:
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   private(i) \
-   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2)
-#endif // _OPENMP
-        for (i = 0; i < voxelNumber; i++)
-            resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) /
-                                 ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) -
-                                 (double)img1->scl_inter) / (double)sclSlope1);
-        break;
-    }
+   shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2,operation)
+#endif
+    for (i = 0; i < voxelNumber; i++)
+        resPtr[i] = Type((operation((double)img1Ptr[i] * sclSlope1 + img1->scl_inter,
+                                    (double)img2Ptr[i] * sclSlope2 + img2->scl_inter) - img1->scl_inter) / sclSlope1);
 }
 /* *************************************************************** */
 void reg_tools_addImageToImage(const nifti_image *img1,
@@ -572,30 +527,31 @@ void reg_tools_addImageToImage(const nifti_image *img1,
         reg_print_msg_error("Input images are expected to have the same size");
         reg_exit();
     }
+    Operation operation(Operation::Type::Add);
     switch (img1->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 0);
+        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_operationImageToImage<char>(img1, img2, res, 0);
+        reg_tools_operationImageToImage<char>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 0);
+        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_operationImageToImage<short>(img1, img2, res, 0);
+        reg_tools_operationImageToImage<short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 0);
+        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_operationImageToImage<int>(img1, img2, res, 0);
+        reg_tools_operationImageToImage<int>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_operationImageToImage<float>(img1, img2, res, 0);
+        reg_tools_operationImageToImage<float>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_operationImageToImage<double>(img1, img2, res, 0);
+        reg_tools_operationImageToImage<double>(img1, img2, res, operation);
         break;
     default:
         reg_print_fct_error("reg_tools_addImageToImage");
@@ -617,30 +573,31 @@ void reg_tools_subtractImageFromImage(const nifti_image *img1,
         reg_print_msg_error("Input images are expected to have the same size");
         reg_exit();
     }
+    Operation operation(Operation::Type::Subtract);
     switch (img1->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 1);
+        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_operationImageToImage<char>(img1, img2, res, 1);
+        reg_tools_operationImageToImage<char>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 1);
+        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_operationImageToImage<short>(img1, img2, res, 1);
+        reg_tools_operationImageToImage<short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 1);
+        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_operationImageToImage<int>(img1, img2, res, 1);
+        reg_tools_operationImageToImage<int>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_operationImageToImage<float>(img1, img2, res, 1);
+        reg_tools_operationImageToImage<float>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_operationImageToImage<double>(img1, img2, res, 1);
+        reg_tools_operationImageToImage<double>(img1, img2, res, operation);
         break;
     default:
         reg_print_fct_error("reg_tools_subtractImageFromImage");
@@ -662,30 +619,31 @@ void reg_tools_multiplyImageToImage(const nifti_image *img1,
         reg_print_msg_error("Input images are expected to have the same size");
         reg_exit();
     }
+    Operation operation(Operation::Type::Multiply);
     switch (img1->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 2);
+        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_operationImageToImage<char>(img1, img2, res, 2);
+        reg_tools_operationImageToImage<char>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 2);
+        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_operationImageToImage<short>(img1, img2, res, 2);
+        reg_tools_operationImageToImage<short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 2);
+        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_operationImageToImage<int>(img1, img2, res, 2);
+        reg_tools_operationImageToImage<int>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_operationImageToImage<float>(img1, img2, res, 2);
+        reg_tools_operationImageToImage<float>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_operationImageToImage<double>(img1, img2, res, 2);
+        reg_tools_operationImageToImage<double>(img1, img2, res, operation);
         break;
     default:
         reg_print_fct_error("reg_tools_multiplyImageToImage");
@@ -707,30 +665,31 @@ void reg_tools_divideImageToImage(const nifti_image *img1,
         reg_print_msg_error("Input images are expected to have the same size");
         reg_exit();
     }
+    Operation operation(Operation::Type::Divide);
     switch (img1->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, 3);
+        reg_tools_operationImageToImage<unsigned char>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_operationImageToImage<char>(img1, img2, res, 3);
+        reg_tools_operationImageToImage<char>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, 3);
+        reg_tools_operationImageToImage<unsigned short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_operationImageToImage<short>(img1, img2, res, 3);
+        reg_tools_operationImageToImage<short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, 3);
+        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_operationImageToImage<int>(img1, img2, res, 3);
+        reg_tools_operationImageToImage<int>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_operationImageToImage<float>(img1, img2, res, 3);
+        reg_tools_operationImageToImage<float>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_operationImageToImage<double>(img1, img2, res, 3);
+        reg_tools_operationImageToImage<double>(img1, img2, res, operation);
         break;
     default:
         reg_print_fct_error("reg_tools_divideImageToImage");
@@ -739,13 +698,13 @@ void reg_tools_divideImageToImage(const nifti_image *img1,
     }
 }
 /* *************************************************************** */
-template <class TYPE1>
+template <class Type>
 void reg_tools_operationValueToImage(const nifti_image *img,
                                      nifti_image *res,
                                      float val,
-                                     int type) {
-    const TYPE1 *imgPtr = static_cast<TYPE1*>(img->data);
-    TYPE1 *resPtr = static_cast<TYPE1*>(res->data);
+                                     const Operation& operation) {
+    const Type *imgPtr = static_cast<Type*>(img->data);
+    Type *resPtr = static_cast<Type*>(res->data);
 
     const float sclSlope = img->scl_slope == 0 ? 1 : img->scl_slope;
 
@@ -760,48 +719,13 @@ void reg_tools_operationValueToImage(const nifti_image *img,
     const size_t voxelNumber = res->nvox;
 #endif
 
-    switch (type) {
-    case 0:
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   private(i) \
-   shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope)
-#endif // _OPENMP
-        for (i = 0; i < voxelNumber; i++)
-            resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) +
-                                  (double)val) - (double)img->scl_inter) / (double)sclSlope);
-        break;
-    case 1:
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(i) \
-   shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope)
-#endif // _OPENMP
-        for (i = 0; i < voxelNumber; i++)
-            resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) -
-                                  (double)val) - (double)img->scl_inter) / (double)sclSlope);
-        break;
-    case 2:
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   private(i) \
-   shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope)
-#endif // _OPENMP
-        for (i = 0; i < voxelNumber; i++)
-            resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) *
-                                  (double)val) - (double)img->scl_inter) / (double)sclSlope);
-        break;
-    case 3:
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   private(i) \
-   shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope)
-#endif // _OPENMP
-        for (i = 0; i < voxelNumber; i++)
-            resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) /
-                                  (double)val) - (double)img->scl_inter) / (double)sclSlope);
-        break;
-    }
+   shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope,operation)
+#endif
+    for (i = 0; i < voxelNumber; i++)
+        resPtr[i] = Type((operation((double)imgPtr[i] * sclSlope + img->scl_inter, val) - img->scl_inter) / sclSlope);
 }
 /* *************************************************************** */
 void reg_tools_addValueToImage(const nifti_image *img,
@@ -817,30 +741,31 @@ void reg_tools_addValueToImage(const nifti_image *img,
         reg_print_msg_error("Input images are expected to have the same size");
         reg_exit();
     }
+    Operation operation(Operation::Type::Add);
     switch (img->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_operationValueToImage<unsigned char>(img, res, val, 0);
+        reg_tools_operationValueToImage<unsigned char>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_operationValueToImage<char>(img, res, val, 0);
+        reg_tools_operationValueToImage<char>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_operationValueToImage<unsigned short>(img, res, val, 0);
+        reg_tools_operationValueToImage<unsigned short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_operationValueToImage<short>(img, res, val, 0);
+        reg_tools_operationValueToImage<short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationValueToImage<unsigned int>(img, res, val, 0);
+        reg_tools_operationValueToImage<unsigned int>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_operationValueToImage<int>(img, res, val, 0);
+        reg_tools_operationValueToImage<int>(img, res, val, operation);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_operationValueToImage<float>(img, res, val, 0);
+        reg_tools_operationValueToImage<float>(img, res, val, operation);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_operationValueToImage<double>(img, res, val, 0);
+        reg_tools_operationValueToImage<double>(img, res, val, operation);
         break;
     default:
         reg_print_fct_error("reg_tools_addValueToImage");
@@ -862,30 +787,31 @@ void reg_tools_subtractValueFromImage(const nifti_image *img,
         reg_print_msg_error("Input images are expected to have the same size");
         reg_exit();
     }
+    Operation operation(Operation::Type::Subtract);
     switch (img->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_operationValueToImage<unsigned char>(img, res, val, 1);
+        reg_tools_operationValueToImage<unsigned char>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_operationValueToImage<char>(img, res, val, 1);
+        reg_tools_operationValueToImage<char>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_operationValueToImage<unsigned short>(img, res, val, 1);
+        reg_tools_operationValueToImage<unsigned short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_operationValueToImage<short>(img, res, val, 1);
+        reg_tools_operationValueToImage<short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationValueToImage<unsigned int>(img, res, val, 1);
+        reg_tools_operationValueToImage<unsigned int>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_operationValueToImage<int>(img, res, val, 1);
+        reg_tools_operationValueToImage<int>(img, res, val, operation);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_operationValueToImage<float>(img, res, val, 1);
+        reg_tools_operationValueToImage<float>(img, res, val, operation);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_operationValueToImage<double>(img, res, val, 1);
+        reg_tools_operationValueToImage<double>(img, res, val, operation);
         break;
     default:
         reg_print_fct_error("reg_tools_subtractValueFromImage");
@@ -907,30 +833,31 @@ void reg_tools_multiplyValueToImage(const nifti_image *img,
         reg_print_msg_error("Input images are expected to have the same size");
         reg_exit();
     }
+    Operation operation(Operation::Type::Multiply);
     switch (img->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_operationValueToImage<unsigned char>(img, res, val, 2);
+        reg_tools_operationValueToImage<unsigned char>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_operationValueToImage<char>(img, res, val, 2);
+        reg_tools_operationValueToImage<char>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_operationValueToImage<unsigned short>(img, res, val, 2);
+        reg_tools_operationValueToImage<unsigned short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_operationValueToImage<short>(img, res, val, 2);
+        reg_tools_operationValueToImage<short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationValueToImage<unsigned int>(img, res, val, 2);
+        reg_tools_operationValueToImage<unsigned int>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_operationValueToImage<int>(img, res, val, 2);
+        reg_tools_operationValueToImage<int>(img, res, val, operation);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_operationValueToImage<float>(img, res, val, 2);
+        reg_tools_operationValueToImage<float>(img, res, val, operation);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_operationValueToImage<double>(img, res, val, 2);
+        reg_tools_operationValueToImage<double>(img, res, val, operation);
         break;
     default:
         reg_print_fct_error("reg_tools_multiplyValueToImage");
@@ -952,30 +879,31 @@ void reg_tools_divideValueToImage(const nifti_image *img,
         reg_print_msg_error("Input images are expected to have the same size");
         reg_exit();
     }
+    Operation operation(Operation::Type::Divide);
     switch (img->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_operationValueToImage<unsigned char>(img, res, val, 3);
+        reg_tools_operationValueToImage<unsigned char>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_operationValueToImage<char>(img, res, val, 3);
+        reg_tools_operationValueToImage<char>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_operationValueToImage<unsigned short>(img, res, val, 3);
+        reg_tools_operationValueToImage<unsigned short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_operationValueToImage<short>(img, res, val, 3);
+        reg_tools_operationValueToImage<short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationValueToImage<unsigned int>(img, res, val, 3);
+        reg_tools_operationValueToImage<unsigned int>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_operationValueToImage<int>(img, res, val, 3);
+        reg_tools_operationValueToImage<int>(img, res, val, operation);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_operationValueToImage<float>(img, res, val, 3);
+        reg_tools_operationValueToImage<float>(img, res, val, operation);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_operationValueToImage<double>(img, res, val, 3);
+        reg_tools_operationValueToImage<double>(img, res, val, operation);
         break;
     default:
         reg_print_fct_error("reg_tools_divideValueToImage");
@@ -984,7 +912,7 @@ void reg_tools_divideValueToImage(const nifti_image *img,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_tools_kernelConvolution_core(nifti_image *image,
                                       float *sigma,
                                       int kernelType,
@@ -1003,8 +931,8 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
     size_t index;
     const size_t voxelNumber = CalcVoxelNumber(*image);
 #endif
-    DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
-    int imageDim[3] = {image->nx, image->ny, image->nz};
+    DataType *imagePtr = static_cast<DataType*>(image->data);
+    int imageDim[3] = { image->nx, image->ny, image->nz };
 
     bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool));
     float *densityPtr = (float*)calloc(voxelNumber, sizeof(float));
@@ -1012,18 +940,18 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
     // Loop over the dimension higher than 3
     for (int t = 0; t < image->nt * image->nu; t++) {
         if (timePoint[t]) {
-            DTYPE *intensityPtr = &imagePtr[t * voxelNumber];
+            DataType *intensityPtr = &imagePtr[t * voxelNumber];
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber) \
    private(index)
 #endif
             for (index = 0; index < voxelNumber; index++) {
-                densityPtr[index] = (intensityPtr[index] == intensityPtr[index]) ? 1 : 0;
+                densityPtr[index] = (intensityPtr[index] == intensityPtr[index]) ? 1.f : 0;
                 densityPtr[index] *= (mask[index] >= 0) ? 1 : 0;
                 nanImagePtr[index] = static_cast<bool>(densityPtr[index]);
                 if (nanImagePtr[index] == 0)
-                    intensityPtr[index] = static_cast<DTYPE>(0);
+                    intensityPtr[index] = static_cast<DataType>(0);
             }
             // Loop over the x, y and z dimensions
             for (int n = 0; n < 3; n++) {
@@ -1110,9 +1038,9 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
                         size_t realIndex;
                         float *kernelPtr, kernelValue;
                         double densitySum, intensitySum;
-                        DTYPE *currentIntensityPtr = nullptr;
+                        DataType *currentIntensityPtr = nullptr;
                         float *currentDensityPtr = nullptr;
-                        DTYPE bufferIntensity[2048];
+                        DataType bufferIntensity[2048];
                         float bufferDensity[2048];
                         double bufferIntensitycur = 0;
                         double bufferDensitycur = 0;
@@ -1225,7 +1153,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
                                     }
 #endif
                                     // Store the computed value inplace
-                                    intensityPtr[realIndex] = static_cast<DTYPE>(intensitySum);
+                                    intensityPtr[realIndex] = static_cast<DataType>(intensitySum);
                                     densityPtr[realIndex] = static_cast<float>(densitySum);
                                     realIndex += lineOffset;
                                 } // line convolution
@@ -1255,7 +1183,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
                                             bufferDensitycur = 0;
                                         }
                                     }
-                                    intensityPtr[realIndex] = static_cast<DTYPE>(bufferIntensitycur);
+                                    intensityPtr[realIndex] = static_cast<DataType>(bufferIntensitycur);
                                     densityPtr[realIndex] = static_cast<float>(bufferDensitycur);
 
                                     realIndex += lineOffset;
@@ -1273,8 +1201,8 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
 #endif
             for (index = 0; index < voxelNumber; ++index) {
                 if (nanImagePtr[index] != 0)
-                    intensityPtr[index] = static_cast<DTYPE>((float)intensityPtr[index] / densityPtr[index]);
-                else intensityPtr[index] = std::numeric_limits<DTYPE>::quiet_NaN();
+                    intensityPtr[index] = static_cast<DataType>((float)intensityPtr[index] / densityPtr[index]);
+                else intensityPtr[index] = std::numeric_limits<DataType>::quiet_NaN();
             }
         } // check if the time point is active
     } // loop over the time points
@@ -1282,7 +1210,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
     free(densityPtr);
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_tools_labelKernelConvolution_core(nifti_image *image,
                                            float varianceX,
                                            float varianceY,
@@ -1301,7 +1229,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
     size_t index;
     const size_t voxelNumber = CalcVoxelNumber(*image);
 #endif
-    DTYPE *imagePtr = static_cast<DTYPE*>(image->data);
+    DataType *imagePtr = static_cast<DataType*>(image->data);
 
     const int activeTimePointNumber = image->nt * image->nu;
     bool *activeTimePoint = (bool*)calloc(activeTimePointNumber, sizeof(bool));
@@ -1318,16 +1246,16 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
 
 
     bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool));
-    DTYPE *tmpImagePtr = (DTYPE*)calloc(voxelNumber, sizeof(DTYPE));
+    DataType *tmpImagePtr = (DataType*)calloc(voxelNumber, sizeof(DataType));
 
-    typedef std::map<DTYPE, float> DataPointMap;
-    typedef std::pair<DTYPE, float> DataPointPair;
-    typedef typename std::map<DTYPE, float>::iterator DataPointMapIt;
+    typedef std::map<DataType, float> DataPointMap;
+    typedef std::pair<DataType, float> DataPointPair;
+    typedef typename std::map<DataType, float>::iterator DataPointMapIt;
 
     // Loop over the dimension higher than 3
     for (int t = 0; t < activeTimePointNumber; t++) {
         if (activeTimePoint[t]) {
-            DTYPE *intensityPtr = &imagePtr[t * voxelNumber];
+            DataType *intensityPtr = &imagePtr[t * voxelNumber];
             for (index = 0; index < voxelNumber; index++) {
                 nanImagePtr[index] = (intensityPtr[index] == intensityPtr[index]) ? true : false;
                 nanImagePtr[index] = (currentMask[index] >= 0) ? nanImagePtr[index] : false;
@@ -1336,9 +1264,9 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
             float gaussY_var = varianceY;
             float gaussZ_var = varianceZ;
             index = 0;
-            int currentXYZposition[3] = {0};
-            int dim_array[3] = {image->nx, image->ny, image->nz};
-            int shiftdirection[3] = {1, image->nx, image->nx * image->ny};
+            int currentXYZposition[3] = { 0 };
+            int dim_array[3] = { image->nx, image->ny, image->nz };
+            int shiftdirection[3] = { 1, image->nx, image->nx * image->ny };
 
             int kernelXsize, kernelXshift, shiftXstart, shiftXstop;
             int kernelYsize, kernelYshift, shiftYstart, shiftYstop;
@@ -1346,7 +1274,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
             int shiftx, shifty, shiftz;
             int indexNeighbour;
             float kernelval;
-            DTYPE maxindex;
+            DataType maxindex;
             double maxval;
             DataPointMapIt location, currIterator;
             DataPointMap tmp_lab;
@@ -1393,10 +1321,10 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
                                         indexNeighbour = index + (shiftx * shiftdirection[0]) +
                                             (shifty * shiftdirection[1]) + (shiftz * shiftdirection[2]);
                                         if (nanImagePtr[indexNeighbour] != 0) {
-                                            kernelval = expf((float)(-0.5f * (powf(shiftx, 2) / gaussX_var
-                                                                              + powf(shifty, 2) / gaussY_var
-                                                                              + powf(shiftz, 2) / gaussZ_var))) /
-                                                (sqrtf(2.0f * 3.14159265 * powf(gaussX_var * gaussY_var * gaussZ_var, 2)));
+                                            kernelval = expf((float)(-0.5f * (pow(shiftx, 2) / gaussX_var
+                                                                              + pow(shifty, 2) / gaussY_var
+                                                                              + pow(shiftz, 2) / gaussZ_var))) /
+                                                (sqrtf(2.f * 3.14159265f * pow(gaussX_var * gaussY_var * gaussZ_var, 2.f)));
 
                                             location = tmp_lab.find(intensityPtr[indexNeighbour]);
                                             if (location != tmp_lab.end()) {
@@ -1420,7 +1348,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
                             }
                             tmpImagePtr[index] = maxindex;
                         } else {
-                            tmpImagePtr[index] = std::numeric_limits<DTYPE>::quiet_NaN();
+                            tmpImagePtr[index] = std::numeric_limits<DataType>::quiet_NaN();
                         }
                     }
                 }
@@ -1428,7 +1356,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
             // Normalise per timepoint
             for (index = 0; index < voxelNumber; ++index) {
                 if (nanImagePtr[index] == 0)
-                    intensityPtr[index] = std::numeric_limits<DTYPE>::quiet_NaN();
+                    intensityPtr[index] = std::numeric_limits<DataType>::quiet_NaN();
                 else
                     intensityPtr[index] = tmpImagePtr[index];
             }
@@ -1524,19 +1452,19 @@ void reg_tools_kernelConvolution(nifti_image *image,
     delete[] activeTimePoint;
 }
 /* *************************************************************** */
-template <class PrecisionTYPE, class ImageTYPE>
-void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) {
+template <class PrecisionType, class ImageType>
+void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
     if (type == 1) {
         /* the input image is first smooth */
         float *sigma = new float[image->nt];
         for (int i = 0; i < image->nt; ++i) sigma[i] = -0.7355f;
         reg_tools_kernelConvolution(image, sigma, GAUSSIAN_KERNEL);
-        delete[]sigma;
+        delete[] sigma;
     }
 
     /* the values are copied */
-    ImageTYPE *oldValues = (ImageTYPE*)malloc(image->nvox * image->nbyper);
-    ImageTYPE *imagePtr = static_cast<ImageTYPE*>(image->data);
+    ImageType *oldValues = (ImageType*)malloc(image->nvox * image->nbyper);
+    ImageType *imagePtr = static_cast<ImageType*>(image->data);
     memcpy(oldValues, imagePtr, image->nvox * image->nbyper);
     free(image->data);
 
@@ -1595,7 +1523,7 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) {
         image->sto_xyz.m[1][2] *= 2.f;
         image->sto_xyz.m[2][2] *= 2.f;
     }
-    float origin_sform[3] = {image->sto_xyz.m[0][3], image->sto_xyz.m[1][3], image->sto_xyz.m[2][3]};
+    float origin_sform[3] = { image->sto_xyz.m[0][3], image->sto_xyz.m[1][3], image->sto_xyz.m[2][3] };
     image->sto_xyz.m[0][3] = origin_sform[0];
     image->sto_xyz.m[1][3] = origin_sform[1];
     image->sto_xyz.m[2][3] = origin_sform[2];
@@ -1604,15 +1532,15 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) {
     // Reallocate the image
     image->nvox = CalcVoxelNumber(*image, 7);
     image->data = calloc(image->nvox, image->nbyper);
-    imagePtr = static_cast<ImageTYPE*>(image->data);
+    imagePtr = static_cast<ImageType*>(image->data);
 
-    PrecisionTYPE real[3];
-    ImageTYPE intensity;
+    PrecisionType real[3];
+    ImageType intensity;
     int position[3];
 
     // qform is used for resampling
     for (size_t tuvw = 0; tuvw < (size_t)image->nt * image->nu * image->nv * image->nw; tuvw++) {
-        ImageTYPE *valuesPtrTUVW = &oldValues[tuvw * oldDim[1] * oldDim[2] * oldDim[3]];
+        ImageType *valuesPtrTUVW = &oldValues[tuvw * oldDim[1] * oldDim[2] * oldDim[3]];
         for (int z = 0; z < image->nz; z++) {
             for (int y = 0; y < image->ny; y++) {
                 for (int x = 0; x < image->nx; x++) {
@@ -1635,7 +1563,7 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) {
                     position[2] = (int)reg_round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]);
                     if (oldDim[3] == 1) position[2] = 0;
                     // Nearest neighbour is used as downsampling ratio is constant
-                    intensity = std::numeric_limits<ImageTYPE>::quiet_NaN();
+                    intensity = std::numeric_limits<ImageType>::quiet_NaN();
                     if (-1 < position[0] && position[0] < oldDim[1] &&
                         -1 < position[1] && position[1] < oldDim[2] &&
                         -1 < position[2] && position[2] < oldDim[3]) {
@@ -1650,32 +1578,32 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) {
     free(oldValues);
 }
 /* *************************************************************** */
-template <class PrecisionTYPE>
+template <class PrecisionType>
 void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_downsampleImage1<PrecisionTYPE, unsigned char>(image, type, downsampleAxis);
+        reg_downsampleImage<PrecisionType, unsigned char>(image, type, downsampleAxis);
         break;
     case NIFTI_TYPE_INT8:
-        reg_downsampleImage1<PrecisionTYPE, char>(image, type, downsampleAxis);
+        reg_downsampleImage<PrecisionType, char>(image, type, downsampleAxis);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_downsampleImage1<PrecisionTYPE, unsigned short>(image, type, downsampleAxis);
+        reg_downsampleImage<PrecisionType, unsigned short>(image, type, downsampleAxis);
         break;
     case NIFTI_TYPE_INT16:
-        reg_downsampleImage1<PrecisionTYPE, short>(image, type, downsampleAxis);
+        reg_downsampleImage<PrecisionType, short>(image, type, downsampleAxis);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_downsampleImage1<PrecisionTYPE, unsigned int>(image, type, downsampleAxis);
+        reg_downsampleImage<PrecisionType, unsigned int>(image, type, downsampleAxis);
         break;
     case NIFTI_TYPE_INT32:
-        reg_downsampleImage1<PrecisionTYPE, int>(image, type, downsampleAxis);
+        reg_downsampleImage<PrecisionType, int>(image, type, downsampleAxis);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_downsampleImage1<PrecisionTYPE, float>(image, type, downsampleAxis);
+        reg_downsampleImage<PrecisionType, float>(image, type, downsampleAxis);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_downsampleImage1<PrecisionTYPE, double>(image, type, downsampleAxis);
+        reg_downsampleImage<PrecisionType, double>(image, type, downsampleAxis);
         break;
     default:
         reg_print_fct_error("reg_downsampleImage");
@@ -1686,42 +1614,40 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
 template void reg_downsampleImage<float>(nifti_image*, int, bool*);
 template void reg_downsampleImage<double>(nifti_image*, int, bool*);
 /* *************************************************************** */
-template <class DTYPE>
-void reg_tools_binarise_image1(nifti_image *image) {
-    DTYPE *dataPtr = static_cast<DTYPE*>(image->data);
+template <class DataType>
+void reg_tools_binarise_image(nifti_image *image) {
+    DataType *dataPtr = static_cast<DataType*>(image->data);
     image->scl_inter = 0.f;
     image->scl_slope = 1.f;
-    for (size_t i = 0; i < image->nvox; i++) {
-        *dataPtr = (*dataPtr) != 0 ? (DTYPE)1 : (DTYPE)0;
-        dataPtr++;
-    }
+    for (size_t i = 0; i < image->nvox; i++)
+        dataPtr[i] = dataPtr[i] != 0 ? (DataType)1 : (DataType)0;
 }
 /* *************************************************************** */
 void reg_tools_binarise_image(nifti_image *image) {
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_binarise_image1<unsigned char>(image);
+        reg_tools_binarise_image<unsigned char>(image);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_binarise_image1<char>(image);
+        reg_tools_binarise_image<char>(image);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_binarise_image1<unsigned short>(image);
+        reg_tools_binarise_image<unsigned short>(image);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_binarise_image1<short>(image);
+        reg_tools_binarise_image<short>(image);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_binarise_image1<unsigned int>(image);
+        reg_tools_binarise_image<unsigned int>(image);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_binarise_image1<int>(image);
+        reg_tools_binarise_image<int>(image);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_binarise_image1<float>(image);
+        reg_tools_binarise_image<float>(image);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_binarise_image1<double>(image);
+        reg_tools_binarise_image<double>(image);
         break;
     default:
         reg_print_fct_error("reg_tools_binarise_image");
@@ -1730,40 +1656,38 @@ void reg_tools_binarise_image(nifti_image *image) {
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
-void reg_tools_binarise_image1(nifti_image *image, float threshold) {
-    DTYPE *dataPtr = static_cast<DTYPE*>(image->data);
-    for (size_t i = 0; i < image->nvox; i++) {
-        *dataPtr = (*dataPtr) < threshold ? (DTYPE)0 : (DTYPE)1;
-        dataPtr++;
-    }
+template <class DataType>
+void reg_tools_binarise_image(nifti_image *image, float threshold) {
+    DataType *dataPtr = static_cast<DataType*>(image->data);
+    for (size_t i = 0; i < image->nvox; i++)
+        dataPtr[i] = dataPtr[i] < threshold ? (DataType)0 : (DataType)1;
 }
 /* *************************************************************** */
 void reg_tools_binarise_image(nifti_image *image, float threshold) {
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_binarise_image1<unsigned char>(image, threshold);
+        reg_tools_binarise_image<unsigned char>(image, threshold);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_binarise_image1<char>(image, threshold);
+        reg_tools_binarise_image<char>(image, threshold);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_binarise_image1<unsigned short>(image, threshold);
+        reg_tools_binarise_image<unsigned short>(image, threshold);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_binarise_image1<short>(image, threshold);
+        reg_tools_binarise_image<short>(image, threshold);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_binarise_image1<unsigned int>(image, threshold);
+        reg_tools_binarise_image<unsigned int>(image, threshold);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_binarise_image1<int>(image, threshold);
+        reg_tools_binarise_image<int>(image, threshold);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_binarise_image1<float>(image, threshold);
+        reg_tools_binarise_image<float>(image, threshold);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_binarise_image1<double>(image, threshold);
+        reg_tools_binarise_image<double>(image, threshold);
         break;
     default:
         reg_print_fct_error("reg_tools_binarise_image");
@@ -1772,9 +1696,9 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) {
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
-void reg_tools_binaryImage2int1(const nifti_image *image, int *array) {
-    const DTYPE *dataPtr = static_cast<DTYPE*>(image->data);
+template <class DataType>
+void reg_tools_binaryImage2int(const nifti_image *image, int *array) {
+    const DataType *dataPtr = static_cast<DataType*>(image->data);
     for (size_t i = 0; i < CalcVoxelNumber(*image); i++)
         array[i] = dataPtr[i] != 0 ? 1 : -1;
 }
@@ -1782,28 +1706,28 @@ void reg_tools_binaryImage2int1(const nifti_image *image, int *array) {
 void reg_tools_binaryImage2int(const nifti_image *image, int *array) {
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_binaryImage2int1<unsigned char>(image, array);
+        reg_tools_binaryImage2int<unsigned char>(image, array);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_binaryImage2int1<char>(image, array);
+        reg_tools_binaryImage2int<char>(image, array);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_binaryImage2int1<unsigned short>(image, array);
+        reg_tools_binaryImage2int<unsigned short>(image, array);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_binaryImage2int1<short>(image, array);
+        reg_tools_binaryImage2int<short>(image, array);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_binaryImage2int1<unsigned int>(image, array);
+        reg_tools_binaryImage2int<unsigned int>(image, array);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_binaryImage2int1<int>(image, array);
+        reg_tools_binaryImage2int<int>(image, array);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_binaryImage2int1<float>(image, array);
+        reg_tools_binaryImage2int<float>(image, array);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_binaryImage2int1<double>(image, array);
+        reg_tools_binaryImage2int<double>(image, array);
         break;
     default:
         reg_print_fct_error("reg_tools_binaryImage2int");
@@ -1812,15 +1736,15 @@ void reg_tools_binaryImage2int(const nifti_image *image, int *array) {
     }
 }
 /* *************************************************************** */
-template <class ATYPE, class BTYPE>
-double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *imageB) {
+template <class AType, class BType>
+double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB) {
     const size_t voxelNumber = CalcVoxelNumber(*imageA);
-    const ATYPE *imageAPtrX = static_cast<ATYPE*>(imageA->data);
-    const BTYPE *imageBPtrX = static_cast<BTYPE*>(imageB->data);
-    const ATYPE *imageAPtrY = nullptr;
-    const BTYPE *imageBPtrY = nullptr;
-    const ATYPE *imageAPtrZ = nullptr;
-    const BTYPE *imageBPtrZ = nullptr;
+    const AType *imageAPtrX = static_cast<AType*>(imageA->data);
+    const BType *imageBPtrX = static_cast<BType*>(imageB->data);
+    const AType *imageAPtrY = nullptr;
+    const BType *imageBPtrY = nullptr;
+    const AType *imageAPtrZ = nullptr;
+    const BType *imageBPtrZ = nullptr;
     if (imageA->dim[5] > 1) {
         imageAPtrY = &imageAPtrX[voxelNumber];
         imageBPtrY = &imageBPtrX[voxelNumber];
@@ -1849,27 +1773,27 @@ double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *image
     return sum / static_cast<double>(voxelNumber);
 }
 /* *************************************************************** */
-template <class ATYPE>
-double reg_tools_getMeanRMS1(const nifti_image *imageA, const nifti_image *imageB) {
+template <class AType>
+double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB) {
     switch (imageB->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_tools_getMeanRMS2<ATYPE, unsigned char>(imageA, imageB);
+        return reg_tools_getMeanRMS<AType, unsigned char>(imageA, imageB);
     case NIFTI_TYPE_INT8:
-        return reg_tools_getMeanRMS2<ATYPE, char>(imageA, imageB);
+        return reg_tools_getMeanRMS<AType, char>(imageA, imageB);
     case NIFTI_TYPE_UINT16:
-        return reg_tools_getMeanRMS2<ATYPE, unsigned short>(imageA, imageB);
+        return reg_tools_getMeanRMS<AType, unsigned short>(imageA, imageB);
     case NIFTI_TYPE_INT16:
-        return reg_tools_getMeanRMS2<ATYPE, short>(imageA, imageB);
+        return reg_tools_getMeanRMS<AType, short>(imageA, imageB);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_getMeanRMS2<ATYPE, unsigned int>(imageA, imageB);
+        return reg_tools_getMeanRMS<AType, unsigned int>(imageA, imageB);
     case NIFTI_TYPE_INT32:
-        return reg_tools_getMeanRMS2<ATYPE, int>(imageA, imageB);
+        return reg_tools_getMeanRMS<AType, int>(imageA, imageB);
     case NIFTI_TYPE_FLOAT32:
-        return reg_tools_getMeanRMS2<ATYPE, float>(imageA, imageB);
+        return reg_tools_getMeanRMS<AType, float>(imageA, imageB);
     case NIFTI_TYPE_FLOAT64:
-        return reg_tools_getMeanRMS2<ATYPE, double>(imageA, imageB);
+        return reg_tools_getMeanRMS<AType, double>(imageA, imageB);
     default:
-        reg_print_fct_error("reg_tools_getMeanRMS1");
+        reg_print_fct_error("reg_tools_getMeanRMS");
         reg_print_msg_error("The image data type is not supported");
         reg_exit();
     }
@@ -1878,21 +1802,21 @@ double reg_tools_getMeanRMS1(const nifti_image *imageA, const nifti_image *image
 double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB) {
     switch (imageA->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_tools_getMeanRMS1<unsigned char>(imageA, imageB);
+        return reg_tools_getMeanRMS<unsigned char>(imageA, imageB);
     case NIFTI_TYPE_INT8:
-        return reg_tools_getMeanRMS1<char>(imageA, imageB);
+        return reg_tools_getMeanRMS<char>(imageA, imageB);
     case NIFTI_TYPE_UINT16:
-        return reg_tools_getMeanRMS1<unsigned short>(imageA, imageB);
+        return reg_tools_getMeanRMS<unsigned short>(imageA, imageB);
     case NIFTI_TYPE_INT16:
-        return reg_tools_getMeanRMS1<short>(imageA, imageB);
+        return reg_tools_getMeanRMS<short>(imageA, imageB);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_getMeanRMS1<unsigned int>(imageA, imageB);
+        return reg_tools_getMeanRMS<unsigned int>(imageA, imageB);
     case NIFTI_TYPE_INT32:
-        return reg_tools_getMeanRMS1<int>(imageA, imageB);
+        return reg_tools_getMeanRMS<int>(imageA, imageB);
     case NIFTI_TYPE_FLOAT32:
-        return reg_tools_getMeanRMS1<float>(imageA, imageB);
+        return reg_tools_getMeanRMS<float>(imageA, imageB);
     case NIFTI_TYPE_FLOAT64:
-        return reg_tools_getMeanRMS1<double>(imageA, imageB);
+        return reg_tools_getMeanRMS<double>(imageA, imageB);
     default:
         reg_print_fct_error("reg_tools_getMeanRMS");
         reg_print_msg_error("The image data type is not supported");
@@ -1900,20 +1824,20 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, unsigned int levelNumber, unsigned int levelToPerform) {
     // FINEST LEVEL OF REGISTRATION
     pyramid[levelToPerform - 1] = nifti_dup(*inputImage);
-    reg_tools_changeDatatype<DTYPE>(pyramid[levelToPerform - 1]);
+    reg_tools_changeDatatype<DataType>(pyramid[levelToPerform - 1]);
     reg_tools_removeSCLInfo(pyramid[levelToPerform - 1]);
 
     // Images are downsampled if appropriate
     for (unsigned int l = levelToPerform; l < levelNumber; l++) {
-        bool downsampleAxis[8] = {false, true, true, true, false, false, false, false};
+        bool downsampleAxis[8] = { false, true, true, true, false, false, false, false };
         if ((pyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false;
         if ((pyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false;
         if ((pyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false;
-        reg_downsampleImage<DTYPE>(pyramid[levelToPerform - 1], 1, downsampleAxis);
+        reg_downsampleImage<DataType>(pyramid[levelToPerform - 1], 1, downsampleAxis);
     }
 
     // Images for each subsequent levels are allocated and downsampled if appropriate
@@ -1922,18 +1846,18 @@ int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid,
         pyramid[l] = nifti_dup(*pyramid[l + 1]);
 
         // Downsample the image if appropriate
-        bool downsampleAxis[8] = {false, true, true, true, false, false, false, false};
+        bool downsampleAxis[8] = { false, true, true, true, false, false, false, false };
         if ((pyramid[l]->nx / 2) < 32) downsampleAxis[1] = false;
         if ((pyramid[l]->ny / 2) < 32) downsampleAxis[2] = false;
         if ((pyramid[l]->nz / 2) < 32) downsampleAxis[3] = false;
-        reg_downsampleImage<DTYPE>(pyramid[l], 1, downsampleAxis);
+        reg_downsampleImage<DataType>(pyramid[l], 1, downsampleAxis);
     }
     return EXIT_SUCCESS;
 }
 template int reg_createImagePyramid<float>(const nifti_image*, nifti_image**, unsigned int, unsigned int);
 template int reg_createImagePyramid<double>(const nifti_image*, nifti_image**, unsigned int, unsigned int);
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) {
     // FINEST LEVEL OF REGISTRATION
     nifti_image **tempMaskImagePyramid = (nifti_image **)malloc(levelToPerform * sizeof(nifti_image *));
@@ -1943,11 +1867,11 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid,
 
     // Image is downsampled if appropriate
     for (unsigned int l = levelToPerform; l < levelNumber; l++) {
-        bool downsampleAxis[8] = {false, true, true, true, false, false, false, false};
+        bool downsampleAxis[8] = { false, true, true, true, false, false, false, false };
         if ((tempMaskImagePyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false;
         if ((tempMaskImagePyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false;
         if ((tempMaskImagePyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false;
-        reg_downsampleImage<DTYPE>(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis);
+        reg_downsampleImage<DataType>(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis);
     }
     size_t voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[levelToPerform - 1]);
     maskPyramid[levelToPerform - 1] = (int*)malloc(voxelNumber * sizeof(int));
@@ -1959,11 +1883,11 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid,
         tempMaskImagePyramid[l] = nifti_dup(*tempMaskImagePyramid[l + 1]);
 
         // Downsample the image if appropriate
-        bool downsampleAxis[8] = {false, true, true, true, false, false, false, false};
+        bool downsampleAxis[8] = { false, true, true, true, false, false, false, false };
         if ((tempMaskImagePyramid[l]->nx / 2) < 32) downsampleAxis[1] = false;
         if ((tempMaskImagePyramid[l]->ny / 2) < 32) downsampleAxis[2] = false;
         if ((tempMaskImagePyramid[l]->nz / 2) < 32) downsampleAxis[3] = false;
-        reg_downsampleImage<DTYPE>(tempMaskImagePyramid[l], 0, downsampleAxis);
+        reg_downsampleImage<DataType>(tempMaskImagePyramid[l], 0, downsampleAxis);
 
         voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[l]);
         maskPyramid[l] = (int*)malloc(voxelNumber * sizeof(int));
@@ -1977,14 +1901,14 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid,
 template int reg_createMaskPyramid<float>(const nifti_image*, int**, unsigned int, unsigned int);
 template int reg_createMaskPyramid<double>(const nifti_image*, int**, unsigned int, unsigned int);
 /* *************************************************************** */
-template <class TYPE1, class TYPE2>
-int reg_tools_nanMask_image2(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
-    const TYPE1 *imagePtr = static_cast<TYPE1*>(image->data);
-    const TYPE2 *maskPtr = static_cast<TYPE2*>(maskImage->data);
-    TYPE1 *resPtr = static_cast<TYPE1*>(outputImage->data);
+template <class ImageType, class MaskType>
+int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
+    const ImageType *imagePtr = static_cast<ImageType*>(image->data);
+    const MaskType *maskPtr = static_cast<MaskType*>(maskImage->data);
+    ImageType *resPtr = static_cast<ImageType*>(outputImage->data);
     for (size_t i = 0; i < image->nvox; ++i) {
         if (*maskPtr == 0)
-            *resPtr = std::numeric_limits<TYPE1>::quiet_NaN();
+            *resPtr = std::numeric_limits<ImageType>::quiet_NaN();
         else *resPtr = *imagePtr;
         maskPtr++;
         imagePtr++;
@@ -1993,27 +1917,27 @@ int reg_tools_nanMask_image2(const nifti_image *image, const nifti_image *maskIm
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
-template <class TYPE1>
-int reg_tools_nanMask_image1(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
+template <class ImageType>
+int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
     switch (maskImage->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_tools_nanMask_image2<TYPE1, unsigned char>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<ImageType, unsigned char>(image, maskImage, outputImage);
     case NIFTI_TYPE_INT8:
-        return reg_tools_nanMask_image2<TYPE1, char>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<ImageType, char>(image, maskImage, outputImage);
     case NIFTI_TYPE_UINT16:
-        return reg_tools_nanMask_image2<TYPE1, unsigned short>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<ImageType, unsigned short>(image, maskImage, outputImage);
     case NIFTI_TYPE_INT16:
-        return reg_tools_nanMask_image2<TYPE1, short>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<ImageType, short>(image, maskImage, outputImage);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_nanMask_image2<TYPE1, unsigned int>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<ImageType, unsigned int>(image, maskImage, outputImage);
     case NIFTI_TYPE_INT32:
-        return reg_tools_nanMask_image2<TYPE1, int>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<ImageType, int>(image, maskImage, outputImage);
     case NIFTI_TYPE_FLOAT32:
-        return reg_tools_nanMask_image2<TYPE1, float>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<ImageType, float>(image, maskImage, outputImage);
     case NIFTI_TYPE_FLOAT64:
-        return reg_tools_nanMask_image2<TYPE1, double>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<ImageType, double>(image, maskImage, outputImage);
     default:
-        reg_print_fct_error("reg_tools_nanMask_image1");
+        reg_print_fct_error("reg_tools_nanMask_image");
         reg_print_msg_error("The image data type is not supported");
         reg_exit();
     }
@@ -2034,21 +1958,21 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma
     }
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_tools_nanMask_image1<unsigned char>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<unsigned char>(image, maskImage, outputImage);
     case NIFTI_TYPE_INT8:
-        return reg_tools_nanMask_image1<char>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<char>(image, maskImage, outputImage);
     case NIFTI_TYPE_UINT16:
-        return reg_tools_nanMask_image1<unsigned short>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<unsigned short>(image, maskImage, outputImage);
     case NIFTI_TYPE_INT16:
-        return reg_tools_nanMask_image1<short>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<short>(image, maskImage, outputImage);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_nanMask_image1<unsigned int>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<unsigned int>(image, maskImage, outputImage);
     case NIFTI_TYPE_INT32:
-        return reg_tools_nanMask_image1<int>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<int>(image, maskImage, outputImage);
     case NIFTI_TYPE_FLOAT32:
-        return reg_tools_nanMask_image1<float>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<float>(image, maskImage, outputImage);
     case NIFTI_TYPE_FLOAT64:
-        return reg_tools_nanMask_image1<double>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<double>(image, maskImage, outputImage);
     default:
         reg_print_fct_error("reg_tools_nanMask_image");
         reg_print_msg_error("The image data type is not supported");
@@ -2056,13 +1980,13 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma
     }
 }
 /* *************************************************************** */
-template <class TYPE>
+template <class DataType>
 int reg_tools_removeNanFromMask_core(const nifti_image *image, int *mask) {
     const size_t voxelNumber = CalcVoxelNumber(*image);
-    const TYPE *imagePtr = static_cast<TYPE*>(image->data);
+    const DataType *imagePtr = static_cast<DataType*>(image->data);
     for (int t = 0; t < image->nt; ++t) {
         for (size_t i = 0; i < voxelNumber; ++i) {
-            TYPE value = *imagePtr++;
+            DataType value = *imagePtr++;
             if (value != value)
                 mask[i] = -1;
         }
@@ -2083,22 +2007,22 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask) {
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
-DTYPE reg_tools_getMinMaxValue_core(const nifti_image *image, int timepoint, bool calcMin = true) {
+template <class DataType>
+DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool calcMin = true) {
     if (timepoint < -1 || timepoint >= image->nt)
-        reg_print_msg_error("reg_tools_getMinMaxValue_core. The required time point does not exists");
+        reg_print_msg_error("reg_tools_getMinMaxValue. The required time point does not exists");
 
-    const DTYPE *imgPtr = static_cast<DTYPE*>(image->data);
-    DTYPE retValue = calcMin ? std::numeric_limits<DTYPE>::max() : std::numeric_limits<DTYPE>::min();
+    const DataType *imgPtr = static_cast<DataType*>(image->data);
+    DataType retValue = calcMin ? std::numeric_limits<DataType>::max() : std::numeric_limits<DataType>::min();
     const size_t voxelNumber = CalcVoxelNumber(*image);
     const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
 
     for (int time = 0; time < image->nt; ++time) {
         if (time == timepoint || timepoint == -1) {
             for (int u = 0; u < image->nu; ++u) {
-                const DTYPE *currentVolumePtr = &imgPtr[(u * image->nt + time) * voxelNumber];
+                const DataType *currentVolumePtr = &imgPtr[(u * image->nt + time) * voxelNumber];
                 for (size_t i = 0; i < voxelNumber; ++i) {
-                    DTYPE currentVal = (DTYPE)((float)currentVolumePtr[i] * sclSlope + image->scl_inter);
+                    DataType currentVal = (DataType)((float)currentVolumePtr[i] * sclSlope + image->scl_inter);
                     retValue = calcMin ? std::min(currentVal, retValue) : std::max(currentVal, retValue);
                 }
             }
@@ -2111,21 +2035,21 @@ float reg_tools_getMinValue(const nifti_image *image, int timepoint) {
     // Check the image data type
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_tools_getMinMaxValue_core<unsigned char>(image, timepoint);
+        return reg_tools_getMinMaxValue<unsigned char>(image, timepoint);
     case NIFTI_TYPE_INT8:
-        return reg_tools_getMinMaxValue_core<char>(image, timepoint);
+        return reg_tools_getMinMaxValue<char>(image, timepoint);
     case NIFTI_TYPE_UINT16:
-        return reg_tools_getMinMaxValue_core<unsigned short>(image, timepoint);
+        return reg_tools_getMinMaxValue<unsigned short>(image, timepoint);
     case NIFTI_TYPE_INT16:
-        return reg_tools_getMinMaxValue_core<short>(image, timepoint);
+        return reg_tools_getMinMaxValue<short>(image, timepoint);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_getMinMaxValue_core<unsigned int>(image, timepoint);
+        return (float)reg_tools_getMinMaxValue<unsigned int>(image, timepoint);
     case NIFTI_TYPE_INT32:
-        return reg_tools_getMinMaxValue_core<int>(image, timepoint);
+        return (float)reg_tools_getMinMaxValue<int>(image, timepoint);
     case NIFTI_TYPE_FLOAT32:
-        return reg_tools_getMinMaxValue_core<float>(image, timepoint);
+        return reg_tools_getMinMaxValue<float>(image, timepoint);
     case NIFTI_TYPE_FLOAT64:
-        return reg_tools_getMinMaxValue_core<double>(image, timepoint);
+        return (float)reg_tools_getMinMaxValue<double>(image, timepoint);
     default:
         reg_print_fct_error("reg_tools_getMinValue");
         reg_print_msg_error("The image data type is not supported");
@@ -2137,21 +2061,21 @@ float reg_tools_getMaxValue(const nifti_image *image, int timepoint) {
     // Check the image data type
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_tools_getMinMaxValue_core<unsigned char>(image, timepoint, false);
+        return reg_tools_getMinMaxValue<unsigned char>(image, timepoint, false);
     case NIFTI_TYPE_INT8:
-        return reg_tools_getMinMaxValue_core<char>(image, timepoint, false);
+        return reg_tools_getMinMaxValue<char>(image, timepoint, false);
     case NIFTI_TYPE_UINT16:
-        return reg_tools_getMinMaxValue_core<unsigned short>(image, timepoint, false);
+        return reg_tools_getMinMaxValue<unsigned short>(image, timepoint, false);
     case NIFTI_TYPE_INT16:
-        return reg_tools_getMinMaxValue_core<short>(image, timepoint, false);
+        return reg_tools_getMinMaxValue<short>(image, timepoint, false);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_getMinMaxValue_core<unsigned int>(image, timepoint, false);
+        return (float)reg_tools_getMinMaxValue<unsigned int>(image, timepoint, false);
     case NIFTI_TYPE_INT32:
-        return reg_tools_getMinMaxValue_core<int>(image, timepoint, false);
+        return (float)reg_tools_getMinMaxValue<int>(image, timepoint, false);
     case NIFTI_TYPE_FLOAT32:
-        return reg_tools_getMinMaxValue_core<float>(image, timepoint, false);
+        return reg_tools_getMinMaxValue<float>(image, timepoint, false);
     case NIFTI_TYPE_FLOAT64:
-        return reg_tools_getMinMaxValue_core<double>(image, timepoint, false);
+        return (float)reg_tools_getMinMaxValue<double>(image, timepoint, false);
     default:
         reg_print_fct_error("reg_tools_getMaxValue");
         reg_print_msg_error("The image data type is not supported");
@@ -2159,13 +2083,13 @@ float reg_tools_getMaxValue(const nifti_image *image, int timepoint) {
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
-float reg_tools_getMeanValue_core(const nifti_image *image) {
-    const DTYPE *imgPtr = static_cast<DTYPE*>(image->data);
+template <class DataType>
+float reg_tools_getMeanValue(const nifti_image *image) {
+    const DataType *imgPtr = static_cast<DataType*>(image->data);
     float meanValue = 0;
     const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
     for (size_t i = 0; i < image->nvox; ++i) {
-        DTYPE currentVal = (DTYPE)((float)imgPtr[i] * sclSlope + image->scl_inter);
+        const float currentVal = static_cast<float>(imgPtr[i]) * sclSlope + image->scl_inter;
         meanValue += currentVal;
     }
     meanValue = float(meanValue / image->nvox);
@@ -2176,21 +2100,21 @@ float reg_tools_getMeanValue(const nifti_image *image) {
     // Check the image data type
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_tools_getMeanValue_core<unsigned char>(image);
+        return reg_tools_getMeanValue<unsigned char>(image);
     case NIFTI_TYPE_INT8:
-        return reg_tools_getMeanValue_core<char>(image);
+        return reg_tools_getMeanValue<char>(image);
     case NIFTI_TYPE_UINT16:
-        return reg_tools_getMeanValue_core<unsigned short>(image);
+        return reg_tools_getMeanValue<unsigned short>(image);
     case NIFTI_TYPE_INT16:
-        return reg_tools_getMeanValue_core<short>(image);
+        return reg_tools_getMeanValue<short>(image);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_getMeanValue_core<unsigned int>(image);
+        return reg_tools_getMeanValue<unsigned int>(image);
     case NIFTI_TYPE_INT32:
-        return reg_tools_getMeanValue_core<int>(image);
+        return reg_tools_getMeanValue<int>(image);
     case NIFTI_TYPE_FLOAT32:
-        return reg_tools_getMeanValue_core<float>(image);
+        return reg_tools_getMeanValue<float>(image);
     case NIFTI_TYPE_FLOAT64:
-        return reg_tools_getMeanValue_core<double>(image);
+        return reg_tools_getMeanValue<double>(image);
     default:
         reg_print_fct_error("reg_tools_getMeanValue");
         reg_print_msg_error("The image data type is not supported");
@@ -2198,14 +2122,14 @@ float reg_tools_getMeanValue(const nifti_image *image) {
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
-float reg_tools_getSTDValue_core(const nifti_image *image) {
-    const DTYPE *imgPtr = static_cast<DTYPE*>(image->data);
+template <class DataType>
+float reg_tools_getSTDValue(const nifti_image *image) {
+    const DataType *imgPtr = static_cast<DataType*>(image->data);
     const float meanValue = reg_tools_getMeanValue(image);
     float stdValue = 0;
     const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
     for (size_t i = 0; i < image->nvox; ++i) {
-        const DTYPE currentVal = (DTYPE)((float)imgPtr[i] * sclSlope + image->scl_inter);
+        const float currentVal = static_cast<float>(imgPtr[i]) * sclSlope + image->scl_inter;
         stdValue += (currentVal - meanValue) * (currentVal - meanValue);
     }
     stdValue = std::sqrt(stdValue / image->nvox);
@@ -2216,21 +2140,21 @@ float reg_tools_getSTDValue(const nifti_image *image) {
     // Check the image data type
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_tools_getSTDValue_core<unsigned char>(image);
+        return reg_tools_getSTDValue<unsigned char>(image);
     case NIFTI_TYPE_INT8:
-        return reg_tools_getSTDValue_core<char>(image);
+        return reg_tools_getSTDValue<char>(image);
     case NIFTI_TYPE_UINT16:
-        return reg_tools_getSTDValue_core<unsigned short>(image);
+        return reg_tools_getSTDValue<unsigned short>(image);
     case NIFTI_TYPE_INT16:
-        return reg_tools_getSTDValue_core<short>(image);
+        return reg_tools_getSTDValue<short>(image);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_getSTDValue_core<unsigned int>(image);
+        return reg_tools_getSTDValue<unsigned int>(image);
     case NIFTI_TYPE_INT32:
-        return reg_tools_getSTDValue_core<int>(image);
+        return reg_tools_getSTDValue<int>(image);
     case NIFTI_TYPE_FLOAT32:
-        return reg_tools_getSTDValue_core<float>(image);
+        return reg_tools_getSTDValue<float>(image);
     case NIFTI_TYPE_FLOAT64:
-        return reg_tools_getSTDValue_core<double>(image);
+        return reg_tools_getSTDValue<double>(image);
     default:
         reg_print_fct_error("reg_tools_getSTDValue");
         reg_print_msg_error("The image data type is not supported");
@@ -2238,26 +2162,17 @@ float reg_tools_getSTDValue(const nifti_image *image) {
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
-void reg_flipAxis_type(int nx,
-                       int ny,
-                       int nz,
-                       int nt,
-                       int nu,
-                       int nv,
-                       int nw,
-                       const void *inputArray,
-                       void **outputArray,
-                       const std::string& cmd) {
+template <class DataType>
+void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd) {
     // Allocate the outputArray if it is not allocated yet
     if (*outputArray == nullptr)
-        *outputArray = malloc(nx * ny * nz * nt * nu * nv * nw * sizeof(DTYPE));
+        *outputArray = malloc(CalcVoxelNumber(*image, 7) * sizeof(DataType));
 
     // Parse the cmd to check which axis have to be flipped
     const char *axisName = "x\0y\0z\0t\0u\0v\0w\0";
-    int increment[7] = {1, 1, 1, 1, 1, 1, 1};
-    int start[7] = {0, 0, 0, 0, 0, 0, 0};
-    const int end[7] = {nx, ny, nz, nt, nu, nv, nw};
+    int increment[7] = { 1, 1, 1, 1, 1, 1, 1 };
+    int start[7] = { 0, 0, 0, 0, 0, 0, 0 };
+    const int end[7] = { image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw };
     for (int i = 0; i < 7; ++i) {
         if (cmd.find(axisName[i * 2]) != std::string::npos) {
             increment[i] = -1;
@@ -2266,23 +2181,23 @@ void reg_flipAxis_type(int nx,
     }
 
     // Define the reading and writting pointers
-    const DTYPE *inputPtr = static_cast<const DTYPE*>(inputArray);
-    DTYPE *outputPtr = static_cast<DTYPE*>(*outputArray);
+    const DataType *inputPtr = static_cast<const DataType*>(image->data);
+    DataType *outputPtr = static_cast<DataType*>(*outputArray);
 
     // Copy the data and flipp axis if required
-    for (int w = 0, w2 = start[6]; w < nw; ++w, w2 += increment[6]) {
-        size_t index_w = w2 * nx * ny * nz * nt * nu * nv;
-        for (int v = 0, v2 = start[5]; v < nv; ++v, v2 += increment[5]) {
-            size_t index_v = index_w + v2 * nx * ny * nz * nt * nu;
-            for (int u = 0, u2 = start[4]; u < nu; ++u, u2 += increment[4]) {
-                size_t index_u = index_v + u2 * nx * ny * nz * nt;
-                for (int t = 0, t2 = start[3]; t < nt; ++t, t2 += increment[3]) {
-                    size_t index_t = index_u + t2 * nx * ny * nz;
-                    for (int z = 0, z2 = start[2]; z < nz; ++z, z2 += increment[2]) {
-                        size_t index_z = index_t + z2 * nx * ny;
-                        for (int y = 0, y2 = start[1]; y < ny; ++y, y2 += increment[1]) {
-                            size_t index_y = index_z + y2 * nx;
-                            for (int x = 0, x2 = start[0]; x < nx; ++x, x2 += increment[0]) {
+    for (int w = 0, w2 = start[6]; w < image->nw; ++w, w2 += increment[6]) {
+        size_t index_w = w2 * image->nx * image->ny * image->nz * image->nt * image->nu * image->nv;
+        for (int v = 0, v2 = start[5]; v < image->nv; ++v, v2 += increment[5]) {
+            size_t index_v = index_w + v2 * image->nx * image->ny * image->nz * image->nt * image->nu;
+            for (int u = 0, u2 = start[4]; u < image->nu; ++u, u2 += increment[4]) {
+                size_t index_u = index_v + u2 * image->nx * image->ny * image->nz * image->nt;
+                for (int t = 0, t2 = start[3]; t < image->nt; ++t, t2 += increment[3]) {
+                    size_t index_t = index_u + t2 * image->nx * image->ny * image->nz;
+                    for (int z = 0, z2 = start[2]; z < image->nz; ++z, z2 += increment[2]) {
+                        size_t index_z = index_t + z2 * image->nx * image->ny;
+                        for (int y = 0, y2 = start[1]; y < image->ny; ++y, y2 += increment[1]) {
+                            size_t index_y = index_z + y2 * image->nx;
+                            for (int x = 0, x2 = start[0]; x < image->nx; ++x, x2 += increment[0]) {
                                 size_t index = index_y + x2;
                                 *outputPtr++ = inputPtr[index];
                             }
@@ -2298,44 +2213,28 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin
     // Check the image data type
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_flipAxis_type<unsigned char>
-            (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
-             image->data, outputArray, cmd);
+        reg_flipAxis<unsigned char>(image, outputArray, cmd);
         break;
     case NIFTI_TYPE_INT8:
-        reg_flipAxis_type<char>
-            (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
-             image->data, outputArray, cmd);
+        reg_flipAxis<char>(image, outputArray, cmd);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_flipAxis_type<unsigned short>
-            (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
-             image->data, outputArray, cmd);
+        reg_flipAxis<unsigned short>(image, outputArray, cmd);
         break;
     case NIFTI_TYPE_INT16:
-        reg_flipAxis_type<short>
-            (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
-             image->data, outputArray, cmd);
+        reg_flipAxis<short>(image, outputArray, cmd);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_flipAxis_type<unsigned int>
-            (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
-             image->data, outputArray, cmd);
+        reg_flipAxis<unsigned int>(image, outputArray, cmd);
         break;
     case NIFTI_TYPE_INT32:
-        reg_flipAxis_type<int>
-            (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
-             image->data, outputArray, cmd);
+        reg_flipAxis<int>(image, outputArray, cmd);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_flipAxis_type<float>
-            (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
-             image->data, outputArray, cmd);
+        reg_flipAxis<float>(image, outputArray, cmd);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_flipAxis_type<double>
-            (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw,
-             image->data, outputArray, cmd);
+        reg_flipAxis<double>(image, outputArray, cmd);
         break;
     default:
         reg_print_fct_error("reg_flipAxis");
@@ -2344,10 +2243,10 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin
     }
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
-    DTYPE *ptrX = static_cast<DTYPE*>(field->data);
-    DTYPE *ptrY = &ptrX[CalcVoxelNumber(*field, 2)];
+    DataType *ptrX = static_cast<DataType*>(field->data);
+    DataType *ptrY = &ptrX[CalcVoxelNumber(*field, 2)];
 
     mat44 matrix;
     if (field->sform_code > 0)
@@ -2355,7 +2254,7 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
     else matrix = field->qto_xyz;
 
     int x, y, index;
-    DTYPE xInit, yInit;
+    DataType xInit, yInit;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(field, matrix, ptrX, ptrY) \
@@ -2365,11 +2264,11 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
         index = y * field->nx;
         for (x = 0; x < field->nx; x++) {
             // Get the initial control point position
-            xInit = matrix.m[0][0] * (DTYPE)x
-                + matrix.m[0][1] * (DTYPE)y
+            xInit = matrix.m[0][0] * (DataType)x
+                + matrix.m[0][1] * (DataType)y
                 + matrix.m[0][3];
-            yInit = matrix.m[1][0] * (DTYPE)x
-                + matrix.m[1][1] * (DTYPE)y
+            yInit = matrix.m[1][0] * (DataType)x
+                + matrix.m[1][1] * (DataType)y
                 + matrix.m[1][3];
 
             // The initial position is subtracted from every values
@@ -2380,12 +2279,12 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
     }
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_getDisplacementFromDeformation_3D(nifti_image *field) {
     const size_t voxelNumber = CalcVoxelNumber(*field);
-    DTYPE *ptrX = static_cast<DTYPE*>(field->data);
-    DTYPE *ptrY = &ptrX[voxelNumber];
-    DTYPE *ptrZ = &ptrY[voxelNumber];
+    DataType *ptrX = static_cast<DataType*>(field->data);
+    DataType *ptrY = &ptrX[voxelNumber];
+    DataType *ptrZ = &ptrY[voxelNumber];
 
     mat44 matrix;
     if (field->sform_code > 0)
@@ -2419,9 +2318,9 @@ void reg_getDisplacementFromDeformation_3D(nifti_image *field) {
                     + matrix.m[2][3];
 
                 // The initial position is subtracted from every values
-                ptrX[index] -= static_cast<DTYPE>(xInit);
-                ptrY[index] -= static_cast<DTYPE>(yInit);
-                ptrZ[index] -= static_cast<DTYPE>(zInit);
+                ptrX[index] -= static_cast<DataType>(xInit);
+                ptrY[index] -= static_cast<DataType>(yInit);
+                ptrZ[index] -= static_cast<DataType>(zInit);
                 index++;
             }
         }
@@ -2470,10 +2369,10 @@ int reg_getDisplacementFromDeformation(nifti_image *field) {
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_getDeformationFromDisplacement_2D(nifti_image *field) {
-    DTYPE *ptrX = static_cast<DTYPE*>(field->data);
-    DTYPE *ptrY = &ptrX[CalcVoxelNumber(*field, 2)];
+    DataType *ptrX = static_cast<DataType*>(field->data);
+    DataType *ptrY = &ptrX[CalcVoxelNumber(*field, 2)];
 
     mat44 matrix;
     if (field->sform_code > 0)
@@ -2481,7 +2380,7 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) {
     else matrix = field->qto_xyz;
 
     int x, y, index;
-    DTYPE xInit, yInit;
+    DataType xInit, yInit;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(field, matrix, \
@@ -2492,11 +2391,11 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) {
         index = y * field->nx;
         for (x = 0; x < field->nx; x++) {
             // Get the initial control point position
-            xInit = matrix.m[0][0] * (DTYPE)x
-                + matrix.m[0][1] * (DTYPE)y
+            xInit = matrix.m[0][0] * (DataType)x
+                + matrix.m[0][1] * (DataType)y
                 + matrix.m[0][3];
-            yInit = matrix.m[1][0] * (DTYPE)x
-                + matrix.m[1][1] * (DTYPE)y
+            yInit = matrix.m[1][0] * (DataType)x
+                + matrix.m[1][1] * (DataType)y
                 + matrix.m[1][3];
 
             // The initial position is added from every values
@@ -2507,12 +2406,12 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) {
     }
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_getDeformationFromDisplacement_3D(nifti_image *field) {
     const size_t voxelNumber = CalcVoxelNumber(*field);
-    DTYPE *ptrX = static_cast<DTYPE*>(field->data);
-    DTYPE *ptrY = &ptrX[voxelNumber];
-    DTYPE *ptrZ = &ptrY[voxelNumber];
+    DataType *ptrX = static_cast<DataType*>(field->data);
+    DataType *ptrY = &ptrX[voxelNumber];
+    DataType *ptrZ = &ptrY[voxelNumber];
 
     mat44 matrix;
     if (field->sform_code > 0)
@@ -2545,9 +2444,9 @@ void reg_getDeformationFromDisplacement_3D(nifti_image *field) {
                     + matrix.m[2][3];
 
                 // The initial position is subtracted from every values
-                ptrX[index] += static_cast<DTYPE>(xInit);
-                ptrY[index] += static_cast<DTYPE>(yInit);
-                ptrZ[index] += static_cast<DTYPE>(zInit);
+                ptrX[index] += static_cast<DataType>(xInit);
+                ptrY[index] += static_cast<DataType>(yInit);
+                ptrZ[index] += static_cast<DataType>(zInit);
                 index++;
             }
         }
@@ -2597,13 +2496,13 @@ int reg_getDeformationFromDisplacement(nifti_image *field) {
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_setGradientToZero_core(nifti_image *image,
                                 bool xAxis,
                                 bool yAxis,
                                 bool zAxis) {
     const size_t voxelNumber = CalcVoxelNumber(*image);
-    DTYPE *ptr = static_cast<DTYPE*>(image->data);
+    DataType *ptr = static_cast<DataType*>(image->data);
     if (xAxis) {
         for (size_t i = 0; i < voxelNumber; ++i)
             *ptr++ = 0;
@@ -2642,9 +2541,9 @@ void reg_setGradientToZero(nifti_image *image,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
-double reg_test_compare_arrays(const DTYPE *ptrA,
-                               const DTYPE *ptrB,
+template <class DataType>
+double reg_test_compare_arrays(const DataType *ptrA,
+                               const DataType *ptrB,
                                size_t nvox) {
     double maxDifference = 0;
 
@@ -2677,16 +2576,14 @@ double reg_test_compare_arrays(const DTYPE *ptrA,
 template double reg_test_compare_arrays<float>(const float*, const float*, size_t);
 template double reg_test_compare_arrays<double>(const double*, const double*, size_t);
 /* *************************************************************** */
-template <class DTYPE>
-double reg_test_compare_images1(const nifti_image *imgA,
-                                const nifti_image *imgB) {
-    const DTYPE *imgAPtr = static_cast<DTYPE*>(imgA->data);
-    const DTYPE *imgBPtr = static_cast<DTYPE*>(imgB->data);
-    return reg_test_compare_arrays<DTYPE>(imgAPtr, imgBPtr, imgA->nvox);
+template <class DataType>
+double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) {
+    const DataType *imgAPtr = static_cast<DataType*>(imgA->data);
+    const DataType *imgBPtr = static_cast<DataType*>(imgB->data);
+    return reg_test_compare_arrays<DataType>(imgAPtr, imgBPtr, imgA->nvox);
 }
 /* *************************************************************** */
-double reg_test_compare_images(const nifti_image *imgA,
-                               const nifti_image *imgB) {
+double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) {
     if (imgA->datatype != imgB->datatype) {
         reg_print_fct_error("reg_test_compare_images");
         reg_print_msg_error("Input images have different datatype");
@@ -2699,21 +2596,21 @@ double reg_test_compare_images(const nifti_image *imgA,
     }
     switch (imgA->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_test_compare_images1<unsigned char>(imgA, imgB);
+        return reg_test_compare_images<unsigned char>(imgA, imgB);
     case NIFTI_TYPE_UINT16:
-        return reg_test_compare_images1<unsigned short>(imgA, imgB);
+        return reg_test_compare_images<unsigned short>(imgA, imgB);
     case NIFTI_TYPE_UINT32:
-        return reg_test_compare_images1<unsigned int>(imgA, imgB);
+        return reg_test_compare_images<unsigned int>(imgA, imgB);
     case NIFTI_TYPE_INT8:
-        return reg_test_compare_images1<char>(imgA, imgB);
+        return reg_test_compare_images<char>(imgA, imgB);
     case NIFTI_TYPE_INT16:
-        return reg_test_compare_images1<short>(imgA, imgB);
+        return reg_test_compare_images<short>(imgA, imgB);
     case NIFTI_TYPE_INT32:
-        return reg_test_compare_images1<int>(imgA, imgB);
+        return reg_test_compare_images<int>(imgA, imgB);
     case NIFTI_TYPE_FLOAT32:
-        return reg_test_compare_images1<float>(imgA, imgB);
+        return reg_test_compare_images<float>(imgA, imgB);
     case NIFTI_TYPE_FLOAT64:
-        return reg_test_compare_images1<double>(imgA, imgB);
+        return reg_test_compare_images<double>(imgA, imgB);
     default:
         reg_print_fct_error("reg_test_compare_images");
         reg_print_msg_error("Unsupported data type");
@@ -2721,38 +2618,38 @@ double reg_test_compare_images(const nifti_image *imgA,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
-void reg_tools_abs_image1(nifti_image *img) {
-    DTYPE *ptr = static_cast<DTYPE*>(img->data);
+template <class DataType>
+void reg_tools_abs_image(nifti_image *img) {
+    DataType *ptr = static_cast<DataType*>(img->data);
     for (size_t i = 0; i < img->nvox; ++i)
-        ptr[i] = static_cast<DTYPE>(fabs(static_cast<double>(ptr[i])));
+        ptr[i] = static_cast<DataType>(fabs(static_cast<double>(ptr[i])));
 }
 /* *************************************************************** */
 void reg_tools_abs_image(nifti_image *img) {
     switch (img->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_abs_image1<unsigned char>(img);
+        reg_tools_abs_image<unsigned char>(img);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_abs_image1<unsigned short>(img);
+        reg_tools_abs_image<unsigned short>(img);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_abs_image1<unsigned int>(img);
+        reg_tools_abs_image<unsigned int>(img);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_abs_image1<char>(img);
+        reg_tools_abs_image<char>(img);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_abs_image1<short>(img);
+        reg_tools_abs_image<short>(img);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_abs_image1<int>(img);
+        reg_tools_abs_image<int>(img);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_abs_image1<float>(img);
+        reg_tools_abs_image<float>(img);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_abs_image1<double>(img);
+        reg_tools_abs_image<double>(img);
         break;
     default:
         reg_print_fct_error("reg_tools_abs_image");
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 936fdd57..92c2d6bd 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -17,6 +17,9 @@
 #include <fstream>
 #include <map>
 #include <memory>
+#include <cmath>
+#include <algorithm>
+#include <functional>
 #include "_reg_maths.h"
 
 using std::unique_ptr;
@@ -116,7 +119,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
  * @param axis Boolean array to specify which axis have to be
  * downsampled. The array follow the dim array of the nifti header.
  */
-extern "C++" template <class PrecisionTYPE>
+extern "C++" template <class PrecisionType>
 void reg_downsampleImage(nifti_image *image,
                          int type,
                          bool *axis);
@@ -127,13 +130,16 @@ void reg_downsampleImage(nifti_image *image,
  * @return Scalar value that corresponds to the longest
  * euclidean distance
  */
-extern "C++" template <class PrecisionTYPE>
-PrecisionTYPE reg_getMaximalLength(const nifti_image *image);
+extern "C++" template <class PrecisionType>
+PrecisionType reg_getMaximalLength(const nifti_image *image,
+                                   const bool& optimiseX,
+                                   const bool& optimiseY,
+                                   const bool& optimiseZ);
 /* *************************************************************** */
 /** @brief Change the datatype of a nifti image
  * @param image Image to be updated.
  */
-extern "C++" template <class NewTYPE>
+extern "C++" template <class NewType>
 void reg_tools_changeDatatype(nifti_image *image,
                               int type = -1);
 /* *************************************************************** */
@@ -320,7 +326,7 @@ float reg_tools_getSTDValue(const nifti_image *img);
  * @param levelToPerform Number to level that will be perform during
  * the registration.
  */
-extern "C++" template<class DTYPE>
+extern "C++" template<class DataType>
 int reg_createImagePyramid(const nifti_image *input,
                            nifti_image **pyramid,
                            unsigned int levelNumber,
@@ -335,7 +341,7 @@ int reg_createImagePyramid(const nifti_image *input,
  * @param levelToPerform Number to level that will be perform during
  * the registration.
  */
-extern "C++" template<class DTYPE>
+extern "C++" template<class DataType>
 int reg_createMaskPyramid(const nifti_image *input,
                           int **pyramid,
                           unsigned int levelNumber,
@@ -403,9 +409,9 @@ void reg_setGradientToZero(nifti_image *image,
  * The returned value is the largest value computed as ((A/B)-1)
  * If A or B are zeros then the (A-B) value is returned.
  */
-extern "C++" template<class DTYPE>
-double reg_test_compare_arrays(const DTYPE *ptrA,
-                               const DTYPE *ptrB,
+extern "C++" template<class DataType>
+double reg_test_compare_arrays(const DataType *ptrA,
+                               const DataType *ptrB,
                                size_t nvox);
 /* *************************************************************** */
 /** @brief The functions returns the largest ratio between input image intensities

From 066f3269508042a5e009a6d13e21ce8a9334d894 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 21 Feb 2023 19:36:54 +0000
Subject: [PATCH 060/314] Refactorisations

---
 niftyreg_build_version.txt            |   2 +-
 reg-apps/reg_aladin.cpp               |  26 +-
 reg-apps/reg_average.cpp              |  30 +-
 reg-apps/reg_f3d.cpp                  |   6 +-
 reg-apps/reg_jacobian.cpp             |  38 +-
 reg-apps/reg_measure.cpp              |   6 +-
 reg-apps/reg_ppcnr.cpp                |  72 ++-
 reg-apps/reg_resample.cpp             |   6 +-
 reg-apps/reg_tools.cpp                |   6 +-
 reg-apps/reg_transform.cpp            |   6 +-
 reg-io/_reg_ReadWriteImage.cpp        |   4 +-
 reg-io/nrrd/reg_nrrd.cpp              |  20 +-
 reg-lib/_reg_base.cpp                 |   1 -
 reg-lib/cpu/_reg_blockMatching.cpp    |  68 +--
 reg-lib/cpu/_reg_discrete_init.cpp    |   2 +-
 reg-lib/cpu/_reg_dti.cpp              |  88 ++--
 reg-lib/cpu/_reg_dti.h                |   4 +-
 reg-lib/cpu/_reg_femTrans.cpp         |   2 +-
 reg-lib/cpu/_reg_globalTrans.cpp      |   4 +-
 reg-lib/cpu/_reg_kld.cpp              |  50 +--
 reg-lib/cpu/_reg_kld.h                |   4 +-
 reg-lib/cpu/_reg_lncc.cpp             |  96 ++--
 reg-lib/cpu/_reg_lncc.h               |   6 +-
 reg-lib/cpu/_reg_localTrans.cpp       | 624 +++++++++++++-------------
 reg-lib/cpu/_reg_localTrans.h         |   4 +-
 reg-lib/cpu/_reg_localTrans_jac.cpp   | 464 +++++++++----------
 reg-lib/cpu/_reg_localTrans_regul.cpp | 490 ++++++++++----------
 reg-lib/cpu/_reg_maths.cpp            |   8 +-
 reg-lib/cpu/_reg_maths.h              |   2 +-
 reg-lib/cpu/_reg_maths_eigen.cpp      |  16 +-
 reg-lib/cpu/_reg_mind.cpp             |  50 +--
 reg-lib/cpu/_reg_mrf.cpp              |   6 +-
 reg-lib/cpu/_reg_mrf.h                |   4 +-
 reg-lib/cpu/_reg_nmi.cpp              |  94 ++--
 reg-lib/cpu/_reg_nmi.h                |   8 +-
 reg-lib/cpu/_reg_optimiser.cpp        |  38 +-
 reg-lib/cpu/_reg_optimiser.h          |  11 -
 reg-lib/cpu/_reg_resampling.cpp       | 182 ++++----
 reg-lib/cpu/_reg_splineBasis.cpp      | 376 ++++++++--------
 reg-lib/cpu/_reg_splineBasis.h        | 156 +++----
 reg-lib/cpu/_reg_ssd.cpp              |  90 ++--
 reg-lib/cpu/_reg_ssd.h                |   4 +-
 reg-lib/cuda/_reg_common_cuda.cu      | 184 ++++----
 reg-lib/cuda/_reg_common_cuda.h       |  60 +--
 reg-lib/cuda/_reg_optimiser_gpu.cu    |  11 +-
 reg-lib/cuda/blockMatchingKernel.cu   |  18 +-
 reg-lib/cuda/optimizeKernel.cu        |  10 +-
 reg-lib/cuda/resampleKernel.cu        |  20 +-
 48 files changed, 1721 insertions(+), 1756 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c4597e53..c5356ba1 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-173
+174
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 7d1eb92d..cfd6a6a2 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -21,7 +21,7 @@
 #   include <time.h>
 #endif
 
-#define PrecisionTYPE float
+using PrecisionType = float;
 
 void PetitUsage(char *exec) {
     char text[255];
@@ -99,7 +99,7 @@ void Usage(char *exec) {
     }
 
     //   reg_print_info(exec, "\t-crv\t\t\tChoose custom capture range for the block matching alg");
-#if defined (_OPENMP)
+#ifdef _OPENMP
     int defaultOpenMPValue = omp_get_num_procs();
     if (getenv("OMP_NUM_THREADS") != nullptr)
         defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
@@ -165,11 +165,11 @@ int main(int argc, char **argv) {
     float floatingSigma = 0;
     float referenceSigma = 0;
 
-    float referenceLowerThr = -std::numeric_limits<PrecisionTYPE>::max();
-    float referenceUpperThr = std::numeric_limits<PrecisionTYPE>::max();
-    float floatingLowerThr = -std::numeric_limits<PrecisionTYPE>::max();
-    float floatingUpperThr = std::numeric_limits<PrecisionTYPE>::max();
-    float paddingValue = std::numeric_limits<PrecisionTYPE>::quiet_NaN();
+    float referenceLowerThr = -std::numeric_limits<PrecisionType>::max();
+    float referenceUpperThr = std::numeric_limits<PrecisionType>::max();
+    float floatingLowerThr = -std::numeric_limits<PrecisionType>::max();
+    float floatingUpperThr = std::numeric_limits<PrecisionType>::max();
+    float paddingValue = std::numeric_limits<PrecisionType>::quiet_NaN();
 
     bool iso = false;
     bool verbose = true;
@@ -177,7 +177,7 @@ int main(int argc, char **argv) {
     PlatformType platformType(PlatformType::Cpu);
     unsigned gpuIdx = 999;
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
     // Set the default number of thread
     int defaultOpenMPValue = omp_get_num_procs();
     if (getenv("OMP_NUM_THREADS") != nullptr)
@@ -312,7 +312,7 @@ int main(int argc, char **argv) {
         } else if (strcmp(argv[i], "-crv") == 0 || strcmp(argv[i], "--crv") == 0) {
             captureRangeVox = atoi(argv[++i]);
         } else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) {
-#if defined (_OPENMP)
+#ifdef _OPENMP
             omp_set_num_threads(atoi(argv[++i]));
 #else
             reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
@@ -349,15 +349,15 @@ int main(int argc, char **argv) {
     }
 #endif
 
-    reg_aladin<PrecisionTYPE> *REG;
+    reg_aladin<PrecisionType> *REG;
     if (symFlag) {
-        REG = new reg_aladin_sym<PrecisionTYPE>;
+        REG = new reg_aladin_sym<PrecisionType>;
         if ((referenceMaskFlag && !floatingMaskName) || (!referenceMaskFlag && floatingMaskName)) {
             reg_print_msg_warn("You have one image mask option turned on but not the other.");
             reg_print_msg_warn("This will affect the degree of symmetry achieved.");
         }
     } else {
-        REG = new reg_aladin<PrecisionTYPE>;
+        REG = new reg_aladin<PrecisionType>;
         if (floatingMaskFlag) {
             reg_print_msg_warn("Note: Floating mask flag only used in symmetric method. Ignoring this option");
         }
@@ -489,7 +489,7 @@ int main(int argc, char **argv) {
     reg_print_msg_debug("*******************************************");
 #endif
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
     if (verbose) {
         int maxThreadNumber = omp_get_max_threads();
         sprintf(text, "OpenMP is used with %i thread(s)", maxThreadNumber);
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index 6e83fe95..aea56da1 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -20,7 +20,7 @@
 #include "_reg_localTrans.h"
 #include "_reg_maths_eigen.h"
 
-#define PrecisionTYPE float
+using PrecisionType = float;
 
 typedef enum
 {
@@ -76,11 +76,11 @@ void usage(char *exec)
 
 void average_norm_intensity(nifti_image *image)
 {
-   PrecisionTYPE *rankedIntensities = (PrecisionTYPE *)malloc(image->nvox*sizeof(PrecisionTYPE));
-   memcpy(rankedIntensities,image->data,image->nvox*sizeof(PrecisionTYPE));
+   PrecisionType *rankedIntensities = (PrecisionType *)malloc(image->nvox*sizeof(PrecisionType));
+   memcpy(rankedIntensities,image->data,image->nvox*sizeof(PrecisionType));
    reg_heapSort(rankedIntensities,static_cast<int>(image->nvox));
-   PrecisionTYPE lowerValue=rankedIntensities[static_cast<unsigned int>(static_cast<float>(image->nvox)*0.03f)];
-   PrecisionTYPE higherValue=rankedIntensities[static_cast<unsigned int>(static_cast<float>(image->nvox)*0.97f)];
+   PrecisionType lowerValue=rankedIntensities[static_cast<unsigned int>(static_cast<float>(image->nvox)*0.03f)];
+   PrecisionType higherValue=rankedIntensities[static_cast<unsigned int>(static_cast<float>(image->nvox)*0.97f)];
    reg_tools_subtractValueFromImage(image,image,lowerValue);
    reg_tools_multiplyValueToImage(image,image,255.f/(higherValue-lowerValue));
    free(rankedIntensities);
@@ -96,11 +96,11 @@ int remove_nan_and_add(nifti_image *averageImage,
       reg_print_msg_error(" All images must have the same size");
       return EXIT_FAILURE;
    }
-   PrecisionTYPE *avgImgPtr = static_cast<PrecisionTYPE *>(averageImage->data);
-   PrecisionTYPE *addImgPtr = static_cast<PrecisionTYPE *>(toAddImage->data);
-   PrecisionTYPE *defImgPtr = static_cast<PrecisionTYPE *>(definedNumImage->data);
+   PrecisionType *avgImgPtr = static_cast<PrecisionType *>(averageImage->data);
+   PrecisionType *addImgPtr = static_cast<PrecisionType *>(toAddImage->data);
+   PrecisionType *defImgPtr = static_cast<PrecisionType *>(definedNumImage->data);
    for(size_t i=0; i<averageImage->nvox; ++i){
-      PrecisionTYPE value = *addImgPtr;
+      PrecisionType value = *addImgPtr;
       if(value==value){
          *avgImgPtr+=value;
          *defImgPtr+=1;
@@ -469,7 +469,7 @@ int compute_average_image(nifti_image *averageImage,
       warpedImage->data = malloc(warpedImage->nvox*warpedImage->nbyper);
       // Read the input image
       nifti_image *current_input_image = reg_io_ReadImageFile(inputImageName[i]);
-      reg_tools_changeDatatype<PrecisionTYPE>(current_input_image);
+      reg_tools_changeDatatype<PrecisionType>(current_input_image);
       // Apply the transformation
       reg_resampleImage(current_input_image,
                         warpedImage,
@@ -499,7 +499,7 @@ int main(int argc, char **argv)
       usage(argv[0]);
       return EXIT_FAILURE;
    }
-#if defined (_OPENMP)
+#ifdef _OPENMP
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
@@ -561,14 +561,14 @@ int main(int argc, char **argv)
          int length = strchr(buffer, '\0')-buffer+1;
          if(strcmp(buffer, "-omp")==0){
             fscanf(cmd_file," %511s", buffer);
-#if defined (_OPENMP)
+#ifdef _OPENMP
             omp_set_num_threads(atoi(buffer));
 #else
             reg_print_msg_warn("OpenMP flag detected and ignored.");
 #endif
 #ifndef NDEBUG
             reg_print_msg_debug("OpenMP flag detected");
-#if defined (_OPENMP)
+#ifdef _OPENMP
             reg_print_msg_debug("OpenMP core number set to:");
             reg_print_msg_debug(buffer);
 #endif
@@ -755,9 +755,9 @@ int main(int argc, char **argv)
       avg_output_image->scl_slope=1.f;
       avg_output_image->scl_inter=0.f;
       avg_output_image->datatype=NIFTI_TYPE_FLOAT32;
-      if(sizeof(PrecisionTYPE)==sizeof(double))
+      if(sizeof(PrecisionType)==sizeof(double))
          avg_output_image->datatype=NIFTI_TYPE_FLOAT64;
-      avg_output_image->nbyper=sizeof(PrecisionTYPE);
+      avg_output_image->nbyper=sizeof(PrecisionType);
       avg_output_image->data=calloc(avg_output_image->nvox,avg_output_image->nbyper);
       reg_tools_multiplyValueToImage(avg_output_image, avg_output_image, 0.f);
       // Set the output filename
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 741083be..5cf0f25c 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -181,7 +181,7 @@ int main(int argc, char **argv) {
     time(&start);
     int verbose = true;
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
     // Set the default number of thread
     int defaultOpenMPValue = omp_get_num_procs();
     if (getenv("OMP_NUM_THREADS") != nullptr)
@@ -640,7 +640,7 @@ int main(int argc, char **argv) {
         }
 
         else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) {
-#if defined (_OPENMP)
+#ifdef _OPENMP
             omp_set_num_threads(atoi(argv[++i]));
 #else
             reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
@@ -675,7 +675,7 @@ int main(int argc, char **argv) {
     reg_print_msg_debug("*******************************************");
 #endif
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
     if (verbose) {
         int maxThreadNumber = omp_get_max_threads();
         text = stringFormat("OpenMP is used with %i thread(s)", maxThreadNumber);
diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp
index b4a5b8c7..e4eaa54f 100644
--- a/reg-apps/reg_jacobian.cpp
+++ b/reg-apps/reg_jacobian.cpp
@@ -38,32 +38,32 @@ typedef struct
    bool outputLogDetFlag;
 } FLAG;
 
-template <class DTYPE>
+template <class DataType>
 void reg_jacobian_computeLog(nifti_image *image)
 {
-   DTYPE *imgPtr=static_cast<DTYPE *>(image->data);
+   DataType *imgPtr=static_cast<DataType *>(image->data);
    for(size_t i=0; i<image->nvox;++i){
-      *imgPtr = static_cast<DTYPE>(log(*imgPtr));
+      *imgPtr = static_cast<DataType>(log(*imgPtr));
       ++imgPtr;
    }
    return;
 }
 
-template <class DTYPE>
+template <class DataType>
 void reg_jacobian_convertMat33ToNii(mat33 *array, nifti_image *image)
 {
    const size_t voxelNumber=CalcVoxelNumber(*image);
-   DTYPE *ptrXX=static_cast<DTYPE *>(image->data);
+   DataType *ptrXX=static_cast<DataType *>(image->data);
    if(image->nz>1)
    {
-      DTYPE *ptrXY=&ptrXX[voxelNumber];
-      DTYPE *ptrXZ=&ptrXY[voxelNumber];
-      DTYPE *ptrYX=&ptrXZ[voxelNumber];
-      DTYPE *ptrYY=&ptrYX[voxelNumber];
-      DTYPE *ptrYZ=&ptrYY[voxelNumber];
-      DTYPE *ptrZX=&ptrYZ[voxelNumber];
-      DTYPE *ptrZY=&ptrZX[voxelNumber];
-      DTYPE *ptrZZ=&ptrZY[voxelNumber];
+      DataType *ptrXY=&ptrXX[voxelNumber];
+      DataType *ptrXZ=&ptrXY[voxelNumber];
+      DataType *ptrYX=&ptrXZ[voxelNumber];
+      DataType *ptrYY=&ptrYX[voxelNumber];
+      DataType *ptrYZ=&ptrYY[voxelNumber];
+      DataType *ptrZX=&ptrYZ[voxelNumber];
+      DataType *ptrZY=&ptrZX[voxelNumber];
+      DataType *ptrZZ=&ptrZY[voxelNumber];
       for(size_t voxel=0; voxel<voxelNumber; ++voxel)
       {
          mat33 matrix=array[voxel];
@@ -80,9 +80,9 @@ void reg_jacobian_convertMat33ToNii(mat33 *array, nifti_image *image)
    }
    else
    {
-      DTYPE *ptrXY=&ptrXX[voxelNumber];
-      DTYPE *ptrYX=&ptrXY[voxelNumber];
-      DTYPE *ptrYY=&ptrYX[voxelNumber];
+      DataType *ptrXY=&ptrXX[voxelNumber];
+      DataType *ptrYX=&ptrXY[voxelNumber];
+      DataType *ptrYY=&ptrYX[voxelNumber];
       for(size_t voxel=0; voxel<voxelNumber; ++voxel)
       {
          mat33 matrix=array[voxel];
@@ -117,7 +117,7 @@ void Usage(char *exec)
    printf("\t\tFilename of the Jacobian matrix map. (9 or 4 values are stored as a 5D nifti).\n");
    printf("\t-jacL <filename>\n");
    printf("\t\tFilename of the Log of the Jacobian determinant map.\n");
-#if defined (_OPENMP)
+#ifdef _OPENMP
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -139,7 +139,7 @@ int main(int argc, char **argv)
    PARAM *param = (PARAM *)calloc(1,sizeof(PARAM));
    FLAG *flag = (FLAG *)calloc(1,sizeof(FLAG));
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
@@ -170,7 +170,7 @@ int main(int argc, char **argv)
       }
       else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
       {
-#if defined (_OPENMP)
+#ifdef _OPENMP
          omp_set_num_threads(atoi(argv[++i]));
 #else
          reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp
index d1ac54a5..10380334 100755
--- a/reg-apps/reg_measure.cpp
+++ b/reg-apps/reg_measure.cpp
@@ -66,7 +66,7 @@ void Usage(char *exec)
    printf("\t-nmi\t\tReturns the NMI value (64 bins are used)\n");
    printf("\t-ssd\t\tReturns the SSD value\n");
    printf("\n\t-out\t\tText file output where to store the value(s).\n\t\t\tThe stdout is used by default\n");
-#if defined (_OPENMP)
+#ifdef _OPENMP
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -86,7 +86,7 @@ int main(int argc, char **argv)
    param->interpolation=3; // Cubic spline interpolation used by default
    param->paddingValue=std::numeric_limits<float>::quiet_NaN();
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
@@ -117,7 +117,7 @@ int main(int argc, char **argv)
 //      }
       else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
       {
-#if defined (_OPENMP)
+#ifdef _OPENMP
          omp_set_num_threads(atoi(argv[++i]));
 #else
          reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index 565dc887..fda85e82 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -22,9 +22,7 @@
 #include <time.h>
 #endif
 
-#define PrecisionTYPE float
-#define min(a,b)    ((a) < (b) ? (a): (b))
-#define max(a,b)    ((a) > (b) ? (a): (b))
+using PrecisionType = float;
 
 typedef struct
 {
@@ -385,7 +383,7 @@ int main(int argc, char **argv)
       fprintf(stderr,"* ERROR Error when reading image: %s\n",param->sourceImageName);
       return EXIT_FAILURE;
    }
-   reg_tools_changeDatatype<PrecisionTYPE>(image); // FIX DATA TYPE - DOES THIS WORK?
+   reg_tools_changeDatatype<PrecisionType>(image); // FIX DATA TYPE - DOES THIS WORK?
 
    // --- 2) READ/SET IMAGE MASK (4D VOLUME, [NS, SS]) ---
    nifti_image *mask=nullptr;
@@ -397,7 +395,7 @@ int main(int argc, char **argv)
          fprintf(stderr,"* ERROR Error when reading image: %s\n",param->pcaMaskName);
          return EXIT_FAILURE;
       }
-      reg_tools_changeDatatype<PrecisionTYPE>(mask);
+      reg_tools_changeDatatype<PrecisionType>(mask);
    }
    else
    {
@@ -406,11 +404,11 @@ int main(int argc, char **argv)
       mask->nt=mask->dim[4]=1;
       mask->nvox = CalcVoxelNumber(*mask, mask->ndim);
       mask->data = malloc(mask->nvox*mask->nbyper);
-      PrecisionTYPE *intensityPtrM = static_cast<PrecisionTYPE *>(mask->data);
+      PrecisionType *intensityPtrM = static_cast<PrecisionType *>(mask->data);
       for(size_t i=0; i<mask->nvox; i++) intensityPtrM[i]=1.0;
    }
-   PrecisionTYPE masksum=0;
-   PrecisionTYPE *intensityPtrM = static_cast<PrecisionTYPE *>(mask->data);
+   PrecisionType masksum=0;
+   PrecisionType *intensityPtrM = static_cast<PrecisionType *>(mask->data);
    for(size_t i=0; i<mask->nvox; i++)
    {
       if(intensityPtrM[i]) masksum++;
@@ -418,7 +416,7 @@ int main(int argc, char **argv)
 
    if(!flag->prinCompFlag && !flag->locality && !flag->meanonly && !flag->tp)
    {
-      param->prinComp=min((int)(image->nt/2),25);// Check the number of components
+      param->prinComp=std::min(image->nt/2,25);// Check the number of components
    }
    if(param->prinComp>=image->nt) param->prinComp=image->nt-1;
    if(!flag->outputResultFlag) param->outputResultName="ppcnrfinal-img.nii";
@@ -508,9 +506,9 @@ int main(int argc, char **argv)
    levels[2]=-2.5;
    int levelNumber=1;
    if(images->nt<3) levelNumber=3;
-   PrecisionTYPE *Mean = new PrecisionTYPE [image->nt];
-   PrecisionTYPE *Cov = new PrecisionTYPE [image->nt*image->nt];
-   PrecisionTYPE cov;
+   PrecisionType *Mean = new PrecisionType [image->nt];
+   PrecisionType *Cov = new PrecisionType [image->nt*image->nt];
+   PrecisionType cov;
 //   char pcaname[20];
 //   char outname[20];
 
@@ -529,8 +527,8 @@ int main(int argc, char **argv)
 
       // Read images and find image means
       unsigned int voxelNumber = image->nvox/image->nt;
-      PrecisionTYPE *intensityPtr = static_cast<PrecisionTYPE *>(image->data);
-      PrecisionTYPE *intensityPtrM = static_cast<PrecisionTYPE *>(mask->data);
+      PrecisionType *intensityPtr = static_cast<PrecisionType *>(image->data);
+      PrecisionType *intensityPtrM = static_cast<PrecisionType *>(mask->data);
       for(int t=0; t<image->nt; t++)
       {
          Mean[t]=0.f;
@@ -542,14 +540,14 @@ int main(int argc, char **argv)
       }
 
       // calculate covariance matrix
-      intensityPtr = static_cast<PrecisionTYPE *>(image->data);
-      intensityPtrM = static_cast<PrecisionTYPE *>(mask->data);
+      intensityPtr = static_cast<PrecisionType *>(image->data);
+      intensityPtrM = static_cast<PrecisionType *>(mask->data);
       for(int t=0; t<image->nt; t++)
       {
-         PrecisionTYPE *currentIntensityPtr2 = &intensityPtr[t*voxelNumber];
+         PrecisionType *currentIntensityPtr2 = &intensityPtr[t*voxelNumber];
          for(int t2=t; t2<image->nt; t2++)
          {
-            PrecisionTYPE *currentIntensityPtr1 = &intensityPtr[t*voxelNumber];
+            PrecisionType *currentIntensityPtr1 = &intensityPtr[t*voxelNumber];
             cov=0.f;
             for(size_t i=0; i<voxelNumber; i++)
             {
@@ -787,15 +785,15 @@ int main(int argc, char **argv)
       float dotty,sum;
       if(flag->locality)  // local mean
       {
-         PrecisionTYPE *intensityPtr1 = static_cast<PrecisionTYPE *>(image->data);
-         PrecisionTYPE *intensityPtr2 = static_cast<PrecisionTYPE *>(imagep->data);
+         PrecisionType *intensityPtr1 = static_cast<PrecisionType *>(image->data);
+         PrecisionType *intensityPtr2 = static_cast<PrecisionType *>(imagep->data);
          for(size_t i=0; i<voxelNumber; i++)
          {
             for(int t=0; t<image->nt; t++)
             {
                dotty=0;
                sum=0;
-               for(int tt=max(t-param->locality,0); tt<=min(t+param->locality,image->nt); tt++)
+               for(int tt=std::max(t-param->locality,0); tt<=std::min(t+param->locality,image->nt); tt++)
                {
                   dotty += intensityPtr1[tt*voxelNumber+i];
                   sum++;
@@ -806,8 +804,8 @@ int main(int argc, char **argv)
       }
       else if(flag->tp)  // single timepoint
       {
-         PrecisionTYPE *intensityPtr1 = static_cast<PrecisionTYPE *>(image->data);
-         PrecisionTYPE *intensityPtr2 = static_cast<PrecisionTYPE *>(imagep->data);
+         PrecisionType *intensityPtr1 = static_cast<PrecisionType *>(image->data);
+         PrecisionType *intensityPtr2 = static_cast<PrecisionType *>(imagep->data);
          for(size_t i=0; i<voxelNumber; i++)
          {
             for(int t=0; t<image->nt; t++)
@@ -818,8 +816,8 @@ int main(int argc, char **argv)
       }
       else  // ppcr and mean
       {
-         PrecisionTYPE *intensityPtr1 = static_cast<PrecisionTYPE *>(image->data);
-         PrecisionTYPE *intensityPtr2 = static_cast<PrecisionTYPE *>(imagep->data);
+         PrecisionType *intensityPtr1 = static_cast<PrecisionType *>(image->data);
+         PrecisionType *intensityPtr2 = static_cast<PrecisionType *>(imagep->data);
          for(size_t i=0; i<voxelNumber; i++)
          {
             for(int c=0; c<prinCompNumber; c++) // Add up component contributions
@@ -851,9 +849,9 @@ int main(int argc, char **argv)
          /* ****************************/
          // current: images // these are both open: perpetual source
          // target:  imagep //					   pca target
-         PrecisionTYPE *intensityPtrP = static_cast<PrecisionTYPE *>(imagep->data); // pointer to pca-anchor data
-         PrecisionTYPE *intensityPtrS = static_cast<PrecisionTYPE *>(images->data); // pointer to real source-float data
-         PrecisionTYPE *intensityPtrC = static_cast<PrecisionTYPE *>(image->data); // pointer to updated 'current' data
+         PrecisionType *intensityPtrP = static_cast<PrecisionType *>(imagep->data); // pointer to pca-anchor data
+         PrecisionType *intensityPtrS = static_cast<PrecisionType *>(images->data); // pointer to real source-float data
+         PrecisionType *intensityPtrC = static_cast<PrecisionType *>(image->data); // pointer to updated 'current' data
          for(int imageNumber=0; imageNumber<images->nt; imageNumber++)
          {
             // ROLLING FLOAT AND ANCHOR IMAGES
@@ -866,8 +864,8 @@ int main(int argc, char **argv)
             nifti_image *storet = nifti_dup(*stores, false);
 
             // COPY THE APPROPRIATE VALUES
-            PrecisionTYPE *intensityPtrPP = static_cast<PrecisionTYPE *>(storet->data); // 3D real source image (needs current cpp image)
-            PrecisionTYPE *intensityPtrSS = static_cast<PrecisionTYPE *>(stores->data); // 3D pca-float data
+            PrecisionType *intensityPtrPP = static_cast<PrecisionType *>(storet->data); // 3D real source image (needs current cpp image)
+            PrecisionType *intensityPtrSS = static_cast<PrecisionType *>(stores->data); // 3D pca-float data
             memcpy(intensityPtrPP, &intensityPtrP[imageNumber*storet->nvox], storet->nvox*storet->nbyper);
             memcpy(intensityPtrSS, &intensityPtrS[imageNumber*stores->nvox], stores->nvox*stores->nbyper);
 
@@ -943,7 +941,7 @@ int main(int argc, char **argv)
 
             // READ IN RESULT AND MAKE A NEW CURRENT IMAGE 'image'
             stores = nifti_image_read("outputResult.nii",true); // TODO NAME
-            PrecisionTYPE *intensityPtrCC = static_cast<PrecisionTYPE *>(stores->data); // 3D result image
+            PrecisionType *intensityPtrCC = static_cast<PrecisionType *>(stores->data); // 3D result image
             memcpy(&intensityPtrC[imageNumber*stores->nvox], intensityPtrCC, stores->nvox*stores->nbyper);
             nifti_image_free(stores);
          }
@@ -968,14 +966,14 @@ int main(int argc, char **argv)
             nifti_image *dofs = nifti_copy_nim_info(dof);
             dofs->nt = dofs->dim[4] = images->nt;
             dofs->nvox = dof->nvox*images->nt;
-            dofs->data = (PrecisionTYPE *)calloc(dofs->nvox, dof->nbyper);
-            PrecisionTYPE *intensityPtrD = static_cast<PrecisionTYPE *>(dofs->data);
+            dofs->data = (PrecisionType *)calloc(dofs->nvox, dof->nbyper);
+            PrecisionType *intensityPtrD = static_cast<PrecisionType *>(dofs->data);
             for(int t=0; t<images->nt; t++)
             {
                char buffer[20];
                sprintf(buffer,"float%s%i.nii",style, t+1);
                nifti_image *dof = nifti_image_read(buffer,true);
-               PrecisionTYPE *intensityPtrDD = static_cast<PrecisionTYPE *>(dof->data);
+               PrecisionType *intensityPtrDD = static_cast<PrecisionType *>(dof->data);
                int r=dof->nvox/3.0;
                for(int i=0; i<3; i++)
                {
@@ -1019,14 +1017,14 @@ int main(int argc, char **argv)
          nifti_image *dofs = nifti_copy_nim_info(dof);
          dofs->nt = dofs->dim[4] = images->nt;
          dofs->nvox = dof->nvox*images->nt;
-         dofs->data = (PrecisionTYPE *)calloc(dofs->nvox, dof->nbyper);
-         PrecisionTYPE *intensityPtrD = static_cast<PrecisionTYPE *>(dofs->data);
+         dofs->data = (PrecisionType *)calloc(dofs->nvox, dof->nbyper);
+         PrecisionType *intensityPtrD = static_cast<PrecisionType *>(dofs->data);
          for(int t=0; t<images->nt; t++)
          {
             char buffer[20];
             sprintf(buffer,"float%s%i.nii",style, t+1);
             nifti_image *dof = nifti_image_read(buffer,true);
-            PrecisionTYPE *intensityPtrDD = static_cast<PrecisionTYPE *>(dof->data);
+            PrecisionType *intensityPtrDD = static_cast<PrecisionType *>(dof->data);
             int r=dof->nvox/3.0;
             for(int i=0; i<3; i++)
             {
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index c7a12e52..793a340f 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -69,7 +69,7 @@ void Usage(char *exec)
    printf("\t-psf\n\t\tPerform the resampling in two steps to resample an image to a lower resolution [off]\n");
    printf("\t-psf_alg <0/1>\n\t\tMinimise the matrix metric (0) or the determinant (1) when estimating the PSF [0]\n");
    printf("\t-voff\n\t\tTurns verbose off [on]\n");
-#if defined (_OPENMP)
+#ifdef _OPENMP
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -91,7 +91,7 @@ int main(int argc, char **argv)
    param->PSF_Algorithm=0;
    bool verbose=true;
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
@@ -126,7 +126,7 @@ int main(int argc, char **argv)
       }
       else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
       {
-#if defined (_OPENMP)
+#ifdef _OPENMP
          omp_set_num_threads(atoi(argv[++i]));
 #else
          reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 14a6bdfb..cebab176 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -128,7 +128,7 @@ void Usage(char *exec)
     printf("\t-mind\t\t\tCreate a MIND descriptor image\n");
     printf("\t-mindssc\t\tCreate a MIND-SSC descriptor image\n");
     printf("\t-interp\t\t\tInterpolation order to use to warp the floating image\n");
-#if defined (_OPENMP)
+#ifdef _OPENMP
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -152,7 +152,7 @@ int main(int argc, char **argv)
         return EXIT_FAILURE;
     }
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
     // Set the default number of thread
     int defaultOpenMPValue=omp_get_num_procs();
     if(getenv("OMP_NUM_THREADS")!=nullptr)
@@ -183,7 +183,7 @@ int main(int argc, char **argv)
         }
         else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
         {
-#if defined (_OPENMP)
+#ifdef _OPENMP
             omp_set_num_threads(atoi(argv[++i]));
 #else
             reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 095b0668..174fe2fe 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -141,7 +141,7 @@ void Usage(char *exec)
    printf("\t\tfilename2 - Image used as a reference (-ref arg in FLIRT)\n");
    printf("\t\tfilename3 - Image used as a floating (-in arg in FLIRT)\n");
    printf("\t\tfilename4 - Output affine transformation file name\n\n");
-#if defined (_OPENMP)
+#ifdef _OPENMP
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -175,7 +175,7 @@ int main(int argc, char **argv)
    PARAM *param = (PARAM *)calloc(1,sizeof(PARAM));
    FLAG *flag = (FLAG *)calloc(1,sizeof(FLAG));
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
    // Set the default number of thread
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
@@ -203,7 +203,7 @@ int main(int argc, char **argv)
       }
       else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
       {
-#if defined (_OPENMP)
+#ifdef _OPENMP
          omp_set_num_threads(atoi(argv[++i]));
 #else
          reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index d39c290c..eba5b063 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -177,11 +177,11 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename)
    return;
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_io_diplayImageData1(nifti_image *image)
 {
     reg_print_msg_debug("image values:");
-    DTYPE *data = static_cast<DTYPE *>(image->data);
+    DataType *data = static_cast<DataType *>(image->data);
     std::string text;
 
     size_t voxelIndex=0;
diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp
index 94e37acf..76f812b7 100644
--- a/reg-io/nrrd/reg_nrrd.cpp
+++ b/reg-io/nrrd/reg_nrrd.cpp
@@ -13,17 +13,17 @@
 #include "reg_nrrd.h"
 
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_convertVectorField_nifti_to_nrrd(nifti_image *niiImage,
       Nrrd *nrrdImage)
 {
    const size_t voxNumber = CalcVoxelNumber(*niiImage);
 
-   DTYPE *inPtrX=static_cast<DTYPE *>(niiImage->data);
-   DTYPE *inPtrY=&inPtrX[voxNumber];
-   DTYPE *inPtrZ=nullptr;
+   DataType *inPtrX=static_cast<DataType *>(niiImage->data);
+   DataType *inPtrY=&inPtrX[voxNumber];
+   DataType *inPtrZ=nullptr;
 
-   DTYPE *outPtr=static_cast<DTYPE *>(nrrdImage->data);
+   DataType *outPtr=static_cast<DataType *>(nrrdImage->data);
 
    if(niiImage->nu==3)
    {
@@ -45,7 +45,7 @@ void reg_convertVectorField_nifti_to_nrrd(nifti_image *niiImage,
    }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_convertVectorField_nrrd_to_nifti(Nrrd *nrrdImage,
       nifti_image *niiImage)
 {
@@ -53,11 +53,11 @@ void reg_convertVectorField_nrrd_to_nifti(Nrrd *nrrdImage,
                       nrrdImage->axis[2].size *
                       nrrdImage->axis[3].size;
 
-   DTYPE *outPtr=static_cast<DTYPE *>(nrrdImage->data);
+   DataType *outPtr=static_cast<DataType *>(nrrdImage->data);
 
-   DTYPE *inPtrX=static_cast<DTYPE *>(niiImage->data);
-   DTYPE *inPtrY=&inPtrX[voxNumber];
-   DTYPE *inPtrZ=nullptr;
+   DataType *inPtrX=static_cast<DataType *>(niiImage->data);
+   DataType *inPtrY=&inPtrX[voxNumber];
+   DataType *inPtrZ=nullptr;
 
    if(nrrdImage->axis[0].size==3)
    {
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index c82ffd33..2c7cd9e6 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -1089,7 +1089,6 @@ void reg_base<T>::Run() {
 
             } // while
             if (perturbation < perturbationNumber) {
-
                 optimiser->Perturbation(smallestSize);
                 currentSize = maxStepSize;
 #ifdef NDEBUG
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index 64f0f49d..8f32e33d 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -16,7 +16,7 @@
 #include <iostream>
 #include <cmath>
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam *params, int *mask, bool runningOnGPU) {
 
    float *varianceArray = (float *)malloc(params->totalBlockNumber * sizeof(float));
@@ -26,14 +26,14 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
 
    int unusableBlock = 0;
    size_t index;
-   DTYPE *referenceValues = nullptr;
+   DataType *referenceValues = nullptr;
    if (referenceImage->nz > 1) {
-      referenceValues = (DTYPE *)malloc(BLOCK_3D_SIZE * sizeof(DTYPE));
+      referenceValues = (DataType *)malloc(BLOCK_3D_SIZE * sizeof(DataType));
    }
    else {
-      referenceValues = (DTYPE *)malloc(BLOCK_2D_SIZE * sizeof(DTYPE));
+      referenceValues = (DataType *)malloc(BLOCK_2D_SIZE * sizeof(DataType));
    }
-   DTYPE *referencePtr = static_cast<DTYPE *>(referenceImage->data);
+   DataType *referencePtr = static_cast<DataType *>(referenceImage->data);
    int blockIndex = 0;
 
    if (referenceImage->nz > 1) {
@@ -43,7 +43,7 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
             for (unsigned int i = 0; i < params->blockNumber[0]; i++) {
 
                for (unsigned int n = 0; n < BLOCK_3D_SIZE; n++)
-                  referenceValues[n] = (DTYPE)std::numeric_limits<float>::quiet_NaN();
+                  referenceValues[n] = (DataType)std::numeric_limits<float>::quiet_NaN();
 
                float mean = 0.0f;
                float voxelNumber = 0.0f;
@@ -51,12 +51,12 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
                for (unsigned int z = k * BLOCK_WIDTH; z < (k + 1) * BLOCK_WIDTH; z++) {
                   if (z < (unsigned int)referenceImage->nz) {
                      index = z * referenceImage->nx * referenceImage->ny;
-                     DTYPE *referencePtrZ = &referencePtr[index];
+                     DataType *referencePtrZ = &referencePtr[index];
                      int *maskPtrZ = &maskPtr[index];
                      for (unsigned int y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) {
                         if (y < (unsigned int)referenceImage->ny) {
                            index = y * referenceImage->nx + i * BLOCK_WIDTH;
-                           DTYPE *referencePtrXYZ = &referencePtrZ[index];
+                           DataType *referencePtrXYZ = &referencePtrZ[index];
                            int *maskPtrXYZ = &maskPtrZ[index];
                            for (unsigned int x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) {
                               if (x < (unsigned int)referenceImage->nx) {
@@ -103,7 +103,7 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
          for (unsigned int i = 0; i < params->blockNumber[0]; i++) {
 
             for (unsigned int n = 0; n < BLOCK_2D_SIZE; n++)
-               referenceValues[n] = (DTYPE)std::numeric_limits<float>::quiet_NaN();
+               referenceValues[n] = std::numeric_limits<DataType>::quiet_NaN();
 
             float mean = 0.0f;
             float voxelNumber = 0.0f;
@@ -112,7 +112,7 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
             for (unsigned int y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) {
                if (y < (unsigned )referenceImage->ny) {
                   index = y * referenceImage->nx + i * BLOCK_WIDTH;
-                  DTYPE *referencePtrXY = &referencePtr[index];
+                  DataType *referencePtrXY = &referencePtr[index];
                   int *maskPtrXY = &maskPtr[index];
                   for (unsigned int x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) {
                      if (x < (unsigned)referenceImage->nx) {
@@ -256,10 +256,10 @@ void initialise_block_matching_method(nifti_image * reference,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<typename DTYPE>
+template<typename DataType>
 void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg_blockMatchingParam *params, int *mask) {
-   DTYPE *referencePtr = static_cast<DTYPE *>(reference->data);
-   DTYPE *warpedPtr = static_cast<DTYPE *>(warped->data);
+   DataType *referencePtr = static_cast<DataType *>(reference->data);
+   DataType *warpedPtr = static_cast<DataType *>(warped->data);
 
    mat44 *referenceMatrix_xyz;
    if (reference->sform_code > 0)
@@ -284,14 +284,14 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
    int index, l, m, x, y, z = 0;
    unsigned int i, j;
    int *maskPtr_XY;
-   DTYPE *referencePtr_XY, *warpedPtr_XY;
-   DTYPE value, bestCC, referenceMean, warpedMean, referenceVar, warpedVar;
-   DTYPE voxelNumber, localCC, referenceTemp, warpedTemp;
+   DataType *referencePtr_XY, *warpedPtr_XY;
+   DataType value, bestCC, referenceMean, warpedMean, referenceVar, warpedVar;
+   DataType voxelNumber, localCC, referenceTemp, warpedTemp;
    float bestDisplacement[3], referencePosition_temp[3], tempPosition[3];
 
-   DTYPE referenceValues[BLOCK_2D_SIZE];
+   DataType referenceValues[BLOCK_2D_SIZE];
    bool referenceOverlap[BLOCK_2D_SIZE];
-   DTYPE warpedValues[BLOCK_2D_SIZE];
+   DataType warpedValues[BLOCK_2D_SIZE];
    bool warpedOverlap[BLOCK_2D_SIZE];
 
    params->definedActiveBlockNumber = 0;
@@ -329,7 +329,7 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
                else
                   referenceIndex += BLOCK_WIDTH;
             }
-            bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0;
+            bestCC = params->voxelCaptureRange > 3 ? 0.9f : 0;
             bestDisplacement[0] = std::numeric_limits<float>::quiet_NaN();
             bestDisplacement[1] = 0.f;
             bestDisplacement[2] = 0.f;
@@ -434,13 +434,13 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
 
 }
 /* *************************************************************** */
-template<typename DTYPE>
+template<typename DataType>
 void block_matching_method3D(nifti_image * reference,
                              nifti_image * warped,
                              _reg_blockMatchingParam *params,
                              int *mask) {
-   DTYPE *referencePtr = static_cast<DTYPE *>(reference->data);
-   DTYPE *warpedPtr = static_cast<DTYPE *>(warped->data);
+   DataType *referencePtr = static_cast<DataType *>(reference->data);
+   DataType *warpedPtr = static_cast<DataType *>(warped->data);
 
    mat44 *referenceMatrix_xyz;
    if (reference->sform_code > 0)
@@ -464,29 +464,29 @@ void block_matching_method3D(nifti_image * reference,
    int index, l, m, n, x, y, z;
    int i, j, k; //Need to be int for VC++ compiler and OpenMP
    int *maskPtr_Z, *maskPtr_XYZ;
-   DTYPE *referencePtr_Z, *referencePtr_XYZ, *warpedPtr_Z, *warpedPtr_XYZ;
-   DTYPE value, bestCC, referenceMean, warpedMean, referenceVar, warpedVar;
-   DTYPE voxelNumber, localCC, referenceTemp, warpedTemp;
+   DataType *referencePtr_Z, *referencePtr_XYZ, *warpedPtr_Z, *warpedPtr_XYZ;
+   DataType value, bestCC, referenceMean, warpedMean, referenceVar, warpedVar;
+   DataType voxelNumber, localCC, referenceTemp, warpedTemp;
    float bestDisplacement[3], referencePosition_temp[3], tempPosition[3];
    size_t referenceIndex, warpedIndex, blockIndex, tid = 0;
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
    int threadNumber = omp_get_max_threads();
    if (threadNumber > 16)
       omp_set_num_threads(16);
-   DTYPE referenceValues[16][BLOCK_3D_SIZE];
-   DTYPE warpedValues[16][BLOCK_3D_SIZE];
+   DataType referenceValues[16][BLOCK_3D_SIZE];
+   DataType warpedValues[16][BLOCK_3D_SIZE];
    bool referenceOverlap[16][BLOCK_3D_SIZE];
    bool warpedOverlap[16][BLOCK_3D_SIZE];
 #else
-   DTYPE referenceValues[1][BLOCK_3D_SIZE];
-   DTYPE warpedValues[1][BLOCK_3D_SIZE];
+   DataType referenceValues[1][BLOCK_3D_SIZE];
+   DataType warpedValues[1][BLOCK_3D_SIZE];
    bool referenceOverlap[1][BLOCK_3D_SIZE];
    bool warpedOverlap[1][BLOCK_3D_SIZE];
 #endif
 
    int currentDefinedActiveBlockNumber = 0;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(params, reference, warped, referencePtr, warpedPtr, mask, referenceMatrix_xyz, \
    referenceOverlap, warpedOverlap, referenceValues, warpedValues) \
@@ -502,7 +502,7 @@ void block_matching_method3D(nifti_image * reference,
    reduction(+:currentDefinedActiveBlockNumber)
 #endif
    for (k = 0; k < (int)params->blockNumber[2]; k++) {
-#if defined (_OPENMP)
+#ifdef _OPENMP
       tid = omp_get_thread_num();
 #endif
       blockIndex = k * params->blockNumber[0] * params->blockNumber[1];
@@ -549,7 +549,7 @@ void block_matching_method3D(nifti_image * reference,
                   else
                      referenceIndex += BLOCK_WIDTH * BLOCK_WIDTH;
                }
-               bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0; //only when misaligned images are registered
+               bestCC = params->voxelCaptureRange > 3 ? 0.9f : 0; //only when misaligned images are registered
                bestDisplacement[0] = std::numeric_limits<float>::quiet_NaN();
                bestDisplacement[1] = 0.f;
                bestDisplacement[2] = 0.f;
@@ -665,7 +665,7 @@ void block_matching_method3D(nifti_image * reference,
    }
    params->definedActiveBlockNumber = currentDefinedActiveBlockNumber;
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
    omp_set_num_threads(threadNumber);
 #endif
 }
diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp
index 47d3c365..6e959816 100644
--- a/reg-lib/cpu/_reg_discrete_init.cpp
+++ b/reg-lib/cpu/_reg_discrete_init.cpp
@@ -211,7 +211,7 @@ void reg_discrete_init::AddL2Penalisation(float weight)
    int _node_number = static_cast<int>(this->node_number);
    int _label_nD_num = this->label_nD_num;
    float *_discretised_measures = &this->discretised_measures[0];
-#if defined (_OPENMP)
+#ifdef _OPENMP
    #pragma omp parallel for default(none) \
    shared(_node_number, _label_nD_num, _discretised_measures, l2_penalisation) \
    private(measure_index, n, label_index)
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index 9b2a19fa..2f0c66e0 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -81,7 +81,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImgPtr,
    }
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 double reg_getDTIMeasureValue(nifti_image *referenceImage,
                               nifti_image *warpedImage,
                               int *mask,
@@ -98,26 +98,26 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
 
    /* As the tensor has 6 unique components that we need to worry about, read them out
    for the floating and reference images. */
-   DTYPE *firstWarpedVox = static_cast<DTYPE *>(warpedImage->data);
-   DTYPE *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]];
-   DTYPE *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]];
-   DTYPE *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]];
-   DTYPE *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]];
-   DTYPE *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]];
-   DTYPE *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]];
+   DataType *firstWarpedVox = static_cast<DataType *>(warpedImage->data);
+   DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]];
+   DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]];
+   DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]];
+   DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]];
+   DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]];
+   DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]];
 
-   DTYPE *firstRefVox = static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]];
-   DTYPE *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]];
-   DTYPE *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]];
-   DTYPE *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]];
-   DTYPE *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]];
-   DTYPE *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]];
+   DataType *firstRefVox = static_cast<DataType *>(referenceImage->data);
+   DataType *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]];
+   DataType *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]];
+   DataType *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]];
+   DataType *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]];
+   DataType *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]];
+   DataType *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]];
 
    double DTI_cost=0, n=0;
    const double twoThirds = (2.0/3.0);
-   DTYPE rXX, rXY, rYY, rXZ, rYZ, rZZ;
-#if defined (_OPENMP)
+   DataType rXX, rXY, rYY, rXZ, rYZ, rZZ;
+#ifdef _OPENMP
    #pragma omp parallel for default(none) \
    shared(referenceImage, referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \
           referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ, \
@@ -226,7 +226,7 @@ double reg_dti::GetSimilarityMeasureValue()
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
       nifti_image *warpedImage,
       nifti_image *warpedGradient,
@@ -245,44 +245,44 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
 
    /* As the tensor has 6 unique components that we need to worry about, read them out
    for the floating and reference images. */
-   DTYPE *firstWarpedVox = static_cast<DTYPE *>(warpedImage->data);
-   DTYPE *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]];
-   DTYPE *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]];
-   DTYPE *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]];
-   DTYPE *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]];
-   DTYPE *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]];
-   DTYPE *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]];
+   DataType *firstWarpedVox = static_cast<DataType *>(warpedImage->data);
+   DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]];
+   DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]];
+   DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]];
+   DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]];
+   DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]];
+   DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]];
 
-   DTYPE *firstRefVox = static_cast<DTYPE *>(referenceImage->data);
-   DTYPE *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]];
-   DTYPE *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]];
-   DTYPE *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]];
-   DTYPE *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]];
-   DTYPE *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]];
-   DTYPE *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]];
+   DataType *firstRefVox = static_cast<DataType *>(referenceImage->data);
+   DataType *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]];
+   DataType *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]];
+   DataType *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]];
+   DataType *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]];
+   DataType *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]];
+   DataType *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]];
 
    // THE FOLLOWING IS WRONG
    reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX");
    reg_exit();
    unsigned int gradientVoxels = warpedGradient->nu*voxelNumber;
-   DTYPE *firstGradVox = static_cast<DTYPE *>(warpedGradient->data);
-   DTYPE *spatialGradXX = &firstGradVox[gradientVoxels*dtIndicies[0]];
-   DTYPE *spatialGradXY = &firstGradVox[gradientVoxels*dtIndicies[1]];
-   DTYPE *spatialGradYY = &firstGradVox[gradientVoxels*dtIndicies[2]];
-   DTYPE *spatialGradXZ = &firstGradVox[gradientVoxels*dtIndicies[3]];
-   DTYPE *spatialGradYZ = &firstGradVox[gradientVoxels*dtIndicies[4]];
-   DTYPE *spatialGradZZ = &firstGradVox[gradientVoxels*dtIndicies[5]];
+   DataType *firstGradVox = static_cast<DataType *>(warpedGradient->data);
+   DataType *spatialGradXX = &firstGradVox[gradientVoxels*dtIndicies[0]];
+   DataType *spatialGradXY = &firstGradVox[gradientVoxels*dtIndicies[1]];
+   DataType *spatialGradYY = &firstGradVox[gradientVoxels*dtIndicies[2]];
+   DataType *spatialGradXZ = &firstGradVox[gradientVoxels*dtIndicies[3]];
+   DataType *spatialGradYZ = &firstGradVox[gradientVoxels*dtIndicies[4]];
+   DataType *spatialGradZZ = &firstGradVox[gradientVoxels*dtIndicies[5]];
 
    // Create an array to store the computed gradient per time point
-   DTYPE *dtiMeasureGradPtrX=static_cast<DTYPE *>(dtiMeasureGradientImage->data);
-   DTYPE *dtiMeasureGradPtrY = &dtiMeasureGradPtrX[voxelNumber];
-   DTYPE *dtiMeasureGradPtrZ = &dtiMeasureGradPtrY[voxelNumber];
+   DataType *dtiMeasureGradPtrX=static_cast<DataType *>(dtiMeasureGradientImage->data);
+   DataType *dtiMeasureGradPtrY = &dtiMeasureGradPtrX[voxelNumber];
+   DataType *dtiMeasureGradPtrZ = &dtiMeasureGradPtrY[voxelNumber];
 
    const double twoThirds = 2.0/3.0;
    const double fourThirds = 4.0/3.0;
 
-   DTYPE rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad;
-#if defined (_OPENMP)
+   DataType rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad;
+#ifdef _OPENMP
    #pragma omp parallel for default(none) \
    shared(referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \
           referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ,warpedIntensityXX, \
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 1c0ed6ff..3aafa4be 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -57,7 +57,7 @@ class reg_dti: public reg_measure {
  * should be considered. If set to nullptr, all voxels are considered
  * @return Returns an L2 measure of the distance between the anisotropic components of the diffusion tensors
  */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 double reg_getDTIMeasureValue(nifti_image *referenceImage,
                               nifti_image *warpedImage,
                               int *mask,
@@ -74,7 +74,7 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
  * @param mask Array that contains a mask to specify which voxel
  * should be considered. If set to nullptr, all voxels are considered
  */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
                                          nifti_image *warpedImage,
                                          nifti_image *warpedGradient,
diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp
index 63a9839c..ff6fdc2b 100644
--- a/reg-lib/cpu/_reg_femTrans.cpp
+++ b/reg-lib/cpu/_reg_femTrans.cpp
@@ -162,7 +162,7 @@ void reg_fem_getDeformationField(float *nodePositions,
 
    float coefficients[4];
    float positionA[3], positionB[3], positionC[3], positionD[3];
-#if defined (_OPENMP)
+#ifdef _OPENMP
    #pragma omp parallel for default(none) \
    shared(defPtrX, defPtrY, defPtrZ, femInterpolationWeight, \
           nodePositions, closestNodes, voxelNumber) \
diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp
index 1d85c61d..ff387fa9 100755
--- a/reg-lib/cpu/_reg_globalTrans.cpp
+++ b/reg-lib/cpu/_reg_globalTrans.cpp
@@ -45,7 +45,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation,
    double voxel[3]={0,0,0}, position[3]={0,0,0};
    int x=0, y=0;
    size_t index=0;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(deformationFieldImage, transformationMatrix, affineTransformation, \
    deformationFieldPtrX, deformationFieldPtrY, mask, composition) \
@@ -108,7 +108,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation,
    double voxel[3]={0,0,0}, position[3]={0,0,0};
    int x=0, y=0, z=0;
    size_t index=0;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(deformationFieldImage, transformationMatrix, affineTransformation, \
    deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, mask, composition) \
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index a9a469c0..f0a5b3af 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -76,7 +76,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImgPtr,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 double reg_getKLDivergence(nifti_image *referenceImage,
                            nifti_image *warpedImage,
                            double *timePointWeight,
@@ -90,8 +90,8 @@ double reg_getKLDivergence(nifti_image *referenceImage,
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
 
-    DTYPE *refPtr = static_cast<DTYPE*>(referenceImage->data);
-    DTYPE *warPtr = static_cast<DTYPE*>(warpedImage->data);
+    DataType *refPtr = static_cast<DataType*>(referenceImage->data);
+    DataType *warPtr = static_cast<DataType*>(warpedImage->data);
     int *maskPtr = nullptr;
     bool MrClean = false;
     if (mask == nullptr) {
@@ -99,16 +99,16 @@ double reg_getKLDivergence(nifti_image *referenceImage,
         MrClean = true;
     } else maskPtr = &mask[0];
 
-    DTYPE *jacPtr = nullptr;
+    DataType *jacPtr = nullptr;
     if (jacobianDetImg != nullptr)
-        jacPtr = static_cast<DTYPE*>(jacobianDetImg->data);
+        jacPtr = static_cast<DataType*>(jacobianDetImg->data);
     double measure = 0, measure_tp = 0, num = 0, tempRefValue, tempWarValue, tempValue;
 
     for (int time = 0; time < referenceImage->nt; ++time) {
         if (timePointWeight[time] > 0) {
-            DTYPE *currentRefPtr = &refPtr[time * voxelNumber];
-            DTYPE *currentWarPtr = &warPtr[time * voxelNumber];
-#if defined (_OPENMP)
+            DataType *currentRefPtr = &refPtr[time * voxelNumber];
+            DataType *currentWarPtr = &warPtr[time * voxelNumber];
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,currentRefPtr, currentWarPtr, \
     maskPtr, jacobianDetImg, jacPtr) \
@@ -205,7 +205,7 @@ double reg_kld::GetSimilarityMeasureValue() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
                                            nifti_image *warpedImage,
                                            nifti_image *warpedImageGradient,
@@ -222,10 +222,10 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
 
-    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
-    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
-    DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
-    DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
+    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
     int *maskPtr = nullptr;
     bool MrClean = false;
     if (mask == nullptr) {
@@ -233,22 +233,22 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
         MrClean = true;
     } else maskPtr = &mask[0];
 
-    DTYPE *jacPtr = nullptr;
+    DataType *jacPtr = nullptr;
     if (jacobianDetImg != nullptr)
-        jacPtr = static_cast<DTYPE*>(jacobianDetImg->data);
+        jacPtr = static_cast<DataType*>(jacobianDetImg->data);
     double tempValue, tempGradX, tempGradY, tempGradZ, tempRefValue, tempWarValue;
 
     // Create pointers to the spatial gradient of the current warped volume
-    DTYPE *currentGradPtrX = static_cast<DTYPE*>(warpedImageGradient->data);
-    DTYPE *currentGradPtrY = &currentGradPtrX[voxelNumber];
-    DTYPE *currentGradPtrZ = nullptr;
+    DataType *currentGradPtrX = static_cast<DataType*>(warpedImageGradient->data);
+    DataType *currentGradPtrY = &currentGradPtrX[voxelNumber];
+    DataType *currentGradPtrZ = nullptr;
     if (referenceImage->nz > 1)
         currentGradPtrZ = &currentGradPtrY[voxelNumber];
 
     // Create pointers to the kld gradient image
-    DTYPE *measureGradPtrX = static_cast<DTYPE*>(measureGradient->data);
-    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-    DTYPE *measureGradPtrZ = nullptr;
+    DataType *measureGradPtrX = static_cast<DataType*>(measureGradient->data);
+    DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
+    DataType *measureGradPtrZ = nullptr;
     if (referenceImage->nz > 1)
         measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
@@ -262,7 +262,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
     }
     double adjusted_weight = timepoint_weight / activeVoxel_num;
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,currentRefPtr, currentWarPtr, \
     maskPtr, jacobianDetImg, jacPtr, referenceImage, \
@@ -293,13 +293,13 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
                 tempGradX = currentGradPtrX[voxel];
                 if (tempGradX == tempGradX)
                     // Update the gradient along the x-axis
-                    measureGradPtrX[voxel] -= (DTYPE)(tempValue * tempGradX);
+                    measureGradPtrX[voxel] -= (DataType)(tempValue * tempGradX);
 
                 // Ensure that gradient of the warpedImage image along y-axis is not NaN
                 tempGradY = currentGradPtrY[voxel];
                 if (tempGradY == tempGradY)
                     // Update the gradient along the y-axis
-                    measureGradPtrY[voxel] -= (DTYPE)(tempValue * tempGradY);
+                    measureGradPtrY[voxel] -= (DataType)(tempValue * tempGradY);
 
                 // Check if the current images are 3D
                 if (referenceImage->nz > 1) {
@@ -307,7 +307,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
                     tempGradZ = currentGradPtrZ[voxel];
                     if (tempGradZ == tempGradZ)
                         // Update the gradient along the z-axis
-                        measureGradPtrZ[voxel] -= (DTYPE)(tempValue * tempGradZ);
+                        measureGradPtrZ[voxel] -= (DataType)(tempValue * tempGradZ);
                 }
             }
         }
diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h
index ca5a553f..fa84ef20 100755
--- a/reg-lib/cpu/_reg_kld.h
+++ b/reg-lib/cpu/_reg_kld.h
@@ -53,7 +53,7 @@ class reg_kld: public reg_measure {
  * should be considered. If set to nullptr, all voxels are considered
  * @return Returns the computed sum squared difference
  */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 double reg_getKLDivergence(nifti_image *reference,
                            nifti_image *warped,
                            double *timePointWeight,
@@ -75,7 +75,7 @@ double reg_getKLDivergence(nifti_image *reference,
  * @param mask Array that contains a mask to specify which voxel
  * should be considered. If set to nullptr, all voxels are considered
  */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference,
                                            nifti_image *warped,
                                            nifti_image *warpedGradient,
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 7451f1b8..13134155 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -81,7 +81,7 @@ reg_lncc::~reg_lncc() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
                                      nifti_image *warImage,
                                      nifti_image *meanRefImage,
@@ -103,9 +103,9 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
     reg_tools_removeNanFromMask(refImage, combinedMask);
     reg_tools_removeNanFromMask(warImage, combinedMask);
 
-    DTYPE *origRefPtr = static_cast<DTYPE*>(refImage->data);
-    DTYPE *meanRefPtr = static_cast<DTYPE*>(meanRefImage->data);
-    DTYPE *sdevRefPtr = static_cast<DTYPE*>(stdDevRefImage->data);
+    DataType *origRefPtr = static_cast<DataType*>(refImage->data);
+    DataType *meanRefPtr = static_cast<DataType*>(meanRefImage->data);
+    DataType *sdevRefPtr = static_cast<DataType*>(stdDevRefImage->data);
     memcpy(meanRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper);
     memcpy(sdevRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper);
 
@@ -113,16 +113,16 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
     reg_tools_kernelConvolution(meanRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
     reg_tools_kernelConvolution(stdDevRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
 
-    DTYPE *origWarPtr = static_cast<DTYPE*>(warImage->data);
-    DTYPE *meanWarPtr = static_cast<DTYPE*>(meanWarImage->data);
-    DTYPE *sdevWarPtr = static_cast<DTYPE*>(stdDevWarImage->data);
+    DataType *origWarPtr = static_cast<DataType*>(warImage->data);
+    DataType *meanWarPtr = static_cast<DataType*>(meanWarImage->data);
+    DataType *sdevWarPtr = static_cast<DataType*>(stdDevWarImage->data);
     memcpy(meanWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper);
     memcpy(sdevWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper);
 
     reg_tools_multiplyImageToImage(stdDevWarImage, stdDevWarImage, stdDevWarImage);
     reg_tools_kernelConvolution(meanWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
     reg_tools_kernelConvolution(stdDevWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr) \
     private(voxel)
@@ -132,8 +132,8 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
         sdevRefPtr[voxel] = sqrt(sdevRefPtr[voxel] - reg_pow2(meanRefPtr[voxel]));
         sdevWarPtr[voxel] = sqrt(sdevWarPtr[voxel] - reg_pow2(meanWarPtr[voxel]));
         // Stabilise the computation
-        if (sdevRefPtr[voxel] < 1.e-06) sdevRefPtr[voxel] = static_cast<DTYPE>(0);
-        if (sdevWarPtr[voxel] < 1.e-06) sdevWarPtr[voxel] = static_cast<DTYPE>(0);
+        if (sdevRefPtr[voxel] < 1.e-06) sdevRefPtr[voxel] = 0;
+        if (sdevWarPtr[voxel] < 1.e-06) sdevWarPtr[voxel] = 0;
     }
 }
 /* *************************************************************** */
@@ -257,7 +257,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 double reg_getLNCCValue(nifti_image *referenceImage,
                         nifti_image *referenceMeanImage,
                         nifti_image *referenceSdevImage,
@@ -278,17 +278,17 @@ double reg_getLNCCValue(nifti_image *referenceImage,
 #endif
 
     // Compute the local correlation
-    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
-    DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
 
-    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
-    DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
+    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
 
-    DTYPE *refMeanPtr = static_cast<DTYPE*>(referenceMeanImage->data);
-    DTYPE *warMeanPtr = static_cast<DTYPE*>(warpedMeanImage->data);
-    DTYPE *refSdevPtr = static_cast<DTYPE*>(referenceSdevImage->data);
-    DTYPE *warSdevPtr = static_cast<DTYPE*>(warpedSdevImage->data);
-    DTYPE *correlaPtr = static_cast<DTYPE*>(correlationImage->data);
+    DataType *refMeanPtr = static_cast<DataType*>(referenceMeanImage->data);
+    DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
+    DataType *refSdevPtr = static_cast<DataType*>(referenceSdevImage->data);
+    DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
+    DataType *correlaPtr = static_cast<DataType*>(correlationImage->data);
 
     for (size_t i = 0; i < voxelNumber; ++i)
         correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i];
@@ -299,7 +299,7 @@ double reg_getLNCCValue(nifti_image *referenceImage,
     double activeVoxel_num = 0.;
 
     // Iteration over all voxels
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \
     refSdevPtr,warSdevPtr,correlaPtr) \
@@ -445,7 +445,7 @@ double reg_lncc::GetSimilarityMeasureValue() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    nifti_image *referenceMeanImage,
                                    nifti_image *referenceSdevImage,
@@ -469,17 +469,17 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
 #endif
 
     // Compute the local correlation
-    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
-    DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
 
-    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
-    DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
+    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
 
-    DTYPE *refMeanPtr = static_cast<DTYPE*>(referenceMeanImage->data);
-    DTYPE *warMeanPtr = static_cast<DTYPE*>(warpedMeanImage->data);
-    DTYPE *refSdevPtr = static_cast<DTYPE*>(referenceSdevImage->data);
-    DTYPE *warSdevPtr = static_cast<DTYPE*>(warpedSdevImage->data);
-    DTYPE *correlaPtr = static_cast<DTYPE*>(correlationImage->data);
+    DataType *refMeanPtr = static_cast<DataType*>(referenceMeanImage->data);
+    DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
+    DataType *refSdevPtr = static_cast<DataType*>(referenceSdevImage->data);
+    DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
+    DataType *correlaPtr = static_cast<DataType*>(correlationImage->data);
 
     for (size_t i = 0; i < voxelNumber; ++i)
         correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i];
@@ -491,7 +491,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
     double activeVoxel_num = 0;
 
     // Iteration over all voxels
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \
     refSdevPtr,warSdevPtr,correlaPtr) \
@@ -525,9 +525,9 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                     temp2 *= -1;
                     temp3 *= -1;
                 }
-                warMeanPtr[voxel] = temp1;
-                warSdevPtr[voxel] = temp2;
-                correlaPtr[voxel] = temp3;
+                warMeanPtr[voxel] = static_cast<DataType>(temp1);
+                warSdevPtr[voxel] = static_cast<DataType>(temp2);
+                correlaPtr[voxel] = static_cast<DataType>(temp3);
                 activeVoxel_num++;
             } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0;
         } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0;
@@ -540,22 +540,22 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
     reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
     reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask);
     reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
-    DTYPE *measureGradPtrX = static_cast<DTYPE*>(measureGradientImage->data);
-    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-    DTYPE *measureGradPtrZ = nullptr;
+    DataType *measureGradPtrX = static_cast<DataType*>(measureGradientImage->data);
+    DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
+    DataType *measureGradPtrZ = nullptr;
     if (referenceImage->nz > 1)
         measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
     // Create pointers to the spatial gradient of the warped image
-    DTYPE *warpGradPtrX = static_cast<DTYPE*>(warpedGradient->data);
-    DTYPE *warpGradPtrY = &warpGradPtrX[voxelNumber];
-    DTYPE *warpGradPtrZ = nullptr;
+    DataType *warpGradPtrX = static_cast<DataType*>(warpedGradient->data);
+    DataType *warpGradPtrY = &warpGradPtrX[voxelNumber];
+    DataType *warpGradPtrZ = nullptr;
     if (referenceImage->nz > 1)
         warpGradPtrZ = &warpGradPtrY[voxelNumber];
 
     double common;
     // Iteration over all voxels
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \
     warMeanPtr,warSdevPtr,correlaPtr,measureGradPtrX,measureGradPtrY, \
@@ -567,20 +567,20 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
         if (combinedMask[voxel] > -1) {
             common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlaPtr[voxel];
             common *= adjusted_weight;
-            measureGradPtrX[voxel] -= warpGradPtrX[voxel] * common;
-            measureGradPtrY[voxel] -= warpGradPtrY[voxel] * common;
+            measureGradPtrX[voxel] -= warpGradPtrX[voxel] * static_cast<DataType>(common);
+            measureGradPtrY[voxel] -= warpGradPtrY[voxel] * static_cast<DataType>(common);
             if (warpGradPtrZ != nullptr)
-                measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * common;
+                measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * static_cast<DataType>(common);
         }
     }
     // Check for NaN
-    DTYPE val;
+    DataType val;
 #ifdef _WIN32
     voxelNumber = (long)measureGradientImage->nvox;
 #else
     voxelNumber = measureGradientImage->nvox;
 #endif
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,measureGradPtrX) \
     private(voxel, val)
@@ -588,7 +588,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         val = measureGradPtrX[voxel];
         if (val != val || isinf(val) != 0)
-            measureGradPtrX[voxel] = static_cast<DTYPE>(0);
+            measureGradPtrX[voxel] = 0;
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index e9cd0146..07f14eca 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -66,7 +66,7 @@ class reg_lncc: public reg_measure {
 
     int kernelType;
 
-    template <class DTYPE>
+    template <class DataType>
     void UpdateLocalStatImages(nifti_image *refImage,
                                nifti_image *warImage,
                                nifti_image *meanRefImage,
@@ -88,7 +88,7 @@ class reg_lncc: public reg_measure {
  * should be considered. If set to nullptr, all voxels are considered
  * @return Returns the computed LNCC
  */
-extern "C++" template<class DTYPE>
+extern "C++" template<class DataType>
 double reg_getLNCCValue(nifti_image *referenceImage,
                         nifti_image *referenceMeanImage,
                         nifti_image *referenceStdDevImage,
@@ -112,7 +112,7 @@ double reg_getLNCCValue(nifti_image *referenceImage,
  *  @param mask Array that contains a mask to specify which voxel
  *  should be considered. If set to nullptr, all voxels are considered
  */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    nifti_image *referenceMeanImage,
                                    nifti_image *referenceStdDevImage,
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 873d7bf8..026c0a63 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -16,7 +16,7 @@
 
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_createControlPointGrid(nifti_image **controlPointGridImage,
                                 nifti_image *referenceImage,
                                 float *spacingMillimeter)
@@ -36,7 +36,7 @@ void reg_createControlPointGrid(nifti_image **controlPointGridImage,
    dim_cpp[4]=dim_cpp[6]=dim_cpp[7]=1;
 
    // Create the new control point grid image and allocate its space
-   if(sizeof(DTYPE)==4)
+   if(sizeof(DataType)==4)
       *controlPointGridImage = nifti_make_new_nim(dim_cpp, NIFTI_TYPE_FLOAT32, true);
    else *controlPointGridImage = nifti_make_new_nim(dim_cpp, NIFTI_TYPE_FLOAT64, true);
 
@@ -142,7 +142,7 @@ void reg_createControlPointGrid(nifti_image **controlPointGridImage,
 template void reg_createControlPointGrid<float>(nifti_image **, nifti_image *, float *);
 template void reg_createControlPointGrid<double>(nifti_image **, nifti_image *, float *);
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
                                           nifti_image **backwardGridImage,
                                           nifti_image *referenceImage,
@@ -325,7 +325,7 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
                      };
 
    // Create the control point grid image
-   if(sizeof(DTYPE)==sizeof(float))
+   if(sizeof(DataType)==sizeof(float))
    {
       (*forwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32,true);
       (*backwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32,true);
@@ -422,7 +422,7 @@ template void reg_createSymmetricControlPointGrids<double>
 (nifti_image **,nifti_image **,nifti_image *,nifti_image *,mat44 *,float *);
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                              nifti_image *deformationField,
                                              int *mask,
@@ -432,17 +432,17 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
    int coord;
 
    const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint);
-   DTYPE *controlPointPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber];
-   DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber];
+   DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
+   DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber];
+   DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber];
 
    const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField);
-   DTYPE *fieldPtrX=static_cast<DTYPE *>(deformationField->data);
-   DTYPE *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber];
-   DTYPE *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber];
+   DataType *fieldPtrX=static_cast<DataType *>(deformationField->data);
+   DataType *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber];
+   DataType *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber];
 
    int x, y, z, a, b, c, xPre, yPre, zPre, index;
-   DTYPE xBasis[2], yBasis[2], zBasis[2], real[3];
+   DataType xBasis[2], yBasis[2], zBasis[2], real[3];
 
    if(composition)  // Composition of deformation fields
    {
@@ -452,7 +452,7 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
          referenceMatrix_real_to_voxel=(splineControlPoint->sto_ijk);
       else referenceMatrix_real_to_voxel=(splineControlPoint->qto_ijk);
 
-      DTYPE voxel[3];
+      DataType voxel[3];
 
       for(z=0; z<deformationField->nz; z++)
       {
@@ -487,17 +487,17 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
 
                   // The spline coefficients are computed
                   xPre=(int)reg_floor(voxel[0]);
-                  xBasis[1]=voxel[0]-static_cast<DTYPE>(xPre);
+                  xBasis[1]=voxel[0]-static_cast<DataType>(xPre);
                   if(xBasis[1]<0) xBasis[1]=0; //rounding error
                   xBasis[0]=1.-xBasis[1];
 
                   yPre=(int)reg_floor(voxel[1]);
-                  yBasis[1]=voxel[1]-static_cast<DTYPE>(yPre);
+                  yBasis[1]=voxel[1]-static_cast<DataType>(yPre);
                   if(yBasis[1]<0) yBasis[1]=0; //rounding error
                   yBasis[0]=1.-yBasis[1];
 
                   zPre=(int)reg_floor(voxel[2]);
-                  zBasis[1]=voxel[2]-static_cast<DTYPE>(zPre);
+                  zBasis[1]=voxel[2]-static_cast<DataType>(zPre);
                   if(zBasis[1]<0) zBasis[1]=0; //rounding error
                   zBasis[0]=1.-zBasis[1];
 
@@ -507,7 +507,7 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
                   for(c=0; c<2; c++){
                      for(b=0; b<2; b++){
                         for(a=0; a<2; a++){
-                           DTYPE tempValue = xBasis[a] * yBasis[b] * zBasis[c];
+                           DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c];
                            coord = ((zPre+c)*splineControlPoint->ny+yPre+b)*splineControlPoint->nx+xPre+a;
                            real[0] += controlPointPtrX[coord] * tempValue;
                            real[1] += controlPointPtrY[coord] * tempValue;
@@ -526,12 +526,12 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
    }//Composition of deformation
    else  // !composition
    {
-      DTYPE gridVoxelSpacing[3];
+      DataType gridVoxelSpacing[3];
       gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
       gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy;
       gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz;
-      DTYPE tempValue;
-#if defined (_OPENMP)
+      DataType tempValue;
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    private(x, y, z, a, b, c, xPre, yPre, zPre, xBasis, yBasis, zBasis, real, index, coord, tempValue) \
    shared(deformationField, gridVoxelSpacing, mask, fieldPtrX, fieldPtrY, fieldPtrZ, \
@@ -541,8 +541,8 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
       {
          index=z*deformationField->nx*deformationField->ny;
 
-         zPre=static_cast<int>(static_cast<DTYPE>(z)/gridVoxelSpacing[2]);
-         zBasis[1]=static_cast<DTYPE>(z)/gridVoxelSpacing[2]-static_cast<DTYPE>(zPre);
+         zPre=static_cast<int>(static_cast<DataType>(z)/gridVoxelSpacing[2]);
+         zBasis[1]=static_cast<DataType>(z)/gridVoxelSpacing[2]-static_cast<DataType>(zPre);
          if(zBasis[1]<0) zBasis[1]=0; //rounding error
          zBasis[0]=1.-zBasis[1];
          zPre++;
@@ -550,8 +550,8 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
          for(y=0; y<deformationField->ny; y++)
          {
 
-            yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
-            yBasis[1]=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
+            yPre=static_cast<int>(static_cast<DataType>(y)/gridVoxelSpacing[1]);
+            yBasis[1]=static_cast<DataType>(y)/gridVoxelSpacing[1]-static_cast<DataType>(yPre);
             if(yBasis[1]<0) yBasis[1]=0; //rounding error
             yBasis[0]=1.-yBasis[1];
             yPre++;
@@ -564,8 +564,8 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
 
                if(mask[index]>-1)
                {
-                  xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
-                  xBasis[1]=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
+                  xPre=static_cast<int>(static_cast<DataType>(x)/gridVoxelSpacing[0]);
+                  xBasis[1]=static_cast<DataType>(x)/gridVoxelSpacing[0]-static_cast<DataType>(xPre);
                   if(xBasis[1]<0) xBasis[1]=0; //rounding error
                   xBasis[0]=1.-xBasis[1];
                   xPre++;
@@ -597,7 +597,7 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                                       nifti_image *deformationField,
                                       int *mask,
@@ -613,62 +613,62 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
    } val;
    __m128 tempCurrent, tempX, tempY;
 #ifdef _WIN32
-   __declspec(align(16)) DTYPE temp[4];
-   __declspec(align(16)) DTYPE yBasis[4];
+   __declspec(align(16)) DataType temp[4];
+   __declspec(align(16)) DataType yBasis[4];
    union
    {
       __m128 m[16];
-      __declspec(align(16)) DTYPE f[16];
+      __declspec(align(16)) DataType f[16];
    } xControlPointCoordinates;
    union
    {
       __m128 m[16];
-      __declspec(align(16)) DTYPE f[16];
+      __declspec(align(16)) DataType f[16];
    } yControlPointCoordinates;
    union u1
    {
       __m128 m[4];
-      __declspec(align(16)) DTYPE f[16];
+      __declspec(align(16)) DataType f[16];
    } xyBasis;
 #else // _WIN32
-   DTYPE temp[4] __attribute__((aligned(16)));
-   DTYPE yBasis[4] __attribute__((aligned(16)));
+   DataType temp[4] __attribute__((aligned(16)));
+   DataType yBasis[4] __attribute__((aligned(16)));
    union
    {
       __m128 m[16];
-      DTYPE f[16] __attribute__((aligned(16)));
+      DataType f[16] __attribute__((aligned(16)));
    } xControlPointCoordinates;
    union
    {
       __m128 m[16];
-      DTYPE f[16] __attribute__((aligned(16)));
+      DataType f[16] __attribute__((aligned(16)));
    } yControlPointCoordinates;
    union u1
    {
       __m128 m[4];
-      DTYPE f[16] __attribute__((aligned(16)));
+      DataType f[16] __attribute__((aligned(16)));
    } xyBasis;
 #endif // _WIN32
 #else // _USE_SSE
-   DTYPE temp[4];
-   DTYPE yBasis[4];
-   DTYPE xyBasis[16];
-   DTYPE xControlPointCoordinates[16];
-   DTYPE yControlPointCoordinates[16];
+   DataType temp[4];
+   DataType yBasis[4];
+   DataType xyBasis[16];
+   DataType xControlPointCoordinates[16];
+   DataType yControlPointCoordinates[16];
 #endif // _USE_SSE
 
 
-   DTYPE *controlPointPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*splineControlPoint, 2)];
+   DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
+   DataType *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*splineControlPoint, 2)];
 
-   DTYPE *fieldPtrX=static_cast<DTYPE *>(deformationField->data);
-   DTYPE *fieldPtrY = &fieldPtrX[CalcVoxelNumber(*deformationField)];
+   DataType *fieldPtrX=static_cast<DataType *>(deformationField->data);
+   DataType *fieldPtrY = &fieldPtrX[CalcVoxelNumber(*deformationField)];
 
-   DTYPE gridVoxelSpacing[2];
+   DataType gridVoxelSpacing[2];
    gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
    gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy;
 
-   DTYPE basis, xReal, yReal, xVoxel, yVoxel;
+   DataType basis, xReal, yReal, xVoxel, yVoxel;
    int x, y, a, b, xPre, yPre, oldXpre, oldYpre;
    size_t index, coord;
 
@@ -689,8 +689,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
          {
 
             // The previous position at the current pixel position is read
-            xReal = (DTYPE)(fieldPtrX[index]);
-            yReal = (DTYPE)(fieldPtrY[index]);
+            xReal = (DataType)(fieldPtrX[index]);
+            yReal = (DataType)(fieldPtrY[index]);
 
             // From real to pixel position in the CPP
             xVoxel = referenceMatrix_real_to_voxel->m[0][0]*xReal
@@ -702,18 +702,18 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 
             // The spline coefficients are computed
             xPre=(int)reg_floor(xVoxel);
-            basis=xVoxel-(DTYPE)xPre;
+            basis=xVoxel-(DataType)xPre;
             --xPre;
             if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DTYPE>(basis, temp);
-            else get_SplineBasisValues<DTYPE>(basis, temp);
+            if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
+            else get_SplineBasisValues<DataType>(basis, temp);
 
             yPre=(int)reg_floor(yVoxel);
-            basis=yVoxel-(DTYPE)yPre;
+            basis=yVoxel-(DataType)yPre;
             --yPre;
             if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DTYPE>(basis, yBasis);
-            else get_SplineBasisValues<DTYPE>(basis, yBasis);
+            if(bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+            else get_SplineBasisValues<DataType>(basis, yBasis);
 
 
             if(xVoxel>=0 && xVoxel<=deformationField->nx-1 &&
@@ -724,7 +724,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                if(oldXpre!=xPre || oldYpre!=yPre)
                {
 #ifdef _USE_SSE
-                  get_GridValues<DTYPE>(xPre,
+                  get_GridValues<DataType>(xPre,
                                         yPre,
                                         splineControlPoint,
                                         controlPointPtrX,
@@ -735,7 +735,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                                         false // not a displacement field
                                         );
 #else // _USE_SSE
-                  get_GridValues<DTYPE>(xPre,
+                  get_GridValues<DataType>(xPre,
                                         yPre,
                                         splineControlPoint,
                                         controlPointPtrX,
@@ -782,7 +782,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                   {
                      for(a=0; a<4; a++)
                      {
-                        DTYPE tempValue = temp[a] * yBasis[b];
+                        DataType tempValue = temp[a] * yBasis[b];
                         xReal += xControlPointCoordinates[b*4+a] * tempValue;
                         yReal += yControlPointCoordinates[b*4+a] * tempValue;
                      }
@@ -790,8 +790,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 #endif
                }
 
-               fieldPtrX[index] = (DTYPE)xReal;
-               fieldPtrY[index] = (DTYPE)yReal;
+               fieldPtrX[index] = (DataType)xReal;
+               fieldPtrY[index] = (DataType)yReal;
             }
             index++;
          }
@@ -800,7 +800,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
    else  // starting deformation field is blank - !composition
    {
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma  omp parallel for default(none) \
    shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \
@@ -821,20 +821,20 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
          index=y*deformationField->nx;
          oldXpre=oldYpre=9999999;
 
-         yPre=(int)((DTYPE)y/gridVoxelSpacing[1]);
-         basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)yPre;
+         yPre=(int)((DataType)y/gridVoxelSpacing[1]);
+         basis=(DataType)y/gridVoxelSpacing[1]-(DataType)yPre;
          if(basis<0) basis=0; //rounding error
-         if(bspline) get_BSplineBasisValues<DTYPE>(basis, yBasis);
-         else get_SplineBasisValues<DTYPE>(basis, yBasis);
+         if(bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+         else get_SplineBasisValues<DataType>(basis, yBasis);
 
          for(x=0; x<deformationField->nx; x++)
          {
 
-            xPre=(int)((DTYPE)x/gridVoxelSpacing[0]);
-            basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)xPre;
+            xPre=(int)((DataType)x/gridVoxelSpacing[0]);
+            basis=(DataType)x/gridVoxelSpacing[0]-(DataType)xPre;
             if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DTYPE>(basis, temp);
-            else get_SplineBasisValues<DTYPE>(basis, temp);
+            if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
+            else get_SplineBasisValues<DataType>(basis, temp);
 #if _USE_SSE
             val.f[0] = temp[0];
             val.f[1] = temp[1];
@@ -859,7 +859,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
             if(oldXpre!=xPre || oldYpre!=yPre)
             {
 #ifdef _USE_SSE
-               get_GridValues<DTYPE>(xPre,
+               get_GridValues<DataType>(xPre,
                                      yPre,
                                      splineControlPoint,
                                      controlPointPtrX,
@@ -870,7 +870,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                                      false // not a deformation field
                                      );
 #else // _USE_SSE
-               get_GridValues<DTYPE>(xPre,
+               get_GridValues<DataType>(xPre,
                                      yPre,
                                      splineControlPoint,
                                      controlPointPtrX,
@@ -912,8 +912,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                }
 #endif
             }// mask
-            fieldPtrX[index] = (DTYPE)xReal;
-            fieldPtrY[index] = (DTYPE)yReal;
+            fieldPtrX[index] = (DataType)xReal;
+            fieldPtrY[index] = (DataType)yReal;
             index++;
          } // x
       } // y
@@ -922,7 +922,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
    return;
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                             nifti_image *deformationField,
                                             int *mask,
@@ -940,65 +940,65 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
    __m128 xBasis_sse, yBasis_sse, zBasis_sse, temp_basis_sse, basis_sse;
 
 #ifdef _WIN32
-   __declspec(align(16)) DTYPE temp[4];
-   __declspec(align(16)) DTYPE zBasis[4];
+   __declspec(align(16)) DataType temp[4];
+   __declspec(align(16)) DataType zBasis[4];
    union
    {
       __m128 m[16];
-      __declspec(align(16)) DTYPE f[16];
+      __declspec(align(16)) DataType f[16];
    } xControlPointCoordinates;
    union
    {
       __m128 m[16];
-      __declspec(align(16)) DTYPE f[16];
+      __declspec(align(16)) DataType f[16];
    } yControlPointCoordinates;
    union
    {
       __m128 m[16];
-      __declspec(align(16)) DTYPE f[16];
+      __declspec(align(16)) DataType f[16];
    } zControlPointCoordinates;
 #else // _WIN32
-   DTYPE temp[4] __attribute__((aligned(16)));
-   DTYPE zBasis[4] __attribute__((aligned(16)));
+   DataType temp[4] __attribute__((aligned(16)));
+   DataType zBasis[4] __attribute__((aligned(16)));
    union
    {
       __m128 m[16];
-      DTYPE f[16] __attribute__((aligned(16)));
+      DataType f[16] __attribute__((aligned(16)));
    } xControlPointCoordinates;
    union
    {
       __m128 m[16];
-      DTYPE f[16] __attribute__((aligned(16)));
+      DataType f[16] __attribute__((aligned(16)));
    } yControlPointCoordinates;
    union
    {
       __m128 m[16];
-      DTYPE f[16] __attribute__((aligned(16)));
+      DataType f[16] __attribute__((aligned(16)));
    } zControlPointCoordinates;
 #endif // _WIN32
 #else // _USE_SSE
-   DTYPE temp[4];
-   DTYPE zBasis[4];
-   DTYPE xControlPointCoordinates[64];
-   DTYPE yControlPointCoordinates[64];
-   DTYPE zControlPointCoordinates[64];
+   DataType temp[4];
+   DataType zBasis[4];
+   DataType xControlPointCoordinates[64];
+   DataType yControlPointCoordinates[64];
+   DataType zControlPointCoordinates[64];
    int coord;
 #endif // _USE_SSE
 
    const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint);
-   DTYPE *controlPointPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber];
-   DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber];
+   DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
+   DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber];
+   DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber];
 
    const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField);
-   DTYPE *fieldPtrX=static_cast<DTYPE *>(deformationField->data);
-   DTYPE *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber];
-   DTYPE *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber];
+   DataType *fieldPtrX=static_cast<DataType *>(deformationField->data);
+   DataType *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber];
+   DataType *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber];
 
-   DTYPE basis, oldBasis=(DTYPE)(1.1);
+   DataType basis, oldBasis=(DataType)(1.1);
 
    int x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, index;
-   DTYPE real[3];
+   DataType real[3];
 
    if(composition)  // Composition of deformation fields
    {
@@ -1009,19 +1009,19 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
       else referenceMatrix_real_to_voxel=(splineControlPoint->qto_ijk);
 #ifdef _USE_SSE
 #ifdef _WIN32
-      __declspec(align(16)) DTYPE xBasis[4];
-      __declspec(align(16)) DTYPE yBasis[4];
+      __declspec(align(16)) DataType xBasis[4];
+      __declspec(align(16)) DataType yBasis[4];
 #else
-      DTYPE xBasis[4] __attribute__((aligned(16)));
-      DTYPE yBasis[4] __attribute__((aligned(16)));
+      DataType xBasis[4] __attribute__((aligned(16)));
+      DataType yBasis[4] __attribute__((aligned(16)));
 #endif
 #else // _USE_SSE
-      DTYPE xBasis[4], yBasis[4];
+      DataType xBasis[4], yBasis[4];
 #endif // _USE_SSE
 
-      DTYPE voxel[3];
+      DataType voxel[3];
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma omp parallel for default(none) \
    private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \
@@ -1081,31 +1081,31 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 
                   // The spline coefficients are computed
                   xPre=(int)reg_floor(voxel[0]);
-                  basis=voxel[0]-static_cast<DTYPE>(xPre);
+                  basis=voxel[0]-static_cast<DataType>(xPre);
                   --xPre;
                   if(basis<0) basis=0; //rounding error
-                  if(bspline) get_BSplineBasisValues<DTYPE>(basis, xBasis);
-                  else get_SplineBasisValues<DTYPE>(basis, xBasis);
+                  if(bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
+                  else get_SplineBasisValues<DataType>(basis, xBasis);
 
                   yPre=(int)reg_floor(voxel[1]);
-                  basis=voxel[1]-static_cast<DTYPE>(yPre);
+                  basis=voxel[1]-static_cast<DataType>(yPre);
                   --yPre;
                   if(basis<0) basis=0; //rounding error
-                  if(bspline) get_BSplineBasisValues<DTYPE>(basis, yBasis);
-                  else get_SplineBasisValues<DTYPE>(basis, yBasis);
+                  if(bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+                  else get_SplineBasisValues<DataType>(basis, yBasis);
 
                   zPre=(int)reg_floor(voxel[2]);
-                  basis=voxel[2]-static_cast<DTYPE>(zPre);
+                  basis=voxel[2]-static_cast<DataType>(zPre);
                   --zPre;
                   if(basis<0) basis=0; //rounding error
-                  if(bspline) get_BSplineBasisValues<DTYPE>(basis, zBasis);
-                  else get_SplineBasisValues<DTYPE>(basis, zBasis);
+                  if(bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
+                  else get_SplineBasisValues<DataType>(basis, zBasis);
 
                   // The control point postions are extracted
                   if(xPre!=oldPreX || yPre!=oldPreY || zPre!=oldPreZ)
                   {
 #ifdef _USE_SSE
-                     get_GridValues<DTYPE>(xPre,
+                     get_GridValues<DataType>(xPre,
                                            yPre,
                                            zPre,
                                            splineControlPoint,
@@ -1119,7 +1119,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                            false // not a deformation field
                                            );
 #else // _USE_SSE
-                     get_GridValues<DTYPE>(xPre,
+                     get_GridValues<DataType>(xPre,
                                            yPre,
                                            zPre,
                                            splineControlPoint,
@@ -1181,7 +1181,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                      {
                         for(a=0; a<4; a++)
                         {
-                           DTYPE tempValue = xBasis[a] * yBasis[b] * zBasis[c];
+                           DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c];
                            real[0] += xControlPointCoordinates[coord] * tempValue;
                            real[1] += yControlPointCoordinates[coord] * tempValue;
                            real[2] += zControlPointCoordinates[coord] * tempValue;
@@ -1201,7 +1201,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
    }//Composition of deformation
    else  // !composition
    {
-      DTYPE gridVoxelSpacing[3];
+      DataType gridVoxelSpacing[3];
       gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
       gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy;
       gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz;
@@ -1211,37 +1211,37 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
       union u1
       {
          __m128 m[4];
-         __declspec(align(16)) DTYPE f[16];
+         __declspec(align(16)) DataType f[16];
       } yzBasis;
       union u2
       {
          __m128 m[16];
-         __declspec(align(16)) DTYPE f[64];
+         __declspec(align(16)) DataType f[64];
       } xyzBasis;
 #else // _WIN32
       union
       {
          __m128 m[4];
-         DTYPE f[16] __attribute__((aligned(16)));
+         DataType f[16] __attribute__((aligned(16)));
       } yzBasis;
       union
       {
          __m128 m[16];
-         DTYPE f[64] __attribute__((aligned(16)));
+         DataType f[64] __attribute__((aligned(16)));
       } xyzBasis;
 #endif // _WIN32
 #else // _USE_SSE
-      DTYPE yzBasis[16], xyzBasis[64];
+      DataType yzBasis[16], xyzBasis[64];
 #endif // _USE_SSE
 
       // Assess if lookup table can be used
       if(gridVoxelSpacing[0]==5. && gridVoxelSpacing[0]==5. && gridVoxelSpacing[0]==5. && force_no_lut==false){
 
           // Assign a single array that will contain all coefficients
-         DTYPE *coefficients = (DTYPE *)malloc(125*64*sizeof(DTYPE));
+         DataType *coefficients = (DataType *)malloc(125*64*sizeof(DataType));
           // Compute and store all required coefficients
           int coeff_index;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma omp parallel for default(none) \
     private(x, y, z, a, b, c, coeff_index, basis, zBasis, temp, \
@@ -1256,13 +1256,13 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 #endif // _OPENMP
           for(z=0;z<5;++z){
              coeff_index=z*5*5*64;
-              basis=(DTYPE)z/5.;
-              if(bspline) get_BSplineBasisValues<DTYPE>(basis, zBasis);
-              else get_SplineBasisValues<DTYPE>(basis, zBasis);
+              basis=(DataType)z/5.;
+              if(bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
+              else get_SplineBasisValues<DataType>(basis, zBasis);
               for(y=0;y<5;++y){
-                  basis=(DTYPE)y/5.;
-                  if(bspline) get_BSplineBasisValues<DTYPE>(basis, temp);
-                  else get_SplineBasisValues<DTYPE>(basis, temp);
+                  basis=(DataType)y/5.;
+                  if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
+                  else get_SplineBasisValues<DataType>(basis, temp);
 #if _USE_SSE
                   val.f[0] = temp[0];
                   val.f[1] = temp[1];
@@ -1286,9 +1286,9 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 #endif
 
                   for(x=0;x<5;++x){
-                      basis=(DTYPE)x/5.;
-                      if(bspline) get_BSplineBasisValues<DTYPE>(basis, temp);
-                      else get_SplineBasisValues<DTYPE>(basis, temp);
+                      basis=(DataType)x/5.;
+                      if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
+                      else get_SplineBasisValues<DataType>(basis, temp);
 #if _USE_SSE
 
                       val.f[0] = temp[0];
@@ -1322,7 +1322,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 #if _USE_SSE
           int coord;
 #endif // USE_SSE
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma omp parallel for default(none) \
    private(x, y, z, a, b, c, xPre, yPre, zPre, real, \
@@ -1348,7 +1348,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                   for(xPre=0; xPre<splineControlPoint->nx-3; xPre++)
                   {
 #if _USE_SSE
-                      get_GridValues<DTYPE>(xPre,
+                      get_GridValues<DataType>(xPre,
                                             yPre,
                                             zPre,
                                             splineControlPoint,
@@ -1362,7 +1362,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                             false // not a deformation field
                                             );
 #else // _USE_SSE
-                      get_GridValues<DTYPE>(xPre,
+                      get_GridValues<DataType>(xPre,
                                             yPre,
                                             zPre,
                                             splineControlPoint,
@@ -1451,7 +1451,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
       } // if spacings==5 voxels
       else{
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma omp parallel for default(none) \
     private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \
@@ -1476,20 +1476,20 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
               index=z*deformationField->nx*deformationField->ny;
               oldBasis=1.1;
 
-              zPre=static_cast<int>(static_cast<DTYPE>(z)/gridVoxelSpacing[2]);
-              basis=static_cast<DTYPE>(z)/gridVoxelSpacing[2]-static_cast<DTYPE>(zPre);
+              zPre=static_cast<int>(static_cast<DataType>(z)/gridVoxelSpacing[2]);
+              basis=static_cast<DataType>(z)/gridVoxelSpacing[2]-static_cast<DataType>(zPre);
               if(basis<0) basis=0; //rounding error
-              if(bspline) get_BSplineBasisValues<DTYPE>(basis, zBasis);
-              else get_SplineBasisValues<DTYPE>(basis, zBasis);
+              if(bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
+              else get_SplineBasisValues<DataType>(basis, zBasis);
 
               for(y=0; y<deformationField->ny; y++)
               {
 
-                  yPre=static_cast<int>(static_cast<DTYPE>(y)/gridVoxelSpacing[1]);
-                  basis=static_cast<DTYPE>(y)/gridVoxelSpacing[1]-static_cast<DTYPE>(yPre);
+                  yPre=static_cast<int>(static_cast<DataType>(y)/gridVoxelSpacing[1]);
+                  basis=static_cast<DataType>(y)/gridVoxelSpacing[1]-static_cast<DataType>(yPre);
                   if(basis<0) basis=0; //rounding error
-                  if(bspline) get_BSplineBasisValues<DTYPE>(basis, temp);
-                  else get_SplineBasisValues<DTYPE>(basis, temp);
+                  if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
+                  else get_SplineBasisValues<DataType>(basis, temp);
 #if _USE_SSE
                   val.f[0] = temp[0];
                   val.f[1] = temp[1];
@@ -1515,11 +1515,11 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                   for(x=0; x<deformationField->nx; x++)
                   {
 
-                      xPre=static_cast<int>(static_cast<DTYPE>(x)/gridVoxelSpacing[0]);
-                      basis=static_cast<DTYPE>(x)/gridVoxelSpacing[0]-static_cast<DTYPE>(xPre);
+                      xPre=static_cast<int>(static_cast<DataType>(x)/gridVoxelSpacing[0]);
+                      basis=static_cast<DataType>(x)/gridVoxelSpacing[0]-static_cast<DataType>(xPre);
                       if(basis<0) basis=0; //rounding error
-                      if(bspline) get_BSplineBasisValues<DTYPE>(basis, temp);
-                      else get_SplineBasisValues<DTYPE>(basis, temp);
+                      if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
+                      else get_SplineBasisValues<DataType>(basis, temp);
 #if _USE_SSE
 
                       val.f[0] = temp[0];
@@ -1545,7 +1545,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                       if(basis<=oldBasis || x==0)
                       {
 #ifdef _USE_SSE
-                          get_GridValues<DTYPE>(xPre,
+                          get_GridValues<DataType>(xPre,
                                                 yPre,
                                                 zPre,
                                                 splineControlPoint,
@@ -1559,7 +1559,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                                 false // not a deformation field
                                                 );
 #else // _USE_SSE
-                          get_GridValues<DTYPE>(xPre,
+                          get_GridValues<DataType>(xPre,
                                                 yPre,
                                                 zPre,
                                                 splineControlPoint,
@@ -1745,7 +1745,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
                                        nifti_image *voxelImage,
                                        float weight,
@@ -1755,13 +1755,13 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
 {
    const size_t nodeNumber = CalcVoxelNumber(*nodeImage);
    const size_t voxelNumber = CalcVoxelNumber(*voxelImage);
-   DTYPE *nodePtrX = static_cast<DTYPE *>(nodeImage->data);
-   DTYPE *nodePtrY = &nodePtrX[nodeNumber];
-   DTYPE *nodePtrZ = nullptr;
+   DataType *nodePtrX = static_cast<DataType *>(nodeImage->data);
+   DataType *nodePtrY = &nodePtrX[nodeNumber];
+   DataType *nodePtrZ = nullptr;
 
-   DTYPE *voxelPtrX = static_cast<DTYPE *>(voxelImage->data);
-   DTYPE *voxelPtrY = &voxelPtrX[voxelNumber];
-   DTYPE *voxelPtrZ = nullptr;
+   DataType *voxelPtrX = static_cast<DataType *>(voxelImage->data);
+   DataType *voxelPtrY = &voxelPtrX[voxelNumber];
+   DataType *voxelPtrZ = nullptr;
 
    if(nodeImage->nz>1)
    {
@@ -1836,23 +1836,23 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
             nodeCoord[0]=x;
             reg_mat44_mul(&transformation,nodeCoord,voxelCoord);
             // linear interpolation is performed
-            DTYPE basisX[2], basisY[2], basisZ[2]={0,0};
+            DataType basisX[2], basisY[2], basisZ[2]={0,0};
             int pre[3]=
             {
                static_cast<int>(reg_floor(voxelCoord[0])),
                static_cast<int>(reg_floor(voxelCoord[1])),
                static_cast<int>(reg_floor(voxelCoord[2]))
             };
-            basisX[1]=voxelCoord[0]-static_cast<DTYPE>(pre[0]);
-            basisX[0]=static_cast<DTYPE>(1) - basisX[1];
-            basisY[1]=voxelCoord[1]-static_cast<DTYPE>(pre[1]);
-            basisY[0]=static_cast<DTYPE>(1) - basisY[1];
+            basisX[1]=voxelCoord[0]-static_cast<DataType>(pre[0]);
+            basisX[0]=static_cast<DataType>(1) - basisX[1];
+            basisY[1]=voxelCoord[1]-static_cast<DataType>(pre[1]);
+            basisY[0]=static_cast<DataType>(1) - basisY[1];
             if(voxelPtrZ!=nullptr)
             {
-               basisZ[1]=voxelCoord[2]-static_cast<DTYPE>(pre[2]);
-               basisZ[0]=static_cast<DTYPE>(1) - basisZ[1];
+               basisZ[1]=voxelCoord[2]-static_cast<DataType>(pre[2]);
+               basisZ[0]=static_cast<DataType>(1) - basisZ[1];
             }
-            DTYPE interpolatedValue[3]= {0,0,0};
+            DataType interpolatedValue[3]= {0,0,0};
             for(int c=0; c<2; ++c)
             {
                int indexZ=pre[2]+c;
@@ -1870,7 +1870,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
                            {
                               size_t index=(indexZ*voxelImage->ny+indexY) *
                                     voxelImage->nx+indexX;
-                              DTYPE linearWeight = basisX[a] * basisY[b];
+                              DataType linearWeight = basisX[a] * basisY[b];
                               if(voxelPtrZ!=nullptr) linearWeight *= basisZ[c];
                               interpolatedValue[0] += linearWeight * voxelPtrX[index];
                               interpolatedValue[1] += linearWeight * voxelPtrY[index];
@@ -1882,7 +1882,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
                   }
                }
             }
-            DTYPE reorientedValue[3]={0,0,0};
+            DataType reorientedValue[3]={0,0,0};
             reorientedValue[0] =
                   reorientation.m[0][0] * interpolatedValue[0] +
                   reorientation.m[1][0] * interpolatedValue[1] +
@@ -1898,17 +1898,17 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
                      reorientation.m[2][2] * interpolatedValue[2] ;
             if(update)
             {
-               *nodePtrX += reorientedValue[0]*static_cast<DTYPE>(weight);
-               *nodePtrY += reorientedValue[1]*static_cast<DTYPE>(weight);
+               *nodePtrX += reorientedValue[0]*static_cast<DataType>(weight);
+               *nodePtrY += reorientedValue[1]*static_cast<DataType>(weight);
                if(voxelPtrZ!=nullptr)
-                  *nodePtrZ += reorientedValue[2]*static_cast<DTYPE>(weight);
+                  *nodePtrZ += reorientedValue[2]*static_cast<DataType>(weight);
             }
             else
             {
-               *nodePtrX = reorientedValue[0]*static_cast<DTYPE>(weight);
-               *nodePtrY = reorientedValue[1]*static_cast<DTYPE>(weight);
+               *nodePtrX = reorientedValue[0]*static_cast<DataType>(weight);
+               *nodePtrY = reorientedValue[1]*static_cast<DataType>(weight);
                if(voxelPtrZ!=nullptr)
-                  *nodePtrZ = reorientedValue[2]*static_cast<DTYPE>(weight);
+                  *nodePtrZ = reorientedValue[2]*static_cast<DataType>(weight);
             }
             ++nodePtrX;
             ++nodePtrY;
@@ -2542,7 +2542,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_defField_compose2D(nifti_image *deformationField,
                             nifti_image *dfToUpdate,
                             int *mask)
@@ -2555,11 +2555,11 @@ void reg_defField_compose2D(nifti_image *deformationField,
    size_t i;
    const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate, 2);
 #endif
-   DTYPE *defPtrX = static_cast<DTYPE *>(deformationField->data);
-   DTYPE *defPtrY = &defPtrX[DFVoxelNumber];
+   DataType *defPtrX = static_cast<DataType *>(deformationField->data);
+   DataType *defPtrY = &defPtrX[DFVoxelNumber];
 
-   DTYPE *resPtrX = static_cast<DTYPE *>(dfToUpdate->data);
-   DTYPE *resPtrY = &resPtrX[warVoxelNumber];
+   DataType *resPtrX = static_cast<DataType *>(dfToUpdate->data);
+   DataType *resPtrY = &resPtrX[warVoxelNumber];
 
    mat44 *df_real2Voxel=nullptr;
    mat44 *df_voxel2Real=nullptr;
@@ -2576,9 +2576,9 @@ void reg_defField_compose2D(nifti_image *deformationField,
 
    size_t index;
    int a, b, pre[2];
-   DTYPE realDefX, realDefY, voxelX, voxelY;
-   DTYPE defX, defY, relX[2], relY[2], basis;
-#if defined (_OPENMP)
+   DataType realDefX, realDefY, voxelX, voxelY;
+   DataType defX, defY, relX[2], relY[2], basis;
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, \
    deformationField, defPtrX, defPtrY, resPtrX, resPtrY) \
@@ -2603,9 +2603,9 @@ void reg_defField_compose2D(nifti_image *deformationField,
          // Linear interpolation to compute the new deformation
          pre[0]=(int)reg_floor(voxelX);
          pre[1]=(int)reg_floor(voxelY);
-         relX[1]=voxelX-(DTYPE)pre[0];
+         relX[1]=voxelX-(DataType)pre[0];
          relX[0]=1.f-relX[1];
-         relY[1]=voxelY-(DTYPE)pre[1];
+         relY[1]=voxelY-(DataType)pre[1];
          relY[0]=1.f-relY[1];
          realDefX=realDefY=0.f;
          for(b=0; b<2; ++b)
@@ -2624,7 +2624,7 @@ void reg_defField_compose2D(nifti_image *deformationField,
                else
                {
                   // Uses a sliding effect
-                  get_SlidedValues<DTYPE>(defX,
+                  get_SlidedValues<DataType>(defX,
                                           defY,
                                           pre[0]+a,
                         pre[1]+b,
@@ -2645,7 +2645,7 @@ void reg_defField_compose2D(nifti_image *deformationField,
    }// loop over every voxel
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_defField_compose3D(nifti_image *deformationField,
                             nifti_image *dfToUpdate,
                             int *mask)
@@ -2660,13 +2660,13 @@ void reg_defField_compose3D(nifti_image *deformationField,
    const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate);
 #endif
 
-   DTYPE *defPtrX = static_cast<DTYPE *>(deformationField->data);
-   DTYPE *defPtrY = &defPtrX[DFVoxelNumber];
-   DTYPE *defPtrZ = &defPtrY[DFVoxelNumber];
+   DataType *defPtrX = static_cast<DataType *>(deformationField->data);
+   DataType *defPtrY = &defPtrX[DFVoxelNumber];
+   DataType *defPtrZ = &defPtrY[DFVoxelNumber];
 
-   DTYPE *resPtrX = static_cast<DTYPE *>(dfToUpdate->data);
-   DTYPE *resPtrY = &resPtrX[warVoxelNumber];
-   DTYPE *resPtrZ = &resPtrY[warVoxelNumber];
+   DataType *resPtrX = static_cast<DataType *>(dfToUpdate->data);
+   DataType *resPtrY = &resPtrX[warVoxelNumber];
+   DataType *resPtrZ = &resPtrY[warVoxelNumber];
 
 #ifdef _WIN32
    __declspec(align(16))mat44 df_real2Voxel;
@@ -2687,10 +2687,10 @@ void reg_defField_compose3D(nifti_image *deformationField,
 
    size_t tempIndex, index;
    int a, b, c, currentX, currentY, currentZ, pre[3];
-   DTYPE realDef[3], voxel[3], basis, tempBasis;
-   DTYPE defX, defY, defZ, relX[2], relY[2], relZ[2];
+   DataType realDef[3], voxel[3], basis, tempBasis;
+   DataType defX, defY, defZ, relX[2], relY[2], relZ[2];
    bool inY, inZ;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, DefFieldDim, \
    defPtrX, defPtrY, defPtrZ, resPtrX, resPtrY, resPtrZ, deformationField) \
@@ -2726,11 +2726,11 @@ void reg_defField_compose3D(nifti_image *deformationField,
          pre[0]=static_cast<int>reg_floor(voxel[0]);
          pre[1]=static_cast<int>reg_floor(voxel[1]);
          pre[2]=static_cast<int>reg_floor(voxel[2]);
-         relX[1]=voxel[0]-static_cast<DTYPE>(pre[0]);
+         relX[1]=voxel[0]-static_cast<DataType>(pre[0]);
          relX[0]=1.-relX[1];
-         relY[1]=voxel[1]-static_cast<DTYPE>(pre[1]);
+         relY[1]=voxel[1]-static_cast<DataType>(pre[1]);
          relY[0]=1.-relY[1];
-         relZ[1]=voxel[2]-static_cast<DTYPE>(pre[2]);
+         relZ[1]=voxel[2]-static_cast<DataType>(pre[2]);
          relZ[0]=1.-relZ[1];
          realDef[0]=realDef[1]=realDef[2]=0.;
          for(c=0; c<2; ++c)
@@ -2759,7 +2759,7 @@ void reg_defField_compose3D(nifti_image *deformationField,
                   else
                   {
                      // Uses a sliding effect
-                     get_SlidedValues<DTYPE>(defX,
+                     get_SlidedValues<DataType>(defX,
                                              defY,
                                              defZ,
                                              currentX,
@@ -3335,7 +3335,7 @@ static void optimize(gsl_multimin_function *f, double *start, void *data, double
    nmsimplex_calc_center (&t, start);
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_defFieldInvert3D(nifti_image *inputDeformationField,
                           nifti_image *outputDeformationField,
                           float tolerance)
@@ -3369,8 +3369,8 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField,
    int i,x,y,z;
    double position[4], pars[4], arrayy[4][3];
    struct ddata dat;
-   DTYPE *outData;
-#if defined (_OPENMP)
+   DataType *outData;
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(outputDeformationField,tolerance,outputVoxelNumber, \
    inputDeformationField, OutXYZMatrix, delta) \
@@ -3382,7 +3382,7 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField,
       for(i=0; i<4; ++i)              /* set up 2D array pointers */
          dat.arrayy[i]= arrayy[i];
 
-      outData = (DTYPE *)(outputDeformationField->data) +
+      outData = (DataType *)(outputDeformationField->data) +
             outputDeformationField->nx * outputDeformationField->ny * z;
 
       for(y=0; y<outputDeformationField->ny; ++y)
@@ -3455,7 +3455,7 @@ void reg_defFieldInvert(nifti_image *inputDeformationField,
 /* *************************************************************** */
 /* *************************************************************** */
 //HAVE TO BE CHECKED
-template<class DTYPE>
+template<class DataType>
 void reg_spline_cppComposition_2D(nifti_image *grid1,
                                   nifti_image *grid2,
                                   bool displacement1,
@@ -3472,32 +3472,32 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
    } val;
  #endif // _USE_SSE
 
-   DTYPE *outCPPPtrX = static_cast<DTYPE *>(grid2->data);
-   DTYPE *outCPPPtrY = &outCPPPtrX[CalcVoxelNumber(*grid2, 2)];
+   DataType *outCPPPtrX = static_cast<DataType *>(grid2->data);
+   DataType *outCPPPtrY = &outCPPPtrX[CalcVoxelNumber(*grid2, 2)];
 
-   DTYPE *controlPointPtrX = static_cast<DTYPE *>(grid1->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*grid1, 2)];
+   DataType *controlPointPtrX = static_cast<DataType *>(grid1->data);
+   DataType *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*grid1, 2)];
 
-   DTYPE basis;
+   DataType basis;
 
  #ifdef _WIN32
-   __declspec(align(16)) DTYPE xBasis[4];
-   __declspec(align(16)) DTYPE yBasis[4];
+   __declspec(align(16)) DataType xBasis[4];
+   __declspec(align(16)) DataType yBasis[4];
  #if _USE_SSE
-   __declspec(align(16)) DTYPE xyBasis[16];
+   __declspec(align(16)) DataType xyBasis[16];
  #endif  //_USE_SSE
 
-   __declspec(align(16)) DTYPE xControlPointCoordinates[16];
-   __declspec(align(16)) DTYPE yControlPointCoordinates[16];
+   __declspec(align(16)) DataType xControlPointCoordinates[16];
+   __declspec(align(16)) DataType yControlPointCoordinates[16];
  #else // _WIN32
-   DTYPE xBasis[4] __attribute__((aligned(16)));
-   DTYPE yBasis[4] __attribute__((aligned(16)));
+   DataType xBasis[4] __attribute__((aligned(16)));
+   DataType yBasis[4] __attribute__((aligned(16)));
  #if _USE_SSE
-   DTYPE xyBasis[16] __attribute__((aligned(16)));
+   DataType xyBasis[16] __attribute__((aligned(16)));
  #endif  //_USE_SSE
 
-   DTYPE xControlPointCoordinates[16] __attribute__((aligned(16)));
-   DTYPE yControlPointCoordinates[16] __attribute__((aligned(16)));
+   DataType xControlPointCoordinates[16] __attribute__((aligned(16)));
+   DataType yControlPointCoordinates[16] __attribute__((aligned(16)));
  #endif // _WIN32
 
    size_t coord;
@@ -3518,10 +3518,10 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
       {
 
          // Get the control point actual position
-         DTYPE xReal = *outCPPPtrX;
-         DTYPE yReal = *outCPPPtrY;
-         DTYPE initialX=xReal;
-         DTYPE initialY=yReal;
+         DataType xReal = *outCPPPtrX;
+         DataType yReal = *outCPPPtrY;
+         DataType initialX=xReal;
+         DataType initialY=yReal;
          if(displacement2)
          {
             xReal +=
@@ -3535,30 +3535,30 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
          }
 
          // Get the voxel based control point position in grid1
-         DTYPE xVoxel = matrix_real_to_voxel1->m[0][0]*xReal
+         DataType xVoxel = matrix_real_to_voxel1->m[0][0]*xReal
                + matrix_real_to_voxel1->m[0][1]*yReal
                + matrix_real_to_voxel1->m[0][3];
-         DTYPE yVoxel = matrix_real_to_voxel1->m[1][0]*xReal
+         DataType yVoxel = matrix_real_to_voxel1->m[1][0]*xReal
                + matrix_real_to_voxel1->m[1][1]*yReal
                + matrix_real_to_voxel1->m[1][3];
 
          // The spline coefficients are computed
          int xPre=(int)(reg_floor(xVoxel));
-         basis=(DTYPE)xVoxel-(DTYPE)xPre;
+         basis=(DataType)xVoxel-(DataType)xPre;
          xPre--;
          if(basis<0) basis=0; //rounding error
-         if(bspline) get_BSplineBasisValues<DTYPE>(basis, xBasis);
-         else get_SplineBasisValues<DTYPE>(basis, xBasis);
+         if(bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
+         else get_SplineBasisValues<DataType>(basis, xBasis);
 
          int yPre=(int)(reg_floor(yVoxel));
-         basis=(DTYPE)yVoxel-(DTYPE)yPre;
+         basis=(DataType)yVoxel-(DataType)yPre;
          yPre--;
          if(basis<0) basis=0; //rounding error
-         if(bspline) get_BSplineBasisValues<DTYPE>(basis, yBasis);
-         else get_SplineBasisValues<DTYPE>(basis, yBasis);
+         if(bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+         else get_SplineBasisValues<DataType>(basis, yBasis);
 
          // The control points are stored
-         get_GridValues<DTYPE>(xPre,
+         get_GridValues<DataType>(xPre,
                                yPre,
                                grid1,
                                controlPointPtrX,
@@ -3605,7 +3605,7 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
          {
             for(unsigned int a=0; a<4; a++)
             {
-               DTYPE tempValue = xBasis[a] * yBasis[b];
+               DataType tempValue = xBasis[a] * yBasis[b];
                xReal += xControlPointCoordinates[coord] * tempValue;
                yReal += yControlPointCoordinates[coord] * tempValue;
                coord++;
@@ -3625,7 +3625,7 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
 }
 /* *************************************************************** */
 //HAVE TO BE CHECKED
-template<class DTYPE>
+template<class DataType>
 void reg_spline_cppComposition_3D(nifti_image *grid1,
                                   nifti_image *grid2,
                                   bool displacement1,
@@ -3653,42 +3653,42 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
  #else
    int a, b, c;
    size_t coord;
-   DTYPE tempValue;
+   DataType tempValue;
  #endif
 
    const size_t grid2VoxelNumber = CalcVoxelNumber(*grid2);
-   DTYPE *outCPPPtrX = static_cast<DTYPE *>(grid2->data);
-   DTYPE *outCPPPtrY = &outCPPPtrX[grid2VoxelNumber];
-   DTYPE *outCPPPtrZ = &outCPPPtrY[grid2VoxelNumber];
+   DataType *outCPPPtrX = static_cast<DataType *>(grid2->data);
+   DataType *outCPPPtrY = &outCPPPtrX[grid2VoxelNumber];
+   DataType *outCPPPtrZ = &outCPPPtrY[grid2VoxelNumber];
 
    const size_t grid1VoxelNumber = CalcVoxelNumber(*grid1);
-   DTYPE *controlPointPtrX = static_cast<DTYPE *>(grid1->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[grid1VoxelNumber];
-   DTYPE *controlPointPtrZ = &controlPointPtrY[grid1VoxelNumber];
+   DataType *controlPointPtrX = static_cast<DataType *>(grid1->data);
+   DataType *controlPointPtrY = &controlPointPtrX[grid1VoxelNumber];
+   DataType *controlPointPtrZ = &controlPointPtrY[grid1VoxelNumber];
 
-   DTYPE basis;
+   DataType basis;
 
  #ifdef _WIN32
-   __declspec(align(16)) DTYPE xBasis[4];
-   __declspec(align(16)) DTYPE yBasis[4];
-   __declspec(align(16)) DTYPE zBasis[4];
-   __declspec(align(16)) DTYPE xControlPointCoordinates[64];
-   __declspec(align(16)) DTYPE yControlPointCoordinates[64];
-   __declspec(align(16)) DTYPE zControlPointCoordinates[64];
+   __declspec(align(16)) DataType xBasis[4];
+   __declspec(align(16)) DataType yBasis[4];
+   __declspec(align(16)) DataType zBasis[4];
+   __declspec(align(16)) DataType xControlPointCoordinates[64];
+   __declspec(align(16)) DataType yControlPointCoordinates[64];
+   __declspec(align(16)) DataType zControlPointCoordinates[64];
  #else
-   DTYPE xBasis[4] __attribute__((aligned(16)));
-   DTYPE yBasis[4] __attribute__((aligned(16)));
-   DTYPE zBasis[4] __attribute__((aligned(16)));
-   DTYPE xControlPointCoordinates[64] __attribute__((aligned(16)));
-   DTYPE yControlPointCoordinates[64] __attribute__((aligned(16)));
-   DTYPE zControlPointCoordinates[64] __attribute__((aligned(16)));
+   DataType xBasis[4] __attribute__((aligned(16)));
+   DataType yBasis[4] __attribute__((aligned(16)));
+   DataType zBasis[4] __attribute__((aligned(16)));
+   DataType xControlPointCoordinates[64] __attribute__((aligned(16)));
+   DataType yControlPointCoordinates[64] __attribute__((aligned(16)));
+   DataType zControlPointCoordinates[64] __attribute__((aligned(16)));
  #endif
 
    int xPre, xPreOld, yPre, yPreOld, zPre, zPreOld;
    int x, y, z;
    size_t index;
-   DTYPE xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ;
-   DTYPE xVoxel, yVoxel, zVoxel;
+   DataType xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ;
+   DataType xVoxel, yVoxel, zVoxel;
 
    // read the xyz/ijk sform or qform, as appropriate
    mat44 *matrix_real_to_voxel1=nullptr;
@@ -3700,7 +3700,7 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
       matrix_voxel_to_real2=&(grid2->sto_xyz);
    else matrix_voxel_to_real2=&(grid2->qto_xyz);
 
- #if defined (_OPENMP)
+ #ifdef _OPENMP
  #ifdef _USE_SSE
  #pragma omp parallel for default(none) \
    shared(grid1, grid2, displacement1, displacement2, matrix_voxel_to_real2, matrix_real_to_voxel1, \
@@ -3775,22 +3775,22 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
 
             // The spline coefficients are computed
             xPre=(int)(reg_floor(xVoxel));
-            basis=(DTYPE)xVoxel-(DTYPE)xPre;
+            basis=(DataType)xVoxel-(DataType)xPre;
             if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DTYPE>(basis, xBasis);
-            else get_SplineBasisValues<DTYPE>(basis, xBasis);
+            if(bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
+            else get_SplineBasisValues<DataType>(basis, xBasis);
 
             yPre=(int)(reg_floor(yVoxel));
-            basis=(DTYPE)yVoxel-(DTYPE)yPre;
+            basis=(DataType)yVoxel-(DataType)yPre;
             if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DTYPE>(basis, yBasis);
-            else get_SplineBasisValues<DTYPE>(basis, yBasis);
+            if(bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+            else get_SplineBasisValues<DataType>(basis, yBasis);
 
             zPre=(int)(reg_floor(zVoxel));
-            basis=(DTYPE)zVoxel-(DTYPE)zPre;
+            basis=(DataType)zVoxel-(DataType)zPre;
             if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DTYPE>(basis, zBasis);
-            else get_SplineBasisValues<DTYPE>(basis, zBasis);
+            if(bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
+            else get_SplineBasisValues<DataType>(basis, zBasis);
 
             --xPre;
             --yPre;
@@ -4269,7 +4269,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void compute_lie_bracket(nifti_image *img1,
                          nifti_image *img2,
                          nifti_image *res,
@@ -4298,17 +4298,17 @@ void compute_lie_bracket(nifti_image *img1,
       reg_getDisplacementFromDeformation(img1);
       reg_getDisplacementFromDeformation(img2);
 
-      DTYPE *resPtrX=static_cast<DTYPE *>(res->data);
-      DTYPE *resPtrY=&resPtrX[voxNumber];
-      DTYPE *img1DispPtrX=static_cast<DTYPE *>(img1->data);
-      DTYPE *img1DispPtrY=&img1DispPtrX[voxNumber];
-      DTYPE *img2DispPtrX=static_cast<DTYPE *>(img2->data);
-      DTYPE *img2DispPtrY=&img1DispPtrX[voxNumber];
+      DataType *resPtrX=static_cast<DataType *>(res->data);
+      DataType *resPtrY=&resPtrX[voxNumber];
+      DataType *img1DispPtrX=static_cast<DataType *>(img1->data);
+      DataType *img1DispPtrY=&img1DispPtrX[voxNumber];
+      DataType *img2DispPtrX=static_cast<DataType *>(img2->data);
+      DataType *img2DispPtrY=&img1DispPtrX[voxNumber];
       if(img1->nz>1)
       {
-         DTYPE *resPtrZ=&resPtrY[voxNumber];
-         DTYPE *img1DispPtrZ=&img1DispPtrY[voxNumber];
-         DTYPE *img2DispPtrZ=&img1DispPtrY[voxNumber];
+         DataType *resPtrZ=&resPtrY[voxNumber];
+         DataType *img1DispPtrZ=&img1DispPtrY[voxNumber];
+         DataType *img2DispPtrZ=&img1DispPtrY[voxNumber];
 
          for(size_t i=0; i<voxNumber; ++i)
          {
@@ -4394,9 +4394,9 @@ void compute_lie_bracket(nifti_image *img1,
                              true // bspline?
                              );
    // Create the data pointers
-   DTYPE *resPtr=static_cast<DTYPE *>(res->data);
-   DTYPE *one_twoPtr=static_cast<DTYPE *>(one_two->data);
-   DTYPE *two_onePtr=static_cast<DTYPE *>(two_one->data);
+   DataType *resPtr=static_cast<DataType *>(res->data);
+   DataType *one_twoPtr=static_cast<DataType *>(one_two->data);
+   DataType *two_onePtr=static_cast<DataType *>(two_one->data);
    // Compute the lie bracket value using difference of composition
 
  #ifdef _WIN32
@@ -4407,7 +4407,7 @@ void compute_lie_bracket(nifti_image *img1,
    voxNumber=res->nvox;
  #endif
 
- #if defined (_OPENMP)
+ #ifdef _OPENMP
  #pragma omp parallel for default(none) \
    shared(voxNumber, resPtr, one_twoPtr, two_onePtr) \
    private(i)
@@ -4420,7 +4420,7 @@ void compute_lie_bracket(nifti_image *img1,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void compute_BCH_update1(nifti_image *img1, // current field
                          nifti_image *img2, // gradient
                          int type)
@@ -4428,7 +4428,7 @@ void compute_BCH_update1(nifti_image *img1, // current field
    // To update
    reg_print_msg_error("The compute_BCH_update function needs updating");
    reg_exit();
-   DTYPE *res=(DTYPE *)malloc(img1->nvox*sizeof(DTYPE));
+   DataType *res=(DataType *)malloc(img1->nvox*sizeof(DataType));
 
  #ifdef _WIN32
    long i;
@@ -4441,9 +4441,9 @@ void compute_BCH_update1(nifti_image *img1, // current field
    bool use_jac=false;
 
    // r <- 2 + 1
-   DTYPE *img1Ptr=static_cast<DTYPE *>(img1->data);
-   DTYPE *img2Ptr=static_cast<DTYPE *>(img2->data);
- #if defined (_OPENMP)
+   DataType *img1Ptr=static_cast<DataType *>(img1->data);
+   DataType *img2Ptr=static_cast<DataType *>(img2->data);
+ #ifdef _OPENMP
  #pragma omp parallel for default(none) \
    shared(voxelNumber,img1Ptr,img2Ptr, res) \
    private(i)
@@ -4458,9 +4458,9 @@ void compute_BCH_update1(nifti_image *img1, // current field
 
       // r <- 2 + 1 + 0.5[2,1]
       nifti_image *lie_bracket_img2_img1 = nifti_dup(*img1, false);
-      compute_lie_bracket<DTYPE>(img2, img1, lie_bracket_img2_img1, use_jac);
-      DTYPE *lie_bracket_img2_img1Ptr=static_cast<DTYPE *>(lie_bracket_img2_img1->data);
- #if defined (_OPENMP)
+      compute_lie_bracket<DataType>(img2, img1, lie_bracket_img2_img1, use_jac);
+      DataType *lie_bracket_img2_img1Ptr=static_cast<DataType *>(lie_bracket_img2_img1->data);
+ #ifdef _OPENMP
  #pragma omp parallel for default(none) \
    shared(voxelNumber, res, lie_bracket_img2_img1Ptr) \
    private(i)
@@ -4472,9 +4472,9 @@ void compute_BCH_update1(nifti_image *img1, // current field
       {
          // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12
          nifti_image *lie_bracket_img2_lie1 = nifti_dup(*lie_bracket_img2_img1, false);
-         compute_lie_bracket<DTYPE>(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac);
-         DTYPE *lie_bracket_img2_lie1Ptr=static_cast<DTYPE *>(lie_bracket_img2_lie1->data);
- #if defined (_OPENMP)
+         compute_lie_bracket<DataType>(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac);
+         DataType *lie_bracket_img2_lie1Ptr=static_cast<DataType *>(lie_bracket_img2_lie1->data);
+ #ifdef _OPENMP
  #pragma omp parallel for default(none) \
    shared(voxelNumber, res, lie_bracket_img2_lie1Ptr) \
    private(i)
@@ -4486,9 +4486,9 @@ void compute_BCH_update1(nifti_image *img1, // current field
          {
             // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12
             nifti_image *lie_bracket_img1_lie1 = nifti_dup(*lie_bracket_img2_img1, false);
-            compute_lie_bracket<DTYPE>(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac);
-            DTYPE *lie_bracket_img1_lie1Ptr=static_cast<DTYPE *>(lie_bracket_img1_lie1->data);
- #if defined (_OPENMP)
+            compute_lie_bracket<DataType>(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac);
+            DataType *lie_bracket_img1_lie1Ptr=static_cast<DataType *>(lie_bracket_img1_lie1->data);
+ #ifdef _OPENMP
  #pragma omp parallel for default(none) \
    shared(voxelNumber, res, lie_bracket_img1_lie1Ptr) \
    private(i)
@@ -4501,9 +4501,9 @@ void compute_BCH_update1(nifti_image *img1, // current field
             {
                // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - [1,[2,[2,1]]]/24
                nifti_image *lie_bracket_img1_lie2 = nifti_dup(*lie_bracket_img2_lie1, false);
-               compute_lie_bracket<DTYPE>(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac);
-               DTYPE *lie_bracket_img1_lie2Ptr=static_cast<DTYPE *>(lie_bracket_img1_lie2->data);
- #if defined (_OPENMP)
+               compute_lie_bracket<DataType>(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac);
+               DataType *lie_bracket_img1_lie2Ptr=static_cast<DataType *>(lie_bracket_img1_lie2->data);
+ #ifdef _OPENMP
  #pragma omp parallel for default(none) \
    shared(voxelNumber, res, lie_bracket_img1_lie2Ptr) \
    private(i)
@@ -4549,35 +4549,35 @@ void compute_BCH_update(nifti_image *img1, // current field
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
-void extractLine(int start, int end, int increment,const DTYPE *image, DTYPE *values)
+template <class DataType>
+void extractLine(int start, int end, int increment,const DataType *image, DataType *values)
 {
    size_t index = 0;
    for(int i=start; i<end; i+=increment) values[index++] = image[i];
 }
 /* *************************************************************** */
-template <class DTYPE>
-void restoreLine(int start, int end, int increment, DTYPE *image, const DTYPE *values)
+template <class DataType>
+void restoreLine(int start, int end, int increment, DataType *image, const DataType *values)
 {
    size_t index = 0;
    for(int i=start; i<end; i+=increment) image[i] = values[index++];
 }
 /* *************************************************************** */
-template <class DTYPE>
-void intensitiesToSplineCoefficients(DTYPE *values, int number)
+template <class DataType>
+void intensitiesToSplineCoefficients(DataType *values, int number)
 {
    // Border are set to zero
-   DTYPE pole = sqrt(3.0) - 2.0;
-   DTYPE currentPole = pole;
-   DTYPE currentOpposite = pow(pole,(DTYPE)(2.0*(DTYPE)number-1.0));
-   DTYPE sum=0;
+   DataType pole = sqrt(3.0) - 2.0;
+   DataType currentPole = pole;
+   DataType currentOpposite = pow(pole,(DataType)(2.0*(DataType)number-1.0));
+   DataType sum=0;
    for(int i=1; i<number; i++)
    {
       sum += (currentPole - currentOpposite) * values[i];
       currentPole *= pole;
       currentOpposite /= pole;
    }
-   values[0] = (DTYPE)((values[0] - pole*pole*(values[0] + sum)) / (1.0 - pow(pole,(DTYPE)(2.0*(double)number+2.0))));
+   values[0] = (DataType)((values[0] - pole*pole*(values[0] + sum)) / (1.0 - pow(pole,(DataType)(2.0*(double)number+2.0))));
 
    //other values forward
    for(int i=1; i<number; i++)
@@ -4585,7 +4585,7 @@ void intensitiesToSplineCoefficients(DTYPE *values, int number)
       values[i] += pole * values[i-1];
    }
 
-   DTYPE ipp=(DTYPE)(1.0-pole);
+   DataType ipp=(DataType)(1.0-pole);
    ipp*=ipp;
 
    //last value
@@ -4599,11 +4599,11 @@ void intensitiesToSplineCoefficients(DTYPE *values, int number)
    return;
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_spline_GetDeconvolvedCoefficents_core(nifti_image *img)
 {
    double *coeff=(double *)malloc(img->nvox*sizeof(double));
-   DTYPE *imgPtr=static_cast<DTYPE *>(img->data);
+   DataType *imgPtr=static_cast<DataType *>(img->data);
    for(size_t i=0; i<img->nvox; ++i)
       coeff[i]=imgPtr[i];
    for(int u=0; u<img->nu; ++u)
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index 14c913d7..d6a964a1 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -35,12 +35,12 @@
  * define the control point grid image space
  * @param spacingMillimeter Control point spacing along each axis
  */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void reg_createControlPointGrid(nifti_image **controlPointGridImage,
                                 nifti_image *referenceImage,
                                 float *spacingMillimeter);
 
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
                                           nifti_image **backwardGridImage,
                                           nifti_image *referenceImage,
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 0869c416..0c21b34e 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -16,24 +16,24 @@
 
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void addJacobianGradientValues(mat33 jacobianMatrix,
                                double detJac,
-                               DTYPE basisX,
-                               DTYPE basisY,
-                               DTYPE *jacobianConstraint)
+                               DataType basisX,
+                               DataType basisY,
+                               DataType *jacobianConstraint)
 {
    jacobianConstraint[0] += detJac * (jacobianMatrix.m[1][1]*basisX - jacobianMatrix.m[1][0]*basisY);
    jacobianConstraint[1] += detJac * (jacobianMatrix.m[0][0]*basisY - jacobianMatrix.m[0][1]*basisX);
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void addJacobianGradientValues(mat33 jacobianMatrix,
                                double detJac,
-                               DTYPE basisX,
-                               DTYPE basisY,
-                               DTYPE basisZ,
-                               DTYPE *jacobianConstraint)
+                               DataType basisX,
+                               DataType basisY,
+                               DataType basisZ,
+                               DataType *jacobianConstraint)
 {
    jacobianConstraint[0] += detJac * (
             basisX * (jacobianMatrix.m[1][1]*jacobianMatrix.m[2][2] - jacobianMatrix.m[1][2]*jacobianMatrix.m[2][1]) +
@@ -52,11 +52,11 @@ void addJacobianGradientValues(mat33 jacobianMatrix,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                                   nifti_image *referenceImage,
                                   mat33 *JacobianMatrices,
-                                  DTYPE *JacobianDeterminants,
+                                  DataType *JacobianDeterminants,
                                   bool approximation,
                                   bool useHeaderInformation)
 {
@@ -76,9 +76,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
    }
    // Create some pointers towards to control point grid image data
    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
-   DTYPE *coeffPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *coeffPtrY = &coeffPtrX[nodeNumber];
-   DTYPE *coeffPtrZ = &coeffPtrY[nodeNumber];
+   DataType *coeffPtrX = static_cast<DataType *>(splineControlPoint->data);
+   DataType *coeffPtrY = &coeffPtrX[nodeNumber];
+   DataType *coeffPtrZ = &coeffPtrY[nodeNumber];
 
    // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing
    mat33 reorientation,jacobianMatrix;
@@ -117,7 +117,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                   JacobianMatrices[index]=jacobianMatrix;
                if(JacobianDeterminants!=nullptr)
                   JacobianDeterminants[index] =
-                        static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
+                        static_cast<DataType>(nifti_mat33_determ(jacobianMatrix));
                ++index;
             } // loop over x
          } // loop over y
@@ -134,13 +134,13 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
          useHeaderInformation=true;
 
       // Allocate variables that are used in both scenario
-      DTYPE gridVoxelSpacing[3]=
+      DataType gridVoxelSpacing[3]=
       {
          splineControlPoint->dx / referenceImage->dx,
          splineControlPoint->dy / referenceImage->dy,
          splineControlPoint->dz / referenceImage->dz
       };
-      DTYPE pre[3];
+      DataType pre[3];
 
       if(useHeaderInformation)
       {
@@ -198,7 +198,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                      JacobianMatrices[index]=jacobianMatrix;
                   if(JacobianDeterminants!=nullptr)
                      JacobianDeterminants[index] =
-                           static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
+                           static_cast<DataType>(nifti_mat33_determ(jacobianMatrix));
                   ++index;
                } // x
             } // y
@@ -210,16 +210,16 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
          for(z=0; z<referenceImage->nz; z++)
          {
             index=z*referenceImage->nx*referenceImage->ny;
-            pre[2]=(int)((DTYPE)z/gridVoxelSpacing[2])+1;
+            pre[2]=(int)((DataType)z/gridVoxelSpacing[2])+1;
 
             for(y=0; y<referenceImage->ny; y++)
             {
-               pre[1]=(int)((DTYPE)y/gridVoxelSpacing[1])+1;
+               pre[1]=(int)((DataType)y/gridVoxelSpacing[1])+1;
 
                for(x=0; x<referenceImage->nx; x++)
                {
 
-                  pre[0]=(int)((DTYPE)x/gridVoxelSpacing[0])+1;
+                  pre[0]=(int)((DataType)x/gridVoxelSpacing[0])+1;
                   int controlPoint_index=(pre[2]*splineControlPoint->ny+pre[1])*splineControlPoint->nx+pre[0];
 
                   jacobianMatrix.m[0][0] = (coeffPtrX[controlPoint_index+1] - coeffPtrX[controlPoint_index]);
@@ -242,7 +242,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                      JacobianMatrices[index]=jacobianMatrix;
                   if(JacobianDeterminants!=nullptr)
                      JacobianDeterminants[index] =
-                           static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
+                           static_cast<DataType>(nifti_mat33_determ(jacobianMatrix));
                   ++index;
                } // loop over x
             } // loop over y
@@ -253,11 +253,11 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                            nifti_image *referenceImage,
                            mat33 *JacobianMatrices,
-                           DTYPE *JacobianDeterminants,
+                           DataType *JacobianDeterminants,
                            bool approximation,
                            bool useHeaderInformation)
 {
@@ -277,8 +277,8 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
    }
    // Create some pointers towards to control point grid image data
    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
-   DTYPE *coeffPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *coeffPtrY = &coeffPtrX[nodeNumber];
+   DataType *coeffPtrX = static_cast<DataType *>(splineControlPoint->data);
+   DataType *coeffPtrY = &coeffPtrX[nodeNumber];
 
    // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing
    mat33 reorientation,jacobianMatrix;
@@ -295,9 +295,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
       // The Jacobian information is only computed at the control point positions
       // Note that the header information is not used here
       float basisX[9], basisY[9];
-      DTYPE coeffX[9], coeffY[9];
-	  DTYPE normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f };
-	  DTYPE first[3] = { -0.5f, 0.f, 0.5f };
+      DataType coeffX[9], coeffY[9];
+	  DataType normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f };
+	  DataType first[3] = { -0.5f, 0.f, 0.5f };
       // There are six different values taken into account
       int coord=0;
       for(int b=0; b<3; ++b)
@@ -322,7 +322,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
          for(x=1; x<splineControlPoint->nx-1; x++)
          {
 
-            get_GridValues<DTYPE>(x-1,
+            get_GridValues<DataType>(x-1,
                                   y-1,
                                   splineControlPoint,
                                   coeffPtrX,
@@ -347,7 +347,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                JacobianMatrices[voxelIndex]=jacobianMatrix;
             if(JacobianDeterminants!=nullptr)
                JacobianDeterminants[voxelIndex] =
-                     static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
+                     static_cast<DataType>(nifti_mat33_determ(jacobianMatrix));
             ++voxelIndex;
          } // loop over x
       } // loop over y
@@ -365,9 +365,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
       // Allocate variables that are used in both scenarii
       int pre[2], oldPre[2];
       int coord, incr0, incr1;
-      DTYPE xBasis[4], xFirst[4], yBasis[4], yFirst[4];
-      DTYPE basisX[16], basisY[16];
-      DTYPE coeffX[16], coeffY[16];
+      DataType xBasis[4], xFirst[4], yBasis[4], yFirst[4];
+      DataType basisX[16], basisY[16];
+      DataType coeffX[16], coeffY[16];
       size_t voxelIndex;
 
       if(useHeaderInformation)
@@ -405,9 +405,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                pre[1]=static_cast<int>(reg_floor(gridCoord[1]));
                // Compute the basis values and their first derivatives
                basis = gridCoord[0] - pre[0];
-               get_BSplineBasisValues<DTYPE>(basis, xBasis, xFirst);
+               get_BSplineBasisValues<DataType>(basis, xBasis, xFirst);
                basis = gridCoord[1] - pre[1];
-               get_BSplineBasisValues<DTYPE>(basis, yBasis, yFirst);
+               get_BSplineBasisValues<DataType>(basis, yBasis, yFirst);
                // Compute the 16 basis values and the corresponding derivatives
 
                coord=0;
@@ -424,7 +424,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1])
                {
 
-                  get_GridValues<DTYPE>(pre[0]-1,
+                  get_GridValues<DataType>(pre[0]-1,
                         pre[1]-1,
                         splineControlPoint,
                         coeffPtrX,
@@ -454,15 +454,15 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                   JacobianMatrices[voxelIndex]=jacobianMatrix;
                if(JacobianDeterminants!=nullptr)
                   JacobianDeterminants[voxelIndex] =
-                        static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
+                        static_cast<DataType>(nifti_mat33_determ(jacobianMatrix));
                ++voxelIndex;
             } // x
          } // y
       }
       else
       {
-         DTYPE basis;
-         DTYPE gridVoxelSpacing[2]=
+         DataType basis;
+         DataType gridVoxelSpacing[2]=
          {
             splineControlPoint->dx / referenceImage->dx,
             splineControlPoint->dy / referenceImage->dy
@@ -473,18 +473,18 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
             voxelIndex=y*referenceImage->nx;
             oldPre[0]=oldPre[1]=999999;
 
-            pre[1]=(int)((DTYPE)y/gridVoxelSpacing[1]);
-            basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)pre[1];
+            pre[1]=(int)((DataType)y/gridVoxelSpacing[1]);
+            basis=(DataType)y/gridVoxelSpacing[1]-(DataType)pre[1];
             if(basis<0) basis=0; //rounding error
-            get_BSplineBasisValues<DTYPE>(basis, yBasis, yFirst);
+            get_BSplineBasisValues<DataType>(basis, yBasis, yFirst);
 
             for(x=0; x<referenceImage->nx; x++)
             {
 
-               pre[0]=(int)((DTYPE)x/gridVoxelSpacing[0]);
-               basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)pre[0];
+               pre[0]=(int)((DataType)x/gridVoxelSpacing[0]);
+               basis=(DataType)x/gridVoxelSpacing[0]-(DataType)pre[0];
                if(basis<0) basis=0; //rounding error
-               get_BSplineBasisValues<DTYPE>(basis, xBasis, xFirst);
+               get_BSplineBasisValues<DataType>(basis, xBasis, xFirst);
 
                coord=0;
                for(incr0=0; incr0<4; ++incr0)
@@ -499,7 +499,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
 
                if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1])
                {
-                  get_GridValues<DTYPE>(pre[0],
+                  get_GridValues<DataType>(pre[0],
                         pre[1],
                         splineControlPoint,
                         coeffPtrX,
@@ -527,7 +527,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                   JacobianMatrices[voxelIndex]=jacobianMatrix;
                if(JacobianDeterminants!=nullptr)
                   JacobianDeterminants[voxelIndex] =
-                        static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
+                        static_cast<DataType>(nifti_mat33_determ(jacobianMatrix));
                ++voxelIndex;
             } // loop over x
          } // loop over y
@@ -536,11 +536,11 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
    return;
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                            nifti_image *referenceImage,
                            mat33 *JacobianMatrices,
-                           DTYPE *JacobianDeterminants,
+                           DataType *JacobianDeterminants,
                            bool approximation,
                            bool useHeaderInformation)
 {
@@ -560,9 +560,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
    }
    // Create some pointers towards to control point grid image data
    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
-   DTYPE *coeffPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *coeffPtrY = &coeffPtrX[nodeNumber];
-   DTYPE *coeffPtrZ = &coeffPtrY[nodeNumber];
+   DataType *coeffPtrX = static_cast<DataType *>(splineControlPoint->data);
+   DataType *coeffPtrY = &coeffPtrX[nodeNumber];
+   DataType *coeffPtrZ = &coeffPtrY[nodeNumber];
 
    // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing
    mat33 reorientation,jacobianMatrix;
@@ -579,11 +579,11 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
       // The Jacobian information is only computed at the control point positions
       // Note that the header information is not used here
       float basisX[27], basisY[27], basisZ[27];
-      DTYPE coeffX[27], coeffY[27], coeffZ[27];
-	  DTYPE normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f };
-	  DTYPE first[3] = { -0.5f, 0.f, 0.5f };
+      DataType coeffX[27], coeffY[27], coeffZ[27];
+	  DataType normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f };
+	  DataType first[3] = { -0.5f, 0.f, 0.5f };
       // There are six different values taken into account
-      DTYPE tempX[9], tempY[9], tempZ[9];
+      DataType tempX[9], tempY[9], tempZ[9];
       int coord=0;
       for(int c=0; c<3; c++)
       {
@@ -621,7 +621,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
             for(x=1; x<splineControlPoint->nx-1; x++)
             {
 
-               get_GridValues<DTYPE>(x-1,
+               get_GridValues<DataType>(x-1,
                                      y-1,
                                      z-1,
                                      splineControlPoint,
@@ -653,7 +653,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   JacobianMatrices[voxelIndex]=jacobianMatrix;
                if(JacobianDeterminants!=nullptr)
                   JacobianDeterminants[voxelIndex] =
-                        static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
+                        static_cast<DataType>(nifti_mat33_determ(jacobianMatrix));
                ++voxelIndex;
             } // loop over x
          } // loop over y
@@ -671,7 +671,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
 
       // Allocate variables that are used in both scenarii
       int pre[3], oldPre[3], incr0;
-      DTYPE basis, xBasis[4], xFirst[4], yBasis[4], yFirst[4], zBasis[4], zFirst[4];
+      DataType basis, xBasis[4], xFirst[4], yBasis[4], yFirst[4], zBasis[4], zFirst[4];
 #if _USE_SSE
       union
       {
@@ -684,63 +684,63 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
       union
       {
          __m128 m[4];
-         __declspec(align(16)) DTYPE f[16];
+         __declspec(align(16)) DataType f[16];
       } tempX;
       union
       {
          __m128 m[4];
-         __declspec(align(16)) DTYPE f[16];
+         __declspec(align(16)) DataType f[16];
       } tempY;
       union
       {
          __m128 m[4];
-         __declspec(align(16)) DTYPE f[16];
+         __declspec(align(16)) DataType f[16];
       } tempZ;
       union
       {
          __m128 m[16];
-         __declspec(align(16)) DTYPE f[64];
+         __declspec(align(16)) DataType f[64];
       } basisX;
       union
       {
          __m128 m[16];
-         __declspec(align(16)) DTYPE f[64];
+         __declspec(align(16)) DataType f[64];
       } basisY;
       union
       {
          __m128 m[16];
-         __declspec(align(16)) DTYPE f[64];
+         __declspec(align(16)) DataType f[64];
       } basisZ;
       union
       {
          __m128 m[16];
-         __declspec(align(16)) DTYPE f[64];
+         __declspec(align(16)) DataType f[64];
       } coeffX;
       union
       {
          __m128 m[16];
-         __declspec(align(16)) DTYPE f[64];
+         __declspec(align(16)) DataType f[64];
       } coeffY;
       union
       {
          __m128 m[16];
-         __declspec(align(16)) DTYPE f[64];
+         __declspec(align(16)) DataType f[64];
       } coeffZ;
 #else // _WINDOWS
       union
       {
          __m128 m[4];
-         DTYPE f[16] __attribute__((aligned(16)));
+         DataType f[16] __attribute__((aligned(16)));
       } tempX;
       union
       {
          __m128 m[4];
-         DTYPE f[16] __attribute__((aligned(16)));
+         DataType f[16] __attribute__((aligned(16)));
       } tempY;
       union
       {
          __m128 m[4];
-         DTYPE f[16] __attribute__((aligned(16)));
+         DataType f[16] __attribute__((aligned(16)));
       } tempZ;
       memset(&(tempX.f[0]),0,16*sizeof(float));
       memset(&(tempY.f[0]),0,16*sizeof(float));
@@ -748,41 +748,41 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
       union
       {
          __m128 m[16];
-         DTYPE f[64] __attribute__((aligned(16)));
+         DataType f[64] __attribute__((aligned(16)));
       } basisX;
       union
       {
          __m128 m[16];
-         DTYPE f[64] __attribute__((aligned(16)));
+         DataType f[64] __attribute__((aligned(16)));
       } basisY;
       union
       {
          __m128 m[16];
-         DTYPE f[64] __attribute__((aligned(16)));
+         DataType f[64] __attribute__((aligned(16)));
       } basisZ;
       union
       {
          __m128 m[16];
-         DTYPE f[64] __attribute__((aligned(16)));
+         DataType f[64] __attribute__((aligned(16)));
       } coeffX;
       union
       {
          __m128 m[16];
-         DTYPE f[64] __attribute__((aligned(16)));
+         DataType f[64] __attribute__((aligned(16)));
       } coeffY;
       union
       {
          __m128 m[16];
-         DTYPE f[64] __attribute__((aligned(16)));
+         DataType f[64] __attribute__((aligned(16)));
       } coeffZ;
 #endif // _WINDOWS
 #else
       int coord, incr1, incr2;
-      DTYPE tempX[16], tempY[16], tempZ[16];
-      DTYPE basisX[64], basisY[64], basisZ[64];
-      DTYPE coeffX[64], coeffY[64], coeffZ[64];
+      DataType tempX[16], tempY[16], tempZ[16];
+      DataType basisX[64], basisY[64], basisZ[64];
+      DataType coeffX[64], coeffY[64], coeffZ[64];
 #endif
-      DTYPE gridVoxelSpacing[3]=
+      DataType gridVoxelSpacing[3]=
       {
          splineControlPoint->dx / referenceImage->dx,
          splineControlPoint->dy / referenceImage->dy,
@@ -828,11 +828,11 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   pre[2]=static_cast<int>(reg_floor(gridCoord[2]));
                   // Compute the basis values and their first derivatives
                   basis = gridCoord[0] - pre[0];
-                  get_BSplineBasisValues<DTYPE>(basis, xBasis, xFirst);
+                  get_BSplineBasisValues<DataType>(basis, xBasis, xFirst);
                   basis = gridCoord[1] - pre[1];
-                  get_BSplineBasisValues<DTYPE>(basis, yBasis, yFirst);
+                  get_BSplineBasisValues<DataType>(basis, yBasis, yFirst);
                   basis = gridCoord[2] - pre[2];
-                  get_BSplineBasisValues<DTYPE>(basis, zBasis, zFirst);
+                  get_BSplineBasisValues<DataType>(basis, zBasis, zFirst);
                   // Compute the 64 basis values and the corresponding derivatives
 #if _USE_SSE
                   val.f[0]=yBasis[0];
@@ -892,7 +892,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1] || oldPre[2]!=pre[2])
                   {
 #ifdef _USE_SSE
-                     get_GridValues<DTYPE>(pre[0]-1,
+                     get_GridValues<DataType>(pre[0]-1,
                            pre[1]-1,
                            pre[2]-1,
                            splineControlPoint,
@@ -906,7 +906,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                            false // not disp
                            );
 #else // _USE_SSE
-                     get_GridValues<DTYPE>(pre[0]-1,
+                     get_GridValues<DataType>(pre[0]-1,
                            pre[1]-1,
                            pre[2]-1,
                            splineControlPoint,
@@ -992,7 +992,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                      JacobianMatrices[voxelIndex]=jacobianMatrix;
                   if(JacobianDeterminants!=nullptr)
                      JacobianDeterminants[voxelIndex] =
-                           static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
+                           static_cast<DataType>(nifti_mat33_determ(jacobianMatrix));
                   ++voxelIndex;
                } // x
             } // y
@@ -1031,18 +1031,18 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
             voxelIndex=z*referenceImage->nx*referenceImage->ny;
             oldPre[0]=oldPre[1]=oldPre[2]=999999;
 
-            pre[2]=(int)((DTYPE)z/gridVoxelSpacing[2]);
-            basis=(DTYPE)z/gridVoxelSpacing[2]-(DTYPE)pre[2];
+            pre[2]=(int)((DataType)z/gridVoxelSpacing[2]);
+            basis=(DataType)z/gridVoxelSpacing[2]-(DataType)pre[2];
             if(basis<0) basis=0; //rounding error
-            get_BSplineBasisValues<DTYPE>(basis, zBasis, zFirst);
+            get_BSplineBasisValues<DataType>(basis, zBasis, zFirst);
 
             for(y=0; y<referenceImage->ny; y++)
             {
 
-               pre[1]=(int)((DTYPE)y/gridVoxelSpacing[1]);
-               basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)pre[1];
+               pre[1]=(int)((DataType)y/gridVoxelSpacing[1]);
+               basis=(DataType)y/gridVoxelSpacing[1]-(DataType)pre[1];
                if(basis<0) basis=0; //rounding error
-               get_BSplineBasisValues<DTYPE>(basis, yBasis, yFirst);
+               get_BSplineBasisValues<DataType>(basis, yBasis, yFirst);
 
 #if _USE_SSE
                val.f[0]=yBasis[0];
@@ -1079,10 +1079,10 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                for(x=0; x<referenceImage->nx; x++)
                {
 
-                  pre[0]=(int)((DTYPE)x/gridVoxelSpacing[0]);
-                  basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)pre[0];
+                  pre[0]=(int)((DataType)x/gridVoxelSpacing[0]);
+                  basis=(DataType)x/gridVoxelSpacing[0]-(DataType)pre[0];
                   if(basis<0) basis=0; //rounding error
-                  get_BSplineBasisValues<DTYPE>(basis, xBasis, xFirst);
+                  get_BSplineBasisValues<DataType>(basis, xBasis, xFirst);
 
 #if _USE_SSE
                   val.f[0]=xBasis[0];
@@ -1121,7 +1121,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1] || oldPre[2]!=pre[2])
                   {
 #ifdef _USE_SSE
-                     get_GridValues<DTYPE>(pre[0],
+                     get_GridValues<DataType>(pre[0],
                            pre[1],
                            pre[2],
                            splineControlPoint,
@@ -1135,7 +1135,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                            false // not disp
                            );
 #else // _USE_SSE
-                     get_GridValues<DTYPE>(pre[0],
+                     get_GridValues<DataType>(pre[0],
                            pre[1],
                            pre[2],
                            splineControlPoint,
@@ -1219,7 +1219,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                      JacobianMatrices[voxelIndex]=jacobianMatrix;
                   if(JacobianDeterminants!=nullptr)
                      JacobianDeterminants[voxelIndex] =
-                           static_cast<DTYPE>(nifti_mat33_determ(jacobianMatrix));
+                           static_cast<DataType>(nifti_mat33_determ(jacobianMatrix));
                   ++voxelIndex;
                } // loop over x
             } // loop over y
@@ -1344,7 +1344,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
                                       nifti_image *referenceImage,
                                       nifti_image *gradientImage,
@@ -1359,10 +1359,10 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
    else arraySize = CalcVoxelNumber(*referenceImage, 2);
    // Allocate arrays to store determinants and matrices
    mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33));
-   DTYPE *jacobianDeterminant=(DTYPE *)malloc(arraySize * sizeof(DTYPE));
+   DataType *jacobianDeterminant=(DataType *)malloc(arraySize * sizeof(DataType));
 
    // Compute all the required Jacobian determinants and matrices
-   reg_cubic_spline_jacobian2D<DTYPE>(splineControlPoint,
+   reg_cubic_spline_jacobian2D<DataType>(splineControlPoint,
                                 referenceImage,
                                 jacobianMatrices,
                                 jacobianDeterminant,
@@ -1370,8 +1370,8 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
                                 useHeaderInformation);
 
    // The gradient are now computed for every control point
-   DTYPE *gradientImagePtrX = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientImagePtrY = &gradientImagePtrX[CalcVoxelNumber(*gradientImage, 2)];
+   DataType *gradientImagePtrX = static_cast<DataType *>(gradientImage->data);
+   DataType *gradientImagePtrY = &gradientImagePtrX[CalcVoxelNumber(*gradientImage, 2)];
 
    // Matrices to be used to convert the gradient from voxel to mm
    mat33 jacobianMatrix, reorientation;
@@ -1384,19 +1384,19 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
    if(approximation)
       jacobianNumber = CalcVoxelNumber(*splineControlPoint, 2);
    else jacobianNumber = arraySize;
-   DTYPE ratio[2] =
+   DataType ratio[2] =
    {
-      referenceImage->dx*weight / ((DTYPE)jacobianNumber*splineControlPoint->dx),
-      referenceImage->dy*weight / ((DTYPE)jacobianNumber*splineControlPoint->dy)
+      referenceImage->dx*weight / ((DataType)jacobianNumber*splineControlPoint->dx),
+      referenceImage->dy*weight / ((DataType)jacobianNumber*splineControlPoint->dy)
    };
 
    // Only information at the control point position is considered
    if(approximation)
    {
-      DTYPE basisX[9], basisY[9];
-	  DTYPE normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f };
-	  DTYPE first[3] = { -0.5f, 0.f, 0.5f };
-      DTYPE jacobianConstraint[2], detJac;
+      DataType basisX[9], basisY[9];
+	  DataType normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f };
+	  DataType first[3] = { -0.5f, 0.f, 0.5f };
+      DataType jacobianConstraint[2], detJac;
       size_t coord=0, jacIndex, index;
       int x, y, pixelX, pixelY;
       // INVERTED ON PURPOSE
@@ -1450,7 +1450,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
 #else
                            detJac = (log(detJac)>0?1.0:-1.0) / detJac;
 #endif
-                           addJacobianGradientValues<DTYPE>(jacobianMatrix,
+                           addJacobianGradientValues<DataType>(jacobianMatrix,
                                                             detJac,
                                                             basisX[coord],
                                                             basisY[coord],
@@ -1488,17 +1488,17 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
       else
       {
          // assumes that the reference and grid image are aligned
-         DTYPE gridVoxelSpacing[2];
+         DataType gridVoxelSpacing[2];
          gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx;
          gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy;
 
-         DTYPE xBasis, yBasis, basis;
-         DTYPE xFirst, yFirst;
-         DTYPE basisValues[2];
+         DataType xBasis, yBasis, basis;
+         DataType xFirst, yFirst;
+         DataType basisValues[2];
          unsigned int jacIndex;
 
          int x, y, xPre, yPre, pixelX, pixelY, index;
-         DTYPE jacobianConstraint[2];
+         DataType jacobianConstraint[2];
          double detJac;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -1523,9 +1523,9 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
                   if(pixelY>-1 && pixelY<referenceImage->ny)
                   {
 
-                     yPre=(int)((DTYPE)pixelY/gridVoxelSpacing[1]);
-                     basis=(DTYPE)pixelY/gridVoxelSpacing[1]-(DTYPE)yPre;
-                     get_BSplineBasisValue<DTYPE>(basis,y-yPre,yBasis,yFirst);
+                     yPre=(int)((DataType)pixelY/gridVoxelSpacing[1]);
+                     basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre;
+                     get_BSplineBasisValue<DataType>(basis,y-yPre,yBasis,yFirst);
 
                      jacIndex = pixelY*referenceImage->nx+(int)reg_ceil((x-3)*gridVoxelSpacing[0]);
 
@@ -1536,9 +1536,9 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
 
                            detJac = jacobianDeterminant[jacIndex];
 
-                           xPre=(int)((DTYPE)pixelX/gridVoxelSpacing[0]);
-                           basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre;
-                           get_BSplineBasisValue<DTYPE>(basis,x-xPre,xBasis,xFirst);
+                           xPre=(int)((DataType)pixelX/gridVoxelSpacing[0]);
+                           basis=(DataType)pixelX/gridVoxelSpacing[0]-(DataType)xPre;
+                           get_BSplineBasisValue<DataType>(basis,x-xPre,xBasis,xFirst);
 
                            if(detJac>0 && (xBasis!=0 ||xFirst!=0))
                            {
@@ -1554,7 +1554,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
 #else
                               detJac = (log(detJac)>0?1.0:-1.0) / detJac;
 #endif
-                              addJacobianGradientValues<DTYPE>(jacobianMatrix,
+                              addJacobianGradientValues<DataType>(jacobianMatrix,
                                                                detJac,
                                                                basisValues[0],
                                     basisValues[1],
@@ -1582,7 +1582,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
    free(jacobianDeterminant);
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                                       nifti_image *referenceImage,
                                       nifti_image *gradientImage,
@@ -1597,10 +1597,10 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
    else arraySize = CalcVoxelNumber(*referenceImage);
    // Allocate arrays to store determinants and matrices
    mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33));
-   DTYPE *jacobianDeterminant=(DTYPE *)malloc(arraySize * sizeof(DTYPE));
+   DataType *jacobianDeterminant=(DataType *)malloc(arraySize * sizeof(DataType));
 
    // Compute all the required Jacobian determinants and matrices
-   reg_cubic_spline_jacobian3D<DTYPE>(splineControlPoint,
+   reg_cubic_spline_jacobian3D<DataType>(splineControlPoint,
                                 referenceImage,
                                 jacobianMatrices,
                                 jacobianDeterminant,
@@ -1609,9 +1609,9 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
 
    // The gradient are now computed for every control point
    const size_t voxelNumber = CalcVoxelNumber(*gradientImage);
-   DTYPE *gradientImagePtrX = static_cast<DTYPE *>(gradientImage->data);
-   DTYPE *gradientImagePtrY = &gradientImagePtrX[voxelNumber];
-   DTYPE *gradientImagePtrZ = &gradientImagePtrY[voxelNumber];
+   DataType *gradientImagePtrX = static_cast<DataType *>(gradientImage->data);
+   DataType *gradientImagePtrY = &gradientImagePtrX[voxelNumber];
+   DataType *gradientImagePtrZ = &gradientImagePtrY[voxelNumber];
 
    // Matrices to be used to convert the gradient from voxel to mm
    mat33 jacobianMatrix, reorientation;
@@ -1624,20 +1624,20 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
    if(approximation)
       jacobianNumber = CalcVoxelNumber(*splineControlPoint);
    else jacobianNumber = arraySize;
-   DTYPE ratio[3] =
+   DataType ratio[3] =
    {
-      referenceImage->dx*weight / ((DTYPE)jacobianNumber*splineControlPoint->dx),
-      referenceImage->dy*weight / ((DTYPE)jacobianNumber*splineControlPoint->dy),
-      referenceImage->dz*weight / ((DTYPE)jacobianNumber*splineControlPoint->dz)
+      referenceImage->dx*weight / ((DataType)jacobianNumber*splineControlPoint->dx),
+      referenceImage->dy*weight / ((DataType)jacobianNumber*splineControlPoint->dy),
+      referenceImage->dz*weight / ((DataType)jacobianNumber*splineControlPoint->dz)
    };
 
    // Only information at the control point position is considered
    if(approximation)
    {
-      DTYPE basisX[27], basisY[27], basisZ[27];
-      DTYPE normal[3]= {1.f/6.f, 2.f/3.f, 1.f/6.f};
-      DTYPE first[3]= {-0.5f, 0.f, 0.5f};
-      DTYPE jacobianConstraint[3], detJac;
+      DataType basisX[27], basisY[27], basisZ[27];
+      DataType normal[3]= {1.f/6.f, 2.f/3.f, 1.f/6.f};
+      DataType first[3]= {-0.5f, 0.f, 0.5f};
+      DataType jacobianConstraint[3], detJac;
       size_t coord=0, jacIndex, index;
       int x, y, z, pixelX, pixelY, pixelZ;
       // INVERTED ON PURPOSE
@@ -1702,7 +1702,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
 #else
                                     detJac = (log(detJac)>0?1.0:-1.0) / detJac;
 #endif
-                                    addJacobianGradientValues<DTYPE>(jacobianMatrix,
+                                    addJacobianGradientValues<DataType>(jacobianMatrix,
                                                                      detJac,
                                                                      basisX[coord],
                                                                      basisY[coord],
@@ -1751,18 +1751,18 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
       else
       {
          // assumes that the reference and grid image are aligned
-         DTYPE gridVoxelSpacing[3];
+         DataType gridVoxelSpacing[3];
          gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx;
          gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy;
          gridVoxelSpacing[2] = splineControlPoint->dz / referenceImage->dz;
 
-         DTYPE xBasis, yBasis, zBasis, basis;
-         DTYPE xFirst, yFirst, zFirst;
-         DTYPE basisValues[3];
+         DataType xBasis, yBasis, zBasis, basis;
+         DataType xFirst, yFirst, zFirst;
+         DataType basisValues[3];
          unsigned int jacIndex;
 
          int x, y, z, xPre, yPre, zPre, pixelX, pixelY, pixelZ, index;
-         DTYPE jacobianConstraint[3];
+         DataType jacobianConstraint[3];
          double detJac;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -1788,18 +1788,18 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                      if(pixelZ>-1 && pixelZ<referenceImage->nz)
                      {
 
-                        zPre=(int)((DTYPE)pixelZ/gridVoxelSpacing[2]);
-                        basis=(DTYPE)pixelZ/gridVoxelSpacing[2]-(DTYPE)zPre;
-                        get_BSplineBasisValue<DTYPE>(basis,z-zPre,zBasis,zFirst);
+                        zPre=(int)((DataType)pixelZ/gridVoxelSpacing[2]);
+                        basis=(DataType)pixelZ/gridVoxelSpacing[2]-(DataType)zPre;
+                        get_BSplineBasisValue<DataType>(basis,z-zPre,zBasis,zFirst);
 
                         for(pixelY=(int)reg_ceil((y-3)*gridVoxelSpacing[1]); pixelY<=(int)reg_ceil((y+1)*gridVoxelSpacing[1]); pixelY++)
                         {
                            if(pixelY>-1 && pixelY<referenceImage->ny && (zFirst!=0 || zBasis!=0))
                            {
 
-                              yPre=(int)((DTYPE)pixelY/gridVoxelSpacing[1]);
-                              basis=(DTYPE)pixelY/gridVoxelSpacing[1]-(DTYPE)yPre;
-                              get_BSplineBasisValue<DTYPE>(basis,y-yPre,yBasis,yFirst);
+                              yPre=(int)((DataType)pixelY/gridVoxelSpacing[1]);
+                              basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre;
+                              get_BSplineBasisValue<DataType>(basis,y-yPre,yBasis,yFirst);
 
                               jacIndex = (pixelZ*referenceImage->ny+pixelY)*referenceImage->nx+(int)reg_ceil((x-3)*gridVoxelSpacing[0]);
 
@@ -1810,9 +1810,9 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
 
                                     detJac = jacobianDeterminant[jacIndex];
 
-                                    xPre=(int)((DTYPE)pixelX/gridVoxelSpacing[0]);
-                                    basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre;
-                                    get_BSplineBasisValue<DTYPE>(basis,x-xPre,xBasis,xFirst);
+                                    xPre=(int)((DataType)pixelX/gridVoxelSpacing[0]);
+                                    basis=(DataType)pixelX/gridVoxelSpacing[0]-(DataType)xPre;
+                                    get_BSplineBasisValue<DataType>(basis,x-xPre,xBasis,xFirst);
 
                                     if(detJac>0 && (xBasis!=0 ||xFirst!=0))
                                     {
@@ -1829,7 +1829,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
 #else
                                        detJac = (log(detJac)>0?1.0:-1.0) / detJac;
 #endif
-                                       addJacobianGradientValues<DTYPE>(jacobianMatrix,
+                                       addJacobianGradientValues<DataType>(jacobianMatrix,
                                                                         detJac,
                                                                         basisValues[0],
                                              basisValues[1],
@@ -1938,7 +1938,7 @@ void reg_spline_getJacobianPenaltyTermGradient(nifti_image *splineControlPoint,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
                                    nifti_image *referenceImage,
                                    bool approximation,
@@ -1958,7 +1958,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
    else jacobianNumber = CalcVoxelNumber(*referenceImage, 2);
 #endif
    mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33));
-   DTYPE *jacobianDeterminant=(DTYPE *)malloc(jacobianNumber*sizeof(DTYPE));
+   DataType *jacobianDeterminant=(DataType *)malloc(jacobianNumber*sizeof(DataType));
 
    reg_cubic_spline_jacobian2D(splineControlPoint,
                          referenceImage,
@@ -1997,11 +1997,11 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz);
 
    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
-   DTYPE *controlPointPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[nodeNumber];
+   DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
+   DataType *controlPointPtrY = &controlPointPtrX[nodeNumber];
 
-   DTYPE basisValues[2], foldingCorrection[2], gradient[2], norm;
-   DTYPE xBasis=0, yBasis=0, xFirst=0, yFirst=0;
+   DataType basisValues[2], foldingCorrection[2], gradient[2], norm;
+   DataType xBasis=0, yBasis=0, xFirst=0, yFirst=0;
    int x, y, id, pixelX, pixelY, jacIndex;
    bool correctFolding;
    double detJac;
@@ -2043,8 +2043,8 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
 
                         if(detJac<=0)
                         {
-                           get_BSplineBasisValue<DTYPE>(0, y-pixelY+1, yBasis, yFirst);
-                           get_BSplineBasisValue<DTYPE>(0, x-pixelX+1, xBasis, xFirst);
+                           get_BSplineBasisValue<DataType>(0, y-pixelY+1, yBasis, yFirst);
+                           get_BSplineBasisValue<DataType>(0, x-pixelX+1, xBasis, xFirst);
 
                            basisValues[0] = xFirst * yBasis ;
                            basisValues[1] = xBasis * yFirst ;
@@ -2052,7 +2052,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
                            jacobianMatrix = jacobianMatrices[jacIndex];
 
                            correctFolding=true;
-                           addJacobianGradientValues<DTYPE>(jacobianMatrix,
+                           addJacobianGradientValues<DataType>(jacobianMatrix,
                                                             1.0,
                                                             basisValues[0],
                                  basisValues[1],
@@ -2068,14 +2068,14 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
                      + reorientation.m[0][1]*foldingCorrection[1];
                gradient[1] = reorientation.m[1][0]*foldingCorrection[0]
                      + reorientation.m[1][1]*foldingCorrection[1];
-               norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0]
+               norm = (DataType)(5.0 * sqrt(gradient[0]*gradient[0]
                      + gradient[1]*gradient[1]));
 
-               if(norm>(DTYPE)0)
+               if(norm>(DataType)0)
                {
                   id = y*splineControlPoint->nx+x;
-                  controlPointPtrX[id] += (DTYPE)(gradient[0]/norm);
-                  controlPointPtrY[id] += (DTYPE)(gradient[1]/norm);
+                  controlPointPtrX[id] += (DataType)(gradient[0]/norm);
+                  controlPointPtrY[id] += (DataType)(gradient[1]/norm);
                }
             }
          }
@@ -2089,7 +2089,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
          useHeaderInformation=true;
 
       int xPre, yPre;
-      DTYPE basis;
+      DataType basis;
 
       if(useHeaderInformation)
       {
@@ -2099,7 +2099,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
       else
       {
          // The grid and reference image are expected to be aligned
-         DTYPE gridVoxelSpacing[2];
+         DataType gridVoxelSpacing[2];
          gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx;
          gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy;
 
@@ -2139,19 +2139,19 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
 
                               jacobianMatrix = jacobianMatrices[jacIndex];
 
-                              yPre=(int)((DTYPE)pixelY/gridVoxelSpacing[1]);
-                              basis=(DTYPE)pixelY/gridVoxelSpacing[1]-(DTYPE)yPre;
-                              get_BSplineBasisValue<DTYPE>(basis, y-yPre,yBasis,yFirst);
+                              yPre=(int)((DataType)pixelY/gridVoxelSpacing[1]);
+                              basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre;
+                              get_BSplineBasisValue<DataType>(basis, y-yPre,yBasis,yFirst);
 
-                              xPre=(int)((DTYPE)pixelX/gridVoxelSpacing[0]);
-                              basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre;
-                              get_BSplineBasisValue<DTYPE>(basis, x-xPre,xBasis,xFirst);
+                              xPre=(int)((DataType)pixelX/gridVoxelSpacing[0]);
+                              basis=(DataType)pixelX/gridVoxelSpacing[0]-(DataType)xPre;
+                              get_BSplineBasisValue<DataType>(basis, x-xPre,xBasis,xFirst);
 
                               basisValues[0]= xFirst * yBasis ;
                               basisValues[1]= xBasis * yFirst ;
 
                               correctFolding=true;
-                              addJacobianGradientValues<DTYPE>(jacobianMatrix,
+                              addJacobianGradientValues<DataType>(jacobianMatrix,
                                                                1.0,
                                                                basisValues[0],
                                     basisValues[1],
@@ -2168,14 +2168,14 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
                         + reorientation.m[0][1]*foldingCorrection[1];
                   gradient[1] = reorientation.m[1][0]*foldingCorrection[0]
                         + reorientation.m[1][1]*foldingCorrection[1];
-                  norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0] +
+                  norm = (DataType)(5.0 * sqrt(gradient[0]*gradient[0] +
                         gradient[1]*gradient[1]));
 
                   if(norm>0)
                   {
                      id = y*splineControlPoint->nx+x;
-                     controlPointPtrX[id] += (DTYPE)(gradient[0]/norm);
-                     controlPointPtrY[id] += (DTYPE)(gradient[1]/norm);
+                     controlPointPtrX[id] += (DataType)(gradient[0]/norm);
+                     controlPointPtrY[id] += (DataType)(gradient[1]/norm);
                   }
                }
             }
@@ -2187,7 +2187,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
    return std::numeric_limits<double>::quiet_NaN();
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                                    nifti_image *referenceImage,
                                    bool approximation,
@@ -2207,7 +2207,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
    else jacobianNumber = CalcVoxelNumber(*referenceImage);
 #endif
    mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33));
-   DTYPE *jacobianDeterminant=(DTYPE *)malloc(jacobianNumber*sizeof(DTYPE));
+   DataType *jacobianDeterminant=(DataType *)malloc(jacobianNumber*sizeof(DataType));
 
    reg_cubic_spline_jacobian3D(splineControlPoint,
                          referenceImage,
@@ -2246,12 +2246,12 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz);
 
    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
-   DTYPE *controlPointPtrX = static_cast<DTYPE *>(splineControlPoint->data);
-   DTYPE *controlPointPtrY = &controlPointPtrX[nodeNumber];
-   DTYPE *controlPointPtrZ = &controlPointPtrY[nodeNumber];
+   DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
+   DataType *controlPointPtrY = &controlPointPtrX[nodeNumber];
+   DataType *controlPointPtrZ = &controlPointPtrY[nodeNumber];
 
-   DTYPE basisValues[3], foldingCorrection[3], gradient[3], norm;
-   DTYPE xBasis=0, yBasis=0, zBasis=0, xFirst=0, yFirst=0, zFirst=0;
+   DataType basisValues[3], foldingCorrection[3], gradient[3], norm;
+   DataType xBasis=0, yBasis=0, zBasis=0, xFirst=0, yFirst=0, zFirst=0;
    int x, y, z, id, pixelX, pixelY, pixelZ, jacIndex;
    bool correctFolding;
    double detJac;
@@ -2300,9 +2300,9 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
 
                                  if(detJac<=0)
                                  {
-                                    get_BSplineBasisValue<DTYPE>(0, z-pixelZ+1, zBasis, zFirst);
-                                    get_BSplineBasisValue<DTYPE>(0, y-pixelY+1, yBasis, yFirst);
-                                    get_BSplineBasisValue<DTYPE>(0, x-pixelX+1, xBasis, xFirst);
+                                    get_BSplineBasisValue<DataType>(0, z-pixelZ+1, zBasis, zFirst);
+                                    get_BSplineBasisValue<DataType>(0, y-pixelY+1, yBasis, yFirst);
+                                    get_BSplineBasisValue<DataType>(0, x-pixelX+1, xBasis, xFirst);
 
                                     basisValues[0] = xFirst * yBasis * zBasis ;
                                     basisValues[1] = xBasis * yFirst * zBasis ;
@@ -2311,7 +2311,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                                     jacobianMatrix = jacobianMatrices[jacIndex];
 
                                     correctFolding=true;
-                                    addJacobianGradientValues<DTYPE>(jacobianMatrix,
+                                    addJacobianGradientValues<DataType>(jacobianMatrix,
                                                                      1.0,
                                                                      basisValues[0],
                                           basisValues[1],
@@ -2335,16 +2335,16 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                   gradient[2] = reorientation.m[2][0]*foldingCorrection[0]
                         + reorientation.m[2][1]*foldingCorrection[1]
                         + reorientation.m[2][2]*foldingCorrection[2];
-                  norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0]
+                  norm = (DataType)(5.0 * sqrt(gradient[0]*gradient[0]
                         + gradient[1]*gradient[1]
                         + gradient[2]*gradient[2]));
 
-                  if(norm>(DTYPE)0)
+                  if(norm>(DataType)0)
                   {
                      id = (z*splineControlPoint->ny+y)*splineControlPoint->nx+x;
-                     controlPointPtrX[id] += (DTYPE)(gradient[0]/norm);
-                     controlPointPtrY[id] += (DTYPE)(gradient[1]/norm);
-                     controlPointPtrZ[id] += (DTYPE)(gradient[2]/norm);
+                     controlPointPtrX[id] += (DataType)(gradient[0]/norm);
+                     controlPointPtrY[id] += (DataType)(gradient[1]/norm);
+                     controlPointPtrZ[id] += (DataType)(gradient[2]/norm);
                   }
                }
             }
@@ -2359,7 +2359,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
          useHeaderInformation=true;
 
       int xPre, yPre, zPre;
-      DTYPE basis;
+      DataType basis;
 
       if(useHeaderInformation)
       {
@@ -2369,7 +2369,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
       else
       {
          // The grid and reference image are expected to be aligned
-         DTYPE gridVoxelSpacing[3];
+         DataType gridVoxelSpacing[3];
          gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx;
          gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy;
          gridVoxelSpacing[2] = splineControlPoint->dz / referenceImage->dz;
@@ -2416,24 +2416,24 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
 
                                        jacobianMatrix = jacobianMatrices[jacIndex];
 
-                                       zPre=(int)((DTYPE)pixelZ/gridVoxelSpacing[2]);
-                                       basis=(DTYPE)pixelZ/gridVoxelSpacing[2]-(DTYPE)zPre;
-                                       get_BSplineBasisValue<DTYPE>(basis, z-zPre,zBasis,zFirst);
+                                       zPre=(int)((DataType)pixelZ/gridVoxelSpacing[2]);
+                                       basis=(DataType)pixelZ/gridVoxelSpacing[2]-(DataType)zPre;
+                                       get_BSplineBasisValue<DataType>(basis, z-zPre,zBasis,zFirst);
 
-                                       yPre=(int)((DTYPE)pixelY/gridVoxelSpacing[1]);
-                                       basis=(DTYPE)pixelY/gridVoxelSpacing[1]-(DTYPE)yPre;
-                                       get_BSplineBasisValue<DTYPE>(basis, y-yPre,yBasis,yFirst);
+                                       yPre=(int)((DataType)pixelY/gridVoxelSpacing[1]);
+                                       basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre;
+                                       get_BSplineBasisValue<DataType>(basis, y-yPre,yBasis,yFirst);
 
-                                       xPre=(int)((DTYPE)pixelX/gridVoxelSpacing[0]);
-                                       basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre;
-                                       get_BSplineBasisValue<DTYPE>(basis, x-xPre,xBasis,xFirst);
+                                       xPre=(int)((DataType)pixelX/gridVoxelSpacing[0]);
+                                       basis=(DataType)pixelX/gridVoxelSpacing[0]-(DataType)xPre;
+                                       get_BSplineBasisValue<DataType>(basis, x-xPre,xBasis,xFirst);
 
                                        basisValues[0]= xFirst * yBasis * zBasis ;
                                        basisValues[1]= xBasis * yFirst * zBasis ;
                                        basisValues[2]= xBasis * yBasis * zFirst ;
 
                                        correctFolding=true;
-                                       addJacobianGradientValues<DTYPE>(jacobianMatrix,
+                                       addJacobianGradientValues<DataType>(jacobianMatrix,
                                                                         1.0,
                                                                         basisValues[0],
                                              basisValues[1],
@@ -2458,16 +2458,16 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                      gradient[2] = reorientation.m[2][0]*foldingCorrection[0]
                            + reorientation.m[2][1]*foldingCorrection[1]
                            + reorientation.m[2][2]*foldingCorrection[2];
-                     norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0] +
+                     norm = (DataType)(5.0 * sqrt(gradient[0]*gradient[0] +
                            gradient[1]*gradient[1] +
                            gradient[2]*gradient[2]));
 
                      if(norm>0)
                      {
                         id = (z*splineControlPoint->ny+y)*splineControlPoint->nx+x;
-                        controlPointPtrX[id] += (DTYPE)(gradient[0]/norm);
-                        controlPointPtrY[id] += (DTYPE)(gradient[1]/norm);
-                        controlPointPtrZ[id] += (DTYPE)(gradient[2]/norm);
+                        controlPointPtrX[id] += (DataType)(gradient[0]/norm);
+                        controlPointPtrY[id] += (DataType)(gradient[1]/norm);
+                        controlPointPtrZ[id] += (DataType)(gradient[2]/norm);
                      }
                   }
                }
@@ -2678,16 +2678,16 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_defField_getJacobianMap2D(nifti_image *deformationField,
                                    nifti_image *jacobianDeterminant,
                                    mat33 *jacobianMatrices)
 {
    const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2);
 
-   DTYPE *jacDetPtr=nullptr;
+   DataType *jacDetPtr=nullptr;
    if(jacobianDeterminant!=nullptr)
-      jacDetPtr=static_cast<DTYPE *>(jacobianDeterminant->data);
+      jacDetPtr=static_cast<DataType *>(jacobianDeterminant->data);
 
    float spacing[3];
    mat33 reorientation, jacobianMatrix;
@@ -2704,12 +2704,12 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField,
       reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->qto_xyz)));
    }
 
-   DTYPE *deformationPtrX = static_cast<DTYPE *>(deformationField->data);
-   DTYPE *deformationPtrY = &deformationPtrX[voxelNumber];
+   DataType *deformationPtrX = static_cast<DataType *>(deformationField->data);
+   DataType *deformationPtrY = &deformationPtrX[voxelNumber];
 
-   DTYPE basis[2]= {1.0,0};
-   DTYPE first[2]= {-1.0,1.0};
-   DTYPE firstX, firstY, defX, defY;
+   DataType basis[2]= {1.0,0};
+   DataType first[2]= {-1.0,1.0};
+   DataType firstX, firstY, defX, defY;
 
    int currentIndex, x, y, a, b, index;
 #ifdef _OPENMP
@@ -2788,16 +2788,16 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField,
    } // y
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_defField_getJacobianMap3D(nifti_image *deformationField,
                                    nifti_image *jacobianDeterminant,
                                    mat33 *jacobianMatrices)
 {
    const size_t voxelNumber = CalcVoxelNumber(*deformationField);
 
-   DTYPE *jacDetPtr=nullptr;
+   DataType *jacDetPtr=nullptr;
    if(jacobianDeterminant!=nullptr)
-      jacDetPtr=static_cast<DTYPE *>(jacobianDeterminant->data);
+      jacDetPtr=static_cast<DataType *>(jacobianDeterminant->data);
 
    float spacing[3];
    mat33 reorientation, jacobianMatrix;
@@ -2815,13 +2815,13 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField,
       reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->qto_xyz)));
    }
 
-   DTYPE *deformationPtrX = static_cast<DTYPE *>(deformationField->data);
-   DTYPE *deformationPtrY = &deformationPtrX[voxelNumber];
-   DTYPE *deformationPtrZ = &deformationPtrY[voxelNumber];
+   DataType *deformationPtrX = static_cast<DataType *>(deformationField->data);
+   DataType *deformationPtrY = &deformationPtrX[voxelNumber];
+   DataType *deformationPtrZ = &deformationPtrY[voxelNumber];
 
-   DTYPE basis[2]= {1.0,0};
-   DTYPE first[2]= {-1.0,1.0};
-   DTYPE firstX, firstY, firstZ, defX, defY, defZ;
+   DataType basis[2]= {1.0,0};
+   DataType first[2]= {-1.0,1.0};
+   DataType firstX, firstY, firstZ, defX, defY, defZ;
 
    int currentIndex, x, y, z, a, b, c, currentZ, index;
 #ifdef _OPENMP
@@ -2974,7 +2974,7 @@ void reg_defField_getJacobianMatrix(nifti_image *deformationField,
    }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
                                                    nifti_image* flowFieldImage
                                                    )
@@ -3073,13 +3073,13 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_getDetArrayFromMatArray(nifti_image *jacobianDetImage,
                                  mat33 *jacobianMatrices
                                  )
 {
    const size_t voxelNumber = CalcVoxelNumber(*jacobianDetImage);
-   DTYPE *jacDetPtr=static_cast<DTYPE *>(jacobianDetImage->data);
+   DataType *jacDetPtr=static_cast<DataType *>(jacobianDetImage->data);
    if(jacobianDetImage->nz>1){
        for(size_t voxel=0; voxel<voxelNumber; ++voxel)
           jacDetPtr[voxel]=nifti_mat33_determ(jacobianMatrices[voxel]);
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 89babf29..62ff07b4 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -13,24 +13,24 @@
 #include "_reg_localTrans_regul.h"
 
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoint) {
     const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
     int a, b, x, y, index, i;
 
     // Create pointers to the spline coefficients
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
 
     // get the constant basis values
-    DTYPE basisXX[9], basisYY[9], basisXY[9];
+    DataType basisXX[9], basisYY[9], basisXY[9];
     set_second_order_bspline_basis_values(basisXX, basisYY, basisXY);
 
     double constraintValue = 0;
 
-    DTYPE splineCoeffX, splineCoeffY;
-    DTYPE XX_x, YY_x, XY_x;
-    DTYPE XX_y, YY_y, XY_y;
+    DataType splineCoeffX, splineCoeffY;
+    DataType XX_x, YY_x, XY_x;
+    DataType XX_y, YY_y, XY_y;
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -70,26 +70,26 @@ double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoi
     return constraintValue / (double)splineControlPoint->nvox;
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoint) {
     const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     int a, b, c, x, y, z, index, i;
 
     // Create pointers to the spline coefficients
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-    const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
+    const DataType *splinePtrZ = &splinePtrY[nodeNumber];
 
     // get the constant basis values
-    DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27];
+    DataType basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27];
     set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ);
 
     double constraintValue = 0;
 
-    DTYPE splineCoeffX, splineCoeffY, splineCoeffZ;
-    DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x;
-    DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y;
-    DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z;
+    DataType splineCoeffX, splineCoeffY, splineCoeffZ;
+    DataType XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x;
+    DataType XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y;
+    DataType XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z;
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -180,7 +180,7 @@ double reg_spline_approxBendingEnergy(const nifti_image *splineControlPoint) {
     }
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
                                               nifti_image *gradientImage,
                                               float weight) {
@@ -188,20 +188,20 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
     int a, b, x, y, X, Y, index, i;
 
     // Create pointers to the spline coefficients
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
 
     // get the constant basis values
-    DTYPE basisXX[9], basisYY[9], basisXY[9];
+    DataType basisXX[9], basisYY[9], basisXY[9];
     set_second_order_bspline_basis_values(basisXX, basisYY, basisXY);
 
-    DTYPE splineCoeffX;
-    DTYPE splineCoeffY;
-    DTYPE XX_x, YY_x, XY_x;
-    DTYPE XX_y, YY_y, XY_y;
+    DataType splineCoeffX;
+    DataType splineCoeffY;
+    DataType XX_x, YY_x, XY_x;
+    DataType XX_y, YY_y, XY_y;
 
-    DTYPE *derivativeValues = (DTYPE*)calloc(6 * nodeNumber, sizeof(DTYPE));
-    DTYPE *derivativeValuesPtr;
+    DataType *derivativeValues = (DataType*)calloc(6 * nodeNumber, sizeof(DataType));
+    DataType *derivativeValuesPtr;
 
     reg_getDisplacementFromDeformation(splineControlPoint);
 
@@ -241,16 +241,16 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
             *derivativeValuesPtr++ = XX_y;
             *derivativeValuesPtr++ = YY_x;
             *derivativeValuesPtr++ = YY_y;
-            *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_x);
-            *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_y);
+            *derivativeValuesPtr++ = (DataType)(2.0 * XY_x);
+            *derivativeValuesPtr++ = (DataType)(2.0 * XY_y);
         }
     }
 
-    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
-    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
+    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
+    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
 
-    DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber;
-    DTYPE gradientValue[2];
+    DataType approxRatio = (DataType)weight / (DataType)nodeNumber;
+    DataType gradientValue[2];
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, \
@@ -287,7 +287,7 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
     free(derivativeValues);
 }
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
                                               nifti_image *gradientImage,
                                               float weight) {
@@ -295,23 +295,23 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
     int a, b, c, x, y, z, X, Y, Z, index, i;
 
     // Create pointers to the spline coefficients
-    DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-    DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    DataType *splinePtrY = &splinePtrX[nodeNumber];
+    DataType *splinePtrZ = &splinePtrY[nodeNumber];
 
     // get the constant basis values
-    DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27];
+    DataType basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27];
     set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ);
 
-    DTYPE splineCoeffX;
-    DTYPE splineCoeffY;
-    DTYPE splineCoeffZ;
-    DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x;
-    DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y;
-    DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z;
+    DataType splineCoeffX;
+    DataType splineCoeffY;
+    DataType splineCoeffZ;
+    DataType XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x;
+    DataType XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y;
+    DataType XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z;
 
-    DTYPE *derivativeValues = (DTYPE*)calloc(18 * nodeNumber, sizeof(DTYPE));
-    DTYPE *derivativeValuesPtr;
+    DataType *derivativeValues = (DataType*)calloc(18 * nodeNumber, sizeof(DataType));
+    DataType *derivativeValuesPtr;
 
     reg_getDisplacementFromDeformation(splineControlPoint);
 
@@ -379,25 +379,25 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
                 *derivativeValuesPtr++ = ZZ_x;
                 *derivativeValuesPtr++ = ZZ_y;
                 *derivativeValuesPtr++ = ZZ_z;
-                *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_x);
-                *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_y);
-                *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_z);
-                *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_x);
-                *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_y);
-                *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_z);
-                *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_x);
-                *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_y);
-                *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_z);
+                *derivativeValuesPtr++ = (DataType)(2.0 * XY_x);
+                *derivativeValuesPtr++ = (DataType)(2.0 * XY_y);
+                *derivativeValuesPtr++ = (DataType)(2.0 * XY_z);
+                *derivativeValuesPtr++ = (DataType)(2.0 * YZ_x);
+                *derivativeValuesPtr++ = (DataType)(2.0 * YZ_y);
+                *derivativeValuesPtr++ = (DataType)(2.0 * YZ_z);
+                *derivativeValuesPtr++ = (DataType)(2.0 * XZ_x);
+                *derivativeValuesPtr++ = (DataType)(2.0 * XZ_y);
+                *derivativeValuesPtr++ = (DataType)(2.0 * XZ_z);
             }
         }
     }
 
-    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
-    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
-    DTYPE *gradientZPtr = &gradientYPtr[nodeNumber];
+    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
+    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
+    DataType *gradientZPtr = &gradientYPtr[nodeNumber];
 
-    DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber;
-    DTYPE gradientValue[3];
+    DataType approxRatio = (DataType)weight / (DataType)nodeNumber;
+    DataType gradientValue[3];
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, gradientZPtr, \
@@ -492,7 +492,7 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoint) {
     const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
     int a, b, x, y, i, index;
@@ -501,16 +501,16 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin
     double currentValue;
 
     // Create pointers to the spline coefficients
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
 
     // Store the basis values since they are constant as the value is approximated
     // at the control point positions only
-    DTYPE basisX[9], basisY[9];
+    DataType basisX[9], basisY[9];
     set_first_order_basis_values(basisX, basisY);
 
-    DTYPE splineCoeffX;
-    DTYPE splineCoeffY;
+    DataType splineCoeffX;
+    DataType splineCoeffY;
 
     mat33 matrix, R;
 
@@ -567,7 +567,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin
     return constraintValue / static_cast<double>(splineControlPoint->nvox);
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoint) {
     const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     int a, b, c, x, y, z, i, index;
@@ -576,18 +576,18 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin
     double currentValue;
 
     // Create pointers to the spline coefficients
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-    const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
+    const DataType *splinePtrZ = &splinePtrY[nodeNumber];
 
     // Store the basis values since they are constant as the value is approximated
     // at the control point positions only
-    DTYPE basisX[27], basisY[27], basisZ[27];
+    DataType basisX[27], basisY[27], basisZ[27];
     set_first_order_basis_values(basisX, basisY, basisZ);
 
-    DTYPE splineCoeffX;
-    DTYPE splineCoeffY;
-    DTYPE splineCoeffZ;
+    DataType splineCoeffX;
+    DataType splineCoeffY;
+    DataType splineCoeffZ;
 
     mat33 matrix, R;
 
@@ -683,14 +683,14 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) {
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
                                       const nifti_image *splineControlPoint) {
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2);
     int a, b, x, y, index, xPre, yPre;
-    DTYPE basis;
+    DataType basis;
 
-    const DTYPE gridVoxelSpacing[2] = {
+    const DataType gridVoxelSpacing[2] = {
         splineControlPoint->dx / referenceImage->dx,
         splineControlPoint->dy / referenceImage->dy
     };
@@ -700,14 +700,14 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
 
     // Create pointers to the spline coefficients
     const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-    DTYPE splineCoeffX, splineCoeffY;
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
+    DataType splineCoeffX, splineCoeffY;
 
     // Store the basis values since they are constant as the value is approximated
     // at the control point positions only
-    DTYPE basisX[4], basisY[4];
-    DTYPE firstX[4], firstY[4];
+    DataType basisX[4], basisY[4];
+    DataType firstX[4], firstY[4];
 
     mat33 matrix, R;
 
@@ -719,16 +719,16 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
 
 
     for (y = 0; y < referenceImage->ny; ++y) {
-        yPre = static_cast<int>(static_cast<DTYPE>(y) / gridVoxelSpacing[1]);
-        basis = static_cast<DTYPE>(y) / gridVoxelSpacing[1] - static_cast<DTYPE>(yPre);
+        yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
+        basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
         if (basis < 0) basis = 0; //rounding error
-        get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
+        get_BSplineBasisValues<DataType>(basis, basisY, firstY);
 
         for (x = 0; x < referenceImage->nx; ++x) {
-            xPre = static_cast<int>(static_cast<DTYPE>(x) / gridVoxelSpacing[0]);
-            basis = static_cast<DTYPE>(x) / gridVoxelSpacing[0] - static_cast<DTYPE>(xPre);
+            xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
+            basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
             if (basis < 0) basis = 0; //rounding error
-            get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
+            get_BSplineBasisValues<DataType>(basis, basisX, firstX);
 
             memset(&matrix, 0, sizeof(mat33));
 
@@ -766,14 +766,14 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
     return constraintValue / static_cast<double>(voxelNumber * 2);
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
                                       const nifti_image *splineControlPoint) {
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
     int a, b, c, x, y, z, index, xPre, yPre, zPre;
-    DTYPE basis;
+    DataType basis;
 
-    const DTYPE gridVoxelSpacing[3] = {
+    const DataType gridVoxelSpacing[3] = {
         splineControlPoint->dx / referenceImage->dx,
         splineControlPoint->dy / referenceImage->dy,
         splineControlPoint->dz / referenceImage->dz
@@ -784,15 +784,15 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
 
     // Create pointers to the spline coefficients
     const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-    const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
-    DTYPE splineCoeffX, splineCoeffY, splineCoeffZ;
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
+    const DataType *splinePtrZ = &splinePtrY[nodeNumber];
+    DataType splineCoeffX, splineCoeffY, splineCoeffZ;
 
     // Store the basis values since they are constant as the value is approximated
     // at the control point positions only
-    DTYPE basisX[4], basisY[4], basisZ[4];
-    DTYPE firstX[4], firstY[4], firstZ[4];
+    DataType basisX[4], basisY[4], basisZ[4];
+    DataType firstX[4], firstY[4], firstZ[4];
 
     mat33 matrix, R;
 
@@ -803,22 +803,22 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
     else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
 
     for (z = 0; z < referenceImage->nz; ++z) {
-        zPre = static_cast<int>(static_cast<DTYPE>(z) / gridVoxelSpacing[2]);
-        basis = static_cast<DTYPE>(z) / gridVoxelSpacing[2] - static_cast<DTYPE>(zPre);
+        zPre = static_cast<int>(static_cast<DataType>(z) / gridVoxelSpacing[2]);
+        basis = static_cast<DataType>(z) / gridVoxelSpacing[2] - static_cast<DataType>(zPre);
         if (basis < 0) basis = 0; //rounding error
-        get_BSplineBasisValues<DTYPE>(basis, basisZ, firstZ);
+        get_BSplineBasisValues<DataType>(basis, basisZ, firstZ);
 
         for (y = 0; y < referenceImage->ny; ++y) {
-            yPre = static_cast<int>(static_cast<DTYPE>(y) / gridVoxelSpacing[1]);
-            basis = static_cast<DTYPE>(y) / gridVoxelSpacing[1] - static_cast<DTYPE>(yPre);
+            yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
+            basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
             if (basis < 0) basis = 0; //rounding error
-            get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
+            get_BSplineBasisValues<DataType>(basis, basisY, firstY);
 
             for (x = 0; x < referenceImage->nx; ++x) {
-                xPre = static_cast<int>(static_cast<DTYPE>(x) / gridVoxelSpacing[0]);
-                basis = static_cast<DTYPE>(x) / gridVoxelSpacing[0] - static_cast<DTYPE>(xPre);
+                xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
+                basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
                 if (basis < 0) basis = 0; //rounding error
-                get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
+                get_BSplineBasisValues<DataType>(basis, basisX, firstX);
 
                 memset(&matrix, 0, sizeof(mat33));
 
@@ -894,38 +894,38 @@ double reg_spline_linearEnergy(const nifti_image *referenceImage,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
                                        const nifti_image *splineControlPoint,
                                        nifti_image *gradientImage,
                                        float weight) {
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2);
     int a, b, x, y, index, xPre, yPre;
-    DTYPE basis;
+    DataType basis;
 
-    const DTYPE gridVoxelSpacing[2] = {
+    const DataType gridVoxelSpacing[2] = {
         splineControlPoint->dx / referenceImage->dx,
         splineControlPoint->dy / referenceImage->dy
     };
 
     // Create pointers to the spline coefficients
     const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-    DTYPE splineCoeffX, splineCoeffY;
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
+    DataType splineCoeffX, splineCoeffY;
 
     // Store the basis values since they are constant as the value is approximated
     // at the control point positions only
-    DTYPE basisX[4], basisY[4];
-    DTYPE firstX[4], firstY[4];
+    DataType basisX[4], basisY[4];
+    DataType firstX[4], firstY[4];
 
     mat33 matrix, R;
 
-    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
-    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
+    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
+    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
 
-    DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber;
-    DTYPE gradValues[2];
+    DataType approxRatio = (DataType)weight / (DataType)voxelNumber;
+    DataType gradValues[2];
 
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
@@ -936,16 +936,16 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
 
     // Loop over all voxels
     for (y = 0; y < referenceImage->ny; ++y) {
-        yPre = static_cast<int>(static_cast<DTYPE>(y) / gridVoxelSpacing[1]);
-        basis = static_cast<DTYPE>(y) / gridVoxelSpacing[1] - static_cast<DTYPE>(yPre);
+        yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
+        basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
         if (basis < 0) basis = 0; //rounding error
-        get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
+        get_BSplineBasisValues<DataType>(basis, basisY, firstY);
 
         for (x = 0; x < referenceImage->nx; ++x) {
-            xPre = static_cast<int>(static_cast<DTYPE>(x) / gridVoxelSpacing[0]);
-            basis = static_cast<DTYPE>(x) / gridVoxelSpacing[0] - static_cast<DTYPE>(xPre);
+            xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
+            basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
             if (basis < 0) basis = 0; //rounding error
-            get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
+            get_BSplineBasisValues<DataType>(basis, basisX, firstX);
 
             memset(&matrix, 0, sizeof(mat33));
 
@@ -985,16 +985,16 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
                                        const nifti_image *splineControlPoint,
                                        nifti_image *gradientImage,
                                        float weight) {
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
     int a, b, c, x, y, z, index, xPre, yPre, zPre;
-    DTYPE basis;
+    DataType basis;
 
-    const DTYPE gridVoxelSpacing[3] = {
+    const DataType gridVoxelSpacing[3] = {
         splineControlPoint->dx / referenceImage->dx,
         splineControlPoint->dy / referenceImage->dy,
         splineControlPoint->dz / referenceImage->dz
@@ -1002,24 +1002,24 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
 
     // Create pointers to the spline coefficients
     const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-    const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
-    DTYPE splineCoeffX, splineCoeffY, splineCoeffZ;
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
+    const DataType *splinePtrZ = &splinePtrY[nodeNumber];
+    DataType splineCoeffX, splineCoeffY, splineCoeffZ;
 
     // Store the basis values since they are constant as the value is approximated
     // at the control point positions only
-    DTYPE basisX[4], basisY[4], basisZ[4];
-    DTYPE firstX[4], firstY[4], firstZ[4];
+    DataType basisX[4], basisY[4], basisZ[4];
+    DataType firstX[4], firstY[4], firstZ[4];
 
     mat33 matrix, R;
 
-    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
-    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
-    DTYPE *gradientZPtr = &gradientYPtr[nodeNumber];
+    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
+    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
+    DataType *gradientZPtr = &gradientYPtr[nodeNumber];
 
-    DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber;
-    DTYPE gradValues[3];
+    DataType approxRatio = (DataType)weight / (DataType)voxelNumber;
+    DataType gradValues[3];
 
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
@@ -1030,22 +1030,22 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
 
     // Loop over all voxels
     for (z = 0; z < referenceImage->nz; ++z) {
-        zPre = static_cast<int>(static_cast<DTYPE>(z) / gridVoxelSpacing[2]);
-        basis = static_cast<DTYPE>(z) / gridVoxelSpacing[2] - static_cast<DTYPE>(zPre);
+        zPre = static_cast<int>(static_cast<DataType>(z) / gridVoxelSpacing[2]);
+        basis = static_cast<DataType>(z) / gridVoxelSpacing[2] - static_cast<DataType>(zPre);
         if (basis < 0) basis = 0; //rounding error
-        get_BSplineBasisValues<DTYPE>(basis, basisZ, firstZ);
+        get_BSplineBasisValues<DataType>(basis, basisZ, firstZ);
 
         for (y = 0; y < referenceImage->ny; ++y) {
-            yPre = static_cast<int>(static_cast<DTYPE>(y) / gridVoxelSpacing[1]);
-            basis = static_cast<DTYPE>(y) / gridVoxelSpacing[1] - static_cast<DTYPE>(yPre);
+            yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
+            basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
             if (basis < 0) basis = 0; //rounding error
-            get_BSplineBasisValues<DTYPE>(basis, basisY, firstY);
+            get_BSplineBasisValues<DataType>(basis, basisY, firstY);
 
             for (x = 0; x < referenceImage->nx; ++x) {
-                xPre = static_cast<int>(static_cast<DTYPE>(x) / gridVoxelSpacing[0]);
-                basis = static_cast<DTYPE>(x) / gridVoxelSpacing[0] - static_cast<DTYPE>(xPre);
+                xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
+                basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
                 if (basis < 0) basis = 0; //rounding error
-                get_BSplineBasisValues<DTYPE>(basis, basisX, firstX);
+                get_BSplineBasisValues<DataType>(basis, basisX, firstX);
 
                 memset(&matrix, 0, sizeof(mat33));
 
@@ -1142,7 +1142,7 @@ void reg_spline_linearEnergyGradient(const nifti_image *referenceImage,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint,
                                              nifti_image *gradientImage,
                                              float weight) {
@@ -1150,13 +1150,13 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi
     int x, y, a, b, i, index;
 
     // Create pointers to the spline coefficients
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
 
     // Store the basis values since they are constant as the value is approximated
     // at the control point positions only
-    DTYPE basisX[9];
-    DTYPE basisY[9];
+    DataType basisX[9];
+    DataType basisY[9];
     set_first_order_basis_values(basisX, basisY);
 
     // Matrix to use to convert the gradient from mm to voxel
@@ -1166,16 +1166,16 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi
     else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
     mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
 
-    DTYPE splineCoeffX;
-    DTYPE splineCoeffY;
+    DataType splineCoeffX;
+    DataType splineCoeffY;
 
     mat33 matrix, R;
 
-    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
-    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
+    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
+    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
 
-    DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber;
-    DTYPE gradValues[2];
+    DataType approxRatio = (DataType)weight / (DataType)nodeNumber;
+    DataType gradValues[2];
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -1237,7 +1237,7 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi
     } // y
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint,
                                              nifti_image *gradientImage,
                                              float weight) {
@@ -1245,15 +1245,15 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi
     int x, y, z, a, b, c, i, index;
 
     // Create pointers to the spline coefficients
-    const DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    const DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-    const DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
+    const DataType *splinePtrZ = &splinePtrY[nodeNumber];
 
     // Store the basis values since they are constant as the value is approximated
     // at the control point positions only
-    DTYPE basisX[27];
-    DTYPE basisY[27];
-    DTYPE basisZ[27];
+    DataType basisX[27];
+    DataType basisY[27];
+    DataType basisZ[27];
     set_first_order_basis_values(basisX, basisY, basisZ);
 
     // Matrix to use to convert the gradient from mm to voxel
@@ -1263,18 +1263,18 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi
     else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
     mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
 
-    DTYPE splineCoeffX;
-    DTYPE splineCoeffY;
-    DTYPE splineCoeffZ;
+    DataType splineCoeffX;
+    DataType splineCoeffY;
+    DataType splineCoeffZ;
 
     mat33 matrix, R;
 
-    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
-    DTYPE *gradientYPtr = &gradientXPtr[nodeNumber];
-    DTYPE *gradientZPtr = &gradientYPtr[nodeNumber];
+    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
+    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
+    DataType *gradientZPtr = &gradientYPtr[nodeNumber];
 
-    DTYPE approxRatio = (DTYPE)weight / (DTYPE)(nodeNumber);
-    DTYPE gradValues[3];
+    DataType approxRatio = (DataType)weight / (DataType)(nodeNumber);
+    DataType gradValues[3];
 
     for (z = 1; z < splineControlPoint->nz - 1; z++) {
         for (y = 1; y < splineControlPoint->ny - 1; y++) {
@@ -1380,20 +1380,20 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
     const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2);
     int a, b, x, y, X, Y, index;
-    DTYPE basis[2] = {1, 0};
-    DTYPE first[2] = {-1, 1};
+    DataType basis[2] = {1, 0};
+    DataType first[2] = {-1, 1};
 
     double constraintValue = 0;
     double currentValue;
 
     // Create pointers to the deformation field
-    const DTYPE *defPtrX = static_cast<DTYPE*>(deformationField->data);
-    const DTYPE *defPtrY = &defPtrX[voxelNumber];
-    DTYPE defX, defY;
+    const DataType *defPtrX = static_cast<DataType*>(deformationField->data);
+    const DataType *defPtrY = &defPtrX[voxelNumber];
+    DataType defX, defY;
 
     mat33 matrix, R;
 
@@ -1443,21 +1443,21 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
     return constraintValue / static_cast<double>(deformationField->nvox);
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) {
     const size_t voxelNumber = CalcVoxelNumber(*deformationField);
     int a, b, c, x, y, z, X, Y, Z, index;
-    DTYPE basis[2] = {1, 0};
-    DTYPE first[2] = {-1, 1};
+    DataType basis[2] = {1, 0};
+    DataType first[2] = {-1, 1};
 
     double constraintValue = 0;
     double currentValue;
 
     // Create pointers to the deformation field
-    const DTYPE *defPtrX = static_cast<DTYPE*>(deformationField->data);
-    const DTYPE *defPtrY = &defPtrX[voxelNumber];
-    const DTYPE *defPtrZ = &defPtrY[voxelNumber];
-    DTYPE defX, defY, defZ;
+    const DataType *defPtrX = static_cast<DataType*>(deformationField->data);
+    const DataType *defPtrY = &defPtrX[voxelNumber];
+    const DataType *defPtrZ = &defPtrY[voxelNumber];
+    DataType defX, defY, defZ;
 
     mat33 matrix, R;
 
@@ -1547,27 +1547,27 @@ double reg_defField_linearEnergy(const nifti_image *deformationField) {
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField,
                                          nifti_image *gradientImage,
                                          float weight) {
     const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2);
     int a, b, x, y, X, Y, index;
-    DTYPE basis[2] = {1, 0};
-    DTYPE first[2] = {-1, 1};
+    DataType basis[2] = {1, 0};
+    DataType first[2] = {-1, 1};
 
     // Create pointers to the deformation field
-    const DTYPE *defPtrX = static_cast<DTYPE*>(deformationField->data);
-    const DTYPE *defPtrY = &defPtrX[voxelNumber];
-    DTYPE defX, defY;
+    const DataType *defPtrX = static_cast<DataType*>(deformationField->data);
+    const DataType *defPtrY = &defPtrX[voxelNumber];
+    DataType defX, defY;
 
     mat33 matrix, R;
 
-    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
-    DTYPE *gradientYPtr = &gradientXPtr[voxelNumber];
+    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
+    DataType *gradientYPtr = &gradientXPtr[voxelNumber];
 
-    DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber;
-    DTYPE gradValues[2];
+    DataType approxRatio = (DataType)weight / (DataType)voxelNumber;
+    DataType gradValues[2];
 
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
@@ -1619,29 +1619,29 @@ void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField,
                                          nifti_image *gradientImage,
                                          float weight) {
     const size_t voxelNumber = CalcVoxelNumber(*deformationField);
     int a, b, c, x, y, z, X, Y, Z, index;
-    DTYPE basis[2] = {1, 0};
-    DTYPE first[2] = {-1, 1};
+    DataType basis[2] = {1, 0};
+    DataType first[2] = {-1, 1};
 
     // Create pointers to the deformation field
-    const DTYPE *defPtrX = static_cast<DTYPE*>(deformationField->data);
-    const DTYPE *defPtrY = &defPtrX[voxelNumber];
-    const DTYPE *defPtrZ = &defPtrY[voxelNumber];
-    DTYPE defX, defY, defZ;
+    const DataType *defPtrX = static_cast<DataType*>(deformationField->data);
+    const DataType *defPtrY = &defPtrX[voxelNumber];
+    const DataType *defPtrZ = &defPtrY[voxelNumber];
+    DataType defX, defY, defZ;
 
     mat33 matrix, R;
 
-    DTYPE *gradientXPtr = static_cast<DTYPE*>(gradientImage->data);
-    DTYPE *gradientYPtr = &gradientXPtr[voxelNumber];
-    DTYPE *gradientZPtr = &gradientYPtr[voxelNumber];
+    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
+    DataType *gradientYPtr = &gradientXPtr[voxelNumber];
+    DataType *gradientZPtr = &gradientYPtr[voxelNumber];
 
-    DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber;
-    DTYPE gradValues[3];
+    DataType approxRatio = (DataType)weight / (DataType)voxelNumber;
+    DataType gradValues[3];
 
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
@@ -1746,7 +1746,7 @@ void reg_defField_linearEnergyGradient(const nifti_image *deformationField,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
                                            size_t landmarkNumber,
                                            float *landmarkReference,
@@ -1759,13 +1759,13 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
     float def_position[4];
     float flo_position[4];
     int previous[3], a, b, c;
-    DTYPE basisX[4], basisY[4], basisZ[4], basis;
+    DataType basisX[4], basisY[4], basisZ[4], basis;
     const mat44 *gridRealToVox = &(controlPointImage->qto_ijk);
     if (controlPointImage->sform_code > 0)
         gridRealToVox = &(controlPointImage->sto_ijk);
-    const DTYPE *gridPtrX = static_cast<DTYPE*>(controlPointImage->data);
-    const DTYPE *gridPtrY = &gridPtrX[controlPointNumber];
-    const DTYPE *gridPtrZ = nullptr;
+    const DataType *gridPtrX = static_cast<DataType*>(controlPointImage->data);
+    const DataType *gridPtrY = &gridPtrX[controlPointNumber];
+    const DataType *gridPtrZ = nullptr;
     if (imageDim > 2)
         gridPtrZ = &gridPtrY[controlPointNumber];
 
@@ -1793,9 +1793,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
             previous[1] > -1 && previous[1] + 3 < controlPointImage->ny &&
             ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) {
             // Extract the corresponding basis values
-            get_BSplineBasisValues<DTYPE>(def_position[0] - 1 - (DTYPE)previous[0], basisX);
-            get_BSplineBasisValues<DTYPE>(def_position[1] - 1 - (DTYPE)previous[1], basisY);
-            get_BSplineBasisValues<DTYPE>(def_position[2] - 1 - (DTYPE)previous[2], basisZ);
+            get_BSplineBasisValues<DataType>(def_position[0] - 1 - (DataType)previous[0], basisX);
+            get_BSplineBasisValues<DataType>(def_position[1] - 1 - (DataType)previous[1], basisY);
+            get_BSplineBasisValues<DataType>(def_position[2] - 1 - (DataType)previous[2], basisZ);
             def_position[0] = 0;
             def_position[1] = 0;
             def_position[2] = 0;
@@ -1864,7 +1864,7 @@ double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPointImage,
                                                  nifti_image *gradientImage,
                                                  size_t landmarkNumber,
@@ -1878,16 +1878,16 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
     float def_position[3];
     float flo_position[3];
     int previous[3], a, b, c;
-    DTYPE basisX[4], basisY[4], basisZ[4], basis;
+    DataType basisX[4], basisY[4], basisZ[4], basis;
     const mat44 *gridRealToVox = &(controlPointImage->qto_ijk);
     if (controlPointImage->sform_code > 0)
         gridRealToVox = &(controlPointImage->sto_ijk);
-    const DTYPE *gridPtrX = static_cast<DTYPE*>(controlPointImage->data);
-    DTYPE *gradPtrX = static_cast<DTYPE*>(gradientImage->data);
-    const DTYPE *gridPtrY = &gridPtrX[controlPointNumber];
-    DTYPE *gradPtrY = &gradPtrX[controlPointNumber];
-    const DTYPE *gridPtrZ = nullptr;
-    DTYPE *gradPtrZ = nullptr;
+    const DataType *gridPtrX = static_cast<DataType*>(controlPointImage->data);
+    DataType *gradPtrX = static_cast<DataType*>(gradientImage->data);
+    const DataType *gridPtrY = &gridPtrX[controlPointNumber];
+    DataType *gradPtrY = &gradPtrX[controlPointNumber];
+    const DataType *gridPtrZ = nullptr;
+    DataType *gradPtrZ = nullptr;
     if (imageDim > 2) {
         gridPtrZ = &gridPtrY[controlPointNumber];
         gradPtrZ = &gradPtrY[controlPointNumber];
@@ -1916,9 +1916,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
             previous[1] > -1 && previous[1] + 3 < controlPointImage->ny &&
             ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) {
             // Extract the corresponding basis values
-            get_BSplineBasisValues<DTYPE>(def_position[0] - 1 - (DTYPE)previous[0], basisX);
-            get_BSplineBasisValues<DTYPE>(def_position[1] - 1 - (DTYPE)previous[1], basisY);
-            get_BSplineBasisValues<DTYPE>(def_position[2] - 1 - (DTYPE)previous[2], basisZ);
+            get_BSplineBasisValues<DataType>(def_position[0] - 1 - (DataType)previous[0], basisX);
+            get_BSplineBasisValues<DataType>(def_position[1] - 1 - (DataType)previous[1], basisY);
+            get_BSplineBasisValues<DataType>(def_position[2] - 1 - (DataType)previous[2], basisZ);
             def_position[0] = 0;
             def_position[1] = 0;
             def_position[2] = 0;
@@ -2013,21 +2013,21 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) {
     const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
     int x, y, z, index;
 
     // Create pointers to the spline coefficients
     reg_getDisplacementFromDeformation(splineControlPoint);
-    DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-    DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    DataType *splinePtrY = &splinePtrX[nodeNumber];
+    DataType *splinePtrZ = &splinePtrY[nodeNumber];
 
-    DTYPE centralCP[3], neigbCP[3];
+    DataType centralCP[3], neigbCP[3];
 
     double constraintValue = 0;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(index, x, y, z, centralCP, neigbCP) \
     shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ) \
@@ -2112,7 +2112,7 @@ double reg_spline_approxLinearPairwise(nifti_image *splineControlPoint) {
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint,
                                                nifti_image *gradientImage,
                                                float weight) {
@@ -2121,21 +2121,21 @@ void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint,
 
     // Create pointers to the spline coefficients
     reg_getDisplacementFromDeformation(splineControlPoint);
-    DTYPE *splinePtrX = static_cast<DTYPE*>(splineControlPoint->data);
-    DTYPE *splinePtrY = &splinePtrX[nodeNumber];
-    DTYPE *splinePtrZ = &splinePtrY[nodeNumber];
+    DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    DataType *splinePtrY = &splinePtrX[nodeNumber];
+    DataType *splinePtrZ = &splinePtrY[nodeNumber];
 
     // Pointers to the gradient image
-    DTYPE *gradPtrX = static_cast<DTYPE*>(gradientImage->data);
-    DTYPE *gradPtrY = &gradPtrX[nodeNumber];
-    DTYPE *gradPtrZ = &gradPtrY[nodeNumber];
+    DataType *gradPtrX = static_cast<DataType*>(gradientImage->data);
+    DataType *gradPtrY = &gradPtrX[nodeNumber];
+    DataType *gradPtrZ = &gradPtrY[nodeNumber];
 
-    DTYPE centralCP[3], neigbCP[3];
+    DataType centralCP[3], neigbCP[3];
 
     double grad_values[3];
 
-    DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber;
-#if defined (_OPENMP)
+    DataType approxRatio = (DataType)weight / (DataType)nodeNumber;
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(index, x, y, z, centralCP, neigbCP, grad_values) \
     shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, approxRatio, \
@@ -2202,9 +2202,9 @@ void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint,
                     grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dz;
                     grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dz;
                 }
-                gradPtrX[index] += approxRatio * static_cast<DTYPE>(grad_values[0]);
-                gradPtrY[index] += approxRatio * static_cast<DTYPE>(grad_values[1]);
-                gradPtrZ[index] += approxRatio * static_cast<DTYPE>(grad_values[2]);
+                gradPtrX[index] += approxRatio * static_cast<DataType>(grad_values[0]);
+                gradPtrY[index] += approxRatio * static_cast<DataType>(grad_values[1]);
+                gradPtrZ[index] += approxRatio * static_cast<DataType>(grad_values[2]);
 
                 index++;
             } // x
diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp
index 16df2f9f..7ca78285 100644
--- a/reg-lib/cpu/_reg_maths.cpp
+++ b/reg-lib/cpu/_reg_maths.cpp
@@ -425,13 +425,13 @@ void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum)
 }
 /* *************************************************************** */
 // Heap sort
-template<class DTYPE>
-void reg_heapSort(DTYPE *array_tmp, int blockNum)
+template<class DataType>
+void reg_heapSort(DataType *array_tmp, int blockNum)
 {
-    DTYPE *array = &array_tmp[-1];
+    DataType *array = &array_tmp[-1];
     int l = (blockNum >> 1) + 1;
     int ir = blockNum;
-    DTYPE val;
+    DataType val;
     for (;;)
     {
         if (l > 1)
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index 726144c7..2aa2ff61 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -22,7 +22,7 @@
 #include <stdexcept>
 #include "nifti1_io.h"
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #include <omp.h>
 #endif
 
diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp
index 6872b5fb..7bd48f42 100644
--- a/reg-lib/cpu/_reg_maths_eigen.cpp
+++ b/reg-lib/cpu/_reg_maths_eigen.cpp
@@ -36,7 +36,7 @@ void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) {
    Eigen::MatrixXd m(size_m, size_n);
 
    //Convert to Eigen matrix
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(in,m, size__m, size__n) \
    private(sm, sn)
@@ -51,7 +51,7 @@ void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) {
 
    Eigen::JacobiSVD<Eigen::MatrixXd> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(in,svd,v,w, size__n,size__m) \
    private(sn2, sn, sm)
@@ -97,7 +97,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
    Eigen::MatrixXd m(size__m, size__n);
 
    //Convert to Eigen matrix
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(in, m, size__m, size__n) \
    private(sm, sn)
@@ -113,7 +113,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
    Eigen::JacobiSVD<Eigen::MatrixXd> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
 
    min_dim = std::min(size__m, size__n);
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(svd, min_dim, S) \
    private(i, j)
@@ -131,7 +131,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
    }
 
    if (size__m > size__n) {
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(svd, min_dim, V) \
    private(i, j)
@@ -143,7 +143,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
 
          }
       }
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(svd, size__m, size__n, U) \
    private(i, j)
@@ -155,7 +155,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
       }
    }
    else {
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(svd, min_dim, U) \
    private(i, j)
@@ -167,7 +167,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
 
          }
       }
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(svd, size__m, size__n, V) \
    private(i, j)
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index fd110cf6..d2708c41 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -13,22 +13,22 @@
 #include "_reg_mind.h"
 
  /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void ShiftImage(nifti_image* inputImgPtr,
                 nifti_image* shiftedImgPtr,
                 int *maskPtr,
                 int tx,
                 int ty,
                 int tz) {
-    DTYPE* inputData = static_cast<DTYPE*>(inputImgPtr->data);
-    DTYPE* shiftImageData = static_cast<DTYPE*>(shiftedImgPtr->data);
+    DataType* inputData = static_cast<DataType*>(inputImgPtr->data);
+    DataType* shiftImageData = static_cast<DataType*>(shiftedImgPtr->data);
 
     int currentIndex;
     int shiftedIndex;
 
     int x, y, z, old_x, old_y, old_z;
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(inputData, shiftImageData, shiftedImgPtr, inputImgPtr, \
     maskPtr, tx, ty, tz) \
@@ -50,12 +50,12 @@ void ShiftImage(nifti_image* inputImgPtr,
                         shiftImageData[currentIndex] = inputData[shiftedIndex];
                     } // mask is not defined
                     else {
-                        //shiftImageData[currentIndex]=std::numeric_limits<DTYPE>::quiet_NaN();
+                        //shiftImageData[currentIndex]=std::numeric_limits<DataType>::quiet_NaN();
                         shiftImageData[currentIndex] = 0;
                     }
                 } // outside of the image
                 else {
-                    //shiftImageData[currentIndex]=std::numeric_limits<DTYPE>::quiet_NaN();
+                    //shiftImageData[currentIndex]=std::numeric_limits<DataType>::quiet_NaN();
                     shiftImageData[currentIndex] = 0;
                 }
                 currentIndex++;
@@ -64,7 +64,7 @@ void ShiftImage(nifti_image* inputImgPtr,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void GetMINDImageDescriptor_core(nifti_image* inputImage,
                                 nifti_image* MINDImage,
                                 int *maskPtr,
@@ -79,19 +79,19 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
 #endif
 
     // Create a pointer to the descriptor image
-    DTYPE* MINDImgDataPtr = static_cast<DTYPE*>(MINDImage->data);
+    DataType* MINDImgDataPtr = static_cast<DataType*>(MINDImage->data);
 
     // Allocate an image to store the current timepoint reference image
     nifti_image *currentInputImage = nifti_copy_nim_info(inputImage);
     currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2;
     currentInputImage->nt = currentInputImage->dim[4] = 1;
     currentInputImage->nvox = voxelNumber;
-    DTYPE *inputImagePtr = static_cast<DTYPE*>(inputImage->data);
+    DataType *inputImagePtr = static_cast<DataType*>(inputImage->data);
     currentInputImage->data = static_cast<void*>(&inputImagePtr[current_timepoint * voxelNumber]);
 
     // Allocate an image to store the mean image
     nifti_image *meanImage = nifti_dup(*currentInputImage, false);
-    DTYPE* meanImgDataPtr = static_cast<DTYPE*>(meanImage->data);
+    DataType* meanImgDataPtr = static_cast<DataType*>(meanImage->data);
 
     // Allocate an image to store the shifted image
     nifti_image *shiftedImage = nifti_dup(*currentInputImage, false);
@@ -109,7 +109,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
     int RSampling3D_z[6] = {0, 0, 0, 0, -descriptorOffset, descriptorOffset};
 
     for (int i = 0; i < samplingNbr; i++) {
-        ShiftImage<DTYPE>(currentInputImage, shiftedImage, maskPtr,
+        ShiftImage<DataType>(currentInputImage, shiftedImage, maskPtr,
                           RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
         reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image);
         reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
@@ -125,8 +125,8 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
 
     // Compute the MIND descriptor
     int mindIndex;
-    DTYPE meanValue, max_desc, descValue;
-#if defined (_OPENMP)
+    DataType meanValue, max_desc, descValue;
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber, samplingNbr, maskPtr, meanImgDataPtr, \
     MINDImgDataPtr) \
@@ -138,12 +138,12 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
             // Get the mean value for the current voxel
             meanValue = meanImgDataPtr[voxelIndex];
             if (meanValue == 0) {
-                meanValue = std::numeric_limits<DTYPE>::epsilon();
+                meanValue = std::numeric_limits<DataType>::epsilon();
             }
             max_desc = 0;
             mindIndex = voxelIndex;
             for (int t = 0; t < samplingNbr; t++) {
-                descValue = (DTYPE)exp(-MINDImgDataPtr[mindIndex] / meanValue);
+                descValue = (DataType)exp(-MINDImgDataPtr[mindIndex] / meanValue);
                 MINDImgDataPtr[mindIndex] = descValue;
                 max_desc = (std::max)(max_desc, descValue);
                 mindIndex += voxelNumber;
@@ -194,7 +194,7 @@ void GetMINDImageDescriptor(nifti_image* inputImgPtr,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
                                    nifti_image* MINDSSCImage,
                                    int *maskPtr,
@@ -209,19 +209,19 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
 #endif
 
     // Create a pointer to the descriptor image
-    DTYPE* MINDSSCImgDataPtr = static_cast<DTYPE*>(MINDSSCImage->data);
+    DataType* MINDSSCImgDataPtr = static_cast<DataType*>(MINDSSCImage->data);
 
     // Allocate an image to store the current timepoint reference image
     nifti_image *currentInputImage = nifti_copy_nim_info(inputImage);
     currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2;
     currentInputImage->nt = currentInputImage->dim[4] = 1;
     currentInputImage->nvox = voxelNumber;
-    DTYPE *inputImagePtr = static_cast<DTYPE*>(inputImage->data);
+    DataType *inputImagePtr = static_cast<DataType*>(inputImage->data);
     currentInputImage->data = static_cast<void*>(&inputImagePtr[current_timepoint * voxelNumber]);
 
     // Allocate an image to store the mean image
     nifti_image *mean_img = nifti_dup(*currentInputImage, false);
-    DTYPE* meanImgDataPtr = static_cast<DTYPE*>(mean_img->data);
+    DataType* meanImgDataPtr = static_cast<DataType*>(mean_img->data);
 
     // Allocate an image to store the warped image
     nifti_image *shiftedImage = nifti_dup(*currentInputImage, false);
@@ -252,7 +252,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
     int compteurId = 0;
 
     for (int i = 0; i < samplingNbr; i++) {
-        ShiftImage<DTYPE>(currentInputImage, shiftedImage, maskPtr,
+        ShiftImage<DataType>(currentInputImage, shiftedImage, maskPtr,
                           RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
         reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image);
         reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
@@ -260,7 +260,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
 
         for (int j = 0; j < 2; j++) {
 
-            ShiftImage<DTYPE>(diff_image, diff_imageShifted, mask_diff_image,
+            ShiftImage<DataType>(diff_image, diff_imageShifted, mask_diff_image,
                               tx[compteurId], ty[compteurId], tz[compteurId]);
 
             reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img);
@@ -276,8 +276,8 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
 
     // Compute the MINDSSC descriptor
     int mindIndex;
-    DTYPE meanValue, max_desc, descValue;
-#if defined (_OPENMP)
+    DataType meanValue, max_desc, descValue;
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber, lengthDescriptor, samplingNbr, maskPtr, meanImgDataPtr, \
     MINDSSCImgDataPtr) \
@@ -289,12 +289,12 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
             // Get the mean value for the current voxel
             meanValue = meanImgDataPtr[voxelIndex];
             if (meanValue == 0) {
-                meanValue = std::numeric_limits<DTYPE>::epsilon();
+                meanValue = std::numeric_limits<DataType>::epsilon();
             }
             max_desc = 0;
             mindIndex = voxelIndex;
             for (int t = 0; t < lengthDescriptor; t++) {
-                descValue = (DTYPE)exp(-MINDSSCImgDataPtr[mindIndex] / meanValue);
+                descValue = (DataType)exp(-MINDSSCImgDataPtr[mindIndex] / meanValue);
                 MINDSSCImgDataPtr[mindIndex] = descValue;
                 max_desc = std::max(max_desc, descValue);
                 mindIndex += voxelNumber;
diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp
index a259c052..b92118d1 100644
--- a/reg-lib/cpu/_reg_mrf.cpp
+++ b/reg-lib/cpu/_reg_mrf.cpp
@@ -340,7 +340,7 @@ void reg_mrf::Run()
 }
 /*****************************************************/
 /*****************************************************/
-template <class DTYPE>
+template <class DataType>
 void GetGraph_core3D(nifti_image* controlPointGridImage,
                      float* edgeWeightMatrix,
                      int* index_neighbours,
@@ -373,7 +373,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
    float SADNeighbourValue = 0;
 
    // Pointers to the input image
-   DTYPE *refImgPtr = static_cast<DTYPE *>(refImage->data);
+   DataType *refImgPtr = static_cast<DataType *>(refImage->data);
 
    // Loop over all control points
    for(cpz=0; cpz<controlPointGridImage->nz; ++cpz){
@@ -558,7 +558,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
    free(refBlockValue);
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void GetGraph_core2D(nifti_image* controlPointGridImage,
                      float* edgeWeightMatrix,
                      int* index_neighbours,
diff --git a/reg-lib/cpu/_reg_mrf.h b/reg-lib/cpu/_reg_mrf.h
index 4391b1de..e6584ce4 100644
--- a/reg-lib/cpu/_reg_mrf.h
+++ b/reg-lib/cpu/_reg_mrf.h
@@ -103,14 +103,14 @@ class reg_mrf
 };
 /********************************************************************************************************/
 extern "C++"
-template <class DTYPE>
+template <class DataType>
 void GetGraph_core3D(nifti_image* controlPointGridImage,
                      float* edgeWeightMatrix,
                      float* index_neighbours,
                      nifti_image *refImage,
                      int *mask);
 extern "C++"
-template <class DTYPE>
+template <class DataType>
 void GetGraph_core2D(nifti_image* controlPointGridImage,
                      float* edgeWeightMatrix,
                      float* index_neighbours,
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 35d3dd74..745ed3f5 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -189,13 +189,13 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class PrecisionTYPE>
-PrecisionTYPE GetBasisSplineValue(PrecisionTYPE x) {
+template<class PrecisionType>
+PrecisionType GetBasisSplineValue(PrecisionType x) {
     x = fabs(x);
-    PrecisionTYPE value = 0;
+    PrecisionType value = 0;
     if (x < 2.0) {
         if (x < 1.0)
-            value = (PrecisionTYPE)(2.0f / 3.0f + (0.5f * x - 1.0) * x * x);
+            value = (PrecisionType)(2.0f / 3.0f + (0.5f * x - 1.0) * x * x);
         else {
             x -= 2.0f;
             value = -x * x * x / 6.0f;
@@ -204,13 +204,13 @@ PrecisionTYPE GetBasisSplineValue(PrecisionTYPE x) {
     return value;
 }
 /* *************************************************************** */
-template<class PrecisionTYPE>
-PrecisionTYPE GetBasisSplineDerivativeValue(PrecisionTYPE ori) {
-    PrecisionTYPE x = fabs(ori);
-    PrecisionTYPE value = 0;
+template<class PrecisionType>
+PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) {
+    PrecisionType x = fabs(ori);
+    PrecisionType value = 0;
     if (x < 2.0) {
         if (x < 1.0)
-            value = (PrecisionTYPE)((1.5f * x - 2.0) * ori);
+            value = (PrecisionType)((1.5f * x - 2.0) * ori);
         else {
             x -= 2.0f;
             value = -0.5f * x * x;
@@ -221,7 +221,7 @@ PrecisionTYPE GetBasisSplineDerivativeValue(PrecisionTYPE ori) {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_getNMIValue(nifti_image *referenceImage,
                      nifti_image *warpedImage,
                      double *timePointWeight,
@@ -233,8 +233,8 @@ void reg_getNMIValue(nifti_image *referenceImage,
                      double **entropyValues,
                      int *referenceMask) {
     // Create pointers to the image data arrays
-    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
-    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
+    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
     // Useful variable
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
     // Iterate over all active time points
@@ -251,12 +251,12 @@ void reg_getNMIValue(nifti_image *referenceImage,
             // Empty the joint histogram
             memset(jointHistoProPtr, 0, totalBinNumber[t] * sizeof(double));
             // Fill the joint histograms using an approximation
-            DTYPE *refPtr = &refImagePtr[t * voxelNumber];
-            DTYPE *warPtr = &warImagePtr[t * voxelNumber];
+            DataType *refPtr = &refImagePtr[t * voxelNumber];
+            DataType *warPtr = &warImagePtr[t * voxelNumber];
             for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
                 if (referenceMask[voxel] > -1) {
-                    DTYPE refValue = refPtr[voxel];
-                    DTYPE warValue = warPtr[voxel];
+                    DataType refValue = refPtr[voxel];
+                    DataType warValue = warPtr[voxel];
                     if (refValue == refValue && warValue == warValue &&
                         refValue >= 0 && warValue >= 0 &&
                         refValue < referenceBinNumber[t] &&
@@ -474,7 +474,7 @@ double reg_nmi::GetSimilarityMeasureValue() {
     return nmi_value_forward + nmi_value_backward;
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
                                     nifti_image *warpedImage,
                                     unsigned short *referenceBinNumber,
@@ -494,18 +494,18 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 
     // Pointers to the image data
-    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
-    DTYPE *refPtr = &refImagePtr[current_timepoint * voxelNumber];
-    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
-    DTYPE *warPtr = &warImagePtr[current_timepoint * voxelNumber];
+    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
-    DTYPE *warGradPtrX = static_cast<DTYPE*>(warpedGradient->data);
-    DTYPE *warGradPtrY = &warGradPtrX[voxelNumber];
+    DataType *warGradPtrX = static_cast<DataType*>(warpedGradient->data);
+    DataType *warGradPtrY = &warGradPtrX[voxelNumber];
 
     // Pointers to the measure of similarity gradient
-    DTYPE *measureGradPtrX = static_cast<DTYPE*>(measureGradientImage->data);
-    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
+    DataType *measureGradPtrX = static_cast<DataType*>(measureGradientImage->data);
+    DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
 
     // Create pointers to the current joint histogram
     double *logHistoPtr = jointHistogramLog[current_timepoint];
@@ -517,11 +517,11 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
     for (size_t i = 0; i < voxelNumber; ++i) {
         // Check if the voxel belongs to the image mask
         if (referenceMask[i] > -1) {
-            DTYPE refValue = refPtr[i];
-            DTYPE warValue = warPtr[i];
+            DataType refValue = refPtr[i];
+            DataType warValue = warPtr[i];
             if (refValue == refValue && warValue == warValue) {
-                DTYPE gradX = warGradPtrX[i];
-                DTYPE gradY = warGradPtrY[i];
+                DataType gradX = warGradPtrX[i];
+                DataType gradY = warGradPtrY[i];
 
                 double jointDeriv[2] = {0};
                 double refDeriv[2] = {0};
@@ -551,9 +551,9 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
                         }
                     }
                 }
-                measureGradPtrX[i] += (DTYPE)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
+                measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
                                                                   nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrY[i] += (DTYPE)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
+                measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
                                                                   nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
@@ -565,7 +565,7 @@ template void reg_getVoxelBasedNMIGradient2D<float>
 template void reg_getVoxelBasedNMIGradient2D<double>
 (nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double);
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
                                     nifti_image *warpedImage,
                                     unsigned short *referenceBinNumber,
@@ -591,20 +591,20 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
     // Pointers to the image data
-    DTYPE *refImagePtr = static_cast<DTYPE*>(referenceImage->data);
-    DTYPE *refPtr = &refImagePtr[current_timepoint * voxelNumber];
-    DTYPE *warImagePtr = static_cast<DTYPE*>(warpedImage->data);
-    DTYPE *warPtr = &warImagePtr[current_timepoint * voxelNumber];
+    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
-    DTYPE *warGradPtrX = static_cast<DTYPE*>(warpedGradient->data);
-    DTYPE *warGradPtrY = &warGradPtrX[voxelNumber];
-    DTYPE *warGradPtrZ = &warGradPtrY[voxelNumber];
+    DataType *warGradPtrX = static_cast<DataType*>(warpedGradient->data);
+    DataType *warGradPtrY = &warGradPtrX[voxelNumber];
+    DataType *warGradPtrZ = &warGradPtrY[voxelNumber];
 
     // Pointers to the measure of similarity gradient
-    DTYPE *measureGradPtrX = static_cast<DTYPE*>(measureGradientImage->data);
-    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-    DTYPE *measureGradPtrZ = &measureGradPtrY[voxelNumber];
+    DataType *measureGradPtrX = static_cast<DataType*>(measureGradientImage->data);
+    DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
+    DataType *measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
     // Create pointers to the current joint histogram
     double *logHistoPtr = jointHistogramLog[current_timepoint];
@@ -613,10 +613,10 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
     size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint];
     size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint];
     int r, w;
-    DTYPE refValue, warValue, gradX, gradY, gradZ;
+    DataType refValue, warValue, gradX, gradY, gradZ;
     double jointDeriv[3], refDeriv[3], warDeriv[3], commun, jointLog, refLog, warLog;
     // Iterate over all voxel
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(i,r,w,refValue,warValue,gradX,gradY,gradZ, \
     jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \
@@ -666,11 +666,11 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
                         }
                     }
                 }
-                measureGradPtrX[i] += (DTYPE)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
+                measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
                                                                   nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrY[i] += (DTYPE)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
+                measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
                                                                   nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrZ[i] += (DTYPE)(timepoint_weight * (refDeriv[2] + warDeriv[2] -
+                measureGradPtrZ[i] += (DataType)(timepoint_weight * (refDeriv[2] + warDeriv[2] -
                                                                   nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index d1199822..2068a340 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -14,7 +14,7 @@
 
 #include "_reg_measure.h"
 #include <vector>
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #include "omp.h"
 #endif
 
@@ -78,7 +78,7 @@ class reg_nmi: public reg_measure {
 };
 /* *************************************************************** */
 /* *************************************************************** */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void reg_getNMIValue(nifti_image *referenceImage,
                      nifti_image *warpedImage,
                      double *timePointWeight,
@@ -91,7 +91,7 @@ void reg_getNMIValue(nifti_image *referenceImage,
                      int *referenceMask
 );
 /* *************************************************************** */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
                                     nifti_image *warpedImage,
                                     unsigned short *referenceBinNumber,
@@ -105,7 +105,7 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
                                     double timepoint_weight
 );
 /* *************************************************************** */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
                                     nifti_image *warpedImage,
                                     unsigned short *referenceBinNumber,
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index 0788efb6..f04f64a5 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -5,8 +5,7 @@
 
 #include "_reg_optimiser.h"
 
- /* *************************************************************** */
- /* *************************************************************** */
+/* *************************************************************** */
 template <class T>
 reg_optimiser<T>::reg_optimiser() {
     this->dofNumber = 0;
@@ -33,7 +32,6 @@ reg_optimiser<T>::reg_optimiser() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 reg_optimiser<T>::~reg_optimiser() {
     if (this->bestDOF != nullptr)
@@ -47,7 +45,6 @@ reg_optimiser<T>::~reg_optimiser() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::Initialise(size_t nvox,
                                   int dim,
@@ -96,7 +93,6 @@ void reg_optimiser<T>::Initialise(size_t nvox,
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::RestoreBestDOF() {
     // restore forward transformation
@@ -106,7 +102,6 @@ void reg_optimiser<T>::RestoreBestDOF() {
         memcpy(this->currentDOF_b, this->bestDOF_b, this->dofNumber_b * sizeof(T));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::StoreCurrentDOF() {
     // save forward transformation
@@ -116,7 +111,6 @@ void reg_optimiser<T>::StoreCurrentDOF() {
         memcpy(this->bestDOF_b, this->currentDOF_b, this->dofNumber_b * sizeof(T));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::Perturbation(float length) {
     // initialise the randomiser
@@ -136,7 +130,6 @@ void reg_optimiser<T>::Perturbation(float length) {
     this->currentObjFunctionValue = this->bestObjFunctionValue = this->objFunc->GetObjectiveFunctionValue();
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::Optimise(T maxLength,
                                 T smallLength,
@@ -175,7 +168,7 @@ void reg_optimiser<T>::Optimise(T maxLength,
             addedLength += currentLength;
             // Increase the step size
             currentLength *= 1.1f;
-            currentLength = (currentLength < maxLength) ? currentLength : maxLength;
+            currentLength = std::min(currentLength, static_cast<float>(maxLength));
             // Save the current deformation parametrisation
             this->StoreCurrentDOF();
         } else {
@@ -199,13 +192,11 @@ void reg_optimiser<T>::Optimise(T maxLength,
     this->RestoreBestDOF();
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::reg_test_optimiser() {
     this->objFunc->UpdateParameters(1.f);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 reg_conjugateGradient<T>::reg_conjugateGradient(): reg_optimiser<T>::reg_optimiser() {
     this->array1 = nullptr;
@@ -218,7 +209,6 @@ reg_conjugateGradient<T>::reg_conjugateGradient(): reg_optimiser<T>::reg_optimis
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 reg_conjugateGradient<T>::~reg_conjugateGradient() {
     if (this->array1 != nullptr)
@@ -242,7 +232,6 @@ reg_conjugateGradient<T>::~reg_conjugateGradient() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_conjugateGradient<T>::Initialise(size_t nvox,
                                           int dim,
@@ -288,7 +277,6 @@ void reg_conjugateGradient<T>::Initialise(size_t nvox,
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_conjugateGradient<T>::UpdateGradientValues() {
 #ifdef WIN32
@@ -314,7 +302,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
         reg_print_msg_debug("Conjugate gradient initialisation");
 #endif
         // first conjugate gradient iteration
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(num,array1Ptr,array2Ptr,gradientPtr) \
     private(i)
@@ -323,7 +311,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
             array2Ptr[i] = array1Ptr[i] = -gradientPtr[i];
         }
         if (this->dofNumber_b > 0) {
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \
     private(i)
@@ -338,7 +326,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
         reg_print_msg_debug("Conjugate gradient update");
 #endif
         double dgg = 0, gg = 0;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(num,array1Ptr,array2Ptr,gradientPtr) \
     private(i) \
@@ -353,7 +341,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
 
         if (this->dofNumber_b > 0) {
             double dgg_b = 0, gg_b = 0;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \
     private(i) \
@@ -366,7 +354,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
             }
             gam = (dgg + dgg_b) / (gg + gg_b);
         }
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(num,array1Ptr,array2Ptr,gradientPtr,gam) \
     private(i)
@@ -377,7 +365,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
             gradientPtr[i] = -array2Ptr[i];
         }
         if (this->dofNumber_b > 0) {
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b,gam) \
     private(i)
@@ -389,10 +377,8 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
             }
         }
     }
-    return;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_conjugateGradient<T>::Optimise(T maxLength,
                                         T smallLength,
@@ -403,21 +389,18 @@ void reg_conjugateGradient<T>::Optimise(T maxLength,
                                startLength);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_conjugateGradient<T>::Perturbation(float length) {
     reg_optimiser<T>::Perturbation(length);
     this->firstcall = true;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_conjugateGradient<T>::reg_test_optimiser() {
     this->UpdateGradientValues();
     reg_optimiser<T>::reg_test_optimiser();
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 reg_lbfgs<T>::reg_lbfgs()
     :reg_optimiser<T>::reg_optimiser() {
@@ -428,7 +411,6 @@ reg_lbfgs<T>::reg_lbfgs()
     this->diffGrad = nullptr;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 reg_lbfgs<T>::~reg_lbfgs() {
     if (this->oldDOF != nullptr)
@@ -453,7 +435,6 @@ reg_lbfgs<T>::~reg_lbfgs() {
     this->diffGrad = nullptr;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_lbfgs<T>::Initialise(size_t nvox,
                               int dim,
@@ -502,13 +483,11 @@ void reg_lbfgs<T>::Initialise(size_t nvox,
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_lbfgs<T>::UpdateGradientValues() {
 
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class T>
 void reg_lbfgs<T>::Optimise(T maxLength,
                             T smallLength,
@@ -519,7 +498,6 @@ void reg_lbfgs<T>::Optimise(T maxLength,
                                startLength);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 //template class reg_optimiser<float>;
 //template class reg_conjugateGradient<float>;
 //template class reg_lbfgs<float>;
diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h
index c0b7092e..d15b1365 100644
--- a/reg-lib/cpu/_reg_optimiser.h
+++ b/reg-lib/cpu/_reg_optimiser.h
@@ -10,7 +10,6 @@
 #include <stdio.h>
 #include <time.h>
 
-/* *************************************************************** */
 /* *************************************************************** */
 /** @brief Interface between the registration class and the optimiser
  */
@@ -22,15 +21,8 @@ class InterfaceOptimiser {
     virtual void UpdateParameters(float) = 0;
     /// @brief The best objective function values are stored
     virtual void UpdateBestObjFunctionValue() = 0;
-
-protected:
-    /// @brief Interface constructor
-    InterfaceOptimiser() {}
-    /// @brief Interface destructor
-    virtual ~InterfaceOptimiser() {}
 };
 /* *************************************************************** */
-/* *************************************************************** */
 /** @class reg_optimiser
  * @brief Standard gradient ascent optimisation
  */
@@ -146,7 +138,6 @@ class reg_optimiser {
     virtual void reg_test_optimiser();
 };
 /* *************************************************************** */
-/* *************************************************************** */
 /** @class reg_conjugateGradient
  * @brief Conjugate gradient ascent optimisation
  */
@@ -186,7 +177,6 @@ class reg_conjugateGradient: public reg_optimiser<T> {
     virtual void reg_test_optimiser() override;
 };
 /* *************************************************************** */
-/* *************************************************************** */
 /** @class Global optimisation class
  * @brief
  */
@@ -221,5 +211,4 @@ class reg_lbfgs: public reg_optimiser<T> {
     virtual void UpdateGradientValues() override;
 };
 /* *************************************************************** */
-/* *************************************************************** */
 #include "_reg_optimiser.cpp"
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 5835c229..83abc996 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -110,7 +110,7 @@ void interpNearestNeighKernel(double relative, double *basis)
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
                                       void **originalFloatingData,
                                       int *dtIndicies)
@@ -136,31 +136,31 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
         const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
 #endif
 
-        *originalFloatingData=malloc(floatingImage->nvox*sizeof(DTYPE));
+        *originalFloatingData=malloc(floatingImage->nvox*sizeof(DataType));
         memcpy(*originalFloatingData,
                floatingImage->data,
-               floatingImage->nvox*sizeof(DTYPE));
+               floatingImage->nvox*sizeof(DataType));
 #ifndef NDEBUG
         reg_print_msg_debug("The floating image data has been copied");
 #endif
 
         /* As the tensor has 6 unique components that we need to worry about, read them out
       for the floating image. */
-        DTYPE *firstVox = static_cast<DTYPE *>(floatingImage->data);
+        DataType *firstVox = static_cast<DataType *>(floatingImage->data);
         // CAUTION: Here the tensor is assumed to be encoding in lower triangular order
-        DTYPE *floatingIntensityXX = &firstVox[floatingVoxelNumber*dtIndicies[0]];
-        DTYPE *floatingIntensityXY = &firstVox[floatingVoxelNumber*dtIndicies[1]];
-        DTYPE *floatingIntensityYY = &firstVox[floatingVoxelNumber*dtIndicies[2]];
-        DTYPE *floatingIntensityXZ = &firstVox[floatingVoxelNumber*dtIndicies[3]];
-        DTYPE *floatingIntensityYZ = &firstVox[floatingVoxelNumber*dtIndicies[4]];
-        DTYPE *floatingIntensityZZ = &firstVox[floatingVoxelNumber*dtIndicies[5]];
+        DataType *floatingIntensityXX = &firstVox[floatingVoxelNumber*dtIndicies[0]];
+        DataType *floatingIntensityXY = &firstVox[floatingVoxelNumber*dtIndicies[1]];
+        DataType *floatingIntensityYY = &firstVox[floatingVoxelNumber*dtIndicies[2]];
+        DataType *floatingIntensityXZ = &firstVox[floatingVoxelNumber*dtIndicies[3]];
+        DataType *floatingIntensityYZ = &firstVox[floatingVoxelNumber*dtIndicies[4]];
+        DataType *floatingIntensityZZ = &firstVox[floatingVoxelNumber*dtIndicies[5]];
 
 
         // Should log the tensor up front
         // We need to take the logarithm of the tensor for each voxel in the floating intensity
         // image, and replace the warped
         int tid=0;
-#if defined (_OPENMP)
+#ifdef _OPENMP
         mat33 diffTensor[16];
         int max_thread_number = omp_get_max_threads();
         if(max_thread_number>16) omp_set_num_threads(16);
@@ -174,7 +174,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
 #endif
         for(floatingIndex=0; floatingIndex<floatingVoxelNumber; ++floatingIndex)
         {
-#if defined (_OPENMP)
+#ifdef _OPENMP
             tid=omp_get_thread_num();
 #endif
             // Fill a mat44 with the tensor components
@@ -192,14 +192,14 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
             reg_mat33_logm(&diffTensor[tid]);
 
             // Write this out as a new image
-            floatingIntensityXX[floatingIndex] = static_cast<DTYPE>(diffTensor[tid].m[0][0]);
-            floatingIntensityXY[floatingIndex] = static_cast<DTYPE>(diffTensor[tid].m[0][1]);
-            floatingIntensityYY[floatingIndex] = static_cast<DTYPE>(diffTensor[tid].m[1][1]);
-            floatingIntensityXZ[floatingIndex] = static_cast<DTYPE>(diffTensor[tid].m[0][2]);
-            floatingIntensityYZ[floatingIndex] = static_cast<DTYPE>(diffTensor[tid].m[1][2]);
-            floatingIntensityZZ[floatingIndex] = static_cast<DTYPE>(diffTensor[tid].m[2][2]);
+            floatingIntensityXX[floatingIndex] = static_cast<DataType>(diffTensor[tid].m[0][0]);
+            floatingIntensityXY[floatingIndex] = static_cast<DataType>(diffTensor[tid].m[0][1]);
+            floatingIntensityYY[floatingIndex] = static_cast<DataType>(diffTensor[tid].m[1][1]);
+            floatingIntensityXZ[floatingIndex] = static_cast<DataType>(diffTensor[tid].m[0][2]);
+            floatingIntensityYZ[floatingIndex] = static_cast<DataType>(diffTensor[tid].m[1][2]);
+            floatingIntensityZZ[floatingIndex] = static_cast<DataType>(diffTensor[tid].m[2][2]);
         }
-#if defined (_OPENMP)
+#ifdef _OPENMP
         omp_set_num_threads(max_thread_number);
 #endif
 #ifndef NDEBUG
@@ -208,7 +208,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
     }
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_dti_resampling_postprocessing(nifti_image *inputImage,
                                        int *mask,
                                        mat33 *jacMat,
@@ -226,10 +226,10 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
         size_t warpedIndex;
         const size_t voxelNumber = CalcVoxelNumber(*inputImage);
 #endif
-        DTYPE *warpVox,*warpedXX,*warpedXY,*warpedXZ,*warpedYY,*warpedYZ,*warpedZZ;
+        DataType *warpVox,*warpedXX,*warpedXY,*warpedXZ,*warpedYY,*warpedYZ,*warpedZZ;
         if(warpedImage!=nullptr)
         {
-            warpVox = static_cast<DTYPE *>(warpedImage->data);
+            warpVox = static_cast<DataType *>(warpedImage->data);
             // CAUTION: Here the tensor is assumed to be encoding in lower triangular order
             warpedXX = &warpVox[voxelNumber*dtIndicies[0]];
             warpedXY = &warpVox[voxelNumber*dtIndicies[1]];
@@ -245,19 +245,19 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
             /* As the tensor has 6 unique components that we need to worry about, read them out
          for the warped image. */
             // CAUTION: Here the tensor is assumed to be encoding in lower triangular order
-            DTYPE *firstWarpVox = static_cast<DTYPE *>(inputImage->data);
-            DTYPE *inputIntensityXX = &firstWarpVox[voxelNumber*(dtIndicies[0]+inputImage->nt*u)];
-            DTYPE *inputIntensityXY = &firstWarpVox[voxelNumber*(dtIndicies[1]+inputImage->nt*u)];
-            DTYPE *inputIntensityYY = &firstWarpVox[voxelNumber*(dtIndicies[2]+inputImage->nt*u)];
-            DTYPE *inputIntensityXZ = &firstWarpVox[voxelNumber*(dtIndicies[3]+inputImage->nt*u)];
-            DTYPE *inputIntensityYZ = &firstWarpVox[voxelNumber*(dtIndicies[4]+inputImage->nt*u)];
-            DTYPE *inputIntensityZZ = &firstWarpVox[voxelNumber*(dtIndicies[5]+inputImage->nt*u)];
+            DataType *firstWarpVox = static_cast<DataType *>(inputImage->data);
+            DataType *inputIntensityXX = &firstWarpVox[voxelNumber*(dtIndicies[0]+inputImage->nt*u)];
+            DataType *inputIntensityXY = &firstWarpVox[voxelNumber*(dtIndicies[1]+inputImage->nt*u)];
+            DataType *inputIntensityYY = &firstWarpVox[voxelNumber*(dtIndicies[2]+inputImage->nt*u)];
+            DataType *inputIntensityXZ = &firstWarpVox[voxelNumber*(dtIndicies[3]+inputImage->nt*u)];
+            DataType *inputIntensityYZ = &firstWarpVox[voxelNumber*(dtIndicies[4]+inputImage->nt*u)];
+            DataType *inputIntensityZZ = &firstWarpVox[voxelNumber*(dtIndicies[5]+inputImage->nt*u)];
 
             // Step through each voxel in the warped image
             double testSum=0;
             int col, row;
             int tid=0;
-#if defined (_OPENMP)
+#ifdef _OPENMP
             mat33 inputTensor[16], warpedTensor[16], RotMat[16], RotMatT[16];
             int max_thread_number = omp_get_max_threads();
             if(max_thread_number>16) omp_set_num_threads(16);
@@ -272,7 +272,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
 #endif
             for(warpedIndex=0; warpedIndex<voxelNumber; ++warpedIndex)
             {
-#if defined (_OPENMP)
+#ifdef _OPENMP
                 tid=omp_get_thread_num();
 #endif
                 if(mask[warpedIndex]>-1)
@@ -325,25 +325,25 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
                         inputTensor[tid] = nifti_mat33_mul(nifti_mat33_mul(RotMatT[tid], inputTensor[tid]), RotMat[tid]);
 
                         // Finally, read the tensor back out as a warped image
-                        inputIntensityXX[warpedIndex] = static_cast<DTYPE>(inputTensor[tid].m[0][0]);
-                        inputIntensityYY[warpedIndex] = static_cast<DTYPE>(inputTensor[tid].m[1][1]);
-                        inputIntensityZZ[warpedIndex] = static_cast<DTYPE>(inputTensor[tid].m[2][2]);
-                        inputIntensityXY[warpedIndex] = static_cast<DTYPE>(inputTensor[tid].m[0][1]);
-                        inputIntensityXZ[warpedIndex] = static_cast<DTYPE>(inputTensor[tid].m[0][2]);
-                        inputIntensityYZ[warpedIndex] = static_cast<DTYPE>(inputTensor[tid].m[1][2]);
+                        inputIntensityXX[warpedIndex] = static_cast<DataType>(inputTensor[tid].m[0][0]);
+                        inputIntensityYY[warpedIndex] = static_cast<DataType>(inputTensor[tid].m[1][1]);
+                        inputIntensityZZ[warpedIndex] = static_cast<DataType>(inputTensor[tid].m[2][2]);
+                        inputIntensityXY[warpedIndex] = static_cast<DataType>(inputTensor[tid].m[0][1]);
+                        inputIntensityXZ[warpedIndex] = static_cast<DataType>(inputTensor[tid].m[0][2]);
+                        inputIntensityYZ[warpedIndex] = static_cast<DataType>(inputTensor[tid].m[1][2]);
                     }
                     else
                     {
-                        inputIntensityXX[warpedIndex] = std::numeric_limits<DTYPE>::quiet_NaN();
-                        inputIntensityYY[warpedIndex] = std::numeric_limits<DTYPE>::quiet_NaN();
-                        inputIntensityZZ[warpedIndex] = std::numeric_limits<DTYPE>::quiet_NaN();
-                        inputIntensityXY[warpedIndex] = std::numeric_limits<DTYPE>::quiet_NaN();
-                        inputIntensityXZ[warpedIndex] = std::numeric_limits<DTYPE>::quiet_NaN();
-                        inputIntensityYZ[warpedIndex] = std::numeric_limits<DTYPE>::quiet_NaN();
+                        inputIntensityXX[warpedIndex] = std::numeric_limits<DataType>::quiet_NaN();
+                        inputIntensityYY[warpedIndex] = std::numeric_limits<DataType>::quiet_NaN();
+                        inputIntensityZZ[warpedIndex] = std::numeric_limits<DataType>::quiet_NaN();
+                        inputIntensityXY[warpedIndex] = std::numeric_limits<DataType>::quiet_NaN();
+                        inputIntensityXZ[warpedIndex] = std::numeric_limits<DataType>::quiet_NaN();
+                        inputIntensityYZ[warpedIndex] = std::numeric_limits<DataType>::quiet_NaN();
                     }
                 }
             }
-#if defined (_OPENMP)
+#ifdef _OPENMP
             omp_set_num_threads(max_thread_number);
 #endif
         }
@@ -428,7 +428,7 @@ void ResampleImage3D(nifti_image *floatingImage,
         double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], zBasis[SINC_KERNEL_SIZE], relative[3];
         double xTempNewValue, yTempNewValue, intensity;
         float world[3], position[3];
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(index, intensity, world, position, previous, xBasis, yBasis, zBasis, relative, \
     a, b, c, Y, Z, zPointer, xyzPointer, xTempNewValue, yTempNewValue) \
@@ -630,7 +630,7 @@ void ResampleImage2D(nifti_image *floatingImage,
         double xTempNewValue, intensity;
         float world[3] = {0, 0, 0};
         float position[3] = {0, 0, 0};
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(index, intensity, world, position, previous, xBasis, yBasis, relative, \
     a, b, Y, xyzPointer, xTempNewValue) \
@@ -1116,7 +1116,7 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage,
         size_t currentIndex;
 
         /*
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(intensity, psfWeightSum, psfWeight, \
     currentA, currentB, currentC, psfWorld, position,  shiftSamp,\
@@ -2032,7 +2032,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_bilinearResampleGradient(nifti_image *floatingImage,
                                   nifti_image *warpedImage,
                                   nifti_image *deformationField,
@@ -2040,12 +2040,12 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage,
 {
     const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
     const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
-    DTYPE *floatingIntensityX = static_cast<DTYPE *>(floatingImage->data);
-    DTYPE *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber];
-    DTYPE *warpedIntensityX = static_cast<DTYPE *>(warpedImage->data);
-    DTYPE *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber];
-    DTYPE *deformationFieldPtrX = static_cast<DTYPE *>(deformationField->data);
-    DTYPE *deformationFieldPtrY = &deformationFieldPtrX[CalcVoxelNumber(*deformationField)];
+    DataType *floatingIntensityX = static_cast<DataType *>(floatingImage->data);
+    DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber];
+    DataType *warpedIntensityX = static_cast<DataType *>(warpedImage->data);
+    DataType *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber];
+    DataType *deformationFieldPtrX = static_cast<DataType *>(deformationField->data);
+    DataType *deformationFieldPtrY = &deformationFieldPtrX[CalcVoxelNumber(*deformationField)];
 
     // Extract the relevant affine matrix
     mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk;
@@ -2069,15 +2069,15 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage,
 
     // Some useful variables
     mat33 jacMat;
-    DTYPE defX,defY;
-    DTYPE basisX[2], basisY[2], deriv[2], basis[2];
-    DTYPE xFloCoord,yFloCoord;
+    DataType defX,defY;
+    DataType basisX[2], basisY[2], deriv[2], basis[2];
+    DataType xFloCoord,yFloCoord;
     int anteIntX[2],anteIntY[2];
     int x,y,a,b,defIndex,floIndex,warpedIndex;
-    DTYPE val_x,val_y,weight[2];
+    DataType val_x,val_y,weight[2];
 
     // Loop over all voxel
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(x,y,a,b,val_x,val_y,defIndex,floIndex,warpedIndex, \
     anteIntX,anteIntY,xFloCoord,yFloCoord, \
@@ -2118,8 +2118,8 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage,
             anteIntY[1]=static_cast<int>(reg_ceil(yFloCoord));
             val_x=0;
             val_y=0;
-            basisX[1]=fabs(xFloCoord-(DTYPE)anteIntX[0]);
-            basisY[1]=fabs(yFloCoord-(DTYPE)anteIntY[0]);
+            basisX[1]=fabs(xFloCoord-(DataType)anteIntX[0]);
+            basisY[1]=fabs(yFloCoord-(DataType)anteIntY[0]);
             basisX[0]=1.0-basisX[1];
             basisY[0]=1.0-basisY[1];
             for(b=0; b<2; ++b)
@@ -2213,7 +2213,7 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage,
     } // y
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_trilinearResampleGradient(nifti_image *floatingImage,
                                    nifti_image *warpedImage,
                                    nifti_image *deformationField,
@@ -2222,15 +2222,15 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage,
     const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
     const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
     const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField);
-    DTYPE *floatingIntensityX = static_cast<DTYPE *>(floatingImage->data);
-    DTYPE *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber];
-    DTYPE *floatingIntensityZ = &floatingIntensityY[floatingVoxelNumber];
-    DTYPE *warpedIntensityX = static_cast<DTYPE *>(warpedImage->data);
-    DTYPE *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber];
-    DTYPE *warpedIntensityZ = &warpedIntensityY[warpedVoxelNumber];
-    DTYPE *deformationFieldPtrX = static_cast<DTYPE *>(deformationField->data);
-    DTYPE *deformationFieldPtrY = &deformationFieldPtrX[deformationFieldVoxelNumber];
-    DTYPE *deformationFieldPtrZ = &deformationFieldPtrY[deformationFieldVoxelNumber];
+    DataType *floatingIntensityX = static_cast<DataType *>(floatingImage->data);
+    DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber];
+    DataType *floatingIntensityZ = &floatingIntensityY[floatingVoxelNumber];
+    DataType *warpedIntensityX = static_cast<DataType *>(warpedImage->data);
+    DataType *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber];
+    DataType *warpedIntensityZ = &warpedIntensityY[warpedVoxelNumber];
+    DataType *deformationFieldPtrX = static_cast<DataType *>(deformationField->data);
+    DataType *deformationFieldPtrY = &deformationFieldPtrX[deformationFieldVoxelNumber];
+    DataType *deformationFieldPtrZ = &deformationFieldPtrY[deformationFieldVoxelNumber];
 
     // Extract the relevant affine matrix
     mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk;
@@ -2255,15 +2255,15 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage,
 
     // Some useful variables
     mat33 jacMat;
-    DTYPE defX,defY,defZ;
-    DTYPE basisX[2], basisY[2], basisZ[2], deriv[2], basis[2];
-    DTYPE xFloCoord,yFloCoord,zFloCoord;
+    DataType defX,defY,defZ;
+    DataType basisX[2], basisY[2], basisZ[2], deriv[2], basis[2];
+    DataType xFloCoord,yFloCoord,zFloCoord;
     int anteIntX[2],anteIntY[2],anteIntZ[2];
     int x,y,z,a,b,c,defIndex,floIndex,warpedIndex;
-    DTYPE val_x,val_y,val_z,weight[3];
+    DataType val_x,val_y,val_z,weight[3];
 
     // Loop over all voxel
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(x,y,z,a,b,c,val_x,val_y,val_z,defIndex,floIndex,warpedIndex, \
     anteIntX,anteIntY,anteIntZ,xFloCoord,yFloCoord,zFloCoord, \
@@ -2318,9 +2318,9 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage,
                 val_x=0;
                 val_y=0;
                 val_z=0;
-                basisX[1]=fabs(xFloCoord-(DTYPE)anteIntX[0]);
-                basisY[1]=fabs(yFloCoord-(DTYPE)anteIntY[0]);
-                basisZ[1]=fabs(zFloCoord-(DTYPE)anteIntZ[0]);
+                basisX[1]=fabs(xFloCoord-(DataType)anteIntX[0]);
+                basisY[1]=fabs(yFloCoord-(DataType)anteIntY[0]);
+                basisZ[1]=fabs(zFloCoord-(DataType)anteIntZ[0]);
                 basisX[0]=1.0-basisX[1];
                 basisY[0]=1.0-basisY[1];
                 basisZ[0]=1.0-basisZ[1];
@@ -2573,7 +2573,7 @@ void TrilinearImageGradient(nifti_image *floatingImage,
     FieldTYPE relative, world[3], grad[3], coeff;
     FieldTYPE xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue;
     FloatingTYPE *zPointer, *xyzPointer;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(index, world, position, previous, xBasis, yBasis, zBasis, relative, grad, coeff, \
     a, b, c, X, Y, Z, zPointer, xyzPointer, xTempNewValue, yTempNewValue, xxTempNewValue, yyTempNewValue, zzTempNewValue) \
@@ -2769,7 +2769,7 @@ void BilinearImageGradient(nifti_image *floatingImage,
     int previous[3], a, b, X, Y;
     FloatingTYPE *xyPointer;
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(index, world, position, previous, xBasis, yBasis, relative, grad, coeff, \
     a, b, X, Y, xyPointer, xTempNewValue, yTempNewValue) \
@@ -2901,7 +2901,7 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage,
     FieldTYPE coeff, position[3], world[3], grad[3];
     FieldTYPE xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue;
     FloatingTYPE *zPointer, *yzPointer, *xyzPointer;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(index, world, position, previous, xBasis, yBasis, zBasis, xDeriv, yDeriv, zDeriv, relative, grad, coeff, \
     a, b, c, Y, Z, zPointer, yzPointer, xyzPointer, xTempNewValue, yTempNewValue, xxTempNewValue, yyTempNewValue, zzTempNewValue) \
@@ -3061,7 +3061,7 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage,
     FieldTYPE coeff, position[3], world[3], grad[2];
     FieldTYPE xTempNewValue, yTempNewValue;
     FloatingTYPE *yPointer, *xyPointer;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(index, world, position, previous, xBasis, yBasis, xDeriv, yDeriv, relative, grad, coeff, \
     a, b, Y, yPointer, xyPointer, xTempNewValue, yTempNewValue) \
@@ -3376,7 +3376,7 @@ void reg_getImageGradient(nifti_image *floatingImage,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 void reg_getImageGradient_symDiff_core(nifti_image *img,
                                        nifti_image *gradImg,
                                        int *mask,
@@ -3388,18 +3388,18 @@ void reg_getImageGradient_symDiff_core(nifti_image *img,
     int dimImg = img->nz > 1 ? 3 : 2;
     int x, y, z;
 
-    DTYPE *imgPtr = static_cast<DTYPE *>(img->data);
-    DTYPE *currentImgPtr = &imgPtr[timepoint*voxelNumber];
+    DataType *imgPtr = static_cast<DataType *>(img->data);
+    DataType *currentImgPtr = &imgPtr[timepoint*voxelNumber];
 
-    DTYPE *gradPtrX = static_cast<DTYPE *>(gradImg->data);
-    DTYPE *gradPtrY = &gradPtrX[voxelNumber];
-    DTYPE *gradPtrZ = nullptr;
+    DataType *gradPtrX = static_cast<DataType *>(gradImg->data);
+    DataType *gradPtrY = &gradPtrX[voxelNumber];
+    DataType *gradPtrZ = nullptr;
     if(dimImg==3)
         gradPtrZ = &gradPtrY[voxelNumber];
 
-    DTYPE valX, valY, valZ, pre, post;
+    DataType valX, valY, valZ, pre, post;
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(img, currentImgPtr, mask, \
     gradPtrX, gradPtrY, gradPtrZ, padding_value) \
diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp
index ea63b3b4..911c5487 100755
--- a/reg-lib/cpu/_reg_splineBasis.cpp
+++ b/reg-lib/cpu/_reg_splineBasis.cpp
@@ -14,66 +14,66 @@
 #include "_reg_splineBasis.h"
 
 /* *************************************************************** */
-template<class DTYPE>
-void get_BSplineBasisValues(DTYPE basis, DTYPE *values)
+template<class DataType>
+void get_BSplineBasisValues(DataType basis, DataType *values)
 {
-   DTYPE FF= basis*basis;
-   DTYPE FFF= FF*basis;
-   DTYPE MF=static_cast<DTYPE>(1.0-basis);
-   values[0] = static_cast<DTYPE>((MF)*(MF)*(MF)/(6.0));
-   values[1] = static_cast<DTYPE>((3.0*FFF - 6.0*FF + 4.0)/6.0);
-   values[2] = static_cast<DTYPE>((-3.0*FFF + 3.0*FF + 3.0*basis + 1.0)/6.0);
-   values[3] = static_cast<DTYPE>(FFF/6.0);
+   DataType FF= basis*basis;
+   DataType FFF= FF*basis;
+   DataType MF=static_cast<DataType>(1.0-basis);
+   values[0] = static_cast<DataType>((MF)*(MF)*(MF)/(6.0));
+   values[1] = static_cast<DataType>((3.0*FFF - 6.0*FF + 4.0)/6.0);
+   values[2] = static_cast<DataType>((-3.0*FFF + 3.0*FF + 3.0*basis + 1.0)/6.0);
+   values[3] = static_cast<DataType>(FFF/6.0);
 }
 template void get_BSplineBasisValues<float>(float, float *);
 template void get_BSplineBasisValues<double>(double, double *);
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
-void get_BSplineBasisValues(DTYPE basis, DTYPE *values, DTYPE *first)
+template<class DataType>
+void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first)
 {
-   get_BSplineBasisValues<DTYPE>(basis, values);
-   first[3]= static_cast<DTYPE>(basis * basis / 2.0);
-   first[0]= static_cast<DTYPE>(basis - 1.0/2.0 - first[3]);
-   first[2]= static_cast<DTYPE>(1.0 + first[0] - 2.0*first[3]);
+   get_BSplineBasisValues<DataType>(basis, values);
+   first[3]= static_cast<DataType>(basis * basis / 2.0);
+   first[0]= static_cast<DataType>(basis - 1.0/2.0 - first[3]);
+   first[2]= static_cast<DataType>(1.0 + first[0] - 2.0*first[3]);
    first[1]= - first[0] - first[2] - first[3];
 }
 template void get_BSplineBasisValues<float>(float, float *, float *);
 template void get_BSplineBasisValues<double>(double, double *, double *);
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
-void get_BSplineBasisValues(DTYPE basis, DTYPE *values, DTYPE *first, DTYPE *second)
+template<class DataType>
+void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second)
 {
-   get_BSplineBasisValues<DTYPE>(basis, values, first);
+   get_BSplineBasisValues<DataType>(basis, values, first);
    second[3]= basis;
-   second[0]= static_cast<DTYPE>(1.0 - second[3]);
-   second[2]= static_cast<DTYPE>(second[0] - 2.0*second[3]);
+   second[0]= static_cast<DataType>(1.0 - second[3]);
+   second[2]= static_cast<DataType>(second[0] - 2.0*second[3]);
    second[1]= - second[0] - second[2] - second[3];
 }
 template void get_BSplineBasisValues<float>(float, float *, float *, float *);
 template void get_BSplineBasisValues<double>(double, double *, double *, double *);
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
-void get_BSplineBasisValue(DTYPE basis, int index, DTYPE &value)
+template<class DataType>
+void get_BSplineBasisValue(DataType basis, int index, DataType &value)
 {
    switch(index)
    {
    case 0:
-      value = (DTYPE)((1.0-basis)*(1.0-basis)*(1.0-basis)/6.0);
+      value = (DataType)((1.0-basis)*(1.0-basis)*(1.0-basis)/6.0);
       break;
    case 1:
-      value = (DTYPE)((3.0*basis*basis*basis - 6.0*basis*basis + 4.0)/6.0);
+      value = (DataType)((3.0*basis*basis*basis - 6.0*basis*basis + 4.0)/6.0);
       break;
    case 2:
-      value = (DTYPE)((3.0*basis*basis - 3.0*basis*basis*basis + 3.0*basis + 1.0)/6.0);
+      value = (DataType)((3.0*basis*basis - 3.0*basis*basis*basis + 3.0*basis + 1.0)/6.0);
       break;
    case 3:
-      value = (DTYPE)(basis*basis*basis/6.0);
+      value = (DataType)(basis*basis*basis/6.0);
       break;
    default:
-      value = (DTYPE)0;
+      value = (DataType)0;
       break;
    }
 }
@@ -81,26 +81,26 @@ template void get_BSplineBasisValue<float>(float, int, float &);
 template void get_BSplineBasisValue<double>(double, int, double &);
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
-void get_BSplineBasisValue(DTYPE basis, int index, DTYPE &value, DTYPE &first)
+template<class DataType>
+void get_BSplineBasisValue(DataType basis, int index, DataType &value, DataType &first)
 {
-   get_BSplineBasisValue<DTYPE>(basis, index, value);
+   get_BSplineBasisValue<DataType>(basis, index, value);
    switch(index)
    {
    case 0:
-      first = (DTYPE)((2.0*basis - basis*basis - 1.0)/2.0);
+      first = (DataType)((2.0*basis - basis*basis - 1.0)/2.0);
       break;
    case 1:
-      first = (DTYPE)((3.0*basis*basis - 4.0*basis)/2.0);
+      first = (DataType)((3.0*basis*basis - 4.0*basis)/2.0);
       break;
    case 2:
-      first = (DTYPE)((2.0*basis - 3.0*basis*basis + 1.0)/2.0);
+      first = (DataType)((2.0*basis - 3.0*basis*basis + 1.0)/2.0);
       break;
    case 3:
-      first = (DTYPE)(basis*basis/2.0);
+      first = (DataType)(basis*basis/2.0);
       break;
    default:
-      first = (DTYPE)0;
+      first = (DataType)0;
       break;
    }
 }
@@ -108,26 +108,26 @@ template void get_BSplineBasisValue<float>(float, int, float &, float &);
 template void get_BSplineBasisValue<double>(double, int, double &, double &);
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
-void get_BSplineBasisValue(DTYPE basis, int index, DTYPE &value, DTYPE &first, DTYPE &second)
+template<class DataType>
+void get_BSplineBasisValue(DataType basis, int index, DataType &value, DataType &first, DataType &second)
 {
-   get_BSplineBasisValue<DTYPE>(basis, index, value, first);
+   get_BSplineBasisValue<DataType>(basis, index, value, first);
    switch(index)
    {
    case 0:
-      second = (DTYPE)(1.0 - basis);
+      second = (DataType)(1.0 - basis);
       break;
    case 1:
-      second = (DTYPE)(3.0*basis -2.0);
+      second = (DataType)(3.0*basis -2.0);
       break;
    case 2:
-      second = (DTYPE)(1.0 - 3.0*basis);
+      second = (DataType)(1.0 - 3.0*basis);
       break;
    case 3:
-      second = (DTYPE)(basis);
+      second = (DataType)(basis);
       break;
    default:
-      second = (DTYPE)0;
+      second = (DataType)0;
       break;
    }
 }
@@ -135,48 +135,48 @@ template void get_BSplineBasisValue<float>(float, int, float &, float &, float &
 template void get_BSplineBasisValue<double>(double, int, double &, double &, double &);
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
-void get_SplineBasisValues(DTYPE basis, DTYPE *values)
+template<class DataType>
+void get_SplineBasisValues(DataType basis, DataType *values)
 {
-   DTYPE FF= basis*basis;
-   values[0] = static_cast<DTYPE>((basis * ((2.0-basis)*basis - 1.0))/2.0);
-   values[1] = static_cast<DTYPE>((FF * (3.0*basis-5.0) + 2.0)/2.0);
-   values[2] = static_cast<DTYPE>((basis * ((4.0-3.0*basis)*basis + 1.0))/2.0);
-   values[3] = static_cast<DTYPE>((basis-1.0) * FF/2.0);
+   DataType FF= basis*basis;
+   values[0] = static_cast<DataType>((basis * ((2.0-basis)*basis - 1.0))/2.0);
+   values[1] = static_cast<DataType>((FF * (3.0*basis-5.0) + 2.0)/2.0);
+   values[2] = static_cast<DataType>((basis * ((4.0-3.0*basis)*basis + 1.0))/2.0);
+   values[3] = static_cast<DataType>((basis-1.0) * FF/2.0);
 }
 template void get_SplineBasisValues<float>(float, float *);
 template void get_SplineBasisValues<double>(double, double *);
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
-void get_SplineBasisValues(DTYPE basis, DTYPE *values, DTYPE *first)
+template<class DataType>
+void get_SplineBasisValues(DataType basis, DataType *values, DataType *first)
 {
-   get_SplineBasisValues<DTYPE>(basis,values);
-   DTYPE FF= basis*basis;
-   first[0] = static_cast<DTYPE>((4.0*basis - 3.0*FF - 1.0)/2.0);
-   first[1] = static_cast<DTYPE>((9.0*basis - 10.0) * basis/2.0);
-   first[2] = static_cast<DTYPE>((8.0*basis - 9.0*FF + 1.0)/2.0);
-   first[3] = static_cast<DTYPE>((3.0*basis - 2.0) * basis/2.0);
+   get_SplineBasisValues<DataType>(basis,values);
+   DataType FF= basis*basis;
+   first[0] = static_cast<DataType>((4.0*basis - 3.0*FF - 1.0)/2.0);
+   first[1] = static_cast<DataType>((9.0*basis - 10.0) * basis/2.0);
+   first[2] = static_cast<DataType>((8.0*basis - 9.0*FF + 1.0)/2.0);
+   first[3] = static_cast<DataType>((3.0*basis - 2.0) * basis/2.0);
 }
 template void get_SplineBasisValues<float>(float, float *, float *);
 template void get_SplineBasisValues<double>(double, double *, double *);
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
-void get_SplineBasisValues(DTYPE basis, DTYPE *values, DTYPE *first, DTYPE *second)
+template<class DataType>
+void get_SplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second)
 {
-   get_SplineBasisValues<DTYPE>(basis, values, first);
-   second[0] = static_cast<DTYPE>(2.0 - 3.0*basis);
-   second[1] = static_cast<DTYPE>(9.0*basis - 5.0);
-   second[2] = static_cast<DTYPE>(4.0 - 9.0*basis);
-   second[3] = static_cast<DTYPE>(3.0*basis - 1.0);
+   get_SplineBasisValues<DataType>(basis, values, first);
+   second[0] = static_cast<DataType>(2.0 - 3.0*basis);
+   second[1] = static_cast<DataType>(9.0*basis - 5.0);
+   second[2] = static_cast<DataType>(4.0 - 9.0*basis);
+   second[3] = static_cast<DataType>(3.0*basis - 1.0);
 }
 template void get_SplineBasisValues<float>(float, float *, float *, float *);
 template void get_SplineBasisValues<double>(double, double *, double *, double *);
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
-void set_first_order_basis_values(DTYPE *basisX, DTYPE *basisY)
+template <class DataType>
+void set_first_order_basis_values(DataType *basisX, DataType *basisY)
 {
    double BASIS[4], FIRST[4];get_BSplineBasisValues<double>(0, BASIS, FIRST);
    int index=0;
@@ -191,96 +191,96 @@ void set_first_order_basis_values(DTYPE *basisX, DTYPE *basisY)
 template void set_first_order_basis_values<float>(float *, float *);
 template void set_first_order_basis_values<double>(double *, double *);
 /* *************************************************************** */
-template <class DTYPE>
-void set_first_order_basis_values(DTYPE *basisX, DTYPE *basisY, DTYPE *basisZ)
+template <class DataType>
+void set_first_order_basis_values(DataType *basisX, DataType *basisY, DataType *basisZ)
 {
-      basisX[0]=static_cast<DTYPE>(-0.0138889);
-      basisY[0]=static_cast<DTYPE>(-0.0138889);
-      basisZ[0]=static_cast<DTYPE>(-0.0138889);
-      basisX[1]=static_cast<DTYPE>(0);
-      basisY[1]=static_cast<DTYPE>(-0.0555556);
-      basisZ[1]=static_cast<DTYPE>(-0.0555556);
-      basisX[2]=static_cast<DTYPE>(0.0138889);
-      basisY[2]=static_cast<DTYPE>(-0.0138889);
-      basisZ[2]=static_cast<DTYPE>(-0.0138889);
-      basisX[3]=static_cast<DTYPE>(-0.0555556);
-      basisY[3]=static_cast<DTYPE>(0);
-      basisZ[3]=static_cast<DTYPE>(-0.0555556);
-      basisX[4]=static_cast<DTYPE>(0);
-      basisY[4]=static_cast<DTYPE>(0);
-      basisZ[4]=static_cast<DTYPE>(-0.222222);
-      basisX[5]=static_cast<DTYPE>(0.0555556);
-      basisY[5]=static_cast<DTYPE>(0);
-      basisZ[5]=static_cast<DTYPE>(-0.0555556);
-      basisX[6]=static_cast<DTYPE>(-0.0138889);
-      basisY[6]=static_cast<DTYPE>(0.0138889);
-      basisZ[6]=static_cast<DTYPE>(-0.0138889);
-      basisX[7]=static_cast<DTYPE>(0);
-      basisY[7]=static_cast<DTYPE>(0.0555556);
-      basisZ[7]=static_cast<DTYPE>(-0.0555556);
-      basisX[8]=static_cast<DTYPE>(0.0138889);
-      basisY[8]=static_cast<DTYPE>(0.0138889);
-      basisZ[8]=static_cast<DTYPE>(-0.0138889);
-      basisX[9]=static_cast<DTYPE>(-0.0555556);
-      basisY[9]=static_cast<DTYPE>(-0.0555556);
-      basisZ[9]=static_cast<DTYPE>(0);
-      basisX[10]=static_cast<DTYPE>(0);
-      basisY[10]=static_cast<DTYPE>(-0.222222);
-      basisZ[10]=static_cast<DTYPE>(0);
-      basisX[11]=static_cast<DTYPE>(0.0555556);
-      basisY[11]=static_cast<DTYPE>(-0.0555556);
-      basisZ[11]=static_cast<DTYPE>(0);
-      basisX[12]=static_cast<DTYPE>(-0.222222);
-      basisY[12]=static_cast<DTYPE>(0);
-      basisZ[12]=static_cast<DTYPE>(0);
-      basisX[13]=static_cast<DTYPE>(0);
-      basisY[13]=static_cast<DTYPE>(0);
-      basisZ[13]=static_cast<DTYPE>(0);
-      basisX[14]=static_cast<DTYPE>(0.222222);
-      basisY[14]=static_cast<DTYPE>(0);
-      basisZ[14]=static_cast<DTYPE>(0);
-      basisX[15]=static_cast<DTYPE>(-0.0555556);
-      basisY[15]=static_cast<DTYPE>(0.0555556);
-      basisZ[15]=static_cast<DTYPE>(0);
-      basisX[16]=static_cast<DTYPE>(0);
-      basisY[16]=static_cast<DTYPE>(0.222222);
-      basisZ[16]=static_cast<DTYPE>(0);
-      basisX[17]=static_cast<DTYPE>(0.0555556);
-      basisY[17]=static_cast<DTYPE>(0.0555556);
-      basisZ[17]=static_cast<DTYPE>(0);
-      basisX[18]=static_cast<DTYPE>(-0.0138889);
-      basisY[18]=static_cast<DTYPE>(-0.0138889);
-      basisZ[18]=static_cast<DTYPE>(0.0138889);
-      basisX[19]=static_cast<DTYPE>(0);
-      basisY[19]=static_cast<DTYPE>(-0.0555556);
-      basisZ[19]=static_cast<DTYPE>(0.0555556);
-      basisX[20]=static_cast<DTYPE>(0.0138889);
-      basisY[20]=static_cast<DTYPE>(-0.0138889);
-      basisZ[20]=static_cast<DTYPE>(0.0138889);
-      basisX[21]=static_cast<DTYPE>(-0.0555556);
-      basisY[21]=static_cast<DTYPE>(0);
-      basisZ[21]=static_cast<DTYPE>(0.0555556);
-      basisX[22]=static_cast<DTYPE>(0);
-      basisY[22]=static_cast<DTYPE>(0);
-      basisZ[22]=static_cast<DTYPE>(0.222222);
-      basisX[23]=static_cast<DTYPE>(0.0555556);
-      basisY[23]=static_cast<DTYPE>(0);
-      basisZ[23]=static_cast<DTYPE>(0.0555556);
-      basisX[24]=static_cast<DTYPE>(-0.0138889);
-      basisY[24]=static_cast<DTYPE>(0.0138889);
-      basisZ[24]=static_cast<DTYPE>(0.0138889);
-      basisX[25]=static_cast<DTYPE>(0);
-      basisY[25]=static_cast<DTYPE>(0.0555556);
-      basisZ[25]=static_cast<DTYPE>(0.0555556);
-      basisX[26]=static_cast<DTYPE>(0.0138889);
-      basisY[26]=static_cast<DTYPE>(0.0138889);
-      basisZ[26]=static_cast<DTYPE>(0.0138889);
+      basisX[0]=static_cast<DataType>(-0.0138889);
+      basisY[0]=static_cast<DataType>(-0.0138889);
+      basisZ[0]=static_cast<DataType>(-0.0138889);
+      basisX[1]=static_cast<DataType>(0);
+      basisY[1]=static_cast<DataType>(-0.0555556);
+      basisZ[1]=static_cast<DataType>(-0.0555556);
+      basisX[2]=static_cast<DataType>(0.0138889);
+      basisY[2]=static_cast<DataType>(-0.0138889);
+      basisZ[2]=static_cast<DataType>(-0.0138889);
+      basisX[3]=static_cast<DataType>(-0.0555556);
+      basisY[3]=static_cast<DataType>(0);
+      basisZ[3]=static_cast<DataType>(-0.0555556);
+      basisX[4]=static_cast<DataType>(0);
+      basisY[4]=static_cast<DataType>(0);
+      basisZ[4]=static_cast<DataType>(-0.222222);
+      basisX[5]=static_cast<DataType>(0.0555556);
+      basisY[5]=static_cast<DataType>(0);
+      basisZ[5]=static_cast<DataType>(-0.0555556);
+      basisX[6]=static_cast<DataType>(-0.0138889);
+      basisY[6]=static_cast<DataType>(0.0138889);
+      basisZ[6]=static_cast<DataType>(-0.0138889);
+      basisX[7]=static_cast<DataType>(0);
+      basisY[7]=static_cast<DataType>(0.0555556);
+      basisZ[7]=static_cast<DataType>(-0.0555556);
+      basisX[8]=static_cast<DataType>(0.0138889);
+      basisY[8]=static_cast<DataType>(0.0138889);
+      basisZ[8]=static_cast<DataType>(-0.0138889);
+      basisX[9]=static_cast<DataType>(-0.0555556);
+      basisY[9]=static_cast<DataType>(-0.0555556);
+      basisZ[9]=static_cast<DataType>(0);
+      basisX[10]=static_cast<DataType>(0);
+      basisY[10]=static_cast<DataType>(-0.222222);
+      basisZ[10]=static_cast<DataType>(0);
+      basisX[11]=static_cast<DataType>(0.0555556);
+      basisY[11]=static_cast<DataType>(-0.0555556);
+      basisZ[11]=static_cast<DataType>(0);
+      basisX[12]=static_cast<DataType>(-0.222222);
+      basisY[12]=static_cast<DataType>(0);
+      basisZ[12]=static_cast<DataType>(0);
+      basisX[13]=static_cast<DataType>(0);
+      basisY[13]=static_cast<DataType>(0);
+      basisZ[13]=static_cast<DataType>(0);
+      basisX[14]=static_cast<DataType>(0.222222);
+      basisY[14]=static_cast<DataType>(0);
+      basisZ[14]=static_cast<DataType>(0);
+      basisX[15]=static_cast<DataType>(-0.0555556);
+      basisY[15]=static_cast<DataType>(0.0555556);
+      basisZ[15]=static_cast<DataType>(0);
+      basisX[16]=static_cast<DataType>(0);
+      basisY[16]=static_cast<DataType>(0.222222);
+      basisZ[16]=static_cast<DataType>(0);
+      basisX[17]=static_cast<DataType>(0.0555556);
+      basisY[17]=static_cast<DataType>(0.0555556);
+      basisZ[17]=static_cast<DataType>(0);
+      basisX[18]=static_cast<DataType>(-0.0138889);
+      basisY[18]=static_cast<DataType>(-0.0138889);
+      basisZ[18]=static_cast<DataType>(0.0138889);
+      basisX[19]=static_cast<DataType>(0);
+      basisY[19]=static_cast<DataType>(-0.0555556);
+      basisZ[19]=static_cast<DataType>(0.0555556);
+      basisX[20]=static_cast<DataType>(0.0138889);
+      basisY[20]=static_cast<DataType>(-0.0138889);
+      basisZ[20]=static_cast<DataType>(0.0138889);
+      basisX[21]=static_cast<DataType>(-0.0555556);
+      basisY[21]=static_cast<DataType>(0);
+      basisZ[21]=static_cast<DataType>(0.0555556);
+      basisX[22]=static_cast<DataType>(0);
+      basisY[22]=static_cast<DataType>(0);
+      basisZ[22]=static_cast<DataType>(0.222222);
+      basisX[23]=static_cast<DataType>(0.0555556);
+      basisY[23]=static_cast<DataType>(0);
+      basisZ[23]=static_cast<DataType>(0.0555556);
+      basisX[24]=static_cast<DataType>(-0.0138889);
+      basisY[24]=static_cast<DataType>(0.0138889);
+      basisZ[24]=static_cast<DataType>(0.0138889);
+      basisX[25]=static_cast<DataType>(0);
+      basisY[25]=static_cast<DataType>(0.0555556);
+      basisZ[25]=static_cast<DataType>(0.0555556);
+      basisX[26]=static_cast<DataType>(0.0138889);
+      basisY[26]=static_cast<DataType>(0.0138889);
+      basisZ[26]=static_cast<DataType>(0.0138889);
 }
 template void set_first_order_basis_values<float>(float *, float *, float *);
 template void set_first_order_basis_values<double>(double *, double *, double *);
 /* *************************************************************** */
-template <class DTYPE>
-void set_second_order_bspline_basis_values(DTYPE *basisXX, DTYPE *basisYY, DTYPE *basisXY)
+template <class DataType>
+void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisXY)
 {
    basisXX[0]=0.166667f;
    basisYY[0]=0.166667f;
@@ -313,8 +313,8 @@ void set_second_order_bspline_basis_values(DTYPE *basisXX, DTYPE *basisYY, DTYPE
 template void set_second_order_bspline_basis_values<float>(float *, float *, float *);
 template void set_second_order_bspline_basis_values<double>(double *, double *, double *);
 /* *************************************************************** */
-template <class DTYPE>
-void set_second_order_bspline_basis_values(DTYPE *basisXX, DTYPE *basisYY, DTYPE *basisZZ, DTYPE *basisXY, DTYPE *basisYZ, DTYPE *basisXZ)
+template <class DataType>
+void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisZZ, DataType *basisXY, DataType *basisYZ, DataType *basisXZ)
 {
    basisXX[0]=0.027778f;
    basisYY[0]=0.027778f;
@@ -483,13 +483,13 @@ template void set_second_order_bspline_basis_values<float>(float *, float *, flo
 template void set_second_order_bspline_basis_values<double>(double *, double *, double *, double *, double *, double *);
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
-void get_SlidedValues(DTYPE &defX,
-                      DTYPE &defY,
+template <class DataType>
+void get_SlidedValues(DataType &defX,
+                      DataType &defY,
                       int X,
                       int Y,
-                      DTYPE *defPtrX,
-                      DTYPE *defPtrY,
+                      DataType *defPtrX,
+                      DataType *defPtrY,
                       mat44 *df_voxel2Real,
                       int *dim,
                       bool displacement)
@@ -512,8 +512,8 @@ void get_SlidedValues(DTYPE &defX,
    {
       newY=dim[2]-1;
    }
-   DTYPE shiftValueX = 0;
-   DTYPE shiftValueY = 0;
+   DataType shiftValueX = 0;
+   DataType shiftValueY = 0;
    if(!displacement)
    {
       int shiftIndexX=X-newX;
@@ -532,16 +532,16 @@ float *, float *, mat44 *, int *, bool);
 template void get_SlidedValues<double>(double &, double &, int, int,
 double *, double *, mat44 *, int *, bool);
 /* *************************************************************** */
-template <class DTYPE>
-void get_SlidedValues(DTYPE &defX,
-                      DTYPE &defY,
-                      DTYPE &defZ,
+template <class DataType>
+void get_SlidedValues(DataType &defX,
+                      DataType &defY,
+                      DataType &defZ,
                       int X,
                       int Y,
                       int Z,
-                      DTYPE *defPtrX,
-                      DTYPE *defPtrY,
-                      DTYPE *defPtrZ,
+                      DataType *defPtrX,
+                      DataType *defPtrY,
+                      DataType *defPtrZ,
                       mat44 *df_voxel2Real,
                       int *dim,
                       bool displacement)
@@ -573,9 +573,9 @@ void get_SlidedValues(DTYPE &defX,
    {
       newZ=dim[3]-1;
    }
-   DTYPE shiftValueX=0;
-   DTYPE shiftValueY=0;
-   DTYPE shiftValueZ=0;
+   DataType shiftValueX=0;
+   DataType shiftValueY=0;
+   DataType shiftValueZ=0;
    if(!displacement)
    {
       int shiftIndexX=X-newX;
@@ -605,14 +605,14 @@ template void get_SlidedValues<double>(double &, double &, double &, int, int, i
 double *, double *, double *, mat44 *, int *, bool);
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void get_GridValues(int startX,
                     int startY,
                     nifti_image *splineControlPoint,
-                    DTYPE *splineX,
-                    DTYPE *splineY,
-                    DTYPE *dispX,
-                    DTYPE *dispY,
+                    DataType *splineX,
+                    DataType *splineY,
+                    DataType *dispX,
+                    DataType *dispY,
                     bool approx,
                     bool displacement)
 
@@ -622,7 +622,7 @@ void get_GridValues(int startX,
 
    size_t index;
    size_t coord=0;
-   DTYPE *xxPtr=nullptr, *yyPtr=nullptr;
+   DataType *xxPtr=nullptr, *yyPtr=nullptr;
 
    mat44 *voxel2realMatrix=nullptr;
    if(splineControlPoint->sform_code>0)
@@ -648,7 +648,7 @@ void get_GridValues(int startX,
          }
          else
          {
-            get_SlidedValues<DTYPE>(dispX[coord],
+            get_SlidedValues<DataType>(dispX[coord],
                                     dispY[coord],
                                     X,
                                     Y,
@@ -667,17 +667,17 @@ float *, float *, float *, float *, bool, bool);
 template void get_GridValues<double>(int, int, nifti_image *,
 double *, double *, double *, double *, bool, bool);
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void get_GridValues(int startX,
                     int startY,
                     int startZ,
                     nifti_image *splineControlPoint,
-                    DTYPE *splineX,
-                    DTYPE *splineY,
-                    DTYPE *splineZ,
-                    DTYPE *dispX,
-                    DTYPE *dispY,
-                    DTYPE *dispZ,
+                    DataType *splineX,
+                    DataType *splineY,
+                    DataType *splineZ,
+                    DataType *dispX,
+                    DataType *dispY,
+                    DataType *dispZ,
                     bool approx,
                     bool displacement)
 {
@@ -687,8 +687,8 @@ void get_GridValues(int startX,
 
    size_t index;
    size_t coord=0;
-   DTYPE *xPtr=nullptr, *yPtr=nullptr, *zPtr=nullptr;
-   DTYPE *xxPtr=nullptr, *yyPtr=nullptr, *zzPtr=nullptr;
+   DataType *xPtr=nullptr, *yPtr=nullptr, *zPtr=nullptr;
+   DataType *xxPtr=nullptr, *yyPtr=nullptr, *zzPtr=nullptr;
 
    mat44 *voxel2realMatrix=nullptr;
    if(splineControlPoint->sform_code>0)
@@ -726,7 +726,7 @@ void get_GridValues(int startX,
             }
             else
             {
-               get_SlidedValues<DTYPE>(dispX[coord],
+               get_SlidedValues<DataType>(dispX[coord],
                                        dispY[coord],
                                        dispZ[coord],
                                        X,
diff --git a/reg-lib/cpu/_reg_splineBasis.h b/reg-lib/cpu/_reg_splineBasis.h
index 602f8d6b..5436ea7e 100755
--- a/reg-lib/cpu/_reg_splineBasis.h
+++ b/reg-lib/cpu/_reg_splineBasis.h
@@ -16,116 +16,116 @@
 #include "_reg_tools.h"
 
 
-extern "C++" template<class DTYPE>
-void get_BSplineBasisValues(DTYPE basis,
-                            DTYPE *values);
-extern "C++" template<class DTYPE>
-void get_BSplineBasisValues(DTYPE basis,
-                            DTYPE *values,
-                            DTYPE *first);
-extern "C++" template<class DTYPE>
-void get_BSplineBasisValues(DTYPE basis,
-                            DTYPE *values,
-                            DTYPE *first,
-                            DTYPE *second);
+extern "C++" template<class DataType>
+void get_BSplineBasisValues(DataType basis,
+                            DataType *values);
+extern "C++" template<class DataType>
+void get_BSplineBasisValues(DataType basis,
+                            DataType *values,
+                            DataType *first);
+extern "C++" template<class DataType>
+void get_BSplineBasisValues(DataType basis,
+                            DataType *values,
+                            DataType *first,
+                            DataType *second);
 
 
-extern "C++" template<class DTYPE>
-void get_BSplineBasisValue(DTYPE basis,
+extern "C++" template<class DataType>
+void get_BSplineBasisValue(DataType basis,
                            int index,
-                           DTYPE &value);
-extern "C++" template<class DTYPE>
-void get_BSplineBasisValue(DTYPE basis,
+                           DataType &value);
+extern "C++" template<class DataType>
+void get_BSplineBasisValue(DataType basis,
                            int index,
-                           DTYPE &value,
-                           DTYPE &first);
-extern "C++" template<class DTYPE>
-void get_BSplineBasisValue(DTYPE basis,
+                           DataType &value,
+                           DataType &first);
+extern "C++" template<class DataType>
+void get_BSplineBasisValue(DataType basis,
                            int index,
-                           DTYPE &value,
-                           DTYPE &first,
-                           DTYPE &second);
+                           DataType &value,
+                           DataType &first,
+                           DataType &second);
 
-extern "C++" template <class DTYPE>
-void set_first_order_basis_values(DTYPE *basisX,
-                                  DTYPE *basisY);
+extern "C++" template <class DataType>
+void set_first_order_basis_values(DataType *basisX,
+                                  DataType *basisY);
 
-extern "C++" template <class DTYPE>
-void set_first_order_basis_values(DTYPE *basisX,
-                                  DTYPE *basisY,
-                                  DTYPE *basisZ);
+extern "C++" template <class DataType>
+void set_first_order_basis_values(DataType *basisX,
+                                  DataType *basisY,
+                                  DataType *basisZ);
 
-extern "C++" template <class DTYPE>
-void set_second_order_bspline_basis_values(DTYPE *basisXX,
-                                           DTYPE *basisYY,
-                                           DTYPE *basisXY);
-extern "C++" template <class DTYPE>
-void set_second_order_bspline_basis_values(DTYPE *basisXX,
-                                           DTYPE *basisYY,
-                                           DTYPE *basisZZ,
-                                           DTYPE *basisXY,
-                                           DTYPE *basisYZ,
-                                           DTYPE *basisXZ);
+extern "C++" template <class DataType>
+void set_second_order_bspline_basis_values(DataType *basisXX,
+                                           DataType *basisYY,
+                                           DataType *basisXY);
+extern "C++" template <class DataType>
+void set_second_order_bspline_basis_values(DataType *basisXX,
+                                           DataType *basisYY,
+                                           DataType *basisZZ,
+                                           DataType *basisXY,
+                                           DataType *basisYZ,
+                                           DataType *basisXZ);
 
 
-extern "C++" template<class DTYPE>
-void get_SplineBasisValues(DTYPE basis,
-                           DTYPE *values);
-extern "C++" template<class DTYPE>
-void get_SplineBasisValues(DTYPE basis,
-                           DTYPE *values,
-                           DTYPE *first);
-extern "C++" template<class DTYPE>
-void get_SplineBasisValues(DTYPE basis,
-                           DTYPE *values,
-                           DTYPE *first,
-                           DTYPE *second);
+extern "C++" template<class DataType>
+void get_SplineBasisValues(DataType basis,
+                           DataType *values);
+extern "C++" template<class DataType>
+void get_SplineBasisValues(DataType basis,
+                           DataType *values,
+                           DataType *first);
+extern "C++" template<class DataType>
+void get_SplineBasisValues(DataType basis,
+                           DataType *values,
+                           DataType *first,
+                           DataType *second);
 
-extern "C++" template <class DTYPE>
-void get_SlidedValues(DTYPE &defX,
-                      DTYPE &defY,
+extern "C++" template <class DataType>
+void get_SlidedValues(DataType &defX,
+                      DataType &defY,
                       int X,
                       int Y,
-                      DTYPE *defPtrX,
-                      DTYPE *defPtrY,
+                      DataType *defPtrX,
+                      DataType *defPtrY,
                       mat44 *df_voxel2Real,
                       int *dim,
                       bool displacement);
-extern "C++" template <class DTYPE>
-void get_SlidedValues(DTYPE &defX,
-                      DTYPE &defY,
-                      DTYPE &defZ,
+extern "C++" template <class DataType>
+void get_SlidedValues(DataType &defX,
+                      DataType &defY,
+                      DataType &defZ,
                       int X,
                       int Y,
                       int Z,
-                      DTYPE *defPtrX,
-                      DTYPE *defPtrY,
-                      DTYPE *defPtrZ,
+                      DataType *defPtrX,
+                      DataType *defPtrY,
+                      DataType *defPtrZ,
                       mat44 *df_voxel2Real,
                       int *dim,
                       bool displacement);
 
 
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void get_GridValues(int startX,
                     int startY,
                     nifti_image *splineControlPoint,
-                    DTYPE *splineX,
-                    DTYPE *splineY,
-                    DTYPE *dispX,
-                    DTYPE *dispY,
+                    DataType *splineX,
+                    DataType *splineY,
+                    DataType *dispX,
+                    DataType *dispY,
                     bool approx,
                     bool displacement);
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void get_GridValues(int startX,
                     int startY,
                     int startZ,
                     nifti_image *splineControlPoint,
-                    DTYPE *splineX,
-                    DTYPE *splineY,
-                    DTYPE *splineZ,
-                    DTYPE *dispX,
-                    DTYPE *dispY,
-                    DTYPE *dispZ,
+                    DataType *splineX,
+                    DataType *splineY,
+                    DataType *splineZ,
+                    DataType *dispX,
+                    DataType *dispY,
+                    DataType *dispZ,
                     bool approx,
                     bool displacement);
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 8a5aca1c..a89f0122 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -101,7 +101,7 @@ void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 double reg_getSSDValue(nifti_image *referenceImage,
                        nifti_image *warpedImage,
                        double *timePointWeight,
@@ -117,16 +117,16 @@ double reg_getSSDValue(nifti_image *referenceImage,
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
     // Create pointers to the reference and warped image data
-    DTYPE *referencePtr = static_cast<DTYPE*>(referenceImage->data);
-    DTYPE *warpedPtr = static_cast<DTYPE*>(warpedImage->data);
+    DataType *referencePtr = static_cast<DataType*>(referenceImage->data);
+    DataType *warpedPtr = static_cast<DataType*>(warpedImage->data);
     // Create a pointer to the Jacobian determinant image if defined
-    DTYPE *jacDetPtr = nullptr;
+    DataType *jacDetPtr = nullptr;
     if (jacobianDetImage != nullptr)
-        jacDetPtr = static_cast<DTYPE*>(jacobianDetImage->data);
+        jacDetPtr = static_cast<DataType*>(jacobianDetImage->data);
     // Create a pointer to the local weight image if defined
-    DTYPE *localWeightPtr = nullptr;
+    DataType *localWeightPtr = nullptr;
     if (localWeightSimImage != nullptr)
-        localWeightPtr = static_cast<DTYPE*>(localWeightSimImage->data);
+        localWeightPtr = static_cast<DataType*>(localWeightSimImage->data);
 
     double SSD_global = 0;
     double refValue, warValue, diff;
@@ -135,11 +135,11 @@ double reg_getSSDValue(nifti_image *referenceImage,
     for (int time = 0; time < referenceImage->nt; ++time) {
         if (timePointWeight[time] > 0) {
             // Create pointers to the current time point of the reference and warped images
-            DTYPE *currentRefPtr = &referencePtr[time * voxelNumber];
-            DTYPE *currentWarPtr = &warpedPtr[time * voxelNumber];
+            DataType *currentRefPtr = &referencePtr[time * voxelNumber];
+            DataType *currentWarPtr = &warpedPtr[time * voxelNumber];
 
             double SSD_local = 0., n = 0.;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \
     jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \
@@ -255,7 +255,7 @@ double reg_ssd::GetSimilarityMeasureValue() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                                   nifti_image *warpedImage,
                                   nifti_image *warpedGradient,
@@ -279,33 +279,33 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
     // Pointers to the image data
-    DTYPE *refImagePtr = static_cast<DTYPE *>(referenceImage->data);
-    DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
-    DTYPE *warImagePtr = static_cast<DTYPE *>(warpedImage->data);
-    DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
+    DataType *refImagePtr = static_cast<DataType *>(referenceImage->data);
+    DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DataType *warImagePtr = static_cast<DataType *>(warpedImage->data);
+    DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
-    DTYPE *spatialGradPtrX = static_cast<DTYPE *>(warpedGradient->data);
-    DTYPE *spatialGradPtrY = &spatialGradPtrX[voxelNumber];
-    DTYPE *spatialGradPtrZ = nullptr;
+    DataType *spatialGradPtrX = static_cast<DataType *>(warpedGradient->data);
+    DataType *spatialGradPtrY = &spatialGradPtrX[voxelNumber];
+    DataType *spatialGradPtrZ = nullptr;
     if (referenceImage->nz > 1)
         spatialGradPtrZ = &spatialGradPtrY[voxelNumber];
 
     // Pointers to the measure of similarity gradient
-    DTYPE *measureGradPtrX = static_cast<DTYPE *>(measureGradientImage->data);
-    DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber];
-    DTYPE *measureGradPtrZ = nullptr;
+    DataType *measureGradPtrX = static_cast<DataType *>(measureGradientImage->data);
+    DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
+    DataType *measureGradPtrZ = nullptr;
     if (referenceImage->nz > 1)
         measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
     // Create a pointer to the Jacobian determinant values if defined
-    DTYPE *jacDetPtr = nullptr;
+    DataType *jacDetPtr = nullptr;
     if (jacobianDetImage != nullptr)
-        jacDetPtr = static_cast<DTYPE *>(jacobianDetImage->data);
+        jacDetPtr = static_cast<DataType *>(jacobianDetImage->data);
     // Create a pointer to the local weight image if defined
-    DTYPE *localWeightPtr = nullptr;
+    DataType *localWeightPtr = nullptr;
     if (localWeightSimImage != nullptr)
-        localWeightPtr = static_cast<DTYPE *>(localWeightSimImage->data);
+        localWeightPtr = static_cast<DataType *>(localWeightSimImage->data);
 
     // find number of active voxels and correct weight
     double activeVoxel_num = 0;
@@ -319,7 +319,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
 
     double refValue, warValue, common;
 
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, \
     mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \
@@ -346,13 +346,13 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                 common *= adjusted_weight;
 
                 if (spatialGradPtrX[voxel] == spatialGradPtrX[voxel])
-                    measureGradPtrX[voxel] += (DTYPE)(common * spatialGradPtrX[voxel]);
+                    measureGradPtrX[voxel] += (DataType)(common * spatialGradPtrX[voxel]);
                 if (spatialGradPtrY[voxel] == spatialGradPtrY[voxel])
-                    measureGradPtrY[voxel] += (DTYPE)(common * spatialGradPtrY[voxel]);
+                    measureGradPtrY[voxel] += (DataType)(common * spatialGradPtrY[voxel]);
 
                 if (measureGradPtrZ != nullptr) {
                     if (spatialGradPtrZ[voxel] == spatialGradPtrZ[voxel])
-                        measureGradPtrZ[voxel] += (DTYPE)(common * spatialGradPtrZ[voxel]);
+                        measureGradPtrZ[voxel] += (DataType)(common * spatialGradPtrZ[voxel]);
                 }
             }
         }
@@ -451,7 +451,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                                    float *discretisedValue,
                                    int discretise_radius,
@@ -490,8 +490,8 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
 
     // Pointers to the input image
     const size_t voxelNumber = CalcVoxelNumber(*refImage);
-    DTYPE *refImgPtr = static_cast<DTYPE*>(refImage->data);
-    DTYPE *warImgPtr = static_cast<DTYPE*>(warImage->data);
+    DataType *refImgPtr = static_cast<DataType*>(refImage->data);
+    DataType *warImgPtr = static_cast<DataType*>(warImage->data);
 
     // Create a padded version of the warped image to avoid boundary condition check
     int warPaddedOffset[3] = {
@@ -506,12 +506,12 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
         warImage->nt
     };
 
-    //DTYPE padding_value = std::numeric_limits<DTYPE>::quiet_NaN();
-    DTYPE padding_value = 0;
+    //DataType padding_value = std::numeric_limits<DataType>::quiet_NaN();
+    DataType padding_value = 0;
 
     size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] *
         warPaddedDim[1] * warPaddedDim[2];
-    DTYPE *paddedWarImgPtr = (DTYPE*)calloc(warPaddedVoxelNumber * warPaddedDim[3], sizeof(DTYPE));
+    DataType *paddedWarImgPtr = (DataType*)calloc(warPaddedVoxelNumber * warPaddedDim[3], sizeof(DataType));
     for (voxIndex = 0; voxIndex < warPaddedVoxelNumber * warPaddedDim[3]; ++voxIndex)
         paddedWarImgPtr[voxIndex] = padding_value;
     voxIndex = 0;
@@ -579,7 +579,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                 // Loop over the discretised value
                 if (definedValueNumber > 0) {
 
-                    DTYPE warpedValue;
+                    DataType warpedValue;
                     int paddedImageVox[3] = {
                         static_cast<int>(imageVox[0] + warPaddedOffset[0]),
                         static_cast<int>(imageVox[1] + warPaddedOffset[1]),
@@ -587,7 +587,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                     };
                     int cc;
                     double currentSum;
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(label_1D_number, label_2D_number, label_nD_number, discretise_step, discretise_radius, \
     paddedImageVox, blockSize, warPaddedDim, paddedWarImgPtr, refBlockValue, warPaddedVoxelNumber, \
@@ -695,7 +695,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
                                      float *discretisedValue,
                                      int discretise_radius,
@@ -734,16 +734,16 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
 
     // Pointers to the input image
     const size_t voxelNumber = CalcVoxelNumber(*refImage);
-    DTYPE *refImgPtr = static_cast<DTYPE*>(refImage->data);
-    DTYPE *warImgPtr = static_cast<DTYPE*>(warImage->data);
+    DataType *refImgPtr = static_cast<DataType*>(refImage->data);
+    DataType *warImgPtr = static_cast<DataType*>(warImage->data);
 
-    DTYPE padding_value = 0;
+    DataType padding_value = 0;
 
     int definedValueNumber, idBlock, timeV;
 
     int threadNumber = 1;
     int tid = 0;
-#if defined (_OPENMP)
+#ifdef _OPENMP
     threadNumber = omp_get_max_threads();
 #endif
 
@@ -753,7 +753,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
         refBlockValue[a] = (float*)malloc(voxelBlockNumber_t * sizeof(float));
 
     // Loop over all control points
-#if defined (_OPENMP)
+#ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelBlockNumber_t, voxelNumber, voxelBlockNumber, label_nD_number, controlPointGridImage, refImage, warImage, grid2img_vox, blockSize, \
     padding_value, refBlockValue, mask, refImgPtr, warImgPtr, discretise_radius, \
@@ -763,7 +763,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
     timeV, voxIndex_t, blockIndex_t, discretisedIndex, currentSum, currentValue)
 #endif
     for (cpz = 0; cpz < controlPointGridImage->nz; ++cpz) {
-#if defined (_OPENMP)
+#ifdef _OPENMP
         tid = omp_get_thread_num();
 #endif
         gridVox[2] = cpz;
@@ -936,7 +936,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
     } // node
 }
 /* *************************************************************** */
-//template <class DTYPE>
+//template <class DataType>
 //void GetDiscretisedValueSSD_core2D(nifti_image *controlPointGridImage,
 //                                   float *discretisedValue,
 //                                   int discretise_radius,
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index 41b4c2d9..c2ab3f99 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -69,7 +69,7 @@ class reg_ssd: public reg_measure {
  * should be considered. If set to nullptr, all voxels are considered
  * @return Returns the computed sum squared difference
  */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 double reg_getSSDValue(nifti_image *referenceImage,
                        nifti_image *warpedImage,
                        double *timePointWeight,
@@ -92,7 +92,7 @@ double reg_getSSDValue(nifti_image *referenceImage,
  * @param mask Array that contains a mask to specify which voxel
  * should be considered. If set to nullptr, all voxels are considered
  */
-extern "C++" template <class DTYPE>
+extern "C++" template <class DataType>
 void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                                   nifti_image *warpedImage,
                                   nifti_image *warpedImageGradient,
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 2ae6debd..ab3fc019 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -13,19 +13,19 @@
 #include "_reg_blocksize_gpu.h"
 
 /* *************************************************************** */
-template <class NIFTI_TYPE>
+template <class NiftiType>
 int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *img) {
-    const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NIFTI_TYPE);
+    const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NiftiType);
 
     int *g_dim;
     float* g_pixdim;
-    NIFTI_TYPE* g_data;
+    NiftiType* g_data;
 
     NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_dim, 8 * sizeof(int)));
     NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_pixdim, 8 * sizeof(float)));
     NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_data, memSize));
 
-    NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
+    NiftiType *array_h = static_cast<NiftiType*>(img->data);
     NR_CUDA_SAFE_CALL(cudaMemcpy(image_d, img, sizeof(nifti_image), cudaMemcpyHostToDevice));
 
     NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->data, array_h, memSize, cudaMemcpyHostToDevice));
@@ -37,23 +37,23 @@ int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *
 template int cudaCommon_transferNiftiToNiftiOnDevice1<float>(nifti_image*, nifti_image*);
 template int cudaCommon_transferNiftiToNiftiOnDevice1<double>(nifti_image*, nifti_image*);
 /* *************************************************************** */
-template <class DTYPE, class NIFTI_TYPE>
-int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, nifti_image *img) {
-    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+template <class DataType, class NiftiType>
+int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, nifti_image *img) {
+    if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE);
-        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
+        const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType);
+        NiftiType *array_h = static_cast<NiftiType*>(img->data);
         NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice));
     }
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, nifti_image *img) {
-    if (sizeof(DTYPE) == sizeof(float4)) {
+template <class DataType>
+int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, nifti_image *img) {
+    if (sizeof(DataType) == sizeof(float4)) {
         if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The specified image is not a single precision deformation field image");
@@ -81,7 +81,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, nifti_image *img) {
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE, float>(array_d, img);
+            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(array_d, img);
         default:
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The image data type is not supported");
@@ -95,25 +95,25 @@ template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, nifti_image*
 template int cudaCommon_transferNiftiToArrayOnDevice<int>(int*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, nifti_image*);
 /* *************************************************************** */
-template <class DTYPE, class NIFTI_TYPE>
-int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) {
-    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+template <class DataType, class NiftiType>
+int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, DataType *array2_d, nifti_image *img) {
+    if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE);
-        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
-        NIFTI_TYPE *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]];
+        const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType);
+        NiftiType *array_h = static_cast<NiftiType*>(img->data);
+        NiftiType *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]];
         NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice));
         NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, memSize, cudaMemcpyHostToDevice));
     }
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) {
-    if (sizeof(DTYPE) == sizeof(float4)) {
+template <class DataType>
+int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, DataType *array2_d, nifti_image *img) {
+    if (sizeof(DataType) == sizeof(float4)) {
         if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The specified image is not a single precision deformation field image");
@@ -152,7 +152,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nif
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE, float>(array_d, array2_d, img);
+            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(array_d, array2_d, img);
         default:
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The image data type is not supported");
@@ -165,19 +165,19 @@ template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, float*, nift
 template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, double*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, float4*, nifti_image*); // for deformation field
 /* *************************************************************** */
-template <class DTYPE, class NIFTI_TYPE>
+template <class DataType, class NiftiType>
 int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *img) {
-    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+    if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
+        NiftiType *array_h = static_cast<NiftiType*>(img->data);
 
         cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
         copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
         copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
-                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
         copyParams.dstArray = cuArray_d;
@@ -187,9 +187,9 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *img) {
-    if (sizeof(DTYPE) == sizeof(float4)) {
+    if (sizeof(DataType) == sizeof(float4)) {
         if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The specified image is not a single precision deformation field image");
@@ -216,7 +216,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *i
         cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
         copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
         copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
-                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
         copyParams.dstArray = cuArray_d;
@@ -226,7 +226,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *i
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE, float>(cuArray_d, img);
+            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(cuArray_d, img);
         default:
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The image data type is not supported");
@@ -240,29 +240,29 @@ template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, nifti_im
 template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, nifti_image*); // for deformation field
 /* *************************************************************** */
-template <class DTYPE, class NIFTI_TYPE>
+template <class DataType, class NiftiType>
 int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) {
-    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+    if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
-        NIFTI_TYPE *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]];
+        NiftiType *array_h = static_cast<NiftiType*>(img->data);
+        NiftiType *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]];
 
         cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
         copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
         copyParams.kind = cudaMemcpyHostToDevice;
         // First timepoint
         copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
-                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
         copyParams.dstArray = cuArray_d;
         NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
         // Second timepoint
         copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h,
-                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
         copyParams.dstArray = cuArray2_d;
@@ -271,9 +271,9 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cu
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) {
-    if (sizeof(DTYPE) == sizeof(float4)) {
+    if (sizeof(DataType) == sizeof(float4)) {
         if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
             reg_print_msg_error("The specified image is not a single precision deformation field image");
@@ -315,7 +315,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA
         copyParams.kind = cudaMemcpyHostToDevice;
         // First timepoint
         copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
-                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
         copyParams.dstArray = cuArray_d;
@@ -323,7 +323,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA
         free(array_h);
         // Second timepoint
         copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h,
-                                                copyParams.extent.width * sizeof(DTYPE),
+                                                copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
         copyParams.dstArray = cuArray2_d;
@@ -332,7 +332,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DTYPE, float>(cuArray_d, cuArray2_d, img);
+            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(cuArray_d, cuArray2_d, img);
         default:
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
             reg_print_msg_error("The image data type is not supported");
@@ -345,10 +345,10 @@ template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, cudaArra
 template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, cudaArray*, nifti_image*);
 template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, cudaArray*, nifti_image*); // for deformation field
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) {
     const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]);
-    cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DTYPE>();
+    cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DataType>();
     NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize));
     return EXIT_SUCCESS;
 }
@@ -356,10 +356,10 @@ template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, int*);
 template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, int*);
 template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, int*); // for deformation field
 /* *************************************************************** */
-template <class DTYPE>
+template <class DataType>
 int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, int *dim) {
     const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]);
-    cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DTYPE>();
+    cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DataType>();
     NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize));
     NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray2_d, &texDesc, volumeSize));
     return EXIT_SUCCESS;
@@ -368,9 +368,9 @@ template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, cudaArray**, i
 template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, cudaArray**, int*);
 template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, cudaArray**, int*); // for deformation field
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int *dim) {
-    const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE);
+template <class DataType>
+int cudaCommon_allocateArrayToDevice(DataType **array_d, int *dim) {
+    const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
     return EXIT_SUCCESS;
 }
@@ -379,9 +379,9 @@ template int cudaCommon_allocateArrayToDevice<double>(double**, int*);
 template int cudaCommon_allocateArrayToDevice<int>(int**, int*);
 template int cudaCommon_allocateArrayToDevice<float4>(float4**, int*); // for deformation field
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int vox) {
-    const unsigned int memSize = vox * sizeof(DTYPE);
+template <class DataType>
+int cudaCommon_allocateArrayToDevice(DataType **array_d, int vox) {
+    const unsigned int memSize = vox * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
     return EXIT_SUCCESS;
 }
@@ -390,9 +390,9 @@ template int cudaCommon_allocateArrayToDevice<double>(double**, int);
 template int cudaCommon_allocateArrayToDevice<int>(int**, int);
 template int cudaCommon_allocateArrayToDevice<float4>(float4**, int); // for deformation field
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE **array_d, DTYPE **array2_d, int *dim) {
-    const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE);
+template <class DataType>
+int cudaCommon_allocateArrayToDevice(DataType **array_d, DataType **array2_d, int *dim) {
+    const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
     NR_CUDA_SAFE_CALL(cudaMalloc(array2_d, memSize));
     return EXIT_SUCCESS;
@@ -401,32 +401,32 @@ template int cudaCommon_allocateArrayToDevice<float>(float**, float**, int*);
 template int cudaCommon_allocateArrayToDevice<double>(double**, double**, int*);
 template int  cudaCommon_allocateArrayToDevice<float4>(float4**, float4**, int*); // for deformation field
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE *cuPtr, const unsigned int nElements) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DTYPE), cudaMemcpyDeviceToHost));
+template <class DataType>
+int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, DataType *cuPtr, const unsigned int nElements) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DataType), cudaMemcpyDeviceToHost));
     return EXIT_SUCCESS;
 }
 template int cudaCommon_transferFromDeviceToCpu<float>(float *cpuPtr, float *cuPtr, const unsigned int nElements);
 template int cudaCommon_transferFromDeviceToCpu<double>(double *cpuPtr, double *cuPtr, const unsigned int nElements);
 /* *************************************************************** */
-template <class DTYPE, class NIFTI_TYPE>
-int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d) {
-    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+template <class DataType, class NiftiType>
+int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DataType *array_d) {
+    if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
-        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, img->nvox * sizeof(DTYPE), cudaMemcpyDeviceToHost));
+        NiftiType *array_h = static_cast<NiftiType*>(img->data);
+        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost));
     }
     return EXIT_SUCCESS;
 }
 template int cudaCommon_transferFromDeviceToNifti1<float, float>(nifti_image *img, float *array_d);
 template int cudaCommon_transferFromDeviceToNifti1<double, double>(nifti_image *img, double *array_d);
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) {
-    if (sizeof(DTYPE) == sizeof(float4)) {
+template <class DataType>
+int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d) {
+    if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
         if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) {
             reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
@@ -460,7 +460,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) {
     } else {
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferFromDeviceToNifti1<DTYPE, float>(img, array_d);
+            return cudaCommon_transferFromDeviceToNifti1<DataType, float>(img, array_d);
         default:
             reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
             reg_print_msg_error("The image data type is not supported");
@@ -490,25 +490,25 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray *cuArray_d)
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
-template <class DTYPE, class NIFTI_TYPE>
-int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) {
-    if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) {
+template <class DataType, class NiftiType>
+int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DataType *array_d, DataType *array2_d) {
+    if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
         const size_t voxelNumber = CalcVoxelNumber(*img);
-        NIFTI_TYPE *array_h = static_cast<NIFTI_TYPE*>(img->data);
-        NIFTI_TYPE *array2_h = &array_h[voxelNumber];
-        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, voxelNumber * sizeof(DTYPE), cudaMemcpyDeviceToHost));
-        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (void*)array2_d, voxelNumber * sizeof(DTYPE), cudaMemcpyDeviceToHost));
+        NiftiType *array_h = static_cast<NiftiType*>(img->data);
+        NiftiType *array2_h = &array_h[voxelNumber];
+        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
+        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (void*)array2_d, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
     }
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) {
-    if (sizeof(DTYPE) == sizeof(float4)) {
+template <class DataType>
+int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d, DataType *array2_d) {
+    if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
         if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) {
             reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
@@ -560,7 +560,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE
     } else {
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferFromDeviceToNifti1<DTYPE, float>(img, array_d, array2_d);
+            return cudaCommon_transferFromDeviceToNifti1<DataType, float>(img, array_d, array2_d);
         default:
             reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
             reg_print_msg_error("The image data type is not supported");
@@ -576,8 +576,8 @@ void cudaCommon_free(cudaArray *cuArray_d) {
     NR_CUDA_SAFE_CALL(cudaFreeArray(cuArray_d));
 }
 /* *************************************************************** */
-template <class DTYPE>
-void cudaCommon_free(DTYPE *array_d) {
+template <class DataType>
+void cudaCommon_free(DataType *array_d) {
     NR_CUDA_SAFE_CALL(cudaFree(array_d));
 }
 template void cudaCommon_free<int>(int*);
@@ -585,27 +585,27 @@ template void cudaCommon_free<float>(float*);
 template void cudaCommon_free<double>(double*);
 template void cudaCommon_free<float4>(float4*);
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE *array_d, nifti_image *img) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img->data, img->nvox * sizeof(DTYPE), cudaMemcpyHostToDevice));
+template <class DataType>
+int cudaCommon_transferFromDeviceToNiftiSimple(DataType *array_d, nifti_image *img) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice));
     return EXIT_SUCCESS;
 }
 template int cudaCommon_transferFromDeviceToNiftiSimple<int>(int*, nifti_image*);
 template int cudaCommon_transferFromDeviceToNiftiSimple<float>(float*, nifti_image*);
 template int cudaCommon_transferFromDeviceToNiftiSimple<double>(double*, nifti_image*);
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE *array_d, DTYPE *img, const unsigned int nvox) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DTYPE), cudaMemcpyHostToDevice));
+template <class DataType>
+int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *array_d, DataType *img, const unsigned int nvox) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice));
     return EXIT_SUCCESS;
 }
 template int cudaCommon_transferFromDeviceToNiftiSimple1<int>(int*, int*, const unsigned);
 template int cudaCommon_transferFromDeviceToNiftiSimple1<float>(float*, float*, const unsigned);
 template int cudaCommon_transferFromDeviceToNiftiSimple1<double>(double*, double*, const unsigned);
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_transferArrayFromCpuToDevice(DTYPE *array_d, DTYPE *array_cpu, const unsigned int nElements) {
-    const unsigned int memSize = nElements * sizeof(DTYPE);
+template <class DataType>
+int cudaCommon_transferArrayFromCpuToDevice(DataType *array_d, DataType *array_cpu, const unsigned int nElements) {
+    const unsigned int memSize = nElements * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_cpu, memSize, cudaMemcpyHostToDevice));
     return EXIT_SUCCESS;
 }
@@ -613,9 +613,9 @@ template int cudaCommon_transferArrayFromCpuToDevice<int>(int*, int*, const unsi
 template int cudaCommon_transferArrayFromCpuToDevice<float>(float*, float*, const unsigned int);
 template int cudaCommon_transferArrayFromCpuToDevice<double>(double*, double*, const unsigned int);
 /* *************************************************************** */
-template <class DTYPE>
-int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE *array_d, const unsigned int nElements) {
-    const unsigned int memSize = nElements * sizeof(DTYPE);
+template <class DataType>
+int cudaCommon_transferArrayFromDeviceToCpu(DataType *array_cpu, DataType *array_d, const unsigned int nElements) {
+    const unsigned int memSize = nElements * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMemcpy(array_cpu, array_d, memSize, cudaMemcpyDeviceToHost));
     return EXIT_SUCCESS;
 }
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 18845c32..c8d7efc1 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -69,74 +69,74 @@ struct __attribute__((aligned(4))) float4 {
 #endif //CUDART_VERSION >= 3200
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
+template <class DataType>
 int cudaCommon_allocateArrayToDevice(cudaArray**, int*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
+template <class DataType>
 int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE**, int);
+template <class DataType>
+int cudaCommon_allocateArrayToDevice(DataType**, int);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE**, int*);
+template <class DataType>
+int cudaCommon_allocateArrayToDevice(DataType**, int*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_allocateArrayToDevice(DTYPE**, DTYPE**, int*);
+template <class DataType>
+int cudaCommon_allocateArrayToDevice(DataType**, DataType**, int*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
+template <class DataType>
 int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
+template <class DataType>
 int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, nifti_image*);
+template <class DataType>
+int cudaCommon_transferNiftiToArrayOnDevice(DataType*, nifti_image*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, DTYPE*, nifti_image*);
+template <class DataType>
+int cudaCommon_transferNiftiToArrayOnDevice(DataType*, DataType*, nifti_image*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*);
+template <class DataType>
+int cudaCommon_transferFromDeviceToNifti(nifti_image*, DataType*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*, DTYPE*);
+template <class DataType>
+int cudaCommon_transferFromDeviceToNifti(nifti_image*, DataType*, DataType*);
 /* *************************************************************** */
 extern "C++"
 void cudaCommon_free(cudaArray*);
 /* *************************************************************** */
-extern "C++" template <class DTYPE>
-void cudaCommon_free(DTYPE*);
+extern "C++" template <class DataType>
+void cudaCommon_free(DataType*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE*, nifti_image*);
+template <class DataType>
+int cudaCommon_transferFromDeviceToNiftiSimple(DataType*, nifti_image*);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE*, DTYPE*, const unsigned);
+template <class DataType>
+int cudaCommon_transferFromDeviceToNiftiSimple1(DataType*, DataType*, const unsigned);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_transferFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int);
+template <class DataType>
+int cudaCommon_transferFromDeviceToCpu(DataType*, DataType*, const unsigned int);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_transferArrayFromCpuToDevice(DTYPE*, DTYPE*, const unsigned int);
+template <class DataType>
+int cudaCommon_transferArrayFromCpuToDevice(DataType*, DataType*, const unsigned int);
 /* *************************************************************** */
 extern "C++"
-template <class DTYPE>
-int cudaCommon_transferArrayFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int);
+template <class DataType>
+int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned int);
 /* *************************************************************** */
 extern "C++"
 void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj);
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 541bcf66..ef369a52 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -90,7 +90,7 @@ void reg_optimiser_gpu::StoreCurrentDOF() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_optimiser_gpu::Perturbation(float length) {
-    /// @todo
+    // TODO: Implement reg_optimiser_gpu::Perturbation()
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
@@ -238,10 +238,11 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
     NR_CUDA_SAFE_CALL(cudaMalloc(&sum_d, nodeNumber * sizeof(float2)));
     reg_GetConjugateGradient1_kernel <<< G1, B1 >>> (sum_d);
     NR_CUDA_CHECK_KERNEL(G1, B1);
-    float2 *sum_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&sum_h, nodeNumber * sizeof(float2)))
-        NR_CUDA_SAFE_CALL(cudaMemcpy(sum_h, sum_d, nodeNumber * sizeof(float2), cudaMemcpyDeviceToHost))
-        NR_CUDA_SAFE_CALL(cudaFree(sum_d))
-        double dgg = 0;
+    float2 *sum_h;
+    NR_CUDA_SAFE_CALL(cudaMallocHost(&sum_h, nodeNumber * sizeof(float2)));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(sum_h, sum_d, nodeNumber * sizeof(float2), cudaMemcpyDeviceToHost));
+    NR_CUDA_SAFE_CALL(cudaFree(sum_d));
+    double dgg = 0;
     double gg = 0;
     for (int i = 0; i < nodeNumber; i++) {
         dgg += sum_h[i].x;
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 8f7fd210..9aa08e44 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -53,20 +53,20 @@ texture<float, 1, cudaReadModeElementType> referenceImageArray_texture;
 texture<float, 1, cudaReadModeElementType> warpedImageArray_texture;
 texture<int, 1, cudaReadModeElementType> totalBlock_texture;
 /* *************************************************************** */
-template<class DTYPE>
+template<class DataType>
 __inline__ __device__
-void reg2D_mat44_mul_cuda(float* mat, DTYPE const* in, DTYPE *out)
+void reg2D_mat44_mul_cuda(float* mat, DataType const* in, DataType *out)
 {
-   out[0] = (DTYPE)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * 0 + (double)mat[0 * 4 + 3]);
-   out[1] = (DTYPE)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * 0 + (double)mat[1 * 4 + 3]);
+   out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * 0 + (double)mat[0 * 4 + 3]);
+   out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * 0 + (double)mat[1 * 4 + 3]);
    return;
 }
-template<class DTYPE>
-__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DTYPE const* in, DTYPE *out)
+template<class DataType>
+__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, DataType *out)
 {
-   out[0] = (DTYPE)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
-   out[1] = (DTYPE)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
-   out[2] = (DTYPE)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
+   out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
+   out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
+   out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
    return;
 }
 // Apply the transformation matrix
diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu
index 9282047c..a30cfce3 100644
--- a/reg-lib/cuda/optimizeKernel.cu
+++ b/reg-lib/cuda/optimizeKernel.cu
@@ -15,11 +15,11 @@
 
 #define IDX2C(i,j,ld) (((j)*(ld))+(i))
 /* *************************************************************** */
-template<class DTYPE>
-__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DTYPE const* in, DTYPE *out) {
-    out[0] = (DTYPE)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
-    out[1] = (DTYPE)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
-    out[2] = (DTYPE)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
+template<class DataType>
+__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, DataType *out) {
+    out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
+    out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
+    out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
     return;
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index 4423e45c..dc85dc9b 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -29,21 +29,21 @@ void reg_mat44_logm_cuda(float* mat)
 	//todo
 }
 /* *************************************************************** */
-template<class DTYPE>
-__device__ __inline__ void reg_mat44_mul_cuda(DTYPE const* mat, DTYPE const* in, DTYPE *out)
+template<class DataType>
+__device__ __inline__ void reg_mat44_mul_cuda(DataType const* mat, DataType const* in, DataType *out)
 {
-    out[0] = (DTYPE)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
-    out[1] = (DTYPE)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
-    out[2] = (DTYPE)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
+    out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
+    out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
+    out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
    return;
 }
 /* *************************************************************** */
-template<class DTYPE>
-__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DTYPE const* in, DTYPE *out)
+template<class DataType>
+__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, DataType *out)
 {
-    out[0] = (DTYPE)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
-    out[1] = (DTYPE)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
-    out[2] = (DTYPE)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
+    out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
+    out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
+    out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
    return;
 }
 /* *************************************************************** */

From 3a98656ba18a95f8e9954256092dcb61f7e26177 Mon Sep 17 00:00:00 2001
From: onurulgen <onur.ulgen@kcl.ac.uk>
Date: Wed, 22 Feb 2023 12:10:39 +0000
Subject: [PATCH 061/314] Bug fixes and improvements

---
 niftyreg_build_version.txt       |  2 +-
 reg-lib/Compute.cpp              |  8 ++++----
 reg-lib/F3dContent.cpp           |  5 ++---
 reg-lib/_reg_base.cpp            |  2 +-
 reg-lib/_reg_f3d.cpp             |  2 +-
 reg-lib/cpu/_reg_localTrans.cpp  |  6 ++----
 reg-lib/cpu/_reg_localTrans.h    | 12 +++---------
 reg-lib/cpu/_reg_tools.cpp       | 16 ++++++++--------
 reg-lib/cpu/_reg_tools.h         |  8 ++++----
 reg-lib/cuda/CudaCompute.cpp     |  2 +-
 reg-lib/cuda/_reg_common_cuda.cu |  2 +-
 reg-lib/cuda/_reg_common_cuda.h  |  5 +----
 12 files changed, 29 insertions(+), 41 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c5356ba1..f07e2860 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-174
+175
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 138a739f..2607b56a 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -95,14 +95,14 @@ void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, floa
         for (size_t i = 0; i < controlPointGrid->nvox; ++i)
             currentDOF[i] = bestDOF[i] + scale * gradient[i];
     } else {
-        size_t voxNumber = controlPointGrid->nvox / controlPointGrid->ndim;
+        size_t voxNumber = controlPointGrid->nvox / (controlPointGrid->nz > 1 ? 3 : 2);
         // Update the values for the x-axis displacement
         if (optimiseX) {
             for (size_t i = 0; i < voxNumber; ++i)
                 currentDOF[i] = bestDOF[i] + scale * gradient[i];
         }
         // Update the values for the y-axis displacement
-        if (optimiseY && controlPointGrid->ndim > 1) {
+        if (optimiseY) {
             float *currentDOFY = &currentDOF[voxNumber];
             float *bestDOFY = &bestDOF[voxNumber];
             float *gradientY = &gradient[voxNumber];
@@ -110,7 +110,7 @@ void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, floa
                 currentDOFY[i] = bestDOFY[i] + scale * gradientY[i];
         }
         // Update the values for the z-axis displacement
-        if (optimiseZ && controlPointGrid->ndim > 2) {
+        if (optimiseZ && controlPointGrid->nz > 1) {
             float *currentDOFZ = &currentDOF[2 * voxNumber];
             float *bestDOFZ = &bestDOF[2 * voxNumber];
             float *gradientZ = &gradient[2 * voxNumber];
@@ -145,7 +145,7 @@ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimis
 void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
     // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ
     nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
-    reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength);
+    reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / maxGradLength);
 }
 /* *************************************************************** */
 void Compute::SmoothGradient(float sigma) {
diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp
index aaf37975..0f474212 100644
--- a/reg-lib/F3dContent.cpp
+++ b/reg-lib/F3dContent.cpp
@@ -17,17 +17,17 @@ F3dContent::F3dContent(nifti_image *referenceIn,
         reg_print_msg_error("controlPointGridIn can't be nullptr");
         reg_exit();
     }
-    AllocateLocalWeightSim(localWeightSimIn);
     AllocateWarpedGradient();
     AllocateTransformationGradient();
     AllocateVoxelBasedMeasureGradient();
+    AllocateLocalWeightSim(localWeightSimIn);
 }
 /* *************************************************************** */
 F3dContent::~F3dContent() {
-    DeallocateLocalWeightSim();
     DeallocateWarpedGradient();
     DeallocateTransformationGradient();
     DeallocateVoxelBasedMeasureGradient();
+    DeallocateLocalWeightSim();
 }
 /* *************************************************************** */
 void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) {
@@ -38,7 +38,6 @@ void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) {
     localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5];
     localWeightSim->nvox = CalcVoxelNumber(*localWeightSim, localWeightSim->ndim);
     localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper);
-    F3dContent::ZeroVoxelBasedMeasureGradient();
     reg_getDeformationFromDisplacement(voxelBasedMeasureGradient);
     reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0);
 }
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 2c7cd9e6..c267f535 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -1079,7 +1079,7 @@ void reg_base<T>::Run() {
                 NormaliseGradient();
 
                 // Initialise the line search initial step size
-                currentSize = currentSize > maxStepSize ? maxStepSize : currentSize;
+                currentSize = std::min(currentSize, maxStepSize);
 
                 // A line search is performed
                 optimiser->Optimise(maxStepSize, smallestSize, currentSize);
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 66207c26..c8c296eb 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -591,8 +591,8 @@ double reg_f3d<T>::GetObjectiveFunctionValue() {
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetObjectiveFunctionValue");
 #endif
-    // Store the global objective function value
 
+    // Store the global objective function value
     return this->currentWMeasure - currentWBE - currentWLE - currentWJac - this->currentWLand;
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 026c0a63..a1f2eb9d 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -1750,8 +1750,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
                                        nifti_image *voxelImage,
                                        float weight,
                                        bool update,
-                                       mat44 *voxelToMillimeter
-                                       )
+                                       const mat44 *voxelToMillimeter)
 {
    const size_t nodeNumber = CalcVoxelNumber(*nodeImage);
    const size_t voxelNumber = CalcVoxelNumber(*voxelImage);
@@ -1924,8 +1923,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
                                   nifti_image *voxelImage,
                                   float weight,
                                   bool update,
-                                  mat44 *voxelToMillimeter
-                                  )
+                                  const mat44 *voxelToMillimeter)
 {
    if(nodeImage->datatype!=voxelImage->datatype)
    {
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index d6a964a1..30d1aec7 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -47,7 +47,6 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
                                           nifti_image *floatingImage,
                                           mat44 *forwardAffineTrans,
                                           float *spacing);
-
 /* *************************************************************** */
 /** @brief Compute a dense deformation field in the space of a reference
  * image from a grid of control point.
@@ -86,8 +85,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
                                   nifti_image *voxelImage,
                                   float weight,
                                   bool update,
-                                  mat44 *voxelToMillimeter = nullptr
-      );
+                                  const mat44 *voxelToMillimeter = nullptr);
 /* *************************************************************** */
 /** @brief Refine a grid of control points
  * @param referenceImage Image that defined the space of the reference
@@ -97,8 +95,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
  */
 extern "C++"
 void reg_spline_refineControlPointGrid(nifti_image *controlPointGridImage,
-                                       nifti_image *referenceImage = nullptr
-      );
+                                       nifti_image *referenceImage = nullptr);
 /* *************************************************************** */
 /** @brief This function compose the a first control point image with a second one:
  * Grid2(x) <= Grid1(Grid2(x)).
@@ -119,8 +116,7 @@ int reg_spline_cppComposition(nifti_image *grid1,
                               nifti_image *grid2,
                               bool displacement1,
                               bool displacement2,
-                              bool bspline
-                              );
+                              bool bspline);
 /* *************************************************************** */
 /** @brief Preforms the composition of two deformation fields
  * The deformation field image is applied to the second image:
@@ -157,7 +153,6 @@ extern "C++"
 void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
                                                    nifti_image *deformationFieldImage,
                                                    bool updateStepNumber);
-
 /* *************************************************************** */
 /** @brief The deformation field (img2) is computed by integrating
  * a velocity Grid (img1)
@@ -178,7 +173,6 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
 extern "C++"
 void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                              nifti_image *flowField);
-
 /* *************************************************************** */
 
 
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index ee023059..b7bec647 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -510,8 +510,8 @@ void reg_tools_operationImageToImage(const nifti_image *img1,
    shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2,operation)
 #endif
     for (i = 0; i < voxelNumber; i++)
-        resPtr[i] = Type((operation((double)img1Ptr[i] * sclSlope1 + img1->scl_inter,
-                                    (double)img2Ptr[i] * sclSlope2 + img2->scl_inter) - img1->scl_inter) / sclSlope1);
+        resPtr[i] = static_cast<Type>((operation(img1Ptr[i] * sclSlope1 + img1->scl_inter,
+                                                 img2Ptr[i] * sclSlope2 + img2->scl_inter) - img1->scl_inter) / sclSlope1);
 }
 /* *************************************************************** */
 void reg_tools_addImageToImage(const nifti_image *img1,
@@ -701,7 +701,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1,
 template <class Type>
 void reg_tools_operationValueToImage(const nifti_image *img,
                                      nifti_image *res,
-                                     float val,
+                                     const double& val,
                                      const Operation& operation) {
     const Type *imgPtr = static_cast<Type*>(img->data);
     Type *resPtr = static_cast<Type*>(res->data);
@@ -725,12 +725,12 @@ void reg_tools_operationValueToImage(const nifti_image *img,
    shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope,operation)
 #endif
     for (i = 0; i < voxelNumber; i++)
-        resPtr[i] = Type((operation((double)imgPtr[i] * sclSlope + img->scl_inter, val) - img->scl_inter) / sclSlope);
+        resPtr[i] = static_cast<Type>((operation(imgPtr[i] * sclSlope + img->scl_inter, val) - img->scl_inter) / sclSlope);
 }
 /* *************************************************************** */
 void reg_tools_addValueToImage(const nifti_image *img,
                                nifti_image *res,
-                               float val) {
+                               const double& val) {
     if (img->datatype != res->datatype) {
         reg_print_fct_error("reg_tools_addValueToImage");
         reg_print_msg_error("Input and output image do not have the same data type");
@@ -776,7 +776,7 @@ void reg_tools_addValueToImage(const nifti_image *img,
 /* *************************************************************** */
 void reg_tools_subtractValueFromImage(const nifti_image *img,
                                       nifti_image *res,
-                                      float val) {
+                                      const double& val) {
     if (img->datatype != res->datatype) {
         reg_print_fct_error("reg_tools_subtractValueFromImage");
         reg_print_msg_error("Input and output image do not have the same data type");
@@ -822,7 +822,7 @@ void reg_tools_subtractValueFromImage(const nifti_image *img,
 /* *************************************************************** */
 void reg_tools_multiplyValueToImage(const nifti_image *img,
                                     nifti_image *res,
-                                    float val) {
+                                    const double& val) {
     if (img->datatype != res->datatype) {
         reg_print_fct_error("reg_tools_multiplyValueToImage");
         reg_print_msg_error("Input and output image do not have the same data type");
@@ -868,7 +868,7 @@ void reg_tools_multiplyValueToImage(const nifti_image *img,
 /* *************************************************************** */
 void reg_tools_divideValueToImage(const nifti_image *img,
                                   nifti_image *res,
-                                  float val) {
+                                  const double& val) {
     if (img->datatype != res->datatype) {
         reg_print_fct_error("reg_tools_divideValueToImage");
         reg_print_msg_error("Input and output image do not have the same data type");
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 92c2d6bd..bcbe3df1 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -195,7 +195,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1,
 extern "C++"
 void reg_tools_addValueToImage(const nifti_image *img,
                                nifti_image *out,
-                               float val);
+                               const double& val);
 /* *************************************************************** */
 /** @brief Subtract a scalar from all image intensity
  * @param img Input image
@@ -205,7 +205,7 @@ void reg_tools_addValueToImage(const nifti_image *img,
 extern "C++"
 void reg_tools_subtractValueFromImage(const nifti_image *img,
                                       nifti_image *out,
-                                      float val);
+                                      const double& val);
 /* *************************************************************** */
 /** @brief Multiply a scalar to all image intensity
  * @param img Input image
@@ -215,7 +215,7 @@ void reg_tools_subtractValueFromImage(const nifti_image *img,
 extern "C++"
 void reg_tools_multiplyValueToImage(const nifti_image *img,
                                     nifti_image *out,
-                                    float val);
+                                    const double& val);
 /* *************************************************************** */
 /** @brief Divide a scalar to all image intensity
  * @param img Input image
@@ -225,7 +225,7 @@ void reg_tools_multiplyValueToImage(const nifti_image *img,
 extern "C++"
 void reg_tools_divideValueToImage(const nifti_image *img,
                                   nifti_image *out,
-                                  float val);
+                                  const double& val);
 /* *************************************************************** */
 /** @brief Binarise an input image. All values different
  * from 0 are set to 1, 0 otherwise.
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 910c66f5..a20b8d12 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -122,7 +122,7 @@ double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool opt
 /* *************************************************************** */
 void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
     // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ
-    reg_multiplyValue_gpu(nodeNumber, dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), 1 / (float)maxGradLength);
+    reg_multiplyValue_gpu(nodeNumber, dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), float(1 / maxGradLength));
 }
 /* *************************************************************** */
 void CudaCompute::SmoothGradient(float sigma) {
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index ab3fc019..a401e995 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -670,7 +670,7 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr,
     texDesc.normalizedCoords = normalizedCoordinates;
 
     // Create texture object
-    UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), &cudaCommon_destroyTextureObject);
+    UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), cudaCommon_destroyTextureObject);
     NR_CUDA_SAFE_CALL(cudaCreateTextureObject(texObj.get(), &resDesc, &texDesc, nullptr));
 
     return texObj;
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index c8d7efc1..f601c2ee 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -138,10 +138,7 @@ extern "C++"
 template <class DataType>
 int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned int);
 /* *************************************************************** */
-extern "C++"
-void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj);
-/* *************************************************************** */
-using UniqueTextureObjectPtr = std::unique_ptr<cudaTextureObject_t, decltype(&cudaCommon_destroyTextureObject)>;
+using UniqueTextureObjectPtr = std::unique_ptr<cudaTextureObject_t, void(*)(cudaTextureObject_t*)>;
 /* *************************************************************** */
 extern "C++"
 UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr,

From 2153f65430900dfbffdbb7f349e4fb1d4f631c0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 24 Feb 2023 17:18:10 +0000
Subject: [PATCH 062/314] Refactorisations

---
 niftyreg_build_version.txt          |  2 +-
 reg-apps/reg_f3d.cpp                |  2 +-
 reg-io/nrrd/reg_nrrd.cpp            |  4 ++--
 reg-lib/Compute.cpp                 |  4 ++--
 reg-lib/Compute.h                   |  2 +-
 reg-lib/_reg_f3d.cpp                |  4 +---
 reg-lib/_reg_f3d2.cpp               |  4 ++--
 reg-lib/cpu/_reg_localTrans.cpp     | 10 +++++-----
 reg-lib/cpu/_reg_localTrans_jac.cpp |  4 ++--
 reg-lib/cuda/CudaCompute.cpp        |  2 +-
 reg-lib/cuda/CudaCompute.h          |  2 +-
 reg-lib/cuda/_reg_nmi_gpu.cu        |  2 +-
 reg-test/reg_test_interpolation.cpp | 12 +++---------
 13 files changed, 23 insertions(+), 31 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f07e2860..1057e9a2 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-175
+176
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 5cf0f25c..f273e138 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -124,7 +124,7 @@ void Usage(char *exec) {
     reg_print_info(exec, "\t-ln <int>\t\tNumber of level to perform [3]");
     reg_print_info(exec, "\t-lp <int>\t\tOnly perform the first levels [ln]");
     reg_print_info(exec, "\t-nopy\t\t\tDo not use a pyramidal approach");
-    reg_print_info(exec, "\t-noConj\t\t\tTo not use the conjuage gradient optimisation but a simple gradient ascent");
+    reg_print_info(exec, "\t-noConj\t\t\tTo not use the conjugate gradient optimisation but a simple gradient ascent");
     reg_print_info(exec, "\t-pert <int>\t\tTo add perturbation step(s) after each optimisation scheme");
     reg_print_info(exec, "");
     reg_print_info(exec, "*** F3D2 options:");
diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp
index 76f812b7..57fd436b 100644
--- a/reg-io/nrrd/reg_nrrd.cpp
+++ b/reg-io/nrrd/reg_nrrd.cpp
@@ -386,7 +386,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
       break;
    default:
       reg_print_fct_error("reg_io_nifti2nrrd");
-      reg_print_msg_error("he data type is not supported. Exit");
+      reg_print_msg_error("The data type is not supported. Exit");
       reg_exit();
    }
 
@@ -543,7 +543,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
          break;
       default:
          reg_print_fct_error("reg_convertVectorField_nifti_to_nrrd");
-         reg_print_msg_error("he data type is not supported. Exit");
+         reg_print_msg_error("The data type is not supported. Exit");
          reg_exit();
       }
 
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 2607b56a..cee5b7de 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -142,7 +142,7 @@ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimis
     return 0;
 }
 /* *************************************************************** */
-void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
+void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ
     nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
     reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / maxGradLength);
@@ -295,7 +295,7 @@ void Compute::ExponentiateGradient(Content& conBwIn) {
     // Normalise the forward gradient
     reg_tools_divideValueToImage(voxelBasedMeasureGradient, // in
                                  voxelBasedMeasureGradient, // out
-                                 powf(2, compNum)); // value
+                                 pow(2, compNum)); // value
 
     for (size_t i = 0; i <= compNum; ++i)
         nifti_image_free(tempDef[i]);
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index 9b4fded1..aef76487 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -22,7 +22,7 @@ class Compute {
     virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint);
     virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ);
-    virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength);
+    virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void SmoothGradient(float sigma);
     virtual void GetApproximatedGradient(InterfaceOptimiser& opt);
     virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber);
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index c8c296eb..6cb183ac 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -212,8 +212,6 @@ void reg_f3d<T>::Initialise() {
 
         // The control point position image is initialised with the affine transformation
         if (!this->affineTransformation) {
-            memset(controlPointGrid->data, 0, controlPointGrid->nvox * controlPointGrid->nbyper);
-            reg_tools_multiplyValueToImage(controlPointGrid, controlPointGrid, 0.f);
             reg_getDeformationFromDisplacement(controlPointGrid);
         } else reg_affine_getDeformationField(this->affineTransformation, controlPointGrid);
     } else {
@@ -501,7 +499,7 @@ T reg_f3d<T>::NormaliseGradient() {
     if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
         // The gradient is normalised if we are running f3d
         // It will be normalised later when running f3d2
-        this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength);
+        this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
 #ifndef NDEBUG
         char text[255];
         sprintf(text, "Objective function gradient maximal length: %g", maxGradLength);
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index e4330e0e..dc51ddcf 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -499,9 +499,9 @@ T reg_f3d2<T>::NormaliseGradient() {
 #endif
 
     // The forward gradient is normalised
-    this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength);
+    this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
     // The backward gradient is normalised
-    computeBw->NormaliseGradient(this->optimiser->GetVoxNumber_b(), maxGradLength);
+    computeBw->NormaliseGradient(this->optimiser->GetVoxNumber_b(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::NormaliseGradient");
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index a1f2eb9d..ace0ff95 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -1163,7 +1163,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                         tempZ = _mm_add_ps(_mm_mul_ps(basis_sse, zControlPointCoordinates.m[c*4+b]), tempZ );
                      }
                   }
-                  //the values stored in SSE variables are transfered to normal float
+                  //the values stored in SSE variables are transferred to normal float
                   val.m = tempX;
                   real[0] = val.f[0]+val.f[1]+val.f[2]+val.f[3];
                   val.m = tempY;
@@ -1407,7 +1407,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                                                                 zControlPointCoordinates.m[coord]),
                                                                      tempZ );
                                               }
-                                              //the values stored in SSE variables are transfered to normal float
+                                              //the values stored in SSE variables are transferred to normal float
 #ifdef __SSE3__
                                               val.m = _mm_hadd_ps(tempX, tempY);
                                               val.m = _mm_hadd_ps(val.m, tempZ);
@@ -1593,7 +1593,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                               tempY = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], yControlPointCoordinates.m[a]), tempY );
                               tempZ = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], zControlPointCoordinates.m[a]), tempZ );
                           }
-                          //the values stored in SSE variables are transfered to normal float
+                          //the values stored in SSE variables are transferred to normal float
                           val.m=tempX;
                           real[0]=val.f[0]+val.f[1]+val.f[2]+val.f[3];
                           val.m=tempY;
@@ -3592,7 +3592,7 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
             ptrX++;
             ptrY++;
          }
-         //the values stored in SSE variables are transfered to normal float
+         //the values stored in SSE variables are transferred to normal float
          val.m = tempX;
          xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
          val.m = tempY;
@@ -3847,7 +3847,7 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
                   ptrZ++;
                }
             }
-            //the values stored in SSE variables are transfered to normal float
+            //the values stored in SSE variables are transferred to normal float
             val.m = tempX;
             xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
             val.m = tempY;
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 0c21b34e..7e3baadf 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -951,7 +951,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                      tempZ_z = _mm_add_ps(_mm_mul_ps(basisZ.m[incr0], coeffZ.m[incr0]), tempZ_z );
                   }
 
-                  //the values stored in SSE variables are transfered to normal float
+                  //the values stored in SSE variables are transferred to normal float
                   val.m = tempX_x;
                   jacobianMatrix.m[0][0] = val.f[0]+val.f[1]+val.f[2]+val.f[3];
                   val.m = tempX_y;
@@ -1179,7 +1179,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                      tempZ_z = _mm_add_ps(_mm_mul_ps(basisZ.m[incr0], coeffZ.m[incr0]), tempZ_z );
                   }
 
-                  //the values stored in SSE variables are transfered to normal float
+                  //the values stored in SSE variables are transferred to normal float
                   val.m = tempX_x;
                   jacobianMatrix.m[0][0] = val.f[0]+val.f[1]+val.f[2]+val.f[3];
                   val.m = tempX_y;
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index a20b8d12..2717cc83 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -120,7 +120,7 @@ double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool opt
     return reg_getMaximalLength_gpu(dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), nodeNumber);
 }
 /* *************************************************************** */
-void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) {
+void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ
     reg_multiplyValue_gpu(nodeNumber, dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), float(1 / maxGradLength));
 }
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index e9796408..85d3904e 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -20,7 +20,7 @@ class CudaCompute: public Compute {
     virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override;
     virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) override;
-    virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength) override;
+    virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void SmoothGradient(float sigma) override;
     virtual void GetApproximatedGradient(InterfaceOptimiser& opt) override;
     virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber) override;
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 71eeb05a..07a708f9 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -210,7 +210,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
-    // The latest joint histogram is transfered onto the GPU
+    // The latest joint histogram is transferred onto the GPU
     float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float));
     for (unsigned short i = 0; i < this->totalBinNumber[0]; ++i)
         temp[i] = static_cast<float>(this->forwardJointHistogramLog[0][i]);
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 2fad9b34..116a2bc8 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -131,11 +131,9 @@ TEST_CASE("Resampling", "[resampling]") {
     interpCubicSplineKernel(0.2f, xBasis);
     interpCubicSplineKernel(0.3f, yBasis);
     for (int y = 0; y <= 3; ++y) {
-        float resX = 0;
         for (int x = 0; x <= 3; ++x) {
-            resX += ref2dPtr[y * dimFlo[1] + x] * xBasis[x];
+            resCubic2d[0] += ref2dPtr[y * dimFlo[1] + x] * xBasis[x] * yBasis[y];
         }
-        resCubic2d[0] += resX * yBasis[y];
     }
 
     // create the test case
@@ -189,15 +187,11 @@ TEST_CASE("Resampling", "[resampling]") {
     float zBasis[4];
     interpCubicSplineKernel(0.4f, zBasis);
     for (int z = 0; z <= 3; ++z) {
-        float resY = 0;
         for (int y = 0; y <= 3; ++y) {
-            float resX = 0;
             for (int x = 0; x <= 3; ++x) {
-                resX += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] * xBasis[x];
+                resCubic3d[0] += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] * xBasis[x] * yBasis[y] * zBasis[z];
             }
-            resY += resX * yBasis[y];
         }
-        resCubic3d[0] += resY * zBasis[z];
     }
 
     // create the test case
@@ -268,7 +262,7 @@ TEST_CASE("Resampling", "[resampling]") {
             }
         }
     }
-    // Only free-ing ref as the rest if cleared by content destructor
+    // Only freeing ref as the rest if cleared by content destructor
     nifti_image_free(reference2d);
     nifti_image_free(reference3d);
 }

From e8c116fa2bb0ef4cac70c3112477250e27ba6fce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 27 Feb 2023 15:52:45 +0000
Subject: [PATCH 063/314] Fix a bug incorrectly choosing 2D/3D image gradient

---
 niftyreg_build_version.txt      | 2 +-
 reg-lib/cpu/_reg_resampling.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 1057e9a2..eec49411 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-176
+177
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 83abc996..48251afc 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -3165,7 +3165,7 @@ void reg_getImageGradient3(nifti_image *floatingImage,
     /* The deformation field contains the position in the real world */
     if(interp==3)
     {
-        if(deformationField->nz>1)
+        if(deformationField->nu>2)
         {
             CubicSplineImageGradient3D
                     <FloatingTYPE,GradientTYPE,FieldTYPE>(floatingImage,
@@ -3188,7 +3188,7 @@ void reg_getImageGradient3(nifti_image *floatingImage,
     }
     else  // trilinear interpolation [ by default ]
     {
-        if(deformationField->nz>1)
+        if(deformationField->nu>2)
         {
             TrilinearImageGradient
                     <FloatingTYPE,GradientTYPE,FieldTYPE>(floatingImage,

From bc7ff3bd121063c40c43c895b90dd6794fad5659 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 27 Feb 2023 15:54:29 +0000
Subject: [PATCH 064/314] Add a common header for tests

---
 niftyreg_build_version.txt          |  2 +-
 reg-test/reg_test_common.h          | 21 +++++++++++++++++++++
 reg-test/reg_test_interpolation.cpp | 23 +----------------------
 3 files changed, 23 insertions(+), 23 deletions(-)
 create mode 100644 reg-test/reg_test_common.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index eec49411..f84d24e5 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-177
+178
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
new file mode 100644
index 00000000..1991aabc
--- /dev/null
+++ b/reg-test/reg_test_common.h
@@ -0,0 +1,21 @@
+// Enable testing
+#define NR_TESTING
+
+#include "Platform.h"
+#include "ResampleImageKernel.h"
+#include "_reg_localTrans.h"
+
+#include <list>
+#include <catch2/catch_test_macros.hpp>
+
+
+template <typename T>
+void interpCubicSplineKernel(T relative, T (&basis)[4]) {
+    if (relative < 0) relative = 0; //reg_rounding error
+    const T relative2 = relative * relative;
+    basis[0] = (relative * ((2.f - relative) * relative - 1.f)) / 2.f;
+    basis[1] = (relative2 * (3.f * relative - 5.f) + 2.f) / 2.f;
+    basis[2] = (relative * ((4.f - 3.f * relative) * relative + 1.f)) / 2.f;
+    basis[3] = (relative - 1.f) * relative2 / 2.f;
+}
+
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 116a2bc8..27f5182a 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -1,18 +1,7 @@
 // OpenCL is not supported for this test
 #undef _USE_OPENCL
-// Enable testing
-#define NR_TESTING
 
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_tools.h"
-
-#include "Kernel.h"
-#include "ResampleImageKernel.h"
-#include "Platform.h"
-#include "AladinContent.h"
-
-#include <list>
-#include <catch2/catch_test_macros.hpp>
+#include "reg_test_common.h"
 
 #define EPS_SINGLE 0.001
 
@@ -29,16 +18,6 @@
 typedef std::tuple<std::string, nifti_image*, nifti_image*, int, float*> TestData;
 typedef std::tuple<unique_ptr<Content>, shared_ptr<Platform>> ContentDesc;
 
-template <typename T>
-void interpCubicSplineKernel(T relative, T (&basis)[4]) {
-    if (relative < 0) relative = 0; //reg_rounding error
-    const T relative2 = relative * relative;
-    basis[0] = (relative * ((2.f - relative) * relative - 1.f)) / 2.f;
-    basis[1] = (relative2 * (3.f * relative - 5.f) + 2.f) / 2.f;
-    basis[2] = (relative * ((4.f - 3.f * relative) * relative + 1.f)) / 2.f;
-    basis[3] = (relative - 1.f) * relative2 / 2.f;
-}
-
 TEST_CASE("Resampling", "[resampling]") {
     // Create a reference 2D image
     int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 };

From c7247492ece55f858fb822a5d3375ac461d55eba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 27 Feb 2023 17:41:27 +0000
Subject: [PATCH 065/314] Remove the old tests

---
 niftyreg_build_version.txt                    |   2 +-
 reg-test/reg_test_blockMatching.cpp           | 182 --------
 .../reg_test_bspline_deformation_field.cpp    | 104 -----
 reg-test/reg_test_changeDataType.cpp          | 100 ----
 ...est_coherence_affine_deformation_field.cpp | 102 -----
 reg-test/reg_test_coherence_blockMatching.cpp | 192 --------
 reg-test/reg_test_coherence_interpolation.cpp | 111 -----
 .../reg_test_compose_deformation_field.cpp    |  62 ---
 reg-test/reg_test_computation_time.cpp        | 392 ----------------
 reg-test/reg_test_convolution.cpp             |  62 ---
 reg-test/reg_test_fullAffine.cpp              |  69 ---
 reg-test/reg_test_fullAffine_cl.cpp           |  65 ---
 reg-test/reg_test_fullAffine_cuda.cpp         |  64 ---
 reg-test/reg_test_fullNonlinear.cpp           |  85 ----
 reg-test/reg_test_fullSymNonlinear.cpp        |  85 ----
 reg-test/reg_test_imageGradient.cpp           | 168 -------
 reg-test/reg_test_leastTrimmedSquares.cpp     | 146 ------
 reg-test/reg_test_linearElasticity.cpp        |  82 ----
 .../reg_test_linearElasticityGradient.cpp     |  84 ----
 reg-test/reg_test_matrix_operation.cpp        | 101 -----
 reg-test/reg_test_measure.cpp                 | 148 ------
 reg-test/reg_test_mindDescriptor.cpp          |  69 ---
 reg-test/reg_test_mindsscDescriptor.cpp       |  73 ---
 .../reg_test_nonlinear_deformation_field.cpp  |  74 ---
 reg-test/reg_test_svd.cpp                     | 292 ------------
 reg-test/reg_test_svd_cuda.cpp                | 427 ------------------
 26 files changed, 1 insertion(+), 3340 deletions(-)
 delete mode 100644 reg-test/reg_test_blockMatching.cpp
 delete mode 100644 reg-test/reg_test_bspline_deformation_field.cpp
 delete mode 100644 reg-test/reg_test_changeDataType.cpp
 delete mode 100644 reg-test/reg_test_coherence_affine_deformation_field.cpp
 delete mode 100644 reg-test/reg_test_coherence_blockMatching.cpp
 delete mode 100644 reg-test/reg_test_coherence_interpolation.cpp
 delete mode 100644 reg-test/reg_test_compose_deformation_field.cpp
 delete mode 100644 reg-test/reg_test_computation_time.cpp
 delete mode 100644 reg-test/reg_test_convolution.cpp
 delete mode 100644 reg-test/reg_test_fullAffine.cpp
 delete mode 100755 reg-test/reg_test_fullAffine_cl.cpp
 delete mode 100755 reg-test/reg_test_fullAffine_cuda.cpp
 delete mode 100644 reg-test/reg_test_fullNonlinear.cpp
 delete mode 100644 reg-test/reg_test_fullSymNonlinear.cpp
 delete mode 100644 reg-test/reg_test_imageGradient.cpp
 delete mode 100644 reg-test/reg_test_leastTrimmedSquares.cpp
 delete mode 100644 reg-test/reg_test_linearElasticity.cpp
 delete mode 100644 reg-test/reg_test_linearElasticityGradient.cpp
 delete mode 100644 reg-test/reg_test_matrix_operation.cpp
 delete mode 100644 reg-test/reg_test_measure.cpp
 delete mode 100644 reg-test/reg_test_mindDescriptor.cpp
 delete mode 100644 reg-test/reg_test_mindsscDescriptor.cpp
 delete mode 100644 reg-test/reg_test_nonlinear_deformation_field.cpp
 delete mode 100644 reg-test/reg_test_svd.cpp
 delete mode 100644 reg-test/reg_test_svd_cuda.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f84d24e5..a14f8d53 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-178
+179
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
deleted file mode 100644
index a14411df..00000000
--- a/reg-test/reg_test_blockMatching.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_blockMatching.h"
-#include "_reg_tools.h"
-#include "_reg_globalTrans.h"
-
-#include "BlockMatchingKernel.h"
-#include "Platform.h"
-#include "AladinContent.h"
-
-#define EPS 0.000001
-
-void check_matching_difference(int dim,
-                               float* referencePosition,
-                               float* warpedPosition,
-                               float* expectedReferencePositions,
-                               float* expectedWarpedPosition,
-                               float &max_difference) {
-    float difference;
-    for (int i = 0; i < dim; ++i) {
-        difference = fabsf(referencePosition[i] - expectedReferencePositions[i]);
-        max_difference = std::max(difference, max_difference);
-        if (difference > EPS) {
-#ifndef NDEBUG
-            fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS);
-            if (dim == 2) {
-                fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n",
-                        referencePosition[0], referencePosition[1],
-                        expectedReferencePositions[0], expectedReferencePositions[1]);
-                fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n",
-                        warpedPosition[0], warpedPosition[1],
-                        expectedWarpedPosition[0], expectedWarpedPosition[1]);
-            } else {
-                fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n",
-                        referencePosition[0], referencePosition[1], referencePosition[2],
-                        expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]);
-                fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n",
-                        warpedPosition[0], warpedPosition[1], warpedPosition[2],
-                        expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]);
-            }
-            reg_exit();
-#endif
-        }
-        difference = fabsf(warpedPosition[i] - expectedWarpedPosition[i]);
-        max_difference = std::max(difference, max_difference);
-        if (difference > EPS) {
-#ifndef NDEBUG
-            fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS);
-            if (dim == 2) {
-                fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n",
-                        referencePosition[0], referencePosition[1],
-                        expectedReferencePositions[0], expectedReferencePositions[1]);
-                fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n",
-                        warpedPosition[0], warpedPosition[1],
-                        expectedWarpedPosition[0], expectedWarpedPosition[1]);
-            } else {
-                fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n",
-                        referencePosition[0], referencePosition[1], referencePosition[2],
-                        expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]);
-                fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n",
-                        warpedPosition[0], warpedPosition[1], warpedPosition[2],
-                        expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]);
-            }
-            reg_exit();
-#endif
-        }
-    }
-}
-
-void test(AladinContent *con, Platform *platform) {
-    unique_ptr<Kernel> blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) };
-    blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
-}
-
-int main(int argc, char **argv) {
-
-    if (argc != 5) {
-        fprintf(stderr, "Usage: %s <refImage> <warpedImage> <expectedBlockMatchingMatrix> <platformType>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputRefImageName = argv[1];
-    char *inputWarpedImageName = argv[2];
-    char *expectedBlockMatchingMatrixName = argv[3];
-    PlatformType platformType{ atoi(argv[4]) };
-
-    // Read the input reference image
-    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == nullptr) {
-        reg_print_msg_error("The input reference image could not be read");
-        return EXIT_FAILURE;
-    }
-    reg_tools_changeDatatype<float>(referenceImage);
-    //dim
-    int imgDim = referenceImage->dim[0];
-
-    // Read the input floating image
-    nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName);
-    if (warpedImage == nullptr) {
-        reg_print_msg_error("The input warped image could not be read");
-        return EXIT_FAILURE;
-    }
-    reg_tools_changeDatatype<float>(warpedImage);
-
-    // Read the expected block matching matrix
-    std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(expectedBlockMatchingMatrixName);
-    size_t m = inputMatrixSize.first;
-    size_t n = inputMatrixSize.second;
-    float **expectedBlockMatchingMatrix = reg_tool_ReadMatrixFile<float>(expectedBlockMatchingMatrixName, m, n);
-
-    // Create a mask
-    int *mask = (int *)malloc(referenceImage->nvox * sizeof(int));
-    for (size_t i = 0; i < referenceImage->nvox; ++i) {
-        mask[i] = i;
-    }
-
-    _reg_blockMatchingParam* blockMatchingParams;
-
-    // Platforms
-    unique_ptr<Platform> platform{ new Platform(platformType) };
-    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
-    unique_ptr<AladinContent> con{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
-    con->SetWarped(warpedImage);
-    //con->SetWarped(referenceImage);
-    test(con.get(), platform.get());
-    blockMatchingParams = con->GetBlockMatchingParams();
-
-#ifndef NDEBUG
-    std::cout << "blockMatchingParams->definedActiveBlock = " << blockMatchingParams->definedActiveBlockNumber << std::endl;
-#endif
-
-    float max_difference = 0;
-
-    int blockIndex = 0;
-    int positionIndex = 0;
-    int matrixIndex = 0;
-
-    unsigned int zMax = 2;
-    if (imgDim == 3)
-        zMax = blockMatchingParams->blockNumber[2] - 1;
-
-
-    for (unsigned int z = 1; z < zMax; z += 3) {
-        for (unsigned int y = 1; y < blockMatchingParams->blockNumber[1] - 1; y += 3) {
-            for (unsigned int x = 1; x < blockMatchingParams->blockNumber[0] - 1; x += 3) {
-
-                if (imgDim == 3) {
-                    blockIndex = (z * blockMatchingParams->blockNumber[1] + y) * blockMatchingParams->blockNumber[0] + x;
-                } else {
-                    blockIndex = y * blockMatchingParams->blockNumber[0] + x;
-                }
-
-                positionIndex = imgDim * blockMatchingParams->totalBlock[blockIndex];
-
-                if (positionIndex > -1) {
-                    check_matching_difference(imgDim,
-                                              &blockMatchingParams->referencePosition[positionIndex],
-                                              &blockMatchingParams->warpedPosition[positionIndex],
-                                              &expectedBlockMatchingMatrix[matrixIndex][0],
-                                              &expectedBlockMatchingMatrix[matrixIndex][3],
-                                              max_difference);
-                    matrixIndex++;
-                }
-            }
-        }
-    }
-
-    free(mask);
-    reg_matrix2DDeallocate(m, expectedBlockMatchingMatrix);
-    nifti_image_free(referenceImage);
-
-    if (max_difference > EPS) {
-#ifndef NDEBUG
-        fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS);
-#endif
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    printf("All good (%g<%g)\n", max_difference, EPS);
-#endif
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp
deleted file mode 100644
index 1f16c543..00000000
--- a/reg-test/reg_test_bspline_deformation_field.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_localTrans.h"
-#include "_reg_tools.h"
-
-#include "AffineDeformationFieldKernel.h"
-
-#define EPS 0.0001
-
-int main(int argc, char **argv)
-{
-    if (argc != 6) {
-        fprintf(stderr, "Usage: %s <refImage> <inputGrid> <expectedField> <useComp> <platformType>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputRefImageName = argv[1];
-    char *inputCPPFileName = argv[2];
-    char *inputDefImageName = argv[3];
-    bool useComposition = atoi(argv[4]);
-    // PlatformType platformType{atoi(argv[5])};
-
-    // Read the input reference image
-    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == nullptr) {
-        reg_print_msg_error("The input reference image could not be read");
-        return EXIT_FAILURE;
-    }
-    nifti_image *cppImage = reg_io_ReadImageFile(inputCPPFileName);
-    if (cppImage == nullptr) {
-        reg_print_msg_error("The control point grid image could not be read");
-        return EXIT_FAILURE;
-    }
-
-    // Read the input deformation field image image
-    nifti_image *expectedDefField = reg_io_ReadImageFile(inputDefImageName);
-    if (expectedDefField == nullptr){
-        reg_print_msg_error("The input deformation field image could not be read");
-        return EXIT_FAILURE;
-    }
-    // Check the dimension of the input images
-    if (referenceImage->nx != expectedDefField->nx ||
-        referenceImage->ny != expectedDefField->ny ||
-        referenceImage->nz != expectedDefField->nz ||
-        (referenceImage->nz > 1 ? 3 : 2) != expectedDefField->nu){
-        reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes");
-        return EXIT_FAILURE;
-    }
-
-    // Create a deformation field
-    nifti_image *test_field = nifti_dup(*expectedDefField, false);
-
-    if(useComposition)
-    {
-       // Set the deformation to identity
-       reg_tools_multiplyValueToImage(test_field, test_field, 0.f);
-       test_field->intent_p1=DISP_FIELD;
-       reg_getDeformationFromDisplacement(test_field);
-
-       // Compute the deformation field throught composition
-       reg_spline_getDeformationField(cppImage,
-                                      test_field,
-                                      nullptr,
-                                      true,
-                                      true);
-    }
-    else{
-       // Compute the deformation field from scratch
-       reg_spline_getDeformationField(cppImage,
-                                      test_field,
-                                      nullptr,
-                                      false,
-                                      true);
-    }
-
-    // Compute the difference between the computed and expected deformation fields
-    nifti_image *diff_field = nifti_dup(*expectedDefField, false);
-    reg_tools_subtractImageFromImage(expectedDefField, test_field, diff_field);
-    reg_tools_abs_image(diff_field);
-    double max_difference = reg_tools_getMaxValue(diff_field, -1);
-
-    // Delete all allocated images
-    nifti_image_free(referenceImage);
-    nifti_image_free(expectedDefField);
-    nifti_image_free(cppImage);
-    nifti_image_free(test_field);
-    nifti_image_free(diff_field);
-
-    // Check if the obtained difference is below a specific threshold
-    if (max_difference > EPS){
-        fprintf(stderr, "reg_test_bspline_deformation_field from blank error too large: %g (>%g)\n",
-                max_difference, EPS);
-        // return on a failed test
-        return EXIT_FAILURE;
-    }
-
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_bspline_deformation_field ok 1: %g (<%g)\n",
-            max_difference, EPS);
-#endif
-
-    // return on a successful test
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_changeDataType.cpp b/reg-test/reg_test_changeDataType.cpp
deleted file mode 100644
index 1f924e41..00000000
--- a/reg-test/reg_test_changeDataType.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-//TEST CHANGE DATATYPE
-#include "_reg_ReadWriteImage.h"
-#include "_reg_globalTrans.h"
-#include "_reg_tools.h"
-//
-#define EPS 0.000001
-//
-int main(int argc, char **argv)
-{
-    if (argc != 4) {
-        fprintf(stderr, "Usage: %s <path to the image to cast> <cast value: float - double> <expected casted image>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-    //
-    char str_float[] = "float";
-    char str_double[] = "double";
-    char str_uchar[] = "uchar";
-    //
-    char *inputImageName = argv[1];
-    // Read the input image
-    nifti_image *referenceImage = reg_io_ReadImageFile(inputImageName);
-    if (referenceImage == nullptr) {
-        reg_print_msg_error("The input reference image could not be read");
-        return EXIT_FAILURE;
-    }
-    //
-    char* castValue = argv[2];
-    if (strcmp(castValue, str_float) != 0 && strcmp(castValue, str_double) != 0 && strcmp(castValue, str_uchar) != 0) {
-        reg_print_msg_error("The cast value is wrong - it should be uchar, float or double");
-        return EXIT_FAILURE;
-    }
-    //
-    char *expectedImageName = argv[3];
-    // Read the input image
-    nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName);
-    if (expectedImage == nullptr) {
-        reg_print_msg_error("The expected image could not be read");
-        return EXIT_FAILURE;
-    }
-    //
-    ///////////////////////////////////////////////////////////////////////////////////////
-#ifndef NDEBUG
-    //TEST CHANGE DATATYPE --> WE CAN ONLY UPGRADE THE DATATYPE !
-    //FIRST DETECT THE DATATYPE OF THE INPUT IMAGE
-    char* inputDataType = nifti_datatype_string(referenceImage->datatype);
-    char text[255];
-    sprintf(text, "The input image datatype is: %s", inputDataType);
-    reg_print_msg_debug(text);
-    //
-    char text3[255];
-    sprintf(text3, "The cast value is: %s", castValue);
-    reg_print_msg_debug(text3);
-    //DETECT THE DATATYPE OF THE EXPECTED IMAGE
-    char* expectedDataType = nifti_datatype_string(expectedImage->datatype);
-    char text2[255];
-    sprintf(text2, "The expected image datatype is: %s", expectedDataType);
-    reg_print_msg_debug(text2);
-#endif
-    ///////////////////////////////////////////////////////////////////////////////////////
-    if (strcmp(castValue, str_float) == 0) {
-#ifndef NDEBUG
-        reg_print_msg_debug("cast image to float")
-#endif
-            reg_tools_changeDatatype<float>(referenceImage);
-    }
-    else if (strcmp(castValue, str_double) == 0) {
-#ifndef NDEBUG
-        reg_print_msg_debug("cast image to double")
-#endif
-            reg_tools_changeDatatype<double>(referenceImage);
-    }
-    else if (strcmp(castValue, str_uchar) == 0) {
-#ifndef NDEBUG
-        reg_print_msg_debug("cast image to unsigned char")
-#endif
-            reg_tools_changeDatatype<unsigned char>(referenceImage);
-    }
-    else {
-        reg_print_msg_error("The reference image could not be casted");
-        return EXIT_FAILURE;
-    }
-    //
-    // Compute the difference between the computed and inputed deformation field
-    reg_tools_subtractImageFromImage(referenceImage, expectedImage, expectedImage);
-    reg_tools_abs_image(expectedImage);
-    double max_difference = reg_tools_getMaxValue(expectedImage, -1);
-
-    nifti_image_free(referenceImage);
-    nifti_image_free(expectedImage);
-
-    if (max_difference > EPS){
-        fprintf(stderr, "reg_test_changeDataType error too large: %g (>%g)\n",
-            max_difference, EPS);
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_changeDataType ok: %g (<%g)\n", max_difference, EPS);
-#endif
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp
deleted file mode 100644
index 905f71af..00000000
--- a/reg-test/reg_test_coherence_affine_deformation_field.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_globalTrans.h"
-#include "_reg_tools.h"
-
-#include "Kernel.h"
-#include "AffineDeformationFieldKernel.h"
-#include "Platform.h"
-#include "AladinContent.h"
-
-#define EPS 0.000001
-#define EPS_SINGLE 0.0001
-
-void test(AladinContent *con, Platform *platform) {
-    unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con) };
-    affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
-}
-
-int main(int argc, char **argv) {
-    if (argc != 5) {
-        fprintf(stderr, "Usage: %s <refImage> <inputMatrix> <expectedField> <platformType>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputRefImageName = argv[1];
-    char *inputMatFileName = argv[2];
-    char *inputDefImageName = argv[3];
-    PlatformType platformType{ atoi(argv[4]) };
-
-    // Read the input reference image
-    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == nullptr) {
-        reg_print_msg_error("The input reference image could not be read");
-        return EXIT_FAILURE;
-    }
-    // Read the input affine matrix
-    mat44 *inputMatrix = (mat44 *)malloc(sizeof(mat44));
-    reg_tool_ReadAffineFile(inputMatrix, inputMatFileName);
-
-    // Read the input deformation field image image
-    nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName);
-    if (inputDeformationField == nullptr) {
-        reg_print_msg_error("The input deformation field image could not be read");
-        return EXIT_FAILURE;
-    }
-    // Check the dimension of the input images
-    if (referenceImage->nx != inputDeformationField->nx ||
-        referenceImage->ny != inputDeformationField->ny ||
-        referenceImage->nz != inputDeformationField->nz ||
-        (referenceImage->nz > 1 ? 3 : 2) != inputDeformationField->nu) {
-        reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes");
-        return EXIT_FAILURE;
-    }
-
-    // Create a deformation field
-    nifti_image *test_field_cpu = nifti_dup(*inputDeformationField, false);
-    nifti_image *test_field_gpu = nifti_dup(*inputDeformationField, false);
-
-    // Compute the affine deformation field
-    unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
-    unique_ptr<AladinContent> conCpu{ new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) };
-    unique_ptr<Platform> platformGpu{ new Platform(platformType) };
-    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
-    unique_ptr<AladinContent> conGpu{ contentCreator->Create(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) };
-
-    //Check if the platform used is double capable
-    bool isDouble = conGpu->IsCurrentComputationDoubleCapable();
-    double proper_eps = EPS;
-    if (isDouble == 0) {
-        proper_eps = EPS_SINGLE;
-    }
-
-    //CPU or GPU code
-    reg_tools_changeDatatype<float>(referenceImage);
-    test(conCpu.get(), platformCpu.get());
-    test_field_cpu = conCpu->GetDeformationField();
-
-    test(conGpu.get(), platformGpu.get());
-    test_field_gpu = conGpu->GetDeformationField();
-
-    // Compute the difference between the computed and inputted deformation field
-    nifti_image *diff_field = nifti_dup(*inputDeformationField, false);
-    reg_tools_subtractImageFromImage(inputDeformationField, test_field_cpu, diff_field);
-    reg_tools_abs_image(diff_field);
-    double max_difference = reg_tools_GetMaxValue(diff_field, -1);
-
-    nifti_image_free(referenceImage);
-    nifti_image_free(inputDeformationField);
-    free(inputMatrix);
-
-    if (max_difference > proper_eps) {
-        fprintf(stderr, "reg_test_affine_deformation_field error too large: %g (>%g)\n",
-                max_difference, proper_eps);
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_affine_deformation_field ok: %g (<%g)\n",
-            max_difference, proper_eps);
-#endif
-
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp
deleted file mode 100644
index 7c9ce127..00000000
--- a/reg-test/reg_test_coherence_blockMatching.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_blockMatching.h"
-#include "_reg_tools.h"
-#include "_reg_globalTrans.h"
-
-#include "BlockMatchingKernel.h"
-#include "Platform.h"
-#include "AladinContent.h"
-
-#define EPS 0.000001
-
-void check_matching_difference(int dim,
-                               float* cpuRefPos,
-                               float* cpuWarPos,
-                               float* gpuRefPos,
-                               float* gpuWarPos,
-                               float &max_difference) {
-    bool cpu_finite = cpuWarPos[0] == cpuWarPos[0] ? true : false;
-    bool gpu_finite = gpuWarPos[0] == gpuWarPos[0] ? true : false;
-
-    if (!cpu_finite && !gpu_finite) return;
-
-    if (cpu_finite != gpu_finite) {
-        max_difference = std::numeric_limits<float>::max();
-        return;
-    }
-
-    float difference;
-    for (int i = 0; i < dim; ++i) {
-        difference = fabsf(cpuRefPos[i] - gpuRefPos[i]);
-        max_difference = std::max(difference, max_difference);
-        if (difference > EPS) {
-#ifndef NDEBUG
-            fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS);
-            if (dim == 2) {
-                fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n",
-                        cpuRefPos[0], cpuRefPos[1],
-                        gpuRefPos[0], gpuRefPos[1]);
-                fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n",
-                        cpuWarPos[0], cpuWarPos[1],
-                        gpuWarPos[0], gpuWarPos[1]);
-            } else {
-                fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n",
-                        cpuRefPos[0], cpuRefPos[1], cpuRefPos[2],
-                        gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]);
-                fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n",
-                        cpuWarPos[0], cpuWarPos[1], cpuWarPos[2],
-                        gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]);
-            }
-            reg_exit();
-#endif
-        }
-        difference = fabsf(cpuWarPos[i] - gpuWarPos[i]);
-        max_difference = std::max(difference, max_difference);
-        if (difference > EPS) {
-#ifndef NDEBUG
-            fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS);
-            if (dim == 2) {
-                fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n",
-                        cpuRefPos[0], cpuRefPos[1],
-                        gpuRefPos[0], gpuRefPos[1]);
-                fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n",
-                        cpuWarPos[0], cpuWarPos[1],
-                        gpuWarPos[0], gpuWarPos[1]);
-            } else {
-                fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n",
-                        cpuRefPos[0], cpuRefPos[1], cpuRefPos[2],
-                        gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]);
-                fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n",
-                        cpuWarPos[0], cpuWarPos[1], cpuWarPos[2],
-                        gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]);
-            }
-            reg_exit();
-#endif
-        }
-    }
-}
-
-void test(AladinContent *con, Platform *platform) {
-    unique_ptr<Kernel> blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) };
-    blockMatchingKernel->castTo<BlockMatchingKernel>()->Calculate();
-}
-
-int main(int argc, char **argv) {
-    if (argc != 4) {
-        fprintf(stderr, "Usage: %s <refImage> <warpedImage> <platformType>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputRefImageName = argv[1];
-    char *inputWarpedImageName = argv[2];
-    PlatformType platformType{ atoi(argv[3]) };
-
-    if (platformType != PlatformType::Cuda && platformType != PlatformType::OpenCl) {
-        reg_print_msg_error("Unexpected platform code");
-        return EXIT_FAILURE;
-    }
-
-    // Read the input reference image
-    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == nullptr) {
-        reg_print_msg_error("The input reference image could not be read");
-        return EXIT_FAILURE;
-    }
-    reg_tools_changeDatatype<float>(referenceImage);
-    //dim
-    int imgDim = referenceImage->dim[0];
-
-    // Read the input floating image
-    nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName);
-    if (warpedImage == nullptr) {
-        reg_print_msg_error("The input warped image could not be read");
-        return EXIT_FAILURE;
-    }
-    reg_tools_changeDatatype<float>(warpedImage);
-
-    // Create a mask
-    int *mask = (int *)malloc(referenceImage->nvox * sizeof(int));
-    for (size_t i = 0; i < referenceImage->nvox; ++i) mask[i] = i;
-
-    // CPU Platform
-    unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
-    unique_ptr<AladinContent> conCpu{ new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
-    conCpu->SetWarped(warpedImage);
-    test(conCpu.get(), platformCpu.get());
-    _reg_blockMatchingParam *blockMatchingParams_cpu = conCpu->GetBlockMatchingParams();
-
-#ifndef NDEBUG
-    std::cout << "blockMatchingParams_cpu->activeBlockNumber = " << blockMatchingParams_cpu->activeBlockNumber << std::endl;
-    std::cout << "blockMatchingParams_cpu->definedActiveBlockNumber = " << blockMatchingParams_cpu->definedActiveBlockNumber << std::endl;
-#endif
-
-    // GPU Platform
-    unique_ptr<Platform> platformGpu{ new Platform(platformType) };
-    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
-    unique_ptr<AladinContent> conGpu{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) };
-    conGpu->SetWarped(warpedImage);
-    test(conGpu.get(), platformGpu.get());
-    _reg_blockMatchingParam *blockMatchingParams_gpu = conGpu->GetBlockMatchingParams();
-
-#ifndef NDEBUG
-    std::cout << "blockMatchingParams_gpu->activeBlockNumber = " << blockMatchingParams_gpu->activeBlockNumber << std::endl;
-    std::cout << "blockMatchingParams_gpu->definedActiveBlockNumber = " << blockMatchingParams_gpu->definedActiveBlockNumber << std::endl;
-#endif
-
-    float max_difference = 0;
-
-    if (blockMatchingParams_cpu->definedActiveBlockNumber != blockMatchingParams_gpu->definedActiveBlockNumber) {
-        reg_print_msg_error("The number of defined active blockNumber blocks vary accros platforms");
-        char out_text[255];
-        sprintf(out_text, "activeBlockNumber CPU: %i", blockMatchingParams_cpu->activeBlockNumber);
-        reg_print_msg_error(out_text);
-        sprintf(out_text, "activeBlockNumber GPU: %i", blockMatchingParams_gpu->activeBlockNumber);
-        reg_print_msg_error(out_text);
-        sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_cpu->definedActiveBlockNumber);
-        reg_print_msg_error(out_text);
-        sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_gpu->definedActiveBlockNumber);
-        reg_print_msg_error(out_text);
-        return EXIT_FAILURE;
-    }
-
-    for (int i = 0; i < blockMatchingParams_cpu->activeBlockNumber * imgDim; i += imgDim) {
-        check_matching_difference(imgDim,
-                                  &blockMatchingParams_cpu->referencePosition[i],
-                                  &blockMatchingParams_cpu->warpedPosition[i],
-                                  &blockMatchingParams_gpu->referencePosition[i],
-                                  &blockMatchingParams_gpu->warpedPosition[i],
-                                  max_difference);
-    }
-    size_t test_cpu = 0, test_gpu = 0;
-    for (int i = 0; i < blockMatchingParams_cpu->activeBlockNumber * imgDim; i += imgDim) {
-        test_cpu = (blockMatchingParams_cpu->warpedPosition[i] == blockMatchingParams_cpu->warpedPosition[i]) ? test_cpu + 1 : test_cpu;
-        test_gpu = (blockMatchingParams_gpu->warpedPosition[i] == blockMatchingParams_gpu->warpedPosition[i]) ? test_gpu + 1 : test_gpu;
-    }
-    printf("CPU: %zu - GPU: %zu\n", test_cpu, test_gpu);
-
-    free(mask);
-    nifti_image_free(referenceImage);
-
-    if (max_difference > EPS) {
-#ifndef NDEBUG
-        fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS);
-#endif
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    printf("All good (%g<%g)\n", max_difference, EPS);
-#endif
-
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp
deleted file mode 100644
index 3463640e..00000000
--- a/reg-test/reg_test_coherence_interpolation.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_resampling.h"
-#include "_reg_tools.h"
-
-#include "ResampleImageKernel.h"
-#include "Platform.h"
-#include "AladinContent.h"
-
-#define EPS 0.000001
-#define EPS_SINGLE 0.0001
-
-int main(int argc, char **argv) {
-    if (argc != 5) {
-        fprintf(stderr, "Usage: %s <refImage> <inputDefField> <order> <platformType>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputRefImageName = argv[1];
-    char *inputDefImageName = argv[2];
-    int interpolation = atoi(argv[3]);
-    PlatformType platformType{ atoi(argv[4]) };
-
-    if (platformType != PlatformType::Cuda && platformType != PlatformType::OpenCl) {
-        reg_print_msg_error("Unexpected platform code");
-        return EXIT_FAILURE;
-    }
-
-    // Read the input reference image
-    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == nullptr) {
-        reg_print_msg_error("The input reference image could not be read");
-        return EXIT_FAILURE;
-    }
-    reg_tools_changeDatatype<float>(referenceImage);
-    // Read the input deformation field image image
-    nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName);
-    if (inputDeformationField == nullptr) {
-        reg_print_msg_error("The input deformation field image could not be read");
-        return EXIT_FAILURE;
-    }
-    reg_tools_changeDatatype<float>(inputDeformationField);
-
-    // Check the dimension of the input images
-    if (referenceImage->nx != inputDeformationField->nx ||
-        referenceImage->ny != inputDeformationField->ny ||
-        referenceImage->nz != inputDeformationField->nz ||
-        (referenceImage->nz > 1 ? 3 : 2) != inputDeformationField->nu) {
-        reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes");
-        return EXIT_FAILURE;
-    }
-
-    // Initialise warped images
-    nifti_image *cpuWarped = nifti_dup(*referenceImage, false);
-    nifti_image *gpuWarped = nifti_dup(*referenceImage, false);
-
-    int *tempMask = (int *)calloc(referenceImage->nvox, sizeof(int));
-
-    // CPU platform
-    unique_ptr<Platform> platformCpu{ new Platform(PlatformType::Cpu) };
-    unique_ptr<AladinContent> conCpu{ new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)) };
-    conCpu->SetWarped(cpuWarped);
-    conCpu->SetDeformationField(inputDeformationField);
-    conCpu->SetReferenceMask(tempMask);
-    unique_ptr<Kernel> resampleImageKernel_cpu{ platformCpu->CreateKernel(ResampleImageKernel::GetName(), conCpu) };
-    resampleImageKernel_cpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
-                                                                      std::numeric_limits<float>::quiet_NaN());
-    cpuWarped = conCpu->GetWarped();
-
-    // GPU platform
-    unique_ptr<Platform> platformGpu{ new Platform(platformType) };
-    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platformGpu->CreateContentCreator(ContentType::Aladin)) };
-    unique_ptr<AladinContent> conGpu{ contentCreator->Create(nullptr, referenceImage, nullptr, sizeof(float)) };
-    conGpu->SetWarped(gpuWarped);
-    conGpu->SetDeformationField(inputDeformationField);
-    conGpu->SetReferenceMask(tempMask);
-
-    unique_ptr<Kernel> resampleImageKernel_gpu{ platformGpu->CreateKernel(ResampleImageKernel::GetName(), conGpu) };
-    resampleImageKernel_gpu->castTo<ResampleImageKernel>()->Calculate(interpolation,
-                                                                      std::numeric_limits<float>::quiet_NaN());
-    gpuWarped = conGpu->GetWarped();
-
-    //Check if the platform used is double capable
-    double proper_eps = EPS;
-    if (conGpu->IsCurrentComputationDoubleCapable() == 0) {
-        proper_eps = EPS_SINGLE;
-    }
-
-    // Compute the difference between the warped images
-    nifti_image *diff_field = nifti_dup(*referenceImage, false);
-
-    // Compute the difference between the computed and inputted warped image
-    reg_tools_subtractImageFromImage(cpuWarped, gpuWarped, diff_field);
-    reg_tools_abs_image(diff_field);
-    double max_difference = reg_tools_GetMaxValue(diff_field, -1);
-
-    // free the allocated images
-    nifti_image_free(referenceImage);
-    nifti_image_free(cpuWarped);
-    nifti_image_free(gpuWarped);
-    nifti_image_free(inputDeformationField);
-
-    if (max_difference > proper_eps) {
-        fprintf(stderr, "reg_test_interpolation error too large: %g (>%g)\n",
-                max_difference, proper_eps);
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_interpolation ok: %g ( < %g )\n", max_difference, proper_eps);
-#endif
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_compose_deformation_field.cpp b/reg-test/reg_test_compose_deformation_field.cpp
deleted file mode 100644
index 0d2cdc5e..00000000
--- a/reg-test/reg_test_compose_deformation_field.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_localTrans.h"
-#include "_reg_tools.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-   if(argc!=3)
-   {
-      fprintf(stderr, "Usage: %s <inputDefField> <expectedField>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputDefFieldImageName=argv[1];
-   char *inputComFieldImageName=argv[2];
-
-   // Read the input deformation field image image
-   nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefFieldImageName);
-   if(inputDeformationField==nullptr){
-      reg_print_msg_error("The input deformation field image could not be read");
-      return EXIT_FAILURE;
-   }
-   nifti_image *inputComFieldImage = reg_io_ReadImageFile(inputComFieldImageName);
-   if(inputComFieldImage==nullptr){
-      reg_print_msg_error("The input composed deformation field image could not be read");
-      return EXIT_FAILURE;
-   }
-   // Check the dimension of the input images
-   if(inputDeformationField->nx != inputComFieldImage->nx ||
-      inputDeformationField->ny != inputComFieldImage->ny ||
-      inputDeformationField->nz != inputComFieldImage->nz ||
-      inputDeformationField->nu != inputComFieldImage->nu){
-      reg_print_msg_error("The input deformation field images do not have corresponding sizes");
-      return EXIT_FAILURE;
-   }
-
-   // Create a deformation field
-   nifti_image *test_field = nifti_dup(*inputDeformationField);
-
-   // Compute the non-linear deformation field
-   reg_defField_compose(inputDeformationField,
-                        test_field,
-                        nullptr);
-
-   // Compute the difference between the computed and inputed deformation field
-   reg_tools_subtractImageFromImage(inputComFieldImage,test_field,test_field);
-   reg_tools_abs_image(test_field);
-   double max_difference=reg_tools_getMaxValue(test_field);
-
-   nifti_image_free(inputDeformationField);
-   nifti_image_free(inputComFieldImage);
-   nifti_image_free(test_field);
-
-   if(max_difference>EPS){
-      fprintf(stderr, "reg_test_compose_deformation_field error too large: %g (>%g)\n",
-              max_difference, EPS);
-      return EXIT_FAILURE;
-   }
-
-   return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_computation_time.cpp b/reg-test/reg_test_computation_time.cpp
deleted file mode 100644
index cfe24ad0..00000000
--- a/reg-test/reg_test_computation_time.cpp
+++ /dev/null
@@ -1,392 +0,0 @@
-#include "_reg_f3d.h"
-
-//#define ONLY_ONE_ITERATION
-
-//#define COMPUTE_DEF_AFFINE
-#define COMPUTE_DEF_SPLINE_LUT
-//#define COMPUTE_DEF_SPLINE
-//#define COMPUTE_DEF_COMP
-#define COMPUTE_RESAMPLING
-#define COMPUTE_SP_GRAD
-#define COMPUTE_NMI
-#define COMPUTE_NMI_GRAD
-#define COMPUTE_BE
-#define COMPUTE_BE_GRAD
-#define COMPUTE_LE
-#define COMPUTE_LE_GRAD
-#define COMPUTE_VOX_GRID_CONV
-
-int main(int argc, char **argv)
-{
-    if (argc != 3) {
-        fprintf(stderr, "Usage: %s <img1>  <img2>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputImageOneName = argv[1];
-    char *inputImageTwoName = argv[2];
-
-    // Read the input reference image
-    nifti_image *inputImageOne = reg_io_ReadImageFile(inputImageOneName);
-    if (inputImageOne == nullptr) {
-        reg_print_msg_error("The first input image could not be read");
-        return EXIT_FAILURE;
-    }
-    reg_tools_changeDatatype<float>(inputImageOne);
-    nifti_image *inputImageTwo = reg_io_ReadImageFile(inputImageTwoName);
-    if (inputImageTwo == nullptr) {
-        reg_print_msg_error("The second input image could not be read");
-        return EXIT_FAILURE;
-    }
-    reg_tools_changeDatatype<float>(inputImageTwo);
-
-    // Check that both images have the same size
-    for(int i=0;i<8;++i){
-        if(inputImageOne->dim[i]!=inputImageTwo->dim[i]){
-            reg_print_msg_error("The input images do not have the same side");
-            return EXIT_FAILURE;
-        }
-    }
-
-    // Allocate a warped image
-    nifti_image *warpedImage = nifti_dup(*inputImageOne, false);
-
-    // Create mask
-    int *mask = (int *)calloc(inputImageOne->nvox,sizeof(int));
-
-    // Generate deformation fields
-    nifti_image *defFieldOne=nifti_copy_nim_info(inputImageOne);
-    defFieldOne->ndim=defFieldOne->dim[0]=5;
-    defFieldOne->nt=defFieldOne->dim[4]=1;
-    defFieldOne->nu=defFieldOne->dim[5]=defFieldOne->nz>1?3:2;
-    defFieldOne->nvox = CalcVoxelNumber(*defFieldOne, defFieldOne->ndim);
-    defFieldOne->data = malloc(defFieldOne->nvox*defFieldOne->nbyper);
-    nifti_image *defFieldTwo=nifti_dup(*defFieldOne, false);
-    nifti_image *defFieldThr=nifti_dup(*defFieldOne, false);
-
-    // Generate a control point grids
-    nifti_image *splineGridOne = nullptr;
-    float spacing[3] = {
-        inputImageOne->dx * 5.f,
-        inputImageOne->dz * 5.f,
-        inputImageOne->dy * 5.f
-    };
-    reg_createControlPointGrid<float>(&splineGridOne,
-                                      inputImageOne,
-                                      spacing);
-    nifti_image *splineGridTwo = nifti_dup(*splineGridOne, false);
-
-    // Generate an affine matrix
-    mat44 affine;reg_mat44_eye(&affine);
-
-    time_t start,end; float total_time;
-
-#ifdef COMPUTE_DEF_AFFINE
-    // Compute n deformation field from the affine matrix
-#ifdef ONLY_ONE_ITERATION
-    const int affine_iteration=1;
-#else
-    const int affine_iteration=150;
-#endif
-    time(&start);
-    for(int i=0;i<affine_iteration;++i)
-        reg_affine_getDeformationField(&affine,
-                                       defFieldOne,
-                                       false,
-                                       mask);
-    time(&end);
-    total_time=end-start;
-    printf("Affine deformation in %g second(s) per iteration [%g]\n",
-           total_time/(float)affine_iteration, total_time);
-#endif
-
-
-    // Compute n deformation field from the control point grid
-#ifdef ONLY_ONE_ITERATION
-    const int spline_iteration=1;
-#else
-    const int spline_iteration=150;
-#endif
-#ifdef COMPUTE_DEF_SPLINE
-    time(&start);
-    for(int i=0;i<spline_iteration;++i)
-        reg_spline_getDeformationField(splineGridOne,
-                                       defFieldOne,
-                                       mask,
-                                       false,
-                                       true,
-                                       true);
-    time(&end);
-    total_time=end-start;
-    printf("BSpline (no lut) deformation in %g second(s) per iteration [%g]\n",
-           total_time/(float)spline_iteration, total_time);
-#endif
-#ifdef COMPUTE_DEF_SPLINE_LUT
-    time(&start);
-    for(int i=0;i<spline_iteration;++i)
-        reg_spline_getDeformationField(splineGridOne,
-                                       defFieldOne,
-                                       mask);
-    time(&end);
-    total_time=end-start;
-    printf("BSpline (with lut) deformation in %g second(s) per iteration [%g]\n",
-           total_time/(float)spline_iteration, total_time);
-#endif
-
-
-#ifdef COMPUTE_DEF_COMP
-    reg_spline_getDeformationField(splineGridOne,
-                                   defFieldTwo,
-                                   mask);
-// Compute n composed deformation fields
-#ifdef ONLY_ONE_ITERATION
-    const int compose_field_iteration=1;
-#else
-    const int compose_field_iteration=150;
-#endif
-    time(&start);
-    for(int i=0;i<compose_field_iteration;++i){
-        reg_defField_compose(defFieldOne, defFieldTwo, mask);
-        memcpy(defFieldTwo->data, defFieldOne->data, defFieldTwo->nvox*defFieldTwo->nbyper);
-    }
-    time(&end);
-    total_time=end-start;
-    printf("Compose deformation in %g second(s) per iteration [%g]\n",
-           total_time/(float)compose_field_iteration, total_time);
-#endif
-    // generate and initialise a NMI object
-    reg_nmi *nmi=new reg_nmi;
-    nmi->SetTimepointWeight(0, 1.);
-    nmi->SetRefAndFloatBinNumbers(68, 68, 0);
-    nmi->InitialiseMeasure(inputImageOne,
-                           inputImageTwo,
-                           mask,
-                           inputImageTwo,
-                           defFieldTwo,
-                           defFieldThr);
-
-    // Compute the NMI
-
-#ifdef COMPUTE_NMI
-#ifdef ONLY_ONE_ITERATION
-    const int nmi_iteration=1;
-#else
-    const int nmi_iteration=150;
-#endif
-    time(&start);
-    for(int i=0;i<nmi_iteration;++i)
-        nmi->GetSimilarityMeasureValue();
-    time(&end);
-    total_time=end-start;
-    printf("Compute NMI in %g second(s) per iteration [%g]\n",
-           total_time/(float)nmi_iteration, total_time);
-#endif
-
-#ifdef COMPUTE_RESAMPLING
-    // Warp the floating image the NMI
-#ifdef ONLY_ONE_ITERATION
-    const int resample_iteration=1;
-#else
-    const int resample_iteration=150;
-#endif
-    time(&start);
-    for(int i=0;i<resample_iteration;++i)
-       reg_resampleImage(inputImageTwo,
-                         warpedImage,
-                         defFieldOne,
-                         mask,
-                         1,
-                         std::numeric_limits<float>::quiet_NaN());
-    time(&end);
-    total_time=end-start;
-    printf("Resampling in %g second(s) per iteration [%g]\n",
-           total_time/(float)resample_iteration, total_time);
-#endif
-
-#ifdef COMPUTE_BE
-    // Compute the bending energy
-#ifdef ONLY_ONE_ITERATION
-    const int be_iteration=1;
-#else
-    const int be_iteration=150;
-#endif
-    time(&start);
-    for(int i=0;i<be_iteration;++i)
-       reg_spline_approxBendingEnergy(splineGridOne);
-    time(&end);
-    total_time=end-start;
-    printf("Bending energy in %g second(s) per iteration [%g]\n",
-           total_time/(float)be_iteration, total_time);
-#endif
-
-#ifdef COMPUTE_BE_GRAD
-    // Compute the bending energy gradient
-#ifdef ONLY_ONE_ITERATION
-    const int be_grad_iteration=1;
-#else
-    const int be_grad_iteration=15;
-#endif
-    time(&start);
-    for(int i=0;i<be_grad_iteration;++i)
-       reg_spline_approxBendingEnergyGradient(splineGridOne,
-                                              splineGridTwo,
-                                              0.01);
-    time(&end);
-    total_time=end-start;
-    printf("Bending energy gradient in %g second(s) per iteration [%g]\n",
-           total_time/(float)be_grad_iteration, total_time);
-#endif
-
-#ifdef COMPUTE_LE
-    // Compute the linear-elasticity
-#ifdef ONLY_ONE_ITERATION
-    const int le_iteration=1;
-#else
-    const int le_iteration=150;
-#endif
-    time(&start);
-    for(int i=0;i<le_iteration;++i)
-       reg_spline_approxLinearEnergy(splineGridOne);
-    time(&end);
-    total_time=end-start;
-    printf("Linear elasticity in %g second(s) per iteration [%g]\n",
-           total_time/(float)le_iteration, total_time);
-#endif
-
-#ifdef COMPUTE_LE_GRAD
-    // Compute the linear-elasticity Gradient
-#ifdef ONLY_ONE_ITERATION
-    const int le_grad_iteration=1;
-#else
-    const int le_grad_iteration=15;
-#endif
-    time(&start);
-    for(int i=0;i<le_grad_iteration;++i)
-       reg_spline_approxLinearEnergyGradient(splineGridOne,
-                                             splineGridTwo,
-                                             0.01);
-    time(&end);
-    total_time=end-start;
-    printf("Linear elasticity gradient in %g second(s) per iteration [%g]\n",
-           total_time/(float)le_grad_iteration, total_time);
-#endif
-
-#ifdef COMPUTE_SP_GRAD
-    // Compute the spatial gradient
-#ifdef ONLY_ONE_ITERATION
-    const int spatial_gradient_iteration=1;
-#else
-    const int spatial_gradient_iteration=15;
-#endif
-    time(&start);
-    for(int i=0;i<spatial_gradient_iteration;++i)
-        reg_getImageGradient(inputImageOne,
-                             defFieldTwo,
-                             defFieldOne,
-                             mask,
-                             1,
-                             std::numeric_limits<float>::quiet_NaN(),
-                             0);
-    time(&end);
-    total_time=end-start;
-    printf("Spatial gradient in %g second(s) per iteration [%g]\n",
-           total_time/(float)spatial_gradient_iteration, total_time);
-#endif
-
-
-#ifdef COMPUTE_NMI_GRAD
-    // Compute the NMI voxel gradient
-#ifdef ONLY_ONE_ITERATION
-    const int nmi_gradient_iteration=1;
-#else
-    const int nmi_gradient_iteration=15;
-#endif
-    time(&start);
-    for(int i=0;i<nmi_gradient_iteration;++i)
-        nmi->GetVoxelBasedSimilarityMeasureGradient(0);
-    time(&end);
-    total_time=end-start;
-    printf("NMI gradient in %g second(s) per iteration [%g]\n",
-           total_time/(float)nmi_gradient_iteration, total_time);
-#endif
-
-
-#ifdef COMPUTE_VOX_GRID_CONV
-    // Compute n voxel to grid conversion
-#ifdef ONLY_ONE_ITERATION
-    const int voxel_to_grid_iteration=1;
-#else
-    const int voxel_to_grid_iteration=15;
-#endif
-    time(&start);
-    for(int i=0;i<voxel_to_grid_iteration;++i){
-       int kernel_type=CUBIC_SPLINE_KERNEL;
-       // The voxel based NMI gradient is convolved with a spline kernel
-       // Convolution along the x axis
-       float currentNodeSpacing[3];
-       currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=splineGridOne->dx;
-       bool activeAxis[3]= {1,0,0};
-       reg_tools_kernelConvolution(defFieldThr,
-                                   currentNodeSpacing,
-                                   kernel_type,
-                                   nullptr, // mask
-                                   nullptr, // all volumes are considered as active
-                                   activeAxis
-                                   );
-       // Convolution along the y axis
-       currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=splineGridOne->dy;
-       activeAxis[0]=0;
-       activeAxis[1]=1;
-       reg_tools_kernelConvolution(defFieldThr,
-                                   currentNodeSpacing,
-                                   kernel_type,
-                                   nullptr, // mask
-                                   nullptr, // all volumes are considered as active
-                                   activeAxis
-                                   );
-       // Convolution along the z axis if required
-       if(defFieldThr->nz>1)
-       {
-          currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=splineGridOne->dz;
-          activeAxis[1]=0;
-          activeAxis[2]=1;
-          reg_tools_kernelConvolution(defFieldThr,
-                                      currentNodeSpacing,
-                                      kernel_type,
-                                      nullptr, // mask
-                                      nullptr, // all volumes are considered as active
-                                      activeAxis
-                                      );
-       }
-
-       // The node based NMI gradient is extracted
-       mat44 reorientation;
-       if(inputImageTwo->sform_code>0)
-          reorientation = inputImageTwo->sto_ijk;
-       else reorientation = inputImageTwo->qto_ijk;
-       reg_voxelCentric2NodeCentric(splineGridTwo,
-                                    defFieldThr,
-                                    0.1,
-                                    false, // no update
-                                    &reorientation
-                                    );
-    }
-    time(&end);
-    total_time=end-start;
-    printf("Grid based gradient in %g second(s) per iteration [%g]\n",
-           total_time/(float)voxel_to_grid_iteration, total_time);
-#endif
-
-    free(mask);
-
-    nifti_image_free(defFieldOne);
-    nifti_image_free(defFieldTwo);
-    nifti_image_free(defFieldThr);
-    nifti_image_free(splineGridOne);
-    nifti_image_free(splineGridTwo);
-
-    nifti_image_free(inputImageOne);
-    nifti_image_free(inputImageTwo);
-
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_convolution.cpp b/reg-test/reg_test_convolution.cpp
deleted file mode 100644
index 54bd7232..00000000
--- a/reg-test/reg_test_convolution.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_tools.h"
-
-#define EPS 0.0001
-
-int main(int argc, char **argv)
-{
-    if (argc != 4) {
-        fprintf(stderr, "Usage: %s <refImage> <expectedImage> <convolutionType>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputImageName = argv[1];
-    char *expectedFileName = argv[2];
-    int convolutionType = atoi(argv[3]);
-
-    // Read the input reference image
-    nifti_image *referenceImage = reg_io_ReadImageFile(inputImageName);
-    if (referenceImage == nullptr) {
-        reg_print_msg_error("The input reference image could not be read");
-        return EXIT_FAILURE;
-    }
-    reg_tools_changeDatatype<double>(referenceImage);
-
-    // Apply the convolution
-    float spacing[3]={-5.f,-5.f,-5.f};
-    reg_tools_kernelConvolution(referenceImage,
-                                spacing,
-                                convolutionType);
-
-
-    // Read the input reference image
-    nifti_image *expectedFile = reg_io_ReadImageFile(expectedFileName);
-    if (expectedFile == nullptr) {
-        reg_print_msg_error("The expected result image could not be read");
-        return EXIT_FAILURE;
-    }
-    reg_tools_changeDatatype<double>(expectedFile);
-
-    // Compute the difference between the computed and expected deformation fields
-    nifti_image *diff_file = nifti_dup(*expectedFile, false);
-    reg_tools_subtractImageFromImage(expectedFile, referenceImage, diff_file);
-    reg_tools_abs_image(diff_file);
-    double max_difference = reg_tools_getMaxValue(diff_file, -1);
-
-    nifti_image_free(referenceImage);
-    nifti_image_free(expectedFile);
-
-    if (max_difference > EPS){
-        fprintf(stderr, "reg_test_convolution error too large: %g (>%g)\n",
-                max_difference, EPS);
-        reg_io_WriteImageFile(diff_file, "diff_file.nii.gz");
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_bspline_deformation_field ok: %g (<%g)\n",
-            max_difference, EPS);
-#endif
-    nifti_image_free(diff_file);
-
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_fullAffine.cpp b/reg-test/reg_test_fullAffine.cpp
deleted file mode 100644
index d3424b26..00000000
--- a/reg-test/reg_test_fullAffine.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_aladin_sym.h"
-#include "_reg_tools.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-
-   if(argc!=4)
-   {
-      fprintf(stderr, "Usage: %s <refImage> <floImage> <expectedMatrix>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputRefImageName=argv[1];
-   char *inputFloImageName=argv[2];
-   char *inputMatFileName=argv[3];
-
-   // Read the input reference image
-   nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if(referenceImage==nullptr){
-      reg_print_msg_error("The input reference image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(referenceImage);
-   // Read the input reference image
-   nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName);
-   if(floatingImage==nullptr){
-      reg_print_msg_error("The input floating image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(floatingImage);
-
-   // Read the input affine matrix
-   mat44 *inputMatrix=(mat44 *)malloc(sizeof(mat44));
-   reg_tool_ReadAffineFile(inputMatrix, inputMatFileName);
-
-   // Run the affine registration
-   reg_aladin_sym<float> *affine=new reg_aladin_sym<float>();
-   affine->SetInputReference(referenceImage);
-   affine->SetInputFloating(floatingImage);
-   affine->SetPlatformType(PlatformType::Cpu);
-   affine->Run();
-   mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix());
-
-   // Cleaning up
-   nifti_image_free(referenceImage);
-   nifti_image_free(floatingImage);
-
-   for(int i=0;i<4;++i){
-      for(int j=0;j<4;++j){
-         if(fabsf(differenceMatrix.m[i][j])>EPS){
-            fprintf(stderr, "reg_test_fullAffine error too large: %g (>%g)\n",
-                    fabs(differenceMatrix.m[i][j]), EPS);
-            reg_mat44_disp(inputMatrix, (char *)"Expected Matrix");
-            reg_mat44_disp(affine->GetTransformationMatrix(), (char *)"Obtained Matrix");
-            reg_mat44_disp(&differenceMatrix, (char *)"Difference Matrix");
-            free(inputMatrix);
-            delete affine;
-            return EXIT_FAILURE;
-         }
-      }
-   }
-   free(inputMatrix);
-   delete affine;
-
-   return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_fullAffine_cl.cpp b/reg-test/reg_test_fullAffine_cl.cpp
deleted file mode 100755
index af19c7c8..00000000
--- a/reg-test/reg_test_fullAffine_cl.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_aladin_sym.h"
-#include "_reg_tools.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-
-   if(argc!=4)
-   {
-      fprintf(stderr, "Usage: %s <refImage> <floImage> <expectedMatrix>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputRefImageName=argv[1];
-   char *inputFloImageName=argv[2];
-   char *inputMatFileName=argv[3];
-
-   // Read the input reference image
-   nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if(referenceImage==nullptr){
-      reg_print_msg_error("The input reference image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(referenceImage);
-   // Read the input reference image
-   nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName);
-   if(floatingImage==nullptr){
-      reg_print_msg_error("The input floating image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(floatingImage);
-
-   // Read the input affine matrix
-   mat44 *inputMatrix=(mat44 *)malloc(sizeof(mat44));
-   reg_tool_ReadAffineFile(inputMatrix, inputMatFileName);
-
-   // Run the affine registration
-   reg_aladin<float> *affine=new reg_aladin_sym<float>();
-   affine->SetInputReference(referenceImage);
-   affine->SetInputFloating(floatingImage);
-   affine->SetPlatformType(PlatformType::OpenCl);
-   affine->SetClIdx(1);
-   affine->Run();
-   mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix());
-
-   // Cleaning up
-   free(inputMatrix);
-   delete affine;
-   nifti_image_free(referenceImage);
-   nifti_image_free(floatingImage);
-
-   for(int i=0;i<4;++i){
-      for(int j=0;j<4;++j){
-         if(fabsf(differenceMatrix.m[i][j])>EPS){
-            fprintf(stderr, "reg_test_fullAffine error too large: %g (>%g)\n",
-                    differenceMatrix.m[i][j], EPS);
-            return EXIT_FAILURE;
-         }
-      }
-   }
-
-   return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_fullAffine_cuda.cpp b/reg-test/reg_test_fullAffine_cuda.cpp
deleted file mode 100755
index ffe5e942..00000000
--- a/reg-test/reg_test_fullAffine_cuda.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_aladin_sym.h"
-#include "_reg_tools.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-
-   if(argc!=4)
-   {
-      fprintf(stderr, "Usage: %s <refImage> <floImage> <expectedMatrix>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputRefImageName=argv[1];
-   char *inputFloImageName=argv[2];
-   char *inputMatFileName=argv[3];
-
-   // Read the input reference image
-   nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if(referenceImage==nullptr){
-      reg_print_msg_error("The input reference image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(referenceImage);
-   // Read the input reference image
-   nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName);
-   if(floatingImage==nullptr){
-      reg_print_msg_error("The input floating image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(floatingImage);
-
-   // Read the input affine matrix
-   mat44 *inputMatrix=(mat44 *)malloc(sizeof(mat44));
-   reg_tool_ReadAffineFile(inputMatrix, inputMatFileName);
-
-   // Run the affine registration
-   reg_aladin_sym<float> *affine=new reg_aladin_sym<float>();
-   affine->SetInputReference(referenceImage);
-   affine->SetInputFloating(floatingImage);
-   affine->SetPlatformType(PlatformType::Cuda);
-   affine->Run();
-   mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix());
-
-   // Cleaning up
-   free(inputMatrix);
-   delete affine;
-   nifti_image_free(referenceImage);
-   nifti_image_free(floatingImage);
-
-   for(int i=0;i<4;++i){
-      for(int j=0;j<4;++j){
-         if(fabsf(differenceMatrix.m[i][j])>EPS){
-            fprintf(stderr, "reg_test_fullAffine error too large: %g (>%g)\n",
-                    differenceMatrix.m[i][j], EPS);
-            return EXIT_FAILURE;
-         }
-      }
-   }
-
-   return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_fullNonlinear.cpp b/reg-test/reg_test_fullNonlinear.cpp
deleted file mode 100644
index 136e3307..00000000
--- a/reg-test/reg_test_fullNonlinear.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_f3d.h"
-#include "_reg_tools.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-
-   if(argc!=5)
-   {
-      fprintf(stderr, "Usage: %s <refImage> <floImage> <affineMatrix> <expectedControlPointGrid>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputRefImageName=argv[1];
-   char *inputFloImageName=argv[2];
-   char *inputMatFileName=argv[3];
-   char *inputControlPointGridFileName=argv[4];
-
-   // Read the input reference image
-   nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if(referenceImage==nullptr){
-      reg_print_msg_error("The input reference image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(referenceImage);
-   // Read the input reference image
-   nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName);
-   if(floatingImage==nullptr){
-      reg_print_msg_error("The input floating image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(floatingImage);
-   // Read the input affine matrix
-   mat44 *inputMatrix=(mat44 *)malloc(sizeof(mat44));
-   reg_tool_ReadAffineFile(inputMatrix, inputMatFileName);
-   // Read the input control point grid image
-   nifti_image *inputControlPointGridImage = reg_io_ReadImageFile(inputControlPointGridFileName);
-   if(inputControlPointGridImage==nullptr){
-      reg_print_msg_error("The input control point grid image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(inputControlPointGridImage);
-
-   // Run the affine registration
-   reg_f3d<float> *nonlinear=new reg_f3d<float>(referenceImage->nt,floatingImage->nt);
-   nonlinear->SetReferenceImage(referenceImage);
-   nonlinear->SetFloatingImage(floatingImage);
-   nonlinear->SetAffineTransformation(inputMatrix);
-   nonlinear->Run();
-
-   // Check the control point grid dimension
-   if(nonlinear->GetControlPointPositionImage()->nx != inputControlPointGridImage->nx ||
-      nonlinear->GetControlPointPositionImage()->ny != inputControlPointGridImage->ny ||
-      nonlinear->GetControlPointPositionImage()->nz != inputControlPointGridImage->nz ||
-      nonlinear->GetControlPointPositionImage()->nt != inputControlPointGridImage->nt ||
-      nonlinear->GetControlPointPositionImage()->nu != inputControlPointGridImage->nu){
-      reg_print_msg_error("The input and recovered control point grid images do not have corresponding sizes");
-      return EXIT_FAILURE;
-   }
-
-   // Compute the difference between the computed and inputed deformation field
-   reg_tools_subtractImageFromImage(inputControlPointGridImage,
-                                   nonlinear->GetControlPointPositionImage(),
-                                   inputControlPointGridImage);
-   reg_tools_abs_image(inputControlPointGridImage);
-   double max_difference=reg_tools_getMaxValue(inputControlPointGridImage);
-
-   // Cleaning up
-   nifti_image_free(referenceImage);
-   nifti_image_free(floatingImage);
-   nifti_image_free(inputControlPointGridImage);
-   delete nonlinear;
-   free(inputMatrix);
-
-   if(max_difference>EPS){
-      fprintf(stderr, "reg_test_fullNonlinear error too large: %g (>%g)\n",
-              max_difference, EPS);
-      return EXIT_FAILURE;
-   }
-
-   return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_fullSymNonlinear.cpp b/reg-test/reg_test_fullSymNonlinear.cpp
deleted file mode 100644
index 1becd432..00000000
--- a/reg-test/reg_test_fullSymNonlinear.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_f3d2.h"
-#include "_reg_tools.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-
-   if(argc!=5)
-   {
-      fprintf(stderr, "Usage: %s <refImage> <floImage> <affineMatrix> <expectedControlPointGrid>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputRefImageName=argv[1];
-   char *inputFloImageName=argv[2];
-   char *inputMatFileName=argv[3];
-   char *inputControlPointGridFileName=argv[4];
-
-   // Read the input reference image
-   nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-   if(referenceImage==nullptr){
-      reg_print_msg_error("The input reference image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(referenceImage);
-   // Read the input reference image
-   nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName);
-   if(floatingImage==nullptr){
-      reg_print_msg_error("The input floating image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(floatingImage);
-   // Read the input affine matrix
-   mat44 *inputMatrix=(mat44 *)malloc(sizeof(mat44));
-   reg_tool_ReadAffineFile(inputMatrix, inputMatFileName);
-   // Read the input control point grid image
-   nifti_image *inputControlPointGridImage = reg_io_ReadImageFile(inputControlPointGridFileName);
-   if(inputControlPointGridImage==nullptr){
-      reg_print_msg_error("The input control point grid image could not be read");
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(inputControlPointGridImage);
-
-   // Run the affine registration
-   reg_f3d2<float> *nonlinear=new reg_f3d2<float>(referenceImage->nt,floatingImage->nt);
-   nonlinear->SetReferenceImage(referenceImage);
-   nonlinear->SetFloatingImage(floatingImage);
-   nonlinear->SetAffineTransformation(inputMatrix);
-   nonlinear->Run();
-
-   // Check the control point grid dimension
-   if(nonlinear->GetControlPointPositionImage()->nx != inputControlPointGridImage->nx ||
-      nonlinear->GetControlPointPositionImage()->ny != inputControlPointGridImage->ny ||
-      nonlinear->GetControlPointPositionImage()->nz != inputControlPointGridImage->nz ||
-      nonlinear->GetControlPointPositionImage()->nt != inputControlPointGridImage->nt ||
-      nonlinear->GetControlPointPositionImage()->nu != inputControlPointGridImage->nu){
-      reg_print_msg_error("The input and recovered control point grid images do not have corresponding sizes");
-      return EXIT_FAILURE;
-   }
-
-   // Compute the difference between the computed and inputed deformation field
-   reg_tools_subtractImageFromImage(inputControlPointGridImage,
-                                   nonlinear->GetControlPointPositionImage(),
-                                   inputControlPointGridImage);
-   reg_tools_abs_image(inputControlPointGridImage);
-   double max_difference=reg_tools_getMaxValue(inputControlPointGridImage);
-
-   // Cleaning up
-   nifti_image_free(referenceImage);
-   nifti_image_free(floatingImage);
-   nifti_image_free(inputControlPointGridImage);
-   delete nonlinear;
-   free(inputMatrix);
-
-   if(max_difference>EPS){
-      fprintf(stderr, "reg_test_fullSymNonlinear error too large: %g (>%g)\n",
-              max_difference, EPS);
-      return EXIT_FAILURE;
-   }
-
-   return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
deleted file mode 100644
index 2254836d..00000000
--- a/reg-test/reg_test_imageGradient.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_globalTrans.h"
-#include "_reg_tools.h"
-#include "_reg_mind.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-    if (argc != 4) {
-        fprintf(stderr, "Usage: %s <image to process> <expected gradient image> <type=0|1>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-    char *inputImageName = argv[1];
-    // Read the input image
-    nifti_image *inputImage = reg_io_ReadImageFile(inputImageName);
-    if (inputImage == nullptr) {
-        reg_print_msg_error("The input image could not be read");
-        return EXIT_FAILURE;
-    }
-    //Convert the image in float
-    reg_tools_changeDatatype<float>(inputImage);
-    //
-    char *expectedImageName = argv[2];
-    // Read the expected image
-    nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName);
-    if (expectedImage == nullptr) {
-        reg_print_msg_error("The expected image could not be read");
-        return EXIT_FAILURE;
-    }
-
-    int usedMethod = atoi(argv[3]);
-    // Read the expected image
-    if(usedMethod != 0 && usedMethod != 1 && usedMethod != 3) {
-        reg_print_msg_error("The current method is not supported - should be 0, 1 or 3");
-        return EXIT_FAILURE;
-    }
-    int dim = (inputImage->nz > 1) ? 3 : 2;
-
-    // Allocate a gradient image
-    nifti_image *gradientImage = nifti_copy_nim_info(inputImage);
-    gradientImage->dim[0]=gradientImage->ndim=5;
-    gradientImage->dim[5]=gradientImage->nu=dim;
-    gradientImage->nvox = CalcVoxelNumber(*gradientImage, gradientImage->ndim);
-    gradientImage->nbyper=sizeof(float);
-    gradientImage->datatype=NIFTI_TYPE_FLOAT32;
-    gradientImage->data=malloc(gradientImage->nvox*gradientImage->nbyper);
-
-    // Allocate a temporary file to compute the gradient's timepoint one at the time
-    nifti_image *tempGradImage = nifti_copy_nim_info(gradientImage);
-    tempGradImage->dim[4]=tempGradImage->nt=1;
-    tempGradImage->nvox = CalcVoxelNumber(*tempGradImage, tempGradImage->ndim);
-    tempGradImage->data=malloc(tempGradImage->nvox*tempGradImage->nbyper);
-
-    // Declare a deformation field image
-    nifti_image *defFieldImage = nullptr;
-    // Allocate a deformation field image if required
-    if(usedMethod > 0)
-    {
-        defFieldImage = nifti_copy_nim_info(inputImage);
-        defFieldImage->dim[0]=defFieldImage->ndim=5;
-        defFieldImage->dim[4]=defFieldImage->nt=1;
-        defFieldImage->dim[5]=defFieldImage->nu=dim;
-        defFieldImage->nvox = CalcVoxelNumber(*defFieldImage, defFieldImage->ndim);
-        defFieldImage->nbyper=sizeof(float);
-        defFieldImage->datatype=NIFTI_TYPE_FLOAT32;
-        defFieldImage->intent_code=NIFTI_INTENT_VECTOR;
-        memset(defFieldImage->intent_name, 0, 16);
-        strcpy(defFieldImage->intent_name,"NREG_TRANS");
-        defFieldImage->intent_p1=DISP_FIELD;
-        // Set the deformation field to identity
-        defFieldImage->data = calloc(defFieldImage->nvox, defFieldImage->nbyper);
-        reg_getDeformationFromDisplacement(defFieldImage);
-    }
-
-    // Allocate a mask array
-    int *mask = (int *)calloc(inputImage->nvox,sizeof(int));
-
-    // Setup pointers over the gradient images
-    float *tempGradImgPtr = static_cast<float *>(tempGradImage->data);
-
-    float *gradImagePtr = static_cast<float *>(gradientImage->data);
-    // Loop over the input image timepoints
-    for(int time=0; time<inputImage->nt; ++time){
-        if(usedMethod == 0){
-            // Compute the gradient using symmetric difference
-            reg_getImageGradient_symDiff(inputImage,
-                                         tempGradImage,
-                                         mask,
-                                         0,
-                                         time);
-        }
-        else if(usedMethod == 3){
-            // Compute the gradient from the deformation field using spline interpolation
-            // Given an identity transformation, since gives the same as symmetric
-            // difference with a kernel of [-1/2 0 1/2]
-            reg_getImageGradient(inputImage,
-                                 tempGradImage,
-                                 defFieldImage,
-                                 mask,
-                                 3,
-                                 0.f,
-                                 time);
-        }
-        else{
-            // Compute the gradient from the deformation field using linear interpolation
-            reg_getImageGradient(inputImage,
-                                 tempGradImage,
-                                 defFieldImage,
-                                 mask,
-                                 1,
-                                 std::numeric_limits<float>::quiet_NaN(),
-                                 time);
-        }
-        // Copy the single time point gradient in the less effective way known to mankind
-        for(int u=0; u<gradientImage->nu; ++u){
-            for(int z=0; z<gradientImage->nz; ++z){
-                for(int y=0; y<gradientImage->ny; ++y){
-                    for(int x=0; x<gradientImage->nx; ++x){
-                        size_t voxIndex_gradImg=
-                                gradientImage->nx*gradientImage->ny*gradientImage->nz*gradientImage->nt*u +
-                                gradientImage->nx*gradientImage->ny*gradientImage->nz*time +
-                                gradientImage->nx*gradientImage->ny*z +
-                                gradientImage->nx*y +
-                                x;
-                        size_t voxIndex_tempGrad=
-                                tempGradImage->nx*tempGradImage->ny*tempGradImage->nz*tempGradImage->nt*u +
-                                tempGradImage->nx*tempGradImage->ny*z +
-                                tempGradImage->nx*y +
-                                x;
-                        gradImagePtr[voxIndex_gradImg]=tempGradImgPtr[voxIndex_tempGrad];
-                    }
-                }
-            }
-        }
-    }
-
-    // Free the allocated arrays and images
-    if(defFieldImage!=nullptr)
-        nifti_image_free(defFieldImage);
-    nifti_image_free(tempGradImage);
-    free(mask);
-
-    //Compute the difference between the computed and expected image
-    reg_tools_subtractImageFromImage(gradientImage, expectedImage, expectedImage);
-
-    // Extract the maximal absolute value
-    reg_tools_abs_image(expectedImage);
-    double max_difference = reg_tools_getMaxValue(expectedImage, -1);
-
-
-    reg_io_WriteImageFile(gradientImage, "res.nii.gz");
-    reg_io_WriteImageFile(expectedImage, "diff.nii.gz");
-
-    nifti_image_free(inputImage);
-    nifti_image_free(expectedImage);
-    nifti_image_free(gradientImage);
-
-    if (max_difference > EPS){
-        fprintf(stderr, "reg_test_imageGradient error too large: %g (>%g)\n",
-                max_difference, EPS);
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_imageGradient ok: %g (<%g)\n", max_difference, EPS);
-#endif
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp
deleted file mode 100644
index b175350d..00000000
--- a/reg-test/reg_test_leastTrimmedSquares.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-#include "nifti1_io.h"
-#include "_reg_maths.h"
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_globalTrans.h"
-
-#include "OptimiseKernel.h"
-#include "Platform.h"
-#include "AladinContent.h"
-
-#define EPS 0.000001
-
-int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max_difference) {
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            float difference = fabsf(matrix1.m[i][j] - matrix2.m[i][j]);
-            max_difference = std::max(difference, max_difference);
-            if (difference > EPS) {
-                fprintf(stderr, "reg_test_leastTrimmedSquares - %s failed %g>%g\n",
-                        name, difference, EPS);
-                return EXIT_FAILURE;
-            }
-        }
-    }
-    return EXIT_SUCCESS;
-}
-
-void test(AladinContent *con, Platform *platform, bool isAffine) {
-    unique_ptr<Kernel> optimiseKernel{ platform->CreateKernel(OptimiseKernel::GetName(), con) };
-    optimiseKernel->castTo<OptimiseKernel>()->Calculate(isAffine);
-}
-
-int main(int argc, char **argv) {
-    if (argc != 7) {
-        fprintf(stderr, "Usage: %s <inputPoints1> <inputPoints2> <percentToKeep> <isAffine> <expectedLTSMatrix> <platformType> \n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputMatrix1Filename = argv[1];
-    char *inputMatrix2Filename = argv[2];
-    unsigned int percentToKeep = atoi(argv[3]);
-    bool isAffine = atoi(argv[4]);
-    char *expectedLTSMatrixFilename = argv[5];
-    PlatformType platformType{ atoi(argv[6]) };
-
-    std::pair<size_t, size_t> inputMatrix1Size = reg_tool_sizeInputMatrixFile(inputMatrix1Filename);
-    size_t m1 = inputMatrix1Size.first;
-    size_t n1 = inputMatrix1Size.second;
-    std::pair<size_t, size_t> inputMatrix2Size = reg_tool_sizeInputMatrixFile(inputMatrix2Filename);
-    size_t m2 = inputMatrix2Size.first;
-    size_t n2 = inputMatrix2Size.second;
-
-    if (m1 != m2 || n1 != n2) {
-        fprintf(stderr, "The input matrices must have the same size");
-        return EXIT_FAILURE;
-    }
-
-    float **inputMatrix1 = reg_tool_ReadMatrixFile<float>(inputMatrix1Filename, m1, n1);
-    float **inputMatrix2 = reg_tool_ReadMatrixFile<float>(inputMatrix2Filename, m2, n2);
-    mat44 *expectedLSMatrix = reg_tool_ReadMat44File(expectedLTSMatrixFilename);
-
-    // Platform
-    unique_ptr<Platform> platform{ new Platform(platformType) };
-    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
-    unique_ptr<AladinContent> con{ contentCreator->Create() };
-
-    float max_difference = 0;
-    unsigned int num_points = m1;
-    //I think it is a bit dirty what I am going to do
-    _reg_blockMatchingParam* blockMatchingParams = new _reg_blockMatchingParam();
-
-    blockMatchingParams->blockNumber[0] = 1;
-    blockMatchingParams->blockNumber[1] = 1;
-
-    blockMatchingParams->totalBlockNumber = num_points;
-    blockMatchingParams->activeBlockNumber = num_points;
-    blockMatchingParams->definedActiveBlockNumber = num_points;
-    blockMatchingParams->percent_to_keep = percentToKeep;
-
-    mat44* test_LTS = (mat44 *)malloc(sizeof(mat44));
-    reg_mat44_eye(test_LTS);
-    con->SetTransformationMatrix(test_LTS);
-
-    //2-D
-    if (n1 == 2) {
-
-        blockMatchingParams->dim = n1;
-        blockMatchingParams->blockNumber[2] = 1;
-        blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float));
-        blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float));
-
-        unsigned int compteur = 0;
-        for (unsigned int j = 0; j < num_points; j++) {
-            blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0];
-            blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1];
-            blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0];
-            blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1];
-            compteur += n1;
-        }
-    } else if (n1 == 3) {
-
-        blockMatchingParams->dim = n1;
-        blockMatchingParams->blockNumber[2] = 2;
-        blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float));
-        blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float));
-        unsigned int compteur = 0;
-        for (unsigned int j = 0; j < num_points; j++) {
-            blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0];
-            blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1];
-            blockMatchingParams->referencePosition[compteur + 2] = inputMatrix1[j][2];
-            blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0];
-            blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1];
-            blockMatchingParams->warpedPosition[compteur + 2] = inputMatrix2[j][2];
-            compteur += n1;
-        }
-    } else {
-        fprintf(stderr, "The input matrix dimensions are not supported");
-        return EXIT_FAILURE;
-    }
-
-    con->SetBlockMatchingParams(blockMatchingParams);
-    test(con.get(), platform.get(), isAffine);
-
-#ifndef NDEBUG
-    if (n1 == 2)
-        reg_mat44_disp(con->GetTransformationMatrix(), (char *)"test_optimize_2D");
-    else reg_mat44_disp(con->GetTransformationMatrix(), (char *)"test_optimize_3D");
-#endif
-
-    if (n1 == 2) {
-        if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *)"LTS matrices 2D affine - rigid", max_difference))
-            return EXIT_FAILURE;
-    } else {
-        if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *)"LTS matrices 3D affine - rigid", max_difference))
-            return EXIT_FAILURE;
-    }
-
-    // Free memory
-    free(expectedLSMatrix);
-    reg_matrix2DDeallocate(m2, inputMatrix2);
-    reg_matrix2DDeallocate(m1, inputMatrix1);
-
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_leastTrimmedSquares ok: %g (<%g)\n", max_difference, EPS);
-#endif
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_linearElasticity.cpp b/reg-test/reg_test_linearElasticity.cpp
deleted file mode 100644
index b339ac1a..00000000
--- a/reg-test/reg_test_linearElasticity.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_localTrans_regul.h"
-#include "_reg_tools.h"
-
-#include "AffineDeformationFieldKernel.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-    if (argc != 5) {
-        fprintf(stderr, "Usage: %s <refImage> <inputTrans> <expectedValue> <type>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputRefImageName = argv[1];
-    char *inputTransFileName = argv[2];
-    char *expectedValueFileName = argv[3];
-    int computationType = atoi(argv[4]);
-
-    // Read the input reference image
-    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == nullptr) {
-        reg_print_msg_error("The input reference image could not be read");
-        return EXIT_FAILURE;
-    }
-    // Read the transformation file
-    nifti_image *transImage = reg_io_ReadImageFile(inputTransFileName);
-    if (transImage == nullptr) {
-        reg_print_msg_error("The transformation image could not be read");
-        return EXIT_FAILURE;
-    }
-
-    // Compute the linear elasticity value
-    double obtainedValue;
-    switch(computationType){
-    case 0: // Approximation based on the control point grid
-       obtainedValue = reg_spline_approxLinearEnergy(transImage);
-       break;
-    case 1: // Dense based on the control point grid
-       obtainedValue = reg_spline_linearEnergy(referenceImage, transImage);
-       break;
-    case 2: // Dense based on the deformation field
-       obtainedValue = reg_defField_linearEnergy(transImage);
-       break;
-    default:
-       reg_print_msg_error("Unexpected computation type");
-       reg_exit();
-    }
-
-    // Read the expected value
-    std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(expectedValueFileName);
-    size_t m = inputMatrixSize.first;
-    size_t n = inputMatrixSize.second;
-    if(m != 1 && n!= 1)
-    {
-       fprintf(stderr,"[NiftyReg ERROR] Error when reading the expected constraint value: %s\n",
-               expectedValueFileName);
-       return EXIT_FAILURE;
-    }
-    float **inputMatrix = reg_tool_ReadMatrixFile<float>(expectedValueFileName, m, n);
-    float expectedValue = inputMatrix[0][0];
-    double max_difference = fabs(obtainedValue-expectedValue);
-
-
-    reg_matrix2DDeallocate(m, inputMatrix);
-    nifti_image_free(referenceImage);
-    nifti_image_free(transImage);
-
-    if (max_difference > EPS){
-        fprintf(stderr, "reg_test_linearElasticity error too large: %g (|%g-%g| > %g)\n",
-                max_difference, obtainedValue, expectedValue, EPS);
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_linearElasticity ok: %g (<%g)\n",
-            max_difference, EPS);
-#endif
-
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_linearElasticityGradient.cpp b/reg-test/reg_test_linearElasticityGradient.cpp
deleted file mode 100644
index eb55ef43..00000000
--- a/reg-test/reg_test_linearElasticityGradient.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_localTrans_regul.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-    if (argc != 5) {
-        fprintf(stderr, "Usage: %s <refImage> <inputTrans> <expectedGradient> <type>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputRefImageName = argv[1];
-    char *inputTransFileName = argv[2];
-    char *expectedGradFileName = argv[3];
-    int computationType = atoi(argv[4]);
-
-    // Read the input reference image
-    nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName);
-    if (referenceImage == nullptr) {
-        reg_print_msg_error("The input reference image could not be read");
-        return EXIT_FAILURE;
-    }
-    // Read the transformation file
-    nifti_image *transImage = reg_io_ReadImageFile(inputTransFileName);
-    if (transImage == nullptr) {
-        reg_print_msg_error("The transformation image could not be read");
-        return EXIT_FAILURE;
-    }
-    // Read the expected gradient file
-    nifti_image *expectedGradientImage = reg_io_ReadImageFile(expectedGradFileName);
-    if (expectedGradientImage == nullptr) {
-        reg_print_msg_error("The expected gradient image could not be read");
-        return EXIT_FAILURE;
-    }
-
-    // Compute the linear elasticity gradient
-    nifti_image *obtainedGradient = nifti_dup(*expectedGradientImage, false);
-    switch(computationType){
-    case 0: // Approximation based on the control point grid
-       reg_spline_approxLinearEnergyGradient(transImage,
-                                             obtainedGradient,
-                                             1.f);
-       break;
-    case 1: // Dense based on the control point grid
-       reg_spline_linearEnergyGradient(referenceImage,
-                                       transImage,
-                                       obtainedGradient,
-                                       1.f);
-       break;
-    case 2: // Dense based on the deformation field
-       reg_defField_linearEnergyGradient(transImage,
-                                         obtainedGradient,
-                                         1.f);
-       break;
-    default:
-       reg_print_msg_error("Unexpected computation type");
-       reg_exit();
-    }
-    // Compute the difference between the computed and expected gradient
-    nifti_image *diff_field = nifti_dup(*obtainedGradient, false);
-    reg_tools_subtractImageFromImage(obtainedGradient, expectedGradientImage, diff_field);
-    reg_tools_abs_image(diff_field);
-    double max_difference = reg_tools_getMaxValue(diff_field, -1);
-
-    // Free allocated images
-    nifti_image_free(diff_field);
-    nifti_image_free(obtainedGradient);
-    nifti_image_free(expectedGradientImage);
-    nifti_image_free(referenceImage);
-    nifti_image_free(transImage);
-
-    if (max_difference > EPS){
-        fprintf(stderr, "reg_test_linearElasticityGradient error too large: %g ( > %g)\n",
-                max_difference, EPS);
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_linearElasticityGradient ok: %g (<%g)\n",
-            max_difference, EPS);
-#endif
-
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_matrix_operation.cpp b/reg-test/reg_test_matrix_operation.cpp
deleted file mode 100644
index e5dc9fa6..00000000
--- a/reg-test/reg_test_matrix_operation.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-#include "nifti1_io.h"
-#include "_reg_maths.h"
-#include "_reg_maths_eigen.h"
-#include "_reg_ReadWriteMatrix.h"
-//STD
-#include <algorithm>
-
-#define EPS 0.000001
-
-int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max_difference)
-{
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            float difference = fabsf(matrix1.m[i][j] - matrix2.m[i][j]);
-            max_difference = std::max(difference, max_difference);
-            if (difference > EPS){
-                fprintf(stderr, "reg_test_matrix_operation - %s failed %g>%g\n",
-                    name, difference, EPS);
-                return EXIT_FAILURE;
-            }
-        }
-    }
-    return EXIT_SUCCESS;
-}
-
-int main(int argc, char **argv)
-{
-
-    if (argc != 9) {
-        fprintf(stderr, "Usage: %s <inputMatrix1> <inputMatrix2>\
-                                                <expectedMultMatrix> <expectedAddMatrix> <expectedSubMatrix> \
-                                                                        <expectedExpMatrix> <expectedLogMatrix> <expectedInvMatrix> \n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputMatrix1Filename = argv[1];
-    char *inputMatrix2Filename = argv[2];
-    char *expectedMultMatrixFilename = argv[3];
-    char *expectedAddMatrixFilename = argv[4];
-    char *expectedSubMatrixFilename = argv[5];
-    char *expectedExpMatrixFilename = argv[6];
-    char *expectedLogMatrixFilename = argv[7];
-    char *expectedInvMatrixFilename = argv[8];
-
-    std::pair<size_t, size_t> inputMatrix1Size = reg_tool_sizeInputMatrixFile(inputMatrix1Filename);
-    size_t m = inputMatrix1Size.first;
-    size_t n = inputMatrix1Size.second;
-
-    if (m != 4 || n != 4) {
-        fprintf(stderr, "The input matrices have to be 4x4 matrices");
-        return EXIT_FAILURE;
-    }
-
-    std::pair<size_t, size_t> inputMatrix2Size = reg_tool_sizeInputMatrixFile(inputMatrix2Filename);
-    size_t m2 = inputMatrix2Size.first;
-    size_t n2 = inputMatrix2Size.second;
-
-    if (m2 != 4 || n2 != 4) {
-        fprintf(stderr, "The input matrices have to be 4x4 matrices");
-        return EXIT_FAILURE;
-    }
-
-    mat44 *inputMatrix1 = reg_tool_ReadMat44File(inputMatrix1Filename);
-    mat44 *inputMatrix2 = reg_tool_ReadMat44File(inputMatrix2Filename);
-    mat44 *expectedMultMatrix = reg_tool_ReadMat44File(expectedMultMatrixFilename);
-    mat44 *expectedAddMatrix = reg_tool_ReadMat44File(expectedAddMatrixFilename);
-    mat44 *expectedSubMatrix = reg_tool_ReadMat44File(expectedSubMatrixFilename);
-    mat44 *expectedExpMatrix = reg_tool_ReadMat44File(expectedExpMatrixFilename);
-    mat44 *expectedLogMatrix = reg_tool_ReadMat44File(expectedLogMatrixFilename);
-    mat44 *expectedInvMatrix = reg_tool_ReadMat44File(expectedInvMatrixFilename);
-
-    ///////////////////////
-    float max_difference = 0;
-
-    if (check_matrix_difference(*expectedMultMatrix, (*inputMatrix1)*(*inputMatrix2), (char *) "matrix multiplication", max_difference)) return EXIT_FAILURE;
-
-    if (check_matrix_difference(*expectedMultMatrix, reg_mat44_mul(inputMatrix1, inputMatrix2), (char *) "matrix multiplication", max_difference)) return EXIT_FAILURE;
-
-    if (check_matrix_difference(*expectedAddMatrix, (*inputMatrix1) + (*inputMatrix2), (char *) "matrix addition", max_difference)) return EXIT_FAILURE;
-
-    if (check_matrix_difference(*expectedAddMatrix, reg_mat44_add(inputMatrix1, inputMatrix2), (char *) "matrix addition", max_difference)) return EXIT_FAILURE;
-
-    if (check_matrix_difference(*expectedSubMatrix, (*inputMatrix1) - (*inputMatrix2), (char *) "matrix subtraction", max_difference)) return EXIT_FAILURE;
-
-    if (check_matrix_difference(*expectedSubMatrix, reg_mat44_minus(inputMatrix1, inputMatrix2), (char *) "matrix subtraction", max_difference)) return EXIT_FAILURE;
-
-    if (check_matrix_difference(*expectedExpMatrix, reg_mat44_expm(inputMatrix1), (char *) "matrix exponentiation", max_difference)) return EXIT_FAILURE;
-
-    if (check_matrix_difference(*expectedLogMatrix, reg_mat44_logm(inputMatrix1), (char *) "matrix logarithm", max_difference)) return EXIT_FAILURE;
-
-    if (check_matrix_difference(*expectedInvMatrix, reg_mat44_inv(inputMatrix1), (char *) "reg_mat44_inv matrix inverse", max_difference)) return EXIT_FAILURE;
-
-    if (check_matrix_difference(*expectedInvMatrix, nifti_mat44_inverse(*inputMatrix1), (char *) "nifti_mat44_inverse matrix inverse", max_difference)) return EXIT_FAILURE;
-
-    ////////////////////////
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_matrix_operation ok: %g (<%g)\n", max_difference, EPS);
-#endif
-    return EXIT_SUCCESS;
-}
-
diff --git a/reg-test/reg_test_measure.cpp b/reg-test/reg_test_measure.cpp
deleted file mode 100644
index f46467f9..00000000
--- a/reg-test/reg_test_measure.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_tools.h"
-#include "_reg_nmi.h"
-#include "_reg_ssd.h"
-#include "_reg_mind.h"
-#include "_reg_lncc.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-
-   if(argc!=5)
-   {
-      fprintf(stderr, "Usage: %s <refImage> <warImage> <LNCC|NMI|SSD|MIND> <expectedValueFile>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   double max_difference = EPS;
-
-   char *inputRefImageName=argv[1];
-   char *inputWarImageName=argv[2];
-   char *measure_type=argv[3];
-   char *inputMatrixFilename = argv[4];
-
-   /* Read the reference image */
-   nifti_image *refImage = reg_io_ReadImageFile(inputRefImageName);
-   if(refImage == nullptr)
-   {
-      fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n",
-              inputRefImageName);
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(refImage);
-
-   /* Read the warped image */
-   nifti_image *warImage = reg_io_ReadImageFile(inputWarImageName);
-   if(warImage == nullptr)
-   {
-      fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n",
-              inputWarImageName);
-      return EXIT_FAILURE;
-   }
-   reg_tools_changeDatatype<float>(warImage);
-
-   /* Read the expected value */
-   std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(inputMatrixFilename);
-   size_t m = inputMatrixSize.first;
-   size_t n = inputMatrixSize.second;
-   if(m != 1 && n!= 1)
-   {
-      fprintf(stderr,"[NiftyReg ERROR] Error when reading the expected similarity measure value: %s\n",
-              inputMatrixFilename);
-      return EXIT_FAILURE;
-   }
-   float **inputMatrix = reg_tool_ReadMatrixFile<float>(inputMatrixFilename, m, n);
-
-   // Check if the input images have the same size
-   for(int i=0;i<8;++i){
-      if(refImage->dim[i]!=warImage->dim[i])
-      {
-         reg_print_msg_error("reg_test_measure: The input images do not have the same size");
-         return EXIT_FAILURE;
-      }
-   }
-
-   int *mask_image=(int *)calloc(refImage->nvox,sizeof(int));
-
-   /* Compute the LNCC if required */
-   if(strcmp(measure_type, "SSD")==0)
-   {
-      reg_ssd *measure_object=new reg_ssd();
-      for(int i=0;i<refImage->nt;++i){
-         measure_object->SetTimepointWeight(i, 1.);
-         measure_object->SetNormaliseTimepoint(i,true);
-      }
-      measure_object->InitialiseMeasure(refImage,
-                                        warImage,
-                                        mask_image,
-                                        warImage,
-                                        nullptr,
-                                        nullptr,
-                                        nullptr);
-      double measure=measure_object->GetSimilarityMeasureValue();
-
-#ifndef NDEBUG
-      printf("reg_test_measure: SSD value %iD = %.7g\n",
-             (refImage->nz>1?3:2), measure);
-#endif
-      double expectedValue = inputMatrix[0][0];
-      max_difference = fabs(measure-expectedValue);
-      //
-      if(max_difference>EPS)
-      {
-         printf("reg_test_measure: Incorrect measure value %.7g (diff=%.7g)\n",
-                measure, max_difference);
-         return EXIT_FAILURE;
-      }
-      delete measure_object;
-   }
-   /* Compute the MIND if required */
-   else if(strcmp(measure_type, "MIND")==0)
-   {
-      reg_mind *measure_object=new reg_mind();
-      //Let's normalize between 0..1
-      for(int i=0;i<refImage->nt;++i)
-         measure_object->SetTimepointWeight(i, 1.);
-      measure_object->InitialiseMeasure(refImage,
-                                        warImage,
-                                        mask_image,
-                                        warImage,
-                                        nullptr,
-                                        nullptr);
-      double measure=measure_object->GetSimilarityMeasureValue();
-#ifndef NDEBUG
-      printf("reg_test_measure: MIND value %iD = %.7g\n",
-             (refImage->nz>1?3:2), measure);
-#endif
-      double expectedValue = inputMatrix[0][0];
-      max_difference = fabs(measure-expectedValue);
-      //
-      if(max_difference>EPS)
-      {
-         printf("reg_test_measure: Incorrect measure value %.7g (diff=%.7g)\n",
-                measure, max_difference);
-         return EXIT_FAILURE;
-      }
-      delete measure_object;
-   }
-   else
-   {
-      reg_print_msg_error("reg_test_measure: Unknown measure type");
-      return EXIT_FAILURE;
-   }
-
-   // Free the allocated images
-   nifti_image_free(refImage);
-   nifti_image_free(warImage);
-   free(mask_image);
-   reg_matrix2DDeallocate(m, inputMatrix);
-
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_measure ok: %g (<%g)\n", max_difference, EPS);
-#endif
-
-   return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_mindDescriptor.cpp b/reg-test/reg_test_mindDescriptor.cpp
deleted file mode 100644
index 09a94729..00000000
--- a/reg-test/reg_test_mindDescriptor.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-//TEST CHANGE DATATYPE
-#include "_reg_ReadWriteImage.h"
-#include "_reg_globalTrans.h"
-#include "_reg_tools.h"
-#include "_reg_mind.h"
-//
-#define EPS 0.000001
-//
-int main(int argc, char **argv)
-{
-    if (argc != 3) {
-        fprintf(stderr, "Usage: %s <image to process> <expected MIND image>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-    char *inputImageName = argv[1];
-    // Read the input image
-    nifti_image *inputImage = reg_io_ReadImageFile(inputImageName);
-    if (inputImage == nullptr) {
-        reg_print_msg_error("The input image could not be read");
-        return EXIT_FAILURE;
-    }
-    //Convert the image in float
-    reg_tools_changeDatatype<float>(inputImage);
-    //
-    char *expectedImageName = argv[2];
-    // Read the expected image
-    nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName);
-    if (expectedImage == nullptr) {
-        reg_print_msg_error("The expected image could not be read");
-        return EXIT_FAILURE;
-    }
-    int dim = (inputImage->nz > 1) ? 3 : 2;
-    if(dim<2 || dim>3){
-        reg_print_msg_error("dimension not supported");
-        return EXIT_FAILURE;
-    }
-    // COMPUTE THE MIND DESCRIPTOR
-    //MIND image
-    nifti_image *MIND_img = nifti_copy_nim_info(inputImage);
-    MIND_img->ndim = MIND_img->dim[0] = 4;
-    MIND_img->nt = MIND_img->dim[4] = 2*dim;
-    MIND_img->nvox = MIND_img->nvox*2*dim;
-    MIND_img->data=calloc(MIND_img->nvox,MIND_img->nbyper);
-
-    // Compute the MIND descriptor
-    int *mask = (int *)calloc(inputImage->nvox, sizeof(int));
-    GetMINDImageDescriptor(inputImage,MIND_img, mask, 1, 0);
-    free(mask);
-    //
-    //Compute the difference between the computed and expected image
-    //
-    reg_tools_subtractImageFromImage(MIND_img, expectedImage, expectedImage);
-    reg_tools_abs_image(expectedImage);
-    double max_difference = reg_tools_getMaxValue(expectedImage, -1);
-
-    nifti_image_free(inputImage);
-    nifti_image_free(expectedImage);
-    nifti_image_free(MIND_img);
-
-    if (max_difference > EPS){
-        fprintf(stderr, "reg_test_MINDDescriptor error too large: %g (>%g)\n",
-            max_difference, EPS);
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_MINDDescriptor ok: %g (<%g)\n", max_difference, EPS);
-#endif
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_mindsscDescriptor.cpp b/reg-test/reg_test_mindsscDescriptor.cpp
deleted file mode 100644
index 161b14a1..00000000
--- a/reg-test/reg_test_mindsscDescriptor.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-//TEST CHANGE DATATYPE
-#include "_reg_ReadWriteImage.h"
-#include "_reg_globalTrans.h"
-#include "_reg_tools.h"
-#include "_reg_mind.h"
-//
-#define EPS 0.000001
-//
-int main(int argc, char **argv)
-{
-    if (argc != 3) {
-        fprintf(stderr, "Usage: %s <image to process> <expected MIND-SSC image>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-    char *inputImageName = argv[1];
-    // Read the input image
-    nifti_image *inputImage = reg_io_ReadImageFile(inputImageName);
-    if (inputImage == nullptr) {
-        reg_print_msg_error("The input image could not be read");
-        return EXIT_FAILURE;
-    }
-    //Convert the image in float
-    reg_tools_changeDatatype<float>(inputImage);
-    //
-    char *expectedImageName = argv[2];
-    // Read the expected image
-    nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName);
-    if (expectedImage == nullptr) {
-        reg_print_msg_error("The expected image could not be read");
-        return EXIT_FAILURE;
-    }
-    int dim = (inputImage->nz > 1) ? 3 : 2;
-    if(dim<2 || dim>3){
-        reg_print_msg_error("dimension not supported");
-        return EXIT_FAILURE;
-    }
-    // COMPUTE THE MIND DESCRIPTOR
-    int lengthDescritor = 12;
-    if(dim == 2) {
-        lengthDescritor = 4;
-    }
-    //MINDSSC image
-    nifti_image *MINDSSC_img = nifti_copy_nim_info(inputImage);
-    MINDSSC_img->ndim = MINDSSC_img->dim[0] = 4;
-    MINDSSC_img->nt = MINDSSC_img->dim[4] = lengthDescritor;
-    MINDSSC_img->nvox = MINDSSC_img->nvox*lengthDescritor;
-    MINDSSC_img->data=calloc(MINDSSC_img->nvox,MINDSSC_img->nbyper);
-
-    // Compute the MIND descriptor
-    int *mask = (int *)calloc(inputImage->nvox, sizeof(int));
-    GetMINDSSCImageDescriptor(inputImage,MINDSSC_img, mask, 1, 0);
-    free(mask);
-    //
-    //Compute the difference between the computed and expected image
-    //
-    reg_tools_subtractImageFromImage(MINDSSC_img, expectedImage, expectedImage);
-    reg_tools_abs_image(expectedImage);
-    double max_difference = reg_tools_getMaxValue(expectedImage, -1);
-
-    nifti_image_free(inputImage);
-    nifti_image_free(expectedImage);
-    nifti_image_free(MINDSSC_img);
-
-    if (max_difference > EPS){
-        fprintf(stderr, "reg_test_MINDSSCDescriptor error too large: %g (>%g)\n",
-            max_difference, EPS);
-        return EXIT_FAILURE;
-    }
-#ifndef NDEBUG
-    fprintf(stdout, "reg_test_MINDSSCDescriptor ok: %g (<%g)\n", max_difference, EPS);
-#endif
-    return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_nonlinear_deformation_field.cpp b/reg-test/reg_test_nonlinear_deformation_field.cpp
deleted file mode 100644
index d697271a..00000000
--- a/reg-test/reg_test_nonlinear_deformation_field.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-#include "_reg_ReadWriteImage.h"
-#include "_reg_localTrans.h"
-#include "_reg_tools.h"
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-   if(argc!=4)
-   {
-      fprintf(stderr, "Usage: %s <refImage> <inputControlPointGrid> <expectedField>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputRefImageName=argv[1];
-   char *inputCPPImageName=argv[2];
-   char *inputDefImageName=argv[3];
-
-   // Read the input reference image
-   nifti_image *referenceImage = reg_io_ReadImageHeader(inputRefImageName);
-   if(referenceImage==nullptr){
-      reg_print_msg_error("The input reference image could not be read");
-      return EXIT_FAILURE;
-   }
-   // Read the input deformation field image image
-   nifti_image *controlPointGridImage = reg_io_ReadImageFile(inputCPPImageName);
-   if(controlPointGridImage==nullptr){
-      reg_print_msg_error("The input control point grid image could not be read");
-      return EXIT_FAILURE;
-   }
-   // Read the input deformation field image image
-   nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName);
-   if(inputDeformationField==nullptr){
-      reg_print_msg_error("The input deformation field image could not be read");
-      return EXIT_FAILURE;
-   }
-   // Check the dimension of the input images
-   if(referenceImage->nx != inputDeformationField->nx ||
-      referenceImage->ny != inputDeformationField->ny ||
-      referenceImage->nz != inputDeformationField->nz ||
-      (referenceImage->nz>1?3:2) != inputDeformationField->nu){
-      reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes");
-      return EXIT_FAILURE;
-   }
-
-   // Create a deformation field
-   nifti_image *test_field = nifti_dup(*inputDeformationField, false);
-
-   // Compute the non-linear deformation field
-   reg_getDeformationFromDisplacement(test_field);
-   reg_spline_getDeformationField(controlPointGridImage,
-                                  test_field,
-                                  nullptr,
-                                  true,
-                                  true);
-
-   // Compute the difference between the computed and inputed deformation field
-   reg_tools_subtractImageFromImage(inputDeformationField,test_field,test_field);
-   reg_tools_abs_image(test_field);
-   double max_difference=reg_tools_getMaxValue(test_field);
-
-   nifti_image_free(referenceImage);
-   nifti_image_free(controlPointGridImage);
-   nifti_image_free(inputDeformationField);
-   nifti_image_free(test_field);
-
-   if(max_difference>EPS){
-      fprintf(stderr, "reg_test_nonlinear_deformation_field error too large: %g (>%g)\n",
-              max_difference, EPS);
-      return EXIT_FAILURE;
-   }
-
-   return EXIT_SUCCESS;
-}
diff --git a/reg-test/reg_test_svd.cpp b/reg-test/reg_test_svd.cpp
deleted file mode 100644
index f8d80d20..00000000
--- a/reg-test/reg_test_svd.cpp
+++ /dev/null
@@ -1,292 +0,0 @@
-#include "_reg_tools.h"
-#include "_reg_maths_eigen.h"
-#include "_reg_ReadWriteMatrix.h"
-#include <algorithm>
-
-#define EPS 0.000001
-
-int main(int argc, char **argv)
-{
-   //NOT REALLY PLATFORM... HAVE TO CHANGE THAT LATER
-   if (argc != 5) {
-      fprintf(stderr, "Usage: %s <inputSVDMatrix> <expectedUMatrix> <expectedSMatrix> <expectedVMatrix>\n", argv[0]);
-      return EXIT_FAILURE;
-   }
-
-   char *inputSVDMatrixFilename = argv[1];
-   char *expectedUMatrixFilename = argv[2];
-   char *expectedSMatrixFilename = argv[3];
-   char *expectedVMatrixFilename = argv[4];
-
-   std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(inputSVDMatrixFilename);
-   size_t m = inputMatrixSize.first;
-   size_t n = inputMatrixSize.second;
-   size_t min_size = std::min(m, n);
-#ifndef NDEBUG
-   std::cout << "min_size=" << min_size << std::endl;
-#endif
-
-   float **inputSVDMatrix = reg_tool_ReadMatrixFile<float>(inputSVDMatrixFilename, m, n);
-
-#ifndef NDEBUG
-   std::cout << "inputSVDMatrix[i][j]=" << std::endl;
-   for (size_t i = 0; i < m; i++) {
-      for (size_t j = 0; j < n; j++) {
-         std::cout << inputSVDMatrix[i][j] << " ";
-      }
-      std::cout << std::endl;
-   }
-#endif
-
-   float ** expectedSMatrix = reg_tool_ReadMatrixFile<float>(expectedSMatrixFilename, min_size, min_size);
-   float **test_SMatrix = reg_matrix2DAllocate<float>(min_size, min_size);
-
-   //more row than columns
-   if (m > n) {
-
-      float ** expectedUMatrix = reg_tool_ReadMatrixFile<float>(expectedUMatrixFilename, m, n);
-      float ** expectedVMatrix = reg_tool_ReadMatrixFile<float>(expectedVMatrixFilename, min_size, min_size);
-
-      float **test_UMatrix = reg_matrix2DAllocate<float>(m, n);
-      float **test_VMatrix = reg_matrix2DAllocate<float>(min_size, min_size);
-
-      //For the old version of the function:
-      float **inputSVDMatrixNotTouched = reg_tool_ReadMatrixFile<float>(inputSVDMatrixFilename, m, n);
-      float *test_SVect = (float*)malloc(min_size*sizeof(float));
-      //SVD
-      svd<float>(inputSVDMatrix, m, n, test_SVect, test_VMatrix);
-      //U
-      for (size_t i = 0; i < m; i++) {
-         for (size_t j = 0; j < n; j++) {
-            test_UMatrix[i][j] = inputSVDMatrix[i][j];
-         }
-      }
-      //S
-      for (size_t i = 0; i < min_size; i++) {
-         for (size_t j = 0; j < min_size; j++) {
-            if (i == j) {
-               test_SMatrix[i][j] = test_SVect[i];
-            }
-            else {
-               test_SMatrix[i][j] = 0;
-            }
-         }
-      }
-
-#ifndef NDEBUG
-      std::cout << "test_UMatrix[i][j]=" << std::endl;
-      for (size_t i = 0; i < m; i++) {
-         for (size_t j = 0; j < n; j++) {
-            std::cout << test_UMatrix[i][j] << " ";
-         }
-         std::cout << std::endl;
-      }
-      std::cout << "test_SMatrix[i][j]=" << std::endl;
-      for (size_t i = 0; i < min_size; i++) {
-         for (size_t j = 0; j < min_size; j++) {
-            std::cout << test_SMatrix[i][j] << " ";
-         }
-         std::cout << std::endl;
-      }
-      std::cout << "test_VMatrix[i][j]=" << std::endl;
-      for (size_t i = 0; i < min_size; i++) {
-         for (size_t j = 0; j < min_size; j++) {
-            std::cout << test_VMatrix[i][j] << " ";
-         }
-         std::cout << std::endl;
-      }
-#endif
-      //The sign of the vector are different between Matlab and Eigen so let's take the absolute value and let's check that U*S*V' = M
-      float max_difference = 0;
-
-      for (size_t i = 0; i < min_size; i++) {
-         for (size_t j = 0; j < min_size; j++) {
-            float difference = fabsf(test_SMatrix[i][j]) - fabsf(expectedSMatrix[i][j]);
-            max_difference = std::max(difference, max_difference);
-            if (difference > EPS){
-               fprintf(stderr, "reg_test_svd - checking S - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-               return EXIT_FAILURE;
-            }
-            difference = fabsf(test_VMatrix[i][j]) - fabsf(expectedVMatrix[i][j]);
-            max_difference = std::max(difference, max_difference);
-            if (difference > EPS){
-               fprintf(stderr, "reg_test_svd - checking V - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-               return EXIT_FAILURE;
-            }
-         }
-      }
-      for (size_t i = 0; i < m; i++) {
-         for (size_t j = 0; j < n; j++) {
-            float difference = fabsf(test_UMatrix[i][j]) - fabsf(expectedUMatrix[i][j]);
-            max_difference = std::max(difference, max_difference);
-            if (difference > EPS){
-               fprintf(stderr, "reg_test_svd - checking U - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-               return EXIT_FAILURE;
-            }
-         }
-      }
-      //check that U*S*V' = M
-      float ** US = reg_matrix2DMultiply(test_UMatrix, m, n, test_SMatrix, min_size, min_size, false);
-      float ** VT = reg_matrix2DTranspose(test_VMatrix, min_size, min_size);
-      float ** test_inputMatrix = reg_matrix2DMultiply(US, m, min_size, VT, min_size, min_size, false);
-#ifndef NDEBUG
-      std::cout << "test_inputMatrix[i][j]=" << std::endl;
-      for (size_t i = 0; i < m; i++) {
-         for (size_t j = 0; j < n; j++) {
-            std::cout << test_inputMatrix[i][j] << " ";
-         }
-         std::cout << std::endl;
-      }
-#endif
-      for (size_t i = 0; i < m; i++) {
-         for (size_t j = 0; j < n; j++) {
-            float difference = fabsf(inputSVDMatrixNotTouched[i][j] - test_inputMatrix[i][j]);
-            max_difference = std::max(difference, max_difference);
-            if (difference > EPS){
-               fprintf(stderr, "reg_test_svd - checking that U*S*V' = M - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-               return EXIT_FAILURE;
-            }
-         }
-      }
-
-      // Free the allocated variables
-      for (size_t i = 0; i < m; i++) {
-         free(inputSVDMatrix[i]);
-         free(inputSVDMatrixNotTouched[i]);
-         free(expectedUMatrix[i]);
-         free(test_UMatrix[i]);
-      }
-      for (size_t j = 0; j < min_size; j++) {
-         free(expectedSMatrix[j]);
-         free(expectedVMatrix[j]);
-         free(test_SMatrix[j]);
-         free(test_VMatrix[j]);
-      }
-      free(inputSVDMatrix);
-      free(inputSVDMatrixNotTouched);
-      free(expectedUMatrix);
-      free(expectedSMatrix);
-      free(expectedVMatrix);
-      free(test_UMatrix);
-      free(test_SMatrix);
-      free(test_VMatrix);
-      free(test_SVect);
-      //
-#ifndef NDEBUG
-      fprintf(stdout, "reg_test_svd ok: %g ( <%g )\n", max_difference, EPS);
-#endif
-      return EXIT_SUCCESS;
-   }
-   //more colums than rows
-   else {
-
-      float ** expectedUMatrix = reg_tool_ReadMatrixFile<float>(expectedUMatrixFilename, min_size, min_size);
-      float ** expectedVMatrix = reg_tool_ReadMatrixFile<float>(expectedVMatrixFilename, n, m);
-
-      float **test_UMatrix = reg_matrix2DAllocate<float>(min_size, min_size);
-      float **test_VMatrix = reg_matrix2DAllocate<float>(n, m);
-
-      svd<float>(inputSVDMatrix, m, n, &test_UMatrix, &test_SMatrix, &test_VMatrix);
-#ifndef NDEBUG
-      std::cout << "test_UMatrix[i][j]=" << std::endl;
-      for (size_t i = 0; i < min_size; i++) {
-         for (size_t j = 0; j < min_size; j++) {
-            std::cout << test_UMatrix[i][j] << " ";
-         }
-         std::cout << std::endl;
-      }
-      std::cout << "test_SMatrix[i][j]=" << std::endl;
-      for (size_t i = 0; i < min_size; i++) {
-         for (size_t j = 0; j < min_size; j++) {
-            std::cout << test_SMatrix[i][j] << " ";
-         }
-         std::cout << std::endl;
-      }
-      std::cout << "test_VMatrix[i][j]=" << std::endl;
-      for (size_t i = 0; i < n; i++) {
-         for (size_t j = 0; j < m; j++) {
-            std::cout << test_VMatrix[i][j] << " ";
-         }
-         std::cout << std::endl;
-      }
-#endif
-      //The sign of the vector are different between Matlab and Eigen so let's take the absolute value and let's check that U*S*V' = M
-      float max_difference = 0;
-
-      for (size_t i = 0; i < min_size; i++) {
-         for (size_t j = 0; j < min_size; j++) {
-            float difference = fabsf(test_SMatrix[i][j]) - fabsf(expectedSMatrix[i][j]);
-            max_difference = std::max(difference, max_difference);
-            if (difference > EPS){
-               fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-               return EXIT_FAILURE;
-            }
-            difference = fabsf(test_UMatrix[i][j]) - fabsf(test_UMatrix[i][j]);
-            max_difference = std::max(difference, max_difference);
-            if (difference > EPS){
-               fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-               return EXIT_FAILURE;
-            }
-         }
-      }
-      for (size_t i = 0; i < n; i++) {
-         for (size_t j = 0; j < m; j++) {
-            float difference = fabsf(test_VMatrix[i][j]) - fabsf(test_VMatrix[i][j]);
-            max_difference = std::max(difference, max_difference);
-            if (difference > EPS){
-               fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-               return EXIT_FAILURE;
-            }
-         }
-      }
-
-      //check that U*S*V' = M
-      float ** US = reg_matrix2DMultiply(test_UMatrix, min_size, min_size, test_SMatrix, min_size, min_size, false);
-      float ** VT = reg_matrix2DTranspose(test_VMatrix, n, m);
-      float ** test_inputMatrix = reg_matrix2DMultiply(US, min_size, min_size, VT, m, n, false);
-#ifndef NDEBUG
-      std::cout << "test_inputMatrix[i][j]=" << std::endl;
-      for (size_t i = 0; i < m; i++) {
-         for (size_t j = 0; j < n; j++) {
-            std::cout << test_inputMatrix[i][j] << " ";
-         }
-         std::cout << std::endl;
-      }
-#endif
-      for (size_t i = 0; i < m; i++) {
-         for (size_t j = 0; j < n; j++) {
-            float difference = fabsf(inputSVDMatrix[i][j] - test_inputMatrix[i][j]);
-            max_difference = std::max(difference, max_difference);
-            if (difference > EPS){
-               fprintf(stderr, "reg_test_svd - checking that U*S*V' = M - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-               return EXIT_FAILURE;
-            }
-         }
-      }
-
-      // Free the allocated variables
-      for (size_t i = 0; i < min_size; i++) {
-         free(inputSVDMatrix[i]);
-         free(expectedUMatrix[i]);
-         free(test_UMatrix[i]);
-         free(expectedSMatrix[i]);
-         free(test_SMatrix[i]);
-      }
-      for (size_t j = 0; j < n; j++) {
-         free(expectedVMatrix[j]);
-         free(test_VMatrix[j]);
-      }
-      free(inputSVDMatrix);
-      free(expectedUMatrix);
-      free(expectedSMatrix);
-      free(expectedVMatrix);
-      free(test_UMatrix);
-      free(test_SMatrix);
-      free(test_VMatrix);
-      //
-#ifndef NDEBUG
-      fprintf(stdout, "reg_test_svd ok: %g (<%g)\n", max_difference, EPS);
-#endif
-      return EXIT_SUCCESS;
-   }
-}
diff --git a/reg-test/reg_test_svd_cuda.cpp b/reg-test/reg_test_svd_cuda.cpp
deleted file mode 100644
index 009b3db7..00000000
--- a/reg-test/reg_test_svd_cuda.cpp
+++ /dev/null
@@ -1,427 +0,0 @@
-#include "_reg_tools.h"
-#include "_reg_maths_eigen.h"
-#include "_reg_ReadWriteMatrix.h"
-
-#ifdef _USE_CUDA
-#include "cusolverDn.h"
-#include "_reg_common_cuda.h"
-#include "optimizeKernel.h"
-#endif
-//STD
-#include <algorithm>
-
-#define EPS 0.000001
-
-#ifdef _USE_CUDA
-/***********************/
-/* CUDA ERROR CHECKING */
-/***********************/
-void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
-{
-    if (code != cudaSuccess)
-    {
-        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
-        if (abort) { exit(code); }
-    }
-}
-void gpuErrchk(cudaError_t ans) { gpuAssert((ans), __FILE__, __LINE__); }
-
-
-/* ******************************** */
-template<typename T>
-void cudaCommon_transfer2DMatrixFromCpuToDevice(T* M_d, T** M_h, unsigned int m, unsigned int n) {
-
-    T *tmpMat_h = (T*)malloc(m*n * sizeof(T));
-    matmnToCptr<T>(M_h, tmpMat_h, m, n);
-    NR_CUDA_SAFE_CALL(cudaMemcpy(M_d, tmpMat_h, m*n * sizeof(T), cudaMemcpyHostToDevice));
-    free(tmpMat_h);
-
-}
-template void cudaCommon_transfer2DMatrixFromCpuToDevice<float>(float* M_d, float** M_h, unsigned int m, unsigned int n);
-template void cudaCommon_transfer2DMatrixFromCpuToDevice<double>(double* M_d, double** M_h, unsigned int m, unsigned int n);
-/* ******************************** */
-/* ******************************** */
-template<typename T>
-void cudaCommon_transferFromDeviceTo2DMatrixCpu(T* M_d, T** M_h, unsigned int m, unsigned int n) {
-
-    T *tmpMat_h = (T*)malloc(m*n * sizeof(T));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(tmpMat_h, M_d, m*n * sizeof(T), cudaMemcpyDeviceToHost));
-    cPtrToMatmn<T>(M_h, tmpMat_h, m, n);
-    free(tmpMat_h);
-
-}
-template void cudaCommon_transferFromDeviceTo2DMatrixCpu<float>(float* M_d, float** M_h, unsigned int m, unsigned int n);
-template void cudaCommon_transferFromDeviceTo2DMatrixCpu<double>(double* M_d, double** M_h, unsigned int m, unsigned int n);
-#endif
-
-int main(int argc, char **argv)
-{
-    //NOT REALLY PLATFORM... HAVE TO CHANGE THAT LATER
-    if (argc != 6) {
-        fprintf(stderr, "Usage: %s <inputSVDMatrix> <expectedUMatrix> <expectedSMatrix> <expectedVMatrix> <platform>\n", argv[0]);
-        return EXIT_FAILURE;
-    }
-
-    char *inputSVDMatrixFilename = argv[1];
-    char *expectedUMatrixFilename = argv[2];
-    char *expectedSMatrixFilename = argv[3];
-    char *expectedVMatrixFilename = argv[4];
-    PlatformType platformType{atoi(argv[5])};
-
-    std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(inputSVDMatrixFilename);
-    size_t m = inputMatrixSize.first;
-    size_t n = inputMatrixSize.second;
-    size_t min_size = std::min(m, n);
-    size_t max_size = std::max(m, n);
-#ifndef NDEBUG
-    std::cout << "min_size=" << min_size << std::endl;
-#endif
-
-    float **inputSVDMatrix = reg_tool_ReadMatrixFile<float>(inputSVDMatrixFilename, m, n);
-
-#ifndef NDEBUG
-    std::cout << "inputSVDMatrix[i][j]=" << std::endl;
-    for (size_t i = 0; i < m; i++) {
-        for (size_t j = 0; j < n; j++) {
-            std::cout << inputSVDMatrix[i][j] << " ";
-        }
-        std::cout << std::endl;
-    }
-#endif
-
-    float ** expectedSMatrix = reg_tool_ReadMatrixFile<float>(expectedSMatrixFilename, min_size, min_size);
-    float **test_SMatrix = reg_matrix2DAllocate<float>(min_size, min_size);
-
-    //more row than columns
-    if (m > n) {
-
-        float ** expectedUMatrix = reg_tool_ReadMatrixFile<float>(expectedUMatrixFilename, m, n);
-        float ** expectedVMatrix = reg_tool_ReadMatrixFile<float>(expectedVMatrixFilename, min_size, min_size);
-
-        float **test_UMatrix = reg_matrix2DAllocate<float>(m, n);
-        float **test_VMatrix = reg_matrix2DAllocate<float>(min_size, min_size);
-
-        //For the old version of the function:
-        float **inputSVDMatrixNotTouched = reg_tool_ReadMatrixFile<float>(inputSVDMatrixFilename, m, n);
-        double *test_SVect = (double*)malloc(min_size*sizeof(double));
-        //SVD
-#ifdef _USE_CUDA
-        if(platformType != PlatformType::Cuda) {
-#endif
-            //svd<float>(inputSVDMatrix, m, n, test_SVect, test_VMatrix);
-            //U
-            for (size_t i = 0; i < m; i++) {
-                for (size_t j = 0; j < n; j++) {
-                    test_UMatrix[i][j] = inputSVDMatrix[i][j];
-                }
-            }
-#ifdef _USE_CUDA
-        }
-        else{
-            double* inputSVDMatrix_d;
-            NR_CUDA_SAFE_CALL(cudaCommon_allocateArrayToDevice<double>(&inputSVDMatrix_d, m * n));
-            double **inputSVDMatrix_h = reg_tool_ReadMatrixFile<double>(inputSVDMatrixFilename, m, n);
-            cudaCommon_transfer2DMatrixFromCpuToDevice<double>(inputSVDMatrix_d,inputSVDMatrix_h,m,n);
-
-            double* Sigma_d;
-            NR_CUDA_SAFE_CALL(cudaCommon_allocateArrayToDevice<double>(&Sigma_d, min_size));
-            double* U_d;
-            NR_CUDA_SAFE_CALL(cudaCommon_allocateArrayToDevice<double>(&U_d, max_size * max_size));
-            double* VT_d;
-            NR_CUDA_SAFE_CALL(cudaCommon_allocateArrayToDevice<double>(&VT_d, min_size * min_size));
-
-            //CUDA EXECUTION
-            //cusolverSVD(inputSVDMatrix_d, m, n, Sigma_d, VT_d, U_d);
-            // --- device side SVD workspace and matrices
-            int Lwork = 0;
-            int *devInfo;
-            gpuErrchk(cudaMalloc(&devInfo, sizeof(int)));
-            cusolverStatus_t stat;
-
-            // --- CUDA solver initialization
-            cusolverDnHandle_t solver_handle;
-            cusolverDnCreate(&solver_handle);
-
-            stat = cusolverDnDgesvd_bufferSize(solver_handle, m, n, &Lwork);
-            if(stat != CUSOLVER_STATUS_SUCCESS ) std::cout << "Initialization of cuSolver failed. \n";
-
-            double *work_d;
-            gpuErrchk(cudaMalloc(&work_d, Lwork * sizeof(double)));
-
-            // --- CUDA SVD execution
-            stat = cusolverDnDgesvd(solver_handle, 'A', 'A', m, n, inputSVDMatrix_d, m, Sigma_d, U_d, max_size, VT_d, min_size, work_d, Lwork, nullptr, devInfo);
-            //stat = cusolverDnSgesvd(solver_handle, 'N', 'N', M, N, d_A, M, d_S, d_U, M, d_V, N, work, work_size, nullptr, devInfo);
-            cudaDeviceSynchronize();
-
-            int devInfo_h = 0;
-            gpuErrchk(cudaMemcpy(&devInfo_h, devInfo, sizeof(int), cudaMemcpyDeviceToHost));
-            std::cout << "devInfo = " << devInfo_h << "\n";
-
-            switch(stat){
-            case CUSOLVER_STATUS_SUCCESS:           std::cout << "SVD computation success\n";                       break;
-            case CUSOLVER_STATUS_NOT_INITIALIZED:   std::cout << "Library cuSolver not initialized correctly\n";    break;
-            case CUSOLVER_STATUS_INVALID_VALUE:     std::cout << "Invalid parameters passed\n";                     break;
-            case CUSOLVER_STATUS_INTERNAL_ERROR:    std::cout << "Internal operation failed\n";                     break;
-            }
-
-            if (devInfo_h == 0 && stat == CUSOLVER_STATUS_SUCCESS) std::cout    << "SVD successful\n\n";
-
-            // --- Moving the results from device to host
-            gpuErrchk(cudaMemcpy(test_SVect, Sigma_d, n * sizeof(double), cudaMemcpyDeviceToHost));
-
-            for(int i = 0; i < n; i++) std::cout << "d_S["<<i<<"] = " << test_SVect[i] << std::endl;
-
-            cusolverDnDestroy(solver_handle);
-        }
-    }
-#endif
-    /*
-            //RETRIEVE THE RESULTS FROM THE GPU
-            float **test_UMatrixCUDA = reg_matrix2DAllocate<float>(m, m);
-            cudaCommon_transferArrayFromDeviceToCpu<float>(test_SVect, Sigma_d, min_size);
-            cudaCommon_transferFromDeviceTo2DMatrixCpu<float>(VT_d, test_VMatrix, min_size, min_size);
-            test_VMatrix = reg_matrix2DTranspose<float>(test_VMatrix, min_size, min_size);
-            cudaCommon_transferFromDeviceTo2DMatrixCpu<float>(U_d, test_UMatrixCUDA, m, m);
-
-#ifndef NDEBUG
-            std::cout << "test_UMatrixCUDA[i][j]=" << std::endl;
-            for (size_t i = 0; i < m; i++) {
-                for (size_t j = 0; j < m; j++) {
-                    std::cout << test_UMatrixCUDA[i][j] << " ";
-                }
-                std::cout << std::endl;
-            }
-#endif
-
-        }
-#endif
-        //S
-        for (size_t i = 0; i < min_size; i++) {
-            for (size_t j = 0; j < min_size; j++) {
-                if (i == j) {
-                    test_SMatrix[i][j] = test_SVect[i];
-                }
-                else {
-                    test_SMatrix[i][j] = 0;
-                }
-            }
-        }
-
-#ifndef NDEBUG
-        std::cout << "test_UMatrix[i][j]=" << std::endl;
-        for (size_t i = 0; i < m; i++) {
-            for (size_t j = 0; j < n; j++) {
-                std::cout << test_UMatrix[i][j] << " ";
-            }
-            std::cout << std::endl;
-        }
-        std::cout << "test_SMatrix[i][j]=" << std::endl;
-        for (size_t i = 0; i < min_size; i++) {
-            for (size_t j = 0; j < min_size; j++) {
-                std::cout << test_SMatrix[i][j] << " ";
-            }
-            std::cout << std::endl;
-        }
-        std::cout << "test_VMatrix[i][j]=" << std::endl;
-        for (size_t i = 0; i < min_size; i++) {
-            for (size_t j = 0; j < min_size; j++) {
-                std::cout << test_VMatrix[i][j] << " ";
-            }
-            std::cout << std::endl;
-        }
-#endif
-        //The sign of the vector are different between Matlab and Eigen so let's take the absolute value and let's check that U*S*V' = M
-        float max_difference = 0;
-
-        for (size_t i = 0; i < min_size; i++) {
-            for (size_t j = 0; j < min_size; j++) {
-                float difference = fabsf(test_SMatrix[i][j]) - fabsf(expectedSMatrix[i][j]);
-                max_difference = std::max(difference, max_difference);
-                if (difference > EPS){
-                    fprintf(stderr, "reg_test_svd - checking S - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-                    return EXIT_FAILURE;
-                }
-                difference = fabsf(test_VMatrix[i][j]) - fabsf(expectedVMatrix[i][j]);
-                max_difference = std::max(difference, max_difference);
-                if (difference > EPS){
-                    fprintf(stderr, "reg_test_svd - checking V - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-                    return EXIT_FAILURE;
-                }
-            }
-        }
-        for (size_t i = 0; i < m; i++) {
-            for (size_t j = 0; j < n; j++) {
-                float difference = fabsf(test_UMatrix[i][j]) - fabsf(expectedUMatrix[i][j]);
-                max_difference = std::max(difference, max_difference);
-                if (difference > EPS){
-                    fprintf(stderr, "reg_test_svd - checking U - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-                    return EXIT_FAILURE;
-                }
-            }
-        }
-        //check that U*S*V' = M
-        float ** US = reg_matrix2DMultiply(test_UMatrix, m, n, test_SMatrix, min_size, min_size, false);
-        float ** VT = reg_matrix2DTranspose(test_VMatrix, min_size, min_size);
-        float ** test_inputMatrix = reg_matrix2DMultiply(US, m, min_size, VT, min_size, min_size, false);
-#ifndef NDEBUG
-        std::cout << "test_inputMatrix[i][j]=" << std::endl;
-        for (size_t i = 0; i < m; i++) {
-            for (size_t j = 0; j < n; j++) {
-                std::cout << test_inputMatrix[i][j] << " ";
-            }
-            std::cout << std::endl;
-        }
-#endif
-        for (size_t i = 0; i < m; i++) {
-            for (size_t j = 0; j < n; j++) {
-                float difference = fabsf(inputSVDMatrixNotTouched[i][j] - test_inputMatrix[i][j]);
-                max_difference = std::max(difference, max_difference);
-                if (difference > EPS){
-                    fprintf(stderr, "reg_test_svd - checking that U*S*V' = M - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-                    return EXIT_FAILURE;
-                }
-            }
-        }
-
-        // Free the allocated variables
-        for (size_t i = 0; i < m; i++) {
-            free(inputSVDMatrix[i]);
-            free(inputSVDMatrixNotTouched[i]);
-            free(expectedUMatrix[i]);
-            free(test_UMatrix[i]);
-        }
-        for (size_t j = 0; j < min_size; j++) {
-            free(expectedSMatrix[j]);
-            free(expectedVMatrix[j]);
-            free(test_SMatrix[j]);
-            free(test_VMatrix[j]);
-        }
-        free(inputSVDMatrix);
-        free(inputSVDMatrixNotTouched);
-        free(expectedUMatrix);
-        free(expectedSMatrix);
-        free(expectedVMatrix);
-        free(test_UMatrix);
-        free(test_SMatrix);
-        free(test_VMatrix);
-        free(test_SVect);
-        //
-#ifndef NDEBUG
-        fprintf(stdout, "reg_test_svd ok: %g ( <%g )\n", max_difference, EPS);
-#endif
-        return EXIT_SUCCESS;
-    }
-    //more colums than rows
-    else {
-
-        float ** expectedUMatrix = reg_tool_ReadMatrixFile<float>(expectedUMatrixFilename, min_size, min_size);
-        float ** expectedVMatrix = reg_tool_ReadMatrixFile<float>(expectedVMatrixFilename, n, m);
-
-        float **test_UMatrix = reg_matrix2DAllocate<float>(min_size, min_size);
-        float **test_VMatrix = reg_matrix2DAllocate<float>(n, m);
-
-        svd<float>(inputSVDMatrix, m, n, &test_UMatrix, &test_SMatrix, &test_VMatrix);
-#ifndef NDEBUG
-        std::cout << "test_UMatrix[i][j]=" << std::endl;
-        for (size_t i = 0; i < min_size; i++) {
-            for (size_t j = 0; j < min_size; j++) {
-                std::cout << test_UMatrix[i][j] << " ";
-            }
-            std::cout << std::endl;
-        }
-        std::cout << "test_SMatrix[i][j]=" << std::endl;
-        for (size_t i = 0; i < min_size; i++) {
-            for (size_t j = 0; j < min_size; j++) {
-                std::cout << test_SMatrix[i][j] << " ";
-            }
-            std::cout << std::endl;
-        }
-        std::cout << "test_VMatrix[i][j]=" << std::endl;
-        for (size_t i = 0; i < n; i++) {
-            for (size_t j = 0; j < m; j++) {
-                std::cout << test_VMatrix[i][j] << " ";
-            }
-            std::cout << std::endl;
-        }
-#endif
-        //The sign of the vector are different between Matlab and Eigen so let's take the absolute value and let's check that U*S*V' = M
-        float max_difference = 0;
-
-        for (size_t i = 0; i < min_size; i++) {
-            for (size_t j = 0; j < min_size; j++) {
-                float difference = fabsf(test_SMatrix[i][j]) - fabsf(expectedSMatrix[i][j]);
-                max_difference = std::max(difference, max_difference);
-                if (difference > EPS){
-                    fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-                    return EXIT_FAILURE;
-                }
-                difference = fabsf(test_UMatrix[i][j]) - fabsf(test_UMatrix[i][j]);
-                max_difference = std::max(difference, max_difference);
-                if (difference > EPS){
-                    fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-                    return EXIT_FAILURE;
-                }
-            }
-        }
-        for (size_t i = 0; i < n; i++) {
-            for (size_t j = 0; j < m; j++) {
-                float difference = fabsf(test_VMatrix[i][j]) - fabsf(test_VMatrix[i][j]);
-                max_difference = std::max(difference, max_difference);
-                if (difference > EPS){
-                    fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-                    return EXIT_FAILURE;
-                }
-            }
-        }
-
-        //check that U*S*V' = M
-        float ** US = reg_matrix2DMultiply(test_UMatrix, min_size, min_size, test_SMatrix, min_size, min_size, false);
-        float ** VT = reg_matrix2DTranspose(test_VMatrix, n, m);
-        float ** test_inputMatrix = reg_matrix2DMultiply(US, min_size, min_size, VT, m, n, false);
-#ifndef NDEBUG
-        std::cout << "test_inputMatrix[i][j]=" << std::endl;
-        for (size_t i = 0; i < m; i++) {
-            for (size_t j = 0; j < n; j++) {
-                std::cout << test_inputMatrix[i][j] << " ";
-            }
-            std::cout << std::endl;
-        }
-#endif
-        for (size_t i = 0; i < m; i++) {
-            for (size_t j = 0; j < n; j++) {
-                float difference = fabsf(inputSVDMatrix[i][j] - test_inputMatrix[i][j]);
-                max_difference = std::max(difference, max_difference);
-                if (difference > EPS){
-                    fprintf(stderr, "reg_test_svd - checking that U*S*V' = M - Error in the SVD computation %.8g (>%g)\n", difference, EPS);
-                    return EXIT_FAILURE;
-                }
-            }
-        }
-
-        // Free the allocated variables
-        for (size_t i = 0; i < min_size; i++) {
-            free(inputSVDMatrix[i]);
-            free(expectedUMatrix[i]);
-            free(test_UMatrix[i]);
-            free(expectedSMatrix[i]);
-            free(test_SMatrix[i]);
-        }
-        for (size_t j = 0; j < n; j++) {
-            free(expectedVMatrix[j]);
-            free(test_VMatrix[j]);
-        }
-        free(inputSVDMatrix);
-        free(expectedUMatrix);
-        free(expectedSMatrix);
-        free(expectedVMatrix);
-        free(test_UMatrix);
-        free(test_SMatrix);
-        free(test_VMatrix);
-        //
-#ifndef NDEBUG
-        fprintf(stdout, "reg_test_svd ok: %g (<%g)\n", max_difference, EPS);
-#endif
-        return EXIT_SUCCESS;
-    }
-    */
-}

From 3b300bc91ea4ab6b5b8d43c3be0dfc5ff6f6d24a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 27 Feb 2023 18:14:23 +0000
Subject: [PATCH 066/314] Add tests for *Compute::GetImageGradient()

---
 niftyreg_build_version.txt                    |   2 +-
 reg-test/CMakeLists.txt                       | 144 +++----
 ...pp => reg_test_affineDeformationField.cpp} | 379 +++++++++---------
 reg-test/reg_test_common.h                    |  39 +-
 reg-test/reg_test_imageGradient.cpp           | 220 ++++++++++
 reg-test/reg_test_interpolation.cpp           |  40 +-
 6 files changed, 534 insertions(+), 290 deletions(-)
 rename reg-test/{reg_test_affine_deformation_field.cpp => reg_test_affineDeformationField.cpp} (88%)
 create mode 100644 reg-test/reg_test_imageGradient.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a14f8d53..3af99eee 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-179
+180
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 2a247161..a7efe69f 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -1,73 +1,73 @@
-find_package(Catch2 3)
-if(NOT Catch2_FOUND)
-  set(BUILD_TESTING OFF CACHE BOOL "To build the unit tests" FORCE)
-  message(STATUS "Catch2 not found")
-  message(SEND_ERROR "Catch2 is required to generate the unit test.
-  The BUILD_TESTING flag is turned OFF")
-  return()
-endif(NOT Catch2_FOUND)
-#-----------------------------------------------------------------------------
-#-----------------------------------------------------------------------------
-# Build the coverage test
-if(NOT MSVC)
-  option(WITH_COVERAGE "Set up the C, CXX and linker flags to run the coverage test" OFF)
-  if(WITH_COVERAGE)
-    set(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY_ONCE TRUE)
-    set(CMAKE_BUILD_TYPE "Debug" CACHE STRING
-      "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
-      FORCE)
-    set(CMAKE_CXX_FLAGS_DEBUG
-      "-g -O0 -Wall -W -Wunused-variable -Wunused-parameter -Wunused-function -Wunused -Wno-system-headers -Wno-deprecated -Woverloaded-virtual -Wwrite-strings -fprofile-arcs -ftest-coverage"
-      CACHE STRING "Force the debug CXX flags for the coverage test" FORCE)
-    set(CMAKE_EXE_LINKER_FLAGS_DEBUG
-      "-fprofile-arcs -ftest-coverage"
-      CACHE STRING "Force the debug linker flags for the coverage test" FORCE)
-    set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE)
-    configure_file(${CMAKE_SOURCE_DIR}/reg-test/CTestCustom.cmake.in
-                   ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
-  endif(WITH_COVERAGE)
-endif(NOT MSVC)
-#-----------------------------------------------------------------------------
-#-----------------------------------------------------------------------------
-# Set the build name
-set(CTEST_BUILD_NAME "${CMAKE_SYSTEM}_${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}_cmake-${CMAKE_VERSION}_${CMAKE_BUILD_TYPE}")
-if(USE_SSE)
-  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_sse")
-endif(USE_SSE)
-if(USE_OPENMP)
-  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_openmp")
-endif(USE_OPENMP)
-if(USE_CUDA)
-  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_cuda-${CUDA_VERSION}")
-endif(USE_CUDA)
-if(USE_OPENCL)
-  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_opencl")
-endif(USE_OPENCL)
-if(NOT MSVC)
-  unset(BUILDNAME CACHE)
-  unset(BUILDNAME)
-  set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash" FORCE)
-else(MSVC)
-  set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash")
-  message(STATUS "The buildname might need manual editing")
-endif(NOT MSVC)
-mark_as_advanced(BUILDNAME)
-#-----------------------------------------------------------------------------
-#-----------------------------------------------------------------------------
-include(CTest)
-include(Catch)
-#-----------------------------------------------------------------------------
-#-----------------------------------------------------------------------------
-set(EXEC_LIST reg_test_affine_deformation_field)
-set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
-
-
-foreach(EXEC ${EXEC_LIST})
-  add_executable(${EXEC} ${EXEC}.cpp)
-  target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain)
-  target_link_libraries(${EXEC} PRIVATE _reg_aladin)
-  target_link_libraries(${EXEC} PRIVATE _reg_f3d)
-  catch_discover_tests(${EXEC})
-endforeach(EXEC)
-#-----------------------------------------------------------------------------
+find_package(Catch2 3)
+if(NOT Catch2_FOUND)
+  set(BUILD_TESTING OFF CACHE BOOL "To build the unit tests" FORCE)
+  message(STATUS "Catch2 not found")
+  message(SEND_ERROR "Catch2 is required to generate the unit test.
+  The BUILD_TESTING flag is turned OFF")
+  return()
+endif(NOT Catch2_FOUND)
+#-----------------------------------------------------------------------------
+#-----------------------------------------------------------------------------
+# Build the coverage test
+if(NOT MSVC)
+  option(WITH_COVERAGE "Set up the C, CXX and linker flags to run the coverage test" OFF)
+  if(WITH_COVERAGE)
+    set(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY_ONCE TRUE)
+    set(CMAKE_BUILD_TYPE "Debug" CACHE STRING
+      "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
+      FORCE)
+    set(CMAKE_CXX_FLAGS_DEBUG
+      "-g -O0 -Wall -W -Wunused-variable -Wunused-parameter -Wunused-function -Wunused -Wno-system-headers -Wno-deprecated -Woverloaded-virtual -Wwrite-strings -fprofile-arcs -ftest-coverage"
+      CACHE STRING "Force the debug CXX flags for the coverage test" FORCE)
+    set(CMAKE_EXE_LINKER_FLAGS_DEBUG
+      "-fprofile-arcs -ftest-coverage"
+      CACHE STRING "Force the debug linker flags for the coverage test" FORCE)
+    set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE)
+    configure_file(${CMAKE_SOURCE_DIR}/reg-test/CTestCustom.cmake.in
+                   ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
+  endif(WITH_COVERAGE)
+endif(NOT MSVC)
+#-----------------------------------------------------------------------------
+#-----------------------------------------------------------------------------
+# Set the build name
+set(CTEST_BUILD_NAME "${CMAKE_SYSTEM}_${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}_cmake-${CMAKE_VERSION}_${CMAKE_BUILD_TYPE}")
+if(USE_SSE)
+  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_sse")
+endif(USE_SSE)
+if(USE_OPENMP)
+  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_openmp")
+endif(USE_OPENMP)
+if(USE_CUDA)
+  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_cuda-${CUDA_VERSION}")
+endif(USE_CUDA)
+if(USE_OPENCL)
+  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_opencl")
+endif(USE_OPENCL)
+if(NOT MSVC)
+  unset(BUILDNAME CACHE)
+  unset(BUILDNAME)
+  set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash" FORCE)
+else(MSVC)
+  set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash")
+  message(STATUS "The buildname might need manual editing")
+endif(NOT MSVC)
+mark_as_advanced(BUILDNAME)
+#-----------------------------------------------------------------------------
+#-----------------------------------------------------------------------------
+include(CTest)
+include(Catch)
+#-----------------------------------------------------------------------------
+#-----------------------------------------------------------------------------
+set(EXEC_LIST reg_test_affineDeformationField)
+set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
+set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
+
+foreach(EXEC ${EXEC_LIST})
+  add_executable(${EXEC} ${EXEC}.cpp)
+  target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain)
+  target_link_libraries(${EXEC} PRIVATE _reg_aladin)
+  target_link_libraries(${EXEC} PRIVATE _reg_f3d)
+  catch_discover_tests(${EXEC})
+endforeach(EXEC)
+#-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affineDeformationField.cpp
similarity index 88%
rename from reg-test/reg_test_affine_deformation_field.cpp
rename to reg-test/reg_test_affineDeformationField.cpp
index e3c9f749..9285e8bd 100644
--- a/reg-test/reg_test_affine_deformation_field.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -1,194 +1,185 @@
-// Enable testing
-#define NR_TESTING
-
-#include "_reg_ReadWriteMatrix.h"
-#include "_reg_tools.h"
-
-#include "Kernel.h"
-#include "AffineDeformationFieldKernel.h"
-#include "Platform.h"
-#include "AladinContent.h"
-
-#include <catch2/catch_test_macros.hpp>
-
-#define EPS_SINGLE 0.0001
-
-/*
-    This test file contains the following unit tests:
-    test function: creation of a deformation field from an affine matrix
-    In 2D and 3D
-    identity
-    translation
-    affine
-*/
-
-
-typedef std::tuple<std::string, nifti_image*, mat44*, float*, float*, float*> TestData;
-typedef std::tuple<unique_ptr<AladinContent>, unique_ptr<Platform>> ContentDesc;
-
-TEST_CASE("Affine deformation field", "[AffineDefField]") {
-    // Create a reference 2D image
-    int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 };
-    nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference2d);
-
-    // Create a reference 3D image
-    dim[0] = 3;
-    dim[3] = 2;
-    nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference3d);
-
-    // Generate the different test cases
-    std::vector<TestData> testCases;
-
-    // Identity use case - 2D
-    mat44 identity;
-    reg_mat44_eye(&identity);
-    // Test order [0,0] [1,0] [0,1] [1,1]
-    float identityResult2x[4] = { 0, 1, 0, 1 };
-    float identityResult2y[4] = { 0, 0, 1, 1 };
-    testCases.emplace_back(TestData(
-        "identity 2D",
-        reference2d,
-        &identity,
-        identityResult2x,
-        identityResult2y,
-        nullptr)
-    );
-    // Identity use case - 3D
-    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
-    float identityResult3x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 };
-    float identityResult3y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 };
-    float identityResult3z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 };
-    testCases.emplace_back(TestData(
-        "identity 3D",
-        reference3d,
-        &identity,
-        identityResult3x,
-        identityResult3y,
-        identityResult3z)
-    );
-
-    // Translation - 2D
-    mat44 translation;
-    reg_mat44_eye(&translation);
-    translation.m[0][3] = -0.5;
-    translation.m[1][3] = 1.5;
-    translation.m[2][3] = 0.75;
-    // Test order [0,0] [1,0] [0,1] [1,1]
-    float translationResult2x[4] = { -0.5, .5, -0.5, .5 };
-    float translationResult2y[4] = { 1.5, 1.5, 2.5, 2.5 };
-    testCases.emplace_back(TestData(
-        "translation 2D",
-        reference2d,
-        &translation,
-        translationResult2x,
-        translationResult2y,
-        nullptr)
-    );
-
-    // Translation - 3D
-    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
-    float translationResult3x[8] = { -0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5 };
-    float translationResult3y[8] = { 1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5 };
-    float translationResult3z[8] = { .75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75 };
-    testCases.emplace_back(TestData(
-        "translation 3D",
-        reference3d,
-        &translation,
-        translationResult3x,
-        translationResult3y,
-        translationResult3z)
-    );
-
-
-    // Full affine - 2D
-    // Test order [0,0] [1,0] [0,1] [1,1]
-    mat44 affine;
-    reg_mat44_eye(&affine);
-    affine.m[0][3] = -0.5;
-    affine.m[1][3] = 1.5;
-    affine.m[2][3] = 0.75;
-    for (auto i = 0; i < 4; ++i) {
-        for (auto j = 0; j < 4; ++j) {
-            affine.m[i][j] += (((float)rand() / (RAND_MAX)) - 0.5f) / 10.f;
-        }
-    }
-    float affineResult2x[4];
-    float affineResult2y[4];
-    for (auto i = 0; i < 4; ++i) {
-        auto x = identityResult2x[i];
-        auto y = identityResult2y[i];
-        affineResult2x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y;
-        affineResult2y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y;
-
-    }
-    testCases.emplace_back(TestData(
-        "full affine 2D",
-        reference2d,
-        &affine,
-        affineResult2x,
-        affineResult2y,
-        nullptr)
-    );
-    // Full affine - 3D
-    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
-    float affineResult3x[8];
-    float affineResult3y[8];
-    float affineResult3z[8];
-    for (auto i = 0; i < 8; ++i) {
-        auto x = identityResult3x[i];
-        auto y = identityResult3y[i];
-        auto z = identityResult3z[i];
-        affineResult3x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z;
-        affineResult3y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z;
-        affineResult3z[i] = affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z;
-    }
-    testCases.emplace_back(TestData(
-        "affine 3D",
-        reference3d,
-        &affine,
-        affineResult3x,
-        affineResult3y,
-        affineResult3z)
-    );
-
-    // Loop over all generated test cases to create all content and run all tests
-    for (auto&& testCase : testCases) {
-        // Retrieve test information
-        auto&& [testName, reference, testMat, testResX, testResY, testResZ] = testCase;
-
-        // Accumulate all required contents with a vector
-        std::vector<ContentDesc> contentDescs;
-        for (auto&& platformType : PlatformTypes) {
-            unique_ptr<Platform> platform{ new Platform(platformType) };
-            unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
-            unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) };
-            contentDescs.push_back(ContentDesc(std::move(content), std::move(platform)));
-        }
-        // Loop over all possibles contents for each test
-        for (auto&& contentDesc : contentDescs) {
-            auto&& [content, platform] = contentDesc;
-            SECTION(testName + " " + platform->GetName()) {
-                // Initialise the platform to run current content and retrieve deformation field
-                unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) };
-                affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
-                nifti_image *defField = content->GetDeformationField();
-
-                // Check all values
-                auto *defFieldPtrX = static_cast<float *>(defField->data);
-                const size_t voxelNumber = CalcVoxelNumber(*defField);
-                auto *defFieldPtrY = &defFieldPtrX[voxelNumber];
-                auto *defFieldPtrZ = &defFieldPtrY[voxelNumber];
-                for (size_t i = 0; i < voxelNumber; ++i) {
-                    REQUIRE(fabs(defFieldPtrX[i] - testResX[i]) < EPS_SINGLE);
-                    REQUIRE(fabs(defFieldPtrY[i] - testResY[i]) < EPS_SINGLE);
-                    if (testResZ)
-                        REQUIRE(fabs(defFieldPtrZ[i] - testResZ[i]) < EPS_SINGLE);
-                }
-            }
-        }
-    }
-    nifti_image_free(reference2d);
-    nifti_image_free(reference3d);
-}
+#include "reg_test_common.h"
+
+#define EPS 0.0001
+
+/*
+    This test file contains the following unit tests:
+    test function: creation of a deformation field from an affine matrix
+    In 2D and 3D
+    identity
+    translation
+    affine
+*/
+
+
+typedef std::tuple<std::string, nifti_image*, mat44*, float*, float*, float*> TestData;
+typedef std::tuple<unique_ptr<AladinContent>, unique_ptr<Platform>> ContentDesc;
+
+TEST_CASE("Affine deformation field", "[AffineDefField]") {
+    // Create a reference 2D image
+    int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 };
+    nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(reference2d);
+
+    // Create a reference 3D image
+    dim[0] = 3;
+    dim[3] = 2;
+    nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(reference3d);
+
+    // Generate the different test cases
+    std::vector<TestData> testCases;
+
+    // Identity use case - 2D
+    mat44 identity;
+    reg_mat44_eye(&identity);
+    // Test order [0,0] [1,0] [0,1] [1,1]
+    float identityResult2x[4] = { 0, 1, 0, 1 };
+    float identityResult2y[4] = { 0, 0, 1, 1 };
+    testCases.emplace_back(TestData(
+        "identity 2D",
+        reference2d,
+        &identity,
+        identityResult2x,
+        identityResult2y,
+        nullptr)
+    );
+
+    // Identity use case - 3D
+    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
+    float identityResult3x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 };
+    float identityResult3y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 };
+    float identityResult3z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 };
+    testCases.emplace_back(TestData(
+        "identity 3D",
+        reference3d,
+        &identity,
+        identityResult3x,
+        identityResult3y,
+        identityResult3z)
+    );
+
+    // Translation - 2D
+    mat44 translation;
+    reg_mat44_eye(&translation);
+    translation.m[0][3] = -0.5;
+    translation.m[1][3] = 1.5;
+    translation.m[2][3] = 0.75;
+    // Test order [0,0] [1,0] [0,1] [1,1]
+    float translationResult2x[4] = { -0.5, .5, -0.5, .5 };
+    float translationResult2y[4] = { 1.5, 1.5, 2.5, 2.5 };
+    testCases.emplace_back(TestData(
+        "translation 2D",
+        reference2d,
+        &translation,
+        translationResult2x,
+        translationResult2y,
+        nullptr)
+    );
+
+    // Translation - 3D
+    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
+    float translationResult3x[8] = { -0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5 };
+    float translationResult3y[8] = { 1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5 };
+    float translationResult3z[8] = { .75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75 };
+    testCases.emplace_back(TestData(
+        "translation 3D",
+        reference3d,
+        &translation,
+        translationResult3x,
+        translationResult3y,
+        translationResult3z)
+    );
+
+    // Full affine - 2D
+    // Test order [0,0] [1,0] [0,1] [1,1]
+    mat44 affine;
+    reg_mat44_eye(&affine);
+    affine.m[0][3] = -0.5;
+    affine.m[1][3] = 1.5;
+    affine.m[2][3] = 0.75;
+    for (auto i = 0; i < 4; ++i) {
+        for (auto j = 0; j < 4; ++j) {
+            affine.m[i][j] += (((float)rand() / (RAND_MAX)) - 0.5f) / 10.f;
+        }
+    }
+    float affineResult2x[4];
+    float affineResult2y[4];
+    for (auto i = 0; i < 4; ++i) {
+        auto x = identityResult2x[i];
+        auto y = identityResult2y[i];
+        affineResult2x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y;
+        affineResult2y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y;
+
+    }
+    testCases.emplace_back(TestData(
+        "full affine 2D",
+        reference2d,
+        &affine,
+        affineResult2x,
+        affineResult2y,
+        nullptr)
+    );
+
+    // Full affine - 3D
+    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
+    float affineResult3x[8];
+    float affineResult3y[8];
+    float affineResult3z[8];
+    for (auto i = 0; i < 8; ++i) {
+        auto x = identityResult3x[i];
+        auto y = identityResult3y[i];
+        auto z = identityResult3z[i];
+        affineResult3x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z;
+        affineResult3y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z;
+        affineResult3z[i] = affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z;
+    }
+    testCases.emplace_back(TestData(
+        "affine 3D",
+        reference3d,
+        &affine,
+        affineResult3x,
+        affineResult3y,
+        affineResult3z)
+    );
+
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, reference, testMat, testResX, testResY, testResZ] = testCase;
+
+        // Accumulate all required contents with a vector
+        std::vector<ContentDesc> contentDescs;
+        for (auto&& platformType : PlatformTypes) {
+            unique_ptr<Platform> platform{ new Platform(platformType) };
+            unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+            unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) };
+            contentDescs.push_back({ std::move(content), std::move(platform) });
+        }
+        // Loop over all possibles contents for each test
+        for (auto&& contentDesc : contentDescs) {
+            auto&& [content, platform] = contentDesc;
+            SECTION(testName + " " + platform->GetName()) {
+                // Do the calculation
+                unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) };
+                affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
+
+                // Check all values
+                nifti_image *defField = content->GetDeformationField();
+                auto defFieldPtrX = static_cast<float*>(defField->data);
+                const size_t voxelNumber = CalcVoxelNumber(*defField);
+                auto defFieldPtrY = &defFieldPtrX[voxelNumber];
+                auto defFieldPtrZ = &defFieldPtrY[voxelNumber];
+                for (size_t i = 0; i < voxelNumber; ++i) {
+                    REQUIRE(fabs(defFieldPtrX[i] - testResX[i]) < EPS);
+                    REQUIRE(fabs(defFieldPtrY[i] - testResY[i]) < EPS);
+                    if (testResZ)
+                        REQUIRE(fabs(defFieldPtrZ[i] - testResZ[i]) < EPS);
+                }
+            }
+        }
+    }
+    // Clean up
+    nifti_image_free(reference2d);
+    nifti_image_free(reference3d);
+}
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 1991aabc..a1b53590 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -1,12 +1,11 @@
 // Enable testing
 #define NR_TESTING
 
+#include <catch2/catch_test_macros.hpp>
+#include "_reg_localTrans.h"
 #include "Platform.h"
 #include "ResampleImageKernel.h"
-#include "_reg_localTrans.h"
-
-#include <list>
-#include <catch2/catch_test_macros.hpp>
+#include "AffineDeformationFieldKernel.h"
 
 
 template <typename T>
@@ -19,3 +18,35 @@ void interpCubicSplineKernel(T relative, T (&basis)[4]) {
     basis[3] = (relative - 1.f) * relative2 / 2.f;
 }
 
+template <typename T>
+void interpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) {
+    interpCubicSplineKernel(relative, basis);
+    if (relative < 0) relative = 0; //reg_rounding error
+    const T relative2 = relative * relative;
+    derivative[0] = (4.f * relative - 3.f * relative2 - 1.f) / 2.f;
+    derivative[1] = (9.f * relative - 10.f) * relative / 2.f;
+    derivative[2] = (8.f * relative - 9.f * relative2 + 1.f) / 2.f;
+    derivative[3] = (3.f * relative - 2.f) * relative / 2.f;
+}
+
+nifti_image* CreateControlPointGrid(nifti_image *reference) {
+    // Set the spacing for the control point grid
+    float spacingInMillimeter[3] = { reference->dx, reference->dy, reference->dz };
+
+    // Define the spacing for the first level
+    float gridSpacing[3];
+    gridSpacing[0] = spacingInMillimeter[0];
+    gridSpacing[1] = spacingInMillimeter[1];
+    gridSpacing[2] = 1;
+    if (reference->nz > 1)
+        gridSpacing[2] = spacingInMillimeter[2];
+
+    // Create and allocate the control point image
+    nifti_image *controlPointGrid = nullptr;
+    reg_createControlPointGrid<float>(&controlPointGrid, reference, gridSpacing);
+
+    // The control point position image is initialised with the affine transformation
+    reg_getDeformationFromDisplacement(controlPointGrid);
+
+    return controlPointGrid;
+}
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
new file mode 100644
index 00000000..ab0e1249
--- /dev/null
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -0,0 +1,220 @@
+// OpenCL is not supported for this test
+#undef _USE_OPENCL
+
+#include "reg_test_common.h"
+
+#define EPS 0.000001
+
+/*
+    This test file contains the following unit tests:
+    test function: image gradient
+    In 2D and 3D
+    Linear
+    Cubic spline
+*/
+
+
+typedef std::tuple<std::string, nifti_image*, nifti_image*, int, float*> TestData;
+typedef std::tuple<unique_ptr<F3dContent>, unique_ptr<Platform>> ContentDesc;
+
+TEST_CASE("Image gradient", "[ImageGradient]") {
+    // Create a reference 2D image
+    int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 };
+    nifti_image *reference2d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(reference2d);
+
+    // Fill image with distance from identity
+    auto ref2dPtr = static_cast<float*>(reference2d->data);
+    for (auto y = 0; y < reference2d->ny; ++y) {
+        for (auto x = 0; x < reference2d->nx; ++x) {
+            *ref2dPtr = sqrtf(float(x * x) + float(y * y));
+            ref2dPtr++;
+        }
+    }
+    ref2dPtr = static_cast<float*>(reference2d->data);
+
+    // Create a corresponding 2D deformation field
+    int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 };
+    nifti_image *deformationField2d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(deformationField2d);
+    auto def2dPtr = static_cast<float*>(deformationField2d->data);
+    def2dPtr[0] = 1.2f;
+    def2dPtr[1] = 1.3f;
+
+    // Create a reference 3D image
+    dimFlo[0] = 3; dimFlo[3] = 4;
+    nifti_image *reference3d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(reference3d);
+
+    // Fill image with distance from identity
+    auto ref3dPtr = static_cast<float*>(reference3d->data);
+    for (auto z = 0; z < reference3d->nz; ++z) {
+        for (auto y = 0; y < reference3d->ny; ++y) {
+            for (auto x = 0; x < reference3d->nx; ++x) {
+                *ref3dPtr = sqrtf(float(x * x) + float(y * y) + float(z * z));
+                ref3dPtr++;
+            }
+        }
+    }
+    ref3dPtr = static_cast<float*>(reference3d->data);
+
+    // Create a corresponding 3D deformation field
+    dimDef[5] = 3;
+    nifti_image *deformationField3d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true);
+    reg_checkAndCorrectDimension(deformationField3d);
+    auto def3dPtr = static_cast<float*>(deformationField3d->data);
+    def3dPtr[0] = 1.2f;
+    def3dPtr[1] = 1.3f;
+    def3dPtr[2] = 1.4f;
+
+    // Generate the different test cases
+    std::vector<TestData> testCases;
+
+    // Linear image gradient - 2D
+    // coordinate in image: [1.2, 1.3]
+    float resLinear2d[2] = {};
+    const float derivLinear[2] = { -1, 1 };
+    const float xBasisLinear[2] = { 0.8f, 0.2f };
+    const float yBasisLinear[2] = { 0.7f, 0.3f };
+    for (int y = 0; y < 2; ++y) {
+        for (int x = 0; x < 2; ++x) {
+            const auto coeff = ref2dPtr[(y + 1) * dimFlo[1] + (x + 1)];
+            resLinear2d[0] += coeff * derivLinear[x] * yBasisLinear[y];
+            resLinear2d[1] += coeff * xBasisLinear[x] * derivLinear[y];
+        }
+    }
+    // Create the test case
+    testCases.emplace_back(TestData(
+        "Linear 2D",
+        reference2d,
+        deformationField2d,
+        1,
+        resLinear2d)
+    );
+
+    // Cubic spline image gradient - 2D
+    // coordinate in image: [1.2, 1.3]
+    float resCubic2d[2] = {};
+    float xBasisCubic[4], yBasisCubic[4];
+    float xDerivCubic[4], yDerivCubic[4];
+    interpCubicSplineKernel(0.2f, xBasisCubic, xDerivCubic);
+    interpCubicSplineKernel(0.3f, yBasisCubic, yDerivCubic);
+    for (int y = 0; y <= 3; ++y) {
+        for (int x = 0; x <= 3; ++x) {
+            const auto coeff = ref2dPtr[y * dimFlo[1] + x];
+            resCubic2d[0] += coeff * xDerivCubic[x] * yBasisCubic[y];
+            resCubic2d[1] += coeff * xBasisCubic[x] * yDerivCubic[y];
+        }
+    }
+
+    // Create the test case
+    testCases.emplace_back(TestData(
+        "Cubic Spline 2D",
+        reference2d,
+        deformationField2d,
+        3,
+        resCubic2d)
+    );
+
+    // Linear image gradient - 3D
+    // coordinate in image: [1.2, 1.3, 1.4]
+    float resLinear3d[3] = {};
+    const float zBasisLinear[2] = { 0.6f, 0.4f };
+    for (int z = 0; z < 2; ++z) {
+        for (int y = 0; y < 2; ++y) {
+            for (int x = 0; x < 2; ++x) {
+                const auto coeff = ref3dPtr[(z + 1) * dimFlo[1] * dimFlo[2] + (y + 1) * dimFlo[1] + (x + 1)];
+                resLinear3d[0] += coeff * derivLinear[x] * yBasisLinear[y] * zBasisLinear[z];
+                resLinear3d[1] += coeff * xBasisLinear[x] * derivLinear[y] * zBasisLinear[z];
+                resLinear3d[2] += coeff * xBasisLinear[x] * yBasisLinear[y] * derivLinear[z];
+            }
+        }
+    }
+
+    // Create the test case
+    testCases.emplace_back(TestData(
+        "Linear 3D",
+        reference3d,
+        deformationField3d,
+        1,
+        resLinear3d)
+    );
+
+    // Cubic spline image gradient - 3D
+    // coordinate in image: [1.2, 1.3, 1.4]
+    float resCubic3d[3] = {};
+    float zBasisCubic[4], zDerivCubic[4];
+    interpCubicSplineKernel(0.4f, zBasisCubic, zDerivCubic);
+    for (int z = 0; z <= 3; ++z) {
+        for (int y = 0; y <= 3; ++y) {
+            for (int x = 0; x <= 3; ++x) {
+                const auto coeff = ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x];
+                resCubic3d[0] += coeff * xDerivCubic[x] * yBasisCubic[y] * zBasisCubic[z];
+                resCubic3d[1] += coeff * xBasisCubic[x] * yDerivCubic[y] * zBasisCubic[z];
+                resCubic3d[2] += coeff * xBasisCubic[x] * yBasisCubic[y] * zDerivCubic[z];
+            }
+        }
+    }
+
+    // Create the test case
+    testCases.emplace_back(TestData(
+        "Cubic Spline 3D",
+        reference3d,
+        deformationField3d,
+        3,
+        resCubic3d)
+    );
+
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, reference, defField, interp, testResult] = testCase;
+        // Create the control point grid
+        unique_ptr<nifti_image> controlPointGrid{ CreateControlPointGrid(reference) };
+
+        // Accumulate all required contents with a vector
+        std::vector<ContentDesc> contentDescs;
+        for (auto&& platformType : PlatformTypes) {
+            unique_ptr<Platform> platform{ new Platform(platformType) };
+            // Add content
+            if (platformType == PlatformType::Cuda && interp != 1)
+                continue;   // CUDA platform only supports linear interpolation
+            unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
+            unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid.get()) };
+            contentDescs.push_back({ std::move(content), std::move(platform) });
+        }
+
+        // Loop over all possibles contents for each test
+        for (auto&& contentDesc : contentDescs) {
+            auto&& [content, platform] = contentDesc;
+            SECTION(testName + " " + platform->GetName()) {
+                // Set the warped gradient image to host the computation
+                auto warpedGradient = content->GetWarpedGradient();
+                warpedGradient->ndim = warpedGradient->dim[0] = defField->ndim;
+                warpedGradient->dim[1] = warpedGradient->nx = 1;
+                warpedGradient->dim[2] = warpedGradient->ny = 1;
+                warpedGradient->dim[3] = warpedGradient->nz = 1;
+                warpedGradient->dim[5] = warpedGradient->nu = defField->nu;
+                warpedGradient->nvox = CalcVoxelNumber(*warpedGradient, warpedGradient->ndim);
+
+                // Set the deformation field
+                content->SetDeformationField(defField);
+
+                // Do the computation
+                unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                compute->GetImageGradient(interp, 0, 0);
+
+                // Check all values
+                warpedGradient = content->GetWarpedGradient();
+                auto warpedGradPtr = static_cast<float*>(warpedGradient->data);
+                for (size_t i = 0; i < warpedGradient->nvox; ++i) {
+                    std::cout << i << " " << warpedGradPtr[i] << " " << testResult[i] << std::endl;
+                    REQUIRE(fabs(warpedGradPtr[i] - testResult[i]) < EPS);
+                }
+            }
+        }
+    }
+    // Clean up
+    nifti_image_free(reference2d);
+    nifti_image_free(reference3d);
+}
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 27f5182a..a00f9b9e 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -3,7 +3,7 @@
 
 #include "reg_test_common.h"
 
-#define EPS_SINGLE 0.001
+#define EPS 0.001
 
 /*
     This test file contains the following unit tests:
@@ -18,14 +18,14 @@
 typedef std::tuple<std::string, nifti_image*, nifti_image*, int, float*> TestData;
 typedef std::tuple<unique_ptr<Content>, shared_ptr<Platform>> ContentDesc;
 
-TEST_CASE("Resampling", "[resampling]") {
+TEST_CASE("Interpolation", "[Interpolation]") {
     // Create a reference 2D image
     int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 };
     nifti_image *reference2d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(reference2d);
 
     // Fill image with distance from identity
-    auto *ref2dPtr = static_cast<float*>(reference2d->data);
+    auto ref2dPtr = static_cast<float*>(reference2d->data);
     for (auto y = 0; y < reference2d->ny; ++y) {
         for (auto x = 0; x < reference2d->nx; ++x) {
             *ref2dPtr = sqrtf(float(x * x) + float(y * y));
@@ -38,7 +38,7 @@ TEST_CASE("Resampling", "[resampling]") {
     int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 };
     nifti_image *deformationField2d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(deformationField2d);
-    auto *def2dPtr = static_cast<float*>(deformationField2d->data);
+    auto def2dPtr = static_cast<float*>(deformationField2d->data);
     def2dPtr[0] = 1.2f;
     def2dPtr[1] = 1.3f;
 
@@ -48,7 +48,7 @@ TEST_CASE("Resampling", "[resampling]") {
     reg_checkAndCorrectDimension(reference3d);
 
     // Fill image with distance from identity
-    auto *ref3dPtr = static_cast<float*>(reference3d->data);
+    auto ref3dPtr = static_cast<float*>(reference3d->data);
     for (auto z = 0; z < reference3d->nz; ++z) {
         for (auto y = 0; y < reference3d->ny; ++y) {
             for (auto x = 0; x < reference3d->nx; ++x) {
@@ -63,7 +63,7 @@ TEST_CASE("Resampling", "[resampling]") {
     dimDef[5] = 3;
     nifti_image *deformationField3d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true);
     reg_checkAndCorrectDimension(deformationField3d);
-    auto *def3dPtr = static_cast<float*>(deformationField3d->data);
+    auto def3dPtr = static_cast<float*>(deformationField3d->data);
     def3dPtr[0] = 1.2f;
     def3dPtr[1] = 1.3f;
     def3dPtr[2] = 1.4f;
@@ -81,7 +81,8 @@ TEST_CASE("Resampling", "[resampling]") {
                 abs(2.0f - (float)y - 0.3f);
         }
     }
-    // create the test case
+
+    // Create the test case
     testCases.emplace_back(TestData(
         "Linear 2D",
         reference2d,
@@ -94,7 +95,7 @@ TEST_CASE("Resampling", "[resampling]") {
     // coordinate in image: [1.2, 1.3]
     float resNearest2d[1];
     resNearest2d[0] = ref2dPtr[1 * dimFlo[1] + 1];
-    // create the test case
+    // Create the test case
     testCases.emplace_back(TestData(
         "Nearest Neighbour 2D",
         reference2d,
@@ -115,7 +116,7 @@ TEST_CASE("Resampling", "[resampling]") {
         }
     }
 
-    // create the test case
+    // Create the test case
     testCases.emplace_back(TestData(
         "Cubic Spline 2D",
         reference2d,
@@ -138,7 +139,7 @@ TEST_CASE("Resampling", "[resampling]") {
         }
     }
 
-    // create the test case
+    // Create the test case
     testCases.emplace_back(TestData(
         "Linear 3D",
         reference3d,
@@ -151,7 +152,7 @@ TEST_CASE("Resampling", "[resampling]") {
     // coordinate in image: [1.2, 1.3, 1.4]
     float resNearest3d[1];
     resNearest3d[0] = ref3dPtr[1 * dimFlo[2] * dimFlo[1] + 1 * dimFlo[1] + 1];
-    // create the test case
+    // Create the test case
     testCases.emplace_back(TestData(
         "Nearest Neighbour 3D",
         reference3d,
@@ -173,7 +174,7 @@ TEST_CASE("Resampling", "[resampling]") {
         }
     }
 
-    // create the test case
+    // Create the test case
     testCases.emplace_back(TestData(
         "Cubic Spline 3D",
         reference3d,
@@ -182,7 +183,7 @@ TEST_CASE("Resampling", "[resampling]") {
         resCubic3d)
     );
 
-    // Loop over all generated test cases to create all content and run all tests
+    // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
         auto&& [testName, reference, defField, interp, testResult] = testCase;
@@ -200,7 +201,7 @@ TEST_CASE("Resampling", "[resampling]") {
                 continue;   // CUDA platform only supports linear interpolation
             unique_ptr<ContentCreator> contentCreator{ dynamic_cast<ContentCreator*>(platform->CreateContentCreator()) };
             unique_ptr<Content> content{ contentCreator->Create(reference, reference) };
-            contentDescs.push_back(ContentDesc(std::move(content), platform));
+            contentDescs.push_back({ std::move(content), platform });
         }
 
         // Loop over all possibles contents for each test
@@ -219,9 +220,11 @@ TEST_CASE("Resampling", "[resampling]") {
                 warped->nvox = CalcVoxelNumber(*warped, warped->ndim);
                 warped->data = calloc(warped->nvox, warped->nbyper);
                 content->SetWarped(warped);
+
                 // Set the deformation field
                 content->SetDeformationField(defField);
 
+                // Do the computation
                 if (isAladinContent) {
                     unique_ptr<Kernel> resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) };
                     resampleKernel->castTo<ResampleImageKernel>()->Calculate(interp, 0);
@@ -230,18 +233,17 @@ TEST_CASE("Resampling", "[resampling]") {
                     compute->ResampleImage(interp, 0);
                 }
 
-                warped = content->GetWarped();
-
                 // Check all values
-                auto *warpedPtr = static_cast<float*>(warped->data);
+                warped = content->GetWarped();
+                auto warpedPtr = static_cast<float*>(warped->data);
                 for (size_t i = 0; i < warped->nvox; ++i) {
                     std::cout << i << " " << warpedPtr[i] << " " << testResult[i] << std::endl;
-                    REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS_SINGLE);
+                    REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS);
                 }
             }
         }
     }
-    // Only freeing ref as the rest if cleared by content destructor
+    // Clean up
     nifti_image_free(reference2d);
     nifti_image_free(reference3d);
 }

From 6d91c8227382bfa6fb422020fdd451a177003219 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 1 Mar 2023 16:37:39 +0000
Subject: [PATCH 067/314] Improve code coverage generation

---
 niftyreg_build_version.txt    |   2 +-
 reg-test/CMakeLists.txt       | 193 +++++++++++++++++++++-------------
 reg-test/CTestCustom.cmake.in |   6 --
 3 files changed, 122 insertions(+), 79 deletions(-)
 delete mode 100644 reg-test/CTestCustom.cmake.in

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3af99eee..3b4a6e84 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-180
+181
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index a7efe69f..89e51322 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -1,73 +1,122 @@
-find_package(Catch2 3)
-if(NOT Catch2_FOUND)
-  set(BUILD_TESTING OFF CACHE BOOL "To build the unit tests" FORCE)
-  message(STATUS "Catch2 not found")
-  message(SEND_ERROR "Catch2 is required to generate the unit test.
-  The BUILD_TESTING flag is turned OFF")
-  return()
-endif(NOT Catch2_FOUND)
-#-----------------------------------------------------------------------------
-#-----------------------------------------------------------------------------
-# Build the coverage test
-if(NOT MSVC)
-  option(WITH_COVERAGE "Set up the C, CXX and linker flags to run the coverage test" OFF)
-  if(WITH_COVERAGE)
-    set(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY_ONCE TRUE)
-    set(CMAKE_BUILD_TYPE "Debug" CACHE STRING
-      "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
-      FORCE)
-    set(CMAKE_CXX_FLAGS_DEBUG
-      "-g -O0 -Wall -W -Wunused-variable -Wunused-parameter -Wunused-function -Wunused -Wno-system-headers -Wno-deprecated -Woverloaded-virtual -Wwrite-strings -fprofile-arcs -ftest-coverage"
-      CACHE STRING "Force the debug CXX flags for the coverage test" FORCE)
-    set(CMAKE_EXE_LINKER_FLAGS_DEBUG
-      "-fprofile-arcs -ftest-coverage"
-      CACHE STRING "Force the debug linker flags for the coverage test" FORCE)
-    set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE)
-    configure_file(${CMAKE_SOURCE_DIR}/reg-test/CTestCustom.cmake.in
-                   ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
-  endif(WITH_COVERAGE)
-endif(NOT MSVC)
-#-----------------------------------------------------------------------------
-#-----------------------------------------------------------------------------
-# Set the build name
-set(CTEST_BUILD_NAME "${CMAKE_SYSTEM}_${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}_cmake-${CMAKE_VERSION}_${CMAKE_BUILD_TYPE}")
-if(USE_SSE)
-  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_sse")
-endif(USE_SSE)
-if(USE_OPENMP)
-  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_openmp")
-endif(USE_OPENMP)
-if(USE_CUDA)
-  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_cuda-${CUDA_VERSION}")
-endif(USE_CUDA)
-if(USE_OPENCL)
-  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_opencl")
-endif(USE_OPENCL)
-if(NOT MSVC)
-  unset(BUILDNAME CACHE)
-  unset(BUILDNAME)
-  set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash" FORCE)
-else(MSVC)
-  set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash")
-  message(STATUS "The buildname might need manual editing")
-endif(NOT MSVC)
-mark_as_advanced(BUILDNAME)
-#-----------------------------------------------------------------------------
-#-----------------------------------------------------------------------------
-include(CTest)
-include(Catch)
-#-----------------------------------------------------------------------------
-#-----------------------------------------------------------------------------
-set(EXEC_LIST reg_test_affineDeformationField)
-set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
-set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
-
-foreach(EXEC ${EXEC_LIST})
-  add_executable(${EXEC} ${EXEC}.cpp)
-  target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain)
-  target_link_libraries(${EXEC} PRIVATE _reg_aladin)
-  target_link_libraries(${EXEC} PRIVATE _reg_f3d)
-  catch_discover_tests(${EXEC})
-endforeach(EXEC)
-#-----------------------------------------------------------------------------
+find_package(Catch2 3)
+if(NOT Catch2_FOUND)
+  set(BUILD_TESTING OFF CACHE BOOL "To build the unit tests" FORCE)
+  message(STATUS "Catch2 not found")
+  message(SEND_ERROR "Catch2 is required to generate the unit test.
+  The BUILD_TESTING flag is turned OFF")
+  return()
+endif(NOT Catch2_FOUND)
+#-----------------------------------------------------------------------------
+#-----------------------------------------------------------------------------
+# Build the coverage test
+option(WITH_COVERAGE "Set up the C, CXX and linker flags to run the coverage test" OFF)
+if(WITH_COVERAGE)
+  if(NOT MSVC)
+    # Check prerequisites
+    find_program(LCOV lcov REQUIRED)
+    find_program(GENHTML genhtml REQUIRED)
+
+    if(NOT LCOV)
+      message(FATAL_ERROR "lcov not found! Aborting...")
+    endif()
+
+    if(NOT GENHTML)
+      message(FATAL_ERROR "genhtml not found! Aborting...")
+    endif()
+
+    # Set the build type to debug
+    set(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY_ONCE TRUE)
+    set(CMAKE_BUILD_TYPE "Debug" CACHE STRING
+      "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
+      FORCE)
+    set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE)
+
+    # Set the flags for coverage
+    set(CMAKE_CXX_FLAGS_DEBUG
+      "-g -O0 -coverage"
+      CACHE STRING "Force the debug CXX flags for the coverage test" FORCE)
+    set(CMAKE_C_FLAGS_DEBUG
+      ${CMAKE_CXX_FLAGS_DEBUG}
+      CACHE STRING "Force the debug C flags for the coverage test" FORCE)
+
+    # Add the coverage target
+    add_custom_target(coverage
+      # Gather data only for the reg-lib directory
+      COMMAND ${LCOV} --directory . --capture --output-file coverage.info --include '*/reg-lib/*'
+      # Generate report
+      COMMAND ${GENHTML} --demangle-cpp -o coverage coverage.info
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+
+    # Add the clean target
+    add_custom_target(clean_coverage
+      COMMAND ${LCOV} --directory . --zerocounters
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+  else(NOT MSVC)
+    # Check prerequisites
+    find_program(OPENCPPCOVERAGE OpenCppCoverage REQUIRED)
+
+    if(NOT OPENCPPCOVERAGE)
+      message(FATAL_ERROR "OpenCppCoverage not found! Aborting...")
+    endif()
+
+    # Set the build type to debug
+    set(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY_ONCE TRUE)
+    set(CMAKE_BUILD_TYPE "Debug" CACHE STRING
+      "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
+      FORCE)
+    set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE)
+
+    # Only include the reg-lib directory as coverage source
+    string(REPLACE "/" "\\" COVERAGE_SOURCE "${CMAKE_SOURCE_DIR}/reg-lib")
+
+    # Add the coverage target
+    add_custom_target(coverage
+      # Gather data only for the reg-lib directory
+      COMMAND ${OPENCPPCOVERAGE} --sources=${COVERAGE_SOURCE} --cover_children -- ctest -C Debug
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
+  endif(NOT MSVC)
+endif(WITH_COVERAGE)
+#-----------------------------------------------------------------------------
+#-----------------------------------------------------------------------------
+# Set the build name
+set(CTEST_BUILD_NAME "${CMAKE_SYSTEM}_${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}_cmake-${CMAKE_VERSION}_${CMAKE_BUILD_TYPE}")
+if(USE_SSE)
+  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_sse")
+endif(USE_SSE)
+if(USE_OPENMP)
+  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_openmp")
+endif(USE_OPENMP)
+if(USE_CUDA)
+  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_cuda-${CUDA_VERSION}")
+endif(USE_CUDA)
+if(USE_OPENCL)
+  set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_opencl")
+endif(USE_OPENCL)
+if(NOT MSVC)
+  unset(BUILDNAME CACHE)
+  unset(BUILDNAME)
+  set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash" FORCE)
+else(MSVC)
+  set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash")
+  message(STATUS "The buildname might need manual editing")
+endif(NOT MSVC)
+mark_as_advanced(BUILDNAME)
+#-----------------------------------------------------------------------------
+#-----------------------------------------------------------------------------
+include(CTest)
+include(Catch)
+#-----------------------------------------------------------------------------
+#-----------------------------------------------------------------------------
+set(EXEC_LIST reg_test_affineDeformationField)
+set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
+set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
+
+foreach(EXEC ${EXEC_LIST})
+  add_executable(${EXEC} ${EXEC}.cpp)
+  target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain)
+  target_link_libraries(${EXEC} PRIVATE _reg_aladin)
+  target_link_libraries(${EXEC} PRIVATE _reg_f3d)
+  catch_discover_tests(${EXEC})
+endforeach(EXEC)
+#-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/reg-test/CTestCustom.cmake.in b/reg-test/CTestCustom.cmake.in
deleted file mode 100644
index a49824a6..00000000
--- a/reg-test/CTestCustom.cmake.in
+++ /dev/null
@@ -1,6 +0,0 @@
-set(CTEST_CUSTOM_COVERAGE_EXCLUDE
-    "reg-io/nifti"
-    "reg-io/nrrd/NrrdIO"
-    "reg-io/png/lpng1510"
-    "reg-io/zlib"
-    "third-party/Eigen")

From b44efb10a35199b17df911b6503a290ee380562d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 1 Mar 2023 17:13:04 +0000
Subject: [PATCH 068/314] Fix CUDA compilation errors

---
 CMakeLists.txt                          | 484 ++++++++++++------------
 cmake/FindOPENCL.cmake                  | 341 -----------------
 niftyreg_build_version.txt              |   2 +-
 reg-lib/cl/CMakeLists.txt               |  17 -
 reg-lib/cuda/CMakeLists.txt             |  15 -
 reg-lib/cuda/CudaContextSingleton.h     |   2 +-
 reg-lib/cuda/CudaOptimiseKernel.cpp     |   4 +-
 reg-lib/cuda/_reg_blocksize_gpu.h       |   4 +-
 reg-lib/cuda/_reg_common_cuda.h         |   4 +-
 reg-lib/cuda/affineDeformationKernel.cu |   4 +-
 reg-lib/cuda/resampleKernel.cu          |   4 +-
 11 files changed, 263 insertions(+), 618 deletions(-)
 delete mode 100755 cmake/FindOPENCL.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bea681ba..7be28026 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,234 +1,252 @@
-project(NiftyReg)
-#-----------------------------------------------------------------------------
-cmake_minimum_required(VERSION 3.2.2)
-if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
- mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY)
-else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
- mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY)
-endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
-#-----------------------------------------------------------------------------
-# Set C++ standard version
-set(CMAKE_CXX_STANDARD 17)
-#-----------------------------------------------------------------------------
-if(APPLE)
-  set(CMAKE_MACOSX_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
-endif(APPLE)
-#-----------------------------------------------------------------------------
-if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
-  message("In-source builds not allowed by NiftyReg police.")
-  message("Please create a new directory (called a build directory) and run CMake from there.")
-  message(FATAL_ERROR "You may need to remove CMakeCache.txt and CMakeFiles.")
-endif(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
-#-----------------------------------------------------------------------------
-if(NOT MSVC)
-  if(NOT CMAKE_BUILD_TYPE)
-    set(CMAKE_BUILD_TYPE "Release")
-  endif(NOT CMAKE_BUILD_TYPE)
-  string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower)
-  if(NOT cmake_build_type_tolower STREQUAL "debug"
-     AND NOT cmake_build_type_tolower STREQUAL "release"
-     AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo")
-    message("Unknown build type \"${CMAKE_BUILD_TYPE}\".")
-    message(FATAL_ERROR "Allowed values are Debug, Release, RelWithDebInfo (case-insensitive).")
-  endif(NOT cmake_build_type_tolower STREQUAL "debug"
-     AND NOT cmake_build_type_tolower STREQUAL "release"
-     AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo")
-  if(cmake_build_type_tolower STREQUAL "debug")
-    set(DEBUG_MODE ON)
-  elseif(cmake_build_type_tolower STREQUAL "release")
-    set(DEBUG_MODE OFF)
-  endif(cmake_build_type_tolower STREQUAL "debug")
-endif(NOT MSVC)
-#-----------------------------------------------------------------------------
-# Set the NiftyReg version
-set(NR_VERSION_MAJOR 1)
-set(NR_VERSION_MINOR 5)
-file(STRINGS "niftyreg_build_version.txt" NR_VERSION_BUILD)
-set(NR_VERSION "${NR_VERSION_MAJOR}.${NR_VERSION_MINOR}.${NR_VERSION_BUILD}")
-add_definitions(-DNR_VERSION="${NR_VERSION}")
-# Define the pre-commit hook for developer
-find_package(Git)
-if(GIT_FOUND)
-  message(STATUS "Found Git")
-  file(COPY "${CMAKE_SOURCE_DIR}/update_version_hook" DESTINATION "${CMAKE_SOURCE_DIR}/.git/hooks" USE_SOURCE_PERMISSIONS)
-  file(RENAME "${CMAKE_SOURCE_DIR}/.git/hooks/update_version_hook" "${CMAKE_SOURCE_DIR}/.git/hooks/pre-commit")
-endif(GIT_FOUND)
-#-----------------------------------------------------------------------------
-if(MSVC)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
-  set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj")
-  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj")
-endif(MSVC)
-#-----------------------------------------------------------------------------
-if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
-    add_definitions(-fPIC)
-endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
-#-----------------------------------------------------------------------------
-option(BUILD_ALL_DEP "All the dependencies are build" OFF)
-option(BUILD_SHARED_LIBS "Build the libraries as shared" OFF)
-option(BUILD_TESTING "To build the unit tests" OFF)
-option(USE_CUDA "To use the CUDA platform" OFF)
-option(USE_OPENCL "To use the OpenCL platform" OFF)
-option(USE_OPENMP "To use openMP for multi-CPU processing" ON)
-option(USE_SSE "To enable SEE computation in some case" ON)
-#-----------------------------------------------------------------------------
-option(USE_THROW_EXCEP "To throw exception rather than exit" OFF)
-mark_as_advanced(USE_THROW_EXCEP)
-#-----------------------------------------------------------------------------
-option(USE_NRRD "To use the NRRD file format" OFF)
-mark_as_advanced(USE_NRRD)
-#-----------------------------------------------------------------------------
-if(WIN32)
-    set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE)
-endif(WIN32)
-#-----------------------------------------------------------------------------
-# All dependencies are build to create the 3DSlicer package
-if(BUILD_NR_SLICER_EXT)
-    set(BUILD_ALL_DEP ON)
-    mark_as_advanced(FORCE BUILD_ALL_DEP)
-else(BUILD_NR_SLICER_EXT)
-    mark_as_advanced(CLEAR BUILD_ALL_DEP)
-endif(BUILD_NR_SLICER_EXT)
-#-----------------------------------------------------------------------------
-# Z library
-# Try first to find the z library on the system and built is from the sources if it can not be find
-if(NOT BUILD_ALL_DEP)
-    find_package(ZLIB)
-    if(ZLIB_FOUND)
-        include_directories(${ZLIB_INCLUDE_DIR})
-        message(STATUS "Found zlib - the z library will not be built")
-    else(ZLIB_FOUND)
-        include_directories(${CMAKE_SOURCE_DIR}/reg-io/zlib)
-        message(STATUS "zlib not found - the z library will be built")
-    endif(ZLIB_FOUND)
-else(NOT BUILD_ALL_DEP)
-    include_directories(${CMAKE_SOURCE_DIR}/reg-io/zlib)
-endif(NOT BUILD_ALL_DEP)
-#-----------------------------------------------------------------------------
-# Try to find the png library and header on the system
-if(NOT BUILD_ALL_DEP)
-    ## PNG support - First try to find the PNG library on the system and build it if it is not found
-    ## I did not use the FindPNG.cmake here as the zlib is also included into the project
-    if(CYGWIN)
-        if(NOT BUILD_SHARED_LIBS)
-            set (PNG_DEFINITIONS -DPNG_STATIC)
-        endif(NOT BUILD_SHARED_LIBS)
-    endif(CYGWIN)
-    set(PNG_NAMES ${PNG_NAMES} png libpng png15 libpng15 png15d libpng15d png14 libpng14 png14d libpng14d png12 libpng12 png12d libpng12d)
-    find_library(PNG_LIBRARY NAMES ${PNG_NAMES})
-    find_path(PNG_INCLUDE_DIR png.h
-        /usr/local/include/libpng
-        /sw/include
-    )
-    # If the png library and header can not be found, it is build from the sources
-    if(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
-        message(STATUS "libpng not found - the png library will be built")
-        set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510)
-        set(PNG_LIBRARY png)
-        set(BUILD_INTERNAL_PNG true)
-    else(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
-        message(STATUS "Found libpng - the png library will not be built")
-        set(BUILD_INTERNAL_PNG false)
-    endif(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
-else(NOT BUILD_ALL_DEP)
-    set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510)
-    set(PNG_LIBRARY png)
-endif(NOT BUILD_ALL_DEP)
-include_directories(${CMAKE_SOURCE_DIR}/reg-io/png)
-include_directories(${PNG_INCLUDE_DIR})
-#-----------------------------------------------------------------------------
-include_directories(${CMAKE_SOURCE_DIR}/reg-lib)
-include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cpu)
-include_directories(${CMAKE_SOURCE_DIR}/reg-io)
-include_directories(${CMAKE_SOURCE_DIR}/reg-io/nifti)
-include_directories(${CMAKE_SOURCE_DIR}/third-party)
-include_directories(${CMAKE_BINARY_DIR}/third-party/eigen3)
-include_directories(${CMAKE_BINARY_DIR})
-include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd)
-include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd/NrrdIO)
-#-----------------------------------------------------------------------------
-if(USE_OPENCL)
-    include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cl)
-    include_directories(${OPENCL_INCLUDE_DIRS})
-    add_definitions(-D_USE_OPENCL)
-endif(USE_OPENCL)
-#-----------------------------------------------------------------------------
-if(USE_CUDA)
-  include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda)
-  include_directories(${CUDA_INCLUDE_DIRS})
-  add_definitions(-D_USE_CUDA)
-endif(USE_CUDA)
-#-----------------------------------------------------------------------------
-if(USE_SSE)
-  if(NOT MSVC)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
-  endif(NOT MSVC)
-  add_definitions(-D_USE_SSE)
-endif(USE_SSE)
-#-----------------------------------------------------------------------------
-if(USE_OPENMP)
-  find_package(OpenMP)
-  if(NOT OPENMP_FOUND)
-    set(USE_OPENMP OFF CACHE BOOL "To use openMP for multi-CPU processing" FORCE)
-    message(WARNING "OpenMP does not appear to be supported by your compiler, forcing USE_OPENMP to OFF")
-  else(NOT OPENMP_FOUND)
-     message(STATUS "Found OpenMP")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-  endif(NOT OPENMP_FOUND)
-endif(USE_OPENMP)
-#-----------------------------------------------------------------------------
-if(BUILD_SHARED_LIBS)
-  if(USE_CUDA)
-     set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build the libraries as shared." FORCE)
-     message(WARNING "CUDA is not compatible with shared libraries. Forcing BUILD_SHARED_LIBS to OFF")
-     set(NIFTYREG_LIBRARY_TYPE STATIC)
-  else(USE_CUDA)
-    set(NIFTYREG_LIBRARY_TYPE SHARED)
-  endif(USE_CUDA)
-else(BUILD_SHARED_LIBS)
-  set(NIFTYREG_LIBRARY_TYPE STATIC)
-endif(BUILD_SHARED_LIBS)
-#-----------------------------------------------------------------------------
-if(USE_THROW_EXCEP)
-  add_definitions(-DNR_THROW_EXCEP)
-endif(USE_THROW_EXCEP)
-#-----------------------------------------------------------------------------
-add_subdirectory(third-party)
-add_subdirectory(reg-io)
-add_subdirectory(reg-lib)
-add_subdirectory(reg-apps)
-add_subdirectory(cmake)
-#-----------------------------------------------------------------------------
-if(BUILD_TESTING)
-  enable_testing()
-  add_subdirectory(reg-test)
-endif(BUILD_TESTING)
-#-----------------------------------------------------------------------------
-# add a target to generate API documentation with Doxygen
-find_package(Doxygen)
-if(DOXYGEN_FOUND)
-  set(DOXY_EXCLUDED_PATTERNS "")
-  if(NOT BUILD_TESTING)
-    set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-test/*")
-  endif(NOT BUILD_TESTING)
-  if(NOT USE_NRRD)
-    set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-io/nrrd/*")
-  endif(NOT USE_NRRD)
-  if(NOT USE_CUDA)
-    set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-lib/cuda/*")
-  endif(NOT USE_CUDA)
-  if(NOT USE_OPENCL)
-    set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-lib/cl/*")
-  endif(NOT USE_OPENCL)
-  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
-  add_custom_target(doc
-    ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
-    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-    COMMENT "Generating API documentation with Doxygen" VERBATIM
-  )
-  message(STATUS "Found doxygen")
-endif(DOXYGEN_FOUND)
+project(NiftyReg)
+#-----------------------------------------------------------------------------
+cmake_minimum_required(VERSION 3.2.2)
+if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
+ mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY)
+else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
+ mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY)
+endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
+#-----------------------------------------------------------------------------
+# Set C++ standard version
+set(CMAKE_CXX_STANDARD 17)
+#-----------------------------------------------------------------------------
+if(APPLE)
+  set(CMAKE_MACOSX_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+endif(APPLE)
+#-----------------------------------------------------------------------------
+if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
+  message("In-source builds not allowed by NiftyReg police.")
+  message("Please create a new directory (called a build directory) and run CMake from there.")
+  message(FATAL_ERROR "You may need to remove CMakeCache.txt and CMakeFiles.")
+endif(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
+#-----------------------------------------------------------------------------
+if(NOT MSVC)
+  if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE "Release")
+  endif(NOT CMAKE_BUILD_TYPE)
+  string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower)
+  if(NOT cmake_build_type_tolower STREQUAL "debug"
+     AND NOT cmake_build_type_tolower STREQUAL "release"
+     AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo")
+    message("Unknown build type \"${CMAKE_BUILD_TYPE}\".")
+    message(FATAL_ERROR "Allowed values are Debug, Release, RelWithDebInfo (case-insensitive).")
+  endif(NOT cmake_build_type_tolower STREQUAL "debug"
+     AND NOT cmake_build_type_tolower STREQUAL "release"
+     AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo")
+  if(cmake_build_type_tolower STREQUAL "debug")
+    set(DEBUG_MODE ON)
+  elseif(cmake_build_type_tolower STREQUAL "release")
+    set(DEBUG_MODE OFF)
+  endif(cmake_build_type_tolower STREQUAL "debug")
+endif(NOT MSVC)
+#-----------------------------------------------------------------------------
+# Set the NiftyReg version
+set(NR_VERSION_MAJOR 1)
+set(NR_VERSION_MINOR 5)
+file(STRINGS "niftyreg_build_version.txt" NR_VERSION_BUILD)
+set(NR_VERSION "${NR_VERSION_MAJOR}.${NR_VERSION_MINOR}.${NR_VERSION_BUILD}")
+add_definitions(-DNR_VERSION="${NR_VERSION}")
+# Define the pre-commit hook for developer
+find_package(Git)
+if(GIT_FOUND)
+  message(STATUS "Found Git")
+  file(COPY "${CMAKE_SOURCE_DIR}/update_version_hook" DESTINATION "${CMAKE_SOURCE_DIR}/.git/hooks" USE_SOURCE_PERMISSIONS)
+  file(RENAME "${CMAKE_SOURCE_DIR}/.git/hooks/update_version_hook" "${CMAKE_SOURCE_DIR}/.git/hooks/pre-commit")
+endif(GIT_FOUND)
+#-----------------------------------------------------------------------------
+if(MSVC)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
+  set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj")
+  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj")
+endif(MSVC)
+#-----------------------------------------------------------------------------
+if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
+    add_definitions(-fPIC)
+endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
+#-----------------------------------------------------------------------------
+option(BUILD_ALL_DEP "All the dependencies are build" OFF)
+option(BUILD_SHARED_LIBS "Build the libraries as shared" OFF)
+option(BUILD_TESTING "To build the unit tests" OFF)
+option(USE_CUDA "To use the CUDA platform" OFF)
+option(USE_OPENCL "To use the OpenCL platform" OFF)
+option(USE_OPENMP "To use openMP for multi-CPU processing" ON)
+option(USE_SSE "To enable SEE computation in some case" ON)
+#-----------------------------------------------------------------------------
+option(USE_THROW_EXCEP "To throw exception rather than exit" OFF)
+mark_as_advanced(USE_THROW_EXCEP)
+#-----------------------------------------------------------------------------
+option(USE_NRRD "To use the NRRD file format" OFF)
+mark_as_advanced(USE_NRRD)
+#-----------------------------------------------------------------------------
+if(WIN32)
+    set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE)
+endif(WIN32)
+#-----------------------------------------------------------------------------
+# All dependencies are build to create the 3DSlicer package
+if(BUILD_NR_SLICER_EXT)
+    set(BUILD_ALL_DEP ON)
+    mark_as_advanced(FORCE BUILD_ALL_DEP)
+else(BUILD_NR_SLICER_EXT)
+    mark_as_advanced(CLEAR BUILD_ALL_DEP)
+endif(BUILD_NR_SLICER_EXT)
+#-----------------------------------------------------------------------------
+# Z library
+# Try first to find the z library on the system and built is from the sources if it can not be find
+if(NOT BUILD_ALL_DEP)
+    find_package(ZLIB)
+    if(ZLIB_FOUND)
+        include_directories(${ZLIB_INCLUDE_DIR})
+        message(STATUS "Found zlib - the z library will not be built")
+    else(ZLIB_FOUND)
+        include_directories(${CMAKE_SOURCE_DIR}/reg-io/zlib)
+        message(STATUS "zlib not found - the z library will be built")
+    endif(ZLIB_FOUND)
+else(NOT BUILD_ALL_DEP)
+    include_directories(${CMAKE_SOURCE_DIR}/reg-io/zlib)
+endif(NOT BUILD_ALL_DEP)
+#-----------------------------------------------------------------------------
+# Try to find the png library and header on the system
+if(NOT BUILD_ALL_DEP)
+    ## PNG support - First try to find the PNG library on the system and build it if it is not found
+    ## I did not use the FindPNG.cmake here as the zlib is also included into the project
+    if(CYGWIN)
+        if(NOT BUILD_SHARED_LIBS)
+            set (PNG_DEFINITIONS -DPNG_STATIC)
+        endif(NOT BUILD_SHARED_LIBS)
+    endif(CYGWIN)
+    set(PNG_NAMES ${PNG_NAMES} png libpng png15 libpng15 png15d libpng15d png14 libpng14 png14d libpng14d png12 libpng12 png12d libpng12d)
+    find_library(PNG_LIBRARY NAMES ${PNG_NAMES})
+    find_path(PNG_INCLUDE_DIR png.h
+        /usr/local/include/libpng
+        /sw/include
+    )
+    # If the png library and header can not be found, it is build from the sources
+    if(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
+        message(STATUS "libpng not found - the png library will be built")
+        set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510)
+        set(PNG_LIBRARY png)
+        set(BUILD_INTERNAL_PNG true)
+    else(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
+        message(STATUS "Found libpng - the png library will not be built")
+        set(BUILD_INTERNAL_PNG false)
+    endif(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
+else(NOT BUILD_ALL_DEP)
+    set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510)
+    set(PNG_LIBRARY png)
+endif(NOT BUILD_ALL_DEP)
+include_directories(${CMAKE_SOURCE_DIR}/reg-io/png)
+include_directories(${PNG_INCLUDE_DIR})
+#-----------------------------------------------------------------------------
+include_directories(${CMAKE_SOURCE_DIR}/reg-lib)
+include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cpu)
+include_directories(${CMAKE_SOURCE_DIR}/reg-io)
+include_directories(${CMAKE_SOURCE_DIR}/reg-io/nifti)
+include_directories(${CMAKE_SOURCE_DIR}/third-party)
+include_directories(${CMAKE_BINARY_DIR}/third-party/eigen3)
+include_directories(${CMAKE_BINARY_DIR})
+include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd)
+include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd/NrrdIO)
+#-----------------------------------------------------------------------------
+if(USE_OPENCL)
+  # Find the OpenCL package
+  find_package(OpenCL REQUIRED)
+  if(NOT OpenCL_FOUND)
+    set(USE_OPENCL OFF CACHE BOOL "To use the OpenCL platform" FORCE)
+    message(SEND_ERROR "OpenCL not found. The USE_OPENCL flag is turned OFF")
+  else(NOT OpenCL_FOUND)
+    message(STATUS "Found OpenCL")
+    include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cl)
+    include_directories(${OpenCL_INCLUDE_DIRS})
+    add_definitions(-D_USE_OPENCL)
+  endif(NOT OpenCL_FOUND)
+endif(USE_OPENCL)
+#-----------------------------------------------------------------------------
+if(USE_CUDA)
+  # Check if the CUDA drivers are available
+  find_package(CUDA REQUIRED)
+  mark_as_advanced(CUDA_SDK_ROOT_DIR)
+  option(CUDA_FAST_MATH "To use the fast math flag" OFF)
+  mark_as_advanced(CUDA_FAST_MATH)
+  if(NOT CUDA_FOUND)
+    set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
+    message(SEND_ERROR "CUDA not found. The USE_CUDA flag is turned OFF")
+  else(NOT CUDA_FOUND)
+    include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda)
+    include_directories(${CUDA_INCLUDE_DIRS})
+    add_definitions(-D_USE_CUDA)
+  endif(NOT CUDA_FOUND)
+endif(USE_CUDA)
+#-----------------------------------------------------------------------------
+if(USE_SSE)
+  if(NOT MSVC)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+  endif(NOT MSVC)
+  add_definitions(-D_USE_SSE)
+endif(USE_SSE)
+#-----------------------------------------------------------------------------
+if(USE_OPENMP)
+  find_package(OpenMP)
+  if(NOT OPENMP_FOUND)
+    set(USE_OPENMP OFF CACHE BOOL "To use openMP for multi-CPU processing" FORCE)
+    message(WARNING "OpenMP does not appear to be supported by your compiler, forcing USE_OPENMP to OFF")
+  else(NOT OPENMP_FOUND)
+    message(STATUS "Found OpenMP")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+  endif(NOT OPENMP_FOUND)
+endif(USE_OPENMP)
+#-----------------------------------------------------------------------------
+if(BUILD_SHARED_LIBS)
+  if(USE_CUDA)
+     set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build the libraries as shared." FORCE)
+     message(WARNING "CUDA is not compatible with shared libraries. Forcing BUILD_SHARED_LIBS to OFF")
+     set(NIFTYREG_LIBRARY_TYPE STATIC)
+  else(USE_CUDA)
+    set(NIFTYREG_LIBRARY_TYPE SHARED)
+  endif(USE_CUDA)
+else(BUILD_SHARED_LIBS)
+  set(NIFTYREG_LIBRARY_TYPE STATIC)
+endif(BUILD_SHARED_LIBS)
+#-----------------------------------------------------------------------------
+if(USE_THROW_EXCEP)
+  add_definitions(-DNR_THROW_EXCEP)
+endif(USE_THROW_EXCEP)
+#-----------------------------------------------------------------------------
+add_subdirectory(third-party)
+add_subdirectory(reg-io)
+add_subdirectory(reg-lib)
+add_subdirectory(reg-apps)
+add_subdirectory(cmake)
+#-----------------------------------------------------------------------------
+if(BUILD_TESTING)
+  enable_testing()
+  add_subdirectory(reg-test)
+endif(BUILD_TESTING)
+#-----------------------------------------------------------------------------
+# add a target to generate API documentation with Doxygen
+find_package(Doxygen)
+if(DOXYGEN_FOUND)
+  set(DOXY_EXCLUDED_PATTERNS "")
+  if(NOT BUILD_TESTING)
+    set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-test/*")
+  endif(NOT BUILD_TESTING)
+  if(NOT USE_NRRD)
+    set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-io/nrrd/*")
+  endif(NOT USE_NRRD)
+  if(NOT USE_CUDA)
+    set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-lib/cuda/*")
+  endif(NOT USE_CUDA)
+  if(NOT USE_OPENCL)
+    set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-lib/cl/*")
+  endif(NOT USE_OPENCL)
+  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
+  add_custom_target(doc
+    ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    COMMENT "Generating API documentation with Doxygen" VERBATIM
+  )
+  message(STATUS "Found doxygen")
+endif(DOXYGEN_FOUND)
 #-----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/cmake/FindOPENCL.cmake b/cmake/FindOPENCL.cmake
deleted file mode 100755
index c8c6d64f..00000000
--- a/cmake/FindOPENCL.cmake
+++ /dev/null
@@ -1,341 +0,0 @@
-# Find OpenCL
-
-#
-
-# To set manually the paths, define these environment variables:
-
-# OpenCL_INCPATH    - Include path (e.g. OpenCL_INCPATH=/opt/cuda/4.0/cuda/include)
-
-# OpenCL_LIBPATH    - Library path (e.h. OpenCL_LIBPATH=/usr/lib64/nvidia)
-
-#
-
-# Once done this will define
-
-#  OPENCL_FOUND            - system has OpenCL
-
-#  OPENCL_INCLUDE_DIRS     - the OpenCL include directory
-
-#  OPENCL_LIBRARIES        - link these to use OpenCL
-
-#  OPENCL_HAS_CPP_BINDINGS - system has also cl.hpp
-
-
-
-FIND_PACKAGE(PackageHandleStandardArgs)
-
-
-
-SET (OPENCL_VERSION_STRING "0.1.0")
-
-SET (OPENCL_VERSION_MAJOR 0)
-
-SET (OPENCL_VERSION_MINOR 1)
-
-SET (OPENCL_VERSION_PATCH 0)
-
-
-
-IF (APPLE)
-
-
-
-	# IF OpenCL_LIBPATH is given use it and don't use default path
-
-	IF (DEFINED ENV{OpenCL_LIBPATH})
-
-		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL PATHS ENV OpenCL_LIBPATH NO_DEFAULT_PATH)
-
-	ELSE ()
-
-		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX")
-
-	ENDIF ()
-
-
-
-	# IF OpenCL_INCPATH is given use it and find for CL/cl.h and OpenCL/cl.h do not try to find default paths
-
-	IF (DEFINED ENV{OpenCL_INCPATH})
-
-		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h OpenCL/cl.h PATHS ENV OpenCL_INCPATH NO_DEFAULT_PATH)
-
-		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp OpenCL/cl.hpp PATHS ${OPENCL_INCLUDE_DIRS} NO_DEFAULT_PATH)
-
-	ELSE ()
-
-		FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX")
-
-		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX")
-
-	ENDIF ()
-
-
-
-ELSE (APPLE)
-
-
-
-	IF (WIN32)
-
-
-
-		# Find OpenCL includes and libraries from environment variables provided by vendor
-
-		SET(OPENCL_INCLUDE_SEARCH_PATHS)
-
-		SET(OPENCL_LIBRARY_SEARCH_PATHS)
-
-		SET(OPENCL_LIBRARY_64_SEARCH_PATHS)
-
-
-
-		# Nvidia
-
-		IF (DEFINED ENV{CUDA_INC_PATH})
-
-			SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{CUDA_INC_PATH})
-
-			SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{CUDA_LIB_PATH}/../lib64)
-
-			SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{CUDA_LIB_PATH}/../lib)
-
-		ENDIF()
-
-		IF (DEFINED ENV{CUDA_PATH})
-
-			SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{CUDA_INC_PATH})
-
-			SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{CUDA_PATH}/lib/x64/)
-
-			SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{CUDA_PATH}/lib/Win32/)
-
-		ENDIF()
-
-
-
-		# Intel SDK
-
-		IF (DEFINED ENV{INTELOCSDKROOT})
-
-			SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{INTELOCSDKROOT}/include)
-
-			SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{INTELOCSDKROOT}/lib/x64)
-
-			SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{INTELOCSDKROOT}/lib/x86)
-
-		ENDIF()
-
-
-
-		# AMD SDK
-
-		IF (DEFINED ENV{AMDAPPSDKROOT})
-
-			SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{AMDAPPSDKROOT}/include)
-
-			SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{AMDAPPSDKROOT}/lib/x86_64)
-
-			SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{AMDAPPSDKROOT}/lib/x86)
-
-		ENDIF()
-
-
-
-		# Override search paths with OpenCL_INCPATH env variable
-
-		IF (DEFINED ENV{OpenCL_INCPATH})
-
-			SET(OPENCL_INCLUDE_SEARCH_PATHS $ENV{OpenCL_INCPATH})
-
-		ENDIF ()
-
-
-
-		# Override search paths with OpenCL_LIBPATH env variable
-
-		IF (DEFINED ENV{OpenCL_LIBPATH})
-
-			SET(OPENCL_LIBRARY_SEARCH_PATHS $ENV{OpenCL_LIBPATH})
-
-			SET(OPENCL_LIBRARY_64_SEARCH_PATHS $ENV{OpenCL_LIBPATH})
-
-		ENDIF ()
-
-
-
-		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${OPENCL_INCLUDE_SEARCH_PATHS})
-
-		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${OPENCL_INCLUDE_SEARCH_PATHS})
-
-		
-
-		FIND_LIBRARY(_OPENCL_32_LIBRARIES OpenCL.lib HINTS ${OPENCL_LIBRARY_SEARCH_PATHS} PATHS ${OPENCL_LIB_DIR} ENV PATH)
-
-		FIND_LIBRARY(_OPENCL_64_LIBRARIES OpenCL.lib HINTS ${OPENCL_LIBRARY_64_SEARCH_PATHS} PATHS ${OPENCL_LIB_DIR} ENV PATH)
-
-
-
-		# Check if 64bit or 32bit versions links fine
-
-		SET (_OPENCL_VERSION_SOURCE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/openclversion.c")
-
-		#SET (_OPENCL_VERSION_SOURCE "${CMAKE_BINARY_DIR}/test.c")
-
-		FILE (WRITE "${_OPENCL_VERSION_SOURCE}"
-
-			"
-
-			#if __APPLE__
-
-			#include <OpenCL/cl.h>
-
-			#else /* !__APPLE__ */
-
-			#include <CL/cl.h>
-
-			#endif /* __APPLE__ */
-
-			int main()
-
-			{	
-
-			    cl_int result;
-
-			    cl_platform_id id;
-
-			    result = clGetPlatformIDs(1, &id, NULL);
-
-			    return result != CL_SUCCESS;
-
-			}
-
-			")
-
-
-
-		TRY_COMPILE(_OPENCL_64_COMPILE_SUCCESS ${CMAKE_BINARY_DIR} "${_OPENCL_VERSION_SOURCE}"
-
-			CMAKE_FLAGS
-
-			"-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIRS}"
-
-			CMAKE_FLAGS
-
-			"-DLINK_LIBRARIES:STRING=${_OPENCL_64_LIBRARIES}"
-
-		)
-
-
-
-		IF(_OPENCL_64_COMPILE_SUCCESS)
-
-			message(STATUS "OpenCL 64bit lib found.")
-
-			SET(OPENCL_LIBRARIES ${_OPENCL_64_LIBRARIES})
-
-		ELSE()
-
-	  		TRY_COMPILE(_OPENCL_32_COMPILE_SUCCESS ${CMAKE_BINARY_DIR} "${_OPENCL_VERSION_SOURCE}"
-
-				CMAKE_FLAGS
-
-				"-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIRS}"
-
-				CMAKE_FLAGS
-
-				"-DLINK_LIBRARIES:STRING=${_OPENCL_32_LIBRARIES}"
-
-	  		)
-
-			IF(_OPENCL_32_COMPILE_SUCCESS)
-
-				message(STATUS "OpenCL 32bit lib found.")
-
-				SET(OPENCL_LIBRARIES ${_OPENCL_32_LIBRARIES})
-
-			ELSE()
-
-				message(STATUS "Couldn't link opencl..")
-
-			ENDIF()
-
-		ENDIF()
-
-
-
-
-
-	ELSE (WIN32)
-
-
-
-		IF (CYGWIN)
-
-			SET (CMAKE_FIND_LIBRARY_SUFFIXES .lib)
-
-			SET (OCL_LIB_SUFFIX .lib)
-
-		ENDIF (CYGWIN)
-
-
-
-		# Unix style platforms
-
-		FIND_LIBRARY(OPENCL_LIBRARIES OpenCL${OCL_LIB_SUFFIX}
-
-			PATHS ENV LD_LIBRARY_PATH ENV OpenCL_LIBPATH
-
-		)
-
-
-
-		GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH)
-
-		GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE)
-
-
-
-		# The AMD SDK currently does not place its headers
-
-		# in /usr/include, therefore also search relative
-
-		# to the library
-
-		FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH)
-
-		FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH)
-
-
-
-	ENDIF (WIN32)
-
-
-
-ENDIF (APPLE)
-
-
-
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS)
-
-
-
-IF(_OPENCL_CPP_INCLUDE_DIRS)
-
-	SET( OPENCL_HAS_CPP_BINDINGS TRUE )
-
-	LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} )
-
-	# This is often the same, so clean up
-
-	LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS )
-
-ENDIF(_OPENCL_CPP_INCLUDE_DIRS)
-
-
-
-MARK_AS_ADVANCED(
-
-  OPENCL_INCLUDE_DIRS
-
-)
\ No newline at end of file
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3b4a6e84..960e7a87 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-181
+182
diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt
index 2dde87f3..431aefb8 100755
--- a/reg-lib/cl/CMakeLists.txt
+++ b/reg-lib/cl/CMakeLists.txt
@@ -1,22 +1,5 @@
 #-----------------------------------------------------------------------------
-# Find the OpenCL package
-find_package(OpenCL REQUIRED)
-if(NOT OpenCL_FOUND)
-  set(USE_OpenCL OFF CACHE BOOL "To use the OpenCL platform" FORCE)
-  message(SEND_ERROR "OpenCL not found. The USE_OpenCL flag is turned OFF")
-  return()
-else(NOT OpenCL_FOUND)
-  message(STATUS "Found OpenCL")
-endif(NOT OpenCL_FOUND)
-#-----------------------------------------------------------------------------
-set(SOURCE_PATH ${CMAKE_BINARY_DIR})
-#-----------------------------------------------------------------------------
 configure_file(config.h.in ${CMAKE_BINARY_DIR}/config.h IMMEDIATE)
-mark_as_advanced(_OpenCL_CPP_INCLUDE_DIRS)
-#-----------------------------------------------------------------------------
-include_directories(${CMAKE_BINARY_DIR})
-include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cl)
-include_directories(${OpenCL_INCLUDE_DIRS})
 #-----------------------------------------------------------------------------
 # Build the _reg_opencl_kernels library
 set(NAME _reg_opencl_kernels)
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 0f8156e3..5f842fff 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -1,19 +1,4 @@
 #-----------------------------------------------------------------------------
-# Check if the CUDA drivers are available
-find_package(CUDA)
-mark_as_advanced(CUDA_SDK_ROOT_DIR)
-option(CUDA_FAST_MATH "To use the fast math flag" OFF)
-mark_as_advanced(CUDA_FAST_MATH)
-#-----------------------------------------------------------------------------
-if(NOT CUDA_FOUND)
-    set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
-    message(SEND_ERROR "CUDA not found. The USE_CUDA flag is turned OFF")
-    return()
-endif(NOT CUDA_FOUND)
-#-----------------------------------------------------------------------------
-SET(CUDA_INCLUDE_DIRS  "${CUDA_INCLUDE_DIRS}" CACHE INTERNAL "CUDA_INCLUDE_DIRS")
-include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda)
-#-----------------------------------------------------------------------------
 # Compile an executable to check if there is at least one suitable graphical card
 try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cpp
   CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CUDA_INCLUDE_DIRS} -DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY}
diff --git a/reg-lib/cuda/CudaContextSingleton.h b/reg-lib/cuda/CudaContextSingleton.h
index f9b0351e..b46cb879 100644
--- a/reg-lib/cuda/CudaContextSingleton.h
+++ b/reg-lib/cuda/CudaContextSingleton.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "_reg_maths.h"
-#include "cuda.h"
+#include <cuda.h>
 
 class CudaContextSingleton {
 public:
diff --git a/reg-lib/cuda/CudaOptimiseKernel.cpp b/reg-lib/cuda/CudaOptimiseKernel.cpp
index c28f00cd..bac2268f 100644
--- a/reg-lib/cuda/CudaOptimiseKernel.cpp
+++ b/reg-lib/cuda/CudaOptimiseKernel.cpp
@@ -1,5 +1,5 @@
-#include "cuda_runtime.h"
-#include "cuda.h"
+#include <cuda_runtime.h>
+#include <cuda.h>
 #include "CudaOptimiseKernel.h"
 #include "optimizeKernel.h"
 
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h
index 4eebd833..46b02298 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.h
+++ b/reg-lib/cuda/_reg_blocksize_gpu.h
@@ -10,8 +10,8 @@
 #pragma once
 
 #include "nifti1_io.h"
-#include "cuda_runtime.h"
-#include "cuda.h"
+#include <cuda_runtime.h>
+#include <cuda.h>
 
 /* ******************************** */
 /* ******************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index f601c2ee..93e31d75 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -9,8 +9,8 @@
 
 #pragma once
 
-#include "cuda_runtime.h"
-#include "cuda.h"
+#include <cuda_runtime.h>
+#include <cuda.h>
 #include "_reg_tools.h"
 
 /* *************************************************************** */
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index a37e99d3..eb0d74c1 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -1,7 +1,7 @@
 #include <stdio.h>
 #include <assert.h>
-#include "cuda_runtime.h"
-#include "cuda.h"
+#include <cuda_runtime.h>
+#include <cuda.h>
 #include"_reg_resampling.h"
 #include"_reg_maths.h"
 #include "_reg_common_cuda.h"
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index dc85dc9b..be78998d 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -1,7 +1,7 @@
 #include <stdio.h>
 #include <assert.h>
-#include "cuda_runtime.h"
-#include "cuda.h"
+#include <cuda_runtime.h>
+#include <cuda.h>
 #include"_reg_resampling.h"
 #include"_reg_maths.h"
 #include "resampleKernel.h"

From 9423847929ecda6a8cc38d94a1c68b45265e9399 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 1 Mar 2023 17:15:25 +0000
Subject: [PATCH 069/314] Bump NiftyReg version

---
 CMakeLists.txt             | 2 +-
 niftyreg_build_version.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7be28026..7a1f40d8 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -42,7 +42,7 @@ endif(NOT MSVC)
 #-----------------------------------------------------------------------------
 # Set the NiftyReg version
 set(NR_VERSION_MAJOR 1)
-set(NR_VERSION_MINOR 5)
+set(NR_VERSION_MINOR 6)
 file(STRINGS "niftyreg_build_version.txt" NR_VERSION_BUILD)
 set(NR_VERSION "${NR_VERSION_MAJOR}.${NR_VERSION_MINOR}.${NR_VERSION_BUILD}")
 add_definitions(-DNR_VERSION="${NR_VERSION}")
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 960e7a87..90afb3e9 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-182
+183

From 323182c7748fa455712a1097a7ea9cd76e17fdf8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 2 Mar 2023 17:46:53 +0000
Subject: [PATCH 070/314] Generate coverage and upload it to Coveralls

---
 .github/workflows/coverage.yml | 50 ++++++++++++++++++++++++++++++++++
 niftyreg_build_version.txt     |  2 +-
 2 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/coverage.yml

diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
new file mode 100644
index 00000000..ebe51947
--- /dev/null
+++ b/.github/workflows/coverage.yml
@@ -0,0 +1,50 @@
+name: Coverage
+on: [push, pull_request]
+jobs:
+  Coverage:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Clone NiftyReg
+        uses: actions/checkout@v3
+
+      - name: Install Catch2
+        run:  |
+          git clone https://github.com/catchorg/Catch2.git
+          cd Catch2
+          cmake -Bbuild -H. -DBUILD_TESTING=OFF
+          sudo cmake --build build/ --target install --config Debug
+
+      - name: Install lcov
+        run: sudo apt-get install lcov
+
+      - name: Configure NiftyReg
+        run: |
+          mkdir build
+          cd build
+          cmake -DCMAKE_CXX_COMPILER=g++ \
+                -DCMAKE_C_COMPILER=gcc \
+                -DCMAKE_BUILD_TYPE=Debug \
+                -DBUILD_ALL_DEP=ON \
+                -DUSE_CUDA=OFF \
+                -DUSE_OPENCL=OFF \
+                -DUSE_SSE=ON \
+                -DUSE_OPENMP=ON \
+                -DBUILD_TESTING=ON \
+                -DWITH_COVERAGE=ON \
+                ..
+
+      - name: Build NiftyReg
+        run: cmake --build build --config Debug
+
+      - name: Run tests
+        run: ctest -V
+        working-directory: build
+
+      - name: Coverage
+        run: make coverage
+        working-directory: build
+
+      - name: Upload coverage to Coveralls
+        uses: coverallsapp/github-action@v1
+        with:
+          path-to-lcov: build/coverage.info
\ No newline at end of file
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 90afb3e9..dc37bbdb 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-183
+184

From 07e0558f38dac5d67130b2868b591c30de302cd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 2 Mar 2023 17:55:15 +0000
Subject: [PATCH 071/314] Add badge for the coverage

---
 README.md                  | 3 +--
 niftyreg_build_version.txt | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 6f76e858..8e1e3689 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,7 @@
 [![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/linux.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/linux.yml?query=branch%3Amaster)
 [![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/macos.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/macos.yml?query=branch%3Amaster)
 [![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/windows.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/windows.yml?query=branch%3Amaster)
+[![Coverage Status](https://coveralls.io/repos/github/KCL-BMEIS/niftyreg/badge.svg?branch=master)](https://coveralls.io/github/KCL-BMEIS/niftyreg?branch=master)
 
 
@@ -81,5 +82,3 @@ Imaging, 18(8), 712–721. doi:10.1109/42.796284
 [4] Modat, et al. (2010). Fast free-form deformation using graphics processing
 units. Computer Methods And Programs In Biomedicine,98(3), 278–284.
 doi:10.1016/j.cmpb.2009.09.002
-
-
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index dc37bbdb..725a5ba2 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-184
+185

From 8538e7fec966c1c166a7f018bbaadf6118d8c5b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 6 Mar 2023 14:53:59 +0000
Subject: [PATCH 072/314] Add RNifti library

---
 CMakeLists.txt                            |    5 +-
 niftyreg_build_version.txt                |    2 +-
 reg-io/CMakeLists.txt                     |    7 +-
 reg-io/RNifti.h                           |   64 +
 reg-io/RNifti/NiftiImage.h                | 1957 +++++
 reg-io/RNifti/NiftiImage_impl.h           | 1882 ++++
 reg-io/RNifti/NiftiImage_matrix.h         |  135 +
 reg-io/RNifti/NiftiImage_print.h          |   36 +
 reg-io/_reg_ReadWriteImage.h              |    2 +-
 reg-io/_reg_ReadWriteMatrix.h             |    2 +-
 reg-io/nifti/LICENSE                      |    9 -
 reg-io/nifti/nifti1.h                     | 1505 ----
 reg-io/nifti/nifti1_io.h                  |  549 --
 reg-io/{nifti => niftilib}/CMakeLists.txt |    5 +-
 reg-io/niftilib/nifti1.h                  | 1528 ++++
 reg-io/{nifti => niftilib}/nifti1_io.c    | 1289 +--
 reg-io/niftilib/nifti1_io.h               |  587 ++
 reg-io/niftilib/nifti2.h                  |  117 +
 reg-io/niftilib/nifti2_image.h            |  106 +
 reg-io/niftilib/nifti2_io.c               | 9703 +++++++++++++++++++++
 reg-io/niftilib/nifti2_io.h               |  830 ++
 reg-io/nrrd/reg_nrrd.h                    |    2 +-
 reg-io/png/reg_png.h                      |    2 +-
 reg-io/zlib/CMakeLists.txt                |    1 -
 reg-io/znzlib/CMakeLists.txt              |    8 +
 reg-io/{nifti => znzlib}/znzlib.c         |   34 +-
 reg-io/{nifti => znzlib}/znzlib.h         |   81 +-
 reg-lib/ConvolutionKernel.h               |    2 +-
 reg-lib/ResampleImageKernel.h             |    2 +-
 reg-lib/cpu/CpuBlockMatchingKernel.h      |    2 +-
 reg-lib/cpu/CpuOptimiseKernel.h           |    2 +-
 reg-lib/cpu/_reg_femTrans.h               |    2 +-
 reg-lib/cpu/_reg_globalTrans.h            |    2 +-
 reg-lib/cpu/_reg_maths.h                  |    2 +-
 reg-lib/cpu/_reg_maths_eigen.cpp          |    2 +-
 reg-lib/cpu/_reg_maths_eigen.h            |    2 +-
 reg-lib/cpu/_reg_resampling.h             |    2 +-
 reg-lib/cuda/_reg_blocksize_gpu.h         |    2 +-
 reg-lib/cuda/affineDeformationKernel.h    |    2 +-
 reg-lib/cuda/optimizeKernel.h             |    2 +-
 reg-lib/cuda/resampleKernel.h             |    2 +-
 41 files changed, 17710 insertions(+), 2766 deletions(-)
 create mode 100644 reg-io/RNifti.h
 create mode 100644 reg-io/RNifti/NiftiImage.h
 create mode 100644 reg-io/RNifti/NiftiImage_impl.h
 create mode 100644 reg-io/RNifti/NiftiImage_matrix.h
 create mode 100644 reg-io/RNifti/NiftiImage_print.h
 delete mode 100755 reg-io/nifti/LICENSE
 delete mode 100755 reg-io/nifti/nifti1.h
 delete mode 100755 reg-io/nifti/nifti1_io.h
 rename reg-io/{nifti => niftilib}/CMakeLists.txt (72%)
 mode change 100755 => 100644
 create mode 100644 reg-io/niftilib/nifti1.h
 rename reg-io/{nifti => niftilib}/nifti1_io.c (86%)
 mode change 100755 => 100644
 create mode 100644 reg-io/niftilib/nifti1_io.h
 create mode 100644 reg-io/niftilib/nifti2.h
 create mode 100644 reg-io/niftilib/nifti2_image.h
 create mode 100644 reg-io/niftilib/nifti2_io.c
 create mode 100644 reg-io/niftilib/nifti2_io.h
 create mode 100644 reg-io/znzlib/CMakeLists.txt
 rename reg-io/{nifti => znzlib}/znzlib.c (91%)
 mode change 100755 => 100644
 rename reg-io/{nifti => znzlib}/znzlib.h (54%)
 mode change 100755 => 100644

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a1f40d8..1c7b9840 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -141,12 +141,9 @@ include_directories(${PNG_INCLUDE_DIR})
 include_directories(${CMAKE_SOURCE_DIR}/reg-lib)
 include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cpu)
 include_directories(${CMAKE_SOURCE_DIR}/reg-io)
-include_directories(${CMAKE_SOURCE_DIR}/reg-io/nifti)
 include_directories(${CMAKE_SOURCE_DIR}/third-party)
-include_directories(${CMAKE_BINARY_DIR}/third-party/eigen3)
 include_directories(${CMAKE_BINARY_DIR})
-include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd)
-include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd/NrrdIO)
+include_directories(${CMAKE_BINARY_DIR}/third-party/eigen3)
 #-----------------------------------------------------------------------------
 if(USE_OPENCL)
   # Find the OpenCL package
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 725a5ba2..bc3d5444 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-185
+186
diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt
index c027f43a..74712e43 100644
--- a/reg-io/CMakeLists.txt
+++ b/reg-io/CMakeLists.txt
@@ -3,10 +3,13 @@ if(NOT ZLIB_FOUND OR BUILD_ALL_DEP)
     subdirs(zlib)
 endif(NOT ZLIB_FOUND OR BUILD_ALL_DEP)
 
+# Build the znz library
+subdirs(znzlib)
+
 # Build the nifti file format library
-subdirs(nifti)
+subdirs(niftilib)
 
-set(LIBRARIES reg_nifti z)
+set(LIBRARIES reg_nifti z znz)
 
 # Build the png library if required
 subdirs(png)
diff --git a/reg-io/RNifti.h b/reg-io/RNifti.h
new file mode 100644
index 00000000..2327b601
--- /dev/null
+++ b/reg-io/RNifti.h
@@ -0,0 +1,64 @@
+#ifndef _RNIFTI_H_
+#define _RNIFTI_H_
+
+// RNiftyReg and divest have used HAVE_R, so accept this variant for compatibility
+#if !defined(USING_R) && defined(HAVE_R)
+#define USING_R
+#endif
+
+// Defined since RNifti v0.10.0, and equal to 100 * (major version) + (minor version). May not
+// change if the API does not change, and in particular never changes with patch level
+#define RNIFTI_VERSION 104
+
+// Versions 1 and 2 of the NIfTI reference library are mutually incompatible, but RNifti does some
+// work to get them to play nicely:
+// 
+// - The compile-time constant RNIFTI_NIFTILIB_VERSION indicates which version of the library has
+//   precedence. nifti1_io.h sets this to 1, and nifti2.io.h to 2, so the first-included header
+//   wins unless the user sets a value explicitly.
+// - nifti_image is aliased to the appropriate struct type according to the library version in use.
+// - Library functions with the same name but different signatures in the two versions are renamed
+//   to use "nifti2" in place of "nifti" in the version 2 library. They are aliased back to their
+//   original names if RNIFTI_NIFTILIB_VERSION is 2 and NO_REMAP_NIFTI2_FUNCTIONS *is not* defined.
+// - Library functions that are essentially the same in the two versions are fenced out of
+//   nifti1_io.c (if RNIFTI_NIFTILIB_DEDUPLICATE is defined), to avoid duplicate symbols in the
+//   compiled package library.
+// 
+// There are therefore several possible modes of usage:
+// 
+// 1. Standalone programs that include RNifti.h can *first* define RNIFTI_NIFTILIB_VERSION to
+//    choose the library version required (the default is 1). They should link against nifti1_io.o
+//    or nifti2_io.o, accordingly. (A mismatch will result in compiler/linker errors.) See the
+//    "standalone" directory for an example.
+// 2. Standalone or linked R package C/C++ code can include "niftilib/nifti1_io.h" or "niftilib/
+//    nifti2_io.h", use the appropriate version of the library, and not worry about the clash. This
+//    will make most sense for existing code already written for one or other version of the
+//    NIfTI library. Standalone code will again need to link to the appropriate object file; R will
+//    handle linkage for packages, but the API header "RNiftiAPI.h" must also be included. See the
+//    "clients" directory for an example of the latter.
+// 3. Code that explicitly wants to handle both versions of the library should define
+//    NO_REMAP_NIFTI2_FUNCTIONS to avoid name clashes, include both library headers, and use
+//    nifti2_* functions explicitly when required.
+#if !defined(RNIFTI_NIFTILIB_VERSION) || (RNIFTI_NIFTILIB_VERSION == 1)
+#include "niftilib/nifti1_io.h"
+#include "niftilib/nifti2_image.h"
+#else
+#include "niftilib/nifti2_io.h"
+#endif
+
+#ifdef __cplusplus
+#include "RNifti/NiftiImage.h"
+
+// Defined since RNifti v0.3.0
+#define HAVE_RNIFTI_NAMESPACE
+
+extern "C" {
+#endif // __cplusplus
+
+extern void niftilib_register_all (void);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
new file mode 100644
index 00000000..92183705
--- /dev/null
+++ b/reg-io/RNifti/NiftiImage.h
@@ -0,0 +1,1957 @@
+#ifndef _NIFTI_IMAGE_H_
+#define _NIFTI_IMAGE_H_
+
+
+#ifdef USING_R
+
+#include <Rcpp.h>
+
+// Defined since R 3.1.0, according to Tomas Kalibera, but there's no reason to break
+// compatibility with 3.0.x
+#ifndef MAYBE_SHARED
+#define MAYBE_SHARED(x) (NAMED(x) > 1)
+#endif
+
+#else
+
+#define R_NegInf -INFINITY
+
+#include <stdint.h>
+#include <cstddef>
+#include <cmath>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <list>
+#include <complex>
+#include <stdexcept>
+#include <algorithm>
+#include <map>
+#include <locale>
+#include <limits>
+
+#endif
+
+/**
+ * @mainpage RNifti: Fast R and C++ Access to NIfTI Images
+ * A more extensive overview of the \c RNifti package, and its usage from R, is provided on the
+ * package's GitHub page at \c https://github.com/jonclayden/RNifti. The primary role of these
+ * pages is to document the \ref RNifti::NiftiImage and \ref RNifti::NiftiImageData C++ classes
+ * for package developers linking to \c RNifti.
+**/
+
+namespace RNifti {
+
+typedef std::complex<float> complex64_t;
+typedef std::complex<double> complex128_t;
+
+/**
+ * Simple RGB(A) type encapsulating an 8-bit colour value with optional opacity, which can also be
+ * set or retrieved as a single 32-bit integer. The default value is equivalent to zero, a fully
+ * transparent black.
+ * @author Jon Clayden (<code@clayden.org>)
+**/
+struct rgba32_t
+{
+    union ValueType {
+        int packed;
+        unsigned char bytes[4];
+    };
+    ValueType value;
+    rgba32_t () { value.packed = 0; }
+};
+
+/**
+ * Wrapper class encapsulating a NIfTI data blob, with responsibility for handling data scaling
+ * and polymorphism. This class provides read/write data access, iterators, etc., which internally
+ * handle conversion to and from the data's native type. It can be linked to the data in a
+ * \c nifti_image or used independently.
+ * @author Jon Clayden (<code@clayden.org>)
+**/
+class NiftiImageData
+{
+public:
+    double slope;                       /**< The slope term used to scale data values. Ignored if zero. */
+    double intercept;                   /**< The intercept term used to scale data values */
+
+protected:
+    /**
+     * Abstract inner class defining the type-specific functions required in concrete subclasses
+    **/
+    struct TypeHandler
+    {
+        virtual ~TypeHandler() {}
+        virtual size_t size () const { return 0; }
+        virtual bool hasNaN () const { return false; }
+        virtual complex128_t getComplex (void *ptr) const { return complex128_t(0.0, 0.0); }
+        virtual double getDouble (void *ptr) const { return 0.0; }
+        virtual int getInt (void *ptr) const { return 0; }
+        virtual rgba32_t getRgb (void *ptr) const { return rgba32_t(); }
+        virtual void setComplex (void *ptr, const complex128_t value) const {}
+        virtual void setDouble (void *ptr, const double value) const {}
+        virtual void setInt (void *ptr, const int value) const {}
+        virtual void setRgb (void *ptr, const rgba32_t value) const {}
+        virtual void minmax (void *ptr, const size_t length, double *min, double *max) const { *min = 0.0; *max = 0.0; }
+    };
+
+    /**
+     * Concrete inner class template defining behaviour specific to individual data types
+    **/
+    template <typename Type, bool alpha = false>
+    struct ConcreteTypeHandler : public TypeHandler
+    {
+        size_t size () const { return (sizeof(Type)); }
+        bool hasNaN () const { return std::numeric_limits<Type>::has_quiet_NaN; }
+        complex128_t getComplex (void *ptr) const { return complex128_t(static_cast<double>(*static_cast<Type*>(ptr)), 0.0); }
+        double getDouble (void *ptr) const { return static_cast<double>(*static_cast<Type*>(ptr)); }
+        int getInt (void *ptr) const { return static_cast<int>(*static_cast<Type*>(ptr)); }
+        void setComplex (void *ptr, const complex128_t value) const
+        {
+            *(static_cast<Type*>(ptr)) = Type(value.real());
+            *(static_cast<Type*>(ptr) + 1) = Type(0);
+        }
+        void setDouble (void *ptr, const double value) const { *(static_cast<Type*>(ptr)) = Type(value); }
+        void setInt (void *ptr, const int value) const { *(static_cast<Type*>(ptr)) = Type(value); }
+        void minmax (void *ptr, const size_t length, double *min, double *max) const;
+    };
+
+    template <typename ElementType>
+    struct ConcreteTypeHandler<std::complex<ElementType>,false> : public TypeHandler
+    {
+        size_t size () const { return (sizeof(ElementType) * 2); }
+        bool hasNaN () const { return std::numeric_limits<ElementType>::has_quiet_NaN; }
+        std::complex<ElementType> getNative (void *ptr) const
+        {
+            const ElementType real = *static_cast<ElementType*>(ptr);
+            const ElementType imag = *(static_cast<ElementType*>(ptr) + 1);
+            return std::complex<ElementType>(real, imag);
+        }
+        void setNative (void *ptr, const std::complex<ElementType> native) const
+        {
+            *(static_cast<ElementType*>(ptr)) = native.real();
+            *(static_cast<ElementType*>(ptr) + 1) = native.imag();
+        }
+        complex128_t getComplex (void *ptr) const { return complex128_t(getNative(ptr)); }
+        double getDouble (void *ptr) const { return static_cast<double>(getNative(ptr).real()); }
+        int getInt (void *ptr) const { return static_cast<int>(getNative(ptr).real()); }
+        void setComplex (void *ptr, const complex128_t value) const { setNative(ptr, std::complex<ElementType>(value)); }
+        void setDouble (void *ptr, const double value) const { setNative(ptr, std::complex<ElementType>(static_cast<ElementType>(value), 0.0)); }
+        void setInt (void *ptr, const int value) const { setNative(ptr, std::complex<ElementType>(static_cast<ElementType>(value), 0.0)); }
+        void minmax (void *ptr, const size_t length, double *min, double *max) const;
+    };
+
+    template <bool alpha>
+    struct ConcreteTypeHandler<rgba32_t,alpha> : public TypeHandler
+    {
+        size_t size () const { return alpha ? 4 : 3; }
+        int getInt (void *ptr) const { return getRgb(ptr).value.packed; }
+        rgba32_t getRgb (void *ptr) const
+        {
+            rgba32_t value;
+            unsigned char *source = static_cast<unsigned char *>(ptr);
+            std::copy(source, source + (alpha ? 4 : 3), value.value.bytes);
+            return value;
+        }
+        void setInt (void *ptr, const int value) const
+        {
+            rgba32_t native;
+            native.value.packed = value;
+            setRgb(ptr, native);
+        }
+        void setRgb (void *ptr, const rgba32_t value) const
+        {
+            unsigned char *target = static_cast<unsigned char *>(ptr);
+            std::copy(value.value.bytes, value.value.bytes + (alpha ? 4 : 3), target);
+        }
+        void minmax (void *ptr, const size_t length, double *min, double *max) const { *min = 0.0; *max = 255.0; }
+    };
+
+    /**
+     * Create a concrete type handler appropriate to the datatype code stored with the data
+     * @return The newly allocated type handler, or \c NULL
+     * @exception runtime_error If the current datatype is unsupported
+    **/
+    TypeHandler * createHandler ()
+    {
+        if (_datatype == DT_NONE)
+            return NULL;
+
+        switch (_datatype)
+        {
+            case DT_UINT8:      return new ConcreteTypeHandler<uint8_t>();          break;
+            case DT_INT16:      return new ConcreteTypeHandler<int16_t>();          break;
+            case DT_INT32:      return new ConcreteTypeHandler<int32_t>();          break;
+            case DT_FLOAT32:    return new ConcreteTypeHandler<float>();            break;
+            case DT_FLOAT64:    return new ConcreteTypeHandler<double>();           break;
+            case DT_INT8:       return new ConcreteTypeHandler<int8_t>();           break;
+            case DT_UINT16:     return new ConcreteTypeHandler<uint16_t>();         break;
+            case DT_UINT32:     return new ConcreteTypeHandler<uint32_t>();         break;
+            case DT_INT64:      return new ConcreteTypeHandler<int64_t>();          break;
+            case DT_UINT64:     return new ConcreteTypeHandler<uint64_t>();         break;
+            case DT_COMPLEX64:  return new ConcreteTypeHandler<complex64_t>();      break;
+            case DT_COMPLEX128: return new ConcreteTypeHandler<complex128_t>();     break;
+            case DT_RGB24:      return new ConcreteTypeHandler<rgba32_t,false>();   break;
+            case DT_RGBA32:     return new ConcreteTypeHandler<rgba32_t,true>();    break;
+
+            default:
+            throw std::runtime_error("Unsupported data type (" + std::string(nifti_datatype_string(_datatype)) + ")");
+        }
+    }
+
+    void *dataPtr;                      /**< Opaque pointer to the underlying data blob */
+    int _datatype;                      /**< Datatype code indicating the actual type of the elements */
+    TypeHandler *handler;               /**< Type handler, which is created to match the datatype */
+    size_t _length;                     /**< The number of data elements in the blob */
+    bool owner;                         /**< An indicator of whether this object is responsible for cleaning up the data */
+
+    /**
+     * Initialiser method, used by constructors
+     * @param data Pointer to a preallocated data blob, or \c NULL
+     * @param length Number of elements in the blob
+     * @param datatype NIfTI datatype code appropriate to the blob
+     * @param slope Slope parameter for scaling values
+     * @param intercept Intercept parameter for scaling values
+     * @param alloc If \c true, the default, and \c data is \c NULL, memory will be allocated for
+     *   the blob. If \c false, the blob will be \c NULL in this case
+    **/
+    void init (void *data, const size_t length, const int datatype, const double slope, const double intercept, const bool alloc = true)
+    {
+        this->_length = length;
+        this->_datatype = datatype;
+        this->slope = slope;
+        this->intercept = intercept;
+
+        owner = false;
+        handler = createHandler();
+        if (handler == NULL)
+            dataPtr = NULL;
+        else if (alloc && data == NULL)
+        {
+            dataPtr = calloc(length, handler->size());
+            owner = true;
+        }
+        else
+            dataPtr = data;
+    }
+
+    /**
+     * Update the slope and intercept to cover the range of another data object. If the current
+     * object's datatype can capture the required range without scaling, the slope and intercept
+     * are simply reset
+     * @param data Another data object
+    **/
+    void calibrateFrom (const NiftiImageData &data)
+    {
+        slope = 1.0;
+        intercept = 0.0;
+
+        if (this->isInteger())
+        {
+            double dataMin, dataMax, typeMin, typeMax;
+            data.minmax(&dataMin, &dataMax);
+            handler->minmax(NULL, 0, &typeMin, &typeMax);
+
+            // If the source type is floating-point but values are in range, we will just round them
+            if (dataMin < typeMin || dataMax > typeMax)
+            {
+                slope = (dataMax - dataMin) / (typeMax - typeMin);
+                intercept = dataMin - (slope) * typeMin;
+            }
+        }
+    }
+
+public:
+    /**
+     * Inner class representing a single element in the data blob
+    **/
+    struct Element
+    {
+    private:
+        const NiftiImageData &parent;
+        void *ptr;
+
+    public:
+        /**
+         * Primary constructor
+         * @param parent A reference to the parent object
+         * @param ptr An opaque pointer to the element. If \c NULL, the start of the data blob
+         *   encapsulated by the parent will be used
+        **/
+        Element (const NiftiImageData &parent, void *ptr = NULL)
+            : parent(parent)
+        {
+            this->ptr = (ptr == NULL ? parent.dataPtr : ptr);
+        }
+
+        /**
+         * Copy assignment operator
+         * @param value The value to assign. Any basic numeric type supported by NIfTI-1 is
+         *   allowed, but \c int is used as an intermediate type for all integers, so values
+         *   unrepresentable in a signed 32-bit integer may overflow
+         * @return A reference to the callee
+        **/
+        template <typename SourceType>
+        Element & operator= (const SourceType &value);
+
+        /**
+         * Copy assignment operator
+         * @param other Another data element
+         * @return A reference to the callee
+        **/
+        Element & operator= (const Element &other);
+
+        /**
+         * Type-cast operator, suitable for implicit conversion to basic numeric types
+        **/
+        template <typename TargetType>
+        operator TargetType() const
+        {
+            if (parent.isScaled())
+                return TargetType(parent.handler->getDouble(ptr) * parent.slope + parent.intercept);
+            else if (std::numeric_limits<TargetType>::is_integer)
+                return TargetType(parent.handler->getInt(ptr));
+            else
+                return TargetType(parent.handler->getDouble(ptr));
+        }
+
+        template <typename ElementType>
+        operator std::complex<ElementType>() const
+        {
+            if (parent.isScaled())
+                return std::complex<ElementType>(parent.handler->getComplex(ptr) * parent.slope + complex128_t(parent.intercept, parent.intercept));
+            else
+                return std::complex<ElementType>(parent.handler->getComplex(ptr));
+        }
+
+#ifdef USING_R
+        /**
+         * \c Rcomplex type-cast operator, allowing data to be copied straight to a CPLXSXP
+        **/
+        operator Rcomplex() const
+        {
+            const complex128_t value = parent.handler->getComplex(ptr);
+            Rcomplex rValue = { value.real(), value.imag() };
+            if (parent.isScaled())
+            {
+                rValue.r = rValue.r * parent.slope + parent.intercept;
+                rValue.i = rValue.i * parent.slope + parent.intercept;
+            }
+            return rValue;
+        }
+#endif
+
+        operator rgba32_t() const
+        {
+            return parent.handler->getRgb(ptr);
+        }
+    };
+
+    /**
+     * Iterator type for \c NiftiImageData, with \c Element as its value type
+    **/
+    class Iterator
+    {
+    private:
+        const NiftiImageData &parent;
+        void *ptr;
+        size_t step;
+
+    public:
+        // Standard iterator typedefs
+        typedef std::random_access_iterator_tag iterator_category;
+        typedef Element value_type;
+        typedef std::ptrdiff_t difference_type;
+        typedef Element* pointer;
+        typedef Element& reference;
+
+        /**
+         * Primary constructor
+         * @param parent A reference to the parent object
+         * @param ptr An opaque pointer to the memory underpinning the iterator
+         * @param step The increment between elements within the blob, in bytes. If zero, the
+         *   default, the width associated with the stored datatype will be used.
+        **/
+        Iterator (const NiftiImageData &parent, void *ptr = NULL, const size_t step = 0)
+            : parent(parent)
+        {
+            this->ptr = (ptr == NULL ? parent.dataPtr : ptr);
+            this->step = (step == 0 ? parent.handler->size() : step);
+        }
+
+        /**
+         * Copy constructor
+         * @param other Another iterator
+        **/
+        Iterator (const Iterator &other)
+            : parent(other.parent), ptr(other.ptr), step(other.step) {}
+
+        Iterator & operator++ () { ptr = static_cast<char*>(ptr) + step; return *this; }
+        Iterator operator++ (int) { Iterator copy(*this); ptr = static_cast<char*>(ptr) + step; return copy; }
+        Iterator operator+ (ptrdiff_t n) const
+        {
+            void *newptr = static_cast<char*>(ptr) + (n * step);
+            return Iterator(parent, newptr, step);
+        }
+        Iterator & operator-- () { ptr = static_cast<char*>(ptr) - step; return *this; }
+        Iterator operator-- (int) { Iterator copy(*this); ptr = static_cast<char*>(ptr) - step; return copy; }
+        Iterator operator- (ptrdiff_t n) const
+        {
+            void *newptr = static_cast<char*>(ptr) - (n * step);
+            return Iterator(parent, newptr, step);
+        }
+
+        ptrdiff_t operator- (const Iterator &other) const
+        {
+            const ptrdiff_t difference = static_cast<char*>(ptr) - static_cast<char*>(other.ptr);
+            return difference / step;
+        }
+
+        bool operator== (const Iterator &other) const { return (ptr==other.ptr && step==other.step); }
+        bool operator!= (const Iterator &other) const { return (ptr!=other.ptr || step!=other.step); }
+        bool operator> (const Iterator &other) const { return (ptr > other.ptr); }
+        bool operator< (const Iterator &other) const { return (ptr < other.ptr); }
+
+        const Element operator* () const { return Element(parent, ptr); }
+        Element operator* () { return Element(parent, ptr); }
+        const Element operator[] (const size_t i) const { return Element(parent, static_cast<char*>(ptr) + (i * step)); }
+        Element operator[] (const size_t i) { return Element(parent, static_cast<char*>(ptr) + (i * step)); }
+    };
+
+    /**
+     * Default constructor, creating an empty data object
+    **/
+    NiftiImageData ()
+        : slope(1.0), intercept(0.0), dataPtr(NULL), _datatype(DT_NONE), handler(NULL), _length(0), owner(false) {}
+
+    /**
+     * Primary constructor
+     * @param data A pointer to a pre-allocated data blob, or \c NULL. In the latter case, memory
+     *   will be allocated by the object, and cleaned up at destruction unless it is disowned
+     * @param length The number of elements in the blob
+     * @param datatype The NIfTI datatype code corresponding to the type of the data elements
+     * @param slope The slope parameter to use for data scaling, if any
+     * @param intercept The intercept parameter to use for data scaling, if any
+    **/
+    NiftiImageData (void *data, const size_t length, const int datatype, const double slope = 1.0, const double intercept = 0.0)
+    {
+        init(data, length, datatype, slope, intercept);
+    }
+
+    /**
+     * Convenience constructor for a \c nifti_image
+     * @param image The image struct whose data the object will wrap
+    **/
+    NiftiImageData (nifti_image *image)
+    {
+        if (image == NULL)
+            init(NULL, 0, DT_NONE, 0.0, 0.0, false);
+        else
+            init(image->data, image->nvox, image->datatype, static_cast<double>(image->scl_slope), static_cast<double>(image->scl_inter), false);
+    }
+
+    /**
+     * Copy constructor with optional type conversion
+     * @param source Another \c NiftiImageData object to copy data from
+     * @param datatype The datatype to convert to, or \c DT_NONE, the default, for no conversion.
+     *   If the range of the source data cannot be represented by the chosen type, the slope and
+     *   intercept parameters will be set to adjust the range
+    **/
+    NiftiImageData (const NiftiImageData &source, const int datatype = DT_NONE)
+    {
+        init(NULL, source.length(), datatype == DT_NONE ? source.datatype() : datatype, source.slope, source.intercept);
+
+        if (datatype == DT_NONE || datatype == source.datatype())
+            memcpy(dataPtr, source.dataPtr, source.totalBytes());
+        else
+        {
+            calibrateFrom(source);
+            for (size_t i = 0; i < source.length(); ++i)
+                (*this)[i] = source[i];
+        }
+    }
+
+    /**
+     * Iterator-based constructor
+     * @param from Iterator type representing the start of the source data to be copied
+     * @param to Iterator type representing the end of the source data to be copied
+     * @param datatype The NIfTI datatype to use within the data blob
+    **/
+    template <class InputIterator>
+    NiftiImageData (InputIterator from, InputIterator to, const int datatype)
+    {
+        const size_t length = static_cast<size_t>(std::distance(from, to));
+        init(NULL, length, datatype, 1.0, 0.0);
+        std::copy(from, to, this->begin());
+    }
+
+    /**
+     * Destructor which frees the type handler, and the data blob if it is owned by this object
+    **/
+    virtual ~NiftiImageData ()
+    {
+        delete handler;
+        if (owner)
+            free(dataPtr);
+    }
+
+    /**
+     * Copy assignment operator
+     * @param source Another \c NiftiImageData object, from which the data and metadata are copied
+     * @return A reference to the callee
+    **/
+    NiftiImageData & operator= (const NiftiImageData &source)
+    {
+        if (source.dataPtr != NULL)
+        {
+            // Free the old data, if we allocated it
+            if (owner)
+                free(dataPtr);
+            init(NULL, source.length(), source.datatype(), source.slope, source.intercept);
+            memcpy(dataPtr, source.dataPtr, source.totalBytes());
+        }
+        return *this;
+    }
+
+    void * blob () const             { return dataPtr; }                /**< Return an opaque pointer to the blob */
+    int datatype () const            { return _datatype; }              /**< Return stored datatype code */
+    size_t length () const           { return _length; }                /**< Return the number of elements in the data */
+    size_t size () const             { return _length; }                /**< Return the number of elements in the data */
+
+    /** Return the number of bytes used per element, or zero if the datatype is undefined or the blob is \c NULL */
+    size_t bytesPerPixel () const    { return (handler == NULL ? 0 : handler->size()); }
+
+    /** Return the total size of the data blob, in bytes */
+    size_t totalBytes () const       { return _length * bytesPerPixel(); }
+
+    /**
+     * Determine whether or not the object is empty
+     * @return \c true if the data pointer is \c NULL; \c false otherwise
+    **/
+    bool isEmpty () const            { return (dataPtr == NULL); }
+
+    /**
+     * Determine whether the object uses data scaling
+     * @return \c true if the slope and intercept parameters are set to nontrivial values;
+         \c false otherwise
+    **/
+    bool isScaled () const           { return (slope != 0.0 && (slope != 1.0 || intercept != 0.0)); }
+
+    /**
+     * Determine whether the datatype is complex
+     * @return \c true if the data represents complex floating point values; \c false otherwise
+    **/
+    bool isComplex () const          { return (_datatype == DT_COMPLEX64 || _datatype == DT_COMPLEX128); }
+
+    /**
+     * Determine whether the datatype is floating point
+     * @return \c true if the data represents 32-bit or 64-bit floating point values; \c false
+     *   otherwise
+    **/
+    bool isFloatingPoint () const    { return (_datatype == DT_FLOAT32 || _datatype == DT_FLOAT64); }
+
+    /**
+     * Determine whether the datatype is an integer type
+     * @return \c true if the data represents integers; \c false otherwise
+    **/
+    bool isInteger () const          { return nifti_is_inttype(_datatype); }
+
+    /**
+     * Determine whether the datatype corresponds to an RGB type
+     * @return \c true if the data represents RGB colour values; \c false otherwise
+    **/
+    bool isRgb () const              { return (_datatype == DT_RGB24 || _datatype == DT_RGBA32); }
+
+    /**
+     * Return a similar object to the callee, but with the slope and intercept values reset
+     * @return A new \c NiftiImageData object, pointing to the same memory as the callee
+    **/
+    NiftiImageData unscaled () const { return NiftiImageData(dataPtr, _length, _datatype); }
+
+    /**
+     * Disown the data blob, removing responsibility for freeing it upon destruction
+     * @return A reference to the modified callee
+    **/
+    NiftiImageData & disown ()       { this->owner = false; return *this; }
+
+    /** Obtain a constant iterator corresponding to the start of the blob */
+    const Iterator begin () const { return Iterator(*this); }
+
+    /** Obtain a constant iterator corresponding to the end of the blob */
+    const Iterator end () const { return Iterator(*this, static_cast<char*>(dataPtr) + totalBytes()); }
+
+    /** Obtain a mutable iterator corresponding to the start of the blob */
+    Iterator begin () { return Iterator(*this); }
+
+    /** Obtain a mutable iterator corresponding to the end of the blob */
+    Iterator end () { return Iterator(*this, static_cast<char*>(dataPtr) + totalBytes()); }
+
+    /**
+     * Indexing operator, returning a constant element
+     * @param i Index value, where the first dimension moves fastest
+     * @return Constant element proxy type
+    **/
+    const Element operator[] (const size_t i) const { return Element(*this, static_cast<char*>(dataPtr) + (i * bytesPerPixel())); }
+
+    /**
+     * Indexing operator, returning a mutable element
+     * @param i Index value, where the first dimension moves fastest
+     * @return Mutable element proxy type
+    **/
+    Element operator[] (const size_t i) { return Element(*this, static_cast<char*>(dataPtr) + (i * bytesPerPixel())); }
+
+    /**
+     * Calculate the minimum and maximum values in the blob, as doubles
+     * @param min Pointer to the minimum value (output parameter). Will be set to zero if the
+     *   datatype is unknown or the data is empty
+     * @param max Pointer to the maximum value (output parameter). Will be set to zero if the
+     *   datatype is unknown or the data is empty
+    **/
+    void minmax (double *min, double *max) const
+    {
+        if (handler == NULL)
+        {
+            *min = 0.0;
+            *max = 0.0;
+        }
+        else
+            handler->minmax(dataPtr, _length, min, max);
+    }
+};
+
+
+// R provides an NaN (NA) value for integers
+#ifdef USING_R
+template <>
+inline bool NiftiImageData::ConcreteTypeHandler<int>::hasNaN () const { return true; }
+#endif
+
+
+/**
+ * A simple object-oriented wrapper around a fixed-length array.
+ * @author Jon Clayden (<code@clayden.org>)
+**/
+template <typename ElementType, int Length>
+class Vector
+{
+protected:
+    ElementType elements[Length];
+
+public:
+    /**
+     * Initialise with a fixed element value, defaulting to zero
+    **/
+    Vector (const ElementType value = 0.0)
+    {
+        std::fill(elements, elements + Length, value);
+    }
+
+    /**
+     * Initialise from a C-style array of the appropriate type and length
+    **/
+    Vector (const ElementType * source)
+    {
+        std::copy(source, source + Length, this->elements);
+    }
+
+    /**
+     * Unary negation operator, which reverses the signs of all elements
+    **/
+    Vector<ElementType,Length> operator- () const
+    {
+        Vector<ElementType,Length> result;
+        for (int i=0; i<Length; i++)
+            result.elements[i] = -elements[i];
+        return result;
+    }
+
+    const ElementType & operator[] (const size_t i) const { return elements[i]; }
+
+    ElementType & operator[] (const size_t i) { return elements[i]; }
+};
+
+
+/**
+ * Class representing a numeric square matrix of a fixed order. Provides object-based encapsulation
+ * and version agnosticism for certain NIfTI-relevant matrix operations, as well as other
+ * conveniences such as iterators and conversion to/from R matrices.
+ * @author Jon Clayden (<code@clayden.org>)
+**/
+template <class NiftiType, typename ElementType, int Order>
+class SquareMatrix
+{
+protected:
+    ElementType elements[Order*Order];          /**< The underlying raw data elements, stored row-major for consistency with niftilib */
+
+    /**
+     * Obtain a pointer to a NIfTI-style \c mat44 or \c dmat44 encapsulating the same data as this
+     * object.
+    */
+    NiftiType * niftiPointer () const { return (NiftiType *) elements; }
+
+    /**
+     * Copy the data elements into a new NIfTI-style \c mat44 or \c dmat44.
+    */
+    NiftiType niftiCopy () const
+    {
+        NiftiType value;
+        std::copy(elements, elements + Order*Order, *value.m);
+        return value;
+    }
+
+public:
+    typedef NiftiType NativeType;                                       /**< The niftilib structure type corresponding to this matrix */
+    typedef SquareMatrix<NiftiType,ElementType,Order> MatrixType;       /**< Type alias for the current specialisation */
+    typedef Vector<ElementType,Order> VectorType;                       /**< Type of vectors for which this matrix is a linear operator */
+
+    /**
+     * Initialise with a fixed element value, defaulting to zero
+    **/
+    SquareMatrix (const ElementType value = 0.0)
+    {
+        std::fill(elements, elements + Order*Order, value);
+    }
+
+    /**
+     * Initialise from a C-style array of the appropriate type and length
+    **/
+    SquareMatrix (const ElementType * source)
+    {
+        std::copy(source, source + Order*Order, this->elements);
+    }
+
+    /**
+     * Initialise from the appropriate niftilib type
+    **/
+    SquareMatrix (const NiftiType &source)
+    {
+        const ElementType *castSource = (const ElementType *) *source.m;
+        std::copy(castSource, castSource + Order*Order, this->elements);
+    }
+
+#ifdef USING_R
+    /**
+     * Initialise from an R object representing a numeric matrix
+    **/
+    SquareMatrix (SEXP source)
+    {
+        Rcpp::NumericMatrix matrix(source);
+        if (matrix.cols() != Order && matrix.rows() != Order)
+            throw std::runtime_error("Matrix does not have the expected dimensions");
+        for (int i=0; i<Order; i++)
+        {
+            for (int j=0; j<Order; j++)
+                elements[j + i*Order] = matrix(i,j);
+        }
+    }
+#endif
+
+    /**
+     * Implicit conversion to the corresponding niftilib type, which allows a \c SquareMatrix
+     * object to be used directly in library functions
+    **/
+    operator const NiftiType () const { return niftiCopy(); }
+
+    /**
+     * Implicit conversion to the corresponding niftilib type, which allows a \c SquareMatrix
+     * object to be used directly in library functions
+    **/
+    operator NiftiType () { return niftiCopy(); }
+
+    /**
+     * Return a pointer/iterator to the beginning of the data. Elements are accessed in row-major
+     * order
+    **/
+    const ElementType * begin () const { return elements; }
+
+    /**
+     * Return a pointer/iterator to the beginning of the data. Elements are accessed in row-major
+     * order
+    **/
+    ElementType * begin () { return elements; }
+
+    /**
+     * Return a pointer/iterator to the end of the data
+    **/
+    const ElementType * end () const { return elements + Order*Order; }
+
+    /**
+     * Return a pointer/iterator to the end of the data
+    **/
+    ElementType * end () { return elements + Order*Order; }
+
+    /**
+     * Construct an identity matrix of the appropriate size
+    **/
+    static MatrixType eye ()
+    {
+        MatrixType matrix;
+        for (int i=0; i<Order; i++)
+            matrix.elements[i + i*Order] = 1.0;
+        return matrix;
+    }
+
+    MatrixType inverse () const;                                /**< Matrix inverse */
+    MatrixType polar () const;                                  /**< Polar decomposition, as implemented in niftilib (\c Order 3 only) */
+    ElementType colnorm () const;                               /**< Maximum column norm, as implemented in niftilib (\c Order 3 only) */
+    ElementType rownorm () const;                               /**< Maximum row norm, as implemented in niftilib (\c Order 3 only) */
+    ElementType determ () const;                                /**< Matrix determinant, as implemented in niftilib (\c Order 3 only) */
+    MatrixType multiply (const MatrixType &other) const;        /**< Matrix-matrix multiplication */
+    VectorType multiply (const VectorType &vec) const;          /**< Matrix-vector multiplication */
+
+    MatrixType operator* (const MatrixType &other) const { return multiply(other); }    /**< Matrix-matrix multiplication (infix shorthand) */
+    VectorType operator* (const VectorType &vec) const { return multiply(vec); }        /**< Matrix-vector multiplication (infix shorthand) */
+
+    /**
+     * Indexing operator. The first index is for column, and the second is for row
+    **/
+    const ElementType & operator() (const int i, const int j) const { return elements[j + i*Order]; }
+
+    /**
+     * Indexing operator. The first index is for column, and the second is for row
+    **/
+    ElementType & operator() (const int i, const int j) { return elements[j + i*Order]; }
+
+#ifdef USING_R
+    /**
+     * \c SEXP cast operator, which converts to R's numeric matrix type
+    **/
+    operator SEXP () const
+    {
+        Rcpp::NumericMatrix result(Order, Order);
+        for (int i=0; i<Order; i++)
+        {
+            for (int j=0; j<Order; j++)
+                result(i,j) = elements[j + i*Order];
+        }
+        return result;
+    }
+#endif
+};
+
+
+// Include matrix implementations
+#include "RNifti/NiftiImage_matrix.h"
+
+
+/**
+ * Thin wrapper around a C-style \c nifti_image struct that allows C++-style destruction. Reference
+ * counting is used to allow multiple \c NiftiImage objects to wrap the same \c nifti_image
+ * pointer, akin to a \c std::shared_ptr (but without requiring C++11).
+ * @author Jon Clayden (<code@clayden.org>)
+**/
+class NiftiImage
+{
+public:
+#if RNIFTI_NIFTILIB_VERSION == 1
+    typedef int dim_t;                  /**< Type used for dimension elements */
+    typedef float pixdim_t;             /**< Type used for pixel dimension elements */
+    typedef float scale_t;              /**< Type used for scale elements */
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    typedef int64_t dim_t;              /**< Type used for dimension elements */
+    typedef double pixdim_t;            /**< Type used for pixel dimension elements */
+    typedef double scale_t;             /**< Type used for scale elements */
+#endif
+
+    /**
+     * Inner class referring to a subset of an image. Currently must refer to the last
+     * dimension in the image, i.e., a volume in a 4D parent image, or a slice in a 3D image
+    **/
+    struct Block
+    {
+        const NiftiImage &image;        /**< The parent image */
+        const int dimension;            /**< The dimension along which the block applies (which should be the last) */
+        const dim_t index;              /**< The location along \c dimension */
+
+        /**
+         * Standard constructor for this class
+         * @param image The parent image
+         * @param dimension The dimension along which the block applies (which should be the last)
+         * @param index The location along \c dimension
+         * @exception runtime_error If \c dimension is not the last dimension in the image
+        **/
+        Block (const NiftiImage &image, const int dimension, const dim_t index)
+            : image(image), dimension(dimension), index(index)
+        {
+            if (dimension != image->ndim)
+                throw std::runtime_error("Blocks must be along the last dimension in the image");
+        }
+
+        /**
+         * Copy assignment operator, which allows a block in one image to be replaced with
+         * the contents of another image
+         * @param source A \ref NiftiImage, containing the data to replace the block with
+         * @return A reference to the block
+         * @exception runtime_error If the \c source is incompatible with the block in size or
+         * datatype
+        **/
+        Block & operator= (const NiftiImage &source)
+        {
+            if (source->datatype != image->datatype)
+                throw std::runtime_error("New data does not have the same datatype as the target block");
+            if (source->scl_slope != image->scl_slope || source->scl_inter != image->scl_inter)
+                throw std::runtime_error("New data does not have the same scale parameters as the target block");
+
+            size_t blockSize = 1;
+            for (int i=1; i<dimension; i++)
+                blockSize *= image->dim[i];
+
+            if (blockSize != size_t(source->nvox))
+                throw std::runtime_error("New data does not have the same size as the target block");
+
+            blockSize *= image->nbyper;
+            memcpy(static_cast<char*>(image->data) + blockSize*index, source->data, blockSize);
+            return *this;
+        }
+
+        /**
+         * Obtain the data within the block
+         * @return A \c NiftiImageData object encapsulating the data
+        **/
+        NiftiImageData data () const
+        {
+            if (image.isNull())
+                return NiftiImageData();
+            else
+            {
+                size_t blockSize = 1;
+                for (int i=1; i<dimension; i++)
+                    blockSize *= image->dim[i];
+                return NiftiImageData(static_cast<char*>(image->data) + blockSize * index * image->nbyper, blockSize, image->datatype, static_cast<double>(image->scl_slope), static_cast<double>(image->scl_inter));
+            }
+        }
+
+        /**
+         * Extract a vector of data from a block, casting it to any required element type
+         * @param useSlope If \c true, the default, then the data will be adjusted for the slope
+         * and intercept stored with the image, if any
+         * @note If the slope and intercept are applied, there is no guarantee that the adjusted
+         * values will fit within the requested type. No check is made for this
+        **/
+        template <typename TargetType>
+        std::vector<TargetType> getData (const bool useSlope = true) const;
+    };
+
+    /**
+     * Inner class wrapping a NIfTI extension, a weakly-specified standard for attaching additional
+     * metadata to NIfTI-1 and NIfTI-2 images.
+    **/
+    class Extension
+    {
+    protected:
+        nifti1_extension *ext;          /**< The wrapped extension structure */
+
+        /**
+         * Copy an existing \c nifti1_extension structure into the object
+         * @param source A pointer to a \c nifti1_extension
+        **/
+        void copy (const nifti1_extension *source);
+
+        /**
+         * Copy the specified data buffer into the object
+         * @param data An array of data
+         * @param length The number of elements in \c data
+         * @param code The extension code to associate with the data
+        **/
+        template <typename SourceType>
+        void copy (const SourceType *data, const size_t length, const int code);
+
+    public:
+        /**
+         * Default constructor, wrapping \c NULL
+        **/
+        Extension ()
+            : ext(NULL) {}
+
+        /**
+         * Initialise from an existing \c nifti1_extension (which is used by both NIfTI-1 and
+         * NIfTI-2 images), optionally copying the contents
+         * @param extension A pointer to a \c nifti1_extension
+         * @param copy If \c true, the contents of the extension are copied; otherwise the pointer
+         * is wrapped directly
+        **/
+        Extension (nifti1_extension * const extension, const bool copy = false)
+        {
+            if (!copy || extension == NULL)
+                this->ext = extension;
+            else
+                this->copy(extension);
+        }
+
+        /**
+         * Copy constructor
+         * @param source Another \c Extension object
+        **/
+        Extension (const Extension &source)
+        {
+            copy(source.ext);
+        }
+
+        /**
+         * Construct the object from its constituent parts
+         * @param data An array of data
+         * @param length The number of elements in \c data
+         * @param code The extension code to associate with the data
+        **/
+        template <typename SourceType>
+        Extension (const SourceType *data, const size_t length, const int code)
+        {
+            copy(data, length, code);
+        }
+
+#ifdef USING_R
+        /**
+         * Construct the object from an atomic R object, copying the data into a new extension
+         * @param source An R object, which should be of an atomic type (integer, double,
+         * character, etc.)
+         * @param code The extension code to associate with the data. If -1, the default, a
+         * \c code attribute will be used, if available
+        **/
+        Extension (SEXP source, int code = -1)
+        {
+            const Rcpp::RObject object(source);
+            if (code == -1 && object.hasAttribute("code"))
+                code = Rcpp::as<int>(object.attr("code"));
+
+            switch (object.sexp_type())
+            {
+                case RAWSXP:  copy(RAW(source), Rf_length(source), code);       break;
+                case REALSXP: copy(REAL(source), Rf_length(source), code);      break;
+                case CPLXSXP: copy(COMPLEX(source), Rf_length(source), code);   break;
+                case INTSXP:  copy(INTEGER(source), Rf_length(source), code);   break;
+                case LGLSXP:  copy(LOGICAL(source), Rf_length(source), code);   break;
+                case STRSXP:
+                {
+                    if (Rf_length(source) > 1)
+                        Rf_warning("Character vector elements after the first will not be stored in a NIfTI extension");
+                    const char *string = CHAR(STRING_ELT(source, 0));
+                    copy(string, strlen(string), code);
+                    break;
+                }
+                default: Rf_error("Unable to convert SEXP type %d to NIfTI extension", object.sexp_type());
+            }
+        }
+#endif
+
+        /**
+         * Return the code associated with the extension
+         * @return An integer code giving the relevant code, or -1 if the extension is \c NULL
+        **/
+        int code () const { return (ext == NULL ? -1 : ext->ecode); }
+
+        /**
+         * Return the data blob associated with the extension
+         * @return The data, as a byte array
+        **/
+        const char * data () const { return (ext == NULL ? NULL : ext->edata); }
+
+        /**
+         * Return the length of the data array
+         * @return The length of the data array, in bytes
+        **/
+        size_t length () const { return (ext == NULL || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); }
+
+        /**
+         * Return the length of the data array
+         * @return The length of the data array, in bytes
+        **/
+        size_t size () const { return (ext == NULL || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); }
+
+#ifdef USING_R
+        /**
+         * \c SEXP cast operator, which converts to R's raw vector type
+        **/
+        operator SEXP () const
+        {
+            if (ext == NULL || ext->esize < 8)
+                return R_NilValue;
+
+            const int length = ext->esize - 8;
+            Rcpp::RawVector result(length);
+            const Rbyte *source = (const Rbyte *) ext->edata;
+            std::copy(source, source+length, result.begin());
+            result.attr("code") = ext->ecode;
+            return result;
+        }
+#endif
+    };
+
+    /**
+     * Inner class representing an xform matrix, which indicates the orientation and other spatial
+     * properties of an image. Specifically, an xform is an affine transformation in 3D space,
+     * representing the conversion from the image's coordinate system to canonical "real-world"
+     * space. The header file \c nifti1.h contains authoritative documentation.
+    **/
+    class Xform
+    {
+    public:
+#if RNIFTI_NIFTILIB_VERSION == 1
+        typedef float Element;                                      /**< Scalar element type */
+        typedef Vector<float,4> Vector4;                            /**< 4-element vector type */
+        typedef Vector<float,3> Vector3;                            /**< 3-element vector type */
+        typedef SquareMatrix<mat44,float,4> Matrix;                 /**< 4x4 matrix type */
+        typedef SquareMatrix<mat33,float,3> Submatrix;              /**< 3x3 matrix type */
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        typedef double Element;                                     /**< Scalar element type */
+        typedef Vector<double,4> Vector4;                           /**< 4-element vector type */
+        typedef Vector<double,3> Vector3;                           /**< 3-element vector type */
+        typedef SquareMatrix<nifti_dmat44,double,4> Matrix;         /**< 4x4 matrix type */
+        typedef SquareMatrix<nifti_dmat33,double,3> Submatrix;      /**< 3x3 matrix type */
+#endif
+
+    protected:
+        Element *forward, *inverse, *qparams;                       /**< Pointers to linked C-style arrays */
+        Matrix mat;                                                 /**< The full xform matrix underpinning this object */
+
+        /**
+         * Replace the current matrix with a new one. This function propagates the changes to the
+         * linked arrays, if they are not \c NULL.
+        **/
+        void replace (const Matrix &source);
+
+    public:
+        /**
+         * Default constructor
+        **/
+        Xform ()
+            : forward(NULL), inverse(NULL), qparams(NULL), mat() {}
+
+        /**
+         * Initialise from a 4x4 \ref SquareMatrix
+        **/
+        Xform (const Matrix &source)
+            : forward(NULL), inverse(NULL), qparams(NULL), mat(source) {}
+
+        /**
+         * Initialise from a constant NIfTI \c mat44 or \c dmat44
+        **/
+        Xform (const Matrix::NativeType &source)
+            : forward(NULL), inverse(NULL), qparams(NULL), mat(source) {}
+
+        /**
+         * Initialise from a NIfTI \c mat44 or \c dmat44. The data in the linked matrix will be
+         * replaced if this object is updated.
+        **/
+        Xform (Matrix::NativeType &source)
+            : forward(*source.m), inverse(NULL), qparams(NULL), mat(source) {}
+
+        /**
+         * Initialise from forward and backward matrices, and optionally quaternion parameters.
+         * These will all be linked to the new object and replaced if it is updated.
+        **/
+        Xform (Matrix::NativeType &source, Matrix::NativeType &inverse, Element *qparams = NULL)
+            : forward(*source.m), inverse(*inverse.m), qparams(qparams), mat(source) {}
+
+#ifdef USING_R
+        /**
+         * Initialise from an R numeric matrix object
+        **/
+        Xform (SEXP source)
+            : forward(NULL), inverse(NULL), qparams(NULL), mat(Matrix(source)) {}
+#endif
+
+        /**
+         * Allows an \c Xform to be treated as a constant NIfTI matrix implicitly, making it
+         * directly compatible with API functions
+        **/
+        operator const Matrix::NativeType () const { return mat; }
+
+        /**
+         * Allows an \c Xform to be treated as a NIfTI matrix implicitly, making it directly
+         * compatible with API functions
+        **/
+        operator Matrix::NativeType () { return mat; }
+
+        /**
+         * Copy assignment operator, taking an \c Xform and replacing linked data
+        **/
+        Xform & operator= (const Xform &source)
+        {
+            replace(source.mat);
+            return *this;
+        }
+
+        /**
+         * Copy assignment operator, taking a \c SquareMatrix and replacing linked data
+        **/
+        Xform & operator= (const Matrix &source)
+        {
+            replace(source);
+            return *this;
+        }
+
+#ifdef USING_R
+        /**
+         * Copy assignment operator, taking a \c SEXP and replacing linked data
+        **/
+        Xform & operator= (SEXP source)
+        {
+            replace(Matrix(source));
+            return *this;
+        }
+#endif
+
+        /**
+         * Access the xform matrix as an immutable \c SquareMatrix object
+        **/
+        const Matrix & matrix () const { return mat; }
+
+        /**
+         * Obtain the upper left 3x3 submatrix from the xform matrix
+        **/
+        Submatrix submatrix () const;
+
+        /**
+         * Obtain the 3x3 rotation matrix from the xform matrix, with scale and skew components
+         * removed
+        **/
+        Submatrix rotation () const;
+
+        /**
+         * Returns the \c qfac value, which should be 1 where the xform matrix represents a
+         * right-handed coordinate system (like \c RAS, the NIfTI default) and -1 for a left-handed
+         * system (like \c LAS, the ANALYZE default). Also see the \ref orientation method
+        **/
+        Element handedness () const;
+
+        /**
+         * Obtain the quaternion representation of the xform's rotation component
+        **/
+        Vector4 quaternion () const;
+
+        /**
+         * Obtain the translation component of the xform matrix
+        **/
+        Vector3 offset () const;
+
+        /**
+         * Obtain the pixel spacing of the image in each spatial dimension
+        **/
+        Vector3 spacing () const;
+
+        /**
+         * Obtain the approximate orientation of the image's coordinate frame, as a three-character
+         * string consisting of some permutation of the letters \c L or \c R (for left or right),
+         * \c P or \c A (for posterior or anterior) and \c I or \c S (for inferior or superior).
+         * These give the canonical axes most closely aligned with each of the three dimensions as
+         * stored
+        **/
+        std::string orientation () const;
+    };
+
+#ifdef USING_R
+    /**
+     * Convert between R \c SEXP object type and \c nifti_image datatype codes
+     * @param sexpType A numeric R \c SEXP type code
+     * @return A \c nifti_image datatype code
+     * @exception runtime_error If a non-numeric type is passed
+    **/
+    static int sexpTypeToNiftiType (const int sexpType)
+    {
+        if (sexpType == INTSXP || sexpType == LGLSXP)
+            return DT_INT32;
+        else if (sexpType == REALSXP)
+            return DT_FLOAT64;
+        else if (sexpType == CPLXSXP)
+            return DT_COMPLEX128;
+        else
+            throw std::runtime_error("Array elements must be numeric");
+    }
+#endif
+
+    /**
+     * Get the NIfTI format version used by the file at the specified path
+     * @param path A string specifying a file path
+     * @return An integer: -1 if the file is not present or not valid, 0 for ANALYZE-7.5, or
+     *         a value greater than 0 for NIfTI
+    **/
+    static int fileVersion (const std::string &path);
+
+
+protected:
+    nifti_image *image;         /**< The wrapped \c nifti_image pointer */
+    int *refCount;              /**< A reference counter, shared with other objects wrapping the same pointer */
+
+    /**
+     * Acquire the specified pointer to a \c nifti_image \c struct, taking (possibly shared)
+     * responsibility for freeing the associated memory. If the object currently wraps another
+     * pointer, it will be released
+     * @param image The pointer to wrap
+    **/
+    void acquire (nifti_image * const image);
+
+    /**
+     * Acquire the same pointer as another \c NiftiImage, incrementing the shared reference count
+     * @param source A reference to a \c NiftiImage
+    **/
+    void acquire (const NiftiImage &source)
+    {
+        refCount = source.refCount;
+        acquire(source.image);
+    }
+
+    /**
+     * Release the currently wrapped pointer, if it is not \c NULL, decrementing the reference
+     * count and releasing memory if there are no remaining references to the pointer
+    **/
+    void release ();
+
+    /**
+     * Copy the contents of a \c nifti_image to create a new image, acquiring the new pointer
+     * @param source A pointer to a \c nifti_image
+    **/
+    void copy (const nifti_image *source);
+
+    /**
+     * Copy the contents of another \c NiftiImage to create a new image, acquiring a new pointer
+     * @param source A reference to a \c NiftiImage
+    **/
+    void copy (const NiftiImage &source);
+
+    /**
+     * Copy the contents of a \ref Block to create a new image, acquiring a new pointer
+     * @param source A reference to a \ref Block
+    **/
+    void copy (const Block &source);
+
+
+#ifdef USING_R
+
+    /**
+     * Initialise the object from an S4 object of class \c "nifti"
+     * @param object The source object
+     * @param copyData If \c true, the data are copied in; otherwise just the metadata is extracted
+    **/
+    void initFromNiftiS4 (const Rcpp::RObject &object, const bool copyData = true);
+
+    /**
+     * Initialise the object from a reference object of class \c "MriImage"
+     * @param object The source object
+     * @param copyData If \c true, the data are copied in; otherwise just the metadata is extracted
+    **/
+    void initFromMriImage (const Rcpp::RObject &object, const bool copyData = true);
+
+    /**
+     * Initialise the object from an R list with named elements, which can only contain metadata
+     * @param object The source object
+    **/
+    void initFromList (const Rcpp::RObject &object);
+
+    /**
+     * Initialise the object from an R array
+     * @param object The source object
+     * @param copyData If \c true, the data are copied in; otherwise just the metadata is extracted
+    **/
+    void initFromArray (const Rcpp::RObject &object, const bool copyData = true);
+
+#endif
+
+    /**
+     * Initialise an empty object from basic metadata
+     * @param dim A vector of image dimensions
+     * @param datatype A datatype code for the image data
+    **/
+    void initFromDims (const std::vector<dim_t> &dim, const int datatype);
+
+    /**
+     * Modify the pixel dimensions, and potentially the xform matrices to match
+     * @param pixdim Vector of new pixel dimensions
+    **/
+    void updatePixdim (const std::vector<pixdim_t> &pixdim);
+
+    /**
+     * Modify the pixel dimension units
+     * @param pixunits Vector of new pixel units, specified using their standard abbreviations
+    **/
+    void setPixunits (const std::vector<std::string> &pixunits);
+
+public:
+    /**
+     * Default constructor
+    **/
+    NiftiImage ()
+        : image(NULL), refCount(NULL) {}
+
+    /**
+     * Copy constructor
+     * @param source Another \c NiftiImage object
+     * @param copy If \c true, the underlying \c nifti_image will be copied; otherwise the new
+     * object wraps the same \c nifti_image and increments the shared reference count
+    **/
+    NiftiImage (const NiftiImage &source, const bool copy = true)
+        : image(NULL), refCount(NULL)
+    {
+        if (copy)
+            this->copy(source);
+        else
+            acquire(source);
+#ifndef NDEBUG
+        Rc_printf("Creating NiftiImage (v%d) with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+    }
+
+    /**
+     * Initialise from a block, copying in the data
+     * @param source A \c Block object, referring to part of another \c NiftiImage
+    **/
+    NiftiImage (const Block &source)
+        : image(NULL), refCount(NULL)
+    {
+        this->copy(source);
+#ifndef NDEBUG
+        Rc_printf("Creating NiftiImage (v%d) with pointer %p (from Block)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+    }
+
+    /**
+     * Initialise using an existing \c nifti_image pointer
+     * @param image An existing \c nifti_image pointer, possibly \c NULL
+     * @param copy If \c true, the image data will be copied; otherwise this object just wraps
+     * the pointer passed to it
+    **/
+    NiftiImage (nifti_image * const image, const bool copy = false)
+        : image(NULL), refCount(NULL)
+    {
+        if (copy)
+            this->copy(image);
+        else
+            acquire(image);
+#ifndef NDEBUG
+        Rc_printf("Creating NiftiImage (v%d) with pointer %p (from pointer)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+    }
+
+    /**
+     * Initialise from basic metadata, allocating and zeroing pixel data
+     * @param dim A vector of image dimensions
+     * @param datatype A datatype code for the image data
+    **/
+    NiftiImage (const std::vector<dim_t> &dim, const int datatype);
+
+    /**
+     * Initialise from basic metadata, allocating and zeroing pixel data
+     * @param dim A vector of image dimensions
+     * @param datatype A datatype string for the image data
+    **/
+    NiftiImage (const std::vector<dim_t> &dim, const std::string &datatype);
+
+    /**
+     * Initialise using a path string
+     * @param path A string specifying a path to a valid NIfTI-1 file, possibly gzipped
+     * @param readData If \c true, the data will be read as well as the metadata
+     * @exception runtime_error If reading from the file fails
+    **/
+    NiftiImage (const std::string &path, const bool readData = true);
+
+    /**
+     * Initialise using a path string and sequence of required volumes
+     * @param path A string specifying a path to a valid NIfTI-1 file, possibly gzipped
+     * @param volumes The volumes to read in (squashing all dimensions above the third together)
+     * @exception runtime_error If reading from the file fails, or \c volumes is empty
+    **/
+    NiftiImage (const std::string &path, const std::vector<dim_t> &volumes);
+
+#ifdef USING_R
+    /**
+     * Initialise from an R object, retrieving an existing image from an external pointer attribute
+     * if available; otherwise constructing a new one from the R object itself
+     * @param object The source object
+     * @param readData If \c true, the data will be retrieved as well as the metadata
+     * @param readOnly If \c true, the caller asserts that its intent is read-only. Otherwise, if
+     * the \c SEXP may have multiple names at the R level (according to the \c MAYBE_SHARED R
+     * macro), an image retrieved from an external pointer will be duplicated to preserve R's usual
+     * semantics
+    **/
+    NiftiImage (const SEXP object, const bool readData = true, const bool readOnly = false);
+#endif
+
+    /**
+     * Destructor which decrements the reference counter, and releases the wrapped pointer if the
+     * counter drops to zero
+    **/
+    virtual ~NiftiImage () { release(); }
+
+    /**
+     * Allows a \c NiftiImage object to be treated as a pointer to a \c const \c nifti_image
+    **/
+    operator const nifti_image* () const { return image; }
+
+    /**
+     * Allows a \c NiftiImage object to be treated as a pointer to a \c nifti_image
+    **/
+    operator nifti_image* () { return image; }
+
+    /**
+     * Allows a \c NiftiImage object to be treated as a pointer to a \c const \c nifti_image
+    **/
+    const nifti_image * operator-> () const { return image; }
+
+    /**
+     * Allows a \c NiftiImage object to be treated as a pointer to a \c nifti_image
+    **/
+    nifti_image * operator-> () { return image; }
+
+    /**
+     * Copy assignment operator, which copies from its argument
+     * @param source Another \c NiftiImage
+    **/
+    NiftiImage & operator= (const NiftiImage &source)
+    {
+        copy(source);
+#ifndef NDEBUG
+        Rc_printf("Creating NiftiImage (v%d), with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+        return *this;
+    }
+
+    /**
+     * Copy assignment operator, which allows a block to be used to replace the contents of a
+     * suitably sized image
+     * @param source A reference to a suitable \ref Block object
+    **/
+    NiftiImage & operator= (const Block &source)
+    {
+        copy(source);
+#ifndef NDEBUG
+        Rc_printf("Creating NiftiImage (v%d) with pointer %p (from Block)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+        return *this;
+    }
+
+    /**
+     * Mark the image as persistent, so that it can be passed back to R
+     * @param persistent The new persistence state of the object
+     * @return A reference to the callee.
+     * @deprecated The persistence mechanism has been replaced with reference counting, so this
+     * function no longer has any effect. Instead it returns \c *this, unmodified.
+    **/
+    NiftiImage & setPersistence (const bool persistent) { return *this; }
+
+    /**
+     * Determine whether or not the wrapped pointer is \c NULL
+     * @return \c true if the wrapped pointer is \c NULL; \c false otherwise
+    **/
+    bool isNull () const { return (image == NULL); }
+
+    /**
+     * Determine whether the wrapped pointer is shared with another \c NiftiImage
+     * @return \c true if the reference count is greater than 1; \c false otherwise
+    **/
+    bool isShared () const { return (refCount != NULL && *refCount > 1); }
+
+    /**
+     * Determine whether or not the image is marked as persistent
+     * @return \c false, always
+     * @deprecated The persistence mechanism has been replaced with reference counting, so this
+     * function will always return \c false. Use \ref isShared instead.
+    **/
+    bool isPersistent () const { return false; }
+
+    /**
+     * Determine whether nontrivial scale and slope parameters are set
+     * @return \c true if the object wraps an image pointer, its slope is not zero and the slope
+     *         and intercept are not exactly one and zero; \c false otherwise
+    **/
+    bool isDataScaled () const { return (image != NULL && image->scl_slope != 0.0 && (image->scl_slope != 1.0 || image->scl_inter != 0.0)); }
+
+    /**
+     * Return the number of dimensions in the image
+     * @return An integer giving the image dimensionality
+    **/
+    int nDims () const
+    {
+        if (image == NULL)
+            return 0;
+        else
+            return image->ndim;
+    }
+
+    /**
+     * Return the dimensions of the image
+     * @return A vector of integers giving the width in each dimension
+    **/
+    std::vector<dim_t> dim () const
+    {
+        if (image == NULL)
+            return std::vector<dim_t>();
+        else
+            return std::vector<dim_t>(image->dim+1, image->dim+image->ndim+1);
+    }
+
+    /**
+     * Return the dimensions of the pixels or voxels in the image
+     * @return A vector of floating-point values giving the pixel width in each dimension
+    **/
+    std::vector<pixdim_t> pixdim () const
+    {
+        if (image == NULL)
+            return std::vector<pixdim_t>();
+        else
+            return std::vector<pixdim_t>(image->pixdim+1, image->pixdim+image->ndim+1);
+    }
+
+    /**
+     * Drop unitary dimensions
+     * @return Self, after possibly reducing the dimensionality of the image
+     * @note This function differs from its R equivalent in only dropping unitary dimensions after
+     * the last nonunitary one
+    **/
+    NiftiImage & drop ()
+    {
+        int ndim = image->ndim;
+        while (image->dim[ndim] < 2)
+            ndim--;
+        image->dim[0] = image->ndim = ndim;
+
+        return *this;
+    }
+
+    /**
+     * Obtain the pixel data within the image
+     * @return A constant \c NiftiImageData object encapsulating the data
+    **/
+    const NiftiImageData data () const { return NiftiImageData(image); }
+
+    /**
+     * Obtain the pixel data within the image
+     * @return A mutable \c NiftiImageData object encapsulating the data
+    **/
+    NiftiImageData data () { return NiftiImageData(image); }
+
+    /**
+     * Extract a vector of data from the image, casting it to any required element type
+     * @param useSlope If \c true, the default, then the data will be adjusted for the slope and
+     * intercept stored with the image, if any
+     * @return A vector of data values, cast to the required type
+     * @note If the slope and intercept are applied, there is no guarantee that the adjusted values
+     * will fit within the requested type. No check is made for this
+     * @deprecated Use of the (ultimately more flexible) \ref data methods is now preferred
+    **/
+    template <typename TargetType>
+    std::vector<TargetType> getData (const bool useSlope = true) const;
+
+    /**
+     * Change the datatype of the image, casting the pixel data if present
+     * @param datatype A NIfTI datatype code
+     * @param useSlope If \c true, and conversion is to an integer type, the data will be rescaled
+     * and the image's slope and intercept set to capture the full range of original values
+     * @return Self, after changing the datatype
+    **/
+    NiftiImage & changeDatatype (const int datatype, const bool useSlope = false);
+
+    /**
+     * Change the datatype of the image, casting the pixel data if present
+     * @param datatype A string specifying the new datatype
+     * @param useSlope If \c true, and conversion is to an integer type, the data will be rescaled
+     * and the image's slope and intercept set to capture the full range of original values
+     * @return Self, after changing the datatype
+    **/
+    NiftiImage & changeDatatype (const std::string &datatype, const bool useSlope = false);
+
+    /**
+     * Replace the pixel data in the image with the contents of a vector
+     * @param data A data vector, whose elements will be used to replace the image data
+     * @param datatype The final datatype required. By default the existing datatype of the image
+     * is used
+     * @exception runtime_error If the length of the new data does not match the image
+     * @return Self, after replacing the data
+    **/
+    template <typename SourceType>
+    NiftiImage & replaceData (const std::vector<SourceType> &data, const int datatype = DT_NONE);
+
+    /**
+     * Replace the pixel data in the image with the contents of a \c NiftiImageData object
+     * @param data A data object, whose elements will be case to match the datatype of the image
+     * @exception runtime_error If the length of the new data does not match the image
+     * @return Self, after replacing the data
+    **/
+    NiftiImage & replaceData (const NiftiImageData &data);
+
+    /**
+     * Drop the data from the image, retaining only the metadata. This method invalidates any
+     * \ref NiftiImageData objects referencing the old data
+     * @return Self, after dropping the data
+    **/
+    NiftiImage & dropData ()
+    {
+#if RNIFTI_NIFTILIB_VERSION == 1
+        nifti_image_unload(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        nifti2_image_unload(image);
+#endif
+        return *this;
+    }
+
+    /**
+     * Rescale the image, changing its image dimensions and pixel dimensions
+     * @param scales Vector of scale factors along each dimension
+     * @return Self, after rescaling the metadata
+     * @note No interpolation is performed on the pixel data, which is simply dropped
+    **/
+    NiftiImage & rescale (const std::vector<pixdim_t> &scales);
+
+    /**
+     * Reorient the image by permuting dimensions and potentially reversing some
+     * @param i,j,k Constants such as \c NIFTI_L2R, \c NIFTI_P2A and \c NIFTI_I2S, giving the
+     * canonical axes to reorient to
+     * @return Self, after reorientation
+     * @note The pixel data is reordered, but not resampled. The xform matrices will also be
+     * adjusted in line with the transformation
+    **/
+    NiftiImage & reorient (const int i, const int j, const int k);
+
+    /**
+     * Reorient the image by permuting dimensions and potentially reversing some
+     * @param orientation A string containing some permutation of the letters \c L or \c R,
+     * \c P or \c A, \c I or \c S, giving the canonical axes to reorient to
+     * @return Self, after reorientation
+     * @note The pixel data is reordered, but not resampled. The xform matrices will also be
+     * adjusted in line with the transformation
+    **/
+    NiftiImage & reorient (const std::string &orientation);
+
+#ifdef USING_R
+    /**
+     * Update the image from an R array
+     * @param object An R array or list object
+     * @return Self, after updating data and/or metadata
+    **/
+    NiftiImage & update (const Rcpp::RObject &object);
+#endif
+
+    /**
+     * Obtain an xform matrix, indicating the orientation of the image
+     * @param preferQuaternion If \c true, use the qform matrix in preference to the sform;
+     * otherwise prefer the sform
+     * @return An \ref Xform object
+    **/
+    const Xform xform (const bool preferQuaternion = true) const;
+
+    /**
+     * Access the qform matrix
+     * @return An \ref Xform object
+    **/
+    const Xform qform () const { return (image == NULL ? Xform() : Xform(image->qto_xyz)); }
+
+    /**
+     * Access the qform matrix
+     * @return An \ref Xform object
+    **/
+    Xform qform () { return (image == NULL ? Xform() : Xform(image->qto_xyz, image->qto_ijk, &image->quatern_b)); }
+
+    /**
+     * Access the sform matrix
+     * @return An \ref Xform object
+    **/
+    const Xform sform () const { return (image == NULL ? Xform() : Xform(image->sto_xyz)); }
+
+    /**
+     * Access the sform matrix
+     * @return An \ref Xform object
+    **/
+    Xform sform () { return (image == NULL ? Xform() : Xform(image->sto_xyz, image->sto_ijk)); }
+
+    /**
+     * Return the number of blocks in the image
+     * @return An integer giving the number of blocks in the image
+    **/
+    dim_t nBlocks () const { return (image == NULL ? 0 : image->dim[image->ndim]); }
+
+    /**
+     * Extract a block from the image
+     * @param i The block number required
+     * @return A \ref Block object
+     * @note \ref slice and \ref volume are variants of this function specific to 3D and 4D images,
+     * respectively, which may be preferred in some cases for clarity
+    **/
+    const Block block (const int i) const { return Block(*this, nDims(), i); }
+
+    /**
+     * Extract a block from the image
+     * @param i The block number required
+     * @return A \ref Block object
+     * @note \ref slice and \ref volume are variants of this function specific to 3D and 4D images,
+     * respectively, which may be preferred in some cases for clarity
+    **/
+    Block block (const int i) { return Block(*this, nDims(), i); }
+
+    /**
+     * Extract a slice block from a 3D image
+     * @param i The slice number required
+     * @return A \ref Block object
+    **/
+    const Block slice (const int i) const { return Block(*this, 3, i); }
+
+    /**
+     * Extract a slice block from a 3D image
+     * @param i The slice number required
+     * @return A \ref Block object
+    **/
+    Block slice (const int i) { return Block(*this, 3, i); }
+
+    /**
+     * Extract a volume block from a 4D image
+     * @param i The volume number required
+     * @return A \ref Block object
+    **/
+    const Block volume (const int i) const { return Block(*this, 4, i); }
+
+    /**
+     * Extract a volume block from a 4D image
+     * @param i The volume number required
+     * @return A \ref Block object
+    **/
+    Block volume (const int i) { return Block(*this, 4, i); }
+
+    /**
+     * Return the number of colour channels used by the image
+     * @return An integer giving the number of channels: generally 1, exception for RGB datatypes,
+     * which have 3 or 4, or the empty datatype, which has 0. Also 0 for null images
+    **/
+    int nChannels () const
+    {
+        if (image == NULL)
+            return 0;
+        else
+        {
+            switch (image->datatype)
+            {
+                case DT_NONE:   return 0;
+                case DT_RGB24:  return 3;
+                case DT_RGBA32: return 4;
+                default:        return 1;
+            }
+        }
+    }
+
+    /**
+     * Return the number of voxels in the image
+     * @return An integer giving the number of voxels in the image
+    **/
+    size_t nVoxels () const { return (image == NULL ? 0 : image->nvox); }
+
+    /**
+     * Return the number of extensions associated with the image
+     * @return An integer giving the number of extensions
+    **/
+    int nExtensions () const { return (image == NULL ? 0 : image->num_ext); }
+
+    /**
+     * Return a list of the extensions associated with the image
+     * @param code Integer specifying the code corresponding to the extensions required. If -1, the
+     * default, all extensions are returned. There may be more than one extension with a given code
+     * @return A list of \ref Extension objects
+    **/
+    std::list<Extension> extensions (const int code = -1) const
+    {
+        if (image == NULL)
+            return std::list<Extension>();
+        else
+        {
+            std::list<Extension> result;
+            for (int i=0; i<image->num_ext; i++)
+            {
+                const Extension extension(image->ext_list + i);
+                if (code < 0 || code == extension.code())
+                    result.push_back(extension);
+            }
+            return result;
+        }
+    }
+
+    /**
+     * Add an extension to the image
+     * @param The new image extension, an \ref Extension object
+     * @return Self, with the extension appended
+    **/
+    NiftiImage & addExtension (const Extension &extension)
+    {
+        if (image != NULL)
+#if RNIFTI_NIFTILIB_VERSION == 1
+            nifti_add_extension(image, extension.data(), int(extension.length()), extension.code());
+#elif RNIFTI_NIFTILIB_VERSION == 2
+            nifti2_add_extension(image, extension.data(), int(extension.length()), extension.code());
+#endif
+        return *this;
+    }
+
+    /**
+     * Replace all extensions with new ones
+     * @param A list of \ref Extension objects
+     * @return Self, with the new extensions attached
+    **/
+    NiftiImage & replaceExtensions (const std::list<Extension> extensions)
+    {
+        dropExtensions();
+        for (std::list<Extension>::const_iterator it=extensions.begin(); it!=extensions.end(); ++it)
+            addExtension(*it);
+        return *this;
+    }
+
+    /**
+     * Remove any extensions from the image
+     * @return Self, with extensions removed
+    **/
+    NiftiImage & dropExtensions ()
+    {
+        if (image != NULL)
+#if RNIFTI_NIFTILIB_VERSION == 1
+            nifti_free_extensions(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+            nifti2_free_extensions(image);
+#endif
+        return *this;
+    }
+
+    /**
+     * Write the image to a NIfTI-1 file
+     * @param fileName The file name to write to, with appropriate suffix (e.g. ".nii.gz")
+     * @param datatype The datatype to use when writing the file
+     * @param filetype The file type to create: a \c NIFTI_FTYPE constant or -1. In the latter case
+     * the file name is used to determine the file type
+     * @return A pair of strings, giving the final header and image paths in that order
+    **/
+    std::pair<std::string,std::string> toFile (const std::string fileName, const int datatype = DT_NONE, const int filetype = -1) const;
+
+    /**
+     * Write the image to a NIfTI-1 file
+     * @param fileName The file name to write to, with appropriate suffix (e.g. ".nii.gz")
+     * @param datatype The datatype to use when writing the file, or "auto"
+     * @param filetype The file type to create: a \c NIFTI_FTYPE constant or -1. In the latter case
+     * the file name is used to determine the file type
+     * @return A pair of strings, giving the final header and image paths in that order
+    **/
+    std::pair<std::string,std::string> toFile (const std::string fileName, const std::string &datatype, const int filetype = -1) const;
+
+#ifdef USING_R
+
+    /**
+     * Create an R array from the image
+     * @return A numeric array object with an external pointer attribute
+    **/
+    Rcpp::RObject toArray () const;
+
+    /**
+     * Create an internal image to pass back to R
+     * @param label A string labelling the image
+     * @return An R character string with additional attributes
+    **/
+    Rcpp::RObject toPointer (const std::string label) const;
+
+    /**
+     * A conditional method that calls either \ref toArray or \ref toPointer
+     * @param internal If \c true, \ref toPointer will be called; otherwise \ref toArray
+     * @param label A string labelling the image
+     * @return An R object
+    **/
+    Rcpp::RObject toArrayOrPointer (const bool internal, const std::string label) const;
+
+#endif
+
+};
+
+// Include image implementations
+#include "RNifti/NiftiImage_impl.h"
+
+} // main namespace
+
+#endif
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
new file mode 100644
index 00000000..2e7c6b7a
--- /dev/null
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -0,0 +1,1882 @@
+#ifndef _NIFTI_IMAGE_IMPL_H_
+#define _NIFTI_IMAGE_IMPL_H_
+
+namespace internal {
+
+// A poor man's NaN check, but should work whenever proper IEEE arithmetic is being used
+template <typename Type>
+inline bool isNaN (const Type x) { return (x != x); }
+
+#ifdef USING_R
+// R offers the portable ISNAN macro for doubles, which is more robust
+// Note that this tests for NaN and NA values
+template <>
+inline bool isNaN<double> (const double x) { return bool(ISNAN(x)); }
+
+// For R specifically, we have to catch NA_INTEGER (a.k.a. INT_MIN)
+template <>
+inline bool isNaN<int> (const int x) { return (x == NA_INTEGER); }
+
+template <>
+inline bool isNaN<rgba32_t> (const rgba32_t x) { return (x.value.packed == NA_INTEGER); }
+
+// Specifically test for missingness - this is only relevant for R, and only when the distinction from NaN is important
+template <typename Type>
+inline bool isNA (const Type x) { return false; }
+
+template <>
+inline bool isNA<int> (const int x) { return (x == NA_INTEGER); }
+
+template <>
+inline bool isNA<double> (const double x) { return ISNA(x); }
+#endif
+
+template <typename Type>
+inline bool lessThan (Type a, Type b) { return (!isNaN(a) && !isNaN(b) && a < b); }
+
+inline double roundEven (const double value)
+{
+    if (isNaN(value))
+        return value;
+
+    double whole;
+    double frac = std::fabs(std::modf(value, &whole));
+    double sign = (value < 0.0 ? -1.0 : 1.0);
+
+    if (frac < 0.5)
+        return whole;
+    else if (frac > 0.5)
+        return whole + sign;
+    else if (std::fmod(whole, 2.0) < 0.0001)
+        return whole;
+    else
+        return whole + sign;
+}
+
+inline int stringToDatatype (const std::string &datatype)
+{
+    static std::map<std::string,int> datatypeCodes;
+    if (datatypeCodes.empty())
+    {
+        datatypeCodes["auto"] = DT_NONE;
+        datatypeCodes["none"] = DT_NONE;
+        datatypeCodes["unknown"] = DT_NONE;
+        datatypeCodes["uint8"] = DT_UINT8;
+        datatypeCodes["char"] = DT_UINT8;
+        datatypeCodes["int16"] = DT_INT16;
+        datatypeCodes["short"] = DT_INT16;
+        datatypeCodes["int32"] = DT_INT32;
+        datatypeCodes["int"] = DT_INT32;
+        datatypeCodes["float32"] = DT_FLOAT32;
+        datatypeCodes["float"] = DT_FLOAT32;
+        datatypeCodes["float64"] = DT_FLOAT64;
+        datatypeCodes["double"] = DT_FLOAT64;
+        datatypeCodes["int8"] = DT_INT8;
+        datatypeCodes["uint16"] = DT_UINT16;
+        datatypeCodes["uint32"] = DT_UINT32;
+        datatypeCodes["int64"] = DT_INT64;
+        datatypeCodes["uint64"] = DT_UINT64;
+        datatypeCodes["complex64"] = DT_COMPLEX64;
+        datatypeCodes["complex128"] = DT_COMPLEX128;
+        datatypeCodes["complex"] = DT_COMPLEX128;
+        datatypeCodes["rgb24"] = DT_RGB24;
+        datatypeCodes["rgb"] = DT_RGB24;
+        datatypeCodes["rgba32"] = DT_RGBA32;
+        datatypeCodes["rgba"] = DT_RGBA32;
+    }
+
+    std::locale locale;
+    std::string lowerCaseDatatype = datatype;
+    for (std::string::size_type i=0; i<lowerCaseDatatype.length(); i++)
+        lowerCaseDatatype[i] = std::tolower(lowerCaseDatatype[i], locale);
+
+    if (datatypeCodes.count(lowerCaseDatatype) == 0)
+    {
+        std::ostringstream message;
+        message << "Datatype \"" << datatype << "\" is not valid";
+        Rf_warning(message.str().c_str());
+        return DT_NONE;
+    }
+    else
+        return datatypeCodes[lowerCaseDatatype];
+}
+
+template <typename TargetType>
+struct ElementConverter
+{
+    template <typename SourceType>
+    TargetType operator() (const SourceType &source)
+    {
+        return static_cast<TargetType>(source);
+    }
+};
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+
+// Byte-by-byte conversion of nifti2_image struct to a nifti1_image
+// By nature this is a risky operation, which has to make assumptions about the layout of the structs in memory
+inline nifti1_image * convertImageV2to1 (nifti2_image *image)
+{
+    if (image == NULL)
+        return NULL;
+
+    nifti1_image *result = (nifti1_image *) calloc(1, sizeof(nifti1_image));
+
+#ifndef NDEBUG
+    Rc_printf("Converting v2 image with pointer %p to v1 image with pointer %p\n", image, result);
+#endif
+
+    // We assume that each block of a given type is stored contiguously like an array - this should be the case, but may not be guaranteed
+    std::transform(&image->ndim, &image->ndim + 16, &result->ndim, ElementConverter<int>());
+    result->nvox = static_cast<int>(image->nvox);
+    std::copy(&image->nbyper, &image->nbyper + 2, &result->nbyper);
+    std::transform(&image->dx, &image->dx + 19, &result->dx, ElementConverter<float>());
+    std::copy(&image->qform_code, &image->qform_code + 6, &result->qform_code);
+    std::transform(&image->slice_start, &image->slice_start + 2, &result->slice_start, ElementConverter<int>());
+    std::transform(&image->slice_duration, &image->slice_duration + 73, &result->slice_duration, ElementConverter<float>());
+    std::copy(&image->xyz_units, &image->xyz_units + 4, &result->xyz_units);
+    std::transform(&image->intent_p1, &image->intent_p1 + 3, &result->intent_p1, ElementConverter<float>());
+    std::copy(static_cast<char*>(image->intent_name), static_cast<char*>(image->intent_name) + 120, static_cast<char*>(result->intent_name));
+    result->iname_offset = static_cast<int>(image->iname_offset);
+    std::copy(&image->swapsize, &image->swapsize + 2, &result->swapsize);
+    result->analyze75_orient = image->analyze75_orient;
+
+    // Copy buffers, since the memory-freeing logic isn't portable between struct versions
+    result->fname = nifti_strdup(image->fname);
+    result->iname = nifti_strdup(image->iname);
+    if (image->data != NULL)
+    {
+        result->data = calloc(result->nvox, result->nbyper);
+        memcpy(result->data, image->data, result->nvox * result->nbyper);
+    }
+
+    // Copy extensions
+    result->num_ext = image->num_ext;
+    result->ext_list = result->num_ext == 0 ? NULL : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension));
+    for (int i=0; i<result->num_ext; i++)
+    {
+        result->ext_list[i].esize = image->ext_list[i].esize;
+        result->ext_list[i].ecode = image->ext_list[i].ecode;
+        result->ext_list[i].edata = (char *) calloc(result->ext_list[i].esize - 8, sizeof(char));
+        memcpy(result->ext_list[i].edata, image->ext_list[i].edata, result->ext_list[i].esize - 8);
+    }
+
+    // Check the result looks plausible
+    if (!nifti_nim_is_valid(result, 0))
+        throw std::runtime_error("Conversion between image versions failed");
+
+    return result;
+}
+
+#elif RNIFTI_NIFTILIB_VERSION == 2
+
+// Byte-by-byte conversion of nifti1_image struct to a nifti2_image
+inline nifti2_image * convertImageV1to2 (nifti1_image *image)
+{
+    if (image == NULL)
+        return NULL;
+
+    nifti2_image *result = (nifti2_image *) calloc(1, sizeof(nifti2_image));
+
+#ifndef NDEBUG
+    Rc_printf("Converting v1 image with pointer %p to v2 image with pointer %p\n", image, result);
+#endif
+
+    std::transform(&image->ndim, &image->ndim + 16, &result->ndim, ElementConverter<int64_t>());
+    result->nvox = static_cast<int64_t>(image->nvox);
+    std::copy(&image->nbyper, &image->nbyper + 2, &result->nbyper);
+    std::transform(&image->dx, &image->dx + 19, &result->dx, ElementConverter<double>());
+    std::copy(&image->qform_code, &image->qform_code + 6, &result->qform_code);
+    std::transform(&image->slice_start, &image->slice_start + 2, &result->slice_start, ElementConverter<int64_t>());
+    std::transform(&image->slice_duration, &image->slice_duration + 73, &result->slice_duration, ElementConverter<double>());
+    std::copy(&image->xyz_units, &image->xyz_units + 4, &result->xyz_units);
+    std::transform(&image->intent_p1, &image->intent_p1 + 3, &result->intent_p1, ElementConverter<double>());
+    std::copy(static_cast<char*>(image->intent_name), static_cast<char*>(image->intent_name) + 120, static_cast<char*>(result->intent_name));
+    result->iname_offset = static_cast<int64_t>(image->iname_offset);
+    std::copy(&image->swapsize, &image->swapsize + 2, &result->swapsize);
+    result->analyze75_orient = image->analyze75_orient;
+
+    result->fname = nifti_strdup(image->fname);
+    result->iname = nifti_strdup(image->iname);
+    if (image->data != NULL)
+    {
+        result->data = calloc(result->nvox, result->nbyper);
+        memcpy(result->data, image->data, result->nvox * result->nbyper);
+    }
+
+    result->num_ext = image->num_ext;
+    result->ext_list = result->num_ext == 0 ? NULL : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension));
+    for (int i=0; i<result->num_ext; i++)
+    {
+        result->ext_list[i].esize = image->ext_list[i].esize;
+        result->ext_list[i].ecode = image->ext_list[i].ecode;
+        result->ext_list[i].edata = (char *) calloc(result->ext_list[i].esize - 8, sizeof(char));
+        memcpy(result->ext_list[i].edata, image->ext_list[i].edata, result->ext_list[i].esize - 8);
+    }
+
+    if (!nifti2_nim_is_valid(result, 0))
+        throw std::runtime_error("Conversion between image versions failed");
+
+    return result;
+}
+
+#endif // RNIFTI_NIFTILIB_VERSION
+
+#ifdef USING_R
+inline const char * stringToPath (const std::string &str) { return R_ExpandFileName(str.c_str()); }
+#else
+inline const char * stringToPath (const std::string &str) { return str.c_str(); }
+#endif
+
+#ifdef USING_R
+
+template <typename TargetType>
+inline void copyIfPresent (const Rcpp::List &list, const std::set<std::string> names, const std::string &name, TargetType &target)
+{
+    if (names.count(name) == 1)
+    {
+        const Rcpp::RObject object = list[name];
+        const int length = Rf_length(object);
+        if (length == 0)
+        {
+            std::ostringstream message;
+            message << "Field \"" << name << "\" is empty and will be ignored";
+            Rf_warning(message.str().c_str());
+        }
+        else if (length > 1)
+        {
+            std::ostringstream message;
+            message << "Field \"" << name << "\" has " << length << "elements, but only the first will be used";
+            Rf_warning(message.str().c_str());
+            target = Rcpp::as< std::vector<TargetType> >(object)[0];
+        }
+        else
+            target = Rcpp::as<TargetType>(object);
+    }
+}
+
+// Special case for char, because Rcpp tries to be too clever and convert it to a string
+template <>
+inline void copyIfPresent (const Rcpp::List &list, const std::set<std::string> names, const std::string &name, char &target)
+{
+    if (names.count(name) == 1)
+    {
+        int intValue = 0;
+        copyIfPresent<int>(list, names, name, intValue);
+        target = static_cast<char>(intValue);
+    }
+}
+
+inline void updateHeader (nifti_1_header *header, const Rcpp::List &list, const bool ignoreDatatype = false)
+{
+    if (header == NULL || Rf_isNull(list.names()))
+        return;
+
+    const Rcpp::CharacterVector _names = list.names();
+    std::set<std::string> names;
+    for (Rcpp::CharacterVector::const_iterator it=_names.begin(); it!=_names.end(); it++)
+        names.insert(Rcpp::as<std::string>(*it));
+
+    copyIfPresent(list, names, "sizeof_hdr", header->sizeof_hdr);
+
+    copyIfPresent(list, names, "dim_info", header->dim_info);
+    if (names.count("dim") == 1)
+    {
+        std::vector<short> dim = list["dim"];
+        if (dim.size() != 8)
+            throw std::runtime_error("Field \"dim\" must contain 8 elements");
+        for (size_t i=0; i<8; i++)
+            header->dim[i] = dim[i];
+    }
+
+    copyIfPresent(list, names, "intent_p1", header->intent_p1);
+    copyIfPresent(list, names, "intent_p2", header->intent_p2);
+    copyIfPresent(list, names, "intent_p3", header->intent_p3);
+    copyIfPresent(list, names, "intent_code", header->intent_code);
+
+    if (!ignoreDatatype)
+    {
+        copyIfPresent(list, names, "datatype", header->datatype);
+        copyIfPresent(list, names, "bitpix", header->bitpix);
+    }
+
+    copyIfPresent(list, names, "slice_start", header->slice_start);
+    if (names.count("pixdim") == 1)
+    {
+        std::vector<NiftiImage::pixdim_t> pixdim = list["pixdim"];
+        if (pixdim.size() != 8)
+            throw std::runtime_error("Field \"pixdim\" must contain 8 elements");
+        for (size_t i=0; i<8; i++)
+            header->pixdim[i] = pixdim[i];
+    }
+    copyIfPresent(list, names, "vox_offset", header->vox_offset);
+    copyIfPresent(list, names, "scl_slope", header->scl_slope);
+    copyIfPresent(list, names, "scl_inter", header->scl_inter);
+    copyIfPresent(list, names, "slice_end", header->slice_end);
+    copyIfPresent(list, names, "slice_code", header->slice_code);
+    copyIfPresent(list, names, "xyzt_units", header->xyzt_units);
+    copyIfPresent(list, names, "cal_max", header->cal_max);
+    copyIfPresent(list, names, "cal_min", header->cal_min);
+    copyIfPresent(list, names, "slice_duration", header->slice_duration);
+    copyIfPresent(list, names, "toffset", header->toffset);
+
+    if (names.count("descrip") == 1)
+        strcpy(header->descrip, Rcpp::as<std::string>(list["descrip"]).substr(0,79).c_str());
+    if (names.count("aux_file") == 1)
+        strcpy(header->aux_file, Rcpp::as<std::string>(list["aux_file"]).substr(0,23).c_str());
+
+    copyIfPresent(list, names, "qform_code", header->qform_code);
+    copyIfPresent(list, names, "sform_code", header->sform_code);
+    copyIfPresent(list, names, "quatern_b", header->quatern_b);
+    copyIfPresent(list, names, "quatern_c", header->quatern_c);
+    copyIfPresent(list, names, "quatern_d", header->quatern_d);
+    copyIfPresent(list, names, "qoffset_x", header->qoffset_x);
+    copyIfPresent(list, names, "qoffset_y", header->qoffset_y);
+    copyIfPresent(list, names, "qoffset_z", header->qoffset_z);
+
+    if (names.count("srow_x") == 1)
+    {
+        std::vector<NiftiImage::Xform::Element> srow_x = list["srow_x"];
+        if (srow_x.size() != 4)
+            throw std::runtime_error("Field \"srow_x\" must contain 4 elements");
+        for (size_t i=0; i<4; i++)
+            header->srow_x[i] = srow_x[i];
+    }
+    if (names.count("srow_y") == 1)
+    {
+        std::vector<NiftiImage::Xform::Element> srow_y = list["srow_y"];
+        if (srow_y.size() != 4)
+            throw std::runtime_error("Field \"srow_y\" must contain 4 elements");
+        for (size_t i=0; i<4; i++)
+            header->srow_y[i] = srow_y[i];
+    }
+    if (names.count("srow_z") == 1)
+    {
+        std::vector<NiftiImage::Xform::Element> srow_z = list["srow_z"];
+        if (srow_z.size() != 4)
+            throw std::runtime_error("Field \"srow_z\" must contain 4 elements");
+        for (size_t i=0; i<4; i++)
+            header->srow_z[i] = srow_z[i];
+    }
+
+    if (names.count("intent_name") == 1)
+        strcpy(header->intent_name, Rcpp::as<std::string>(list["intent_name"]).substr(0,15).c_str());
+    if (names.count("magic") == 1)
+        strcpy(header->magic, Rcpp::as<std::string>(list["magic"]).substr(0,3).c_str());
+}
+
+inline void addAttributes (const SEXP pointer, const NiftiImage &source, const bool realDim = true, const bool includeXptr = true, const bool keepData = true)
+{
+    const int nDims = source->dim[0];
+    Rcpp::RObject object(pointer);
+    Rcpp::IntegerVector dim(source->dim+1, source->dim+1+nDims);
+
+    if (realDim)
+        object.attr("dim") = dim;
+    else
+        object.attr("imagedim") = dim;
+
+    Rcpp::DoubleVector pixdim(nDims);
+    for (int i=0; i<nDims; i++)
+        pixdim[i] = std::abs(static_cast<double>(source->pixdim[i+1]));
+    object.attr("pixdim") = pixdim;
+
+    if (source->xyz_units == NIFTI_UNITS_UNKNOWN && source->time_units == NIFTI_UNITS_UNKNOWN)
+        object.attr("pixunits") = "Unknown";
+    else
+    {
+        Rcpp::CharacterVector pixunits(2);
+        pixunits[0] = nifti_units_string(source->xyz_units);
+        pixunits[1] = nifti_units_string(source->time_units);
+        object.attr("pixunits") = pixunits;
+    }
+
+    if (includeXptr)
+    {
+        NiftiImage *imagePtr = new NiftiImage(source, false);
+        if (!keepData)
+            imagePtr->dropData();
+        Rcpp::XPtr<NiftiImage> xptr(imagePtr);
+        object.attr(".nifti_image_ptr") = xptr;
+        object.attr(".nifti_image_ver") = RNIFTI_NIFTILIB_VERSION;
+    }
+}
+
+#endif  // USING_R
+
+}       // internal namespace
+
+template <typename Type, bool alpha>
+inline void NiftiImageData::ConcreteTypeHandler<Type,alpha>::minmax (void *ptr, const size_t length, double *min, double *max) const
+{
+    if (ptr == NULL || length < 1)
+    {
+        *min = static_cast<double>(std::numeric_limits<Type>::min());
+        *max = static_cast<double>(std::numeric_limits<Type>::max());
+    }
+    else
+    {
+        Type *loc = static_cast<Type*>(ptr);
+        Type currentMin = *loc, currentMax = *loc;
+        for (size_t i=1; i<length; i++)
+        {
+            loc++;
+            if (internal::lessThan(*loc, currentMin))
+                currentMin = *loc;
+            if (internal::lessThan(currentMax, *loc))
+                currentMax = *loc;
+        }
+        *min = static_cast<double>(currentMin);
+        *max = static_cast<double>(currentMax);
+    }
+}
+
+template <typename ElementType>
+inline void NiftiImageData::ConcreteTypeHandler<std::complex<ElementType>,false>::minmax (void *ptr, const size_t length, double *min, double *max) const
+{
+    if (ptr == NULL || length < 1)
+    {
+        *min = static_cast<double>(std::numeric_limits<ElementType>::min());
+        *max = static_cast<double>(std::numeric_limits<ElementType>::max());
+    }
+    else
+    {
+        ElementType *loc = static_cast<ElementType*>(ptr);
+        ElementType currentMin = *loc, currentMax = *loc;
+        for (size_t i=1; i<(2*length); i++)
+        {
+            loc++;
+            if (internal::lessThan(*loc, currentMin))
+                currentMin = *loc;
+            if (internal::lessThan(currentMax, *loc))
+                currentMax = *loc;
+        }
+        *min = static_cast<double>(currentMin);
+        *max = static_cast<double>(currentMax);
+    }
+}
+
+template <typename SourceType>
+inline NiftiImageData::Element & NiftiImageData::Element::operator= (const SourceType &value)
+{
+    if (internal::isNaN(value))
+    {
+        if (!parent.handler->hasNaN())
+        {
+            const double zeroValue = parent.isScaled() ? (-parent.intercept / parent.slope) : 0.0;
+            if (parent.isFloatingPoint())
+                parent.handler->setDouble(ptr, zeroValue);
+            else
+                parent.handler->setInt(ptr, static_cast<int>(internal::roundEven(zeroValue)));
+        }
+#ifdef USING_R
+        // Only happens for integer types that admit an NaN/NA value.
+        // In practice this means int specifically for R, so we don't
+        // need to worry about the effect of casting INT_MIN to a wider
+        // or narrower type
+        else if (parent.isInteger())
+            parent.handler->setInt(ptr, NA_INTEGER);
+        else if (internal::isNA(value))
+            parent.handler->setDouble(ptr, NA_REAL);
+#endif
+        else
+            parent.handler->setDouble(ptr, std::numeric_limits<double>::quiet_NaN());
+    }
+    else if (parent.isScaled())
+    {
+        double reverseScaledValue = (static_cast<double>(value) - parent.intercept) / parent.slope;
+        if (parent.isFloatingPoint())
+            parent.handler->setDouble(ptr, reverseScaledValue);
+        else
+            parent.handler->setInt(ptr, static_cast<int>(internal::roundEven(reverseScaledValue)));
+    }
+    else if (std::numeric_limits<SourceType>::is_integer)
+        parent.handler->setInt(ptr, static_cast<int>(value));
+    else
+        parent.handler->setDouble(ptr, static_cast<double>(value));
+    return *this;
+}
+
+inline NiftiImageData::Element & NiftiImageData::Element::operator= (const NiftiImageData::Element &other)
+{
+    if (other.parent.isScaled() || other.parent.isFloatingPoint())
+    {
+        const double value = other;
+        *this = value;
+    }
+    else
+    {
+        const int value = other;
+        *this = value;
+    }
+    return *this;
+}
+
+inline void NiftiImage::Extension::copy (const nifti1_extension *source)
+{
+    if (source == NULL)
+        ext = NULL;
+    else
+    {
+        ext = (nifti1_extension *) calloc(1, sizeof(nifti1_extension));
+        ext->esize = source->esize;
+        ext->ecode = source->ecode;
+        if (source->edata != NULL && source->esize > 8)
+        {
+            ext->edata = (char *) calloc(source->esize - 8, 1);
+            memcpy(ext->edata, source->edata, source->esize - 8);
+        }
+    }
+}
+
+template <typename SourceType>
+inline void NiftiImage::Extension::copy (const SourceType *data, const size_t length, const int code)
+{
+    if (data == NULL)
+        ext = NULL;
+    else
+    {
+        const size_t bytes = length * sizeof(SourceType);
+        ext = (nifti1_extension *) calloc(1, sizeof(nifti1_extension));
+        ext->esize = int(bytes + 8);
+        const int remainder = ext->esize % 16;
+        ext->esize += (remainder == 0 ? 0 : 16 - remainder);
+        ext->ecode = code;
+        ext->edata = (char *) calloc(ext->esize - 8, 1);
+        memcpy(ext->edata, data, bytes);
+    }
+}
+
+inline void NiftiImage::Xform::replace (const Matrix &source)
+{
+    mat = source;
+    if (forward != NULL)
+        std::copy(source.begin(), source.end(), forward);
+    if (inverse != NULL)
+    {
+        Matrix inv = source.inverse();
+        std::copy(inv.begin(), inv.end(), inverse);
+    }
+    if (qparams != NULL)
+    {
+#if RNIFTI_NIFTILIB_VERSION == 1
+        nifti_mat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, NULL, NULL, NULL, qparams+6);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        nifti_dmat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, NULL, NULL, NULL, qparams+6);
+#endif
+    }
+}
+
+inline NiftiImage::Xform::Submatrix NiftiImage::Xform::submatrix () const
+{
+    NiftiImage::Xform::Submatrix result;
+    for (int i=0; i<3; i++)
+    {
+        for (int j=0; j<3; j++)
+            result(i,j) = mat(i,j);
+    }
+    return result;
+}
+
+inline NiftiImage::Xform::Submatrix NiftiImage::Xform::rotation () const
+{
+    NiftiImage::Xform::Vector3 qbcd;
+    NiftiImage::Xform::Element qfac;
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_mat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], NULL, NULL, NULL, NULL, NULL, NULL, &qfac);
+    NiftiImage::Xform rotation = nifti_quatern_to_mat44(qbcd[0], qbcd[1], qbcd[2], 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, qfac);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    nifti_dmat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], NULL, NULL, NULL, NULL, NULL, NULL, &qfac);
+    NiftiImage::Xform rotation = nifti_quatern_to_dmat44(qbcd[0], qbcd[1], qbcd[2], 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, qfac);
+#endif
+    return rotation.submatrix();
+}
+
+inline NiftiImage::Xform::Element NiftiImage::Xform::handedness () const
+{
+    NiftiImage::Xform::Element qfac;
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_mat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &qfac);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    nifti_dmat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &qfac);
+#endif
+    return qfac;
+}
+
+inline NiftiImage::Xform::Vector4 NiftiImage::Xform::quaternion () const
+{
+    NiftiImage::Xform::Vector4 q;
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_mat44_to_quatern(mat, &q[1], &q[2], &q[3], NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    nifti_dmat44_to_quatern(mat, &q[1], &q[2], &q[3], NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+#endif
+    q[0] = 1 - (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]);
+    return q;
+}
+
+inline NiftiImage::Xform::Vector3 NiftiImage::Xform::offset () const
+{
+    NiftiImage::Xform::Vector3 vec;
+    for (int i=0; i<3; i++)
+        vec[i] = mat(i,3);
+    return vec;
+}
+
+inline NiftiImage::Xform::Vector3 NiftiImage::Xform::spacing () const
+{
+    NiftiImage::Xform::Vector3 vec;
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_mat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, &vec[0], &vec[1], &vec[2], NULL);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    nifti_dmat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, &vec[0], &vec[1], &vec[2], NULL);
+#endif
+    return vec;
+}
+
+inline std::string NiftiImage::Xform::orientation () const
+{
+    int icode, jcode, kcode;
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_mat44_to_orientation(mat, &icode, &jcode, &kcode);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    nifti_dmat44_to_orientation(mat, &icode, &jcode, &kcode);
+#endif
+
+    int codes[3] = { icode, jcode, kcode };
+    std::string result("---");
+    for (int i=0; i<3; i++)
+    {
+        switch (codes[i])
+        {
+            case NIFTI_L2R: result[i] = 'R'; break;
+            case NIFTI_R2L: result[i] = 'L'; break;
+            case NIFTI_P2A: result[i] = 'A'; break;
+            case NIFTI_A2P: result[i] = 'P'; break;
+            case NIFTI_I2S: result[i] = 'S'; break;
+            case NIFTI_S2I: result[i] = 'I'; break;
+        }
+    }
+    return result;
+}
+
+inline int NiftiImage::fileVersion (const std::string &path)
+{
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_1_header *header = nifti_read_header(internal::stringToPath(path), NULL, false);
+    if (header == NULL)
+        return -1;
+    else
+    {
+        int version = NIFTI_VERSION(*header);
+        if (version == 0)
+        {
+            // NIfTI-2 has a 540-byte header - check for this or its byte-swapped equivalent
+            if (header->sizeof_hdr == 540 || header->sizeof_hdr == 469893120)
+            {
+                // The magic number has moved in NIfTI-2, so find it by byte offset
+                const char *magic = (char *) header + 4;
+                if (strncmp(magic,"ni2",3) == 0 || strncmp(magic,"n+2",3) == 0)
+                    version = 2;
+            }
+            else if (!nifti_hdr_looks_good(header))
+            {
+                // Not plausible as ANALYZE, so return -1
+                version = -1;
+            }
+        }
+        free(header);
+        return version;
+    }
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    int version;
+    void *header = nifti2_read_header(internal::stringToPath(path), &version, true);
+    if (header == NULL)
+        return -1;
+    free(header);
+    return version;
+#endif
+}
+
+inline void NiftiImage::acquire (nifti_image * const image)
+{
+    // If we're taking ownership of a new image, release the old one
+    if (this->image != NULL && this->image != image)
+        release();
+
+    // Set the internal pointer and create or update the reference counter
+    this->image = image;
+    if (image != NULL)
+    {
+        if (this->refCount == NULL)
+            this->refCount = new int(1);
+        else
+            (*this->refCount)++;
+
+#ifndef NDEBUG
+        Rc_printf("Acquiring pointer %p (v%d; reference count is %d)\n", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount);
+#endif
+    }
+}
+
+inline void NiftiImage::release ()
+{
+    if (this->image != NULL)
+    {
+        if (this->refCount != NULL)
+        {
+            (*this->refCount)--;
+#ifndef NDEBUG
+            Rc_printf("Releasing pointer %p (v%d; reference count is %d)\n", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount);
+#endif
+            if (*this->refCount < 1)
+            {
+#if RNIFTI_NIFTILIB_VERSION == 1
+                nifti_image_free(this->image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+                nifti2_image_free(this->image);
+#endif
+                this->image = NULL;
+                delete this->refCount;
+                this->refCount = NULL;
+            }
+        }
+        else
+            Rc_printf("Releasing untracked object %p", this->image);
+    }
+}
+
+inline void NiftiImage::copy (const nifti_image *source)
+{
+    if (source == NULL)
+        acquire(NULL);
+    else
+    {
+#if RNIFTI_NIFTILIB_VERSION == 1
+        acquire(nifti_copy_nim_info(source));
+        if (source->data != NULL)
+        {
+            size_t dataSize = nifti_get_volsize(source);
+            image->data = calloc(1, dataSize);
+            memcpy(image->data, source->data, dataSize);
+        }
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        acquire(nifti2_copy_nim_info(source));
+        if (source->data != NULL)
+        {
+            size_t dataSize = nifti2_get_volsize(source);
+            image->data = calloc(1, dataSize);
+            memcpy(image->data, source->data, dataSize);
+        }
+#endif
+    }
+}
+
+inline void NiftiImage::copy (const NiftiImage &source)
+{
+    const nifti_image *sourceStruct = source;
+
+    copy(sourceStruct);
+}
+
+inline void NiftiImage::copy (const Block &source)
+{
+    const nifti_image *sourceStruct = source.image;
+    if (sourceStruct == NULL)
+        acquire(NULL);
+    else
+    {
+#if RNIFTI_NIFTILIB_VERSION == 1
+        acquire(nifti_copy_nim_info(sourceStruct));
+        image->dim[0] = source.image->dim[0] - 1;
+        image->dim[source.dimension] = 1;
+        image->pixdim[source.dimension] = 1.0;
+        nifti_update_dims_from_array(image);
+
+        if (sourceStruct->data != NULL)
+        {
+            size_t blockSize = nifti_get_volsize(image);
+            image->data = calloc(1, blockSize);
+            memcpy(image->data, static_cast<char*>(source.image->data) + blockSize*source.index, blockSize);
+        }
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        acquire(nifti2_copy_nim_info(sourceStruct));
+        image->dim[0] = source.image->dim[0] - 1;
+        image->dim[source.dimension] = 1;
+        image->pixdim[source.dimension] = 1.0;
+        nifti2_update_dims_from_array(image);
+
+        if (sourceStruct->data != NULL)
+        {
+            size_t blockSize = nifti2_get_volsize(image);
+            image->data = calloc(1, blockSize);
+            memcpy(image->data, static_cast<char*>(source.image->data) + blockSize*source.index, blockSize);
+        }
+#endif
+    }
+}
+
+#ifdef USING_R
+
+// Convert an S4 "nifti" object, as defined in the oro.nifti package, to a "nifti_image" struct
+inline void NiftiImage::initFromNiftiS4 (const Rcpp::RObject &object, const bool copyData)
+{
+    nifti_1_header header;
+    header.sizeof_hdr = 348;
+
+    const std::vector<dim_t> dims = object.slot("dim_");
+    for (int i=0; i<8; i++)
+        header.dim[i] = dims[i];
+
+    header.intent_p1 = object.slot("intent_p1");
+    header.intent_p2 = object.slot("intent_p2");
+    header.intent_p3 = object.slot("intent_p3");
+    header.intent_code = object.slot("intent_code");
+
+    header.datatype = object.slot("datatype");
+    header.bitpix = object.slot("bitpix");
+
+    header.slice_start = object.slot("slice_start");
+    header.slice_end = object.slot("slice_end");
+    header.slice_code = Rcpp::as<int>(object.slot("slice_code"));
+    header.slice_duration = object.slot("slice_duration");
+
+    const std::vector<pixdim_t> pixdims = object.slot("pixdim");
+    for (int i=0; i<8; i++)
+        header.pixdim[i] = pixdims[i];
+    header.xyzt_units = Rcpp::as<int>(object.slot("xyzt_units"));
+
+    header.vox_offset = object.slot("vox_offset");
+
+    // oro.nifti does its own data rescaling, so we ignore the slope and intercept fields
+    header.scl_slope = 0.0;
+    header.scl_inter = 0.0;
+    header.toffset = object.slot("toffset");
+
+    header.cal_max = object.slot("cal_max");
+    header.cal_min = object.slot("cal_min");
+    header.glmax = header.glmin = 0;
+
+    strncpy(header.descrip, Rcpp::as<std::string>(object.slot("descrip")).c_str(), 79);
+    header.descrip[79] = '\0';
+    strncpy(header.aux_file, Rcpp::as<std::string>(object.slot("aux_file")).c_str(), 23);
+    header.aux_file[23] = '\0';
+    strncpy(header.intent_name, Rcpp::as<std::string>(object.slot("intent_name")).c_str(), 15);
+    header.intent_name[15] = '\0';
+    strncpy(header.magic, Rcpp::as<std::string>(object.slot("magic")).c_str(), 3);
+    header.magic[3] = '\0';
+
+    header.qform_code = object.slot("qform_code");
+    header.sform_code = object.slot("sform_code");
+
+    header.quatern_b = object.slot("quatern_b");
+    header.quatern_c = object.slot("quatern_c");
+    header.quatern_d = object.slot("quatern_d");
+    header.qoffset_x = object.slot("qoffset_x");
+    header.qoffset_y = object.slot("qoffset_y");
+    header.qoffset_z = object.slot("qoffset_z");
+
+    const std::vector<Xform::Element> srow_x = object.slot("srow_x");
+    const std::vector<Xform::Element> srow_y = object.slot("srow_y");
+    const std::vector<Xform::Element> srow_z = object.slot("srow_z");
+    for (int i=0; i<4; i++)
+    {
+        header.srow_x[i] = srow_x[i];
+        header.srow_y[i] = srow_y[i];
+        header.srow_z[i] = srow_z[i];
+    }
+
+    // Ignoring complex and RGB types here because oro.nifti doesn't yet support them
+    if (header.datatype == DT_UINT8 || header.datatype == DT_INT16 || header.datatype == DT_INT32 || header.datatype == DT_INT8 || header.datatype == DT_UINT16 || header.datatype == DT_UINT32)
+        header.datatype = DT_INT32;
+    else if (header.datatype == DT_FLOAT32 || header.datatype == DT_FLOAT64)
+        header.datatype = DT_FLOAT64;
+    else
+        throw std::runtime_error("Data type is not supported");
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+    acquire(nifti_convert_nhdr2nim(header, NULL));
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    acquire(nifti_convert_n1hdr2nim(header, NULL));
+#endif
+
+    const Rcpp::RObject data = object.slot(".Data");
+    if (!copyData || Rf_length(data) <= 1)
+        this->image->data = NULL;
+    else if (header.datatype == DT_INT32)
+    {
+        Rcpp::IntegerVector intData(data);
+        replaceData(NiftiImageData(intData.begin(), intData.end(), DT_INT32));
+    }
+    else
+    {
+        Rcpp::DoubleVector doubleData(data);
+        replaceData(NiftiImageData(doubleData.begin(), doubleData.end(), DT_FLOAT64));
+    }
+}
+
+inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const bool copyData)
+{
+    Rcpp::Reference mriImage(object);
+    Rcpp::Function getXform = mriImage.field("getXform");
+    Rcpp::NumericMatrix xform = getXform();
+
+    acquire(NULL);
+
+    if (Rf_length(mriImage.field("tags")) > 0)
+        initFromList(mriImage.field("tags"));
+
+    Rcpp::RObject data = mriImage.field("data");
+    if (data.inherits("SparseArray"))
+    {
+        Rcpp::Language call("as.array", data);
+        data = call.eval();
+    }
+
+    const int datatype = (Rf_isNull(data) ? DT_INT32 : sexpTypeToNiftiType(data.sexp_type()));
+
+    dim_t dims[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+    const std::vector<dim_t> dimVector = mriImage.field("imageDims");
+    const int nDims = std::min(7, int(dimVector.size()));
+    dims[0] = nDims;
+    size_t nVoxels = 1;
+    for (int i=0; i<nDims; i++)
+    {
+        dims[i+1] = dimVector[i];
+        nVoxels *= dimVector[i];
+    }
+
+    if (this->image == NULL)
+    {
+#if RNIFTI_NIFTILIB_VERSION == 1
+        acquire(nifti_make_new_nim(dims, datatype, FALSE));
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        acquire(nifti2_make_new_nim(dims, datatype, FALSE));
+#endif
+    }
+    else
+    {
+        std::copy(dims, dims+8, this->image->dim);
+        this->image->datatype = datatype;
+        nifti_datatype_sizes(image->datatype, &image->nbyper, NULL);
+    }
+
+    if (copyData && !Rf_isNull(data))
+    {
+        // NB: nifti_get_volsize() will not be right here if there were tags
+        const size_t dataSize = nVoxels * image->nbyper;
+        this->image->data = calloc(1, dataSize);
+        if (datatype == DT_INT32)
+            memcpy(this->image->data, INTEGER(data), dataSize);
+        else
+            memcpy(this->image->data, REAL(data), dataSize);
+    }
+    else
+        this->image->data = NULL;
+
+    const std::vector<pixdim_t> pixdimVector = mriImage.field("voxelDims");
+    const int pixdimLength = pixdimVector.size();
+    for (int i=0; i<std::min(pixdimLength,nDims); i++)
+        this->image->pixdim[i+1] = std::abs(pixdimVector[i]);
+
+    const std::vector<std::string> pixunitsVector = mriImage.field("voxelDimUnits");
+    setPixunits(pixunitsVector);
+
+    if (xform.rows() != 4 || xform.cols() != 4)
+        this->image->qform_code = this->image->sform_code = 0;
+    else
+    {
+        const Xform::Matrix xformMatrix(xform);
+        this->qform() = xformMatrix;
+        this->sform() = xformMatrix;
+        this->image->qform_code = this->image->sform_code = 2;
+    }
+}
+
+inline void NiftiImage::initFromList (const Rcpp::RObject &object)
+{
+    Rcpp::List list(object);
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_1_header *header = nifti_make_new_header(NULL, DT_FLOAT64);
+    internal::updateHeader(header, list);
+    acquire(nifti_convert_nhdr2nim(*header, NULL));
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    nifti_1_header *header = nifti_make_new_n1_header(NULL, DT_FLOAT64);
+    internal::updateHeader(header, list);
+    acquire(nifti_convert_n1hdr2nim(*header, NULL));
+#endif
+    this->image->data = NULL;
+    free(header);
+}
+
+inline void NiftiImage::initFromArray (const Rcpp::RObject &object, const bool copyData)
+{
+    dim_t dims[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+    const std::vector<dim_t> dimVector = object.attr("dim");
+
+    const int nDims = std::min(7, int(dimVector.size()));
+    dims[0] = nDims;
+    for (int i=0; i<nDims; i++)
+        dims[i+1] = dimVector[i];
+
+    int datatype = sexpTypeToNiftiType(object.sexp_type());
+    if (object.inherits("rgbArray"))
+    {
+        const int channels = (object.hasAttribute("channels") ? object.attr("channels") : 3);
+        datatype = (channels == 4 ? DT_RGBA32 : DT_RGB24);
+    }
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+    acquire(nifti_make_new_nim(dims, datatype, int(copyData)));
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    acquire(nifti2_make_new_nim(dims, datatype, int(copyData)));
+#endif
+
+    if (copyData)
+    {
+#if RNIFTI_NIFTILIB_VERSION == 1
+        const size_t dataSize = nifti_get_volsize(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        const size_t dataSize = nifti2_get_volsize(image);
+#endif
+        if (datatype == DT_INT32 || datatype == DT_RGBA32)
+            memcpy(this->image->data, INTEGER(object), dataSize);
+        else if (datatype == DT_RGB24)
+        {
+            NiftiImageData data(image);
+            std::copy(INTEGER(object), INTEGER(object)+image->nvox, data.begin());
+        }
+        else if (datatype == DT_COMPLEX128)
+            memcpy(this->image->data, COMPLEX(object), dataSize);
+        else
+            memcpy(this->image->data, REAL(object), dataSize);
+    }
+    else
+        this->image->data = NULL;
+
+    if (object.hasAttribute("pixdim"))
+    {
+        const std::vector<pixdim_t> pixdimVector = object.attr("pixdim");
+        const int pixdimLength = pixdimVector.size();
+        for (int i=0; i<std::min(pixdimLength,nDims); i++)
+            this->image->pixdim[i+1] = pixdimVector[i];
+    }
+
+    if (object.hasAttribute("pixunits"))
+    {
+        const std::vector<std::string> pixunitsVector = object.attr("pixunits");
+        setPixunits(pixunitsVector);
+    }
+}
+
+inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const bool readOnly)
+    : image(NULL), refCount(NULL)
+{
+    Rcpp::RObject imageObject(object);
+    bool resolved = false;
+
+    if (imageObject.hasAttribute(".nifti_image_ptr"))
+    {
+        Rcpp::XPtr<NiftiImage> imagePtr(SEXP(imageObject.attr(".nifti_image_ptr")));
+        NiftiImage *ptr = imagePtr.get();
+        if (ptr != NULL)
+        {
+#if RNIFTI_NIFTILIB_VERSION == 1
+            if (imageObject.hasAttribute(".nifti_image_ver") && int(imageObject.attr(".nifti_image_ver")) == 2)
+                acquire(internal::convertImageV2to1(reinterpret_cast<nifti2_image*>(ptr->image)));
+#elif RNIFTI_NIFTILIB_VERSION == 2
+            if (!imageObject.hasAttribute(".nifti_image_ver") || int(imageObject.attr(".nifti_image_ver")) == 1)
+                acquire(internal::convertImageV1to2(reinterpret_cast<nifti1_image*>(ptr->image)));
+#endif
+            // Copy if the object have multiple R-level references and we're not working read-only
+            else if (MAYBE_SHARED(object) && !readOnly)
+                copy(*ptr);
+            else
+                acquire(*ptr);
+
+            resolved = true;
+
+            if (imageObject.hasAttribute("dim"))
+                update(imageObject);
+        }
+        else if (Rf_isString(object))
+            throw std::runtime_error("Internal image is not valid");
+        else
+            Rf_warning("Ignoring invalid internal pointer");
+    }
+
+    if (!resolved)
+    {
+        if (Rf_isNull(object))
+            acquire(NULL);
+        else if (Rf_isString(object))
+        {
+            const std::string path = Rcpp::as<std::string>(object);
+#if RNIFTI_NIFTILIB_VERSION == 1
+            acquire(nifti_image_read(internal::stringToPath(path), readData));
+#elif RNIFTI_NIFTILIB_VERSION == 2
+            acquire(nifti2_image_read(internal::stringToPath(path), readData));
+#endif
+            if (this->image == NULL)
+                throw std::runtime_error("Failed to read image from path " + path);
+        }
+        else if (imageObject.inherits("nifti"))
+            initFromNiftiS4(imageObject, readData);
+        else if (imageObject.inherits("anlz"))
+            throw std::runtime_error("Cannot currently convert objects of class \"anlz\"");
+        else if (imageObject.inherits("MriImage"))
+            initFromMriImage(imageObject, readData);
+        else if (Rf_isVectorList(object))
+            initFromList(imageObject);
+        else if (imageObject.hasAttribute("dim"))
+            initFromArray(imageObject, readData);
+        else if (imageObject.hasAttribute("class"))
+            throw std::runtime_error("Cannot convert object of class \"" + Rcpp::as<std::string>(imageObject.attr("class")) + "\" to a nifti_image");
+        else
+            throw std::runtime_error("Cannot convert unclassed non-array object");
+    }
+
+    if (this->image != NULL)
+    {
+#if RNIFTI_NIFTILIB_VERSION == 1
+        nifti_update_dims_from_array(this->image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        nifti2_update_dims_from_array(this->image);
+#endif
+    }
+
+#ifndef NDEBUG
+    Rc_printf("Creating NiftiImage (v%d) with pointer %p (from SEXP)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+}
+
+#endif // USING_R
+
+inline void NiftiImage::initFromDims (const std::vector<dim_t> &dim, const int datatype)
+{
+    const int nDims = std::min(7, int(dim.size()));
+    dim_t dims[8] = { nDims, 0, 0, 0, 0, 0, 0, 0 };
+    std::copy(dim.begin(), dim.begin() + nDims, &dims[1]);
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+    acquire(nifti_make_new_nim(dims, datatype, 1));
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    acquire(nifti2_make_new_nim(dims, datatype, 1));
+#endif
+
+    if (image == NULL)
+        throw std::runtime_error("Failed to create image from scratch");
+}
+
+inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const int datatype)
+    : image(NULL), refCount(NULL)
+{
+    initFromDims(dim, datatype);
+#ifndef NDEBUG
+    Rc_printf("Creating NiftiImage (v%d) with pointer %p (from dims)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+}
+
+inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const std::string &datatype)
+    : image(NULL), refCount(NULL)
+{
+    initFromDims(dim, internal::stringToDatatype(datatype));
+#ifndef NDEBUG
+    Rc_printf("Creating NiftiImage (v%d) with pointer %p (from dims)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+}
+
+inline NiftiImage::NiftiImage (const std::string &path, const bool readData)
+    : image(NULL), refCount(NULL)
+{
+#if RNIFTI_NIFTILIB_VERSION == 1
+    acquire(nifti_image_read(internal::stringToPath(path), readData));
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    acquire(nifti2_image_read(internal::stringToPath(path), readData));
+#endif
+
+    if (image == NULL)
+        throw std::runtime_error("Failed to read image from path " + path);
+
+#ifndef NDEBUG
+    Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+}
+
+inline NiftiImage::NiftiImage (const std::string &path, const std::vector<dim_t> &volumes)
+    : image(NULL), refCount(NULL)
+{
+    if (volumes.empty())
+        throw std::runtime_error("The vector of volumes is empty");
+
+    nifti_brick_list brickList;
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+    acquire(nifti_image_read_bricks(internal::stringToPath(path), static_cast<int>(volumes.size()), &volumes.front(), &brickList));
+
+    if (image == NULL)
+        throw std::runtime_error("Failed to read image from path " + path);
+
+    size_t brickSize = image->nbyper * image->nx * image->ny * image->nz;
+    image->data = calloc(1, nifti_get_volsize(image));
+    for (dim_t i=0; i<brickList.nbricks; i++)
+        memcpy((char *) image->data + i * brickSize, brickList.bricks[i], brickSize);
+
+    nifti_free_NBL(&brickList);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    acquire(nifti2_image_read_bricks(internal::stringToPath(path), volumes.size(), &volumes.front(), &brickList));
+
+    if (image == NULL)
+        throw std::runtime_error("Failed to read image from path " + path);
+
+    size_t brickSize = image->nbyper * image->nx * image->ny * image->nz;
+    image->data = calloc(1, nifti2_get_volsize(image));
+    for (dim_t i=0; i<brickList.nbricks; i++)
+        memcpy((char *) image->data + i * brickSize, brickList.bricks[i], brickSize);
+
+    nifti2_free_NBL(&brickList);
+#endif
+
+#ifndef NDEBUG
+    Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string and volume vector)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+}
+
+inline void NiftiImage::updatePixdim (const std::vector<pixdim_t> &pixdim)
+{
+    const int nDims = image->dim[0];
+    const std::vector<pixdim_t> origPixdim(image->pixdim+1, image->pixdim+4);
+
+    for (int i=1; i<8; i++)
+        image->pixdim[i] = 0.0;
+
+    const int pixdimLength = static_cast<int>(pixdim.size());
+    for (int i=0; i<std::min(pixdimLength,nDims); i++)
+        image->pixdim[i+1] = pixdim[i];
+
+    if (!std::equal(origPixdim.begin(), origPixdim.begin() + std::min(3,nDims), pixdim.begin()))
+    {
+        Xform::Matrix scaleMatrix = Xform::Matrix::eye();
+        for (int i=0; i<std::min(pixdimLength,3); i++)
+            scaleMatrix(i,i) = pixdim[i] / origPixdim[i];
+
+        if (image->qform_code > 0)
+            this->qform() = qform().matrix() * scaleMatrix;
+        if (image->sform_code > 0)
+            this->sform() = sform().matrix() * scaleMatrix;
+    }
+}
+
+inline void NiftiImage::setPixunits (const std::vector<std::string> &pixunits)
+{
+    for (size_t i=0; i<pixunits.size(); i++)
+    {
+        if (pixunits[i] == "m")
+            image->xyz_units = NIFTI_UNITS_METER;
+        else if (pixunits[i] == "mm")
+            image->xyz_units = NIFTI_UNITS_MM;
+        else if (pixunits[i] == "um")
+            image->xyz_units = NIFTI_UNITS_MICRON;
+        else if (pixunits[i] == "s")
+            image->time_units = NIFTI_UNITS_SEC;
+        else if (pixunits[i] == "ms")
+            image->time_units = NIFTI_UNITS_MSEC;
+        else if (pixunits[i] == "us")
+            image->time_units = NIFTI_UNITS_USEC;
+        else if (pixunits[i] == "Hz")
+            image->time_units = NIFTI_UNITS_HZ;
+        else if (pixunits[i] == "ppm")
+            image->time_units = NIFTI_UNITS_PPM;
+        else if (pixunits[i] == "rad/s")
+            image->time_units = NIFTI_UNITS_RADS;
+    }
+}
+
+inline NiftiImage & NiftiImage::rescale (const std::vector<pixdim_t> &scales)
+{
+    std::vector<pixdim_t> pixdim(image->pixdim+1, image->pixdim+4);
+
+    for (int i=0; i<std::min(3, int(scales.size())); i++)
+    {
+        if (scales[i] != 1.0)
+        {
+            pixdim[i] /= scales[i];
+            image->dim[i+1] = static_cast<dim_t>(std::floor(image->dim[i+1] * scales[i]));
+        }
+    }
+
+    updatePixdim(pixdim);
+
+    // Data vector is now the wrong size, so drop it
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_update_dims_from_array(image);
+    nifti_image_unload(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    nifti2_update_dims_from_array(image);
+    nifti2_image_unload(image);
+#endif
+
+    image->scl_slope = 0.0;
+    image->scl_inter = 0.0;
+
+    return *this;
+}
+
+inline NiftiImage & NiftiImage::reorient (const int icode, const int jcode, const int kcode)
+{
+    if (this->isNull())
+        return *this;
+    if (image->qform_code == 0 && image->sform_code == 0)
+    {
+        Rf_warning("Image qform and sform codes are both zero, so it cannot be reoriented");
+        return *this;
+    }
+
+    int used[6] = { 0, 0, 0, 0, 0, 0 };
+    used[icode-1] = 1;
+    used[jcode-1] = 1;
+    used[kcode-1] = 1;
+    if (used[0]+used[1] != 1 || used[2]+used[3] != 1 || used[4]+used[5] != 1)
+        throw std::runtime_error("Each canonical axis should be used exactly once");
+
+    const int codes[3] = { icode, jcode, kcode };
+    const Xform native = this->xform();
+
+    // Calculate the origin, which requires inverting the current xform
+    // Here we use a simplified formula that exploits blockwise inversion and the nature of xforms
+    Xform::Vector3 origin = -(native.submatrix().inverse() * native.offset());
+
+    // Create a target xform (rotation matrix only)
+    Xform::Submatrix target;
+    for (int j=0; j<3; j++)
+    {
+        for (int i=0; i<3; i++)
+            target(i,j) = 0.0;
+
+        switch (codes[j])
+        {
+            case NIFTI_L2R: target(0,j) =  1.0; break;
+            case NIFTI_R2L: target(0,j) = -1.0; break;
+            case NIFTI_P2A: target(1,j) =  1.0; break;
+            case NIFTI_A2P: target(1,j) = -1.0; break;
+            case NIFTI_I2S: target(2,j) =  1.0; break;
+            case NIFTI_S2I: target(2,j) = -1.0; break;
+        }
+    }
+
+    // Extract (inverse of) canonical axis matrix from native xform
+    int nicode, njcode, nkcode;
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_mat44_to_orientation(native, &nicode, &njcode, &nkcode);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    nifti_dmat44_to_orientation(native, &nicode, &njcode, &nkcode);
+#endif
+    int ncodes[3] = { nicode, njcode, nkcode };
+    Xform::Submatrix nativeAxesTransposed;
+    for (int i=0; i<3; i++)
+    {
+        for (int j=0; j<3; j++)
+            nativeAxesTransposed(i,j) = 0.0;
+
+        switch (ncodes[i])
+        {
+            case NIFTI_L2R: nativeAxesTransposed(i,0) =  1.0; break;
+            case NIFTI_R2L: nativeAxesTransposed(i,0) = -1.0; break;
+            case NIFTI_P2A: nativeAxesTransposed(i,1) =  1.0; break;
+            case NIFTI_A2P: nativeAxesTransposed(i,1) = -1.0; break;
+            case NIFTI_I2S: nativeAxesTransposed(i,2) =  1.0; break;
+            case NIFTI_S2I: nativeAxesTransposed(i,2) = -1.0; break;
+        }
+    }
+
+    // Check for no-op case
+    if (icode == nicode && jcode == njcode && kcode == nkcode)
+        return *this;
+
+    // The transform is t(approx_old_xform) %*% target_xform
+    // The new xform is old_xform %*% transform
+    // NB: "transform" is really 4x4, but the last row is simple and the last column is filled below
+    const Xform::Matrix &nativeMat = native.matrix();
+    Xform::Submatrix transform = nativeAxesTransposed * target;
+    Xform::Matrix result;
+    for (int i=0; i<4; i++)
+    {
+        for (int j=0; j<3; j++)
+            result(i,j) = nativeMat(i,0) * transform(0,j) + nativeMat(i,1) * transform(1,j) + nativeMat(i,2) * transform(2,j);
+
+        result(3,i) = (i == 3 ? 1.f : 0.f);
+    }
+
+    // Extract the mapping between dimensions and the signs
+    // These vectors are all indexed in the target space, except "revsigns"
+    dim_t locs[3], signs[3], newdim[3], revsigns[3];
+    pixdim_t newpixdim[3];
+    double maxes[3] = { R_NegInf, R_NegInf, R_NegInf };
+    Xform::Vector3 offset;
+    for (int j=0; j<3; j++)
+    {
+        // Find the largest absolute value in each column, which gives the old dimension corresponding to each new dimension
+        for (int i=0; i<3; i++)
+        {
+            const double value = static_cast<double>(transform(i,j));
+            if (fabs(value) > maxes[j])
+            {
+                maxes[j] = fabs(value);
+                signs[j] = value > 0.0 ? 1 : -1;
+                locs[j] = i;
+            }
+        }
+
+        // Obtain the sign for the reverse mapping
+        revsigns[locs[j]] = signs[j];
+
+        // Permute dim and pixdim
+        newdim[j] = image->dim[locs[j]+1];
+        newpixdim[j] = image->pixdim[locs[j]+1];
+
+        // Flip and/or permute the origin
+        if (signs[j] < 0)
+            offset[j] = image->dim[locs[j]+1] - origin[locs[j]] - 1;
+        else
+            offset[j] = origin[locs[j]];
+    }
+
+    // Convert the origin back to an xform offset and insert it
+    offset = -(Xform(result).submatrix() * offset);
+    for (int i=0; i<3; i++)
+        result(i,3) = offset[i];
+
+    // Update the xforms with nonzero codes
+    if (image->qform_code > 0)
+        this->qform() = result;
+    if (image->sform_code > 0)
+        this->sform() = result;
+
+    // Calculate strides: the step in target space associated with each dimension in source space
+    ptrdiff_t strides[3];
+    strides[locs[0]] = 1;
+    strides[locs[1]] = strides[locs[0]] * image->dim[locs[0]+1];
+    strides[locs[2]] = strides[locs[1]] * image->dim[locs[1]+1];
+
+    // Permute the data (if present)
+    if (image->data != NULL)
+    {
+        size_t volSize = size_t(image->nx * image->ny * image->nz);
+        size_t nVolumes = std::max(size_t(1), size_t(image->nvox) / volSize);
+
+        const NiftiImageData oldData = this->data();
+        NiftiImageData newData(oldData);
+
+        // Where the sign is negative we need to start at the end of the dimension
+        size_t volStart = 0;
+        for (int i=0; i<3; i++)
+        {
+            if (revsigns[i] < 0)
+                volStart += (image->dim[i+1] - 1) * strides[i];
+        }
+
+        // Iterate over the data and place it into a new vector
+        NiftiImageData::Iterator it = oldData.begin();
+        for (size_t v=0; v<nVolumes; v++)
+        {
+            for (dim_t k=0; k<image->nz; k++)
+            {
+                ptrdiff_t offset = k * strides[2] * revsigns[2];
+                for (dim_t j=0; j<image->ny; j++)
+                {
+                    for (dim_t i=0; i<image->nx; i++)
+                    {
+                        newData[volStart + offset] = *it++;
+                        offset += strides[0] * revsigns[0];
+                    }
+                    offset += strides[1] * revsigns[1] - image->nx * strides[0] * revsigns[0];
+                }
+            }
+            volStart += volSize;
+        }
+
+        // Vector data needs to be reoriented to match the xform
+        if (image->intent_code == NIFTI_INTENT_VECTOR && image->dim[5] == 3)
+        {
+            Xform::Vector3 oldVec;
+            const size_t supervolSize = volSize * image->nt;
+            NiftiImageData::Iterator it = newData.begin();
+            for (size_t i=0; i<supervolSize; i++, ++it)
+            {
+                for (int j=0; j<3; j++)
+                    oldVec[j] = *(it + j*supervolSize);
+                const Xform::Vector3 newVec = transform * oldVec;
+                for (int j=0; j<3; j++)
+                    *(it + j*supervolSize) = newVec[j];
+            }
+        }
+
+        // Replace the existing data in the image
+        this->replaceData(newData);
+    }
+
+    // Copy new dims and pixdims in
+    // NB: Old dims are used above, so this must happen last
+    std::copy(newdim, newdim+3, image->dim+1);
+    std::copy(newpixdim, newpixdim+3, image->pixdim+1);
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_update_dims_from_array(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    nifti2_update_dims_from_array(image);
+#endif
+
+    return *this;
+}
+
+inline NiftiImage & NiftiImage::reorient (const std::string &orientation)
+{
+    if (orientation.length() != 3)
+        throw std::runtime_error("Orientation string should have exactly three characters");
+
+    int codes[3];
+    for (int i=0; i<3; i++)
+    {
+        switch (orientation[i])
+        {
+            case 'r': case 'R': codes[i] = NIFTI_L2R; break;
+            case 'l': case 'L': codes[i] = NIFTI_R2L; break;
+            case 'a': case 'A': codes[i] = NIFTI_P2A; break;
+            case 'p': case 'P': codes[i] = NIFTI_A2P; break;
+            case 's': case 'S': codes[i] = NIFTI_I2S; break;
+            case 'i': case 'I': codes[i] = NIFTI_S2I; break;
+
+            default:
+            throw std::runtime_error("Orientation string is invalid");
+        }
+    }
+
+    return reorient(codes[0], codes[1], codes[2]);
+}
+
+#ifdef USING_R
+
+inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object)
+{
+    if (Rf_isVectorList(object))
+    {
+        Rcpp::List list(object);
+        nifti_1_header *header = NULL;
+        if (this->isNull())
+        {
+#if RNIFTI_NIFTILIB_VERSION == 1
+            header = nifti_make_new_header(NULL, DT_FLOAT64);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+            header = nifti_make_new_n1_header(NULL, DT_FLOAT64);
+#endif
+            internal::updateHeader(header, list, true);
+        }
+        else
+        {
+            header = (nifti_1_header *) calloc(1, sizeof(nifti_1_header));
+#if RNIFTI_NIFTILIB_VERSION == 1
+            *header = nifti_convert_nim2nhdr(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+            nifti_convert_nim2n1hdr(image, header);
+#endif
+            internal::updateHeader(header, list, true);
+        }
+
+        if (header != NULL)
+        {
+            // Retain the data pointer, but otherwise overwrite the stored object with one created from the header
+            // The file names can't be preserved through the round-trip, so free them
+            void *dataPtr = image->data;
+#if RNIFTI_NIFTILIB_VERSION == 1
+            nifti_image *tempImage = nifti_convert_nhdr2nim(*header, NULL);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+            nifti_image *tempImage = nifti_convert_n1hdr2nim(*header, NULL);
+#endif
+
+            if (image->fname != NULL)
+                free(image->fname);
+            if (image->iname != NULL)
+                free(image->iname);
+
+            memcpy(image, tempImage, sizeof(nifti_image));
+            image->num_ext = 0;
+            image->ext_list = NULL;
+            image->data = dataPtr;
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+            nifti_image_free(tempImage);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+            nifti2_image_free(tempImage);
+#endif
+            free(header);
+        }
+    }
+    else if (object.hasAttribute("dim"))
+    {
+        for (int i=0; i<8; i++)
+            image->dim[i] = 0;
+        const std::vector<int> dimVector = object.attr("dim");
+
+        const int nDims = std::min(7, int(dimVector.size()));
+        image->dim[0] = nDims;
+        for (int i=0; i<nDims; i++)
+            image->dim[i+1] = dimVector[i];
+
+        if (object.hasAttribute("pixdim"))
+        {
+            const std::vector<pixdim_t> pixdimVector = object.attr("pixdim");
+            updatePixdim(pixdimVector);
+        }
+
+        if (object.hasAttribute("pixunits"))
+        {
+            const std::vector<std::string> pixunitsVector = object.attr("pixunits");
+            setPixunits(pixunitsVector);
+        }
+
+        // This library function clobbers dim[0] if the last dimension is unitary; we undo that here
+#if RNIFTI_NIFTILIB_VERSION == 1
+        nifti_update_dims_from_array(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        nifti2_update_dims_from_array(image);
+#endif
+        image->dim[0] = image->ndim = nDims;
+
+        image->datatype = NiftiImage::sexpTypeToNiftiType(object.sexp_type());
+        if (object.inherits("rgbArray"))
+        {
+            const int channels = object.attr("channels");
+            image->datatype = (channels == 4 ? DT_RGBA32 : DT_RGB24);
+        }
+        nifti_datatype_sizes(image->datatype, &image->nbyper, NULL);
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+        nifti_image_unload(image);
+        const size_t dataSize = nifti_get_volsize(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        nifti2_image_unload(image);
+        const size_t dataSize = nifti2_get_volsize(image);
+#endif
+
+        image->data = calloc(1, dataSize);
+        if (image->datatype == DT_INT32 || image->datatype == DT_RGBA32)
+            memcpy(image->data, INTEGER(object), dataSize);
+        else if (image->datatype == DT_RGB24)
+            std::copy(INTEGER(object), INTEGER(object)+image->nvox, this->data().begin());
+        else if (image->datatype == DT_COMPLEX128)
+            memcpy(image->data, COMPLEX(object), dataSize);
+        else
+            memcpy(image->data, REAL(object), dataSize);
+
+        image->scl_slope = 0.0;
+        image->scl_inter = 0.0;
+    }
+
+    return *this;
+}
+
+#endif // USING_R
+
+inline const NiftiImage::Xform NiftiImage::xform (const bool preferQuaternion) const
+{
+    if (image == NULL)
+        return Xform();
+    else if (image->qform_code <= 0 && image->sform_code <= 0)
+    {
+        // No qform or sform so use pixdim (NB: other software may assume differently)
+        Xform::Matrix matrix;
+        for (int i=0; i<3; i++)
+            matrix(i,i) = (image->pixdim[i+1]==0 ? 1 : image->pixdim[i+1]);
+        matrix(3,3) = 1.0;
+        return Xform(matrix);
+    }
+    else if ((preferQuaternion && image->qform_code > 0) || image->sform_code <= 0)
+        return qform();
+    else
+        return sform();
+}
+
+template <typename TargetType>
+inline std::vector<TargetType> NiftiImage::Block::getData (const bool useSlope) const
+{
+    NiftiImageData data = this->data();
+    if (!useSlope)
+        data = data.unscaled();
+
+    if (image.isNull() || data.isEmpty())
+        return std::vector<TargetType>();
+    else
+    {
+        std::vector<TargetType> result(data.size());
+        std::copy(data.begin(), data.end(), result.begin());
+        return result;
+    }
+}
+
+template <typename TargetType>
+inline std::vector<TargetType> NiftiImage::getData (const bool useSlope) const
+{
+    NiftiImageData data = this->data();
+    if (!useSlope)
+        data = data.unscaled();
+
+    if (this->isNull() || data.isEmpty())
+        return std::vector<TargetType>();
+    else
+    {
+        std::vector<TargetType> result(data.size());
+        std::copy(data.begin(), data.end(), result.begin());
+        return result;
+    }
+}
+
+inline NiftiImage & NiftiImage::changeDatatype (const int datatype, const bool useSlope)
+{
+    if (this->isNull() || image->datatype == datatype)
+        return *this;
+
+    if (useSlope && this->isDataScaled())
+        throw std::runtime_error("Resetting the slope and intercept for an image with them already set is not supported");
+
+    const NiftiImageData data(useSlope ? this->data() : this->data().unscaled(), datatype);
+    return replaceData(data);
+}
+
+inline NiftiImage & NiftiImage::changeDatatype (const std::string &datatype, const bool useSlope)
+{
+    return changeDatatype(internal::stringToDatatype(datatype), useSlope);
+}
+
+template <typename SourceType>
+inline NiftiImage & NiftiImage::replaceData (const std::vector<SourceType> &data, const int datatype)
+{
+    replaceData(NiftiImageData(data.begin(), data.end(), datatype));
+    return *this;
+}
+
+inline NiftiImage & NiftiImage::replaceData (const NiftiImageData &data)
+{
+    if (this->isNull())
+        return *this;
+    else if (data.isEmpty())
+    {
+#if RNIFTI_NIFTILIB_VERSION == 1
+        nifti_image_unload(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        nifti2_image_unload(image);
+#endif
+        return *this;
+    }
+    else if (data.length() != size_t(image->nvox))
+        throw std::runtime_error("New data length does not match the number of voxels in the image");
+
+    // Copy the data
+    NiftiImageData copy = data;
+#if RNIFTI_NIFTILIB_VERSION == 1
+    nifti_image_unload(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    nifti2_image_unload(image);
+#endif
+    image->data = copy.blob();
+    image->datatype = copy.datatype();
+    image->scl_slope = static_cast<scale_t>(copy.slope);
+    image->scl_inter = static_cast<scale_t>(copy.intercept);
+    nifti_datatype_sizes(image->datatype, &image->nbyper, &image->swapsize);
+
+    double min, max;
+    copy.minmax(&min, &max);
+    image->cal_min = static_cast<scale_t>(min);
+    image->cal_max = static_cast<scale_t>(max);
+
+    copy.disown();
+
+    return *this;
+}
+
+inline std::pair<std::string,std::string> NiftiImage::toFile (const std::string fileName, const int datatype, const int filetype) const
+{
+    const bool changingDatatype = (datatype != DT_NONE && !this->isNull() && datatype != image->datatype);
+
+    // Copy the source image only if the datatype will be changed
+    NiftiImage imageToWrite(*this, changingDatatype);
+
+    if (changingDatatype)
+        imageToWrite.changeDatatype(datatype, true);
+    if (filetype >= 0 && filetype <= NIFTI_MAX_FTYPE)
+        imageToWrite->nifti_type = filetype;
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+    const int status = nifti_set_filenames(imageToWrite, internal::stringToPath(fileName), false, true);
+    if (status != 0)
+        throw std::runtime_error("Failed to set filenames for NIfTI object");
+    nifti_image_write(imageToWrite);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+    const int status = nifti2_set_filenames(imageToWrite, internal::stringToPath(fileName), false, true);
+    if (status != 0)
+        throw std::runtime_error("Failed to set filenames for NIfTI object");
+    nifti2_image_write(imageToWrite);
+#endif
+
+    return std::pair<std::string,std::string>(std::string(imageToWrite->fname), std::string(imageToWrite->iname));
+}
+
+inline std::pair<std::string,std::string> NiftiImage::toFile (const std::string fileName, const std::string &datatype, const int filetype) const
+{
+    return toFile(fileName, internal::stringToDatatype(datatype), filetype);
+}
+
+#ifdef USING_R
+
+inline Rcpp::RObject NiftiImage::toArray () const
+{
+    Rcpp::RObject array;
+
+    if (this->isNull())
+        return array;
+    else
+    {
+        NiftiImageData data = this->data();
+        if (data.isEmpty())
+        {
+            Rf_warning("Internal image contains no data - filling array with NAs");
+            array = Rcpp::LogicalVector(image->nvox, NA_LOGICAL);
+        }
+        else if (data.isComplex())
+            array = Rcpp::ComplexVector(data.begin(), data.end());
+        else if (data.isFloatingPoint() || data.isScaled())
+            array = Rcpp::NumericVector(data.begin(), data.end());
+        else
+            array = Rcpp::IntegerVector(data.begin(), data.end());
+
+        internal::addAttributes(array, *this, true, true, false);
+        if (data.isRgb())
+        {
+            array.attr("class") = Rcpp::CharacterVector::create("niftiImage", "rgbArray", "array");
+            array.attr("channels") = (data.datatype() == DT_RGBA32 ? 4 : 3);
+        }
+        else
+            array.attr("class") = Rcpp::CharacterVector::create("niftiImage", "array");
+        return array;
+    }
+}
+
+inline Rcpp::RObject NiftiImage::toPointer (const std::string label) const
+{
+    if (this->isNull())
+        return Rcpp::RObject();
+    else
+    {
+        Rcpp::RObject string = Rcpp::wrap(label);
+        internal::addAttributes(string, *this, false);
+        string.attr("class") = Rcpp::CharacterVector::create("internalImage", "niftiImage");
+        return string;
+    }
+}
+
+inline Rcpp::RObject NiftiImage::toArrayOrPointer (const bool internal, const std::string label) const
+{
+    return (internal ? toPointer(label) : toArray());
+}
+
+#endif // USING_R
+
+#endif
diff --git a/reg-io/RNifti/NiftiImage_matrix.h b/reg-io/RNifti/NiftiImage_matrix.h
new file mode 100644
index 00000000..e89695db
--- /dev/null
+++ b/reg-io/RNifti/NiftiImage_matrix.h
@@ -0,0 +1,135 @@
+#ifndef _NIFTI_IMAGE_MATRIX_H_
+#define _NIFTI_IMAGE_MATRIX_H_
+
+template <>
+inline SquareMatrix<mat33,float,3> SquareMatrix<mat33,float,3>::inverse () const
+{
+    return SquareMatrix<mat33,float,3>(nifti_mat33_inverse(*niftiPointer()));
+}
+
+template <>
+inline SquareMatrix<mat33,float,3> SquareMatrix<mat33,float,3>::polar () const
+{
+    return SquareMatrix<mat33,float,3>(nifti_mat33_polar(*niftiPointer()));
+}
+
+template <>
+inline float SquareMatrix<mat33,float,3>::colnorm () const
+{
+    return nifti_mat33_colnorm(*niftiPointer());
+}
+
+template <>
+inline float SquareMatrix<mat33,float,3>::rownorm () const
+{
+    return nifti_mat33_rownorm(*niftiPointer());
+}
+
+template <>
+inline float SquareMatrix<mat33,float,3>::determ () const
+{
+    return nifti_mat33_determ(*niftiPointer());
+}
+
+template <>
+inline SquareMatrix<mat33,float,3> SquareMatrix<mat33,float,3>::multiply (const SquareMatrix<mat33,float,3> &other) const
+{
+    return SquareMatrix<mat33,float,3>(nifti_mat33_mul(*niftiPointer(), *other.niftiPointer()));
+}
+
+template <>
+inline SquareMatrix<mat44,float,4> SquareMatrix<mat44,float,4>::inverse () const
+{
+    return SquareMatrix<mat44,float,4>(nifti_mat44_inverse(*niftiPointer()));
+}
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+
+// NB: niftilib v1 does not define nifti_mat44_mul
+template <>
+inline SquareMatrix<mat44,float,4> SquareMatrix<mat44,float,4>::multiply (const SquareMatrix<mat44,float,4> &other) const
+{
+    SquareMatrix<mat44,float,4> result;
+    for (int i=0; i < 4; i++)
+    {
+        for (int j=0; j < 4; j++)
+        {
+            result(i,j) = 0.0;
+            for (int k=0; k<4; k++)
+                result(i,j) += (*this)(i,k) * other(k,j);
+        }
+    }
+    return result;
+}
+
+#elif RNIFTI_NIFTILIB_VERSION == 2
+
+template <>
+inline SquareMatrix<nifti_dmat33,double,3> SquareMatrix<nifti_dmat33,double,3>::inverse () const
+{
+    return SquareMatrix<nifti_dmat33,double,3>(nifti_dmat33_inverse(*niftiPointer()));
+}
+
+template <>
+inline SquareMatrix<nifti_dmat33,double,3> SquareMatrix<nifti_dmat33,double,3>::polar () const
+{
+    return SquareMatrix<nifti_dmat33,double,3>(nifti_dmat33_polar(*niftiPointer()));
+}
+
+template <>
+inline double SquareMatrix<nifti_dmat33,double,3>::colnorm () const
+{
+    return nifti_dmat33_colnorm(*niftiPointer());
+}
+
+template <>
+inline double SquareMatrix<nifti_dmat33,double,3>::rownorm () const
+{
+    return nifti_dmat33_rownorm(*niftiPointer());
+}
+
+template <>
+inline double SquareMatrix<nifti_dmat33,double,3>::determ () const
+{
+    return nifti_dmat33_determ(*niftiPointer());
+}
+
+template <>
+inline SquareMatrix<nifti_dmat33,double,3> SquareMatrix<nifti_dmat33,double,3>::multiply (const SquareMatrix<nifti_dmat33,double,3> &other) const
+{
+    return SquareMatrix<nifti_dmat33,double,3>(nifti_dmat33_mul(*niftiPointer(), *other.niftiPointer()));
+}
+
+template <>
+inline SquareMatrix<mat44,float,4> SquareMatrix<mat44,float,4>::multiply (const SquareMatrix<mat44,float,4> &other) const
+{
+    return SquareMatrix<mat44,float,4>(nifti_mat44_mul(*niftiPointer(), *other.niftiPointer()));
+}
+
+template <>
+inline SquareMatrix<nifti_dmat44,double,4> SquareMatrix<nifti_dmat44,double,4>::inverse () const
+{
+    return SquareMatrix<nifti_dmat44,double,4>(nifti_dmat44_inverse(*niftiPointer()));
+}
+
+template <>
+inline SquareMatrix<nifti_dmat44,double,4> SquareMatrix<nifti_dmat44,double,4>::multiply (const SquareMatrix<nifti_dmat44,double,4> &other) const
+{
+    return SquareMatrix<nifti_dmat44,double,4>(nifti_dmat44_mul(*niftiPointer(), *other.niftiPointer()));
+}
+
+#endif
+
+template <class NiftiType, typename ElementType, int Order>
+inline Vector<ElementType,Order> SquareMatrix<NiftiType,ElementType,Order>::multiply (const Vector<ElementType,Order> &vec) const
+{
+    Vector<ElementType,Order> result;
+    for (int i=0; i<Order; i++)
+    {
+        for (int j=0; j<Order; j++)
+            result[i] += (*this)(i,j) * vec[j];
+    }
+    return result;
+}
+
+#endif
diff --git a/reg-io/RNifti/NiftiImage_print.h b/reg-io/RNifti/NiftiImage_print.h
new file mode 100644
index 00000000..92689ba2
--- /dev/null
+++ b/reg-io/RNifti/NiftiImage_print.h
@@ -0,0 +1,36 @@
+#ifndef _NIFTI_IMAGE_PRINT_H_
+#define _NIFTI_IMAGE_PRINT_H_
+
+#ifdef USING_R
+
+#define R_NO_REMAP
+#define R_USE_C99_IN_CXX
+
+#include <R_ext/Print.h>
+#include <R_ext/Error.h>
+
+#define Rc_printf Rprintf
+#define Rc_fprintf_stdout(...) Rprintf(__VA_ARGS__)
+#define Rc_fprintf_stderr(...) REprintf(__VA_ARGS__)
+#define Rc_fputs_stdout(str) Rprintf(str)
+#define Rc_fputs_stderr(str) REprintf(str)
+#define Rc_fputc_stdout(ch) Rprintf("%c", ch)
+#define Rc_fputc_stderr(ch) REprintf("%c", ch)
+
+#else
+
+#include <stdio.h>
+
+#define Rc_printf printf
+#define Rc_fprintf_stdout(...) fprintf(stdout, __VA_ARGS__)
+#define Rc_fprintf_stderr(...) fprintf(stderr, __VA_ARGS__)
+#define Rc_fputs_stdout(str) fputs(str, stdout)
+#define Rc_fputs_stderr(str) fputs(str, stderr)
+#define Rc_fputc_stdout(ch) fputc(ch, stdout)
+#define Rc_fputc_stderr(ch) fputc(ch, stderr)
+#define Rf_warning(str) fprintf(stderr, "%s\n", str)
+#define Rprintf(...) fprintf(stderr, __VA_ARGS__)
+
+#endif // USING_R
+
+#endif // _PRINT_H_
diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h
index 771e1fc8..0b1b6d98 100644
--- a/reg-io/_reg_ReadWriteImage.h
+++ b/reg-io/_reg_ReadWriteImage.h
@@ -14,7 +14,7 @@
 
 #pragma once
 
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 #include <string>
 
 #include "reg_png.h"
diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h
index 446303c4..ef625c74 100644
--- a/reg-io/_reg_ReadWriteMatrix.h
+++ b/reg-io/_reg_ReadWriteMatrix.h
@@ -14,7 +14,7 @@
 
 #pragma once
 
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 //STD
 #include <fstream>
 #include <utility>
diff --git a/reg-io/nifti/LICENSE b/reg-io/nifti/LICENSE
deleted file mode 100755
index cd7ce566..00000000
--- a/reg-io/nifti/LICENSE
+++ /dev/null
@@ -1,9 +0,0 @@
-Niftilib has been developed by members of the NIFTI DFWG and volunteers in the
-neuroimaging community and serves as a reference implementation of the nifti-1
-file format.
-
-http://nifti.nimh.nih.gov/
-
-Nifticlib code is released into the public domain, developers are encouraged to
-incorporate niftilib code into their applications, and, to contribute changes
-and enhancements to niftilib.
diff --git a/reg-io/nifti/nifti1.h b/reg-io/nifti/nifti1.h
deleted file mode 100755
index edc21db2..00000000
--- a/reg-io/nifti/nifti1.h
+++ /dev/null
@@ -1,1505 +0,0 @@
-/** \file nifti1.h
-    \brief Official definition of the nifti1 header.  Written by Bob Cox, SSCC, NIMH.
-
-    HISTORY:
-
-        29 Nov 2007 [rickr]
-           - added DT_RGBA32 and NIFTI_TYPE_RGBA32
-           - added NIFTI_INTENT codes:
-                TIME_SERIES, NODE_INDEX, RGB_VECTOR, RGBA_VECTOR, SHAPE
- */
-
-#pragma once
-
-/*****************************************************************************
-      ** This file defines the "NIFTI-1" header format.               **
-      ** It is derived from 2 meetings at the NIH (31 Mar 2003 and    **
-      ** 02 Sep 2003) of the Data Format Working Group (DFWG),        **
-      ** chartered by the NIfTI (Neuroimaging Informatics Technology  **
-      ** Initiative) at the National Institutes of Health (NIH).      **
-      **--------------------------------------------------------------**
-      ** Neither the National Institutes of Health (NIH), the DFWG,   **
-      ** nor any of the members or employees of these institutions    **
-      ** imply any warranty of usefulness of this material for any    **
-      ** purpose, and do not assume any liability for damages,        **
-      ** incidental or otherwise, caused by any use of this document. **
-      ** If these conditions are not acceptable, do not use this!     **
-      **--------------------------------------------------------------**
-      ** Author:   Robert W Cox (NIMH, Bethesda)                      **
-      ** Advisors: John Ashburner (FIL, London),                      **
-      **           Stephen Smith (FMRIB, Oxford),                     **
-      **           Mark Jenkinson (FMRIB, Oxford)                     **
-******************************************************************************/
-
-/*---------------------------------------------------------------------------*/
-/* Note that the ANALYZE 7.5 file header (dbh.h) is
-         (c) Copyright 1986-1995
-         Biomedical Imaging Resource
-         Mayo Foundation
-   Incorporation of components of dbh.h are by permission of the
-   Mayo Foundation.
-
-   Changes from the ANALYZE 7.5 file header in this file are released to the
-   public domain, including the functional comments and any amusing asides.
------------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------*/
-/*! INTRODUCTION TO NIFTI-1:
-   ------------------------
-   The twin (and somewhat conflicting) goals of this modified ANALYZE 7.5
-   format are:
-    (a) To add information to the header that will be useful for functional
-        neuroimaging data analysis and display.  These additions include:
-        - More basic data types.
-        - Two affine transformations to specify voxel coordinates.
-        - "Intent" codes and parameters to describe the meaning of the data.
-        - Affine scaling of the stored data values to their "true" values.
-        - Optional storage of the header and image data in one file (.nii).
-    (b) To maintain compatibility with non-NIFTI-aware ANALYZE 7.5 compatible
-        software (i.e., such a program should be able to do something useful
-        with a NIFTI-1 dataset -- at least, with one stored in a traditional
-        .img/.hdr file pair).
-
-   Most of the unused fields in the ANALYZE 7.5 header have been taken,
-   and some of the lesser-used fields have been co-opted for other purposes.
-   Notably, most of the data_history substructure has been co-opted for
-   other purposes, since the ANALYZE 7.5 format describes this substructure
-   as "not required".
-
-   NIFTI-1 FLAG (MAGIC STRINGS):
-   ----------------------------
-   To flag such a struct as being conformant to the NIFTI-1 spec, the last 4
-   bytes of the header must be either the C String "ni1" or "n+1";
-   in hexadecimal, the 4 bytes
-     6E 69 31 00   or   6E 2B 31 00
-   (in any future version of this format, the '1' will be upgraded to '2',
-   etc.).  Normally, such a "magic number" or flag goes at the start of the
-   file, but trying to avoid clobbering widely-used ANALYZE 7.5 fields led to
-   putting this marker last.  However, recall that "the last shall be first"
-   (Matthew 20:16).
-
-   If a NIFTI-aware program reads a header file that is NOT marked with a
-   NIFTI magic string, then it should treat the header as an ANALYZE 7.5
-   structure.
-
-   NIFTI-1 FILE STORAGE:
-   --------------------
-   "ni1" means that the image data is stored in the ".img" file corresponding
-   to the header file (starting at file offset 0).
-
-   "n+1" means that the image data is stored in the same file as the header
-   information.  We recommend that the combined header+data filename suffix
-   be ".nii".  When the dataset is stored in one file, the first byte of image
-   data is stored at byte location (int)vox_offset in this combined file.
-   The minimum allowed value of vox_offset is 352; for compatibility with
-   some software, vox_offset should be an integral multiple of 16.
-
-   GRACE UNDER FIRE:
-   ----------------
-   Most NIFTI-aware programs will only be able to handle a subset of the full
-   range of datasets possible with this format.  All NIFTI-aware programs
-   should take care to check if an input dataset conforms to the program's
-   needs and expectations (e.g., check datatype, intent_code, etc.).  If the
-   input dataset can't be handled by the program, the program should fail
-   gracefully (e.g., print a useful warning; not crash).
-
-   SAMPLE CODES:
-   ------------
-   The associated files nifti1_io.h and nifti1_io.c provide a sample
-   implementation in C of a set of functions to read, write, and manipulate
-   NIFTI-1 files.  The file nifti1_test.c is a sample program that uses
-   the nifti1_io.c functions.
------------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------*/
-/* HEADER STRUCT DECLARATION:
-   -------------------------
-   In the comments below for each field, only NIFTI-1 specific requirements
-   or changes from the ANALYZE 7.5 format are described.  For convenience,
-   the 348 byte header is described as a single struct, rather than as the
-   ANALYZE 7.5 group of 3 substructs.
-
-   Further comments about the interpretation of various elements of this
-   header are after the data type definition itself.  Fields that are
-   marked as ++UNUSED++ have no particular interpretation in this standard.
-   (Also see the UNUSED FIELDS comment section, far below.)
-
-   The presumption below is that the various C types have particular sizes:
-     sizeof(int) = sizeof(float) = 4 ;  sizeof(short) = 2
------------------------------------------------------------------------------*/
-
-/*=================*/
-#ifdef  __cplusplus
-extern "C" {
-#endif
-   /*=================*/
-
-   /*! \struct nifti_1_header
-       \brief Data structure defining the fields in the nifti1 header.
-              This binary header should be found at the beginning of a valid
-              NIFTI-1 header file.
-    */
-   /*************************/  /************************/
-   struct nifti_1_header
-   {
-      /* NIFTI-1 usage         */  /* ANALYZE 7.5 field(s) */
-      /*************************/  /************************/
-
-      /*--- was header_key substruct ---*/
-      int   sizeof_hdr;    /*!< MUST be 348           */  /* int sizeof_hdr;      */
-      char  data_type[10]; /*!< ++UNUSED++            */  /* char data_type[10];  */
-      char  db_name[18];   /*!< ++UNUSED++            */  /* char db_name[18];    */
-      int   extents;       /*!< ++UNUSED++            */  /* int extents;         */
-      short session_error; /*!< ++UNUSED++            */  /* short session_error; */
-      char  regular;       /*!< ++UNUSED++            */  /* char regular;        */
-      char  dim_info;      /*!< MRI slice ordering.   */  /* char hkey_un0;       */
-
-      /*--- was image_dimension substruct ---*/
-      short dim[8];        /*!< Data array dimensions.*/  /* short dim[8];        */
-      float intent_p1 ;    /*!< 1st intent parameter. */  /* short unused8;       */
-      /* short unused9;       */
-      float intent_p2 ;    /*!< 2nd intent parameter. */  /* short unused10;      */
-      /* short unused11;      */
-      float intent_p3 ;    /*!< 3rd intent parameter. */  /* short unused12;      */
-      /* short unused13;      */
-      short intent_code ;  /*!< NIFTI_INTENT_* code.  */  /* short unused14;      */
-      short datatype;      /*!< Defines data type!    */  /* short datatype;      */
-      short bitpix;        /*!< Number bits/voxel.    */  /* short bitpix;        */
-      short slice_start;   /*!< First slice index.    */  /* short dim_un0;       */
-      float pixdim[8];     /*!< Grid spacings.        */  /* float pixdim[8];     */
-      float vox_offset;    /*!< Offset into .nii file */  /* float vox_offset;    */
-      float scl_slope ;    /*!< Data scaling: slope.  */  /* float funused1;      */
-      float scl_inter ;    /*!< Data scaling: offset. */  /* float funused2;      */
-      short slice_end;     /*!< Last slice index.     */  /* float funused3;      */
-      char  slice_code ;   /*!< Slice timing order.   */
-      char  xyzt_units ;   /*!< Units of pixdim[1..4] */
-      float cal_max;       /*!< Max display intensity */  /* float cal_max;       */
-      float cal_min;       /*!< Min display intensity */  /* float cal_min;       */
-      float slice_duration;/*!< Time for 1 slice.     */  /* float compressed;    */
-      float toffset;       /*!< Time axis shift.      */  /* float verified;      */
-      int   glmax;         /*!< ++UNUSED++            */  /* int glmax;           */
-      int   glmin;         /*!< ++UNUSED++            */  /* int glmin;           */
-
-      /*--- was data_history substruct ---*/
-      char  descrip[80];   /*!< any text you like.    */  /* char descrip[80];    */
-      char  aux_file[24];  /*!< auxiliary filename.   */  /* char aux_file[24];   */
-
-      short qform_code ;   /*!< NIFTI_XFORM_* code.   */  /*-- all ANALYZE 7.5 ---*/
-      short sform_code ;   /*!< NIFTI_XFORM_* code.   */  /*   fields below here  */
-      /*   are replaced       */
-      float quatern_b ;    /*!< Quaternion b param.   */
-      float quatern_c ;    /*!< Quaternion c param.   */
-      float quatern_d ;    /*!< Quaternion d param.   */
-      float qoffset_x ;    /*!< Quaternion x shift.   */
-      float qoffset_y ;    /*!< Quaternion y shift.   */
-      float qoffset_z ;    /*!< Quaternion z shift.   */
-
-      float srow_x[4] ;    /*!< 1st row affine transform.   */
-      float srow_y[4] ;    /*!< 2nd row affine transform.   */
-      float srow_z[4] ;    /*!< 3rd row affine transform.   */
-
-      char intent_name[16];/*!< 'name' or meaning of data.  */
-
-      char magic[4] ;      /*!< MUST be "ni1\0" or "n+1\0". */
-
-   } ;                   /**** 348 bytes total ****/
-
-   typedef struct nifti_1_header nifti_1_header ;
-
-   /*---------------------------------------------------------------------------*/
-   /* HEADER EXTENSIONS:
-      -----------------
-      After the end of the 348 byte header (e.g., after the magic field),
-      the next 4 bytes are a char array field named "extension". By default,
-      all 4 bytes of this array should be set to zero. In a .nii file, these
-      4 bytes will always be present, since the earliest start point for
-      the image data is byte #352. In a separate .hdr file, these bytes may
-      or may not be present. If not present (i.e., if the length of the .hdr
-      file is 348 bytes), then a NIfTI-1 compliant program should use the
-      default value of extension={0,0,0,0}. The first byte (extension[0])
-      is the only value of this array that is specified at present. The other
-      3 bytes are reserved for future use.
-
-      If extension[0] is nonzero, it indicates that extended header information
-      is present in the bytes following the extension array. In a .nii file,
-      this extended header data is before the image data (and vox_offset
-      must be set correctly to allow for this). In a .hdr file, this extended
-      data follows extension and proceeds (potentially) to the end of the file.
-
-      The format of extended header data is weakly specified. Each extension
-      must be an integer multiple of 16 bytes long. The first 8 bytes of each
-      extension comprise 2 integers:
-         int esize , ecode ;
-      These values may need to be byte-swapped, as indicated by dim[0] for
-      the rest of the header.
-        * esize is the number of bytes that form the extended header data
-          + esize must be a positive integral multiple of 16
-          + this length includes the 8 bytes of esize and ecode themselves
-        * ecode is a non-negative integer that indicates the format of the
-          extended header data that follows
-          + different ecode values are assigned to different developer groups
-          + at present, the "registered" values for code are
-            = 0 = unknown private format (not recommended!)
-            = 2 = DICOM format (i.e., attribute tags and values)
-            = 4 = AFNI group (i.e., ASCII XML-ish elements)
-      In the interests of interoperability (a primary rationale for NIfTI),
-      groups developing software that uses this extension mechanism are
-      encouraged to document and publicize the format of their extensions.
-      To this end, the NIfTI DFWG will assign even numbered codes upon request
-      to groups submitting at least rudimentary documentation for the format
-      of their extension; at present, the contact is mailto:rwcox@nih.gov.
-      The assigned codes and documentation will be posted on the NIfTI
-      website. All odd values of ecode (and 0) will remain unassigned;
-      at least, until the even ones are used up, when we get to 2,147,483,646.
-
-      Note that the other contents of the extended header data section are
-      totally unspecified by the NIfTI-1 standard. In particular, if binary
-      data is stored in such a section, its byte order is not necessarily
-      the same as that given by examining dim[0]; it is incumbent on the
-      programs dealing with such data to determine the byte order of binary
-      extended header data.
-
-      Multiple extended header sections are allowed, each starting with an
-      esize,ecode value pair. The first esize value, as described above,
-      is at bytes #352-355 in the .hdr or .nii file (files start at byte #0).
-      If this value is positive, then the second (esize2) will be found
-      starting at byte #352+esize1 , the third (esize3) at byte #352+esize1+esize2,
-      et cetera.  Of course, in a .nii file, the value of vox_offset must
-      be compatible with these extensions. If a malformed file indicates
-      that an extended header data section would run past vox_offset, then
-      the entire extended header section should be ignored. In a .hdr file,
-      if an extended header data section would run past the end-of-file,
-      that extended header data should also be ignored.
-
-      With the above scheme, a program can successively examine the esize
-      and ecode values, and skip over each extended header section if the
-      program doesn't know how to interpret the data within. Of course, any
-      program can simply ignore all extended header sections simply by jumping
-      straight to the image data using vox_offset.
-   -----------------------------------------------------------------------------*/
-
-   /*! \struct nifti1_extender
-       \brief This structure represents a 4-byte string that should follow the
-              binary nifti_1_header data in a NIFTI-1 header file.  If the char
-              values are {1,0,0,0}, the file is expected to contain extensions,
-              values of {0,0,0,0} imply the file does not contain extensions.
-              Other sequences of values are not currently defined.
-    */
-   struct nifti1_extender
-   {
-      char extension[4] ;
-   } ;
-   typedef struct nifti1_extender nifti1_extender ;
-
-   /*! \struct nifti1_extension
-       \brief Data structure defining the fields of a header extension.
-    */
-   struct nifti1_extension
-   {
-      int    esize ; /*!< size of extension, in bytes (must be multiple of 16) */
-      int    ecode ; /*!< extension code, one of the NIFTI_ECODE_ values       */
-      char * edata ; /*!< raw data, with no byte swapping (length is esize-8)  */
-   } ;
-   typedef struct nifti1_extension nifti1_extension ;
-
-   /*---------------------------------------------------------------------------*/
-   /* DATA DIMENSIONALITY (as in ANALYZE 7.5):
-      ---------------------------------------
-        dim[0] = number of dimensions;
-                 - if dim[0] is outside range 1..7, then the header information
-                   needs to be byte swapped appropriately
-                 - ANALYZE supports dim[0] up to 7, but NIFTI-1 reserves
-                   dimensions 1,2,3 for space (x,y,z), 4 for time (t), and
-                   5,6,7 for anything else needed.
-
-        dim[i] = length of dimension #i, for i=1..dim[0]  (must be positive)
-                 - also see the discussion of intent_code, far below
-
-        pixdim[i] = voxel width along dimension #i, i=1..dim[0] (positive)
-                    - cf. ORIENTATION section below for use of pixdim[0]
-                    - the units of pixdim can be specified with the xyzt_units
-                      field (also described far below).
-
-      Number of bits per voxel value is in bitpix, which MUST correspond with
-      the datatype field.  The total number of bytes in the image data is
-        dim[1] * ... * dim[dim[0]] * bitpix / 8
-
-      In NIFTI-1 files, dimensions 1,2,3 are for space, dimension 4 is for time,
-      and dimension 5 is for storing multiple values at each spatiotemporal
-      voxel.  Some examples:
-        - A typical whole-brain FMRI experiment's time series:
-           - dim[0] = 4
-           - dim[1] = 64   pixdim[1] = 3.75 xyzt_units =  NIFTI_UNITS_MM
-           - dim[2] = 64   pixdim[2] = 3.75             | NIFTI_UNITS_SEC
-           - dim[3] = 20   pixdim[3] = 5.0
-           - dim[4] = 120  pixdim[4] = 2.0
-        - A typical T1-weighted anatomical volume:
-           - dim[0] = 3
-           - dim[1] = 256  pixdim[1] = 1.0  xyzt_units = NIFTI_UNITS_MM
-           - dim[2] = 256  pixdim[2] = 1.0
-           - dim[3] = 128  pixdim[3] = 1.1
-        - A single slice EPI time series:
-           - dim[0] = 4
-           - dim[1] = 64   pixdim[1] = 3.75 xyzt_units =  NIFTI_UNITS_MM
-           - dim[2] = 64   pixdim[2] = 3.75             | NIFTI_UNITS_SEC
-           - dim[3] = 1    pixdim[3] = 5.0
-           - dim[4] = 1200 pixdim[4] = 0.2
-        - A 3-vector stored at each point in a 3D volume:
-           - dim[0] = 5
-           - dim[1] = 256  pixdim[1] = 1.0  xyzt_units = NIFTI_UNITS_MM
-           - dim[2] = 256  pixdim[2] = 1.0
-           - dim[3] = 128  pixdim[3] = 1.1
-           - dim[4] = 1    pixdim[4] = 0.0
-           - dim[5] = 3                     intent_code = NIFTI_INTENT_VECTOR
-        - A single time series with a 3x3 matrix at each point:
-           - dim[0] = 5
-           - dim[1] = 1                     xyzt_units = NIFTI_UNITS_SEC
-           - dim[2] = 1
-           - dim[3] = 1
-           - dim[4] = 1200 pixdim[4] = 0.2
-           - dim[5] = 9                     intent_code = NIFTI_INTENT_GENMATRIX
-           - intent_p1 = intent_p2 = 3.0    (indicates matrix dimensions)
-   -----------------------------------------------------------------------------*/
-
-   /*---------------------------------------------------------------------------*/
-   /* DATA STORAGE:
-      ------------
-      If the magic field is "n+1", then the voxel data is stored in the
-      same file as the header.  In this case, the voxel data starts at offset
-      (int)vox_offset into the header file.  Thus, vox_offset=352.0 means that
-      the data starts immediately after the NIFTI-1 header.  If vox_offset is
-      greater than 352, the NIFTI-1 format does not say much about the
-      contents of the dataset file between the end of the header and the
-      start of the data.
-
-      FILES:
-      -----
-      If the magic field is "ni1", then the voxel data is stored in the
-      associated ".img" file, starting at offset 0 (i.e., vox_offset is not
-      used in this case, and should be set to 0.0).
-
-      When storing NIFTI-1 datasets in pairs of files, it is customary to name
-      the files in the pattern "name.hdr" and "name.img", as in ANALYZE 7.5.
-      When storing in a single file ("n+1"), the file name should be in
-      the form "name.nii" (the ".nft" and ".nif" suffixes are already taken;
-      cf. http://www.icdatamaster.com/n.html ).
-
-      BYTE ORDERING:
-      -------------
-      The byte order of the data arrays is presumed to be the same as the byte
-      order of the header (which is determined by examining dim[0]).
-
-      Floating point types are presumed to be stored in IEEE-754 format.
-   -----------------------------------------------------------------------------*/
-
-   /*---------------------------------------------------------------------------*/
-   /* DETAILS ABOUT vox_offset:
-      ------------------------
-      In a .nii file, the vox_offset field value is interpreted as the start
-      location of the image data bytes in that file. In a .hdr/.img file pair,
-      the vox_offset field value is the start location of the image data
-      bytes in the .img file.
-       * If vox_offset is less than 352 in a .nii file, it is equivalent
-         to 352 (i.e., image data never starts before byte #352 in a .nii file).
-       * The default value for vox_offset in a .nii file is 352.
-       * In a .hdr file, the default value for vox_offset is 0.
-       * vox_offset should be an integer multiple of 16; otherwise, some
-         programs may not work properly (e.g., SPM). This is to allow
-         memory-mapped input to be properly byte-aligned.
-      Note that since vox_offset is an IEEE-754 32 bit float (for compatibility
-      with the ANALYZE-7.5 format), it effectively has a 24 bit mantissa. All
-      integers from 0 to 2^24 can be represented exactly in this format, but not
-      all larger integers are exactly storable as IEEE-754 32 bit floats. However,
-      unless you plan to have vox_offset be potentially larger than 16 MB, this
-      should not be an issue. (Actually, any integral multiple of 16 up to 2^27
-      can be represented exactly in this format, which allows for up to 128 MB
-      of random information before the image data.  If that isn't enough, then
-      perhaps this format isn't right for you.)
-
-      In a .img file (i.e., image data stored separately from the NIfTI-1
-      header), data bytes between #0 and #vox_offset-1 (inclusive) are completely
-      undefined and unregulated by the NIfTI-1 standard. One potential use of
-      having vox_offset > 0 in the .hdr/.img file pair storage method is to make
-      the .img file be a copy of (or link to) a pre-existing image file in some
-      other format, such as DICOM; then vox_offset would be set to the offset of
-      the image data in this file. (It may not be possible to follow the
-      "multiple-of-16 rule" with an arbitrary external file; using the NIfTI-1
-      format in such a case may lead to a file that is incompatible with software
-      that relies on vox_offset being a multiple of 16.)
-
-      In a .nii file, data bytes between #348 and #vox_offset-1 (inclusive) may
-      be used to store user-defined extra information; similarly, in a .hdr file,
-      any data bytes after byte #347 are available for user-defined extra
-      information. The (very weak) regulation of this extra header data is
-      described elsewhere.
-   -----------------------------------------------------------------------------*/
-
-   /*---------------------------------------------------------------------------*/
-   /* DATA SCALING:
-      ------------
-      If the scl_slope field is nonzero, then each voxel value in the dataset
-      should be scaled as
-         y = scl_slope * x + scl_inter
-      where x = voxel value stored
-            y = "true" voxel value
-      Normally, we would expect this scaling to be used to store "true" floating
-      values in a smaller integer datatype, but that is not required.  That is,
-      it is legal to use scaling even if the datatype is a float type (crazy,
-      perhaps, but legal).
-       - However, the scaling is to be ignored if datatype is DT_RGB24.
-       - If datatype is a complex type, then the scaling is to be
-         applied to both the real and imaginary parts.
-
-      The cal_min and cal_max fields (if nonzero) are used for mapping (possibly
-      scaled) dataset values to display colors:
-       - Minimum display intensity (black) corresponds to dataset value cal_min.
-       - Maximum display intensity (white) corresponds to dataset value cal_max.
-       - Dataset values below cal_min should display as black also, and values
-         above cal_max as white.
-       - Colors "black" and "white", of course, may refer to any scalar display
-         scheme (e.g., a color lookup table specified via aux_file).
-       - cal_min and cal_max only make sense when applied to scalar-valued
-         datasets (i.e., dim[0] < 5 or dim[5] = 1).
-   -----------------------------------------------------------------------------*/
-
-   /*---------------------------------------------------------------------------*/
-   /* TYPE OF DATA (acceptable values for datatype field):
-      ---------------------------------------------------
-      Values of datatype smaller than 256 are ANALYZE 7.5 compatible.
-      Larger values are NIFTI-1 additions.  These are all multiples of 256, so
-      that no bits below position 8 are set in datatype.  But there is no need
-      to use only powers-of-2, as the original ANALYZE 7.5 datatype codes do.
-
-      The additional codes are intended to include a complete list of basic
-      scalar types, including signed and unsigned integers from 8 to 64 bits,
-      floats from 32 to 128 bits, and complex (float pairs) from 64 to 256 bits.
-
-      Note that most programs will support only a few of these datatypes!
-      A NIFTI-1 program should fail gracefully (e.g., print a warning message)
-      when it encounters a dataset with a type it doesn't like.
-   -----------------------------------------------------------------------------*/
-
-#undef DT_UNKNOWN  /* defined in dirent.h on some Unix systems */
-
-   /*! \defgroup NIFTI1_DATATYPES
-       \brief nifti1 datatype codes
-       @{
-    */
-   /*--- the original ANALYZE 7.5 type codes ---*/
-#define DT_NONE                    0
-#define DT_UNKNOWN                 0     /* what it says, dude           */
-#define DT_BINARY                  1     /* binary (1 bit/voxel)         */
-#define DT_UNSIGNED_CHAR           2     /* unsigned char (8 bits/voxel) */
-#define DT_SIGNED_SHORT            4     /* signed short (16 bits/voxel) */
-#define DT_SIGNED_INT              8     /* signed int (32 bits/voxel)   */
-#define DT_FLOAT                  16     /* float (32 bits/voxel)        */
-#define DT_COMPLEX                32     /* complex (64 bits/voxel)      */
-#define DT_DOUBLE                 64     /* double (64 bits/voxel)       */
-#define DT_RGB                   128     /* RGB triple (24 bits/voxel)   */
-#define DT_ALL                   255     /* not very useful (?)          */
-
-   /*----- another set of names for the same ---*/
-#define DT_UINT8                   2
-#define DT_INT16                   4
-#define DT_INT32                   8
-#define DT_FLOAT32                16
-#define DT_COMPLEX64              32
-#define DT_FLOAT64                64
-#define DT_RGB24                 128
-
-   /*------------------- new codes for NIFTI ---*/
-#define DT_INT8                  256     /* signed char (8 bits)         */
-#define DT_UINT16                512     /* unsigned short (16 bits)     */
-#define DT_UINT32                768     /* unsigned int (32 bits)       */
-#define DT_INT64                1024     /* long long (64 bits)          */
-#define DT_UINT64               1280     /* unsigned long long (64 bits) */
-#define DT_FLOAT128             1536     /* long double (128 bits)       */
-#define DT_COMPLEX128           1792     /* double pair (128 bits)       */
-#define DT_COMPLEX256           2048     /* long double pair (256 bits)  */
-#define DT_RGBA32               2304     /* 4 byte RGBA (32 bits/voxel)  */
-   /* @} */
-
-
-   /*------- aliases for all the above codes ---*/
-
-   /*! \defgroup NIFTI1_DATATYPE_ALIASES
-       \brief aliases for the nifti1 datatype codes
-       @{
-    */
-   /*! unsigned char. */
-#define NIFTI_TYPE_UINT8           2
-   /*! signed short. */
-#define NIFTI_TYPE_INT16           4
-   /*! signed int. */
-#define NIFTI_TYPE_INT32           8
-   /*! 32 bit float. */
-#define NIFTI_TYPE_FLOAT32        16
-   /*! 64 bit complex = 2 32 bit floats. */
-#define NIFTI_TYPE_COMPLEX64      32
-   /*! 64 bit float = double. */
-#define NIFTI_TYPE_FLOAT64        64
-   /*! 3 8 bit bytes. */
-#define NIFTI_TYPE_RGB24         128
-   /*! signed char. */
-#define NIFTI_TYPE_INT8          256
-   /*! unsigned short. */
-#define NIFTI_TYPE_UINT16        512
-   /*! unsigned int. */
-#define NIFTI_TYPE_UINT32        768
-   /*! signed long long. */
-#define NIFTI_TYPE_INT64        1024
-   /*! unsigned long long. */
-#define NIFTI_TYPE_UINT64       1280
-   /*! 128 bit float = long double. */
-#define NIFTI_TYPE_FLOAT128     1536
-   /*! 128 bit complex = 2 64 bit floats. */
-#define NIFTI_TYPE_COMPLEX128   1792
-   /*! 256 bit complex = 2 128 bit floats */
-#define NIFTI_TYPE_COMPLEX256   2048
-   /*! 4 8 bit bytes. */
-#define NIFTI_TYPE_RGBA32       2304
-   /* @} */
-
-   /*-------- sample typedefs for complicated types ---*/
-#if 0
-   typedef struct
-   {
-      float       r,i;
-   } complex_float ;
-   typedef struct
-   {
-      double      r,i;
-   } complex_double ;
-   typedef struct
-   {
-      long double r,i;
-   } complex_longdouble ;
-   typedef struct
-   {
-      unsigned char r,g,b;
-   } rgb_byte ;
-#endif
-
-   /*---------------------------------------------------------------------------*/
-   /* INTERPRETATION OF VOXEL DATA:
-      ----------------------------
-      The intent_code field can be used to indicate that the voxel data has
-      some particular meaning.  In particular, a large number of codes is
-      given to indicate that the the voxel data should be interpreted as
-      being drawn from a given probability distribution.
-
-      VECTOR-VALUED DATASETS:
-      ----------------------
-      The 5th dimension of the dataset, if present (i.e., dim[0]=5 and
-      dim[5] > 1), contains multiple values (e.g., a vector) to be stored
-      at each spatiotemporal location.  For example, the header values
-       - dim[0] = 5
-       - dim[1] = 64
-       - dim[2] = 64
-       - dim[3] = 20
-       - dim[4] = 1     (indicates no time axis)
-       - dim[5] = 3
-       - datatype = DT_FLOAT
-       - intent_code = NIFTI_INTENT_VECTOR
-      mean that this dataset should be interpreted as a 3D volume (64x64x20),
-      with a 3-vector of floats defined at each point in the 3D grid.
-
-      A program reading a dataset with a 5th dimension may want to reformat
-      the image data to store each voxels' set of values together in a struct
-      or array.  This programming detail, however, is beyond the scope of the
-      NIFTI-1 file specification!  Uses of dimensions 6 and 7 are also not
-      specified here.
-
-      STATISTICAL PARAMETRIC DATASETS (i.e., SPMs):
-      --------------------------------------------
-      Values of intent_code from NIFTI_FIRST_STATCODE to NIFTI_LAST_STATCODE
-      (inclusive) indicate that the numbers in the dataset should be interpreted
-      as being drawn from a given distribution.  Most such distributions have
-      auxiliary parameters (e.g., NIFTI_INTENT_TTEST has 1 DOF parameter).
-
-      If the dataset DOES NOT have a 5th dimension, then the auxiliary parameters
-      are the same for each voxel, and are given in header fields intent_p1,
-      intent_p2, and intent_p3.
-
-      If the dataset DOES have a 5th dimension, then the auxiliary parameters
-      are different for each voxel.  For example, the header values
-       - dim[0] = 5
-       - dim[1] = 128
-       - dim[2] = 128
-       - dim[3] = 1      (indicates a single slice)
-       - dim[4] = 1      (indicates no time axis)
-       - dim[5] = 2
-       - datatype = DT_FLOAT
-       - intent_code = NIFTI_INTENT_TTEST
-      mean that this is a 2D dataset (128x128) of t-statistics, with the
-      t-statistic being in the first "plane" of data and the degrees-of-freedom
-      parameter being in the second "plane" of data.
-
-      If the dataset 5th dimension is used to store the voxel-wise statistical
-      parameters, then dim[5] must be 1 plus the number of parameters required
-      by that distribution (e.g., intent_code=NIFTI_INTENT_TTEST implies dim[5]
-      must be 2, as in the example just above).
-
-      Note: intent_code values 2..10 are compatible with AFNI 1.5x (which is
-      why there is no code with value=1, which is obsolescent in AFNI).
-
-      OTHER INTENTIONS:
-      ----------------
-      The purpose of the intent_* fields is to help interpret the values
-      stored in the dataset.  Some non-statistical values for intent_code
-      and conventions are provided for storing other complex data types.
-
-      The intent_name field provides space for a 15 character (plus 0 byte)
-      'name' string for the type of data stored. Examples:
-       - intent_code = NIFTI_INTENT_ESTIMATE; intent_name = "T1";
-          could be used to signify that the voxel values are estimates of the
-          NMR parameter T1.
-       - intent_code = NIFTI_INTENT_TTEST; intent_name = "House";
-          could be used to signify that the voxel values are t-statistics
-          for the significance of 'activation' response to a House stimulus.
-       - intent_code = NIFTI_INTENT_DISPVECT; intent_name = "ToMNI152";
-          could be used to signify that the voxel values are a displacement
-          vector that transforms each voxel (x,y,z) location to the
-          corresponding location in the MNI152 standard brain.
-       - intent_code = NIFTI_INTENT_SYMMATRIX; intent_name = "DTI";
-          could be used to signify that the voxel values comprise a diffusion
-          tensor image.
-
-      If no data name is implied or needed, intent_name[0] should be set to 0.
-   -----------------------------------------------------------------------------*/
-
-   /*! default: no intention is indicated in the header. */
-
-#define NIFTI_INTENT_NONE        0
-
-   /*-------- These codes are for probability distributions ---------------*/
-   /* Most distributions have a number of parameters,
-      below denoted by p1, p2, and p3, and stored in
-       - intent_p1, intent_p2, intent_p3 if dataset doesn't have 5th dimension
-       - image data array                if dataset does have 5th dimension
-
-      Functions to compute with many of the distributions below can be found
-      in the CDF library from U Texas.
-
-      Formulas for and discussions of these distributions can be found in the
-      following books:
-
-       [U] Univariate Discrete Distributions,
-           NL Johnson, S Kotz, AW Kemp.
-
-       [C1] Continuous Univariate Distributions, vol. 1,
-            NL Johnson, S Kotz, N Balakrishnan.
-
-       [C2] Continuous Univariate Distributions, vol. 2,
-            NL Johnson, S Kotz, N Balakrishnan.                            */
-   /*----------------------------------------------------------------------*/
-
-   /*! [C2, chap 32] Correlation coefficient R (1 param):
-        p1 = degrees of freedom
-        R/sqrt(1-R*R) is t-distributed with p1 DOF. */
-
-   /*! \defgroup NIFTI1_INTENT_CODES
-       \brief nifti1 intent codes, to describe intended meaning of dataset contents
-       @{
-    */
-#define NIFTI_INTENT_CORREL      2
-
-   /*! [C2, chap 28] Student t statistic (1 param): p1 = DOF. */
-
-#define NIFTI_INTENT_TTEST       3
-
-   /*! [C2, chap 27] Fisher F statistic (2 params):
-        p1 = numerator DOF, p2 = denominator DOF. */
-
-#define NIFTI_INTENT_FTEST       4
-
-   /*! [C1, chap 13] Standard normal (0 params): Density = N(0,1). */
-
-#define NIFTI_INTENT_ZSCORE      5
-
-   /*! [C1, chap 18] Chi-squared (1 param): p1 = DOF.
-       Density(x) proportional to exp(-x/2) * x^(p1/2-1). */
-
-#define NIFTI_INTENT_CHISQ       6
-
-   /*! [C2, chap 25] Beta distribution (2 params): p1=a, p2=b.
-       Density(x) proportional to x^(a-1) * (1-x)^(b-1). */
-
-#define NIFTI_INTENT_BETA        7
-
-   /*! [U, chap 3] Binomial distribution (2 params):
-        p1 = number of trials, p2 = probability per trial.
-       Prob(x) = (p1 choose x) * p2^x * (1-p2)^(p1-x), for x=0,1,...,p1. */
-
-#define NIFTI_INTENT_BINOM       8
-
-   /*! [C1, chap 17] Gamma distribution (2 params):
-        p1 = shape, p2 = scale.
-       Density(x) proportional to x^(p1-1) * exp(-p2*x). */
-
-#define NIFTI_INTENT_GAMMA       9
-
-   /*! [U, chap 4] Poisson distribution (1 param): p1 = mean.
-       Prob(x) = exp(-p1) * p1^x / x! , for x=0,1,2,.... */
-
-#define NIFTI_INTENT_POISSON    10
-
-   /*! [C1, chap 13] Normal distribution (2 params):
-        p1 = mean, p2 = standard deviation. */
-
-#define NIFTI_INTENT_NORMAL     11
-
-   /*! [C2, chap 30] Noncentral F statistic (3 params):
-        p1 = numerator DOF, p2 = denominator DOF,
-        p3 = numerator noncentrality parameter.  */
-
-#define NIFTI_INTENT_FTEST_NONC 12
-
-   /*! [C2, chap 29] Noncentral chi-squared statistic (2 params):
-        p1 = DOF, p2 = noncentrality parameter.     */
-
-#define NIFTI_INTENT_CHISQ_NONC 13
-
-   /*! [C2, chap 23] Logistic distribution (2 params):
-        p1 = location, p2 = scale.
-       Density(x) proportional to sech^2((x-p1)/(2*p2)). */
-
-#define NIFTI_INTENT_LOGISTIC   14
-
-   /*! [C2, chap 24] Laplace distribution (2 params):
-        p1 = location, p2 = scale.
-       Density(x) proportional to exp(-abs(x-p1)/p2). */
-
-#define NIFTI_INTENT_LAPLACE    15
-
-   /*! [C2, chap 26] Uniform distribution: p1 = lower end, p2 = upper end. */
-
-#define NIFTI_INTENT_UNIFORM    16
-
-   /*! [C2, chap 31] Noncentral t statistic (2 params):
-        p1 = DOF, p2 = noncentrality parameter. */
-
-#define NIFTI_INTENT_TTEST_NONC 17
-
-   /*! [C1, chap 21] Weibull distribution (3 params):
-        p1 = location, p2 = scale, p3 = power.
-       Density(x) proportional to
-        ((x-p1)/p2)^(p3-1) * exp(-((x-p1)/p2)^p3) for x > p1. */
-
-#define NIFTI_INTENT_WEIBULL    18
-
-   /*! [C1, chap 18] Chi distribution (1 param): p1 = DOF.
-       Density(x) proportional to x^(p1-1) * exp(-x^2/2) for x > 0.
-        p1 = 1 = 'half normal' distribution
-        p1 = 2 = Rayleigh distribution
-        p1 = 3 = Maxwell-Boltzmann distribution.                  */
-
-#define NIFTI_INTENT_CHI        19
-
-   /*! [C1, chap 15] Inverse Gaussian (2 params):
-        p1 = mu, p2 = lambda
-       Density(x) proportional to
-        exp(-p2*(x-p1)^2/(2*p1^2*x)) / x^3  for x > 0. */
-
-#define NIFTI_INTENT_INVGAUSS   20
-
-   /*! [C2, chap 22] Extreme value type I (2 params):
-        p1 = location, p2 = scale
-       cdf(x) = exp(-exp(-(x-p1)/p2)). */
-
-#define NIFTI_INTENT_EXTVAL     21
-
-   /*! Data is a 'p-value' (no params). */
-
-#define NIFTI_INTENT_PVAL       22
-
-   /*! Data is ln(p-value) (no params).
-       To be safe, a program should compute p = exp(-abs(this_value)).
-       The nifti_stats.c library returns this_value
-       as positive, so that this_value = -log(p). */
-
-
-#define NIFTI_INTENT_LOGPVAL    23
-
-   /*! Data is log10(p-value) (no params).
-       To be safe, a program should compute p = pow(10.,-abs(this_value)).
-       The nifti_stats.c library returns this_value
-       as positive, so that this_value = -log10(p). */
-
-#define NIFTI_INTENT_LOG10PVAL  24
-
-   /*! Smallest intent_code that indicates a statistic. */
-
-#define NIFTI_FIRST_STATCODE     2
-
-   /*! Largest intent_code that indicates a statistic. */
-
-#define NIFTI_LAST_STATCODE     24
-
-   /*---------- these values for intent_code aren't for statistics ----------*/
-
-   /*! To signify that the value at each voxel is an estimate
-       of some parameter, set intent_code = NIFTI_INTENT_ESTIMATE.
-       The name of the parameter may be stored in intent_name.     */
-
-#define NIFTI_INTENT_ESTIMATE  1001
-
-   /*! To signify that the value at each voxel is an index into
-       some set of labels, set intent_code = NIFTI_INTENT_LABEL.
-       The filename with the labels may stored in aux_file.        */
-
-#define NIFTI_INTENT_LABEL     1002
-
-   /*! To signify that the value at each voxel is an index into the
-       NeuroNames labels set, set intent_code = NIFTI_INTENT_NEURONAME. */
-
-#define NIFTI_INTENT_NEURONAME 1003
-
-   /*! To store an M x N matrix at each voxel:
-         - dataset must have a 5th dimension (dim[0]=5 and dim[5]>1)
-         - intent_code must be NIFTI_INTENT_GENMATRIX
-         - dim[5] must be M*N
-         - intent_p1 must be M (in float format)
-         - intent_p2 must be N (ditto)
-         - the matrix values A[i][[j] are stored in row-order:
-           - A[0][0] A[0][1] ... A[0][N-1]
-           - A[1][0] A[1][1] ... A[1][N-1]
-           - etc., until
-           - A[M-1][0] A[M-1][1] ... A[M-1][N-1]        */
-
-#define NIFTI_INTENT_GENMATRIX 1004
-
-   /*! To store an NxN symmetric matrix at each voxel:
-         - dataset must have a 5th dimension
-         - intent_code must be NIFTI_INTENT_SYMMATRIX
-         - dim[5] must be N*(N+1)/2
-         - intent_p1 must be N (in float format)
-         - the matrix values A[i][[j] are stored in row-order:
-           - A[0][0]
-           - A[1][0] A[1][1]
-           - A[2][0] A[2][1] A[2][2]
-           - etc.: row-by-row                           */
-
-#define NIFTI_INTENT_SYMMATRIX 1005
-
-   /*! To signify that the vector value at each voxel is to be taken
-       as a displacement field or vector:
-         - dataset must have a 5th dimension
-         - intent_code must be NIFTI_INTENT_DISPVECT
-         - dim[5] must be the dimensionality of the displacment
-           vector (e.g., 3 for spatial displacement, 2 for in-plane) */
-
-#define NIFTI_INTENT_DISPVECT  1006   /* specifically for displacements */
-#define NIFTI_INTENT_VECTOR    1007   /* for any other type of vector */
-
-   /*! To signify that the vector value at each voxel is really a
-       spatial coordinate (e.g., the vertices or nodes of a surface mesh):
-         - dataset must have a 5th dimension
-         - intent_code must be NIFTI_INTENT_POINTSET
-         - dim[0] = 5
-         - dim[1] = number of points
-         - dim[2] = dim[3] = dim[4] = 1
-         - dim[5] must be the dimensionality of space (e.g., 3 => 3D space).
-         - intent_name may describe the object these points come from
-           (e.g., "pial", "gray/white" , "EEG", "MEG").                   */
-
-#define NIFTI_INTENT_POINTSET  1008
-
-   /*! To signify that the vector value at each voxel is really a triple
-       of indexes (e.g., forming a triangle) from a pointset dataset:
-         - dataset must have a 5th dimension
-         - intent_code must be NIFTI_INTENT_TRIANGLE
-         - dim[0] = 5
-         - dim[1] = number of triangles
-         - dim[2] = dim[3] = dim[4] = 1
-         - dim[5] = 3
-         - datatype should be an integer type (preferably DT_INT32)
-         - the data values are indexes (0,1,...) into a pointset dataset. */
-
-#define NIFTI_INTENT_TRIANGLE  1009
-
-   /*! To signify that the vector value at each voxel is a quaternion:
-         - dataset must have a 5th dimension
-         - intent_code must be NIFTI_INTENT_QUATERNION
-         - dim[0] = 5
-         - dim[5] = 4
-         - datatype should be a floating point type     */
-
-#define NIFTI_INTENT_QUATERNION 1010
-
-   /*! Dimensionless value - no params - although, as in _ESTIMATE
-       the name of the parameter may be stored in intent_name.     */
-
-#define NIFTI_INTENT_DIMLESS    1011
-
-   /*---------- these values apply to GIFTI datasets ----------*/
-
-   /*! To signify that the value at each location is from a time series. */
-
-#define NIFTI_INTENT_TIME_SERIES  2001
-
-   /*! To signify that the value at each location is a node index, from
-       a complete surface dataset.                                       */
-
-#define NIFTI_INTENT_NODE_INDEX   2002
-
-   /*! To signify that the vector value at each location is an RGB triplet,
-       of whatever type.
-         - dataset must have a 5th dimension
-         - dim[0] = 5
-         - dim[1] = number of nodes
-         - dim[2] = dim[3] = dim[4] = 1
-         - dim[5] = 3
-      */
-
-#define NIFTI_INTENT_RGB_VECTOR   2003
-
-   /*! To signify that the vector value at each location is a 4 valued RGBA
-       vector, of whatever type.
-         - dataset must have a 5th dimension
-         - dim[0] = 5
-         - dim[1] = number of nodes
-         - dim[2] = dim[3] = dim[4] = 1
-         - dim[5] = 4
-      */
-
-#define NIFTI_INTENT_RGBA_VECTOR  2004
-
-   /*! To signify that the value at each location is a shape value, such
-       as the curvature.  */
-
-#define NIFTI_INTENT_SHAPE        2005
-
-   /* @} */
-
-   /*---------------------------------------------------------------------------*/
-   /* 3D IMAGE (VOLUME) ORIENTATION AND LOCATION IN SPACE:
-      ---------------------------------------------------
-      There are 3 different methods by which continuous coordinates can
-      attached to voxels.  The discussion below emphasizes 3D volumes, and
-      the continuous coordinates are referred to as (x,y,z).  The voxel
-      index coordinates (i.e., the array indexes) are referred to as (i,j,k),
-      with valid ranges:
-        i = 0 .. dim[1]-1
-        j = 0 .. dim[2]-1  (if dim[0] >= 2)
-        k = 0 .. dim[3]-1  (if dim[0] >= 3)
-      The (x,y,z) coordinates refer to the CENTER of a voxel.  In methods
-      2 and 3, the (x,y,z) axes refer to a subject-based coordinate system,
-      with
-        +x = Right  +y = Anterior  +z = Superior.
-      This is a right-handed coordinate system.  However, the exact direction
-      these axes point with respect to the subject depends on qform_code
-      (Method 2) and sform_code (Method 3).
-
-      N.B.: The i index varies most rapidly, j index next, k index slowest.
-       Thus, voxel (i,j,k) is stored starting at location
-         (i + j*dim[1] + k*dim[1]*dim[2]) * (bitpix/8)
-       into the dataset array.
-
-      N.B.: The ANALYZE 7.5 coordinate system is
-         +x = Left  +y = Anterior  +z = Superior
-       which is a left-handed coordinate system.  This backwardness is
-       too difficult to tolerate, so this NIFTI-1 standard specifies the
-       coordinate order which is most common in functional neuroimaging.
-
-      N.B.: The 3 methods below all give the locations of the voxel centers
-       in the (x,y,z) coordinate system.  In many cases, programs will wish
-       to display image data on some other grid.  In such a case, the program
-       will need to convert its desired (x,y,z) values into (i,j,k) values
-       in order to extract (or interpolate) the image data.  This operation
-       would be done with the inverse transformation to those described below.
-
-      N.B.: Method 2 uses a factor 'qfac' which is either -1 or 1; qfac is
-       stored in the otherwise unused pixdim[0].  If pixdim[0]=0.0 (which
-       should not occur), we take qfac=1.  Of course, pixdim[0] is only used
-       when reading a NIFTI-1 header, not when reading an ANALYZE 7.5 header.
-
-      N.B.: The units of (x,y,z) can be specified using the xyzt_units field.
-
-      METHOD 1 (the "old" way, used only when qform_code = 0):
-      -------------------------------------------------------
-      The coordinate mapping from (i,j,k) to (x,y,z) is the ANALYZE
-      7.5 way.  This is a simple scaling relationship:
-
-        x = pixdim[1] * i
-        y = pixdim[2] * j
-        z = pixdim[3] * k
-
-      No particular spatial orientation is attached to these (x,y,z)
-      coordinates.  (NIFTI-1 does not have the ANALYZE 7.5 orient field,
-      which is not general and is often not set properly.)  This method
-      is not recommended, and is present mainly for compatibility with
-      ANALYZE 7.5 files.
-
-      METHOD 2 (used when qform_code > 0, which should be the "normal" case):
-      ---------------------------------------------------------------------
-      The (x,y,z) coordinates are given by the pixdim[] scales, a rotation
-      matrix, and a shift.  This method is intended to represent
-      "scanner-anatomical" coordinates, which are often embedded in the
-      image header (e.g., DICOM fields (0020,0032), (0020,0037), (0028,0030),
-      and (0018,0050)), and represent the nominal orientation and location of
-      the data.  This method can also be used to represent "aligned"
-      coordinates, which would typically result from some post-acquisition
-      alignment of the volume to a standard orientation (e.g., the same
-      subject on another day, or a rigid rotation to true anatomical
-      orientation from the tilted position of the subject in the scanner).
-      The formula for (x,y,z) in terms of header parameters and (i,j,k) is:
-
-        [ x ]   [ R11 R12 R13 ] [        pixdim[1] * i ]   [ qoffset_x ]
-        [ y ] = [ R21 R22 R23 ] [        pixdim[2] * j ] + [ qoffset_y ]
-        [ z ]   [ R31 R32 R33 ] [ qfac * pixdim[3] * k ]   [ qoffset_z ]
-
-      The qoffset_* shifts are in the NIFTI-1 header.  Note that the center
-      of the (i,j,k)=(0,0,0) voxel (first value in the dataset array) is
-      just (x,y,z)=(qoffset_x,qoffset_y,qoffset_z).
-
-      The rotation matrix R is calculated from the quatern_* parameters.
-      This calculation is described below.
-
-      The scaling factor qfac is either 1 or -1.  The rotation matrix R
-      defined by the quaternion parameters is "proper" (has determinant 1).
-      This may not fit the needs of the data; for example, if the image
-      grid is
-        i increases from Left-to-Right
-        j increases from Anterior-to-Posterior
-        k increases from Inferior-to-Superior
-      Then (i,j,k) is a left-handed triple.  In this example, if qfac=1,
-      the R matrix would have to be
-
-        [  1   0   0 ]
-        [  0  -1   0 ]  which is "improper" (determinant = -1).
-        [  0   0   1 ]
-
-      If we set qfac=-1, then the R matrix would be
-
-        [  1   0   0 ]
-        [  0  -1   0 ]  which is proper.
-        [  0   0  -1 ]
-
-      This R matrix is represented by quaternion [a,b,c,d] = [0,1,0,0]
-      (which encodes a 180 degree rotation about the x-axis).
-
-      METHOD 3 (used when sform_code > 0):
-      -----------------------------------
-      The (x,y,z) coordinates are given by a general affine transformation
-      of the (i,j,k) indexes:
-
-        x = srow_x[0] * i + srow_x[1] * j + srow_x[2] * k + srow_x[3]
-        y = srow_y[0] * i + srow_y[1] * j + srow_y[2] * k + srow_y[3]
-        z = srow_z[0] * i + srow_z[1] * j + srow_z[2] * k + srow_z[3]
-
-      The srow_* vectors are in the NIFTI_1 header.  Note that no use is
-      made of pixdim[] in this method.
-
-      WHY 3 METHODS?
-      --------------
-      Method 1 is provided only for backwards compatibility.  The intention
-      is that Method 2 (qform_code > 0) represents the nominal voxel locations
-      as reported by the scanner, or as rotated to some fiducial orientation and
-      location.  Method 3, if present (sform_code > 0), is to be used to give
-      the location of the voxels in some standard space.  The sform_code
-      indicates which standard space is present.  Both methods 2 and 3 can be
-      present, and be useful in different contexts (method 2 for displaying the
-      data on its original grid; method 3 for displaying it on a standard grid).
-
-      In this scheme, a dataset would originally be set up so that the
-      Method 2 coordinates represent what the scanner reported.  Later,
-      a registration to some standard space can be computed and inserted
-      in the header.  Image display software can use either transform,
-      depending on its purposes and needs.
-
-      In Method 2, the origin of coordinates would generally be whatever
-      the scanner origin is; for example, in MRI, (0,0,0) is the center
-      of the gradient coil.
-
-      In Method 3, the origin of coordinates would depend on the value
-      of sform_code; for example, for the Talairach coordinate system,
-      (0,0,0) corresponds to the Anterior Commissure.
-
-      QUATERNION REPRESENTATION OF ROTATION MATRIX (METHOD 2)
-      -------------------------------------------------------
-      The orientation of the (x,y,z) axes relative to the (i,j,k) axes
-      in 3D space is specified using a unit quaternion [a,b,c,d], where
-      a*a+b*b+c*c+d*d=1.  The (b,c,d) values are all that is needed, since
-      we require that a = sqrt(1.0-(b*b+c*c+d*d)) be nonnegative.  The (b,c,d)
-      values are stored in the (quatern_b,quatern_c,quatern_d) fields.
-
-      The quaternion representation is chosen for its compactness in
-      representing rotations. The (proper) 3x3 rotation matrix that
-      corresponds to [a,b,c,d] is
-
-            [ a*a+b*b-c*c-d*d   2*b*c-2*a*d       2*b*d+2*a*c     ]
-        R = [ 2*b*c+2*a*d       a*a+c*c-b*b-d*d   2*c*d-2*a*b     ]
-            [ 2*b*d-2*a*c       2*c*d+2*a*b       a*a+d*d-c*c-b*b ]
-
-            [ R11               R12               R13             ]
-          = [ R21               R22               R23             ]
-            [ R31               R32               R33             ]
-
-      If (p,q,r) is a unit 3-vector, then rotation of angle h about that
-      direction is represented by the quaternion
-
-        [a,b,c,d] = [cos(h/2), p*sin(h/2), q*sin(h/2), r*sin(h/2)].
-
-      Requiring a >= 0 is equivalent to requiring -Pi <= h <= Pi.  (Note that
-      [-a,-b,-c,-d] represents the same rotation as [a,b,c,d]; there are 2
-      quaternions that can be used to represent a given rotation matrix R.)
-      To rotate a 3-vector (x,y,z) using quaternions, we compute the
-      quaternion product
-
-        [0,x',y',z'] = [a,b,c,d] * [0,x,y,z] * [a,-b,-c,-d]
-
-      which is equivalent to the matrix-vector multiply
-
-        [ x' ]     [ x ]
-        [ y' ] = R [ y ]   (equivalence depends on a*a+b*b+c*c+d*d=1)
-        [ z' ]     [ z ]
-
-      Multiplication of 2 quaternions is defined by the following:
-
-        [a,b,c,d] = a*1 + b*I + c*J + d*K
-        where
-          I*I = J*J = K*K = -1 (I,J,K are square roots of -1)
-          I*J =  K    J*K =  I    K*I =  J
-          J*I = -K    K*J = -I    I*K = -J  (not commutative!)
-        For example
-          [a,b,0,0] * [0,0,0,1] = [0,0,-b,a]
-        since this expands to
-          (a+b*I)*(K) = (a*K+b*I*K) = (a*K-b*J).
-
-      The above formula shows how to go from quaternion (b,c,d) to
-      rotation matrix and direction cosines.  Conversely, given R,
-      we can compute the fields for the NIFTI-1 header by
-
-        a = 0.5  * sqrt(1+R11+R22+R33)    (not stored)
-        b = 0.25 * (R32-R23) / a       => quatern_b
-        c = 0.25 * (R13-R31) / a       => quatern_c
-        d = 0.25 * (R21-R12) / a       => quatern_d
-
-      If a=0 (a 180 degree rotation), alternative formulas are needed.
-      See the nifti1_io.c function mat44_to_quatern() for an implementation
-      of the various cases in converting R to [a,b,c,d].
-
-      Note that R-transpose (= R-inverse) would lead to the quaternion
-      [a,-b,-c,-d].
-
-      The choice to specify the qoffset_x (etc.) values in the final
-      coordinate system is partly to make it easy to convert DICOM images to
-      this format.  The DICOM attribute "Image Position (Patient)" (0020,0032)
-      stores the (Xd,Yd,Zd) coordinates of the center of the first voxel.
-      Here, (Xd,Yd,Zd) refer to DICOM coordinates, and Xd=-x, Yd=-y, Zd=z,
-      where (x,y,z) refers to the NIFTI coordinate system discussed above.
-      (i.e., DICOM +Xd is Left, +Yd is Posterior, +Zd is Superior,
-           whereas +x is Right, +y is Anterior  , +z is Superior. )
-      Thus, if the (0020,0032) DICOM attribute is extracted into (px,py,pz), then
-        qoffset_x = -px   qoffset_y = -py   qoffset_z = pz
-      is a reasonable setting when qform_code=NIFTI_XFORM_SCANNER_ANAT.
-
-      That is, DICOM's coordinate system is 180 degrees rotated about the z-axis
-      from the neuroscience/NIFTI coordinate system.  To transform between DICOM
-      and NIFTI, you just have to negate the x- and y-coordinates.
-
-      The DICOM attribute (0020,0037) "Image Orientation (Patient)" gives the
-      orientation of the x- and y-axes of the image data in terms of 2 3-vectors.
-      The first vector is a unit vector along the x-axis, and the second is
-      along the y-axis.  If the (0020,0037) attribute is extracted into the
-      value (xa,xb,xc,ya,yb,yc), then the first two columns of the R matrix
-      would be
-                 [ -xa  -ya ]
-                 [ -xb  -yb ]
-                 [  xc   yc ]
-      The negations are because DICOM's x- and y-axes are reversed relative
-      to NIFTI's.  The third column of the R matrix gives the direction of
-      displacement (relative to the subject) along the slice-wise direction.
-      This orientation is not encoded in the DICOM standard in a simple way;
-      DICOM is mostly concerned with 2D images.  The third column of R will be
-      either the cross-product of the first 2 columns or its negative.  It is
-      possible to infer the sign of the 3rd column by examining the coordinates
-      in DICOM attribute (0020,0032) "Image Position (Patient)" for successive
-      slices.  However, this method occasionally fails for reasons that I
-      (RW Cox) do not understand.
-   -----------------------------------------------------------------------------*/
-
-   /* [qs]form_code value:  */      /* x,y,z coordinate system refers to:    */
-   /*-----------------------*/      /*---------------------------------------*/
-
-   /*! \defgroup NIFTI1_XFORM_CODES
-       \brief nifti1 xform codes to describe the "standard" coordinate system
-       @{
-    */
-   /*! Arbitrary coordinates (Method 1). */
-
-#define NIFTI_XFORM_UNKNOWN      0
-
-   /*! Scanner-based anatomical coordinates */
-
-#define NIFTI_XFORM_SCANNER_ANAT 1
-
-   /*! Coordinates aligned to another file's,
-       or to anatomical "truth".            */
-
-#define NIFTI_XFORM_ALIGNED_ANAT 2
-
-   /*! Coordinates aligned to Talairach-
-       Tournoux Atlas; (0,0,0)=AC, etc. */
-
-#define NIFTI_XFORM_TALAIRACH    3
-
-   /*! MNI 152 normalized coordinates. */
-
-#define NIFTI_XFORM_MNI_152      4
-   /* @} */
-
-   /*---------------------------------------------------------------------------*/
-   /* UNITS OF SPATIAL AND TEMPORAL DIMENSIONS:
-      ----------------------------------------
-      The codes below can be used in xyzt_units to indicate the units of pixdim.
-      As noted earlier, dimensions 1,2,3 are for x,y,z; dimension 4 is for
-      time (t).
-       - If dim[4]=1 or dim[0] < 4, there is no time axis.
-       - A single time series (no space) would be specified with
-         - dim[0] = 4 (for scalar data) or dim[0] = 5 (for vector data)
-         - dim[1] = dim[2] = dim[3] = 1
-         - dim[4] = number of time points
-         - pixdim[4] = time step
-         - xyzt_units indicates units of pixdim[4]
-         - dim[5] = number of values stored at each time point
-
-      Bits 0..2 of xyzt_units specify the units of pixdim[1..3]
-       (e.g., spatial units are values 1..7).
-      Bits 3..5 of xyzt_units specify the units of pixdim[4]
-       (e.g., temporal units are multiples of 8).
-
-      This compression of 2 distinct concepts into 1 byte is due to the
-      limited space available in the 348 byte ANALYZE 7.5 header.  The
-      macros XYZT_TO_SPACE and XYZT_TO_TIME can be used to mask off the
-      undesired bits from the xyzt_units fields, leaving "pure" space
-      and time codes.  Inversely, the macro SPACE_TIME_TO_XYZT can be
-      used to assemble a space code (0,1,2,...,7) with a time code
-      (0,8,16,32,...,56) into the combined value for xyzt_units.
-
-      Note that codes are provided to indicate the "time" axis units are
-      actually frequency in Hertz (_HZ), in part-per-million (_PPM)
-      or in radians-per-second (_RADS).
-
-      The toffset field can be used to indicate a nonzero start point for
-      the time axis.  That is, time point #m is at t=toffset+m*pixdim[4]
-      for m=0..dim[4]-1.
-   -----------------------------------------------------------------------------*/
-
-   /*! \defgroup NIFTI1_UNITS
-       \brief nifti1 units codes to describe the unit of measurement for
-              each dimension of the dataset
-       @{
-    */
-   /*! NIFTI code for unspecified units. */
-#define NIFTI_UNITS_UNKNOWN 0
-
-   /** Space codes are multiples of 1. **/
-   /*! NIFTI code for meters. */
-#define NIFTI_UNITS_METER   1
-   /*! NIFTI code for millimeters. */
-#define NIFTI_UNITS_MM      2
-   /*! NIFTI code for micrometers. */
-#define NIFTI_UNITS_MICRON  3
-
-   /** Time codes are multiples of 8. **/
-   /*! NIFTI code for seconds. */
-#define NIFTI_UNITS_SEC     8
-   /*! NIFTI code for milliseconds. */
-#define NIFTI_UNITS_MSEC   16
-   /*! NIFTI code for microseconds. */
-#define NIFTI_UNITS_USEC   24
-
-   /*** These units are for spectral data: ***/
-   /*! NIFTI code for Hertz. */
-#define NIFTI_UNITS_HZ     32
-   /*! NIFTI code for ppm. */
-#define NIFTI_UNITS_PPM    40
-   /*! NIFTI code for radians per second. */
-#define NIFTI_UNITS_RADS   48
-   /* @} */
-
-#undef  XYZT_TO_SPACE
-#undef  XYZT_TO_TIME
-#define XYZT_TO_SPACE(xyzt)       ( (xyzt) & 0x07 )
-#define XYZT_TO_TIME(xyzt)        ( (xyzt) & 0x38 )
-
-#undef  SPACE_TIME_TO_XYZT
-#define SPACE_TIME_TO_XYZT(ss,tt) (  (((char)(ss)) & 0x07)   \
-                                   | (((char)(tt)) & 0x38) )
-
-   /*---------------------------------------------------------------------------*/
-   /* MRI-SPECIFIC SPATIAL AND TEMPORAL INFORMATION:
-      ---------------------------------------------
-      A few fields are provided to store some extra information
-      that is sometimes important when storing the image data
-      from an FMRI time series experiment.  (After processing such
-      data into statistical images, these fields are not likely
-      to be useful.)
-
-     { freq_dim  } = These fields encode which spatial dimension (1,2, or 3)
-     { phase_dim } = corresponds to which acquisition dimension for MRI data.
-     { slice_dim } =
-       Examples:
-         Rectangular scan multi-slice EPI:
-           freq_dim = 1  phase_dim = 2  slice_dim = 3  (or some permutation)
-         Spiral scan multi-slice EPI:
-           freq_dim = phase_dim = 0  slice_dim = 3
-           since the concepts of frequency- and phase-encoding directions
-           don't apply to spiral scan
-
-       slice_duration = If this is positive, AND if slice_dim is nonzero,
-                        indicates the amount of time used to acquire 1 slice.
-                        slice_duration*dim[slice_dim] can be less than pixdim[4]
-                        with a clustered acquisition method, for example.
-
-       slice_code = If this is nonzero, AND if slice_dim is nonzero, AND
-                    if slice_duration is positive, indicates the timing
-                    pattern of the slice acquisition.  The following codes
-                    are defined:
-                      NIFTI_SLICE_SEQ_INC  == sequential increasing
-                      NIFTI_SLICE_SEQ_DEC  == sequential decreasing
-                      NIFTI_SLICE_ALT_INC  == alternating increasing
-                      NIFTI_SLICE_ALT_DEC  == alternating decreasing
-                      NIFTI_SLICE_ALT_INC2 == alternating increasing #2
-                      NIFTI_SLICE_ALT_DEC2 == alternating decreasing #2
-     { slice_start } = Indicates the start and end of the slice acquisition
-     { slice_end   } = pattern, when slice_code is nonzero.  These values
-                       are present to allow for the possible addition of
-                       "padded" slices at either end of the volume, which
-                       don't fit into the slice timing pattern.  If there
-                       are no padding slices, then slice_start=0 and
-                       slice_end=dim[slice_dim]-1 are the correct values.
-                       For these values to be meaningful, slice_start must
-                       be non-negative and slice_end must be greater than
-                       slice_start.  Otherwise, they should be ignored.
-
-     The following table indicates the slice timing pattern, relative to
-     time=0 for the first slice acquired, for some sample cases.  Here,
-     dim[slice_dim]=7 (there are 7 slices, labeled 0..6), slice_duration=0.1,
-     and slice_start=1, slice_end=5 (1 padded slice on each end).
-
-     slice
-     index  SEQ_INC SEQ_DEC ALT_INC ALT_DEC ALT_INC2 ALT_DEC2
-       6  :   n/a     n/a     n/a     n/a    n/a      n/a    n/a = not applicable
-       5  :   0.4     0.0     0.2     0.0    0.4      0.2    (slice time offset
-       4  :   0.3     0.1     0.4     0.3    0.1      0.0     doesn't apply to
-       3  :   0.2     0.2     0.1     0.1    0.3      0.3     slices outside
-       2  :   0.1     0.3     0.3     0.4    0.0      0.1     the range
-       1  :   0.0     0.4     0.0     0.2    0.2      0.4     slice_start ..
-       0  :   n/a     n/a     n/a     n/a    n/a      n/a     slice_end)
-
-     The SEQ slice_codes are sequential ordering (uncommon but not unknown),
-     either increasing in slice number or decreasing (INC or DEC), as
-     illustrated above.
-
-     The ALT slice codes are alternating ordering.  The 'standard' way for
-     these to operate (without the '2' on the end) is for the slice timing
-     to start at the edge of the slice_start .. slice_end group (at slice_start
-     for INC and at slice_end for DEC).  For the 'ALT_*2' slice_codes, the
-     slice timing instead starts at the first slice in from the edge (at
-     slice_start+1 for INC2 and at slice_end-1 for DEC2).  This latter
-     acquisition scheme is found on some Siemens scanners.
-
-     The fields freq_dim, phase_dim, slice_dim are all squished into the single
-     byte field dim_info (2 bits each, since the values for each field are
-     limited to the range 0..3).  This unpleasantness is due to lack of space
-     in the 348 byte allowance.
-
-     The macros DIM_INFO_TO_FREQ_DIM, DIM_INFO_TO_PHASE_DIM, and
-     DIM_INFO_TO_SLICE_DIM can be used to extract these values from the
-     dim_info byte.
-
-     The macro FPS_INTO_DIM_INFO can be used to put these 3 values
-     into the dim_info byte.
-   -----------------------------------------------------------------------------*/
-
-#undef  DIM_INFO_TO_FREQ_DIM
-#undef  DIM_INFO_TO_PHASE_DIM
-#undef  DIM_INFO_TO_SLICE_DIM
-
-#define DIM_INFO_TO_FREQ_DIM(di)   ( ((di)     ) & 0x03 )
-#define DIM_INFO_TO_PHASE_DIM(di)  ( ((di) >> 2) & 0x03 )
-#define DIM_INFO_TO_SLICE_DIM(di)  ( ((di) >> 4) & 0x03 )
-
-#undef  FPS_INTO_DIM_INFO
-#define FPS_INTO_DIM_INFO(fd,pd,sd) ( ( ( ((char)(fd)) & 0x03)      ) |  \
-                                      ( ( ((char)(pd)) & 0x03) << 2 ) |  \
-                                      ( ( ((char)(sd)) & 0x03) << 4 )  )
-
-   /*! \defgroup NIFTI1_SLICE_ORDER
-       \brief nifti1 slice order codes, describing the acquisition order
-              of the slices
-       @{
-    */
-#define NIFTI_SLICE_UNKNOWN   0
-#define NIFTI_SLICE_SEQ_INC   1
-#define NIFTI_SLICE_SEQ_DEC   2
-#define NIFTI_SLICE_ALT_INC   3
-#define NIFTI_SLICE_ALT_DEC   4
-#define NIFTI_SLICE_ALT_INC2  5  /* 05 May 2005: RWCox */
-#define NIFTI_SLICE_ALT_DEC2  6  /* 05 May 2005: RWCox */
-   /* @} */
-
-   /*---------------------------------------------------------------------------*/
-   /* UNUSED FIELDS:
-      -------------
-      Some of the ANALYZE 7.5 fields marked as ++UNUSED++ may need to be set
-      to particular values for compatibility with other programs.  The issue
-      of interoperability of ANALYZE 7.5 files is a murky one -- not all
-      programs require exactly the same set of fields.  (Unobscuring this
-      murkiness is a principal motivation behind NIFTI-1.)
-
-      Some of the fields that may need to be set for other (non-NIFTI aware)
-      software to be happy are:
-
-        extents    dbh.h says this should be 16384
-        regular    dbh.h says this should be the character 'r'
-        glmin,   } dbh.h says these values should be the min and max voxel
-         glmax   }  values for the entire dataset
-
-      It is best to initialize ALL fields in the NIFTI-1 header to 0
-      (e.g., with calloc()), then fill in what is needed.
-   -----------------------------------------------------------------------------*/
-
-   /*---------------------------------------------------------------------------*/
-   /* MISCELLANEOUS C MACROS
-   -----------------------------------------------------------------------------*/
-
-   /*.................*/
-   /*! Given a nifti_1_header struct, check if it has a good magic number.
-       Returns NIFTI version number (1..9) if magic is good, 0 if it is not. */
-
-#define NIFTI_VERSION(h)                               \
- ( ( (h).magic[0]=='n' && (h).magic[3]=='\0'    &&     \
-     ( (h).magic[1]=='i' || (h).magic[1]=='+' ) &&     \
-     ( (h).magic[2]>='1' && (h).magic[2]<='9' )   )    \
- ? (h).magic[2]-'0' : 0 )
-
-   /*.................*/
-   /*! Check if a nifti_1_header struct says if the data is stored in the
-       same file or in a separate file.  Returns 1 if the data is in the same
-       file as the header, 0 if it is not.                                   */
-
-#define NIFTI_ONEFILE(h) ( (h).magic[1] == '+' )
-
-   /*.................*/
-   /*! Check if a nifti_1_header struct needs to be byte swapped.
-       Returns 1 if it needs to be swapped, 0 if it does not.     */
-
-#define NIFTI_NEEDS_SWAP(h) ( (h).dim[0] < 0 || (h).dim[0] > 7 )
-
-   /*.................*/
-   /*! Check if a nifti_1_header struct contains a 5th (vector) dimension.
-       Returns size of 5th dimension if > 1, returns 0 otherwise.         */
-
-#define NIFTI_5TH_DIM(h) ( ((h).dim[0]>4 && (h).dim[5]>1) ? (h).dim[5] : 0 )
-
-   /*****************************************************************************/
-
-   /*=================*/
-#ifdef  __cplusplus
-}
-#endif
-/*=================*/
diff --git a/reg-io/nifti/nifti1_io.h b/reg-io/nifti/nifti1_io.h
deleted file mode 100755
index df0f9b1e..00000000
--- a/reg-io/nifti/nifti1_io.h
+++ /dev/null
@@ -1,549 +0,0 @@
-/** \file nifti1_io.h
-    \brief Data structures for using nifti1_io API.
-           - Written by Bob Cox, SSCC NIMH
-           - Revisions by Rick Reynolds, SSCC NIMH
- */
-
-#pragma once
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <ctype.h>
-
-#ifndef DONT_INCLUDE_ANALYZE_STRUCT
-#define DONT_INCLUDE_ANALYZE_STRUCT  /*** not needed herein ***/
-#endif
-#include "nifti1.h"                  /*** NIFTI-1 header specification ***/
-
-#include <znzlib.h>
-
-/*=================*/
-#ifdef  __cplusplus
-extern "C" {
-#endif
-   /*=================*/
-
-   /*****===================================================================*****/
-   /*****         File nifti1_io.h == Declarations for nifti1_io.c          *****/
-   /*****...................................................................*****/
-   /*****            This code is released to the public domain.            *****/
-   /*****...................................................................*****/
-   /*****  Author: Robert W Cox, SSCC/DIRP/NIMH/NIH/DHHS/USA/EARTH          *****/
-   /*****  Date:   August 2003                                              *****/
-   /*****...................................................................*****/
-   /*****  Neither the National Institutes of Health (NIH), nor any of its  *****/
-   /*****  employees imply any warranty of usefulness of this software for  *****/
-   /*****  any purpose, and do not assume any liability for damages,        *****/
-   /*****  incidental or otherwise, caused by any use of this document.     *****/
-   /*****===================================================================*****/
-
-   /*
-      Modified by: Mark Jenkinson (FMRIB Centre, University of Oxford, UK)
-      Date: July/August 2004
-
-         Mainly adding low-level IO and changing things to allow gzipped files
-         to be read and written
-         Full backwards compatability should have been maintained
-
-      Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health)
-      Date: December 2004
-
-         Modified and added many routines for I/O.
-   */
-
-   /********************** Some sample data structures **************************/
-
-   typedef struct                     /** 4x4 matrix struct **/
-   {
-      float m[4][4] ;
-   } mat44 ;
-
-   typedef struct                     /** 3x3 matrix struct **/
-   {
-      float m[3][3] ;
-   } mat33 ;
-
-   /*...........................................................................*/
-
-   /*! \enum analyze_75_orient_code
-    *  \brief Old-style analyze75 orientation
-    *         codes.
-    */
-   typedef enum _analyze75_orient_code
-   {
-      a75_transverse_unflipped = 0,
-      a75_coronal_unflipped = 1,
-      a75_sagittal_unflipped = 2,
-      a75_transverse_flipped = 3,
-      a75_coronal_flipped = 4,
-      a75_sagittal_flipped = 5,
-      a75_orient_unknown = 6
-   } analyze_75_orient_code;
-
-   /*! \struct nifti_image
-       \brief High level data structure for open nifti datasets in the
-              nifti1_io API.  Note that this structure is not part of the
-              nifti1 format definition; it is used to implement one API
-              for reading/writing formats in the nifti1 format.
-    */
-   typedef struct                  /*!< Image storage struct **/
-   {
-
-      int ndim ;                    /*!< last dimension greater than 1 (1..7) */
-      int nx ;                      /*!< dimensions of grid array             */
-      int ny ;                      /*!< dimensions of grid array             */
-      int nz ;                      /*!< dimensions of grid array             */
-      int nt ;                      /*!< dimensions of grid array             */
-      int nu ;                      /*!< dimensions of grid array             */
-      int nv ;                      /*!< dimensions of grid array             */
-      int nw ;                      /*!< dimensions of grid array             */
-      int dim[8] ;                  /*!< dim[0]=ndim, dim[1]=nx, etc.         */
-      size_t nvox ;                    /*!< number of voxels = nx*ny*nz*...*nw   */
-      int nbyper ;                  /*!< bytes per voxel, matches datatype    */
-      int datatype ;                /*!< type of data in voxels: DT_* code    */
-
-      float dx ;                    /*!< grid spacings      */
-      float dy ;                    /*!< grid spacings      */
-      float dz ;                    /*!< grid spacings      */
-      float dt ;                    /*!< grid spacings      */
-      float du ;                    /*!< grid spacings      */
-      float dv ;                    /*!< grid spacings      */
-      float dw ;                    /*!< grid spacings      */
-      float pixdim[8] ;             /*!< pixdim[1]=dx, etc. */
-
-      float scl_slope ;             /*!< scaling parameter - slope        */
-      float scl_inter ;             /*!< scaling parameter - intercept    */
-
-      float cal_min ;               /*!< calibration parameter, minimum   */
-      float cal_max ;               /*!< calibration parameter, maximum   */
-
-      int qform_code ;              /*!< codes for (x,y,z) space meaning  */
-      int sform_code ;              /*!< codes for (x,y,z) space meaning  */
-
-      int freq_dim  ;               /*!< indexes (1,2,3, or 0) for MRI    */
-      int phase_dim ;               /*!< directions in dim[]/pixdim[]     */
-      int slice_dim ;               /*!< directions in dim[]/pixdim[]     */
-
-      int   slice_code  ;           /*!< code for slice timing pattern    */
-      int   slice_start ;           /*!< index for start of slices        */
-      int   slice_end   ;           /*!< index for end of slices          */
-      float slice_duration ;        /*!< time between individual slices   */
-
-      /*! quaternion transform parameters
-        [when writing a dataset, these are used for qform, NOT qto_xyz]   */
-      float quatern_b , quatern_c , quatern_d ,
-            qoffset_x , qoffset_y , qoffset_z ,
-            qfac      ;
-
-      mat44 qto_xyz ;               /*!< qform: transform (i,j,k) to (x,y,z) */
-      mat44 qto_ijk ;               /*!< qform: transform (x,y,z) to (i,j,k) */
-
-      mat44 sto_xyz ;               /*!< sform: transform (i,j,k) to (x,y,z) */
-      mat44 sto_ijk ;               /*!< sform: transform (x,y,z) to (i,j,k) */
-
-      float toffset ;               /*!< time coordinate offset */
-
-      int xyz_units  ;              /*!< dx,dy,dz units: NIFTI_UNITS_* code  */
-      int time_units ;              /*!< dt       units: NIFTI_UNITS_* code  */
-
-      int nifti_type ;              /*!< 0==ANALYZE, 1==NIFTI-1 (1 file),
-                                                 2==NIFTI-1 (2 files),
-                                                 3==NIFTI-ASCII (1 file) */
-      int   intent_code ;           /*!< statistic type (or something)       */
-      float intent_p1 ;             /*!< intent parameters                   */
-      float intent_p2 ;             /*!< intent parameters                   */
-      float intent_p3 ;             /*!< intent parameters                   */
-      char  intent_name[16] ;       /*!< optional description of intent data */
-
-      char descrip[80]  ;           /*!< optional text to describe dataset   */
-      char aux_file[24] ;           /*!< auxiliary filename                  */
-
-      char *fname ;                 /*!< header filename (.hdr or .nii)         */
-      char *iname ;                 /*!< image filename  (.img or .nii)         */
-      int   iname_offset ;          /*!< offset into iname where data starts    */
-      int   swapsize ;              /*!< swap unit in image data (might be 0)   */
-      int   byteorder ;             /*!< byte order on disk (MSB_ or LSB_FIRST) */
-      void *data ;                  /*!< pointer to data: nbyper*nvox bytes     */
-
-      int                num_ext ;  /*!< number of extensions in ext_list       */
-      nifti1_extension * ext_list ; /*!< array of extension structs (with data) */
-      analyze_75_orient_code analyze75_orient; /*!< for old analyze files, orient */
-
-   } nifti_image ;
-
-
-
-   /* struct for return from nifti_image_read_bricks() */
-   typedef struct
-   {
-      int       nbricks;    /* the number of allocated pointers in 'bricks' */
-      size_t    bsize;      /* the length of each data block, in bytes      */
-      void   ** bricks;     /* array of pointers to data blocks             */
-   } nifti_brick_list;
-
-
-   /*****************************************************************************/
-   /*------------------ NIfTI version of ANALYZE 7.5 structure -----------------*/
-
-   /* (based on fsliolib/dbh.h, but updated for version 7.5) */
-
-   typedef struct
-   {
-      /* header info fields - describes the header    overlap with NIfTI */
-      /*                                              ------------------ */
-      int sizeof_hdr;                  /* 0 + 4        same              */
-      char data_type[10];              /* 4 + 10       same              */
-      char db_name[18];                /* 14 + 18      same              */
-      int extents;                     /* 32 + 4       same              */
-      short int session_error;         /* 36 + 2       same              */
-      char regular;                    /* 38 + 1       same              */
-      char hkey_un0;                   /* 39 + 1                40 bytes */
-
-      /* image dimension fields - describes image sizes */
-      short int dim[8];                /* 0 + 16       same              */
-      short int unused8;               /* 16 + 2       intent_p1...      */
-      short int unused9;               /* 18 + 2         ...             */
-      short int unused10;              /* 20 + 2       intent_p2...      */
-      short int unused11;              /* 22 + 2         ...             */
-      short int unused12;              /* 24 + 2       intent_p3...      */
-      short int unused13;              /* 26 + 2         ...             */
-      short int unused14;              /* 28 + 2       intent_code       */
-      short int datatype;              /* 30 + 2       same              */
-      short int bitpix;                /* 32 + 2       same              */
-      short int dim_un0;               /* 34 + 2       slice_start       */
-      float pixdim[8];                 /* 36 + 32      same              */
-
-      float vox_offset;                /* 68 + 4       same              */
-      float funused1;                  /* 72 + 4       scl_slope         */
-      float funused2;                  /* 76 + 4       scl_inter         */
-      float funused3;                  /* 80 + 4       slice_end,        */
-      /* slice_code,       */
-      /* xyzt_units        */
-      float cal_max;                   /* 84 + 4       same              */
-      float cal_min;                   /* 88 + 4       same              */
-      float compressed;                /* 92 + 4       slice_duration    */
-      float verified;                  /* 96 + 4       toffset           */
-      int glmax,glmin;                 /* 100 + 8              108 bytes */
-
-      /* data history fields - optional */
-      char descrip[80];                /* 0 + 80       same              */
-      char aux_file[24];               /* 80 + 24      same              */
-      char orient;                     /* 104 + 1      NO GOOD OVERLAP   */
-      char originator[10];             /* 105 + 10     FROM HERE DOWN... */
-      char generated[10];              /* 115 + 10                       */
-      char scannum[10];                /* 125 + 10                       */
-      char patient_id[10];             /* 135 + 10                       */
-      char exp_date[10];               /* 145 + 10                       */
-      char exp_time[10];               /* 155 + 10                       */
-      char hist_un0[3];                /* 165 + 3                        */
-      int views;                       /* 168 + 4                        */
-      int vols_added;                  /* 172 + 4                        */
-      int start_field;                 /* 176 + 4                        */
-      int field_skip;                  /* 180 + 4                        */
-      int omax, omin;                  /* 184 + 8                        */
-      int smax, smin;                  /* 192 + 8              200 bytes */
-   } nifti_analyze75;                                   /* total:  348 bytes */
-
-
-   /*****************************************************************************/
-   /*--------------- Prototypes of functions defined in this file --------------*/
-
-   char *nifti_datatype_string   ( int dt ) ;
-   char *nifti_units_string      ( int uu ) ;
-   char *nifti_intent_string     ( int ii ) ;
-   char *nifti_xform_string      ( int xx ) ;
-   char *nifti_slice_string      ( int ss ) ;
-   char *nifti_orientation_string( int ii ) ;
-
-   int   nifti_is_inttype( int dt ) ;
-
-   mat44 nifti_mat44_inverse( mat44 R ) ;
-
-   mat33 nifti_mat33_inverse( mat33 R ) ;
-   mat33 nifti_mat33_polar  ( mat33 A ) ;
-   float nifti_mat33_rownorm( mat33 A ) ;
-   float nifti_mat33_colnorm( mat33 A ) ;
-   float nifti_mat33_determ ( mat33 R ) ;
-   mat33 nifti_mat33_mul    ( mat33 A , mat33 B ) ;
-
-   void  nifti_swap_2bytes ( size_t n , void *ar ) ;
-   void  nifti_swap_4bytes ( size_t n , void *ar ) ;
-   void  nifti_swap_8bytes ( size_t n , void *ar ) ;
-   void  nifti_swap_16bytes( size_t n , void *ar ) ;
-   void  nifti_swap_Nbytes ( size_t n , int siz , void *ar ) ;
-
-   int    nifti_datatype_is_valid   (int dtype, int for_nifti);
-   int    nifti_datatype_from_string(const char * name);
-   char * nifti_datatype_to_string  (int dtype);
-
-   int   nifti_get_filesize( const char *pathname ) ;
-   void  swap_nifti_header ( struct nifti_1_header *h , int is_nifti ) ;
-   void  old_swap_nifti_header( struct nifti_1_header *h , int is_nifti );
-   int   nifti_swap_as_analyze( nifti_analyze75 *h );
-
-
-   /* main read/write routines */
-
-   nifti_image *nifti_image_read_bricks(const char *hname , int nbricks,
-                                        const int *blist, nifti_brick_list * NBL);
-   int          nifti_image_load_bricks(nifti_image *nim , int nbricks,
-                                        const int *blist, nifti_brick_list * NBL);
-   void         nifti_free_NBL( nifti_brick_list * NBL );
-
-   nifti_image *nifti_image_read    ( const char *hname , int read_data ) ;
-   int          nifti_image_load    ( nifti_image *nim ) ;
-   void         nifti_image_unload  ( nifti_image *nim ) ;
-   void         nifti_image_free    ( nifti_image *nim ) ;
-
-   int          nifti_read_collapsed_image( nifti_image * nim, const int dims [8],
-         void ** data );
-
-   int          nifti_read_subregion_image( nifti_image * nim,
-         int *start_index, int *region_size,
-         void ** data );
-
-   void         nifti_image_write   ( nifti_image * nim ) ;
-   void         nifti_image_write_bricks(nifti_image * nim,
-                                         const nifti_brick_list * NBL);
-   void         nifti_image_infodump( const nifti_image * nim ) ;
-
-   void         nifti_disp_lib_hist( void ) ;     /* to display library history */
-   void         nifti_disp_lib_version( void ) ;  /* to display library version */
-   int          nifti_disp_matrix_orient( const char * mesg, mat44 mat );
-   int          nifti_disp_type_list( int which );
-
-
-   char *       nifti_image_to_ascii  ( const nifti_image * nim ) ;
-   nifti_image *nifti_image_from_ascii( const char * str, int * bytes_read ) ;
-
-   size_t       nifti_get_volsize(const nifti_image *nim) ;
-
-   /* basic file operations */
-   int    nifti_set_filenames(nifti_image * nim, const char * prefix, int check,
-                              int set_byte_order);
-   char * nifti_makehdrname  (const char * prefix, int nifti_type, int check,
-                              int comp);
-   char * nifti_makeimgname  (const char * prefix, int nifti_type, int check,
-                              int comp);
-   int    is_nifti_file      (const char *hname);
-   char * nifti_find_file_extension(const char * name);
-   int    nifti_is_complete_filename(const char* fname);
-   int    nifti_validfilename(const char* fname);
-
-   int    disp_nifti_1_header(const char * info, const nifti_1_header * hp ) ;
-   void   nifti_set_debug_level( int level ) ;
-   void   nifti_set_skip_blank_ext( int skip ) ;
-   void   nifti_set_allow_upper_fext( int allow ) ;
-
-   int    valid_nifti_brick_list(nifti_image * nim , int nbricks,
-                                 const int * blist, int disp_error);
-
-   /* znzFile operations */
-   znzFile nifti_image_open(const char * hname, char * opts, nifti_image ** nim);
-   znzFile nifti_image_write_hdr_img(nifti_image *nim, int write_data,
-                                     const char* opts);
-   znzFile nifti_image_write_hdr_img2( nifti_image *nim , int write_opts ,
-                                       const char* opts, znzFile imgfile, const nifti_brick_list * NBL);
-   size_t  nifti_read_buffer(znzFile fp, void* datatptr, size_t ntot,
-                             nifti_image *nim);
-   int     nifti_write_all_data(znzFile fp, nifti_image * nim,
-                                const nifti_brick_list * NBL);
-   size_t  nifti_write_buffer(znzFile fp, const void * buffer, size_t numbytes);
-   nifti_image *nifti_read_ascii_image(znzFile fp, char *fname, int flen,
-                                       int read_data);
-   znzFile nifti_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL,
-                                   const char * opts, int write_data, int leave_open);
-
-
-   void nifti_datatype_sizes( int datatype , int *nbyper, int *swapsize ) ;
-
-   void nifti_mat44_to_quatern( mat44 R ,
-                                float *qb, float *qc, float *qd,
-                                float *qx, float *qy, float *qz,
-                                float *dx, float *dy, float *dz, float *qfac ) ;
-
-   mat44 nifti_quatern_to_mat44( float qb, float qc, float qd,
-                                 float qx, float qy, float qz,
-                                 float dx, float dy, float dz, float qfac );
-
-   mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 ,
-                                  float r21, float r22, float r23 ,
-                                  float r31, float r32, float r33  ) ;
-
-   int nifti_short_order(void) ;              /* CPU byte order */
-
-
-   /* Orientation codes that might be returned from nifti_mat44_to_orientation().*/
-
-#define NIFTI_L2R  1    /* Left to Right         */
-#define NIFTI_R2L  2    /* Right to Left         */
-#define NIFTI_P2A  3    /* Posterior to Anterior */
-#define NIFTI_A2P  4    /* Anterior to Posterior */
-#define NIFTI_I2S  5    /* Inferior to Superior  */
-#define NIFTI_S2I  6    /* Superior to Inferior  */
-
-   void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) ;
-
-   /*--------------------- Low level IO routines ------------------------------*/
-
-   char * nifti_findhdrname (const char* fname);
-   char * nifti_findimgname (const char* fname , int nifti_type);
-   int    nifti_is_gzfile   (const char* fname);
-
-   char * nifti_makebasename(const char* fname);
-
-
-   /* other routines */
-   struct nifti_1_header   nifti_convert_nim2nhdr(const nifti_image *nim);
-   nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype);
-   nifti_1_header * nifti_read_header(const char *hname, int *swapped, int check);
-   nifti_image    * nifti_copy_nim_info(const nifti_image * src);
-   nifti_image    * nifti_make_new_nim(const int dims[], int datatype,
-                                       int data_fill);
-   nifti_image    * nifti_simple_init_nim(void);
-   nifti_image    * nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
-                                           const char * fname);
-
-   int    nifti_hdr_looks_good        (const nifti_1_header * hdr);
-   int    nifti_is_valid_datatype     (int dtype);
-   int    nifti_is_valid_ecode        (int ecode);
-   int    nifti_nim_is_valid          (nifti_image * nim, int complain);
-   int    nifti_nim_has_valid_dims    (nifti_image * nim, int complain);
-   int    is_valid_nifti_type         (int nifti_type);
-   int    nifti_test_datatype_sizes   (int verb);
-   int    nifti_type_and_names_match  (nifti_image * nim, int show_warn);
-   int    nifti_update_dims_from_array(nifti_image * nim);
-   void   nifti_set_iname_offset      (nifti_image *nim);
-   int    nifti_set_type_from_names   (nifti_image * nim);
-   int    nifti_add_extension(nifti_image * nim, const char * data, int len,
-                              int ecode );
-   int    nifti_compiled_with_zlib    (void);
-   int    nifti_copy_extensions (nifti_image *nim_dest,const nifti_image *nim_src);
-   int    nifti_free_extensions (nifti_image *nim);
-   int  * nifti_get_intlist     (int nvals , const char *str);
-   char * nifti_strdup          (const char *str);
-   int    valid_nifti_extensions(const nifti_image *nim);
-
-
-   /*-------------------- Some C convenience macros ----------------------------*/
-
-   /* NIfTI-1.1 extension codes:
-      see http://nifti.nimh.nih.gov/nifti-1/documentation/faq#Q21 */
-
-#define NIFTI_ECODE_IGNORE           0  /* changed from UNKNOWN, 29 June 2005 */
-
-#define NIFTI_ECODE_DICOM            2  /* intended for raw DICOM attributes  */
-
-#define NIFTI_ECODE_AFNI             4  /* Robert W Cox: rwcox@nih.gov
-http://afni.nimh.nih.gov/afni      */
-
-#define NIFTI_ECODE_COMMENT          6  /* plain ASCII text only              */
-
-#define NIFTI_ECODE_XCEDE            8  /* David B Keator: dbkeator@uci.edu
-http://www.nbirn.net/Resources
-   /Users/Applications/
-   /xcede/index.htm              */
-
-#define NIFTI_ECODE_JIMDIMINFO      10  /* Mark A Horsfield:
-   mah5@leicester.ac.uk
-http://someplace/something         */
-
-#define NIFTI_ECODE_WORKFLOW_FWDS   12  /* Kate Fissell: fissell@pitt.edu
-http://kraepelin.wpic.pitt.edu
-   /~fissell/NIFTI_ECODE_WORKFLOW_FWDS
-   /NIFTI_ECODE_WORKFLOW_FWDS.html   */
-
-#define NIFTI_ECODE_FREESURFER      14  /* http://surfer.nmr.mgh.harvard.edu  */
-
-#define NIFTI_ECODE_PYPICKLE        16  /* embedded Python objects
-http://niftilib.sourceforge.net
-   /pynifti                     */
-
-   /* LONI MiND codes: http://www.loni.ucla.edu/twiki/bin/view/Main/MiND */
-#define NIFTI_ECODE_MIND_IDENT      18  /* Vishal Patel: vishal.patel@ucla.edu*/
-#define NIFTI_ECODE_B_VALUE         20
-#define NIFTI_ECODE_SPHERICAL_DIRECTION 22
-#define NIFTI_ECODE_DT_COMPONENT    24
-#define NIFTI_ECODE_SHC_DEGREEORDER 26  /* end LONI MiND codes                */
-
-#define NIFTI_ECODE_VOXBO           28  /* Dan Kimberg: www.voxbo.org         */
-
-#define NIFTI_ECODE_CARET           30  /* John Harwell: john@brainvis.wustl.edu
-http://brainvis.wustl.edu/wiki
-/index.php/Caret:Documentation
-:CaretNiftiExtension             */
-
-#define NIFTI_MAX_ECODE             30  /******* maximum extension code *******/
-
-   /* nifti_type file codes */
-#define NIFTI_FTYPE_ANALYZE   0
-#define NIFTI_FTYPE_NIFTI1_1  1
-#define NIFTI_FTYPE_NIFTI1_2  2
-#define NIFTI_FTYPE_ASCII     3
-#define NIFTI_MAX_FTYPE       3    /* this should match the maximum code */
-
-   /*------------------------------------------------------------------------*/
-   /*-- the rest of these apply only to nifti1_io.c, check for _NIFTI1_IO_C_ */
-   /*                                                    Feb 9, 2005 [rickr] */
-#ifdef _NIFTI1_IO_C_
-
-   typedef struct
-   {
-      int debug;               /*!< debug level for status reports  */
-      int skip_blank_ext;      /*!< skip extender if no extensions  */
-      int allow_upper_fext;    /*!< allow uppercase file extensions */
-   } nifti_global_options;
-
-   typedef struct
-   {
-      int    type;           /* should match the NIFTI_TYPE_ #define */
-      int    nbyper;         /* bytes per value, matches nifti_image */
-      int    swapsize;       /* bytes per swap piece, matches nifti_image */
-      char * name;           /* text string to match #define */
-   } nifti_type_ele;
-
-#undef  LNI_FERR /* local nifti file error, to be compact and repetative */
-#define LNI_FERR(func,msg,file)                                      \
-            fprintf(stderr,"** ERROR (%s): %s '%s'\n",func,msg,file)
-
-#undef  swap_2
-#undef  swap_4
-#define swap_2(s) nifti_swap_2bytes(1,&(s)) /* s: 2-byte short; swap in place */
-#define swap_4(v) nifti_swap_4bytes(1,&(v)) /* v: 4-byte value; swap in place */
-
-   /***** isfinite() is a C99 macro, which is
-          present in many C implementations already *****/
-
-#undef IS_GOOD_FLOAT
-#undef FIXED_FLOAT
-
-#ifdef isfinite       /* use isfinite() to check floats/doubles for goodness */
-#  define IS_GOOD_FLOAT(x) isfinite(x)       /* check if x is a "good" float */
-#  define FIXED_FLOAT(x)   (isfinite(x) ? (x) : 0)           /* fixed if bad */
-#else
-#  define IS_GOOD_FLOAT(x) 1                               /* don't check it */
-#  define FIXED_FLOAT(x)   (x)                               /* don't fix it */
-#endif
-
-#undef  ASSIF                                 /* assign v to *p, if possible */
-#define ASSIF(p,v) if( (p)!=NULL ) *(p) = (v)
-
-#undef  MSB_FIRST
-#undef  LSB_FIRST
-#undef  REVERSE_ORDER
-#define LSB_FIRST 1
-#define MSB_FIRST 2
-#define REVERSE_ORDER(x) (3-(x))    /* convert MSB_FIRST <--> LSB_FIRST */
-
-#define LNI_MAX_NIA_EXT_LEN 100000  /* consider a longer extension invalid */
-
-#endif  /* _NIFTI1_IO_C_ section */
-   /*------------------------------------------------------------------------*/
-
-   /*=================*/
-#ifdef  __cplusplus
-}
-#endif
-/*=================*/
diff --git a/reg-io/nifti/CMakeLists.txt b/reg-io/niftilib/CMakeLists.txt
old mode 100755
new mode 100644
similarity index 72%
rename from reg-io/nifti/CMakeLists.txt
rename to reg-io/niftilib/CMakeLists.txt
index 0b5d48b8..a2b1d466
--- a/reg-io/nifti/CMakeLists.txt
+++ b/reg-io/niftilib/CMakeLists.txt
@@ -6,12 +6,11 @@ if(USE_NII_NAN)
 endif(USE_NII_NAN)
 add_definitions(-DHAVE_ZLIB)
 set(NAME reg_nifti)
-add_library(${NAME} nifti1.h nifti1_io.c nifti1_io.h znzlib.c znzlib.h)
-target_link_libraries(${NAME} z)
+add_library(${NAME} nifti1_io.c)
+target_link_libraries(${NAME} z znz)
 install(TARGETS ${NAME}
         RUNTIME DESTINATION bin
         LIBRARY DESTINATION lib
         ARCHIVE DESTINATION lib
 )
-install(FILES nifti1_io.h znzlib.h nifti1.h DESTINATION include COMPONENT Development)
 #-----------------------------------------------------------------------------
diff --git a/reg-io/niftilib/nifti1.h b/reg-io/niftilib/nifti1.h
new file mode 100644
index 00000000..4121dc0f
--- /dev/null
+++ b/reg-io/niftilib/nifti1.h
@@ -0,0 +1,1528 @@
+/** \file nifti1.h
+    \brief Official definition of the nifti1 header.  Written by Bob Cox, SSCC, NIMH.
+
+    HISTORY:
+
+        29 Nov 2007 [rickr]
+           - added DT_RGBA32 and NIFTI_TYPE_RGBA32
+           - added NIFTI_INTENT codes:
+                TIME_SERIES, NODE_INDEX, RGB_VECTOR, RGBA_VECTOR, SHAPE
+
+        08 Mar 2019 [PT,DRG]
+           - Updated to include [qs]form_code = 5 
+
+ */
+
+#ifndef _NIFTI_HEADER_
+#define _NIFTI_HEADER_
+
+/*****************************************************************************
+      ** This file defines the "NIFTI-1" header format.               **
+      ** It is derived from 2 meetings at the NIH (31 Mar 2003 and    **
+      ** 02 Sep 2003) of the Data Format Working Group (DFWG),        **
+      ** chartered by the NIfTI (Neuroimaging Informatics Technology  **
+      ** Initiative) at the National Institutes of Health (NIH).      **
+      **--------------------------------------------------------------**
+      ** Neither the National Institutes of Health (NIH), the DFWG,   **
+      ** nor any of the members or employees of these institutions    **
+      ** imply any warranty of usefulness of this material for any    **
+      ** purpose, and do not assume any liability for damages,        **
+      ** incidental or otherwise, caused by any use of this document. **
+      ** If these conditions are not acceptable, do not use this!     **
+      **--------------------------------------------------------------**
+      ** Author:   Robert W Cox (NIMH, Bethesda)                      **
+      ** Advisors: John Ashburner (FIL, London),                      **
+      **           Stephen Smith (FMRIB, Oxford),                     **
+      **           Mark Jenkinson (FMRIB, Oxford)                     **
+******************************************************************************/
+
+/*---------------------------------------------------------------------------*/
+/* Note that the ANALYZE 7.5 file header (dbh.h) is
+         (c) Copyright 1986-1995
+         Biomedical Imaging Resource
+         Mayo Foundation
+   Incorporation of components of dbh.h are by permission of the
+   Mayo Foundation.
+
+   Changes from the ANALYZE 7.5 file header in this file are released to the
+   public domain, including the functional comments and any amusing asides.
+-----------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/*! INTRODUCTION TO NIFTI-1:
+   ------------------------
+   The twin (and somewhat conflicting) goals of this modified ANALYZE 7.5
+   format are:
+    (a) To add information to the header that will be useful for functional
+        neuroimaging data analysis and display.  These additions include:
+        - More basic data types.
+        - Two affine transformations to specify voxel coordinates.
+        - "Intent" codes and parameters to describe the meaning of the data.
+        - Affine scaling of the stored data values to their "true" values.
+        - Optional storage of the header and image data in one file (.nii).
+    (b) To maintain compatibility with non-NIFTI-aware ANALYZE 7.5 compatible
+        software (i.e., such a program should be able to do something useful
+        with a NIFTI-1 dataset -- at least, with one stored in a traditional
+        .img/.hdr file pair).
+
+   Most of the unused fields in the ANALYZE 7.5 header have been taken,
+   and some of the lesser-used fields have been co-opted for other purposes.
+   Notably, most of the data_history substructure has been co-opted for
+   other purposes, since the ANALYZE 7.5 format describes this substructure
+   as "not required".
+
+   NIFTI-1 FLAG (MAGIC STRINGS):
+   ----------------------------
+   To flag such a struct as being conformant to the NIFTI-1 spec, the last 4
+   bytes of the header must be either the C String "ni1" or "n+1";
+   in hexadecimal, the 4 bytes
+     6E 69 31 00   or   6E 2B 31 00
+   (in any future version of this format, the '1' will be upgraded to '2',
+   etc.).  Normally, such a "magic number" or flag goes at the start of the
+   file, but trying to avoid clobbering widely-used ANALYZE 7.5 fields led to
+   putting this marker last.  However, recall that "the last shall be first"
+   (Matthew 20:16).
+
+   If a NIFTI-aware program reads a header file that is NOT marked with a
+   NIFTI magic string, then it should treat the header as an ANALYZE 7.5
+   structure.
+
+   NIFTI-1 FILE STORAGE:
+   --------------------
+   "ni1" means that the image data is stored in the ".img" file corresponding
+   to the header file (starting at file offset 0).
+
+   "n+1" means that the image data is stored in the same file as the header
+   information.  We recommend that the combined header+data filename suffix
+   be ".nii".  When the dataset is stored in one file, the first byte of image
+   data is stored at byte location (int)vox_offset in this combined file.
+   The minimum allowed value of vox_offset is 352; for compatibility with
+   some software, vox_offset should be an integral multiple of 16.
+
+   GRACE UNDER FIRE:
+   ----------------
+   Most NIFTI-aware programs will only be able to handle a subset of the full
+   range of datasets possible with this format.  All NIFTI-aware programs
+   should take care to check if an input dataset conforms to the program's
+   needs and expectations (e.g., check datatype, intent_code, etc.).  If the
+   input dataset can't be handled by the program, the program should fail
+   gracefully (e.g., print a useful warning; not crash).
+
+   SAMPLE CODES:
+   ------------
+   The associated files nifti1_io.h and nifti1_io.c provide a sample
+   implementation in C of a set of functions to read, write, and manipulate
+   NIFTI-1 files.  The file nifti1_test.c is a sample program that uses
+   the nifti1_io.c functions.
+-----------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/* HEADER STRUCT DECLARATION:
+   -------------------------
+   In the comments below for each field, only NIFTI-1 specific requirements
+   or changes from the ANALYZE 7.5 format are described.  For convenience,
+   the 348 byte header is described as a single struct, rather than as the
+   ANALYZE 7.5 group of 3 substructs.
+
+   Further comments about the interpretation of various elements of this
+   header are after the data type definition itself.  Fields that are
+   marked as ++UNUSED++ have no particular interpretation in this standard.
+   (Also see the UNUSED FIELDS comment section, far below.)
+
+   The presumption below is that the various C types have particular sizes:
+     sizeof(int) = sizeof(float) = 4 ;  sizeof(short) = 2
+-----------------------------------------------------------------------------*/
+
+/*=================*/
+#ifdef  __cplusplus
+extern "C" {
+#endif
+/*=================*/
+
+/*! \struct nifti_1_header
+    \brief Data structure defining the fields in the nifti1 header.
+           This binary header should be found at the beginning of a valid
+           NIFTI-1 header file.
+ */
+                        /*************************/  /************************/
+struct nifti_1_header { /* NIFTI-1 usage         */  /* ANALYZE 7.5 field(s) */
+                        /*************************/  /************************/
+
+                                           /*--- was header_key substruct ---*/
+ int   sizeof_hdr;    /*!< MUST be 348           */  /* int sizeof_hdr;      */
+ char  data_type[10]; /*!< ++UNUSED++            */  /* char data_type[10];  */
+ char  db_name[18];   /*!< ++UNUSED++            */  /* char db_name[18];    */
+ int   extents;       /*!< ++UNUSED++            */  /* int extents;         */
+ short session_error; /*!< ++UNUSED++            */  /* short session_error; */
+ char  regular;       /*!< ++UNUSED++            */  /* char regular;        */
+ char  dim_info;      /*!< MRI slice ordering.   */  /* char hkey_un0;       */
+
+                                      /*--- was image_dimension substruct ---*/
+ short dim[8];        /*!< Data array dimensions.*/  /* short dim[8];        */
+ float intent_p1 ;    /*!< 1st intent parameter. */  /* short unused8;       */
+                                                     /* short unused9;       */
+ float intent_p2 ;    /*!< 2nd intent parameter. */  /* short unused10;      */
+                                                     /* short unused11;      */
+ float intent_p3 ;    /*!< 3rd intent parameter. */  /* short unused12;      */
+                                                     /* short unused13;      */
+ short intent_code ;  /*!< NIFTI_INTENT_* code.  */  /* short unused14;      */
+ short datatype;      /*!< Defines data type!    */  /* short datatype;      */
+ short bitpix;        /*!< Number bits/voxel.    */  /* short bitpix;        */
+ short slice_start;   /*!< First slice index.    */  /* short dim_un0;       */
+ float pixdim[8];     /*!< Grid spacings.        */  /* float pixdim[8];     */
+ float vox_offset;    /*!< Offset into .nii file */  /* float vox_offset;    */
+ float scl_slope ;    /*!< Data scaling: slope.  */  /* float funused1;      */
+ float scl_inter ;    /*!< Data scaling: offset. */  /* float funused2;      */
+ short slice_end;     /*!< Last slice index.     */  /* float funused3;      */
+ char  slice_code ;   /*!< Slice timing order.   */
+ char  xyzt_units ;   /*!< Units of pixdim[1..4] */
+ float cal_max;       /*!< Max display intensity */  /* float cal_max;       */
+ float cal_min;       /*!< Min display intensity */  /* float cal_min;       */
+ float slice_duration;/*!< Time for 1 slice.     */  /* float compressed;    */
+ float toffset;       /*!< Time axis shift.      */  /* float verified;      */
+ int   glmax;         /*!< ++UNUSED++            */  /* int glmax;           */
+ int   glmin;         /*!< ++UNUSED++            */  /* int glmin;           */
+
+                                         /*--- was data_history substruct ---*/
+ char  descrip[80];   /*!< any text you like.    */  /* char descrip[80];    */
+ char  aux_file[24];  /*!< auxiliary filename.   */  /* char aux_file[24];   */
+
+ short qform_code ;   /*!< NIFTI_XFORM_* code.   */  /*-- all ANALYZE 7.5 ---*/
+ short sform_code ;   /*!< NIFTI_XFORM_* code.   */  /*   fields below here  */
+                                                     /*   are replaced       */
+ float quatern_b ;    /*!< Quaternion b param.   */
+ float quatern_c ;    /*!< Quaternion c param.   */
+ float quatern_d ;    /*!< Quaternion d param.   */
+ float qoffset_x ;    /*!< Quaternion x shift.   */
+ float qoffset_y ;    /*!< Quaternion y shift.   */
+ float qoffset_z ;    /*!< Quaternion z shift.   */
+
+ float srow_x[4] ;    /*!< 1st row affine transform.   */
+ float srow_y[4] ;    /*!< 2nd row affine transform.   */
+ float srow_z[4] ;    /*!< 3rd row affine transform.   */
+
+ char intent_name[16];/*!< 'name' or meaning of data.  */
+
+ char magic[4] ;      /*!< MUST be "ni1\0" or "n+1\0". */
+
+} ;                   /**** 348 bytes total ****/
+
+typedef struct nifti_1_header nifti_1_header ;
+
+/*---------------------------------------------------------------------------*/
+/* HEADER EXTENSIONS:
+   -----------------
+   After the end of the 348 byte header (e.g., after the magic field),
+   the next 4 bytes are a char array field named "extension". By default,
+   all 4 bytes of this array should be set to zero. In a .nii file, these
+   4 bytes will always be present, since the earliest start point for
+   the image data is byte #352. In a separate .hdr file, these bytes may
+   or may not be present. If not present (i.e., if the length of the .hdr
+   file is 348 bytes), then a NIfTI-1 compliant program should use the
+   default value of extension={0,0,0,0}. The first byte (extension[0])
+   is the only value of this array that is specified at present. The other
+   3 bytes are reserved for future use.
+
+   If extension[0] is nonzero, it indicates that extended header information
+   is present in the bytes following the extension array. In a .nii file,
+   this extended header data is before the image data (and vox_offset
+   must be set correctly to allow for this). In a .hdr file, this extended
+   data follows extension and proceeds (potentially) to the end of the file.
+
+   The format of extended header data is weakly specified. Each extension
+   must be an integer multiple of 16 bytes long. The first 8 bytes of each
+   extension comprise 2 integers:
+      int esize , ecode ;
+   These values may need to be byte-swapped, as indicated by dim[0] for
+   the rest of the header.
+     * esize is the number of bytes that form the extended header data
+       + esize must be a positive integral multiple of 16
+       + this length includes the 8 bytes of esize and ecode themselves
+     * ecode is a non-negative integer that indicates the format of the
+       extended header data that follows
+       + different ecode values are assigned to different developer groups
+       + at present, the "registered" values for code are
+         = 0 = unknown private format (not recommended!)
+         = 2 = DICOM format (i.e., attribute tags and values)
+         = 4 = AFNI group (i.e., ASCII XML-ish elements)
+   In the interests of interoperability (a primary rationale for NIfTI),
+   groups developing software that uses this extension mechanism are
+   encouraged to document and publicize the format of their extensions.
+   To this end, the NIfTI DFWG will assign even numbered codes upon request
+   to groups submitting at least rudimentary documentation for the format
+   of their extension; at present, the contact is mailto:rwcox@nih.gov.
+   The assigned codes and documentation will be posted on the NIfTI
+   website. All odd values of ecode (and 0) will remain unassigned;
+   at least, until the even ones are used up, when we get to 2,147,483,646.
+
+   Note that the other contents of the extended header data section are
+   totally unspecified by the NIfTI-1 standard. In particular, if binary
+   data is stored in such a section, its byte order is not necessarily
+   the same as that given by examining dim[0]; it is incumbent on the
+   programs dealing with such data to determine the byte order of binary
+   extended header data.
+
+   Multiple extended header sections are allowed, each starting with an
+   esize,ecode value pair. The first esize value, as described above,
+   is at bytes #352-355 in the .hdr or .nii file (files start at byte #0).
+   If this value is positive, then the second (esize2) will be found
+   starting at byte #352+esize1 , the third (esize3) at byte #352+esize1+esize2,
+   et cetera.  Of course, in a .nii file, the value of vox_offset must
+   be compatible with these extensions. If a malformed file indicates
+   that an extended header data section would run past vox_offset, then
+   the entire extended header section should be ignored. In a .hdr file,
+   if an extended header data section would run past the end-of-file,
+   that extended header data should also be ignored.
+
+   With the above scheme, a program can successively examine the esize
+   and ecode values, and skip over each extended header section if the
+   program doesn't know how to interpret the data within. Of course, any
+   program can simply ignore all extended header sections simply by jumping
+   straight to the image data using vox_offset.
+-----------------------------------------------------------------------------*/
+
+/*! \struct nifti1_extender
+    \brief This structure represents a 4-byte string that should follow the
+           binary nifti_1_header data in a NIFTI-1 header file.  If the char
+           values are {1,0,0,0}, the file is expected to contain extensions,
+           values of {0,0,0,0} imply the file does not contain extensions.
+           Other sequences of values are not currently defined.
+ */
+struct nifti1_extender { char extension[4] ; } ;
+typedef struct nifti1_extender nifti1_extender ;
+
+/*! \struct nifti1_extension
+    \brief Data structure defining the fields of a header extension.
+ */
+struct nifti1_extension {
+   int    esize ; /*!< size of extension, in bytes (must be multiple of 16) */
+   int    ecode ; /*!< extension code, one of the NIFTI_ECODE_ values       */
+   char * edata ; /*!< raw data, with no byte swapping (length is esize-8)  */
+} ;
+typedef struct nifti1_extension nifti1_extension ;
+
+/*---------------------------------------------------------------------------*/
+/* DATA DIMENSIONALITY (as in ANALYZE 7.5):
+   ---------------------------------------
+     dim[0] = number of dimensions;
+              - if dim[0] is outside range 1..7, then the header information
+                needs to be byte swapped appropriately
+              - ANALYZE supports dim[0] up to 7, but NIFTI-1 reserves
+                dimensions 1,2,3 for space (x,y,z), 4 for time (t), and
+                5,6,7 for anything else needed.
+
+     dim[i] = length of dimension #i, for i=1..dim[0]  (must be positive)
+              - also see the discussion of intent_code, far below
+
+     pixdim[i] = voxel width along dimension #i, i=1..dim[0] (positive)
+                 - cf. ORIENTATION section below for use of pixdim[0]
+                 - the units of pixdim can be specified with the xyzt_units
+                   field (also described far below).
+
+   Number of bits per voxel value is in bitpix, which MUST correspond with
+   the datatype field.  The total number of bytes in the image data is
+     dim[1] * ... * dim[dim[0]] * bitpix / 8
+
+   In NIFTI-1 files, dimensions 1,2,3 are for space, dimension 4 is for time,
+   and dimension 5 is for storing multiple values at each spatiotemporal
+   voxel.  Some examples:
+     - A typical whole-brain FMRI experiment's time series:
+        - dim[0] = 4
+        - dim[1] = 64   pixdim[1] = 3.75 xyzt_units =  NIFTI_UNITS_MM
+        - dim[2] = 64   pixdim[2] = 3.75             | NIFTI_UNITS_SEC
+        - dim[3] = 20   pixdim[3] = 5.0
+        - dim[4] = 120  pixdim[4] = 2.0
+     - A typical T1-weighted anatomical volume:
+        - dim[0] = 3
+        - dim[1] = 256  pixdim[1] = 1.0  xyzt_units = NIFTI_UNITS_MM
+        - dim[2] = 256  pixdim[2] = 1.0
+        - dim[3] = 128  pixdim[3] = 1.1
+     - A single slice EPI time series:
+        - dim[0] = 4
+        - dim[1] = 64   pixdim[1] = 3.75 xyzt_units =  NIFTI_UNITS_MM
+        - dim[2] = 64   pixdim[2] = 3.75             | NIFTI_UNITS_SEC
+        - dim[3] = 1    pixdim[3] = 5.0
+        - dim[4] = 1200 pixdim[4] = 0.2
+     - A 3-vector stored at each point in a 3D volume:
+        - dim[0] = 5
+        - dim[1] = 256  pixdim[1] = 1.0  xyzt_units = NIFTI_UNITS_MM
+        - dim[2] = 256  pixdim[2] = 1.0
+        - dim[3] = 128  pixdim[3] = 1.1
+        - dim[4] = 1    pixdim[4] = 0.0
+        - dim[5] = 3                     intent_code = NIFTI_INTENT_VECTOR
+     - A single time series with a 3x3 matrix at each point:
+        - dim[0] = 5
+        - dim[1] = 1                     xyzt_units = NIFTI_UNITS_SEC
+        - dim[2] = 1
+        - dim[3] = 1
+        - dim[4] = 1200 pixdim[4] = 0.2
+        - dim[5] = 9                     intent_code = NIFTI_INTENT_GENMATRIX
+        - intent_p1 = intent_p2 = 3.0    (indicates matrix dimensions)
+-----------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/* DATA STORAGE:
+   ------------
+   If the magic field is "n+1", then the voxel data is stored in the
+   same file as the header.  In this case, the voxel data starts at offset
+   (int)vox_offset into the header file.  Thus, vox_offset=352.0 means that
+   the data starts immediately after the NIFTI-1 header.  If vox_offset is
+   greater than 352, the NIFTI-1 format does not say much about the
+   contents of the dataset file between the end of the header and the
+   start of the data.
+
+   FILES:
+   -----
+   If the magic field is "ni1", then the voxel data is stored in the
+   associated ".img" file, starting at offset 0 (i.e., vox_offset is not
+   used in this case, and should be set to 0.0).
+
+   When storing NIFTI-1 datasets in pairs of files, it is customary to name
+   the files in the pattern "name.hdr" and "name.img", as in ANALYZE 7.5.
+   When storing in a single file ("n+1"), the file name should be in
+   the form "name.nii" (the ".nft" and ".nif" suffixes are already taken;
+   cf. http://www.icdatamaster.com/n.html ).
+
+   BYTE ORDERING:
+   -------------
+   The byte order of the data arrays is presumed to be the same as the byte
+   order of the header (which is determined by examining dim[0]).
+
+   Floating point types are presumed to be stored in IEEE-754 format.
+-----------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/* DETAILS ABOUT vox_offset:
+   ------------------------
+   In a .nii file, the vox_offset field value is interpreted as the start
+   location of the image data bytes in that file. In a .hdr/.img file pair,
+   the vox_offset field value is the start location of the image data
+   bytes in the .img file.
+    * If vox_offset is less than 352 in a .nii file, it is equivalent
+      to 352 (i.e., image data never starts before byte #352 in a .nii file).
+    * The default value for vox_offset in a .nii file is 352.
+    * In a .hdr file, the default value for vox_offset is 0.
+    * vox_offset should be an integer multiple of 16; otherwise, some
+      programs may not work properly (e.g., SPM). This is to allow
+      memory-mapped input to be properly byte-aligned.
+   Note that since vox_offset is an IEEE-754 32 bit float (for compatibility
+   with the ANALYZE-7.5 format), it effectively has a 24 bit mantissa. All
+   integers from 0 to 2^24 can be represented exactly in this format, but not
+   all larger integers are exactly storable as IEEE-754 32 bit floats. However,
+   unless you plan to have vox_offset be potentially larger than 16 MB, this
+   should not be an issue. (Actually, any integral multiple of 16 up to 2^27
+   can be represented exactly in this format, which allows for up to 128 MB
+   of random information before the image data.  If that isn't enough, then
+   perhaps this format isn't right for you.)
+
+   In a .img file (i.e., image data stored separately from the NIfTI-1
+   header), data bytes between #0 and #vox_offset-1 (inclusive) are completely
+   undefined and unregulated by the NIfTI-1 standard. One potential use of
+   having vox_offset > 0 in the .hdr/.img file pair storage method is to make
+   the .img file be a copy of (or link to) a pre-existing image file in some
+   other format, such as DICOM; then vox_offset would be set to the offset of
+   the image data in this file. (It may not be possible to follow the
+   "multiple-of-16 rule" with an arbitrary external file; using the NIfTI-1
+   format in such a case may lead to a file that is incompatible with software
+   that relies on vox_offset being a multiple of 16.)
+
+   In a .nii file, data bytes between #348 and #vox_offset-1 (inclusive) may
+   be used to store user-defined extra information; similarly, in a .hdr file,
+   any data bytes after byte #347 are available for user-defined extra
+   information. The (very weak) regulation of this extra header data is
+   described elsewhere.
+-----------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/* DATA SCALING:
+   ------------
+   If the scl_slope field is nonzero, then each voxel value in the dataset
+   should be scaled as
+      y = scl_slope * x + scl_inter
+   where x = voxel value stored
+         y = "true" voxel value
+   Normally, we would expect this scaling to be used to store "true" floating
+   values in a smaller integer datatype, but that is not required.  That is,
+   it is legal to use scaling even if the datatype is a float type (crazy,
+   perhaps, but legal).
+    - However, the scaling is to be ignored if datatype is DT_RGB24.
+    - If datatype is a complex type, then the scaling is to be
+      applied to both the real and imaginary parts.
+
+   The cal_min and cal_max fields (if nonzero) are used for mapping (possibly
+   scaled) dataset values to display colors:
+    - Minimum display intensity (black) corresponds to dataset value cal_min.
+    - Maximum display intensity (white) corresponds to dataset value cal_max.
+    - Dataset values below cal_min should display as black also, and values
+      above cal_max as white.
+    - Colors "black" and "white", of course, may refer to any scalar display
+      scheme (e.g., a color lookup table specified via aux_file).
+    - cal_min and cal_max only make sense when applied to scalar-valued
+      datasets (i.e., dim[0] < 5 or dim[5] = 1).
+-----------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/* TYPE OF DATA (acceptable values for datatype field):
+   ---------------------------------------------------
+   Values of datatype smaller than 256 are ANALYZE 7.5 compatible.
+   Larger values are NIFTI-1 additions.  These are all multiples of 256, so
+   that no bits below position 8 are set in datatype.  But there is no need
+   to use only powers-of-2, as the original ANALYZE 7.5 datatype codes do.
+
+   The additional codes are intended to include a complete list of basic
+   scalar types, including signed and unsigned integers from 8 to 64 bits,
+   floats from 32 to 128 bits, and complex (float pairs) from 64 to 256 bits.
+
+   Note that most programs will support only a few of these datatypes!
+   A NIFTI-1 program should fail gracefully (e.g., print a warning message)
+   when it encounters a dataset with a type it doesn't like.
+-----------------------------------------------------------------------------*/
+
+#undef DT_UNKNOWN  /* defined in dirent.h on some Unix systems */
+
+/*! \defgroup NIFTI1_DATATYPES
+    \brief nifti1 datatype codes
+    @{
+ */
+                            /*--- the original ANALYZE 7.5 type codes ---*/
+#define DT_NONE                    0
+#define DT_UNKNOWN                 0     /* what it says, dude           */
+#define DT_BINARY                  1     /* binary (1 bit/voxel)         */
+#define DT_UNSIGNED_CHAR           2     /* unsigned char (8 bits/voxel) */
+#define DT_SIGNED_SHORT            4     /* signed short (16 bits/voxel) */
+#define DT_SIGNED_INT              8     /* signed int (32 bits/voxel)   */
+#define DT_FLOAT                  16     /* float (32 bits/voxel)        */
+#define DT_COMPLEX                32     /* complex (64 bits/voxel)      */
+#define DT_DOUBLE                 64     /* double (64 bits/voxel)       */
+#define DT_RGB                   128     /* RGB triple (24 bits/voxel)   */
+#define DT_ALL                   255     /* not very useful (?)          */
+
+                            /*----- another set of names for the same ---*/
+#define DT_UINT8                   2
+#define DT_INT16                   4
+#define DT_INT32                   8
+#define DT_FLOAT32                16
+#define DT_COMPLEX64              32
+#define DT_FLOAT64                64
+#define DT_RGB24                 128
+
+                            /*------------------- new codes for NIFTI ---*/
+#define DT_INT8                  256     /* signed char (8 bits)         */
+#define DT_UINT16                512     /* unsigned short (16 bits)     */
+#define DT_UINT32                768     /* unsigned int (32 bits)       */
+#define DT_INT64                1024     /* long long (64 bits)          */
+#define DT_UINT64               1280     /* unsigned long long (64 bits) */
+#define DT_FLOAT128             1536     /* long double (128 bits)       */
+#define DT_COMPLEX128           1792     /* double pair (128 bits)       */
+#define DT_COMPLEX256           2048     /* long double pair (256 bits)  */
+#define DT_RGBA32               2304     /* 4 byte RGBA (32 bits/voxel)  */
+/* @} */
+
+
+                            /*------- aliases for all the above codes ---*/
+
+/*! \defgroup NIFTI1_DATATYPE_ALIASES
+    \brief aliases for the nifti1 datatype codes
+    @{
+ */
+                                       /*! unsigned char. */
+#define NIFTI_TYPE_UINT8           2
+                                       /*! signed short. */
+#define NIFTI_TYPE_INT16           4
+                                       /*! signed int. */
+#define NIFTI_TYPE_INT32           8
+                                       /*! 32 bit float. */
+#define NIFTI_TYPE_FLOAT32        16
+                                       /*! 64 bit complex = 2 32 bit floats. */
+#define NIFTI_TYPE_COMPLEX64      32
+                                       /*! 64 bit float = double. */
+#define NIFTI_TYPE_FLOAT64        64
+                                       /*! 3 8 bit bytes. */
+#define NIFTI_TYPE_RGB24         128
+                                       /*! signed char. */
+#define NIFTI_TYPE_INT8          256
+                                       /*! unsigned short. */
+#define NIFTI_TYPE_UINT16        512
+                                       /*! unsigned int. */
+#define NIFTI_TYPE_UINT32        768
+                                       /*! signed long long. */
+#define NIFTI_TYPE_INT64        1024
+                                       /*! unsigned long long. */
+#define NIFTI_TYPE_UINT64       1280
+                                       /*! 128 bit float = long double. */
+#define NIFTI_TYPE_FLOAT128     1536
+                                       /*! 128 bit complex = 2 64 bit floats. */
+#define NIFTI_TYPE_COMPLEX128   1792
+                                       /*! 256 bit complex = 2 128 bit floats */
+#define NIFTI_TYPE_COMPLEX256   2048
+                                       /*! 4 8 bit bytes. */
+#define NIFTI_TYPE_RGBA32       2304
+/* @} */
+
+                     /*-------- sample typedefs for complicated types ---*/
+#if 0
+typedef struct { float       r,i;     } complex_float ;
+typedef struct { double      r,i;     } complex_double ;
+typedef struct { long double r,i;     } complex_longdouble ;
+typedef struct { unsigned char r,g,b; } rgb_byte ;
+#endif
+
+/*---------------------------------------------------------------------------*/
+/* INTERPRETATION OF VOXEL DATA:
+   ----------------------------
+   The intent_code field can be used to indicate that the voxel data has
+   some particular meaning.  In particular, a large number of codes is
+   given to indicate that the the voxel data should be interpreted as
+   being drawn from a given probability distribution.
+
+   VECTOR-VALUED DATASETS:
+   ----------------------
+   The 5th dimension of the dataset, if present (i.e., dim[0]=5 and
+   dim[5] > 1), contains multiple values (e.g., a vector) to be stored
+   at each spatiotemporal location.  For example, the header values
+    - dim[0] = 5
+    - dim[1] = 64
+    - dim[2] = 64
+    - dim[3] = 20
+    - dim[4] = 1     (indicates no time axis)
+    - dim[5] = 3
+    - datatype = DT_FLOAT
+    - intent_code = NIFTI_INTENT_VECTOR
+   mean that this dataset should be interpreted as a 3D volume (64x64x20),
+   with a 3-vector of floats defined at each point in the 3D grid.
+
+   A program reading a dataset with a 5th dimension may want to reformat
+   the image data to store each voxels' set of values together in a struct
+   or array.  This programming detail, however, is beyond the scope of the
+   NIFTI-1 file specification!  Uses of dimensions 6 and 7 are also not
+   specified here.
+
+   STATISTICAL PARAMETRIC DATASETS (i.e., SPMs):
+   --------------------------------------------
+   Values of intent_code from NIFTI_FIRST_STATCODE to NIFTI_LAST_STATCODE
+   (inclusive) indicate that the numbers in the dataset should be interpreted
+   as being drawn from a given distribution.  Most such distributions have
+   auxiliary parameters (e.g., NIFTI_INTENT_TTEST has 1 DOF parameter).
+
+   If the dataset DOES NOT have a 5th dimension, then the auxiliary parameters
+   are the same for each voxel, and are given in header fields intent_p1,
+   intent_p2, and intent_p3.
+
+   If the dataset DOES have a 5th dimension, then the auxiliary parameters
+   are different for each voxel.  For example, the header values
+    - dim[0] = 5
+    - dim[1] = 128
+    - dim[2] = 128
+    - dim[3] = 1      (indicates a single slice)
+    - dim[4] = 1      (indicates no time axis)
+    - dim[5] = 2
+    - datatype = DT_FLOAT
+    - intent_code = NIFTI_INTENT_TTEST
+   mean that this is a 2D dataset (128x128) of t-statistics, with the
+   t-statistic being in the first "plane" of data and the degrees-of-freedom
+   parameter being in the second "plane" of data.
+
+   If the dataset 5th dimension is used to store the voxel-wise statistical
+   parameters, then dim[5] must be 1 plus the number of parameters required
+   by that distribution (e.g., intent_code=NIFTI_INTENT_TTEST implies dim[5]
+   must be 2, as in the example just above).
+
+   Note: intent_code values 2..10 are compatible with AFNI 1.5x (which is
+   why there is no code with value=1, which is obsolescent in AFNI).
+
+   OTHER INTENTIONS:
+   ----------------
+   The purpose of the intent_* fields is to help interpret the values
+   stored in the dataset.  Some non-statistical values for intent_code
+   and conventions are provided for storing other complex data types.
+
+   The intent_name field provides space for a 15 character (plus 0 byte)
+   'name' string for the type of data stored. Examples:
+    - intent_code = NIFTI_INTENT_ESTIMATE; intent_name = "T1";
+       could be used to signify that the voxel values are estimates of the
+       NMR parameter T1.
+    - intent_code = NIFTI_INTENT_TTEST; intent_name = "House";
+       could be used to signify that the voxel values are t-statistics
+       for the significance of 'activation' response to a House stimulus.
+    - intent_code = NIFTI_INTENT_DISPVECT; intent_name = "ToMNI152";
+       could be used to signify that the voxel values are a displacement
+       vector that transforms each voxel (x,y,z) location to the
+       corresponding location in the MNI152 standard brain.
+    - intent_code = NIFTI_INTENT_SYMMATRIX; intent_name = "DTI";
+       could be used to signify that the voxel values comprise a diffusion
+       tensor image.
+
+   If no data name is implied or needed, intent_name[0] should be set to 0.
+-----------------------------------------------------------------------------*/
+
+ /*! default: no intention is indicated in the header. */
+
+#define NIFTI_INTENT_NONE        0
+
+    /*-------- These codes are for probability distributions ---------------*/
+    /* Most distributions have a number of parameters,
+       below denoted by p1, p2, and p3, and stored in
+        - intent_p1, intent_p2, intent_p3 if dataset doesn't have 5th dimension
+        - image data array                if dataset does have 5th dimension
+
+       Functions to compute with many of the distributions below can be found
+       in the CDF library from U Texas.
+
+       Formulas for and discussions of these distributions can be found in the
+       following books:
+
+        [U] Univariate Discrete Distributions,
+            NL Johnson, S Kotz, AW Kemp.
+
+        [C1] Continuous Univariate Distributions, vol. 1,
+             NL Johnson, S Kotz, N Balakrishnan.
+
+        [C2] Continuous Univariate Distributions, vol. 2,
+             NL Johnson, S Kotz, N Balakrishnan.                            */
+    /*----------------------------------------------------------------------*/
+
+  /*! [C2, chap 32] Correlation coefficient R (1 param):
+       p1 = degrees of freedom
+       R/sqrt(1-R*R) is t-distributed with p1 DOF. */
+
+/*! \defgroup NIFTI1_INTENT_CODES
+    \brief nifti1 intent codes, to describe intended meaning of dataset contents
+    @{
+ */
+#define NIFTI_INTENT_CORREL      2
+
+  /*! [C2, chap 28] Student t statistic (1 param): p1 = DOF. */
+
+#define NIFTI_INTENT_TTEST       3
+
+  /*! [C2, chap 27] Fisher F statistic (2 params):
+       p1 = numerator DOF, p2 = denominator DOF. */
+
+#define NIFTI_INTENT_FTEST       4
+
+  /*! [C1, chap 13] Standard normal (0 params): Density = N(0,1). */
+
+#define NIFTI_INTENT_ZSCORE      5
+
+  /*! [C1, chap 18] Chi-squared (1 param): p1 = DOF.
+      Density(x) proportional to exp(-x/2) * x^(p1/2-1). */
+
+#define NIFTI_INTENT_CHISQ       6
+
+  /*! [C2, chap 25] Beta distribution (2 params): p1=a, p2=b.
+      Density(x) proportional to x^(a-1) * (1-x)^(b-1). */
+
+#define NIFTI_INTENT_BETA        7
+
+  /*! [U, chap 3] Binomial distribution (2 params):
+       p1 = number of trials, p2 = probability per trial.
+      Prob(x) = (p1 choose x) * p2^x * (1-p2)^(p1-x), for x=0,1,...,p1. */
+
+#define NIFTI_INTENT_BINOM       8
+
+  /*! [C1, chap 17] Gamma distribution (2 params):
+       p1 = shape, p2 = scale.
+      Density(x) proportional to x^(p1-1) * exp(-p2*x). */
+
+#define NIFTI_INTENT_GAMMA       9
+
+  /*! [U, chap 4] Poisson distribution (1 param): p1 = mean.
+      Prob(x) = exp(-p1) * p1^x / x! , for x=0,1,2,.... */
+
+#define NIFTI_INTENT_POISSON    10
+
+  /*! [C1, chap 13] Normal distribution (2 params):
+       p1 = mean, p2 = standard deviation. */
+
+#define NIFTI_INTENT_NORMAL     11
+
+  /*! [C2, chap 30] Noncentral F statistic (3 params):
+       p1 = numerator DOF, p2 = denominator DOF,
+       p3 = numerator noncentrality parameter.  */
+
+#define NIFTI_INTENT_FTEST_NONC 12
+
+  /*! [C2, chap 29] Noncentral chi-squared statistic (2 params):
+       p1 = DOF, p2 = noncentrality parameter.     */
+
+#define NIFTI_INTENT_CHISQ_NONC 13
+
+  /*! [C2, chap 23] Logistic distribution (2 params):
+       p1 = location, p2 = scale.
+      Density(x) proportional to sech^2((x-p1)/(2*p2)). */
+
+#define NIFTI_INTENT_LOGISTIC   14
+
+  /*! [C2, chap 24] Laplace distribution (2 params):
+       p1 = location, p2 = scale.
+      Density(x) proportional to exp(-abs(x-p1)/p2). */
+
+#define NIFTI_INTENT_LAPLACE    15
+
+  /*! [C2, chap 26] Uniform distribution: p1 = lower end, p2 = upper end. */
+
+#define NIFTI_INTENT_UNIFORM    16
+
+  /*! [C2, chap 31] Noncentral t statistic (2 params):
+       p1 = DOF, p2 = noncentrality parameter. */
+
+#define NIFTI_INTENT_TTEST_NONC 17
+
+  /*! [C1, chap 21] Weibull distribution (3 params):
+       p1 = location, p2 = scale, p3 = power.
+      Density(x) proportional to
+       ((x-p1)/p2)^(p3-1) * exp(-((x-p1)/p2)^p3) for x > p1. */
+
+#define NIFTI_INTENT_WEIBULL    18
+
+  /*! [C1, chap 18] Chi distribution (1 param): p1 = DOF.
+      Density(x) proportional to x^(p1-1) * exp(-x^2/2) for x > 0.
+       p1 = 1 = 'half normal' distribution
+       p1 = 2 = Rayleigh distribution
+       p1 = 3 = Maxwell-Boltzmann distribution.                  */
+
+#define NIFTI_INTENT_CHI        19
+
+  /*! [C1, chap 15] Inverse Gaussian (2 params):
+       p1 = mu, p2 = lambda
+      Density(x) proportional to
+       exp(-p2*(x-p1)^2/(2*p1^2*x)) / x^3  for x > 0. */
+
+#define NIFTI_INTENT_INVGAUSS   20
+
+  /*! [C2, chap 22] Extreme value type I (2 params):
+       p1 = location, p2 = scale
+      cdf(x) = exp(-exp(-(x-p1)/p2)). */
+
+#define NIFTI_INTENT_EXTVAL     21
+
+  /*! Data is a 'p-value' (no params). */
+
+#define NIFTI_INTENT_PVAL       22
+
+  /*! Data is ln(p-value) (no params).
+      To be safe, a program should compute p = exp(-abs(this_value)).
+      The nifti_stats.c library returns this_value
+      as positive, so that this_value = -log(p). */
+
+
+#define NIFTI_INTENT_LOGPVAL    23
+
+  /*! Data is log10(p-value) (no params).
+      To be safe, a program should compute p = pow(10.,-abs(this_value)).
+      The nifti_stats.c library returns this_value
+      as positive, so that this_value = -log10(p). */
+
+#define NIFTI_INTENT_LOG10PVAL  24
+
+  /*! Smallest intent_code that indicates a statistic. */
+
+#define NIFTI_FIRST_STATCODE     2
+
+  /*! Largest intent_code that indicates a statistic. */
+
+#define NIFTI_LAST_STATCODE     24
+
+ /*---------- these values for intent_code aren't for statistics ----------*/
+
+ /*! To signify that the value at each voxel is an estimate
+     of some parameter, set intent_code = NIFTI_INTENT_ESTIMATE.
+     The name of the parameter may be stored in intent_name.     */
+
+#define NIFTI_INTENT_ESTIMATE  1001
+
+ /*! To signify that the value at each voxel is an index into
+     some set of labels, set intent_code = NIFTI_INTENT_LABEL.
+     The filename with the labels may stored in aux_file.        */
+
+#define NIFTI_INTENT_LABEL     1002
+
+ /*! To signify that the value at each voxel is an index into the
+     NeuroNames labels set, set intent_code = NIFTI_INTENT_NEURONAME. */
+
+#define NIFTI_INTENT_NEURONAME 1003
+
+ /*! To store an M x N matrix at each voxel:
+       - dataset must have a 5th dimension (dim[0]=5 and dim[5]>1)
+       - intent_code must be NIFTI_INTENT_GENMATRIX
+       - dim[5] must be M*N
+       - intent_p1 must be M (in float format)
+       - intent_p2 must be N (ditto)
+       - the matrix values A[i][[j] are stored in row-order:
+         - A[0][0] A[0][1] ... A[0][N-1]
+         - A[1][0] A[1][1] ... A[1][N-1]
+         - etc., until
+         - A[M-1][0] A[M-1][1] ... A[M-1][N-1]        */
+
+#define NIFTI_INTENT_GENMATRIX 1004
+
+ /*! To store an NxN symmetric matrix at each voxel:
+       - dataset must have a 5th dimension
+       - intent_code must be NIFTI_INTENT_SYMMATRIX
+       - dim[5] must be N*(N+1)/2
+       - intent_p1 must be N (in float format)
+       - the matrix values A[i][[j] are stored in row-order:
+         - A[0][0]
+         - A[1][0] A[1][1]
+         - A[2][0] A[2][1] A[2][2]
+         - etc.: row-by-row                           */
+
+#define NIFTI_INTENT_SYMMATRIX 1005
+
+ /*! To signify that the vector value at each voxel is to be taken
+     as a displacement field or vector:
+       - dataset must have a 5th dimension
+       - intent_code must be NIFTI_INTENT_DISPVECT
+       - dim[5] must be the dimensionality of the displacment
+         vector (e.g., 3 for spatial displacement, 2 for in-plane) */
+
+#define NIFTI_INTENT_DISPVECT  1006   /* specifically for displacements */
+#define NIFTI_INTENT_VECTOR    1007   /* for any other type of vector */
+
+ /*! To signify that the vector value at each voxel is really a
+     spatial coordinate (e.g., the vertices or nodes of a surface mesh):
+       - dataset must have a 5th dimension
+       - intent_code must be NIFTI_INTENT_POINTSET
+       - dim[0] = 5
+       - dim[1] = number of points
+       - dim[2] = dim[3] = dim[4] = 1
+       - dim[5] must be the dimensionality of space (e.g., 3 => 3D space).
+       - intent_name may describe the object these points come from
+         (e.g., "pial", "gray/white" , "EEG", "MEG").                   */
+
+#define NIFTI_INTENT_POINTSET  1008
+
+ /*! To signify that the vector value at each voxel is really a triple
+     of indexes (e.g., forming a triangle) from a pointset dataset:
+       - dataset must have a 5th dimension
+       - intent_code must be NIFTI_INTENT_TRIANGLE
+       - dim[0] = 5
+       - dim[1] = number of triangles
+       - dim[2] = dim[3] = dim[4] = 1
+       - dim[5] = 3
+       - datatype should be an integer type (preferably DT_INT32)
+       - the data values are indexes (0,1,...) into a pointset dataset. */
+
+#define NIFTI_INTENT_TRIANGLE  1009
+
+ /*! To signify that the vector value at each voxel is a quaternion:
+       - dataset must have a 5th dimension
+       - intent_code must be NIFTI_INTENT_QUATERNION
+       - dim[0] = 5
+       - dim[5] = 4
+       - datatype should be a floating point type     */
+
+#define NIFTI_INTENT_QUATERNION 1010
+
+ /*! Dimensionless value - no params - although, as in _ESTIMATE
+     the name of the parameter may be stored in intent_name.     */
+
+#define NIFTI_INTENT_DIMLESS    1011
+
+ /*---------- these values apply to GIFTI datasets ----------*/
+
+ /*! To signify that the value at each location is from a time series. */
+
+#define NIFTI_INTENT_TIME_SERIES  2001
+
+ /*! To signify that the value at each location is a node index, from
+     a complete surface dataset.                                       */
+
+#define NIFTI_INTENT_NODE_INDEX   2002
+
+ /*! To signify that the vector value at each location is an RGB triplet,
+     of whatever type.
+       - dataset must have a 5th dimension
+       - dim[0] = 5
+       - dim[1] = number of nodes
+       - dim[2] = dim[3] = dim[4] = 1
+       - dim[5] = 3
+    */
+
+#define NIFTI_INTENT_RGB_VECTOR   2003
+
+ /*! To signify that the vector value at each location is a 4 valued RGBA
+     vector, of whatever type.
+       - dataset must have a 5th dimension
+       - dim[0] = 5
+       - dim[1] = number of nodes
+       - dim[2] = dim[3] = dim[4] = 1
+       - dim[5] = 4
+    */
+
+#define NIFTI_INTENT_RGBA_VECTOR  2004
+
+ /*! To signify that the value at each location is a shape value, such
+     as the curvature.  */
+
+#define NIFTI_INTENT_SHAPE        2005
+
+ /*! The following intent codes have been used by FSL FNIRT for
+     displacement/coefficient files.
+
+     These codes are included to prevent clashes in community-created
+     extensions to NIfTI. Encoding and decoding behavior for these
+     intents is not specified by the standard, and support is OPTIONAL
+     for conforming implementations.
+     */
+
+#define NIFTI_INTENT_FSL_FNIRT_DISPLACEMENT_FIELD       2006
+#define NIFTI_INTENT_FSL_CUBIC_SPLINE_COEFFICIENTS      2007
+#define NIFTI_INTENT_FSL_DCT_COEFFICIENTS               2008
+#define NIFTI_INTENT_FSL_QUADRATIC_SPLINE_COEFFICIENTS  2009
+
+ /*! The following intent codes have been used by FSL TOPUP for
+     displacement/coefficient files.
+
+     These codes are included to prevent clashes in community-created
+     extensions to NIfTI. Encoding and decoding behavior for these
+     intents is not specified by the standard, and support is OPTIONAL
+     for conforming implementations.
+     */
+
+#define NIFTI_INTENT_FSL_TOPUP_CUBIC_SPLINE_COEFFICIENTS        2016
+#define NIFTI_INTENT_FSL_TOPUP_QUADRATIC_SPLINE_COEFFICIENTS    2017
+#define NIFTI_INTENT_FSL_TOPUP_FIELD                            2018
+
+/* @} */
+
+/*---------------------------------------------------------------------------*/
+/* 3D IMAGE (VOLUME) ORIENTATION AND LOCATION IN SPACE:
+   ---------------------------------------------------
+   There are 3 different methods by which continuous coordinates can
+   attached to voxels.  The discussion below emphasizes 3D volumes, and
+   the continuous coordinates are referred to as (x,y,z).  The voxel
+   index coordinates (i.e., the array indexes) are referred to as (i,j,k),
+   with valid ranges:
+     i = 0 .. dim[1]-1
+     j = 0 .. dim[2]-1  (if dim[0] >= 2)
+     k = 0 .. dim[3]-1  (if dim[0] >= 3)
+   The (x,y,z) coordinates refer to the CENTER of a voxel.  In methods
+   2 and 3, the (x,y,z) axes refer to a subject-based coordinate system,
+   with
+     +x = Right  +y = Anterior  +z = Superior.
+   This is a right-handed coordinate system.  However, the exact direction
+   these axes point with respect to the subject depends on qform_code
+   (Method 2) and sform_code (Method 3).
+
+   N.B.: The i index varies most rapidly, j index next, k index slowest.
+    Thus, voxel (i,j,k) is stored starting at location
+      (i + j*dim[1] + k*dim[1]*dim[2]) * (bitpix/8)
+    into the dataset array.
+
+   N.B.: The ANALYZE 7.5 coordinate system is
+      +x = Left  +y = Anterior  +z = Superior
+    which is a left-handed coordinate system.  This backwardness is
+    too difficult to tolerate, so this NIFTI-1 standard specifies the
+    coordinate order which is most common in functional neuroimaging.
+
+   N.B.: The 3 methods below all give the locations of the voxel centers
+    in the (x,y,z) coordinate system.  In many cases, programs will wish
+    to display image data on some other grid.  In such a case, the program
+    will need to convert its desired (x,y,z) values into (i,j,k) values
+    in order to extract (or interpolate) the image data.  This operation
+    would be done with the inverse transformation to those described below.
+
+   N.B.: Method 2 uses a factor 'qfac' which is either -1 or 1; qfac is
+    stored in the otherwise unused pixdim[0].  If pixdim[0]=0.0 (which
+    should not occur), we take qfac=1.  Of course, pixdim[0] is only used
+    when reading a NIFTI-1 header, not when reading an ANALYZE 7.5 header.
+
+   N.B.: The units of (x,y,z) can be specified using the xyzt_units field.
+
+   METHOD 1 (the "old" way, used only when qform_code = 0):
+   -------------------------------------------------------
+   The coordinate mapping from (i,j,k) to (x,y,z) is the ANALYZE
+   7.5 way.  This is a simple scaling relationship:
+
+     x = pixdim[1] * i
+     y = pixdim[2] * j
+     z = pixdim[3] * k
+
+   No particular spatial orientation is attached to these (x,y,z)
+   coordinates.  (NIFTI-1 does not have the ANALYZE 7.5 orient field,
+   which is not general and is often not set properly.)  This method
+   is not recommended, and is present mainly for compatibility with
+   ANALYZE 7.5 files.
+
+   METHOD 2 (used when qform_code > 0, which should be the "normal" case):
+   ---------------------------------------------------------------------
+   The (x,y,z) coordinates are given by the pixdim[] scales, a rotation
+   matrix, and a shift.  This method is intended to represent
+   "scanner-anatomical" coordinates, which are often embedded in the
+   image header (e.g., DICOM fields (0020,0032), (0020,0037), (0028,0030),
+   and (0018,0050)), and represent the nominal orientation and location of
+   the data.  This method can also be used to represent "aligned"
+   coordinates, which would typically result from some post-acquisition
+   alignment of the volume to a standard orientation (e.g., the same
+   subject on another day, or a rigid rotation to true anatomical
+   orientation from the tilted position of the subject in the scanner).
+   The formula for (x,y,z) in terms of header parameters and (i,j,k) is:
+
+     [ x ]   [ R11 R12 R13 ] [        pixdim[1] * i ]   [ qoffset_x ]
+     [ y ] = [ R21 R22 R23 ] [        pixdim[2] * j ] + [ qoffset_y ]
+     [ z ]   [ R31 R32 R33 ] [ qfac * pixdim[3] * k ]   [ qoffset_z ]
+
+   The qoffset_* shifts are in the NIFTI-1 header.  Note that the center
+   of the (i,j,k)=(0,0,0) voxel (first value in the dataset array) is
+   just (x,y,z)=(qoffset_x,qoffset_y,qoffset_z).
+
+   The rotation matrix R is calculated from the quatern_* parameters.
+   This calculation is described below.
+
+   The scaling factor qfac is either 1 or -1.  The rotation matrix R
+   defined by the quaternion parameters is "proper" (has determinant 1).
+   This may not fit the needs of the data; for example, if the image
+   grid is
+     i increases from Left-to-Right
+     j increases from Anterior-to-Posterior
+     k increases from Inferior-to-Superior
+   Then (i,j,k) is a left-handed triple.  In this example, if qfac=1,
+   the R matrix would have to be
+
+     [  1   0   0 ]
+     [  0  -1   0 ]  which is "improper" (determinant = -1).
+     [  0   0   1 ]
+
+   If we set qfac=-1, then the R matrix would be
+
+     [  1   0   0 ]
+     [  0  -1   0 ]  which is proper.
+     [  0   0  -1 ]
+
+   This R matrix is represented by quaternion [a,b,c,d] = [0,1,0,0]
+   (which encodes a 180 degree rotation about the x-axis).
+
+   METHOD 3 (used when sform_code > 0):
+   -----------------------------------
+   The (x,y,z) coordinates are given by a general affine transformation
+   of the (i,j,k) indexes:
+
+     x = srow_x[0] * i + srow_x[1] * j + srow_x[2] * k + srow_x[3]
+     y = srow_y[0] * i + srow_y[1] * j + srow_y[2] * k + srow_y[3]
+     z = srow_z[0] * i + srow_z[1] * j + srow_z[2] * k + srow_z[3]
+
+   The srow_* vectors are in the NIFTI_1 header.  Note that no use is
+   made of pixdim[] in this method.
+
+   WHY 3 METHODS?
+   --------------
+   Method 1 is provided only for backwards compatibility.  The intention
+   is that Method 2 (qform_code > 0) represents the nominal voxel locations
+   as reported by the scanner, or as rotated to some fiducial orientation and
+   location.  Method 3, if present (sform_code > 0), is to be used to give
+   the location of the voxels in some standard space.  The sform_code
+   indicates which standard space is present.  Both methods 2 and 3 can be
+   present, and be useful in different contexts (method 2 for displaying the
+   data on its original grid; method 3 for displaying it on a standard grid).
+
+   In this scheme, a dataset would originally be set up so that the
+   Method 2 coordinates represent what the scanner reported.  Later,
+   a registration to some standard space can be computed and inserted
+   in the header.  Image display software can use either transform,
+   depending on its purposes and needs.
+
+   In Method 2, the origin of coordinates would generally be whatever
+   the scanner origin is; for example, in MRI, (0,0,0) is the center
+   of the gradient coil.
+
+   In Method 3, the origin of coordinates would depend on the value
+   of sform_code; for example, for the Talairach coordinate system,
+   (0,0,0) corresponds to the Anterior Commissure.
+
+   QUATERNION REPRESENTATION OF ROTATION MATRIX (METHOD 2)
+   -------------------------------------------------------
+   The orientation of the (x,y,z) axes relative to the (i,j,k) axes
+   in 3D space is specified using a unit quaternion [a,b,c,d], where
+   a*a+b*b+c*c+d*d=1.  The (b,c,d) values are all that is needed, since
+   we require that a = sqrt(1.0-(b*b+c*c+d*d)) be nonnegative.  The (b,c,d)
+   values are stored in the (quatern_b,quatern_c,quatern_d) fields.
+
+   The quaternion representation is chosen for its compactness in
+   representing rotations. The (proper) 3x3 rotation matrix that
+   corresponds to [a,b,c,d] is
+
+         [ a*a+b*b-c*c-d*d   2*b*c-2*a*d       2*b*d+2*a*c     ]
+     R = [ 2*b*c+2*a*d       a*a+c*c-b*b-d*d   2*c*d-2*a*b     ]
+         [ 2*b*d-2*a*c       2*c*d+2*a*b       a*a+d*d-c*c-b*b ]
+
+         [ R11               R12               R13             ]
+       = [ R21               R22               R23             ]
+         [ R31               R32               R33             ]
+
+   If (p,q,r) is a unit 3-vector, then rotation of angle h about that
+   direction is represented by the quaternion
+
+     [a,b,c,d] = [cos(h/2), p*sin(h/2), q*sin(h/2), r*sin(h/2)].
+
+   Requiring a >= 0 is equivalent to requiring -Pi <= h <= Pi.  (Note that
+   [-a,-b,-c,-d] represents the same rotation as [a,b,c,d]; there are 2
+   quaternions that can be used to represent a given rotation matrix R.)
+   To rotate a 3-vector (x,y,z) using quaternions, we compute the
+   quaternion product
+
+     [0,x',y',z'] = [a,b,c,d] * [0,x,y,z] * [a,-b,-c,-d]
+
+   which is equivalent to the matrix-vector multiply
+
+     [ x' ]     [ x ]
+     [ y' ] = R [ y ]   (equivalence depends on a*a+b*b+c*c+d*d=1)
+     [ z' ]     [ z ]
+
+   Multiplication of 2 quaternions is defined by the following:
+
+     [a,b,c,d] = a*1 + b*I + c*J + d*K
+     where
+       I*I = J*J = K*K = -1 (I,J,K are square roots of -1)
+       I*J =  K    J*K =  I    K*I =  J
+       J*I = -K    K*J = -I    I*K = -J  (not commutative!)
+     For example
+       [a,b,0,0] * [0,0,0,1] = [0,0,-b,a]
+     since this expands to
+       (a+b*I)*(K) = (a*K+b*I*K) = (a*K-b*J).
+
+   The above formula shows how to go from quaternion (b,c,d) to
+   rotation matrix and direction cosines.  Conversely, given R,
+   we can compute the fields for the NIFTI-1 header by
+
+     a = 0.5  * sqrt(1+R11+R22+R33)    (not stored)
+     b = 0.25 * (R32-R23) / a       => quatern_b
+     c = 0.25 * (R13-R31) / a       => quatern_c
+     d = 0.25 * (R21-R12) / a       => quatern_d
+
+   If a=0 (a 180 degree rotation), alternative formulas are needed.
+   See the nifti1_io.c function mat44_to_quatern() for an implementation
+   of the various cases in converting R to [a,b,c,d].
+
+   Note that R-transpose (= R-inverse) would lead to the quaternion
+   [a,-b,-c,-d].
+
+   The choice to specify the qoffset_x (etc.) values in the final
+   coordinate system is partly to make it easy to convert DICOM images to
+   this format.  The DICOM attribute "Image Position (Patient)" (0020,0032)
+   stores the (Xd,Yd,Zd) coordinates of the center of the first voxel.
+   Here, (Xd,Yd,Zd) refer to DICOM coordinates, and Xd=-x, Yd=-y, Zd=z,
+   where (x,y,z) refers to the NIFTI coordinate system discussed above.
+   (i.e., DICOM +Xd is Left, +Yd is Posterior, +Zd is Superior,
+        whereas +x is Right, +y is Anterior  , +z is Superior. )
+   Thus, if the (0020,0032) DICOM attribute is extracted into (px,py,pz), then
+     qoffset_x = -px   qoffset_y = -py   qoffset_z = pz
+   is a reasonable setting when qform_code=NIFTI_XFORM_SCANNER_ANAT.
+
+   That is, DICOM's coordinate system is 180 degrees rotated about the z-axis
+   from the neuroscience/NIFTI coordinate system.  To transform between DICOM
+   and NIFTI, you just have to negate the x- and y-coordinates.
+
+   The DICOM attribute (0020,0037) "Image Orientation (Patient)" gives the
+   orientation of the x- and y-axes of the image data in terms of 2 3-vectors.
+   The first vector is a unit vector along the x-axis, and the second is
+   along the y-axis.  If the (0020,0037) attribute is extracted into the
+   value (xa,xb,xc,ya,yb,yc), then the first two columns of the R matrix
+   would be
+              [ -xa  -ya ]
+              [ -xb  -yb ]
+              [  xc   yc ]
+   The negations are because DICOM's x- and y-axes are reversed relative
+   to NIFTI's.  The third column of the R matrix gives the direction of
+   displacement (relative to the subject) along the slice-wise direction.
+   This orientation is not encoded in the DICOM standard in a simple way;
+   DICOM is mostly concerned with 2D images.  The third column of R will be
+   either the cross-product of the first 2 columns or its negative.  It is
+   possible to infer the sign of the 3rd column by examining the coordinates
+   in DICOM attribute (0020,0032) "Image Position (Patient)" for successive
+   slices.  However, this method occasionally fails for reasons that I
+   (RW Cox) do not understand.
+-----------------------------------------------------------------------------*/
+
+   /* [qs]form_code value:  */      /* x,y,z coordinate system refers to:    */
+   /*-----------------------*/      /*---------------------------------------*/
+
+/*! \defgroup NIFTI1_XFORM_CODES
+    \brief nifti1 xform codes to describe the "standard" coordinate system
+    @{
+ */
+                                    /*! Arbitrary coordinates (Method 1). */
+
+#define NIFTI_XFORM_UNKNOWN      0
+
+                                    /*! Scanner-based anatomical coordinates */
+
+#define NIFTI_XFORM_SCANNER_ANAT 1
+
+                                    /*! Coordinates aligned to another file's,
+                                        or to anatomical "truth".            */
+
+#define NIFTI_XFORM_ALIGNED_ANAT 2
+
+                                    /*! Coordinates aligned to Talairach-
+                                        Tournoux Atlas; (0,0,0)=AC, etc. */
+
+#define NIFTI_XFORM_TALAIRACH    3
+
+                                    /*! MNI 152 normalized coordinates. */
+
+#define NIFTI_XFORM_MNI_152      4
+
+                                    /*!  Normalized coordinates (for
+                                         any general standard template
+                                         space). Added March 8, 2019. */
+
+#define NIFTI_XFORM_TEMPLATE_OTHER  5
+
+/* @} */
+
+/*---------------------------------------------------------------------------*/
+/* UNITS OF SPATIAL AND TEMPORAL DIMENSIONS:
+   ----------------------------------------
+   The codes below can be used in xyzt_units to indicate the units of pixdim.
+   As noted earlier, dimensions 1,2,3 are for x,y,z; dimension 4 is for
+   time (t).
+    - If dim[4]=1 or dim[0] < 4, there is no time axis.
+    - A single time series (no space) would be specified with
+      - dim[0] = 4 (for scalar data) or dim[0] = 5 (for vector data)
+      - dim[1] = dim[2] = dim[3] = 1
+      - dim[4] = number of time points
+      - pixdim[4] = time step
+      - xyzt_units indicates units of pixdim[4]
+      - dim[5] = number of values stored at each time point
+
+   Bits 0..2 of xyzt_units specify the units of pixdim[1..3]
+    (e.g., spatial units are values 1..7).
+   Bits 3..5 of xyzt_units specify the units of pixdim[4]
+    (e.g., temporal units are multiples of 8).
+
+   This compression of 2 distinct concepts into 1 byte is due to the
+   limited space available in the 348 byte ANALYZE 7.5 header.  The
+   macros XYZT_TO_SPACE and XYZT_TO_TIME can be used to mask off the
+   undesired bits from the xyzt_units fields, leaving "pure" space
+   and time codes.  Inversely, the macro SPACE_TIME_TO_XYZT can be
+   used to assemble a space code (0,1,2,...,7) with a time code
+   (0,8,16,32,...,56) into the combined value for xyzt_units.
+
+   Note that codes are provided to indicate the "time" axis units are
+   actually frequency in Hertz (_HZ), in part-per-million (_PPM)
+   or in radians-per-second (_RADS).
+
+   The toffset field can be used to indicate a nonzero start point for
+   the time axis.  That is, time point #m is at t=toffset+m*pixdim[4]
+   for m=0..dim[4]-1.
+-----------------------------------------------------------------------------*/
+
+/*! \defgroup NIFTI1_UNITS
+    \brief nifti1 units codes to describe the unit of measurement for
+           each dimension of the dataset
+    @{
+ */
+                               /*! NIFTI code for unspecified units. */
+#define NIFTI_UNITS_UNKNOWN 0
+
+                               /** Space codes are multiples of 1. **/
+                               /*! NIFTI code for meters. */
+#define NIFTI_UNITS_METER   1
+                               /*! NIFTI code for millimeters. */
+#define NIFTI_UNITS_MM      2
+                               /*! NIFTI code for micrometers. */
+#define NIFTI_UNITS_MICRON  3
+
+                               /** Time codes are multiples of 8. **/
+                               /*! NIFTI code for seconds. */
+#define NIFTI_UNITS_SEC     8
+                               /*! NIFTI code for milliseconds. */
+#define NIFTI_UNITS_MSEC   16
+                               /*! NIFTI code for microseconds. */
+#define NIFTI_UNITS_USEC   24
+
+                               /*** These units are for spectral data: ***/
+                               /*! NIFTI code for Hertz. */
+#define NIFTI_UNITS_HZ     32
+                               /*! NIFTI code for ppm. */
+#define NIFTI_UNITS_PPM    40
+                               /*! NIFTI code for radians per second. */
+#define NIFTI_UNITS_RADS   48
+/* @} */
+
+#undef  XYZT_TO_SPACE
+#undef  XYZT_TO_TIME
+#define XYZT_TO_SPACE(xyzt)       ( (xyzt) & 0x07 )
+#define XYZT_TO_TIME(xyzt)        ( (xyzt) & 0x38 )
+
+#undef  SPACE_TIME_TO_XYZT
+#define SPACE_TIME_TO_XYZT(ss,tt) (  (((char)(ss)) & 0x07)   \
+                                   | (((char)(tt)) & 0x38) )
+
+/*---------------------------------------------------------------------------*/
+/* MRI-SPECIFIC SPATIAL AND TEMPORAL INFORMATION:
+   ---------------------------------------------
+   A few fields are provided to store some extra information
+   that is sometimes important when storing the image data
+   from an FMRI time series experiment.  (After processing such
+   data into statistical images, these fields are not likely
+   to be useful.)
+
+  { freq_dim  } = These fields encode which spatial dimension (1,2, or 3)
+  { phase_dim } = corresponds to which acquisition dimension for MRI data.
+  { slice_dim } =
+    Examples:
+      Rectangular scan multi-slice EPI:
+        freq_dim = 1  phase_dim = 2  slice_dim = 3  (or some permutation)
+      Spiral scan multi-slice EPI:
+        freq_dim = phase_dim = 0  slice_dim = 3
+        since the concepts of frequency- and phase-encoding directions
+        don't apply to spiral scan
+
+    slice_duration = If this is positive, AND if slice_dim is nonzero,
+                     indicates the amount of time used to acquire 1 slice.
+                     slice_duration*dim[slice_dim] can be less than pixdim[4]
+                     with a clustered acquisition method, for example.
+
+    slice_code = If this is nonzero, AND if slice_dim is nonzero, AND
+                 if slice_duration is positive, indicates the timing
+                 pattern of the slice acquisition.  The following codes
+                 are defined:
+                   NIFTI_SLICE_SEQ_INC  == sequential increasing
+                   NIFTI_SLICE_SEQ_DEC  == sequential decreasing
+                   NIFTI_SLICE_ALT_INC  == alternating increasing
+                   NIFTI_SLICE_ALT_DEC  == alternating decreasing
+                   NIFTI_SLICE_ALT_INC2 == alternating increasing #2
+                   NIFTI_SLICE_ALT_DEC2 == alternating decreasing #2
+  { slice_start } = Indicates the start and end of the slice acquisition
+  { slice_end   } = pattern, when slice_code is nonzero.  These values
+                    are present to allow for the possible addition of
+                    "padded" slices at either end of the volume, which
+                    don't fit into the slice timing pattern.  If there
+                    are no padding slices, then slice_start=0 and
+                    slice_end=dim[slice_dim]-1 are the correct values.
+                    For these values to be meaningful, slice_start must
+                    be non-negative and slice_end must be greater than
+                    slice_start.  Otherwise, they should be ignored.
+
+  The following table indicates the slice timing pattern, relative to
+  time=0 for the first slice acquired, for some sample cases.  Here,
+  dim[slice_dim]=7 (there are 7 slices, labeled 0..6), slice_duration=0.1,
+  and slice_start=1, slice_end=5 (1 padded slice on each end).
+
+  slice
+  index  SEQ_INC SEQ_DEC ALT_INC ALT_DEC ALT_INC2 ALT_DEC2
+    6  :   n/a     n/a     n/a     n/a    n/a      n/a    n/a = not applicable
+    5  :   0.4     0.0     0.2     0.0    0.4      0.2    (slice time offset
+    4  :   0.3     0.1     0.4     0.3    0.1      0.0     doesn't apply to
+    3  :   0.2     0.2     0.1     0.1    0.3      0.3     slices outside
+    2  :   0.1     0.3     0.3     0.4    0.0      0.1     the range
+    1  :   0.0     0.4     0.0     0.2    0.2      0.4     slice_start ..
+    0  :   n/a     n/a     n/a     n/a    n/a      n/a     slice_end)
+
+  The SEQ slice_codes are sequential ordering (uncommon but not unknown),
+  either increasing in slice number or decreasing (INC or DEC), as
+  illustrated above.
+
+  The ALT slice codes are alternating ordering.  The 'standard' way for
+  these to operate (without the '2' on the end) is for the slice timing
+  to start at the edge of the slice_start .. slice_end group (at slice_start
+  for INC and at slice_end for DEC).  For the 'ALT_*2' slice_codes, the
+  slice timing instead starts at the first slice in from the edge (at
+  slice_start+1 for INC2 and at slice_end-1 for DEC2).  This latter
+  acquisition scheme is found on some Siemens scanners.
+
+  The fields freq_dim, phase_dim, slice_dim are all squished into the single
+  byte field dim_info (2 bits each, since the values for each field are
+  limited to the range 0..3).  This unpleasantness is due to lack of space
+  in the 348 byte allowance.
+
+  The macros DIM_INFO_TO_FREQ_DIM, DIM_INFO_TO_PHASE_DIM, and
+  DIM_INFO_TO_SLICE_DIM can be used to extract these values from the
+  dim_info byte.
+
+  The macro FPS_INTO_DIM_INFO can be used to put these 3 values
+  into the dim_info byte.
+-----------------------------------------------------------------------------*/
+
+#undef  DIM_INFO_TO_FREQ_DIM
+#undef  DIM_INFO_TO_PHASE_DIM
+#undef  DIM_INFO_TO_SLICE_DIM
+
+#define DIM_INFO_TO_FREQ_DIM(di)   ( ((di)     ) & 0x03 )
+#define DIM_INFO_TO_PHASE_DIM(di)  ( ((di) >> 2) & 0x03 )
+#define DIM_INFO_TO_SLICE_DIM(di)  ( ((di) >> 4) & 0x03 )
+
+#undef  FPS_INTO_DIM_INFO
+#define FPS_INTO_DIM_INFO(fd,pd,sd) ( ( ( ((char)(fd)) & 0x03)      ) |  \
+                                      ( ( ((char)(pd)) & 0x03) << 2 ) |  \
+                                      ( ( ((char)(sd)) & 0x03) << 4 )  )
+
+/*! \defgroup NIFTI1_SLICE_ORDER
+    \brief nifti1 slice order codes, describing the acquisition order
+           of the slices
+    @{
+ */
+#define NIFTI_SLICE_UNKNOWN   0
+#define NIFTI_SLICE_SEQ_INC   1
+#define NIFTI_SLICE_SEQ_DEC   2
+#define NIFTI_SLICE_ALT_INC   3
+#define NIFTI_SLICE_ALT_DEC   4
+#define NIFTI_SLICE_ALT_INC2  5  /* 05 May 2005: RWCox */
+#define NIFTI_SLICE_ALT_DEC2  6  /* 05 May 2005: RWCox */
+/* @} */
+
+/*---------------------------------------------------------------------------*/
+/* UNUSED FIELDS:
+   -------------
+   Some of the ANALYZE 7.5 fields marked as ++UNUSED++ may need to be set
+   to particular values for compatibility with other programs.  The issue
+   of interoperability of ANALYZE 7.5 files is a murky one -- not all
+   programs require exactly the same set of fields.  (Unobscuring this
+   murkiness is a principal motivation behind NIFTI-1.)
+
+   Some of the fields that may need to be set for other (non-NIFTI aware)
+   software to be happy are:
+
+     extents    dbh.h says this should be 16384
+     regular    dbh.h says this should be the character 'r'
+     glmin,   } dbh.h says these values should be the min and max voxel
+      glmax   }  values for the entire dataset
+
+   It is best to initialize ALL fields in the NIFTI-1 header to 0
+   (e.g., with calloc()), then fill in what is needed.
+-----------------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------------*/
+/* MISCELLANEOUS C MACROS
+-----------------------------------------------------------------------------*/
+
+/*.................*/
+/*! Given a nifti_1_header struct, check if it has a good magic number.
+    Returns NIFTI version number (1..9) if magic is good, 0 if it is not. */
+
+#define NIFTI_VERSION(h)                               \
+ ( ( (h).magic[0]=='n' && (h).magic[3]=='\0'    &&     \
+     ( (h).magic[1]=='i' || (h).magic[1]=='+' ) &&     \
+     ( (h).magic[2]>='1' && (h).magic[2]<='9' )   )    \
+ ? (h).magic[2]-'0' : 0 )
+
+/*.................*/
+/*! Check if a nifti_1_header struct says if the data is stored in the
+    same file or in a separate file.  Returns 1 if the data is in the same
+    file as the header, 0 if it is not.                                   */
+
+#define NIFTI_ONEFILE(h) ( (h).magic[1] == '+' )
+
+/*.................*/
+/*! Check if a nifti_1_header struct needs to be byte swapped.
+    Returns 1 if it needs to be swapped, 0 if it does not.     */
+
+#define NIFTI_NEEDS_SWAP(h) ( (h).dim[0] < 0 || (h).dim[0] > 7 )
+
+/*.................*/
+/*! Check if a nifti_1_header struct contains a 5th (vector) dimension.
+    Returns size of 5th dimension if > 1, returns 0 otherwise.         */
+
+#define NIFTI_5TH_DIM(h) ( ((h).dim[0]>4 && (h).dim[5]>1) ? (h).dim[5] : 0 )
+
+/*****************************************************************************/
+
+/*=================*/
+#ifdef  __cplusplus
+}
+#endif
+/*=================*/
+
+#endif /* _NIFTI_HEADER_ */
diff --git a/reg-io/nifti/nifti1_io.c b/reg-io/niftilib/nifti1_io.c
old mode 100755
new mode 100644
similarity index 86%
rename from reg-io/nifti/nifti1_io.c
rename to reg-io/niftilib/nifti1_io.c
index bea49cc6..afd444c9
--- a/reg-io/nifti/nifti1_io.c
+++ b/reg-io/niftilib/nifti1_io.c
@@ -1,6 +1,6 @@
 #define _NIFTI1_IO_C_
 
-#include "nifti1_io.h"   /* typedefs, prototypes, macros, etc. */
+#include "niftilib/nifti1_io.h"   /* typedefs, prototypes, macros, etc. */
 
 /*****===================================================================*****/
 /*****     Sample functions to deal with NIFTI-1 and ANALYZE files       *****/
@@ -28,7 +28,7 @@
  */
 
 /*! global history and version strings, for printing */
-static char * gni_history[] =
+static char const * const gni_history[] =
 {
   "----------------------------------------------------------------------\n"
   "history (of nifti library changes):\n"
@@ -336,9 +336,13 @@ static char * gni_history[] =
   "   - fixed znzread/write, noting example by M Adler\n"
   "   - changed nifti_swap_* routines/calls to take size_t (6)\n"
   "1.43 07 Jul 2010 [rickr]: fixed znzR/W to again return nmembers\n",
+  "1.44 19 Jul 2013 [rickr]: ITK compatibility updates from H Johnson\n",
+  "1.45 10 May 2019 [rickr]: added NIFTI_ECODE_QUANTIPHYSE\n",
+  "1.46 26 Sep 2019 [rickr]:\n"
+  "   - nifti_read_ascii_image no longer closes fp or free's fname\n",
   "----------------------------------------------------------------------\n"
 };
-static char gni_version[] = "nifti library version 1.43 (7 July, 2010)";
+static const char gni_version[] = "nifti library version 1.46 (26 Sep, 2019)";
 
 /*! global nifti options structure - init with defaults */
 static nifti_global_options g_opts = {
@@ -348,7 +352,7 @@ static nifti_global_options g_opts = {
 };
 
 /*! global nifti types structure list (per type, ordered oldest to newest) */
-static nifti_type_ele nifti_type_list[] = {
+static const nifti_type_ele nifti_type_list[] = {
     /* type  nbyper  swapsize   name  */
     {    0,     0,       0,   "DT_UNKNOWN"              },
     {    0,     0,       0,   "DT_NONE"                 },
@@ -409,7 +413,7 @@ static int  nifti_fill_extension(nifti1_extension * ext, const char * data,
                                  int len, int ecode);
 
 /* NBL routines */
-static int  nifti_load_NBL_bricks(nifti_image * nim , int * slist, int * sindex,                                  nifti_brick_list * NBL, znzFile fp );
+static int  nifti_load_NBL_bricks(nifti_image * nim , const int * slist, const int * sindex,                                  nifti_brick_list * NBL, znzFile fp );
 static int  nifti_alloc_NBL_mem(  nifti_image * nim, int nbricks,
                                   nifti_brick_list * nbl);
 static int  nifti_copynsort(int nbricks, const int *blist, int **slist,
@@ -420,7 +424,7 @@ static int  nifti_NBL_matches_nim(const nifti_image *nim,
 /* for nifti_read_collapsed_image: */
 static int  rci_read_data(nifti_image *nim, int *pivots, int *prods, int nprods,
                   const int dims[], char *data, znzFile fp, size_t base_offset);
-static int  rci_alloc_mem(void ** data, int prods[8], int nprods, int nbyper );
+static int  rci_alloc_mem(void ** data, const int prods[8], int nprods, int nbyper );
 static int  make_pivot_list(nifti_image * nim, const int dims[], int pivots[],
                             int prods[], int * nprods );
 
@@ -428,13 +432,13 @@ static int  make_pivot_list(nifti_image * nim, const int dims[], int pivots[],
 static int   compare_strlist   (const char * str, char ** strlist, int len);
 static int   fileext_compare   (const char * test_ext, const char * known_ext);
 static int   fileext_n_compare (const char * test_ext,
-                                const char * known_ext, int maxlen);
+                                const char * known_ext, size_t maxlen);
 static int   is_mixedcase      (const char * str);
 static int   is_uppercase      (const char * str);
 static int   make_lowercase    (char * str);
 static int   make_uppercase    (char * str);
 static int   need_nhdr_swap    (short dim0, int hdrsize);
-static int   print_hex_vals    (const char * data, int nbytes, FILE * fp);
+static int   print_hex_vals    (const char * data, size_t nbytes, FILE * fp);
 static int   unescape_string   (char *str);  /* string utility functions */
 static char *escapize_string   (const char *str);
 
@@ -445,7 +449,6 @@ static int     has_ascii_header(znzFile fp);
 
 
 /* for calling from some main program */
-
 /*----------------------------------------------------------------------*/
 /*! display the nifti library module history (via stdout)
 *//*--------------------------------------------------------------------*/
@@ -453,17 +456,19 @@ void nifti_disp_lib_hist( void )
 {
    int c, len = sizeof(gni_history)/sizeof(char *);
    for( c = 0; c < len; c++ )
-       fputs(gni_history[c], stdout);
+       Rc_fputs_stdout(gni_history[c]);
 }
 
+#ifndef RNIFTI_NIFTILIB_DEDUPLICATE
+
 /*----------------------------------------------------------------------*/
 /*! display the nifti library version (via stdout)
 *//*--------------------------------------------------------------------*/
 void nifti_disp_lib_version( void )
 {
-   printf("%s, compiled %s\n", gni_version, __DATE__);
+   Rc_printf("%s, compiled %s\n", gni_version, __DATE__);
 }
-
+#endif
 
 /*----------------------------------------------------------------------*/
 /*! nifti_image_read_bricks        - read nifti data as array of bricks
@@ -534,13 +539,13 @@ nifti_image *nifti_image_read_bricks(const char * hname, int nbricks,
    nifti_image * nim;
 
    if( !hname || !NBL ){
-      fprintf(stderr,"** nifti_image_read_bricks: bad params (%p,%p)\n",
+      Rc_fprintf_stderr("** nifti_image_read_bricks: bad params (%p,%p)\n",
               hname, (void *)NBL);
       return NULL;
    }
 
    if( blist && nbricks <= 0 ){
-      fprintf(stderr,"** nifti_image_read_bricks: bad nbricks, %d\n", nbricks);
+      Rc_fprintf_stderr("** nifti_image_read_bricks: bad nbricks, %d\n", nbricks);
       return NULL;
    }
 
@@ -572,10 +577,10 @@ static void update_nifti_image_for_brick_list( nifti_image * nim , int nbricks )
    int ndim;
 
    if( g_opts.debug > 2 ){
-      fprintf(stderr,"+d updating image dimensions for %d bricks in list\n",
+      Rc_fprintf_stderr("+d updating image dimensions for %d bricks in list\n",
               nbricks);
-      fprintf(stderr,"   ndim = %d\n",nim->ndim);
-      fprintf(stderr,"   nx,ny,nz,nt,nu,nv,nw: (%d,%d,%d,%d,%d,%d,%d)\n",
+      Rc_fprintf_stderr("   ndim = %d\n",nim->ndim);
+      Rc_fprintf_stderr("   nx,ny,nz,nt,nu,nv,nw: (%d,%d,%d,%d,%d,%d,%d)\n",
               nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw);
    }
 
@@ -594,8 +599,8 @@ static void update_nifti_image_for_brick_list( nifti_image * nim , int nbricks )
        ;
 
    if( g_opts.debug > 2 ){
-      fprintf(stderr,"+d ndim = %d -> %d\n",nim->ndim, ndim);
-      fprintf(stderr," --> (%d,%d,%d,%d,%d,%d,%d)\n",
+      Rc_fprintf_stderr("+d ndim = %d -> %d\n",nim->ndim, ndim);
+      Rc_fprintf_stderr(" --> (%d,%d,%d,%d,%d,%d,%d)\n",
               nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw);
    }
 
@@ -617,21 +622,21 @@ int nifti_update_dims_from_array( nifti_image * nim )
    int c, ndim;
 
    if( !nim ){
-      fprintf(stderr,"** update_dims: missing nim\n");
+      Rc_fprintf_stderr("** update_dims: missing nim\n");
       return 1;
    }
 
    if( g_opts.debug > 2 ){
-      fprintf(stderr,"+d updating image dimensions given nim->dim:");
-      for( c = 0; c < 8; c++ ) fprintf(stderr," %d", nim->dim[c]);
-      fputc('\n',stderr);
+      Rc_fprintf_stderr("+d updating image dimensions given nim->dim:");
+      for( c = 0; c < 8; c++ ) Rc_fprintf_stderr(" %d", nim->dim[c]);
+      Rc_fputc_stderr('\n');
    }
 
    /* verify dim[0] first */
    if(nim->dim[0] < 1 || nim->dim[0] > 7){
-      fprintf(stderr,"** invalid dim[0], dim[] = ");
-      for( c = 0; c < 8; c++ ) fprintf(stderr," %d", nim->dim[c]);
-      fputc('\n',stderr);
+      Rc_fprintf_stderr("** invalid dim[0], dim[] = ");
+      for( c = 0; c < 8; c++ ) Rc_fprintf_stderr(" %d", nim->dim[c]);
+      Rc_fputc_stderr('\n');
       return 1;
    }
 
@@ -688,8 +693,8 @@ int nifti_update_dims_from_array( nifti_image * nim )
        ;
 
    if( g_opts.debug > 2 ){
-      fprintf(stderr,"+d ndim = %d -> %d\n",nim->ndim, ndim);
-      fprintf(stderr," --> (%d,%d,%d,%d,%d,%d,%d)\n",
+      Rc_fprintf_stderr("+d ndim = %d -> %d\n",nim->ndim, ndim);
+      Rc_fprintf_stderr(" --> (%d,%d,%d,%d,%d,%d,%d)\n",
               nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw);
    }
 
@@ -724,14 +729,14 @@ int nifti_image_load_bricks( nifti_image * nim , int nbricks,
 
    /* we can have blist == NULL */
    if( !nim || !NBL ){
-      fprintf(stderr,"** nifti_image_load_bricks, bad params (%p,%p)\n",
+      Rc_fprintf_stderr("** nifti_image_load_bricks, bad params (%p,%p)\n",
               (void *)nim, (void *)NBL);
       return -1;
    }
 
    if( blist && nbricks <= 0 ){
       if( g_opts.debug > 1 )
-         fprintf(stderr,"-d load_bricks: received blist with nbricks = %d,"
+         Rc_fprintf_stderr("-d load_bricks: received blist with nbricks = %d,"
                         "ignoring blist\n", nbricks);
       blist = NULL; /* pretend nothing was passed */
    }
@@ -747,7 +752,7 @@ int nifti_image_load_bricks( nifti_image * nim , int nbricks,
    fp = nifti_image_load_prep( nim );
    if( !fp ){
       if( g_opts.debug > 0 )
-         fprintf(stderr,"** nifti_image_load_bricks, failed load_prep\n");
+         Rc_fprintf_stderr("** nifti_image_load_bricks, failed load_prep\n");
       if( blist ){ free(slist); free(sindex); }
       return -1;
    }
@@ -800,7 +805,7 @@ void nifti_free_NBL( nifti_brick_list * NBL )
  *
  * return 0 on success, -1 on failure
  *----------------------------------------------------------------------*/
-static int nifti_load_NBL_bricks( nifti_image * nim , int * slist, int * sindex,
+static int nifti_load_NBL_bricks( nifti_image * nim , const int * slist, const int * sindex,
                                   nifti_brick_list * NBL, znzFile fp )
 {
    size_t oposn, fposn;      /* orig and current file positions */
@@ -811,7 +816,7 @@ static int nifti_load_NBL_bricks( nifti_image * nim , int * slist, int * sindex,
 
    test = znztell(fp);  /* store current file position */
    if( test < 0 ){
-      fprintf(stderr,"** load bricks: ztell failed??\n");
+      Rc_fprintf_stderr("** load bricks: ztell failed??\n");
       return -1;
    }
    fposn = oposn = test;
@@ -821,20 +826,20 @@ static int nifti_load_NBL_bricks( nifti_image * nim , int * slist, int * sindex,
       for( c = 0; c < NBL->nbricks; c++ ) {
          rv = nifti_read_buffer(fp, NBL->bricks[c], NBL->bsize, nim);
          if( rv != NBL->bsize ){
-            fprintf(stderr,"** load bricks: cannot read brick %d from '%s'\n",
+            Rc_fprintf_stderr("** load bricks: cannot read brick %d from '%s'\n",
                     c, nim->iname ? nim->iname : nim->fname);
             return -1;
          }
       }
       if( g_opts.debug > 1 )
-         fprintf(stderr,"+d read %d default %u-byte bricks from file %s\n",
+         Rc_fprintf_stderr("+d read %d default %u-byte bricks from file %s\n",
                  NBL->nbricks, (unsigned int)NBL->bsize,
                  nim->iname ? nim->iname:nim->fname );
       return 0;
    }
 
    if( !sindex ){
-      fprintf(stderr,"** load_NBL_bricks: missing index list\n");
+      Rc_fprintf_stderr("** load_NBL_bricks: missing index list\n");
       return -1;
    }
 
@@ -850,7 +855,7 @@ static int nifti_load_NBL_bricks( nifti_image * nim , int * slist, int * sindex,
           if( fposn != (oposn + isrc*NBL->bsize) ){
              fposn = oposn + isrc*NBL->bsize;
              if( znzseek(fp, (long)fposn, SEEK_SET) < 0 ){
-                fprintf(stderr,"** failed to locate brick %d in file '%s'\n",
+                Rc_fprintf_stderr("** failed to locate brick %d in file '%s'\n",
                         isrc, nim->iname ? nim->iname : nim->fname);
                 return -1;
              }
@@ -859,10 +864,10 @@ static int nifti_load_NBL_bricks( nifti_image * nim , int * slist, int * sindex,
           /* only 10,000 lines later and we're actually reading something! */
           rv = nifti_read_buffer(fp, NBL->bricks[idest], NBL->bsize, nim);
           if( rv != NBL->bsize ){
-             fprintf(stderr,"** failed to read brick %d from file '%s'\n",
+             Rc_fprintf_stderr("** failed to read brick %d from file '%s'\n",
                      isrc, nim->iname ? nim->iname : nim->fname);
              if( g_opts.debug > 1 )
-                fprintf(stderr,"   (read %u of %u bytes)\n",
+                Rc_fprintf_stderr("   (read %u of %u bytes)\n",
                         (unsigned int)rv, (unsigned int)NBL->bsize);
              return -1;
           }
@@ -902,14 +907,14 @@ static int nifti_alloc_NBL_mem(nifti_image * nim, int nbricks,
    nbl->bricks  = (void **)malloc(nbl->nbricks * sizeof(void *));
 
    if( ! nbl->bricks ){
-      fprintf(stderr,"** NANM: failed to alloc %d void ptrs\n",nbricks);
+      Rc_fprintf_stderr("** NANM: failed to alloc %d void ptrs\n",nbricks);
       return -1;
    }
 
    for( c = 0; c < nbl->nbricks; c++ ){
       nbl->bricks[c] = (void *)malloc(nbl->bsize);
       if( ! nbl->bricks[c] ){
-         fprintf(stderr,"** NANM: failed to alloc %u bytes for brick %d\n",
+         Rc_fprintf_stderr("** NANM: failed to alloc %u bytes for brick %d\n",
                  (unsigned int)nbl->bsize, c);
          /* so free and clear everything before returning */
          while( c > 0 ){
@@ -924,7 +929,7 @@ static int nifti_alloc_NBL_mem(nifti_image * nim, int nbricks,
    }
 
    if( g_opts.debug > 2 )
-      fprintf(stderr,"+d NANM: alloc'd %d bricks of %u bytes for NBL\n",
+      Rc_fprintf_stderr("+d NANM: alloc'd %d bricks of %u bytes for NBL\n",
               nbl->nbricks, (unsigned int)nbl->bsize);
 
    return 0;
@@ -953,7 +958,7 @@ static int nifti_copynsort(int nbricks, const int * blist, int ** slist,
    *sindex = (int *)malloc(nbricks * sizeof(int));
 
    if( !*slist || !*sindex ){
-      fprintf(stderr,"** NCS: failed to alloc %d ints for sorting\n",nbricks);
+      Rc_fprintf_stderr("** NCS: failed to alloc %d ints for sorting\n",nbricks);
       if(*slist)  free(*slist);   /* maybe one succeeded */
       if(*sindex) free(*sindex);
       return -1;
@@ -984,26 +989,26 @@ static int nifti_copynsort(int nbricks, const int * blist, int ** slist,
    }
 
    if( g_opts.debug > 2 ){
-      fprintf(stderr,  "+d sorted indexing list:\n");
-      fprintf(stderr,  "  orig   : ");
-      for( c1 = 0; c1 < nbricks; c1++ ) fprintf(stderr,"  %d",blist[c1]);
-      fprintf(stderr,"\n  new    : ");
-      for( c1 = 0; c1 < nbricks; c1++ ) fprintf(stderr,"  %d",stmp[c1]);
-      fprintf(stderr,"\n  indices: ");
-      for( c1 = 0; c1 < nbricks; c1++ ) fprintf(stderr,"  %d",itmp[c1]);
-      fputc('\n', stderr);
+      Rc_fprintf_stderr("+d sorted indexing list:\n");
+      Rc_fprintf_stderr("  orig   : ");
+      for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr("  %d",blist[c1]);
+      Rc_fprintf_stderr("\n  new    : ");
+      for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr("  %d",stmp[c1]);
+      Rc_fprintf_stderr("\n  indices: ");
+      for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr("  %d",itmp[c1]);
+      Rc_fputc_stderr('\n');
    }
 
    /* check the sort (why not?  I've got time...) */
    for( c1 = 0; c1 < nbricks-1; c1++ ){
        if( (stmp[c1] > stmp[c1+1]) || (blist[itmp[c1]] != stmp[c1]) ){
-          fprintf(stderr,"** sorting screw-up, way to go, rick!\n");
+          Rc_fprintf_stderr("** sorting screw-up, way to go, rick!\n");
           free(stmp); free(itmp); *slist = NULL; *sindex = NULL;
           return -1;
        }
    }
 
-   if( g_opts.debug > 2 ) fprintf(stderr,"-d sorting is okay\n");
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d sorting is okay\n");
 
    return 0;
 }
@@ -1029,19 +1034,19 @@ int valid_nifti_brick_list(nifti_image * nim , int nbricks,
 
    if( !nim ){
       if( disp_error || g_opts.debug > 0 )
-         fprintf(stderr,"** valid_nifti_brick_list: missing nifti image\n");
+         Rc_fprintf_stderr("** valid_nifti_brick_list: missing nifti image\n");
       return 0;
    }
 
    if( nbricks <= 0 || !blist ){
       if( disp_error || g_opts.debug > 1 )
-         fprintf(stderr,"** valid_nifti_brick_list: no brick list to check\n");
+         Rc_fprintf_stderr("** valid_nifti_brick_list: no brick list to check\n");
       return 0;
    }
 
    if( nim->dim[0] < 3 ){
       if( disp_error || g_opts.debug > 1 )
-         fprintf(stderr,"** cannot read explict brick list from %d-D dataset\n",
+         Rc_fprintf_stderr("** cannot read explict brick list from %d-D dataset\n",
                  nim->dim[0]);
       return 0;
    }
@@ -1051,7 +1056,7 @@ int valid_nifti_brick_list(nifti_image * nim , int nbricks,
       nsubs *= nim->dim[c];
 
    if( nsubs <= 0 ){
-      fprintf(stderr,"** VNBL warning: bad dim list (%d,%d,%d,%d)\n",
+      Rc_fprintf_stderr("** VNBL warning: bad dim list (%d,%d,%d,%d)\n",
                      nim->dim[4], nim->dim[5], nim->dim[6], nim->dim[7]);
       return 0;
    }
@@ -1059,7 +1064,7 @@ int valid_nifti_brick_list(nifti_image * nim , int nbricks,
    for( c = 0; c < nbricks; c++ )
       if( (blist[c] < 0) || (blist[c] >= nsubs) ){
          if( disp_error || g_opts.debug > 1 )
-            fprintf(stderr,
+            Rc_fprintf_stderr(
                "** volume index %d (#%d) is out of range [0,%d]\n",
                blist[c], c, nsubs-1);
          return 0;
@@ -1082,7 +1087,7 @@ static int nifti_NBL_matches_nim(const nifti_image *nim,
 
    if( !nim || !NBL ) {
       if( g_opts.debug > 0 )
-         fprintf(stderr,"** nifti_NBL_matches_nim: NULL pointer(s)\n");
+         Rc_fprintf_stderr("** nifti_NBL_matches_nim: NULL pointer(s)\n");
       return 0;
    }
 
@@ -1099,21 +1104,21 @@ static int nifti_NBL_matches_nim(const nifti_image *nim,
 
    if( volbytes != NBL->bsize ) {
       if( g_opts.debug > 1 )
-         fprintf(stderr,"** NBL/nim mismatch, volbytes = %u, %u\n",
+         Rc_fprintf_stderr("** NBL/nim mismatch, volbytes = %u, %u\n",
                  (unsigned)NBL->bsize, (unsigned)volbytes);
       errs++;
    }
 
    if( nvols != NBL->nbricks ) {
       if( g_opts.debug > 1 )
-         fprintf(stderr,"** NBL/nim mismatch, nvols = %d, %d\n",
+         Rc_fprintf_stderr("** NBL/nim mismatch, nvols = %d, %d\n",
                  NBL->nbricks, nvols);
       errs++;
    }
 
    if( errs ) return 0;
    else if ( g_opts.debug > 2 )
-      fprintf(stderr,"-- nim/NBL agree: nvols = %d, nbytes = %u\n",
+      Rc_fprintf_stderr("-- nim/NBL agree: nvols = %d, nbytes = %u\n",
               nvols, (unsigned)volbytes);
 
    return 1;
@@ -1133,13 +1138,13 @@ int nifti_disp_matrix_orient( const char * mesg, mat44 mat )
 {
    int i, j, k;
 
-   if ( mesg ) fputs( mesg, stderr );  /* use stdout? */
+   if ( mesg ) Rc_fputs_stderr( mesg );  /* use stdout? */
 
    nifti_mat44_to_orientation( mat, &i,&j,&k );
    if ( i <= 0 || j <= 0 || k <= 0 ) return -1;
 
    /* so we have good codes */
-   fprintf(stderr, "  i orientation = '%s'\n"
+   Rc_fprintf_stderr( "  i orientation = '%s'\n"
                    "  j orientation = '%s'\n"
                    "  k orientation = '%s'\n",
                    nifti_orientation_string(i),
@@ -1148,7 +1153,7 @@ int nifti_disp_matrix_orient( const char * mesg, mat44 mat )
    return 0;
 }
 
-
+#ifndef RNIFTI_NIFTILIB_DEDUPLICATE
 /*----------------------------------------------------------------------*/
 /*! duplicate the given string (alloc length+1)
  *
@@ -1164,7 +1169,7 @@ char *nifti_strdup(const char *str)
 
   /* check for failure */
   if( dup ) strcpy(dup, str);
-  else      fprintf(stderr,"** nifti_strdup: failed to alloc %u bytes\n",
+  else      Rc_fprintf_stderr("** nifti_strdup: failed to alloc %u bytes\n",
                     (unsigned int)strlen(str)+1);
 
   return dup;
@@ -1183,7 +1188,7 @@ char *nifti_strdup(const char *str)
 
     \sa NIFTI1_DATATYPES group in nifti1.h
 *//*-------------------------------------------------------------------------*/
-char *nifti_datatype_string( int dt )
+char const * nifti_datatype_string( int dt )
 {
    switch( dt ){
      case DT_UNKNOWN:    return "UNKNOWN"    ;
@@ -1252,7 +1257,7 @@ int nifti_is_inttype( int dt )
 
     \sa     NIFTI1_UNITS group in nifti1.h
 *//*-------------------------------------------------------------------------*/
-char *nifti_units_string( int uu )
+char const *nifti_units_string( int uu )
 {
    switch( uu ){
      case NIFTI_UNITS_METER:  return "m" ;
@@ -1280,7 +1285,7 @@ char *nifti_units_string( int uu )
 
     \sa     NIFTI1_XFORM_CODES group in nifti1.h
 *//*-------------------------------------------------------------------------*/
-char *nifti_xform_string( int xx )
+char const *nifti_xform_string( int xx )
 {
    switch( xx ){
      case NIFTI_XFORM_SCANNER_ANAT:  return "Scanner Anat" ;
@@ -1303,7 +1308,7 @@ char *nifti_xform_string( int xx )
 
     \sa     NIFTI1_INTENT_CODES group in nifti1.h
 *//*-------------------------------------------------------------------------*/
-char *nifti_intent_string( int ii )
+char const *nifti_intent_string( int ii )
 {
    switch( ii ){
      case NIFTI_INTENT_CORREL:     return "Correlation statistic" ;
@@ -1359,7 +1364,7 @@ char *nifti_intent_string( int ii )
 
     \sa     NIFTI1_SLICE_ORDER group in nifti1.h
 *//*-------------------------------------------------------------------------*/
-char *nifti_slice_string( int ss )
+char const *nifti_slice_string( int ss )
 {
    switch( ss ){
      case NIFTI_SLICE_SEQ_INC:  return "sequential_increasing"    ;
@@ -1384,7 +1389,7 @@ char *nifti_slice_string( int ss )
 
     \sa  NIFTI_L2R in nifti1_io.h
 *//*-------------------------------------------------------------------------*/
-char *nifti_orientation_string( int ii )
+char const *nifti_orientation_string( int ii )
 {
    switch( ii ){
      case NIFTI_L2R: return "Left-to-Right" ;
@@ -1440,8 +1445,7 @@ void nifti_datatype_sizes( int datatype , int *nbyper, int *swapsize )
      case DT_COMPLEX256:  nb = 32 ; ss = 16 ; break ;
    }
 
-   ASSIF(nbyper,nb) ; ASSIF(swapsize,ss) ; return ;
-}
+   ASSIF(nbyper,nb) ; ASSIF(swapsize,ss) ; }
 
 /*---------------------------------------------------------------------------*/
 /*! Given the quaternion parameters (etc.), compute a transformation matrix.
@@ -1472,7 +1476,7 @@ mat44 nifti_quatern_to_mat44( float qb, float qc, float qd,
 
    /* last row is always [ 0 0 0 1 ] */
 
-   R.m[3][0]=R.m[3][1]=R.m[3][2] = 0.0 ; R.m[3][3]= 1.0 ;
+   R.m[3][0]=R.m[3][1]=R.m[3][2] = 0.0f ; R.m[3][3]= 1.0f ;
 
    /* compute a parameter from b,c,d */
 
@@ -1493,15 +1497,15 @@ mat44 nifti_quatern_to_mat44( float qb, float qc, float qd,
 
    if( qfac < 0.0 ) zd = -zd ;         /* left handedness? */
 
-   R.m[0][0] =        (a*a+b*b-c*c-d*d) * xd ;
+   R.m[0][0] = (float)( (a*a+b*b-c*c-d*d) * xd) ;
    R.m[0][1] = 2.0l * (b*c-a*d        ) * yd ;
    R.m[0][2] = 2.0l * (b*d+a*c        ) * zd ;
    R.m[1][0] = 2.0l * (b*c+a*d        ) * xd ;
-   R.m[1][1] =        (a*a+c*c-b*b-d*d) * yd ;
+   R.m[1][1] = (float)( (a*a+c*c-b*b-d*d) * yd) ;
    R.m[1][2] = 2.0l * (c*d-a*b        ) * zd ;
    R.m[2][0] = 2.0l * (b*d-a*c        ) * xd ;
    R.m[2][1] = 2.0l * (c*d+a*b        ) * yd ;
-   R.m[2][2] =        (a*a+d*d-c*c-b*b) * zd ;
+   R.m[2][2] = (float)( (a*a+d*d-c*c-b*b) * zd) ;
 
    /* load offsets */
 
@@ -1567,7 +1571,7 @@ void nifti_mat44_to_quatern( mat44 R ,
 
    /* assign the output lengths */
 
-   ASSIF(dx,xd) ; ASSIF(dy,yd) ; ASSIF(dz,zd) ;
+   ASSIF(dx,(float)xd) ; ASSIF(dy,(float)yd) ; ASSIF(dz,(float)zd) ;
 
    /* normalize the columns */
 
@@ -1587,9 +1591,9 @@ void nifti_mat44_to_quatern( mat44 R ,
       will result in the inverse orthogonal matrix at this point.
       If we just orthogonalized the columns, this wouldn't necessarily hold. */
 
-   Q.m[0][0] = r11 ; Q.m[0][1] = r12 ; Q.m[0][2] = r13 ; /* load Q */
-   Q.m[1][0] = r21 ; Q.m[1][1] = r22 ; Q.m[1][2] = r23 ;
-   Q.m[2][0] = r31 ; Q.m[2][1] = r32 ; Q.m[2][2] = r33 ;
+   Q.m[0][0] = (float)r11 ; Q.m[0][1] = (float)r12 ; Q.m[0][2] = (float)r13 ; /* load Q */
+   Q.m[1][0] = (float)r21 ; Q.m[1][1] = (float)r22 ; Q.m[1][2] = (float)r23 ;
+   Q.m[2][0] = (float)r31 ; Q.m[2][1] = (float)r32 ; Q.m[2][2] = (float)r33 ;
 
    P = nifti_mat33_polar(Q) ;  /* P is orthog matrix closest to Q */
 
@@ -1607,9 +1611,9 @@ void nifti_mat44_to_quatern( mat44 R ,
        +r21*r32*r13+r31*r12*r23-r31*r22*r13 ;  /* should be -1 or 1 */
 
    if( zd > 0 ){             /* proper */
-     ASSIF(qfac,1.0) ;
+     ASSIF(qfac,1.0f) ;
    } else {                  /* improper ==> flip 3rd column */
-     ASSIF(qfac,-1.0) ;
+     ASSIF(qfac,-1.0f) ;
      r13 = -r13 ; r23 = -r23 ; r33 = -r33 ;
    }
 
@@ -1642,11 +1646,10 @@ void nifti_mat44_to_quatern( mat44 R ,
        c = 0.25l* (r23+r32) / d ;
        a = 0.25l* (r21-r12) / d ;
      }
-     if( a < 0.0l ){ b=-b ; c=-c ; d=-d; a=-a; }
+     if( a < 0.0l ){ b=-b ; c=-c ; d=-d;}
    }
 
-   ASSIF(qb,b) ; ASSIF(qc,c) ; ASSIF(qd,d) ;
-   return ;
+   ASSIF(qb,(float)b) ; ASSIF(qc,(float)c) ; ASSIF(qd,(float)d);
 }
 
 /*---------------------------------------------------------------------------*/
@@ -1680,23 +1683,23 @@ mat44 nifti_mat44_inverse( mat44 R )
 
    if( deti != 0.0l ) deti = 1.0l / deti ;
 
-   Q.m[0][0] = deti*( r22*r33-r32*r23) ;
-   Q.m[0][1] = deti*(-r12*r33+r32*r13) ;
-   Q.m[0][2] = deti*( r12*r23-r22*r13) ;
-   Q.m[0][3] = deti*(-r12*r23*v3+r12*v2*r33+r22*r13*v3
-                     -r22*v1*r33-r32*r13*v2+r32*v1*r23) ;
+   Q.m[0][0] = (float)( deti*( r22*r33-r32*r23) ) ;
+   Q.m[0][1] = (float)( deti*(-r12*r33+r32*r13) ) ;
+   Q.m[0][2] = (float)( deti*( r12*r23-r22*r13) ) ;
+   Q.m[0][3] = (float)( deti*(-r12*r23*v3+r12*v2*r33+r22*r13*v3
+                     -r22*v1*r33-r32*r13*v2+r32*v1*r23) ) ;
 
-   Q.m[1][0] = deti*(-r21*r33+r31*r23) ;
-   Q.m[1][1] = deti*( r11*r33-r31*r13) ;
-   Q.m[1][2] = deti*(-r11*r23+r21*r13) ;
-   Q.m[1][3] = deti*( r11*r23*v3-r11*v2*r33-r21*r13*v3
-                     +r21*v1*r33+r31*r13*v2-r31*v1*r23) ;
+   Q.m[1][0] = (float)( deti*(-r21*r33+r31*r23) ) ;
+   Q.m[1][1] = (float)( deti*( r11*r33-r31*r13) ) ;
+   Q.m[1][2] = (float)( deti*(-r11*r23+r21*r13) ) ;
+   Q.m[1][3] = (float)( deti*( r11*r23*v3-r11*v2*r33-r21*r13*v3
+                     +r21*v1*r33+r31*r13*v2-r31*v1*r23) ) ;
 
-   Q.m[2][0] = deti*( r21*r32-r31*r22) ;
-   Q.m[2][1] = deti*(-r11*r32+r31*r12) ;
-   Q.m[2][2] = deti*( r11*r22-r21*r12) ;
-   Q.m[2][3] = deti*(-r11*r22*v3+r11*r32*v2+r21*r12*v3
-                     -r21*r32*v1-r31*r12*v2+r31*r22*v1) ;
+   Q.m[2][0] = (float)( deti*( r21*r32-r31*r22) ) ;
+   Q.m[2][1] = (float)( deti*(-r11*r32+r31*r12) ) ;
+   Q.m[2][2] = (float)( deti*( r11*r22-r21*r12) ) ;
+   Q.m[2][3] = (float)( deti*(-r11*r22*v3+r11*r32*v2+r21*r12*v3
+                     -r21*r32*v1-r31*r12*v2+r31*r22*v1) ) ;
 
    Q.m[3][0] = Q.m[3][1] = Q.m[3][2] = 0.0l ;
    Q.m[3][3] = (deti == 0.0l) ? 0.0l : 1.0l ; /* failure flag if deti == 0 */
@@ -1753,7 +1756,7 @@ mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 ,
    val = Q.m[0][0]*Q.m[0][0] + Q.m[0][1]*Q.m[0][1] + Q.m[0][2]*Q.m[0][2] ;
    if( val > 0.0l ){
      val = 1.0l / sqrt(val) ;
-     Q.m[0][0] *= val ; Q.m[0][1] *= val ; Q.m[0][2] *= val ;
+     Q.m[0][0] *= (float)val ; Q.m[0][1] *= (float)val ; Q.m[0][2] *= (float)val ;
    } else {
      Q.m[0][0] = 1.0l ; Q.m[0][1] = 0.0l ; Q.m[0][2] = 0.0l ;
    }
@@ -1763,7 +1766,7 @@ mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 ,
    val = Q.m[1][0]*Q.m[1][0] + Q.m[1][1]*Q.m[1][1] + Q.m[1][2]*Q.m[1][2] ;
    if( val > 0.0l ){
      val = 1.0l / sqrt(val) ;
-     Q.m[1][0] *= val ; Q.m[1][1] *= val ; Q.m[1][2] *= val ;
+     Q.m[1][0] *= (float)val ; Q.m[1][1] *= (float)val ; Q.m[1][2] *= (float)val ;
    } else {
      Q.m[1][0] = 0.0l ; Q.m[1][1] = 1.0l ; Q.m[1][2] = 0.0l ;
    }
@@ -1773,7 +1776,7 @@ mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 ,
    val = Q.m[2][0]*Q.m[2][0] + Q.m[2][1]*Q.m[2][1] + Q.m[2][2]*Q.m[2][2] ;
    if( val > 0.0l ){
      val = 1.0l / sqrt(val) ;
-     Q.m[2][0] *= val ; Q.m[2][1] *= val ; Q.m[2][2] *= val ;
+     Q.m[2][0] *= (float)val ; Q.m[2][1] *= (float)val ; Q.m[2][2] *= (float)val ;
    } else {
      Q.m[2][0] = Q.m[0][1]*Q.m[1][2] - Q.m[0][2]*Q.m[1][1] ;  /* cross */
      Q.m[2][1] = Q.m[0][2]*Q.m[1][0] - Q.m[0][0]*Q.m[1][2] ;  /* product */
@@ -1786,7 +1789,7 @@ mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 ,
    R.m[1][0] = P.m[1][0] ; R.m[1][1] = P.m[1][1] ; R.m[1][2] = P.m[1][2] ;
    R.m[2][0] = P.m[2][0] ; R.m[2][1] = P.m[2][1] ; R.m[2][2] = P.m[2][2] ;
 
-   R.m[0][3] = R.m[1][3] = R.m[2][3] = 0.0 ; return R ;
+   R.m[0][3] = R.m[1][3] = R.m[2][3] = 0.0f ; return R ;
 }
 
 /*----------------------------------------------------------------------*/
@@ -1806,17 +1809,17 @@ mat33 nifti_mat33_inverse( mat33 R )   /* inverse of 3x3 matrix */
 
    if( deti != 0.0l ) deti = 1.0l / deti ;
 
-   Q.m[0][0] = deti*( r22*r33-r32*r23) ;
-   Q.m[0][1] = deti*(-r12*r33+r32*r13) ;
-   Q.m[0][2] = deti*( r12*r23-r22*r13) ;
+   Q.m[0][0] = (float)( deti*( r22*r33-r32*r23) ) ;
+   Q.m[0][1] = (float)( deti*(-r12*r33+r32*r13) ) ;
+   Q.m[0][2] = (float)( deti*( r12*r23-r22*r13) ) ;
 
-   Q.m[1][0] = deti*(-r21*r33+r31*r23) ;
-   Q.m[1][1] = deti*( r11*r33-r31*r13) ;
-   Q.m[1][2] = deti*(-r11*r23+r21*r13) ;
+   Q.m[1][0] = (float)( deti*(-r21*r33+r31*r23) ) ;
+   Q.m[1][1] = (float)( deti*( r11*r33-r31*r13) ) ;
+   Q.m[1][2] = (float)( deti*(-r11*r23+r21*r13) ) ;
 
-   Q.m[2][0] = deti*( r21*r32-r31*r22) ;
-   Q.m[2][1] = deti*(-r11*r32+r31*r12) ;
-   Q.m[2][2] = deti*( r11*r22-r21*r12) ;
+   Q.m[2][0] = (float)( deti*( r21*r32-r31*r22) ) ;
+   Q.m[2][1] = (float)( deti*(-r11*r32+r31*r12) ) ;
+   Q.m[2][2] = (float)( deti*( r11*r22-r21*r12) ) ;
 
    return Q ;
 }
@@ -1832,8 +1835,8 @@ float nifti_mat33_determ( mat33 R )   /* determinant of 3x3 matrix */
    r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2];  /* [ r21 r22 r23 ] */
    r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2];  /* [ r31 r32 r33 ] */
 
-   return r11*r22*r33-r11*r32*r23-r21*r12*r33
-         +r21*r32*r13+r31*r12*r23-r31*r22*r13 ;
+   return (float)(r11*r22*r33-r11*r32*r23-r21*r12*r33
+         +r21*r32*r13+r31*r12*r23-r31*r22*r13) ;
 }
 
 /*----------------------------------------------------------------------*/
@@ -1843,9 +1846,9 @@ float nifti_mat33_rownorm( mat33 A )  /* max row norm of 3x3 matrix */
 {
    float r1,r2,r3 ;
 
-   r1 = fabs(A.m[0][0])+fabs(A.m[0][1])+fabs(A.m[0][2]) ;
-   r2 = fabs(A.m[1][0])+fabs(A.m[1][1])+fabs(A.m[1][2]) ;
-   r3 = fabs(A.m[2][0])+fabs(A.m[2][1])+fabs(A.m[2][2]) ;
+   r1 = (float)( fabs(A.m[0][0])+fabs(A.m[0][1])+fabs(A.m[0][2]) ) ;
+   r2 = (float)( fabs(A.m[1][0])+fabs(A.m[1][1])+fabs(A.m[1][2]) ) ;
+   r3 = (float)( fabs(A.m[2][0])+fabs(A.m[2][1])+fabs(A.m[2][2]) ) ;
    if( r1 < r2 ) r1 = r2 ;
    if( r1 < r3 ) r1 = r3 ;
    return r1 ;
@@ -1858,9 +1861,9 @@ float nifti_mat33_colnorm( mat33 A )  /* max column norm of 3x3 matrix */
 {
    float r1,r2,r3 ;
 
-   r1 = fabs(A.m[0][0])+fabs(A.m[1][0])+fabs(A.m[2][0]) ;
-   r2 = fabs(A.m[0][1])+fabs(A.m[1][1])+fabs(A.m[2][1]) ;
-   r3 = fabs(A.m[0][2])+fabs(A.m[1][2])+fabs(A.m[2][2]) ;
+   r1 = (float)( fabs(A.m[0][0])+fabs(A.m[1][0])+fabs(A.m[2][0]) ) ;
+   r2 = (float)( fabs(A.m[0][1])+fabs(A.m[1][1])+fabs(A.m[2][1]) ) ;
+   r3 = (float)( fabs(A.m[0][2])+fabs(A.m[1][2])+fabs(A.m[2][2]) ) ;
    if( r1 < r2 ) r1 = r2 ;
    if( r1 < r3 ) r1 = r3 ;
    return r1 ;
@@ -1880,7 +1883,6 @@ mat33 nifti_mat33_mul( mat33 A , mat33 B )  /* multiply 2 3x3 matrices */
    return C ;
 }
 
-
 /*---------------------------------------------------------------------------*/
 /*! polar decomposition of a 3x3 matrix
 
@@ -1892,7 +1894,7 @@ mat33 nifti_mat33_mul( mat33 A , mat33 B )  /* multiply 2 3x3 matrices */
 mat33 nifti_mat33_polar( mat33 A )
 {
    mat33 X , Y , Z ;
-   float alp,bet,gam,gmi , dif=1.0 ;
+   float alp,bet,gam,gmi , dif=1.0f ;
    int k=0 ;
 
    X = A ;
@@ -1901,7 +1903,7 @@ mat33 nifti_mat33_polar( mat33 A )
 
    gam = nifti_mat33_determ(X) ;
    while( gam == 0.0 ){        /* perturb matrix */
-     gam = 0.00001 * ( 0.001 + nifti_mat33_rownorm(X) ) ;
+     gam = (float)( 0.00001 * ( 0.001 + nifti_mat33_rownorm(X) ) ) ;
      X.m[0][0] += gam ; X.m[1][1] += gam ; X.m[2][2] += gam ;
      gam = nifti_mat33_determ(X) ;
    }
@@ -1909,28 +1911,28 @@ mat33 nifti_mat33_polar( mat33 A )
    while(1){
      Y = nifti_mat33_inverse(X) ;
      if( dif > 0.3 ){     /* far from convergence */
-       alp = sqrt( nifti_mat33_rownorm(X) * nifti_mat33_colnorm(X) ) ;
-       bet = sqrt( nifti_mat33_rownorm(Y) * nifti_mat33_colnorm(Y) ) ;
-       gam = sqrt( bet / alp ) ;
-       gmi = 1.0 / gam ;
+       alp = (float)( sqrt( nifti_mat33_rownorm(X) * nifti_mat33_colnorm(X) ) ) ;
+       bet = (float)( sqrt( nifti_mat33_rownorm(Y) * nifti_mat33_colnorm(Y) ) ) ;
+       gam = (float)( sqrt( bet / alp ) ) ;
+       gmi = (float)( 1.0 / gam ) ;
      } else {
-       gam = gmi = 1.0 ;  /* close to convergence */
+       gam = gmi = 1.0f ;  /* close to convergence */
      }
-     Z.m[0][0] = 0.5 * ( gam*X.m[0][0] + gmi*Y.m[0][0] ) ;
-     Z.m[0][1] = 0.5 * ( gam*X.m[0][1] + gmi*Y.m[1][0] ) ;
-     Z.m[0][2] = 0.5 * ( gam*X.m[0][2] + gmi*Y.m[2][0] ) ;
-     Z.m[1][0] = 0.5 * ( gam*X.m[1][0] + gmi*Y.m[0][1] ) ;
-     Z.m[1][1] = 0.5 * ( gam*X.m[1][1] + gmi*Y.m[1][1] ) ;
-     Z.m[1][2] = 0.5 * ( gam*X.m[1][2] + gmi*Y.m[2][1] ) ;
-     Z.m[2][0] = 0.5 * ( gam*X.m[2][0] + gmi*Y.m[0][2] ) ;
-     Z.m[2][1] = 0.5 * ( gam*X.m[2][1] + gmi*Y.m[1][2] ) ;
-     Z.m[2][2] = 0.5 * ( gam*X.m[2][2] + gmi*Y.m[2][2] ) ;
-
-     dif = fabs(Z.m[0][0]-X.m[0][0])+fabs(Z.m[0][1]-X.m[0][1])
+     Z.m[0][0] = (float)( 0.5 * ( gam*X.m[0][0] + gmi*Y.m[0][0] ) ) ;
+     Z.m[0][1] = (float)( 0.5 * ( gam*X.m[0][1] + gmi*Y.m[1][0] ) ) ;
+     Z.m[0][2] = (float)( 0.5 * ( gam*X.m[0][2] + gmi*Y.m[2][0] ) ) ;
+     Z.m[1][0] = (float)( 0.5 * ( gam*X.m[1][0] + gmi*Y.m[0][1] ) ) ;
+     Z.m[1][1] = (float)( 0.5 * ( gam*X.m[1][1] + gmi*Y.m[1][1] ) ) ;
+     Z.m[1][2] = (float)( 0.5 * ( gam*X.m[1][2] + gmi*Y.m[2][1] ) ) ;
+     Z.m[2][0] = (float)( 0.5 * ( gam*X.m[2][0] + gmi*Y.m[0][2] ) ) ;
+     Z.m[2][1] = (float)( 0.5 * ( gam*X.m[2][1] + gmi*Y.m[1][2] ) ) ;
+     Z.m[2][2] = (float)( 0.5 * ( gam*X.m[2][2] + gmi*Y.m[2][2] ) ) ;
+
+     dif = (float)( fabs(Z.m[0][0]-X.m[0][0])+fabs(Z.m[0][1]-X.m[0][1])
           +fabs(Z.m[0][2]-X.m[0][2])+fabs(Z.m[1][0]-X.m[1][0])
           +fabs(Z.m[1][1]-X.m[1][1])+fabs(Z.m[1][2]-X.m[1][2])
           +fabs(Z.m[2][0]-X.m[2][0])+fabs(Z.m[2][1]-X.m[2][1])
-          +fabs(Z.m[2][2]-X.m[2][2])                          ;
+          +fabs(Z.m[2][2]-X.m[2][2])                          );
 
      k = k+1 ;
      if( k > 100 || dif < 3.e-6 ) break ;  /* convergence or exhaustion */
@@ -1987,13 +1989,13 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
 
    /* normalize i axis */
 
-   val = sqrt( xi*xi + yi*yi + zi*zi ) ;
+   val = (float)sqrt( xi*xi + yi*yi + zi*zi ) ;
    if( val == 0.0 ) return ;                 /* stupid input */
    xi /= val ; yi /= val ; zi /= val ;
 
    /* normalize j axis */
 
-   val = sqrt( xj*xj + yj*yj + zj*zj ) ;
+   val = (float)sqrt( xj*xj + yj*yj + zj*zj ) ;
    if( val == 0.0 ) return ;                 /* stupid input */
    xj /= val ; yj /= val ; zj /= val ;
 
@@ -2002,14 +2004,14 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
    val = xi*xj + yi*yj + zi*zj ;    /* dot product between i and j */
    if( fabs(val) > 1.e-4 ){
      xj -= val*xi ; yj -= val*yi ; zj -= val*zi ;
-     val = sqrt( xj*xj + yj*yj + zj*zj ) ;  /* must renormalize */
+     val = (float)sqrt( xj*xj + yj*yj + zj*zj ) ;  /* must renormalize */
      if( val == 0.0 ) return ;              /* j was parallel to i? */
      xj /= val ; yj /= val ; zj /= val ;
    }
 
    /* normalize k axis; if it is zero, make it the cross product i x j */
 
-   val = sqrt( xk*xk + yk*yk + zk*zk ) ;
+   val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ;
    if( val == 0.0 ){ xk = yi*zj-zi*yj; yk = zi*xj-zj*xi ; zk=xi*yj-yi*xj ; }
    else            { xk /= val ; yk /= val ; zk /= val ; }
 
@@ -2018,7 +2020,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
    val = xi*xk + yi*yk + zi*zk ;    /* dot product between i and k */
    if( fabs(val) > 1.e-4 ){
      xk -= val*xi ; yk -= val*yi ; zk -= val*zi ;
-     val = sqrt( xk*xk + yk*yk + zk*zk ) ;
+     val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ;
      if( val == 0.0 ) return ;      /* bad */
      xk /= val ; yk /= val ; zk /= val ;
    }
@@ -2028,7 +2030,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
    val = xj*xk + yj*yk + zj*zk ;    /* dot product between j and k */
    if( fabs(val) > 1.e-4 ){
      xk -= val*xj ; yk -= val*yj ; zk -= val*zj ;
-     val = sqrt( xk*xk + yk*yk + zk*zk ) ;
+     val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ;
      if( val == 0.0 ) return ;      /* bad */
      xk /= val ; yk /= val ; zk /= val ;
    }
@@ -2049,7 +2051,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
    /* Despite the formidable looking 6 nested loops, there are
       only 3*3*3*2*2*2 = 216 passes, which will run very quickly. */
 
-   vbest = -666.0 ; ibest=pbest=qbest=rbest=1 ; jbest=2 ; kbest=3 ;
+   vbest = -666.0f ; ibest=pbest=qbest=rbest=1 ; jbest=2 ; kbest=3 ;
    for( i=1 ; i <= 3 ; i++ ){     /* i = column number to use for row #1 */
     for( j=1 ; j <= 3 ; j++ ){    /* j = column number to use for row #2 */
      if( i == j ) continue ;
@@ -2057,7 +2059,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
        if( i == k || j == k ) continue ;
        P.m[0][0] = P.m[0][1] = P.m[0][2] =
         P.m[1][0] = P.m[1][1] = P.m[1][2] =
-         P.m[2][0] = P.m[2][1] = P.m[2][2] = 0.0 ;
+         P.m[2][0] = P.m[2][1] = P.m[2][2] = 0.0f ;
        for( p=-1 ; p <= 1 ; p+=2 ){    /* p,q,r are -1 or +1      */
         for( q=-1 ; q <= 1 ; q+=2 ){   /* and go into rows #1,2,3 */
          for( r=-1 ; r <= 1 ; r+=2 ){
@@ -2118,8 +2120,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
      case -3: k = NIFTI_S2I ; break ;
    }
 
-   *icod = i ; *jcod = j ; *kcod = k ; return ;
-}
+   *icod = i ; *jcod = j ; *kcod = k ; }
 
 /*---------------------------------------------------------------------------*/
 /* Routines to swap byte arrays in various ways:
@@ -2139,7 +2140,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
 *//*--------------------------------------------------------------------*/
 void nifti_swap_2bytes( size_t n , void *ar )    /* 2 bytes at a time */
 {
-   register size_t ii ;
+   size_t ii ;
    unsigned char * cp1 = (unsigned char *)ar, * cp2 ;
    unsigned char   tval;
 
@@ -2148,17 +2149,16 @@ void nifti_swap_2bytes( size_t n , void *ar )    /* 2 bytes at a time */
        tval = *cp1;  *cp1 = *cp2;  *cp2 = tval;
        cp1 += 2;
    }
-   return ;
-}
+   }
 
 /*----------------------------------------------------------------------*/
 /*! swap 4 bytes at a time from the given list of n sets of 4 bytes
 *//*--------------------------------------------------------------------*/
 void nifti_swap_4bytes( size_t n , void *ar )    /* 4 bytes at a time */
 {
-   register size_t ii ;
+   size_t ii ;
    unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ;
-   register unsigned char tval ;
+   unsigned char tval ;
 
    for( ii=0 ; ii < n ; ii++ ){
        cp1 = cp0; cp2 = cp0+3;
@@ -2167,8 +2167,7 @@ void nifti_swap_4bytes( size_t n , void *ar )    /* 4 bytes at a time */
        tval = *cp1;  *cp1 = *cp2;  *cp2 = tval;
        cp0 += 4;
    }
-   return ;
-}
+   }
 
 /*----------------------------------------------------------------------*/
 /*! swap 8 bytes at a time from the given list of n sets of 8 bytes
@@ -2177,9 +2176,9 @@ void nifti_swap_4bytes( size_t n , void *ar )    /* 4 bytes at a time */
 *//*--------------------------------------------------------------------*/
 void nifti_swap_8bytes( size_t n , void *ar )    /* 8 bytes at a time */
 {
-   register size_t ii ;
+   size_t ii ;
    unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ;
-   register unsigned char tval ;
+   unsigned char tval ;
 
    for( ii=0 ; ii < n ; ii++ ){
        cp1 = cp0;  cp2 = cp0+7;
@@ -2190,17 +2189,16 @@ void nifti_swap_8bytes( size_t n , void *ar )    /* 8 bytes at a time */
        }
        cp0 += 8;
    }
-   return ;
-}
+   }
 
 /*----------------------------------------------------------------------*/
 /*! swap 16 bytes at a time from the given list of n sets of 16 bytes
 *//*--------------------------------------------------------------------*/
 void nifti_swap_16bytes( size_t n , void *ar )    /* 16 bytes at a time */
 {
-   register size_t ii ;
+   size_t ii ;
    unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ;
-   register unsigned char tval ;
+   unsigned char tval ;
 
    for( ii=0 ; ii < n ; ii++ ){
        cp1 = cp0;  cp2 = cp0+15;
@@ -2211,8 +2209,7 @@ void nifti_swap_16bytes( size_t n , void *ar )    /* 16 bytes at a time */
        }
        cp0 += 16;
    }
-   return ;
-}
+   }
 
 #if 0  /* not important: save for version update     6 Jul 2010 [rickr] */
 
@@ -2221,9 +2218,9 @@ void nifti_swap_16bytes( size_t n , void *ar )    /* 16 bytes at a time */
 *//*--------------------------------------------------------------------*/
 void nifti_swap_bytes( size_t n , int siz , void *ar )
 {
-   register size_t ii ;
+   size_t ii ;
    unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ;
-   register unsigned char tval ;
+   unsigned char tval ;
 
    for( ii=0 ; ii < n ; ii++ ){
        cp1 = cp0;  cp2 = cp0+(siz-1);
@@ -2251,11 +2248,10 @@ void nifti_swap_Nbytes( size_t n , int siz , void *ar )  /* subsuming case */
      case 8:  nifti_swap_8bytes ( n , ar ) ; break ;
      case 16: nifti_swap_16bytes( n , ar ) ; break ;
      default:    /* nifti_swap_bytes  ( n , siz, ar ) ; */
-        fprintf(stderr,"** NIfTI: cannot swap in %d byte blocks\n", siz);
+        Rc_fprintf_stderr("** NIfTI: cannot swap in %d byte blocks\n", siz);
         break ;
    }
-   return ;
-}
+   }
 
 
 /*-------------------------------------------------------------------------*/
@@ -2316,8 +2312,6 @@ void swap_nifti_header( struct nifti_1_header *h , int is_nifti )
    nifti_swap_4bytes(4, h->srow_x);
    nifti_swap_4bytes(4, h->srow_y);
    nifti_swap_4bytes(4, h->srow_z);
-
-   return ;
 }
 
 /*-------------------------------------------------------------------------*/
@@ -2413,9 +2407,9 @@ void old_swap_nifti_header( struct nifti_1_header *h , int is_nifti )
      nifti_swap_4bytes(4,h->srow_y);
      nifti_swap_4bytes(4,h->srow_z);
    }
-   return ;
-}
+   }
 
+#endif /* RNIFTI_NIFTILIB_DEDUPLICATE */
 
 #define USE_STAT
 #ifdef  USE_STAT
@@ -2456,7 +2450,6 @@ int nifti_get_filesize( const char *pathname )
 
 #endif /* USE_STAT */
 
-
 /*----------------------------------------------------------------------*/
 /*! return the total volume size, in bytes
 
@@ -2464,7 +2457,7 @@ int nifti_get_filesize( const char *pathname )
 *//*--------------------------------------------------------------------*/
 size_t nifti_get_volsize(const nifti_image *nim)
 {
-   return nim->nbyper * nim->nvox ; /* total bytes */
+   return (size_t)(nim->nbyper) * (size_t)(nim->nvox) ; /* total bytes */
 }
 
 
@@ -2473,7 +2466,7 @@ size_t nifti_get_volsize(const nifti_image *nim)
    - allows for gzipped files
 */
 
-
+#ifndef RNIFTI_NIFTILIB_DEDUPLICATE
 /*----------------------------------------------------------------------*/
 /*! simple check for file existence
 
@@ -2482,7 +2475,7 @@ size_t nifti_get_volsize(const nifti_image *nim)
 int nifti_fileexists(const char* fname)
 {
    znzFile fp;
-   fp = znzopen( fname , "rb" , 1 ) ;
+   fp = znzopen( fname , "rb" , nifti_is_gzfile(fname) ) ;
    if( !znz_isnull(fp) )  { znzclose(fp);  return 1; }
    return 0; /* fp is NULL */
 }
@@ -2506,25 +2499,25 @@ int nifti_fileexists(const char* fname)
 *//*--------------------------------------------------------------------*/
 int nifti_is_complete_filename(const char* fname)
 {
-   char * ext;
+   const char * ext;
 
    /* check input file(s) for sanity */
    if( fname == NULL || *fname == '\0' ){
       if ( g_opts.debug > 1 )
-         fprintf(stderr,"-- empty filename in nifti_validfilename()\n");
+         Rc_fprintf_stderr("-- empty filename in nifti_validfilename()\n");
       return 0;
    }
 
    ext = nifti_find_file_extension(fname);
    if ( ext == NULL ) { /*Invalid extension given */
       if ( g_opts.debug > 0 )
-         fprintf(stderr,"-- no nifti valid extension for filename '%s'\n", fname);
+         Rc_fprintf_stderr("-- no nifti valid extension for filename '%s'\n", fname);
        return 0;
    }
 
    if ( ext && ext == fname ) {   /* then no filename prefix */
       if ( g_opts.debug > 0 )
-         fprintf(stderr,"-- no prefix for filename '%s'\n", fname);
+         Rc_fprintf_stderr("-- no prefix for filename '%s'\n", fname);
       return 0;
    }
    return 1;
@@ -2550,12 +2543,12 @@ int nifti_is_complete_filename(const char* fname)
 *//*--------------------------------------------------------------------*/
 int nifti_validfilename(const char* fname)
 {
-   char * ext;
+   const char * ext;
 
    /* check input file(s) for sanity */
    if( fname == NULL || *fname == '\0' ){
       if ( g_opts.debug > 1 )
-         fprintf(stderr,"-- empty filename in nifti_validfilename()\n");
+         Rc_fprintf_stderr("-- empty filename in nifti_validfilename()\n");
       return 0;
    }
 
@@ -2563,7 +2556,7 @@ int nifti_validfilename(const char* fname)
 
    if ( ext && ext == fname ) {   /* then no filename prefix */
       if ( g_opts.debug > 0 )
-         fprintf(stderr,"-- no prefix for filename '%s'\n", fname);
+         Rc_fprintf_stderr("-- no prefix for filename '%s'\n", fname);
       return 0;
    }
 
@@ -2579,11 +2572,18 @@ int nifti_validfilename(const char* fname)
 
     Uppercase extensions are also valid, but not mixed case.
 
-    \return a pointer to the extension (within the filename), or NULL
+    \return a pointer to the extension substring within the original
+            function input parameter name, or NULL if not found.
+    \caution Note that if the input parameter is is immutabale
+             (i.e. a const char *) then this function performs an
+             implicit casting away of the mutability constraint and
+             the return parameter will appear as a mutable
+             even though it is part of the immuttable string.
 *//*--------------------------------------------------------------------*/
 char * nifti_find_file_extension( const char * name )
 {
-   char * ext, extcopy[8];
+   const char * ext;
+   char extcopy[8];
    int    len;
    char   extnii[8] = ".nii";   /* modifiable, for possible uppercase */
    char   exthdr[8] = ".hdr";   /* (leave space for .gz) */
@@ -2600,7 +2600,7 @@ char * nifti_find_file_extension( const char * name )
    len = (int)strlen(name);
    if ( len < 4 ) return NULL;
 
-   ext = (char *)name + len - 4;
+   ext = name + len - 4;
 
    /* make manipulation copy, and possibly convert to lowercase */
    strcpy(extcopy, ext);
@@ -2609,16 +2609,16 @@ char * nifti_find_file_extension( const char * name )
    /* if it look like a basic extension, fail or return it */
    if( compare_strlist(extcopy, elist, 4) >= 0 ) {
       if( is_mixedcase(ext) ) {
-         fprintf(stderr,"** mixed case extension '%s' is not valid\n", ext);
+         Rc_fprintf_stderr("** mixed case extension '%s' is not valid\n", ext);
          return NULL;
       }
-      else return ext;
+      else return (char *)ext; /* Cast away the constness of the input parameter */
    }
 
 #ifdef HAVE_ZLIB
    if ( len < 7 ) return NULL;
 
-   ext = (char *)name + len - 7;
+   ext = name + len - 7;
 
    /* make manipulation copy, and possibly convert to lowercase */
    strcpy(extcopy, ext);
@@ -2629,16 +2629,16 @@ char * nifti_find_file_extension( const char * name )
 
    if( compare_strlist(extcopy, elist, 3) >= 0 ) {
       if( is_mixedcase(ext) ) {
-         fprintf(stderr,"** mixed case extension '%s' is not valid\n", ext);
+         Rc_fprintf_stderr("** mixed case extension '%s' is not valid\n", ext);
          return NULL;
       }
-      else return ext;
+      else return (char *)ext; /* Cast away the constness of the input parameter */
    }
 
 #endif
 
    if( g_opts.debug > 1 )
-      fprintf(stderr,"** find_file_ext: failed for name '%s'\n", name);
+      Rc_fprintf_stderr("** find_file_ext: failed for name '%s'\n", name);
 
    return NULL;
 }
@@ -2680,12 +2680,16 @@ int nifti_compiled_with_zlib(void)
 *//*--------------------------------------------------------------------*/
 char * nifti_makebasename(const char* fname)
 {
-   char *basename, *ext;
+   char *basename;
+   const char *ext;
 
    basename=nifti_strdup(fname);
 
    ext = nifti_find_file_extension(basename);
-   if ( ext ) *ext = '\0';  /* clear out extension */
+   if ( ext )
+   {
+     basename[strlen(basename)-strlen(ext)] = '\0';  /* clear out extension */
+   }
 
    return basename;  /* in either case */
 }
@@ -2735,7 +2739,8 @@ void nifti_set_allow_upper_fext( int allow )
 *//*-------------------------------------------------------------------*/
 char * nifti_findhdrname(const char* fname)
 {
-   char *basename, *hdrname, *ext;
+   char *basename, *hdrname;
+   const char *ext;
    char  elist[2][5] = { ".hdr", ".nii" };
    char  extzip[4]   = ".gz";
    int   efirst = 1;    /* init to .nii extension */
@@ -2783,7 +2788,7 @@ char * nifti_findhdrname(const char* fname)
 
    hdrname = (char *)calloc(sizeof(char),strlen(basename)+8);
    if( !hdrname ){
-      fprintf(stderr,"** nifti_findhdrname: failed to alloc hdrname\n");
+      Rc_fprintf_stderr("** nifti_findhdrname: failed to alloc hdrname\n");
       free(basename);
       return NULL;
    }
@@ -2837,7 +2842,7 @@ char * nifti_findimgname(const char* fname , int nifti_type)
    char *basename, *imgname, elist[2][5] = { ".nii", ".img" };
    char  extzip[4] = ".gz";
    char  extnia[5] = ".nia";
-   char *ext;
+   const char *ext;
    int   first;  /* first extension to use */
 
    /* check input file(s) for sanity */
@@ -2846,7 +2851,7 @@ char * nifti_findimgname(const char* fname , int nifti_type)
    basename =  nifti_makebasename(fname);
    imgname = (char *)calloc(sizeof(char),strlen(basename)+8);
    if( !imgname ){
-      fprintf(stderr,"** nifti_findimgname: failed to alloc imgname\n");
+      Rc_fprintf_stderr("** nifti_findimgname: failed to alloc imgname\n");
       free(basename);
       return NULL;
    }
@@ -2919,7 +2924,8 @@ char * nifti_findimgname(const char* fname , int nifti_type)
 char * nifti_makehdrname(const char * prefix, int nifti_type, int check,
                          int comp)
 {
-   char * iname, * ext;
+   char * iname;
+   const char * ext;
    char   extnii[5] = ".nii";   /* modifiable, for possible uppercase */
    char   exthdr[5] = ".hdr";
    char   extimg[5] = ".img";
@@ -2930,7 +2936,7 @@ char * nifti_makehdrname(const char * prefix, int nifti_type, int check,
 
    /* add space for extension, optional ".gz", and null char */
    iname = (char *)calloc(sizeof(char),strlen(prefix)+8);
-   if( !iname ){ fprintf(stderr,"** small malloc failure!\n"); return NULL; }
+   if( !iname ){ Rc_fprintf_stderr("** small malloc failure!\n"); return NULL; }
    strcpy(iname, prefix);
 
    /* use any valid extension */
@@ -2945,7 +2951,9 @@ char * nifti_makehdrname(const char * prefix, int nifti_type, int check,
       }
 
       if( strncmp(ext,extimg,4) == 0 )
-         memcpy(ext,exthdr,4);   /* then convert img name to hdr */
+      {
+         memcpy(&(iname[strlen(iname)-strlen(ext)]),exthdr,4);   /* then convert img name to hdr */
+      }
    }
    /* otherwise, make one up */
    else if( nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcat(iname, extnii);
@@ -2958,12 +2966,12 @@ char * nifti_makehdrname(const char * prefix, int nifti_type, int check,
 
    /* check for existence failure */
    if( check && nifti_fileexists(iname) ){
-      fprintf(stderr,"** failure: header file '%s' already exists\n",iname);
+      Rc_fprintf_stderr("** failure: header file '%s' already exists\n",iname);
       free(iname);
       return NULL;
    }
 
-   if(g_opts.debug > 2) fprintf(stderr,"+d made header filename '%s'\n", iname);
+   if(g_opts.debug > 2) Rc_fprintf_stderr("+d made header filename '%s'\n", iname);
 
    return iname;
 }
@@ -2986,7 +2994,8 @@ char * nifti_makehdrname(const char * prefix, int nifti_type, int check,
 char * nifti_makeimgname(const char * prefix, int nifti_type, int check,
                          int comp)
 {
-   char * iname, * ext;
+   char * iname;
+   const char * ext;
    char   extnii[5] = ".nii";   /* modifiable, for possible uppercase */
    char   exthdr[5] = ".hdr";
    char   extimg[5] = ".img";
@@ -2997,7 +3006,7 @@ char * nifti_makeimgname(const char * prefix, int nifti_type, int check,
 
    /* add space for extension, optional ".gz", and null char */
    iname = (char *)calloc(sizeof(char),strlen(prefix)+8);
-   if( !iname ){ fprintf(stderr,"** small malloc failure!\n"); return NULL; }
+   if( !iname ){ Rc_fprintf_stderr("** small malloc failure!\n"); return NULL; }
    strcpy(iname, prefix);
 
    /* use any valid extension */
@@ -3012,7 +3021,9 @@ char * nifti_makeimgname(const char * prefix, int nifti_type, int check,
       }
 
       if( strncmp(ext,exthdr,4) == 0 )
-         memcpy(ext,extimg,4);   /* then convert hdr name to img */
+      {
+         memcpy(&(iname[strlen(iname)-strlen(ext)]),extimg,4);   /* then convert hdr name to img */
+      }
    }
    /* otherwise, make one up */
    else if( nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcat(iname, extnii);
@@ -3025,16 +3036,16 @@ char * nifti_makeimgname(const char * prefix, int nifti_type, int check,
 
    /* check for existence failure */
    if( check && nifti_fileexists(iname) ){
-      fprintf(stderr,"** failure: image file '%s' already exists\n",iname);
+      Rc_fprintf_stderr("** failure: image file '%s' already exists\n",iname);
       free(iname);
       return NULL;
    }
 
-   if( g_opts.debug > 2 ) fprintf(stderr,"+d made image filename '%s'\n",iname);
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d made image filename '%s'\n",iname);
 
    return iname;
 }
-
+#endif
 
 /*----------------------------------------------------------------------*/
 /*! create and set new filenames, based on prefix and image type
@@ -3058,13 +3069,13 @@ int nifti_set_filenames( nifti_image * nim, const char * prefix, int check,
    int comp = nifti_is_gzfile(prefix);
 
    if( !nim || !prefix ){
-      fprintf(stderr,"** nifti_set_filenames, bad params %p, %p\n",
+      Rc_fprintf_stderr("** nifti_set_filenames, bad params %p, %p\n",
               (void *)nim,prefix);
       return -1;
    }
 
    if( g_opts.debug > 1 )
-      fprintf(stderr,"+d modifying output filenames using prefix %s\n", prefix);
+      Rc_fprintf_stderr("+d modifying output filenames using prefix %s\n", prefix);
 
    if( nim->fname ) free(nim->fname);
    if( nim->iname ) free(nim->iname);
@@ -3081,7 +3092,7 @@ int nifti_set_filenames( nifti_image * nim, const char * prefix, int check,
       return -1;
 
    if( g_opts.debug > 2 )
-      fprintf(stderr,"+d have new filenames %s and %s\n",nim->fname,nim->iname);
+      Rc_fprintf_stderr("+d have new filenames %s and %s\n",nim->fname,nim->iname);
 
    return 0;
 }
@@ -3107,25 +3118,26 @@ int nifti_set_filenames( nifti_image * nim, const char * prefix, int check,
 int nifti_type_and_names_match( nifti_image * nim, int show_warn )
 {
    char func[] = "nifti_type_and_names_match";
-   char * ext_h, * ext_i;  /* header and image filename extensions */
+   const char * ext_h;  /* header  filename extension */
+   const char * ext_i;  /* image filename extension */
    int  errs = 0;          /* error counter */
 
    /* sanity checks */
    if( !nim ){
-      if( show_warn ) fprintf(stderr,"** %s: missing nifti_image\n", func);
+      if( show_warn ) Rc_fprintf_stderr("** %s: missing nifti_image\n", func);
       return -1;
    }
    if( !nim->fname ){
-      if( show_warn ) fprintf(stderr,"** %s: missing header filename\n", func);
+      if( show_warn ) Rc_fprintf_stderr("** %s: missing header filename\n", func);
       errs++;
    }
    if( !nim->iname ){
-      if( show_warn ) fprintf(stderr,"** %s: missing image filename\n", func);
+      if( show_warn ) Rc_fprintf_stderr("** %s: missing image filename\n", func);
       errs++;
    }
    if( !is_valid_nifti_type(nim->nifti_type) ){
       if( show_warn )
-         fprintf(stderr,"** %s: bad nifti_type %d\n", func, nim->nifti_type);
+         Rc_fprintf_stderr("** %s: bad nifti_type %d\n", func, nim->nifti_type);
       errs++;
    }
 
@@ -3138,13 +3150,13 @@ int nifti_type_and_names_match( nifti_image * nim, int show_warn )
    /* check for filename extensions */
    if( !ext_h ){
       if( show_warn )
-         fprintf(stderr,"-d missing NIFTI extension in header filename, %s\n",
+         Rc_fprintf_stderr("-d missing NIFTI extension in header filename, %s\n",
                  nim->fname);
       errs++;
    }
    if( !ext_i ){
       if( show_warn )
-         fprintf(stderr,"-d missing NIFTI extension in image filename, %s\n",
+         Rc_fprintf_stderr("-d missing NIFTI extension in image filename, %s\n",
                  nim->iname);
       errs++;
    }
@@ -3155,21 +3167,21 @@ int nifti_type_and_names_match( nifti_image * nim, int show_warn )
    if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ){  /* .nii */
       if( fileext_n_compare(ext_h,".nii",4) ) {
          if( show_warn )
-            fprintf(stderr,
+            Rc_fprintf_stderr(
             "-d NIFTI_FTYPE 1, but no .nii extension in header filename, %s\n",
             nim->fname);
          errs++;
       }
       if( fileext_n_compare(ext_i,".nii",4) ) {
          if( show_warn )
-            fprintf(stderr,
+            Rc_fprintf_stderr(
             "-d NIFTI_FTYPE 1, but no .nii extension in image filename, %s\n",
             nim->iname);
          errs++;
       }
       if( strcmp(nim->fname, nim->iname) != 0 ){
          if( show_warn )
-            fprintf(stderr,
+            Rc_fprintf_stderr(
             "-d NIFTI_FTYPE 1, but header and image filenames differ: %s, %s\n",
             nim->fname, nim->iname);
          errs++;
@@ -3180,19 +3192,21 @@ int nifti_type_and_names_match( nifti_image * nim, int show_warn )
    {
       if( fileext_n_compare(ext_h,".hdr",4) != 0 ){
          if( show_warn )
-            fprintf(stderr,"-d no '.hdr' extension, but NIFTI type is %d, %s\n",
+            Rc_fprintf_stderr("-d no '.hdr' extension, but NIFTI type is %d, %s\n",
                     nim->nifti_type, nim->fname);
          errs++;
       }
       if( fileext_n_compare(ext_i,".img",4) != 0 ){
          if( show_warn )
-            fprintf(stderr,"-d no '.img' extension, but NIFTI type is %d, %s\n",
+            Rc_fprintf_stderr("-d no '.img' extension, but NIFTI type is %d, %s\n",
                     nim->nifti_type, nim->iname);
          errs++;
       }
    }
    /* ignore any other nifti_type */
 
+   if( errs ) return 0;   /* types do not match */
+
    return 1;
 }
 
@@ -3201,10 +3215,9 @@ int nifti_type_and_names_match( nifti_image * nim, int show_warn )
 static int fileext_compare(const char * test_ext, const char * known_ext)
 {
    char caps[8] = "";
-   int  c, cmp, len;
-
+   size_t c,len;
    /* if equal, don't need to check case (store to avoid multiple calls) */
-   cmp = strcmp(test_ext, known_ext);
+   const int cmp = strcmp(test_ext, known_ext);
    if( cmp == 0 ) return cmp;
 
    /* if anything odd, use default */
@@ -3215,7 +3228,7 @@ static int fileext_compare(const char * test_ext, const char * known_ext)
 
    /* if here, strings are different but need to check upper-case */
 
-   for(c = 0; c < len; c++ ) caps[c] = toupper(known_ext[c]);
+   for(c = 0; c < len; c++ ) caps[c] = toupper((int) known_ext[c]);
    caps[c] = '\0';
 
    return strcmp(test_ext, caps);
@@ -3224,13 +3237,12 @@ static int fileext_compare(const char * test_ext, const char * known_ext)
 /* like strncmp, but also check against capitalization of known_ext
  * (test as local string, with max length 7) */
 static int fileext_n_compare(const char * test_ext,
-                             const char * known_ext, int maxlen)
+                             const char * known_ext, size_t maxlen)
 {
    char caps[8] = "";
-   int  c, cmp, len;
-
+   size_t c,len;
    /* if equal, don't need to check case (store to avoid multiple calls) */
-   cmp = strncmp(test_ext, known_ext, maxlen);
+   const int  cmp = strncmp(test_ext, known_ext, maxlen);
    if( cmp == 0 ) return cmp;
 
    /* if anything odd, use default */
@@ -3241,8 +3253,7 @@ static int fileext_n_compare(const char * test_ext,
    if( len > 7 ) return cmp;
 
    /* if here, strings are different but need to check upper-case */
-
-   for(c = 0; c < len; c++ ) caps[c] = toupper(known_ext[c]);
+   for(c = 0; c < len; c++ ) caps[c] = toupper((int) known_ext[c]);
    caps[c] = '\0';
 
    return strncmp(test_ext, caps, maxlen);
@@ -3251,13 +3262,14 @@ static int fileext_n_compare(const char * test_ext,
 /* return 1 if there are uppercase but no lowercase */
 static int is_uppercase(const char * str)
 {
-   unsigned int c, hasupper = 0;
+   size_t c;
+   int hasupper = 0;
 
    if( !str || !*str ) return 0;
 
    for(c = 0; c < strlen(str); c++ ) {
-      if( islower(str[c]) ) return 0;
-      if( !hasupper && isupper(str[c]) ) hasupper = 1;
+     if( islower((int) str[c]) ) return 0;
+     if( !hasupper && isupper((int) str[c]) ) hasupper = 1;
    }
 
    return hasupper;
@@ -3266,13 +3278,14 @@ static int is_uppercase(const char * str)
 /* return 1 if there are both uppercase and lowercase characters */
 static int is_mixedcase(const char * str)
 {
-   unsigned int c, hasupper = 0, haslower = 0;
+   size_t c;
+   int hasupper = 0, haslower = 0;
 
    if( !str || !*str ) return 0;
 
    for(c = 0; c < strlen(str); c++ ) {
-      if( !haslower && islower(str[c]) ) haslower = 1;
-      if( !hasupper && isupper(str[c]) ) hasupper = 1;
+     if( !haslower && islower((int) str[c]) ) haslower = 1;
+     if( !hasupper && isupper((int) str[c]) ) hasupper = 1;
 
       if( haslower && hasupper ) return 1;
    }
@@ -3283,12 +3296,12 @@ static int is_mixedcase(const char * str)
 /* convert any lowercase chars to uppercase */
 static int make_uppercase(char * str)
 {
-   unsigned int c;
+   size_t c;
 
    if( !str || !*str ) return 0;
 
    for(c = 0; c < strlen(str); c++ )
-      if( islower(str[c]) ) str[c] = toupper(str[c]);
+     if( islower((int) str[c]) ) str[c] = toupper((int) str[c]);
 
    return 0;
 }
@@ -3296,12 +3309,11 @@ static int make_uppercase(char * str)
 /* convert any uppercase chars to lowercase */
 static int make_lowercase(char * str)
 {
-   unsigned int c;
-
+   size_t c;
    if( !str || !*str ) return 0;
 
    for(c = 0; c < strlen(str); c++ )
-      if( isupper(str[c]) ) str[c] = tolower(str[c]);
+     if( isupper((int) str[c]) ) str[c] = tolower((int) str[c]);
 
    return 0;
 }
@@ -3335,7 +3347,7 @@ int is_valid_nifti_type( int nifti_type )
    return 0;
 }
 
-
+#ifndef RNIFTI_NIFTILIB_DEDUPLICATE
 /*--------------------------------------------------------------------------*/
 /*! check whether the given type is on the "approved" list
 
@@ -3365,7 +3377,7 @@ int nifti_is_valid_datatype( int dtype )
        dtype == NIFTI_TYPE_COMPLEX256 ) return 1;
    return 0;
 }
-
+#endif
 
 /*--------------------------------------------------------------------------*/
 /*! set the nifti_type field based on fname and iname
@@ -3380,10 +3392,10 @@ int nifti_is_valid_datatype( int dtype )
 int nifti_set_type_from_names( nifti_image * nim )
 {
    /* error checking first */
-   if( !nim ){ fprintf(stderr,"** NSTFN: no nifti_image\n");  return -1; }
+   if( !nim ){ Rc_fprintf_stderr("** NSTFN: no nifti_image\n");  return -1; }
 
    if( !nim->fname || !nim->iname ){
-      fprintf(stderr,"** NSTFN: missing filename(s) fname @ %p, iname @ %p\n",
+      Rc_fprintf_stderr("** NSTFN: missing filename(s) fname @ %p, iname @ %p\n",
               nim->fname, nim->iname);
       return -1;
    }
@@ -3393,13 +3405,13 @@ int nifti_set_type_from_names( nifti_image * nim )
        ! nifti_find_file_extension( nim->fname ) ||
        ! nifti_find_file_extension( nim->iname )
      ) {
-      fprintf(stderr,"** NSTFN: invalid filename(s) fname='%s', iname='%s'\n",
+      Rc_fprintf_stderr("** NSTFN: invalid filename(s) fname='%s', iname='%s'\n",
               nim->fname, nim->iname);
       return -1;
    }
 
    if( g_opts.debug > 2 )
-      fprintf(stderr,"-d verify nifti_type from filenames: %d",nim->nifti_type);
+      Rc_fprintf_stderr("-d verify nifti_type from filenames: %d",nim->nifti_type);
 
    /* type should be NIFTI_FTYPE_ASCII if extension is .nia */
    if( (fileext_compare(nifti_find_file_extension(nim->fname),".nia")==0)){
@@ -3412,20 +3424,20 @@ int nifti_set_type_from_names( nifti_image * nim )
          nim->nifti_type = NIFTI_FTYPE_NIFTI1_2;
    }
 
-   if( g_opts.debug > 2 ) fprintf(stderr," -> %d\n",nim->nifti_type);
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr(" -> %d\n",nim->nifti_type);
 
    if( g_opts.debug > 1 )  /* warn user about anything strange */
       nifti_type_and_names_match(nim, 1);
 
    if( is_valid_nifti_type(nim->nifti_type) ) return 0;  /* success! */
 
-   fprintf(stderr,"** NSTFN: bad nifti_type %d, for '%s' and '%s'\n",
+   Rc_fprintf_stderr("** NSTFN: bad nifti_type %d, for '%s' and '%s'\n",
            nim->nifti_type, nim->fname, nim->iname);
 
    return -1;
 }
 
-
+#ifndef RNIFTI_NIFTILIB_DEDUPLICATE
 /*--------------------------------------------------------------------------*/
 /*! Determine if this is a NIFTI-formatted file.
 
@@ -3452,7 +3464,7 @@ int is_nifti_file( const char *hname )
    tmpname = nifti_findhdrname(hname);
    if( tmpname == NULL ){
       if( g_opts.debug > 0 )
-         fprintf(stderr,"** no header file found for '%s'\n",hname);
+         Rc_fprintf_stderr("** no header file found for '%s'\n",hname);
       return -1;
    }
    fp = znzopen( tmpname , "rb" , nifti_is_gzfile(tmpname) ) ;
@@ -3484,9 +3496,9 @@ int is_nifti_file( const char *hname )
    return -1 ;                          /* not good */
 }
 
-static int print_hex_vals( const char * data, int nbytes, FILE * fp )
+static int print_hex_vals( const char * data, size_t nbytes, FILE * fp )
 {
-   int c;
+   size_t c;
 
    if ( !data || nbytes < 1 || !fp ) return -1;
 
@@ -3507,26 +3519,30 @@ int disp_nifti_1_header( const char * info, const nifti_1_header * hp )
 {
    int c;
 
-   fputs( "-------------------------------------------------------\n", stdout );
-   if ( info )  fputs( info, stdout );
-   if ( !hp  ){ fputs(" ** no nifti_1_header to display!\n",stdout); return 1; }
+   Rc_fputs_stdout( "-------------------------------------------------------\n" );
+   if ( info )  Rc_fputs_stdout( info );
+   if ( !hp  ){ Rc_fputs_stdout(" ** no nifti_1_header to display!\n"); return 1; }
 
-   fprintf(stdout," nifti_1_header :\n"
+   Rc_fprintf_stdout(" nifti_1_header :\n"
            "    sizeof_hdr     = %d\n"
            "    data_type[10]  = ", hp->sizeof_hdr);
+#ifndef USING_R
    print_hex_vals(hp->data_type, 10, stdout);
-   fprintf(stdout, "\n"
+#endif
+   Rc_fprintf_stdout( "\n"
            "    db_name[18]    = ");
+#ifndef USING_R
    print_hex_vals(hp->db_name, 18, stdout);
-   fprintf(stdout, "\n"
+#endif
+   Rc_fprintf_stdout( "\n"
            "    extents        = %d\n"
            "    session_error  = %d\n"
            "    regular        = 0x%x\n"
            "    dim_info       = 0x%x\n",
       hp->extents, hp->session_error, hp->regular, hp->dim_info );
-   fprintf(stdout, "    dim[8]         =");
-   for ( c = 0; c < 8; c++ ) fprintf(stdout," %d", hp->dim[c]);
-   fprintf(stdout, "\n"
+   Rc_fprintf_stdout( "    dim[8]         =");
+   for ( c = 0; c < 8; c++ ) Rc_fprintf_stdout(" %d", hp->dim[c]);
+   Rc_fprintf_stdout( "\n"
            "    intent_p1      = %f\n"
            "    intent_p2      = %f\n"
            "    intent_p3      = %f\n"
@@ -3538,10 +3554,10 @@ int disp_nifti_1_header( const char * info, const nifti_1_header * hp )
            hp->intent_p1, hp->intent_p2, hp->intent_p3, hp->intent_code,
            hp->datatype, hp->bitpix, hp->slice_start);
    /* break pixdim over 2 lines */
-   for ( c = 0; c < 4; c++ ) fprintf(stdout," %f", hp->pixdim[c]);
-   fprintf(stdout, "\n                    ");
-   for ( c = 4; c < 8; c++ ) fprintf(stdout," %f", hp->pixdim[c]);
-   fprintf(stdout, "\n"
+   for ( c = 0; c < 4; c++ ) Rc_fprintf_stdout(" %f", hp->pixdim[c]);
+   Rc_fprintf_stdout( "\n                    ");
+   for ( c = 4; c < 8; c++ ) Rc_fprintf_stdout(" %f", hp->pixdim[c]);
+   Rc_fprintf_stdout( "\n"
            "    vox_offset     = %f\n"
            "    scl_slope      = %f\n"
            "    scl_inter      = %f\n"
@@ -3557,7 +3573,7 @@ int disp_nifti_1_header( const char * info, const nifti_1_header * hp )
            hp->vox_offset, hp->scl_slope, hp->scl_inter, hp->slice_end,
            hp->slice_code, hp->xyzt_units, hp->cal_max, hp->cal_min,
            hp->slice_duration, hp->toffset, hp->glmax, hp->glmin);
-   fprintf(stdout,
+   Rc_fprintf_stdout(
            "    descrip        = '%.80s'\n"
            "    aux_file       = '%.24s'\n"
            "    qform_code     = %d\n"
@@ -3580,16 +3596,15 @@ int disp_nifti_1_header( const char * info, const nifti_1_header * hp )
            hp->srow_y[0], hp->srow_y[1], hp->srow_y[2], hp->srow_y[3],
            hp->srow_z[0], hp->srow_z[1], hp->srow_z[2], hp->srow_z[3],
            hp->intent_name, hp->magic);
-   fputs( "-------------------------------------------------------\n", stdout );
-   fflush(stdout);
+   Rc_fprintf_stdout( "-------------------------------------------------------\n" );
 
    return 0;
 }
-
+#endif
 
 #undef  ERREX
 #define ERREX(msg)                                           \
- do{ fprintf(stderr,"** ERROR: nifti_convert_nhdr2nim: %s\n", (msg) ) ;  \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_convert_nhdr2nim: %s\n", (msg) ) ;  \
      return NULL ; } while(0)
 
 /*----------------------------------------------------------------------*/
@@ -3597,7 +3612,7 @@ int disp_nifti_1_header( const char * info, const nifti_1_header * hp )
 
    \return an allocated nifti_image, or NULL on failure
 *//*--------------------------------------------------------------------*/
-nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
+nifti_image* nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
                                     const char * fname)
 {
    int   ii , doswap , ioff ;
@@ -3617,6 +3632,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
    doswap = need_nhdr_swap(nhdr.dim[0], nhdr.sizeof_hdr); /* swap data flag */
 
    if( doswap < 0 ){
+      free(nim);
       if( doswap == -1 ) ERREX("bad dim[0]") ;
       ERREX("bad sizeof_hdr") ;  /* else */
    }
@@ -3644,10 +3660,17 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
 
    if ( g_opts.debug > 2 ) disp_nifti_1_header("-d nhdr2nim : ", &nhdr);
 
-   if( nhdr.datatype == DT_BINARY ||
-       nhdr.datatype == DT_UNKNOWN  )    ERREX("bad datatype") ;
+   if( nhdr.datatype == DT_BINARY || nhdr.datatype == DT_UNKNOWN  )
+   {
+     free(nim);
+     ERREX("bad datatype") ;
+   }
 
-   if( nhdr.dim[1] <= 0 )                ERREX("bad dim[1]") ;
+   if( nhdr.dim[1] <= 0 )
+   {
+     free(nim);
+     ERREX("bad dim[1]") ;
+   }
 
    /* fix bad dim[] values in the defined dimension range */
    for( ii=2 ; ii <= nhdr.dim[0] ; ii++ )
@@ -3670,7 +3693,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
 
    for( ii=1 ; ii <= nhdr.dim[0] ; ii++ ){
      if( nhdr.pixdim[ii] == 0.0         ||
-         !IS_GOOD_FLOAT(nhdr.pixdim[ii])  ) nhdr.pixdim[ii] = 1.0 ;
+         !IS_GOOD_FLOAT(nhdr.pixdim[ii])  ) nhdr.pixdim[ii] = 1.0f ;
    }
 
   is_onefile = is_nifti && NIFTI_ONEFILE(nhdr) ;
@@ -3726,18 +3749,18 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
 
     /* off diagonal is zero */
 
-    nim->qto_xyz.m[0][1]=nim->qto_xyz.m[0][2]=nim->qto_xyz.m[0][3] = 0.0;
-    nim->qto_xyz.m[1][0]=nim->qto_xyz.m[1][2]=nim->qto_xyz.m[1][3] = 0.0;
-    nim->qto_xyz.m[2][0]=nim->qto_xyz.m[2][1]=nim->qto_xyz.m[2][3] = 0.0;
+    nim->qto_xyz.m[0][1]=nim->qto_xyz.m[0][2]=nim->qto_xyz.m[0][3] = 0.0f;
+    nim->qto_xyz.m[1][0]=nim->qto_xyz.m[1][2]=nim->qto_xyz.m[1][3] = 0.0f;
+    nim->qto_xyz.m[2][0]=nim->qto_xyz.m[2][1]=nim->qto_xyz.m[2][3] = 0.0f;
 
     /* last row is always [ 0 0 0 1 ] */
 
-    nim->qto_xyz.m[3][0]=nim->qto_xyz.m[3][1]=nim->qto_xyz.m[3][2] = 0.0;
-    nim->qto_xyz.m[3][3]= 1.0 ;
+    nim->qto_xyz.m[3][0]=nim->qto_xyz.m[3][1]=nim->qto_xyz.m[3][2] = 0.0f;
+    nim->qto_xyz.m[3][3]= 1.0f ;
 
     nim->qform_code = NIFTI_XFORM_UNKNOWN ;
 
-    if( g_opts.debug > 1 ) fprintf(stderr,"-d no qform provided\n");
+    if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no qform provided\n");
   } else {
     /**- else NIFTI: use the quaternion-specified transformation */
 
@@ -3749,7 +3772,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
     nim->qoffset_y = FIXED_FLOAT(nhdr.qoffset_y) ;
     nim->qoffset_z = FIXED_FLOAT(nhdr.qoffset_z) ;
 
-    nim->qfac = (nhdr.pixdim[0] < 0.0) ? -1.0 : 1.0 ;  /* left-handedness? */
+    nim->qfac = (nhdr.pixdim[0] < 0.0) ? -1.0f : 1.0f ;  /* left-handedness? */
 
     nim->qto_xyz = nifti_quatern_to_mat44(
                       nim->quatern_b, nim->quatern_c, nim->quatern_d,
@@ -3774,7 +3797,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
 
     nim->sform_code = NIFTI_XFORM_UNKNOWN ;
 
-    if( g_opts.debug > 1 ) fprintf(stderr,"-d no sform provided\n");
+    if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no sform provided\n");
 
   } else {
     /**- else set the sto transformation from srow_*[] */
@@ -3796,8 +3819,8 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
 
     /* last row is always [ 0 0 0 1 ] */
 
-    nim->sto_xyz.m[3][0]=nim->sto_xyz.m[3][1]=nim->sto_xyz.m[3][2] = 0.0;
-    nim->sto_xyz.m[3][3]= 1.0 ;
+    nim->sto_xyz.m[3][0]=nim->sto_xyz.m[3][1]=nim->sto_xyz.m[3][2] = 0.0f;
+    nim->sto_xyz.m[3][3]= 1.0f ;
 
     nim->sto_ijk = nifti_mat44_inverse( nim->sto_xyz ) ;
 
@@ -3875,7 +3898,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
 
 #undef  ERREX
 #define ERREX(msg)                                           \
- do{ fprintf(stderr,"** ERROR: nifti_image_open(%s): %s\n",  \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_image_open(%s): %s\n",  \
              (hname != NULL) ? hname : "(null)" , (msg) ) ;  \
      return fptr ; } while(0)
 
@@ -3902,7 +3925,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
         <br>NULL if something fails badly.
     \sa nifti_image_load, nifti_image_free
  */
-znzFile nifti_image_open(const char * hname, char * opts, nifti_image ** nim)
+znzFile nifti_image_open(const char * hname, const char * opts, nifti_image ** nim)
 {
   znzFile fptr=NULL;
   /* open the hdr and reading it in, but do not load the data  */
@@ -3950,7 +3973,7 @@ nifti_1_header * nifti_read_header(const char * hname, int * swapped, int check)
          LNI_FERR(fname,"failed to find header file for", hname);
       return NULL;
    } else if( g_opts.debug > 1 )
-      fprintf(stderr,"-d %s: found header filename '%s'\n",fname,hfile);
+      Rc_fprintf_stderr("-d %s: found header filename '%s'\n",fname,hfile);
 
    fp = znzopen( hfile, "rb", nifti_is_gzfile(hfile) );
    if( znz_isnull(fp) ){
@@ -3975,7 +3998,7 @@ nifti_1_header * nifti_read_header(const char * hname, int * swapped, int check)
    if( bytes < (int)sizeof(nhdr) ){
       if( g_opts.debug > 0 ){
          LNI_FERR(fname,"bad binary header read for file", hname);
-         fprintf(stderr,"  - read %d of %d bytes\n",bytes, (int)sizeof(nhdr));
+         Rc_fprintf_stderr("  - read %d of %d bytes\n",bytes, (int)sizeof(nhdr));
       }
       return NULL;
    }
@@ -3987,7 +4010,7 @@ nifti_1_header * nifti_read_header(const char * hname, int * swapped, int check)
       return NULL;
    } else if ( lswap < 0 ) {
       lswap = 0;  /* if swapping does not help, don't do it */
-      if(g_opts.debug > 1) fprintf(stderr,"-- swap failure, none applied\n");
+      if(g_opts.debug > 1) Rc_fprintf_stderr("-- swap failure, none applied\n");
    }
 
    if( lswap ) {
@@ -4005,7 +4028,7 @@ nifti_1_header * nifti_read_header(const char * hname, int * swapped, int check)
    /* all looks good, so allocate memory for and return the header */
    hptr = (nifti_1_header *)malloc(sizeof(nifti_1_header));
    if( ! hptr ){
-      fprintf(stderr,"** nifti_read_hdr: failed to alloc nifti_1_header\n");
+      Rc_fprintf_stderr("** nifti_read_hdr: failed to alloc nifti_1_header\n");
       return NULL;
    }
 
@@ -4035,7 +4058,7 @@ int nifti_hdr_looks_good(const nifti_1_header * hdr)
    /* check dim[0] and sizeof_hdr */
    if( need_nhdr_swap(hdr->dim[0], hdr->sizeof_hdr) < 0 ){
       if( g_opts.debug > 0 )
-         fprintf(stderr,"** bad nhdr fields: dim0, sizeof_hdr = %d, %d\n",
+         Rc_fprintf_stderr("** bad nhdr fields: dim0, sizeof_hdr = %d, %d\n",
                  hdr->dim[0], hdr->sizeof_hdr);
       errs++;
    }
@@ -4044,7 +4067,7 @@ int nifti_hdr_looks_good(const nifti_1_header * hdr)
    for( c = 1; c <= hdr->dim[0] && c <= 7; c++ )
       if( hdr->dim[c] <= 0 ){
          if( g_opts.debug > 0 )
-            fprintf(stderr,"** bad nhdr field: dim[%d] = %d\n",c,hdr->dim[c]);
+            Rc_fprintf_stderr("** bad nhdr field: dim[%d] = %d\n",c,hdr->dim[c]);
          errs++;
       }
 
@@ -4054,26 +4077,26 @@ int nifti_hdr_looks_good(const nifti_1_header * hdr)
 
       if( ! nifti_datatype_is_valid(hdr->datatype, 1) ){
          if( g_opts.debug > 0 )
-            fprintf(stderr,"** bad NIFTI datatype in hdr, %d\n",hdr->datatype);
+            Rc_fprintf_stderr("** bad NIFTI datatype in hdr, %d\n",hdr->datatype);
          errs++;
       }
 
    } else {             /* ANALYZE 7.5 */
 
       if( g_opts.debug > 1 )  /* maybe tell user it's an ANALYZE hdr */
-         fprintf(stderr,
+         Rc_fprintf_stderr(
             "-- nhdr magic field implies ANALYZE: magic = '%.4s'\n",hdr->magic);
 
       if( ! nifti_datatype_is_valid(hdr->datatype, 0) ){
          if( g_opts.debug > 0 )
-           fprintf(stderr,"** bad ANALYZE datatype in hdr, %d\n",hdr->datatype);
+           Rc_fprintf_stderr("** bad ANALYZE datatype in hdr, %d\n",hdr->datatype);
          errs++;
       }
    }
 
    if( errs ) return 0;  /* problems */
 
-   if( g_opts.debug > 2 ) fprintf(stderr,"-d nifti header looks good\n");
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d nifti header looks good\n");
 
    return 1;   /* looks good */
 }
@@ -4100,9 +4123,9 @@ static int need_nhdr_swap( short dim0, int hdrsize )
       if( d0 > 0 && d0 <= 7 ) return 1;
 
       if( g_opts.debug > 1 ){
-         fprintf(stderr,"** NIFTI: bad swapped d0 = %d, unswapped = ", d0);
+         Rc_fprintf_stderr("** NIFTI: bad swapped d0 = %d, unswapped = ", d0);
          nifti_swap_2bytes(1, &d0);        /* swap? */
-         fprintf(stderr,"%d\n", d0);
+         Rc_fprintf_stderr("%d\n", d0);
       }
 
       return -1;        /* bad, naughty d0 */
@@ -4115,9 +4138,9 @@ static int need_nhdr_swap( short dim0, int hdrsize )
    if( hsize == sizeof(nifti_1_header) ) return 1;
 
    if( g_opts.debug > 1 ){
-      fprintf(stderr,"** NIFTI: bad swapped hsize = %d, unswapped = ", hsize);
+      Rc_fprintf_stderr("** NIFTI: bad swapped hsize = %d, unswapped = ", hsize);
       nifti_swap_4bytes(1, &hsize);        /* swap? */
-      fprintf(stderr,"%d\n", hsize);
+      Rc_fprintf_stderr("%d\n", hsize);
    }
 
    return -2;     /* bad, naughty hsize */
@@ -4127,7 +4150,7 @@ static int need_nhdr_swap( short dim0, int hdrsize )
 /* use macro LNI_FILE_ERROR instead of ERREX()
 #undef  ERREX
 #define ERREX(msg)                                           \
- do{ fprintf(stderr,"** ERROR: nifti_image_read(%s): %s\n",  \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_image_read(%s): %s\n",  \
              (hname != NULL) ? hname : "(null)" , (msg) ) ;  \
      return NULL ; } while(0)
 */
@@ -4158,11 +4181,11 @@ nifti_image *nifti_image_read( const char *hname , int read_data )
    char                  *hfile=NULL;
 
    if( g_opts.debug > 1 ){
-      fprintf(stderr,"-d image_read from '%s', read_data = %d",hname,read_data);
+      Rc_fprintf_stderr("-d image_read from '%s', read_data = %d",hname,read_data);
 #ifdef HAVE_ZLIB
-      fprintf(stderr,", HAVE_ZLIB = 1\n");
+      Rc_fprintf_stderr(", HAVE_ZLIB = 1\n");
 #else
-      fprintf(stderr,", HAVE_ZLIB = 0\n");
+      Rc_fprintf_stderr(", HAVE_ZLIB = 0\n");
 #endif
    }
 
@@ -4173,7 +4196,7 @@ nifti_image *nifti_image_read( const char *hname , int read_data )
          LNI_FERR(fname,"failed to find header file for", hname);
       return NULL;  /* check return */
    } else if( g_opts.debug > 1 )
-      fprintf(stderr,"-d %s: found header filename '%s'\n",fname,hfile);
+      Rc_fprintf_stderr("-d %s: found header filename '%s'\n",fname,hfile);
 
    if( nifti_is_gzfile(hfile) ) filesize = -1;  /* unknown */
    else                         filesize = nifti_get_filesize(hfile);
@@ -4192,8 +4215,12 @@ nifti_image *nifti_image_read( const char *hname , int read_data )
       free(hfile);
       return NULL;
    }
-   else if ( rv == 1 )  /* process special file type */
-      return nifti_read_ascii_image( fp, hfile, filesize, read_data );
+   else if ( rv == 1 ){ /* process special file type */
+      nim = nifti_read_ascii_image( fp, hfile, filesize, read_data );
+      znzclose(fp);
+      free(hfile);
+      return nim;
+   }
 
    /* else, just process normally */
 
@@ -4206,7 +4233,7 @@ nifti_image *nifti_image_read( const char *hname , int read_data )
    if( ii < (int) sizeof(nhdr) ){
       if( g_opts.debug > 0 ){
          LNI_FERR(fname,"bad binary header read for file", hfile);
-         fprintf(stderr,"  - read %d of %d bytes\n",ii, (int)sizeof(nhdr));
+         Rc_fprintf_stderr("  - read %d of %d bytes\n",ii, (int)sizeof(nhdr));
       }
       znzclose(fp) ;
       free(hfile);
@@ -4227,7 +4254,7 @@ nifti_image *nifti_image_read( const char *hname , int read_data )
    }
 
    if( g_opts.debug > 3 ){
-      fprintf(stderr,"+d nifti_image_read(), have nifti image:\n");
+      Rc_fprintf_stderr("+d nifti_image_read(), have nifti image:\n");
       if( g_opts.debug > 2 ) nifti_image_infodump(nim);
    }
 
@@ -4300,24 +4327,24 @@ nifti_image * nifti_read_ascii_image(znzFile fp, char *fname, int flen,
    if( nifti_is_gzfile(fname) ){
      LNI_FERR(lfunc,"compression not supported for file type NIFTI_FTYPE_ASCII",
               fname);
-     free(fname);  znzclose(fp);  return NULL;
+     return NULL;
    }
    slen = flen;  /* slen will be our buffer length */
 
    if( g_opts.debug > 1 )
-      fprintf(stderr,"-d %s: have ASCII NIFTI file of size %d\n",fname,slen);
+      Rc_fprintf_stderr("-d %s: have ASCII NIFTI file of size %d\n",fname,slen);
 
    if( slen > 65530 ) slen = 65530 ;
    sbuf = (char *)calloc(sizeof(char),slen+1) ;
    if( !sbuf ){
-      fprintf(stderr,"** %s: failed to alloc %d bytes for sbuf",lfunc,65530);
-      free(fname);  znzclose(fp);  return NULL;
+      Rc_fprintf_stderr("** %s: failed to alloc %d bytes for sbuf",lfunc,65530);
+      return NULL;
    }
    znzread( sbuf , 1 , slen , fp ) ;
    nim = nifti_image_from_ascii( sbuf, &txt_size ) ; free( sbuf ) ;
    if( nim == NULL ){
       LNI_FERR(lfunc,"failed nifti_image_from_ascii()",fname);
-      free(fname);  znzclose(fp);  return NULL;
+      return NULL;
    }
    nim->nifti_type = NIFTI_FTYPE_ASCII ;
 
@@ -4329,9 +4356,6 @@ nifti_image * nifti_read_ascii_image(znzFile fp, char *fname, int flen,
       (void) nifti_read_extensions(nim, fp, remain);
    }
 
-   free(fname);
-   znzclose( fp ) ;
-
    nim->iname_offset = -1 ;  /* check from the end of the file */
 
    if( read_data ) rv = nifti_image_load( nim ) ;
@@ -4340,7 +4364,7 @@ nifti_image * nifti_read_ascii_image(znzFile fp, char *fname, int flen,
    /* check for nifti_image_load() failure, maybe bail out */
    if( read_data && rv != 0 ){
       if( g_opts.debug > 1 )
-         fprintf(stderr,"-d failed image_load, free nifti image struct\n");
+         Rc_fprintf_stderr("-d failed image_load, free nifti image struct\n");
       free(nim);
       return NULL;
    }
@@ -4368,7 +4392,7 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain )
 
    if( !nim || znz_isnull(fp) ) {
       if( g_opts.debug > 0 )
-         fprintf(stderr,"** nifti_read_extensions: bad inputs (%p,%p)\n",
+         Rc_fprintf_stderr("** nifti_read_extensions: bad inputs (%p,%p)\n",
                  (void *)nim, (void *)fp);
       return -1;
    }
@@ -4377,20 +4401,20 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain )
 
    if( (posn != sizeof(nifti_1_header)) &&
        (nim->nifti_type != NIFTI_FTYPE_ASCII) )
-      fprintf(stderr,"** WARNING: posn not header size (%d, %d)\n",
+      Rc_fprintf_stderr("** WARNING: posn not header size (%d, %d)\n",
               posn, (int)sizeof(nifti_1_header));
 
    if( g_opts.debug > 2 )
-      fprintf(stderr,"-d nre: posn = %d, offset = %d, type = %d, remain = %d\n",
+      Rc_fprintf_stderr("-d nre: posn = %d, offset = %d, type = %d, remain = %d\n",
               posn, nim->iname_offset, nim->nifti_type, remain);
 
    if( remain < 16 ){
       if( g_opts.debug > 2 ){
          if( g_opts.skip_blank_ext )
-            fprintf(stderr,"-d no extender in '%s' is okay, as "
+            Rc_fprintf_stderr("-d no extender in '%s' is okay, as "
                            "skip_blank_ext is set\n",nim->fname);
          else
-            fprintf(stderr,"-d remain=%d, no space for extensions\n",remain);
+            Rc_fprintf_stderr("-d remain=%d, no space for extensions\n",remain);
       }
       return 0;
    }
@@ -4399,21 +4423,21 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain )
 
    if( count < 4 ){
       if( g_opts.debug > 1 )
-         fprintf(stderr,"-d file '%s' is too short for an extender\n",
+         Rc_fprintf_stderr("-d file '%s' is too short for an extender\n",
                  nim->fname);
       return 0;
    }
 
    if( extdr.extension[0] != 1 ){
       if( g_opts.debug > 2 )
-         fprintf(stderr,"-d extender[0] (%d) shows no extensions for '%s'\n",
+         Rc_fprintf_stderr("-d extender[0] (%d) shows no extensions for '%s'\n",
                  extdr.extension[0], nim->fname);
       return 0;
    }
 
    remain -= 4;
    if( g_opts.debug > 2 )
-      fprintf(stderr,"-d found valid 4-byte extender, remain = %d\n", remain);
+      Rc_fprintf_stderr("-d found valid 4-byte extender, remain = %d\n", remain);
 
    /* so we expect extensions, but have no idea of how many there may be */
 
@@ -4422,27 +4446,28 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain )
    while (nifti_read_next_extension(&extn, nim, remain, fp) > 0)
    {
       if( nifti_add_exten_to_list(&extn, &Elist, count+1) < 0 ){
+         free(Elist);
          if( g_opts.debug > 0 )
-            fprintf(stderr,"** failed adding ext %d to list\n", count);
+            Rc_fprintf_stderr("** failed adding ext %d to list\n", count);
          return -1;
       }
 
       /* we have a new extension */
       if( g_opts.debug > 1 ){
-         fprintf(stderr,"+d found extension #%d, code = 0x%x, size = %d\n",
+         Rc_fprintf_stderr("+d found extension #%d, code = 0x%x, size = %d\n",
                  count, extn.ecode, extn.esize);
          if( extn.ecode == NIFTI_ECODE_AFNI && g_opts.debug > 2 ) /* ~XML */
-            fprintf(stderr,"   AFNI extension: %.*s\n",
+            Rc_fprintf_stderr("   AFNI extension: %.*s\n",
                     extn.esize-8,extn.edata);
          else if( extn.ecode == NIFTI_ECODE_COMMENT && g_opts.debug > 2 )
-            fprintf(stderr,"   COMMENT extension: %.*s\n",        /* TEXT */
+            Rc_fprintf_stderr("   COMMENT extension: %.*s\n",        /* TEXT */
                     extn.esize-8,extn.edata);
       }
       remain -= extn.esize;
       count++;
    }
 
-   if( g_opts.debug > 2 ) fprintf(stderr,"+d found %d extension(s)\n", count);
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d found %d extension(s)\n", count);
 
    nim->num_ext = count;
    nim->ext_list = Elist;
@@ -4473,8 +4498,8 @@ int nifti_add_extension(nifti_image *nim, const char * data, int len, int ecode)
    nifti1_extension ext;
 
    /* error are printed in functions */
-   if( nifti_fill_extension(&ext, data, len, ecode) )                 return -1;
-   if( nifti_add_exten_to_list(&ext, &nim->ext_list, nim->num_ext+1)) return -1;
+   if( nifti_fill_extension(&ext, data, len, ecode) )                 {free(ext.edata); return -1;}
+   if( nifti_add_exten_to_list(&ext, &nim->ext_list, nim->num_ext+1)) {free(ext.edata); return -1;}
 
    nim->num_ext++;  /* success, so increment */
 
@@ -4501,7 +4526,7 @@ static int nifti_add_exten_to_list( nifti1_extension *  new_ext,
 
    /* check for failure first */
    if( ! *list ){
-      fprintf(stderr,"** failed to alloc %d extension structs (%d bytes)\n",
+      Rc_fprintf_stderr("** failed to alloc %d extension structs (%d bytes)\n",
               new_length, new_length*(int)sizeof(nifti1_extension));
       if( !tmplist ) return -1;  /* no old list to lose */
 
@@ -4521,7 +4546,7 @@ static int nifti_add_exten_to_list( nifti1_extension *  new_ext,
    (*list)[new_length-1].edata = new_ext->edata;
 
    if( g_opts.debug > 2 )
-      fprintf(stderr,"+d allocated and appended extension #%d to list\n",
+      Rc_fprintf_stderr("+d allocated and appended extension #%d to list\n",
               new_length);
 
    return 0;
@@ -4541,12 +4566,12 @@ static int nifti_fill_extension( nifti1_extension *ext, const char * data,
    int esize;
 
    if( !ext || !data || len < 0 ){
-      fprintf(stderr,"** fill_ext: bad params (%p,%p,%d)\n",
+      Rc_fprintf_stderr("** fill_ext: bad params (%p,%p,%d)\n",
               (void *)ext, data, len);
       return -1;
    } else if( ! nifti_is_valid_ecode(ecode) ){
-      fprintf(stderr,"** fill_ext: invalid ecode %d\n", ecode);
-      return -1;
+      Rc_fprintf_stderr("** warning: writing unknown ecode %d\n", ecode);
+      /* should not be fatal    29 Apr 2015 [rickr] */
    }
 
    /* compute esize, first : len+8, and take ceiling up to a mult of 16 */
@@ -4557,7 +4582,7 @@ static int nifti_fill_extension( nifti1_extension *ext, const char * data,
    /* allocate esize-8 (maybe more than len), using calloc for fill */
    ext->edata = (char *)calloc(esize-8, sizeof(char));
    if( !ext->edata ){
-      fprintf(stderr,"** NFE: failed to alloc %d bytes for extension\n",len);
+      Rc_fprintf_stderr("** NFE: failed to alloc %d bytes for extension\n",len);
       return -1;
    }
 
@@ -4565,7 +4590,7 @@ static int nifti_fill_extension( nifti1_extension *ext, const char * data,
    ext->ecode = ecode;             /* set the ecode */
 
    if( g_opts.debug > 2 )
-      fprintf(stderr,"+d alloc %d bytes for ext len %d, ecode %d, esize %d\n",
+      Rc_fprintf_stderr("+d alloc %d bytes for ext len %d, ecode %d, esize %d\n",
               esize-8, len, ecode, esize);
 
    return 0;
@@ -4585,7 +4610,7 @@ static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim,
                                       int remain, znzFile fp )
 {
    int swap = nim->byteorder != nifti_short_order();
-   int count, size, code;
+   int count, size, code = NIFTI_ECODE_IGNORE;
 
    /* first clear nex */
    nex->esize = nex->ecode = 0;
@@ -4593,7 +4618,7 @@ static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim,
 
    if( remain < 16 ){
       if( g_opts.debug > 2 )
-         fprintf(stderr,"-d only %d bytes remain, so no extension\n", remain);
+         Rc_fprintf_stderr("-d only %d bytes remain, so no extension\n", remain);
       return 0;
    }
 
@@ -4603,25 +4628,25 @@ static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim,
 
    if( count != 2 ){
       if( g_opts.debug > 2 )
-         fprintf(stderr,"-d current extension read failed\n");
+         Rc_fprintf_stderr("-d current extension read failed\n");
       znzseek(fp, -4*count, SEEK_CUR); /* back up past any read */
       return 0;                        /* no extension, no error condition */
    }
 
    if( swap ){
       if( g_opts.debug > 2 )
-         fprintf(stderr,"-d pre-swap exts: code %d, size %d\n", code, size);
+         Rc_fprintf_stderr("-d pre-swap exts: code %d, size %d\n", code, size);
 
       nifti_swap_4bytes(1, &size);
       nifti_swap_4bytes(1, &code);
    }
 
    if( g_opts.debug > 2 )
-      fprintf(stderr,"-d potential extension: code %d, size %d\n", code, size);
+      Rc_fprintf_stderr("-d potential extension: code %d, size %d\n", code, size);
 
    if( !nifti_check_extension(nim, size, code, remain) ){
       if( znzseek(fp, -8, SEEK_CUR) < 0 ){      /* back up past any read */
-         fprintf(stderr,"** failure to back out of extension read!\n");
+         Rc_fprintf_stderr("** failure to back out of extension read!\n");
          return -1;
       }
       return 0;
@@ -4634,14 +4659,14 @@ static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim,
    size -= 8;  /* subtract space for size and code in extension */
    nex->edata = (char *)malloc(size * sizeof(char));
    if( !nex->edata ){
-      fprintf(stderr,"** failed to allocate %d bytes for extension\n",size);
+      Rc_fprintf_stderr("** failed to allocate %d bytes for extension\n",size);
       return -1;
    }
 
    count = (int)znzread(nex->edata, 1, size, fp);
    if( count < size ){
       if( g_opts.debug > 0 )
-         fprintf(stderr,"-d read only %d (of %d) bytes for extension\n",
+         Rc_fprintf_stderr("-d read only %d (of %d) bytes for extension\n",
                  count, size);
       free(nex->edata);
       nex->edata = NULL;
@@ -4650,7 +4675,7 @@ static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim,
 
    /* success! */
    if( g_opts.debug > 2 )
-      fprintf(stderr,"+d successfully read extension, code %d, size %d\n",
+      Rc_fprintf_stderr("+d successfully read extension, code %d, size %d\n",
               nex->ecode, nex->esize);
 
    return nex->esize;
@@ -4666,7 +4691,7 @@ int valid_nifti_extensions(const nifti_image * nim)
    int                c, errs;
 
    if( nim->num_ext <= 0 || nim->ext_list == NULL ){
-      if( g_opts.debug > 2 ) fprintf(stderr,"-d empty extension list\n");
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d empty extension list\n");
       return 0;
    }
 
@@ -4676,23 +4701,23 @@ int valid_nifti_extensions(const nifti_image * nim)
    for ( c = 0; c < nim->num_ext; c++ ){
       if( ! nifti_is_valid_ecode(ext->ecode) ) {
          if( g_opts.debug > 1 )
-            fprintf(stderr,"-d ext %d, invalid code %d\n", c, ext->ecode);
-         errs++;
+            Rc_fprintf_stderr("-d ext %d, unknown code %d\n", c, ext->ecode);
+         /* should not be fatal    29 Apr 2015 [rickr] */
       }
 
       if( ext->esize <= 0 ){
          if( g_opts.debug > 1 )
-            fprintf(stderr,"-d ext %d, bad size = %d\n", c, ext->esize);
+            Rc_fprintf_stderr("-d ext %d, bad size = %d\n", c, ext->esize);
          errs++;
       } else if( ext->esize & 0xf ){
          if( g_opts.debug > 1 )
-            fprintf(stderr,"-d ext %d, size %d not multiple of 16\n",
+            Rc_fprintf_stderr("-d ext %d, size %d not multiple of 16\n",
                     c, ext->esize);
          errs++;
       }
 
       if( ext->edata == NULL ){
-         if( g_opts.debug > 1 ) fprintf(stderr,"-d ext %d, missing data\n", c);
+         if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d ext %d, missing data\n", c);
          errs++;
       }
 
@@ -4701,7 +4726,7 @@ int valid_nifti_extensions(const nifti_image * nim)
 
    if( errs > 0 ){
       if( g_opts.debug > 0 )
-         fprintf(stderr,"-d had %d extension errors, none will be written\n",
+         Rc_fprintf_stderr("-d had %d extension errors, none will be written\n",
                  errs);
       return 0;
    }
@@ -4710,7 +4735,7 @@ int valid_nifti_extensions(const nifti_image * nim)
    return 1;
 }
 
-
+#ifndef RNIFTI_NIFTILIB_DEDUPLICATE
 /*----------------------------------------------------------------------*/
 /*! check whether the extension code is valid
 
@@ -4725,7 +4750,7 @@ int nifti_is_valid_ecode( int ecode )
 
    return 1;
 }
-
+#endif
 
 /*----------------------------------------------------------------------
  * check for valid size and code, as well as can be done
@@ -4735,31 +4760,31 @@ static int nifti_check_extension(nifti_image *nim, int size, int code, int rem)
    /* check for bad code before bad size */
    if( ! nifti_is_valid_ecode(code) ) {
       if( g_opts.debug > 2 )
-         fprintf(stderr,"-d invalid extension code %d\n",code);
-      return 0;
+         Rc_fprintf_stderr("-d invalid extension code %d\n",code);
+      /* should not be fatal    29 Apr 2015 [rickr] */
    }
 
    if( size < 16 ){
       if( g_opts.debug > 2 )
-         fprintf(stderr,"-d ext size %d, no extension\n",size);
+         Rc_fprintf_stderr("-d ext size %d, no extension\n",size);
       return 0;
    }
 
    if( size > rem ){
       if( g_opts.debug > 2 )
-         fprintf(stderr,"-d ext size %d, space %d, no extension\n", size, rem);
+         Rc_fprintf_stderr("-d ext size %d, space %d, no extension\n", size, rem);
       return 0;
    }
 
    if( size & 0xf ){
       if( g_opts.debug > 2 )
-         fprintf(stderr,"-d nifti extension size %d not multiple of 16\n",size);
+         Rc_fprintf_stderr("-d nifti extension size %d not multiple of 16\n",size);
       return 0;
    }
 
    if( nim->nifti_type == NIFTI_FTYPE_ASCII && size > LNI_MAX_NIA_EXT_LEN ){
       if( g_opts.debug > 2 )
-         fprintf(stderr,"-d NVE, bad nifti_type 3 size %d\n", size);
+         Rc_fprintf_stderr("-d NVE, bad nifti_type 3 size %d\n", size);
       return 0;
    }
 
@@ -4788,8 +4813,8 @@ static znzFile nifti_image_load_prep( nifti_image *nim )
        nim->nbyper <= 0 || nim->nvox <= 0       )
    {
       if ( g_opts.debug > 0 ){
-         if( !nim ) fprintf(stderr,"** ERROR: N_image_load: no nifti image\n");
-         else fprintf(stderr,"** ERROR: N_image_load: bad params (%p,%d,%u)\n",
+         if( !nim ) Rc_fprintf_stderr("** ERROR: N_image_load: no nifti image\n");
+         else Rc_fprintf_stderr("** ERROR: N_image_load: bad params (%p,%d,%u)\n",
                       nim->iname, nim->nbyper, (unsigned)nim->nvox);
       }
       return NULL;
@@ -4802,7 +4827,7 @@ static znzFile nifti_image_load_prep( nifti_image *nim )
    tmpimgname = nifti_findimgname(nim->iname , nim->nifti_type);
    if( tmpimgname == NULL ){
       if( g_opts.debug > 0 )
-         fprintf(stderr,"** no image file found for '%s'\n",nim->iname);
+         Rc_fprintf_stderr("** no image file found for '%s'\n",nim->iname);
       return NULL;
    }
 
@@ -4835,7 +4860,7 @@ static znzFile nifti_image_load_prep( nifti_image *nim )
 
    /**- seek to the appropriate read position */
    if( znzseek(fp , (long)ioff , SEEK_SET) < 0 ){
-      fprintf(stderr,"** could not seek to offset %u in file '%s'\n",
+      Rc_fprintf_stderr("** could not seek to offset %u in file '%s'\n",
               (unsigned)ioff, nim->iname);
       znzclose(fp);
       return NULL;
@@ -4875,7 +4900,7 @@ int nifti_image_load( nifti_image *nim )
 
    if( fp == NULL ){
       if( g_opts.debug > 0 )
-         fprintf(stderr,"** nifti_image_load, failed load_prep\n");
+         Rc_fprintf_stderr("** nifti_image_load, failed load_prep\n");
       return -1;
    }
 
@@ -4888,7 +4913,7 @@ int nifti_image_load( nifti_image *nim )
      nim->data = (void *)calloc(1,ntot) ;  /* create image memory */
      if( nim->data == NULL ){
         if( g_opts.debug > 0 )
-           fprintf(stderr,"** failed to alloc %d bytes for image data\n",
+           Rc_fprintf_stderr("** failed to alloc %d bytes for image data\n",
                    (int)ntot);
         znzclose(fp);
         return -1;
@@ -4914,7 +4939,7 @@ int nifti_image_load( nifti_image *nim )
 /* 30 Nov 2004 [rickr]
 #undef  ERREX
 #define ERREX(msg)                                               \
- do{ fprintf(stderr,"** ERROR: nifti_read_buffer: %s\n",(msg)) ;  \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_read_buffer: %s\n",(msg)) ;  \
      return 0; } while(0)
 */
 
@@ -4933,7 +4958,7 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot,
 
   if( dataptr == NULL ){
      if( g_opts.debug > 0 )
-        fprintf(stderr,"** ERROR: nifti_read_buffer: NULL dataptr\n");
+        Rc_fprintf_stderr("** ERROR: nifti_read_buffer: NULL dataptr\n");
      return -1;
   }
 
@@ -4942,7 +4967,7 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot,
   /* if read was short, fail */
   if( ii < ntot ){
     if( g_opts.debug > 0 )
-       fprintf(stderr,"++ WARNING: nifti_read_buffer(%s):\n"
+       Rc_fprintf_stderr("++ WARNING: nifti_read_buffer(%s):\n"
                "   data bytes needed = %u\n"
                "   data bytes input  = %u\n"
                "   number missing    = %u (set to 0)\n",
@@ -4953,18 +4978,18 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot,
   }
 
   if( g_opts.debug > 2 )
-    fprintf(stderr,"+d nifti_read_buffer: read %u bytes\n", (unsigned)ii);
+    Rc_fprintf_stderr("+d nifti_read_buffer: read %u bytes\n", (unsigned)ii);
 
   /* byte swap array if needed */
 
   /* ntot/swapsize might not fit as int, use size_t    6 Jul 2010 [rickr] */
   if( nim->swapsize > 1 && nim->byteorder != nifti_short_order() ) {
     if( g_opts.debug > 1 )
-       fprintf(stderr,"+d nifti_read_buffer: swapping data bytes...\n");
-    nifti_swap_Nbytes( ntot / nim->swapsize, nim->swapsize , dataptr ) ;
+       Rc_fprintf_stderr("+d nifti_read_buffer: swapping data bytes...\n");
+    nifti_swap_Nbytes( (int)(ntot / nim->swapsize), nim->swapsize , dataptr ) ;
   }
-#ifndef USE_NII_NAN
-#ifdef isfinite
+
+#if defined(isfinite) && !defined(USING_R)
 {
   /* check input float arrays for goodness, and fix bad floats */
   int fix_count = 0 ;
@@ -4973,7 +4998,7 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot,
 
     case NIFTI_TYPE_FLOAT32:
     case NIFTI_TYPE_COMPLEX64:{
-        register float *far = (float *)dataptr ; register size_t jj,nj ;
+        float *far = (float *)dataptr ; size_t jj,nj ;
         nj = ntot / sizeof(float) ;
         for( jj=0 ; jj < nj ; jj++ )   /* count fixes 30 Nov 2004 [rickr] */
            if( !IS_GOOD_FLOAT(far[jj]) ){
@@ -4985,7 +5010,7 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot,
 
     case NIFTI_TYPE_FLOAT64:
     case NIFTI_TYPE_COMPLEX128:{
-        register double *far = (double *)dataptr ; register size_t jj,nj ;
+        double *far = (double *)dataptr ; size_t jj,nj ;
         nj = ntot / sizeof(double) ;
         for( jj=0 ; jj < nj ; jj++ )   /* count fixes 30 Nov 2004 [rickr] */
            if( !IS_GOOD_FLOAT(far[jj]) ){
@@ -4998,9 +5023,8 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot,
   }
 
   if( g_opts.debug > 1 )
-     fprintf(stderr,"+d in image, %d bad floats were set to 0\n", fix_count);
+     Rc_fprintf_stderr("+d in image, %d bad floats were set to 0\n", fix_count);
 }
-#endif
 #endif
 
   return ii;
@@ -5014,8 +5038,7 @@ void nifti_image_unload( nifti_image *nim )
    if( nim != NULL && nim->data != NULL ){
      free(nim->data) ; nim->data = NULL ;
    }
-   return ;
-}
+   }
 
 /*--------------------------------------------------------------------------*/
 /*! free 'everything' about a nifti_image struct (including the passed struct)
@@ -5034,8 +5057,7 @@ void nifti_image_free( nifti_image *nim )
    if( nim->iname != NULL ) free(nim->iname) ;
    if( nim->data  != NULL ) free(nim->data ) ;
    (void)nifti_free_extensions( nim ) ;
-   free(nim) ; return ;
-}
+   free(nim) ; }
 
 
 /*--------------------------------------------------------------------------*/
@@ -5060,11 +5082,11 @@ int nifti_free_extensions( nifti_image *nim )
    }
    /* or if it is inconsistent, warn the user (if we are not in quiet mode) */
    else if ( (nim->num_ext > 0 || nim->ext_list != NULL) && (g_opts.debug > 0) )
-      fprintf(stderr,"** warning: nifti extension num/ptr mismatch (%d,%p)\n",
+      Rc_fprintf_stderr("** warning: nifti extension num/ptr mismatch (%d,%p)\n",
               nim->num_ext, (void *)nim->ext_list);
 
    if( g_opts.debug > 2 )
-      fprintf(stderr,"+d free'd %d extension(s)\n", nim->num_ext);
+      Rc_fprintf_stderr("+d free'd %d extension(s)\n", nim->num_ext);
 
    nim->num_ext = 0;
    nim->ext_list = NULL;
@@ -5080,9 +5102,8 @@ void nifti_image_infodump( const nifti_image *nim )
 {
    char *str = nifti_image_to_ascii( nim ) ;
    /* stdout -> stderr   2 Dec 2004 [rickr] */
-   if( str != NULL ){ fputs(str,stderr) ; free(str) ; }
-   return ;
-}
+   if( str != NULL ){ Rc_fputs_stderr(str) ; free(str) ; }
+   }
 
 
 /*--------------------------------------------------------------------------
@@ -5101,10 +5122,10 @@ size_t nifti_write_buffer(znzFile fp, const void *buffer, size_t numbytes)
    /* Write all the image data at once (no swapping here) */
    size_t ss;
    if (znz_isnull(fp)){
-      fprintf(stderr,"** ERROR: nifti_write_buffer: null file pointer\n");
+      Rc_fprintf_stderr("** ERROR: nifti_write_buffer: null file pointer\n");
       return 0;
    }
-   ss = znzwrite( (void*)buffer , 1 , numbytes , fp ) ;
+   ss = znzwrite( (const void*)buffer , 1 , numbytes , fp ) ;
    return ss;
 }
 
@@ -5136,23 +5157,23 @@ int nifti_write_all_data(znzFile fp, nifti_image * nim,
 
    if( !NBL ){ /* just write one buffer and get out of here */
       if( nim->data == NULL ){
-         fprintf(stderr,"** NWAD: no image data to write\n");
+         Rc_fprintf_stderr("** NWAD: no image data to write\n");
          return -1;
       }
 
       ss = nifti_write_buffer(fp,nim->data,nim->nbyper * nim->nvox);
       if (ss < nim->nbyper * nim->nvox){
-         fprintf(stderr,
+         Rc_fprintf_stderr(
             "** ERROR: NWAD: wrote only %u of %u bytes to file\n",
             (unsigned)ss, (unsigned)(nim->nbyper * nim->nvox));
          return -1;
       }
 
       if( g_opts.debug > 1 )
-         fprintf(stderr,"+d wrote single image of %u bytes\n", (unsigned)ss);
+         Rc_fprintf_stderr("+d wrote single image of %u bytes\n", (unsigned)ss);
    } else {
       if( ! NBL->bricks || NBL->nbricks <= 0 || NBL->bsize <= 0 ){
-         fprintf(stderr,"** NWAD: no brick data to write (%p,%d,%u)\n",
+         Rc_fprintf_stderr("** NWAD: no brick data to write (%p,%d,%u)\n",
                  (void *)NBL->bricks, NBL->nbricks, (unsigned)NBL->bsize);
          return -1;
       }
@@ -5160,14 +5181,14 @@ int nifti_write_all_data(znzFile fp, nifti_image * nim,
       for( bnum = 0; bnum < NBL->nbricks; bnum++ ){
          ss = nifti_write_buffer(fp, NBL->bricks[bnum], NBL->bsize);
          if( ss < NBL->bsize ){
-            fprintf(stderr,
+            Rc_fprintf_stderr(
               "** NWAD ERROR: wrote %u of %u bytes of brick %d of %d to file",
                (unsigned)ss, (unsigned)NBL->bsize, bnum+1, NBL->nbricks);
             return -1;
          }
       }
       if( g_opts.debug > 1 )
-         fprintf(stderr,"+d wrote image of %d brick(s), each of %u bytes\n",
+         Rc_fprintf_stderr("+d wrote image of %d brick(s), each of %u bytes\n",
                  NBL->nbricks, (unsigned int)NBL->bsize);
    }
 
@@ -5186,14 +5207,14 @@ static int nifti_write_extensions(znzFile fp, nifti_image *nim)
 
    if( znz_isnull(fp) || !nim || nim->num_ext < 0 ){
       if( g_opts.debug > 0 )
-         fprintf(stderr,"** nifti_write_extensions, bad params\n");
+         Rc_fprintf_stderr("** nifti_write_extensions, bad params\n");
       return -1;
    }
 
    /* if no extensions and user requests it, skip extender */
    if( g_opts.skip_blank_ext && (nim->num_ext == 0 || ! nim->ext_list ) ){
       if( g_opts.debug > 1 )
-         fprintf(stderr,"-d no exts and skip_blank_ext set, "
+         Rc_fprintf_stderr("-d no exts and skip_blank_ext set, "
                         "so skipping 4-byte extender\n");
       return 0;
    }
@@ -5204,7 +5225,7 @@ static int nifti_write_extensions(znzFile fp, nifti_image *nim)
    /* write out extender block */
    if( nim->num_ext > 0 ) extdr[0] = 1;
    if( nifti_write_buffer(fp, extdr, 4) != 4 ){
-      fprintf(stderr,"** failed to write extender\n");
+      Rc_fprintf_stderr("** failed to write extender\n");
       return -1;
    }
 
@@ -5222,16 +5243,16 @@ static int nifti_write_extensions(znzFile fp, nifti_image *nim)
       }
 
       if( !ok ){
-         fprintf(stderr,"** failed while writing extension #%d\n",c);
+         Rc_fprintf_stderr("** failed while writing extension #%d\n",c);
          return -1;
       } else if ( g_opts.debug > 2 )
-         fprintf(stderr,"+d wrote extension %d of %d bytes\n", c, size);
+         Rc_fprintf_stderr("+d wrote extension %d of %d bytes\n", c, size);
 
       list++;
    }
 
    if( g_opts.debug > 1 )
-      fprintf(stderr,"+d wrote out %d extension(s)\n", nim->num_ext);
+      Rc_fprintf_stderr("+d wrote out %d extension(s)\n", nim->num_ext);
 
    return nim->num_ext;
 }
@@ -5240,7 +5261,7 @@ static int nifti_write_extensions(znzFile fp, nifti_image *nim)
 /*----------------------------------------------------------------------*/
 /*! basic initialization of a nifti_image struct (to a 1x1x1 image)
 *//*--------------------------------------------------------------------*/
-nifti_image *nifti_simple_init_nim(void)
+nifti_image* nifti_simple_init_nim(void)
 {
   nifti_image *nim;
   struct nifti_1_header nhdr;
@@ -5255,9 +5276,9 @@ nifti_image *nifti_simple_init_nim(void)
    nhdr.dim[1] = 1 ; nhdr.dim[2] = 1 ; nhdr.dim[3] = 1 ;
    nhdr.dim[4] = 0 ;
 
-   nhdr.pixdim[0] = 0.0 ;
-   nhdr.pixdim[1] = 1.0 ; nhdr.pixdim[2] = 1.0 ;
-   nhdr.pixdim[3] = 1.0 ;
+   nhdr.pixdim[0] = 0.0f ;
+   nhdr.pixdim[1] = 1.0f ; nhdr.pixdim[2] = 1.0f ;
+   nhdr.pixdim[3] = 1.0f ;
 
    nhdr.datatype = DT_FLOAT32 ;
    nifti_datatype_sizes( nhdr.datatype , &nbyper, &swapsize );
@@ -5297,13 +5318,13 @@ nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype)
 
    /* validate dim: if there is any problem, apply default_dims */
    if( dim[0] < 1 || dim[0] > 7 ) {
-      fprintf(stderr,"** nifti_simple_hdr_with_dims: bad dim[0]=%d\n",dim[0]);
+      Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dim[0]=%d\n",dim[0]);
       dim = default_dims;
    } else {
       for( c = 1; c <= dim[0]; c++ )
          if( dim[c] < 1 )
          {
-            fprintf(stderr,
+            Rc_fprintf_stderr(
                 "** nifti_simple_hdr_with_dims: bad dim[%d]=%d\n",c,dim[c]);
             dim = default_dims;
             break;
@@ -5313,19 +5334,19 @@ nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype)
    /* validate dtype, too */
    dtype = arg_dtype;
    if( ! nifti_is_valid_datatype(dtype) ) {
-      fprintf(stderr,"** nifti_simple_hdr_with_dims: bad dtype %d\n",dtype);
+      Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dtype %d\n",dtype);
       dtype = DT_FLOAT32;
    }
 
    /* now populate the header struct */
 
    if( g_opts.debug > 1 )
-      fprintf(stderr,"+d nifti_make_new_header, dim[0] = %d, datatype = %d\n",
+      Rc_fprintf_stderr("+d nifti_make_new_header, dim[0] = %d, datatype = %d\n",
               dim[0], dtype);
 
    nhdr = (nifti_1_header *)calloc(1,sizeof(nifti_1_header));
    if( !nhdr ){
-      fprintf(stderr,"** nifti_make_new_header: failed to alloc hdr\n");
+      Rc_fprintf_stderr("** nifti_make_new_header: failed to alloc hdr\n");
       return NULL;
    }
 
@@ -5334,10 +5355,10 @@ nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype)
 
    /* init dim and pixdim */
    nhdr->dim[0] = dim[0] ;
-   nhdr->pixdim[0] = 0.0;
+   nhdr->pixdim[0] = 0.0f;
    for( c = 1; c <= dim[0]; c++ ) {
       nhdr->dim[c] = dim[c];
-      nhdr->pixdim[c] = 1.0;
+      nhdr->pixdim[c] = 1.0f;
    }
 
    nhdr->datatype = dtype ;
@@ -5373,19 +5394,19 @@ nifti_image * nifti_make_new_nim(const int dims[], int datatype, int data_fill)
    nim = nifti_convert_nhdr2nim(*nhdr,NULL);
    free(nhdr);               /* in any case, we are done with this */
    if( !nim ){
-      fprintf(stderr,"** NMNN: nifti_convert_nhdr2nim failure\n");
+      Rc_fprintf_stderr("** NMNN: nifti_convert_nhdr2nim failure\n");
       return NULL;
    }
 
    if( g_opts.debug > 1 )
-      fprintf(stderr,"+d nifti_make_new_nim, data_fill = %d\n",data_fill);
+      Rc_fprintf_stderr("+d nifti_make_new_nim, data_fill = %d\n",data_fill);
 
    if( data_fill ) {
       nim->data = calloc(nim->nvox, nim->nbyper);
 
       /* if we cannot allocate data, take ball and go home */
       if( !nim->data ) {
-         fprintf(stderr,"** NMNN: failed to alloc %u bytes for data\n",
+         Rc_fprintf_stderr("** NMNN: failed to alloc %u bytes for data\n",
                  (unsigned)(nim->nvox*nim->nbyper));
          nifti_image_free(nim);
          nim = NULL;
@@ -5423,7 +5444,7 @@ struct nifti_1_header nifti_convert_nim2nhdr(const nifti_image * nim)
    nhdr.dim[4] = nim->nt ; nhdr.dim[5] = nim->nu ; nhdr.dim[6] = nim->nv ;
    nhdr.dim[7] = nim->nw ;
 
-   nhdr.pixdim[0] = 0.0 ;
+   nhdr.pixdim[0] = 0.0f ;
    nhdr.pixdim[1] = nim->dx ; nhdr.pixdim[2] = nim->dy ;
    nhdr.pixdim[3] = nim->dz ; nhdr.pixdim[4] = nim->dt ;
    nhdr.pixdim[5] = nim->du ; nhdr.pixdim[6] = nim->dv ;
@@ -5456,10 +5477,10 @@ struct nifti_1_header nifti_convert_nim2nhdr(const nifti_image * nim)
      if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcpy(nhdr.magic,"n+1") ;
      else                                          strcpy(nhdr.magic,"ni1") ;
 
-     nhdr.pixdim[1] = fabs(nhdr.pixdim[1]) ; nhdr.pixdim[2] = fabs(nhdr.pixdim[2]) ;
-     nhdr.pixdim[3] = fabs(nhdr.pixdim[3]) ; nhdr.pixdim[4] = fabs(nhdr.pixdim[4]) ;
-     nhdr.pixdim[5] = fabs(nhdr.pixdim[5]) ; nhdr.pixdim[6] = fabs(nhdr.pixdim[6]) ;
-     nhdr.pixdim[7] = fabs(nhdr.pixdim[7]) ;
+     nhdr.pixdim[1] = (float)fabs(nhdr.pixdim[1]) ; nhdr.pixdim[2] = (float)fabs(nhdr.pixdim[2]) ;
+     nhdr.pixdim[3] = (float)fabs(nhdr.pixdim[3]) ; nhdr.pixdim[4] = (float)fabs(nhdr.pixdim[4]) ;
+     nhdr.pixdim[5] = (float)fabs(nhdr.pixdim[5]) ; nhdr.pixdim[6] = (float)fabs(nhdr.pixdim[6]) ;
+     nhdr.pixdim[7] = (float)fabs(nhdr.pixdim[7]) ;
 
      nhdr.intent_code = nim->intent_code ;
      nhdr.intent_p1   = nim->intent_p1 ;
@@ -5482,7 +5503,7 @@ struct nifti_1_header nifti_convert_nim2nhdr(const nifti_image * nim)
        nhdr.qoffset_x  = nim->qoffset_x ;
        nhdr.qoffset_y  = nim->qoffset_y ;
        nhdr.qoffset_z  = nim->qoffset_z ;
-       nhdr.pixdim[0]  = (nim->qfac >= 0.0) ? 1.0 : -1.0 ;
+       nhdr.pixdim[0]  = (nim->qfac >= 0.0) ? 1.0f : -1.0f ;
      }
 
      if( nim->sform_code > 0 ){
@@ -5530,19 +5551,19 @@ int nifti_copy_extensions(nifti_image * nim_dest, const nifti_image * nim_src)
    int      c, size, old_size;
 
    if( nim_dest->num_ext > 0 || nim_dest->ext_list != NULL ){
-      fprintf(stderr,"** will not copy extensions over existing ones\n");
+      Rc_fprintf_stderr("** will not copy extensions over existing ones\n");
       return -1;
    }
 
    if( g_opts.debug > 1 )
-      fprintf(stderr,"+d duplicating %d extension(s)\n", nim_src->num_ext);
+      Rc_fprintf_stderr("+d duplicating %d extension(s)\n", nim_src->num_ext);
 
    if( nim_src->num_ext <= 0 ) return 0;
 
    bytes = nim_src->num_ext * sizeof(nifti1_extension);  /* I'm lazy */
    nim_dest->ext_list = (nifti1_extension *)malloc(bytes);
    if( !nim_dest->ext_list ){
-      fprintf(stderr,"** failed to allocate %d nifti1_extension structs\n",
+      Rc_fprintf_stderr("** failed to allocate %d nifti1_extension structs\n",
               nim_src->num_ext);
       return -1;
    }
@@ -5553,12 +5574,12 @@ int nifti_copy_extensions(nifti_image * nim_dest, const nifti_image * nim_src)
       size = old_size = nim_src->ext_list[c].esize;
       if( size & 0xf ) size = (size + 0xf) & ~0xf; /* make multiple of 16 */
       if( g_opts.debug > 2 )
-         fprintf(stderr,"+d dup'ing ext #%d of size %d (from size %d)\n",
+         Rc_fprintf_stderr("+d dup'ing ext #%d of size %d (from size %d)\n",
                  c, size, old_size);
       /* data length is size-8, as esize includes space for esize and ecode */
       data = (char *)calloc(size-8,sizeof(char));      /* maybe size > old */
       if( !data ){
-         fprintf(stderr,"** failed to alloc %d bytes for extention\n", size);
+         Rc_fprintf_stderr("** failed to alloc %d bytes for extention\n", size);
          if( c == 0 ) { free(nim_dest->ext_list); nim_dest->ext_list = NULL; }
          /* otherwise, keep what we have (a.o.t. deleting them all) */
          return -1;
@@ -5591,14 +5612,14 @@ int nifti_extension_size(nifti_image *nim)
 
    if( !nim || nim->num_ext <= 0 ) return 0;
 
-   if( g_opts.debug > 2 ) fprintf(stderr,"-d ext sizes:");
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d ext sizes:");
 
    for ( c = 0; c < nim->num_ext; c++ ){
       size += nim->ext_list[c].esize;
-      if( g_opts.debug > 2 ) fprintf(stderr,"  %d",nim->ext_list[c].esize);
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("  %d",nim->ext_list[c].esize);
    }
 
-   if( g_opts.debug > 2 ) fprintf(stderr," (total = %d)\n",size);
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr(" (total = %d)\n",size);
 
    return size;
 }
@@ -5630,7 +5651,7 @@ void nifti_set_iname_offset(nifti_image *nim)
        if ( ( offset % 16 ) != 0 )  offset = ((offset + 0xf) & ~0xf);
        if( nim->iname_offset != offset ){
           if( g_opts.debug > 1 )
-             fprintf(stderr,"+d changing offset from %d to %d\n",
+             Rc_fprintf_stderr("+d changing offset from %d to %d\n",
                   nim->iname_offset, offset);
           nim->iname_offset = offset;
        }
@@ -5665,7 +5686,7 @@ znzFile nifti_image_write_hdr_img( nifti_image *nim , int write_data ,
 
 #undef  ERREX
 #define ERREX(msg)                                                \
- do{ fprintf(stderr,"** ERROR: nifti_image_write_hdr_img: %s\n",(msg)) ;  \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_hdr_img: %s\n",(msg)) ;  \
      return fp ; } while(0)
 
 
@@ -5716,9 +5737,9 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
    nifti_set_iname_offset(nim);
 
    if( g_opts.debug > 1 ){
-      fprintf(stderr,"-d writing nifti file '%s'...\n", nim->fname);
+      Rc_fprintf_stderr("-d writing nifti file '%s'...\n", nim->fname);
       if( g_opts.debug > 2 )
-         fprintf(stderr,"-d nifti type %d, offset %d\n",
+         Rc_fprintf_stderr("-d nifti type %d, offset %d\n",
                  nim->nifti_type, nim->iname_offset);
    }
 
@@ -5740,12 +5761,12 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
 
    /* if we have an imgfile and will write the header there, use it */
    if( ! znz_isnull(imgfile) && nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ){
-      if( g_opts.debug > 2 ) fprintf(stderr,"+d using passed file for hdr\n");
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d using passed file for hdr\n");
       fp = imgfile;
    }
    else {
       if( g_opts.debug > 2 )
-         fprintf(stderr,"+d opening output file %s [%s]\n",nim->fname,opts);
+         Rc_fprintf_stderr("+d opening output file %s [%s]\n",nim->fname,opts);
       fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ;
       if( znz_isnull(fp) ){
          LNI_FERR(func,"cannot open output file",nim->fname);
@@ -5767,19 +5788,19 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
 
    /* if the header is all we want, we are done */
    if( ! write_data && ! leave_open ){
-      if( g_opts.debug > 2 ) fprintf(stderr,"-d header is all we want: done\n");
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d header is all we want: done\n");
       znzclose(fp); return(fp);
    }
 
    if( nim->nifti_type != NIFTI_FTYPE_NIFTI1_1 ){ /* get a new file pointer */
       znzclose(fp);         /* first, close header file */
       if( ! znz_isnull(imgfile) ){
-         if(g_opts.debug > 2) fprintf(stderr,"+d using passed file for img\n");
+         if(g_opts.debug > 2) Rc_fprintf_stderr("+d using passed file for img\n");
          fp = imgfile;
       }
       else {
          if( g_opts.debug > 2 )
-            fprintf(stderr,"+d opening img file '%s'\n", nim->iname);
+            Rc_fprintf_stderr("+d opening img file '%s'\n", nim->iname);
          fp = znzopen( nim->iname , opts , nifti_is_gzfile(nim->iname) ) ;
          if( znz_isnull(fp) ) ERREX("cannot open image file") ;
       }
@@ -5804,12 +5825,12 @@ znzFile nifti_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL,
    char    * hstr;
 
    hstr = nifti_image_to_ascii( nim ) ;  /* get header in ASCII form */
-   if( ! hstr ){ fprintf(stderr,"** failed image_to_ascii()\n"); return NULL; }
+   if( ! hstr ){ Rc_fprintf_stderr("** failed image_to_ascii()\n"); return NULL; }
 
    fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ;
    if( znz_isnull(fp) ){
       free(hstr);
-      fprintf(stderr,"** failed to open '%s' for ascii write\n",nim->fname);
+      Rc_fprintf_stderr("** failed to open '%s' for ascii write\n",nim->fname);
       return fp;
    }
 
@@ -5852,10 +5873,10 @@ void nifti_image_write( nifti_image *nim )
 {
    znzFile fp = nifti_image_write_hdr_img(nim,1,"wb");
    if( fp ){
-      if( g_opts.debug > 2 ) fprintf(stderr,"-d niw: done with znzFile\n");
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niw: done with znzFile\n");
       free(fp);
    }
-   if( g_opts.debug > 1 ) fprintf(stderr,"-d nifti_image_write: done\n");
+   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d nifti_image_write: done\n");
 }
 
 
@@ -5868,10 +5889,10 @@ void nifti_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL )
 {
    znzFile fp = nifti_image_write_hdr_img2(nim,1,"wb",NULL,NBL);
    if( fp ){
-      if( g_opts.debug > 2 ) fprintf(stderr,"-d niwb: done with znzFile\n");
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niwb: done with znzFile\n");
       free(fp);
    }
-   if( g_opts.debug > 1 ) fprintf(stderr,"-d niwb: done writing bricks\n");
+   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d niwb: done writing bricks\n");
 }
 
 
@@ -5886,7 +5907,7 @@ nifti_image * nifti_copy_nim_info(const nifti_image * src)
   nifti_image *dest;
   dest = (nifti_image *)calloc(1,sizeof(nifti_image));
   if( !dest ){
-     fprintf(stderr,"** NCNI: failed to alloc nifti_image\n");
+     Rc_fprintf_stderr("** NCNI: failed to alloc nifti_image\n");
      return NULL;
   }
   memcpy(dest, src, sizeof(nifti_image));
@@ -5971,7 +5992,7 @@ static int unescape_string( char *str )
 
         else if( ii+3 < ll        &&
                  str[ii+1] == '#' &&
-                 isdigit(str[ii+2]) ){   /* &#dec; */
+                 isdigit((int) str[ii+2]) ){   /* &#dec; */
 
            unsigned int val='?' ; int kk=ii+3 ;
            while( kk < ll && kk != ';' ) kk++ ;
@@ -5982,7 +6003,7 @@ static int unescape_string( char *str )
         else if( ii+4 < ll        &&
                  str[ii+1] == '#' &&
                  str[ii+2] == 'x' &&
-                 isxdigit(str[ii+3]) ){   /* &#hex; */
+                 isxdigit((int) str[ii+3]) ){   /* &#hex; */
 
            unsigned int val='?' ; int kk=ii+4 ;
            while( kk < ll && kk != ';' ) kk++ ;
@@ -6048,7 +6069,7 @@ static char *escapize_string( const char * str )
    }
    out = (char *)calloc(1,lout) ;     /* allocate output string */
    if( !out ){
-      fprintf(stderr,"** escapize_string: failed to alloc %d bytes\n",lout);
+      Rc_fprintf_stderr("** escapize_string: failed to alloc %d bytes\n",lout);
       return NULL;
    }
    out[0] = '\'' ;                    /* opening quote mark */
@@ -6083,13 +6104,17 @@ static char *escapize_string( const char * str )
 *//*-------------------------------------------------------------------------*/
 char *nifti_image_to_ascii( const nifti_image *nim )
 {
+#ifdef USING_R
+   Rf_error("nifti_image_to_ascii is currently unimplemented for R packages, for portability reasons");
+   return NULL;
+#else
    char *buf , *ebuf ; int nbuf ;
 
    if( nim == NULL ) return NULL ;   /* stupid caller */
 
-   buf = (char *)calloc(1,65534); nbuf = 0; /* longer than needed, to be safe */
+   buf = (char *)calloc(1,65534); /* longer than needed, to be safe */
    if( !buf ){
-      fprintf(stderr,"** NITA: failed to alloc %d bytes\n",65534);
+      Rc_fprintf_stderr("** NITA: failed to alloc %d bytes\n",65534);
       return NULL;
    }
 
@@ -6116,21 +6141,33 @@ char *nifti_image_to_ascii( const nifti_image *nim )
 
    sprintf( buf+strlen(buf) , "  image_offset = '%d'\n" , nim->iname_offset );
 
-                       sprintf( buf+strlen(buf), "  ndim = '%d'\n", nim->ndim);
-                       sprintf( buf+strlen(buf), "  nx = '%d'\n",   nim->nx  );
-   if( nim->ndim > 1 ) sprintf( buf+strlen(buf), "  ny = '%d'\n",   nim->ny  );
-   if( nim->ndim > 2 ) sprintf( buf+strlen(buf), "  nz = '%d'\n",   nim->nz  );
-   if( nim->ndim > 3 ) sprintf( buf+strlen(buf), "  nt = '%d'\n",   nim->nt  );
-   if( nim->ndim > 4 ) sprintf( buf+strlen(buf), "  nu = '%d'\n",   nim->nu  );
-   if( nim->ndim > 5 ) sprintf( buf+strlen(buf), "  nv = '%d'\n",   nim->nv  );
-   if( nim->ndim > 6 ) sprintf( buf+strlen(buf), "  nw = '%d'\n",   nim->nw  );
-                       sprintf( buf+strlen(buf), "  dx = '%g'\n",   nim->dx  );
-   if( nim->ndim > 1 ) sprintf( buf+strlen(buf), "  dy = '%g'\n",   nim->dy  );
-   if( nim->ndim > 2 ) sprintf( buf+strlen(buf), "  dz = '%g'\n",   nim->dz  );
-   if( nim->ndim > 3 ) sprintf( buf+strlen(buf), "  dt = '%g'\n",   nim->dt  );
-   if( nim->ndim > 4 ) sprintf( buf+strlen(buf), "  du = '%g'\n",   nim->du  );
-   if( nim->ndim > 5 ) sprintf( buf+strlen(buf), "  dv = '%g'\n",   nim->dv  );
-   if( nim->ndim > 6 ) sprintf( buf+strlen(buf), "  dw = '%g'\n",   nim->dw  );
+   sprintf(buf + strlen(buf), "  ndim = '%d'\n", nim->ndim);
+   sprintf(buf + strlen(buf), "  nx = '%d'\n", nim->nx);
+   if (nim->ndim > 1)
+     sprintf(buf + strlen(buf), "  ny = '%d'\n", nim->ny);
+   if (nim->ndim > 2)
+     sprintf(buf + strlen(buf), "  nz = '%d'\n", nim->nz);
+   if (nim->ndim > 3)
+     sprintf(buf + strlen(buf), "  nt = '%d'\n", nim->nt);
+   if (nim->ndim > 4)
+     sprintf(buf + strlen(buf), "  nu = '%d'\n", nim->nu);
+   if (nim->ndim > 5)
+     sprintf(buf + strlen(buf), "  nv = '%d'\n", nim->nv);
+   if (nim->ndim > 6)
+     sprintf(buf + strlen(buf), "  nw = '%d'\n", nim->nw);
+   sprintf(buf + strlen(buf), "  dx = '%g'\n", nim->dx);
+   if (nim->ndim > 1)
+     sprintf(buf + strlen(buf), "  dy = '%g'\n", nim->dy);
+   if (nim->ndim > 2)
+     sprintf(buf + strlen(buf), "  dz = '%g'\n", nim->dz);
+   if (nim->ndim > 3)
+     sprintf(buf + strlen(buf), "  dt = '%g'\n", nim->dt);
+   if (nim->ndim > 4)
+     sprintf(buf + strlen(buf), "  du = '%g'\n", nim->du);
+   if (nim->ndim > 5)
+     sprintf(buf + strlen(buf), "  dv = '%g'\n", nim->dv);
+   if (nim->ndim > 6)
+     sprintf(buf + strlen(buf), "  dw = '%g'\n", nim->dw);
 
    sprintf( buf+strlen(buf) , "  datatype = '%d'\n" , nim->datatype ) ;
    sprintf( buf+strlen(buf) , "  datatype_name = '%s'\n" ,
@@ -6308,12 +6345,14 @@ char *nifti_image_to_ascii( const nifti_image *nim )
 
    nbuf = (int)strlen(buf) ;
    buf  = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */
-   if( !buf ) fprintf(stderr,"** NITA: failed to realloc %d bytes\n",nbuf+1);
+   if( !buf ) Rc_fprintf_stderr("** NITA: failed to realloc %d bytes\n",nbuf+1);
    return buf ;
+#endif
 }
 
 /*---------------------------------------------------------------------------*/
 
+#ifndef RNIFTI_NIFTILIB_DEDUPLICATE
 /*----------------------------------------------------------------------*/
 /*! get the byte order for this CPU
 
@@ -6329,6 +6368,7 @@ int nifti_short_order(void)   /* determine this CPU's byte order */
 
    return (fred.ss == 1) ? LSB_FIRST : MSB_FIRST ;
 }
+#endif
 
 /*---------------------------------------------------------------------------*/
 
@@ -6339,11 +6379,11 @@ int nifti_short_order(void)   /* determine this CPU's byte order */
 /* macro to check lhs string against "n1"; if it matches,
    interpret rhs string as a number, and put it into nim->"n2" */
 
-#define QQNUM(n1,n2) if( strcmp(lhs,#n1)==0 ) nim->n2=strtod(rhs,NULL)
+#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)strtod(rhs,NULL)
 
 /* same, but where "n1" == "n2" */
 
-#define QNUM(nam)    QQNUM(nam,nam)
+#define QNUM(nam,tt)    QQNUM(nam,nam,tt)
 
 /* macro to check lhs string against "nam"; if it matches,
    put rhs string into nim->"nam" string, with max length = "ml" */
@@ -6371,7 +6411,6 @@ nifti_image *nifti_image_from_ascii( const char *str, int * bytes_read )
    /* scan for opening string */
 
    spos = 0 ;
-   if(!strlen(str)) return NULL;
    ii = sscanf( str+spos , "%1023s%n" , lhs , &nn ) ; spos += nn ;
    if( ii == 0 || strcmp(lhs,"<nifti_image") != 0 ) return NULL ;
 
@@ -6379,7 +6418,7 @@ nifti_image *nifti_image_from_ascii( const char *str, int * bytes_read )
 
    nim = (nifti_image *)calloc( 1 , sizeof(nifti_image) ) ;
    if( !nim ){
-      fprintf(stderr,"** NIFA: failed to alloc nifti_image\n");
+      Rc_fprintf_stderr("** NIFA: failed to alloc nifti_image\n");
       return NULL;
    }
 
@@ -6387,7 +6426,7 @@ nifti_image *nifti_image_from_ascii( const char *str, int * bytes_read )
            = nim->nu = nim->nv = nim->nw = 1 ;
    nim->dx = nim->dy = nim->dz = nim->dt
            = nim->du = nim->dv = nim->dw = 0 ;
-   nim->qfac = 1.0 ;
+   nim->qfac = 1.0f ;
 
    nim->byteorder = nifti_short_order() ;
 
@@ -6459,54 +6498,54 @@ nifti_image *nifti_image_from_ascii( const char *str, int * bytes_read )
        if( strcmp(rhs,"MSB_FIRST") == 0 ) nim->byteorder = MSB_FIRST ;
        if( strcmp(rhs,"LSB_FIRST") == 0 ) nim->byteorder = LSB_FIRST ;
      }
-     else QQNUM(image_offset,iname_offset) ;
-     else QNUM(datatype) ;
-     else QNUM(ndim) ;
-     else QNUM(nx) ;
-     else QNUM(ny) ;
-     else QNUM(nz) ;
-     else QNUM(nt) ;
-     else QNUM(nu) ;
-     else QNUM(nv) ;
-     else QNUM(nw) ;
-     else QNUM(dx) ;
-     else QNUM(dy) ;
-     else QNUM(dz) ;
-     else QNUM(dt) ;
-     else QNUM(du) ;
-     else QNUM(dv) ;
-     else QNUM(dw) ;
-     else QNUM(cal_min) ;
-     else QNUM(cal_max) ;
-     else QNUM(scl_slope) ;
-     else QNUM(scl_inter) ;
-     else QNUM(intent_code) ;
-     else QNUM(intent_p1) ;
-     else QNUM(intent_p2) ;
-     else QNUM(intent_p3) ;
+     else QQNUM(image_offset,iname_offset,int) ;
+     else QNUM(datatype,short int) ;
+     else QNUM(ndim,int) ;
+     else QNUM(nx,int) ;
+     else QNUM(ny,int) ;
+     else QNUM(nz,int) ;
+     else QNUM(nt,int) ;
+     else QNUM(nu,int) ;
+     else QNUM(nv,int) ;
+     else QNUM(nw,int) ;
+     else QNUM(dx,float) ;
+     else QNUM(dy,float) ;
+     else QNUM(dz,float) ;
+     else QNUM(dt,float) ;
+     else QNUM(du,float) ;
+     else QNUM(dv,float) ;
+     else QNUM(dw,float) ;
+     else QNUM(cal_min,float) ;
+     else QNUM(cal_max,float) ;
+     else QNUM(scl_slope,float) ;
+     else QNUM(scl_inter,float) ;
+     else QNUM(intent_code,short) ;
+     else QNUM(intent_p1,float) ;
+     else QNUM(intent_p2,float) ;
+     else QNUM(intent_p3,float) ;
      else QSTR(intent_name,15) ;
-     else QNUM(toffset) ;
-     else QNUM(xyz_units) ;
-     else QNUM(time_units) ;
+     else QNUM(toffset,float) ;
+     else QNUM(xyz_units,int) ;
+     else QNUM(time_units,int) ;
      else QSTR(descrip,79) ;
      else QSTR(aux_file,23) ;
-     else QNUM(qform_code) ;
-     else QNUM(quatern_b) ;
-     else QNUM(quatern_c) ;
-     else QNUM(quatern_d) ;
-     else QNUM(qoffset_x) ;
-     else QNUM(qoffset_y) ;
-     else QNUM(qoffset_z) ;
-     else QNUM(qfac) ;
-     else QNUM(sform_code) ;
-     else QNUM(freq_dim) ;
-     else QNUM(phase_dim) ;
-     else QNUM(slice_dim) ;
-     else QNUM(slice_code) ;
-     else QNUM(slice_start) ;
-     else QNUM(slice_end) ;
-     else QNUM(slice_duration) ;
-     else QNUM(num_ext) ;
+     else QNUM(qform_code,int) ;
+     else QNUM(quatern_b,float) ;
+     else QNUM(quatern_c,float) ;
+     else QNUM(quatern_d,float) ;
+     else QNUM(qoffset_x,float) ;
+     else QNUM(qoffset_y,float) ;
+     else QNUM(qoffset_z,float) ;
+     else QNUM(qfac,float) ;
+     else QNUM(sform_code,int) ;
+     else QNUM(freq_dim,int) ;
+     else QNUM(phase_dim,int) ;
+     else QNUM(slice_dim,int) ;
+     else QNUM(slice_code,int) ;
+     else QNUM(slice_start,int) ;
+     else QNUM(slice_end,int) ;
+     else QNUM(slice_duration,float) ;
+     else QNUM(num_ext,int) ;
 
    } /* end of while loop */
 
@@ -6539,8 +6578,8 @@ nifti_image *nifti_image_from_ascii( const char *str, int * bytes_read )
                       nim->qfac                                      ) ;
    else
      nim->qto_xyz = nifti_quatern_to_mat44(
-                      0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 ,
-                      nim->dx , nim->dy , nim->dz , 0.0 ) ;
+                      0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f ,
+                      nim->dx , nim->dy , nim->dz , 0.0f ) ;
 
 
    nim->qto_ijk = nifti_mat44_inverse( nim->qto_xyz ) ;
@@ -6564,11 +6603,11 @@ int nifti_nim_is_valid(nifti_image * nim, int complain)
    int errs = 0;
 
    if( !nim ){
-      fprintf(stderr,"** is_valid_nim: nim is NULL\n");
+      Rc_fprintf_stderr("** is_valid_nim: nim is NULL\n");
       return 0;
    }
 
-   if( g_opts.debug > 2 ) fprintf(stderr,"-d nim_is_valid check...\n");
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d nim_is_valid check...\n");
 
    /**- check that dim[] matches the individual values ndim, nx, ny, ... */
    if( ! nifti_nim_has_valid_dims(nim,complain) ){
@@ -6601,7 +6640,7 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain)
    if( nim->dim[0] <= 0 || nim->dim[0] > 7 ){
       errs++;
       if( complain )
-         fprintf(stderr,"** NVd: dim[0] (%d) out of range [1,7]\n",nim->dim[0]);
+         Rc_fprintf_stderr("** NVd: dim[0] (%d) out of range [1,7]\n",nim->dim[0]);
       return 0;
    }
 
@@ -6609,7 +6648,7 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain)
    if( nim->ndim != nim->dim[0] ){
       errs++;
       if( ! complain ) return 0;
-      fprintf(stderr,"** NVd: ndim != dim[0] (%d,%d)\n",nim->ndim,nim->dim[0]);
+      Rc_fprintf_stderr("** NVd: ndim != dim[0] (%d,%d)\n",nim->ndim,nim->dim[0]);
    }
 
    /**- compare each dim[i] to the proper nx, ny, ... */
@@ -6622,7 +6661,7 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain)
        ( (nim->dim[0] >= 7) && (nim->dim[7] != nim->nw) )   ){
       errs++;
       if( !complain ) return 0;
-      fprintf(stderr,"** NVd mismatch: dims    = %d,%d,%d,%d,%d,%d,%d\n"
+      Rc_fprintf_stderr("** NVd mismatch: dims    = %d,%d,%d,%d,%d,%d,%d\n"
                      "                 nxyz... = %d,%d,%d,%d,%d,%d,%d\n",
                      nim->dim[1], nim->dim[2], nim->dim[3],
                      nim->dim[4], nim->dim[5], nim->dim[6], nim->dim[7],
@@ -6631,9 +6670,9 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain)
    }
 
    if( g_opts.debug > 2 ){
-      fprintf(stderr,"-d check dim[%d] =", nim->dim[0]);
-      for( c = 0; c < 7; c++ ) fprintf(stderr," %d", nim->dim[c]);
-      fputc('\n', stderr);
+      Rc_fprintf_stderr("-d check dim[%d] =", nim->dim[0]);
+      for( c = 0; c < 7; c++ ) Rc_fprintf_stderr(" %d", nim->dim[c]);
+      Rc_fputc_stderr('\n');
    }
 
    /**- check the dimensions, and that their product matches nvox */
@@ -6643,13 +6682,13 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain)
          prod *= nim->dim[c];
       else if( nim->dim[c] <= 0 ){
          if( !complain ) return 0;
-         fprintf(stderr,"** NVd: dim[%d] (=%d) <= 0\n",c, nim->dim[c]);
+         Rc_fprintf_stderr("** NVd: dim[%d] (=%d) <= 0\n",c, nim->dim[c]);
          errs++;
       }
    }
    if( prod != nim->nvox ){
       if( ! complain ) return 0;
-      fprintf(stderr,"** NVd: nvox does not match %d-dim product (%u, %u)\n",
+      Rc_fprintf_stderr("** NVd: nvox does not match %d-dim product (%u, %u)\n",
               nim->dim[0], (unsigned)nim->nvox, (unsigned)prod);
       errs++;
    }
@@ -6660,11 +6699,11 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain)
    if( g_opts.debug > 1 )
       for( c = nim->dim[0]+1; c <= 7; c++ )
          if( nim->dim[c] != 0 && nim->dim[c] != 1 )
-            fprintf(stderr,"** NVd warning: dim[%d] = %d, but ndim = %d\n",
+            Rc_fprintf_stderr("** NVd warning: dim[%d] = %d, but ndim = %d\n",
                     c, nim->dim[c], nim->dim[0]);
 
    if( g_opts.debug > 2 )
-      fprintf(stderr,"-d nim_has_valid_dims check, errs = %d\n", errs);
+      Rc_fprintf_stderr("-d nim_has_valid_dims check, errs = %d\n", errs);
 
    /**- return invalid or valid */
    if( errs > 0 ) return 0;
@@ -6751,29 +6790,29 @@ int nifti_read_collapsed_image( nifti_image * nim, const int dims [8],
 
    /** - check pointers for sanity */
    if( !nim || !dims || !data ){
-      fprintf(stderr,"** nifti_RCI: bad params %p, %p, %p\n",
-              (void *)nim, (void *)dims, (void *)data);
+      Rc_fprintf_stderr("** nifti_RCI: bad params %p, %p, %p\n",
+              (void *)nim, (const void *)dims, (void *)data);
       return -1;
    }
 
    if( g_opts.debug > 2 ){
-      fprintf(stderr,"-d read_collapsed_image:\n        dims =");
-      for(c = 0; c < 8; c++) fprintf(stderr," %3d", dims[c]);
-      fprintf(stderr,"\n   nim->dims =");
-      for(c = 0; c < 8; c++) fprintf(stderr," %3d", nim->dim[c]);
-      fputc('\n', stderr);
+      Rc_fprintf_stderr("-d read_collapsed_image:\n        dims =");
+      for(c = 0; c < 8; c++) Rc_fprintf_stderr(" %3d", dims[c]);
+      Rc_fprintf_stderr("\n   nim->dims =");
+      for(c = 0; c < 8; c++) Rc_fprintf_stderr(" %3d", nim->dim[c]);
+      Rc_fputc_stderr('\n');
    }
 
    /** - verify that dim[] makes sense */
    if( ! nifti_nim_is_valid(nim, g_opts.debug > 0) ){
-      fprintf(stderr,"** invalid nim (file is '%s')\n", nim->fname );
+      Rc_fprintf_stderr("** invalid nim (file is '%s')\n", nim->fname );
       return -1;
    }
 
    /** - verify that dims[] makes sense for this dataset */
    for( c = 1; c <= nim->dim[0]; c++ ){
       if( dims[c] >= nim->dim[c] ){
-         fprintf(stderr,"** nifti_RCI: dims[%d] >= nim->dim[%d] (%d,%d)\n",
+         Rc_fprintf_stderr("** nifti_RCI: dims[%d] >= nim->dim[%d] (%d,%d)\n",
                  c, c, dims[c], nim->dim[c]);
          return -1;
       }
@@ -6797,7 +6836,7 @@ int nifti_read_collapsed_image( nifti_image * nim, const int dims [8],
    if( c < 0 ){ free(*data);  *data = NULL;  return -1; }    /* failure */
 
    if( g_opts.debug > 1 )
-      fprintf(stderr,"+d read %d bytes of collapsed image from %s\n",
+      Rc_fprintf_stderr("+d read %d bytes of collapsed image from %s\n",
               bytes, nim->fname);
 
    return bytes;
@@ -6848,8 +6887,8 @@ compute_strides(int *strides,const int *size,int nbyper)
         nifti_image_load, nifti_read_collapsed_image
 *//*-------------------------------------------------------------------------*/
 int nifti_read_subregion_image( nifti_image * nim,
-                                int *start_index,
-                                int *region_size,
+                                const int *start_index,
+                                const int *region_size,
                                 void ** data )
 {
   znzFile fp;                   /* file to read */
@@ -6918,7 +6957,7 @@ int nifti_read_subregion_image( nifti_image * nim,
       {
       if(g_opts.debug > 1)
         {
-        fprintf(stderr,"region doesn't fit within image size\n");
+        Rc_fprintf_stderr("region doesn't fit within image size\n");
         }
       return -1;
       }
@@ -6950,7 +6989,7 @@ int nifti_read_subregion_image( nifti_image * nim,
     {
     if(g_opts.debug > 1)
       {
-      fprintf(stderr,"allocation of %d bytes failed\n",total_alloc_size);
+      Rc_fprintf_stderr("allocation of %d bytes failed\n",total_alloc_size);
       return -1;
       }
     }
@@ -7001,7 +7040,7 @@ int nifti_read_subregion_image( nifti_image * nim,
                 {
                 if(g_opts.debug > 1)
                   {
-                  fprintf(stderr,"read of %d bytes failed\n",read_amount);
+                  Rc_fprintf_stderr("read of %d bytes failed\n",read_amount);
                   return -1;
                   }
                 }
@@ -7033,7 +7072,7 @@ static int rci_read_data(nifti_image * nim, int * pivots, int * prods,
 
    /* bad check first - base_offset may not have been checked */
    if( nprods <= 0 ){
-      fprintf(stderr,"** rci_read_data, bad prods, %d\n", nprods);
+      Rc_fprintf_stderr("** rci_read_data, bad prods, %d\n", nprods);
       return -1;
    }
 
@@ -7043,7 +7082,7 @@ static int rci_read_data(nifti_image * nim, int * pivots, int * prods,
 
       /* make sure things look good here */
       if( *pivots != 0 ){
-         fprintf(stderr,"** rciRD: final pivot == %d!\n", *pivots);
+         Rc_fprintf_stderr("** rciRD: final pivot == %d!\n", *pivots);
          return -1;
       }
 
@@ -7052,11 +7091,11 @@ static int rci_read_data(nifti_image * nim, int * pivots, int * prods,
       bytes = (size_t)prods[0] * nim->nbyper;
       nread = nifti_read_buffer(fp, data, bytes, nim);
       if( nread != bytes ){
-         fprintf(stderr,"** rciRD: read only %u of %u bytes from '%s'\n",
+         Rc_fprintf_stderr("** rciRD: read only %u of %u bytes from '%s'\n",
                  (unsigned)nread, (unsigned)bytes, nim->fname);
          return -1;
       } else if( g_opts.debug > 3 )
-         fprintf(stderr,"+d successful read of %u bytes at offset %u\n",
+         Rc_fprintf_stderr("+d successful read of %u bytes at offset %u\n",
                  (unsigned)bytes, (unsigned)base_offset);
 
       return 0;  /* done with base case - return success */
@@ -7081,7 +7120,7 @@ static int rci_read_data(nifti_image * nim, int * pivots, int * prods,
       offset *= nim->nbyper;
 
       if( g_opts.debug > 3 )
-         fprintf(stderr,"-d reading %u bytes, foff %u + %u, doff %u\n",
+         Rc_fprintf_stderr("-d reading %u bytes, foff %u + %u, doff %u\n",
                  (unsigned)read_size, (unsigned)base_offset, (unsigned)offset,
                  (unsigned)(c*read_size));
 
@@ -7102,32 +7141,32 @@ static int rci_read_data(nifti_image * nim, int * pivots, int * prods,
 
    return total size on success, and < 0 on failure
 */
-static int rci_alloc_mem(void ** data, int prods[8], int nprods, int nbyper )
+static int rci_alloc_mem(void ** data, const int prods[8], int nprods, int nbyper )
 {
-   int size, index;
+   int size, memindex;
 
    if( nbyper < 0 || nprods < 1 || nprods > 8 ){
-      fprintf(stderr,"** rci_am: bad params, %d, %d\n", nbyper, nprods);
+      Rc_fprintf_stderr("** rci_am: bad params, %d, %d\n", nbyper, nprods);
       return -1;
    }
 
-   for( index = 0, size = 1; index < nprods; index++ )
-       size *= prods[index];
+   for( memindex = 0, size = 1; memindex < nprods; memindex++ )
+       size *= prods[memindex];
 
    size *= nbyper;
 
    if( ! *data ){   /* then allocate what is needed */
       if( g_opts.debug > 1 )
-         fprintf(stderr,"+d alloc %d (= %d x %d) bytes for collapsed image\n",
+         Rc_fprintf_stderr("+d alloc %d (= %d x %d) bytes for collapsed image\n",
                  size, size/nbyper, nbyper);
 
       *data = malloc(size);   /* actually allocate the memory */
       if( ! *data ){
-         fprintf(stderr,"** rci_am: failed to alloc %d bytes for data\n", size);
+         Rc_fprintf_stderr("** rci_am: failed to alloc %d bytes for data\n", size);
          return -1;
       }
    } else if( g_opts.debug > 1 )
-      fprintf(stderr,"-d rci_am: *data already set, need %d (%d x %d) bytes\n",
+      Rc_fprintf_stderr("-d rci_am: *data already set, need %d (%d x %d) bytes\n",
               size, size/nbyper, nbyper);
 
    return size;
@@ -7143,23 +7182,25 @@ static int rci_alloc_mem(void ** data, int prods[8], int nprods, int nbyper )
 static int make_pivot_list(nifti_image * nim, const int dims[], int pivots[],
                                               int prods[], int * nprods )
 {
-   int len, index;
+   int len, dim_index;
 
    len = 0;
-   index = nim->dim[0];
-   while( index > 0 ){
+   dim_index = nim->dim[0];
+   while( dim_index > 0 ){
       prods[len] = 1;
-      while( index > 0 && (nim->dim[index] == 1 || dims[index] == -1) ){
-         prods[len] *= nim->dim[index];
-         index--;
+      while( dim_index > 0 && 
+             (nim->dim[dim_index] == 1 || dims[dim_index] == -1) ){
+         prods[len] *= nim->dim[dim_index];
+         dim_index--;
       }
-      pivots[len] = index;
+      pivots[len] = dim_index;
       len++;
-      index--;  /* fine, let it drop out at -1 */
+      dim_index--;  /* fine, let it drop out at -1 */
    }
 
    /* make sure to include 0 as a pivot (instead of just 1, if it is) */
-   if( pivots[len-1] != 0 ){
+   /* (check len, though we have already validated nifti_image) */
+   if( len > 0 && pivots[len-1] != 0 ){
       pivots[len] = 0;
       prods[len] = 1;
       len++;
@@ -7168,17 +7209,20 @@ static int make_pivot_list(nifti_image * nim, const int dims[], int pivots[],
    *nprods = len;
 
    if( g_opts.debug > 2 ){
-      fprintf(stderr,"+d pivot list created, pivots :");
-      for(index = 0; index < len; index++) fprintf(stderr," %d", pivots[index]);
-      fprintf(stderr,", prods :");
-      for(index = 0; index < len; index++) fprintf(stderr," %d", prods[index]);
-      fputc('\n',stderr);
+      Rc_fprintf_stderr("+d pivot list created, pivots :");
+      for(dim_index = 0; dim_index < len; dim_index++)
+          Rc_fprintf_stderr(" %d", pivots[dim_index]);
+      Rc_fprintf_stderr(", prods :");
+      for(dim_index = 0; dim_index < len; dim_index++)
+          Rc_fprintf_stderr(" %d", prods[dim_index]);
+      Rc_fputc_stderr('\n');
    }
 
    return 0;
 }
 
 
+#ifndef RNIFTI_NIFTILIB_DEDUPLICATE
 #undef ISEND
 #define ISEND(c) ( (c)==']' || (c)=='}' || (c)=='\0' )
 
@@ -7210,6 +7254,7 @@ static int make_pivot_list(nifti_image * nim, const int dims[], int pivots[],
 int * nifti_get_intlist( int nvals , const char * str )
 {
    int *subv = NULL ;
+   int *subv_realloc = NULL;
    int ii , ipos , nout , slen ;
    int ibot,itop,istep , nused ;
    char *cpt ;
@@ -7223,7 +7268,7 @@ int * nifti_get_intlist( int nvals , const char * str )
    /* skip initial '[' or '{' */
    subv = (int *)malloc( sizeof(int) * 2 ) ;
    if( !subv ) {
-      fprintf(stderr,"** nifti_get_intlist: failed alloc of 2 ints\n");
+      Rc_fprintf_stderr("** nifti_get_intlist: failed alloc of 2 ints\n");
       return NULL;
    }
    subv[0] = nout = 0 ;
@@ -7232,7 +7277,7 @@ int * nifti_get_intlist( int nvals , const char * str )
    if( str[ipos] == '[' || str[ipos] == '{' ) ipos++ ;
 
    if( g_opts.debug > 1 )
-      fprintf(stderr,"-d making int_list (vals = %d) from '%s'\n", nvals, str);
+      Rc_fprintf_stderr("-d making int_list (vals = %d) from '%s'\n", nvals, str);
 
    /**- for each sub-selector until end of input... */
 
@@ -7249,18 +7294,18 @@ int * nifti_get_intlist( int nvals , const char * str )
       } else {                 /* decode an integer */
          ibot = strtol( str+ipos , &cpt , 10 ) ;
          if( ibot < 0 ){
-           fprintf(stderr,"** ERROR: list index %d is out of range 0..%d\n",
+           Rc_fprintf_stderr("** ERROR: list index %d is out of range 0..%d\n",
                    ibot,nvals-1) ;
            free(subv) ; return NULL ;
          }
          if( ibot >= nvals ){
-           fprintf(stderr,"** ERROR: list index %d is out of range 0..%d\n",
+           Rc_fprintf_stderr("** ERROR: list index %d is out of range 0..%d\n",
                    ibot,nvals-1) ;
            free(subv) ; return NULL ;
          }
          nused = (cpt-(str+ipos)) ;
          if( ibot == 0 && nused == 0 ){
-           fprintf(stderr,"** ERROR: list syntax error '%s'\n",str+ipos) ;
+           Rc_fprintf_stderr("** ERROR: list syntax error '%s'\n",str+ipos) ;
            free(subv) ; return NULL ;
          }
          ipos += nused ;
@@ -7272,12 +7317,15 @@ int * nifti_get_intlist( int nvals , const char * str )
 
       if( str[ipos] == ',' || ISEND(str[ipos]) ){
          nout++ ;
-         subv = (int *)realloc( (char *)subv , sizeof(int) * (nout+1) ) ;
-         if( !subv ) {
-            fprintf(stderr,"** nifti_get_intlist: failed realloc of %d ints\n",
-                    nout+1);
-            return NULL;
+        subv_realloc = (int *)realloc( (char *)subv , sizeof(int) * (nout+1) ) ;
+         if( !subv_realloc ) {
+           free(subv);
+           Rc_fprintf_stderr("** nifti_get_intlist: failed realloc of %d ints\n",
+                   nout+1);
+           return NULL;
          }
+         subv=subv_realloc;
+
          subv[0]    = nout ;
          subv[nout] = ibot ;
          if( ISEND(str[ipos]) ) break ; /* done */
@@ -7291,7 +7339,7 @@ int * nifti_get_intlist( int nvals , const char * str )
       } else if( str[ipos] == '.' && str[ipos+1] == '.' ){
          ipos++ ; ipos++ ;
       } else {
-         fprintf(stderr,"** ERROR: index list syntax is bad: '%s'\n",
+         Rc_fprintf_stderr("** ERROR: index list syntax is bad: '%s'\n",
                  str+ipos) ;
          free(subv) ; return NULL ;
       }
@@ -7303,18 +7351,18 @@ int * nifti_get_intlist( int nvals , const char * str )
       } else {                 /* decode an integer */
          itop = strtol( str+ipos , &cpt , 10 ) ;
          if( itop < 0 ){
-           fprintf(stderr,"** ERROR: index %d is out of range 0..%d\n",
+           Rc_fprintf_stderr("** ERROR: index %d is out of range 0..%d\n",
                    itop,nvals-1) ;
            free(subv) ; return NULL ;
          }
          if( itop >= nvals ){
-           fprintf(stderr,"** ERROR: index %d is out of range 0..%d\n",
+           Rc_fprintf_stderr("** ERROR: index %d is out of range 0..%d\n",
                    itop,nvals-1) ;
            free(subv) ; return NULL ;
          }
          nused = (cpt-(str+ipos)) ;
          if( itop == 0 && nused == 0 ){
-           fprintf(stderr,"** ERROR: index list syntax error '%s'\n",str+ipos) ;
+           Rc_fprintf_stderr("** ERROR: index list syntax error '%s'\n",str+ipos) ;
            free(subv) ; return NULL ;
          }
          ipos += nused ;
@@ -7332,14 +7380,14 @@ int * nifti_get_intlist( int nvals , const char * str )
          ipos++ ;
          istep = strtol( str+ipos , &cpt , 10 ) ;
          if( istep == 0 ){
-           fprintf(stderr,"** ERROR: index loop step is 0!\n") ;
+           Rc_fprintf_stderr("** ERROR: index loop step is 0!\n") ;
            free(subv) ; return NULL ;
          }
          nused = (cpt-(str+ipos)) ;
          ipos += nused ;
          if( str[ipos] == ')' ) ipos++ ;
          if( (ibot-itop)*istep > 0 ){
-           fprintf(stderr,"** WARNING: index list '%d..%d(%d)' means nothing\n",
+           Rc_fprintf_stderr("** WARNING: index list '%d..%d(%d)' means nothing\n",
                    ibot,itop,istep ) ;
          }
       }
@@ -7348,12 +7396,14 @@ int * nifti_get_intlist( int nvals , const char * str )
 
       for( ii=ibot ; (ii-itop)*istep <= 0 ; ii += istep ){
          nout++ ;
-         subv = (int *)realloc( (char *)subv , sizeof(int) * (nout+1) ) ;
-         if( !subv ) {
-            fprintf(stderr,"** nifti_get_intlist: failed realloc of %d ints\n",
-                    nout+1);
-            return NULL;
+        subv_realloc = (int *)realloc( (char *)subv , sizeof(int) * (nout+1) ) ;
+         if( !subv_realloc ) {
+           free(subv);
+           Rc_fprintf_stderr("** nifti_get_intlist: failed realloc of %d ints\n",
+                   nout+1);
+           return NULL;
          }
+         subv=subv_realloc;
          subv[0]    = nout ;
          subv[nout] = ii ;
       }
@@ -7366,9 +7416,9 @@ int * nifti_get_intlist( int nvals , const char * str )
    }  /* end of loop through selector string */
 
    if( g_opts.debug > 1 ) {
-      fprintf(stderr,"+d int_list (vals = %d): ", subv[0]);
-      for( ii = 1; ii <= subv[0]; ii++ ) fprintf(stderr,"%d ", subv[ii]);
-      fputc('\n',stderr);
+      Rc_fprintf_stderr("+d int_list (vals = %d): ", subv[0]);
+      for( ii = 1; ii <= subv[0]; ii++ ) Rc_fprintf_stderr("%d ", subv[ii]);
+      Rc_fputc_stderr('\n');
    }
 
    if( subv[0] == 0 ){ free(subv); subv = NULL; }
@@ -7400,7 +7450,7 @@ int nifti_datatype_from_string( const char * name )
  *  corresponding macro label as a string.  The dtype code is the
  *  macro value defined in nifti1.h.
 *//*-------------------------------------------------------------------*/
-char * nifti_datatype_to_string( int dtype )
+const char * nifti_datatype_to_string( int dtype )
 {
     int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele);
     int c;
@@ -7459,7 +7509,7 @@ int nifti_test_datatype_sizes(int verb)
                 ssize != nifti_type_list[c].swapsize )
         {
             if( verb || g_opts.debug > 2 )
-                fprintf(stderr, "** type mismatch: %s, %d, %d, %d : %d, %d\n",
+                Rc_fprintf_stderr( "** type mismatch: %s, %d, %d, %d : %d, %d\n",
                     nifti_type_list[c].name, nifti_type_list[c].type,
                     nifti_type_list[c].nbyper, nifti_type_list[c].swapsize,
                     nbyper, ssize);
@@ -7468,9 +7518,9 @@ int nifti_test_datatype_sizes(int verb)
     }
 
     if( errs )
-        fprintf(stderr,"** nifti_test_datatype_sizes: found %d errors\n",errs);
+        Rc_fprintf_stderr("** nifti_test_datatype_sizes: found %d errors\n",errs);
     else if( verb || g_opts.debug > 1 )
-        fprintf(stderr,"-- nifti_test_datatype_sizes: all OK\n");
+        Rc_fprintf_stderr("-- nifti_test_datatype_sizes: all OK\n");
 
     return errs;
 }
@@ -7485,7 +7535,7 @@ int nifti_test_datatype_sizes(int verb)
 *//*-------------------------------------------------------------------*/
 int nifti_disp_type_list( int which )
 {
-    char * style;
+    const char * style;
     int    tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele);
     int    lwhich, c;
 
@@ -7493,14 +7543,14 @@ int nifti_disp_type_list( int which )
     else if( which == 2 ){ lwhich = 2; style = "NIFTI_TYPE_"; }
     else                 { lwhich = 3; style = "ALL"; }
 
-    printf("nifti_type_list entries (%s) :\n"
+    Rc_printf("nifti_type_list entries (%s) :\n"
            "  name                    type    nbyper    swapsize\n"
            "  ---------------------   ----    ------    --------\n", style);
 
     for( c = 0; c < tablen; c++ )
         if( (lwhich & 1 && nifti_type_list[c].name[0] == 'D')  ||
             (lwhich & 2 && nifti_type_list[c].name[0] == 'N')     )
-            printf("  %-22s %5d     %3d      %5d\n",
+            Rc_printf("  %-22s %5d     %3d      %5d\n",
                    nifti_type_list[c].name,
                    nifti_type_list[c].type,
                    nifti_type_list[c].nbyper,
@@ -7508,5 +7558,4 @@ int nifti_disp_type_list( int which )
 
     return 0;
 }
-
-
+#endif
diff --git a/reg-io/niftilib/nifti1_io.h b/reg-io/niftilib/nifti1_io.h
new file mode 100644
index 00000000..2927d31a
--- /dev/null
+++ b/reg-io/niftilib/nifti1_io.h
@@ -0,0 +1,587 @@
+/** \file nifti1_io.h
+    \brief Data structures for using nifti1_io API.
+           - Written by Bob Cox, SSCC NIMH
+           - Revisions by Rick Reynolds, SSCC NIMH
+ */
+#ifndef _NIFTI_IO_HEADER_
+#define _NIFTI_IO_HEADER_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <ctype.h>
+
+#ifndef DONT_INCLUDE_ANALYZE_STRUCT
+#define DONT_INCLUDE_ANALYZE_STRUCT  /*** not needed herein ***/
+#endif
+#include "niftilib/nifti1.h"                  /*** NIFTI-1 header specification ***/
+
+#ifndef RNIFTI_NIFTILIB_VERSION
+#define RNIFTI_NIFTILIB_VERSION 1
+#endif
+
+#include "RNifti/NiftiImage_print.h"
+#include <znzlib/znzlib.h>
+
+/*=================*/
+#ifdef  __cplusplus
+extern "C" {
+#endif
+/*=================*/
+
+/*****===================================================================*****/
+/*****         File nifti1_io.h == Declarations for nifti1_io.c          *****/
+/*****...................................................................*****/
+/*****            This code is released to the public domain.            *****/
+/*****...................................................................*****/
+/*****  Author: Robert W Cox, SSCC/DIRP/NIMH/NIH/DHHS/USA/EARTH          *****/
+/*****  Date:   August 2003                                              *****/
+/*****...................................................................*****/
+/*****  Neither the National Institutes of Health (NIH), nor any of its  *****/
+/*****  employees imply any warranty of usefulness of this software for  *****/
+/*****  any purpose, and do not assume any liability for damages,        *****/
+/*****  incidental or otherwise, caused by any use of this document.     *****/
+/*****===================================================================*****/
+
+/*
+   Modified by: Mark Jenkinson (FMRIB Centre, University of Oxford, UK)
+   Date: July/August 2004
+
+      Mainly adding low-level IO and changing things to allow gzipped files
+      to be read and written
+      Full backwards compatability should have been maintained
+
+   Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health)
+   Date: December 2004
+
+      Modified and added many routines for I/O.
+*/
+
+/********************** Some sample data structures **************************/
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+
+typedef struct {                   /** 4x4 matrix struct **/
+  float m[4][4] ;
+} mat44 ;
+
+typedef struct {                   /** 3x3 matrix struct **/
+  float m[3][3] ;
+} mat33 ;
+
+/*...........................................................................*/
+
+/*! \enum analyze_75_orient_code
+ *  \brief Old-style analyze75 orientation
+ *         codes.
+ */
+typedef enum _analyze75_orient_code {
+  a75_transverse_unflipped = 0,
+  a75_coronal_unflipped = 1,
+  a75_sagittal_unflipped = 2,
+  a75_transverse_flipped = 3,
+  a75_coronal_flipped = 4,
+  a75_sagittal_flipped = 5,
+  a75_orient_unknown = 6
+} analyze_75_orient_code;
+
+/*! \struct nifti_image
+    \brief High level data structure for open nifti datasets in the
+           nifti1_io API.  Note that this structure is not part of the
+           nifti1 format definition; it is used to implement one API
+           for reading/writing formats in the nifti1 format.
+ */
+typedef struct {                /*!< Image storage struct **/
+
+  int ndim ;                    /*!< last dimension greater than 1 (1..7) */
+  int nx ;                      /*!< dimensions of grid array             */
+  int ny ;                      /*!< dimensions of grid array             */
+  int nz ;                      /*!< dimensions of grid array             */
+  int nt ;                      /*!< dimensions of grid array             */
+  int nu ;                      /*!< dimensions of grid array             */
+  int nv ;                      /*!< dimensions of grid array             */
+  int nw ;                      /*!< dimensions of grid array             */
+  int dim[8] ;                  /*!< dim[0]=ndim, dim[1]=nx, etc.         */
+  size_t nvox ;                    /*!< number of voxels = nx*ny*nz*...*nw   */
+  int nbyper ;                  /*!< bytes per voxel, matches datatype    */
+  int datatype ;                /*!< type of data in voxels: DT_* code    */
+
+  float dx ;                    /*!< grid spacings      */
+  float dy ;                    /*!< grid spacings      */
+  float dz ;                    /*!< grid spacings      */
+  float dt ;                    /*!< grid spacings      */
+  float du ;                    /*!< grid spacings      */
+  float dv ;                    /*!< grid spacings      */
+  float dw ;                    /*!< grid spacings      */
+  float pixdim[8] ;             /*!< pixdim[1]=dx, etc. */
+
+  float scl_slope ;             /*!< scaling parameter - slope        */
+  float scl_inter ;             /*!< scaling parameter - intercept    */
+
+  float cal_min ;               /*!< calibration parameter, minimum   */
+  float cal_max ;               /*!< calibration parameter, maximum   */
+
+  int qform_code ;              /*!< codes for (x,y,z) space meaning  */
+  int sform_code ;              /*!< codes for (x,y,z) space meaning  */
+
+  int freq_dim  ;               /*!< indexes (1,2,3, or 0) for MRI    */
+  int phase_dim ;               /*!< directions in dim[]/pixdim[]     */
+  int slice_dim ;               /*!< directions in dim[]/pixdim[]     */
+
+  int   slice_code  ;           /*!< code for slice timing pattern    */
+  int   slice_start ;           /*!< index for start of slices        */
+  int   slice_end   ;           /*!< index for end of slices          */
+  float slice_duration ;        /*!< time between individual slices   */
+
+  /*! quaternion transform parameters
+    [when writing a dataset, these are used for qform, NOT qto_xyz]   */
+  float quatern_b , quatern_c , quatern_d ,
+        qoffset_x , qoffset_y , qoffset_z ,
+        qfac      ;
+
+  mat44 qto_xyz ;               /*!< qform: transform (i,j,k) to (x,y,z) */
+  mat44 qto_ijk ;               /*!< qform: transform (x,y,z) to (i,j,k) */
+
+  mat44 sto_xyz ;               /*!< sform: transform (i,j,k) to (x,y,z) */
+  mat44 sto_ijk ;               /*!< sform: transform (x,y,z) to (i,j,k) */
+
+  float toffset ;               /*!< time coordinate offset */
+
+  int xyz_units  ;              /*!< dx,dy,dz units: NIFTI_UNITS_* code  */
+  int time_units ;              /*!< dt       units: NIFTI_UNITS_* code  */
+
+  int nifti_type ;              /*!< 0==ANALYZE, 1==NIFTI-1 (1 file),
+                                                 2==NIFTI-1 (2 files),
+                                                 3==NIFTI-ASCII (1 file) */
+  int   intent_code ;           /*!< statistic type (or something)       */
+  float intent_p1 ;             /*!< intent parameters                   */
+  float intent_p2 ;             /*!< intent parameters                   */
+  float intent_p3 ;             /*!< intent parameters                   */
+  char  intent_name[16] ;       /*!< optional description of intent data */
+
+  char descrip[80]  ;           /*!< optional text to describe dataset   */
+  char aux_file[24] ;           /*!< auxiliary filename                  */
+
+  char *fname ;                 /*!< header filename (.hdr or .nii)         */
+  char *iname ;                 /*!< image filename  (.img or .nii)         */
+  int   iname_offset ;          /*!< offset into iname where data starts    */
+  int   swapsize ;              /*!< swap unit in image data (might be 0)   */
+  int   byteorder ;             /*!< byte order on disk (MSB_ or LSB_FIRST) */
+  void *data ;                  /*!< pointer to data: nbyper*nvox bytes     */
+
+  int                num_ext ;  /*!< number of extensions in ext_list       */
+  nifti1_extension * ext_list ; /*!< array of extension structs (with data) */
+  analyze_75_orient_code analyze75_orient; /*!< for old analyze files, orient */
+
+} nifti1_image ;
+#endif
+
+
+/* struct for return from nifti_image_read_bricks() */
+typedef struct {
+  int       nbricks;    /* the number of allocated pointers in 'bricks' */
+  size_t    bsize;      /* the length of each data block, in bytes      */
+  void   ** bricks;     /* array of pointers to data blocks             */
+} nifti1_brick_list;
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+typedef nifti1_image        nifti_image;
+typedef nifti1_brick_list   nifti_brick_list;
+#endif
+
+/*****************************************************************************/
+/*------------------ NIfTI version of ANALYZE 7.5 structure -----------------*/
+
+/* (based on fsliolib/dbh.h, but updated for version 7.5) */
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+typedef struct {
+       /* header info fields - describes the header    overlap with NIfTI */
+       /*                                              ------------------ */
+       int sizeof_hdr;                  /* 0 + 4        same              */
+       char data_type[10];              /* 4 + 10       same              */
+       char db_name[18];                /* 14 + 18      same              */
+       int extents;                     /* 32 + 4       same              */
+       short int session_error;         /* 36 + 2       same              */
+       char regular;                    /* 38 + 1       same              */
+       char hkey_un0;                   /* 39 + 1                40 bytes */
+
+       /* image dimension fields - describes image sizes */
+       short int dim[8];                /* 0 + 16       same              */
+       short int unused8;               /* 16 + 2       intent_p1...      */
+       short int unused9;               /* 18 + 2         ...             */
+       short int unused10;              /* 20 + 2       intent_p2...      */
+       short int unused11;              /* 22 + 2         ...             */
+       short int unused12;              /* 24 + 2       intent_p3...      */
+       short int unused13;              /* 26 + 2         ...             */
+       short int unused14;              /* 28 + 2       intent_code       */
+       short int datatype;              /* 30 + 2       same              */
+       short int bitpix;                /* 32 + 2       same              */
+       short int dim_un0;               /* 34 + 2       slice_start       */
+       float pixdim[8];                 /* 36 + 32      same              */
+
+       float vox_offset;                /* 68 + 4       same              */
+       float funused1;                  /* 72 + 4       scl_slope         */
+       float funused2;                  /* 76 + 4       scl_inter         */
+       float funused3;                  /* 80 + 4       slice_end,        */
+                                                     /* slice_code,       */
+                                                     /* xyzt_units        */
+       float cal_max;                   /* 84 + 4       same              */
+       float cal_min;                   /* 88 + 4       same              */
+       float compressed;                /* 92 + 4       slice_duration    */
+       float verified;                  /* 96 + 4       toffset           */
+       int glmax,glmin;                 /* 100 + 8              108 bytes */
+
+       /* data history fields - optional */
+       char descrip[80];                /* 0 + 80       same              */
+       char aux_file[24];               /* 80 + 24      same              */
+       char orient;                     /* 104 + 1      NO GOOD OVERLAP   */
+       char originator[10];             /* 105 + 10     FROM HERE DOWN... */
+       char generated[10];              /* 115 + 10                       */
+       char scannum[10];                /* 125 + 10                       */
+       char patient_id[10];             /* 135 + 10                       */
+       char exp_date[10];               /* 145 + 10                       */
+       char exp_time[10];               /* 155 + 10                       */
+       char hist_un0[3];                /* 165 + 3                        */
+       int views;                       /* 168 + 4                        */
+       int vols_added;                  /* 172 + 4                        */
+       int start_field;                 /* 176 + 4                        */
+       int field_skip;                  /* 180 + 4                        */
+       int omax, omin;                  /* 184 + 8                        */
+       int smax, smin;                  /* 192 + 8              200 bytes */
+} nifti_analyze75;                                   /* total:  348 bytes */
+#endif
+
+/*****************************************************************************/
+/*--------------- Prototypes of functions defined in this file --------------*/
+
+char const * nifti_datatype_string   ( int dt ) ;
+char const *nifti_units_string      ( int uu ) ;
+char const *nifti_intent_string     ( int ii ) ;
+char const *nifti_xform_string      ( int xx ) ;
+char const *nifti_slice_string      ( int ss ) ;
+char const *nifti_orientation_string( int ii ) ;
+
+int   nifti_is_inttype( int dt ) ;
+
+mat44 nifti_mat44_inverse( mat44 R ) ;
+
+mat33 nifti_mat33_inverse( mat33 R ) ;
+mat33 nifti_mat33_polar  ( mat33 A ) ;
+float nifti_mat33_rownorm( mat33 A ) ;
+float nifti_mat33_colnorm( mat33 A ) ;
+float nifti_mat33_determ ( mat33 R ) ;
+mat33 nifti_mat33_mul    ( mat33 A , mat33 B ) ;
+
+#if RNIFTI_NIFTILIB_VERSION == 1
+void  nifti_swap_2bytes ( size_t n , void *ar ) ;
+void  nifti_swap_4bytes ( size_t n , void *ar ) ;
+void  nifti_swap_8bytes ( size_t n , void *ar ) ;
+void  nifti_swap_16bytes( size_t n , void *ar ) ;
+void  nifti_swap_Nbytes ( size_t n , int siz , void *ar ) ;
+#endif
+
+int    nifti_datatype_is_valid   (int dtype, int for_nifti);
+int    nifti_datatype_from_string(const char * name);
+const char * nifti_datatype_to_string  (int dtype);
+
+int   nifti_get_filesize( const char *pathname ) ;
+#if RNIFTI_NIFTILIB_VERSION == 1
+void  swap_nifti_header ( struct nifti_1_header *h , int is_nifti ) ;
+#endif
+void  old_swap_nifti_header( struct nifti_1_header *h , int is_nifti );
+#if RNIFTI_NIFTILIB_VERSION == 1
+int   nifti_swap_as_analyze( nifti_analyze75 *h );
+#endif
+
+
+/* main read/write routines */
+
+nifti_image *nifti_image_read_bricks(const char *hname , int nbricks,
+                                     const int *blist, nifti_brick_list * NBL);
+int          nifti_image_load_bricks(nifti_image *nim , int nbricks,
+                                     const int *blist, nifti_brick_list * NBL);
+void         nifti_free_NBL( nifti_brick_list * NBL );
+
+nifti_image *nifti_image_read    ( const char *hname , int read_data ) ;
+int          nifti_image_load    ( nifti_image *nim ) ;
+void         nifti_image_unload  ( nifti_image *nim ) ;
+void         nifti_image_free    ( nifti_image *nim ) ;
+
+int          nifti_read_collapsed_image( nifti_image * nim, const int dims [8],
+                                         void ** data );
+
+int          nifti_read_subregion_image( nifti_image * nim,
+                                         const int *start_index, const int *region_size,
+                                         void ** data );
+
+void         nifti_image_write   ( nifti_image * nim ) ;
+void         nifti_image_write_bricks(nifti_image * nim,
+                                      const nifti_brick_list * NBL);
+void         nifti_image_infodump( const nifti_image * nim ) ;
+
+void         nifti_disp_lib_hist( void ) ;     /* to display library history */
+void         nifti_disp_lib_version( void ) ;  /* to display library version */
+int          nifti_disp_matrix_orient( const char * mesg, mat44 mat );
+int          nifti_disp_type_list( int which );
+
+
+char *       nifti_image_to_ascii  ( const nifti_image * nim ) ;
+nifti_image *nifti_image_from_ascii( const char * str, int * bytes_read ) ;
+
+size_t       nifti_get_volsize(const nifti_image *nim) ;
+
+/* basic file operations */
+int    nifti_set_filenames(nifti_image * nim, const char * prefix, int check,
+                           int set_byte_order);
+char * nifti_makehdrname  (const char * prefix, int nifti_type, int check,
+                           int comp);
+char * nifti_makeimgname  (const char * prefix, int nifti_type, int check,
+                           int comp);
+int    is_nifti_file      (const char *hname);
+char * nifti_find_file_extension(const char * name);
+int    nifti_is_complete_filename(const char* fname);
+int    nifti_validfilename(const char* fname);
+
+int    disp_nifti_1_header(const char * info, const nifti_1_header * hp ) ;
+void   nifti_set_debug_level( int level ) ;
+void   nifti_set_skip_blank_ext( int skip ) ;
+void   nifti_set_allow_upper_fext( int allow ) ;
+
+int    valid_nifti_brick_list(nifti_image * nim , int nbricks,
+                              const int * blist, int disp_error);
+
+/* znzFile operations */
+znzFile nifti_image_open(const char * hname, const char * opts, nifti_image ** nim);
+znzFile nifti_image_write_hdr_img(nifti_image *nim, int write_data,
+                                  const char* opts);
+znzFile nifti_image_write_hdr_img2( nifti_image *nim , int write_opts ,
+               const char* opts, znzFile imgfile, const nifti_brick_list * NBL);
+size_t  nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot,
+                         nifti_image *nim);
+int     nifti_write_all_data(znzFile fp, nifti_image * nim,
+                             const nifti_brick_list * NBL);
+size_t  nifti_write_buffer(znzFile fp, const void * buffer, size_t numbytes);
+nifti_image *nifti_read_ascii_image(znzFile fp, char *fname, int flen,
+                         int read_data);
+znzFile nifti_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL,
+                         const char * opts, int write_data, int leave_open);
+
+
+void nifti_datatype_sizes( int datatype , int *nbyper, int *swapsize ) ;
+
+void nifti_mat44_to_quatern( mat44 R ,
+                             float *qb, float *qc, float *qd,
+                             float *qx, float *qy, float *qz,
+                             float *dx, float *dy, float *dz, float *qfac ) ;
+
+mat44 nifti_quatern_to_mat44( float qb, float qc, float qd,
+                              float qx, float qy, float qz,
+                              float dx, float dy, float dz, float qfac );
+
+mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 ,
+                               float r21, float r22, float r23 ,
+                               float r31, float r32, float r33  ) ;
+
+int nifti_short_order(void) ;              /* CPU byte order */
+
+
+/* Orientation codes that might be returned from nifti_mat44_to_orientation().*/
+
+#define NIFTI_L2R  1    /* Left to Right         */
+#define NIFTI_R2L  2    /* Right to Left         */
+#define NIFTI_P2A  3    /* Posterior to Anterior */
+#define NIFTI_A2P  4    /* Anterior to Posterior */
+#define NIFTI_I2S  5    /* Inferior to Superior  */
+#define NIFTI_S2I  6    /* Superior to Inferior  */
+
+void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) ;
+
+/*--------------------- Low level IO routines ------------------------------*/
+
+char * nifti_findhdrname (const char* fname);
+char * nifti_findimgname (const char* fname , int nifti_type);
+int    nifti_is_gzfile   (const char* fname);
+
+char * nifti_makebasename(const char* fname);
+
+
+/* other routines */
+struct nifti_1_header   nifti_convert_nim2nhdr(const nifti_image* nim);
+nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype);
+nifti_1_header * nifti_read_header(const char *hname, int *swapped, int check);
+nifti_image    * nifti_copy_nim_info(const nifti_image * src);
+nifti_image    * nifti_make_new_nim(const int dims[], int datatype,
+                                                      int data_fill);
+nifti_image    * nifti_simple_init_nim(void);
+nifti_image    * nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
+                                        const char * fname);
+
+int    nifti_hdr_looks_good        (const nifti_1_header * hdr);
+int    nifti_is_valid_datatype     (int dtype);
+int    nifti_is_valid_ecode        (int ecode);
+int    nifti_nim_is_valid          (nifti_image * nim, int complain);
+int    nifti_nim_has_valid_dims    (nifti_image * nim, int complain);
+int    is_valid_nifti_type         (int nifti_type);
+int    nifti_test_datatype_sizes   (int verb);
+int    nifti_type_and_names_match  (nifti_image * nim, int show_warn);
+int    nifti_update_dims_from_array(nifti_image * nim);
+void   nifti_set_iname_offset      (nifti_image *nim);
+int    nifti_set_type_from_names   (nifti_image * nim);
+int    nifti_add_extension(nifti_image * nim, const char * data, int len,
+                           int ecode );
+int    nifti_compiled_with_zlib    (void);
+int    nifti_copy_extensions (nifti_image *nim_dest,const nifti_image *nim_src);
+int    nifti_free_extensions (nifti_image *nim);
+int  * nifti_get_intlist     (int nvals , const char *str);
+char * nifti_strdup          (const char *str);
+int    valid_nifti_extensions(const nifti_image *nim);
+
+
+/*-------------------- Some C convenience macros ----------------------------*/
+
+/* NIfTI-1.1 extension codes:
+   see http://nifti.nimh.nih.gov/nifti-1/documentation/faq#Q21 */
+
+#define NIFTI_ECODE_IGNORE           0  /* changed from UNKNOWN, 29 June 2005 */
+
+#define NIFTI_ECODE_DICOM            2  /* intended for raw DICOM attributes  */
+
+#define NIFTI_ECODE_AFNI             4  /* Robert W Cox: rwcox@nih.gov
+                                           https://afni.nimh.nih.gov/afni     */
+
+#define NIFTI_ECODE_COMMENT          6  /* plain ASCII text only              */
+
+#define NIFTI_ECODE_XCEDE            8  /* David B Keator: dbkeator@uci.edu
+                                           http://www.nbirn.net/Resources
+                                                /Users/Applications/
+                                                /xcede/index.htm              */
+
+#define NIFTI_ECODE_JIMDIMINFO      10  /* Mark A Horsfield:
+                                           mah5@leicester.ac.uk
+                                           http://someplace/something         */
+
+#define NIFTI_ECODE_WORKFLOW_FWDS   12  /* Kate Fissell: fissell@pitt.edu
+                                           http://kraepelin.wpic.pitt.edu
+                                            /~fissell/NIFTI_ECODE_WORKFLOW_FWDS
+                                            /NIFTI_ECODE_WORKFLOW_FWDS.html   */
+
+#define NIFTI_ECODE_FREESURFER      14  /* http://surfer.nmr.mgh.harvard.edu  */
+
+#define NIFTI_ECODE_PYPICKLE        16  /* embedded Python objects
+                                           http://niftilib.sourceforge.net
+                                                 /pynifti                     */
+
+        /* LONI MiND codes: http://www.loni.ucla.edu/twiki/bin/view/Main/MiND */
+#define NIFTI_ECODE_MIND_IDENT      18  /* Vishal Patel: vishal.patel@ucla.edu*/
+#define NIFTI_ECODE_B_VALUE         20
+#define NIFTI_ECODE_SPHERICAL_DIRECTION 22
+#define NIFTI_ECODE_DT_COMPONENT    24
+#define NIFTI_ECODE_SHC_DEGREEORDER 26  /* end LONI MiND codes                */
+
+#define NIFTI_ECODE_VOXBO           28  /* Dan Kimberg: www.voxbo.org         */
+
+#define NIFTI_ECODE_CARET           30  /* John Harwell: john@brainvis.wustl.edu
+                                           http://brainvis.wustl.edu/wiki
+                                             /index.php/Caret:Documentation
+                                             :CaretNiftiExtension             */
+
+#define NIFTI_ECODE_CIFTI           32  /* CIFTI-2_Main_FINAL_1March2014.pdf */
+
+#define NIFTI_ECODE_VARIABLE_FRAME_TIMING 34
+
+/* 36 is currently unassigned, waiting on NIFTI_ECODE_AGILENT_PROCPAR */
+
+#define NIFTI_ECODE_EVAL            38  /* Munster University Hospital */
+
+/* http://www.mathworks.com/matlabcentral/fileexchange/42997-dicom-to-nifti-converter */
+#define NIFTI_ECODE_MATLAB          40  /* MATLAB extension */
+
+/* Quantiphyse extension
+   https://quantiphyse.readthedocs.io/en/latest/advanced/nifti_extension.html*/
+#define NIFTI_ECODE_QUANTIPHYSE     42  /* Quantiphyse extension */
+
+/* Magnetic Resonance Spectroscopy (MRS)
+   link to come... */
+#define NIFTI_ECODE_MRS             44  /* MRS extension */
+
+#define NIFTI_MAX_ECODE             44  /******* maximum extension code *******/
+
+/* nifti_type file codes */
+#if RNIFTI_NIFTILIB_VERSION == 1
+#define NIFTI_FTYPE_ANALYZE   0
+#define NIFTI_FTYPE_NIFTI1_1  1
+#define NIFTI_FTYPE_NIFTI1_2  2
+#define NIFTI_FTYPE_ASCII     3
+#define NIFTI_MAX_FTYPE       3    /* this should match the maximum code */
+#endif
+
+/*------------------------------------------------------------------------*/
+/*-- the rest of these apply only to nifti1_io.c, check for _NIFTI1_IO_C_ */
+/*                                                    Feb 9, 2005 [rickr] */
+#ifdef _NIFTI1_IO_C_
+
+typedef struct {
+    int debug;               /*!< debug level for status reports  */
+    int skip_blank_ext;      /*!< skip extender if no extensions  */
+    int allow_upper_fext;    /*!< allow uppercase file extensions */
+} nifti_global_options;
+
+typedef struct {
+    int    type;           /* should match the NIFTI_TYPE_ #define */
+    int    nbyper;         /* bytes per value, matches nifti_image */
+    int    swapsize;       /* bytes per swap piece, matches nifti_image */
+    char const * const name;           /* text string to match #define */
+} nifti_type_ele;
+
+#undef  LNI_FERR /* local nifti file error, to be compact and repetative */
+#ifdef USING_R
+#define LNI_FERR(func,msg,file)                                      \
+            Rf_warning("%s: %s '%s'\n",func,msg,file)
+#else
+#define LNI_FERR(func,msg,file)                                      \
+            Rc_fprintf_stderr("** ERROR (%s): %s '%s'\n",func,msg,file)
+#endif
+
+#undef  swap_2
+#undef  swap_4
+#define swap_2(s) nifti_swap_2bytes(1,&(s)) /* s: 2-byte short; swap in place */
+#define swap_4(v) nifti_swap_4bytes(1,&(v)) /* v: 4-byte value; swap in place */
+
+                        /***** isfinite() is a C99 macro, which is
+                               present in many C implementations already *****/
+
+#undef IS_GOOD_FLOAT
+#undef FIXED_FLOAT
+
+#ifdef isfinite       /* use isfinite() to check floats/doubles for goodness */
+#  define IS_GOOD_FLOAT(x) isfinite(x)       /* check if x is a "good" float */
+#  define FIXED_FLOAT(x)   (isfinite(x) ? (x) : 0)           /* fixed if bad */
+#else
+#  define IS_GOOD_FLOAT(x) 1                               /* don't check it */
+#  define FIXED_FLOAT(x)   (x)                               /* don't fix it */
+#endif
+
+#undef  ASSIF                                 /* assign v to *p, if possible */
+#define ASSIF(p,v) if( (p)!=NULL ) *(p) = (v)
+
+#undef  MSB_FIRST
+#undef  LSB_FIRST
+#undef  REVERSE_ORDER
+#define LSB_FIRST 1
+#define MSB_FIRST 2
+#define REVERSE_ORDER(x) (3-(x))    /* convert MSB_FIRST <--> LSB_FIRST */
+
+#define LNI_MAX_NIA_EXT_LEN 100000  /* consider a longer extension invalid */
+
+#endif  /* _NIFTI1_IO_C_ section */
+/*------------------------------------------------------------------------*/
+
+/*=================*/
+#ifdef  __cplusplus
+}
+#endif
+/*=================*/
+
+#endif /* _NIFTI_IO_HEADER_ */
diff --git a/reg-io/niftilib/nifti2.h b/reg-io/niftilib/nifti2.h
new file mode 100644
index 00000000..ab47f3cd
--- /dev/null
+++ b/reg-io/niftilib/nifti2.h
@@ -0,0 +1,117 @@
+/** \file nifti2.h
+    \brief Header structure for NIFTI-2 format.
+ */
+
+#ifndef __NIFTI2_HEADER
+#define __NIFTI2_HEADER
+
+/*---------------------------------------------------------------------------*/
+/* Changes to the header from NIFTI-1 to NIFTI-2 are intended to allow for
+   larger and more accurate fields.  The changes are as follows:
+
+      - short dim[8]         -> int64_t dim[8]
+      - float intent_p1,2,3  -> double intent_p1,2,3    (3 fields)
+      - float pixdim[8]      -> double pixdim[8]
+      - float vox_offset     -> int64_t vox_offset
+      - float scl_slope      -> double scl_slope
+      - float scl_inter      -> double scl_inter
+      - float cal_max        -> double cal_max
+      - float cal_min        -> double cal_min
+      - float slice_duration -> double slice_duration
+      - float toffset        -> double toffset
+      - short slice_start    -> int64_t slice_start
+      - short slice_end      -> int64_t slice_end
+      - char slice_code      -> int32_t slice_code
+      - char xyzt_units      -> int32_t xyzt_units
+      - short intent_code    -> int32_t intent_code
+      - short qform_code     -> int32_t qform_code
+      - short sform_code     -> int32_t sform_code
+      - float quatern_b,c,d  -> double quatern_b,c,d    (3 fields)
+      - float srow_x,y,z[4]  -> double srow_x,y,z[4]    (3 fields)
+      - char magic[4]        -> char magic[8]
+      - char unused_str[15]  -> padding added at the end of the header
+
+      - previously unused fields have been removed:
+           data_type, db_name, extents, session_error, regular, glmax, glmin
+
+      - the field order has been changed, notably with magic after sizeof_hdr
+
+                                                          2 Jan, 2014 [rickr]
+-----------------------------------------------------------------------------*/
+
+#include <stdint.h>
+
+/*=================*/
+#ifdef  __cplusplus
+extern "C" {
+#endif
+/*=================*/
+
+/*! \struct nifti_2_header
+    \brief Data structure defining the fields in the nifti2 header.
+           This binary header should be found at the beginning of a valid
+           NIFTI-2 header file.
+ */
+
+/* hopefully cross-platform solution to byte padding added by some compilers */
+#pragma pack(push)
+#pragma pack(1)
+
+                           /*****************************/ /***********************/ /************/
+struct nifti_2_header {    /* NIFTI-2 usage             */ /* NIFTI-1 usage       */ /*  offset  */
+                           /*****************************/ /***********************/ /************/
+   int32_t sizeof_hdr;     /*!< MUST be 540             */ /* MUST be 348         */ /*   0 */
+   char    magic[8];       /*!< MUST be valid signature */ /* char magic[4]       */ /*   4 */
+   int16_t datatype;       /*!< Defines data type!      */ /* short datatype      */ /*  12 */
+   int16_t bitpix;         /*!< Number bits/voxel       */ /* short bitpix        */ /*  14 */
+   int64_t dim[8];         /*!< Data array dimensions   */ /* short dim[8]        */ /*  16 */
+   double  intent_p1;      /*!< 1st intent parameter    */ /* float intent_p1     */ /*  80 */
+   double  intent_p2;      /*!< 2nd intent parameter    */ /* float intent_p2     */ /*  88 */
+   double  intent_p3;      /*!< 3rd intent parameter    */ /* float intent_p3     */ /*  96 */
+   double  pixdim[8];      /*!< Grid spacings           */ /* float pixdim[8]     */ /* 104 */
+   int64_t vox_offset;     /*!< Offset into .nii file   */ /* float vox_offset    */ /* 168 */
+   double  scl_slope;      /*!< Data scaling: slope     */ /* float scl_slope     */ /* 176 */
+   double  scl_inter;      /*!< Data scaling: offset    */ /* float scl_inter     */ /* 184 */
+   double  cal_max;        /*!< Max display intensity   */ /* float cal_max       */ /* 192 */
+   double  cal_min;        /*!< Min display intensity   */ /* float cal_min       */ /* 200 */
+   double  slice_duration; /*!< Time for 1 slice        */ /* float slice_duration*/ /* 208 */
+   double  toffset;        /*!< Time axis shift         */ /* float toffset       */ /* 216 */
+   int64_t slice_start;    /*!< First slice index       */ /* short slice_start   */ /* 224 */
+   int64_t slice_end;      /*!< Last slice index        */ /* short slice_end     */ /* 232 */
+   char    descrip[80];    /*!< any text you like       */ /* char descrip[80]    */ /* 240 */
+   char    aux_file[24];   /*!< auxiliary filename      */ /* char aux_file[24]   */ /* 320 */
+   int32_t qform_code;     /*!< NIFTI_XFORM_* code      */ /* short qform_code    */ /* 344 */
+   int32_t sform_code;     /*!< NIFTI_XFORM_* code      */ /* short sform_code    */ /* 348 */
+   double  quatern_b;      /*!< Quaternion b param      */ /* float quatern_b     */ /* 352 */
+   double  quatern_c;      /*!< Quaternion c param      */ /* float quatern_c     */ /* 360 */
+   double  quatern_d;      /*!< Quaternion d param      */ /* float quatern_d     */ /* 368 */
+   double  qoffset_x;      /*!< Quaternion x shift      */ /* float qoffset_x     */ /* 376 */
+   double  qoffset_y;      /*!< Quaternion y shift      */ /* float qoffset_y     */ /* 384 */
+   double  qoffset_z;      /*!< Quaternion z shift      */ /* float qoffset_z     */ /* 392 */
+   double  srow_x[4];      /*!< 1st row affine transform*/ /* float srow_x[4]     */ /* 400 */
+   double  srow_y[4];      /*!< 2nd row affine transform*/ /* float srow_y[4]     */ /* 432 */
+   double  srow_z[4];      /*!< 3rd row affine transform*/ /* float srow_z[4]     */ /* 464 */
+   int32_t slice_code;     /*!< Slice timing order      */ /* char slice_code     */ /* 496 */
+   int32_t xyzt_units;     /*!< Units of pixdim[1..4]   */ /* char xyzt_units     */ /* 500 */
+   int32_t intent_code;    /*!< NIFTI_INTENT_* code     */ /* short intent_code   */ /* 504 */
+   char    intent_name[16];/*!< name or meaning of data */ /* char intent_name[16]*/ /* 508 */
+   char    dim_info;       /*!< MRI slice ordering      */ /* char dim_info       */ /* 524 */
+   char    unused_str[15]; /*!< unused, filled with \0  */                           /* 525 */
+};                                                                 /****** total bytes: 540 */
+typedef struct nifti_2_header nifti_2_header;
+
+/* restore packing behavior */
+#pragma pack(pop)
+
+/* base swap test on the suggested version check, rather than dim[0]
+   swap4(348)==1543569408, swap4(540)==469893120 */
+#define NIFTI2_NEEDS_SWAP(h) \
+   ((h).sizeof_hdr == 1543569408 || (h).sizeof_hdr == 469893120)
+
+/*=================*/
+#ifdef  __cplusplus
+}
+#endif
+/*=================*/
+
+#endif /* __NIFTI2_HEADER */
diff --git a/reg-io/niftilib/nifti2_image.h b/reg-io/niftilib/nifti2_image.h
new file mode 100644
index 00000000..6e21b3c1
--- /dev/null
+++ b/reg-io/niftilib/nifti2_image.h
@@ -0,0 +1,106 @@
+#ifndef _NIFTI2_IMAGE_H_
+#define _NIFTI2_IMAGE_H_
+
+#include <inttypes.h>
+
+// This is repetitious and inelegant, but a definition for nifti2_image is needed to allow
+// conversion to/from nifti1_image. This is a straight copy of the relevant parts of nifti2_io.h.
+#if RNIFTI_NIFTILIB_VERSION == 1
+
+typedef struct {                   /** 4x4 matrix struct (double) **/
+  double m[4][4] ;
+} nifti_dmat44 ;
+
+
+typedef struct {                /*!< Image storage struct **/
+
+  int64_t ndim ;                /*!< last dimension greater than 1 (1..7) */
+  int64_t nx ;                  /*!< dimensions of grid array             */
+  int64_t ny ;                  /*!< dimensions of grid array             */
+  int64_t nz ;                  /*!< dimensions of grid array             */
+  int64_t nt ;                  /*!< dimensions of grid array             */
+  int64_t nu ;                  /*!< dimensions of grid array             */
+  int64_t nv ;                  /*!< dimensions of grid array             */
+  int64_t nw ;                  /*!< dimensions of grid array             */
+  int64_t dim[8] ;              /*!< dim[0]=ndim, dim[1]=nx, etc.         */
+  int64_t nvox ;                /*!< number of voxels = nx*ny*nz*...*nw   */
+  int nbyper ;                  /*!< bytes per voxel, matches datatype    */
+  int datatype ;                /*!< type of data in voxels: DT_* code    */
+
+  double dx ;                   /*!< grid spacings      */
+  double dy ;                   /*!< grid spacings      */
+  double dz ;                   /*!< grid spacings      */
+  double dt ;                   /*!< grid spacings      */
+  double du ;                   /*!< grid spacings      */
+  double dv ;                   /*!< grid spacings      */
+  double dw ;                   /*!< grid spacings      */
+  double pixdim[8] ;            /*!< pixdim[1]=dx, etc. */
+
+  double scl_slope ;            /*!< scaling parameter - slope        */
+  double scl_inter ;            /*!< scaling parameter - intercept    */
+
+  double cal_min ;              /*!< calibration parameter, minimum   */
+  double cal_max ;              /*!< calibration parameter, maximum   */
+
+  int qform_code ;              /*!< codes for (x,y,z) space meaning  */
+  int sform_code ;              /*!< codes for (x,y,z) space meaning  */
+
+  int freq_dim  ;               /*!< indexes (1,2,3, or 0) for MRI    */
+  int phase_dim ;               /*!< directions in dim[]/pixdim[]     */
+  int slice_dim ;               /*!< directions in dim[]/pixdim[]     */
+
+  int     slice_code  ;         /*!< code for slice timing pattern    */
+  int64_t slice_start ;         /*!< index for start of slices        */
+  int64_t slice_end   ;         /*!< index for end of slices          */
+  double  slice_duration ;      /*!< time between individual slices   */
+
+  /*! quaternion transform parameters
+    [when writing a dataset, these are used for qform, NOT qto_xyz]   */
+  double quatern_b , quatern_c , quatern_d ,
+         qoffset_x , qoffset_y , qoffset_z ,
+         qfac      ;
+
+  nifti_dmat44 qto_xyz ;        /*!< qform: transform (i,j,k) to (x,y,z) */
+  nifti_dmat44 qto_ijk ;        /*!< qform: transform (x,y,z) to (i,j,k) */
+
+  nifti_dmat44 sto_xyz ;        /*!< sform: transform (i,j,k) to (x,y,z) */
+  nifti_dmat44 sto_ijk ;        /*!< sform: transform (x,y,z) to (i,j,k) */
+
+  double toffset ;              /*!< time coordinate offset */
+
+  int xyz_units  ;              /*!< dx,dy,dz units: NIFTI_UNITS_* code  */
+  int time_units ;              /*!< dt       units: NIFTI_UNITS_* code  */
+
+  int nifti_type ;              /*!< see NIFTI_FTYPE_* codes, below:
+                                        0==ANALYZE,
+                                        1==NIFTI-1     (1 file),
+                                        2==NIFTI-1     (2 files),
+                                        3==NIFTI-ASCII (1 file)
+                                        4==NIFTI-2     (1 file),
+                                        5==NIFTI-2     (2 files) */
+
+  int    intent_code ;          /*!< statistic type (or something)       */
+  double intent_p1 ;            /*!< intent parameters                   */
+  double intent_p2 ;            /*!< intent parameters                   */
+  double intent_p3 ;            /*!< intent parameters                   */
+  char   intent_name[16] ;      /*!< optional description of intent data */
+
+  char descrip[80]  ;           /*!< optional text to describe dataset   */
+  char aux_file[24] ;           /*!< auxiliary filename                  */
+
+  char *fname ;                 /*!< header filename (.hdr or .nii)         */
+  char *iname ;                 /*!< image filename  (.img or .nii)         */
+  int64_t iname_offset ;        /*!< offset into iname where data starts    */
+  int   swapsize ;              /*!< swap unit in image data (might be 0)   */
+  int   byteorder ;             /*!< byte order on disk (MSB_ or LSB_FIRST) */
+  void *data ;                  /*!< pointer to data: nbyper*nvox bytes     */
+
+  int                num_ext ;  /*!< number of extensions in ext_list       */
+  nifti1_extension * ext_list ; /*!< array of extension structs (with data) */
+  analyze_75_orient_code analyze75_orient; /*!< for old analyze files, orient */
+
+} nifti2_image ;
+
+#endif // RNIFTI_NIFTILIB_VERSION
+
+#endif
diff --git a/reg-io/niftilib/nifti2_io.c b/reg-io/niftilib/nifti2_io.c
new file mode 100644
index 00000000..da972895
--- /dev/null
+++ b/reg-io/niftilib/nifti2_io.c
@@ -0,0 +1,9703 @@
+#define _NIFTI2_IO_C_
+
+#include "niftilib/nifti2_io.h"   /* typedefs, prototypes, macros, etc. */
+
+/*****===================================================================*****/
+/*****     Sample functions to deal with NIFTI-1,2 and ANALYZE files     *****/
+/*****...................................................................*****/
+/*****            This code is released to the public domain.            *****/
+/*****...................................................................*****/
+/*****  Author: Robert W Cox, SSCC/DIRP/NIMH/NIH/DHHS/USA/EARTH          *****/
+/*****  Date:   August 2003                                              *****/
+/*****...................................................................*****/
+/*****  Neither the National Institutes of Health (NIH), nor any of its  *****/
+/*****  employees imply any warranty of usefulness of this software for  *****/
+/*****  any purpose, and do not assume any liability for damages,        *****/
+/*****  incidental or otherwise, caused by any use of this document.     *****/
+/*****===================================================================*****/
+
+/** \file nifti1_io.c
+    \brief main collection of nifti1 i/o routines
+           - written by Bob Cox, SSCC NIMH
+           - revised by Mark Jenkinson, FMRIB
+           - revised by Rick Reynolds, SSCC, NIMH
+           - revised by Kate Fissell, University of Pittsburgh
+
+        The library history can be viewed via "nifti_tool -nifti_hist".
+    <br>The library version can be viewed via "nifti_tool -nifti_ver".
+ */
+
+/*! global history and version strings, for printing */
+static char const * const gni1_history[] =
+{
+  "----------------------------------------------------------------------\n"
+  "history (of nifti-1 library changes):\n"
+  "\n",
+  "0.0  August, 2003 [rwcox]\n"
+  "     (Robert W Cox of the National Institutes of Health, SSCC/DIRP/NIMH)\n"
+  "   - initial version\n"
+  "\n",
+  "0.1  July/August, 2004 [Mark Jenkinson]\n"
+  "     (FMRIB Centre, University of Oxford, UK)\n"
+  "   - Mainly adding low-level IO and changing things to allow gzipped\n"
+  "     files to be read and written\n"
+  "   - Full backwards compatability should have been maintained\n"
+  "\n",
+  "0.2  16 Nov 2004 [rickr]\n"
+  "     (Rick Reynolds of the National Institutes of Health, SSCC/DIRP/NIMH)\n"
+  "   - included Mark's changes in the AFNI distribution (including znzlib/)\n"
+  "     (HAVE_ZLIB is commented out for the standard distribution)\n"
+  "   - modified nifti_validfilename() and nifti_makebasename()\n"
+  "   - added nifti_find_file_extension()\n"
+  "\n",
+  "0.3  3 Dec 2004 [rickr]\n"
+  "   - note: header extensions are not yet checked for\n"
+  "   - added formatted history as global string, for printing\n"
+  "   - added nifti_disp_lib_hist(), to display the nifti library history\n"
+  "   - added nifti_disp_lib_version(), to display the nifti library history\n",
+  "   - re-wrote nifti_findhdrname()\n"
+  "       o used nifti_find_file_extension()\n"
+  "       o changed order of file tests (default is .nii, depends on input)\n"
+  "       o free hdrname on failure\n"
+  "   - made similar changes to nifti_findimgname()\n"
+  "   - check for NULL return from nifti_findhdrname() calls\n",
+  "   - removed most of ERREX() macros\n"
+  "   - modified nifti_image_read()\n"
+  "       o added debug info and error checking (on gni_debug > 0, only)\n"
+  "       o fail if workingname is NULL\n"
+  "       o check for failure to open header file\n"
+  "       o free workingname on failure\n"
+  "       o check for failure of nifti_image_load()\n"
+  "       o check for failure of nifti_convert_nhdr2nim()\n",
+  "   - changed nifti_image_load() to int, and check nifti_read_buffer return\n"
+  "   - changed nifti_read_buffer() to fail on short read, and to count float\n"
+  "     fixes (to print on debug)\n"
+  "   - changed nifti_image_infodump to print to stderr\n"
+  "   - updated function header comments, or moved comments above header\n"
+  "   - removed const keyword\n"
+  "   - added LNI_FERR() macro for error reporting on input files\n"
+  "\n",
+  "0.4  10 Dec 2004 [rickr]  - added header extensions\n"
+  "   - in nifti1_io.h:\n"
+  "       o added num_ext and ext_list to the definition of nifti_image\n"
+  "       o made many functions static (more to follow)\n"
+  "       o added LNI_MAX_NIA_EXT_LEN, for max nifti_type 3 extension length\n",
+  "   - added __DATE__ to version output in nifti_disp_lib_version()\n"
+  "   - added nifti_disp_matrix_orient() to print orientation information\n"
+  "   - added '.nia' as a valid file extension in nifti_find_file_extension()\n"
+  "   - added much more debug output\n"
+  "   - in nifti_image_read(), in the case of an ASCII header, check for\n"
+  "     extensions after the end of the header\n",
+  "   - added nifti_read_extensions() function\n"
+  "   - added nifti_read_next_extension() function\n"
+  "   - added nifti_add_exten_to_list() function\n"
+  "   - added nifti_check_extension() function\n"
+  "   - added nifti_write_extensions() function\n"
+  "   - added nifti_extension_size() function\n"
+  "   - in nifti_set_iname_offest():\n"
+  "       o adjust offset by the extension size and the extender size\n",
+  "       o fixed the 'ceiling modulo 16' computation\n"
+  "   - in nifti_image_write_hdr_img2(): \n"
+  "       o added extension writing\n"
+  "       o check for NULL return from nifti_findimgname()\n"
+  "   - include number of extensions in nifti_image_to_ascii() output\n"
+  "   - in nifti_image_from_ascii():\n"
+  "       o return bytes_read as a parameter, computed from the final spos\n"
+  "       o extract num_ext from ASCII header\n"
+  "\n",
+  "0.5  14 Dec 2004 [rickr]  - added sub-brick reading functions\n"
+  "   - added nifti_brick_list type to nifti1_io.h, along with new prototypes\n"
+  "   - added main nifti_image_read_bricks() function, with description\n"
+  "   - added nifti_image_load_bricks() - library function (requires nim)\n"
+  "   - added valid_nifti_brick_list() - library function\n"
+  "   - added free_NBL() - library function\n",
+  "   - added update_nifti_image_for_brick_list() for dimension update\n"
+  "   - added nifti_load_NBL_bricks(), nifti_alloc_NBL_mem(),\n"
+  "           nifti_copynsort() and force_positive() (static functions)\n"
+  "   - in nifti_image_read(), check for failed load only if read_data is set\n"
+  "   - broke most of nifti_image_load() into nifti_image_load_prep()\n"
+  "\n",
+  "0.6  15 Dec 2004 [rickr]  - added sub-brick writing functionality\n"
+  "   - in nifti1_io.h, removed znzlib directory from include - all nifti\n"
+  "       library files are now under the nifti directory\n"
+  "   - nifti_read_extensions(): print no offset warning for nifti_type 3\n"
+  "   - nifti_write_all_data():\n"
+  "       o pass nifti_brick_list * NBL, for optional writing\n"
+  "       o if NBL, write each sub-brick, sequentially\n",
+  "   - nifti_set_iname_offset(): case 1 must have sizeof() cast to int\n"
+  "   - pass NBL to nifti_image_write_hdr_img2(), and allow NBL or data\n"
+  "   - added nifti_image_write_bricks() wrapper for ...write_hdr_img2()\n"
+  "   - included compression abilities\n"
+  "\n",
+  "0.7  16 Dec 2004 [rickr] - minor changes to extension reading\n"
+  "\n",
+  "0.8  21 Dec 2004 [rickr] - restrict extension reading, and minor changes\n"
+  "   - in nifti_image_read(), compute bytes for extensions (see remaining)\n"
+  "   - in nifti_read_extensions(), pass 'remain' as space for extensions,\n"
+  "        pass it to nifti_read_next_ext(), and update for each one read \n"
+  "   - in nifti_check_extension(), require (size <= remain)\n",
+  "   - in update_nifti_image_brick_list(), update nvox\n"
+  "   - in nifti_image_load_bricks(), make explicit check for nbricks <= 0\n"
+  "   - in int_force_positive(), check for (!list)\n"
+  "   - in swap_nifti_header(), swap sizeof_hdr, and reorder to struct order\n"
+  "   - change get_filesize functions to signed ( < 0 is no file or error )\n",
+  "   - in nifti_validfilename(), lose redundant (len < 0) check\n"
+  "   - make print_hex_vals() static\n"
+  "   - in disp_nifti_1_header, restrict string field widths\n"
+  "\n",
+  "0.9  23 Dec 2004 [rickr] - minor changes\n"
+  "   - broke ASCII header reading out of nifti_image_read(), into new\n"
+  "        functions has_ascii_header() and read_ascii_image()\n",
+  "   - check image_read failure and znzseek failure\n"
+  "   - altered some debug output\n"
+  "   - nifti_write_all_data() now returns an int\n"
+  "\n",
+  "0.10 29 Dec 2004 [rickr]\n"
+  "   - renamed nifti_valid_extension() to nifti_check_extension()\n"
+  "   - added functions nifti_makehdrname() and nifti_makeimgname()\n"
+  "   - added function valid_nifti_extensions()\n"
+  "   - in nifti_write_extensions(), check for validity before writing\n",
+  "   - rewrote nifti_image_write_hdr_img2():\n"
+  "       o set write_data and leave_open flags from write_opts\n"
+  "       o add debug print statements\n"
+  "       o use nifti_write_ascii_image() for the ascii case\n"
+  "       o rewrote the logic of all cases to be easier to follow\n",
+  "   - broke out code as nifti_write_ascii_image() function\n"
+  "   - added debug to top-level write functions, and free the znzFile\n"
+  "   - removed unused internal function nifti_image_open()\n"
+  "\n",
+  "0.11 30 Dec 2004 [rickr] - small mods\n"
+  "   - moved static function prototypes from header to C file\n"
+  "   - free extensions in nifti_image_free()\n"
+  "\n",
+  "1.0  07 Jan 2005 [rickr] - INITIAL RELEASE VERSION\n"
+  "   - added function nifti_set_filenames()\n"
+  "   - added function nifti_read_header()\n"
+  "   - added static function nhdr_looks_good()\n"
+  "   - added static function need_nhdr_swap()\n"
+  "   - exported nifti_add_exten_to_list symbol\n",
+  "   - fixed #bytes written in nifti_write_extensions()\n"
+  "   - only modify offset if it is too small (nifti_set_iname_offset)\n"
+  "   - added nifti_type 3 to nifti_makehdrname and nifti_makeimgname\n"
+  "   - added function nifti_set_filenames()\n"
+  "\n",
+  "1.1  07 Jan 2005 [rickr]\n"
+  "   - in nifti_read_header(), swap if needed\n"
+  "\n",
+  "1.2  07 Feb 2005 [kate fissell c/o rickr] \n"
+  "   - nifti1.h: added doxygen comments for main struct and #define groups\n"
+  "   - nifti1_io.h: added doxygen comments for file and nifti_image struct\n"
+  "   - nifti1_io.h: added doxygen comments for file and some functions\n"
+  "   - nifti1_io.c: changed nifti_copy_nim_info to use memcpy\n"
+  "\n",
+  "1.3  09 Feb 2005 [rickr]\n"
+  "   - nifti1.h: added doxygen comments for extension structs\n"
+  "   - nifti1_io.h: put most #defines in #ifdef _NIFTI1_IO_C_ block\n"
+  "   - added a doxygen-style description to every exported function\n"
+  "   - added doxygen-style comments within some functions\n"
+  "   - re-exported many znzFile functions that I had made static\n"
+  "   - re-added nifti_image_open (sorry, Mark)\n"
+  "   - every exported function now has 'nifti' in the name (19 functions)\n",
+  "   - made sure every alloc() has a failure test\n"
+  "   - added nifti_copy_extensions function, for use in nifti_copy_nim_info\n"
+  "   - nifti_is_gzfile: added initial strlen test\n"
+  "   - nifti_set_filenames: added set_byte_order parameter option\n"
+  "     (it seems appropriate to set the BO when new files are associated)\n"
+  "   - disp_nifti_1_header: prints to stdout (a.o.t. stderr), with fflush\n"
+  "\n",
+  "1.4  23 Feb 2005 [rickr] - sourceforge merge\n"
+  "   - merged into the nifti_io CVS directory structure at sourceforge.net\n"
+  "   - merged in 4 changes by Mark, and re-added his const keywords\n"
+  "   - cast some pointers to (void *) for -pedantic compile option\n"
+  "   - added nifti_free_extensions()\n"
+  "\n",
+  "1.5  02 Mar 2005 [rickr] - started nifti global options\n"
+  "   - gni_debug is now g_opts.debug\n"
+  "   - added validity check parameter to nifti_read_header\n"
+  "   - need_nhdr_swap no longer does test swaps on the stack\n"
+  "\n",
+  "1.6  05 April 2005 [rickr] - validation and collapsed_image_read\n"
+  "   - added nifti_read_collapsed_image(), an interface for reading partial\n"
+  "     datasets, specifying a subset of array indices\n"
+  "   - for read_collapsed_image, added static functions: rci_read_data(),\n"
+  "     rci_alloc_mem(), and make_pivot_list()\n",
+  "   - added nifti_nim_is_valid() to check for consistency (more to do)\n"
+  "   - added nifti_nim_has_valid_dims() to do many dimensions tests\n"
+  "\n",
+  "1.7  08 April 2005 [rickr]\n"
+  "   - added nifti_update_dims_from_array() - to update dimensions\n"
+  "   - modified nifti_makehdrname() and nifti_makeimgname():\n"
+  "       if prefix has a valid extension, use it (else make one up)\n"
+  "   - added nifti_get_intlist - for making an array of ints\n"
+  "   - fixed init of NBL->bsize in nifti_alloc_NBL_mem()  {thanks, Bob}\n"
+  "\n",
+  "1.8  14 April 2005 [rickr]\n"
+  "   - added nifti_set_type_from_names(), for nifti_set_filenames()\n"
+  "     (only updates type if number of files does not match it)\n"
+  "   - added is_valid_nifti_type(), just to be sure\n"
+  "   - updated description of nifti_read_collapsed_image() for *data change\n"
+  "     (if *data is already set, assume memory exists for results)\n"
+  "   - modified rci_alloc_mem() to allocate only if *data is NULL\n"
+  "\n",
+  "1.9  19 April 2005 [rickr]\n"
+  "   - added extension codes NIFTI_ECODE_COMMENT and NIFTI_ECODE_XCEDE\n"
+  "   - added nifti_type codes NIFTI_MAX_ECODE and NIFTI_MAX_FTYPE\n"
+  "   - added nifti_add_extension() {exported}\n"
+  "   - added nifti_fill_extension() as a static function\n"
+  "   - added nifti_is_valid_ecode() {exported}\n",
+  "   - nifti_type values are now NIFTI_FTYPE_* file codes\n"
+  "   - in nifti_read_extensions(), decrement 'remain' by extender size, 4\n"
+  "   - in nifti_set_iname_offset(), case 1, update if offset differs\n"
+  "   - only output '-d writing nifti file' if debug > 1\n"
+  "\n",
+  "1.10 10 May 2005 [rickr]\n"
+  "   - files are read using ZLIB only if they end in '.gz'\n"
+  "\n",
+  "1.11 12 August 2005 [kate fissell]\n"
+  "   - Kate's 0.2 release packaging, for sourceforge\n"
+  "\n",
+  "1.12 17 August 2005 [rickr] - comment (doxygen) updates\n"
+  "   - updated comments for most functions (2 updates from Cinly Ooi)\n"
+  "   - added nifti_type_and_names_match()\n"
+  "\n",
+  "1.12a 24 August 2005 [rickr] - remove all tabs from Clibs/*/*.[ch]\n",
+  "1.12b 25 August 2005 [rickr] - changes by Hans Johnson\n",
+  "1.13  25 August 2005 [rickr]\n",
+  "   - finished changes by Hans for Insight\n"
+  "   - added const in all appropraite parameter locations (30-40)\n"
+  "     (any pointer referencing data that will not change)\n"
+  "   - shortened all string constants below 509 character limit\n"
+  "1.14  28 October 2005 [HJohnson]\n",
+  "   - use nifti_set_filenames() in nifti_convert_nhdr2nim()\n"
+  "1.15  02 November 2005 [rickr]\n",
+  "   - added skip_blank_ext to nifti_global_options\n"
+  "   - added nifti_set_skip_blank_ext(), to set option\n"
+  "   - if skip_blank_ext and no extensions, do not read/write extender\n"
+  "1.16 18 November 2005 [rickr]\n",
+  "   - removed any test or access of dim[i], i>dim[0]\n"
+  "   - do not set pixdim for collapsed dims to 1.0, leave them as they are\n"
+  "   - added magic and dim[i] tests in nifti_hdr_looks_good()\n"
+  "   - added 2 size_t casts\n"
+  "1.17 22 November 2005 [rickr]\n",
+  "   - in hdr->nim, for i > dim[0], pass 0 or 1, else set to 1\n"
+  "1.18 02 March 2006 [rickr]\n",
+  "   - in nifti_alloc_NBL_mem(), fixed nt=0 case from 1.17 change\n"
+  "1.19 23 May 2006 [HJohnson,rickr]\n",
+  "   - nifti_write_ascii_image(): free(hstr)\n"
+  "   - nifti_copy_extensions(): clear num_ext and ext_list\n"
+  "1.20 27 Jun 2006 [rickr]\n",
+  "   - nifti_findhdrname(): fixed assign of efirst to match stated logic\n"
+  "     (problem found by Atle Bjørnerud)\n"
+  "1.21 05 Sep 2006 [rickr] update for nifticlib-0.4 release\n",
+  "   - was reminded to actually add nifti_set_skip_blank_ext()\n"
+  "   - init g_opts.skip_blank_ext to 0\n"
+  "1.22 01 Jun 2007 nifticlib-0.5 release\n",
+  "1.23 05 Jun 2007 nifti_add_exten_to_list: revert on failure, free old list\n"
+  "1.24 07 Jun 2007 nifti_copy_extensions: use esize-8 for data size\n"
+  "1.25 12 Jun 2007 [rickr] EMPTY_IMAGE creation\n",
+  "   - added nifti_make_new_header() - to create from dims/dtype\n"
+  "   - added nifti_make_new_nim() - to create from dims/dtype/fill\n"
+  "   - added nifti_is_valid_datatype(), and more debug info\n",
+  "1.26 27 Jul 2007 [rickr] handle single volumes > 2^31 bytes (but < 2^32)\n",
+  "1.27 28 Jul 2007 [rickr] nim->nvox, NBL-bsize are now type size_t\n"
+  "1.28 30 Jul 2007 [rickr] size_t updates\n",
+  "1.29 08 Aug 2007 [rickr] for list, valid_nifti_brick_list requires 3 dims\n"
+  "1.30 08 Nov 2007 [Yaroslav/rickr]\n"
+  "   - fix ARM struct alignment problem in byte-swapping routines\n",
+  "1.31 29 Nov 2007 [rickr] for nifticlib-1.0.0\n"
+  "   - added nifti_datatype_to/from_string routines\n"
+  "   - added DT_RGBA32/NIFTI_TYPE_RGBA32 datatype macros (2304)\n"
+  "   - added NIFTI_ECODE_FREESURFER (14)\n",
+  "1.32 08 Dec 2007 [rickr]\n"
+  "   - nifti_hdr_looks_good() allows ANALYZE headers (req. by V. Luccio)\n"
+  "   - added nifti_datatype_is_valid()\n",
+  "1.33 05 Feb 2008 [hansj,rickr] - block nia.gz use\n"
+  "1.34 13 Jun 2008 [rickr] - added nifti_compiled_with_zlib()\n"
+  "1.35 03 Aug 2008 [rickr]\n",
+  "   - deal with swapping, so that CPU type does not affect output\n"
+  "     (motivated by C Burns)\n"
+  "   - added nifti_analyze75 structure and nifti_swap_as_analyze()\n"
+  "   - previous swap_nifti_header is saved as old_swap_nifti_header\n"
+  "   - also swap UNUSED fields in nifti_1_header struct\n",
+  "1.36 07 Oct 2008 [rickr]\n",
+  "   - added nifti_NBL_matches_nim() check for write_bricks()\n"
+  "1.37 10 Mar 2009 [rickr]\n",
+  "   - H Johnson cast updates (06 Feb)\n"
+  "   - added NIFTI_ECODE_PYPICKLE for PyNIfTI (06 Feb)\n"
+  "   - added NIFTI_ECODEs 18-28 for the LONI MiND group\n"
+  "1.38 28 Apr 2009 [rickr]\n",
+  "   - uppercase extensions are now valid (requested by M. Coursolle)\n"
+  "   - nifti_set_allow_upper_fext controls this option (req by C. Ooi)\n"
+  "1.39 23 Jun 2009 [rickr]: added 4 checks of alloc() returns\n",
+  "1.40 16 Mar 2010 [rickr]: added NIFTI_ECODE_VOXBO for D. Kimberg\n",
+  "1.41 28 Apr 2010 [rickr]: added NIFTI_ECODE_CARET for J. Harwell\n",
+  "1.42 06 Jul 2010 [rickr]: trouble with large (gz) files\n",
+  "   - noted/investigated by M Hanke and Y Halchenko\n"
+  "   - fixed znzread/write, noting example by M Adler\n"
+  "   - changed nifti_swap_* routines/calls to take size_t (6)\n"
+  "1.43 07 Jul 2010 [rickr]: fixed znzR/W to again return nmembers\n",
+  "1.44 19 Jul 2013 [rickr]: ITK compatibility updates from H Johnson\n",
+  "1.45 10 May 2019 [rickr]: added NIFTI_ECODE_QUANTIPHYSE\n",
+  "1.46 26 Sep 2019 [rickr]:\n"
+  "   - nifti_read_ascii_image no longer closes fp or free's fname\n"
+  "----------------------------------------------------------------------\n"
+};
+
+/* rcr - todo
+
+   - nifti_tool -copy_sform SFORM_DSET.nii -infile ORIG.nii -prefix PP
+                -copy_orient SFORM_DSET.nii -infile ORIG.nii -prefix PP
+
+   - check converting nim 2 n2hdr
+   - update for n2 (and/or split from n1)
+      - is_nifti_file (maybe use nifti_header_version), nifti_hdr_looks_good
+   - extensions
+   - nifti_make_new_n1_header: check that dims are small enough (<2^15)
+   - nifti_convert_nim2nhdr: rename to nim2n1hdr and write nim2n2hdr
+     (maybe have nifti_convert_nim2nhdr wrap current version)
+   - nifti_set_iname_offset: n2 update via nifti_type
+   - track use of nifti_type
+   - nifti_image_write_hdr_img2: write nifti_2_header
+ */
+
+static char const * const gni2_history[] =
+{
+  "----------------------------------------------------------------------\n"
+  "history (of nifti-2 library changes):\n"
+  "\n",
+  "2.00 02 Jan, 2014 [rickr]\n"
+  "     Richard Reynolds of the National Institutes of Health, SSCC/DIRP/NIMH\n"
+  "   - initial version - change types to 64-bit based on new nifti_image\n",
+  "2.01 04 Apr, 2014 [rickr]\n"
+  "   - added functionality for both nifti-1 and -2 headers\n"
+  "     (read/display/swap/convert2nim/make_new_n?_hdr)\n"
+  "   - still needs much nifti-2 functionality\n",
+  "2.02 11 May, 2015 [rickr]\n"
+  "   - added to repository 28 Apr, 2015\n"
+  "   - nifti_read_header() now returns found header struct\n"
+  "2.03 23 Jul, 2015 [rickr]\n"
+  "   - possibly alter dimensions on CIFTI read\n"
+  "   - return N-1 headers in unknown version cases\n",
+  "2.04 05 Aug, 2015 [rickr]\n"
+  "   - have writing try NIFTI-2 if NIFTI-1 seems insufficient\n"
+  "2.05 15 Apr, 2016 [rickr]\n"
+  "   - print int64_t using PRId64 macro, (ugly, but no warnings)\n"
+  "2.06 01 Oct, 2018 [rickr]\n"
+  "   - errors should all mention NIFTI, slight additional clarity\n"
+  "2.07 18 Dec, 2018 [hmjohnson]\n",
+  "   - added some const qualifiers\n"
+  "   - removed register keywords\n"
+  "   - fixed potential memory leaks in error conditions\n"
+  "   - appeased compilers\n"
+  "   - duped nifti1.h under nifti2, so directories do not cross reference\n"
+  "2.08 02 Jan, 2019 [rickr]\n"
+  "   - fixed CIFTI extension reading if not first\n"
+  "   - re-allow reading of ASCII headers (not part of standard)\n"
+  "   - nifti_set_iname_offset() now takes nifti_ver, to adjust for size\n",
+  "2.09 10 May, 2019 [rickr]: added NIFTI_ECODE_QUANTIPHYSE\n"
+  "2.10 26 Sep, 2019 [rickr]: nifti_read_ascii_image no longer closes fp\n",
+  "2.11  3 Oct, 2019 [rickr]: added nifti_[d]mat33_mul\n",
+  "----------------------------------------------------------------------\n"
+};
+
+static const char gni_version[]
+        = "nifti-2 library version 2.11 (3 Oct, 2019)";
+
+/*! global nifti options structure - init with defaults */
+/*  see 'option accessor functions'                     */
+static nifti_global_options g_opts = {
+        1, /* debug level                                         */
+        0, /* skip_blank_ext    - skip extender if no extensions  */
+        1, /* allow_upper_fext  - allow uppercase file extensions */
+        0, /* alter_cifti       - alter CIFTI dims to use nx,t,u,v*/
+};
+
+char nifti1_magic[4] = { 'n', '+', '1', '\0' };
+char nifti2_magic[8] = { 'n', '+', '2', '\0', '\r', '\n', '\032', '\n' };
+
+/*! global nifti types structure list (per type, ordered oldest to newest) */
+static const nifti_type_ele nifti_type_list[] = {
+    /* type  nbyper  swapsize   name  */
+    {    0,     0,       0,   "DT_UNKNOWN"              },
+    {    0,     0,       0,   "DT_NONE"                 },
+    {    1,     0,       0,   "DT_BINARY"               },  /* not usable */
+    {    2,     1,       0,   "DT_UNSIGNED_CHAR"        },
+    {    2,     1,       0,   "DT_UINT8"                },
+    {    2,     1,       0,   "NIFTI_TYPE_UINT8"        },
+    {    4,     2,       2,   "DT_SIGNED_SHORT"         },
+    {    4,     2,       2,   "DT_INT16"                },
+    {    4,     2,       2,   "NIFTI_TYPE_INT16"        },
+    {    8,     4,       4,   "DT_SIGNED_INT"           },
+    {    8,     4,       4,   "DT_INT32"                },
+    {    8,     4,       4,   "NIFTI_TYPE_INT32"        },
+    {   16,     4,       4,   "DT_FLOAT"                },
+    {   16,     4,       4,   "DT_FLOAT32"              },
+    {   16,     4,       4,   "NIFTI_TYPE_FLOAT32"      },
+    {   32,     8,       4,   "DT_COMPLEX"              },
+    {   32,     8,       4,   "DT_COMPLEX64"            },
+    {   32,     8,       4,   "NIFTI_TYPE_COMPLEX64"    },
+    {   64,     8,       8,   "DT_DOUBLE"               },
+    {   64,     8,       8,   "DT_FLOAT64"              },
+    {   64,     8,       8,   "NIFTI_TYPE_FLOAT64"      },
+    {  128,     3,       0,   "DT_RGB"                  },
+    {  128,     3,       0,   "DT_RGB24"                },
+    {  128,     3,       0,   "NIFTI_TYPE_RGB24"        },
+    {  255,     0,       0,   "DT_ALL"                  },
+    {  256,     1,       0,   "DT_INT8"                 },
+    {  256,     1,       0,   "NIFTI_TYPE_INT8"         },
+    {  512,     2,       2,   "DT_UINT16"               },
+    {  512,     2,       2,   "NIFTI_TYPE_UINT16"       },
+    {  768,     4,       4,   "DT_UINT32"               },
+    {  768,     4,       4,   "NIFTI_TYPE_UINT32"       },
+    { 1024,     8,       8,   "DT_INT64"                },
+    { 1024,     8,       8,   "NIFTI_TYPE_INT64"        },
+    { 1280,     8,       8,   "DT_UINT64"               },
+    { 1280,     8,       8,   "NIFTI_TYPE_UINT64"       },
+    { 1536,    16,      16,   "DT_FLOAT128"             },
+    { 1536,    16,      16,   "NIFTI_TYPE_FLOAT128"     },
+    { 1792,    16,       8,   "DT_COMPLEX128"           },
+    { 1792,    16,       8,   "NIFTI_TYPE_COMPLEX128"   },
+    { 2048,    32,      16,   "DT_COMPLEX256"           },
+    { 2048,    32,      16,   "NIFTI_TYPE_COMPLEX256"   },
+    { 2304,     4,       0,   "DT_RGBA32"               },
+    { 2304,     4,       0,   "NIFTI_TYPE_RGBA32"       },
+};
+
+/*---------------------------------------------------------------------------*/
+/* prototypes for internal functions - not part of exported library          */
+
+/* extension routines */
+static int  nifti_read_extensions(nifti_image *nim, znzFile fp, int64_t remain);
+static int  nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim,                                       int remain, znzFile fp );
+static int  nifti_check_extension(nifti_image *nim, int size,int code, int rem);
+static void update_nifti_image_for_brick_list(nifti_image * nim,
+                                              int64_t nbricks);
+static int  nifti_add_exten_to_list(nifti1_extension *  new_ext,
+                                    nifti1_extension ** list, int new_length);
+static int  nifti_fill_extension(nifti1_extension * ext, const char * data,
+                                 int len, int ecode);
+static void compute_strides(int64_t *strides,const int64_t *size,int nbyper);
+
+/* NBL routines */
+static int  nifti_load_NBL_bricks(nifti_image * nim , const int64_t * slist,
+                       const int64_t * sindex, nifti_brick_list * NBL, znzFile fp );
+static int  nifti_alloc_NBL_mem(  nifti_image * nim, int64_t nbricks,
+                                  nifti_brick_list * nbl);
+static int  nifti_copynsort(int64_t nbricks, const int64_t *blist,
+                            int64_t **slist, int64_t **sindex);
+static int  nifti_NBL_matches_nim(const nifti_image *nim,
+                                  const nifti_brick_list *NBL);
+
+/* for nifti_read_collapsed_image: */
+static int  rci_read_data(nifti_image *nim, int *pivots, int64_t *prods,
+                          int nprods, const int64_t dims[], char *data,
+                          znzFile fp, int64_t base_offset);
+static int rci_alloc_mem(void **data, const int64_t prods[8], int nprods, int nbyper);
+static int  make_pivot_list(nifti_image * nim, const int64_t dims[],
+                            int pivots[], int64_t prods[], int * nprods );
+
+/* misc */
+static int   compare_strlist   (const char * str, char ** strlist, int len);
+static int   fileext_compare   (const char * test_ext, const char * known_ext);
+static int   fileext_n_compare (const char * test_ext,
+                                const char * known_ext, size_t maxlen);
+static int   is_mixedcase      (const char * str);
+static int   is_uppercase      (const char * str);
+static int   make_lowercase    (char * str);
+static int   make_uppercase    (char * str);
+static int   need_nhdr_swap    (short dim0, int hdrsize);
+static int   print_hex_vals    (const char * data, size_t nbytes, FILE * fp);
+static int   unescape_string   (char *str);  /* string utility functions */
+static char *escapize_string   (const char *str);
+
+/* consider for export */
+static int  nifti_ext_type_index(nifti_image * nim, int ecode);
+
+/* internal I/O routines */
+static znzFile nifti_image_load_prep( nifti_image *nim );
+static int     has_ascii_header(znzFile fp);
+/*---------------------------------------------------------------------------*/
+
+
+/* for calling from some main program */
+
+/*----------------------------------------------------------------------*/
+/*! display the nifti library module history (via stdout)
+*//*--------------------------------------------------------------------*/
+void nifti2_disp_lib_hist( int ver )
+{
+   int c, len;
+
+   switch ( ver ) {
+      default: {
+         Rc_fprintf_stderr("** NIFTI disp_lib_list: bad ver %d\n", ver);
+         break;
+      }
+
+      case 0:
+      case 2: {
+         len = sizeof(gni2_history)/sizeof(char *);
+         for( c = 0; c < len; c++ )
+             Rc_fputs_stdout(gni2_history[c]);
+         break;
+      }
+      case 1: {
+         len = sizeof(gni1_history)/sizeof(char *);
+         for( c = 0; c < len; c++ )
+             Rc_fputs_stdout(gni1_history[c]);
+         break;
+      }
+   }
+}
+
+/*----------------------------------------------------------------------*/
+/*! display the nifti library version (via stdout)
+*//*--------------------------------------------------------------------*/
+void nifti_disp_lib_version( void )
+{
+   Rc_printf("%s, compiled %s\n", gni_version, __DATE__);
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! nifti_image_read_bricks        - read nifti data as array of bricks
+ *
+ *                                   13 Dec 2004 [rickr]
+ *
+ *  \param  hname    - filename of dataset to read (must be valid)
+ *  \param  nbricks  - number of sub-bricks to read
+ *                     (if blist is valid, nbricks must be > 0)
+ *  \param  blist    - list of sub-bricks to read
+ *                     (can be NULL; if NULL, read complete dataset)
+ *  \param  NBL      - pointer to empty nifti_brick_list struct
+ *                     (must be a valid pointer)
+ *
+ *  \return
+ *     <br> nim      - same as nifti_image_read, but
+ *                          nim->nt       = NBL->nbricks (or nt*nu*nv*nw)
+ *                          nim->nu,nv,nw = 1
+ *                          nim->data     = NULL
+ *     <br> NBL      - filled with data volumes
+ *
+ * By default, this function will read the nifti dataset and break the data
+ * into a list of nt*nu*nv*nw sub-bricks, each having size nx*ny*nz elements.
+ * That is to say, instead of reading the entire dataset as a single array,
+ * break it up into sub-bricks (volumes), each of size nx*ny*nz elements.
+ *
+ * Note: in the returned nifti_image, nu, nv and nw will always be 1.  The
+ *       intention of this function is to collapse the dataset into a single
+ *       array of volumes (of length nbricks or nt*nu*nv*nw).
+ *
+ * If 'blist' is valid, it is taken to be a list of sub-bricks, of length
+ * 'nbricks'.  The data will still be separated into sub-bricks of size
+ * nx*ny*nz elements, but now 'nbricks' sub-bricks will be returned, of the
+ * caller's choosing via 'blist'.
+ *
+ * E.g. consider a dataset with 12 sub-bricks (numbered 0..11), and the
+ * following code:
+ *
+ * <pre>
+ * { nifti_brick_list   NB_orig, NB_select;
+ *   nifti_image      * nim_orig, * nim_select;
+ *   int                blist[5] = { 7, 0, 5, 5, 9 };
+ *
+ *   nim_orig   = nifti_image_read_bricks("myfile.nii", 0, NULL,  &NB_orig);
+ *   nim_select = nifti_image_read_bricks("myfile.nii", 5, blist, &NB_select);
+ * }
+ * </pre>
+ *
+ * Here, nim_orig gets the entire dataset, where NB_orig.nbricks = 12.  But
+ * nim_select has NB_select.nbricks = 5.
+ *
+ * Note that the first case is not quite the same as just calling the
+ * nifti_image_read function, as here the data is separated into sub-bricks.
+ *
+ * Note that valid blist elements are in [0..nt*nu*nv*nw-1],
+ * or written [ 0 .. (dim[4]*dim[5]*dim[6]*dim[7] - 1) ].
+ *
+ * Note that, as is the case with all of the reading functions, the
+ * data will be allocated, read in, and properly byte-swapped, if
+ * necessary.
+ *
+ * \sa nifti_image_load_bricks, nifti_free_NBL, valid_nifti_brick_list,
+       nifti_image_read
+*//*----------------------------------------------------------------------*/
+nifti_image *nifti2_image_read_bricks(const char * hname, int64_t nbricks,
+                                 const int64_t * blist, nifti_brick_list * NBL)
+{
+   nifti_image * nim;
+
+   if( !hname || !NBL ){
+      Rc_fprintf_stderr("** nifti_image_read_bricks: bad params (%p,%p)\n",
+              hname, (void *)NBL);
+      return NULL;
+   }
+
+   if( blist && nbricks <= 0 ){
+      /* use PRId64 for printing int64_t     14 Apr 2016 */
+      Rc_fprintf_stderr("** nifti_image_read_bricks: bad nbricks, %" PRId64 "\n",
+              nbricks);
+      return NULL;
+   }
+
+   nim = nifti_image_read(hname, 0);  /* read header, but not data */
+
+   if( !nim ) return NULL;   /* errors were already printed */
+
+   /* if we fail, free image and return */
+   if( nifti_image_load_bricks(nim, nbricks, blist, NBL) <= 0 ){
+      nifti_image_free(nim);
+      return NULL;
+   }
+
+   if( blist ) update_nifti_image_for_brick_list(nim, nbricks);
+
+   return nim;
+}
+
+
+/*----------------------------------------------------------------------
+ * update_nifti_image_for_brick_list  - update nifti_image
+ *
+ * When loading a specific brick list, the distinction between
+ * nt, nu, nv and nw is lost.  So put everything in t, and set
+ * dim[0] = 4.
+ *----------------------------------------------------------------------*/
+static void update_nifti_image_for_brick_list( nifti_image * nim ,
+                                               int64_t nbricks )
+{
+   int64_t ndim;
+
+   if( g_opts.debug > 2 ){
+      Rc_fprintf_stderr("+d updating image dimensions for %" PRId64
+              " bricks in list\n", nbricks);
+      Rc_fprintf_stderr("   ndim = %" PRId64 "\n",nim->ndim);
+      Rc_fprintf_stderr("   nx,ny,nz,nt,nu,nv,nw: (%" PRId64 ",%" PRId64
+             ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 ")\n",
+             nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw);
+   }
+
+   nim->nt = nbricks;
+   nim->nu = nim->nv = nim->nw = 1;
+   nim->dim[4] = nbricks;
+   nim->dim[5] = nim->dim[6] = nim->dim[7] = 1;
+
+   /* compute nvox                                                       */
+   /* do not rely on dimensions above dim[0]         16 Nov 2005 [rickr] */
+   for( nim->nvox = 1, ndim = 1; ndim <= nim->dim[0]; ndim++ )
+      nim->nvox *= nim->dim[ndim];
+
+   /* update the dimensions to 4 or lower */
+   for( ndim = 4; (ndim > 1) && (nim->dim[ndim] <= 1); ndim-- )
+       ;
+
+   if( g_opts.debug > 2 ){
+      Rc_fprintf_stderr("+d ndim = %" PRId64 " -> %" PRId64 "\n",nim->ndim, ndim);
+      Rc_fprintf_stderr(" --> (%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64
+              ",%" PRId64 ",%" PRId64 ",%" PRId64 ")\n",
+              nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw);
+   }
+
+   nim->dim[0] = nim->ndim = ndim;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! nifti_update_dims_from_array  - update nx, ny, ... from nim->dim[]
+
+    Fix all the dimension information, based on a new nim->dim[].
+
+    Note: we assume that dim[0] will not increase.
+
+    Check for updates to pixdim[], dx,...,  nx,..., nvox, ndim, dim[0].
+*//*--------------------------------------------------------------------*/
+int nifti2_update_dims_from_array( nifti_image * nim )
+{
+   int     c;
+   int64_t ndim;
+
+   if( !nim ){
+      Rc_fprintf_stderr("** NIFTI update_dims: missing nim\n");
+      return 1;
+   }
+
+   if( g_opts.debug > 2 ){
+      Rc_fprintf_stderr("+d updating image dimensions given nim->dim:");
+      for( c = 0; c < 8; c++ ) Rc_fprintf_stderr(" %" PRId64, nim->dim[c]);
+      Rc_fputc_stderr('\n');
+   }
+
+   /* verify dim[0] first */
+   if(nim->dim[0] < 1 || nim->dim[0] > 7){
+      Rc_fprintf_stderr("** NIFTI: invalid dim[0], dim[] = ");
+      for( c = 0; c < 8; c++ ) Rc_fprintf_stderr(" %" PRId64, nim->dim[c]);
+      Rc_fputc_stderr('\n');
+      return 1;
+   }
+
+   /* set nx, ny ..., dx, dy, ..., one by one */
+
+   /* less than 1, set to 1, else copy */
+   if(nim->dim[1] < 1) nim->nx = nim->dim[1] = 1;
+   else                nim->nx = nim->dim[1];
+   nim->dx = nim->pixdim[1];
+
+   /* if undefined, or less than 1, set to 1 */
+   if(nim->dim[0] < 2 || (nim->dim[0] >= 2 && nim->dim[2] < 1))
+      nim->ny = nim->dim[2] = 1;
+   else
+      nim->ny = nim->dim[2];
+   /* copy delta values, in any case */
+   nim->dy = nim->pixdim[2];
+
+   if(nim->dim[0] < 3 || (nim->dim[0] >= 3 && nim->dim[3] < 1))
+      nim->nz = nim->dim[3] = 1;
+   else /* just copy vals from arrays */
+      nim->nz = nim->dim[3];
+   nim->dz = nim->pixdim[3];
+
+   if(nim->dim[0] < 4 || (nim->dim[0] >= 4 && nim->dim[4] < 1))
+      nim->nt = nim->dim[4] = 1;
+   else /* just copy vals from arrays */
+      nim->nt = nim->dim[4];
+   nim->dt = nim->pixdim[4];
+
+   if(nim->dim[0] < 5 || (nim->dim[0] >= 5 && nim->dim[5] < 1))
+      nim->nu = nim->dim[5] = 1;
+   else /* just copy vals from arrays */
+      nim->nu = nim->dim[5];
+   nim->du = nim->pixdim[5];
+
+   if(nim->dim[0] < 6 || (nim->dim[0] >= 6 && nim->dim[6] < 1))
+      nim->nv = nim->dim[6] = 1;
+   else /* just copy vals from arrays */
+      nim->nv = nim->dim[6];
+   nim->dv = nim->pixdim[6];
+
+   if(nim->dim[0] < 7 || (nim->dim[0] >= 7 && nim->dim[7] < 1))
+      nim->nw = nim->dim[7] = 1;
+   else /* just copy vals from arrays */
+      nim->nw = nim->dim[7];
+   nim->dw = nim->pixdim[7];
+
+   for( c = 1, nim->nvox = 1; c <= nim->dim[0]; c++ )
+      nim->nvox *= nim->dim[c];
+
+   /* compute ndim, assuming it can be no larger than the old one */
+   for( ndim = nim->dim[0]; (ndim > 1) && (nim->dim[ndim] <= 1); ndim-- )
+       ;
+
+   if( g_opts.debug > 2 ){
+      Rc_fprintf_stderr("+d ndim = %" PRId64 " -> %" PRId64 "\n",nim->ndim, ndim);
+      Rc_fprintf_stderr(" --> (%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64
+              ",%" PRId64 ",%" PRId64 ",%" PRId64 ")\n",
+              nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw);
+   }
+
+   nim->dim[0] = nim->ndim = ndim;
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! Load the image data from disk into an already-prepared image struct.
+ *
+ * \param    nim      - initialized nifti_image, without data
+ * \param    nbricks  - the length of blist (must be 0 if blist is NULL)
+ * \param    blist    - an array of xyz volume indices to read (can be NULL)
+ * \param    NBL      - pointer to struct where resulting data will be stored
+ *
+ * If blist is NULL, read all sub-bricks.
+ *
+ * \return the number of loaded bricks (NBL->nbricks),
+ *    0 on failure, < 0 on error
+ *
+ * NOTE: it is likely that another function will copy the data pointers
+ *       out of NBL, in which case the only pointer the calling function
+ *       will want to free is NBL->bricks (not each NBL->bricks[i]).
+*//*--------------------------------------------------------------------*/
+int nifti2_image_load_bricks( nifti_image * nim , int64_t nbricks,
+                             const int64_t * blist, nifti_brick_list * NBL )
+{
+   int64_t * slist = NULL, * sindex = NULL;
+   int       rv;
+   znzFile   fp;
+
+   /* we can have blist == NULL */
+   if( !nim || !NBL ){
+      Rc_fprintf_stderr("** nifti_image_load_bricks, bad params (%p,%p)\n",
+              (void *)nim, (void *)NBL);
+      return -1;
+   }
+
+   if( blist && nbricks <= 0 ){
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("-d load_bricks: received blist with nbricks = "
+                 "%" PRId64 "," "ignoring blist\n", nbricks);
+      blist = NULL; /* pretend nothing was passed */
+   }
+
+   if( blist && ! valid_nifti_brick_list(nim, nbricks, blist, g_opts.debug>0) )
+      return -1;
+
+   /* for efficiency, let's read the file in order */
+   if( blist && nifti_copynsort( nbricks, blist, &slist, &sindex ) != 0 )
+      return -1;
+
+   /* open the file and position the FILE pointer */
+   fp = nifti_image_load_prep( nim );
+   if( !fp ){
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** nifti_image_load_bricks, failed load_prep\n");
+      if( blist ){ free(slist); free(sindex); }
+      return -1;
+   }
+
+   /* this will flag to allocate defaults */
+   if( !blist ) nbricks = 0;
+   if( nifti_alloc_NBL_mem( nim, nbricks, NBL ) != 0 ){
+      if( blist ){ free(slist); free(sindex); }
+      znzclose(fp);
+      return -1;
+   }
+
+   rv = nifti_load_NBL_bricks(nim, slist, sindex, NBL, fp);
+
+   if( rv != 0 ){
+      nifti_free_NBL( NBL );  /* failure! */
+      NBL->nbricks = 0; /* repetative, but clear */
+   }
+
+   if( slist ){ free(slist); free(sindex); }
+
+   znzclose(fp);
+
+   return NBL->nbricks;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! nifti_free_NBL      - free all pointers and clear structure
+ *
+ * note: this does not presume to free the structure pointer
+*//*--------------------------------------------------------------------*/
+void nifti2_free_NBL( nifti_brick_list * NBL )
+{
+   int c;
+
+   if( NBL->bricks ){
+      for( c = 0; c < NBL->nbricks; c++ )
+         if( NBL->bricks[c] ) free(NBL->bricks[c]);
+      free(NBL->bricks);
+      NBL->bricks = NULL;
+   }
+
+   NBL->bsize = NBL->nbricks = 0;
+}
+
+
+/*----------------------------------------------------------------------
+ * nifti_load_NBL_bricks      - read the file data into the NBL struct
+ *
+ * return 0 on success, -1 on failure
+ *----------------------------------------------------------------------*/
+static int nifti_load_NBL_bricks( nifti_image * nim , const int64_t * slist,
+                        const int64_t * sindex, nifti_brick_list * NBL, znzFile fp )
+{
+   int64_t oposn, fposn;      /* orig and current file positions */
+   int64_t rv, test;
+   int64_t c;
+   int64_t prev, isrc, idest; /* previous/current sub-brick, and new index */
+
+   test = znztell(fp);  /* store current file position */
+   if( test < 0 ){
+      Rc_fprintf_stderr("** NIFTI load bricks: ztell failed??\n");
+      return -1;
+   }
+   fposn = oposn = test;
+
+   /* first, handle the default case, no passed blist */
+   if( !slist ){
+      for( c = 0; c < NBL->nbricks; c++ ) {
+         rv = nifti_read_buffer(fp, NBL->bricks[c], NBL->bsize, nim);
+         if( rv != NBL->bsize ){
+            Rc_fprintf_stderr("** NIFTI load bricks: cannot read brick %" PRId64
+                    " from '%s'\n",
+                    c, nim->iname ? nim->iname : nim->fname);
+            return -1;
+         }
+      }
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("+d read %" PRId64 " default %" PRId64
+                 "-byte bricks from file %s\n",
+                 NBL->nbricks, NBL->bsize,
+                 nim->iname ? nim->iname:nim->fname );
+      return 0;
+   }
+
+   if( !sindex ){
+      Rc_fprintf_stderr("** NIFTI load_NBL_bricks: missing index list\n");
+      return -1;
+   }
+
+   prev = -1;   /* use prev for previous sub-brick */
+   for( c = 0; c < NBL->nbricks; c++ ){
+       isrc = slist[c];   /* this is original brick index (c is new one) */
+       idest = sindex[c]; /* this is the destination index for this data */
+
+       /* if this sub-brick is not the previous, we must read from disk */
+       if( isrc != prev ){
+
+          /* if we are not looking at the correct sub-brick, scan forward */
+          if( fposn != (oposn + isrc*NBL->bsize) ){
+             fposn = oposn + isrc*NBL->bsize;
+             /* rcr - znz functions need to handle 64-bit cases, */
+             /* see setting _FILE_OFFSET_BITS                    */
+             if( znzseek(fp, fposn, SEEK_SET) < 0 ){
+                Rc_fprintf_stderr("** NIFTI: failed to locate brick %" PRId64
+                        " in file '%s'\n",
+                        isrc, nim->iname ? nim->iname : nim->fname);
+                return -1;
+             }
+          }
+
+          /* only 10,000 lines later and we're actually reading something! */
+          rv = nifti_read_buffer(fp, NBL->bricks[idest], NBL->bsize, nim);
+          if( rv != NBL->bsize ){
+             Rc_fprintf_stderr("** NIFTI: failed to read brick %" PRId64
+                     " from file '%s'\n",
+                     isrc, nim->iname ? nim->iname : nim->fname);
+             if( g_opts.debug > 1 )
+                Rc_fprintf_stderr("   (read %" PRId64 " of %" PRId64 " bytes)\n",
+                        rv, NBL->bsize);
+             return -1;
+          }
+          fposn += NBL->bsize;
+       } else {
+          /* we have already read this sub-brick, just copy the previous one */
+          /* note that this works because they are sorted */
+          memcpy(NBL->bricks[idest], NBL->bricks[sindex[c-1]], NBL->bsize);
+       }
+
+       prev = isrc;  /* in any case, note the now previous sub-brick */
+   }
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------
+ * nifti_alloc_NBL_mem      - allocate memory for bricks
+ *
+ * return 0 on success, -1 on failure
+ *----------------------------------------------------------------------*/
+static int nifti_alloc_NBL_mem(nifti_image * nim, int64_t nbricks,
+                               nifti_brick_list * nbl)
+{
+   int64_t c;
+
+   /* if nbricks is not specified, use the default */
+   if( nbricks > 0 ) nbl->nbricks = nbricks;
+   else {  /* I missed this one with the 1.17 change    02 Mar 2006 [rickr] */
+      nbl->nbricks = 1;
+      for( c = 4; c <= nim->ndim; c++ )
+          nbl->nbricks *= nim->dim[c];
+   }
+
+   nbl->bsize  = nim->nx * nim->ny * nim->nz * nim->nbyper; /* bytes */
+   nbl->bricks = (void **)malloc(nbl->nbricks * sizeof(void *));
+
+   if( ! nbl->bricks ){
+     Rc_fprintf_stderr("** NIFTI NANM: failed to alloc %" PRId64
+             " void ptrs\n",nbricks);
+     return -1;
+   }
+
+   for( c = 0; c < nbl->nbricks; c++ ){
+      nbl->bricks[c] = malloc(nbl->bsize);
+      if( ! nbl->bricks[c] ){
+         Rc_fprintf_stderr("** NIFTI NANM: failed to alloc %" PRId64
+                 " bytes for brick %" PRId64 "\n", nbl->bsize, c);
+         /* so free and clear everything before returning */
+         while( c > 0 ){
+            c--;
+            free(nbl->bricks[c]);
+         }
+         free(nbl->bricks);
+         nbl->bricks = NULL;
+         nbl->bsize = nbl->nbricks = 0;
+         return -1;
+      }
+   }
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("+d NANM: alloc'd %" PRId64 " bricks of %" PRId64
+              " bytes for NBL\n", nbl->nbricks, nbl->bsize);
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------
+ * nifti_copynsort      - copy int list, and sort with indices
+ *
+ * 1. duplicate the incoming list
+ * 2. create an sindex list, and init with 0..nbricks-1
+ * 3. do a slow insertion sort on the small slist, along with sindex list
+ * 4. check results, just to be positive
+ *
+ * So slist is sorted, and sindex hold original positions.
+ *
+ * return 0 on success, -1 on failure
+ *----------------------------------------------------------------------*/
+static int nifti_copynsort(int64_t nbricks, const int64_t *blist,
+                           int64_t ** slist, int64_t ** sindex)
+{
+   int64_t * stmp, * itmp;   /* for ease of typing/reading */
+   int64_t   c1, c2, spos, tmp;
+
+   *slist  = (int64_t *)malloc(nbricks * sizeof(int64_t));
+   *sindex = (int64_t *)malloc(nbricks * sizeof(int64_t));
+
+   if( !*slist || !*sindex ){
+      Rc_fprintf_stderr("** NIFTI NCS: failed to alloc %" PRId64
+              " ints for sorting\n", nbricks);
+      if(*slist)  free(*slist);   /* maybe one succeeded */
+      if(*sindex) free(*sindex);
+      return -1;
+   }
+
+   /* init the lists */
+   for( c1 = 0; c1 < nbricks; c1++ ) {
+      (*slist)[c1] = blist[c1];
+      (*sindex)[c1] = c1;
+   }
+
+   /* now actually sort slist */
+   stmp = *slist;
+   itmp = *sindex;
+   for( c1 = 0; c1 < nbricks-1; c1++ ) {
+      /* find smallest value, init to current */
+      spos = c1;
+      for( c2 = c1+1; c2 < nbricks; c2++ )
+         if( stmp[c2] < stmp[spos] ) spos = c2;
+      if( spos != c1 ) /* swap: fine, don't maintain sub-order, see if I care */
+      {
+         tmp        = stmp[c1];      /* first swap the sorting values */
+         stmp[c1]   = stmp[spos];
+         stmp[spos] = tmp;
+
+         tmp        = itmp[c1];      /* then swap the index values */
+         itmp[c1]   = itmp[spos];
+         itmp[spos] = tmp;
+      }
+   }
+
+   if( g_opts.debug > 2 ){
+      Rc_fprintf_stderr( "+d sorted indexing list:\n");
+      Rc_fprintf_stderr( "  orig   : ");
+      for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr("  %" PRId64, blist[c1]);
+      Rc_fprintf_stderr("\n  new    : ");
+      for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr("  %" PRId64, stmp[c1]);
+      Rc_fprintf_stderr("\n  indices: ");
+      for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr("  %" PRId64, itmp[c1]);
+      Rc_fputc_stderr('\n');
+   }
+
+   /* check the sort (why not?  I've got time...) */
+   for( c1 = 0; c1 < nbricks-1; c1++ ){
+       if( (stmp[c1] > stmp[c1+1]) || (blist[itmp[c1]] != stmp[c1]) ){
+          Rc_fprintf_stderr("** NIFTI sorting screw-up, way to go, rick!\n");
+          free(stmp); free(itmp); *slist = NULL; *sindex = NULL;
+          return -1;
+       }
+   }
+
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d sorting is okay\n");
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! valid_nifti_brick_list      - check sub-brick list for image
+ *
+ * This function verifies that nbricks and blist are appropriate
+ * for use with this nim, based on the dimensions.
+ *
+ * \param nim        nifti_image to check against
+ * \param nbricks    number of brick indices in blist
+ * \param blist      list of brick indices to check in nim
+ * \param disp_error if this flag is set, report errors to user
+ *
+ * \return 1 if valid, 0 if not
+*//*--------------------------------------------------------------------*/
+int valid_nifti2_brick_list(nifti_image * nim , int64_t nbricks,
+                           const int64_t * blist, int disp_error)
+{
+   int64_t c, nsubs;
+
+   if( !nim ){
+      if( disp_error || g_opts.debug > 0 )
+         Rc_fprintf_stderr("** valid_nifti_brick_list: missing nifti image\n");
+      return 0;
+   }
+
+   if( nbricks <= 0 || !blist ){
+      if( disp_error || g_opts.debug > 1 )
+         Rc_fprintf_stderr("** valid_nifti_brick_list: no brick list to check\n");
+      return 0;
+   }
+
+   if( nim->dim[0] < 3 ){
+      if( disp_error || g_opts.debug > 1 )
+        Rc_fprintf_stderr("** NIFTI: cannot read explict brick list from %" PRId64
+                "-D dataset\n", nim->dim[0]);
+      return 0;
+   }
+
+   /* nsubs sub-brick is nt*nu*nv*nw */
+   for( c = 4, nsubs = 1; c <= nim->dim[0]; c++ )
+      nsubs *= nim->dim[c];
+
+   if( nsubs <= 0 ){
+      Rc_fprintf_stderr("** NIFTI VNBL warning: bad dim list (%" PRId64 ",%"
+                     PRId64 ",%" PRId64 ",%" PRId64 ")\n",
+                     nim->dim[4], nim->dim[5], nim->dim[6], nim->dim[7]);
+      return 0;
+   }
+
+   for( c = 0; c < nbricks; c++ )
+      if( (blist[c] < 0) || (blist[c] >= nsubs) ){
+         if( disp_error || g_opts.debug > 1 )
+            Rc_fprintf_stderr(
+               "** NIFTI volume index %" PRId64 " (#%" PRId64 ")"
+               " is out of range [0,%" PRId64 "]\n", blist[c], c, nsubs-1);
+         return 0;
+      }
+
+   return 1;  /* all is well */
+}
+
+/*----------------------------------------------------------------------*/
+/* verify that NBL struct is a valid data source for the image
+ *
+ * return 1 if so, 0 otherwise
+*//*--------------------------------------------------------------------*/
+static int nifti_NBL_matches_nim(const nifti_image *nim,
+                                 const nifti_brick_list *NBL)
+{
+   int64_t volbytes = 0;     /* bytes per volume */
+   int64_t nvols = 0;
+   int     ind, errs = 0;
+
+
+   if( !nim || !NBL ) {
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** nifti_NBL_matches_nim: NULL pointer(s)\n");
+      return 0;
+   }
+
+   /* for nim, compute volbytes and nvols */
+   if( nim->ndim > 0 ) {
+      /* first 3 indices are over a single volume */
+      volbytes = (int64_t)nim->nbyper;
+      for( ind = 1; ind <= nim->ndim && ind < 4; ind++ )
+         volbytes *= nim->dim[ind];
+
+      for( ind = 4, nvols = 1; ind <= nim->ndim; ind++ )
+         nvols *= nim->dim[ind];
+   }
+
+   if( volbytes != NBL->bsize ) {
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("** NIFTI NBL/nim mismatch, volbytes = %" PRId64
+                        ", %" PRId64 "\n", NBL->bsize, volbytes);
+      errs++;
+   }
+
+   if( nvols != NBL->nbricks ) {
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("** NIFTI NBL/nim mismatch, nvols = %" PRId64
+                        ", %" PRId64 "\n", NBL->nbricks, nvols);
+      errs++;
+   }
+
+   if( errs ) return 0;
+   else if ( g_opts.debug > 2 )
+      Rc_fprintf_stderr("-- nim/NBL agree: nvols = %" PRId64
+                     ", nbytes = %" PRId64 "\n", nvols, volbytes);
+
+   return 1;
+}
+
+/* end of new nifti_image_read_bricks() functionality */
+
+/*----------------------------------------------------------------------*/
+/*! display the orientation from the quaternian fields
+ *
+ * \param mesg if non-NULL, display this message first
+ * \param mat  the matrix to convert to "nearest" orientation
+ *
+ * \return -1 if results cannot be determined, 0 if okay
+*//*--------------------------------------------------------------------*/
+int nifti2_disp_matrix_orient( const char * mesg, nifti_dmat44 mat )
+{
+   int i, j, k;
+
+   if ( mesg ) Rc_fputs_stderr( mesg );  /* use stdout? */
+
+   nifti_dmat44_to_orientation( mat, &i,&j,&k );
+   if ( i <= 0 || j <= 0 || k <= 0 ) return -1;
+
+   /* so we have good codes */
+   Rc_fprintf_stderr("  i orientation = '%s'\n"
+                   "  j orientation = '%s'\n"
+                   "  k orientation = '%s'\n",
+                   nifti_orientation_string(i),
+                   nifti_orientation_string(j),
+                   nifti_orientation_string(k) );
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! duplicate the given string (alloc length+1)
+ *
+ * \return allocated pointer (or NULL on failure)
+*//*--------------------------------------------------------------------*/
+char *nifti_strdup(const char *str)
+{
+  char *dup;
+
+  if( !str ) return NULL;       /* allow calls passing NULL */
+
+  dup = (char *)malloc(strlen(str) + 1);
+
+  /* check for failure */
+  if( dup ) strcpy(dup, str);
+  else      Rc_fprintf_stderr("** nifti_strdup: failed to alloc %" PRId64
+                           " bytes\n", (int64_t)(strlen(str)+1));
+
+  return dup;
+}
+
+
+/*---------------------------------------------------------------------------*/
+/*! Return a pointer to a string holding the name of a NIFTI datatype.
+
+    \param dt NIfTI-1 datatype
+
+    \return pointer to static string holding the datatype name
+
+    \warning Do not free() or modify this string!
+             It points to static storage.
+
+    \sa NIFTI1_DATATYPES group in nifti1.h
+*//*-------------------------------------------------------------------------*/
+char const * nifti_datatype_string( int dt )
+{
+   switch( dt ){
+     case DT_UNKNOWN:    return "UNKNOWN"    ;
+     case DT_BINARY:     return "BINARY"     ;
+     case DT_INT8:       return "INT8"       ;
+     case DT_UINT8:      return "UINT8"      ;
+     case DT_INT16:      return "INT16"      ;
+     case DT_UINT16:     return "UINT16"     ;
+     case DT_INT32:      return "INT32"      ;
+     case DT_UINT32:     return "UINT32"     ;
+     case DT_INT64:      return "INT64"      ;
+     case DT_UINT64:     return "UINT64"     ;
+     case DT_FLOAT32:    return "FLOAT32"    ;
+     case DT_FLOAT64:    return "FLOAT64"    ;
+     case DT_FLOAT128:   return "FLOAT128"   ;
+     case DT_COMPLEX64:  return "COMPLEX64"  ;
+     case DT_COMPLEX128: return "COMPLEX128" ;
+     case DT_COMPLEX256: return "COMPLEX256" ;
+     case DT_RGB24:      return "RGB24"      ;
+     case DT_RGBA32:     return "RGBA32"     ;
+     default:            break               ;
+   }
+   return "**ILLEGAL**" ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! Determine if the datatype code dt is an integer type (1=YES, 0=NO).
+
+    \return whether the given NIfTI-1 datatype code is valid
+
+    \sa     NIFTI1_DATATYPES group in nifti1.h
+*//*--------------------------------------------------------------------*/
+int nifti_is_inttype( int dt )
+{
+   switch( dt ){
+     case DT_UNKNOWN:    return 0 ;
+     case DT_BINARY:     return 0 ;
+     case DT_INT8:       return 1 ;
+     case DT_UINT8:      return 1 ;
+     case DT_INT16:      return 1 ;
+     case DT_UINT16:     return 1 ;
+     case DT_INT32:      return 1 ;
+     case DT_UINT32:     return 1 ;
+     case DT_INT64:      return 1 ;
+     case DT_UINT64:     return 1 ;
+     case DT_FLOAT32:    return 0 ;
+     case DT_FLOAT64:    return 0 ;
+     case DT_FLOAT128:   return 0 ;
+     case DT_COMPLEX64:  return 0 ;
+     case DT_COMPLEX128: return 0 ;
+     case DT_COMPLEX256: return 0 ;
+     case DT_RGB24:      return 1 ;
+     case DT_RGBA32:     return 1 ;
+     default:            break    ;
+   }
+   return 0 ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Return a pointer to a string holding the name of a NIFTI units type.
+
+    \param  uu NIfTI-1 unit code
+
+    \return pointer to static string for the given unit type
+
+    \warning Do not free() or modify this string!
+             It points to static storage.
+
+    \sa     NIFTI1_UNITS group in nifti1.h
+*//*-------------------------------------------------------------------------*/
+char const *nifti_units_string( int uu )
+{
+   switch( uu ){
+     case NIFTI_UNITS_METER:  return "m" ;
+     case NIFTI_UNITS_MM:     return "mm" ;
+     case NIFTI_UNITS_MICRON: return "um" ;
+     case NIFTI_UNITS_SEC:    return "s" ;
+     case NIFTI_UNITS_MSEC:   return "ms" ;
+     case NIFTI_UNITS_USEC:   return "us" ;
+     case NIFTI_UNITS_HZ:     return "Hz" ;
+     case NIFTI_UNITS_PPM:    return "ppm" ;
+     case NIFTI_UNITS_RADS:   return "rad/s" ;
+     default:                 break ;
+   }
+   return "Unknown" ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Return a pointer to a string holding the name of a NIFTI transform type.
+
+    \param  xx NIfTI-1 xform code
+
+    \return pointer to static string describing xform code
+
+    \warning Do not free() or modify this string!
+             It points to static storage.
+
+    \sa     NIFTI1_XFORM_CODES group in nifti1.h
+*//*-------------------------------------------------------------------------*/
+char const *nifti_xform_string( int xx )
+{
+   switch( xx ){
+     case NIFTI_XFORM_SCANNER_ANAT:  return "Scanner Anat" ;
+     case NIFTI_XFORM_ALIGNED_ANAT:  return "Aligned Anat" ;
+     case NIFTI_XFORM_TALAIRACH:     return "Talairach" ;
+     case NIFTI_XFORM_MNI_152:       return "MNI_152" ;
+     default:                        break ;
+   }
+   return "Unknown" ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Return a pointer to a string holding the name of a NIFTI intent type.
+
+    \param  ii NIfTI-1 intent code
+
+    \return pointer to static string describing code
+
+    \warning Do not free() or modify this string!
+             It points to static storage.
+
+    \sa     NIFTI1_INTENT_CODES group in nifti1.h
+*//*-------------------------------------------------------------------------*/
+char const *nifti_intent_string( int ii )
+{
+   switch( ii ){
+     case NIFTI_INTENT_CORREL:     return "Correlation statistic" ;
+     case NIFTI_INTENT_TTEST:      return "T-statistic" ;
+     case NIFTI_INTENT_FTEST:      return "F-statistic" ;
+     case NIFTI_INTENT_ZSCORE:     return "Z-score"     ;
+     case NIFTI_INTENT_CHISQ:      return "Chi-squared distribution" ;
+     case NIFTI_INTENT_BETA:       return "Beta distribution" ;
+     case NIFTI_INTENT_BINOM:      return "Binomial distribution" ;
+     case NIFTI_INTENT_GAMMA:      return "Gamma distribution" ;
+     case NIFTI_INTENT_POISSON:    return "Poisson distribution" ;
+     case NIFTI_INTENT_NORMAL:     return "Normal distribution" ;
+     case NIFTI_INTENT_FTEST_NONC: return "F-statistic noncentral" ;
+     case NIFTI_INTENT_CHISQ_NONC: return "Chi-squared noncentral" ;
+     case NIFTI_INTENT_LOGISTIC:   return "Logistic distribution" ;
+     case NIFTI_INTENT_LAPLACE:    return "Laplace distribution" ;
+     case NIFTI_INTENT_UNIFORM:    return "Uniform distribition" ;
+     case NIFTI_INTENT_TTEST_NONC: return "T-statistic noncentral" ;
+     case NIFTI_INTENT_WEIBULL:    return "Weibull distribution" ;
+     case NIFTI_INTENT_CHI:        return "Chi distribution" ;
+     case NIFTI_INTENT_INVGAUSS:   return "Inverse Gaussian distribution" ;
+     case NIFTI_INTENT_EXTVAL:     return "Extreme Value distribution" ;
+     case NIFTI_INTENT_PVAL:       return "P-value" ;
+
+     case NIFTI_INTENT_LOGPVAL:    return "Log P-value" ;
+     case NIFTI_INTENT_LOG10PVAL:  return "Log10 P-value" ;
+
+     case NIFTI_INTENT_ESTIMATE:   return "Estimate" ;
+     case NIFTI_INTENT_LABEL:      return "Label index" ;
+     case NIFTI_INTENT_NEURONAME:  return "NeuroNames index" ;
+     case NIFTI_INTENT_GENMATRIX:  return "General matrix" ;
+     case NIFTI_INTENT_SYMMATRIX:  return "Symmetric matrix" ;
+     case NIFTI_INTENT_DISPVECT:   return "Displacement vector" ;
+     case NIFTI_INTENT_VECTOR:     return "Vector" ;
+     case NIFTI_INTENT_POINTSET:   return "Pointset" ;
+     case NIFTI_INTENT_TRIANGLE:   return "Triangle" ;
+     case NIFTI_INTENT_QUATERNION: return "Quaternion" ;
+
+     case NIFTI_INTENT_DIMLESS:    return "Dimensionless number" ;
+     default:                      break ;
+   }
+   return "Unknown" ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Return a pointer to a string holding the name of a NIFTI slice_code.
+
+    \param  ss NIfTI-1 slice order code
+
+    \return pointer to static string describing code
+
+    \warning Do not free() or modify this string!
+             It points to static storage.
+
+    \sa     NIFTI1_SLICE_ORDER group in nifti1.h
+*//*-------------------------------------------------------------------------*/
+char const *nifti_slice_string( int ss )
+{
+   switch( ss ){
+     case NIFTI_SLICE_SEQ_INC:  return "sequential_increasing"    ;
+     case NIFTI_SLICE_SEQ_DEC:  return "sequential_decreasing"    ;
+     case NIFTI_SLICE_ALT_INC:  return "alternating_increasing"   ;
+     case NIFTI_SLICE_ALT_DEC:  return "alternating_decreasing"   ;
+     case NIFTI_SLICE_ALT_INC2: return "alternating_increasing_2" ;
+     case NIFTI_SLICE_ALT_DEC2: return "alternating_decreasing_2" ;
+     default: break;
+   }
+   return "Unknown" ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Return a pointer to a string holding the name of a NIFTI orientation.
+
+    \param ii orientation code
+
+    \return pointer to static string holding the orientation information
+
+    \warning Do not free() or modify the return string!
+             It points to static storage.
+
+    \sa  NIFTI_L2R in nifti1_io.h
+*//*-------------------------------------------------------------------------*/
+char const *nifti_orientation_string( int ii )
+{
+   switch( ii ){
+     case NIFTI_L2R: return "Left-to-Right" ;
+     case NIFTI_R2L: return "Right-to-Left" ;
+     case NIFTI_P2A: return "Posterior-to-Anterior" ;
+     case NIFTI_A2P: return "Anterior-to-Posterior" ;
+     case NIFTI_I2S: return "Inferior-to-Superior" ;
+     case NIFTI_S2I: return "Superior-to-Inferior" ;
+     default:        break;
+   }
+   return "Unknown" ;
+}
+
+/*--------------------------------------------------------------------------*/
+/*! Given a datatype code, set number of bytes per voxel and the swapsize.
+
+    \param datatype nifti1 datatype code
+    \param nbyper   pointer to return value: number of bytes per voxel
+    \param swapsize pointer to return value: size of swap blocks
+
+    \return appropriate values at nbyper and swapsize
+
+    The swapsize is set to 0 if this datatype doesn't ever need swapping.
+
+    \sa NIFTI1_DATATYPES in nifti1.h
+*//*------------------------------------------------------------------------*/
+void nifti_datatype_sizes( int datatype , int *nbyper, int *swapsize )
+{
+   int nb=0, ss=0 ;
+   switch( datatype ){
+     case DT_INT8:
+     case DT_UINT8:       nb =  1 ; ss =  0 ; break ;
+
+     case DT_INT16:
+     case DT_UINT16:      nb =  2 ; ss =  2 ; break ;
+
+     case DT_RGB24:       nb =  3 ; ss =  0 ; break ;
+     case DT_RGBA32:      nb =  4 ; ss =  0 ; break ;
+
+     case DT_INT32:
+     case DT_UINT32:
+     case DT_FLOAT32:     nb =  4 ; ss =  4 ; break ;
+
+     case DT_COMPLEX64:   nb =  8 ; ss =  4 ; break ;
+
+     case DT_FLOAT64:
+     case DT_INT64:
+     case DT_UINT64:      nb =  8 ; ss =  8 ; break ;
+
+     case DT_FLOAT128:    nb = 16 ; ss = 16 ; break ;
+
+     case DT_COMPLEX128:  nb = 16 ; ss =  8 ; break ;
+
+     case DT_COMPLEX256:  nb = 32 ; ss = 16 ; break ;
+     default:             break;
+   }
+
+   ASSIF(nbyper,nb) ; ASSIF(swapsize,ss) ; }
+
+
+/*-----------------------------------------------------------------*/
+/*! copy between float and double mat44 types 10 Jul, 2015 [rickr] */
+
+int nifti_mat44_to_dmat44(mat44 * fm, nifti_dmat44 * dm)
+{
+   int i, j;
+   if( !dm || !fm ) return 1;
+   for( i=0; i<4; i++ )
+      for( j=0; j<4; j++ )
+         dm->m[i][j] = (double)fm->m[i][j];
+   return 0;
+}
+
+int nifti_dmat44_to_mat44(nifti_dmat44 * dm, mat44 * fm)
+{
+   int i, j;
+   if( !dm || !fm ) return 1;
+   for( i=0; i<4; i++ )
+      for( j=0; j<4; j++ )
+         fm->m[i][j] = (float)dm->m[i][j];
+   return 0;
+}
+
+
+/*---------------------------------------------------------------------------*/
+/*! Given the quaternion parameters (etc.), compute a transformation matrix
+    of doubles.
+
+   See comments in nifti1.h for details.
+     - qb,qc,qd = quaternion parameters
+     - qx,qy,qz = offset parameters
+     - dx,dy,dz = grid stepsizes (non-negative inputs are set to 1.0)
+     - qfac     = sign of dz step (< 0 is negative; >= 0 is positive)
+
+   <pre>
+   If qx=qy=qz=0, dx=dy=dz=1, then the output is a rotation matrix.
+   For qfac >= 0, the rotation is proper.
+   For qfac <  0, the rotation is improper.
+   </pre>
+
+   \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h
+   \see nifti_mat44_to_quatern, nifti_make_orthog_mat44,
+       nifti_mat44_to_orientation
+
+*//*-------------------------------------------------------------------------*/
+nifti_dmat44 nifti_quatern_to_dmat44( double qb, double qc, double qd,
+                                double qx, double qy, double qz,
+                                double dx, double dy, double dz, double qfac )
+{
+   nifti_dmat44 R ;
+   double a,b=qb,c=qc,d=qd , xd,yd,zd ;
+
+   /* last row is always [ 0 0 0 1 ] */
+
+   R.m[3][0]=R.m[3][1]=R.m[3][2] = 0.0 ; R.m[3][3]= 1.0 ;
+
+   /* compute a parameter from b,c,d */
+
+   a = 1.0l - (b*b + c*c + d*d) ;
+   if( a < 1.e-7l ){                   /* special case */
+     a = 1.0l / sqrt(b*b+c*c+d*d) ;
+     b *= a ; c *= a ; d *= a ;        /* normalize (b,c,d) vector */
+     a = 0.0l ;                        /* a = 0 ==> 180 degree rotation */
+   } else{
+     a = sqrt(a) ;                     /* angle = 2*arccos(a) */
+   }
+
+   /* load rotation matrix, including scaling factors for voxel sizes */
+
+   xd = (dx > 0.0) ? dx : 1.0l ;       /* make sure are positive */
+   yd = (dy > 0.0) ? dy : 1.0l ;
+   zd = (dz > 0.0) ? dz : 1.0l ;
+
+   if( qfac < 0.0 ) zd = -zd ;         /* left handedness? */
+
+   R.m[0][0] = (a*a+b*b-c*c-d*d) * xd;
+   R.m[0][1] = 2.0l * (b*c-a*d        ) * yd ;
+   R.m[0][2] = 2.0l * (b*d+a*c        ) * zd ;
+   R.m[1][0] = 2.0l * (b*c+a*d        ) * xd ;
+   R.m[1][1] = (a*a+c*c-b*b-d*d) * yd;
+   R.m[1][2] = 2.0l * (c*d-a*b        ) * zd ;
+   R.m[2][0] = 2.0l * (b*d-a*c        ) * xd ;
+   R.m[2][1] = 2.0l * (c*d+a*b        ) * yd ;
+   R.m[2][2] = (a*a+d*d-c*c-b*b) * zd;
+
+   /* load offsets */
+
+   R.m[0][3] = qx ; R.m[1][3] = qy ; R.m[2][3] = qz ;
+
+   return R ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Given the quaternion parameters (etc.), compute a transformation matrix.
+
+   See comments in nifti1.h for details.
+     - qb,qc,qd = quaternion parameters
+     - qx,qy,qz = offset parameters
+     - dx,dy,dz = grid stepsizes (non-negative inputs are set to 1.0)
+     - qfac     = sign of dz step (< 0 is negative; >= 0 is positive)
+
+   <pre>
+   If qx=qy=qz=0, dx=dy=dz=1, then the output is a rotation matrix.
+   For qfac >= 0, the rotation is proper.
+   For qfac <  0, the rotation is improper.
+   </pre>
+
+   \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h
+   \see nifti_mat44_to_quatern, nifti_make_orthog_mat44,
+       nifti_mat44_to_orientation
+
+*//*-------------------------------------------------------------------------*/
+mat44 nifti_quatern_to_mat44( float qb, float qc, float qd,
+                              float qx, float qy, float qz,
+                              float dx, float dy, float dz, float qfac )
+{
+   mat44 R ;
+   double a,b=qb,c=qc,d=qd , xd,yd,zd ;
+
+   /* last row is always [ 0 0 0 1 ] */
+
+   R.m[3][0]=R.m[3][1]=R.m[3][2] = 0.0f ; R.m[3][3]= 1.0f ;
+
+   /* compute a parameter from b,c,d */
+
+   a = 1.0l - (b*b + c*c + d*d) ;
+   if( a < 1.e-7l ){                   /* special case */
+     a = 1.0l / sqrt(b*b+c*c+d*d) ;
+     b *= a ; c *= a ; d *= a ;        /* normalize (b,c,d) vector */
+     a = 0.0l ;                        /* a = 0 ==> 180 degree rotation */
+   } else{
+     a = sqrt(a) ;                     /* angle = 2*arccos(a) */
+   }
+
+   /* load rotation matrix, including scaling factors for voxel sizes */
+
+   xd = (dx > 0.0) ? dx : 1.0l ;       /* make sure are positive */
+   yd = (dy > 0.0) ? dy : 1.0l ;
+   zd = (dz > 0.0) ? dz : 1.0l ;
+
+   if( qfac < 0.0 ) zd = -zd ;         /* left handedness? */
+
+   R.m[0][0] = (float)( (a*a+b*b-c*c-d*d) * xd) ;
+   R.m[0][1] = 2.0l * (b*c-a*d        ) * yd ;
+   R.m[0][2] = 2.0l * (b*d+a*c        ) * zd ;
+   R.m[1][0] = 2.0l * (b*c+a*d        ) * xd ;
+   R.m[1][1] = (float)( (a*a+c*c-b*b-d*d) * yd) ;
+   R.m[1][2] = 2.0l * (c*d-a*b        ) * zd ;
+   R.m[2][0] = 2.0l * (b*d-a*c        ) * xd ;
+   R.m[2][1] = 2.0l * (c*d+a*b        ) * yd ;
+   R.m[2][2] = (float)( (a*a+d*d-c*c-b*b) * zd) ;
+
+   /* load offsets */
+
+   R.m[0][3] = qx ; R.m[1][3] = qy ; R.m[2][3] = qz ;
+
+   return R ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Given the 3x4 upper corner of the matrix R, compute the quaternion
+   parameters that fit it.
+
+     - Any NULL pointer on input won't get assigned (e.g., if you don't want
+       dx,dy,dz, just pass NULL in for those pointers).
+     - If the 3 input matrix columns are NOT orthogonal, they will be
+       orthogonalized prior to calculating the parameters, using
+       the polar decomposition to find the orthogonal matrix closest
+       to the column-normalized input matrix.
+     - However, if the 3 input matrix columns are NOT orthogonal, then
+       the matrix produced by nifti_quatern_to_dmat44 WILL have orthogonal
+       columns, so it won't be the same as the matrix input here.
+       This "feature" is because the NIFTI 'qform' transform is
+       deliberately not fully general -- it is intended to model a volume
+       with perpendicular axes.
+     - If the 3 input matrix columns are not even linearly independent,
+       you'll just have to take your luck, won't you?
+
+   \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h
+
+   \see nifti_quatern_to_dmat44, nifti_make_orthog_dmat44,
+       nifti_dmat44_to_orientation
+*//*-------------------------------------------------------------------------*/
+void nifti_dmat44_to_quatern(nifti_dmat44 R ,
+                             double *qb, double *qc, double *qd,
+                             double *qx, double *qy, double *qz,
+                             double *dx, double *dy, double *dz, double *qfac )
+{
+   double r11,r12,r13 , r21,r22,r23 , r31,r32,r33 ;
+   double xd,yd,zd , a,b,c,d ;
+   nifti_dmat33 P,Q ;
+
+   /* offset outputs are read write out of input matrix  */
+
+   ASSIF(qx,R.m[0][3]) ; ASSIF(qy,R.m[1][3]) ; ASSIF(qz,R.m[2][3]) ;
+
+   /* load 3x3 matrix into local variables */
+
+   r11 = R.m[0][0] ; r12 = R.m[0][1] ; r13 = R.m[0][2] ;
+   r21 = R.m[1][0] ; r22 = R.m[1][1] ; r23 = R.m[1][2] ;
+   r31 = R.m[2][0] ; r32 = R.m[2][1] ; r33 = R.m[2][2] ;
+
+   /* compute lengths of each column; these determine grid spacings  */
+
+   xd = sqrt( r11*r11 + r21*r21 + r31*r31 ) ;
+   yd = sqrt( r12*r12 + r22*r22 + r32*r32 ) ;
+   zd = sqrt( r13*r13 + r23*r23 + r33*r33 ) ;
+
+   /* if a column length is zero, patch the trouble */
+
+   if( xd == 0.0l ){ r11 = 1.0l ; r21 = r31 = 0.0l ; xd = 1.0l ; }
+   if( yd == 0.0l ){ r22 = 1.0l ; r12 = r32 = 0.0l ; yd = 1.0l ; }
+   if( zd == 0.0l ){ r33 = 1.0l ; r13 = r23 = 0.0l ; zd = 1.0l ; }
+
+   /* assign the output lengths */
+
+   ASSIF(dx,xd) ; ASSIF(dy,yd) ; ASSIF(dz,zd) ;
+
+   /* normalize the columns */
+
+   r11 /= xd ; r21 /= xd ; r31 /= xd ;
+   r12 /= yd ; r22 /= yd ; r32 /= yd ;
+   r13 /= zd ; r23 /= zd ; r33 /= zd ;
+
+   /* At this point, the matrix has normal columns, but we have to allow
+      for the fact that the hideous user may not have given us a matrix
+      with orthogonal columns.
+
+      So, now find the orthogonal matrix closest to the current matrix.
+
+      One reason for using the polar decomposition to get this
+      orthogonal matrix, rather than just directly orthogonalizing
+      the columns, is so that inputting the inverse matrix to R
+      will result in the inverse orthogonal matrix at this point.
+      If we just orthogonalized the columns, this wouldn't necessarily hold. */
+
+   Q.m[0][0] = r11 ; Q.m[0][1] = r12 ; Q.m[0][2] = r13 ; /* load Q */
+   Q.m[1][0] = r21 ; Q.m[1][1] = r22 ; Q.m[1][2] = r23 ;
+   Q.m[2][0] = r31 ; Q.m[2][1] = r32 ; Q.m[2][2] = r33 ;
+
+   P = nifti_dmat33_polar(Q) ;  /* P is orthog matrix closest to Q */
+
+   r11 = P.m[0][0] ; r12 = P.m[0][1] ; r13 = P.m[0][2] ; /* unload */
+   r21 = P.m[1][0] ; r22 = P.m[1][1] ; r23 = P.m[1][2] ;
+   r31 = P.m[2][0] ; r32 = P.m[2][1] ; r33 = P.m[2][2] ;
+
+   /*                            [ r11 r12 r13 ]               */
+   /* at this point, the matrix  [ r21 r22 r23 ] is orthogonal */
+   /*                            [ r31 r32 r33 ]               */
+
+   /* compute the determinant to determine if it is proper */
+
+   zd = r11*r22*r33-r11*r32*r23-r21*r12*r33
+       +r21*r32*r13+r31*r12*r23-r31*r22*r13 ;  /* should be -1 or 1 */
+
+   if( zd > 0 ){             /* proper */
+     ASSIF(qfac,1.0) ;
+   } else {                  /* improper ==> flip 3rd column */
+     ASSIF(qfac,-1.0) ;
+     r13 = -r13 ; r23 = -r23 ; r33 = -r33 ;
+   }
+
+   /* now, compute quaternion parameters */
+
+   a = r11 + r22 + r33 + 1.0l ;
+
+   if( a > 0.5l ){                /* simplest case */
+     a = 0.5l * sqrt(a) ;
+     b = 0.25l * (r32-r23) / a ;
+     c = 0.25l * (r13-r31) / a ;
+     d = 0.25l * (r21-r12) / a ;
+   } else {                       /* trickier case */
+     xd = 1.0 + r11 - (r22+r33) ;  /* 4*b*b */
+     yd = 1.0 + r22 - (r11+r33) ;  /* 4*c*c */
+     zd = 1.0 + r33 - (r11+r22) ;  /* 4*d*d */
+     if( xd > 1.0 ){
+       b = 0.5l * sqrt(xd) ;
+       c = 0.25l* (r12+r21) / b ;
+       d = 0.25l* (r13+r31) / b ;
+       a = 0.25l* (r32-r23) / b ;
+     } else if( yd > 1.0 ){
+       c = 0.5l * sqrt(yd) ;
+       b = 0.25l* (r12+r21) / c ;
+       d = 0.25l* (r23+r32) / c ;
+       a = 0.25l* (r13-r31) / c ;
+     } else {
+       d = 0.5l * sqrt(zd) ;
+       b = 0.25l* (r13+r31) / d ;
+       c = 0.25l* (r23+r32) / d ;
+       a = 0.25l* (r21-r12) / d ;
+     }
+     /* to be mathematically consistent, this would include a = -a */
+     if( a < 0.0l ){ b=-b ; c=-c ; d=-d; }
+   }
+
+   ASSIF(qb,b) ; ASSIF(qc,c) ; ASSIF(qd,d) ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Given the 3x4 upper corner of the matrix R, compute the quaternion
+   parameters that fit it.
+
+     - Any NULL pointer on input won't get assigned (e.g., if you don't want
+       dx,dy,dz, just pass NULL in for those pointers).
+     - If the 3 input matrix columns are NOT orthogonal, they will be
+       orthogonalized prior to calculating the parameters, using
+       the polar decomposition to find the orthogonal matrix closest
+       to the column-normalized input matrix.
+     - However, if the 3 input matrix columns are NOT orthogonal, then
+       the matrix produced by nifti_quatern_to_mat44 WILL have orthogonal
+       columns, so it won't be the same as the matrix input here.
+       This "feature" is because the NIFTI 'qform' transform is
+       deliberately not fully general -- it is intended to model a volume
+       with perpendicular axes.
+     - If the 3 input matrix columns are not even linearly independent,
+       you'll just have to take your luck, won't you?
+
+   \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h
+
+   \see nifti_quatern_to_mat44, nifti_make_orthog_mat44,
+       nifti_mat44_to_orientation
+*//*-------------------------------------------------------------------------*/
+void nifti_mat44_to_quatern( mat44 R ,
+                             float *qb, float *qc, float *qd,
+                             float *qx, float *qy, float *qz,
+                             float *dx, float *dy, float *dz, float *qfac )
+{
+   double r11,r12,r13 , r21,r22,r23 , r31,r32,r33 ;
+   double xd,yd,zd , a,b,c,d ;
+   mat33 P,Q ;
+
+   /* offset outputs are read write out of input matrix  */
+
+   ASSIF(qx,R.m[0][3]) ; ASSIF(qy,R.m[1][3]) ; ASSIF(qz,R.m[2][3]) ;
+
+   /* load 3x3 matrix into local variables */
+
+   r11 = R.m[0][0] ; r12 = R.m[0][1] ; r13 = R.m[0][2] ;
+   r21 = R.m[1][0] ; r22 = R.m[1][1] ; r23 = R.m[1][2] ;
+   r31 = R.m[2][0] ; r32 = R.m[2][1] ; r33 = R.m[2][2] ;
+
+   /* compute lengths of each column; these determine grid spacings  */
+
+   xd = sqrt( r11*r11 + r21*r21 + r31*r31 ) ;
+   yd = sqrt( r12*r12 + r22*r22 + r32*r32 ) ;
+   zd = sqrt( r13*r13 + r23*r23 + r33*r33 ) ;
+
+   /* if a column length is zero, patch the trouble */
+
+   if( xd == 0.0l ){ r11 = 1.0l ; r21 = r31 = 0.0l ; xd = 1.0l ; }
+   if( yd == 0.0l ){ r22 = 1.0l ; r12 = r32 = 0.0l ; yd = 1.0l ; }
+   if( zd == 0.0l ){ r33 = 1.0l ; r13 = r23 = 0.0l ; zd = 1.0l ; }
+
+   /* assign the output lengths */
+
+   ASSIF(dx,(float)xd) ; ASSIF(dy,(float)yd) ; ASSIF(dz,(float)zd) ;
+
+   /* normalize the columns */
+
+   r11 /= xd ; r21 /= xd ; r31 /= xd ;
+   r12 /= yd ; r22 /= yd ; r32 /= yd ;
+   r13 /= zd ; r23 /= zd ; r33 /= zd ;
+
+   /* At this point, the matrix has normal columns, but we have to allow
+      for the fact that the hideous user may not have given us a matrix
+      with orthogonal columns.
+
+      So, now find the orthogonal matrix closest to the current matrix.
+
+      One reason for using the polar decomposition to get this
+      orthogonal matrix, rather than just directly orthogonalizing
+      the columns, is so that inputting the inverse matrix to R
+      will result in the inverse orthogonal matrix at this point.
+      If we just orthogonalized the columns, this wouldn't necessarily hold. */
+
+   Q.m[0][0] = (float)r11 ; Q.m[0][1] = (float)r12 ; Q.m[0][2] = (float)r13 ; /* load Q */
+   Q.m[1][0] = (float)r21 ; Q.m[1][1] = (float)r22 ; Q.m[1][2] = (float)r23 ;
+   Q.m[2][0] = (float)r31 ; Q.m[2][1] = (float)r32 ; Q.m[2][2] = (float)r33 ;
+
+   P = nifti_mat33_polar(Q) ;  /* P is orthog matrix closest to Q */
+
+   r11 = P.m[0][0] ; r12 = P.m[0][1] ; r13 = P.m[0][2] ; /* unload */
+   r21 = P.m[1][0] ; r22 = P.m[1][1] ; r23 = P.m[1][2] ;
+   r31 = P.m[2][0] ; r32 = P.m[2][1] ; r33 = P.m[2][2] ;
+
+   /*                            [ r11 r12 r13 ]               */
+   /* at this point, the matrix  [ r21 r22 r23 ] is orthogonal */
+   /*                            [ r31 r32 r33 ]               */
+
+   /* compute the determinant to determine if it is proper */
+
+   zd = r11*r22*r33-r11*r32*r23-r21*r12*r33
+       +r21*r32*r13+r31*r12*r23-r31*r22*r13 ;  /* should be -1 or 1 */
+
+   if( zd > 0 ){             /* proper */
+     ASSIF(qfac,1.0f) ;
+   } else {                  /* improper ==> flip 3rd column */
+     ASSIF(qfac,-1.0f) ;
+     r13 = -r13 ; r23 = -r23 ; r33 = -r33 ;
+   }
+
+   /* now, compute quaternion parameters */
+
+   a = r11 + r22 + r33 + 1.0l ;
+
+   if( a > 0.5l ){                /* simplest case */
+     a = 0.5l * sqrt(a) ;
+     b = 0.25l * (r32-r23) / a ;
+     c = 0.25l * (r13-r31) / a ;
+     d = 0.25l * (r21-r12) / a ;
+   } else {                       /* trickier case */
+     xd = 1.0 + r11 - (r22+r33) ;  /* 4*b*b */
+     yd = 1.0 + r22 - (r11+r33) ;  /* 4*c*c */
+     zd = 1.0 + r33 - (r11+r22) ;  /* 4*d*d */
+     if( xd > 1.0 ){
+       b = 0.5l * sqrt(xd) ;
+       c = 0.25l* (r12+r21) / b ;
+       d = 0.25l* (r13+r31) / b ;
+       a = 0.25l* (r32-r23) / b ;
+     } else if( yd > 1.0 ){
+       c = 0.5l * sqrt(yd) ;
+       b = 0.25l* (r12+r21) / c ;
+       d = 0.25l* (r23+r32) / c ;
+       a = 0.25l* (r13-r31) / c ;
+     } else {
+       d = 0.5l * sqrt(zd) ;
+       b = 0.25l* (r13+r31) / d ;
+       c = 0.25l* (r23+r32) / d ;
+       a = 0.25l* (r21-r12) / d ;
+     }
+     /* to be mathematically consistent, this would include a = -a */
+     if( a < 0.0l ){ b=-b ; c=-c ; d=-d; }
+   }
+
+   ASSIF(qb,(float)b) ; ASSIF(qc,(float)c) ; ASSIF(qd,(float)d) ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Compute the inverse of a bordered 4x4 matrix.
+
+     <pre>
+   - Some numerical code fragments were generated by Maple 8.
+   - If a singular matrix is input, the output matrix will be all zero.
+   - You can check for this by examining the [3][3] element, which will
+     be 1.0 for the normal case and 0.0 for the bad case.
+
+     The input matrix should have the form:
+        [ r11 r12 r13 v1 ]
+        [ r21 r22 r23 v2 ]
+        [ r31 r32 r33 v3 ]
+        [  0   0   0   1 ]
+     </pre>
+*//*-------------------------------------------------------------------------*/
+nifti_dmat44 nifti_dmat44_inverse( nifti_dmat44 R )
+{
+   double r11,r12,r13,r21,r22,r23,r31,r32,r33,v1,v2,v3 , deti ;
+   nifti_dmat44 Q ;
+                                                       /*  INPUT MATRIX IS:  */
+   r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2];  /* [ r11 r12 r13 v1 ] */
+   r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2];  /* [ r21 r22 r23 v2 ] */
+   r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2];  /* [ r31 r32 r33 v3 ] */
+   v1  = R.m[0][3]; v2  = R.m[1][3]; v3  = R.m[2][3];  /* [  0   0   0   1 ] */
+
+   deti = r11*r22*r33-r11*r32*r23-r21*r12*r33
+         +r21*r32*r13+r31*r12*r23-r31*r22*r13 ;
+
+   if( deti != 0.0l ) deti = 1.0l / deti ;
+
+   Q.m[0][0] = deti*( r22*r33-r32*r23);
+   Q.m[0][1] = deti*(-r12*r33+r32*r13);
+   Q.m[0][2] = deti*( r12*r23-r22*r13);
+   Q.m[0][3] = deti*(-r12*r23*v3+r12*v2*r33+r22*r13*v3
+                     -r22*v1*r33-r32*r13*v2+r32*v1*r23);
+
+   Q.m[1][0] = deti*(-r21*r33+r31*r23);
+   Q.m[1][1] = deti*( r11*r33-r31*r13);
+   Q.m[1][2] = deti*(-r11*r23+r21*r13);
+   Q.m[1][3] = deti*( r11*r23*v3-r11*v2*r33-r21*r13*v3
+                     +r21*v1*r33+r31*r13*v2-r31*v1*r23);
+
+   Q.m[2][0] = deti*( r21*r32-r31*r22);
+   Q.m[2][1] = deti*(-r11*r32+r31*r12);
+   Q.m[2][2] = deti*( r11*r22-r21*r12);
+   Q.m[2][3] = deti*(-r11*r22*v3+r11*r32*v2+r21*r12*v3
+                     -r21*r32*v1-r31*r12*v2+r31*r22*v1);
+
+   Q.m[3][0] = Q.m[3][1] = Q.m[3][2] = 0.0l ;
+   Q.m[3][3] = (deti == 0.0l) ? 0.0l : 1.0l ; /* failure flag if deti == 0 */
+
+   return Q ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Compute the inverse of a bordered 4x4 matrix.
+
+     <pre>
+   - Some numerical code fragments were generated by Maple 8.
+   - If a singular matrix is input, the output matrix will be all zero.
+   - You can check for this by examining the [3][3] element, which will
+     be 1.0 for the normal case and 0.0 for the bad case.
+
+     The input matrix should have the form:
+        [ r11 r12 r13 v1 ]
+        [ r21 r22 r23 v2 ]
+        [ r31 r32 r33 v3 ]
+        [  0   0   0   1 ]
+     </pre>
+*//*-------------------------------------------------------------------------*/
+mat44 nifti_mat44_inverse( mat44 R )
+{
+   double r11,r12,r13,r21,r22,r23,r31,r32,r33,v1,v2,v3 , deti ;
+   mat44 Q ;
+                                                       /*  INPUT MATRIX IS:  */
+   r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2];  /* [ r11 r12 r13 v1 ] */
+   r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2];  /* [ r21 r22 r23 v2 ] */
+   r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2];  /* [ r31 r32 r33 v3 ] */
+   v1  = R.m[0][3]; v2  = R.m[1][3]; v3  = R.m[2][3];  /* [  0   0   0   1 ] */
+
+   deti = r11*r22*r33-r11*r32*r23-r21*r12*r33
+         +r21*r32*r13+r31*r12*r23-r31*r22*r13 ;
+
+   if( deti != 0.0l ) deti = 1.0l / deti ;
+
+   Q.m[0][0] = (float)( deti*( r22*r33-r32*r23) ) ;
+   Q.m[0][1] = (float)( deti*(-r12*r33+r32*r13) ) ;
+   Q.m[0][2] = (float)( deti*( r12*r23-r22*r13) ) ;
+   Q.m[0][3] = (float)( deti*(-r12*r23*v3+r12*v2*r33+r22*r13*v3
+                     -r22*v1*r33-r32*r13*v2+r32*v1*r23) ) ;
+
+   Q.m[1][0] = (float)( deti*(-r21*r33+r31*r23) ) ;
+   Q.m[1][1] = (float)( deti*( r11*r33-r31*r13) ) ;
+   Q.m[1][2] = (float)( deti*(-r11*r23+r21*r13) ) ;
+   Q.m[1][3] = (float)( deti*( r11*r23*v3-r11*v2*r33-r21*r13*v3
+                     +r21*v1*r33+r31*r13*v2-r31*v1*r23) ) ;
+
+   Q.m[2][0] = (float)( deti*( r21*r32-r31*r22) ) ;
+   Q.m[2][1] = (float)( deti*(-r11*r32+r31*r12) ) ;
+   Q.m[2][2] = (float)( deti*( r11*r22-r21*r12) ) ;
+   Q.m[2][3] = (float)( deti*(-r11*r22*v3+r11*r32*v2+r21*r12*v3
+                     -r21*r32*v1-r31*r12*v2+r31*r22*v1) ) ;
+
+   Q.m[3][0] = Q.m[3][1] = Q.m[3][2] = 0.0l ;
+   Q.m[3][3] = (deti == 0.0l) ? 0.0l : 1.0l ; /* failure flag if deti == 0 */
+
+   return Q ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Input 9 floats and make an orthgonal nifti_dmat44 out of them.
+
+   Each row is normalized, then nifti_mat33_polar() is used to orthogonalize
+   them.  If row #3 (r31,r32,r33) is input as zero, then it will be taken to
+   be the cross product of rows #1 and #2.
+
+   This function can be used to create a rotation matrix for transforming
+   an oblique volume to anatomical coordinates.  For this application:
+    - row #1 (r11,r12,r13) is the direction vector along the image i-axis
+    - row #2 (r21,r22,r23) is the direction vector along the image j-axis
+    - row #3 (r31,r32,r33) is the direction vector along the slice direction
+      (if available; otherwise enter it as 0's)
+
+   The first 2 rows can be taken from the DICOM attribute (0020,0037)
+   "Image Orientation (Patient)".
+
+   After forming the rotation matrix, the complete affine transformation from
+   (i,j,k) grid indexes to (x,y,z) spatial coordinates can be computed by
+   multiplying each column by the appropriate grid spacing:
+    - column #1 (R.m[0][0],R.m[1][0],R.m[2][0]) by delta-x
+    - column #2 (R.m[0][1],R.m[1][1],R.m[2][1]) by delta-y
+    - column #3 (R.m[0][2],R.m[1][2],R.m[2][2]) by delta-z
+
+   and by then placing the center (x,y,z) coordinates of voxel (0,0,0) into
+   the column #4 (R.m[0][3],R.m[1][3],R.m[2][3]).
+
+   \sa nifti_quatern_to_dmat44, nifti_dmat44_to_quatern,
+       nifti_dmat44_to_orientation
+*//*-------------------------------------------------------------------------*/
+nifti_dmat44 nifti_make_orthog_dmat44( double r11, double r12, double r13 ,
+                               double r21, double r22, double r23 ,
+                               double r31, double r32, double r33  )
+{
+   nifti_dmat44 R ;
+   nifti_dmat33 Q , P ;
+   double val ;
+
+   R.m[3][0] = R.m[3][1] = R.m[3][2] = 0.0l ; R.m[3][3] = 1.0l ;
+
+   Q.m[0][0] = r11 ; Q.m[0][1] = r12 ; Q.m[0][2] = r13 ; /* load Q */
+   Q.m[1][0] = r21 ; Q.m[1][1] = r22 ; Q.m[1][2] = r23 ;
+   Q.m[2][0] = r31 ; Q.m[2][1] = r32 ; Q.m[2][2] = r33 ;
+
+   /* normalize row 1 */
+
+   val = Q.m[0][0]*Q.m[0][0] + Q.m[0][1]*Q.m[0][1] + Q.m[0][2]*Q.m[0][2] ;
+   if( val > 0.0l ){
+     val = 1.0l / sqrt(val) ;
+     Q.m[0][0] *= val ; Q.m[0][1] *= val ; Q.m[0][2] *= val ;
+   } else {
+     Q.m[0][0] = 1.0l ; Q.m[0][1] = 0.0l ; Q.m[0][2] = 0.0l ;
+   }
+
+   /* normalize row 2 */
+
+   val = Q.m[1][0]*Q.m[1][0] + Q.m[1][1]*Q.m[1][1] + Q.m[1][2]*Q.m[1][2] ;
+   if( val > 0.0l ){
+     val = 1.0l / sqrt(val) ;
+     Q.m[1][0] *= val ; Q.m[1][1] *= val ; Q.m[1][2] *= val ;
+   } else {
+     Q.m[1][0] = 0.0l ; Q.m[1][1] = 1.0l ; Q.m[1][2] = 0.0l ;
+   }
+
+   /* normalize row 3 */
+
+   val = Q.m[2][0]*Q.m[2][0] + Q.m[2][1]*Q.m[2][1] + Q.m[2][2]*Q.m[2][2] ;
+   if( val > 0.0l ){
+     val = 1.0l / sqrt(val) ;
+     Q.m[2][0] *= val ; Q.m[2][1] *= val ; Q.m[2][2] *= val ;
+   } else {
+     Q.m[2][0] = Q.m[0][1]*Q.m[1][2] - Q.m[0][2]*Q.m[1][1] ;  /* cross */
+     Q.m[2][1] = Q.m[0][2]*Q.m[1][0] - Q.m[0][0]*Q.m[1][2] ;  /* product */
+     Q.m[2][2] = Q.m[0][0]*Q.m[1][1] - Q.m[0][1]*Q.m[1][0] ;
+   }
+
+   P = nifti_dmat33_polar(Q) ;  /* P is orthog matrix closest to Q */
+
+   R.m[0][0] = P.m[0][0] ; R.m[0][1] = P.m[0][1] ; R.m[0][2] = P.m[0][2] ;
+   R.m[1][0] = P.m[1][0] ; R.m[1][1] = P.m[1][1] ; R.m[1][2] = P.m[1][2] ;
+   R.m[2][0] = P.m[2][0] ; R.m[2][1] = P.m[2][1] ; R.m[2][2] = P.m[2][2] ;
+
+   R.m[0][3] = R.m[1][3] = R.m[2][3] = 0.0f ; return R ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Input 9 floats and make an orthgonal mat44 out of them.
+
+   Each row is normalized, then nifti_mat33_polar() is used to orthogonalize
+   them.  If row #3 (r31,r32,r33) is input as zero, then it will be taken to
+   be the cross product of rows #1 and #2.
+
+   This function can be used to create a rotation matrix for transforming
+   an oblique volume to anatomical coordinates.  For this application:
+    - row #1 (r11,r12,r13) is the direction vector along the image i-axis
+    - row #2 (r21,r22,r23) is the direction vector along the image j-axis
+    - row #3 (r31,r32,r33) is the direction vector along the slice direction
+      (if available; otherwise enter it as 0's)
+
+   The first 2 rows can be taken from the DICOM attribute (0020,0037)
+   "Image Orientation (Patient)".
+
+   After forming the rotation matrix, the complete affine transformation from
+   (i,j,k) grid indexes to (x,y,z) spatial coordinates can be computed by
+   multiplying each column by the appropriate grid spacing:
+    - column #1 (R.m[0][0],R.m[1][0],R.m[2][0]) by delta-x
+    - column #2 (R.m[0][1],R.m[1][1],R.m[2][1]) by delta-y
+    - column #3 (R.m[0][2],R.m[1][2],R.m[2][2]) by delta-z
+
+   and by then placing the center (x,y,z) coordinates of voxel (0,0,0) into
+   the column #4 (R.m[0][3],R.m[1][3],R.m[2][3]).
+
+   \sa nifti_quatern_to_mat44, nifti_mat44_to_quatern,
+       nifti_mat44_to_orientation
+*//*-------------------------------------------------------------------------*/
+mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 ,
+                               float r21, float r22, float r23 ,
+                               float r31, float r32, float r33  )
+{
+   mat44 R ;
+   mat33 Q , P ;
+   double val ;
+
+   R.m[3][0] = R.m[3][1] = R.m[3][2] = 0.0l ; R.m[3][3] = 1.0l ;
+
+   Q.m[0][0] = r11 ; Q.m[0][1] = r12 ; Q.m[0][2] = r13 ; /* load Q */
+   Q.m[1][0] = r21 ; Q.m[1][1] = r22 ; Q.m[1][2] = r23 ;
+   Q.m[2][0] = r31 ; Q.m[2][1] = r32 ; Q.m[2][2] = r33 ;
+
+   /* normalize row 1 */
+
+   val = Q.m[0][0]*Q.m[0][0] + Q.m[0][1]*Q.m[0][1] + Q.m[0][2]*Q.m[0][2] ;
+   if( val > 0.0l ){
+     val = 1.0l / sqrt(val) ;
+     Q.m[0][0] *= (float)val ; Q.m[0][1] *= (float)val ; Q.m[0][2] *= (float)val ;
+   } else {
+     Q.m[0][0] = 1.0l ; Q.m[0][1] = 0.0l ; Q.m[0][2] = 0.0l ;
+   }
+
+   /* normalize row 2 */
+
+   val = Q.m[1][0]*Q.m[1][0] + Q.m[1][1]*Q.m[1][1] + Q.m[1][2]*Q.m[1][2] ;
+   if( val > 0.0l ){
+     val = 1.0l / sqrt(val) ;
+     Q.m[1][0] *= (float)val ; Q.m[1][1] *= (float)val ; Q.m[1][2] *= (float)val ;
+   } else {
+     Q.m[1][0] = 0.0l ; Q.m[1][1] = 1.0l ; Q.m[1][2] = 0.0l ;
+   }
+
+   /* normalize row 3 */
+
+   val = Q.m[2][0]*Q.m[2][0] + Q.m[2][1]*Q.m[2][1] + Q.m[2][2]*Q.m[2][2] ;
+   if( val > 0.0l ){
+     val = 1.0l / sqrt(val) ;
+     Q.m[2][0] *= (float)val ; Q.m[2][1] *= (float)val ; Q.m[2][2] *= (float)val ;
+   } else {
+     Q.m[2][0] = Q.m[0][1]*Q.m[1][2] - Q.m[0][2]*Q.m[1][1] ;  /* cross */
+     Q.m[2][1] = Q.m[0][2]*Q.m[1][0] - Q.m[0][0]*Q.m[1][2] ;  /* product */
+     Q.m[2][2] = Q.m[0][0]*Q.m[1][1] - Q.m[0][1]*Q.m[1][0] ;
+   }
+
+   P = nifti_mat33_polar(Q) ;  /* P is orthog matrix closest to Q */
+
+   R.m[0][0] = P.m[0][0] ; R.m[0][1] = P.m[0][1] ; R.m[0][2] = P.m[0][2] ;
+   R.m[1][0] = P.m[1][0] ; R.m[1][1] = P.m[1][1] ; R.m[1][2] = P.m[1][2] ;
+   R.m[2][0] = P.m[2][0] ; R.m[2][1] = P.m[2][1] ; R.m[2][2] = P.m[2][2] ;
+
+   R.m[0][3] = R.m[1][3] = R.m[2][3] = 0.0f ; return R ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! compute the inverse of a 3x3 matrix
+*//*--------------------------------------------------------------------*/
+nifti_dmat33 nifti_dmat33_inverse( nifti_dmat33 R ) /* inverse of 3x3 matrix */
+{
+   double r11,r12,r13,r21,r22,r23,r31,r32,r33 , deti ;
+   nifti_dmat33 Q ;
+                                                       /*  INPUT MATRIX:  */
+   r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2];  /* [ r11 r12 r13 ] */
+   r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2];  /* [ r21 r22 r23 ] */
+   r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2];  /* [ r31 r32 r33 ] */
+
+   deti = r11*r22*r33-r11*r32*r23-r21*r12*r33
+         +r21*r32*r13+r31*r12*r23-r31*r22*r13 ;
+
+   if( deti != 0.0l ) deti = 1.0l / deti ;
+
+   Q.m[0][0] = deti*( r22*r33-r32*r23);
+   Q.m[0][1] = deti*(-r12*r33+r32*r13);
+   Q.m[0][2] = deti*( r12*r23-r22*r13);
+
+   Q.m[1][0] = deti*(-r21*r33+r31*r23);
+   Q.m[1][1] = deti*( r11*r33-r31*r13);
+   Q.m[1][2] = deti*(-r11*r23+r21*r13);
+
+   Q.m[2][0] = deti*( r21*r32-r31*r22);
+   Q.m[2][1] = deti*(-r11*r32+r31*r12);
+   Q.m[2][2] = deti*( r11*r22-r21*r12);
+
+   return Q ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! compute the inverse of a 3x3 matrix
+*//*--------------------------------------------------------------------*/
+mat33 nifti_mat33_inverse( mat33 R )   /* inverse of 3x3 matrix */
+{
+   double r11,r12,r13,r21,r22,r23,r31,r32,r33 , deti ;
+   mat33 Q ;
+                                                       /*  INPUT MATRIX:  */
+   r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2];  /* [ r11 r12 r13 ] */
+   r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2];  /* [ r21 r22 r23 ] */
+   r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2];  /* [ r31 r32 r33 ] */
+
+   deti = r11*r22*r33-r11*r32*r23-r21*r12*r33
+         +r21*r32*r13+r31*r12*r23-r31*r22*r13 ;
+
+   if( deti != 0.0l ) deti = 1.0l / deti ;
+
+   Q.m[0][0] = (float)( deti*( r22*r33-r32*r23) ) ;
+   Q.m[0][1] = (float)( deti*(-r12*r33+r32*r13) ) ;
+   Q.m[0][2] = (float)( deti*( r12*r23-r22*r13) ) ;
+
+   Q.m[1][0] = (float)( deti*(-r21*r33+r31*r23) ) ;
+   Q.m[1][1] = (float)( deti*( r11*r33-r31*r13) ) ;
+   Q.m[1][2] = (float)( deti*(-r11*r23+r21*r13) ) ;
+
+   Q.m[2][0] = (float)( deti*( r21*r32-r31*r22) ) ;
+   Q.m[2][1] = (float)( deti*(-r11*r32+r31*r12) ) ;
+   Q.m[2][2] = (float)( deti*( r11*r22-r21*r12) ) ;
+
+   return Q ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! compute the determinant of a 3x3 matrix
+*//*--------------------------------------------------------------------*/
+double nifti_dmat33_determ( nifti_dmat33 R )   /* determinant of 3x3 matrix */
+{
+   double r11,r12,r13,r21,r22,r23,r31,r32,r33 ;
+                                                       /*  INPUT MATRIX:  */
+   r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2];  /* [ r11 r12 r13 ] */
+   r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2];  /* [ r21 r22 r23 ] */
+   r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2];  /* [ r31 r32 r33 ] */
+
+   return (r11*r22*r33-r11*r32*r23-r21*r12*r33
+         +r21*r32*r13+r31*r12*r23-r31*r22*r13) ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! compute the determinant of a 3x3 matrix
+*//*--------------------------------------------------------------------*/
+float nifti_mat33_determ( mat33 R )   /* determinant of 3x3 matrix */
+{
+   double r11,r12,r13,r21,r22,r23,r31,r32,r33 ;
+                                                       /*  INPUT MATRIX:  */
+   r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2];  /* [ r11 r12 r13 ] */
+   r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2];  /* [ r21 r22 r23 ] */
+   r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2];  /* [ r31 r32 r33 ] */
+
+   return (float)(r11*r22*r33-r11*r32*r23-r21*r12*r33
+         +r21*r32*r13+r31*r12*r23-r31*r22*r13) ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! compute the max row norm of a 3x3 matrix
+*//*--------------------------------------------------------------------*/
+double nifti_dmat33_rownorm( nifti_dmat33 A )  /* max row norm of 3x3 matrix */
+{
+   double r1,r2,r3 ;
+
+   r1 = fabs(A.m[0][0])+fabs(A.m[0][1])+fabs(A.m[0][2]);
+   r2 = fabs(A.m[1][0])+fabs(A.m[1][1])+fabs(A.m[1][2]);
+   r3 = fabs(A.m[2][0])+fabs(A.m[2][1])+fabs(A.m[2][2]);
+   if( r1 < r2 ) r1 = r2 ;
+   if( r1 < r3 ) r1 = r3 ;
+   return r1 ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! compute the max row norm of a 3x3 matrix
+*//*--------------------------------------------------------------------*/
+float nifti_mat33_rownorm( mat33 A )  /* max row norm of 3x3 matrix */
+{
+   float r1,r2,r3 ;
+
+   r1 = (float)( fabs(A.m[0][0])+fabs(A.m[0][1])+fabs(A.m[0][2]) ) ;
+   r2 = (float)( fabs(A.m[1][0])+fabs(A.m[1][1])+fabs(A.m[1][2]) ) ;
+   r3 = (float)( fabs(A.m[2][0])+fabs(A.m[2][1])+fabs(A.m[2][2]) ) ;
+   if( r1 < r2 ) r1 = r2 ;
+   if( r1 < r3 ) r1 = r3 ;
+   return r1 ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! compute the max column norm of a 3x3 matrix
+*//*--------------------------------------------------------------------*/
+double nifti_dmat33_colnorm( nifti_dmat33 A )/* max column norm of 3x3 matrix */
+{
+   double r1,r2,r3 ;
+
+   r1 = fabs(A.m[0][0])+fabs(A.m[1][0])+fabs(A.m[2][0]);
+   r2 = fabs(A.m[0][1])+fabs(A.m[1][1])+fabs(A.m[2][1]);
+   r3 = fabs(A.m[0][2])+fabs(A.m[1][2])+fabs(A.m[2][2]);
+   if( r1 < r2 ) r1 = r2 ;
+   if( r1 < r3 ) r1 = r3 ;
+   return r1 ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! compute the max column norm of a 3x3 matrix
+*//*--------------------------------------------------------------------*/
+float nifti_mat33_colnorm( mat33 A )  /* max column norm of 3x3 matrix */
+{
+   float r1,r2,r3 ;
+
+   r1 = (float)( fabs(A.m[0][0])+fabs(A.m[1][0])+fabs(A.m[2][0]) ) ;
+   r2 = (float)( fabs(A.m[0][1])+fabs(A.m[1][1])+fabs(A.m[2][1]) ) ;
+   r3 = (float)( fabs(A.m[0][2])+fabs(A.m[1][2])+fabs(A.m[2][2]) ) ;
+   if( r1 < r2 ) r1 = r2 ;
+   if( r1 < r3 ) r1 = r3 ;
+   return r1 ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! multiply 2 3x3 matrices
+*//*--------------------------------------------------------------------*/
+nifti_dmat33 nifti_dmat33_mul( nifti_dmat33 A , nifti_dmat33 B )
+/* multiply 2 3x3 matrices */
+{
+   nifti_dmat33 C ; int i,j ;
+   for( i=0 ; i < 3 ; i++ )
+    for( j=0 ; j < 3 ; j++ )
+      C.m[i][j] =  A.m[i][0] * B.m[0][j]
+                 + A.m[i][1] * B.m[1][j]
+                 + A.m[i][2] * B.m[2][j] ;
+   return C ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! multiply 2 3x3 matrices
+*//*--------------------------------------------------------------------*/
+mat33 nifti_mat33_mul( mat33 A , mat33 B )  /* multiply 2 3x3 matrices */
+{
+   mat33 C ; int i,j ;
+   for( i=0 ; i < 3 ; i++ )
+    for( j=0 ; j < 3 ; j++ )
+      C.m[i][j] =  A.m[i][0] * B.m[0][j]
+                 + A.m[i][1] * B.m[1][j]
+                 + A.m[i][2] * B.m[2][j] ;
+   return C ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! multiply 2 4x4 matrices
+*//*--------------------------------------------------------------------*/
+nifti_dmat44 nifti_dmat44_mul( nifti_dmat44 A , nifti_dmat44 B )
+{
+   nifti_dmat44 C ; int i,j,k ;
+   for( i=0 ; i < 4 ; i++ )
+      for( j=0 ; j < 4 ; j++ ) {
+         C.m[i][j] = 0.0;
+         for( k=0; k < 4; k++ )
+            C.m[i][j] += A.m[i][k] * B.m[k][j];
+      }
+   return C ;
+}
+
+/*----------------------------------------------------------------------*/
+/*! multiply 2 4x4 matrices
+*//*--------------------------------------------------------------------*/
+mat44 nifti_mat44_mul( mat44 A , mat44 B )
+{
+   mat44 C ; int i,j,k ;
+   for( i=0 ; i < 4 ; i++ )
+      for( j=0 ; j < 4 ; j++ ) {
+         C.m[i][j] = 0.0;
+         for( k=0; k < 4; k++ )
+            C.m[i][j] += A.m[i][k] * B.m[k][j];
+      }
+   return C ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! polar decomposition of a 3x3 matrix
+
+   This finds the closest orthogonal matrix to input A
+   (in both the Frobenius and L2 norms).
+
+   Algorithm is that from NJ Higham, SIAM J Sci Stat Comput, 7:1160-1174.
+*//*-------------------------------------------------------------------------*/
+nifti_dmat33 nifti_dmat33_polar( nifti_dmat33 A )
+{
+   nifti_dmat33 X , Y , Z ;
+   double alp,bet,gam,gmi , dif=1.0 ;
+   int k=0 ;
+
+   X = A ;
+
+   /* force matrix to be nonsingular */
+
+   gam = nifti_dmat33_determ(X) ;
+   while( gam == 0.0 ){        /* perturb matrix */
+     gam = 0.00001 * ( 0.001 + nifti_dmat33_rownorm(X) );
+     X.m[0][0] += gam ; X.m[1][1] += gam ; X.m[2][2] += gam ;
+     gam = nifti_dmat33_determ(X) ;
+   }
+
+   while(1){
+     Y = nifti_dmat33_inverse(X) ;
+     if( dif > 0.3 ){     /* far from convergence */
+       alp = sqrt( nifti_dmat33_rownorm(X) * nifti_dmat33_colnorm(X) );
+       bet = sqrt( nifti_dmat33_rownorm(Y) * nifti_dmat33_colnorm(Y) );
+       gam = sqrt( bet / alp );
+       gmi = 1.0 / gam;
+     } else {
+       gam = gmi = 1.0f ;  /* close to convergence */
+     }
+     Z.m[0][0] = 0.5 * ( gam*X.m[0][0] + gmi*Y.m[0][0] );
+     Z.m[0][1] = 0.5 * ( gam*X.m[0][1] + gmi*Y.m[1][0] );
+     Z.m[0][2] = 0.5 * ( gam*X.m[0][2] + gmi*Y.m[2][0] );
+     Z.m[1][0] = 0.5 * ( gam*X.m[1][0] + gmi*Y.m[0][1] );
+     Z.m[1][1] = 0.5 * ( gam*X.m[1][1] + gmi*Y.m[1][1] );
+     Z.m[1][2] = 0.5 * ( gam*X.m[1][2] + gmi*Y.m[2][1] );
+     Z.m[2][0] = 0.5 * ( gam*X.m[2][0] + gmi*Y.m[0][2] );
+     Z.m[2][1] = 0.5 * ( gam*X.m[2][1] + gmi*Y.m[1][2] );
+     Z.m[2][2] = 0.5 * ( gam*X.m[2][2] + gmi*Y.m[2][2] );
+
+     dif = fabs(Z.m[0][0]-X.m[0][0])+fabs(Z.m[0][1]-X.m[0][1])
+          +fabs(Z.m[0][2]-X.m[0][2])+fabs(Z.m[1][0]-X.m[1][0])
+          +fabs(Z.m[1][1]-X.m[1][1])+fabs(Z.m[1][2]-X.m[1][2])
+          +fabs(Z.m[2][0]-X.m[2][0])+fabs(Z.m[2][1]-X.m[2][1])
+          +fabs(Z.m[2][2]-X.m[2][2]);
+
+     k = k+1 ;
+     if( k > 100 || dif < 3.e-6 ) break ;  /* convergence or exhaustion */
+     X = Z ;
+   }
+
+   return Z ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! polar decomposition of a 3x3 matrix
+
+   This finds the closest orthogonal matrix to input A
+   (in both the Frobenius and L2 norms).
+
+   Algorithm is that from NJ Higham, SIAM J Sci Stat Comput, 7:1160-1174.
+*//*-------------------------------------------------------------------------*/
+mat33 nifti_mat33_polar( mat33 A )
+{
+   mat33 X , Y , Z ;
+   float alp,bet,gam,gmi , dif=1.0f ;
+   int k=0 ;
+
+   X = A ;
+
+   /* force matrix to be nonsingular */
+
+   gam = nifti_mat33_determ(X) ;
+   while( gam == 0.0 ){        /* perturb matrix */
+     gam = (float)( 0.00001 * ( 0.001 + nifti_mat33_rownorm(X) ) ) ;
+     X.m[0][0] += gam ; X.m[1][1] += gam ; X.m[2][2] += gam ;
+     gam = nifti_mat33_determ(X) ;
+   }
+
+   while(1){
+     Y = nifti_mat33_inverse(X) ;
+     if( dif > 0.3 ){     /* far from convergence */
+       alp = (float)( sqrt( nifti_mat33_rownorm(X) * nifti_mat33_colnorm(X) ) ) ;
+       bet = (float)( sqrt( nifti_mat33_rownorm(Y) * nifti_mat33_colnorm(Y) ) ) ;
+       gam = (float)( sqrt( bet / alp ) ) ;
+       gmi = (float)( 1.0 / gam ) ;
+     } else {
+       gam = gmi = 1.0f ;  /* close to convergence */
+     }
+     Z.m[0][0] = (float)( 0.5 * ( gam*X.m[0][0] + gmi*Y.m[0][0] ) ) ;
+     Z.m[0][1] = (float)( 0.5 * ( gam*X.m[0][1] + gmi*Y.m[1][0] ) ) ;
+     Z.m[0][2] = (float)( 0.5 * ( gam*X.m[0][2] + gmi*Y.m[2][0] ) ) ;
+     Z.m[1][0] = (float)( 0.5 * ( gam*X.m[1][0] + gmi*Y.m[0][1] ) ) ;
+     Z.m[1][1] = (float)( 0.5 * ( gam*X.m[1][1] + gmi*Y.m[1][1] ) ) ;
+     Z.m[1][2] = (float)( 0.5 * ( gam*X.m[1][2] + gmi*Y.m[2][1] ) ) ;
+     Z.m[2][0] = (float)( 0.5 * ( gam*X.m[2][0] + gmi*Y.m[0][2] ) ) ;
+     Z.m[2][1] = (float)( 0.5 * ( gam*X.m[2][1] + gmi*Y.m[1][2] ) ) ;
+     Z.m[2][2] = (float)( 0.5 * ( gam*X.m[2][2] + gmi*Y.m[2][2] ) ) ;
+
+     dif = (float)( fabs(Z.m[0][0]-X.m[0][0])+fabs(Z.m[0][1]-X.m[0][1])
+          +fabs(Z.m[0][2]-X.m[0][2])+fabs(Z.m[1][0]-X.m[1][0])
+          +fabs(Z.m[1][1]-X.m[1][1])+fabs(Z.m[1][2]-X.m[1][2])
+          +fabs(Z.m[2][0]-X.m[2][0])+fabs(Z.m[2][1]-X.m[2][1])
+          +fabs(Z.m[2][2]-X.m[2][2])                          );
+
+     k = k+1 ;
+     if( k > 100 || dif < 3.e-6 ) break ;  /* convergence or exhaustion */
+     X = Z ;
+   }
+
+   return Z ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! compute the (closest) orientation from a 4x4 ijk->xyz tranformation matrix
+
+   <pre>
+   Input:  4x4 matrix that transforms (i,j,k) indexes to (x,y,z) coordinates,
+           where +x=Right, +y=Anterior, +z=Superior.
+           (Only the upper-left 3x3 corner of R is used herein.)
+   Output: 3 orientation codes that correspond to the closest "standard"
+           anatomical orientation of the (i,j,k) axes.
+   Method: Find which permutation of (x,y,z) has the smallest angle to the
+           (i,j,k) axes directions, which are the columns of the R matrix.
+   Errors: The codes returned will be zero.
+
+   For example, an axial volume might get return values of
+     *icod = NIFTI_R2L   (i axis is mostly Right to Left)
+     *jcod = NIFTI_P2A   (j axis is mostly Posterior to Anterior)
+     *kcod = NIFTI_I2S   (k axis is mostly Inferior to Superior)
+   </pre>
+
+   \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h
+
+   \see nifti_quatern_to_mat44, nifti_mat44_to_quatern,
+        nifti_make_orthog_mat44
+*//*-------------------------------------------------------------------------*/
+void nifti_dmat44_to_orientation( nifti_dmat44 R ,
+                                  int *icod, int *jcod, int *kcod )
+{
+   double xi,xj,xk , yi,yj,yk , zi,zj,zk , val,detQ,detP ;
+   nifti_dmat33 P , Q , M ;
+   int i,j,k=0,p,q,r , ibest,jbest,kbest,pbest,qbest,rbest ;
+   double vbest ;
+
+   if( icod == NULL || jcod == NULL || kcod == NULL ) return ; /* bad */
+
+   *icod = *jcod = *kcod = 0 ; /* error returns, if sh*t happens */
+
+   /* load column vectors for each (i,j,k) direction from matrix */
+
+   /*-- i axis --*/ /*-- j axis --*/ /*-- k axis --*/
+
+   xi = R.m[0][0] ; xj = R.m[0][1] ; xk = R.m[0][2] ;
+   yi = R.m[1][0] ; yj = R.m[1][1] ; yk = R.m[1][2] ;
+   zi = R.m[2][0] ; zj = R.m[2][1] ; zk = R.m[2][2] ;
+
+   /* normalize column vectors to get unit vectors along each ijk-axis */
+
+   /* normalize i axis */
+
+   val = sqrt( xi*xi + yi*yi + zi*zi ) ;
+   if( val == 0.0 ) return ;                 /* stupid input */
+   xi /= val ; yi /= val ; zi /= val ;
+
+   /* normalize j axis */
+
+   val = sqrt( xj*xj + yj*yj + zj*zj ) ;
+   if( val == 0.0 ) return ;                 /* stupid input */
+   xj /= val ; yj /= val ; zj /= val ;
+
+   /* orthogonalize j axis to i axis, if needed */
+
+   val = xi*xj + yi*yj + zi*zj ;    /* dot product between i and j */
+   if( fabs(val) > 1.e-4 ){
+     xj -= val*xi ; yj -= val*yi ; zj -= val*zi ;
+     val = sqrt( xj*xj + yj*yj + zj*zj ) ;  /* must renormalize */
+     if( val == 0.0 ) return ;              /* j was parallel to i? */
+     xj /= val ; yj /= val ; zj /= val ;
+   }
+
+   /* normalize k axis; if it is zero, make it the cross product i x j */
+
+   val = sqrt( xk*xk + yk*yk + zk*zk ) ;
+   if( val == 0.0 ){ xk = yi*zj-zi*yj; yk = zi*xj-zj*xi ; zk=xi*yj-yi*xj ; }
+   else            { xk /= val ; yk /= val ; zk /= val ; }
+
+   /* orthogonalize k to i */
+
+   val = xi*xk + yi*yk + zi*zk ;    /* dot product between i and k */
+   if( fabs(val) > 1.e-4 ){
+     xk -= val*xi ; yk -= val*yi ; zk -= val*zi ;
+     val = sqrt( xk*xk + yk*yk + zk*zk ) ;
+     if( val == 0.0 ) return ;      /* bad */
+     xk /= val ; yk /= val ; zk /= val ;
+   }
+
+   /* orthogonalize k to j */
+
+   val = xj*xk + yj*yk + zj*zk ;    /* dot product between j and k */
+   if( fabs(val) > 1.e-4 ){
+     xk -= val*xj ; yk -= val*yj ; zk -= val*zj ;
+     val = sqrt( xk*xk + yk*yk + zk*zk ) ;
+     if( val == 0.0 ) return ;      /* bad */
+     xk /= val ; yk /= val ; zk /= val ;
+   }
+
+   Q.m[0][0] = xi ; Q.m[0][1] = xj ; Q.m[0][2] = xk ;
+   Q.m[1][0] = yi ; Q.m[1][1] = yj ; Q.m[1][2] = yk ;
+   Q.m[2][0] = zi ; Q.m[2][1] = zj ; Q.m[2][2] = zk ;
+
+   /* at this point, Q is the rotation matrix from (i,j,k) to (x,y,z) axes */
+
+   detQ = nifti_dmat33_determ( Q ) ;
+   if( detQ == 0.0 ) return ; /* shouldn't happen unless user is a DUFIS */
+
+   /* Build and test all possible +1/-1 coordinate permutation matrices P;
+      then find the P such that the rotation matrix M=PQ is closest to the
+      identity, in the sense of M having the smallest total rotation angle. */
+
+   /* Despite the formidable looking 6 nested loops, there are
+      only 3*3*3*2*2*2 = 216 passes, which will run very quickly. */
+
+   vbest = -666.0 ; ibest=pbest=qbest=rbest=1 ; jbest=2 ; kbest=3 ;
+   for( i=1 ; i <= 3 ; i++ ){     /* i = column number to use for row #1 */
+    for( j=1 ; j <= 3 ; j++ ){    /* j = column number to use for row #2 */
+     if( i == j ) continue ;
+      for( k=1 ; k <= 3 ; k++ ){  /* k = column number to use for row #3 */
+       if( i == k || j == k ) continue ;
+       P.m[0][0] = P.m[0][1] = P.m[0][2] =
+        P.m[1][0] = P.m[1][1] = P.m[1][2] =
+         P.m[2][0] = P.m[2][1] = P.m[2][2] = 0.0 ;
+       for( p=-1 ; p <= 1 ; p+=2 ){    /* p,q,r are -1 or +1      */
+        for( q=-1 ; q <= 1 ; q+=2 ){   /* and go into rows #1,2,3 */
+         for( r=-1 ; r <= 1 ; r+=2 ){
+           P.m[0][i-1] = p ; P.m[1][j-1] = q ; P.m[2][k-1] = r ;
+           detP = nifti_dmat33_determ(P) ;          /* sign of permutation */
+           if( detP * detQ <= 0.0 ) continue ;  /* doesn't match sign of Q */
+           M = nifti_dmat33_mul(P,Q) ;
+
+           /* angle of M rotation = 2.0*acos(0.5*sqrt(1.0+trace(M)))       */
+           /* we want largest trace(M) == smallest angle == M nearest to I */
+
+           val = M.m[0][0] + M.m[1][1] + M.m[2][2] ; /* trace */
+           if( val > vbest ){
+             vbest = val ;
+             ibest = i ; jbest = j ; kbest = k ;
+             pbest = p ; qbest = q ; rbest = r ;
+           }
+   }}}}}}
+
+   /* At this point ibest is 1 or 2 or 3; pbest is -1 or +1; etc.
+
+      The matrix P that corresponds is the best permutation approximation
+      to Q-inverse; that is, P (approximately) takes (x,y,z) coordinates
+      to the (i,j,k) axes.
+
+      For example, the first row of P (which contains pbest in column ibest)
+      determines the way the i axis points relative to the anatomical
+      (x,y,z) axes.  If ibest is 2, then the i axis is along the y axis,
+      which is direction P2A (if pbest > 0) or A2P (if pbest < 0).
+
+      So, using ibest and pbest, we can assign the output code for
+      the i axis.  Mutatis mutandis for the j and k axes, of course. */
+
+   switch( ibest*pbest ){
+     case  1: i = NIFTI_L2R ; break ;
+     case -1: i = NIFTI_R2L ; break ;
+     case  2: i = NIFTI_P2A ; break ;
+     case -2: i = NIFTI_A2P ; break ;
+     case  3: i = NIFTI_I2S ; break ;
+     case -3: i = NIFTI_S2I ; break ;
+     default: break;
+   }
+
+   switch( jbest*qbest ){
+     case  1: j = NIFTI_L2R ; break ;
+     case -1: j = NIFTI_R2L ; break ;
+     case  2: j = NIFTI_P2A ; break ;
+     case -2: j = NIFTI_A2P ; break ;
+     case  3: j = NIFTI_I2S ; break ;
+     case -3: j = NIFTI_S2I ; break ;
+     default: break;
+   }
+
+   switch( kbest*rbest ){
+     case  1: k = NIFTI_L2R ; break ;
+     case -1: k = NIFTI_R2L ; break ;
+     case  2: k = NIFTI_P2A ; break ;
+     case -2: k = NIFTI_A2P ; break ;
+     case  3: k = NIFTI_I2S ; break ;
+     case -3: k = NIFTI_S2I ; break ;
+     default: break;
+   }
+
+   *icod = i ; *jcod = j ; *kcod = k ; }
+
+/*---------------------------------------------------------------------------*/
+/*! compute the (closest) orientation from a 4x4 ijk->xyz tranformation matrix
+
+   <pre>
+   Input:  4x4 matrix that transforms (i,j,k) indexes to (x,y,z) coordinates,
+           where +x=Right, +y=Anterior, +z=Superior.
+           (Only the upper-left 3x3 corner of R is used herein.)
+   Output: 3 orientation codes that correspond to the closest "standard"
+           anatomical orientation of the (i,j,k) axes.
+   Method: Find which permutation of (x,y,z) has the smallest angle to the
+           (i,j,k) axes directions, which are the columns of the R matrix.
+   Errors: The codes returned will be zero.
+
+   For example, an axial volume might get return values of
+     *icod = NIFTI_R2L   (i axis is mostly Right to Left)
+     *jcod = NIFTI_P2A   (j axis is mostly Posterior to Anterior)
+     *kcod = NIFTI_I2S   (k axis is mostly Inferior to Superior)
+   </pre>
+
+   \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h
+
+   \see nifti_quatern_to_mat44, nifti_mat44_to_quatern,
+        nifti_make_orthog_mat44
+*//*-------------------------------------------------------------------------*/
+void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
+{
+   float xi,xj,xk , yi,yj,yk , zi,zj,zk , val,detQ,detP ;
+   mat33 P , Q , M ;
+   int i,j,k=0,p,q,r , ibest,jbest,kbest,pbest,qbest,rbest ;
+   float vbest ;
+
+   if( icod == NULL || jcod == NULL || kcod == NULL ) return ; /* bad */
+
+   *icod = *jcod = *kcod = 0 ; /* error returns, if sh*t happens */
+
+   /* load column vectors for each (i,j,k) direction from matrix */
+
+   /*-- i axis --*/ /*-- j axis --*/ /*-- k axis --*/
+
+   xi = R.m[0][0] ; xj = R.m[0][1] ; xk = R.m[0][2] ;
+   yi = R.m[1][0] ; yj = R.m[1][1] ; yk = R.m[1][2] ;
+   zi = R.m[2][0] ; zj = R.m[2][1] ; zk = R.m[2][2] ;
+
+   /* normalize column vectors to get unit vectors along each ijk-axis */
+
+   /* normalize i axis */
+
+   val = (float)sqrt( xi*xi + yi*yi + zi*zi ) ;
+   if( val == 0.0 ) return ;                 /* stupid input */
+   xi /= val ; yi /= val ; zi /= val ;
+
+   /* normalize j axis */
+
+   val = (float)sqrt( xj*xj + yj*yj + zj*zj ) ;
+   if( val == 0.0 ) return ;                 /* stupid input */
+   xj /= val ; yj /= val ; zj /= val ;
+
+   /* orthogonalize j axis to i axis, if needed */
+
+   val = xi*xj + yi*yj + zi*zj ;    /* dot product between i and j */
+   if( fabs(val) > 1.e-4 ){
+     xj -= val*xi ; yj -= val*yi ; zj -= val*zi ;
+     val = (float)sqrt( xj*xj + yj*yj + zj*zj ) ;  /* must renormalize */
+     if( val == 0.0 ) return ;              /* j was parallel to i? */
+     xj /= val ; yj /= val ; zj /= val ;
+   }
+
+   /* normalize k axis; if it is zero, make it the cross product i x j */
+
+   val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ;
+   if( val == 0.0 ){ xk = yi*zj-zi*yj; yk = zi*xj-zj*xi ; zk=xi*yj-yi*xj ; }
+   else            { xk /= val ; yk /= val ; zk /= val ; }
+
+   /* orthogonalize k to i */
+
+   val = xi*xk + yi*yk + zi*zk ;    /* dot product between i and k */
+   if( fabs(val) > 1.e-4 ){
+     xk -= val*xi ; yk -= val*yi ; zk -= val*zi ;
+     val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ;
+     if( val == 0.0 ) return ;      /* bad */
+     xk /= val ; yk /= val ; zk /= val ;
+   }
+
+   /* orthogonalize k to j */
+
+   val = xj*xk + yj*yk + zj*zk ;    /* dot product between j and k */
+   if( fabs(val) > 1.e-4 ){
+     xk -= val*xj ; yk -= val*yj ; zk -= val*zj ;
+     val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ;
+     if( val == 0.0 ) return ;      /* bad */
+     xk /= val ; yk /= val ; zk /= val ;
+   }
+
+   Q.m[0][0] = xi ; Q.m[0][1] = xj ; Q.m[0][2] = xk ;
+   Q.m[1][0] = yi ; Q.m[1][1] = yj ; Q.m[1][2] = yk ;
+   Q.m[2][0] = zi ; Q.m[2][1] = zj ; Q.m[2][2] = zk ;
+
+   /* at this point, Q is the rotation matrix from the (i,j,k) to (x,y,z) axes */
+
+   detQ = nifti_mat33_determ( Q ) ;
+   if( detQ == 0.0 ) return ; /* shouldn't happen unless user is a DUFIS */
+
+   /* Build and test all possible +1/-1 coordinate permutation matrices P;
+      then find the P such that the rotation matrix M=PQ is closest to the
+      identity, in the sense of M having the smallest total rotation angle. */
+
+   /* Despite the formidable looking 6 nested loops, there are
+      only 3*3*3*2*2*2 = 216 passes, which will run very quickly. */
+
+   vbest = -666.0f ; ibest=pbest=qbest=rbest=1 ; jbest=2 ; kbest=3 ;
+   for( i=1 ; i <= 3 ; i++ ){     /* i = column number to use for row #1 */
+    for( j=1 ; j <= 3 ; j++ ){    /* j = column number to use for row #2 */
+     if( i == j ) continue ;
+      for( k=1 ; k <= 3 ; k++ ){  /* k = column number to use for row #3 */
+       if( i == k || j == k ) continue ;
+       P.m[0][0] = P.m[0][1] = P.m[0][2] =
+        P.m[1][0] = P.m[1][1] = P.m[1][2] =
+         P.m[2][0] = P.m[2][1] = P.m[2][2] = 0.0f ;
+       for( p=-1 ; p <= 1 ; p+=2 ){    /* p,q,r are -1 or +1      */
+        for( q=-1 ; q <= 1 ; q+=2 ){   /* and go into rows #1,2,3 */
+         for( r=-1 ; r <= 1 ; r+=2 ){
+           P.m[0][i-1] = p ; P.m[1][j-1] = q ; P.m[2][k-1] = r ;
+           detP = nifti_mat33_determ(P) ;           /* sign of permutation */
+           if( detP * detQ <= 0.0 ) continue ;  /* doesn't match sign of Q */
+           M = nifti_mat33_mul(P,Q) ;
+
+           /* angle of M rotation = 2.0*acos(0.5*sqrt(1.0+trace(M)))       */
+           /* we want largest trace(M) == smallest angle == M nearest to I */
+
+           val = M.m[0][0] + M.m[1][1] + M.m[2][2] ; /* trace */
+           if( val > vbest ){
+             vbest = val ;
+             ibest = i ; jbest = j ; kbest = k ;
+             pbest = p ; qbest = q ; rbest = r ;
+           }
+   }}}}}}
+
+   /* At this point ibest is 1 or 2 or 3; pbest is -1 or +1; etc.
+
+      The matrix P that corresponds is the best permutation approximation
+      to Q-inverse; that is, P (approximately) takes (x,y,z) coordinates
+      to the (i,j,k) axes.
+
+      For example, the first row of P (which contains pbest in column ibest)
+      determines the way the i axis points relative to the anatomical
+      (x,y,z) axes.  If ibest is 2, then the i axis is along the y axis,
+      which is direction P2A (if pbest > 0) or A2P (if pbest < 0).
+
+      So, using ibest and pbest, we can assign the output code for
+      the i axis.  Mutatis mutandis for the j and k axes, of course. */
+
+   switch( ibest*pbest ){
+     case  1: i = NIFTI_L2R ; break ;
+     case -1: i = NIFTI_R2L ; break ;
+     case  2: i = NIFTI_P2A ; break ;
+     case -2: i = NIFTI_A2P ; break ;
+     case  3: i = NIFTI_I2S ; break ;
+     case -3: i = NIFTI_S2I ; break ;
+     default: break;
+   }
+
+   switch( jbest*qbest ){
+     case  1: j = NIFTI_L2R ; break ;
+     case -1: j = NIFTI_R2L ; break ;
+     case  2: j = NIFTI_P2A ; break ;
+     case -2: j = NIFTI_A2P ; break ;
+     case  3: j = NIFTI_I2S ; break ;
+     case -3: j = NIFTI_S2I ; break ;
+     default: break;
+   }
+
+   switch( kbest*rbest ){
+     case  1: k = NIFTI_L2R ; break ;
+     case -1: k = NIFTI_R2L ; break ;
+     case  2: k = NIFTI_P2A ; break ;
+     case -2: k = NIFTI_A2P ; break ;
+     case  3: k = NIFTI_I2S ; break ;
+     case -3: k = NIFTI_S2I ; break ;
+     default: break;
+   }
+
+   *icod = i ; *jcod = j ; *kcod = k ; }
+
+/*---------------------------------------------------------------------------*/
+/* Routines to swap byte arrays in various ways:
+    -  2 at a time:  ab               -> ba               [short]
+    -  4 at a time:  abcd             -> dcba             [int, float]
+    -  8 at a time:  abcdDCBA         -> ABCDdcba         [long long, double]
+    - 16 at a time:  abcdefghHGFEDCBA -> ABCDEFGHhgfedcba [long double]
+-----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*! swap each byte pair from the given list of n pairs
+ *
+ *  Due to alignment of structures at some architectures (e.g. on ARM),
+ *  stick to char varaibles.
+ *  Fixes http://bugs.debian.org/446893   Yaroslav <debian@onerussian.com>
+ *
+*//*--------------------------------------------------------------------*/
+void nifti_swap_2bytes( int64_t n , void *ar )    /* 2 bytes at a time */
+{
+   int64_t ii ;
+   unsigned char * cp1 = (unsigned char *)ar, * cp2 ;
+   unsigned char   tval;
+
+   for( ii=0 ; ii < n ; ii++ ){
+       cp2 = cp1 + 1;
+       tval = *cp1;  *cp1 = *cp2;  *cp2 = tval;
+       cp1 += 2;
+   }
+   }
+
+/*----------------------------------------------------------------------*/
+/*! swap 4 bytes at a time from the given list of n sets of 4 bytes
+*//*--------------------------------------------------------------------*/
+void nifti_swap_4bytes( int64_t n , void *ar )    /* 4 bytes at a time */
+{
+   int64_t ii ;
+   unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ;
+   unsigned char tval ;
+
+   for( ii=0 ; ii < n ; ii++ ){
+       cp1 = cp0; cp2 = cp0+3;
+       tval = *cp1;  *cp1 = *cp2;  *cp2 = tval;
+       cp1++;  cp2--;
+       tval = *cp1;  *cp1 = *cp2;  *cp2 = tval;
+       cp0 += 4;
+   }
+   }
+
+/*----------------------------------------------------------------------*/
+/*! swap 8 bytes at a time from the given list of n sets of 8 bytes
+ *
+ *  perhaps use this style for the general Nbytes, as Yaroslav suggests
+*//*--------------------------------------------------------------------*/
+void nifti_swap_8bytes( int64_t n , void *ar )    /* 8 bytes at a time */
+{
+   int64_t ii ;
+   unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ;
+   unsigned char tval ;
+
+   for( ii=0 ; ii < n ; ii++ ){
+       cp1 = cp0;  cp2 = cp0+7;
+       while ( cp2 > cp1 )      /* unroll? */
+       {
+           tval = *cp1 ; *cp1 = *cp2 ; *cp2 = tval ;
+           cp1++; cp2--;
+       }
+       cp0 += 8;
+   }
+   }
+
+/*----------------------------------------------------------------------*/
+/*! swap 16 bytes at a time from the given list of n sets of 16 bytes
+*//*--------------------------------------------------------------------*/
+void nifti_swap_16bytes( int64_t n , void *ar )    /* 16 bytes at a time */
+{
+   int64_t ii ;
+   unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ;
+   unsigned char tval ;
+
+   for( ii=0 ; ii < n ; ii++ ){
+       cp1 = cp0;  cp2 = cp0+15;
+       while ( cp2 > cp1 )
+       {
+           tval = *cp1 ; *cp1 = *cp2 ; *cp2 = tval ;
+           cp1++; cp2--;
+       }
+       cp0 += 16;
+   }
+   }
+
+#if 0  /* not important: save for version update     6 Jul 2010 [rickr] */
+
+/*----------------------------------------------------------------------*/
+/*! generic: swap siz bytes at a time from the given list of n sets
+*//*--------------------------------------------------------------------*/
+void nifti_swap_bytes( int64_t n , int siz , void *ar )
+{
+   int64_t ii ;
+   unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ;
+   unsigned char tval ;
+
+   for( ii=0 ; ii < n ; ii++ ){
+       cp1 = cp0;  cp2 = cp0+(siz-1);
+       while ( cp2 > cp1 )
+       {
+           tval = *cp1 ; *cp1 = *cp2 ; *cp2 = tval ;
+           cp1++; cp2--;
+       }
+       cp0 += siz;
+   }
+   return ;
+}
+#endif
+
+/*---------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*! based on siz, call the appropriate nifti_swap_Nbytes() function
+*//*--------------------------------------------------------------------*/
+void nifti_swap_Nbytes( int64_t n , int siz , void *ar )  /* subsuming case */
+{
+   switch( siz ){
+     case 2:  nifti_swap_2bytes ( n , ar ) ; break ;
+     case 4:  nifti_swap_4bytes ( n , ar ) ; break ;
+     case 8:  nifti_swap_8bytes ( n , ar ) ; break ;
+     case 16: nifti_swap_16bytes( n , ar ) ; break ;
+     default:    /* nifti_swap_bytes  ( n , siz, ar ) ; */
+        Rc_fprintf_stderr("** NIfTI: cannot swap in %d byte blocks\n", siz);
+        break ;
+   }
+   }
+
+
+/*-------------------------------------------------------------------------*/
+/*! Byte swap NIFTI file header, depending on the version.
+*//*---------------------------------------------------------------------- */
+void swap_nifti_header( void * hdr , int ni_ver )
+{
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("++ swapping NIFTI header via ni_ver %d\n", ni_ver);
+
+   if     ( ni_ver == 0 ) nifti_swap_as_analyze((nifti_analyze75 *)hdr);
+   else if( ni_ver == 1 ) nifti_swap_as_nifti1((nifti_1_header *)hdr);
+   else if( ni_ver == 2 ) nifti_swap_as_nifti2((nifti_2_header *)hdr);
+   else if( ni_ver >= 0 && ni_ver <= 9 ) {
+      Rc_fprintf_stderr("** swap_nifti_header: not ready for version %d\n",ni_ver);
+   } else {
+      Rc_fprintf_stderr("** swap_nifti_header: illegal version %d\n", ni_ver);
+   }
+}
+
+
+/*-------------------------------------------------------------------------*/
+/*! Byte swap NIFTI-2 file header.
+*//*---------------------------------------------------------------------- */
+void nifti_swap_as_nifti2( nifti_2_header * h )
+{
+   if ( ! h ) {
+     Rc_fprintf_stderr("** nifti_swap_as_nifti2: NULL pointer\n");
+     return;
+   }
+
+   nifti_swap_4bytes(1, &h->sizeof_hdr);
+
+   nifti_swap_2bytes(1, &h->datatype);
+   nifti_swap_2bytes(1, &h->bitpix);
+
+   nifti_swap_8bytes(8, h->dim);
+   nifti_swap_8bytes(1, &h->intent_p1);
+   nifti_swap_8bytes(1, &h->intent_p2);
+   nifti_swap_8bytes(1, &h->intent_p3);
+   nifti_swap_8bytes(8, h->pixdim);
+
+   nifti_swap_8bytes(1, &h->vox_offset);
+   nifti_swap_8bytes(1, &h->scl_slope);
+   nifti_swap_8bytes(1, &h->scl_inter);
+   nifti_swap_8bytes(1, &h->cal_max);
+   nifti_swap_8bytes(1, &h->cal_min);
+   nifti_swap_8bytes(1, &h->slice_duration);
+   nifti_swap_8bytes(1, &h->toffset);
+   nifti_swap_8bytes(1, &h->slice_start);
+   nifti_swap_8bytes(1, &h->slice_end);
+
+   nifti_swap_4bytes(1, &h->qform_code);
+   nifti_swap_4bytes(1, &h->sform_code);
+
+   nifti_swap_8bytes(1, &h->quatern_b);
+   nifti_swap_8bytes(1, &h->quatern_c);
+   nifti_swap_8bytes(1, &h->quatern_d);
+   nifti_swap_8bytes(1, &h->qoffset_x);
+   nifti_swap_8bytes(1, &h->qoffset_y);
+   nifti_swap_8bytes(1, &h->qoffset_z);
+
+   nifti_swap_8bytes(4, h->srow_x);
+   nifti_swap_8bytes(4, h->srow_y);
+   nifti_swap_8bytes(4, h->srow_z);
+
+   nifti_swap_4bytes(1, &h->slice_code);
+   nifti_swap_4bytes(1, &h->xyzt_units);
+   nifti_swap_4bytes(1, &h->intent_code);
+}
+
+/*-------------------------------------------------------------------------*/
+/*! Byte swap NIFTI-1 file header in various places and ways.
+ *  return 0 on success
+*//*---------------------------------------------------------------------- */
+void nifti_swap_as_nifti1( nifti_1_header * h )
+{
+   if ( ! h ) {
+     Rc_fprintf_stderr("** nifti_swap_as_nifti1: NULL pointer\n");
+     return;
+   }
+
+   nifti_swap_4bytes(1, &h->sizeof_hdr);
+   nifti_swap_4bytes(1, &h->extents);
+   nifti_swap_2bytes(1, &h->session_error);
+
+   nifti_swap_2bytes(8, h->dim);
+   nifti_swap_4bytes(1, &h->intent_p1);
+   nifti_swap_4bytes(1, &h->intent_p2);
+   nifti_swap_4bytes(1, &h->intent_p3);
+
+   nifti_swap_2bytes(1, &h->intent_code);
+   nifti_swap_2bytes(1, &h->datatype);
+   nifti_swap_2bytes(1, &h->bitpix);
+   nifti_swap_2bytes(1, &h->slice_start);
+
+   nifti_swap_4bytes(8, h->pixdim);
+
+   nifti_swap_4bytes(1, &h->vox_offset);
+   nifti_swap_4bytes(1, &h->scl_slope);
+   nifti_swap_4bytes(1, &h->scl_inter);
+   nifti_swap_2bytes(1, &h->slice_end);
+
+   nifti_swap_4bytes(1, &h->cal_max);
+   nifti_swap_4bytes(1, &h->cal_min);
+   nifti_swap_4bytes(1, &h->slice_duration);
+   nifti_swap_4bytes(1, &h->toffset);
+   nifti_swap_4bytes(1, &h->glmax);
+   nifti_swap_4bytes(1, &h->glmin);
+
+   nifti_swap_2bytes(1, &h->qform_code);
+   nifti_swap_2bytes(1, &h->sform_code);
+
+   nifti_swap_4bytes(1, &h->quatern_b);
+   nifti_swap_4bytes(1, &h->quatern_c);
+   nifti_swap_4bytes(1, &h->quatern_d);
+   nifti_swap_4bytes(1, &h->qoffset_x);
+   nifti_swap_4bytes(1, &h->qoffset_y);
+   nifti_swap_4bytes(1, &h->qoffset_z);
+
+   nifti_swap_4bytes(4, h->srow_x);
+   nifti_swap_4bytes(4, h->srow_y);
+   nifti_swap_4bytes(4, h->srow_z);
+}
+
+/*-------------------------------------------------------------------------*/
+/*! Byte swap as an ANALYZE 7.5 header
+ *
+ *  return non-zero on failure
+*//*---------------------------------------------------------------------- */
+void nifti_swap_as_analyze( nifti_analyze75 * h )
+{
+   if ( ! h ) {
+     Rc_fprintf_stderr("** nifti_swap_as_analyze: NULL pointer\n");
+     return;
+   }
+
+   nifti_swap_4bytes(1, &h->sizeof_hdr);
+   nifti_swap_4bytes(1, &h->extents);
+   nifti_swap_2bytes(1, &h->session_error);
+
+   nifti_swap_2bytes(8, h->dim);
+   nifti_swap_2bytes(1, &h->unused8);
+   nifti_swap_2bytes(1, &h->unused9);
+   nifti_swap_2bytes(1, &h->unused10);
+   nifti_swap_2bytes(1, &h->unused11);
+   nifti_swap_2bytes(1, &h->unused12);
+   nifti_swap_2bytes(1, &h->unused13);
+   nifti_swap_2bytes(1, &h->unused14);
+
+   nifti_swap_2bytes(1, &h->datatype);
+   nifti_swap_2bytes(1, &h->bitpix);
+   nifti_swap_2bytes(1, &h->dim_un0);
+
+   nifti_swap_4bytes(8, h->pixdim);
+
+   nifti_swap_4bytes(1, &h->vox_offset);
+   nifti_swap_4bytes(1, &h->funused1);
+   nifti_swap_4bytes(1, &h->funused2);
+   nifti_swap_4bytes(1, &h->funused3);
+
+   nifti_swap_4bytes(1, &h->cal_max);
+   nifti_swap_4bytes(1, &h->cal_min);
+   nifti_swap_4bytes(1, &h->compressed);
+   nifti_swap_4bytes(1, &h->verified);
+   nifti_swap_4bytes(1, &h->glmax);
+   nifti_swap_4bytes(1, &h->glmin);
+
+   nifti_swap_4bytes(1, &h->views);
+   nifti_swap_4bytes(1, &h->vols_added);
+   nifti_swap_4bytes(1, &h->start_field);
+   nifti_swap_4bytes(1, &h->field_skip);
+
+   nifti_swap_4bytes(1, &h->omax);
+   nifti_swap_4bytes(1, &h->omin);
+   nifti_swap_4bytes(1, &h->smax);
+   nifti_swap_4bytes(1, &h->smin);
+}
+
+/*-------------------------------------------------------------------------*/
+/*! OLD VERSION of swap_nifti_header (left for undo/compare operations)
+
+    Byte swap NIFTI-1 file header in various places and ways.
+
+    If is_nifti is nonzero, will also swap the NIFTI-specific
+    components of the header; otherwise, only the components
+    common to NIFTI and ANALYZE will be swapped.
+*//*---------------------------------------------------------------------- */
+void old_swap_nifti_header( nifti_1_header *h , int is_nifti )
+{
+   /* this stuff is always present, for ANALYZE and NIFTI */
+
+   swap_4(h->sizeof_hdr) ;
+   nifti_swap_2bytes( 8 , h->dim ) ;
+   nifti_swap_4bytes( 8 , h->pixdim ) ;
+
+   swap_2(h->datatype) ;
+   swap_2(h->bitpix) ;
+
+   swap_4(h->vox_offset); swap_4(h->cal_max); swap_4(h->cal_min);
+
+   /* this stuff is NIFTI specific */
+
+   if( is_nifti ){
+     swap_4(h->intent_p1); swap_4(h->intent_p2); swap_4(h->intent_p3);
+     swap_2(h->intent_code);
+
+     swap_2(h->slice_start);    swap_2(h->slice_end);
+     swap_4(h->scl_slope);      swap_4(h->scl_inter);
+     swap_4(h->slice_duration); swap_4(h->toffset);
+
+     swap_2(h->qform_code); swap_2(h->sform_code);
+     swap_4(h->quatern_b); swap_4(h->quatern_c); swap_4(h->quatern_d);
+     swap_4(h->qoffset_x); swap_4(h->qoffset_y); swap_4(h->qoffset_z);
+     nifti_swap_4bytes(4,h->srow_x);
+     nifti_swap_4bytes(4,h->srow_y);
+     nifti_swap_4bytes(4,h->srow_z);
+   }
+   }
+
+
+#define USE_STAT
+#ifdef  USE_STAT
+/*---------------------------------------------------------------------------*/
+/* Return the file length (0 if file not found or has no contents).
+   This is a Unix-specific function, since it uses stat().
+-----------------------------------------------------------------------------*/
+#include <sys/types.h>
+#include <sys/stat.h>
+
+/*---------------------------------------------------------------------------*/
+/*! return the size of a file, in bytes
+
+    \return size of file on success, -1 on error or no file
+
+    changed to return int, -1 means no file or error      20 Dec 2004 [rickr]
+*//*-------------------------------------------------------------------------*/
+int64_t nifti2_get_filesize( const char *pathname )
+{
+   struct stat buf ; int ii ;
+
+   if( pathname == NULL || *pathname == '\0' ) return -1 ;
+   ii = stat( pathname , &buf ); if( ii != 0 ) return -1 ;
+   return buf.st_size ;
+}
+
+#else  /*---------- non-Unix version of the above, less efficient -----------*/
+
+int64_t nifti2_get_filesize( const char *pathname )
+{
+   znzFile fp ; int64_t len ;
+
+   if( pathname == NULL || *pathname == '\0' ) return -1 ;
+   fp = znzopen(pathname,"rb",0); if( znz_isnull(fp) ) return -1 ;
+   znzseek(fp,0L,SEEK_END) ; len = znztell(fp) ;
+   znzclose(fp) ; return len ;
+}
+
+#endif /* USE_STAT */
+
+
+/*----------------------------------------------------------------------*/
+/*! return the total volume size, in bytes
+
+    This is computed as nvox * nbyper.
+*//*--------------------------------------------------------------------*/
+int64_t nifti2_get_volsize(const nifti_image *nim)
+{
+   return (int64_t)nim->nbyper * nim->nvox ; /* total bytes */
+}
+
+
+/*--------------------------------------------------------------------------*/
+/* Support functions for filenames in read and write
+   - allows for gzipped files
+*/
+
+
+/*----------------------------------------------------------------------*/
+/*! simple check for file existence
+
+    \return 1 on existence, 0 otherwise
+*//*--------------------------------------------------------------------*/
+int nifti_fileexists(const char* fname)
+{
+   znzFile fp;
+   fp = znzopen( fname , "rb" , nifti_is_gzfile(fname) ) ;
+   if( !znz_isnull(fp) )  { znzclose(fp);  return 1; }
+   return 0; /* fp is NULL */
+}
+
+/*----------------------------------------------------------------------*/
+/*! return whether the filename is valid
+
+    Note: uppercase extensions are now valid.    27 Apr 2009 [rickr]
+
+    The name is considered valid if the file basename has length greater than
+    zero, AND one of the valid nifti extensions is provided.
+    fname input          | return |
+    ===============================
+    "myimage"            |  0     |
+    "myimage.tif"        |  0     |
+    "myimage.tif.gz"     |  0     |
+    "myimage.nii"        |  1     |
+    ".nii"               |  0     |
+    ".myhiddenimage"     |  0     |
+    ".myhiddenimage.nii" |  1     |
+*//*--------------------------------------------------------------------*/
+int nifti_is_complete_filename(const char* fname)
+{
+   const char * ext;
+
+   /* check input file(s) for sanity */
+   if( fname == NULL || *fname == '\0' ){
+      if ( g_opts.debug > 1 )
+         Rc_fprintf_stderr("-- empty filename in nifti_validfilename()\n");
+      return 0;
+   }
+
+   ext = nifti_find_file_extension(fname);
+   if ( ext == NULL ) { /*Invalid extension given */
+      if ( g_opts.debug > 0 )
+         Rc_fprintf_stderr("-- no nifti valid extension for filename '%s'\n", fname);
+       return 0;
+   }
+
+   if ( ext && ext == fname ) {   /* then no filename prefix */
+      if ( g_opts.debug > 0 )
+         Rc_fprintf_stderr("-- no prefix for filename '%s'\n", fname);
+      return 0;
+   }
+   return 1;
+}
+
+/*----------------------------------------------------------------------*/
+/*! return whether the filename is valid
+
+    Allow uppercase extensions as valid.        27 Apr 2009 [rickr]
+    Any .gz extension case must match the base extension case.
+
+    The name is considered valid if its length is positive, excluding
+    any nifti filename extension.
+    fname input         |  return | result of nifti_makebasename
+    ====================================================================
+    "myimage"           |  1      | "myimage"
+    "myimage.tif"       |  1      | "myimage.tif"
+    "myimage.tif.gz"    |  1      | "myimage.tif"
+    "myimage.nii"       |  1      | "myimage"
+    ".nii"              |  0      | <ERROR - basename has zero length>
+    ".myhiddenimage"    |  1      | ".myhiddenimage"
+    ".myhiddenimage.nii |  1      | ".myhiddenimage"
+*//*--------------------------------------------------------------------*/
+int nifti_validfilename(const char* fname)
+{
+   const char * ext;
+
+   /* check input file(s) for sanity */
+   if( fname == NULL || *fname == '\0' ){
+      if ( g_opts.debug > 1 )
+         Rc_fprintf_stderr("-- empty filename in nifti_validfilename()\n");
+      return 0;
+   }
+
+   ext = nifti_find_file_extension(fname);
+
+   if ( ext && ext == fname ) {   /* then no filename prefix */
+      if ( g_opts.debug > 0 )
+         Rc_fprintf_stderr("-- no prefix for filename '%s'\n", fname);
+      return 0;
+   }
+
+   return 1;
+}
+
+/*----------------------------------------------------------------------*/
+/*! check the end of the filename for a valid nifti extension
+
+    Valid extensions are currently .nii, .hdr, .img, .nia,
+    or any of them followed by .gz.  Note that '.' is part of
+    the extension.
+
+    Uppercase extensions are also valid, but not mixed case.
+
+    \return a pointer to the extension substring within the original
+            function input parameter name, or NULL if not found.
+    \caution Note that if the input parameter is is immutabale
+             (i.e. a const char *) then this function performs an
+             implicit casting away of the mutability constraint and
+             the return parameter will appear as a mutable
+             even though it is part of the immuttable string.
+*//*--------------------------------------------------------------------*/
+char * nifti_find_file_extension( const char * name )
+{
+   const char * ext;
+   char extcopy[8];
+   int    len;
+   char   extnii[8] = ".nii";   /* modifiable, for possible uppercase */
+   char   exthdr[8] = ".hdr";   /* (leave space for .gz) */
+   char   extimg[8] = ".img";
+   char   extnia[8] = ".nia";
+   char   extgz[4]  = ".gz";
+   char * elist[4]  = { NULL, NULL, NULL, NULL};
+
+   /* stupid compiler... */
+   elist[0] = extnii; elist[1] = exthdr; elist[2] = extimg; elist[3] = extnia;
+
+   if ( ! name ) return NULL;
+
+   len = (int)strlen(name);
+   if ( len < 4 ) return NULL;
+
+   ext = name + len - 4;
+
+   /* make manipulation copy, and possibly convert to lowercase */
+   strcpy(extcopy, ext);
+   if( g_opts.allow_upper_fext ) make_lowercase(extcopy);
+
+   /* if it look like a basic extension, fail or return it */
+   if( compare_strlist(extcopy, elist, 4) >= 0 ) {
+      if( is_mixedcase(ext) ) {
+         Rc_fprintf_stderr("** NIFTI: mixed case extension '%s' is not valid\n",
+                 ext);
+         return NULL;
+      }
+      else return (char *)ext; /* Cast away the constness of the input parameter */
+   }
+
+#ifdef HAVE_ZLIB
+   if ( len < 7 ) return NULL;
+
+   ext = name + len - 7;
+
+   /* make manipulation copy, and possibly convert to lowercase */
+   strcpy(extcopy, ext);
+   if( g_opts.allow_upper_fext ) make_lowercase(extcopy);
+
+   /* go after .gz extensions using the modifiable strings */
+   strcat(elist[0], extgz); strcat(elist[1], extgz); strcat(elist[2], extgz);
+
+   if( compare_strlist(extcopy, elist, 3) >= 0 ) {
+      if( is_mixedcase(ext) ) {
+         Rc_fprintf_stderr("** NIFTI: mixed case extension '%s' is not valid\n",
+                        ext);
+         return NULL;
+      }
+      else return (char *)ext; /* Cast away the constness of the input parameter */
+   }
+
+#endif
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("** find_file_ext: failed for name '%s'\n", name);
+
+   return NULL;
+}
+
+/*----------------------------------------------------------------------*/
+/*! return whether the filename ends in ".gz"
+*//*--------------------------------------------------------------------*/
+int nifti_is_gzfile(const char* fname)
+{
+  /* return true if the filename ends with .gz */
+  if (fname == NULL) { return 0; }
+#ifdef HAVE_ZLIB
+  { /* just so len doesn't generate compile warning */
+     int len;
+     len = (int)strlen(fname);
+     if (len < 3) return 0;  /* so we don't search before the name */
+     if (fileext_compare(fname + strlen(fname) - 3,".gz")==0) { return 1; }
+  }
+#endif
+  return 0;
+}
+
+/*----------------------------------------------------------------------*/
+/*! return whether the given library was compiled with HAVE_ZLIB set
+*//*--------------------------------------------------------------------*/
+int nifti_compiled_with_zlib(void)
+{
+#ifdef HAVE_ZLIB
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+/*----------------------------------------------------------------------*/
+/*! duplicate the filename, while clearing any extension
+
+    This allocates memory for basename which should eventually be freed.
+*//*--------------------------------------------------------------------*/
+char * nifti_makebasename(const char* fname)
+{
+   char *basename;
+   const char *ext;
+
+   basename=nifti_strdup(fname);
+
+   ext = nifti_find_file_extension(basename);
+   if ( ext )
+   {
+     basename[strlen(basename)-strlen(ext)] = '\0';  /* clear out extension */
+   }
+
+   return basename;  /* in either case */
+}
+
+/*----------------------------------------------------------------------*/
+/* option accessor functions                                            */
+/*----------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*! set nifti's global debug level, for status reporting
+
+    - 0    : quiet, nothing is printed to the terminal, but errors
+    - 1    : normal execution (the default)
+    - 2, 3 : more details
+*//*--------------------------------------------------------------------*/
+void nifti_set_debug_level( int level )
+{
+    g_opts.debug = level;
+}
+
+/*----------------------------------------------------------------------*/
+/*! set nifti's global skip_blank_ext flag            5 Sep 2006 [rickr]
+
+    explicitly set to 0 or 1
+*//*--------------------------------------------------------------------*/
+void nifti_set_skip_blank_ext( int skip )
+{
+    g_opts.skip_blank_ext = skip ? 1 : 0;
+}
+
+/*----------------------------------------------------------------------*/
+/*! set nifti's global allow_upper_fext flag         28 Apr 2009 [rickr]
+
+    explicitly set to 0 or 1
+*//*--------------------------------------------------------------------*/
+void nifti_set_allow_upper_fext( int allow )
+{
+    g_opts.allow_upper_fext = allow ? 1 : 0;
+}
+
+/*----------------------------------------------------------------------*/
+/*! get nifti's global alter_cifti flag              22 Jul 2015 [rickr]
+*//*--------------------------------------------------------------------*/
+int nifti_get_alter_cifti( void )
+{
+    return g_opts.alter_cifti;
+}
+
+/*----------------------------------------------------------------------*/
+/*! set nifti's global alter_cifti flag              22 Jul 2015 [rickr]
+
+    explicitly set to 0 or 1
+*//*--------------------------------------------------------------------*/
+void nifti_set_alter_cifti( int alter_cifti )
+{
+    g_opts.alter_cifti = alter_cifti ? 1 : 0;
+}
+
+/*----------------------------------------------------------------------*/
+/*! check current directory for existing header file
+
+    \return filename of header on success and NULL if no appropriate file
+            could be found
+
+    If fname has an uppercase extension, check for uppercase files.
+
+    NB: it allocates memory for hdrname which should be freed
+        when no longer required
+*//*-------------------------------------------------------------------*/
+char * nifti_findhdrname(const char* fname)
+{
+   char *basename, *hdrname;
+   const char *ext;
+   char  elist[2][5] = { ".hdr", ".nii" };
+   char  extzip[4]   = ".gz";
+   int   efirst = 1;    /* init to .nii extension */
+   int   eisupper = 0;  /* init to lowercase extensions */
+
+   /**- check input file(s) for sanity */
+   if( !nifti_validfilename(fname) ) return NULL;
+
+   basename = nifti_makebasename(fname);
+   if( !basename ) return NULL;   /* only on string alloc failure */
+
+   /**- return filename if it has a valid extension and exists
+         (except if it is an .img file (and maybe .gz)) */
+   ext = nifti_find_file_extension(fname);
+
+   if( ext ) eisupper = is_uppercase(ext);  /* do we look for uppercase? */
+
+   /* if the file exists and is a valid header name (not .img), return it */
+   if ( ext && nifti_fileexists(fname) ) {
+     /* allow for uppercase extension */
+     if ( fileext_n_compare(ext,".img",4) != 0 ){
+        hdrname = nifti_strdup(fname);
+        free(basename);
+        return hdrname;
+     } else
+        efirst = 0;     /* note for below */
+   }
+
+   /* So the requested name is a basename, contains .img, or does not exist. */
+   /* In any case, use basename. */
+
+   /**- if .img, look for .hdr, .hdr.gz, .nii, .nii.gz, in that order */
+   /**- else,    look for .nii, .nii.gz, .hdr, .hdr.gz, in that order */
+
+   /* if we get more extension choices, this could be a loop */
+
+   /* note: efirst is 0 in the case of ".img" */
+
+   /* if the user passed an uppercase entension (.IMG), search for uppercase */
+   if( eisupper ) {
+      make_uppercase(elist[0]);
+      make_uppercase(elist[1]);
+      make_uppercase(extzip);
+   }
+
+   hdrname = (char *)calloc(sizeof(char),strlen(basename)+8);
+   if( !hdrname ){
+      Rc_fprintf_stderr("** nifti_findhdrname: failed to alloc hdrname\n");
+      free(basename);
+      return NULL;
+   }
+
+   strcpy(hdrname,basename);
+   strcat(hdrname,elist[efirst]);
+   if (nifti_fileexists(hdrname)) { free(basename); return hdrname; }
+#ifdef HAVE_ZLIB
+   strcat(hdrname,extzip);
+   if (nifti_fileexists(hdrname)) { free(basename); return hdrname; }
+#endif
+
+   /* okay, try the other possibility */
+
+   efirst = 1 - efirst;
+
+   strcpy(hdrname,basename);
+   strcat(hdrname,elist[efirst]);
+   if (nifti_fileexists(hdrname)) { free(basename); return hdrname; }
+#ifdef HAVE_ZLIB
+   strcat(hdrname,extzip);
+   if (nifti_fileexists(hdrname)) { free(basename); return hdrname; }
+#endif
+
+   /**- if nothing has been found, return NULL */
+   free(basename);
+   free(hdrname);
+   return NULL;
+}
+
+
+/*------------------------------------------------------------------------*/
+/*! check current directory for existing image file
+
+    \param fname filename to check for
+    \nifti_type  nifti_type for dataset - this determines whether to
+                 first check for ".nii" or ".img" (since both may exist)
+
+    \return filename of data/img file on success and NULL if no appropriate
+            file could be found
+
+    If fname has a valid, uppercase extension, apply all extensions as
+    uppercase.
+
+    NB: it allocates memory for the image filename, which should be freed
+        when no longer required
+*//*---------------------------------------------------------------------*/
+char * nifti_findimgname(const char* fname , int nifti_type)
+{
+   /* store all extensions as strings, in case we need to go uppercase */
+   char *basename, *imgname, elist[2][5] = { ".nii", ".img" };
+   char  extzip[4] = ".gz";
+   char  extnia[5] = ".nia";
+   const char *ext;
+   int   first;  /* first extension to use */
+
+   /* check input file(s) for sanity */
+   if( !nifti_validfilename(fname) ) return NULL;
+
+   basename =  nifti_makebasename(fname);
+   imgname = (char *)calloc(sizeof(char),strlen(basename)+8);
+   if( !imgname ){
+      Rc_fprintf_stderr("** nifti_findimgname: failed to alloc imgname\n");
+      free(basename);
+      return NULL;
+   }
+
+   /* if we are looking for uppercase, apply the fact now */
+   ext = nifti_find_file_extension(fname);
+   if( ext && is_uppercase(ext) ) {
+      make_uppercase(elist[0]);
+      make_uppercase(elist[1]);
+      make_uppercase(extzip);
+      make_uppercase(extnia);
+   }
+
+   /* only valid extension for ASCII type is .nia, handle first */
+   if( nifti_type == NIFTI_FTYPE_ASCII ){
+      strcpy(imgname,basename);
+      strcat(imgname,extnia);
+      if (nifti_fileexists(imgname)) { free(basename); return imgname; }
+
+   } else {
+
+      /**- test for .nii and .img (don't assume input type from image type) */
+      /**- if nifti_type = 1, check for .nii first, else .img first         */
+
+      /* if we get 3 or more extensions, can make a loop here... */
+
+      if (nifti_type == NIFTI_FTYPE_NIFTI1_1) first = 0; /* should match .nii */
+      else if (nifti_type == NIFTI_FTYPE_NIFTI2_1) first = 0;
+      else                                    first = 1; /* should match .img */
+
+      strcpy(imgname,basename);
+      strcat(imgname,elist[first]);
+      if (nifti_fileexists(imgname)) { free(basename); return imgname; }
+#ifdef HAVE_ZLIB  /* then also check for .gz */
+      strcat(imgname,extzip);
+      if (nifti_fileexists(imgname)) { free(basename); return imgname; }
+#endif
+
+      /* failed to find image file with expected extension, try the other */
+
+      strcpy(imgname,basename);
+      strcat(imgname,elist[1-first]);  /* can do this with only 2 choices */
+      if (nifti_fileexists(imgname)) { free(basename); return imgname; }
+#ifdef HAVE_ZLIB  /* then also check for .gz */
+      strcat(imgname,extzip);
+      if (nifti_fileexists(imgname)) { free(basename); return imgname; }
+#endif
+   }
+
+   /**- if nothing has been found, return NULL */
+   free(basename);
+   free(imgname);
+   return NULL;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! creates a filename for storing the header, based on nifti_type
+
+   \param   prefix      - this will be copied before the suffix is added
+   \param   nifti_type  - determines the extension, unless one is in prefix
+   \param   check       - check for existence (fail condition)
+   \param   comp        - add .gz for compressed name
+
+   Note that if prefix provides a file suffix, nifti_type is not used.
+
+   NB: this allocates memory which should be freed
+
+   \sa nifti_set_filenames
+*//*-------------------------------------------------------------------*/
+char * nifti_makehdrname(const char * prefix, int nifti_type, int check,
+                         int comp)
+{
+   char * iname;
+   const char * ext;
+   char   extnii[5] = ".nii";   /* modifiable, for possible uppercase */
+   char   exthdr[5] = ".hdr";
+   char   extimg[5] = ".img";
+   char   extnia[5] = ".nia";
+   char   extgz[5]  = ".gz";
+
+   if( !nifti_validfilename(prefix) ) return NULL;
+
+   /* add space for extension, optional ".gz", and null char */
+   iname = (char *)calloc(sizeof(char),strlen(prefix)+8);
+   if( !iname ){
+      Rc_fprintf_stderr("** NIFTI small malloc failure!\n");
+      return NULL;
+   }
+   strcpy(iname, prefix);
+
+   /* use any valid extension */
+   if( (ext = nifti_find_file_extension(iname)) != NULL ){
+      /* if uppercase, convert all extensions */
+      if( is_uppercase(ext) ) {
+         make_uppercase(extnii);
+         make_uppercase(exthdr);
+         make_uppercase(extimg);
+         make_uppercase(extnia);
+         make_uppercase(extgz);
+      }
+
+      if( strncmp(ext,extimg,4) == 0 )
+      {
+         memcpy(&(iname[strlen(iname)-strlen(ext)]),exthdr,4);   /* then convert img name to hdr */
+      }
+   }
+   /* otherwise, make one up */
+   else if( nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcat(iname, extnii);
+   else if( nifti_type == NIFTI_FTYPE_NIFTI2_1 ) strcat(iname, extnii);
+   else if( nifti_type == NIFTI_FTYPE_ASCII )    strcat(iname, extnia);
+   else                                          strcat(iname, exthdr);
+
+#ifdef HAVE_ZLIB  /* if compression is requested, make sure of suffix */
+   if( comp && (!ext || !strstr(iname,extgz)) ) strcat(iname,extgz);
+#endif
+
+   /* check for existence failure */
+   if( check && nifti_fileexists(iname) ){
+      Rc_fprintf_stderr("** failure: NIFTI header file '%s' already exists\n",
+              iname);
+      free(iname);
+      return NULL;
+   }
+
+   if(g_opts.debug > 2) Rc_fprintf_stderr("+d made header filename '%s'\n", iname);
+
+   return iname;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! creates a filename for storing the image, based on nifti_type
+
+   \param   prefix      - this will be copied before the suffix is added
+   \param   nifti_type  - determines the extension, unless provided by prefix
+   \param   check       - check for existence (fail condition)
+   \param   comp        - add .gz for compressed name
+
+   Note that if prefix provides a file suffix, nifti_type is not used.
+
+   NB: it allocates memory which should be freed
+
+   \sa nifti_set_filenames
+*//*-------------------------------------------------------------------*/
+char * nifti_makeimgname(const char * prefix, int nifti_type, int check,
+                         int comp)
+{
+   char * iname;
+   const char * ext;
+   char   extnii[5] = ".nii";   /* modifiable, for possible uppercase */
+   char   exthdr[5] = ".hdr";
+   char   extimg[5] = ".img";
+   char   extnia[5] = ".nia";
+   char   extgz[5]  = ".gz";
+
+   if( !nifti_validfilename(prefix) ) return NULL;
+
+   /* add space for extension, optional ".gz", and null char */
+   iname = (char *)calloc(sizeof(char),strlen(prefix)+8);
+   if( !iname ){
+      Rc_fprintf_stderr("** NIFTI: small malloc failure!\n");
+      return NULL;
+   }
+   strcpy(iname, prefix);
+
+   /* use any valid extension */
+   if( (ext = nifti_find_file_extension(iname)) != NULL ){
+      /* if uppercase, convert all extensions */
+      if( is_uppercase(ext) ) {
+         make_uppercase(extnii);
+         make_uppercase(exthdr);
+         make_uppercase(extimg);
+         make_uppercase(extnia);
+         make_uppercase(extgz);
+      }
+
+      if( strncmp(ext,exthdr,4) == 0 )
+      {
+         memcpy(&(iname[strlen(iname)-strlen(ext)]),extimg,4);   /* then convert hdr name to img */
+      }
+   }
+   /* otherwise, make one up */
+   else if( nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcat(iname, extnii);
+   else if( nifti_type == NIFTI_FTYPE_NIFTI2_1 ) strcat(iname, extnii);
+   else if( nifti_type == NIFTI_FTYPE_ASCII )    strcat(iname, extnia);
+   else                                          strcat(iname, extimg);
+
+#ifdef HAVE_ZLIB  /* if compression is requested, make sure of suffix */
+   if( comp && (!ext || !strstr(iname,extgz)) ) strcat(iname,extgz);
+#endif
+
+   /* check for existence failure */
+   if( check && nifti_fileexists(iname) ){
+      Rc_fprintf_stderr("** NIFTI failure: image file '%s' already exists\n",
+              iname);
+      free(iname);
+      return NULL;
+   }
+
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d made image filename '%s'\n",iname);
+
+   return iname;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! create and set new filenames, based on prefix and image type
+
+   \param nim            pointer to nifti_image in which to set filenames
+   \param prefix         (required) prefix for output filenames
+   \param check          check for previous existence of filename
+                         (existence is an error condition)
+   \param set_byte_order flag to set nim->byteorder here
+                         (this is probably a logical place to do so)
+
+   \return 0 on successful update
+
+   \warning this will free() any existing names and create new ones
+
+   \sa nifti_makeimgname, nifti_makehdrname, nifti_type_and_names_match
+*//*--------------------------------------------------------------------*/
+int nifti2_set_filenames( nifti_image * nim, const char * prefix, int check,
+                         int set_byte_order )
+{
+   int comp = nifti_is_gzfile(prefix);
+
+   if( !nim || !prefix ){
+      Rc_fprintf_stderr("** nifti_set_filenames, bad params %p, %p\n",
+              (void *)nim,prefix);
+      return -1;
+   }
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("+d modifying output filenames using prefix %s\n", prefix);
+
+   /* set and test output filenames */
+   if( nim->fname ) free(nim->fname);
+   if( nim->iname ) free(nim->iname);
+   nim->iname = NULL;
+   nim->fname = nifti_makehdrname(prefix, nim->nifti_type, check, comp);
+   if( nim->fname )
+      nim->iname = nifti_makeimgname(prefix, nim->nifti_type, check, comp);
+   if( !nim->fname || !nim->iname ) return -1; /* failure */
+
+   if( set_byte_order ) nim->byteorder = nifti_short_order() ;
+
+   if( nifti_set_type_from_names(nim) < 0 )
+      return -1;
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("+d have new filenames %s and %s\n",nim->fname,nim->iname);
+
+   return 0;
+}
+
+
+/*--------------------------------------------------------------------------*/
+/*! check whether nifti_type matches fname and iname for the nifti_image
+
+    - if type 0 or 2, expect .hdr/.img pair
+    - if type 1, expect .nii (and names must match)
+
+    \param nim       given nifti_image
+    \param show_warn if set, print a warning message for any mis-match
+
+    \return
+        -   1 if the values seem to match
+        -   0 if there is a mis-match
+        -  -1 if there is not sufficient information to create file(s)
+
+    \sa NIFTI_FTYPE_* codes in nifti1_io.h
+    \sa nifti_set_type_from_names, is_valid_nifti_type
+*//*------------------------------------------------------------------------*/
+int nifti2_type_and_names_match( nifti_image * nim, int show_warn )
+{
+   char func[] = "nifti_type_and_names_match";
+   const char * ext_h;  /* header  filename extension */
+   const char * ext_i;  /* image filename extension */
+   int  errs = 0;          /* error counter */
+
+   /* sanity checks */
+   if( !nim ){
+      if( show_warn ) Rc_fprintf_stderr("** %s: missing nifti_image\n", func);
+      return -1;
+   }
+   if( !nim->fname ){
+      if( show_warn ) Rc_fprintf_stderr("** %s: missing header filename\n", func);
+      errs++;
+   }
+   if( !nim->iname ){
+      if( show_warn ) Rc_fprintf_stderr("** %s: missing image filename\n", func);
+      errs++;
+   }
+   if( !is_valid_nifti_type(nim->nifti_type) ){
+      if( show_warn )
+         Rc_fprintf_stderr("** %s: bad nifti_type %d\n", func, nim->nifti_type);
+      errs++;
+   }
+
+   if( errs ) return -1;   /* then do not proceed */
+
+   /* get pointers to extensions */
+   ext_h = nifti_find_file_extension( nim->fname );
+   ext_i = nifti_find_file_extension( nim->iname );
+
+   /* check for filename extensions */
+   if( !ext_h ){
+      if( show_warn )
+         Rc_fprintf_stderr("-d missing NIFTI extension in header filename, %s\n",
+                 nim->fname);
+      errs++;
+   }
+   if( !ext_i ){
+      if( show_warn )
+         Rc_fprintf_stderr("-d missing NIFTI extension in image filename, %s\n",
+                 nim->iname);
+      errs++;
+   }
+
+   if( errs ) return 0;   /* do not proceed, but this is just a mis-match */
+
+   /* general tests */
+   if( (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1) ||
+       (nim->nifti_type == NIFTI_FTYPE_NIFTI2_1) ){  /* .nii */
+      if( fileext_n_compare(ext_h,".nii",4) ) {
+         if( show_warn )
+            Rc_fprintf_stderr(
+            "-d NIFTI_FTYPE 1, but no .nii extension in header filename, %s\n",
+            nim->fname);
+         errs++;
+      }
+      if( fileext_n_compare(ext_i,".nii",4) ) {
+         if( show_warn )
+            Rc_fprintf_stderr(
+            "-d NIFTI_FTYPE 1, but no .nii extension in image filename, %s\n",
+            nim->iname);
+         errs++;
+      }
+      if( strcmp(nim->fname, nim->iname) != 0 ){
+         if( show_warn )
+            Rc_fprintf_stderr(
+            "-d NIFTI_FTYPE 1, but header and image filenames differ: %s, %s\n",
+            nim->fname, nim->iname);
+         errs++;
+      }
+   }
+   else if( (nim->nifti_type == NIFTI_FTYPE_NIFTI1_2) || /* .hdr/.img */
+            (nim->nifti_type == NIFTI_FTYPE_NIFTI2_2) ||
+            (nim->nifti_type == NIFTI_FTYPE_ANALYZE) )
+   {
+      if( fileext_n_compare(ext_h,".hdr",4) != 0 ){
+         if( show_warn )
+            Rc_fprintf_stderr("-d no '.hdr' extension, but NIFTI type is %d, %s\n",
+                    nim->nifti_type, nim->fname);
+         errs++;
+      }
+      if( fileext_n_compare(ext_i,".img",4) != 0 ){
+         if( show_warn )
+            Rc_fprintf_stderr("-d no '.img' extension, but NIFTI type is %d, %s\n",
+                    nim->nifti_type, nim->iname);
+         errs++;
+      }
+   }
+   /* ignore any other nifti_type */
+
+   if( errs ) return 0;   /* types do not match */
+
+   return 1;
+}
+
+/* like strcmp, but also check against capitalization of known_ext
+ * (test as local string, with max length 7) */
+static int fileext_compare(const char * test_ext, const char * known_ext)
+{
+   char   caps[8] = "";
+   size_t c,len;
+   /* if equal, don't need to check case (store to avoid multiple calls) */
+   const int cmp = strcmp(test_ext, known_ext);
+   if( cmp == 0 ) return cmp;
+
+   /* if anything odd, use default */
+   if( !test_ext || !known_ext ) return cmp;
+
+   len = strlen(known_ext);
+   if( len > 7 ) return cmp;
+
+   /* if here, strings are different but need to check upper-case */
+
+   for(c = 0; c < len; c++ ) caps[c] = toupper((int) known_ext[c]);
+   caps[c] = '\0';
+
+   return strcmp(test_ext, caps);
+}
+
+/* like strncmp, but also check against capitalization of known_ext
+ * (test as local string, with max length 7) */
+static int fileext_n_compare(const char * test_ext,
+                             const char * known_ext, size_t maxlen)
+{
+   char   caps[8] = "";
+   size_t c,len;
+   /* if equal, don't need to check case (store to avoid multiple calls) */
+   const int  cmp = strncmp(test_ext, known_ext, maxlen);
+   if( cmp == 0 ) return cmp;
+
+   /* if anything odd, use default */
+   if( !test_ext || !known_ext ) return cmp;
+
+   len = strlen(known_ext);
+   if( len > maxlen ) len = maxlen;     /* ignore anything past maxlen */
+   if( len > 7 ) return cmp;
+
+   /* if here, strings are different but need to check upper-case */
+   for(c = 0; c < len; c++ ) caps[c] = toupper((int) known_ext[c]);
+   caps[c] = '\0';
+
+   return strncmp(test_ext, caps, maxlen);
+}
+
+/* return 1 if there are uppercase but no lowercase */
+static int is_uppercase(const char * str)
+{
+   size_t c;
+   int    hasupper = 0;
+
+   if( !str || !*str ) return 0;
+
+   for(c = 0; c < strlen(str); c++ ) {
+     if( islower((int) str[c]) ) return 0;
+     if( !hasupper && isupper((int) str[c]) ) hasupper = 1;
+   }
+
+   return hasupper;
+}
+
+/* return 1 if there are both uppercase and lowercase characters */
+static int is_mixedcase(const char * str)
+{
+   size_t c;
+   int    hasupper = 0, haslower = 0;
+
+   if( !str || !*str ) return 0;
+
+   for(c = 0; c < strlen(str); c++ ) {
+     if( !haslower && islower((int) str[c]) ) haslower = 1;
+     if( !hasupper && isupper((int) str[c]) ) hasupper = 1;
+
+      if( haslower && hasupper ) return 1;
+   }
+
+   return 0;
+}
+
+/* convert any lowercase chars to uppercase */
+static int make_uppercase(char * str)
+{
+   size_t c;
+
+   if( !str || !*str ) return 0;
+
+   for(c = 0; c < strlen(str); c++ )
+     if( islower((int) str[c]) ) str[c] = toupper((int) str[c]);
+
+   return 0;
+}
+
+/* convert any uppercase chars to lowercase */
+static int make_lowercase(char * str)
+{
+   size_t c;
+   if( !str || !*str ) return 0;
+
+   for(c = 0; c < strlen(str); c++ )
+     if( isupper((int) str[c]) ) str[c] = tolower((int) str[c]);
+
+   return 0;
+}
+
+/* run strcmp against of list of strings
+ * return index of equality, if found
+ * else return -1 */
+static int compare_strlist(const char * str, char ** strlist, int len)
+{
+   int c;
+   if( len <= 0 || !str || !strlist ) return -1;
+   for( c = 0; c < len; c++ )
+      if( strlist[c] && !strcmp(str, strlist[c]) ) return c;
+   return -1;
+}
+
+/*--------------------------------------------------------------------------*/
+/*! check whether the given type is on the "approved" list
+
+    The code is valid if it is non-negative, and does not exceed
+    NIFTI_MAX_FTYPE.
+
+    \return 1 if nifti_type is valid, 0 otherwise
+    \sa NIFTI_FTYPE_* codes in nifti1_io.h
+*//*------------------------------------------------------------------------*/
+int is_valid_nifti2_type( int nifti_type )
+{
+   if( nifti_type >= NIFTI_FTYPE_ANALYZE &&   /* smallest type, 0 */
+       nifti_type <= NIFTI_MAX_FTYPE )
+      return 1;
+   return 0;
+}
+
+
+/*--------------------------------------------------------------------------*/
+/*! check whether the given type is on the "approved" list
+
+    The type is explicitly checked against the NIFTI_TYPE_* list
+    in nifti1.h.
+
+    \return 1 if dtype is valid, 0 otherwise
+    \sa NIFTI_TYPE_* codes in nifti1.h
+*//*------------------------------------------------------------------------*/
+int nifti_is_valid_datatype( int dtype )
+{
+   if( dtype == NIFTI_TYPE_UINT8        ||
+       dtype == NIFTI_TYPE_INT16        ||
+       dtype == NIFTI_TYPE_INT32        ||
+       dtype == NIFTI_TYPE_FLOAT32      ||
+       dtype == NIFTI_TYPE_COMPLEX64    ||
+       dtype == NIFTI_TYPE_FLOAT64      ||
+       dtype == NIFTI_TYPE_RGB24        ||
+       dtype == NIFTI_TYPE_RGBA32       ||
+       dtype == NIFTI_TYPE_INT8         ||
+       dtype == NIFTI_TYPE_UINT16       ||
+       dtype == NIFTI_TYPE_UINT32       ||
+       dtype == NIFTI_TYPE_INT64        ||
+       dtype == NIFTI_TYPE_UINT64       ||
+       dtype == NIFTI_TYPE_FLOAT128     ||
+       dtype == NIFTI_TYPE_COMPLEX128   ||
+       dtype == NIFTI_TYPE_COMPLEX256 ) return 1;
+   return 0;
+}
+
+
+/*--------------------------------------------------------------------------*/
+/*! set the nifti_type field based on fname and iname
+
+    Note that nifti_type is changed only when it does not match
+    the filenames.
+
+    \return 0 on success, -1 on error
+
+    \sa is_valid_nifti_type, nifti_type_and_names_match
+*//*------------------------------------------------------------------------*/
+int nifti2_set_type_from_names( nifti_image * nim )
+{
+   /* error checking first */
+   if( !nim ){ Rc_fprintf_stderr("** NSTFN: no nifti_image\n");  return -1; }
+
+   if( !nim->fname || !nim->iname ){
+      Rc_fprintf_stderr("** NIFTI_STFN: NULL filename(s) fname @ %p, iname @ %p\n",
+              nim->fname, nim->iname);
+      return -1;
+   }
+
+   if( ! nifti_validfilename      ( nim->fname ) ||
+       ! nifti_validfilename      ( nim->iname ) ||
+       ! nifti_find_file_extension( nim->fname ) ||
+       ! nifti_find_file_extension( nim->iname )
+     ) {
+      Rc_fprintf_stderr("** NIFTI_STFN: invalid filename(s) "
+              "fname='%s', iname='%s'\n",
+              nim->fname, nim->iname);
+      return -1;
+   }
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("-d verify nifti_type from filenames: %d",nim->nifti_type);
+
+   /* type should be NIFTI_FTYPE_ASCII if extension is .nia */
+   if( (fileext_compare(nifti_find_file_extension(nim->fname),".nia")==0)){
+      nim->nifti_type = NIFTI_FTYPE_ASCII;
+   } else {
+      /* not too picky here, do what must be done, and then verify */
+      if( strcmp(nim->fname, nim->iname) == 0 )          /* one file, type 1 */
+         nim->nifti_type = (nim->nifti_type >= NIFTI_FTYPE_NIFTI2_1) ? NIFTI_FTYPE_NIFTI2_1 : NIFTI_FTYPE_NIFTI1_1;
+      else if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ) /* cannot be type 1 */
+         nim->nifti_type = NIFTI_FTYPE_NIFTI1_2;
+      else if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_1 )
+         nim->nifti_type = NIFTI_FTYPE_NIFTI2_2;
+   }
+
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr(" -> %d\n",nim->nifti_type);
+
+   if( g_opts.debug > 1 )  /* warn user about anything strange */
+      nifti_type_and_names_match(nim, 1);
+
+   if( is_valid_nifti_type(nim->nifti_type) ) return 0;  /* success! */
+
+   Rc_fprintf_stderr("** NSTFN: bad nifti_type %d, for '%s' and '%s'\n",
+           nim->nifti_type, nim->fname, nim->iname);
+
+   return -1;
+}
+
+
+/*--------------------------------------------------------------------------*/
+/*! Determine if this is a NIFTI-formatted file.
+
+   <pre>
+   \return  0 if file looks like ANALYZE 7.5 [checks sizeof_hdr field == 348]
+            1 if file marked as NIFTI (header+data in 1 file)
+            2 if file marked as NIFTI (header+data in 2 files)
+           -1 if it can't tell, file doesn't exist, etc.
+   </pre>
+*//*------------------------------------------------------------------------*/
+int is_nifti_file( const char *hname )
+{
+   nifti_1_header nhdr ;
+   znzFile fp ;
+   int ii ;
+   char *tmpname;
+
+/* rcr - update to check for nifti-1 or -2 */
+
+   /* bad input name? */
+
+   if( !nifti_validfilename(hname) ) return -1 ;
+
+   /* open file */
+
+   tmpname = nifti_findhdrname(hname);
+   if( tmpname == NULL ){
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** NIFTI: no header file found for '%s'\n",hname);
+      return -1;
+   }
+   fp = znzopen( tmpname , "rb" , nifti_is_gzfile(tmpname) ) ;
+   free(tmpname);
+   if (znz_isnull(fp))                      return -1 ;  /* bad open? */
+
+   /* read header, close file */
+
+   ii = (int)znzread( &nhdr , 1 , sizeof(nhdr) , fp ) ;
+   znzclose( fp ) ;
+   if( ii < (int) sizeof(nhdr) )               return -1 ;  /* bad read? */
+
+   /* check for NIFTI-ness */
+
+   if( NIFTI_VERSION(nhdr) != 0 ){
+     return ( NIFTI_ONEFILE(nhdr) ) ? 1 : 2 ;
+   }
+
+   /* check for ANALYZE-ness (sizeof_hdr field == 348) */
+
+   ii = nhdr.sizeof_hdr ;
+   if( ii == (int)sizeof(nhdr) ) return 0 ;  /* matches */
+
+   /* try byte-swapping header */
+
+   swap_4(ii) ;
+   if( ii == (int)sizeof(nhdr) ) return 0 ;  /* matches */
+
+   return -1 ;                          /* not good */
+}
+
+static int print_hex_vals( const char * data, size_t nbytes, FILE * fp )
+{
+   size_t c;
+
+   if ( !data || nbytes < 1 || !fp ) return -1;
+
+   fputs("0x", fp);
+   for ( c = 0; c < nbytes; c++ )
+      fprintf(fp, " %02x", data[c]);
+
+   return 0;
+}
+
+/*----------------------------------------------------------------------*/
+/*! display the contents of the nifti_1_header (send to stdout)
+
+   \param info if non-NULL, print this character string
+   \param hp   pointer to nifti_1_header
+*//*--------------------------------------------------------------------*/
+int disp_nifti_1_header( const char * info, const nifti_1_header * hp )
+{
+   int c;
+
+   Rc_fputs_stdout( "-------------------------------------------------------\n" );
+   if ( info )  Rc_fputs_stdout( info );
+   if ( !hp  ){ Rc_fputs_stdout(" ** no nifti_1_header to display!\n"); return 1; }
+
+   Rc_fprintf_stdout(" nifti_1_header :\n"
+           "    sizeof_hdr     = %d\n"
+           "    data_type[10]  = ", hp->sizeof_hdr);
+#ifndef USING_R
+   print_hex_vals(hp->data_type, 10, stdout);
+#endif
+   Rc_fprintf_stdout("\n"
+           "    db_name[18]    = ");
+#ifndef USING_R
+   print_hex_vals(hp->db_name, 18, stdout);
+#endif
+   Rc_fprintf_stdout("\n"
+           "    extents        = %d\n"
+           "    session_error  = %d\n"
+           "    regular        = 0x%x\n"
+           "    dim_info       = 0x%x\n",
+      hp->extents, hp->session_error, hp->regular, hp->dim_info );
+   Rc_fprintf_stdout("    dim[8]         =");
+   for ( c = 0; c < 8; c++ ) Rc_fprintf_stdout(" %d", hp->dim[c]);
+   Rc_fprintf_stdout("\n"
+           "    intent_p1      = %f\n"
+           "    intent_p2      = %f\n"
+           "    intent_p3      = %f\n"
+           "    intent_code    = %d\n"
+           "    datatype       = %d\n"
+           "    bitpix         = %d\n"
+           "    slice_start    = %d\n"
+           "    pixdim[8]      =",
+           hp->intent_p1, hp->intent_p2, hp->intent_p3, hp->intent_code,
+           hp->datatype, hp->bitpix, hp->slice_start);
+   /* break pixdim over 2 lines */
+   for ( c = 0; c < 4; c++ ) Rc_fprintf_stdout(" %f", hp->pixdim[c]);
+   Rc_fprintf_stdout("\n                    ");
+   for ( c = 4; c < 8; c++ ) Rc_fprintf_stdout(" %f", hp->pixdim[c]);
+   Rc_fprintf_stdout("\n"
+           "    vox_offset     = %f\n"
+           "    scl_slope      = %f\n"
+           "    scl_inter      = %f\n"
+           "    slice_end      = %d\n"
+           "    slice_code     = %d\n"
+           "    xyzt_units     = 0x%x\n"
+           "    cal_max        = %f\n"
+           "    cal_min        = %f\n"
+           "    slice_duration = %f\n"
+           "    toffset        = %f\n"
+           "    glmax          = %d\n"
+           "    glmin          = %d\n",
+           hp->vox_offset, hp->scl_slope, hp->scl_inter, hp->slice_end,
+           hp->slice_code, hp->xyzt_units, hp->cal_max, hp->cal_min,
+           hp->slice_duration, hp->toffset, hp->glmax, hp->glmin);
+   Rc_fprintf_stdout(
+           "    descrip        = '%.80s'\n"
+           "    aux_file       = '%.24s'\n"
+           "    qform_code     = %d\n"
+           "    sform_code     = %d\n"
+           "    quatern_b      = %f\n"
+           "    quatern_c      = %f\n"
+           "    quatern_d      = %f\n"
+           "    qoffset_x      = %f\n"
+           "    qoffset_y      = %f\n"
+           "    qoffset_z      = %f\n"
+           "    srow_x[4]      = %f, %f, %f, %f\n"
+           "    srow_y[4]      = %f, %f, %f, %f\n"
+           "    srow_z[4]      = %f, %f, %f, %f\n"
+           "    intent_name    = '%-.16s'\n"
+           "    magic          = '%-.4s'\n",
+           hp->descrip, hp->aux_file, hp->qform_code, hp->sform_code,
+           hp->quatern_b, hp->quatern_c, hp->quatern_d,
+           hp->qoffset_x, hp->qoffset_y, hp->qoffset_z,
+           hp->srow_x[0], hp->srow_x[1], hp->srow_x[2], hp->srow_x[3],
+           hp->srow_y[0], hp->srow_y[1], hp->srow_y[2], hp->srow_y[3],
+           hp->srow_z[0], hp->srow_z[1], hp->srow_z[2], hp->srow_z[3],
+           hp->intent_name, hp->magic);
+   Rc_fputs_stdout( "-------------------------------------------------------\n" );
+#ifndef USING_R
+   fflush(stdout);
+#endif
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! display the contents of the nifti_2_header (send to stdout)
+
+   \param info if non-NULL, print this character string
+   \param hp   pointer to nifti_2_header
+*//*--------------------------------------------------------------------*/
+int disp_nifti_2_header( const char * info, const nifti_2_header * hp )
+{
+   int    c;
+
+   Rc_fputs_stdout( "-------------------------------------------------------\n" );
+   if ( info )  Rc_fputs_stdout( info );
+   if ( !hp  ){ Rc_fputs_stdout(" ** no nifti_2_header to display!\n"); return 1; }
+
+   /* print fields one by one, makes changing order and copying easier */
+
+   Rc_fprintf_stdout(" nifti_2_header :\n");
+   Rc_fprintf_stdout("    sizeof_hdr     = %d\n", hp->sizeof_hdr);
+   Rc_fprintf_stdout("    magic[8]       = '%-.4s' + ", hp->magic);
+#ifndef USING_R
+   print_hex_vals(hp->magic+4, 4, stdout);
+#endif
+   Rc_fputc_stdout('\n');
+
+   Rc_fprintf_stdout("    datatype       = %d (%s)\n",
+           hp->datatype, nifti_datatype_to_string(hp->datatype));
+   Rc_fprintf_stdout("    bitpix         = %d\n", hp->bitpix);
+   Rc_fprintf_stdout( "    dim[8]         =");
+   for ( c = 0; c < 8; c++ ) Rc_fprintf_stdout(" %" PRId64, hp->dim[c]);
+   Rc_fputc_stdout('\n');
+
+   Rc_fprintf_stdout( "    intent_p1      = %lf\n", hp->intent_p1);
+   Rc_fprintf_stdout( "    intent_p2      = %lf\n", hp->intent_p2);
+   Rc_fprintf_stdout( "    intent_p3      = %lf\n", hp->intent_p3);
+   Rc_fprintf_stdout( "    pixdim[8]      =");
+   for ( c = 0; c < 8; c++ ) Rc_fprintf_stdout(" %lf", hp->pixdim[c]);
+   Rc_fputc_stdout('\n');
+
+   Rc_fprintf_stdout( "    vox_offset     = %" PRId64 "\n", hp->vox_offset);
+
+   Rc_fprintf_stdout( "    scl_slope      = %lf\n", hp->scl_slope);
+   Rc_fprintf_stdout( "    scl_inter      = %lf\n", hp->scl_inter);
+   Rc_fprintf_stdout( "    cal_max        = %lf\n", hp->cal_max);
+   Rc_fprintf_stdout( "    cal_min        = %lf\n", hp->cal_min);
+   Rc_fprintf_stdout( "    slice_duration = %lf\n", hp->slice_duration);
+   Rc_fprintf_stdout( "    toffset        = %lf\n", hp->toffset);
+
+   Rc_fprintf_stdout( "    slice_start    = %" PRId64 "\n", hp->slice_start);
+   Rc_fprintf_stdout( "    slice_end      = %" PRId64 "\n", hp->slice_end);
+
+   Rc_fprintf_stdout( "    descrip        = '%.80s'\n", hp->descrip);
+   Rc_fprintf_stdout( "    aux_file       = '%.24s'\n", hp->aux_file);
+
+   Rc_fprintf_stdout( "    qform_code     = %d\n", hp->qform_code);
+   Rc_fprintf_stdout( "    sform_code     = %d\n", hp->sform_code);
+
+   Rc_fprintf_stdout( "    quatern_b      = %lf\n", hp->quatern_b);
+   Rc_fprintf_stdout( "    quatern_c      = %lf\n", hp->quatern_c);
+   Rc_fprintf_stdout( "    quatern_d      = %lf\n", hp->quatern_d);
+   Rc_fprintf_stdout( "    qoffset_x      = %lf\n", hp->qoffset_x);
+   Rc_fprintf_stdout( "    qoffset_y      = %lf\n", hp->qoffset_y);
+   Rc_fprintf_stdout( "    qoffset_z      = %lf\n", hp->qoffset_z);
+   Rc_fprintf_stdout( "    srow_x[4]      = %lf, %lf, %lf, %lf\n",
+           hp->srow_x[0], hp->srow_x[1], hp->srow_x[2], hp->srow_x[3]);
+   Rc_fprintf_stdout( "    srow_y[4]      = %lf, %lf, %lf, %lf\n",
+           hp->srow_y[0], hp->srow_y[1], hp->srow_y[2], hp->srow_y[3]);
+   Rc_fprintf_stdout( "    srow_z[4]      = %lf, %lf, %lf, %lf\n",
+           hp->srow_z[0], hp->srow_z[1], hp->srow_z[2], hp->srow_z[3]);
+
+   Rc_fprintf_stdout( "    slice_code     = %d\n", hp->slice_code);
+   Rc_fprintf_stdout( "    xyzt_units     = %d\n", hp->xyzt_units);
+   Rc_fprintf_stdout( "    intent_code    = %d\n", hp->intent_code);
+
+   Rc_fprintf_stdout( "    intent_name    = '%-.16s'\n", hp->intent_name);
+   Rc_fprintf_stdout( "    dim_info       = 0x%02x\n",(unsigned char)hp->dim_info);
+   Rc_fprintf_stdout( "    unused_str     = 0x ");
+   for ( c = 0; c < 15; c++ ) Rc_fprintf_stdout(" %02x", hp->unused_str[c]);
+   Rc_fputc_stdout('\n');
+
+   Rc_fputs_stdout( "-------------------------------------------------------\n" );
+#ifndef USING_R
+   fflush(stdout);
+#endif
+
+   return 0;
+}
+
+
+#undef  ERREX
+#define ERREX(msg)                                                        \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_convert_n1hdr2nim: %s\n", (msg) ) ;  \
+     return NULL ; } while(0)
+
+/*----------------------------------------------------------------------*/
+/*! convert a nifti_1_header into a nift1_image
+
+   \return an allocated nifti_image, or NULL on failure
+*//*--------------------------------------------------------------------*/
+nifti_image* nifti_convert_n1hdr2nim(nifti_1_header nhdr, const char * fname)
+{
+   int   ii , doswap , ioff ;
+   int   ni_ver , is_onefile ;
+   nifti_image *nim;
+
+   nim = (nifti_image *)calloc( 1 , sizeof(nifti_image) ) ;
+   if( !nim ) ERREX("failed to allocate nifti image");
+
+   /* be explicit with pointers */
+   nim->fname = NULL;
+   nim->iname = NULL;
+   nim->data = NULL;
+
+   /**- check if we must swap bytes */
+
+   doswap = need_nhdr_swap(nhdr.dim[0], nhdr.sizeof_hdr); /* swap data flag */
+
+   if( doswap < 0 ){
+      free(nim);
+      if( doswap == -1 ) ERREX("bad dim[0]") ;
+      ERREX("bad sizeof_hdr") ;  /* else */
+   }
+
+   /**- determine if this is a NIFTI-1 compliant header */
+
+   ni_ver = NIFTI_VERSION(nhdr) ;
+   /*
+    * before swapping header, record the Analyze75 orient code
+    */
+   if(ni_ver == 0)
+     {
+     /**- in analyze75, the orient code is at the same address as
+      *   qform_code, but it's just one byte
+      *   the qform_code will be zero, at which point you can check
+      *   analyze75_orient if you care to.
+      */
+     unsigned char c = *((char *)(&nhdr.qform_code));
+     nim->analyze75_orient = (analyze_75_orient_code)c;
+     }
+   if( doswap ) {
+      if ( g_opts.debug > 3 ) disp_nifti_1_header("-d ni1 pre-swap: ", &nhdr);
+      swap_nifti_header( &nhdr , ni_ver ) ;
+   }
+
+   if ( g_opts.debug > 2 ) disp_nifti_1_header("-d nhdr2nim : ", &nhdr);
+
+   if( nhdr.datatype == DT_BINARY || nhdr.datatype == DT_UNKNOWN  )
+   {
+     free(nim);
+     ERREX("bad datatype") ;
+   }
+
+   if( nhdr.dim[1] <= 0 )
+   {
+     free(nim);
+     ERREX("bad dim[1]") ;
+   }
+
+   /* fix bad dim[] values in the defined dimension range */
+   for( ii=2 ; ii <= nhdr.dim[0] ; ii++ )
+     if( nhdr.dim[ii] <= 0 ) nhdr.dim[ii] = 1 ;
+
+   /* fix any remaining bad dim[] values, so garbage does not propagate */
+   /* (only values 0 or 1 seem rational, otherwise set to arbirary 1)   */
+   for( ii=nhdr.dim[0]+1 ; ii <= 7 ; ii++ )
+     if( nhdr.dim[ii] != 1 && nhdr.dim[ii] != 0) nhdr.dim[ii] = 1 ;
+
+#if 0  /* rely on dim[0], do not attempt to modify it   16 Nov 2005 [rickr] */
+
+   /**- get number of dimensions (ignoring dim[0] now) */
+   for( ii=7 ; ii >= 2 ; ii-- )            /* loop backwards until we  */
+     if( nhdr.dim[ii] > 1 ) break ;        /* find a dim bigger than 1 */
+   ndim = ii ;
+#endif
+
+   /**- set bad grid spacings to 1.0 */
+
+   for( ii=1 ; ii <= nhdr.dim[0] ; ii++ ){
+     if( nhdr.pixdim[ii] == 0.0         ||
+         !IS_GOOD_FLOAT(nhdr.pixdim[ii])  ) nhdr.pixdim[ii] = 1.0f ;
+   }
+
+  is_onefile = (ni_ver > 0) && NIFTI_ONEFILE(nhdr) ;
+
+  if( ni_ver ) nim->nifti_type = (is_onefile) ? NIFTI_FTYPE_NIFTI1_1
+                                              : NIFTI_FTYPE_NIFTI1_2 ;
+  else         nim->nifti_type = NIFTI_FTYPE_ANALYZE ;
+
+  ii = nifti_short_order() ;
+  if( doswap )   nim->byteorder = REVERSE_ORDER(ii) ;
+  else           nim->byteorder = ii ;
+
+
+  /**- set dimensions of data array */
+
+  nim->ndim = nim->dim[0] = nhdr.dim[0];
+  nim->nx   = nim->dim[1] = nhdr.dim[1];
+  nim->ny   = nim->dim[2] = nhdr.dim[2];
+  nim->nz   = nim->dim[3] = nhdr.dim[3];
+  nim->nt   = nim->dim[4] = nhdr.dim[4];
+  nim->nu   = nim->dim[5] = nhdr.dim[5];
+  nim->nv   = nim->dim[6] = nhdr.dim[6];
+  nim->nw   = nim->dim[7] = nhdr.dim[7];
+
+  for( ii=1, nim->nvox=1; ii <= nhdr.dim[0]; ii++ )
+     nim->nvox *= nhdr.dim[ii];
+
+  /**- set the type of data in voxels and how many bytes per voxel */
+
+  nim->datatype = nhdr.datatype ;
+
+  nifti_datatype_sizes( nim->datatype , &(nim->nbyper) , &(nim->swapsize) ) ;
+  if( nim->nbyper == 0 ){ free(nim); ERREX("bad datatype"); }
+
+  /**- set the grid spacings */
+
+  nim->dx = nim->pixdim[1] = nhdr.pixdim[1] ;
+  nim->dy = nim->pixdim[2] = nhdr.pixdim[2] ;
+  nim->dz = nim->pixdim[3] = nhdr.pixdim[3] ;
+  nim->dt = nim->pixdim[4] = nhdr.pixdim[4] ;
+  nim->du = nim->pixdim[5] = nhdr.pixdim[5] ;
+  nim->dv = nim->pixdim[6] = nhdr.pixdim[6] ;
+  nim->dw = nim->pixdim[7] = nhdr.pixdim[7] ;
+
+  /**- compute qto_xyz transformation from pixel indexes (i,j,k) to (x,y,z) */
+
+  if( !ni_ver || nhdr.qform_code <= 0 ){
+    /**- if not nifti or qform_code <= 0, use grid spacing for qto_xyz */
+
+    nim->qto_xyz.m[0][0] = nim->dx ;  /* grid spacings */
+    nim->qto_xyz.m[1][1] = nim->dy ;  /* along diagonal */
+    nim->qto_xyz.m[2][2] = nim->dz ;
+
+    /* off diagonal is zero */
+
+    nim->qto_xyz.m[0][1]=nim->qto_xyz.m[0][2]=nim->qto_xyz.m[0][3] = 0.0f;
+    nim->qto_xyz.m[1][0]=nim->qto_xyz.m[1][2]=nim->qto_xyz.m[1][3] = 0.0f;
+    nim->qto_xyz.m[2][0]=nim->qto_xyz.m[2][1]=nim->qto_xyz.m[2][3] = 0.0f;
+
+    /* last row is always [ 0 0 0 1 ] */
+
+    nim->qto_xyz.m[3][0]=nim->qto_xyz.m[3][1]=nim->qto_xyz.m[3][2] = 0.0f;
+    nim->qto_xyz.m[3][3]= 1.0f ;
+
+    nim->qform_code = NIFTI_XFORM_UNKNOWN ;
+
+    if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no qform provided\n");
+  } else {
+    /**- else NIFTI: use the quaternion-specified transformation */
+
+    nim->quatern_b = FIXED_FLOAT( nhdr.quatern_b ) ;
+    nim->quatern_c = FIXED_FLOAT( nhdr.quatern_c ) ;
+    nim->quatern_d = FIXED_FLOAT( nhdr.quatern_d ) ;
+
+    nim->qoffset_x = FIXED_FLOAT(nhdr.qoffset_x) ;
+    nim->qoffset_y = FIXED_FLOAT(nhdr.qoffset_y) ;
+    nim->qoffset_z = FIXED_FLOAT(nhdr.qoffset_z) ;
+
+    nim->qfac = (nhdr.pixdim[0] < 0.0) ? -1.0f : 1.0f ;  /* left-handedness? */
+
+    nim->qto_xyz = nifti_quatern_to_dmat44(
+                      nim->quatern_b, nim->quatern_c, nim->quatern_d,
+                      nim->qoffset_x, nim->qoffset_y, nim->qoffset_z,
+                      nim->dx       , nim->dy       , nim->dz       ,
+                      nim->qfac                                      ) ;
+
+    nim->qform_code = nhdr.qform_code ;
+
+    if( g_opts.debug > 1 )
+       nifti_disp_matrix_orient("-d qform orientations:\n", nim->qto_xyz);
+  }
+
+  /**- load inverse transformation (x,y,z) -> (i,j,k) */
+
+  nim->qto_ijk = nifti_dmat44_inverse( nim->qto_xyz ) ;
+
+  /**- load sto_xyz affine transformation, if present */
+
+  if( !ni_ver || nhdr.sform_code <= 0 ){
+    /**- if not nifti or sform_code <= 0, then no sto transformation */
+
+    nim->sform_code = NIFTI_XFORM_UNKNOWN ;
+
+    if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no sform provided\n");
+
+  } else {
+    /**- else set the sto transformation from srow_*[] */
+
+    nim->sto_xyz.m[0][0] = nhdr.srow_x[0] ;
+    nim->sto_xyz.m[0][1] = nhdr.srow_x[1] ;
+    nim->sto_xyz.m[0][2] = nhdr.srow_x[2] ;
+    nim->sto_xyz.m[0][3] = nhdr.srow_x[3] ;
+
+    nim->sto_xyz.m[1][0] = nhdr.srow_y[0] ;
+    nim->sto_xyz.m[1][1] = nhdr.srow_y[1] ;
+    nim->sto_xyz.m[1][2] = nhdr.srow_y[2] ;
+    nim->sto_xyz.m[1][3] = nhdr.srow_y[3] ;
+
+    nim->sto_xyz.m[2][0] = nhdr.srow_z[0] ;
+    nim->sto_xyz.m[2][1] = nhdr.srow_z[1] ;
+    nim->sto_xyz.m[2][2] = nhdr.srow_z[2] ;
+    nim->sto_xyz.m[2][3] = nhdr.srow_z[3] ;
+
+    /* last row is always [ 0 0 0 1 ] */
+
+    nim->sto_xyz.m[3][0]=nim->sto_xyz.m[3][1]=nim->sto_xyz.m[3][2] = 0.0f;
+    nim->sto_xyz.m[3][3]= 1.0f ;
+
+    nim->sto_ijk = nifti_dmat44_inverse( nim->sto_xyz ) ;
+
+    nim->sform_code = nhdr.sform_code ;
+
+    if( g_opts.debug > 1 )
+       nifti_disp_matrix_orient("-d sform orientations:\n", nim->sto_xyz);
+  }
+
+  /**- set miscellaneous NIFTI stuff */
+
+  if( ni_ver ){
+    nim->scl_slope   = FIXED_FLOAT( nhdr.scl_slope ) ;
+    nim->scl_inter   = FIXED_FLOAT( nhdr.scl_inter ) ;
+
+    nim->intent_code = nhdr.intent_code ;
+
+    nim->intent_p1 = FIXED_FLOAT( nhdr.intent_p1 ) ;
+    nim->intent_p2 = FIXED_FLOAT( nhdr.intent_p2 ) ;
+    nim->intent_p3 = FIXED_FLOAT( nhdr.intent_p3 ) ;
+
+    nim->toffset   = FIXED_FLOAT( nhdr.toffset ) ;
+
+    memcpy(nim->intent_name,nhdr.intent_name,15); nim->intent_name[15] = '\0';
+
+    nim->xyz_units  = XYZT_TO_SPACE(nhdr.xyzt_units) ;
+    nim->time_units = XYZT_TO_TIME (nhdr.xyzt_units) ;
+
+    nim->freq_dim  = DIM_INFO_TO_FREQ_DIM ( nhdr.dim_info ) ;
+    nim->phase_dim = DIM_INFO_TO_PHASE_DIM( nhdr.dim_info ) ;
+    nim->slice_dim = DIM_INFO_TO_SLICE_DIM( nhdr.dim_info ) ;
+
+    nim->slice_code     = nhdr.slice_code  ;
+    nim->slice_start    = nhdr.slice_start ;
+    nim->slice_end      = nhdr.slice_end   ;
+    nim->slice_duration = FIXED_FLOAT(nhdr.slice_duration) ;
+  }
+
+  /**- set Miscellaneous ANALYZE stuff */
+
+  nim->cal_min = FIXED_FLOAT(nhdr.cal_min) ;
+  nim->cal_max = FIXED_FLOAT(nhdr.cal_max) ;
+
+  memcpy(nim->descrip ,nhdr.descrip ,79) ; nim->descrip [79] = '\0' ;
+  memcpy(nim->aux_file,nhdr.aux_file,23) ; nim->aux_file[23] = '\0' ;
+
+   /**- set ioff from vox_offset (but at least sizeof(header)) */
+
+   is_onefile = ni_ver && NIFTI_ONEFILE(nhdr) ;
+
+   if( is_onefile ){
+     ioff = (int)nhdr.vox_offset ;
+     if( ioff < (int) sizeof(nhdr) ) ioff = (int) sizeof(nhdr) ;
+   } else {
+     ioff = (int)nhdr.vox_offset ;
+   }
+   nim->iname_offset = ioff ;
+
+
+   /**- deal with file names if set */
+   if (fname!=NULL) {
+       nifti_set_filenames(nim,fname,0,0);
+       if (nim->iname==NULL)  { ERREX("bad filename"); }
+   } else {
+     nim->fname = NULL;
+     nim->iname = NULL;
+   }
+
+   /* clear extension fields */
+   nim->num_ext = 0;
+   nim->ext_list = NULL;
+
+   return nim;
+}
+
+#undef  ERREX
+#define ERREX(msg)                                           \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_convert_n2hdr2nim: %s\n", (msg) ) ;  \
+     return NULL ; } while(0)
+
+/*----------------------------------------------------------------------*/
+/*! convert a nifti_2_header into a nifti_image
+
+   \return an allocated nifti_image, or NULL on failure
+*//*--------------------------------------------------------------------*/
+nifti_image* nifti_convert_n2hdr2nim(nifti_2_header nhdr, const char * fname)
+{
+   int          ii, doswap, ni_ver, is_onefile;
+   nifti_image *nim;
+
+   nim = (nifti_image *)calloc( 1 , sizeof(nifti_image) ) ;
+   if( !nim ) ERREX("failed to allocate nifti image");
+
+   /* be explicit with pointers */
+   nim->fname = NULL;
+   nim->iname = NULL;
+   nim->data = NULL;
+
+   /**- check if we must swap bytes */
+
+   doswap = NIFTI2_NEEDS_SWAP(nhdr); /* swap data flag */
+
+   /**- determine if this is a NIFTI-2 compliant header */
+
+   ni_ver = NIFTI_VERSION(nhdr) ;
+   if(ni_ver != 2) {
+      free(nim);
+      Rc_fprintf_stderr("** convert NIFTI-2 hdr2nim: bad version %d\n", ni_ver);
+      return NULL;
+   }
+
+   if( doswap ) {
+      if ( g_opts.debug > 3 ) disp_nifti_2_header("-d n2 pre-swap: ", &nhdr);
+      swap_nifti_header( &nhdr , ni_ver ) ;
+   } else if ( g_opts.debug > 3 ) Rc_fprintf_stderr("-- n2hdr2nim: no swap\n");
+
+   if ( g_opts.debug > 2 ) disp_nifti_2_header("-d n2hdr2nim : ", &nhdr);
+
+   if( nhdr.datatype == DT_BINARY || nhdr.datatype == DT_UNKNOWN  )
+   {
+     free(nim);
+     ERREX("bad datatype") ;
+   }
+
+   if( nhdr.dim[1] <= 0 )
+   {
+     free(nim);
+     ERREX("bad dim[1]") ;
+   }
+
+   /* fix bad dim[] values in the defined dimension range */
+   for( ii=2 ; ii <= nhdr.dim[0] ; ii++ )
+     if( nhdr.dim[ii] <= 0 ) nhdr.dim[ii] = 1 ;
+
+   /* fix any remaining bad dim[] values, so garbage does not propagate */
+   /* (only values 0 or 1 seem rational, otherwise set to arbirary 1)   */
+   for( ii=nhdr.dim[0]+1 ; ii <= 7 ; ii++ )
+     if( nhdr.dim[ii] != 1 && nhdr.dim[ii] != 0) nhdr.dim[ii] = 1 ;
+
+   /**- set bad grid spacings to 1.0 */
+   for( ii=1 ; ii <= nhdr.dim[0] ; ii++ ){
+     if( nhdr.pixdim[ii] == 0.0         ||
+         !IS_GOOD_FLOAT(nhdr.pixdim[ii])  ) nhdr.pixdim[ii] = 1.0 ;
+   }
+
+   is_onefile = (ni_ver > 0) && NIFTI_ONEFILE(nhdr) ;
+
+   nim->nifti_type = (is_onefile) ? NIFTI_FTYPE_NIFTI2_1 : NIFTI_FTYPE_NIFTI2_2;
+
+   ii = nifti_short_order() ;
+   if( doswap )   nim->byteorder = REVERSE_ORDER(ii) ;
+   else           nim->byteorder = ii ;
+
+
+  /**- set dimensions of data array */
+
+  nim->ndim = nim->dim[0] = nhdr.dim[0];
+  nim->nx   = nim->dim[1] = nhdr.dim[1];
+  nim->ny   = nim->dim[2] = nhdr.dim[2];
+  nim->nz   = nim->dim[3] = nhdr.dim[3];
+  nim->nt   = nim->dim[4] = nhdr.dim[4];
+  nim->nu   = nim->dim[5] = nhdr.dim[5];
+  nim->nv   = nim->dim[6] = nhdr.dim[6];
+  nim->nw   = nim->dim[7] = nhdr.dim[7];
+
+  for( ii=1, nim->nvox=1; ii <= nhdr.dim[0]; ii++ )
+     nim->nvox *= nhdr.dim[ii];
+
+  /**- set the type of data in voxels and how many bytes per voxel */
+
+  nim->datatype = nhdr.datatype ;
+
+  nifti_datatype_sizes( nim->datatype , &(nim->nbyper) , &(nim->swapsize) ) ;
+  if( nim->nbyper == 0 ){ free(nim); ERREX("bad datatype"); }
+
+  /**- set the grid spacings */
+
+  nim->dx = nim->pixdim[1] = nhdr.pixdim[1] ;
+  nim->dy = nim->pixdim[2] = nhdr.pixdim[2] ;
+  nim->dz = nim->pixdim[3] = nhdr.pixdim[3] ;
+  nim->dt = nim->pixdim[4] = nhdr.pixdim[4] ;
+  nim->du = nim->pixdim[5] = nhdr.pixdim[5] ;
+  nim->dv = nim->pixdim[6] = nhdr.pixdim[6] ;
+  nim->dw = nim->pixdim[7] = nhdr.pixdim[7] ;
+
+  /**- compute qto_xyz transformation from pixel indexes (i,j,k) to (x,y,z) */
+
+  if( !ni_ver || nhdr.qform_code <= 0 ){
+    /**- if not nifti or qform_code <= 0, use grid spacing for qto_xyz */
+
+    nim->qto_xyz.m[0][0] = nim->dx ;  /* grid spacings */
+    nim->qto_xyz.m[1][1] = nim->dy ;  /* along diagonal */
+    nim->qto_xyz.m[2][2] = nim->dz ;
+
+    /* off diagonal is zero */
+
+    nim->qto_xyz.m[0][1]=nim->qto_xyz.m[0][2]=nim->qto_xyz.m[0][3] = 0.0f;
+    nim->qto_xyz.m[1][0]=nim->qto_xyz.m[1][2]=nim->qto_xyz.m[1][3] = 0.0f;
+    nim->qto_xyz.m[2][0]=nim->qto_xyz.m[2][1]=nim->qto_xyz.m[2][3] = 0.0f;
+
+    /* last row is always [ 0 0 0 1 ] */
+
+    nim->qto_xyz.m[3][0]=nim->qto_xyz.m[3][1]=nim->qto_xyz.m[3][2] = 0.0f;
+    nim->qto_xyz.m[3][3]= 1.0f ;
+
+    nim->qform_code = NIFTI_XFORM_UNKNOWN ;
+
+    if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no qform provided\n");
+  } else {
+    /**- else NIFTI: use the quaternion-specified transformation */
+
+    nim->quatern_b = FIXED_FLOAT( nhdr.quatern_b ) ;
+    nim->quatern_c = FIXED_FLOAT( nhdr.quatern_c ) ;
+    nim->quatern_d = FIXED_FLOAT( nhdr.quatern_d ) ;
+
+    nim->qoffset_x = FIXED_FLOAT(nhdr.qoffset_x) ;
+    nim->qoffset_y = FIXED_FLOAT(nhdr.qoffset_y) ;
+    nim->qoffset_z = FIXED_FLOAT(nhdr.qoffset_z) ;
+
+    nim->qfac = (nhdr.pixdim[0] < 0.0) ? -1.0 : 1.0 ;  /* left-handedness? */
+
+    nim->qto_xyz = nifti_quatern_to_dmat44(
+                      nim->quatern_b, nim->quatern_c, nim->quatern_d,
+                      nim->qoffset_x, nim->qoffset_y, nim->qoffset_z,
+                      nim->dx       , nim->dy       , nim->dz       ,
+                      nim->qfac                                      ) ;
+
+    nim->qform_code = nhdr.qform_code ;
+
+    if( g_opts.debug > 1 )
+       nifti_disp_matrix_orient("-d qform orientations:\n", nim->qto_xyz);
+  }
+
+  /**- load inverse transformation (x,y,z) -> (i,j,k) */
+
+  nim->qto_ijk = nifti_dmat44_inverse( nim->qto_xyz ) ;
+
+  /**- load sto_xyz affine transformation, if present */
+
+  if( !ni_ver || nhdr.sform_code <= 0 ){
+    /**- if not nifti or sform_code <= 0, then no sto transformation */
+
+    nim->sform_code = NIFTI_XFORM_UNKNOWN ;
+
+    if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no sform provided\n");
+
+  } else {
+    /**- else set the sto transformation from srow_*[] */
+
+    nim->sto_xyz.m[0][0] = nhdr.srow_x[0] ;
+    nim->sto_xyz.m[0][1] = nhdr.srow_x[1] ;
+    nim->sto_xyz.m[0][2] = nhdr.srow_x[2] ;
+    nim->sto_xyz.m[0][3] = nhdr.srow_x[3] ;
+
+    nim->sto_xyz.m[1][0] = nhdr.srow_y[0] ;
+    nim->sto_xyz.m[1][1] = nhdr.srow_y[1] ;
+    nim->sto_xyz.m[1][2] = nhdr.srow_y[2] ;
+    nim->sto_xyz.m[1][3] = nhdr.srow_y[3] ;
+
+    nim->sto_xyz.m[2][0] = nhdr.srow_z[0] ;
+    nim->sto_xyz.m[2][1] = nhdr.srow_z[1] ;
+    nim->sto_xyz.m[2][2] = nhdr.srow_z[2] ;
+    nim->sto_xyz.m[2][3] = nhdr.srow_z[3] ;
+
+    /* last row is always [ 0 0 0 1 ] */
+
+    nim->sto_xyz.m[3][0]=nim->sto_xyz.m[3][1]=nim->sto_xyz.m[3][2] = 0.0f;
+    nim->sto_xyz.m[3][3]= 1.0f ;
+
+    nim->sto_ijk = nifti_dmat44_inverse( nim->sto_xyz ) ;
+
+    nim->sform_code = nhdr.sform_code ;
+
+    if( g_opts.debug > 1 )
+       nifti_disp_matrix_orient("-d sform orientations:\n", nim->sto_xyz);
+  }
+
+  /**- set miscellaneous NIFTI stuff */
+
+  if( ni_ver ){
+    nim->scl_slope   = FIXED_FLOAT( nhdr.scl_slope ) ;
+    nim->scl_inter   = FIXED_FLOAT( nhdr.scl_inter ) ;
+
+    nim->intent_code = nhdr.intent_code ;
+
+    nim->intent_p1 = FIXED_FLOAT( nhdr.intent_p1 ) ;
+    nim->intent_p2 = FIXED_FLOAT( nhdr.intent_p2 ) ;
+    nim->intent_p3 = FIXED_FLOAT( nhdr.intent_p3 ) ;
+
+    nim->toffset   = FIXED_FLOAT( nhdr.toffset ) ;
+
+    memcpy(nim->intent_name,nhdr.intent_name,15); nim->intent_name[15] = '\0';
+
+    nim->xyz_units  = XYZT_TO_SPACE(nhdr.xyzt_units) ;
+    nim->time_units = XYZT_TO_TIME (nhdr.xyzt_units) ;
+
+    nim->freq_dim  = DIM_INFO_TO_FREQ_DIM ( nhdr.dim_info ) ;
+    nim->phase_dim = DIM_INFO_TO_PHASE_DIM( nhdr.dim_info ) ;
+    nim->slice_dim = DIM_INFO_TO_SLICE_DIM( nhdr.dim_info ) ;
+
+    nim->slice_code     = nhdr.slice_code  ;
+    nim->slice_start    = nhdr.slice_start ;
+    nim->slice_end      = nhdr.slice_end   ;
+    nim->slice_duration = FIXED_FLOAT(nhdr.slice_duration) ;
+  }
+
+  /**- set Miscellaneous ANALYZE stuff */
+
+  nim->cal_min = FIXED_FLOAT(nhdr.cal_min) ;
+  nim->cal_max = FIXED_FLOAT(nhdr.cal_max) ;
+
+  memcpy(nim->descrip ,nhdr.descrip ,79) ; nim->descrip [79] = '\0' ;
+  memcpy(nim->aux_file,nhdr.aux_file,23) ; nim->aux_file[23] = '\0' ;
+
+   /**- set ioff from vox_offset (but at least sizeof(header)) */
+
+   nim->iname_offset = nhdr.vox_offset;
+   if( is_onefile && nhdr.vox_offset < (int64_t)sizeof(nhdr) )
+      nim->iname_offset = (int64_t)sizeof(nhdr);
+
+   /**- deal with file names if set */
+   if (fname!=NULL) {
+      nifti_set_filenames(nim,fname,0,0);
+      if (nim->iname==NULL)  { ERREX("bad filename"); }
+   } else {
+      nim->fname = NULL;
+      nim->iname = NULL;
+   }
+
+   /* clear extension fields */
+   nim->num_ext = 0;
+   nim->ext_list = NULL;
+
+   return nim;
+}
+
+#undef  ERREX
+#define ERREX(msg)                                           \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_image_open(%s): %s\n",  \
+             (hname != NULL) ? hname : "(null)" , (msg) ) ;  \
+     return fptr ; } while(0)
+
+/***************************************************************
+ * nifti_image_open
+ ***************************************************************/
+/*! znzFile nifti_image_open( char *hname, char *opts , nifti_image **nim)
+    \brief Read in NIFTI-1 or ANALYZE-7.5 file (pair) header information into a nifti_image struct.
+
+    - The image data is not read from disk (it may be read later using
+        nifti_image_load(), for example).
+    - The image data will be stored in whatever data format the
+        input data is; no scaling will be applied.
+    - DT_BINARY data is not supported.
+    - nifti_image_free() can be used to delete the returned struct,
+        when you are done with it.
+
+    \param hname filename of dataset .hdr or .nii file
+    \param opts  options string for opening the header file
+    \param nim   pointer to pointer to nifti_image struct
+                 (this routine allocates the nifti_image struct)
+    \return file pointer (gzippable) to the file with the image data,
+                 ready for reading.
+        <br>NULL if something fails badly.
+    \sa nifti_image_load, nifti_image_free
+ */
+znzFile nifti2_image_open(const char * hname, char * opts, nifti_image ** nim)
+{
+  znzFile fptr=NULL;
+  /* open the hdr and reading it in, but do not load the data  */
+  *nim = nifti_image_read(hname,0);
+  /* open the image file, ready for reading (compressed works for all reads) */
+  if( ((*nim) == NULL)      || ((*nim)->iname == NULL) ||
+      ((*nim)->nbyper <= 0) || ((*nim)->nvox <= 0)       )
+     ERREX("bad header info") ;
+
+  /* open image data file */
+  fptr = znzopen( (*nim)->iname, opts, nifti_is_gzfile((*nim)->iname) );
+  if( znz_isnull(fptr) ) ERREX("Can't open data file") ;
+
+  return fptr;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! return an allocated and filled nifti_1_header struct
+
+    Read the binary header from disk, and swap bytes if necessary.
+
+    \return an allocated nifti_1_header struct, or NULL on failure
+
+    \param hname   name of file containing header
+    \param swapped if not NULL, return whether header bytes were swapped
+    \param check   flag to check for invalid nifti_1_header
+
+    \warning ASCII header type is not supported
+
+    \sa nifti_image_read, nifti_image_free, nifti_image_read_bricks
+*//*--------------------------------------------------------------------*/
+nifti_1_header * nifti_read_n1_hdr(const char * hname, int *swapped, int check)
+{
+   nifti_1_header   nhdr, * hptr;
+   znzFile          fp;
+   int              bytes, lswap;
+   char           * hfile;
+   char             fname[] = { "nifti_read_n1_hdr" };
+
+   /* determine file name to use for header */
+   hfile = nifti_findhdrname(hname);
+   if( hfile == NULL ){
+      if( g_opts.debug > 0 )
+         LNI_FERR(fname,"failed to find header file for", hname);
+      return NULL;
+   } else if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d %s: found header filename '%s'\n",fname,hfile);
+
+   fp = znzopen( hfile, "rb", nifti_is_gzfile(hfile) );
+   if( znz_isnull(fp) ){
+      if( g_opts.debug > 0 ) LNI_FERR(fname,"failed to open header file",hfile);
+      free(hfile);
+      return NULL;
+   }
+
+   free(hfile);  /* done with filename */
+
+   if( has_ascii_header(fp) == 1 ){
+      znzclose( fp );
+      if( g_opts.debug > 0 )
+         LNI_FERR(fname,"ASCII header type not supported",hname);
+      return NULL;
+   }
+
+   /* read the binary header */
+   bytes = (int)znzread( &nhdr, 1, sizeof(nhdr), fp );
+   znzclose( fp );                      /* we are done with the file now */
+
+   if( bytes < (int)sizeof(nhdr) ){
+      if( g_opts.debug > 0 ){
+         LNI_FERR(fname,"bad binary header read for file", hname);
+         Rc_fprintf_stderr("  - read %d of %d bytes\n",bytes, (int)sizeof(nhdr));
+      }
+      return NULL;
+   }
+
+   /* now just decide on byte swapping */
+   lswap = need_nhdr_swap(nhdr.dim[0], nhdr.sizeof_hdr); /* swap data flag */
+   if( check && lswap < 0 ){
+      LNI_FERR(fname,"bad nifti_1_header for file", hname);
+      return NULL;
+   } else if ( lswap < 0 ) {
+      lswap = 0;  /* if swapping does not help, don't do it */
+      if(g_opts.debug > 1) Rc_fprintf_stderr("-- swap failure, none applied\n");
+   }
+
+   if( lswap ) {
+      if ( g_opts.debug > 3 ) disp_nifti_1_header("-d nhdr pre-swap: ", &nhdr);
+      swap_nifti_header( &nhdr , NIFTI_VERSION(nhdr) ) ;
+   }
+
+   if ( g_opts.debug > 2 ) disp_nifti_1_header("-d nhdr post-swap: ", &nhdr);
+
+   if ( check && ! nifti_hdr1_looks_good(&nhdr) ){
+      LNI_FERR(fname,"nifti_1_header looks bad for file", hname);
+      return NULL;
+   }
+
+   /* all looks good, so allocate memory for and return the header */
+   hptr = (nifti_1_header *)malloc(sizeof(nifti_1_header));
+   if( ! hptr ){
+      Rc_fprintf_stderr("** nifti_read_hdr: failed to alloc nifti_1_header\n");
+      return NULL;
+   }
+
+   if( swapped ) *swapped = lswap;  /* only if they care <sniff!> */
+
+   memcpy(hptr, &nhdr, sizeof(nifti_1_header));
+
+   return hptr;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! return an allocated and filled nifti_2_header struct
+
+    Read the binary header from disk, and swap bytes if necessary.
+
+    \return an allocated nifti_2_header struct, or NULL on failure
+
+    \param hname   name of file containing header
+    \param swapped if not NULL, return whether header bytes were swapped
+    \param check   flag to check for invalid nifti_2_header
+
+    \warning ASCII header type is not supported
+             allow now, convert nim 2 hdr   [02 Jan 2019 rickr]
+
+    \sa nifti_read_header, nifti_read_n1_hdr,
+        nifti_image_read, nifti_image_read_bricks
+*//*--------------------------------------------------------------------*/
+nifti_2_header * nifti_read_n2_hdr(const char * hname, int * swapped,
+                                    int check)
+{
+   nifti_2_header   nhdr, * hptr;
+   nifti_image    * nim=NULL;
+   znzFile          fp;
+   int              bytes, lswap, rv;
+   char           * hfile;
+   char             fname[] = { "nifti_read_n2_hdr" };
+
+   /* determine file name to use for header */
+   hfile = nifti_findhdrname(hname);
+   if( hfile == NULL ){
+      if( g_opts.debug > 0 )
+         LNI_FERR(fname,"failed to find header file for", hname);
+      return NULL;
+   } else if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d %s: found N2 header filename '%s'\n",fname,hfile);
+
+   fp = znzopen( hfile, "rb", nifti_is_gzfile(hfile) );
+   if( znz_isnull(fp) ){
+      if( g_opts.debug > 0 )
+         LNI_FERR(fname,"failed to open N2 header file",hfile);
+      free(hfile);
+      return NULL;
+   }
+
+   free(hfile);  /* done with filename */
+
+   /* ASCII is not part of standard, but allow */
+   if( has_ascii_header(fp) == 1 ){
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("++ reading ASCII header via NIFTI-2 in %s\n", hname);
+      nim = nifti_read_ascii_image(fp, hname, -1, 0);
+      znzclose(fp) ;
+      if( ! nim ) return NULL;
+
+      hptr = (nifti_2_header *)malloc(sizeof(nifti_2_header));
+      rv = nifti_convert_nim2n2hdr(nim, hptr);
+      free(nim);
+
+      if( rv ) { free(hptr); return NULL; }
+      return hptr;
+   }
+
+   /* read the binary header */
+   bytes = (int)znzread( &nhdr, 1, sizeof(nhdr), fp );
+   znzclose( fp );                      /* we are done with the file now */
+
+   if( bytes < (int)sizeof(nhdr) ){
+      if( g_opts.debug > 0 ){
+         LNI_FERR(fname,"bad binary header read for N2 file", hname);
+         Rc_fprintf_stderr("  - read %d of %d bytes\n",bytes, (int)sizeof(nhdr));
+      }
+      return NULL;
+   }
+
+   /* now just decide on byte swapping */
+   lswap = NIFTI2_NEEDS_SWAP(nhdr);
+   if( lswap ) {
+      if ( g_opts.debug > 3 ) disp_nifti_2_header("-d n2hdr pre-swap: ", &nhdr);
+      swap_nifti_header( &nhdr , 2 );  /* use explicit version */
+   }
+
+   if ( g_opts.debug > 2 ) disp_nifti_2_header("-d nhdr post-swap: ", &nhdr);
+
+   if ( check && ! nifti_hdr2_looks_good(&nhdr) ){
+      LNI_FERR(fname,"nifti_2_header looks bad for file", hname);
+      return NULL;
+   }
+
+   /* all looks good, so allocate memory for and return the header */
+   hptr = (nifti_2_header *)malloc(sizeof(nifti_2_header));
+   if( ! hptr ){
+      Rc_fprintf_stderr("** nifti2_read_hdr: failed to alloc nifti_2_header\n");
+      return NULL;
+   }
+
+   if( swapped ) *swapped = lswap;  /* only if they care <sniff!> */
+
+   memcpy(hptr, &nhdr, sizeof(nifti_2_header));
+
+   return hptr;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! decide if this nifti_1_header structure looks reasonable
+
+   Check dim[0], dim[1], sizeof_hdr, and datatype.
+   Check magic string for "n+1".
+   Maybe more tests will follow.
+
+   \return 1 if the header seems valid, 0 otherwise
+
+   \sa nifti_nim_is_valid, valid_nifti_extensions
+*//*--------------------------------------------------------------------*/
+int nifti_hdr1_looks_good(const nifti_1_header * hdr)
+{
+   int ni_ver, c, errs = 0;
+
+   /* check dim[0] and sizeof_hdr */
+   if( need_nhdr_swap(hdr->dim[0], hdr->sizeof_hdr) < 0 ){
+      if( g_opts.debug > 0 )
+        Rc_fprintf_stderr("** NIFTI: bad hdr1 fields: dim0, sizeof_hdr = %d, %d\n",
+                hdr->dim[0], hdr->sizeof_hdr);
+      errs++;
+   }
+
+   /* check the valid dimension sizes (maybe dim[0] is bad) */
+   for( c = 1; c <= hdr->dim[0] && c <= 7; c++ )
+      if( hdr->dim[c] <= 0 ){
+         if( g_opts.debug > 0 )
+            Rc_fprintf_stderr("** NIFTI: bad nhdr field: dim[%d] = %d\n",
+                    c,hdr->dim[c]);
+         errs++;
+      }
+
+   ni_ver = NIFTI_VERSION(*hdr);      /* determine header type */
+
+   if( ni_ver > 0 ){      /* NIFTI */
+
+      if( ! nifti_datatype_is_valid(hdr->datatype, 1) ){
+         if( g_opts.debug > 0 )
+            Rc_fprintf_stderr("** bad NIFTI datatype in hdr, %d\n",hdr->datatype);
+         errs++;
+      }
+
+   } else {             /* ANALYZE 7.5 */
+
+      if( g_opts.debug > 1 ) { /* maybe tell user it's an ANALYZE hdr */
+         Rc_fprintf_stderr(
+           "-- nhdr magic field implies ANALYZE: magic = '%.4s' : ",hdr->magic);
+#ifndef USING_R
+         print_hex_vals(hdr->magic, 4, stderr);
+#endif
+         Rc_fputc_stderr('\n');
+      }
+
+      if( ! nifti_datatype_is_valid(hdr->datatype, 0) ){
+         if( g_opts.debug > 0 )
+           Rc_fprintf_stderr("** NIFTI: bad ANALYZE datatype in hdr, %d\n",
+                   hdr->datatype);
+         errs++;
+      }
+   }
+
+   if( errs ) return 0;  /* problems */
+
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d nifti header looks good\n");
+
+   return 1;   /* looks good */
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! check that sizeof() returns the proper size
+ *
+ *  if ni_ver is valid (1 or 2 right now), check those sizes
+ *  if ni_ver == 0, check all known sizes
+ *  else whine and fail
+*//*--------------------------------------------------------------------*/
+int nifti_valid_header_size(int ni_ver, int whine)
+{
+   int size, errs=0, checks=0;
+
+   if ( !ni_ver || (ni_ver == 1) ) {
+      size = 348;
+      checks++;
+      if( sizeof(nifti_1_header) != size ) {
+         if( whine )
+            Rc_fprintf_stderr(
+               "** warning: sizeof(nifti_1_header) = %d, expected %d\n",
+               (int)sizeof(nifti_1_header), size);
+         errs++;
+      }
+   }
+
+   if ( !ni_ver || (ni_ver == 2) ) {
+      size = 540;
+      checks++;
+      if( sizeof(nifti_2_header) != size ) {
+         if( whine )
+            Rc_fprintf_stderr(
+               "** warning: sizeof(nifti_2_header) = %d, expected %d\n",
+               (int)sizeof(nifti_2_header), size);
+         errs++;
+      }
+   }
+
+   if ( ! checks ) {
+      Rc_fprintf_stderr("** nifti_valid_header_size: bad ni_ver = %d\n",ni_ver);
+      return 0;
+   }
+
+   return errs ? 0 : 1;  /* though !errs seems more fun */
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! decide if this nifti_2_header structure looks reasonable
+ * swapping should have already happened
+
+   Check sizeof() and sizeof_hdr.
+   Check dim[0], dim[i], and datatype.
+   Check magic string for "n+2".
+
+   \return 1 if the header seems valid, 0 otherwise
+
+   \sa nifti_nim_is_valid, valid_nifti_extensions
+*//*--------------------------------------------------------------------*/
+int nifti_hdr2_looks_good(const nifti_2_header * hdr)
+{
+   int     ni_ver, c, errs = 0;
+   int64_t d0;
+
+   if( !hdr ) { Rc_fprintf_stderr("** NIFTI n2hdr: hdr is NULL\n"); return 0; }
+
+   /* for now, just warn if the header sizes are not right */
+   if( g_opts.debug > 0 ) (void)nifti_valid_header_size(0, 1);
+
+   if( hdr->sizeof_hdr != sizeof(nifti_2_header) ) {
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** NIFTI bad n2hdr: sizeof_hdr = %d\n",
+                 hdr->sizeof_hdr);
+      errs++;
+   }
+
+   /* check the valid dimension sizes (maybe dim[0] is bad) */
+   d0 = hdr->dim[0];
+   if( d0 < 0 || d0 > 7 ) {
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** NIFTI: bad n2hdr: dim0 = %" PRId64 "\n", d0);
+      errs++;
+   } else { /* only check dims if d0 is okay */
+      for( c = 1; c <= d0; c++ )
+         if( hdr->dim[c] <= 0 ){
+           if( g_opts.debug > 0 )
+             Rc_fprintf_stderr("** NIFTI: bad nhdr field: dim[%d] = %" PRId64 "\n",
+                     c, hdr->dim[c]);
+           errs++;
+         }
+   }
+
+   ni_ver = NIFTI_VERSION(*hdr);  /* note version */
+
+   if( ! nifti_datatype_is_valid(hdr->datatype, ni_ver) ){
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** bad %s NIFTI datatype in hdr, %d\n",
+                 ni_ver ? "NIFTI" : "ANALYZE", hdr->datatype);
+      errs++;
+   }
+
+   /* NIFTI_VERSION must return 2, or else sizes will not match */
+   if( ni_ver != 2 || memcmp((hdr->magic+4), nifti2_magic+4, 4) ) {
+      if( g_opts.debug > 0 ) {
+         Rc_fprintf_stderr("-- header magic not NIFTI-2, magic = '%.4s' + ",
+                         hdr->magic);
+#ifndef USING_R
+         print_hex_vals(hdr->magic+4, 4, stderr);
+#endif
+         Rc_fputc_stderr('\n');
+      }
+      errs++;
+   }
+
+   if( errs ) return 0;  /* problems */
+
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d nifti header looks good\n");
+
+   return 1;   /* looks good */
+}
+
+
+/*----------------------------------------------------------------------
+ * check whether byte swapping is needed
+ *
+ * dim[0] should be in [0,7], and sizeof_hdr should be accurate
+ *
+ * \returns  > 0 : needs swap
+ *             0 : does not need swap
+ *           < 0 : error condition
+ *----------------------------------------------------------------------*/
+static int need_nhdr_swap( short dim0, int hdrsize )
+{
+   short d0    = dim0;     /* so we won't have to swap them on the stack */
+   int   hsize = hdrsize;
+
+   if( d0 != 0 ){     /* then use it for the check */
+      if( d0 > 0 && d0 <= 7 ) return 0;
+
+      nifti_swap_2bytes(1, &d0);        /* swap? */
+      if( d0 > 0 && d0 <= 7 ) return 1;
+
+      if( g_opts.debug > 1 ){
+         Rc_fprintf_stderr("** NIFTI: bad swapped d0 = %d, unswapped = ", d0);
+         nifti_swap_2bytes(1, &d0);        /* swap? */
+         Rc_fprintf_stderr("%d\n", d0);
+      }
+
+      return -1;        /* bad, naughty d0 */
+   }
+
+   /* dim[0] == 0 should not happen, but could, so try hdrsize */
+   if( hsize == sizeof(nifti_1_header) ) return 0;
+
+   nifti_swap_4bytes(1, &hsize);     /* swap? */
+   if( hsize == sizeof(nifti_1_header) ) return 1;
+
+   if( g_opts.debug > 1 ){
+      Rc_fprintf_stderr("** NIFTI: bad swapped hsize = %d, unswapped = ", hsize);
+      nifti_swap_4bytes(1, &hsize);        /* swap? */
+      Rc_fprintf_stderr("%d\n", hsize);
+   }
+
+   return -2;     /* bad, naughty hsize */
+}
+
+
+/* use macro LNI_FILE_ERROR instead of ERREX()
+#undef  ERREX
+#define ERREX(msg)                                           \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_image_read(%s): %s\n",  \
+             (hname != NULL) ? hname : "(null)" , (msg) ) ;  \
+     return NULL ; } while(0)
+*/
+
+
+/***************************************************************
+ * nifti_read_header
+ ***************************************************************/
+/*! \brief Read and return a nifti header, along with the found type
+
+        - The data buffer will be byteswapped if necessary.
+        - The data buffer will not be scaled.
+        - The data buffer is allocated with calloc().
+
+    \param hname filename of the nifti dataset
+    \param nver :
+    \return A void pointer, which should be cast based on the returned nver.
+            It points to an allocated header struct.
+*/
+void * nifti2_read_header( const char *hname, int *nver, int check )
+{
+   nifti_1_header  n1hdr;
+   nifti_2_header  n2hdr;
+   znzFile         fp;
+   void          * hresult = NULL;
+   int64_t         remain, h1size=0, h2size=0;
+   char            fname[] = { "nifti_read_header" };
+   char           *hfile=NULL, *posn;
+   int             ii, ni_ver;
+
+   if( g_opts.debug > 2 ){
+      Rc_fprintf_stderr("-d reading header from '%s'",hname);
+      Rc_fprintf_stderr(", HAVE_ZLIB = %d\n", nifti_compiled_with_zlib());
+   }
+
+   /**- determine filename to use for header */
+   hfile = nifti_findhdrname(hname);
+   if( hfile == NULL ){
+      if(g_opts.debug > 0)
+         LNI_FERR(fname,"failed to find header file for", hname);
+      return NULL;  /* check return */
+   } else if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("-d %s: found header filename '%s'\n",fname,hfile);
+
+   h1size = sizeof(nifti_1_header);
+   h2size = sizeof(nifti_2_header);
+
+   /**- open file, separate reading of header, extensions and data */
+   fp = znzopen(hfile, "rb", nifti_is_gzfile(hfile));
+   if( znz_isnull(fp) ){
+      if( g_opts.debug > 0 ) LNI_FERR(fname,"failed to open header file",hfile);
+      free(hfile);
+      return NULL;
+   }
+
+   /**- first try to read dataset as ASCII (and return NIFTI2 if so) */
+   if( has_ascii_header( fp ) ) {
+      znzclose(fp) ;
+      free(hfile);
+      if( nver ) *nver = 2;
+      return nifti_read_n2_hdr(hname, NULL, check);
+   }
+
+   /**- next read into nifti_1_header and determine nifti type */
+   ii = (int)znzread(&n1hdr, 1, h1size, fp);
+
+   if( ii < (int)h1size ){      /* failure? */
+      if( g_opts.debug > 0 ){
+         LNI_FERR(fname,"bad binary header read for file", hfile);
+         Rc_fprintf_stderr("  - read %d of %d bytes\n",ii, (int)h1size);
+      }
+      znzclose(fp) ;
+      free(hfile);
+      return NULL;
+   }
+
+   /* find out what type of header we have */
+   ni_ver = nifti_header_version((char *)&n1hdr, h1size);
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("-- %s: NIFTI version = %d\n", fname, ni_ver);
+
+   /* maybe set return NIFTI version */
+   if( nver ) *nver = ni_ver;
+
+   /* if NIFTI-2, copy and finish reading header */
+   if ( ni_ver == 2 ) {
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-- %s: copying and filling NIFTI-2 header...\n",fname);
+      memcpy(&n2hdr, &n1hdr, h1size);   /* copy first part */
+      remain = h2size - h1size;
+      posn = (char *)&n2hdr + h1size;
+      ii = (int)znzread(posn, 1, remain, fp); /* read remaining part */
+      if( ii < (int)remain) {
+         LNI_FERR(fname,"short NIFTI-2 header read for file", hfile);
+         znzclose(fp);  free(hfile);  return NULL;
+      }
+   }
+
+   /* clean up */
+   znzclose(fp);
+   free(hfile);
+
+   /* allocate header space and return */
+   if( ni_ver == 0 || ni_ver == 1 ) {
+      hresult = malloc(h1size);
+      if( ! hresult ) {
+         LNI_FERR(fname,"failed to alloc NIFTI-1 header for file", hname);
+         return NULL;
+      }
+      memcpy(hresult, (void *)&n1hdr, h1size);
+
+      if ( check && ! nifti_hdr1_looks_good(hresult) ){
+         LNI_FERR(fname,"nifti_1_header looks bad for file", hname);
+         return hresult;
+      }
+   } else if ( ni_ver == 2 ) {
+      hresult = malloc(h2size);
+      if( ! hresult ) {
+         LNI_FERR(fname,"failed to alloc NIFTI-2 header for file", hname);
+         return NULL;
+      }
+      memcpy(hresult, &n2hdr, h2size);
+
+      if ( check && ! nifti_hdr2_looks_good(hresult) ){
+         LNI_FERR(fname,"nifti_2_header looks bad for file", hname);
+         return hresult;
+      }
+   } else {
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** %s: bad nifti header version %d\n", hname, ni_ver);
+
+      /* return a nifti-1 header anyway */
+      hresult = malloc(h1size);
+      if( ! hresult ) {
+         LNI_FERR(fname,"failed to alloc NIFTI-?? header for file", hname);
+         return NULL;
+      }
+      memcpy(hresult, (void *)&n1hdr, h1size);
+   }
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-- returning NIFTI-%d header in %s\n", ni_ver, hname);
+
+   return hresult;
+}
+
+
+/***************************************************************
+ * nifti_image_read
+ ***************************************************************/
+/*! \brief Read a nifti header and optionally the data, creating a nifti_image.
+
+        - The data buffer will be byteswapped if necessary.
+        - The data buffer will not be scaled.
+        - The data buffer is allocated with calloc().
+
+    \param hname filename of the nifti dataset
+    \param read_data Flag, true=read data blob, false=don't read blob.
+    \return A pointer to the nifti_image data structure.
+
+    \sa nifti_image_free, nifti_free_extensions, nifti_image_read_bricks
+*/
+nifti_image *nifti2_image_read( const char *hname , int read_data )
+{
+   nifti_1_header  n1hdr;
+   nifti_2_header  n2hdr;
+   nifti_image    *nim;
+   znzFile         fp;
+   int             rv, ii, ni_ver, onefile=0;
+   int64_t         filesize, remain, h1size=0, h2size=0;
+   char            fname[] = { "nifti_image_read" };
+   char           *hfile=NULL, *posn;
+
+   if( g_opts.debug > 1 ){
+      Rc_fprintf_stderr("-d image_read from '%s', read_data = %d",hname,read_data);
+      Rc_fprintf_stderr(", HAVE_ZLIB = %d\n", nifti_compiled_with_zlib());
+   }
+
+   /**- determine filename to use for header */
+   hfile = nifti_findhdrname(hname);
+   if( hfile == NULL ){
+      if(g_opts.debug > 0)
+         LNI_FERR(fname,"failed to find header file for", hname);
+      return NULL;  /* check return */
+   } else if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d %s: found header filename '%s'\n",fname,hfile);
+
+   if( nifti_is_gzfile(hfile) ) filesize = -1;  /* unknown */
+   else                         filesize = nifti_get_filesize(hfile);
+
+   /**- open file, separate reading of header, extensions and data */
+   fp = znzopen(hfile, "rb", nifti_is_gzfile(hfile));
+   if( znz_isnull(fp) ){
+      if( g_opts.debug > 0 ) LNI_FERR(fname,"failed to open header file",hfile);
+      free(hfile);
+      return NULL;
+   }
+
+   /**- first try to read dataset as ASCII (and return if so) */
+   rv = has_ascii_header( fp );
+   if( rv < 0 ){
+      if( g_opts.debug > 0 ) LNI_FERR(fname,"short header read",hfile);
+      znzclose( fp );
+      free(hfile);
+      return NULL;
+   }
+   else if ( rv == 1 ) { /* process special file type */
+      nim = nifti_read_ascii_image( fp, hfile, filesize, read_data );
+      znzclose(fp);
+      free(hfile);
+      return nim;
+   }
+
+   h1size = sizeof(nifti_1_header);
+   h2size = sizeof(nifti_2_header);
+
+   /**- next read into nifti_1_header and determine nifti type */
+   ii = (int)znzread(&n1hdr, 1, h1size, fp);
+
+   if( ii < (int)h1size ){      /* failure? */
+      if( g_opts.debug > 0 ){
+         LNI_FERR(fname,"bad binary header read for file", hfile);
+         Rc_fprintf_stderr("  - read %d of %d bytes\n",ii, (int)h1size);
+      }
+      znzclose(fp) ;
+      free(hfile);
+      return NULL;
+   }
+
+   /* find out what type of header we have */
+   ni_ver = nifti_header_version((char *)&n1hdr, h1size);
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("-- %s: NIFTI version = %d\n", fname, ni_ver);
+
+   if( ni_ver == 0 || ni_ver == 1 ) {
+      nim = nifti_convert_n1hdr2nim(n1hdr,hfile);
+      onefile = NIFTI_ONEFILE(n1hdr);
+   } else if ( ni_ver == 2 ) {
+      /* fill nifti-2 header and convert */
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-- %s: copying and filling NIFTI-2 header...\n",fname);
+      memcpy(&n2hdr, &n1hdr, h1size);   /* copy first part */
+      remain = h2size - h1size;
+      posn = (char *)&n2hdr + h1size;
+      ii = (int)znzread(posn, 1, remain, fp); /* read remaining part */
+      if( ii < (int)remain) {
+         LNI_FERR(fname,"short NIFTI-2 header read for file", hfile);
+         znzclose(fp);  free(hfile);  return NULL;
+      }
+      nim = nifti_convert_n2hdr2nim(n2hdr,hfile);
+      onefile = NIFTI_ONEFILE(n2hdr);
+   } else {
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** %s: bad nifti im header version %d\n",fname,ni_ver);
+      znzclose(fp);  free(hfile);  return NULL;
+   }
+
+   if( nim == NULL ){
+      znzclose( fp ) ;                                   /* close the file */
+      if( g_opts.debug > 0 )
+         LNI_FERR(fname,"cannot create nifti image from header",hfile);
+      free(hfile); /* had to save this for debug message */
+      return NULL;
+   }
+
+   if( g_opts.debug > 3 ){
+      Rc_fprintf_stderr("+d nifti_image_read(), have nifti image:\n");
+      if( g_opts.debug > 2 ) nifti_image_infodump(nim);
+   }
+
+   /**- check for extensions (any errors here means no extensions) */
+   if ( onefile )     remain = nim->iname_offset;
+   else               remain = filesize;
+
+   if ( ni_ver <= 1 ) remain -= h1size;
+   else               remain -= h2size;
+
+   (void)nifti_read_extensions(nim, fp, remain);
+
+   znzclose( fp ) ;                                      /* close the file */
+   free(hfile);
+
+   if ( g_opts.alter_cifti && nifti_looks_like_cifti(nim) )
+      nifti_alter_cifti_dims(nim);
+
+   /**- read the data if desired, then bug out */
+   if( read_data ){
+      if( nifti_image_load( nim ) < 0 ){
+         nifti_image_free(nim);          /* take ball, go home. */
+         return NULL;
+      }
+   }
+   else nim->data = NULL ;
+
+   return nim ;
+}
+
+
+/*----------------------------------------------------------------------
+ # return the index of the first occurrence of the given ecode, else -1
+ *----------------------------------------------------------------------*/
+static int nifti_ext_type_index(nifti_image * nim, int ecode)
+{
+   int ind;
+
+   if ( !nim || ecode < 0 ) return -1;
+
+   for( ind = 0; ind < nim->num_ext; ind++ )
+      if( nim->ext_list[ind].ecode == ecode )
+         return ind;
+
+   return -1;
+}
+
+/*----------------------------------------------------------------------
+ *! does this dataset look like CIFTI?
+ *
+ * check dimensions and extension ecodes for CIFTI
+ *
+ * should have  - nx=ny=nz=nt=1, nu,nv>1, nw optional
+ *              - CIFTI extension
+ *----------------------------------------------------------------------*/
+int nifti_looks_like_cifti(nifti_image * nim)
+{
+   if( ! nim ) return 0;
+
+   if( nifti_ext_type_index(nim, NIFTI_ECODE_CIFTI) < 0 ) return 0;
+
+   if( nim->nx > 1 || nim->ny > 1 || nim->nz > 1 || nim->nt > 1 ) return 0;
+
+   if( nim->nu > 1 || nim->nv > 1 ) return 1;  /* looks like it */
+
+   return 0;
+}
+
+/*----------------------------------------------------------------------
+ *! alter the dims[] from CIFTI style
+ *
+ * convert nu -> nx, nv -> nt/nu, nw -> nv
+ *----------------------------------------------------------------------*/
+int nifti_alter_cifti_dims(nifti_image * nim)
+{
+   if( ! nifti_looks_like_cifti(nim) ) return 0;
+
+   /* the main effect, move position axis to x ... */
+   if( nim->nu > 1 || nim->dim[5] ) {
+      nim->nx = nim->nu;
+      nim->nu = 1;
+
+      nim->dim[1] = nim->dim[5];
+      nim->dim[5] = 1;
+   }
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------
+ * has_ascii_header  - see if the NIFTI header is an ASCII format
+ *
+ * If the file starts with the ASCII string "<nifti_image", then
+ * process the dataset as a type-3 .nia file.
+ *
+ * return:  -1 on error, 1 if true, or 0 if false
+ *
+ * NOTE: this is NOT part of the NIFTI-1 standard
+ *----------------------------------------------------------------------*/
+static int has_ascii_header( znzFile fp )
+{
+   char  buf[16];
+   int   nread;
+
+   if( znz_isnull(fp) ) return 0;
+
+   nread = (int)znzread( buf, 1, 12, fp );
+   buf[12] = '\0';
+
+   if( nread < 12 ) return -1;
+
+   znzrewind(fp);  /* move back to the beginning, and check */
+
+   if( strcmp(buf, "<nifti_image") == 0 ) return 1;
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! nifti_read_ascii_image  - process as a type-3 .nia image file
+
+   return NULL on failure
+
+   NOTE: this is NOT part of the NIFTI-1 standard
+*//*--------------------------------------------------------------------*/
+nifti_image * nifti2_read_ascii_image(znzFile fp, const char *fname, int flen,
+                                     int read_data)
+{
+   nifti_image * nim;
+   int           slen, txt_size, remain, rv = 0;
+   char        * sbuf, lfunc[25] = { "nifti_read_ascii_image" };
+
+   if( nifti_is_gzfile(fname) ){
+     LNI_FERR(lfunc,"compression not supported for file type NIFTI_FTYPE_ASCII",
+              fname);
+     return NULL;
+   }
+   slen = flen;  /* slen will be our buffer length */
+   if( slen <= 0 ) slen = nifti_get_filesize(fname);
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d %s: have ASCII NIFTI file of size %d\n",fname,slen);
+
+   if( slen > 65530 ) slen = 65530 ;
+   sbuf = (char *)calloc(sizeof(char),slen+1) ;
+   if( !sbuf ){
+      Rc_fprintf_stderr("** %s: failed to alloc %d bytes for sbuf",lfunc,65530);
+      return NULL;
+   }
+   znzread( sbuf , 1 , slen , fp ) ;
+   nim = nifti_image_from_ascii( sbuf, &txt_size ) ; free( sbuf ) ;
+   if( nim == NULL ){
+      LNI_FERR(lfunc,"failed nifti_image_from_ascii()",fname);
+      return NULL;
+   }
+   nim->nifti_type = NIFTI_FTYPE_ASCII ;
+
+   /* compute remaining space for extensions */
+   remain = flen - txt_size - (int)nifti_get_volsize(nim);
+   if( remain > 4 ){
+      /* read extensions (reposition file pointer, first) */
+      znzseek(fp, txt_size, SEEK_SET);
+      (void) nifti_read_extensions(nim, fp, (int64_t)remain);
+   }
+
+   nim->iname_offset = -1 ;  /* check from the end of the file */
+
+   if( read_data ) rv = nifti_image_load( nim ) ;
+   else            nim->data = NULL ;
+
+   /* check for nifti_image_load() failure, maybe bail out */
+   if( read_data && rv != 0 ){
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("-d failed image_load, free nifti image struct\n");
+      free(nim);
+      return NULL;
+   }
+
+   return nim ;
+}
+
+
+/*----------------------------------------------------------------------
+ * Read the extensions into the nifti_image struct   08 Dec 2004 [rickr]
+ *
+ * This function is called just after the header struct is read in, and
+ * it is assumed the file pointer has not moved.  The value in remain
+ * is assumed to be accurate, reflecting the bytes of space for potential
+ * extensions.
+ *
+ * return the number of extensions read in, or < 0 on error
+ *----------------------------------------------------------------------*/
+static int nifti_read_extensions( nifti_image *nim, znzFile fp, int64_t remain )
+{
+   nifti1_extender    extdr;      /* defines extension existence  */
+   nifti1_extension   extn;       /* single extension to process  */
+   nifti1_extension * Elist;      /* list of processed extensions */
+   int64_t            posn, count;
+
+   /* rcr n2 - add and use nifti2_extension type? */
+
+   if( !nim || znz_isnull(fp) ) {
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** nifti_read_extensions: bad inputs (%p,%p)\n",
+                 (void *)nim, (void *)fp);
+      return -1;
+   }
+
+   posn = znztell(fp);
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("-d nre: posn=%" PRId64 ", offset=%" PRId64
+                     ", type=%d, remain=%" PRId64 "\n",
+                     posn, nim->iname_offset, nim->nifti_type, remain);
+
+   if( remain < 16 ){
+      if( g_opts.debug > 2 ){
+         if( g_opts.skip_blank_ext )
+            Rc_fprintf_stderr("-d no extender in '%s' is okay, as "
+                           "skip_blank_ext is set\n",nim->fname);
+         else
+            Rc_fprintf_stderr("-d remain=%" PRId64 ", no space for extensions\n",
+                    remain);
+      }
+      return 0;
+   }
+
+   count = znzread( extdr.extension, 1, 4, fp ); /* get extender */
+
+   if( count < 4 ){
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("-d file '%s' is too short for an extender\n",
+                 nim->fname);
+      return 0;
+   }
+
+   if( extdr.extension[0] != 1 ){
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-d extender[0] (%d) shows no extensions for '%s'\n",
+                 extdr.extension[0], nim->fname);
+      return 0;
+   }
+
+   remain -= 4;
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("-d found valid 4-byte extender, remain = %" PRId64 "\n",
+              remain);
+
+   /* so we expect extensions, but have no idea of how many there may be */
+
+   count = 0;
+   Elist = NULL;
+   while (nifti_read_next_extension(&extn, nim, remain, fp) > 0)
+   {
+      if( nifti_add_exten_to_list(&extn, &Elist, (int)count+1) < 0 ){
+         free(Elist);
+         if( g_opts.debug > 0 )
+           Rc_fprintf_stderr("** NIFTI: failed adding ext %" PRId64 " to list\n",
+                    count);
+         return -1;
+      }
+
+      /* we have a new extension */
+      if( g_opts.debug > 1 ){
+         Rc_fprintf_stderr("+d found extension #%" PRId64
+                        ", code = 0x%x, size = %d\n",
+                 count, extn.ecode, extn.esize);
+         if( extn.ecode == NIFTI_ECODE_AFNI && g_opts.debug > 2 ) /* ~XML */
+            Rc_fprintf_stderr("   AFNI extension: %.*s\n",
+                    extn.esize-8,extn.edata);
+         else if( extn.ecode == NIFTI_ECODE_COMMENT && g_opts.debug > 2 )
+            Rc_fprintf_stderr("   COMMENT extension: %.*s\n",        /* TEXT */
+                    extn.esize-8,extn.edata);
+      }
+      remain -= extn.esize;
+      count++;
+   }
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("+d found %" PRId64 " extension(s)\n", count);
+   /* rcr n2 - allow int64_t num ext? */
+   nim->num_ext = (int)count;
+   nim->ext_list = Elist;
+
+   return count;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! nifti_add_extension - add an extension, with a copy of the data
+
+   Add an extension to the nim->ext_list array.
+   Fill this extension with a copy of the data, noting the
+       length and extension code.
+
+   \param nim    - nifti_image to add extension to
+   \param data   - raw extension data
+   \param length - length of raw extension data
+   \param ecode  - extension code
+
+   \sa extension codes NIFTI_ECODE_* in nifti1_io.h
+   \sa nifti_free_extensions, valid_nifti_extensions, nifti_copy_extensions
+
+   \return 0 on success, -1 on error (and free the entire list)
+*//*--------------------------------------------------------------------*/
+int nifti2_add_extension(nifti_image *nim, const char * data, int len, int ecode)
+{
+   nifti1_extension ext;
+
+   /* error are printed in functions */
+   if( nifti_fill_extension(&ext, data, len, ecode) )  { free(ext.edata);   return -1; }
+   if( nifti_add_exten_to_list(&ext, &nim->ext_list, nim->num_ext+1)) { free(ext.edata);   return -1; }
+
+   nim->num_ext++;  /* success, so increment */
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/* nifti_add_exten_to_list     - add a new nifti1_extension to the list
+
+   We will append via "malloc, copy and free", because on an error,
+   the list will revert to the previous one (sorry realloc(), only
+   quality dolphins get to become part of St@rk!st brand tunafish).
+
+   return 0 on success, -1 on error (and free the entire list)
+*//*--------------------------------------------------------------------*/
+static int nifti_add_exten_to_list( nifti1_extension *  new_ext,
+                                    nifti1_extension ** list, int new_length )
+{
+   nifti1_extension * tmplist;
+
+   tmplist = *list;
+   *list = (nifti1_extension *)malloc(new_length * sizeof(nifti1_extension));
+
+   /* check for failure first */
+   if( ! *list ){
+      Rc_fprintf_stderr("** NIFTI: failed to alloc %d ext structs (%d bytes)\n",
+              new_length, new_length*(int)sizeof(nifti1_extension));
+      if( !tmplist ) return -1;  /* no old list to lose */
+
+      *list = tmplist;  /* reset list to old one */
+      return -1;
+   }
+
+   /* if an old list exists, copy the pointers and free the list */
+   if( tmplist ){
+      memcpy(*list, tmplist, (new_length-1)*sizeof(nifti1_extension));
+      free(tmplist);
+   }
+
+   /* for some reason, I just don't like struct copy... */
+   (*list)[new_length-1].esize = new_ext->esize;
+   (*list)[new_length-1].ecode = new_ext->ecode;
+   (*list)[new_length-1].edata = new_ext->edata;
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("+d allocated and appended extension #%d to list\n",
+              new_length);
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/* nifti_fill_extension  - given data and length, fill an extension struct
+
+   Allocate memory for data, copy data, set the size and code.
+
+   return 0 on success, -1 on error (and free the entire list)
+*//*--------------------------------------------------------------------*/
+static int nifti_fill_extension( nifti1_extension *ext, const char * data,
+                                int len, int ecode)
+{
+   int esize;
+
+   if( !ext || !data || len < 0 ){
+      Rc_fprintf_stderr("** NIFTI fill_ext: bad params (%p,%p,%d)\n",
+              (void *)ext, (void *)data, len);
+      return -1;
+   } else if( ! nifti_is_valid_ecode(ecode) ){
+      Rc_fprintf_stderr("** NIFTI fill_ext: invalid ecode %d\n", ecode);
+      /* should not be fatal    29 Apr 2015 [rickr] */
+   }
+
+   /* compute esize, first : len+8, and take ceiling up to a mult of 16 */
+   esize = len+8;
+   if( esize & 0xf ) esize = (esize + 0xf) & ~0xf;
+   ext->esize = esize;
+
+   /* allocate esize-8 (maybe more than len), using calloc for fill */
+   ext->edata = (char *)calloc(esize-8, sizeof(char));
+   if( !ext->edata ){
+      Rc_fprintf_stderr("** NIFTI NFE: failed to alloc %d bytes for extension\n",
+              len);
+      return -1;
+   }
+
+   memcpy(ext->edata, data, len);  /* copy the data, using len */
+   ext->ecode = ecode;             /* set the ecode */
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("+d alloc %d bytes for ext len %d, ecode %d, esize %d\n",
+              esize-8, len, ecode, esize);
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------
+ * nifti_read_next_extension  - read a single extension from the file
+ *
+ * return (>= 0 is okay):
+ *
+ *     success      : esize
+ *     no extension : 0
+ *     error        : -1
+ *----------------------------------------------------------------------*/
+static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim,
+                                      int remain, znzFile fp )
+{
+   int swap = nim->byteorder != nifti_short_order();
+   int count, size, code = -1;
+
+   /* first clear nex */
+   nex->esize = nex->ecode = 0;
+   nex->edata = NULL;
+
+   if( remain < 16 ){
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-d only %d bytes remain, so no extension\n", remain);
+      return 0;
+   }
+
+   /* must start with 4-byte size and code */
+   count = (int)znzread( &size, 4, 1, fp );
+   if( count == 1 ) count += (int)znzread( &code, 4, 1, fp );
+
+   if( count != 2 || code == -1 ){
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-d current extension read failed\n");
+      znzseek(fp, -4*count, SEEK_CUR); /* back up past any read */
+      return 0;                        /* no extension, no error condition */
+   }
+
+   if( swap ){
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-d pre-swap exts: code %d, size %d\n", code, size);
+
+      nifti_swap_4bytes(1, &size);
+      nifti_swap_4bytes(1, &code);
+   }
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("-d potential extension: code %d, size %d\n", code, size);
+
+   if( !nifti_check_extension(nim, size, code, remain) ){
+      if( znzseek(fp, -8, SEEK_CUR) < 0 ){      /* back up past any read */
+         Rc_fprintf_stderr("** NIFTI: failure to back out of extension read!\n");
+         return -1;
+      }
+      return 0;
+   }
+
+   /* now get the actual data */
+   nex->esize = size;
+   nex->ecode = code;
+
+   size -= 8;  /* subtract space for size and code in extension */
+   nex->edata = (char *)malloc(size * sizeof(char));
+   if( !nex->edata ){
+      Rc_fprintf_stderr("** NIFTI: failed to allocate %d bytes for extension\n",
+              size);
+      return -1;
+   }
+
+   count = (int)znzread(nex->edata, 1, size, fp);
+   if( count < size ){
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("-d read only %d (of %d) bytes for extension\n",
+                 count, size);
+      free(nex->edata);
+      nex->edata = NULL;
+      return -1;
+   }
+
+   /* success! */
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("+d successfully read extension, code %d, size %d\n",
+              nex->ecode, nex->esize);
+
+   return nex->esize;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! for each extension, check code, size and data pointer
+*//*--------------------------------------------------------------------*/
+int valid_nifti2_extensions(const nifti_image * nim)
+{
+   nifti1_extension * ext;
+   int                c, errs;
+
+   if( nim->num_ext <= 0 || nim->ext_list == NULL ){
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d empty extension list\n");
+      return 0;
+   }
+
+   /* for each extension, check code, size and data pointer */
+   ext = nim->ext_list;
+   errs = 0;
+   for ( c = 0; c < nim->num_ext; c++ ){
+      if( ! nifti_is_valid_ecode(ext->ecode) ) {
+         if( g_opts.debug > 1 )
+            Rc_fprintf_stderr("-d ext %d, invalid code %d\n", c, ext->ecode);
+         /* should not be fatal    29 Apr 2015 [rickr] */
+      }
+
+      if( ext->esize <= 0 ){
+         if( g_opts.debug > 1 )
+            Rc_fprintf_stderr("-d ext %d, bad size = %d\n", c, ext->esize);
+         errs++;
+      } else if( ext->esize & 0xf ){
+         if( g_opts.debug > 1 )
+            Rc_fprintf_stderr("-d ext %d, size %d not multiple of 16\n",
+                    c, ext->esize);
+         errs++;
+      }
+
+      if( ext->edata == NULL ){
+         if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d ext %d, missing data\n", c);
+         errs++;
+      }
+
+      ext++;
+   }
+
+   if( errs > 0 ){
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("-d had %d extension errors, none will be written\n",
+                 errs);
+      return 0;
+   }
+
+   /* if we're here, we're good */
+   return 1;
+}
+
+/*----------------------------------------------------------------------*/
+/*! determine NIFTI version from buffer (check sizeof_hdr and magic)
+
+       \return -1 on error, else NIFTI version
+ *//*--------------------------------------------------------------------*/
+int nifti_header_version(const char * buf, size_t nbytes){
+   nifti_1_header *n1p = (nifti_1_header *)buf;
+   nifti_2_header *n2p = (nifti_2_header *)buf;
+   char            fname[] = { "nifti_header_version" };
+   int             sizeof_hdr, sver, nver;
+
+   if( !buf ) {
+      if(g_opts.debug > 0)
+         Rc_fprintf_stderr("** %s: have NULL buffer pointer", fname);
+      return -1;
+   }
+
+   if( nbytes < sizeof(nifti_1_header) ) {
+      if(g_opts.debug > 0)
+         Rc_fprintf_stderr("** %s: nbytes=%zu, too small for test", fname, nbytes);
+      return -1;
+   }
+
+   /* try to determine the version based on sizeof_hdr */
+   sver = -1;
+   sizeof_hdr = n1p->sizeof_hdr;
+   if     ( sizeof_hdr == (int)sizeof(nifti_1_header) ) sver = 1;
+   else if( sizeof_hdr == (int)sizeof(nifti_2_header) ) sver = 2;
+   else { /* try swapping */
+      nifti_swap_4bytes(1, &sizeof_hdr);
+      if     ( sizeof_hdr == (int)sizeof(nifti_1_header) ) sver = 1;
+      else if( sizeof_hdr == (int)sizeof(nifti_2_header) ) sver = 2;
+   }
+
+   /* and check magic field */
+   if      ( sver == 1 ) nver = NIFTI_VERSION(*n1p);
+   else if ( sver == 2 ) nver = NIFTI_VERSION(*n2p);
+   else                  nver = -1;
+
+   /* now compare and return */
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("-- %s: size ver = %d, ni ver = %d\n", fname, sver, nver);
+
+   if( sver == 1 ) {
+      nver = NIFTI_VERSION(*n1p);
+      if( nver == 0 ) return 0;        /* ANALYZE */
+      if( nver == 1 ) return 1;        /* NIFTI-1 */
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("** %s: bad NIFTI-1 magic= %.4s", fname, n1p->magic);
+      return -1;
+   } else if ( sver == 2 ) {
+      nver = NIFTI_VERSION(*n2p);
+      if( nver == 2 ) return 2;        /* NIFTI-2 */
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("** %s: bad NIFTI-2 magic4= %.4s", fname, n2p->magic);
+      return -1;
+   }
+
+   /* failure */
+
+   if( g_opts.debug > 0 )
+      Rc_fprintf_stderr("** %s: bad sizeof_hdr = %d\n", fname, n1p->sizeof_hdr);
+
+   return -1;
+}
+
+
+
+/*----------------------------------------------------------------------*/
+/*! check whether the extension code is valid
+
+    \return 1 if valid, 0 otherwise
+*//*--------------------------------------------------------------------*/
+int nifti_is_valid_ecode( int ecode )
+{
+   if( ecode < NIFTI_ECODE_IGNORE  ||   /* minimum code number (0) */
+       ecode > NIFTI_MAX_ECODE     ||   /* maximum code number     */
+       ecode & 1 )                      /* cannot be odd           */
+      return 0;
+
+   return 1;
+}
+
+
+/*----------------------------------------------------------------------
+ * check for valid size and code, as well as can be done
+ *----------------------------------------------------------------------*/
+static int nifti_check_extension(nifti_image *nim, int size, int code, int rem)
+{
+   /* check for bad code before bad size */
+   if( ! nifti_is_valid_ecode(code) ) {
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-d invalid extension code %d\n",code);
+      /* should not be fatal    29 Apr 2015 [rickr] */
+   }
+
+   if( size < 16 ){
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-d ext size %d, no extension\n",size);
+      return 0;
+   }
+
+   if( size > rem ){
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-d ext size %d, space %d, no extension\n", size, rem);
+      return 0;
+   }
+
+   if( size & 0xf ){
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-d nifti extension size %d not multiple of 16\n",size);
+      return 0;
+   }
+
+   if( nim->nifti_type == NIFTI_FTYPE_ASCII && size > LNI_MAX_NIA_EXT_LEN ){
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-d NVE, bad nifti_type 3 size %d\n", size);
+      return 0;
+   }
+
+   return 1;
+}
+
+
+/*----------------------------------------------------------------------
+ * nifti_image_load_prep  - prepare to read data
+ *
+ * Check nifti_image fields, open the file and seek to the appropriate
+ * offset for reading.
+ *
+ * return NULL on failure
+ *----------------------------------------------------------------------*/
+static znzFile nifti_image_load_prep( nifti_image *nim )
+{
+   /* set up data space, open data file and seek, then call nifti_read_buffer */
+   int64_t ntot , ii , ioff;
+   znzFile fp;
+   char   *tmpimgname;
+   char    fname[] = { "nifti_image_load_prep" };
+
+   /**- perform sanity checks */
+   if( nim == NULL      || nim->iname == NULL ||
+       nim->nbyper <= 0 || nim->nvox <= 0       )
+   {
+      if ( g_opts.debug > 0 ){
+         if( !nim ) Rc_fprintf_stderr("** ERROR: N_image_load: no nifti image\n");
+         else Rc_fprintf_stderr("** ERROR: nifti_image_load: bad params (%p,%d,"
+                      "%" PRId64 ")\n", nim->iname, nim->nbyper, nim->nvox);
+      }
+      return NULL;
+   }
+
+   ntot = nifti_get_volsize(nim) ; /* total bytes to read */
+
+   /**- open image data file */
+
+   tmpimgname = nifti_findimgname(nim->iname , nim->nifti_type);
+   if( tmpimgname == NULL ){
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** NIFTI: no image file found for '%s'\n",nim->iname);
+      return NULL;
+   }
+
+   fp = znzopen(tmpimgname, "rb", nifti_is_gzfile(tmpimgname));
+   if (znz_isnull(fp)){
+       if(g_opts.debug > 0) LNI_FERR(fname,"cannot open data file",tmpimgname);
+       free(tmpimgname);
+       return NULL;  /* bad open? */
+   }
+   free(tmpimgname);
+
+   /**- get image offset: a negative offset means to figure from end of file */
+   if( nim->iname_offset < 0 ){
+     if( nifti_is_gzfile(nim->iname) ){
+        if( g_opts.debug > 0 )
+           LNI_FERR(fname,"negative offset for compressed file",nim->iname);
+        znzclose(fp);
+        return NULL;
+     }
+     ii = nifti_get_filesize( nim->iname ) ;
+     if( ii <= 0 ){
+        if( g_opts.debug > 0 ) LNI_FERR(fname,"empty data file",nim->iname);
+        znzclose(fp);
+        return NULL;
+     }
+     ioff = (ii > ntot) ? ii-ntot : 0 ;
+   } else {                              /* non-negative offset   */
+     ioff = nim->iname_offset ;          /* means use it directly */
+   }
+
+   /**- seek to the appropriate read position */
+   if( znzseek(fp , (long)ioff , SEEK_SET) < 0 ){
+      Rc_fprintf_stderr("** NIFTI: could not seek to offset %" PRId64
+                     " in file '%s'\n",
+              ioff, nim->iname);
+      znzclose(fp);
+      return NULL;
+   }
+
+   /**- and return the File pointer */
+   return fp;
+}
+
+
+/*----------------------------------------------------------------------
+ * nifti_image_load
+ *----------------------------------------------------------------------*/
+/*! \fn int nifti_image_load( nifti_image *nim )
+    \brief Load the image blob into a previously initialized nifti_image.
+
+        - If not yet set, the data buffer is allocated with calloc().
+        - The data buffer will be byteswapped if necessary.
+        - The data buffer will not be scaled.
+
+    This function is used to read the image from disk.  It should be used
+    after a function such as nifti_image_read(), so that the nifti_image
+    structure is already initialized.
+
+    \param  nim pointer to a nifti_image (previously initialized)
+    \return 0 on success, -1 on failure
+    \sa     nifti_image_read, nifti_image_free, nifti_image_unload
+*/
+int nifti2_image_load( nifti_image *nim )
+{
+   /* set up data space, open data file and seek, then call nifti_read_buffer */
+   int64_t ntot , ii ;
+   znzFile fp ;
+
+   /**- open the file and position the FILE pointer */
+   fp = nifti_image_load_prep( nim );
+
+   if( fp == NULL ){
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** nifti_image_load, failed load_prep\n");
+      return -1;
+   }
+
+   ntot = nifti_get_volsize(nim);
+
+   /**- if the data pointer is not yet set, get memory space for the image */
+
+   if( nim->data == NULL )
+   {
+     nim->data = calloc(1,ntot) ;  /* create image memory */
+     if( nim->data == NULL ){
+        if( g_opts.debug > 0 )
+           Rc_fprintf_stderr("** NIFTI: failed to alloc %d bytes for image data\n",
+                   (int)ntot);
+        znzclose(fp);
+        return -1;
+     }
+   }
+
+   /**- now that everything is set up, do the reading */
+   ii = nifti_read_buffer(fp,nim->data,ntot,nim);
+   if( ii < ntot ){
+      znzclose(fp) ;
+      free(nim->data) ;
+      nim->data = NULL ;
+      return -1 ;  /* errors were printed in nifti_read_buffer() */
+   }
+
+   /**- close the file */
+   znzclose( fp ) ;
+
+   return 0 ;
+}
+
+
+/* 30 Nov 2004 [rickr]
+#undef  ERREX
+#define ERREX(msg)                                               \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_read_buffer: %s\n",(msg)) ;  \
+     return 0; } while(0)
+*/
+
+/*----------------------------------------------------------------------*/
+/*! read ntot bytes of data from an open file and byte swaps if necessary
+
+   note that nifti_image is required for information on datatype, bsize
+   (for any needed byte swapping), etc.
+
+   This function does not allocate memory, so dataptr must be valid.
+*//*--------------------------------------------------------------------*/
+int64_t nifti2_read_buffer(znzFile fp, void* dataptr, int64_t ntot,
+                                nifti_image *nim)
+{
+  int64_t ii;
+
+  if( dataptr == NULL ){
+     if( g_opts.debug > 0 )
+        Rc_fprintf_stderr("** ERROR: nifti_read_buffer: NULL dataptr\n");
+     return -1;
+  }
+
+  ii = znzread( dataptr , 1 , ntot , fp ) ;             /* data input */
+
+  /* if read was short, fail */
+  if( ii < ntot ){
+    if( g_opts.debug > 0 )
+       Rc_fprintf_stderr("++ WARNING: nifti_read_buffer(%s):\n"
+               "   data bytes needed = %" PRId64 "\n"
+               "   data bytes input  = %" PRId64 "\n"
+               "   number missing    = %" PRId64 " (set to 0)\n",
+               nim->iname , ntot , ii , (ntot-ii) ) ;
+    /* memset( (char *)(dataptr)+ii , 0 , ntot-ii ) ;  now failure [rickr] */
+    return -1 ;
+  }
+
+  if( g_opts.debug > 2 )
+    Rc_fprintf_stderr("+d nifti_read_buffer: read %" PRId64 " bytes\n", ii);
+
+  /* byte swap array if needed */
+
+  /* ntot/swapsize might not fit as int, use int64_t    6 Jul 2010 [rickr] */
+  if( nim->swapsize > 1 && nim->byteorder != nifti_short_order() ) {
+    if( g_opts.debug > 1 )
+       Rc_fprintf_stderr("+d nifti_read_buffer: swapping data bytes...\n");
+    nifti_swap_Nbytes( (int)(ntot / nim->swapsize), nim->swapsize , dataptr ) ;
+  }
+
+#if defined(isfinite) && !defined(USING_R)
+{
+  /* check input float arrays for goodness, and fix bad floats */
+  int fix_count = 0 ;
+
+  switch( nim->datatype ){
+
+    case NIFTI_TYPE_FLOAT32:
+    case NIFTI_TYPE_COMPLEX64:{
+        float *far = (float *)dataptr ; int64_t jj,nj ;
+        nj = ntot / sizeof(float) ;
+        for( jj=0 ; jj < nj ; jj++ )   /* count fixes 30 Nov 2004 [rickr] */
+           if( !IS_GOOD_FLOAT(far[jj]) ){
+              far[jj] = 0 ;
+              fix_count++ ;
+           }
+      }
+      break ;
+
+    case NIFTI_TYPE_FLOAT64:
+    case NIFTI_TYPE_COMPLEX128:{
+        double *far = (double *)dataptr ; int64_t jj,nj ;
+        nj = ntot / sizeof(double) ;
+        for( jj=0 ; jj < nj ; jj++ )   /* count fixes 30 Nov 2004 [rickr] */
+           if( !IS_GOOD_FLOAT(far[jj]) ){
+              far[jj] = 0 ;
+              fix_count++ ;
+           }
+      }
+      break ;
+
+
+  }
+
+  if( g_opts.debug > 1 )
+     Rc_fprintf_stderr("+d in image, %d bad floats were set to 0\n", fix_count);
+}
+#endif
+
+  return ii;
+}
+
+/*--------------------------------------------------------------------------*/
+/*! Unload the data in a nifti_image struct, but keep the metadata.
+*//*------------------------------------------------------------------------*/
+void nifti2_image_unload( nifti_image *nim )
+{
+   if( nim != NULL && nim->data != NULL ){
+     free(nim->data) ; nim->data = NULL ;
+   }
+   }
+
+/*--------------------------------------------------------------------------*/
+/*! free 'everything' about a nifti_image struct (including the passed struct)
+
+    free (only fields which are not NULL):
+      - fname and iname
+      - data
+      - any ext_list[i].edata
+      - ext_list
+      - nim
+*//*------------------------------------------------------------------------*/
+void nifti2_image_free( nifti_image *nim )
+{
+   if( nim == NULL ) return ;
+   if( nim->fname != NULL ) free(nim->fname) ;
+   if( nim->iname != NULL ) free(nim->iname) ;
+   if( nim->data  != NULL ) free(nim->data ) ;
+   (void)nifti_free_extensions( nim ) ;
+   free(nim) ; }
+
+
+/*--------------------------------------------------------------------------*/
+/*! free the nifti extensions
+
+    - If any edata pointer is set in the extension list, free() it.
+    - Free ext_list, if it is set.
+    - Clear num_ext and ext_list from nim.
+
+    \return 0 on success, -1 on error
+
+    \sa nifti_add_extension, nifti_copy_extensions
+*//*------------------------------------------------------------------------*/
+int nifti2_free_extensions( nifti_image *nim )
+{
+   int c ;
+   if( nim == NULL ) return -1;
+   if( nim->num_ext > 0 && nim->ext_list ){
+      for( c = 0; c < nim->num_ext; c++ )
+         if ( nim->ext_list[c].edata ) free(nim->ext_list[c].edata);
+      free(nim->ext_list);
+   }
+   /* or if it is inconsistent, warn the user (if we are not in quiet mode) */
+   else if ( (nim->num_ext > 0 || nim->ext_list != NULL) && (g_opts.debug > 0) )
+      Rc_fprintf_stderr("** warning: nifti extension num/ptr mismatch (%d,%p)\n",
+              nim->num_ext, (void *)nim->ext_list);
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("+d free'd %d extension(s)\n", nim->num_ext);
+
+   nim->num_ext = 0;
+   nim->ext_list = NULL;
+
+   return 0;
+}
+
+
+/*--------------------------------------------------------------------------*/
+/*! Print to stdout some info about a nifti_image struct.
+*//*------------------------------------------------------------------------*/
+void nifti2_image_infodump( const nifti_image *nim )
+{
+   char *str = nifti_image_to_ascii( nim ) ;
+   /* stdout -> stderr   2 Dec 2004 [rickr] */
+   if( str != NULL ){ Rc_fputs_stderr(str) ; free(str) ; }
+   }
+
+
+/*--------------------------------------------------------------------------
+ * nifti_write_buffer just check for a null znzFile and call znzwrite
+ *--------------------------------------------------------------------------*/
+/*! \fn int64_t nifti_write_buffer(znzFile fp, void *buffer, int64_t numbytes)
+    \brief write numbytes of buffer to file, fp
+
+    \param fp           File pointer (from znzopen) to gzippable nifti datafile
+    \param buffer       data buffer to be written
+    \param numbytes     number of bytes in buffer to write
+    \return number of bytes successfully written
+*/
+int64_t nifti_write_buffer(znzFile fp, const void *buffer, int64_t numbytes)
+{
+   /* Write all the image data at once (no swapping here) */
+   int64_t ss;
+   if (znz_isnull(fp)){
+      Rc_fprintf_stderr("** ERROR: nifti_write_buffer: null file pointer\n");
+      return 0;
+   }
+   ss = znzwrite( buffer , 1 , numbytes , fp ) ;
+   return ss;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! write the nifti_image data to file (from nim->data or from NBL)
+
+   If NBL is not NULL, write the data from that structure.  Otherwise,
+   write it out from nim->data.  No swapping is done here.
+
+   \param  fp  : File pointer
+   \param  nim : nifti_image corresponding to the data
+   \param  NBL : optional source of write data (if NULL use nim->data)
+
+   \return 0 on success, -1 on failure
+
+   Note: the nifti_image byte_order is set as that of the current CPU.
+         This is because such a conversion was made to the data upon
+         reading, while byte_order was not set (so the programs would
+         know what format the data was on disk).  Effectively, since
+         byte_order should match what is on disk, it should bet set to
+         that of the current CPU whenever new filenames are assigned.
+*//*--------------------------------------------------------------------*/
+int nifti2_write_all_data(znzFile fp, nifti_image * nim,
+                         const nifti_brick_list * NBL)
+{
+   int64_t ss, bnum;
+
+   if( !NBL ){ /* just write one buffer and get out of here */
+      if( nim->data == NULL ){
+         Rc_fprintf_stderr("** NIFTI ERROR (NWAD): no image data to write\n");
+         return -1;
+      }
+
+      ss = nifti_write_buffer(fp,nim->data,nim->nbyper * nim->nvox);
+      if (ss < nim->nbyper * nim->nvox){
+         Rc_fprintf_stderr(
+            "** NIFTI ERROR (NWAD): wrote only %" PRId64 " of %" PRId64
+            " bytes to file\n",
+            ss, nim->nbyper * nim->nvox);
+         return -1;
+      }
+
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("+d wrote single image of %" PRId64 " bytes\n", ss);
+   } else {
+      if( ! NBL->bricks || NBL->nbricks <= 0 || NBL->bsize <= 0 ){
+         Rc_fprintf_stderr("** NIFTI error (NWAD): no brick data to write (%p,%"
+                        PRId64 ",%" PRId64 ")\n",
+                 (void *)NBL->bricks, NBL->nbricks, NBL->bsize);
+         return -1;
+      }
+
+      for( bnum = 0; bnum < NBL->nbricks; bnum++ ){
+         ss = nifti_write_buffer(fp, NBL->bricks[bnum], NBL->bsize);
+         if( ss < NBL->bsize ){
+            Rc_fprintf_stderr(
+            "** NIFTI ERROR (NWAD): wrote only %" PRId64 " of %" PRId64
+            " bytes of brick %" PRId64 " of %" PRId64 " to file\n",
+            ss, NBL->bsize, bnum+1, NBL->nbricks);
+            return -1;
+         }
+      }
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("+d wrote image of %" PRId64
+                 " brick(s), each of %" PRId64 " bytes\n",
+                 NBL->nbricks, NBL->bsize);
+   }
+
+   /* mark as being in this CPU byte order */
+   nim->byteorder = nifti_short_order() ;
+
+   return 0;
+}
+
+/* return number of extensions written, or -1 on error */
+static int nifti_write_extensions(znzFile fp, nifti_image *nim)
+{
+   nifti1_extension * list;
+   char               extdr[4] = { 0, 0, 0, 0 };
+   int                c, size, ok = 1;
+
+   if( znz_isnull(fp) || !nim || nim->num_ext < 0 ){
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** nifti_write_extensions, bad params\n");
+      return -1;
+   }
+
+   /* if no extensions and user requests it, skip extender */
+   if( g_opts.skip_blank_ext && (nim->num_ext == 0 || ! nim->ext_list ) ){
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("-d no exts and skip_blank_ext set, "
+                        "so skipping 4-byte extender\n");
+      return 0;
+   }
+
+   /* if invalid extension list, clear num_ext */
+   if( ! valid_nifti_extensions(nim) ) nim->num_ext = 0;
+
+   /* write out extender block */
+   if( nim->num_ext > 0 ) extdr[0] = 1;
+   if( nifti_write_buffer(fp, extdr, 4) != 4 ){
+      Rc_fprintf_stderr("** NIFTI ERROR: failed to write extender\n");
+      return -1;
+   }
+
+   list = nim->ext_list;
+   for ( c = 0; c < nim->num_ext; c++ ){
+      size = (int)nifti_write_buffer(fp, &list->esize, sizeof(int));
+      ok = (size == (int)sizeof(int));
+      if( ok ){
+         size = (int)nifti_write_buffer(fp, &list->ecode, sizeof(int));
+         ok = (size == (int)sizeof(int));
+      }
+      if( ok ){
+         size = (int)nifti_write_buffer(fp, list->edata, list->esize - 8);
+         ok = (size == list->esize - 8);
+      }
+
+      if( !ok ){
+         Rc_fprintf_stderr("** NIFTI: failed while writing extension #%d\n",c);
+         return -1;
+      } else if ( g_opts.debug > 2 )
+         Rc_fprintf_stderr("+d wrote extension %d of %d bytes\n", c, size);
+
+      list++;
+   }
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("+d wrote out %d extension(s)\n", nim->num_ext);
+
+   return nim->num_ext;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! basic initialization of a nifti_image struct (to a 1x1x1 image)
+*//*--------------------------------------------------------------------*/
+nifti_image* nifti2_simple_init_nim(void)
+{
+   nifti_image *nim;
+   nifti_2_header nhdr;
+   int nbyper, swapsize;
+
+   memset(&nhdr,0,sizeof(nhdr)) ;  /* zero out header, to be safe */
+
+   nhdr.sizeof_hdr = sizeof(nhdr) ;
+
+   nhdr.dim[0] = 3 ;
+   nhdr.dim[1] = 1 ; nhdr.dim[2] = 1 ; nhdr.dim[3] = 1 ;
+   nhdr.dim[4] = 0 ;
+
+   nhdr.pixdim[0] = 0.0 ;
+   nhdr.pixdim[1] = 1.0 ; nhdr.pixdim[2] = 1.0 ; nhdr.pixdim[3] = 1.0 ;
+
+   nhdr.datatype = DT_FLOAT32 ;
+   nifti_datatype_sizes( nhdr.datatype , &nbyper, &swapsize );
+   nhdr.bitpix   = 8 * nbyper ;
+
+   memcpy(nhdr.magic, nifti2_magic, 8);  /* init to single file */
+
+   nim = nifti_convert_n2hdr2nim(nhdr,NULL);
+   nim->fname = NULL;
+   nim->iname = NULL;
+   return nim;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! basic initialization of a nifti_2_header struct (with given dimensions)
+
+   Return an allocated nifti_2_header struct, based on the given
+   dimensions and datatype.
+
+   \param arg_dims  : optional dim[8] array (default {3,1,1,1,0,0,0,0})
+   \param arg_dtype : optional datatype (default DT_FLOAT32)
+
+   \return pointer to allocated nifti_2_header struct
+*//*--------------------------------------------------------------------*/
+nifti_2_header * nifti_make_new_n2_header(const int64_t arg_dims[],
+                                          int arg_dtype)
+{
+   nifti_2_header * nhdr;
+   const int64_t    default_dims[8] = { 3, 1, 1, 1, 0, 0, 0, 0 };
+   const int64_t  * dim;  /* either passed or default dims  */
+   int              dtype; /* either passed or default dtype */
+   int              c, nbyper, swapsize;
+
+   /* if arg_dims is passed, apply it */
+   if( arg_dims ) dim = arg_dims;
+   else           dim = default_dims;
+
+   /* validate dim: if there is any problem, apply default_dims */
+   if( dim[0] < 1 || dim[0] > 7 ) {
+      Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dim[0]=%" PRId64 "\n",
+              dim[0]);
+      dim = default_dims;
+   } else {
+      for( c = 1; c <= dim[0]; c++ )
+         if( dim[c] < 1 )
+         {
+            Rc_fprintf_stderr(
+                "** nifti_simple_hdr_with_dims: bad dim[%d]=%" PRId64 "\n",
+                c, dim[c]);
+            dim = default_dims;
+            break;
+         }
+   }
+
+   /* validate dtype, too */
+   dtype = arg_dtype;
+   if( ! nifti_is_valid_datatype(dtype) ) {
+      Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dtype %d\n",dtype);
+      dtype = DT_FLOAT32;
+   }
+
+   /* now populate the header struct */
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("+d make_new_n2_header, dim[0] = %" PRId64
+              ", datatype = %d\n",
+              dim[0], dtype);
+
+   nhdr = (nifti_2_header *)calloc(1,sizeof(nifti_2_header));
+   if( !nhdr ){
+      Rc_fprintf_stderr("** NIFTI make_new_n2_header: failed to alloc hdr\n");
+      return NULL;
+   }
+
+   nhdr->sizeof_hdr = sizeof(nifti_2_header) ;
+
+   /* init dim and pixdim */
+   nhdr->dim[0] = dim[0];
+   nhdr->pixdim[0] = 0.0;
+   for( c = 1; c <= dim[0]; c++ ) {
+      nhdr->dim[c] = dim[c];
+      nhdr->pixdim[c] = 1.0;
+   }
+
+   nhdr->datatype = dtype ;
+   nifti_datatype_sizes( nhdr->datatype , &nbyper, &swapsize );
+   nhdr->bitpix   = 8 * nbyper ;
+
+   memcpy(nhdr->magic, nifti2_magic, 8);  /* init to single file */
+
+   return nhdr;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! basic initialization of a nifti_1_header struct (with given dimensions)
+
+   Return an allocated nifti_1_header struct, based on the given
+   dimensions and datatype.
+
+   \param arg_dims  : optional dim[8] array (default {3,1,1,1,0,0,0,0})
+   \param arg_dtype : optional datatype (default DT_FLOAT32)
+
+   \return pointer to allocated nifti_1_header struct
+*//*--------------------------------------------------------------------*/
+nifti_1_header * nifti_make_new_n1_header(const int64_t arg_dims[],
+                                          int arg_dtype)
+{
+   nifti_1_header * nhdr;
+   const int64_t    default_dims[8] = { 3, 1, 1, 1, 0, 0, 0, 0 };
+   const int64_t  * dim;  /* either passed or default dims  */
+   int              dtype; /* either passed or default dtype */
+   int              c, nbyper, swapsize;
+
+   /* if arg_dims is passed, apply it */
+   if( arg_dims ) dim = arg_dims;
+   else           dim = default_dims;
+
+   /* validate dim: if there is any problem, apply default_dims */
+   if( dim[0] < 1 || dim[0] > 7 ) {
+      Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dim[0]=%" PRId64 "\n",
+              dim[0]);
+      dim = default_dims;
+   } else {
+      for( c = 1; c <= dim[0]; c++ )
+         if( dim[c] < 1 )
+         {
+            Rc_fprintf_stderr(
+                "** nifti_simple_hdr_with_dims: bad dim[%d]=%" PRId64 "\n",                     c, dim[c]);
+            dim = default_dims;
+            break;
+         }
+   }
+
+   /* validate dtype, too */
+   dtype = arg_dtype;
+   if( ! nifti_is_valid_datatype(dtype) ) {
+      Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dtype %d\n",dtype);
+      dtype = DT_FLOAT32;
+   }
+
+   /* now populate the header struct */
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("+d make_new_n1_header, dim[0] = %" PRId64
+              ", datatype = %d\n",
+              dim[0], dtype);
+
+   nhdr = (nifti_1_header *)calloc(1,sizeof(nifti_1_header));
+   if( !nhdr ){
+      Rc_fprintf_stderr("** NIFTI make_new_n1_header: failed to alloc hdr\n");
+      return NULL;
+   }
+
+   nhdr->sizeof_hdr = sizeof(nifti_1_header) ;
+   nhdr->regular    = 'r' ;           /* for some stupid reason */
+
+   /* init dim and pixdim */
+   nhdr->dim[0] = (int)dim[0]; /* rcr n2 - check dim sizes for nifti-1 */
+                               /* (verify vals are < 2^15) */
+   nhdr->pixdim[0] = 0.0f;
+   for( c = 1; c <= dim[0]; c++ ) {
+      nhdr->dim[c] = (int)dim[c];
+      nhdr->pixdim[c] = 1.0f;
+   }
+
+   nhdr->datatype = dtype ;
+   nifti_datatype_sizes( nhdr->datatype , &nbyper, &swapsize );
+   nhdr->bitpix   = 8 * nbyper ;
+
+   strcpy(nhdr->magic, "n+1");  /* init to single file */
+
+   return nhdr;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! basic creation of a nifti_image struct
+
+   Create a nifti_image from the given dimensions and data type.
+   Optinally, allocate zero-filled data.
+
+   \param dims      : optional dim[8]   (default {3,1,1,1,0,0,0,0})
+   \param datatype  : optional datatype (default DT_FLOAT32)
+   \param data_fill : if flag is set, allocate zero-filled data for image
+
+   \return pointer to allocated nifti_image struct
+*//*--------------------------------------------------------------------*/
+nifti_image * nifti2_make_new_nim(const int64_t dims[], int datatype,
+                                 int data_fill)
+{
+   nifti_image    * nim;
+   nifti_2_header * nhdr;
+
+   nhdr = nifti_make_new_n2_header(dims, datatype);
+   if( !nhdr ) return NULL;  /* error already printed */
+
+   nim = nifti_convert_n2hdr2nim(*nhdr,NULL);
+   free(nhdr);               /* in any case, we are done with this */
+   if( !nim ){
+      Rc_fprintf_stderr("** NMNN: nifti_convert_n2hdr2nim failure\n");
+      return NULL;
+   }
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("+d nifti_make_new_nim, data_fill = %d\n",data_fill);
+
+   if( data_fill ) {
+      nim->data = calloc(nim->nvox, nim->nbyper);
+
+      /* if we cannot allocate data, take ball and go home */
+      if( !nim->data ) {
+         Rc_fprintf_stderr("** NIFTI NMNN: failed to alloc %" PRId64
+                        " bytes for data\n", nim->nvox*nim->nbyper);
+         nifti_image_free(nim);
+         nim = NULL;
+      }
+   }
+
+   return nim;
+}
+
+#undef N_CHECK_2BYTE_VAL
+#define N_CHECK_2BYTE_VAL(fn) do { if( ! NIFTI_IS_16_BIT_INT(nim->fn) ) { \
+   Rc_fprintf_stderr("** nim->%s = %" PRId64                                 \
+           " does not fit into NIFTI-1 header\n",                         \
+           #fn, (int64_t)nim->fn); return 1; } } while(0)
+
+
+/*----------------------------------------------------------------------*/
+/*! convert a nifti_image structure to a nifti_1_header struct
+
+    No allocation is done, this should be used via structure copy.
+    As in:
+    <pre>
+    nifti_1_header my_header;
+    my_header = nifti_convert_nim2n1hdr(my_nim_pointer);
+    </pre>
+*//*--------------------------------------------------------------------*/
+int nifti_convert_nim2n1hdr(const nifti_image * nim, nifti_1_header * hdr)
+{
+   nifti_1_header nhdr;
+
+   if( !hdr ) {
+      Rc_fprintf_stderr("** nifti_CN2N1hdr: no hdr to fill\n");
+      return 1;
+   }
+
+   memset(&nhdr,0,sizeof(nhdr)) ;  /* zero out header, to be safe */
+
+
+   /**- load the ANALYZE-7.5 generic parts of the header struct */
+
+   nhdr.sizeof_hdr = sizeof(nhdr) ;
+   nhdr.regular    = 'r' ;             /* for some stupid reason */
+
+   N_CHECK_2BYTE_VAL(ndim);
+   N_CHECK_2BYTE_VAL(nx);
+   N_CHECK_2BYTE_VAL(ny);
+   N_CHECK_2BYTE_VAL(nz);
+   N_CHECK_2BYTE_VAL(nt);
+   N_CHECK_2BYTE_VAL(nu);
+   N_CHECK_2BYTE_VAL(nv);
+   N_CHECK_2BYTE_VAL(nw);
+   N_CHECK_2BYTE_VAL(datatype);
+   N_CHECK_2BYTE_VAL(nbyper);
+
+   nhdr.dim[0] = nim->ndim ;
+   nhdr.dim[1] = nim->nx ; nhdr.dim[2] = nim->ny ; nhdr.dim[3] = nim->nz ;
+   nhdr.dim[4] = nim->nt ; nhdr.dim[5] = nim->nu ; nhdr.dim[6] = nim->nv ;
+   nhdr.dim[7] = nim->nw ;
+
+   nhdr.pixdim[0] = 0.0f ;
+   nhdr.pixdim[1] = nim->dx ; nhdr.pixdim[2] = nim->dy ;
+   nhdr.pixdim[3] = nim->dz ; nhdr.pixdim[4] = nim->dt ;
+   nhdr.pixdim[5] = nim->du ; nhdr.pixdim[6] = nim->dv ;
+   nhdr.pixdim[7] = nim->dw ;
+
+   nhdr.datatype = nim->datatype ;
+   nhdr.bitpix   = 8 * nim->nbyper ;
+
+   if( nim->cal_max > nim->cal_min ){
+     nhdr.cal_max = nim->cal_max ;
+     nhdr.cal_min = nim->cal_min ;
+   }
+
+   if( nim->scl_slope != 0.0 ){
+     nhdr.scl_slope = nim->scl_slope ;
+     nhdr.scl_inter = nim->scl_inter ;
+   }
+
+   if( nim->descrip[0] != '\0' ){
+     memcpy(nhdr.descrip ,nim->descrip ,79) ; nhdr.descrip[79] = '\0' ;
+   }
+   if( nim->aux_file[0] != '\0' ){
+     memcpy(nhdr.aux_file ,nim->aux_file ,23) ; nhdr.aux_file[23] = '\0' ;
+   }
+
+   /**- Load NIFTI specific stuff into the header */
+
+   if( nim->nifti_type > NIFTI_FTYPE_ANALYZE ){ /* then not ANALYZE */
+
+     if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcpy(nhdr.magic,"n+1") ;
+     else                                          strcpy(nhdr.magic,"ni1") ;
+
+     nhdr.pixdim[1] = (float)fabs(nhdr.pixdim[1]) ;
+     nhdr.pixdim[2] = (float)fabs(nhdr.pixdim[2]) ;
+     nhdr.pixdim[3] = (float)fabs(nhdr.pixdim[3]) ;
+     nhdr.pixdim[4] = (float)fabs(nhdr.pixdim[4]) ;
+     nhdr.pixdim[5] = (float)fabs(nhdr.pixdim[5]) ;
+     nhdr.pixdim[6] = (float)fabs(nhdr.pixdim[6]) ;
+     nhdr.pixdim[7] = (float)fabs(nhdr.pixdim[7]) ;
+
+     N_CHECK_2BYTE_VAL(intent_code);
+     N_CHECK_2BYTE_VAL(qform_code);
+     N_CHECK_2BYTE_VAL(sform_code);
+
+     nhdr.intent_code = nim->intent_code ;
+     nhdr.intent_p1   = nim->intent_p1 ;
+     nhdr.intent_p2   = nim->intent_p2 ;
+     nhdr.intent_p3   = nim->intent_p3 ;
+     if( nim->intent_name[0] != '\0' ){
+       memcpy(nhdr.intent_name,nim->intent_name,15) ;
+       nhdr.intent_name[15] = '\0' ;
+     }
+
+     nhdr.vox_offset  = (float) nim->iname_offset ;
+     nhdr.xyzt_units  = SPACE_TIME_TO_XYZT( nim->xyz_units, nim->time_units ) ;
+     nhdr.toffset     = nim->toffset ;
+
+     if( nim->qform_code > 0 ){
+       nhdr.qform_code = nim->qform_code ;
+       nhdr.quatern_b  = nim->quatern_b ;
+       nhdr.quatern_c  = nim->quatern_c ;
+       nhdr.quatern_d  = nim->quatern_d ;
+       nhdr.qoffset_x  = nim->qoffset_x ;
+       nhdr.qoffset_y  = nim->qoffset_y ;
+       nhdr.qoffset_z  = nim->qoffset_z ;
+       nhdr.pixdim[0]  = (nim->qfac >= 0.0) ? 1.0f : -1.0f ;
+     }
+
+     if( nim->sform_code > 0 ){
+       nhdr.sform_code = nim->sform_code ;
+       nhdr.srow_x[0]  = nim->sto_xyz.m[0][0] ;
+       nhdr.srow_x[1]  = nim->sto_xyz.m[0][1] ;
+       nhdr.srow_x[2]  = nim->sto_xyz.m[0][2] ;
+       nhdr.srow_x[3]  = nim->sto_xyz.m[0][3] ;
+       nhdr.srow_y[0]  = nim->sto_xyz.m[1][0] ;
+       nhdr.srow_y[1]  = nim->sto_xyz.m[1][1] ;
+       nhdr.srow_y[2]  = nim->sto_xyz.m[1][2] ;
+       nhdr.srow_y[3]  = nim->sto_xyz.m[1][3] ;
+       nhdr.srow_z[0]  = nim->sto_xyz.m[2][0] ;
+       nhdr.srow_z[1]  = nim->sto_xyz.m[2][1] ;
+       nhdr.srow_z[2]  = nim->sto_xyz.m[2][2] ;
+       nhdr.srow_z[3]  = nim->sto_xyz.m[2][3] ;
+     }
+
+     N_CHECK_2BYTE_VAL(sform_code);
+     N_CHECK_2BYTE_VAL(slice_start);
+     N_CHECK_2BYTE_VAL(slice_end);
+
+     nhdr.dim_info = FPS_INTO_DIM_INFO( nim->freq_dim ,
+                                        nim->phase_dim , nim->slice_dim ) ;
+     nhdr.slice_code     = nim->slice_code ;
+     nhdr.slice_start    = nim->slice_start ;
+     nhdr.slice_end      = nim->slice_end ;
+     nhdr.slice_duration = nim->slice_duration ;
+   }
+
+   memcpy(hdr, &nhdr, sizeof(nhdr));
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! convert a nifti_image structure to a nifti_2_header struct
+
+    No allocation is done, this should be used via structure copy.
+    As in:
+    <pre>
+    nifti_2_header my_header;
+    my_header = nifti_convert_nim2n2hdr(my_nim_pointer);
+    </pre>
+*//*--------------------------------------------------------------------*/
+int nifti_convert_nim2n2hdr(const nifti_image * nim, nifti_2_header * hdr)
+{
+   nifti_2_header nhdr;
+
+   if( !hdr ) {
+      Rc_fprintf_stderr("** nifti_CN2N2hdr: no hdr to fill\n");
+      return 1;
+   }
+
+   memset(&nhdr,0,sizeof(nhdr)) ;  /* zero out header, to be safe */
+
+
+   /**- load the ANALYZE-7.5 generic parts of the header struct */
+
+   nhdr.sizeof_hdr = sizeof(nhdr) ;
+   memcpy(nhdr.magic, nifti2_magic, 8);
+   if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_2 ) nhdr.magic[1] = 'i';
+
+   nhdr.datatype = nim->datatype ;
+   nhdr.bitpix   = 8 * nim->nbyper ;
+
+   nhdr.dim[0] = nim->ndim ;
+   nhdr.dim[1] = nim->nx ; nhdr.dim[2] = nim->ny ; nhdr.dim[3] = nim->nz ;
+   nhdr.dim[4] = nim->nt ; nhdr.dim[5] = nim->nu ; nhdr.dim[6] = nim->nv ;
+   nhdr.dim[7] = nim->nw ;
+
+   nhdr.intent_p1 = nim->intent_p1 ;
+   nhdr.intent_p2 = nim->intent_p2 ;
+   nhdr.intent_p3 = nim->intent_p3 ;
+
+   nhdr.pixdim[0] = 0.0 ;
+   nhdr.pixdim[1] = fabs(nim->dx) ; nhdr.pixdim[2] = fabs(nim->dy) ;
+   nhdr.pixdim[3] = fabs(nim->dz) ; nhdr.pixdim[4] = fabs(nim->dt) ;
+   nhdr.pixdim[5] = fabs(nim->du) ; nhdr.pixdim[6] = fabs(nim->dv) ;
+   nhdr.pixdim[7] = fabs(nim->dw) ;
+
+   nhdr.vox_offset  = nim->iname_offset ;
+
+   nhdr.scl_slope = nim->scl_slope ;
+   nhdr.scl_inter = nim->scl_inter ;
+
+   nhdr.cal_max = nim->cal_max ;
+   nhdr.cal_min = nim->cal_min ;
+
+   nhdr.slice_duration = nim->slice_duration ;
+   nhdr.toffset        = nim->toffset ;
+   nhdr.slice_start    = nim->slice_start ;
+   nhdr.slice_end      = nim->slice_end ;
+
+   if( nim->descrip[0] != '\0' ){
+     memcpy(nhdr.descrip ,nim->descrip ,79) ; nhdr.descrip[79] = '\0' ;
+   }
+   if( nim->aux_file[0] != '\0' ){
+     memcpy(nhdr.aux_file ,nim->aux_file ,23) ; nhdr.aux_file[23] = '\0' ;
+   }
+
+   if( nim->qform_code > 0 ){
+     nhdr.qform_code = nim->qform_code ;
+     nhdr.quatern_b  = nim->quatern_b ;
+     nhdr.quatern_c  = nim->quatern_c ;
+     nhdr.quatern_d  = nim->quatern_d ;
+     nhdr.qoffset_x  = nim->qoffset_x ;
+     nhdr.qoffset_y  = nim->qoffset_y ;
+     nhdr.qoffset_z  = nim->qoffset_z ;
+     nhdr.pixdim[0]  = (nim->qfac >= 0.0) ? 1.0f : -1.0f ;
+   }
+
+   if( nim->sform_code > 0 ){
+     nhdr.sform_code = nim->sform_code ;
+     nhdr.srow_x[0]  = nim->sto_xyz.m[0][0] ;
+     nhdr.srow_x[1]  = nim->sto_xyz.m[0][1] ;
+     nhdr.srow_x[2]  = nim->sto_xyz.m[0][2] ;
+     nhdr.srow_x[3]  = nim->sto_xyz.m[0][3] ;
+     nhdr.srow_y[0]  = nim->sto_xyz.m[1][0] ;
+     nhdr.srow_y[1]  = nim->sto_xyz.m[1][1] ;
+     nhdr.srow_y[2]  = nim->sto_xyz.m[1][2] ;
+     nhdr.srow_y[3]  = nim->sto_xyz.m[1][3] ;
+     nhdr.srow_z[0]  = nim->sto_xyz.m[2][0] ;
+     nhdr.srow_z[1]  = nim->sto_xyz.m[2][1] ;
+     nhdr.srow_z[2]  = nim->sto_xyz.m[2][2] ;
+     nhdr.srow_z[3]  = nim->sto_xyz.m[2][3] ;
+   }
+
+   nhdr.slice_code  = nim->slice_code ;
+   nhdr.xyzt_units  = SPACE_TIME_TO_XYZT( nim->xyz_units, nim->time_units ) ;
+   nhdr.intent_code = nim->intent_code ;
+   if( nim->intent_name[0] != '\0' ){
+     memcpy(nhdr.intent_name,nim->intent_name,15) ;
+     nhdr.intent_name[15] = '\0' ;
+   }
+
+   nhdr.dim_info = FPS_INTO_DIM_INFO( nim->freq_dim ,
+                                      nim->phase_dim , nim->slice_dim ) ;
+
+   nhdr.unused_str[0] = '\0' ;  /* not needed, but complete */
+
+   memcpy(hdr, &nhdr, sizeof(nhdr));
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! \fn int nifti_copy_extensions(nifti_image * nim_dest, nifti_image * nim_src)
+    \brief copy the nifti1_extension list from src to dest
+
+    Duplicate the list of nifti1_extensions.  The dest structure must
+    be clear of extensions.
+    \return 0 on success, -1 on failure
+
+    \sa nifti_add_extension, nifti_free_extensions
+*/
+int nifti2_copy_extensions(nifti_image * nim_dest, const nifti_image * nim_src)
+{
+   char   * data;
+   int64_t  bytes;
+   int      c, size, old_size;
+
+   if( nim_dest->num_ext > 0 || nim_dest->ext_list != NULL ){
+      Rc_fprintf_stderr("** NIFTI: will not copy over existing extensions\n");
+      return -1;
+   }
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("+d duplicating %d extension(s)\n", nim_src->num_ext);
+
+   if( nim_src->num_ext <= 0 ) return 0;
+
+   bytes = nim_src->num_ext * sizeof(nifti1_extension);  /* I'm lazy */
+   nim_dest->ext_list = (nifti1_extension *)malloc(bytes);
+   if( !nim_dest->ext_list ){
+      Rc_fprintf_stderr("** failed to allocate %d nifti1_extension structs\n",
+              nim_src->num_ext);
+      return -1;
+   }
+
+   /* copy the extension data */
+   nim_dest->num_ext = 0;
+   for( c = 0; c < nim_src->num_ext; c++ ){
+      size = old_size = nim_src->ext_list[c].esize;
+      if( size & 0xf ) size = (size + 0xf) & ~0xf; /* make multiple of 16 */
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("+d dup'ing ext #%d of size %d (from size %d)\n",
+                 c, size, old_size);
+      /* data length is size-8, as esize includes space for esize and ecode */
+      data = (char *)calloc(size-8,sizeof(char));      /* maybe size > old */
+      if( !data ){
+         Rc_fprintf_stderr("** NIFTI: failed to alloc %d bytes for extention\n",
+                 size);
+         if( c == 0 ) { free(nim_dest->ext_list); nim_dest->ext_list = NULL; }
+         /* otherwise, keep what we have (a.o.t. deleting them all) */
+         return -1;
+      }
+      /* finally, fill the new structure */
+      nim_dest->ext_list[c].esize = size;
+      nim_dest->ext_list[c].ecode = nim_src->ext_list[c].ecode;
+      nim_dest->ext_list[c].edata = data;
+      memcpy(data, nim_src->ext_list[c].edata, old_size-8);
+
+      nim_dest->num_ext++;
+   }
+
+   return 0;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! compute the total size of all extensions
+
+    \return the total of all esize fields
+
+    Note that each esize includes 4 bytes for ecode, 4 bytes for esize,
+    and the bytes used for the data.  Each esize also needs to be a
+    multiple of 16, so it may be greater than the sum of its 3 parts.
+*//*--------------------------------------------------------------------*/
+static int nifti_extension_size(nifti_image *nim)
+{
+   int c, size = 0;
+
+   if( !nim || nim->num_ext <= 0 ) return 0;
+
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d ext sizes:");
+
+   for ( c = 0; c < nim->num_ext; c++ ){
+      size += nim->ext_list[c].esize;
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("  %d",nim->ext_list[c].esize);
+   }
+
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr(" (total = %d)\n",size);
+
+   return size;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! set the nifti_image iname_offset field, based on nifti_type
+
+    - use nifti_ver to determine the size of the header
+      (0: default, else NIFTI-version)
+    - if writing to 2 files, set offset to 0
+    - if writing to a single NIFTI-1 file, set the offset to
+         352 + total extension size, then align to 16-byte boundary
+    - if writing an ASCII header, set offset to -1
+*//*--------------------------------------------------------------------*/
+void nifti2_set_iname_offset(nifti_image *nim, int nifti_ver)
+{
+   int64_t offset;
+   int64_t hsize = sizeof(nifti_1_header);  /* default */
+
+   if( nifti_ver < 0 || nifti_ver > 2 ) {
+      if( g_opts.debug > 0 )
+         Rc_fprintf_stderr("** invalid nifti_ver = %d for set_iname_offset\n",
+                 nifti_ver);
+      /* but stick with the default */
+   } else if( nifti_ver == 2 ) {
+      hsize = sizeof(nifti_2_header);
+   }
+
+   switch( nim->nifti_type ){
+
+     default:  /* writing into 2 files */
+       /* we only write files with 0 offset in the 2 file format */
+       nim->iname_offset = 0 ;
+     break ;
+
+     /* NIFTI-1 single binary file - always update */
+     case NIFTI_FTYPE_NIFTI1_1:
+     case NIFTI_FTYPE_NIFTI2_1:
+       offset = nifti_extension_size(nim) + hsize + 4;
+       /* be sure offset is aligned to a 16 byte boundary */
+       if ( ( offset % 16 ) != 0 )  offset = ((offset + 0xf) & ~0xf);
+       if( nim->iname_offset != offset ){
+          if( g_opts.debug > 1 )
+             Rc_fprintf_stderr("+d changing offset from %" PRId64 " to %" PRId64
+                     "\n", nim->iname_offset, offset);
+          nim->iname_offset = offset;
+       }
+     break ;
+
+     /* non-standard case: NIFTI-1 ASCII header + binary data (single file) */
+     case NIFTI_FTYPE_ASCII:
+       nim->iname_offset = -1 ;             /* compute offset from filesize */
+     break ;
+   }
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! write the nifti_image dataset to disk, optionally including data
+
+   This is just a front-end for nifti_image_write_hdr_img2.
+
+   \param nim        nifti_image to write to disk
+   \param write_data write options (see nifti_image_write_hdr_img2)
+   \param opts       file open options ("wb" from nifti_image_write)
+
+   \sa nifti_image_write, nifti_image_write_hdr_img2, nifti_image_free,
+       nifti_set_filenames
+*//*--------------------------------------------------------------------*/
+znzFile nifti2_image_write_hdr_img( nifti_image *nim , int write_data ,
+                                          const char* opts )
+{
+  return nifti_image_write_hdr_img2(nim,write_data,opts,NULL,NULL);
+}
+
+
+#undef  ERREX
+#define ERREX(msg)                                                \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_hdr_img: %s\n",(msg)) ;  \
+     return fp ; } while(0)
+
+
+/* ----------------------------------------------------------------------*/
+/*! This writes the header (and optionally the image data) to file
+ *
+ * If the image data file is left open it returns a valid znzFile handle.
+ * It also uses imgfile as the open image file is not null, and modifies
+ * it inside.
+ *
+ * \param nim        nifti_image to write to disk
+ * \param write_opts flags whether to write data and/or close file (see below)
+ * \param opts       file-open options, probably "wb" from nifti_image_write()
+ * \param imgfile    optional open znzFile struct, for writing image data
+                     (may be NULL)
+ * \param NBL        optional nifti_brick_list, containing the image data
+                     (may be NULL)
+ *
+ * Values for write_opts mode are based on two binary flags
+ * ( 0/1 for no-write/write data, and 0/2 for close/leave-open files ) :
+ *    -   0 = do not write data and close (do not open data file)
+ *    -   1 = write data        and close
+ *    -   2 = do not write data and leave data file open
+ *    -   3 = write data        and leave data file open
+ *
+ * \sa nifti_image_write, nifti_image_write_hdr_img, nifti_image_free,
+ *     nifti_set_filenames
+*//*---------------------------------------------------------------------*/
+znzFile nifti2_image_write_hdr_img2(nifti_image *nim, int write_opts,
+               const char * opts, znzFile imgfile, const nifti_brick_list * NBL)
+{
+   nifti_1_header n1hdr ;
+   nifti_2_header n2hdr ;
+   znzFile        fp=NULL;
+   int64_t        ss ;
+   int            write_data, leave_open;
+   int            nver=1, hsize=(int)sizeof(nifti_1_header);  /* 5 Aug 2015 */
+   char           func[] = { "nifti_image_write_hdr_img2" };
+
+   write_data = write_opts & 1;  /* just separate the bits now */
+   leave_open = write_opts & 2;
+
+   if( ! nim                              ) ERREX("NULL input") ;
+   if( ! nifti_validfilename(nim->fname)  ) ERREX("bad fname input") ;
+   if( write_data && ! nim->data && ! NBL ) ERREX("no image data") ;
+
+   if( write_data && NBL && ! nifti_NBL_matches_nim(nim, NBL) )
+      ERREX("NBL does not match nim");
+
+   if( g_opts.debug > 1 ){
+      Rc_fprintf_stderr("-d writing nifti file '%s'...\n", nim->fname);
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("-d nifti type %d, offset %" PRId64 "\n",
+                 nim->nifti_type, nim->iname_offset);
+   }
+
+   if( nim->nifti_type == NIFTI_FTYPE_ASCII )   /* non-standard case */
+      return nifti_write_ascii_image(nim,NBL,opts,write_data,leave_open);
+   else if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_1 || nim->nifti_type == NIFTI_FTYPE_NIFTI2_2 ) {
+      nifti_set_iname_offset(nim, 2);
+      if( nifti_convert_nim2n2hdr(nim, &n2hdr) ) return NULL;
+      nver = 2;
+      hsize = (int)sizeof(nifti_2_header);
+   }
+   else {
+      nifti_set_iname_offset(nim, 1);
+      if( nifti_convert_nim2n1hdr(nim, &n1hdr) ) return NULL;
+   }
+
+   /* if writing to 2 files, make sure iname is set and different from fname */
+   if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) && (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){
+       if( nim->iname && strcmp(nim->iname,nim->fname) == 0 ){
+         free(nim->iname) ; nim->iname = NULL ;
+       }
+       if( nim->iname == NULL ){ /* then make a new one */
+         nim->iname = nifti_makeimgname(nim->fname,nim->nifti_type,0,0);
+         if( nim->iname == NULL ) return NULL;
+       }
+   }
+
+   /* if we have an imgfile and will write the header there, use it */
+   if( ! znz_isnull(imgfile) && (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 || nim->nifti_type == NIFTI_FTYPE_NIFTI2_1) ){
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d using passed file for hdr\n");
+      fp = imgfile;
+   }
+   else {
+      if( g_opts.debug > 2 )
+         Rc_fprintf_stderr("+d opening output file %s [%s]\n",nim->fname,opts);
+      fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ;
+      if( znz_isnull(fp) ){
+         LNI_FERR(func,"cannot open output file",nim->fname);
+         return fp;
+      }
+   }
+
+   /* write the header and extensions */
+
+   if( nver == 2 ) ss = znzwrite(&n2hdr , 1 , hsize , fp); /* write header */
+   else            ss = znzwrite(&n1hdr , 1 , hsize , fp); /* write header */
+
+   if( ss < hsize ){
+      LNI_FERR(func,"bad header write to output file",nim->fname);
+      znzclose(fp); return fp;
+   }
+
+   /* partial file exists, and errors have been printed, so ignore return */
+   if( nim->nifti_type != NIFTI_FTYPE_ANALYZE )
+      (void)nifti_write_extensions(fp,nim);
+
+   /* if the header is all we want, we are done */
+   if( ! write_data && ! leave_open ){
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d header is all we want: done\n");
+      znzclose(fp); return(fp);
+   }
+
+   if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) && (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){ /* get a new file pointer */
+      znzclose(fp);         /* first, close header file */
+      if( ! znz_isnull(imgfile) ){
+         if(g_opts.debug > 2) Rc_fprintf_stderr("+d using passed file for img\n");
+         fp = imgfile;
+      }
+      else {
+         if( g_opts.debug > 2 )
+            Rc_fprintf_stderr("+d opening img file '%s'\n", nim->iname);
+         fp = znzopen( nim->iname , opts , nifti_is_gzfile(nim->iname) ) ;
+         if( znz_isnull(fp) ) ERREX("cannot open image file") ;
+      }
+   }
+
+   znzseek(fp, nim->iname_offset, SEEK_SET);  /* in any case, seek to offset */
+
+   if( write_data ) nifti_write_all_data(fp,nim,NBL);
+   if( ! leave_open ) znzclose(fp);
+
+   return fp;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! write a nifti_image to disk in ASCII format
+*//*--------------------------------------------------------------------*/
+znzFile nifti2_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL,
+                              const char *opts, int write_data, int leave_open)
+{
+   znzFile   fp;
+   char    * hstr;
+
+   hstr = nifti_image_to_ascii( nim ) ;  /* get header in ASCII form */
+   if( ! hstr ){ Rc_fprintf_stderr("** failed image_to_ascii()\n"); return NULL; }
+
+   fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ;
+   if( znz_isnull(fp) ){
+      free(hstr);
+      Rc_fprintf_stderr("** NIFTI: failed to open '%s' for ascii write\n",
+              nim->fname);
+      return fp;
+   }
+
+   znzputs(hstr,fp);                                               /* header */
+   nifti_write_extensions(fp,nim);                             /* extensions */
+
+   if ( write_data   ) { nifti_write_all_data(fp,nim,NBL); }         /* data */
+   if ( ! leave_open ) { znzclose(fp); }
+   free(hstr);
+   return fp;  /* returned but may be closed */
+}
+
+
+/*--------------------------------------------------------------------------*/
+/*! Write a nifti_image to disk.
+
+   Since data is properly byte-swapped upon reading, it is assumed
+   to be in the byte-order of the current CPU at write time.  Thus,
+   nim->byte_order should match that of the current CPU.  Note that
+   the nifti_set_filenames() function takes the flag, set_byte_order.
+
+   The following fields of nim affect how the output appears:
+    - nifti_type = 0 ==> ANALYZE-7.5 format file pair will be written
+    - nifti_type = 1 ==> NIFTI-1 format single file will be written
+                         (data offset will be 352+extensions)
+    - nifti_type = 2 ==> NIFTI_1 format file pair will be written
+    - nifti_type = 3 ==> NIFTI_1 ASCII single file will be written
+    - fname is the name of the output file (header or header+data)
+    - if a file pair is being written, iname is the name of the data file
+    - existing files WILL be overwritten with extreme prejudice
+    - if qform_code > 0, the quatern_*, qoffset_*, and qfac fields determine
+      the qform output, NOT the qto_xyz matrix; if you want to compute these
+      fields from the qto_xyz matrix, you can use the utility function
+      nifti_mat44_to_quatern()
+
+   \sa nifti_image_write_bricks, nifti_image_free, nifti_set_filenames,
+       nifti_image_write_hdr_img
+*//*------------------------------------------------------------------------*/
+void nifti2_image_write( nifti_image *nim )
+{
+   znzFile fp = nifti_image_write_hdr_img(nim,1,"wb");
+   if( fp ){
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niw: done with znzFile\n");
+      free(fp);
+   }
+   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d nifti_image_write: done\n");
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! similar to nifti_image_write, but data is in NBL struct, not nim->data
+
+   \sa nifti_image_write, nifti_image_free, nifti_set_filenames, nifti_free_NBL
+*//*--------------------------------------------------------------------*/
+void nifti2_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL )
+{
+   znzFile fp = nifti_image_write_hdr_img2(nim,1,"wb",NULL,NBL);
+   if( fp ){
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niwb: done with znzFile\n");
+      free(fp);
+   }
+   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d niwb: done writing bricks\n");
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! copy the nifti_image structure, without data
+
+    Duplicate the structure, including fname, iname and extensions.
+    Leave the data pointer as NULL.
+*//*--------------------------------------------------------------------*/
+nifti_image * nifti2_copy_nim_info(const nifti_image * src)
+{
+  nifti_image *dest;
+  dest = (nifti_image *)calloc(1,sizeof(nifti_image));
+  if( !dest ){
+     Rc_fprintf_stderr("** NCNI: failed to alloc nifti_image\n");
+     return NULL;
+  }
+  memcpy(dest, src, sizeof(nifti_image));
+  if( src->fname ) dest->fname = nifti_strdup(src->fname);
+  if( src->iname ) dest->iname = nifti_strdup(src->iname);
+  dest->num_ext = 0;
+  dest->ext_list = NULL;
+  /* errors will be printed in NCE(), continue in either case */
+  (void)nifti_copy_extensions(dest, src);
+
+  dest->data = NULL;
+
+  return dest;
+}
+
+
+/*------------------------------------------------------------------------*/
+/* Un-escape a C string in place -- that is, convert XML escape sequences
+   back into their characters.  (This can be done in place since the
+   replacement is always smaller than the input.)  Escapes recognized are:
+     -  &lt;   ->  <
+     -  &gt;   ->  >
+     -  &quot; ->  "
+     -  &apos; ->  '
+     -  &amp;  ->  &
+   Also replace CR LF pair (Microsoft), or CR alone (Macintosh) with
+   LF (Unix), per the XML standard.
+   Return value is number of replacements made (if you care).
+--------------------------------------------------------------------------*/
+
+#undef  CR
+#undef  LF
+#define CR 0x0D
+#define LF 0x0A
+
+static int unescape_string( char *str )
+{
+   int ii,jj , nn,ll ;
+
+   if( str == NULL ) return 0 ;                /* no string? */
+   ll = (int)strlen(str) ; if( ll == 0 ) return 0 ;
+
+   /* scan for escapes: &something; */
+
+   for( ii=jj=nn=0 ; ii<ll ; ii++,jj++ ){ /* scan at ii; results go in at jj */
+
+     if( str[ii] == '&' ){  /* start of escape? */
+
+             if( ii+3 < ll        &&   /* &lt; */
+                 str[ii+1] == 'l' &&
+                 str[ii+2] == 't' &&
+                 str[ii+3] == ';'   ){ str[jj] = '<' ; ii += 3 ; nn++ ; }
+
+        else if( ii+3 < ll        &&   /* &gt; */
+                 str[ii+1] == 'g' &&
+                 str[ii+2] == 't' &&
+                 str[ii+3] == ';'   ){ str[jj] = '>' ; ii += 3 ; nn++ ; }
+
+        else if( ii+5 < ll        &&   /* &quot; */
+                 str[ii+1] == 'q' &&
+                 str[ii+2] == 'u' &&
+                 str[ii+3] == 'o' &&
+                 str[ii+4] == 't' &&
+                 str[ii+5] == ';'   ){ str[jj] = '"' ; ii += 5 ; nn++ ; }
+
+        else if( ii+5 < ll        &&   /* &apos; */
+                 str[ii+1] == 'a' &&
+                 str[ii+2] == 'p' &&
+                 str[ii+3] == 'o' &&
+                 str[ii+4] == 's' &&
+                 str[ii+5] == ';'   ){ str[jj] = '\'' ; ii += 5 ; nn++ ; }
+
+        else if( ii+4 < ll        &&  /* &amp; */
+                 str[ii+1] == 'a' &&
+                 str[ii+2] == 'm' &&
+                 str[ii+3] == 'p' &&
+                 str[ii+4] == ';'   ){ str[jj] = '&' ; ii += 4 ; nn++ ; }
+
+        /* although the comments above don't mention it,
+           we also look for XML style numeric escapes
+           of the forms &#32; (decimal) and &#xfd; (hex) */
+
+        else if( ii+3 < ll        &&
+                 str[ii+1] == '#' &&
+                 isdigit((int) str[ii+2]) ){   /* &#dec; */
+
+           unsigned int val='?' ; int kk=ii+3 ;
+           while( kk < ll && kk != ';' ) kk++ ;
+           sscanf( str+ii+2 , "%u" , &val ) ;
+           str[jj] = (char) val ; ii = kk ; nn++ ;
+        }
+
+        else if( ii+4 < ll        &&
+                 str[ii+1] == '#' &&
+                 str[ii+2] == 'x' &&
+                 isxdigit((int) str[ii+3]) ){   /* &#hex; */
+
+           unsigned int val='?' ; int kk=ii+4 ;
+           while( kk < ll && kk != ';' ) kk++ ;
+           sscanf( str+ii+3 , "%x" , &val ) ;
+           str[jj] = (char) val ; ii = kk ; nn++ ;
+        }
+
+        /* didn't start a recognized escape, so just copy as normal */
+
+        else if( jj < ii ){ str[jj] = str[ii] ; }
+
+     } else if( str[ii] == CR ) {  /* is a carriage return */
+
+        if( str[ii+1] == LF ){ str[jj] = LF ; ii++ ; nn++ ; }  /* CR LF */
+        else                 { str[jj] = LF ;      ; nn++ ; }  /* CR only */
+
+     } else { /* is a normal character, just copy to output */
+
+             if( jj < ii ){ str[jj] = str[ii] ; }
+     }
+
+     /* at this point, ii=index of last character used up in scan
+                       jj=index of last character written to (jj <= ii) */
+   }
+
+   if( jj < ll ) str[jj] = '\0' ; /* end string properly */
+
+   return nn ;
+}
+
+/*------------------------------------------------------------------------*/
+/* Quotize (and escapize) one string, returning a new string.
+   Approximately speaking, this is the inverse of unescape_string().
+   The result should be free()-ed when you are done with it.
+--------------------------------------------------------------------------*/
+
+static char *escapize_string( const char * str )
+{
+   int ii,jj , lstr,lout ;
+   char *out ;
+
+   if( str == NULL || (lstr=(int)strlen(str)) == 0 ){      /* 0 length */
+     out = nifti_strdup("''") ; return out ;                /* string?? */
+   }
+
+   lout = 4 ;                      /* initialize length of output */
+   for( ii=0 ; ii < lstr ; ii++ ){ /* count characters for output */
+     switch( str[ii] ){
+       case '&':  lout += 5 ; break ;  /* replace '&' with "&amp;" */
+
+       case '<':
+       case '>':  lout += 4 ; break ;  /* replace '<' with "&lt;" */
+
+       case '"' :
+       case '\'': lout += 6 ; break ;  /* replace '"' with "&quot;" */
+
+       case CR:
+       case LF:   lout += 6 ; break ;  /* replace CR with "&#x0d;"
+                                                  LF with "&#x0a;" */
+
+       default: lout++ ; break ;      /* copy all other chars */
+     }
+   }
+   out = (char *)calloc(1,lout) ;     /* allocate output string */
+   if( !out ){
+      Rc_fprintf_stderr("** NIFTI escapize_string: failed to alloc %d bytes\n",
+              lout);
+      return NULL;
+   }
+   out[0] = '\'' ;                    /* opening quote mark */
+   for( ii=0,jj=1 ; ii < lstr ; ii++ ){
+      switch( str[ii] ){
+         default: out[jj++] = str[ii] ; break ;  /* normal characters */
+
+         case '&':  memcpy(out+jj,"&amp;",5)  ; jj+=5 ; break ;
+
+         case '<':  memcpy(out+jj,"&lt;",4)   ; jj+=4 ; break ;
+         case '>':  memcpy(out+jj,"&gt;",4)   ; jj+=4 ; break ;
+
+         case '"' : memcpy(out+jj,"&quot;",6) ; jj+=6 ; break ;
+
+         case '\'': memcpy(out+jj,"&apos;",6) ; jj+=6 ; break ;
+
+         case CR:   memcpy(out+jj,"&#x0d;",6) ; jj+=6 ; break ;
+         case LF:   memcpy(out+jj,"&#x0a;",6) ; jj+=6 ; break ;
+      }
+   }
+   out[jj++] = '\''  ;  /* closing quote mark */
+   out[jj]   = '\0' ;  /* terminate the string */
+   return out ;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! Dump the information in a NIFTI image header to an XML-ish ASCII string
+   that can later be converted back into a NIFTI header in
+   nifti_image_from_ascii().
+
+   The resulting string can be free()-ed when you are done with it.
+*//*-------------------------------------------------------------------------*/
+char *nifti2_image_to_ascii( const nifti_image *nim )
+{
+#ifdef USING_R
+   Rf_error("nifti2_image_to_ascii is currently unimplemented for R packages, for portability reasons");
+   return NULL;
+#else
+   char *buf , *ebuf ; int nbuf ;
+
+   if( nim == NULL ) return NULL ;   /* stupid caller */
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("+d converting %s to ASCII\n",nim->fname);
+
+   buf = (char *)calloc(1,65534); /* longer than needed, to be safe */
+   if( !buf ){
+      Rc_fprintf_stderr("** NIFTI NITA: failed to alloc %d bytes\n",65534);
+      return NULL;
+   }
+
+   sprintf( buf , "<nifti_image\n" ) ;   /* XML-ish opener */
+
+   sprintf( buf+strlen(buf) , "  nifti_type = '%s'\n" ,
+              (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1) ? "NIFTI-1+"
+             :(nim->nifti_type == NIFTI_FTYPE_NIFTI1_2) ? "NIFTI-1"
+             :(nim->nifti_type == NIFTI_FTYPE_ASCII   ) ? "NIFTI-1A"
+             :(nim->nifti_type == NIFTI_FTYPE_NIFTI2_1) ? "NIFTI-2+"
+             :(nim->nifti_type == NIFTI_FTYPE_NIFTI2_2) ? "NIFTI-2"
+             :                         "ANALYZE-7.5" ) ;
+
+   /** Strings that we don't control (filenames, etc.) that might
+       contain "weird" characters (like quotes) are "escaped":
+       - A few special characters are replaced by XML-style escapes, using
+         the function escapize_string().
+       - On input, function unescape_string() reverses this process.
+       - The result is that the NIFTI ASCII-format header is XML-compliant. */
+
+   ebuf = escapize_string(nim->fname) ;
+   sprintf( buf+strlen(buf) , "  header_filename = %s\n",ebuf); free(ebuf);
+
+   ebuf = escapize_string(nim->iname) ;
+   sprintf( buf+strlen(buf) , "  image_filename = %s\n", ebuf); free(ebuf);
+
+   sprintf( buf+strlen(buf) , "  image_offset = '%" PRId64 "'\n" ,
+            nim->iname_offset );
+
+   sprintf( buf+strlen(buf), "  ndim = '%" PRId64 "'\n",nim->ndim);
+   sprintf( buf+strlen(buf), "  nx = '%" PRId64 "'\n",  nim->nx  );
+   if( nim->ndim > 1 )
+      sprintf( buf+strlen(buf), "  ny = '%" PRId64 "'\n",  nim->ny  );
+   if( nim->ndim > 2 )
+      sprintf( buf+strlen(buf), "  nz = '%" PRId64 "'\n",  nim->nz  );
+   if( nim->ndim > 3 )
+      sprintf( buf+strlen(buf), "  nt = '%" PRId64 "'\n",  nim->nt  );
+   if( nim->ndim > 4 )
+      sprintf( buf+strlen(buf), "  nu = '%" PRId64 "'\n",  nim->nu  );
+   if( nim->ndim > 5 )
+      sprintf( buf+strlen(buf), "  nv = '%" PRId64 "'\n",  nim->nv  );
+   if( nim->ndim > 6 )
+      sprintf( buf+strlen(buf), "  nw = '%" PRId64 "'\n",  nim->nw  );
+
+                       sprintf( buf+strlen(buf), "  dx = '%g'\n",   nim->dx  );
+   if( nim->ndim > 1 ) sprintf( buf+strlen(buf), "  dy = '%g'\n",   nim->dy  );
+   if( nim->ndim > 2 ) sprintf( buf+strlen(buf), "  dz = '%g'\n",   nim->dz  );
+   if( nim->ndim > 3 ) sprintf( buf+strlen(buf), "  dt = '%g'\n",   nim->dt  );
+   if( nim->ndim > 4 ) sprintf( buf+strlen(buf), "  du = '%g'\n",   nim->du  );
+   if( nim->ndim > 5 ) sprintf( buf+strlen(buf), "  dv = '%g'\n",   nim->dv  );
+   if( nim->ndim > 6 ) sprintf( buf+strlen(buf), "  dw = '%g'\n",   nim->dw  );
+
+   sprintf( buf+strlen(buf) , "  datatype = '%d'\n" , nim->datatype ) ;
+   sprintf( buf+strlen(buf) , "  datatype_name = '%s'\n" ,
+                              nifti_datatype_string(nim->datatype) ) ;
+
+   sprintf( buf+strlen(buf) , "  nvox = '%" PRId64 "'\n" ,  nim->nvox ) ;
+   sprintf( buf+strlen(buf) , "  nbyper = '%d'\n" , nim->nbyper ) ;
+
+   sprintf( buf+strlen(buf) , "  byteorder = '%s'\n" ,
+            (nim->byteorder==MSB_FIRST) ? "MSB_FIRST" : "LSB_FIRST" ) ;
+
+   if( nim->cal_min < nim->cal_max ){
+     sprintf( buf+strlen(buf) , "  cal_min = '%g'\n", nim->cal_min ) ;
+     sprintf( buf+strlen(buf) , "  cal_max = '%g'\n", nim->cal_max ) ;
+   }
+
+   if( nim->scl_slope != 0.0 ){
+     sprintf( buf+strlen(buf) , "  scl_slope = '%g'\n" , nim->scl_slope ) ;
+     sprintf( buf+strlen(buf) , "  scl_inter = '%g'\n" , nim->scl_inter ) ;
+   }
+
+   if( nim->intent_code > 0 ){
+     sprintf( buf+strlen(buf) , "  intent_code = '%d'\n", nim->intent_code ) ;
+     sprintf( buf+strlen(buf) , "  intent_code_name = '%s'\n" ,
+                                nifti_intent_string(nim->intent_code) ) ;
+     sprintf( buf+strlen(buf) , "  intent_p1 = '%g'\n" , nim->intent_p1 ) ;
+     sprintf( buf+strlen(buf) , "  intent_p2 = '%g'\n" , nim->intent_p2 ) ;
+     sprintf( buf+strlen(buf) , "  intent_p3 = '%g'\n" , nim->intent_p3 ) ;
+
+     if( nim->intent_name[0] != '\0' ){
+       ebuf = escapize_string(nim->intent_name) ;
+       sprintf( buf+strlen(buf) , "  intent_name = %s\n",ebuf) ;
+       free(ebuf) ;
+     }
+   }
+
+   if( nim->toffset != 0.0 )
+     sprintf( buf+strlen(buf) , "  toffset = '%g'\n",nim->toffset ) ;
+
+   if( nim->xyz_units > 0 )
+     sprintf( buf+strlen(buf) ,
+              "  xyz_units = '%d'\n"
+              "  xyz_units_name = '%s'\n" ,
+              nim->xyz_units , nifti_units_string(nim->xyz_units) ) ;
+
+   if( nim->time_units > 0 )
+     sprintf( buf+strlen(buf) ,
+              "  time_units = '%d'\n"
+              "  time_units_name = '%s'\n" ,
+              nim->time_units , nifti_units_string(nim->time_units) ) ;
+
+   if( nim->freq_dim > 0 )
+     sprintf( buf+strlen(buf) , "  freq_dim = '%d'\n",nim->freq_dim ) ;
+   if( nim->phase_dim > 0 )
+     sprintf( buf+strlen(buf) , "  phase_dim = '%d'\n",nim->phase_dim ) ;
+   if( nim->slice_dim > 0 )
+     sprintf( buf+strlen(buf) , "  slice_dim = '%d'\n",nim->slice_dim ) ;
+   if( nim->slice_code > 0 )
+     sprintf( buf+strlen(buf) ,
+              "  slice_code = '%d'\n"
+              "  slice_code_name = '%s'\n" ,
+              nim->slice_code , nifti_slice_string(nim->slice_code) ) ;
+   if( nim->slice_start >= 0 && nim->slice_end > nim->slice_start )
+     sprintf( buf+strlen(buf) ,
+              "  slice_start = '%" PRId64 "'\n"
+              "  slice_end = '%" PRId64 "'\n",
+              nim->slice_start , nim->slice_end ) ;
+   if( nim->slice_duration != 0.0 )
+     sprintf( buf+strlen(buf) , "  slice_duration = '%g'\n",
+              nim->slice_duration ) ;
+
+   if( nim->descrip[0] != '\0' ){
+     ebuf = escapize_string(nim->descrip) ;
+     sprintf( buf+strlen(buf) , "  descrip = %s\n",ebuf) ;
+     free(ebuf) ;
+   }
+
+   if( nim->aux_file[0] != '\0' ){
+     ebuf = escapize_string(nim->aux_file) ;
+     sprintf( buf+strlen(buf) , "  aux_file = %s\n",ebuf) ;
+     free(ebuf) ;
+   }
+
+   if( nim->qform_code > 0 ){
+     int i,j,k ;
+
+     sprintf( buf+strlen(buf) ,
+              "  qform_code = '%d'\n"
+              "  qform_code_name = '%s'\n"
+     "  qto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
+         nim->qform_code      , nifti_xform_string(nim->qform_code) ,
+         nim->qto_xyz.m[0][0] , nim->qto_xyz.m[0][1] ,
+         nim->qto_xyz.m[0][2] , nim->qto_xyz.m[0][3] ,
+         nim->qto_xyz.m[1][0] , nim->qto_xyz.m[1][1] ,
+         nim->qto_xyz.m[1][2] , nim->qto_xyz.m[1][3] ,
+         nim->qto_xyz.m[2][0] , nim->qto_xyz.m[2][1] ,
+         nim->qto_xyz.m[2][2] , nim->qto_xyz.m[2][3] ,
+         nim->qto_xyz.m[3][0] , nim->qto_xyz.m[3][1] ,
+         nim->qto_xyz.m[3][2] , nim->qto_xyz.m[3][3]  ) ;
+
+     sprintf( buf+strlen(buf) ,
+     "  qto_ijk_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
+         nim->qto_ijk.m[0][0] , nim->qto_ijk.m[0][1] ,
+         nim->qto_ijk.m[0][2] , nim->qto_ijk.m[0][3] ,
+         nim->qto_ijk.m[1][0] , nim->qto_ijk.m[1][1] ,
+         nim->qto_ijk.m[1][2] , nim->qto_ijk.m[1][3] ,
+         nim->qto_ijk.m[2][0] , nim->qto_ijk.m[2][1] ,
+         nim->qto_ijk.m[2][2] , nim->qto_ijk.m[2][3] ,
+         nim->qto_ijk.m[3][0] , nim->qto_ijk.m[3][1] ,
+         nim->qto_ijk.m[3][2] , nim->qto_ijk.m[3][3]  ) ;
+
+     sprintf( buf+strlen(buf) ,
+              "  quatern_b = '%g'\n"
+              "  quatern_c = '%g'\n"
+              "  quatern_d = '%g'\n"
+              "  qoffset_x = '%g'\n"
+              "  qoffset_y = '%g'\n"
+              "  qoffset_z = '%g'\n"
+              "  qfac = '%g'\n" ,
+         nim->quatern_b , nim->quatern_c , nim->quatern_d ,
+         nim->qoffset_x , nim->qoffset_y , nim->qoffset_z , nim->qfac ) ;
+
+     nifti_dmat44_to_orientation( nim->qto_xyz , &i,&j,&k ) ;
+     if( i > 0 && j > 0 && k > 0 )
+       sprintf( buf+strlen(buf) ,
+                "  qform_i_orientation = '%s'\n"
+                "  qform_j_orientation = '%s'\n"
+                "  qform_k_orientation = '%s'\n" ,
+                nifti_orientation_string(i) ,
+                nifti_orientation_string(j) ,
+                nifti_orientation_string(k)  ) ;
+   }
+
+   if( nim->sform_code > 0 ){
+     int i,j,k ;
+
+     sprintf( buf+strlen(buf) ,
+              "  sform_code = '%d'\n"
+              "  sform_code_name = '%s'\n"
+     "  sto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
+         nim->sform_code      , nifti_xform_string(nim->sform_code) ,
+         nim->sto_xyz.m[0][0] , nim->sto_xyz.m[0][1] ,
+         nim->sto_xyz.m[0][2] , nim->sto_xyz.m[0][3] ,
+         nim->sto_xyz.m[1][0] , nim->sto_xyz.m[1][1] ,
+         nim->sto_xyz.m[1][2] , nim->sto_xyz.m[1][3] ,
+         nim->sto_xyz.m[2][0] , nim->sto_xyz.m[2][1] ,
+         nim->sto_xyz.m[2][2] , nim->sto_xyz.m[2][3] ,
+         nim->sto_xyz.m[3][0] , nim->sto_xyz.m[3][1] ,
+         nim->sto_xyz.m[3][2] , nim->sto_xyz.m[3][3]  ) ;
+
+     sprintf( buf+strlen(buf) ,
+     "  sto_ijk matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
+         nim->sto_ijk.m[0][0] , nim->sto_ijk.m[0][1] ,
+         nim->sto_ijk.m[0][2] , nim->sto_ijk.m[0][3] ,
+         nim->sto_ijk.m[1][0] , nim->sto_ijk.m[1][1] ,
+         nim->sto_ijk.m[1][2] , nim->sto_ijk.m[1][3] ,
+         nim->sto_ijk.m[2][0] , nim->sto_ijk.m[2][1] ,
+         nim->sto_ijk.m[2][2] , nim->sto_ijk.m[2][3] ,
+         nim->sto_ijk.m[3][0] , nim->sto_ijk.m[3][1] ,
+         nim->sto_ijk.m[3][2] , nim->sto_ijk.m[3][3]  ) ;
+
+     nifti_dmat44_to_orientation( nim->sto_xyz , &i,&j,&k ) ;
+     if( i > 0 && j > 0 && k > 0 )
+       sprintf( buf+strlen(buf) ,
+                "  sform_i_orientation = '%s'\n"
+                "  sform_j_orientation = '%s'\n"
+                "  sform_k_orientation = '%s'\n" ,
+                nifti_orientation_string(i) ,
+                nifti_orientation_string(j) ,
+                nifti_orientation_string(k)  ) ;
+   }
+
+   sprintf( buf+strlen(buf) , "  num_ext = '%d'\n", nim->num_ext ) ;
+
+   sprintf( buf+strlen(buf) , "/>\n" ) ;   /* XML-ish closer */
+
+   nbuf = (int)strlen(buf) ;
+   buf  = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */
+   if( !buf ) Rc_fprintf_stderr("** NIFTI NITA: failed to realloc %d bytes\n",
+                      nbuf+1);
+   return buf ;
+#endif
+}
+
+/*---------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------*/
+/*! get the byte order for this CPU
+
+    - LSB_FIRST means least significant byte, first (little endian)
+    - MSB_FIRST means most significant byte, first (big endian)
+*//*--------------------------------------------------------------------*/
+int nifti_short_order(void)   /* determine this CPU's byte order */
+{
+   union { unsigned char bb[2] ;
+           short         ss    ; } fred ;
+
+   fred.bb[0] = 1 ; fred.bb[1] = 0 ;
+
+   return (fred.ss == 1) ? LSB_FIRST : MSB_FIRST ;
+}
+
+/*---------------------------------------------------------------------------*/
+
+#undef  QQNUM
+#undef  QNUM
+#undef  QSTR
+
+/* macro to check lhs string against "n1"; if it matches,
+   interpret rhs string as a number, and put it into nim->"n2" */
+
+#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)strtod(rhs,NULL)
+
+/* same, but where "n1" == "n2" */
+
+#define QNUM(nam,tt)    QQNUM(nam,nam,tt)
+
+/* macro to check lhs string against "nam"; if it matches,
+   put rhs string into nim->"nam" string, with max length = "ml" */
+
+#define QSTR(nam,ml) if( strcmp(lhs,#nam) == 0 )                           \
+                       strncpy(nim->nam,rhs,ml), nim->nam[ml]='\0'
+
+/*---------------------------------------------------------------------------*/
+/*! Take an XML-ish ASCII string and create a NIFTI image header to match.
+
+    NULL is returned if enough information isn't present in the input string.
+    - The image data can later be loaded with nifti_image_load().
+    - The struct returned here can be liberated with nifti_image_free().
+    - Not a lot of error checking is done here to make sure that the
+      input values are reasonable!
+*//*-------------------------------------------------------------------------*/
+nifti_image *nifti2_image_from_ascii( const char *str, int * bytes_read )
+{
+   char lhs[1024] , rhs[1024] ;
+   int ii , spos, nn ;
+   nifti_image *nim ;              /* will be output */
+
+   if( str == NULL || *str == '\0' ) return NULL ;  /* bad input!? */
+
+   /* scan for opening string */
+
+   spos = 0 ;
+   ii = sscanf( str+spos , "%1023s%n" , lhs , &nn ) ; spos += nn ;
+   if( ii == 0 || strcmp(lhs,"<nifti_image") != 0 ) return NULL ;
+
+   /* create empty image struct */
+
+   nim = (nifti_image *)calloc( 1 , sizeof(nifti_image) ) ;
+   if( !nim ){
+      Rc_fprintf_stderr("** NIFA: failed to alloc nifti_image\n");
+      return NULL;
+   }
+
+   nim->nx = nim->ny = nim->nz = nim->nt
+           = nim->nu = nim->nv = nim->nw = 1 ;
+   nim->dx = nim->dy = nim->dz = nim->dt
+           = nim->du = nim->dv = nim->dw = 0 ;
+   nim->qfac = 1.0f ;
+
+   nim->byteorder = nifti_short_order() ;
+
+   /* starting at str[spos], scan for "equations" of the form
+         lhs = 'rhs'
+      and assign rhs values into the struct component named by lhs */
+
+   while(1){
+
+     while( isspace((int) str[spos]) ) spos++ ;  /* skip whitespace */
+     if( str[spos] == '\0' ) break ;       /* end of string? */
+
+     /* get lhs string */
+
+     ii = sscanf( str+spos , "%1023s%n" , lhs , &nn ) ; spos += nn ;
+     if( ii == 0 || strcmp(lhs,"/>") == 0 ) break ;  /* end of input? */
+
+     /* skip whitespace and the '=' marker */
+
+     while( isspace((int) str[spos]) || str[spos] == '=' ) spos++ ;
+     if( str[spos] == '\0' ) break ;       /* end of string? */
+
+     /* if next character is a quote ', copy everything up to next '
+        otherwise, copy everything up to next nonblank              */
+
+     if( str[spos] == '\'' ){
+        ii = spos+1 ;
+        while( str[ii] != '\0' && str[ii] != '\'' ) ii++ ;
+        nn = ii-spos-1 ; if( nn > 1023 ) nn = 1023 ;
+        memcpy(rhs,str+spos+1,nn) ; rhs[nn] = '\0' ;
+        spos = (str[ii] == '\'') ? ii+1 : ii ;
+     } else {
+        ii = sscanf( str+spos , "%1023s%n" , rhs , &nn ) ; spos += nn ;
+        if( ii == 0 ) break ;  /* nothing found? */
+     }
+     unescape_string(rhs) ;  /* remove any XML escape sequences */
+
+     /* Now can do the assignment, based on lhs string.
+        Start with special cases that don't fit the QNUM/QSTR macros. */
+
+     if( strcmp(lhs,"nifti_type") == 0 ){
+            if( strcmp(rhs,"ANALYZE-7.5") == 0 )
+               nim->nifti_type = NIFTI_FTYPE_ANALYZE ;
+       else if( strcmp(rhs,"NIFTI-1+")    == 0 )
+               nim->nifti_type = NIFTI_FTYPE_NIFTI1_1 ;
+       else if( strcmp(rhs,"NIFTI-1")     == 0 )
+               nim->nifti_type = NIFTI_FTYPE_NIFTI1_2 ;
+       else if( strcmp(rhs,"NIFTI-1A")    == 0 )
+               nim->nifti_type = NIFTI_FTYPE_ASCII ;
+       else if( strcmp(rhs,"NIFTI-2+")    == 0 )
+               nim->nifti_type = NIFTI_FTYPE_NIFTI2_1 ;
+       else if( strcmp(rhs,"NIFTI-2")     == 0 )
+               nim->nifti_type = NIFTI_FTYPE_NIFTI2_2 ;
+     }
+     else if( strcmp(lhs,"header_filename") == 0 ){
+       nim->fname = nifti_strdup(rhs) ;
+     }
+     else if( strcmp(lhs,"image_filename") == 0 ){
+       nim->iname = nifti_strdup(rhs) ;
+     }
+     else if( strcmp(lhs,"sto_xyz_matrix") == 0 ){
+       sscanf( rhs , "%lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf" ,
+               &(nim->sto_xyz.m[0][0]) , &(nim->sto_xyz.m[0][1]) ,
+               &(nim->sto_xyz.m[0][2]) , &(nim->sto_xyz.m[0][3]) ,
+               &(nim->sto_xyz.m[1][0]) , &(nim->sto_xyz.m[1][1]) ,
+               &(nim->sto_xyz.m[1][2]) , &(nim->sto_xyz.m[1][3]) ,
+               &(nim->sto_xyz.m[2][0]) , &(nim->sto_xyz.m[2][1]) ,
+               &(nim->sto_xyz.m[2][2]) , &(nim->sto_xyz.m[2][3]) ,
+               &(nim->sto_xyz.m[3][0]) , &(nim->sto_xyz.m[3][1]) ,
+               &(nim->sto_xyz.m[3][2]) , &(nim->sto_xyz.m[3][3])  ) ;
+     }
+     else if( strcmp(lhs,"byteorder") == 0 ){
+       if( strcmp(rhs,"MSB_FIRST") == 0 ) nim->byteorder = MSB_FIRST ;
+       if( strcmp(rhs,"LSB_FIRST") == 0 ) nim->byteorder = LSB_FIRST ;
+     }
+     else QQNUM(image_offset,iname_offset,int) ;
+     else QNUM(datatype,short int) ;
+     else QNUM(ndim,int) ;
+     else QNUM(nx,int) ;
+     else QNUM(ny,int) ;
+     else QNUM(nz,int) ;
+     else QNUM(nt,int) ;
+     else QNUM(nu,int) ;
+     else QNUM(nv,int) ;
+     else QNUM(nw,int) ;
+     else QNUM(dx,float) ;
+     else QNUM(dy,float) ;
+     else QNUM(dz,float) ;
+     else QNUM(dt,float) ;
+     else QNUM(du,float) ;
+     else QNUM(dv,float) ;
+     else QNUM(dw,float) ;
+     else QNUM(cal_min,float) ;
+     else QNUM(cal_max,float) ;
+     else QNUM(scl_slope,float) ;
+     else QNUM(scl_inter,float) ;
+     else QNUM(intent_code,short) ;
+     else QNUM(intent_p1,float) ;
+     else QNUM(intent_p2,float) ;
+     else QNUM(intent_p3,float) ;
+     else QSTR(intent_name,15) ;
+     else QNUM(toffset,float) ;
+     else QNUM(xyz_units,int) ;
+     else QNUM(time_units,int) ;
+     else QSTR(descrip,79) ;
+     else QSTR(aux_file,23) ;
+     else QNUM(qform_code,int) ;
+     else QNUM(quatern_b,float) ;
+     else QNUM(quatern_c,float) ;
+     else QNUM(quatern_d,float) ;
+     else QNUM(qoffset_x,float) ;
+     else QNUM(qoffset_y,float) ;
+     else QNUM(qoffset_z,float) ;
+     else QNUM(qfac,float) ;
+     else QNUM(sform_code,int) ;
+     else QNUM(freq_dim,int) ;
+     else QNUM(phase_dim,int) ;
+     else QNUM(slice_dim,int) ;
+     else QNUM(slice_code,int) ;
+     else QNUM(slice_start,int) ;
+     else QNUM(slice_end,int) ;
+     else QNUM(slice_duration,float) ;
+     else QNUM(num_ext,int) ;
+
+   } /* end of while loop */
+
+   if( bytes_read ) *bytes_read = spos+1;         /* "process" last '\n' */
+
+   /* do miscellaneous checking and cleanup */
+
+   if( nim->ndim <= 0 ){ nifti_image_free(nim); return NULL; } /* bad! */
+
+   nifti_datatype_sizes( nim->datatype, &(nim->nbyper), &(nim->swapsize) );
+   if( nim->nbyper == 0 ){ nifti_image_free(nim); return NULL; } /* bad! */
+
+   nim->dim[0] = nim->ndim ;
+   nim->dim[1] = nim->nx ; nim->pixdim[1] = nim->dx ;
+   nim->dim[2] = nim->ny ; nim->pixdim[2] = nim->dy ;
+   nim->dim[3] = nim->nz ; nim->pixdim[3] = nim->dz ;
+   nim->dim[4] = nim->nt ; nim->pixdim[4] = nim->dt ;
+   nim->dim[5] = nim->nu ; nim->pixdim[5] = nim->du ;
+   nim->dim[6] = nim->nv ; nim->pixdim[6] = nim->dv ;
+   nim->dim[7] = nim->nw ; nim->pixdim[7] = nim->dw ;
+
+   nim->nvox = (int64_t)nim->nx * nim->ny * nim->nz
+                      * nim->nt * nim->nu * nim->nv * nim->nw ;
+
+   if( nim->qform_code > 0 )
+     nim->qto_xyz = nifti_quatern_to_dmat44(
+                      nim->quatern_b, nim->quatern_c, nim->quatern_d,
+                      nim->qoffset_x, nim->qoffset_y, nim->qoffset_z,
+                      nim->dx       , nim->dy       , nim->dz       ,
+                      nim->qfac                                      ) ;
+   else
+     nim->qto_xyz = nifti_quatern_to_dmat44(
+                      0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 ,
+                      nim->dx , nim->dy , nim->dz , 0.0 ) ;
+
+
+   nim->qto_ijk = nifti_dmat44_inverse( nim->qto_xyz ) ;
+
+   if( nim->sform_code > 0 )
+     nim->sto_ijk = nifti_dmat44_inverse( nim->sto_xyz ) ;
+
+   return nim ;
+}
+
+
+/*---------------------------------------------------------------------------*/
+/*! validate the nifti_image
+
+    \return 1 if the structure seems valid, otherwise 0
+
+    \sa nifti_nim_has_valid_dims, nifti_hdr1_looks_good
+*//*-------------------------------------------------------------------------*/
+int nifti2_nim_is_valid(nifti_image * nim, int complain)
+{
+   int errs = 0;
+
+   if( !nim ){
+      Rc_fprintf_stderr("** NIFTI is_valid_nim: nim is NULL\n");
+      return 0;
+   }
+
+   if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d nim_is_valid check...\n");
+
+   /**- check that dim[] matches the individual values ndim, nx, ny, ... */
+   if( ! nifti_nim_has_valid_dims(nim,complain) ){
+      if( !complain ) return 0;
+      errs++;
+   }
+
+   /* might check nbyper, pixdim, q/sforms, swapsize, nifti_type, ... */
+
+   /**- be explicit in return of 0 or 1 */
+   if( errs > 0 ) return 0;
+   else           return 1;
+}
+
+/*---------------------------------------------------------------------------*/
+/*! validate nifti dimensions
+
+    \return 1 if valid, 0 if not
+
+    \sa nifti_nim_is_valid, nifti_hdr1_looks_good
+
+    rely on dim[] as the master
+*//*-------------------------------------------------------------------------*/
+int nifti2_nim_has_valid_dims(nifti_image * nim, int complain)
+{
+   int64_t prod, c;
+   int     errs = 0;
+
+   /**- start with dim[0]: failure here is considered terminal */
+   if( nim->dim[0] <= 0 || nim->dim[0] > 7 ){
+      errs++;
+      if( complain )
+        Rc_fprintf_stderr("** NIFTI NVd: dim[0] (%" PRId64
+                       ") out of range [1,7]\n", nim->dim[0]);
+      return 0;
+   }
+
+   /**- check whether ndim equals dim[0] */
+   if( nim->ndim != nim->dim[0] ){
+      errs++;
+      if( ! complain ) return 0;
+      Rc_fprintf_stderr("** NIFTI NVd: ndim != dim[0] (%" PRId64 ",%" PRId64 ")\n",
+              nim->ndim,nim->dim[0]);
+   }
+
+   /**- compare each dim[i] to the proper nx, ny, ... */
+   if( ( (nim->dim[0] >= 1) && (nim->dim[1] != nim->nx) ) ||
+       ( (nim->dim[0] >= 2) && (nim->dim[2] != nim->ny) ) ||
+       ( (nim->dim[0] >= 3) && (nim->dim[3] != nim->nz) ) ||
+       ( (nim->dim[0] >= 4) && (nim->dim[4] != nim->nt) ) ||
+       ( (nim->dim[0] >= 5) && (nim->dim[5] != nim->nu) ) ||
+       ( (nim->dim[0] >= 6) && (nim->dim[6] != nim->nv) ) ||
+       ( (nim->dim[0] >= 7) && (nim->dim[7] != nim->nw) )   ){
+      errs++;
+      if( !complain ) return 0;
+      Rc_fprintf_stderr("** NIFTI NVd mismatch: dims    = %" PRId64 ",%" PRId64
+              ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 "\n"
+              "                 nxyz... = %" PRId64 ",%" PRId64 ",%" PRId64
+              ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 "\n",
+                     nim->dim[1], nim->dim[2], nim->dim[3],
+                     nim->dim[4], nim->dim[5], nim->dim[6], nim->dim[7],
+                     nim->nx, nim->ny, nim->nz,
+                     nim->nt, nim->nu, nim->nv, nim->nw );
+   }
+
+   if( g_opts.debug > 2 ){
+      Rc_fprintf_stderr("-d check dim[%" PRId64 "] =", nim->dim[0]);
+      for( c = 0; c < 7; c++ ) Rc_fprintf_stderr(" %" PRId64 "", nim->dim[c]);
+      Rc_fputc_stderr('\n');
+   }
+
+   /**- check the dimensions, and that their product matches nvox */
+   prod = 1;
+   for( c = 1; c <= nim->dim[0]; c++ ){
+      if( nim->dim[c] > 0)
+         prod *= nim->dim[c];
+      else if( nim->dim[c] <= 0 ){
+         if( !complain ) return 0;
+         Rc_fprintf_stderr("** NIFTI NVd: dim[%" PRId64 "] (=%" PRId64 ") <= 0\n",
+                 c, nim->dim[c]);
+         errs++;
+      }
+   }
+   if( prod != nim->nvox ){
+      if( ! complain ) return 0;
+      Rc_fprintf_stderr("** NIFTI NVd: nvox does not match %" PRId64
+              "-dim product (%" PRId64 ", %" PRId64 ")\n",
+              nim->dim[0], nim->nvox, prod);
+      errs++;
+   }
+
+   /**- if debug, warn about any remaining dim that is neither 0, nor 1 */
+   /*   (values in dims above dim[0] are undefined, as reminded by Cinly
+         Ooi and Alle Meije Wink)                   16 Nov 2005 [rickr] */
+   if( g_opts.debug > 1 )
+      for( c = nim->dim[0]+1; c <= 7; c++ )
+         if( nim->dim[c] != 0 && nim->dim[c] != 1 )
+            Rc_fprintf_stderr("** NIFTI NVd warning: dim[%" PRId64 "] = %" PRId64
+                    ", but ndim = %" PRId64 "\n",
+                    c, nim->dim[c], nim->dim[0]);
+
+   if( g_opts.debug > 2 )
+      Rc_fprintf_stderr("-d nim_has_valid_dims check, errs = %d\n", errs);
+
+   /**- return invalid or valid */
+   if( errs > 0 ) return 0;
+   else           return 1;
+}
+
+
+/*---------------------------------------------------------------------------*/
+/*! read a nifti image, collapsed across dimensions according to dims[8]  <pre>
+
+    This function may be used to read parts of a nifti dataset, such as
+    the time series for a single voxel, or perhaps a slice.  It is similar
+    to nifti_image_load(), though the passed 'data' parameter is used for
+    returning the image, not nim->data.
+
+    \param nim  given nifti_image struct, corresponding to the data file
+    \param dims given list of dimensions (see below)
+    \param data pointer to data pointer (if *data is NULL, data will be
+                allocated, otherwise not)
+
+    Here, dims is an array of 8 ints, similar to nim->dim[8].  While dims[0]
+    is unused at this point, the other indices specify which dimensions to
+    collapse (and at which index), and which not to collapse.  If dims[i] is
+    set to -1, then that entire dimension will be read in, from index 0 to
+    index (nim->dim[i] - 1).  If dims[i] >= 0, then only that index will be
+    read in (so dims[i] must also be < nim->dim[i]).
+
+    Example: given  nim->dim[8] = { 4, 64, 64, 21, 80, 1, 1, 1 } (4-D dataset)
+
+      if dims[8] = { 0,  5,  4, 17, -1, -1, -1, -1 }
+         -> read time series for voxel i,j,k = 5,4,17
+
+      if dims[8] = { 0, -1, -1, -1, 17, -1, -1, -1 }
+         -> read single volume at time point 17
+
+    Example: given  nim->dim[8] = { 6, 64, 64, 21, 80, 4, 3, 1 } (6-D dataset)
+
+      if dims[8] = { 0, 5, 4, 17, -1, 2, 1, 0 }
+         -> read time series for the voxel i,j,k = 5,4,17, and dim 5,6 = 2,1
+
+      if dims[8] = { 0, 5, 4, -1, -1, 0, 0, 0 }
+         -> read time series for slice at i,j = 5,4, and dim 5,6,7 = 0,0,0
+            (note that dims[7] is not relevant, but must be 0 or -1)
+
+    If *data is NULL, then *data will be set as a pointer to new memory,
+    allocated here for the resulting collapsed image data.
+
+      e.g. { int    dims[8] = { 0,  5,  4, 17, -1, -1, -1, -1 };
+             void * data    = NULL;
+             ret_val = nifti_read_collapsed_image(nim, dims, &data);
+             if( ret_val > 0 ){
+                process_time_series(data);
+                if( data != NULL ) free(data);
+             }
+           }
+
+    NOTE: If *data is not NULL, then it will be assumed that it points to
+          valid memory, sufficient to hold the results.  This is done for
+          speed and possibly repeated calls to this function.
+
+      e.g. { int64_t dims[8] = { 0,  -1, -1, -1, -1, -1, -1, -1 };
+             void  * data    = NULL;
+             for( zslice = 0; zslice < nzslices; zslice++ ){
+                dims[3] = zslice;
+                ret_val = nifti_read_collapsed_image(nim, dims, &data);
+                if( ret_val > 0 ) process_slice(zslice, data);
+             }
+             if( data != NULL ) free(data);
+           }
+
+    \return
+        -  the total number of bytes read, or < 0 on failure
+        -  the read and byte-swapped data, in 'data'            </pre>
+
+    \sa nifti_image_read, nifti_image_free, nifti_image_read_bricks
+        nifti_image_load
+*//*-------------------------------------------------------------------------*/
+int64_t nifti2_read_collapsed_image( nifti_image * nim, const int64_t dims [8],
+                                void ** data )
+{
+   znzFile fp;
+   int64_t prods[8];          /* sizes are bounded by dims[], so 8 */
+   int     pivots[8], nprods; /* sizes are bounded by dims[], so 8 */
+   int64_t c, bytes;
+
+   /** - check pointers for sanity */
+   if( !nim || !dims || !data ){
+      Rc_fprintf_stderr("** nifti_RCI: bad params %p, %p, %p\n",
+              (void *)nim, (const void *)dims, (void *)data);
+      return -1;
+   }
+
+   if( g_opts.debug > 2 ){
+      Rc_fprintf_stderr("-d read_collapsed_image:\n        dims =");
+      for(c = 0; c < 8; c++) Rc_fprintf_stderr(" %3" PRId64 "", dims[c]);
+      Rc_fprintf_stderr("\n   nim->dims =");
+      for(c = 0; c < 8; c++) Rc_fprintf_stderr(" %3" PRId64 "", nim->dim[c]);
+      Rc_fputc_stderr('\n');
+   }
+
+   /** - verify that dim[] makes sense */
+   if( ! nifti_nim_is_valid(nim, g_opts.debug > 0) ){
+      Rc_fprintf_stderr("** NIFTI: invalid nim (file is '%s')\n", nim->fname );
+      return -1;
+   }
+
+   /** - verify that dims[] makes sense for this dataset */
+   for( c = 1; c <= nim->dim[0]; c++ ){
+      if( dims[c] >= nim->dim[c] ){
+         Rc_fprintf_stderr("** nifti_RCI: dims[%" PRId64 "] >= nim->dim[%" PRId64
+                 "] (%" PRId64 ",%" PRId64 ")\n",
+                 c, c, dims[c], nim->dim[c]);
+         return -1;
+      }
+   }
+
+   /** - prepare pivot list - pivots are fixed indices */
+   if( make_pivot_list(nim, dims, pivots, prods, &nprods) < 0 ) return -1;
+
+   bytes = rci_alloc_mem(data, prods, nprods, nim->nbyper);
+   if( bytes < 0 ) return -1;
+
+   /** - open the image file for reading at the appropriate offset */
+   fp = nifti_image_load_prep( nim );
+   if( ! fp ){ free(*data);  *data = NULL;  return -1; }     /* failure */
+
+   /** - call the recursive reading function, passing nim, the pivot info,
+         location to store memory, and file pointer and position */
+   c = rci_read_data(nim, pivots, prods, nprods, dims, (char *)*data, fp,
+                     znztell(fp));
+   znzclose(fp);   /* in any case, close the file */
+   if( c < 0 ){ free(*data);  *data = NULL;  return -1; }    /* failure */
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("+d read %" PRId64 " bytes of collapsed image from %s\n",
+              bytes, nim->fname);
+
+   return bytes;
+}
+
+
+/* local function to find strides per dimension. assumes 7D size and
+** stride array.
+*/
+static void
+compute_strides(int64_t *strides,const int64_t *size,int nbyper)
+{
+  int i;
+  strides[0] = nbyper;
+  for(i = 1; i < 7; i++)
+    {
+    strides[i] = size[i-1] * strides[i-1];
+    }
+}
+
+/*---------------------------------------------------------------------------*/
+/*! read an arbitrary subregion from a nifti image
+
+    This function may be used to read a single arbitary subregion of any
+    rectangular size from a nifti dataset, such as a small 5x5x5 subregion
+    around the center of a 3D image.
+
+    \param nim  given nifti_image struct, corresponding to the data file
+    \param start_index the index location of first voxel that will be returned
+    \param region_size the size of the subregion to be returned
+    \param data pointer to data pointer (if *data is NULL, data will be
+                allocated, otherwise not)
+
+    Example: given  nim->dim[8] = {3, 64, 64, 64, 1, 1, 1, 1 } (3-D dataset)
+
+      if start_index[7] = { 29,  29, 29, 0, 0, 0, 0 } and
+         region_size[7] = {  5,   5,  5, 1, 1, 1, 1 }
+         -> read 5x5x5 region starting with the first voxel at (29,29,29)
+
+    NOTE: If *data is not NULL, then it will be assumed that it points to
+          valid memory, sufficient to hold the results.  This is done for
+          speed and possibly repeated calls to this function.
+    \return
+        -  the total number of bytes read, or < 0 on failure
+        -  the read and byte-swapped data, in 'data'            </pre>
+
+    \sa nifti_image_read, nifti_image_free, nifti_image_read_bricks
+        nifti_image_load, nifti_read_collapsed_image
+*//*-------------------------------------------------------------------------*/
+int64_t nifti2_read_subregion_image( nifti_image * nim,
+                                const int64_t *start_index,
+                                const int64_t *region_size,
+                                void ** data )
+{
+  znzFile fp;                   /* file to read */
+  int64_t i,j,k,l,m,n;          /* indices for dims */
+  int64_t bytes = 0;            /* total # bytes read */
+  int64_t total_alloc_size;     /* size of buffer allocation */
+  char   *readptr;              /* where in *data to read next */
+  int64_t strides[7];           /* strides between dimensions */
+  int64_t collapsed_dims[8];    /* for read_collapsed_image */
+  int64_t *image_size;          /* pointer to dimensions in header */
+  int64_t initial_offset;
+  int64_t offset;               /* seek offset for reading current row */
+
+  /* probably ignored, but set to ndim for consistency*/
+  collapsed_dims[0] = nim->ndim;
+
+  /* build a dims array for collapsed image read */
+  for(i = 0; i < nim->ndim; i++) {
+    /* if you take the whole extent in this dimension */
+    if(start_index[i] == 0 && region_size[i] == nim->dim[i+1])
+      collapsed_dims[i+1] = -1;
+    /* if you specify a single element in this dimension */
+    else if(region_size[i] == 1)
+      collapsed_dims[i+1] = start_index[i];
+    else
+      collapsed_dims[i+1] = -2; /* sentinel value */
+  }
+  /* fill out end of collapsed_dims */
+  for(i = nim->ndim ; i < 7; i++)
+    collapsed_dims[i+1] = -1;
+
+  /* check to see whether collapsed read is possible */
+  for(i = 1; i <= nim->ndim; i++)
+    if(collapsed_dims[i] == -2) break;
+
+  /* if you get through all the dimensions without hitting
+  ** a subrange of size > 1, a collapsed read is possible
+  */
+  if(i > nim->ndim)
+    return nifti_read_collapsed_image(nim, collapsed_dims, data);
+
+  /* point past first element of dim, which holds nim->ndim */
+  image_size = &(nim->dim[1]);
+
+  /* check region sizes for sanity */
+  for(i = 0; i < nim->ndim; i++)
+    if(start_index[i]  + region_size[i] > image_size[i]) {
+      if(g_opts.debug > 1)
+        Rc_fprintf_stderr("region doesn't fit within image size\n");
+      return -1;
+    }
+
+  /* get the file open */
+  fp = nifti_image_load_prep( nim );
+  /* the current offset is just past the nifti header, save
+   * location so that SEEK_SET can be used below
+   */
+  initial_offset = znztell(fp);
+  /* get strides*/
+  compute_strides(strides,image_size,nim->nbyper);
+
+  total_alloc_size = nim->nbyper; /* size of pixel */
+
+  /* find alloc size */
+  for(i = 0; i < nim->ndim; i++) total_alloc_size *= region_size[i];
+
+  /* allocate buffer, if necessary */
+  if(! *data) *data = malloc(total_alloc_size);
+
+  if(! *data) {
+    if(g_opts.debug > 1)
+      Rc_fprintf_stderr("allocation of %" PRId64 " bytes failed\n",
+              total_alloc_size);
+    return -1;
+  }
+
+  /* point to start of data buffer as char * */
+  readptr = *((char **)data);
+  {
+  /* can't assume that start_index and region_size have any more than
+  ** nim->ndim elements so make local copies, filled out to seven elements
+  */
+  int64_t si[7], rs[7];
+  for(i = 0; i < nim->ndim; i++) {
+    si[i] = start_index[i];
+    rs[i] = region_size[i];
+  }
+  for(i = nim->ndim; i < 7; i++) {
+    si[i] = 0;
+    rs[i] = 1;
+  }
+
+  /* loop through subregion and read a row at a time */
+  for(i = si[6]; i < (si[6] + rs[6]); i++) {
+    for(j = si[5]; j < (si[5] + rs[5]); j++) {
+      for(k = si[4]; k < (si[4] + rs[4]); k++) {
+        for(l = si[3]; l < (si[3] + rs[3]); l++) {
+          for(m = si[2]; m < (si[2] + rs[2]); m++) {
+            for(n = si[1]; n < (si[1] + rs[1]); n++) {
+              int64_t nread,read_amount;
+              offset = initial_offset +
+                (i * strides[6]) +
+                (j * strides[5]) +
+                (k * strides[4]) +
+                (l * strides[3]) +
+                (m * strides[2]) +
+                (n * strides[1]) +
+                (si[0] * strides[0]);
+              znzseek(fp, offset, SEEK_SET); /* seek to current row */
+              read_amount = rs[0] * nim->nbyper; /* read a row of subregion */
+              nread = nifti_read_buffer(fp, readptr, read_amount, nim);
+              if(nread != read_amount) {
+                if(g_opts.debug > 1) {
+                  Rc_fprintf_stderr("read of %" PRId64 " bytes failed\n",
+                          read_amount);
+                  return -1;
+                }
+              }
+            bytes += nread;
+            readptr += read_amount;
+            }
+          }
+        }
+      }
+    }
+  }
+  }
+  znzclose(fp);
+  return bytes;
+}
+
+
+/* read the data from the file pointed to by fp
+
+   - this a recursive function, so start with the base case
+   - data is now (char *) for easy incrementing
+
+   return 0 on success, < 0 on failure
+*/
+static int rci_read_data(nifti_image * nim, int * pivots, int64_t * prods,
+                         int nprods, const int64_t dims[], char * data,
+                         znzFile fp, int64_t base_offset)
+{
+   int64_t sublen, offset, read_size;
+   int     c;
+
+   /* bad check first - base_offset may not have been checked */
+   if( nprods <= 0 ){
+      Rc_fprintf_stderr("** NIFTI rci_read_data, bad prods, %d\n", nprods);
+      return -1;
+   }
+
+   /* base case: actually read the data */
+   if( nprods == 1 ){
+      int64_t nread, bytes;
+
+      /* make sure things look good here */
+      if( *pivots != 0 ){
+         Rc_fprintf_stderr("** NIFTI rciRD: final pivot == %d!\n", *pivots);
+         return -1;
+      }
+
+      /* so just seek and read (prods[0] * nbyper) bytes from the file */
+      znzseek(fp, base_offset, SEEK_SET);
+      bytes = prods[0] * nim->nbyper;
+      nread = nifti_read_buffer(fp, data, bytes, nim);
+      if( nread != bytes ){
+         Rc_fprintf_stderr("** NIFTI rciRD: read only %" PRId64 " of %" PRId64
+                 " bytes from '%s'\n",
+                 nread, bytes, nim->fname);
+         return -1;
+      } else if( g_opts.debug > 3 )
+         Rc_fprintf_stderr("+d successful read of %" PRId64
+                 " bytes at offset %" PRId64 "\n",
+                 bytes, base_offset);
+
+      return 0;  /* done with base case - return success */
+   }
+
+   /* not the base case, so do a set of reduced reads */
+
+   /* compute size of sub-brick: all dimensions below pivot */
+   for( c = 1, sublen = 1; c < *pivots; c++ ) sublen *= nim->dim[c];
+
+   /* compute number of values to read, i.e. remaining prods */
+   for( c = 1, read_size = 1; c < nprods; c++ ) read_size *= prods[c];
+   read_size *= nim->nbyper;  /* and multiply by bytes per voxel */
+
+   /* now repeatedly compute offsets, and recursively read */
+   for( c = 0; c < prods[0]; c++ ){
+      /* offset is (c * sub-block size (including pivot dim))   */
+      /*         + (dims[] index into pivot sub-block)          */
+      /* the unneeded multiplication is to make this more clear */
+      offset = (int64_t)c * sublen * nim->dim[*pivots] +
+               (int64_t)sublen * dims[*pivots];
+      offset *= nim->nbyper;
+
+      if( g_opts.debug > 3 )
+         Rc_fprintf_stderr("-d reading %" PRId64 " bytes, foff %" PRId64
+                 " + %" PRId64 ", doff %" PRId64 "\n",
+                 read_size, base_offset, offset, c*read_size);
+
+      /* now read the next level down, adding this offset */
+      if( rci_read_data(nim, pivots+1, prods+1, nprods-1, dims,
+                    data + c * read_size, fp, base_offset + offset) < 0 )
+         return -1;
+   }
+
+   return 0;
+}
+
+
+/* allocate memory for all collapsed image data
+
+   If *data is already set, do not allocate, but still calculate
+   size for debug report.
+
+   return total size on success, and < 0 on failure
+*/
+static int rci_alloc_mem(void **data, const int64_t prods[8], int nprods, int nbyper )
+{
+   int64_t size;
+   int     memindex;
+
+   if( nbyper < 0 || nprods < 1 || nprods > 8 ){
+      Rc_fprintf_stderr("** NIFTI rci_am: bad params, %d, %d\n", nbyper, nprods);
+      return -1;
+   }
+
+   for( memindex = 0, size = 1; memindex < nprods; memindex++ )
+       size *= prods[memindex];
+
+   size *= nbyper;
+
+   if( ! *data ){   /* then allocate what is needed */
+      if( g_opts.debug > 1 )
+         Rc_fprintf_stderr("+d alloc %" PRId64
+                 " (%" PRId64 " x %d) bytes for collapsed image\n",
+                 size, size/nbyper, nbyper);
+
+      *data = malloc(size);   /* actually allocate the memory */
+      if( ! *data ){
+        Rc_fprintf_stderr("** NIFTI rci_am: failed to alloc %" PRId64
+                " bytes for data\n", size);
+        return -1;
+      }
+   } else if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d rci_am: *data already set, need %" PRId64
+              " x %d bytes\n",
+              size/nbyper, nbyper);
+
+   return size;
+}
+
+
+/* prepare a pivot list for reading
+
+   The pivot points are the indices into dims where the calling function
+   wants to collapse a dimension.  The last pivot should always be zero
+   (note that we have space for that in the lists).
+*/
+static int make_pivot_list(nifti_image *nim, const int64_t dims[], int pivots[],
+                                             int64_t prods[], int * nprods )
+{
+   int len, dind;
+
+   len = 0;
+   dind = nim->dim[0];
+   while( dind > 0 ){
+      prods[len] = 1;
+      while( dind > 0 && (nim->dim[dind] == 1 || dims[dind] == -1) ){
+         prods[len] *= nim->dim[dind];
+         dind--;
+      }
+      pivots[len] = dind;
+      len++;
+      dind--;  /* fine, let it drop out at -1 */
+   }
+
+   /* make sure to include 0 as a pivot (instead of just 1, if it is) */
+   if( len > 0 && pivots[len-1] != 0 ){
+      pivots[len] = 0;
+      prods[len] = 1;
+      len++;
+   }
+
+   *nprods = len;
+
+   if( g_opts.debug > 2 ){
+      Rc_fprintf_stderr("+d pivot list created, pivots :");
+      for(dind = 0; dind < len; dind++)
+         Rc_fprintf_stderr(" %d", pivots[dind]);
+      Rc_fprintf_stderr(", prods :");
+      for(dind = 0; dind < len; dind++)
+         Rc_fprintf_stderr(" %" PRId64 "", prods[dind]);
+      Rc_fputc_stderr('\n');
+   }
+
+   return 0;
+}
+
+
+#undef ISEND
+#define ISEND(c) ( (c)==']' || (c)=='}' || (c)=='\0' )
+
+/*---------------------------------------------------------------------*/
+/*! Get an integer list in the range 0..(nvals-1), from the
+   character string str.  If we call the output pointer fred,
+   then fred[0] = number of integers in the list (> 0), and
+        fred[i] = i-th integer in the list for i=1..fred[0].
+   If on return, fred == NULL or fred[0] == 0, then something is
+   wrong, and the caller must deal with that.
+
+   Syntax of input string:
+     - initial '{' or '[' is skipped, if present
+     - ends when '}' or ']' or end of string is found
+     - contains entries separated by commas
+     - entries have one of these forms:
+       - a single number
+       - a dollar sign '$', which means nvals-1
+       - a sequence of consecutive numbers in the form "a..b" or
+         "a-b", where "a" and "b" are single numbers (or '$')
+       - a sequence of evenly spaced numbers in the form
+         "a..b(c)" or "a-b(c)", where "c" encodes the step
+     - Example:  "[2,7..4,3..9(2)]" decodes to the list
+         2 7 6 5 4 3 5 7 9
+     - entries should be in the range 0..nvals-1
+
+   (borrowed, with permission, from thd_intlist.c)
+*//*-------------------------------------------------------------------*/
+int64_t * nifti_get_int64list( int64_t nvals , const char * str )
+{
+   int64_t *subv = NULL ;
+   int64_t *subv_realloc = NULL;
+   int64_t ii , nout ;
+   int64_t ibot,itop,istep , nused ;
+   int     ipos , slen ;
+   char    *cpt ;
+
+   /* Meaningless input? */
+   if( nvals < 1 ) return NULL ;
+
+   /* No selection list? */
+   if( str == NULL || str[0] == '\0' ) return NULL ;
+
+   /* skip initial '[' or '{' */
+   subv = (int64_t *)malloc( sizeof(int64_t) * 2 ) ;
+   if( !subv ) {
+      Rc_fprintf_stderr("** nifti_get_intlist: failed alloc of 2 ints\n");
+      return NULL;
+   }
+   subv[0] = nout = 0 ;
+
+   ipos = 0 ;
+   if( str[ipos] == '[' || str[ipos] == '{' ) ipos++ ;
+
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d making int_list (vals = %" PRId64 ") from '%s'\n",
+              nvals, str);
+
+   /**- for each sub-selector until end of input... */
+
+   slen = (int)strlen(str) ;
+   while( ipos < slen && !ISEND(str[ipos]) ){
+
+     while( isspace((int) str[ipos]) ) ipos++ ;   /* skip blanks */
+      if( ISEND(str[ipos]) ) break ;         /* done */
+
+      /**- get starting value */
+
+      if( str[ipos] == '$' ){  /* special case */
+         ibot = nvals-1 ; ipos++ ;
+      } else {                 /* decode an integer */
+         ibot = strtoll( str+ipos , &cpt , 10 ) ;
+         if( ibot < 0 ){
+           Rc_fprintf_stderr("** NIFTI ERROR: list index %" PRId64
+                   " is out of range 0..%" PRId64 "\n",
+                   ibot,nvals-1) ;
+           free(subv) ; return NULL ;
+         }
+         if( ibot >= nvals ){
+           Rc_fprintf_stderr("** NIFTI ERROR: list index %" PRId64
+                   " is out of range 0..%" PRId64 "\n",
+                   ibot,nvals-1) ;
+           free(subv) ; return NULL ;
+         }
+         nused = (cpt-(str+ipos)) ;
+         if( ibot == 0 && nused == 0 ){
+           Rc_fprintf_stderr("** NIFTI : list syntax error '%s'\n",str+ipos) ;
+           free(subv) ; return NULL ;
+         }
+         ipos += nused ;
+      }
+
+      while( isspace((int) str[ipos]) ) ipos++ ;   /* skip blanks */
+
+      /**- if that's it for this sub-selector, add one value to list */
+
+      if( str[ipos] == ',' || ISEND(str[ipos]) ){
+         nout++ ;
+         subv_realloc = (int64_t *)realloc( (char *)subv , sizeof(int64_t)*(nout+1) ) ;
+         if( !subv_realloc ) {
+            free(subv);
+            Rc_fprintf_stderr("** nifti_get_intlist: failed realloc of %" PRId64
+                    " ints\n", nout+1);
+            return NULL;
+         }
+         subv = subv_realloc;
+         subv[0]    = nout ;
+         subv[nout] = ibot ;
+         if( ISEND(str[ipos]) ) break ; /* done */
+         ipos++ ; continue ;            /* re-start loop at next sub-selector */
+      }
+
+      /**- otherwise, must have '..' or '-' as next inputs */
+
+      if( str[ipos] == '-' ){
+         ipos++ ;
+      } else if( str[ipos] == '.' && str[ipos+1] == '.' ){
+         ipos++ ; ipos++ ;
+      } else {
+         Rc_fprintf_stderr("** NIFTI ERROR: index list syntax is bad: '%s'\n",
+                 str+ipos) ;
+         free(subv) ; return NULL ;
+      }
+
+      /**- get ending value for loop now */
+
+      if( str[ipos] == '$' ){  /* special case */
+         itop = nvals-1 ; ipos++ ;
+      } else {                 /* decode an integer */
+         itop = strtoll( str+ipos , &cpt , 10 ) ;
+         if( itop < 0 ){
+           Rc_fprintf_stderr("** NIFTI ERROR: index %" PRId64
+                   " is out of range 0..%" PRId64 "\n",
+                   itop,nvals-1) ;
+           free(subv) ; return NULL ;
+         }
+         if( itop >= nvals ){
+           Rc_fprintf_stderr("** NIFTI ERROR: index %" PRId64
+                   " is out of range 0..%" PRId64 "\n",
+                   itop,nvals-1) ;
+           free(subv) ; return NULL ;
+         }
+         nused = (cpt-(str+ipos)) ;
+         if( itop == 0 && nused == 0 ){
+           Rc_fprintf_stderr("** NIFTI: index list syntax error '%s'\n",
+                          str+ipos) ;
+           free(subv) ; return NULL ;
+         }
+         ipos += nused ;
+      }
+
+      /**- set default loop step */
+
+      istep = (ibot <= itop) ? 1 : -1 ;
+
+      while( isspace((int) str[ipos]) ) ipos++ ;            /* skip blanks */
+
+      /**- check if we have a non-default loop step */
+
+      if( str[ipos] == '(' ){  /* decode an integer */
+         ipos++ ;
+         istep = strtoll( str+ipos , &cpt , 10 ) ;
+         if( istep == 0 ){
+           Rc_fprintf_stderr("** NIFTI ERROR: index loop step is 0!\n") ;
+           free(subv) ; return NULL ;
+         }
+         nused = (cpt-(str+ipos)) ;
+         ipos += nused ;
+         if( str[ipos] == ')' ) ipos++ ;
+         if( (ibot-itop)*istep > 0 ){
+        Rc_fprintf_stderr("** NIFTI WARNING: index list '%" PRId64 "..%" PRId64
+                "(%" PRId64 ")' means nothing\n",
+                ibot,itop,istep ) ;
+         }
+      }
+
+      /**- add values to output */
+
+      for( ii=ibot ; (ii-itop)*istep <= 0 ; ii += istep ){
+         nout++ ;
+         subv_realloc = (int64_t *)realloc( (char *)subv , sizeof(int64_t)*(nout+1) ) ;
+         if( !subv_realloc ) {
+            free(subv);
+            Rc_fprintf_stderr("** nifti_get_intlist: failed realloc of %" PRId64
+                    " ints\n", nout+1);
+            return NULL;
+         }
+         subv = subv_realloc;
+         subv[0]    = nout ;
+         subv[nout] = ii ;
+      }
+
+      /**- check if we have a comma to skip over */
+
+      while( isspace((int) str[ipos]) ) ipos++ ;            /* skip blanks */
+      if( str[ipos] == ',' ) ipos++ ;                       /* skip commas */
+
+   }  /* end of loop through selector string */
+
+   if( g_opts.debug > 1 ) {
+      Rc_fprintf_stderr("+d int_list (vals = %" PRId64 "): ", subv[0]);
+      for( ii = 1; ii <= subv[0]; ii++ )
+         Rc_fprintf_stderr("%" PRId64 " ", subv[ii]);
+      Rc_fputc_stderr('\n');
+   }
+
+   if( subv[0] == 0 ){ free(subv); subv = NULL; }
+   return subv ;
+}
+
+/*! a 32-bit version of nifti_get_int64list */
+int * nifti_get_intlist( int nvals , const char * str )
+{
+   int     *ilist=NULL;
+   int64_t *i64list=NULL, nints, index;
+
+   i64list = nifti_get_int64list((int64_t)nvals, str);
+   if( !i64list ) return NULL;
+
+   /* check that the length is between 1 and INT_MAX */
+   nints = i64list[0];
+   if( nints <= 0 ) { free(i64list); return NULL; }
+
+   if( nints > INT_MAX ) {
+      Rc_fprintf_stderr("** nifti_get_intlist: %" PRId64
+              " ints is too long for 32-bits\n", nints);
+      free(i64list);
+      return NULL;
+   }
+
+   /* have a valid result, copy as ints */
+   ilist = (int *)malloc((nints+1) * sizeof(int));
+   if( !ilist ) {
+      Rc_fprintf_stderr("** nifti_get_intlist: failed to alloc %" PRId64 " ints\n",
+              nints);
+      free(i64list);
+      return NULL;
+   }
+
+   /* copy list, including length at index 0 */
+   for( index=0; index <= nints; index++ ) {
+      if( i64list[index] > INT_MAX ) {
+         Rc_fprintf_stderr("** nifti_get_intlist: value %" PRId64
+                 " too big for 32-bits\n",
+                 i64list[index]);
+         free(ilist);
+         free(i64list);
+         return NULL;
+      }
+      ilist[index] = (int)i64list[index];
+   }
+
+   free(i64list);
+
+   return ilist;
+}
+
+/*---------------------------------------------------------------------*/
+/*! Given a NIFTI_TYPE string, such as "NIFTI_TYPE_INT16", return the
+ *  corresponding integral type code.  The type code is the macro
+ *  value defined in nifti1.h.
+*//*-------------------------------------------------------------------*/
+int nifti_datatype_from_string( const char * name )
+{
+    int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele);
+    int c;
+
+    if( !name ) return DT_UNKNOWN;
+
+    for( c = tablen-1; c > 0; c-- )
+        if( !strcmp(name, nifti_type_list[c].name) )
+            break;
+
+    return nifti_type_list[c].type;
+}
+
+
+/*---------------------------------------------------------------------*/
+/*! Given a NIFTI_TYPE value, such as NIFTI_TYPE_INT16, return the
+ *  corresponding macro label as a string.  The dtype code is the
+ *  macro value defined in nifti1.h.
+*//*-------------------------------------------------------------------*/
+const char * nifti_datatype_to_string( int dtype )
+{
+    int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele);
+    int c;
+
+    for( c = tablen-1; c > 0; c-- )
+        if( nifti_type_list[c].type == dtype )
+            break;
+
+    return nifti_type_list[c].name;
+}
+
+
+/*---------------------------------------------------------------------*/
+/*! Determine whether dtype is a valid NIFTI_TYPE.
+ *
+ *  DT_UNKNOWN is considered invalid
+ *
+ *  The only difference 'for_nifti' makes is that DT_BINARY
+ *  should be invalid for a NIfTI dataset.
+*//*-------------------------------------------------------------------*/
+int nifti_datatype_is_valid( int dtype, int for_nifti )
+{
+    int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele);
+    int c;
+
+    /* special case */
+    if( for_nifti && dtype == DT_BINARY ) return 0;
+
+    for( c = tablen-1; c > 0; c-- )
+        if( nifti_type_list[c].type == dtype )
+            return 1;
+
+    return 0;
+}
+
+
+/*---------------------------------------------------------------------*/
+/*! Only as a test, verify that the new nifti_type_list table matches
+ *  the the usage of nifti_datatype_sizes (which could be changed to
+ *  use the table, if there were interest).
+ *
+ *  return the number of errors (so 0 is success, as usual)
+*//*-------------------------------------------------------------------*/
+int nifti_test_datatype_sizes(int verb)
+{
+    int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele);
+    int nbyper, ssize;
+    int c, errs = 0;
+
+    for( c = 0; c < tablen; c++ )
+    {
+        nbyper = ssize = -1;
+        nifti_datatype_sizes(nifti_type_list[c].type, &nbyper, &ssize);
+        if( nbyper < 0 || ssize < 0 ||
+                nbyper != nifti_type_list[c].nbyper ||
+                ssize != nifti_type_list[c].swapsize )
+        {
+            if( verb || g_opts.debug > 2 )
+                Rc_fprintf_stderr("** NIFTI type mismatch: "
+                    "%s, %d, %d, %d : %d, %d\n",
+                    nifti_type_list[c].name, nifti_type_list[c].type,
+                    nifti_type_list[c].nbyper, nifti_type_list[c].swapsize,
+                    nbyper, ssize);
+            errs++;
+        }
+    }
+
+    if( errs )
+        Rc_fprintf_stderr("** nifti_test_datatype_sizes: found %d errors\n",errs);
+    else if( verb || g_opts.debug > 1 )
+        Rc_fprintf_stderr("-- nifti_test_datatype_sizes: all OK\n");
+
+    return errs;
+}
+
+
+/*---------------------------------------------------------------------*/
+/*! Display the nifti_type_list table.
+ *
+ *  if which == 1  : display DT_*
+ *  if which == 2  : display NIFTI_TYPE*
+ *  else           : display all
+*//*-------------------------------------------------------------------*/
+int nifti_disp_type_list( int which )
+{
+    const char * style;
+    int    tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele);
+    int    lwhich, c;
+
+    if     ( which == 1 ){ lwhich = 1; style = "DT_"; }
+    else if( which == 2 ){ lwhich = 2; style = "NIFTI_TYPE_"; }
+    else                 { lwhich = 3; style = "ALL"; }
+
+    Rc_printf("nifti_type_list entries (%s) :\n"
+           "  name                    type    nbyper    swapsize\n"
+           "  ---------------------   ----    ------    --------\n", style);
+
+    for( c = 0; c < tablen; c++ )
+        if( (lwhich & 1 && nifti_type_list[c].name[0] == 'D')  ||
+            (lwhich & 2 && nifti_type_list[c].name[0] == 'N')     )
+            Rc_printf("  %-22s %5d     %3d      %5d\n",
+                   nifti_type_list[c].name,
+                   nifti_type_list[c].type,
+                   nifti_type_list[c].nbyper,
+                   nifti_type_list[c].swapsize);
+
+    return 0;
+}
diff --git a/reg-io/niftilib/nifti2_io.h b/reg-io/niftilib/nifti2_io.h
new file mode 100644
index 00000000..c0ed0cbd
--- /dev/null
+++ b/reg-io/niftilib/nifti2_io.h
@@ -0,0 +1,830 @@
+/** \file nifti2_io.h
+    \brief Data structures for using nifti2_io API.
+           - Written by Bob Cox, SSCC NIMH
+           - Revisions by Rick Reynolds, SSCC NIMH
+ */
+#ifndef _NIFTI2_IO_HEADER_
+#define _NIFTI2_IO_HEADER_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <limits.h>
+#include <ctype.h>
+#include <inttypes.h>
+
+#ifndef DONT_INCLUDE_ANALYZE_STRUCT
+#define DONT_INCLUDE_ANALYZE_STRUCT  /*** not needed herein ***/
+#endif
+#include "niftilib/nifti1.h"         /*** NIFTI-1 header specification ***/
+#include "niftilib/nifti2.h"         /*** NIFTI-2 header specification ***/
+
+#ifndef RNIFTI_NIFTILIB_VERSION
+#define RNIFTI_NIFTILIB_VERSION 2
+#endif
+
+#include "RNifti/NiftiImage_print.h"
+#include <znzlib/znzlib.h>
+
+/*=================*/
+#ifdef  __cplusplus
+extern "C" {
+#endif
+/*=================*/
+
+/*****===================================================================*****/
+/*****         File nifti2_io.h == Declarations for nifti2_io.c          *****/
+/*****...................................................................*****/
+/*****            This code is a modification of nifti1_io.h.            *****/
+/*****...................................................................*****/
+/*****            This code is released to the public domain.            *****/
+/*****...................................................................*****/
+/*****  Author: Robert W Cox, SSCC/DIRP/NIMH/NIH/DHHS/USA/EARTH          *****/
+/*****  Date:   August 2003                                              *****/
+/*****...................................................................*****/
+/*****  Neither the National Institutes of Health (NIH), nor any of its  *****/
+/*****  employees imply any warranty of usefulness of this software for  *****/
+/*****  any purpose, and do not assume any liability for damages,        *****/
+/*****  incidental or otherwise, caused by any use of this document.     *****/
+/*****===================================================================*****/
+
+/* ......................................................................
+   Modified by: Mark Jenkinson (FMRIB Centre, University of Oxford, UK)
+   Date: July/August 2004
+
+      Mainly adding low-level IO and changing things to allow gzipped files
+      to be read and written
+      Full backwards compatability should have been maintained
+
+   ......................................................................
+   Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health)
+   Date: December 2004
+
+      Modified and added many routines for I/O, particularly involving
+      extensions and nifti_brick_list.
+
+   ......................................................................
+   Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health)
+   Date: August 2013
+
+      Converted to be based on nifti_2_header.
+
+      ** NOT BACKWARD COMPATABLE **
+
+      These routines will read/write both NIFTI-1 and NIFTI-2 image files,
+      but modification to the _calling_ routies is necessary, since:
+
+        a. the main nifti_image type has changed (to nifti2_image)
+        b. some image field types have been altered (to have larger size)
+        c. some routines have been changed to apply to multiple NIFTI types
+*/
+
+/********************** Some sample data structures **************************/
+
+#if RNIFTI_NIFTILIB_VERSION == 2
+typedef struct {                   /** 4x4 matrix struct **/
+  float m[4][4] ;
+} mat44 ;
+
+typedef struct {                   /** 3x3 matrix struct **/
+  float m[3][3] ;
+} mat33 ;
+#endif
+
+typedef struct {                   /** 4x4 matrix struct (double) **/
+  double m[4][4] ;
+} nifti_dmat44 ;
+
+typedef struct {                   /** 3x3 matrix struct (double) **/
+  double m[3][3] ;
+} nifti_dmat33 ;
+
+/*...........................................................................*/
+
+/*! \enum analyze_75_orient_code
+ *  \brief Old-style analyze75 orientation
+ *         codes.
+ */
+#if RNIFTI_NIFTILIB_VERSION == 2
+typedef enum _analyze75_orient_code {
+  a75_transverse_unflipped = 0,
+  a75_coronal_unflipped = 1,
+  a75_sagittal_unflipped = 2,
+  a75_transverse_flipped = 3,
+  a75_coronal_flipped = 4,
+  a75_sagittal_flipped = 5,
+  a75_orient_unknown = 6
+} analyze_75_orient_code;
+#endif
+
+/*! \struct nifti_image
+    \brief High level data structure for open nifti datasets in the
+           nifti2_io API.  Note that this structure is not part of the
+           nifti2 format definition; it is used to implement one API
+           for reading/writing datasets in the nifti1 or nifti2 formats.
+
+    Field types changed for NIFTI-2 (note: ALL floats to doubles):
+        nx, ny, ..., nw, dim, nvox,
+        dx, dy, ..., dw, pixdim,
+        scl_slope, scl_inter, cal_min, cal_max,
+        slice_start, slice_end, slice_duration,
+        quatern_b,c,d, qoffset_x,y,z, qfac,
+        qto_xyz,ijk, sto_xyz,ijk,
+        toffset, intent_p1,2,3, iname_offset
+ */
+typedef struct {                /*!< Image storage struct **/
+
+  int64_t ndim ;                /*!< last dimension greater than 1 (1..7) */
+  int64_t nx ;                  /*!< dimensions of grid array             */
+  int64_t ny ;                  /*!< dimensions of grid array             */
+  int64_t nz ;                  /*!< dimensions of grid array             */
+  int64_t nt ;                  /*!< dimensions of grid array             */
+  int64_t nu ;                  /*!< dimensions of grid array             */
+  int64_t nv ;                  /*!< dimensions of grid array             */
+  int64_t nw ;                  /*!< dimensions of grid array             */
+  int64_t dim[8] ;              /*!< dim[0]=ndim, dim[1]=nx, etc.         */
+  int64_t nvox ;                /*!< number of voxels = nx*ny*nz*...*nw   */
+  int nbyper ;                  /*!< bytes per voxel, matches datatype    */
+  int datatype ;                /*!< type of data in voxels: DT_* code    */
+
+  double dx ;                   /*!< grid spacings      */
+  double dy ;                   /*!< grid spacings      */
+  double dz ;                   /*!< grid spacings      */
+  double dt ;                   /*!< grid spacings      */
+  double du ;                   /*!< grid spacings      */
+  double dv ;                   /*!< grid spacings      */
+  double dw ;                   /*!< grid spacings      */
+  double pixdim[8] ;            /*!< pixdim[1]=dx, etc. */
+
+  double scl_slope ;            /*!< scaling parameter - slope        */
+  double scl_inter ;            /*!< scaling parameter - intercept    */
+
+  double cal_min ;              /*!< calibration parameter, minimum   */
+  double cal_max ;              /*!< calibration parameter, maximum   */
+
+  int qform_code ;              /*!< codes for (x,y,z) space meaning  */
+  int sform_code ;              /*!< codes for (x,y,z) space meaning  */
+
+  int freq_dim  ;               /*!< indexes (1,2,3, or 0) for MRI    */
+  int phase_dim ;               /*!< directions in dim[]/pixdim[]     */
+  int slice_dim ;               /*!< directions in dim[]/pixdim[]     */
+
+  int     slice_code  ;         /*!< code for slice timing pattern    */
+  int64_t slice_start ;         /*!< index for start of slices        */
+  int64_t slice_end   ;         /*!< index for end of slices          */
+  double  slice_duration ;      /*!< time between individual slices   */
+
+  /*! quaternion transform parameters
+    [when writing a dataset, these are used for qform, NOT qto_xyz]   */
+  double quatern_b , quatern_c , quatern_d ,
+         qoffset_x , qoffset_y , qoffset_z ,
+         qfac      ;
+
+  nifti_dmat44 qto_xyz ;        /*!< qform: transform (i,j,k) to (x,y,z) */
+  nifti_dmat44 qto_ijk ;        /*!< qform: transform (x,y,z) to (i,j,k) */
+
+  nifti_dmat44 sto_xyz ;        /*!< sform: transform (i,j,k) to (x,y,z) */
+  nifti_dmat44 sto_ijk ;        /*!< sform: transform (x,y,z) to (i,j,k) */
+
+  double toffset ;              /*!< time coordinate offset */
+
+  int xyz_units  ;              /*!< dx,dy,dz units: NIFTI_UNITS_* code  */
+  int time_units ;              /*!< dt       units: NIFTI_UNITS_* code  */
+
+  int nifti_type ;              /*!< see NIFTI_FTYPE_* codes, below:
+                                        0==ANALYZE,
+                                        1==NIFTI-1     (1 file),
+                                        2==NIFTI-1     (2 files),
+                                        3==NIFTI-ASCII (1 file)
+                                        4==NIFTI-2     (1 file),
+                                        5==NIFTI-2     (2 files) */
+
+  int    intent_code ;          /*!< statistic type (or something)       */
+  double intent_p1 ;            /*!< intent parameters                   */
+  double intent_p2 ;            /*!< intent parameters                   */
+  double intent_p3 ;            /*!< intent parameters                   */
+  char   intent_name[16] ;      /*!< optional description of intent data */
+
+  char descrip[80]  ;           /*!< optional text to describe dataset   */
+  char aux_file[24] ;           /*!< auxiliary filename                  */
+
+  char *fname ;                 /*!< header filename (.hdr or .nii)         */
+  char *iname ;                 /*!< image filename  (.img or .nii)         */
+  int64_t iname_offset ;        /*!< offset into iname where data starts    */
+  int   swapsize ;              /*!< swap unit in image data (might be 0)   */
+  int   byteorder ;             /*!< byte order on disk (MSB_ or LSB_FIRST) */
+  void *data ;                  /*!< pointer to data: nbyper*nvox bytes     */
+
+  int                num_ext ;  /*!< number of extensions in ext_list       */
+  nifti1_extension * ext_list ; /*!< array of extension structs (with data) */
+  analyze_75_orient_code analyze75_orient; /*!< for old analyze files, orient */
+
+} nifti2_image ;
+
+#if RNIFTI_NIFTILIB_VERSION == 2
+typedef struct {
+
+  int ndim ;                    /*!< last dimension greater than 1 (1..7) */
+  int nx ;                      /*!< dimensions of grid array             */
+  int ny ;                      /*!< dimensions of grid array             */
+  int nz ;                      /*!< dimensions of grid array             */
+  int nt ;                      /*!< dimensions of grid array             */
+  int nu ;                      /*!< dimensions of grid array             */
+  int nv ;                      /*!< dimensions of grid array             */
+  int nw ;                      /*!< dimensions of grid array             */
+  int dim[8] ;                  /*!< dim[0]=ndim, dim[1]=nx, etc.         */
+  size_t nvox ;                 /*!< number of voxels = nx*ny*nz*...*nw   */
+  int nbyper ;                  /*!< bytes per voxel, matches datatype    */
+  int datatype ;                /*!< type of data in voxels: DT_* code    */
+
+  float dx ;                    /*!< grid spacings      */
+  float dy ;                    /*!< grid spacings      */
+  float dz ;                    /*!< grid spacings      */
+  float dt ;                    /*!< grid spacings      */
+  float du ;                    /*!< grid spacings      */
+  float dv ;                    /*!< grid spacings      */
+  float dw ;                    /*!< grid spacings      */
+  float pixdim[8] ;             /*!< pixdim[1]=dx, etc. */
+
+  float scl_slope ;             /*!< scaling parameter - slope        */
+  float scl_inter ;             /*!< scaling parameter - intercept    */
+
+  float cal_min ;               /*!< calibration parameter, minimum   */
+  float cal_max ;               /*!< calibration parameter, maximum   */
+
+  int qform_code ;              /*!< codes for (x,y,z) space meaning  */
+  int sform_code ;              /*!< codes for (x,y,z) space meaning  */
+
+  int freq_dim  ;               /*!< indexes (1,2,3, or 0) for MRI    */
+  int phase_dim ;               /*!< directions in dim[]/pixdim[]     */
+  int slice_dim ;               /*!< directions in dim[]/pixdim[]     */
+
+  int   slice_code  ;           /*!< code for slice timing pattern    */
+  int   slice_start ;           /*!< index for start of slices        */
+  int   slice_end   ;           /*!< index for end of slices          */
+  float slice_duration ;        /*!< time between individual slices   */
+
+  /*! quaternion transform parameters
+    [when writing a dataset, these are used for qform, NOT qto_xyz]   */
+  float quatern_b , quatern_c , quatern_d ,
+        qoffset_x , qoffset_y , qoffset_z ,
+        qfac      ;
+
+  mat44 qto_xyz ;               /*!< qform: transform (i,j,k) to (x,y,z) */
+  mat44 qto_ijk ;               /*!< qform: transform (x,y,z) to (i,j,k) */
+
+  mat44 sto_xyz ;               /*!< sform: transform (i,j,k) to (x,y,z) */
+  mat44 sto_ijk ;               /*!< sform: transform (x,y,z) to (i,j,k) */
+
+  float toffset ;               /*!< time coordinate offset */
+
+  int xyz_units  ;              /*!< dx,dy,dz units: NIFTI_UNITS_* code  */
+  int time_units ;              /*!< dt       units: NIFTI_UNITS_* code  */
+
+  int nifti_type ;              /*!< 0==ANALYZE, 1==NIFTI-1 (1 file),
+                                                 2==NIFTI-1 (2 files),
+                                                 3==NIFTI-ASCII (1 file) */
+  int   intent_code ;           /*!< statistic type (or something)       */
+  float intent_p1 ;             /*!< intent parameters                   */
+  float intent_p2 ;             /*!< intent parameters                   */
+  float intent_p3 ;             /*!< intent parameters                   */
+  char  intent_name[16] ;       /*!< optional description of intent data */
+
+  char descrip[80]  ;           /*!< optional text to describe dataset   */
+  char aux_file[24] ;           /*!< auxiliary filename                  */
+
+  char *fname ;                 /*!< header filename (.hdr or .nii)         */
+  char *iname ;                 /*!< image filename  (.img or .nii)         */
+  int   iname_offset ;          /*!< offset into iname where data starts    */
+  int   swapsize ;              /*!< swap unit in image data (might be 0)   */
+  int   byteorder ;             /*!< byte order on disk (MSB_ or LSB_FIRST) */
+  void *data ;                  /*!< pointer to data: nbyper*nvox bytes     */
+
+  int                num_ext ;  /*!< number of extensions in ext_list       */
+  nifti1_extension * ext_list ; /*!< array of extension structs (with data) */
+  analyze_75_orient_code analyze75_orient; /*!< for old analyze files, orient */
+
+} nifti1_image ;
+#endif
+
+/* struct for return from nifti_image_read_bricks() */
+typedef struct {
+  int64_t   nbricks;    /* the number of allocated pointers in 'bricks' */
+  int64_t   bsize;      /* the length of each data block, in bytes      */
+  void   ** bricks;     /* array of pointers to data blocks             */
+} nifti2_brick_list;
+
+#if RNIFTI_NIFTILIB_VERSION == 2
+typedef nifti2_image        nifti_image;
+typedef nifti2_brick_list   nifti_brick_list;
+#endif
+
+/*****************************************************************************/
+/*------------------ NIfTI version of ANALYZE 7.5 structure -----------------*/
+
+/* (based on fsliolib/dbh.h, but updated for version 7.5) */
+
+#if RNIFTI_NIFTILIB_VERSION == 2
+typedef struct {
+       /* header info fields - describes the header    overlap with NIfTI */
+       /*                                              ------------------ */
+       int sizeof_hdr;                  /* 0 + 4        same              */
+       char data_type[10];              /* 4 + 10       same              */
+       char db_name[18];                /* 14 + 18      same              */
+       int extents;                     /* 32 + 4       same              */
+       short int session_error;         /* 36 + 2       same              */
+       char regular;                    /* 38 + 1       same              */
+       char hkey_un0;                   /* 39 + 1                40 bytes */
+
+       /* image dimension fields - describes image sizes */
+       short int dim[8];                /* 0 + 16       same              */
+       short int unused8;               /* 16 + 2       intent_p1...      */
+       short int unused9;               /* 18 + 2         ...             */
+       short int unused10;              /* 20 + 2       intent_p2...      */
+       short int unused11;              /* 22 + 2         ...             */
+       short int unused12;              /* 24 + 2       intent_p3...      */
+       short int unused13;              /* 26 + 2         ...             */
+       short int unused14;              /* 28 + 2       intent_code       */
+       short int datatype;              /* 30 + 2       same              */
+       short int bitpix;                /* 32 + 2       same              */
+       short int dim_un0;               /* 34 + 2       slice_start       */
+       float pixdim[8];                 /* 36 + 32      same              */
+
+       float vox_offset;                /* 68 + 4       same              */
+       float funused1;                  /* 72 + 4       scl_slope         */
+       float funused2;                  /* 76 + 4       scl_inter         */
+       float funused3;                  /* 80 + 4       slice_end,        */
+                                                     /* slice_code,       */
+                                                     /* xyzt_units        */
+       float cal_max;                   /* 84 + 4       same              */
+       float cal_min;                   /* 88 + 4       same              */
+       float compressed;                /* 92 + 4       slice_duration    */
+       float verified;                  /* 96 + 4       toffset           */
+       int glmax,glmin;                 /* 100 + 8              108 bytes */
+
+       /* data history fields - optional */
+       char descrip[80];                /* 0 + 80       same              */
+       char aux_file[24];               /* 80 + 24      same              */
+       char orient;                     /* 104 + 1      NO GOOD OVERLAP   */
+       char originator[10];             /* 105 + 10     FROM HERE DOWN... */
+       char generated[10];              /* 115 + 10                       */
+       char scannum[10];                /* 125 + 10                       */
+       char patient_id[10];             /* 135 + 10                       */
+       char exp_date[10];               /* 145 + 10                       */
+       char exp_time[10];               /* 155 + 10                       */
+       char hist_un0[3];                /* 165 + 3                        */
+       int views;                       /* 168 + 4                        */
+       int vols_added;                  /* 172 + 4                        */
+       int start_field;                 /* 176 + 4                        */
+       int field_skip;                  /* 180 + 4                        */
+       int omax, omin;                  /* 184 + 8                        */
+       int smax, smin;                  /* 192 + 8              200 bytes */
+} nifti_analyze75;                                   /* total:  348 bytes */
+#endif
+
+/*****************************************************************************/
+/*--------------- Prototypes of functions defined in this file --------------*/
+
+char const * nifti_datatype_string   ( int dt ) ;
+char const *nifti_units_string      ( int uu ) ;
+char const *nifti_intent_string     ( int ii ) ;
+char const *nifti_xform_string      ( int xx ) ;
+char const *nifti_slice_string      ( int ss ) ;
+char const *nifti_orientation_string( int ii ) ;
+
+int   nifti_is_inttype( int dt ) ;
+
+mat44        nifti_mat44_inverse ( mat44 R ) ;
+mat44        nifti_mat44_mul      ( mat44 A , mat44 B );
+nifti_dmat44 nifti_dmat44_inverse( nifti_dmat44 R ) ;
+int          nifti_mat44_to_dmat44(mat44 * fm, nifti_dmat44 * dm);
+int          nifti_dmat44_to_mat44(nifti_dmat44 * dm, mat44 * fm);
+nifti_dmat44 nifti_dmat44_mul     ( nifti_dmat44 A , nifti_dmat44 B );
+
+
+
+nifti_dmat33 nifti_dmat33_inverse( nifti_dmat33 R ) ;
+nifti_dmat33 nifti_dmat33_polar  ( nifti_dmat33 A ) ;
+double       nifti_dmat33_rownorm( nifti_dmat33 A ) ;
+double       nifti_dmat33_colnorm( nifti_dmat33 A ) ;
+double       nifti_dmat33_determ ( nifti_dmat33 R ) ;
+nifti_dmat33 nifti_dmat33_mul    ( nifti_dmat33 A , nifti_dmat33 B ) ;
+
+mat33 nifti_mat33_inverse( mat33 R ) ;
+mat33 nifti_mat33_polar  ( mat33 A ) ;
+float nifti_mat33_rownorm( mat33 A ) ;
+float nifti_mat33_colnorm( mat33 A ) ;
+float nifti_mat33_determ ( mat33 R ) ;
+mat33 nifti_mat33_mul    ( mat33 A , mat33 B ) ;
+
+#if RNIFTI_NIFTILIB_VERSION == 2
+void  nifti_swap_2bytes ( int64_t n , void *ar ) ;
+void  nifti_swap_4bytes ( int64_t n , void *ar ) ;
+void  nifti_swap_8bytes ( int64_t n , void *ar ) ;
+void  nifti_swap_16bytes( int64_t n , void *ar ) ;
+void  nifti_swap_Nbytes ( int64_t n , int siz , void *ar ) ;
+#endif
+
+int    nifti_datatype_is_valid       (int dtype, int for_nifti);
+int    nifti_datatype_from_string    (const char * name);
+const char * nifti_datatype_to_string(int dtype);
+int    nifti_header_version          (const char * buf, size_t nbytes);
+
+int64_t nifti2_get_filesize( const char *pathname ) ;
+#if RNIFTI_NIFTILIB_VERSION == 2
+void  swap_nifti_header ( void * hdr , int ni_ver ) ;
+#endif
+void  old_swap_nifti_header( struct nifti_1_header *h , int is_nifti );
+#if RNIFTI_NIFTILIB_VERSION == 2
+void  nifti_swap_as_analyze( nifti_analyze75 *h );
+#endif
+void  nifti_swap_as_nifti1( nifti_1_header *h );
+void  nifti_swap_as_nifti2( nifti_2_header *h );
+
+
+/* main read/write routines */
+
+nifti_image *nifti2_image_read_bricks(const char *hname , int64_t nbricks,
+                               const int64_t *blist, nifti_brick_list * NBL);
+int          nifti2_image_load_bricks(nifti_image *nim , int64_t nbricks,
+                               const int64_t *blist, nifti_brick_list * NBL);
+void         nifti2_free_NBL( nifti_brick_list * NBL );
+
+nifti_image *nifti2_image_read    ( const char *hname , int read_data);
+int          nifti2_image_load    ( nifti_image *nim);
+void         nifti2_image_unload  ( nifti_image *nim);
+void         nifti2_image_free    ( nifti_image *nim);
+
+int64_t      nifti2_read_collapsed_image( nifti_image * nim,
+                                         const int64_t dims[8], void ** data);
+
+int64_t      nifti2_read_subregion_image(nifti_image *nim, const int64_t *start_index,
+                                        const int64_t *region_size, void ** data);
+
+void         nifti2_image_write   ( nifti_image * nim ) ;
+void         nifti2_image_write_bricks(nifti_image * nim,
+                                      const nifti_brick_list * NBL);
+void         nifti2_image_infodump( const nifti_image * nim ) ;
+
+void         nifti2_disp_lib_hist( int ver ) ;  /* to display library history */
+void         nifti_disp_lib_version( void ) ;  /* to display library version */
+int          nifti2_disp_matrix_orient( const char * mesg, nifti_dmat44 mat );
+int          nifti_disp_type_list( int which );
+
+
+char *       nifti2_image_to_ascii  ( const nifti_image * nim ) ;
+nifti_image *nifti2_image_from_ascii( const char * str, int * bytes_read ) ;
+
+int64_t      nifti2_get_volsize(const nifti_image *nim) ;
+
+/* basic file operations */
+int    nifti2_set_filenames(nifti_image * nim, const char * prefix, int check,
+                           int set_byte_order);
+char * nifti_makehdrname  (const char * prefix, int nifti_type, int check,
+                           int comp);
+char * nifti_makeimgname  (const char * prefix, int nifti_type, int check,
+                           int comp);
+int    is_nifti_file      (const char *hname);
+char * nifti_find_file_extension(const char * name);
+int    nifti_is_complete_filename(const char* fname);
+int    nifti_validfilename(const char* fname);
+
+
+int    disp_nifti_1_header(const char * info, const nifti_1_header * hp ) ;
+int    disp_nifti_2_header( const char * info, const nifti_2_header * hp ) ;
+void   nifti_set_debug_level( int level ) ;
+void   nifti_set_skip_blank_ext( int skip ) ;
+void   nifti_set_allow_upper_fext( int allow ) ;
+int    nifti_get_alter_cifti( void );
+void   nifti_set_alter_cifti( int alter_cifti );
+
+int    nifti_alter_cifti_dims(nifti_image * nim);
+
+
+int    valid_nifti2_brick_list(nifti_image * nim , int64_t nbricks,
+                              const int64_t * blist, int disp_error);
+
+/* znzFile operations */
+znzFile nifti2_image_open(const char * hname, char * opts, nifti_image ** nim);
+znzFile nifti2_image_write_hdr_img(nifti_image *nim, int write_data,
+                                  const char* opts);
+znzFile nifti2_image_write_hdr_img2( nifti_image *nim , int write_opts ,
+               const char* opts, znzFile imgfile, const nifti_brick_list * NBL);
+int64_t nifti2_read_buffer(znzFile fp, void* dataptr, int64_t ntot,
+                         nifti_image *nim);
+int     nifti2_write_all_data(znzFile fp, nifti_image * nim,
+                             const nifti_brick_list * NBL);
+int64_t  nifti2_write_buffer(znzFile fp, const void * buffer, int64_t numbytes);
+nifti_image *nifti2_read_ascii_image(znzFile fp, const char *fname, int flen,
+                         int read_data);
+znzFile nifti2_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL,
+                         const char * opts, int write_data, int leave_open);
+
+
+void nifti_datatype_sizes( int datatype , int *nbyper, int *swapsize ) ;
+
+void nifti_dmat44_to_quatern(nifti_dmat44 R ,
+                             double *qb, double *qc, double *qd,
+                             double *qx, double *qy, double *qz,
+                             double *dx, double *dy, double *dz, double *qfac);
+
+nifti_dmat44 nifti_quatern_to_dmat44( double qb, double qc, double qd,
+                             double qx, double qy, double qz,
+                             double dx, double dy, double dz, double qfac );
+
+nifti_dmat44 nifti_make_orthog_dmat44( double r11, double r12, double r13 ,
+                                 double r21, double r22, double r23 ,
+                                 double r31, double r32, double r33  ) ;
+
+void nifti_mat44_to_quatern( mat44 R ,
+                             float *qb, float *qc, float *qd,
+                             float *qx, float *qy, float *qz,
+                             float *dx, float *dy, float *dz, float *qfac ) ;
+
+mat44 nifti_quatern_to_mat44( float qb, float qc, float qd,
+                              float qx, float qy, float qz,
+                              float dx, float dy, float dz, float qfac );
+
+mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 ,
+                               float r21, float r22, float r23 ,
+                               float r31, float r32, float r33  ) ;
+
+int nifti_short_order(void) ;              /* CPU byte order */
+
+
+/* Orientation codes that might be returned from nifti_mat44_to_orientation().*/
+
+#define NIFTI_L2R  1    /* Left to Right         */
+#define NIFTI_R2L  2    /* Right to Left         */
+#define NIFTI_P2A  3    /* Posterior to Anterior */
+#define NIFTI_A2P  4    /* Anterior to Posterior */
+#define NIFTI_I2S  5    /* Inferior to Superior  */
+#define NIFTI_S2I  6    /* Superior to Inferior  */
+
+void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) ;
+void nifti_dmat44_to_orientation( nifti_dmat44 R,
+                                  int *icod, int *jcod, int *kcod ) ;
+
+/*--------------------- Low level IO routines ------------------------------*/
+
+char * nifti_findhdrname (const char* fname);
+char * nifti_findimgname (const char* fname , int nifti_type);
+int    nifti_is_gzfile   (const char* fname);
+
+char * nifti_makebasename(const char* fname);
+
+
+/* other routines */
+int   nifti_convert_nim2n1hdr(const nifti_image* nim, nifti_1_header * hdr);
+int   nifti_convert_nim2n2hdr(const nifti_image* nim, nifti_2_header * hdr);
+nifti_1_header * nifti_make_new_n1_header(const int64_t arg_dims[], int arg_dtype);
+nifti_2_header * nifti_make_new_n2_header(const int64_t arg_dims[], int arg_dtype);
+void           * nifti2_read_header(const char *hname, int *nver,   int check);
+nifti_1_header * nifti_read_n1_hdr(const char *hname, int *swapped, int check);
+nifti_2_header * nifti_read_n2_hdr(const char *hname, int *swapped, int check);
+nifti_image    * nifti2_copy_nim_info(const nifti_image * src);
+nifti_image    * nifti2_make_new_nim(const int64_t dims[], int datatype,
+                                    int data_fill);
+
+
+nifti_image    * nifti2_simple_init_nim(void);
+nifti_image    * nifti_convert_n1hdr2nim(nifti_1_header nhdr,const char *fname);
+nifti_image    * nifti_convert_n2hdr2nim(nifti_2_header nhdr,const char *fname);
+
+int    nifti_looks_like_cifti(nifti_image * nim);
+
+int    nifti_hdr1_looks_good       (const nifti_1_header * hdr);
+int    nifti_hdr2_looks_good       (const nifti_2_header * hdr);
+int    nifti_is_valid_datatype     (int dtype);
+int    nifti_is_valid_ecode        (int ecode);
+int    nifti2_nim_is_valid         (nifti_image * nim, int complain);
+int    nifti2_nim_has_valid_dims   (nifti_image * nim, int complain);
+int    is_valid_nifti2_type        (int nifti_type);
+int    nifti_test_datatype_sizes   (int verb);
+int    nifti2_type_and_names_match  (nifti_image * nim, int show_warn);
+int    nifti2_update_dims_from_array(nifti_image * nim);
+void   nifti2_set_iname_offset      (nifti_image *nim, int nifti_ver);
+int    nifti2_set_type_from_names   (nifti_image * nim);
+int    nifti2_add_extension(nifti_image * nim, const char * data, int len,
+                           int ecode );
+int    nifti_compiled_with_zlib    (void);
+int    nifti2_copy_extensions (nifti_image *nim_dest,const nifti_image *nim_src);
+int    nifti2_free_extensions (nifti_image *nim);
+int64_t * nifti_get_int64list(int64_t nvals , const char *str);
+int     * nifti_get_intlist  (int nvals , const char *str);
+char * nifti_strdup          (const char *str);
+int    valid_nifti2_extensions(const nifti_image *nim);
+int    nifti_valid_header_size(int ni_ver, int whine);
+
+
+// Remap functions names that have NIfTI-2 variants
+#if (RNIFTI_NIFTILIB_VERSION == 2) && !defined(NO_REMAP_NIFTI2_FUNCTIONS)
+
+#define nifti_get_filesize              nifti2_get_filesize
+
+#define nifti_image_read_bricks         nifti2_image_read_bricks
+#define nifti_image_load_bricks         nifti2_image_load_bricks
+#define nifti_free_NBL                  nifti2_free_NBL
+
+#define nifti_image_read                nifti2_image_read
+#define nifti_image_load                nifti2_image_load
+#define nifti_image_unload              nifti2_image_unload
+#define nifti_image_free                nifti2_image_free
+
+#define nifti_read_collapsed_image      nifti2_read_collapsed_image
+#define nifti_read_subregion_image      nifti2_read_subregion_image
+
+#define nifti_image_write               nifti2_image_write
+#define nifti_image_write_bricks        nifti2_image_write_bricks
+#define nifti_image_infodump            nifti2_image_infodump
+
+#define nifti_disp_lib_hist             nifti2_disp_lib_hist
+#define nifti_disp_matrix_orient        nifti2_disp_matrix_orient
+#define nifti_image_to_ascii            nifti2_image_to_ascii
+#define nifti_image_from_ascii          nifti2_image_from_ascii
+
+#define nifti_get_volsize               nifti2_get_volsize
+
+#define nifti_set_filenames             nifti2_set_filenames
+#define valid_nifti_brick_list          valid_nifti2_brick_list
+#define nifti_image_open                nifti2_image_open
+#define nifti_image_write_hdr_img       nifti2_image_write_hdr_img
+#define nifti_image_write_hdr_img2      nifti2_image_write_hdr_img2
+#define nifti_read_buffer               nifti2_read_buffer
+#define nifti_write_all_data            nifti2_write_all_data
+#define nifti_write_buffer              nifti2_write_buffer
+#define nifti_read_ascii_image          nifti2_read_ascii_image
+#define nifti_write_ascii_image         nifti2_write_ascii_image
+
+#define nifti_read_header               nifti2_read_header
+#define nifti_copy_nim_info             nifti2_copy_nim_info
+#define nifti_make_new_nim              nifti2_make_new_nim
+#define nifti_simple_init_nim           nifti2_simple_init_nim
+
+#define nifti_nim_is_valid              nifti2_nim_is_valid
+#define nifti_nim_has_valid_dims        nifti2_nim_has_valid_dims
+#define is_valid_nifti_type             is_valid_nifti2_type
+#define nifti_type_and_names_match      nifti2_type_and_names_match
+#define nifti_update_dims_from_array    nifti2_update_dims_from_array
+#define nifti_set_iname_offset          nifti2_set_iname_offset
+#define nifti_set_type_from_names       nifti2_set_type_from_names
+#define nifti_add_extension             nifti2_add_extension
+#define nifti_copy_extensions           nifti2_copy_extensions
+#define nifti_free_extensions           nifti2_free_extensions
+#define valid_nifti_extensions          valid_nifti2_extensions
+
+#endif
+
+/*-------------------- Some C convenience macros ----------------------------*/
+
+/* NIfTI-1.1 extension codes:
+   see http://nifti.nimh.nih.gov/nifti-1/documentation/faq#Q21 */
+
+#define NIFTI_ECODE_IGNORE           0  /* changed from UNKNOWN, 29 June 2005 */
+
+#define NIFTI_ECODE_DICOM            2  /* intended for raw DICOM attributes  */
+
+#define NIFTI_ECODE_AFNI             4  /* Robert W Cox: rwcox@nih.gov
+                                           https://afni.nimh.nih.gov/afni     */
+
+#define NIFTI_ECODE_COMMENT          6  /* plain ASCII text only              */
+
+#define NIFTI_ECODE_XCEDE            8  /* David B Keator: dbkeator@uci.edu
+                                           http://www.nbirn.net/Resources
+                                                /Users/Applications/
+                                                /xcede/index.htm              */
+
+#define NIFTI_ECODE_JIMDIMINFO      10  /* Mark A Horsfield:
+                                           mah5@leicester.ac.uk
+                                           http://someplace/something         */
+
+#define NIFTI_ECODE_WORKFLOW_FWDS   12  /* Kate Fissell: fissell@pitt.edu
+                                           http://kraepelin.wpic.pitt.edu
+                                            /~fissell/NIFTI_ECODE_WORKFLOW_FWDS
+                                            /NIFTI_ECODE_WORKFLOW_FWDS.html   */
+
+#define NIFTI_ECODE_FREESURFER      14  /* http://surfer.nmr.mgh.harvard.edu  */
+
+#define NIFTI_ECODE_PYPICKLE        16  /* embedded Python objects
+                                           http://niftilib.sourceforge.net
+                                                 /pynifti                     */
+
+        /* LONI MiND codes: http://www.loni.ucla.edu/twiki/bin/view/Main/MiND */
+#define NIFTI_ECODE_MIND_IDENT      18  /* Vishal Patel: vishal.patel@ucla.edu*/
+#define NIFTI_ECODE_B_VALUE         20
+#define NIFTI_ECODE_SPHERICAL_DIRECTION 22
+#define NIFTI_ECODE_DT_COMPONENT    24
+#define NIFTI_ECODE_SHC_DEGREEORDER 26  /* end LONI MiND codes                */
+
+#define NIFTI_ECODE_VOXBO           28  /* Dan Kimberg: www.voxbo.org         */
+
+#define NIFTI_ECODE_CARET           30  /* John Harwell: john@brainvis.wustl.edu
+                                           http://brainvis.wustl.edu/wiki
+                                             /index.php/Caret:Documentation
+                                             :CaretNiftiExtension             */
+
+#define NIFTI_ECODE_CIFTI           32  /* CIFTI-2_Main_FINAL_1March2014.pdf */
+
+#define NIFTI_ECODE_VARIABLE_FRAME_TIMING 34
+
+/* 36 is currently unassigned, waiting on NIFTI_ECODE_AGILENT_PROCPAR */
+
+#define NIFTI_ECODE_EVAL            38  /* Munster University Hospital */
+
+/* http://www.mathworks.com/matlabcentral/fileexchange/42997-dicom-to-nifti-converter */
+#define NIFTI_ECODE_MATLAB          40  /* MATLAB extension */
+
+/* Quantiphyse extension
+   https://quantiphyse.readthedocs.io/en/latest/advanced/nifti_extension.html*/
+#define NIFTI_ECODE_QUANTIPHYSE     42  /* Quantiphyse extension */
+
+/* Magnetic Resonance Spectroscopy (MRS)
+   link to come... */
+#define NIFTI_ECODE_MRS             44  /* MRS extension */
+
+#define NIFTI_MAX_ECODE             44  /******* maximum extension code *******/
+
+/* nifti_type file codes */
+#if RNIFTI_NIFTILIB_VERSION == 2
+#define NIFTI_FTYPE_ANALYZE   0         /* old ANALYZE */
+#define NIFTI_FTYPE_NIFTI1_1  1         /* NIFTI-1     */
+#define NIFTI_FTYPE_NIFTI1_2  2
+#define NIFTI_FTYPE_ASCII     3
+#define NIFTI_FTYPE_NIFTI2_1  4         /* NIFTI-2     */
+#define NIFTI_FTYPE_NIFTI2_2  5
+#define NIFTI_MAX_FTYPE       5         /* this should match the maximum code */
+#endif
+
+/*------------------------------------------------------------------------*/
+/*-- the rest of these apply only to nifti2_io.c, check for _NIFTI2_IO_C_ */
+
+#ifdef _NIFTI2_IO_C_
+
+typedef struct {
+    int debug;               /*!< debug level for status reports  */
+    int skip_blank_ext;      /*!< skip extender if no extensions  */
+    int allow_upper_fext;    /*!< allow uppercase file extensions */
+    int alter_cifti;         /*!< convert CIFTI dimensions        */
+} nifti_global_options;
+
+typedef struct {
+    int    type;           /* should match the NIFTI_TYPE_ #define */
+    int    nbyper;         /* bytes per value, matches nifti_image */
+    int    swapsize;       /* bytes per swap piece, matches nifti_image */
+    char const * const name;           /* text string to match #define */
+} nifti_type_ele;
+
+#undef  LNI_FERR /* local nifti file error, to be compact and repetative */
+#ifdef USING_R
+#define LNI_FERR(func,msg,file)                                      \
+            Rf_warning("%s: %s '%s'\n",func,msg,file)
+#else
+#define LNI_FERR(func,msg,file)                                      \
+            Rc_fprintf_stderr("** ERROR (%s): %s '%s'\n",func,msg,file)
+#endif
+
+#undef  swap_2
+#undef  swap_4
+#define swap_2(s) nifti_swap_2bytes(1,&(s)) /* s: 2-byte short; swap in place */
+#define swap_4(v) nifti_swap_4bytes(1,&(v)) /* v: 4-byte value; swap in place */
+
+                        /***** isfinite() is a C99 macro, which is
+                               present in many C implementations already *****/
+
+#undef IS_GOOD_FLOAT
+#undef FIXED_FLOAT
+
+#ifdef isfinite       /* use isfinite() to check floats/doubles for goodness */
+#  define IS_GOOD_FLOAT(x) isfinite(x)       /* check if x is a "good" float */
+#  define FIXED_FLOAT(x)   (isfinite(x) ? (x) : 0)           /* fixed if bad */
+#else
+#  define IS_GOOD_FLOAT(x) 1                               /* don't check it */
+#  define FIXED_FLOAT(x)   (x)                               /* don't fix it */
+#endif
+
+#undef  ASSIF                                 /* assign v to *p, if possible */
+#define ASSIF(p,v) if( (p)!=NULL ) *(p) = (v)
+
+#undef  MSB_FIRST
+#undef  LSB_FIRST
+#undef  REVERSE_ORDER
+#define LSB_FIRST 1
+#define MSB_FIRST 2
+#define REVERSE_ORDER(x) (3-(x))    /* convert MSB_FIRST <--> LSB_FIRST */
+
+#define LNI_MAX_NIA_EXT_LEN 100000  /* consider a longer extension invalid */
+
+#undef NIFTI_IS_16_BIT_INT
+#define NIFTI_IS_16_BIT_INT(x) ((x) <= 32767 && (x) >= -32768)
+
+#endif  /* _NIFTI2_IO_C_ section */
+/*------------------------------------------------------------------------*/
+
+/*=================*/
+#ifdef  __cplusplus
+}
+#endif
+/*=================*/
+
+#endif /* _NIFTI2_IO_HEADER_ */
diff --git a/reg-io/nrrd/reg_nrrd.h b/reg-io/nrrd/reg_nrrd.h
index 014f58c4..5caa648b 100644
--- a/reg-io/nrrd/reg_nrrd.h
+++ b/reg-io/nrrd/reg_nrrd.h
@@ -14,7 +14,7 @@
 
 #pragma once
 
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 #include "NrrdIO.h"
 #include "_reg_tools.h"
 #include "_reg_maths.h"
diff --git a/reg-io/png/reg_png.h b/reg-io/png/reg_png.h
index 900552f5..d6d2a543 100644
--- a/reg-io/png/reg_png.h
+++ b/reg-io/png/reg_png.h
@@ -14,7 +14,7 @@
 
 #pragma once
 
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 #include "_reg_tools.h"
 
 /* *************************************************************** */
diff --git a/reg-io/zlib/CMakeLists.txt b/reg-io/zlib/CMakeLists.txt
index 4d0ce45a..ef827947 100644
--- a/reg-io/zlib/CMakeLists.txt
+++ b/reg-io/zlib/CMakeLists.txt
@@ -6,7 +6,6 @@ if(NOT ZLIB_FOUND)
             LIBRARY DESTINATION lib COMPONENT Development
             ARCHIVE DESTINATION lib COMPONENT Development
     )
-    install(FILES zlib.h zutil.h DESTINATION include COMPONENT Development)
     set(ZLIB_LIBRARY "z")
 endif(NOT ZLIB_FOUND)
 #-----------------------------------------------------------------------------
diff --git a/reg-io/znzlib/CMakeLists.txt b/reg-io/znzlib/CMakeLists.txt
new file mode 100644
index 00000000..0122d3e2
--- /dev/null
+++ b/reg-io/znzlib/CMakeLists.txt
@@ -0,0 +1,8 @@
+#-----------------------------------------------------------------------------
+add_library(znz znzlib.c)
+install(TARGETS znz
+    RUNTIME DESTINATION bin COMPONENT Development
+    LIBRARY DESTINATION lib COMPONENT Development
+    ARCHIVE DESTINATION lib COMPONENT Development
+)
+#-----------------------------------------------------------------------------
diff --git a/reg-io/nifti/znzlib.c b/reg-io/znzlib/znzlib.c
old mode 100755
new mode 100644
similarity index 91%
rename from reg-io/nifti/znzlib.c
rename to reg-io/znzlib/znzlib.c
index 7364568c..170a6065
--- a/reg-io/nifti/znzlib.c
+++ b/reg-io/znzlib/znzlib.c
@@ -16,12 +16,13 @@ are required:
    that specifies whether to use compression (1) or not (0)
  - use znz_isnull rather than any (pointer == NULL) comparisons in the code
    for znzfile types (normally done after a return from znzopen)
- 
+
 NB: seeks for writable files with compression are quite restricted
 
  */
 
-#include "znzlib.h"
+#include "znzlib/znzlib.h"
+#include "RNifti/NiftiImage_print.h"
 
 /*
 znzlib.c  (zipped or non-zipped library)
@@ -39,7 +40,7 @@ znzlib.c  (zipped or non-zipped library)
 */
 
 
-/* Note extra argument (use_compression) where 
+/* Note extra argument (use_compression) where
    use_compression==0 is no compression
    use_compression!=0 uses zlib (gzip) compression
 */
@@ -49,7 +50,7 @@ znzFile znzopen(const char *path, const char *mode, int use_compression)
   znzFile file;
   file = (znzFile) calloc(1,sizeof(struct znzptr));
   if( file == NULL ){
-     fprintf(stderr,"** ERROR: znzopen failed to alloc znzptr\n");
+     Rc_fprintf_stderr("** ERROR: znzopen failed to alloc znzptr\n");
      return NULL;
   }
 
@@ -80,13 +81,13 @@ znzFile znzopen(const char *path, const char *mode, int use_compression)
   return file;
 }
 
-
+#ifdef COMPILE_NIFTIUNUSED_CODE
 znzFile znzdopen(int fd, const char *mode, int use_compression)
 {
   znzFile file;
   file = (znzFile) calloc(1,sizeof(struct znzptr));
   if( file == NULL ){
-     fprintf(stderr,"** ERROR: znzdopen failed to alloc znzptr\n");
+     Rc_fprintf_stderr("** ERROR: znzdopen failed to alloc znzptr\n");
      return NULL;
   }
 #ifdef HAVE_ZLIB
@@ -106,6 +107,7 @@ znzFile znzdopen(int fd, const char *mode, int use_compression)
 #endif
   return file;
 }
+#endif
 
 
 int Xznzclose(znzFile * file)
@@ -116,7 +118,7 @@ int Xznzclose(znzFile * file)
     if ((*file)->zfptr!=NULL)  { retval = gzclose((*file)->zfptr); }
 #endif
     if ((*file)->nzfptr!=NULL) { retval = fclose((*file)->nzfptr); }
-                                                                                
+
     free(*file);
     *file = NULL;
   }
@@ -154,7 +156,7 @@ size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file)
 
     /* warn of a short read that will seem complete */
     if( remain > 0 && remain < size )
-       fprintf(stderr,"** znzread: read short by %u bytes\n",(unsigned)remain);
+       Rc_fprintf_stderr("** znzread: read short by %u bytes\n",(unsigned)remain);
 
     return nmemb - remain/size;   /* return number of members processed */
   }
@@ -165,7 +167,7 @@ size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file)
 size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file)
 {
   size_t     remain = size*nmemb;
-  char     * cbuf = (char *)buf;
+  const char * cbuf = (const char *)buf;
   unsigned   n2write;
   int        nwritten;
 
@@ -174,7 +176,7 @@ size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file)
   if (file->zfptr!=NULL) {
     while( remain > 0 ) {
        n2write = (remain < ZNZ_MAX_BLOCK_SIZE) ? remain : ZNZ_MAX_BLOCK_SIZE;
-       nwritten = gzwrite(file->zfptr, (void *)cbuf, n2write);
+       nwritten = gzwrite(file->zfptr, (const void *)cbuf, n2write);
 
        /* gzread returns 0 on error, but in case that ever changes... */
        if( nwritten < 0 ) return nwritten;
@@ -188,7 +190,7 @@ size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file)
 
     /* warn of a short write that will seem complete */
     if( remain > 0 && remain < size )
-      fprintf(stderr,"** znzwrite: write short by %u bytes\n",(unsigned)remain);
+      Rc_fprintf_stderr("** znzwrite: write short by %u bytes\n",(unsigned)remain);
 
     return nmemb - remain/size;   /* return number of members processed */
   }
@@ -239,7 +241,7 @@ int znzputs(const char * str, znzFile file)
   return fputs(str,file->nzfptr);
 }
 
-
+#ifdef COMPILE_NIFTIUNUSED_CODE
 char * znzgets(char* str, int size, znzFile file)
 {
   if (file==NULL) { return NULL; }
@@ -303,13 +305,13 @@ int znzprintf(znzFile stream, const char *format, ...)
     size = strlen(format) + 1000000;  /* overkill I hope */
     tmpstr = (char *)calloc(1, size);
     if( tmpstr == NULL ){
-       fprintf(stderr,"** ERROR: znzprintf failed to alloc %d bytes\n", size);
+       Rc_fprintf_stderr("** ERROR: znzprintf failed to alloc %d bytes\n", size);
        return retval;
     }
-    vsprintf(tmpstr,format,va);
+    vsnprintf(tmpstr,size,format,va);
     retval=gzprintf(stream->zfptr,"%s",tmpstr);
     free(tmpstr);
-  } else 
+  } else
 #endif
   {
    retval=vfprintf(stream->nzfptr,format,va);
@@ -317,6 +319,6 @@ int znzprintf(znzFile stream, const char *format, ...)
   va_end(va);
   return retval;
 }
-
 #endif
 
+#endif
diff --git a/reg-io/nifti/znzlib.h b/reg-io/znzlib/znzlib.h
old mode 100755
new mode 100644
similarity index 54%
rename from reg-io/nifti/znzlib.h
rename to reg-io/znzlib/znzlib.h
index 6f2f2936..d17a8bc6
--- a/reg-io/nifti/znzlib.h
+++ b/reg-io/znzlib/znzlib.h
@@ -1,4 +1,5 @@
-#pragma once
+#ifndef _ZNZLIB_H_
+#define _ZNZLIB_H_
 
 /*
 znzlib.h  (zipped or non-zipped library)
@@ -39,83 +40,87 @@ NB: seeks for writable files with compression are quite restricted
 #ifdef  __cplusplus
 extern "C" {
 #endif
-   /*=================*/
+/*=================*/
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
 
-   /* include optional check for HAVE_FDOPEN here, from deleted config.h:
+/* include optional check for HAVE_FDOPEN here, from deleted config.h:
 
-      uncomment the following line if fdopen() exists for your compiler and
-      compiler options
-   */
-   /* #define HAVE_FDOPEN */
+   uncomment the following line if fdopen() exists for your compiler and
+   compiler options
+*/
+/* #define HAVE_FDOPEN */
 
 
 #ifdef HAVE_ZLIB
-#if defined(ITKZLIB)
+#if defined(ITKZLIB) && !defined(ITK_USE_SYSTEM_ZLIB)
 #include "itk_zlib.h"
 #else
-#include "zlib.h"
+#include "zlib/zlib.h"
 #endif
 #endif
 
-
-   struct znzptr
-   {
-      int withz;
-      FILE* nzfptr;
+struct znzptr {
+  int withz;
+  FILE* nzfptr;
 #ifdef HAVE_ZLIB
-      gzFile zfptr;
+  gzFile zfptr;
 #endif
-   } ;
+} ;
 
-   /* the type for all file pointers */
-   typedef struct znzptr * znzFile;
+/* the type for all file pointers */
+typedef struct znzptr * znzFile;
 
 
-   /* int znz_isnull(znzFile f); */
-   /* int znzclose(znzFile f); */
+/* int znz_isnull(znzFile f); */
+/* int znzclose(znzFile f); */
 #define znz_isnull(f) ((f) == NULL)
 #define znzclose(f)   Xznzclose(&(f))
 
-   /* Note extra argument (use_compression) where
-      use_compression==0 is no compression
-      use_compression!=0 uses zlib (gzip) compression
-   */
+/* Note extra argument (use_compression) where
+   use_compression==0 is no compression
+   use_compression!=0 uses zlib (gzip) compression
+*/
 
-   znzFile znzopen(const char *path, const char *mode, int use_compression);
+znzFile znzopen(const char *path, const char *mode, int use_compression);
 
-   znzFile znzdopen(int fd, const char *mode, int use_compression);
+#ifdef COMPILE_NIFTIUNUSED_CODE
+znzFile znzdopen(int fd, const char *mode, int use_compression);
+#endif
 
-   int Xznzclose(znzFile * file);
+int Xznzclose(znzFile * file);
 
-   size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file);
+size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file);
 
-   size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file);
+size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file);
 
-   long znzseek(znzFile file, long offset, int whence);
+long znzseek(znzFile file, long offset, int whence);
 
-   int znzrewind(znzFile stream);
+int znzrewind(znzFile stream);
 
-   long znztell(znzFile file);
+long znztell(znzFile file);
 
-   int znzputs(const char *str, znzFile file);
+int znzputs(const char *str, znzFile file);
 
-   char * znzgets(char* str, int size, znzFile file);
+#ifdef COMPILE_NIFTIUNUSED_CODE
+char * znzgets(char* str, int size, znzFile file);
 
-   int znzputc(int c, znzFile file);
+int znzputc(int c, znzFile file);
 
-   int znzgetc(znzFile file);
+int znzgetc(znzFile file);
 
 #if !defined(WIN32)
-   int znzprintf(znzFile stream, const char *format, ...);
+int znzprintf(znzFile stream, const char *format, ...);
+#endif
 #endif
 
-   /*=================*/
+/*=================*/
 #ifdef  __cplusplus
 }
 #endif
 /*=================*/
+
+#endif
diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h
index bc1be24b..995f1b2d 100644
--- a/reg-lib/ConvolutionKernel.h
+++ b/reg-lib/ConvolutionKernel.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "Kernel.h"
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 
 class ConvolutionKernel: public Kernel {
 public:
diff --git a/reg-lib/ResampleImageKernel.h b/reg-lib/ResampleImageKernel.h
index 16e3c133..83853cfc 100644
--- a/reg-lib/ResampleImageKernel.h
+++ b/reg-lib/ResampleImageKernel.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "Kernel.h"
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 
 class ResampleImageKernel: public Kernel {
 public:
diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.h b/reg-lib/cpu/CpuBlockMatchingKernel.h
index 60686878..d923f5ed 100644
--- a/reg-lib/cpu/CpuBlockMatchingKernel.h
+++ b/reg-lib/cpu/CpuBlockMatchingKernel.h
@@ -2,7 +2,7 @@
 
 #include "BlockMatchingKernel.h"
 #include "_reg_blockMatching.h"
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 #include "AladinContent.h"
 
 class CpuBlockMatchingKernel: public BlockMatchingKernel {
diff --git a/reg-lib/cpu/CpuOptimiseKernel.h b/reg-lib/cpu/CpuOptimiseKernel.h
index e8b27959..df9865b2 100644
--- a/reg-lib/cpu/CpuOptimiseKernel.h
+++ b/reg-lib/cpu/CpuOptimiseKernel.h
@@ -2,7 +2,7 @@
 
 #include "OptimiseKernel.h"
 #include "_reg_blockMatching.h"
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 #include "AladinContent.h"
 
 class CpuOptimiseKernel: public OptimiseKernel {
diff --git a/reg-lib/cpu/_reg_femTrans.h b/reg-lib/cpu/_reg_femTrans.h
index 8ea483cb..103ede88 100644
--- a/reg-lib/cpu/_reg_femTrans.h
+++ b/reg-lib/cpu/_reg_femTrans.h
@@ -15,7 +15,7 @@
 
 #pragma once
 
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 #include <fstream>
 #include "_reg_maths.h"
 
diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h
index 9d17b595..b2eeeb7e 100755
--- a/reg-lib/cpu/_reg_globalTrans.h
+++ b/reg-lib/cpu/_reg_globalTrans.h
@@ -14,7 +14,7 @@
 
 #pragma once
 
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 #include "_reg_tools.h"
 /* *************************************************************** */
 /// @brief Structure that is used to store the distance between two corresponding voxel
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index 2aa2ff61..6b612905 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -20,7 +20,7 @@
 #include <iostream>
 #include <vector>
 #include <stdexcept>
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 
 #ifdef _OPENMP
 #include <omp.h>
diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp
index 7bd48f42..5a44ef0b 100644
--- a/reg-lib/cpu/_reg_maths_eigen.cpp
+++ b/reg-lib/cpu/_reg_maths_eigen.cpp
@@ -2,7 +2,7 @@
 
 #include "_reg_maths_eigen.h"
 #include "_reg_maths.h"
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 
 // Eigen headers are in there because of the nvcc preprocessing step
 #include "Eigen/Core"
diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h
index 6288764c..8b3239cb 100644
--- a/reg-lib/cpu/_reg_maths_eigen.h
+++ b/reg-lib/cpu/_reg_maths_eigen.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 
 /* *************************************************************** */
 /* Functions calling the Eigen library                             */
diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h
index f2945c33..3705e810 100755
--- a/reg-lib/cpu/_reg_resampling.h
+++ b/reg-lib/cpu/_reg_resampling.h
@@ -13,7 +13,7 @@
 
 #pragma once
 
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 
 /** @brief This function resample a floating image into the space of a reference/warped image.
  * The deformation is provided by a 4D nifti image which is in the space of the reference image.
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h
index 46b02298..99782acc 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.h
+++ b/reg-lib/cuda/_reg_blocksize_gpu.h
@@ -9,7 +9,7 @@
 
 #pragma once
 
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 #include <cuda_runtime.h>
 #include <cuda.h>
 
diff --git a/reg-lib/cuda/affineDeformationKernel.h b/reg-lib/cuda/affineDeformationKernel.h
index 3a584814..a2455525 100644
--- a/reg-lib/cuda/affineDeformationKernel.h
+++ b/reg-lib/cuda/affineDeformationKernel.h
@@ -1,4 +1,4 @@
 #pragma once
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 //
 void launchAffine(mat44 *affineTransformation, nifti_image *deformationField, float** def_d, int** mask_d, float** trans_d, bool compose = false);
\ No newline at end of file
diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h
index 19879dcc..8b76e56b 100644
--- a/reg-lib/cuda/optimizeKernel.h
+++ b/reg-lib/cuda/optimizeKernel.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 
 /*
 extern "C++"
diff --git a/reg-lib/cuda/resampleKernel.h b/reg-lib/cuda/resampleKernel.h
index dfbce71b..3507d90b 100644
--- a/reg-lib/cuda/resampleKernel.h
+++ b/reg-lib/cuda/resampleKernel.h
@@ -1,5 +1,5 @@
 #pragma once
-#include "nifti1_io.h"
+#include "niftilib/nifti1_io.h"
 
 void launchConvolution(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoint, bool *axis);
 void launchResample(nifti_image *floatingImage, nifti_image *warpedImage,  int interp, float paddingValue, bool *dti_timepoint, mat33 * jacMat, float** floatingImage_d, float** warpedImage_d, float** deformationFieldImage_d, int** mask_d, float** floMat_d);

From 414622bbaac57ac0dd8658316ce6e00e50c174a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 7 Mar 2023 15:21:28 +0000
Subject: [PATCH 073/314] Add NiftiImageData::Iterator::reset() to reset the
 iterator

---
 niftyreg_build_version.txt | 2 +-
 reg-io/RNifti/NiftiImage.h | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index bc3d5444..9870ccc7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-186
+187
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 92183705..c8c4ea27 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -385,6 +385,11 @@ class NiftiImageData
         Iterator (const Iterator &other)
             : parent(other.parent), ptr(other.ptr), step(other.step) {}
 
+        /**
+         * Reset the iterator to point to the start of the data blob
+        **/
+        void reset () { ptr = parent.dataPtr; }
+
         Iterator & operator++ () { ptr = static_cast<char*>(ptr) + step; return *this; }
         Iterator operator++ (int) { Iterator copy(*this); ptr = static_cast<char*>(ptr) + step; return copy; }
         Iterator operator+ (ptrdiff_t n) const

From c631dc085c7536ef87869f216cf19f5affecd78e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 7 Mar 2023 15:23:34 +0000
Subject: [PATCH 074/314] Add move constructor and assignment operator for
 NiftiImage

---
 niftyreg_build_version.txt |  2 +-
 reg-io/RNifti/NiftiImage.h | 33 +++++++++++++++++++++++++++------
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9870ccc7..e702a30b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-187
+188
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index c8c4ea27..8a83be1a 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1370,6 +1370,16 @@ class NiftiImage
     void setPixunits (const std::vector<std::string> &pixunits);
 
 public:
+    /**
+     * Swap the contents of two \c NiftiImage objects
+    */
+    friend void swap (NiftiImage &first, NiftiImage &second)
+    {
+        using std::swap;
+        swap(first.image, second.image);
+        swap(first.refCount, second.refCount);
+    }
+
     /**
      * Default constructor
     **/
@@ -1394,6 +1404,19 @@ class NiftiImage
 #endif
     }
 
+    /**
+     * Move constructor
+     * @param source Another \c NiftiImage object
+    **/
+    NiftiImage (NiftiImage &&source)
+        : NiftiImage()
+    {
+        swap(*this, source);
+#ifndef NDEBUG
+        Rc_printf("Acquiring NiftiImage (v%d) with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image);
+#endif
+    }
+
     /**
      * Initialise from a block, copying in the data
      * @param source A \c Block object, referring to part of another \c NiftiImage
@@ -1496,15 +1519,13 @@ class NiftiImage
     nifti_image * operator-> () { return image; }
 
     /**
-     * Copy assignment operator, which copies from its argument
+     * Copy and move assignment operator
      * @param source Another \c NiftiImage
+     * @note Uses copy-and-swap idiom (https://stackoverflow.com/questions/3279543/what-is-the-copy-and-swap-idiom/3279550#3279550)
     **/
-    NiftiImage & operator= (const NiftiImage &source)
+    NiftiImage & operator= (NiftiImage source)
     {
-        copy(source);
-#ifndef NDEBUG
-        Rc_printf("Creating NiftiImage (v%d), with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+        swap(*this, source);
         return *this;
     }
 

From 4015cbfcf341477ef262231c2853386c475a0ff2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 7 Mar 2023 15:30:39 +0000
Subject: [PATCH 075/314] Refactorise NiftiImage

---
 niftyreg_build_version.txt        |   2 +-
 reg-io/RNifti.h                   |  11 +-
 reg-io/RNifti/NiftiImage.h        | 166 ++++++++++++---------------
 reg-io/RNifti/NiftiImage_impl.h   | 182 ++++++++++++++----------------
 reg-io/RNifti/NiftiImage_matrix.h |   5 +-
 reg-io/RNifti/NiftiImage_print.h  |   5 +-
 reg-io/niftilib/nifti1.h          |   7 +-
 reg-io/niftilib/nifti1_io.h       |   5 +-
 reg-io/niftilib/nifti2.h          |   5 +-
 reg-io/niftilib/nifti2_image.h    |   5 +-
 reg-io/niftilib/nifti2_io.h       |   5 +-
 reg-io/znzlib/znzlib.h            |   5 +-
 12 files changed, 174 insertions(+), 229 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e702a30b..6c412452 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-188
+189
diff --git a/reg-io/RNifti.h b/reg-io/RNifti.h
index 2327b601..121053e5 100644
--- a/reg-io/RNifti.h
+++ b/reg-io/RNifti.h
@@ -1,5 +1,4 @@
-#ifndef _RNIFTI_H_
-#define _RNIFTI_H_
+#pragma once
 
 // RNiftyReg and divest have used HAVE_R, so accept this variant for compatibility
 #if !defined(USING_R) && defined(HAVE_R)
@@ -12,7 +11,7 @@
 
 // Versions 1 and 2 of the NIfTI reference library are mutually incompatible, but RNifti does some
 // work to get them to play nicely:
-// 
+//
 // - The compile-time constant RNIFTI_NIFTILIB_VERSION indicates which version of the library has
 //   precedence. nifti1_io.h sets this to 1, and nifti2.io.h to 2, so the first-included header
 //   wins unless the user sets a value explicitly.
@@ -23,9 +22,9 @@
 // - Library functions that are essentially the same in the two versions are fenced out of
 //   nifti1_io.c (if RNIFTI_NIFTILIB_DEDUPLICATE is defined), to avoid duplicate symbols in the
 //   compiled package library.
-// 
+//
 // There are therefore several possible modes of usage:
-// 
+//
 // 1. Standalone programs that include RNifti.h can *first* define RNIFTI_NIFTILIB_VERSION to
 //    choose the library version required (the default is 1). They should link against nifti1_io.o
 //    or nifti2_io.o, accordingly. (A mismatch will result in compiler/linker errors.) See the
@@ -60,5 +59,3 @@ extern void niftilib_register_all (void);
 #ifdef __cplusplus
 } // extern "C"
 #endif
-
-#endif
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 8a83be1a..5a714cbc 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1,6 +1,4 @@
-#ifndef _NIFTI_IMAGE_H_
-#define _NIFTI_IMAGE_H_
-
+#pragma once
 
 #ifdef USING_R
 
@@ -168,13 +166,13 @@ class NiftiImageData
 
     /**
      * Create a concrete type handler appropriate to the datatype code stored with the data
-     * @return The newly allocated type handler, or \c NULL
+     * @return The newly allocated type handler, or \c nullptr
      * @exception runtime_error If the current datatype is unsupported
     **/
     TypeHandler * createHandler ()
     {
         if (_datatype == DT_NONE)
-            return NULL;
+            return nullptr;
 
         switch (_datatype)
         {
@@ -206,13 +204,13 @@ class NiftiImageData
 
     /**
      * Initialiser method, used by constructors
-     * @param data Pointer to a preallocated data blob, or \c NULL
+     * @param data Pointer to a preallocated data blob, or \c nullptr
      * @param length Number of elements in the blob
      * @param datatype NIfTI datatype code appropriate to the blob
      * @param slope Slope parameter for scaling values
      * @param intercept Intercept parameter for scaling values
-     * @param alloc If \c true, the default, and \c data is \c NULL, memory will be allocated for
-     *   the blob. If \c false, the blob will be \c NULL in this case
+     * @param alloc If \c true, the default, and \c data is \c nullptr, memory will be allocated for
+     *   the blob. If \c false, the blob will be \c nullptr in this case
     **/
     void init (void *data, const size_t length, const int datatype, const double slope, const double intercept, const bool alloc = true)
     {
@@ -223,9 +221,9 @@ class NiftiImageData
 
         owner = false;
         handler = createHandler();
-        if (handler == NULL)
-            dataPtr = NULL;
-        else if (alloc && data == NULL)
+        if (handler == nullptr)
+            dataPtr = nullptr;
+        else if (alloc && data == nullptr)
         {
             dataPtr = calloc(length, handler->size());
             owner = true;
@@ -249,7 +247,7 @@ class NiftiImageData
         {
             double dataMin, dataMax, typeMin, typeMax;
             data.minmax(&dataMin, &dataMax);
-            handler->minmax(NULL, 0, &typeMin, &typeMax);
+            handler->minmax(nullptr, 0, &typeMin, &typeMax);
 
             // If the source type is floating-point but values are in range, we will just round them
             if (dataMin < typeMin || dataMax > typeMax)
@@ -274,13 +272,13 @@ class NiftiImageData
         /**
          * Primary constructor
          * @param parent A reference to the parent object
-         * @param ptr An opaque pointer to the element. If \c NULL, the start of the data blob
+         * @param ptr An opaque pointer to the element. If \c nullptr, the start of the data blob
          *   encapsulated by the parent will be used
         **/
-        Element (const NiftiImageData &parent, void *ptr = NULL)
+        Element (const NiftiImageData &parent, void *ptr = nullptr)
             : parent(parent)
         {
-            this->ptr = (ptr == NULL ? parent.dataPtr : ptr);
+            this->ptr = (ptr == nullptr ? parent.dataPtr : ptr);
         }
 
         /**
@@ -371,10 +369,10 @@ class NiftiImageData
          * @param step The increment between elements within the blob, in bytes. If zero, the
          *   default, the width associated with the stored datatype will be used.
         **/
-        Iterator (const NiftiImageData &parent, void *ptr = NULL, const size_t step = 0)
+        Iterator (const NiftiImageData &parent, void *ptr = nullptr, const size_t step = 0)
             : parent(parent)
         {
-            this->ptr = (ptr == NULL ? parent.dataPtr : ptr);
+            this->ptr = (ptr == nullptr ? parent.dataPtr : ptr);
             this->step = (step == 0 ? parent.handler->size() : step);
         }
 
@@ -426,11 +424,11 @@ class NiftiImageData
      * Default constructor, creating an empty data object
     **/
     NiftiImageData ()
-        : slope(1.0), intercept(0.0), dataPtr(NULL), _datatype(DT_NONE), handler(NULL), _length(0), owner(false) {}
+        : slope(1.0), intercept(0.0), dataPtr(nullptr), _datatype(DT_NONE), handler(nullptr), _length(0), owner(false) {}
 
     /**
      * Primary constructor
-     * @param data A pointer to a pre-allocated data blob, or \c NULL. In the latter case, memory
+     * @param data A pointer to a pre-allocated data blob, or \c nullptr. In the latter case, memory
      *   will be allocated by the object, and cleaned up at destruction unless it is disowned
      * @param length The number of elements in the blob
      * @param datatype The NIfTI datatype code corresponding to the type of the data elements
@@ -448,8 +446,8 @@ class NiftiImageData
     **/
     NiftiImageData (nifti_image *image)
     {
-        if (image == NULL)
-            init(NULL, 0, DT_NONE, 0.0, 0.0, false);
+        if (image == nullptr)
+            init(nullptr, 0, DT_NONE, 0.0, 0.0, false);
         else
             init(image->data, image->nvox, image->datatype, static_cast<double>(image->scl_slope), static_cast<double>(image->scl_inter), false);
     }
@@ -463,7 +461,7 @@ class NiftiImageData
     **/
     NiftiImageData (const NiftiImageData &source, const int datatype = DT_NONE)
     {
-        init(NULL, source.length(), datatype == DT_NONE ? source.datatype() : datatype, source.slope, source.intercept);
+        init(nullptr, source.length(), datatype == DT_NONE ? source.datatype() : datatype, source.slope, source.intercept);
 
         if (datatype == DT_NONE || datatype == source.datatype())
             memcpy(dataPtr, source.dataPtr, source.totalBytes());
@@ -485,7 +483,7 @@ class NiftiImageData
     NiftiImageData (InputIterator from, InputIterator to, const int datatype)
     {
         const size_t length = static_cast<size_t>(std::distance(from, to));
-        init(NULL, length, datatype, 1.0, 0.0);
+        init(nullptr, length, datatype, 1.0, 0.0);
         std::copy(from, to, this->begin());
     }
 
@@ -506,12 +504,12 @@ class NiftiImageData
     **/
     NiftiImageData & operator= (const NiftiImageData &source)
     {
-        if (source.dataPtr != NULL)
+        if (source.dataPtr != nullptr)
         {
             // Free the old data, if we allocated it
             if (owner)
                 free(dataPtr);
-            init(NULL, source.length(), source.datatype(), source.slope, source.intercept);
+            init(nullptr, source.length(), source.datatype(), source.slope, source.intercept);
             memcpy(dataPtr, source.dataPtr, source.totalBytes());
         }
         return *this;
@@ -522,17 +520,17 @@ class NiftiImageData
     size_t length () const           { return _length; }                /**< Return the number of elements in the data */
     size_t size () const             { return _length; }                /**< Return the number of elements in the data */
 
-    /** Return the number of bytes used per element, or zero if the datatype is undefined or the blob is \c NULL */
-    size_t bytesPerPixel () const    { return (handler == NULL ? 0 : handler->size()); }
+    /** Return the number of bytes used per element, or zero if the datatype is undefined or the blob is \c nullptr */
+    size_t bytesPerPixel () const    { return (handler == nullptr ? 0 : handler->size()); }
 
     /** Return the total size of the data blob, in bytes */
     size_t totalBytes () const       { return _length * bytesPerPixel(); }
 
     /**
      * Determine whether or not the object is empty
-     * @return \c true if the data pointer is \c NULL; \c false otherwise
+     * @return \c true if the data pointer is \c nullptr; \c false otherwise
     **/
-    bool isEmpty () const            { return (dataPtr == NULL); }
+    bool isEmpty () const            { return (dataPtr == nullptr); }
 
     /**
      * Determine whether the object uses data scaling
@@ -613,7 +611,7 @@ class NiftiImageData
     **/
     void minmax (double *min, double *max) const
     {
-        if (handler == NULL)
+        if (handler == nullptr)
         {
             *min = 0.0;
             *max = 0.0;
@@ -962,10 +960,10 @@ class NiftiImage
 
     public:
         /**
-         * Default constructor, wrapping \c NULL
+         * Default constructor, wrapping \c nullptr
         **/
         Extension ()
-            : ext(NULL) {}
+            : ext(nullptr) {}
 
         /**
          * Initialise from an existing \c nifti1_extension (which is used by both NIfTI-1 and
@@ -976,7 +974,7 @@ class NiftiImage
         **/
         Extension (nifti1_extension * const extension, const bool copy = false)
         {
-            if (!copy || extension == NULL)
+            if (!copy || extension == nullptr)
                 this->ext = extension;
             else
                 this->copy(extension);
@@ -1039,27 +1037,27 @@ class NiftiImage
 
         /**
          * Return the code associated with the extension
-         * @return An integer code giving the relevant code, or -1 if the extension is \c NULL
+         * @return An integer code giving the relevant code, or -1 if the extension is \c nullptr
         **/
-        int code () const { return (ext == NULL ? -1 : ext->ecode); }
+        int code () const { return (ext == nullptr ? -1 : ext->ecode); }
 
         /**
          * Return the data blob associated with the extension
          * @return The data, as a byte array
         **/
-        const char * data () const { return (ext == NULL ? NULL : ext->edata); }
+        const char * data () const { return (ext == nullptr ? nullptr : ext->edata); }
 
         /**
          * Return the length of the data array
          * @return The length of the data array, in bytes
         **/
-        size_t length () const { return (ext == NULL || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); }
+        size_t length () const { return (ext == nullptr || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); }
 
         /**
          * Return the length of the data array
          * @return The length of the data array, in bytes
         **/
-        size_t size () const { return (ext == NULL || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); }
+        size_t size () const { return (ext == nullptr || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); }
 
 #ifdef USING_R
         /**
@@ -1067,7 +1065,7 @@ class NiftiImage
         **/
         operator SEXP () const
         {
-            if (ext == NULL || ext->esize < 8)
+            if (ext == nullptr || ext->esize < 8)
                 return R_NilValue;
 
             const int length = ext->esize - 8;
@@ -1109,7 +1107,7 @@ class NiftiImage
 
         /**
          * Replace the current matrix with a new one. This function propagates the changes to the
-         * linked arrays, if they are not \c NULL.
+         * linked arrays, if they are not \c nullptr.
         **/
         void replace (const Matrix &source);
 
@@ -1118,32 +1116,32 @@ class NiftiImage
          * Default constructor
         **/
         Xform ()
-            : forward(NULL), inverse(NULL), qparams(NULL), mat() {}
+            : forward(nullptr), inverse(nullptr), qparams(nullptr), mat() {}
 
         /**
          * Initialise from a 4x4 \ref SquareMatrix
         **/
         Xform (const Matrix &source)
-            : forward(NULL), inverse(NULL), qparams(NULL), mat(source) {}
+            : forward(nullptr), inverse(nullptr), qparams(nullptr), mat(source) {}
 
         /**
          * Initialise from a constant NIfTI \c mat44 or \c dmat44
         **/
         Xform (const Matrix::NativeType &source)
-            : forward(NULL), inverse(NULL), qparams(NULL), mat(source) {}
+            : forward(nullptr), inverse(nullptr), qparams(nullptr), mat(source) {}
 
         /**
          * Initialise from a NIfTI \c mat44 or \c dmat44. The data in the linked matrix will be
          * replaced if this object is updated.
         **/
         Xform (Matrix::NativeType &source)
-            : forward(*source.m), inverse(NULL), qparams(NULL), mat(source) {}
+            : forward(*source.m), inverse(nullptr), qparams(nullptr), mat(source) {}
 
         /**
          * Initialise from forward and backward matrices, and optionally quaternion parameters.
          * These will all be linked to the new object and replaced if it is updated.
         **/
-        Xform (Matrix::NativeType &source, Matrix::NativeType &inverse, Element *qparams = NULL)
+        Xform (Matrix::NativeType &source, Matrix::NativeType &inverse, Element *qparams = nullptr)
             : forward(*source.m), inverse(*inverse.m), qparams(qparams), mat(source) {}
 
 #ifdef USING_R
@@ -1151,7 +1149,7 @@ class NiftiImage
          * Initialise from an R numeric matrix object
         **/
         Xform (SEXP source)
-            : forward(NULL), inverse(NULL), qparams(NULL), mat(Matrix(source)) {}
+            : forward(nullptr), inverse(nullptr), qparams(nullptr), mat(Matrix(source)) {}
 #endif
 
         /**
@@ -1285,17 +1283,7 @@ class NiftiImage
     void acquire (nifti_image * const image);
 
     /**
-     * Acquire the same pointer as another \c NiftiImage, incrementing the shared reference count
-     * @param source A reference to a \c NiftiImage
-    **/
-    void acquire (const NiftiImage &source)
-    {
-        refCount = source.refCount;
-        acquire(source.image);
-    }
-
-    /**
-     * Release the currently wrapped pointer, if it is not \c NULL, decrementing the reference
+     * Release the currently wrapped pointer, if it is not \c nullptr, decrementing the reference
      * count and releasing memory if there are no remaining references to the pointer
     **/
     void release ();
@@ -1306,12 +1294,6 @@ class NiftiImage
     **/
     void copy (const nifti_image *source);
 
-    /**
-     * Copy the contents of another \c NiftiImage to create a new image, acquiring a new pointer
-     * @param source A reference to a \c NiftiImage
-    **/
-    void copy (const NiftiImage &source);
-
     /**
      * Copy the contents of a \ref Block to create a new image, acquiring a new pointer
      * @param source A reference to a \ref Block
@@ -1384,7 +1366,7 @@ class NiftiImage
      * Default constructor
     **/
     NiftiImage ()
-        : image(NULL), refCount(NULL) {}
+        : image(nullptr), refCount(nullptr) {}
 
     /**
      * Copy constructor
@@ -1393,12 +1375,14 @@ class NiftiImage
      * object wraps the same \c nifti_image and increments the shared reference count
     **/
     NiftiImage (const NiftiImage &source, const bool copy = true)
-        : image(NULL), refCount(NULL)
+        : image(nullptr), refCount(nullptr)
     {
-        if (copy)
+        if (copy) {
             this->copy(source);
-        else
-            acquire(source);
+        } else {
+            refCount = source.refCount;
+            acquire(source.image);
+        }
 #ifndef NDEBUG
         Rc_printf("Creating NiftiImage (v%d) with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image);
 #endif
@@ -1422,7 +1406,7 @@ class NiftiImage
      * @param source A \c Block object, referring to part of another \c NiftiImage
     **/
     NiftiImage (const Block &source)
-        : image(NULL), refCount(NULL)
+        : NiftiImage()
     {
         this->copy(source);
 #ifndef NDEBUG
@@ -1432,12 +1416,12 @@ class NiftiImage
 
     /**
      * Initialise using an existing \c nifti_image pointer
-     * @param image An existing \c nifti_image pointer, possibly \c NULL
+     * @param image An existing \c nifti_image pointer, possibly \c nullptr
      * @param copy If \c true, the image data will be copied; otherwise this object just wraps
      * the pointer passed to it
     **/
     NiftiImage (nifti_image * const image, const bool copy = false)
-        : image(NULL), refCount(NULL)
+        : NiftiImage()
     {
         if (copy)
             this->copy(image);
@@ -1553,16 +1537,16 @@ class NiftiImage
     NiftiImage & setPersistence (const bool persistent) { return *this; }
 
     /**
-     * Determine whether or not the wrapped pointer is \c NULL
-     * @return \c true if the wrapped pointer is \c NULL; \c false otherwise
+     * Determine whether or not the wrapped pointer is \c nullptr
+     * @return \c true if the wrapped pointer is \c nullptr; \c false otherwise
     **/
-    bool isNull () const { return (image == NULL); }
+    bool isNull () const { return (image == nullptr); }
 
     /**
      * Determine whether the wrapped pointer is shared with another \c NiftiImage
      * @return \c true if the reference count is greater than 1; \c false otherwise
     **/
-    bool isShared () const { return (refCount != NULL && *refCount > 1); }
+    bool isShared () const { return (refCount != nullptr && *refCount > 1); }
 
     /**
      * Determine whether or not the image is marked as persistent
@@ -1577,7 +1561,7 @@ class NiftiImage
      * @return \c true if the object wraps an image pointer, its slope is not zero and the slope
      *         and intercept are not exactly one and zero; \c false otherwise
     **/
-    bool isDataScaled () const { return (image != NULL && image->scl_slope != 0.0 && (image->scl_slope != 1.0 || image->scl_inter != 0.0)); }
+    bool isDataScaled () const { return (image != nullptr && image->scl_slope != 0.0 && (image->scl_slope != 1.0 || image->scl_inter != 0.0)); }
 
     /**
      * Return the number of dimensions in the image
@@ -1585,7 +1569,7 @@ class NiftiImage
     **/
     int nDims () const
     {
-        if (image == NULL)
+        if (image == nullptr)
             return 0;
         else
             return image->ndim;
@@ -1597,7 +1581,7 @@ class NiftiImage
     **/
     std::vector<dim_t> dim () const
     {
-        if (image == NULL)
+        if (image == nullptr)
             return std::vector<dim_t>();
         else
             return std::vector<dim_t>(image->dim+1, image->dim+image->ndim+1);
@@ -1609,7 +1593,7 @@ class NiftiImage
     **/
     std::vector<pixdim_t> pixdim () const
     {
-        if (image == NULL)
+        if (image == nullptr)
             return std::vector<pixdim_t>();
         else
             return std::vector<pixdim_t>(image->pixdim+1, image->pixdim+image->ndim+1);
@@ -1756,31 +1740,31 @@ class NiftiImage
      * Access the qform matrix
      * @return An \ref Xform object
     **/
-    const Xform qform () const { return (image == NULL ? Xform() : Xform(image->qto_xyz)); }
+    const Xform qform () const { return (image == nullptr ? Xform() : Xform(image->qto_xyz)); }
 
     /**
      * Access the qform matrix
      * @return An \ref Xform object
     **/
-    Xform qform () { return (image == NULL ? Xform() : Xform(image->qto_xyz, image->qto_ijk, &image->quatern_b)); }
+    Xform qform () { return (image == nullptr ? Xform() : Xform(image->qto_xyz, image->qto_ijk, &image->quatern_b)); }
 
     /**
      * Access the sform matrix
      * @return An \ref Xform object
     **/
-    const Xform sform () const { return (image == NULL ? Xform() : Xform(image->sto_xyz)); }
+    const Xform sform () const { return (image == nullptr ? Xform() : Xform(image->sto_xyz)); }
 
     /**
      * Access the sform matrix
      * @return An \ref Xform object
     **/
-    Xform sform () { return (image == NULL ? Xform() : Xform(image->sto_xyz, image->sto_ijk)); }
+    Xform sform () { return (image == nullptr ? Xform() : Xform(image->sto_xyz, image->sto_ijk)); }
 
     /**
      * Return the number of blocks in the image
      * @return An integer giving the number of blocks in the image
     **/
-    dim_t nBlocks () const { return (image == NULL ? 0 : image->dim[image->ndim]); }
+    dim_t nBlocks () const { return (image == nullptr ? 0 : image->dim[image->ndim]); }
 
     /**
      * Extract a block from the image
@@ -1835,7 +1819,7 @@ class NiftiImage
     **/
     int nChannels () const
     {
-        if (image == NULL)
+        if (image == nullptr)
             return 0;
         else
         {
@@ -1853,13 +1837,13 @@ class NiftiImage
      * Return the number of voxels in the image
      * @return An integer giving the number of voxels in the image
     **/
-    size_t nVoxels () const { return (image == NULL ? 0 : image->nvox); }
+    size_t nVoxels () const { return (image == nullptr ? 0 : image->nvox); }
 
     /**
      * Return the number of extensions associated with the image
      * @return An integer giving the number of extensions
     **/
-    int nExtensions () const { return (image == NULL ? 0 : image->num_ext); }
+    int nExtensions () const { return (image == nullptr ? 0 : image->num_ext); }
 
     /**
      * Return a list of the extensions associated with the image
@@ -1869,7 +1853,7 @@ class NiftiImage
     **/
     std::list<Extension> extensions (const int code = -1) const
     {
-        if (image == NULL)
+        if (image == nullptr)
             return std::list<Extension>();
         else
         {
@@ -1891,7 +1875,7 @@ class NiftiImage
     **/
     NiftiImage & addExtension (const Extension &extension)
     {
-        if (image != NULL)
+        if (image != nullptr)
 #if RNIFTI_NIFTILIB_VERSION == 1
             nifti_add_extension(image, extension.data(), int(extension.length()), extension.code());
 #elif RNIFTI_NIFTILIB_VERSION == 2
@@ -1919,7 +1903,7 @@ class NiftiImage
     **/
     NiftiImage & dropExtensions ()
     {
-        if (image != NULL)
+        if (image != nullptr)
 #if RNIFTI_NIFTILIB_VERSION == 1
             nifti_free_extensions(image);
 #elif RNIFTI_NIFTILIB_VERSION == 2
@@ -1979,5 +1963,3 @@ class NiftiImage
 #include "RNifti/NiftiImage_impl.h"
 
 } // main namespace
-
-#endif
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index 2e7c6b7a..75018afb 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -1,5 +1,4 @@
-#ifndef _NIFTI_IMAGE_IMPL_H_
-#define _NIFTI_IMAGE_IMPL_H_
+#pragma once
 
 namespace internal {
 
@@ -117,8 +116,8 @@ struct ElementConverter
 // By nature this is a risky operation, which has to make assumptions about the layout of the structs in memory
 inline nifti1_image * convertImageV2to1 (nifti2_image *image)
 {
-    if (image == NULL)
-        return NULL;
+    if (image == nullptr)
+        return nullptr;
 
     nifti1_image *result = (nifti1_image *) calloc(1, sizeof(nifti1_image));
 
@@ -144,7 +143,7 @@ inline nifti1_image * convertImageV2to1 (nifti2_image *image)
     // Copy buffers, since the memory-freeing logic isn't portable between struct versions
     result->fname = nifti_strdup(image->fname);
     result->iname = nifti_strdup(image->iname);
-    if (image->data != NULL)
+    if (image->data != nullptr)
     {
         result->data = calloc(result->nvox, result->nbyper);
         memcpy(result->data, image->data, result->nvox * result->nbyper);
@@ -152,7 +151,7 @@ inline nifti1_image * convertImageV2to1 (nifti2_image *image)
 
     // Copy extensions
     result->num_ext = image->num_ext;
-    result->ext_list = result->num_ext == 0 ? NULL : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension));
+    result->ext_list = result->num_ext == 0 ? nullptr : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension));
     for (int i=0; i<result->num_ext; i++)
     {
         result->ext_list[i].esize = image->ext_list[i].esize;
@@ -173,8 +172,8 @@ inline nifti1_image * convertImageV2to1 (nifti2_image *image)
 // Byte-by-byte conversion of nifti1_image struct to a nifti2_image
 inline nifti2_image * convertImageV1to2 (nifti1_image *image)
 {
-    if (image == NULL)
-        return NULL;
+    if (image == nullptr)
+        return nullptr;
 
     nifti2_image *result = (nifti2_image *) calloc(1, sizeof(nifti2_image));
 
@@ -198,14 +197,14 @@ inline nifti2_image * convertImageV1to2 (nifti1_image *image)
 
     result->fname = nifti_strdup(image->fname);
     result->iname = nifti_strdup(image->iname);
-    if (image->data != NULL)
+    if (image->data != nullptr)
     {
         result->data = calloc(result->nvox, result->nbyper);
         memcpy(result->data, image->data, result->nvox * result->nbyper);
     }
 
     result->num_ext = image->num_ext;
-    result->ext_list = result->num_ext == 0 ? NULL : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension));
+    result->ext_list = result->num_ext == 0 ? nullptr : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension));
     for (int i=0; i<result->num_ext; i++)
     {
         result->ext_list[i].esize = image->ext_list[i].esize;
@@ -269,7 +268,7 @@ inline void copyIfPresent (const Rcpp::List &list, const std::set<std::string> n
 
 inline void updateHeader (nifti_1_header *header, const Rcpp::List &list, const bool ignoreDatatype = false)
 {
-    if (header == NULL || Rf_isNull(list.names()))
+    if (header == nullptr || Rf_isNull(list.names()))
         return;
 
     const Rcpp::CharacterVector _names = list.names();
@@ -409,7 +408,7 @@ inline void addAttributes (const SEXP pointer, const NiftiImage &source, const b
 template <typename Type, bool alpha>
 inline void NiftiImageData::ConcreteTypeHandler<Type,alpha>::minmax (void *ptr, const size_t length, double *min, double *max) const
 {
-    if (ptr == NULL || length < 1)
+    if (ptr == nullptr || length < 1)
     {
         *min = static_cast<double>(std::numeric_limits<Type>::min());
         *max = static_cast<double>(std::numeric_limits<Type>::max());
@@ -434,7 +433,7 @@ inline void NiftiImageData::ConcreteTypeHandler<Type,alpha>::minmax (void *ptr,
 template <typename ElementType>
 inline void NiftiImageData::ConcreteTypeHandler<std::complex<ElementType>,false>::minmax (void *ptr, const size_t length, double *min, double *max) const
 {
-    if (ptr == NULL || length < 1)
+    if (ptr == nullptr || length < 1)
     {
         *min = static_cast<double>(std::numeric_limits<ElementType>::min());
         *max = static_cast<double>(std::numeric_limits<ElementType>::max());
@@ -514,14 +513,14 @@ inline NiftiImageData::Element & NiftiImageData::Element::operator= (const Nifti
 
 inline void NiftiImage::Extension::copy (const nifti1_extension *source)
 {
-    if (source == NULL)
-        ext = NULL;
+    if (source == nullptr)
+        ext = nullptr;
     else
     {
         ext = (nifti1_extension *) calloc(1, sizeof(nifti1_extension));
         ext->esize = source->esize;
         ext->ecode = source->ecode;
-        if (source->edata != NULL && source->esize > 8)
+        if (source->edata != nullptr && source->esize > 8)
         {
             ext->edata = (char *) calloc(source->esize - 8, 1);
             memcpy(ext->edata, source->edata, source->esize - 8);
@@ -532,8 +531,8 @@ inline void NiftiImage::Extension::copy (const nifti1_extension *source)
 template <typename SourceType>
 inline void NiftiImage::Extension::copy (const SourceType *data, const size_t length, const int code)
 {
-    if (data == NULL)
-        ext = NULL;
+    if (data == nullptr)
+        ext = nullptr;
     else
     {
         const size_t bytes = length * sizeof(SourceType);
@@ -550,19 +549,19 @@ inline void NiftiImage::Extension::copy (const SourceType *data, const size_t le
 inline void NiftiImage::Xform::replace (const Matrix &source)
 {
     mat = source;
-    if (forward != NULL)
+    if (forward != nullptr)
         std::copy(source.begin(), source.end(), forward);
-    if (inverse != NULL)
+    if (inverse != nullptr)
     {
         Matrix inv = source.inverse();
         std::copy(inv.begin(), inv.end(), inverse);
     }
-    if (qparams != NULL)
+    if (qparams != nullptr)
     {
 #if RNIFTI_NIFTILIB_VERSION == 1
-        nifti_mat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, NULL, NULL, NULL, qparams+6);
+        nifti_mat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, nullptr, nullptr, nullptr, qparams+6);
 #elif RNIFTI_NIFTILIB_VERSION == 2
-        nifti_dmat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, NULL, NULL, NULL, qparams+6);
+        nifti_dmat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, nullptr, nullptr, nullptr, qparams+6);
 #endif
     }
 }
@@ -583,10 +582,10 @@ inline NiftiImage::Xform::Submatrix NiftiImage::Xform::rotation () const
     NiftiImage::Xform::Vector3 qbcd;
     NiftiImage::Xform::Element qfac;
 #if RNIFTI_NIFTILIB_VERSION == 1
-    nifti_mat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], NULL, NULL, NULL, NULL, NULL, NULL, &qfac);
+    nifti_mat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &qfac);
     NiftiImage::Xform rotation = nifti_quatern_to_mat44(qbcd[0], qbcd[1], qbcd[2], 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, qfac);
 #elif RNIFTI_NIFTILIB_VERSION == 2
-    nifti_dmat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], NULL, NULL, NULL, NULL, NULL, NULL, &qfac);
+    nifti_dmat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &qfac);
     NiftiImage::Xform rotation = nifti_quatern_to_dmat44(qbcd[0], qbcd[1], qbcd[2], 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, qfac);
 #endif
     return rotation.submatrix();
@@ -596,9 +595,9 @@ inline NiftiImage::Xform::Element NiftiImage::Xform::handedness () const
 {
     NiftiImage::Xform::Element qfac;
 #if RNIFTI_NIFTILIB_VERSION == 1
-    nifti_mat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &qfac);
+    nifti_mat44_to_quatern(mat, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &qfac);
 #elif RNIFTI_NIFTILIB_VERSION == 2
-    nifti_dmat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &qfac);
+    nifti_dmat44_to_quatern(mat, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &qfac);
 #endif
     return qfac;
 }
@@ -607,9 +606,9 @@ inline NiftiImage::Xform::Vector4 NiftiImage::Xform::quaternion () const
 {
     NiftiImage::Xform::Vector4 q;
 #if RNIFTI_NIFTILIB_VERSION == 1
-    nifti_mat44_to_quatern(mat, &q[1], &q[2], &q[3], NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+    nifti_mat44_to_quatern(mat, &q[1], &q[2], &q[3], nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);
 #elif RNIFTI_NIFTILIB_VERSION == 2
-    nifti_dmat44_to_quatern(mat, &q[1], &q[2], &q[3], NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+    nifti_dmat44_to_quatern(mat, &q[1], &q[2], &q[3], nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);
 #endif
     q[0] = 1 - (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]);
     return q;
@@ -627,9 +626,9 @@ inline NiftiImage::Xform::Vector3 NiftiImage::Xform::spacing () const
 {
     NiftiImage::Xform::Vector3 vec;
 #if RNIFTI_NIFTILIB_VERSION == 1
-    nifti_mat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, &vec[0], &vec[1], &vec[2], NULL);
+    nifti_mat44_to_quatern(mat, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &vec[0], &vec[1], &vec[2], nullptr);
 #elif RNIFTI_NIFTILIB_VERSION == 2
-    nifti_dmat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, &vec[0], &vec[1], &vec[2], NULL);
+    nifti_dmat44_to_quatern(mat, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &vec[0], &vec[1], &vec[2], nullptr);
 #endif
     return vec;
 }
@@ -663,8 +662,8 @@ inline std::string NiftiImage::Xform::orientation () const
 inline int NiftiImage::fileVersion (const std::string &path)
 {
 #if RNIFTI_NIFTILIB_VERSION == 1
-    nifti_1_header *header = nifti_read_header(internal::stringToPath(path), NULL, false);
-    if (header == NULL)
+    nifti_1_header *header = nifti_read_header(internal::stringToPath(path), nullptr, false);
+    if (header == nullptr)
         return -1;
     else
     {
@@ -691,7 +690,7 @@ inline int NiftiImage::fileVersion (const std::string &path)
 #elif RNIFTI_NIFTILIB_VERSION == 2
     int version;
     void *header = nifti2_read_header(internal::stringToPath(path), &version, true);
-    if (header == NULL)
+    if (header == nullptr)
         return -1;
     free(header);
     return version;
@@ -701,14 +700,14 @@ inline int NiftiImage::fileVersion (const std::string &path)
 inline void NiftiImage::acquire (nifti_image * const image)
 {
     // If we're taking ownership of a new image, release the old one
-    if (this->image != NULL && this->image != image)
+    if (this->image != nullptr && this->image != image)
         release();
 
     // Set the internal pointer and create or update the reference counter
     this->image = image;
-    if (image != NULL)
+    if (image != nullptr)
     {
-        if (this->refCount == NULL)
+        if (this->refCount == nullptr)
             this->refCount = new int(1);
         else
             (*this->refCount)++;
@@ -721,9 +720,9 @@ inline void NiftiImage::acquire (nifti_image * const image)
 
 inline void NiftiImage::release ()
 {
-    if (this->image != NULL)
+    if (this->image != nullptr)
     {
-        if (this->refCount != NULL)
+        if (this->refCount != nullptr)
         {
             (*this->refCount)--;
 #ifndef NDEBUG
@@ -736,9 +735,9 @@ inline void NiftiImage::release ()
 #elif RNIFTI_NIFTILIB_VERSION == 2
                 nifti2_image_free(this->image);
 #endif
-                this->image = NULL;
+                this->image = nullptr;
                 delete this->refCount;
-                this->refCount = NULL;
+                this->refCount = nullptr;
             }
         }
         else
@@ -748,13 +747,13 @@ inline void NiftiImage::release ()
 
 inline void NiftiImage::copy (const nifti_image *source)
 {
-    if (source == NULL)
-        acquire(NULL);
+    if (source == nullptr)
+        acquire(nullptr);
     else
     {
 #if RNIFTI_NIFTILIB_VERSION == 1
         acquire(nifti_copy_nim_info(source));
-        if (source->data != NULL)
+        if (source->data != nullptr)
         {
             size_t dataSize = nifti_get_volsize(source);
             image->data = calloc(1, dataSize);
@@ -762,7 +761,7 @@ inline void NiftiImage::copy (const nifti_image *source)
         }
 #elif RNIFTI_NIFTILIB_VERSION == 2
         acquire(nifti2_copy_nim_info(source));
-        if (source->data != NULL)
+        if (source->data != nullptr)
         {
             size_t dataSize = nifti2_get_volsize(source);
             image->data = calloc(1, dataSize);
@@ -772,18 +771,11 @@ inline void NiftiImage::copy (const nifti_image *source)
     }
 }
 
-inline void NiftiImage::copy (const NiftiImage &source)
-{
-    const nifti_image *sourceStruct = source;
-
-    copy(sourceStruct);
-}
-
 inline void NiftiImage::copy (const Block &source)
 {
     const nifti_image *sourceStruct = source.image;
-    if (sourceStruct == NULL)
-        acquire(NULL);
+    if (sourceStruct == nullptr)
+        acquire(nullptr);
     else
     {
 #if RNIFTI_NIFTILIB_VERSION == 1
@@ -793,7 +785,7 @@ inline void NiftiImage::copy (const Block &source)
         image->pixdim[source.dimension] = 1.0;
         nifti_update_dims_from_array(image);
 
-        if (sourceStruct->data != NULL)
+        if (sourceStruct->data != nullptr)
         {
             size_t blockSize = nifti_get_volsize(image);
             image->data = calloc(1, blockSize);
@@ -806,7 +798,7 @@ inline void NiftiImage::copy (const Block &source)
         image->pixdim[source.dimension] = 1.0;
         nifti2_update_dims_from_array(image);
 
-        if (sourceStruct->data != NULL)
+        if (sourceStruct->data != nullptr)
         {
             size_t blockSize = nifti2_get_volsize(image);
             image->data = calloc(1, blockSize);
@@ -895,14 +887,14 @@ inline void NiftiImage::initFromNiftiS4 (const Rcpp::RObject &object, const bool
         throw std::runtime_error("Data type is not supported");
 
 #if RNIFTI_NIFTILIB_VERSION == 1
-    acquire(nifti_convert_nhdr2nim(header, NULL));
+    acquire(nifti_convert_nhdr2nim(header, nullptr));
 #elif RNIFTI_NIFTILIB_VERSION == 2
-    acquire(nifti_convert_n1hdr2nim(header, NULL));
+    acquire(nifti_convert_n1hdr2nim(header, nullptr));
 #endif
 
     const Rcpp::RObject data = object.slot(".Data");
     if (!copyData || Rf_length(data) <= 1)
-        this->image->data = NULL;
+        this->image->data = nullptr;
     else if (header.datatype == DT_INT32)
     {
         Rcpp::IntegerVector intData(data);
@@ -921,7 +913,7 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo
     Rcpp::Function getXform = mriImage.field("getXform");
     Rcpp::NumericMatrix xform = getXform();
 
-    acquire(NULL);
+    acquire(nullptr);
 
     if (Rf_length(mriImage.field("tags")) > 0)
         initFromList(mriImage.field("tags"));
@@ -946,7 +938,7 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo
         nVoxels *= dimVector[i];
     }
 
-    if (this->image == NULL)
+    if (this->image == nullptr)
     {
 #if RNIFTI_NIFTILIB_VERSION == 1
         acquire(nifti_make_new_nim(dims, datatype, FALSE));
@@ -958,7 +950,7 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo
     {
         std::copy(dims, dims+8, this->image->dim);
         this->image->datatype = datatype;
-        nifti_datatype_sizes(image->datatype, &image->nbyper, NULL);
+        nifti_datatype_sizes(image->datatype, &image->nbyper, nullptr);
     }
 
     if (copyData && !Rf_isNull(data))
@@ -972,7 +964,7 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo
             memcpy(this->image->data, REAL(data), dataSize);
     }
     else
-        this->image->data = NULL;
+        this->image->data = nullptr;
 
     const std::vector<pixdim_t> pixdimVector = mriImage.field("voxelDims");
     const int pixdimLength = pixdimVector.size();
@@ -997,15 +989,15 @@ inline void NiftiImage::initFromList (const Rcpp::RObject &object)
 {
     Rcpp::List list(object);
 #if RNIFTI_NIFTILIB_VERSION == 1
-    nifti_1_header *header = nifti_make_new_header(NULL, DT_FLOAT64);
+    nifti_1_header *header = nifti_make_new_header(nullptr, DT_FLOAT64);
     internal::updateHeader(header, list);
-    acquire(nifti_convert_nhdr2nim(*header, NULL));
+    acquire(nifti_convert_nhdr2nim(*header, nullptr));
 #elif RNIFTI_NIFTILIB_VERSION == 2
-    nifti_1_header *header = nifti_make_new_n1_header(NULL, DT_FLOAT64);
+    nifti_1_header *header = nifti_make_new_n1_header(nullptr, DT_FLOAT64);
     internal::updateHeader(header, list);
-    acquire(nifti_convert_n1hdr2nim(*header, NULL));
+    acquire(nifti_convert_n1hdr2nim(*header, nullptr));
 #endif
-    this->image->data = NULL;
+    this->image->data = nullptr;
     free(header);
 }
 
@@ -1052,7 +1044,7 @@ inline void NiftiImage::initFromArray (const Rcpp::RObject &object, const bool c
             memcpy(this->image->data, REAL(object), dataSize);
     }
     else
-        this->image->data = NULL;
+        this->image->data = nullptr;
 
     if (object.hasAttribute("pixdim"))
     {
@@ -1070,7 +1062,7 @@ inline void NiftiImage::initFromArray (const Rcpp::RObject &object, const bool c
 }
 
 inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const bool readOnly)
-    : image(NULL), refCount(NULL)
+    : image(nullptr), refCount(nullptr)
 {
     Rcpp::RObject imageObject(object);
     bool resolved = false;
@@ -1079,7 +1071,7 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo
     {
         Rcpp::XPtr<NiftiImage> imagePtr(SEXP(imageObject.attr(".nifti_image_ptr")));
         NiftiImage *ptr = imagePtr.get();
-        if (ptr != NULL)
+        if (ptr != nullptr)
         {
 #if RNIFTI_NIFTILIB_VERSION == 1
             if (imageObject.hasAttribute(".nifti_image_ver") && int(imageObject.attr(".nifti_image_ver")) == 2)
@@ -1108,7 +1100,7 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo
     if (!resolved)
     {
         if (Rf_isNull(object))
-            acquire(NULL);
+            acquire(nullptr);
         else if (Rf_isString(object))
         {
             const std::string path = Rcpp::as<std::string>(object);
@@ -1117,7 +1109,7 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo
 #elif RNIFTI_NIFTILIB_VERSION == 2
             acquire(nifti2_image_read(internal::stringToPath(path), readData));
 #endif
-            if (this->image == NULL)
+            if (this->image == nullptr)
                 throw std::runtime_error("Failed to read image from path " + path);
         }
         else if (imageObject.inherits("nifti"))
@@ -1136,7 +1128,7 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo
             throw std::runtime_error("Cannot convert unclassed non-array object");
     }
 
-    if (this->image != NULL)
+    if (this->image != nullptr)
     {
 #if RNIFTI_NIFTILIB_VERSION == 1
         nifti_update_dims_from_array(this->image);
@@ -1164,12 +1156,12 @@ inline void NiftiImage::initFromDims (const std::vector<dim_t> &dim, const int d
     acquire(nifti2_make_new_nim(dims, datatype, 1));
 #endif
 
-    if (image == NULL)
+    if (image == nullptr)
         throw std::runtime_error("Failed to create image from scratch");
 }
 
 inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const int datatype)
-    : image(NULL), refCount(NULL)
+    : image(nullptr), refCount(nullptr)
 {
     initFromDims(dim, datatype);
 #ifndef NDEBUG
@@ -1178,7 +1170,7 @@ inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const int datatype
 }
 
 inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const std::string &datatype)
-    : image(NULL), refCount(NULL)
+    : image(nullptr), refCount(nullptr)
 {
     initFromDims(dim, internal::stringToDatatype(datatype));
 #ifndef NDEBUG
@@ -1187,7 +1179,7 @@ inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const std::string
 }
 
 inline NiftiImage::NiftiImage (const std::string &path, const bool readData)
-    : image(NULL), refCount(NULL)
+    : image(nullptr), refCount(nullptr)
 {
 #if RNIFTI_NIFTILIB_VERSION == 1
     acquire(nifti_image_read(internal::stringToPath(path), readData));
@@ -1195,7 +1187,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const bool readData)
     acquire(nifti2_image_read(internal::stringToPath(path), readData));
 #endif
 
-    if (image == NULL)
+    if (image == nullptr)
         throw std::runtime_error("Failed to read image from path " + path);
 
 #ifndef NDEBUG
@@ -1204,7 +1196,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const bool readData)
 }
 
 inline NiftiImage::NiftiImage (const std::string &path, const std::vector<dim_t> &volumes)
-    : image(NULL), refCount(NULL)
+    : image(nullptr), refCount(nullptr)
 {
     if (volumes.empty())
         throw std::runtime_error("The vector of volumes is empty");
@@ -1214,7 +1206,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector<dim_t>
 #if RNIFTI_NIFTILIB_VERSION == 1
     acquire(nifti_image_read_bricks(internal::stringToPath(path), static_cast<int>(volumes.size()), &volumes.front(), &brickList));
 
-    if (image == NULL)
+    if (image == nullptr)
         throw std::runtime_error("Failed to read image from path " + path);
 
     size_t brickSize = image->nbyper * image->nx * image->ny * image->nz;
@@ -1226,7 +1218,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector<dim_t>
 #elif RNIFTI_NIFTILIB_VERSION == 2
     acquire(nifti2_image_read_bricks(internal::stringToPath(path), volumes.size(), &volumes.front(), &brickList));
 
-    if (image == NULL)
+    if (image == nullptr)
         throw std::runtime_error("Failed to read image from path " + path);
 
     size_t brickSize = image->nbyper * image->nx * image->ny * image->nz;
@@ -1459,7 +1451,7 @@ inline NiftiImage & NiftiImage::reorient (const int icode, const int jcode, cons
     strides[locs[2]] = strides[locs[1]] * image->dim[locs[1]+1];
 
     // Permute the data (if present)
-    if (image->data != NULL)
+    if (image->data != nullptr)
     {
         size_t volSize = size_t(image->nx * image->ny * image->nz);
         size_t nVolumes = std::max(size_t(1), size_t(image->nvox) / volSize);
@@ -1560,13 +1552,13 @@ inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object)
     if (Rf_isVectorList(object))
     {
         Rcpp::List list(object);
-        nifti_1_header *header = NULL;
+        nifti_1_header *header = nullptr;
         if (this->isNull())
         {
 #if RNIFTI_NIFTILIB_VERSION == 1
-            header = nifti_make_new_header(NULL, DT_FLOAT64);
+            header = nifti_make_new_header(nullptr, DT_FLOAT64);
 #elif RNIFTI_NIFTILIB_VERSION == 2
-            header = nifti_make_new_n1_header(NULL, DT_FLOAT64);
+            header = nifti_make_new_n1_header(nullptr, DT_FLOAT64);
 #endif
             internal::updateHeader(header, list, true);
         }
@@ -1581,25 +1573,25 @@ inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object)
             internal::updateHeader(header, list, true);
         }
 
-        if (header != NULL)
+        if (header != nullptr)
         {
             // Retain the data pointer, but otherwise overwrite the stored object with one created from the header
             // The file names can't be preserved through the round-trip, so free them
             void *dataPtr = image->data;
 #if RNIFTI_NIFTILIB_VERSION == 1
-            nifti_image *tempImage = nifti_convert_nhdr2nim(*header, NULL);
+            nifti_image *tempImage = nifti_convert_nhdr2nim(*header, nullptr);
 #elif RNIFTI_NIFTILIB_VERSION == 2
-            nifti_image *tempImage = nifti_convert_n1hdr2nim(*header, NULL);
+            nifti_image *tempImage = nifti_convert_n1hdr2nim(*header, nullptr);
 #endif
 
-            if (image->fname != NULL)
+            if (image->fname != nullptr)
                 free(image->fname);
-            if (image->iname != NULL)
+            if (image->iname != nullptr)
                 free(image->iname);
 
             memcpy(image, tempImage, sizeof(nifti_image));
             image->num_ext = 0;
-            image->ext_list = NULL;
+            image->ext_list = nullptr;
             image->data = dataPtr;
 
 #if RNIFTI_NIFTILIB_VERSION == 1
@@ -1647,7 +1639,7 @@ inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object)
             const int channels = object.attr("channels");
             image->datatype = (channels == 4 ? DT_RGBA32 : DT_RGB24);
         }
-        nifti_datatype_sizes(image->datatype, &image->nbyper, NULL);
+        nifti_datatype_sizes(image->datatype, &image->nbyper, nullptr);
 
 #if RNIFTI_NIFTILIB_VERSION == 1
         nifti_image_unload(image);
@@ -1678,7 +1670,7 @@ inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object)
 
 inline const NiftiImage::Xform NiftiImage::xform (const bool preferQuaternion) const
 {
-    if (image == NULL)
+    if (image == nullptr)
         return Xform();
     else if (image->qform_code <= 0 && image->sform_code <= 0)
     {
@@ -1878,5 +1870,3 @@ inline Rcpp::RObject NiftiImage::toArrayOrPointer (const bool internal, const st
 }
 
 #endif // USING_R
-
-#endif
diff --git a/reg-io/RNifti/NiftiImage_matrix.h b/reg-io/RNifti/NiftiImage_matrix.h
index e89695db..6bb1ac74 100644
--- a/reg-io/RNifti/NiftiImage_matrix.h
+++ b/reg-io/RNifti/NiftiImage_matrix.h
@@ -1,5 +1,4 @@
-#ifndef _NIFTI_IMAGE_MATRIX_H_
-#define _NIFTI_IMAGE_MATRIX_H_
+#pragma once
 
 template <>
 inline SquareMatrix<mat33,float,3> SquareMatrix<mat33,float,3>::inverse () const
@@ -131,5 +130,3 @@ inline Vector<ElementType,Order> SquareMatrix<NiftiType,ElementType,Order>::mult
     }
     return result;
 }
-
-#endif
diff --git a/reg-io/RNifti/NiftiImage_print.h b/reg-io/RNifti/NiftiImage_print.h
index 92689ba2..8d8bc42e 100644
--- a/reg-io/RNifti/NiftiImage_print.h
+++ b/reg-io/RNifti/NiftiImage_print.h
@@ -1,5 +1,4 @@
-#ifndef _NIFTI_IMAGE_PRINT_H_
-#define _NIFTI_IMAGE_PRINT_H_
+#pragma once
 
 #ifdef USING_R
 
@@ -32,5 +31,3 @@
 #define Rprintf(...) fprintf(stderr, __VA_ARGS__)
 
 #endif // USING_R
-
-#endif // _PRINT_H_
diff --git a/reg-io/niftilib/nifti1.h b/reg-io/niftilib/nifti1.h
index 4121dc0f..8a442265 100644
--- a/reg-io/niftilib/nifti1.h
+++ b/reg-io/niftilib/nifti1.h
@@ -9,12 +9,11 @@
                 TIME_SERIES, NODE_INDEX, RGB_VECTOR, RGBA_VECTOR, SHAPE
 
         08 Mar 2019 [PT,DRG]
-           - Updated to include [qs]form_code = 5 
+           - Updated to include [qs]form_code = 5
 
  */
 
-#ifndef _NIFTI_HEADER_
-#define _NIFTI_HEADER_
+#pragma once
 
 /*****************************************************************************
       ** This file defines the "NIFTI-1" header format.               **
@@ -1524,5 +1523,3 @@ typedef struct { unsigned char r,g,b; } rgb_byte ;
 }
 #endif
 /*=================*/
-
-#endif /* _NIFTI_HEADER_ */
diff --git a/reg-io/niftilib/nifti1_io.h b/reg-io/niftilib/nifti1_io.h
index 2927d31a..14ed0d3a 100644
--- a/reg-io/niftilib/nifti1_io.h
+++ b/reg-io/niftilib/nifti1_io.h
@@ -3,8 +3,7 @@
            - Written by Bob Cox, SSCC NIMH
            - Revisions by Rick Reynolds, SSCC NIMH
  */
-#ifndef _NIFTI_IO_HEADER_
-#define _NIFTI_IO_HEADER_
+#pragma once
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -583,5 +582,3 @@ typedef struct {
 }
 #endif
 /*=================*/
-
-#endif /* _NIFTI_IO_HEADER_ */
diff --git a/reg-io/niftilib/nifti2.h b/reg-io/niftilib/nifti2.h
index ab47f3cd..97bf2e85 100644
--- a/reg-io/niftilib/nifti2.h
+++ b/reg-io/niftilib/nifti2.h
@@ -2,8 +2,7 @@
     \brief Header structure for NIFTI-2 format.
  */
 
-#ifndef __NIFTI2_HEADER
-#define __NIFTI2_HEADER
+#pragma once
 
 /*---------------------------------------------------------------------------*/
 /* Changes to the header from NIFTI-1 to NIFTI-2 are intended to allow for
@@ -113,5 +112,3 @@ typedef struct nifti_2_header nifti_2_header;
 }
 #endif
 /*=================*/
-
-#endif /* __NIFTI2_HEADER */
diff --git a/reg-io/niftilib/nifti2_image.h b/reg-io/niftilib/nifti2_image.h
index 6e21b3c1..8f84c080 100644
--- a/reg-io/niftilib/nifti2_image.h
+++ b/reg-io/niftilib/nifti2_image.h
@@ -1,5 +1,4 @@
-#ifndef _NIFTI2_IMAGE_H_
-#define _NIFTI2_IMAGE_H_
+#pragma once
 
 #include <inttypes.h>
 
@@ -102,5 +101,3 @@ typedef struct {                /*!< Image storage struct **/
 } nifti2_image ;
 
 #endif // RNIFTI_NIFTILIB_VERSION
-
-#endif
diff --git a/reg-io/niftilib/nifti2_io.h b/reg-io/niftilib/nifti2_io.h
index c0ed0cbd..ff215d19 100644
--- a/reg-io/niftilib/nifti2_io.h
+++ b/reg-io/niftilib/nifti2_io.h
@@ -3,8 +3,7 @@
            - Written by Bob Cox, SSCC NIMH
            - Revisions by Rick Reynolds, SSCC NIMH
  */
-#ifndef _NIFTI2_IO_HEADER_
-#define _NIFTI2_IO_HEADER_
+#pragma once
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -826,5 +825,3 @@ typedef struct {
 }
 #endif
 /*=================*/
-
-#endif /* _NIFTI2_IO_HEADER_ */
diff --git a/reg-io/znzlib/znzlib.h b/reg-io/znzlib/znzlib.h
index d17a8bc6..d0e95aa1 100644
--- a/reg-io/znzlib/znzlib.h
+++ b/reg-io/znzlib/znzlib.h
@@ -1,5 +1,4 @@
-#ifndef _ZNZLIB_H_
-#define _ZNZLIB_H_
+#pragma once
 
 /*
 znzlib.h  (zipped or non-zipped library)
@@ -122,5 +121,3 @@ int znzprintf(znzFile stream, const char *format, ...);
 }
 #endif
 /*=================*/
-
-#endif

From 5bf6ca862d406d80a2c456e3577fd8bd517173db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 13:45:25 +0000
Subject: [PATCH 076/314] Add utility functions to NiftiImage

---
 niftyreg_build_version.txt |  2 +-
 reg-io/RNifti/NiftiImage.h | 49 ++++++++++++++++++++++++++++++++------
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6c412452..598ed30e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-189
+190
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 5a714cbc..22d8e858 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1527,6 +1527,12 @@ class NiftiImage
         return *this;
     }
 
+    /**
+     * Boolean operator, which allows a \c NiftiImage to be used in a boolean context
+     * @return \c true if the wrapped pointer is not \c nullptr; \c false otherwise
+    */
+    operator bool () const { return (image != nullptr); }
+
     /**
      * Mark the image as persistent, so that it can be passed back to R
      * @param persistent The new persistence state of the object
@@ -1567,13 +1573,7 @@ class NiftiImage
      * Return the number of dimensions in the image
      * @return An integer giving the image dimensionality
     **/
-    int nDims () const
-    {
-        if (image == nullptr)
-            return 0;
-        else
-            return image->ndim;
-    }
+    int nDims () const { return (image == nullptr ? 0 : image->ndim); }
 
     /**
      * Return the dimensions of the image
@@ -1833,12 +1833,47 @@ class NiftiImage
         }
     }
 
+    /**
+     * Calculate the number of voxels in the image
+     * @param image Input image
+     * @param dimCount Number of dimensions to consider
+     * @return The number of voxels in the image
+     */
+    static size_t calcVoxelNumber(const nifti_image *image, const int& dimCount) {
+        if (image == nullptr)
+            return 0;
+        size_t voxelNumber = 1;
+        for (int i = 1; i <= dimCount; i++)
+            voxelNumber *= static_cast<size_t>(std::abs(image->dim[i]));
+        return voxelNumber;
+    }
+
+    /**
+     * Recalculate the number of voxels in the image
+    */
+    void recalcVoxelNumber() {
+        if (image != nullptr)
+            image->nvox = calcVoxelNumber(image, image->ndim);
+    }
+
     /**
      * Return the number of voxels in the image
      * @return An integer giving the number of voxels in the image
     **/
     size_t nVoxels () const { return (image == nullptr ? 0 : image->nvox); }
 
+    /**
+     * Return the number of voxels per slice
+     * @return An integer giving the number of voxels per slice
+    */
+    size_t nVoxelsPerSlice () const { return calcVoxelNumber(*this, 2); }
+
+    /**
+     * Return the number of voxels per volume
+     * @return An integer giving the number of voxels per volume
+    */
+    size_t nVoxelsPerVolume () const { return calcVoxelNumber(*this, 3); }
+
     /**
      * Return the number of extensions associated with the image
      * @return An integer giving the number of extensions

From a3b0cc98a7e9e46a2602ece6ffbb7d263bfb7198 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 14:10:43 +0000
Subject: [PATCH 077/314] Fix dimensions after initialisation of NiftiImage

---
 niftyreg_build_version.txt      |  2 +-
 reg-io/RNifti/NiftiImage.h      |  7 +++-
 reg-io/RNifti/NiftiImage_impl.h | 72 ++++++++++++++++++++++++++++++---
 3 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 598ed30e..88b2e783 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-190
+191
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 22d8e858..30943fbd 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1352,6 +1352,11 @@ class NiftiImage
     void setPixunits (const std::vector<std::string> &pixunits);
 
 public:
+    /**
+     * Correct dimensions of the image
+    */
+    void correctDimensions ();
+
     /**
      * Swap the contents of two \c NiftiImage objects
     */
@@ -1375,7 +1380,7 @@ class NiftiImage
      * object wraps the same \c nifti_image and increments the shared reference count
     **/
     NiftiImage (const NiftiImage &source, const bool copy = true)
-        : image(nullptr), refCount(nullptr)
+        : NiftiImage()
     {
         if (copy) {
             this->copy(source);
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index 75018afb..e9692998 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -405,6 +405,62 @@ inline void addAttributes (const SEXP pointer, const NiftiImage &source, const b
 
 }       // internal namespace
 
+inline void NiftiImage::correctDimensions() {
+    // Ensure that no dimension is set to zero
+    if (image->nx < 1 || image->dim[1] < 1) image->dim[1] = image->nx = 1;
+    if (image->ny < 1 || image->dim[2] < 1) image->dim[2] = image->ny = 1;
+    if (image->nz < 1 || image->dim[3] < 1) image->dim[3] = image->nz = 1;
+    if (image->nt < 1 || image->dim[4] < 1) image->dim[4] = image->nt = 1;
+    if (image->nu < 1 || image->dim[5] < 1) image->dim[5] = image->nu = 1;
+    if (image->nv < 1 || image->dim[6] < 1) image->dim[6] = image->nv = 1;
+    if (image->nw < 1 || image->dim[7] < 1) image->dim[7] = image->nw = 1;
+    //Correcting the dim of the images
+    for (int i = 1; i < 8; ++i) {
+        if (image->dim[i] > 1) {
+            image->dim[0] = image->ndim = i;
+        }
+    }
+    // Set the slope to 1 if undefined
+    if (image->scl_slope == 0) image->scl_slope = 1.f;
+    // Ensure that no spacing is set to zero
+    if (image->ny == 1 && (image->dy == 0 || image->pixdim[2] == 0))
+        image->dy = image->pixdim[2] = 1;
+    if (image->nz == 1 && (image->dz == 0 || image->pixdim[3] == 0))
+        image->dz = image->pixdim[3] = 1;
+    // Create the qform matrix if required
+    if (image->qform_code == 0 && image->sform_code == 0) {
+        image->qto_xyz = nifti_quatern_to_mat44(image->quatern_b,
+                                                image->quatern_c,
+                                                image->quatern_d,
+                                                image->qoffset_x,
+                                                image->qoffset_y,
+                                                image->qoffset_z,
+                                                image->dx,
+                                                image->dy,
+                                                image->dz,
+                                                image->qfac);
+        image->qto_ijk = nifti_mat44_inverse(image->qto_xyz);
+    }
+    // Set the voxel spacing to millimetres
+    if (image->xyz_units == NIFTI_UNITS_MICRON) {
+        for (int d = 1; d <= image->ndim; ++d)
+            image->pixdim[d] /= 1000.f;
+        image->xyz_units = NIFTI_UNITS_MM;
+    }
+    if (image->xyz_units == NIFTI_UNITS_METER) {
+        for (int d = 1; d <= image->ndim; ++d)
+            image->pixdim[d] *= 1000.f;
+        image->xyz_units = NIFTI_UNITS_MM;
+    }
+    image->dx = image->pixdim[1];
+    image->dy = image->pixdim[2];
+    image->dz = image->pixdim[3];
+    image->dt = image->pixdim[4];
+    image->du = image->pixdim[5];
+    image->dv = image->pixdim[6];
+    image->dw = image->pixdim[7];
+}
+
 template <typename Type, bool alpha>
 inline void NiftiImageData::ConcreteTypeHandler<Type,alpha>::minmax (void *ptr, const size_t length, double *min, double *max) const
 {
@@ -1062,7 +1118,7 @@ inline void NiftiImage::initFromArray (const Rcpp::RObject &object, const bool c
 }
 
 inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const bool readOnly)
-    : image(nullptr), refCount(nullptr)
+    : NiftiImage()
 {
     Rcpp::RObject imageObject(object);
     bool resolved = false;
@@ -1158,10 +1214,12 @@ inline void NiftiImage::initFromDims (const std::vector<dim_t> &dim, const int d
 
     if (image == nullptr)
         throw std::runtime_error("Failed to create image from scratch");
+
+    correctDimensions();
 }
 
 inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const int datatype)
-    : image(nullptr), refCount(nullptr)
+    : NiftiImage()
 {
     initFromDims(dim, datatype);
 #ifndef NDEBUG
@@ -1170,7 +1228,7 @@ inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const int datatype
 }
 
 inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const std::string &datatype)
-    : image(nullptr), refCount(nullptr)
+    : NiftiImage()
 {
     initFromDims(dim, internal::stringToDatatype(datatype));
 #ifndef NDEBUG
@@ -1179,7 +1237,7 @@ inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const std::string
 }
 
 inline NiftiImage::NiftiImage (const std::string &path, const bool readData)
-    : image(nullptr), refCount(nullptr)
+    : NiftiImage()
 {
 #if RNIFTI_NIFTILIB_VERSION == 1
     acquire(nifti_image_read(internal::stringToPath(path), readData));
@@ -1190,13 +1248,15 @@ inline NiftiImage::NiftiImage (const std::string &path, const bool readData)
     if (image == nullptr)
         throw std::runtime_error("Failed to read image from path " + path);
 
+    correctDimensions();
+
 #ifndef NDEBUG
     Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string)\n", RNIFTI_NIFTILIB_VERSION, this->image);
 #endif
 }
 
 inline NiftiImage::NiftiImage (const std::string &path, const std::vector<dim_t> &volumes)
-    : image(nullptr), refCount(nullptr)
+    : NiftiImage()
 {
     if (volumes.empty())
         throw std::runtime_error("The vector of volumes is empty");
@@ -1229,6 +1289,8 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector<dim_t>
     nifti2_free_NBL(&brickList);
 #endif
 
+    correctDimensions();
+
 #ifndef NDEBUG
     Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string and volume vector)\n", RNIFTI_NIFTILIB_VERSION, this->image);
 #endif

From c86394602390f9a4386aca958e4c29bbebf29ec0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 14:35:27 +0000
Subject: [PATCH 078/314] Fix a bug causing accessing freed memory in
 reg_io_WriteImageFile()

---
 niftyreg_build_version.txt     |   2 +-
 reg-io/_reg_ReadWriteImage.cpp | 333 +++++++++++++++------------------
 reg-io/_reg_ReadWriteImage.h   |   4 +-
 3 files changed, 158 insertions(+), 181 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 88b2e783..86a03071 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-191
+192
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index eba5b063..67017446 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -14,228 +14,205 @@
 #include "_reg_stringFormat.h"
 
 /* *************************************************************** */
-void reg_hack_filename(nifti_image *image, const char *filename)
-{
-   std::string name(filename);
-   name.append("\0");
-   // Free the char arrays if already allocated
-   if(image->fname) free(image->fname);
-   if(image->iname) free(image->iname);
-   // Allocate the char arrays
-   image->fname = (char *)malloc((name.size()+1)*sizeof(char));
-   image->iname = (char *)malloc((name.size()+1)*sizeof(char));
-   // Copy the new name in the char arrays
-   strcpy(image->fname,name.c_str());
-   strcpy(image->iname,name.c_str());
-   // Returns at the end of the function
-   return;
+void reg_hack_filename(nifti_image *image, std::string filename) {
+    filename.append("\0");
+    // Free the char arrays if already allocated
+    if (image->fname) free(image->fname);
+    if (image->iname) free(image->iname);
+    // Allocate the char arrays
+    image->fname = (char *)malloc((filename.size() + 1) * sizeof(char));
+    image->iname = (char *)malloc((filename.size() + 1) * sizeof(char));
+    // Copy the new name in the char arrays
+    strcpy(image->fname, filename.c_str());
+    strcpy(image->iname, filename.c_str());
 }
 /* *************************************************************** */
-int reg_io_checkFileFormat(const char *filename)
-{
-   // Nifti format is used by default
-   // Check the extention of the provided filename
-   std::string b(filename);
-   if(b.find( ".nii.gz") != std::string::npos)
-      return NR_NII_FORMAT;
-   else if(b.find( ".nii") != std::string::npos)
-      return NR_NII_FORMAT;
-   else if(b.find( ".hdr") != std::string::npos)
-      return NR_NII_FORMAT;
-   else if(b.find( ".img.gz") != std::string::npos)
-      return NR_NII_FORMAT;
-   else if(b.find( ".img") != std::string::npos)
-      return NR_NII_FORMAT;
-   else if(b.find( ".png") != std::string::npos)
-      return NR_PNG_FORMAT;
+int reg_io_checkFileFormat(const std::string& filename) {
+    // Nifti format is used by default
+    // Check the extention of the provided filename
+    if (filename.find(".nii.gz") != std::string::npos)
+        return NR_NII_FORMAT;
+    else if (filename.find(".nii") != std::string::npos)
+        return NR_NII_FORMAT;
+    else if (filename.find(".hdr") != std::string::npos)
+        return NR_NII_FORMAT;
+    else if (filename.find(".img.gz") != std::string::npos)
+        return NR_NII_FORMAT;
+    else if (filename.find(".img") != std::string::npos)
+        return NR_NII_FORMAT;
+    else if (filename.find(".png") != std::string::npos)
+        return NR_PNG_FORMAT;
 #ifdef _USE_NRRD
-   else if(b.find( ".nrrd") != std::string::npos)
-      return NR_NRRD_FORMAT;
-   else if(b.find( ".nhdr") != std::string::npos)
-      return NR_NRRD_FORMAT;
+    else if (filename.find(".nrrd") != std::string::npos)
+        return NR_NRRD_FORMAT;
+    else if (filename.find(".nhdr") != std::string::npos)
+        return NR_NRRD_FORMAT;
 #endif
-   else
-   {
-      reg_print_fct_warn("reg_io_checkFileFormat");
-      reg_print_msg_warn("No filename extension provided - the Nifti library is used by default");
-   }
+    else {
+        reg_print_fct_warn("reg_io_checkFileFormat");
+        reg_print_msg_warn("No filename extension provided - the Nifti library is used by default");
+    }
 
-   return NR_NII_FORMAT;
+    return NR_NII_FORMAT;
 }
 /* *************************************************************** */
-nifti_image *reg_io_ReadImageFile(const char *filename)
-{
-   // First read the fileformat in order to use the correct library
-   int fileFormat=reg_io_checkFileFormat(filename);
+nifti_image* reg_io_ReadImageFile(const char *filename) {
+    // First read the file format in order to use the correct library
+    const int fileFormat = reg_io_checkFileFormat(filename);
 
-   // Create the nifti image pointer
-   nifti_image *image=nullptr;
+    // Create the nifti image pointer
+    nifti_image *image = nullptr;
 
-   // Read the image and convert it to nifti format if required
-   switch(fileFormat)
-   {
-   case NR_NII_FORMAT:
-      image=nifti_image_read(filename,true);
-      reg_hack_filename(image,filename);
-      break;
-   case NR_PNG_FORMAT:
-      image=reg_io_readPNGfile(filename,true);
-      reg_hack_filename(image,filename);
-      break;
+    // Read the image and convert it to nifti format if required
+    switch (fileFormat) {
+    case NR_NII_FORMAT:
+        image = nifti_image_read(filename, true);
+        reg_hack_filename(image, filename);
+        break;
+    case NR_PNG_FORMAT:
+        image = reg_io_readPNGfile(filename, true);
+        reg_hack_filename(image, filename);
+        break;
 #ifdef _USE_NRRD
-   case NR_NRRD_FORMAT:
-      Nrrd *nrrdImage = reg_io_readNRRDfile(filename);
-      image = reg_io_nrdd2nifti(nrrdImage);
-      nrrdNuke(nrrdImage);
-      reg_hack_filename(image,filename);
-      break;
+    case NR_NRRD_FORMAT:
+        Nrrd *nrrdImage = reg_io_readNRRDfile(filename);
+        image = reg_io_nrdd2nifti(nrrdImage);
+        nrrdNuke(nrrdImage);
+        reg_hack_filename(image, filename);
+        break;
 #endif
-   }
-   reg_checkAndCorrectDimension(image);
+    }
+    reg_checkAndCorrectDimension(image);
 
-   // Return the nifti image
-   return image;
+    // Return the nifti image
+    return image;
 }
 /* *************************************************************** */
-nifti_image *reg_io_ReadImageHeader(const char *filename)
-{
-   // First read the fileformat in order to use the correct library
-   int fileFormat=reg_io_checkFileFormat(filename);
+nifti_image* reg_io_ReadImageHeader(const char *filename) {
+    // First read the file format in order to use the correct library
+    const int fileFormat = reg_io_checkFileFormat(filename);
 
-   // Create the nifti image pointer
-   nifti_image *image=nullptr;
+    // Create the nifti image pointer
+    nifti_image *image = nullptr;
 
-   // Read the image and convert it to nifti format if required
-   switch(fileFormat)
-   {
-   case NR_NII_FORMAT:
-      image=nifti_image_read(filename,false);
-      break;
-   case NR_PNG_FORMAT:
-      image=reg_io_readPNGfile(filename,false);
-      reg_hack_filename(image,filename);
-      break;
+    // Read the image and convert it to nifti format if required
+    switch (fileFormat) {
+    case NR_NII_FORMAT:
+        image = nifti_image_read(filename, false);
+        break;
+    case NR_PNG_FORMAT:
+        image = reg_io_readPNGfile(filename, false);
+        reg_hack_filename(image, filename);
+        break;
 #ifdef _USE_NRRD
-   case NR_NRRD_FORMAT:
-      Nrrd *nrrdImage = reg_io_readNRRDfile(filename);
-      image = reg_io_nrdd2nifti(nrrdImage);
-      nrrdNuke(nrrdImage);
-      reg_hack_filename(image,filename);
-      break;
+    case NR_NRRD_FORMAT:
+        Nrrd *nrrdImage = reg_io_readNRRDfile(filename);
+        image = reg_io_nrdd2nifti(nrrdImage);
+        nrrdNuke(nrrdImage);
+        reg_hack_filename(image, filename);
+        break;
 #endif
-   }
-   reg_checkAndCorrectDimension(image);
+    }
+    reg_checkAndCorrectDimension(image);
 
-   // Return the nifti image
-   return image;
+    // Return the nifti image
+    return image;
 }
 /* *************************************************************** */
-void reg_io_WriteImageFile(nifti_image *image, const char *filename)
-{
-   // First read the fileformat in order to use the correct library
-   int fileFormat=reg_io_checkFileFormat(filename);
+void reg_io_WriteImageFile(nifti_image *image, const char *filename) {
+    // First read the file format in order to use the correct library
+    int fileFormat = reg_io_checkFileFormat(filename);
 
-   // Check if the images can be saved as a png file
-   if( (image->nz>1 ||
-        image->nt>1 ||
-        image->nu>1 ||
-        image->nv>1 ||
-        image->nw>1 ) &&
-       fileFormat==NR_PNG_FORMAT)
-   {
-      // If the image has more than two dimension,
-      // the filename is converted to nifti
-      std::string b(filename);
-      b.replace(b.find( ".png"),4,".nii.gz");
-      reg_print_msg_warn("The file can not be saved as png and is converted to nifti");
-      char text[255];sprintf(text,"%s -> %s", filename, b.c_str());
-      reg_print_msg_warn(text);
-      filename=b.c_str();
-      fileFormat=NR_NII_FORMAT;
-   }
+    // Check if the images can be saved as a png file
+    std::string fname;
+    if ((image->nz > 1 ||
+         image->nt > 1 ||
+         image->nu > 1 ||
+         image->nv > 1 ||
+         image->nw > 1) &&
+        fileFormat == NR_PNG_FORMAT) {
+        // If the image has more than two dimension,
+        // the filename is converted to nifti
+        fname = filename;
+        fname.replace(fname.find(".png"), 4, ".nii.gz");
+        reg_print_msg_warn("The file can not be saved as png and is converted to nifti");
+        char text[255]; sprintf(text, "%s -> %s", filename, fname.c_str());
+        reg_print_msg_warn(text);
+        filename = fname.c_str();
+        fileFormat = NR_NII_FORMAT;
+    }
 
-   // Convert the image to the correct format if required, set the filename and save the file
-   switch(fileFormat)
-   {
-   case NR_NII_FORMAT:
-      nifti_set_filenames(image,filename,0,0);
-      nifti_image_write(image);
-      break;
-   case NR_PNG_FORMAT:
-      reg_io_writePNGfile(image,filename);
-      break;
+    // Convert the image to the correct format if required, set the filename and save the file
+    switch (fileFormat) {
+    case NR_NII_FORMAT:
+        nifti_set_filenames(image, filename, 0, 0);
+        nifti_image_write(image);
+        break;
+    case NR_PNG_FORMAT:
+        reg_io_writePNGfile(image, filename);
+        break;
 #ifdef _USE_NRRD
-   case NR_NRRD_FORMAT:
-      Nrrd *nrrdImage = reg_io_nifti2nrrd(image);
-      reg_io_writeNRRDfile(nrrdImage,filename);
-      nrrdNuke(nrrdImage);
-      break;
+    case NR_NRRD_FORMAT:
+        Nrrd *nrrdImage = reg_io_nifti2nrrd(image);
+        reg_io_writeNRRDfile(nrrdImage, filename);
+        nrrdNuke(nrrdImage);
+        break;
 #endif
-   }
-
-   // Return
-   return;
+    }
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_io_diplayImageData1(nifti_image *image)
-{
+void reg_io_displayImageData1(nifti_image *image) {
     reg_print_msg_debug("image values:");
     DataType *data = static_cast<DataType *>(image->data);
     std::string text;
 
-    size_t voxelIndex=0;
-    for(int z=0; z<image->nz; z++)
-    {
-       for(int y=0; y<image->ny; y++)
-       {
-          for(int x=0; x<image->nx; x++)
-          {
-             text = stringFormat("[%d - %d - %d] = [", x, y, z);
-             for(int tu=0;tu<image->nt*image->nu; ++tu){
-                text = stringFormat("%s%g ", text.c_str(),
-                    static_cast<double>(data[voxelIndex + tu*CalcVoxelNumber(*image)]));
-             }
-             text = stringFormat("%s]", text.c_str());
-             reg_print_msg_debug(text.c_str());
-          }
-       }
+    size_t voxelIndex = 0;
+    for (int z = 0; z < image->nz; z++) {
+        for (int y = 0; y < image->ny; y++) {
+            for (int x = 0; x < image->nx; x++) {
+                text = stringFormat("[%d - %d - %d] = [", x, y, z);
+                for (int tu = 0; tu < image->nt * image->nu; ++tu) {
+                    text = stringFormat("%s%g ", text.c_str(),
+                                        static_cast<double>(data[voxelIndex + tu * CalcVoxelNumber(*image)]));
+                }
+                text = stringFormat("%s]", text.c_str());
+                reg_print_msg_debug(text.c_str());
+            }
+        }
     }
 }
 /* *************************************************************** */
-void reg_io_diplayImageData(nifti_image *image)
-{
-    switch(image->datatype)
-    {
+void reg_io_displayImageData(nifti_image *image) {
+    switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-       reg_io_diplayImageData1<unsigned char>(image);
-       break;
+        reg_io_displayImageData1<unsigned char>(image);
+        break;
     case NIFTI_TYPE_INT8:
-       reg_io_diplayImageData1<char>(image);
-       break;
+        reg_io_displayImageData1<char>(image);
+        break;
     case NIFTI_TYPE_UINT16:
-       reg_io_diplayImageData1<unsigned short>(image);
-       break;
+        reg_io_displayImageData1<unsigned short>(image);
+        break;
     case NIFTI_TYPE_INT16:
-       reg_io_diplayImageData1<short>(image);
-       break;
+        reg_io_displayImageData1<short>(image);
+        break;
     case NIFTI_TYPE_UINT32:
-       reg_io_diplayImageData1<unsigned int>(image);
-       break;
+        reg_io_displayImageData1<unsigned int>(image);
+        break;
     case NIFTI_TYPE_INT32:
-       reg_io_diplayImageData1<int>(image);
-       break;
+        reg_io_displayImageData1<int>(image);
+        break;
     case NIFTI_TYPE_FLOAT32:
-       reg_io_diplayImageData1<float>(image);
-       break;
+        reg_io_displayImageData1<float>(image);
+        break;
     case NIFTI_TYPE_FLOAT64:
-       reg_io_diplayImageData1<double>(image);
-       break;
+        reg_io_displayImageData1<double>(image);
+        break;
     default:
-       reg_print_fct_error("reg_io_diplayImageData");
-       reg_print_msg_error("Unsupported datatype");
-       reg_exit();
+        reg_print_fct_error("reg_io_displayImageData");
+        reg_print_msg_error("Unsupported datatype");
+        reg_exit();
     }
-   return;
 }
 /* *************************************************************** */
diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h
index 0b1b6d98..1c39bfdb 100644
--- a/reg-io/_reg_ReadWriteImage.h
+++ b/reg-io/_reg_ReadWriteImage.h
@@ -38,7 +38,7 @@
   * @param filename Filename of the input images
   * @return Code, NIFTYREG_FILEFORMAT_TYPE,  that encode the file format
   */
-int reg_io_checkFileFormat(const char *filename);
+int reg_io_checkFileFormat(const std::string& filename);
 /* *************************************************************** */
 /** The function expects a filename and returns a nifti_image structure
   * The function will use to correct library and will return a NULL image
@@ -69,5 +69,5 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename);
   * The image will be displayed on the standard output
   * @param Nifti image to be displayed
   */
-void reg_io_diplayImageData(nifti_image *image);
+void reg_io_displayImageData(nifti_image *image);
 /* *************************************************************** */

From 43686abf171807a92d558fee848d396c1f45e0b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 14:41:40 +0000
Subject: [PATCH 079/314] Refactorisations

---
 niftyreg_build_version.txt                   |   2 +-
 reg-apps/reg_aladin.cpp                      |   4 +-
 reg-io/_reg_ReadWriteMatrix.cpp              | 194 ++++++++-----------
 reg-io/_reg_ReadWriteMatrix.h                |  10 +-
 reg-io/niftilib/nifti1.h                     |   2 +-
 reg-lib/_reg_aladin.h                        |   2 +-
 reg-lib/cpu/_reg_blockMatching.cpp           |   2 +-
 reg-lib/cpu/_reg_discrete_init.cpp           |   2 +-
 reg-lib/cpu/_reg_discrete_init.h             |   2 +-
 reg-lib/cpu/_reg_localTrans.cpp              |  21 +-
 reg-lib/cpu/_reg_localTrans.h                |   4 +-
 reg-lib/cpu/_reg_mrf.cpp                     |   4 +-
 reg-lib/cpu/_reg_mrf.h                       |   2 +-
 reg-lib/cpu/_reg_tools.cpp                   |   2 +-
 reg-test/reg_test_affineDeformationField.cpp |   4 +-
 reg-test/reg_test_common.h                   |   7 +-
 reg-test/reg_test_interpolation.cpp          |  10 +-
 17 files changed, 115 insertions(+), 159 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 86a03071..2455a46a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-192
+193
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index cfd6a6a2..6ba851a0 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -31,7 +31,6 @@ void PetitUsage(char *exec) {
     reg_print_msg_error(text);
     reg_print_msg_error("\tSee the help for more details (-h).");
     reg_print_msg_error("");
-    return;
 }
 
 void Usage(char *exec) {
@@ -57,7 +56,7 @@ void Usage(char *exec) {
 
     reg_print_info(exec, "\t-rmask <filename>\tFilename of a mask image in the reference space.");
     reg_print_info(exec, "\t-fmask <filename>\tFilename of a mask image in the floating space. (Only used when symmetric turned on)");
-    reg_print_info(exec, "\t-res <filename>\t\tFilename of the resampled image. [outputResult.nii]");
+    reg_print_info(exec, "\t-res <filename>\t\tFilename of the resampled image. [outputResult.nii.gz]");
 
     reg_print_info(exec, "\t-maxit <int>\t\tMaximal number of iterations of the trimmed least square approach to perform per level. [5]");
     reg_print_info(exec, "\t-ln <int>\t\tNumber of levels to use to generate the pyramids for the coarse-to-fine approach. [3]");
@@ -113,7 +112,6 @@ void Usage(char *exec) {
     sprintf(text, "\t\t\t\t(%s)", NR_VERSION);
     reg_print_info(exec, text);
     reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-    return;
 }
 
 int main(int argc, char **argv) {
diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp
index 524abc72..6aef5626 100644
--- a/reg-io/_reg_ReadWriteMatrix.cpp
+++ b/reg-io/_reg_ReadWriteMatrix.cpp
@@ -1,35 +1,29 @@
 #include "_reg_ReadWriteMatrix.h"
 #include "_reg_maths.h"
-//STD
 #include <string>
-/* *************************************************************** */
+
 /* *************************************************************** */
 void reg_tool_ReadAffineFile(mat44 *mat,
                              nifti_image *referenceImage,
                              nifti_image *floatingImage,
                              char *fileName,
-                             bool flirtFile)
-{
+                             bool flirtFile) {
     std::ifstream affineFile;
     affineFile.open(fileName);
-    if(affineFile.is_open())
-    {
-        int i=0;
-        double value1,value2,value3,value4;
-        while(!affineFile.eof())
-        {
+    if (affineFile.is_open()) {
+        int i = 0;
+        double value1, value2, value3, value4;
+        while (!affineFile.eof()) {
             affineFile >> value1 >> value2 >> value3 >> value4;
-            mat->m[i][0] = (float) value1;
-            mat->m[i][1] = (float) value2;
-            mat->m[i][2] = (float) value3;
-            mat->m[i][3] = (float) value4;
+            mat->m[i][0] = (float)value1;
+            mat->m[i][1] = (float)value2;
+            mat->m[i][2] = (float)value3;
+            mat->m[i][3] = (float)value4;
             i++;
-            if(i>3) break;
+            if (i > 3) break;
         }
-    }
-    else
-    {
-        char text[255];sprintf(text, "The affine file can not be read: %s", fileName);
+    } else {
+        char text[255]; sprintf(text, "The affine file can not be read: %s", fileName);
         reg_print_fct_error("reg_tool_ReadAffineFile");
         reg_print_msg_error(text);
         reg_exit();
@@ -40,48 +34,40 @@ void reg_tool_ReadAffineFile(mat44 *mat,
     reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] Read affine transformation");
 #endif
 
-    if(flirtFile)
-    {
+    if (flirtFile) {
         mat44 absoluteReference;
         mat44 absoluteFloating;
-        for(int i=0; i<4; i++)
-        {
-            for(int j=0; j<4; j++)
-            {
-                absoluteReference.m[i][j]=absoluteFloating.m[i][j]=0;
+        for (int i = 0; i < 4; i++) {
+            for (int j = 0; j < 4; j++) {
+                absoluteReference.m[i][j] = absoluteFloating.m[i][j] = 0;
             }
         }
         //If the reference sform is defined, it is used; qform otherwise;
         mat44 *referenceMatrix;
-        if(referenceImage->sform_code > 0)
-        {
+        if (referenceImage->sform_code > 0) {
             referenceMatrix = &(referenceImage->sto_xyz);
 #ifndef NDEBUG
             reg_print_msg_debug("The reference sform matrix is defined and used");
 #endif
-        }
-        else referenceMatrix = &(referenceImage->qto_xyz);
+        } else referenceMatrix = &(referenceImage->qto_xyz);
         //If the floating sform is defined, it is used; qform otherwise;
         mat44 *floatingMatrix;
-        if(floatingImage->sform_code > 0)
-        {
+        if (floatingImage->sform_code > 0) {
 #ifndef NDEBUG
             reg_print_msg_debug(" The floating sform matrix is defined and used");
 #endif
             floatingMatrix = &(floatingImage->sto_xyz);
-        }
-        else floatingMatrix = &(floatingImage->qto_xyz);
+        } else floatingMatrix = &(floatingImage->qto_xyz);
 
-        for(int i=0; i<3; i++)
-        {
-            absoluteReference.m[i][i]=sqrt(referenceMatrix->m[0][i]*referenceMatrix->m[0][i]
-                    + referenceMatrix->m[1][i]*referenceMatrix->m[1][i]
-                    + referenceMatrix->m[2][i]*referenceMatrix->m[2][i]);
-            absoluteFloating.m[i][i]=sqrt(floatingMatrix->m[0][i]*floatingMatrix->m[0][i]
-                    + floatingMatrix->m[1][i]*floatingMatrix->m[1][i]
-                    + floatingMatrix->m[2][i]*floatingMatrix->m[2][i]);
+        for (int i = 0; i < 3; i++) {
+            absoluteReference.m[i][i] = sqrt(referenceMatrix->m[0][i] * referenceMatrix->m[0][i]
+                                             + referenceMatrix->m[1][i] * referenceMatrix->m[1][i]
+                                             + referenceMatrix->m[2][i] * referenceMatrix->m[2][i]);
+            absoluteFloating.m[i][i] = sqrt(floatingMatrix->m[0][i] * floatingMatrix->m[0][i]
+                                            + floatingMatrix->m[1][i] * floatingMatrix->m[1][i]
+                                            + floatingMatrix->m[2][i] * floatingMatrix->m[2][i]);
         }
-        absoluteReference.m[3][3]=absoluteFloating.m[3][3]=1.0;
+        absoluteReference.m[3][3] = absoluteFloating.m[3][3] = 1.0;
 #ifndef NDEBUG
         reg_print_msg_debug("An flirt affine file is assumed and is converted to a real word affine matrix");
         reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] Matrix read from the input file");
@@ -94,9 +80,9 @@ void reg_tool_ReadAffineFile(mat44 *mat,
         absoluteFloating = nifti_mat44_inverse(absoluteFloating);
         *mat = nifti_mat44_inverse(*mat);
 
-        *mat = reg_mat44_mul(&absoluteFloating,mat);
+        *mat = reg_mat44_mul(&absoluteFloating, mat);
         *mat = reg_mat44_mul(mat, &absoluteReference);
-        *mat = reg_mat44_mul(floatingMatrix,mat);
+        *mat = reg_mat44_mul(floatingMatrix, mat);
         mat44 tmp = nifti_mat44_inverse(*referenceMatrix);
         *mat = reg_mat44_mul(mat, &tmp);
     }
@@ -106,40 +92,33 @@ void reg_tool_ReadAffineFile(mat44 *mat,
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_tool_ReadAffineFile(mat44 *mat,
-                             char *fileName)
-{
+void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) {
     std::ifstream affineFile;
     affineFile.open(fileName);
-    if(affineFile.is_open())
-    {
-        int i=0;
-        double value1,value2,value3,value4;
+    if (affineFile.is_open()) {
+        int i = 0;
+        double value1, value2, value3, value4;
 #ifndef NDEBUG
         char text_header[255];
         sprintf(text_header, "Affine matrix values:");
         reg_print_msg_debug(text_header);
 #endif
-        while(!affineFile.eof())
-        {
+        while (!affineFile.eof()) {
             affineFile >> value1 >> value2 >> value3 >> value4;
 #ifndef NDEBUG
             char text[255];
             sprintf(text, "%f - %f - %f - %f", value1, value2, value3, value4);
             reg_print_msg_debug(text);
 #endif
-            mat->m[i][0] = (float) value1;
-            mat->m[i][1] = (float) value2;
-            mat->m[i][2] = (float) value3;
-            mat->m[i][3] = (float) value4;
+            mat->m[i][0] = (float)value1;
+            mat->m[i][1] = (float)value2;
+            mat->m[i][2] = (float)value3;
+            mat->m[i][3] = (float)value4;
             i++;
-            if(i>3) break;
+            if (i > 3) break;
         }
-    }
-    else
-    {
-        char text[255];sprintf(text, "The affine file can not be read: %s", fileName);
+    } else {
+        char text[255]; sprintf(text, "The affine file can not be read: %s", fileName);
         reg_print_fct_error("reg_tool_ReadAffineFile");
         reg_print_msg_error(text);
         reg_exit();
@@ -147,22 +126,15 @@ void reg_tool_ReadAffineFile(mat44 *mat,
     affineFile.close();
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_tool_WriteAffineFile(mat44 *mat,
-                              const char *fileName)
-{
+void reg_tool_WriteAffineFile(mat44 *mat, const char *fileName) {
     FILE *affineFile;
-    affineFile=fopen(fileName, "w");
-    for(int i=0; i<4; i++)
+    affineFile = fopen(fileName, "w");
+    for (int i = 0; i < 4; i++)
         fprintf(affineFile, "%.7g %.7g %.7g %.7g\n", mat->m[i][0], mat->m[i][1], mat->m[i][2], mat->m[i][3]);
     fclose(affineFile);
 }
 /* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
-std::pair<size_t, size_t> reg_tool_sizeInputMatrixFile(char *filename)
-{
+std::pair<size_t, size_t> reg_tool_sizeInputMatrixFile(char *filename) {
     //FIRST LET'S DETERMINE THE NUMBER OF LINE AND COLUMN
     std::string line;
     std::ifstream matrixFile(filename);
@@ -187,42 +159,36 @@ std::pair<size_t, size_t> reg_tool_sizeInputMatrixFile(char *filename)
         }
         //
         matrixFile.close();
-    }
-    else {
+    } else {
         char text[255];
         sprintf(text, "The file can not be read: %s", filename);
         reg_print_fct_error("reg_tool_ReadMatrixFile");
         reg_print_msg_error(text);
         reg_exit();
     }
-    std::pair <size_t, size_t> result(nbLine, nbColumn);
-    return result;
+    return { nbLine, nbColumn };
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
-void reg_tool_WriteMatrixFile(char *filename, T **mat, size_t nbLine, size_t nbColumn)
-{
-   // Create a file
-   std::ofstream outFile;
-   outFile.open(filename);
-   // Loop over all values
-   for(size_t l=0;l<nbLine;++l){
-      for(size_t c=0;c<nbColumn;++c){
-         outFile << mat[l][c] << " ";
-      }
-      outFile << "\n";
-   }
-   // Close the opened file
-   outFile.close();
+void reg_tool_WriteMatrixFile(char *filename, T **mat, size_t nbLine, size_t nbColumn) {
+    // Create a file
+    std::ofstream outFile;
+    outFile.open(filename);
+    // Loop over all values
+    for (size_t l = 0; l < nbLine; ++l) {
+        for (size_t c = 0; c < nbColumn; ++c) {
+            outFile << mat[l][c] << " ";
+        }
+        outFile << "\n";
+    }
+    // Close the opened file
+    outFile.close();
 }
-template void reg_tool_WriteMatrixFile<float>(char *, float **, size_t , size_t);
-template void reg_tool_WriteMatrixFile<double>(char *, double **, size_t , size_t);
-/* *************************************************************** */
+template void reg_tool_WriteMatrixFile<float>(char *, float **, size_t, size_t);
+template void reg_tool_WriteMatrixFile<double>(char *, double **, size_t, size_t);
 /* *************************************************************** */
 template<class T>
-T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn)
-{
+T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) {
     //THEN CONSTRUCT THE MATRIX
     // Allocate the matrices
     T** mat = reg_matrix2DAllocate<T>(nbLine, nbColumn);
@@ -232,14 +198,12 @@ T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn)
     double currentValue = 0;
     if (matrixFile.is_open()) {
         int j = 0;
-        while (std::getline(matrixFile, line))
-        {
+        while (std::getline(matrixFile, line)) {
             std::string delimiter = " ";
             int i = 0;
             size_t pos = 0;
             std::string token;
-            while ((pos = line.find(delimiter)) != std::string::npos)
-            {
+            while ((pos = line.find(delimiter)) != std::string::npos) {
                 token = line.substr(0, pos);
                 currentValue = atof(token.c_str());
                 mat[j][i] = currentValue;
@@ -251,24 +215,20 @@ T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn)
             j++;
         }
         matrixFile.close();
-    }
-    else
-    {
+    } else {
         char text[255];
         sprintf(text, "The matrix file can not be read: %s", filename);
         reg_print_fct_error("reg_tool_ReadMatrixFile");
         reg_print_msg_error(text);
         reg_exit();
     }
-    //
+
     return mat;
 }
 template float** reg_tool_ReadMatrixFile<float>(char *filename, size_t nbLine, size_t nbColumn);
 template double** reg_tool_ReadMatrixFile<double>(char *filename, size_t nbLine, size_t nbColumn);
 /* *************************************************************** */
-/* *************************************************************** */
-mat44* reg_tool_ReadMat44File(char *fileName)
-{
+mat44* reg_tool_ReadMat44File(char *fileName) {
     mat44 *mat = (mat44 *)malloc(sizeof(mat44));
     std::ifstream matrixFile;
     matrixFile.open(fileName);
@@ -278,15 +238,14 @@ mat44* reg_tool_ReadMat44File(char *fileName)
         while (!matrixFile.eof()) {
             matrixFile >> value1 >> value2 >> value3 >> value4;
 
-            mat->m[i][0] = (float) value1;
-            mat->m[i][1] = (float) value2;
-            mat->m[i][2] = (float) value3;
-            mat->m[i][3] = (float) value4;
+            mat->m[i][0] = (float)value1;
+            mat->m[i][1] = (float)value2;
+            mat->m[i][2] = (float)value3;
+            mat->m[i][3] = (float)value4;
             i++;
-            if (i>3) break;
+            if (i > 3) break;
         }
-    }
-    else {
+    } else {
         char text[255]; sprintf(text, "The mat44 file can not be read: %s", fileName);
         reg_print_fct_error("reg_tool_ReadMat44File");
         reg_print_msg_error(text);
@@ -301,4 +260,3 @@ mat44* reg_tool_ReadMat44File(char *fileName)
     return mat;
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h
index ef625c74..f30d19dd 100644
--- a/reg-io/_reg_ReadWriteMatrix.h
+++ b/reg-io/_reg_ReadWriteMatrix.h
@@ -22,7 +22,7 @@
 /** @brief Read a text file that contains a affine transformation
  * and store it into a mat44 structure. This function can also read
  * affine parametrisation from Flirt (FSL package) and convert it
- * to a standard millimeter parametrisation
+ * to a standard millimetre parametrisation
  * @param mat Structure that will be updated with the affine
  * transformation matrix
  * @param referenceImage Reference image of the current transformation
@@ -79,8 +79,8 @@ std::pair<size_t, size_t> reg_tool_sizeInputMatrixFile(char *filename);
 * @brief Read a file that contains a m-by-n matrix and store it into
 * an appropriate structure
 * @param filename Filename of the text file that contains the matrix to read
-* @param nbLine number of line of the imput matrix
-* @param nbColumn number of column of the imput matrix
+* @param nbLine number of line of the input matrix
+* @param nbColumn number of column of the input matrix
 * @return a pointer to a 2D array that points the read matrix
 **/
 extern "C++" template <class T>
@@ -92,8 +92,8 @@ T** reg_tool_ReadMatrixFile(char *filename,
 * @brief Write a file that contains a m-by-n matrix into a text file
 * @param filename Filename of the text file to be written
 * @param mat Input matrix to be saved
-* @param nbLine number of line of the imput matrix
-* @param nbColumn number of column of the imput matrix
+* @param nbLine number of line of the input matrix
+* @param nbColumn number of column of the input matrix
 **/
 extern "C++" template <class T>
 void reg_tool_WriteMatrixFile(char *filename,
diff --git a/reg-io/niftilib/nifti1.h b/reg-io/niftilib/nifti1.h
index 8a442265..49e7602b 100644
--- a/reg-io/niftilib/nifti1.h
+++ b/reg-io/niftilib/nifti1.h
@@ -1318,7 +1318,7 @@ typedef struct { unsigned char r,g,b; } rgb_byte ;
                                /** Space codes are multiples of 1. **/
                                /*! NIFTI code for meters. */
 #define NIFTI_UNITS_METER   1
-                               /*! NIFTI code for millimeters. */
+                               /*! NIFTI code for millimetres. */
 #define NIFTI_UNITS_MM      2
                                /*! NIFTI code for micrometers. */
 #define NIFTI_UNITS_MICRON  3
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 4abfcd4a..03b00116 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -54,7 +54,7 @@
  *
  * Possible improvement: Take care of anisotropic data. Right now, we specify
  * the block size, neighborhood and the step sizes in voxels and it would be
- * better to specify it in millimeters and take the voxel size into account.
+ * better to specify it in millimetres and take the voxel size into account.
  * However, it would be more efficient to calculate this once (outside this
  * module) and pass these values for each axes. For the time being, we do this
  * simple implementation.
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index 8f32e33d..98b96495 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -710,7 +710,7 @@ void optimize(_reg_blockMatchingParam *params,
               mat44 *transformation_matrix,
               bool affine)
 {
-   // The block matching provide correspondences in millimeters
+   // The block matching provide correspondences in millimetres
    // in the space of the reference image. All warped image coordinates
    // are updated to be in the original warped space
    //    mat44 inverseMatrix = nifti_mat44_inverse(*transformation_matrix);
diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp
index 6e959816..8c592e3c 100644
--- a/reg-lib/cpu/_reg_discrete_init.cpp
+++ b/reg-lib/cpu/_reg_discrete_init.cpp
@@ -39,7 +39,7 @@ reg_discrete_init::reg_discrete_init(reg_measure *_measure,
       currentValue+=this->discrete_increment;
    }
 
-   // Allocate the discretised values in millimeter
+   // Allocate the discretised values in millimetre
    this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *));
    for(int i=0;i<this->image_dim;++i){
       this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float));
diff --git a/reg-lib/cpu/_reg_discrete_init.h b/reg-lib/cpu/_reg_discrete_init.h
index 553f6b3d..d8e1e948 100644
--- a/reg-lib/cpu/_reg_discrete_init.h
+++ b/reg-lib/cpu/_reg_discrete_init.h
@@ -59,7 +59,7 @@ class reg_discrete_init
    int image_dim; ///< Dimension of the reference image
    size_t node_number; ///< Number of nodes in the tree
 
-   float **discrete_values_mm; ///< All discretised values in millimeter
+   float **discrete_values_mm; ///< All discretised values in millimetre
 
    int label_1D_num; ///< Number of discretised values per axis
    int label_nD_num; ///< Total number of discretised values
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index ace0ff95..98e4aaeb 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -426,8 +426,7 @@ template<class DataType>
 void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                              nifti_image *deformationField,
                                              int *mask,
-                                             bool composition
-                                             )
+                                             bool composition)
 {
    int coord;
 
@@ -1750,7 +1749,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
                                        nifti_image *voxelImage,
                                        float weight,
                                        bool update,
-                                       const mat44 *voxelToMillimeter)
+                                       const mat44 *voxelToMillimetre)
 {
    const size_t nodeNumber = CalcVoxelNumber(*nodeImage);
    const size_t voxelNumber = CalcVoxelNumber(*voxelImage);
@@ -1770,7 +1769,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
 
    // The transformation between the image and the grid is used
    mat44 transformation;
-   // voxel to millimeter in the grid image
+   // voxel to millimetre in the grid image
    if(nodeImage->sform_code>0)
       transformation=nodeImage->sto_xyz;
    else transformation=nodeImage->qto_xyz;
@@ -1784,18 +1783,18 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
          transformation = reg_mat44_mul(&temp,&transformation);
       }
    }
-   // millimeter to voxel in the reference image
+   // millimetre to voxel in the reference image
    if(voxelImage->sform_code>0)
       transformation = reg_mat44_mul(&voxelImage->sto_ijk,&transformation);
    else transformation = reg_mat44_mul(&voxelImage->qto_ijk,&transformation);
 
    // The information has to be reoriented
    mat33 reorientation;
-   // Voxel to millimeter contains the orientation of the image that is used
+   // Voxel to millimetre contains the orientation of the image that is used
    // to compute the spatial gradient (floating image)
-   if(voxelToMillimeter!=nullptr)
+   if(voxelToMillimetre!=nullptr)
    {
-      reorientation=reg_mat44_to_mat33(voxelToMillimeter);
+      reorientation=reg_mat44_to_mat33(voxelToMillimetre);
       if(nodeImage->num_ext>0)
       {
          if(nodeImage->ext_list[0].edata!=nullptr)
@@ -1923,7 +1922,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
                                   nifti_image *voxelImage,
                                   float weight,
                                   bool update,
-                                  const mat44 *voxelToMillimeter)
+                                  const mat44 *voxelToMillimetre)
 {
    if(nodeImage->datatype!=voxelImage->datatype)
    {
@@ -1936,11 +1935,11 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
    {
    case NIFTI_TYPE_FLOAT32:
       reg_voxelCentric2NodeCentric_core<float>
-            (nodeImage, voxelImage, weight, update, voxelToMillimeter);
+            (nodeImage, voxelImage, weight, update, voxelToMillimetre);
       break;
    case NIFTI_TYPE_FLOAT64:
       reg_voxelCentric2NodeCentric_core<double>
-            (nodeImage, voxelImage, weight, update, voxelToMillimeter);
+            (nodeImage, voxelImage, weight, update, voxelToMillimetre);
       break;
    default:
       reg_print_fct_error("reg_voxelCentric2NodeCentric");
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index 30d1aec7..bf8e8127 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -69,7 +69,7 @@ void reg_spline_getDeformationField(nifti_image *controlPointGridImage,
                                     bool force_no_lut = false);
 /* *************************************************************** */
 /** @brief Upsample an image from voxel space to node space using
- * millimiter correspendences.
+ * millimetre correspondences.
  * @param nodeImage This image is a coarse representation of the
  * transformation (typically a grid of control point). This image
  * values are going to be updated
@@ -85,7 +85,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
                                   nifti_image *voxelImage,
                                   float weight,
                                   bool update,
-                                  const mat44 *voxelToMillimeter = nullptr);
+                                  const mat44 *voxelToMillimetre = nullptr);
 /* *************************************************************** */
 /** @brief Refine a grid of control points
  * @param referenceImage Image that defined the space of the reference
diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp
index b92118d1..eb75940c 100644
--- a/reg-lib/cpu/_reg_mrf.cpp
+++ b/reg-lib/cpu/_reg_mrf.cpp
@@ -24,7 +24,7 @@ reg_mrf::reg_mrf(int _discrete_radius,
     this->label_nD_num = static_cast<int>(std::pow((double) this->label_1D_num,this->image_dim));
     this->node_number = _node_number;
 
-    // Allocate the discretised values in millimeter
+    // Allocate the discretised values in millimetre
     this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *));
     for(int i=0;i<this->image_dim;++i){
         this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float));
@@ -71,7 +71,7 @@ reg_mrf::reg_mrf(reg_measure *_measure,
       currentValue+=this->discrete_increment;
    }
 
-   // Allocate the discretised values in millimeter
+   // Allocate the discretised values in millimetre
    this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *));
    for(int i=0;i<this->image_dim;++i){
        this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float));
diff --git a/reg-lib/cpu/_reg_mrf.h b/reg-lib/cpu/_reg_mrf.h
index e6584ce4..75a91ea4 100644
--- a/reg-lib/cpu/_reg_mrf.h
+++ b/reg-lib/cpu/_reg_mrf.h
@@ -85,7 +85,7 @@ class reg_mrf
    int image_dim; ///< Dimension of the reference image
    size_t node_number; ///< Number of nodes in the tree
 
-   float **discrete_values_mm; ///< All discretised values in millimeter
+   float **discrete_values_mm; ///< All discretised values in millimetre
 
    int* orderedList; ///< Ordered list of nodes from the root to the leaves
    int* parentsList; ///< List that gives parent's index for each node
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index b7bec647..0c6dca62 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -50,7 +50,7 @@ void reg_checkAndCorrectDimension(nifti_image *image) {
                                                 image->qfac);
         image->qto_ijk = nifti_mat44_inverse(image->qto_xyz);
     }
-    // Set the voxel spacing to millimeters
+    // Set the voxel spacing to millimetres
     if (image->xyz_units == NIFTI_UNITS_MICRON) {
         for (int d = 1; d <= image->ndim; ++d)
             image->pixdim[d] /= 1000.f;
diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp
index 9285e8bd..1c0ddc96 100644
--- a/reg-test/reg_test_affineDeformationField.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -28,7 +28,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
     reg_checkAndCorrectDimension(reference3d);
 
     // Generate the different test cases
-    std::vector<TestData> testCases;
+    vector<TestData> testCases;
 
     // Identity use case - 2D
     mat44 identity;
@@ -149,7 +149,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         auto&& [testName, reference, testMat, testResX, testResY, testResZ] = testCase;
 
         // Accumulate all required contents with a vector
-        std::vector<ContentDesc> contentDescs;
+        vector<ContentDesc> contentDescs;
         for (auto&& platformType : PlatformTypes) {
             unique_ptr<Platform> platform{ new Platform(platformType) };
             unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index a1b53590..bfe326f8 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -1,6 +1,7 @@
 // Enable testing
 #define NR_TESTING
 
+#include <random>
 #include <catch2/catch_test_macros.hpp>
 #include "_reg_localTrans.h"
 #include "Platform.h"
@@ -9,7 +10,7 @@
 
 
 template <typename T>
-void interpCubicSplineKernel(T relative, T (&basis)[4]) {
+void InterpCubicSplineKernel(T relative, T (&basis)[4]) {
     if (relative < 0) relative = 0; //reg_rounding error
     const T relative2 = relative * relative;
     basis[0] = (relative * ((2.f - relative) * relative - 1.f)) / 2.f;
@@ -19,8 +20,8 @@ void interpCubicSplineKernel(T relative, T (&basis)[4]) {
 }
 
 template <typename T>
-void interpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) {
-    interpCubicSplineKernel(relative, basis);
+void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) {
+    InterpCubicSplineKernel(relative, basis);
     if (relative < 0) relative = 0; //reg_rounding error
     const T relative2 = relative * relative;
     derivative[0] = (4.f * relative - 3.f * relative2 - 1.f) / 2.f;
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index a00f9b9e..91cdb08f 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -69,7 +69,7 @@ TEST_CASE("Interpolation", "[Interpolation]") {
     def3dPtr[2] = 1.4f;
 
     // Generate the different test cases
-    std::vector<TestData> testCases;
+    vector<TestData> testCases;
 
     // Linear interpolation - 2D
     // coordinate in image: [1.2, 1.3]
@@ -108,8 +108,8 @@ TEST_CASE("Interpolation", "[Interpolation]") {
     // coordinate in image: [1.2, 1.3]
     float resCubic2d[1] = {0};
     float xBasis[4], yBasis[4];
-    interpCubicSplineKernel(0.2f, xBasis);
-    interpCubicSplineKernel(0.3f, yBasis);
+    InterpCubicSplineKernel(0.2f, xBasis);
+    InterpCubicSplineKernel(0.3f, yBasis);
     for (int y = 0; y <= 3; ++y) {
         for (int x = 0; x <= 3; ++x) {
             resCubic2d[0] += ref2dPtr[y * dimFlo[1] + x] * xBasis[x] * yBasis[y];
@@ -165,7 +165,7 @@ TEST_CASE("Interpolation", "[Interpolation]") {
     // coordinate in image: [1.2, 1.3, 1.4]
     float resCubic3d[1] = {0};
     float zBasis[4];
-    interpCubicSplineKernel(0.4f, zBasis);
+    InterpCubicSplineKernel(0.4f, zBasis);
     for (int z = 0; z <= 3; ++z) {
         for (int y = 0; y <= 3; ++y) {
             for (int x = 0; x <= 3; ++x) {
@@ -189,7 +189,7 @@ TEST_CASE("Interpolation", "[Interpolation]") {
         auto&& [testName, reference, defField, interp, testResult] = testCase;
 
         // Accumulate all required contents with a vector
-        std::vector<ContentDesc> contentDescs;
+        vector<ContentDesc> contentDescs;
         for (auto&& platformType : PlatformTypes) {
             shared_ptr<Platform> platform{ new Platform(platformType) };
             // Add Aladin content

From 61de02332ee1f90770181f2289ad74d9cbac6114 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 14:54:49 +0000
Subject: [PATCH 080/314] Refactor reg_createControlPointGrid() using automatic
 memory management

---
 niftyreg_build_version.txt      |   2 +-
 reg-lib/cpu/_reg_localTrans.cpp | 167 +++++++++++++++-----------------
 reg-lib/cpu/_reg_localTrans.h   |   8 +-
 3 files changed, 85 insertions(+), 92 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 2455a46a..205a12b5 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-193
+194
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 98e4aaeb..45b66e64 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -17,75 +17,68 @@
 /* *************************************************************** */
 /* *************************************************************** */
 template <class DataType>
-void reg_createControlPointGrid(nifti_image **controlPointGridImage,
-                                nifti_image *referenceImage,
-                                float *spacingMillimeter)
+void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
+                                const NiftiImage& referenceImage,
+                                const float *spacing)
 {
    // Define the control point grid dimension
-   int dim_cpp[8];
-   dim_cpp[0]=5;
-   dim_cpp[1]=static_cast<int>(reg_ceil(referenceImage->nx*referenceImage->dx/spacingMillimeter[0])+3.f);
-   dim_cpp[2]=static_cast<int>(reg_ceil(referenceImage->ny*referenceImage->dy/spacingMillimeter[1])+3.f);
-   dim_cpp[3]=1;
-   dim_cpp[5]=2;
-   if(referenceImage->nz>1)
-   {
-      dim_cpp[3]=static_cast<int>(reg_ceil(referenceImage->nz*referenceImage->dz/spacingMillimeter[2])+3.f);
-      dim_cpp[5]=3;
-   }
-   dim_cpp[4]=dim_cpp[6]=dim_cpp[7]=1;
+   vector<NiftiImage::dim_t> dims{
+      static_cast<int>(reg_ceil(referenceImage->nx*referenceImage->dx / spacing[0]) + 3.f),
+      static_cast<int>(reg_ceil(referenceImage->ny*referenceImage->dy / spacing[1]) + 3.f),
+      referenceImage->nz > 1 ? static_cast<int>(reg_ceil(referenceImage->nz * referenceImage->dz / spacing[2]) + 3.f) : 1,
+      1,
+      referenceImage->nz > 1 ? 3 : 2
+   };
 
    // Create the new control point grid image and allocate its space
-   if(sizeof(DataType)==4)
-      *controlPointGridImage = nifti_make_new_nim(dim_cpp, NIFTI_TYPE_FLOAT32, true);
-   else *controlPointGridImage = nifti_make_new_nim(dim_cpp, NIFTI_TYPE_FLOAT64, true);
+   controlPointGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64);
 
    // Fill the header information
-   (*controlPointGridImage)->cal_min=0;
-   (*controlPointGridImage)->cal_max=0;
-   (*controlPointGridImage)->pixdim[0]=1.0f;
-   (*controlPointGridImage)->pixdim[1]=(*controlPointGridImage)->dx=spacingMillimeter[0];
-   (*controlPointGridImage)->pixdim[2]=(*controlPointGridImage)->dy=spacingMillimeter[1];
+   controlPointGridImage->cal_min=0;
+   controlPointGridImage->cal_max=0;
+   controlPointGridImage->pixdim[0]=1.0f;
+   controlPointGridImage->pixdim[1]=controlPointGridImage->dx=spacing[0];
+   controlPointGridImage->pixdim[2]=controlPointGridImage->dy=spacing[1];
    if(referenceImage->nz==1)
    {
-      (*controlPointGridImage)->pixdim[3]=(*controlPointGridImage)->dz=1.0f;
+      controlPointGridImage->pixdim[3]=controlPointGridImage->dz=1.0f;
    }
-   else (*controlPointGridImage)->pixdim[3]=(*controlPointGridImage)->dz=spacingMillimeter[2];
-   (*controlPointGridImage)->pixdim[4]=(*controlPointGridImage)->dt=1.0f;
-   (*controlPointGridImage)->pixdim[5]=(*controlPointGridImage)->du=1.0f;
-   (*controlPointGridImage)->pixdim[6]=(*controlPointGridImage)->dv=1.0f;
-   (*controlPointGridImage)->pixdim[7]=(*controlPointGridImage)->dw=1.0f;
+   else controlPointGridImage->pixdim[3]=controlPointGridImage->dz=spacing[2];
+   controlPointGridImage->pixdim[4]=controlPointGridImage->dt=1.0f;
+   controlPointGridImage->pixdim[5]=controlPointGridImage->du=1.0f;
+   controlPointGridImage->pixdim[6]=controlPointGridImage->dv=1.0f;
+   controlPointGridImage->pixdim[7]=controlPointGridImage->dw=1.0f;
 
    // Reproduce the orientation of the reference image and add a one voxel shift
    if(referenceImage->qform_code+referenceImage->sform_code>0)
    {
-      (*controlPointGridImage)->qform_code=referenceImage->qform_code;
-      (*controlPointGridImage)->sform_code=referenceImage->sform_code;
+      controlPointGridImage->qform_code=referenceImage->qform_code;
+      controlPointGridImage->sform_code=referenceImage->sform_code;
    }
    else
    {
-      (*controlPointGridImage)->qform_code=1;
-      (*controlPointGridImage)->sform_code=0;
+      controlPointGridImage->qform_code=1;
+      controlPointGridImage->sform_code=0;
    }
 
    // The qform (and sform) are set for the control point position image
-   (*controlPointGridImage)->quatern_b=referenceImage->quatern_b;
-   (*controlPointGridImage)->quatern_c=referenceImage->quatern_c;
-   (*controlPointGridImage)->quatern_d=referenceImage->quatern_d;
-   (*controlPointGridImage)->qoffset_x=referenceImage->qoffset_x;
-   (*controlPointGridImage)->qoffset_y=referenceImage->qoffset_y;
-   (*controlPointGridImage)->qoffset_z=referenceImage->qoffset_z;
-   (*controlPointGridImage)->qfac=referenceImage->qfac;
-   (*controlPointGridImage)->qto_xyz = nifti_quatern_to_mat44((*controlPointGridImage)->quatern_b,
-                                                              (*controlPointGridImage)->quatern_c,
-                                                              (*controlPointGridImage)->quatern_d,
-                                                              (*controlPointGridImage)->qoffset_x,
-                                                              (*controlPointGridImage)->qoffset_y,
-                                                              (*controlPointGridImage)->qoffset_z,
-                                                              (*controlPointGridImage)->dx,
-                                                              (*controlPointGridImage)->dy,
-                                                              (*controlPointGridImage)->dz,
-                                                              (*controlPointGridImage)->qfac);
+   controlPointGridImage->quatern_b=referenceImage->quatern_b;
+   controlPointGridImage->quatern_c=referenceImage->quatern_c;
+   controlPointGridImage->quatern_d=referenceImage->quatern_d;
+   controlPointGridImage->qoffset_x=referenceImage->qoffset_x;
+   controlPointGridImage->qoffset_y=referenceImage->qoffset_y;
+   controlPointGridImage->qoffset_z=referenceImage->qoffset_z;
+   controlPointGridImage->qfac=referenceImage->qfac;
+   controlPointGridImage->qto_xyz = nifti_quatern_to_mat44(controlPointGridImage->quatern_b,
+                                                           controlPointGridImage->quatern_c,
+                                                           controlPointGridImage->quatern_d,
+                                                           controlPointGridImage->qoffset_x,
+                                                           controlPointGridImage->qoffset_y,
+                                                           controlPointGridImage->qoffset_z,
+                                                           controlPointGridImage->dx,
+                                                           controlPointGridImage->dy,
+                                                           controlPointGridImage->dz,
+                                                           controlPointGridImage->qfac);
 
    // Origin is shifted from 1 control point in the qform
    float originIndex[3];
@@ -94,53 +87,53 @@ void reg_createControlPointGrid(nifti_image **controlPointGridImage,
    originIndex[1] = -1.0f;
    originIndex[2] = 0.0f;
    if(referenceImage->nz>1) originIndex[2] = -1.0f;
-   reg_mat44_mul(&((*controlPointGridImage)->qto_xyz), originIndex, originReal);
-   (*controlPointGridImage)->qto_xyz.m[0][3] = (*controlPointGridImage)->qoffset_x = originReal[0];
-   (*controlPointGridImage)->qto_xyz.m[1][3] = (*controlPointGridImage)->qoffset_y = originReal[1];
-   (*controlPointGridImage)->qto_xyz.m[2][3] = (*controlPointGridImage)->qoffset_z = originReal[2];
+   reg_mat44_mul(&(controlPointGridImage->qto_xyz), originIndex, originReal);
+   controlPointGridImage->qto_xyz.m[0][3] = controlPointGridImage->qoffset_x = originReal[0];
+   controlPointGridImage->qto_xyz.m[1][3] = controlPointGridImage->qoffset_y = originReal[1];
+   controlPointGridImage->qto_xyz.m[2][3] = controlPointGridImage->qoffset_z = originReal[2];
 
-   (*controlPointGridImage)->qto_ijk = nifti_mat44_inverse((*controlPointGridImage)->qto_xyz);
+   controlPointGridImage->qto_ijk = nifti_mat44_inverse(controlPointGridImage->qto_xyz);
 
    // Update the sform if required
-   if((*controlPointGridImage)->sform_code>0)
+   if(controlPointGridImage->sform_code>0)
    {
       float scalingRatio[3];
-      scalingRatio[0]= (*controlPointGridImage)->dx / referenceImage->dx;
-      scalingRatio[1]= (*controlPointGridImage)->dy / referenceImage->dy;
-      scalingRatio[2]= (*controlPointGridImage)->dz / referenceImage->dz;
-
-      (*controlPointGridImage)->sto_xyz.m[0][0]=referenceImage->sto_xyz.m[0][0] * scalingRatio[0];
-      (*controlPointGridImage)->sto_xyz.m[1][0]=referenceImage->sto_xyz.m[1][0] * scalingRatio[0];
-      (*controlPointGridImage)->sto_xyz.m[2][0]=referenceImage->sto_xyz.m[2][0] * scalingRatio[0];
-      (*controlPointGridImage)->sto_xyz.m[3][0]=referenceImage->sto_xyz.m[3][0];
-      (*controlPointGridImage)->sto_xyz.m[0][1]=referenceImage->sto_xyz.m[0][1] * scalingRatio[1];
-      (*controlPointGridImage)->sto_xyz.m[1][1]=referenceImage->sto_xyz.m[1][1] * scalingRatio[1];
-      (*controlPointGridImage)->sto_xyz.m[2][1]=referenceImage->sto_xyz.m[2][1] * scalingRatio[1];
-      (*controlPointGridImage)->sto_xyz.m[3][1]=referenceImage->sto_xyz.m[3][1];
-      (*controlPointGridImage)->sto_xyz.m[0][2]=referenceImage->sto_xyz.m[0][2] * scalingRatio[2];
-      (*controlPointGridImage)->sto_xyz.m[1][2]=referenceImage->sto_xyz.m[1][2] * scalingRatio[2];
-      (*controlPointGridImage)->sto_xyz.m[2][2]=referenceImage->sto_xyz.m[2][2] * scalingRatio[2];
-      (*controlPointGridImage)->sto_xyz.m[3][2]=referenceImage->sto_xyz.m[3][2];
-      (*controlPointGridImage)->sto_xyz.m[0][3]=referenceImage->sto_xyz.m[0][3];
-      (*controlPointGridImage)->sto_xyz.m[1][3]=referenceImage->sto_xyz.m[1][3];
-      (*controlPointGridImage)->sto_xyz.m[2][3]=referenceImage->sto_xyz.m[2][3];
-      (*controlPointGridImage)->sto_xyz.m[3][3]=referenceImage->sto_xyz.m[3][3];
+      scalingRatio[0]= controlPointGridImage->dx / referenceImage->dx;
+      scalingRatio[1]= controlPointGridImage->dy / referenceImage->dy;
+      scalingRatio[2]= controlPointGridImage->dz / referenceImage->dz;
+
+      controlPointGridImage->sto_xyz.m[0][0]=referenceImage->sto_xyz.m[0][0] * scalingRatio[0];
+      controlPointGridImage->sto_xyz.m[1][0]=referenceImage->sto_xyz.m[1][0] * scalingRatio[0];
+      controlPointGridImage->sto_xyz.m[2][0]=referenceImage->sto_xyz.m[2][0] * scalingRatio[0];
+      controlPointGridImage->sto_xyz.m[3][0]=referenceImage->sto_xyz.m[3][0];
+      controlPointGridImage->sto_xyz.m[0][1]=referenceImage->sto_xyz.m[0][1] * scalingRatio[1];
+      controlPointGridImage->sto_xyz.m[1][1]=referenceImage->sto_xyz.m[1][1] * scalingRatio[1];
+      controlPointGridImage->sto_xyz.m[2][1]=referenceImage->sto_xyz.m[2][1] * scalingRatio[1];
+      controlPointGridImage->sto_xyz.m[3][1]=referenceImage->sto_xyz.m[3][1];
+      controlPointGridImage->sto_xyz.m[0][2]=referenceImage->sto_xyz.m[0][2] * scalingRatio[2];
+      controlPointGridImage->sto_xyz.m[1][2]=referenceImage->sto_xyz.m[1][2] * scalingRatio[2];
+      controlPointGridImage->sto_xyz.m[2][2]=referenceImage->sto_xyz.m[2][2] * scalingRatio[2];
+      controlPointGridImage->sto_xyz.m[3][2]=referenceImage->sto_xyz.m[3][2];
+      controlPointGridImage->sto_xyz.m[0][3]=referenceImage->sto_xyz.m[0][3];
+      controlPointGridImage->sto_xyz.m[1][3]=referenceImage->sto_xyz.m[1][3];
+      controlPointGridImage->sto_xyz.m[2][3]=referenceImage->sto_xyz.m[2][3];
+      controlPointGridImage->sto_xyz.m[3][3]=referenceImage->sto_xyz.m[3][3];
 
       // Origin is shifted from 1 control point in the sform
-      reg_mat44_mul(&((*controlPointGridImage)->sto_xyz), originIndex, originReal);
-      (*controlPointGridImage)->sto_xyz.m[0][3] = originReal[0];
-      (*controlPointGridImage)->sto_xyz.m[1][3] = originReal[1];
-      (*controlPointGridImage)->sto_xyz.m[2][3] = originReal[2];
-      (*controlPointGridImage)->sto_ijk = nifti_mat44_inverse((*controlPointGridImage)->sto_xyz);
+      reg_mat44_mul(&(controlPointGridImage->sto_xyz), originIndex, originReal);
+      controlPointGridImage->sto_xyz.m[0][3] = originReal[0];
+      controlPointGridImage->sto_xyz.m[1][3] = originReal[1];
+      controlPointGridImage->sto_xyz.m[2][3] = originReal[2];
+      controlPointGridImage->sto_ijk = nifti_mat44_inverse(controlPointGridImage->sto_xyz);
    }
 
-   (*controlPointGridImage)->intent_code=NIFTI_INTENT_VECTOR;
-   memset((*controlPointGridImage)->intent_name, 0, 16);
-   strcpy((*controlPointGridImage)->intent_name,"NREG_TRANS");
-   (*controlPointGridImage)->intent_p1=CUB_SPLINE_GRID;
+   controlPointGridImage->intent_code=NIFTI_INTENT_VECTOR;
+   memset(controlPointGridImage->intent_name, 0, 16);
+   strcpy(controlPointGridImage->intent_name,"NREG_TRANS");
+   controlPointGridImage->intent_p1=CUB_SPLINE_GRID;
 }
-template void reg_createControlPointGrid<float>(nifti_image **, nifti_image *, float *);
-template void reg_createControlPointGrid<double>(nifti_image **, nifti_image *, float *);
+template void reg_createControlPointGrid<float>(NiftiImage&, const NiftiImage&, const float*);
+template void reg_createControlPointGrid<double>(NiftiImage&, const NiftiImage&, const float*);
 /* *************************************************************** */
 template <class DataType>
 void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index bf8e8127..dbfae801 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -33,12 +33,12 @@
  * store in this pointer
  * @param referenceImage Reference image which dimension will be used to
  * define the control point grid image space
- * @param spacingMillimeter Control point spacing along each axis
+ * @param spacing Control point spacing along each axis
  */
 extern "C++" template <class DataType>
-void reg_createControlPointGrid(nifti_image **controlPointGridImage,
-                                nifti_image *referenceImage,
-                                float *spacingMillimeter);
+void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
+                                const NiftiImage& referenceImage,
+                                const float *spacing);
 
 extern "C++" template <class DataType>
 void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,

From 001d4982a9a35ea45a665feff3e208dedf558027 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 15:02:21 +0000
Subject: [PATCH 081/314] Refactor reg_createSymmetricControlPointGrids() using
 automatic memory management

---
 niftyreg_build_version.txt      |   2 +-
 reg-lib/cpu/_reg_localTrans.cpp | 154 ++++++++++++++------------------
 reg-lib/cpu/_reg_localTrans.h   |  12 +--
 3 files changed, 76 insertions(+), 92 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 205a12b5..6bb2f98f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-194
+195
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 45b66e64..7a5a29fe 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -136,22 +136,15 @@ template void reg_createControlPointGrid<float>(NiftiImage&, const NiftiImage&,
 template void reg_createControlPointGrid<double>(NiftiImage&, const NiftiImage&, const float*);
 /* *************************************************************** */
 template <class DataType>
-void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
-                                          nifti_image **backwardGridImage,
-                                          nifti_image *referenceImage,
-                                          nifti_image *floatingImage,
-                                          mat44 *forwardAffineTrans,
-                                          float *spacing)
+void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
+                                          NiftiImage& backwardGridImage,
+                                          const NiftiImage& referenceImage,
+                                          const NiftiImage& floatingImage,
+                                          const mat44 *forwardAffineTrans,
+                                          const float *spacing)
 {
-   // Delete the grid if they are already initialised
-   if(*forwardGridImage!=nullptr)
-      nifti_image_free(*forwardGridImage);
-   *forwardGridImage=nullptr;
-   if(*backwardGridImage!=nullptr)
-      nifti_image_free(*backwardGridImage);
-   *backwardGridImage=nullptr;
    // We specified a space which is in-between both input images
-   // // Get the reference image space
+   // Get the reference image space
    mat44 referenceImageSpace = referenceImage->qto_xyz;
    if(referenceImage->sform_code>0)
       referenceImageSpace = referenceImage->sto_xyz;
@@ -307,112 +300,103 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
    }
 
    // Compute the dimension of the control point grids
-   const int dim[8]= {5,
-                      static_cast<int>(reg_ceil((maxPosition[0]-minPosition[0])/spacing[0])+3),
-                      static_cast<int>(reg_ceil((maxPosition[1]-minPosition[1])/spacing[1])+3),
-                      referenceImage->nz>1?static_cast<int>(reg_ceil((maxPosition[2]-minPosition[2])/spacing[2])+3):1,
-                      1,
-                      referenceImage->nz>1?3:2,
-                      1,
-                      1
-                     };
+   const vector<NiftiImage::dim_t> dims{
+      static_cast<int>(reg_ceil((maxPosition[0] - minPosition[0]) / spacing[0]) + 3),
+      static_cast<int>(reg_ceil((maxPosition[1] - minPosition[1]) / spacing[1]) + 3),
+      referenceImage->nz > 1 ? static_cast<int>(reg_ceil((maxPosition[2] - minPosition[2]) / spacing[2]) + 3) : 1,
+      1,
+      referenceImage->nz > 1 ? 3 : 2
+   };
 
    // Create the control point grid image
-   if(sizeof(DataType)==sizeof(float))
-   {
-      (*forwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32,true);
-      (*backwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32,true);
-   }
-   else
-   {
-      (*forwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT64,true);
-      (*backwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT64,true);
-   }
+   forwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64);
+   backwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64);
+
    // Set the control point grid spacing
-   (*forwardGridImage)->pixdim[1]=(*forwardGridImage)->dx=(*backwardGridImage)->pixdim[1]=(*backwardGridImage)->dx=spacing[0];
-   (*forwardGridImage)->pixdim[2]=(*forwardGridImage)->dy=(*backwardGridImage)->pixdim[2]=(*backwardGridImage)->dy=spacing[1];
+   forwardGridImage->pixdim[1]=forwardGridImage->dx=backwardGridImage->pixdim[1]=backwardGridImage->dx=spacing[0];
+   forwardGridImage->pixdim[2]=forwardGridImage->dy=backwardGridImage->pixdim[2]=backwardGridImage->dy=spacing[1];
    if(referenceImage->nz>1)
-      (*forwardGridImage)->pixdim[3]=(*forwardGridImage)->dz=(*backwardGridImage)->pixdim[3]=(*backwardGridImage)->dz=spacing[2];
+      forwardGridImage->pixdim[3]=forwardGridImage->dz=backwardGridImage->pixdim[3]=backwardGridImage->dz=spacing[2];
    // Set the control point grid image orientation
-   (*forwardGridImage)->qform_code=(*backwardGridImage)->qform_code=0;
-   (*forwardGridImage)->sform_code=(*backwardGridImage)->sform_code=1;
-   reg_mat44_eye(&(*forwardGridImage)->sto_xyz);
-   reg_mat44_eye(&(*backwardGridImage)->sto_xyz);
-   reg_mat44_eye(&(*forwardGridImage)->sto_ijk);
-   reg_mat44_eye(&(*backwardGridImage)->sto_ijk);
+   forwardGridImage->qform_code=backwardGridImage->qform_code=0;
+   forwardGridImage->sform_code=backwardGridImage->sform_code=1;
+   reg_mat44_eye(&forwardGridImage->sto_xyz);
+   reg_mat44_eye(&backwardGridImage->sto_xyz);
+   reg_mat44_eye(&forwardGridImage->sto_ijk);
+   reg_mat44_eye(&backwardGridImage->sto_ijk);
    for(unsigned int i=0; i<3; ++i)
    {
       if(referenceImage->nz>1 || i<2)
       {
-         (*forwardGridImage)->sto_xyz.m[i][i]=(*backwardGridImage)->sto_xyz.m[i][i]=spacing[i];
-         (*forwardGridImage)->sto_xyz.m[i][3]=(*backwardGridImage)->sto_xyz.m[i][3]=minPosition[i]-spacing[i];
+         forwardGridImage->sto_xyz.m[i][i]=backwardGridImage->sto_xyz.m[i][i]=spacing[i];
+         forwardGridImage->sto_xyz.m[i][3]=backwardGridImage->sto_xyz.m[i][3]=minPosition[i]-spacing[i];
       }
       else
       {
-         (*forwardGridImage)->sto_xyz.m[i][i]=(*backwardGridImage)->sto_xyz.m[i][i]=1.f;
-         (*forwardGridImage)->sto_xyz.m[i][3]=(*backwardGridImage)->sto_xyz.m[i][3]=0.f;
+         forwardGridImage->sto_xyz.m[i][i]=backwardGridImage->sto_xyz.m[i][i]=1.f;
+         forwardGridImage->sto_xyz.m[i][3]=backwardGridImage->sto_xyz.m[i][3]=0.f;
       }
    }
-   (*forwardGridImage)->sto_ijk=(*backwardGridImage)->sto_ijk=nifti_mat44_inverse((*forwardGridImage)->sto_xyz);
+   forwardGridImage->sto_ijk=backwardGridImage->sto_ijk=nifti_mat44_inverse(forwardGridImage->sto_xyz);
    // Set the intent type
-   (*forwardGridImage)->intent_code=(*backwardGridImage)->intent_code=NIFTI_INTENT_VECTOR;
-   memset((*forwardGridImage)->intent_name, 0, 16);
-   memset((*backwardGridImage)->intent_name, 0, 16);
-   strcpy((*forwardGridImage)->intent_name,"NREG_TRANS");
-   strcpy((*backwardGridImage)->intent_name,"NREG_TRANS");
-   (*forwardGridImage)->intent_p1=(*backwardGridImage)->intent_p1=CUB_SPLINE_GRID;
+   forwardGridImage->intent_code=backwardGridImage->intent_code=NIFTI_INTENT_VECTOR;
+   memset(forwardGridImage->intent_name, 0, 16);
+   memset(backwardGridImage->intent_name, 0, 16);
+   strcpy(forwardGridImage->intent_name,"NREG_TRANS");
+   strcpy(backwardGridImage->intent_name,"NREG_TRANS");
+   forwardGridImage->intent_p1=backwardGridImage->intent_p1=CUB_SPLINE_GRID;
    // Set the affine matrices
    mat44 identity;
    reg_mat44_eye(&identity);
-   if((*forwardGridImage)->ext_list!=nullptr)
-      free((*forwardGridImage)->ext_list);
-   if((*backwardGridImage)->ext_list!=nullptr)
-      free((*backwardGridImage)->ext_list);
-   (*forwardGridImage)->num_ext=0;
-   (*backwardGridImage)->num_ext=0;
+   if(forwardGridImage->ext_list!=nullptr)
+      free(forwardGridImage->ext_list);
+   if(backwardGridImage->ext_list!=nullptr)
+      free(backwardGridImage->ext_list);
+   forwardGridImage->num_ext=0;
+   backwardGridImage->num_ext=0;
    if(identity!=halfForwardAffine && identity!=halfBackwardAffine)
    {
       // Create extensions to store the affine parametrisations for the forward transformation
-      (*forwardGridImage)->num_ext=2;
-      (*forwardGridImage)->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension));
-      (*forwardGridImage)->ext_list[0].esize=16*sizeof(float)+16;
-      (*forwardGridImage)->ext_list[1].esize=16*sizeof(float)+16;
-      (*forwardGridImage)->ext_list[0].ecode=NIFTI_ECODE_IGNORE;
-      (*forwardGridImage)->ext_list[1].ecode=NIFTI_ECODE_IGNORE;
-      (*forwardGridImage)->ext_list[0].edata=(char *)calloc((*forwardGridImage)->ext_list[0].esize-8,sizeof(float));
-      (*forwardGridImage)->ext_list[1].edata=(char *)calloc((*forwardGridImage)->ext_list[1].esize-8,sizeof(float));
-      memcpy((*forwardGridImage)->ext_list[0].edata, &halfForwardAffine, sizeof(mat44));
-      memcpy((*forwardGridImage)->ext_list[1].edata, &halfForwardAffine, sizeof(mat44));
+      forwardGridImage->num_ext=2;
+      forwardGridImage->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension));
+      forwardGridImage->ext_list[0].esize=16*sizeof(float)+16;
+      forwardGridImage->ext_list[1].esize=16*sizeof(float)+16;
+      forwardGridImage->ext_list[0].ecode=NIFTI_ECODE_IGNORE;
+      forwardGridImage->ext_list[1].ecode=NIFTI_ECODE_IGNORE;
+      forwardGridImage->ext_list[0].edata=(char *)calloc(forwardGridImage->ext_list[0].esize-8,sizeof(float));
+      forwardGridImage->ext_list[1].edata=(char *)calloc(forwardGridImage->ext_list[1].esize-8,sizeof(float));
+      memcpy(forwardGridImage->ext_list[0].edata, &halfForwardAffine, sizeof(mat44));
+      memcpy(forwardGridImage->ext_list[1].edata, &halfForwardAffine, sizeof(mat44));
 #ifndef NDEBUG
       reg_mat44_disp(&halfForwardAffine,(char *)"[NiftyReg DEBUG] Forward transformation half-affine");
 #endif
       // Create extensions to store the affine parametrisations for the backward transformation
-      (*backwardGridImage)->num_ext=2;
-      (*backwardGridImage)->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension));
-      (*backwardGridImage)->ext_list[0].esize=16*sizeof(float)+16;
-      (*backwardGridImage)->ext_list[1].esize=16*sizeof(float)+16;
-      (*backwardGridImage)->ext_list[0].ecode=NIFTI_ECODE_IGNORE;
-      (*backwardGridImage)->ext_list[1].ecode=NIFTI_ECODE_IGNORE;
-      (*backwardGridImage)->ext_list[0].edata=(char *)calloc((*backwardGridImage)->ext_list[0].esize-8,sizeof(float));
-      (*backwardGridImage)->ext_list[1].edata=(char *)calloc((*backwardGridImage)->ext_list[1].esize-8,sizeof(float));
-      memcpy((*backwardGridImage)->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44));
-      memcpy((*backwardGridImage)->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44));
+      backwardGridImage->num_ext=2;
+      backwardGridImage->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension));
+      backwardGridImage->ext_list[0].esize=16*sizeof(float)+16;
+      backwardGridImage->ext_list[1].esize=16*sizeof(float)+16;
+      backwardGridImage->ext_list[0].ecode=NIFTI_ECODE_IGNORE;
+      backwardGridImage->ext_list[1].ecode=NIFTI_ECODE_IGNORE;
+      backwardGridImage->ext_list[0].edata=(char *)calloc(backwardGridImage->ext_list[0].esize-8,sizeof(float));
+      backwardGridImage->ext_list[1].edata=(char *)calloc(backwardGridImage->ext_list[1].esize-8,sizeof(float));
+      memcpy(backwardGridImage->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44));
+      memcpy(backwardGridImage->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44));
 #ifndef NDEBUG
       reg_mat44_disp(&halfBackwardAffine,(char *)"[NiftyReg DEBUG] Backward transformation half-affine");
 #endif
    }
    // Initialise the grid with identity transformations
-   reg_tools_multiplyValueToImage(*forwardGridImage,*forwardGridImage,0.f);
-   reg_tools_multiplyValueToImage(*backwardGridImage,*backwardGridImage,0.f);
+   reg_tools_multiplyValueToImage(forwardGridImage,forwardGridImage,0.f);
+   reg_tools_multiplyValueToImage(backwardGridImage,backwardGridImage,0.f);
    // Convert the parametrisations into deformation fields
-   reg_getDeformationFromDisplacement(*forwardGridImage);
-   reg_getDeformationFromDisplacement(*backwardGridImage);
+   reg_getDeformationFromDisplacement(forwardGridImage);
+   reg_getDeformationFromDisplacement(backwardGridImage);
 }
 /* *************************************************************** */
 template void reg_createSymmetricControlPointGrids<float>
-(nifti_image **,nifti_image **,nifti_image *,nifti_image *,mat44 *,float *);
+(NiftiImage&,NiftiImage&,const NiftiImage&,const NiftiImage&,const mat44*,const float*);
 template void reg_createSymmetricControlPointGrids<double>
-(nifti_image **,nifti_image **,nifti_image *,nifti_image *,mat44 *,float *);
+(NiftiImage&,NiftiImage&,const NiftiImage&,const NiftiImage&,const mat44*,const float*);
 /* *************************************************************** */
 /* *************************************************************** */
 template<class DataType>
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index dbfae801..bff164f1 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -41,12 +41,12 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
                                 const float *spacing);
 
 extern "C++" template <class DataType>
-void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage,
-                                          nifti_image **backwardGridImage,
-                                          nifti_image *referenceImage,
-                                          nifti_image *floatingImage,
-                                          mat44 *forwardAffineTrans,
-                                          float *spacing);
+void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
+                                          NiftiImage& backwardGridImage,
+                                          const NiftiImage& referenceImage,
+                                          const NiftiImage& floatingImage,
+                                          const mat44 *forwardAffineTrans,
+                                          const float *spacing);
 /* *************************************************************** */
 /** @brief Compute a dense deformation field in the space of a reference
  * image from a grid of control point.

From 058d4e9b56aeabb9db347d04a6d1d360dbbad31c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 15:04:30 +0000
Subject: [PATCH 082/314] Refactor reg_createImagePyramid() using automatic
 memory management

---
 niftyreg_build_version.txt |  2 +-
 reg-lib/cpu/_reg_maths.h   |  2 +-
 reg-lib/cpu/_reg_tools.cpp | 11 +++++------
 reg-lib/cpu/_reg_tools.h   | 11 +++++++----
 4 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6bb2f98f..0f11735f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-195
+196
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index 6b612905..7787e3c1 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -20,7 +20,7 @@
 #include <iostream>
 #include <vector>
 #include <stdexcept>
-#include "niftilib/nifti1_io.h"
+#include "RNifti.h"
 
 #ifdef _OPENMP
 #include <omp.h>
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 0c6dca62..27ef13db 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -1825,9 +1825,9 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB
 }
 /* *************************************************************** */
 template <class DataType>
-int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, unsigned int levelNumber, unsigned int levelToPerform) {
+void reg_createImagePyramid(const NiftiImage& inputImage, vector<NiftiImage>& pyramid, unsigned int levelNumber, unsigned int levelToPerform) {
     // FINEST LEVEL OF REGISTRATION
-    pyramid[levelToPerform - 1] = nifti_dup(*inputImage);
+    pyramid[levelToPerform - 1] = inputImage;
     reg_tools_changeDatatype<DataType>(pyramid[levelToPerform - 1]);
     reg_tools_removeSCLInfo(pyramid[levelToPerform - 1]);
 
@@ -1843,7 +1843,7 @@ int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid,
     // Images for each subsequent levels are allocated and downsampled if appropriate
     for (int l = levelToPerform - 2; l >= 0; l--) {
         // Allocation of the image
-        pyramid[l] = nifti_dup(*pyramid[l + 1]);
+        pyramid[l] = pyramid[l + 1];
 
         // Downsample the image if appropriate
         bool downsampleAxis[8] = { false, true, true, true, false, false, false, false };
@@ -1852,10 +1852,9 @@ int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid,
         if ((pyramid[l]->nz / 2) < 32) downsampleAxis[3] = false;
         reg_downsampleImage<DataType>(pyramid[l], 1, downsampleAxis);
     }
-    return EXIT_SUCCESS;
 }
-template int reg_createImagePyramid<float>(const nifti_image*, nifti_image**, unsigned int, unsigned int);
-template int reg_createImagePyramid<double>(const nifti_image*, nifti_image**, unsigned int, unsigned int);
+template void reg_createImagePyramid<float>(const NiftiImage&, vector<NiftiImage>&, unsigned int, unsigned int);
+template void reg_createImagePyramid<double>(const NiftiImage&, vector<NiftiImage>&, unsigned int, unsigned int);
 /* *************************************************************** */
 template <class DataType>
 int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) {
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index bcbe3df1..69e339e9 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -24,6 +24,9 @@
 
 using std::unique_ptr;
 using std::shared_ptr;
+using std::vector;
+using RNifti::NiftiImage;
+using RNifti::NiftiImageData;
 
 typedef enum {
     MEAN_KERNEL,
@@ -327,10 +330,10 @@ float reg_tools_getSTDValue(const nifti_image *img);
  * the registration.
  */
 extern "C++" template<class DataType>
-int reg_createImagePyramid(const nifti_image *input,
-                           nifti_image **pyramid,
-                           unsigned int levelNumber,
-                           unsigned int levelToPerform);
+void reg_createImagePyramid(const NiftiImage& input,
+                            vector<NiftiImage>& pyramid,
+                            unsigned int levelNumber,
+                            unsigned int levelToPerform);
 /* *************************************************************** */
 /** @brief Generate a pyramid from an input mask image.
  * @param input Input image to be downsampled to create the pyramid

From 0c1e715f019f87036f1eebd6338968ea7b288f7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 15:06:15 +0000
Subject: [PATCH 083/314] Refactor reg_createMaskPyramid() using automatic
 memory management

---
 niftyreg_build_version.txt |  2 +-
 reg-apps/reg_measure.cpp   | 45 ++++++++++++++++----------------------
 reg-lib/cpu/_reg_tools.cpp | 28 ++++++++++--------------
 reg-lib/cpu/_reg_tools.h   |  8 +++----
 4 files changed, 36 insertions(+), 47 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 0f11735f..53816522 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-196
+197
diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp
index 10380334..97a127fc 100755
--- a/reg-apps/reg_measure.cpp
+++ b/reg-apps/reg_measure.cpp
@@ -215,40 +215,38 @@ int main(int argc, char **argv)
    }
 
    /* Read the reference image */
-   nifti_image *refImage = reg_io_ReadImageFile(param->refImageName);
-   if(refImage == nullptr)
+   NiftiImage refImage = reg_io_ReadImageFile(param->refImageName);
+   if(!refImage)
    {
-      fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n",
-              param->refImageName);
+      fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n", param->refImageName);
       return EXIT_FAILURE;
    }
    reg_tools_changeDatatype<float>(refImage);
 
    /* Read the floating image */
-   nifti_image *floImage = reg_io_ReadImageFile(param->floImageName);
-   if(floImage == nullptr)
+   NiftiImage floImage = reg_io_ReadImageFile(param->floImageName);
+   if(!floImage)
    {
-      fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n",
-              param->floImageName);
+      fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n", param->floImageName);
       return EXIT_FAILURE;
    }
    reg_tools_changeDatatype<float>(floImage);
 
    /* Read and create the mask array */
-   int *refMask=nullptr;
-   size_t refMaskVoxNumber = CalcVoxelNumber(*refImage);
+   vector<unique_ptr<int[]>> refMasks(1);
+   unique_ptr<int[]>& refMask = refMasks[0];
+   size_t refMaskVoxNumber = refImage.nVoxelsPerVolume();
    if(flag->refMaskImageFlag){
-      nifti_image *refMaskImage = reg_io_ReadImageFile(param->refMaskImageName);
-      if(refMaskImage == nullptr)
+      NiftiImage refMaskImage = reg_io_ReadImageFile(param->refMaskImageName);
+      if(!refMaskImage)
       {
-         fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference mask image: %s\n",
-                 param->refMaskImageName);
+         fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference mask image: %s\n", param->refMaskImageName);
          return EXIT_FAILURE;
       }
-      reg_createMaskPyramid<float>(refMaskImage, &refMask, 1, 1);
+      reg_createMaskPyramid<float>(refMaskImage, refMasks, 1, 1);
    }
    else{
-      refMask = (int *)calloc(refMaskVoxNumber,sizeof(int));
+      refMask = unique_ptr<int[]>(new int[refMaskVoxNumber]());
       for(size_t i=0;i<refMaskVoxNumber;++i) refMask[i]=i;
    }
 
@@ -285,7 +283,7 @@ int main(int argc, char **argv)
    reg_resampleImage(floImage,
                      warpedFloImage,
                      defField,
-                     refMask,
+                     refMask.get(),
                      param->interpolation,
                      param->paddingValue);
    nifti_image_free(defField);
@@ -338,7 +336,7 @@ int main(int argc, char **argv)
          lncc_object->SetTimepointWeight(i,1.0);
       lncc_object->InitialiseMeasure(refImage,
                                     warpedFloImage,
-                                    refMask,
+                                    refMask.get(),
                                     warpedFloImage,
                                     nullptr,
                                     nullptr);
@@ -355,7 +353,7 @@ int main(int argc, char **argv)
         nmi_object->SetTimepointWeight(i, 1.0);
       nmi_object->InitialiseMeasure(refImage,
                                     warpedFloImage,
-                                    refMask,
+                                    refMask.get(),
                                     warpedFloImage,
                                     nullptr,
                                     nullptr);
@@ -372,7 +370,7 @@ int main(int argc, char **argv)
         ssd_object->SetTimepointWeight(i, 1.0);
       ssd_object->InitialiseMeasure(refImage,
                                     warpedFloImage,
-                                    refMask,
+                                    refMask.get(),
                                     warpedFloImage,
                                     nullptr,
                                     nullptr,
@@ -390,7 +388,7 @@ int main(int argc, char **argv)
         mind_object->SetTimepointWeight(i, 1.0);
       mind_object->InitialiseMeasure(refImage,
                                     warpedFloImage,
-                                    refMask,
+                                    refMask.get(),
                                     warpedFloImage,
                                     nullptr,
                                     nullptr);
@@ -405,11 +403,6 @@ int main(int argc, char **argv)
    if(outFile!=nullptr)
       fclose(outFile);
 
-   // Free the allocated images
-   nifti_image_free(refImage);
-   nifti_image_free(floImage);
-   free(refMask);
-
    free(flag);
    free(param);
    return EXIT_SUCCESS;
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 27ef13db..4c6f68ce 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -1857,10 +1857,10 @@ template void reg_createImagePyramid<float>(const NiftiImage&, vector<NiftiImage
 template void reg_createImagePyramid<double>(const NiftiImage&, vector<NiftiImage>&, unsigned int, unsigned int);
 /* *************************************************************** */
 template <class DataType>
-int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) {
+void reg_createMaskPyramid(const NiftiImage& inputMaskImage, vector<unique_ptr<int[]>>& maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) {
     // FINEST LEVEL OF REGISTRATION
-    nifti_image **tempMaskImagePyramid = (nifti_image **)malloc(levelToPerform * sizeof(nifti_image *));
-    tempMaskImagePyramid[levelToPerform - 1] = nifti_dup(*inputMaskImage);
+    vector<NiftiImage> tempMaskImagePyramid(levelToPerform);
+    tempMaskImagePyramid[levelToPerform - 1] = inputMaskImage;
     reg_tools_binarise_image(tempMaskImagePyramid[levelToPerform - 1]);
     reg_tools_changeDatatype<unsigned char>(tempMaskImagePyramid[levelToPerform - 1]);
 
@@ -1872,14 +1872,14 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid,
         if ((tempMaskImagePyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false;
         reg_downsampleImage<DataType>(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis);
     }
-    size_t voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[levelToPerform - 1]);
-    maskPyramid[levelToPerform - 1] = (int*)malloc(voxelNumber * sizeof(int));
-    reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1], maskPyramid[levelToPerform - 1]);
+    size_t voxelNumber = tempMaskImagePyramid[levelToPerform - 1].nVoxelsPerVolume();
+    maskPyramid[levelToPerform - 1] = std::make_unique<int[]>(voxelNumber);
+    reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1], maskPyramid[levelToPerform - 1].get());
 
     // Images for each subsequent levels are allocated and downsampled if appropriate
     for (int l = (int)levelToPerform - 2; l >= 0; l--) {
         // Allocation of the reference image
-        tempMaskImagePyramid[l] = nifti_dup(*tempMaskImagePyramid[l + 1]);
+        tempMaskImagePyramid[l] = tempMaskImagePyramid[l + 1];
 
         // Downsample the image if appropriate
         bool downsampleAxis[8] = { false, true, true, true, false, false, false, false };
@@ -1888,17 +1888,13 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid,
         if ((tempMaskImagePyramid[l]->nz / 2) < 32) downsampleAxis[3] = false;
         reg_downsampleImage<DataType>(tempMaskImagePyramid[l], 0, downsampleAxis);
 
-        voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[l]);
-        maskPyramid[l] = (int*)malloc(voxelNumber * sizeof(int));
-        reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l]);
+        voxelNumber = tempMaskImagePyramid[l].nVoxelsPerVolume();
+        maskPyramid[l] = std::make_unique<int[]>(voxelNumber);
+        reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l].get());
     }
-    for (unsigned int l = 0; l < levelToPerform; ++l)
-        nifti_image_free(tempMaskImagePyramid[l]);
-    free(tempMaskImagePyramid);
-    return EXIT_SUCCESS;
 }
-template int reg_createMaskPyramid<float>(const nifti_image*, int**, unsigned int, unsigned int);
-template int reg_createMaskPyramid<double>(const nifti_image*, int**, unsigned int, unsigned int);
+template void reg_createMaskPyramid<float>(const NiftiImage&, vector<unique_ptr<int[]>>&, unsigned int, unsigned int);
+template void reg_createMaskPyramid<double>(const NiftiImage&, vector<unique_ptr<int[]>>&, unsigned int, unsigned int);
 /* *************************************************************** */
 template <class ImageType, class MaskType>
 int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 69e339e9..c6361c7f 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -345,10 +345,10 @@ void reg_createImagePyramid(const NiftiImage& input,
  * the registration.
  */
 extern "C++" template<class DataType>
-int reg_createMaskPyramid(const nifti_image *input,
-                          int **pyramid,
-                          unsigned int levelNumber,
-                          unsigned int levelToPerform);
+void reg_createMaskPyramid(const NiftiImage& input,
+                           vector<unique_ptr<int[]>>& pyramid,
+                           unsigned int levelNumber,
+                           unsigned int levelToPerform);
 /* *************************************************************** */
 /** @brief this function will threshold an image to the values provided,
  * set the scl_slope and sct_inter of the image to 1 and 0

From 3a6d10c7bc07d2f0d6e005f4bb0f0f1a4b1ed95a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 16:08:54 +0000
Subject: [PATCH 084/314] Refactor reg_aladin class using automatic memory
 management

---
 niftyreg_build_version.txt |   2 +-
 reg-lib/_reg_aladin.cpp    | 160 ++++++++++++-------------------------
 reg-lib/_reg_aladin.h      |  34 ++++----
 3 files changed, 67 insertions(+), 129 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 53816522..485369e4 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-197
+205
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index dfdae9d7..6b010090 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -4,24 +4,11 @@
 template<class T>
 reg_aladin<T>::reg_aladin() {
     this->executableName = (char*)"Aladin";
-    this->inputReference = nullptr;
-    this->inputFloating = nullptr;
-    this->inputReferenceMask = nullptr;
-    this->referencePyramid = nullptr;
-    this->floatingPyramid = nullptr;
-    this->referenceMaskPyramid = nullptr;
 
     this->transformationMatrix = new mat44;
     this->inputTransformName = nullptr;
 
-    this->affineTransformation3DKernel = nullptr;
-    this->blockMatchingKernel = nullptr;
-    this->optimiseKernel = nullptr;
-    this->resamplingKernel = nullptr;
-
-    this->con = nullptr;
     this->blockMatchingParams = nullptr;
-    this->platform = nullptr;
 
     this->verbose = true;
 
@@ -40,16 +27,16 @@ reg_aladin<T>::reg_aladin() {
     this->alignCentre = 1;
     this->alignCentreMass = 0;
 
-    this->interpolation = 1;
+    this->interpolation = 1;    // linear
 
     this->floatingSigma = 0;
     this->referenceSigma = 0;
 
     this->referenceUpperThreshold = std::numeric_limits<T>::max();
-    this->referenceLowerThreshold = -std::numeric_limits<T>::max();
+    this->referenceLowerThreshold = std::numeric_limits<T>::min();
 
     this->floatingUpperThreshold = std::numeric_limits<T>::max();
-    this->floatingLowerThreshold = -std::numeric_limits<T>::max();
+    this->floatingLowerThreshold = std::numeric_limits<T>::min();
 
     this->warpedPaddingValue = std::numeric_limits<T>::quiet_NaN();
 
@@ -67,39 +54,9 @@ reg_aladin<T>::reg_aladin() {
 /* *************************************************************** */
 template<class T>
 reg_aladin<T>::~reg_aladin() {
-    if (this->transformationMatrix != nullptr)
+    if (this->transformationMatrix)
         delete this->transformationMatrix;
-    this->transformationMatrix = nullptr;
 
-    if (this->referencePyramid != nullptr) {
-        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
-            if (this->referencePyramid[l] != nullptr)
-                nifti_image_free(this->referencePyramid[l]);
-            this->referencePyramid[l] = nullptr;
-        }
-        free(this->referencePyramid);
-        this->referencePyramid = nullptr;
-    }
-    if (this->floatingPyramid != nullptr) {
-        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
-            if (this->floatingPyramid[l] != nullptr)
-                nifti_image_free(this->floatingPyramid[l]);
-            this->floatingPyramid[l] = nullptr;
-        }
-        free(this->floatingPyramid);
-        this->floatingPyramid = nullptr;
-    }
-    if (this->referenceMaskPyramid != nullptr) {
-        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
-            if (this->referenceMaskPyramid[l] != nullptr)
-                free(this->referenceMaskPyramid[l]);
-            this->referenceMaskPyramid[l] = nullptr;
-        }
-        free(this->referenceMaskPyramid);
-        this->referenceMaskPyramid = nullptr;
-    }
-    if (this->platform != nullptr)
-        delete this->platform;
 #ifndef NDEBUG
     reg_print_msg_debug("reg_aladin destructor called");
 #endif
@@ -147,13 +104,13 @@ void reg_aladin<T>::SetVerbose(bool _verbose) {
 template<class T>
 int reg_aladin<T>::Check() {
     //This does all the initial checking
-    if (this->inputReference == nullptr) {
+    if (!this->inputReference) {
         reg_print_fct_error("reg_aladin<T>::Check()");
         reg_print_msg_error("No reference image has been specified or it can not be read");
         return EXIT_FAILURE;
     }
 
-    if (this->inputFloating == nullptr) {
+    if (!this->inputFloating) {
         reg_print_fct_error("reg_aladin<T>::Check()");
         reg_print_msg_error("No floating image has been specified or it can not be read");
         return EXIT_FAILURE;
@@ -164,12 +121,12 @@ int reg_aladin<T>::Check() {
 /* *************************************************************** */
 template<class T>
 int reg_aladin<T>::Print() {
-    if (this->inputReference == nullptr) {
+    if (!this->inputReference) {
         reg_print_fct_error("reg_aladin<T>::Print()");
         reg_print_msg_error("No reference image has been specified");
         return EXIT_FAILURE;
     }
-    if (this->inputFloating == nullptr) {
+    if (!this->inputFloating) {
         reg_print_fct_error("reg_aladin<T>::Print()");
         reg_print_msg_error("No floating image has been specified");
         return EXIT_FAILURE;
@@ -221,15 +178,15 @@ void reg_aladin<T>::InitialiseRegistration() {
     reg_print_fct_debug("reg_aladin::InitialiseRegistration()");
 #endif
 
-    this->platform = new Platform(this->platformType);
+    this->platform.reset(new Platform(this->platformType));
     this->platform->SetGpuIdx(this->gpuIdx);
 
     this->Print();
 
     // CREATE THE PYRAMID IMAGES
-    this->referencePyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *));
-    this->floatingPyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *));
-    this->referenceMaskPyramid = (int **)malloc(this->levelsToPerform * sizeof(int *));
+    this->referencePyramid = vector<NiftiImage>(this->levelsToPerform);
+    this->floatingPyramid = vector<NiftiImage>(this->levelsToPerform);
+    this->referenceMaskPyramid = vector<unique_ptr<int[]>>(this->levelsToPerform);
 
     // FINEST LEVEL OF REGISTRATION
     reg_createImagePyramid<T>(this->inputReference,
@@ -241,47 +198,39 @@ void reg_aladin<T>::InitialiseRegistration() {
                               this->numberOfLevels,
                               this->levelsToPerform);
 
-    if (this->inputReferenceMask != nullptr)
+    if (this->inputReferenceMask)
         reg_createMaskPyramid<T>(this->inputReferenceMask,
                                  this->referenceMaskPyramid,
                                  this->numberOfLevels,
                                  this->levelsToPerform);
-    else {
-        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
-            const size_t voxelNumber = CalcVoxelNumber(*this->referencePyramid[l]);
-            this->referenceMaskPyramid[l] = (int *)calloc(voxelNumber, sizeof(int));
-        }
-    }
+    else
+        for (unsigned int l = 0; l < this->levelsToPerform; ++l)
+            this->referenceMaskPyramid[l].reset(new int[this->referencePyramid[l].nVoxelsPerVolume()]());
 
-    Kernel *convolutionKernel = this->platform->CreateKernel(ConvolutionKernel::GetName(), nullptr);
+    unique_ptr<Kernel> convolutionKernel(this->platform->CreateKernel(ConvolutionKernel::GetName(), nullptr));
     // SMOOTH THE INPUT IMAGES IF REQUIRED
     for (unsigned int l = 0; l < this->levelsToPerform; l++) {
         if (this->referenceSigma != 0) {
             // Only the first image is smoothed
-            bool *active = new bool[this->referencePyramid[l]->nt];
-            float *sigma = new float[this->referencePyramid[l]->nt];
+            unique_ptr<bool[]> active(new bool[this->referencePyramid[l]->nt]);
+            unique_ptr<float[]> sigma(new float[this->referencePyramid[l]->nt]);
             active[0] = true;
             for (int i = 1; i < this->referencePyramid[l]->nt; ++i)
                 active[i] = false;
             sigma[0] = this->referenceSigma;
-            convolutionKernel->castTo<ConvolutionKernel>()->Calculate(this->referencePyramid[l], sigma, 0, nullptr, active);
-            delete[] active;
-            delete[] sigma;
+            convolutionKernel->castTo<ConvolutionKernel>()->Calculate(this->referencePyramid[l], sigma.get(), 0, nullptr, active.get());
         }
         if (this->floatingSigma != 0) {
             // Only the first image is smoothed
-            bool *active = new bool[this->floatingPyramid[l]->nt];
-            float *sigma = new float[this->floatingPyramid[l]->nt];
+            unique_ptr<bool[]> active(new bool[this->floatingPyramid[l]->nt]);
+            unique_ptr<float[]> sigma(new float[this->floatingPyramid[l]->nt]);
             active[0] = true;
             for (int i = 1; i < this->floatingPyramid[l]->nt; ++i)
                 active[i] = false;
             sigma[0] = this->floatingSigma;
-            convolutionKernel->castTo<ConvolutionKernel>()->Calculate(this->floatingPyramid[l], sigma, 0, nullptr, active);
-            delete[] active;
-            delete[] sigma;
+            convolutionKernel->castTo<ConvolutionKernel>()->Calculate(this->floatingPyramid[l], sigma.get(), 0, nullptr, active.get());
         }
     }
-    delete convolutionKernel;
 
     // THRESHOLD THE INPUT IMAGES IF REQUIRED
     for (unsigned int l = 0; l < this->levelsToPerform; l++) {
@@ -294,8 +243,7 @@ void reg_aladin<T>::InitialiseRegistration() {
         if (FILE *aff = fopen(this->inputTransformName, "r")) {
             fclose(aff);
         } else {
-            std::string text;
-            text = stringFormat("The specified input affine file (%s) can not be read", this->inputTransformName);
+            std::string text = stringFormat("The specified input affine file (%s) can not be read", this->inputTransformName);
             reg_print_fct_error("reg_aladin<T>::InitialiseRegistration()");
             reg_print_msg_error(text.c_str());
             reg_exit();
@@ -330,7 +278,7 @@ void reg_aladin<T>::InitialiseRegistration() {
             this->transformationMatrix->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1];
             this->transformationMatrix->m[2][3] = floatingRealPosition[2] - referenceRealPosition[2];
         } else if (this->alignCentreMass == 2) {
-            float referenceCentre[3] = {0, 0, 0};
+            float referenceCentre[3] = { 0, 0, 0 };
             float referenceCount = 0;
             reg_tools_changeDatatype<float>(this->inputReference);
             float *refPtr = static_cast<float *>(this->inputReference->data);
@@ -354,7 +302,7 @@ void reg_aladin<T>::InitialiseRegistration() {
             if (this->inputReference->sform_code > 0)
                 reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOM);
 
-            float floatingCentre[3] = {0, 0, 0};
+            float floatingCentre[3] = { 0, 0, 0 };
             float floatingCount = 0;
             reg_tools_changeDatatype<float>(this->inputFloating);
             float *floPtr = static_cast<float *>(this->inputFloating->data);
@@ -387,23 +335,18 @@ void reg_aladin<T>::InitialiseRegistration() {
 /* *************************************************************** */
 template<class T>
 void reg_aladin<T>::DeallocateCurrentInputImage() {
-    nifti_image_free(this->referencePyramid[this->currentLevel]);
     this->referencePyramid[this->currentLevel] = nullptr;
-
-    nifti_image_free(this->floatingPyramid[this->currentLevel]);
     this->floatingPyramid[this->currentLevel] = nullptr;
-
-    free(this->referenceMaskPyramid[this->currentLevel]);
     this->referenceMaskPyramid[this->currentLevel] = nullptr;
 }
 /* *************************************************************** */
 template<class T>
 void reg_aladin<T>::CreateKernels() {
-    this->affineTransformation3DKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), this->con);
-    this->resamplingKernel = platform->CreateKernel(ResampleImageKernel::GetName(), this->con);
-    if (this->blockMatchingParams != nullptr) {
-        this->blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), this->con);
-        this->optimiseKernel = platform->CreateKernel(OptimiseKernel::GetName(), this->con);
+    this->affineTransformation3DKernel.reset(platform->CreateKernel(AffineDeformationFieldKernel::GetName(), this->con.get()));
+    this->resamplingKernel.reset(platform->CreateKernel(ResampleImageKernel::GetName(), this->con.get()));
+    if (this->blockMatchingParams) {
+        this->blockMatchingKernel.reset(platform->CreateKernel(BlockMatchingKernel::GetName(), this->con.get()));
+        this->optimiseKernel.reset(platform->CreateKernel(OptimiseKernel::GetName(), this->con.get()));
     } else {
         this->blockMatchingKernel = nullptr;
         this->optimiseKernel = nullptr;
@@ -412,12 +355,10 @@ void reg_aladin<T>::CreateKernels() {
 /* *************************************************************** */
 template<class T>
 void reg_aladin<T>::DeallocateKernels() {
-    delete this->affineTransformation3DKernel;
-    delete this->resamplingKernel;
-    if (this->blockMatchingKernel != nullptr)
-        delete this->blockMatchingKernel;
-    if (this->optimiseKernel != nullptr)
-        delete this->optimiseKernel;
+    this->affineTransformation3DKernel = nullptr;
+    this->resamplingKernel = nullptr;
+    this->blockMatchingKernel = nullptr;
+    this->optimiseKernel = nullptr;
 }
 /* *************************************************************** */
 template<class T>
@@ -451,13 +392,13 @@ void reg_aladin<T>::InitAladinContent(nifti_image *ref,
                                       unsigned int inlierLts,
                                       unsigned int blockStepSize) {
     unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
-    this->con = contentCreator->Create(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
+    this->con.reset(contentCreator->Create(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize));
     this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
 template<class T>
 void reg_aladin<T>::DeinitAladinContent() {
-    delete this->con;
+    this->con = nullptr;
 }
 /* *************************************************************** */
 template<class T>
@@ -485,8 +426,8 @@ void reg_aladin<T>::Run() {
     //Main loop over the levels:
     for (this->currentLevel = 0; this->currentLevel < this->levelsToPerform; this->currentLevel++) {
         this->InitAladinContent(this->referencePyramid[currentLevel], this->floatingPyramid[currentLevel],
-                                this->referenceMaskPyramid[currentLevel], this->transformationMatrix, sizeof(T), this->blockPercentage,
-                                this->inlierLts, this->blockStepSize);
+                                this->referenceMaskPyramid[currentLevel].get(), this->transformationMatrix, sizeof(T),
+                                this->blockPercentage, this->inlierLts, this->blockStepSize);
         this->CreateKernels();
 
         // Twice more iterations are performed during the first level
@@ -545,40 +486,37 @@ void reg_aladin<T>::Run() {
 #ifndef NDEBUG
     reg_print_msg_debug("reg_aladin::Run() done");
 #endif
-    return;
 }
 /* *************************************************************** */
 template<class T>
-nifti_image* reg_aladin<T>::GetFinalWarpedImage() {
+NiftiImage reg_aladin<T>::GetFinalWarpedImage() {
     // The initial images are used
-    if (this->inputReference == nullptr || this->inputFloating == nullptr || this->transformationMatrix == nullptr) {
+    if (!this->inputReference || !this->inputFloating || !this->transformationMatrix) {
         reg_print_fct_error("reg_aladin::GetFinalWarpedImage()");
         reg_print_msg_error("The reference, floating images and the transformation have to be defined");
         reg_exit();
     }
 
-    int *mask = (int *)calloc(CalcVoxelNumber(*this->inputReference), sizeof(int));
+    unique_ptr<int[]> mask(new int[this->inputReference.nVoxelsPerVolume()]());
 
     reg_aladin<T>::InitAladinContent(this->inputReference,
                                      this->inputFloating,
-                                     mask,
+                                     mask.get(),
                                      this->transformationMatrix,
                                      sizeof(T));
     reg_aladin<T>::CreateKernels();
 
     reg_aladin<T>::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation
-    nifti_image *warped = this->con->GetWarped();
 
-    free(mask);
-    nifti_image *resultImage = nifti_dup(*warped);
-    resultImage->cal_min = this->inputFloating->cal_min;
-    resultImage->cal_max = this->inputFloating->cal_max;
-    resultImage->scl_slope = this->inputFloating->scl_slope;
-    resultImage->scl_inter = this->inputFloating->scl_inter;
+    NiftiImage warpedImage(this->con->GetWarped(), true);
+    warpedImage->cal_min = this->inputFloating->cal_min;
+    warpedImage->cal_max = this->inputFloating->cal_max;
+    warpedImage->scl_slope = this->inputFloating->scl_slope;
+    warpedImage->scl_inter = this->inputFloating->scl_inter;
 
     reg_aladin<T>::DeallocateKernels();
     reg_aladin<T>::DeinitAladinContent();
-    return resultImage;
+    return warpedImage;
 }
 /* *************************************************************** */
 template<class T>
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 03b00116..c3d7d0e2 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -63,12 +63,12 @@ template<class T>
 class reg_aladin {
 protected:
     char *executableName;
-    nifti_image *inputReference;
-    nifti_image *inputFloating;
-    nifti_image *inputReferenceMask;
-    nifti_image **referencePyramid;
-    nifti_image **floatingPyramid;
-    int **referenceMaskPyramid;
+    NiftiImage inputReference;
+    NiftiImage inputFloating;
+    NiftiImage inputReferenceMask;
+    vector<NiftiImage> referencePyramid;
+    vector<NiftiImage> floatingPyramid;
+    vector<unique_ptr<int[]>> referenceMaskPyramid;
 
     char *inputTransformName;
     mat44 *transformationMatrix;
@@ -104,7 +104,7 @@ class reg_aladin {
     float floatingLowerThreshold;
     float warpedPaddingValue;
 
-    Platform *platform;
+    unique_ptr<Platform> platform;
     PlatformType platformType;
     unsigned gpuIdx;
 
@@ -134,28 +134,30 @@ class reg_aladin {
     virtual void DeallocateKernels();
 
 public:
+    unique_ptr<AladinContent> con;
+
     reg_aladin();
     virtual ~reg_aladin();
     GetStringMacro(ExecutableName, executableName);
 
     //No allocating of the images here...
-    void SetInputReference(nifti_image *input) {
+    void SetInputReference(NiftiImage input) {
         this->inputReference = input;
     }
-    nifti_image* GetInputReference() {
+    NiftiImage GetInputReference() {
         return this->inputReference;
     }
-    void SetInputFloating(nifti_image *input) {
+    void SetInputFloating(NiftiImage input) {
         this->inputFloating = input;
     }
-    nifti_image* GetInputFloating() {
+    NiftiImage GetInputFloating() {
         return this->inputFloating;
     }
 
-    void SetInputMask(nifti_image *input) {
+    void SetInputMask(NiftiImage input) {
         this->inputReferenceMask = input;
     }
-    nifti_image* GetInputMask() {
+    NiftiImage GetInputMask() {
         return this->inputReferenceMask;
     }
 
@@ -167,7 +169,7 @@ class reg_aladin {
     mat44* GetTransformationMatrix() {
         return this->transformationMatrix;
     }
-    nifti_image* GetFinalWarpedImage();
+    NiftiImage GetFinalWarpedImage();
 
     void SetPlatformType(const PlatformType& platformTypeIn) {
         this->platformType = platformTypeIn;
@@ -260,10 +262,8 @@ class reg_aladin {
         funcProgressCallback = funcProgCallback;
         paramsProgressCallback = paramsProgCallback;
     }
-    AladinContent *con;
 
 private:
-    Kernel *affineTransformation3DKernel, *blockMatchingKernel;
-    Kernel *optimiseKernel, *resamplingKernel;
+    unique_ptr<Kernel> affineTransformation3DKernel, blockMatchingKernel, optimiseKernel, resamplingKernel;
     void ResolveMatrix(unsigned int iterations, const unsigned int optimizationFlag);
 };

From 1130e1ffbffceca5a82a7ccf6bad9958e50c8a47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 16:16:13 +0000
Subject: [PATCH 085/314] Refactor reg_aladin_sym class using automatic memory
 management

---
 niftyreg_build_version.txt  |   2 +-
 reg-lib/_reg_aladin_sym.cpp | 466 +++++++++++++++---------------------
 reg-lib/_reg_aladin_sym.h   |  58 ++---
 3 files changed, 229 insertions(+), 297 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 485369e4..b35cfafd 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-205
+206
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index fd61974d..fcce8132 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -3,338 +3,270 @@
 
 /* *************************************************************** */
 template <class T>
-reg_aladin_sym<T>::reg_aladin_sym ()
-   :reg_aladin<T>::reg_aladin()
-{
-   this->executableName=(char*) "reg_aladin_sym";
+reg_aladin_sym<T>::reg_aladin_sym()
+    :reg_aladin<T>::reg_aladin() {
+    this->executableName = (char*)"reg_aladin_sym";
 
-   this->InputFloatingMask=nullptr;
-   this->FloatingMaskPyramid=nullptr;
+    this->backwardTransformationMatrix = new mat44;
 
-   this->BackwardTransformationMatrix=new mat44;
+    this->backwardBlockMatchingParams = nullptr;
 
-   this->bAffineTransformation3DKernel = nullptr;
-   this->bConvolutionKernel=nullptr;
-   this->bBlockMatchingKernel=nullptr;
-   this->bOptimiseKernel=nullptr;
-   this->bResamplingKernel=nullptr;
-
-   this->backCon = nullptr;
-   this->BackwardBlockMatchingParams=nullptr;
-
-   this->floatingUpperThreshold=std::numeric_limits<T>::max();
-   this->floatingLowerThreshold=-std::numeric_limits<T>::max();
+    this->floatingUpperThreshold = std::numeric_limits<T>::max();
+    this->floatingLowerThreshold = std::numeric_limits<T>::min();
 
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_aladin_sym constructor called");
+    reg_print_msg_debug("reg_aladin_sym constructor called");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-reg_aladin_sym<T>::~reg_aladin_sym()
-{
-   if(this->BackwardTransformationMatrix!=nullptr)
-      delete this->BackwardTransformationMatrix;
-   this->BackwardTransformationMatrix=nullptr;
-
-   if(this->FloatingMaskPyramid!=nullptr)
-   {
-      for(unsigned int i=0; i<this->levelsToPerform; ++i)
-      {
-         if(this->FloatingMaskPyramid[i]!=nullptr)
-         {
-           if(this->FloatingMaskPyramid!=nullptr)
-             free(this->FloatingMaskPyramid[i]);
-            this->FloatingMaskPyramid[i]=nullptr;
-         }
-      }
-      free(this->FloatingMaskPyramid);
-      this->FloatingMaskPyramid=nullptr;
-   }
+reg_aladin_sym<T>::~reg_aladin_sym() {
+    if (this->backwardTransformationMatrix)
+        delete this->backwardTransformationMatrix;
 
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_aladin_sym destructor called");
+    reg_print_msg_debug("reg_aladin_sym destructor called");
 #endif
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::SetInputFloatingMask(nifti_image *m)
-{
-   this->InputFloatingMask = m;
-   return;
+void reg_aladin_sym<T>::SetInputFloatingMask(NiftiImage inputFloatingMaskIn) {
+    this->inputFloatingMask = inputFloatingMaskIn;
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::InitialiseRegistration()
-{
+void reg_aladin_sym<T>::InitialiseRegistration() {
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_aladin_sym::InitialiseRegistration() called");
+    reg_print_msg_debug("reg_aladin_sym::InitialiseRegistration() called");
 #endif
 
-   reg_aladin<T>::InitialiseRegistration();
-   this->FloatingMaskPyramid = (int **) malloc(this->levelsToPerform*sizeof(int *));
-   if (this->InputFloatingMask!=nullptr)
-   {
-      reg_createMaskPyramid<T>(this->InputFloatingMask,
-                               this->FloatingMaskPyramid,
-                               this->numberOfLevels,
-                               this->levelsToPerform);
-   }
-   else
-   {
-      for(unsigned int l=0; l<this->levelsToPerform; ++l)
-      {
-         const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[l]);
-         this->FloatingMaskPyramid[l]=(int *)calloc(voxelNumberBw,sizeof(int));
-      }
-   }
+    reg_aladin<T>::InitialiseRegistration();
 
-   // CHECK THE THRESHOLD VALUES TO UPDATE THE MASK
-   if(this->floatingUpperThreshold!=std::numeric_limits<T>::max())
-   {
-      for(unsigned int l=0; l<this->levelsToPerform; ++l)
-      {
-         T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
-         int *mskPtr = this->FloatingMaskPyramid[l];
-         for(size_t i=0; i < CalcVoxelNumber(*this->floatingPyramid[l]); ++i)
-         {
-            if (mskPtr[i] > -1 && refPtr[i] > this->floatingUpperThreshold)
-               mskPtr[i] = -1;
-         }
-      }
-   }
-   if(this->floatingLowerThreshold!=-std::numeric_limits<T>::max())
-   {
-      for(unsigned int l=0; l<this->levelsToPerform; ++l)
-      {
-         T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
-         int *mskPtr = this->FloatingMaskPyramid[l];
-         for (size_t i = 0; i < CalcVoxelNumber(*this->floatingPyramid[l]); ++i)
-         {
-            if (mskPtr[i] > -1 && refPtr[i] < this->floatingLowerThreshold)
-               mskPtr[i] = -1;
-         }
-      }
-   }
+    this->floatingMaskPyramid = vector<unique_ptr<int[]>>(this->levelsToPerform);
+    if (this->inputFloatingMask)
+        reg_createMaskPyramid<T>(this->inputFloatingMask,
+                                 this->floatingMaskPyramid,
+                                 this->numberOfLevels,
+                                 this->levelsToPerform);
+    else
+        for (unsigned int l = 0; l < this->levelsToPerform; ++l)
+            this->floatingMaskPyramid[l].reset(new int[this->floatingPyramid[l].nVoxelsPerVolume()]());
 
-   if(this->alignCentreMass==1 && this->inputTransformName==nullptr)
-   {
-      if(!this->inputReferenceMask && !this->InputFloatingMask){
-         reg_print_msg_error("The masks' centre of mass can only be used when two masks are specified");
-         reg_exit();
-      }
-      float referenceCentre[3]={0,0,0};
-      float referenceCount=0;
-      reg_tools_changeDatatype<float>(this->inputReferenceMask);
-      float *refMaskPtr=static_cast<float *>(this->inputReferenceMask->data);
-      size_t refIndex=0;
-      for(int z=0;z<this->inputReferenceMask->nz;++z){
-         for(int y=0;y<this->inputReferenceMask->ny;++y){
-            for(int x=0;x<this->inputReferenceMask->nx;++x){
-               if(refMaskPtr[refIndex]!=0.f){
-                  referenceCentre[0]+=x;
-                  referenceCentre[1]+=y;
-                  referenceCentre[2]+=z;
-                  referenceCount++;
-               }
-               refIndex++;
+    // CHECK THE THRESHOLD VALUES TO UPDATE THE MASK
+    if (this->floatingUpperThreshold != std::numeric_limits<T>::max()) {
+        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
+            T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
+            int *mskPtr = this->floatingMaskPyramid[l].get();
+            for (size_t i = 0; i < this->floatingPyramid[l].nVoxelsPerVolume(); ++i) {
+                if (mskPtr[i] > -1 && refPtr[i] > this->floatingUpperThreshold)
+                    mskPtr[i] = -1;
             }
-         }
-      }
-      referenceCentre[0]/=referenceCount;
-      referenceCentre[1]/=referenceCount;
-      referenceCentre[2]/=referenceCount;
-      float refCOG[3];
-      if(this->inputReference->sform_code>0)
-         reg_mat44_mul(&(this->inputReference->sto_xyz),referenceCentre,refCOG);
+        }
+    }
+    if (this->floatingLowerThreshold != std::numeric_limits<T>::min()) {
+        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
+            T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
+            int *mskPtr = this->floatingMaskPyramid[l].get();
+            for (size_t i = 0; i < this->floatingPyramid[l].nVoxelsPerVolume(); ++i) {
+                if (mskPtr[i] > -1 && refPtr[i] < this->floatingLowerThreshold)
+                    mskPtr[i] = -1;
+            }
+        }
+    }
 
-      float floatingCentre[3]={0,0,0};
-      float floatingCount=0;
-      reg_tools_changeDatatype<float>(this->InputFloatingMask);
-      float *floMaskPtr=static_cast<float *>(this->InputFloatingMask->data);
-      size_t floIndex=0;
-      for(int z=0;z<this->InputFloatingMask->nz;++z){
-         for(int y=0;y<this->InputFloatingMask->ny;++y){
-            for(int x=0;x<this->InputFloatingMask->nx;++x){
-               if(floMaskPtr[floIndex]!=0.f){
-                  floatingCentre[0]+=x;
-                  floatingCentre[1]+=y;
-                  floatingCentre[2]+=z;
-                  floatingCount++;
-               }
-               floIndex++;
+    if (this->alignCentreMass == 1 && this->inputTransformName == nullptr) {
+        if (!this->inputReferenceMask && !this->inputFloatingMask) {
+            reg_print_msg_error("The masks' centre of mass can only be used when two masks are specified");
+            reg_exit();
+        }
+        float referenceCentre[3] = { 0, 0, 0 };
+        float referenceCount = 0;
+        reg_tools_changeDatatype<float>(this->inputReferenceMask);
+        float *refMaskPtr = static_cast<float *>(this->inputReferenceMask->data);
+        size_t refIndex = 0;
+        for (int z = 0; z < this->inputReferenceMask->nz; ++z) {
+            for (int y = 0; y < this->inputReferenceMask->ny; ++y) {
+                for (int x = 0; x < this->inputReferenceMask->nx; ++x) {
+                    if (refMaskPtr[refIndex] != 0.f) {
+                        referenceCentre[0] += x;
+                        referenceCentre[1] += y;
+                        referenceCentre[2] += z;
+                        referenceCount++;
+                    }
+                    refIndex++;
+                }
             }
-         }
-      }
-      floatingCentre[0]/=floatingCount;
-      floatingCentre[1]/=floatingCount;
-      floatingCentre[2]/=floatingCount;
-      float floCOG[3];
-      if(this->inputFloating->sform_code>0)
-         reg_mat44_mul(&(this->inputFloating->sto_xyz),floatingCentre,floCOG);
-      reg_mat44_eye(this->transformationMatrix);
-      this->transformationMatrix->m[0][3]=floCOG[0]-refCOG[0];
-      this->transformationMatrix->m[1][3]=floCOG[1]-refCOG[1];
-      this->transformationMatrix->m[2][3]=floCOG[2]-refCOG[2];
-   }
-   *(this->BackwardTransformationMatrix) = nifti_mat44_inverse(*(this->transformationMatrix));
+        }
+        referenceCentre[0] /= referenceCount;
+        referenceCentre[1] /= referenceCount;
+        referenceCentre[2] /= referenceCount;
+        float refCOG[3];
+        if (this->inputReference->sform_code > 0)
+            reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOG);
 
+        float floatingCentre[3] = { 0, 0, 0 };
+        float floatingCount = 0;
+        reg_tools_changeDatatype<float>(this->inputFloatingMask);
+        float *floMaskPtr = static_cast<float *>(this->inputFloatingMask->data);
+        size_t floIndex = 0;
+        for (int z = 0; z < this->inputFloatingMask->nz; ++z) {
+            for (int y = 0; y < this->inputFloatingMask->ny; ++y) {
+                for (int x = 0; x < this->inputFloatingMask->nx; ++x) {
+                    if (floMaskPtr[floIndex] != 0.f) {
+                        floatingCentre[0] += x;
+                        floatingCentre[1] += y;
+                        floatingCentre[2] += z;
+                        floatingCount++;
+                    }
+                    floIndex++;
+                }
+            }
+        }
+        floatingCentre[0] /= floatingCount;
+        floatingCentre[1] /= floatingCount;
+        floatingCentre[2] /= floatingCount;
+        float floCOG[3];
+        if (this->inputFloating->sform_code > 0)
+            reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOG);
+        reg_mat44_eye(this->transformationMatrix);
+        this->transformationMatrix->m[0][3] = floCOG[0] - refCOG[0];
+        this->transformationMatrix->m[1][3] = floCOG[1] - refCOG[1];
+        this->transformationMatrix->m[2][3] = floCOG[2] - refCOG[2];
+    }
+    *this->backwardTransformationMatrix = nifti_mat44_inverse(*this->transformationMatrix);
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::GetBackwardDeformationField()
-{
-   this->bAffineTransformation3DKernel->template castTo<AffineDeformationFieldKernel>()->Calculate();
+void reg_aladin_sym<T>::GetBackwardDeformationField() {
+    this->bAffineTransformation3DKernel->template castTo<AffineDeformationFieldKernel>()->Calculate();
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::GetWarpedImage(int interp, float padding)
-{
-   reg_aladin<T>::GetWarpedImage(interp, padding);
-   this->GetBackwardDeformationField();
-   this->bResamplingKernel->template castTo<ResampleImageKernel>()->Calculate(interp, padding);
-
+void reg_aladin_sym<T>::GetWarpedImage(int interp, float padding) {
+    reg_aladin<T>::GetWarpedImage(interp, padding);
+    this->GetBackwardDeformationField();
+    this->bResamplingKernel->template castTo<ResampleImageKernel>()->Calculate(interp, padding);
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::UpdateTransformationMatrix(int type){
-
-  reg_aladin<T>::UpdateTransformationMatrix(type);
+void reg_aladin_sym<T>::UpdateTransformationMatrix(int type) {
+    reg_aladin<T>::UpdateTransformationMatrix(type);
 
-  // Update now the backward transformation matrix
-  this->bBlockMatchingKernel->template castTo<BlockMatchingKernel>()->Calculate();
-  this->bOptimiseKernel->template castTo<OptimiseKernel>()->Calculate(type);
+    // Update now the backward transformation matrix
+    this->bBlockMatchingKernel->template castTo<BlockMatchingKernel>()->Calculate();
+    this->bOptimiseKernel->template castTo<OptimiseKernel>()->Calculate(type);
 
 #ifndef NDEBUG
-   reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated forward transformation matrix");
-   reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated backward transformation matrix");
+    reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated forward transformation matrix");
+    reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated backward transformation matrix");
 #endif
-   // Forward and backward matrix are inverted
-   mat44 fInverted = nifti_mat44_inverse(*(this->transformationMatrix));
-   mat44 bInverted = nifti_mat44_inverse(*(this->BackwardTransformationMatrix));
+    // Forward and backward matrix are inverted
+    mat44 fInverted = nifti_mat44_inverse(*this->transformationMatrix);
+    mat44 bInverted = nifti_mat44_inverse(*this->backwardTransformationMatrix);
 
-   // We average the forward and inverted backward matrix
-   *(this->transformationMatrix)=reg_mat44_avg2(this->transformationMatrix, &bInverted );
-   // We average the inverted forward and backward matrix
-   *(this->BackwardTransformationMatrix)=reg_mat44_avg2(&fInverted, this->BackwardTransformationMatrix );
-   for(int i=0;i<3;++i){
-      this->transformationMatrix->m[3][i]=0.f;
-      this->BackwardTransformationMatrix->m[3][i]=0.f;
-   }
-   this->transformationMatrix->m[3][3]=1.f;
-   this->BackwardTransformationMatrix->m[3][3]=1.f;
+    // We average the forward and inverted backward matrix
+    *this->transformationMatrix = reg_mat44_avg2(this->transformationMatrix, &bInverted);
+    // We average the inverted forward and backward matrix
+    *this->backwardTransformationMatrix = reg_mat44_avg2(&fInverted, this->backwardTransformationMatrix);
+    for (int i = 0; i < 3; ++i) {
+        this->transformationMatrix->m[3][i] = 0.f;
+        this->backwardTransformationMatrix->m[3][i] = 0.f;
+    }
+    this->transformationMatrix->m[3][3] = 1.f;
+    this->backwardTransformationMatrix->m[3][3] = 1.f;
 #ifndef NDEBUG
-   reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward transformation matrix");
-   reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[NiftyReg DEBUG] updated backward transformation matrix");
+    reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward transformation matrix");
+    reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[NiftyReg DEBUG] updated backward transformation matrix");
 #endif
 }
 /* *************************************************************** */
 template <class T>
 void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
-                        nifti_image *flo,
-                        int *mask,
-                        mat44 *transMat,
-                        size_t bytes,
-                        unsigned int blockPercentage,
-                        unsigned int inlierLts,
-                        unsigned int blockStepSize)
-{
-   reg_aladin<T>::InitAladinContent(ref,
-                              flo,
-                              mask,
-                              transMat,
-                              bytes,
-                              blockPercentage,
-                              inlierLts,
-                              blockStepSize);
-
-   unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
-   this->backCon = contentCreator->Create(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize);
-   this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams();
+                                          nifti_image *flo,
+                                          int *mask,
+                                          mat44 *transMat,
+                                          size_t bytes,
+                                          unsigned int blockPercentage,
+                                          unsigned int inlierLts,
+                                          unsigned int blockStepSize) {
+    reg_aladin<T>::InitAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
+    unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
+    this->backCon.reset(contentCreator->Create(flo, ref, this->floatingMaskPyramid[this->currentLevel].get(), this->backwardTransformationMatrix, bytes, blockPercentage, inlierLts, blockStepSize));
+    this->backwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::DeallocateCurrentInputImage()
-{
-   reg_aladin<T>::DeallocateCurrentInputImage();
-   if(this->FloatingMaskPyramid[this->currentLevel]!=nullptr)
-      free(this->FloatingMaskPyramid[this->currentLevel]);
-   this->FloatingMaskPyramid[this->currentLevel]=nullptr;
+void reg_aladin_sym<T>::DeallocateCurrentInputImage() {
+    reg_aladin<T>::DeallocateCurrentInputImage();
+    this->floatingMaskPyramid[this->currentLevel] = nullptr;
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::CreateKernels()
-{
-  reg_aladin<T>::CreateKernels();
-  this->bAffineTransformation3DKernel = this->platform->CreateKernel (AffineDeformationFieldKernel::GetName(), this->backCon);
-  this->bBlockMatchingKernel = this->platform->CreateKernel(BlockMatchingKernel::GetName(), this->backCon);
-  this->bResamplingKernel = this->platform->CreateKernel(ResampleImageKernel::GetName(), this->backCon);
-  this->bOptimiseKernel = this->platform->CreateKernel(OptimiseKernel::GetName(), this->backCon);
+void reg_aladin_sym<T>::CreateKernels() {
+    reg_aladin<T>::CreateKernels();
+    this->bAffineTransformation3DKernel.reset(this->platform->CreateKernel(AffineDeformationFieldKernel::GetName(), this->backCon.get()));
+    this->bBlockMatchingKernel.reset(this->platform->CreateKernel(BlockMatchingKernel::GetName(), this->backCon.get()));
+    this->bResamplingKernel.reset(this->platform->CreateKernel(ResampleImageKernel::GetName(), this->backCon.get()));
+    this->bOptimiseKernel.reset(this->platform->CreateKernel(OptimiseKernel::GetName(), this->backCon.get()));
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::DeinitAladinContent()
-{
-  reg_aladin<T>::DeinitAladinContent();
-  delete this->backCon;
+void reg_aladin_sym<T>::DeinitAladinContent() {
+    reg_aladin<T>::DeinitAladinContent();
+    this->backCon = nullptr;
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::DeallocateKernels()
-{
-  reg_aladin<T>::DeallocateKernels();
-  delete this->bResamplingKernel;
-  delete this->bAffineTransformation3DKernel;
-  delete this->bBlockMatchingKernel;
-  delete this->bOptimiseKernel;
+void reg_aladin_sym<T>::DeallocateKernels() {
+    reg_aladin<T>::DeallocateKernels();
+    this->bResamplingKernel = nullptr;
+    this->bAffineTransformation3DKernel = nullptr;
+    this->bBlockMatchingKernel = nullptr;
+    this->bOptimiseKernel = nullptr;
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::DebugPrintLevelInfoStart()
-{
-   char text[255];
-   sprintf(text, "Current level %i / %i", this->currentLevel+1, this->numberOfLevels);
-   reg_print_info(this->executableName,text);
-   sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-           this->con->GetReference()->nx,
-           this->con->GetReference()->ny,
-           this->con->GetReference()->nz,
-           this->con->GetReference()->dx,
-           this->con->GetReference()->dy,
-           this->con->GetReference()->dz);
-   reg_print_info(this->executableName,text);
-   sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-           this->con->GetFloating()->nx,
-           this->con->GetFloating()->ny,
-           this->con->GetFloating()->nz,
-           this->con->GetFloating()->dx,
-           this->con->GetFloating()->dy,
-           this->con->GetFloating()->dz);
-   reg_print_info(this->executableName,text);
-   if(this->con->GetReference()->nz==1){
-      reg_print_info(this->executableName, "Block size = [4 4 1]");
-   }
-   else reg_print_info(this->executableName, "Block size = [4 4 4]");
-   reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   sprintf(text, "Forward Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0],
-          this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]);
-   reg_print_info(this->executableName, text);
-   sprintf(text, "Backward Block number = [%i %i %i]", this->BackwardBlockMatchingParams->blockNumber[0],
-          this->BackwardBlockMatchingParams->blockNumber[1], this->BackwardBlockMatchingParams->blockNumber[2]);
-   reg_print_info(this->executableName, text);
-   reg_mat44_disp(this->transformationMatrix,
-                  (char *)"[reg_aladin_sym] Initial forward transformation matrix:");
-   reg_mat44_disp(this->BackwardTransformationMatrix,
-                  (char *)"[reg_aladin_sym] Initial backward transformation matrix:");
-   reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-
+void reg_aladin_sym<T>::DebugPrintLevelInfoStart() {
+    char text[255];
+    sprintf(text, "Current level %i / %i", this->currentLevel + 1, this->numberOfLevels);
+    reg_print_info(this->executableName, text);
+    sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
+            this->con->GetReference()->nx,
+            this->con->GetReference()->ny,
+            this->con->GetReference()->nz,
+            this->con->GetReference()->dx,
+            this->con->GetReference()->dy,
+            this->con->GetReference()->dz);
+    reg_print_info(this->executableName, text);
+    sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
+            this->con->GetFloating()->nx,
+            this->con->GetFloating()->ny,
+            this->con->GetFloating()->nz,
+            this->con->GetFloating()->dx,
+            this->con->GetFloating()->dy,
+            this->con->GetFloating()->dz);
+    reg_print_info(this->executableName, text);
+    if (this->con->GetReference()->nz == 1) {
+        reg_print_info(this->executableName, "Block size = [4 4 1]");
+    } else reg_print_info(this->executableName, "Block size = [4 4 4]");
+    reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    sprintf(text, "Forward Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0],
+            this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]);
+    reg_print_info(this->executableName, text);
+    sprintf(text, "Backward Block number = [%i %i %i]", this->backwardBlockMatchingParams->blockNumber[0],
+            this->backwardBlockMatchingParams->blockNumber[1], this->backwardBlockMatchingParams->blockNumber[2]);
+    reg_print_info(this->executableName, text);
+    reg_mat44_disp(this->transformationMatrix,
+                   (char *)"[reg_aladin_sym] Initial forward transformation matrix:");
+    reg_mat44_disp(this->backwardTransformationMatrix,
+                   (char *)"[reg_aladin_sym] Initial backward transformation matrix:");
+    reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::DebugPrintLevelInfoEnd()
-{
-   reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin_sym] Final forward transformation matrix:");
-   reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[reg_aladin_sym] Final backward transformation matrix:");
+void reg_aladin_sym<T>::DebugPrintLevelInfoEnd() {
+    reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin_sym] Final forward transformation matrix:");
+    reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[reg_aladin_sym] Final backward transformation matrix:");
 }
 /* *************************************************************** */
 template class reg_aladin_sym<float>;
diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h
index 35434d56..dbe534d0 100644
--- a/reg-lib/_reg_aladin_sym.h
+++ b/reg-lib/_reg_aladin_sym.h
@@ -16,42 +16,42 @@
 
 /// @brief Symmetric Block matching registration class
 template <class T>
-class reg_aladin_sym : public reg_aladin<T> {
+class reg_aladin_sym: public reg_aladin<T> {
 private:
-  AladinContent *backCon;
-  Kernel *bAffineTransformation3DKernel, *bConvolutionKernel, *bBlockMatchingKernel, *bOptimiseKernel, *bResamplingKernel;
-
-  virtual void InitAladinContent(nifti_image *ref,
-                                 nifti_image *flo,
-                                 int *mask,
-                                 mat44 *transMat,
-                                 size_t bytes,
-                                 unsigned int blockPercentage = 0,
-                                 unsigned int inlierLts = 0,
-                                 unsigned int blockStepSize = 0);
-  virtual void DeinitAladinContent();
-  virtual void CreateKernels();
-  virtual void DeallocateKernels();
+    unique_ptr<AladinContent> backCon;
+    unique_ptr<Kernel> bAffineTransformation3DKernel, bConvolutionKernel, bBlockMatchingKernel, bOptimiseKernel, bResamplingKernel;
+
+    virtual void InitAladinContent(nifti_image *ref,
+                                   nifti_image *flo,
+                                   int *mask,
+                                   mat44 *transMat,
+                                   size_t bytes,
+                                   unsigned int blockPercentage = 0,
+                                   unsigned int inlierLts = 0,
+                                   unsigned int blockStepSize = 0);
+    virtual void DeinitAladinContent();
+    virtual void CreateKernels();
+    virtual void DeallocateKernels();
 
 protected:
-  nifti_image *InputFloatingMask;
-  int **FloatingMaskPyramid;
+    NiftiImage inputFloatingMask;
+    vector<unique_ptr<int[]>> floatingMaskPyramid;
 
-  _reg_blockMatchingParam *BackwardBlockMatchingParams;
+    _reg_blockMatchingParam *backwardBlockMatchingParams;
 
-  mat44 *BackwardTransformationMatrix;
+    mat44 *backwardTransformationMatrix;
 
-  virtual void DeallocateCurrentInputImage();
-  virtual void GetBackwardDeformationField();
-  virtual void UpdateTransformationMatrix(int);
+    virtual void DeallocateCurrentInputImage();
+    virtual void GetBackwardDeformationField();
+    virtual void UpdateTransformationMatrix(int);
 
-  virtual void DebugPrintLevelInfoStart();
-  virtual void DebugPrintLevelInfoEnd();
-  virtual void InitialiseRegistration();
-  virtual void GetWarpedImage(int, float);
+    virtual void DebugPrintLevelInfoStart();
+    virtual void DebugPrintLevelInfoEnd();
+    virtual void InitialiseRegistration();
+    virtual void GetWarpedImage(int, float);
 
 public:
-  reg_aladin_sym();
-  virtual ~reg_aladin_sym();
-  virtual void SetInputFloatingMask(nifti_image*);
+    reg_aladin_sym();
+    virtual ~reg_aladin_sym();
+    virtual void SetInputFloatingMask(NiftiImage);
 };

From 591fa91a531346734fe89f9b2675a0ea9a4d1d49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 16:43:03 +0000
Subject: [PATCH 086/314] Refactor reg_aladin app using automatic memory
 management

---
 niftyreg_build_version.txt |   2 +-
 reg-apps/reg_aladin.cpp    | 145 ++++++++++++++-----------------------
 2 files changed, 54 insertions(+), 93 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index b35cfafd..c92ba568 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-206
+207
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 6ba851a0..133557c4 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -134,7 +134,7 @@ int main(int argc, char **argv) {
     char *floatingImageName = nullptr;
     int floatingImageFlag = 0;
 
-    char *outputAffineName = nullptr;
+    const char *outputAffineName = "outputAffine.txt";
     int outputAffineFlag = 0;
 
     char *inputAffineName = nullptr;
@@ -146,7 +146,7 @@ int main(int argc, char **argv) {
     char *floatingMaskName = nullptr;
     int floatingMaskFlag = 0;
 
-    char *outputResultName = nullptr;
+    const char *outputResultName = "outputResult.nii.gz";
     int outputResultFlag = 0;
 
     int maxIter = 5;
@@ -163,9 +163,9 @@ int main(int argc, char **argv) {
     float floatingSigma = 0;
     float referenceSigma = 0;
 
-    float referenceLowerThr = -std::numeric_limits<PrecisionType>::max();
+    float referenceLowerThr = std::numeric_limits<PrecisionType>::min();
     float referenceUpperThr = std::numeric_limits<PrecisionType>::max();
-    float floatingLowerThr = -std::numeric_limits<PrecisionType>::max();
+    float floatingLowerThr = std::numeric_limits<PrecisionType>::min();
     float floatingUpperThr = std::numeric_limits<PrecisionType>::max();
     float paddingValue = std::numeric_limits<PrecisionType>::quiet_NaN();
 
@@ -347,56 +347,45 @@ int main(int argc, char **argv) {
     }
 #endif
 
-    reg_aladin<PrecisionType> *REG;
+    unique_ptr<reg_aladin<PrecisionType>> reg;
     if (symFlag) {
-        REG = new reg_aladin_sym<PrecisionType>;
+        reg.reset(new reg_aladin_sym<PrecisionType>);
         if ((referenceMaskFlag && !floatingMaskName) || (!referenceMaskFlag && floatingMaskName)) {
             reg_print_msg_warn("You have one image mask option turned on but not the other.");
             reg_print_msg_warn("This will affect the degree of symmetry achieved.");
         }
     } else {
-        REG = new reg_aladin<PrecisionType>;
+        reg.reset(new reg_aladin<PrecisionType>);
         if (floatingMaskFlag) {
             reg_print_msg_warn("Note: Floating mask flag only used in symmetric method. Ignoring this option");
         }
     }
 
     /* Read the reference image and check its dimension */
-    nifti_image *referenceHeader = reg_io_ReadImageFile(referenceImageName);
-    if (referenceHeader == nullptr) {
+    NiftiImage referenceHeader = reg_io_ReadImageFile(referenceImageName);
+    if (!referenceHeader) {
         sprintf(text, "Error when reading the reference image: %s", referenceImageName);
         reg_print_msg_error(text);
         return EXIT_FAILURE;
     }
 
     /* Read the floating image and check its dimension */
-    nifti_image *floatingHeader = reg_io_ReadImageFile(floatingImageName);
-    if (floatingHeader == nullptr) {
+    NiftiImage floatingHeader = reg_io_ReadImageFile(floatingImageName);
+    if (!floatingHeader) {
         sprintf(text, "Error when reading the floating image: %s", floatingImageName);
         reg_print_msg_error(text);
         return EXIT_FAILURE;
     }
 
     // Set the reference and floating images
-    nifti_image *isoRefImage = nullptr;
-    nifti_image *isoFloImage = nullptr;
-    if (iso) {
-        // make the images isotropic if required
-        isoRefImage = reg_makeIsotropic(referenceHeader, 1);
-        isoFloImage = reg_makeIsotropic(floatingHeader, 1);
-        REG->SetInputReference(isoRefImage);
-        REG->SetInputFloating(isoFloImage);
-    } else {
-        REG->SetInputReference(referenceHeader);
-        REG->SetInputFloating(floatingHeader);
-    }
+    // make the images isotropic if required
+    reg->SetInputReference(iso ? reg_makeIsotropic(referenceHeader, 1) : referenceHeader);
+    reg->SetInputFloating(iso ? reg_makeIsotropic(floatingHeader, 1) : floatingHeader);
 
     /* read the reference mask image */
-    nifti_image *referenceMaskImage = nullptr;
-    nifti_image *isoRefMaskImage = nullptr;
     if (referenceMaskFlag) {
-        referenceMaskImage = reg_io_ReadImageFile(referenceMaskName);
-        if (referenceMaskImage == nullptr) {
+        NiftiImage referenceMaskImage = reg_io_ReadImageFile(referenceMaskName);
+        if (!referenceMaskImage) {
             sprintf(text, "Error when reading the reference mask image: %s", referenceMaskName);
             reg_print_msg_error(text);
             return EXIT_FAILURE;
@@ -408,18 +397,13 @@ int main(int argc, char **argv) {
                 return EXIT_FAILURE;
             }
         }
-        if (iso) {
-            // make the image isotropic if required
-            isoRefMaskImage = reg_makeIsotropic(referenceMaskImage, 0);
-            REG->SetInputMask(isoRefMaskImage);
-        } else REG->SetInputMask(referenceMaskImage);
+        // make the image isotropic if required
+        reg->SetInputMask(iso ? reg_makeIsotropic(referenceMaskImage, 0) : std::move(referenceMaskImage));
     }
     /* Read the floating mask image */
-    nifti_image *floatingMaskImage = nullptr;
-    nifti_image *isoFloMaskImage = nullptr;
     if (floatingMaskFlag && symFlag) {
-        floatingMaskImage = reg_io_ReadImageFile(floatingMaskName);
-        if (floatingMaskImage == nullptr) {
+        NiftiImage floatingMaskImage = reg_io_ReadImageFile(floatingMaskName);
+        if (!floatingMaskImage) {
             sprintf(text, "Error when reading the floating mask image: %s", floatingMaskName);
             reg_print_msg_error(text);
             return EXIT_FAILURE;
@@ -431,51 +415,48 @@ int main(int argc, char **argv) {
                 return EXIT_FAILURE;
             }
         }
-        if (iso) {
-            // make the image isotropic if required
-            isoFloMaskImage = reg_makeIsotropic(floatingMaskImage, 0);
-            REG->SetInputFloatingMask(isoFloMaskImage);
-        } else REG->SetInputFloatingMask(floatingMaskImage);
+        // make the image isotropic if required
+        reg->SetInputFloatingMask(iso ? reg_makeIsotropic(floatingMaskImage, 0) : std::move(floatingMaskImage));
     }
 
-    REG->SetMaxIterations(maxIter);
-    REG->SetNumberOfLevels(nLevels);
-    REG->SetLevelsToPerform(levelsToPerform);
-    REG->SetReferenceSigma(referenceSigma);
-    REG->SetFloatingSigma(floatingSigma);
-    REG->SetAlignCentre(alignCentre);
-    REG->SetAlignCentreMass(alignCentreOfMass);
-    REG->SetPerformAffine(affineFlag);
-    REG->SetPerformRigid(rigidFlag);
-    REG->SetBlockStepSize(blockStepSize);
-    REG->SetBlockPercentage(blockPercentage);
-    REG->SetInlierLts(inlierLts);
-    REG->SetInterpolation(interpolation);
-    REG->SetCaptureRangeVox(captureRangeVox);
-    REG->SetPlatformType(platformType);
-    REG->SetGpuIdx(gpuIdx);
+    reg->SetMaxIterations(maxIter);
+    reg->SetNumberOfLevels(nLevels);
+    reg->SetLevelsToPerform(levelsToPerform);
+    reg->SetReferenceSigma(referenceSigma);
+    reg->SetFloatingSigma(floatingSigma);
+    reg->SetAlignCentre(alignCentre);
+    reg->SetAlignCentreMass(alignCentreOfMass);
+    reg->SetPerformAffine(affineFlag);
+    reg->SetPerformRigid(rigidFlag);
+    reg->SetBlockStepSize(blockStepSize);
+    reg->SetBlockPercentage(blockPercentage);
+    reg->SetInlierLts(inlierLts);
+    reg->SetInterpolation(interpolation);
+    reg->SetCaptureRangeVox(captureRangeVox);
+    reg->SetPlatformType(platformType);
+    reg->SetGpuIdx(gpuIdx);
 
     if (referenceLowerThr != referenceUpperThr) {
-        REG->SetReferenceLowerThreshold(referenceLowerThr);
-        REG->SetReferenceUpperThreshold(referenceUpperThr);
+        reg->SetReferenceLowerThreshold(referenceLowerThr);
+        reg->SetReferenceUpperThreshold(referenceUpperThr);
     }
 
     if (floatingLowerThr != floatingUpperThr) {
-        REG->SetFloatingLowerThreshold(floatingLowerThr);
-        REG->SetFloatingUpperThreshold(floatingUpperThr);
+        reg->SetFloatingLowerThreshold(floatingLowerThr);
+        reg->SetFloatingUpperThreshold(floatingUpperThr);
     }
 
-    REG->SetWarpedPaddingValue(paddingValue);
+    reg->SetWarpedPaddingValue(paddingValue);
 
-    if (REG->GetLevelsToPerform() > REG->GetNumberOfLevels())
-        REG->SetLevelsToPerform(REG->GetNumberOfLevels());
+    if (reg->GetLevelsToPerform() > reg->GetNumberOfLevels())
+        reg->SetLevelsToPerform(reg->GetNumberOfLevels());
 
     // Set the input affine transformation if defined
     if (inputAffineFlag == 1)
-        REG->SetInputTransform(inputAffineName);
+        reg->SetInputTransform(inputAffineName);
 
     // Set the verbose type
-    REG->SetVerbose(verbose);
+    reg->SetVerbose(verbose);
 
 #ifndef NDEBUG
     reg_print_msg_debug("*******************************************");
@@ -496,39 +477,19 @@ int main(int argc, char **argv) {
 #endif // _OPENMP
 
     // Run the registration
-    REG->Run();
+    reg->Run();
 
     // The warped image is saved
     if (iso) {
-        REG->SetInputReference(referenceHeader);
-        REG->SetInputFloating(floatingHeader);
+        reg->SetInputReference(referenceHeader);
+        reg->SetInputFloating(floatingHeader);
     }
-    nifti_image *outputResultImage = REG->GetFinalWarpedImage();
-    if (!outputResultFlag) outputResultName = (char *)"outputResult.nii.gz";
+    NiftiImage outputResultImage = reg->GetFinalWarpedImage();
     reg_io_WriteImageFile(outputResultImage, outputResultName);
-    nifti_image_free(outputResultImage);
 
     /* The affine transformation is saved */
-    if (outputAffineFlag)
-        reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), outputAffineName);
-    else reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), (char *)"outputAffine.txt");
-
-    nifti_image_free(referenceHeader);
-    nifti_image_free(floatingHeader);
-    if (isoRefImage != nullptr)
-        nifti_image_free(isoRefImage);
-    if (isoFloImage != nullptr)
-        nifti_image_free(isoFloImage);
-    if (referenceMaskImage != nullptr)
-        nifti_image_free(referenceMaskImage);
-    if (floatingMaskImage != nullptr)
-        nifti_image_free(floatingMaskImage);
-    if (isoRefMaskImage != nullptr)
-        nifti_image_free(isoRefMaskImage);
-    if (isoFloMaskImage != nullptr)
-        nifti_image_free(isoFloMaskImage);
-
-    delete REG;
+    reg_tool_WriteAffineFile(reg->GetTransformationMatrix(), outputAffineName);
+
 #ifdef NDEBUG
     if (verbose) {
 #endif

From 4e5db2c86d1905880805e416c01e7d7a82d81636 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 17:13:34 +0000
Subject: [PATCH 087/314] Refactor reg_base class using automatic memory
 management

---
 niftyreg_build_version.txt |   2 +-
 reg-lib/_reg_base.cpp      | 327 ++++++++++---------------------------
 reg-lib/_reg_base.h        |  60 ++++---
 3 files changed, 117 insertions(+), 272 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c92ba568..7d645f58 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-207
+208
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index c267f535..f684dc38 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -15,11 +15,9 @@
 /* *************************************************************** */
 template<class T>
 reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
-    platform = nullptr;
     platformType = PlatformType::Cpu;
     gpuIdx = 999;
 
-    optimiser = nullptr;
     maxIterationNumber = 150;
     optimiseX = true;
     optimiseY = true;
@@ -28,39 +26,24 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     useConjGradient = true;
     useApproxGradient = false;
 
-    measure_ssd = nullptr;
-    measure_kld = nullptr;
-    measure_dti = nullptr;
-    measure_lncc = nullptr;
-    measure_nmi = nullptr;
-    measure_mind = nullptr;
-    measure_mindssc = nullptr;
-    localWeightSimInput = nullptr;
-
     similarityWeight = 0; // automatically set depending of the penalty term weights
 
     executableName = (char*)"NiftyReg BASE";
     referenceTimePoint = refTimePoint;
     floatingTimePoint = floTimePoint;
-    inputReference = nullptr; // pointer to external
-    inputFloating = nullptr; // pointer to external
-    maskImage = nullptr; // pointer to external
     affineTransformation = nullptr;  // pointer to external
-    referenceMask = nullptr;
     referenceSmoothingSigma = 0;
     floatingSmoothingSigma = 0;
-    referenceThresholdUp = new float[referenceTimePoint];
-    referenceThresholdLow = new float[referenceTimePoint];
-    floatingThresholdUp = new float[floatingTimePoint];
-    floatingThresholdLow = new float[floatingTimePoint];
-    for (int i = 0; i < referenceTimePoint; i++) {
-        referenceThresholdUp[i] = std::numeric_limits<T>::max();
-        referenceThresholdLow[i] = -std::numeric_limits<T>::max();
-    }
-    for (int i = 0; i < floatingTimePoint; i++) {
-        floatingThresholdUp[i] = std::numeric_limits<T>::max();
-        floatingThresholdLow[i] = -std::numeric_limits<T>::max();
-    }
+
+    referenceThresholdUp.reset(new T[referenceTimePoint]);
+    std::fill(referenceThresholdUp.get(), referenceThresholdUp.get() + referenceTimePoint, std::numeric_limits<T>::max());
+    referenceThresholdLow.reset(new T[referenceTimePoint]);
+    std::fill(referenceThresholdLow.get(), referenceThresholdLow.get() + referenceTimePoint, std::numeric_limits<T>::min());
+    floatingThresholdUp.reset(new T[floatingTimePoint]);
+    std::fill(floatingThresholdUp.get(), floatingThresholdUp.get() + floatingTimePoint, std::numeric_limits<T>::max());
+    floatingThresholdLow.reset(new T[floatingTimePoint]);
+    std::fill(floatingThresholdLow.get(), floatingThresholdLow.get() + floatingTimePoint, std::numeric_limits<T>::min());
+
     robustRange = false;
     warpedPaddingValue = std::numeric_limits<T>::quiet_NaN();
     levelNumber = 3;
@@ -70,11 +53,8 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     usePyramid = true;
 
     initialised = false;
-    referencePyramid = nullptr;
-    floatingPyramid = nullptr;
-    maskPyramid = nullptr;
 
-    interpolation = 1;
+    interpolation = 1;  // linear
 
     landmarkRegWeight = 0;
     landmarkRegNumber = 0;
@@ -87,112 +67,16 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
 }
 /* *************************************************************** */
 template<class T>
-reg_base<T>::~reg_base() {
-    if (referencePyramid) {
-        if (usePyramid) {
-            for (unsigned int i = 0; i < levelToPerform; i++) {
-                if (referencePyramid[i]) {
-                    nifti_image_free(referencePyramid[i]);
-                    referencePyramid[i] = nullptr;
-                }
-            }
-        } else {
-            if (referencePyramid[0]) {
-                nifti_image_free(referencePyramid[0]);
-                referencePyramid[0] = nullptr;
-            }
-        }
-        free(referencePyramid);
-        referencePyramid = nullptr;
-    }
-    if (maskPyramid) {
-        if (usePyramid) {
-            for (unsigned int i = 0; i < levelToPerform; i++) {
-                if (maskPyramid[i]) {
-                    free(maskPyramid[i]);
-                    maskPyramid[i] = nullptr;
-                }
-            }
-        } else {
-            if (maskPyramid[0]) {
-                free(maskPyramid[0]);
-                maskPyramid[0] = nullptr;
-            }
-        }
-        free(maskPyramid);
-        maskPyramid = nullptr;
-    }
-    if (floatingPyramid) {
-        if (usePyramid) {
-            for (unsigned int i = 0; i < levelToPerform; i++) {
-                if (floatingPyramid[i]) {
-                    nifti_image_free(floatingPyramid[i]);
-                    floatingPyramid[i] = nullptr;
-                }
-            }
-        } else {
-            if (floatingPyramid[0]) {
-                nifti_image_free(floatingPyramid[0]);
-                floatingPyramid[0] = nullptr;
-            }
-        }
-        free(floatingPyramid);
-        floatingPyramid = nullptr;
-    }
-    if (referenceThresholdUp) {
-        delete[]referenceThresholdUp;
-        referenceThresholdUp = nullptr;
-    }
-    if (referenceThresholdLow) {
-        delete[]referenceThresholdLow;
-        referenceThresholdLow = nullptr;
-    }
-    if (floatingThresholdUp) {
-        delete[]floatingThresholdUp;
-        floatingThresholdUp = nullptr;
-    }
-    if (floatingThresholdLow) {
-        delete[]floatingThresholdLow;
-        floatingThresholdLow = nullptr;
-    }
-    if (optimiser) {
-        delete optimiser;
-        optimiser = nullptr;
-    }
-
-    if (measure_nmi)
-        delete measure_nmi;
-    if (measure_ssd)
-        delete measure_ssd;
-    if (measure_kld)
-        delete measure_kld;
-    if (measure_dti)
-        delete measure_dti;
-    if (measure_lncc)
-        delete measure_lncc;
-    if (measure_mind)
-        delete measure_mind;
-    if (measure_mindssc)
-        delete measure_mindssc;
-
-    delete measure;
-    delete platform;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::~reg_base");
-#endif
-}
-/* *************************************************************** */
-template<class T>
-void reg_base<T>::SetReferenceImage(nifti_image *r) {
-    inputReference = r;
+void reg_base<T>::SetReferenceImage(NiftiImage inputReferenceIn) {
+    inputReference = inputReferenceIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetReferenceImage");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetFloatingImage(nifti_image *f) {
-    inputFloating = f;
+void reg_base<T>::SetFloatingImage(NiftiImage inputFloatingIn) {
+    inputFloating = inputFloatingIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetFloatingImage");
 #endif
@@ -207,32 +91,32 @@ void reg_base<T>::SetMaximalIterationNumber(unsigned int iter) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetReferenceMask(nifti_image *m) {
-    maskImage = m;
+void reg_base<T>::SetReferenceMask(NiftiImage maskImageIn) {
+    maskImage = maskImageIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetReferenceMask");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetAffineTransformation(mat44 *a) {
-    affineTransformation = a;
+void reg_base<T>::SetAffineTransformation(mat44 *affineTransformationIn) {
+    affineTransformation = affineTransformationIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetAffineTransformation");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetReferenceSmoothingSigma(T s) {
-    referenceSmoothingSigma = s;
+void reg_base<T>::SetReferenceSmoothingSigma(T referenceSmoothingSigmaIn) {
+    referenceSmoothingSigma = referenceSmoothingSigmaIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetReferenceSmoothingSigma");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetFloatingSmoothingSigma(T s) {
-    floatingSmoothingSigma = s;
+void reg_base<T>::SetFloatingSmoothingSigma(T floatingSmoothingSigmaIn) {
+    floatingSmoothingSigma = floatingSmoothingSigmaIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetFloatingSmoothingSigma");
 #endif
@@ -287,32 +171,32 @@ void reg_base<T>::DoNotUseRobustRange() {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetWarpedPaddingValue(float p) {
-    warpedPaddingValue = p;
+void reg_base<T>::SetWarpedPaddingValue(float warpedPaddingValueIn) {
+    warpedPaddingValue = warpedPaddingValueIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetWarpedPaddingValue");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLevelNumber(unsigned int l) {
-    levelNumber = l;
+void reg_base<T>::SetLevelNumber(unsigned int levelNumberIn) {
+    levelNumber = levelNumberIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetLevelNumber");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLevelToPerform(unsigned int l) {
-    levelToPerform = l;
+void reg_base<T>::SetLevelToPerform(unsigned int levelToPerformIn) {
+    levelToPerform = levelToPerformIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetLevelToPerform");
 #endif
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetGradientSmoothingSigma(T g) {
-    gradientSmoothingSigma = g;
+void reg_base<T>::SetGradientSmoothingSigma(T gradientSmoothingSigmaIn) {
+    gradientSmoothingSigma = gradientSmoothingSigmaIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetGradientSmoothingSigma");
 #endif
@@ -444,7 +328,7 @@ void reg_base<T>::CheckParameters() {
     // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
     if (!measure_nmi && !measure_ssd && !measure_dti && !measure_lncc &&
         !measure_kld && !measure_mind && !measure_mindssc) {
-        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
+        measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
         for (int i = 0; i < inputReference->nt; ++i)
             measure_nmi->SetTimepointWeight(i, 1.0);
     }
@@ -463,7 +347,7 @@ void reg_base<T>::CheckParameters() {
             reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)");
             reg_exit();
         }
-        double *chanWeightSum = new double[inputReference->nt]();
+        unique_ptr<double[]> chanWeightSum(new double[inputReference->nt]());
         double simWeightSum, totWeightSum = 0.;
         double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr;
         if (measure_nmi) {
@@ -562,7 +446,6 @@ void reg_base<T>::CheckParameters() {
             if (measure_lncc)
                 measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum);
         }
-        delete[] chanWeightSum;
     }
 
 #ifndef NDEBUG
@@ -573,7 +456,7 @@ void reg_base<T>::CheckParameters() {
 template<class T>
 void reg_base<T>::InitialiseSimilarity() {
     // TODO Move this function to reg_f3d
-    F3dContent& con = *dynamic_cast<F3dContent*>(this->con);
+    F3dContent& con = dynamic_cast<F3dContent&>(*this->con);
 
     if (measure_nmi)
         measure->Initialise(*measure_nmi, con);
@@ -605,109 +488,80 @@ template<class T>
 void reg_base<T>::Initialise() {
     if (initialised) return;
 
-    platform = new Platform(platformType);
+    platform.reset(new Platform(platformType));
     platform->SetGpuIdx(gpuIdx);
-    measure = platform->CreateMeasure();
+    measure.reset(platform->CreateMeasure());
 
     CheckParameters();
 
     // CREATE THE PYRAMID IMAGES
-    if (usePyramid) {
-        referencePyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*));
-        floatingPyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*));
-        maskPyramid = (int**)malloc(levelToPerform * sizeof(int*));
-    } else {
-        referencePyramid = (nifti_image**)malloc(sizeof(nifti_image*));
-        floatingPyramid = (nifti_image**)malloc(sizeof(nifti_image*));
-        maskPyramid = (int**)malloc(sizeof(int*));
-    }
+    const unsigned int imageCount = usePyramid ? levelToPerform : 1;
+    referencePyramid = vector<NiftiImage>(imageCount);
+    floatingPyramid = vector<NiftiImage>(imageCount);
+    maskPyramid = vector<unique_ptr<int[]>>(imageCount);
 
     // Update the input images threshold if required
     if (robustRange) {
         // Create a copy of the reference image to extract the robust range
-        nifti_image *temp_reference = nifti_dup(*inputReference);
-        reg_tools_changeDatatype<T>(temp_reference);
+        NiftiImage tmpReference = inputReference;
+        reg_tools_changeDatatype<T>(tmpReference);
         // Extract the robust range of the reference image
-        T *refDataPtr = static_cast<T *>(temp_reference->data);
-        reg_heapSort(refDataPtr, temp_reference->nvox);
+        T *refDataPtr = static_cast<T *>(tmpReference->data);
+        reg_heapSort(refDataPtr, tmpReference->nvox);
         // Update the reference threshold values if no value has been setup by the user
-        if (referenceThresholdLow[0] == -std::numeric_limits<T>::max())
-            referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.02f)];
+        if (referenceThresholdLow[0] == std::numeric_limits<T>::min())
+            referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.02f)];
         if (referenceThresholdUp[0] == std::numeric_limits<T>::max())
-            referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.98f)];
-        // Free the temporarily allocated image
-        nifti_image_free(temp_reference);
+            referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.98f)];
 
         // Create a copy of the floating image to extract the robust range
-        nifti_image *temp_floating = nifti_dup(*inputFloating);
-        reg_tools_changeDatatype<T>(temp_floating);
+        NiftiImage tmpFloating = inputFloating;
+        reg_tools_changeDatatype<T>(tmpFloating);
         // Extract the robust range of the floating image
-        T *floDataPtr = static_cast<T *>(temp_floating->data);
-        reg_heapSort(floDataPtr, temp_floating->nvox);
+        T *floDataPtr = static_cast<T *>(tmpFloating->data);
+        reg_heapSort(floDataPtr, tmpFloating->nvox);
         // Update the floating threshold values if no value has been setup by the user
-        if (floatingThresholdLow[0] == -std::numeric_limits<T>::max())
-            floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.02f)];
+        if (floatingThresholdLow[0] == std::numeric_limits<T>::min())
+            floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.02f)];
         if (floatingThresholdUp[0] == std::numeric_limits<T>::max())
-            floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.98f)];
-        // Free the temporarily allocated image
-        nifti_image_free(temp_floating);
+            floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.98f)];
     }
 
     // FINEST LEVEL OF REGISTRATION
-    if (usePyramid) {
-        reg_createImagePyramid<T>(inputReference, referencePyramid, levelNumber, levelToPerform);
-        reg_createImagePyramid<T>(inputFloating, floatingPyramid, levelNumber, levelToPerform);
-        if (maskImage)
-            reg_createMaskPyramid<T>(maskImage, maskPyramid, levelNumber, levelToPerform);
-        else {
-            for (unsigned int l = 0; l < levelToPerform; ++l) {
-                const size_t voxelNumber = CalcVoxelNumber(*referencePyramid[l]);
-                maskPyramid[l] = (int*)calloc(voxelNumber, sizeof(int));
-            }
-        }
-    } else {
-        reg_createImagePyramid<T>(inputReference, referencePyramid, 1, 1);
-        reg_createImagePyramid<T>(inputFloating, floatingPyramid, 1, 1);
-        if (maskImage)
-            reg_createMaskPyramid<T>(maskImage, maskPyramid, 1, 1);
-        else {
-            const size_t voxelNumber = CalcVoxelNumber(*referencePyramid[0]);
-            maskPyramid[0] = (int*)calloc(voxelNumber, sizeof(int));
-        }
-    }
-
-    unsigned int pyramidalLevelNumber = 1;
-    if (usePyramid) pyramidalLevelNumber = levelToPerform;
+    const unsigned int levelCount = usePyramid ? levelNumber : 1;
+    reg_createImagePyramid<T>(inputReference, referencePyramid, levelCount, imageCount);
+    reg_createImagePyramid<T>(inputFloating, floatingPyramid, levelCount, imageCount);
+    if (maskImage)
+        reg_createMaskPyramid<T>(maskImage, maskPyramid, levelCount, imageCount);
+    else
+        for (unsigned int l = 0; l < imageCount; ++l)
+            maskPyramid[l].reset(new int[referencePyramid[l].nVoxelsPerVolume()]());
 
     // SMOOTH THE INPUT IMAGES IF REQUIRED
     for (unsigned int l = 0; l < levelToPerform; l++) {
         if (referenceSmoothingSigma != 0) {
-            bool *active = new bool[referencePyramid[l]->nt];
-            float *sigma = new float[referencePyramid[l]->nt];
+            unique_ptr<bool[]> active(new bool[referencePyramid[l]->nt]);
+            unique_ptr<float[]> sigma(new float[referencePyramid[l]->nt]);
             active[0] = true;
             for (int i = 1; i < referencePyramid[l]->nt; ++i)
                 active[i] = false;
             sigma[0] = referenceSmoothingSigma;
-            reg_tools_kernelConvolution(referencePyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active);
-            delete[] active;
-            delete[] sigma;
+            reg_tools_kernelConvolution(referencePyramid[l], sigma.get(), GAUSSIAN_KERNEL, nullptr, active.get());
         }
         if (floatingSmoothingSigma != 0) {
             // Only the first image is smoothed
-            bool *active = new bool[floatingPyramid[l]->nt];
-            float *sigma = new float[floatingPyramid[l]->nt];
+            unique_ptr<bool[]> active(new bool[floatingPyramid[l]->nt]);
+            unique_ptr<float[]> sigma(new float[floatingPyramid[l]->nt]);
             active[0] = true;
             for (int i = 1; i < floatingPyramid[l]->nt; ++i)
                 active[i] = false;
             sigma[0] = floatingSmoothingSigma;
-            reg_tools_kernelConvolution(floatingPyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active);
-            delete[] active;
-            delete[] sigma;
+            reg_tools_kernelConvolution(floatingPyramid[l], sigma.get(), GAUSSIAN_KERNEL, nullptr, active.get());
         }
     }
 
     // THRESHOLD THE INPUT IMAGES IF REQUIRED
-    for (unsigned int l = 0; l < pyramidalLevelNumber; l++) {
+    for (unsigned int l = 0; l < imageCount; l++) {
         reg_thresholdImage<T>(referencePyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]);
         reg_thresholdImage<T>(floatingPyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]);
     }
@@ -752,14 +606,14 @@ template<class T>
 void reg_base<T>::GetVoxelBasedGradient() {
     // The voxel based gradient image is filled with zeros
     // TODO Temporarily call F3dContent. This function will be moved to reg_f3d
-    dynamic_cast<F3dContent*>(con)->ZeroVoxelBasedMeasureGradient();
+    dynamic_cast<F3dContent&>(*con).ZeroVoxelBasedMeasureGradient();
 
     // The intensity gradient is first computed
-    //   if(measure_nmi!=nullptr || measure_ssd!=nullptr ||
-    //         measure_kld!=nullptr || measure_lncc!=nullptr ||
-    //         measure_dti!=nullptr)
+    //   if(measure_nmi || measure_ssd ||
+    //         measure_kld || measure_lncc ||
+    //         measure_dti)
     //   {
-    //    if(measure_dti!=nullptr){
+    //    if(measure_dti){
     //        reg_getImageGradient(floating,
     //                             warpedGradient,
     //                             deformationFieldImage,
@@ -774,7 +628,7 @@ void reg_base<T>::GetVoxelBasedGradient() {
     //    }
     //   }
 
-    //   if(measure_dti!=nullptr)
+    //   if(measure_dti)
     //      measure_dti->GetVoxelBasedSimilarityMeasureGradient();
 
     for (int t = 0; t < con->Content::GetReference()->nt; ++t) {
@@ -809,7 +663,7 @@ void reg_base<T>::GetVoxelBasedGradient() {
 //void reg_base<T>::ApproximateParzenWindow()
 //{
 //    if(!measure_nmi)
-//        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
+//        measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
 //    measure_nmi=approxParzenWindow = true;
 //}
 ///* *************************************************************** */
@@ -817,14 +671,14 @@ void reg_base<T>::GetVoxelBasedGradient() {
 //void reg_base<T>::DoNotApproximateParzenWindow()
 //{
 //    if(!measure_nmi)
-//        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
+//        measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
 //    measure_nmi=approxParzenWindow = false;
 //}
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
     if (!measure_nmi)
-        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
+        measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
     measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
@@ -837,7 +691,7 @@ void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
 template<class T>
 void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
     if (!measure_nmi)
-        measure_nmi = dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi));
+        measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
     measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
@@ -850,7 +704,7 @@ void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
 template<class T>
 void reg_base<T>::UseSSD(int timepoint, bool normalise) {
     if (!measure_ssd)
-        measure_ssd = dynamic_cast<reg_ssd*>(measure->Create(MeasureType::Ssd));
+        measure_ssd.reset(dynamic_cast<reg_ssd*>(measure->Create(MeasureType::Ssd)));
     measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     measure_ssd->SetNormaliseTimepoint(timepoint, normalise);
 #ifndef NDEBUG
@@ -861,7 +715,7 @@ void reg_base<T>::UseSSD(int timepoint, bool normalise) {
 template<class T>
 void reg_base<T>::UseMIND(int timepoint, int offset) {
     if (!measure_mind)
-        measure_mind = dynamic_cast<reg_mind*>(measure->Create(MeasureType::Mind));
+        measure_mind.reset(dynamic_cast<reg_mind*>(measure->Create(MeasureType::Mind)));
     measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mind->SetDescriptorOffset(offset);
 #ifndef NDEBUG
@@ -872,7 +726,7 @@ void reg_base<T>::UseMIND(int timepoint, int offset) {
 template<class T>
 void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
     if (!measure_mindssc)
-        measure_mindssc = dynamic_cast<reg_mindssc*>(measure->Create(MeasureType::Mindssc));
+        measure_mindssc.reset(dynamic_cast<reg_mindssc*>(measure->Create(MeasureType::Mindssc)));
     measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mindssc->SetDescriptorOffset(offset);
 #ifndef NDEBUG
@@ -883,7 +737,7 @@ void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
 template<class T>
 void reg_base<T>::UseKLDivergence(int timepoint) {
     if (!measure_kld)
-        measure_kld = dynamic_cast<reg_kld*>(measure->Create(MeasureType::Kld));
+        measure_kld.reset(dynamic_cast<reg_kld*>(measure->Create(MeasureType::Kld)));
     measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::UseKLDivergence");
@@ -893,7 +747,7 @@ void reg_base<T>::UseKLDivergence(int timepoint) {
 template<class T>
 void reg_base<T>::UseLNCC(int timepoint, float stddev) {
     if (!measure_lncc)
-        measure_lncc = dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc));
+        measure_lncc.reset(dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc)));
     measure_lncc->SetKernelStandardDeviation(timepoint, stddev);
     measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0
 #ifndef NDEBUG
@@ -920,7 +774,7 @@ void reg_base<T>::UseDTI(bool *timepoint) {
     reg_exit();
 
     if (!measure_dti)
-        measure_dti = dynamic_cast<reg_dti*>(measure->Create(MeasureType::Dti));
+        measure_dti.reset(dynamic_cast<reg_dti*>(measure->Create(MeasureType::Dti)));
     for (int i = 0; i < inputReference->nt; ++i) {
         if (timepoint[i])
             measure_dti->SetTimepointWeight(i, 1.0);  // weight set to 1.0 to indicate timepoint is active
@@ -971,8 +825,8 @@ void reg_base<T>::SetKLDWeight(int timepoint, double weight) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLocalWeightSim(nifti_image *i) {
-    localWeightSimInput = i;
+void reg_base<T>::SetLocalWeightSim(NiftiImage localWeightSimInputIn) {
+    localWeightSimInput = localWeightSimInputIn;
     reg_tools_changeDatatype<T>(localWeightSimInput);
 }
 /* *************************************************************** */
@@ -1003,22 +857,15 @@ void reg_base<T>::WarpFloatingImage(int inter) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::DeinitCurrentLevel(int currentLevel) {
-    delete optimiser;
     optimiser = nullptr;
     if (currentLevel >= 0) {
         if (usePyramid) {
-            nifti_image_free(referencePyramid[currentLevel]);
             referencePyramid[currentLevel] = nullptr;
-            nifti_image_free(floatingPyramid[currentLevel]);
             floatingPyramid[currentLevel] = nullptr;
-            free(maskPyramid[currentLevel]);
             maskPyramid[currentLevel] = nullptr;
         } else if (currentLevel == levelToPerform - 1) {
-            nifti_image_free(referencePyramid[0]);
             referencePyramid[0] = nullptr;
-            nifti_image_free(floatingPyramid[0]);
             floatingPyramid[0] = nullptr;
-            free(maskPyramid[0]);
             maskPyramid[0] = nullptr;
         }
     }
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 3a5f0146..42645fb4 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -34,21 +34,21 @@ template<class T>
 class reg_base: public InterfaceOptimiser {
 protected:
     // Platform
-    Platform *platform;
+    unique_ptr<Platform> platform;
     PlatformType platformType;
     unsigned gpuIdx;
 
     // Content
-    Content *con = nullptr;
+    unique_ptr<Content> con;
 
     // Compute
-    Compute *compute = nullptr;
+    unique_ptr<Compute> compute;
 
     // Measure
-    Measure *measure = nullptr;
+    unique_ptr<Measure> measure;
 
     // Optimiser-related variables
-    reg_optimiser<T> *optimiser;
+    unique_ptr<reg_optimiser<T>> optimiser;
     size_t maxIterationNumber;
     size_t perturbationNumber;
     bool optimiseX;
@@ -56,29 +56,28 @@ class reg_base: public InterfaceOptimiser {
     bool optimiseZ;
 
     // Measure-related variables
-    reg_ssd *measure_ssd;
-    reg_kld *measure_kld;
-    reg_dti *measure_dti;
-    reg_lncc *measure_lncc;
-    reg_nmi *measure_nmi;
-    reg_mind *measure_mind;
-    reg_mindssc *measure_mindssc;
-    nifti_image *localWeightSimInput;
+    unique_ptr<reg_ssd> measure_ssd;
+    unique_ptr<reg_kld> measure_kld;
+    unique_ptr<reg_dti> measure_dti;
+    unique_ptr<reg_lncc> measure_lncc;
+    unique_ptr<reg_nmi> measure_nmi;
+    unique_ptr<reg_mind> measure_mind;
+    unique_ptr<reg_mindssc> measure_mindssc;
+    NiftiImage localWeightSimInput;
 
     char *executableName;
     int referenceTimePoint;
     int floatingTimePoint;
-    nifti_image *inputReference; // pointer to external
-    nifti_image *inputFloating; // pointer to external
-    nifti_image *maskImage; // pointer to external
+    NiftiImage inputReference; // pointer to external
+    NiftiImage inputFloating; // pointer to external
+    NiftiImage maskImage; // pointer to external
     mat44 *affineTransformation; // pointer to external
-    int *referenceMask;
     T referenceSmoothingSigma;
     T floatingSmoothingSigma;
-    float *referenceThresholdUp;
-    float *referenceThresholdLow;
-    float *floatingThresholdUp;
-    float *floatingThresholdLow;
+    unique_ptr<T[]> referenceThresholdUp;
+    unique_ptr<T[]> referenceThresholdLow;
+    unique_ptr<T[]> floatingThresholdUp;
+    unique_ptr<T[]> floatingThresholdLow;
     bool robustRange;
     float warpedPaddingValue;
     unsigned int levelNumber;
@@ -93,9 +92,9 @@ class reg_base: public InterfaceOptimiser {
     int interpolation;
 
     bool initialised;
-    nifti_image **referencePyramid;
-    nifti_image **floatingPyramid;
-    int **maskPyramid;
+    vector<NiftiImage> referencePyramid;
+    vector<NiftiImage> floatingPyramid;
+    vector<unique_ptr<int[]>> maskPyramid;
 
     double bestWMeasure;
     double currentWMeasure;
@@ -139,10 +138,9 @@ class reg_base: public InterfaceOptimiser {
 
 public:
     reg_base(int refTimePoint, int floTimePoint);
-    virtual ~reg_base();
 
     virtual void Run();
-    virtual nifti_image** GetWarpedImage() = 0;
+    virtual vector<NiftiImage> GetWarpedImage() = 0;
     virtual char* GetExecutableName() { return executableName; }
     virtual bool GetSymmetricStatus() { return false; }
 
@@ -172,16 +170,16 @@ class reg_base: public InterfaceOptimiser {
     virtual void UseDTI(bool*);
     virtual void UseLNCC(int, float);
     virtual void SetLNCCKernelType(int type);
-    virtual void SetLocalWeightSim(nifti_image*);
+    virtual void SetLocalWeightSim(NiftiImage);
 
     virtual void SetNMIWeight(int, double);
     virtual void SetSSDWeight(int, double);
     virtual void SetKLDWeight(int, double);
     virtual void SetLNCCWeight(int, double);
 
-    virtual void SetReferenceImage(nifti_image*);
-    virtual void SetFloatingImage(nifti_image*);
-    virtual void SetReferenceMask(nifti_image*);
+    virtual void SetReferenceImage(NiftiImage);
+    virtual void SetFloatingImage(NiftiImage);
+    virtual void SetReferenceMask(NiftiImage);
     virtual void SetAffineTransformation(mat44*);
     virtual void SetReferenceSmoothingSigma(T);
     virtual void SetFloatingSmoothingSigma(T);
@@ -211,5 +209,5 @@ class reg_base: public InterfaceOptimiser {
     }
 
     // For testing
-    virtual void reg_test_setOptimiser(reg_optimiser<T> *opt) { optimiser = opt; }
+    virtual void reg_test_setOptimiser(reg_optimiser<T> *opt) { optimiser.reset(opt); }
 };

From 099572c140edd9c57f6dce611587771285c1dfcb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 17:18:04 +0000
Subject: [PATCH 088/314] Refactor reg_f3d class using automatic memory
 management

---
 niftyreg_build_version.txt |  2 +-
 reg-lib/_reg_f3d.cpp       | 74 +++++++++++++++-----------------------
 reg-lib/_reg_f3d.h         | 15 ++++----
 3 files changed, 37 insertions(+), 54 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 7d645f58..7fba2b43 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-208
+209
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 6cb183ac..4f9d48b2 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -19,8 +19,6 @@ reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint):
     reg_base<T>::reg_base(refTimePoint, floTimePoint) {
 
     this->executableName = (char*)"NiftyReg F3D";
-    inputControlPointGrid = nullptr; // pointer to external
-    controlPointGrid = nullptr;
     bendingEnergyWeight = 0.001;
     linearEnergyWeight = 0.01;
     jacobianLogWeight = 0;
@@ -38,19 +36,8 @@ reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint):
 }
 /* *************************************************************** */
 template<class T>
-reg_f3d<T>::~reg_f3d() {
-    if (controlPointGrid) {
-        nifti_image_free(controlPointGrid);
-        controlPointGrid = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::~reg_f3d");
-#endif
-}
-/* *************************************************************** */
-template<class T>
-void reg_f3d<T>::SetControlPointGridImage(nifti_image *cp) {
-    inputControlPointGrid = cp;
+void reg_f3d<T>::SetControlPointGridImage(NiftiImage inputControlPointGridIn) {
+    inputControlPointGrid = inputControlPointGridIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SetControlPointGridImage");
 #endif
@@ -107,8 +94,8 @@ void reg_f3d<T>::SetSpacing(unsigned int i, T s) {
 template<class T>
 void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
     unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
-    this->con = contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T));
-    this->compute = this->platform->CreateCompute(*this->con);
+    this->con.reset(contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)));
+    this->compute.reset(this->platform->CreateCompute(*this->con));
 }
 /* *************************************************************** */
 template<class T>
@@ -124,7 +111,7 @@ T reg_f3d<T>::InitCurrentLevel(int currentLevel) {
         const int index = this->usePyramid ? currentLevel : 0;
         reference = this->referencePyramid[index];
         floating = this->floatingPyramid[index];
-        mask = this->maskPyramid[index];
+        mask = this->maskPyramid[index].get();
     }
 
     // Set the initial step size for the gradient ascent
@@ -155,16 +142,14 @@ T reg_f3d<T>::InitCurrentLevel(int currentLevel) {
 template<class T>
 void reg_f3d<T>::DeinitCurrentLevel(int currentLevel) {
     reg_base<T>::DeinitCurrentLevel(currentLevel);
-    delete this->compute;
     this->compute = nullptr;
-    delete this->con;
     this->con = nullptr;
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::CheckParameters() {
     reg_base<T>::CheckParameters();
-    // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
+    // Normalise the objective function weights
     if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
         T penaltySum = bendingEnergyWeight + linearEnergyWeight + jacobianLogWeight + this->landmarkRegWeight;
         if (penaltySum >= 1) {
@@ -186,7 +171,7 @@ void reg_f3d<T>::Initialise() {
 
     reg_base<T>::Initialise();
 
-    // DETERMINE THE GRID SPACING AND CREATE THE GRID
+    // Determine the grid spacing and create the grid
     if (!inputControlPointGrid) {
         // Set the spacing along y and z if undefined. Their values are set to match
         // the spacing along the x axis
@@ -194,21 +179,21 @@ void reg_f3d<T>::Initialise() {
         if (spacing[2] != spacing[2]) spacing[2] = spacing[0];
 
         /* Convert the spacing from voxel to mm if necessary */
-        float spacingInMillimeter[3] = {spacing[0], spacing[1], spacing[2]};
-        if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -this->inputReference->dx;
-        if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -this->inputReference->dy;
-        if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -this->inputReference->dz;
+        float spacingInMillimetre[3] = {spacing[0], spacing[1], spacing[2]};
+        if (spacingInMillimetre[0] < 0) spacingInMillimetre[0] *= -this->inputReference->dx;
+        if (spacingInMillimetre[1] < 0) spacingInMillimetre[1] *= -this->inputReference->dy;
+        if (spacingInMillimetre[2] < 0) spacingInMillimetre[2] *= -this->inputReference->dz;
 
         // Define the spacing for the first level
         float gridSpacing[3];
-        gridSpacing[0] = spacingInMillimeter[0] * powf(2, this->levelNumber - 1);
-        gridSpacing[1] = spacingInMillimeter[1] * powf(2, this->levelNumber - 1);
+        gridSpacing[0] = spacingInMillimetre[0] * powf(2, this->levelNumber - 1);
+        gridSpacing[1] = spacingInMillimetre[1] * powf(2, this->levelNumber - 1);
         gridSpacing[2] = 1;
         if (this->referencePyramid[0]->nz > 1)
-            gridSpacing[2] = spacingInMillimeter[2] * powf(2, this->levelNumber - 1);
+            gridSpacing[2] = spacingInMillimetre[2] * powf(2, this->levelNumber - 1);
 
         // Create and allocate the control point image
-        reg_createControlPointGrid<T>(&controlPointGrid, this->referencePyramid[0], gridSpacing);
+        reg_createControlPointGrid<T>(controlPointGrid, this->referencePyramid[0], gridSpacing);
 
         // The control point position image is initialised with the affine transformation
         if (!this->affineTransformation) {
@@ -216,7 +201,7 @@ void reg_f3d<T>::Initialise() {
         } else reg_affine_getDeformationField(this->affineTransformation, controlPointGrid);
     } else {
         // The control point grid image is initialised with the provided grid
-        controlPointGrid = nifti_dup(*inputControlPointGrid);
+        controlPointGrid = inputControlPointGrid;
         // The final grid spacing is computed
         spacing[0] = controlPointGrid->dx / powf(2, this->levelNumber - 1);
         spacing[1] = controlPointGrid->dy / powf(2, this->levelNumber - 1);
@@ -610,13 +595,13 @@ void reg_f3d<T>::UpdateParameters(float scale) {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetOptimiser() {
-    this->optimiser = this->platform->template CreateOptimiser<T>(*dynamic_cast<F3dContent*>(this->con),
-                                                                  *this,
-                                                                  this->maxIterationNumber,
-                                                                  this->useConjGradient,
-                                                                  this->optimiseX,
-                                                                  this->optimiseY,
-                                                                  this->optimiseZ);
+    this->optimiser.reset(this->platform->template CreateOptimiser<T>(dynamic_cast<F3dContent&>(*this->con),
+                                                                      *this,
+                                                                      this->maxIterationNumber,
+                                                                      this->useConjGradient,
+                                                                      this->optimiseX,
+                                                                      this->optimiseY,
+                                                                      this->optimiseZ));
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SetOptimiser");
 #endif
@@ -640,7 +625,7 @@ void reg_f3d<T>::GetApproximatedGradient() {
 }
 /* *************************************************************** */
 template<class T>
-nifti_image** reg_f3d<T>::GetWarpedImage() {
+vector<NiftiImage> reg_f3d<T>::GetWarpedImage() {
     // The initial images are used
     if (!this->inputReference || !this->inputFloating || !controlPointGrid) {
         reg_print_fct_error("reg_f3d<T>::GetWarpedImage()");
@@ -652,22 +637,21 @@ nifti_image** reg_f3d<T>::GetWarpedImage() {
 
     this->WarpFloatingImage(3); // cubic spline interpolation
 
-    nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
-    warpedImage[0] = nifti_dup(*this->con->GetWarped());
+    NiftiImage warpedImage = NiftiImage(this->con->GetWarped(), true);
 
     DeinitCurrentLevel(-1);
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetWarpedImage");
 #endif
-    return warpedImage;
+    return { warpedImage };
 }
 /* *************************************************************** */
 template<class T>
-nifti_image* reg_f3d<T>::GetControlPointPositionImage() {
+NiftiImage reg_f3d<T>::GetControlPointPositionImage() {
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::GetControlPointPositionImage");
 #endif
-    return nifti_dup(*controlPointGrid);
+    return controlPointGrid;
 }
 /* *************************************************************** */
 template<class T>
@@ -729,7 +713,7 @@ void reg_f3d<T>::GetObjectiveFunctionGradient() {
             this->WarpFloatingImage(this->interpolation);
             GetSimilarityMeasureGradient();
         } else {
-            dynamic_cast<F3dContent*>(this->con)->ZeroTransformationGradient();
+            dynamic_cast<F3dContent&>(*this->con).ZeroTransformationGradient();
         }
         // Compute the penalty term gradients if required
         GetBendingEnergyGradient();
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index 3ef13cd5..0950dbed 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -18,8 +18,8 @@
 template <class T>
 class reg_f3d: public reg_base<T> {
 protected:
-    nifti_image *inputControlPointGrid; // pointer to external
-    nifti_image *controlPointGrid;
+    NiftiImage inputControlPointGrid; // pointer to external
+    NiftiImage controlPointGrid;
     T bendingEnergyWeight;
     T linearEnergyWeight;
     T jacobianLogWeight;
@@ -64,12 +64,11 @@ class reg_f3d: public reg_base<T> {
 
 public:
     reg_f3d(int refTimePoint, int floTimePoint);
-    virtual ~reg_f3d();
 
-    virtual nifti_image* GetControlPointPositionImage();
-    virtual nifti_image** GetWarpedImage() override;
+    virtual NiftiImage GetControlPointPositionImage();
+    virtual vector<NiftiImage> GetWarpedImage() override;
 
-    virtual void SetControlPointGridImage(nifti_image*);
+    virtual void SetControlPointGridImage(NiftiImage);
     virtual void SetBendingEnergyWeight(T);
     virtual void SetLinearEnergyWeight(T);
     virtual void SetJacobianLogWeight(T);
@@ -79,10 +78,10 @@ class reg_f3d: public reg_base<T> {
     virtual void NoGridRefinement() { gridRefinement = false; }
 
     // F3D2 specific options
-    virtual nifti_image* GetBackwardControlPointPositionImage() { return nullptr; }
+    virtual NiftiImage GetBackwardControlPointPositionImage() { return {}; }
     virtual void UseBCHUpdate(int) {}
     virtual void UseGradientCumulativeExp() {}
     virtual void DoNotUseGradientCumulativeExp() {}
-    virtual void SetFloatingMask(nifti_image*) {}
+    virtual void SetFloatingMask(NiftiImage) {}
     virtual void SetInverseConsistencyWeight(T) {}
 };

From 76c6652290e1f9e2336d2757e97fe87c3f482d10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 17:34:34 +0000
Subject: [PATCH 089/314] Refactor reg_f3d2 class using automatic memory
 management

---
 niftyreg_build_version.txt |   2 +-
 reg-lib/_reg_f3d2.cpp      | 149 ++++++++++++-------------------------
 reg-lib/_reg_f3d2.h        |  19 +++--
 3 files changed, 57 insertions(+), 113 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 7fba2b43..cd7da05e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-209
+210
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index dc51ddcf..1c4f6c82 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -18,10 +18,6 @@ template <class T>
 reg_f3d2<T>::reg_f3d2(int refTimePoint, int floTimePoint):
     reg_f3d<T>::reg_f3d(refTimePoint, floTimePoint) {
     this->executableName = (char*)"NiftyReg F3D2";
-    controlPointGridBw = nullptr;
-    floatingMaskImage = nullptr;
-    floatingMaskPyramid = nullptr;
-    affineTransformationBw = nullptr;
     inverseConsistencyWeight = 0;
     bchUpdate = false;
     useGradientCumulativeExp = true;
@@ -32,43 +28,9 @@ reg_f3d2<T>::reg_f3d2(int refTimePoint, int floTimePoint):
 #endif
 }
 /* *************************************************************** */
-template <class T>
-reg_f3d2<T>::~reg_f3d2() {
-    if (controlPointGridBw) {
-        nifti_image_free(controlPointGridBw);
-        controlPointGridBw = nullptr;
-    }
-
-    if (floatingMaskPyramid) {
-        if (this->usePyramid) {
-            for (unsigned int i = 0; i < this->levelToPerform; i++) {
-                if (floatingMaskPyramid[i]) {
-                    free(floatingMaskPyramid[i]);
-                    floatingMaskPyramid[i] = nullptr;
-                }
-            }
-        } else {
-            if (floatingMaskPyramid[0]) {
-                free(floatingMaskPyramid[0]);
-                floatingMaskPyramid[0] = nullptr;
-            }
-        }
-        free(floatingMaskPyramid);
-        floatingMaskPyramid = nullptr;
-    }
-
-    if (affineTransformationBw) {
-        delete affineTransformationBw;
-        affineTransformationBw = nullptr;
-    }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_f3d2 destructor called");
-#endif
-}
-/* *************************************************************** */
 template<class T>
-void reg_f3d2<T>::SetFloatingMask(nifti_image *m) {
-    floatingMaskImage = m;
+void reg_f3d2<T>::SetFloatingMask(NiftiImage floatingMaskImageIn) {
+    floatingMaskImage = floatingMaskImageIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::~SetFloatingMask");
 #endif
@@ -85,8 +47,8 @@ void reg_f3d2<T>::SetInverseConsistencyWeight(T w) {
 template<class T>
 void reg_f3d2<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
     unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
-    conBw = contentCreator->Create(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T));
-    computeBw = this->platform->CreateCompute(*conBw);
+    conBw.reset(contentCreator->Create(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw.get(), sizeof(T)));
+    computeBw.reset(this->platform->CreateCompute(*conBw));
 }
 /* *************************************************************** */
 template <class T>
@@ -103,8 +65,8 @@ T reg_f3d2<T>::InitCurrentLevel(int currentLevel) {
         const int index = this->usePyramid ? currentLevel : 0;
         reference = this->referencePyramid[index];
         floating = this->floatingPyramid[index];
-        referenceMask = this->maskPyramid[index];
-        floatingMask = floatingMaskPyramid[index];
+        referenceMask = this->maskPyramid[index].get();
+        floatingMask = floatingMaskPyramid[index].get();
     }
 
     // Define the initial step size for the gradient ascent optimisation
@@ -143,16 +105,12 @@ T reg_f3d2<T>::InitCurrentLevel(int currentLevel) {
 template<class T>
 void reg_f3d2<T>::DeinitCurrentLevel(int currentLevel) {
     reg_f3d<T>::DeinitCurrentLevel(currentLevel);
-    delete computeBw;
     computeBw = nullptr;
-    delete conBw;
     conBw = nullptr;
     if (currentLevel >= 0) {
         if (this->usePyramid) {
-            free(floatingMaskPyramid[currentLevel]);
             floatingMaskPyramid[currentLevel] = nullptr;
         } else if (currentLevel == this->levelToPerform - 1) {
-            free(floatingMaskPyramid[0]);
             floatingMaskPyramid[0] = nullptr;
         }
     }
@@ -330,11 +288,11 @@ double reg_f3d2<T>::ComputeLandmarkDistancePenaltyTerm() {
 template <class T>
 void reg_f3d2<T>::GetVoxelBasedGradient() {
     // The voxel based gradient image is initialised with zeros
-    dynamic_cast<F3dContent*>(this->con)->ZeroVoxelBasedMeasureGradient();
+    dynamic_cast<F3dContent&>(*this->con).ZeroVoxelBasedMeasureGradient();
     conBw->ZeroVoxelBasedMeasureGradient();
 
     // The intensity gradient is first computed
-    //    if(this->measure_dti!=nullptr){
+    //    if(this->measure_dti){
     //        reg_getImageGradient(this->floating,
     //                             this->warpedGradient,
     //                             this->deformationFieldImage,
@@ -354,7 +312,7 @@ void reg_f3d2<T>::GetVoxelBasedGradient() {
     //                             this->measure_dti->GetActiveTimepoints(),
     //                             backwardJacobianMatrix,
     //                             backwardWarped);
-    //   if(this->measure_dti!=nullptr)
+    //   if(this->measure_dti)
     //      this->measure_dti->GetVoxelBasedSimilarityMeasureGradient();
     //    }
     //    else{
@@ -518,7 +476,7 @@ void reg_f3d2<T>::GetObjectiveFunctionGradient() {
             WarpFloatingImage(this->interpolation);
             GetSimilarityMeasureGradient();
         } else {
-            dynamic_cast<F3dContent*>(this->con)->ZeroTransformationGradient();
+            dynamic_cast<F3dContent&>(*this->con).ZeroTransformationGradient();
             conBw->ZeroTransformationGradient();
         }
     } else GetApproximatedGradient();
@@ -569,14 +527,14 @@ void reg_f3d2<T>::DisplayCurrentLevelParameters(int currentLevel) {
 /* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::SetOptimiser() {
-    this->optimiser = this->platform->template CreateOptimiser<T>(*dynamic_cast<F3dContent*>(this->con),
-                                                                  *this,
-                                                                  this->maxIterationNumber,
-                                                                  this->useConjGradient,
-                                                                  this->optimiseX,
-                                                                  this->optimiseY,
-                                                                  this->optimiseZ,
-                                                                  conBw);
+    this->optimiser.reset(this->platform->template CreateOptimiser<T>(dynamic_cast<F3dContent&>(*this->con),
+                                                                      *this,
+                                                                      this->maxIterationNumber,
+                                                                      this->useConjGradient,
+                                                                      this->optimiseX,
+                                                                      this->optimiseY,
+                                                                      this->optimiseZ,
+                                                                      conBw.get()));
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::SetOptimiser");
 #endif
@@ -657,28 +615,28 @@ double reg_f3d2<T>::GetObjectiveFunctionValue() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d2<T>::InitialiseSimilarity() {
-    F3dContent& con = *dynamic_cast<F3dContent*>(this->con);
+    F3dContent& con = dynamic_cast<F3dContent&>(*this->con);
 
     if (this->measure_nmi)
-        this->measure->Initialise(*this->measure_nmi, con, conBw);
+        this->measure->Initialise(*this->measure_nmi, con, conBw.get());
 
     if (this->measure_ssd)
-        this->measure->Initialise(*this->measure_ssd, con, conBw);
+        this->measure->Initialise(*this->measure_ssd, con, conBw.get());
 
     if (this->measure_kld)
-        this->measure->Initialise(*this->measure_kld, con, conBw);
+        this->measure->Initialise(*this->measure_kld, con, conBw.get());
 
     if (this->measure_lncc)
-        this->measure->Initialise(*this->measure_lncc, con, conBw);
+        this->measure->Initialise(*this->measure_lncc, con, conBw.get());
 
     if (this->measure_dti)
-        this->measure->Initialise(*this->measure_dti, con, conBw);
+        this->measure->Initialise(*this->measure_dti, con, conBw.get());
 
     if (this->measure_mind)
-        this->measure->Initialise(*this->measure_mind, con, conBw);
+        this->measure->Initialise(*this->measure_mind, con, conBw.get());
 
     if (this->measure_mindssc)
-        this->measure->Initialise(*this->measure_mindssc, con, conBw);
+        this->measure->Initialise(*this->measure_mindssc, con, conBw.get());
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::InitialiseSimilarity");
@@ -686,11 +644,11 @@ void reg_f3d2<T>::InitialiseSimilarity() {
 }
 /* *************************************************************** */
 template<class T>
-nifti_image* reg_f3d2<T>::GetBackwardControlPointPositionImage() {
+NiftiImage reg_f3d2<T>::GetBackwardControlPointPositionImage() {
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::GetBackwardControlPointPositionImage");
 #endif
-    return nifti_dup(*controlPointGridBw);
+    return controlPointGridBw;
 }
 /* *************************************************************** */
 template <class T>
@@ -729,22 +687,22 @@ void reg_f3d2<T>::Initialise() {
         gridSpacing[2] *= powf(2, this->levelNumber - 1);
 
         // Create the forward and backward control point grids
-        reg_createSymmetricControlPointGrids<T>(&this->controlPointGrid,
-                                                &controlPointGridBw,
+        reg_createSymmetricControlPointGrids<T>(this->controlPointGrid,
+                                                controlPointGridBw,
                                                 this->referencePyramid[0],
                                                 this->floatingPyramid[0],
                                                 this->affineTransformation,
                                                 gridSpacing);
     } else {
         // The control point grid image is initialised with the provided grid
-        this->controlPointGrid = nifti_dup(*this->inputControlPointGrid);
+        this->controlPointGrid = this->inputControlPointGrid;
         // The final grid spacing is computed
         this->spacing[0] = this->controlPointGrid->dx / powf(2, this->levelNumber - 1);
         this->spacing[1] = this->controlPointGrid->dy / powf(2, this->levelNumber - 1);
         if (this->controlPointGrid->nz > 1)
             this->spacing[2] = this->controlPointGrid->dz / powf(2, this->levelNumber - 1);
         // The backward grid is derived from the forward
-        controlPointGridBw = nifti_dup(*this->controlPointGrid);
+        controlPointGridBw = this->controlPointGrid;
         reg_getDisplacementFromDeformation(controlPointGridBw);
         reg_tools_multiplyValueToImage(controlPointGridBw, controlPointGridBw, -1);
         reg_getDeformationFromDisplacement(controlPointGridBw);
@@ -755,29 +713,15 @@ void reg_f3d2<T>::Initialise() {
     }
 
     // Set the floating mask image pyramid
-    if (this->usePyramid) {
-        floatingMaskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*));
-    } else {
-        floatingMaskPyramid = (int**)malloc(sizeof(int*));
-    }
+    const unsigned int imageCount = this->usePyramid ? this->levelToPerform : 1;
+    const unsigned int levelCount = this->usePyramid ? this->levelNumber : 1;
+    floatingMaskPyramid = vector<unique_ptr<int[]>>(imageCount);
 
-    if (this->usePyramid) {
-        if (floatingMaskImage) {
-            reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, this->levelNumber, this->levelToPerform);
-        } else {
-            for (unsigned int l = 0; l < this->levelToPerform; ++l) {
-                const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[l]);
-                floatingMaskPyramid[l] = (int*)calloc(voxelNumberBw, sizeof(int));
-            }
-        }
-    } else {  // no pyramid
-        if (floatingMaskImage)
-            reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, 1, 1);
-        else {
-            const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[0]);
-            floatingMaskPyramid[0] = (int*)calloc(voxelNumberBw, sizeof(int));
-        }
-    }
+    if (floatingMaskImage)
+        reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, levelCount, imageCount);
+    else
+        for (unsigned int l = 0; l < imageCount; ++l)
+            floatingMaskPyramid[l].reset(new int[this->floatingPyramid[l].nVoxelsPerVolume()]());
 
 #ifdef NDEBUG
     if (this->verbose) {
@@ -798,7 +742,7 @@ void reg_f3d2<T>::Initialise() {
     this->controlPointGrid->intent_p2 = controlPointGridBw->intent_p2 = 6;
 
     if (this->affineTransformation)
-        affineTransformationBw = new mat44(nifti_mat44_inverse(*this->affineTransformation));
+        affineTransformationBw.reset(new mat44(nifti_mat44_inverse(*this->affineTransformation)));
 
 #ifndef NDEBUG
     reg_print_msg_debug("reg_f3d2::Initialise() done");
@@ -866,7 +810,7 @@ void reg_f3d2<T>::UpdateParameters(float scale) {
 }
 /* *************************************************************** */
 template<class T>
-nifti_image** reg_f3d2<T>::GetWarpedImage() {
+vector<NiftiImage> reg_f3d2<T>::GetWarpedImage() {
     // The initial images are used
     if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !controlPointGridBw) {
         reg_print_fct_error("reg_f3d2<T>::GetWarpedImage()");
@@ -878,10 +822,11 @@ nifti_image** reg_f3d2<T>::GetWarpedImage() {
 
     WarpFloatingImage(3); // cubic spline interpolation
 
-    F3dContent *con = dynamic_cast<F3dContent*>(this->con);
-    nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*));
-    warpedImage[0] = nifti_dup(*con->GetWarped());
-    warpedImage[1] = nifti_dup(*conBw->GetWarped());
+    F3dContent& con = dynamic_cast<F3dContent&>(*this->con);
+    vector<NiftiImage> warpedImage{
+        NiftiImage(con.GetWarped(), true),
+        NiftiImage(conBw->GetWarped(), true)
+    };
 
     DeinitCurrentLevel(-1);
 #ifndef NDEBUG
diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h
index 73124c04..e8d6fdec 100644
--- a/reg-lib/_reg_f3d2.h
+++ b/reg-lib/_reg_f3d2.h
@@ -18,20 +18,20 @@
 template <class T>
 class reg_f3d2: public reg_f3d<T> {
 protected:
-    nifti_image *floatingMaskImage;
-    int **floatingMaskPyramid;
-    nifti_image *controlPointGridBw;
-    mat44 *affineTransformationBw;
+    NiftiImage floatingMaskImage;
+    vector<unique_ptr<int[]>> floatingMaskPyramid;
+    NiftiImage controlPointGridBw;
+    unique_ptr<mat44> affineTransformationBw;
     T inverseConsistencyWeight;
     bool bchUpdate;
     bool useGradientCumulativeExp;
     int bchUpdateValue;
 
     // Content backwards
-    F3dContent *conBw = nullptr;
+    unique_ptr<F3dContent> conBw;
 
     // Compute backwards
-    Compute *computeBw = nullptr;
+    unique_ptr<Compute> computeBw;
 
     virtual void SetOptimiser() override;
     virtual double ComputeBendingEnergyPenaltyTerm() override;
@@ -67,13 +67,12 @@ class reg_f3d2: public reg_f3d<T> {
 
 public:
     reg_f3d2(int refTimePoint, int floTimePoint);
-    virtual ~reg_f3d2();
 
-    virtual nifti_image* GetBackwardControlPointPositionImage() override;
-    virtual nifti_image** GetWarpedImage() override;
+    virtual NiftiImage GetBackwardControlPointPositionImage() override;
+    virtual vector<NiftiImage> GetWarpedImage() override;
     virtual bool GetSymmetricStatus() override { return true; }
 
-    virtual void SetFloatingMask(nifti_image*) override;
+    virtual void SetFloatingMask(NiftiImage) override;
     virtual void SetInverseConsistencyWeight(T) override;
     virtual void UseBCHUpdate(int) override;
     virtual void UseGradientCumulativeExp() override;

From ea8fac0c0414e55c56c436ac70a2563ba28ce5d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 18:04:40 +0000
Subject: [PATCH 090/314] Refactor reg_f3d app using automatic memory
 management

---
 niftyreg_build_version.txt |   2 +-
 reg-apps/reg_f3d.cpp       | 187 +++++++++++++++----------------------
 2 files changed, 74 insertions(+), 115 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index cd7da05e..dba40afc 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-210
+211
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index f273e138..4dda0b6d 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -32,7 +32,6 @@ void PetitUsage(char *exec) {
     reg_print_msg_error(text);
     reg_print_msg_error("\tSee the help for more details (-h)");
     reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-    return;
 }
 
 void Usage(char *exec) {
@@ -169,7 +168,6 @@ void Usage(char *exec) {
     sprintf(text, "\t\t\t\t(%s)", NR_VERSION);
     reg_print_info(exec, text);
     reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-    return;
 }
 
 int main(int argc, char **argv) {
@@ -245,12 +243,11 @@ int main(int argc, char **argv) {
 
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
     // Read the reference and floating image
-    nifti_image *referenceImage = nullptr;
-    nifti_image *floatingImage = nullptr;
+    NiftiImage referenceImage, floatingImage;
     for (int i = 1; i < argc; i++) {
         if ((strcmp(argv[i], "-ref") == 0) || (strcmp(argv[i], "-target") == 0) || (strcmp(argv[i], "--ref") == 0)) {
             referenceImage = reg_io_ReadImageFile(argv[++i]);
-            if (referenceImage == nullptr) {
+            if (!referenceImage) {
                 reg_print_msg_error("Error when reading the reference image:");
                 reg_print_msg_error(argv[i - 1]);
                 return EXIT_FAILURE;
@@ -258,7 +255,7 @@ int main(int argc, char **argv) {
         }
         if ((strcmp(argv[i], "-flo") == 0) || (strcmp(argv[i], "-source") == 0) || (strcmp(argv[i], "--flo") == 0)) {
             floatingImage = reg_io_ReadImageFile(argv[++i]);
-            if (floatingImage == nullptr) {
+            if (!floatingImage) {
                 reg_print_msg_error("Error when reading the floating image:");
                 reg_print_msg_error(argv[i - 1]);
                 return EXIT_FAILURE;
@@ -266,27 +263,25 @@ int main(int argc, char **argv) {
         }
     }
     // Check that both reference and floating image have been defined
-    if (referenceImage == nullptr) {
+    if (!referenceImage) {
         reg_print_msg_error("Error. No reference image has been defined");
         PetitUsage((argv[0]));
         return EXIT_FAILURE;
     }
     // Read the floating image
-    if (floatingImage == nullptr) {
+    if (!floatingImage) {
         reg_print_msg_error("Error. No floating image has been defined");
         PetitUsage((argv[0]));
         return EXIT_FAILURE;
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
     // Check the type of registration object to create
-    reg_f3d<float> *reg = nullptr;
-    float *referenceLandmark = nullptr;
-    float *floatingLandmark = nullptr;
+    unique_ptr<reg_f3d<float>> reg;
     PlatformType platformType(PlatformType::Cpu);
     unsigned gpuIdx = 999;
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) {
-            reg = new reg_f3d2<float>(referenceImage->nt, floatingImage->nt);
+            reg.reset(new reg_f3d2<float>(referenceImage->nt, floatingImage->nt));
         } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
             PlatformType value{ atoi(argv[++i]) };
             if (value < PlatformType::Cpu || value > PlatformType::Cuda) {
@@ -308,21 +303,16 @@ int main(int argc, char **argv) {
             gpuIdx = unsigned(atoi(argv[++i]));
         }
     }
-    if (reg == nullptr)
-        reg = new reg_f3d<float>(referenceImage->nt, floatingImage->nt);
+    if (!reg)
+        reg.reset(new reg_f3d<float>(referenceImage->nt, floatingImage->nt));
     reg->SetReferenceImage(referenceImage);
     reg->SetFloatingImage(floatingImage);
     reg->SetPlatformType(platformType);
     reg->SetGpuIdx(gpuIdx);
 
     // Create some pointers that could be used
-    mat44 affineMatrix;
-    nifti_image *inputCCPImage = nullptr;
-    nifti_image *referenceMaskImage = nullptr;
-    nifti_image *floatingMaskImage = nullptr;
-    nifti_image *refLocalWeightSim = nullptr;
-    char *outputWarpedImageName = nullptr;
-    char *outputCPPImageName = nullptr;
+    const char *outputWarpedImageName = "outputResult.nii";
+    const char *outputCPPImageName = "outputCPP.nii";
     bool useMeanLNCC = false;
     int refBinNumber = 0;
     int floBinNumber = 0;
@@ -349,26 +339,26 @@ int main(int argc, char **argv) {
                 return EXIT_FAILURE;
             }
             // Read the affine matrix
-            reg_tool_ReadAffineFile(&affineMatrix,
-                                    affineTransformationName);
+            mat44 affineMatrix;
+            reg_tool_ReadAffineFile(&affineMatrix, affineTransformationName);
             // Send the transformation to the registration object
             reg->SetAffineTransformation(&affineMatrix);
         } else if (strcmp(argv[i], "-incpp") == 0 || (strcmp(argv[i], "--incpp") == 0)) {
-            inputCCPImage = reg_io_ReadImageFile(argv[++i]);
-            if (inputCCPImage == nullptr) {
+            NiftiImage inputCCPImage = reg_io_ReadImageFile(argv[++i]);
+            if (!inputCCPImage) {
                 reg_print_msg_error("Error when reading the input control point grid image:");
                 reg_print_msg_error(argv[i - 1]);
                 return EXIT_FAILURE;
             }
-            reg->SetControlPointGridImage(inputCCPImage);
+            reg->SetControlPointGridImage(std::move(inputCCPImage));
         } else if ((strcmp(argv[i], "-rmask") == 0) || (strcmp(argv[i], "-tmask") == 0) || (strcmp(argv[i], "--rmask") == 0)) {
-            referenceMaskImage = reg_io_ReadImageFile(argv[++i]);
-            if (referenceMaskImage == nullptr) {
+            NiftiImage referenceMaskImage = reg_io_ReadImageFile(argv[++i]);
+            if (!referenceMaskImage) {
                 reg_print_msg_error("Error when reading the reference mask image:");
                 reg_print_msg_error(argv[i - 1]);
                 return EXIT_FAILURE;
             }
-            reg->SetReferenceMask(referenceMaskImage);
+            reg->SetReferenceMask(std::move(referenceMaskImage));
         } else if ((strcmp(argv[i], "-res") == 0) || (strcmp(argv[i], "-result") == 0) || (strcmp(argv[i], "--res") == 0)) {
             outputWarpedImageName = argv[++i];
         } else if (strcmp(argv[i], "-cpp") == 0 || (strcmp(argv[i], "--cpp") == 0)) {
@@ -441,8 +431,8 @@ int main(int argc, char **argv) {
                 return EXIT_FAILURE;
             }
             float **allLandmarks = reg_tool_ReadMatrixFile<float>(filename, landmarkNumber, n);
-            referenceLandmark = (float *)malloc(landmarkNumber * n / 2 * sizeof(float));
-            floatingLandmark = (float *)malloc(landmarkNumber * n / 2 * sizeof(float));
+            unique_ptr<float[]> referenceLandmark(new float[landmarkNumber * n / 2]);
+            unique_ptr<float[]> floatingLandmark(new float[landmarkNumber * n / 2]);
             for (size_t l = 0, index = 0; l < landmarkNumber; ++l) {
                 referenceLandmark[index] = allLandmarks[l][0];
                 referenceLandmark[index + 1] = allLandmarks[l][1];
@@ -459,8 +449,8 @@ int main(int argc, char **argv) {
                 }
             }
             reg->SetLandmarkRegularisationParam(landmarkNumber,
-                                                referenceLandmark,
-                                                floatingLandmark,
+                                                referenceLandmark.get(),
+                                                floatingLandmark.get(),
                                                 weight);
             for (size_t l = 0; l < landmarkNumber; ++l)
                 free(allLandmarks[l]);
@@ -559,7 +549,7 @@ int main(int argc, char **argv) {
         } else if (strcmp(argv[i], "-lnccMean") == 0) {
             useMeanLNCC = true;
         } else if (strcmp(argv[i], "-dti") == 0 || strcmp(argv[i], "--dti") == 0) {
-            bool *timePoint = new bool[referenceImage->nt];
+            unique_ptr<bool[]> timePoint(new bool[referenceImage->nt]);
             for (int t = 0; t < referenceImage->nt; ++t)
                 timePoint[t] = false;
             timePoint[atoi(argv[++i])] = true;
@@ -570,8 +560,7 @@ int main(int argc, char **argv) {
                 timePoint[atoi(argv[++i])] = true;
                 timePoint[atoi(argv[++i])] = true;
             }
-            reg->UseDTI(timePoint);
-            delete[]timePoint;
+            reg->UseDTI(timePoint.get());
         } else if (strcmp(argv[i], "-nmiw") == 0) {
             int tp = atoi(argv[++i]);
             double w = atof(argv[++i]);
@@ -589,8 +578,8 @@ int main(int argc, char **argv) {
             double w = atof(argv[++i]);
             reg->SetKLDWeight(tp, w);
         } else if (strcmp(argv[i], "-wSim") == 0 || strcmp(argv[i], "--wSim") == 0) {
-            refLocalWeightSim = reg_io_ReadImageFile(argv[++i]);
-            reg->SetLocalWeightSim(refLocalWeightSim);
+            NiftiImage refLocalWeightSim = reg_io_ReadImageFile(argv[++i]);
+            reg->SetLocalWeightSim(std::move(refLocalWeightSim));
         } else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) {
             reg->SetWarpedPaddingValue(atof(argv[++i]));
         } else if (strcmp(argv[i], "-nopy") == 0 || strcmp(argv[i], "--nopy") == 0) {
@@ -614,13 +603,13 @@ int main(int argc, char **argv) {
             }
         } else if ((strcmp(argv[i], "-fmask") == 0) || (strcmp(argv[i], "-smask") == 0) ||
                  (strcmp(argv[i], "--fmask") == 0) || (strcmp(argv[i], "--smask") == 0)) {
-            floatingMaskImage = reg_io_ReadImageFile(argv[++i]);
-            if (floatingMaskImage == nullptr) {
+            NiftiImage floatingMaskImage = reg_io_ReadImageFile(argv[++i]);
+            if (!floatingMaskImage) {
                 reg_print_msg_error("Error when reading the floating mask image:");
                 reg_print_msg_error(argv[i - 1]);
                 return EXIT_FAILURE;
             }
-            reg->SetFloatingMask(floatingMaskImage);
+            reg->SetFloatingMask(std::move(floatingMaskImage));
         } else if (strcmp(argv[i], "-ic") == 0 || strcmp(argv[i], "--ic") == 0) {
             reg->SetInverseConsistencyWeight(atof(argv[++i]));
         } else if (strcmp(argv[i], "-nox") == 0) {
@@ -638,7 +627,6 @@ int main(int argc, char **argv) {
         } else if (strcmp(argv[i], "-bch") == 0 || strcmp(argv[i], "--bch") == 0) {
             reg->UseBCHUpdate(atoi(argv[++i]));
         }
-
         else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) {
 #ifdef _OPENMP
             omp_set_num_threads(atoi(argv[++i]));
@@ -687,99 +675,70 @@ int main(int argc, char **argv) {
     reg->Run();
 
     // Save the control point image
-    nifti_image *outputControlPointGridImage = reg->GetControlPointPositionImage();
-    if (outputCPPImageName == nullptr) outputCPPImageName = (char *)"outputCPP.nii";
+    NiftiImage outputControlPointGridImage = reg->GetControlPointPositionImage();
     memset(outputControlPointGridImage->descrip, 0, 80);
     strcpy(outputControlPointGridImage->descrip, "Control point position from NiftyReg (reg_f3d)");
     if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0)
         strcpy(outputControlPointGridImage->descrip, "Velocity field grid from NiftyReg (reg_f3d2)");
     reg_io_WriteImageFile(outputControlPointGridImage, outputCPPImageName);
-    nifti_image_free(outputControlPointGridImage);
-    outputControlPointGridImage = nullptr;
 
     // Save the backward control point image
     if (reg->GetSymmetricStatus()) {
         // _backward is added to the forward control point grid image name
-        std::string b(outputCPPImageName);
-        if (b.find(".nii.gz") != std::string::npos)
-            b.replace(b.find(".nii.gz"), 7, "_backward.nii.gz");
-        else if (b.find(".nii") != std::string::npos)
-            b.replace(b.find(".nii"), 4, "_backward.nii");
-        else if (b.find(".hdr") != std::string::npos)
-            b.replace(b.find(".hdr"), 4, "_backward.hdr");
-        else if (b.find(".img.gz") != std::string::npos)
-            b.replace(b.find(".img.gz"), 7, "_backward.img.gz");
-        else if (b.find(".img") != std::string::npos)
-            b.replace(b.find(".img"), 4, "_backward.img");
-        else if (b.find(".png") != std::string::npos)
-            b.replace(b.find(".png"), 4, "_backward.png");
-        else if (b.find(".nrrd") != std::string::npos)
-            b.replace(b.find(".nrrd"), 5, "_backward.nrrd");
-        else b.append("_backward.nii");
-        nifti_image *outputBackwardControlPointGridImage = reg->GetBackwardControlPointPositionImage();
+        std::string fname(outputCPPImageName);
+        if (fname.find(".nii.gz") != std::string::npos)
+            fname.replace(fname.find(".nii.gz"), 7, "_backward.nii.gz");
+        else if (fname.find(".nii") != std::string::npos)
+            fname.replace(fname.find(".nii"), 4, "_backward.nii");
+        else if (fname.find(".hdr") != std::string::npos)
+            fname.replace(fname.find(".hdr"), 4, "_backward.hdr");
+        else if (fname.find(".img.gz") != std::string::npos)
+            fname.replace(fname.find(".img.gz"), 7, "_backward.img.gz");
+        else if (fname.find(".img") != std::string::npos)
+            fname.replace(fname.find(".img"), 4, "_backward.img");
+        else if (fname.find(".png") != std::string::npos)
+            fname.replace(fname.find(".png"), 4, "_backward.png");
+        else if (fname.find(".nrrd") != std::string::npos)
+            fname.replace(fname.find(".nrrd"), 5, "_backward.nrrd");
+        else fname.append("_backward.nii");
+        NiftiImage outputBackwardControlPointGridImage = reg->GetBackwardControlPointPositionImage();
         memset(outputBackwardControlPointGridImage->descrip, 0, 80);
         strcpy(outputBackwardControlPointGridImage->descrip, "Backward Control point position from NiftyReg (reg_f3d)");
         if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0)
             strcpy(outputBackwardControlPointGridImage->descrip, "Backward velocity field grid from NiftyReg (reg_f3d2)");
-        reg_io_WriteImageFile(outputBackwardControlPointGridImage, b.c_str());
-        nifti_image_free(outputBackwardControlPointGridImage);
-        outputBackwardControlPointGridImage = nullptr;
+        reg_io_WriteImageFile(outputBackwardControlPointGridImage, fname.c_str());
     }
 
     // Save the warped image(s)
-    nifti_image **outputWarpedImage = reg->GetWarpedImage();
-    if (outputWarpedImageName == nullptr)
-        outputWarpedImageName = (char*)"outputResult.nii";
-    memset(outputWarpedImage[0]->descrip, 0, 80);
-    strcpy(outputWarpedImage[0]->descrip, "Warped image using NiftyReg (reg_f3d)");
+    auto outputWarpedImages = reg->GetWarpedImage();
+    memset(outputWarpedImages[0]->descrip, 0, 80);
+    strcpy(outputWarpedImages[0]->descrip, "Warped image using NiftyReg (reg_f3d)");
     if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0) {
-        strcpy(outputWarpedImage[0]->descrip, "Warped image using NiftyReg (reg_f3d2)");
-        strcpy(outputWarpedImage[1]->descrip, "Warped image using NiftyReg (reg_f3d2)");
+        strcpy(outputWarpedImages[0]->descrip, "Warped image using NiftyReg (reg_f3d2)");
+        strcpy(outputWarpedImages[1]->descrip, "Warped image using NiftyReg (reg_f3d2)");
     }
     if (reg->GetSymmetricStatus()) {
-        if (outputWarpedImage[1] != nullptr) {
-            std::string b(outputWarpedImageName);
-            if (b.find(".nii.gz") != std::string::npos)
-                b.replace(b.find(".nii.gz"), 7, "_backward.nii.gz");
-            else if (b.find(".nii") != std::string::npos)
-                b.replace(b.find(".nii"), 4, "_backward.nii");
-            else if (b.find(".hdr") != std::string::npos)
-                b.replace(b.find(".hdr"), 4, "_backward.hdr");
-            else if (b.find(".img.gz") != std::string::npos)
-                b.replace(b.find(".img.gz"), 7, "_backward.img.gz");
-            else if (b.find(".img") != std::string::npos)
-                b.replace(b.find(".img"), 4, "_backward.img");
-            else if (b.find(".png") != std::string::npos)
-                b.replace(b.find(".png"), 4, "_backward.png");
-            else if (b.find(".nrrd") != std::string::npos)
-                b.replace(b.find(".nrrd"), 5, "_backward.nrrd");
-            else b.append("_backward.nii");
-            reg_io_WriteImageFile(outputWarpedImage[1], b.c_str());
+        if (outputWarpedImages[1]) {
+            std::string fname(outputWarpedImageName);
+            if (fname.find(".nii.gz") != std::string::npos)
+                fname.replace(fname.find(".nii.gz"), 7, "_backward.nii.gz");
+            else if (fname.find(".nii") != std::string::npos)
+                fname.replace(fname.find(".nii"), 4, "_backward.nii");
+            else if (fname.find(".hdr") != std::string::npos)
+                fname.replace(fname.find(".hdr"), 4, "_backward.hdr");
+            else if (fname.find(".img.gz") != std::string::npos)
+                fname.replace(fname.find(".img.gz"), 7, "_backward.img.gz");
+            else if (fname.find(".img") != std::string::npos)
+                fname.replace(fname.find(".img"), 4, "_backward.img");
+            else if (fname.find(".png") != std::string::npos)
+                fname.replace(fname.find(".png"), 4, "_backward.png");
+            else if (fname.find(".nrrd") != std::string::npos)
+                fname.replace(fname.find(".nrrd"), 5, "_backward.nrrd");
+            else fname.append("_backward.nii");
+            reg_io_WriteImageFile(outputWarpedImages[1], fname.c_str());
         }
     }
-    reg_io_WriteImageFile(outputWarpedImage[0], outputWarpedImageName);
-    if (outputWarpedImage[0] != nullptr)
-        nifti_image_free(outputWarpedImage[0]);
-    outputWarpedImage[0] = nullptr;
-    if (outputWarpedImage[1] != nullptr)
-        nifti_image_free(outputWarpedImage[1]);
-    outputWarpedImage[1] = nullptr;
-    free(outputWarpedImage);
-    outputWarpedImage = nullptr;
-    // Free the allocated landmarks if used
-    free(referenceLandmark);
-    free(floatingLandmark);
-
-    // Erase the registration object
-    delete reg;
-
-    // Clean the allocated images
-    if (refLocalWeightSim != nullptr) nifti_image_free(refLocalWeightSim);
-    if (referenceImage != nullptr) nifti_image_free(referenceImage);
-    if (floatingImage != nullptr) nifti_image_free(floatingImage);
-    if (inputCCPImage != nullptr) nifti_image_free(inputCCPImage);
-    if (referenceMaskImage != nullptr) nifti_image_free(referenceMaskImage);
-    if (floatingMaskImage != nullptr) nifti_image_free(floatingMaskImage);
+    reg_io_WriteImageFile(outputWarpedImages[0], outputWarpedImageName);
 
 #ifdef NDEBUG
     if (verbose) {

From d9bc22be5549fcdcb437ac02fc63190bebdf39e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 18:23:39 +0000
Subject: [PATCH 091/314] Refactor reg_test_imageGradient using NiftiImage

---
 reg-test/reg_test_common.h          | 14 ++---
 reg-test/reg_test_imageGradient.cpp | 86 +++++++++++++----------------
 2 files changed, 45 insertions(+), 55 deletions(-)

diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index bfe326f8..7f391f8b 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -30,21 +30,21 @@ void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) {
     derivative[3] = (3.f * relative - 2.f) * relative / 2.f;
 }
 
-nifti_image* CreateControlPointGrid(nifti_image *reference) {
+NiftiImage CreateControlPointGrid(const NiftiImage& reference) {
     // Set the spacing for the control point grid
-    float spacingInMillimeter[3] = { reference->dx, reference->dy, reference->dz };
+    float spacingInMillimetre[3] = { reference->dx, reference->dy, reference->dz };
 
     // Define the spacing for the first level
     float gridSpacing[3];
-    gridSpacing[0] = spacingInMillimeter[0];
-    gridSpacing[1] = spacingInMillimeter[1];
+    gridSpacing[0] = spacingInMillimetre[0];
+    gridSpacing[1] = spacingInMillimetre[1];
     gridSpacing[2] = 1;
     if (reference->nz > 1)
-        gridSpacing[2] = spacingInMillimeter[2];
+        gridSpacing[2] = spacingInMillimetre[2];
 
     // Create and allocate the control point image
-    nifti_image *controlPointGrid = nullptr;
-    reg_createControlPointGrid<float>(&controlPointGrid, reference, gridSpacing);
+    NiftiImage controlPointGrid;
+    reg_createControlPointGrid<float>(controlPointGrid, reference, gridSpacing);
 
     // The control point position image is initialised with the affine transformation
     reg_getDeformationFromDisplacement(controlPointGrid);
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index ab0e1249..a9992924 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -14,61 +14,50 @@
 */
 
 
-typedef std::tuple<std::string, nifti_image*, nifti_image*, int, float*> TestData;
+typedef std::tuple<std::string, NiftiImage, NiftiImage, int, float*> TestData;
 typedef std::tuple<unique_ptr<F3dContent>, unique_ptr<Platform>> ContentDesc;
 
 TEST_CASE("Image gradient", "[ImageGradient]") {
     // Create a reference 2D image
-    int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 };
-    nifti_image *reference2d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference2d);
+    vector<NiftiImage::dim_t> dimFlo{ 4, 4 };
+    NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
 
     // Fill image with distance from identity
-    auto ref2dPtr = static_cast<float*>(reference2d->data);
-    for (auto y = 0; y < reference2d->ny; ++y) {
-        for (auto x = 0; x < reference2d->nx; ++x) {
-            *ref2dPtr = sqrtf(float(x * x) + float(y * y));
-            ref2dPtr++;
-        }
-    }
-    ref2dPtr = static_cast<float*>(reference2d->data);
+    const auto ref2dPtr = reference2d.data();
+    auto ref2dIt = ref2dPtr.begin();
+    for (auto y = 0; y < reference2d->ny; ++y)
+        for (auto x = 0; x < reference2d->nx; ++x)
+            *ref2dIt++ = sqrtf(float(x * x) + float(y * y));
 
     // Create a corresponding 2D deformation field
-    int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 };
-    nifti_image *deformationField2d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(deformationField2d);
-    auto def2dPtr = static_cast<float*>(deformationField2d->data);
+    vector<NiftiImage::dim_t> dimDef{ 1, 1, 1, 1, 2 };
+    NiftiImage deformationField2d(dimDef, NIFTI_TYPE_FLOAT32);
+    auto def2dPtr = deformationField2d.data();
     def2dPtr[0] = 1.2f;
     def2dPtr[1] = 1.3f;
 
     // Create a reference 3D image
-    dimFlo[0] = 3; dimFlo[3] = 4;
-    nifti_image *reference3d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference3d);
+    dimFlo.push_back(4);
+    NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32);
 
     // Fill image with distance from identity
-    auto ref3dPtr = static_cast<float*>(reference3d->data);
-    for (auto z = 0; z < reference3d->nz; ++z) {
-        for (auto y = 0; y < reference3d->ny; ++y) {
-            for (auto x = 0; x < reference3d->nx; ++x) {
-                *ref3dPtr = sqrtf(float(x * x) + float(y * y) + float(z * z));
-                ref3dPtr++;
-            }
-        }
-    }
-    ref3dPtr = static_cast<float*>(reference3d->data);
+    const auto ref3dPtr = reference3d.data();
+    auto ref3dIt = ref3dPtr.begin();
+    for (auto z = 0; z < reference3d->nz; ++z)
+        for (auto y = 0; y < reference3d->ny; ++y)
+            for (auto x = 0; x < reference3d->nx; ++x)
+                *ref3dIt++ = sqrtf(float(x * x) + float(y * y) + float(z * z));
 
     // Create a corresponding 3D deformation field
-    dimDef[5] = 3;
-    nifti_image *deformationField3d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(deformationField3d);
-    auto def3dPtr = static_cast<float*>(deformationField3d->data);
+    dimDef[4] = 3;
+    NiftiImage deformationField3d(dimDef, NIFTI_TYPE_FLOAT32);
+    auto def3dPtr = deformationField3d.data();
     def3dPtr[0] = 1.2f;
     def3dPtr[1] = 1.3f;
     def3dPtr[2] = 1.4f;
 
     // Generate the different test cases
-    std::vector<TestData> testCases;
+    vector<TestData> testCases;
 
     // Linear image gradient - 2D
     // coordinate in image: [1.2, 1.3]
@@ -78,7 +67,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     const float yBasisLinear[2] = { 0.7f, 0.3f };
     for (int y = 0; y < 2; ++y) {
         for (int x = 0; x < 2; ++x) {
-            const auto coeff = ref2dPtr[(y + 1) * dimFlo[1] + (x + 1)];
+            const auto coeff = (float)ref2dPtr[(y + 1) * dimFlo[1] + (x + 1)];
             resLinear2d[0] += coeff * derivLinear[x] * yBasisLinear[y];
             resLinear2d[1] += coeff * xBasisLinear[x] * derivLinear[y];
         }
@@ -97,11 +86,11 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     float resCubic2d[2] = {};
     float xBasisCubic[4], yBasisCubic[4];
     float xDerivCubic[4], yDerivCubic[4];
-    interpCubicSplineKernel(0.2f, xBasisCubic, xDerivCubic);
-    interpCubicSplineKernel(0.3f, yBasisCubic, yDerivCubic);
+    InterpCubicSplineKernel(0.2f, xBasisCubic, xDerivCubic);
+    InterpCubicSplineKernel(0.3f, yBasisCubic, yDerivCubic);
     for (int y = 0; y <= 3; ++y) {
         for (int x = 0; x <= 3; ++x) {
-            const auto coeff = ref2dPtr[y * dimFlo[1] + x];
+            const auto coeff = (float)ref2dPtr[y * dimFlo[1] + x];
             resCubic2d[0] += coeff * xDerivCubic[x] * yBasisCubic[y];
             resCubic2d[1] += coeff * xBasisCubic[x] * yDerivCubic[y];
         }
@@ -123,7 +112,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     for (int z = 0; z < 2; ++z) {
         for (int y = 0; y < 2; ++y) {
             for (int x = 0; x < 2; ++x) {
-                const auto coeff = ref3dPtr[(z + 1) * dimFlo[1] * dimFlo[2] + (y + 1) * dimFlo[1] + (x + 1)];
+                const auto coeff = (float)ref3dPtr[(z + 1) * dimFlo[1] * dimFlo[2] + (y + 1) * dimFlo[1] + (x + 1)];
                 resLinear3d[0] += coeff * derivLinear[x] * yBasisLinear[y] * zBasisLinear[z];
                 resLinear3d[1] += coeff * xBasisLinear[x] * derivLinear[y] * zBasisLinear[z];
                 resLinear3d[2] += coeff * xBasisLinear[x] * yBasisLinear[y] * derivLinear[z];
@@ -144,11 +133,11 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     // coordinate in image: [1.2, 1.3, 1.4]
     float resCubic3d[3] = {};
     float zBasisCubic[4], zDerivCubic[4];
-    interpCubicSplineKernel(0.4f, zBasisCubic, zDerivCubic);
+    InterpCubicSplineKernel(0.4f, zBasisCubic, zDerivCubic);
     for (int z = 0; z <= 3; ++z) {
         for (int y = 0; y <= 3; ++y) {
             for (int x = 0; x <= 3; ++x) {
-                const auto coeff = ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x];
+                const auto coeff = (float)ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x];
                 resCubic3d[0] += coeff * xDerivCubic[x] * yBasisCubic[y] * zBasisCubic[z];
                 resCubic3d[1] += coeff * xBasisCubic[x] * yDerivCubic[y] * zBasisCubic[z];
                 resCubic3d[2] += coeff * xBasisCubic[x] * yBasisCubic[y] * zDerivCubic[z];
@@ -170,17 +159,17 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
         // Retrieve test information
         auto&& [testName, reference, defField, interp, testResult] = testCase;
         // Create the control point grid
-        unique_ptr<nifti_image> controlPointGrid{ CreateControlPointGrid(reference) };
+        NiftiImage controlPointGrid(CreateControlPointGrid(reference));
 
         // Accumulate all required contents with a vector
-        std::vector<ContentDesc> contentDescs;
+        vector<ContentDesc> contentDescs;
         for (auto&& platformType : PlatformTypes) {
             unique_ptr<Platform> platform{ new Platform(platformType) };
             // Add content
             if (platformType == PlatformType::Cuda && interp != 1)
                 continue;   // CUDA platform only supports linear interpolation
             unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
-            unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid.get()) };
+            unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
             contentDescs.push_back({ std::move(content), std::move(platform) });
         }
 
@@ -195,7 +184,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
                 warpedGradient->dim[2] = warpedGradient->ny = 1;
                 warpedGradient->dim[3] = warpedGradient->nz = 1;
                 warpedGradient->dim[5] = warpedGradient->nu = defField->nu;
-                warpedGradient->nvox = CalcVoxelNumber(*warpedGradient, warpedGradient->ndim);
+                warpedGradient->nvox = NiftiImage::calcVoxelNumber(warpedGradient, warpedGradient->ndim);
 
                 // Set the deformation field
                 content->SetDeformationField(defField);
@@ -204,6 +193,10 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->GetImageGradient(interp, 0, 0);
 
+                // TODO: Fix this
+                // To prevent the content from deleting the deformation field
+                content->SetDeformationField(nullptr);
+
                 // Check all values
                 warpedGradient = content->GetWarpedGradient();
                 auto warpedGradPtr = static_cast<float*>(warpedGradient->data);
@@ -214,7 +207,4 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
             }
         }
     }
-    // Clean up
-    nifti_image_free(reference2d);
-    nifti_image_free(reference3d);
 }

From 4f22230ae5db73337f81ff1b9de1f2e3786c1973 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Mar 2023 18:47:01 +0000
Subject: [PATCH 092/314] Small fixes

---
 niftyreg_build_version.txt | 2 +-
 reg-apps/reg_aladin.cpp    | 8 ++++----
 reg-io/RNifti/NiftiImage.h | 4 ++--
 reg-lib/cpu/_reg_tools.cpp | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index dba40afc..0d389107 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-211
+212
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 133557c4..1ced15cb 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -379,8 +379,8 @@ int main(int argc, char **argv) {
 
     // Set the reference and floating images
     // make the images isotropic if required
-    reg->SetInputReference(iso ? reg_makeIsotropic(referenceHeader, 1) : referenceHeader);
-    reg->SetInputFloating(iso ? reg_makeIsotropic(floatingHeader, 1) : floatingHeader);
+    reg->SetInputReference(iso ? NiftiImage(reg_makeIsotropic(referenceHeader, 1)) : referenceHeader);
+    reg->SetInputFloating(iso ? NiftiImage(reg_makeIsotropic(floatingHeader, 1)) : floatingHeader);
 
     /* read the reference mask image */
     if (referenceMaskFlag) {
@@ -398,7 +398,7 @@ int main(int argc, char **argv) {
             }
         }
         // make the image isotropic if required
-        reg->SetInputMask(iso ? reg_makeIsotropic(referenceMaskImage, 0) : std::move(referenceMaskImage));
+        reg->SetInputMask(iso ? NiftiImage(reg_makeIsotropic(referenceMaskImage, 0)) : std::move(referenceMaskImage));
     }
     /* Read the floating mask image */
     if (floatingMaskFlag && symFlag) {
@@ -416,7 +416,7 @@ int main(int argc, char **argv) {
             }
         }
         // make the image isotropic if required
-        reg->SetInputFloatingMask(iso ? reg_makeIsotropic(floatingMaskImage, 0) : std::move(floatingMaskImage));
+        reg->SetInputFloatingMask(iso ? NiftiImage(reg_makeIsotropic(floatingMaskImage, 0)) : std::move(floatingMaskImage));
     }
 
     reg->SetMaxIterations(maxIter);
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 30943fbd..50a8a435 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1871,13 +1871,13 @@ class NiftiImage
      * Return the number of voxels per slice
      * @return An integer giving the number of voxels per slice
     */
-    size_t nVoxelsPerSlice () const { return calcVoxelNumber(*this, 2); }
+    size_t nVoxelsPerSlice () const { return calcVoxelNumber(image, 2); }
 
     /**
      * Return the number of voxels per volume
      * @return An integer giving the number of voxels per volume
     */
-    size_t nVoxelsPerVolume () const { return calcVoxelNumber(*this, 3); }
+    size_t nVoxelsPerVolume () const { return calcVoxelNumber(image, 3); }
 
     /**
      * Return the number of extensions associated with the image
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 4c6f68ce..d113001f 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -1338,7 +1338,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
                             }
                             currIterator = tmp_lab.begin();
                             maxindex = 0;
-                            maxval = -std::numeric_limits<float>::max();;
+                            maxval = std::numeric_limits<float>::min();
                             while (currIterator != tmp_lab.end()) {
                                 if (currIterator->second > maxval) {
                                     maxindex = currIterator->first;

From 876a88d37d0430548ee41ec96ab50a6f8005d09c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Mar 2023 16:33:41 +0000
Subject: [PATCH 093/314] Add NiftiImage::disown() to release the wrapped
 pointer

---
 niftyreg_build_version.txt |  2 +-
 reg-io/RNifti/NiftiImage.h | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 0d389107..964480f6 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-212
+213
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 50a8a435..f0ab5de1 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1538,6 +1538,16 @@ class NiftiImage
     */
     operator bool () const { return (image != nullptr); }
 
+    /**
+     * Disown the wrapped pointer, removing responsibility for freeing it upon destruction
+     * @return The wrapped pointer
+    */
+    nifti_image* disown () {
+        nifti_image *img = image;
+        image = nullptr;
+        return img;
+    }
+
     /**
      * Mark the image as persistent, so that it can be passed back to R
      * @param persistent The new persistence state of the object

From 495ce95c3f9441234fd01cb027351c3c012b2c3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Mar 2023 16:34:42 +0000
Subject: [PATCH 094/314] Use NiftiImage::disown() in reg_test_imageGradient

---
 niftyreg_build_version.txt          | 2 +-
 reg-test/reg_test_imageGradient.cpp | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 964480f6..9d683f8c 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-213
+214
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index a9992924..159dc2b5 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -187,16 +187,12 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
                 warpedGradient->nvox = NiftiImage::calcVoxelNumber(warpedGradient, warpedGradient->ndim);
 
                 // Set the deformation field
-                content->SetDeformationField(defField);
+                content->SetDeformationField(defField.disown());
 
                 // Do the computation
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->GetImageGradient(interp, 0, 0);
 
-                // TODO: Fix this
-                // To prevent the content from deleting the deformation field
-                content->SetDeformationField(nullptr);
-
                 // Check all values
                 warpedGradient = content->GetWarpedGradient();
                 auto warpedGradPtr = static_cast<float*>(warpedGradient->data);

From 8f96921616c4362bbe7c7c9a4ab2c19b320a4dbb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Mar 2023 17:00:23 +0000
Subject: [PATCH 095/314] Refactor reg_test_interpolation using NiftiImage

---
 niftyreg_build_version.txt          |  2 +-
 reg-test/reg_test_interpolation.cpp | 88 ++++++++++++-----------------
 2 files changed, 38 insertions(+), 52 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9d683f8c..c34a8046 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-214
+215
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 91cdb08f..419f544e 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -15,55 +15,44 @@
 */
 
 
-typedef std::tuple<std::string, nifti_image*, nifti_image*, int, float*> TestData;
+typedef std::tuple<std::string, NiftiImage, NiftiImage, int, float*> TestData;
 typedef std::tuple<unique_ptr<Content>, shared_ptr<Platform>> ContentDesc;
 
 TEST_CASE("Interpolation", "[Interpolation]") {
     // Create a reference 2D image
-    int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 };
-    nifti_image *reference2d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference2d);
+    vector<NiftiImage::dim_t> dimFlo{ 4, 4 };
+    NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
 
     // Fill image with distance from identity
-    auto ref2dPtr = static_cast<float*>(reference2d->data);
-    for (auto y = 0; y < reference2d->ny; ++y) {
-        for (auto x = 0; x < reference2d->nx; ++x) {
-            *ref2dPtr = sqrtf(float(x * x) + float(y * y));
-            ref2dPtr++;
-        }
-    }
-    ref2dPtr = static_cast<float*>(reference2d->data);
+    const auto ref2dPtr = reference2d.data();
+    auto ref2dIt = ref2dPtr.begin();
+    for (auto y = 0; y < reference2d->ny; ++y)
+        for (auto x = 0; x < reference2d->nx; ++x)
+            *ref2dIt++ = sqrtf(float(x * x) + float(y * y));
 
     // Create a corresponding 2D deformation field
-    int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 };
-    nifti_image *deformationField2d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(deformationField2d);
-    auto def2dPtr = static_cast<float*>(deformationField2d->data);
+    vector<NiftiImage::dim_t> dimDef{ 1, 1, 1, 1, 2 };
+    NiftiImage deformationField2d(dimDef, NIFTI_TYPE_FLOAT32);
+    auto def2dPtr = deformationField2d.data();
     def2dPtr[0] = 1.2f;
     def2dPtr[1] = 1.3f;
 
     // Create a reference 3D image
-    dimFlo[0] = 3; dimFlo[3] = 4;
-    nifti_image *reference3d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference3d);
+    dimFlo.push_back(4);
+    NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32);
 
     // Fill image with distance from identity
-    auto ref3dPtr = static_cast<float*>(reference3d->data);
-    for (auto z = 0; z < reference3d->nz; ++z) {
-        for (auto y = 0; y < reference3d->ny; ++y) {
-            for (auto x = 0; x < reference3d->nx; ++x) {
-                *ref3dPtr = sqrtf(float(x * x) + float(y * y) + float(z * z));
-                ref3dPtr++;
-            }
-        }
-    }
-    ref3dPtr = static_cast<float*>(reference3d->data);
+    const auto ref3dPtr = reference3d.data();
+    auto ref3dIt = ref3dPtr.begin();
+    for (auto z = 0; z < reference3d->nz; ++z)
+        for (auto y = 0; y < reference3d->ny; ++y)
+            for (auto x = 0; x < reference3d->nx; ++x)
+                *ref3dIt++ = sqrtf(float(x * x) + float(y * y) + float(z * z));
 
     // Create a corresponding 3D deformation field
-    dimDef[5] = 3;
-    nifti_image *deformationField3d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(deformationField3d);
-    auto def3dPtr = static_cast<float*>(deformationField3d->data);
+    dimDef[4] = 3;
+    NiftiImage deformationField3d(dimDef, NIFTI_TYPE_FLOAT32);
+    auto def3dPtr = deformationField3d.data();
     def3dPtr[0] = 1.2f;
     def3dPtr[1] = 1.3f;
     def3dPtr[2] = 1.4f;
@@ -73,12 +62,12 @@ TEST_CASE("Interpolation", "[Interpolation]") {
 
     // Linear interpolation - 2D
     // coordinate in image: [1.2, 1.3]
-    float resLinear2d[1] = {0};
+    float resLinear2d[1] = {};
     for (int y = 1; y <= 2; ++y) {
         for (int x = 1; x <= 2; ++x) {
-            resLinear2d[0] += ref2dPtr[y * dimFlo[1] + x] *
-                abs(2.0f - (float)x - 0.2f) *
-                abs(2.0f - (float)y - 0.3f);
+            resLinear2d[0] += float(ref2dPtr[y * dimFlo[1] + x]) *
+                abs(2.0f - float(x) - 0.2f) *
+                abs(2.0f - float(y) - 0.3f);
         }
     }
 
@@ -106,13 +95,13 @@ TEST_CASE("Interpolation", "[Interpolation]") {
 
     // Cubic spline interpolation - 2D
     // coordinate in image: [1.2, 1.3]
-    float resCubic2d[1] = {0};
+    float resCubic2d[1] = {};
     float xBasis[4], yBasis[4];
     InterpCubicSplineKernel(0.2f, xBasis);
     InterpCubicSplineKernel(0.3f, yBasis);
     for (int y = 0; y <= 3; ++y) {
         for (int x = 0; x <= 3; ++x) {
-            resCubic2d[0] += ref2dPtr[y * dimFlo[1] + x] * xBasis[x] * yBasis[y];
+            resCubic2d[0] += float(ref2dPtr[y * dimFlo[1] + x]) * xBasis[x] * yBasis[y];
         }
     }
 
@@ -127,14 +116,14 @@ TEST_CASE("Interpolation", "[Interpolation]") {
 
     // Linear interpolation - 3D
     // coordinate in image: [1.2, 1.3, 1.4]
-    float resLinear3d[1] = {0};
+    float resLinear3d[1] = {};
     for (int z = 1; z <= 2; ++z) {
         for (int y = 1; y <= 2; ++y) {
             for (int x = 1; x <= 2; ++x) {
-                resLinear3d[0] += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] *
-                    abs(2.0f - (float)x - 0.2f) *
-                    abs(2.0f - (float)y - 0.3f) *
-                    abs(2.0f - (float)z - 0.4f);
+                resLinear3d[0] += float(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) *
+                    abs(2.0f - float(x) - 0.2f) *
+                    abs(2.0f - float(y) - 0.3f) *
+                    abs(2.0f - float(z) - 0.4f);
             }
         }
     }
@@ -163,13 +152,13 @@ TEST_CASE("Interpolation", "[Interpolation]") {
 
     // Cubic spline interpolation - 3D
     // coordinate in image: [1.2, 1.3, 1.4]
-    float resCubic3d[1] = {0};
+    float resCubic3d[1] = {};
     float zBasis[4];
     InterpCubicSplineKernel(0.4f, zBasis);
     for (int z = 0; z <= 3; ++z) {
         for (int y = 0; y <= 3; ++y) {
             for (int x = 0; x <= 3; ++x) {
-                resCubic3d[0] += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] * xBasis[x] * yBasis[y] * zBasis[z];
+                resCubic3d[0] += float(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) * xBasis[x] * yBasis[y] * zBasis[z];
             }
         }
     }
@@ -217,12 +206,12 @@ TEST_CASE("Interpolation", "[Interpolation]") {
                 warped->dim[2] = warped->ny = 1;
                 warped->dim[3] = warped->nz = 1;
                 warped->dim[5] = warped->nu = 1;
-                warped->nvox = CalcVoxelNumber(*warped, warped->ndim);
+                warped->nvox = NiftiImage::calcVoxelNumber(warped, warped->ndim);
                 warped->data = calloc(warped->nvox, warped->nbyper);
                 content->SetWarped(warped);
 
                 // Set the deformation field
-                content->SetDeformationField(defField);
+                content->SetDeformationField(defField.disown());
 
                 // Do the computation
                 if (isAladinContent) {
@@ -243,7 +232,4 @@ TEST_CASE("Interpolation", "[Interpolation]") {
             }
         }
     }
-    // Clean up
-    nifti_image_free(reference2d);
-    nifti_image_free(reference3d);
 }

From 751f44730c273ee07105f715fbff1bfc43025a0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Mar 2023 21:26:46 +0000
Subject: [PATCH 096/314] Add NiftiImage::realloc() to reallocate the image
 data

---
 niftyreg_build_version.txt |  2 +-
 reg-io/RNifti/NiftiImage.h | 37 ++++++++++++++++++++++++++-----------
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c34a8046..a817176f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-215
+216
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index f0ab5de1..6d6cae5c 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1487,6 +1487,17 @@ class NiftiImage
     **/
     virtual ~NiftiImage () { release(); }
 
+    /**
+     * Disown the wrapped pointer, removing responsibility for freeing it upon destruction
+     * @return The wrapped pointer
+    */
+    nifti_image* disown ()
+    {
+        nifti_image *img = image;
+        image = nullptr;
+        return img;
+    }
+
     /**
      * Allows a \c NiftiImage object to be treated as a pointer to a \c const \c nifti_image
     **/
@@ -1538,16 +1549,6 @@ class NiftiImage
     */
     operator bool () const { return (image != nullptr); }
 
-    /**
-     * Disown the wrapped pointer, removing responsibility for freeing it upon destruction
-     * @return The wrapped pointer
-    */
-    nifti_image* disown () {
-        nifti_image *img = image;
-        image = nullptr;
-        return img;
-    }
-
     /**
      * Mark the image as persistent, so that it can be passed back to R
      * @param persistent The new persistence state of the object
@@ -1706,6 +1707,20 @@ class NiftiImage
         return *this;
     }
 
+    /**
+     * Reallocate the image data, preserving the metadata
+     * @note Recalculates the number of voxels in the image and updates the nvox field
+    */
+    void realloc ()
+    {
+        if (image == nullptr)
+            return;
+        if (image->data)
+            free(image->data);
+        recalcVoxelNumber();
+        image->data = calloc(1, nifti_get_volsize(image));
+    }
+
     /**
      * Rescale the image, changing its image dimensions and pixel dimensions
      * @param scales Vector of scale factors along each dimension
@@ -1864,7 +1879,7 @@ class NiftiImage
     }
 
     /**
-     * Recalculate the number of voxels in the image
+     * Recalculate the number of voxels in the image and update the nvox field
     */
     void recalcVoxelNumber() {
         if (image != nullptr)

From 19883fbe81109d88654412f3665a57ebb4687830 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Mar 2023 21:33:38 +0000
Subject: [PATCH 097/314] Add NiftiImage::setDim() to set a dimension of the
 image

---
 niftyreg_build_version.txt |  2 +-
 reg-io/RNifti/NiftiImage.h | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a817176f..0ddd619c 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-216
+217
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 6d6cae5c..12f13541 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1603,6 +1603,43 @@ class NiftiImage
             return std::vector<dim_t>(image->dim+1, image->dim+image->ndim+1);
     }
 
+    /**
+     * Set a dimension of the image
+     * @param dim The dimension to set
+     * @param value The new value of the dimension
+    */
+    void setDim (const int dim, const dim_t value)
+    {
+        if (image == nullptr)
+            return;
+        switch (dim) {
+        case 0:
+            image->dim[0] = image->ndim = value;
+            break;
+        case 1:
+            image->dim[1] = image->nx = value;
+            break;
+        case 2:
+            image->dim[2] = image->ny = value;
+            break;
+        case 3:
+            image->dim[3] = image->nz = value;
+            break;
+        case 4:
+            image->dim[4] = image->nt = value;
+            break;
+        case 5:
+            image->dim[5] = image->nu = value;
+            break;
+        case 6:
+            image->dim[6] = image->nv = value;
+            break;
+        case 7:
+            image->dim[7] = image->nw = value;
+            break;
+        }
+    }
+
     /**
      * Return the dimensions of the pixels or voxels in the image
      * @return A vector of floating-point values giving the pixel width in each dimension

From d13cf2d0ea398292a0992f8b2259d26655a34f85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Mar 2023 21:35:37 +0000
Subject: [PATCH 098/314] Add ability to NiftiImage for copying only image info

---
 niftyreg_build_version.txt      |  2 +-
 reg-io/RNifti/NiftiImage.h      | 13 ++++++++-----
 reg-io/RNifti/NiftiImage_impl.h |  6 +++---
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 0ddd619c..dc6f4a87 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-217
+218
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 12f13541..031a7ed2 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1291,8 +1291,9 @@ class NiftiImage
     /**
      * Copy the contents of a \c nifti_image to create a new image, acquiring the new pointer
      * @param source A pointer to a \c nifti_image
+     * @param onlyImageInfo If \c true, only the image info is copied; otherwise the data are also copied
     **/
-    void copy (const nifti_image *source);
+    void copy (const nifti_image *source, const bool onlyImageInfo);
 
     /**
      * Copy the contents of a \ref Block to create a new image, acquiring a new pointer
@@ -1378,12 +1379,13 @@ class NiftiImage
      * @param source Another \c NiftiImage object
      * @param copy If \c true, the underlying \c nifti_image will be copied; otherwise the new
      * object wraps the same \c nifti_image and increments the shared reference count
+     * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied
     **/
-    NiftiImage (const NiftiImage &source, const bool copy = true)
+    NiftiImage (const NiftiImage &source, const bool copy = true, const bool onlyImageInfo = false)
         : NiftiImage()
     {
         if (copy) {
-            this->copy(source);
+            this->copy(source, onlyImageInfo);
         } else {
             refCount = source.refCount;
             acquire(source.image);
@@ -1424,12 +1426,13 @@ class NiftiImage
      * @param image An existing \c nifti_image pointer, possibly \c nullptr
      * @param copy If \c true, the image data will be copied; otherwise this object just wraps
      * the pointer passed to it
+     * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied
     **/
-    NiftiImage (nifti_image * const image, const bool copy = false)
+    NiftiImage (nifti_image * const image, const bool copy = false, const bool onlyImageInfo = false)
         : NiftiImage()
     {
         if (copy)
-            this->copy(image);
+            this->copy(image, onlyImageInfo);
         else
             acquire(image);
 #ifndef NDEBUG
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index e9692998..8136b963 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -801,7 +801,7 @@ inline void NiftiImage::release ()
     }
 }
 
-inline void NiftiImage::copy (const nifti_image *source)
+inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInfo)
 {
     if (source == nullptr)
         acquire(nullptr);
@@ -809,7 +809,7 @@ inline void NiftiImage::copy (const nifti_image *source)
     {
 #if RNIFTI_NIFTILIB_VERSION == 1
         acquire(nifti_copy_nim_info(source));
-        if (source->data != nullptr)
+        if (!onlyImageInfo && source->data != nullptr)
         {
             size_t dataSize = nifti_get_volsize(source);
             image->data = calloc(1, dataSize);
@@ -817,7 +817,7 @@ inline void NiftiImage::copy (const nifti_image *source)
         }
 #elif RNIFTI_NIFTILIB_VERSION == 2
         acquire(nifti2_copy_nim_info(source));
-        if (source->data != nullptr)
+        if (!onlyImageInfo && source->data != nullptr)
         {
             size_t dataSize = nifti2_get_volsize(source);
             image->data = calloc(1, dataSize);

From 5f92c68f8fb0564126be7ed17db651f1c9eb1763 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Mar 2023 21:43:40 +0000
Subject: [PATCH 099/314] Update tests to leverage new abilities of NiftiImage

---
 niftyreg_build_version.txt          |  2 +-
 reg-test/reg_test_imageGradient.cpp | 22 ++++++++++++----------
 reg-test/reg_test_interpolation.cpp | 24 ++++++++++++------------
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index dc6f4a87..037ba971 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-218
+219
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 159dc2b5..fed67f71 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -178,13 +178,14 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
             auto&& [content, platform] = contentDesc;
             SECTION(testName + " " + platform->GetName()) {
                 // Set the warped gradient image to host the computation
-                auto warpedGradient = content->GetWarpedGradient();
-                warpedGradient->ndim = warpedGradient->dim[0] = defField->ndim;
-                warpedGradient->dim[1] = warpedGradient->nx = 1;
-                warpedGradient->dim[2] = warpedGradient->ny = 1;
-                warpedGradient->dim[3] = warpedGradient->nz = 1;
-                warpedGradient->dim[5] = warpedGradient->nu = defField->nu;
-                warpedGradient->nvox = NiftiImage::calcVoxelNumber(warpedGradient, warpedGradient->ndim);
+                NiftiImage warpedGradient(content->GetWarpedGradient());
+                warpedGradient.setDim(0, defField->ndim);
+                warpedGradient.setDim(1, 1);
+                warpedGradient.setDim(2, 1);
+                warpedGradient.setDim(3, 1);
+                warpedGradient.setDim(5, defField->nu);
+                warpedGradient.recalcVoxelNumber();
+                warpedGradient.disown();
 
                 // Set the deformation field
                 content->SetDeformationField(defField.disown());
@@ -195,11 +196,12 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
 
                 // Check all values
                 warpedGradient = content->GetWarpedGradient();
-                auto warpedGradPtr = static_cast<float*>(warpedGradient->data);
+                auto warpedGradPtr = warpedGradient.data();
                 for (size_t i = 0; i < warpedGradient->nvox; ++i) {
-                    std::cout << i << " " << warpedGradPtr[i] << " " << testResult[i] << std::endl;
-                    REQUIRE(fabs(warpedGradPtr[i] - testResult[i]) < EPS);
+                    std::cout << i << " " << float(warpedGradPtr[i]) << " " << testResult[i] << std::endl;
+                    REQUIRE(fabs(float(warpedGradPtr[i]) - testResult[i]) < EPS);
                 }
+                warpedGradient.disown();
             }
         }
     }
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 419f544e..4f29a66e 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -200,15 +200,14 @@ TEST_CASE("Interpolation", "[Interpolation]") {
             auto contentName = isAladinContent ? "Aladin" : "Base";
             SECTION(testName + " " + platform->GetName() + " - " + contentName) {
                 // Create and set a warped image to host the computation
-                nifti_image *warped = nifti_copy_nim_info(defField);
-                warped->ndim = warped->dim[0] = defField->nu;
-                warped->dim[1] = warped->nx = 1;
-                warped->dim[2] = warped->ny = 1;
-                warped->dim[3] = warped->nz = 1;
-                warped->dim[5] = warped->nu = 1;
-                warped->nvox = NiftiImage::calcVoxelNumber(warped, warped->ndim);
-                warped->data = calloc(warped->nvox, warped->nbyper);
-                content->SetWarped(warped);
+                NiftiImage warped(defField, true, true);
+                warped.setDim(0, defField->nu);
+                warped.setDim(1, 1);
+                warped.setDim(2, 1);
+                warped.setDim(3, 1);
+                warped.setDim(5, 1);
+                warped.realloc();
+                content->SetWarped(warped.disown());
 
                 // Set the deformation field
                 content->SetDeformationField(defField.disown());
@@ -224,11 +223,12 @@ TEST_CASE("Interpolation", "[Interpolation]") {
 
                 // Check all values
                 warped = content->GetWarped();
-                auto warpedPtr = static_cast<float*>(warped->data);
+                auto warpedPtr = warped.data();
                 for (size_t i = 0; i < warped->nvox; ++i) {
-                    std::cout << i << " " << warpedPtr[i] << " " << testResult[i] << std::endl;
-                    REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS);
+                    std::cout << i << " " << float(warpedPtr[i]) << " " << testResult[i] << std::endl;
+                    REQUIRE(fabs(float(warpedPtr[i]) - testResult[i]) < EPS);
                 }
+                warped.disown();
             }
         }
     }

From 4947c2ee25d7e7bdf7ae81880c69c5620d50a648 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 15 Mar 2023 19:48:44 +0000
Subject: [PATCH 100/314] Add an enum for NiftiImage dimensions

---
 niftyreg_build_version.txt          |  2 +-
 reg-io/RNifti/NiftiImage.h          | 20 +++++++++++---------
 reg-lib/cpu/_reg_tools.h            |  1 +
 reg-test/reg_test_imageGradient.cpp | 10 +++++-----
 reg-test/reg_test_interpolation.cpp | 10 +++++-----
 5 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 037ba971..3d4c7bfe 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-219
+220
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 031a7ed2..4939287c 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -855,6 +855,8 @@ class NiftiImage
     typedef double scale_t;             /**< Type used for scale elements */
 #endif
 
+    enum class Dim { X, Y, Z, T, U, V, W, NDim };    /**< Dimension enumeration */
+
     /**
      * Inner class referring to a subset of an image. Currently must refer to the last
      * dimension in the image, i.e., a volume in a 4D parent image, or a slice in a 3D image
@@ -1611,33 +1613,33 @@ class NiftiImage
      * @param dim The dimension to set
      * @param value The new value of the dimension
     */
-    void setDim (const int dim, const dim_t value)
+    void setDim (const Dim dim, const dim_t value)
     {
         if (image == nullptr)
             return;
         switch (dim) {
-        case 0:
+        case Dim::NDim:
             image->dim[0] = image->ndim = value;
             break;
-        case 1:
+        case Dim::X:
             image->dim[1] = image->nx = value;
             break;
-        case 2:
+        case Dim::Y:
             image->dim[2] = image->ny = value;
             break;
-        case 3:
+        case Dim::Z:
             image->dim[3] = image->nz = value;
             break;
-        case 4:
+        case Dim::T:
             image->dim[4] = image->nt = value;
             break;
-        case 5:
+        case Dim::U:
             image->dim[5] = image->nu = value;
             break;
-        case 6:
+        case Dim::V:
             image->dim[6] = image->nv = value;
             break;
-        case 7:
+        case Dim::W:
             image->dim[7] = image->nw = value;
             break;
         }
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index c6361c7f..9d1b577d 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -27,6 +27,7 @@ using std::shared_ptr;
 using std::vector;
 using RNifti::NiftiImage;
 using RNifti::NiftiImageData;
+using NiftiDim = NiftiImage::Dim;
 
 typedef enum {
     MEAN_KERNEL,
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index fed67f71..2e51ca1c 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -179,11 +179,11 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
             SECTION(testName + " " + platform->GetName()) {
                 // Set the warped gradient image to host the computation
                 NiftiImage warpedGradient(content->GetWarpedGradient());
-                warpedGradient.setDim(0, defField->ndim);
-                warpedGradient.setDim(1, 1);
-                warpedGradient.setDim(2, 1);
-                warpedGradient.setDim(3, 1);
-                warpedGradient.setDim(5, defField->nu);
+                warpedGradient.setDim(NiftiDim::NDim, defField->ndim);
+                warpedGradient.setDim(NiftiDim::X, 1);
+                warpedGradient.setDim(NiftiDim::Y, 1);
+                warpedGradient.setDim(NiftiDim::Z, 1);
+                warpedGradient.setDim(NiftiDim::U, defField->nu);
                 warpedGradient.recalcVoxelNumber();
                 warpedGradient.disown();
 
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 4f29a66e..38efe61d 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -201,11 +201,11 @@ TEST_CASE("Interpolation", "[Interpolation]") {
             SECTION(testName + " " + platform->GetName() + " - " + contentName) {
                 // Create and set a warped image to host the computation
                 NiftiImage warped(defField, true, true);
-                warped.setDim(0, defField->nu);
-                warped.setDim(1, 1);
-                warped.setDim(2, 1);
-                warped.setDim(3, 1);
-                warped.setDim(5, 1);
+                warped.setDim(NiftiDim::NDim, defField->nu);
+                warped.setDim(NiftiDim::X, 1);
+                warped.setDim(NiftiDim::Y, 1);
+                warped.setDim(NiftiDim::Z, 1);
+                warped.setDim(NiftiDim::U, 1);
                 warped.realloc();
                 content->SetWarped(warped.disown());
 

From d087265b10a55fa54be288137687f9f500fa9612 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 15 Mar 2023 19:50:15 +0000
Subject: [PATCH 101/314] Small fixes

---
 niftyreg_build_version.txt          |   2 +-
 reg-io/RNifti/NiftiImage.h          |   6 +-
 reg-io/RNifti/NiftiImage_impl.h     | 112 ++++++++++++++--------------
 reg-lib/cpu/_reg_tools.cpp          |   4 +-
 reg-test/reg_test_imageGradient.cpp |   5 +-
 reg-test/reg_test_interpolation.cpp |   5 +-
 6 files changed, 69 insertions(+), 65 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3d4c7bfe..7b473380 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-220
+221
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 4939287c..ccfe6782 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1915,8 +1915,10 @@ class NiftiImage
         if (image == nullptr)
             return 0;
         size_t voxelNumber = 1;
-        for (int i = 1; i <= dimCount; i++)
-            voxelNumber *= static_cast<size_t>(std::abs(image->dim[i]));
+        for (int i = 1; i <= dimCount; i++) {
+            const size_t dim = static_cast<size_t>(std::abs(image->dim[i]));
+            voxelNumber *= dim > 0 ? dim : 1;
+        }
         return voxelNumber;
     }
 
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index 8136b963..d3dbd313 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -405,62 +405,6 @@ inline void addAttributes (const SEXP pointer, const NiftiImage &source, const b
 
 }       // internal namespace
 
-inline void NiftiImage::correctDimensions() {
-    // Ensure that no dimension is set to zero
-    if (image->nx < 1 || image->dim[1] < 1) image->dim[1] = image->nx = 1;
-    if (image->ny < 1 || image->dim[2] < 1) image->dim[2] = image->ny = 1;
-    if (image->nz < 1 || image->dim[3] < 1) image->dim[3] = image->nz = 1;
-    if (image->nt < 1 || image->dim[4] < 1) image->dim[4] = image->nt = 1;
-    if (image->nu < 1 || image->dim[5] < 1) image->dim[5] = image->nu = 1;
-    if (image->nv < 1 || image->dim[6] < 1) image->dim[6] = image->nv = 1;
-    if (image->nw < 1 || image->dim[7] < 1) image->dim[7] = image->nw = 1;
-    //Correcting the dim of the images
-    for (int i = 1; i < 8; ++i) {
-        if (image->dim[i] > 1) {
-            image->dim[0] = image->ndim = i;
-        }
-    }
-    // Set the slope to 1 if undefined
-    if (image->scl_slope == 0) image->scl_slope = 1.f;
-    // Ensure that no spacing is set to zero
-    if (image->ny == 1 && (image->dy == 0 || image->pixdim[2] == 0))
-        image->dy = image->pixdim[2] = 1;
-    if (image->nz == 1 && (image->dz == 0 || image->pixdim[3] == 0))
-        image->dz = image->pixdim[3] = 1;
-    // Create the qform matrix if required
-    if (image->qform_code == 0 && image->sform_code == 0) {
-        image->qto_xyz = nifti_quatern_to_mat44(image->quatern_b,
-                                                image->quatern_c,
-                                                image->quatern_d,
-                                                image->qoffset_x,
-                                                image->qoffset_y,
-                                                image->qoffset_z,
-                                                image->dx,
-                                                image->dy,
-                                                image->dz,
-                                                image->qfac);
-        image->qto_ijk = nifti_mat44_inverse(image->qto_xyz);
-    }
-    // Set the voxel spacing to millimetres
-    if (image->xyz_units == NIFTI_UNITS_MICRON) {
-        for (int d = 1; d <= image->ndim; ++d)
-            image->pixdim[d] /= 1000.f;
-        image->xyz_units = NIFTI_UNITS_MM;
-    }
-    if (image->xyz_units == NIFTI_UNITS_METER) {
-        for (int d = 1; d <= image->ndim; ++d)
-            image->pixdim[d] *= 1000.f;
-        image->xyz_units = NIFTI_UNITS_MM;
-    }
-    image->dx = image->pixdim[1];
-    image->dy = image->pixdim[2];
-    image->dz = image->pixdim[3];
-    image->dt = image->pixdim[4];
-    image->du = image->pixdim[5];
-    image->dv = image->pixdim[6];
-    image->dw = image->pixdim[7];
-}
-
 template <typename Type, bool alpha>
 inline void NiftiImageData::ConcreteTypeHandler<Type,alpha>::minmax (void *ptr, const size_t length, double *min, double *max) const
 {
@@ -1200,6 +1144,62 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo
 
 #endif // USING_R
 
+inline void NiftiImage::correctDimensions() {
+    // Ensure that no dimension is set to zero
+    if (image->nx < 1 || image->dim[1] < 1) image->dim[1] = image->nx = 1;
+    if (image->ny < 1 || image->dim[2] < 1) image->dim[2] = image->ny = 1;
+    if (image->nz < 1 || image->dim[3] < 1) image->dim[3] = image->nz = 1;
+    if (image->nt < 1 || image->dim[4] < 1) image->dim[4] = image->nt = 1;
+    if (image->nu < 1 || image->dim[5] < 1) image->dim[5] = image->nu = 1;
+    if (image->nv < 1 || image->dim[6] < 1) image->dim[6] = image->nv = 1;
+    if (image->nw < 1 || image->dim[7] < 1) image->dim[7] = image->nw = 1;
+    //Correcting the dim of the images
+    for (int i = 1; i < 8; ++i) {
+        if (image->dim[i] > 1) {
+            image->dim[0] = image->ndim = i;
+        }
+    }
+    // Set the slope to 1 if undefined
+    if (image->scl_slope == 0) image->scl_slope = 1.f;
+    // Ensure that no spacing is set to zero
+    if (image->ny == 1 && (image->dy == 0 || image->pixdim[2] == 0))
+        image->dy = image->pixdim[2] = 1;
+    if (image->nz == 1 && (image->dz == 0 || image->pixdim[3] == 0))
+        image->dz = image->pixdim[3] = 1;
+    // Create the qform matrix if required
+    if (image->qform_code == 0 && image->sform_code == 0) {
+        image->qto_xyz = nifti_quatern_to_mat44(image->quatern_b,
+                                                image->quatern_c,
+                                                image->quatern_d,
+                                                image->qoffset_x,
+                                                image->qoffset_y,
+                                                image->qoffset_z,
+                                                image->dx,
+                                                image->dy,
+                                                image->dz,
+                                                image->qfac);
+        image->qto_ijk = nifti_mat44_inverse(image->qto_xyz);
+    }
+    // Set the voxel spacing to millimetres
+    if (image->xyz_units == NIFTI_UNITS_MICRON) {
+        for (int d = 1; d <= image->ndim; ++d)
+            image->pixdim[d] /= 1000.f;
+        image->xyz_units = NIFTI_UNITS_MM;
+    }
+    if (image->xyz_units == NIFTI_UNITS_METER) {
+        for (int d = 1; d <= image->ndim; ++d)
+            image->pixdim[d] *= 1000.f;
+        image->xyz_units = NIFTI_UNITS_MM;
+    }
+    image->dx = image->pixdim[1];
+    image->dy = image->pixdim[2];
+    image->dz = image->pixdim[3];
+    image->dt = image->pixdim[4];
+    image->du = image->pixdim[5];
+    image->dv = image->pixdim[6];
+    image->dw = image->pixdim[7];
+}
+
 inline void NiftiImage::initFromDims (const std::vector<dim_t> &dim, const int datatype)
 {
     const int nDims = std::min(7, int(dim.size()));
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index d113001f..a9646702 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -365,10 +365,10 @@ PrecisionType reg_getMaximalLength(const nifti_image *image,
                                    const bool& optimiseZ) {
     switch (image->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        return reg_getMaximalLength<PrecisionType, float>(image, optimiseX, optimiseY, image->nz == 1 ? false : optimiseZ);
+        return reg_getMaximalLength<PrecisionType, float>(image, optimiseX, optimiseY, image->nz > 1 ? optimiseZ : false);
         break;
     case NIFTI_TYPE_FLOAT64:
-        return reg_getMaximalLength<PrecisionType, double>(image, optimiseX, optimiseY, image->nz == 1 ? false : optimiseZ);
+        return reg_getMaximalLength<PrecisionType, double>(image, optimiseX, optimiseY, image->nz > 1 ? optimiseZ : false);
         break;
     }
     return EXIT_SUCCESS;
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 2e51ca1c..bc2893af 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -198,8 +198,9 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
                 warpedGradient = content->GetWarpedGradient();
                 auto warpedGradPtr = warpedGradient.data();
                 for (size_t i = 0; i < warpedGradient->nvox; ++i) {
-                    std::cout << i << " " << float(warpedGradPtr[i]) << " " << testResult[i] << std::endl;
-                    REQUIRE(fabs(float(warpedGradPtr[i]) - testResult[i]) < EPS);
+                    const float warpedGradVal = warpedGradPtr[i];
+                    std::cout << i << " " << warpedGradVal << " " << testResult[i] << std::endl;
+                    REQUIRE(fabs(warpedGradVal - testResult[i]) < EPS);
                 }
                 warpedGradient.disown();
             }
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 38efe61d..aedb41fe 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -225,8 +225,9 @@ TEST_CASE("Interpolation", "[Interpolation]") {
                 warped = content->GetWarped();
                 auto warpedPtr = warped.data();
                 for (size_t i = 0; i < warped->nvox; ++i) {
-                    std::cout << i << " " << float(warpedPtr[i]) << " " << testResult[i] << std::endl;
-                    REQUIRE(fabs(float(warpedPtr[i]) - testResult[i]) < EPS);
+                    const float warpedValue = warpedPtr[i];
+                    std::cout << i << " " << warpedValue << " " << testResult[i] << std::endl;
+                    REQUIRE(fabs(warpedValue - testResult[i]) < EPS);
                 }
                 warped.disown();
             }

From 379c8f9420a8890397aadc1ebe81f8ab13938c30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 15 Mar 2023 19:53:41 +0000
Subject: [PATCH 102/314] Add ability to NiftiImageData for extracting volume
 data

---
 niftyreg_build_version.txt      |  2 +-
 reg-io/RNifti/NiftiImage.h      | 15 ++++++---------
 reg-io/RNifti/NiftiImage_impl.h | 18 ++++++++++++++++++
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 7b473380..c200906e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-221
+222
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index ccfe6782..728d0e35 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -443,14 +443,9 @@ class NiftiImageData
     /**
      * Convenience constructor for a \c nifti_image
      * @param image The image struct whose data the object will wrap
+     * @param vol The volume to extract, or \c -1 for the whole image
     **/
-    NiftiImageData (nifti_image *image)
-    {
-        if (image == nullptr)
-            init(nullptr, 0, DT_NONE, 0.0, 0.0, false);
-        else
-            init(image->data, image->nvox, image->datatype, static_cast<double>(image->scl_slope), static_cast<double>(image->scl_inter), false);
-    }
+    NiftiImageData (nifti_image *image, const int vol = -1);
 
     /**
      * Copy constructor with optional type conversion
@@ -1675,15 +1670,17 @@ class NiftiImage
 
     /**
      * Obtain the pixel data within the image
+     * @param dim The volume to extract, use \c -1 for the whole image
      * @return A constant \c NiftiImageData object encapsulating the data
     **/
-    const NiftiImageData data () const { return NiftiImageData(image); }
+    const NiftiImageData data (const int vol = -1) const { return NiftiImageData(image, vol); }
 
     /**
      * Obtain the pixel data within the image
+     * @param dim The volume to extract, use \c -1 for the whole image
      * @return A mutable \c NiftiImageData object encapsulating the data
     **/
-    NiftiImageData data () { return NiftiImageData(image); }
+    NiftiImageData data (const int vol = -1) { return NiftiImageData(image, vol); }
 
     /**
      * Extract a vector of data from the image, casting it to any required element type
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index d3dbd313..d8c33555 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -511,6 +511,24 @@ inline NiftiImageData::Element & NiftiImageData::Element::operator= (const Nifti
     return *this;
 }
 
+inline NiftiImageData::NiftiImageData (nifti_image *image, const int vol)
+    : NiftiImageData()
+{
+    if (image != nullptr) {
+        size_t offset = 0;
+        size_t length = NiftiImage::calcVoxelNumber(image, image->ndim);
+        if (vol >= 0) {
+            const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(image, 3);
+            offset = static_cast<size_t>(vol) * voxelsPerVolume;
+            if (length > offset) {
+                length = voxelsPerVolume;
+                offset *= image->nbyper;
+            } else return;
+        }
+        init(static_cast<char*>(image->data) + offset, length, image->datatype, static_cast<double>(image->scl_slope), static_cast<double>(image->scl_inter), false);
+    }
+}
+
 inline void NiftiImage::Extension::copy (const nifti1_extension *source)
 {
     if (source == nullptr)

From 4061036259c1d15baeae79412fb333df37b7dc63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 15 Mar 2023 19:55:54 +0000
Subject: [PATCH 103/314] Handle optimise* variables in
 Compute::NormaliseGradient()

---
 niftyreg_build_version.txt |  2 +-
 reg-lib/Compute.cpp        | 33 ++++++++++++++++++++++++++++++---
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c200906e..5f277ae7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-222
+223
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index cee5b7de..de8ce5c7 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -143,9 +143,36 @@ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimis
 }
 /* *************************************************************** */
 void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
-    // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ
-    nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
-    reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / maxGradLength);
+    NiftiImage transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
+    const bool hasZ = transformationGradient->nz > 1;
+    if (!hasZ)
+        optimiseZ = false;
+    NiftiImageData ptrX = transformationGradient.data(0);
+    NiftiImageData ptrY = transformationGradient.data(1);
+    NiftiImageData ptrZ = hasZ ? transformationGradient.data(2) : nullptr;
+
+#ifdef _WIN32
+    long i;
+    const long voxelNumber = static_cast<long>(transformationGradient.nVoxelsPerVolume());
+#else
+    size_t i;
+    const size_t voxelNumber = transformationGradient.nVoxelsPerVolume();
+#endif
+
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+    shared(voxelNumber, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLength)
+#endif
+    for (i = 0; i < voxelNumber; ++i) {
+        const double valX = optimiseX ? static_cast<double>(ptrX[i]) : 0;
+        const double valY = optimiseY ? static_cast<double>(ptrY[i]) : 0;
+        const double valZ = optimiseZ ? static_cast<double>(ptrZ[i]) : 0;
+        ptrX[i] = valX / maxGradLength;
+        ptrY[i] = valY / maxGradLength;
+        if (hasZ)
+            ptrZ[i] = valZ / maxGradLength;
+    }
+    transformationGradient.disown();
 }
 /* *************************************************************** */
 void Compute::SmoothGradient(float sigma) {

From b2266876a81dc6b0801788d1e83663295d6a7710 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 17 Mar 2023 12:07:19 +0000
Subject: [PATCH 104/314] Refactorisations

---
 niftyreg_build_version.txt                   |  2 +-
 reg-io/RNifti/NiftiImage.h                   |  2 +-
 reg-lib/Compute.cpp                          |  8 +--
 reg-lib/cpu/_reg_tools.cpp                   |  8 +--
 reg-test/reg_test_affineDeformationField.cpp | 34 ++++-----
 reg-test/reg_test_common.h                   |  4 +-
 reg-test/reg_test_imageGradient.cpp          | 39 ++++++-----
 reg-test/reg_test_interpolation.cpp          | 73 ++++++++++----------
 8 files changed, 84 insertions(+), 86 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 5f277ae7..20c90807 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-223
+224
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 728d0e35..29855b40 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -850,7 +850,7 @@ class NiftiImage
     typedef double scale_t;             /**< Type used for scale elements */
 #endif
 
-    enum class Dim { X, Y, Z, T, U, V, W, NDim };    /**< Dimension enumeration */
+    enum class Dim { NDim, X, Y, Z, T, U, V, W };    /**< Dimension enumeration */
 
     /**
      * Inner class referring to a subset of an image. Currently must refer to the last
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index de8ce5c7..73f8cb1c 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -153,17 +153,17 @@ void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool op
 
 #ifdef _WIN32
     long i;
-    const long voxelNumber = static_cast<long>(transformationGradient.nVoxelsPerVolume());
+    const long voxelsPerVolume = static_cast<long>(transformationGradient.nVoxelsPerVolume());
 #else
     size_t i;
-    const size_t voxelNumber = transformationGradient.nVoxelsPerVolume();
+    const size_t voxelsPerVolume = transformationGradient.nVoxelsPerVolume();
 #endif
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLength)
+    shared(voxelsPerVolume, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLength)
 #endif
-    for (i = 0; i < voxelNumber; ++i) {
+    for (i = 0; i < voxelsPerVolume; ++i) {
         const double valX = optimiseX ? static_cast<double>(ptrX[i]) : 0;
         const double valY = optimiseY ? static_cast<double>(ptrY[i]) : 0;
         const double valZ = optimiseZ ? static_cast<double>(ptrZ[i]) : 0;
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index a9646702..cf06669d 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -349,10 +349,10 @@ PrecisionType reg_getMaximalLength(const nifti_image *image,
     const DataType *dataPtrZ = &dataPtrY[voxelNumber];
     PrecisionType max = 0;
     for (size_t i = 0; i < voxelNumber; i++) {
-        PrecisionType valX = optimiseX ? static_cast<PrecisionType>(*dataPtrX++) : 0;
-        PrecisionType valY = optimiseY ? static_cast<PrecisionType>(*dataPtrY++) : 0;
-        PrecisionType valZ = optimiseZ ? static_cast<PrecisionType>(*dataPtrZ++) : 0;
-        PrecisionType length = static_cast<PrecisionType>(sqrt(valX * valX + valY * valY + valZ * valZ));
+        const PrecisionType valX = optimiseX ? static_cast<PrecisionType>(*dataPtrX++) : 0;
+        const PrecisionType valY = optimiseY ? static_cast<PrecisionType>(*dataPtrY++) : 0;
+        const PrecisionType valZ = optimiseZ ? static_cast<PrecisionType>(*dataPtrZ++) : 0;
+        const PrecisionType length = static_cast<PrecisionType>(sqrt(valX * valX + valY * valY + valZ * valZ));
         max = std::max(length, max);
     }
     return max;
diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp
index 1c0ddc96..b2177da1 100644
--- a/reg-test/reg_test_affineDeformationField.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -42,8 +42,8 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         &identity,
         identityResult2x,
         identityResult2y,
-        nullptr)
-    );
+        nullptr
+    ));
 
     // Identity use case - 3D
     // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
@@ -56,8 +56,8 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         &identity,
         identityResult3x,
         identityResult3y,
-        identityResult3z)
-    );
+        identityResult3z
+    ));
 
     // Translation - 2D
     mat44 translation;
@@ -74,8 +74,8 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         &translation,
         translationResult2x,
         translationResult2y,
-        nullptr)
-    );
+        nullptr
+    ));
 
     // Translation - 3D
     // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
@@ -88,8 +88,8 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         &translation,
         translationResult3x,
         translationResult3y,
-        translationResult3z)
-    );
+        translationResult3z
+    ));
 
     // Full affine - 2D
     // Test order [0,0] [1,0] [0,1] [1,1]
@@ -98,14 +98,14 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
     affine.m[0][3] = -0.5;
     affine.m[1][3] = 1.5;
     affine.m[2][3] = 0.75;
-    for (auto i = 0; i < 4; ++i) {
-        for (auto j = 0; j < 4; ++j) {
-            affine.m[i][j] += (((float)rand() / (RAND_MAX)) - 0.5f) / 10.f;
+    for (int i = 0; i < 4; ++i) {
+        for (int j = 0; j < 4; ++j) {
+            affine.m[i][j] += ((static_cast<float>(rand()) / RAND_MAX) - 0.5f) / 10.f;
         }
     }
     float affineResult2x[4];
     float affineResult2y[4];
-    for (auto i = 0; i < 4; ++i) {
+    for (int i = 0; i < 4; ++i) {
         auto x = identityResult2x[i];
         auto y = identityResult2y[i];
         affineResult2x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y;
@@ -118,15 +118,15 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         &affine,
         affineResult2x,
         affineResult2y,
-        nullptr)
-    );
+        nullptr
+    ));
 
     // Full affine - 3D
     // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
     float affineResult3x[8];
     float affineResult3y[8];
     float affineResult3z[8];
-    for (auto i = 0; i < 8; ++i) {
+    for (int i = 0; i < 8; ++i) {
         auto x = identityResult3x[i];
         auto y = identityResult3y[i];
         auto z = identityResult3z[i];
@@ -140,8 +140,8 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         &affine,
         affineResult3x,
         affineResult3y,
-        affineResult3z)
-    );
+        affineResult3z
+    ));
 
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 7f391f8b..1a3b35d7 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -9,7 +9,7 @@
 #include "AffineDeformationFieldKernel.h"
 
 
-template <typename T>
+template<typename T>
 void InterpCubicSplineKernel(T relative, T (&basis)[4]) {
     if (relative < 0) relative = 0; //reg_rounding error
     const T relative2 = relative * relative;
@@ -19,7 +19,7 @@ void InterpCubicSplineKernel(T relative, T (&basis)[4]) {
     basis[3] = (relative - 1.f) * relative2 / 2.f;
 }
 
-template <typename T>
+template<typename T>
 void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) {
     InterpCubicSplineKernel(relative, basis);
     if (relative < 0) relative = 0; //reg_rounding error
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index bc2893af..8432ebe9 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -25,9 +25,9 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     // Fill image with distance from identity
     const auto ref2dPtr = reference2d.data();
     auto ref2dIt = ref2dPtr.begin();
-    for (auto y = 0; y < reference2d->ny; ++y)
-        for (auto x = 0; x < reference2d->nx; ++x)
-            *ref2dIt++ = sqrtf(float(x * x) + float(y * y));
+    for (int y = 0; y < reference2d->ny; ++y)
+        for (int x = 0; x < reference2d->nx; ++x)
+            *ref2dIt++ = sqrtf(static_cast<float>(x * x + y * y));
 
     // Create a corresponding 2D deformation field
     vector<NiftiImage::dim_t> dimDef{ 1, 1, 1, 1, 2 };
@@ -43,10 +43,10 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     // Fill image with distance from identity
     const auto ref3dPtr = reference3d.data();
     auto ref3dIt = ref3dPtr.begin();
-    for (auto z = 0; z < reference3d->nz; ++z)
-        for (auto y = 0; y < reference3d->ny; ++y)
-            for (auto x = 0; x < reference3d->nx; ++x)
-                *ref3dIt++ = sqrtf(float(x * x) + float(y * y) + float(z * z));
+    for (int z = 0; z < reference3d->nz; ++z)
+        for (int y = 0; y < reference3d->ny; ++y)
+            for (int x = 0; x < reference3d->nx; ++x)
+                *ref3dIt++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
 
     // Create a corresponding 3D deformation field
     dimDef[4] = 3;
@@ -67,19 +67,20 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     const float yBasisLinear[2] = { 0.7f, 0.3f };
     for (int y = 0; y < 2; ++y) {
         for (int x = 0; x < 2; ++x) {
-            const auto coeff = (float)ref2dPtr[(y + 1) * dimFlo[1] + (x + 1)];
+            const float coeff = ref2dPtr[(y + 1) * dimFlo[1] + (x + 1)];
             resLinear2d[0] += coeff * derivLinear[x] * yBasisLinear[y];
             resLinear2d[1] += coeff * xBasisLinear[x] * derivLinear[y];
         }
     }
+
     // Create the test case
     testCases.emplace_back(TestData(
         "Linear 2D",
         reference2d,
         deformationField2d,
         1,
-        resLinear2d)
-    );
+        resLinear2d
+    ));
 
     // Cubic spline image gradient - 2D
     // coordinate in image: [1.2, 1.3]
@@ -90,7 +91,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     InterpCubicSplineKernel(0.3f, yBasisCubic, yDerivCubic);
     for (int y = 0; y <= 3; ++y) {
         for (int x = 0; x <= 3; ++x) {
-            const auto coeff = (float)ref2dPtr[y * dimFlo[1] + x];
+            const float coeff = ref2dPtr[y * dimFlo[1] + x];
             resCubic2d[0] += coeff * xDerivCubic[x] * yBasisCubic[y];
             resCubic2d[1] += coeff * xBasisCubic[x] * yDerivCubic[y];
         }
@@ -102,8 +103,8 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
         reference2d,
         deformationField2d,
         3,
-        resCubic2d)
-    );
+        resCubic2d
+    ));
 
     // Linear image gradient - 3D
     // coordinate in image: [1.2, 1.3, 1.4]
@@ -112,7 +113,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     for (int z = 0; z < 2; ++z) {
         for (int y = 0; y < 2; ++y) {
             for (int x = 0; x < 2; ++x) {
-                const auto coeff = (float)ref3dPtr[(z + 1) * dimFlo[1] * dimFlo[2] + (y + 1) * dimFlo[1] + (x + 1)];
+                const float coeff = ref3dPtr[(z + 1) * dimFlo[1] * dimFlo[2] + (y + 1) * dimFlo[1] + (x + 1)];
                 resLinear3d[0] += coeff * derivLinear[x] * yBasisLinear[y] * zBasisLinear[z];
                 resLinear3d[1] += coeff * xBasisLinear[x] * derivLinear[y] * zBasisLinear[z];
                 resLinear3d[2] += coeff * xBasisLinear[x] * yBasisLinear[y] * derivLinear[z];
@@ -126,8 +127,8 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
         reference3d,
         deformationField3d,
         1,
-        resLinear3d)
-    );
+        resLinear3d
+    ));
 
     // Cubic spline image gradient - 3D
     // coordinate in image: [1.2, 1.3, 1.4]
@@ -137,7 +138,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     for (int z = 0; z <= 3; ++z) {
         for (int y = 0; y <= 3; ++y) {
             for (int x = 0; x <= 3; ++x) {
-                const auto coeff = (float)ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x];
+                const float coeff = ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x];
                 resCubic3d[0] += coeff * xDerivCubic[x] * yBasisCubic[y] * zBasisCubic[z];
                 resCubic3d[1] += coeff * xBasisCubic[x] * yDerivCubic[y] * zBasisCubic[z];
                 resCubic3d[2] += coeff * xBasisCubic[x] * yBasisCubic[y] * zDerivCubic[z];
@@ -151,8 +152,8 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
         reference3d,
         deformationField3d,
         3,
-        resCubic3d)
-    );
+        resCubic3d
+    ));
 
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index aedb41fe..923efdc9 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -26,9 +26,9 @@ TEST_CASE("Interpolation", "[Interpolation]") {
     // Fill image with distance from identity
     const auto ref2dPtr = reference2d.data();
     auto ref2dIt = ref2dPtr.begin();
-    for (auto y = 0; y < reference2d->ny; ++y)
-        for (auto x = 0; x < reference2d->nx; ++x)
-            *ref2dIt++ = sqrtf(float(x * x) + float(y * y));
+    for (int y = 0; y < reference2d->ny; ++y)
+        for (int x = 0; x < reference2d->nx; ++x)
+            *ref2dIt++ = sqrtf(static_cast<float>(x * x + y * y));
 
     // Create a corresponding 2D deformation field
     vector<NiftiImage::dim_t> dimDef{ 1, 1, 1, 1, 2 };
@@ -44,10 +44,10 @@ TEST_CASE("Interpolation", "[Interpolation]") {
     // Fill image with distance from identity
     const auto ref3dPtr = reference3d.data();
     auto ref3dIt = ref3dPtr.begin();
-    for (auto z = 0; z < reference3d->nz; ++z)
-        for (auto y = 0; y < reference3d->ny; ++y)
-            for (auto x = 0; x < reference3d->nx; ++x)
-                *ref3dIt++ = sqrtf(float(x * x) + float(y * y) + float(z * z));
+    for (int z = 0; z < reference3d->nz; ++z)
+        for (int y = 0; y < reference3d->ny; ++y)
+            for (int x = 0; x < reference3d->nx; ++x)
+                *ref3dIt++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
 
     // Create a corresponding 3D deformation field
     dimDef[4] = 3;
@@ -65,9 +65,9 @@ TEST_CASE("Interpolation", "[Interpolation]") {
     float resLinear2d[1] = {};
     for (int y = 1; y <= 2; ++y) {
         for (int x = 1; x <= 2; ++x) {
-            resLinear2d[0] += float(ref2dPtr[y * dimFlo[1] + x]) *
-                abs(2.0f - float(x) - 0.2f) *
-                abs(2.0f - float(y) - 0.3f);
+            resLinear2d[0] += static_cast<float>(ref2dPtr[y * dimFlo[1] + x]) *
+                abs(2.0f - static_cast<float>(x) - 0.2f) *
+                abs(2.0f - static_cast<float>(y) - 0.3f);
         }
     }
 
@@ -77,21 +77,22 @@ TEST_CASE("Interpolation", "[Interpolation]") {
         reference2d,
         deformationField2d,
         1,
-        resLinear2d)
-    );
+        resLinear2d
+    ));
 
     // Nearest neighbour interpolation - 2D
     // coordinate in image: [1.2, 1.3]
     float resNearest2d[1];
     resNearest2d[0] = ref2dPtr[1 * dimFlo[1] + 1];
+
     // Create the test case
     testCases.emplace_back(TestData(
         "Nearest Neighbour 2D",
         reference2d,
         deformationField2d,
         0,
-        resNearest2d)
-    );
+        resNearest2d
+    ));
 
     // Cubic spline interpolation - 2D
     // coordinate in image: [1.2, 1.3]
@@ -99,11 +100,9 @@ TEST_CASE("Interpolation", "[Interpolation]") {
     float xBasis[4], yBasis[4];
     InterpCubicSplineKernel(0.2f, xBasis);
     InterpCubicSplineKernel(0.3f, yBasis);
-    for (int y = 0; y <= 3; ++y) {
-        for (int x = 0; x <= 3; ++x) {
-            resCubic2d[0] += float(ref2dPtr[y * dimFlo[1] + x]) * xBasis[x] * yBasis[y];
-        }
-    }
+    for (int y = 0; y <= 3; ++y)
+        for (int x = 0; x <= 3; ++x)
+            resCubic2d[0] += static_cast<float>(ref2dPtr[y * dimFlo[1] + x]) * xBasis[x] * yBasis[y];
 
     // Create the test case
     testCases.emplace_back(TestData(
@@ -111,8 +110,8 @@ TEST_CASE("Interpolation", "[Interpolation]") {
         reference2d,
         deformationField2d,
         3,
-        resCubic2d)
-    );
+        resCubic2d
+    ));
 
     // Linear interpolation - 3D
     // coordinate in image: [1.2, 1.3, 1.4]
@@ -120,10 +119,10 @@ TEST_CASE("Interpolation", "[Interpolation]") {
     for (int z = 1; z <= 2; ++z) {
         for (int y = 1; y <= 2; ++y) {
             for (int x = 1; x <= 2; ++x) {
-                resLinear3d[0] += float(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) *
-                    abs(2.0f - float(x) - 0.2f) *
-                    abs(2.0f - float(y) - 0.3f) *
-                    abs(2.0f - float(z) - 0.4f);
+                resLinear3d[0] += static_cast<float>(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) *
+                    abs(2.0f - static_cast<float>(x) - 0.2f) *
+                    abs(2.0f - static_cast<float>(y) - 0.3f) *
+                    abs(2.0f - static_cast<float>(z) - 0.4f);
             }
         }
     }
@@ -134,34 +133,32 @@ TEST_CASE("Interpolation", "[Interpolation]") {
         reference3d,
         deformationField3d,
         1,
-        resLinear3d)
-    );
+        resLinear3d
+    ));
 
     // Nearest neighbour interpolation - 3D
     // coordinate in image: [1.2, 1.3, 1.4]
     float resNearest3d[1];
     resNearest3d[0] = ref3dPtr[1 * dimFlo[2] * dimFlo[1] + 1 * dimFlo[1] + 1];
+
     // Create the test case
     testCases.emplace_back(TestData(
         "Nearest Neighbour 3D",
         reference3d,
         deformationField3d,
         0,
-        resNearest3d)
-    );
+        resNearest3d
+    ));
 
     // Cubic spline interpolation - 3D
     // coordinate in image: [1.2, 1.3, 1.4]
     float resCubic3d[1] = {};
     float zBasis[4];
     InterpCubicSplineKernel(0.4f, zBasis);
-    for (int z = 0; z <= 3; ++z) {
-        for (int y = 0; y <= 3; ++y) {
-            for (int x = 0; x <= 3; ++x) {
-                resCubic3d[0] += float(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) * xBasis[x] * yBasis[y] * zBasis[z];
-            }
-        }
-    }
+    for (int z = 0; z <= 3; ++z)
+        for (int y = 0; y <= 3; ++y)
+            for (int x = 0; x <= 3; ++x)
+                resCubic3d[0] += static_cast<float>(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) * xBasis[x] * yBasis[y] * zBasis[z];
 
     // Create the test case
     testCases.emplace_back(TestData(
@@ -169,8 +166,8 @@ TEST_CASE("Interpolation", "[Interpolation]") {
         reference3d,
         deformationField3d,
         3,
-        resCubic3d)
-    );
+        resCubic3d
+    ));
 
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {

From e9f5eaf1713038e3efe2770cb4ebb32700d7ad28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 17 Mar 2023 12:09:58 +0000
Subject: [PATCH 105/314] Add NiftiImage::copyData() to copy the pixel data
 from another image

---
 niftyreg_build_version.txt      |  2 +-
 reg-io/RNifti/NiftiImage.h      |  8 ++++++++
 reg-io/RNifti/NiftiImage_impl.h | 21 +++++++++++++++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 20c90807..188ccfe5 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-224
+225
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 29855b40..e2ca9601 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1731,6 +1731,14 @@ class NiftiImage
     **/
     NiftiImage & replaceData (const NiftiImageData &data);
 
+    /**
+     * Copy the pixel data from another image
+     * @param other The image from which to copy the data
+     * @exception runtime_error If the lengths and datatypes of the two images do not match
+     * @return Self, after copying the data
+    */
+    NiftiImage & copyData (const nifti_image *other);
+
     /**
      * Drop the data from the image, retaining only the metadata. This method invalidates any
      * \ref NiftiImageData objects referencing the old data
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index d8c33555..6bb165b7 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -1864,6 +1864,27 @@ inline NiftiImage & NiftiImage::replaceData (const NiftiImageData &data)
     return *this;
 }
 
+inline NiftiImage & NiftiImage::copyData (const nifti_image *other)
+{
+    if (this->isNull())
+        return *this;
+    else if (other == nullptr || other->data == nullptr)
+        throw std::runtime_error("Cannot copy data from a null image");
+    else if (other->nvox != image->nvox)
+        throw std::runtime_error("Cannot copy data from an image with a different length");
+    else if (other->datatype != image->datatype)
+        throw std::runtime_error("Cannot copy data from an image with a different datatype");
+
+    // Copy the data
+    memcpy(image->data, other->data, totalBytes());
+    image->scl_slope = other->scl_slope;
+    image->scl_inter = other->scl_inter;
+    image->cal_min = other->cal_min;
+    image->cal_max = other->cal_max;
+
+    return *this;
+}
+
 inline std::pair<std::string,std::string> NiftiImage::toFile (const std::string fileName, const int datatype, const int filetype) const
 {
     const bool changingDatatype = (datatype != DT_NONE && !this->isNull() && datatype != image->datatype);

From ade981cc7f20ab9d36d71ed15a3747af7e4b7625 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 17 Mar 2023 12:16:51 +0000
Subject: [PATCH 106/314] Extend NiftiImage constructor to optionally allocate
 image data

---
 niftyreg_build_version.txt      |  2 +-
 reg-io/RNifti/NiftiImage.h      | 17 ++++++++++-------
 reg-io/RNifti/NiftiImage_impl.h | 18 +++++++++++++-----
 3 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 188ccfe5..f4146713 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-225
+226
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index e2ca9601..e2712c17 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1289,8 +1289,9 @@ class NiftiImage
      * Copy the contents of a \c nifti_image to create a new image, acquiring the new pointer
      * @param source A pointer to a \c nifti_image
      * @param onlyImageInfo If \c true, only the image info is copied; otherwise the data are also copied
+     * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true
     **/
-    void copy (const nifti_image *source, const bool onlyImageInfo);
+    void copy (const nifti_image *source, const bool onlyImageInfo, const bool allocData);
 
     /**
      * Copy the contents of a \ref Block to create a new image, acquiring a new pointer
@@ -1376,13 +1377,14 @@ class NiftiImage
      * @param source Another \c NiftiImage object
      * @param copy If \c true, the underlying \c nifti_image will be copied; otherwise the new
      * object wraps the same \c nifti_image and increments the shared reference count
-     * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied
+     * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied. Only relevant if \c copy is \c true
+     * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true
     **/
-    NiftiImage (const NiftiImage &source, const bool copy = true, const bool onlyImageInfo = false)
+    NiftiImage (const NiftiImage &source, const bool copy = true, const bool onlyImageInfo = false, const bool allocData = false)
         : NiftiImage()
     {
         if (copy) {
-            this->copy(source, onlyImageInfo);
+            this->copy(source, onlyImageInfo, allocData);
         } else {
             refCount = source.refCount;
             acquire(source.image);
@@ -1423,13 +1425,14 @@ class NiftiImage
      * @param image An existing \c nifti_image pointer, possibly \c nullptr
      * @param copy If \c true, the image data will be copied; otherwise this object just wraps
      * the pointer passed to it
-     * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied
+     * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied. Only relevant if \c copy is \c true
+     * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true
     **/
-    NiftiImage (nifti_image * const image, const bool copy = false, const bool onlyImageInfo = false)
+    NiftiImage (nifti_image * const image, const bool copy = false, const bool onlyImageInfo = false, const bool allocData = false)
         : NiftiImage()
     {
         if (copy)
-            this->copy(image, onlyImageInfo);
+            this->copy(image, onlyImageInfo, allocData);
         else
             acquire(image);
 #ifndef NDEBUG
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index 6bb165b7..0688a681 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -763,7 +763,7 @@ inline void NiftiImage::release ()
     }
 }
 
-inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInfo)
+inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInfo, const bool allocData)
 {
     if (source == nullptr)
         acquire(nullptr);
@@ -771,17 +771,25 @@ inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInf
     {
 #if RNIFTI_NIFTILIB_VERSION == 1
         acquire(nifti_copy_nim_info(source));
-        if (!onlyImageInfo && source->data != nullptr)
+        if (onlyImageInfo)
         {
-            size_t dataSize = nifti_get_volsize(source);
+            if (allocData)
+                realloc();
+        } else if (source->data != nullptr)
+        {
+            const size_t dataSize = nifti_get_volsize(source);
             image->data = calloc(1, dataSize);
             memcpy(image->data, source->data, dataSize);
         }
 #elif RNIFTI_NIFTILIB_VERSION == 2
         acquire(nifti2_copy_nim_info(source));
-        if (!onlyImageInfo && source->data != nullptr)
+        if (onlyImageInfo)
+        {
+            if (allocData)
+                realloc();
+        } else if (source->data != nullptr)
         {
-            size_t dataSize = nifti2_get_volsize(source);
+            const size_t dataSize = nifti2_get_volsize(source);
             image->data = calloc(1, dataSize);
             memcpy(image->data, source->data, dataSize);
         }

From 961700a638d9b9ccf7aaa10680ffffe5d3d08c5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 17 Mar 2023 12:18:27 +0000
Subject: [PATCH 107/314] Add NiftiImage::totalBytes() to return the total size
 of the image data in bytes

---
 niftyreg_build_version.txt |  2 +-
 reg-io/RNifti/NiftiImage.h | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f4146713..2c36bbda 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-226
+227
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index e2712c17..accc8d60 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1956,6 +1956,18 @@ class NiftiImage
     */
     size_t nVoxelsPerVolume () const { return calcVoxelNumber(image, 3); }
 
+    /**
+     * Return the total size of the image data in bytes
+    */
+    size_t totalBytes() const
+    {
+#if RNIFTI_NIFTILIB_VERSION == 1
+        return nifti_get_volsize(image);
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        return nifti2_get_volsize(image);
+#endif
+    }
+
     /**
      * Return the number of extensions associated with the image
      * @return An integer giving the number of extensions

From 53ad3a4dd9abf5e3e00f5980425334cb91684455 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 17 Mar 2023 12:19:53 +0000
Subject: [PATCH 108/314] Add move constructor and assignment operator to
 NiftiImageData

---
 niftyreg_build_version.txt |  2 +-
 reg-io/RNifti/NiftiImage.h | 40 +++++++++++++++++++++++++++-----------
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 2c36bbda..9be0dc9a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-227
+228
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index accc8d60..beb893ff 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -420,6 +420,21 @@ class NiftiImageData
         Element operator[] (const size_t i) { return Element(parent, static_cast<char*>(ptr) + (i * step)); }
     };
 
+    /**
+     * Swap the contents of two \c NiftiImageData objects
+    */
+    friend void swap (NiftiImageData &first, NiftiImageData &second)
+    {
+        using std::swap;
+        swap(first.slope, second.slope);
+        swap(first.intercept, second.intercept);
+        swap(first.dataPtr, second.dataPtr);
+        swap(first._datatype, second._datatype);
+        swap(first.handler, second.handler);
+        swap(first._length, second._length);
+        swap(first.owner, second.owner);
+    }
+
     /**
      * Default constructor, creating an empty data object
     **/
@@ -468,6 +483,16 @@ class NiftiImageData
         }
     }
 
+    /**
+     * Move constructor
+     * @param source Another \c NiftiImageData object
+    */
+    NiftiImageData (NiftiImageData &&source)
+        : NiftiImageData()
+    {
+        swap(*this, source);
+    }
+
     /**
      * Iterator-based constructor
      * @param from Iterator type representing the start of the source data to be copied
@@ -493,20 +518,13 @@ class NiftiImageData
     }
 
     /**
-     * Copy assignment operator
-     * @param source Another \c NiftiImageData object, from which the data and metadata are copied
+     * Copy and move assignment operator
+     * @param source Another \c NiftiImageData object
      * @return A reference to the callee
     **/
-    NiftiImageData & operator= (const NiftiImageData &source)
+    NiftiImageData & operator= (NiftiImageData source)
     {
-        if (source.dataPtr != nullptr)
-        {
-            // Free the old data, if we allocated it
-            if (owner)
-                free(dataPtr);
-            init(nullptr, source.length(), source.datatype(), source.slope, source.intercept);
-            memcpy(dataPtr, source.dataPtr, source.totalBytes());
-        }
+        swap(*this, source);
         return *this;
     }
 

From f3ba1f59b08639fb497f320f71a6c2dd08c3ca71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 17 Mar 2023 12:23:40 +0000
Subject: [PATCH 109/314] Remove nodeNumber parameter from GetMaximalLength()
 and NormaliseGradient() of *Compute classes

---
 niftyreg_build_version.txt   |  2 +-
 reg-lib/Compute.cpp          |  6 +++---
 reg-lib/Compute.h            |  4 ++--
 reg-lib/_reg_f3d.cpp         |  4 ++--
 reg-lib/_reg_f3d2.cpp        |  7 +++----
 reg-lib/cuda/CudaCompute.cpp | 12 ++++++++----
 reg-lib/cuda/CudaCompute.h   |  4 ++--
 7 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9be0dc9a..bf18240e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-228
+229
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 73f8cb1c..de07f696 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -131,8 +131,8 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active
                          activeTimepoint);
 }
 /* *************************************************************** */
-double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) {
-    nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
+double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
+    const nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
     switch (transformationGradient->datatype) {
     case NIFTI_TYPE_FLOAT32:
         return reg_getMaximalLength<float>(transformationGradient, optimiseX, optimiseY, optimiseZ);
@@ -142,7 +142,7 @@ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimis
     return 0;
 }
 /* *************************************************************** */
-void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
+void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
     NiftiImage transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
     const bool hasZ = transformationGradient->nz > 1;
     if (!hasZ)
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index aef76487..0390004b 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -21,8 +21,8 @@ class Compute {
     virtual void GetDeformationField(bool composition, bool bspline);
     virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint);
-    virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ);
-    virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ);
+    virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ);
+    virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void SmoothGradient(float sigma);
     virtual void GetApproximatedGradient(InterfaceOptimiser& opt);
     virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber);
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 4f9d48b2..6991cfd0 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -479,12 +479,12 @@ void reg_f3d<T>::GetLandmarkDistanceGradient() {
 template<class T>
 T reg_f3d<T>::NormaliseGradient() {
     // First compute the gradient max length for normalisation purpose
-    T maxGradLength = (T)this->compute->GetMaximalLength(this->optimiser->GetVoxNumber(), this->optimiseX, this->optimiseY, this->optimiseZ);
+    T maxGradLength = (T)this->compute->GetMaximalLength(this->optimiseX, this->optimiseY, this->optimiseZ);
 
     if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
         // The gradient is normalised if we are running f3d
         // It will be normalised later when running f3d2
-        this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
+        this->compute->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
 #ifndef NDEBUG
         char text[255];
         sprintf(text, "Objective function gradient maximal length: %g", maxGradLength);
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 1c4f6c82..b13ec33d 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -442,8 +442,7 @@ T reg_f3d2<T>::NormaliseGradient() {
     const T forwardMaxGradLength = reg_f3d<T>::NormaliseGradient();
 
     // The backward gradient max length is computed
-    const T backwardMaxGradLength = (T)computeBw->GetMaximalLength(this->optimiser->GetVoxNumber_b(),
-                                                                   this->optimiseX,
+    const T backwardMaxGradLength = (T)computeBw->GetMaximalLength(this->optimiseX,
                                                                    this->optimiseY,
                                                                    this->optimiseZ);
 
@@ -457,9 +456,9 @@ T reg_f3d2<T>::NormaliseGradient() {
 #endif
 
     // The forward gradient is normalised
-    this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
+    this->compute->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
     // The backward gradient is normalised
-    computeBw->NormaliseGradient(this->optimiser->GetVoxNumber_b(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
+    computeBw->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
 
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d2<T>::NormaliseGradient");
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 2717cc83..e00aad90 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -115,14 +115,18 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac
                              paddingValue);
 }
 /* *************************************************************** */
-double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) {
+double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ
-    return reg_getMaximalLength_gpu(dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), nodeNumber);
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3);
+    return reg_getMaximalLength_gpu(con.GetTransformationGradientCuda(), voxelsPerVolume);
 }
 /* *************************************************************** */
-void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
+void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
     // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ
-    reg_multiplyValue_gpu(nodeNumber, dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda(), float(1 / maxGradLength));
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3);
+    reg_multiplyValue_gpu(voxelsPerVolume, con.GetTransformationGradientCuda(), float(1 / maxGradLength));
 }
 /* *************************************************************** */
 void CudaCompute::SmoothGradient(float sigma) {
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 85d3904e..5f53b12e 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -19,8 +19,8 @@ class CudaCompute: public Compute {
     virtual void GetDeformationField(bool composition, bool bspline) override;
     virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override;
-    virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) override;
-    virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override;
+    virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override;
+    virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void SmoothGradient(float sigma) override;
     virtual void GetApproximatedGradient(InterfaceOptimiser& opt) override;
     virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber) override;

From 3592c61984041c4166cf1ced4e3f39110cb54e0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 17 Mar 2023 12:27:59 +0000
Subject: [PATCH 110/314] Fix possible bugs causing accessing freed memory

---
 niftyreg_build_version.txt          | 2 +-
 reg-lib/Compute.cpp                 | 3 ++-
 reg-test/reg_test_imageGradient.cpp | 6 +++---
 reg-test/reg_test_interpolation.cpp | 6 +++---
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index bf18240e..dcb6b5ba 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-229
+230
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index de07f696..d9578ee6 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -159,6 +159,8 @@ void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optim
     const size_t voxelsPerVolume = transformationGradient.nVoxelsPerVolume();
 #endif
 
+    transformationGradient.disown();
+
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelsPerVolume, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLength)
@@ -172,7 +174,6 @@ void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optim
         if (hasZ)
             ptrZ[i] = valZ / maxGradLength;
     }
-    transformationGradient.disown();
 }
 /* *************************************************************** */
 void Compute::SmoothGradient(float sigma) {
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 8432ebe9..79248726 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -197,13 +197,13 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
 
                 // Check all values
                 warpedGradient = content->GetWarpedGradient();
-                auto warpedGradPtr = warpedGradient.data();
-                for (size_t i = 0; i < warpedGradient->nvox; ++i) {
+                const auto warpedGradPtr = warpedGradient.data();
+                warpedGradient.disown();
+                for (size_t i = 0; i < warpedGradient.nVoxels(); ++i) {
                     const float warpedGradVal = warpedGradPtr[i];
                     std::cout << i << " " << warpedGradVal << " " << testResult[i] << std::endl;
                     REQUIRE(fabs(warpedGradVal - testResult[i]) < EPS);
                 }
-                warpedGradient.disown();
             }
         }
     }
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 923efdc9..a264dbf3 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -220,13 +220,13 @@ TEST_CASE("Interpolation", "[Interpolation]") {
 
                 // Check all values
                 warped = content->GetWarped();
-                auto warpedPtr = warped.data();
-                for (size_t i = 0; i < warped->nvox; ++i) {
+                const auto warpedPtr = warped.data();
+                warped.disown();
+                for (size_t i = 0; i < warped.nVoxels(); ++i) {
                     const float warpedValue = warpedPtr[i];
                     std::cout << i << " " << warpedValue << " " << testResult[i] << std::endl;
                     REQUIRE(fabs(warpedValue - testResult[i]) < EPS);
                 }
-                warped.disown();
             }
         }
     }

From 21ed730741d3db314106fda73430767574a65844 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Sun, 19 Mar 2023 22:11:59 +0000
Subject: [PATCH 111/314] Add tests for *Compute::GetMaximalLength() and
 *Compute::NormaliseGradient()

---
 niftyreg_build_version.txt              |   2 +-
 reg-test/CMakeLists.txt                 |   1 +
 reg-test/reg_test_normaliseGradient.cpp | 207 ++++++++++++++++++++++++
 3 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100644 reg-test/reg_test_normaliseGradient.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index dcb6b5ba..71d936fd 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-230
+231
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 89e51322..aecfebd8 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -110,6 +110,7 @@ include(Catch)
 set(EXEC_LIST reg_test_affineDeformationField)
 set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
+set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 
 foreach(EXEC ${EXEC_LIST})
   add_executable(${EXEC} ${EXEC}.cpp)
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
new file mode 100644
index 00000000..4a8572d9
--- /dev/null
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -0,0 +1,207 @@
+// OpenCL is not supported for this test
+#undef _USE_OPENCL
+
+#include "reg_test_common.h"
+
+#define EPS 0.000001
+
+/*
+    This test file contains the following unit tests:
+    test functions:
+    In 2D and 3D
+    Maximal length
+    Normalise gradient
+*/
+
+
+class NormaliseGradientTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<TestData, unique_ptr<F3dContent>, unique_ptr<Platform>>;
+
+    vector<TestData> testData;
+    vector<TestCase> testCases;
+
+public:
+    NormaliseGradientTest() {
+        // Create a random number generator
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create a reference 2D image
+        vector<NiftiImage::dim_t> dimFlo{ 4, 4 };
+        NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
+
+        // Fill image with distance from identity
+        const auto ref2dPtr = reference2d.data();
+        auto ref2dIt = ref2dPtr.begin();
+        for (int y = 0; y < reference2d->ny; ++y)
+            for (int x = 0; x < reference2d->nx; ++x)
+                *ref2dIt++ = sqrtf(static_cast<float>(x * x + y * y));
+
+        // Create a reference 3D image
+        dimFlo.push_back(4);
+        NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32);
+
+        // Fill image with distance from identity
+        const auto ref3dPtr = reference3d.data();
+        auto ref3dIt = ref3dPtr.begin();
+        for (int z = 0; z < reference3d->nz; ++z)
+            for (int y = 0; y < reference3d->ny; ++y)
+                for (int x = 0; x < reference3d->nx; ++x)
+                    *ref3dIt++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
+
+        // Generate the different test cases
+        // Test 2D
+        NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
+        NiftiImage transformationGradient2d(controlPointGrid2d, true, true, true);
+        auto transGrad2dPtr = transformationGradient2d.data();
+        for (size_t i = 0; i < transformationGradient2d.nVoxels(); ++i)
+            transGrad2dPtr[i] = distr(gen);
+
+        // Add the test data
+        testData.emplace_back(TestData(
+            "2D",
+            std::move(reference2d),
+            std::move(controlPointGrid2d),
+            std::move(transformationGradient2d)
+        ));
+
+        // Test 3D
+        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+        NiftiImage transformationGradient3d(controlPointGrid3d, true, true, true);
+        auto transGrad3dPtr = transformationGradient3d.data();
+        for (size_t i = 0; i < transformationGradient3d.nVoxels(); ++i)
+            transGrad3dPtr[i] = distr(gen);
+
+        // Add the test data
+        testData.emplace_back(TestData(
+            "3D",
+            std::move(reference3d),
+            std::move(controlPointGrid3d),
+            std::move(transformationGradient3d)
+        ));
+
+        // Add platforms to the test data
+        for (auto&& testData : testData) {
+            auto&& [testName, reference, controlPointGrid, testGrad] = testData;
+
+            for (auto&& platformType : PlatformTypes) {
+                unique_ptr<Platform> platform{ new Platform(platformType) };
+                // Add content
+                unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
+                unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
+                testCases.push_back({ testData, std::move(content), std::move(platform) });
+            }
+        }
+    }
+
+    template<typename T>
+    T GetMaximalLength(const nifti_image* transformationGradient, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
+        const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
+        const T *ptrX = static_cast<T*>(transformationGradient->data);
+        const T *ptrY = &ptrX[voxelsPerVolume];
+        const T *ptrZ = &ptrY[voxelsPerVolume];
+        T maxGradValue = 0;
+
+        if (transformationGradient->nz > 1) {
+            for (size_t i = 0; i < voxelsPerVolume; i++) {
+                T valX = 0, valY = 0, valZ = 0;
+                if (optimiseX)
+                    valX = *ptrX++;
+                if (optimiseY)
+                    valY = *ptrY++;
+                if (optimiseZ)
+                    valZ = *ptrZ++;
+                maxGradValue = std::max(sqrt(valX * valX + valY * valY + valZ * valZ), maxGradValue);
+            }
+        } else {
+            for (size_t i = 0; i < voxelsPerVolume; i++) {
+                T valX = 0, valY = 0;
+                if (optimiseX)
+                    valX = *ptrX++;
+                if (optimiseY)
+                    valY = *ptrY++;
+                maxGradValue = std::max(sqrt(valX * valX + valY * valY), maxGradValue);
+            }
+        }
+
+        return maxGradValue;
+    }
+
+    template<typename T>
+    void NormaliseGradient(const nifti_image* transformationGradient, const T& maxGradValue, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
+        const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
+        T *ptrX = static_cast<T*>(transformationGradient->data);
+        T *ptrY = &ptrX[voxelsPerVolume];
+        T *ptrZ = &ptrY[voxelsPerVolume];
+        if (transformationGradient->nz > 1) {
+            for (size_t i = 0; i < voxelsPerVolume; ++i) {
+                T valX = 0, valY = 0, valZ = 0;
+                if (optimiseX)
+                    valX = ptrX[i];
+                if (optimiseY)
+                    valY = ptrY[i];
+                if (optimiseZ)
+                    valZ = ptrZ[i];
+                ptrX[i] = valX / maxGradValue;
+                ptrY[i] = valY / maxGradValue;
+                ptrZ[i] = valZ / maxGradValue;
+            }
+        } else {
+            for (size_t i = 0; i < voxelsPerVolume; ++i) {
+                T valX = 0, valY = 0;
+                if (optimiseX)
+                    valX = ptrX[i];
+                if (optimiseY)
+                    valY = ptrY[i];
+                ptrX[i] = valX / maxGradValue;
+                ptrY[i] = valY / maxGradValue;
+            }
+        }
+    }
+};
+
+TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradient]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testData, content, platform] = testCase;
+        auto&& [testName, reference, controlPointGrid, testGrad] = testData;
+
+        SECTION(testName + " " + platform->GetName()) {
+            // Set the transformation gradient image to host the computation
+            NiftiImage transGrad = content->GetTransformationGradient();
+            transGrad.copyData(testGrad);
+            transGrad.disown();
+            content->UpdateTransformationGradient();
+
+            // Get the number of voxels per volume
+            const auto voxelsPerVolume = testGrad.nVoxelsPerVolume();
+
+            // Calculate the maximal length
+            unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+            const auto maxLength = static_cast<float>(compute->GetMaximalLength(true, true, true));
+            const auto testLength = GetMaximalLength<float>(testGrad, true, true, true);
+            // Check the results
+            REQUIRE(fabs(maxLength - testLength) < EPS);
+
+            // Normalise the gradient
+            compute->NormaliseGradient(maxLength, true, true, true);
+            NormaliseGradient<float>(testGrad, testLength, true, true, true);
+
+            // Check the results
+            transGrad = content->GetTransformationGradient();
+            const auto transGradPtr = transGrad.data();
+            const auto testGradPtr = testGrad.data();
+            transGrad.disown();
+            for (size_t i = 0; i < testGrad.nVoxels(); ++i) {
+                const float transGradVal = transGradPtr[i];
+                const float testGradVal = testGradPtr[i];
+                std::cout << i << " " << transGradVal << " " << testGradVal << std::endl;
+                REQUIRE(fabs(transGradVal - testGradVal) < EPS);
+            }
+        }
+    }
+}

From 1d507022d92b5c31644991c5b9ad070c099bc840 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Sun, 19 Mar 2023 22:29:51 +0000
Subject: [PATCH 112/314] Fix a bug in tests causing wrong voxel count
 calculation

---
 niftyreg_build_version.txt          | 2 +-
 reg-test/reg_test_imageGradient.cpp | 3 ++-
 reg-test/reg_test_interpolation.cpp | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 71d936fd..7c022aed 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-231
+232
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 79248726..9fb73ca7 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -198,8 +198,9 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
                 // Check all values
                 warpedGradient = content->GetWarpedGradient();
                 const auto warpedGradPtr = warpedGradient.data();
+                const size_t nVoxels = warpedGradient.nVoxels();
                 warpedGradient.disown();
-                for (size_t i = 0; i < warpedGradient.nVoxels(); ++i) {
+                for (size_t i = 0; i < nVoxels; ++i) {
                     const float warpedGradVal = warpedGradPtr[i];
                     std::cout << i << " " << warpedGradVal << " " << testResult[i] << std::endl;
                     REQUIRE(fabs(warpedGradVal - testResult[i]) < EPS);
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index a264dbf3..8eaa95b5 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -221,8 +221,9 @@ TEST_CASE("Interpolation", "[Interpolation]") {
                 // Check all values
                 warped = content->GetWarped();
                 const auto warpedPtr = warped.data();
+                const size_t nVoxels = warped.nVoxels();
                 warped.disown();
-                for (size_t i = 0; i < warped.nVoxels(); ++i) {
+                for (size_t i = 0; i < nVoxels; ++i) {
                     const float warpedValue = warpedPtr[i];
                     std::cout << i << " " << warpedValue << " " << testResult[i] << std::endl;
                     REQUIRE(fabs(warpedValue - testResult[i]) < EPS);

From c597115c0bb95806ba056bb82ab946a3f2845c49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 20 Mar 2023 12:55:30 +0000
Subject: [PATCH 113/314] Fix HAVE_ZLIB definition

---
 niftyreg_build_version.txt     | 2 +-
 reg-io/CMakeLists.txt          | 1 +
 reg-io/niftilib/CMakeLists.txt | 1 -
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 7c022aed..0c56bea5 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-232
+233
diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt
index 74712e43..0907b52b 100644
--- a/reg-io/CMakeLists.txt
+++ b/reg-io/CMakeLists.txt
@@ -2,6 +2,7 @@
 if(NOT ZLIB_FOUND OR BUILD_ALL_DEP)
     subdirs(zlib)
 endif(NOT ZLIB_FOUND OR BUILD_ALL_DEP)
+add_definitions(-DHAVE_ZLIB)
 
 # Build the znz library
 subdirs(znzlib)
diff --git a/reg-io/niftilib/CMakeLists.txt b/reg-io/niftilib/CMakeLists.txt
index a2b1d466..9a18dad0 100644
--- a/reg-io/niftilib/CMakeLists.txt
+++ b/reg-io/niftilib/CMakeLists.txt
@@ -4,7 +4,6 @@ mark_as_advanced(FORCE USE_NII_NAN)
 if(USE_NII_NAN)
     add_definitions(-DUSE_NII_NAN)
 endif(USE_NII_NAN)
-add_definitions(-DHAVE_ZLIB)
 set(NAME reg_nifti)
 add_library(${NAME} nifti1_io.c)
 target_link_libraries(${NAME} z znz)

From c5dbb4286c3fc61c27717f7f4d1ad3de3ff6f205 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 20 Mar 2023 14:52:13 +0000
Subject: [PATCH 114/314] Refactor CMakeLists

---
 niftyreg_build_version.txt     |  2 +-
 reg-io/CMakeLists.txt          | 10 ++--
 reg-io/niftilib/CMakeLists.txt |  2 +-
 reg-io/nrrd/CMakeLists.txt     | 10 ++--
 reg-io/png/CMakeLists.txt      |  7 ++-
 reg-io/znzlib/CMakeLists.txt   |  1 +
 reg-lib/CMakeLists.txt         | 89 ++--------------------------------
 reg-lib/cl/CMakeLists.txt      | 18 +------
 reg-lib/cuda/CMakeLists.txt    |  8 +--
 reg-test/CMakeLists.txt        |  4 +-
 10 files changed, 21 insertions(+), 130 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 0c56bea5..7b5813c6 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-233
+234
diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt
index 0907b52b..b546a992 100644
--- a/reg-io/CMakeLists.txt
+++ b/reg-io/CMakeLists.txt
@@ -10,11 +10,10 @@ subdirs(znzlib)
 # Build the nifti file format library
 subdirs(niftilib)
 
-set(LIBRARIES reg_nifti z znz)
-
 # Build the png library if required
 subdirs(png)
-set(LIBRARIES ${LIBRARIES} reg_png)
+
+set(LIBRARIES reg_nifti reg_png)
 
 # Build the NRRD file format library if required
 if(USE_NRRD)
@@ -26,15 +25,12 @@ endif(USE_NRRD)
 SET(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${LIBRARIES}")
 
 # Create the reg_io library
-add_library(_reg_ReadWriteImage _reg_ReadWriteImage.h _reg_ReadWriteImage.cpp
-_reg_ReadWriteMatrix.h _reg_ReadWriteMatrix.cpp _reg_ReadWriteBinary.h
-_reg_ReadWriteBinary.cpp _reg_stringFormat.h _reg_stringFormat.cpp)
+add_library(_reg_ReadWriteImage _reg_ReadWriteImage.cpp _reg_ReadWriteMatrix.cpp _reg_ReadWriteBinary.cpp _reg_stringFormat.cpp)
 target_link_libraries(_reg_ReadWriteImage ${LIBRARIES})
 install(TARGETS _reg_ReadWriteImage
         RUNTIME DESTINATION bin COMPONENT Development
         LIBRARY DESTINATION lib COMPONENT Development
         ARCHIVE DESTINATION lib COMPONENT Development
 )
-install(FILES _reg_ReadWriteImage.h _reg_ReadWriteMatrix.h _reg_stringFormat.h DESTINATION include COMPONENT Development)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_ReadWriteImage")
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE)
\ No newline at end of file
diff --git a/reg-io/niftilib/CMakeLists.txt b/reg-io/niftilib/CMakeLists.txt
index 9a18dad0..f2bea9ee 100644
--- a/reg-io/niftilib/CMakeLists.txt
+++ b/reg-io/niftilib/CMakeLists.txt
@@ -6,7 +6,7 @@ if(USE_NII_NAN)
 endif(USE_NII_NAN)
 set(NAME reg_nifti)
 add_library(${NAME} nifti1_io.c)
-target_link_libraries(${NAME} z znz)
+target_link_libraries(${NAME} znz)
 install(TARGETS ${NAME}
         RUNTIME DESTINATION bin
         LIBRARY DESTINATION lib
diff --git a/reg-io/nrrd/CMakeLists.txt b/reg-io/nrrd/CMakeLists.txt
index 8ab3b82f..fffd363c 100644
--- a/reg-io/nrrd/CMakeLists.txt
+++ b/reg-io/nrrd/CMakeLists.txt
@@ -51,9 +51,7 @@ NrrdIO/simple.c NrrdIO/comment.c NrrdIO/keyvalue.c NrrdIO/endianNrrd.c NrrdIO/pa
 NrrdIO/gzio.c NrrdIO/read.c NrrdIO/write.c NrrdIO/format.c NrrdIO/formatNRRD.c NrrdIO/encoding.c
 NrrdIO/encodingRaw.c NrrdIO/encodingAscii.c NrrdIO/encodingHex.c NrrdIO/encodingGzip.c
 NrrdIO/subset.c NrrdIO/encodingBzip2.c NrrdIO/formatEPS.c NrrdIO/formatPNG.c NrrdIO/formatPNM.c
-NrrdIO/formatText.c NrrdIO/formatVTK.c NrrdIO/biff.h NrrdIO/NrrdIO.h
-NrrdIO/privateAir.h NrrdIO/privateNrrd.h NrrdIO/teem32bit.h NrrdIO/teemDio.h NrrdIO/teemEndian.h
-NrrdIO/teemPng.h NrrdIO/teemQnanhibit.h)
+NrrdIO/formatText.c NrrdIO/formatVTK.c)
 
 add_library(reg_NrrdIO ${nrrdio_SRCS})
 install(TARGETS reg_NrrdIO
@@ -64,11 +62,11 @@ install(TARGETS reg_NrrdIO
 target_link_libraries(reg_NrrdIO z)
 ############################################################
 ############################################################
-add_library(reg_nrrd reg_nrrd.cpp reg_nrrd.h)
-target_link_libraries(reg_nrrd _reg_tools _reg_maths reg_NrrdIO reg_nifti z)
+add_library(reg_nrrd reg_nrrd.cpp)
+target_link_libraries(reg_nrrd _reg_tools reg_NrrdIO)
 install(TARGETS reg_nrrd
         RUNTIME DESTINATION bin COMPONENT Development
         LIBRARY DESTINATION lib COMPONENT Development
         ARCHIVE DESTINATION lib COMPONENT Development
 )
-install(FILES reg_nrrd.h NrrdIO/NrrdIO.h ${CMAKE_BINARY_DIR}/NrrdConfigure.h DESTINATION include COMPONENT Development)
+install(FILES NrrdIO/NrrdIO.h ${CMAKE_BINARY_DIR}/NrrdConfigure.h DESTINATION include COMPONENT Development)
diff --git a/reg-io/png/CMakeLists.txt b/reg-io/png/CMakeLists.txt
index 49223bb8..afbf0dc5 100644
--- a/reg-io/png/CMakeLists.txt
+++ b/reg-io/png/CMakeLists.txt
@@ -51,7 +51,7 @@ if(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP)
       lpng1510/pngwutil.c
     )
     # Build the library
-    add_library(png STATIC ${png_SRCS} ${png_HDRS})
+    add_library(png STATIC ${png_SRCS})
     target_link_libraries(png z)
     install(TARGETS png
         LIBRARY DESTINATION lib COMPONENT Development
@@ -60,11 +60,10 @@ if(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP)
     install(FILES ${png_HDRS} ${CMAKE_BINARY_DIR}/pnglibconf.h DESTINATION include COMPONENT Development)
 endif(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP)
 
-add_library(reg_png reg_png.cpp reg_png.h readpng.cpp readpng.h)
-target_link_libraries(reg_png ${PNG_LIBRARY} _reg_tools reg_nifti z)
+add_library(reg_png reg_png.cpp readpng.cpp)
+target_link_libraries(reg_png ${PNG_LIBRARY} _reg_tools)
 install(TARGETS reg_png
         RUNTIME DESTINATION bin COMPONENT Development
         LIBRARY DESTINATION lib COMPONENT Development
         ARCHIVE DESTINATION lib COMPONENT Development
 )
-install(FILES reg_png.h readpng.h DESTINATION include COMPONENT Development)
diff --git a/reg-io/znzlib/CMakeLists.txt b/reg-io/znzlib/CMakeLists.txt
index 0122d3e2..48eb1d69 100644
--- a/reg-io/znzlib/CMakeLists.txt
+++ b/reg-io/znzlib/CMakeLists.txt
@@ -1,5 +1,6 @@
 #-----------------------------------------------------------------------------
 add_library(znz znzlib.c)
+target_link_libraries(znz z)
 install(TARGETS znz
     RUNTIME DESTINATION bin COMPONENT Development
     LIBRARY DESTINATION lib COMPONENT Development
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 73e59e8d..4b20646d 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -23,7 +23,6 @@ install(TARGETS _reg_maths
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES cpu/_reg_maths.h cpu/_reg_maths_eigen.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_maths")
 #-----------------------------------------------------------------------------
 add_library(_reg_tools ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_tools.cpp)
@@ -36,7 +35,6 @@ install(TARGETS _reg_tools
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES cpu/_reg_tools.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_tools")
 #-----------------------------------------------------------------------------
 add_library(_reg_globalTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_globalTrans.cpp)
@@ -46,17 +44,12 @@ install(TARGETS _reg_globalTrans
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES cpu/_reg_globalTrans.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_globalTrans")
 #-----------------------------------------------------------------------------
 add_library(_reg_localTrans ${NIFTYREG_LIBRARY_TYPE}
-  cpu/_reg_splineBasis.h
   cpu/_reg_splineBasis.cpp
-  cpu/_reg_localTrans.h
   cpu/_reg_localTrans.cpp
-  cpu/_reg_localTrans_regul.h
   cpu/_reg_localTrans_regul.cpp
-  cpu/_reg_localTrans_jac.h
   cpu/_reg_localTrans_jac.cpp
 )
 target_link_libraries(_reg_localTrans
@@ -68,22 +61,14 @@ install(TARGETS _reg_localTrans
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES cpu/_reg_localTrans.h cpu/_reg_splineBasis.h cpu/_reg_localTrans_regul.h cpu/_reg_localTrans_jac.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_localTrans")
 #-----------------------------------------------------------------------------
 set(measure_files
-  cpu/_reg_measure.h
-  cpu/_reg_nmi.h
   cpu/_reg_nmi.cpp
-  cpu/_reg_ssd.h
   cpu/_reg_ssd.cpp
-  cpu/_reg_kld.h
   cpu/_reg_kld.cpp
-  cpu/_reg_lncc.h
   cpu/_reg_lncc.cpp
-  cpu/_reg_dti.h
   cpu/_reg_dti.cpp
-  cpu/_reg_mind.h
   cpu/_reg_mind.cpp
 )
 add_library(_reg_measure ${NIFTYREG_LIBRARY_TYPE} ${measure_files})
@@ -93,15 +78,6 @@ install(TARGETS _reg_measure
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES
-  cpu/_reg_measure.h
-  cpu/_reg_nmi.h
-  cpu/_reg_ssd.h
-  cpu/_reg_kld.h
-  cpu/_reg_lncc.h
-  cpu/_reg_dti.h
-  cpu/_reg_mind.h DESTINATION include
-)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_measure")
 #-----------------------------------------------------------------------------
 add_library(_reg_resampling ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_resampling.cpp)
@@ -111,7 +87,6 @@ install(TARGETS _reg_resampling
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES cpu/_reg_resampling.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_resampling")
 #-----------------------------------------------------------------------------
 add_library(_reg_blockMatching ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_blockMatching.cpp)
@@ -121,7 +96,6 @@ install(TARGETS _reg_blockMatching
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES cpu/_reg_blockMatching.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_blockMatching")
 #-----------------------------------------------------------------------------
 add_library(_reg_femTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_femTrans.cpp)
@@ -131,22 +105,15 @@ install(TARGETS _reg_femTrans
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES cpu/_reg_femTrans.h DESTINATION include)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_femTrans")
 #-----------------------------------------------------------------------------
 add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE}
   Compute.cpp
-  Compute.h
   AladinContent.cpp
-  AladinContent.h
   Content.cpp
-  Content.h
   F3dContent.cpp
-  F3dContent.h
   Platform.cpp
-  Platform.h
   Measure.cpp
-  Measure.h
 )
 target_link_libraries(_reg_compute _reg_measure)
 install(TARGETS _reg_compute
@@ -154,30 +121,15 @@ install(TARGETS _reg_compute
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES
-  Compute.h
-  ComputeFactory.h
-  AladinContent.h
-  Content.h
-  F3dContent.h
-  Platform.h
-  Measure.h DESTINATION include
-)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_compute")
 #-----------------------------------------------------------------------------
 add_library(_reg_kernels ${NIFTYREG_LIBRARY_TYPE}
   cpu/CpuKernelFactory.cpp
-  cpu/CpuKernelFactory.h
   cpu/CpuAffineDeformationFieldKernel.cpp
-  cpu/CpuAffineDeformationFieldKernel.h
   cpu/CpuBlockMatchingKernel.cpp
-  cpu/CpuBlockMatchingKernel.h
   cpu/CpuConvolutionKernel.cpp
-  cpu/CpuConvolutionKernel.h
   cpu/CpuOptimiseKernel.cpp
-  cpu/CpuOptimiseKernel.h
   cpu/CpuResampleImageKernel.cpp
-  cpu/CpuResampleImageKernel.h
 )
 target_link_libraries(_reg_kernels
   _reg_blockMatching
@@ -187,21 +139,6 @@ install(TARGETS _reg_kernels
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES
-  KernelFactory.h
-  AffineDeformationFieldKernel.h
-  BlockMatchingKernel.h
-  ConvolutionKernel.h
-  Kernel.h
-  OptimiseKernel.h
-  ResampleImageKernel.h
-  cpu/CpuKernelFactory.h
-  cpu/CpuAffineDeformationFieldKernel.h
-  cpu/CpuBlockMatchingKernel.h
-  cpu/CpuConvolutionKernel.h
-  cpu/CpuOptimiseKernel.h
-  cpu/CpuResampleImageKernel.h DESTINATION include
-)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_kernels")
 #-----------------------------------------------------------------------------
 ## BUILD THE ALADIN LIBRARY
@@ -223,22 +160,14 @@ install(TARGETS _reg_aladin
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES
-  _reg_aladin.h
-  _reg_aladin_sym.h
-  cpu/_reg_macros.h DESTINATION include
-)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_aladin")
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
 ## BUILD THE F3D LIBRARY
 set(_reg_f3d_files
   _reg_base.cpp
-  _reg_base.h
   _reg_f3d.cpp
-  _reg_f3d.h
   _reg_f3d2.cpp
-  _reg_f3d2.h
 )
 set(_reg_f3d_libraries
   _reg_blockMatching
@@ -260,23 +189,15 @@ install(TARGETS _reg_f3d
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES
-  _reg_base.h
-  _reg_f3d.h
-  _reg_f3d2.h
-  _reg_f3d_sym.h
-  cpu/_reg_optimiser.cpp
-  cpu/_reg_optimiser.h DESTINATION include
-)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_f3d")
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
 # BUILD THE TPS LIBRARY
 #set(NAME _reg_thinPlateSpline)
 #if(APPLE)
-#	add_library(${NAME} SHARED cpu/${NAME}.h cpu/${NAME}.cpp)
+#	add_library(${NAME} SHARED cpu/${NAME}.cpp)
 #else(APPLE)
-#	add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} cpu/${NAME}.h cpu/${NAME}.cpp)
+#	add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} cpu/${NAME}.cpp)
 #endif(APPLE)
 #target_link_libraries(${NAME} _reg_tools _reg_ReadWriteImage)
 #install(TARGETS ${NAME}
@@ -284,15 +205,14 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_f3d")
 #	LIBRARY DESTINATION lib
 #	ARCHIVE DESTINATION lib
 #	)
-#install(FILES cpu/${NAME}.h  DESTINATION include)
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
 ## BUILD THE POLYAFFINE LIBRARY
 #set(NAME _reg_polyAffine)
 #if(APPLE)
-#	add_library(${NAME} SHARED _reg_base.h _reg_base.cpp ${NAME}.h ${NAME}.cpp)
+#	add_library(${NAME} SHARED _reg_base.cpp ${NAME}.cpp)
 #else(APPLE)
-#	add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_base.h _reg_base.cpp ${NAME}.h ${NAME}.cpp)
+#	add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_base.cpp ${NAME}.cpp)
 #endif(APPLE)
 #target_link_libraries(${NAME} _reg_tools _reg_ReadWriteImage)
 #install(TARGETS ${NAME}
@@ -300,7 +220,6 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_f3d")
 #	LIBRARY DESTINATION lib
 #	ARCHIVE DESTINATION lib
 #	)
-#install(FILES ${NAME}.h  DESTINATION include)
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE)
\ No newline at end of file
diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt
index 431aefb8..c3ed44ad 100755
--- a/reg-lib/cl/CMakeLists.txt
+++ b/reg-lib/cl/CMakeLists.txt
@@ -23,30 +23,14 @@ install(TARGETS ${NAME}
 )
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
-install(FILES
-  ClCompute.h
-  ClContextSingleton.h
-  ClAladinContent.h
-  ClKernelFactory.h
-  ClAffineDeformationFieldKernel.h
-  ClBlockMatchingKernel.h
-  ClConvolutionKernel.h
-  ClOptimiseKernel.h
-  ClResampleImageKernel.h
-  resampleKernel.cl
-  affineDeformationKernel.cl
-  blockMatchingKernel.cl DESTINATION include/cl
-)
-#-----------------------------------------------------------------------------
 set(NAME _reg_openclinfo)
-add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h InfoDevice.h ClContextSingleton.cpp)
+add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ClContextSingleton.cpp)
 target_link_libraries(${NAME} ${OpenCL_LIBRARIES})
 install(TARGETS ${NAME}
   RUNTIME DESTINATION lib
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib
 )
-install(FILES ${NAME}.h DESTINATION include/cl)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE)
\ No newline at end of file
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 5f842fff..4f8d889e 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -48,14 +48,13 @@ else(NOT COMPILE_RESULT_VAR)
 endif(NOT COMPILE_RESULT_VAR)
 #-----------------------------------------------------------------------------
 set(NAME _reg_common_cuda)
-cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.h ${NAME}.cu)
+cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cu)
 target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
 install(TARGETS ${NAME}
     RUNTIME DESTINATION bin
     LIBRARY DESTINATION lib
     ARCHIVE DESTINATION lib
 )
-install(FILES ${NAME}.h DESTINATION include/cuda)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
 set(NAME _reg_cuda_kernels)
@@ -90,19 +89,16 @@ install(TARGETS ${NAME}
     LIBRARY DESTINATION lib
     ARCHIVE DESTINATION lib
 )
-install(FILES blockMatchingKernel.h CudaCompute.h CudaContent.h CudaF3dContent.h CudaMeasure.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda)
-install(FILES CudaKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CudaAffineDeformationFieldKernel.h CudaBlockMatchingKernel.h CudaConvolutionKernel.h CudaOptimiseKernel.h CudaResampleImageKernel.h DESTINATION include/cuda)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
 set(NAME _reg_cudainfo)
-cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h)
+cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp)
 target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
 install(TARGETS ${NAME}
     RUNTIME DESTINATION lib
     LIBRARY DESTINATION lib
     ARCHIVE DESTINATION lib
 )
-install(FILES ${NAME}.h DESTINATION include/cuda)
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE)
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index aecfebd8..e08b18ac 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -114,9 +114,7 @@ set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 
 foreach(EXEC ${EXEC_LIST})
   add_executable(${EXEC} ${EXEC}.cpp)
-  target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain)
-  target_link_libraries(${EXEC} PRIVATE _reg_aladin)
-  target_link_libraries(${EXEC} PRIVATE _reg_f3d)
+  target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain _reg_aladin _reg_f3d)
   catch_discover_tests(${EXEC})
 endforeach(EXEC)
 #-----------------------------------------------------------------------------

From 7deef0b6761a4288c2e010012d96be37031ddc60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 24 Mar 2023 14:22:15 +0000
Subject: [PATCH 115/314] Refactorisations

---
 niftyreg_build_version.txt            |  2 +-
 reg-lib/Compute.cpp                   | 14 ++++++++------
 reg-lib/cuda/CudaContextSingleton.cpp |  1 -
 reg-lib/cuda/CudaF3dContent.h         |  1 -
 reg-lib/cuda/_reg_common_cuda.cu      |  7 +++----
 reg-lib/cuda/_reg_common_cuda.h       |  3 ++-
 reg-lib/cuda/_reg_resampling_gpu.cu   | 14 +++++++-------
 reg-lib/cuda/_reg_resampling_gpu.h    |  1 -
 reg-lib/cuda/_reg_tools_gpu.cu        |  6 +++---
 reg-lib/cuda/_reg_tools_gpu.h         |  6 +++---
 10 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 7b5813c6..f8c9d43a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-234
+235
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index d9578ee6..1d720b88 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -132,6 +132,7 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active
 }
 /* *************************************************************** */
 double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
+    if (!optimiseX && !optimiseY && !optimiseZ) return 0;
     const nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
     switch (transformationGradient->datatype) {
     case NIFTI_TYPE_FLOAT32:
@@ -143,13 +144,14 @@ double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ)
 }
 /* *************************************************************** */
 void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
+    if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return;
     NiftiImage transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
     const bool hasZ = transformationGradient->nz > 1;
-    if (!hasZ)
-        optimiseZ = false;
+    if (!hasZ) optimiseZ = false;
     NiftiImageData ptrX = transformationGradient.data(0);
     NiftiImageData ptrY = transformationGradient.data(1);
     NiftiImageData ptrZ = hasZ ? transformationGradient.data(2) : nullptr;
+    const double maxGradLenInv = 1.0 / maxGradLength;
 
 #ifdef _WIN32
     long i;
@@ -163,16 +165,16 @@ void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optim
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelsPerVolume, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLength)
+    shared(voxelsPerVolume, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLenInv)
 #endif
     for (i = 0; i < voxelsPerVolume; ++i) {
         const double valX = optimiseX ? static_cast<double>(ptrX[i]) : 0;
         const double valY = optimiseY ? static_cast<double>(ptrY[i]) : 0;
         const double valZ = optimiseZ ? static_cast<double>(ptrZ[i]) : 0;
-        ptrX[i] = valX / maxGradLength;
-        ptrY[i] = valY / maxGradLength;
+        ptrX[i] = valX * maxGradLenInv;
+        ptrY[i] = valY * maxGradLenInv;
         if (hasZ)
-            ptrZ[i] = valZ / maxGradLength;
+            ptrZ[i] = valZ * maxGradLenInv;
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaContextSingleton.cpp b/reg-lib/cuda/CudaContextSingleton.cpp
index ec968e6d..fc61aa90 100644
--- a/reg-lib/cuda/CudaContextSingleton.cpp
+++ b/reg-lib/cuda/CudaContextSingleton.cpp
@@ -1,6 +1,5 @@
 #include "CudaContextSingleton.h"
 #include "_reg_common_cuda.h"
-#include "_reg_blocksize_gpu.h"
 
 /* *************************************************************** */
 CudaContextSingleton::CudaContextSingleton() {
diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h
index 770a501c..0b6dc363 100644
--- a/reg-lib/cuda/CudaF3dContent.h
+++ b/reg-lib/cuda/CudaF3dContent.h
@@ -2,7 +2,6 @@
 
 #include "F3dContent.h"
 #include "CudaContent.h"
-#include "_reg_blocksize_gpu.h"
 
 class CudaF3dContent: public F3dContent, public CudaContent {
 public:
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index a401e995..4272a821 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -10,7 +10,6 @@
  */
 
 #include "_reg_common_cuda.h"
-#include "_reg_blocksize_gpu.h"
 
 /* *************************************************************** */
 template <class NiftiType>
@@ -628,7 +627,7 @@ void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) {
     delete texObj;
 }
 /* *************************************************************** */
-UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr,
+UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
                                                       cudaResourceType resType,
                                                       bool normalizedCoordinates,
                                                       size_t size,
@@ -640,7 +639,7 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr,
     resDesc.resType = resType;
     switch (resType) {
     case cudaResourceTypeLinear:
-        resDesc.res.linear.devPtr = devPtr;
+        resDesc.res.linear.devPtr = const_cast<void*>(devPtr);
         resDesc.res.linear.desc.f = channelFormat;
         resDesc.res.linear.desc.x = 32;
         if (channelCount > 1)
@@ -652,7 +651,7 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr,
         resDesc.res.linear.sizeInBytes = size;
         break;
     case cudaResourceTypeArray:
-        resDesc.res.array.array = static_cast<cudaArray*>(devPtr);
+        resDesc.res.array.array = static_cast<cudaArray*>(const_cast<void*>(devPtr));
         break;
     default:
         reg_print_fct_error("reg_createTextureObject");
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 93e31d75..7dd1c1c1 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -12,6 +12,7 @@
 #include <cuda_runtime.h>
 #include <cuda.h>
 #include "_reg_tools.h"
+#include "_reg_blocksize_gpu.h"
 
 /* *************************************************************** */
 #ifndef __VECTOR_TYPES_H__
@@ -141,7 +142,7 @@ int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned
 using UniqueTextureObjectPtr = std::unique_ptr<cudaTextureObject_t, void(*)(cudaTextureObject_t*)>;
 /* *************************************************************** */
 extern "C++"
-UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr,
+UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
 													  cudaResourceType resType,
 													  bool normalizedCoordinates = false,
 													  size_t size = 0,
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index bb86b9cd..f2cb0578 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -26,19 +26,19 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
-    // Create texture object for the floating image
+    // Create the texture object for the floating image
     auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray);
 
-    // Create texture object for the deformation field
+    // Create the texture object for the deformation field
     auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
                                                                     false, activeVoxelNumber * sizeof(float4),
                                                                     cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
-    // Create texture object for the mask
+    // Create the texture object for the mask
     auto&& maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(int),
                                                         cudaChannelFormatKindSigned, 1, cudaFilterModePoint);
 
-    // Bind the real to voxel matrix to texture
+    // Bind the real to voxel matrix to the texture
     mat44 floatingMatrix;
     if (floatingImage->sform_code > 0)
         floatingMatrix = floatingImage->sto_ijk;
@@ -70,15 +70,15 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
-    // Create texture object for the floating image
+    // Create the texture object for the floating image
     auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, true);
 
-    // Create texture object for the deformation field
+    // Create the texture object for the deformation field
     auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
                                                                     false, activeVoxelNumber * sizeof(float4),
                                                                     cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
-    // Bind the real to voxel matrix to texture
+    // Bind the real to voxel matrix to the texture
     mat44 floatingMatrix;
     if (floatingImage->sform_code > 0)
         floatingMatrix = floatingImage->sto_ijk;
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h
index 4dcf81fe..7fcfe95f 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/_reg_resampling_gpu.h
@@ -13,7 +13,6 @@
 #pragma once
 
 #include "_reg_common_cuda.h"
-#include "_reg_blocksize_gpu.h"
 
 extern "C++"
 void reg_resampleImage_gpu(nifti_image *sourceImage,
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 0c2c511a..efaceec3 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -330,19 +330,19 @@ void reg_fillMaskArray_gpu(int num, int *array1_d)
     NR_CUDA_CHECK_KERNEL(G,B)
 }
 /* *************************************************************** */
-float reg_sumReduction_gpu(float *array_d,int size)
+float reg_sumReduction_gpu(float *array_d,size_t size)
 {
     thrust::device_ptr<float> dptr(array_d);
     return thrust::reduce(dptr,dptr+size, 0.f, thrust::plus<float>());
 }
 /* *************************************************************** */
-float reg_maxReduction_gpu(float *array_d,int size)
+float reg_maxReduction_gpu(float *array_d,size_t size)
 {
     thrust::device_ptr<float> dptr(array_d);
     return thrust::reduce(dptr, dptr+size, 0.f, thrust::maximum<float>());
 }
 /* *************************************************************** */
-float reg_minReduction_gpu(float *array_d,int size)
+float reg_minReduction_gpu(float *array_d,size_t size)
 {
     thrust::device_ptr<float> dptr(array_d);
     return thrust::reduce(dptr, dptr+size, 0.f, thrust::minimum<float>());
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 97d454c2..12374e63 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -58,11 +58,11 @@ extern "C++"
 void reg_fillMaskArray_gpu(int num, int *array1_d);
 /* *************************************************************** */
 extern "C++"
-float reg_sumReduction_gpu(float *array_d, int size);
+float reg_sumReduction_gpu(float *array_d, size_t size);
 /* *************************************************************** */
 extern "C++"
-float reg_maxReduction_gpu(float *array_d, int size);
+float reg_maxReduction_gpu(float *array_d, size_t size);
 /* *************************************************************** */
 extern "C++"
-float reg_minReduction_gpu(float *array_d, int size);
+float reg_minReduction_gpu(float *array_d, size_t size);
 /* *************************************************************** */

From 26d195bd472fe65e080a44303f097166c4a73f87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 24 Mar 2023 14:36:39 +0000
Subject: [PATCH 116/314] Add NormaliseGradient() for CUDA and refactor
 GetMaximalLength()

---
 niftyreg_build_version.txt             |  2 +-
 reg-lib/cuda/CMakeLists.txt            |  1 +
 reg-lib/cuda/CudaCompute.cpp           |  9 ++--
 reg-lib/cuda/NormaliseGradient.cu      | 74 ++++++++++++++++++++++++++
 reg-lib/cuda/NormaliseGradient.hpp     | 38 +++++++++++++
 reg-lib/cuda/_reg_blocksize_gpu.cu     |  6 +--
 reg-lib/cuda/_reg_blocksize_gpu.h      |  2 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu     | 26 ---------
 reg-lib/cuda/_reg_optimiser_gpu.h      |  6 ---
 reg-lib/cuda/_reg_optimiser_kernels.cu | 10 ----
 10 files changed, 123 insertions(+), 51 deletions(-)
 create mode 100644 reg-lib/cuda/NormaliseGradient.cu
 create mode 100644 reg-lib/cuda/NormaliseGradient.hpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f8c9d43a..f1f094b1 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-235
+236
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 4f8d889e..f9197bdc 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -82,6 +82,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     _reg_nmi_gpu.cu
     _reg_ssd_gpu.cu
     _reg_optimiser_gpu.cu
+    NormaliseGradient.cu
 )
 target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda)
 install(TARGETS ${NAME}
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index e00aad90..47aaaf29 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -3,6 +3,7 @@
 #include "_reg_resampling_gpu.h"
 #include "_reg_localTransformation_gpu.h"
 #include "_reg_optimiser_gpu.h"
+#include "NormaliseGradient.hpp"
 
 /* *************************************************************** */
 void CudaCompute::ResampleImage(int inter, float paddingValue) {
@@ -116,17 +117,17 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac
 }
 /* *************************************************************** */
 double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
-    // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ
+    if (!optimiseX && !optimiseY && !optimiseZ) return 0;
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3);
-    return reg_getMaximalLength_gpu(con.GetTransformationGradientCuda(), voxelsPerVolume);
+    return NiftyReg::Cuda::GetMaximalLength(con.GetTransformationGradientCuda(), voxelsPerVolume, optimiseX, optimiseY, optimiseZ);
 }
 /* *************************************************************** */
 void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
-    // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ
+    if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return;
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3);
-    reg_multiplyValue_gpu(voxelsPerVolume, con.GetTransformationGradientCuda(), float(1 / maxGradLength));
+    NiftyReg::Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, static_cast<float>(maxGradLength), optimiseX, optimiseY, optimiseZ);
 }
 /* *************************************************************** */
 void CudaCompute::SmoothGradient(float sigma) {
diff --git a/reg-lib/cuda/NormaliseGradient.cu b/reg-lib/cuda/NormaliseGradient.cu
new file mode 100644
index 00000000..1f44fbc7
--- /dev/null
+++ b/reg-lib/cuda/NormaliseGradient.cu
@@ -0,0 +1,74 @@
+#include "NormaliseGradient.hpp"
+#include "_reg_tools_gpu.h"
+
+/* *************************************************************** */
+__global__ static void GetMaximalLengthKernel(float *dists,
+                                              cudaTextureObject_t imageTexture,
+                                              const size_t nVoxels,
+                                              const bool optimiseX,
+                                              const bool optimiseY,
+                                              const bool optimiseZ) {
+    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < nVoxels) {
+        float4 gradValue = tex1Dfetch<float4>(imageTexture, tid);
+        dists[tid] = sqrtf((optimiseX ? gradValue.x * gradValue.x : 0) +
+                           (optimiseY ? gradValue.y * gradValue.y : 0) +
+                           (optimiseZ ? gradValue.z * gradValue.z : 0));
+    }
+}
+/* *************************************************************** */
+float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
+                                       const size_t& nVoxels,
+                                       const bool& optimiseX,
+                                       const bool& optimiseY,
+                                       const bool& optimiseZ) {
+    // Create a texture object for the imageCuda
+    auto&& imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
+                                                         cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+
+    float *dists = nullptr;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float)));
+
+    const unsigned int block = static_cast<unsigned int>(NiftyReg_CudaBlock::GetInstance(0)->Block_GetMaximalLength);
+    const unsigned int grid = static_cast<unsigned int>(reg_ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(block))));
+    dim3 B1(block, 1, 1);
+    dim3 G1(grid, grid, 1);
+    GetMaximalLengthKernel<<<G1, B1>>>(dists, *imageTexture, nVoxels, optimiseX, optimiseY, optimiseZ);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+
+    const float maxDistance = reg_maxReduction_gpu(dists, nVoxels);
+    NR_CUDA_SAFE_CALL(cudaFree(dists));
+
+    return maxDistance;
+}
+/* *************************************************************** */
+__global__ static void NormaliseGradientKernel(float4 *imageCuda,
+                                               const size_t nVoxels,
+                                               const float maxGradLenInv,
+                                               const bool optimiseX,
+                                               const bool optimiseY,
+                                               const bool optimiseZ) {
+    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < nVoxels) {
+        float4 grad = imageCuda[tid];
+        imageCuda[tid] = make_float4(optimiseX ? grad.x * maxGradLenInv : 0,
+                                     optimiseY ? grad.y * maxGradLenInv : 0,
+                                     optimiseZ ? grad.z * maxGradLenInv : 0,
+                                     grad.w);
+    }
+}
+/* *************************************************************** */
+void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda,
+                                       const size_t& nVoxels,
+                                       const float& maxGradLength,
+                                       const bool& optimiseX,
+                                       const bool& optimiseY,
+                                       const bool& optimiseZ) {
+    const unsigned int block = static_cast<unsigned int>(NiftyReg_CudaBlock::GetInstance(0)->Block_reg_arithmetic);
+    const unsigned int grid = static_cast<unsigned int>(ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(block))));
+    const dim3 G(grid, grid, 1);
+    const dim3 B(block, 1, 1);
+    NormaliseGradientKernel<<<G, B>>>(imageCuda, nVoxels, 1 / maxGradLength, optimiseX, optimiseY, optimiseZ);
+    NR_CUDA_CHECK_KERNEL(G, B);
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/NormaliseGradient.hpp b/reg-lib/cuda/NormaliseGradient.hpp
new file mode 100644
index 00000000..7b7c8ce8
--- /dev/null
+++ b/reg-lib/cuda/NormaliseGradient.hpp
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "_reg_common_cuda.h"
+
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+/**
+ * @brief Get maximal value of the gradient image
+ * @param imageCuda Cuda device pointer to the gradient image
+ * @param nVoxels Number of voxels in the image
+ * @param optimiseX Flag to indicate if the x component of the gradient is optimised
+ * @param optimiseY Flag to indicate if the y component of the gradient is optimised
+ * @param optimiseZ Flag to indicate if the z component of the gradient is optimised
+ * @return The maximal value of the gradient image
+*/
+float GetMaximalLength(const float4 *imageCuda,
+                       const size_t& nVoxels,
+                       const bool& optimiseX,
+                       const bool& optimiseY,
+                       const bool& optimiseZ);
+/* *************************************************************** */
+/**
+ * @brief Normalise the gradient image
+ * @param imageCuda Cuda device pointer to the gradient image
+ * @param nVoxels Number of voxels in the image
+ * @param maxGradLength The maximal value of the gradient image
+ * @param optimiseX Flag to indicate if the x component of the gradient is optimised
+ * @param optimiseY Flag to indicate if the y component of the gradient is optimised
+ * @param optimiseZ Flag to indicate if the z component of the gradient is optimised
+*/
+void NormaliseGradient(float4 *imageCuda,
+                       const size_t& nVoxels,
+                       const float& maxGradLength,
+                       const bool& optimiseX,
+                       const bool& optimiseY,
+                       const bool& optimiseZ);
+/* *************************************************************** */
+}   // namespace NiftyReg::Cuda
\ No newline at end of file
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.cu b/reg-lib/cuda/_reg_blocksize_gpu.cu
index cea4c212..32be98ec 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.cu
+++ b/reg-lib/cuda/_reg_blocksize_gpu.cu
@@ -60,7 +60,7 @@ NiftyReg_CudaBlock100::NiftyReg_CudaBlock100() {
     Block_reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem
     Block_reg_GetConjugateGradient1 = 320; // 12 reg - 24 smem
     Block_reg_GetConjugateGradient2 = 384; // 10 reg - 40 smem
-    Block_reg_getEuclideanDistance = 384; // 04 reg - 24 smem
+    Block_GetMaximalLength = 384; // 04 reg - 24 smem
     Block_reg_updateControlPointPosition = 384; // 08 reg - 24 smem
     /* _reg_ssd_gpu */
     Block_reg_getSquaredDifference = 320; // 12 reg - 24 smem - 08 cmem
@@ -128,7 +128,7 @@ NiftyReg_CudaBlock200::NiftyReg_CudaBlock200() {
 //    Block_reg_initialiseConjugateGradient = ; //
 //    Block_reg_GetConjugateGradient1 = ; //
 //    Block_reg_GetConjugateGradient2 = ; //
-//    Block_reg_getEuclideanDistance = ; //
+//    Block_GetMaximalLength = ; //
 //    Block_reg_updateControlPointPosition = ; //
 //    /* _reg_ssd_gpu */
 //    Block_reg_getSquaredDifference = ; //
@@ -196,7 +196,7 @@ NiftyReg_CudaBlock300::NiftyReg_CudaBlock300() {
     Block_reg_initialiseConjugateGradient = 1024; // 20 reg
     Block_reg_GetConjugateGradient1 = 1024; // 22 reg
     Block_reg_GetConjugateGradient2 = 1024; // 25 reg
-    Block_reg_getEuclideanDistance = 1024; // 20 reg
+    Block_GetMaximalLength = 1024; // 20 reg
     Block_reg_updateControlPointPosition = 1024; // 22 reg
     /* _reg_ssd_gpu */
     Block_reg_getSquaredDifference = 768; // 34 reg
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h
index 99782acc..5f341078 100755
--- a/reg-lib/cuda/_reg_blocksize_gpu.h
+++ b/reg-lib/cuda/_reg_blocksize_gpu.h
@@ -70,7 +70,7 @@ class NiftyReg_CudaBlock100 {
     size_t Block_reg_initialiseConjugateGradient;
     size_t Block_reg_GetConjugateGradient1;
     size_t Block_reg_GetConjugateGradient2;
-    size_t Block_reg_getEuclideanDistance;
+    size_t Block_GetMaximalLength;
     size_t Block_reg_updateControlPointPosition;
     /* _reg_ssd_gpu */
     size_t Block_reg_getSquaredDifference;
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index ef369a52..5e4161bb 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -265,32 +265,6 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber) {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
-
-    // Copy constant memory value and bind texture
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4)));
-
-    float *dist_d = nullptr;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&dist_d, nodeNumber * sizeof(float)));
-
-    const unsigned int Grid_reg_getEuclideanDistance = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_getEuclideanDistance));
-    dim3 B1(NR_BLOCK->Block_reg_getEuclideanDistance, 1, 1);
-    dim3 G1(Grid_reg_getEuclideanDistance, Grid_reg_getEuclideanDistance, 1);
-    reg_getEuclideanDistance_kernel <<< G1, B1 >>> (dist_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-    // Unbind the textures
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
-
-    float maxDistance = reg_maxReduction_gpu(dist_d, nodeNumber);
-    NR_CUDA_SAFE_CALL(cudaFree(dist_d));
-
-    return maxDistance;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
                                         float4 *controlPointImageArray_d,
                                         float4 *bestControlPointPosition_d,
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index 44659e65..41b9082a 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -103,12 +103,6 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
                                   float4 *conjugateH_d,
                                   int nodeNumber);
 
-/** @brief
- */
-extern "C++"
-float reg_getMaximalLength_gpu(float4 *gradientArray_d,
-                               int nodeNumber);
-
 /** @brief
  */
 extern "C++"
diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu
index fdabd803..27c00ea8 100755
--- a/reg-lib/cuda/_reg_optimiser_kernels.cu
+++ b/reg-lib/cuda/_reg_optimiser_kernels.cu
@@ -54,16 +54,6 @@ __global__ void reg_GetConjugateGradient2_kernel(float4 *nodeNMIGradientArray_d,
     }
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-__global__ void reg_getEuclideanDistance_kernel(float *distance_d)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid < c_NodeNumber){
-
-        float4 gradValue = tex1Dfetch(gradientImageTexture,tid);
-        distance_d[tid] = sqrtf(gradValue.x*gradValue.x + gradValue.y*gradValue.y + gradValue.z*gradValue.z);
-    }
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageArray_d)
 {
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;

From 516b5ab66490cbf425706f620fc9016560ec27f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 24 Mar 2023 14:40:15 +0000
Subject: [PATCH 117/314] Extend tests for *Compute::GetMaximalLength() and
 *Compute::NormaliseGradient() to handle optimise* parameters

---
 niftyreg_build_version.txt              |  2 +-
 reg-test/reg_test_normaliseGradient.cpp | 61 ++++++++++++++-----------
 2 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f1f094b1..997def45 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-236
+237
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index 4a8572d9..5326af4c 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -17,9 +17,8 @@
 class NormaliseGradientTest {
 protected:
     using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
-    using TestCase = std::tuple<TestData, unique_ptr<F3dContent>, unique_ptr<Platform>>;
+    using TestCase = std::tuple<TestData, unique_ptr<F3dContent>, unique_ptr<Platform>, bool, bool, bool>;
 
-    vector<TestData> testData;
     vector<TestCase> testCases;
 
 public:
@@ -61,6 +60,7 @@ class NormaliseGradientTest {
             transGrad2dPtr[i] = distr(gen);
 
         // Add the test data
+        vector<TestData> testData;
         testData.emplace_back(TestData(
             "2D",
             std::move(reference2d),
@@ -83,27 +83,35 @@ class NormaliseGradientTest {
             std::move(transformationGradient3d)
         ));
 
-        // Add platforms to the test data
+        // Add platforms and optimise* to the test data
         for (auto&& testData : testData) {
-            auto&& [testName, reference, controlPointGrid, testGrad] = testData;
-
             for (auto&& platformType : PlatformTypes) {
-                unique_ptr<Platform> platform{ new Platform(platformType) };
-                // Add content
-                unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
-                unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
-                testCases.push_back({ testData, std::move(content), std::move(platform) });
+                for (int optimiseX = 0; optimiseX < 2; optimiseX++) {
+                    for (int optimiseY = 0; optimiseY < 2; optimiseY++) {
+                        for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) {
+                            // Make a copy of the test data
+                            auto td = testData;
+                            auto&& [testName, reference, controlPointGrid, testGrad] = td;
+                            // Add content
+                            unique_ptr<Platform> platform{ new Platform(platformType) };
+                            unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
+                            unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
+                            testCases.push_back({ std::move(td), std::move(content), std::move(platform), optimiseX, optimiseY, optimiseZ });
+                        }
+                    }
+                }
             }
         }
     }
 
     template<typename T>
     T GetMaximalLength(const nifti_image* transformationGradient, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
+        if (!optimiseX && !optimiseY && !optimiseZ) return 0;
         const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
         const T *ptrX = static_cast<T*>(transformationGradient->data);
         const T *ptrY = &ptrX[voxelsPerVolume];
         const T *ptrZ = &ptrY[voxelsPerVolume];
-        T maxGradValue = 0;
+        T maxGradLength = 0;
 
         if (transformationGradient->nz > 1) {
             for (size_t i = 0; i < voxelsPerVolume; i++) {
@@ -114,7 +122,7 @@ class NormaliseGradientTest {
                     valY = *ptrY++;
                 if (optimiseZ)
                     valZ = *ptrZ++;
-                maxGradValue = std::max(sqrt(valX * valX + valY * valY + valZ * valZ), maxGradValue);
+                maxGradLength = std::max(sqrt(valX * valX + valY * valY + valZ * valZ), maxGradLength);
             }
         } else {
             for (size_t i = 0; i < voxelsPerVolume; i++) {
@@ -123,15 +131,16 @@ class NormaliseGradientTest {
                     valX = *ptrX++;
                 if (optimiseY)
                     valY = *ptrY++;
-                maxGradValue = std::max(sqrt(valX * valX + valY * valY), maxGradValue);
+                maxGradLength = std::max(sqrt(valX * valX + valY * valY), maxGradLength);
             }
         }
 
-        return maxGradValue;
+        return maxGradLength;
     }
 
     template<typename T>
-    void NormaliseGradient(const nifti_image* transformationGradient, const T& maxGradValue, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
+    void NormaliseGradient(const nifti_image* transformationGradient, const T& maxGradLength, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
+        if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return;
         const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
         T *ptrX = static_cast<T*>(transformationGradient->data);
         T *ptrY = &ptrX[voxelsPerVolume];
@@ -145,9 +154,9 @@ class NormaliseGradientTest {
                     valY = ptrY[i];
                 if (optimiseZ)
                     valZ = ptrZ[i];
-                ptrX[i] = valX / maxGradValue;
-                ptrY[i] = valY / maxGradValue;
-                ptrZ[i] = valZ / maxGradValue;
+                ptrX[i] = valX / maxGradLength;
+                ptrY[i] = valY / maxGradLength;
+                ptrZ[i] = valZ / maxGradLength;
             }
         } else {
             for (size_t i = 0; i < voxelsPerVolume; ++i) {
@@ -156,8 +165,8 @@ class NormaliseGradientTest {
                     valX = ptrX[i];
                 if (optimiseY)
                     valY = ptrY[i];
-                ptrX[i] = valX / maxGradValue;
-                ptrY[i] = valY / maxGradValue;
+                ptrX[i] = valX / maxGradLength;
+                ptrY[i] = valY / maxGradLength;
             }
         }
     }
@@ -167,10 +176,10 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
-        auto&& [testData, content, platform] = testCase;
+        auto&& [testData, content, platform, optimiseX, optimiseY, optimiseZ] = testCase;
         auto&& [testName, reference, controlPointGrid, testGrad] = testData;
 
-        SECTION(testName + " " + platform->GetName()) {
+        SECTION(testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ")) {
             // Set the transformation gradient image to host the computation
             NiftiImage transGrad = content->GetTransformationGradient();
             transGrad.copyData(testGrad);
@@ -182,14 +191,14 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
 
             // Calculate the maximal length
             unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
-            const auto maxLength = static_cast<float>(compute->GetMaximalLength(true, true, true));
-            const auto testLength = GetMaximalLength<float>(testGrad, true, true, true);
+            const auto maxLength = static_cast<float>(compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ));
+            const auto testLength = GetMaximalLength<float>(testGrad, optimiseX, optimiseY, optimiseZ);
             // Check the results
             REQUIRE(fabs(maxLength - testLength) < EPS);
 
             // Normalise the gradient
-            compute->NormaliseGradient(maxLength, true, true, true);
-            NormaliseGradient<float>(testGrad, testLength, true, true, true);
+            compute->NormaliseGradient(maxLength, optimiseX, optimiseY, optimiseZ);
+            NormaliseGradient<float>(testGrad, testLength, optimiseX, optimiseY, optimiseZ);
 
             // Check the results
             transGrad = content->GetTransformationGradient();

From 72b1874ef087b52ef5868c1c1028df411cc8d269 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 24 Mar 2023 15:18:11 +0000
Subject: [PATCH 118/314] Move platform and measure initialisation into
 reg_base::SetPlatformType()

---
 niftyreg_build_version.txt | 2 +-
 reg-lib/_reg_base.cpp      | 7 +------
 reg-lib/_reg_base.h        | 9 +++++----
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 997def45..1cf253f9 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-237
+238
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index f684dc38..2949bddd 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -15,8 +15,7 @@
 /* *************************************************************** */
 template<class T>
 reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
-    platformType = PlatformType::Cpu;
-    gpuIdx = 999;
+    SetPlatformType(PlatformType::Cpu);
 
     maxIterationNumber = 150;
     optimiseX = true;
@@ -488,10 +487,6 @@ template<class T>
 void reg_base<T>::Initialise() {
     if (initialised) return;
 
-    platform.reset(new Platform(platformType));
-    platform->SetGpuIdx(gpuIdx);
-    measure.reset(platform->CreateMeasure());
-
     CheckParameters();
 
     // CREATE THE PYRAMID IMAGES
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 42645fb4..bed799bf 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -35,8 +35,6 @@ class reg_base: public InterfaceOptimiser {
 protected:
     // Platform
     unique_ptr<Platform> platform;
-    PlatformType platformType;
-    unsigned gpuIdx;
 
     // Content
     unique_ptr<Content> con;
@@ -145,8 +143,11 @@ class reg_base: public InterfaceOptimiser {
     virtual bool GetSymmetricStatus() { return false; }
 
     // Platform
-    virtual void SetPlatformType(const PlatformType& platformTypeIn) { platformType = platformTypeIn; }
-    virtual void SetGpuIdx(unsigned gpuIdxIn) { gpuIdx = gpuIdxIn; }
+    virtual void SetPlatformType(const PlatformType& platformType) {
+        platform.reset(new Platform(platformType));
+        measure.reset(platform->CreateMeasure());
+    }
+    virtual void SetGpuIdx(const unsigned& gpuIdx) { platform->SetGpuIdx(gpuIdx); }
 
     // Optimisation-related functions
     virtual void SetMaximalIterationNumber(unsigned int);

From b58b3c85988943ce8a48792e0706c9d529cca497 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 27 Mar 2023 14:08:44 +0100
Subject: [PATCH 119/314] Refactor NR_CUDA_SAFE_CALL() and
 NR_CUDA_CHECK_KERNEL()

---
 niftyreg_build_version.txt                   |   2 +-
 reg-lib/cuda/_reg_common_cuda.h              |  75 ++--
 reg-lib/cuda/_reg_localTransformation_gpu.cu | 351 +++++++++----------
 reg-lib/cuda/_reg_tools_gpu.cu               | 127 ++++---
 reg-lib/cuda/affineDeformationKernel.cu      |   2 +-
 reg-lib/cuda/resampleKernel.cu               |   2 +-
 6 files changed, 273 insertions(+), 286 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 1cf253f9..b4249c47 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-238
+239
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 7dd1c1c1..e19d54bc 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -22,52 +22,43 @@ struct __attribute__((aligned(4))) float4 {
 };
 #endif
 /* *************************************************************** */
+namespace NiftyReg::Cuda::Internal {
+/* *************************************************************** */
+inline void SafeCall(const char *file, const int& line) {
 #if CUDART_VERSION >= 3200
-#   define NR_CUDA_SAFE_CALL(call) { \
-		call; \
-		cudaError err = cudaPeekAtLastError(); \
-		if( cudaSuccess != err) { \
-			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
-			__FILE__, __LINE__, cudaGetErrorString(err)); \
-			reg_exit(); \
-		} \
-	}
-#   define NR_CUDA_CHECK_KERNEL(grid,block) { \
-		cudaDeviceSynchronize(); \
-		cudaError err = cudaPeekAtLastError(); \
-		if( err != cudaSuccess) { \
-			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
-			__FILE__, __LINE__, cudaGetErrorString(err)); \
-			fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", \
-			grid.x,grid.y,grid.z,block.x,block.y,block.z); \
-			reg_exit(); \
-		} \
-		else{\
-			printf("[NiftyReg CUDA DEBUG] kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n", \
-			  cudaGetErrorString(cudaGetLastError()), grid.x, grid.y, grid.z, block.x, block.y, block.z);\
-		}\
+	cudaError_t err = cudaPeekAtLastError();
+#else
+	cudaError_t err = cudaDeviceSynchronize();
+#endif
+	if (err != cudaSuccess) {
+		fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err));
+		reg_exit();
 	}
-#else //CUDART_VERSION >= 3200
-#   define NR_CUDA_SAFE_CALL(call) { \
-		call; \
-		cudaError err = cudaDeviceSynchronize(); \
-		if( cudaSuccess != err) { \
-			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
-			__FILE__, __LINE__, cudaGetErrorString(err)); \
-			reg_exit(); \
-		} \
+}
+/* *************************************************************** */
+inline void CheckKernel(const char *file, const int& line, const dim3& grid, const dim3& block) {
+#if CUDART_VERSION >= 3200
+	cudaDeviceSynchronize();
+	cudaError_t err = cudaPeekAtLastError();
+#else
+	cudaError_t err = cudaDeviceSynchronize();
+#endif
+	if (err != cudaSuccess) {
+		fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err));
+		fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", grid.x, grid.y, grid.z, block.x, block.y, block.z);
+		reg_exit();
 	}
-#   define NR_CUDA_CHECK_KERNEL(grid,block) { \
-		cudaError err = cudaDeviceSynchronize(); \
-		if( err != cudaSuccess) { \
-			fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \
-			__FILE__, __LINE__, cudaGetErrorString(err)); \
-			fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", \
-			grid.x,grid.y,grid.z,block.x,block.y,block.z); \
-			reg_exit(); \
-		} \
+#ifndef NDEBUG
+	else {
+		printf("[NiftyReg CUDA DEBUG] kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n",
+			cudaGetErrorString(cudaGetLastError()), grid.x, grid.y, grid.z, block.x, block.y, block.z);
 	}
-#endif //CUDART_VERSION >= 3200
+#endif
+}
+/* *************************************************************** */
+} // namespace NiftyReg::Cuda::Internal
+#define NR_CUDA_SAFE_CALL(call) { call; NiftyReg::Cuda::Internal::SafeCall(__FILE__, __LINE__); }
+#define NR_CUDA_CHECK_KERNEL(grid, block) NiftyReg::Cuda::Internal::CheckKernel(__FILE__, __LINE__, grid, block)
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 1d6a3e0f..180b7438 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -37,15 +37,15 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 		controlPointImage->dy / reference->dy,
 		controlPointImage->dz / reference->dz);
 
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_UseBSpline,&useBSpline,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_UseBSpline,&useBSpline,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int)));
 
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4)));
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)));
 
 	if(reference->nz>1){
 		const unsigned int Grid_reg_spline_getDeformationField3D =
@@ -55,7 +55,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 		// 8 floats of shared memory are allocated per thread
 		reg_spline_getDeformationField3D
 				<<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField3D*8*sizeof(float) >>>(positionFieldImageArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
 		const unsigned int Grid_reg_spline_getDeformationField2D =
@@ -65,12 +65,11 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 		// 4 floats of shared memory are allocated per thread
 		reg_spline_getDeformationField2D
 				<<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField2D*4*sizeof(float) >>>(positionFieldImageArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture))
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture))
-	return;
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -83,64 +82,64 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const int controlPointGridMem = controlPointNumber*sizeof(float4);
 
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem));
 
 	// First compute all the second derivatives
 	float4 *secondDerivativeValues_d;
 	if(controlPointImage->nz>1){
-		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointGridMem))
+		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointGridMem));
 		const unsigned int Grid_bspline_getApproxSecondDerivatives =
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D)));
 		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D,1,1);
 		reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
-		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointGridMem))
-				const unsigned int Grid_bspline_getApproxSecondDerivatives =
-					(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D)));
+		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointGridMem));
+		const unsigned int Grid_bspline_getApproxSecondDerivatives =
+			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D)));
 		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D,1,1);
 		reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture))
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
 
 	// Compute the bending energy from the second derivatives
 	float *penaltyTerm_d;
-	NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTerm_d, controlPointNumber*sizeof(float)))
+	NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTerm_d, controlPointNumber*sizeof(float)));
 
 	if(controlPointImage->nz>1){
 		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
 										  secondDerivativeValues_d,
-										  6*controlPointGridMem))
+										  6*controlPointGridMem));
 		const unsigned int Grid_reg_spline_ApproxBendingEnergy =
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy3D)));
 		dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1);
 		dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy3D,1,1);
 		reg_spline_getApproxBendingEnergy3D_kernel <<< G2, B2 >>>(penaltyTerm_d);
-		NR_CUDA_CHECK_KERNEL(G2,B2)
+		NR_CUDA_CHECK_KERNEL(G2,B2);
 	}
 	else{
 		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
 										  secondDerivativeValues_d,
-										  3*controlPointGridMem))
+										  3*controlPointGridMem));
 		const unsigned int Grid_reg_spline_ApproxBendingEnergy =
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy2D)));
 		dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1);
 		dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy2D,1,1);
 		reg_spline_getApproxBendingEnergy2D_kernel <<< G2, B2 >>>(penaltyTerm_d);
-		NR_CUDA_CHECK_KERNEL(G2,B2)
+		NR_CUDA_CHECK_KERNEL(G2,B2);
 	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture))
-	NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d))
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture));
+	NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d));
 
 	// Compute the mean bending energy value
 	double penaltyValue=reg_sumReduction_gpu(penaltyTerm_d,controlPointNumber);
-	NR_CUDA_SAFE_CALL(cudaFree(penaltyTerm_d))
+	NR_CUDA_SAFE_CALL(cudaFree(penaltyTerm_d));
 
 	return (float)(penaltyValue/(double)controlPointImage->nvox);
 }
@@ -158,61 +157,59 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const int controlPointGridMem = controlPointNumber*sizeof(float4);
 
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem));
 
 	// First compute all the second derivatives
 	float4 *secondDerivativeValues_d;
 	if(controlPointImage->nz>1){
-		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointNumber*sizeof(float4)))
+		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointNumber*sizeof(float4)));
 		const unsigned int Grid_bspline_getApproxSecondDerivatives =
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D)));
 		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D,1,1);
 		reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
-		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointNumber*sizeof(float4)))
+		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointNumber*sizeof(float4)));
 		const unsigned int Grid_bspline_getApproxSecondDerivatives =
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D)));
 		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D,1,1);
 		reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture))
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
 
 	// Compute the gradient
 	bendingEnergyWeight *= 1.f / (float)controlPointNumber;
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&bendingEnergyWeight,sizeof(float)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&bendingEnergyWeight,sizeof(float)));
 	if(controlPointImage->nz>1){
 		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
 										  secondDerivativeValues_d,
-										  6*controlPointNumber*sizeof(float4)))
+										  6*controlPointNumber*sizeof(float4)));
 		const unsigned int Grid_reg_spline_getApproxBendingEnergyGradient =
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D)));
 		dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1);
 		dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D,1,1);
 		reg_spline_getApproxBendingEnergyGradient3D_kernel <<< G2, B2 >>>(nodeGradientArray_d);
-		NR_CUDA_CHECK_KERNEL(G2,B2)
+		NR_CUDA_CHECK_KERNEL(G2,B2);
 	}
 	else{
 		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
 										  secondDerivativeValues_d,
-										  3*controlPointNumber*sizeof(float4)))
+										  3*controlPointNumber*sizeof(float4)));
 		const unsigned int Grid_reg_spline_getApproxBendingEnergyGradient =
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D)));
 		dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1);
 		dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D,1,1);
 		reg_spline_getApproxBendingEnergyGradient2D_kernel <<< G2, B2 >>>(nodeGradientArray_d);
-		NR_CUDA_CHECK_KERNEL(G2,B2)
+		NR_CUDA_CHECK_KERNEL(G2,B2);
 	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture))
-	NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d))
-
-	return;
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture));
+	NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d));
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -230,21 +227,21 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 		reorientation=reg_mat44_to_mat33(&controlPointImage->sto_xyz);
 	else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_xyz);
 	float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)));
 	temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)));
 	temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)));
 
 	// Bind some variables
 	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
 	const int controlPointGridMem = controlPointNumber*sizeof(float4);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)));
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem));
 
 	// The Jacobian matrix is computed for every control point
 	if(controlPointImage->nz>1){
@@ -253,7 +250,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 		dim3 G1(Grid_reg_spline_getApproxJacobianValues3D,Grid_reg_spline_getApproxJacobianValues3D,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues3D,1,1);
 		reg_spline_getApproxJacobianValues3D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
 		const unsigned int Grid_reg_spline_getApproxJacobianValues2D =
@@ -261,9 +258,9 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 		dim3 G1(Grid_reg_spline_getApproxJacobianValues2D,Grid_reg_spline_getApproxJacobianValues2D,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues2D,1,1);
 		reg_spline_getApproxJacobianValues2D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture))
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
 }
 /* *************************************************************** */
 void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
@@ -281,11 +278,11 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 		reorientation=reg_mat44_to_mat33(&controlPointImage->sto_xyz);
 	else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_xyz);
 	float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)));
 	temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)));
 	temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)));
 
 	// Bind some variables
 	const int voxelNumber = CalcVoxelNumber(*referenceImage);
@@ -297,13 +294,13 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 			controlPointImage->dx / referenceImage->dx,
 			controlPointImage->dy / referenceImage->dy,
 			controlPointImage->dz / referenceImage->dz);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)));
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4)));
 
 	// The Jacobian matrix is computed for every voxel
 	if(controlPointImage->nz>1){
@@ -315,7 +312,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 		reg_spline_getJacobianValues3D_kernel
 				<<< G1, B1, NR_BLOCK->Block_reg_spline_getJacobianValues3D*8*sizeof(float)>>>
 				(jacobianMatrices_d, jacobianDet_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
 		const unsigned int Grid_reg_spline_getJacobianValues2D =
@@ -325,9 +322,9 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 		reg_spline_getJacobianValues2D_kernel
 				<<< G1, B1>>>
 				(jacobianMatrices_d, jacobianDet_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture))
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -350,13 +347,13 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 		if(controlPointImage->nz>1){
 			jacSum *= controlPointImage->nz-2;
 			// Allocate array for 3x3 matrices
-			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
+			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)));
 		}
 		else{
 			// Allocate array for 2x2 matrices
-			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)))
+			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)));
 		}
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
+		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
 		reg_spline_ComputeApproxJacobianValues(controlPointImage,
 											   controlPointImageArray_d,
 											   jacobianMatrices_d,
@@ -367,32 +364,32 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 		jacSum=jacNumber;
 		if(controlPointImage->nz>1){
 			// Allocate array for 3x3 matrices
-			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
+			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)));
 		}
 		else{
 			// Allocate array for 2x2 matrices
-			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)))
+			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)));
 		}
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
+		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
 		reg_spline_ComputeJacobianValues(controlPointImage,
 										 referenceImage,
 										 controlPointImageArray_d,
 										 jacobianMatrices_d,
 										 jacobianDet_d);
 	}
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d))
+	NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
 
 	// The Jacobian determinant are squared and logged (might not be english but will do)
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int)));
 	const unsigned int Grid_reg_spline_logSquaredValues =
 		(unsigned int)ceilf(sqrtf((float)jacNumber/(float)(NR_BLOCK->Block_reg_spline_logSquaredValues)));
 	dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1);
 	dim3 B1(NR_BLOCK->Block_reg_spline_logSquaredValues,1,1);
 	reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1)
+	NR_CUDA_CHECK_KERNEL(G1,B1);
 	// Perform the reduction
 	double penaltyTermValue = reg_sumReduction_gpu(jacobianDet_d,jacNumber);
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d))
+	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
 	return penaltyTermValue/jacSum;
 }
 /* *************************************************************** */
@@ -414,8 +411,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 		jacNumber=CalcVoxelNumber(*controlPointImage);
 		if(controlPointImage->nz>1)
 			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
-		else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)))
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
+		else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)));
+		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
 		reg_spline_ComputeApproxJacobianValues(controlPointImage,
 												controlPointImageArray_d,
 												jacobianMatrices_d,
@@ -425,8 +422,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 		jacNumber=CalcVoxelNumber(*referenceImage);
 		if(controlPointImage->nz>1)
 			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
-		else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)))
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
+		else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)));
+		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
 		reg_spline_ComputeJacobianValues(controlPointImage,
 										  referenceImage,
 										  controlPointImageArray_d,
@@ -440,32 +437,32 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 		reorientation=reg_mat44_to_mat33(&controlPointImage->sto_ijk);
 	else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_ijk);
 	float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)));
 	temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)));
 	temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)));
 
 	NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianDeterminantTexture, jacobianDet_d,
-									  jacNumber*sizeof(float)))
+									  jacNumber*sizeof(float)));
 	if(controlPointImage->nz>1)
 		NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d,
 										  9*jacNumber*sizeof(float)))
 	else NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d,
-										   4*jacNumber*sizeof(float)))
+										   4*jacNumber*sizeof(float)));
 
 	// Bind some variables
 	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)));
 	float3 weight=make_float3(
 				referenceImage->dx*jacobianWeight / ((float)jacNumber*controlPointImage->dx),
 				referenceImage->dy*jacobianWeight / ((float)jacNumber*controlPointImage->dy),
 				referenceImage->dz*jacobianWeight / ((float)jacNumber*controlPointImage->dz));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3,&weight,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3,&weight,sizeof(float3)));
 	if(approx){
 		if(controlPointImage->nz>1){
 			const unsigned int Grid_reg_spline_computeApproxJacGradient3D =
@@ -473,7 +470,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 			dim3 G1(Grid_reg_spline_computeApproxJacGradient3D,Grid_reg_spline_computeApproxJacGradient3D,1);
 			dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient3D,1,1);
 			reg_spline_computeApproxJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d);
-			NR_CUDA_CHECK_KERNEL(G1,B1)
+			NR_CUDA_CHECK_KERNEL(G1,B1);
 		}
 		else{
 			const unsigned int Grid_reg_spline_computeApproxJacGradient2D =
@@ -481,7 +478,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 			dim3 G1(Grid_reg_spline_computeApproxJacGradient2D,Grid_reg_spline_computeApproxJacGradient2D,1);
 			dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient2D,1,1);
 			reg_spline_computeApproxJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d);
-			NR_CUDA_CHECK_KERNEL(G1,B1)
+			NR_CUDA_CHECK_KERNEL(G1,B1);
 		}
 	}
 	else{
@@ -491,16 +488,16 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 				controlPointImage->dx / referenceImage->dx,
 				controlPointImage->dy / referenceImage->dy,
 				controlPointImage->dz / referenceImage->dz);
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)))
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)))
+		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
+		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
+		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)));
 		if(controlPointImage->nz>1){
 			const unsigned int Grid_reg_spline_computeJacGradient3D =
 				(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeJacGradient3D)));
 			dim3 G1(Grid_reg_spline_computeJacGradient3D,Grid_reg_spline_computeJacGradient3D,1);
 			dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient3D,1,1);
 			reg_spline_computeJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d);
-			NR_CUDA_CHECK_KERNEL(G1,B1)
+			NR_CUDA_CHECK_KERNEL(G1,B1);
 		}
 		else{
 			const unsigned int Grid_reg_spline_computeJacGradient2D =
@@ -508,13 +505,13 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 			dim3 G1(Grid_reg_spline_computeJacGradient2D,Grid_reg_spline_computeJacGradient2D,1);
 			dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient2D,1,1);
 			reg_spline_computeJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d);
-			NR_CUDA_CHECK_KERNEL(G1,B1)
+			NR_CUDA_CHECK_KERNEL(G1,B1);
 		}
 	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture))
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture))
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d))
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d))
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture));
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture));
+	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
+	NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
 }
 /* *************************************************************** */
 double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
@@ -533,8 +530,8 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 	if(approx){
 		jacNumber=CalcVoxelNumber(*controlPointImage);
 		jacSum = (controlPointImage->nx-2)*(controlPointImage->ny-2)*(controlPointImage->nz-2);
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
+		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)));
+		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
 		reg_spline_ComputeApproxJacobianValues(controlPointImage,
 												controlPointImageArray_d,
 												jacobianMatrices_d,
@@ -542,8 +539,8 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 	}
 	else{
 		jacSum=jacNumber=CalcVoxelNumber(*referenceImage);
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)))
+		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)));
+		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
 		reg_spline_ComputeJacobianValues(controlPointImage,
 										  referenceImage,
 										  controlPointImageArray_d,
@@ -552,63 +549,63 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 	}
 
 	// Check if the Jacobian determinant average
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int)));
 	float *jacobianDet2_d;
-	NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d,jacNumber*sizeof(float)))
-	NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d,jacobianDet_d,jacNumber*sizeof(float),cudaMemcpyDeviceToDevice))
+	NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d,jacNumber*sizeof(float)));
+	NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d,jacobianDet_d,jacNumber*sizeof(float),cudaMemcpyDeviceToDevice));
 	const unsigned int Grid_reg_spline_logSquaredValues =
 		(unsigned int)ceilf(sqrtf((float)jacNumber/(float)(NR_BLOCK->Block_reg_spline_logSquaredValues)));
 	dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1);
 	dim3 B1(NR_BLOCK->Block_reg_spline_logSquaredValues,1,1);
 	reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet2_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1)
+	NR_CUDA_CHECK_KERNEL(G1,B1);
 	float *jacobianDet_h;
-	NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet_h,jacNumber*sizeof(float)))
+	NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet_h,jacNumber*sizeof(float)));
 	NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet_h,jacobianDet2_d,
 								 jacNumber*sizeof(float),
-								 cudaMemcpyDeviceToHost))
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2_d))
+								 cudaMemcpyDeviceToHost));
+	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2_d));
 	double penaltyTermValue=0.;
 	for(int i=0;i<jacNumber;++i) penaltyTermValue += jacobianDet_h[i];
-	NR_CUDA_SAFE_CALL(cudaFreeHost(jacobianDet_h))
+	NR_CUDA_SAFE_CALL(cudaFreeHost(jacobianDet_h));
 	penaltyTermValue /= jacSum;
 	if(penaltyTermValue==penaltyTermValue){
-		NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d))
-		NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d))
+		NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
+		NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
 		return penaltyTermValue;
 	}
 
-	// Need to desorient the Jacobian matrix using the header information - voxel to real conversion
+	// Need to disorient the Jacobian matrix using the header information - voxel to real conversion
 	mat33 reorientation;
 	if(controlPointImage->sform_code>0)
 		reorientation=reg_mat44_to_mat33(&controlPointImage->sto_ijk);
 	else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_ijk);
 	float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)));
 	temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)));
 	temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)));
 
 	NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianDeterminantTexture, jacobianDet_d,
-									  jacNumber*sizeof(float)))
+									  jacNumber*sizeof(float)));
 	NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d,
-									  9*jacNumber*sizeof(float)))
+									  9*jacNumber*sizeof(float)));
 
 	// Bind some variables
 	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
 	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)));
 	if(approx){
 		const unsigned int Grid_reg_spline_approxCorrectFolding =
 			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D)));
 		dim3 G1(Grid_reg_spline_approxCorrectFolding,Grid_reg_spline_approxCorrectFolding,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D,1,1);
 		reg_spline_approxCorrectFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
 		const int voxelNumber = CalcVoxelNumber(*referenceImage);
@@ -617,20 +614,20 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 				controlPointImage->dx / referenceImage->dx,
 				controlPointImage->dy / referenceImage->dy,
 				controlPointImage->dz / referenceImage->dz);
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)))
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)))
+		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
+		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
+		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)));
 		const unsigned int Grid_reg_spline_correctFolding =
 		(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_correctFolding3D)));
 		dim3 G1(Grid_reg_spline_correctFolding,Grid_reg_spline_correctFolding,1);
 		dim3 B1(NR_BLOCK->Block_reg_spline_correctFolding3D,1,1);
 		reg_spline_correctFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture))
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture))
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d))
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d))
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture));
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture));
+	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
+	NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
 	return std::numeric_limits<double>::quiet_NaN();
 }
 /* *************************************************************** */
@@ -644,24 +641,24 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArr
 	mat44 temp_mat=image->qto_xyz;
 	if(image->sform_code>0) temp_mat=image->sto_xyz;
 	float4 temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4)));
 	temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4)));
 	temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)));
 
 	const int voxelNumber = CalcVoxelNumber(*image);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
 
 	const int3 imageDim=make_int3(image->nx,image->ny,image->nz);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3)));
 
 	const unsigned int Grid_reg_getDeformationFromDisplacement =
 	(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDeformationFromDisplacement)));
 	dim3 G1(Grid_reg_getDeformationFromDisplacement,Grid_reg_getDeformationFromDisplacement,1);
 	dim3 B1(NR_BLOCK->Block_reg_getDeformationFromDisplacement,1,1);
 	reg_getDeformationFromDisplacement3D_kernel<<< G1, B1>>>(imageArray_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1)
+	NR_CUDA_CHECK_KERNEL(G1,B1);
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -674,24 +671,24 @@ void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArr
 	mat44 temp_mat=image->qto_xyz;
 	if(image->sform_code>0) temp_mat=image->sto_xyz;
 	float4 temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4)));
 	temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4)));
 	temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)));
 
 	const int voxelNumber = CalcVoxelNumber(*image);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
 
 	const int3 imageDim=make_int3(image->nx,image->ny,image->nz);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3)));
 
 	const unsigned int Grid_reg_getDisplacementFromDeformation =
 		(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDisplacementFromDeformation)));
 	dim3 G1(Grid_reg_getDisplacementFromDeformation,Grid_reg_getDisplacementFromDeformation,1);
 	dim3 B1(NR_BLOCK->Block_reg_getDisplacementFromDeformation,1,1);
 	reg_getDisplacementFromDeformation3D_kernel<<< G1, B1>>>(imageArray_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1)
+	NR_CUDA_CHECK_KERNEL(G1,B1);
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -704,12 +701,12 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
 
 	// Create a mask array where no voxel are excluded
 	int *mask_gpu=nullptr;
-	NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber*sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber*sizeof(int)));
 	reg_fillMaskArray_gpu(voxelNumber,mask_gpu);
 
 	// Define some variables for the deformation fields
 	float4 *tempDef_gpu=nullptr;
-	NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu,voxelNumber*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu,voxelNumber*sizeof(float4)));
 
 	// The deformation field is computed
 	reg_spline_getDeformationField_gpu(cpp_h,
@@ -745,7 +742,7 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
 	for(unsigned int i=0;i<squaringNumber;++i){
 
 		// The deformation field arrays are updated
-		NR_CUDA_SAFE_CALL(cudaMemcpy(tempDef_gpu,def_gpu,voxelNumber*sizeof(float4),cudaMemcpyDeviceToDevice))
+		NR_CUDA_SAFE_CALL(cudaMemcpy(tempDef_gpu,def_gpu,voxelNumber*sizeof(float4),cudaMemcpyDeviceToDevice));
 
 		// The deformation fields are composed
 		reg_defField_compose_gpu(def_h,
@@ -755,8 +752,8 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
 								 voxelNumber);
 	}
 
-	NR_CUDA_SAFE_CALL(cudaFree(tempDef_gpu))
-	NR_CUDA_SAFE_CALL(cudaFree(mask_gpu))
+	NR_CUDA_SAFE_CALL(cudaFree(tempDef_gpu));
+	NR_CUDA_SAFE_CALL(cudaFree(mask_gpu));
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -776,28 +773,28 @@ void reg_defField_compose_gpu(nifti_image *def,
 	if(def->sform_code>0) temp_mat=def->sto_ijk;
 	float4 temp;
 	temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4)));
 	temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4)));
 	temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)));
 
 	temp_mat=def->qto_xyz;
 	if(def->sform_code>0) temp_mat=def->sto_xyz;
 	temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0c,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0c,&temp,sizeof(float4)));
 	temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1c,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1c,&temp,sizeof(float4)));
 	temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2c,&temp,sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2c,&temp,sizeof(float4)));
 
 	const int3 referenceImageDim=make_int3(def->nx,def->ny,def->nz);
 
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,def_gpu,activeVoxel*sizeof(float4)))
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,maskTexture,mask_gpu,activeVoxel*sizeof(int)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,def_gpu,activeVoxel*sizeof(float4)));
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,maskTexture,mask_gpu,activeVoxel*sizeof(int)));
 
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
 
 	if(def->nz>1){
 		const unsigned int Grid_reg_defField_compose3D =
@@ -805,7 +802,7 @@ void reg_defField_compose_gpu(nifti_image *def,
 		dim3 G1(Grid_reg_defField_compose3D,Grid_reg_defField_compose3D,1);
 		dim3 B1(NR_BLOCK->Block_reg_defField_compose3D,1,1);
 		reg_defField_compose3D_kernel<<< G1, B1>>>(defOut_gpu);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
 		const unsigned int Grid_reg_defField_compose2D =
@@ -813,11 +810,11 @@ void reg_defField_compose_gpu(nifti_image *def,
 		dim3 G1(Grid_reg_defField_compose2D,Grid_reg_defField_compose2D,1);
 		dim3 B1(NR_BLOCK->Block_reg_defField_compose2D,1,1);
 		reg_defField_compose2D_kernel<<< G1, B1>>>(defOut_gpu);
-		NR_CUDA_CHECK_KERNEL(G1,B1)
+		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture))
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture))
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture));
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -831,31 +828,31 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
 	const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz);
 	const float3 referenceSpacing=make_float3(deformationField->dx,deformationField->dy,deformationField->dz);
 	const int voxelNumber = referenceDim.x*referenceDim.y*referenceDim.z;
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceDim,sizeof(int3)))
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceSpacing,&referenceSpacing,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceDim,sizeof(int3)));
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceSpacing,&referenceSpacing,sizeof(float3)));
 
 	mat33 reorientation;
 	if(deformationField->sform_code>0)
 		reorientation=reg_mat44_to_mat33(&deformationField->sto_xyz);
 	else reorientation=reg_mat44_to_mat33(&deformationField->qto_xyz);
 	float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)));
 	temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)));
 	temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)))
+	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)));
 
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,*deformationField_gpu,voxelNumber*sizeof(float4)))
+	NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,*deformationField_gpu,voxelNumber*sizeof(float4)));
 
 	const unsigned int Grid_reg_defField_getJacobianMatrix =
 		(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_getJacobianMatrix)));
 	dim3 G1(Grid_reg_defField_getJacobianMatrix,Grid_reg_defField_getJacobianMatrix,1);
 	dim3 B1(NR_BLOCK->Block_reg_defField_getJacobianMatrix);
 	reg_defField_getJacobianMatrix3D_kernel<<<G1,B1>>>(*jacobianMatrices_gpu);
-	NR_CUDA_CHECK_KERNEL(G1,B1)
+	NR_CUDA_CHECK_KERNEL(G1,B1);
 
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture))
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture));
 }
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index efaceec3..21ccde5a 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -37,21 +37,21 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
 	// Ensure that Z=0 if 2D images
 	if(gridSize.z==1) voxelNodeRatio_h.z=0;
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_TargetImageDim,&targetImageDim,sizeof(int3)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&gridSize,sizeof(int3)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio,&voxelNodeRatio_h,sizeof(float3)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&weight,sizeof(float)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_TargetImageDim,&targetImageDim,sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&gridSize,sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio,&voxelNodeRatio_h,sizeof(float3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&weight,sizeof(float)));
 
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber*sizeof(float4)));
 
     const unsigned int Grid_reg_voxelCentric2NodeCentric = (unsigned int)ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_voxelCentric2NodeCentric));
     dim3 B1(NR_BLOCK->Block_reg_voxelCentric2NodeCentric,1,1);
 	dim3 G1(Grid_reg_voxelCentric2NodeCentric,Grid_reg_voxelCentric2NodeCentric,1);
     reg_voxelCentric2NodeCentric_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1)
+	NR_CUDA_CHECK_KERNEL(G1,B1);
 
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
+	NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -63,17 +63,17 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
     const int nodeNumber = CalcVoxelNumber(*controlPointImage);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)));
 
-    float4 *matrix_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3*sizeof(float4)))
+    float4 *matrix_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3*sizeof(float4)));
     matrix_h[0] = make_float4(sourceMatrix_xyz->m[0][0], sourceMatrix_xyz->m[0][1], sourceMatrix_xyz->m[0][2], sourceMatrix_xyz->m[0][3]);
     matrix_h[1] = make_float4(sourceMatrix_xyz->m[1][0], sourceMatrix_xyz->m[1][1], sourceMatrix_xyz->m[1][2], sourceMatrix_xyz->m[1][3]);
     matrix_h[2] = make_float4(sourceMatrix_xyz->m[2][0], sourceMatrix_xyz->m[2][1], sourceMatrix_xyz->m[2][2], sourceMatrix_xyz->m[2][3]);
     float4 *matrix_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3*sizeof(float4)))
-    NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice))
-    NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h))
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3*sizeof(float4)))
+    NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3*sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3*sizeof(float4)));
 
     const unsigned int Grid_reg_convertNMIGradientFromVoxelToRealSpace =
         (unsigned int)ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_convertNMIGradientFromVoxelToRealSpace));
@@ -81,9 +81,9 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
     dim3 B1(NR_BLOCK->Block_reg_convertNMIGradientFromVoxelToRealSpace,1,1);
 
     _reg_convertNMIGradientFromVoxelToRealSpace_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d);
-    NR_CUDA_CHECK_KERNEL(G1,B1)
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(matrixTexture))
-    NR_CUDA_SAFE_CALL(cudaFree(matrix_d))
+    NR_CUDA_CHECK_KERNEL(G1,B1);
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(matrixTexture));
+    NR_CUDA_SAFE_CALL(cudaFree(matrix_d));
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -91,7 +91,6 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
                                 float4 *imageArray_d,
                                 float sigma,
                                 bool smoothXYZ[8])
-
 {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
@@ -99,8 +98,8 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
     const int voxelNumber = CalcVoxelNumber(*image);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int)));
 
     bool axisToSmooth[8];
     if(smoothXYZ==nullptr){
@@ -119,7 +118,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
             if(radius>0){
                 int kernelSize = 1+radius*2;
                 float *kernel_h;
-                NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float)))
+                NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float)));
                 float kernelSum=0;
                 for(int i=-radius; i<=radius; i++){
 					kernel_h[radius+i]=(float)(exp( -((float)i*(float)i)/(2.0*currentSigma*currentSigma)) /
@@ -131,15 +130,15 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
 					kernel_h[i] /= kernelSum;
 
                 float *kernel_d;
-                NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize*sizeof(float)))
-                NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize*sizeof(float), cudaMemcpyHostToDevice))
-                NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h))
+                NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize*sizeof(float)));
+                NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize*sizeof(float), cudaMemcpyHostToDevice));
+                NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h));
 
                 float4 *smoothedImage;
-                NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage,voxelNumber*sizeof(float4)))
+                NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage,voxelNumber*sizeof(float4)));
 
-                NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float)))
-                NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4)))
+                NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float)));
+                NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4)));
 
 				unsigned int Grid_reg_ApplyConvolutionWindow;
                 dim3 B,G;
@@ -150,7 +149,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
                         B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX,1,1);
                         G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                         _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage, kernelSize);
-                        NR_CUDA_CHECK_KERNEL(G,B)
+                        NR_CUDA_CHECK_KERNEL(G,B);
                         break;
                     case 2:
                         Grid_reg_ApplyConvolutionWindow =
@@ -158,7 +157,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
                         B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY,1,1);
                         G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                         _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage, kernelSize);
-                        NR_CUDA_CHECK_KERNEL(G,B)
+                        NR_CUDA_CHECK_KERNEL(G,B);
                         break;
                     case 3:
                         Grid_reg_ApplyConvolutionWindow =
@@ -166,14 +165,14 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
                         B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ,1,1);
                         G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                         _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage, kernelSize);
-                        NR_CUDA_CHECK_KERNEL(G,B)
+                        NR_CUDA_CHECK_KERNEL(G,B);
                         break;
                 }
-                NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture))
-                NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
-                NR_CUDA_SAFE_CALL(cudaFree(kernel_d))
-                NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice))
-                NR_CUDA_SAFE_CALL(cudaFree(smoothedImage))
+                NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture));
+                NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
+                NR_CUDA_SAFE_CALL(cudaFree(kernel_d));
+                NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice));
+                NR_CUDA_SAFE_CALL(cudaFree(smoothedImage));
             }
 		}
 	}
@@ -189,8 +188,8 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
     const int voxelNumber = CalcVoxelNumber(*image);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int)));
 
 	for(int n=0; n<3; n++){
 		if(spacingVoxel[n]>0 && image->dim[n+1]>1){
@@ -198,7 +197,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
 			int kernelSize = 1+radius*2;
 
             float *kernel_h;
-            NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float)))
+            NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float)));
 
 			float coeffSum=0;
 			for(int it=-radius; it<=radius; it++){
@@ -211,15 +210,15 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
 			for(int it=0;it<kernelSize;it++) kernel_h[it] /= coeffSum;
 
             float *kernel_d;
-            NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize*sizeof(float)))
-            NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize*sizeof(float), cudaMemcpyHostToDevice))
-            NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h))
-            NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float)))
+            NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize*sizeof(float)));
+            NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize*sizeof(float), cudaMemcpyHostToDevice));
+            NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h));
+            NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float)));
 
             float4 *smoothedImage_d;
-            NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage_d,voxelNumber*sizeof(float4)))
+            NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage_d,voxelNumber*sizeof(float4)));
 
-            NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4)))
+            NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4)));
 
             unsigned int Grid_reg_ApplyConvolutionWindow;
             dim3 B,G;
@@ -230,7 +229,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
                     B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX,1,1);
                     G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                     _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage_d, kernelSize);
-                    NR_CUDA_CHECK_KERNEL(G,B)
+                    NR_CUDA_CHECK_KERNEL(G,B);
                     break;
                 case 1:
                     Grid_reg_ApplyConvolutionWindow =
@@ -238,7 +237,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
                     B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY,1,1);
                     G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                     _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage_d, kernelSize);
-                    NR_CUDA_CHECK_KERNEL(G,B)
+                    NR_CUDA_CHECK_KERNEL(G,B);
                     break;
                 case 2:
                     Grid_reg_ApplyConvolutionWindow =
@@ -246,14 +245,14 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
                     B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ,1,1);
                     G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                     _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage_d, kernelSize);
-                    NR_CUDA_CHECK_KERNEL(G,B)
+                    NR_CUDA_CHECK_KERNEL(G,B);
                     break;
             }
-            NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture))
-            NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture))
-            NR_CUDA_SAFE_CALL(cudaFree(kernel_d))
-            NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice))
-            NR_CUDA_SAFE_CALL(cudaFree(smoothedImage_d))
+            NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture));
+            NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
+            NR_CUDA_SAFE_CALL(cudaFree(kernel_d));
+            NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice));
+            NR_CUDA_SAFE_CALL(cudaFree(smoothedImage_d));
         }
     }
 }
@@ -263,14 +262,14 @@ void reg_multiplyValue_gpu(int num, float4 *array_d, float value)
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float)));
 
     const unsigned int Grid_reg_multiplyValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
     dim3 G=dim3(Grid_reg_multiplyValues,Grid_reg_multiplyValues,1);
     dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
     reg_multiplyValue_kernel_float4<<<G,B>>>(array_d);
-    NR_CUDA_CHECK_KERNEL(G,B)
+    NR_CUDA_CHECK_KERNEL(G,B);
 }
 /* *************************************************************** */
 void reg_addValue_gpu(int num, float4 *array_d, float value)
@@ -278,14 +277,14 @@ void reg_addValue_gpu(int num, float4 *array_d, float value)
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float)));
 
     const unsigned int Grid_reg_addValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
     dim3 G=dim3(Grid_reg_addValues,Grid_reg_addValues,1);
     dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
     reg_addValue_kernel_float4<<<G,B>>>(array_d);
-    NR_CUDA_CHECK_KERNEL(G,B)
+    NR_CUDA_CHECK_KERNEL(G,B);
 }
 /* *************************************************************** */
 void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d)
@@ -293,13 +292,13 @@ void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d)
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
 
     const unsigned int Grid_reg_multiplyArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
     dim3 G=dim3(Grid_reg_multiplyArrays,Grid_reg_multiplyArrays,1);
     dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
     reg_multiplyArrays_kernel_float4<<<G,B>>>(array1_d,array2_d);
-    NR_CUDA_CHECK_KERNEL(G,B)
+    NR_CUDA_CHECK_KERNEL(G,B);
 }
 /* *************************************************************** */
 void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d)
@@ -307,13 +306,13 @@ void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d)
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
 
     const unsigned int Grid_reg_addArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
     dim3 G=dim3(Grid_reg_addArrays,Grid_reg_addArrays,1);
     dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
     reg_addArrays_kernel_float4<<<G,B>>>(array1_d,array2_d);
-    NR_CUDA_CHECK_KERNEL(G,B)
+    NR_CUDA_CHECK_KERNEL(G,B);
 }
 /* *************************************************************** */
 void reg_fillMaskArray_gpu(int num, int *array1_d)
@@ -321,13 +320,13 @@ void reg_fillMaskArray_gpu(int num, int *array1_d)
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)))
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
 
     const unsigned int Grid_reg_fillMaskArray = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
     dim3 G=dim3(Grid_reg_fillMaskArray,Grid_reg_fillMaskArray,1);
     dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
     reg_fillMaskArray_kernel<<<G,B>>>(array1_d);
-    NR_CUDA_CHECK_KERNEL(G,B)
+    NR_CUDA_CHECK_KERNEL(G,B);
 }
 /* *************************************************************** */
 float reg_sumReduction_gpu(float *array_d,size_t size)
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index eb0d74c1..8f86fa90 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -91,7 +91,7 @@ void launchAffine(mat44 *affineTransformation,
    affineKernel << <G1_b, B1_b >> >(*trans_d, *def_d, *mask_d, dims_d, CalcVoxelNumber(*deformationField), compose);
 
 #ifndef NDEBUG
-   NR_CUDA_CHECK_KERNEL(G1_b, B1_b)
+   NR_CUDA_CHECK_KERNEL(G1_b, B1_b);
 #else
    NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
 #endif
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index be78998d..ef4f0e07 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -438,7 +438,7 @@ void launchResample(nifti_image *floatingImage,
 																interp);
 	 }
 #ifndef NDEBUG
-	NR_CUDA_CHECK_KERNEL(mygrid, myblocks)
+	NR_CUDA_CHECK_KERNEL(mygrid, myblocks);
 #else
 	NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
 #endif

From 84afbe430b3bef6bffae937229496b04492fc55c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 28 Mar 2023 15:44:59 +0100
Subject: [PATCH 120/314] Refactorisations

---
 niftyreg_build_version.txt              |   2 +-
 reg-lib/Compute.cpp                     |  38 +--
 reg-lib/Compute.h                       |   2 +-
 reg-lib/_reg_f3d.cpp                    |   9 +-
 reg-lib/_reg_f3d2.cpp                   |   4 +-
 reg-lib/cpu/_reg_optimiser.cpp          | 347 +++++++++++-------------
 reg-lib/cpu/_reg_optimiser.h            | 102 +++----
 reg-lib/cuda/CudaCompute.cpp            |  14 +-
 reg-lib/cuda/CudaCompute.h              |   2 +-
 reg-lib/cuda/NormaliseGradient.cu       |  28 +-
 reg-lib/cuda/_reg_common_cuda.cu        |  12 +-
 reg-lib/cuda/_reg_common_cuda.h         |  12 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu      | 170 +++++-------
 reg-lib/cuda/_reg_optimiser_gpu.h       |  63 ++---
 reg-lib/cuda/_reg_resampling_gpu.cu     |  20 +-
 reg-test/reg_test_normaliseGradient.cpp |  23 +-
 16 files changed, 401 insertions(+), 447 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index b4249c47..eb08bc0b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-239
+240
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 1d720b88..e211b885 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -88,34 +88,40 @@ void Compute::GetDeformationField(bool composition, bool bspline) {
                                    bspline);
 }
 /* *************************************************************** */
-void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
-    nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).GetControlPointGrid();
+void Compute::UpdateControlPointPosition(float *currentDof,
+                                         const float *bestDof,
+                                         const float *gradient,
+                                         const float& scale,
+                                         const bool& optimiseX,
+                                         const bool& optimiseY,
+                                         const bool& optimiseZ) {
+    const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).GetControlPointGrid();
     if (optimiseX && optimiseY && optimiseZ) {
         // Update the values for all axis displacement
         for (size_t i = 0; i < controlPointGrid->nvox; ++i)
-            currentDOF[i] = bestDOF[i] + scale * gradient[i];
+            currentDof[i] = bestDof[i] + scale * gradient[i];
     } else {
-        size_t voxNumber = controlPointGrid->nvox / (controlPointGrid->nz > 1 ? 3 : 2);
+        const size_t nVoxelsPerDim = controlPointGrid->nvox / (controlPointGrid->nz > 1 ? 3 : 2);
         // Update the values for the x-axis displacement
         if (optimiseX) {
-            for (size_t i = 0; i < voxNumber; ++i)
-                currentDOF[i] = bestDOF[i] + scale * gradient[i];
+            for (size_t i = 0; i < nVoxelsPerDim; ++i)
+                currentDof[i] = bestDof[i] + scale * gradient[i];
         }
         // Update the values for the y-axis displacement
         if (optimiseY) {
-            float *currentDOFY = &currentDOF[voxNumber];
-            float *bestDOFY = &bestDOF[voxNumber];
-            float *gradientY = &gradient[voxNumber];
-            for (size_t i = 0; i < voxNumber; ++i)
-                currentDOFY[i] = bestDOFY[i] + scale * gradientY[i];
+            float *currentDofY = &currentDof[nVoxelsPerDim];
+            const float *bestDofY = &bestDof[nVoxelsPerDim];
+            const float *gradientY = &gradient[nVoxelsPerDim];
+            for (size_t i = 0; i < nVoxelsPerDim; ++i)
+                currentDofY[i] = bestDofY[i] + scale * gradientY[i];
         }
         // Update the values for the z-axis displacement
         if (optimiseZ && controlPointGrid->nz > 1) {
-            float *currentDOFZ = &currentDOF[2 * voxNumber];
-            float *bestDOFZ = &bestDOF[2 * voxNumber];
-            float *gradientZ = &gradient[2 * voxNumber];
-            for (size_t i = 0; i < voxNumber; ++i)
-                currentDOFZ[i] = bestDOFZ[i] + scale * gradientZ[i];
+            float *currentDofZ = &currentDof[2 * nVoxelsPerDim];
+            const float *bestDofZ = &bestDof[2 * nVoxelsPerDim];
+            const float *gradientZ = &gradient[2 * nVoxelsPerDim];
+            for (size_t i = 0; i < nVoxelsPerDim; ++i)
+                currentDofZ[i] = bestDofZ[i] + scale * gradientZ[i];
         }
     }
 }
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index 0390004b..efa43bf4 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -19,7 +19,7 @@ class Compute {
     virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating);
     virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight);
     virtual void GetDeformationField(bool composition, bool bspline);
-    virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ);
+    virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float& scale, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ);
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint);
     virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ);
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 6991cfd0..83d95d02 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -167,8 +167,6 @@ void reg_f3d<T>::CheckParameters() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::Initialise() {
-    if (this->initialised) return;
-
     reg_base<T>::Initialise();
 
     // Determine the grid spacing and create the grid
@@ -331,7 +329,6 @@ void reg_f3d<T>::Initialise() {
     }
 #endif
 
-    this->initialised = true;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::Initialise");
 #endif
@@ -366,7 +363,7 @@ double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
     }
     if (type > 0) {
         if (value != value) {
-            this->optimiser->RestoreBestDOF();
+            this->optimiser->RestoreBestDof();
             reg_print_fct_warn("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm()");
             reg_print_msg_warn("The folding correction scheme failed");
         } else {
@@ -581,8 +578,8 @@ double reg_f3d<T>::GetObjectiveFunctionValue() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::UpdateParameters(float scale) {
-    this->compute->UpdateControlPointPosition(this->optimiser->GetCurrentDOF(),
-                                              this->optimiser->GetBestDOF(),
+    this->compute->UpdateControlPointPosition(this->optimiser->GetCurrentDof(),
+                                              this->optimiser->GetBestDof(),
                                               this->optimiser->GetGradient(),
                                               scale,
                                               this->optimiseX,
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index b13ec33d..f1a6823b 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -219,7 +219,7 @@ double reg_f3d2<T>::ComputeJacobianBasedPenaltyTerm(int type) {
     }
     if (type > 0 && it > 0) {
         if (backwardPenaltyTerm != backwardPenaltyTerm) {
-            this->optimiser->RestoreBestDOF();
+            this->optimiser->RestoreBestDof();
 #ifndef NDEBUG
             reg_print_fct_warn("reg_f3d2<T>::ComputeJacobianBasedPenaltyTerm()");
             reg_print_msg_warn("The backward transformation folding correction scheme failed");
@@ -772,7 +772,7 @@ void reg_f3d2<T>::ExponentiateGradient() {
 template <class T>
 void reg_f3d2<T>::UpdateParameters(float scale) {
     // Restore the last successful control point grids
-    this->optimiser->RestoreBestDOF();
+    this->optimiser->RestoreBestDof();
 
     // The scaled gradient image is added to the current estimate of the transformation using
     // a simple addition or by computing the BCH update
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index f04f64a5..5b1a759c 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -9,23 +9,23 @@
 template <class T>
 reg_optimiser<T>::reg_optimiser() {
     this->dofNumber = 0;
-    this->dofNumber_b = 0;
+    this->dofNumberBw = 0;
     this->ndim = 3;
     this->optimiseX = true;
     this->optimiseY = true;
     this->optimiseZ = true;
-    this->currentDOF = nullptr;
-    this->currentDOF_b = nullptr;
-    this->bestDOF = nullptr;
-    this->bestDOF_b = nullptr;
-    this->backward = false;
+    this->currentDof = nullptr;
+    this->currentDofBw = nullptr;
+    this->bestDof = nullptr;
+    this->bestDofBw = nullptr;
+    this->isBackwards = false;
     this->gradient = nullptr;
     this->currentIterationNumber = 0;
     this->currentObjFunctionValue = 0;
     this->maxIterationNumber = 0;
     this->bestObjFunctionValue = 0;
-    this->objFunc = nullptr;
-    this->gradient_b = nullptr;
+    this->intOpt = nullptr;
+    this->gradientBw = nullptr;
 
 #ifndef NDEBUG
     reg_print_msg_debug("reg_optimiser<T>::reg_optimiser() called");
@@ -34,12 +34,14 @@ reg_optimiser<T>::reg_optimiser() {
 /* *************************************************************** */
 template <class T>
 reg_optimiser<T>::~reg_optimiser() {
-    if (this->bestDOF != nullptr)
-        free(this->bestDOF);
-    this->bestDOF = nullptr;
-    if (this->bestDOF_b != nullptr)
-        free(this->bestDOF_b);
-    this->bestDOF_b = nullptr;
+    if (this->bestDof) {
+        free(this->bestDof);
+        this->bestDof = nullptr;
+    }
+    if (this->bestDofBw) {
+        free(this->bestDofBw);
+        this->bestDofBw = nullptr;
+    }
 #ifndef NDEBUG
     reg_print_msg_debug("reg_optimiser<T>::~reg_optimiser() called");
 #endif
@@ -47,46 +49,46 @@ reg_optimiser<T>::~reg_optimiser() {
 /* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::Initialise(size_t nvox,
-                                  int dim,
+                                  int ndim,
                                   bool optX,
                                   bool optY,
                                   bool optZ,
-                                  size_t maxit,
-                                  size_t start,
-                                  InterfaceOptimiser *obj,
+                                  size_t maxIt,
+                                  size_t startIt,
+                                  InterfaceOptimiser *intOpt,
                                   T *cppData,
                                   T *gradData,
-                                  size_t nvox_b,
-                                  T *cppData_b,
-                                  T *gradData_b) {
+                                  size_t nvoxBw,
+                                  T *cppDataBw,
+                                  T *gradDataBw) {
     this->dofNumber = nvox;
-    this->ndim = dim;
+    this->ndim = ndim;
     this->optimiseX = optX;
     this->optimiseY = optY;
     this->optimiseZ = optZ;
-    this->maxIterationNumber = maxit;
-    this->currentIterationNumber = start;
-    this->currentDOF = cppData;
-    if (this->bestDOF != nullptr) free(this->bestDOF);
-    this->bestDOF = (T*)malloc(this->dofNumber * sizeof(T));
-    memcpy(this->bestDOF, this->currentDOF, this->dofNumber * sizeof(T));
-    if (gradData != nullptr)
+    this->maxIterationNumber = maxIt;
+    this->currentIterationNumber = startIt;
+    this->currentDof = cppData;
+    if (this->bestDof) free(this->bestDof);
+    this->bestDof = (T*)malloc(this->dofNumber * sizeof(T));
+    memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T));
+    if (gradData)
         this->gradient = gradData;
 
-    if (nvox_b > 0)
-        this->dofNumber_b = nvox_b;
-    if (cppData_b != nullptr) {
-        this->currentDOF_b = cppData_b;
-        this->backward = true;
-        if (this->bestDOF_b != nullptr) free(this->bestDOF_b);
-        this->bestDOF_b = (T*)malloc(this->dofNumber_b * sizeof(T));
-        memcpy(this->bestDOF_b, this->currentDOF_b, this->dofNumber_b * sizeof(T));
+    if (nvoxBw > 0)
+        this->dofNumberBw = nvoxBw;
+    if (cppDataBw) {
+        this->currentDofBw = cppDataBw;
+        this->isBackwards = true;
+        if (this->bestDofBw) free(this->bestDofBw);
+        this->bestDofBw = (T*)malloc(this->dofNumberBw * sizeof(T));
+        memcpy(this->bestDofBw, this->currentDofBw, this->dofNumberBw * sizeof(T));
     }
-    if (gradData_b != nullptr)
-        this->gradient_b = gradData_b;
+    if (gradDataBw)
+        this->gradientBw = gradDataBw;
 
-    this->objFunc = obj;
-    this->bestObjFunctionValue = this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue();
+    this->intOpt = intOpt;
+    this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
 
 #ifndef NDEBUG
     reg_print_msg_debug("reg_optimiser<T>::Initialise called");
@@ -94,46 +96,44 @@ void reg_optimiser<T>::Initialise(size_t nvox,
 }
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::RestoreBestDOF() {
+void reg_optimiser<T>::RestoreBestDof() {
     // restore forward transformation
-    memcpy(this->currentDOF, this->bestDOF, this->dofNumber * sizeof(T));
+    memcpy(this->currentDof, this->bestDof, this->dofNumber * sizeof(T));
     // restore backward transformation if required
-    if (this->currentDOF_b != nullptr && this->bestDOF_b != nullptr && this->dofNumber_b > 0)
-        memcpy(this->currentDOF_b, this->bestDOF_b, this->dofNumber_b * sizeof(T));
+    if (this->currentDofBw && this->bestDofBw && this->dofNumberBw > 0)
+        memcpy(this->currentDofBw, this->bestDofBw, this->dofNumberBw * sizeof(T));
 }
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::StoreCurrentDOF() {
+void reg_optimiser<T>::StoreCurrentDof() {
     // save forward transformation
-    memcpy(this->bestDOF, this->currentDOF, this->dofNumber * sizeof(T));
+    memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T));
     // save backward transformation if required
-    if (this->currentDOF_b != nullptr && this->bestDOF_b != nullptr && this->dofNumber_b > 0)
-        memcpy(this->bestDOF_b, this->currentDOF_b, this->dofNumber_b * sizeof(T));
+    if (this->currentDofBw && this->bestDofBw && this->dofNumberBw > 0)
+        memcpy(this->bestDofBw, this->currentDofBw, this->dofNumberBw * sizeof(T));
 }
 /* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::Perturbation(float length) {
     // initialise the randomiser
-    srand(time(nullptr));
+    srand((unsigned)time(nullptr));
     // Reset the number of iteration
     this->currentIterationNumber = 0;
     // Create some perturbation for degree of freedom
     for (size_t i = 0; i < this->dofNumber; ++i) {
-        this->currentDOF[i] = this->bestDOF[i] + length * (float)(rand() - RAND_MAX / 2) / ((float)RAND_MAX / 2.0f);
+        this->currentDof[i] = this->bestDof[i] + length * (float)(rand() - RAND_MAX / 2) / ((float)RAND_MAX / 2.0f);
     }
-    if (this->backward) {
-        for (size_t i = 0; i < this->dofNumber_b; ++i) {
-            this->currentDOF_b[i] = this->bestDOF_b[i] + length * (float)(rand() % 2001 - 1000) / 1000.f;
+    if (this->isBackwards) {
+        for (size_t i = 0; i < this->dofNumberBw; ++i) {
+            this->currentDofBw[i] = this->bestDofBw[i] + length * (float)(rand() % 2001 - 1000) / 1000.f;
         }
     }
-    this->StoreCurrentDOF();
-    this->currentObjFunctionValue = this->bestObjFunctionValue = this->objFunc->GetObjectiveFunctionValue();
+    this->StoreCurrentDof();
+    this->currentObjFunctionValue = this->bestObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
 }
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::Optimise(T maxLength,
-                                T smallLength,
-                                T &startLength) {
+void reg_optimiser<T>::Optimise(T maxLength, T smallLength, T &startLength) {
     size_t lineIteration = 0;
     float addedLength = 0;
     float currentLength = startLength;
@@ -146,10 +146,10 @@ void reg_optimiser<T>::Optimise(T maxLength,
         // Compute the gradient normalisation value
         float normValue = -currentLength;
 
-        this->objFunc->UpdateParameters(normValue);
+        this->intOpt->UpdateParameters(normValue);
 
         // Compute the new value
-        this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue();
+        this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
 
         // Check if the update lead to an improvement of the objective function
         if (this->currentObjFunctionValue > this->bestObjFunctionValue) {
@@ -162,7 +162,7 @@ void reg_optimiser<T>::Optimise(T maxLength,
             reg_print_msg_debug(text);
 #endif
             // Improvement - Save the new objective function value
-            this->objFunc->UpdateBestObjFunctionValue();
+            this->intOpt->UpdateBestObjFunctionValue();
             this->bestObjFunctionValue = this->currentObjFunctionValue;
             // Update the total added length
             addedLength += currentLength;
@@ -170,7 +170,7 @@ void reg_optimiser<T>::Optimise(T maxLength,
             currentLength *= 1.1f;
             currentLength = std::min(currentLength, static_cast<float>(maxLength));
             // Save the current deformation parametrisation
-            this->StoreCurrentDOF();
+            this->StoreCurrentDof();
         } else {
 #ifndef NDEBUG
             char text[255];
@@ -189,20 +189,20 @@ void reg_optimiser<T>::Optimise(T maxLength,
     // update the current size for the next iteration
     startLength = addedLength;
     // Restore the last best deformation parametrisation
-    this->RestoreBestDOF();
+    this->RestoreBestDof();
 }
 /* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::reg_test_optimiser() {
-    this->objFunc->UpdateParameters(1.f);
+    this->intOpt->UpdateParameters(1.f);
 }
 /* *************************************************************** */
 template <class T>
 reg_conjugateGradient<T>::reg_conjugateGradient(): reg_optimiser<T>::reg_optimiser() {
     this->array1 = nullptr;
     this->array2 = nullptr;
-    this->array1_b = nullptr;
-    this->array2_b = nullptr;
+    this->array1Bw = nullptr;
+    this->array2Bw = nullptr;
 
 #ifndef NDEBUG
     reg_print_msg_debug("reg_conjugateGradient<T>::reg_conjugateGradient() called");
@@ -211,21 +211,25 @@ reg_conjugateGradient<T>::reg_conjugateGradient(): reg_optimiser<T>::reg_optimis
 /* *************************************************************** */
 template <class T>
 reg_conjugateGradient<T>::~reg_conjugateGradient() {
-    if (this->array1 != nullptr)
+    if (this->array1) {
         free(this->array1);
-    this->array1 = nullptr;
+        this->array1 = nullptr;
+    }
 
-    if (this->array2 != nullptr)
+    if (this->array2) {
         free(this->array2);
-    this->array2 = nullptr;
+        this->array2 = nullptr;
+    }
 
-    if (this->array1_b != nullptr)
-        free(this->array1_b);
-    this->array1_b = nullptr;
+    if (this->array1Bw) {
+        free(this->array1Bw);
+        this->array1Bw = nullptr;
+    }
 
-    if (this->array2_b != nullptr)
-        free(this->array2_b);
-    this->array2_b = nullptr;
+    if (this->array2Bw) {
+        free(this->array2Bw);
+        this->array2Bw = nullptr;
+    }
 
 #ifndef NDEBUG
     reg_print_msg_debug("reg_conjugateGradient<T>::~reg_conjugateGradient() called");
@@ -234,42 +238,30 @@ reg_conjugateGradient<T>::~reg_conjugateGradient() {
 /* *************************************************************** */
 template <class T>
 void reg_conjugateGradient<T>::Initialise(size_t nvox,
-                                          int dim,
+                                          int ndim,
                                           bool optX,
                                           bool optY,
                                           bool optZ,
-                                          size_t maxit,
-                                          size_t start,
-                                          InterfaceOptimiser *o,
+                                          size_t maxIt,
+                                          size_t startIt,
+                                          InterfaceOptimiser *intOpt,
                                           T *cppData,
                                           T *gradData,
-                                          size_t nvox_b,
-                                          T *cppData_b,
-                                          T *gradData_b) {
-    reg_optimiser<T>::Initialise(nvox,
-                                 dim,
-                                 optX,
-                                 optY,
-                                 optZ,
-                                 maxit,
-                                 start,
-                                 o,
-                                 cppData,
-                                 gradData,
-                                 nvox_b,
-                                 cppData_b,
-                                 gradData_b);
-    this->firstcall = true;
-    if (this->array1 != nullptr) free(this->array1);
-    if (this->array2 != nullptr) free(this->array2);
+                                          size_t nvoxBw,
+                                          T *cppDataBw,
+                                          T *gradDataBw) {
+    reg_optimiser<T>::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
+    this->firstCall = true;
+    if (this->array1) free(this->array1);
+    if (this->array2) free(this->array2);
     this->array1 = (T*)malloc(this->dofNumber * sizeof(T));
     this->array2 = (T*)malloc(this->dofNumber * sizeof(T));
 
-    if (cppData_b != nullptr && gradData_b != nullptr && nvox_b > 0) {
-        if (this->array1_b != nullptr) free(this->array1_b);
-        if (this->array2_b != nullptr) free(this->array2_b);
-        this->array1_b = (T*)malloc(this->dofNumber_b * sizeof(T));
-        this->array2_b = (T*)malloc(this->dofNumber_b * sizeof(T));
+    if (cppDataBw && gradDataBw && nvoxBw > 0) {
+        if (this->array1Bw) free(this->array1Bw);
+        if (this->array2Bw) free(this->array2Bw);
+        this->array1Bw = (T*)malloc(this->dofNumberBw * sizeof(T));
+        this->array2Bw = (T*)malloc(this->dofNumberBw * sizeof(T));
     }
 
 #ifndef NDEBUG
@@ -282,45 +274,43 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
 #ifdef WIN32
     long i;
     long num = (long)this->dofNumber;
-    long num_b = (long)this->dofNumber_b;
+    long numBw = (long)this->dofNumberBw;
 #else
     size_t i;
     size_t num = (size_t)this->dofNumber;
-    size_t num_b = (size_t)this->dofNumber_b;
+    size_t numBw = (size_t)this->dofNumberBw;
 #endif
 
     T *gradientPtr = this->gradient;
     T *array1Ptr = this->array1;
     T *array2Ptr = this->array2;
 
-    T *gradientPtr_b = this->gradient_b;
-    T *array1Ptr_b = this->array1_b;
-    T *array2Ptr_b = this->array2_b;
+    T *gradientPtrBw = this->gradientBw;
+    T *array1PtrBw = this->array1Bw;
+    T *array2PtrBw = this->array2Bw;
 
-    if (this->firstcall) {
+    if (this->firstCall) {
 #ifndef NDEBUG
         reg_print_msg_debug("Conjugate gradient initialisation");
 #endif
         // first conjugate gradient iteration
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(num,array1Ptr,array2Ptr,gradientPtr) \
-    private(i)
+    shared(num,array1Ptr,array2Ptr,gradientPtr)
 #endif
         for (i = 0; i < num; i++) {
             array2Ptr[i] = array1Ptr[i] = -gradientPtr[i];
         }
-        if (this->dofNumber_b > 0) {
+        if (this->dofNumberBw > 0) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \
-    private(i)
+    shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw)
 #endif
-            for (i = 0; i < num_b; i++) {
-                array2Ptr_b[i] = array1Ptr_b[i] = -gradientPtr_b[i];
+            for (i = 0; i < numBw; i++) {
+                array2PtrBw[i] = array1PtrBw[i] = -gradientPtrBw[i];
             }
         }
-        this->firstcall = false;
+        this->firstCall = false;
     } else {
 #ifndef NDEBUG
         reg_print_msg_debug("Conjugate gradient update");
@@ -329,7 +319,6 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(num,array1Ptr,array2Ptr,gradientPtr) \
-    private(i) \
     reduction(+:gg) \
     reduction(+:dgg)
 #endif
@@ -339,41 +328,38 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
         }
         double gam = dgg / gg;
 
-        if (this->dofNumber_b > 0) {
-            double dgg_b = 0, gg_b = 0;
+        if (this->dofNumberBw > 0) {
+            double dggBw = 0, ggBw = 0;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \
-    private(i) \
-    reduction(+:gg_b) \
-    reduction(+:dgg_b)
+    shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw) \
+    reduction(+:ggBw) \
+    reduction(+:dggBw)
 #endif
-            for (i = 0; i < num_b; i++) {
-                gg_b += array2Ptr_b[i] * array1Ptr_b[i];
-                dgg_b += (gradientPtr_b[i] + array1Ptr_b[i]) * gradientPtr_b[i];
+            for (i = 0; i < numBw; i++) {
+                ggBw += array2PtrBw[i] * array1PtrBw[i];
+                dggBw += (gradientPtrBw[i] + array1PtrBw[i]) * gradientPtrBw[i];
             }
-            gam = (dgg + dgg_b) / (gg + gg_b);
+            gam = (dgg + dggBw) / (gg + ggBw);
         }
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(num,array1Ptr,array2Ptr,gradientPtr,gam) \
-    private(i)
+    shared(num,array1Ptr,array2Ptr,gradientPtr,gam)
 #endif
         for (i = 0; i < num; i++) {
             array1Ptr[i] = -gradientPtr[i];
             array2Ptr[i] = (array1Ptr[i] + gam * array2Ptr[i]);
             gradientPtr[i] = -array2Ptr[i];
         }
-        if (this->dofNumber_b > 0) {
+        if (this->dofNumberBw > 0) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b,gam) \
-    private(i)
+    shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw,gam)
 #endif
-            for (i = 0; i < num_b; i++) {
-                array1Ptr_b[i] = -gradientPtr_b[i];
-                array2Ptr_b[i] = (array1Ptr_b[i] + gam * array2Ptr_b[i]);
-                gradientPtr_b[i] = -array2Ptr_b[i];
+            for (i = 0; i < numBw; i++) {
+                array1PtrBw[i] = -gradientPtrBw[i];
+                array2PtrBw[i] = (array1PtrBw[i] + gam * array2PtrBw[i]);
+                gradientPtrBw[i] = -array2PtrBw[i];
             }
         }
     }
@@ -392,7 +378,7 @@ void reg_conjugateGradient<T>::Optimise(T maxLength,
 template <class T>
 void reg_conjugateGradient<T>::Perturbation(float length) {
     reg_optimiser<T>::Perturbation(length);
-    this->firstcall = true;
+    this->firstCall = true;
 }
 /* *************************************************************** */
 template <class T>
@@ -405,78 +391,72 @@ template <class T>
 reg_lbfgs<T>::reg_lbfgs()
     :reg_optimiser<T>::reg_optimiser() {
     this->stepToKeep = 5;
-    this->oldDOF = nullptr;
+    this->oldDof = nullptr;
     this->oldGrad = nullptr;
-    this->diffDOF = nullptr;
+    this->diffDof = nullptr;
     this->diffGrad = nullptr;
 }
 /* *************************************************************** */
 template <class T>
 reg_lbfgs<T>::~reg_lbfgs() {
-    if (this->oldDOF != nullptr)
-        free(this->oldDOF);
-    this->oldDOF = nullptr;
-    if (this->oldGrad != nullptr)
+    if (this->oldDof) {
+        free(this->oldDof);
+        this->oldDof = nullptr;
+    }
+    if (this->oldGrad) {
         free(this->oldGrad);
-    this->oldGrad = nullptr;
+        this->oldGrad = nullptr;
+    }
     for (size_t i = 0; i < this->stepToKeep; ++i) {
-        if (this->diffDOF[i] != nullptr)
-            free(this->diffDOF[i]);
-        this->diffDOF[i] = nullptr;
-        if (this->diffGrad[i] != nullptr)
+        if (this->diffDof[i]) {
+            free(this->diffDof[i]);
+            this->diffDof[i] = nullptr;
+        }
+        if (this->diffGrad[i]) {
             free(this->diffGrad[i]);
-        this->diffGrad[i] = nullptr;
+            this->diffGrad[i] = nullptr;
+        }
+    }
+    if (this->diffDof) {
+        free(this->diffDof);
+        this->diffDof = nullptr;
     }
-    if (this->diffDOF != nullptr)
-        free(this->diffDOF);
-    this->diffDOF = nullptr;
-    if (this->diffGrad != nullptr)
+    if (this->diffGrad) {
         free(this->diffGrad);
-    this->diffGrad = nullptr;
+        this->diffGrad = nullptr;
+    }
 }
 /* *************************************************************** */
 template <class T>
 void reg_lbfgs<T>::Initialise(size_t nvox,
-                              int dim,
+                              int ndim,
                               bool optX,
                               bool optY,
                               bool optZ,
-                              size_t maxit,
-                              size_t start,
-                              InterfaceOptimiser *o,
+                              size_t maxIt,
+                              size_t startIt,
+                              InterfaceOptimiser *intOpt,
                               T *cppData,
                               T *gradData,
-                              size_t nvox_b,
-                              T *cppData_b,
-                              T *gradData_b) {
-    reg_optimiser<T>::Initialise(nvox,
-                                 dim,
-                                 optX,
-                                 optY,
-                                 optZ,
-                                 maxit,
-                                 start,
-                                 o,
-                                 cppData,
-                                 gradData,
-                                 nvox_b,
-                                 cppData_b,
-                                 gradData_b);
+                              size_t nvoxBw,
+                              T *cppDataBw,
+                              T *gradDataBw) {
+    reg_optimiser<T>::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
     this->stepToKeep = 5;
-    this->diffDOF = (T**)malloc(this->stepToKeep * sizeof(T*));
+    this->diffDof = (T**)malloc(this->stepToKeep * sizeof(T*));
     this->diffGrad = (T**)malloc(this->stepToKeep * sizeof(T*));
     for (size_t i = 0; i < this->stepToKeep; ++i) {
-        this->diffDOF[i] = (T*)malloc(this->dofNumber * sizeof(T));
+        this->diffDof[i] = (T*)malloc(this->dofNumber * sizeof(T));
         this->diffGrad[i] = (T*)malloc(this->dofNumber * sizeof(T));
-        if (this->diffDOF[i] == nullptr || this->diffGrad[i] == nullptr) {
+        if (this->diffDof[i] == nullptr || this->diffGrad[i] == nullptr) {
             reg_print_fct_error("reg_lbfgs<T>::Initialise");
             reg_print_msg_error("Out of memory");
             reg_exit();
         }
     }
-    this->oldDOF = (T*)malloc(this->dofNumber * sizeof(T));
+    this->oldDof = (T*)malloc(this->dofNumber * sizeof(T));
     this->oldGrad = (T*)malloc(this->dofNumber * sizeof(T));
-    if (this->oldDOF == nullptr || this->oldGrad == nullptr) {
+    if (this->oldDof == nullptr || this->oldGrad == nullptr) {
         reg_print_fct_error("reg_lbfgs<T>::Initialise");
         reg_print_msg_error("Out of memory");
         reg_exit();
@@ -498,6 +478,3 @@ void reg_lbfgs<T>::Optimise(T maxLength,
                                startLength);
 }
 /* *************************************************************** */
-//template class reg_optimiser<float>;
-//template class reg_conjugateGradient<float>;
-//template class reg_lbfgs<float>;
diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h
index d15b1365..ca6a89b0 100644
--- a/reg-lib/cpu/_reg_optimiser.h
+++ b/reg-lib/cpu/_reg_optimiser.h
@@ -29,16 +29,16 @@ class InterfaceOptimiser {
 template <class T>
 class reg_optimiser {
 protected:
-    bool backward;
+    bool isBackwards;
     size_t dofNumber;
-    size_t dofNumber_b;
+    size_t dofNumberBw;
     size_t ndim;
-    T *currentDOF; // pointer to the cpp nifti image array
-    T *currentDOF_b; // pointer to the cpp nifti image array (backward)
-    T *bestDOF;
-    T *bestDOF_b;
+    T *currentDof; // pointer to the cpp nifti image array
+    T *currentDofBw; // pointer to the cpp nifti image array (backwards)
+    T *bestDof;
+    T *bestDofBw;
     T *gradient;
-    T *gradient_b;
+    T *gradientBw;
     bool optimiseX;
     bool optimiseY;
     bool optimiseZ;
@@ -46,18 +46,18 @@ class reg_optimiser {
     size_t currentIterationNumber;
     double bestObjFunctionValue;
     double currentObjFunctionValue;
-    InterfaceOptimiser *objFunc;
+    InterfaceOptimiser *intOpt;
 
 public:
     reg_optimiser();
     virtual ~reg_optimiser();
-    virtual void StoreCurrentDOF();
-    virtual void RestoreBestDOF();
-    virtual size_t GetDOFNumber() {
+    virtual void StoreCurrentDof();
+    virtual void RestoreBestDof();
+    virtual size_t GetDofNumber() {
         return this->dofNumber;
     }
-    virtual size_t GetDOFNumber_b() {
-        return this->dofNumber_b;
+    virtual size_t GetDofNumberBw() {
+        return this->dofNumberBw;
     }
     virtual size_t GetNDim() {
         return this->ndim;
@@ -65,26 +65,26 @@ class reg_optimiser {
     virtual size_t GetVoxNumber() {
         return this->dofNumber / this->ndim;
     }
-    virtual size_t GetVoxNumber_b() {
-        return this->dofNumber_b / this->ndim;
+    virtual size_t GetVoxNumberBw() {
+        return this->dofNumberBw / this->ndim;
     }
-    virtual T* GetBestDOF() {
-        return this->bestDOF;
+    virtual T* GetBestDof() {
+        return this->bestDof;
     }
-    virtual T* GetBestDOF_b() {
-        return this->bestDOF_b;
+    virtual T* GetBestDofBw() {
+        return this->bestDofBw;
     }
-    virtual T* GetCurrentDOF() {
-        return this->currentDOF;
+    virtual T* GetCurrentDof() {
+        return this->currentDof;
     }
-    virtual T* GetCurrentDOF_b() {
-        return this->currentDOF_b;
+    virtual T* GetCurrentDofBw() {
+        return this->currentDofBw;
     }
     virtual T* GetGradient() {
         return this->gradient;
     }
-    virtual T* GetGradient_b() {
-        return this->gradient_b;
+    virtual T* GetGradientBw() {
+        return this->gradientBw;
     }
     virtual bool GetOptimiseX() {
         return this->optimiseX;
@@ -117,18 +117,18 @@ class reg_optimiser {
         this->currentIterationNumber++;
     }
     virtual void Initialise(size_t nvox,
-                            int dim,
+                            int ndim,
                             bool optX,
                             bool optY,
                             bool optZ,
-                            size_t maxit,
-                            size_t start,
-                            InterfaceOptimiser *o,
+                            size_t maxIt,
+                            size_t startIt,
+                            InterfaceOptimiser *intOpt,
                             T *cppData,
                             T *gradData = nullptr,
-                            size_t nvox_b = 0,
-                            T *cppData_b = nullptr,
-                            T *gradData_b = nullptr);
+                            size_t nvoxBw = 0,
+                            T *cppDataBw = nullptr,
+                            T *gradDataBw = nullptr);
     virtual void Optimise(T maxLength,
                           T smallLength,
                           T &startLength);
@@ -145,10 +145,10 @@ template <class T>
 class reg_conjugateGradient: public reg_optimiser<T> {
 protected:
     T *array1;
-    T *array1_b;
+    T *array1Bw;
     T *array2;
-    T *array2_b;
-    bool firstcall;
+    T *array2Bw;
+    bool firstCall;
 
     void UpdateGradientValues(); /// @brief Update the gradient array
 
@@ -156,18 +156,18 @@ class reg_conjugateGradient: public reg_optimiser<T> {
     reg_conjugateGradient();
     virtual ~reg_conjugateGradient();
     virtual void Initialise(size_t nvox,
-                            int dim,
+                            int ndim,
                             bool optX,
                             bool optY,
                             bool optZ,
-                            size_t maxit,
-                            size_t start,
-                            InterfaceOptimiser *o,
+                            size_t maxIt,
+                            size_t startIt,
+                            InterfaceOptimiser *intOpt,
                             T *cppData = nullptr,
                             T *gradData = nullptr,
-                            size_t nvox_b = 0,
-                            T *cppData_b = nullptr,
-                            T *gradData_b = nullptr) override;
+                            size_t nvoxBw = 0,
+                            T *cppDataBw = nullptr,
+                            T *gradDataBw = nullptr) override;
     virtual void Optimise(T maxLength,
                           T smallLength,
                           T &startLength) override;
@@ -184,27 +184,27 @@ template <class T>
 class reg_lbfgs: public reg_optimiser<T> {
 protected:
     size_t stepToKeep;
-    T *oldDOF;
+    T *oldDof;
     T *oldGrad;
-    T **diffDOF;
+    T **diffDof;
     T **diffGrad;
 
 public:
     reg_lbfgs();
     virtual ~reg_lbfgs();
     virtual void Initialise(size_t nvox,
-                            int dim,
+                            int ndim,
                             bool optX,
                             bool optY,
                             bool optZ,
-                            size_t maxit,
-                            size_t start,
-                            InterfaceOptimiser *o,
+                            size_t maxIt,
+                            size_t startIt,
+                            InterfaceOptimiser *intOpt,
                             T *cppData = nullptr,
                             T *gradData = nullptr,
-                            size_t nvox_b = 0,
-                            T *cppData_b = nullptr,
-                            T *gradData_b = nullptr) override;
+                            size_t nvoxBw = 0,
+                            T *cppDataBw = nullptr,
+                            T *gradDataBw = nullptr) override;
     virtual void Optimise(T maxLength,
                           T smallLength,
                           T &startLength) override;
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 47aaaf29..5284024c 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -96,12 +96,18 @@ void CudaCompute::GetDeformationField(bool composition, bool bspline) {
                                        bspline);
 }
 /* *************************************************************** */
-void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
+void CudaCompute::UpdateControlPointPosition(float *currentDof,
+                                             const float *bestDof,
+                                             const float *gradient,
+                                             const float& scale,
+                                             const bool& optimiseX,
+                                             const bool& optimiseY,
+                                             const bool& optimiseZ) {
     // TODO Fix reg_updateControlPointPosition_gpu to accept optimiseX, optimiseY, optimiseZ
     reg_updateControlPointPosition_gpu(dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid(),
-                                       reinterpret_cast<float4*>(currentDOF),
-                                       reinterpret_cast<float4*>(bestDOF),
-                                       reinterpret_cast<float4*>(gradient),
+                                       reinterpret_cast<float4*>(currentDof),
+                                       reinterpret_cast<const float4*>(bestDof),
+                                       reinterpret_cast<const float4*>(gradient),
                                        scale);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 5f53b12e..fbde281d 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -17,7 +17,7 @@ class CudaCompute: public Compute {
     virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) override;
     virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) override;
     virtual void GetDeformationField(bool composition, bool bspline) override;
-    virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override;
+    virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float& scale, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) override;
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override;
     virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override;
diff --git a/reg-lib/cuda/NormaliseGradient.cu b/reg-lib/cuda/NormaliseGradient.cu
index 1f44fbc7..4d5ed26f 100644
--- a/reg-lib/cuda/NormaliseGradient.cu
+++ b/reg-lib/cuda/NormaliseGradient.cu
@@ -23,18 +23,18 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
                                        const bool& optimiseY,
                                        const bool& optimiseZ) {
     // Create a texture object for the imageCuda
-    auto&& imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
-                                                         cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+    auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
+                                                       cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
     float *dists = nullptr;
     NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float)));
 
-    const unsigned int block = static_cast<unsigned int>(NiftyReg_CudaBlock::GetInstance(0)->Block_GetMaximalLength);
-    const unsigned int grid = static_cast<unsigned int>(reg_ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(block))));
-    dim3 B1(block, 1, 1);
-    dim3 G1(grid, grid, 1);
-    GetMaximalLengthKernel<<<G1, B1>>>(dists, *imageTexture, nVoxels, optimiseX, optimiseY, optimiseZ);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
+    const unsigned int blocks = static_cast<unsigned int>(NiftyReg_CudaBlock::GetInstance(0)->Block_GetMaximalLength);
+    const unsigned int grids = static_cast<unsigned int>(reg_ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(blocks))));
+    dim3 blockDims(blocks, 1, 1);
+    dim3 gridDims(grids, grids, 1);
+    GetMaximalLengthKernel<<<gridDims, blockDims>>>(dists, *imageTexture, nVoxels, optimiseX, optimiseY, optimiseZ);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 
     const float maxDistance = reg_maxReduction_gpu(dists, nVoxels);
     NR_CUDA_SAFE_CALL(cudaFree(dists));
@@ -64,11 +64,11 @@ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda,
                                        const bool& optimiseX,
                                        const bool& optimiseY,
                                        const bool& optimiseZ) {
-    const unsigned int block = static_cast<unsigned int>(NiftyReg_CudaBlock::GetInstance(0)->Block_reg_arithmetic);
-    const unsigned int grid = static_cast<unsigned int>(ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(block))));
-    const dim3 G(grid, grid, 1);
-    const dim3 B(block, 1, 1);
-    NormaliseGradientKernel<<<G, B>>>(imageCuda, nVoxels, 1 / maxGradLength, optimiseX, optimiseY, optimiseZ);
-    NR_CUDA_CHECK_KERNEL(G, B);
+    const unsigned int blocks = static_cast<unsigned int>(NiftyReg_CudaBlock::GetInstance(0)->Block_reg_arithmetic);
+    const unsigned int grids = static_cast<unsigned int>(ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(blocks))));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+    NormaliseGradientKernel<<<gridDims, blockDims>>>(imageCuda, nVoxels, 1 / maxGradLength, optimiseX, optimiseY, optimiseZ);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 4272a821..5d2d10f5 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -628,12 +628,12 @@ void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) {
 }
 /* *************************************************************** */
 UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
-                                                      cudaResourceType resType,
-                                                      bool normalizedCoordinates,
-                                                      size_t size,
-                                                      cudaChannelFormatKind channelFormat,
-                                                      unsigned channelCount,
-                                                      cudaTextureFilterMode filterMode) {
+                                                      const cudaResourceType& resType,
+                                                      const bool& normalizedCoordinates,
+                                                      const size_t& size,
+                                                      const cudaChannelFormatKind& channelFormat,
+                                                      const unsigned& channelCount,
+                                                      const cudaTextureFilterMode& filterMode) {
     // Specify texture
     cudaResourceDesc resDesc{};
     resDesc.resType = resType;
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index e19d54bc..2eb0a944 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -134,10 +134,10 @@ using UniqueTextureObjectPtr = std::unique_ptr<cudaTextureObject_t, void(*)(cuda
 /* *************************************************************** */
 extern "C++"
 UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
-													  cudaResourceType resType,
-													  bool normalizedCoordinates = false,
-													  size_t size = 0,
-													  cudaChannelFormatKind channelFormat = cudaChannelFormatKindNone,
-													  unsigned channelCount = 1,
-													  cudaTextureFilterMode filterMode = cudaFilterModeLinear);
+													  const cudaResourceType& resType,
+													  const bool& normalizedCoordinates = false,
+													  const size_t& size = 0,
+													  const cudaChannelFormatKind& channelFormat = cudaChannelFormatKindNone,
+													  const unsigned& channelCount = 1,
+													  const cudaTextureFilterMode& filterMode = cudaFilterModeLinear);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 5e4161bb..32d407e3 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -1,99 +1,92 @@
 #include "_reg_optimiser_gpu.h"
 #include "_reg_optimiser_kernels.cu"
 
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser<float>::reg_optimiser() {
-    this->currentDOF_gpu = nullptr;
-    this->bestDOF_gpu = nullptr;
-    this->gradient_gpu = nullptr;
+    this->currentDofCuda = nullptr;
+    this->bestDofCuda = nullptr;
+    this->gradientCuda = nullptr;
 
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_optimiser_gpu::reg_optimiser_gpu() called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 reg_optimiser_gpu::~reg_optimiser_gpu() {
-    if (this->bestDOF_gpu != nullptr) {
-        cudaCommon_free(this->bestDOF_gpu);
-        this->bestDOF_gpu = nullptr;
+    if (this->bestDofCuda) {
+        cudaCommon_free(this->bestDofCuda);
+        this->bestDofCuda = nullptr;
     }
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_optimiser_gpu::~reg_optimiser_gpu() called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_optimiser_gpu::Initialise(size_t nvox,
-                                   int dim,
+                                   int ndim,
                                    bool optX,
                                    bool optY,
                                    bool optZ,
-                                   size_t maxit,
-                                   size_t start,
-                                   InterfaceOptimiser *obj,
+                                   size_t maxIt,
+                                   size_t startIt,
+                                   InterfaceOptimiser *intOpt,
                                    float *cppData,
                                    float *gradData,
-                                   size_t a,
-                                   float *b,
-                                   float *c) {
+                                   size_t nvoxBw,
+                                   float *cppDataBw,
+                                   float *gradDataBw) {
     this->dofNumber = nvox;
-    this->ndim = dim;
+    this->ndim = ndim;
     this->optimiseX = optX;
     this->optimiseY = optY;
     this->optimiseZ = optZ;
-    this->maxIterationNumber = maxit;
-    this->currentIterationNumber = start;
+    this->maxIterationNumber = maxIt;
+    this->currentIterationNumber = startIt;
 
     // Arrays are converted from float to float4
-    this->currentDOF_gpu = reinterpret_cast<float4*>(cppData);
+    this->currentDofCuda = reinterpret_cast<float4*>(cppData);
 
-    if (gradData != nullptr)
-        this->gradient_gpu = reinterpret_cast<float4*>(gradData);
+    if (gradData)
+        this->gradientCuda = reinterpret_cast<float4*>(gradData);
 
-    if (this->bestDOF_gpu != nullptr)
-        cudaCommon_free(this->bestDOF_gpu);
+    if (this->bestDofCuda)
+        cudaCommon_free(this->bestDofCuda);
 
-    if (cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu, (int)(this->GetVoxNumber()))) {
+    if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, (int)(this->GetVoxNumber()))) {
         printf("[NiftyReg ERROR] Error when allocating the best control point array on the GPU.\n");
         reg_exit();
     }
 
-    this->StoreCurrentDOF();
+    this->StoreCurrentDof();
 
-    this->objFunc = obj;
-    this->bestObjFunctionValue = this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue();
+    this->intOpt = intOpt;
+    this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
 
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_optimiser_gpu::Initialise() called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_optimiser_gpu::RestoreBestDOF() {
+/* *************************************************************** */
+void reg_optimiser_gpu::RestoreBestDof() {
     // restore forward transformation
-    NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDOF_gpu,
-                                 this->bestDOF_gpu,
+    NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofCuda,
+                                 this->bestDofCuda,
                                  this->GetVoxNumber() * sizeof(float4),
                                  cudaMemcpyDeviceToDevice));
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_optimiser_gpu::StoreCurrentDOF() {
+/* *************************************************************** */
+void reg_optimiser_gpu::StoreCurrentDof() {
     // Store forward transformation
-    NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDOF_gpu,
-                                 this->currentDOF_gpu,
+    NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofCuda,
+                                 this->currentDofCuda,
                                  this->GetVoxNumber() * sizeof(float4),
                                  cudaMemcpyDeviceToDevice));
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_optimiser_gpu::Perturbation(float length) {
     // TODO: Implement reg_optimiser_gpu::Perturbation()
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_optimiser_gpu() {
     this->array1 = nullptr;
     this->array2 = nullptr;
@@ -101,15 +94,14 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_o
     printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() {
-    if (this->array1 != nullptr) {
+    if (this->array1) {
         cudaCommon_free(this->array1);
         this->array1 = nullptr;
     }
 
-    if (this->array2 != nullptr) {
+    if (this->array2) {
         cudaCommon_free(this->array2);
         this->array2 = nullptr;
     }
@@ -117,62 +109,50 @@ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() {
     printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_conjugateGradient_gpu::Initialise(size_t nvox,
-                                           int dim,
+                                           int ndim,
                                            bool optX,
                                            bool optY,
                                            bool optZ,
-                                           size_t maxit,
-                                           size_t start,
-                                           InterfaceOptimiser *obj,
+                                           size_t maxIt,
+                                           size_t startIt,
+                                           InterfaceOptimiser *intOpt,
                                            float *cppData,
                                            float *gradData,
-                                           size_t a,
-                                           float *b,
-                                           float *c) {
-    reg_optimiser_gpu::Initialise(nvox,
-                                  dim,
-                                  optX,
-                                  optY,
-                                  optZ,
-                                  maxit,
-                                  start,
-                                  obj,
-                                  cppData,
-                                  gradData);
-    this->firstcall = true;
+                                           size_t nvoxBw,
+                                           float *cppDataBw,
+                                           float *gradDataBw) {
+    reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData);
+    this->firstCall = true;
     if (cudaCommon_allocateArrayToDevice<float4>(&this->array1, (int)(this->GetVoxNumber()))) {
-        printf("[NiftyReg ERROR] Error when allocating the first conjugate gradient_gpu array on the GPU.\n");
+        printf("[NiftyReg ERROR] Error when allocating the first conjugate gradient array on the GPU.\n");
         reg_exit();
     }
     if (cudaCommon_allocateArrayToDevice<float4>(&this->array2, (int)(this->GetVoxNumber()))) {
-        printf("[NiftyReg ERROR] Error when allocating the second conjugate gradient_gpu array on the GPU.\n");
+        printf("[NiftyReg ERROR] Error when allocating the second conjugate gradient array on the GPU.\n");
         reg_exit();
     }
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::Initialise() called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_conjugateGradient_gpu::UpdateGradientValues() {
-    if (this->firstcall) {
-        reg_initialiseConjugateGradient_gpu(this->gradient_gpu,
+    if (this->firstCall) {
+        reg_initialiseConjugateGradient_gpu(this->gradientCuda,
                                             this->array1,
                                             this->array2,
-                                            (int)(this->GetVoxNumber()));
-        this->firstcall = false;
+                                            this->GetVoxNumber());
+        this->firstCall = false;
     } else {
-        reg_GetConjugateGradient_gpu(this->gradient_gpu,
+        reg_GetConjugateGradient_gpu(this->gradientCuda,
                                      this->array1,
                                      this->array2,
-                                     (int)(this->GetVoxNumber()));
+                                     this->GetVoxNumber());
     }
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_conjugateGradient_gpu::Optimise(float maxLength,
                                          float smallLength,
                                          float &startLength) {
@@ -181,20 +161,17 @@ void reg_conjugateGradient_gpu::Optimise(float maxLength,
                             smallLength,
                             startLength);
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_conjugateGradient_gpu::Perturbation(float length) {
     reg_optimiser_gpu::Perturbation(length);
-    this->firstcall = true;
+    this->firstCall = true;
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_conjugateGradient_gpu::reg_test_optimiser() {
     this->UpdateGradientValues();
     reg_optimiser_gpu::reg_test_optimiser();
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d,
                                          float4 *conjugateG_d,
                                          float4 *conjugateH_d,
@@ -215,8 +192,7 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d,
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
     NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateH_d, conjugateG_d, nodeNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
                                   float4 *conjugateG_d,
                                   float4 *conjugateH_d,
@@ -263,13 +239,12 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
 
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
+/* *************************************************************** */
+void reg_updateControlPointPosition_gpu(const nifti_image *controlPointImage,
                                         float4 *controlPointImageArray_d,
-                                        float4 *bestControlPointPosition_d,
-                                        float4 *gradientArray_d,
-                                        float currentLength) {
+                                        const float4 *bestControlPointPosition_d,
+                                        const float4 *gradientArray_d,
+                                        const float& currentLength) {
     // Get the BlockSize - The values have been set in CudaContextSingleton
     NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
 
@@ -293,5 +268,4 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
     printf("[NiftyReg DEBUG] reg_updateControlPointPosition_gpu() called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index 41b9082a..bf94b64c 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -4,52 +4,50 @@
 #include "_reg_optimiser.h"
 #include "_reg_tools_gpu.h"
 
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 /** @class reg_optimiser_gpu
  * @brief Standard gradient ascent optimisation for GPU
  */
 class reg_optimiser_gpu: public reg_optimiser<float> {
 protected:
-    float4 *currentDOF_gpu; // pointers
-    float4 *gradient_gpu; // pointers
-    float4 *bestDOF_gpu; // allocated here
+    float4 *currentDofCuda; // pointers
+    float4 *gradientCuda; // pointers
+    float4 *bestDofCuda; // allocated here
 
 public:
     reg_optimiser_gpu();
     virtual ~reg_optimiser_gpu();
 
     // Float4 are casted to float for compatibility with the cpu class
-    virtual float* GetCurrentDOF() override {
-        return reinterpret_cast<float*>(this->currentDOF_gpu);
+    virtual float* GetCurrentDof() override {
+        return reinterpret_cast<float*>(this->currentDofCuda);
     }
-    virtual float* GetBestDOF() override {
-        return reinterpret_cast<float*>(this->bestDOF_gpu);
+    virtual float* GetBestDof() override {
+        return reinterpret_cast<float*>(this->bestDofCuda);
     }
     virtual float* GetGradient() override {
-        return reinterpret_cast<float*>(this->gradient_gpu);
+        return reinterpret_cast<float*>(this->gradientCuda);
     }
 
-    virtual void RestoreBestDOF() override;
-    virtual void StoreCurrentDOF() override;
+    virtual void RestoreBestDof() override;
+    virtual void StoreCurrentDof() override;
 
     virtual void Initialise(size_t nvox,
-                            int dim,
+                            int ndim,
                             bool optX,
                             bool optY,
                             bool optZ,
-                            size_t maxit,
+                            size_t maxIt,
                             size_t start,
-                            InterfaceOptimiser *o,
+                            InterfaceOptimiser *intOpt,
                             float *cppData,
                             float *gradData = nullptr,
-                            size_t a = 0,
-                            float *b = nullptr,
-                            float *c = nullptr) override;
+                            size_t nvoxBw = 0,
+                            float *cppDataBw = nullptr,
+                            float *gradDataBw = nullptr) override;
     virtual void Perturbation(float length) override;
 };
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 /** @class reg_conjugateGradient_gpu
  * @brief Conjugate gradient ascent optimisation for GPU
  */
@@ -57,7 +55,7 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
 protected:
     float4 *array1;
     float4 *array2;
-    bool firstcall;
+    bool firstCall;
     void UpdateGradientValues(); /// @brief Update the gradient array
 
 public:
@@ -65,18 +63,18 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
     virtual ~reg_conjugateGradient_gpu();
 
     virtual void Initialise(size_t nvox,
-                            int dim,
+                            int ndim,
                             bool optX,
                             bool optY,
                             bool optZ,
-                            size_t maxit,
+                            size_t maxIt,
                             size_t start,
-                            InterfaceOptimiser *o,
+                            InterfaceOptimiser *intOpt,
                             float *cppData,
                             float *gradData = nullptr,
-                            size_t a = 0,
-                            float *b = nullptr,
-                            float *c = nullptr) override;
+                            size_t nvoxBw = 0,
+                            float *cppDataBw = nullptr,
+                            float *gradDataBw = nullptr) override;
     virtual void Optimise(float maxLength,
                           float smallLength,
                           float &startLength) override;
@@ -85,8 +83,7 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
     // Function used for testing
     virtual void reg_test_optimiser() override;
 };
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 /** @brief
  */
 extern "C++"
@@ -106,8 +103,8 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
 /** @brief
  */
 extern "C++"
-void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage,
+void reg_updateControlPointPosition_gpu(const nifti_image *controlPointImage,
                                         float4 *controlPointImageArray_d,
-                                        float4 *bestControlPointPosition_d,
-                                        float4 *gradientArray_d,
-                                        float currentLength);
+                                        const float4 *bestControlPointPosition_d,
+                                        const float4 *gradientArray_d,
+                                        const float& currentLength);
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index f2cb0578..0559768b 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -27,16 +27,16 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray);
+    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray);
 
     // Create the texture object for the deformation field
-    auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
-                                                                    false, activeVoxelNumber * sizeof(float4),
-                                                                    cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
+                                                                  false, activeVoxelNumber * sizeof(float4),
+                                                                  cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
     // Create the texture object for the mask
-    auto&& maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(int),
-                                                        cudaChannelFormatKindSigned, 1, cudaFilterModePoint);
+    auto maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(int),
+                                                      cudaChannelFormatKindSigned, 1, cudaFilterModePoint);
 
     // Bind the real to voxel matrix to the texture
     mat44 floatingMatrix;
@@ -71,12 +71,12 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, true);
+    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, true);
 
     // Create the texture object for the deformation field
-    auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
-                                                                    false, activeVoxelNumber * sizeof(float4),
-                                                                    cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
+                                                                  false, activeVoxelNumber * sizeof(float4),
+                                                                  cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
     // Bind the real to voxel matrix to the texture
     mat44 floatingMatrix;
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index 5326af4c..f0c83022 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -107,14 +107,14 @@ class NormaliseGradientTest {
     template<typename T>
     T GetMaximalLength(const nifti_image* transformationGradient, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
         if (!optimiseX && !optimiseY && !optimiseZ) return 0;
-        const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
+        const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
         const T *ptrX = static_cast<T*>(transformationGradient->data);
-        const T *ptrY = &ptrX[voxelsPerVolume];
-        const T *ptrZ = &ptrY[voxelsPerVolume];
+        const T *ptrY = &ptrX[nVoxelsPerVolume];
+        const T *ptrZ = &ptrY[nVoxelsPerVolume];
         T maxGradLength = 0;
 
         if (transformationGradient->nz > 1) {
-            for (size_t i = 0; i < voxelsPerVolume; i++) {
+            for (size_t i = 0; i < nVoxelsPerVolume; i++) {
                 T valX = 0, valY = 0, valZ = 0;
                 if (optimiseX)
                     valX = *ptrX++;
@@ -125,7 +125,7 @@ class NormaliseGradientTest {
                 maxGradLength = std::max(sqrt(valX * valX + valY * valY + valZ * valZ), maxGradLength);
             }
         } else {
-            for (size_t i = 0; i < voxelsPerVolume; i++) {
+            for (size_t i = 0; i < nVoxelsPerVolume; i++) {
                 T valX = 0, valY = 0;
                 if (optimiseX)
                     valX = *ptrX++;
@@ -141,12 +141,12 @@ class NormaliseGradientTest {
     template<typename T>
     void NormaliseGradient(const nifti_image* transformationGradient, const T& maxGradLength, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
         if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return;
-        const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
+        const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
         T *ptrX = static_cast<T*>(transformationGradient->data);
-        T *ptrY = &ptrX[voxelsPerVolume];
-        T *ptrZ = &ptrY[voxelsPerVolume];
+        T *ptrY = &ptrX[nVoxelsPerVolume];
+        T *ptrZ = &ptrY[nVoxelsPerVolume];
         if (transformationGradient->nz > 1) {
-            for (size_t i = 0; i < voxelsPerVolume; ++i) {
+            for (size_t i = 0; i < nVoxelsPerVolume; ++i) {
                 T valX = 0, valY = 0, valZ = 0;
                 if (optimiseX)
                     valX = ptrX[i];
@@ -159,7 +159,7 @@ class NormaliseGradientTest {
                 ptrZ[i] = valZ / maxGradLength;
             }
         } else {
-            for (size_t i = 0; i < voxelsPerVolume; ++i) {
+            for (size_t i = 0; i < nVoxelsPerVolume; ++i) {
                 T valX = 0, valY = 0;
                 if (optimiseX)
                     valX = ptrX[i];
@@ -186,9 +186,6 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
             transGrad.disown();
             content->UpdateTransformationGradient();
 
-            // Get the number of voxels per volume
-            const auto voxelsPerVolume = testGrad.nVoxelsPerVolume();
-
             // Calculate the maximal length
             unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
             const auto maxLength = static_cast<float>(compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ));

From a6401b971b1ef610ef5155822e3e589fa089ee84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 29 Mar 2023 15:28:48 +0100
Subject: [PATCH 121/314] Modernise CUDA conjugate gradient functions

- Ditch old texture objects and use up-to-date ones
- Make texture objects managed
- Ditch CUDA symbols and pass them as kernel function parameters
- Extend reg_updateControlPointPosition_gpu() to handle optimise* parameters
---
 niftyreg_build_version.txt             |   2 +-
 reg-lib/Compute.cpp                    |  10 +-
 reg-lib/cuda/CudaCompute.cpp           |   8 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu     | 163 +++++++++++--------------
 reg-lib/cuda/_reg_optimiser_gpu.h      |  34 +++---
 reg-lib/cuda/_reg_optimiser_kernels.cu | 123 ++++++++++---------
 6 files changed, 167 insertions(+), 173 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index eb08bc0b..f06fa6c9 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-240
+241
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index e211b885..642ee316 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -243,7 +243,7 @@ void Compute::ConvolveImage(nifti_image *image) {
     const int kernelType = CUBIC_SPLINE_KERNEL;
     float currentNodeSpacing[3];
     currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
-    bool activeAxis[3] = {1, 0, 0};
+    bool activeAxis[3] = { 1, 0, 0 };
     reg_tools_kernelConvolution(image,
                                 currentNodeSpacing,
                                 kernelType,
@@ -392,11 +392,11 @@ void Compute::SymmetriseVelocityFields(Content& conBwIn) {
 
     // and subtracted (sum and negation)
     reg_tools_subtractImageFromImage(controlPointGridBw,  // displacement
-                                   warpedTrans,         // displacement
-                                   controlPointGridBw); // displacement output
+                                     warpedTrans,         // displacement
+                                     controlPointGridBw); // displacement output
     reg_tools_subtractImageFromImage(controlPointGrid,  // displacement
-                                   warpedTransBw,     // displacement
-                                   controlPointGrid); // displacement output
+                                     warpedTransBw,     // displacement
+                                     controlPointGrid); // displacement output
 
     // Divide by 2
     reg_tools_multiplyValueToImage(controlPointGridBw, // displacement
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 5284024c..58195be2 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -103,12 +103,14 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
                                              const bool& optimiseX,
                                              const bool& optimiseY,
                                              const bool& optimiseZ) {
-    // TODO Fix reg_updateControlPointPosition_gpu to accept optimiseX, optimiseY, optimiseZ
-    reg_updateControlPointPosition_gpu(dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid(),
+    reg_updateControlPointPosition_gpu(NiftiImage::calcVoxelNumber(dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid(), 3),
                                        reinterpret_cast<float4*>(currentDof),
                                        reinterpret_cast<const float4*>(bestDof),
                                        reinterpret_cast<const float4*>(gradient),
-                                       scale);
+                                       scale,
+                                       optimiseX,
+                                       optimiseY,
+                                       optimiseZ);
 }
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 32d407e3..c935820d 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -172,100 +172,83 @@ void reg_conjugateGradient_gpu::reg_test_optimiser() {
     reg_optimiser_gpu::reg_test_optimiser();
 }
 /* *************************************************************** */
-void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d,
-                                         float4 *conjugateG_d,
-                                         float4 *conjugateH_d,
-                                         int nodeNumber) {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4)));
-
-    const unsigned int Grid_reg_initialiseConjugateGradient =
-        (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_initialiseConjugateGradient));
-    dim3 G1(Grid_reg_initialiseConjugateGradient, Grid_reg_initialiseConjugateGradient, 1);
-    dim3 B1(NR_BLOCK->Block_reg_initialiseConjugateGradient, 1, 1);
-
-    reg_initialiseConjugateGradient_kernel <<< G1, B1 >>> (conjugateG_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateH_d, conjugateG_d, nodeNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
-}
-/* *************************************************************** */
-void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
-                                  float4 *conjugateG_d,
-                                  float4 *conjugateH_d,
-                                  int nodeNumber) {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, conjugateG_d, nodeNumber * sizeof(float4)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateHTexture, conjugateH_d, nodeNumber * sizeof(float4)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4)));
+void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
+                                         float4 *conjugateGCuda,
+                                         float4 *conjugateHCuda,
+                                         const size_t& nVoxels) {
+    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
+                                                               cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+
+    const unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_initialiseConjugateGradient;
+    const unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+
+    reg_initialiseConjugateGradient_kernel<<<gridDims, blockDims>>>(conjugateGCuda, *gradientImageTexture, nVoxels);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateHCuda, conjugateGCuda, nVoxels * sizeof(float4), cudaMemcpyDeviceToDevice));
+}
+/* *************************************************************** */
+void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda,
+                                  float4 *conjugateGCuda,
+                                  float4 *conjugateHCuda,
+                                  const size_t& nVoxels) {
+    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
+                                                               cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+    auto conjugateGTexture = cudaCommon_createTextureObject(conjugateGCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
+                                                            cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+    auto conjugateHTexture = cudaCommon_createTextureObject(conjugateHCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
+                                                            cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
     // gam = sum((grad+g)*grad)/sum(HxG);
-    const unsigned int Grid_reg_GetConjugateGradient1 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_GetConjugateGradient1));
-    dim3 B1(NR_BLOCK->Block_reg_GetConjugateGradient1, 1, 1);
-    dim3 G1(Grid_reg_GetConjugateGradient1, Grid_reg_GetConjugateGradient1, 1);
-
-    float2 *sum_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&sum_d, nodeNumber * sizeof(float2)));
-    reg_GetConjugateGradient1_kernel <<< G1, B1 >>> (sum_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-    float2 *sum_h;
-    NR_CUDA_SAFE_CALL(cudaMallocHost(&sum_h, nodeNumber * sizeof(float2)));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(sum_h, sum_d, nodeNumber * sizeof(float2), cudaMemcpyDeviceToHost));
-    NR_CUDA_SAFE_CALL(cudaFree(sum_d));
+    unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_GetConjugateGradient1;
+    unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    dim3 blockDims(blocks, 1, 1);
+    dim3 gridDims(grids, grids, 1);
+
+    float2 *sumsCuda;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&sumsCuda, nVoxels * sizeof(float2)));
+    reg_GetConjugateGradient1_kernel<<<gridDims, blockDims>>>(sumsCuda, *gradientImageTexture, *conjugateGTexture, *conjugateHTexture, nVoxels);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    float2 *sums;
+    NR_CUDA_SAFE_CALL(cudaMallocHost(&sums, nVoxels * sizeof(float2)));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(sums, sumsCuda, nVoxels * sizeof(float2), cudaMemcpyDeviceToHost));
+    NR_CUDA_SAFE_CALL(cudaFree(sumsCuda));
     double dgg = 0;
     double gg = 0;
-    for (int i = 0; i < nodeNumber; i++) {
-        dgg += sum_h[i].x;
-        gg += sum_h[i].y;
+    for (size_t i = 0; i < nVoxels; i++) {
+        dgg += sums[i].x;
+        gg += sums[i].y;
     }
-    float gam = (float)(dgg / gg);
-    NR_CUDA_SAFE_CALL(cudaFreeHost(sum_h));
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &gam, sizeof(float)));
-    const unsigned int Grid_reg_GetConjugateGradient2 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_GetConjugateGradient2));
-    dim3 B2(NR_BLOCK->Block_reg_GetConjugateGradient2, 1, 1);
-    dim3 G2(Grid_reg_GetConjugateGradient2, Grid_reg_GetConjugateGradient2, 1);
-    reg_GetConjugateGradient2_kernel <<< G2, B2 >>> (gradientArray_d, conjugateG_d, conjugateH_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateGTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateHTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
-
-}
-/* *************************************************************** */
-void reg_updateControlPointPosition_gpu(const nifti_image *controlPointImage,
-                                        float4 *controlPointImageArray_d,
-                                        const float4 *bestControlPointPosition_d,
-                                        const float4 *gradientArray_d,
-                                        const float& currentLength) {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
-
-    const int nodeNumber = CalcVoxelNumber(*controlPointImage);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &currentLength, sizeof(float)));
-
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, bestControlPointPosition_d, nodeNumber * sizeof(float4)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4)));
-
-    const unsigned int Grid_reg_updateControlPointPosition =
-        (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_updateControlPointPosition));
-    dim3 B1(NR_BLOCK->Block_reg_updateControlPointPosition, 1, 1);
-    dim3 G1(Grid_reg_updateControlPointPosition, Grid_reg_updateControlPointPosition, 1);
-    reg_updateControlPointPosition_kernel <<< G1, B1 >>> (controlPointImageArray_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-    // Unbind the textures
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_updateControlPointPosition_gpu() called\n");
-#endif
+    const float gam = (float)(dgg / gg);
+    NR_CUDA_SAFE_CALL(cudaFreeHost(sums));
+
+    blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_GetConjugateGradient2;
+    grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    gridDims = dim3(blocks, 1, 1);
+    blockDims = dim3(grids, grids, 1);
+    reg_GetConjugateGradient2_kernel<<<blockDims, gridDims>>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, nVoxels, gam);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+}
+/* *************************************************************** */
+void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
+                                        float4 *controlPointImageCuda,
+                                        const float4 *bestControlPointCuda,
+                                        const float4 *gradientImageCuda,
+                                        const float& scale,
+                                        const bool& optimiseX,
+                                        const bool& optimiseY,
+                                        const bool& optimiseZ) {
+    auto bestControlPointTexture = cudaCommon_createTextureObject(bestControlPointCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
+                                                                  cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
+                                                               cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+
+    const unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_updateControlPointPosition;
+    const unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    const dim3 blockDims(blocks, 1, 1);
+    const dim3 gridDims(grids, grids, 1);
+    reg_updateControlPointPosition_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, nVoxels, scale, optimiseX, optimiseY, optimiseZ);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index bf94b64c..a621a76f 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -87,24 +87,28 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
 /** @brief
  */
 extern "C++"
-void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d,
-                                         float4 *conjugateG_d,
-                                         float4 *conjugateH_d,
-                                         int nodeNumber);
-
+void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
+                                         float4 *conjugateGCuda,
+                                         float4 *conjugateHCuda,
+                                         const size_t& nVoxels);
+/* *************************************************************** */
 /** @brief
  */
 extern "C++"
-void reg_GetConjugateGradient_gpu(float4 *gradientArray_d,
-                                  float4 *conjugateG_d,
-                                  float4 *conjugateH_d,
-                                  int nodeNumber);
-
+void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda,
+                                  float4 *conjugateGCuda,
+                                  float4 *conjugateHCuda,
+                                  const size_t& nVoxels);
+/* *************************************************************** */
 /** @brief
  */
 extern "C++"
-void reg_updateControlPointPosition_gpu(const nifti_image *controlPointImage,
-                                        float4 *controlPointImageArray_d,
-                                        const float4 *bestControlPointPosition_d,
-                                        const float4 *gradientArray_d,
-                                        const float& currentLength);
+void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
+                                        float4 *controlPointImageCuda,
+                                        const float4 *bestControlPointCuda,
+                                        const float4 *gradientImageCuda,
+                                        const float& scale,
+                                        const bool& optimiseX,
+                                        const bool& optimiseY,
+                                        const bool& optimiseZ);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu
index 27c00ea8..7ea3d201 100755
--- a/reg-lib/cuda/_reg_optimiser_kernels.cu
+++ b/reg-lib/cuda/_reg_optimiser_kernels.cu
@@ -1,72 +1,77 @@
-__device__ __constant__ int c_NodeNumber;
-__device__ __constant__ float c_ScalingFactor;
-
-texture<float4, 1, cudaReadModeElementType> gradientImageTexture;
-texture<float4, 1, cudaReadModeElementType> conjugateGTexture;
-texture<float4, 1, cudaReadModeElementType> conjugateHTexture;
-texture<float4, 1, cudaReadModeElementType> controlPointTexture;
-
-__global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateG_d)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid < c_NodeNumber){
-        float4 gradValue = tex1Dfetch(gradientImageTexture,tid);
-        conjugateG_d[tid] = make_float4(-gradValue.x, -gradValue.y, -gradValue.z,0.0f);
+/* *************************************************************** */
+__global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateGCuda,
+                                                       cudaTextureObject_t gradientImageTexture,
+                                                       const size_t nVoxels) {
+    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < nVoxels) {
+        const float4 gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
+        conjugateGCuda[tid] = make_float4(-gradValue.x, -gradValue.y, -gradValue.z, 0);
     }
 }
+/* *************************************************************** */
+__global__ void reg_GetConjugateGradient1_kernel(float2 *sums,
+                                                 cudaTextureObject_t gradientImageTexture,
+                                                 cudaTextureObject_t conjugateGTexture,
+                                                 cudaTextureObject_t conjugateHTexture,
+                                                 const size_t nVoxels) {
+    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < nVoxels) {
+        const float4 valueH = tex1Dfetch<float4>(conjugateHTexture, tid);
+        const float4 valueG = tex1Dfetch<float4>(conjugateGTexture, tid);
+        const float gg = valueG.x * valueH.x + valueG.y * valueH.y + valueG.z * valueH.z;
 
+        const float4 grad = tex1Dfetch<float4>(gradientImageTexture, tid);
+        const float dgg = (grad.x + valueG.x) * grad.x + (grad.y + valueG.y) * grad.y + (grad.z + valueG.z) * grad.z;
 
-__global__ void reg_GetConjugateGradient1_kernel(float2 *sum)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid < c_NodeNumber){
-        float4 valueH = tex1Dfetch(conjugateHTexture,tid);
-        float4 valueG = tex1Dfetch(conjugateGTexture,tid);
-        float gg= valueG.x*valueH.x + valueG.y*valueH.y + valueG.z*valueH.z;
-
-        float4 grad = tex1Dfetch(gradientImageTexture,tid);
-        float dgg= (grad.x+valueG.x)*grad.x + (grad.y+valueG.y)*grad.y + (grad.z+valueG.z)*grad.z;
-
-        sum[tid]=make_float2(dgg,gg);
+        sums[tid] = make_float2(dgg, gg);
     }
 }
-
-__global__ void reg_GetConjugateGradient2_kernel(float4 *nodeNMIGradientArray_d,
-                                                 float4 *conjugateG_d,
-                                                 float4 *conjugateH_d)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid < c_NodeNumber){
+/* *************************************************************** */
+__global__ void reg_GetConjugateGradient2_kernel(float4 *gradientImageCuda,
+                                                 float4 *conjugateGCuda,
+                                                 float4 *conjugateHCuda,
+                                                 const size_t nVoxels,
+                                                 const float scale) {
+    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < nVoxels) {
         // G = - grad
-        float4 gradGValue = nodeNMIGradientArray_d[tid];
-        gradGValue = make_float4(-gradGValue.x, -gradGValue.y, -gradGValue.z, 0.0f);
-        conjugateG_d[tid]=gradGValue;
+        float4 gradGValue = gradientImageCuda[tid];
+        gradGValue = make_float4(-gradGValue.x, -gradGValue.y, -gradGValue.z, 0);
+        conjugateGCuda[tid] = gradGValue;
 
         // H = G + gam * H
-        float4 gradHValue = conjugateH_d[tid];
-        gradHValue=make_float4(
-                gradGValue.x + c_ScalingFactor * gradHValue.x,
-                gradGValue.y + c_ScalingFactor * gradHValue.y,
-                gradGValue.z + c_ScalingFactor * gradHValue.z,
-                0.0f);
-        conjugateH_d[tid]=gradHValue;
-        nodeNMIGradientArray_d[tid]=make_float4(-gradHValue.x, -gradHValue.y, -gradHValue.z, 0.0f);
+        float4 gradHValue = conjugateHCuda[tid];
+        gradHValue = make_float4(gradGValue.x + scale * gradHValue.x,
+                                 gradGValue.y + scale * gradHValue.y,
+                                 gradGValue.z + scale * gradHValue.z,
+                                 0);
+        conjugateHCuda[tid] = gradHValue;
+
+        gradientImageCuda[tid] = make_float4(-gradHValue.x, -gradHValue.y, -gradHValue.z, 0);
     }
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-__global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageArray_d)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid < c_NodeNumber){
-		float scaling = c_ScalingFactor;
-		float4 value = tex1Dfetch(controlPointTexture,tid);
-		float4 gradValue = tex1Dfetch(gradientImageTexture,tid);
-		value.x += scaling * gradValue.x;
-		value.y += scaling * gradValue.y;
-		value.z += scaling * gradValue.z;
-		value.w = 0.0f;
-		controlPointImageArray_d[tid]=value;
-
+/* *************************************************************** */
+__global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageCuda,
+                                                      cudaTextureObject_t bestControlPointTexture,
+                                                      cudaTextureObject_t gradientImageTexture,
+                                                      const size_t nVoxels,
+                                                      const float scale,
+                                                      const bool optimiseX,
+                                                      const bool optimiseY,
+                                                      const bool optimiseZ) {
+    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < nVoxels) {
+        float4 value = controlPointImageCuda[tid];
+        const float4 bestValue = tex1Dfetch<float4>(bestControlPointTexture, tid);
+        const float4 gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
+        if (optimiseX)
+            value.x = bestValue.x + scale * gradValue.x;
+        if (optimiseY)
+            value.y = bestValue.y + scale * gradValue.y;
+        if (optimiseZ)
+            value.z = bestValue.z + scale * gradValue.z;
+        value.w = 0;
+        controlPointImageCuda[tid] = value;
     }
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */

From 66e48c8cf780d40b7ffb1a24cfa5b4bd7d6f9840 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 29 Mar 2023 16:23:02 +0100
Subject: [PATCH 122/314] Remove unnecessary testing functions

---
 niftyreg_build_version.txt         |  2 +-
 reg-lib/_reg_base.h                |  3 ---
 reg-lib/cpu/_reg_optimiser.cpp     | 11 -----------
 reg-lib/cpu/_reg_optimiser.h       |  6 ------
 reg-lib/cuda/_reg_optimiser_gpu.cu |  5 -----
 reg-lib/cuda/_reg_optimiser_gpu.h  |  3 ---
 6 files changed, 1 insertion(+), 29 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f06fa6c9..bfd03aba 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-241
+242
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index bed799bf..3f676875 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -208,7 +208,4 @@ class reg_base: public InterfaceOptimiser {
         funcProgressCallback = funcProgCallback;
         paramsProgressCallback = paramsProgCallback;
     }
-
-    // For testing
-    virtual void reg_test_setOptimiser(reg_optimiser<T> *opt) { optimiser.reset(opt); }
 };
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index 5b1a759c..30b8a069 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -193,11 +193,6 @@ void reg_optimiser<T>::Optimise(T maxLength, T smallLength, T &startLength) {
 }
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::reg_test_optimiser() {
-    this->intOpt->UpdateParameters(1.f);
-}
-/* *************************************************************** */
-template <class T>
 reg_conjugateGradient<T>::reg_conjugateGradient(): reg_optimiser<T>::reg_optimiser() {
     this->array1 = nullptr;
     this->array2 = nullptr;
@@ -382,12 +377,6 @@ void reg_conjugateGradient<T>::Perturbation(float length) {
 }
 /* *************************************************************** */
 template <class T>
-void reg_conjugateGradient<T>::reg_test_optimiser() {
-    this->UpdateGradientValues();
-    reg_optimiser<T>::reg_test_optimiser();
-}
-/* *************************************************************** */
-template <class T>
 reg_lbfgs<T>::reg_lbfgs()
     :reg_optimiser<T>::reg_optimiser() {
     this->stepToKeep = 5;
diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h
index ca6a89b0..db069cf5 100644
--- a/reg-lib/cpu/_reg_optimiser.h
+++ b/reg-lib/cpu/_reg_optimiser.h
@@ -133,9 +133,6 @@ class reg_optimiser {
                           T smallLength,
                           T &startLength);
     virtual void Perturbation(float length);
-
-    // Function used for testing
-    virtual void reg_test_optimiser();
 };
 /* *************************************************************** */
 /** @class reg_conjugateGradient
@@ -172,9 +169,6 @@ class reg_conjugateGradient: public reg_optimiser<T> {
                           T smallLength,
                           T &startLength) override;
     virtual void Perturbation(float length) override;
-
-    // Function used for testing
-    virtual void reg_test_optimiser() override;
 };
 /* *************************************************************** */
 /** @class Global optimisation class
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index c935820d..fe8da863 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -167,11 +167,6 @@ void reg_conjugateGradient_gpu::Perturbation(float length) {
     this->firstCall = true;
 }
 /* *************************************************************** */
-void reg_conjugateGradient_gpu::reg_test_optimiser() {
-    this->UpdateGradientValues();
-    reg_optimiser_gpu::reg_test_optimiser();
-}
-/* *************************************************************** */
 void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
                                          float4 *conjugateGCuda,
                                          float4 *conjugateHCuda,
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index a621a76f..aa3706b7 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -79,9 +79,6 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
                           float smallLength,
                           float &startLength) override;
     virtual void Perturbation(float length) override;
-
-    // Function used for testing
-    virtual void reg_test_optimiser() override;
 };
 /* *************************************************************** */
 /** @brief

From a4ae03226e19e66e2c869e7b6c086b2ac6f3b4f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 30 Mar 2023 14:18:36 +0100
Subject: [PATCH 123/314] Add tests for conjugate gradient

---
 niftyreg_build_version.txt              |   2 +-
 reg-lib/cpu/_reg_optimiser.h            |  17 +-
 reg-lib/cuda/_reg_optimiser_gpu.h       |   6 +-
 reg-test/CMakeLists.txt                 |   1 +
 reg-test/reg_test_conjugateGradient.cpp | 278 ++++++++++++++++++++++++
 5 files changed, 300 insertions(+), 4 deletions(-)
 create mode 100644 reg-test/reg_test_conjugateGradient.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index bfd03aba..77f83230 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-242
+243
diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h
index db069cf5..4cdb5eff 100644
--- a/reg-lib/cpu/_reg_optimiser.h
+++ b/reg-lib/cpu/_reg_optimiser.h
@@ -48,6 +48,12 @@ class reg_optimiser {
     double currentObjFunctionValue;
     InterfaceOptimiser *intOpt;
 
+#ifdef NR_TESTING
+public:
+#endif
+    /// @brief Update the gradient array
+    virtual void UpdateGradientValues() {}
+
 public:
     reg_optimiser();
     virtual ~reg_optimiser();
@@ -147,7 +153,10 @@ class reg_conjugateGradient: public reg_optimiser<T> {
     T *array2Bw;
     bool firstCall;
 
-    void UpdateGradientValues(); /// @brief Update the gradient array
+#ifdef NR_TESTING
+public:
+#endif
+    virtual void UpdateGradientValues() override;
 
 public:
     reg_conjugateGradient();
@@ -183,6 +192,11 @@ class reg_lbfgs: public reg_optimiser<T> {
     T **diffDof;
     T **diffGrad;
 
+#ifdef NR_TESTING
+public:
+#endif
+    virtual void UpdateGradientValues() override;
+
 public:
     reg_lbfgs();
     virtual ~reg_lbfgs();
@@ -202,7 +216,6 @@ class reg_lbfgs: public reg_optimiser<T> {
     virtual void Optimise(T maxLength,
                           T smallLength,
                           T &startLength) override;
-    virtual void UpdateGradientValues() override;
 };
 /* *************************************************************** */
 #include "_reg_optimiser.cpp"
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index aa3706b7..3f602b17 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -56,7 +56,11 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
     float4 *array1;
     float4 *array2;
     bool firstCall;
-    void UpdateGradientValues(); /// @brief Update the gradient array
+
+#ifdef NR_TESTING
+public:
+#endif
+    virtual void UpdateGradientValues() override;
 
 public:
     reg_conjugateGradient_gpu();
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index e08b18ac..d2bab9af 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -108,6 +108,7 @@ include(Catch)
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
 set(EXEC_LIST reg_test_affineDeformationField)
+set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
new file mode 100644
index 00000000..43817321
--- /dev/null
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -0,0 +1,278 @@
+// OpenCL is not supported for this test
+#undef _USE_OPENCL
+
+#include "reg_test_common.h"
+
+#define EPS 0.000001
+
+/*
+    This test file contains the following unit tests:
+    test functions: conjugate gradient
+    In 2D and 3D
+    Update control point grid
+    Update transformation gradient
+*/
+
+
+class ConjugateGradientTest: public InterfaceOptimiser {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<shared_ptr<Platform>, unique_ptr<F3dContent>, TestData, bool, bool, bool, float>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    ConjugateGradientTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create a reference 2D image
+        vector<NiftiImage::dim_t> dimFlo{ 4, 4 };
+        NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
+
+        // Fill image with distance from identity
+        const auto ref2dPtr = reference2d.data();
+        auto ref2dIt = ref2dPtr.begin();
+        for (int y = 0; y < reference2d->ny; ++y)
+            for (int x = 0; x < reference2d->nx; ++x)
+                *ref2dIt++ = sqrtf(static_cast<float>(x * x + y * y));
+
+        // Create a reference 3D image
+        dimFlo.push_back(4);
+        NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32);
+
+        // Fill image with distance from identity
+        const auto ref3dPtr = reference3d.data();
+        auto ref3dIt = ref3dPtr.begin();
+        for (int z = 0; z < reference3d->nz; ++z)
+            for (int y = 0; y < reference3d->ny; ++y)
+                for (int x = 0; x < reference3d->nx; ++x)
+                    *ref3dIt++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
+
+        // Generate the different test cases
+        // Test 2D
+        NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
+        NiftiImage bestControlPointGrid2d(controlPointGrid2d, true, true, true);
+        NiftiImage transformationGradient2d(controlPointGrid2d, true, true, true);
+        auto bestCpp2dPtr = bestControlPointGrid2d.data();
+        auto transGrad2dPtr = transformationGradient2d.data();
+        for (size_t i = 0; i < transformationGradient2d.nVoxels(); ++i) {
+            bestCpp2dPtr[i] = distr(gen);
+            transGrad2dPtr[i] = distr(gen);
+        }
+
+        // Add the test data
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "2D",
+            std::move(reference2d),
+            std::move(controlPointGrid2d),
+            std::move(bestControlPointGrid2d),
+            std::move(transformationGradient2d)
+        ));
+
+        // Test 3D
+        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+        NiftiImage bestControlPointGrid3d(controlPointGrid3d, true, true, true);
+        NiftiImage transformationGradient3d(controlPointGrid3d, true, true, true);
+        auto bestCpp3dPtr = bestControlPointGrid3d.data();
+        auto transGrad3dPtr = transformationGradient3d.data();
+        for (size_t i = 0; i < transformationGradient3d.nVoxels(); ++i) {
+            bestCpp3dPtr[i] = distr(gen);
+            transGrad3dPtr[i] = distr(gen);
+        }
+
+        // Add the test data
+        testData.emplace_back(TestData(
+            "3D",
+            std::move(reference3d),
+            std::move(controlPointGrid3d),
+            std::move(bestControlPointGrid3d),
+            std::move(transformationGradient3d)
+        ));
+
+        // Add platforms, optimise*, and scale to the test data
+        distr = std::uniform_real_distribution<float>(0, 10);
+        for (auto&& testData : testData) {
+            for (auto&& platformType : PlatformTypes) {
+                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
+                for (int optimiseX = 0; optimiseX < 2; optimiseX++) {
+                    for (int optimiseY = 0; optimiseY < 2; optimiseY++) {
+                        for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) {
+                            // Make a copy of the test data
+                            auto td = testData;
+                            auto&& [testName, reference, controlPointGrid, bestControlPointGrid, transGrad] = td;
+                            // Add content
+                            unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
+                            testCases.push_back({ platform, std::move(content), std::move(td), optimiseX, optimiseY, optimiseZ, distr(gen) });
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    void UpdateControlPointPosition(NiftiImage& currentDof,
+                                    const NiftiImage& bestDof,
+                                    const NiftiImage& gradient,
+                                    const float& scale,
+                                    const bool& optimiseX,
+                                    const bool& optimiseY,
+                                    const bool& optimiseZ) {
+        // Update the values for the x-axis displacement
+        if (optimiseX) {
+            auto currentDofPtr = currentDof.data(0);
+            const auto bestDofPtr = bestDof.data(0);
+            const auto gradientPtr = gradient.data(0);
+            for (size_t i = 0; i < currentDofPtr.length(); ++i)
+                currentDofPtr[i] = static_cast<float>(bestDofPtr[i]) + scale * static_cast<float>(gradientPtr[i]);
+        }
+        // Update the values for the y-axis displacement
+        if (optimiseY) {
+            auto currentDofPtr = currentDof.data(1);
+            const auto bestDofPtr = bestDof.data(1);
+            const auto gradientPtr = gradient.data(1);
+            for (size_t i = 0; i < currentDofPtr.length(); ++i)
+                currentDofPtr[i] = static_cast<float>(bestDofPtr[i]) + scale * static_cast<float>(gradientPtr[i]);
+        }
+        // Update the values for the z-axis displacement
+        if (optimiseZ && currentDof->nz > 1) {
+            auto currentDofPtr = currentDof.data(2);
+            const auto bestDofPtr = bestDof.data(2);
+            const auto gradientPtr = gradient.data(2);
+            for (size_t i = 0; i < currentDofPtr.length(); ++i)
+                currentDofPtr[i] = static_cast<float>(bestDofPtr[i]) + scale * static_cast<float>(gradientPtr[i]);
+        }
+    }
+
+    void UpdateGradientValues(NiftiImage& gradient, const bool& firstCall) {
+        // Create array1 and array2
+        static NiftiImage array1;
+        static NiftiImage array2;
+        if (firstCall) {
+            array1 = NiftiImage(gradient, true, true, true);
+            array2 = NiftiImage(gradient, true, true, true);
+        }
+
+        auto gradientPtr = gradient.data();
+        auto array1Ptr = array1.data();
+        auto array2Ptr = array2.data();
+
+        if (firstCall) {
+            // Initialise array1 and array2
+            for (size_t i = 0; i < gradient.nVoxels(); i++)
+                array2Ptr[i] = array1Ptr[i] = -static_cast<float>(gradientPtr[i]);
+        } else {
+            // Calculate gam
+            double dgg = 0, gg = 0;
+            for (size_t i = 0; i < gradient.nVoxels(); i++) {
+                gg += static_cast<float>(array2Ptr[i]) * static_cast<float>(array1Ptr[i]);
+                dgg += (static_cast<float>(gradientPtr[i]) + static_cast<float>(array1Ptr[i])) * static_cast<float>(gradientPtr[i]);
+            }
+            const double gam = dgg / gg;
+
+            // Update gradient values
+            for (size_t i = 0; i < gradient.nVoxels(); i++) {
+                array1Ptr[i] = -static_cast<float>(gradientPtr[i]);
+                array2Ptr[i] = static_cast<float>(array1Ptr[i]) + gam * static_cast<float>(array2Ptr[i]);
+                gradientPtr[i] = -static_cast<float>(array2Ptr[i]);
+            }
+        }
+    }
+
+    // Required for InterfaceOptimiser
+    virtual double GetObjectiveFunctionValue() { return 0; }
+    virtual void UpdateParameters(float) {}
+    virtual void UpdateBestObjFunctionValue() {}
+};
+
+TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradient]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [platform, content, testData, optimiseX, optimiseY, optimiseZ, scale] = testCase;
+        auto&& [testName, reference, controlPointGrid, bestControlPointGrid, transGrad] = testData;
+        const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ") + " scale = " + std::to_string(scale);
+
+        SECTION(sectionName) {
+            std::cout << "******** UpdateControlPointPosition " << sectionName << " ********" << std::endl;
+
+            // Set the control point grid
+            NiftiImage img = content->GetControlPointGrid();
+            // Use bestControlPointGrid to store bestDof during initialisation of the optimiser
+            img.copyData(bestControlPointGrid);
+            img.disown();
+            content->UpdateControlPointGrid();
+
+            // Set the transformation gradient
+            img = content->GetTransformationGradient();
+            img.copyData(transGrad);
+            img.disown();
+            content->UpdateTransformationGradient();
+
+            // Create a copy of the control point grid for expected results
+            NiftiImage controlPointGridExpected = bestControlPointGrid;
+
+            // Update the control point position
+            unique_ptr<reg_optimiser<float>> optimiser{ platform->template CreateOptimiser<float>(*content, *this, 0, true, optimiseX, optimiseY, optimiseZ) };
+            unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+            compute->UpdateControlPointPosition(optimiser->GetCurrentDof(), optimiser->GetBestDof(), optimiser->GetGradient(), scale, optimiseX, optimiseY, optimiseZ);
+            UpdateControlPointPosition(controlPointGridExpected, bestControlPointGrid, transGrad, scale, optimiseX, optimiseY, optimiseZ);
+
+            // Check the results
+            img = content->GetControlPointGrid();
+            const auto cppPtr = img.data();
+            const auto cppExpPtr = controlPointGridExpected.data();
+            img.disown();
+            for (size_t i = 0; i < controlPointGridExpected.nVoxels(); ++i) {
+                const float cppVal = cppPtr[i];
+                const float cppExpVal = cppExpPtr[i];
+                std::cout << i << " " << cppVal << " " << cppExpVal << std::endl;
+                REQUIRE(fabs(cppVal - cppExpVal) < EPS);
+            }
+
+            // Update the gradient values
+            // Only run once by discarding other optimiseX, optimiseY, optimiseZ combinations
+            if (!optimiseX && !optimiseY && !optimiseZ) {
+                std::cout << "******** UpdateGradientValues " << sectionName << " ********" << std::endl;
+
+                // Initialise the conjugate gradient
+                optimiser->UpdateGradientValues();
+                UpdateGradientValues(transGrad, true);
+                // Fill the gradient with random values
+                std::random_device rd;
+                std::mt19937 gen(rd());
+                std::uniform_real_distribution<float> distr(0, 1);
+                auto gradientPtr = transGrad.data();
+                for (size_t i = 0; i < transGrad.nVoxels(); i++)
+                    gradientPtr[i] = distr(gen);
+                // Update the transformation gradient
+                img = content->GetTransformationGradient();
+                img.copyData(transGrad);
+                img.disown();
+                content->UpdateTransformationGradient();
+                // Get the gradient values
+                optimiser->UpdateGradientValues();
+                UpdateGradientValues(transGrad, false);
+
+                // Check the results
+                img = content->GetTransformationGradient();
+                const auto gradPtr = img.data();
+                const auto gradExpPtr = transGrad.data();
+                img.disown();
+                for (size_t i = 0; i < transGrad.nVoxels(); ++i) {
+                    const float gradVal = gradPtr[i];
+                    const float gradExpVal = gradExpPtr[i];
+                    std::cout << i << " " << gradVal << " " << gradExpVal << std::endl;
+                    REQUIRE(fabs(gradVal - gradExpVal) < EPS);
+                }
+            }
+        }
+    }
+}

From 66db3e86c8835b2cb9dd8f22d665f784f5c4d219 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 3 Apr 2023 15:50:21 +0100
Subject: [PATCH 124/314] Refactorisations

 - Rename CudaContextSingleton as CudaContext, and move it into NiftyReg namespace
 - Rename NiftyReg_CudaBlock100 as BlockSize, and move it into NiftyReg namespace
 - Move BlockSize implementation into the header
 - Change the type of BlockSize members as unsigned
 - Move BlockSize instance into CudaContext
 - Use unsigned instead of size_t in CUDA kernels
 - Initialise the CUDA or OpenCL device in Platform's constructor
 - Rename `unsigned int`s as `unsigned`
---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_average.cpp                      |   4 +-
 reg-apps/reg_benchmark.cpp                    |   4 +-
 reg-apps/reg_ppcnr.cpp                        |   2 +-
 reg-apps/reg_tools.cpp                        |   4 +-
 reg-io/_reg_ReadWriteImage.cpp                |   2 +-
 reg-io/nrrd/reg_nrrd.cpp                      |   8 +-
 reg-lib/AladinContent.cpp                     |   4 +-
 reg-lib/AladinContent.h                       |   8 +-
 reg-lib/AladinContentCreator.h                |   4 +-
 reg-lib/Platform.cpp                          |  26 +-
 reg-lib/Platform.h                            |   2 +-
 reg-lib/_reg_aladin.cpp                       |  20 +-
 reg-lib/_reg_aladin.h                         |  28 +-
 reg-lib/_reg_aladin_sym.cpp                   |  12 +-
 reg-lib/_reg_aladin_sym.h                     |   6 +-
 reg-lib/_reg_base.cpp                         |  24 +-
 reg-lib/_reg_base.h                           |  18 +-
 reg-lib/_reg_f3d.cpp                          |   6 +-
 reg-lib/_reg_f3d.h                            |   2 +-
 reg-lib/_reg_f3d2.cpp                         |  10 +-
 reg-lib/cl/ClAffineDeformationFieldKernel.cpp |  34 +-
 reg-lib/cl/ClAladinContent.cpp                |  52 +-
 reg-lib/cl/ClAladinContent.h                  | 178 +++---
 reg-lib/cl/ClAladinContentCreator.h           |   4 +-
 reg-lib/cl/ClBlockMatchingKernel.cpp          |  34 +-
 reg-lib/cl/ClContextSingleton.cpp             | 564 +++++++++---------
 reg-lib/cl/ClContextSingleton.h               | 105 ++--
 reg-lib/cl/ClResampleImageKernel.cpp          |  34 +-
 reg-lib/cl/InfoDevice.h                       |  12 +-
 reg-lib/cl/_reg_openclinfo.cpp                |  14 +-
 reg-lib/cl/affineDeformationKernel.cl         |  16 +-
 reg-lib/cl/blockMatchingKernel.cl             |  60 +-
 reg-lib/cl/resampleKernel.cl                  |  26 +-
 reg-lib/cpu/_reg_blockMatching.cpp            |  58 +-
 reg-lib/cpu/_reg_blockMatching.h              |   4 +-
 reg-lib/cpu/_reg_dti.cpp                      |  14 +-
 reg-lib/cpu/_reg_dti.h                        |   6 +-
 reg-lib/cpu/_reg_femTrans.cpp                 |  24 +-
 reg-lib/cpu/_reg_femTrans.h                   |  10 +-
 reg-lib/cpu/_reg_globalTrans.cpp              |  20 +-
 reg-lib/cpu/_reg_globalTrans.h                |   4 +-
 reg-lib/cpu/_reg_localTrans.cpp               |  18 +-
 reg-lib/cpu/_reg_localTrans_jac.cpp           |   4 +-
 reg-lib/cpu/_reg_mind.cpp                     |   4 +-
 reg-lib/cpu/_reg_nmi.h                        |  12 +-
 reg-lib/cpu/_reg_resampling.cpp               |  12 +-
 reg-lib/cpu/_reg_tools.cpp                    | 100 ++--
 reg-lib/cpu/_reg_tools.h                      |  14 +-
 reg-lib/cuda/BlockSize.hpp                    | 222 +++++++
 reg-lib/cuda/CMakeLists.txt                   |   5 +-
 reg-lib/cuda/CudaAladinContent.cpp            |  18 +-
 reg-lib/cuda/CudaAladinContent.h              | 192 +++---
 reg-lib/cuda/CudaAladinContentCreator.h       |   4 +-
 reg-lib/cuda/CudaCompute.cpp                  |   2 +-
 reg-lib/cuda/CudaContent.cpp                  |   6 +-
 reg-lib/cuda/CudaContent.h                    | 123 ++--
 ...daContextSingleton.cpp => CudaContext.cpp} |  80 +--
 reg-lib/cuda/CudaContext.hpp                  |  42 ++
 reg-lib/cuda/CudaContextSingleton.h           |  34 --
 reg-lib/cuda/CudaConvolutionKernel.h          |   2 +-
 ...seGradient.cu => CudaNormaliseGradient.cu} |  30 +-
 ...Gradient.hpp => CudaNormaliseGradient.hpp} |   0
 reg-lib/cuda/_reg_blocksize_gpu.cu            | 219 -------
 reg-lib/cuda/_reg_blocksize_gpu.h             | 127 ----
 reg-lib/cuda/_reg_common_cuda.cu              |  40 +-
 reg-lib/cuda/_reg_common_cuda.h               |   8 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.cu |  60 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  | 208 +++----
 .../cuda/_reg_localTransformation_kernels.cu  |  64 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                  |  15 +-
 reg-lib/cuda/_reg_nmi_gpu.h                   |   1 -
 reg-lib/cuda/_reg_nmi_kernels.cu              |  54 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu            |  24 +-
 reg-lib/cuda/_reg_optimiser_kernels.cu        |  16 +-
 reg-lib/cuda/_reg_resampling_gpu.cu           |  30 +-
 reg-lib/cuda/_reg_resampling_kernels.cu       |  40 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |  18 +-
 reg-lib/cuda/_reg_ssd_kernels.cu              |   8 +-
 reg-lib/cuda/_reg_tools_gpu.cu                |  85 ++-
 reg-lib/cuda/_reg_tools_gpu.h                 |   1 -
 reg-lib/cuda/affineDeformationKernel.cu       |  22 +-
 reg-lib/cuda/blockMatchingKernel.cu           | 124 ++--
 reg-lib/cuda/checkCudaCard.cpp                |   2 +-
 reg-lib/cuda/optimizeKernel.cu                |  64 +-
 reg-lib/cuda/optimizeKernel.h                 |   8 +-
 reg-lib/cuda/resampleKernel.cu                |  20 +-
 reg-lib/cuda/resampleKernel.h                 |   2 +-
 reg-test/reg_test_conjugateGradient.cpp       |   2 +
 89 files changed, 1734 insertions(+), 1921 deletions(-)
 create mode 100644 reg-lib/cuda/BlockSize.hpp
 rename reg-lib/cuda/{CudaContextSingleton.cpp => CudaContext.cpp} (64%)
 create mode 100644 reg-lib/cuda/CudaContext.hpp
 delete mode 100644 reg-lib/cuda/CudaContextSingleton.h
 rename reg-lib/cuda/{NormaliseGradient.cu => CudaNormaliseGradient.cu} (73%)
 rename reg-lib/cuda/{NormaliseGradient.hpp => CudaNormaliseGradient.hpp} (100%)
 delete mode 100755 reg-lib/cuda/_reg_blocksize_gpu.cu
 delete mode 100755 reg-lib/cuda/_reg_blocksize_gpu.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 77f83230..7f05eede 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-243
+244
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index aea56da1..2f95c3f8 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -79,8 +79,8 @@ void average_norm_intensity(nifti_image *image)
    PrecisionType *rankedIntensities = (PrecisionType *)malloc(image->nvox*sizeof(PrecisionType));
    memcpy(rankedIntensities,image->data,image->nvox*sizeof(PrecisionType));
    reg_heapSort(rankedIntensities,static_cast<int>(image->nvox));
-   PrecisionType lowerValue=rankedIntensities[static_cast<unsigned int>(static_cast<float>(image->nvox)*0.03f)];
-   PrecisionType higherValue=rankedIntensities[static_cast<unsigned int>(static_cast<float>(image->nvox)*0.97f)];
+   PrecisionType lowerValue=rankedIntensities[static_cast<unsigned>(static_cast<float>(image->nvox)*0.03f)];
+   PrecisionType higherValue=rankedIntensities[static_cast<unsigned>(static_cast<float>(image->nvox)*0.97f)];
    reg_tools_subtractValueFromImage(image,image,lowerValue);
    reg_tools_multiplyValueToImage(image,image,255.f/(higherValue-lowerValue));
    free(rankedIntensities);
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index 2bde68ef..c7e23e45 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -39,7 +39,7 @@ int main(int argc, char **argv)
 {
    int dimension = 100;
    float gridSpacing = 10.0f;
-   unsigned int binning = 68;
+   unsigned binning = 68;
    char *outputFileName = (char *)"benchmark_result.txt";
    bool runGPU=1;
 
@@ -99,7 +99,7 @@ int main(int argc, char **argv)
    float *targetPtr=static_cast<float *>(targetImage->data);
    float *sourcePtr=static_cast<float *>(sourceImage->data);
    srand((unsigned)time(0));
-   for(unsigned int i=0; i<targetImage->nvox; ++i)
+   for(unsigned i=0; i<targetImage->nvox; ++i)
    {
       *targetPtr++ = (float)(binning-4)*(float)rand()/(float)RAND_MAX + 2.0f;
       *sourcePtr++ = (float)(binning-4)*(float)rand()/(float)RAND_MAX + 2.0f;
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index fda85e82..f7c2fa5f 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -526,7 +526,7 @@ int main(int argc, char **argv)
       printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
 
       // Read images and find image means
-      unsigned int voxelNumber = image->nvox/image->nt;
+      unsigned voxelNumber = image->nvox/image->nt;
       PrecisionType *intensityPtr = static_cast<PrecisionType *>(image->data);
       PrecisionType *intensityPtrM = static_cast<PrecisionType *>(mask->data);
       for(int t=0; t<image->nt; t++)
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index cebab176..8ddf43f2 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -604,8 +604,8 @@ int main(int argc, char **argv)
                 reg_tools_changeDatatype<short>(image2,NIFTI_TYPE_INT16);
                 break;
             case NIFTI_TYPE_UINT32:
-                reg_tools_changeDatatype<unsigned int>(image,NIFTI_TYPE_UINT32);
-                reg_tools_changeDatatype<unsigned int>(image2,NIFTI_TYPE_UINT32);
+                reg_tools_changeDatatype<unsigned>(image,NIFTI_TYPE_UINT32);
+                reg_tools_changeDatatype<unsigned>(image2,NIFTI_TYPE_UINT32);
                 break;
             case NIFTI_TYPE_INT32:
                 reg_tools_changeDatatype<int>(image,NIFTI_TYPE_INT32);
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index 67017446..a23f2c7f 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -198,7 +198,7 @@ void reg_io_displayImageData(nifti_image *image) {
         reg_io_displayImageData1<short>(image);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_io_displayImageData1<unsigned int>(image);
+        reg_io_displayImageData1<unsigned>(image);
         break;
     case NIFTI_TYPE_INT32:
         reg_io_displayImageData1<int>(image);
diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp
index 57fd436b..20c89f2f 100644
--- a/reg-io/nrrd/reg_nrrd.cpp
+++ b/reg-io/nrrd/reg_nrrd.cpp
@@ -269,8 +269,8 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
       }
       // The matrix is flipped to go from nrrd to nifti
       // and follow the ITK style
-      for(unsigned int i=0; i<2; ++i)
-         for(unsigned int j=0; j<4; ++j)
+      for(unsigned i=0; i<2; ++i)
+         for(unsigned j=0; j<4; ++j)
             niiImage->sto_xyz.m[i][j]*=-1.0f;
       niiImage->sto_ijk=nifti_mat44_inverse(niiImage->sto_xyz);
    }
@@ -445,8 +445,8 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
 
       // The matrix is flipped to go from nifti to nrrd
       // and follow the ITK style
-      for(unsigned int i=0; i<2; ++i)
-         for(unsigned int j=0; j<4; ++j)
+      for(unsigned i=0; i<2; ++i)
+         for(unsigned j=0; j<4; ++j)
             currentAffineMatrix.m[i][j]*=-1.0f;
 
       // the space direction is initialised to identity
diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp
index 84070fed..30b4af23 100755
--- a/reg-lib/AladinContent.cpp
+++ b/reg-lib/AladinContent.cpp
@@ -8,8 +8,8 @@ AladinContent::AladinContent(nifti_image *referenceIn,
                              int *referenceMaskIn,
                              mat44 *transformationMatrixIn,
                              size_t bytesIn,
-                             const unsigned int currentPercentageOfBlockToUseIn,
-                             const unsigned int inlierLtsIn,
+                             const unsigned currentPercentageOfBlockToUseIn,
+                             const unsigned inlierLtsIn,
                              int stepSizeBlockIn) :
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn),
     currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn),
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index 0cc6e16d..ca7e6cd4 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -16,8 +16,8 @@ class AladinContent: public Content {
                   int *referenceMaskIn = nullptr,
                   mat44 *transformationMatrixIn = nullptr,
                   size_t bytesIn = sizeof(float),
-                  const unsigned int percentageOfBlocks = 0,
-                  const unsigned int inlierLts = 0,
+                  const unsigned percentageOfBlocks = 0,
+                  const unsigned inlierLts = 0,
                   int blockStepSize = 0);
 
     virtual ~AladinContent();
@@ -27,8 +27,8 @@ class AladinContent: public Content {
 
 protected:
     _reg_blockMatchingParam* blockMatchingParams;
-    unsigned int currentPercentageOfBlockToUse;
-    unsigned int inlierLts;
+    unsigned currentPercentageOfBlockToUse;
+    unsigned inlierLts;
     int stepSizeBlock;
 
 #ifdef NR_TESTING
diff --git a/reg-lib/AladinContentCreator.h b/reg-lib/AladinContentCreator.h
index 58d42853..91d03be8 100644
--- a/reg-lib/AladinContentCreator.h
+++ b/reg-lib/AladinContentCreator.h
@@ -10,8 +10,8 @@ class AladinContentCreator: public ContentCreator {
                                   int *referenceMask = nullptr,
                                   mat44 *transformationMatrix = nullptr,
                                   size_t bytes = sizeof(float),
-                                  const unsigned int percentageOfBlocks = 0,
-                                  const unsigned int inlierLts = 0,
+                                  const unsigned percentageOfBlocks = 0,
+                                  const unsigned inlierLts = 0,
                                   int blockStepSize = 0) {
         return new AladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize);
     }
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 070dbbf8..170101f4 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -1,7 +1,7 @@
 #include "Platform.h"
 #include "CpuKernelFactory.h"
 #ifdef _USE_CUDA
-#include "CudaContextSingleton.h"
+#include "CudaContext.hpp"
 #include "CudaF3dContent.h"
 #include "CudaComputeFactory.h"
 #include "CudaContentCreatorFactory.h"
@@ -20,27 +20,29 @@
 Platform::Platform(const PlatformType& platformTypeIn) {
     platformType = platformTypeIn;
     if (platformType == PlatformType::Cpu) {
+        platformName = "CPU";
         computeFactory = new ComputeFactory();
         contentCreatorFactory = new ContentCreatorFactory();
         kernelFactory = new CpuKernelFactory();
         measureFactory = new MeasureFactory();
-        platformName = "CPU";
     }
 #ifdef _USE_CUDA
     else if (platformType == PlatformType::Cuda) {
+        platformName = "CUDA";
+        SetGpuIdx(999);
         computeFactory = new CudaComputeFactory();
         contentCreatorFactory = new CudaContentCreatorFactory();
         kernelFactory = new CudaKernelFactory();
         measureFactory = new CudaMeasureFactory();
-        platformName = "CUDA";
     }
 #endif
 #ifdef _USE_OPENCL
     else if (platformType == PlatformType::OpenCl) {
+        platformName = "OpenCL";
+        SetGpuIdx(999);
         computeFactory = new ClComputeFactory();
         contentCreatorFactory = new ClContentCreatorFactory();
         kernelFactory = new ClKernelFactory();
-        platformName = "OpenCL";
     }
 #endif
     else {
@@ -65,7 +67,7 @@ PlatformType Platform::GetPlatformType() const {
     return platformType;
 }
 /* *************************************************************** */
-unsigned int Platform::GetGpuIdx() const {
+unsigned Platform::GetGpuIdx() const {
     return gpuIdx;
 }
 /* *************************************************************** */
@@ -75,27 +77,27 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) {
     }
 #ifdef _USE_CUDA
     else if (platformType == PlatformType::Cuda) {
-        CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance();
+        NiftyReg::CudaContext& cudaContext = NiftyReg::CudaContext::GetInstance();
         if (gpuIdxIn != 999) {
             gpuIdx = gpuIdxIn;
-            cudaContext->SetCudaIdx(gpuIdxIn);
+            cudaContext.SetCudaIdx(gpuIdxIn);
         }
     }
 #endif
 #ifdef _USE_OPENCL
     else if (platformType == PlatformType::OpenCl) {
-        ClContextSingleton *sContext = &ClContextSingleton::Instance();
+        ClContextSingleton& clContext = ClContextSingleton::GetInstance();
         if (gpuIdxIn != 999) {
             gpuIdx = gpuIdxIn;
-            sContext->SetClIdx(gpuIdxIn);
+            clContext.SetClIdx(gpuIdxIn);
         }
 
         std::size_t paramValueSize;
-        sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
+        clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
         cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize);
-        sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
+        clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
         if (CL_DEVICE_TYPE_CPU == *field) {
-            reg_print_fct_error("Platform::setClIdx");
+            reg_print_fct_error("Platform::SetGpuIdx");
             reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit");
             reg_exit();
         }
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 0b195873..5c7ed55f 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -25,7 +25,7 @@ class Platform {
 
     std::string GetName() const;
     PlatformType GetPlatformType() const;
-    unsigned int GetGpuIdx() const;
+    unsigned GetGpuIdx() const;
     void SetGpuIdx(unsigned gpuIdxIn);
 
     Compute* CreateCompute(Content& con) const;
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 6b010090..29e11524 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -204,12 +204,12 @@ void reg_aladin<T>::InitialiseRegistration() {
                                  this->numberOfLevels,
                                  this->levelsToPerform);
     else
-        for (unsigned int l = 0; l < this->levelsToPerform; ++l)
+        for (unsigned l = 0; l < this->levelsToPerform; ++l)
             this->referenceMaskPyramid[l].reset(new int[this->referencePyramid[l].nVoxelsPerVolume()]());
 
     unique_ptr<Kernel> convolutionKernel(this->platform->CreateKernel(ConvolutionKernel::GetName(), nullptr));
     // SMOOTH THE INPUT IMAGES IF REQUIRED
-    for (unsigned int l = 0; l < this->levelsToPerform; l++) {
+    for (unsigned l = 0; l < this->levelsToPerform; l++) {
         if (this->referenceSigma != 0) {
             // Only the first image is smoothed
             unique_ptr<bool[]> active(new bool[this->referencePyramid[l]->nt]);
@@ -233,7 +233,7 @@ void reg_aladin<T>::InitialiseRegistration() {
     }
 
     // THRESHOLD THE INPUT IMAGES IF REQUIRED
-    for (unsigned int l = 0; l < this->levelsToPerform; l++) {
+    for (unsigned l = 0; l < this->levelsToPerform; l++) {
         reg_thresholdImage<T>(this->referencePyramid[l], this->referenceLowerThreshold, this->referenceUpperThreshold);
         reg_thresholdImage<T>(this->floatingPyramid[l], this->floatingLowerThreshold, this->floatingUpperThreshold);
     }
@@ -388,9 +388,9 @@ void reg_aladin<T>::InitAladinContent(nifti_image *ref,
                                       int *mask,
                                       mat44 *transMat,
                                       size_t bytes,
-                                      unsigned int blockPercentage,
-                                      unsigned int inlierLts,
-                                      unsigned int blockStepSize) {
+                                      unsigned blockPercentage,
+                                      unsigned inlierLts,
+                                      unsigned blockStepSize) {
     unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
     this->con.reset(contentCreator->Create(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize));
     this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams();
@@ -402,8 +402,8 @@ void reg_aladin<T>::DeinitAladinContent() {
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::ResolveMatrix(unsigned int iterations, const unsigned int optimizationFlag) {
-    unsigned int iteration = 0;
+void reg_aladin<T>::ResolveMatrix(unsigned iterations, const unsigned optimizationFlag) {
+    unsigned iteration = 0;
     while (iteration < iterations) {
 #ifndef NDEBUG
         char text[255];
@@ -432,7 +432,7 @@ void reg_aladin<T>::Run() {
 
         // Twice more iterations are performed during the first level
         // All the blocks are used during the first level
-        const unsigned int maxNumberOfIterationToPerform = (currentLevel == 0) ? this->maxIterations * 2 : this->maxIterations;
+        const unsigned maxNumberOfIterationToPerform = (currentLevel == 0) ? this->maxIterations * 2 : this->maxIterations;
 
 #ifdef NDEBUG
         if (this->verbose) {
@@ -457,7 +457,7 @@ void reg_aladin<T>::Run() {
         /* Rigid registration */
         /* ****************** */
         if ((this->performRigid && !this->performAffine) || (this->performAffine && this->performRigid && this->currentLevel == 0)) {
-            const unsigned int ratio = (this->performAffine && this->performRigid && this->currentLevel == 0) ? 4 : 1;
+            const unsigned ratio = (this->performAffine && this->performRigid && this->currentLevel == 0) ? 4 : 1;
             ResolveMatrix(maxNumberOfIterationToPerform * ratio, RIGID);
         }
 
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index c3d7d0e2..3921d3d0 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -75,11 +75,11 @@ class reg_aladin {
 
     bool verbose;
 
-    unsigned int maxIterations;
+    unsigned maxIterations;
 
-    unsigned int currentLevel;
-    unsigned int numberOfLevels;
-    unsigned int levelsToPerform;
+    unsigned currentLevel;
+    unsigned numberOfLevels;
+    unsigned levelsToPerform;
 
     bool performRigid;
     bool performAffine;
@@ -126,9 +126,9 @@ class reg_aladin {
                                    int *mask,
                                    mat44 *transMat,
                                    size_t bytes,
-                                   unsigned int blockPercentage = 0,
-                                   unsigned int inlierLts = 0,
-                                   unsigned int blockStepSize = 0);
+                                   unsigned blockPercentage = 0,
+                                   unsigned inlierLts = 0,
+                                   unsigned blockStepSize = 0);
     virtual void DeinitAladinContent();
     virtual void CreateKernels();
     virtual void DeallocateKernels();
@@ -178,14 +178,14 @@ class reg_aladin {
         this->gpuIdx = gpuIdxIn;
     }
 
-    SetMacro(MaxIterations, maxIterations, unsigned int);
-    GetMacro(MaxIterations, maxIterations, unsigned int);
+    SetMacro(MaxIterations, maxIterations, unsigned);
+    GetMacro(MaxIterations, maxIterations, unsigned);
 
-    SetMacro(NumberOfLevels, numberOfLevels, unsigned int);
-    GetMacro(NumberOfLevels, numberOfLevels, unsigned int);
+    SetMacro(NumberOfLevels, numberOfLevels, unsigned);
+    GetMacro(NumberOfLevels, numberOfLevels, unsigned);
 
-    SetMacro(LevelsToPerform, levelsToPerform, unsigned int);
-    GetMacro(LevelsToPerform, levelsToPerform, unsigned int);
+    SetMacro(LevelsToPerform, levelsToPerform, unsigned);
+    GetMacro(LevelsToPerform, levelsToPerform, unsigned);
 
     SetMacro(BlockPercentage, blockPercentage, int);
     GetMacro(BlockPercentage, blockPercentage, int);
@@ -265,5 +265,5 @@ class reg_aladin {
 
 private:
     unique_ptr<Kernel> affineTransformation3DKernel, blockMatchingKernel, optimiseKernel, resamplingKernel;
-    void ResolveMatrix(unsigned int iterations, const unsigned int optimizationFlag);
+    void ResolveMatrix(unsigned iterations, const unsigned optimizationFlag);
 };
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index fcce8132..fe97cca0 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -49,12 +49,12 @@ void reg_aladin_sym<T>::InitialiseRegistration() {
                                  this->numberOfLevels,
                                  this->levelsToPerform);
     else
-        for (unsigned int l = 0; l < this->levelsToPerform; ++l)
+        for (unsigned l = 0; l < this->levelsToPerform; ++l)
             this->floatingMaskPyramid[l].reset(new int[this->floatingPyramid[l].nVoxelsPerVolume()]());
 
     // CHECK THE THRESHOLD VALUES TO UPDATE THE MASK
     if (this->floatingUpperThreshold != std::numeric_limits<T>::max()) {
-        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
+        for (unsigned l = 0; l < this->levelsToPerform; ++l) {
             T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
             int *mskPtr = this->floatingMaskPyramid[l].get();
             for (size_t i = 0; i < this->floatingPyramid[l].nVoxelsPerVolume(); ++i) {
@@ -64,7 +64,7 @@ void reg_aladin_sym<T>::InitialiseRegistration() {
         }
     }
     if (this->floatingLowerThreshold != std::numeric_limits<T>::min()) {
-        for (unsigned int l = 0; l < this->levelsToPerform; ++l) {
+        for (unsigned l = 0; l < this->levelsToPerform; ++l) {
             T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
             int *mskPtr = this->floatingMaskPyramid[l].get();
             for (size_t i = 0; i < this->floatingPyramid[l].nVoxelsPerVolume(); ++i) {
@@ -186,9 +186,9 @@ void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
                                           int *mask,
                                           mat44 *transMat,
                                           size_t bytes,
-                                          unsigned int blockPercentage,
-                                          unsigned int inlierLts,
-                                          unsigned int blockStepSize) {
+                                          unsigned blockPercentage,
+                                          unsigned inlierLts,
+                                          unsigned blockStepSize) {
     reg_aladin<T>::InitAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
     unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
     this->backCon.reset(contentCreator->Create(flo, ref, this->floatingMaskPyramid[this->currentLevel].get(), this->backwardTransformationMatrix, bytes, blockPercentage, inlierLts, blockStepSize));
diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h
index dbe534d0..405b4038 100644
--- a/reg-lib/_reg_aladin_sym.h
+++ b/reg-lib/_reg_aladin_sym.h
@@ -26,9 +26,9 @@ class reg_aladin_sym: public reg_aladin<T> {
                                    int *mask,
                                    mat44 *transMat,
                                    size_t bytes,
-                                   unsigned int blockPercentage = 0,
-                                   unsigned int inlierLts = 0,
-                                   unsigned int blockStepSize = 0);
+                                   unsigned blockPercentage = 0,
+                                   unsigned inlierLts = 0,
+                                   unsigned blockStepSize = 0);
     virtual void DeinitAladinContent();
     virtual void CreateKernels();
     virtual void DeallocateKernels();
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 2949bddd..8cd8419d 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -82,7 +82,7 @@ void reg_base<T>::SetFloatingImage(NiftiImage inputFloatingIn) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetMaximalIterationNumber(unsigned int iter) {
+void reg_base<T>::SetMaximalIterationNumber(unsigned iter) {
     maxIterationNumber = iter;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetMaximalIterationNumber");
@@ -122,7 +122,7 @@ void reg_base<T>::SetFloatingSmoothingSigma(T floatingSmoothingSigmaIn) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetReferenceThresholdUp(unsigned int i, T t) {
+void reg_base<T>::SetReferenceThresholdUp(unsigned i, T t) {
     referenceThresholdUp[i] = t;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetReferenceThresholdUp");
@@ -130,7 +130,7 @@ void reg_base<T>::SetReferenceThresholdUp(unsigned int i, T t) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetReferenceThresholdLow(unsigned int i, T t) {
+void reg_base<T>::SetReferenceThresholdLow(unsigned i, T t) {
     referenceThresholdLow[i] = t;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetReferenceThresholdLow");
@@ -138,7 +138,7 @@ void reg_base<T>::SetReferenceThresholdLow(unsigned int i, T t) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetFloatingThresholdUp(unsigned int i, T t) {
+void reg_base<T>::SetFloatingThresholdUp(unsigned i, T t) {
     floatingThresholdUp[i] = t;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetFloatingThresholdUp");
@@ -146,7 +146,7 @@ void reg_base<T>::SetFloatingThresholdUp(unsigned int i, T t) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetFloatingThresholdLow(unsigned int i, T t) {
+void reg_base<T>::SetFloatingThresholdLow(unsigned i, T t) {
     floatingThresholdLow[i] = t;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetFloatingThresholdLow");
@@ -178,7 +178,7 @@ void reg_base<T>::SetWarpedPaddingValue(float warpedPaddingValueIn) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLevelNumber(unsigned int levelNumberIn) {
+void reg_base<T>::SetLevelNumber(unsigned levelNumberIn) {
     levelNumber = levelNumberIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetLevelNumber");
@@ -186,7 +186,7 @@ void reg_base<T>::SetLevelNumber(unsigned int levelNumberIn) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLevelToPerform(unsigned int levelToPerformIn) {
+void reg_base<T>::SetLevelToPerform(unsigned levelToPerformIn) {
     levelToPerform = levelToPerformIn;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetLevelToPerform");
@@ -490,7 +490,7 @@ void reg_base<T>::Initialise() {
     CheckParameters();
 
     // CREATE THE PYRAMID IMAGES
-    const unsigned int imageCount = usePyramid ? levelToPerform : 1;
+    const unsigned imageCount = usePyramid ? levelToPerform : 1;
     referencePyramid = vector<NiftiImage>(imageCount);
     floatingPyramid = vector<NiftiImage>(imageCount);
     maskPyramid = vector<unique_ptr<int[]>>(imageCount);
@@ -523,17 +523,17 @@ void reg_base<T>::Initialise() {
     }
 
     // FINEST LEVEL OF REGISTRATION
-    const unsigned int levelCount = usePyramid ? levelNumber : 1;
+    const unsigned levelCount = usePyramid ? levelNumber : 1;
     reg_createImagePyramid<T>(inputReference, referencePyramid, levelCount, imageCount);
     reg_createImagePyramid<T>(inputFloating, floatingPyramid, levelCount, imageCount);
     if (maskImage)
         reg_createMaskPyramid<T>(maskImage, maskPyramid, levelCount, imageCount);
     else
-        for (unsigned int l = 0; l < imageCount; ++l)
+        for (unsigned l = 0; l < imageCount; ++l)
             maskPyramid[l].reset(new int[referencePyramid[l].nVoxelsPerVolume()]());
 
     // SMOOTH THE INPUT IMAGES IF REQUIRED
-    for (unsigned int l = 0; l < levelToPerform; l++) {
+    for (unsigned l = 0; l < levelToPerform; l++) {
         if (referenceSmoothingSigma != 0) {
             unique_ptr<bool[]> active(new bool[referencePyramid[l]->nt]);
             unique_ptr<float[]> sigma(new float[referencePyramid[l]->nt]);
@@ -556,7 +556,7 @@ void reg_base<T>::Initialise() {
     }
 
     // THRESHOLD THE INPUT IMAGES IF REQUIRED
-    for (unsigned int l = 0; l < imageCount; l++) {
+    for (unsigned l = 0; l < imageCount; l++) {
         reg_thresholdImage<T>(referencePyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]);
         reg_thresholdImage<T>(floatingPyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]);
     }
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 3f676875..f16184d1 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -78,8 +78,8 @@ class reg_base: public InterfaceOptimiser {
     unique_ptr<T[]> floatingThresholdLow;
     bool robustRange;
     float warpedPaddingValue;
-    unsigned int levelNumber;
-    unsigned int levelToPerform;
+    unsigned levelNumber;
+    unsigned levelToPerform;
     T gradientSmoothingSigma;
     T similarityWeight;
     bool additive_mc_nmi;
@@ -150,7 +150,7 @@ class reg_base: public InterfaceOptimiser {
     virtual void SetGpuIdx(const unsigned& gpuIdx) { platform->SetGpuIdx(gpuIdx); }
 
     // Optimisation-related functions
-    virtual void SetMaximalIterationNumber(unsigned int);
+    virtual void SetMaximalIterationNumber(unsigned);
     virtual void NoOptimisationAlongX() { optimiseX = false; }
     virtual void NoOptimisationAlongY() { optimiseY = false; }
     virtual void NoOptimisationAlongZ() { optimiseZ = false; }
@@ -185,15 +185,15 @@ class reg_base: public InterfaceOptimiser {
     virtual void SetReferenceSmoothingSigma(T);
     virtual void SetFloatingSmoothingSigma(T);
     virtual void SetGradientSmoothingSigma(T);
-    virtual void SetReferenceThresholdUp(unsigned int, T);
-    virtual void SetReferenceThresholdLow(unsigned int, T);
-    virtual void SetFloatingThresholdUp(unsigned int, T);
-    virtual void SetFloatingThresholdLow(unsigned int, T);
+    virtual void SetReferenceThresholdUp(unsigned, T);
+    virtual void SetReferenceThresholdLow(unsigned, T);
+    virtual void SetFloatingThresholdUp(unsigned, T);
+    virtual void SetFloatingThresholdLow(unsigned, T);
     virtual void UseRobustRange();
     virtual void DoNotUseRobustRange();
     virtual void SetWarpedPaddingValue(float);
-    virtual void SetLevelNumber(unsigned int);
-    virtual void SetLevelToPerform(unsigned int);
+    virtual void SetLevelNumber(unsigned);
+    virtual void SetLevelToPerform(unsigned);
     virtual void PrintOutInformation();
     virtual void DoNotPrintOutInformation();
     virtual void DoNotUsePyramidalApproach();
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 83d95d02..611d74d6 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -84,7 +84,7 @@ void reg_f3d<T>::DoNotApproximateJacobianLog() {
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::SetSpacing(unsigned int i, T s) {
+void reg_f3d<T>::SetSpacing(unsigned i, T s) {
     spacing[i] = s;
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::SetSpacing");
@@ -351,9 +351,9 @@ double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
 
     double value = this->compute->GetJacobianPenaltyTerm(approx);
 
-    unsigned int maxit = 5;
+    unsigned maxit = 5;
     if (type > 0) maxit = 20;
-    unsigned int it = 0;
+    unsigned it = 0;
     while (value != value && it < maxit) {
         value = this->compute->CorrectFolding(approx);
 #ifndef NDEBUG
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index 0950dbed..9125ba15 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -74,7 +74,7 @@ class reg_f3d: public reg_base<T> {
     virtual void SetJacobianLogWeight(T);
     virtual void ApproximateJacobianLog();
     virtual void DoNotApproximateJacobianLog();
-    virtual void SetSpacing(unsigned int, T);
+    virtual void SetSpacing(unsigned, T);
     virtual void NoGridRefinement() { gridRefinement = false; }
 
     // F3D2 specific options
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index f1a6823b..e218b9ec 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -207,9 +207,9 @@ double reg_f3d2<T>::ComputeJacobianBasedPenaltyTerm(int type) {
 
     double backwardPenaltyTerm = computeBw->GetJacobianPenaltyTerm(approx);
 
-    unsigned int maxit = 5;
+    unsigned maxit = 5;
     if (type > 0) maxit = 20;
-    unsigned int it = 0;
+    unsigned it = 0;
     while (backwardPenaltyTerm != backwardPenaltyTerm && it < maxit) {
         backwardPenaltyTerm = computeBw->CorrectFolding(approx);
 #ifndef NDEBUG
@@ -712,14 +712,14 @@ void reg_f3d2<T>::Initialise() {
     }
 
     // Set the floating mask image pyramid
-    const unsigned int imageCount = this->usePyramid ? this->levelToPerform : 1;
-    const unsigned int levelCount = this->usePyramid ? this->levelNumber : 1;
+    const unsigned imageCount = this->usePyramid ? this->levelToPerform : 1;
+    const unsigned levelCount = this->usePyramid ? this->levelNumber : 1;
     floatingMaskPyramid = vector<unique_ptr<int[]>>(imageCount);
 
     if (floatingMaskImage)
         reg_createMaskPyramid<T>(floatingMaskImage, floatingMaskPyramid, levelCount, imageCount);
     else
-        for (unsigned int l = 0; l < imageCount; ++l)
+        for (unsigned l = 0; l < imageCount; ++l)
             floatingMaskPyramid[l].reset(new int[this->floatingPyramid[l].nVoxelsPerVolume()]());
 
 #ifdef NDEBUG
diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
index 511b877e..0ffd4234 100644
--- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
@@ -37,7 +37,7 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) :
     }
 
     //get opencl context params
-    sContext = &ClContextSingleton::Instance();
+    sContext = &ClContextSingleton::GetInstance();
     clContext = sContext->GetContext();
     commandQueue = sContext->GetCommandQueue();
     program = sContext->CreateProgram(clKernelPath.c_str());
@@ -52,7 +52,7 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) :
     if (deformationFieldImage->nz > 1)
         kernel = clCreateKernel(program, "affineKernel3D", &errNum);
     else kernel = clCreateKernel(program, "affineKernel2D", &errNum);
-    sContext->checkErrNum(errNum, "Error setting kernel ClAffineDeformationFieldKernel.");
+    sContext->CheckErrNum(errNum, "Error setting kernel ClAffineDeformationFieldKernel.");
 
     //get cl ptrs
     clDeformationField = con->GetDeformationFieldArrayClmem();
@@ -60,7 +60,7 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) :
 
     //set some final kernel args
     errNum = clSetKernelArg(kernel, 2, sizeof(cl_mem), &clMask);
-    sContext->checkErrNum(errNum, "Error setting clMask.");
+    sContext->CheckErrNum(errNum, "Error setting clMask.");
 
 }
 /* *************************************************************** */
@@ -70,16 +70,16 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) {
     cl_int errNum;
     std::size_t paramValueSize;
     errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, &paramValueSize);
-    sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info ");
+    sContext->CheckErrNum(errNum, "Failed to GetDeviceId() OpenCL device info ");
     cl_uint * info = (cl_uint *)alloca(sizeof(cl_uint) * paramValueSize);
     errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr);
-    sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info ");
+    sContext->CheckErrNum(errNum, "Failed to GetDeviceId() OpenCL device info ");
     maxWG = *info;
 
     //8=default value
-    unsigned int xThreads = 8;
-    unsigned int yThreads = 8;
-    unsigned int zThreads = 8;
+    unsigned xThreads = 8;
+    unsigned yThreads = 8;
+    unsigned zThreads = 8;
 
     while (xThreads * yThreads * zThreads > maxWG) {
         xThreads = xThreads / 2;
@@ -87,11 +87,11 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) {
         zThreads = zThreads / 2;
     }
 
-    const unsigned int xBlocks = ((deformationFieldImage->nx % xThreads) == 0) ?
+    const unsigned xBlocks = ((deformationFieldImage->nx % xThreads) == 0) ?
         (deformationFieldImage->nx / xThreads) : (deformationFieldImage->nx / xThreads) + 1;
-    const unsigned int yBlocks = ((deformationFieldImage->ny % yThreads) == 0) ?
+    const unsigned yBlocks = ((deformationFieldImage->ny % yThreads) == 0) ?
         (deformationFieldImage->ny / yThreads) : (deformationFieldImage->ny / yThreads) + 1;
-    const unsigned int zBlocks = ((deformationFieldImage->nz % zThreads) == 0) ?
+    const unsigned zBlocks = ((deformationFieldImage->nz % zThreads) == 0) ?
         (deformationFieldImage->nz / zThreads) : (deformationFieldImage->nz / zThreads) + 1;
     //const cl_uint dims = deformationFieldImage->nz>1?3:2;
     //Back to the old version... at least I could compile
@@ -111,21 +111,21 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) {
 
     cl_mem cltransMat = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                        sizeof(float) * 16, trans, &errNum);
-    sContext->checkErrNum(errNum,
+    sContext->CheckErrNum(errNum,
                           "ClAffineDeformationFieldKernel::calculate failed to allocate memory (cltransMat): ");
 
     cl_uint composition = compose;
     errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cltransMat);
-    sContext->checkErrNum(errNum, "Error setting cltransMat.");
+    sContext->CheckErrNum(errNum, "Error setting cltransMat.");
     errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &clDeformationField);
-    sContext->checkErrNum(errNum, "Error setting clDeformationField.");
+    sContext->CheckErrNum(errNum, "Error setting clDeformationField.");
     errNum |= clSetKernelArg(kernel, 3, sizeof(cl_uint3), &pms_d);
-    sContext->checkErrNum(errNum, "Error setting kernel arguments.");
+    sContext->CheckErrNum(errNum, "Error setting kernel arguments.");
     errNum |= clSetKernelArg(kernel, 4, sizeof(cl_uint), &composition);
-    sContext->checkErrNum(errNum, "Error setting kernel arguments.");
+    sContext->CheckErrNum(errNum, "Error setting kernel arguments.");
 
     errNum = clEnqueueNDRangeKernel(commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
-    sContext->checkErrNum(errNum, "Error queuing ClAffineDeformationFieldKernel for execution");
+    sContext->CheckErrNum(errNum, "Error queuing ClAffineDeformationFieldKernel for execution");
     clFinish(commandQueue);
 
     free(trans);
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index a2d51605..07b263ae 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -7,8 +7,8 @@ ClAladinContent::ClAladinContent(nifti_image *referenceIn,
                                  int *referenceMaskIn,
                                  mat44 *transformationMatrixIn,
                                  size_t bytesIn,
-                                 const unsigned int percentageOfBlocks,
-                                 const unsigned int inlierLts,
+                                 const unsigned percentageOfBlocks,
+                                 const unsigned inlierLts,
                                  int blockStepSize) :
     AladinContent(referenceIn,
                   floatingIn,
@@ -43,7 +43,7 @@ void ClAladinContent::InitVars() {
         if (warped != nullptr)
             reg_tools_changeDatatype<float>(warped);
     }
-    sContext = &ClContextSingleton::Instance();
+    sContext = &ClContextSingleton::GetInstance();
     clContext = sContext->GetContext();
     commandQueue = sContext->GetCommandQueue();
     //numBlocks = (blockMatchingParams != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0;
@@ -52,32 +52,32 @@ void ClAladinContent::InitVars() {
 void ClAladinContent::AllocateClPtrs() {
     if (warped != nullptr) {
         warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum);
-        sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): ");
+        sContext->CheckErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): ");
     }
     if (deformationField != nullptr) {
         deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * deformationField->nvox, deformationField->data, &errNum);
-        sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): ");
+        sContext->CheckErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): ");
     }
     if (floating != nullptr) {
         floatingImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * floating->nvox, floating->data, &errNum);
-        sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (floating): ");
+        sContext->CheckErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (floating): ");
 
         float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44));
         mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h);
         floMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), sourceIJKMatrix_h, &errNum);
-        sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): ");
+        sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): ");
         free(sourceIJKMatrix_h);
     }
     if (reference != nullptr) {
         referenceImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                              sizeof(float) * reference->nvox,
                                              reference->data, &errNum);
-        sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): ");
+        sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): ");
 
         float* targetMat = (float *)malloc(sizeof(mat44)); //freed
         mat44ToCptr(*GetXYZMatrix(*reference), targetMat);
         refMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), targetMat, &errNum);
-        sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): ");
+        sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): ");
         free(targetMat);
     }
     if (blockMatchingParams != nullptr) {
@@ -86,27 +86,27 @@ void ClAladinContent::AllocateClPtrs() {
             referencePositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                     blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float),
                                                     blockMatchingParams->referencePosition, &errNum);
-            sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): ");
+            sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): ");
         }
         if (blockMatchingParams->warpedPosition != nullptr) {
             //resultPositionClmem
             warpedPositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                  blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float),
                                                  blockMatchingParams->warpedPosition, &errNum);
-            sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): ");
+            sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): ");
         }
         if (blockMatchingParams->totalBlock != nullptr) {
             //totalBlockClmem
             totalBlockClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                              blockMatchingParams->totalBlockNumber * sizeof(int),
                                              blockMatchingParams->totalBlock, &errNum);
-            sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): ");
+            sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): ");
         }
     }
     if (referenceMask != nullptr && reference != nullptr) {
         maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                    CalcVoxelNumber(*reference) * sizeof(int), referenceMask, &errNum);
-        sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): ");
+        sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): ");
     }
 }
 /* *************************************************************** */
@@ -117,15 +117,15 @@ nifti_image* ClAladinContent::GetWarped() {
 /* *************************************************************** */
 nifti_image* ClAladinContent::GetDeformationField() {
     errNum = clEnqueueReadBuffer(commandQueue, deformationFieldClmem, CL_TRUE, 0, deformationField->nvox * sizeof(float), deformationField->data, 0, nullptr, nullptr); //CLCONTEXT
-    sContext->checkErrNum(errNum, "Get: failed deformationField: ");
+    sContext->CheckErrNum(errNum, "Get: failed deformationField: ");
     return deformationField;
 }
 /* *************************************************************** */
 _reg_blockMatchingParam* ClAladinContent::GetBlockMatchingParams() {
     errNum = clEnqueueReadBuffer(commandQueue, warpedPositionClmem, CL_TRUE, 0, sizeof(float) * blockMatchingParams->activeBlockNumber * blockMatchingParams->dim, blockMatchingParams->warpedPosition, 0, nullptr, nullptr); //CLCONTEXT
-    sContext->checkErrNum(errNum, "CLContext: failed result position: ");
+    sContext->CheckErrNum(errNum, "CLContext: failed result position: ");
     errNum = clEnqueueReadBuffer(commandQueue, referencePositionClmem, CL_TRUE, 0, sizeof(float) * blockMatchingParams->activeBlockNumber * blockMatchingParams->dim, blockMatchingParams->referencePosition, 0, nullptr, nullptr); //CLCONTEXT
-    sContext->checkErrNum(errNum, "CLContext: failed target position: ");
+    sContext->CheckErrNum(errNum, "CLContext: failed target position: ");
     return blockMatchingParams;
 }
 /* *************************************************************** */
@@ -139,7 +139,7 @@ void ClAladinContent::SetDeformationField(nifti_image *deformationFieldIn) {
 
     AladinContent::SetDeformationField(deformationFieldIn);
     deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, deformationField->nvox * sizeof(float), deformationField->data, &errNum);
-    sContext->checkErrNum(errNum, "ClAladinContent::SetDeformationField failed to allocate memory (deformationFieldClmem): ");
+    sContext->CheckErrNum(errNum, "ClAladinContent::SetDeformationField failed to allocate memory (deformationFieldClmem): ");
 }
 /* *************************************************************** */
 void ClAladinContent::SetReferenceMask(int *referenceMaskIn) {
@@ -147,7 +147,7 @@ void ClAladinContent::SetReferenceMask(int *referenceMaskIn) {
         clReleaseMemObject(maskClmem);
     AladinContent::SetReferenceMask(referenceMaskIn);
     maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, reference->nvox * sizeof(int), referenceMask, &errNum);
-    sContext->checkErrNum(errNum, "ClAladinContent::SetReferenceMask failed to allocate memory (maskClmem): ");
+    sContext->CheckErrNum(errNum, "ClAladinContent::SetReferenceMask failed to allocate memory (maskClmem): ");
 }
 /* *************************************************************** */
 void ClAladinContent::SetWarped(nifti_image *warped) {
@@ -159,7 +159,7 @@ void ClAladinContent::SetWarped(nifti_image *warped) {
     }
     AladinContent::SetWarped(warped);
     warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum);
-    sContext->checkErrNum(errNum, "ClAladinContent::SetWarped failed to allocate memory (warpedImageClmem): ");
+    sContext->CheckErrNum(errNum, "ClAladinContent::SetWarped failed to allocate memory (warpedImageClmem): ");
 }
 /* *************************************************************** */
 void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
@@ -168,19 +168,19 @@ void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
         clReleaseMemObject(referencePositionClmem);
         //referencePositionClmem
         referencePositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->referencePosition, &errNum);
-        sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): ");
+        sContext->CheckErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): ");
     }
     if (blockMatchingParams->warpedPosition != nullptr) {
         clReleaseMemObject(warpedPositionClmem);
         //warpedPositionClmem
         warpedPositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->warpedPosition, &errNum);
-        sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): ");
+        sContext->CheckErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): ");
     }
     if (blockMatchingParams->totalBlock != nullptr) {
         clReleaseMemObject(totalBlockClmem);
         //totalBlockClmem
         totalBlockClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, blockMatchingParams->totalBlockNumber * sizeof(int), blockMatchingParams->totalBlock, &errNum);
-        sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (activeBlockClmem): ");
+        sContext->CheckErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (activeBlockClmem): ");
     }
 }
 /* *************************************************************** */
@@ -257,7 +257,7 @@ DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) {
         if (intensity != intensity)
             intensity = 0;
         intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-        return static_cast<unsigned int>(intensity > 0 ? reg_round(intensity) : 0);
+        return static_cast<unsigned>(intensity > 0 ? reg_round(intensity) : 0);
         break;
     default:
         if (intensity != intensity)
@@ -280,7 +280,7 @@ void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int
 
     errNum = clEnqueueReadBuffer(commandQueue, memoryObject, CL_TRUE, 0,
                                  size * sizeof(float), buffer, 0, nullptr, nullptr);
-    sContext->checkErrNum(errNum, "Error reading warped buffer.");
+    sContext->CheckErrNum(errNum, "Error reading warped buffer.");
 
     free(image->data);
     image->datatype = type;
@@ -313,7 +313,7 @@ void ClAladinContent::DownloadImage(nifti_image *image, cl_mem memoryObject, int
         FillImageData<short>(image, memoryObject, datatype);
         break;
     case NIFTI_TYPE_UINT32:
-        FillImageData<unsigned int>(image, memoryObject, datatype);
+        FillImageData<unsigned>(image, memoryObject, datatype);
         break;
     case NIFTI_TYPE_INT32:
         FillImageData<int>(image, memoryObject, datatype);
@@ -349,6 +349,6 @@ void ClAladinContent::FreeClPtrs() {
 }
 /* *************************************************************** */
 bool ClAladinContent::IsCurrentComputationDoubleCapable() {
-    return sContext->GetIsCardDoubleCapable();
+    return sContext->IsCardDoubleCapable();
 }
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
index fa2418f4..5c11f081 100644
--- a/reg-lib/cl/ClAladinContent.h
+++ b/reg-lib/cl/ClAladinContent.h
@@ -1,89 +1,89 @@
-#pragma once
-
-#include "AladinContent.h"
-#include "ClContextSingleton.h"
-
-#ifdef __APPLE__
-#include <OpenCL/cl.h>
-#else
-#include <CL/cl.h>
-#endif
-
-class ClAladinContent: public AladinContent {
-public:
-    //constructors
-    ClAladinContent(nifti_image *referenceIn,
-                    nifti_image *floatingIn,
-                    int *referenceMaskIn = nullptr,
-                    mat44 *transformationMatrixIn = nullptr,
-                    size_t bytesIn = sizeof(float),
-                    const unsigned int percentageOfBlocks = 0,
-                    const unsigned int inlierLts = 0,
-                    int blockStepSize = 0);
-    virtual ~ClAladinContent();
-
-    virtual bool IsCurrentComputationDoubleCapable() override;
-
-    // OpenCL getters
-    virtual cl_mem GetReferenceImageArrayClmem();
-    virtual cl_mem GetFloatingImageArrayClmem();
-    virtual cl_mem GetWarpedImageClmem();
-    virtual cl_mem GetReferencePositionClmem();
-    virtual cl_mem GetWarpedPositionClmem();
-    virtual cl_mem GetDeformationFieldArrayClmem();
-    virtual cl_mem GetTotalBlockClmem();
-    virtual cl_mem GetMaskClmem();
-    virtual cl_mem GetRefMatClmem();
-    virtual cl_mem GetFloMatClmem();
-    virtual int* GetReferenceDims();
-    virtual int* GetFloatingDims();
-
-    // CPU getters with data downloaded from device
-    virtual _reg_blockMatchingParam* GetBlockMatchingParams() override;
-    virtual nifti_image* GetDeformationField() override;
-    virtual nifti_image* GetWarped() override;
-
-private:
-    void InitVars();
-    void AllocateClPtrs();
-    void FreeClPtrs();
-
-    ClContextSingleton *sContext;
-    cl_context clContext;
-    cl_int errNum;
-    cl_command_queue commandQueue;
-
-    cl_mem referenceImageClmem;
-    cl_mem floatingImageClmem;
-    cl_mem warpedImageClmem;
-    cl_mem deformationFieldClmem;
-    cl_mem referencePositionClmem;
-    cl_mem warpedPositionClmem;
-    cl_mem totalBlockClmem;
-    cl_mem maskClmem;
-    cl_mem refMatClmem;
-    cl_mem floMatClmem;
-
-    int referenceDims[4];
-    int floatingDims[4];
-
-    unsigned int nVoxels;
-
-    void DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype);
-    template<class T>
-    void FillImageData(nifti_image *image, cl_mem memoryObject, int type);
-    template<class T>
-    T FillWarpedImageData(float intensity, int datatype);
-
-#ifdef NR_TESTING
-public:
-#else
-protected:
-#endif
-    // Functions for testing
-    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedImageIn) override;
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
-    virtual void SetReferenceMask(int *referenceMaskIn) override;
-    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
-};
+#pragma once
+
+#include "AladinContent.h"
+#include "ClContextSingleton.h"
+
+#ifdef __APPLE__
+#include <OpenCL/cl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+class ClAladinContent: public AladinContent {
+public:
+    //constructors
+    ClAladinContent(nifti_image *referenceIn,
+                    nifti_image *floatingIn,
+                    int *referenceMaskIn = nullptr,
+                    mat44 *transformationMatrixIn = nullptr,
+                    size_t bytesIn = sizeof(float),
+                    const unsigned percentageOfBlocks = 0,
+                    const unsigned inlierLts = 0,
+                    int blockStepSize = 0);
+    virtual ~ClAladinContent();
+
+    virtual bool IsCurrentComputationDoubleCapable() override;
+
+    // OpenCL getters
+    virtual cl_mem GetReferenceImageArrayClmem();
+    virtual cl_mem GetFloatingImageArrayClmem();
+    virtual cl_mem GetWarpedImageClmem();
+    virtual cl_mem GetReferencePositionClmem();
+    virtual cl_mem GetWarpedPositionClmem();
+    virtual cl_mem GetDeformationFieldArrayClmem();
+    virtual cl_mem GetTotalBlockClmem();
+    virtual cl_mem GetMaskClmem();
+    virtual cl_mem GetRefMatClmem();
+    virtual cl_mem GetFloMatClmem();
+    virtual int* GetReferenceDims();
+    virtual int* GetFloatingDims();
+
+    // CPU getters with data downloaded from device
+    virtual _reg_blockMatchingParam* GetBlockMatchingParams() override;
+    virtual nifti_image* GetDeformationField() override;
+    virtual nifti_image* GetWarped() override;
+
+private:
+    void InitVars();
+    void AllocateClPtrs();
+    void FreeClPtrs();
+
+    ClContextSingleton *sContext;
+    cl_context clContext;
+    cl_int errNum;
+    cl_command_queue commandQueue;
+
+    cl_mem referenceImageClmem;
+    cl_mem floatingImageClmem;
+    cl_mem warpedImageClmem;
+    cl_mem deformationFieldClmem;
+    cl_mem referencePositionClmem;
+    cl_mem warpedPositionClmem;
+    cl_mem totalBlockClmem;
+    cl_mem maskClmem;
+    cl_mem refMatClmem;
+    cl_mem floMatClmem;
+
+    int referenceDims[4];
+    int floatingDims[4];
+
+    unsigned nVoxels;
+
+    void DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype);
+    template<class T>
+    void FillImageData(nifti_image *image, cl_mem memoryObject, int type);
+    template<class T>
+    T FillWarpedImageData(float intensity, int datatype);
+
+#ifdef NR_TESTING
+public:
+#else
+protected:
+#endif
+    // Functions for testing
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    virtual void SetWarped(nifti_image *warpedImageIn) override;
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetReferenceMask(int *referenceMaskIn) override;
+    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
+};
diff --git a/reg-lib/cl/ClAladinContentCreator.h b/reg-lib/cl/ClAladinContentCreator.h
index a1f2f5fe..84442142 100644
--- a/reg-lib/cl/ClAladinContentCreator.h
+++ b/reg-lib/cl/ClAladinContentCreator.h
@@ -10,8 +10,8 @@ class ClAladinContentCreator: public AladinContentCreator {
                                   int *referenceMask = nullptr,
                                   mat44 *transformationMatrix = nullptr,
                                   size_t bytes = sizeof(float),
-                                  const unsigned int percentageOfBlocks = 0,
-                                  const unsigned int inlierLts = 0,
+                                  const unsigned percentageOfBlocks = 0,
+                                  const unsigned inlierLts = 0,
                                   int blockStepSize = 0) override {
         return new ClAladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize);
     }
diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp
index 4dd1bb8d..9cea76c7 100644
--- a/reg-lib/cl/ClBlockMatchingKernel.cpp
+++ b/reg-lib/cl/ClBlockMatchingKernel.cpp
@@ -35,7 +35,7 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern
    }
 
    //get opencl context params
-   sContext = &ClContextSingleton::Instance();
+   sContext = &ClContextSingleton::GetInstance();
    clContext = sContext->GetContext();
    commandQueue = sContext->GetCommandQueue();
    program = sContext->CreateProgram(clKernelPath.c_str());
@@ -47,7 +47,7 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern
    } else {
       kernel = clCreateKernel(program, "blockMatchingKernel2D", &errNum);
    }
-   sContext->checkErrNum(errNum, "Error setting bm kernel.");
+   sContext->CheckErrNum(errNum, "Error setting bm kernel.");
 
    //get cl ptrs
    clTotalBlock = con->GetTotalBlockClmem();
@@ -73,7 +73,7 @@ void ClBlockMatchingKernel::Calculate() {
    params->definedActiveBlockNumber = 0;
    cl_mem cldefinedBlock = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                           sizeof(int), &(params->definedActiveBlockNumber), &errNum);
-   sContext->checkErrNum(errNum, "ClBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) ");
+   sContext->CheckErrNum(errNum, "ClBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) ");
 
    const cl_uint4 imageSize = {{(cl_uint)reference->nx,
       (cl_uint)reference->ny,
@@ -84,7 +84,7 @@ void ClBlockMatchingKernel::Calculate() {
       (size_t)params->blockNumber[1] * 4,
       (size_t)params->blockNumber[2] * 4};
    size_t localWorkSize[3] = {4, 4, 4};
-   unsigned int sMemSize = 1728; // (3*4)^3
+   unsigned sMemSize = 1728; // (3*4)^3
    if (reference->nz == 1) {
       globalWorkSize[2] = 1;
       localWorkSize[2] = 1;
@@ -92,36 +92,36 @@ void ClBlockMatchingKernel::Calculate() {
    }
 
    errNum = clSetKernelArg(kernel, 0, sMemSize * sizeof(cl_float), nullptr);
-   sContext->checkErrNum(errNum, "Error setting shared memory.");
+   sContext->CheckErrNum(errNum, "Error setting shared memory.");
    errNum = clSetKernelArg(kernel, 1, sizeof(cl_mem), &clWarpedImageArray);
-   sContext->checkErrNum(errNum, "Error setting resultImageArray.");
+   sContext->CheckErrNum(errNum, "Error setting resultImageArray.");
    errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &clReferenceImageArray);
-   sContext->checkErrNum(errNum, "Error setting targetImageArray.");
+   sContext->CheckErrNum(errNum, "Error setting targetImageArray.");
    errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &clWarpedPosition);
-   sContext->checkErrNum(errNum, "Error setting resultPosition.");
+   sContext->CheckErrNum(errNum, "Error setting resultPosition.");
    errNum |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &clReferencePosition);
-   sContext->checkErrNum(errNum, "Error setting targetPosition.");
+   sContext->CheckErrNum(errNum, "Error setting targetPosition.");
    errNum |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &clTotalBlock);
-   sContext->checkErrNum(errNum, "Error setting mask.");
+   sContext->CheckErrNum(errNum, "Error setting mask.");
    errNum |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &clMask);
-   sContext->checkErrNum(errNum, "Error setting mask.");
+   sContext->CheckErrNum(errNum, "Error setting mask.");
    errNum |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &clReferenceMat);
-   sContext->checkErrNum(errNum, "Error setting targetMatrix_xyz.");
+   sContext->CheckErrNum(errNum, "Error setting targetMatrix_xyz.");
    errNum |= clSetKernelArg(kernel, 8, sizeof(cl_mem), &cldefinedBlock);
-   sContext->checkErrNum(errNum, "Error setting cldefinedBlock.");
+   sContext->CheckErrNum(errNum, "Error setting cldefinedBlock.");
    errNum |= clSetKernelArg(kernel, 9, sizeof(cl_uint4), &imageSize);
-   sContext->checkErrNum(errNum, "Error setting image size.");
+   sContext->CheckErrNum(errNum, "Error setting image size.");
 
    errNum = clEnqueueNDRangeKernel(commandQueue, kernel, params->dim, nullptr,
                                    globalWorkSize, localWorkSize, 0, nullptr, nullptr);
-   sContext->checkErrNum(errNum, "Error queuing blockmatching kernel for execution ");
+   sContext->CheckErrNum(errNum, "Error queuing blockmatching kernel for execution ");
 
    errNum = clFinish(commandQueue);
-   sContext->checkErrNum(errNum, "Error after clFinish ClBlockMatchingKernel");
+   sContext->CheckErrNum(errNum, "Error after clFinish ClBlockMatchingKernel");
 
    errNum = clEnqueueReadBuffer(commandQueue, cldefinedBlock, CL_TRUE, 0, sizeof(int),
                                 &(params->definedActiveBlockNumber), 0, nullptr, nullptr);
-   sContext->checkErrNum(errNum, "Error reading  var after ClBlockMatchingKernel execution ");
+   sContext->CheckErrNum(errNum, "Error reading  var after ClBlockMatchingKernel execution ");
 
    if (params->definedActiveBlockNumber == 0) {
       reg_print_msg_error("Unexpected error in the ClBlockMatchingKernel execution");
diff --git a/reg-lib/cl/ClContextSingleton.cpp b/reg-lib/cl/ClContextSingleton.cpp
index 38695780..c3d3d1fc 100644
--- a/reg-lib/cl/ClContextSingleton.cpp
+++ b/reg-lib/cl/ClContextSingleton.cpp
@@ -1,365 +1,335 @@
 #include "ClContextSingleton.h"
 
 /* *************************************************************** */
-ClContextSingleton::ClContextSingleton()
-{
-	this->commandQueue = nullptr;
-	this->context = nullptr;
-	this->clIdx = 999;
-	init();
+ClContextSingleton::ClContextSingleton() {
+    this->commandQueue = nullptr;
+    this->context = nullptr;
+    this->clIdx = 999;
+    Init();
 }
 /* *************************************************************** */
-void ClContextSingleton::init()
-{
-	// Query the number of platforms
-	cl_int errNum = clGetPlatformIDs(0, nullptr, &this->numPlatforms);
-	checkErrNum(errNum, "Failed to find CL platforms.");
+void ClContextSingleton::Init() {
+    // Query the number of platforms
+    cl_int errNum = clGetPlatformIDs(0, nullptr, &this->numPlatforms);
+    CheckErrNum(errNum, "Failed to find CL platforms.");
 
-	this->platformIds = (cl_platform_id *) alloca(sizeof(cl_platform_id) * this->numPlatforms);
-	errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds, nullptr);
-	checkErrNum(errNum, "Failed to find any OpenCL platforms.");
+    this->platformIds = (cl_platform_id *)alloca(sizeof(cl_platform_id) * this->numPlatforms);
+    errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds, nullptr);
+    CheckErrNum(errNum, "Failed to find any OpenCL platforms.");
 
-	errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, 0, nullptr, &this->numDevices);
-	checkErrNum(errNum, "Failed to find OpenCL devices.");
+    errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, 0, nullptr, &this->numDevices);
+    CheckErrNum(errNum, "Failed to find OpenCL devices.");
 
-	this->devices = new cl_device_id[this->numDevices];
-	errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices, nullptr);
+    this->devices = new cl_device_id[this->numDevices];
+    errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices, nullptr);
 
-	PickCard(this->clIdx);
+    PickCard(this->clIdx);
 
-	cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) this->platformIds[0], 0 };
-	this->context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &errNum);
+    cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)this->platformIds[0], 0 };
+    this->context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &errNum);
 
-	if (errNum != CL_SUCCESS) {
-		std::cout << "Could not create GPU context, trying CPU..." << std::endl;
-		context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU,
-													 nullptr, nullptr, &errNum);
-		if (errNum != CL_SUCCESS) {
-			std::cerr << "Failed to create an OpenCL GPU or CPU context."
-						 << std::endl;
-			return;
-		}
-	}
+    if (errNum != CL_SUCCESS) {
+        std::cout << "Could not create GPU context, trying CPU..." << std::endl;
+        context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, nullptr, nullptr, &errNum);
+        if (errNum != CL_SUCCESS) {
+            std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
+            return;
+        }
+    }
 
-	this->commandQueue = clCreateCommandQueue(this->context, this->devices[this->clIdx], CL_QUEUE_PROFILING_ENABLE, nullptr);
-	checkErrNum(errNum, "Failed to create commandQueue for device ");
+    this->commandQueue = clCreateCommandQueue(this->context, this->devices[this->clIdx], CL_QUEUE_PROFILING_ENABLE, nullptr);
+    CheckErrNum(errNum, "Failed to create commandQueue for device ");
 
-	this->deviceId = this->devices[this->clIdx];
-	queryGridDims();
+    this->deviceId = this->devices[this->clIdx];
+    QueryGridDims();
 }
 /* *************************************************************** */
-void ClContextSingleton::SetClIdx(int clIdxIn)
-{
-   clIdx=clIdxIn;
-   this->init();
+void ClContextSingleton::SetClIdx(int clIdxIn) {
+    clIdx = clIdxIn;
+    this->Init();
 }
 /* *************************************************************** */
-void ClContextSingleton::queryGridDims()
-{
-	std::size_t paramValueSize;
-	cl_int errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, &paramValueSize);
-	checkErrNum(errNum, "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_GROUP_SIZE");
+void ClContextSingleton::QueryGridDims() {
+    std::size_t paramValueSize;
+    cl_int errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, &paramValueSize);
+    CheckErrNum(errNum, "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_GROUP_SIZE");
 
-	size_t* info = (size_t *) alloca(sizeof(size_t) * paramValueSize);
-	errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr);
-	checkErrNum(errNum, "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_GROUP_SIZE2");
-	this->maxThreads = *info;
-	this->maxBlocks = 65535;
+    size_t *info = (size_t*)alloca(sizeof(size_t) * paramValueSize);
+    errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr);
+    CheckErrNum(errNum, "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_GROUP_SIZE2");
+    this->maxThreads = *info;
+    this->maxBlocks = 65535;
 }
 /* *************************************************************** */
-void ClContextSingleton::PickCard(cl_uint deviceId)
-{
-   cl_int errNum;
-   std::size_t paramValueSize;
-   cl_uint maxProcs = 0;
-   this->clIdx = 0;
-   this->isCardDoubleCapable = 0;
+void ClContextSingleton::PickCard(cl_uint deviceId) {
+    cl_int errNum;
+    std::size_t paramValueSize;
+    cl_uint maxProcs = 0;
+    this->clIdx = 0;
+    this->isCardDoubleCapable = 0;
 
-   std::size_t paramValueSizeDOUBE1;
-   std::size_t paramValueSizeDOUBE2;
+    std::size_t paramValueSizeDOUBE1;
+    std::size_t paramValueSizeDOUBE2;
 
-   if(deviceId < this->numDevices){
-      this->clIdx=deviceId;
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, &paramValueSize);
-      checkErrNum(errNum, "Failed to find OpenCL device info ");
-      cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize);
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr);
-      checkErrNum(errNum, "Failed to find OpenCL device info ");
-      cl_uint numProcs = *info;
-      maxProcs = numProcs;
+    if (deviceId < this->numDevices) {
+        this->clIdx = deviceId;
+        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, &paramValueSize);
+        CheckErrNum(errNum, "Failed to find OpenCL device info ");
+        cl_uint *info = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSize);
+        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr);
+        CheckErrNum(errNum, "Failed to find OpenCL device info ");
+        cl_uint numProcs = *info;
+        maxProcs = numProcs;
 
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE1);
-      checkErrNum(errNum, "Failed to find OpenCL device info ");
-      cl_uint * infoD1 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE1);
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr);
-      checkErrNum(errNum, "Failed to find OpenCL device info ");
-      cl_uint numD1 = *infoD1;
+        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE1);
+        CheckErrNum(errNum, "Failed to find OpenCL device info ");
+        cl_uint *infoD1 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE1);
+        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr);
+        CheckErrNum(errNum, "Failed to find OpenCL device info ");
+        cl_uint numD1 = *infoD1;
 
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE2);
-      checkErrNum(errNum, "Failed to find OpenCL device info ");
-      cl_uint * infoD2 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE2);
-      errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr);
-      checkErrNum(errNum, "Failed to find OpenCL device info ");
-      cl_uint numD2 = *infoD2;
+        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE2);
+        CheckErrNum(errNum, "Failed to find OpenCL device info ");
+        cl_uint *infoD2 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE2);
+        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr);
+        CheckErrNum(errNum, "Failed to find OpenCL device info ");
+        cl_uint numD2 = *infoD2;
 
-      if(numD1 > 0 || numD2 > 0) {
-          this->isCardDoubleCapable = true;
-      } else {
-          this->isCardDoubleCapable = false;
-      }
-      return;
-   }
-   else if(deviceId != 999){
-      reg_print_msg_error("The specified opencl card id is not defined");
-      reg_print_msg_error("Run reg_gpuinfo to get the proper id");
-      reg_exit();
-   }
+        if (numD1 > 0 || numD2 > 0) {
+            this->isCardDoubleCapable = true;
+        } else {
+            this->isCardDoubleCapable = false;
+        }
+        return;
+    } else if (deviceId != 999) {
+        reg_print_msg_error("The specified opencl card id is not defined");
+        reg_print_msg_error("Run reg_gpuinfo to get the proper id");
+        reg_exit();
+    }
 
-   for(cl_uint i = 0; i < this->numDevices; ++i) {
-      cl_device_type dev_type;
-      clGetDeviceInfo(this->devices[i], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, nullptr);
-      if (dev_type == CL_DEVICE_TYPE_GPU) {
-         errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, &paramValueSize);
-         checkErrNum(errNum, "Failed to find OpenCL device info ");
-         cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize);
-         errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr);
-         checkErrNum(errNum, "Failed to find OpenCL device info ");
-         cl_uint numProcs = *info;
-         const bool found = numProcs > maxProcs;
-         this->clIdx = found ? i : this->clIdx;
-         maxProcs = found ? numProcs : maxProcs;
+    for (cl_uint i = 0; i < this->numDevices; ++i) {
+        cl_device_type dev_type;
+        clGetDeviceInfo(this->devices[i], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, nullptr);
+        if (dev_type == CL_DEVICE_TYPE_GPU) {
+            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, &paramValueSize);
+            CheckErrNum(errNum, "Failed to find OpenCL device info ");
+            cl_uint *info = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSize);
+            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr);
+            CheckErrNum(errNum, "Failed to find OpenCL device info ");
+            cl_uint numProcs = *info;
+            const bool found = numProcs > maxProcs;
+            this->clIdx = found ? i : this->clIdx;
+            maxProcs = found ? numProcs : maxProcs;
 
-         if(found) {
-            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE1);
-            checkErrNum(errNum, "Failed to find OpenCL device info ");
-            cl_uint * infoD1 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE1);
-            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr);
-            checkErrNum(errNum, "Failed to find OpenCL device info ");
-            cl_uint numD1 = *infoD1;
+            if (found) {
+                errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE1);
+                CheckErrNum(errNum, "Failed to find OpenCL device info ");
+                cl_uint *infoD1 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE1);
+                errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr);
+                CheckErrNum(errNum, "Failed to find OpenCL device info ");
+                cl_uint numD1 = *infoD1;
 
-            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE2);
-            checkErrNum(errNum, "Failed to find OpenCL device info ");
-            cl_uint * infoD2 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE2);
-            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr);
-            checkErrNum(errNum, "Failed to find OpenCL device info ");
-            cl_uint numD2 = *infoD2;
+                errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE2);
+                CheckErrNum(errNum, "Failed to find OpenCL device info ");
+                cl_uint *infoD2 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE2);
+                errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr);
+                CheckErrNum(errNum, "Failed to find OpenCL device info ");
+                cl_uint numD2 = *infoD2;
 
-            if(numD1 > 0 || numD2 > 0) {
-               this->isCardDoubleCapable = true;
-            } else {
-               this->isCardDoubleCapable = false;
+                if (numD1 > 0 || numD2 > 0) {
+                    this->isCardDoubleCapable = true;
+                } else {
+                    this->isCardDoubleCapable = false;
+                }
             }
-         }
-      }
-   }
+        }
+    }
 }
 /* *************************************************************** */
-cl_program ClContextSingleton::CreateProgram(const char* fileName)
-{
-	cl_int errNum;
-	cl_program program;
-	std::ifstream kernelFile(fileName, std::ios::in);
-	if (!kernelFile.is_open()) {
-		std::cerr << "Failed to open file for reading: " << fileName << std::endl;
-		return nullptr;
-	}
-	std::ostringstream oss;
-	oss << kernelFile.rdbuf();
-	std::string srcStdStr = oss.str();
-	const char *srcStr = srcStdStr.c_str();
-	program = clCreateProgramWithSource(this->context, 1, (const char**) &srcStr, nullptr, &errNum);
-	checkErrNum(errNum, "Failed to create CL program");
+cl_program ClContextSingleton::CreateProgram(const char *fileName) {
+    cl_int errNum;
+    cl_program program;
+    std::ifstream kernelFile(fileName, std::ios::in);
+    if (!kernelFile.is_open()) {
+        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
+        return nullptr;
+    }
+    std::ostringstream oss;
+    oss << kernelFile.rdbuf();
+    std::string srcStdStr = oss.str();
+    const char *srcStr = srcStdStr.c_str();
+    program = clCreateProgramWithSource(this->context, 1, (const char**)&srcStr, nullptr, &errNum);
+    CheckErrNum(errNum, "Failed to create CL program");
 
-	errNum = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
-	if (errNum != CL_SUCCESS) {
-		checDebugKernelInfo(program,this->deviceId, (char *)"Errors in kernel: ");
-		//create log
-		size_t length;
-		char buffer[2048];
-		clGetProgramBuildInfo(program, this->devices[this->clIdx], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &length);
-		std::cout<<"--- Build log ---\n "<<buffer<<std::endl;
-		reg_exit();
-	}
+    errNum = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
+    if (errNum != CL_SUCCESS) {
+        CheckDebugKernelInfo(program, this->deviceId, (char*)"Errors in kernel: ");
+        //create log
+        size_t length;
+        char buffer[2048];
+        clGetProgramBuildInfo(program, this->devices[this->clIdx], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &length);
+        std::cout << "--- Build log ---\n " << buffer << std::endl;
+        reg_exit();
+    }
 
-	return program;
+    return program;
 }
 /* *************************************************************** */
-void ClContextSingleton::shutDown()
-{
-	/*std::cout << "Shutting down cl" << std::endl;*/
-	if (this->context != 0) clReleaseContext(this->context);
-	if (this->commandQueue != 0) clReleaseCommandQueue(this->commandQueue);
-
-	 delete[] this->devices;
+ClContextSingleton::~ClContextSingleton() {
+    /*std::cout << "Shutting down cl" << std::endl;*/
+    if (this->context != 0) clReleaseContext(this->context);
+    if (this->commandQueue != 0) clReleaseCommandQueue(this->commandQueue);
+    delete[] this->devices;
 }
 /* *************************************************************** */
-void ClContextSingleton::checDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message)
-{
-	char buffer[10240];
-
-	clGetProgramBuildInfo(program, devIdIn, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr);
-	reg_print_fct_error(message);
-	reg_print_fct_error(buffer);
+void ClContextSingleton::CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message) {
+    char buffer[10240];
+    clGetProgramBuildInfo(program, devIdIn, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr);
+    reg_print_fct_error(message);
+    reg_print_fct_error(buffer);
 }
 /* *************************************************************** */
-void ClContextSingleton::checkErrNum(cl_int errNum, std::string message)
-{
-	if (errNum != CL_SUCCESS)
-	{
-		reg_print_msg_error(message.c_str());
-		switch(errNum){
-		case -1: reg_print_msg_error("CL_DEVICE_NOT_FOUND");break;
-		case -2: reg_print_msg_error("CL_DEVICE_NOT_AVAILABLE");break;
-		case -3: reg_print_msg_error("CL_COMPILER_NOT_AVAILABLE");break;
-		case -4: reg_print_msg_error("CL_MEM_OBJECT_ALLOCATION_FAILURE");break;
-		case -5: reg_print_msg_error("CL_OUT_OF_RESOURCES");break;
-		case -6: reg_print_msg_error("CL_OUT_OF_HOST_MEMORY");break;
-		case -7: reg_print_msg_error("CL_PROFILING_INFO_NOT_AVAILABLE");break;
-		case -8: reg_print_msg_error("CL_MEM_COPY_OVERLAP");break;
-		case -9: reg_print_msg_error("CL_IMAGE_FORMAT_MISMATCH");break;
-		case -10: reg_print_msg_error("CL_IMAGE_FORMAT_NOT_SUPPORTED");break;
-		case -11: reg_print_msg_error("CL_BUILD_PROGRAM_FAILURE");break;
-		case -12: reg_print_msg_error("CL_MAP_FAILURE");break;
-		case -13: reg_print_msg_error("CL_MISALIGNED_SUB_BUFFER_OFFSET");break;
-		case -14: reg_print_msg_error("CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST");break;
-		case -15: reg_print_msg_error("CL_COMPILE_PROGRAM_FAILURE");break;
-		case -16: reg_print_msg_error("CL_LINKER_NOT_AVAILABLE");break;
-		case -17: reg_print_msg_error("CL_LINK_PROGRAM_FAILURE");break;
-		case -18: reg_print_msg_error("CL_DEVICE_PARTITION_FAILED");break;
-		case -19: reg_print_msg_error("CL_KERNEL_ARG_INFO_NOT_AVAILABLE");break;
-		case -30: reg_print_msg_error("CL_INVALID_VALUE");break;
-		case -31: reg_print_msg_error("CL_INVALID_DEVICE_TYPE");break;
-		case -32: reg_print_msg_error("CL_INVALID_PLATFORM");break;
-		case -33: reg_print_msg_error("CL_INVALID_DEVICE");break;
-		case -34: reg_print_msg_error("CL_INVALID_CONTEXT");break;
-		case -35: reg_print_msg_error("CL_INVALID_QUEUE_PROPERTIES");break;
-		case -36: reg_print_msg_error("CL_INVALID_COMMAND_QUEUE");break;
-		case -37: reg_print_msg_error("CL_INVALID_HOST_PTR");break;
-		case -38: reg_print_msg_error("CL_INVALID_MEM_OBJECT");break;
-		case -39: reg_print_msg_error("CL_INVALID_IMAGE_FORMAT_DESCRIPTOR");break;
-		case -40: reg_print_msg_error("CL_INVALID_IMAGE_SIZE");break;
-		case -41: reg_print_msg_error("CL_INVALID_SAMPLER");break;
-		case -42: reg_print_msg_error("CL_INVALID_BINARY");break;
-		case -43: reg_print_msg_error("CL_INVALID_BUILD_OPTIONS");break;
-		case -44: reg_print_msg_error("CL_INVALID_PROGRAM");break;
-		case -45: reg_print_msg_error("CL_INVALID_PROGRAM_EXECUTABLE");break;
-		case -46: reg_print_msg_error("CL_INVALID_KERNEL_NAME");break;
-		case -47: reg_print_msg_error("CL_INVALID_KERNEL_DEFINITION");break;
-		case -48: reg_print_msg_error("CL_INVALID_KERNEL");break;
-		case -49: reg_print_msg_error("CL_INVALID_ARG_INDEX");break;
-		case -50: reg_print_msg_error("CL_INVALID_ARG_VALUE");break;
-		case -51: reg_print_msg_error("CL_INVALID_ARG_SIZE");break;
-		case -52: reg_print_msg_error("CL_INVALID_KERNEL_ARGS");break;
-		case -53: reg_print_msg_error("CL_INVALID_WORK_DIMENSION");break;
-		case -54: reg_print_msg_error("CL_INVALID_WORK_GROUP_SIZE");break;
-		case -55: reg_print_msg_error("CL_INVALID_WORK_ITEM_SIZE");break;
-		case -56: reg_print_msg_error("CL_INVALID_GLOBAL_OFFSET");break;
-		case -57: reg_print_msg_error("CL_INVALID_EVENT_WAIT_LIST");break;
-		case -58: reg_print_msg_error("CL_INVALID_EVENT");break;
-		case -59: reg_print_msg_error("CL_INVALID_OPERATION");break;
-		case -60: reg_print_msg_error("CL_INVALID_GL_OBJECT");break;
-		case -61: reg_print_msg_error("CL_INVALID_BUFFER_SIZE");break;
-		case -62: reg_print_msg_error("CL_INVALID_MIP_LEVEL");break;
-		case -63: reg_print_msg_error("CL_INVALID_GLOBAL_WORK_SIZE");break;
-		case -64: reg_print_msg_error("CL_INVALID_PROPERTY");break;
-		case -65: reg_print_msg_error("CL_INVALID_IMAGE_DESCRIPTOR");break;
-		case -66: reg_print_msg_error("CL_INVALID_COMPILER_OPTIONS");break;
-		case -67: reg_print_msg_error("CL_INVALID_LINKER_OPTIONS");break;
-		case -68: reg_print_msg_error("CL_INVALID_DEVICE_PARTITION_COUNT");break;
-		default : reg_print_msg_error("Unknown error type");break;
-		}
-		reg_exit();
-	}
+void ClContextSingleton::CheckErrNum(cl_int errNum, std::string message) {
+    if (errNum != CL_SUCCESS) {
+        reg_print_msg_error(message.c_str());
+        switch (errNum) {
+        case -1: reg_print_msg_error("CL_DEVICE_NOT_FOUND"); break;
+        case -2: reg_print_msg_error("CL_DEVICE_NOT_AVAILABLE"); break;
+        case -3: reg_print_msg_error("CL_COMPILER_NOT_AVAILABLE"); break;
+        case -4: reg_print_msg_error("CL_MEM_OBJECT_ALLOCATION_FAILURE"); break;
+        case -5: reg_print_msg_error("CL_OUT_OF_RESOURCES"); break;
+        case -6: reg_print_msg_error("CL_OUT_OF_HOST_MEMORY"); break;
+        case -7: reg_print_msg_error("CL_PROFILING_INFO_NOT_AVAILABLE"); break;
+        case -8: reg_print_msg_error("CL_MEM_COPY_OVERLAP"); break;
+        case -9: reg_print_msg_error("CL_IMAGE_FORMAT_MISMATCH"); break;
+        case -10: reg_print_msg_error("CL_IMAGE_FORMAT_NOT_SUPPORTED"); break;
+        case -11: reg_print_msg_error("CL_BUILD_PROGRAM_FAILURE"); break;
+        case -12: reg_print_msg_error("CL_MAP_FAILURE"); break;
+        case -13: reg_print_msg_error("CL_MISALIGNED_SUB_BUFFER_OFFSET"); break;
+        case -14: reg_print_msg_error("CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"); break;
+        case -15: reg_print_msg_error("CL_COMPILE_PROGRAM_FAILURE"); break;
+        case -16: reg_print_msg_error("CL_LINKER_NOT_AVAILABLE"); break;
+        case -17: reg_print_msg_error("CL_LINK_PROGRAM_FAILURE"); break;
+        case -18: reg_print_msg_error("CL_DEVICE_PARTITION_FAILED"); break;
+        case -19: reg_print_msg_error("CL_KERNEL_ARG_INFO_NOT_AVAILABLE"); break;
+        case -30: reg_print_msg_error("CL_INVALID_VALUE"); break;
+        case -31: reg_print_msg_error("CL_INVALID_DEVICE_TYPE"); break;
+        case -32: reg_print_msg_error("CL_INVALID_PLATFORM"); break;
+        case -33: reg_print_msg_error("CL_INVALID_DEVICE"); break;
+        case -34: reg_print_msg_error("CL_INVALID_CONTEXT"); break;
+        case -35: reg_print_msg_error("CL_INVALID_QUEUE_PROPERTIES"); break;
+        case -36: reg_print_msg_error("CL_INVALID_COMMAND_QUEUE"); break;
+        case -37: reg_print_msg_error("CL_INVALID_HOST_PTR"); break;
+        case -38: reg_print_msg_error("CL_INVALID_MEM_OBJECT"); break;
+        case -39: reg_print_msg_error("CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"); break;
+        case -40: reg_print_msg_error("CL_INVALID_IMAGE_SIZE"); break;
+        case -41: reg_print_msg_error("CL_INVALID_SAMPLER"); break;
+        case -42: reg_print_msg_error("CL_INVALID_BINARY"); break;
+        case -43: reg_print_msg_error("CL_INVALID_BUILD_OPTIONS"); break;
+        case -44: reg_print_msg_error("CL_INVALID_PROGRAM"); break;
+        case -45: reg_print_msg_error("CL_INVALID_PROGRAM_EXECUTABLE"); break;
+        case -46: reg_print_msg_error("CL_INVALID_KERNEL_NAME"); break;
+        case -47: reg_print_msg_error("CL_INVALID_KERNEL_DEFINITION"); break;
+        case -48: reg_print_msg_error("CL_INVALID_KERNEL"); break;
+        case -49: reg_print_msg_error("CL_INVALID_ARG_INDEX"); break;
+        case -50: reg_print_msg_error("CL_INVALID_ARG_VALUE"); break;
+        case -51: reg_print_msg_error("CL_INVALID_ARG_SIZE"); break;
+        case -52: reg_print_msg_error("CL_INVALID_KERNEL_ARGS"); break;
+        case -53: reg_print_msg_error("CL_INVALID_WORK_DIMENSION"); break;
+        case -54: reg_print_msg_error("CL_INVALID_WORK_GROUP_SIZE"); break;
+        case -55: reg_print_msg_error("CL_INVALID_WORK_ITEM_SIZE"); break;
+        case -56: reg_print_msg_error("CL_INVALID_GLOBAL_OFFSET"); break;
+        case -57: reg_print_msg_error("CL_INVALID_EVENT_WAIT_LIST"); break;
+        case -58: reg_print_msg_error("CL_INVALID_EVENT"); break;
+        case -59: reg_print_msg_error("CL_INVALID_OPERATION"); break;
+        case -60: reg_print_msg_error("CL_INVALID_GL_OBJECT"); break;
+        case -61: reg_print_msg_error("CL_INVALID_BUFFER_SIZE"); break;
+        case -62: reg_print_msg_error("CL_INVALID_MIP_LEVEL"); break;
+        case -63: reg_print_msg_error("CL_INVALID_GLOBAL_WORK_SIZE"); break;
+        case -64: reg_print_msg_error("CL_INVALID_PROPERTY"); break;
+        case -65: reg_print_msg_error("CL_INVALID_IMAGE_DESCRIPTOR"); break;
+        case -66: reg_print_msg_error("CL_INVALID_COMPILER_OPTIONS"); break;
+        case -67: reg_print_msg_error("CL_INVALID_LINKER_OPTIONS"); break;
+        case -68: reg_print_msg_error("CL_INVALID_DEVICE_PARTITION_COUNT"); break;
+        default: reg_print_msg_error("Unknown error type"); break;
+        }
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-cl_context ClContextSingleton::GetContext()
-{
-	return this->context;
+cl_context ClContextSingleton::GetContext() {
+    return this->context;
 }
 /* *************************************************************** */
-cl_device_id ClContextSingleton::GetDeviceId()
-{
-	return this->deviceId;
+cl_device_id ClContextSingleton::GetDeviceId() {
+    return this->deviceId;
 }
 /* *************************************************************** */
-cl_device_id* ClContextSingleton::GetDevices()
-{
-	return this->devices;
+cl_device_id* ClContextSingleton::GetDevices() {
+    return this->devices;
 }
 /* *************************************************************** */
-cl_command_queue ClContextSingleton::GetCommandQueue()
-{
-	return this->commandQueue;
+cl_command_queue ClContextSingleton::GetCommandQueue() {
+    return this->commandQueue;
 }
 /* *************************************************************** */
-cl_uint ClContextSingleton::GetNumPlatforms()
-{
-	return this->numPlatforms;
+cl_uint ClContextSingleton::GetNumPlatforms() {
+    return this->numPlatforms;
 }
 /* *************************************************************** */
-cl_platform_id* ClContextSingleton::GetPlatformIds()
-{
-	return this->platformIds;
+cl_platform_id* ClContextSingleton::GetPlatformIds() {
+    return this->platformIds;
 }
 /* *************************************************************** */
-cl_uint ClContextSingleton::GetNumDevices()
-{
-	return this->numDevices;
+cl_uint ClContextSingleton::GetNumDevices() {
+    return this->numDevices;
 }
 /* *************************************************************** */
-size_t ClContextSingleton::GetMaxThreads()
-{
-	return this->maxThreads;
+size_t ClContextSingleton::GetMaxThreads() {
+    return this->maxThreads;
 }
 /* *************************************************************** */
-bool ClContextSingleton::GetIsCardDoubleCapable()
-{
-	 return this->isCardDoubleCapable;
+bool ClContextSingleton::IsCardDoubleCapable() {
+    return this->isCardDoubleCapable;
 }
 /* *************************************************************** */
-unsigned int ClContextSingleton::GetMaxBlocks()
-{
-	return this->maxBlocks;
+unsigned ClContextSingleton::GetMaxBlocks() {
+    return this->maxBlocks;
 }
 /* *************************************************************** */
-size_t ClContextSingleton::GetWarpGroupLength(cl_kernel kernel)
-{
-	size_t local;
-	// Get the maximum work group size for executing the kernel on the device
-	cl_int err = clGetKernelWorkGroupInfo(kernel, this->deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr);
-	checkErrNum(err, "Error: Failed to retrieve kernel work group info!");
-
-	return local;
+size_t ClContextSingleton::GetWarpGroupLength(cl_kernel kernel) {
+    size_t local;
+    // Get the maximum work group size for executing the kernel on the device
+    cl_int err = clGetKernelWorkGroupInfo(kernel, this->deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr);
+    CheckErrNum(err, "Error: Failed to retrieve kernel work group info!");
+    return local;
 }
 /* *************************************************************** */
-cl_kernel ClContextSingleton::dummyKernel(cl_device_id deviceIdIn) {
-
-   const char *source = "\n"
-            "__kernel void dummy(                                                \n"
-         "   __global float* in,                                              \n"
-         "   __global float* out,                                             \n"
-            "   const unsigned int count)                                        \n"
-            "{                                                                   \n"
-            "   int i = get_global_id(0);                                        \n"
-            "   if(i < count)                                                    \n"
-            "       out[i] = in[i] * out[i];                                     \n"
-            "}                                                                   \n"
-         "\n";
+cl_kernel ClContextSingleton::DummyKernel(cl_device_id deviceIdIn) {
+    const char *source = "\n"
+        "__kernel void dummy(                                \n"
+        "   __global float* in,                              \n"
+        "   __global float* out,                             \n"
+        "   const unsigned count)                            \n"
+        "{                                                   \n"
+        "   int i = get_global_id(0);                        \n"
+        "   if(i < count)                                    \n"
+        "       out[i] = in[i] * out[i];                     \n"
+        "}                                                   \n"
+        "\n";
 
-	cl_int  err ;
-	cl_program program = clCreateProgramWithSource(this->context, 1, (const char **) & source, nullptr, &err);
-	checkErrNum(err, "Failed to create CL program");
-	err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
-	if (err != CL_SUCCESS) checDebugKernelInfo(program,deviceIdIn, (char *)"Errors in kernel: ");
+    cl_int  err;
+    cl_program program = clCreateProgramWithSource(this->context, 1, (const char **)&source, nullptr, &err);
+    CheckErrNum(err, "Failed to create CL program");
+    err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
+    if (err != CL_SUCCESS) CheckDebugKernelInfo(program, deviceIdIn, (char *)"Errors in kernel: ");
 
-	// Create the compute kernel in the program we wish to run
-	//
-	cl_kernel kernel = clCreateKernel(program, "dummy", &err);
-	if (!kernel || err != CL_SUCCESS)
-	{
-		reg_print_fct_error("Error: Failed to create compute kernel!");
-		return nullptr;
-	}
-	return kernel;
+    // Create the compute kernel in the program we wish to run
+    cl_kernel kernel = clCreateKernel(program, "dummy", &err);
+    if (!kernel || err != CL_SUCCESS) {
+        reg_print_fct_error("Error: Failed to create compute kernel!");
+        return nullptr;
+    }
+    return kernel;
 }
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClContextSingleton.h b/reg-lib/cl/ClContextSingleton.h
index 99020b7a..c574933d 100644
--- a/reg-lib/cl/ClContextSingleton.h
+++ b/reg-lib/cl/ClContextSingleton.h
@@ -14,70 +14,53 @@
 #include <sstream>
 
 
-// Declaration
-class ClContextSingleton
-{
+class ClContextSingleton {
 public:
-
-	static ClContextSingleton& Instance()
-	{
-		static ClContextSingleton instance; // Guaranteed to be destroyed.
-		// Instantiated on first use.
-		return instance;
-	}
-
-	void queryGridDims();
-	void CreateContext();
-	void checDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message);
-	void CreateCommandQueue();
-	void init();
-	cl_kernel dummyKernel(cl_device_id deviceIdIn);
-	void SetClIdx(int clIdxIn);
-
-	cl_program CreateProgram( const char* fileName);
-
-
-	void Cleanup(cl_program program, cl_kernel kernel, cl_mem* memObjects, int length);
-	void checkErrNum(cl_int errNum, std::string message);
-	void shutDown();
-
-	cl_context GetContext();
-	cl_device_id GetDeviceId();
-	cl_device_id* GetDevices();
-	cl_command_queue GetCommandQueue();
-	cl_uint GetNumPlatforms();
-	cl_platform_id* GetPlatformIds();
-	cl_uint GetNumDevices();
-	size_t GetMaxThreads();
-
-	unsigned int GetMaxBlocks();
-    bool GetIsCardDoubleCapable();
-
-	size_t GetWarpGroupLength(cl_kernel kernel);
+    ClContextSingleton(ClContextSingleton const&) = delete;
+    void operator=(ClContextSingleton const&) = delete;
+
+    static ClContextSingleton& GetInstance() {
+        // Instantiated on first use.
+        static ClContextSingleton instance; // Guaranteed to be destroyed.
+        return instance;
+    }
+
+    cl_program CreateProgram(const char *fileName);
+    void CheckErrNum(cl_int errNum, std::string message);
+    cl_kernel DummyKernel(cl_device_id deviceIdIn);
+    void SetClIdx(int clIdxIn);
+
+    cl_context GetContext();
+    cl_device_id GetDeviceId();
+    cl_device_id* GetDevices();
+    cl_command_queue GetCommandQueue();
+    cl_uint GetNumPlatforms();
+    cl_platform_id* GetPlatformIds();
+    cl_uint GetNumDevices();
+    size_t GetMaxThreads();
+    unsigned GetMaxBlocks();
+    size_t GetWarpGroupLength(cl_kernel kernel);
+    bool IsCardDoubleCapable();
 
 private:
-	static ClContextSingleton* _instance;
-
-	ClContextSingleton();
-    ~ClContextSingleton() {
-        shutDown();
-	}
-
-	ClContextSingleton(ClContextSingleton const&);// Don't Implement
-	void operator=(ClContextSingleton const&); // Don't implement
-
-	void PickCard(cl_uint deviceId);
-
-	cl_context context;
-	cl_device_id deviceId;
-	cl_device_id *devices;
-	cl_command_queue commandQueue;
-	cl_uint numPlatforms;
-	cl_platform_id* platformIds;
-	cl_uint  numDevices;
-	size_t maxThreads;
+    ClContextSingleton();
+    ~ClContextSingleton();
+
+    void Init();
+    void PickCard(cl_uint deviceId);
+    void CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, char *message);
+    void QueryGridDims();
+
+    cl_context context;
+    cl_device_id deviceId;
+    cl_device_id *devices;
+    cl_command_queue commandQueue;
+    cl_uint numPlatforms;
+    cl_platform_id *platformIds;
+    cl_uint numDevices;
+    size_t maxThreads;
 
     bool isCardDoubleCapable;
-	unsigned int maxBlocks;
-	unsigned clIdx;
+    unsigned maxBlocks;
+    unsigned clIdx;
 };
diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp
index 82da961e..29ff7f36 100644
--- a/reg-lib/cl/ClResampleImageKernel.cpp
+++ b/reg-lib/cl/ClResampleImageKernel.cpp
@@ -36,7 +36,7 @@ ClResampleImageKernel::ClResampleImageKernel(Content *conIn) : ResampleImageKern
     }
 
     //get opencl context params
-    sContext = &ClContextSingleton::Instance();
+    sContext = &ClContextSingleton::GetInstance();
     clContext = sContext->GetContext();
     commandQueue = sContext->GetCommandQueue();
     program = sContext->CreateProgram(clKernelPath.c_str());
@@ -79,13 +79,13 @@ void ClResampleImageKernel::Calculate(int interp,
         reg_print_msg_error("The image dimension is not supported. Exit.");
         reg_exit();
     }
-    sContext->checkErrNum(errNum, "Error setting kernel ResampleImage.");
+    sContext->CheckErrNum(errNum, "Error setting kernel ResampleImage.");
 
     const size_t targetVoxelNumber = CalcVoxelNumber(*this->warpedImage);
-    const unsigned int maxThreads = sContext->GetMaxThreads();
-    const unsigned int maxBlocks = sContext->GetMaxBlocks();
+    const unsigned maxThreads = sContext->GetMaxThreads();
+    const unsigned maxBlocks = sContext->GetMaxBlocks();
 
-    unsigned int blocks = (targetVoxelNumber % maxThreads) ? (targetVoxelNumber / maxThreads) + 1 : targetVoxelNumber / maxThreads;
+    unsigned blocks = (targetVoxelNumber % maxThreads) ? (targetVoxelNumber / maxThreads) + 1 : targetVoxelNumber / maxThreads;
     blocks = std::min(blocks, maxBlocks);
 
     const cl_uint dims = 1;
@@ -105,30 +105,30 @@ void ClResampleImageKernel::Calculate(int interp,
     int datatype = this->floatingImage->datatype;
 
     errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &this->clFloating);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 0.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 0.");
     errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &this->clDeformationField);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 1.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 1.");
     errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &this->clWarped);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 2.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 2.");
     errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &this->clMask);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 3.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 3.");
     errNum |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &this->floMat);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 4.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 4.");
     errNum |= clSetKernelArg(kernel, 5, sizeof(cl_long2), &voxelNumber);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 5.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 5.");
     errNum |= clSetKernelArg(kernel, 6, sizeof(cl_uint3), &fi_xyz);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 6.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 6.");
     errNum |= clSetKernelArg(kernel, 7, sizeof(cl_uint2), &wi_tu);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 7.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 7.");
     errNum |= clSetKernelArg(kernel, 8, sizeof(float), &paddingValue);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 8.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 8.");
     errNum |= clSetKernelArg(kernel, 9, sizeof(cl_int), &interp);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 9.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 9.");
     errNum |= clSetKernelArg(kernel, 10, sizeof(cl_int), &datatype);
-    sContext->checkErrNum(errNum, "Error setting interp kernel arguments 10.");
+    sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 10.");
 
     errNum = clEnqueueNDRangeKernel(commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
-    sContext->checkErrNum(errNum, "Error queuing interp kernel for execution: ");
+    sContext->CheckErrNum(errNum, "Error queuing interp kernel for execution: ");
 
     clFinish(commandQueue);
 }
diff --git a/reg-lib/cl/InfoDevice.h b/reg-lib/cl/InfoDevice.h
index 6a51408b..a4831445 100644
--- a/reg-lib/cl/InfoDevice.h
+++ b/reg-lib/cl/InfoDevice.h
@@ -22,12 +22,12 @@ class DeviceLog {
 	{
 		std::size_t paramValueSize;
 		std::string clInfo;
-		ClContextSingleton *sContext = &ClContextSingleton::Instance();
+		ClContextSingleton *sContext = &ClContextSingleton::GetInstance();
 
-		sContext->checkErrNum(clGetDeviceInfo(id, name, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
+		sContext->CheckErrNum(clGetDeviceInfo(id, name, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
 
 		T * field = (T *) alloca(sizeof(T) * paramValueSize);
-		sContext->checkErrNum(clGetDeviceInfo(id, name, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
+		sContext->CheckErrNum(clGetDeviceInfo(id, name, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
 
 		switch (name) {
 		case CL_DEVICE_TYPE: {
@@ -77,7 +77,7 @@ class DeviceLog {
 		case CL_DEVICE_MAX_WORK_ITEM_SIZES: {
 				cl_uint maxWorkItemDimensions;
 
-				sContext->checkErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.");
+				sContext->CheckErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.");
 				std::cout << str << ":\t";
 				for (cl_uint i = 0; i < maxWorkItemDimensions; i++)
 					std::cout << field[i] << " ";
@@ -101,9 +101,9 @@ class DeviceLog {
 	{
 		cl_int errNum;
 		size_t local;
-		ClContextSingleton *sContext = &ClContextSingleton::Instance();
+		ClContextSingleton *sContext = &ClContextSingleton::GetInstance();
 
-		errNum = clGetKernelWorkGroupInfo(sContext->dummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr);
+		errNum = clGetKernelWorkGroupInfo(sContext->DummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr);
 
 		switch (name) {
 		case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
diff --git a/reg-lib/cl/_reg_openclinfo.cpp b/reg-lib/cl/_reg_openclinfo.cpp
index aa9a56d8..ee0d9671 100644
--- a/reg-lib/cl/_reg_openclinfo.cpp
+++ b/reg-lib/cl/_reg_openclinfo.cpp
@@ -2,7 +2,7 @@
 
 void showCLInfo(void)
 {
-   ClContextSingleton *sContext = &ClContextSingleton::Instance();
+   ClContextSingleton *sContext = &ClContextSingleton::GetInstance();
    cl_uint numPlatforms = sContext->GetNumPlatforms();
 
    for (cl_uint i = 0; i < numPlatforms; i++)
@@ -15,23 +15,23 @@ void showCLInfo(void)
       // Iterate through each device, displaying associated information
       for (cl_uint j = 0; j < numDevices; j++)
       {
-         printf("[NiftyReg OPENCL] Device id [%u]\n", (unsigned int)j);
+         printf("[NiftyReg OPENCL] Device id [%u]\n", (unsigned)j);
          DeviceLog<char >::show(devices[j], CL_DEVICE_NAME, "Device Name");
 //         DeviceLog<char >::show(devices[j], CL_DEVICE_VENDOR, "**** CL_DEVICE_VENDOR");
 //         DeviceLog<char >::show(devices[j], CL_DRIVER_VERSION, "**** CL_DRIVER_VERSION");
          DeviceLog<char >::show(devices[j], CL_DEVICE_VERSION, "OpenCL version");
-         DeviceLog<long long unsigned int>::show(devices[j], CL_DEVICE_TYPE, "Device type");
-         DeviceLog<unsigned int>::show(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, "Multiprocessor number");
+         DeviceLog<long long unsigned>::show(devices[j], CL_DEVICE_TYPE, "Device type");
+         DeviceLog<unsigned>::show(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, "Multiprocessor number");
 //         DeviceLog<cl_uint>::show(devices[j], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
          DeviceLog<size_t>::showKernelInfo(devices[j], CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE");
 //         DeviceLog<size_t> ::show(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, "CL_DEVICE_MAX_WORK_ITEM_SIZES");
 //         DeviceLog<size_t>::show(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, "CL_DEVICE_MAX_WORK_GROUP_SIZE");
-         DeviceLog<unsigned int>::show(devices[j], CL_DEVICE_MAX_CLOCK_FREQUENCY, "Clock rate (Mhz)");
-         DeviceLog<long long unsigned int>::show(devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size");
+         DeviceLog<unsigned>::show(devices[j], CL_DEVICE_MAX_CLOCK_FREQUENCY, "Clock rate (Mhz)");
+         DeviceLog<long long unsigned>::show(devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size");
 //         DeviceLog<cl_ulong>::show(devices[j], CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE");
 //         DeviceLog<cl_uint>::show(devices[j], CL_DEVICE_MAX_CONSTANT_ARGS, "CL_DEVICE_MAX_CONSTANT_ARGS");
 //         DeviceLog<cl_device_local_mem_type>::show(devices[j], CL_DEVICE_LOCAL_MEM_TYPE, "CL_DEVICE_LOCAL_MEM_TYPE");
-         DeviceLog<long long unsigned int>::show(devices[j], CL_DEVICE_LOCAL_MEM_SIZE, "Device memory size");
+         DeviceLog<long long unsigned>::show(devices[j], CL_DEVICE_LOCAL_MEM_SIZE, "Device memory size");
 //         DeviceLog<cl_bool>::show(devices[j], CL_DEVICE_AVAILABLE, "CL_DEVICE_AVAILABLE");
 //         DeviceLog<cl_bool>::show(devices[j], CL_DEVICE_COMPILER_AVAILABLE, "CL_DEVICE_COMPILER_AVAILABLE");
 //         DeviceLog<cl_device_exec_capabilities>::show(devices[j], CL_DEVICE_EXECUTION_CAPABILITIES, "CL_DEVICE_EXECUTION_CAPABILITIES");
diff --git a/reg-lib/cl/affineDeformationKernel.cl b/reg-lib/cl/affineDeformationKernel.cl
index ade108b7..0649b74a 100755
--- a/reg-lib/cl/affineDeformationKernel.cl
+++ b/reg-lib/cl/affineDeformationKernel.cl
@@ -35,7 +35,7 @@ typedef float16 real16_t;
 /* *************************************************************** */
 __inline__ real_t getPosition(__global float* matrix,
                               real_t* voxel,
-                              const unsigned int idx)
+                              const unsigned idx)
 {
    size_t index = idx*4;
    return (real_t)matrix[index++] * voxel[0] +
@@ -49,11 +49,11 @@ __kernel void affineKernel2D(__global float* transformationMatrix,
 									  __global float* defField,
 									  __global int *mask,
 									  const uint3 params,
-									  const unsigned int composition)
+									  const unsigned composition)
 {
 	// Get the current coordinate
-	const unsigned int x = get_group_id(0)*get_local_size(0) + get_local_id(0);
-	const unsigned int y = get_group_id(1)*get_local_size(1) + get_local_id(1);
+	const unsigned x = get_group_id(0)*get_local_size(0) + get_local_id(0);
+	const unsigned y = get_group_id(1)*get_local_size(1) + get_local_id(1);
 	const unsigned long index = x + params.x * y;
 
 	if(y<params.y && x<params.x &&  mask[index] >= 0 )
@@ -78,12 +78,12 @@ __kernel void affineKernel3D(__global float* transformationMatrix,
 									  __global float* defField,
 									  __global int *mask,
 									  const uint3 params,
-									  const unsigned int composition)
+									  const unsigned composition)
 {
 	// Get the current coordinate
-	const unsigned int x = get_group_id(0)*get_local_size(0) + get_local_id(0);
-	const unsigned int y = get_group_id(1)*get_local_size(1) + get_local_id(1);
-	const unsigned int z = get_group_id(2)*get_local_size(2) + get_local_id(2);
+	const unsigned x = get_group_id(0)*get_local_size(0) + get_local_id(0);
+	const unsigned y = get_group_id(1)*get_local_size(1) + get_local_id(1);
+	const unsigned z = get_group_id(2)*get_local_size(2) + get_local_id(2);
 	const unsigned long index = x + params.x * ( y + z * params.y);
 
 	if( z<params.z && y<params.y && x<params.x &&  mask[index] >= 0 )
diff --git a/reg-lib/cl/blockMatchingKernel.cl b/reg-lib/cl/blockMatchingKernel.cl
index 6e17deb9..f7a63a7e 100755
--- a/reg-lib/cl/blockMatchingKernel.cl
+++ b/reg-lib/cl/blockMatchingKernel.cl
@@ -69,12 +69,12 @@ void reg_mat44_mul_cl(__global float* mat,
 /* *************************************************************** */
 __inline__ float reduce2DCustom(__local float* sData2,
                                 float data,
-                                const unsigned int tid)
+                                const unsigned tid)
 {
 	sData2[tid] = data;
 	barrier(CLK_LOCAL_MEM_FENCE);
 
-	for (unsigned int i = 8; i > 0; i >>= 1){
+	for (unsigned i = 8; i > 0; i >>= 1){
 		if (tid < i) sData2[tid] += sData2[tid + i];
 		barrier(CLK_LOCAL_MEM_FENCE);
 	}
@@ -87,12 +87,12 @@ __inline__ float reduce2DCustom(__local float* sData2,
 /* *************************************************************** */
 __inline__ float reduceCustom(__local float* sData2,
                               float data,
-                              const unsigned int tid)
+                              const unsigned tid)
 {
 	sData2[tid] = data;
 	barrier(CLK_LOCAL_MEM_FENCE);
 
-	for (unsigned int i = 32; i > 0; i >>= 1){
+	for (unsigned i = 32; i > 0; i >>= 1){
 		if (tid < i) sData2[tid] += sData2[tid + i];
 		barrier(CLK_LOCAL_MEM_FENCE);
 	}
@@ -119,20 +119,20 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues,
 	__local float sData[16];
 
 	// Compute the current block index
-	const unsigned int bid = get_group_id(1) * get_num_groups(0) + get_group_id(0);
+	const unsigned bid = get_group_id(1) * get_num_groups(0) + get_group_id(0);
 
 	// Check if the current block is active
 	const int currentBlockIndex = totalBlock[bid];
 	if (currentBlockIndex > -1){
 
 		// Assign the current coordonate of the voxel in the block
-		const unsigned int idx = get_local_id(0);
-		const unsigned int idy = get_local_id(1);
-		const unsigned int tid = idy * 4 + idx;
+		const unsigned idx = get_local_id(0);
+		const unsigned idy = get_local_id(1);
+		const unsigned tid = idy * 4 + idx;
 
 		// Compute the coordinate of the current voxel in the whole image
-		const unsigned int xImage = get_group_id(0) * 4 + idx;
-		const unsigned int yImage = get_group_id(1) * 4 + idy;
+		const unsigned xImage = get_group_id(0) * 4 + idx;
+		const unsigned yImage = get_group_id(1) * 4 + idy;
 
 		// Populate shared memory with the warped image values
 		for (int y=-1; y<2; ++y) {
@@ -171,7 +171,7 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues,
 		rReferenceValue = finiteReference ? rReferenceValue : 0.0f;
 
 		// Compute the number of voxel different from 0
-		const unsigned int referenceSize = REDUCE2D(sData, finiteReference ? 1.0f : 0.0f, tid);
+		const unsigned referenceSize = REDUCE2D(sData, finiteReference ? 1.0f : 0.0f, tid);
 
 		// Define temp variables to store the displacements and measure of similarity
                 float bestDisplacement[2] = {NAN, 0.0f};
@@ -189,17 +189,17 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues,
 
 			// Iteration of the 7 x 7 blocks in the neighborhood (3*2+1)^2
 			// Starts at 1 since we stored to many voxels in the shared
-			for (unsigned int y=1; y<8; ++y){
-				for (unsigned int x=1; x<8; ++x){
+			for (unsigned y=1; y<8; ++y){
+				for (unsigned x=1; x<8; ++x){
 
 					// Compute the coordinate of the voxel in the shared memory
-					const unsigned int sharedIndex = ( y + idy ) * 12 + x + idx;
+					const unsigned sharedIndex = ( y + idy ) * 12 + x + idx;
 					// Get the warped value
 					const float rWarpedValue = sWarpedValues[sharedIndex];
 					// Check if the warped and reference are defined
 					const bool overlap = isfinite(rWarpedValue) && finiteReference;
 					// Compute the number of defined value in the block
-					const unsigned int warpedSize = REDUCE2D(sData, overlap ? 1.0f : 0.0f, tid);
+					const unsigned warpedSize = REDUCE2D(sData, overlap ? 1.0f : 0.0f, tid);
 
 					// Subsequent computation is performed if the more than half the voxel are defined
 					if (warpedSize > 8){
@@ -236,7 +236,7 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues,
 
 		// Only the first thread can update the global array with the new result
 		if(tid==0){
-			const unsigned int posIdx = 2 * currentBlockIndex;
+			const unsigned posIdx = 2 * currentBlockIndex;
 			const float referencePosition_temp[2] = { (float)(xImage), (float)(yImage)};
 
 			bestDisplacement[0] += referencePosition_temp[0];
@@ -268,7 +268,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 	__local float sData[64];
 
 	// Compute the current block index
-	const unsigned int bid = (get_group_id(2)*get_num_groups(1)+get_group_id(1) ) *
+	const unsigned bid = (get_group_id(2)*get_num_groups(1)+get_group_id(1) ) *
 			get_num_groups(0) + get_group_id(0);
 
 	// Check if the current block is active
@@ -276,17 +276,17 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 	if (currentBlockIndex > -1){
 
 		// Assign the current coordonate of the voxel in the block
-		const unsigned int idx = get_local_id(0);
-		const unsigned int idy = get_local_id(1);
-		const unsigned int idz = get_local_id(2);
+		const unsigned idx = get_local_id(0);
+		const unsigned idy = get_local_id(1);
+		const unsigned idz = get_local_id(2);
 
 		// Compute the current voxel index in the block
-		const unsigned int tid = idz * 16 + idy * 4 + idx;
+		const unsigned tid = idz * 16 + idy * 4 + idx;
 
 		// Compute the coordinate of the current voxel in the whole image
-		const unsigned int xImage = get_group_id(0) * 4 + idx;
-		const unsigned int yImage = get_group_id(1) * 4 + idy;
-		const unsigned int zImage = get_group_id(2) * 4 + idz;
+		const unsigned xImage = get_group_id(0) * 4 + idx;
+		const unsigned yImage = get_group_id(1) * 4 + idy;
+		const unsigned zImage = get_group_id(2) * 4 + idz;
 
 		// Populate shared memory with the warped image values
 		for (int n=-1; n<2; ++n) {
@@ -300,7 +300,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 					const int sharedIndex = (((n+1)*4+idz)*12+(m+1)*4+idy)*12+(l+1)*4+idx;
 
 					// Compute the index of the voxel under consideration
-					const unsigned int indexXYZIn = xImageIn + c_ImageSize.x *
+					const unsigned indexXYZIn = xImageIn + c_ImageSize.x *
 							(yImageIn + zImageIn * c_ImageSize.y);
 
 					// Check if the current voxel belongs to the image
@@ -316,7 +316,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 		}
 
 		// Compute the index of the current voxel in the whole image
-		const unsigned int voxIndex = ( zImage * c_ImageSize.y + yImage ) *
+		const unsigned voxIndex = ( zImage * c_ImageSize.y + yImage ) *
 				c_ImageSize.x + xImage;
 		// Define a boolean to check if the current voxel is in the input image space
 		const bool referenceInBounds =
@@ -332,7 +332,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 		rReferenceValue = finiteReference ? rReferenceValue : 0.0f;
 
 		// Compute the number of voxel different from 0
-		const unsigned int referenceSize = REDUCE(sData, finiteReference ? 1.0f : 0.0f, tid);
+		const unsigned referenceSize = REDUCE(sData, finiteReference ? 1.0f : 0.0f, tid);
 
 		// Define temp variables to store the displacements and measure of similarity
                 float bestDisplacement[3] = {NAN, 0.0f, 0.0f };
@@ -355,14 +355,14 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 					for (int l=1; l < 8; ++l) {
 
 						// Compute the coordinate of the voxel in the shared memory
-						const unsigned int sharedIndex = ( (n+idz) * 12 + m + idy ) * 12 + l + idx;
+						const unsigned sharedIndex = ( (n+idz) * 12 + m + idy ) * 12 + l + idx;
 
 						// Get the warped value
 						const float rWarpedValue = sWarpedValues[sharedIndex];
 						// Check if the warped and reference are defined
 						const bool overlap = isfinite(rWarpedValue) && finiteReference;
 						// Compute the number of defined value in the block
-						const unsigned int warpedSize = REDUCE(sData, overlap ? 1.0f : 0.0f, tid);
+						const unsigned warpedSize = REDUCE(sData, overlap ? 1.0f : 0.0f, tid);
 
 						// Subsequent computation is performed if the more than half the voxel are defined
 						if (warpedSize > 32){
@@ -401,7 +401,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 
 		// Only the first thread can update the global array with the new result
 		if (tid==0){
-			const unsigned int posIdx = 3 * currentBlockIndex;
+			const unsigned posIdx = 3 * currentBlockIndex;
 			const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage};
 
 			bestDisplacement[0] += referencePosition_temp[0];
diff --git a/reg-lib/cl/resampleKernel.cl b/reg-lib/cl/resampleKernel.cl
index b6b0309b..b1c1a468 100755
--- a/reg-lib/cl/resampleKernel.cl
+++ b/reg-lib/cl/resampleKernel.cl
@@ -110,20 +110,20 @@ __inline real_t interpLoop2D(__global float* floatingIntensity,
     int *previous,
     uint3 fi_xyz,
     float paddingValue,
-    unsigned int kernel_size)
+    unsigned kernel_size)
 {
     real_t intensity = (real_t) 0.0;
-    
-        for (unsigned int b = 0; b < kernel_size; b++) {
+
+        for (unsigned b = 0; b < kernel_size; b++) {
             int Y = previous[1] + b;
             bool yInBounds = -1 < Y && Y < fi_xyz.y;
             real_t xTempNewValue = (real_t) 0.0;
-            
-            for (unsigned int a = 0; a < kernel_size; a++) {
+
+            for (unsigned a = 0; a < kernel_size; a++) {
                 int X = previous[0] + a;
                 bool xInBounds = -1 < X && X < fi_xyz.x;
 
-                const unsigned int idx = Y * fi_xyz.x + X;
+                const unsigned idx = Y * fi_xyz.x + X;
 
                 xTempNewValue += (xInBounds && yInBounds) ? floatingIntensity[idx] * xBasis[a] : paddingValue * xBasis[a];
             }
@@ -141,21 +141,21 @@ __inline real_t interpLoop3D(__global float* floatingIntensity,
     int *previous,
     uint3 fi_xyz,
     float paddingValue,
-    unsigned int kernel_size)
+    unsigned kernel_size)
 {
     real_t intensity = (real_t) 0.0;
-    for (unsigned int c = 0; c < kernel_size; c++) {
+    for (unsigned c = 0; c < kernel_size; c++) {
         int Z = previous[2] + c;
         bool zInBounds = -1 < Z && Z < fi_xyz.z;
         real_t yTempNewValue = (real_t) 0.0;
-        for (unsigned int b = 0; b < kernel_size; b++) {
+        for (unsigned b = 0; b < kernel_size; b++) {
             int Y = previous[1] + b;
             bool yInBounds = -1 < Y && Y < fi_xyz.y;
             real_t xTempNewValue = (real_t) 0.0;
-            for (unsigned int a = 0; a < kernel_size; a++) {
+            for (unsigned a = 0; a < kernel_size; a++) {
                 int X = previous[0] + a;
                 bool xInBounds = -1 < X && X < fi_xyz.x;
-                const unsigned int idx = Z * fi_xyz.x * fi_xyz.y + Y * fi_xyz.x + X;
+                const unsigned idx = Z * fi_xyz.x * fi_xyz.y + Y * fi_xyz.x + X;
 
                 xTempNewValue += (xInBounds && yInBounds  && zInBounds) ? floatingIntensity[idx] * xBasis[a] : paddingValue * xBasis[a];
             }
@@ -223,7 +223,7 @@ __kernel void ResampleImage2D(__global float* floatingImage,
     long index = get_group_id(0)*get_local_size(0) + get_local_id(0);
     while (index < voxelNumber.x) {
 
-        for (unsigned int t = 0; t < wi_tu.x * wi_tu.y; t++) {
+        for (unsigned t = 0; t < wi_tu.x * wi_tu.y; t++) {
 
             __global float *resultIntensity = &resultIntensityPtr[t * voxelNumber.x];
             __global float *floatingIntensity = &sourceIntensityPtr[t * voxelNumber.y];
@@ -315,7 +315,7 @@ __kernel void ResampleImage3D(__global float* floatingImage,
     long index = get_group_id(0)*get_local_size(0) + get_local_id(0);
     while (index < voxelNumber.x) {
 
-        for (unsigned int t = 0; t < wi_tu.x * wi_tu.y; t++) {
+        for (unsigned t = 0; t < wi_tu.x * wi_tu.y; t++) {
 
             __global float *resultIntensity = &resultIntensityPtr[t * voxelNumber.x];
             __global float *floatingIntensity = &sourceIntensityPtr[t * voxelNumber.y];
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index 98b96495..b54ac854 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -38,28 +38,28 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
 
    if (referenceImage->nz > 1) {
       // Version using 3D blocks
-      for (unsigned int k = 0; k < params->blockNumber[2]; k++) {
-         for (unsigned int j = 0; j < params->blockNumber[1]; j++) {
-            for (unsigned int i = 0; i < params->blockNumber[0]; i++) {
+      for (unsigned k = 0; k < params->blockNumber[2]; k++) {
+         for (unsigned j = 0; j < params->blockNumber[1]; j++) {
+            for (unsigned i = 0; i < params->blockNumber[0]; i++) {
 
-               for (unsigned int n = 0; n < BLOCK_3D_SIZE; n++)
+               for (unsigned n = 0; n < BLOCK_3D_SIZE; n++)
                   referenceValues[n] = (DataType)std::numeric_limits<float>::quiet_NaN();
 
                float mean = 0.0f;
                float voxelNumber = 0.0f;
                int coord = 0;
-               for (unsigned int z = k * BLOCK_WIDTH; z < (k + 1) * BLOCK_WIDTH; z++) {
-                  if (z < (unsigned int)referenceImage->nz) {
+               for (unsigned z = k * BLOCK_WIDTH; z < (k + 1) * BLOCK_WIDTH; z++) {
+                  if (z < (unsigned)referenceImage->nz) {
                      index = z * referenceImage->nx * referenceImage->ny;
                      DataType *referencePtrZ = &referencePtr[index];
                      int *maskPtrZ = &maskPtr[index];
-                     for (unsigned int y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) {
-                        if (y < (unsigned int)referenceImage->ny) {
+                     for (unsigned y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) {
+                        if (y < (unsigned)referenceImage->ny) {
                            index = y * referenceImage->nx + i * BLOCK_WIDTH;
                            DataType *referencePtrXYZ = &referencePtrZ[index];
                            int *maskPtrXYZ = &maskPtrZ[index];
-                           for (unsigned int x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) {
-                              if (x < (unsigned int)referenceImage->nx) {
+                           for (unsigned x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) {
+                              if (x < (unsigned)referenceImage->nx) {
                                  referenceValues[coord] = *referencePtrXYZ;
                                  if (referenceValues[coord] == referenceValues[coord] && *maskPtrXYZ > -1) {
                                     mean += (float)referenceValues[coord];
@@ -99,22 +99,22 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
    }
    else {
       // Version using 2D blocks
-      for (unsigned int j = 0; j < params->blockNumber[1]; j++) {
-         for (unsigned int i = 0; i < params->blockNumber[0]; i++) {
+      for (unsigned j = 0; j < params->blockNumber[1]; j++) {
+         for (unsigned i = 0; i < params->blockNumber[0]; i++) {
 
-            for (unsigned int n = 0; n < BLOCK_2D_SIZE; n++)
+            for (unsigned n = 0; n < BLOCK_2D_SIZE; n++)
                referenceValues[n] = std::numeric_limits<DataType>::quiet_NaN();
 
             float mean = 0.0f;
             float voxelNumber = 0.0f;
             int coord = 0;
 
-            for (unsigned int y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) {
+            for (unsigned y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) {
                if (y < (unsigned )referenceImage->ny) {
                   index = y * referenceImage->nx + i * BLOCK_WIDTH;
                   DataType *referencePtrXY = &referencePtr[index];
                   int *maskPtrXY = &maskPtr[index];
-                  for (unsigned int x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) {
+                  for (unsigned x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) {
                      if (x < (unsigned)referenceImage->nx) {
                         referenceValues[coord] = *referencePtrXY;
                         if (referenceValues[coord] == referenceValues[coord] && *maskPtrXY > -1) {
@@ -267,22 +267,22 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
    else
       referenceMatrix_xyz = &(reference->qto_xyz);
 
-   unsigned int referenceIndex_start_x;
-   unsigned int referenceIndex_start_y;
-   unsigned int referenceIndex_end_x;
-   unsigned int referenceIndex_end_y;
+   unsigned referenceIndex_start_x;
+   unsigned referenceIndex_start_y;
+   unsigned referenceIndex_end_x;
+   unsigned referenceIndex_end_y;
    int warpedIndex_start_x;
    int warpedIndex_start_y;
    int warpedIndex_end_x;
    int warpedIndex_end_y;
 
-   unsigned int referenceIndex;
-   unsigned int warpedIndex;
+   unsigned referenceIndex;
+   unsigned warpedIndex;
 
-   unsigned int blockIndex = 0;
+   unsigned blockIndex = 0;
 
    int index, l, m, x, y, z = 0;
-   unsigned int i, j;
+   unsigned i, j;
    int *maskPtr_XY;
    DataType *referencePtr_XY, *warpedPtr_XY;
    DataType value, bestCC, referenceMean, warpedMean, referenceVar, warpedVar;
@@ -448,12 +448,12 @@ void block_matching_method3D(nifti_image * reference,
    else
       referenceMatrix_xyz = &(reference->qto_xyz);
 
-   unsigned int referenceIndex_start_x;
-   unsigned int referenceIndex_start_y;
-   unsigned int referenceIndex_start_z;
-   unsigned int referenceIndex_end_x;
-   unsigned int referenceIndex_end_y;
-   unsigned int referenceIndex_end_z;
+   unsigned referenceIndex_start_x;
+   unsigned referenceIndex_start_y;
+   unsigned referenceIndex_start_z;
+   unsigned referenceIndex_end_x;
+   unsigned referenceIndex_end_y;
+   unsigned referenceIndex_end_z;
    int warpedIndex_start_x;
    int warpedIndex_start_y;
    int warpedIndex_start_z;
diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h
index 483554d2..958c4bec 100755
--- a/reg-lib/cpu/_reg_blockMatching.h
+++ b/reg-lib/cpu/_reg_blockMatching.h
@@ -34,11 +34,11 @@ struct _reg_blockMatchingParam
 {
    int totalBlockNumber;
    int *totalBlock;
-   unsigned int blockNumber[3];
+   unsigned blockNumber[3];
    //Number of block we keep for LTS
    int percent_to_keep;
 
-   unsigned int dim;
+   unsigned dim;
    float *referencePosition;
    float *warpedPosition;
 
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index 2f0c66e0..c81ab780 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -85,7 +85,7 @@ template<class DataType>
 double reg_getDTIMeasureValue(nifti_image *referenceImage,
                               nifti_image *warpedImage,
                               int *mask,
-                              unsigned int * dtIndicies
+                              unsigned *dtIndicies
                              )
 {
 #ifdef _WIN32
@@ -151,8 +151,8 @@ reduction(+:n)
    } // loop over voxels
    return DTI_cost/n;
 }
-template double reg_getDTIMeasureValue<float>(nifti_image *,nifti_image *,int *, unsigned int *);
-template double reg_getDTIMeasureValue<double>(nifti_image *,nifti_image *,int *, unsigned int *);
+template double reg_getDTIMeasureValue<float>(nifti_image *,nifti_image *,int *, unsigned *);
+template double reg_getDTIMeasureValue<double>(nifti_image *,nifti_image *,int *, unsigned *);
 /* *************************************************************** */
 double reg_dti::GetSimilarityMeasureValue()
 {
@@ -232,7 +232,7 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
       nifti_image *warpedGradient,
       nifti_image *dtiMeasureGradientImage,
       int *mask,
-      unsigned int * dtIndicies)
+      unsigned *dtIndicies)
 {
    // Create pointers to the reference and warped images
 #ifdef _WIN32
@@ -264,7 +264,7 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
    // THE FOLLOWING IS WRONG
    reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX");
    reg_exit();
-   unsigned int gradientVoxels = warpedGradient->nu*voxelNumber;
+   unsigned gradientVoxels = warpedGradient->nu*voxelNumber;
    DataType *firstGradVox = static_cast<DataType *>(warpedGradient->data);
    DataType *spatialGradXX = &firstGradVox[gradientVoxels*dtIndicies[0]];
    DataType *spatialGradXY = &firstGradVox[gradientVoxels*dtIndicies[1]];
@@ -327,9 +327,9 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
 }
 /* *************************************************************** */
 template void reg_getVoxelBasedDTIMeasureGradient<float>
-(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned int *);
+(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned *);
 template void reg_getVoxelBasedDTIMeasureGradient<double>
-(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned int *);
+(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned *);
 /* *************************************************************** */
 void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
 {
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 3aafa4be..6df167b6 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -45,7 +45,7 @@ class reg_dti: public reg_measure {
 
 protected:
     // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ
-    unsigned int dtIndicies[6];
+    unsigned dtIndicies[6];
     float currentValue;
 };
 /* *************************************************************** */
@@ -61,7 +61,7 @@ extern "C++" template <class DataType>
 double reg_getDTIMeasureValue(nifti_image *referenceImage,
                               nifti_image *warpedImage,
                               int *mask,
-                              unsigned int *dtIndicies);
+                              unsigned *dtIndicies);
 
 /** @brief Compute a voxel based gradient of the sum squared difference.
  * @param referenceImage First input image to use to compute the metric
@@ -80,4 +80,4 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
                                          nifti_image *warpedGradient,
                                          nifti_image *dtiMeasureGradientImage,
                                          int *mask,
-                                         unsigned int *dtIndicies);
+                                         unsigned *dtIndicies);
diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp
index ff6fdc2b..51c22017 100644
--- a/reg-lib/cpu/_reg_femTrans.cpp
+++ b/reg-lib/cpu/_reg_femTrans.cpp
@@ -31,10 +31,10 @@ float reg_getTetrahedronVolume(float *node1,float *node2,float *node3,float *nod
 }
 
 void reg_fem_InitialiseTransformation(int *elementNodes,
-                                      unsigned int elementNumber,
+                                      unsigned elementNumber,
                                       float *nodePositions,
                                       nifti_image *deformationFieldImage,
-                                      unsigned int *closestNodes,
+                                      unsigned *closestNodes,
                                       float *femInterpolationWeight
                                      )
 {
@@ -59,10 +59,10 @@ void reg_fem_InitialiseTransformation(int *elementNodes,
    float fullVolume;
    float subVolume[4];
 
-   for(unsigned int element=0; element<elementNumber; ++element)
+   for(unsigned element=0; element<elementNumber; ++element)
    {
       // Compute the element bounding box in voxel coordinate
-      for(unsigned int i=0; i<4; ++i)
+      for(unsigned i=0; i<4; ++i)
       {
          currentNodes[i]=elementNodes[4*element+i];
          nodeRealPosition[0]=nodePositions[3*currentNodes[i]];
@@ -74,7 +74,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes,
       int xRange[2]= {(int)reg_ceil(nodeVoxelIndices[0][0]), (int)reg_floor(nodeVoxelIndices[0][0])};
       int yRange[2]= {(int)reg_ceil(nodeVoxelIndices[0][1]), (int)reg_floor(nodeVoxelIndices[0][1])};
       int zRange[2]= {(int)reg_ceil(nodeVoxelIndices[0][2]), (int)reg_floor(nodeVoxelIndices[0][2])};
-      for(unsigned int i=1; i<4; ++i)
+      for(unsigned i=1; i<4; ++i)
       {
          xRange[0]=xRange[0]<(int)reg_ceil(nodeVoxelIndices[i][0])?xRange[0]:(int)reg_ceil(nodeVoxelIndices[i][0]);
          xRange[1]=xRange[1]>(int)reg_floor(nodeVoxelIndices[i][0])?xRange[1]:(int)reg_floor(nodeVoxelIndices[i][0]);
@@ -128,7 +128,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes,
                if(fabs(fullVolume/(subVolume[0]+subVolume[1]+subVolume[2]+subVolume[3])-1.f)<.000001f)
                {
                   int index=(z*deformationFieldImage->ny+y)*deformationFieldImage->nx+x;
-                  for(unsigned int i=0; i<4; ++i)
+                  for(unsigned i=0; i<4; ++i)
                   {
                      closestNodes[4*index+i]=currentNodes[i];
                      femInterpolationWeight[4*index+i]=subVolume[i]/fullVolume;
@@ -144,7 +144,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes,
 
 void reg_fem_getDeformationField(float *nodePositions,
                                  nifti_image *deformationFieldImage,
-                                 unsigned int *closestNodes,
+                                 unsigned *closestNodes,
                                  float *femInterpolationWeight
                                 )
 {
@@ -210,9 +210,9 @@ void reg_fem_getDeformationField(float *nodePositions,
 }// reg_fem_getDeformationField
 
 void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
-                                 unsigned int *closestNodes,
+                                 unsigned *closestNodes,
                                  float *femInterpolationWeight,
-                                 unsigned int nodeNumber,
+                                 unsigned nodeNumber,
                                  float *femBasedGradient)
 {
    const size_t voxelNumber = CalcVoxelNumber(*voxelBasedGradient);
@@ -220,10 +220,10 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
    float *voxGradPtrY = &voxGradPtrX[voxelNumber];
    float *voxGradPtrZ = &voxGradPtrY[voxelNumber];
 
-   for(unsigned int node=0; node<3*nodeNumber; ++node)
+   for(unsigned node=0; node<3*nodeNumber; ++node)
       femBasedGradient[node]=0.f;
 
-   unsigned int currentNodes[4];
+   unsigned currentNodes[4];
    float currentGradient[3];
    float coefficients[4];
    for(size_t voxel=0; voxel<voxelNumber; ++voxel)
@@ -242,7 +242,7 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
       currentGradient[1]=voxGradPtrY[voxel];
       currentGradient[2]=voxGradPtrZ[voxel];
 
-      for(unsigned int i=0; i<4; ++i)
+      for(unsigned i=0; i<4; ++i)
       {
          femBasedGradient[3*currentNodes[i]  ] += currentGradient[0]*coefficients[i];
          femBasedGradient[3*currentNodes[i]+1] += currentGradient[1]*coefficients[i];
diff --git a/reg-lib/cpu/_reg_femTrans.h b/reg-lib/cpu/_reg_femTrans.h
index 103ede88..3c0802d5 100644
--- a/reg-lib/cpu/_reg_femTrans.h
+++ b/reg-lib/cpu/_reg_femTrans.h
@@ -32,10 +32,10 @@
  * the weight associated with the closest node.
  */
 void reg_fem_InitialiseTransformation(int *elementNodes,
-                                      unsigned int elementNumber,
+                                      unsigned elementNumber,
                                       float *nodePositions,
                                       nifti_image *deformationFieldImage,
-                                      unsigned int *closestNodes,
+                                      unsigned *closestNodes,
                                       float *femInterpolationWeight
                                      );
 
@@ -51,7 +51,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes,
  */
 void reg_fem_getDeformationField(float *nodePositions,
                                  nifti_image *deformationFieldImage,
-                                 unsigned int *closestNodes,
+                                 unsigned *closestNodes,
                                  float *femInterpolationWeight
                                 );
 
@@ -66,7 +66,7 @@ void reg_fem_getDeformationField(float *nodePositions,
  * every node.
  */
 void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
-                                 unsigned int *closestNodes,
+                                 unsigned *closestNodes,
                                  float *femInterpolationWeight,
-                                 unsigned int nodeNumber,
+                                 unsigned nodeNumber,
                                  float *femBasedGradient);
diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp
index ff387fa9..3f27b7b7 100755
--- a/reg-lib/cpu/_reg_globalTrans.cpp
+++ b/reg-lib/cpu/_reg_globalTrans.cpp
@@ -295,10 +295,10 @@ void estimate_rigid_transformation2D(float** points1, float** points2, int num_p
 void estimate_rigid_transformation2D(std::vector<_reg_sorted_point2D> &points, mat44 * transformation)
 {
 
-   unsigned int num_points = points.size();
+   unsigned num_points = points.size();
    float** points1 = reg_matrix2DAllocate<float>(num_points, 2);
    float** points2 = reg_matrix2DAllocate<float>(num_points, 2);
-   for (unsigned int i = 0; i < num_points; i++) {
+   for (unsigned i = 0; i < num_points; i++) {
       points1[i][0] = points[i].reference[0];
       points1[i][1] = points[i].reference[1];
       points2[i][0] = points[i].warped[0];
@@ -429,10 +429,10 @@ void estimate_rigid_transformation3D(float** points1, float** points2, int num_p
 /* *************************************************************** */
 void estimate_rigid_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44 * transformation)
 {
-   unsigned int num_points = points.size();
+   unsigned num_points = points.size();
    float** points1 = reg_matrix2DAllocate<float>(num_points, 3);
    float** points2 = reg_matrix2DAllocate<float>(num_points, 3);
-   for (unsigned int i = 0; i < num_points; i++) {
+   for (unsigned i = 0; i < num_points; i++) {
       points1[i][0] = points[i].reference[0];
       points1[i][1] = points[i].reference[1];
       points1[i][2] = points[i].reference[2];
@@ -536,10 +536,10 @@ void estimate_affine_transformation2D(float** points1, float** points2, int num_
 /* *************************************************************** */
 void estimate_affine_transformation2D(std::vector<_reg_sorted_point2D> &points, mat44 * transformation)
 {
-   unsigned int num_points = points.size();
+   unsigned num_points = points.size();
    float** points1 = reg_matrix2DAllocate<float>(num_points, 2);
    float** points2 = reg_matrix2DAllocate<float>(num_points, 2);
-   for (unsigned int i = 0; i < num_points; i++) {
+   for (unsigned i = 0; i < num_points; i++) {
       points1[i][0] = points[i].reference[0];
       points1[i][1] = points[i].reference[1];
       points2[i][0] = points[i].warped[0];
@@ -660,10 +660,10 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_
 // estimate an affine transformation using least square
 void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44 * transformation)
 {
-   unsigned int num_points = points.size();
+   unsigned num_points = points.size();
    float** points1 = reg_matrix2DAllocate<float>(num_points, 3);
    float** points2 = reg_matrix2DAllocate<float>(num_points, 3);
-   for (unsigned int i = 0; i < num_points; i++) {
+   for (unsigned i = 0; i < num_points; i++) {
       points1[i][0] = points[i].reference[0];
       points1[i][1] = points[i].reference[1];
       points1[i][2] = points[i].reference[2];
@@ -679,7 +679,7 @@ void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points,
 /* *************************************************************** */
 ///LTS 2D
 void optimize_2D(float* referencePosition, float* warpedPosition,
-                 unsigned int activeBlockNumber, int percent_to_keep, int max_iter, double tol,
+                 unsigned activeBlockNumber, int percent_to_keep, int max_iter, double tol,
                  mat44 * final, bool affine) {
 
    // Set the current transformation to identity
@@ -762,7 +762,7 @@ void optimize_2D(float* referencePosition, float* warpedPosition,
 /* *************************************************************** */
 ///LTS 3D
 void optimize_3D(float *referencePosition, float *warpedPosition,
-                 unsigned int activeBlockNumber, int percent_to_keep, int max_iter, double tol,
+                 unsigned activeBlockNumber, int percent_to_keep, int max_iter, double tol,
                  mat44 *final, bool affine) {
 
    // Set the current transformation to identity
diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h
index b2eeeb7e..06c47bbc 100755
--- a/reg-lib/cpu/_reg_globalTrans.h
+++ b/reg-lib/cpu/_reg_globalTrans.h
@@ -82,7 +82,7 @@ void reg_affine_getDeformationField(mat44 *affine,
                                     int *mask = nullptr);
 /* *************************************************************** */
 void optimize_2D(float* referencePosition, float* warpedPosition,
-    unsigned int definedActiveBlock, int percent_to_keep, int max_iter, double tol,
+    unsigned definedActiveBlock, int percent_to_keep, int max_iter, double tol,
     mat44* final, bool affine);
 /* *************************************************************** */
 void estimate_affine_transformation2D(std::vector<_reg_sorted_point2D> &points, mat44* transformation);
@@ -90,7 +90,7 @@ void estimate_affine_transformation2D(std::vector<_reg_sorted_point2D> &points,
 void estimate_rigid_transformation2D(std::vector<_reg_sorted_point2D> &points, mat44* transformation);
 /* *************************************************************** */
 void optimize_3D(float* referencePosition, float* warpedPosition,
-    unsigned int definedActiveBlock, int percent_to_keep, int max_iter, double tol,
+    unsigned definedActiveBlock, int percent_to_keep, int max_iter, double tol,
     mat44* final, bool affine);
 /* *************************************************************** */
 void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44* transformation);
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 7a5a29fe..c09b15e3 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -324,7 +324,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
    reg_mat44_eye(&backwardGridImage->sto_xyz);
    reg_mat44_eye(&forwardGridImage->sto_ijk);
    reg_mat44_eye(&backwardGridImage->sto_ijk);
-   for(unsigned int i=0; i<3; ++i)
+   for(unsigned i=0; i<3; ++i)
    {
       if(referenceImage->nz>1 || i<2)
       {
@@ -2492,7 +2492,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
    else
    {
       // The voxel spacing is reduced by two
-      for(unsigned int i=0; i<3; ++i)
+      for(unsigned i=0; i<3; ++i)
       {
          controlPointGrid->sto_xyz.m[0][i] /= 2.f;
          controlPointGrid->sto_xyz.m[1][i] /= 2.f;
@@ -3546,9 +3546,9 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
          yReal=0;
  #if _USE_SSE
          coord=0;
-         for(unsigned int b=0; b<4; b++)
+         for(unsigned b=0; b<4; b++)
          {
-            for(unsigned int a=0; a<4; a++)
+            for(unsigned a=0; a<4; a++)
             {
                xyBasis[coord++] = xBasis[a] * yBasis[b];
             }
@@ -3560,7 +3560,7 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
          __m128 *ptrY = (__m128 *) &yControlPointCoordinates[0];
          __m128 *ptrBasis   = (__m128 *) &xyBasis[0];
          //addition and multiplication of the 16 basis value and CP position for each axis
-         for(unsigned int a=0; a<4; a++)
+         for(unsigned a=0; a<4; a++)
          {
             tempX = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrX), tempX );
             tempY = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrY), tempY );
@@ -3575,9 +3575,9 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
          yReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
  #else
          coord=0;
-         for(unsigned int b=0; b<4; b++)
+         for(unsigned b=0; b<4; b++)
          {
-            for(unsigned int a=0; a<4; a++)
+            for(unsigned a=0; a<4; a++)
             {
                DataType tempValue = xBasis[a] * yBasis[b];
                xReal += xControlPointCoordinates[coord] * tempValue;
@@ -3807,9 +3807,9 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
             ptrY = (__m128 *) &yControlPointCoordinates[0];
             ptrZ = (__m128 *) &zControlPointCoordinates[0];
 
-            for(unsigned int c=0; c<4; c++)
+            for(unsigned c=0; c<4; c++)
             {
-               for(unsigned int b=0; b<4; b++)
+               for(unsigned b=0; b<4; b++)
                {
                   _yBasis_sse  = _mm_set_ps1(yBasis[b]);
                   _zBasis_sse  = _mm_set_ps1(zBasis[c]);
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 7e3baadf..88262208 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -1495,7 +1495,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
          DataType xBasis, yBasis, basis;
          DataType xFirst, yFirst;
          DataType basisValues[2];
-         unsigned int jacIndex;
+         unsigned jacIndex;
 
          int x, y, xPre, yPre, pixelX, pixelY, index;
          DataType jacobianConstraint[2];
@@ -1759,7 +1759,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
          DataType xBasis, yBasis, zBasis, basis;
          DataType xFirst, yFirst, zFirst;
          DataType basisValues[3];
-         unsigned int jacIndex;
+         unsigned jacIndex;
 
          int x, y, z, xPre, yPre, zPre, pixelX, pixelY, pixelZ, index;
          DataType jacobianConstraint[3];
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index d2708c41..cd4196d4 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -117,7 +117,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
         reg_tools_addImageToImage(meanImage, diff_image, meanImage);
 
         // Store the current descriptor
-        unsigned int index = i * diff_image->nvox;
+        unsigned index = i * diff_image->nvox;
         memcpy(&MINDImgDataPtr[index], diff_image->data, diff_image->nbyper * diff_image->nvox);
     }
     // Compute the mean over the number of sample
@@ -265,7 +265,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
 
             reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img);
             // Store the current descriptor
-            unsigned int index = compteurId * diff_imageShifted->nvox;
+            unsigned index = compteurId * diff_imageShifted->nvox;
             memcpy(&MINDSSCImgDataPtr[index], diff_imageShifted->data,
                    diff_imageShifted->nbyper * diff_imageShifted->nvox);
             compteurId++;
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 2068a340..80e65781 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -273,8 +273,8 @@ class reg_multichannel_nmi: public reg_measure {
 extern "C++"
 void reg_getMultiChannelNMIValue(nifti_image *referenceImages,
                                  nifti_image *warpedImages,
-                                 unsigned int *reference_bins, // should be an array of size num_reference_volumes
-                                 unsigned int *warped_bins, // should be an array of size num_warped_volumes
+                                 unsigned *reference_bins, // should be an array of size num_reference_volumes
+                                 unsigned *warped_bins, // should be an array of size num_warped_volumes
                                  double *probaJointHistogram,
                                  double *logJointHistogram,
                                  double *entropies,
@@ -286,8 +286,8 @@ extern "C++"
 void reg_getVoxelBasedMultiChannelNMIGradient2D(nifti_image *referenceImages,
                                                 nifti_image *warpedImages,
                                                 nifti_image *warpedImageGradient,
-                                                unsigned int *reference_bins,
-                                                unsigned int *warped_bins,
+                                                unsigned *reference_bins,
+                                                unsigned *warped_bins,
                                                 double *logJointHistogram,
                                                 double *entropies,
                                                 nifti_image *nmiGradientImage,
@@ -298,8 +298,8 @@ extern "C++"
 void reg_getVoxelBasedMultiChannelNMIGradient3D(nifti_image *referenceImages,
                                                 nifti_image *warpedImages,
                                                 nifti_image *warpedImageGradient,
-                                                unsigned int *reference_bins,
-                                                unsigned int *warped_bins,
+                                                unsigned *reference_bins,
+                                                unsigned *warped_bins,
                                                 double *logJointHistogram,
                                                 double *entropies,
                                                 nifti_image *nmiGradientImage,
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 48251afc..d8b12719 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -123,7 +123,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
         char text[255];
         reg_print_msg_debug("DTI indices:");
         sprintf(text, "Active time point:");
-        for(unsigned int i = 0; i < 6; i++ )
+        for(unsigned i = 0; i < 6; i++ )
             sprintf(text, "%s %i", text, dtIndicies[i]);
         reg_print_msg_debug(text);
 #endif
@@ -882,7 +882,7 @@ void reg_resampleImage(nifti_image *floatingImage,
                                             jacMat);
             break;
         case NIFTI_TYPE_UINT32:
-            reg_resampleImage2<float,unsigned int>(floatingImage,
+            reg_resampleImage2<float,unsigned>(floatingImage,
                                                    warpedImage,
                                                    deformationField,
                                                    mask,
@@ -970,7 +970,7 @@ void reg_resampleImage(nifti_image *floatingImage,
                                              jacMat);
             break;
         case NIFTI_TYPE_UINT32:
-            reg_resampleImage2<double,unsigned int>(floatingImage,
+            reg_resampleImage2<double,unsigned>(floatingImage,
                                                     warpedImage,
                                                     deformationField,
                                                     mask,
@@ -1888,7 +1888,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage,
                                                 algorithm);
             break;
         case NIFTI_TYPE_UINT32:
-            reg_resampleImage2_PSF<float,unsigned int>(floatingImage,
+            reg_resampleImage2_PSF<float,unsigned>(floatingImage,
                                                        warpedImage,
                                                        deformationField,
                                                        mask,
@@ -1976,7 +1976,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage,
                                                  algorithm);
             break;
         case NIFTI_TYPE_UINT32:
-            reg_resampleImage2_PSF<double,unsigned int>(floatingImage,
+            reg_resampleImage2_PSF<double,unsigned>(floatingImage,
                                                         warpedImage,
                                                         deformationField,
                                                         mask,
@@ -3287,7 +3287,7 @@ void reg_getImageGradient1(nifti_image *floatingImage,
                 (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_getImageGradient2<FieldTYPE,unsigned int>
+        reg_getImageGradient2<FieldTYPE,unsigned>
                 (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
         break;
     case NIFTI_TYPE_INT32:
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index cf06669d..015be4d4 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -119,8 +119,8 @@ void reg_intensityRescale_core(nifti_image *image,
         currentMax = (DataType)std::numeric_limits<short>::min();
         break;
     case NIFTI_TYPE_UINT32:
-        currentMin = (DataType)std::numeric_limits<unsigned int>::max();
-        currentMax = (DataType)std::numeric_limits<unsigned int>::min();
+        currentMin = (DataType)std::numeric_limits<unsigned>::max();
+        currentMax = (DataType)std::numeric_limits<unsigned>::min();
         break;
     case NIFTI_TYPE_INT32:
         currentMin = (DataType)std::numeric_limits<int>::max();
@@ -191,7 +191,7 @@ void reg_intensityRescale(nifti_image *image,
         reg_intensityRescale_core<short>(image, timepoint, newMin, newMax);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_intensityRescale_core<unsigned int>(image, timepoint, newMin, newMax);
+        reg_intensityRescale_core<unsigned>(image, timepoint, newMin, newMax);
         break;
     case NIFTI_TYPE_INT32:
         reg_intensityRescale_core<int>(image, timepoint, newMin, newMax);
@@ -236,7 +236,7 @@ void reg_tools_removeSCLInfo(nifti_image *image) {
         reg_tools_removeSCLInfo<short>(image);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_removeSCLInfo<unsigned int>(image);
+        reg_tools_removeSCLInfo<unsigned>(image);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_removeSCLInfo<int>(image);
@@ -318,7 +318,7 @@ void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) {
         reg_thresholdImage<T, short>(image, lowThr, upThr);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_thresholdImage<T, unsigned int>(image, lowThr, upThr);
+        reg_thresholdImage<T, unsigned>(image, lowThr, upThr);
         break;
     case NIFTI_TYPE_INT32:
         reg_thresholdImage<T, int>(image, lowThr, upThr);
@@ -433,7 +433,7 @@ void reg_tools_changeDatatype(nifti_image *image, int type) {
         reg_tools_changeDatatype<NewType, short>(image, type);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_changeDatatype<NewType, unsigned int>(image, type);
+        reg_tools_changeDatatype<NewType, unsigned>(image, type);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_changeDatatype<NewType, int>(image, type);
@@ -452,7 +452,7 @@ void reg_tools_changeDatatype(nifti_image *image, int type) {
 }
 template void reg_tools_changeDatatype<unsigned char>(nifti_image*, int);
 template void reg_tools_changeDatatype<unsigned short>(nifti_image*, int);
-template void reg_tools_changeDatatype<unsigned int>(nifti_image*, int);
+template void reg_tools_changeDatatype<unsigned>(nifti_image*, int);
 template void reg_tools_changeDatatype<char>(nifti_image*, int);
 template void reg_tools_changeDatatype<short>(nifti_image*, int);
 template void reg_tools_changeDatatype<int>(nifti_image*, int);
@@ -542,7 +542,7 @@ void reg_tools_addImageToImage(const nifti_image *img1,
         reg_tools_operationImageToImage<short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, operation);
+        reg_tools_operationImageToImage<unsigned>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_operationImageToImage<int>(img1, img2, res, operation);
@@ -588,7 +588,7 @@ void reg_tools_subtractImageFromImage(const nifti_image *img1,
         reg_tools_operationImageToImage<short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, operation);
+        reg_tools_operationImageToImage<unsigned>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_operationImageToImage<int>(img1, img2, res, operation);
@@ -634,7 +634,7 @@ void reg_tools_multiplyImageToImage(const nifti_image *img1,
         reg_tools_operationImageToImage<short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, operation);
+        reg_tools_operationImageToImage<unsigned>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_operationImageToImage<int>(img1, img2, res, operation);
@@ -680,7 +680,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1,
         reg_tools_operationImageToImage<short>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationImageToImage<unsigned int>(img1, img2, res, operation);
+        reg_tools_operationImageToImage<unsigned>(img1, img2, res, operation);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_operationImageToImage<int>(img1, img2, res, operation);
@@ -756,7 +756,7 @@ void reg_tools_addValueToImage(const nifti_image *img,
         reg_tools_operationValueToImage<short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationValueToImage<unsigned int>(img, res, val, operation);
+        reg_tools_operationValueToImage<unsigned>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_operationValueToImage<int>(img, res, val, operation);
@@ -802,7 +802,7 @@ void reg_tools_subtractValueFromImage(const nifti_image *img,
         reg_tools_operationValueToImage<short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationValueToImage<unsigned int>(img, res, val, operation);
+        reg_tools_operationValueToImage<unsigned>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_operationValueToImage<int>(img, res, val, operation);
@@ -848,7 +848,7 @@ void reg_tools_multiplyValueToImage(const nifti_image *img,
         reg_tools_operationValueToImage<short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationValueToImage<unsigned int>(img, res, val, operation);
+        reg_tools_operationValueToImage<unsigned>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_operationValueToImage<int>(img, res, val, operation);
@@ -894,7 +894,7 @@ void reg_tools_divideValueToImage(const nifti_image *img,
         reg_tools_operationValueToImage<short>(img, res, val, operation);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_operationValueToImage<unsigned int>(img, res, val, operation);
+        reg_tools_operationValueToImage<unsigned>(img, res, val, operation);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_operationValueToImage<int>(img, res, val, operation);
@@ -1389,7 +1389,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
         reg_tools_labelKernelConvolution_core<short>(image, varianceX, varianceY, varianceZ, mask, timePoint);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_labelKernelConvolution_core<unsigned int>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        reg_tools_labelKernelConvolution_core<unsigned>(image, varianceX, varianceY, varianceZ, mask, timePoint);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_labelKernelConvolution_core<int>(image, varianceX, varianceY, varianceZ, mask, timePoint);
@@ -1594,7 +1594,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
         reg_downsampleImage<PrecisionType, short>(image, type, downsampleAxis);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_downsampleImage<PrecisionType, unsigned int>(image, type, downsampleAxis);
+        reg_downsampleImage<PrecisionType, unsigned>(image, type, downsampleAxis);
         break;
     case NIFTI_TYPE_INT32:
         reg_downsampleImage<PrecisionType, int>(image, type, downsampleAxis);
@@ -1638,7 +1638,7 @@ void reg_tools_binarise_image(nifti_image *image) {
         reg_tools_binarise_image<short>(image);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_binarise_image<unsigned int>(image);
+        reg_tools_binarise_image<unsigned>(image);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_binarise_image<int>(image);
@@ -1678,7 +1678,7 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) {
         reg_tools_binarise_image<short>(image, threshold);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_binarise_image<unsigned int>(image, threshold);
+        reg_tools_binarise_image<unsigned>(image, threshold);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_binarise_image<int>(image, threshold);
@@ -1718,7 +1718,7 @@ void reg_tools_binaryImage2int(const nifti_image *image, int *array) {
         reg_tools_binaryImage2int<short>(image, array);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_binaryImage2int<unsigned int>(image, array);
+        reg_tools_binaryImage2int<unsigned>(image, array);
         break;
     case NIFTI_TYPE_INT32:
         reg_tools_binaryImage2int<int>(image, array);
@@ -1785,7 +1785,7 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB
     case NIFTI_TYPE_INT16:
         return reg_tools_getMeanRMS<AType, short>(imageA, imageB);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_getMeanRMS<AType, unsigned int>(imageA, imageB);
+        return reg_tools_getMeanRMS<AType, unsigned>(imageA, imageB);
     case NIFTI_TYPE_INT32:
         return reg_tools_getMeanRMS<AType, int>(imageA, imageB);
     case NIFTI_TYPE_FLOAT32:
@@ -1810,7 +1810,7 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB
     case NIFTI_TYPE_INT16:
         return reg_tools_getMeanRMS<short>(imageA, imageB);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_getMeanRMS<unsigned int>(imageA, imageB);
+        return reg_tools_getMeanRMS<unsigned>(imageA, imageB);
     case NIFTI_TYPE_INT32:
         return reg_tools_getMeanRMS<int>(imageA, imageB);
     case NIFTI_TYPE_FLOAT32:
@@ -1825,14 +1825,14 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_createImagePyramid(const NiftiImage& inputImage, vector<NiftiImage>& pyramid, unsigned int levelNumber, unsigned int levelToPerform) {
+void reg_createImagePyramid(const NiftiImage& inputImage, vector<NiftiImage>& pyramid, unsigned levelNumber, unsigned levelToPerform) {
     // FINEST LEVEL OF REGISTRATION
     pyramid[levelToPerform - 1] = inputImage;
     reg_tools_changeDatatype<DataType>(pyramid[levelToPerform - 1]);
     reg_tools_removeSCLInfo(pyramid[levelToPerform - 1]);
 
     // Images are downsampled if appropriate
-    for (unsigned int l = levelToPerform; l < levelNumber; l++) {
+    for (unsigned l = levelToPerform; l < levelNumber; l++) {
         bool downsampleAxis[8] = { false, true, true, true, false, false, false, false };
         if ((pyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false;
         if ((pyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false;
@@ -1853,11 +1853,11 @@ void reg_createImagePyramid(const NiftiImage& inputImage, vector<NiftiImage>& py
         reg_downsampleImage<DataType>(pyramid[l], 1, downsampleAxis);
     }
 }
-template void reg_createImagePyramid<float>(const NiftiImage&, vector<NiftiImage>&, unsigned int, unsigned int);
-template void reg_createImagePyramid<double>(const NiftiImage&, vector<NiftiImage>&, unsigned int, unsigned int);
+template void reg_createImagePyramid<float>(const NiftiImage&, vector<NiftiImage>&, unsigned, unsigned);
+template void reg_createImagePyramid<double>(const NiftiImage&, vector<NiftiImage>&, unsigned, unsigned);
 /* *************************************************************** */
 template <class DataType>
-void reg_createMaskPyramid(const NiftiImage& inputMaskImage, vector<unique_ptr<int[]>>& maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) {
+void reg_createMaskPyramid(const NiftiImage& inputMaskImage, vector<unique_ptr<int[]>>& maskPyramid, unsigned levelNumber, unsigned levelToPerform) {
     // FINEST LEVEL OF REGISTRATION
     vector<NiftiImage> tempMaskImagePyramid(levelToPerform);
     tempMaskImagePyramid[levelToPerform - 1] = inputMaskImage;
@@ -1865,7 +1865,7 @@ void reg_createMaskPyramid(const NiftiImage& inputMaskImage, vector<unique_ptr<i
     reg_tools_changeDatatype<unsigned char>(tempMaskImagePyramid[levelToPerform - 1]);
 
     // Image is downsampled if appropriate
-    for (unsigned int l = levelToPerform; l < levelNumber; l++) {
+    for (unsigned l = levelToPerform; l < levelNumber; l++) {
         bool downsampleAxis[8] = { false, true, true, true, false, false, false, false };
         if ((tempMaskImagePyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false;
         if ((tempMaskImagePyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false;
@@ -1893,8 +1893,8 @@ void reg_createMaskPyramid(const NiftiImage& inputMaskImage, vector<unique_ptr<i
         reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l].get());
     }
 }
-template void reg_createMaskPyramid<float>(const NiftiImage&, vector<unique_ptr<int[]>>&, unsigned int, unsigned int);
-template void reg_createMaskPyramid<double>(const NiftiImage&, vector<unique_ptr<int[]>>&, unsigned int, unsigned int);
+template void reg_createMaskPyramid<float>(const NiftiImage&, vector<unique_ptr<int[]>>&, unsigned, unsigned);
+template void reg_createMaskPyramid<double>(const NiftiImage&, vector<unique_ptr<int[]>>&, unsigned, unsigned);
 /* *************************************************************** */
 template <class ImageType, class MaskType>
 int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
@@ -1924,7 +1924,7 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma
     case NIFTI_TYPE_INT16:
         return reg_tools_nanMask_image<ImageType, short>(image, maskImage, outputImage);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_nanMask_image<ImageType, unsigned int>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<ImageType, unsigned>(image, maskImage, outputImage);
     case NIFTI_TYPE_INT32:
         return reg_tools_nanMask_image<ImageType, int>(image, maskImage, outputImage);
     case NIFTI_TYPE_FLOAT32:
@@ -1961,7 +1961,7 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma
     case NIFTI_TYPE_INT16:
         return reg_tools_nanMask_image<short>(image, maskImage, outputImage);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_nanMask_image<unsigned int>(image, maskImage, outputImage);
+        return reg_tools_nanMask_image<unsigned>(image, maskImage, outputImage);
     case NIFTI_TYPE_INT32:
         return reg_tools_nanMask_image<int>(image, maskImage, outputImage);
     case NIFTI_TYPE_FLOAT32:
@@ -2038,7 +2038,7 @@ float reg_tools_getMinValue(const nifti_image *image, int timepoint) {
     case NIFTI_TYPE_INT16:
         return reg_tools_getMinMaxValue<short>(image, timepoint);
     case NIFTI_TYPE_UINT32:
-        return (float)reg_tools_getMinMaxValue<unsigned int>(image, timepoint);
+        return (float)reg_tools_getMinMaxValue<unsigned>(image, timepoint);
     case NIFTI_TYPE_INT32:
         return (float)reg_tools_getMinMaxValue<int>(image, timepoint);
     case NIFTI_TYPE_FLOAT32:
@@ -2064,7 +2064,7 @@ float reg_tools_getMaxValue(const nifti_image *image, int timepoint) {
     case NIFTI_TYPE_INT16:
         return reg_tools_getMinMaxValue<short>(image, timepoint, false);
     case NIFTI_TYPE_UINT32:
-        return (float)reg_tools_getMinMaxValue<unsigned int>(image, timepoint, false);
+        return (float)reg_tools_getMinMaxValue<unsigned>(image, timepoint, false);
     case NIFTI_TYPE_INT32:
         return (float)reg_tools_getMinMaxValue<int>(image, timepoint, false);
     case NIFTI_TYPE_FLOAT32:
@@ -2103,7 +2103,7 @@ float reg_tools_getMeanValue(const nifti_image *image) {
     case NIFTI_TYPE_INT16:
         return reg_tools_getMeanValue<short>(image);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_getMeanValue<unsigned int>(image);
+        return reg_tools_getMeanValue<unsigned>(image);
     case NIFTI_TYPE_INT32:
         return reg_tools_getMeanValue<int>(image);
     case NIFTI_TYPE_FLOAT32:
@@ -2143,7 +2143,7 @@ float reg_tools_getSTDValue(const nifti_image *image) {
     case NIFTI_TYPE_INT16:
         return reg_tools_getSTDValue<short>(image);
     case NIFTI_TYPE_UINT32:
-        return reg_tools_getSTDValue<unsigned int>(image);
+        return reg_tools_getSTDValue<unsigned>(image);
     case NIFTI_TYPE_INT32:
         return reg_tools_getSTDValue<int>(image);
     case NIFTI_TYPE_FLOAT32:
@@ -2220,7 +2220,7 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin
         reg_flipAxis<short>(image, outputArray, cmd);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_flipAxis<unsigned int>(image, outputArray, cmd);
+        reg_flipAxis<unsigned>(image, outputArray, cmd);
         break;
     case NIFTI_TYPE_INT32:
         reg_flipAxis<int>(image, outputArray, cmd);
@@ -2595,7 +2595,7 @@ double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB)
     case NIFTI_TYPE_UINT16:
         return reg_test_compare_images<unsigned short>(imgA, imgB);
     case NIFTI_TYPE_UINT32:
-        return reg_test_compare_images<unsigned int>(imgA, imgB);
+        return reg_test_compare_images<unsigned>(imgA, imgB);
     case NIFTI_TYPE_INT8:
         return reg_test_compare_images<char>(imgA, imgB);
     case NIFTI_TYPE_INT16:
@@ -2629,7 +2629,7 @@ void reg_tools_abs_image(nifti_image *img) {
         reg_tools_abs_image<unsigned short>(img);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_abs_image<unsigned int>(img);
+        reg_tools_abs_image<unsigned>(img);
         break;
     case NIFTI_TYPE_INT8:
         reg_tools_abs_image<char>(img);
@@ -2669,7 +2669,7 @@ void cPtrToMat44(mat44 *mat, const float *cMat) {
     }
 }
 /* *************************************************************** */
-void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned int numMats) {
+void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned numMats) {
     for (size_t k = 0; k < numMats; k++) {
         for (int i = 0; i < 3; i++) {
             for (int j = 0; j < 3; j++) {
@@ -2688,26 +2688,26 @@ void cPtrToMat33(mat33 *mat, const float *cMat) {
 }
 /* *************************************************************** */
 template<typename T>
-void matmnToCptr(const T **mat, T *cMat, unsigned int m, unsigned int n) {
-    for (unsigned int i = 0; i < m; i++) {
-        for (unsigned int j = 0; j < n; j++) {
+void matmnToCptr(const T **mat, T *cMat, unsigned m, unsigned n) {
+    for (unsigned i = 0; i < m; i++) {
+        for (unsigned j = 0; j < n; j++) {
             cMat[i * n + j] = mat[i][j];
         }
     }
 }
-template void matmnToCptr<float>(const float**, float*, unsigned int, unsigned int);
-template void matmnToCptr<double>(const double**, double*, unsigned int, unsigned int);
+template void matmnToCptr<float>(const float**, float*, unsigned, unsigned);
+template void matmnToCptr<double>(const double**, double*, unsigned, unsigned);
 /* *************************************************************** */
 template<typename T>
-void cPtrToMatmn(T **mat, const T *cMat, unsigned int m, unsigned int n) {
-    for (unsigned int i = 0; i < m; i++) {
-        for (unsigned int j = 0; j < n; j++) {
+void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n) {
+    for (unsigned i = 0; i < m; i++) {
+        for (unsigned j = 0; j < n; j++) {
             mat[i][j] = cMat[i * n + j];
         }
     }
 }
-template void cPtrToMatmn<float>(float**, const float*, unsigned int, unsigned int);
-template void cPtrToMatmn<double>(double**, const double*, unsigned int, unsigned int);
+template void cPtrToMatmn<float>(float**, const float*, unsigned, unsigned);
+template void cPtrToMatmn<double>(double**, const double*, unsigned, unsigned);
 /* *************************************************************** */
 void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z) {
     x = index % (maxValue_x + 1);
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 9d1b577d..7470e788 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -333,8 +333,8 @@ float reg_tools_getSTDValue(const nifti_image *img);
 extern "C++" template<class DataType>
 void reg_createImagePyramid(const NiftiImage& input,
                             vector<NiftiImage>& pyramid,
-                            unsigned int levelNumber,
-                            unsigned int levelToPerform);
+                            unsigned levelNumber,
+                            unsigned levelToPerform);
 /* *************************************************************** */
 /** @brief Generate a pyramid from an input mask image.
  * @param input Input image to be downsampled to create the pyramid
@@ -348,8 +348,8 @@ void reg_createImagePyramid(const NiftiImage& input,
 extern "C++" template<class DataType>
 void reg_createMaskPyramid(const NiftiImage& input,
                            vector<unique_ptr<int[]>>& pyramid,
-                           unsigned int levelNumber,
-                           unsigned int levelToPerform);
+                           unsigned levelNumber,
+                           unsigned levelToPerform);
 /* *************************************************************** */
 /** @brief this function will threshold an image to the values provided,
  * set the scl_slope and sct_inter of the image to 1 and 0
@@ -438,16 +438,16 @@ extern "C++"
 void cPtrToMat44(mat44 *mat, const float *cMat);
 /* *************************************************************** */
 extern "C++"
-void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned int numMats);
+void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned numMats);
 /* *************************************************************** */
 extern "C++"
 void cPtrToMat33(mat33 *mat, const float *cMat);
 /* *************************************************************** */
 extern "C++" template<typename T>
-void matmnToCptr(const T **mat, T *cMat, unsigned int m, unsigned int n);
+void matmnToCptr(const T **mat, T *cMat, unsigned m, unsigned n);
 /* *************************************************************** */
 extern "C++" template<typename T>
-void cPtrToMatmn(T **mat, const T *cMat, unsigned int m, unsigned int n);
+void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n);
 /* *************************************************************** */
 void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
new file mode 100644
index 00000000..fe380b26
--- /dev/null
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -0,0 +1,222 @@
+/** @file BlockSize.hpp
+ * @author Marc Modat
+ * @date 25/03/2009.
+ *  Copyright (c) 2009-2018, University College London
+ *  Copyright (c) 2018, NiftyReg Developers.
+ *  All rights reserved.
+ * See the LICENSE.txt file in the nifty_reg root folder
+ */
+
+#pragma once
+
+#include <memory>
+
+namespace NiftyReg {
+/* *************************************************************** */
+struct BlockSize {
+    /* _reg_blockMatching_gpu */
+    unsigned target_block;
+    unsigned result_block;
+    /* _reg_mutualinformation_gpu */
+    unsigned reg_smoothJointHistogramX;
+    unsigned reg_smoothJointHistogramY;
+    unsigned reg_smoothJointHistogramZ;
+    unsigned reg_smoothJointHistogramW;
+    unsigned reg_marginaliseTargetX;
+    unsigned reg_marginaliseTargetXY;
+    unsigned reg_marginaliseResultX;
+    unsigned reg_marginaliseResultXY;
+    unsigned reg_getVoxelBasedNMIGradientUsingPW2D;
+    unsigned reg_getVoxelBasedNMIGradientUsingPW3D;
+    unsigned reg_getVoxelBasedNMIGradientUsingPW2x2;
+    /* _reg_globalTransformation_gpu */
+    unsigned reg_affine_deformationField;
+    /* _reg_localTransformation_gpu */
+    unsigned reg_spline_getDeformationField2D;
+    unsigned reg_spline_getDeformationField3D;
+    unsigned reg_spline_getApproxSecondDerivatives2D;
+    unsigned reg_spline_getApproxSecondDerivatives3D;
+    unsigned reg_spline_getApproxBendingEnergy2D;
+    unsigned reg_spline_getApproxBendingEnergy3D;
+    unsigned reg_spline_getApproxBendingEnergyGradient2D;
+    unsigned reg_spline_getApproxBendingEnergyGradient3D;
+    unsigned reg_spline_getApproxJacobianValues2D;
+    unsigned reg_spline_getApproxJacobianValues3D;
+    unsigned reg_spline_getJacobianValues2D;
+    unsigned reg_spline_getJacobianValues3D;
+    unsigned reg_spline_logSquaredValues;
+    unsigned reg_spline_computeApproxJacGradient2D;
+    unsigned reg_spline_computeApproxJacGradient3D;
+    unsigned reg_spline_computeJacGradient2D;
+    unsigned reg_spline_computeJacGradient3D;
+    unsigned reg_spline_approxCorrectFolding3D;
+    unsigned reg_spline_correctFolding3D;
+    unsigned reg_getDeformationFromDisplacement;
+    unsigned reg_getDisplacementFromDeformation;
+    unsigned reg_defField_compose2D;
+    unsigned reg_defField_compose3D;
+    unsigned reg_defField_getJacobianMatrix;
+    /* _reg_optimiser_gpu */
+    unsigned reg_initialiseConjugateGradient;
+    unsigned reg_GetConjugateGradient1;
+    unsigned reg_GetConjugateGradient2;
+    unsigned GetMaximalLength;
+    unsigned reg_updateControlPointPosition;
+    /* _reg_ssd_gpu */
+    unsigned reg_getSquaredDifference;
+    unsigned reg_getSSDGradient;
+    /* _reg_tools_gpu */
+    unsigned reg_voxelCentric2NodeCentric;
+    unsigned reg_convertNMIGradientFromVoxelToRealSpace;
+    unsigned reg_ApplyConvolutionWindowAlongX;
+    unsigned reg_ApplyConvolutionWindowAlongY;
+    unsigned reg_ApplyConvolutionWindowAlongZ;
+    unsigned reg_arithmetic;
+    /* _reg_resampling_gpu */
+    unsigned reg_resampleImage2D;
+    unsigned reg_resampleImage3D;
+    unsigned reg_getImageGradient2D;
+    unsigned reg_getImageGradient3D;
+};
+/* *************************************************************** */
+struct BlockSize100: public BlockSize {
+    BlockSize100() {
+        target_block = 512; // 15 reg - 32 smem - 24 cmem
+        result_block = 384; // 21 reg - 11048 smem - 24 cmem
+        /* _reg_mutualinformation_gpu */
+        reg_smoothJointHistogramX = 384; // 07 reg - 24 smem - 20 cmem
+        reg_smoothJointHistogramY = 320; // 11 reg - 24 smem - 20 cmem
+        reg_smoothJointHistogramZ = 320; // 11 reg - 24 smem - 20 cmem
+        reg_smoothJointHistogramW = 384; // 08 reg - 24 smem - 20 cmem
+        reg_marginaliseTargetX = 384; // 06 reg - 24 smem
+        reg_marginaliseTargetXY = 384; // 07 reg - 24 smem
+        reg_marginaliseResultX = 384; // 06 reg - 24 smem
+        reg_marginaliseResultXY = 384; // 07 reg - 24 smem
+        reg_getVoxelBasedNMIGradientUsingPW2D = 384; // 21 reg - 24 smem - 32 cmem
+        reg_getVoxelBasedNMIGradientUsingPW3D = 320; // 25 reg - 24 smem - 32 cmem
+        reg_getVoxelBasedNMIGradientUsingPW2x2 = 192; // 42 reg - 24 smem - 36 cmem
+        /* _reg_globalTransformation_gpu */
+        reg_affine_deformationField = 512; // 16 reg - 24 smem
+        /* _reg_localTransformation_gpu */
+        reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem
+        reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem
+        reg_spline_getApproxSecondDerivatives2D = 512; // 15 reg - 132 smem - 32 cmem
+        reg_spline_getApproxSecondDerivatives3D = 192; // 38 reg - 672 smem - 104 cmem
+        reg_spline_getApproxBendingEnergy2D = 384; // 07 reg - 24 smem
+        reg_spline_getApproxBendingEnergy3D = 320; // 12 reg - 24 smem
+        reg_spline_getApproxBendingEnergyGradient2D = 512; // 15 reg - 132 smem - 36 cmem
+        reg_spline_getApproxBendingEnergyGradient3D = 256; // 27 reg - 672 smem - 108 cmem
+        reg_spline_getApproxJacobianValues2D = 384; // 17 reg - 104 smem - 36 cmem
+        reg_spline_getApproxJacobianValues3D = 256; // 27 reg - 356 smem - 108 cmem
+        reg_spline_getJacobianValues2D = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem
+        reg_spline_getJacobianValues3D = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem
+        reg_spline_logSquaredValues = 384; // 07 reg - 24 smem - 36 cmem
+        reg_spline_computeApproxJacGradient2D = 320; // 23 reg - 96 smem - 72 cmem
+        reg_spline_computeApproxJacGradient3D = 256; // 32 reg - 384 smem - 144 cmem
+        reg_spline_computeJacGradient2D = 384; // 21 reg - 24 smem - 64 cmem
+        reg_spline_computeJacGradient3D = 256; // 32 reg - 24 smem - 64 cmem
+        reg_spline_approxCorrectFolding3D = 256; // 32 reg - 24 smem - 24 cmem
+        reg_spline_correctFolding3D = 256; // 31 reg - 24 smem - 32 cmem
+        reg_getDeformationFromDisplacement = 384; // 09 reg - 24 smem
+        reg_getDisplacementFromDeformation = 384; // 09 reg - 24 smem
+        reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem
+        reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem
+        reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
+        /* _reg_optimiser_gpu */
+        reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem
+        reg_GetConjugateGradient1 = 320; // 12 reg - 24 smem
+        reg_GetConjugateGradient2 = 384; // 10 reg - 40 smem
+        GetMaximalLength = 384; // 04 reg - 24 smem
+        reg_updateControlPointPosition = 384; // 08 reg - 24 smem
+        /* _reg_ssd_gpu */
+        reg_getSquaredDifference = 320; // 12 reg - 24 smem - 08 cmem
+        reg_getSSDGradient = 320; // 12 reg - 24 smem - 08 cmem
+        /* _reg_tools_gpu */
+        reg_voxelCentric2NodeCentric = 320; // 11 reg - 24 smem - 16 cmem
+        reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
+        reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
+        reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
+        reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem
+        reg_arithmetic = 384; // 5 reg - 24 smem
+        /* _reg_resampling_gpu */
+        reg_resampleImage2D = 320; // 10 reg - 24 smem - 12 cmem
+        reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem
+        reg_getImageGradient2D = 512; // 16 reg - 24 smem - 20 cmem - 24 lmem
+        reg_getImageGradient3D = 320; // 24 reg - 24 smem - 16 cmem - 32 lmem
+#ifndef NDEBUG
+        printf("[NiftyReg DEBUG] NiftyReg_CudaBlock100 constructor called\n");
+#endif
+    }
+};
+/* *************************************************************** */
+struct BlockSize300: public BlockSize {
+    BlockSize300() {
+        target_block = 640; // 45 reg
+        result_block = 640; // 47 reg - ????? smem
+        /* _reg_mutualinformation_gpu */
+        reg_smoothJointHistogramX = 768; // 34 reg
+        reg_smoothJointHistogramY = 768; // 34 reg
+        reg_smoothJointHistogramZ = 768; // 34 reg
+        reg_smoothJointHistogramW = 768; // 34 reg
+        reg_marginaliseTargetX = 1024; // 24 reg
+        reg_marginaliseTargetXY = 1024; // 24 reg
+        reg_marginaliseResultX = 1024; // 24 reg
+        reg_marginaliseResultXY = 1024; // 24 reg
+        reg_getVoxelBasedNMIGradientUsingPW2D = 768; // 38 reg
+        reg_getVoxelBasedNMIGradientUsingPW3D = 640; // 45 reg
+        reg_getVoxelBasedNMIGradientUsingPW2x2 = 576; // 55 reg
+        /* _reg_globalTransformation_gpu */
+        reg_affine_deformationField = 1024; // 23 reg
+        /* _reg_localTransformation_gpu */
+        reg_spline_getDeformationField2D = 768; // 34 reg
+        reg_spline_getDeformationField3D = 768; // 34 reg
+        reg_spline_getApproxSecondDerivatives2D = 1024; // 25 reg
+        reg_spline_getApproxSecondDerivatives3D = 768; // 34 reg
+        reg_spline_getApproxBendingEnergy2D = 1024; // 23 reg
+        reg_spline_getApproxBendingEnergy3D = 1024; // 23 reg
+        reg_spline_getApproxBendingEnergyGradient2D = 1024; // 28 reg
+        reg_spline_getApproxBendingEnergyGradient3D = 768; // 33 reg
+        reg_spline_getApproxJacobianValues2D = 768; // 34 reg
+        reg_spline_getApproxJacobianValues3D = 640; // 46 reg
+        reg_spline_getJacobianValues2D = 768; // 34 reg
+        reg_spline_getJacobianValues3D = 768; // 34 reg
+        reg_spline_logSquaredValues = 1024; // 23 reg
+        reg_spline_computeApproxJacGradient2D = 768; // 34 reg
+        reg_spline_computeApproxJacGradient3D = 768; // 38 reg
+        reg_spline_computeJacGradient2D = 768; // 34 reg
+        reg_spline_computeJacGradient3D = 768; // 37 reg
+        reg_spline_approxCorrectFolding3D = 768; // 34 reg
+        reg_spline_correctFolding3D = 768; // 34 reg
+        reg_getDeformationFromDisplacement = 1024; // 18 reg
+        reg_getDisplacementFromDeformation = 1024; // 18 reg
+        reg_defField_compose2D = 1024; // 23 reg
+        reg_defField_compose3D = 1024; // 24 reg
+        reg_defField_getJacobianMatrix = 768; // 34 reg
+        /* _reg_optimiser_gpu */
+        reg_initialiseConjugateGradient = 1024; // 20 reg
+        reg_GetConjugateGradient1 = 1024; // 22 reg
+        reg_GetConjugateGradient2 = 1024; // 25 reg
+        GetMaximalLength = 1024; // 20 reg
+        reg_updateControlPointPosition = 1024; // 22 reg
+        /* _reg_ssd_gpu */
+        reg_getSquaredDifference = 768; // 34 reg
+        reg_getSSDGradient = 768; // 34 reg
+        /* _reg_tools_gpu */
+        reg_voxelCentric2NodeCentric = 1024; // 23 reg
+        reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg
+        reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg
+        reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg
+        reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg
+        reg_arithmetic = 1024; //
+        /* _reg_resampling_gpu */
+        reg_resampleImage2D = 1024; // 23 reg
+        reg_resampleImage3D = 1024; // 24 reg
+        reg_getImageGradient2D = 768; // 34 reg
+        reg_getImageGradient3D = 768; // 34 reg
+#ifndef NDEBUG
+        printf("[NiftyReg DEBUG] BlockSize300 constructor called\n");
+#endif
+    }
+};
+/* *************************************************************** */
+} // End namespace NiftyReg::Cuda
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index f9197bdc..efef0521 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -62,7 +62,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaAladinContent.cpp
     CudaCompute.cpp
     CudaContent.cpp
-    CudaContextSingleton.cpp
+    CudaContext.cpp
     CudaF3dContent.cpp
     CudaKernelFactory.cpp
     CudaMeasure.cpp
@@ -72,17 +72,16 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaAffineDeformationFieldKernel.cpp
     CudaBlockMatchingKernel.cpp
     CudaConvolutionKernel.cpp
+    CudaNormaliseGradient.cu
     CudaOptimiseKernel.cpp
     CudaResampleImageKernel.cpp
     ../AladinContent.cpp
     _reg_resampling_gpu.cu
-    _reg_blocksize_gpu.cu
     _reg_tools_gpu.cu
     _reg_localTransformation_gpu.cu
     _reg_nmi_gpu.cu
     _reg_ssd_gpu.cu
     _reg_optimiser_gpu.cu
-    NormaliseGradient.cu
 )
 target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda)
 install(TARGETS ${NAME}
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index 14850439..eccdb1ea 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -9,8 +9,8 @@ CudaAladinContent::CudaAladinContent(nifti_image *referenceIn,
                                      int *referenceMaskIn,
                                      mat44 *transformationMatrixIn,
                                      size_t bytesIn,
-                                     const unsigned int percentageOfBlocks,
-                                     const unsigned int inlierLts,
+                                     const unsigned percentageOfBlocks,
+                                     const unsigned inlierLts,
                                      int blockStepSize) :
     AladinContent(referenceIn,
                   floatingIn,
@@ -114,8 +114,8 @@ void CudaAladinContent::AllocateCuPtrs() {
         }
         /* // Removed until CUDA SVD is added back
         if (blockMatchingParams->activeBlockNumber > 0 ) {
-           unsigned int m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim;
-           unsigned int n = 0;
+           unsigned m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim;
+           unsigned n = 0;
 
            if (blockMatchingParams->dim == 2) {
               n = 6;
@@ -213,8 +213,8 @@ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
     }
     /* // Removed until CUDA SVD is added back
      if (blockMatchingParams->activeBlockNumber > 0) {
-         unsigned int m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim;
-         unsigned int n = 0;
+         unsigned m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim;
+         unsigned n = 0;
 
          if (blockMatchingParams->dim == 2) {
              n = 6;
@@ -252,7 +252,7 @@ DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) {
         break;
     case NIFTI_TYPE_UINT32:
         intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-        return static_cast<unsigned int>(intensity > 0 ? reg_round(intensity) : 0);
+        return static_cast<unsigned>(intensity > 0 ? reg_round(intensity) : 0);
         break;
     default:
         return static_cast<DataType>(reg_round(intensity));
@@ -298,7 +298,7 @@ void CudaAladinContent::DownloadImage(nifti_image *image, float *memoryObject, i
         FillImageData<short>(image, memoryObject, datatype);
         break;
     case NIFTI_TYPE_UINT32:
-        FillImageData<unsigned int>(image, memoryObject, datatype);
+        FillImageData<unsigned>(image, memoryObject, datatype);
         break;
     case NIFTI_TYPE_INT32:
         FillImageData<int>(image, memoryObject, datatype);
@@ -442,6 +442,6 @@ void CudaAladinContent::FreeCuPtrs() {
 }
 /* *************************************************************** */
 bool CudaAladinContent::IsCurrentComputationDoubleCapable() {
-    return CudaContextSingleton::Instance().GetIsCardDoubleCapable();
+    return NiftyReg::CudaContext::GetInstance().IsCardDoubleCapable();
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
index b210e294..e8eaad82 100644
--- a/reg-lib/cuda/CudaAladinContent.h
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -1,96 +1,96 @@
-#pragma once
-
-#include "AladinContent.h"
-#include "CudaContextSingleton.h"
-#include "_reg_tools.h"
-
-class CudaAladinContent: public AladinContent {
-public:
-    CudaAladinContent(nifti_image *referenceIn,
-                      nifti_image *floatingIn,
-                      int *referenceMaskIn = nullptr,
-                      mat44 *transformationMatrixIn = nullptr,
-                      size_t bytesIn = sizeof(float),
-                      const unsigned int percentageOfBlocks = 0,
-                      const unsigned int inlierLts = 0,
-                      int blockStepSize = 0);
-    virtual ~CudaAladinContent();
-
-    virtual bool IsCurrentComputationDoubleCapable() override;
-
-    // Device getters
-    virtual float* GetReferenceImageArray_d();
-    virtual float* GetFloatingImageArray_d();
-    virtual float* GetWarpedImageArray_d();
-    virtual float* GetTransformationMatrix_d();
-    virtual float* GetReferencePosition_d();
-    virtual float* GetWarpedPosition_d();
-    virtual float* GetDeformationFieldArray_d();
-    virtual float* GetReferenceMat_d();
-    virtual float* GetFloIJKMat_d();
-
-    //	float* GetAR_d(); // Removed until CUDA SVD is added back
-    //	float* GetU_d(); // Removed until CUDA SVD is added back
-    //	float* GetVT_d(); // Removed until CUDA SVD is added back
-    //	float* GetSigma_d(); // Removed until CUDA SVD is added back
-    //	float* GetLengths_d(); // Removed until CUDA SVD is added back
-    //	float* GetNewWarpedPos_d(); // Removed until CUDA SVD is added back
-
-    virtual int* GetTotalBlock_d();
-    virtual int* GetMask_d();
-
-    virtual int* GetReferenceDims();
-    virtual int* GetFloatingDims();
-
-    // CPU getters with data downloaded from device
-    virtual _reg_blockMatchingParam* GetBlockMatchingParams() override;
-    virtual nifti_image* GetDeformationField() override;
-    virtual nifti_image* GetWarped() override;
-
-private:
-    void InitVars();
-    void AllocateCuPtrs();
-    void FreeCuPtrs();
-
-    float *referenceImageArray_d;
-    float *floatingImageArray_d;
-    float *warpedImageArray_d;
-    float *deformationFieldArray_d;
-    float *referencePosition_d;
-    float *warpedPosition_d;
-    int   *totalBlock_d, *mask_d;
-
-    float *transformationMatrix_d;
-    float *referenceMat_d;
-    float *floIJKMat_d;
-
-    //svd
-    //	float *AR_d;//A and then pseudoinverse  // Removed until CUDA SVD is added back
-    //	float *U_d; // Removed until CUDA SVD is added back
-    //	float *VT_d; // Removed until CUDA SVD is added back
-    //	float *Sigma_d; // Removed until CUDA SVD is added back
-    //	float *lengths_d; // Removed until CUDA SVD is added back
-    //	float *newWarpedPos_d; // Removed until CUDA SVD is added back
-
-    int referenceDims[4];
-    int floatingDims[4];
-
-    void DownloadImage(nifti_image *image, float* memoryObject, int datatype);
-    template<class T>
-    void FillImageData(nifti_image *image, float* memoryObject, int type);
-
-    template<class FloatingTYPE>
-    FloatingTYPE FillWarpedImageData(float intensity, int datatype);
-
-#ifdef NR_TESTING
-public:
-#else
-protected:
-#endif
-    // Functions for testing
-    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedImageIn) override;
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
-    virtual void SetReferenceMask(int *referenceMaskIn) override;
-    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
-};
+#pragma once
+
+#include "AladinContent.h"
+#include "CudaContext.hpp"
+#include "_reg_tools.h"
+
+class CudaAladinContent: public AladinContent {
+public:
+    CudaAladinContent(nifti_image *referenceIn,
+                      nifti_image *floatingIn,
+                      int *referenceMaskIn = nullptr,
+                      mat44 *transformationMatrixIn = nullptr,
+                      size_t bytesIn = sizeof(float),
+                      const unsigned percentageOfBlocks = 0,
+                      const unsigned inlierLts = 0,
+                      int blockStepSize = 0);
+    virtual ~CudaAladinContent();
+
+    virtual bool IsCurrentComputationDoubleCapable() override;
+
+    // Device getters
+    virtual float* GetReferenceImageArray_d();
+    virtual float* GetFloatingImageArray_d();
+    virtual float* GetWarpedImageArray_d();
+    virtual float* GetTransformationMatrix_d();
+    virtual float* GetReferencePosition_d();
+    virtual float* GetWarpedPosition_d();
+    virtual float* GetDeformationFieldArray_d();
+    virtual float* GetReferenceMat_d();
+    virtual float* GetFloIJKMat_d();
+
+    //	float* GetAR_d(); // Removed until CUDA SVD is added back
+    //	float* GetU_d(); // Removed until CUDA SVD is added back
+    //	float* GetVT_d(); // Removed until CUDA SVD is added back
+    //	float* GetSigma_d(); // Removed until CUDA SVD is added back
+    //	float* GetLengths_d(); // Removed until CUDA SVD is added back
+    //	float* GetNewWarpedPos_d(); // Removed until CUDA SVD is added back
+
+    virtual int* GetTotalBlock_d();
+    virtual int* GetMask_d();
+
+    virtual int* GetReferenceDims();
+    virtual int* GetFloatingDims();
+
+    // CPU getters with data downloaded from device
+    virtual _reg_blockMatchingParam* GetBlockMatchingParams() override;
+    virtual nifti_image* GetDeformationField() override;
+    virtual nifti_image* GetWarped() override;
+
+private:
+    void InitVars();
+    void AllocateCuPtrs();
+    void FreeCuPtrs();
+
+    float *referenceImageArray_d;
+    float *floatingImageArray_d;
+    float *warpedImageArray_d;
+    float *deformationFieldArray_d;
+    float *referencePosition_d;
+    float *warpedPosition_d;
+    int   *totalBlock_d, *mask_d;
+
+    float *transformationMatrix_d;
+    float *referenceMat_d;
+    float *floIJKMat_d;
+
+    //svd
+    //	float *AR_d;//A and then pseudoinverse  // Removed until CUDA SVD is added back
+    //	float *U_d; // Removed until CUDA SVD is added back
+    //	float *VT_d; // Removed until CUDA SVD is added back
+    //	float *Sigma_d; // Removed until CUDA SVD is added back
+    //	float *lengths_d; // Removed until CUDA SVD is added back
+    //	float *newWarpedPos_d; // Removed until CUDA SVD is added back
+
+    int referenceDims[4];
+    int floatingDims[4];
+
+    void DownloadImage(nifti_image *image, float* memoryObject, int datatype);
+    template<class T>
+    void FillImageData(nifti_image *image, float* memoryObject, int type);
+
+    template<class FloatingTYPE>
+    FloatingTYPE FillWarpedImageData(float intensity, int datatype);
+
+#ifdef NR_TESTING
+public:
+#else
+protected:
+#endif
+    // Functions for testing
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    virtual void SetWarped(nifti_image *warpedImageIn) override;
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetReferenceMask(int *referenceMaskIn) override;
+    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
+};
diff --git a/reg-lib/cuda/CudaAladinContentCreator.h b/reg-lib/cuda/CudaAladinContentCreator.h
index 278e6f1f..7da8c0fd 100644
--- a/reg-lib/cuda/CudaAladinContentCreator.h
+++ b/reg-lib/cuda/CudaAladinContentCreator.h
@@ -10,8 +10,8 @@ class CudaAladinContentCreator: public AladinContentCreator {
                                   int *referenceMask = nullptr,
                                   mat44 *transformationMatrix = nullptr,
                                   size_t bytes = sizeof(float),
-                                  const unsigned int percentageOfBlocks = 0,
-                                  const unsigned int inlierLts = 0,
+                                  const unsigned percentageOfBlocks = 0,
+                                  const unsigned inlierLts = 0,
                                   int blockStepSize = 0) override {
         return new CudaAladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize);
     }
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 58195be2..3b9db5e5 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -1,9 +1,9 @@
 #include "CudaCompute.h"
 #include "CudaF3dContent.h"
+#include "CudaNormaliseGradient.hpp"
 #include "_reg_resampling_gpu.h"
 #include "_reg_localTransformation_gpu.h"
 #include "_reg_optimiser_gpu.h"
-#include "NormaliseGradient.hpp"
 
 /* *************************************************************** */
 void CudaCompute::ResampleImage(int inter, float paddingValue) {
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 94bd9034..ddc464ce 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -67,7 +67,7 @@ void CudaContent::DeallocateWarped() {
 }
 /* *************************************************************** */
 bool CudaContent::IsCurrentComputationDoubleCapable() {
-    return CudaContextSingleton::Instance().GetIsCardDoubleCapable();
+    return NiftyReg::CudaContext::GetInstance().IsCardDoubleCapable();
 }
 /* *************************************************************** */
 nifti_image* CudaContent::GetDeformationField() {
@@ -165,7 +165,7 @@ DataType CudaContent::CastImageData(float intensity, int datatype) {
         break;
     case NIFTI_TYPE_UINT32:
         intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-        return static_cast<unsigned int>(intensity > 0 ? reg_round(intensity) : 0);
+        return static_cast<unsigned>(intensity > 0 ? reg_round(intensity) : 0);
         break;
     default:
         return static_cast<DataType>(reg_round(intensity));
@@ -211,7 +211,7 @@ void CudaContent::DownloadImage(nifti_image *image, float *memoryObject, int dat
         FillImageData<short>(image, memoryObject, datatype);
         break;
     case NIFTI_TYPE_UINT32:
-        FillImageData<unsigned int>(image, memoryObject, datatype);
+        FillImageData<unsigned>(image, memoryObject, datatype);
         break;
     case NIFTI_TYPE_INT32:
         FillImageData<int>(image, memoryObject, datatype);
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index a32316ac..7e1f08c1 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -1,62 +1,61 @@
-#pragma once
-
-#include "Content.h"
-#include "CudaContextSingleton.h"
-#include "_reg_common_cuda.h"
-#include "_reg_tools.h"
-
-class CudaContent: public virtual Content {
-public:
-    CudaContent() = delete;
-    CudaContent(nifti_image *referenceIn,
-                nifti_image *floatingIn,
-                int *referenceMaskIn = nullptr,
-                mat44 *transformationMatrixIn = nullptr,
-                size_t bytesIn = sizeof(float));
-    virtual ~CudaContent();
-
-    virtual bool IsCurrentComputationDoubleCapable() override;
-
-    // Getters
-    virtual nifti_image* GetDeformationField() override;
-    virtual nifti_image* GetWarped() override;
-    virtual cudaArray* GetReferenceCuda() { return referenceCuda; }
-    virtual cudaArray* GetFloatingCuda() { return floatingCuda; }
-    virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; }
-    virtual int* GetReferenceMaskCuda() { return referenceMaskCuda; }
-    virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; }
-    virtual float* GetWarpedCuda() { return warpedCuda; }
-
-    // Methods for transferring data from nifti to device
-    virtual void UpdateDeformationField() override;
-
-protected:
-    cudaArray *referenceCuda = nullptr;
-    cudaArray *floatingCuda = nullptr;
-    float4 *deformationFieldCuda = nullptr;
-    int *referenceMaskCuda = nullptr;
-    float *transformationMatrixCuda = nullptr;
-    float *warpedCuda = nullptr;
-
-private:
-    void AllocateImages();
-    void DeallocateImages();
-    void AllocateDeformationField();
-    void DeallocateDeformationField();
-    void AllocateWarped();
-    void DeallocateWarped();
-    template<class DataType> DataType CastImageData(float intensity, int datatype);
-    template<class DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
-    void DownloadImage(nifti_image *image, float *memoryObject, int datatype);
-
-#ifdef NR_TESTING
-public:
-#else
-protected:
-#endif
-    // Functions for testing
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
-    virtual void SetReferenceMask(int *referenceMaskIn) override;
-    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedIn) override;
-};
+#pragma once
+
+#include "Content.h"
+#include "_reg_common_cuda.h"
+#include "_reg_tools.h"
+
+class CudaContent: public virtual Content {
+public:
+    CudaContent() = delete;
+    CudaContent(nifti_image *referenceIn,
+                nifti_image *floatingIn,
+                int *referenceMaskIn = nullptr,
+                mat44 *transformationMatrixIn = nullptr,
+                size_t bytesIn = sizeof(float));
+    virtual ~CudaContent();
+
+    virtual bool IsCurrentComputationDoubleCapable() override;
+
+    // Getters
+    virtual nifti_image* GetDeformationField() override;
+    virtual nifti_image* GetWarped() override;
+    virtual cudaArray* GetReferenceCuda() { return referenceCuda; }
+    virtual cudaArray* GetFloatingCuda() { return floatingCuda; }
+    virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; }
+    virtual int* GetReferenceMaskCuda() { return referenceMaskCuda; }
+    virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; }
+    virtual float* GetWarpedCuda() { return warpedCuda; }
+
+    // Methods for transferring data from nifti to device
+    virtual void UpdateDeformationField() override;
+
+protected:
+    cudaArray *referenceCuda = nullptr;
+    cudaArray *floatingCuda = nullptr;
+    float4 *deformationFieldCuda = nullptr;
+    int *referenceMaskCuda = nullptr;
+    float *transformationMatrixCuda = nullptr;
+    float *warpedCuda = nullptr;
+
+private:
+    void AllocateImages();
+    void DeallocateImages();
+    void AllocateDeformationField();
+    void DeallocateDeformationField();
+    void AllocateWarped();
+    void DeallocateWarped();
+    template<class DataType> DataType CastImageData(float intensity, int datatype);
+    template<class DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
+    void DownloadImage(nifti_image *image, float *memoryObject, int datatype);
+
+#ifdef NR_TESTING
+public:
+#else
+protected:
+#endif
+    // Functions for testing
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetReferenceMask(int *referenceMaskIn) override;
+    virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
+    virtual void SetWarped(nifti_image *warpedIn) override;
+};
diff --git a/reg-lib/cuda/CudaContextSingleton.cpp b/reg-lib/cuda/CudaContext.cpp
similarity index 64%
rename from reg-lib/cuda/CudaContextSingleton.cpp
rename to reg-lib/cuda/CudaContext.cpp
index fc61aa90..70351a43 100644
--- a/reg-lib/cuda/CudaContextSingleton.cpp
+++ b/reg-lib/cuda/CudaContext.cpp
@@ -1,8 +1,9 @@
-#include "CudaContextSingleton.h"
+#include "CudaContext.hpp"
 #include "_reg_common_cuda.h"
 
+namespace NiftyReg {
 /* *************************************************************** */
-CudaContextSingleton::CudaContextSingleton() {
+CudaContext::CudaContext() {
     // The CUDA card is setup
     cuInit(0);
     int device_count = 0;
@@ -12,50 +13,57 @@ CudaContextSingleton::CudaContextSingleton() {
     sprintf(text, "[NiftyReg CUDA] %i card(s) detected\n", device_count);
     reg_print_msg_debug(text);
 #endif
-    this->cudaContext = nullptr;
-    this->numDevices = device_count;
-    this->cudaIdx = 999;
-    PickCard(this->cudaIdx);
+    cudaContext = nullptr;
+    numDevices = device_count;
+    cudaIdx = 999;
+    PickCard(cudaIdx);
 }
 /* *************************************************************** */
-void CudaContextSingleton::SetCudaIdx(unsigned int cudaIdxIn) {
-    if (cudaIdxIn >= this->numDevices) {
+void CudaContext::SetCudaIdx(unsigned cudaIdxIn) {
+    if (cudaIdxIn >= numDevices) {
         reg_print_msg_error("The specified cuda card id is not defined");
         reg_print_msg_error("Run reg_gpuinfo to get the proper id");
         reg_exit();
     }
-    this->cudaIdx = cudaIdxIn;
-    PickCard(this->cudaIdx);
+    cudaIdx = cudaIdxIn;
+    PickCard(cudaIdx);
 }
 /* *************************************************************** */
-CUcontext CudaContextSingleton::GetContext() {
-    return this->cudaContext;
+CUcontext CudaContext::GetContext() {
+    return cudaContext;
 }
 /* *************************************************************** */
-void CudaContextSingleton::PickCard(unsigned deviceId = 999) {
+void CudaContext::SetBlockSize(int major) {
+    if (major >= 3)
+        blockSize.reset(new BlockSize300());
+    else
+        blockSize.reset(new BlockSize100());
+}
+/* *************************************************************** */
+void CudaContext::PickCard(unsigned deviceId = 999) {
     struct cudaDeviceProp deviceProp;
-    if (deviceId < this->numDevices) {
-        this->cudaIdx = deviceId;
-        NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx));
-        NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx));
+    if (deviceId < numDevices) {
+        cudaIdx = deviceId;
+        NR_CUDA_SAFE_CALL(cudaSetDevice(cudaIdx));
+        NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, cudaIdx));
 
-        cudaGetDeviceProperties(&deviceProp, this->cudaIdx);
+        cudaGetDeviceProperties(&deviceProp, cudaIdx);
         if (deviceProp.major > 1) {
-            this->isCardDoubleCapable = true;
+            isCardDoubleCapable = true;
         } else if (deviceProp.major == 1 && deviceProp.minor > 2) {
-            this->isCardDoubleCapable = true;
+            isCardDoubleCapable = true;
         } else {
-            this->isCardDoubleCapable = false;
+            isCardDoubleCapable = false;
         }
-        NiftyReg_CudaBlock::GetInstance(deviceProp.major);
+        SetBlockSize(deviceProp.major);
         return;
     }
 
     // following code is from cutGetMaxGflopsDeviceId()
     int max_gflops_device = 0;
     int max_gflops = 0;
-    unsigned int current_device = 0;
-    while (current_device < this->numDevices) {
+    unsigned current_device = 0;
+    while (current_device < numDevices) {
         cudaGetDeviceProperties(&deviceProp, current_device);
         int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate;
         if (gflops > max_gflops) {
@@ -65,7 +73,7 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) {
         ++current_device;
     }
     NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device));
-    NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device));
+    NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device));
     NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
 
     if (deviceProp.major < 1) {
@@ -92,23 +100,25 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) {
         printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", deviceProp.clockRate / 1000);
         printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", deviceProp.multiProcessorCount);
 #endif
-        this->cudaIdx = max_gflops_device;
-        cudaGetDeviceProperties(&deviceProp, this->cudaIdx);
+        cudaIdx = max_gflops_device;
+        cudaGetDeviceProperties(&deviceProp, cudaIdx);
         if (deviceProp.major > 1) {
-            this->isCardDoubleCapable = true;
+            isCardDoubleCapable = true;
         } else if (deviceProp.major == 1 && deviceProp.minor > 2) {
-            this->isCardDoubleCapable = true;
+            isCardDoubleCapable = true;
         } else {
-            this->isCardDoubleCapable = false;
+            isCardDoubleCapable = false;
         }
-        NiftyReg_CudaBlock::GetInstance(deviceProp.major);
+        SetBlockSize(deviceProp.major);
     }
 }
 /* *************************************************************** */
-bool CudaContextSingleton::GetIsCardDoubleCapable() {
-    return this->isCardDoubleCapable;
+bool CudaContext::IsCardDoubleCapable() {
+    return isCardDoubleCapable;
 }
 /* *************************************************************** */
-CudaContextSingleton::~CudaContextSingleton() {
-    cuCtxDestroy(this->cudaContext);
+CudaContext::~CudaContext() {
+    cuCtxDestroy(cudaContext);
 }
+/* *************************************************************** */
+} // namespace NiftyReg::Cuda
diff --git a/reg-lib/cuda/CudaContext.hpp b/reg-lib/cuda/CudaContext.hpp
new file mode 100644
index 00000000..0e4af74e
--- /dev/null
+++ b/reg-lib/cuda/CudaContext.hpp
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <cuda.h>
+#include "_reg_maths.h"
+#include "BlockSize.hpp"
+
+namespace NiftyReg {
+/* *************************************************************** */
+class CudaContext {
+public:
+    CudaContext(CudaContext const&) = delete;
+    void operator=(CudaContext const&) = delete;
+
+    static CudaContext& GetInstance() {
+        // Instantiated on first use.
+        static CudaContext instance; // Guaranteed to be destroyed.
+        return instance;
+    }
+
+    static const BlockSize* GetBlockSize() {
+        return GetInstance().blockSize.get();
+    }
+
+    void SetCudaIdx(unsigned cudaIdxIn);
+    CUcontext GetContext();
+    bool IsCardDoubleCapable();
+
+private:
+    CudaContext();
+    ~CudaContext();
+
+    bool isCardDoubleCapable;
+    CUcontext cudaContext;
+    unsigned numDevices;
+    unsigned cudaIdx;
+    std::unique_ptr<BlockSize> blockSize;
+
+    void PickCard(unsigned deviceId);
+    void SetBlockSize(int major);
+};
+/* *************************************************************** */
+}   // namespace NiftyReg
diff --git a/reg-lib/cuda/CudaContextSingleton.h b/reg-lib/cuda/CudaContextSingleton.h
deleted file mode 100644
index b46cb879..00000000
--- a/reg-lib/cuda/CudaContextSingleton.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#pragma once
-
-#include "_reg_maths.h"
-#include <cuda.h>
-
-class CudaContextSingleton {
-public:
-    static CudaContextSingleton& Instance() {
-        static CudaContextSingleton instance; // Guaranteed to be destroyed.
-        // Instantiated on first use.
-        return instance;
-    }
-    void SetCudaIdx(unsigned int cudaIdxIn);
-    void PickCard(unsigned deviceId);
-
-    CUcontext GetContext();
-
-    bool GetIsCardDoubleCapable();
-
-private:
-
-    static CudaContextSingleton* _instance;
-
-    CudaContextSingleton();
-    ~CudaContextSingleton();
-
-    CudaContextSingleton(CudaContextSingleton const&);// Don't Implement
-    void operator=(CudaContextSingleton const&); // Don't implement
-
-    bool isCardDoubleCapable;
-    CUcontext cudaContext;
-    unsigned numDevices;
-    unsigned cudaIdx;
-};
diff --git a/reg-lib/cuda/CudaConvolutionKernel.h b/reg-lib/cuda/CudaConvolutionKernel.h
index 1fa5be8e..832ec853 100644
--- a/reg-lib/cuda/CudaConvolutionKernel.h
+++ b/reg-lib/cuda/CudaConvolutionKernel.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "ConvolutionKernel.h"
-#include "CudaContextSingleton.h"
+#include "CudaContext.hpp"
 
 // A kernel function for convolution (gaussian smoothing?)
 class CudaConvolutionKernel: public ConvolutionKernel {
diff --git a/reg-lib/cuda/NormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
similarity index 73%
rename from reg-lib/cuda/NormaliseGradient.cu
rename to reg-lib/cuda/CudaNormaliseGradient.cu
index 4d5ed26f..674dff82 100644
--- a/reg-lib/cuda/NormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -1,14 +1,14 @@
-#include "NormaliseGradient.hpp"
+#include "CudaNormaliseGradient.hpp"
 #include "_reg_tools_gpu.h"
 
 /* *************************************************************** */
 __global__ static void GetMaximalLengthKernel(float *dists,
                                               cudaTextureObject_t imageTexture,
-                                              const size_t nVoxels,
+                                              const unsigned nVoxels,
                                               const bool optimiseX,
                                               const bool optimiseY,
                                               const bool optimiseZ) {
-    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         float4 gradValue = tex1Dfetch<float4>(imageTexture, tid);
         dists[tid] = sqrtf((optimiseX ? gradValue.x * gradValue.x : 0) +
@@ -29,11 +29,11 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
     float *dists = nullptr;
     NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float)));
 
-    const unsigned int blocks = static_cast<unsigned int>(NiftyReg_CudaBlock::GetInstance(0)->Block_GetMaximalLength);
-    const unsigned int grids = static_cast<unsigned int>(reg_ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(blocks))));
-    dim3 blockDims(blocks, 1, 1);
-    dim3 gridDims(grids, grids, 1);
-    GetMaximalLengthKernel<<<gridDims, blockDims>>>(dists, *imageTexture, nVoxels, optimiseX, optimiseY, optimiseZ);
+    const unsigned threads = NiftyReg::CudaContext::GetBlockSize()->GetMaximalLength;
+    const unsigned blocks = static_cast<unsigned>(reg_ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
+    dim3 blockDims(threads, 1, 1);
+    dim3 gridDims(blocks, blocks, 1);
+    GetMaximalLengthKernel<<<gridDims, blockDims>>>(dists, *imageTexture, static_cast<unsigned>(nVoxels), optimiseX, optimiseY, optimiseZ);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 
     const float maxDistance = reg_maxReduction_gpu(dists, nVoxels);
@@ -43,12 +43,12 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
 }
 /* *************************************************************** */
 __global__ static void NormaliseGradientKernel(float4 *imageCuda,
-                                               const size_t nVoxels,
+                                               const unsigned nVoxels,
                                                const float maxGradLenInv,
                                                const bool optimiseX,
                                                const bool optimiseY,
                                                const bool optimiseZ) {
-    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         float4 grad = imageCuda[tid];
         imageCuda[tid] = make_float4(optimiseX ? grad.x * maxGradLenInv : 0,
@@ -64,11 +64,11 @@ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda,
                                        const bool& optimiseX,
                                        const bool& optimiseY,
                                        const bool& optimiseZ) {
-    const unsigned int blocks = static_cast<unsigned int>(NiftyReg_CudaBlock::GetInstance(0)->Block_reg_arithmetic);
-    const unsigned int grids = static_cast<unsigned int>(ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(blocks))));
-    const dim3 gridDims(grids, grids, 1);
-    const dim3 blockDims(blocks, 1, 1);
-    NormaliseGradientKernel<<<gridDims, blockDims>>>(imageCuda, nVoxels, 1 / maxGradLength, optimiseX, optimiseY, optimiseZ);
+    const unsigned threads = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = static_cast<unsigned>(ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
+    const dim3 blockDims(threads, 1, 1);
+    const dim3 gridDims(blocks, blocks, 1);
+    NormaliseGradientKernel<<<gridDims, blockDims>>>(imageCuda, static_cast<unsigned>(nVoxels), 1 / maxGradLength, optimiseX, optimiseY, optimiseZ);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/NormaliseGradient.hpp b/reg-lib/cuda/CudaNormaliseGradient.hpp
similarity index 100%
rename from reg-lib/cuda/NormaliseGradient.hpp
rename to reg-lib/cuda/CudaNormaliseGradient.hpp
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.cu b/reg-lib/cuda/_reg_blocksize_gpu.cu
deleted file mode 100755
index 32be98ec..00000000
--- a/reg-lib/cuda/_reg_blocksize_gpu.cu
+++ /dev/null
@@ -1,219 +0,0 @@
-/** @file _reg_blocksize_gpu.cu
- * @author Marc Modat
- * @date 25/03/2009.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- * See the LICENSE.txt file in the nifty_reg root folder
- */
-
-#include "_reg_blocksize_gpu.h"
-
-/* ******************************** */
-/* ******************************** */
-NiftyReg_CudaBlock100 *NiftyReg_CudaBlock::instance = nullptr;
-/* ******************************** */
-/* ******************************** */
-NiftyReg_CudaBlock100::NiftyReg_CudaBlock100() {
-    Block_target_block = 512; // 15 reg - 32 smem - 24 cmem
-    Block_result_block = 384; // 21 reg - 11048 smem - 24 cmem
-    /* _reg_mutualinformation_gpu */
-    Block_reg_smoothJointHistogramX = 384; // 07 reg - 24 smem - 20 cmem
-    Block_reg_smoothJointHistogramY = 320; // 11 reg - 24 smem - 20 cmem
-    Block_reg_smoothJointHistogramZ = 320; // 11 reg - 24 smem - 20 cmem
-    Block_reg_smoothJointHistogramW = 384; // 08 reg - 24 smem - 20 cmem
-    Block_reg_marginaliseTargetX = 384; // 06 reg - 24 smem
-    Block_reg_marginaliseTargetXY = 384; // 07 reg - 24 smem
-    Block_reg_marginaliseResultX = 384; // 06 reg - 24 smem
-    Block_reg_marginaliseResultXY = 384; // 07 reg - 24 smem
-    Block_reg_getVoxelBasedNMIGradientUsingPW2D = 384; // 21 reg - 24 smem - 32 cmem
-    Block_reg_getVoxelBasedNMIGradientUsingPW3D = 320; // 25 reg - 24 smem - 32 cmem
-    Block_reg_getVoxelBasedNMIGradientUsingPW2x2 = 192; // 42 reg - 24 smem - 36 cmem
-    /* _reg_globalTransformation_gpu */
-    Block_reg_affine_deformationField = 512; // 16 reg - 24 smem
-    /* _reg_localTransformation_gpu */
-    Block_reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem
-    Block_reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem
-    Block_reg_spline_getApproxSecondDerivatives2D = 512; // 15 reg - 132 smem - 32 cmem
-    Block_reg_spline_getApproxSecondDerivatives3D = 192; // 38 reg - 672 smem - 104 cmem
-    Block_reg_spline_getApproxBendingEnergy2D = 384; // 07 reg - 24 smem
-    Block_reg_spline_getApproxBendingEnergy3D = 320; // 12 reg - 24 smem
-    Block_reg_spline_getApproxBendingEnergyGradient2D = 512; // 15 reg - 132 smem - 36 cmem
-    Block_reg_spline_getApproxBendingEnergyGradient3D = 256; // 27 reg - 672 smem - 108 cmem
-    Block_reg_spline_getApproxJacobianValues2D = 384; // 17 reg - 104 smem - 36 cmem
-    Block_reg_spline_getApproxJacobianValues3D = 256; // 27 reg - 356 smem - 108 cmem
-    Block_reg_spline_getJacobianValues2D = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem
-    Block_reg_spline_getJacobianValues3D = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem
-    Block_reg_spline_logSquaredValues = 384; // 07 reg - 24 smem - 36 cmem
-    Block_reg_spline_computeApproxJacGradient2D = 320; // 23 reg - 96 smem - 72 cmem
-    Block_reg_spline_computeApproxJacGradient3D = 256; // 32 reg - 384 smem - 144 cmem
-    Block_reg_spline_computeJacGradient2D = 384; // 21 reg - 24 smem - 64 cmem
-    Block_reg_spline_computeJacGradient3D = 256; // 32 reg - 24 smem - 64 cmem
-    Block_reg_spline_approxCorrectFolding3D = 256; // 32 reg - 24 smem - 24 cmem
-    Block_reg_spline_correctFolding3D = 256; // 31 reg - 24 smem - 32 cmem
-    Block_reg_getDeformationFromDisplacement = 384; // 09 reg - 24 smem
-    Block_reg_getDisplacementFromDeformation = 384; // 09 reg - 24 smem
-    Block_reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem
-    Block_reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem
-    Block_reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
-    /* _reg_optimiser_gpu */
-    Block_reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem
-    Block_reg_GetConjugateGradient1 = 320; // 12 reg - 24 smem
-    Block_reg_GetConjugateGradient2 = 384; // 10 reg - 40 smem
-    Block_GetMaximalLength = 384; // 04 reg - 24 smem
-    Block_reg_updateControlPointPosition = 384; // 08 reg - 24 smem
-    /* _reg_ssd_gpu */
-    Block_reg_getSquaredDifference = 320; // 12 reg - 24 smem - 08 cmem
-    Block_reg_getSSDGradient = 320; // 12 reg - 24 smem - 08 cmem
-    /* _reg_tools_gpu */
-    Block_reg_voxelCentric2NodeCentric = 320; // 11 reg - 24 smem - 16 cmem
-    Block_reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
-    Block_reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
-    Block_reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
-    Block_reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem
-    Block_reg_arithmetic = 384; // 5 reg - 24 smem
-    /* _reg_resampling_gpu */
-    Block_reg_resampleImage2D = 320; // 10 reg - 24 smem - 12 cmem
-    Block_reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem
-    Block_reg_getImageGradient2D = 512; // 16 reg - 24 smem - 20 cmem - 24 lmem
-    Block_reg_getImageGradient3D = 320; // 24 reg - 24 smem - 16 cmem - 32 lmem
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] NiftyReg_CudaBlock100 constructor called\n");
-#endif
-}
-/* ******************************** */
-NiftyReg_CudaBlock200::NiftyReg_CudaBlock200() {
-//    Block_target_block = ; //
-//    Block_result_block = ; //
-//    /* _reg_mutualinformation_gpu */
-//    Block_reg_smoothJointHistogramX = ; //
-//    Block_reg_smoothJointHistogramY = ; //
-//    Block_reg_smoothJointHistogramZ = ; //
-//    Block_reg_smoothJointHistogramW = ; //
-//    Block_reg_marginaliseTargetX = ; //
-//    Block_reg_marginaliseTargetXY = ; //
-//    Block_reg_marginaliseResultX = ; //
-//    Block_reg_marginaliseResultXY = ; //
-//    Block_reg_getVoxelBasedNMIGradientUsingPW2D = ; //
-//    Block_reg_getVoxelBasedNMIGradientUsingPW3D = ; //
-//    Block_reg_getVoxelBasedNMIGradientUsingPW2x2 = ; //
-//    /* _reg_globalTransformation_gpu */
-//    Block_reg_affine_deformationField = ; //
-//    /* _reg_localTransformation_gpu */
-//    Block_reg_spline_getDeformationField2D = ; //
-//    Block_reg_spline_getDeformationField3D = ; //
-//    Block_reg_spline_getApproxSecondDerivatives2D = ; //
-//    Block_reg_spline_getApproxSecondDerivatives3D = ; //
-//    Block_reg_spline_getApproxBendingEnergy2D = ; //
-//    Block_reg_spline_getApproxBendingEnergy3D = ; //
-//    Block_reg_spline_getApproxBendingEnergyGradient2D = ; //
-//    Block_reg_spline_getApproxBendingEnergyGradient3D = ; //
-//    Block_reg_spline_getApproxJacobianValues2D = ; //
-//    Block_reg_spline_getApproxJacobianValues3D = ; //
-//    Block_reg_spline_getJacobianValues2D = ; //
-//    Block_reg_spline_getJacobianValues3D = ; //
-//    Block_reg_spline_logSquaredValues = ; //
-//    Block_reg_spline_computeApproxJacGradient2D = ; //
-//    Block_reg_spline_computeApproxJacGradient3D = ; //
-//    Block_reg_spline_computeJacGradient2D = ; //
-//    Block_reg_spline_computeJacGradient3D = ; //
-//    Block_reg_spline_approxCorrectFolding3D = ; //
-//    Block_reg_spline_correctFolding3D = ; //
-//    Block_reg_getDeformationFromDisplacement = ; //
-//    Block_reg_getDisplacementFromDeformation = ; //
-//    Block_reg_defField_compose2D = ; //
-//    Block_reg_defField_compose3D = ; //
-//    Block_reg_defField_getJacobianMatrix = ; //
-//    /* _reg_optimiser_gpu */
-//    Block_reg_initialiseConjugateGradient = ; //
-//    Block_reg_GetConjugateGradient1 = ; //
-//    Block_reg_GetConjugateGradient2 = ; //
-//    Block_GetMaximalLength = ; //
-//    Block_reg_updateControlPointPosition = ; //
-//    /* _reg_ssd_gpu */
-//    Block_reg_getSquaredDifference = ; //
-//    Block_reg_getSSDGradient = ; //
-//    /* _reg_tools_gpu */
-//    Block_reg_voxelCentric2NodeCentric = ; //
-//    Block_reg_convertNMIGradientFromVoxelToRealSpace = ; //
-//    Block_reg_ApplyConvolutionWindowAlongX = ; //
-//    Block_reg_ApplyConvolutionWindowAlongY = ; //
-//    Block_reg_ApplyConvolutionWindowAlongZ = ; //
-//    Block_reg_arithmetic = ; //
-//    /* _reg_resampling_gpu */
-//    Block_reg_resampleImage2D = ; //
-//    Block_reg_resampleImage3D = ; //
-//    Block_reg_getImageGradient2D = ; //
-//    Block_reg_getImageGradient3D = ; //
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] NiftyReg_CudaBlock200 constructor called\n");
-#endif
-}
-/* ******************************** */
-NiftyReg_CudaBlock300::NiftyReg_CudaBlock300() {
-    Block_target_block = 640; // 45 reg
-    Block_result_block = 640; // 47 reg - ????? smem
-    /* _reg_mutualinformation_gpu */
-    Block_reg_smoothJointHistogramX = 768; // 34 reg
-    Block_reg_smoothJointHistogramY = 768; // 34 reg
-    Block_reg_smoothJointHistogramZ = 768; // 34 reg
-    Block_reg_smoothJointHistogramW = 768; // 34 reg
-    Block_reg_marginaliseTargetX = 1024; // 24 reg
-    Block_reg_marginaliseTargetXY = 1024; // 24 reg
-    Block_reg_marginaliseResultX = 1024; // 24 reg
-    Block_reg_marginaliseResultXY = 1024; // 24 reg
-    Block_reg_getVoxelBasedNMIGradientUsingPW2D = 768; // 38 reg
-    Block_reg_getVoxelBasedNMIGradientUsingPW3D = 640; // 45 reg
-    Block_reg_getVoxelBasedNMIGradientUsingPW2x2 = 576; // 55 reg
-    /* _reg_globalTransformation_gpu */
-    Block_reg_affine_deformationField = 1024; // 23 reg
-    /* _reg_localTransformation_gpu */
-    Block_reg_spline_getDeformationField2D = 768; // 34 reg
-    Block_reg_spline_getDeformationField3D = 768; // 34 reg
-    Block_reg_spline_getApproxSecondDerivatives2D = 1024; // 25 reg
-    Block_reg_spline_getApproxSecondDerivatives3D = 768; // 34 reg
-    Block_reg_spline_getApproxBendingEnergy2D = 1024; // 23 reg
-    Block_reg_spline_getApproxBendingEnergy3D = 1024; // 23 reg
-    Block_reg_spline_getApproxBendingEnergyGradient2D = 1024; // 28 reg
-    Block_reg_spline_getApproxBendingEnergyGradient3D = 768; // 33 reg
-    Block_reg_spline_getApproxJacobianValues2D = 768; // 34 reg
-    Block_reg_spline_getApproxJacobianValues3D = 640; // 46 reg
-    Block_reg_spline_getJacobianValues2D = 768; // 34 reg
-    Block_reg_spline_getJacobianValues3D = 768; // 34 reg
-    Block_reg_spline_logSquaredValues = 1024; // 23 reg
-    Block_reg_spline_computeApproxJacGradient2D = 768; // 34 reg
-    Block_reg_spline_computeApproxJacGradient3D = 768; // 38 reg
-    Block_reg_spline_computeJacGradient2D = 768; // 34 reg
-    Block_reg_spline_computeJacGradient3D = 768; // 37 reg
-    Block_reg_spline_approxCorrectFolding3D = 768; // 34 reg
-    Block_reg_spline_correctFolding3D = 768; // 34 reg
-    Block_reg_getDeformationFromDisplacement = 1024; // 18 reg
-    Block_reg_getDisplacementFromDeformation = 1024; // 18 reg
-    Block_reg_defField_compose2D = 1024; // 23 reg
-    Block_reg_defField_compose3D = 1024; // 24 reg
-    Block_reg_defField_getJacobianMatrix = 768; // 34 reg
-    /* _reg_optimiser_gpu */
-    Block_reg_initialiseConjugateGradient = 1024; // 20 reg
-    Block_reg_GetConjugateGradient1 = 1024; // 22 reg
-    Block_reg_GetConjugateGradient2 = 1024; // 25 reg
-    Block_GetMaximalLength = 1024; // 20 reg
-    Block_reg_updateControlPointPosition = 1024; // 22 reg
-    /* _reg_ssd_gpu */
-    Block_reg_getSquaredDifference = 768; // 34 reg
-    Block_reg_getSSDGradient = 768; // 34 reg
-    /* _reg_tools_gpu */
-    Block_reg_voxelCentric2NodeCentric = 1024; // 23 reg
-    Block_reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg
-    Block_reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg
-    Block_reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg
-    Block_reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg
-    Block_reg_arithmetic = 1024; //
-    /* _reg_resampling_gpu */
-    Block_reg_resampleImage2D = 1024; // 23 reg
-    Block_reg_resampleImage3D = 1024; // 24 reg
-    Block_reg_getImageGradient2D = 768; // 34 reg
-    Block_reg_getImageGradient3D = 768; // 34 reg
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] NiftyReg_CudaBlock300 constructor called\n");
-#endif
-}
diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h
deleted file mode 100755
index 5f341078..00000000
--- a/reg-lib/cuda/_reg_blocksize_gpu.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/** @file _reg_blocksize_gpu.h
- * @author Marc Modat
- * @date 25/03/2009.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- * See the LICENSE.txt file in the nifty_reg root folder
- */
-
-#pragma once
-
-#include "niftilib/nifti1_io.h"
-#include <cuda_runtime.h>
-#include <cuda.h>
-
-/* ******************************** */
-/* ******************************** */
-#ifndef __VECTOR_TYPES_H__
-#define __VECTOR_TYPES_H__
-struct __attribute__((aligned(4))) float4 {
-    float x, y, z, w;
-};
-#endif
-/* ******************************** */
-/* ******************************** */
-class NiftyReg_CudaBlock100 {
-public:    /* _reg_blockMatching_gpu */
-    size_t Block_target_block;
-    size_t Block_result_block;
-    /* _reg_mutualinformation_gpu */
-    size_t Block_reg_smoothJointHistogramX;
-    size_t Block_reg_smoothJointHistogramY;
-    size_t Block_reg_smoothJointHistogramZ;
-    size_t Block_reg_smoothJointHistogramW;
-    size_t Block_reg_marginaliseTargetX;
-    size_t Block_reg_marginaliseTargetXY;
-    size_t Block_reg_marginaliseResultX;
-    size_t Block_reg_marginaliseResultXY;
-    size_t Block_reg_getVoxelBasedNMIGradientUsingPW2D;
-    size_t Block_reg_getVoxelBasedNMIGradientUsingPW3D;
-    size_t Block_reg_getVoxelBasedNMIGradientUsingPW2x2;
-    /* _reg_globalTransformation_gpu */
-    size_t Block_reg_affine_deformationField;
-    /* _reg_localTransformation_gpu */
-    size_t Block_reg_spline_getDeformationField2D;
-    size_t Block_reg_spline_getDeformationField3D;
-    size_t Block_reg_spline_getApproxSecondDerivatives2D;
-    size_t Block_reg_spline_getApproxSecondDerivatives3D;
-    size_t Block_reg_spline_getApproxBendingEnergy2D;
-    size_t Block_reg_spline_getApproxBendingEnergy3D;
-    size_t Block_reg_spline_getApproxBendingEnergyGradient2D;
-    size_t Block_reg_spline_getApproxBendingEnergyGradient3D;
-    size_t Block_reg_spline_getApproxJacobianValues2D;
-    size_t Block_reg_spline_getApproxJacobianValues3D;
-    size_t Block_reg_spline_getJacobianValues2D;
-    size_t Block_reg_spline_getJacobianValues3D;
-    size_t Block_reg_spline_logSquaredValues;
-    size_t Block_reg_spline_computeApproxJacGradient2D;
-    size_t Block_reg_spline_computeApproxJacGradient3D;
-    size_t Block_reg_spline_computeJacGradient2D;
-    size_t Block_reg_spline_computeJacGradient3D;
-    size_t Block_reg_spline_approxCorrectFolding3D;
-    size_t Block_reg_spline_correctFolding3D;
-    size_t Block_reg_getDeformationFromDisplacement;
-    size_t Block_reg_getDisplacementFromDeformation;
-    size_t Block_reg_defField_compose2D;
-    size_t Block_reg_defField_compose3D;
-    size_t Block_reg_defField_getJacobianMatrix;
-    /* _reg_optimiser_gpu */
-    size_t Block_reg_initialiseConjugateGradient;
-    size_t Block_reg_GetConjugateGradient1;
-    size_t Block_reg_GetConjugateGradient2;
-    size_t Block_GetMaximalLength;
-    size_t Block_reg_updateControlPointPosition;
-    /* _reg_ssd_gpu */
-    size_t Block_reg_getSquaredDifference;
-    size_t Block_reg_getSSDGradient;
-    /* _reg_tools_gpu */
-    size_t Block_reg_voxelCentric2NodeCentric;
-    size_t Block_reg_convertNMIGradientFromVoxelToRealSpace;
-    size_t Block_reg_ApplyConvolutionWindowAlongX;
-    size_t Block_reg_ApplyConvolutionWindowAlongY;
-    size_t Block_reg_ApplyConvolutionWindowAlongZ;
-    size_t Block_reg_arithmetic;
-    /* _reg_resampling_gpu */
-    size_t Block_reg_resampleImage2D;
-    size_t Block_reg_resampleImage3D;
-    size_t Block_reg_getImageGradient2D;
-    size_t Block_reg_getImageGradient3D;
-
-    NiftyReg_CudaBlock100();
-};
-/* ******************************** */
-class NiftyReg_CudaBlock200: public NiftyReg_CudaBlock100 {
-public:
-    NiftyReg_CudaBlock200();
-};
-/* ******************************** */
-class NiftyReg_CudaBlock300: public NiftyReg_CudaBlock100 {
-public:
-    NiftyReg_CudaBlock300();
-};
-/* ******************************** */
-class NiftyReg_CudaBlock {
-public:
-    static NiftyReg_CudaBlock100* GetInstance(int major) {
-        if (instance) return instance;
-        else {
-            switch (major) {
-            case 3:
-                instance = new NiftyReg_CudaBlock300();
-                break;
-            case 2:
-                instance = new NiftyReg_CudaBlock200();
-                break;
-            default:
-                instance = new NiftyReg_CudaBlock100();
-                break;
-            }
-        }
-        return instance;
-    }
-private:
-    static NiftyReg_CudaBlock100 *instance;
-};
-/* ******************************** */
-/* ******************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 5d2d10f5..5edc014d 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -14,7 +14,7 @@
 /* *************************************************************** */
 template <class NiftiType>
 int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *img) {
-    const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NiftiType);
+    const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NiftiType);
 
     int *g_dim;
     float* g_pixdim;
@@ -43,7 +43,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, nifti_image *img
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType);
+        const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType);
         NiftiType *array_h = static_cast<NiftiType*>(img->data);
         NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice));
     }
@@ -101,7 +101,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, DataType *array2
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType);
+        const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType);
         NiftiType *array_h = static_cast<NiftiType*>(img->data);
         NiftiType *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]];
         NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice));
@@ -369,7 +369,7 @@ template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, cudaArray**,
 /* *************************************************************** */
 template <class DataType>
 int cudaCommon_allocateArrayToDevice(DataType **array_d, int *dim) {
-    const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType);
+    const unsigned memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
     return EXIT_SUCCESS;
 }
@@ -380,7 +380,7 @@ template int cudaCommon_allocateArrayToDevice<float4>(float4**, int*); // for de
 /* *************************************************************** */
 template <class DataType>
 int cudaCommon_allocateArrayToDevice(DataType **array_d, int vox) {
-    const unsigned int memSize = vox * sizeof(DataType);
+    const unsigned memSize = vox * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
     return EXIT_SUCCESS;
 }
@@ -391,7 +391,7 @@ template int cudaCommon_allocateArrayToDevice<float4>(float4**, int); // for def
 /* *************************************************************** */
 template <class DataType>
 int cudaCommon_allocateArrayToDevice(DataType **array_d, DataType **array2_d, int *dim) {
-    const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType);
+    const unsigned memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
     NR_CUDA_SAFE_CALL(cudaMalloc(array2_d, memSize));
     return EXIT_SUCCESS;
@@ -401,12 +401,12 @@ template int cudaCommon_allocateArrayToDevice<double>(double**, double**, int*);
 template int  cudaCommon_allocateArrayToDevice<float4>(float4**, float4**, int*); // for deformation field
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, DataType *cuPtr, const unsigned int nElements) {
+int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, DataType *cuPtr, const unsigned nElements) {
     NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DataType), cudaMemcpyDeviceToHost));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferFromDeviceToCpu<float>(float *cpuPtr, float *cuPtr, const unsigned int nElements);
-template int cudaCommon_transferFromDeviceToCpu<double>(double *cpuPtr, double *cuPtr, const unsigned int nElements);
+template int cudaCommon_transferFromDeviceToCpu<float>(float *cpuPtr, float *cuPtr, const unsigned nElements);
+template int cudaCommon_transferFromDeviceToCpu<double>(double *cpuPtr, double *cuPtr, const unsigned nElements);
 /* *************************************************************** */
 template <class DataType, class NiftiType>
 int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DataType *array_d) {
@@ -594,7 +594,7 @@ template int cudaCommon_transferFromDeviceToNiftiSimple<float>(float*, nifti_ima
 template int cudaCommon_transferFromDeviceToNiftiSimple<double>(double*, nifti_image*);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *array_d, DataType *img, const unsigned int nvox) {
+int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *array_d, DataType *img, const unsigned nvox) {
     NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice));
     return EXIT_SUCCESS;
 }
@@ -603,24 +603,24 @@ template int cudaCommon_transferFromDeviceToNiftiSimple1<float>(float*, float*,
 template int cudaCommon_transferFromDeviceToNiftiSimple1<double>(double*, double*, const unsigned);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferArrayFromCpuToDevice(DataType *array_d, DataType *array_cpu, const unsigned int nElements) {
-    const unsigned int memSize = nElements * sizeof(DataType);
+int cudaCommon_transferArrayFromCpuToDevice(DataType *array_d, DataType *array_cpu, const unsigned nElements) {
+    const unsigned memSize = nElements * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_cpu, memSize, cudaMemcpyHostToDevice));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferArrayFromCpuToDevice<int>(int*, int*, const unsigned int);
-template int cudaCommon_transferArrayFromCpuToDevice<float>(float*, float*, const unsigned int);
-template int cudaCommon_transferArrayFromCpuToDevice<double>(double*, double*, const unsigned int);
+template int cudaCommon_transferArrayFromCpuToDevice<int>(int*, int*, const unsigned);
+template int cudaCommon_transferArrayFromCpuToDevice<float>(float*, float*, const unsigned);
+template int cudaCommon_transferArrayFromCpuToDevice<double>(double*, double*, const unsigned);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferArrayFromDeviceToCpu(DataType *array_cpu, DataType *array_d, const unsigned int nElements) {
-    const unsigned int memSize = nElements * sizeof(DataType);
+int cudaCommon_transferArrayFromDeviceToCpu(DataType *array_cpu, DataType *array_d, const unsigned nElements) {
+    const unsigned memSize = nElements * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMemcpy(array_cpu, array_d, memSize, cudaMemcpyDeviceToHost));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferArrayFromDeviceToCpu<int>(int*, int*, const unsigned int);
-template int cudaCommon_transferArrayFromDeviceToCpu<float>(float*, float*, const unsigned int);
-template int cudaCommon_transferArrayFromDeviceToCpu<double>(double*, double*, const unsigned int);
+template int cudaCommon_transferArrayFromDeviceToCpu<int>(int*, int*, const unsigned);
+template int cudaCommon_transferArrayFromDeviceToCpu<float>(float*, float*, const unsigned);
+template int cudaCommon_transferArrayFromDeviceToCpu<double>(double*, double*, const unsigned);
 /* *************************************************************** */
 void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) {
     NR_CUDA_SAFE_CALL(cudaDestroyTextureObject(*texObj));
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 2eb0a944..45f8aa26 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -12,7 +12,7 @@
 #include <cuda_runtime.h>
 #include <cuda.h>
 #include "_reg_tools.h"
-#include "_reg_blocksize_gpu.h"
+#include "CudaContext.hpp"
 
 /* *************************************************************** */
 #ifndef __VECTOR_TYPES_H__
@@ -120,15 +120,15 @@ int cudaCommon_transferFromDeviceToNiftiSimple1(DataType*, DataType*, const unsi
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferFromDeviceToCpu(DataType*, DataType*, const unsigned int);
+int cudaCommon_transferFromDeviceToCpu(DataType*, DataType*, const unsigned);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferArrayFromCpuToDevice(DataType*, DataType*, const unsigned int);
+int cudaCommon_transferArrayFromCpuToDevice(DataType*, DataType*, const unsigned);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned int);
+int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned);
 /* *************************************************************** */
 using UniqueTextureObjectPtr = std::unique_ptr<cudaTextureObject_t, void(*)(cudaTextureObject_t*)>;
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index a55d8463..71cd8df7 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -14,56 +14,48 @@
 #include "_reg_globalTransformation_kernels.cu"
 
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_affine_positionField_gpu(	mat44 *affineMatrix,
-					nifti_image *targetImage,
-					float4 *array_d)
-{
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+void reg_affine_positionField_gpu(mat44 *affineMatrix,
+                                  nifti_image *targetImage,
+                                  float4 *array_d) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
-    int3 imageSize = make_int3(targetImage->nx,targetImage->ny,targetImage->nz);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize,&imageSize,sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&(targetImage->nvox),sizeof(int)));
+    int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize, &imageSize, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &(targetImage->nvox), sizeof(int)));
 
     // If the target sform is defined, it is used. The qform is used otherwise
     mat44 *targetMatrix;
-    if(targetImage->sform_code>0)
-            targetMatrix=&(targetImage->sto_xyz);
-    else targetMatrix=&(targetImage->qto_xyz);
+    if (targetImage->sform_code > 0)
+        targetMatrix = &(targetImage->sto_xyz);
+    else targetMatrix = &(targetImage->qto_xyz);
 
     // We here performed Affine * TargetMat * voxelIndex
     // Affine * TargetMat is constant
     mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix);
 
-    // The transformation matrix is binded to a texture
+    // The transformation matrix is bound to a texture
     float4 *transformationMatrix_h;
-    NR_CUDA_SAFE_CALL(cudaMallocHost(&transformationMatrix_h, 3*sizeof(float4)));
-	float4 *transformationMatrix_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&transformationMatrix_d, 3*sizeof(float4)));
-    for(int i=0; i<3; i++){
-            transformationMatrix_h[i].x=transformationMatrix.m[i][0];
-            transformationMatrix_h[i].y=transformationMatrix.m[i][1];
-            transformationMatrix_h[i].z=transformationMatrix.m[i][2];
-            transformationMatrix_h[i].w=transformationMatrix.m[i][3];
+    NR_CUDA_SAFE_CALL(cudaMallocHost(&transformationMatrix_h, 3 * sizeof(float4)));
+    float4 *transformationMatrix_d;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&transformationMatrix_d, 3 * sizeof(float4)));
+    for (int i = 0; i < 3; i++) {
+        transformationMatrix_h[i].x = transformationMatrix.m[i][0];
+        transformationMatrix_h[i].y = transformationMatrix.m[i][1];
+        transformationMatrix_h[i].z = transformationMatrix.m[i][2];
+        transformationMatrix_h[i].w = transformationMatrix.m[i][3];
     }
-    NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, transformationMatrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice));
-	cudaBindTexture(0,txAffineTransformation,transformationMatrix_d,3*sizeof(float4));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, transformationMatrix_h, 3 * sizeof(float4), cudaMemcpyHostToDevice));
+    cudaBindTexture(0, txAffineTransformation, transformationMatrix_d, 3 * sizeof(float4));
     NR_CUDA_SAFE_CALL(cudaFreeHost(transformationMatrix_h));
 
-        const unsigned int Grid_reg_affine_deformationField = (unsigned int)ceil(sqrtf((float)targetImage->nvox/(float)NR_BLOCK->Block_reg_affine_deformationField));
-    dim3 B1(NR_BLOCK->Block_reg_affine_deformationField,1,1);
-        dim3 G1(Grid_reg_affine_deformationField,Grid_reg_affine_deformationField,1);
+    const unsigned Grid_reg_affine_deformationField = (unsigned)ceil(sqrtf((float)targetImage->nvox / (float)blockSize->reg_affine_deformationField));
+    dim3 B1(blockSize->reg_affine_deformationField, 1, 1);
+    dim3 G1(Grid_reg_affine_deformationField, Grid_reg_affine_deformationField, 1);
 
-    reg_affine_deformationField_kernel <<< G1, B1 >>> (array_d);
-        NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
-#ifndef NDEBUG
-    printf("[NiftyReg CUDA DEBUG] reg_affine_deformationField_kernel kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n",
-	       cudaGetErrorString(cudaGetLastError()),G1.x,G1.y,G1.z,B1.x,B1.y,B1.z);
-#endif
+    reg_affine_deformationField_kernel<<<G1, B1>>>(array_d);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
 
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(txAffineTransformation));
     NR_CUDA_SAFE_CALL(cudaFree(transformationMatrix_d));
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 180b7438..92a3f35d 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -23,8 +23,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 										int activeVoxelNumber,
 										bool bspline)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	const int voxelNumber = CalcVoxelNumber(*reference);
 	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
@@ -48,23 +47,23 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 	NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)));
 
 	if(reference->nz>1){
-		const unsigned int Grid_reg_spline_getDeformationField3D =
-			(unsigned int)ceilf(sqrtf((float)activeVoxelNumber/(float)(NR_BLOCK->Block_reg_spline_getDeformationField3D)));
+		const unsigned Grid_reg_spline_getDeformationField3D =
+			(unsigned)ceilf(sqrtf((float)activeVoxelNumber/(float)(blockSize->reg_spline_getDeformationField3D)));
 		dim3 G1(Grid_reg_spline_getDeformationField3D,Grid_reg_spline_getDeformationField3D,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_getDeformationField3D,1,1);
+		dim3 B1(blockSize->reg_spline_getDeformationField3D,1,1);
 		// 8 floats of shared memory are allocated per thread
 		reg_spline_getDeformationField3D
-				<<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField3D*8*sizeof(float) >>>(positionFieldImageArray_d);
+				<<< G1, B1, blockSize->reg_spline_getDeformationField3D*8*sizeof(float) >>>(positionFieldImageArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
-		const unsigned int Grid_reg_spline_getDeformationField2D =
-			(unsigned int)ceilf(sqrtf((float)activeVoxelNumber/(float)(NR_BLOCK->Block_reg_spline_getDeformationField2D)));
+		const unsigned Grid_reg_spline_getDeformationField2D =
+			(unsigned)ceilf(sqrtf((float)activeVoxelNumber/(float)(blockSize->reg_spline_getDeformationField2D)));
 		dim3 G1(Grid_reg_spline_getDeformationField2D,Grid_reg_spline_getDeformationField2D,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_getDeformationField2D,1,1);
+		dim3 B1(blockSize->reg_spline_getDeformationField2D,1,1);
 		// 4 floats of shared memory are allocated per thread
 		reg_spline_getDeformationField2D
-				<<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField2D*4*sizeof(float) >>>(positionFieldImageArray_d);
+				<<< G1, B1, blockSize->reg_spline_getDeformationField2D*4*sizeof(float) >>>(positionFieldImageArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 
@@ -75,8 +74,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
 /* *************************************************************** */
 float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
@@ -90,19 +88,19 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4
 	float4 *secondDerivativeValues_d;
 	if(controlPointImage->nz>1){
 		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointGridMem));
-		const unsigned int Grid_bspline_getApproxSecondDerivatives =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D)));
+		const unsigned Grid_bspline_getApproxSecondDerivatives =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives3D)));
 		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D,1,1);
+		dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D,1,1);
 		reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
 		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointGridMem));
-		const unsigned int Grid_bspline_getApproxSecondDerivatives =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D)));
+		const unsigned Grid_bspline_getApproxSecondDerivatives =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives2D)));
 		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D,1,1);
+		dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D,1,1);
 		reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
@@ -116,10 +114,10 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4
 		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
 										  secondDerivativeValues_d,
 										  6*controlPointGridMem));
-		const unsigned int Grid_reg_spline_ApproxBendingEnergy =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy3D)));
+		const unsigned Grid_reg_spline_ApproxBendingEnergy =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergy3D)));
 		dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1);
-		dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy3D,1,1);
+		dim3 B2(blockSize->reg_spline_getApproxBendingEnergy3D,1,1);
 		reg_spline_getApproxBendingEnergy3D_kernel <<< G2, B2 >>>(penaltyTerm_d);
 		NR_CUDA_CHECK_KERNEL(G2,B2);
 	}
@@ -127,10 +125,10 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4
 		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
 										  secondDerivativeValues_d,
 										  3*controlPointGridMem));
-		const unsigned int Grid_reg_spline_ApproxBendingEnergy =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy2D)));
+		const unsigned Grid_reg_spline_ApproxBendingEnergy =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergy2D)));
 		dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1);
-		dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy2D,1,1);
+		dim3 B2(blockSize->reg_spline_getApproxBendingEnergy2D,1,1);
 		reg_spline_getApproxBendingEnergy2D_kernel <<< G2, B2 >>>(penaltyTerm_d);
 		NR_CUDA_CHECK_KERNEL(G2,B2);
 	}
@@ -150,8 +148,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 												float4 *nodeGradientArray_d,
 												float bendingEnergyWeight)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
 	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
@@ -165,19 +162,19 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 	float4 *secondDerivativeValues_d;
 	if(controlPointImage->nz>1){
 		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointNumber*sizeof(float4)));
-		const unsigned int Grid_bspline_getApproxSecondDerivatives =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D)));
+		const unsigned Grid_bspline_getApproxSecondDerivatives =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives3D)));
 		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D,1,1);
+		dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D,1,1);
 		reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
 		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointNumber*sizeof(float4)));
-		const unsigned int Grid_bspline_getApproxSecondDerivatives =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D)));
+		const unsigned Grid_bspline_getApproxSecondDerivatives =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives2D)));
 		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D,1,1);
+		dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D,1,1);
 		reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
@@ -190,10 +187,10 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
 										  secondDerivativeValues_d,
 										  6*controlPointNumber*sizeof(float4)));
-		const unsigned int Grid_reg_spline_getApproxBendingEnergyGradient =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D)));
+		const unsigned Grid_reg_spline_getApproxBendingEnergyGradient =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergyGradient3D)));
 		dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1);
-		dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D,1,1);
+		dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient3D,1,1);
 		reg_spline_getApproxBendingEnergyGradient3D_kernel <<< G2, B2 >>>(nodeGradientArray_d);
 		NR_CUDA_CHECK_KERNEL(G2,B2);
 	}
@@ -201,10 +198,10 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
 		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
 										  secondDerivativeValues_d,
 										  3*controlPointNumber*sizeof(float4)));
-		const unsigned int Grid_reg_spline_getApproxBendingEnergyGradient =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D)));
+		const unsigned Grid_reg_spline_getApproxBendingEnergyGradient =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergyGradient2D)));
 		dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1);
-		dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D,1,1);
+		dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient2D,1,1);
 		reg_spline_getApproxBendingEnergyGradient2D_kernel <<< G2, B2 >>>(nodeGradientArray_d);
 		NR_CUDA_CHECK_KERNEL(G2,B2);
 	}
@@ -218,8 +215,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 											float *jacobianMatrices_d,
 											float *jacobianDet_d)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	// Need to reorient the Jacobian matrix using the header information - real to voxel conversion
 	mat33 reorientation;
@@ -245,18 +241,18 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
 
 	// The Jacobian matrix is computed for every control point
 	if(controlPointImage->nz>1){
-		const unsigned int Grid_reg_spline_getApproxJacobianValues3D =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxJacobianValues3D)));
+		const unsigned Grid_reg_spline_getApproxJacobianValues3D =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxJacobianValues3D)));
 		dim3 G1(Grid_reg_spline_getApproxJacobianValues3D,Grid_reg_spline_getApproxJacobianValues3D,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues3D,1,1);
+		dim3 B1(blockSize->reg_spline_getApproxJacobianValues3D,1,1);
 		reg_spline_getApproxJacobianValues3D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
-		const unsigned int Grid_reg_spline_getApproxJacobianValues2D =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxJacobianValues2D)));
+		const unsigned Grid_reg_spline_getApproxJacobianValues2D =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxJacobianValues2D)));
 		dim3 G1(Grid_reg_spline_getApproxJacobianValues2D,Grid_reg_spline_getApproxJacobianValues2D,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues2D,1,1);
+		dim3 B1(blockSize->reg_spline_getApproxJacobianValues2D,1,1);
 		reg_spline_getApproxJacobianValues2D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
@@ -269,8 +265,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 									   float *jacobianMatrices_d,
 									   float *jacobianDet_d)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	// Need to reorient the Jacobian matrix using the header information - real to voxel conversion
 	mat33 reorientation;
@@ -304,21 +299,21 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
 
 	// The Jacobian matrix is computed for every voxel
 	if(controlPointImage->nz>1){
-		const unsigned int Grid_reg_spline_getJacobianValues3D =
-			(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_spline_getJacobianValues3D)));
+		const unsigned Grid_reg_spline_getJacobianValues3D =
+			(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_spline_getJacobianValues3D)));
 		dim3 G1(Grid_reg_spline_getJacobianValues3D,Grid_reg_spline_getJacobianValues3D,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_getJacobianValues3D,1,1);
+		dim3 B1(blockSize->reg_spline_getJacobianValues3D,1,1);
 		// 8 floats of shared memory are allocated per thread
 		reg_spline_getJacobianValues3D_kernel
-				<<< G1, B1, NR_BLOCK->Block_reg_spline_getJacobianValues3D*8*sizeof(float)>>>
+				<<< G1, B1, blockSize->reg_spline_getJacobianValues3D*8*sizeof(float)>>>
 				(jacobianMatrices_d, jacobianDet_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
-		const unsigned int Grid_reg_spline_getJacobianValues2D =
-			(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_spline_getJacobianValues2D)));
+		const unsigned Grid_reg_spline_getJacobianValues2D =
+			(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_spline_getJacobianValues2D)));
 		dim3 G1(Grid_reg_spline_getJacobianValues2D,Grid_reg_spline_getJacobianValues2D,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_getJacobianValues2D,1,1);
+		dim3 B1(blockSize->reg_spline_getJacobianValues2D,1,1);
 		reg_spline_getJacobianValues2D_kernel
 				<<< G1, B1>>>
 				(jacobianMatrices_d, jacobianDet_d);
@@ -333,8 +328,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 											 float4 *controlPointImageArray_d,
 											 bool approx)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	// The Jacobian matrices and determinants are computed
 	float *jacobianMatrices_d;
@@ -381,10 +375,10 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
 
 	// The Jacobian determinant are squared and logged (might not be english but will do)
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int)));
-	const unsigned int Grid_reg_spline_logSquaredValues =
-		(unsigned int)ceilf(sqrtf((float)jacNumber/(float)(NR_BLOCK->Block_reg_spline_logSquaredValues)));
+	const unsigned Grid_reg_spline_logSquaredValues =
+		(unsigned)ceilf(sqrtf((float)jacNumber/(float)(blockSize->reg_spline_logSquaredValues)));
 	dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1);
-	dim3 B1(NR_BLOCK->Block_reg_spline_logSquaredValues,1,1);
+	dim3 B1(blockSize->reg_spline_logSquaredValues,1,1);
 	reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet_d);
 	NR_CUDA_CHECK_KERNEL(G1,B1);
 	// Perform the reduction
@@ -400,8 +394,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 												   float jacobianWeight,
 												   bool approx)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	// The Jacobian matrices and determinants are computed
 	float *jacobianMatrices_d;
@@ -465,18 +458,18 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3,&weight,sizeof(float3)));
 	if(approx){
 		if(controlPointImage->nz>1){
-			const unsigned int Grid_reg_spline_computeApproxJacGradient3D =
-				(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeApproxJacGradient3D)));
+			const unsigned Grid_reg_spline_computeApproxJacGradient3D =
+				(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeApproxJacGradient3D)));
 			dim3 G1(Grid_reg_spline_computeApproxJacGradient3D,Grid_reg_spline_computeApproxJacGradient3D,1);
-			dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient3D,1,1);
+			dim3 B1(blockSize->reg_spline_computeApproxJacGradient3D,1,1);
 			reg_spline_computeApproxJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d);
 			NR_CUDA_CHECK_KERNEL(G1,B1);
 		}
 		else{
-			const unsigned int Grid_reg_spline_computeApproxJacGradient2D =
-				(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeApproxJacGradient2D)));
+			const unsigned Grid_reg_spline_computeApproxJacGradient2D =
+				(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeApproxJacGradient2D)));
 			dim3 G1(Grid_reg_spline_computeApproxJacGradient2D,Grid_reg_spline_computeApproxJacGradient2D,1);
-			dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient2D,1,1);
+			dim3 B1(blockSize->reg_spline_computeApproxJacGradient2D,1,1);
 			reg_spline_computeApproxJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d);
 			NR_CUDA_CHECK_KERNEL(G1,B1);
 		}
@@ -492,18 +485,18 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
 		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
 		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)));
 		if(controlPointImage->nz>1){
-			const unsigned int Grid_reg_spline_computeJacGradient3D =
-				(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeJacGradient3D)));
+			const unsigned Grid_reg_spline_computeJacGradient3D =
+				(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeJacGradient3D)));
 			dim3 G1(Grid_reg_spline_computeJacGradient3D,Grid_reg_spline_computeJacGradient3D,1);
-			dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient3D,1,1);
+			dim3 B1(blockSize->reg_spline_computeJacGradient3D,1,1);
 			reg_spline_computeJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d);
 			NR_CUDA_CHECK_KERNEL(G1,B1);
 		}
 		else{
-			const unsigned int Grid_reg_spline_computeJacGradient2D =
-				(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeJacGradient2D)));
+			const unsigned Grid_reg_spline_computeJacGradient2D =
+				(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeJacGradient2D)));
 			dim3 G1(Grid_reg_spline_computeJacGradient2D,Grid_reg_spline_computeJacGradient2D,1);
-			dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient2D,1,1);
+			dim3 B1(blockSize->reg_spline_computeJacGradient2D,1,1);
 			reg_spline_computeJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d);
 			NR_CUDA_CHECK_KERNEL(G1,B1);
 		}
@@ -519,8 +512,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 									  float4 *controlPointImageArray_d,
 									  bool approx)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	// The Jacobian matrices and determinants are computed
 	float *jacobianMatrices_d;
@@ -553,10 +545,10 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 	float *jacobianDet2_d;
 	NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d,jacNumber*sizeof(float)));
 	NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d,jacobianDet_d,jacNumber*sizeof(float),cudaMemcpyDeviceToDevice));
-	const unsigned int Grid_reg_spline_logSquaredValues =
-		(unsigned int)ceilf(sqrtf((float)jacNumber/(float)(NR_BLOCK->Block_reg_spline_logSquaredValues)));
+	const unsigned Grid_reg_spline_logSquaredValues =
+		(unsigned)ceilf(sqrtf((float)jacNumber/(float)(blockSize->reg_spline_logSquaredValues)));
 	dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1);
-	dim3 B1(NR_BLOCK->Block_reg_spline_logSquaredValues,1,1);
+	dim3 B1(blockSize->reg_spline_logSquaredValues,1,1);
 	reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet2_d);
 	NR_CUDA_CHECK_KERNEL(G1,B1);
 	float *jacobianDet_h;
@@ -600,10 +592,10 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)));
 	if(approx){
-		const unsigned int Grid_reg_spline_approxCorrectFolding =
-			(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D)));
+		const unsigned Grid_reg_spline_approxCorrectFolding =
+			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_approxCorrectFolding3D)));
 		dim3 G1(Grid_reg_spline_approxCorrectFolding,Grid_reg_spline_approxCorrectFolding,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D,1,1);
+		dim3 B1(blockSize->reg_spline_approxCorrectFolding3D,1,1);
 		reg_spline_approxCorrectFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
@@ -617,10 +609,10 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
 		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
 		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)));
-		const unsigned int Grid_reg_spline_correctFolding =
-		(unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_correctFolding3D)));
+		const unsigned Grid_reg_spline_correctFolding =
+		(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_correctFolding3D)));
 		dim3 G1(Grid_reg_spline_correctFolding,Grid_reg_spline_correctFolding,1);
-		dim3 B1(NR_BLOCK->Block_reg_spline_correctFolding3D,1,1);
+		dim3 B1(blockSize->reg_spline_correctFolding3D,1,1);
 		reg_spline_correctFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
@@ -634,8 +626,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
 /* *************************************************************** */
 void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	// Bind the qform or sform
 	mat44 temp_mat=image->qto_xyz;
@@ -653,10 +644,10 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArr
 	const int3 imageDim=make_int3(image->nx,image->ny,image->nz);
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3)));
 
-	const unsigned int Grid_reg_getDeformationFromDisplacement =
-	(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDeformationFromDisplacement)));
+	const unsigned Grid_reg_getDeformationFromDisplacement =
+	(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_getDeformationFromDisplacement)));
 	dim3 G1(Grid_reg_getDeformationFromDisplacement,Grid_reg_getDeformationFromDisplacement,1);
-	dim3 B1(NR_BLOCK->Block_reg_getDeformationFromDisplacement,1,1);
+	dim3 B1(blockSize->reg_getDeformationFromDisplacement,1,1);
 	reg_getDeformationFromDisplacement3D_kernel<<< G1, B1>>>(imageArray_d);
 	NR_CUDA_CHECK_KERNEL(G1,B1);
 }
@@ -664,8 +655,7 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArr
 /* *************************************************************** */
 void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	// Bind the qform or sform
 	mat44 temp_mat=image->qto_xyz;
@@ -683,10 +673,10 @@ void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArr
 	const int3 imageDim=make_int3(image->nx,image->ny,image->nz);
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3)));
 
-	const unsigned int Grid_reg_getDisplacementFromDeformation =
-		(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDisplacementFromDeformation)));
+	const unsigned Grid_reg_getDisplacementFromDeformation =
+		(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_getDisplacementFromDeformation)));
 	dim3 G1(Grid_reg_getDisplacementFromDeformation,Grid_reg_getDisplacementFromDeformation,1);
-	dim3 B1(NR_BLOCK->Block_reg_getDisplacementFromDeformation,1,1);
+	dim3 B1(blockSize->reg_getDisplacementFromDeformation,1,1);
 	reg_getDisplacementFromDeformation3D_kernel<<< G1, B1>>>(imageArray_d);
 	NR_CUDA_CHECK_KERNEL(G1,B1);
 }
@@ -738,8 +728,8 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
 
 
 	// The deformation field is squared
-	unsigned int squaringNumber = (unsigned int)fabs(cpp_h->intent_p1);
-	for(unsigned int i=0;i<squaringNumber;++i){
+	unsigned squaringNumber = (unsigned)fabs(cpp_h->intent_p1);
+	for(unsigned i=0;i<squaringNumber;++i){
 
 		// The deformation field arrays are updated
 		NR_CUDA_SAFE_CALL(cudaMemcpy(tempDef_gpu,def_gpu,voxelNumber*sizeof(float4),cudaMemcpyDeviceToDevice));
@@ -763,8 +753,7 @@ void reg_defField_compose_gpu(nifti_image *def,
 							  int *mask_gpu,
 							  int activeVoxel)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	const int voxelNumber = CalcVoxelNumber(*def);
 
@@ -797,18 +786,18 @@ void reg_defField_compose_gpu(nifti_image *def,
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
 
 	if(def->nz>1){
-		const unsigned int Grid_reg_defField_compose3D =
-			(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_compose3D)));
+		const unsigned Grid_reg_defField_compose3D =
+			(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_compose3D)));
 		dim3 G1(Grid_reg_defField_compose3D,Grid_reg_defField_compose3D,1);
-		dim3 B1(NR_BLOCK->Block_reg_defField_compose3D,1,1);
+		dim3 B1(blockSize->reg_defField_compose3D,1,1);
 		reg_defField_compose3D_kernel<<< G1, B1>>>(defOut_gpu);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
 	else{
-		const unsigned int Grid_reg_defField_compose2D =
-			(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_compose2D)));
+		const unsigned Grid_reg_defField_compose2D =
+			(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_compose2D)));
 		dim3 G1(Grid_reg_defField_compose2D,Grid_reg_defField_compose2D,1);
-		dim3 B1(NR_BLOCK->Block_reg_defField_compose2D,1,1);
+		dim3 B1(blockSize->reg_defField_compose2D,1,1);
 		reg_defField_compose2D_kernel<<< G1, B1>>>(defOut_gpu);
 		NR_CUDA_CHECK_KERNEL(G1,B1);
 	}
@@ -822,8 +811,7 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
 										float4 **deformationField_gpu,
 										float **jacobianMatrices_gpu)
 {
-	// Get the BlockSize - The values have been set in CudaContextSingleton
-	NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
 	const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz);
 	const float3 referenceSpacing=make_float3(deformationField->dx,deformationField->dy,deformationField->dz);
@@ -845,10 +833,10 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
 
 	NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,*deformationField_gpu,voxelNumber*sizeof(float4)));
 
-	const unsigned int Grid_reg_defField_getJacobianMatrix =
-		(unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_getJacobianMatrix)));
+	const unsigned Grid_reg_defField_getJacobianMatrix =
+		(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_getJacobianMatrix)));
 	dim3 G1(Grid_reg_defField_getJacobianMatrix,Grid_reg_defField_getJacobianMatrix,1);
-	dim3 B1(NR_BLOCK->Block_reg_defField_getJacobianMatrix);
+	dim3 B1(blockSize->reg_defField_getJacobianMatrix);
 	reg_defField_getJacobianMatrix3D_kernel<<<G1,B1>>>(*jacobianMatrices_gpu);
 	NR_CUDA_CHECK_KERNEL(G1,B1);
 
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 30a93e54..329c011f 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -438,7 +438,7 @@ __device__ float4 get_SlidedValues_gpu(int x, int y, int z)
 /* *************************************************************** */
 __global__ void reg_spline_getDeformationField3D(float4 *positionField)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_ActiveVoxelNumber){
 
 		// Allocate the shared memory
@@ -448,7 +448,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField)
 
 		int3 imageSize = c_ReferenceImageDim;
 
-		unsigned int tempIndex=tex1Dfetch(maskTexture,tid);
+		unsigned tempIndex=tex1Dfetch(maskTexture,tid);
 		const int z = tempIndex/(imageSize.x*imageSize.y);
 		tempIndex  -= z*imageSize.x*imageSize.y;
 		const int y = tempIndex/imageSize.x;
@@ -531,7 +531,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField)
 /* *************************************************************** */
 __global__ void reg_spline_getDeformationField2D(float4 *positionField)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_ActiveVoxelNumber){
 
 		// Allocate the shared memory
@@ -539,7 +539,7 @@ __global__ void reg_spline_getDeformationField2D(float4 *positionField)
 
 		int3 imageSize = c_ReferenceImageDim;
 
-		unsigned int tempIndex=tex1Dfetch(maskTexture,tid);
+		unsigned tempIndex=tex1Dfetch(maskTexture,tid);
 		const int y = tempIndex/imageSize.x;
 		const int x = tempIndex - y*imageSize.x;
 
@@ -844,7 +844,7 @@ __global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeG
 			for(int b=y-1; b<y+2; ++b){
 				for(int a=x-1; a<x+2; ++a){
 					if(-1<a && -1<b && -1<c && a<gridSize.x && b<gridSize.y && c<gridSize.z){
-						unsigned int indexXYZ = 6*((c*gridSize.y+b)*gridSize.x+a);
+						unsigned indexXYZ = 6*((c*gridSize.y+b)*gridSize.x+a);
 						secondDerivativeValues = tex1Dfetch(secondDerivativesTexture,indexXYZ++); // XX
 						gradientValue.x += secondDerivativeValues.x * xxbasis[coord];
 						gradientValue.y += secondDerivativeValues.y * xxbasis[coord];
@@ -898,7 +898,7 @@ __global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatri
 										ybasis);
 	__syncthreads();
 
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_ControlPointNumber){
 
 		int3 gridSize = c_ControlPointImageDim;
@@ -969,7 +969,7 @@ __global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatri
 									  zbasis);
 	__syncthreads();
 
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_ControlPointNumber){
 
 		int3 gridSize = c_ControlPointImageDim;
@@ -1059,12 +1059,12 @@ __global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatri
 __global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices,
 													 float *jacobianDet)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_VoxelNumber){
 
 		int2 imageSize = make_int2(c_ReferenceImageDim.x,c_ReferenceImageDim.y);
 
-		unsigned int tempIndex=tid;
+		unsigned tempIndex=tid;
 		const int y = tempIndex/imageSize.x;
 		const int x = tempIndex - y*imageSize.x;
 
@@ -1131,12 +1131,12 @@ __global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices,
 __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices,
 													 float *jacobianDet)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_VoxelNumber){
 
 		int3 imageSize = c_ReferenceImageDim;
 
-		unsigned int tempIndex=tid;
+		unsigned tempIndex=tid;
 		const int z = tempIndex/(imageSize.x*imageSize.y);
 		tempIndex  -= z*imageSize.x*imageSize.y;
 		const int y = tempIndex/imageSize.x;
@@ -1238,7 +1238,7 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices,
 /* *************************************************************** */
 __global__ void reg_spline_logSquaredValues_kernel(float *det)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_VoxelNumber){
 		float val = logf(det[tid]);
 		det[tid]=val*val;
@@ -1293,12 +1293,12 @@ __global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient)
 										ybasis);
 	__syncthreads();
 
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_ControlPointNumber){
 
 		int3 gridSize = c_ControlPointImageDim;
 
-		unsigned int tempIndex=tid;
+		unsigned tempIndex=tid;
 		const int y =(int)(tempIndex/(gridSize.x));
 		const int x = tempIndex - y*(gridSize.x);
 
@@ -1359,12 +1359,12 @@ __global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient)
 									  zbasis);
 	__syncthreads();
 
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_ControlPointNumber){
 
 		int3 gridSize = c_ControlPointImageDim;
 
-		unsigned int tempIndex=tid;
+		unsigned tempIndex=tid;
 		const int z =(int)(tempIndex/(gridSize.x*gridSize.y));
 		tempIndex -= z*(gridSize.x)*(gridSize.y);
 		const int y =(int)(tempIndex/(gridSize.x));
@@ -1433,7 +1433,7 @@ __global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient)
 /* *************************************************************** */
 __global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_ControlPointNumber){
 
 		int3 gridSize = c_ControlPointImageDim;
@@ -1506,7 +1506,7 @@ __global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient)
 /* *************************************************************** */
 __global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_ControlPointNumber){
 
 		int3 gridSize = c_ControlPointImageDim;
@@ -1605,12 +1605,12 @@ __global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient)
 /* *************************************************************** */
 __global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGrid_d)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_ControlPointNumber){
 
 		int3 gridSize = c_ControlPointImageDim;
 
-		unsigned int tempIndex=tid;
+		unsigned tempIndex=tid;
 		const int z = tempIndex/(gridSize.x*gridSize.y);
 		tempIndex  -= z*gridSize.x*gridSize.y;
 		const int y = tempIndex/gridSize.x;
@@ -1692,12 +1692,12 @@ __global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGri
 /* *************************************************************** */
 __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_ControlPointNumber){
 
 		int3 gridSize = c_ControlPointImageDim;
 
-		unsigned int tempIndex=tid;
+		unsigned tempIndex=tid;
 		const int z = tempIndex/(gridSize.x*gridSize.y);
 		tempIndex  -= z*gridSize.x*gridSize.y;
 		const int y = tempIndex/gridSize.x;
@@ -1793,12 +1793,12 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d)
 /* *************************************************************** */
 __global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *imageArray_d)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_VoxelNumber){
 
 		int3 imageSize = c_ReferenceImageDim;
 
-		unsigned int tempIndex=tid;
+		unsigned tempIndex=tid;
 		const int z = tempIndex/(imageSize.x*imageSize.y);
 		tempIndex  -= z*imageSize.x*imageSize.y;
 		const int y = tempIndex/imageSize.x;
@@ -1816,12 +1816,12 @@ __global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *imageArray_d
 /* *************************************************************** */
 __global__ void reg_getDisplacementFromDeformation3D_kernel(float4 *imageArray_d)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_VoxelNumber){
 
 		int3 imageSize = c_ReferenceImageDim;
 
-		unsigned int tempIndex=tid;
+		unsigned tempIndex=tid;
 		const int z = tempIndex/(imageSize.x*imageSize.y);
 		tempIndex  -= z*imageSize.x*imageSize.y;
 		const int y = tempIndex/imageSize.x;
@@ -1839,7 +1839,7 @@ __global__ void reg_getDisplacementFromDeformation3D_kernel(float4 *imageArray_d
 /* *************************************************************** */
 __global__ void reg_defField_compose2D_kernel(float4 *outDef)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_VoxelNumber){
 
 		// Extract the original voxel position
@@ -1870,7 +1870,7 @@ __global__ void reg_defField_compose2D_kernel(float4 *outDef)
 
 		for(int b=0;b<2;++b){
 			for(int a=0;a<2;++a){
-				unsigned int index=(ante.y+b)*c_ReferenceImageDim.x+ante.x+a;
+				unsigned index=(ante.y+b)*c_ReferenceImageDim.x+ante.x+a;
 				float4 deformation;
 				if((ante.x+a)>-1 && (ante.y+b)>-1 &&
 				   (ante.x+a)<c_ReferenceImageDim.x &&
@@ -1891,7 +1891,7 @@ __global__ void reg_defField_compose2D_kernel(float4 *outDef)
 /* *************************************************************** */
 __global__ void reg_defField_compose3D_kernel(float4 *outDef)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_VoxelNumber){
 
 		// Extract the original voxel position
@@ -1931,7 +1931,7 @@ __global__ void reg_defField_compose3D_kernel(float4 *outDef)
 		for(int c=0;c<2;++c){
 			for(int b=0;b<2;++b){
 				for(int a=0;a<2;++a){
-					unsigned int index=((ante.z+c)*c_ReferenceImageDim.y+ante.y+b)*c_ReferenceImageDim.x+ante.x+a;
+					unsigned index=((ante.z+c)*c_ReferenceImageDim.y+ante.y+b)*c_ReferenceImageDim.x+ante.x+a;
 					float4 deformation;
 					if((ante.x+a)>-1 && (ante.y+b)>-1 && (ante.z+c)>-1 &&
 					   (ante.x+a)<c_ReferenceImageDim.x &&
@@ -1955,12 +1955,12 @@ __global__ void reg_defField_compose3D_kernel(float4 *outDef)
 /* *************************************************************** */
 __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices)
 {
-	const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
+	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
 	if(tid<c_VoxelNumber){
 
 		int3 imageSize = c_ReferenceImageDim;
 
-		unsigned int tempIndex=tid;
+		unsigned tempIndex=tid;
 		const int z = tempIndex/(imageSize.x*imageSize.y);
 		tempIndex  -= z*imageSize.x*imageSize.y;
 		const int y = tempIndex/imageSize.x;
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 07a708f9..bf59fe7f 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -153,8 +153,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
                                       double *entropies,
                                       int refBinning,
                                       int floBinning) {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     const int voxelNumber = CalcVoxelNumber(*referenceImage);
     const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
@@ -187,16 +186,16 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
     NR_CUDA_SAFE_CALL(cudaMemset(voxelNMIGradientArray_d, 0, voxelNumber * sizeof(float4)));
 
     if (referenceImage->nz > 1) {
-        const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW3D =
-            (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D));
-        dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D, 1, 1);
+        const unsigned Grid_reg_getVoxelBasedNMIGradientUsingPW3D =
+            (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getVoxelBasedNMIGradientUsingPW3D));
+        dim3 B1(blockSize->reg_getVoxelBasedNMIGradientUsingPW3D, 1, 1);
         dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW3D, Grid_reg_getVoxelBasedNMIGradientUsingPW3D, 1);
         reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     } else {
-        const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW2D =
-            (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D));
-        dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D, 1, 1);
+        const unsigned Grid_reg_getVoxelBasedNMIGradientUsingPW2D =
+            (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getVoxelBasedNMIGradientUsingPW2D));
+        dim3 B1(blockSize->reg_getVoxelBasedNMIGradientUsingPW2D, 1, 1);
         dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW2D, Grid_reg_getVoxelBasedNMIGradientUsingPW2D, 1);
         reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d);
         NR_CUDA_CHECK_KERNEL(G1, B1);
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index 77b78ebd..47cdbb40 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -14,7 +14,6 @@
 
 #include "_reg_nmi.h"
 #include "_reg_measure_gpu.h"
-#include "_reg_blocksize_gpu.h"
 
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cuda/_reg_nmi_kernels.cu b/reg-lib/cuda/_reg_nmi_kernels.cu
index 939b5253..d7108bb2 100755
--- a/reg-lib/cuda/_reg_nmi_kernels.cu
+++ b/reg-lib/cuda/_reg_nmi_kernels.cu
@@ -408,14 +408,14 @@ __global__ void reg_smoothJointHistogramX_kernel(float *tempHistogram)
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
     if(tid<c_secondTargetBin*c_firstResultBin*c_secondResultBin){
         // The starting index is computed
-        unsigned int startingPoint=tid*c_firstTargetBin;
-        unsigned int finishPoint=startingPoint+c_firstTargetBin;
+        unsigned startingPoint=tid*c_firstTargetBin;
+        unsigned finishPoint=startingPoint+c_firstTargetBin;
 
         // The first point is computed
         tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
                                        tex1Dfetch(histogramTexture, startingPoint+1) * COEFF_L) / COEFF_B;
         // The middle points are computed
-        for(unsigned int i=startingPoint+1; i<finishPoint-1; ++i){
+        for(unsigned i=startingPoint+1; i<finishPoint-1; ++i){
             tempHistogram[i] = tex1Dfetch(histogramTexture, i-1) * COEFF_L +
                                tex1Dfetch(histogramTexture, i) * COEFF_C +
                                tex1Dfetch(histogramTexture, i+1) * COEFF_L;
@@ -432,16 +432,16 @@ __global__ void reg_smoothJointHistogramY_kernel(float *tempHistogram)
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
     if(tid<c_firstTargetBin*c_firstResultBin*c_secondResultBin){
         // The starting index is computed
-        unsigned int startingPoint=tid + c_firstTargetBin*(c_secondTargetBin-1)*(c_firstResultBin*(int)(tid/(c_firstTargetBin*c_firstResultBin)) +
+        unsigned startingPoint=tid + c_firstTargetBin*(c_secondTargetBin-1)*(c_firstResultBin*(int)(tid/(c_firstTargetBin*c_firstResultBin)) +
                                    (int)(tid/c_firstTargetBin - c_firstResultBin * (int)(tid/(c_firstTargetBin*c_firstResultBin))));
-        unsigned int increment = c_firstTargetBin;
-        unsigned int finishPoint=startingPoint+increment*c_secondTargetBin;
+        unsigned increment = c_firstTargetBin;
+        unsigned finishPoint=startingPoint+increment*c_secondTargetBin;
 
         // The first point is computed
         tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
                                        tex1Dfetch(histogramTexture, startingPoint+increment) * COEFF_L) / COEFF_B;
         // The middle points are computed
-        for(unsigned int i=startingPoint+increment; i<finishPoint-increment; i+=increment){
+        for(unsigned i=startingPoint+increment; i<finishPoint-increment; i+=increment){
             tempHistogram[i] = tex1Dfetch(histogramTexture, i-increment) * COEFF_L +
                                tex1Dfetch(histogramTexture, i) * COEFF_C +
                                tex1Dfetch(histogramTexture, i+increment) * COEFF_L;
@@ -458,15 +458,15 @@ __global__ void reg_smoothJointHistogramZ_kernel(float *tempHistogram)
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
     if(tid<c_firstTargetBin*c_secondTargetBin*c_secondResultBin){
         // The starting index is computed
-        unsigned int startingPoint=tid+c_firstTargetBin*c_secondTargetBin*(c_firstResultBin-1)*(int)(tid/(c_firstTargetBin*c_secondTargetBin));
-        unsigned int increment = c_firstTargetBin*c_secondTargetBin;
-        unsigned int finishPoint=startingPoint+increment*c_firstResultBin;
+        unsigned startingPoint=tid+c_firstTargetBin*c_secondTargetBin*(c_firstResultBin-1)*(int)(tid/(c_firstTargetBin*c_secondTargetBin));
+        unsigned increment = c_firstTargetBin*c_secondTargetBin;
+        unsigned finishPoint=startingPoint+increment*c_firstResultBin;
 
         // The first point is computed
         tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
                                        tex1Dfetch(histogramTexture, startingPoint+increment) * COEFF_L) / COEFF_B;
         // The middle points are computed
-        for(unsigned int i=startingPoint+increment; i<finishPoint-increment; i+=increment){
+        for(unsigned i=startingPoint+increment; i<finishPoint-increment; i+=increment){
             tempHistogram[i] = tex1Dfetch(histogramTexture, i-increment) * COEFF_L +
                                tex1Dfetch(histogramTexture, i) * COEFF_C +
                                tex1Dfetch(histogramTexture, i+increment) * COEFF_L;
@@ -483,15 +483,15 @@ __global__ void reg_smoothJointHistogramW_kernel(float *tempHistogram)
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
     if(tid<c_firstTargetBin*c_secondTargetBin*c_firstResultBin){
         // The starting index is computed
-        unsigned int startingPoint=tid;
-        unsigned int increment = c_firstTargetBin*c_secondTargetBin*c_firstResultBin;
-        unsigned int finishPoint=increment*c_secondResultBin;
+        unsigned startingPoint=tid;
+        unsigned increment = c_firstTargetBin*c_secondTargetBin*c_firstResultBin;
+        unsigned finishPoint=increment*c_secondResultBin;
 
         // The first point is computed
         tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
                                        tex1Dfetch(histogramTexture, startingPoint+increment) * COEFF_L) / COEFF_B;
         // The middle points are computed
-        for(unsigned int i=startingPoint+increment; i<finishPoint-increment; i+=increment){
+        for(unsigned i=startingPoint+increment; i<finishPoint-increment; i+=increment){
             tempHistogram[i] = tex1Dfetch(histogramTexture, i-increment) * COEFF_L +
                                tex1Dfetch(histogramTexture, i) * COEFF_C +
                                tex1Dfetch(histogramTexture, i+increment) * COEFF_L;
@@ -508,12 +508,12 @@ __global__ void reg_marginaliseTargetX_kernel(float *babyHisto)
 {
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
     if(tid<c_secondTargetBin*c_firstResultBin*c_secondResultBin){
-        unsigned int startingPoint=tid*c_firstTargetBin;
-        unsigned int finishPoint=startingPoint+c_firstTargetBin;
+        unsigned startingPoint=tid*c_firstTargetBin;
+        unsigned finishPoint=startingPoint+c_firstTargetBin;
 
         float sum=tex1Dfetch(histogramTexture, startingPoint);
         float c=0.f,Y,t;
-        for(unsigned int i=startingPoint+1; i<finishPoint; ++i){
+        for(unsigned i=startingPoint+1; i<finishPoint; ++i){
             Y = tex1Dfetch(histogramTexture, i) - c;
             t = sum + Y;
             c = (t-sum)-Y;
@@ -527,12 +527,12 @@ __global__ void reg_marginaliseTargetXY_kernel(float *babyHisto)
 {
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
     if(tid<c_firstResultBin*c_secondResultBin){
-        unsigned int startingPoint=tid*c_secondTargetBin;
-        unsigned int finishPoint=startingPoint+c_secondTargetBin;
+        unsigned startingPoint=tid*c_secondTargetBin;
+        unsigned finishPoint=startingPoint+c_secondTargetBin;
 
         float sum=tex1Dfetch(histogramTexture, startingPoint);
         float c=0.f,Y,t;
-        for(unsigned int i=startingPoint+1; i<finishPoint; ++i){
+        for(unsigned i=startingPoint+1; i<finishPoint; ++i){
             Y = tex1Dfetch(histogramTexture, i) - c;
             t = sum + Y;
             c = (t-sum)-Y;
@@ -546,13 +546,13 @@ __global__ void reg_marginaliseResultX_kernel(float *babyHisto)
 {
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
     if(tid<c_firstTargetBin*c_secondTargetBin*c_firstResultBin){
-        unsigned int startingPoint = tid;
+        unsigned startingPoint = tid;
         float sum=tex1Dfetch(histogramTexture, startingPoint);
         // increment by a the cube
-        unsigned int increment = c_firstTargetBin*c_secondTargetBin*c_firstResultBin;
+        unsigned increment = c_firstTargetBin*c_secondTargetBin*c_firstResultBin;
         float c=0.f,Y,t;
 
-        for (unsigned int i = 1; i < c_secondResultBin; ++i)
+        for (unsigned i = 1; i < c_secondResultBin; ++i)
         {
             Y = tex1Dfetch(histogramTexture, startingPoint + i *increment) - c;
             t = sum + Y;
@@ -567,12 +567,12 @@ __global__ void reg_marginaliseResultXY_kernel(float *babyHisto)
 {
     const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
     if(tid<c_firstTargetBin*c_secondTargetBin){
-        unsigned int startingPoint=tid;
+        unsigned startingPoint=tid;
         float sum=tex1Dfetch(histogramTexture, startingPoint);
         // increment by the plane.
-        unsigned int increment = c_firstTargetBin*c_secondTargetBin;
+        unsigned increment = c_firstTargetBin*c_secondTargetBin;
         float c=0.f,Y,t;
-        for (unsigned int i = 1; i < c_firstResultBin; ++i)
+        for (unsigned i = 1; i < c_firstResultBin; ++i)
         {
             Y = tex1Dfetch(histogramTexture, startingPoint + i *increment) - c;
             t = sum + Y;
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index fe8da863..16a6efc6 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -174,12 +174,12 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
     auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
                                                                cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
-    const unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_initialiseConjugateGradient;
-    const unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    const unsigned blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_initialiseConjugateGradient;
+    const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
 
-    reg_initialiseConjugateGradient_kernel<<<gridDims, blockDims>>>(conjugateGCuda, *gradientImageTexture, nVoxels);
+    reg_initialiseConjugateGradient_kernel<<<gridDims, blockDims>>>(conjugateGCuda, *gradientImageTexture, (unsigned)nVoxels);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateHCuda, conjugateGCuda, nVoxels * sizeof(float4), cudaMemcpyDeviceToDevice));
 }
@@ -196,14 +196,14 @@ void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda,
                                                             cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
     // gam = sum((grad+g)*grad)/sum(HxG);
-    unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_GetConjugateGradient1;
-    unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_GetConjugateGradient1;
+    unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     dim3 blockDims(blocks, 1, 1);
     dim3 gridDims(grids, grids, 1);
 
     float2 *sumsCuda;
     NR_CUDA_SAFE_CALL(cudaMalloc(&sumsCuda, nVoxels * sizeof(float2)));
-    reg_GetConjugateGradient1_kernel<<<gridDims, blockDims>>>(sumsCuda, *gradientImageTexture, *conjugateGTexture, *conjugateHTexture, nVoxels);
+    reg_GetConjugateGradient1_kernel<<<gridDims, blockDims>>>(sumsCuda, *gradientImageTexture, *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     float2 *sums;
     NR_CUDA_SAFE_CALL(cudaMallocHost(&sums, nVoxels * sizeof(float2)));
@@ -218,11 +218,11 @@ void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda,
     const float gam = (float)(dgg / gg);
     NR_CUDA_SAFE_CALL(cudaFreeHost(sums));
 
-    blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_GetConjugateGradient2;
-    grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_GetConjugateGradient2;
+    grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     gridDims = dim3(blocks, 1, 1);
     blockDims = dim3(grids, grids, 1);
-    reg_GetConjugateGradient2_kernel<<<blockDims, gridDims>>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, nVoxels, gam);
+    reg_GetConjugateGradient2_kernel<<<blockDims, gridDims>>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
@@ -239,11 +239,11 @@ void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
     auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
                                                                cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
 
-    const unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_updateControlPointPosition;
-    const unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    const unsigned blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_updateControlPointPosition;
+    const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     const dim3 blockDims(blocks, 1, 1);
     const dim3 gridDims(grids, grids, 1);
-    reg_updateControlPointPosition_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, nVoxels, scale, optimiseX, optimiseY, optimiseZ);
+    reg_updateControlPointPosition_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu
index 7ea3d201..33032095 100755
--- a/reg-lib/cuda/_reg_optimiser_kernels.cu
+++ b/reg-lib/cuda/_reg_optimiser_kernels.cu
@@ -1,8 +1,8 @@
 /* *************************************************************** */
 __global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateGCuda,
                                                        cudaTextureObject_t gradientImageTexture,
-                                                       const size_t nVoxels) {
-    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+                                                       const unsigned nVoxels) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         const float4 gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
         conjugateGCuda[tid] = make_float4(-gradValue.x, -gradValue.y, -gradValue.z, 0);
@@ -13,8 +13,8 @@ __global__ void reg_GetConjugateGradient1_kernel(float2 *sums,
                                                  cudaTextureObject_t gradientImageTexture,
                                                  cudaTextureObject_t conjugateGTexture,
                                                  cudaTextureObject_t conjugateHTexture,
-                                                 const size_t nVoxels) {
-    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+                                                 const unsigned nVoxels) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         const float4 valueH = tex1Dfetch<float4>(conjugateHTexture, tid);
         const float4 valueG = tex1Dfetch<float4>(conjugateGTexture, tid);
@@ -30,9 +30,9 @@ __global__ void reg_GetConjugateGradient1_kernel(float2 *sums,
 __global__ void reg_GetConjugateGradient2_kernel(float4 *gradientImageCuda,
                                                  float4 *conjugateGCuda,
                                                  float4 *conjugateHCuda,
-                                                 const size_t nVoxels,
+                                                 const unsigned nVoxels,
                                                  const float scale) {
-    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         // G = - grad
         float4 gradGValue = gradientImageCuda[tid];
@@ -54,12 +54,12 @@ __global__ void reg_GetConjugateGradient2_kernel(float4 *gradientImageCuda,
 __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageCuda,
                                                       cudaTextureObject_t bestControlPointTexture,
                                                       cudaTextureObject_t gradientImageTexture,
-                                                      const size_t nVoxels,
+                                                      const unsigned nVoxels,
                                                       const float scale,
                                                       const bool optimiseX,
                                                       const bool optimiseY,
                                                       const bool optimiseZ) {
-    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         float4 value = controlPointImageCuda[tid];
         const float4 bestValue = tex1Dfetch<float4>(bestControlPointTexture, tid);
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index 0559768b..7a48d774 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -21,8 +21,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
                            int *mask_d,
                            size_t activeVoxelNumber,
                            float paddingValue) {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
@@ -45,16 +44,16 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
     else floatingMatrix = floatingImage->qto_ijk;
 
     if (floatingImage->nz > 1) {
-        const unsigned Grid_reg_resamplefloatingImage3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage3D));
-        dim3 B1(NR_BLOCK->Block_reg_resampleImage3D, 1, 1);
+        const unsigned Grid_reg_resamplefloatingImage3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_resampleImage3D));
+        dim3 B1(blockSize->reg_resampleImage3D, 1, 1);
         dim3 G1(Grid_reg_resamplefloatingImage3D, Grid_reg_resamplefloatingImage3D, 1);
-        reg_resampleImage3D_kernel<<<G1, B1>>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
+        reg_resampleImage3D_kernel<<<G1, B1>>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     } else {
-        const unsigned Grid_reg_resamplefloatingImage2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage2D));
-        dim3 B1(NR_BLOCK->Block_reg_resampleImage2D, 1, 1);
+        const unsigned Grid_reg_resamplefloatingImage2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_resampleImage2D));
+        dim3 B1(blockSize->reg_resampleImage2D, 1, 1);
         dim3 G1(Grid_reg_resamplefloatingImage2D, Grid_reg_resamplefloatingImage2D, 1);
-        reg_resampleImage2D_kernel<<<G1, B1>>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
+        reg_resampleImage2D_kernel<<<G1, B1>>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     }
 }
@@ -65,8 +64,7 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
                               float4 *warpedGradientArray_d,
                               size_t activeVoxelNumber,
                               float paddingValue) {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
@@ -85,16 +83,16 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
     else floatingMatrix = floatingImage->qto_ijk;
 
     if (floatingImage->nz > 1) {
-        const unsigned Grid_reg_getImageGradient3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient3D));
-        dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D, 1, 1);
+        const unsigned Grid_reg_getImageGradient3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getImageGradient3D));
+        dim3 B1(blockSize->reg_getImageGradient3D, 1, 1);
         dim3 G1(Grid_reg_getImageGradient3D, Grid_reg_getImageGradient3D, 1);
-        reg_getImageGradient3D_kernel<<<G1, B1>>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
+        reg_getImageGradient3D_kernel<<<G1, B1>>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     } else {
-        const unsigned Grid_reg_getImageGradient2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient2D));
-        dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D, 1, 1);
+        const unsigned Grid_reg_getImageGradient2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getImageGradient2D));
+        dim3 B1(blockSize->reg_getImageGradient2D, 1, 1);
         dim3 G1(Grid_reg_getImageGradient2D, Grid_reg_getImageGradient2D, 1);
-        reg_getImageGradient2D_kernel<<<G1, B1>>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue);
+        reg_getImageGradient2D_kernel<<<G1, B1>>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(G1, B1);
     }
 }
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index f37b4528..05351f38 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -15,11 +15,11 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray,
                                            cudaTextureObject_t floatingTexture,
                                            cudaTextureObject_t deformationFieldTexture,
                                            cudaTextureObject_t maskTexture,
-                                           mat44 floatingMatrix,
-                                           int3 floatingDim,
-                                           size_t activeVoxelNumber,
-                                           float paddingValue) {
-    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+                                           const mat44 floatingMatrix,
+                                           const int3 floatingDim,
+                                           const unsigned activeVoxelNumber,
+                                           const float paddingValue) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
         //Get the real world deformation in the floating space
         const int tid2 = tex1Dfetch<int>(maskTexture, tid);
@@ -45,11 +45,11 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray,
                                            cudaTextureObject_t floatingTexture,
                                            cudaTextureObject_t deformationFieldTexture,
                                            cudaTextureObject_t maskTexture,
-                                           mat44 floatingMatrix,
-                                           int3 floatingDim,
-                                           size_t activeVoxelNumber,
-                                           float paddingValue) {
-    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+                                           const mat44 floatingMatrix,
+                                           const int3 floatingDim,
+                                           const unsigned activeVoxelNumber,
+                                           const float paddingValue) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
         const int tid2 = tex1Dfetch<int>(maskTexture, tid);
 
@@ -82,11 +82,11 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray,
 __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
                                               cudaTextureObject_t floatingTexture,
                                               cudaTextureObject_t deformationFieldTexture,
-                                              mat44 floatingMatrix,
-                                              int3 floatingDim,
-                                              size_t activeVoxelNumber,
-                                              float paddingValue) {
-    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+                                              const mat44 floatingMatrix,
+                                              const int3 floatingDim,
+                                              const unsigned activeVoxelNumber,
+                                              const float paddingValue) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
         //Get the real world deformation in the floating space
         float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
@@ -142,11 +142,11 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
 __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
                                               cudaTextureObject_t floatingTexture,
                                               cudaTextureObject_t deformationFieldTexture,
-                                              mat44 floatingMatrix,
-                                              int3 floatingDim,
-                                              size_t activeVoxelNumber,
-                                              float paddingValue) {
-    const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+                                              const mat44 floatingMatrix,
+                                              const int3 floatingDim,
+                                              const unsigned activeVoxelNumber,
+                                              const float paddingValue) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
         //Get the real world deformation in the floating space
         float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 2ce6057e..dbf09b17 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -80,8 +80,7 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage,
                           float **warped_d,
                           int **mask_d,
                           int activeVoxelNumber) {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     // Copy the constant memory variables
     const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
@@ -102,9 +101,9 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage,
     float *absoluteValues_d;
     NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValues_d, activeVoxelNumber * sizeof(float)));
     // Compute the absolute values
-    const unsigned int Grid_reg_getSquaredDifference =
-        (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getSquaredDifference));
-    dim3 B1(NR_BLOCK->Block_reg_getSquaredDifference, 1, 1);
+    const unsigned Grid_reg_getSquaredDifference =
+        (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getSquaredDifference));
+    dim3 B1(blockSize->reg_getSquaredDifference, 1, 1);
     dim3 G1(Grid_reg_getSquaredDifference, Grid_reg_getSquaredDifference, 1);
     if (referenceDim.z > 1)
         reg_getSquaredDifference3D_kernel <<< G1, B1 >>> (absoluteValues_d);
@@ -141,8 +140,7 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
                                       float maxSD,
                                       int *mask_d,
                                       int activeVoxelNumber) {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     // Copy the constant memory variables
     const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
@@ -163,9 +161,9 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
     NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, spaGradient_d, voxelNumber * sizeof(float4)));
     // Set the gradient image to zero
     NR_CUDA_SAFE_CALL(cudaMemset(ssdGradient_d, 0, voxelNumber * sizeof(float4)))
-        const unsigned int Grid_reg_getSSDGradient =
-        (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getSSDGradient));
-    dim3 B1(NR_BLOCK->Block_reg_getSSDGradient, 1, 1);
+        const unsigned Grid_reg_getSSDGradient =
+        (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getSSDGradient));
+    dim3 B1(blockSize->reg_getSSDGradient, 1, 1);
     dim3 G1(Grid_reg_getSSDGradient, Grid_reg_getSSDGradient, 1);
     if (referenceDim.z > 1)
         reg_getSSDGradient3D_kernel <<< G1, B1 >>> (ssdGradient_d);
diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu
index 24b8fd10..d145915b 100755
--- a/reg-lib/cuda/_reg_ssd_kernels.cu
+++ b/reg-lib/cuda/_reg_ssd_kernels.cu
@@ -31,7 +31,7 @@ __global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference)
     if(tid<c_ActiveVoxelNumber){
 
         int3 imageSize = c_ReferenceImageDim;
-        unsigned int index=tex1Dfetch(maskTexture,tid);
+        unsigned index=tex1Dfetch(maskTexture,tid);
         const int z = index/(imageSize.x*imageSize.y);
         const int tempIndex = index - z*imageSize.x*imageSize.y;
         const int y = tempIndex/imageSize.x;
@@ -54,7 +54,7 @@ __global__ void reg_getSquaredDifference2D_kernel(float *squaredDifference)
     if(tid<c_ActiveVoxelNumber){
 
         int3 imageSize = c_ReferenceImageDim;
-        unsigned int index=tex1Dfetch(maskTexture,tid);
+        unsigned index=tex1Dfetch(maskTexture,tid);
         const int y = index/imageSize.x;
         const int x = index - y*imageSize.x;
 
@@ -75,7 +75,7 @@ __global__ void reg_getSSDGradient2D_kernel(float4 *ssdGradient)
     if(tid<c_ActiveVoxelNumber){
 
         int3 imageSize = c_ReferenceImageDim;
-        unsigned int index = tex1Dfetch(maskTexture,tid);
+        unsigned index = tex1Dfetch(maskTexture,tid);
         const int y = index/imageSize.x;
         const int x = index - y*imageSize.x;
 
@@ -112,7 +112,7 @@ __global__ void reg_getSSDGradient3D_kernel(float4 *ssdGradient)
     if(tid<c_ActiveVoxelNumber){
 
         int3 imageSize = c_ReferenceImageDim;
-        unsigned int index = tex1Dfetch(maskTexture,tid);
+        unsigned index = tex1Dfetch(maskTexture,tid);
         const int z = index/(imageSize.x*imageSize.y);
         const int tempIndex = index - z*imageSize.x*imageSize.y;
         const int y = tempIndex/imageSize.x;
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 21ccde5a..d2e1b7ad 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -23,8 +23,7 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
                                       float4 *nodeNMIGradientArray_d,
                                       float weight)
 {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     const int nodeNumber = CalcVoxelNumber(*controlPointImage);
     const int voxelNumber = CalcVoxelNumber(*targetImage);
@@ -45,8 +44,8 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
 
     NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber*sizeof(float4)));
 
-    const unsigned int Grid_reg_voxelCentric2NodeCentric = (unsigned int)ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_voxelCentric2NodeCentric));
-    dim3 B1(NR_BLOCK->Block_reg_voxelCentric2NodeCentric,1,1);
+    const unsigned Grid_reg_voxelCentric2NodeCentric = (unsigned)ceil(sqrtf((float)nodeNumber/(float)blockSize->reg_voxelCentric2NodeCentric));
+    dim3 B1(blockSize->reg_voxelCentric2NodeCentric,1,1);
 	dim3 G1(Grid_reg_voxelCentric2NodeCentric,Grid_reg_voxelCentric2NodeCentric,1);
     reg_voxelCentric2NodeCentric_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d);
 	NR_CUDA_CHECK_KERNEL(G1,B1);
@@ -59,8 +58,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
                                                     nifti_image *controlPointImage,
                                                     float4 *nodeNMIGradientArray_d)
 {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     const int nodeNumber = CalcVoxelNumber(*controlPointImage);
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)));
@@ -75,10 +73,10 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
     NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h));
     NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3*sizeof(float4)));
 
-    const unsigned int Grid_reg_convertNMIGradientFromVoxelToRealSpace =
-        (unsigned int)ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_convertNMIGradientFromVoxelToRealSpace));
+    const unsigned Grid_reg_convertNMIGradientFromVoxelToRealSpace =
+        (unsigned)ceil(sqrtf((float)nodeNumber/(float)blockSize->reg_convertNMIGradientFromVoxelToRealSpace));
     dim3 G1(Grid_reg_convertNMIGradientFromVoxelToRealSpace,Grid_reg_convertNMIGradientFromVoxelToRealSpace,1);
-    dim3 B1(NR_BLOCK->Block_reg_convertNMIGradientFromVoxelToRealSpace,1,1);
+    dim3 B1(blockSize->reg_convertNMIGradientFromVoxelToRealSpace,1,1);
 
     _reg_convertNMIGradientFromVoxelToRealSpace_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d);
     NR_CUDA_CHECK_KERNEL(G1,B1);
@@ -92,8 +90,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
                                 float sigma,
                                 bool smoothXYZ[8])
 {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     const int voxelNumber = CalcVoxelNumber(*image);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
@@ -140,29 +137,29 @@ void reg_gaussianSmoothing_gpu( nifti_image *image,
                 NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float)));
                 NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4)));
 
-				unsigned int Grid_reg_ApplyConvolutionWindow;
+				unsigned Grid_reg_ApplyConvolutionWindow;
                 dim3 B,G;
                 switch(n){
                     case 1:
                         Grid_reg_ApplyConvolutionWindow =
-                            (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX));
-                        B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX,1,1);
+                            (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongX));
+                        B=dim3(blockSize->reg_ApplyConvolutionWindowAlongX,1,1);
                         G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                         _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage, kernelSize);
                         NR_CUDA_CHECK_KERNEL(G,B);
                         break;
                     case 2:
                         Grid_reg_ApplyConvolutionWindow =
-                            (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY));
-                        B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY,1,1);
+                            (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongY));
+                        B=dim3(blockSize->reg_ApplyConvolutionWindowAlongY,1,1);
                         G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                         _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage, kernelSize);
                         NR_CUDA_CHECK_KERNEL(G,B);
                         break;
                     case 3:
                         Grid_reg_ApplyConvolutionWindow =
-                            (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ));
-                        B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ,1,1);
+                            (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongZ));
+                        B=dim3(blockSize->reg_ApplyConvolutionWindowAlongZ,1,1);
                         G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                         _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage, kernelSize);
                         NR_CUDA_CHECK_KERNEL(G,B);
@@ -182,8 +179,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
                                         float4 *imageArray_d,
 										float *spacingVoxel)
 {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     const int voxelNumber = CalcVoxelNumber(*image);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
@@ -220,29 +216,29 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
 
             NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4)));
 
-            unsigned int Grid_reg_ApplyConvolutionWindow;
+            unsigned Grid_reg_ApplyConvolutionWindow;
             dim3 B,G;
             switch(n){
                 case 0:
                     Grid_reg_ApplyConvolutionWindow =
-                        (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX));
-                    B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX,1,1);
+                        (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongX));
+                    B=dim3(blockSize->reg_ApplyConvolutionWindowAlongX,1,1);
                     G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                     _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage_d, kernelSize);
                     NR_CUDA_CHECK_KERNEL(G,B);
                     break;
                 case 1:
                     Grid_reg_ApplyConvolutionWindow =
-                        (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY));
-                    B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY,1,1);
+                        (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongY));
+                    B=dim3(blockSize->reg_ApplyConvolutionWindowAlongY,1,1);
                     G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                     _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage_d, kernelSize);
                     NR_CUDA_CHECK_KERNEL(G,B);
                     break;
                 case 2:
                     Grid_reg_ApplyConvolutionWindow =
-                        (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ));
-                    B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ,1,1);
+                        (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongZ));
+                    B=dim3(blockSize->reg_ApplyConvolutionWindowAlongZ,1,1);
                     G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
                     _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage_d, kernelSize);
                     NR_CUDA_CHECK_KERNEL(G,B);
@@ -259,72 +255,67 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
 /* *************************************************************** */
 void reg_multiplyValue_gpu(int num, float4 *array_d, float value)
 {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float)));
 
-    const unsigned int Grid_reg_multiplyValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
+    const unsigned Grid_reg_multiplyValues = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic));
     dim3 G=dim3(Grid_reg_multiplyValues,Grid_reg_multiplyValues,1);
-    dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
+    dim3 B=dim3(blockSize->reg_arithmetic,1,1);
     reg_multiplyValue_kernel_float4<<<G,B>>>(array_d);
     NR_CUDA_CHECK_KERNEL(G,B);
 }
 /* *************************************************************** */
 void reg_addValue_gpu(int num, float4 *array_d, float value)
 {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float)));
 
-    const unsigned int Grid_reg_addValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
+    const unsigned Grid_reg_addValues = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic));
     dim3 G=dim3(Grid_reg_addValues,Grid_reg_addValues,1);
-    dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
+    dim3 B=dim3(blockSize->reg_arithmetic,1,1);
     reg_addValue_kernel_float4<<<G,B>>>(array_d);
     NR_CUDA_CHECK_KERNEL(G,B);
 }
 /* *************************************************************** */
 void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d)
 {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
 
-    const unsigned int Grid_reg_multiplyArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
+    const unsigned Grid_reg_multiplyArrays = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic));
     dim3 G=dim3(Grid_reg_multiplyArrays,Grid_reg_multiplyArrays,1);
-    dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
+    dim3 B=dim3(blockSize->reg_arithmetic,1,1);
     reg_multiplyArrays_kernel_float4<<<G,B>>>(array1_d,array2_d);
     NR_CUDA_CHECK_KERNEL(G,B);
 }
 /* *************************************************************** */
 void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d)
 {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
 
-    const unsigned int Grid_reg_addArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
+    const unsigned Grid_reg_addArrays = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic));
     dim3 G=dim3(Grid_reg_addArrays,Grid_reg_addArrays,1);
-    dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
+    dim3 B=dim3(blockSize->reg_arithmetic,1,1);
     reg_addArrays_kernel_float4<<<G,B>>>(array1_d,array2_d);
     NR_CUDA_CHECK_KERNEL(G,B);
 }
 /* *************************************************************** */
 void reg_fillMaskArray_gpu(int num, int *array1_d)
 {
-    // Get the BlockSize - The values have been set in CudaContextSingleton
-    NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0);
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
 
-    const unsigned int Grid_reg_fillMaskArray = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic));
+    const unsigned Grid_reg_fillMaskArray = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic));
     dim3 G=dim3(Grid_reg_fillMaskArray,Grid_reg_fillMaskArray,1);
-    dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1);
+    dim3 B=dim3(blockSize->reg_arithmetic,1,1);
     reg_fillMaskArray_kernel<<<G,B>>>(array1_d);
     NR_CUDA_CHECK_KERNEL(G,B);
 }
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 12374e63..0e5dca7c 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -14,7 +14,6 @@
 
 #include "_reg_common_cuda.h"
 #include "_reg_tools.h"
-#include "_reg_blocksize_gpu.h"
 #include <thrust/device_ptr.h>
 #include <thrust/reduce.h>
 
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index 8f86fa90..0124a95c 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -14,7 +14,7 @@
 #include "affineDeformationKernel.h"
 //CUDA affine kernel
 /* *************************************************************** */
-__device__ __inline__ void getPosition(float* position, float* matrix, double* voxel, const unsigned int idx)
+__device__ __inline__ void getPosition(float* position, float* matrix, double* voxel, const unsigned idx)
 {
    position[idx] = (float) ((double) matrix[idx * 4 + 0] * voxel[0] +
          (double) matrix[idx * 4 + 1] * voxel[1] +
@@ -22,7 +22,7 @@ __device__ __inline__ void getPosition(float* position, float* matrix, double* v
          (double) matrix[idx * 4 + 3]);
 }
 /* *************************************************************** */
-__device__ __inline__ double getPosition(float* matrix, double* voxel, const unsigned int idx)
+__device__ __inline__ double getPosition(float* matrix, double* voxel, const unsigned idx)
 {
    unsigned long index = idx * 4;
    return (double)matrix[index++] * voxel[0] +
@@ -39,9 +39,9 @@ __global__ void affineKernel(float* transformationMatrix,
                              const bool composition)
 {
    // Get the current coordinate
-   const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
-   const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
-   const unsigned int z = blockIdx.z * blockDim.z + threadIdx.z;
+   const unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
+   const unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
+   const unsigned z = blockIdx.z * blockDim.z + threadIdx.z;
    const unsigned long index = x + dims.x * (y + z * dims.y);
 
    if (z<dims.z && y<dims.y && x<dims.x &&  mask[index] >= 0)
@@ -69,13 +69,13 @@ void launchAffine(mat44 *affineTransformation,
                   float **trans_d,
                   bool compose) {
 
-   const unsigned int xThreads = 8;
-   const unsigned int yThreads = 8;
-   const unsigned int zThreads = 8;
+   const unsigned xThreads = 8;
+   const unsigned yThreads = 8;
+   const unsigned zThreads = 8;
 
-   const unsigned int xBlocks = ((deformationField->nx % xThreads) == 0) ? (deformationField->nx / xThreads) : (deformationField->nx / xThreads) + 1;
-   const unsigned int yBlocks = ((deformationField->ny % yThreads) == 0) ? (deformationField->ny / yThreads) : (deformationField->ny / yThreads) + 1;
-   const unsigned int zBlocks = ((deformationField->nz % zThreads) == 0) ? (deformationField->nz / zThreads) : (deformationField->nz / zThreads) + 1;
+   const unsigned xBlocks = ((deformationField->nx % xThreads) == 0) ? (deformationField->nx / xThreads) : (deformationField->nx / xThreads) + 1;
+   const unsigned yBlocks = ((deformationField->ny % yThreads) == 0) ? (deformationField->ny / yThreads) : (deformationField->ny / yThreads) + 1;
+   const unsigned zBlocks = ((deformationField->nz % zThreads) == 0) ? (deformationField->nz / zThreads) : (deformationField->nz / zThreads) + 1;
 
    dim3 G1_b(xBlocks, yBlocks, zBlocks);
    dim3 B1_b(xThreads, yThreads, zThreads);
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 9aa08e44..1947f066 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -87,7 +87,7 @@ float blockReduce2DSum(float val, int tid)
    shared[tid] = val;
    __syncthreads();
 
-	for (unsigned int i = 8; i > 0; i >>= 1){
+	for (unsigned i = 8; i > 0; i >>= 1){
         if (tid < i) {
             shared[tid] += shared[tid + i];
         }
@@ -103,7 +103,7 @@ float blockReduceSum(float val, int tid)
    shared[tid] = val;
    __syncthreads();
 
-	for (unsigned int i = 32; i > 0; i >>= 1){
+	for (unsigned i = 32; i > 0; i >>= 1){
         if (tid < i) {
             shared[tid] += shared[tid + i];
         }
@@ -116,21 +116,21 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
                                       float *referencePosition,
                                       int *mask,
                                       float* referenceMatrix_xyz,
-                                      unsigned int *definedBlock)
+                                      unsigned *definedBlock)
 {
 	extern __shared__ float sWarpedValues[];
 	// Compute the current block index
-    const unsigned int bid = blockIdx.y * gridDim.x + blockIdx.x;
+    const unsigned bid = blockIdx.y * gridDim.x + blockIdx.x;
 
 	const int currentBlockIndex = tex1Dfetch(totalBlock_texture, bid);
 	if (currentBlockIndex > -1) {
 
-		const unsigned int idy = threadIdx.x;
-		const unsigned int idx = threadIdx.y;
-		const unsigned int tid = idy * 4 + idx;
+		const unsigned idy = threadIdx.x;
+		const unsigned idx = threadIdx.y;
+		const unsigned tid = idy * 4 + idx;
 
-		const unsigned int xImage = blockIdx.x * 4 + idx;
-		const unsigned int yImage = blockIdx.y * 4 + idy;
+		const unsigned xImage = blockIdx.x * 4 + idx;
+		const unsigned yImage = blockIdx.y * 4 + idy;
 
 		//populate shared memory with resultImageArray's values
 		for (int y=-1; y<2; ++y) {
@@ -160,7 +160,7 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
 					tex1Dfetch(referenceImageArray_texture, voxIndex) : nanf("sNaN");
 		const bool finiteReference = isfinite(rReferenceValue);
 		rReferenceValue = finiteReference ? rReferenceValue : 0.f;
-		const unsigned int referenceSize = __syncthreads_count(finiteReference);
+		const unsigned referenceSize = __syncthreads_count(finiteReference);
 
         float bestDisplacement[2] = {nanf("sNaN"), 0.0f};
         float bestCC = 0;
@@ -171,13 +171,13 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
 			const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f;
 			const float referenceVar = blockReduce2DSum(referenceTemp * referenceTemp, tid);
 			// iteration over the result blocks (block matching part)
-			for (unsigned int y=1; y<8; ++y) {
-				for (unsigned int x=1; x<8; ++x) {
+			for (unsigned y=1; y<8; ++y) {
+				for (unsigned x=1; x<8; ++x) {
 
-					const unsigned int sharedIndex = ( y + idy ) * 12 + x + idx;
+					const unsigned sharedIndex = ( y + idy ) * 12 + x + idx;
 					const float rWarpedValue = sWarpedValues[sharedIndex];
 					const bool overlap = isfinite(rWarpedValue) && finiteReference;
-					const unsigned int warpedSize = __syncthreads_count(overlap);
+					const unsigned warpedSize = __syncthreads_count(overlap);
 
                     if (warpedSize > 8) {
                         //the reference values must remain intact at each loop, so please do not touch this!
@@ -209,7 +209,7 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
 		}
 
         if (tid==0){
-			const unsigned int posIdx = 2 * currentBlockIndex;
+			const unsigned posIdx = 2 * currentBlockIndex;
 			const float referencePosition_temp[2] = {(float)xImage, (float)yImage};
 
 			bestDisplacement[0] += referencePosition_temp[0];
@@ -229,13 +229,13 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
 __inline__ __device__
 float2 REDUCE_TEST(float* sData,
                    float data,
-                   unsigned int tid)
+                   unsigned tid)
 {
 	sData[tid] = data;
 	__syncthreads();
 
 	bool seconHalf = tid > 63 ? true : false;
-	for (unsigned int i = 32; i > 0; i >>= 1){
+	for (unsigned i = 32; i > 0; i >>= 1){
 		if (tid < i) sData[tid] += sData[tid + i];
 		if (seconHalf && tid < 64 + i) sData[tid] += sData[tid + i];
 		__syncthreads();
@@ -250,26 +250,26 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
                                       float *referencePosition,
                                       int *mask,
                                       float* referenceMatrix_xyz,
-                                      unsigned int *definedBlock)
+                                      unsigned *definedBlock)
 {
    extern __shared__ float sWarpedValues[];
    float *sData = &sWarpedValues[12*12*16];
 
    // Compute the current block index
-   const unsigned int bid0 = (2*blockIdx.z * gridDim.y + blockIdx.y) *
+   const unsigned bid0 = (2*blockIdx.z * gridDim.y + blockIdx.y) *
          gridDim.x + blockIdx.x;
-   const unsigned int bid1 = bid0 + gridDim.x * gridDim.y;
+   const unsigned bid1 = bid0 + gridDim.x * gridDim.y;
    int currentBlockIndex[2] = {tex1Dfetch(totalBlock_texture, bid0),
                                tex1Dfetch(totalBlock_texture, bid1)};
    currentBlockIndex[1] = (2*blockIdx.z+1)<c_BlockDim.z ? currentBlockIndex[1] : -1;
    if (currentBlockIndex[0] > -1 || currentBlockIndex[1] > -1) {
-      const unsigned int idx = threadIdx.x;
-      const unsigned int idy = threadIdx.y;
-      const unsigned int idz = threadIdx.z;
-      const unsigned int tid = (idz*4+idy)*4+idx;
-      const unsigned int xImage = blockIdx.x * 4 + idx;
-      const unsigned int yImage = blockIdx.y * 4 + idy;
-      const unsigned int zImage = blockIdx.z * 8 + idz;
+      const unsigned idx = threadIdx.x;
+      const unsigned idy = threadIdx.y;
+      const unsigned idz = threadIdx.z;
+      const unsigned tid = (idz*4+idy)*4+idx;
+      const unsigned xImage = blockIdx.x * 4 + idx;
+      const unsigned yImage = blockIdx.y * 4 + idy;
+      const unsigned zImage = blockIdx.z * 8 + idz;
 
       //populate shared memory with resultImageArray's values
       for (int z=-1 ; z<2; z+=2) {
@@ -281,7 +281,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
 
                const int sharedIndex = (((z+1)*4+idz)*12+(y+1)*4+idy)*12+(x+1)*4+idx;
 
-               const unsigned int indexXYZIn = xImageIn + c_ImageSize.x *
+               const unsigned indexXYZIn = xImageIn + c_ImageSize.x *
                      (yImageIn + zImageIn * c_ImageSize.y);
 
                const bool valid =
@@ -294,7 +294,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
          }
       }
 
-      const unsigned int voxIndex = ( zImage * c_ImageSize.y + yImage ) *
+      const unsigned voxIndex = ( zImage * c_ImageSize.y + yImage ) *
             c_ImageSize.x + xImage;
       const bool referenceInBounds =
             xImage < c_ImageSize.x &&
@@ -321,11 +321,11 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
          float2 referenceVar = REDUCE_TEST(sData, referenceTemp*referenceTemp, tid);
 
          // iteration over the result blocks (block matching part)
-         for (unsigned int z=1; z<8; ++z) {
-            for (unsigned int y=1; y<8; ++y) {
-               for (unsigned int x=1; x<8; ++x) {
+         for (unsigned z=1; z<8; ++z) {
+            for (unsigned y=1; y<8; ++y) {
+               for (unsigned x=1; x<8; ++x) {
 
-                  const unsigned int sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx;
+                  const unsigned sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx;
                   const float rWarpedValue = sWarpedValues[sharedIndex];
                   const bool overlap = isfinite(rWarpedValue) && finiteReference;
                   tempVal = REDUCE_TEST(sData, overlap ? 1.0f : 0.0f, tid);
@@ -384,7 +384,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
       }
 
       if(tid==0 && currentBlockIndex[0]>-1){
-         const unsigned int posIdx = 3 * currentBlockIndex[0];
+         const unsigned posIdx = 3 * currentBlockIndex[0];
          warpedPosition[posIdx] = NAN;
          if (isfinite(bestDisp[0][0])){
             const float referencePosition_temp[3] = { (float)xImage,
@@ -403,7 +403,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
          }
       }
       if(tid==64 && currentBlockIndex[1]>-1){
-         const unsigned int posIdx = 3 * currentBlockIndex[1];
+         const unsigned posIdx = 3 * currentBlockIndex[1];
          warpedPosition[posIdx] = NAN;
          if (isfinite(bestDisp[1][0])){
             const float referencePosition_temp[3] = {(float)xImage,
@@ -430,21 +430,21 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
                                       float *referencePosition,
                                       int *mask,
                                       float* referenceMatrix_xyz,
-                                      unsigned int *definedBlock)
+                                      unsigned *definedBlock)
 {
 	extern __shared__ float sWarpedValues[];
 	// Compute the current block index
-	const unsigned int bid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x ;
+	const unsigned bid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x ;
 
 	const int currentBlockIndex = tex1Dfetch(totalBlock_texture, bid);
 	if (currentBlockIndex > -1) {
-		const unsigned int idx = threadIdx.x;
-		const unsigned int idy = threadIdx.y;
-		const unsigned int idz = threadIdx.z;
-		const unsigned int tid = (idz*4+idy)*4+idx;
-		const unsigned int xImage = blockIdx.x * 4 + idx;
-		const unsigned int yImage = blockIdx.y * 4 + idy;
-		const unsigned int zImage = blockIdx.z * 4 + idz;
+		const unsigned idx = threadIdx.x;
+		const unsigned idy = threadIdx.y;
+		const unsigned idz = threadIdx.z;
+		const unsigned tid = (idz*4+idy)*4+idx;
+		const unsigned xImage = blockIdx.x * 4 + idx;
+		const unsigned yImage = blockIdx.y * 4 + idy;
+		const unsigned zImage = blockIdx.z * 4 + idz;
 
 		//populate shared memory with resultImageArray's values
 		for (int z=-1 ; z<2; ++z) {
@@ -456,7 +456,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
 
 					const int sharedIndex = (((z+1)*4+idz)*12+(y+1)*4+idy)*12+(x+1)*4+idx;
 
-					const unsigned int indexXYZIn = xImageIn + c_ImageSize.x *
+					const unsigned indexXYZIn = xImageIn + c_ImageSize.x *
 							(yImageIn + zImageIn * c_ImageSize.y);
 
 					const bool valid =
@@ -471,7 +471,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
 
 		//for most cases we need this out of th loop
 		//value if the block is 4x4x4 NaN otherwise
-		const unsigned int voxIndex = ( zImage * c_ImageSize.y + yImage ) *
+		const unsigned voxIndex = ( zImage * c_ImageSize.y + yImage ) *
 				c_ImageSize.x + xImage;
 		const bool referenceInBounds =
 				xImage < c_ImageSize.x &&
@@ -481,7 +481,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
 					tex1Dfetch(referenceImageArray_texture, voxIndex) : nanf("sNaN");
 		const bool finiteReference = isfinite(rReferenceValue);
 		rReferenceValue = finiteReference ? rReferenceValue : 0.f;
-		const unsigned int referenceSize = __syncthreads_count(finiteReference);
+		const unsigned referenceSize = __syncthreads_count(finiteReference);
 
         float bestDisplacement[3] = {nanf("sNaN"), 0.0f, 0.0f };
         float bestCC = 0.0f;
@@ -493,14 +493,14 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
 			const float referenceVar = blockReduceSum(referenceTemp * referenceTemp, tid);
 
 			// iteration over the result blocks (block matching part)
-			for (unsigned int z=1; z<8; ++z) {
-				for (unsigned int y=1; y<8; ++y) {
-					for (unsigned int x=1; x<8; ++x) {
+			for (unsigned z=1; z<8; ++z) {
+				for (unsigned y=1; y<8; ++y) {
+					for (unsigned x=1; x<8; ++x) {
 
-						const unsigned int sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx;
+						const unsigned sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx;
 						const float rWarpedValue = sWarpedValues[sharedIndex];
 						const bool overlap = isfinite(rWarpedValue) && finiteReference;
-						const unsigned int warpedSize = __syncthreads_count(overlap);
+						const unsigned warpedSize = __syncthreads_count(overlap);
 
 						if (warpedSize > 32) {
 
@@ -535,7 +535,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
 		}
 
 		if (tid==0) {
-			const unsigned int posIdx = 3 * currentBlockIndex;
+			const unsigned posIdx = 3 * currentBlockIndex;
 			const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage };
 
 			bestDisplacement[0] += referencePosition_temp[0];
@@ -573,16 +573,16 @@ void block_matching_method_gpu(nifti_image *targetImage,
 	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_BlockDim,&blockSize,sizeof(uint3)));
 
 	// Texture binding
-	const unsigned int numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2];
+	const unsigned numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2];
 	NR_CUDA_SAFE_CALL(cudaBindTexture(0, referenceImageArray_texture, *targetImageArray_d, targetImage->nvox * sizeof(float)));
 	NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedImageArray_texture, *resultImageArray_d, targetImage->nvox * sizeof(float)));
 	NR_CUDA_SAFE_CALL(cudaBindTexture(0, totalBlock_texture, *totalBlock_d, numBlocks * sizeof(int)));
 
-	unsigned int *definedBlock_d;
-	unsigned int *definedBlock_h = (unsigned int*) malloc(sizeof(unsigned int));
+	unsigned *definedBlock_d;
+	unsigned *definedBlock_h = (unsigned*) malloc(sizeof(unsigned));
 	*definedBlock_h = 0;
-	NR_CUDA_SAFE_CALL(cudaMalloc((void** )(&definedBlock_d), sizeof(unsigned int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpy(definedBlock_d, definedBlock_h, sizeof(unsigned int), cudaMemcpyHostToDevice));
+	NR_CUDA_SAFE_CALL(cudaMalloc((void** )(&definedBlock_d), sizeof(unsigned)));
+	NR_CUDA_SAFE_CALL(cudaMemcpy(definedBlock_d, definedBlock_h, sizeof(unsigned), cudaMemcpyHostToDevice));
 
 
 	if (params->stepSize!=1 || params->voxelCaptureRange!=3){
@@ -595,15 +595,15 @@ void block_matching_method_gpu(nifti_image *targetImage,
 	dim3 BlocksGrid3D(
 				params->blockNumber[0],
 			params->blockNumber[1],
-			(unsigned int)reg_ceil((float)params->blockNumber[2]/2.f));
-	unsigned int sMem = (128 + 4*3 * 4*3 * 4*4) * sizeof(float);
+			(unsigned)reg_ceil((float)params->blockNumber[2]/2.f));
+	unsigned sMem = (128 + 4*3 * 4*3 * 4*4) * sizeof(float);
 #else
     dim3 BlockDims1D(4,4,4);
     dim3 BlocksGrid3D(
                 params->blockNumber[0],
             params->blockNumber[1],
             params->blockNumber[2]);
-    unsigned int sMem = (64 + 4*3 * 4*3 * 4*3) * sizeof(float); // (3*4)^3
+    unsigned sMem = (64 + 4*3 * 4*3 * 4*3) * sizeof(float); // (3*4)^3
 #endif
 
 	if (targetImage->nz == 1){
@@ -629,7 +629,7 @@ void block_matching_method_gpu(nifti_image *targetImage,
     NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
 #endif
 
-	NR_CUDA_SAFE_CALL(cudaMemcpy((void * )definedBlock_h, (void * )definedBlock_d, sizeof(unsigned int), cudaMemcpyDeviceToHost));
+	NR_CUDA_SAFE_CALL(cudaMemcpy((void * )definedBlock_h, (void * )definedBlock_d, sizeof(unsigned), cudaMemcpyDeviceToHost));
 	params->definedActiveBlockNumber = *definedBlock_h;
 	NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceImageArray_texture));
 	NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedImageArray_texture));
diff --git a/reg-lib/cuda/checkCudaCard.cpp b/reg-lib/cuda/checkCudaCard.cpp
index 58fd613d..9ca46a7d 100755
--- a/reg-lib/cuda/checkCudaCard.cpp
+++ b/reg-lib/cuda/checkCudaCard.cpp
@@ -23,7 +23,7 @@ int main() {
     }
 
     //detects device capability and picks the best
-    for( unsigned int i = 0; i < deviceCount; ++i ) {
+    for( unsigned i = 0; i < deviceCount; ++i ) {
         cudaSetDevice(i);
         cudaDeviceProp deviceProp;
         cudaGetDeviceProperties(&deviceProp, i);
diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu
index a30cfce3..47615c5f 100644
--- a/reg-lib/cuda/optimizeKernel.cu
+++ b/reg-lib/cuda/optimizeKernel.cu
@@ -81,11 +81,11 @@ void uploadMat44(mat44 lastTransformation, float* transform_d) {
 }
 /* *************************************************************** */
 //threads: 512 | blocks:numEquations/512
-__global__ void transformWarpedPointsKernel(float* transform, float* in, float* out, unsigned int definedBlockNum)
+__global__ void transformWarpedPointsKernel(float* transform, float* in, float* out, unsigned definedBlockNum)
 {
-    const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
+    const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
     if (tid < definedBlockNum) {
-        const unsigned int posIdx = 3 * tid;
+        const unsigned posIdx = 3 * tid;
         in += posIdx;
         out += posIdx;
         reg_mat44_mul_cuda<float>(transform, in, out);
@@ -99,10 +99,10 @@ __global__ void trimAndInvertSingularValuesKernel(float* sigma)
 }
 /* *************************************************************** */
 //launched as ldm blocks n threads
-__global__ void scaleV(float* V, const unsigned int ldm, const unsigned int n, float*w)
+__global__ void scaleV(float* V, const unsigned ldm, const unsigned n, float*w)
 {
-    unsigned int k = blockIdx.x;
-    unsigned int j = threadIdx.x;
+    unsigned k = blockIdx.x;
+    unsigned j = threadIdx.x;
     V[IDX2C(j, k, ldm)] = (float)((double)V[IDX2C(j, k, ldm)] * (double)w[j]);
 }
 /* *************************************************************** */
@@ -110,12 +110,12 @@ __global__ void scaleV(float* V, const unsigned int ldm, const unsigned int n, f
 __global__ void permuteAffineMatrix(float* transform)
 {
     __shared__ float buffer[16];
-    const unsigned int i = threadIdx.x;
+    const unsigned i = threadIdx.x;
 
     buffer[i] = transform[i];
     __syncthreads();
-    const unsigned int idx33 = (i / 3) * 4 + i % 3;
-    const unsigned int idx34 = (i % 3) * 4 + 3;
+    const unsigned idx33 = (i / 3) * 4 + i % 3;
+    const unsigned idx34 = (i % 3) * 4 + 3;
 
     if (i < 9) transform[idx33] = buffer[i];
     else if (i < 12)transform[idx34] = buffer[i];
@@ -124,12 +124,12 @@ __global__ void permuteAffineMatrix(float* transform)
 }
 /* *************************************************************** */
 //threads: 512 | blocks:numEquations/512
-__global__ void populateMatrixA(float* A, float *reference, unsigned int numBlocks)
+__global__ void populateMatrixA(float* A, float *reference, unsigned numBlocks)
 {
-    const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int c = tid * 3;
-    //	const unsigned int n = 12;
-    const unsigned int lda = numBlocks * 3;
+    const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
+    const unsigned c = tid * 3;
+    //	const unsigned n = 12;
+    const unsigned lda = numBlocks * 3;
 
     if (tid < numBlocks) {
         reference += c;
@@ -155,10 +155,10 @@ __global__ void populateMatrixA(float* A, float *reference, unsigned int numBloc
 }
 /* *************************************************************** */
 //threads: 512 | blocks:numEquations/512
-__global__ void populateLengthsKernel(float* lengths, float* warped_d, float* newWarped_d, unsigned int numEquations)
+__global__ void populateLengthsKernel(float* lengths, float* warped_d, float* newWarped_d, unsigned numEquations)
 {
-    unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    unsigned int c = tid * 3;
+    unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
+    unsigned c = tid * 3;
 
     if (tid < numEquations) {
         newWarped_d += c;
@@ -169,7 +169,7 @@ __global__ void populateLengthsKernel(float* lengths, float* warped_d, float* ne
 }
 /* *************************************************************** */
 //launched as 1 block 1 thread
-__global__ void outputMatFlat(float* mat, const unsigned int ldm, const unsigned int n, char* msg)
+__global__ void outputMatFlat(float* mat, const unsigned ldm, const unsigned n, char* msg)
 {
     for (int i = 0; i < ldm * n; ++i)
         printf("%f | ", mat[i]);
@@ -177,7 +177,7 @@ __global__ void outputMatFlat(float* mat, const unsigned int ldm, const unsigned
 }
 /* *************************************************************** */
 //launched as 1 block 1 thread
-__global__ void outputMat(float* mat, const unsigned int ldm, const unsigned int n, char* msg)
+__global__ void outputMat(float* mat, const unsigned ldm, const unsigned n, char* msg)
 {
     for (int i = 0; i < ldm; ++i) {
         printf("%d ", i);
@@ -193,7 +193,7 @@ __global__ void outputMat(float* mat, const unsigned int ldm, const unsigned int
 * the function computes the SVD of a matrix A
 * A = V* x S x U, where V* is a (conjugate) transpose of V
 * */
-void cusolverSVD(float* A_d, unsigned int m, unsigned int n, float* S_d, float* VT_d, float* U_d) {
+void cusolverSVD(float* A_d, unsigned m, unsigned n, float* S_d, float* VT_d, float* U_d) {
 
     //CAST float* to double*
     /*
@@ -259,7 +259,7 @@ void cusolverSVD(float* A_d, unsigned int m, unsigned int n, float* S_d, float*
 * the function computes the Pseudoinverse from the products of the SVD factorisation of A
 * R = V x inv(S) x U*
 * */
-void cublasPseudoInverse(float* transformation, float *R_d, float* warped_d, float *VT_d, float* Sigma_d, float *U_d, const unsigned int m, const unsigned int n) {
+void cublasPseudoInverse(float* transformation, float *R_d, float* warped_d, float *VT_d, float* Sigma_d, float *U_d, const unsigned m, const unsigned n) {
     // First we make sure that the really small singular values
     // are set to 0. and compute the inverse by taking the reciprocal of the entries
 
@@ -299,9 +299,9 @@ double sortAndReduce(float* lengths_d,
                         float* reference_d,
                         float* warped_d,
                         float* newWarped_d,
-                        const unsigned int numBlocks,
-                        const unsigned int numToKeep,
-                        const unsigned int m) {
+                        const unsigned numBlocks,
+                        const unsigned numToKeep,
+                        const unsigned m) {
     //populateLengthsKernel
     populateLengthsKernel <<< numBlocks, 512 >>>(lengths_d, warped_d, newWarped_d, m / 3);
 
@@ -332,7 +332,7 @@ double sortAndReduce(float* lengths_d,
 /* *************************************************************** */
 //OPTIMIZER-----------------------------------------------
 // estimate an affine transformation using least square
-void getAffineMat3D(float* AR_d, float* Sigma_d, float* VT_d, float* U_d, float* reference_d, float* warped_d, float *transformation, const unsigned int numBlocks, unsigned int m, unsigned int n) {
+void getAffineMat3D(float* AR_d, float* Sigma_d, float* VT_d, float* U_d, float* reference_d, float* warped_d, float *transformation, const unsigned numBlocks, unsigned m, unsigned n) {
 
     //populate A
     populateMatrixA <<< numBlocks, 512 >>>(AR_d, reference_d, m / 3); //test 2
@@ -344,7 +344,7 @@ void getAffineMat3D(float* AR_d, float* Sigma_d, float* VT_d, float* U_d, float*
 
 }
 /* *************************************************************** */
-void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *AR_d, float* Sigma_d, float* U_d, float* VT_d, float * newWarpedPos_d, float* referencePos_d, float* warpedPos_d, float* lengths_d, const unsigned int numBlocks, const unsigned int num_to_keep, const unsigned int m, const unsigned int n) {
+void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *AR_d, float* Sigma_d, float* U_d, float* VT_d, float * newWarpedPos_d, float* referencePos_d, float* warpedPos_d, float* lengths_d, const unsigned numBlocks, const unsigned num_to_keep, const unsigned m, const unsigned n) {
 
     double lastDistance = std::numeric_limits<double>::max();
 
@@ -354,7 +354,7 @@ void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *AR_d, float*
     //get initial affine matrix
     getAffineMat3D(AR_d, Sigma_d, VT_d, U_d, referencePos_d, warpedPos_d, final_d, numBlocks, m, n);
 
-    for (unsigned int count = 0; count < MAX_ITERATIONS; ++count) {
+    for (unsigned count = 0; count < MAX_ITERATIONS; ++count) {
 
         // Transform the points in the reference
         transformWarpedPointsKernel <<< numBlocks, 512 >>>(final_d, referencePos_d, newWarpedPos_d, m / 3); //test 1
@@ -384,16 +384,16 @@ void optimize_affine3D_cuda(mat44* cpuMat,
                             float* reference_d,
                             float* warped_d,
                             float* newWarped_d,
-                            unsigned int m,
-                            unsigned int n,
-                            const unsigned int numToKeep,
+                            unsigned m,
+                            unsigned n,
+                            const unsigned numToKeep,
                             bool ilsIn,
                             bool isAffine) {
 
     //m | blockMatchingParams->activeBlockNumber * 3
     //n | 12
-    const unsigned int numEquations = m;
-    const unsigned int numBlocks = (numEquations % 512) ? (numEquations / 512) + 1 : numEquations / 512;
+    const unsigned numEquations = m;
+    const unsigned numBlocks = (numEquations % 512) ? (numEquations / 512) + 1 : numEquations / 512;
 
     uploadMat44(*cpuMat, final_d);
     transformWarpedPointsKernel <<< numBlocks, 512 >>>(final_d, warped_d, newWarped_d, m / 3); //test 1
diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h
index 8b76e56b..7e7926b4 100644
--- a/reg-lib/cuda/optimizeKernel.h
+++ b/reg-lib/cuda/optimizeKernel.h
@@ -11,16 +11,16 @@ void optimize_gpu(_reg_blockMatchingParam *blockMatchingParams,
                     bool affine = true);
 
 extern "C++"
-void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *A_d, float* Sigma_d, float* U_d, float* VT_d, float * newResultPos_d, float* targetPos_d, float* resultPos_d, float* lengths_d, const unsigned int numBlocks, const unsigned int num_to_keep, const unsigned int m, const unsigned int n);
+void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *A_d, float* Sigma_d, float* U_d, float* VT_d, float * newResultPos_d, float* targetPos_d, float* resultPos_d, float* lengths_d, const unsigned numBlocks, const unsigned num_to_keep, const unsigned m, const unsigned n);
 */
 extern "C++"
-void cusolverSVD(float* A_d, unsigned int m, unsigned int n, float* S_d, float* VT_d, float* U_d);
+void cusolverSVD(float* A_d, unsigned m, unsigned n, float* S_d, float* VT_d, float* U_d);
 
 extern "C++"
-void optimize_affine3D_cuda(mat44* cpuMat, float* final_d, float* A_d, float* U_d, float* Sigma_d, float* VT_d, float* lengths_d, float* reference_d, float* warped_d, float* newWarped_d, unsigned int m, unsigned int n, const unsigned int numToKeep, bool ilsIn, bool isAffine);
+void optimize_affine3D_cuda(mat44* cpuMat, float* final_d, float* A_d, float* U_d, float* Sigma_d, float* VT_d, float* lengths_d, float* reference_d, float* warped_d, float* newWarped_d, unsigned m, unsigned n, const unsigned numToKeep, bool ilsIn, bool isAffine);
 /*
 extern "C++"
-void getAffineMat3D(float* A_d, float* Sigma_d, float* VT_d, float* U_d, float* target_d, float* result_d, float* r_d, float *transformation, const unsigned int numBlocks, unsigned int m, unsigned int n);
+void getAffineMat3D(float* A_d, float* Sigma_d, float* VT_d, float* U_d, float* target_d, float* result_d, float* r_d, float *transformation, const unsigned numBlocks, unsigned m, unsigned n);
 
 extern "C++"
 void downloadMat44(mat44 *lastTransformation, float* transform_d);
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index ef4f0e07..aa2b044c 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -13,7 +13,7 @@
 #define SINC_KERNEL_SIZE SINC_KERNEL_RADIUS*2
 
 /* *************************************************************** */
-unsigned int min1(unsigned int a, unsigned int b)
+unsigned min1(unsigned a, unsigned b)
 {
 	return (a < b) ? a : b;
 }
@@ -136,7 +136,7 @@ __inline__ __device__ double interpLoop2D(float* floatingIntensity,
     int *previous,
     uint3 fi_xyz,
     float paddingValue,
-    unsigned int kernel_size)
+    unsigned kernel_size)
 {
     double intensity = 0;
 
@@ -149,7 +149,7 @@ __inline__ __device__ double interpLoop2D(float* floatingIntensity,
                 int X = previous[0] + a;
                 bool xInBounds = -1 < X && X < fi_xyz.x;
 
-                const unsigned int idx = Y * fi_xyz.x + X;
+                const unsigned idx = Y * fi_xyz.x + X;
 
                 xTempNewValue += (xInBounds && yInBounds) ? floatingIntensity[idx] * xBasis[a] : paddingValue * xBasis[a];
             }
@@ -165,7 +165,7 @@ __inline__ __device__ double interpLoop3D(float* floatingIntensity,
                                           int *previous,
                                           uint3 fi_xyz,
                                           float paddingValue,
-                                          unsigned int kernel_size)
+                                          unsigned kernel_size)
 {
 	double intensity = 0;
 	for (int c = 0; c < kernel_size; c++) {
@@ -179,7 +179,7 @@ __inline__ __device__ double interpLoop3D(float* floatingIntensity,
 			for (int a = 0; a < kernel_size; a++) {
 				int X = previous[0] + a;
 				bool xInBounds = -1 < X && X < fi_xyz.x;
-				const unsigned int idx = Z * fi_xyz.x * fi_xyz.y + Y * fi_xyz.x + X;
+				const unsigned idx = Z * fi_xyz.x * fi_xyz.y + Y * fi_xyz.x + X;
 
 				xTempNewValue += (xInBounds && yInBounds && zInBounds) ? floatingIntensity[idx] * xBasis[a] : paddingValue * xBasis[a];
 			}
@@ -212,7 +212,7 @@ __global__ void ResampleImage2D(float* floatingImage,
 
     while (index < voxelNumber.x) {
 
-        for (unsigned int t = 0; t < wi_tu.x * wi_tu.y; t++) {
+        for (unsigned t = 0; t < wi_tu.x * wi_tu.y; t++) {
 
             float *resultIntensity = &resultIntensityPtr[t * voxelNumber.x];
             float *floatingIntensity = &sourceIntensityPtr[t * voxelNumber.y];
@@ -305,7 +305,7 @@ __global__ void ResampleImage3D(float* floatingImage,
 
 	while (index < voxelNumber.x) {
 
-		for (unsigned int t = 0; t < wi_tu.x * wi_tu.y; t++) {
+		for (unsigned t = 0; t < wi_tu.x * wi_tu.y; t++) {
 
 			float *resultIntensity = &resultIntensityPtr[t * voxelNumber.x];
 			float *floatingIntensity = &sourceIntensityPtr[t * voxelNumber.y];
@@ -402,9 +402,9 @@ void launchResample(nifti_image *floatingImage,
 	//the below lines need to be moved to cu common
 	cudaDeviceProp prop;
 	cudaGetDeviceProperties(&prop, 0);
-	unsigned int maxThreads = 512;
-	unsigned int maxBlocks = 65365;
-	unsigned int blocks = (targetVoxelNumber % maxThreads) ? (targetVoxelNumber / maxThreads) + 1 : targetVoxelNumber / maxThreads;
+	unsigned maxThreads = 512;
+	unsigned maxBlocks = 65365;
+	unsigned blocks = (targetVoxelNumber % maxThreads) ? (targetVoxelNumber / maxThreads) + 1 : targetVoxelNumber / maxThreads;
 	blocks = min1(blocks, maxBlocks);
 
 	dim3 mygrid(blocks, 1, 1);
diff --git a/reg-lib/cuda/resampleKernel.h b/reg-lib/cuda/resampleKernel.h
index 3507d90b..c1055f59 100644
--- a/reg-lib/cuda/resampleKernel.h
+++ b/reg-lib/cuda/resampleKernel.h
@@ -5,4 +5,4 @@ void launchConvolution(nifti_image *image, float *sigma, int kernelType, int *ma
 void launchResample(nifti_image *floatingImage, nifti_image *warpedImage,  int interp, float paddingValue, bool *dti_timepoint, mat33 * jacMat, float** floatingImage_d, float** warpedImage_d, float** deformationFieldImage_d, int** mask_d, float** floMat_d);
 void launchOptimizer();//TODO
 
-double sortAndReduce(float* lengths_d, float* target_d, float* result_d, float* newResult_d, const unsigned int numBlocks, const unsigned int numToKeep, const unsigned int m);
+double sortAndReduce(float* lengths_d, float* target_d, float* result_d, float* newResult_d, const unsigned numBlocks, const unsigned numToKeep, const unsigned m);
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 43817321..3a064337 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -273,6 +273,8 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
                     REQUIRE(fabs(gradVal - gradExpVal) < EPS);
                 }
             }
+            // Ensure the termination of content before CudaContext
+            content.reset();
         }
     }
 }

From 9db2187b2aba9e5f9af7fcb8892ae95a1cf238a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 3 Apr 2023 17:28:42 +0100
Subject: [PATCH 125/314] Generate test data only once for
 NormaliseGradientTest

---
 niftyreg_build_version.txt              |  2 +-
 reg-test/reg_test_normaliseGradient.cpp | 22 +++++++++++++++-------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 7f05eede..2c2b1af8 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-244
+245
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index f0c83022..9b65c59e 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -17,12 +17,15 @@
 class NormaliseGradientTest {
 protected:
     using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
-    using TestCase = std::tuple<TestData, unique_ptr<F3dContent>, unique_ptr<Platform>, bool, bool, bool>;
+    using TestCase = std::tuple<shared_ptr<Platform>, unique_ptr<F3dContent>, TestData, bool, bool, bool>;
 
-    vector<TestCase> testCases;
+    inline static vector<TestCase> testCases;
 
 public:
     NormaliseGradientTest() {
+        if (!testCases.empty())
+            return;
+
         // Create a random number generator
         std::random_device rd;
         std::mt19937 gen(rd());
@@ -86,6 +89,8 @@ class NormaliseGradientTest {
         // Add platforms and optimise* to the test data
         for (auto&& testData : testData) {
             for (auto&& platformType : PlatformTypes) {
+                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
                 for (int optimiseX = 0; optimiseX < 2; optimiseX++) {
                     for (int optimiseY = 0; optimiseY < 2; optimiseY++) {
                         for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) {
@@ -93,10 +98,8 @@ class NormaliseGradientTest {
                             auto td = testData;
                             auto&& [testName, reference, controlPointGrid, testGrad] = td;
                             // Add content
-                            unique_ptr<Platform> platform{ new Platform(platformType) };
-                            unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
                             unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
-                            testCases.push_back({ std::move(td), std::move(content), std::move(platform), optimiseX, optimiseY, optimiseZ });
+                            testCases.push_back({ platform, std::move(content), std::move(td), optimiseX, optimiseY, optimiseZ });
                         }
                     }
                 }
@@ -176,10 +179,13 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
-        auto&& [testData, content, platform, optimiseX, optimiseY, optimiseZ] = testCase;
+        auto&& [platform, content, testData, optimiseX, optimiseY, optimiseZ] = testCase;
         auto&& [testName, reference, controlPointGrid, testGrad] = testData;
+        const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ");
+
+        SECTION(sectionName) {
+            std::cout << "******** Section " << sectionName << " ********" << std::endl;
 
-        SECTION(testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ")) {
             // Set the transformation gradient image to host the computation
             NiftiImage transGrad = content->GetTransformationGradient();
             transGrad.copyData(testGrad);
@@ -208,6 +214,8 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
                 std::cout << i << " " << transGradVal << " " << testGradVal << std::endl;
                 REQUIRE(fabs(transGradVal - testGradVal) < EPS);
             }
+            // Ensure the termination of content before CudaContext
+            content.reset();
         }
     }
 }

From 18495c9b193461b35dc4af26e8712c63d2e8e296 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 5 Apr 2023 13:03:06 +0100
Subject: [PATCH 126/314] Refactorisations

---
 niftyreg_build_version.txt   |   2 +-
 reg-lib/_reg_base.cpp        |   2 +-
 reg-lib/cpu/_reg_nmi.cpp     | 100 +++++++++++++++++------------------
 reg-lib/cpu/_reg_nmi.h       |  40 +++++++-------
 reg-lib/cuda/_reg_nmi_gpu.cu |  20 +++----
 reg-lib/cuda/_reg_nmi_gpu.h  |  22 ++++----
 6 files changed, 93 insertions(+), 93 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 2c2b1af8..5d165ff2 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-245
+246
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 8cd8419d..8e208d96 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -928,8 +928,8 @@ void reg_base<T>::Run() {
 
                 // Update the objective function variables and print some information
                 PrintCurrentObjFunctionValue(currentSize);
+            }
 
-            } // while
             if (perturbation < perturbationNumber) {
                 optimiser->Perturbation(smallestSize);
                 currentSize = maxStepSize;
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 745ed3f5..9ecdb6af 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -138,11 +138,11 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
             reg_intensityRescale(this->referenceImagePointer,
                                  i,
                                  2.f,
-                                 this->referenceBinNumber[i] - 3);
+                                 this->referenceBinNumber[i] - 3.f);
             reg_intensityRescale(this->floatingImagePointer,
                                  i,
                                  2.f,
-                                 this->floatingBinNumber[i] - 3);
+                                 this->floatingBinNumber[i] - 3.f);
         }
     }
     // Create the joint histograms
@@ -475,17 +475,17 @@ double reg_nmi::GetSimilarityMeasureValue() {
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
-                                    nifti_image *warpedImage,
-                                    unsigned short *referenceBinNumber,
-                                    unsigned short *floatingBinNumber,
-                                    double **jointHistogramLog,
-                                    double **entropyValues,
-                                    nifti_image *warpedGradient,
+void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
+                                    const nifti_image *warpedImage,
+                                    const unsigned short *referenceBinNumber,
+                                    const unsigned short *floatingBinNumber,
+                                    const double *const *jointHistogramLog,
+                                    const double *const *entropyValues,
+                                    const nifti_image *warpedGradient,
                                     nifti_image *measureGradientImage,
-                                    int *referenceMask,
-                                    int current_timepoint,
-                                    double timepoint_weight) {
+                                    const int *referenceMask,
+                                    const int& current_timepoint,
+                                    const double& timepoint_weight) {
     if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) {
         reg_print_fct_error("reg_getVoxelBasedNMIGradient2D");
         reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
@@ -494,25 +494,25 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 
     // Pointers to the image data
-    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber];
-    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber];
+    const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    const DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber];
+    const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    const DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
-    DataType *warGradPtrX = static_cast<DataType*>(warpedGradient->data);
-    DataType *warGradPtrY = &warGradPtrX[voxelNumber];
+    const DataType *warGradPtrX = static_cast<DataType*>(warpedGradient->data);
+    const DataType *warGradPtrY = &warGradPtrX[voxelNumber];
 
     // Pointers to the measure of similarity gradient
     DataType *measureGradPtrX = static_cast<DataType*>(measureGradientImage->data);
     DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
 
     // Create pointers to the current joint histogram
-    double *logHistoPtr = jointHistogramLog[current_timepoint];
-    double *entropyPtr = entropyValues[current_timepoint];
-    double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
-    size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint];
-    size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint];
+    const double *logHistoPtr = jointHistogramLog[current_timepoint];
+    const double *entropyPtr = entropyValues[current_timepoint];
+    const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
+    const size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint];
+    const size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint];
     // Iterate over all voxel
     for (size_t i = 0; i < voxelNumber; ++i) {
         // Check if the voxel belongs to the image mask
@@ -561,22 +561,22 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
 }
 /* *************************************************************** */
 template void reg_getVoxelBasedNMIGradient2D<float>
-(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double);
+(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
 template void reg_getVoxelBasedNMIGradient2D<double>
-(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double);
+(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
 /* *************************************************************** */
 template <class DataType>
-void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
-                                    nifti_image *warpedImage,
-                                    unsigned short *referenceBinNumber,
-                                    unsigned short *floatingBinNumber,
-                                    double **jointHistogramLog,
-                                    double **entropyValues,
-                                    nifti_image *warpedGradient,
+void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
+                                    const nifti_image *warpedImage,
+                                    const unsigned short *referenceBinNumber,
+                                    const unsigned short *floatingBinNumber,
+                                    const double *const *jointHistogramLog,
+                                    const double *const *entropyValues,
+                                    const nifti_image *warpedGradient,
                                     nifti_image *measureGradientImage,
-                                    int *referenceMask,
-                                    int current_timepoint,
-                                    double timepoint_weight) {
+                                    const int *referenceMask,
+                                    const int& current_timepoint,
+                                    const double& timepoint_weight) {
     if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) {
         reg_print_fct_error("reg_getVoxelBasedNMIGradient3D");
         reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
@@ -591,15 +591,15 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
     const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
 #endif
     // Pointers to the image data
-    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber];
-    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber];
+    const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    const DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber];
+    const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    const DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
-    DataType *warGradPtrX = static_cast<DataType*>(warpedGradient->data);
-    DataType *warGradPtrY = &warGradPtrX[voxelNumber];
-    DataType *warGradPtrZ = &warGradPtrY[voxelNumber];
+    const DataType *warGradPtrX = static_cast<DataType*>(warpedGradient->data);
+    const DataType *warGradPtrY = &warGradPtrX[voxelNumber];
+    const DataType *warGradPtrZ = &warGradPtrY[voxelNumber];
 
     // Pointers to the measure of similarity gradient
     DataType *measureGradPtrX = static_cast<DataType*>(measureGradientImage->data);
@@ -607,18 +607,18 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
     DataType *measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
     // Create pointers to the current joint histogram
-    double *logHistoPtr = jointHistogramLog[current_timepoint];
-    double *entropyPtr = entropyValues[current_timepoint];
-    double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
-    size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint];
-    size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint];
+    const double *logHistoPtr = jointHistogramLog[current_timepoint];
+    const double *entropyPtr = entropyValues[current_timepoint];
+    const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
+    const size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint];
+    const size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint];
     int r, w;
     DataType refValue, warValue, gradX, gradY, gradZ;
     double jointDeriv[3], refDeriv[3], warDeriv[3], commun, jointLog, refLog, warLog;
     // Iterate over all voxel
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(i,r,w,refValue,warValue,gradX,gradY,gradZ, \
+    private(r,w,refValue,warValue,gradX,gradY,gradZ, \
     jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \
     shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \
     logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \
@@ -678,9 +678,9 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
 }
 /* *************************************************************** */
 template void reg_getVoxelBasedNMIGradient3D<float>
-(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double);
+(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
 template void reg_getVoxelBasedNMIGradient3D<double>
-(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double);
+(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
 /* *************************************************************** */
 void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     // Check if the specified time point exists and is active
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 80e65781..a48583c8 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -92,31 +92,31 @@ void reg_getNMIValue(nifti_image *referenceImage,
 );
 /* *************************************************************** */
 extern "C++" template <class DataType>
-void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage,
-                                    nifti_image *warpedImage,
-                                    unsigned short *referenceBinNumber,
-                                    unsigned short *floatingBinNumber,
-                                    double **jointHistogramLog,
-                                    double **entropyValues,
-                                    nifti_image *warpedGradient,
+void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
+                                    const nifti_image *warpedImage,
+                                    const unsigned short *referenceBinNumber,
+                                    const unsigned short *floatingBinNumber,
+                                    const double *const *jointHistogramLog,
+                                    const double *const *entropyValues,
+                                    const nifti_image *warpedGradient,
                                     nifti_image *nmiGradientImage,
-                                    int *referenceMask,
-                                    int current_timepoint,
-                                    double timepoint_weight
+                                    const int *referenceMask,
+                                    const int& current_timepoint,
+                                    const double& timepoint_weight
 );
 /* *************************************************************** */
 extern "C++" template <class DataType>
-void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage,
-                                    nifti_image *warpedImage,
-                                    unsigned short *referenceBinNumber,
-                                    unsigned short *floatingBinNumber,
-                                    double **jointHistogramLog,
-                                    double **entropyValues,
-                                    nifti_image *warpedGradient,
+void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
+                                    const nifti_image *warpedImage,
+                                    const unsigned short *referenceBinNumber,
+                                    const unsigned short *floatingBinNumber,
+                                    const double *const *jointHistogramLog,
+                                    const double *const *entropyValues,
+                                    const nifti_image *warpedGradient,
                                     nifti_image *nmiGradientImage,
-                                    int *referenceMask,
-                                    int current_timepoint,
-                                    double timepoint_weight
+                                    const int *referenceMask,
+                                    const int& current_timepoint,
+                                    const double& timepoint_weight
 );
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index bf59fe7f..207a0fd0 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -142,17 +142,17 @@ double reg_nmi_gpu::GetSimilarityMeasureValue() {
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /// Called when we only have one target and one source image
-void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
-                                      cudaArray *referenceImageArray_d,
-                                      float *warpedImageArray_d,
-                                      float4 *warpedGradientArray_d,
-                                      float *logJointHistogram_d,
+void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
+                                      const cudaArray *referenceImageArray_d,
+                                      const float *warpedImageArray_d,
+                                      const float4 *warpedGradientArray_d,
+                                      const float *logJointHistogram_d,
                                       float4 *voxelNMIGradientArray_d,
-                                      int *mask_d,
-                                      int activeVoxelNumber,
-                                      double *entropies,
-                                      int refBinning,
-                                      int floBinning) {
+                                      const int *mask_d,
+                                      const int activeVoxelNumber,
+                                      const double *entropies,
+                                      const int refBinning,
+                                      const int floBinning) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     const int voxelNumber = CalcVoxelNumber(*referenceImage);
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index 47cdbb40..dc6ccbe7 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -68,7 +68,7 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_
                            int *refMskDevicePtr,
                            float *warFloDevicePtr,
                            float4 *warFloGradDevicePtr,
-                           float4 *forVoxBasedGraDevicePtr) {}
+                           float4 *forVoxBasedGraDevicePtr) override {}
     /// @brief reg_nmi class constructor
     reg_multichannel_nmi_gpu() {}
     /// @brief reg_nmi class destructor
@@ -81,14 +81,14 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 extern "C++"
-void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage,
-                                      cudaArray *referenceImageArray_d,
-                                      float *warpedImageArray_d,
-                                      float4 *resultGradientArray_d,
-                                      float *logJointHistogram_d,
+void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
+                                      const cudaArray *referenceImageArray_d,
+                                      const float *warpedImageArray_d,
+                                      const float4 *resultGradientArray_d,
+                                      const float *logJointHistogram_d,
                                       float4 *voxelNMIGradientArray_d,
-                                      int *targetMask_d,
-                                      int activeVoxelNumber,
-                                      double *entropies,
-                                      int refBinning,
-                                      int floBinning);
+                                      const int *targetMask_d,
+                                      const int activeVoxelNumber,
+                                      const double *entropies,
+                                      const int refBinning,
+                                      const int floBinning);

From 1f84a28b82e2345cea75cd2fa3a46c8c0cf42eaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 5 Apr 2023 13:07:39 +0100
Subject: [PATCH 127/314] Add *Content::UpdateWarped() to transfer warped image
 to the CUDA device

---
 niftyreg_build_version.txt   | 2 +-
 reg-lib/Content.h            | 1 +
 reg-lib/cuda/CudaContent.cpp | 4 ++++
 reg-lib/cuda/CudaContent.h   | 1 +
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 5d165ff2..e06108c0 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-246
+247
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index 4731b084..f4c8f86a 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -25,6 +25,7 @@ class Content {
 
     // Methods for transferring data from nifti to device
     virtual void UpdateDeformationField() {}
+    virtual void UpdateWarped() {}
 
     // Auxiliary methods
     static mat44* GetXYZMatrix(nifti_image& image) {
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index ddc464ce..997676ca 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -146,6 +146,10 @@ void CudaContent::SetWarped(nifti_image *warpedIn) {
     cudaCommon_transferNiftiToArrayOnDevice(warpedCuda, warped);
 }
 /* *************************************************************** */
+void CudaContent::UpdateWarped() {
+    cudaCommon_transferNiftiToArrayOnDevice(warpedCuda, warped);
+}
+/* *************************************************************** */
 template<class DataType>
 DataType CudaContent::CastImageData(float intensity, int datatype) {
     switch (datatype) {
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index 7e1f08c1..a9fd1f4f 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -28,6 +28,7 @@ class CudaContent: public virtual Content {
 
     // Methods for transferring data from nifti to device
     virtual void UpdateDeformationField() override;
+    virtual void UpdateWarped() override;
 
 protected:
     cudaArray *referenceCuda = nullptr;

From a753c7f55a1012fd27c8eff17c96fbbe850d2b92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 5 Apr 2023 14:30:44 +0100
Subject: [PATCH 128/314] Use CPU for GetWarpedImage()

---
 niftyreg_build_version.txt | 2 +-
 reg-lib/_reg_f3d.cpp       | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e06108c0..5d0b6c41 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-247
+248
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 611d74d6..c8ddcb5d 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -104,6 +104,10 @@ T reg_f3d<T>::InitCurrentLevel(int currentLevel) {
     nifti_image *reference, *floating;
     int *mask;
     if (currentLevel < 0) {
+        // Settings for GetWarpedImage()
+        // Use CPU for warping since CUDA isn't supporting Cubic interpolation
+        // TODO Remove this when CUDA supports Cubic interpolation
+        this->SetPlatformType(PlatformType::Cpu);
         reference = this->inputReference;
         floating = this->inputFloating;
         mask = nullptr;

From 54a9990e471ce4883c2ff8017cf5aba987e6bc44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 6 Apr 2023 13:02:02 +0100
Subject: [PATCH 129/314] Use CPU for reg_f3d2::GetWarpedImage()

---
 niftyreg_build_version.txt | 2 +-
 reg-lib/_reg_f3d2.cpp      | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 5d0b6c41..720fe955 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-248
+249
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index e218b9ec..ae9c4bac 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -57,6 +57,10 @@ T reg_f3d2<T>::InitCurrentLevel(int currentLevel) {
     nifti_image *reference, *floating;
     int *referenceMask, *floatingMask;
     if (currentLevel < 0) {
+        // Settings for GetWarpedImage()
+        // Use CPU for warping since CUDA isn't supporting Cubic interpolation
+        // TODO Remove this when CUDA supports Cubic interpolation
+        this->SetPlatformType(PlatformType::Cpu);
         reference = this->inputReference;
         floating = this->inputFloating;
         referenceMask = nullptr;

From 28bfb04b21de80c1007f6f3d65fea44913f72374 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 8 Jun 2023 15:26:16 +0100
Subject: [PATCH 130/314] Handle NiftiImage copy options with an enum

---
 niftyreg_build_version.txt              |  2 +-
 reg-io/RNifti/NiftiImage.h              | 34 ++++++++++++-------------
 reg-io/RNifti/NiftiImage_impl.h         | 12 ++++-----
 reg-lib/_reg_aladin.cpp                 |  2 +-
 reg-lib/_reg_f3d.cpp                    |  2 +-
 reg-lib/_reg_f3d2.cpp                   |  4 +--
 reg-test/reg_test_conjugateGradient.cpp | 12 ++++-----
 reg-test/reg_test_interpolation.cpp     |  2 +-
 reg-test/reg_test_normaliseGradient.cpp |  4 +--
 9 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 720fe955..cb1a40df 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-249
+250
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index beb893ff..d63f0f53 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -870,6 +870,13 @@ class NiftiImage
 
     enum class Dim { NDim, X, Y, Z, T, U, V, W };    /**< Dimension enumeration */
 
+    enum class Copy {           /**< Enumeration of copy options of the constructor */
+        None,                   /**< Do not copy the image */
+        Image,                  /**< Copy the entire image */
+        ImageInfo,              /**< Copy only the image info, and do not allocate data */
+        ImageInfoAndAllocData   /**< Copy only the image info, and allocate and zero the data */
+    };
+
     /**
      * Inner class referring to a subset of an image. Currently must refer to the last
      * dimension in the image, i.e., a volume in a 4D parent image, or a slice in a 3D image
@@ -1306,10 +1313,9 @@ class NiftiImage
     /**
      * Copy the contents of a \c nifti_image to create a new image, acquiring the new pointer
      * @param source A pointer to a \c nifti_image
-     * @param onlyImageInfo If \c true, only the image info is copied; otherwise the data are also copied
-     * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true
+     * @param copy A \ref Copy value indicating which part of the image data to copy
     **/
-    void copy (const nifti_image *source, const bool onlyImageInfo, const bool allocData);
+    void copy (const nifti_image *source, const Copy copy);
 
     /**
      * Copy the contents of a \ref Block to create a new image, acquiring a new pointer
@@ -1393,16 +1399,13 @@ class NiftiImage
     /**
      * Copy constructor
      * @param source Another \c NiftiImage object
-     * @param copy If \c true, the underlying \c nifti_image will be copied; otherwise the new
-     * object wraps the same \c nifti_image and increments the shared reference count
-     * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied. Only relevant if \c copy is \c true
-     * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true
+     * @param copy If \c Copy::None, the new object just wraps the same pointer as \c source; otherwise the image data is copied
     **/
-    NiftiImage (const NiftiImage &source, const bool copy = true, const bool onlyImageInfo = false, const bool allocData = false)
+    NiftiImage (const NiftiImage &source, const Copy copy = Copy::Image)
         : NiftiImage()
     {
-        if (copy) {
-            this->copy(source, onlyImageInfo, allocData);
+        if (copy != Copy::None) {
+            this->copy(source, copy);
         } else {
             refCount = source.refCount;
             acquire(source.image);
@@ -1441,16 +1444,13 @@ class NiftiImage
     /**
      * Initialise using an existing \c nifti_image pointer
      * @param image An existing \c nifti_image pointer, possibly \c nullptr
-     * @param copy If \c true, the image data will be copied; otherwise this object just wraps
-     * the pointer passed to it
-     * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied. Only relevant if \c copy is \c true
-     * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true
+     * @param copy If \c Copy::None, the new object just wraps the pointer passed to it; otherwise the image data is copied
     **/
-    NiftiImage (nifti_image * const image, const bool copy = false, const bool onlyImageInfo = false, const bool allocData = false)
+    NiftiImage (nifti_image * const image, const Copy copy = Copy::None)
         : NiftiImage()
     {
-        if (copy)
-            this->copy(image, onlyImageInfo, allocData);
+        if (copy != Copy::None)
+            this->copy(image, copy);
         else
             acquire(image);
 #ifndef NDEBUG
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index 0688a681..6d12dbbc 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -763,7 +763,7 @@ inline void NiftiImage::release ()
     }
 }
 
-inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInfo, const bool allocData)
+inline void NiftiImage::copy (const nifti_image *source, const Copy copy)
 {
     if (source == nullptr)
         acquire(nullptr);
@@ -771,9 +771,9 @@ inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInf
     {
 #if RNIFTI_NIFTILIB_VERSION == 1
         acquire(nifti_copy_nim_info(source));
-        if (onlyImageInfo)
+        if (copy != Copy::Image)
         {
-            if (allocData)
+            if (copy == Copy::ImageInfoAndAllocData)
                 realloc();
         } else if (source->data != nullptr)
         {
@@ -783,9 +783,9 @@ inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInf
         }
 #elif RNIFTI_NIFTILIB_VERSION == 2
         acquire(nifti2_copy_nim_info(source));
-        if (onlyImageInfo)
+        if (copy != Copy::Image)
         {
-            if (allocData)
+            if (copy == Copy::ImageInfoAndAllocData)
                 realloc();
         } else if (source->data != nullptr)
         {
@@ -1898,7 +1898,7 @@ inline std::pair<std::string,std::string> NiftiImage::toFile (const std::string
     const bool changingDatatype = (datatype != DT_NONE && !this->isNull() && datatype != image->datatype);
 
     // Copy the source image only if the datatype will be changed
-    NiftiImage imageToWrite(*this, changingDatatype);
+    NiftiImage imageToWrite(*this, Copy(changingDatatype));
 
     if (changingDatatype)
         imageToWrite.changeDatatype(datatype, true);
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 29e11524..70df10c2 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -508,7 +508,7 @@ NiftiImage reg_aladin<T>::GetFinalWarpedImage() {
 
     reg_aladin<T>::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation
 
-    NiftiImage warpedImage(this->con->GetWarped(), true);
+    NiftiImage warpedImage(this->con->GetWarped(), NiftiImage::Copy::Image);
     warpedImage->cal_min = this->inputFloating->cal_min;
     warpedImage->cal_max = this->inputFloating->cal_max;
     warpedImage->scl_slope = this->inputFloating->scl_slope;
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index c8ddcb5d..ac569c97 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -638,7 +638,7 @@ vector<NiftiImage> reg_f3d<T>::GetWarpedImage() {
 
     this->WarpFloatingImage(3); // cubic spline interpolation
 
-    NiftiImage warpedImage = NiftiImage(this->con->GetWarped(), true);
+    NiftiImage warpedImage = NiftiImage(this->con->GetWarped(), NiftiImage::Copy::Image);
 
     DeinitCurrentLevel(-1);
 #ifndef NDEBUG
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index ae9c4bac..4ee3b9cf 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -827,8 +827,8 @@ vector<NiftiImage> reg_f3d2<T>::GetWarpedImage() {
 
     F3dContent& con = dynamic_cast<F3dContent&>(*this->con);
     vector<NiftiImage> warpedImage{
-        NiftiImage(con.GetWarped(), true),
-        NiftiImage(conBw->GetWarped(), true)
+        NiftiImage(con.GetWarped(), NiftiImage::Copy::Image),
+        NiftiImage(conBw->GetWarped(), NiftiImage::Copy::Image)
     };
 
     DeinitCurrentLevel(-1);
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 3a064337..ddf22890 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -57,8 +57,8 @@ class ConjugateGradientTest: public InterfaceOptimiser {
         // Generate the different test cases
         // Test 2D
         NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
-        NiftiImage bestControlPointGrid2d(controlPointGrid2d, true, true, true);
-        NiftiImage transformationGradient2d(controlPointGrid2d, true, true, true);
+        NiftiImage bestControlPointGrid2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData);
+        NiftiImage transformationGradient2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData);
         auto bestCpp2dPtr = bestControlPointGrid2d.data();
         auto transGrad2dPtr = transformationGradient2d.data();
         for (size_t i = 0; i < transformationGradient2d.nVoxels(); ++i) {
@@ -78,8 +78,8 @@ class ConjugateGradientTest: public InterfaceOptimiser {
 
         // Test 3D
         NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
-        NiftiImage bestControlPointGrid3d(controlPointGrid3d, true, true, true);
-        NiftiImage transformationGradient3d(controlPointGrid3d, true, true, true);
+        NiftiImage bestControlPointGrid3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData);
+        NiftiImage transformationGradient3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData);
         auto bestCpp3dPtr = bestControlPointGrid3d.data();
         auto transGrad3dPtr = transformationGradient3d.data();
         for (size_t i = 0; i < transformationGradient3d.nVoxels(); ++i) {
@@ -156,8 +156,8 @@ class ConjugateGradientTest: public InterfaceOptimiser {
         static NiftiImage array1;
         static NiftiImage array2;
         if (firstCall) {
-            array1 = NiftiImage(gradient, true, true, true);
-            array2 = NiftiImage(gradient, true, true, true);
+            array1 = NiftiImage(gradient, NiftiImage::Copy::ImageInfoAndAllocData);
+            array2 = NiftiImage(gradient, NiftiImage::Copy::ImageInfoAndAllocData);
         }
 
         auto gradientPtr = gradient.data();
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 8eaa95b5..451f731d 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -197,7 +197,7 @@ TEST_CASE("Interpolation", "[Interpolation]") {
             auto contentName = isAladinContent ? "Aladin" : "Base";
             SECTION(testName + " " + platform->GetName() + " - " + contentName) {
                 // Create and set a warped image to host the computation
-                NiftiImage warped(defField, true, true);
+                NiftiImage warped(defField, NiftiImage::Copy::ImageInfo);
                 warped.setDim(NiftiDim::NDim, defField->nu);
                 warped.setDim(NiftiDim::X, 1);
                 warped.setDim(NiftiDim::Y, 1);
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index 9b65c59e..ef108f2d 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -57,7 +57,7 @@ class NormaliseGradientTest {
         // Generate the different test cases
         // Test 2D
         NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
-        NiftiImage transformationGradient2d(controlPointGrid2d, true, true, true);
+        NiftiImage transformationGradient2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData);
         auto transGrad2dPtr = transformationGradient2d.data();
         for (size_t i = 0; i < transformationGradient2d.nVoxels(); ++i)
             transGrad2dPtr[i] = distr(gen);
@@ -73,7 +73,7 @@ class NormaliseGradientTest {
 
         // Test 3D
         NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
-        NiftiImage transformationGradient3d(controlPointGrid3d, true, true, true);
+        NiftiImage transformationGradient3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData);
         auto transGrad3dPtr = transformationGradient3d.data();
         for (size_t i = 0; i < transformationGradient3d.nVoxels(); ++i)
             transGrad3dPtr[i] = distr(gen);

From e5a0f425a2265672d9706d33a182ed374a8ebda7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 9 Jun 2023 12:09:43 +0100
Subject: [PATCH 131/314] Fix GPU version of ResampleImage() to make on a par
 with CPU version

---
 niftyreg_build_version.txt              |  2 +-
 reg-lib/cuda/_reg_resampling_gpu.cu     |  3 +-
 reg-lib/cuda/_reg_resampling_kernels.cu | 86 ++++++++++++++++++++-----
 reg-test/reg_test_interpolation.cpp     |  2 +-
 4 files changed, 73 insertions(+), 20 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index cb1a40df..1f7e0d6e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-250
+251
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index 7a48d774..bbc5ba58 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -26,7 +26,8 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray);
+    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, false, 0,
+                                                          cudaChannelFormatKindNone, 1, cudaFilterModePoint);
 
     // Create the texture object for the deformation field
     auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 05351f38..0bada174 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -10,6 +10,13 @@
  *
  */
 
+/* *************************************************************** */
+__inline__ __device__ void InterpLinearKernel(float relative, float (&basis)[2]) {
+    if (relative < 0)
+        relative = 0;  // reg_rounding error
+    basis[1] = relative;
+    basis[0] = 1.0 - relative;
+}
 /* *************************************************************** */
 __global__ void reg_resampleImage2D_kernel(float *resultArray,
                                            cudaTextureObject_t floatingTexture,
@@ -21,11 +28,11 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray,
                                            const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
-        //Get the real world deformation in the floating space
+        // Get the real world deformation in the floating space
         const int tid2 = tex1Dfetch<int>(maskTexture, tid);
         float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
-        //Get the voxel-based deformation in the floating space
+        // Get the voxel-based deformation in the floating space
         float2 voxelDeformation;
         voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
                               floatingMatrix.m[0][1] * realDeformation.y +
@@ -34,10 +41,30 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray,
                               floatingMatrix.m[1][1] * realDeformation.y +
                               floatingMatrix.m[1][3]);
 
-        if (voxelDeformation.x >= 0.0f && voxelDeformation.x <= floatingDim.x - 1 &&
-            voxelDeformation.y >= 0.0f && voxelDeformation.y <= floatingDim.y - 1) {
-            resultArray[tid2] = tex3D<float>(floatingTexture, voxelDeformation.x + 0.5f, voxelDeformation.y + 0.5f, 0.5f);
-        } else resultArray[tid2] = paddingValue;
+        // Compute the linear interpolation
+        const int2 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y) };
+        const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
+        float xBasis[2], yBasis[2];
+        InterpLinearKernel(relative.x, xBasis);
+        InterpLinearKernel(relative.y, yBasis);
+
+        float intensity = 0;
+        for (short b = 0; b < 2; b++) {
+            const int y = previous.y + b;
+            float xTempNewValue = 0;
+            for (short a = 0; a < 2; a++) {
+                const int x = previous.x + a;
+                if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) {
+                    xTempNewValue += tex3D<float>(floatingTexture, x, y, 0) * xBasis[a];
+                } else {
+                    // Padding value
+                    xTempNewValue += paddingValue * xBasis[a];
+                }
+            }
+            intensity += xTempNewValue * yBasis[b];
+        }
+
+        resultArray[tid2] = intensity;
     }
 }
 /* *************************************************************** */
@@ -53,10 +80,10 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray,
     if (tid < activeVoxelNumber) {
         const int tid2 = tex1Dfetch<int>(maskTexture, tid);
 
-        //Get the real world deformation in the floating space
+        // Get the real world deformation in the floating space
         float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
-        //Get the voxel-based deformation in the floating space
+        // Get the voxel-based deformation in the floating space
         float3 voxelDeformation;
         voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
                               floatingMatrix.m[0][1] * realDeformation.y +
@@ -71,11 +98,36 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray,
                               floatingMatrix.m[2][2] * realDeformation.z +
                               floatingMatrix.m[2][3]);
 
-        if (voxelDeformation.x >= 0.0f && voxelDeformation.x <= floatingDim.x - 1 &&
-            voxelDeformation.y >= 0.0f && voxelDeformation.y <= floatingDim.y - 1 &&
-            voxelDeformation.z >= 0.0f && voxelDeformation.z <= floatingDim.z - 1) {
-            resultArray[tid2] = tex3D<float>(floatingTexture, voxelDeformation.x + 0.5f, voxelDeformation.y + 0.5f, voxelDeformation.z + 0.5f);
-        } else resultArray[tid2] = paddingValue;
+        // Compute the linear interpolation
+        const int3 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y), reg_floor(voxelDeformation.z) };
+        const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z };
+        float xBasis[2], yBasis[2], zBasis[2];
+        InterpLinearKernel(relative.x, xBasis);
+        InterpLinearKernel(relative.y, yBasis);
+        InterpLinearKernel(relative.z, zBasis);
+
+        float intensity = 0;
+        for (short c = 0; c < 2; c++) {
+            const int z = previous.z + c;
+            float yTempNewValue = 0;
+            for (short b = 0; b < 2; b++) {
+                const int y = previous.y + b;
+                float xTempNewValue = 0;
+                for (short a = 0; a < 2; a++) {
+                    const int x = previous.x + a;
+                    if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) {
+                        xTempNewValue += tex3D<float>(floatingTexture, x, y, z) * xBasis[a];
+                    } else {
+                        // Padding value
+                        xTempNewValue += paddingValue * xBasis[a];
+                    }
+                }
+                yTempNewValue += xTempNewValue * yBasis[b];
+            }
+            intensity += yTempNewValue * zBasis[c];
+        }
+
+        resultArray[tid2] = intensity;
     }
 }
 /* *************************************************************** */
@@ -88,10 +140,10 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
                                               const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
-        //Get the real world deformation in the floating space
+        // Get the real world deformation in the floating space
         float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
-        //Get the voxel-based deformation in the floating space
+        // Get the voxel-based deformation in the floating space
         float3 voxelDeformation;
         voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
                               floatingMatrix.m[0][1] * realDeformation.y +
@@ -148,10 +200,10 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
                                               const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
-        //Get the real world deformation in the floating space
+        // Get the real world deformation in the floating space
         float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
-        //Get the voxel-based deformation in the floating space
+        // Get the voxel-based deformation in the floating space
         float3 voxelDeformation;
         voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
                               floatingMatrix.m[0][1] * realDeformation.y +
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 451f731d..d97b9548 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -3,7 +3,7 @@
 
 #include "reg_test_common.h"
 
-#define EPS 0.001
+#define EPS 0.000001
 
 /*
     This test file contains the following unit tests:

From 3f92721063e410e5856728b180868eb794fa3a20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 9 Jun 2023 16:20:32 +0100
Subject: [PATCH 132/314] Fix GPU version of GetImageGradient() to make on a
 par with CPU version

---
 niftyreg_build_version.txt              |  2 +-
 reg-lib/cuda/_reg_resampling_gpu.cu     |  3 +-
 reg-lib/cuda/_reg_resampling_kernels.cu | 85 +++++++++----------------
 3 files changed, 34 insertions(+), 56 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 1f7e0d6e..2197544d 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-251
+252
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index bbc5ba58..90628ff6 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -70,7 +70,8 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, true);
+    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, false, 0,
+                                                          cudaChannelFormatKindNone, 1, cudaFilterModePoint);
 
     // Create the texture object for the deformation field
     auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 0bada174..7f0bf7a7 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -152,34 +152,24 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
                               floatingMatrix.m[1][1] * realDeformation.y +
                               floatingMatrix.m[1][3]);
 
-        int2 voxel;
-        voxel.x = (int)(voxelDeformation.x);
-        voxel.y = (int)(voxelDeformation.y);
-
-        float xBasis[2];
-        float relative = fabsf(voxelDeformation.x - (float)voxel.x);
-        xBasis[0] = 1.0f - relative;
-        xBasis[1] = relative;
-        float yBasis[2];
-        relative = fabsf(voxelDeformation.y - (float)voxel.y);
-        yBasis[0] = 1.0f - relative;
-        yBasis[1] = relative;
-        float deriv[2];
-        deriv[0] = -1.0f;
-        deriv[1] = 1.0f;
+        // Compute the gradient
+        const int2 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y) };
+        float xBasis[2], yBasis[2];
+        const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
+        InterpLinearKernel(relative.x, xBasis);
+        InterpLinearKernel(relative.y, yBasis);
+        const float deriv[] = { -1.0f, 1.0f };
 
-        float4 gradientValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-        float2 relativeDeformation;
+        float4 gradientValue{};
         for (short b = 0; b < 2; b++) {
-            float2 tempValueX = make_float2(0.0f, 0.0f);
-            relativeDeformation.y = ((float)voxel.y + (float)b + 0.5f) / (float)floatingDim.y;
+            float2 tempValueX{};
+            const int y = previous.y + b;
             for (short a = 0; a < 2; a++) {
-                relativeDeformation.x = ((float)voxel.x + (float)a + 0.5f) / (float)floatingDim.x;
+                const int x = previous.x + a;
                 float intensity = paddingValue;
 
-                if (0.f <= relativeDeformation.x && relativeDeformation.x <= 1.f &&
-                    0.f <= relativeDeformation.y && relativeDeformation.y <= 1.f)
-                    intensity = tex3D<float>(floatingTexture, relativeDeformation.x, relativeDeformation.y, 0.5f);
+                if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y)
+                    intensity = tex3D<float>(floatingTexture, x, y, 0);
 
                 tempValueX.x += intensity * deriv[a];
                 tempValueX.y += intensity * xBasis[a];
@@ -187,6 +177,7 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
             gradientValue.x += tempValueX.x * yBasis[b];
             gradientValue.y += tempValueX.y * deriv[b];
         }
+
         gradientArray[tid] = gradientValue;
     }
 }
@@ -218,43 +209,28 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
                               floatingMatrix.m[2][2] * realDeformation.z +
                               floatingMatrix.m[2][3]);
 
-        int3 voxel;
-        voxel.x = (int)(voxelDeformation.x);
-        voxel.y = (int)(voxelDeformation.y);
-        voxel.z = (int)(voxelDeformation.z);
-
-        float xBasis[2];
-        float relative = fabsf(voxelDeformation.x - (float)voxel.x);
-        xBasis[0] = 1.0f - relative;
-        xBasis[1] = relative;
-        float yBasis[2];
-        relative = fabsf(voxelDeformation.y - (float)voxel.y);
-        yBasis[0] = 1.0f - relative;
-        yBasis[1] = relative;
-        float zBasis[2];
-        relative = fabsf(voxelDeformation.z - (float)voxel.z);
-        zBasis[0] = 1.0f - relative;
-        zBasis[1] = relative;
-        float deriv[2];
-        deriv[0] = -1.0f;
-        deriv[1] = 1.0f;
+        // Compute the gradient
+        const int3 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y), reg_floor(voxelDeformation.z) };
+        float xBasis[2], yBasis[2], zBasis[2];
+        const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z };
+        InterpLinearKernel(relative.x, xBasis);
+        InterpLinearKernel(relative.y, yBasis);
+        InterpLinearKernel(relative.z, zBasis);
+        const float deriv[] = { -1.0f, 1.0f };
 
-        float4 gradientValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-        float3 relativeDeformation;
+        float4 gradientValue{};
         for (short c = 0; c < 2; c++) {
-            relativeDeformation.z = ((float)voxel.z + (float)c + 0.5f) / (float)floatingDim.z;
-            float3 tempValueY = make_float3(0.0f, 0.0f, 0.0f);
+            const int z = previous.z + c;
+            float3 tempValueY{};
             for (short b = 0; b < 2; b++) {
-                float2 tempValueX = make_float2(0.0f, 0.0f);
-                relativeDeformation.y = ((float)voxel.y + (float)b + 0.5f) / (float)floatingDim.y;
+                float2 tempValueX{};
+                const int y = previous.y + b;
                 for (short a = 0; a < 2; a++) {
-                    relativeDeformation.x = ((float)voxel.x + (float)a + 0.5f) / (float)floatingDim.x;
+                    const int x = previous.x + a;
                     float intensity = paddingValue;
 
-                    if (0.f <= relativeDeformation.x && relativeDeformation.x <= 1.f &&
-                        0.f <= relativeDeformation.y && relativeDeformation.y <= 1.f &&
-                        0.f <= relativeDeformation.z && relativeDeformation.z <= 1.f)
-                        intensity = tex3D<float>(floatingTexture, relativeDeformation.x, relativeDeformation.y, relativeDeformation.z);
+                    if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z)
+                        intensity = tex3D<float>(floatingTexture, x, y, z);
 
                     tempValueX.x += intensity * deriv[a];
                     tempValueX.y += intensity * xBasis[a];
@@ -267,6 +243,7 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
             gradientValue.y += tempValueY.y * zBasis[c];
             gradientValue.z += tempValueY.z * deriv[c];
         }
+
         gradientArray[tid] = gradientValue;
     }
 }

From 5009c86d1683e23411aa652e1077ea585bbf57ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 9 Jun 2023 17:17:21 +0100
Subject: [PATCH 133/314] Refactor cudaCommon_createTextureObject()

---
 niftyreg_build_version.txt            |  2 +-
 reg-lib/cuda/CudaNormaliseGradient.cu |  4 ++--
 reg-lib/cuda/_reg_common_cuda.cu      |  4 ++--
 reg-lib/cuda/_reg_common_cuda.h       |  4 ++--
 reg-lib/cuda/_reg_optimiser_gpu.cu    | 26 +++++++++++++-------------
 reg-lib/cuda/_reg_resampling_gpu.cu   | 16 ++++++----------
 6 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 2197544d..63fe24a5 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-252
+253
diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
index 674dff82..96810cfe 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -23,8 +23,8 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
                                        const bool& optimiseY,
                                        const bool& optimiseZ) {
     // Create a texture object for the imageCuda
-    auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
-                                                       cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+    auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear,
+                                                       nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     float *dists = nullptr;
     NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float)));
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 5edc014d..09351400 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -629,11 +629,11 @@ void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) {
 /* *************************************************************** */
 UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
                                                       const cudaResourceType& resType,
-                                                      const bool& normalizedCoordinates,
                                                       const size_t& size,
                                                       const cudaChannelFormatKind& channelFormat,
                                                       const unsigned& channelCount,
-                                                      const cudaTextureFilterMode& filterMode) {
+                                                      const cudaTextureFilterMode& filterMode,
+                                                      const bool& normalizedCoordinates) {
     // Specify texture
     cudaResourceDesc resDesc{};
     resDesc.resType = resType;
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 45f8aa26..31fc61dd 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -135,9 +135,9 @@ using UniqueTextureObjectPtr = std::unique_ptr<cudaTextureObject_t, void(*)(cuda
 extern "C++"
 UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
 													  const cudaResourceType& resType,
-													  const bool& normalizedCoordinates = false,
 													  const size_t& size = 0,
 													  const cudaChannelFormatKind& channelFormat = cudaChannelFormatKindNone,
 													  const unsigned& channelCount = 1,
-													  const cudaTextureFilterMode& filterMode = cudaFilterModeLinear);
+													  const cudaTextureFilterMode& filterMode = cudaFilterModePoint,
+													  const bool& normalizedCoordinates = false);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 16a6efc6..ac1d1d79 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -171,10 +171,10 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
                                          float4 *conjugateGCuda,
                                          float4 *conjugateHCuda,
                                          const size_t& nVoxels) {
-    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
-                                                               cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear,
+                                                               nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
-    const unsigned blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_initialiseConjugateGradient;
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_initialiseConjugateGradient;
     const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
@@ -188,12 +188,12 @@ void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda,
                                   float4 *conjugateGCuda,
                                   float4 *conjugateHCuda,
                                   const size_t& nVoxels) {
-    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
-                                                               cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
-    auto conjugateGTexture = cudaCommon_createTextureObject(conjugateGCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
-                                                            cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
-    auto conjugateHTexture = cudaCommon_createTextureObject(conjugateHCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
-                                                            cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear,
+                                                               nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto conjugateGTexture = cudaCommon_createTextureObject(conjugateGCuda, cudaResourceTypeLinear,
+                                                            nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto conjugateHTexture = cudaCommon_createTextureObject(conjugateHCuda, cudaResourceTypeLinear,
+                                                            nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     // gam = sum((grad+g)*grad)/sum(HxG);
     unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_GetConjugateGradient1;
@@ -234,10 +234,10 @@ void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
                                         const bool& optimiseX,
                                         const bool& optimiseY,
                                         const bool& optimiseZ) {
-    auto bestControlPointTexture = cudaCommon_createTextureObject(bestControlPointCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
-                                                                  cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
-    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4),
-                                                               cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+    auto bestControlPointTexture = cudaCommon_createTextureObject(bestControlPointCuda, cudaResourceTypeLinear,
+                                                                  nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear,
+                                                               nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     const unsigned blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_updateControlPointPosition;
     const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index 90628ff6..cc14aae4 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -26,17 +26,15 @@ void reg_resampleImage_gpu(nifti_image *floatingImage,
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, false, 0,
-                                                          cudaChannelFormatKindNone, 1, cudaFilterModePoint);
+    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray);
 
     // Create the texture object for the deformation field
     auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
-                                                                  false, activeVoxelNumber * sizeof(float4),
-                                                                  cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+                                                                  activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     // Create the texture object for the mask
-    auto maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(int),
-                                                      cudaChannelFormatKindSigned, 1, cudaFilterModePoint);
+    auto maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+                                                      cudaChannelFormatKindSigned, 1);
 
     // Bind the real to voxel matrix to the texture
     mat44 floatingMatrix;
@@ -70,13 +68,11 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage,
     int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, false, 0,
-                                                          cudaChannelFormatKindNone, 1, cudaFilterModePoint);
+    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray);
 
     // Create the texture object for the deformation field
     auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
-                                                                  false, activeVoxelNumber * sizeof(float4),
-                                                                  cudaChannelFormatKindFloat, 4, cudaFilterModePoint);
+                                                                  activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     // Bind the real to voxel matrix to the texture
     mat44 floatingMatrix;

From 6739421d7d9905aed71bd8b45049dee5845d21db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 9 Jun 2023 17:25:09 +0100
Subject: [PATCH 134/314] Move EPS value into reg_test_common.h

---
 niftyreg_build_version.txt                   | 2 +-
 reg-test/reg_test_affineDeformationField.cpp | 2 --
 reg-test/reg_test_common.h                   | 4 ++--
 reg-test/reg_test_conjugateGradient.cpp      | 2 --
 reg-test/reg_test_imageGradient.cpp          | 2 --
 reg-test/reg_test_interpolation.cpp          | 2 --
 reg-test/reg_test_normaliseGradient.cpp      | 2 --
 7 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 63fe24a5..f1aaa905 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-253
+254
diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp
index b2177da1..18e2a202 100644
--- a/reg-test/reg_test_affineDeformationField.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -1,7 +1,5 @@
 #include "reg_test_common.h"
 
-#define EPS 0.0001
-
 /*
     This test file contains the following unit tests:
     test function: creation of a deformation field from an affine matrix
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 1a3b35d7..4d5c168f 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -1,5 +1,5 @@
-// Enable testing
-#define NR_TESTING
+#define NR_TESTING  // Enable testing
+#define EPS     0.000001
 
 #include <random>
 #include <catch2/catch_test_macros.hpp>
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index ddf22890..8f61cbd6 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -3,8 +3,6 @@
 
 #include "reg_test_common.h"
 
-#define EPS 0.000001
-
 /*
     This test file contains the following unit tests:
     test functions: conjugate gradient
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 9fb73ca7..06a535cd 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -3,8 +3,6 @@
 
 #include "reg_test_common.h"
 
-#define EPS 0.000001
-
 /*
     This test file contains the following unit tests:
     test function: image gradient
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index d97b9548..7587b499 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -3,8 +3,6 @@
 
 #include "reg_test_common.h"
 
-#define EPS 0.000001
-
 /*
     This test file contains the following unit tests:
     test function: image resampling
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index ef108f2d..73c2fd66 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -3,8 +3,6 @@
 
 #include "reg_test_common.h"
 
-#define EPS 0.000001
-
 /*
     This test file contains the following unit tests:
     test functions:

From eb246e61cf3d46718b6e8641e41167d4883c0dd3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 12 Jun 2023 15:37:32 +0100
Subject: [PATCH 135/314] Refactor reg_localTransformation_kernels.cu

---
 niftyreg_build_version.txt                    |    2 +-
 .../cuda/_reg_localTransformation_kernels.cu  | 3466 ++++++++---------
 2 files changed, 1575 insertions(+), 1893 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f1aaa905..ace9d036 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-254
+255
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 329c011f..9c83e29f 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -34,1993 +34,1675 @@ __device__ __constant__ float4 c_AffineMatrix0c;
 __device__ __constant__ float4 c_AffineMatrix1c;
 __device__ __constant__ float4 c_AffineMatrix2c;
 /* *************************************************************** */
-/* *************************************************************** */
 texture<float4, 1, cudaReadModeElementType> controlPointTexture;
 texture<float4, 1, cudaReadModeElementType> secondDerivativesTexture;
-texture<float4,1, cudaReadModeElementType> voxelDeformationTexture;
+texture<float4, 1, cudaReadModeElementType> voxelDeformationTexture;
 texture<int, 1, cudaReadModeElementType> maskTexture;
-texture<float,1, cudaReadModeElementType> jacobianDeterminantTexture;
-texture<float,1, cudaReadModeElementType> jacobianMatricesTexture;
-/* *************************************************************** */
+texture<float, 1, cudaReadModeElementType> jacobianDeterminantTexture;
+texture<float, 1, cudaReadModeElementType> jacobianMatricesTexture;
 /* *************************************************************** */
-__device__ float2 operator*(float a, float2 b){
-	return make_float2(a*b.x, a*b.y);
+__device__ float2 operator*(float a, float2 b) {
+    return make_float2(a * b.x, a * b.y);
 }
-__device__ float3 operator*(float a, float3 b){
-	return make_float3(a*b.x, a*b.y, a*b.z);
+__device__ float3 operator*(float a, float3 b) {
+    return make_float3(a * b.x, a * b.y, a * b.z);
 }
-__device__ float3 operator*(float3 a, float3 b){
-	return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
+__device__ float3 operator*(float3 a, float3 b) {
+    return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
 }
-__device__ float4 operator*(float4 a, float4 b){
-	return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
+__device__ float4 operator*(float4 a, float4 b) {
+    return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
 }
-__device__ float4 operator*(float a, float4 b){
-	return make_float4(a*b.x, a*b.y, a*b.z, 0.0f);
+__device__ float4 operator*(float a, float4 b) {
+    return make_float4(a * b.x, a * b.y, a * b.z, 0.0f);
 }
 /* *************************************************************** */
-__device__ float2 operator/(float2 a, float2 b){
-	return make_float2(a.x/b.x, a.y/b.y);
+__device__ float2 operator/(float2 a, float2 b) {
+    return make_float2(a.x / b.x, a.y / b.y);
 }
-__device__ float3 operator/(float3 a, float b){
-	return make_float3(a.x/b, a.y/b, a.z/b);
+__device__ float3 operator/(float3 a, float b) {
+    return make_float3(a.x / b, a.y / b, a.z / b);
 }
-__device__ float3 operator/(float3 a, float3 b){
-	return make_float3(a.x/b.x, a.y/b.y, a.z/b.z);
+__device__ float3 operator/(float3 a, float3 b) {
+    return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
 }
 /* *************************************************************** */
-__device__ float2 operator+(float2 a, float2 b){
-	return make_float2(a.x+b.x, a.y+b.y);
+__device__ float2 operator+(float2 a, float2 b) {
+    return make_float2(a.x + b.x, a.y + b.y);
 }
-__device__ float4 operator+(float4 a, float4 b){
-	return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, 0.0f);
+__device__ float4 operator+(float4 a, float4 b) {
+    return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, 0.0f);
 }
-__device__ float3 operator+(float3 a, float3 b){
-	return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
+__device__ float3 operator+(float3 a, float3 b) {
+    return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
 }
 /* *************************************************************** */
-__device__ float3 operator-(float3 a, float3 b){
-	return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
+__device__ float3 operator-(float3 a, float3 b) {
+    return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
 }
-__device__ float4 operator-(float4 a, float4 b){
-	return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, 0.f);
+__device__ float4 operator-(float4 a, float4 b) {
+    return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, 0.f);
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__device__ void GetBasisBSplineValues(float basis, float *values)
-{
-	float FF= basis*basis;
-	float FFF= FF*basis;
-	float MF=1.f-basis;
-	values[0] = (MF)*(MF)*(MF)/(6.f);
-	values[1] = (3.f*FFF - 6.f*FF + 4.f)/6.f;
-	values[2] = (-3.f*FFF + 3.f*FF + 3.f*basis + 1.f)/6.f;
-	values[3] = (FFF/6.f);
+__device__ void GetBasisBSplineValues(float basis, float *values) {
+    float ff = basis * basis;
+    float fff = ff * basis;
+    float mf = 1.f - basis;
+    values[0] = (mf) * (mf) * (mf) / (6.f);
+    values[1] = (3.f * fff - 6.f * ff + 4.f) / 6.f;
+    values[2] = (-3.f * fff + 3.f * ff + 3.f * basis + 1.f) / 6.f;
+    values[3] = (fff / 6.f);
 }
 /* *************************************************************** */
-__device__ void GetFirstBSplineValues(float basis, float *values, float *first)
-{
-	GetBasisBSplineValues(basis, values);
-	first[3]= basis * basis / 2.f;
-	first[0]= basis - 0.5f - first[3];
-	first[2]= 1.f + first[0] - 2.f*first[3];
-	first[1]= - first[0] - first[2] - first[3];
+__device__ void GetFirstBSplineValues(float basis, float *values, float *first) {
+    GetBasisBSplineValues(basis, values);
+    first[3] = basis * basis / 2.f;
+    first[0] = basis - 0.5f - first[3];
+    first[2] = 1.f + first[0] - 2.f * first[3];
+    first[1] = -first[0] - first[2] - first[3];
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__device__ void GetBasisSplineValues(float basis, float *values)
-{
-	float FF= basis*basis;
-	values[0] = (basis * ((2.f-basis)*basis - 1.f))/2.f;
-	values[1] = (FF * (3.f*basis-5.f) + 2.f)/2.f;
-	values[2] = (basis * ((4.f-3.f*basis)*basis + 1.f))/2.f;
-	values[3] = (basis-1.f) * FF/2.f;
+__device__ void GetBasisSplineValues(float basis, float *values) {
+    float FF = basis * basis;
+    values[0] = (basis * ((2.f - basis) * basis - 1.f)) / 2.f;
+    values[1] = (FF * (3.f * basis - 5.f) + 2.f) / 2.f;
+    values[2] = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f;
+    values[3] = (basis - 1.f) * FF / 2.f;
 }
 /* *************************************************************** */
-__device__ void GetBasisSplineValuesX(float basis, float4 *values)
-{
-	float FF= basis*basis;
-	values->x = (basis * ((2.f-basis)*basis - 1.f))/2.f;
-	values->y = (FF * (3.f*basis-5.f) + 2.f)/2.f;
-	values->z = (basis * ((4.f-3.f*basis)*basis + 1.f))/2.f;
-	values->w = (basis-1.f) * FF/2.f;
+__device__ void GetBasisSplineValuesX(float basis, float4 *values) {
+    float FF = basis * basis;
+    values->x = (basis * ((2.f - basis) * basis - 1.f)) / 2.f;
+    values->y = (FF * (3.f * basis - 5.f) + 2.f) / 2.f;
+    values->z = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f;
+    values->w = (basis - 1.f) * FF / 2.f;
 }
 /* *************************************************************** */
-__device__ void getBSplineBasisValue(float basis, int index, float *value, float *first)
-{
-	switch(index){
-		case 0:
-			*value = (1.f-basis)*(1.f-basis)*(1.f-basis)/6.f;
-			*first = (2.f*basis - basis*basis - 1.f)/2.f;
-			break;
-		case 1:
-			*value = (3.f*basis*basis*basis - 6.f*basis*basis + 4.f)/6.f;
-			*first = (3.f*basis*basis - 4.f*basis)/2.f;
-			break;
-		case 2:
-			*value = (3.f*basis*basis - 3.f*basis*basis*basis + 3.f*basis + 1.f)/6.f;
-			*first = (2.f*basis - 3.f*basis*basis + 1.f)/2.f;
-			break;
-		case 3:
-			*value = basis*basis*basis/6.f;
-			*first = basis*basis/2.f;
-			break;
-		 default:
-			*value = 0.f;
-			*first = 0.f;
-			break;
-	}
+__device__ void GetBSplineBasisValue(float basis, int index, float *value, float *first) {
+    switch (index) {
+    case 0:
+        *value = (1.f - basis) * (1.f - basis) * (1.f - basis) / 6.f;
+        *first = (2.f * basis - basis * basis - 1.f) / 2.f;
+        break;
+    case 1:
+        *value = (3.f * basis * basis * basis - 6.f * basis * basis + 4.f) / 6.f;
+        *first = (3.f * basis * basis - 4.f * basis) / 2.f;
+        break;
+    case 2:
+        *value = (3.f * basis * basis - 3.f * basis * basis * basis + 3.f * basis + 1.f) / 6.f;
+        *first = (2.f * basis - 3.f * basis * basis + 1.f) / 2.f;
+        break;
+    case 3:
+        *value = basis * basis * basis / 6.f;
+        *first = basis * basis / 2.f;
+        break;
+    default:
+        *value = 0.f;
+        *first = 0.f;
+        break;
+    }
 }
 /* *************************************************************** */
-__device__ void GetFirstDerivativeBasisValues2D(int index,
-												float *xBasis,
-												float *yBasis){
-	switch(index){
-	case 0: xBasis[0]=-0.0833333f;yBasis[0]=-0.0833333f;break;
-	case 1: xBasis[1]=0.f;yBasis[1]=-0.333333f;break;
-	case 2: xBasis[2]=0.0833333f;yBasis[2]=-0.0833333f;break;
-	case 3: xBasis[3]=-0.333333f;yBasis[3]=0.f;break;
-	case 4: xBasis[4]=0.f;yBasis[4]=0.f;break;
-	case 5: xBasis[5]=0.333333f;yBasis[5]=0.f;break;
-	case 6: xBasis[6]=-0.0833333f;yBasis[6]=0.0833333f;break;
-	case 7: xBasis[7]=0.f;yBasis[7]=0.333333f;break;
-	case 8: xBasis[8]=0.0833333f;yBasis[8]=0.0833333f;break;
-	}
+__device__ void GetFirstDerivativeBasisValues2D(int index, float *xBasis, float *yBasis) {
+    switch (index) {
+    case 0: xBasis[0] = -0.0833333f; yBasis[0] = -0.0833333f; break;
+    case 1: xBasis[1] = 0.f; yBasis[1] = -0.333333f; break;
+    case 2: xBasis[2] = 0.0833333f; yBasis[2] = -0.0833333f; break;
+    case 3: xBasis[3] = -0.333333f; yBasis[3] = 0.f; break;
+    case 4: xBasis[4] = 0.f; yBasis[4] = 0.f; break;
+    case 5: xBasis[5] = 0.333333f; yBasis[5] = 0.f; break;
+    case 6: xBasis[6] = -0.0833333f; yBasis[6] = 0.0833333f; break;
+    case 7: xBasis[7] = 0.f; yBasis[7] = 0.333333f; break;
+    case 8: xBasis[8] = 0.0833333f; yBasis[8] = 0.0833333f; break;
+    }
 }
 /* *************************************************************** */
-__device__ void GetFirstDerivativeBasisValues3D(int index,
-												float *xBasis,
-												float *yBasis,
-												float *zBasis){
-	switch(index){
-	case 0: xBasis[0]=-0.013889f;yBasis[0]=-0.013889f;zBasis[0]=-0.013889f;break;
-	case 1: xBasis[1]=0.000000f;yBasis[1]=-0.055556f;zBasis[1]=-0.055556f;break;
-	case 2: xBasis[2]=0.013889f;yBasis[2]=-0.013889f;zBasis[2]=-0.013889f;break;
-	case 3: xBasis[3]=-0.055556f;yBasis[3]=0.000000f;zBasis[3]=-0.055556f;break;
-	case 4: xBasis[4]=0.000000f;yBasis[4]=0.000000f;zBasis[4]=-0.222222f;break;
-	case 5: xBasis[5]=0.055556f;yBasis[5]=0.000000f;zBasis[5]=-0.055556f;break;
-	case 6: xBasis[6]=-0.013889f;yBasis[6]=0.013889f;zBasis[6]=-0.013889f;break;
-	case 7: xBasis[7]=0.000000f;yBasis[7]=0.055556f;zBasis[7]=-0.055556f;break;
-	case 8: xBasis[8]=0.013889f;yBasis[8]=0.013889f;zBasis[8]=-0.013889f;break;
-	case 9: xBasis[9]=-0.055556f;yBasis[9]=-0.055556f;zBasis[9]=0.000000f;break;
-	case 10: xBasis[10]=0.000000f;yBasis[10]=-0.222222f;zBasis[10]=0.000000f;break;
-	case 11: xBasis[11]=0.055556f;yBasis[11]=-0.055556f;zBasis[11]=0.000000f;break;
-	case 12: xBasis[12]=-0.222222f;yBasis[12]=0.000000f;zBasis[12]=0.000000f;break;
-	case 13: xBasis[13]=0.000000f;yBasis[13]=0.000000f;zBasis[13]=0.000000f;break;
-	case 14: xBasis[14]=0.222222f;yBasis[14]=0.000000f;zBasis[14]=0.000000f;break;
-	case 15: xBasis[15]=-0.055556f;yBasis[15]=0.055556f;zBasis[15]=0.000000f;break;
-	case 16: xBasis[16]=0.000000f;yBasis[16]=0.222222f;zBasis[16]=0.000000f;break;
-	case 17: xBasis[17]=0.055556f;yBasis[17]=0.055556f;zBasis[17]=0.000000f;break;
-	case 18: xBasis[18]=-0.013889f;yBasis[18]=-0.013889f;zBasis[18]=0.013889f;break;
-	case 19: xBasis[19]=0.000000f;yBasis[19]=-0.055556f;zBasis[19]=0.055556f;break;
-	case 20: xBasis[20]=0.013889f;yBasis[20]=-0.013889f;zBasis[20]=0.013889f;break;
-	case 21: xBasis[21]=-0.055556f;yBasis[21]=0.000000f;zBasis[21]=0.055556f;break;
-	case 22: xBasis[22]=0.000000f;yBasis[22]=0.000000f;zBasis[22]=0.222222f;break;
-	case 23: xBasis[23]=0.055556f;yBasis[23]=0.000000f;zBasis[23]=0.055556f;break;
-	case 24: xBasis[24]=-0.013889f;yBasis[24]=0.013889f;zBasis[24]=0.013889f;break;
-	case 25: xBasis[25]=0.000000f;yBasis[25]=0.055556f;zBasis[25]=0.055556f;break;
-	case 26: xBasis[26]=0.013889f;yBasis[26]=0.013889f;zBasis[26]=0.013889f;break;
-	}
+__device__ void GetFirstDerivativeBasisValues3D(int index, float *xBasis, float *yBasis, float *zBasis) {
+    switch (index) {
+    case 0: xBasis[0] = -0.013889f; yBasis[0] = -0.013889f; zBasis[0] = -0.013889f; break;
+    case 1: xBasis[1] = 0.000000f; yBasis[1] = -0.055556f; zBasis[1] = -0.055556f; break;
+    case 2: xBasis[2] = 0.013889f; yBasis[2] = -0.013889f; zBasis[2] = -0.013889f; break;
+    case 3: xBasis[3] = -0.055556f; yBasis[3] = 0.000000f; zBasis[3] = -0.055556f; break;
+    case 4: xBasis[4] = 0.000000f; yBasis[4] = 0.000000f; zBasis[4] = -0.222222f; break;
+    case 5: xBasis[5] = 0.055556f; yBasis[5] = 0.000000f; zBasis[5] = -0.055556f; break;
+    case 6: xBasis[6] = -0.013889f; yBasis[6] = 0.013889f; zBasis[6] = -0.013889f; break;
+    case 7: xBasis[7] = 0.000000f; yBasis[7] = 0.055556f; zBasis[7] = -0.055556f; break;
+    case 8: xBasis[8] = 0.013889f; yBasis[8] = 0.013889f; zBasis[8] = -0.013889f; break;
+    case 9: xBasis[9] = -0.055556f; yBasis[9] = -0.055556f; zBasis[9] = 0.000000f; break;
+    case 10: xBasis[10] = 0.000000f; yBasis[10] = -0.222222f; zBasis[10] = 0.000000f; break;
+    case 11: xBasis[11] = 0.055556f; yBasis[11] = -0.055556f; zBasis[11] = 0.000000f; break;
+    case 12: xBasis[12] = -0.222222f; yBasis[12] = 0.000000f; zBasis[12] = 0.000000f; break;
+    case 13: xBasis[13] = 0.000000f; yBasis[13] = 0.000000f; zBasis[13] = 0.000000f; break;
+    case 14: xBasis[14] = 0.222222f; yBasis[14] = 0.000000f; zBasis[14] = 0.000000f; break;
+    case 15: xBasis[15] = -0.055556f; yBasis[15] = 0.055556f; zBasis[15] = 0.000000f; break;
+    case 16: xBasis[16] = 0.000000f; yBasis[16] = 0.222222f; zBasis[16] = 0.000000f; break;
+    case 17: xBasis[17] = 0.055556f; yBasis[17] = 0.055556f; zBasis[17] = 0.000000f; break;
+    case 18: xBasis[18] = -0.013889f; yBasis[18] = -0.013889f; zBasis[18] = 0.013889f; break;
+    case 19: xBasis[19] = 0.000000f; yBasis[19] = -0.055556f; zBasis[19] = 0.055556f; break;
+    case 20: xBasis[20] = 0.013889f; yBasis[20] = -0.013889f; zBasis[20] = 0.013889f; break;
+    case 21: xBasis[21] = -0.055556f; yBasis[21] = 0.000000f; zBasis[21] = 0.055556f; break;
+    case 22: xBasis[22] = 0.000000f; yBasis[22] = 0.000000f; zBasis[22] = 0.222222f; break;
+    case 23: xBasis[23] = 0.055556f; yBasis[23] = 0.000000f; zBasis[23] = 0.055556f; break;
+    case 24: xBasis[24] = -0.013889f; yBasis[24] = 0.013889f; zBasis[24] = 0.013889f; break;
+    case 25: xBasis[25] = 0.000000f; yBasis[25] = 0.055556f; zBasis[25] = 0.055556f; break;
+    case 26: xBasis[26] = 0.013889f; yBasis[26] = 0.013889f; zBasis[26] = 0.013889f; break;
+    }
 }
 /* *************************************************************** */
-__device__ void GetSecondDerivativeBasisValues2D(int index,
-												 float *xxBasis,
-												 float *yyBasis,
-												 float *xyBasis){
-	switch(index){
-	case 0:
-		xxBasis[0]=0.166667f;yyBasis[0]=0.166667f;xyBasis[0]=0.25f;
-		break;
-	case 1:
-		xxBasis[1]=-0.333333f;yyBasis[1]=0.666667f;xyBasis[1]=-0.f;
-		break;
-	case 2:
-		xxBasis[2]=0.166667f;yyBasis[2]=0.166667f;xyBasis[2]=-0.25f;
-		break;
-	case 3:
-		xxBasis[3]=0.666667f;yyBasis[3]=-0.333333f;xyBasis[3]=-0.f;
-		break;
-	case 4:
-		xxBasis[4]=-1.33333f;yyBasis[4]=-1.33333f;xyBasis[4]=0.f;
-		break;
-	case 5:
-		xxBasis[5]=0.666667f;yyBasis[5]=-0.333333f;xyBasis[5]=0.f;
-		break;
-	case 6:
-		xxBasis[6]=0.166667f;yyBasis[6]=0.166667f;xyBasis[6]=-0.25f;
-		break;
-	case 7:
-		xxBasis[7]=-0.333333f;yyBasis[7]=0.666667f;xyBasis[7]=0.f;
-		break;
-	case 8:
-		xxBasis[8]=0.166667f;yyBasis[8]=0.166667f;xyBasis[8]=0.25f;
-		break;
-	}
+__device__ void GetSecondDerivativeBasisValues2D(int index, float *xxBasis, float *yyBasis, float *xyBasis) {
+    switch (index) {
+    case 0: xxBasis[0] = 0.166667f; yyBasis[0] = 0.166667f; xyBasis[0] = 0.25f; break;
+    case 1: xxBasis[1] = -0.333333f; yyBasis[1] = 0.666667f; xyBasis[1] = -0.f; break;
+    case 2: xxBasis[2] = 0.166667f; yyBasis[2] = 0.166667f; xyBasis[2] = -0.25f; break;
+    case 3: xxBasis[3] = 0.666667f; yyBasis[3] = -0.333333f; xyBasis[3] = -0.f; break;
+    case 4: xxBasis[4] = -1.33333f; yyBasis[4] = -1.33333f; xyBasis[4] = 0.f; break;
+    case 5: xxBasis[5] = 0.666667f; yyBasis[5] = -0.333333f; xyBasis[5] = 0.f; break;
+    case 6: xxBasis[6] = 0.166667f; yyBasis[6] = 0.166667f; xyBasis[6] = -0.25f; break;
+    case 7: xxBasis[7] = -0.333333f; yyBasis[7] = 0.666667f; xyBasis[7] = 0.f; break;
+    case 8: xxBasis[8] = 0.166667f; yyBasis[8] = 0.166667f; xyBasis[8] = 0.25f; break;
+    }
 }
 /* *************************************************************** */
 __device__ void GetSecondDerivativeBasisValues3D(int index,
-												 float *xxBasis,
-												 float *yyBasis,
-												 float *zzBasis,
-												 float *xyBasis,
-												 float *yzBasis,
-												 float *xzBasis){
-	switch(index){
-		case 0:
-			xxBasis[0]=0.027778f;yyBasis[0]=0.027778f;zzBasis[0]=0.027778f;
-			xyBasis[0]=0.041667f;yzBasis[0]=0.041667f;xzBasis[0]=0.041667f;
-			break;
-		case 1:
-			xxBasis[1]=-0.055556f;yyBasis[1]=0.111111f;zzBasis[1]=0.111111f;
-			xyBasis[1]=-0.000000f;yzBasis[1]=0.166667f;xzBasis[1]=-0.000000f;
-			break;
-		case 2:
-			xxBasis[2]=0.027778f;yyBasis[2]=0.027778f;zzBasis[2]=0.027778f;
-			xyBasis[2]=-0.041667f;yzBasis[2]=0.041667f;xzBasis[2]=-0.041667f;
-			break;
-		case 3:
-			xxBasis[3]=0.111111f;yyBasis[3]=-0.055556f;zzBasis[3]=0.111111f;
-			xyBasis[3]=-0.000000f;yzBasis[3]=-0.000000f;xzBasis[3]=0.166667f;
-			break;
-		case 4:
-			xxBasis[4]=-0.222222f;yyBasis[4]=-0.222222f;zzBasis[4]=0.444444f;
-			xyBasis[4]=0.000000f;yzBasis[4]=-0.000000f;xzBasis[4]=-0.000000f;
-			break;
-		case 5:
-			xxBasis[5]=0.111111f;yyBasis[5]=-0.055556f;zzBasis[5]=0.111111f;
-			xyBasis[5]=0.000000f;yzBasis[5]=-0.000000f;xzBasis[5]=-0.166667f;
-			break;
-		case 6:
-			xxBasis[6]=0.027778f;yyBasis[6]=0.027778f;zzBasis[6]=0.027778f;
-			xyBasis[6]=-0.041667f;yzBasis[6]=-0.041667f;xzBasis[6]=0.041667f;
-			break;
-		case 7:
-			xxBasis[7]=-0.055556f;yyBasis[7]=0.111111f;zzBasis[7]=0.111111f;
-			xyBasis[7]=0.000000f;yzBasis[7]=-0.166667f;xzBasis[7]=-0.000000f;
-			break;
-		case 8:
-			xxBasis[8]=0.027778f;yyBasis[8]=0.027778f;zzBasis[8]=0.027778f;
-			xyBasis[8]=0.041667f;yzBasis[8]=-0.041667f;xzBasis[8]=-0.041667f;
-			break;
-		case 9:
-			xxBasis[9]=0.111111f;yyBasis[9]=0.111111f;zzBasis[9]=-0.055556f;
-			xyBasis[9]=0.166667f;yzBasis[9]=-0.000000f;xzBasis[9]=-0.000000f;
-			break;
-		case 10:
-			xxBasis[10]=-0.222222f;yyBasis[10]=0.444444f;zzBasis[10]=-0.222222f;
-			xyBasis[10]=-0.000000f;yzBasis[10]=-0.000000f;xzBasis[10]=0.000000f;
-			break;
-		case 11:
-			xxBasis[11]=0.111111f;yyBasis[11]=0.111111f;zzBasis[11]=-0.055556f;
-			xyBasis[11]=-0.166667f;yzBasis[11]=-0.000000f;xzBasis[11]=0.000000f;
-			break;
-		case 12:
-			xxBasis[12]=0.444444f;yyBasis[12]=-0.222222f;zzBasis[12]=-0.222222f;
-			xyBasis[12]=-0.000000f;yzBasis[12]=0.000000f;xzBasis[12]=-0.000000f;
-			break;
-		case 13:
-			xxBasis[13]=-0.888889f;yyBasis[13]=-0.888889f;zzBasis[13]=-0.888889f;
-			xyBasis[13]=0.000000f;yzBasis[13]=0.000000f;xzBasis[13]=0.000000f;
-			break;
-		case 14:
-			xxBasis[14]=0.444444f;yyBasis[14]=-0.222222f;zzBasis[14]=-0.222222f;
-			xyBasis[14]=0.000000f;yzBasis[14]=0.000000f;xzBasis[14]=0.000000f;
-			break;
-		case 15:
-			xxBasis[15]=0.111111f;yyBasis[15]=0.111111f;zzBasis[15]=-0.055556f;
-			xyBasis[15]=-0.166667f;yzBasis[15]=0.000000f;xzBasis[15]=-0.000000f;
-			break;
-		case 16:
-			xxBasis[16]=-0.222222f;yyBasis[16]=0.444444f;zzBasis[16]=-0.222222f;
-			xyBasis[16]=0.000000f;yzBasis[16]=0.000000f;xzBasis[16]=0.000000f;
-			break;
-		case 17:
-			xxBasis[17]=0.111111f;yyBasis[17]=0.111111f;zzBasis[17]=-0.055556f;
-			xyBasis[17]=0.166667f;yzBasis[17]=0.000000f;xzBasis[17]=0.000000f;
-			break;
-		case 18:
-			xxBasis[18]=0.027778f;yyBasis[18]=0.027778f;zzBasis[18]=0.027778f;
-			xyBasis[18]=0.041667f;yzBasis[18]=-0.041667f;xzBasis[18]=-0.041667f;
-			break;
-		case 19:
-			xxBasis[19]=-0.055556f;yyBasis[19]=0.111111f;zzBasis[19]=0.111111f;
-			xyBasis[19]=-0.000000f;yzBasis[19]=-0.166667f;xzBasis[19]=0.000000f;
-			break;
-		case 20:
-			xxBasis[20]=0.027778f;yyBasis[20]=0.027778f;zzBasis[20]=0.027778f;
-			xyBasis[20]=-0.041667f;yzBasis[20]=-0.041667f;xzBasis[20]=0.041667f;
-			break;
-		case 21:
-			xxBasis[21]=0.111111f;yyBasis[21]=-0.055556f;zzBasis[21]=0.111111f;
-			xyBasis[21]=-0.000000f;yzBasis[21]=0.000000f;xzBasis[21]=-0.166667f;
-			break;
-		case 22:
-			xxBasis[22]=-0.222222f;yyBasis[22]=-0.222222f;zzBasis[22]=0.444444f;
-			xyBasis[22]=0.000000f;yzBasis[22]=0.000000f;xzBasis[22]=0.000000f;
-			break;
-		case 23:
-			xxBasis[23]=0.111111f;yyBasis[23]=-0.055556f;zzBasis[23]=0.111111f;
-			xyBasis[23]=0.000000f;yzBasis[23]=0.000000f;xzBasis[23]=0.166667f;
-			break;
-		case 24:
-			xxBasis[24]=0.027778f;yyBasis[24]=0.027778f;zzBasis[24]=0.027778f;
-			xyBasis[24]=-0.041667f;yzBasis[24]=0.041667f;xzBasis[24]=-0.041667f;
-			break;
-		case 25:
-			xxBasis[25]=-0.055556f;yyBasis[25]=0.111111f;zzBasis[25]=0.111111f;
-			xyBasis[25]=0.000000f;yzBasis[25]=0.166667f;xzBasis[25]=0.000000f;
-			break;
-		case 26:
-			xxBasis[26]=0.027778f;yyBasis[26]=0.027778f;zzBasis[26]=0.027778f;
-			xyBasis[26]=0.041667f;yzBasis[26]=0.041667f;xzBasis[26]=0.041667f;
-			break;
-	}
+                                                 float *xxBasis,
+                                                 float *yyBasis,
+                                                 float *zzBasis,
+                                                 float *xyBasis,
+                                                 float *yzBasis,
+                                                 float *xzBasis) {
+    switch (index) {
+    case 0:
+        xxBasis[0] = 0.027778f; yyBasis[0] = 0.027778f; zzBasis[0] = 0.027778f;
+        xyBasis[0] = 0.041667f; yzBasis[0] = 0.041667f; xzBasis[0] = 0.041667f;
+        break;
+    case 1:
+        xxBasis[1] = -0.055556f; yyBasis[1] = 0.111111f; zzBasis[1] = 0.111111f;
+        xyBasis[1] = -0.000000f; yzBasis[1] = 0.166667f; xzBasis[1] = -0.000000f;
+        break;
+    case 2:
+        xxBasis[2] = 0.027778f; yyBasis[2] = 0.027778f; zzBasis[2] = 0.027778f;
+        xyBasis[2] = -0.041667f; yzBasis[2] = 0.041667f; xzBasis[2] = -0.041667f;
+        break;
+    case 3:
+        xxBasis[3] = 0.111111f; yyBasis[3] = -0.055556f; zzBasis[3] = 0.111111f;
+        xyBasis[3] = -0.000000f; yzBasis[3] = -0.000000f; xzBasis[3] = 0.166667f;
+        break;
+    case 4:
+        xxBasis[4] = -0.222222f; yyBasis[4] = -0.222222f; zzBasis[4] = 0.444444f;
+        xyBasis[4] = 0.000000f; yzBasis[4] = -0.000000f; xzBasis[4] = -0.000000f;
+        break;
+    case 5:
+        xxBasis[5] = 0.111111f; yyBasis[5] = -0.055556f; zzBasis[5] = 0.111111f;
+        xyBasis[5] = 0.000000f; yzBasis[5] = -0.000000f; xzBasis[5] = -0.166667f;
+        break;
+    case 6:
+        xxBasis[6] = 0.027778f; yyBasis[6] = 0.027778f; zzBasis[6] = 0.027778f;
+        xyBasis[6] = -0.041667f; yzBasis[6] = -0.041667f; xzBasis[6] = 0.041667f;
+        break;
+    case 7:
+        xxBasis[7] = -0.055556f; yyBasis[7] = 0.111111f; zzBasis[7] = 0.111111f;
+        xyBasis[7] = 0.000000f; yzBasis[7] = -0.166667f; xzBasis[7] = -0.000000f;
+        break;
+    case 8:
+        xxBasis[8] = 0.027778f; yyBasis[8] = 0.027778f; zzBasis[8] = 0.027778f;
+        xyBasis[8] = 0.041667f; yzBasis[8] = -0.041667f; xzBasis[8] = -0.041667f;
+        break;
+    case 9:
+        xxBasis[9] = 0.111111f; yyBasis[9] = 0.111111f; zzBasis[9] = -0.055556f;
+        xyBasis[9] = 0.166667f; yzBasis[9] = -0.000000f; xzBasis[9] = -0.000000f;
+        break;
+    case 10:
+        xxBasis[10] = -0.222222f; yyBasis[10] = 0.444444f; zzBasis[10] = -0.222222f;
+        xyBasis[10] = -0.000000f; yzBasis[10] = -0.000000f; xzBasis[10] = 0.000000f;
+        break;
+    case 11:
+        xxBasis[11] = 0.111111f; yyBasis[11] = 0.111111f; zzBasis[11] = -0.055556f;
+        xyBasis[11] = -0.166667f; yzBasis[11] = -0.000000f; xzBasis[11] = 0.000000f;
+        break;
+    case 12:
+        xxBasis[12] = 0.444444f; yyBasis[12] = -0.222222f; zzBasis[12] = -0.222222f;
+        xyBasis[12] = -0.000000f; yzBasis[12] = 0.000000f; xzBasis[12] = -0.000000f;
+        break;
+    case 13:
+        xxBasis[13] = -0.888889f; yyBasis[13] = -0.888889f; zzBasis[13] = -0.888889f;
+        xyBasis[13] = 0.000000f; yzBasis[13] = 0.000000f; xzBasis[13] = 0.000000f;
+        break;
+    case 14:
+        xxBasis[14] = 0.444444f; yyBasis[14] = -0.222222f; zzBasis[14] = -0.222222f;
+        xyBasis[14] = 0.000000f; yzBasis[14] = 0.000000f; xzBasis[14] = 0.000000f;
+        break;
+    case 15:
+        xxBasis[15] = 0.111111f; yyBasis[15] = 0.111111f; zzBasis[15] = -0.055556f;
+        xyBasis[15] = -0.166667f; yzBasis[15] = 0.000000f; xzBasis[15] = -0.000000f;
+        break;
+    case 16:
+        xxBasis[16] = -0.222222f; yyBasis[16] = 0.444444f; zzBasis[16] = -0.222222f;
+        xyBasis[16] = 0.000000f; yzBasis[16] = 0.000000f; xzBasis[16] = 0.000000f;
+        break;
+    case 17:
+        xxBasis[17] = 0.111111f; yyBasis[17] = 0.111111f; zzBasis[17] = -0.055556f;
+        xyBasis[17] = 0.166667f; yzBasis[17] = 0.000000f; xzBasis[17] = 0.000000f;
+        break;
+    case 18:
+        xxBasis[18] = 0.027778f; yyBasis[18] = 0.027778f; zzBasis[18] = 0.027778f;
+        xyBasis[18] = 0.041667f; yzBasis[18] = -0.041667f; xzBasis[18] = -0.041667f;
+        break;
+    case 19:
+        xxBasis[19] = -0.055556f; yyBasis[19] = 0.111111f; zzBasis[19] = 0.111111f;
+        xyBasis[19] = -0.000000f; yzBasis[19] = -0.166667f; xzBasis[19] = 0.000000f;
+        break;
+    case 20:
+        xxBasis[20] = 0.027778f; yyBasis[20] = 0.027778f; zzBasis[20] = 0.027778f;
+        xyBasis[20] = -0.041667f; yzBasis[20] = -0.041667f; xzBasis[20] = 0.041667f;
+        break;
+    case 21:
+        xxBasis[21] = 0.111111f; yyBasis[21] = -0.055556f; zzBasis[21] = 0.111111f;
+        xyBasis[21] = -0.000000f; yzBasis[21] = 0.000000f; xzBasis[21] = -0.166667f;
+        break;
+    case 22:
+        xxBasis[22] = -0.222222f; yyBasis[22] = -0.222222f; zzBasis[22] = 0.444444f;
+        xyBasis[22] = 0.000000f; yzBasis[22] = 0.000000f; xzBasis[22] = 0.000000f;
+        break;
+    case 23:
+        xxBasis[23] = 0.111111f; yyBasis[23] = -0.055556f; zzBasis[23] = 0.111111f;
+        xyBasis[23] = 0.000000f; yzBasis[23] = 0.000000f; xzBasis[23] = 0.166667f;
+        break;
+    case 24:
+        xxBasis[24] = 0.027778f; yyBasis[24] = 0.027778f; zzBasis[24] = 0.027778f;
+        xyBasis[24] = -0.041667f; yzBasis[24] = 0.041667f; xzBasis[24] = -0.041667f;
+        break;
+    case 25:
+        xxBasis[25] = -0.055556f; yyBasis[25] = 0.111111f; zzBasis[25] = 0.111111f;
+        xyBasis[25] = 0.000000f; yzBasis[25] = 0.166667f; xzBasis[25] = 0.000000f;
+        break;
+    case 26:
+        xxBasis[26] = 0.027778f; yyBasis[26] = 0.027778f; zzBasis[26] = 0.027778f;
+        xyBasis[26] = 0.041667f; yzBasis[26] = 0.041667f; xzBasis[26] = 0.041667f;
+        break;
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__device__ float4 get_SlidedValues_gpu(int x, int y)
-{
-	int newX=x;
-	int newY=y;
-	if(x<0){
-		newX=0;
-	}
-	else if(x>=c_ReferenceImageDim.x){
-		newX=c_ReferenceImageDim.x-1;
-	}
-	if(y<0){
-		newY=0;
-	}
-	else if(y>=c_ReferenceImageDim.y){
-		newY=c_ReferenceImageDim.y-1;
-	}
-
-	x=x-newX;
-	y=y-newY;
-	float4 slidedValues = make_float4(
-				x * c_AffineMatrix0c.x +
-				y * c_AffineMatrix0c.y,
-				x * c_AffineMatrix1c.x +
-				y * c_AffineMatrix1c.y,
-				0.f,
-				0.f);
-	slidedValues = slidedValues +
-			tex1Dfetch(voxelDeformationTexture,
-					   newY*c_ReferenceImageDim.x+newX);
-	return slidedValues;
+__device__ float4 GetSlidedValues(int x, int y) {
+    int newX = x;
+    int newY = y;
+    if (x < 0) {
+        newX = 0;
+    } else if (x >= c_ReferenceImageDim.x) {
+        newX = c_ReferenceImageDim.x - 1;
+    }
+    if (y < 0) {
+        newY = 0;
+    } else if (y >= c_ReferenceImageDim.y) {
+        newY = c_ReferenceImageDim.y - 1;
+    }
+
+    x -= newX;
+    y -= newY;
+    const float4 slidedValues = make_float4(x * c_AffineMatrix0c.x + y * c_AffineMatrix0c.y,
+                                            x * c_AffineMatrix1c.x + y * c_AffineMatrix1c.y,
+                                            0.f, 0.f);
+    return slidedValues + tex1Dfetch(voxelDeformationTexture, newY * c_ReferenceImageDim.x + newX);
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__device__ float4 get_SlidedValues_gpu(int x, int y, int z)
-{
-	int newX=x;
-	int newY=y;
-	int newZ=z;
-	if(x<0){
-		newX=0;
-	}
-	else if(x>=c_ReferenceImageDim.x){
-		newX=c_ReferenceImageDim.x-1;
-	}
-	if(y<0){
-		newY=0;
-	}
-	else if(y>=c_ReferenceImageDim.y){
-		newY=c_ReferenceImageDim.y-1;
-	}
-	if(z<0){
-		newZ=0;
-	}
-	else if(z>=c_ReferenceImageDim.z){
-		newZ=c_ReferenceImageDim.z-1;
-	}
-
-	x=x-newX;
-	y=y-newY;
-	z=z-newZ;
-	float4 slidedValues = make_float4(
-				x * c_AffineMatrix0c.x +
-				y * c_AffineMatrix0c.y +
-				z * c_AffineMatrix0c.z,
-				x * c_AffineMatrix1c.x +
-				y * c_AffineMatrix1c.y +
-				z * c_AffineMatrix1c.z,
-				x * c_AffineMatrix2c.x +
-				y * c_AffineMatrix2c.y +
-				z * c_AffineMatrix2c.z,
-				0.f);
-	slidedValues = slidedValues +
-			tex1Dfetch(voxelDeformationTexture,
-					   (newZ*c_ReferenceImageDim.y+newY)*c_ReferenceImageDim.x+newX);
-	return slidedValues;
+__device__ float4 GetSlidedValues(int x, int y, int z) {
+    int newX = x;
+    int newY = y;
+    int newZ = z;
+    if (x < 0) {
+        newX = 0;
+    } else if (x >= c_ReferenceImageDim.x) {
+        newX = c_ReferenceImageDim.x - 1;
+    }
+    if (y < 0) {
+        newY = 0;
+    } else if (y >= c_ReferenceImageDim.y) {
+        newY = c_ReferenceImageDim.y - 1;
+    }
+    if (z < 0) {
+        newZ = 0;
+    } else if (z >= c_ReferenceImageDim.z) {
+        newZ = c_ReferenceImageDim.z - 1;
+    }
+
+    x -= newX;
+    y -= newY;
+    z -= newZ;
+    const float4 slidedValues = make_float4(x * c_AffineMatrix0c.x + y * c_AffineMatrix0c.y + z * c_AffineMatrix0c.z,
+                                            x * c_AffineMatrix1c.x + y * c_AffineMatrix1c.y + z * c_AffineMatrix1c.z,
+                                            x * c_AffineMatrix2c.x + y * c_AffineMatrix2c.y + z * c_AffineMatrix2c.z,
+                                            0.f);
+    return slidedValues + tex1Dfetch(voxelDeformationTexture, (newZ * c_ReferenceImageDim.y + newY) * c_ReferenceImageDim.x + newX);
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__global__ void reg_spline_getDeformationField3D(float4 *positionField)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ActiveVoxelNumber){
-
-		// Allocate the shared memory
-		extern __shared__ float yBasis[];
-		// Compute the shared memory offset which correspond to four times the number of thread per block
-		float *zBasis=&yBasis[4*blockDim.x*blockDim.y*blockDim.z];
-
-		int3 imageSize = c_ReferenceImageDim;
-
-		unsigned tempIndex=tex1Dfetch(maskTexture,tid);
-		const int z = tempIndex/(imageSize.x*imageSize.y);
-		tempIndex  -= z*imageSize.x*imageSize.y;
-		const int y = tempIndex/imageSize.x;
-		const int x = tempIndex - y*imageSize.x;
-
-		// the "nearest previous" node is determined [0,0,0]
-		int3 nodeAnte;
-		float3 gridVoxelSpacing = c_ControlPointVoxelSpacing;
-		nodeAnte.x = (int)floorf((float)x/gridVoxelSpacing.x);
-		nodeAnte.y = (int)floorf((float)y/gridVoxelSpacing.y);
-		nodeAnte.z = (int)floorf((float)z/gridVoxelSpacing.z);
-
-		const int shareMemIndex = 4*threadIdx.x;
-
-		// Z basis values
-		float relative = fabsf((float)z/gridVoxelSpacing.z-(float)nodeAnte.z);
-		relative=relative>0?relative:0.f;
-		if(c_UseBSpline) GetBasisBSplineValues(relative, &zBasis[shareMemIndex]);
-		else GetBasisSplineValues(relative, &zBasis[shareMemIndex]);
-		// Y basis values
-		relative = fabsf((float)y/gridVoxelSpacing.y-(float)nodeAnte.y);
-		relative=relative>0?relative:0.f;
-		if(c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[shareMemIndex]);
-		else GetBasisSplineValues(relative, &yBasis[shareMemIndex]);
-		// X basis values
-		float xBasis[4];
-		relative = fabsf((float)x/gridVoxelSpacing.x-(float)nodeAnte.x);
-		relative=relative>0?relative:0.f;
-		if(c_UseBSpline) GetBasisBSplineValues(relative, xBasis);
-		else GetBasisSplineValues(relative, xBasis);
-
-		int3 controlPointImageDim = c_ControlPointImageDim;
-		float4 displacement=make_float4(0.0f,0.0f,0.0f,0.0f);
-		float basis;
-		float3 tempDisplacement;
-
-		for(int c=0; c<4; c++){
-			tempDisplacement=make_float3(0.0f,0.0f,0.0f);
-			int indexYZ= ( (nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y) * controlPointImageDim.x;
-			for(int b=0; b<4; b++){
-
-				int indexXYZ = indexYZ + nodeAnte.x;
-				float4 nodeCoefficientA = tex1Dfetch(controlPointTexture,indexXYZ++);
-				float4 nodeCoefficientB = tex1Dfetch(controlPointTexture,indexXYZ++);
-				float4 nodeCoefficientC = tex1Dfetch(controlPointTexture,indexXYZ++);
-				float4 nodeCoefficientD = tex1Dfetch(controlPointTexture,indexXYZ);
-
-				basis=yBasis[shareMemIndex+b];
-				tempDisplacement.x += (
-							nodeCoefficientA.x * xBasis[0] +
-							nodeCoefficientB.x * xBasis[1] +
-							nodeCoefficientC.x * xBasis[2] +
-							nodeCoefficientD.x * xBasis[3] ) * basis;
-
-				tempDisplacement.y += (
-							nodeCoefficientA.y * xBasis[0] +
-							nodeCoefficientB.y * xBasis[1] +
-							nodeCoefficientC.y * xBasis[2] +
-							nodeCoefficientD.y * xBasis[3] ) * basis;
-
-				tempDisplacement.z += (
-							nodeCoefficientA.z * xBasis[0] +
-							nodeCoefficientB.z * xBasis[1] +
-							nodeCoefficientC.z * xBasis[2] +
-							nodeCoefficientD.z * xBasis[3] ) * basis;
-
-				indexYZ += controlPointImageDim.x;
-			}
-
-			basis = zBasis[shareMemIndex+c];
-			displacement.x += tempDisplacement.x * basis;
-			displacement.y += tempDisplacement.y * basis;
-			displacement.z += tempDisplacement.z * basis;
-		}
-		positionField[tid] = displacement;
-	}
-	return;
+__global__ void reg_spline_getDeformationField3D(float4 *positionField) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ActiveVoxelNumber) {
+        // Allocate the shared memory
+        extern __shared__ float yBasis[];
+        // Compute the shared memory offset which corresponds to four times the number of thread per block
+        float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z];
+
+        const int3 imageSize = c_ReferenceImageDim;
+
+        int tempIndex = tex1Dfetch(maskTexture, tid);
+        const int z = tempIndex / (imageSize.x * imageSize.y);
+        tempIndex -= z * imageSize.x * imageSize.y;
+        const int y = tempIndex / imageSize.x;
+        const int x = tempIndex - y * imageSize.x;
+
+        // the "nearest previous" node is determined [0,0,0]
+        const float3 gridVoxelSpacing = c_ControlPointVoxelSpacing;
+        const int3 nodeAnte = {
+            (int)floorf((float)x / gridVoxelSpacing.x),
+            (int)floorf((float)y / gridVoxelSpacing.y),
+            (int)floorf((float)z / gridVoxelSpacing.z)
+        };
+
+        const unsigned shareMemIndex = 4 * threadIdx.x;
+
+        // Z basis values
+        float relative = fabsf((float)z / gridVoxelSpacing.z - (float)nodeAnte.z);
+        relative = relative > 0 ? relative : 0.f;
+        if (c_UseBSpline) GetBasisBSplineValues(relative, &zBasis[shareMemIndex]);
+        else GetBasisSplineValues(relative, &zBasis[shareMemIndex]);
+        // Y basis values
+        relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y);
+        relative = relative > 0 ? relative : 0.f;
+        if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[shareMemIndex]);
+        else GetBasisSplineValues(relative, &yBasis[shareMemIndex]);
+        // X basis values
+        float xBasis[4];
+        relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x);
+        relative = relative > 0 ? relative : 0.f;
+        if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis);
+        else GetBasisSplineValues(relative, xBasis);
+
+        const int3 controlPointImageDim = c_ControlPointImageDim;
+        float4 displacement{};
+        float basis;
+
+        for (int c = 0; c < 4; c++) {
+            float3 tempDisplacement{};
+            int indexYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y) * controlPointImageDim.x;
+            for (int b = 0; b < 4; b++) {
+                int indexXYZ = indexYZ + nodeAnte.x;
+                const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, indexXYZ++);
+                const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, indexXYZ++);
+                const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, indexXYZ++);
+                const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, indexXYZ);
+
+                basis = yBasis[shareMemIndex + b];
+                tempDisplacement.x += (nodeCoefficientA.x * xBasis[0] +
+                                       nodeCoefficientB.x * xBasis[1] +
+                                       nodeCoefficientC.x * xBasis[2] +
+                                       nodeCoefficientD.x * xBasis[3]) * basis;
+
+                tempDisplacement.y += (nodeCoefficientA.y * xBasis[0] +
+                                       nodeCoefficientB.y * xBasis[1] +
+                                       nodeCoefficientC.y * xBasis[2] +
+                                       nodeCoefficientD.y * xBasis[3]) * basis;
+
+                tempDisplacement.z += (nodeCoefficientA.z * xBasis[0] +
+                                       nodeCoefficientB.z * xBasis[1] +
+                                       nodeCoefficientC.z * xBasis[2] +
+                                       nodeCoefficientD.z * xBasis[3]) * basis;
+
+                indexYZ += controlPointImageDim.x;
+            }
+
+            basis = zBasis[shareMemIndex + c];
+            displacement.x += tempDisplacement.x * basis;
+            displacement.y += tempDisplacement.y * basis;
+            displacement.z += tempDisplacement.z * basis;
+        }
+
+        positionField[tid] = displacement;
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__global__ void reg_spline_getDeformationField2D(float4 *positionField)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ActiveVoxelNumber){
-
-		// Allocate the shared memory
-		extern __shared__ float yBasis[];
-
-		int3 imageSize = c_ReferenceImageDim;
-
-		unsigned tempIndex=tex1Dfetch(maskTexture,tid);
-		const int y = tempIndex/imageSize.x;
-		const int x = tempIndex - y*imageSize.x;
-
-		// the "nearest previous" node is determined [0,0,0]
-		int2 nodeAnte;
-		float2 gridVoxelSpacing = make_float2(c_ControlPointVoxelSpacing.x,
-											  c_ControlPointVoxelSpacing.y);
-		nodeAnte.x = (int)floorf((float)x/gridVoxelSpacing.x);
-		nodeAnte.y = (int)floorf((float)y/gridVoxelSpacing.y);
-
-		const int shareMemIndex = 4*threadIdx.x;
-
-		// Y basis values
-		float relative = fabsf((float)y/gridVoxelSpacing.y-(float)nodeAnte.y);
-		if(c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[shareMemIndex]);
-		else GetBasisSplineValues(relative, &yBasis[shareMemIndex]);
-		// X basis values
-		float xBasis[4];
-		relative = fabsf((float)x/gridVoxelSpacing.x-(float)nodeAnte.x);
-		if(c_UseBSpline) GetBasisBSplineValues(relative, xBasis);
-		else GetBasisSplineValues(relative, xBasis);
-
-		int2 controlPointImageDim = make_int2(c_ControlPointImageDim.x,
-											  c_ControlPointImageDim.y);
-		float4 displacement=make_float4(0.0f,0.0f,0.0f,0.0f);
-		float basis;
-
-		for(int b=0; b<4; b++){
-			int index =  (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
-
-			float4 nodeCoefficientA = tex1Dfetch(controlPointTexture,index++);
-			float4 nodeCoefficientB = tex1Dfetch(controlPointTexture,index++);
-			float4 nodeCoefficientC = tex1Dfetch(controlPointTexture,index++);
-			float4 nodeCoefficientD = tex1Dfetch(controlPointTexture,index);
-
-			basis=yBasis[shareMemIndex+b];
-			displacement.x += basis * (
-						nodeCoefficientA.x * xBasis[0] +
-						nodeCoefficientB.x * xBasis[1] +
-						nodeCoefficientC.x * xBasis[2] +
-						nodeCoefficientD.x * xBasis[3]);
-
-			displacement.y += basis * (
-						nodeCoefficientA.y * xBasis[0] +
-						nodeCoefficientB.y * xBasis[1] +
-						nodeCoefficientC.y * xBasis[2] +
-						nodeCoefficientD.y * xBasis[3]);
-
-		}
-		positionField[tid] = displacement;
-	}
-	return;
+__global__ void reg_spline_getDeformationField2D(float4 *positionField) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ActiveVoxelNumber) {
+        // Allocate the shared memory
+        extern __shared__ float yBasis[];
+
+        const int3 imageSize = c_ReferenceImageDim;
+
+        const int tempIndex = tex1Dfetch(maskTexture, tid);
+        const int y = tempIndex / imageSize.x;
+        const int x = tempIndex - y * imageSize.x;
+
+        // the "nearest previous" node is determined [0,0,0]
+        const float2 gridVoxelSpacing = { c_ControlPointVoxelSpacing.x, c_ControlPointVoxelSpacing.y };
+        const int2 nodeAnte = {
+            (int)floorf((float)x / gridVoxelSpacing.x),
+            (int)floorf((float)y / gridVoxelSpacing.y)
+        };
+
+        const unsigned shareMemIndex = 4 * threadIdx.x;
+
+        // Y basis values
+        float relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y);
+        if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[shareMemIndex]);
+        else GetBasisSplineValues(relative, &yBasis[shareMemIndex]);
+        // X basis values
+        float xBasis[4];
+        relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x);
+        if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis);
+        else GetBasisSplineValues(relative, xBasis);
+
+        const int2 controlPointImageDim = { c_ControlPointImageDim.x, c_ControlPointImageDim.y };
+        float4 displacement{};
+        float basis;
+
+        for (int b = 0; b < 4; b++) {
+            int index = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
+
+            const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, index++);
+            const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, index++);
+            const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, index++);
+            const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, index);
+
+            basis = yBasis[shareMemIndex + b];
+            displacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
+                                       nodeCoefficientB.x * xBasis[1] +
+                                       nodeCoefficientC.x * xBasis[2] +
+                                       nodeCoefficientD.x * xBasis[3]);
+
+            displacement.y += basis * (nodeCoefficientA.y * xBasis[0] +
+                                       nodeCoefficientB.y * xBasis[1] +
+                                       nodeCoefficientC.y * xBasis[2] +
+                                       nodeCoefficientD.y * xBasis[3]);
+        }
+
+        positionField[tid] = displacement;
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivativeValues)
-{
-	__shared__ float xxbasis[9];
-	__shared__ float yybasis[9];
-	__shared__ float xybasis[9];
-
-	if(threadIdx.x<9)
-		GetSecondDerivativeBasisValues2D(threadIdx.x,
-										 xxbasis,
-										 yybasis,
-										 xybasis);
-	__syncthreads();
-
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		const int y =(int)(tid/gridSize.x);
-		const int x = int(tid - y*gridSize.x);
-
-		float4 XX = make_float4(0.0f,0.0f,0.0f,0.0f);
-		float4 YY = make_float4(0.0f,0.0f,0.0f,0.0f);
-		float4 XY = make_float4(0.0f,0.0f,0.0f,0.0f);
-
-		int tempIndex;
-		if(0<x && x<gridSize.x-1 &&
-		   0<y && y<gridSize.y-1){
-
-			tempIndex=0;
-			for(int b=y-1; b<y+2; ++b){
-				for(int a=x-1; a<x+2; ++a){
-					int indexXY = b*gridSize.x+a;
-					float4 controlPointValues = tex1Dfetch(controlPointTexture,indexXY);
-					XX.x = XX.x + xxbasis[tempIndex] * controlPointValues.x;
-					XX.y = XX.y + xxbasis[tempIndex] * controlPointValues.y;
-					YY.x = YY.x + yybasis[tempIndex] * controlPointValues.x;
-					YY.y = YY.y + yybasis[tempIndex] * controlPointValues.y;
-					XY.x = XY.x + xybasis[tempIndex] * controlPointValues.x;
-					XY.y = XY.y + xybasis[tempIndex] * controlPointValues.y;
-					++tempIndex;
-				}
-			}
-		}
-
-		tempIndex=3*tid;
-		secondDerivativeValues[tempIndex++]=XX;
-		secondDerivativeValues[tempIndex++]=YY;
-		secondDerivativeValues[tempIndex] = XY;
-	}
-	return;
+__global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivativeValues) {
+    __shared__ float xxbasis[9];
+    __shared__ float yybasis[9];
+    __shared__ float xybasis[9];
+
+    if (threadIdx.x < 9)
+        GetSecondDerivativeBasisValues2D(threadIdx.x, xxbasis, yybasis, xybasis);
+    __syncthreads();
+
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        const int y = tid / gridSize.x;
+        const int x = tid - y * gridSize.x;
+
+        float4 xx{};
+        float4 yy{};
+        float4 xy{};
+
+        unsigned tempIndex;
+        if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1) {
+            tempIndex = 0;
+            for (int b = y - 1; b < y + 2; ++b) {
+                for (int a = x - 1; a < x + 2; ++a) {
+                    const int indexXY = b * gridSize.x + a;
+                    float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXY);
+                    xx.x += xxbasis[tempIndex] * controlPointValues.x;
+                    xx.y += xxbasis[tempIndex] * controlPointValues.y;
+                    yy.x += yybasis[tempIndex] * controlPointValues.x;
+                    yy.y += yybasis[tempIndex] * controlPointValues.y;
+                    xy.x += xybasis[tempIndex] * controlPointValues.x;
+                    xy.y += xybasis[tempIndex] * controlPointValues.y;
+                    ++tempIndex;
+                }
+            }
+        }
+
+        tempIndex = 3 * tid;
+        secondDerivativeValues[tempIndex++] = xx;
+        secondDerivativeValues[tempIndex++] = yy;
+        secondDerivativeValues[tempIndex] = xy;
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivativeValues)
-{
-	__shared__ float xxbasis[27];
-	__shared__ float yybasis[27];
-	__shared__ float zzbasis[27];
-	__shared__ float xybasis[27];
-	__shared__ float yzbasis[27];
-	__shared__ float xzbasis[27];
-
-	if(threadIdx.x<27)
-		GetSecondDerivativeBasisValues3D(threadIdx.x,
-										 xxbasis,
-										 yybasis,
-										 zzbasis,
-										 xybasis,
-										 yzbasis,
-										 xzbasis);
-	__syncthreads();
-
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		int tempIndex=tid;
-		const int z =(int)(tempIndex/(gridSize.x*gridSize.y));
-		tempIndex -= int(z*gridSize.x*gridSize.y);
-		const int y =(int)(tempIndex/gridSize.x);
-		const int x = int(tempIndex - y*gridSize.x);
-
-		float4 XX = make_float4(0.0f,0.0f,0.0f,0.0f);
-		float4 YY = make_float4(0.0f,0.0f,0.0f,0.0f);
-		float4 ZZ = make_float4(0.0f,0.0f,0.0f,0.0f);
-		float4 XY = make_float4(0.0f,0.0f,0.0f,0.0f);
-		float4 YZ = make_float4(0.0f,0.0f,0.0f,0.0f);
-		float4 XZ = make_float4(0.0f,0.0f,0.0f,0.0f);
-
-		if(0<x && x<gridSize.x-1 &&
-		   0<y && y<gridSize.y-1 &&
-		   0<z && z<gridSize.z-1){
-
-			tempIndex=0;
-			for(int c=z-1; c<z+2; ++c){
-				for(int b=y-1; b<y+2; ++b){
-					for(int a=x-1; a<x+2; ++a){
-						int indexXYZ = (c*gridSize.y+b)*gridSize.x+a;
-						float4 controlPointValues = tex1Dfetch(controlPointTexture,indexXYZ);
-						XX = XX + xxbasis[tempIndex] * controlPointValues;
-						YY = YY + yybasis[tempIndex] * controlPointValues;
-						ZZ = ZZ + zzbasis[tempIndex] * controlPointValues;
-						XY = XY + xybasis[tempIndex] * controlPointValues;
-						YZ = YZ + yzbasis[tempIndex] * controlPointValues;
-						XZ = XZ + xzbasis[tempIndex] * controlPointValues;
-						tempIndex++;
-					}
-				}
-			}
-		}
-
-		tempIndex=6*tid;
-		secondDerivativeValues[tempIndex++]=XX;
-		secondDerivativeValues[tempIndex++]=YY;
-		secondDerivativeValues[tempIndex++]=ZZ;
-		secondDerivativeValues[tempIndex++]=XY;
-		secondDerivativeValues[tempIndex++]=YZ;
-		secondDerivativeValues[tempIndex] = XZ;
-	}
+__global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivativeValues) {
+    __shared__ float xxbasis[27];
+    __shared__ float yybasis[27];
+    __shared__ float zzbasis[27];
+    __shared__ float xybasis[27];
+    __shared__ float yzbasis[27];
+    __shared__ float xzbasis[27];
+
+    if (threadIdx.x < 27)
+        GetSecondDerivativeBasisValues3D(threadIdx.x, xxbasis, yybasis, zzbasis, xybasis, yzbasis, xzbasis);
+    __syncthreads();
+
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (gridSize.x * gridSize.y);
+        tempIndex -= z * gridSize.x * gridSize.y;
+        const int y = tempIndex / gridSize.x;
+        const int x = tempIndex - y * gridSize.x;
+
+        float4 xx{};
+        float4 yy{};
+        float4 zz{};
+        float4 xy{};
+        float4 yz{};
+        float4 xz{};
+
+        if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1 && 0 < z && z < gridSize.z - 1) {
+            tempIndex = 0;
+            for (int c = z - 1; c < z + 2; ++c) {
+                for (int b = y - 1; b < y + 2; ++b) {
+                    for (int a = x - 1; a < x + 2; ++a) {
+                        int indexXYZ = (c * gridSize.y + b) * gridSize.x + a;
+                        float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXYZ);
+                        xx = xx + xxbasis[tempIndex] * controlPointValues;
+                        yy = yy + yybasis[tempIndex] * controlPointValues;
+                        zz = zz + zzbasis[tempIndex] * controlPointValues;
+                        xy = xy + xybasis[tempIndex] * controlPointValues;
+                        yz = yz + yzbasis[tempIndex] * controlPointValues;
+                        xz = xz + xzbasis[tempIndex] * controlPointValues;
+                        tempIndex++;
+                    }
+                }
+            }
+        }
+
+        tempIndex = 6 * tid;
+        secondDerivativeValues[tempIndex++] = xx;
+        secondDerivativeValues[tempIndex++] = yy;
+        secondDerivativeValues[tempIndex++] = zz;
+        secondDerivativeValues[tempIndex++] = xy;
+        secondDerivativeValues[tempIndex++] = yz;
+        secondDerivativeValues[tempIndex] = xz;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergy2D_kernel(float *penaltyTerm)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-		int index=tid*3;
-		float4 XX = tex1Dfetch(secondDerivativesTexture,index++);XX=XX*XX;
-		float4 YY = tex1Dfetch(secondDerivativesTexture,index++);YY=YY*YY;
-		float4 XY = tex1Dfetch(secondDerivativesTexture,index++);XY=XY*XY;
-
-		penaltyTerm[tid]=
-				XX.x + XX.y +
-				YY.x + YY.y +
-				2.f*(XY.x + XY.y);
-	}
-	return;
+__global__ void reg_spline_getApproxBendingEnergy2D_kernel(float *penaltyTerm) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        unsigned index = tid * 3;
+        float4 xx = tex1Dfetch(secondDerivativesTexture, index++); xx = xx * xx;
+        float4 yy = tex1Dfetch(secondDerivativesTexture, index++); yy = yy * yy;
+        float4 xy = tex1Dfetch(secondDerivativesTexture, index++); xy = xy * xy;
+        penaltyTerm[tid] = xx.x + xx.y + yy.x + yy.y + 2.f * (xy.x + xy.y);
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergy3D_kernel(float *penaltyTerm)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-		int index=tid*6;
-		float4 XX = tex1Dfetch(secondDerivativesTexture,index++);XX=XX*XX;
-		float4 YY = tex1Dfetch(secondDerivativesTexture,index++);YY=YY*YY;
-		float4 ZZ = tex1Dfetch(secondDerivativesTexture,index++);ZZ=ZZ*ZZ;
-		float4 XY = tex1Dfetch(secondDerivativesTexture,index++);XY=XY*XY;
-		float4 YZ = tex1Dfetch(secondDerivativesTexture,index++);YZ=YZ*YZ;
-		float4 XZ = tex1Dfetch(secondDerivativesTexture,index);XZ=XZ*XZ;
-
-		penaltyTerm[tid]=
-				XX.x + XX.y + XX.z +
-				YY.x + YY.y + YY.z +
-				ZZ.x + ZZ.y + ZZ.z +
-				2.f*(XY.x + XY.y + XY.z +
-					 YZ.x + YZ.y + YZ.z +
-					 XZ.x + XZ.y + XZ.z);
-	}
-	return;
+__global__ void reg_spline_getApproxBendingEnergy3D_kernel(float *penaltyTerm) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        unsigned index = tid * 6;
+        float4 xx = tex1Dfetch(secondDerivativesTexture, index++); xx = xx * xx;
+        float4 yy = tex1Dfetch(secondDerivativesTexture, index++); yy = yy * yy;
+        float4 zz = tex1Dfetch(secondDerivativesTexture, index++); zz = zz * zz;
+        float4 xy = tex1Dfetch(secondDerivativesTexture, index++); xy = xy * xy;
+        float4 yz = tex1Dfetch(secondDerivativesTexture, index++); yz = yz * yz;
+        float4 xz = tex1Dfetch(secondDerivativesTexture, index); xz = xz * xz;
+        penaltyTerm[tid] = xx.x + xx.y + xx.z + yy.x + yy.y + yy.z + zz.x + zz.y + zz.z +
+            2.f * (xy.x + xy.y + xy.z + yz.x + yz.y + yz.z + xz.x + xz.y + xz.z);
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeGradientArray)
-{
-	__shared__ float xxbasis[9];
-	__shared__ float yybasis[9];
-	__shared__ float xybasis[9];
-
-	if(threadIdx.x<9)
-		GetSecondDerivativeBasisValues2D(threadIdx.x,
-										 xxbasis,
-										 yybasis,
-										 xybasis);
-	__syncthreads();
-
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		const int y = tid/gridSize.x;
-		const int x = tid - y*gridSize.x;
-
-		float2 gradientValue=make_float2(0.0f,0.0f);
-		float4 secondDerivativeValues;
-
-		int coord=0;
-		for(int b=y-1; b<y+2; ++b){
-			for(int a=x-1; a<x+2; ++a){
-				if(-1<a && -1<b && a<gridSize.x && b<gridSize.y){
-					int indexXY = 3*(b*gridSize.x+a);
-					secondDerivativeValues = tex1Dfetch(secondDerivativesTexture,indexXY++); // XX
-					gradientValue.x += secondDerivativeValues.x * xxbasis[coord];
-					gradientValue.y += secondDerivativeValues.y * xxbasis[coord];
-					secondDerivativeValues = tex1Dfetch(secondDerivativesTexture,indexXY++); // YY
-					gradientValue.x += secondDerivativeValues.x * yybasis[coord];
-					gradientValue.y += secondDerivativeValues.y * yybasis[coord];
-					secondDerivativeValues = 2.f*tex1Dfetch(secondDerivativesTexture,indexXY); // XY
-					gradientValue.x += secondDerivativeValues.x * xybasis[coord];
-					gradientValue.y += secondDerivativeValues.y * xybasis[coord];
-				}
-				coord++;
-			}
-		}
-
-		nodeGradientArray[tid].x += c_Weight*gradientValue.x;
-		nodeGradientArray[tid].y += c_Weight*gradientValue.y;
-	}
+__global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeGradientArray) {
+    __shared__ float xxbasis[9];
+    __shared__ float yybasis[9];
+    __shared__ float xybasis[9];
+
+    if (threadIdx.x < 9)
+        GetSecondDerivativeBasisValues2D(threadIdx.x, xxbasis, yybasis, xybasis);
+    __syncthreads();
+
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        const int y = tid / gridSize.x;
+        const int x = tid - y * gridSize.x;
+
+        float2 gradientValue{};
+        float4 secondDerivativeValues;
+
+        int coord = 0;
+        for (int b = y - 1; b < y + 2; ++b) {
+            for (int a = x - 1; a < x + 2; ++a) {
+                if (-1 < a && -1 < b && a < gridSize.x && b < gridSize.y) {
+                    int indexXY = 3 * (b * gridSize.x + a);
+                    secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXY++); // XX
+                    gradientValue.x += secondDerivativeValues.x * xxbasis[coord];
+                    gradientValue.y += secondDerivativeValues.y * xxbasis[coord];
+                    secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXY++); // YY
+                    gradientValue.x += secondDerivativeValues.x * yybasis[coord];
+                    gradientValue.y += secondDerivativeValues.y * yybasis[coord];
+                    secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXY); // XY
+                    gradientValue.x += secondDerivativeValues.x * xybasis[coord];
+                    gradientValue.y += secondDerivativeValues.y * xybasis[coord];
+                }
+                coord++;
+            }
+        }
+
+        nodeGradientArray[tid].x += c_Weight * gradientValue.x;
+        nodeGradientArray[tid].y += c_Weight * gradientValue.y;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeGradientArray)
-{
-	__shared__ float xxbasis[27];
-	__shared__ float yybasis[27];
-	__shared__ float zzbasis[27];
-	__shared__ float xybasis[27];
-	__shared__ float yzbasis[27];
-	__shared__ float xzbasis[27];
-
-	if(threadIdx.x<27)
-		GetSecondDerivativeBasisValues3D(threadIdx.x,
-										 xxbasis,
-										 yybasis,
-										 zzbasis,
-										 xybasis,
-										 yzbasis,
-										 xzbasis);
-	__syncthreads();
-
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		int tempIndex=tid;
-		const int z = tempIndex/(gridSize.x*gridSize.y);
-		tempIndex  -= z*gridSize.x*gridSize.y;
-		const int y = tempIndex/gridSize.x;
-		const int x = tempIndex - y*gridSize.x;
-
-		float3 gradientValue=make_float3(0.0f,0.0f,0.0f);
-		float4 secondDerivativeValues;
-
-		int coord=0;
-		for(int c=z-1; c<z+2; ++c){
-			for(int b=y-1; b<y+2; ++b){
-				for(int a=x-1; a<x+2; ++a){
-					if(-1<a && -1<b && -1<c && a<gridSize.x && b<gridSize.y && c<gridSize.z){
-						unsigned indexXYZ = 6*((c*gridSize.y+b)*gridSize.x+a);
-						secondDerivativeValues = tex1Dfetch(secondDerivativesTexture,indexXYZ++); // XX
-						gradientValue.x += secondDerivativeValues.x * xxbasis[coord];
-						gradientValue.y += secondDerivativeValues.y * xxbasis[coord];
-						gradientValue.z += secondDerivativeValues.z * xxbasis[coord];
-						secondDerivativeValues = tex1Dfetch(secondDerivativesTexture,indexXYZ++); // YY
-						gradientValue.x += secondDerivativeValues.x * yybasis[coord];
-						gradientValue.y += secondDerivativeValues.y * yybasis[coord];
-						gradientValue.z += secondDerivativeValues.z * yybasis[coord];
-						secondDerivativeValues = tex1Dfetch(secondDerivativesTexture,indexXYZ++); //ZZ
-						gradientValue.x += secondDerivativeValues.x * zzbasis[coord];
-						gradientValue.y += secondDerivativeValues.y * zzbasis[coord];
-						gradientValue.z += secondDerivativeValues.z * zzbasis[coord];
-						secondDerivativeValues = 2.f*tex1Dfetch(secondDerivativesTexture,indexXYZ++); // XY
-						gradientValue.x += secondDerivativeValues.x * xybasis[coord];
-						gradientValue.y += secondDerivativeValues.y * xybasis[coord];
-						gradientValue.z += secondDerivativeValues.z * xybasis[coord];
-						secondDerivativeValues = 2.f*tex1Dfetch(secondDerivativesTexture,indexXYZ++); // YZ
-						gradientValue.x += secondDerivativeValues.x * yzbasis[coord];
-						gradientValue.y += secondDerivativeValues.y * yzbasis[coord];
-						gradientValue.z += secondDerivativeValues.z * yzbasis[coord];
-						secondDerivativeValues = 2.f*tex1Dfetch(secondDerivativesTexture,indexXYZ); //XZ
-						gradientValue.x += secondDerivativeValues.x * xzbasis[coord];
-						gradientValue.y += secondDerivativeValues.y * xzbasis[coord];
-						gradientValue.z += secondDerivativeValues.z * xzbasis[coord];
-					}
-					coord++;
-				}
-			}
-		}
-		gradientValue = c_Weight * gradientValue;
-
-		float4 metricGradientValue;
-		metricGradientValue = nodeGradientArray[tid];
-		metricGradientValue.x += gradientValue.x;
-		metricGradientValue.y += gradientValue.y;
-		metricGradientValue.z += gradientValue.z;
-		nodeGradientArray[tid]=metricGradientValue;
-	}
+__global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeGradientArray) {
+    __shared__ float xxbasis[27];
+    __shared__ float yybasis[27];
+    __shared__ float zzbasis[27];
+    __shared__ float xybasis[27];
+    __shared__ float yzbasis[27];
+    __shared__ float xzbasis[27];
+
+    if (threadIdx.x < 27)
+        GetSecondDerivativeBasisValues3D(threadIdx.x, xxbasis, yybasis, zzbasis, xybasis, yzbasis, xzbasis);
+    __syncthreads();
+
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (gridSize.x * gridSize.y);
+        tempIndex -= z * gridSize.x * gridSize.y;
+        const int y = tempIndex / gridSize.x;
+        const int x = tempIndex - y * gridSize.x;
+
+        float3 gradientValue{};
+        float4 secondDerivativeValues;
+
+        int coord = 0;
+        for (int c = z - 1; c < z + 2; ++c) {
+            for (int b = y - 1; b < y + 2; ++b) {
+                for (int a = x - 1; a < x + 2; ++a) {
+                    if (-1 < a && -1 < b && -1 < c && a < gridSize.x && b < gridSize.y && c < gridSize.z) {
+                        unsigned indexXYZ = 6 * ((c * gridSize.y + b) * gridSize.x + a);
+                        secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // XX
+                        gradientValue.x += secondDerivativeValues.x * xxbasis[coord];
+                        gradientValue.y += secondDerivativeValues.y * xxbasis[coord];
+                        gradientValue.z += secondDerivativeValues.z * xxbasis[coord];
+                        secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // YY
+                        gradientValue.x += secondDerivativeValues.x * yybasis[coord];
+                        gradientValue.y += secondDerivativeValues.y * yybasis[coord];
+                        gradientValue.z += secondDerivativeValues.z * yybasis[coord];
+                        secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); //ZZ
+                        gradientValue.x += secondDerivativeValues.x * zzbasis[coord];
+                        gradientValue.y += secondDerivativeValues.y * zzbasis[coord];
+                        gradientValue.z += secondDerivativeValues.z * zzbasis[coord];
+                        secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ++); // XY
+                        gradientValue.x += secondDerivativeValues.x * xybasis[coord];
+                        gradientValue.y += secondDerivativeValues.y * xybasis[coord];
+                        gradientValue.z += secondDerivativeValues.z * xybasis[coord];
+                        secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ++); // YZ
+                        gradientValue.x += secondDerivativeValues.x * yzbasis[coord];
+                        gradientValue.y += secondDerivativeValues.y * yzbasis[coord];
+                        gradientValue.z += secondDerivativeValues.z * yzbasis[coord];
+                        secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ); //XZ
+                        gradientValue.x += secondDerivativeValues.x * xzbasis[coord];
+                        gradientValue.y += secondDerivativeValues.y * xzbasis[coord];
+                        gradientValue.z += secondDerivativeValues.z * xzbasis[coord];
+                    }
+                    coord++;
+                }
+            }
+        }
+        gradientValue = c_Weight * gradientValue;
+
+        float4 metricGradientValue;
+        metricGradientValue = nodeGradientArray[tid];
+        metricGradientValue.x += gradientValue.x;
+        metricGradientValue.y += gradientValue.y;
+        metricGradientValue.z += gradientValue.z;
+        nodeGradientArray[tid] = metricGradientValue;
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatrices,
-															float *jacobianDet)
-{
-	__shared__ float xbasis[9];
-	__shared__ float ybasis[9];
-
-	if(threadIdx.x<9)
-		GetFirstDerivativeBasisValues2D(threadIdx.x,
-										xbasis,
-										ybasis);
-	__syncthreads();
-
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		int tempIndex=tid;
-		const int y =tempIndex/gridSize.x;
-		const int x = tempIndex - y*gridSize.x;
-
-		if(0<x && x<gridSize.x-1 &&
-		   0<y && y<gridSize.y-1){
-
-			float Tx_x=0, Tx_y=0;
-			float Ty_x=0, Ty_y=0;
-
-			tempIndex=0;
-			for(int b=y-1; b<y+2; ++b){
-				for(int a=x-1; a<x+2; ++a){
-					int indexXY = b * gridSize.x + a;
-					float4 controlPointValues = tex1Dfetch(controlPointTexture,indexXY);
-					Tx_x += xbasis[tempIndex]*controlPointValues.x;
-					Tx_y += ybasis[tempIndex]*controlPointValues.x;
-					Ty_x += xbasis[tempIndex]*controlPointValues.y;
-					Ty_y += ybasis[tempIndex]*controlPointValues.y;
-					tempIndex++;
-				}
-			}
-
-			// The jacobian matrix is reoriented
-			float Tx_x2=c_AffineMatrix0.x*Tx_x + c_AffineMatrix0.y*Ty_x;
-			float Tx_y2=c_AffineMatrix0.x*Tx_y + c_AffineMatrix0.y*Ty_y;
-			float Ty_x2=c_AffineMatrix1.x*Tx_x + c_AffineMatrix1.y*Ty_x;
-			float Ty_y2=c_AffineMatrix1.x*Tx_y + c_AffineMatrix1.y*Ty_y;
-
-			// The Jacobian matrix is stored
-			tempIndex=tid*4;
-			jacobianMatrices[tempIndex++]=Tx_x2;
-			jacobianMatrices[tempIndex++]=Tx_y2;
-			jacobianMatrices[tempIndex++]=Ty_x2;
-			jacobianMatrices[tempIndex] = Ty_y2;
-
-			// The Jacobian determinant is computed and stored
-			jacobianDet[tid]= Tx_x2 * Ty_y2 - Tx_y2 * Ty_x2;
-		}
-		else{
-			tempIndex=tid*4;
-			jacobianMatrices[tempIndex++]=1.f;
-			jacobianMatrices[tempIndex++]=0.f;
-			jacobianMatrices[tempIndex++]=0.f;
-			jacobianMatrices[tempIndex]=1.f;
-			jacobianDet[tid]= 1.0f;
-		}
-	}
-	return;
+__global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatrices, float *jacobianDet) {
+    __shared__ float xbasis[9];
+    __shared__ float ybasis[9];
+
+    if (threadIdx.x < 9)
+        GetFirstDerivativeBasisValues2D(threadIdx.x, xbasis, ybasis);
+    __syncthreads();
+
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        int tempIndex = tid;
+        const int y = tempIndex / gridSize.x;
+        const int x = tempIndex - y * gridSize.x;
+
+        if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1) {
+            float Tx_x = 0, Tx_y = 0;
+            float Ty_x = 0, Ty_y = 0;
+
+            tempIndex = 0;
+            for (int b = y - 1; b < y + 2; ++b) {
+                for (int a = x - 1; a < x + 2; ++a) {
+                    int indexXY = b * gridSize.x + a;
+                    float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXY);
+                    Tx_x += xbasis[tempIndex] * controlPointValues.x;
+                    Tx_y += ybasis[tempIndex] * controlPointValues.x;
+                    Ty_x += xbasis[tempIndex] * controlPointValues.y;
+                    Ty_y += ybasis[tempIndex] * controlPointValues.y;
+                    tempIndex++;
+                }
+            }
+
+            // The jacobian matrix is reoriented
+            float Tx_x2 = c_AffineMatrix0.x * Tx_x + c_AffineMatrix0.y * Ty_x;
+            float Tx_y2 = c_AffineMatrix0.x * Tx_y + c_AffineMatrix0.y * Ty_y;
+            float Ty_x2 = c_AffineMatrix1.x * Tx_x + c_AffineMatrix1.y * Ty_x;
+            float Ty_y2 = c_AffineMatrix1.x * Tx_y + c_AffineMatrix1.y * Ty_y;
+
+            // The Jacobian matrix is stored
+            tempIndex = tid * 4;
+            jacobianMatrices[tempIndex++] = Tx_x2;
+            jacobianMatrices[tempIndex++] = Tx_y2;
+            jacobianMatrices[tempIndex++] = Ty_x2;
+            jacobianMatrices[tempIndex] = Ty_y2;
+
+            // The Jacobian determinant is computed and stored
+            jacobianDet[tid] = Tx_x2 * Ty_y2 - Tx_y2 * Ty_x2;
+        } else {
+            tempIndex = tid * 4;
+            jacobianMatrices[tempIndex++] = 1.f;
+            jacobianMatrices[tempIndex++] = 0.f;
+            jacobianMatrices[tempIndex++] = 0.f;
+            jacobianMatrices[tempIndex] = 1.f;
+            jacobianDet[tid] = 1.0f;
+        }
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-__global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatrices,
-														   float *jacobianDet)
-{
-	__shared__ float xbasis[27];
-	__shared__ float ybasis[27];
-	__shared__ float zbasis[27];
-
-	if(threadIdx.x<27)
-		GetFirstDerivativeBasisValues3D(threadIdx.x,
-									  xbasis,
-									  ybasis,
-									  zbasis);
-	__syncthreads();
-
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		int tempIndex=tid;
-		const int z =tempIndex/(gridSize.x*gridSize.y);
-		tempIndex -= z*gridSize.x*gridSize.y;
-		const int y =tempIndex/gridSize.x;
-		const int x = tempIndex - y*gridSize.x;
-
-		if(0<x && x<gridSize.x-1 &&
-		   0<y && y<gridSize.y-1 &&
-		   0<z && z<gridSize.z-1){
-
-			float Tx_x=0, Tx_y=0, Tx_z=0;
-			float Ty_x=0, Ty_y=0, Ty_z=0;
-			float Tz_x=0, Tz_y=0, Tz_z=0;
-
-			tempIndex=0;
-			for(int c=z-1; c<z+2; ++c){
-				for(int b=y-1; b<y+2; ++b){
-					for(int a=x-1; a<x+2; ++a){
-						int indexXYZ = (c*gridSize.y+b)*gridSize.x+a;
-						float4 controlPointValues = tex1Dfetch(controlPointTexture,indexXYZ);
-						Tx_x += xbasis[tempIndex]*controlPointValues.x;
-						Tx_y += ybasis[tempIndex]*controlPointValues.x;
-						Tx_z += zbasis[tempIndex]*controlPointValues.x;
-						Ty_x += xbasis[tempIndex]*controlPointValues.y;
-						Ty_y += ybasis[tempIndex]*controlPointValues.y;
-						Ty_z += zbasis[tempIndex]*controlPointValues.y;
-						Tz_x += xbasis[tempIndex]*controlPointValues.z;
-						Tz_y += ybasis[tempIndex]*controlPointValues.z;
-						Tz_z += zbasis[tempIndex]*controlPointValues.z;
-						tempIndex++;
-					}
-				}
-			}
-
-			// The jacobian matrix is reoriented
-			float Tx_x2=c_AffineMatrix0.x*Tx_x + c_AffineMatrix0.y*Ty_x + c_AffineMatrix0.z*Tz_x;
-			float Tx_y2=c_AffineMatrix0.x*Tx_y + c_AffineMatrix0.y*Ty_y + c_AffineMatrix0.z*Tz_y;
-			float Tx_z2=c_AffineMatrix0.x*Tx_z + c_AffineMatrix0.y*Ty_z + c_AffineMatrix0.z*Tz_z;
-			float Ty_x2=c_AffineMatrix1.x*Tx_x + c_AffineMatrix1.y*Ty_x + c_AffineMatrix1.z*Tz_x;
-			float Ty_y2=c_AffineMatrix1.x*Tx_y + c_AffineMatrix1.y*Ty_y + c_AffineMatrix1.z*Tz_y;
-			float Ty_z2=c_AffineMatrix1.x*Tx_z + c_AffineMatrix1.y*Ty_z + c_AffineMatrix1.z*Tz_z;
-			float Tz_x2=c_AffineMatrix2.x*Tx_x + c_AffineMatrix2.y*Ty_x + c_AffineMatrix2.z*Tz_x;
-			float Tz_y2=c_AffineMatrix2.x*Tx_y + c_AffineMatrix2.y*Ty_y + c_AffineMatrix2.z*Tz_y;
-			float Tz_z2=c_AffineMatrix2.x*Tx_z + c_AffineMatrix2.y*Ty_z + c_AffineMatrix2.z*Tz_z;
-
-			// The Jacobian matrix is stored
-			tempIndex=tid*9;
-			jacobianMatrices[tempIndex++]=Tx_x2;
-			jacobianMatrices[tempIndex++]=Tx_y2;
-			jacobianMatrices[tempIndex++]=Tx_z2;
-			jacobianMatrices[tempIndex++]=Ty_x2;
-			jacobianMatrices[tempIndex++]=Ty_y2;
-			jacobianMatrices[tempIndex++]=Ty_z2;
-			jacobianMatrices[tempIndex++]=Tz_x2;
-			jacobianMatrices[tempIndex++]=Tz_y2;
-			jacobianMatrices[tempIndex] = Tz_z2;
-
-			// The Jacobian determinant is computed and stored
-			jacobianDet[tid]= Tx_x2*Ty_y2*Tz_z2
-							+ Tx_y2*Ty_z2*Tz_x2
-							+ Tx_z2*Ty_x2*Tz_y2
-							- Tx_x2*Ty_z2*Tz_y2
-							- Tx_y2*Ty_x2*Tz_z2
-							- Tx_z2*Ty_y2*Tz_x2;
-		}
-		else{
-			tempIndex=tid*9;
-			jacobianMatrices[tempIndex++]=1.f;
-			jacobianMatrices[tempIndex++]=0.f;
-			jacobianMatrices[tempIndex++]=0.f;
-			jacobianMatrices[tempIndex++]=0.f;
-			jacobianMatrices[tempIndex++]=1.f;
-			jacobianMatrices[tempIndex++]=0.f;
-			jacobianMatrices[tempIndex++]=0.f;
-			jacobianMatrices[tempIndex++]=0.f;
-			jacobianMatrices[tempIndex]=1.f;
-			jacobianDet[tid]= 1.0f;
-		}
-	}
-	return;
+__global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatrices, float *jacobianDet) {
+    __shared__ float xbasis[27];
+    __shared__ float ybasis[27];
+    __shared__ float zbasis[27];
+
+    if (threadIdx.x < 27)
+        GetFirstDerivativeBasisValues3D(threadIdx.x, xbasis, ybasis, zbasis);
+    __syncthreads();
+
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (gridSize.x * gridSize.y);
+        tempIndex -= z * gridSize.x * gridSize.y;
+        const int y = tempIndex / gridSize.x;
+        const int x = tempIndex - y * gridSize.x;
+
+        if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1 && 0 < z && z < gridSize.z - 1) {
+            float Tx_x = 0, Tx_y = 0, Tx_z = 0;
+            float Ty_x = 0, Ty_y = 0, Ty_z = 0;
+            float Tz_x = 0, Tz_y = 0, Tz_z = 0;
+
+            tempIndex = 0;
+            for (int c = z - 1; c < z + 2; ++c) {
+                for (int b = y - 1; b < y + 2; ++b) {
+                    for (int a = x - 1; a < x + 2; ++a) {
+                        int indexXYZ = (c * gridSize.y + b) * gridSize.x + a;
+                        float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXYZ);
+                        Tx_x += xbasis[tempIndex] * controlPointValues.x;
+                        Tx_y += ybasis[tempIndex] * controlPointValues.x;
+                        Tx_z += zbasis[tempIndex] * controlPointValues.x;
+                        Ty_x += xbasis[tempIndex] * controlPointValues.y;
+                        Ty_y += ybasis[tempIndex] * controlPointValues.y;
+                        Ty_z += zbasis[tempIndex] * controlPointValues.y;
+                        Tz_x += xbasis[tempIndex] * controlPointValues.z;
+                        Tz_y += ybasis[tempIndex] * controlPointValues.z;
+                        Tz_z += zbasis[tempIndex] * controlPointValues.z;
+                        tempIndex++;
+                    }
+                }
+            }
+
+            // The jacobian matrix is reoriented
+            float Tx_x2 = c_AffineMatrix0.x * Tx_x + c_AffineMatrix0.y * Ty_x + c_AffineMatrix0.z * Tz_x;
+            float Tx_y2 = c_AffineMatrix0.x * Tx_y + c_AffineMatrix0.y * Ty_y + c_AffineMatrix0.z * Tz_y;
+            float Tx_z2 = c_AffineMatrix0.x * Tx_z + c_AffineMatrix0.y * Ty_z + c_AffineMatrix0.z * Tz_z;
+            float Ty_x2 = c_AffineMatrix1.x * Tx_x + c_AffineMatrix1.y * Ty_x + c_AffineMatrix1.z * Tz_x;
+            float Ty_y2 = c_AffineMatrix1.x * Tx_y + c_AffineMatrix1.y * Ty_y + c_AffineMatrix1.z * Tz_y;
+            float Ty_z2 = c_AffineMatrix1.x * Tx_z + c_AffineMatrix1.y * Ty_z + c_AffineMatrix1.z * Tz_z;
+            float Tz_x2 = c_AffineMatrix2.x * Tx_x + c_AffineMatrix2.y * Ty_x + c_AffineMatrix2.z * Tz_x;
+            float Tz_y2 = c_AffineMatrix2.x * Tx_y + c_AffineMatrix2.y * Ty_y + c_AffineMatrix2.z * Tz_y;
+            float Tz_z2 = c_AffineMatrix2.x * Tx_z + c_AffineMatrix2.y * Ty_z + c_AffineMatrix2.z * Tz_z;
+
+            // The Jacobian matrix is stored
+            tempIndex = tid * 9;
+            jacobianMatrices[tempIndex++] = Tx_x2;
+            jacobianMatrices[tempIndex++] = Tx_y2;
+            jacobianMatrices[tempIndex++] = Tx_z2;
+            jacobianMatrices[tempIndex++] = Ty_x2;
+            jacobianMatrices[tempIndex++] = Ty_y2;
+            jacobianMatrices[tempIndex++] = Ty_z2;
+            jacobianMatrices[tempIndex++] = Tz_x2;
+            jacobianMatrices[tempIndex++] = Tz_y2;
+            jacobianMatrices[tempIndex] = Tz_z2;
+
+            // The Jacobian determinant is computed and stored
+            jacobianDet[tid] = Tx_x2 * Ty_y2 * Tz_z2
+                + Tx_y2 * Ty_z2 * Tz_x2
+                + Tx_z2 * Ty_x2 * Tz_y2
+                - Tx_x2 * Ty_z2 * Tz_y2
+                - Tx_y2 * Ty_x2 * Tz_z2
+                - Tx_z2 * Ty_y2 * Tz_x2;
+        } else {
+            tempIndex = tid * 9;
+            jacobianMatrices[tempIndex++] = 1.f;
+            jacobianMatrices[tempIndex++] = 0.f;
+            jacobianMatrices[tempIndex++] = 0.f;
+            jacobianMatrices[tempIndex++] = 0.f;
+            jacobianMatrices[tempIndex++] = 1.f;
+            jacobianMatrices[tempIndex++] = 0.f;
+            jacobianMatrices[tempIndex++] = 0.f;
+            jacobianMatrices[tempIndex++] = 0.f;
+            jacobianMatrices[tempIndex] = 1.f;
+            jacobianDet[tid] = 1.0f;
+        }
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices,
-													 float *jacobianDet)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_VoxelNumber){
-
-		int2 imageSize = make_int2(c_ReferenceImageDim.x,c_ReferenceImageDim.y);
-
-		unsigned tempIndex=tid;
-		const int y = tempIndex/imageSize.x;
-		const int x = tempIndex - y*imageSize.x;
-
-		// the "nearest previous" node is determined [0,0,0]
-		int2 nodeAnte;
-		float2 gridVoxelSpacing = make_float2(c_ControlPointVoxelSpacing.x,c_ControlPointVoxelSpacing.y);
-		nodeAnte.x = (int)floorf((float)x/gridVoxelSpacing.x);
-		nodeAnte.y = (int)floorf((float)y/gridVoxelSpacing.y);
-
-		float xBasis[4], yBasis[4], xFirst[4], yFirst[4], relative;
-
-		relative = fabsf((float)x/gridVoxelSpacing.x-(float)nodeAnte.x);
-		GetFirstBSplineValues(relative, xBasis, xFirst);
-
-		relative = fabsf((float)y/gridVoxelSpacing.y-(float)nodeAnte.y);
-		GetFirstBSplineValues(relative, yBasis, yFirst);
-
-		int2 controlPointImageDim = make_int2(c_ControlPointImageDim.x,c_ControlPointImageDim.y);
-		float2 Tx=make_float2(0.f,0.f);
-		float2 Ty=make_float2(0.f,0.f);
-
-		for(int b=0; b<4; ++b){
-			int indexXY= ( nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
-
-			float4 nodeCoefficient = tex1Dfetch(controlPointTexture,indexXY++);
-			float2 tempBasis = make_float2(xFirst[0]*yBasis[b], xBasis[0]*yFirst[b]);
-			Tx = Tx + nodeCoefficient.x * tempBasis;
-			Ty = Ty + nodeCoefficient.y * tempBasis;
-
-			nodeCoefficient = tex1Dfetch(controlPointTexture,indexXY++);
-			tempBasis = make_float2(xFirst[1]*yBasis[b], xBasis[1]*yFirst[b]);
-			Tx = Tx + nodeCoefficient.x * tempBasis;
-			Ty = Ty + nodeCoefficient.y * tempBasis;
-
-			nodeCoefficient = tex1Dfetch(controlPointTexture,indexXY++);
-			tempBasis = make_float2(xFirst[2]*yBasis[b], xBasis[2]*yFirst[b]);
-			Tx = Tx + nodeCoefficient.x * tempBasis;
-			Ty = Ty + nodeCoefficient.y * tempBasis;
-
-			nodeCoefficient = tex1Dfetch(controlPointTexture,indexXY);
-			tempBasis = make_float2(xFirst[3]*yBasis[b], xBasis[3]*yFirst[b]);
-			Tx = Tx + nodeCoefficient.x * tempBasis;
-			Ty = Ty + nodeCoefficient.y * tempBasis;
-		}
-
-		// The jacobian matrix is reoriented
-		float Tx_x2=c_AffineMatrix0.x*Tx.x + c_AffineMatrix0.y*Ty.x;
-		float Tx_y2=c_AffineMatrix0.x*Tx.y + c_AffineMatrix0.y*Ty.y;
-		float Ty_x2=c_AffineMatrix1.x*Tx.x + c_AffineMatrix1.y*Ty.x;
-		float Ty_y2=c_AffineMatrix1.x*Tx.y + c_AffineMatrix1.y*Ty.y;
-
-		// The Jacobian matrix is stored
-		tempIndex=tid*4;
-		jacobianMatrices[tempIndex++]=Tx_x2;
-		jacobianMatrices[tempIndex++]=Tx_y2;
-		jacobianMatrices[tempIndex++]=Ty_x2;
-		jacobianMatrices[tempIndex] = Ty_y2;
-
-		// The Jacobian determinant is computed and stored
-		jacobianDet[tid]= Tx_x2 * Ty_y2 - Tx_y2 * Ty_x2;
-	}
+__global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices, float *jacobianDet) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        const int2 imageSize = { c_ReferenceImageDim.x, c_ReferenceImageDim.y };
+
+        int tempIndex = tid;
+        const int y = tempIndex / imageSize.x;
+        const int x = tempIndex - y * imageSize.x;
+
+        // the "nearest previous" node is determined [0,0,0]
+        const float2 gridVoxelSpacing = { c_ControlPointVoxelSpacing.x, c_ControlPointVoxelSpacing.y };
+        const int2 nodeAnte = { (int)floorf((float)x / gridVoxelSpacing.x), (int)floorf((float)y / gridVoxelSpacing.y) };
+
+        float xBasis[4], yBasis[4], xFirst[4], yFirst[4], relative;
+
+        relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x);
+        GetFirstBSplineValues(relative, xBasis, xFirst);
+
+        relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y);
+        GetFirstBSplineValues(relative, yBasis, yFirst);
+
+        const int2 controlPointImageDim = { c_ControlPointImageDim.x, c_ControlPointImageDim.y };
+        float2 Tx{};
+        float2 Ty{};
+
+        for (int b = 0; b < 4; ++b) {
+            int indexXY = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
+
+            float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++);
+            float2 tempBasis = make_float2(xFirst[0] * yBasis[b], xBasis[0] * yFirst[b]);
+            Tx = Tx + nodeCoefficient.x * tempBasis;
+            Ty = Ty + nodeCoefficient.y * tempBasis;
+
+            nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++);
+            tempBasis = make_float2(xFirst[1] * yBasis[b], xBasis[1] * yFirst[b]);
+            Tx = Tx + nodeCoefficient.x * tempBasis;
+            Ty = Ty + nodeCoefficient.y * tempBasis;
+
+            nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++);
+            tempBasis = make_float2(xFirst[2] * yBasis[b], xBasis[2] * yFirst[b]);
+            Tx = Tx + nodeCoefficient.x * tempBasis;
+            Ty = Ty + nodeCoefficient.y * tempBasis;
+
+            nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY);
+            tempBasis = make_float2(xFirst[3] * yBasis[b], xBasis[3] * yFirst[b]);
+            Tx = Tx + nodeCoefficient.x * tempBasis;
+            Ty = Ty + nodeCoefficient.y * tempBasis;
+        }
+
+        // The jacobian matrix is reoriented
+        const float Tx_x2 = c_AffineMatrix0.x * Tx.x + c_AffineMatrix0.y * Ty.x;
+        const float Tx_y2 = c_AffineMatrix0.x * Tx.y + c_AffineMatrix0.y * Ty.y;
+        const float Ty_x2 = c_AffineMatrix1.x * Tx.x + c_AffineMatrix1.y * Ty.x;
+        const float Ty_y2 = c_AffineMatrix1.x * Tx.y + c_AffineMatrix1.y * Ty.y;
+
+        // The Jacobian matrix is stored
+        tempIndex = tid * 4;
+        jacobianMatrices[tempIndex++] = Tx_x2;
+        jacobianMatrices[tempIndex++] = Tx_y2;
+        jacobianMatrices[tempIndex++] = Ty_x2;
+        jacobianMatrices[tempIndex] = Ty_y2;
+
+        // The Jacobian determinant is computed and stored
+        jacobianDet[tid] = Tx_x2 * Ty_y2 - Tx_y2 * Ty_x2;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices,
-													 float *jacobianDet)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_VoxelNumber){
-
-		int3 imageSize = c_ReferenceImageDim;
-
-		unsigned tempIndex=tid;
-		const int z = tempIndex/(imageSize.x*imageSize.y);
-		tempIndex  -= z*imageSize.x*imageSize.y;
-		const int y = tempIndex/imageSize.x;
-		const int x = tempIndex - y*imageSize.x;
-
-		// the "nearest previous" node is determined [0,0,0]
-		int3 nodeAnte;
-		float3 gridVoxelSpacing = c_ControlPointVoxelSpacing;
-		nodeAnte.x = (int)floorf((float)x/gridVoxelSpacing.x);
-		nodeAnte.y = (int)floorf((float)y/gridVoxelSpacing.y);
-		nodeAnte.z = (int)floorf((float)z/gridVoxelSpacing.z);
-
-		extern __shared__ float yFirst[];
-		float *zFirst=&yFirst[4*blockDim.x*blockDim.y*blockDim.z];
-
-		float xBasis[4], yBasis[4], zBasis[4], xFirst[4], relative;
-
-		const int shareMemIndex = 4*threadIdx.x;
-
-		relative = fabsf((float)x/gridVoxelSpacing.x-(float)nodeAnte.x);
-		GetFirstBSplineValues(relative, xBasis, xFirst);
-
-		relative = fabsf((float)y/gridVoxelSpacing.y-(float)nodeAnte.y);
-		GetFirstBSplineValues(relative, yBasis, &yFirst[shareMemIndex]);
-
-		relative = fabsf((float)z/gridVoxelSpacing.z-(float)nodeAnte.z);
-		GetFirstBSplineValues(relative, zBasis, &zFirst[shareMemIndex]);
-
-		int3 controlPointImageDim = c_ControlPointImageDim;
-		float3 Tx=make_float3(0.f,0.f,0.f);
-		float3 Ty=make_float3(0.f,0.f,0.f);
-		float3 Tz=make_float3(0.f,0.f,0.f);
-
-		for(int c=0; c<4; ++c){
-			for(int b=0; b<4; ++b){
-				int indexXYZ= ( (nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
-				float3 tempBasisXY=make_float3(yBasis[b]*zBasis[c],
-										yFirst[shareMemIndex+b]*zBasis[c],
-										yBasis[b]*zFirst[shareMemIndex+c]);
-
-				float4 nodeCoefficient = tex1Dfetch(controlPointTexture,indexXYZ++);
-				float3 tempBasis = make_float3(xFirst[0],xBasis[0],xBasis[0])*tempBasisXY;
-				Tx = Tx + nodeCoefficient.x * tempBasis;
-				Ty = Ty + nodeCoefficient.y * tempBasis;
-				Tz = Tz + nodeCoefficient.z * tempBasis;
-
-				nodeCoefficient = tex1Dfetch(controlPointTexture,indexXYZ++);
-				tempBasis = make_float3(xFirst[1],xBasis[1],xBasis[1])*tempBasisXY;
-				Tx = Tx + nodeCoefficient.x * tempBasis;
-				Ty = Ty + nodeCoefficient.y * tempBasis;
-				Tz = Tz + nodeCoefficient.z * tempBasis;
-
-				nodeCoefficient = tex1Dfetch(controlPointTexture,indexXYZ++);
-				tempBasis = make_float3(xFirst[2],xBasis[2],xBasis[2])*tempBasisXY;
-				Tx = Tx + nodeCoefficient.x * tempBasis;
-				Ty = Ty + nodeCoefficient.y * tempBasis;
-				Tz = Tz + nodeCoefficient.z * tempBasis;
-
-				nodeCoefficient = tex1Dfetch(controlPointTexture,indexXYZ);
-				tempBasis = make_float3(xFirst[3],xBasis[3],xBasis[3])*tempBasisXY;
-				Tx = Tx + nodeCoefficient.x * tempBasis;
-				Ty = Ty + nodeCoefficient.y * tempBasis;
-				Tz = Tz + nodeCoefficient.z * tempBasis;
-			}
-		}
-
-		// The jacobian matrix is reoriented
-		float Tx_x2=c_AffineMatrix0.x*Tx.x + c_AffineMatrix0.y*Ty.x + c_AffineMatrix0.z*Tz.x;
-		float Tx_y2=c_AffineMatrix0.x*Tx.y + c_AffineMatrix0.y*Ty.y + c_AffineMatrix0.z*Tz.y;
-		float Tx_z2=c_AffineMatrix0.x*Tx.z + c_AffineMatrix0.y*Ty.z + c_AffineMatrix0.z*Tz.z;
-		float Ty_x2=c_AffineMatrix1.x*Tx.x + c_AffineMatrix1.y*Ty.x + c_AffineMatrix1.z*Tz.x;
-		float Ty_y2=c_AffineMatrix1.x*Tx.y + c_AffineMatrix1.y*Ty.y + c_AffineMatrix1.z*Tz.y;
-		float Ty_z2=c_AffineMatrix1.x*Tx.z + c_AffineMatrix1.y*Ty.z + c_AffineMatrix1.z*Tz.z;
-		float Tz_x2=c_AffineMatrix2.x*Tx.x + c_AffineMatrix2.y*Ty.x + c_AffineMatrix2.z*Tz.x;
-		float Tz_y2=c_AffineMatrix2.x*Tx.y + c_AffineMatrix2.y*Ty.y + c_AffineMatrix2.z*Tz.y;
-		float Tz_z2=c_AffineMatrix2.x*Tx.z + c_AffineMatrix2.y*Ty.z + c_AffineMatrix2.z*Tz.z;
-
-		// The Jacobian matrix is stored
-		tempIndex=tid*9;
-		jacobianMatrices[tempIndex++]=Tx_x2;
-		jacobianMatrices[tempIndex++]=Tx_y2;
-		jacobianMatrices[tempIndex++]=Tx_z2;
-		jacobianMatrices[tempIndex++]=Ty_x2;
-		jacobianMatrices[tempIndex++]=Ty_y2;
-		jacobianMatrices[tempIndex++]=Ty_z2;
-		jacobianMatrices[tempIndex++]=Tz_x2;
-		jacobianMatrices[tempIndex++]=Tz_y2;
-		jacobianMatrices[tempIndex] = Tz_z2;
-
-		// The Jacobian determinant is computed and stored
-		jacobianDet[tid]= Tx_x2*Ty_y2*Tz_z2
-						+ Tx_y2*Ty_z2*Tz_x2
-						+ Tx_z2*Ty_x2*Tz_y2
-						- Tx_x2*Ty_z2*Tz_y2
-						- Tx_y2*Ty_x2*Tz_z2
-						- Tx_z2*Ty_y2*Tz_x2;
-	}
+__global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, float *jacobianDet) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        const int3 imageSize = c_ReferenceImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (imageSize.x * imageSize.y);
+        tempIndex -= z * imageSize.x * imageSize.y;
+        const int y = tempIndex / imageSize.x;
+        const int x = tempIndex - y * imageSize.x;
+
+        // the "nearest previous" node is determined [0,0,0]
+        const float3 gridVoxelSpacing = c_ControlPointVoxelSpacing;
+        const int3 nodeAnte = {
+            (int)floorf((float)x / gridVoxelSpacing.x),
+            (int)floorf((float)y / gridVoxelSpacing.y),
+            (int)floorf((float)z / gridVoxelSpacing.z)
+        };
+
+        extern __shared__ float yFirst[];
+        float *zFirst = &yFirst[4 * blockDim.x * blockDim.y * blockDim.z];
+
+        float xBasis[4], yBasis[4], zBasis[4], xFirst[4], relative;
+
+        const unsigned shareMemIndex = 4 * threadIdx.x;
+
+        relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x);
+        GetFirstBSplineValues(relative, xBasis, xFirst);
+
+        relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y);
+        GetFirstBSplineValues(relative, yBasis, &yFirst[shareMemIndex]);
+
+        relative = fabsf((float)z / gridVoxelSpacing.z - (float)nodeAnte.z);
+        GetFirstBSplineValues(relative, zBasis, &zFirst[shareMemIndex]);
+
+        const int3 controlPointImageDim = c_ControlPointImageDim;
+        float3 Tx{};
+        float3 Ty{};
+        float3 Tz{};
+
+        for (int c = 0; c < 4; ++c) {
+            for (int b = 0; b < 4; ++b) {
+                int indexXYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
+                float3 tempBasisXY = make_float3(yBasis[b] * zBasis[c],
+                                                 yFirst[shareMemIndex + b] * zBasis[c],
+                                                 yBasis[b] * zFirst[shareMemIndex + c]);
+
+                float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++);
+                float3 tempBasis = make_float3(xFirst[0], xBasis[0], xBasis[0]) * tempBasisXY;
+                Tx = Tx + nodeCoefficient.x * tempBasis;
+                Ty = Ty + nodeCoefficient.y * tempBasis;
+                Tz = Tz + nodeCoefficient.z * tempBasis;
+
+                nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++);
+                tempBasis = make_float3(xFirst[1], xBasis[1], xBasis[1]) * tempBasisXY;
+                Tx = Tx + nodeCoefficient.x * tempBasis;
+                Ty = Ty + nodeCoefficient.y * tempBasis;
+                Tz = Tz + nodeCoefficient.z * tempBasis;
+
+                nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++);
+                tempBasis = make_float3(xFirst[2], xBasis[2], xBasis[2]) * tempBasisXY;
+                Tx = Tx + nodeCoefficient.x * tempBasis;
+                Ty = Ty + nodeCoefficient.y * tempBasis;
+                Tz = Tz + nodeCoefficient.z * tempBasis;
+
+                nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ);
+                tempBasis = make_float3(xFirst[3], xBasis[3], xBasis[3]) * tempBasisXY;
+                Tx = Tx + nodeCoefficient.x * tempBasis;
+                Ty = Ty + nodeCoefficient.y * tempBasis;
+                Tz = Tz + nodeCoefficient.z * tempBasis;
+            }
+        }
+
+        // The jacobian matrix is reoriented
+        const float Tx_x2 = c_AffineMatrix0.x * Tx.x + c_AffineMatrix0.y * Ty.x + c_AffineMatrix0.z * Tz.x;
+        const float Tx_y2 = c_AffineMatrix0.x * Tx.y + c_AffineMatrix0.y * Ty.y + c_AffineMatrix0.z * Tz.y;
+        const float Tx_z2 = c_AffineMatrix0.x * Tx.z + c_AffineMatrix0.y * Ty.z + c_AffineMatrix0.z * Tz.z;
+        const float Ty_x2 = c_AffineMatrix1.x * Tx.x + c_AffineMatrix1.y * Ty.x + c_AffineMatrix1.z * Tz.x;
+        const float Ty_y2 = c_AffineMatrix1.x * Tx.y + c_AffineMatrix1.y * Ty.y + c_AffineMatrix1.z * Tz.y;
+        const float Ty_z2 = c_AffineMatrix1.x * Tx.z + c_AffineMatrix1.y * Ty.z + c_AffineMatrix1.z * Tz.z;
+        const float Tz_x2 = c_AffineMatrix2.x * Tx.x + c_AffineMatrix2.y * Ty.x + c_AffineMatrix2.z * Tz.x;
+        const float Tz_y2 = c_AffineMatrix2.x * Tx.y + c_AffineMatrix2.y * Ty.y + c_AffineMatrix2.z * Tz.y;
+        const float Tz_z2 = c_AffineMatrix2.x * Tx.z + c_AffineMatrix2.y * Ty.z + c_AffineMatrix2.z * Tz.z;
+
+        // The Jacobian matrix is stored
+        tempIndex = tid * 9;
+        jacobianMatrices[tempIndex++] = Tx_x2;
+        jacobianMatrices[tempIndex++] = Tx_y2;
+        jacobianMatrices[tempIndex++] = Tx_z2;
+        jacobianMatrices[tempIndex++] = Ty_x2;
+        jacobianMatrices[tempIndex++] = Ty_y2;
+        jacobianMatrices[tempIndex++] = Ty_z2;
+        jacobianMatrices[tempIndex++] = Tz_x2;
+        jacobianMatrices[tempIndex++] = Tz_y2;
+        jacobianMatrices[tempIndex] = Tz_z2;
+
+        // The Jacobian determinant is computed and stored
+        jacobianDet[tid] = Tx_x2 * Ty_y2 * Tz_z2
+            + Tx_y2 * Ty_z2 * Tz_x2
+            + Tx_z2 * Ty_x2 * Tz_y2
+            - Tx_x2 * Ty_z2 * Tz_y2
+            - Tx_y2 * Ty_x2 * Tz_z2
+            - Tx_z2 * Ty_y2 * Tz_x2;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_logSquaredValues_kernel(float *det)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_VoxelNumber){
-		float val = logf(det[tid]);
-		det[tid]=val*val;
-	}
+__global__ void reg_spline_logSquaredValues_kernel(float *det) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        const float val = logf(det[tid]);
+        det[tid] = val * val;
+    }
 }
 /* *************************************************************** */
 __device__ void getJacobianGradientValues2D(float *jacobianMatrix,
-											float detJac,
-											float basisX,
-											float basisY,
-											float2 *jacobianConstraint)
-{
-	jacobianConstraint->x += detJac * (
-			basisX * jacobianMatrix[3] -
-			basisY * jacobianMatrix[2] );
-	jacobianConstraint->y += detJac * (
-			basisY * jacobianMatrix[0] -
-			basisX * jacobianMatrix[1] );
+                                            float detJac,
+                                            float basisX,
+                                            float basisY,
+                                            float2 *jacobianConstraint) {
+    jacobianConstraint->x += detJac * (basisX * jacobianMatrix[3] - basisY * jacobianMatrix[2]);
+    jacobianConstraint->y += detJac * (basisY * jacobianMatrix[0] - basisX * jacobianMatrix[1]);
 }
 /* *************************************************************** */
 __device__ void getJacobianGradientValues3D(float *jacobianMatrix,
-											float detJac,
-											float basisX,
-											float basisY,
-											float basisZ,
-											float3 *jacobianConstraint)
-{
-	jacobianConstraint->x += detJac * (
-			basisX * (jacobianMatrix[4]*jacobianMatrix[8] - jacobianMatrix[5]*jacobianMatrix[7]) +
-			basisY * (jacobianMatrix[5]*jacobianMatrix[6] - jacobianMatrix[3]*jacobianMatrix[8]) +
-			basisZ * (jacobianMatrix[3]*jacobianMatrix[7] - jacobianMatrix[4]*jacobianMatrix[6]) );
-
-	jacobianConstraint->y += detJac * (
-			basisX * (jacobianMatrix[2]*jacobianMatrix[7] - jacobianMatrix[1]*jacobianMatrix[8]) +
-			basisY * (jacobianMatrix[0]*jacobianMatrix[8] - jacobianMatrix[2]*jacobianMatrix[6]) +
-			basisZ * (jacobianMatrix[1]*jacobianMatrix[6] - jacobianMatrix[0]*jacobianMatrix[7]) );
-
-	jacobianConstraint->z += detJac * (
-			basisX * (jacobianMatrix[1]*jacobianMatrix[5] - jacobianMatrix[2]*jacobianMatrix[4]) +
-			basisY * (jacobianMatrix[2]*jacobianMatrix[3] - jacobianMatrix[0]*jacobianMatrix[5]) +
-			basisZ * (jacobianMatrix[0]*jacobianMatrix[4] - jacobianMatrix[1]*jacobianMatrix[3]) );
+                                            float detJac,
+                                            float basisX,
+                                            float basisY,
+                                            float basisZ,
+                                            float3 *jacobianConstraint) {
+    jacobianConstraint->x += detJac * (
+        basisX * (jacobianMatrix[4] * jacobianMatrix[8] - jacobianMatrix[5] * jacobianMatrix[7]) +
+        basisY * (jacobianMatrix[5] * jacobianMatrix[6] - jacobianMatrix[3] * jacobianMatrix[8]) +
+        basisZ * (jacobianMatrix[3] * jacobianMatrix[7] - jacobianMatrix[4] * jacobianMatrix[6]));
+
+    jacobianConstraint->y += detJac * (
+        basisX * (jacobianMatrix[2] * jacobianMatrix[7] - jacobianMatrix[1] * jacobianMatrix[8]) +
+        basisY * (jacobianMatrix[0] * jacobianMatrix[8] - jacobianMatrix[2] * jacobianMatrix[6]) +
+        basisZ * (jacobianMatrix[1] * jacobianMatrix[6] - jacobianMatrix[0] * jacobianMatrix[7]));
+
+    jacobianConstraint->z += detJac * (
+        basisX * (jacobianMatrix[1] * jacobianMatrix[5] - jacobianMatrix[2] * jacobianMatrix[4]) +
+        basisY * (jacobianMatrix[2] * jacobianMatrix[3] - jacobianMatrix[0] * jacobianMatrix[5]) +
+        basisZ * (jacobianMatrix[0] * jacobianMatrix[4] - jacobianMatrix[1] * jacobianMatrix[3]));
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient)
-{
-	__shared__ float xbasis[9];
-	__shared__ float ybasis[9];
-
-	if(threadIdx.x<9)
-		GetFirstDerivativeBasisValues2D(threadIdx.x,
-										xbasis,
-										ybasis);
-	__syncthreads();
-
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		unsigned tempIndex=tid;
-		const int y =(int)(tempIndex/(gridSize.x));
-		const int x = tempIndex - y*(gridSize.x);
-
-		float2 jacobianGradient=make_float2(0.f,0.f);
-		tempIndex=8;
-		for(int pixelY=(int)(y-1); pixelY<(int)(y+2); ++pixelY){
-			if(pixelY>0 && pixelY<gridSize.y-1){
-
-				int jacIndex = pixelY*gridSize.x+x-1;
-				for(int pixelX=(int)(x-1); pixelX<(int)(x+2); ++pixelX){
-					if(pixelX>0 && pixelX<gridSize.x-1){
-
-						float detJac = tex1Dfetch(jacobianDeterminantTexture,jacIndex);
-
-						if(detJac>0.f){
-							detJac = 2.f*logf(detJac) / detJac;
-							float jacobianMatrix[4];
-							jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture,jacIndex*4);
-							jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture,jacIndex*4+1);
-							jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture,jacIndex*4+2);
-							jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture,jacIndex*4+3);
-
-							getJacobianGradientValues2D(jacobianMatrix,
-														detJac,
-														xbasis[tempIndex],
-														ybasis[tempIndex],
-														&jacobianGradient);
-						}
-					}
-					jacIndex++;
-					tempIndex--;
-				}
-			}
-			else tempIndex-=3;
-		}
-		gradient[tid] = gradient[tid] + make_float4(c_Weight3.x
-													* (c_AffineMatrix0.x * jacobianGradient.x
-													   + c_AffineMatrix0.y * jacobianGradient.y),
-													c_Weight3.y
-													* (c_AffineMatrix1.x * jacobianGradient.x
-													   + c_AffineMatrix1.y * jacobianGradient.y),
-													0.f,
-													0.f);
-
-	}
+__global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient) {
+    __shared__ float xbasis[9];
+    __shared__ float ybasis[9];
+
+    if (threadIdx.x < 9)
+        GetFirstDerivativeBasisValues2D(threadIdx.x, xbasis, ybasis);
+    __syncthreads();
+
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        int tempIndex = tid;
+        const int y = tempIndex / (gridSize.x);
+        const int x = tempIndex - y * gridSize.x;
+
+        float2 jacobianGradient{};
+        tempIndex = 8;
+        for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) {
+            if (pixelY > 0 && pixelY < gridSize.y - 1) {
+                int jacIndex = pixelY * gridSize.x + x - 1;
+                for (int pixelX = (int)(x - 1); pixelX < (int)(x + 2); ++pixelX) {
+                    if (pixelX > 0 && pixelX < gridSize.x - 1) {
+                        float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+
+                        if (detJac > 0.f) {
+                            detJac = 2.f * logf(detJac) / detJac;
+                            float jacobianMatrix[4];
+                            jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4);
+                            jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 1);
+                            jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 2);
+                            jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 3);
+
+                            getJacobianGradientValues2D(jacobianMatrix, detJac, xbasis[tempIndex], ybasis[tempIndex], &jacobianGradient);
+                        }
+                    }
+                    jacIndex++;
+                    tempIndex--;
+                }
+            } else tempIndex -= 3;
+        }
+
+        gradient[tid] = gradient[tid] + make_float4(
+            c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y),
+            c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y),
+            0.f, 0.f);
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient)
-{
-	__shared__ float xbasis[27];
-	__shared__ float ybasis[27];
-	__shared__ float zbasis[27];
-
-	if(threadIdx.x<27)
-		GetFirstDerivativeBasisValues3D(threadIdx.x,
-									  xbasis,
-									  ybasis,
-									  zbasis);
-	__syncthreads();
-
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		unsigned tempIndex=tid;
-		const int z =(int)(tempIndex/(gridSize.x*gridSize.y));
-		tempIndex -= z*(gridSize.x)*(gridSize.y);
-		const int y =(int)(tempIndex/(gridSize.x));
-		const int x = tempIndex - y*(gridSize.x);
-
-		float3 jacobianGradient=make_float3(0.f,0.f,0.f);
-		tempIndex=26;
-		for(int pixelZ=(int)(z-1); pixelZ<(int)(z+2); ++pixelZ){
-			if(pixelZ>0 && pixelZ<gridSize.z-1){
-
-				for(int pixelY=(int)(y-1); pixelY<(int)(y+2); ++pixelY){
-					if(pixelY>0 && pixelY<gridSize.y-1){
-
-						int jacIndex = (pixelZ*gridSize.y+pixelY)*gridSize.x+x-1;
-						for(int pixelX=(int)(x-1); pixelX<(int)(x+2); ++pixelX){
-							if(pixelX>0 && pixelX<gridSize.x-1){
-
-								float detJac = tex1Dfetch(jacobianDeterminantTexture,jacIndex);
-
-								if(detJac>0.f){
-									detJac = 2.f*logf(detJac) / detJac;
-									float jacobianMatrix[9];
-									jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9);
-									jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+1);
-									jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+2);
-									jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+3);
-									jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+4);
-									jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+5);
-									jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+6);
-									jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+7);
-									jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+8);
-
-									getJacobianGradientValues3D(jacobianMatrix,
-															  detJac,
-															  xbasis[tempIndex],
-															  ybasis[tempIndex],
-															  zbasis[tempIndex],
-															  &jacobianGradient);
-								}
-							}
-							jacIndex++;
-							tempIndex--;
-						}
-					}
-					else tempIndex-=3;
-				}
-			}
-			else tempIndex-=9;
-		}
-		gradient[tid] = gradient[tid] + make_float4(c_Weight3.x
-													* (c_AffineMatrix0.x * jacobianGradient.x
-													   + c_AffineMatrix0.y * jacobianGradient.y
-													   + c_AffineMatrix0.z * jacobianGradient.z),
-													c_Weight3.y
-													* (c_AffineMatrix1.x * jacobianGradient.x
-													   + c_AffineMatrix1.y * jacobianGradient.y
-													   + c_AffineMatrix1.z * jacobianGradient.z),
-													c_Weight3.z
-													* (c_AffineMatrix2.x * jacobianGradient.x
-													   + c_AffineMatrix2.y * jacobianGradient.y
-													   + c_AffineMatrix2.z * jacobianGradient.z),
-													0.f);
-
-	}
+__global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient) {
+    __shared__ float xbasis[27];
+    __shared__ float ybasis[27];
+    __shared__ float zbasis[27];
+
+    if (threadIdx.x < 27)
+        GetFirstDerivativeBasisValues3D(threadIdx.x, xbasis, ybasis, zbasis);
+    __syncthreads();
+
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (gridSize.x * gridSize.y);
+        tempIndex -= z * gridSize.x * gridSize.y;
+        const int y = tempIndex / gridSize.x;
+        const int x = tempIndex - y * gridSize.x;
+
+        float3 jacobianGradient{};
+        tempIndex = 26;
+        for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) {
+            if (pixelZ > 0 && pixelZ < gridSize.z - 1) {
+                for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) {
+                    if (pixelY > 0 && pixelY < gridSize.y - 1) {
+                        int jacIndex = (pixelZ * gridSize.y + pixelY) * gridSize.x + x - 1;
+                        for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) {
+                            if (pixelX > 0 && pixelX < gridSize.x - 1) {
+                                float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+                                if (detJac > 0.f) {
+                                    detJac = 2.f * logf(detJac) / detJac;
+                                    float jacobianMatrix[9];
+                                    jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9);
+                                    jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 1);
+                                    jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 2);
+                                    jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 3);
+                                    jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 4);
+                                    jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 5);
+                                    jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 6);
+                                    jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 7);
+                                    jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 8);
+                                    getJacobianGradientValues3D(jacobianMatrix,
+                                                                detJac,
+                                                                xbasis[tempIndex],
+                                                                ybasis[tempIndex],
+                                                                zbasis[tempIndex],
+                                                                &jacobianGradient);
+                                }
+                            }
+                            jacIndex++;
+                            tempIndex--;
+                        }
+                    } else tempIndex -= 3;
+                }
+            } else tempIndex -= 9;
+        }
+
+        gradient[tid] = gradient[tid] + make_float4(
+            c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y + c_AffineMatrix0.z * jacobianGradient.z),
+            c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y + c_AffineMatrix1.z * jacobianGradient.z),
+            c_Weight3.z * (c_AffineMatrix2.x * jacobianGradient.x + c_AffineMatrix2.y * jacobianGradient.y + c_AffineMatrix2.z * jacobianGradient.z),
+            0.f);
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		int tempIndex=tid;
-		const int y = tempIndex/gridSize.x;
-		const int x = tempIndex - y*gridSize.x;
-
-		float2 jacobianGradient=make_float2(0.f,0.f);
-
-		float3 spacingVoxel = c_ControlPointVoxelSpacing;
-
-		for(int pixelY=(int)ceilf((y-3)*spacingVoxel.y);
-			pixelY<=(int)ceilf((y+1)*spacingVoxel.y);
-			++pixelY){
-			if(pixelY>-1 && pixelY<c_ReferenceImageDim.y){
-
-				int yPre = (int)((float)pixelY/spacingVoxel.y);
-				float basis = (float)pixelY/spacingVoxel.y - (float)yPre;
-				float yBasis, yFirst;
-				getBSplineBasisValue(basis,y-yPre,&yBasis,&yFirst);
-
-				for(int pixelX=(int)ceilf((x-3)*spacingVoxel.x);
-					pixelX<=(int)ceilf((x+1)*spacingVoxel.x);
-					++pixelX){
-					if(pixelX>-1 && pixelX<c_ReferenceImageDim.x && (yFirst!=0.f || yBasis!=0.f)){
-
-						int xPre = (int)((float)pixelX/spacingVoxel.x);
-						basis = (float)pixelX/spacingVoxel.x - (float)xPre;
-						float xBasis, xFirst;
-						getBSplineBasisValue(basis,x-xPre,&xBasis,&xFirst);
-
-						int jacIndex = pixelY*c_ReferenceImageDim.x + pixelX;
-
-						float detJac = tex1Dfetch(jacobianDeterminantTexture,jacIndex);
-
-						if(detJac>0.f && (xFirst!=0.f || xBasis!=0.f)){
-							detJac = 2.f*logf(detJac) / detJac;
-							float jacobianMatrix[4];
-							jacIndex *= 4;
-							jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-							jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-							jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-							jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture,jacIndex);
-
-							float2 basisValues = make_float2(
-										xFirst*yBasis,
-										xBasis*yFirst);
-							getJacobianGradientValues2D(jacobianMatrix,
-														detJac,
-														basisValues.x,
-														basisValues.y,
-														&jacobianGradient);
-						}
-					}
-				}
-			}
-		}
-		gradient[tid] = gradient[tid] + make_float4(
-						c_Weight3.x
-						* (c_AffineMatrix0.x * jacobianGradient.x
-						   + c_AffineMatrix0.y * jacobianGradient.y),
-						c_Weight3.y
-						* (c_AffineMatrix1.x * jacobianGradient.x
-						   + c_AffineMatrix1.y * jacobianGradient.y),
-						0.f,
-						0.f);
-   }
+__global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        int tempIndex = tid;
+        const int y = tempIndex / gridSize.x;
+        const int x = tempIndex - y * gridSize.x;
+
+        float2 jacobianGradient{};
+        const float3 spacingVoxel = c_ControlPointVoxelSpacing;
+
+        for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY <= (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) {
+            if (pixelY > -1 && pixelY < c_ReferenceImageDim.y) {
+                const int yPre = (int)((float)pixelY / spacingVoxel.y);
+                float basis = (float)pixelY / spacingVoxel.y - (float)yPre;
+                float yBasis, yFirst;
+                GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst);
+
+                for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX <= (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) {
+                    if (pixelX > -1 && pixelX < c_ReferenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) {
+                        const int xPre = (int)((float)pixelX / spacingVoxel.x);
+                        basis = (float)pixelX / spacingVoxel.x - (float)xPre;
+                        float xBasis, xFirst;
+                        GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst);
+
+                        int jacIndex = pixelY * c_ReferenceImageDim.x + pixelX;
+                        float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+
+                        if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) {
+                            detJac = 2.f * logf(detJac) / detJac;
+                            float jacobianMatrix[4];
+                            jacIndex *= 4;
+                            jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                            jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                            jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                            jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex);
+                            const float2 basisValues = { xFirst * yBasis, xBasis * yFirst };
+                            getJacobianGradientValues2D(jacobianMatrix, detJac, basisValues.x, basisValues.y, &jacobianGradient);
+                        }
+                    }
+                }
+            }
+        }
+        gradient[tid] = gradient[tid] + make_float4(
+            c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y),
+            c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y),
+            0.f, 0.f);
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		int tempIndex=tid;
-		const int z = tempIndex/(gridSize.x*gridSize.y);
-		tempIndex  -= z*gridSize.x*gridSize.y;
-		const int y = tempIndex/gridSize.x;
-		const int x = tempIndex - y*gridSize.x;
-
-		float3 jacobianGradient=make_float3(0.f,0.f,0.f);
-
-		float3 spacingVoxel = c_ControlPointVoxelSpacing;
-
-		for(int pixelZ=(int)ceilf((z-3)*spacingVoxel.z);
-			pixelZ<=(int)ceilf((z+1)*spacingVoxel.z);
-			++pixelZ){
-			if(pixelZ>-1 && pixelZ<c_ReferenceImageDim.z){
-
-				int zPre = (int)((float)pixelZ/spacingVoxel.z);
-				float basis = (float)pixelZ/spacingVoxel.z - (float)zPre;
-				float zBasis, zFirst;
-				getBSplineBasisValue(basis,z-zPre,&zBasis,&zFirst);
-
-				for(int pixelY=(int)ceilf((y-3)*spacingVoxel.y);
-					pixelY<=(int)ceilf((y+1)*spacingVoxel.y);
-					++pixelY){
-					if(pixelY>-1 && pixelY<c_ReferenceImageDim.y && (zFirst!=0.f || zBasis!=0.f)){
-
-						int yPre = (int)((float)pixelY/spacingVoxel.y);
-						basis = (float)pixelY/spacingVoxel.y - (float)yPre;
-						float yBasis, yFirst;
-						getBSplineBasisValue(basis,y-yPre,&yBasis,&yFirst);
-
-						for(int pixelX=(int)ceilf((x-3)*spacingVoxel.x);
-							pixelX<=(int)ceilf((x+1)*spacingVoxel.x);
-							++pixelX){
-							if(pixelX>-1 && pixelX<c_ReferenceImageDim.x && (yFirst!=0.f || yBasis!=0.f)){
-
-								int xPre = (int)((float)pixelX/spacingVoxel.x);
-								basis = (float)pixelX/spacingVoxel.x - (float)xPre;
-								float xBasis, xFirst;
-								getBSplineBasisValue(basis,x-xPre,&xBasis,&xFirst);
-
-								int jacIndex = (pixelZ*c_ReferenceImageDim.y+pixelY)*c_ReferenceImageDim.x + pixelX;
-
-								float detJac = tex1Dfetch(jacobianDeterminantTexture,jacIndex);
-
-								if(detJac>0.f && (xFirst!=0.f || xBasis!=0.f)){
-									detJac = 2.f*logf(detJac) / detJac;
-									float jacobianMatrix[9];
-									jacIndex *= 9;
-									jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture,jacIndex);
-
-									float3 basisValues = make_float3(
-											xFirst*yBasis*zBasis,
-											xBasis*yFirst*zBasis,
-											xBasis*yBasis*zFirst);
-									getJacobianGradientValues3D(jacobianMatrix,
-															  detJac,
-															  basisValues.x,
-															  basisValues.y,
-															  basisValues.z,
-															  &jacobianGradient);
-								}
-							}
-						}
-					}
-				}
-			}
-		}
-		gradient[tid] = gradient[tid] + make_float4(
-						c_Weight3.x
-						* (c_AffineMatrix0.x * jacobianGradient.x
-						   + c_AffineMatrix0.y * jacobianGradient.y
-						   + c_AffineMatrix0.z * jacobianGradient.z),
-						c_Weight3.y
-						* (c_AffineMatrix1.x * jacobianGradient.x
-						   + c_AffineMatrix1.y * jacobianGradient.y
-						   + c_AffineMatrix1.z * jacobianGradient.z),
-						c_Weight3.z
-						* (c_AffineMatrix2.x * jacobianGradient.x
-						   + c_AffineMatrix2.y * jacobianGradient.y
-						   + c_AffineMatrix2.z * jacobianGradient.z),
-						0.f);
-   }
+__global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (gridSize.x * gridSize.y);
+        tempIndex -= z * gridSize.x * gridSize.y;
+        const int y = tempIndex / gridSize.x;
+        const int x = tempIndex - y * gridSize.x;
+
+        float3 jacobianGradient{};
+        const float3 spacingVoxel = c_ControlPointVoxelSpacing;
+
+        for (int pixelZ = (int)ceilf((z - 3) * spacingVoxel.z); pixelZ <= (int)ceilf((z + 1) * spacingVoxel.z); ++pixelZ) {
+            if (pixelZ > -1 && pixelZ < c_ReferenceImageDim.z) {
+                const int zPre = (int)((float)pixelZ / spacingVoxel.z);
+                float basis = (float)pixelZ / spacingVoxel.z - (float)zPre;
+                float zBasis, zFirst;
+                GetBSplineBasisValue(basis, z - zPre, &zBasis, &zFirst);
+
+                for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY <= (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) {
+                    if (pixelY > -1 && pixelY < c_ReferenceImageDim.y && (zFirst != 0.f || zBasis != 0.f)) {
+                        const int yPre = (int)((float)pixelY / spacingVoxel.y);
+                        basis = (float)pixelY / spacingVoxel.y - (float)yPre;
+                        float yBasis, yFirst;
+                        GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst);
+
+                        for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX <= (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) {
+                            if (pixelX > -1 && pixelX < c_ReferenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) {
+                                const int xPre = (int)((float)pixelX / spacingVoxel.x);
+                                basis = (float)pixelX / spacingVoxel.x - (float)xPre;
+                                float xBasis, xFirst;
+                                GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst);
+
+                                int jacIndex = (pixelZ * c_ReferenceImageDim.y + pixelY) * c_ReferenceImageDim.x + pixelX;
+                                float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+
+                                if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) {
+                                    detJac = 2.f * logf(detJac) / detJac;
+                                    float jacobianMatrix[9];
+                                    jacIndex *= 9;
+                                    jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex);
+
+                                    const float3 basisValues = {
+                                        xFirst * yBasis * zBasis,
+                                        xBasis * yFirst * zBasis,
+                                        xBasis * yBasis * zFirst
+                                    };
+                                    getJacobianGradientValues3D(jacobianMatrix,
+                                                                detJac,
+                                                                basisValues.x,
+                                                                basisValues.y,
+                                                                basisValues.z,
+                                                                &jacobianGradient);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        gradient[tid] = gradient[tid] + make_float4(
+            c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y + c_AffineMatrix0.z * jacobianGradient.z),
+            c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y + c_AffineMatrix1.z * jacobianGradient.z),
+            c_Weight3.z * (c_AffineMatrix2.x * jacobianGradient.x + c_AffineMatrix2.y * jacobianGradient.y + c_AffineMatrix2.z * jacobianGradient.z),
+            0.f);
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGrid_d)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		unsigned tempIndex=tid;
-		const int z = tempIndex/(gridSize.x*gridSize.y);
-		tempIndex  -= z*gridSize.x*gridSize.y;
-		const int y = tempIndex/gridSize.x;
-		const int x = tempIndex - y*gridSize.x;
-
-		float3 foldingCorrection=make_float3(0.f,0.f,0.f);
-		for(int pixelZ=(int)(z-1); pixelZ<(int)(z+2); ++pixelZ){
-			if(pixelZ>0 && pixelZ<gridSize.z-1){
-
-				for(int pixelY=(int)(y-1); pixelY<(int)(y+2); ++pixelY){
-					if(pixelY>0 && pixelY<gridSize.y-1){
-
-						for(int pixelX=(int)(x-1); pixelX<(int)(x+2); ++pixelX){
-							if(pixelX>0 && pixelX<gridSize.x-1){
-
-								int jacIndex = (pixelZ*gridSize.y+pixelY)*gridSize.x+pixelX;
-								float detJac = tex1Dfetch(jacobianDeterminantTexture,jacIndex);
-
-								if(detJac<=0.f){
-
-									float jacobianMatrix[9];
-									jacIndex*=9;
-									jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture,jacIndex);
-
-									float xBasis, xFirst, yBasis, yFirst, zBasis, zFirst;
-									getBSplineBasisValue(0.f,x-pixelX+1,&xBasis,&xFirst);
-									getBSplineBasisValue(0.f,y-pixelY+1,&yBasis,&yFirst);
-									getBSplineBasisValue(0.f,z-pixelZ+1,&zBasis,&zFirst);
-
-									float3 basisValue = make_float3(
-											xFirst*yBasis*zBasis,
-											xBasis*yFirst*zBasis,
-											xBasis*yBasis*zFirst);
-
-									getJacobianGradientValues3D(jacobianMatrix,
-															  1.f,
-															  basisValue.x,
-															  basisValue.y,
-															  basisValue.z,
-															  &foldingCorrection);
-								}
-							}
-						}
-					}
-				}
-			}
-		}
-		if(foldingCorrection.x!=0.f && foldingCorrection.y!=0.f && foldingCorrection.z!=0.f){
-			float3 gradient = make_float3(
-				c_AffineMatrix0.x * foldingCorrection.x
-				+ c_AffineMatrix0.y * foldingCorrection.y
-				+ c_AffineMatrix0.z * foldingCorrection.z,
-				c_AffineMatrix1.x * foldingCorrection.x
-			   + c_AffineMatrix1.y * foldingCorrection.y
-			   + c_AffineMatrix1.z * foldingCorrection.z,
-			   c_AffineMatrix2.x * foldingCorrection.x
-			   + c_AffineMatrix2.y * foldingCorrection.y
-			   + c_AffineMatrix2.z * foldingCorrection.z);
-
-			float norm = 5.f * sqrtf(gradient.x*gradient.x
-									 + gradient.y*gradient.y
-									 + gradient.z*gradient.z);
-			controlPointGrid_d[tid] = controlPointGrid_d[tid] +
-									  make_float4(gradient.x*c_ControlPointSpacing.x/norm,
-												  gradient.y*c_ControlPointSpacing.y/norm,
-												  gradient.z*c_ControlPointSpacing.z/norm,
-												  0.f);
-		}
-	}
+__global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGrid_d) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (gridSize.x * gridSize.y);
+        tempIndex -= z * gridSize.x * gridSize.y;
+        const int y = tempIndex / gridSize.x;
+        const int x = tempIndex - y * gridSize.x;
+
+        float3 foldingCorrection{};
+        for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) {
+            if (pixelZ > 0 && pixelZ < gridSize.z - 1) {
+                for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) {
+                    if (pixelY > 0 && pixelY < gridSize.y - 1) {
+                        for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) {
+                            if (pixelX > 0 && pixelX < gridSize.x - 1) {
+                                int jacIndex = (pixelZ * gridSize.y + pixelY) * gridSize.x + pixelX;
+                                float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+                                if (detJac <= 0.f) {
+                                    float jacobianMatrix[9];
+                                    jacIndex *= 9;
+                                    jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex);
+
+                                    float xBasis, xFirst, yBasis, yFirst, zBasis, zFirst;
+                                    GetBSplineBasisValue(0.f, x - pixelX + 1, &xBasis, &xFirst);
+                                    GetBSplineBasisValue(0.f, y - pixelY + 1, &yBasis, &yFirst);
+                                    GetBSplineBasisValue(0.f, z - pixelZ + 1, &zBasis, &zFirst);
+
+                                    const float3 basisValue = {
+                                        xFirst * yBasis * zBasis,
+                                        xBasis * yFirst * zBasis,
+                                        xBasis * yBasis * zFirst
+                                    };
+                                    getJacobianGradientValues3D(jacobianMatrix,
+                                                                1.f,
+                                                                basisValue.x,
+                                                                basisValue.y,
+                                                                basisValue.z,
+                                                                &foldingCorrection);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        if (foldingCorrection.x != 0.f && foldingCorrection.y != 0.f && foldingCorrection.z != 0.f) {
+            const float3 gradient = {
+                c_AffineMatrix0.x * foldingCorrection.x + c_AffineMatrix0.y * foldingCorrection.y + c_AffineMatrix0.z * foldingCorrection.z,
+                c_AffineMatrix1.x * foldingCorrection.x + c_AffineMatrix1.y * foldingCorrection.y + c_AffineMatrix1.z * foldingCorrection.z,
+                c_AffineMatrix2.x * foldingCorrection.x + c_AffineMatrix2.y * foldingCorrection.y + c_AffineMatrix2.z * foldingCorrection.z
+            };
+            const float norm = 5 * sqrtf(gradient.x * gradient.x + gradient.y * gradient.y + gradient.z * gradient.z);
+            controlPointGrid_d[tid] = controlPointGrid_d[tid] + make_float4(gradient.x * c_ControlPointSpacing.x / norm,
+                                                                            gradient.y * c_ControlPointSpacing.y / norm,
+                                                                            gradient.z * c_ControlPointSpacing.z / norm,
+                                                                            0.f);
+        }
+    }
 }
 /* *************************************************************** */
-__global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ControlPointNumber){
-
-		int3 gridSize = c_ControlPointImageDim;
-
-		unsigned tempIndex=tid;
-		const int z = tempIndex/(gridSize.x*gridSize.y);
-		tempIndex  -= z*gridSize.x*gridSize.y;
-		const int y = tempIndex/gridSize.x;
-		const int x = tempIndex - y*gridSize.x;
-
-		float3 spacingVoxel = c_ControlPointVoxelSpacing;
-		float3 foldingCorrection=make_float3(0.f,0.f,0.f);
-
-		for(int pixelZ=(int)ceilf((z-3)*spacingVoxel.z);
-			pixelZ<(int)ceilf((z+1)*spacingVoxel.z);
-			++pixelZ){
-			if(pixelZ>-1 && pixelZ<c_ReferenceImageDim.z){
-
-				for(int pixelY=(int)ceilf((y-3)*spacingVoxel.y);
-					pixelY<(int)ceilf((y+1)*spacingVoxel.y);
-					++pixelY){
-					if(pixelY>-1 && pixelY<c_ReferenceImageDim.y){
-
-						for(int pixelX=(int)ceilf((x-3)*spacingVoxel.x);
-							pixelX<(int)ceilf((x+1)*spacingVoxel.x);
-							++pixelX){
-							if(pixelX>-1 && pixelX<c_ReferenceImageDim.x){
-
-								int jacIndex = (pixelZ*c_ReferenceImageDim.y+pixelY)*c_ReferenceImageDim.x+pixelX;
-								float detJac = tex1Dfetch(jacobianDeterminantTexture,jacIndex);
-
-								if(detJac<=0.f){
-
-									float jacobianMatrix[9];
-									jacIndex*=9;
-									jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture,jacIndex++);
-									jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture,jacIndex);
-
-									float xBasis, xFirst, yBasis, yFirst, zBasis, zFirst;
-									int pre=(int)((float)pixelX/spacingVoxel.x);
-									float basis=(float)pixelX/spacingVoxel.x-(float)pre;
-									getBSplineBasisValue(basis,x-pre,&xBasis,&xFirst);
-									pre=(int)((float)pixelY/spacingVoxel.y);
-									basis=(float)pixelY/spacingVoxel.y-(float)pre;
-									getBSplineBasisValue(basis,y-pre,&yBasis,&yFirst);
-									pre=(int)((float)pixelZ/spacingVoxel.z);
-									basis=(float)pixelZ/spacingVoxel.z-(float)pre;
-									getBSplineBasisValue(basis,z-pre,&zBasis,&zFirst);
-
-									float3 basisValue = make_float3(
-											xFirst*yBasis*zBasis,
-											xBasis*yFirst*zBasis,
-											xBasis*yBasis*zFirst);
-
-									getJacobianGradientValues3D(jacobianMatrix,
-															  1.f,
-															  basisValue.x,
-															  basisValue.y,
-															  basisValue.z,
-															  &foldingCorrection);
-								}
-							}
-						}
-					}
-				}
-			}
-		}
-		if(foldingCorrection.x!=0.f && foldingCorrection.y!=0.f && foldingCorrection.z!=0.f){
-			float3 gradient = make_float3(
-				c_AffineMatrix0.x * foldingCorrection.x
-				+ c_AffineMatrix0.y * foldingCorrection.y
-				+ c_AffineMatrix0.z * foldingCorrection.z,
-				c_AffineMatrix1.x * foldingCorrection.x
-			   + c_AffineMatrix1.y * foldingCorrection.y
-			   + c_AffineMatrix1.z * foldingCorrection.z,
-			   c_AffineMatrix2.x * foldingCorrection.x
-			   + c_AffineMatrix2.y * foldingCorrection.y
-			   + c_AffineMatrix2.z * foldingCorrection.z);
-
-			float norm = 5.f * sqrtf(gradient.x*gradient.x
-									 + gradient.y*gradient.y
-									 + gradient.z*gradient.z);
-			controlPointGrid_d[tid] = controlPointGrid_d[tid] +
-									  make_float4(gradient.x*c_ControlPointSpacing.x/norm,
-												  gradient.y*c_ControlPointSpacing.y/norm,
-												  gradient.z*c_ControlPointSpacing.z/norm,
-												  0.f);
-		}
-	}
+__global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ControlPointNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (gridSize.x * gridSize.y);
+        tempIndex -= z * gridSize.x * gridSize.y;
+        const int y = tempIndex / gridSize.x;
+        const int x = tempIndex - y * gridSize.x;
+
+        const float3 spacingVoxel = c_ControlPointVoxelSpacing;
+        float3 foldingCorrection{};
+
+        for (int pixelZ = (int)ceilf((z - 3) * spacingVoxel.z); pixelZ < (int)ceilf((z + 1) * spacingVoxel.z); ++pixelZ) {
+            if (pixelZ > -1 && pixelZ < c_ReferenceImageDim.z) {
+                for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY < (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) {
+                    if (pixelY > -1 && pixelY < c_ReferenceImageDim.y) {
+                        for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX < (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) {
+                            if (pixelX > -1 && pixelX < c_ReferenceImageDim.x) {
+                                int jacIndex = (pixelZ * c_ReferenceImageDim.y + pixelY) * c_ReferenceImageDim.x + pixelX;
+                                float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+                                if (detJac <= 0.f) {
+                                    float jacobianMatrix[9];
+                                    jacIndex *= 9;
+                                    jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex);
+
+                                    float xBasis, xFirst, yBasis, yFirst, zBasis, zFirst;
+                                    int pre = (int)((float)pixelX / spacingVoxel.x);
+                                    float basis = (float)pixelX / spacingVoxel.x - (float)pre;
+                                    GetBSplineBasisValue(basis, x - pre, &xBasis, &xFirst);
+                                    pre = (int)((float)pixelY / spacingVoxel.y);
+                                    basis = (float)pixelY / spacingVoxel.y - (float)pre;
+                                    GetBSplineBasisValue(basis, y - pre, &yBasis, &yFirst);
+                                    pre = (int)((float)pixelZ / spacingVoxel.z);
+                                    basis = (float)pixelZ / spacingVoxel.z - (float)pre;
+                                    GetBSplineBasisValue(basis, z - pre, &zBasis, &zFirst);
+
+                                    const float3 basisValue = {
+                                        xFirst * yBasis * zBasis,
+                                        xBasis * yFirst * zBasis,
+                                        xBasis * yBasis * zFirst
+                                    };
+                                    getJacobianGradientValues3D(jacobianMatrix,
+                                                                1.f,
+                                                                basisValue.x,
+                                                                basisValue.y,
+                                                                basisValue.z,
+                                                                &foldingCorrection);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        if (foldingCorrection.x != 0.f && foldingCorrection.y != 0.f && foldingCorrection.z != 0.f) {
+            const float3 gradient = {
+                c_AffineMatrix0.x * foldingCorrection.x + c_AffineMatrix0.y * foldingCorrection.y + c_AffineMatrix0.z * foldingCorrection.z,
+                c_AffineMatrix1.x * foldingCorrection.x + c_AffineMatrix1.y * foldingCorrection.y + c_AffineMatrix1.z * foldingCorrection.z,
+                c_AffineMatrix2.x * foldingCorrection.x + c_AffineMatrix2.y * foldingCorrection.y + c_AffineMatrix2.z * foldingCorrection.z
+            };
+            const float norm = 5.f * sqrtf(gradient.x * gradient.x + gradient.y * gradient.y + gradient.z * gradient.z);
+            controlPointGrid_d[tid] = controlPointGrid_d[tid] + make_float4(gradient.x * c_ControlPointSpacing.x / norm,
+                                                                            gradient.y * c_ControlPointSpacing.y / norm,
+                                                                            gradient.z * c_ControlPointSpacing.z / norm,
+                                                                            0.f);
+        }
+    }
 }
 /* *************************************************************** */
-__global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *imageArray_d)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_VoxelNumber){
-
-		int3 imageSize = c_ReferenceImageDim;
-
-		unsigned tempIndex=tid;
-		const int z = tempIndex/(imageSize.x*imageSize.y);
-		tempIndex  -= z*imageSize.x*imageSize.y;
-		const int y = tempIndex/imageSize.x;
-		const int x = tempIndex - y*imageSize.x;
-
-		float4 initialPosition;
-		initialPosition.x=x*c_AffineMatrix0b.x + y*c_AffineMatrix0b.y + z*c_AffineMatrix0b.z + c_AffineMatrix0b.w;
-		initialPosition.y=x*c_AffineMatrix1b.x + y*c_AffineMatrix1b.y + z*c_AffineMatrix1b.z + c_AffineMatrix1b.w;
-		initialPosition.z=x*c_AffineMatrix2b.x + y*c_AffineMatrix2b.y + z*c_AffineMatrix2b.z + c_AffineMatrix2b.w;
-		initialPosition.w=0.f;
-
-		imageArray_d[tid] = imageArray_d[tid] + initialPosition;
-	}
+__global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *imageArray_d) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        const int3 imageSize = c_ReferenceImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (imageSize.x * imageSize.y);
+        tempIndex -= z * imageSize.x * imageSize.y;
+        const int y = tempIndex / imageSize.x;
+        const int x = tempIndex - y * imageSize.x;
+
+        const float4 initialPosition = {
+            x * c_AffineMatrix0b.x + y * c_AffineMatrix0b.y + z * c_AffineMatrix0b.z + c_AffineMatrix0b.w,
+            x * c_AffineMatrix1b.x + y * c_AffineMatrix1b.y + z * c_AffineMatrix1b.z + c_AffineMatrix1b.w,
+            x * c_AffineMatrix2b.x + y * c_AffineMatrix2b.y + z * c_AffineMatrix2b.z + c_AffineMatrix2b.w,
+            0.f
+        };
+
+        imageArray_d[tid] = imageArray_d[tid] + initialPosition;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_getDisplacementFromDeformation3D_kernel(float4 *imageArray_d)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_VoxelNumber){
-
-		int3 imageSize = c_ReferenceImageDim;
-
-		unsigned tempIndex=tid;
-		const int z = tempIndex/(imageSize.x*imageSize.y);
-		tempIndex  -= z*imageSize.x*imageSize.y;
-		const int y = tempIndex/imageSize.x;
-		const int x = tempIndex - y*imageSize.x;
-
-		float4 initialPosition;
-		initialPosition.x=x*c_AffineMatrix0b.x + y*c_AffineMatrix0b.y + z*c_AffineMatrix0b.z + c_AffineMatrix0b.w;
-		initialPosition.y=x*c_AffineMatrix1b.x + y*c_AffineMatrix1b.y + z*c_AffineMatrix1b.z + c_AffineMatrix1b.w;
-		initialPosition.z=x*c_AffineMatrix2b.x + y*c_AffineMatrix2b.y + z*c_AffineMatrix2b.z + c_AffineMatrix2b.w;
-		initialPosition.w=0.f;
-
-		imageArray_d[tid] = imageArray_d[tid] - initialPosition;
-	}
+__global__ void reg_getDisplacementFromDeformation3D_kernel(float4 *imageArray_d) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        const int3 imageSize = c_ReferenceImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (imageSize.x * imageSize.y);
+        tempIndex -= z * imageSize.x * imageSize.y;
+        const int y = tempIndex / imageSize.x;
+        const int x = tempIndex - y * imageSize.x;
+
+        const float4 initialPosition = {
+            x * c_AffineMatrix0b.x + y * c_AffineMatrix0b.y + z * c_AffineMatrix0b.z + c_AffineMatrix0b.w,
+            x * c_AffineMatrix1b.x + y * c_AffineMatrix1b.y + z * c_AffineMatrix1b.z + c_AffineMatrix1b.w,
+            x * c_AffineMatrix2b.x + y * c_AffineMatrix2b.y + z * c_AffineMatrix2b.z + c_AffineMatrix2b.w,
+            0.f
+        };
+
+        imageArray_d[tid] = imageArray_d[tid] - initialPosition;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_defField_compose2D_kernel(float4 *outDef)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_VoxelNumber){
-
-		// Extract the original voxel position
-		float4 position=outDef[tid];
-
-		// Conversion from real position to voxel coordinate
-		float4 voxelPosition;
-		voxelPosition.x=
-				position.x*c_AffineMatrix0b.x +
-				position.y*c_AffineMatrix0b.y +
-				c_AffineMatrix0b.w;
-		voxelPosition.y=
-				position.x*c_AffineMatrix1b.x +
-				position.y*c_AffineMatrix1b.y +
-				c_AffineMatrix1b.w;
-		voxelPosition.z=0.f;
-		voxelPosition.w=0.f;
-
-		// linear interpolation
-		int2 ante=make_int2(floorf(voxelPosition.x),
-							floorf(voxelPosition.y));
-
-		float relX[2], relY[2];
-		relX[1]=voxelPosition.x-(float)ante.x;relX[0]=1.f-relX[1];
-		relY[1]=voxelPosition.y-(float)ante.y;relY[0]=1.f-relY[1];
-
-		position=make_float4(0.f,0.f,0.f,0.f);
-
-		for(int b=0;b<2;++b){
-			for(int a=0;a<2;++a){
-				unsigned index=(ante.y+b)*c_ReferenceImageDim.x+ante.x+a;
-				float4 deformation;
-				if((ante.x+a)>-1 && (ante.y+b)>-1 &&
-				   (ante.x+a)<c_ReferenceImageDim.x &&
-				   (ante.y+b)<c_ReferenceImageDim.y){
-					deformation=tex1Dfetch(voxelDeformationTexture,index);
-				}
-				else{
-					deformation = get_SlidedValues_gpu((ante.x+a),
-													   (ante.y+b));
-				}
-				float basis=relX[a]*relY[b];
-				position=position+basis*deformation;
-			}
-		}
-		outDef[tid]=position;
-	}
+__global__ void reg_defField_compose2D_kernel(float4 *outDef) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        // Extract the original voxel position
+        float4 position = outDef[tid];
+
+        // Conversion from real position to voxel coordinate
+        float4 voxelPosition = {
+            position.x * c_AffineMatrix0b.x + position.y * c_AffineMatrix0b.y + c_AffineMatrix0b.w,
+            position.x * c_AffineMatrix1b.x + position.y * c_AffineMatrix1b.y + c_AffineMatrix1b.w,
+            0.f,
+            0.f
+        };
+
+        // Linear interpolation
+        const int2 ante = { (int)floorf(voxelPosition.x), (int)floorf(voxelPosition.y) };
+
+        float relX[2], relY[2];
+        relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1];
+        relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1];
+
+        position = make_float4(0.f, 0.f, 0.f, 0.f);
+
+        for (int b = 0; b < 2; ++b) {
+            for (int a = 0; a < 2; ++a) {
+                const int index = (ante.y + b) * c_ReferenceImageDim.x + ante.x + a;
+                float4 deformation;
+                if (-1 < ante.x + a && ante.x + a < c_ReferenceImageDim.x &&
+                    -1 < ante.y + b && ante.y + b < c_ReferenceImageDim.y) {
+                    deformation = tex1Dfetch(voxelDeformationTexture, index);
+                } else {
+                    deformation = GetSlidedValues(ante.x + a, ante.y + b);
+                }
+                const float basis = relX[a] * relY[b];
+                position = position + basis * deformation;
+            }
+        }
+        outDef[tid] = position;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_defField_compose3D_kernel(float4 *outDef)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_VoxelNumber){
-
-		// Extract the original voxel position
-		float4 position=outDef[tid];
-
-		// Conversion from real position to voxel coordinate
-		float4 voxelPosition;
-		voxelPosition.x=
-				position.x*c_AffineMatrix0b.x +
-				position.y*c_AffineMatrix0b.y +
-				position.z*c_AffineMatrix0b.z +
-				c_AffineMatrix0b.w;
-		voxelPosition.y=
-				position.x*c_AffineMatrix1b.x +
-				position.y*c_AffineMatrix1b.y +
-				position.z*c_AffineMatrix1b.z +
-				c_AffineMatrix1b.w;
-		voxelPosition.z=
-				position.x*c_AffineMatrix2b.x +
-				position.y*c_AffineMatrix2b.y +
-				position.z*c_AffineMatrix2b.z +
-				c_AffineMatrix2b.w;
-		voxelPosition.w=0.f;
-
-		// linear interpolation
-		int3 ante=make_int3(floorf(voxelPosition.x),
-							floorf(voxelPosition.y),
-							floorf(voxelPosition.z));
-
-		float relX[2], relY[2], relZ[2];
-		relX[1]=voxelPosition.x-(float)ante.x;relX[0]=1.f-relX[1];
-		relY[1]=voxelPosition.y-(float)ante.y;relY[0]=1.f-relY[1];
-		relZ[1]=voxelPosition.z-(float)ante.z;relZ[0]=1.f-relZ[1];
-
-		position=make_float4(0.f,0.f,0.f,0.f);
-
-		for(int c=0;c<2;++c){
-			for(int b=0;b<2;++b){
-				for(int a=0;a<2;++a){
-					unsigned index=((ante.z+c)*c_ReferenceImageDim.y+ante.y+b)*c_ReferenceImageDim.x+ante.x+a;
-					float4 deformation;
-					if((ante.x+a)>-1 && (ante.y+b)>-1 && (ante.z+c)>-1 &&
-					   (ante.x+a)<c_ReferenceImageDim.x &&
-					   (ante.y+b)<c_ReferenceImageDim.y &&
-					   (ante.z+c)<c_ReferenceImageDim.z){
-						deformation=tex1Dfetch(voxelDeformationTexture,index);
-					}
-					else{
-						deformation = get_SlidedValues_gpu((ante.x+a),
-														   (ante.y+b),
-														   (ante.z+c));
-					}
-					float basis=relX[a]*relY[b]*relZ[c];
-					position=position+basis*deformation;
-				}
-			}
-		}
-		outDef[tid]=position;
-	}
+__global__ void reg_defField_compose3D_kernel(float4 *outDef) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        // Extract the original voxel position
+        float4 position = outDef[tid];
+
+        // Conversion from real position to voxel coordinate
+        const float4 voxelPosition = {
+            position.x * c_AffineMatrix0b.x + position.y * c_AffineMatrix0b.y + position.z * c_AffineMatrix0b.z + c_AffineMatrix0b.w,
+            position.x * c_AffineMatrix1b.x + position.y * c_AffineMatrix1b.y + position.z * c_AffineMatrix1b.z + c_AffineMatrix1b.w,
+            position.x * c_AffineMatrix2b.x + position.y * c_AffineMatrix2b.y + position.z * c_AffineMatrix2b.z + c_AffineMatrix2b.w,
+            0.f
+        };
+
+        // Linear interpolation
+        const int3 ante = { (int)floorf(voxelPosition.x), (int)floorf(voxelPosition.y), (int)floorf(voxelPosition.z) };
+
+        float relX[2], relY[2], relZ[2];
+        relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1];
+        relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1];
+        relZ[1] = voxelPosition.z - (float)ante.z; relZ[0] = 1.f - relZ[1];
+
+        position = make_float4(0.f, 0.f, 0.f, 0.f);
+
+        for (int c = 0; c < 2; ++c) {
+            for (int b = 0; b < 2; ++b) {
+                for (int a = 0; a < 2; ++a) {
+                    const int index = ((ante.z + c) * c_ReferenceImageDim.y + ante.y + b) * c_ReferenceImageDim.x + ante.x + a;
+                    float4 deformation;
+                    if (-1 < ante.x + a && ante.x + a < c_ReferenceImageDim.x &&
+                        -1 < ante.y + b && ante.y + b < c_ReferenceImageDim.y &&
+                        -1 < ante.z + c && ante.z + c < c_ReferenceImageDim.z) {
+                        deformation = tex1Dfetch(voxelDeformationTexture, index);
+                    } else {
+                        deformation = GetSlidedValues(ante.x + a, ante.y + b, ante.z + c);
+                    }
+                    const float basis = relX[a] * relY[b] * relZ[c];
+                    position = position + basis * deformation;
+                }
+            }
+        }
+        outDef[tid] = position;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices)
-{
-	const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_VoxelNumber){
-
-		int3 imageSize = c_ReferenceImageDim;
-
-		unsigned tempIndex=tid;
-		const int z = tempIndex/(imageSize.x*imageSize.y);
-		tempIndex  -= z*imageSize.x*imageSize.y;
-		const int y = tempIndex/imageSize.x;
-		const int x = tempIndex - y*imageSize.x;
-
-		if(x==imageSize.x-1 ||
-		   y==imageSize.y-1 ||
-		   z==imageSize.z-1 ){
-			int index=tid*9;
-			jacobianMatrices[index++]=1;
-			jacobianMatrices[index++]=0;
-			jacobianMatrices[index++]=0;
-			jacobianMatrices[index++]=0;
-			jacobianMatrices[index++]=1;
-			jacobianMatrices[index++]=0;
-			jacobianMatrices[index++]=0;
-			jacobianMatrices[index++]=0;
-			jacobianMatrices[index]=1;
-			return;
-		}
-
-		float matrix[9];
-		int index=(z*imageSize.y+y)*imageSize.x+x;
-		float4 deformation = tex1Dfetch(voxelDeformationTexture,index);
-		matrix[0] = deformation.x * -1.f;
-		matrix[1] = deformation.x * -1.f;
-		matrix[2] = deformation.x * -1.f;
-		matrix[3] = deformation.y * -1.f;
-		matrix[4] = deformation.y * -1.f;
-		matrix[5] = deformation.y * -1.f;
-		matrix[6] = deformation.z * -1.f;
-		matrix[7] = deformation.z * -1.f;
-		matrix[8] = deformation.z * -1.f;
-		deformation = tex1Dfetch(voxelDeformationTexture,index+1);
-		matrix[0] += deformation.x * 1.f;
-		matrix[3] += deformation.y * 1.f;
-		matrix[6] += deformation.z * 1.f;
-		index=(z*imageSize.y+y+1)*imageSize.x+x;
-		deformation = tex1Dfetch(voxelDeformationTexture,index);
-		matrix[1] += deformation.x * 1.f;
-		matrix[4] += deformation.y * 1.f;
-		matrix[7] += deformation.z * 1.f;
-		index=((z+1)*imageSize.y+y)*imageSize.x+x;
-		deformation = tex1Dfetch(voxelDeformationTexture,index);
-		matrix[2] += deformation.x * 1.f;
-		matrix[5] += deformation.y * 1.f;
-		matrix[8] += deformation.z * 1.f;
-
-		index=tid*9;
-		jacobianMatrices[index++]=c_AffineMatrix0.x*matrix[0] + c_AffineMatrix0.y*matrix[3] + c_AffineMatrix0.z*matrix[6];
-		jacobianMatrices[index++]=c_AffineMatrix0.x*matrix[1] + c_AffineMatrix0.y*matrix[4] + c_AffineMatrix0.z*matrix[7];
-		jacobianMatrices[index++]=c_AffineMatrix0.x*matrix[2] + c_AffineMatrix0.y*matrix[5] + c_AffineMatrix0.z*matrix[8];
-		jacobianMatrices[index++]=c_AffineMatrix1.x*matrix[0] + c_AffineMatrix1.y*matrix[3] + c_AffineMatrix1.z*matrix[6];
-		jacobianMatrices[index++]=c_AffineMatrix1.x*matrix[1] + c_AffineMatrix1.y*matrix[4] + c_AffineMatrix1.z*matrix[7];
-		jacobianMatrices[index++]=c_AffineMatrix1.x*matrix[2] + c_AffineMatrix1.y*matrix[5] + c_AffineMatrix1.z*matrix[8];
-		jacobianMatrices[index++]=c_AffineMatrix2.x*matrix[0] + c_AffineMatrix2.y*matrix[3] + c_AffineMatrix2.z*matrix[6];
-		jacobianMatrices[index++]=c_AffineMatrix2.x*matrix[1] + c_AffineMatrix2.y*matrix[4] + c_AffineMatrix2.z*matrix[7];
-		jacobianMatrices[index] = c_AffineMatrix2.x*matrix[2] + c_AffineMatrix2.y*matrix[5] + c_AffineMatrix2.z*matrix[8];
-	}
+__global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        const int3 imageSize = c_ReferenceImageDim;
+
+        int tempIndex = tid;
+        const int z = tempIndex / (imageSize.x * imageSize.y);
+        tempIndex -= z * imageSize.x * imageSize.y;
+        const int y = tempIndex / imageSize.x;
+        const int x = tempIndex - y * imageSize.x;
+
+        if (x == imageSize.x - 1 || y == imageSize.y - 1 || z == imageSize.z - 1) {
+            int index = tid * 9;
+            jacobianMatrices[index++] = 1;
+            jacobianMatrices[index++] = 0;
+            jacobianMatrices[index++] = 0;
+            jacobianMatrices[index++] = 0;
+            jacobianMatrices[index++] = 1;
+            jacobianMatrices[index++] = 0;
+            jacobianMatrices[index++] = 0;
+            jacobianMatrices[index++] = 0;
+            jacobianMatrices[index] = 1;
+            return;
+        }
+
+        int index = (z * imageSize.y + y) * imageSize.x + x;
+        float4 deformation = tex1Dfetch(voxelDeformationTexture, index);
+        float matrix[9] = {
+            -deformation.x, -deformation.x, -deformation.x,
+            -deformation.y, -deformation.y, -deformation.y,
+            -deformation.z, -deformation.z, -deformation.z
+        };
+        deformation = tex1Dfetch(voxelDeformationTexture, index + 1);
+        matrix[0] += deformation.x;
+        matrix[3] += deformation.y;
+        matrix[6] += deformation.z;
+        index = (z * imageSize.y + y + 1) * imageSize.x + x;
+        deformation = tex1Dfetch(voxelDeformationTexture, index);
+        matrix[1] += deformation.x;
+        matrix[4] += deformation.y;
+        matrix[7] += deformation.z;
+        index = ((z + 1) * imageSize.y + y) * imageSize.x + x;
+        deformation = tex1Dfetch(voxelDeformationTexture, index);
+        matrix[2] += deformation.x;
+        matrix[5] += deformation.y;
+        matrix[8] += deformation.z;
+
+        index = tid * 9;
+        jacobianMatrices[index++] = c_AffineMatrix0.x * matrix[0] + c_AffineMatrix0.y * matrix[3] + c_AffineMatrix0.z * matrix[6];
+        jacobianMatrices[index++] = c_AffineMatrix0.x * matrix[1] + c_AffineMatrix0.y * matrix[4] + c_AffineMatrix0.z * matrix[7];
+        jacobianMatrices[index++] = c_AffineMatrix0.x * matrix[2] + c_AffineMatrix0.y * matrix[5] + c_AffineMatrix0.z * matrix[8];
+        jacobianMatrices[index++] = c_AffineMatrix1.x * matrix[0] + c_AffineMatrix1.y * matrix[3] + c_AffineMatrix1.z * matrix[6];
+        jacobianMatrices[index++] = c_AffineMatrix1.x * matrix[1] + c_AffineMatrix1.y * matrix[4] + c_AffineMatrix1.z * matrix[7];
+        jacobianMatrices[index++] = c_AffineMatrix1.x * matrix[2] + c_AffineMatrix1.y * matrix[5] + c_AffineMatrix1.z * matrix[8];
+        jacobianMatrices[index++] = c_AffineMatrix2.x * matrix[0] + c_AffineMatrix2.y * matrix[3] + c_AffineMatrix2.z * matrix[6];
+        jacobianMatrices[index++] = c_AffineMatrix2.x * matrix[1] + c_AffineMatrix2.y * matrix[4] + c_AffineMatrix2.z * matrix[7];
+        jacobianMatrices[index] = c_AffineMatrix2.x * matrix[2] + c_AffineMatrix2.y * matrix[5] + c_AffineMatrix2.z * matrix[8];
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */

From 2cccd64b0b4ede2625f472d4cbeda36f545ee07e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 12 Jun 2023 16:15:24 +0100
Subject: [PATCH 136/314] Refactor reg_localTransformation_gpu.cu

---
 niftyreg_build_version.txt                   |    2 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu | 1484 ++++++++----------
 2 files changed, 691 insertions(+), 795 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index ace9d036..9183bf03 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-255
+256
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 92a3f35d..2b95f454 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -13,834 +13,730 @@
 #include "_reg_localTransformation_gpu.h"
 #include "_reg_localTransformation_kernels.cu"
 
-/* *************************************************************** */
 /* *************************************************************** */
 void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
-										nifti_image *reference,
-										float4 *controlPointImageArray_d,
-										float4 *positionFieldImageArray_d,
-										int *mask_d,
-										int activeVoxelNumber,
-										bool bspline)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	const int voxelNumber = CalcVoxelNumber(*reference);
-	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
-	const int3 referenceImageDim = make_int3(reference->nx, reference->ny, reference->nz);
-	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-	const int useBSpline = static_cast<int>(bspline);
-
-	const float3 controlPointVoxelSpacing = make_float3(
-		controlPointImage->dx / reference->dx,
-		controlPointImage->dy / reference->dy,
-		controlPointImage->dz / reference->dz);
-
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_UseBSpline,&useBSpline,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int)));
-
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int)));
-
-	if(reference->nz>1){
-		const unsigned Grid_reg_spline_getDeformationField3D =
-			(unsigned)ceilf(sqrtf((float)activeVoxelNumber/(float)(blockSize->reg_spline_getDeformationField3D)));
-		dim3 G1(Grid_reg_spline_getDeformationField3D,Grid_reg_spline_getDeformationField3D,1);
-		dim3 B1(blockSize->reg_spline_getDeformationField3D,1,1);
-		// 8 floats of shared memory are allocated per thread
-		reg_spline_getDeformationField3D
-				<<< G1, B1, blockSize->reg_spline_getDeformationField3D*8*sizeof(float) >>>(positionFieldImageArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	else{
-		const unsigned Grid_reg_spline_getDeformationField2D =
-			(unsigned)ceilf(sqrtf((float)activeVoxelNumber/(float)(blockSize->reg_spline_getDeformationField2D)));
-		dim3 G1(Grid_reg_spline_getDeformationField2D,Grid_reg_spline_getDeformationField2D,1);
-		dim3 B1(blockSize->reg_spline_getDeformationField2D,1,1);
-		// 4 floats of shared memory are allocated per thread
-		reg_spline_getDeformationField2D
-				<<< G1, B1, blockSize->reg_spline_getDeformationField2D*4*sizeof(float) >>>(positionFieldImageArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
+                                        nifti_image *reference,
+                                        float4 *controlPointImageArray_d,
+                                        float4 *positionFieldImageArray_d,
+                                        int *mask_d,
+                                        int activeVoxelNumber,
+                                        bool bspline) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    const int voxelNumber = CalcVoxelNumber(*reference);
+    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const int3 referenceImageDim = make_int3(reference->nx, reference->ny, reference->nz);
+    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const int useBSpline = static_cast<int>(bspline);
+
+    const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / reference->dx,
+                                                        controlPointImage->dy / reference->dy,
+                                                        controlPointImage->dz / reference->dz);
+
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_UseBSpline, &useBSpline, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
+
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber * sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int)));
+
+    if (reference->nz > 1) {
+        const unsigned Grid_reg_spline_getDeformationField3D =
+            (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)(blockSize->reg_spline_getDeformationField3D)));
+        dim3 G1(Grid_reg_spline_getDeformationField3D, Grid_reg_spline_getDeformationField3D, 1);
+        dim3 B1(blockSize->reg_spline_getDeformationField3D, 1, 1);
+        // 8 floats of shared memory are allocated per thread
+        reg_spline_getDeformationField3D<<<G1, B1, blockSize->reg_spline_getDeformationField3D * 8 * sizeof(float)>>>(positionFieldImageArray_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    } else {
+        const unsigned Grid_reg_spline_getDeformationField2D =
+            (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)(blockSize->reg_spline_getDeformationField2D)));
+        dim3 G1(Grid_reg_spline_getDeformationField2D, Grid_reg_spline_getDeformationField2D, 1);
+        dim3 B1(blockSize->reg_spline_getDeformationField2D, 1, 1);
+        // 4 floats of shared memory are allocated per thread
+        reg_spline_getDeformationField2D<<<G1, B1, blockSize->reg_spline_getDeformationField2D * 4 * sizeof(float)>>>(positionFieldImageArray_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    }
+
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
 }
 /* *************************************************************** */
-/* *************************************************************** */
-float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
-	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-	const int controlPointGridMem = controlPointNumber*sizeof(float4);
-
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem));
-
-	// First compute all the second derivatives
-	float4 *secondDerivativeValues_d;
-	if(controlPointImage->nz>1){
-		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointGridMem));
-		const unsigned Grid_bspline_getApproxSecondDerivatives =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives3D)));
-		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
-		dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D,1,1);
-		reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	else{
-		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointGridMem));
-		const unsigned Grid_bspline_getApproxSecondDerivatives =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives2D)));
-		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
-		dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D,1,1);
-		reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
-
-	// Compute the bending energy from the second derivatives
-	float *penaltyTerm_d;
-	NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTerm_d, controlPointNumber*sizeof(float)));
-
-	if(controlPointImage->nz>1){
-		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
-										  secondDerivativeValues_d,
-										  6*controlPointGridMem));
-		const unsigned Grid_reg_spline_ApproxBendingEnergy =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergy3D)));
-		dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1);
-		dim3 B2(blockSize->reg_spline_getApproxBendingEnergy3D,1,1);
-		reg_spline_getApproxBendingEnergy3D_kernel <<< G2, B2 >>>(penaltyTerm_d);
-		NR_CUDA_CHECK_KERNEL(G2,B2);
-	}
-	else{
-		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
-										  secondDerivativeValues_d,
-										  3*controlPointGridMem));
-		const unsigned Grid_reg_spline_ApproxBendingEnergy =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergy2D)));
-		dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1);
-		dim3 B2(blockSize->reg_spline_getApproxBendingEnergy2D,1,1);
-		reg_spline_getApproxBendingEnergy2D_kernel <<< G2, B2 >>>(penaltyTerm_d);
-		NR_CUDA_CHECK_KERNEL(G2,B2);
-	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture));
-	NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d));
-
-	// Compute the mean bending energy value
-	double penaltyValue=reg_sumReduction_gpu(penaltyTerm_d,controlPointNumber);
-	NR_CUDA_SAFE_CALL(cudaFree(penaltyTerm_d));
-
-	return (float)(penaltyValue/(double)controlPointImage->nvox);
+float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const int controlPointGridMem = controlPointNumber * sizeof(float4);
+
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem));
+
+    // First compute all the second derivatives
+    float4 *secondDerivativeValues_d;
+    if (controlPointImage->nz > 1) {
+        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6 * controlPointGridMem));
+        const unsigned Grid_bspline_getApproxSecondDerivatives =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives3D)));
+        dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1);
+        dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D, 1, 1);
+        reg_spline_getApproxSecondDerivatives3D<<<G1, B1>>>(secondDerivativeValues_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    } else {
+        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3 * controlPointGridMem));
+        const unsigned Grid_bspline_getApproxSecondDerivatives =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives2D)));
+        dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1);
+        dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D, 1, 1);
+        reg_spline_getApproxSecondDerivatives2D<<<G1, B1>>>(secondDerivativeValues_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    }
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
+
+    // Compute the bending energy from the second derivatives
+    float *penaltyTerm_d;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTerm_d, controlPointNumber * sizeof(float)));
+
+    if (controlPointImage->nz > 1) {
+        NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 6 * controlPointGridMem));
+        const unsigned Grid_reg_spline_ApproxBendingEnergy =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergy3D)));
+        dim3 G2(Grid_reg_spline_ApproxBendingEnergy, Grid_reg_spline_ApproxBendingEnergy, 1);
+        dim3 B2(blockSize->reg_spline_getApproxBendingEnergy3D, 1, 1);
+        reg_spline_getApproxBendingEnergy3D_kernel<<<G2, B2>>>(penaltyTerm_d);
+        NR_CUDA_CHECK_KERNEL(G2, B2);
+    } else {
+        NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 3 * controlPointGridMem));
+        const unsigned Grid_reg_spline_ApproxBendingEnergy =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergy2D)));
+        dim3 G2(Grid_reg_spline_ApproxBendingEnergy, Grid_reg_spline_ApproxBendingEnergy, 1);
+        dim3 B2(blockSize->reg_spline_getApproxBendingEnergy2D, 1, 1);
+        reg_spline_getApproxBendingEnergy2D_kernel<<<G2, B2>>>(penaltyTerm_d);
+        NR_CUDA_CHECK_KERNEL(G2, B2);
+    }
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture));
+    NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d));
+
+    // Compute the mean bending energy value
+    double penaltyValue = reg_sumReduction_gpu(penaltyTerm_d, controlPointNumber);
+    NR_CUDA_SAFE_CALL(cudaFree(penaltyTerm_d));
+
+    return (float)(penaltyValue / (double)controlPointImage->nvox);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
-												float4 *controlPointImageArray_d,
-												float4 *nodeGradientArray_d,
-												float bendingEnergyWeight)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
-	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-	const int controlPointGridMem = controlPointNumber*sizeof(float4);
-
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem));
-
-	// First compute all the second derivatives
-	float4 *secondDerivativeValues_d;
-	if(controlPointImage->nz>1){
-		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointNumber*sizeof(float4)));
-		const unsigned Grid_bspline_getApproxSecondDerivatives =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives3D)));
-		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
-		dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D,1,1);
-		reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	else{
-		NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointNumber*sizeof(float4)));
-		const unsigned Grid_bspline_getApproxSecondDerivatives =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives2D)));
-		dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1);
-		dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D,1,1);
-		reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
-
-	// Compute the gradient
-	bendingEnergyWeight *= 1.f / (float)controlPointNumber;
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&bendingEnergyWeight,sizeof(float)));
-	if(controlPointImage->nz>1){
-		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
-										  secondDerivativeValues_d,
-										  6*controlPointNumber*sizeof(float4)));
-		const unsigned Grid_reg_spline_getApproxBendingEnergyGradient =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergyGradient3D)));
-		dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1);
-		dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient3D,1,1);
-		reg_spline_getApproxBendingEnergyGradient3D_kernel <<< G2, B2 >>>(nodeGradientArray_d);
-		NR_CUDA_CHECK_KERNEL(G2,B2);
-	}
-	else{
-		NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture,
-										  secondDerivativeValues_d,
-										  3*controlPointNumber*sizeof(float4)));
-		const unsigned Grid_reg_spline_getApproxBendingEnergyGradient =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergyGradient2D)));
-		dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1);
-		dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient2D,1,1);
-		reg_spline_getApproxBendingEnergyGradient2D_kernel <<< G2, B2 >>>(nodeGradientArray_d);
-		NR_CUDA_CHECK_KERNEL(G2,B2);
-	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture));
-	NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d));
+                                                float4 *controlPointImageArray_d,
+                                                float4 *nodeGradientArray_d,
+                                                float bendingEnergyWeight) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const int controlPointGridMem = controlPointNumber * sizeof(float4);
+
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem));
+
+    // First compute all the second derivatives
+    float4 *secondDerivativeValues_d;
+    if (controlPointImage->nz > 1) {
+        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6 * controlPointNumber * sizeof(float4)));
+        const unsigned Grid_bspline_getApproxSecondDerivatives =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives3D)));
+        dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1);
+        dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D, 1, 1);
+        reg_spline_getApproxSecondDerivatives3D<<<G1, B1>>>(secondDerivativeValues_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    } else {
+        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3 * controlPointNumber * sizeof(float4)));
+        const unsigned Grid_bspline_getApproxSecondDerivatives =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives2D)));
+        dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1);
+        dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D, 1, 1);
+        reg_spline_getApproxSecondDerivatives2D<<<G1, B1>>>(secondDerivativeValues_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    }
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
+
+    // Compute the gradient
+    bendingEnergyWeight *= 1.f / (float)controlPointNumber;
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &bendingEnergyWeight, sizeof(float)));
+    if (controlPointImage->nz > 1) {
+        NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 6 * controlPointNumber * sizeof(float4)));
+        const unsigned Grid_reg_spline_getApproxBendingEnergyGradient =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergyGradient3D)));
+        dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient, Grid_reg_spline_getApproxBendingEnergyGradient, 1);
+        dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient3D, 1, 1);
+        reg_spline_getApproxBendingEnergyGradient3D_kernel<<<G2, B2>>>(nodeGradientArray_d);
+        NR_CUDA_CHECK_KERNEL(G2, B2);
+    } else {
+        NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 3 * controlPointNumber * sizeof(float4)));
+        const unsigned Grid_reg_spline_getApproxBendingEnergyGradient =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergyGradient2D)));
+        dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient, Grid_reg_spline_getApproxBendingEnergyGradient, 1);
+        dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient2D, 1, 1);
+        reg_spline_getApproxBendingEnergyGradient2D_kernel<<<G2, B2>>>(nodeGradientArray_d);
+        NR_CUDA_CHECK_KERNEL(G2, B2);
+    }
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture));
+    NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
-											float4 *controlPointImageArray_d,
-											float *jacobianMatrices_d,
-											float *jacobianDet_d)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	// Need to reorient the Jacobian matrix using the header information - real to voxel conversion
-	mat33 reorientation;
-	if(controlPointImage->sform_code>0)
-		reorientation=reg_mat44_to_mat33(&controlPointImage->sto_xyz);
-	else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_xyz);
-	float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)));
-	temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)));
-	temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)));
-
-	// Bind some variables
-	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
-	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
-	const int controlPointGridMem = controlPointNumber*sizeof(float4);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem));
-
-	// The Jacobian matrix is computed for every control point
-	if(controlPointImage->nz>1){
-		const unsigned Grid_reg_spline_getApproxJacobianValues3D =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxJacobianValues3D)));
-		dim3 G1(Grid_reg_spline_getApproxJacobianValues3D,Grid_reg_spline_getApproxJacobianValues3D,1);
-		dim3 B1(blockSize->reg_spline_getApproxJacobianValues3D,1,1);
-		reg_spline_getApproxJacobianValues3D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	else{
-		const unsigned Grid_reg_spline_getApproxJacobianValues2D =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxJacobianValues2D)));
-		dim3 G1(Grid_reg_spline_getApproxJacobianValues2D,Grid_reg_spline_getApproxJacobianValues2D,1);
-		dim3 B1(blockSize->reg_spline_getApproxJacobianValues2D,1,1);
-		reg_spline_getApproxJacobianValues2D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
+                                            float4 *controlPointImageArray_d,
+                                            float *jacobianMatrices_d,
+                                            float *jacobianDet_d) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
+    mat33 reorientation;
+    if (controlPointImage->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&controlPointImage->sto_xyz);
+    else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_xyz);
+    float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3)));
+    temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3)));
+    temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3)));
+
+    // Bind some variables
+    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
+    const int controlPointGridMem = controlPointNumber * sizeof(float4);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem));
+
+    // The Jacobian matrix is computed for every control point
+    if (controlPointImage->nz > 1) {
+        const unsigned Grid_reg_spline_getApproxJacobianValues3D =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxJacobianValues3D)));
+        dim3 G1(Grid_reg_spline_getApproxJacobianValues3D, Grid_reg_spline_getApproxJacobianValues3D, 1);
+        dim3 B1(blockSize->reg_spline_getApproxJacobianValues3D, 1, 1);
+        reg_spline_getApproxJacobianValues3D_kernel<<<G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    } else {
+        const unsigned Grid_reg_spline_getApproxJacobianValues2D =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxJacobianValues2D)));
+        dim3 G1(Grid_reg_spline_getApproxJacobianValues2D, Grid_reg_spline_getApproxJacobianValues2D, 1);
+        dim3 B1(blockSize->reg_spline_getApproxJacobianValues2D, 1, 1);
+        reg_spline_getApproxJacobianValues2D_kernel<<<G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    }
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
 }
 /* *************************************************************** */
 void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
-									   nifti_image *referenceImage,
-									   float4 *controlPointImageArray_d,
-									   float *jacobianMatrices_d,
-									   float *jacobianDet_d)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	// Need to reorient the Jacobian matrix using the header information - real to voxel conversion
-	mat33 reorientation;
-	if(controlPointImage->sform_code>0)
-		reorientation=reg_mat44_to_mat33(&controlPointImage->sto_xyz);
-	else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_xyz);
-	float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)));
-	temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)));
-	temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)));
-
-	// Bind some variables
-	const int voxelNumber = CalcVoxelNumber(*referenceImage);
-	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
-	const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
-	const float3 controlPointVoxelSpacing = make_float3(
-			controlPointImage->dx / referenceImage->dx,
-			controlPointImage->dy / referenceImage->dy,
-			controlPointImage->dz / referenceImage->dz);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4)));
-
-	// The Jacobian matrix is computed for every voxel
-	if(controlPointImage->nz>1){
-		const unsigned Grid_reg_spline_getJacobianValues3D =
-			(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_spline_getJacobianValues3D)));
-		dim3 G1(Grid_reg_spline_getJacobianValues3D,Grid_reg_spline_getJacobianValues3D,1);
-		dim3 B1(blockSize->reg_spline_getJacobianValues3D,1,1);
-		// 8 floats of shared memory are allocated per thread
-		reg_spline_getJacobianValues3D_kernel
-				<<< G1, B1, blockSize->reg_spline_getJacobianValues3D*8*sizeof(float)>>>
-				(jacobianMatrices_d, jacobianDet_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	else{
-		const unsigned Grid_reg_spline_getJacobianValues2D =
-			(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_spline_getJacobianValues2D)));
-		dim3 G1(Grid_reg_spline_getJacobianValues2D,Grid_reg_spline_getJacobianValues2D,1);
-		dim3 B1(blockSize->reg_spline_getJacobianValues2D,1,1);
-		reg_spline_getJacobianValues2D_kernel
-				<<< G1, B1>>>
-				(jacobianMatrices_d, jacobianDet_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
+                                      nifti_image *referenceImage,
+                                      float4 *controlPointImageArray_d,
+                                      float *jacobianMatrices_d,
+                                      float *jacobianDet_d) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
+    mat33 reorientation;
+    if (controlPointImage->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&controlPointImage->sto_xyz);
+    else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_xyz);
+    float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3)));
+    temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3)));
+    temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3)));
+
+    // Bind some variables
+    const int voxelNumber = CalcVoxelNumber(*referenceImage);
+    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
+    const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
+                                                        controlPointImage->dy / referenceImage->dy,
+                                                        controlPointImage->dz / referenceImage->dz);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber * sizeof(float4)));
+
+    // The Jacobian matrix is computed for every voxel
+    if (controlPointImage->nz > 1) {
+        const unsigned Grid_reg_spline_getJacobianValues3D =
+            (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_spline_getJacobianValues3D)));
+        dim3 G1(Grid_reg_spline_getJacobianValues3D, Grid_reg_spline_getJacobianValues3D, 1);
+        dim3 B1(blockSize->reg_spline_getJacobianValues3D, 1, 1);
+        // 8 floats of shared memory are allocated per thread
+        reg_spline_getJacobianValues3D_kernel<<<G1, B1, blockSize->reg_spline_getJacobianValues3D * 8 * sizeof(float)>>>(jacobianMatrices_d, jacobianDet_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    } else {
+        const unsigned Grid_reg_spline_getJacobianValues2D =
+            (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_spline_getJacobianValues2D)));
+        dim3 G1(Grid_reg_spline_getJacobianValues2D, Grid_reg_spline_getJacobianValues2D, 1);
+        dim3 B1(blockSize->reg_spline_getJacobianValues2D, 1, 1);
+        reg_spline_getJacobianValues2D_kernel<<<G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    }
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
-											 nifti_image *controlPointImage,
-											 float4 *controlPointImageArray_d,
-											 bool approx)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	// The Jacobian matrices and determinants are computed
-	float *jacobianMatrices_d;
-	float *jacobianDet_d;
-	int jacNumber;
-	double jacSum;
-	if(approx){
-		jacNumber = CalcVoxelNumber(*controlPointImage);
-		jacSum = (controlPointImage->nx-2)*(controlPointImage->ny-2);
-		if(controlPointImage->nz>1){
-			jacSum *= controlPointImage->nz-2;
-			// Allocate array for 3x3 matrices
-			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)));
-		}
-		else{
-			// Allocate array for 2x2 matrices
-			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)));
-		}
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
-		reg_spline_ComputeApproxJacobianValues(controlPointImage,
-											   controlPointImageArray_d,
-											   jacobianMatrices_d,
-											   jacobianDet_d);
-	}
-	else{
-		jacNumber = CalcVoxelNumber(*referenceImage);
-		jacSum=jacNumber;
-		if(controlPointImage->nz>1){
-			// Allocate array for 3x3 matrices
-			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)));
-		}
-		else{
-			// Allocate array for 2x2 matrices
-			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)));
-		}
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
-		reg_spline_ComputeJacobianValues(controlPointImage,
-										 referenceImage,
-										 controlPointImageArray_d,
-										 jacobianMatrices_d,
-										 jacobianDet_d);
-	}
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
-
-	// The Jacobian determinant are squared and logged (might not be english but will do)
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int)));
-	const unsigned Grid_reg_spline_logSquaredValues =
-		(unsigned)ceilf(sqrtf((float)jacNumber/(float)(blockSize->reg_spline_logSquaredValues)));
-	dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1);
-	dim3 B1(blockSize->reg_spline_logSquaredValues,1,1);
-	reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1);
-	// Perform the reduction
-	double penaltyTermValue = reg_sumReduction_gpu(jacobianDet_d,jacNumber);
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
-	return penaltyTermValue/jacSum;
+                                             nifti_image *controlPointImage,
+                                             float4 *controlPointImageArray_d,
+                                             bool approx) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    // The Jacobian matrices and determinants are computed
+    float *jacobianMatrices_d;
+    float *jacobianDet_d;
+    int jacNumber;
+    double jacSum;
+    if (approx) {
+        jacNumber = CalcVoxelNumber(*controlPointImage);
+        jacSum = (controlPointImage->nx - 2) * (controlPointImage->ny - 2);
+        if (controlPointImage->nz > 1) {
+            jacSum *= controlPointImage->nz - 2;
+            // Allocate array for 3x3 matrices
+            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)));
+        } else {
+            // Allocate array for 2x2 matrices
+            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float)));
+        }
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
+        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+    } else {
+        jacNumber = CalcVoxelNumber(*referenceImage);
+        jacSum = jacNumber;
+        if (controlPointImage->nz > 1) {
+            // Allocate array for 3x3 matrices
+            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)));
+        } else {
+            // Allocate array for 2x2 matrices
+            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float)));
+        }
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
+        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+    }
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
+
+    // The Jacobian determinant are squared and logged (might not be english but will do)
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &jacNumber, sizeof(int)));
+    const unsigned Grid_reg_spline_logSquaredValues =
+        (unsigned)ceilf(sqrtf((float)jacNumber / (float)(blockSize->reg_spline_logSquaredValues)));
+    dim3 G1(Grid_reg_spline_logSquaredValues, Grid_reg_spline_logSquaredValues, 1);
+    dim3 B1(blockSize->reg_spline_logSquaredValues, 1, 1);
+    reg_spline_logSquaredValues_kernel<<<G1, B1>>>(jacobianDet_d);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+
+    // Perform the reduction
+    double penaltyTermValue = reg_sumReduction_gpu(jacobianDet_d, jacNumber);
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
+    return penaltyTermValue / jacSum;
 }
 /* *************************************************************** */
 void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
-												   nifti_image *controlPointImage,
-												   float4 *controlPointImageArray_d,
-												   float4 *nodeGradientArray_d,
-												   float jacobianWeight,
-												   bool approx)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	// The Jacobian matrices and determinants are computed
-	float *jacobianMatrices_d;
-	float *jacobianDet_d;
-	int jacNumber;
-	if(approx){
-		jacNumber=CalcVoxelNumber(*controlPointImage);
-		if(controlPointImage->nz>1)
-			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
-		else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)));
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
-		reg_spline_ComputeApproxJacobianValues(controlPointImage,
-												controlPointImageArray_d,
-												jacobianMatrices_d,
-												jacobianDet_d);
-	}
-	else{
-		jacNumber=CalcVoxelNumber(*referenceImage);
-		if(controlPointImage->nz>1)
-			NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)))
-		else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float)));
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
-		reg_spline_ComputeJacobianValues(controlPointImage,
-										  referenceImage,
-										  controlPointImageArray_d,
-										  jacobianMatrices_d,
-										  jacobianDet_d);
-	}
-
-	// Need to desorient the Jacobian matrix using the header information - voxel to real conversion
-	mat33 reorientation;
-	if(controlPointImage->sform_code>0)
-		reorientation=reg_mat44_to_mat33(&controlPointImage->sto_ijk);
-	else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_ijk);
-	float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)));
-	temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)));
-	temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)));
-
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianDeterminantTexture, jacobianDet_d,
-									  jacNumber*sizeof(float)));
-	if(controlPointImage->nz>1)
-		NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d,
-										  9*jacNumber*sizeof(float)))
-	else NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d,
-										   4*jacNumber*sizeof(float)));
-
-	// Bind some variables
-	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
-	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)));
-	float3 weight=make_float3(
-				referenceImage->dx*jacobianWeight / ((float)jacNumber*controlPointImage->dx),
-				referenceImage->dy*jacobianWeight / ((float)jacNumber*controlPointImage->dy),
-				referenceImage->dz*jacobianWeight / ((float)jacNumber*controlPointImage->dz));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3,&weight,sizeof(float3)));
-	if(approx){
-		if(controlPointImage->nz>1){
-			const unsigned Grid_reg_spline_computeApproxJacGradient3D =
-				(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeApproxJacGradient3D)));
-			dim3 G1(Grid_reg_spline_computeApproxJacGradient3D,Grid_reg_spline_computeApproxJacGradient3D,1);
-			dim3 B1(blockSize->reg_spline_computeApproxJacGradient3D,1,1);
-			reg_spline_computeApproxJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d);
-			NR_CUDA_CHECK_KERNEL(G1,B1);
-		}
-		else{
-			const unsigned Grid_reg_spline_computeApproxJacGradient2D =
-				(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeApproxJacGradient2D)));
-			dim3 G1(Grid_reg_spline_computeApproxJacGradient2D,Grid_reg_spline_computeApproxJacGradient2D,1);
-			dim3 B1(blockSize->reg_spline_computeApproxJacGradient2D,1,1);
-			reg_spline_computeApproxJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d);
-			NR_CUDA_CHECK_KERNEL(G1,B1);
-		}
-	}
-	else{
-		const int voxelNumber = CalcVoxelNumber(*referenceImage);
-		const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-		const float3 controlPointVoxelSpacing = make_float3(
-				controlPointImage->dx / referenceImage->dx,
-				controlPointImage->dy / referenceImage->dy,
-				controlPointImage->dz / referenceImage->dz);
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)));
-		if(controlPointImage->nz>1){
-			const unsigned Grid_reg_spline_computeJacGradient3D =
-				(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeJacGradient3D)));
-			dim3 G1(Grid_reg_spline_computeJacGradient3D,Grid_reg_spline_computeJacGradient3D,1);
-			dim3 B1(blockSize->reg_spline_computeJacGradient3D,1,1);
-			reg_spline_computeJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d);
-			NR_CUDA_CHECK_KERNEL(G1,B1);
-		}
-		else{
-			const unsigned Grid_reg_spline_computeJacGradient2D =
-				(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeJacGradient2D)));
-			dim3 G1(Grid_reg_spline_computeJacGradient2D,Grid_reg_spline_computeJacGradient2D,1);
-			dim3 B1(blockSize->reg_spline_computeJacGradient2D,1,1);
-			reg_spline_computeJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d);
-			NR_CUDA_CHECK_KERNEL(G1,B1);
-		}
-	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture));
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture));
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
+                                                   nifti_image *controlPointImage,
+                                                   float4 *controlPointImageArray_d,
+                                                   float4 *nodeGradientArray_d,
+                                                   float jacobianWeight,
+                                                   bool approx) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    // The Jacobian matrices and determinants are computed
+    float *jacobianMatrices_d;
+    float *jacobianDet_d;
+    int jacNumber;
+    if (approx) {
+        jacNumber = CalcVoxelNumber(*controlPointImage);
+        if (controlPointImage->nz > 1)
+            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)))
+        else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float)));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
+        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+    } else {
+        jacNumber = CalcVoxelNumber(*referenceImage);
+        if (controlPointImage->nz > 1)
+            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)))
+        else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float)));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
+        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+    }
+
+    // Need to disorient the Jacobian matrix using the header information - voxel to real conversion
+    mat33 reorientation;
+    if (controlPointImage->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&controlPointImage->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_ijk);
+    float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3)));
+    temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3)));
+    temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianDeterminantTexture, jacobianDet_d, jacNumber * sizeof(float)));
+    if (controlPointImage->nz > 1)
+        NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 9 * jacNumber * sizeof(float)))
+    else NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 4 * jacNumber * sizeof(float)));
+
+    // Bind some variables
+    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3)));
+    float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx),
+                                referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy),
+                                referenceImage->dz * jacobianWeight / ((float)jacNumber * controlPointImage->dz));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3, &weight, sizeof(float3)));
+    if (approx) {
+        if (controlPointImage->nz > 1) {
+            const unsigned Grid_reg_spline_computeApproxJacGradient3D =
+                (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeApproxJacGradient3D)));
+            dim3 G1(Grid_reg_spline_computeApproxJacGradient3D, Grid_reg_spline_computeApproxJacGradient3D, 1);
+            dim3 B1(blockSize->reg_spline_computeApproxJacGradient3D, 1, 1);
+            reg_spline_computeApproxJacGradient3D_kernel<<<G1, B1>>>(nodeGradientArray_d);
+            NR_CUDA_CHECK_KERNEL(G1, B1);
+        } else {
+            const unsigned Grid_reg_spline_computeApproxJacGradient2D =
+                (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeApproxJacGradient2D)));
+            dim3 G1(Grid_reg_spline_computeApproxJacGradient2D, Grid_reg_spline_computeApproxJacGradient2D, 1);
+            dim3 B1(blockSize->reg_spline_computeApproxJacGradient2D, 1, 1);
+            reg_spline_computeApproxJacGradient2D_kernel<<<G1, B1>>>(nodeGradientArray_d);
+            NR_CUDA_CHECK_KERNEL(G1, B1);
+        }
+    } else {
+        const int voxelNumber = CalcVoxelNumber(*referenceImage);
+        const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+        const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
+                                                            controlPointImage->dy / referenceImage->dy,
+                                                            controlPointImage->dz / referenceImage->dz);
+        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
+        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3)));
+        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3)));
+        if (controlPointImage->nz > 1) {
+            const unsigned Grid_reg_spline_computeJacGradient3D =
+                (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeJacGradient3D)));
+            dim3 G1(Grid_reg_spline_computeJacGradient3D, Grid_reg_spline_computeJacGradient3D, 1);
+            dim3 B1(blockSize->reg_spline_computeJacGradient3D, 1, 1);
+            reg_spline_computeJacGradient3D_kernel<<<G1, B1>>>(nodeGradientArray_d);
+            NR_CUDA_CHECK_KERNEL(G1, B1);
+        } else {
+            const unsigned Grid_reg_spline_computeJacGradient2D =
+                (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeJacGradient2D)));
+            dim3 G1(Grid_reg_spline_computeJacGradient2D, Grid_reg_spline_computeJacGradient2D, 1);
+            dim3 B1(blockSize->reg_spline_computeJacGradient2D, 1, 1);
+            reg_spline_computeJacGradient2D_kernel<<<G1, B1>>>(nodeGradientArray_d);
+            NR_CUDA_CHECK_KERNEL(G1, B1);
+        }
+    }
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
 }
 /* *************************************************************** */
 double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
-									  nifti_image *controlPointImage,
-									  float4 *controlPointImageArray_d,
-									  bool approx)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	// The Jacobian matrices and determinants are computed
-	float *jacobianMatrices_d;
-	float *jacobianDet_d;
-	int jacNumber;
-	double jacSum;
-	if(approx){
-		jacNumber=CalcVoxelNumber(*controlPointImage);
-		jacSum = (controlPointImage->nx-2)*(controlPointImage->ny-2)*(controlPointImage->nz-2);
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)));
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
-		reg_spline_ComputeApproxJacobianValues(controlPointImage,
-												controlPointImageArray_d,
-												jacobianMatrices_d,
-												jacobianDet_d);
-	}
-	else{
-		jacSum=jacNumber=CalcVoxelNumber(*referenceImage);
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float)));
-		NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float)));
-		reg_spline_ComputeJacobianValues(controlPointImage,
-										  referenceImage,
-										  controlPointImageArray_d,
-										  jacobianMatrices_d,
-										  jacobianDet_d);
-	}
-
-	// Check if the Jacobian determinant average
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int)));
-	float *jacobianDet2_d;
-	NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d,jacNumber*sizeof(float)));
-	NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d,jacobianDet_d,jacNumber*sizeof(float),cudaMemcpyDeviceToDevice));
-	const unsigned Grid_reg_spline_logSquaredValues =
-		(unsigned)ceilf(sqrtf((float)jacNumber/(float)(blockSize->reg_spline_logSquaredValues)));
-	dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1);
-	dim3 B1(blockSize->reg_spline_logSquaredValues,1,1);
-	reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet2_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1);
-	float *jacobianDet_h;
-	NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet_h,jacNumber*sizeof(float)));
-	NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet_h,jacobianDet2_d,
-								 jacNumber*sizeof(float),
-								 cudaMemcpyDeviceToHost));
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2_d));
-	double penaltyTermValue=0.;
-	for(int i=0;i<jacNumber;++i) penaltyTermValue += jacobianDet_h[i];
-	NR_CUDA_SAFE_CALL(cudaFreeHost(jacobianDet_h));
-	penaltyTermValue /= jacSum;
-	if(penaltyTermValue==penaltyTermValue){
-		NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
-		NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
-		return penaltyTermValue;
-	}
-
-	// Need to disorient the Jacobian matrix using the header information - voxel to real conversion
-	mat33 reorientation;
-	if(controlPointImage->sform_code>0)
-		reorientation=reg_mat44_to_mat33(&controlPointImage->sto_ijk);
-	else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_ijk);
-	float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)));
-	temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)));
-	temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)));
-
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianDeterminantTexture, jacobianDet_d,
-									  jacNumber*sizeof(float)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d,
-									  9*jacNumber*sizeof(float)));
-
-	// Bind some variables
-	const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
-	const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-	const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3)));
-	if(approx){
-		const unsigned Grid_reg_spline_approxCorrectFolding =
-			(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_approxCorrectFolding3D)));
-		dim3 G1(Grid_reg_spline_approxCorrectFolding,Grid_reg_spline_approxCorrectFolding,1);
-		dim3 B1(blockSize->reg_spline_approxCorrectFolding3D,1,1);
-		reg_spline_approxCorrectFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	else{
-		const int voxelNumber = CalcVoxelNumber(*referenceImage);
-		const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-		const float3 controlPointVoxelSpacing = make_float3(
-				controlPointImage->dx / referenceImage->dx,
-				controlPointImage->dy / referenceImage->dy,
-				controlPointImage->dz / referenceImage->dz);
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
-		NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3)));
-		const unsigned Grid_reg_spline_correctFolding =
-		(unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_correctFolding3D)));
-		dim3 G1(Grid_reg_spline_correctFolding,Grid_reg_spline_correctFolding,1);
-		dim3 B1(blockSize->reg_spline_correctFolding3D,1,1);
-		reg_spline_correctFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture));
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture));
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
-	NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
-	return std::numeric_limits<double>::quiet_NaN();
+                                     nifti_image *controlPointImage,
+                                     float4 *controlPointImageArray_d,
+                                     bool approx) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    // The Jacobian matrices and determinants are computed
+    float *jacobianMatrices_d;
+    float *jacobianDet_d;
+    int jacNumber;
+    double jacSum;
+    if (approx) {
+        jacNumber = CalcVoxelNumber(*controlPointImage);
+        jacSum = (controlPointImage->nx - 2) * (controlPointImage->ny - 2) * (controlPointImage->nz - 2);
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
+        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+    } else {
+        jacSum = jacNumber = CalcVoxelNumber(*referenceImage);
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
+        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+    }
+
+    // Check if the Jacobian determinant average
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &jacNumber, sizeof(int)));
+    float *jacobianDet2_d;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d, jacNumber * sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d, jacobianDet_d, jacNumber * sizeof(float), cudaMemcpyDeviceToDevice));
+    const unsigned Grid_reg_spline_logSquaredValues =
+        (unsigned)ceilf(sqrtf((float)jacNumber / (float)(blockSize->reg_spline_logSquaredValues)));
+    dim3 G1(Grid_reg_spline_logSquaredValues, Grid_reg_spline_logSquaredValues, 1);
+    dim3 B1(blockSize->reg_spline_logSquaredValues, 1, 1);
+    reg_spline_logSquaredValues_kernel<<<G1, B1>>>(jacobianDet2_d);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+    float *jacobianDet_h;
+    NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet_h, jacNumber * sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet_h, jacobianDet2_d, jacNumber * sizeof(float), cudaMemcpyDeviceToHost));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2_d));
+    double penaltyTermValue = 0;
+    for (int i = 0; i < jacNumber; ++i) penaltyTermValue += jacobianDet_h[i];
+    NR_CUDA_SAFE_CALL(cudaFreeHost(jacobianDet_h));
+    penaltyTermValue /= jacSum;
+    if (penaltyTermValue == penaltyTermValue) {
+        NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
+        NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
+        return penaltyTermValue;
+    }
+
+    // Need to disorient the Jacobian matrix using the header information - voxel to real conversion
+    mat33 reorientation;
+    if (controlPointImage->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&controlPointImage->sto_ijk);
+    else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_ijk);
+    float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3)));
+    temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3)));
+    temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3)));
+
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianDeterminantTexture, jacobianDet_d, jacNumber * sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 9 * jacNumber * sizeof(float)));
+
+    // Bind some variables
+    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3)));
+    if (approx) {
+        const unsigned Grid_reg_spline_approxCorrectFolding =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_approxCorrectFolding3D)));
+        dim3 G1(Grid_reg_spline_approxCorrectFolding, Grid_reg_spline_approxCorrectFolding, 1);
+        dim3 B1(blockSize->reg_spline_approxCorrectFolding3D, 1, 1);
+        reg_spline_approxCorrectFolding3D_kernel<<<G1, B1>>>(controlPointImageArray_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    } else {
+        const int voxelNumber = CalcVoxelNumber(*referenceImage);
+        const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+        const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
+                                                            controlPointImage->dy / referenceImage->dy,
+                                                            controlPointImage->dz / referenceImage->dz);
+        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
+        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3)));
+        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3)));
+        const unsigned Grid_reg_spline_correctFolding =
+            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_correctFolding3D)));
+        dim3 G1(Grid_reg_spline_correctFolding, Grid_reg_spline_correctFolding, 1);
+        dim3 B1(blockSize->reg_spline_correctFolding3D, 1, 1);
+        reg_spline_correctFolding3D_kernel<<<G1, B1>>>(controlPointImageArray_d);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    }
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
+    return std::numeric_limits<double>::quiet_NaN();
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	// Bind the qform or sform
-	mat44 temp_mat=image->qto_xyz;
-	if(image->sform_code>0) temp_mat=image->sto_xyz;
-	float4 temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4)));
-	temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4)));
-	temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)));
-
-	const int voxelNumber = CalcVoxelNumber(*image);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
-
-	const int3 imageDim=make_int3(image->nx,image->ny,image->nz);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3)));
-
-	const unsigned Grid_reg_getDeformationFromDisplacement =
-	(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_getDeformationFromDisplacement)));
-	dim3 G1(Grid_reg_getDeformationFromDisplacement,Grid_reg_getDeformationFromDisplacement,1);
-	dim3 B1(blockSize->reg_getDeformationFromDisplacement,1,1);
-	reg_getDeformationFromDisplacement3D_kernel<<< G1, B1>>>(imageArray_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1);
+void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    // Bind the qform or sform
+    mat44 temp_mat = image->qto_xyz;
+    if (image->sform_code > 0) temp_mat = image->sto_xyz;
+    float4 temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4)));
+    temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4)));
+    temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4)));
+
+    const int voxelNumber = CalcVoxelNumber(*image);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
+
+    const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &imageDim, sizeof(int3)));
+
+    const unsigned Grid_reg_getDeformationFromDisplacement =
+        (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_getDeformationFromDisplacement)));
+    dim3 G1(Grid_reg_getDeformationFromDisplacement, Grid_reg_getDeformationFromDisplacement, 1);
+    dim3 B1(blockSize->reg_getDeformationFromDisplacement, 1, 1);
+    reg_getDeformationFromDisplacement3D_kernel<<<G1, B1>>>(imageArray_d);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	// Bind the qform or sform
-	mat44 temp_mat=image->qto_xyz;
-	if(image->sform_code>0) temp_mat=image->sto_xyz;
-	float4 temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4)));
-	temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4)));
-	temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)));
-
-	const int voxelNumber = CalcVoxelNumber(*image);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
-
-	const int3 imageDim=make_int3(image->nx,image->ny,image->nz);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3)));
-
-	const unsigned Grid_reg_getDisplacementFromDeformation =
-		(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_getDisplacementFromDeformation)));
-	dim3 G1(Grid_reg_getDisplacementFromDeformation,Grid_reg_getDisplacementFromDeformation,1);
-	dim3 B1(blockSize->reg_getDisplacementFromDeformation,1,1);
-	reg_getDisplacementFromDeformation3D_kernel<<< G1, B1>>>(imageArray_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1);
+void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    // Bind the qform or sform
+    mat44 temp_mat = image->qto_xyz;
+    if (image->sform_code > 0) temp_mat = image->sto_xyz;
+    float4 temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4)));
+    temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4)));
+    temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4)));
+
+    const int voxelNumber = CalcVoxelNumber(*image);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
+
+    const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &imageDim, sizeof(int3)));
+
+    const unsigned Grid_reg_getDisplacementFromDeformation =
+        (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_getDisplacementFromDeformation)));
+    dim3 G1(Grid_reg_getDisplacementFromDeformation, Grid_reg_getDisplacementFromDeformation, 1);
+    dim3 B1(blockSize->reg_getDisplacementFromDeformation, 1, 1);
+    reg_getDisplacementFromDeformation3D_kernel<<<G1, B1>>>(imageArray_d);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
-												 nifti_image *def_h,
-												 float4 *cpp_gpu,
-												 float4 *def_gpu)
-{
-	const int voxelNumber = CalcVoxelNumber(*def_h);
-
-	// Create a mask array where no voxel are excluded
-	int *mask_gpu=nullptr;
-	NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber*sizeof(int)));
-	reg_fillMaskArray_gpu(voxelNumber,mask_gpu);
-
-	// Define some variables for the deformation fields
-	float4 *tempDef_gpu=nullptr;
-	NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu,voxelNumber*sizeof(float4)));
-
-	// The deformation field is computed
-	reg_spline_getDeformationField_gpu(cpp_h,
-									   def_h,
-									   cpp_gpu,
-									   def_gpu,
-									   mask_gpu,
-									   voxelNumber,
-									   true); // non-interpolant spline are used
-
-	// The deformation field is converted into a displacement field
-	reg_getDisplacementFromDeformation_gpu(def_h,def_gpu);
-
-	// Scaling of the deformation field
-	float scalingValue = pow(2.0f,fabs(cpp_h->intent_p1));
-	if(cpp_h->intent_p1<0)
-		// backward deformation field is scaled down
-		reg_multiplyValue_gpu(voxelNumber,
-							  def_gpu,
-							  -1.f/scalingValue);
-	else
-		// forward deformation field is scaled down
-		reg_multiplyValue_gpu(voxelNumber,
-							  def_gpu,
-							  1.f/scalingValue);
-
-	// The displacement field is converted back into a deformation field
-	reg_getDeformationFromDisplacement_gpu(def_h,def_gpu);
-
-
-	// The deformation field is squared
-	unsigned squaringNumber = (unsigned)fabs(cpp_h->intent_p1);
-	for(unsigned i=0;i<squaringNumber;++i){
-
-		// The deformation field arrays are updated
-		NR_CUDA_SAFE_CALL(cudaMemcpy(tempDef_gpu,def_gpu,voxelNumber*sizeof(float4),cudaMemcpyDeviceToDevice));
-
-		// The deformation fields are composed
-		reg_defField_compose_gpu(def_h,
-								 tempDef_gpu,
-								 def_gpu,
-								 mask_gpu,
-								 voxelNumber);
-	}
-
-	NR_CUDA_SAFE_CALL(cudaFree(tempDef_gpu));
-	NR_CUDA_SAFE_CALL(cudaFree(mask_gpu));
+                                                 nifti_image *def_h,
+                                                 float4 *cpp_gpu,
+                                                 float4 *def_gpu) {
+    const int voxelNumber = CalcVoxelNumber(*def_h);
+
+    // Create a mask array where no voxel are excluded
+    int *mask_gpu = nullptr;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber * sizeof(int)));
+    reg_fillMaskArray_gpu(voxelNumber, mask_gpu);
+
+    // Define some variables for the deformation fields
+    float4 *tempDef_gpu = nullptr;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu, voxelNumber * sizeof(float4)));
+
+    // The deformation field is computed
+    reg_spline_getDeformationField_gpu(cpp_h, def_h, cpp_gpu, def_gpu, mask_gpu, voxelNumber, true); // non-interpolant spline are used
+
+    // The deformation field is converted into a displacement field
+    reg_getDisplacementFromDeformation_gpu(def_h, def_gpu);
+
+    // Scaling of the deformation field
+    float scalingValue = pow(2.0f, fabs(cpp_h->intent_p1));
+    if (cpp_h->intent_p1 < 0)
+        // backward deformation field is scaled down
+        reg_multiplyValue_gpu(voxelNumber, def_gpu, -1.f / scalingValue);
+    else
+        // forward deformation field is scaled down
+        reg_multiplyValue_gpu(voxelNumber, def_gpu, 1.f / scalingValue);
+
+    // The displacement field is converted back into a deformation field
+    reg_getDeformationFromDisplacement_gpu(def_h, def_gpu);
+
+    // The deformation field is squared
+    unsigned squaringNumber = (unsigned)fabs(cpp_h->intent_p1);
+    for (unsigned i = 0; i < squaringNumber; ++i) {
+        // The deformation field arrays are updated
+        NR_CUDA_SAFE_CALL(cudaMemcpy(tempDef_gpu, def_gpu, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
+
+        // The deformation fields are composed
+        reg_defField_compose_gpu(def_h, tempDef_gpu, def_gpu, mask_gpu, voxelNumber);
+    }
+
+    NR_CUDA_SAFE_CALL(cudaFree(tempDef_gpu));
+    NR_CUDA_SAFE_CALL(cudaFree(mask_gpu));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_defField_compose_gpu(nifti_image *def,
-							  float4 *def_gpu,
-							  float4 *defOut_gpu,
-							  int *mask_gpu,
-							  int activeVoxel)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	const int voxelNumber = CalcVoxelNumber(*def);
-
-	// Bind the qform or sform
-	mat44 temp_mat=def->qto_ijk;
-	if(def->sform_code>0) temp_mat=def->sto_ijk;
-	float4 temp;
-	temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4)));
-	temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4)));
-	temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4)));
-
-	temp_mat=def->qto_xyz;
-	if(def->sform_code>0) temp_mat=def->sto_xyz;
-	temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0c,&temp,sizeof(float4)));
-	temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1c,&temp,sizeof(float4)));
-	temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2c,&temp,sizeof(float4)));
-
-	const int3 referenceImageDim=make_int3(def->nx,def->ny,def->nz);
-
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,def_gpu,activeVoxel*sizeof(float4)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,maskTexture,mask_gpu,activeVoxel*sizeof(int)));
-
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3)));
-
-	if(def->nz>1){
-		const unsigned Grid_reg_defField_compose3D =
-			(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_compose3D)));
-		dim3 G1(Grid_reg_defField_compose3D,Grid_reg_defField_compose3D,1);
-		dim3 B1(blockSize->reg_defField_compose3D,1,1);
-		reg_defField_compose3D_kernel<<< G1, B1>>>(defOut_gpu);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-	else{
-		const unsigned Grid_reg_defField_compose2D =
-			(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_compose2D)));
-		dim3 G1(Grid_reg_defField_compose2D,Grid_reg_defField_compose2D,1);
-		dim3 B1(blockSize->reg_defField_compose2D,1,1);
-		reg_defField_compose2D_kernel<<< G1, B1>>>(defOut_gpu);
-		NR_CUDA_CHECK_KERNEL(G1,B1);
-	}
-
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture));
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
+                              float4 *def_gpu,
+                              float4 *defOut_gpu,
+                              int *mask_gpu,
+                              int activeVoxel) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    const int voxelNumber = CalcVoxelNumber(*def);
+
+    // Bind the qform or sform
+    mat44 temp_mat = def->qto_ijk;
+    if (def->sform_code > 0) temp_mat = def->sto_ijk;
+    float4 temp;
+    temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4)));
+    temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4)));
+    temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4)));
+
+    temp_mat = def->qto_xyz;
+    if (def->sform_code > 0) temp_mat = def->sto_xyz;
+    temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0c, &temp, sizeof(float4)));
+    temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1c, &temp, sizeof(float4)));
+    temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2c, &temp, sizeof(float4)));
+
+    const int3 referenceImageDim = make_int3(def->nx, def->ny, def->nz);
+
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, voxelDeformationTexture, def_gpu, activeVoxel * sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_gpu, activeVoxel * sizeof(int)));
+
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3)));
+
+    if (def->nz > 1) {
+        const unsigned Grid_reg_defField_compose3D =
+            (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_compose3D)));
+        dim3 G1(Grid_reg_defField_compose3D, Grid_reg_defField_compose3D, 1);
+        dim3 B1(blockSize->reg_defField_compose3D, 1, 1);
+        reg_defField_compose3D_kernel<<<G1, B1>>>(defOut_gpu);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    } else {
+        const unsigned Grid_reg_defField_compose2D =
+            (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_compose2D)));
+        dim3 G1(Grid_reg_defField_compose2D, Grid_reg_defField_compose2D, 1);
+        dim3 B1(blockSize->reg_defField_compose2D, 1, 1);
+        reg_defField_compose2D_kernel<<<G1, B1>>>(defOut_gpu);
+        NR_CUDA_CHECK_KERNEL(G1, B1);
+    }
+
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture));
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
-										float4 **deformationField_gpu,
-										float **jacobianMatrices_gpu)
-{
-	auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-	const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz);
-	const float3 referenceSpacing=make_float3(deformationField->dx,deformationField->dy,deformationField->dz);
-	const int voxelNumber = referenceDim.x*referenceDim.y*referenceDim.z;
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceDim,sizeof(int3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceSpacing,&referenceSpacing,sizeof(float3)));
-
-	mat33 reorientation;
-	if(deformationField->sform_code>0)
-		reorientation=reg_mat44_to_mat33(&deformationField->sto_xyz);
-	else reorientation=reg_mat44_to_mat33(&deformationField->qto_xyz);
-	float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3)));
-	temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3)));
-	temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3)));
-
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,*deformationField_gpu,voxelNumber*sizeof(float4)));
-
-	const unsigned Grid_reg_defField_getJacobianMatrix =
-		(unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_getJacobianMatrix)));
-	dim3 G1(Grid_reg_defField_getJacobianMatrix,Grid_reg_defField_getJacobianMatrix,1);
-	dim3 B1(blockSize->reg_defField_getJacobianMatrix);
-	reg_defField_getJacobianMatrix3D_kernel<<<G1,B1>>>(*jacobianMatrices_gpu);
-	NR_CUDA_CHECK_KERNEL(G1,B1);
-
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture));
+                                        float4 **deformationField_gpu,
+                                        float **jacobianMatrices_gpu) {
+    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+
+    const int3 referenceDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
+    const float3 referenceSpacing = make_float3(deformationField->dx, deformationField->dy, deformationField->dz);
+    const int voxelNumber = referenceDim.x * referenceDim.y * referenceDim.z;
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceSpacing, &referenceSpacing, sizeof(float3)));
+
+    mat33 reorientation;
+    if (deformationField->sform_code > 0)
+        reorientation = reg_mat44_to_mat33(&deformationField->sto_xyz);
+    else reorientation = reg_mat44_to_mat33(&deformationField->qto_xyz);
+    float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3)));
+    temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3)));
+    temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]);
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3)));
+
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, voxelDeformationTexture, *deformationField_gpu, voxelNumber * sizeof(float4)));
+
+    const unsigned Grid_reg_defField_getJacobianMatrix =
+        (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_getJacobianMatrix)));
+    dim3 G1(Grid_reg_defField_getJacobianMatrix, Grid_reg_defField_getJacobianMatrix, 1);
+    dim3 B1(blockSize->reg_defField_getJacobianMatrix);
+    reg_defField_getJacobianMatrix3D_kernel << <G1, B1>>>(*jacobianMatrices_gpu);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture));
 }
 /* *************************************************************** */
-/* *************************************************************** */

From c7971ae72d8e7fb6de098d1c80e440b704f7ba40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 16 Jun 2023 15:31:03 +0100
Subject: [PATCH 137/314] Fix GPU version of GetDeformationField() to make on a
 par with CPU version

---
 niftyreg_build_version.txt                    |   2 +-
 .../cuda/_reg_localTransformation_kernels.cu  | 158 +++++++++---------
 2 files changed, 77 insertions(+), 83 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9183bf03..a700e799 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-256
+257
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 9c83e29f..54e8fb30 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -84,17 +84,17 @@ __device__ float4 operator-(float4 a, float4 b) {
     return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, 0.f);
 }
 /* *************************************************************** */
-__device__ void GetBasisBSplineValues(float basis, float *values) {
-    float ff = basis * basis;
-    float fff = ff * basis;
-    float mf = 1.f - basis;
-    values[0] = (mf) * (mf) * (mf) / (6.f);
-    values[1] = (3.f * fff - 6.f * ff + 4.f) / 6.f;
-    values[2] = (-3.f * fff + 3.f * ff + 3.f * basis + 1.f) / 6.f;
-    values[3] = (fff / 6.f);
+__device__ void GetBasisBSplineValues(const double basis, float *values) {
+    const double ff = basis * basis;
+    const double fff = basis * basis * basis;
+    const double mf = 1.0 - basis;
+    values[0] = static_cast<float>(mf * mf * mf / 6.0);
+    values[1] = static_cast<float>((3.0 * fff - 6.0 * ff + 4.0) / 6.0);
+    values[2] = static_cast<float>((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0);
+    values[3] = static_cast<float>(fff / 6.0);
 }
 /* *************************************************************** */
-__device__ void GetFirstBSplineValues(float basis, float *values, float *first) {
+__device__ void GetFirstBSplineValues(const float& basis, float *values, float *first) {
     GetBasisBSplineValues(basis, values);
     first[3] = basis * basis / 2.f;
     first[0] = basis - 0.5f - first[3];
@@ -102,23 +102,23 @@ __device__ void GetFirstBSplineValues(float basis, float *values, float *first)
     first[1] = -first[0] - first[2] - first[3];
 }
 /* *************************************************************** */
-__device__ void GetBasisSplineValues(float basis, float *values) {
-    float FF = basis * basis;
+__device__ void GetBasisSplineValues(const float& basis, float *values) {
+    const float ff = basis * basis;
     values[0] = (basis * ((2.f - basis) * basis - 1.f)) / 2.f;
-    values[1] = (FF * (3.f * basis - 5.f) + 2.f) / 2.f;
+    values[1] = (ff * (3.f * basis - 5.f) + 2.f) / 2.f;
     values[2] = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f;
-    values[3] = (basis - 1.f) * FF / 2.f;
+    values[3] = (basis - 1.f) * ff / 2.f;
 }
 /* *************************************************************** */
-__device__ void GetBasisSplineValuesX(float basis, float4 *values) {
-    float FF = basis * basis;
+__device__ void GetBasisSplineValuesX(const float& basis, float4 *values) {
+    const float ff = basis * basis;
     values->x = (basis * ((2.f - basis) * basis - 1.f)) / 2.f;
-    values->y = (FF * (3.f * basis - 5.f) + 2.f) / 2.f;
+    values->y = (ff * (3.f * basis - 5.f) + 2.f) / 2.f;
     values->z = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f;
-    values->w = (basis - 1.f) * FF / 2.f;
+    values->w = (basis - 1.f) * ff / 2.f;
 }
 /* *************************************************************** */
-__device__ void GetBSplineBasisValue(float basis, int index, float *value, float *first) {
+__device__ void GetBSplineBasisValue(const float& basis, const int& index, float *value, float *first) {
     switch (index) {
     case 0:
         *value = (1.f - basis) * (1.f - basis) * (1.f - basis) / 6.f;
@@ -143,7 +143,7 @@ __device__ void GetBSplineBasisValue(float basis, int index, float *value, float
     }
 }
 /* *************************************************************** */
-__device__ void GetFirstDerivativeBasisValues2D(int index, float *xBasis, float *yBasis) {
+__device__ void GetFirstDerivativeBasisValues2D(const int& index, float *xBasis, float *yBasis) {
     switch (index) {
     case 0: xBasis[0] = -0.0833333f; yBasis[0] = -0.0833333f; break;
     case 1: xBasis[1] = 0.f; yBasis[1] = -0.333333f; break;
@@ -157,7 +157,7 @@ __device__ void GetFirstDerivativeBasisValues2D(int index, float *xBasis, float
     }
 }
 /* *************************************************************** */
-__device__ void GetFirstDerivativeBasisValues3D(int index, float *xBasis, float *yBasis, float *zBasis) {
+__device__ void GetFirstDerivativeBasisValues3D(const int& index, float *xBasis, float *yBasis, float *zBasis) {
     switch (index) {
     case 0: xBasis[0] = -0.013889f; yBasis[0] = -0.013889f; zBasis[0] = -0.013889f; break;
     case 1: xBasis[1] = 0.000000f; yBasis[1] = -0.055556f; zBasis[1] = -0.055556f; break;
@@ -189,7 +189,7 @@ __device__ void GetFirstDerivativeBasisValues3D(int index, float *xBasis, float
     }
 }
 /* *************************************************************** */
-__device__ void GetSecondDerivativeBasisValues2D(int index, float *xxBasis, float *yyBasis, float *xyBasis) {
+__device__ void GetSecondDerivativeBasisValues2D(const int& index, float *xxBasis, float *yyBasis, float *xyBasis) {
     switch (index) {
     case 0: xxBasis[0] = 0.166667f; yyBasis[0] = 0.166667f; xyBasis[0] = 0.25f; break;
     case 1: xxBasis[1] = -0.333333f; yyBasis[1] = 0.666667f; xyBasis[1] = -0.f; break;
@@ -203,7 +203,7 @@ __device__ void GetSecondDerivativeBasisValues2D(int index, float *xxBasis, floa
     }
 }
 /* *************************************************************** */
-__device__ void GetSecondDerivativeBasisValues3D(int index,
+__device__ void GetSecondDerivativeBasisValues3D(const int& index,
                                                  float *xxBasis,
                                                  float *yyBasis,
                                                  float *zzBasis,
@@ -377,11 +377,6 @@ __device__ float4 GetSlidedValues(int x, int y, int z) {
 __global__ void reg_spline_getDeformationField3D(float4 *positionField) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < c_ActiveVoxelNumber) {
-        // Allocate the shared memory
-        extern __shared__ float yBasis[];
-        // Compute the shared memory offset which corresponds to four times the number of thread per block
-        float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z];
-
         const int3 imageSize = c_ReferenceImageDim;
 
         int tempIndex = tex1Dfetch(maskTexture, tid);
@@ -390,36 +385,39 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField) {
         const int y = tempIndex / imageSize.x;
         const int x = tempIndex - y * imageSize.x;
 
-        // the "nearest previous" node is determined [0,0,0]
+        // The "nearest previous" node is determined [0,0,0]
         const float3 gridVoxelSpacing = c_ControlPointVoxelSpacing;
         const int3 nodeAnte = {
-            (int)floorf((float)x / gridVoxelSpacing.x),
-            (int)floorf((float)y / gridVoxelSpacing.y),
-            (int)floorf((float)z / gridVoxelSpacing.z)
+            int((float)x / gridVoxelSpacing.x),
+            int((float)y / gridVoxelSpacing.y),
+            int((float)z / gridVoxelSpacing.z)
         };
 
-        const unsigned shareMemIndex = 4 * threadIdx.x;
-
         // Z basis values
-        float relative = fabsf((float)z / gridVoxelSpacing.z - (float)nodeAnte.z);
-        relative = relative > 0 ? relative : 0.f;
-        if (c_UseBSpline) GetBasisBSplineValues(relative, &zBasis[shareMemIndex]);
-        else GetBasisSplineValues(relative, &zBasis[shareMemIndex]);
+        extern __shared__ float yBasis[];   // Shared memory
+        const unsigned sharedMemIndex = 4 * threadIdx.x;
+        // Compute the shared memory offset which corresponds to four times the number of thread per block
+        float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z];
+        float relative = (float)z / gridVoxelSpacing.z - (float)nodeAnte.z;
+        if (relative < 0) relative = 0; // rounding error
+        if (c_UseBSpline) GetBasisBSplineValues(relative, &zBasis[sharedMemIndex]);
+        else GetBasisSplineValues(relative, &zBasis[sharedMemIndex]);
+
         // Y basis values
-        relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y);
-        relative = relative > 0 ? relative : 0.f;
-        if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[shareMemIndex]);
-        else GetBasisSplineValues(relative, &yBasis[shareMemIndex]);
+        relative = (float)y / gridVoxelSpacing.y - (float)nodeAnte.y;
+        if (relative < 0) relative = 0; // rounding error
+        if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]);
+        else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]);
+
         // X basis values
         float xBasis[4];
-        relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x);
-        relative = relative > 0 ? relative : 0.f;
+        relative = (float)x / gridVoxelSpacing.x - (float)nodeAnte.x;
+        if (relative < 0) relative = 0; // rounding error
         if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis);
         else GetBasisSplineValues(relative, xBasis);
 
         const int3 controlPointImageDim = c_ControlPointImageDim;
         float4 displacement{};
-        float basis;
 
         for (int c = 0; c < 4; c++) {
             float3 tempDisplacement{};
@@ -431,29 +429,29 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField) {
                 const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, indexXYZ++);
                 const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, indexXYZ);
 
-                basis = yBasis[shareMemIndex + b];
-                tempDisplacement.x += (nodeCoefficientA.x * xBasis[0] +
-                                       nodeCoefficientB.x * xBasis[1] +
-                                       nodeCoefficientC.x * xBasis[2] +
-                                       nodeCoefficientD.x * xBasis[3]) * basis;
+                const float& basis = yBasis[sharedMemIndex + b];
+                tempDisplacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
+                                               nodeCoefficientB.x * xBasis[1] +
+                                               nodeCoefficientC.x * xBasis[2] +
+                                               nodeCoefficientD.x * xBasis[3]);
 
-                tempDisplacement.y += (nodeCoefficientA.y * xBasis[0] +
-                                       nodeCoefficientB.y * xBasis[1] +
-                                       nodeCoefficientC.y * xBasis[2] +
-                                       nodeCoefficientD.y * xBasis[3]) * basis;
+                tempDisplacement.y += basis * (nodeCoefficientA.y * xBasis[0] +
+                                               nodeCoefficientB.y * xBasis[1] +
+                                               nodeCoefficientC.y * xBasis[2] +
+                                               nodeCoefficientD.y * xBasis[3]);
 
-                tempDisplacement.z += (nodeCoefficientA.z * xBasis[0] +
-                                       nodeCoefficientB.z * xBasis[1] +
-                                       nodeCoefficientC.z * xBasis[2] +
-                                       nodeCoefficientD.z * xBasis[3]) * basis;
+                tempDisplacement.z += basis * (nodeCoefficientA.z * xBasis[0] +
+                                               nodeCoefficientB.z * xBasis[1] +
+                                               nodeCoefficientC.z * xBasis[2] +
+                                               nodeCoefficientD.z * xBasis[3]);
 
                 indexYZ += controlPointImageDim.x;
             }
 
-            basis = zBasis[shareMemIndex + c];
-            displacement.x += tempDisplacement.x * basis;
-            displacement.y += tempDisplacement.y * basis;
-            displacement.z += tempDisplacement.z * basis;
+            const float& basis = zBasis[sharedMemIndex + c];
+            displacement.x += basis * tempDisplacement.x;
+            displacement.y += basis * tempDisplacement.y;
+            displacement.z += basis * tempDisplacement.z;
         }
 
         positionField[tid] = displacement;
@@ -463,37 +461,33 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField) {
 __global__ void reg_spline_getDeformationField2D(float4 *positionField) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < c_ActiveVoxelNumber) {
-        // Allocate the shared memory
-        extern __shared__ float yBasis[];
-
         const int3 imageSize = c_ReferenceImageDim;
 
         const int tempIndex = tex1Dfetch(maskTexture, tid);
         const int y = tempIndex / imageSize.x;
         const int x = tempIndex - y * imageSize.x;
 
-        // the "nearest previous" node is determined [0,0,0]
+        // The "nearest previous" node is determined [0,0,0]
         const float2 gridVoxelSpacing = { c_ControlPointVoxelSpacing.x, c_ControlPointVoxelSpacing.y };
-        const int2 nodeAnte = {
-            (int)floorf((float)x / gridVoxelSpacing.x),
-            (int)floorf((float)y / gridVoxelSpacing.y)
-        };
-
-        const unsigned shareMemIndex = 4 * threadIdx.x;
+        const int2 nodeAnte = { int((float)x / gridVoxelSpacing.x), int((float)y / gridVoxelSpacing.y) };
 
         // Y basis values
-        float relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y);
-        if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[shareMemIndex]);
-        else GetBasisSplineValues(relative, &yBasis[shareMemIndex]);
+        extern __shared__ float yBasis[];   // Shared memory
+        const unsigned sharedMemIndex = 4 * threadIdx.x;
+        float relative = (float)y / gridVoxelSpacing.y - (float)nodeAnte.y;
+        if (relative < 0) relative = 0; // rounding error
+        if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]);
+        else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]);
+
         // X basis values
         float xBasis[4];
-        relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x);
+        relative = (float)x / gridVoxelSpacing.x - (float)nodeAnte.x;
+        if (relative < 0) relative = 0; // rounding error
         if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis);
         else GetBasisSplineValues(relative, xBasis);
 
         const int2 controlPointImageDim = { c_ControlPointImageDim.x, c_ControlPointImageDim.y };
         float4 displacement{};
-        float basis;
 
         for (int b = 0; b < 4; b++) {
             int index = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
@@ -503,7 +497,7 @@ __global__ void reg_spline_getDeformationField2D(float4 *positionField) {
             const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, index++);
             const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, index);
 
-            basis = yBasis[shareMemIndex + b];
+            const float& basis = yBasis[sharedMemIndex + b];
             displacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
                                        nodeCoefficientB.x * xBasis[1] +
                                        nodeCoefficientC.x * xBasis[2] +
@@ -1002,16 +996,16 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, f
 
         float xBasis[4], yBasis[4], zBasis[4], xFirst[4], relative;
 
-        const unsigned shareMemIndex = 4 * threadIdx.x;
+        const unsigned sharedMemIndex = 4 * threadIdx.x;
 
         relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x);
         GetFirstBSplineValues(relative, xBasis, xFirst);
 
         relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y);
-        GetFirstBSplineValues(relative, yBasis, &yFirst[shareMemIndex]);
+        GetFirstBSplineValues(relative, yBasis, &yFirst[sharedMemIndex]);
 
         relative = fabsf((float)z / gridVoxelSpacing.z - (float)nodeAnte.z);
-        GetFirstBSplineValues(relative, zBasis, &zFirst[shareMemIndex]);
+        GetFirstBSplineValues(relative, zBasis, &zFirst[sharedMemIndex]);
 
         const int3 controlPointImageDim = c_ControlPointImageDim;
         float3 Tx{};
@@ -1022,8 +1016,8 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, f
             for (int b = 0; b < 4; ++b) {
                 int indexXYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
                 float3 tempBasisXY = make_float3(yBasis[b] * zBasis[c],
-                                                 yFirst[shareMemIndex + b] * zBasis[c],
-                                                 yBasis[b] * zFirst[shareMemIndex + c]);
+                                                 yFirst[sharedMemIndex + b] * zBasis[c],
+                                                 yBasis[b] * zFirst[sharedMemIndex + c]);
 
                 float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++);
                 float3 tempBasis = make_float3(xFirst[0], xBasis[0], xBasis[0]) * tempBasisXY;

From e15d3283b9cfa4d834839d1eeb2a313ac36af803 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 16 Jun 2023 15:37:46 +0100
Subject: [PATCH 138/314] Add tests for *Compute::GetDeformationField()

---
 niftyreg_build_version.txt                |   2 +-
 reg-test/CMakeLists.txt                   |   1 +
 reg-test/reg_test_getDeformationField.cpp | 474 ++++++++++++++++++++++
 3 files changed, 476 insertions(+), 1 deletion(-)
 create mode 100644 reg-test/reg_test_getDeformationField.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a700e799..8c9cf7e2 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-257
+258
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index d2bab9af..ed1d77b3 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -109,6 +109,7 @@ include(Catch)
 #-----------------------------------------------------------------------------
 set(EXEC_LIST reg_test_affineDeformationField)
 set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST})
+set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST})
 set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
new file mode 100644
index 00000000..c1ceb951
--- /dev/null
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -0,0 +1,474 @@
+// OpenCL is not supported for this test
+#undef _USE_OPENCL
+
+#include "reg_test_common.h"
+#include <iomanip>
+
+/*
+    This test file contains the following unit tests:
+    test functions: creation of a deformation field from a control point grid
+    In 2D and 3D
+    Cubic spline
+*/
+
+
+class GetDeformationFieldTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<shared_ptr<Platform>, unique_ptr<F3dContent>, TestData, bool, bool>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    GetDeformationFieldTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create a 2D reference image
+        vector<NiftiImage::dim_t> dimFlo{ 4, 4 };
+        NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
+
+        // Create a 3D reference image
+        dimFlo.push_back(4);
+        NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32);
+
+        // Generate the different test cases
+        // Test 2D
+        NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
+        auto cpp2dPtr = controlPointGrid2d.data();
+        for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i)
+            cpp2dPtr[i] = distr(gen);
+
+        // Add the test data
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "2D",
+            std::move(reference2d),
+            std::move(controlPointGrid2d)
+        ));
+
+        // Test 3D
+        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+        auto cpp3dPtr = controlPointGrid3d.data();
+        for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i)
+            cpp3dPtr[i] = distr(gen);
+
+        // Add the test data
+        testData.emplace_back(TestData(
+            "3D",
+            std::move(reference3d),
+            std::move(controlPointGrid3d)
+        ));
+
+        // Add platforms, composition, and bspline to the test data
+        for (auto&& testData : testData) {
+            for (auto&& platformType : PlatformTypes) {
+                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
+                for (int composition = 0; composition < 2; composition++) {
+                    if (platformType == PlatformType::Cuda && composition)
+                        continue;   // CUDA platform does not support composition
+                    for (int bspline = 0; bspline < 2; bspline++) {
+                        // Make a copy of the test data
+                        auto td = testData;
+                        auto&& [testName, reference, controlPointGrid] = td;
+                        // Add content
+                        unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
+                        testCases.push_back({ platform, std::move(content), std::move(td), composition, bspline });
+                    }
+                }
+            }
+        }
+    }
+
+    template<class DataType>
+    void GetBSplineBasisValues(const DataType basis, DataType (&values)[4]) {
+        const DataType ff = basis * basis;
+        const DataType fff = ff * basis;
+        const DataType mf = static_cast<DataType>(1.0 - basis);
+        values[0] = static_cast<DataType>(mf * mf * mf / 6.0);
+        values[1] = static_cast<DataType>((3.0 * fff - 6.0 * ff + 4.0) / 6.0);
+        values[2] = static_cast<DataType>((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0);
+        values[3] = static_cast<DataType>(fff / 6.0);
+    }
+
+    template<class DataType>
+    void GetSplineBasisValues(const DataType basis, DataType(&values)[4]) {
+        const DataType ff = basis * basis;
+        values[0] = static_cast<DataType>((basis * ((2.0 - basis) * basis - 1.0)) / 2.0);
+        values[1] = static_cast<DataType>((ff * (3.0 * basis - 5.0) + 2.0) / 2.0);
+        values[2] = static_cast<DataType>((basis * ((4.0 - 3.0 * basis) * basis + 1.0)) / 2.0);
+        values[3] = static_cast<DataType>((basis - 1.0) * ff / 2.0);
+    }
+
+    void GetGridValues(const int& xPre, const int& yPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates) {
+        const auto cppPtr = controlPointGrid.data();
+        const auto cppPtrX = cppPtr.begin();
+        const auto cppPtrY = cppPtrX + NiftiImage::calcVoxelNumber(controlPointGrid, 2);
+        size_t coord = 0;
+        for (int y = yPre; y < yPre + 4; y++) {
+            const bool in = -1 < y && y < controlPointGrid->ny;
+            const size_t index = y * controlPointGrid->nx;
+            for (int x = xPre; x < xPre + 4; x++) {
+                if (in && -1 < x && x < controlPointGrid->nx) {
+                    xControlPointCoordinates[coord] = cppPtrX[index + x];
+                    yControlPointCoordinates[coord] = cppPtrY[index + x];
+                } else {
+                    xControlPointCoordinates[coord] = 0;
+                    yControlPointCoordinates[coord] = 0;
+                }
+                coord++;
+            }
+        }
+    }
+
+    void GetGridValues(const int& xPre, const int& yPre, const int& zPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates, float *zControlPointCoordinates) {
+        const size_t cppVoxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
+        const auto cppPtr = controlPointGrid.data();
+        const auto cppPtrX = cppPtr.begin();
+        const auto cppPtrY = cppPtrX + cppVoxelNumber;
+        const auto cppPtrZ = cppPtrY + cppVoxelNumber;
+        size_t coord = 0, yIndex, zIndex;
+        for (int z = zPre; z < zPre + 4; z++) {
+            bool in = true;
+            if (-1 < z && z < controlPointGrid->nz)
+                zIndex = z * controlPointGrid->nx * controlPointGrid->ny;
+            else in = false;
+            for (int y = yPre; y < yPre + 4; y++) {
+                if (in && -1 < y && y < controlPointGrid->ny)
+                    yIndex = y * controlPointGrid->nx;
+                else in = false;
+                for (int x = xPre; x < xPre + 4; x++) {
+                    if (in && -1 < x && x < controlPointGrid->nx) {
+                        xControlPointCoordinates[coord] = cppPtrX[zIndex + yIndex + x];
+                        yControlPointCoordinates[coord] = cppPtrY[zIndex + yIndex + x];
+                        zControlPointCoordinates[coord] = cppPtrZ[zIndex + yIndex + x];
+                    } else {
+                        xControlPointCoordinates[coord] = 0;
+                        yControlPointCoordinates[coord] = 0;
+                        zControlPointCoordinates[coord] = 0;
+                    }
+                    coord++;
+                }
+            }
+        }
+    }
+
+    template<class DataType>
+    void GetDeformationField(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) {
+        if (controlPointGrid->nz > 1)
+            GetDeformationField3D<DataType>(controlPointGrid, defField, mask, composition, bspline);
+        else
+            GetDeformationField2D<DataType>(controlPointGrid, defField, mask, composition, bspline);
+    }
+
+    template<class DataType>
+    void GetDeformationField2D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) {
+        auto defFieldPtr = defField.data();
+        auto defFieldPtrX = defFieldPtr.begin();
+        auto defFieldPtrY = defFieldPtrX + NiftiImage::calcVoxelNumber(defField, 3);
+
+        const DataType gridVoxelSpacing[2] = { controlPointGrid->dx / defField->dx, controlPointGrid->dy / defField->dy };
+        DataType xBasis[4], yBasis[4], xyBasis[16], xControlPointCoordinates[16], yControlPointCoordinates[16];
+        int oldXPre = -1, oldYPre = -1;
+
+        if (composition) {  // Composition of deformation fields
+            // Read the ijk sform or qform, as appropriate
+            const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk;
+
+            for (int y = 0; y < defField->ny; y++) {
+                size_t index = y * defField->nx;
+                for (int x = 0; x < defField->nx; x++) {
+                    // The previous position at the current pixel position is read
+                    DataType xReal = defFieldPtrX[index];
+                    DataType yReal = defFieldPtrY[index];
+
+                    // From real to pixel position in the CPP
+                    const DataType xVoxel = realToVoxel->m[0][0] * xReal + realToVoxel->m[0][1] * yReal + realToVoxel->m[0][3];
+                    const DataType yVoxel = realToVoxel->m[1][0] * xReal + realToVoxel->m[1][1] * yReal + realToVoxel->m[1][3];
+
+                    // The spline coefficients are computed
+                    int xPre = reg_floor(xVoxel);
+                    DataType basis = xVoxel - (DataType)xPre--;
+                    if (basis < 0) basis = 0; // rounding error
+                    if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
+                    else GetSplineBasisValues<DataType>(basis, xBasis);
+
+                    int yPre = reg_floor(yVoxel);
+                    basis = yVoxel - (DataType)yPre--;
+                    if (basis < 0) basis = 0; // rounding error
+                    if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
+                    else GetSplineBasisValues<DataType>(basis, yBasis);
+
+                    if (xVoxel >= 0 && xVoxel <= defField->nx - 1 &&
+                        yVoxel >= 0 && yVoxel <= defField->ny - 1) {
+                        // The control point positions are extracted
+                        if (oldXPre != xPre || oldYPre != yPre) {
+                            GetGridValues(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates);
+                            oldXPre = xPre;
+                            oldYPre = yPre;
+                        }
+
+                        xReal = 0; yReal = 0;
+                        if (mask[index] > -1) {
+                            for (int b = 0; b < 4; b++) {
+                                for (int a = 0; a < 4; a++) {
+                                    const DataType xyBasis = xBasis[a] * yBasis[b];
+                                    xReal += xControlPointCoordinates[b * 4 + a] * xyBasis;
+                                    yReal += yControlPointCoordinates[b * 4 + a] * xyBasis;
+                                }
+                            }
+                        }
+
+                        defFieldPtrX[index] = xReal;
+                        defFieldPtrY[index] = yReal;
+                    }
+                    index++;
+                }
+            }
+        } else {    // If the deformation field is blank - !composition
+            for (int y = 0; y < defField->ny; y++) {
+                size_t index = y * defField->nx;
+
+                int yPre = (int)((DataType)y / gridVoxelSpacing[1]);
+                DataType basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre;
+                if (basis < 0) basis = 0; // rounding error
+                if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
+                else GetSplineBasisValues<DataType>(basis, yBasis);
+
+                for (int x = 0; x < defField->nx; x++) {
+                    int xPre = (int)((DataType)x / gridVoxelSpacing[0]);
+                    basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre;
+                    if (basis < 0) basis = 0; // rounding error
+                    if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
+                    else GetSplineBasisValues<DataType>(basis, xBasis);
+
+                    size_t coord = 0;
+                    for (int a = 0; a < 4; a++) {
+                        xyBasis[coord++] = xBasis[0] * yBasis[a];
+                        xyBasis[coord++] = xBasis[1] * yBasis[a];
+                        xyBasis[coord++] = xBasis[2] * yBasis[a];
+                        xyBasis[coord++] = xBasis[3] * yBasis[a];
+                    }
+
+                    if (oldXPre != xPre || oldYPre != yPre) {
+                        GetGridValues(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates);
+                        oldXPre = xPre;
+                        oldYPre = yPre;
+                    }
+
+                    DataType xReal = 0, yReal = 0;
+                    if (mask[index] > -1) {
+                        for (int a = 0; a < 16; a++) {
+                            xReal += xControlPointCoordinates[a] * xyBasis[a];
+                            yReal += yControlPointCoordinates[a] * xyBasis[a];
+                        }
+                    }
+                    defFieldPtrX[index] = xReal;
+                    defFieldPtrY[index] = yReal;
+                    index++;
+                }
+            }
+        }
+    }
+
+    template<class DataType>
+    void GetDeformationField3D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) {
+        DataType xBasis[4], yBasis[4], zBasis[4];
+        DataType xControlPointCoordinates[64];
+        DataType yControlPointCoordinates[64];
+        DataType zControlPointCoordinates[64];
+
+        const size_t defFieldVoxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
+        auto defFieldPtr = defField.data();
+        auto defFieldPtrX = defFieldPtr.begin();
+        auto defFieldPtrY = defFieldPtrX + defFieldVoxelNumber;
+        auto defFieldPtrZ = defFieldPtrY + defFieldVoxelNumber;
+
+        if (composition) {  // Composition of deformation fields
+            // Read the ijk sform or qform, as appropriate
+            const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk;
+            for (int z = 0; z < defField->nz; z++) {
+                size_t index = z * defField->nx * defField->ny;
+                int oldPreX = -99; int oldPreY = -99; int oldPreZ = -99;
+                for (int y = 0; y < defField->ny; y++) {
+                    for (int x = 0; x < defField->nx; x++) {
+                        if (mask[index] > -1) {
+                            // The previous position at the current pixel position is read
+                            DataType real[] = { defFieldPtrX[index], defFieldPtrY[index], defFieldPtrZ[index] };
+
+                            // From real to pixel position in the control point space
+                            DataType voxel[3];
+                            voxel[0] =
+                                realToVoxel->m[0][0] * real[0] +
+                                realToVoxel->m[0][1] * real[1] +
+                                realToVoxel->m[0][2] * real[2] +
+                                realToVoxel->m[0][3];
+                            voxel[1] =
+                                realToVoxel->m[1][0] * real[0] +
+                                realToVoxel->m[1][1] * real[1] +
+                                realToVoxel->m[1][2] * real[2] +
+                                realToVoxel->m[1][3];
+                            voxel[2] =
+                                realToVoxel->m[2][0] * real[0] +
+                                realToVoxel->m[2][1] * real[1] +
+                                realToVoxel->m[2][2] * real[2] +
+                                realToVoxel->m[2][3];
+
+                            // The spline coefficients are computed
+                            int xPre = reg_floor(voxel[0]);
+                            DataType basis = voxel[0] - (DataType)xPre--;
+                            if (basis < 0) basis = 0; // rounding error
+                            if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
+                            else GetSplineBasisValues<DataType>(basis, xBasis);
+
+                            int yPre = reg_floor(voxel[1]);
+                            basis = voxel[1] - (DataType)yPre--;
+                            if (basis < 0) basis = 0; // rounding error
+                            if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
+                            else GetSplineBasisValues<DataType>(basis, yBasis);
+
+                            int zPre = reg_floor(voxel[2]);
+                            basis = voxel[2] - (DataType)zPre--;
+                            if (basis < 0) basis = 0; // rounding error
+                            if (bspline) GetBSplineBasisValues<DataType>(basis, zBasis);
+                            else GetSplineBasisValues<DataType>(basis, zBasis);
+
+                            // The control point positions are extracted
+                            if (xPre != oldPreX || yPre != oldPreY || zPre != oldPreZ) {
+                                GetGridValues(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates);
+                                oldPreX = xPre;
+                                oldPreY = yPre;
+                                oldPreZ = zPre;
+                            }
+
+                            real[0] = real[1] = real[2] = 0;
+                            int coord = 0;
+                            for (int c = 0; c < 4; c++) {
+                                for (int b = 0; b < 4; b++) {
+                                    for (int a = 0; a < 4; a++) {
+                                        DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c];
+                                        real[0] += xControlPointCoordinates[coord] * tempValue;
+                                        real[1] += yControlPointCoordinates[coord] * tempValue;
+                                        real[2] += zControlPointCoordinates[coord] * tempValue;
+                                        coord++;
+                                    }
+                                }
+                            }
+                            defFieldPtrX[index] = real[0];
+                            defFieldPtrY[index] = real[1];
+                            defFieldPtrZ[index] = real[2];
+                        }
+                        index++;
+                    }
+                }
+            }
+        } else {    // If the deformation field is blank - !composition
+            const DataType gridVoxelSpacing[3] = {
+                controlPointGrid->dx / defField->dx,
+                controlPointGrid->dy / defField->dy,
+                controlPointGrid->dz / defField->dz
+            };
+
+            for (int z = 0; z < defField->nz; z++) {
+                size_t index = z * defField->nx * defField->ny;
+                DataType oldBasis = DataType(1.1);
+
+                int zPre = int(DataType(z) / gridVoxelSpacing[2]);
+                DataType basis = (DataType)z / gridVoxelSpacing[2] - (DataType)zPre;
+                if (basis < 0) basis = 0; // rounding error
+                if (bspline) GetBSplineBasisValues<DataType>(basis, zBasis);
+                else GetSplineBasisValues<DataType>(basis, zBasis);
+
+                for (int y = 0; y < defField->ny; y++) {
+                    int yPre = int(DataType(y) / gridVoxelSpacing[1]);
+                    basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre;
+                    if (basis < 0) basis = 0; // rounding error
+                    if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
+                    else GetSplineBasisValues<DataType>(basis, yBasis);
+                    int coord = 0;
+                    DataType yzBasis[16];
+                    for (int a = 0; a < 4; a++) {
+                        yzBasis[coord++] = yBasis[0] * zBasis[a];
+                        yzBasis[coord++] = yBasis[1] * zBasis[a];
+                        yzBasis[coord++] = yBasis[2] * zBasis[a];
+                        yzBasis[coord++] = yBasis[3] * zBasis[a];
+                    }
+
+                    for (int x = 0; x < defField->nx; x++) {
+                        int xPre = int(DataType(x) / gridVoxelSpacing[0]);
+                        basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre;
+                        if (basis < 0) basis = 0; // rounding error
+                        if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
+                        else GetSplineBasisValues<DataType>(basis, xBasis);
+                        coord = 0;
+                        DataType xyzBasis[64];
+                        for (int a = 0; a < 16; a++) {
+                            xyzBasis[coord++] = xBasis[0] * yzBasis[a];
+                            xyzBasis[coord++] = xBasis[1] * yzBasis[a];
+                            xyzBasis[coord++] = xBasis[2] * yzBasis[a];
+                            xyzBasis[coord++] = xBasis[3] * yzBasis[a];
+                        }
+                        if (basis <= oldBasis || x == 0)
+                            GetGridValues(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates);
+                        oldBasis = basis;
+
+                        DataType real[3]{};
+                        if (mask[index] > -1) {
+                            for (int a = 0; a < 64; a++) {
+                                real[0] += xControlPointCoordinates[a] * xyzBasis[a];
+                                real[1] += yControlPointCoordinates[a] * xyzBasis[a];
+                                real[2] += zControlPointCoordinates[a] * xyzBasis[a];
+                            }
+                        }// mask
+                        defFieldPtrX[index] = real[0];
+                        defFieldPtrY[index] = real[1];
+                        defFieldPtrZ[index] = real[2];
+                        index++;
+                    } // x
+                } // y
+            } // z
+        } // composition
+    }
+};
+
+TEST_CASE_METHOD(GetDeformationFieldTest, "Get deformation field", "[GetDeformationField]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [platform, content, testData, composition, bspline] = testCase;
+        auto&& [testName, reference, controlPointGrid] = testData;
+        const std::string sectionName = testName + " " + platform->GetName() + " composition=" + std::to_string(composition) + " bspline=" + std::to_string(bspline);
+
+        SECTION(sectionName) {
+            std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl;
+
+            // Compute the deformation field
+            unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+            compute->GetDeformationField(composition, bspline);
+            NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::ImageInfoAndAllocData);
+            GetDeformationField<float>(controlPointGrid, defFieldExp, content->GetReferenceMask(), composition, bspline);
+
+            // Check the results
+            NiftiImage defField = content->GetDeformationField();
+            const auto defFieldPtr = defField.data();
+            const auto defFieldExpPtr = defFieldExp.data();
+            defField.disown();
+            // Increase the precision for the output
+            std::cout << std::fixed << std::setprecision(10);
+            for (size_t i = 0; i < defFieldExp.nVoxels(); ++i) {
+                const double defFieldVal = defFieldPtr[i];
+                const double defFieldExpVal = defFieldExpPtr[i];
+                std::cout << i << " " << defFieldVal << " " << defFieldExpVal << std::endl;
+                REQUIRE(fabs(defFieldVal - defFieldExpVal) < EPS);
+            }
+            // Ensure the termination of content before CudaContext
+            content.reset();
+        }
+    }
+}

From 467027dbf296d9b43d302e66030768f81592466f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 16 Jun 2023 16:27:31 +0100
Subject: [PATCH 139/314] Refactorisations

---
 niftyreg_build_version.txt              |   2 +-
 reg-lib/cuda/CMakeLists.txt             |   1 +
 reg-lib/cuda/_reg_nmi_gpu.cu            |   2 +-
 reg-lib/cuda/_reg_resampling_kernels.cu |   2 +-
 reg-lib/cuda/_reg_tools_gpu.cu          | 400 +++++++++++------------
 reg-lib/cuda/_reg_tools_kernels.cu      | 409 +++++++++++-------------
 reg-test/reg_test_conjugateGradient.cpp |   4 +-
 reg-test/reg_test_imageGradient.cpp     |   3 +-
 reg-test/reg_test_normaliseGradient.cpp |   4 +-
 9 files changed, 390 insertions(+), 437 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 8c9cf7e2..98ecf581 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-258
+259
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index efef0521..d56a72f6 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -44,6 +44,7 @@ else(NOT COMPILE_RESULT_VAR)
     endif(CMAKE_BUILD_TYPE STREQUAL "Debug")
     if(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
         set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -use_fast_math")
+        message(STATUS "CUDA fast math enabled")
     endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
 endif(NOT COMPILE_RESULT_VAR)
 #-----------------------------------------------------------------------------
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 207a0fd0..b907d8bd 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -219,7 +219,7 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
                cudaMemcpyHostToDevice);
     free(temp);
 
-    // THe gradient of the NMI is computed on the GPU
+    // The gradient of the NMI is computed on the GPU
     reg_getVoxelBasedNMIGradient_gpu(this->referenceImagePointer,
                                      this->referenceDevicePointer,
                                      this->warpedFloatingDevicePointer,
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 7f0bf7a7..07506c8d 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -15,7 +15,7 @@ __inline__ __device__ void InterpLinearKernel(float relative, float (&basis)[2])
     if (relative < 0)
         relative = 0;  // reg_rounding error
     basis[1] = relative;
-    basis[0] = 1.0 - relative;
+    basis[0] = 1.f - relative;
 }
 /* *************************************************************** */
 __global__ void reg_resampleImage2D_kernel(float *resultArray,
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index d2e1b7ad..3c40f899 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -14,327 +14,307 @@
 #include "_reg_tools_gpu.h"
 #include "_reg_tools_kernels.cu"
 
-
-/* *************************************************************** */
 /* *************************************************************** */
 void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
                                       nifti_image *controlPointImage,
                                       float4 *voxelNMIGradientArray_d,
                                       float4 *nodeNMIGradientArray_d,
-                                      float weight)
-{
+                                      float weight) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     const int nodeNumber = CalcVoxelNumber(*controlPointImage);
     const int voxelNumber = CalcVoxelNumber(*targetImage);
     const int3 targetImageDim = make_int3(targetImage->nx, targetImage->ny, targetImage->nz);
     const int3 gridSize = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-	float3 voxelNodeRatio_h = make_float3(
-            controlPointImage->dx / targetImage->dx,
-            controlPointImage->dy / targetImage->dy,
-            controlPointImage->dz / targetImage->dz);
-	// Ensure that Z=0 if 2D images
-	if(gridSize.z==1) voxelNodeRatio_h.z=0;
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_TargetImageDim,&targetImageDim,sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&gridSize,sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio,&voxelNodeRatio_h,sizeof(float3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&weight,sizeof(float)));
-
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber*sizeof(float4)));
-
-    const unsigned Grid_reg_voxelCentric2NodeCentric = (unsigned)ceil(sqrtf((float)nodeNumber/(float)blockSize->reg_voxelCentric2NodeCentric));
-    dim3 B1(blockSize->reg_voxelCentric2NodeCentric,1,1);
-	dim3 G1(Grid_reg_voxelCentric2NodeCentric,Grid_reg_voxelCentric2NodeCentric,1);
-    reg_voxelCentric2NodeCentric_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d);
-	NR_CUDA_CHECK_KERNEL(G1,B1);
-
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
+    float3 voxelNodeRatio_h = make_float3(controlPointImage->dx / targetImage->dx,
+                                          controlPointImage->dy / targetImage->dy,
+                                          controlPointImage->dz / targetImage->dz);
+    // Ensure that Z=0 if 2D images
+    if (gridSize.z == 1) voxelNodeRatio_h.z = 0;
+
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_TargetImageDim, &targetImageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &gridSize, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio, &voxelNodeRatio_h, sizeof(float3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &weight, sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber * sizeof(float4)));
+
+    const unsigned Grid_reg_voxelCentric2NodeCentric = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blockSize->reg_voxelCentric2NodeCentric));
+    dim3 B1(blockSize->reg_voxelCentric2NodeCentric, 1, 1);
+    dim3 G1(Grid_reg_voxelCentric2NodeCentric, Grid_reg_voxelCentric2NodeCentric, 1);
+    reg_voxelCentric2NodeCentric_kernel<<<G1, B1>>>(nodeNMIGradientArray_d);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
+
+    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
                                                     nifti_image *controlPointImage,
-                                                    float4 *nodeNMIGradientArray_d)
-{
+                                                    float4 *nodeNMIGradientArray_d) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     const int nodeNumber = CalcVoxelNumber(*controlPointImage);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
 
-    float4 *matrix_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3*sizeof(float4)));
+    float4 *matrix_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3 * sizeof(float4)));
     matrix_h[0] = make_float4(sourceMatrix_xyz->m[0][0], sourceMatrix_xyz->m[0][1], sourceMatrix_xyz->m[0][2], sourceMatrix_xyz->m[0][3]);
     matrix_h[1] = make_float4(sourceMatrix_xyz->m[1][0], sourceMatrix_xyz->m[1][1], sourceMatrix_xyz->m[1][2], sourceMatrix_xyz->m[1][3]);
     matrix_h[2] = make_float4(sourceMatrix_xyz->m[2][0], sourceMatrix_xyz->m[2][1], sourceMatrix_xyz->m[2][2], sourceMatrix_xyz->m[2][3]);
     float4 *matrix_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3*sizeof(float4)));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3 * sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3 * sizeof(float4), cudaMemcpyHostToDevice));
     NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3*sizeof(float4)));
+    NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3 * sizeof(float4)));
 
     const unsigned Grid_reg_convertNMIGradientFromVoxelToRealSpace =
-        (unsigned)ceil(sqrtf((float)nodeNumber/(float)blockSize->reg_convertNMIGradientFromVoxelToRealSpace));
-    dim3 G1(Grid_reg_convertNMIGradientFromVoxelToRealSpace,Grid_reg_convertNMIGradientFromVoxelToRealSpace,1);
-    dim3 B1(blockSize->reg_convertNMIGradientFromVoxelToRealSpace,1,1);
+        (unsigned)ceil(sqrtf((float)nodeNumber / (float)blockSize->reg_convertNMIGradientFromVoxelToRealSpace));
+    dim3 G1(Grid_reg_convertNMIGradientFromVoxelToRealSpace, Grid_reg_convertNMIGradientFromVoxelToRealSpace, 1);
+    dim3 B1(blockSize->reg_convertNMIGradientFromVoxelToRealSpace, 1, 1);
+    _reg_convertNMIGradientFromVoxelToRealSpace_kernel<<<G1, B1>>>(nodeNMIGradientArray_d);
+    NR_CUDA_CHECK_KERNEL(G1, B1);
 
-    _reg_convertNMIGradientFromVoxelToRealSpace_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d);
-    NR_CUDA_CHECK_KERNEL(G1,B1);
     NR_CUDA_SAFE_CALL(cudaUnbindTexture(matrixTexture));
     NR_CUDA_SAFE_CALL(cudaFree(matrix_d));
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_gaussianSmoothing_gpu( nifti_image *image,
-                                float4 *imageArray_d,
-                                float sigma,
-                                bool smoothXYZ[8])
-{
+void reg_gaussianSmoothing_gpu(nifti_image *image,
+                               float4 *imageArray_d,
+                               float sigma,
+                               bool smoothXYZ[8]) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     const int voxelNumber = CalcVoxelNumber(*image);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
 
     bool axisToSmooth[8];
-    if(smoothXYZ==nullptr){
-        for(int i=0; i<8; i++) axisToSmooth[i]=true;
-    }
-    else{
-        for(int i=0; i<8; i++) axisToSmooth[i]=smoothXYZ[i];
+    if (smoothXYZ == nullptr) {
+        for (int i = 0; i < 8; i++) axisToSmooth[i] = true;
+    } else {
+        for (int i = 0; i < 8; i++) axisToSmooth[i] = smoothXYZ[i];
     }
 
-	for(int n=1; n<4; n++){
-		if(axisToSmooth[n] && image->dim[n]>1){
+    for (int n = 1; n < 4; n++) {
+        if (axisToSmooth[n] && image->dim[n] > 1) {
             float currentSigma;
-            if(sigma>0) currentSigma=sigma/image->pixdim[n];
-            else currentSigma=fabs(sigma); // voxel based if negative value
-            int radius=(int)ceil(currentSigma*3.0f);
-            if(radius>0){
-                int kernelSize = 1+radius*2;
+            if (sigma > 0) currentSigma = sigma / image->pixdim[n];
+            else currentSigma = fabs(sigma); // voxel based if negative value
+            int radius = (int)ceil(currentSigma * 3.0f);
+            if (radius > 0) {
+                int kernelSize = 1 + radius * 2;
                 float *kernel_h;
-                NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float)));
-                float kernelSum=0;
-                for(int i=-radius; i<=radius; i++){
-					kernel_h[radius+i]=(float)(exp( -((float)i*(float)i)/(2.0*currentSigma*currentSigma)) /
-											   (currentSigma*2.506628274631));
-					// 2.506... = sqrt(2*pi)
-                    kernelSum += kernel_h[radius+i];
+                NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize * sizeof(float)));
+                float kernelSum = 0;
+                for (int i = -radius; i <= radius; i++) {
+                    kernel_h[radius + i] = (float)(exp(-((float)i * (float)i) / (2.0 * currentSigma * currentSigma)) /
+                                                   (currentSigma * 2.506628274631));
+                    // 2.506... = sqrt(2*pi)
+                    kernelSum += kernel_h[radius + i];
                 }
-				for(int i=0; i<kernelSize; i++)
-					kernel_h[i] /= kernelSum;
+                for (int i = 0; i < kernelSize; i++)
+                    kernel_h[i] /= kernelSum;
 
                 float *kernel_d;
-                NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize*sizeof(float)));
-                NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize*sizeof(float), cudaMemcpyHostToDevice));
+                NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize * sizeof(float)));
+                NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize * sizeof(float), cudaMemcpyHostToDevice));
                 NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h));
 
                 float4 *smoothedImage;
-                NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage,voxelNumber*sizeof(float4)));
-
-                NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float)));
-                NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4)));
-
-				unsigned Grid_reg_ApplyConvolutionWindow;
-                dim3 B,G;
-                switch(n){
-                    case 1:
-                        Grid_reg_ApplyConvolutionWindow =
-                            (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongX));
-                        B=dim3(blockSize->reg_ApplyConvolutionWindowAlongX,1,1);
-                        G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
-                        _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage, kernelSize);
-                        NR_CUDA_CHECK_KERNEL(G,B);
-                        break;
-                    case 2:
-                        Grid_reg_ApplyConvolutionWindow =
-                            (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongY));
-                        B=dim3(blockSize->reg_ApplyConvolutionWindowAlongY,1,1);
-                        G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
-                        _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage, kernelSize);
-                        NR_CUDA_CHECK_KERNEL(G,B);
-                        break;
-                    case 3:
-                        Grid_reg_ApplyConvolutionWindow =
-                            (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongZ));
-                        B=dim3(blockSize->reg_ApplyConvolutionWindowAlongZ,1,1);
-                        G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
-                        _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage, kernelSize);
-                        NR_CUDA_CHECK_KERNEL(G,B);
-                        break;
+                NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4)));
+                NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize * sizeof(float)));
+                NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber * sizeof(float4)));
+
+                unsigned Grid_reg_ApplyConvolutionWindow;
+                dim3 B, G;
+                switch (n) {
+                case 1:
+                    Grid_reg_ApplyConvolutionWindow =
+                        (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongX));
+                    B = dim3(blockSize->reg_ApplyConvolutionWindowAlongX, 1, 1);
+                    G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
+                    _reg_ApplyConvolutionWindowAlongX_kernel<<<G, B>>>(smoothedImage, kernelSize);
+                    NR_CUDA_CHECK_KERNEL(G, B);
+                    break;
+                case 2:
+                    Grid_reg_ApplyConvolutionWindow =
+                        (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongY));
+                    B = dim3(blockSize->reg_ApplyConvolutionWindowAlongY, 1, 1);
+                    G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
+                    _reg_ApplyConvolutionWindowAlongY_kernel<<<G, B>>>(smoothedImage, kernelSize);
+                    NR_CUDA_CHECK_KERNEL(G, B);
+                    break;
+                case 3:
+                    Grid_reg_ApplyConvolutionWindow =
+                        (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongZ));
+                    B = dim3(blockSize->reg_ApplyConvolutionWindowAlongZ, 1, 1);
+                    G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
+                    _reg_ApplyConvolutionWindowAlongZ_kernel<<<G, B>>>(smoothedImage, kernelSize);
+                    NR_CUDA_CHECK_KERNEL(G, B);
+                    break;
                 }
                 NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture));
                 NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
                 NR_CUDA_SAFE_CALL(cudaFree(kernel_d));
-                NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice));
+                NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
                 NR_CUDA_SAFE_CALL(cudaFree(smoothedImage));
             }
-		}
-	}
+        }
+    }
 }
 /* *************************************************************** */
-void reg_smoothImageForCubicSpline_gpu( nifti_image *image,
-                                        float4 *imageArray_d,
-										float *spacingVoxel)
-{
+void reg_smoothImageForCubicSpline_gpu(nifti_image *image,
+                                       float4 *imageArray_d,
+                                       float *spacingVoxel) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     const int voxelNumber = CalcVoxelNumber(*image);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim, sizeof(int3)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
 
-	for(int n=0; n<3; n++){
-		if(spacingVoxel[n]>0 && image->dim[n+1]>1){
-			int radius = static_cast<int>(reg_ceil(2.0*spacingVoxel[n]));
-			int kernelSize = 1+radius*2;
+    for (int n = 0; n < 3; n++) {
+        if (spacingVoxel[n] > 0 && image->dim[n + 1] > 1) {
+            int radius = static_cast<int>(reg_ceil(2.0 * spacingVoxel[n]));
+            int kernelSize = 1 + radius * 2;
 
             float *kernel_h;
-            NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float)));
-
-			float coeffSum=0;
-			for(int it=-radius; it<=radius; it++){
-				float coeff = (float)(fabs((float)(float)it/(float)spacingVoxel[0]));
-				if(coeff<1.0) kernel_h[it+radius] = (float)(2.0/3.0 - coeff*coeff + 0.5*coeff*coeff*coeff);
-				else if (coeff<2.0) kernel_h[it+radius] = (float)(-(coeff-2.0)*(coeff-2.0)*(coeff-2.0)/6.0);
-				else kernel_h[it+radius]=0;
-				coeffSum += kernel_h[it+radius];
-			}
-			for(int it=0;it<kernelSize;it++) kernel_h[it] /= coeffSum;
+            NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize * sizeof(float)));
+
+            float coeffSum = 0;
+            for (int it = -radius; it <= radius; it++) {
+                float coeff = (float)(fabs((float)(float)it / (float)spacingVoxel[0]));
+                if (coeff < 1.0) kernel_h[it + radius] = (float)(2.0 / 3.0 - coeff * coeff + 0.5 * coeff * coeff * coeff);
+                else if (coeff < 2.0) kernel_h[it + radius] = (float)(-(coeff - 2.0) * (coeff - 2.0) * (coeff - 2.0) / 6.0);
+                else kernel_h[it + radius] = 0;
+                coeffSum += kernel_h[it + radius];
+            }
+            for (int it = 0; it < kernelSize; it++) kernel_h[it] /= coeffSum;
 
             float *kernel_d;
-            NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize*sizeof(float)));
-            NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize*sizeof(float), cudaMemcpyHostToDevice));
+            NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize * sizeof(float)));
+            NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize * sizeof(float), cudaMemcpyHostToDevice));
             NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h));
-            NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float)));
+            NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize * sizeof(float)));
 
             float4 *smoothedImage_d;
-            NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage_d,voxelNumber*sizeof(float4)));
-
-            NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4)));
+            NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage_d, voxelNumber * sizeof(float4)));
+            NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber * sizeof(float4)));
 
             unsigned Grid_reg_ApplyConvolutionWindow;
-            dim3 B,G;
-            switch(n){
-                case 0:
-                    Grid_reg_ApplyConvolutionWindow =
-                        (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongX));
-                    B=dim3(blockSize->reg_ApplyConvolutionWindowAlongX,1,1);
-                    G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
-                    _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage_d, kernelSize);
-                    NR_CUDA_CHECK_KERNEL(G,B);
-                    break;
-                case 1:
-                    Grid_reg_ApplyConvolutionWindow =
-                        (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongY));
-                    B=dim3(blockSize->reg_ApplyConvolutionWindowAlongY,1,1);
-                    G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
-                    _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage_d, kernelSize);
-                    NR_CUDA_CHECK_KERNEL(G,B);
-                    break;
-                case 2:
-                    Grid_reg_ApplyConvolutionWindow =
-                        (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongZ));
-                    B=dim3(blockSize->reg_ApplyConvolutionWindowAlongZ,1,1);
-                    G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1);
-                    _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage_d, kernelSize);
-                    NR_CUDA_CHECK_KERNEL(G,B);
-                    break;
+            dim3 B, G;
+            switch (n) {
+            case 0:
+                Grid_reg_ApplyConvolutionWindow =
+                    (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongX));
+                B = dim3(blockSize->reg_ApplyConvolutionWindowAlongX, 1, 1);
+                G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
+                _reg_ApplyConvolutionWindowAlongX_kernel<<<G, B>>>(smoothedImage_d, kernelSize);
+                NR_CUDA_CHECK_KERNEL(G, B);
+                break;
+            case 1:
+                Grid_reg_ApplyConvolutionWindow =
+                    (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongY));
+                B = dim3(blockSize->reg_ApplyConvolutionWindowAlongY, 1, 1);
+                G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
+                _reg_ApplyConvolutionWindowAlongY_kernel<<<G, B>>>(smoothedImage_d, kernelSize);
+                NR_CUDA_CHECK_KERNEL(G, B);
+                break;
+            case 2:
+                Grid_reg_ApplyConvolutionWindow =
+                    (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongZ));
+                B = dim3(blockSize->reg_ApplyConvolutionWindowAlongZ, 1, 1);
+                G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
+                _reg_ApplyConvolutionWindowAlongZ_kernel<<<G, B>>>(smoothedImage_d, kernelSize);
+                NR_CUDA_CHECK_KERNEL(G, B);
+                break;
             }
+
             NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture));
             NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
             NR_CUDA_SAFE_CALL(cudaFree(kernel_d));
-            NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice));
+            NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
             NR_CUDA_SAFE_CALL(cudaFree(smoothedImage_d));
         }
     }
 }
 /* *************************************************************** */
-void reg_multiplyValue_gpu(int num, float4 *array_d, float value)
-{
+void reg_multiplyValue_gpu(int num, float4 *array_d, float value) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &value, sizeof(float)));
 
-    const unsigned Grid_reg_multiplyValues = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic));
-    dim3 G=dim3(Grid_reg_multiplyValues,Grid_reg_multiplyValues,1);
-    dim3 B=dim3(blockSize->reg_arithmetic,1,1);
-    reg_multiplyValue_kernel_float4<<<G,B>>>(array_d);
-    NR_CUDA_CHECK_KERNEL(G,B);
+    const unsigned Grid_reg_multiplyValues = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic));
+    dim3 G = dim3(Grid_reg_multiplyValues, Grid_reg_multiplyValues, 1);
+    dim3 B = dim3(blockSize->reg_arithmetic, 1, 1);
+    reg_multiplyValue_kernel_float4<<<G, B>>>(array_d);
+    NR_CUDA_CHECK_KERNEL(G, B);
 }
 /* *************************************************************** */
-void reg_addValue_gpu(int num, float4 *array_d, float value)
-{
+void reg_addValue_gpu(int num, float4 *array_d, float value) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &value, sizeof(float)));
 
-    const unsigned Grid_reg_addValues = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic));
-    dim3 G=dim3(Grid_reg_addValues,Grid_reg_addValues,1);
-    dim3 B=dim3(blockSize->reg_arithmetic,1,1);
-    reg_addValue_kernel_float4<<<G,B>>>(array_d);
-    NR_CUDA_CHECK_KERNEL(G,B);
+    const unsigned Grid_reg_addValues = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic));
+    dim3 G = dim3(Grid_reg_addValues, Grid_reg_addValues, 1);
+    dim3 B = dim3(blockSize->reg_arithmetic, 1, 1);
+    reg_addValue_kernel_float4<<<G, B>>>(array_d);
+    NR_CUDA_CHECK_KERNEL(G, B);
 }
 /* *************************************************************** */
-void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d)
-{
+void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int)));
 
-    const unsigned Grid_reg_multiplyArrays = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic));
-    dim3 G=dim3(Grid_reg_multiplyArrays,Grid_reg_multiplyArrays,1);
-    dim3 B=dim3(blockSize->reg_arithmetic,1,1);
-    reg_multiplyArrays_kernel_float4<<<G,B>>>(array1_d,array2_d);
-    NR_CUDA_CHECK_KERNEL(G,B);
+    const unsigned Grid_reg_multiplyArrays = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic));
+    dim3 G = dim3(Grid_reg_multiplyArrays, Grid_reg_multiplyArrays, 1);
+    dim3 B = dim3(blockSize->reg_arithmetic, 1, 1);
+    reg_multiplyArrays_kernel_float4<<<G, B>>>(array1_d, array2_d);
+    NR_CUDA_CHECK_KERNEL(G, B);
 }
 /* *************************************************************** */
-void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d)
-{
+void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int)));
 
-    const unsigned Grid_reg_addArrays = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic));
-    dim3 G=dim3(Grid_reg_addArrays,Grid_reg_addArrays,1);
-    dim3 B=dim3(blockSize->reg_arithmetic,1,1);
-    reg_addArrays_kernel_float4<<<G,B>>>(array1_d,array2_d);
-    NR_CUDA_CHECK_KERNEL(G,B);
+    const unsigned Grid_reg_addArrays = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic));
+    dim3 G = dim3(Grid_reg_addArrays, Grid_reg_addArrays, 1);
+    dim3 B = dim3(blockSize->reg_arithmetic, 1, 1);
+    reg_addArrays_kernel_float4<<<G, B>>>(array1_d, array2_d);
+    NR_CUDA_CHECK_KERNEL(G, B);
 }
 /* *************************************************************** */
-void reg_fillMaskArray_gpu(int num, int *array1_d)
-{
+void reg_fillMaskArray_gpu(int num, int *array1_d) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int)));
+    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int)));
 
-    const unsigned Grid_reg_fillMaskArray = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic));
-    dim3 G=dim3(Grid_reg_fillMaskArray,Grid_reg_fillMaskArray,1);
-    dim3 B=dim3(blockSize->reg_arithmetic,1,1);
-    reg_fillMaskArray_kernel<<<G,B>>>(array1_d);
-    NR_CUDA_CHECK_KERNEL(G,B);
+    const unsigned Grid_reg_fillMaskArray = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic));
+    dim3 G = dim3(Grid_reg_fillMaskArray, Grid_reg_fillMaskArray, 1);
+    dim3 B = dim3(blockSize->reg_arithmetic, 1, 1);
+    reg_fillMaskArray_kernel<<<G, B>>>(array1_d);
+    NR_CUDA_CHECK_KERNEL(G, B);
 }
 /* *************************************************************** */
-float reg_sumReduction_gpu(float *array_d,size_t size)
-{
+float reg_sumReduction_gpu(float *array_d, size_t size) {
     thrust::device_ptr<float> dptr(array_d);
-    return thrust::reduce(dptr,dptr+size, 0.f, thrust::plus<float>());
+    return thrust::reduce(dptr, dptr + size, 0.f, thrust::plus<float>());
 }
 /* *************************************************************** */
-float reg_maxReduction_gpu(float *array_d,size_t size)
-{
+float reg_maxReduction_gpu(float *array_d, size_t size) {
     thrust::device_ptr<float> dptr(array_d);
-    return thrust::reduce(dptr, dptr+size, 0.f, thrust::maximum<float>());
+    return thrust::reduce(dptr, dptr + size, 0.f, thrust::maximum<float>());
 }
 /* *************************************************************** */
-float reg_minReduction_gpu(float *array_d,size_t size)
-{
+float reg_minReduction_gpu(float *array_d, size_t size) {
     thrust::device_ptr<float> dptr(array_d);
-    return thrust::reduce(dptr, dptr+size, 0.f, thrust::minimum<float>());
+    return thrust::reduce(dptr, dptr + size, 0.f, thrust::minimum<float>());
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index 584e274a..df2897a6 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -22,265 +22,238 @@ texture<float4, 1, cudaReadModeElementType> gradientImageTexture;
 texture<float4, 1, cudaReadModeElementType> matrixTexture;
 texture<float, 1, cudaReadModeElementType> convolutionKernelTexture;
 /* *************************************************************** */
-__global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeNMIGradientArray_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_NodeNumber){
+__global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeNMIGradientArray_d) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_NodeNumber) {
+        const int3 gridSize = c_ControlPointImageDim;
+        int tempIndex = tid;
+        const short z = (int)(tempIndex / (gridSize.x * gridSize.y));
+        tempIndex -= z * (gridSize.x) * (gridSize.y);
+        const short y = (int)(tempIndex / (gridSize.x));
+        const short x = tempIndex - y * (gridSize.x);
 
-		const int3 gridSize = c_ControlPointImageDim;
-		int tempIndex=tid;
-		const short z =(int)(tempIndex/(gridSize.x*gridSize.y));
-		tempIndex -= z*(gridSize.x)*(gridSize.y);
-		const short y =(int)(tempIndex/(gridSize.x));
-		const short x = tempIndex - y*(gridSize.x);
+        const float3 ratio = c_VoxelNodeRatio;
+        const short X = round((x - 1) * ratio.x);
+        const short Y = round((y - 1) * ratio.y);
+        const short Z = round((z - 1) * ratio.z);
 
-		const float3 ratio = c_VoxelNodeRatio;
-		const short X = round((x-1)*ratio.x);
-		const short Y = round((y-1)*ratio.y);
-		const short Z = round((z-1)*ratio.z);
+        const int3 imageSize = c_TargetImageDim;
 
-		const int3 imageSize = c_TargetImageDim;
-
-		if(-1<X && X<imageSize.x && -1<Y && Y<imageSize.y && -1<Z && Z<imageSize.z){
-			int index = (Z*imageSize.y+Y)*imageSize.x+X;
-			float4 gradientValue = tex1Dfetch(gradientImageTexture,index);
-			nodeNMIGradientArray_d[tid] = make_float4(c_Weight*gradientValue.x,
-													  c_Weight*gradientValue.y,
-													  c_Weight*gradientValue.z,
-													  0.0f);
-		}
-		else nodeNMIGradientArray_d[tid]=make_float4(0, 0.0f, 0.0f, 0.0f);
-	}
+        if (-1 < X && X < imageSize.x && -1 < Y && Y < imageSize.y && -1 < Z && Z < imageSize.z) {
+            int index = (Z * imageSize.y + Y) * imageSize.x + X;
+            float4 gradientValue = tex1Dfetch(gradientImageTexture, index);
+            nodeNMIGradientArray_d[tid] = make_float4(c_Weight * gradientValue.x,
+                                                      c_Weight * gradientValue.y,
+                                                      c_Weight * gradientValue.z,
+                                                      0.0f);
+        } else nodeNMIGradientArray_d[tid] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+    }
 }
 /* *************************************************************** */
-__global__ void _reg_convertNMIGradientFromVoxelToRealSpace_kernel(float4 *gradient)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_NodeNumber){
-		float4 voxelGradient = gradient[tid];
-		float4 realGradient;
-		float4 matrix = tex1Dfetch(matrixTexture,0);
-		realGradient.x =	matrix.x*voxelGradient.x + matrix.y*voxelGradient.y  +
-								matrix.z*voxelGradient.z;
-		matrix = tex1Dfetch(matrixTexture,1);
-		realGradient.y =	matrix.x*voxelGradient.x + matrix.y*voxelGradient.y  +
-								matrix.z*voxelGradient.z;
-		matrix = tex1Dfetch(matrixTexture,2);
-		realGradient.z =	matrix.x*voxelGradient.x + matrix.y*voxelGradient.y  +
-								matrix.z*voxelGradient.z;
+__global__ void _reg_convertNMIGradientFromVoxelToRealSpace_kernel(float4 *gradient) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_NodeNumber) {
+        float4 voxelGradient = gradient[tid];
+        float4 realGradient;
+        float4 matrix = tex1Dfetch(matrixTexture, 0);
+        realGradient.x = matrix.x * voxelGradient.x + matrix.y * voxelGradient.y + matrix.z * voxelGradient.z;
+        matrix = tex1Dfetch(matrixTexture, 1);
+        realGradient.y = matrix.x * voxelGradient.x + matrix.y * voxelGradient.y + matrix.z * voxelGradient.z;
+        matrix = tex1Dfetch(matrixTexture, 2);
+        realGradient.z = matrix.x * voxelGradient.x + matrix.y * voxelGradient.y + matrix.z * voxelGradient.z;
 
-		gradient[tid]=realGradient;
-	}
+        gradient[tid] = realGradient;
+    }
 }
+/* *************************************************************** */
+__global__ void _reg_ApplyConvolutionWindowAlongX_kernel(float4 *smoothedImage, int windowSize) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        int3 imageSize = c_ImageDim;
 
-__global__ void _reg_ApplyConvolutionWindowAlongX_kernel(   float4 *smoothedImage,
-															int windowSize)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-
-		int3 imageSize = c_ImageDim;
-
-		int temp=tid;
-		const short z=(int)(temp/(imageSize.x*imageSize.y));
-		temp -= z*imageSize.x*imageSize.y;
-		const short y =(int)(temp/(imageSize.x));
-		short x = temp - y*(imageSize.x);
+        int temp = tid;
+        const short z = (int)(temp / (imageSize.x * imageSize.y));
+        temp -= z * imageSize.x * imageSize.y;
+        const short y = (int)(temp / (imageSize.x));
+        short x = temp - y * (imageSize.x);
 
-		int radius = (windowSize-1)/2;
-		int index = tid - radius;
-		x -= radius;
+        int radius = (windowSize - 1) / 2;
+        int index = tid - radius;
+        x -= radius;
 
-		float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 
-		// Kahan summation used here
-		float3 c=make_float3(0.f,0.f,0.f), Y, t;
-		float windowValue;
-		for(int i=0; i<windowSize; i++){
-			if(-1<x && x<imageSize.x){
-				float4 gradientValue = tex1Dfetch(gradientImageTexture,index);
-				windowValue = tex1Dfetch(convolutionKernelTexture,i);
+        // Kahan summation used here
+        float3 c = make_float3(0.f, 0.f, 0.f), Y, t;
+        float windowValue;
+        for (int i = 0; i < windowSize; i++) {
+            if (-1 < x && x < imageSize.x) {
+                float4 gradientValue = tex1Dfetch(gradientImageTexture, index);
+                windowValue = tex1Dfetch(convolutionKernelTexture, i);
 
-				Y.x = gradientValue.x * windowValue - c.x;
-				Y.y = gradientValue.y * windowValue - c.y;
-				Y.z = gradientValue.z * windowValue - c.z;
-				t.x = finalValue.x + Y.x;
-				t.y = finalValue.y + Y.y;
-				t.z = finalValue.z + Y.z;
-				c.x = (t.x - finalValue.x) - Y.x;
-				c.y = (t.y - finalValue.y) - Y.y;
-				c.z = (t.z - finalValue.z) - Y.z;
-				finalValue = make_float4(t.x, t.y, t.z, 0.f);
-			}
-			index++;
-			x++;
-		}
-		smoothedImage[tid] = finalValue;
-	}
-	return;
+                Y.x = gradientValue.x * windowValue - c.x;
+                Y.y = gradientValue.y * windowValue - c.y;
+                Y.z = gradientValue.z * windowValue - c.z;
+                t.x = finalValue.x + Y.x;
+                t.y = finalValue.y + Y.y;
+                t.z = finalValue.z + Y.z;
+                c.x = (t.x - finalValue.x) - Y.x;
+                c.y = (t.y - finalValue.y) - Y.y;
+                c.z = (t.z - finalValue.z) - Y.z;
+                finalValue = make_float4(t.x, t.y, t.z, 0.f);
+            }
+            index++;
+            x++;
+        }
+        smoothedImage[tid] = finalValue;
+    }
 }
+/* *************************************************************** */
+__global__ void _reg_ApplyConvolutionWindowAlongY_kernel(float4 *smoothedImage, int windowSize) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        int3 imageSize = c_ImageDim;
 
-__global__ void _reg_ApplyConvolutionWindowAlongY_kernel(float4 *smoothedImage,
-														 int windowSize)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-		int3 imageSize = c_ImageDim;
-
-		const short z=(int)(tid/(imageSize.x*imageSize.y));
-		int index = tid - z*imageSize.x*imageSize.y;
-		short y=(int)(index/imageSize.x);
+        const short z = (int)(tid / (imageSize.x * imageSize.y));
+        int index = tid - z * imageSize.x * imageSize.y;
+        short y = (int)(index / imageSize.x);
 
-		int radius = (windowSize-1)/2;
-		index = tid - imageSize.x*radius;
-		y -= radius;
+        int radius = (windowSize - 1) / 2;
+        index = tid - imageSize.x * radius;
+        y -= radius;
 
-		float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 
-		// Kahan summation used here
-		float3 c=make_float3(0.f,0.f,0.f), Y, t;
-		float windowValue;
-		for(int i=0; i<windowSize; i++){
-			if(-1<y && y<imageSize.y){
-				float4 gradientValue = tex1Dfetch(gradientImageTexture,index);
-				windowValue = tex1Dfetch(convolutionKernelTexture,i);
+        // Kahan summation used here
+        float3 c = make_float3(0.f, 0.f, 0.f), Y, t;
+        float windowValue;
+        for (int i = 0; i < windowSize; i++) {
+            if (-1 < y && y < imageSize.y) {
+                float4 gradientValue = tex1Dfetch(gradientImageTexture, index);
+                windowValue = tex1Dfetch(convolutionKernelTexture, i);
 
-				Y.x = gradientValue.x * windowValue - c.x;
-				Y.y = gradientValue.y * windowValue - c.y;
-				Y.z = gradientValue.z * windowValue - c.z;
-				t.x = finalValue.x + Y.x;
-				t.y = finalValue.y + Y.y;
-				t.z = finalValue.z + Y.z;
-				c.x = (t.x - finalValue.x) - Y.x;
-				c.y = (t.y - finalValue.y) - Y.y;
-				c.z = (t.z - finalValue.z) - Y.z;
-				finalValue = make_float4(t.x, t.y, t.z, 0.f);
-			}
-			index += imageSize.x;
-			y++;
-		}
-		smoothedImage[tid] = finalValue;
-	}
-	return;
+                Y.x = gradientValue.x * windowValue - c.x;
+                Y.y = gradientValue.y * windowValue - c.y;
+                Y.z = gradientValue.z * windowValue - c.z;
+                t.x = finalValue.x + Y.x;
+                t.y = finalValue.y + Y.y;
+                t.z = finalValue.z + Y.z;
+                c.x = (t.x - finalValue.x) - Y.x;
+                c.y = (t.y - finalValue.y) - Y.y;
+                c.z = (t.z - finalValue.z) - Y.z;
+                finalValue = make_float4(t.x, t.y, t.z, 0.f);
+            }
+            index += imageSize.x;
+            y++;
+        }
+        smoothedImage[tid] = finalValue;
+    }
 }
+/* *************************************************************** */
+__global__ void _reg_ApplyConvolutionWindowAlongZ_kernel(float4 *smoothedImage, int windowSize) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        int3 imageSize = c_ImageDim;
 
+        short z = (int)(tid / ((imageSize.x) * (imageSize.y)));
 
-__global__ void _reg_ApplyConvolutionWindowAlongZ_kernel(float4 *smoothedImage,
-														 int windowSize)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-		int3 imageSize = c_ImageDim;
-
-		short z=(int)(tid/((imageSize.x)*(imageSize.y)));
-
-		int radius = (windowSize-1)/2;
-		int index = tid - imageSize.x*imageSize.y*radius;
-		z -= radius;
+        int radius = (windowSize - 1) / 2;
+        int index = tid - imageSize.x * imageSize.y * radius;
+        z -= radius;
 
-		float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 
-		// Kahan summation used here
-		float3 c=make_float3(0.f,0.f,0.f), Y, t;
-		float windowValue;
-		for(int i=0; i<windowSize; i++){
-			if(-1<z && z<imageSize.z){
-				float4 gradientValue = tex1Dfetch(gradientImageTexture,index);
-				windowValue = tex1Dfetch(convolutionKernelTexture,i);
+        // Kahan summation used here
+        float3 c = make_float3(0.f, 0.f, 0.f), Y, t;
+        float windowValue;
+        for (int i = 0; i < windowSize; i++) {
+            if (-1 < z && z < imageSize.z) {
+                float4 gradientValue = tex1Dfetch(gradientImageTexture, index);
+                windowValue = tex1Dfetch(convolutionKernelTexture, i);
 
-				Y.x = gradientValue.x * windowValue - c.x;
-				Y.y = gradientValue.y * windowValue - c.y;
-				Y.z = gradientValue.z * windowValue - c.z;
-				t.x = finalValue.x + Y.x;
-				t.y = finalValue.y + Y.y;
-				t.z = finalValue.z + Y.z;
-				c.x = (t.x - finalValue.x) - Y.x;
-				c.y = (t.y - finalValue.y) - Y.y;
-				c.z = (t.z - finalValue.z) - Y.z;
-				finalValue = make_float4(t.x, t.y, t.z, 0.f);
-			}
-			index += imageSize.x*imageSize.y;
-			z++;
-		}
-		smoothedImage[tid] = finalValue;
-	}
-	return;
+                Y.x = gradientValue.x * windowValue - c.x;
+                Y.y = gradientValue.y * windowValue - c.y;
+                Y.z = gradientValue.z * windowValue - c.z;
+                t.x = finalValue.x + Y.x;
+                t.y = finalValue.y + Y.y;
+                t.z = finalValue.z + Y.z;
+                c.x = (t.x - finalValue.x) - Y.x;
+                c.y = (t.y - finalValue.y) - Y.y;
+                c.z = (t.z - finalValue.z) - Y.z;
+                finalValue = make_float4(t.x, t.y, t.z, 0.f);
+            }
+            index += imageSize.x * imageSize.y;
+            z++;
+        }
+        smoothedImage[tid] = finalValue;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_multiplyValue_kernel_float(float *array_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-		array_d[tid] *= c_Weight;
-	}
+__global__ void reg_multiplyValue_kernel_float(float *array_d) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        array_d[tid] *= c_Weight;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_multiplyValue_kernel_float4(float4 *array_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-		float4 temp = array_d[tid];
-		array_d[tid] = make_float4(temp.x*c_Weight,temp.y*c_Weight,temp.z*c_Weight,temp.w*c_Weight);
-	}
+__global__ void reg_multiplyValue_kernel_float4(float4 *array_d) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        float4 temp = array_d[tid];
+        array_d[tid] = make_float4(temp.x * c_Weight, temp.y * c_Weight, temp.z * c_Weight, temp.w * c_Weight);
+    }
 }
 /* *************************************************************** */
-__global__ void reg_addValue_kernel_float(float *array_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-		array_d[tid] += c_Weight;
-	}
+__global__ void reg_addValue_kernel_float(float *array_d) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        array_d[tid] += c_Weight;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_addValue_kernel_float4(float4 *array_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-		float4 temp = array_d[tid];
-		array_d[tid] = make_float4(temp.x+c_Weight,temp.y+c_Weight,temp.z+c_Weight,temp.w+c_Weight);
-	}
+__global__ void reg_addValue_kernel_float4(float4 *array_d) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        float4 temp = array_d[tid];
+        array_d[tid] = make_float4(temp.x + c_Weight, temp.y + c_Weight, temp.z + c_Weight, temp.w + c_Weight);
+    }
 }
 /* *************************************************************** */
-__global__ void reg_multiplyArrays_kernel_float(float *array1_d, float *array2_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-		array1_d[tid] *= array2_d[tid];
-	}
+__global__ void reg_multiplyArrays_kernel_float(float *array1_d, float *array2_d) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        array1_d[tid] *= array2_d[tid];
+    }
 }
 /* *************************************************************** */
-__global__ void reg_multiplyArrays_kernel_float4(float4 *array1_d, float4 *array2_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-		float4 a = array1_d[tid];
-		float4 b = array1_d[tid];
-		array1_d[tid] = make_float4(a.x*b.x,a.y*b.y,a.z*b.z,a.w*b.w);
-	}
+__global__ void reg_multiplyArrays_kernel_float4(float4 *array1_d, float4 *array2_d) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        float4 a = array1_d[tid];
+        float4 b = array1_d[tid];
+        array1_d[tid] = make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
+    }
 }
 /* *************************************************************** */
-__global__ void reg_addArrays_kernel_float(float *array1_d, float *array2_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-		array1_d[tid] += array2_d[tid];
-	}
+__global__ void reg_addArrays_kernel_float(float *array1_d, float *array2_d) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        array1_d[tid] += array2_d[tid];
+    }
 }
 /* *************************************************************** */
-__global__ void reg_addArrays_kernel_float4(float4 *array1_d, float4 *array2_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber){
-		float4 a = array1_d[tid];
-		float4 b = array1_d[tid];
-		array1_d[tid] = make_float4(a.x+b.x,a.y+b.y,a.z+b.z,a.w+b.w);
-	}
+__global__ void reg_addArrays_kernel_float4(float4 *array1_d, float4 *array2_d) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber) {
+        float4 a = array1_d[tid];
+        float4 b = array1_d[tid];
+        array1_d[tid] = make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+    }
 }
 /* *************************************************************** */
-__global__ void reg_fillMaskArray_kernel(int *array1_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid < c_VoxelNumber)
-		array1_d[tid] = tid;
+__global__ void reg_fillMaskArray_kernel(int *array1_d) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_VoxelNumber)
+        array1_d[tid] = tid;
 }
 /* *************************************************************** */
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 8f61cbd6..5eebec14 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -199,7 +199,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
         const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ") + " scale = " + std::to_string(scale);
 
         SECTION(sectionName) {
-            std::cout << "******** UpdateControlPointPosition " << sectionName << " ********" << std::endl;
+            std::cout << "\n**************** UpdateControlPointPosition " << sectionName << " ****************" << std::endl;
 
             // Set the control point grid
             NiftiImage img = content->GetControlPointGrid();
@@ -238,7 +238,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
             // Update the gradient values
             // Only run once by discarding other optimiseX, optimiseY, optimiseZ combinations
             if (!optimiseX && !optimiseY && !optimiseZ) {
-                std::cout << "******** UpdateGradientValues " << sectionName << " ********" << std::endl;
+                std::cout << "\n**************** UpdateGradientValues " << sectionName << " ****************" << std::endl;
 
                 // Initialise the conjugate gradient
                 optimiser->UpdateGradientValues();
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 06a535cd..3c99f312 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -163,10 +163,9 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
         // Accumulate all required contents with a vector
         vector<ContentDesc> contentDescs;
         for (auto&& platformType : PlatformTypes) {
-            unique_ptr<Platform> platform{ new Platform(platformType) };
-            // Add content
             if (platformType == PlatformType::Cuda && interp != 1)
                 continue;   // CUDA platform only supports linear interpolation
+            unique_ptr<Platform> platform{ new Platform(platformType) };
             unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
             unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
             contentDescs.push_back({ std::move(content), std::move(platform) });
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index 73c2fd66..fe59bec9 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -140,7 +140,7 @@ class NormaliseGradientTest {
     }
 
     template<typename T>
-    void NormaliseGradient(const nifti_image* transformationGradient, const T& maxGradLength, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
+    void NormaliseGradient(nifti_image* transformationGradient, const T& maxGradLength, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
         if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return;
         const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
         T *ptrX = static_cast<T*>(transformationGradient->data);
@@ -182,7 +182,7 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
         const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ");
 
         SECTION(sectionName) {
-            std::cout << "******** Section " << sectionName << " ********" << std::endl;
+            std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl;
 
             // Set the transformation gradient image to host the computation
             NiftiImage transGrad = content->GetTransformationGradient();

From 3cbbb00a5f4a1163d523551da9904ab327f7dfae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 21 Jun 2023 18:35:16 +0100
Subject: [PATCH 140/314] Add tests for *Compute::VoxelCentricToNodeCentric()

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/Compute.cpp                           |  13 +-
 reg-lib/Compute.h                             |   5 +
 reg-lib/cuda/CudaCompute.cpp                  |  17 +-
 reg-lib/cuda/CudaCompute.h                    |   5 +
 reg-test/CMakeLists.txt                       |   1 +
 reg-test/reg_test_common.h                    |   1 +
 .../reg_test_voxelCentricToNodeCentric.cpp    | 272 ++++++++++++++++++
 8 files changed, 305 insertions(+), 11 deletions(-)
 create mode 100644 reg-test/reg_test_voxelCentricToNodeCentric.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 98ecf581..98da127e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-259
+260
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 642ee316..49bb2937 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -274,11 +274,8 @@ void Compute::ConvolveImage(nifti_image *image) {
     }
 }
 /* *************************************************************** */
-void Compute::ConvolveVoxelBasedMeasureGradient(float weight) {
+void Compute::VoxelCentricToNodeCentric(float weight) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    ConvolveImage(con.GetVoxelBasedMeasureGradient());
-
-    // The node-based NMI gradient is extracted
     mat44 *reorientation = Content::GetIJKMatrix(*con.GetFloating());
     reg_voxelCentric2NodeCentric(con.GetTransformationGradient(),
                                  con.GetVoxelBasedMeasureGradient(),
@@ -287,6 +284,14 @@ void Compute::ConvolveVoxelBasedMeasureGradient(float weight) {
                                  reorientation);
 }
 /* *************************************************************** */
+void Compute::ConvolveVoxelBasedMeasureGradient(float weight) {
+    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    ConvolveImage(con.GetVoxelBasedMeasureGradient());
+
+    // The node-based NMI gradient is extracted from the voxel-based gradient
+    VoxelCentricToNodeCentric(weight);
+}
+/* *************************************************************** */
 void Compute::ExponentiateGradient(Content& conBwIn) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
     F3dContent& conBw = dynamic_cast<F3dContent&>(conBwIn);
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index efa43bf4..3038bf85 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -37,6 +37,11 @@ class Compute {
 
     void ConvolveImage(nifti_image*);
 
+#ifdef NR_TESTING
+public:
+#endif
+    virtual void VoxelCentricToNodeCentric(float weight);
+
 private:
     template<typename Type> void GetApproximatedGradient(InterfaceOptimiser&);
     nifti_image* ScaleGradient(const nifti_image&, float);
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 3b9db5e5..9d2e6032 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -165,6 +165,15 @@ void CudaCompute::GetDefFieldFromVelocityGrid(bool updateStepNumber) {
     con.UpdateDeformationField();
 }
 /* *************************************************************** */
+void CudaCompute::VoxelCentricToNodeCentric(float weight) {
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(),
+                                     con.F3dContent::GetControlPointGrid(),
+                                     con.GetVoxelBasedMeasureGradientCuda(),
+                                     con.GetTransformationGradientCuda(),
+                                     weight);
+}
+/* *************************************************************** */
 void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
     // TODO Implement this for CUDA
     // Use CPU temporarily
@@ -173,12 +182,8 @@ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
     // Transfer the data back to the CUDA device
     con.UpdateVoxelBasedMeasureGradient();
 
-    // The node-based NMI gradient is extracted
-    reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(),
-                                     con.F3dContent::GetControlPointGrid(),
-                                     con.GetVoxelBasedMeasureGradientCuda(),
-                                     con.GetTransformationGradientCuda(),
-                                     weight);
+    // The node-based NMI gradient is extracted from the voxel-based gradient
+    VoxelCentricToNodeCentric(weight);
 }
 /* *************************************************************** */
 void CudaCompute::ExponentiateGradient(Content& conBwIn) {
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index fbde281d..0e71b10e 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -29,4 +29,9 @@ class CudaCompute: public Compute {
     virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void BchUpdate(float scale, int bchUpdateValue) override;
     virtual void SymmetriseVelocityFields(Content& conBw) override;
+
+#ifndef NR_TESTING
+protected:
+#endif
+    virtual void VoxelCentricToNodeCentric(float weight) override;
 };
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index ed1d77b3..793b9448 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -113,6 +113,7 @@ set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST})
 set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
+set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
 
 foreach(EXEC ${EXEC_LIST})
   add_executable(${EXEC} ${EXEC}.cpp)
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 4d5c168f..a70a052e 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -1,6 +1,7 @@
 #define NR_TESTING  // Enable testing
 #define EPS     0.000001
 
+#include <array>
 #include <random>
 #include <catch2/catch_test_macros.hpp>
 #include "_reg_localTrans.h"
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
new file mode 100644
index 00000000..4cb02f1f
--- /dev/null
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -0,0 +1,272 @@
+// OpenCL is not supported for this test
+#undef _USE_OPENCL
+
+#include "reg_test_common.h"
+
+/*
+    This test file contains the following unit tests:
+    test functions: The node-based NMI gradient is extracted from the voxel-based NMI gradient
+    In 2D and 3D
+*/
+
+
+class VoxelCentricToNodeCentricTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<shared_ptr<Platform>, unique_ptr<F3dContent>, TestData, std::array<mat44, 4>, float>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    VoxelCentricToNodeCentricTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create a 2D reference image
+        vector<NiftiImage::dim_t> dimFlo{ 4, 4 };
+        NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
+
+        // Create a 3D reference image
+        dimFlo.push_back(4);
+        NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32);
+
+        // Create the voxel-based measure gradients
+        vector<NiftiImage::dim_t> dimGrad{ 4, 4, 1, 1, 2 };
+        NiftiImage voxelBasedMeasureGradient2d(dimGrad, NIFTI_TYPE_FLOAT32);
+        dimGrad[2] = 4; dimGrad[4] = 3;
+        NiftiImage voxelBasedMeasureGradient3d(dimGrad, NIFTI_TYPE_FLOAT32);
+
+        // Create the control point grids
+        NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
+        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+
+        // Create the matrices and fill them with random values
+        std::array<mat44, 4> matrices{};
+        for (int i = 0; i < 4; ++i)
+            for (int j = 0; j < 4; ++j)
+                for (int k = 0; k < 4; ++k)
+                    matrices[i].m[j][k] = j == k ? distr(gen) : 0;
+
+        // Generate the different test cases
+        // Test 2D
+        auto grad2dPtr = voxelBasedMeasureGradient2d.data();
+        for (size_t i = 0; i < voxelBasedMeasureGradient2d.nVoxels(); ++i)
+            grad2dPtr[i] = distr(gen);
+
+        // Add the test data
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "2D",
+            std::move(reference2d),
+            std::move(controlPointGrid2d),
+            std::move(voxelBasedMeasureGradient2d)
+        ));
+
+        // Test 3D
+        auto grad3dPtr = voxelBasedMeasureGradient3d.data();
+        for (size_t i = 0; i < voxelBasedMeasureGradient3d.nVoxels(); ++i)
+            grad3dPtr[i] = distr(gen);
+
+        // Add the test data
+        testData.emplace_back(TestData(
+            "3D",
+            std::move(reference3d),
+            std::move(controlPointGrid3d),
+            std::move(voxelBasedMeasureGradient3d)
+        ));
+
+        // Add platforms, composition, and bspline to the test data
+        for (auto&& testData : testData) {
+            for (auto&& platformType : PlatformTypes) {
+                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
+                // Make a copy of the test data
+                auto td = testData;
+                auto&& [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = td;
+                // Add content
+                unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
+                testCases.push_back({ platform, std::move(content), std::move(td), matrices, distr(gen) });
+            }
+        }
+    }
+
+    template<typename DataType>
+    void VoxelCentricToNodeCentric(const nifti_image *floating, NiftiImage& nodeGrad, const NiftiImage& voxelGrad, float weight) {
+        const mat44 *voxelToMillimetre = floating->sform_code > 0 ? &floating->sto_ijk : &floating->qto_ijk;
+        const bool is3d = nodeGrad->nz > 1;
+
+        const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeGrad, 3);
+        auto nodePtr = nodeGrad.data();
+        auto nodePtrX = nodePtr.begin();
+        auto nodePtrY = nodePtrX + nodeNumber;
+        auto nodePtrZ = nodePtrY + nodeNumber;
+
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelGrad, 3);
+        auto voxelPtr = voxelGrad.data();
+        auto voxelPtrX = voxelPtr.begin();
+        auto voxelPtrY = voxelPtrX + voxelNumber;
+        auto voxelPtrZ = voxelPtrY + voxelNumber;
+
+        // The transformation between the image and the grid
+        mat44 transformation;
+        // Voxel to millimetre in the grid image
+        if (nodeGrad->sform_code > 0)
+            transformation = nodeGrad->sto_xyz;
+        else transformation = nodeGrad->qto_xyz;
+        // Affine transformation between the grid and the reference image
+        if (nodeGrad->num_ext > 0 && nodeGrad->ext_list[0].edata) {
+            mat44 temp = *(reinterpret_cast<mat44*>(nodeGrad->ext_list[0].edata));
+            temp = nifti_mat44_inverse(temp);
+            transformation = reg_mat44_mul(&temp, &transformation);
+        }
+        // Millimetre to voxel in the reference image
+        if (voxelGrad->sform_code > 0)
+            transformation = reg_mat44_mul(&voxelGrad->sto_ijk, &transformation);
+        else transformation = reg_mat44_mul(&voxelGrad->qto_ijk, &transformation);
+
+        // The information has to be reoriented
+        // Voxel to millimetre contains the orientation of the image that is used
+        // to compute the spatial gradient (floating image)
+        mat33 reorientation = reg_mat44_to_mat33(voxelToMillimetre);
+        if (nodeGrad->num_ext > 0 && nodeGrad->ext_list[0].edata) {
+            mat33 temp = reg_mat44_to_mat33(reinterpret_cast<mat44*>(nodeGrad->ext_list[0].edata));
+            temp = nifti_mat33_inverse(temp);
+            reorientation = nifti_mat33_mul(temp, reorientation);
+        }
+        // The information has to be weighted
+        float ratio[3] = { nodeGrad->dx, nodeGrad->dy, nodeGrad->dz };
+        for (int i = 0; i < (is3d ? 3 : 2); ++i) {
+            if (nodeGrad->sform_code > 0) {
+                ratio[i] = sqrt(reg_pow2(nodeGrad->sto_xyz.m[i][0]) +
+                                reg_pow2(nodeGrad->sto_xyz.m[i][1]) +
+                                reg_pow2(nodeGrad->sto_xyz.m[i][2]));
+            }
+            ratio[i] /= voxelGrad->pixdim[i + 1];
+            weight *= ratio[i];
+        }
+        // For each node, the corresponding voxel is computed
+        float nodeCoord[3], voxelCoord[3];
+        for (int z = 0; z < nodeGrad->nz; z++) {
+            nodeCoord[2] = static_cast<float>(z);
+            for (int y = 0; y < nodeGrad->ny; y++) {
+                nodeCoord[1] = static_cast<float>(y);
+                for (int x = 0; x < nodeGrad->nx; x++) {
+                    nodeCoord[0] = static_cast<float>(x);
+                    reg_mat44_mul(&transformation, nodeCoord, voxelCoord);
+                    // Linear interpolation
+                    DataType basisX[2], basisY[2], basisZ[2];
+                    const int pre[3] = { reg_floor(voxelCoord[0]), reg_floor(voxelCoord[1]), reg_floor(voxelCoord[2]) };
+                    basisX[1] = voxelCoord[0] - static_cast<DataType>(pre[0]);
+                    basisX[0] = static_cast<DataType>(1) - basisX[1];
+                    basisY[1] = voxelCoord[1] - static_cast<DataType>(pre[1]);
+                    basisY[0] = static_cast<DataType>(1) - basisY[1];
+                    if (is3d) {
+                        basisZ[1] = voxelCoord[2] - static_cast<DataType>(pre[2]);
+                        basisZ[0] = static_cast<DataType>(1) - basisZ[1];
+                    }
+                    DataType interpolatedValue[3]{};
+                    for (int c = 0; c < 2; ++c) {
+                        const int indexZ = pre[2] + c;
+                        if (-1 < indexZ && indexZ < voxelGrad->nz) {
+                            for (int b = 0; b < 2; ++b) {
+                                const int indexY = pre[1] + b;
+                                if (-1 < indexY && indexY < voxelGrad->ny) {
+                                    for (int a = 0; a < 2; ++a) {
+                                        const int indexX = pre[0] + a;
+                                        if (-1 < indexX && indexX < voxelGrad->nx) {
+                                            const int index = (indexZ * voxelGrad->ny + indexY) * voxelGrad->nx + indexX;
+                                            const DataType linearWeight = basisX[a] * basisY[b] * (is3d ? basisZ[c] : 1);
+                                            interpolatedValue[0] += linearWeight * static_cast<DataType>(voxelPtrX[index]);
+                                            interpolatedValue[1] += linearWeight * static_cast<DataType>(voxelPtrY[index]);
+                                            if (is3d)
+                                                interpolatedValue[2] += linearWeight * static_cast<DataType>(voxelPtrZ[index]);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    DataType reorientedValue[3]{};
+                    reorientedValue[0] =
+                        reorientation.m[0][0] * interpolatedValue[0] +
+                        reorientation.m[1][0] * interpolatedValue[1] +
+                        reorientation.m[2][0] * interpolatedValue[2];
+                    reorientedValue[1] =
+                        reorientation.m[0][1] * interpolatedValue[0] +
+                        reorientation.m[1][1] * interpolatedValue[1] +
+                        reorientation.m[2][1] * interpolatedValue[2];
+                    if (is3d)
+                        reorientedValue[2] =
+                        reorientation.m[0][2] * interpolatedValue[0] +
+                        reorientation.m[1][2] * interpolatedValue[1] +
+                        reorientation.m[2][2] * interpolatedValue[2];
+                    *nodePtrX++ = reorientedValue[0] * static_cast<DataType>(weight);
+                    *nodePtrY++ = reorientedValue[1] * static_cast<DataType>(weight);
+                    if (is3d)
+                        *nodePtrZ++ = reorientedValue[2] * static_cast<DataType>(weight);
+                } // x
+            } // y
+        } // z
+    }
+};
+
+TEST_CASE_METHOD(VoxelCentricToNodeCentricTest, "Voxel centric to node centric", "[VoxelCentricToNodeCentric]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [platform, content, testData, matrices, weight] = testCase;
+        auto&& [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = testData;
+        const std::string sectionName = testName + " " + platform->GetName() + " weight=" + std::to_string(weight);
+
+        SECTION(sectionName) {
+            std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl;
+            // Set the matrices required for computation
+            nifti_image *floating = content->Content::GetFloating();
+            if (floating->sform_code > 0)
+                floating->sto_ijk = matrices[0];
+            else floating->qto_ijk = matrices[0];
+            NiftiImage transGrad = content->F3dContent::GetTransformationGradient();
+            static int sfc = 0;
+            transGrad->sform_code = sfc++ % 2;
+            if (transGrad->sform_code > 0)
+                transGrad->sto_xyz = matrices[1];
+            else transGrad->qto_xyz = matrices[1];
+            const mat44 invMatrix = nifti_mat44_inverse(matrices[2]);
+            nifti_add_extension(transGrad, reinterpret_cast<const char*>(&invMatrix), sizeof(invMatrix), NIFTI_ECODE_IGNORE);
+
+            // Set the voxel-based measure gradient to host the computation
+            NiftiImage voxelGrad = content->F3dContent::GetVoxelBasedMeasureGradient();
+            if (voxelGrad->sform_code > 0)
+                voxelGrad->sto_ijk = matrices[3];
+            else voxelGrad->qto_ijk = matrices[3];
+            voxelGrad.copyData(voxelBasedMeasureGradient);
+            content->UpdateVoxelBasedMeasureGradient();
+
+            // Extract the node-based NMI gradient from the voxel-based NMI gradient
+            unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+            compute->VoxelCentricToNodeCentric(weight);
+            NiftiImage transGradExp(transGrad, NiftiImage::Copy::ImageInfoAndAllocData);
+            VoxelCentricToNodeCentric<float>(floating, transGradExp, voxelGrad, weight);
+            transGrad.disown(); voxelGrad.disown();
+
+            // Check the results
+            transGrad = content->GetTransformationGradient();
+            const auto transGradPtr = transGrad.data();
+            const auto transGradExpPtr = transGradExp.data();
+            transGrad.disown();
+            for (size_t i = 0; i < transGradExp.nVoxels(); ++i) {
+                const float transGradVal = transGradPtr[i];
+                const float transGradExpVal = transGradExpPtr[i];
+                std::cout << i << " " << transGradVal << " " << transGradExpVal << std::endl;
+                REQUIRE(fabs(transGradVal - transGradExpVal) < EPS);
+            }
+            // Ensure the termination of content before CudaContext
+            content.reset();
+        }
+    }
+}

From 1f6452c6c9d1f273dd784d6402adf59e4a7a2e36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 21 Jun 2023 18:54:54 +0100
Subject: [PATCH 141/314] Fix GPU version of VoxelCentricToNodeCentric() to
 make on a par with CPU version

---
 niftyreg_build_version.txt         |  2 +-
 reg-lib/cuda/CudaCompute.cpp       | 10 ++--
 reg-lib/cuda/_reg_tools_gpu.cu     | 89 ++++++++++++++++++++----------
 reg-lib/cuda/_reg_tools_gpu.h      | 11 ++--
 reg-lib/cuda/_reg_tools_kernels.cu | 86 +++++++++++++++++++++--------
 5 files changed, 136 insertions(+), 62 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 98da127e..4fc233b7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-260
+261
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 9d2e6032..ec1398b2 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -167,11 +167,13 @@ void CudaCompute::GetDefFieldFromVelocityGrid(bool updateStepNumber) {
 /* *************************************************************** */
 void CudaCompute::VoxelCentricToNodeCentric(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(),
-                                     con.F3dContent::GetControlPointGrid(),
-                                     con.GetVoxelBasedMeasureGradientCuda(),
+    const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating());
+    reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetTransformationGradient(),
+                                     con.F3dContent::GetVoxelBasedMeasureGradient(),
                                      con.GetTransformationGradientCuda(),
-                                     weight);
+                                     con.GetVoxelBasedMeasureGradientCuda(),
+                                     weight,
+                                     reorientation);
 }
 /* *************************************************************** */
 void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 3c40f899..fcb8d885 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -15,37 +15,66 @@
 #include "_reg_tools_kernels.cu"
 
 /* *************************************************************** */
-void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
-                                      nifti_image *controlPointImage,
-                                      float4 *voxelNMIGradientArray_d,
-                                      float4 *nodeNMIGradientArray_d,
-                                      float weight) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    const int nodeNumber = CalcVoxelNumber(*controlPointImage);
-    const int voxelNumber = CalcVoxelNumber(*targetImage);
-    const int3 targetImageDim = make_int3(targetImage->nx, targetImage->ny, targetImage->nz);
-    const int3 gridSize = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    float3 voxelNodeRatio_h = make_float3(controlPointImage->dx / targetImage->dx,
-                                          controlPointImage->dy / targetImage->dy,
-                                          controlPointImage->dz / targetImage->dz);
-    // Ensure that Z=0 if 2D images
-    if (gridSize.z == 1) voxelNodeRatio_h.z = 0;
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_TargetImageDim, &targetImageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &gridSize, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio, &voxelNodeRatio_h, sizeof(float3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &weight, sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber * sizeof(float4)));
-
-    const unsigned Grid_reg_voxelCentric2NodeCentric = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blockSize->reg_voxelCentric2NodeCentric));
-    dim3 B1(blockSize->reg_voxelCentric2NodeCentric, 1, 1);
-    dim3 G1(Grid_reg_voxelCentric2NodeCentric, Grid_reg_voxelCentric2NodeCentric, 1);
-    reg_voxelCentric2NodeCentric_kernel<<<G1, B1>>>(nodeNMIGradientArray_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
+void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
+                                      const nifti_image *voxelImage,
+                                      float4 *nodeImageCuda,
+                                      float4 *voxelImageCuda,
+                                      float weight,
+                                      const mat44 *voxelToMillimetre) {
+    const bool is3d = nodeImage->nz > 1;
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3);
+    const int3 nodeImageDims = make_int3(nodeImage->nx, nodeImage->ny, nodeImage->nz);
+    const int3 voxelImageDims = make_int3(voxelImage->nx, voxelImage->ny, voxelImage->nz);
+
+    auto voxelImageTexture = cudaCommon_createTextureObject(voxelImageCuda, cudaResourceTypeLinear,
+                                                            voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+
+    // The transformation between the image and the grid
+    mat44 transformation;
+    // Voxel to millimetre in the grid image
+    if (nodeImage->sform_code > 0)
+        transformation = nodeImage->sto_xyz;
+    else transformation = nodeImage->qto_xyz;
+    // Affine transformation between the grid and the reference image
+    if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) {
+        mat44 temp = *(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
+        temp = nifti_mat44_inverse(temp);
+        transformation = reg_mat44_mul(&temp, &transformation);
+    }
+    // Millimetre to voxel in the reference image
+    if (voxelImage->sform_code > 0)
+        transformation = reg_mat44_mul(&voxelImage->sto_ijk, &transformation);
+    else transformation = reg_mat44_mul(&voxelImage->qto_ijk, &transformation);
+
+    // The information has to be reoriented
+    // Voxel to millimetre contains the orientation of the image that is used
+    // to compute the spatial gradient (floating image)
+    mat33 reorientation = reg_mat44_to_mat33(voxelToMillimetre);
+    if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) {
+        mat33 temp = reg_mat44_to_mat33(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
+        temp = nifti_mat33_inverse(temp);
+        reorientation = nifti_mat33_mul(temp, reorientation);
+    }
+    // The information has to be weighted
+    float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz };
+    for (int i = 0; i < (is3d ? 3 : 2); ++i) {
+        if (nodeImage->sform_code > 0) {
+            ratio[i] = sqrt(reg_pow2(nodeImage->sto_xyz.m[i][0]) +
+                            reg_pow2(nodeImage->sto_xyz.m[i][1]) +
+                            reg_pow2(nodeImage->sto_xyz.m[i][2]));
+        }
+        ratio[i] /= voxelImage->pixdim[i + 1];
+        weight *= ratio[i];
+    }
 
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric;
+    const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks));
+    const dim3 blockDims(blocks, 1, 1);
+    const dim3 gridDims(grids, grids, 1);
+    reg_voxelCentric2NodeCentric_kernel<<<gridDims, blockDims>>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims,
+                                                                 voxelImageDims, is3d, weight, transformation, reorientation);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
 void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 0e5dca7c..cccd33ef 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -19,11 +19,12 @@
 
 /* *************************************************************** */
 extern "C++"
-void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage,
-                                      nifti_image *controlPointImage,
-                                      float4 *voxelNMIGradientArray_d,
-                                      float4 *nodeNMIGradientArray_d,
-                                      float weight);
+void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
+                                      const nifti_image *voxelImage,
+                                      float4 *nodeImageCuda,
+                                      float4 *voxelImageCuda,
+                                      float weight,
+                                      const mat44 *voxelToMillimetre);
 /* *************************************************************** */
 extern "C++"
 void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index df2897a6..994e0787 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -22,31 +22,73 @@ texture<float4, 1, cudaReadModeElementType> gradientImageTexture;
 texture<float4, 1, cudaReadModeElementType> matrixTexture;
 texture<float, 1, cudaReadModeElementType> convolutionKernelTexture;
 /* *************************************************************** */
-__global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeNMIGradientArray_d) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_NodeNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
+__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) {
+    out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]);
+    out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]);
+    out[2] = is3d ? weight * (mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2]) : 0;
+}
+/* *************************************************************** */
+__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool& is3d) {
+    out[0] = mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2] + mat.m[0][3];
+    out[1] = mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2] + mat.m[1][3];
+    out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0;
+}
+/* *************************************************************** */
+__global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda,
+                                                    cudaTextureObject_t voxelImageTexture,
+                                                    const unsigned nodeNumber,
+                                                    const int3 nodeImageDims,
+                                                    const int3 voxelImageDims,
+                                                    const bool is3d,
+                                                    const float weight,
+                                                    const mat44 transformation,
+                                                    const mat33 reorientation) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < nodeNumber) {
+        float nodeCoord[3], voxelCoord[3], reorientedValue[3];
         int tempIndex = tid;
-        const short z = (int)(tempIndex / (gridSize.x * gridSize.y));
-        tempIndex -= z * (gridSize.x) * (gridSize.y);
-        const short y = (int)(tempIndex / (gridSize.x));
-        const short x = tempIndex - y * (gridSize.x);
-
-        const float3 ratio = c_VoxelNodeRatio;
-        const short X = round((x - 1) * ratio.x);
-        const short Y = round((y - 1) * ratio.y);
-        const short Z = round((z - 1) * ratio.z);
+        nodeCoord[2] = tempIndex / (nodeImageDims.x * nodeImageDims.y);
+        tempIndex -= nodeCoord[2] * nodeImageDims.x * nodeImageDims.y;
+        nodeCoord[1] = tempIndex / nodeImageDims.x;
+        nodeCoord[0] = tempIndex - nodeCoord[1] * nodeImageDims.x;
+        reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord, is3d);
 
-        const int3 imageSize = c_TargetImageDim;
+        // Linear interpolation
+        float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{};
+        const int pre[3] = { reg_floor(voxelCoord[0]), reg_floor(voxelCoord[1]), reg_floor(voxelCoord[2]) };
+        basisX[1] = voxelCoord[0] - static_cast<float>(pre[0]);
+        basisX[0] = 1.f - basisX[1];
+        basisY[1] = voxelCoord[1] - static_cast<float>(pre[1]);
+        basisY[0] = 1.f - basisY[1];
+        if (is3d) {
+            basisZ[1] = voxelCoord[2] - static_cast<float>(pre[2]);
+            basisZ[0] = 1.f - basisZ[1];
+        }
+        for (short c = 0; c < 2; ++c) {
+            const int indexZ = pre[2] + c;
+            if (-1 < indexZ && indexZ < voxelImageDims.z) {
+                for (short b = 0; b < 2; ++b) {
+                    const int indexY = pre[1] + b;
+                    if (-1 < indexY && indexY < voxelImageDims.y) {
+                        for (short a = 0; a < 2; ++a) {
+                            const int indexX = pre[0] + a;
+                            if (-1 < indexX && indexX < voxelImageDims.x) {
+                                const int index = (indexZ * voxelImageDims.y + indexY) * voxelImageDims.x + indexX;
+                                const float linearWeight = basisX[a] * basisY[b] * (is3d ? basisZ[c] : 1);
+                                const float4 voxelValue = tex1Dfetch<float4>(voxelImageTexture, index);
+                                interpolatedValue[0] += linearWeight * voxelValue.x;
+                                interpolatedValue[1] += linearWeight * voxelValue.y;
+                                if (is3d)
+                                    interpolatedValue[2] += linearWeight * voxelValue.z;
+                            }
+                        }
+                    }
+                }
+            }
+        }
 
-        if (-1 < X && X < imageSize.x && -1 < Y && Y < imageSize.y && -1 < Z && Z < imageSize.z) {
-            int index = (Z * imageSize.y + Y) * imageSize.x + X;
-            float4 gradientValue = tex1Dfetch(gradientImageTexture, index);
-            nodeNMIGradientArray_d[tid] = make_float4(c_Weight * gradientValue.x,
-                                                      c_Weight * gradientValue.y,
-                                                      c_Weight * gradientValue.z,
-                                                      0.0f);
-        } else nodeNMIGradientArray_d[tid] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        reg_mat33_mul_cuda(reorientation, interpolatedValue, weight, reorientedValue, is3d);
+        nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 };
     }
 }
 /* *************************************************************** */

From d59deb96edaa4c6ed5c9fb296f84013cfb5f4e4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 22 Jun 2023 14:01:30 +0100
Subject: [PATCH 142/314] Fix macOS compilation errors

---
 niftyreg_build_version.txt                      | 2 +-
 reg-test/reg_test_voxelCentricToNodeCentric.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 4fc233b7..5484d829 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-261
+262
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index 4cb02f1f..c7f1f232 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -160,7 +160,7 @@ class VoxelCentricToNodeCentricTest {
                     reg_mat44_mul(&transformation, nodeCoord, voxelCoord);
                     // Linear interpolation
                     DataType basisX[2], basisY[2], basisZ[2];
-                    const int pre[3] = { reg_floor(voxelCoord[0]), reg_floor(voxelCoord[1]), reg_floor(voxelCoord[2]) };
+                    const int pre[3] = { (int)reg_floor(voxelCoord[0]), (int)reg_floor(voxelCoord[1]), (int)reg_floor(voxelCoord[2]) };
                     basisX[1] = voxelCoord[0] - static_cast<DataType>(pre[0]);
                     basisX[0] = static_cast<DataType>(1) - basisX[1];
                     basisY[1] = voxelCoord[1] - static_cast<DataType>(pre[1]);

From e6855af5d45634d6c35b913c1c1f746b61208039 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 22 Jun 2023 14:02:06 +0100
Subject: [PATCH 143/314] Fix the precision bug of reg_lncc

---
 niftyreg_build_version.txt |  2 +-
 reg-lib/cpu/_reg_lncc.cpp  | 17 ++++++++---------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 5484d829..175b6c5d 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-262
+263
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 13134155..8c9545cf 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -124,8 +124,7 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
     reg_tools_kernelConvolution(stdDevWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr) \
-    private(voxel)
+    shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // G*(I^2) - (G*I)^2
@@ -303,7 +302,7 @@ double reg_getLNCCValue(nifti_image *referenceImage,
 #pragma omp parallel for default(none) \
     shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \
     refSdevPtr,warSdevPtr,correlaPtr) \
-    private(voxel,lncc_value) \
+    private(lncc_value) \
     reduction(+:lncc_value_sum) \
     reduction(+:activeVoxel_num)
 #endif
@@ -495,7 +494,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
 #pragma omp parallel for default(none) \
     shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \
     refSdevPtr,warSdevPtr,correlaPtr) \
-    private(voxel,refMeanValue,warMeanValue,refSdevValue, \
+    private(refMeanValue,warMeanValue,refSdevValue, \
     warSdevValue, correlaValue, temp1, temp2, temp3) \
     reduction(+:activeVoxel_num)
 #endif
@@ -560,17 +559,17 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
     shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \
     warMeanPtr,warSdevPtr,correlaPtr,measureGradPtrX,measureGradPtrY, \
     measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjusted_weight) \
-    private(voxel, common)
+    private(common)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
             common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlaPtr[voxel];
             common *= adjusted_weight;
-            measureGradPtrX[voxel] -= warpGradPtrX[voxel] * static_cast<DataType>(common);
-            measureGradPtrY[voxel] -= warpGradPtrY[voxel] * static_cast<DataType>(common);
+            measureGradPtrX[voxel] -= static_cast<DataType>(warpGradPtrX[voxel] * common);
+            measureGradPtrY[voxel] -= static_cast<DataType>(warpGradPtrY[voxel] * common);
             if (warpGradPtrZ != nullptr)
-                measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * static_cast<DataType>(common);
+                measureGradPtrZ[voxel] -= static_cast<DataType>(warpGradPtrZ[voxel] * common);
         }
     }
     // Check for NaN
@@ -583,7 +582,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,measureGradPtrX) \
-    private(voxel, val)
+    private(val)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         val = measureGradPtrX[voxel];

From 6cfe8d7df79acdbaeb6dfc81757ac7023bc38205 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 23 Jun 2023 13:21:21 +0100
Subject: [PATCH 144/314] Optimise reg_voxelCentric2NodeCentric_kernel()

---
 niftyreg_build_version.txt                     |  2 +-
 reg-lib/cuda/_reg_tools_kernels.cu             | 18 +++++++++++++-----
 .../reg_test_voxelCentricToNodeCentric.cpp     |  6 +++---
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 175b6c5d..10b0c0db 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-263
+264
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index 994e0787..112ec7b3 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -34,6 +34,12 @@ __device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in
     out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0;
 }
 /* *************************************************************** */
+__device__ __inline__ void div(const int num, const int denom, int& quot, int& rem) {
+    // This will be optimised by the compiler into a single div instruction
+    quot = num / denom;
+    rem = num % denom;
+}
+/* *************************************************************** */
 __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda,
                                                     cudaTextureObject_t voxelImageTexture,
                                                     const unsigned nodeNumber,
@@ -46,11 +52,13 @@ __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nodeNumber) {
         float nodeCoord[3], voxelCoord[3], reorientedValue[3];
-        int tempIndex = tid;
-        nodeCoord[2] = tempIndex / (nodeImageDims.x * nodeImageDims.y);
-        tempIndex -= nodeCoord[2] * nodeImageDims.x * nodeImageDims.y;
-        nodeCoord[1] = tempIndex / nodeImageDims.x;
-        nodeCoord[0] = tempIndex - nodeCoord[1] * nodeImageDims.x;
+        // Calculate the node coordinates
+        int quot, rem;
+        div(tid, nodeImageDims.x * nodeImageDims.y, quot, rem);
+        nodeCoord[2] = quot;
+        div(rem, nodeImageDims.x, quot, rem);
+        nodeCoord[1] = quot; nodeCoord[0] = rem;
+        // Transform into voxel coordinates
         reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord, is3d);
 
         // Linear interpolation
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index c7f1f232..027e5467 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -13,7 +13,7 @@
 class VoxelCentricToNodeCentricTest {
 protected:
     using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
-    using TestCase = std::tuple<shared_ptr<Platform>, unique_ptr<F3dContent>, TestData, std::array<mat44, 4>, float>;
+    using TestCase = std::tuple<unique_ptr<Platform>, unique_ptr<F3dContent>, TestData, std::array<mat44, 4>, float>;
 
     inline static vector<TestCase> testCases;
 
@@ -83,14 +83,14 @@ class VoxelCentricToNodeCentricTest {
         // Add platforms, composition, and bspline to the test data
         for (auto&& testData : testData) {
             for (auto&& platformType : PlatformTypes) {
-                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
                 // Make a copy of the test data
                 auto td = testData;
                 auto&& [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = td;
                 // Add content
                 unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
-                testCases.push_back({ platform, std::move(content), std::move(td), matrices, distr(gen) });
+                testCases.push_back({ std::move(platform), std::move(content), std::move(td), matrices, distr(gen) });
             }
         }
     }

From d37b15ca9c1eaec4ac47024f88fec1f98270c5f0 Mon Sep 17 00:00:00 2001
From: mmodat <marc.modat@gmail.com>
Date: Fri, 23 Jun 2023 15:43:53 +0100
Subject: [PATCH 145/314] Issue #92: Added LNCC unit tests

---
 CMakeLists.txt             |   2 +-
 niftyreg_build_version.txt |   2 +-
 reg-test/CMakeLists.txt    |   3 +
 reg-test/reg_test_common.h |   1 +
 reg-test/reg_test_lncc.cpp | 360 +++++++++++++++++++++++++++++++++++++
 5 files changed, 366 insertions(+), 2 deletions(-)
 create mode 100644 reg-test/reg_test_lncc.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1c7b9840..9e872c48 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -219,7 +219,7 @@ add_subdirectory(cmake)
 #-----------------------------------------------------------------------------
 if(BUILD_TESTING)
   enable_testing()
-  add_subdirectory(reg-test)
+  add_subdirectory(${CMAKE_SOURCE_DIR}/reg-test)
 endif(BUILD_TESTING)
 #-----------------------------------------------------------------------------
 # add a target to generate API documentation with Doxygen
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 175b6c5d..10b0c0db 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-263
+264
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 793b9448..a429150e 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -5,6 +5,8 @@ if(NOT Catch2_FOUND)
   message(SEND_ERROR "Catch2 is required to generate the unit test.
   The BUILD_TESTING flag is turned OFF")
   return()
+else(NOT Catch2_FOUND)
+  message(STATUS "Found Catch2")
 endif(NOT Catch2_FOUND)
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
@@ -114,6 +116,7 @@ set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
+set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
 
 foreach(EXEC ${EXEC_LIST})
   add_executable(${EXEC} ${EXEC}.cpp)
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index a70a052e..9be31b61 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -4,6 +4,7 @@
 #include <array>
 #include <random>
 #include <catch2/catch_test_macros.hpp>
+#include "_reg_lncc.h"
 #include "_reg_localTrans.h"
 #include "Platform.h"
 #include "ResampleImageKernel.h"
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
new file mode 100644
index 00000000..1d84f86c
--- /dev/null
+++ b/reg-test/reg_test_lncc.cpp
@@ -0,0 +1,360 @@
+// OpenCL and CUDA are not supported for this test yet
+#undef _USE_OPENCL
+#undef _USE_CUDA
+
+#include "reg_test_common.h"
+#include "_reg_lncc.h"
+
+/*
+    This test file contains the following unit tests:
+    test function: LNCC computation and its voxel wise gradient
+    In 2D and 3D
+*/
+
+class LNCCTest {
+    /*
+    Class to compute the LNCC between two values without any convolution
+    Will take some time, don't judge me!!
+    */
+public:
+    LNCCTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create a reference and floating 2D images
+        vector<NiftiImage::dim_t> dim{ 16, 16 };
+        reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+
+        // Create a reference 3D image
+        dim.push_back(16);
+        reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+
+        // Create corresponding identify control point grids
+        cpp2d = CreateControlPointGrid(reference2d);
+        cpp3d = CreateControlPointGrid(reference3d);
+
+        // Fill images with random values
+        float *ref2dPtr = static_cast<float *>(reference2d->data);
+        float *flo2dPtr = static_cast<float *>(floating2d->data);
+        for (int y = 0; y < reference2d->ny; ++y)
+            for (int x = 0; x < reference2d->nx; ++x) {
+                *ref2dPtr++ = distr(gen);
+                *flo2dPtr++ = distr(gen);
+            }
+
+        // Fill images with random values
+        float *ref3dPtr = static_cast<float *>(reference3d->data);
+        float *flo3dPtr = static_cast<float *>(floating3d->data);
+        for (int z = 0; z < reference3d->nz; ++z)
+            for (int y = 0; y < reference3d->ny; ++y)
+                for (int x = 0; x < reference3d->nx; ++x) {
+                    *ref3dPtr++ = distr(gen);
+                    *flo3dPtr++ = distr(gen);
+                }
+
+        // Create the object to compute the expected values
+        vector<TestData> testData;
+        this->_ref = reference2d;
+        this->_flo = floating2d;
+        testData.emplace_back(TestData(
+            "LNCC 2D -1",
+            std::move(NiftiImage(reference2d)),
+            std::move(NiftiImage(floating2d)),
+            std::move(NiftiImage(cpp2d)),
+            -1.f,
+            this->GetLNCCNoConv(1)
+        ));
+        testData.emplace_back(TestData(
+            "LNCC 2D -1 same image",
+            std::move(NiftiImage(reference2d)),
+            std::move(NiftiImage(reference2d)),
+            std::move(NiftiImage(cpp2d)),
+            -1.f,
+            1.f
+        ));
+        testData.emplace_back(TestData(
+            "LNCC 2D -5",
+            std::move(NiftiImage(reference2d)),
+            std::move(NiftiImage(floating2d)),
+            std::move(NiftiImage(cpp2d)),
+            -5.f,
+            this->GetLNCCNoConv(5)
+        ));
+        testData.emplace_back(TestData(
+            "LNCC 2D -5 same image",
+            std::move(NiftiImage(reference2d)),
+            std::move(NiftiImage(reference2d)),
+            std::move(NiftiImage(cpp2d)),
+            -5.f,
+            1.f
+        ));
+        reg_tools_multiplyValueToImage(reference2d, floating2d, -1.f);
+        testData.emplace_back(TestData(
+            "LNCC 2D -1 same image negated",
+            std::move(NiftiImage(reference2d)),
+            std::move(NiftiImage(floating2d)),
+            std::move(NiftiImage(cpp2d)),
+            -1.f,
+            1.f
+        ));
+        testData.emplace_back(TestData(
+            "LNCC 2D -5 same image negated",
+            std::move(NiftiImage(reference2d)),
+            std::move(NiftiImage(floating2d)),
+            std::move(NiftiImage(cpp2d)),
+            -5.f,
+            1.f
+        ));
+        this->_ref = reference3d;
+        this->_flo = floating3d;
+        testData.emplace_back(TestData(
+            "LNCC 3D -1",
+            std::move(NiftiImage(reference3d)),
+            std::move(NiftiImage(floating3d)),
+            std::move(NiftiImage(cpp3d)),
+            -1.f,
+            this->GetLNCCNoConv(1)
+        ));
+        testData.emplace_back(TestData(
+            "LNCC 3D -1 same image",
+            std::move(NiftiImage(reference3d)),
+            std::move(NiftiImage(reference3d)),
+            std::move(NiftiImage(cpp3d)),
+            -1.f,
+            1.f
+        ));
+        testData.emplace_back(TestData(
+            "LNCC 3D -5",
+            std::move(NiftiImage(reference3d)),
+            std::move(NiftiImage(floating3d)),
+            std::move(NiftiImage(cpp3d)),
+            -5.f,
+            this->GetLNCCNoConv(5)
+        ));
+        testData.emplace_back(TestData(
+            "LNCC 3D -5 same image",
+            std::move(NiftiImage(reference3d)),
+            std::move(NiftiImage(reference3d)),
+            std::move(NiftiImage(cpp3d)),
+            -5.f,
+            1.f
+        ));
+        reg_tools_multiplyValueToImage(reference3d, floating3d, -1.f);
+        testData.emplace_back(TestData(
+            "LNCC 3D -1 same image negated",
+            std::move(NiftiImage(reference3d)),
+            std::move(NiftiImage(floating3d)),
+            std::move(NiftiImage(cpp3d)),
+            -1.f,
+            1.f
+        ));
+        testData.emplace_back(TestData(
+            "LNCC 3D -5 same image negated",
+            std::move(NiftiImage(reference3d)),
+            std::move(NiftiImage(floating3d)),
+            std::move(NiftiImage(cpp3d)),
+            -5.f,
+            1.f
+        ));
+        for (auto&& data : testData) {
+            for (auto&& platformType : PlatformTypes) {
+                shared_ptr<Platform> platform{ new Platform(platformType) };
+                // Make a copy of the test data
+                auto td = data;
+                auto&& [testName, reference, floating, cpp, sigma, result] = td;
+                // Create content creator
+                unique_ptr<F3dContentCreator> contentCreator{
+                    dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d))
+                };
+                // Create the content
+                unique_ptr<F3dContent> content{ contentCreator->Create(reference, floating, cpp) };
+                // Initialise the warped image using nearest neigh interpolation
+                unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                compute->ResampleImage(0, 0);
+                content->SetWarped(floating.disown());
+                // Create the measure
+                unique_ptr<Measure> measure{ platform->CreateMeasure() };
+                // Use LNCC as a measure
+                unique_ptr<reg_lncc> measure_lncc{ dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc)) };
+                measure_lncc->SetKernelStandardDeviation(0, sigma);
+                measure_lncc->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0
+                measure->Initialise(*measure_lncc, *content);
+
+                testCases.push_back({ std::move(content), std::move(measure_lncc), platform, std::move(td) });
+            }
+        }
+    }
+
+    ~LNCCTest() {
+        if (this->_kernel != nullptr) delete[] this->_kernel;
+    }
+
+protected:
+    NiftiImage reference2d;
+    NiftiImage reference3d;
+    NiftiImage floating2d;
+    NiftiImage floating3d;
+    NiftiImage cpp2d;
+    NiftiImage cpp3d;
+    nifti_image *_ref = nullptr;
+    nifti_image *_flo = nullptr;
+    float *_kernel = nullptr;
+    float _kernelStdVoxel=5;
+    int _kernel_radius[3];
+    int _kernel_size[3];
+    using LocalStats = std::tuple<float, float>;
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, int, float>;
+    using TestCase = std::tuple<unique_ptr<Content>, unique_ptr<reg_lncc>, shared_ptr<Platform>, TestData>;
+
+    inline static vector<TestCase> testCases;
+
+    float GetLNCCNoConv(int kernelStd) {
+        double lncc_value = 0;
+        // Compute the kernel
+        this->_kernelStdVoxel = fabs(kernelStd);
+        this->InitialiseKernel();
+        float lncc = 0;
+        float voxelNumber = 0;
+        for (int z = 0; z < this->_ref->nz; ++z) {
+            for (int y = 0; y < this->_ref->ny; ++y) {
+                for (int x = 0; x < this->_ref->nx; ++x) {
+                    lncc += fabs(this->GetLocalCC(x, y, z, this->GetLocalMeans(x, y, z)));
+                    voxelNumber++;
+                }
+            }
+        }
+        return lncc / voxelNumber;
+    }
+
+    void InitialiseKernel() {
+        if (this->_kernel != nullptr) {
+            delete[] this->_kernel;
+        }
+        this->_kernel_radius[0] = 3 * this->_kernelStdVoxel;
+        this->_kernel_radius[1] = 3 * this->_kernelStdVoxel;
+        this->_kernel_radius[2] = 0;
+        if (this->_ref->ndim > 2)
+            this->_kernel_radius[2] = 3 * this->_kernelStdVoxel;
+        this->_kernel_size[0] = this->_kernel_radius[0] * 2 + 1;
+        this->_kernel_size[1] = this->_kernel_radius[1] * 2 + 1;
+        this->_kernel_size[2] = this->_kernel_radius[2] * 2 + 1;
+        this->_kernel = new float[this->_kernel_size[0] *
+            this->_kernel_size[1] *
+            this->_kernel_size[2]];
+        float *kernelPtr = this->_kernel;
+
+        for (int z = -this->_kernel_radius[2]; z <= this->_kernel_radius[2]; z++) {
+            float z_value = static_cast<float>(
+                exp(-(z * z) / (2.0 * reg_pow2(this->_kernelStdVoxel))) /
+                (this->_kernelStdVoxel * 2.506628274631)
+                );
+            for (int y = -this->_kernel_radius[1]; y <= this->_kernel_radius[1]; y++) {
+                float y_value = static_cast<float>(
+                    exp(-(y * y) / (2.0 * reg_pow2(this->_kernelStdVoxel))) /
+                    (this->_kernelStdVoxel * 2.506628274631)
+                    );
+                for (int x = -this->_kernel_radius[0]; x <= this->_kernel_radius[0]; x++) {
+                    float x_value = static_cast<float>(
+                        exp(-(x * x) / (2.0 * reg_pow2(this->_kernelStdVoxel))) /
+                        (this->_kernelStdVoxel * 2.506628274631)
+                        );
+                    *kernelPtr++ = x_value * y_value * z_value;
+                }
+            }
+        }
+    }
+
+    LocalStats GetLocalMeans(int x, int y, int z) {
+        double mean_ref = 0.;
+        double mean_flo = 0.;
+        double sum_kernel = 0.;
+        float *kernelPtr = this->_kernel;
+        float *refPtr = static_cast<float *>(this->_ref->data);
+        float *floPtr = static_cast<float *>(this->_flo->data);
+        for (int k = -this->_kernel_radius[2]; k <= this->_kernel_radius[2]; k++) {
+            int zz = z + k;
+            if (0 <= zz && zz < this->_ref->nz) {
+                for (int j = -this->_kernel_radius[1]; j <= this->_kernel_radius[1]; j++) {
+                    int yy = y + j;
+                    if (0 <= yy && yy < this->_ref->ny) {
+                        for (int i = -this->_kernel_radius[0]; i <= this->_kernel_radius[0]; i++) {
+                            int xx = x + i;
+                            if (0 <= xx && xx < this->_ref->nx) {
+                                double kernelValue = *kernelPtr;
+                                int index = (zz * this->_ref->ny + yy) * this->_ref->nx + xx;
+                                mean_ref += kernelValue * refPtr[index];
+                                mean_flo += kernelValue * floPtr[index];
+                                sum_kernel += kernelValue;
+                            }
+                            kernelPtr++;
+                        }
+                    } else kernelPtr += this->_kernel_size[0];
+                }
+            } else kernelPtr += this->_kernel_size[0] * this->_kernel_size[1];
+        }
+        return LocalStats(mean_ref / sum_kernel, mean_flo / sum_kernel);
+    }
+
+    float GetLocalCC(int x, int y, int z, LocalStats means) {
+        float *kernelPtr = this->_kernel;
+        float *refPtr = static_cast<float *>(this->_ref->data);
+        float *floPtr = static_cast<float *>(this->_flo->data);
+        auto &&[mean_ref, mean_flo] = means;
+        double var_ref = 0.;
+        double var_flo = 0.;
+        double wdiff = 0.;
+        double sum_kernel = 0.;
+        for (int k = -this->_kernel_radius[2]; k <= this->_kernel_radius[2]; k++) {
+            int zz = z + k;
+            if (0 <= zz && zz < this->_ref->nz) {
+                for (int j = -this->_kernel_radius[1]; j <= this->_kernel_radius[1]; j++) {
+                    int yy = y + j;
+                    if (0 <= yy && yy < this->_ref->ny) {
+                        for (int i = -this->_kernel_radius[0]; i <= this->_kernel_radius[0]; i++) {
+                            int xx = x + i;
+                            if (0 <= xx && xx < this->_ref->nx) {
+                                int index = (zz * this->_ref->ny + yy) * this->_ref->nx + xx;
+                                float refValue = refPtr[index];
+                                float floValue = floPtr[index];
+                                float kernelValue = *kernelPtr;
+                                var_ref += kernelValue * (refValue - mean_ref) * (refValue - mean_ref);
+                                var_flo += kernelValue * (floValue - mean_flo) * (floValue - mean_flo);
+                                wdiff += kernelValue * (refValue - mean_ref) * (floValue - mean_flo);
+                                sum_kernel += kernelValue;
+                            }
+                            kernelPtr++;
+                        }
+                    } else kernelPtr += this->_kernel_size[0];
+                }
+
+            } else kernelPtr += this->_kernel_size[0] * this->_kernel_size[1];
+        }
+        var_ref /= sum_kernel;
+        var_flo /= sum_kernel;
+        wdiff /= sum_kernel;
+        return wdiff / (sqrtf(var_ref) * sqrtf(var_flo));
+    }
+};
+
+TEST_CASE_METHOD(LNCCTest, "LNCC", "[GetSimilarityMeasureValue]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : this->testCases) {
+        // Retrieve test information
+        auto&& [content, measure, platform, testData] = testCase;
+        auto&& [testName, reference, floating, cpp, sigma, value] = testData;
+
+        SECTION(testName) {
+            float lncc = measure->GetSimilarityMeasureValue();
+            std::cout << lncc << " " << value << std::endl;
+            REQUIRE(fabs(lncc - value) < EPS);
+            content.reset();
+        }
+    }
+}
\ No newline at end of file

From aa79bf2276bf3ede331aeec2ca61baab56cc91c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 23 Jun 2023 19:43:49 +0100
Subject: [PATCH 146/314] Refactorisations

---
 niftyreg_build_version.txt       |  2 +-
 reg-apps/reg_f3d.cpp             | 54 +++++++++++++++++---------------
 reg-lib/cuda/_reg_common_cuda.cu |  2 +-
 reg-test/CMakeLists.txt          |  2 +-
 4 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 10b0c0db..2b930fc4 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-264
+265
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 4dda0b6d..30489b3c 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -24,6 +24,8 @@
 #   include <time.h>
 #endif
 
+using PrecisionType = float;
+
 void PetitUsage(char *exec) {
     char text[255];
     reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
@@ -79,7 +81,7 @@ void Usage(char *exec) {
     reg_print_info(exec, "");
     reg_print_info(exec, "*** Regularisation options:");
     reg_print_info(exec, "\t-be <float>\t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]");
-    reg_print_info(exec, "\t-le <float>\t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.00]");
+    reg_print_info(exec, "\t-le <float>\t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.01]");
     reg_print_info(exec, "\t-jl <float>\t\tWeight of log of the Jacobian determinant penalty term [0.0]");
     reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position");
     reg_print_info(exec, "\t-land <float> <file>\tUse of a set of landmarks which distance should be minimised");
@@ -276,12 +278,12 @@ int main(int argc, char **argv) {
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
     // Check the type of registration object to create
-    unique_ptr<reg_f3d<float>> reg;
+    unique_ptr<reg_f3d<PrecisionType>> reg;
     PlatformType platformType(PlatformType::Cpu);
     unsigned gpuIdx = 999;
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) {
-            reg.reset(new reg_f3d2<float>(referenceImage->nt, floatingImage->nt));
+            reg.reset(new reg_f3d2<PrecisionType>(referenceImage->nt, floatingImage->nt));
         } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
             PlatformType value{ atoi(argv[++i]) };
             if (value < PlatformType::Cpu || value > PlatformType::Cuda) {
@@ -304,7 +306,7 @@ int main(int argc, char **argv) {
         }
     }
     if (!reg)
-        reg.reset(new reg_f3d<float>(referenceImage->nt, floatingImage->nt));
+        reg.reset(new reg_f3d<PrecisionType>(referenceImage->nt, floatingImage->nt));
     reg->SetReferenceImage(referenceImage);
     reg->SetFloatingImage(floatingImage);
     reg->SetPlatformType(platformType);
@@ -366,11 +368,11 @@ int main(int argc, char **argv) {
         } else if (strcmp(argv[i], "-maxit") == 0 || strcmp(argv[i], "--maxit") == 0) {
             reg->SetMaximalIterationNumber(atoi(argv[++i]));
         } else if (strcmp(argv[i], "-sx") == 0 || strcmp(argv[i], "--sx") == 0) {
-            reg->SetSpacing(0, (float)atof(argv[++i]));
+            reg->SetSpacing(0, (PrecisionType)atof(argv[++i]));
         } else if (strcmp(argv[i], "-sy") == 0 || strcmp(argv[i], "--sy") == 0) {
-            reg->SetSpacing(1, (float)atof(argv[++i]));
+            reg->SetSpacing(1, (PrecisionType)atof(argv[++i]));
         } else if (strcmp(argv[i], "-sz") == 0 || strcmp(argv[i], "--sz") == 0) {
-            reg->SetSpacing(2, (float)atof(argv[++i]));
+            reg->SetSpacing(2, (PrecisionType)atof(argv[++i]));
         } else if ((strcmp(argv[i], "--nmi") == 0)) {
             int bin = 64;
             if (refBinNumber != 0)
@@ -407,15 +409,15 @@ int main(int argc, char **argv) {
         } else if (strcmp(argv[i], "-lp") == 0 || strcmp(argv[i], "--lp") == 0) {
             reg->SetLevelToPerform(atoi(argv[++i]));
         } else if (strcmp(argv[i], "-be") == 0 || strcmp(argv[i], "--be") == 0) {
-            reg->SetBendingEnergyWeight(atof(argv[++i]));
+            reg->SetBendingEnergyWeight((PrecisionType)atof(argv[++i]));
         } else if (strcmp(argv[i], "-le") == 0 || strcmp(argv[i], "--le") == 0) {
-            reg->SetLinearEnergyWeight(atof(argv[++i]));
+            reg->SetLinearEnergyWeight((PrecisionType)atof(argv[++i]));
         } else if (strcmp(argv[i], "-jl") == 0 || strcmp(argv[i], "--jl") == 0) {
-            reg->SetJacobianLogWeight(atof(argv[++i]));
+            reg->SetJacobianLogWeight((PrecisionType)atof(argv[++i]));
         } else if (strcmp(argv[i], "-noAppJL") == 0 || strcmp(argv[i], "--noAppJL") == 0) {
             reg->DoNotApproximateJacobianLog();
         } else if (strcmp(argv[i], "-land") == 0 || strcmp(argv[i], "--land") == 0) {
-            float weight = atof(argv[++i]);
+            float weight = (float)atof(argv[++i]);
             char *filename = argv[++i];
             std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(filename);
             size_t landmarkNumber = inputMatrixSize.first;
@@ -456,45 +458,45 @@ int main(int argc, char **argv) {
                 free(allLandmarks[l]);
             free(allLandmarks);
         } else if ((strcmp(argv[i], "-smooR") == 0) || (strcmp(argv[i], "-smooT") == 0) || strcmp(argv[i], "--smooR") == 0) {
-            reg->SetReferenceSmoothingSigma(atof(argv[++i]));
+            reg->SetReferenceSmoothingSigma((PrecisionType)atof(argv[++i]));
         } else if ((strcmp(argv[i], "-smooF") == 0) || (strcmp(argv[i], "-smooS") == 0) || strcmp(argv[i], "--smooF") == 0) {
-            reg->SetFloatingSmoothingSigma(atof(argv[++i]));
+            reg->SetFloatingSmoothingSigma((PrecisionType)atof(argv[++i]));
         } else if ((strcmp(argv[i], "-rLwTh") == 0) || (strcmp(argv[i], "-tLwTh") == 0)) {
             int tp = atoi(argv[++i]);
-            float val = atof(argv[++i]);
+            PrecisionType val = (PrecisionType)atof(argv[++i]);
             reg->SetReferenceThresholdLow(tp, val);
         } else if ((strcmp(argv[i], "-rUpTh") == 0) || strcmp(argv[i], "-tUpTh") == 0) {
             int tp = atoi(argv[++i]);
-            float val = atof(argv[++i]);
+            PrecisionType val = (PrecisionType)atof(argv[++i]);
             reg->SetReferenceThresholdUp(tp, val);
         } else if ((strcmp(argv[i], "-fLwTh") == 0) || (strcmp(argv[i], "-sLwTh") == 0)) {
             int tp = atoi(argv[++i]);
-            float val = atof(argv[++i]);
+            PrecisionType val = (PrecisionType)atof(argv[++i]);
             reg->SetFloatingThresholdLow(tp, val);
         } else if ((strcmp(argv[i], "-fUpTh") == 0) || (strcmp(argv[i], "-sUpTh") == 0)) {
             int tp = atoi(argv[++i]);
-            float val = atof(argv[++i]);
+            PrecisionType val = (PrecisionType)atof(argv[++i]);
             reg->SetFloatingThresholdUp(tp, val);
         } else if ((strcmp(argv[i], "--rLwTh") == 0)) {
-            float threshold = atof(argv[++i]);
+            PrecisionType threshold = (PrecisionType)atof(argv[++i]);
             for (int t = 0; t < referenceImage->nt; ++t)
                 reg->SetReferenceThresholdLow(t, threshold);
         } else if ((strcmp(argv[i], "--rUpTh") == 0)) {
-            float threshold = atof(argv[++i]);
+            PrecisionType threshold = (PrecisionType)atof(argv[++i]);
             for (int t = 0; t < referenceImage->nt; ++t)
                 reg->SetReferenceThresholdUp(t, threshold);
         } else if ((strcmp(argv[i], "--fLwTh") == 0)) {
-            float threshold = atof(argv[++i]);
+            PrecisionType threshold = (PrecisionType)atof(argv[++i]);
             for (int t = 0; t < floatingImage->nt; ++t)
                 reg->SetFloatingThresholdLow(t, threshold);
         } else if ((strcmp(argv[i], "--fUpTh") == 0)) {
-            float threshold = atof(argv[++i]);
+            PrecisionType threshold = (PrecisionType)atof(argv[++i]);
             for (int t = 0; t < floatingImage->nt; ++t)
                 reg->SetFloatingThresholdUp(t, threshold);
         } else if (strcmp(argv[i], "-smoothGrad") == 0) {
-            reg->SetGradientSmoothingSigma(atof(argv[++i]));
+            reg->SetGradientSmoothingSigma((PrecisionType)atof(argv[++i]));
         } else if (strcmp(argv[i], "--smoothGrad") == 0) {
-            reg->SetGradientSmoothingSigma(atof(argv[++i]));
+            reg->SetGradientSmoothingSigma((PrecisionType)atof(argv[++i]));
         } else if (strcmp(argv[i], "-ssd") == 0) {
             int timepoint = atoi(argv[++i]);
             bool normalise = 1;
@@ -538,7 +540,7 @@ int main(int argc, char **argv) {
             reg->UseRobustRange();
         } else if (strcmp(argv[i], "-lncc") == 0) {
             int tp = atoi(argv[++i]);
-            float stdev = atof(argv[++i]);
+            float stdev = (float)atof(argv[++i]);
             reg->UseLNCC(tp, stdev);
         } else if (strcmp(argv[i], "--lncc") == 0) {
             float stdev = (float)atof(argv[++i]);
@@ -581,7 +583,7 @@ int main(int argc, char **argv) {
             NiftiImage refLocalWeightSim = reg_io_ReadImageFile(argv[++i]);
             reg->SetLocalWeightSim(std::move(refLocalWeightSim));
         } else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) {
-            reg->SetWarpedPaddingValue(atof(argv[++i]));
+            reg->SetWarpedPaddingValue((float)atof(argv[++i]));
         } else if (strcmp(argv[i], "-nopy") == 0 || strcmp(argv[i], "--nopy") == 0) {
             reg->DoNotUsePyramidalApproach();
         } else if (strcmp(argv[i], "-noConj") == 0 || strcmp(argv[i], "--noConj") == 0) {
@@ -611,7 +613,7 @@ int main(int argc, char **argv) {
             }
             reg->SetFloatingMask(std::move(floatingMaskImage));
         } else if (strcmp(argv[i], "-ic") == 0 || strcmp(argv[i], "--ic") == 0) {
-            reg->SetInverseConsistencyWeight(atof(argv[++i]));
+            reg->SetInverseConsistencyWeight((PrecisionType)atof(argv[++i]));
         } else if (strcmp(argv[i], "-nox") == 0) {
             reg->NoOptimisationAlongX();
         } else if (strcmp(argv[i], "-noy") == 0) {
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 09351400..26eefc07 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -654,7 +654,7 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
         resDesc.res.array.array = static_cast<cudaArray*>(const_cast<void*>(devPtr));
         break;
     default:
-        reg_print_fct_error("reg_createTextureObject");
+        reg_print_fct_error("cudaCommon_createTextureObject");
         reg_print_msg_error("Unsupported resource type");
         reg_exit();
     }
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index a429150e..09c72cd7 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -114,9 +114,9 @@ set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST})
 set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
+set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
-set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
 
 foreach(EXEC ${EXEC_LIST})
   add_executable(${EXEC} ${EXEC}.cpp)

From a34958585142fe22ec2b0c83409810398944ee55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 28 Jun 2023 18:10:27 +0100
Subject: [PATCH 147/314] Fix a bug causing wrong calculation of the affine
 transformation matrix

---
 niftyreg_build_version.txt  |  2 +-
 reg-apps/reg_aladin.cpp     |  4 ++--
 reg-lib/_reg_aladin.cpp     |  4 ++--
 reg-lib/_reg_aladin_sym.cpp |  5 +----
 reg-lib/_reg_base.cpp       | 12 ++++++------
 reg-lib/cpu/_reg_tools.cpp  | 22 +++++++++++-----------
 6 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 2b930fc4..c1d1ffbb 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-265
+266
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 1ced15cb..c9c82ec5 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -163,9 +163,9 @@ int main(int argc, char **argv) {
     float floatingSigma = 0;
     float referenceSigma = 0;
 
-    float referenceLowerThr = std::numeric_limits<PrecisionType>::min();
+    float referenceLowerThr = std::numeric_limits<PrecisionType>::lowest();
     float referenceUpperThr = std::numeric_limits<PrecisionType>::max();
-    float floatingLowerThr = std::numeric_limits<PrecisionType>::min();
+    float floatingLowerThr = std::numeric_limits<PrecisionType>::lowest();
     float floatingUpperThr = std::numeric_limits<PrecisionType>::max();
     float paddingValue = std::numeric_limits<PrecisionType>::quiet_NaN();
 
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 70df10c2..0cc6aa68 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -32,11 +32,11 @@ reg_aladin<T>::reg_aladin() {
     this->floatingSigma = 0;
     this->referenceSigma = 0;
 
+    this->referenceLowerThreshold = std::numeric_limits<T>::lowest();
     this->referenceUpperThreshold = std::numeric_limits<T>::max();
-    this->referenceLowerThreshold = std::numeric_limits<T>::min();
 
+    this->floatingLowerThreshold = std::numeric_limits<T>::lowest();
     this->floatingUpperThreshold = std::numeric_limits<T>::max();
-    this->floatingLowerThreshold = std::numeric_limits<T>::min();
 
     this->warpedPaddingValue = std::numeric_limits<T>::quiet_NaN();
 
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index fe97cca0..f131fea6 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -11,9 +11,6 @@ reg_aladin_sym<T>::reg_aladin_sym()
 
     this->backwardBlockMatchingParams = nullptr;
 
-    this->floatingUpperThreshold = std::numeric_limits<T>::max();
-    this->floatingLowerThreshold = std::numeric_limits<T>::min();
-
 #ifndef NDEBUG
     reg_print_msg_debug("reg_aladin_sym constructor called");
 #endif
@@ -63,7 +60,7 @@ void reg_aladin_sym<T>::InitialiseRegistration() {
             }
         }
     }
-    if (this->floatingLowerThreshold != std::numeric_limits<T>::min()) {
+    if (this->floatingLowerThreshold != std::numeric_limits<T>::lowest()) {
         for (unsigned l = 0; l < this->levelsToPerform; ++l) {
             T *refPtr = static_cast<T *>(this->floatingPyramid[l]->data);
             int *mskPtr = this->floatingMaskPyramid[l].get();
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 8e208d96..54eb63ab 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -34,14 +34,14 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     referenceSmoothingSigma = 0;
     floatingSmoothingSigma = 0;
 
+    referenceThresholdLow.reset(new T[referenceTimePoint]);
+    std::fill(referenceThresholdLow.get(), referenceThresholdLow.get() + referenceTimePoint, std::numeric_limits<T>::lowest());
     referenceThresholdUp.reset(new T[referenceTimePoint]);
     std::fill(referenceThresholdUp.get(), referenceThresholdUp.get() + referenceTimePoint, std::numeric_limits<T>::max());
-    referenceThresholdLow.reset(new T[referenceTimePoint]);
-    std::fill(referenceThresholdLow.get(), referenceThresholdLow.get() + referenceTimePoint, std::numeric_limits<T>::min());
+    floatingThresholdLow.reset(new T[floatingTimePoint]);
+    std::fill(floatingThresholdLow.get(), floatingThresholdLow.get() + floatingTimePoint, std::numeric_limits<T>::lowest());
     floatingThresholdUp.reset(new T[floatingTimePoint]);
     std::fill(floatingThresholdUp.get(), floatingThresholdUp.get() + floatingTimePoint, std::numeric_limits<T>::max());
-    floatingThresholdLow.reset(new T[floatingTimePoint]);
-    std::fill(floatingThresholdLow.get(), floatingThresholdLow.get() + floatingTimePoint, std::numeric_limits<T>::min());
 
     robustRange = false;
     warpedPaddingValue = std::numeric_limits<T>::quiet_NaN();
@@ -504,7 +504,7 @@ void reg_base<T>::Initialise() {
         T *refDataPtr = static_cast<T *>(tmpReference->data);
         reg_heapSort(refDataPtr, tmpReference->nvox);
         // Update the reference threshold values if no value has been setup by the user
-        if (referenceThresholdLow[0] == std::numeric_limits<T>::min())
+        if (referenceThresholdLow[0] == std::numeric_limits<T>::lowest())
             referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.02f)];
         if (referenceThresholdUp[0] == std::numeric_limits<T>::max())
             referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.98f)];
@@ -516,7 +516,7 @@ void reg_base<T>::Initialise() {
         T *floDataPtr = static_cast<T *>(tmpFloating->data);
         reg_heapSort(floDataPtr, tmpFloating->nvox);
         // Update the floating threshold values if no value has been setup by the user
-        if (floatingThresholdLow[0] == std::numeric_limits<T>::min())
+        if (floatingThresholdLow[0] == std::numeric_limits<T>::lowest())
             floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.02f)];
         if (floatingThresholdUp[0] == std::numeric_limits<T>::max())
             floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.98f)];
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 015be4d4..9b4dc6f9 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -104,35 +104,35 @@ void reg_intensityRescale_core(nifti_image *image,
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
         currentMin = (DataType)std::numeric_limits<unsigned char>::max();
-        currentMax = 0;
+        currentMax = (DataType)std::numeric_limits<unsigned char>::lowest();
         break;
     case NIFTI_TYPE_INT8:
         currentMin = (DataType)std::numeric_limits<char>::max();
-        currentMax = (DataType)std::numeric_limits<char>::min();
+        currentMax = (DataType)std::numeric_limits<char>::lowest();
         break;
     case NIFTI_TYPE_UINT16:
         currentMin = (DataType)std::numeric_limits<unsigned short>::max();
-        currentMax = (DataType)std::numeric_limits<unsigned short>::min();
+        currentMax = (DataType)std::numeric_limits<unsigned short>::lowest();
         break;
     case NIFTI_TYPE_INT16:
         currentMin = (DataType)std::numeric_limits<short>::max();
-        currentMax = (DataType)std::numeric_limits<short>::min();
+        currentMax = (DataType)std::numeric_limits<short>::lowest();
         break;
     case NIFTI_TYPE_UINT32:
         currentMin = (DataType)std::numeric_limits<unsigned>::max();
-        currentMax = (DataType)std::numeric_limits<unsigned>::min();
+        currentMax = (DataType)std::numeric_limits<unsigned>::lowest();
         break;
     case NIFTI_TYPE_INT32:
         currentMin = (DataType)std::numeric_limits<int>::max();
-        currentMax = (DataType)std::numeric_limits<int>::min();
+        currentMax = (DataType)std::numeric_limits<int>::lowest();
         break;
     case NIFTI_TYPE_FLOAT32:
         currentMin = (DataType)std::numeric_limits<float>::max();
-        currentMax = (DataType)std::numeric_limits<float>::min();
+        currentMax = (DataType)std::numeric_limits<float>::lowest();
         break;
     case NIFTI_TYPE_FLOAT64:
         currentMin = (DataType)std::numeric_limits<double>::max();
-        currentMax = (DataType)std::numeric_limits<double>::min();
+        currentMax = (DataType)std::numeric_limits<double>::lowest();
         break;
     }
 
@@ -284,7 +284,7 @@ template<class T, class DataType>
 void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) {
     DataType *imagePtr = static_cast<DataType*>(image->data);
     T currentMin = std::numeric_limits<T>::max();
-    T currentMax = std::numeric_limits<T>::min();
+    T currentMax = std::numeric_limits<T>::lowest();
 
     if (image->scl_slope == 0)image->scl_slope = 1.0;
 
@@ -1338,7 +1338,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
                             }
                             currIterator = tmp_lab.begin();
                             maxindex = 0;
-                            maxval = std::numeric_limits<float>::min();
+                            maxval = std::numeric_limits<float>::lowest();
                             while (currIterator != tmp_lab.end()) {
                                 if (currIterator->second > maxval) {
                                     maxindex = currIterator->first;
@@ -2008,7 +2008,7 @@ DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool
         reg_print_msg_error("reg_tools_getMinMaxValue. The required time point does not exists");
 
     const DataType *imgPtr = static_cast<DataType*>(image->data);
-    DataType retValue = calcMin ? std::numeric_limits<DataType>::max() : std::numeric_limits<DataType>::min();
+    DataType retValue = calcMin ? std::numeric_limits<DataType>::max() : std::numeric_limits<DataType>::lowest();
     const size_t voxelNumber = CalcVoxelNumber(*image);
     const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
 

From 5b7d8feaa780ee914c8bc15fbcfc2f8a3c4adae8 Mon Sep 17 00:00:00 2001
From: mmodat <marc.modat@gmail.com>
Date: Fri, 7 Jul 2023 14:49:25 +0100
Subject: [PATCH 148/314] Issue #92: fix affine initialisation in f3d. matrix
 went out of scope during refactoring.

---
 niftyreg_build_version.txt     |  2 +-
 reg-apps/reg_aladin.cpp        | 30 +++++++++++++++---------------
 reg-apps/reg_f3d.cpp           |  6 ++++--
 reg-lib/cpu/_reg_optimiser.cpp |  4 ++--
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c1d1ffbb..81e5b7ce 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-266
+267
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index c9c82ec5..26413b68 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -156,7 +156,7 @@ int main(int argc, char **argv) {
     int rigidFlag = 1;
     int blockStepSize = 1;
     int blockPercentage = 50;
-    float inlierLts = 50.0f;
+    int inlierLts = 50;
     int alignCentre = 1;
     int alignCentreOfMass = 0;
     int interpolation = 1;
@@ -255,16 +255,16 @@ int main(int argc, char **argv) {
             alignCentre = 0;
             alignCentreOfMass = 2;
         } else if (strcmp(argv[i], "-%v") == 0 || strcmp(argv[i], "-pv") == 0 || strcmp(argv[i], "--pv") == 0) {
-            float value = atof(argv[++i]);
-            if (value < 0.f || value>100.f) {
-                reg_print_msg_error("The variance argument is expected to be between 0 and 100");
+            int value = atoi(argv[++i]);
+            if (value < 1 || value>100) {
+                reg_print_msg_error("The variance argument is expected to be an integer between 1 and 100");
                 return EXIT_FAILURE;
             }
             blockPercentage = value;
         } else if (strcmp(argv[i], "-%i") == 0 || strcmp(argv[i], "-pi") == 0 || strcmp(argv[i], "--pi") == 0) {
-            float value = atof(argv[++i]);
-            if (value < 0.f || value>100.f) {
-                reg_print_msg_error("The inlier argument is expected to be between 0 and 100");
+            int value = atoi(argv[++i]);
+            if (value < 1 || value>100) {
+                reg_print_msg_error("The inlier argument is expected to be an integer between 1 and 100");
                 return EXIT_FAILURE;
             }
             inlierLts = value;
@@ -273,17 +273,17 @@ int main(int argc, char **argv) {
         } else if (strcmp(argv[i], "-interp") == 0 || strcmp(argv[i], "--interp") == 0) {
             interpolation = atoi(argv[++i]);
         } else if (strcmp(argv[i], "-refLowThr") == 0 || strcmp(argv[i], "--refLowThr") == 0) {
-            referenceLowerThr = atof(argv[++i]);
+            referenceLowerThr = std::stof(argv[++i]);
         } else if (strcmp(argv[i], "-refUpThr") == 0 || strcmp(argv[i], "--refUpThr") == 0) {
-            referenceUpperThr = atof(argv[++i]);
+            referenceUpperThr = std::stof(argv[++i]);
         } else if (strcmp(argv[i], "-floLowThr") == 0 || strcmp(argv[i], "--floLowThr") == 0) {
-            floatingLowerThr = atof(argv[++i]);
+            floatingLowerThr = std::stof(argv[++i]);
         } else if (strcmp(argv[i], "-floUpThr") == 0 || strcmp(argv[i], "--floUpThr") == 0) {
-            floatingUpperThr = atof(argv[++i]);
+            floatingUpperThr = std::stof(argv[++i]);
         }
 
         else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) {
-            paddingValue = atof(argv[++i]);
+            paddingValue = std::stof(argv[++i]);
         } else if (strcmp(argv[i], "-iso") == 0 || strcmp(argv[i], "--iso") == 0) {
             iso = true;
         } else if (strcmp(argv[i], "-voff") == 0 || strcmp(argv[i], "--voff") == 0) {
@@ -495,9 +495,9 @@ int main(int argc, char **argv) {
 #endif
         time_t end;
         time(&end);
-        int minutes = (int)floorf((end - start) / 60.0f);
-        int seconds = (int)(end - start - 60 * minutes);
-        sprintf(text, "Registration performed in %i min %i sec", minutes, seconds);
+        float minutes = floorf((end - start) / 60.0f);
+        float seconds = (end - start - 60 * minutes);
+        sprintf(text, "Registration performed in %i min %i sec", (int)minutes, (int)seconds);
         reg_print_info((argv[0]), text);
         reg_print_info((argv[0]), "Have a good day !");
 #ifdef NDEBUG
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 30489b3c..460f26ec 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -319,6 +319,9 @@ int main(int argc, char **argv) {
     int refBinNumber = 0;
     int floBinNumber = 0;
 
+    // mat44 to store the affine matrix if needed
+    mat44 affineMatrix;
+
     /* read the input parameter */
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 ||
@@ -341,7 +344,6 @@ int main(int argc, char **argv) {
                 return EXIT_FAILURE;
             }
             // Read the affine matrix
-            mat44 affineMatrix;
             reg_tool_ReadAffineFile(&affineMatrix, affineTransformationName);
             // Send the transformation to the registration object
             reg->SetAffineTransformation(&affineMatrix);
@@ -748,7 +750,7 @@ int main(int argc, char **argv) {
         time_t end;
         time(&end);
         int minutes = (int)floorf((end - start) / 60.0f);
-        int seconds = (int)(end - start - 60 * minutes);
+        int seconds = ((int)(end - start) - 60 * minutes);
         text = stringFormat("Registration performed in %i min %i sec", minutes, seconds);
         reg_print_info((argv[0]), text.c_str());
         reg_print_info((argv[0]), "Have a good day !");
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index 30b8a069..3acbb846 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -69,7 +69,7 @@ void reg_optimiser<T>::Initialise(size_t nvox,
     this->maxIterationNumber = maxIt;
     this->currentIterationNumber = startIt;
     this->currentDof = cppData;
-    if (this->bestDof) free(this->bestDof);
+    if (this->bestDof != nullptr) free(this->bestDof);
     this->bestDof = (T*)malloc(this->dofNumber * sizeof(T));
     memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T));
     if (gradData)
@@ -80,7 +80,7 @@ void reg_optimiser<T>::Initialise(size_t nvox,
     if (cppDataBw) {
         this->currentDofBw = cppDataBw;
         this->isBackwards = true;
-        if (this->bestDofBw) free(this->bestDofBw);
+        if (this->bestDofBw != nullptr) free(this->bestDofBw);
         this->bestDofBw = (T*)malloc(this->dofNumberBw * sizeof(T));
         memcpy(this->bestDofBw, this->currentDofBw, this->dofNumberBw * sizeof(T));
     }

From a1ed246bcf82fc40019ea2c84af40d0f8f68ff35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 11 Jul 2023 13:42:13 +0100
Subject: [PATCH 149/314] Copy affine transformation into reg_base instead of
 linking

---
 niftyreg_build_version.txt |  2 +-
 reg-apps/reg_f3d.cpp       |  6 ++----
 reg-lib/_reg_base.cpp      |  5 ++---
 reg-lib/_reg_base.h        | 10 +++++-----
 reg-lib/_reg_f3d.cpp       |  4 ++--
 reg-lib/_reg_f3d.h         |  2 +-
 reg-lib/_reg_f3d2.cpp      |  2 +-
 7 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 81e5b7ce..864d5650 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-267
+268
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 460f26ec..7eb5b265 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -319,9 +319,6 @@ int main(int argc, char **argv) {
     int refBinNumber = 0;
     int floBinNumber = 0;
 
-    // mat44 to store the affine matrix if needed
-    mat44 affineMatrix;
-
     /* read the input parameter */
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 ||
@@ -344,9 +341,10 @@ int main(int argc, char **argv) {
                 return EXIT_FAILURE;
             }
             // Read the affine matrix
+            mat44 affineMatrix;
             reg_tool_ReadAffineFile(&affineMatrix, affineTransformationName);
             // Send the transformation to the registration object
-            reg->SetAffineTransformation(&affineMatrix);
+            reg->SetAffineTransformation(affineMatrix);
         } else if (strcmp(argv[i], "-incpp") == 0 || (strcmp(argv[i], "--incpp") == 0)) {
             NiftiImage inputCCPImage = reg_io_ReadImageFile(argv[++i]);
             if (!inputCCPImage) {
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 54eb63ab..308978df 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -30,7 +30,6 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     executableName = (char*)"NiftyReg BASE";
     referenceTimePoint = refTimePoint;
     floatingTimePoint = floTimePoint;
-    affineTransformation = nullptr;  // pointer to external
     referenceSmoothingSigma = 0;
     floatingSmoothingSigma = 0;
 
@@ -98,8 +97,8 @@ void reg_base<T>::SetReferenceMask(NiftiImage maskImageIn) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetAffineTransformation(mat44 *affineTransformationIn) {
-    affineTransformation = affineTransformationIn;
+void reg_base<T>::SetAffineTransformation(const mat44& affineTransformationIn) {
+    affineTransformation.reset(new mat44(affineTransformationIn));
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetAffineTransformation");
 #endif
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index f16184d1..e912977b 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -66,10 +66,10 @@ class reg_base: public InterfaceOptimiser {
     char *executableName;
     int referenceTimePoint;
     int floatingTimePoint;
-    NiftiImage inputReference; // pointer to external
-    NiftiImage inputFloating; // pointer to external
-    NiftiImage maskImage; // pointer to external
-    mat44 *affineTransformation; // pointer to external
+    NiftiImage inputReference;
+    NiftiImage inputFloating;
+    NiftiImage maskImage;
+    unique_ptr<mat44> affineTransformation;
     T referenceSmoothingSigma;
     T floatingSmoothingSigma;
     unique_ptr<T[]> referenceThresholdUp;
@@ -181,7 +181,7 @@ class reg_base: public InterfaceOptimiser {
     virtual void SetReferenceImage(NiftiImage);
     virtual void SetFloatingImage(NiftiImage);
     virtual void SetReferenceMask(NiftiImage);
-    virtual void SetAffineTransformation(mat44*);
+    virtual void SetAffineTransformation(const mat44&);
     virtual void SetReferenceSmoothingSigma(T);
     virtual void SetFloatingSmoothingSigma(T);
     virtual void SetGradientSmoothingSigma(T);
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index ac569c97..da1089f5 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -94,7 +94,7 @@ void reg_f3d<T>::SetSpacing(unsigned i, T s) {
 template<class T>
 void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
     unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
-    this->con.reset(contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)));
+    this->con.reset(contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation.get(), sizeof(T)));
     this->compute.reset(this->platform->CreateCompute(*this->con));
 }
 /* *************************************************************** */
@@ -200,7 +200,7 @@ void reg_f3d<T>::Initialise() {
         // The control point position image is initialised with the affine transformation
         if (!this->affineTransformation) {
             reg_getDeformationFromDisplacement(controlPointGrid);
-        } else reg_affine_getDeformationField(this->affineTransformation, controlPointGrid);
+        } else reg_affine_getDeformationField(this->affineTransformation.get(), controlPointGrid);
     } else {
         // The control point grid image is initialised with the provided grid
         controlPointGrid = inputControlPointGrid;
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index 9125ba15..882020b4 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -18,7 +18,7 @@
 template <class T>
 class reg_f3d: public reg_base<T> {
 protected:
-    NiftiImage inputControlPointGrid; // pointer to external
+    NiftiImage inputControlPointGrid;
     NiftiImage controlPointGrid;
     T bendingEnergyWeight;
     T linearEnergyWeight;
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 4ee3b9cf..f56d6a48 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -694,7 +694,7 @@ void reg_f3d2<T>::Initialise() {
                                                 controlPointGridBw,
                                                 this->referencePyramid[0],
                                                 this->floatingPyramid[0],
-                                                this->affineTransformation,
+                                                this->affineTransformation.get(),
                                                 gridSpacing);
     } else {
         // The control point grid image is initialised with the provided grid

From 19f45f6407937bd9a5a48af857b7823a04fcad5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 11 Jul 2023 13:44:39 +0100
Subject: [PATCH 150/314] Make affine transformation memory managed in
 reg_aladin*

---
 niftyreg_build_version.txt      |  2 +-
 reg-io/_reg_ReadWriteMatrix.cpp |  2 +-
 reg-io/_reg_ReadWriteMatrix.h   |  2 +-
 reg-lib/_reg_aladin.cpp         | 54 ++++++++++++-----------------
 reg-lib/_reg_aladin.h           |  7 ++--
 reg-lib/_reg_aladin_sym.cpp     | 60 ++++++++++++++-------------------
 reg-lib/_reg_aladin_sym.h       |  3 +-
 7 files changed, 54 insertions(+), 76 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 864d5650..c48f9e04 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-268
+269
diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp
index 6aef5626..7b420d2c 100644
--- a/reg-io/_reg_ReadWriteMatrix.cpp
+++ b/reg-io/_reg_ReadWriteMatrix.cpp
@@ -126,7 +126,7 @@ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) {
     affineFile.close();
 }
 /* *************************************************************** */
-void reg_tool_WriteAffineFile(mat44 *mat, const char *fileName) {
+void reg_tool_WriteAffineFile(const mat44 *mat, const char *fileName) {
     FILE *affineFile;
     affineFile = fopen(fileName, "w");
     for (int i = 0; i < 4; i++)
diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h
index f30d19dd..ce314ba5 100644
--- a/reg-io/_reg_ReadWriteMatrix.h
+++ b/reg-io/_reg_ReadWriteMatrix.h
@@ -65,7 +65,7 @@ mat44* reg_tool_ReadMat44File(char *fileName);
  * @param filename Name of the text file to save on the disk
  */
 extern "C++"
-void reg_tool_WriteAffineFile(mat44 *mat,
+void reg_tool_WriteAffineFile(const mat44 *mat,
                               const char *fileName);
 
 /**
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 0cc6aa68..01c8b13b 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -5,7 +5,7 @@ template<class T>
 reg_aladin<T>::reg_aladin() {
     this->executableName = (char*)"Aladin";
 
-    this->transformationMatrix = new mat44;
+    this->affineTransformation.reset(new mat44);
     this->inputTransformName = nullptr;
 
     this->blockMatchingParams = nullptr;
@@ -53,16 +53,6 @@ reg_aladin<T>::reg_aladin() {
 }
 /* *************************************************************** */
 template<class T>
-reg_aladin<T>::~reg_aladin() {
-    if (this->transformationMatrix)
-        delete this->transformationMatrix;
-
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_aladin destructor called");
-#endif
-}
-/* *************************************************************** */
-template<class T>
 bool reg_aladin<T>::TestMatrixConvergence(mat44 *mat) {
     bool convergence = true;
     if ((fabsf(mat->m[0][0]) - 1.0f) > CONVERGENCE_EPS)
@@ -248,13 +238,13 @@ void reg_aladin<T>::InitialiseRegistration() {
             reg_print_msg_error(text.c_str());
             reg_exit();
         }
-        reg_tool_ReadAffineFile(this->transformationMatrix, this->inputTransformName);
+        reg_tool_ReadAffineFile(this->affineTransformation.get(), this->inputTransformName);
     } else { // No input affine transformation
         for (int i = 0; i < 4; i++) {
             for (int j = 0; j < 4; j++) {
-                this->transformationMatrix->m[i][j] = 0;
+                this->affineTransformation->m[i][j] = 0;
             }
-            this->transformationMatrix->m[i][i] = 1;
+            this->affineTransformation->m[i][i] = 1;
         }
         if (this->alignCentre && this->alignCentreMass == 0) {
             const mat44 *floatingMatrix = (this->inputFloating->sform_code > 0) ? &(this->inputFloating->sto_xyz) : &(this->inputFloating->qto_xyz);
@@ -274,9 +264,9 @@ void reg_aladin<T>::InitialiseRegistration() {
             float referenceRealPosition[3];
             reg_mat44_mul(referenceMatrix, referenceCenter, referenceRealPosition);
             //Set translation to the transformation matrix
-            this->transformationMatrix->m[0][3] = floatingRealPosition[0] - referenceRealPosition[0];
-            this->transformationMatrix->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1];
-            this->transformationMatrix->m[2][3] = floatingRealPosition[2] - referenceRealPosition[2];
+            this->affineTransformation->m[0][3] = floatingRealPosition[0] - referenceRealPosition[0];
+            this->affineTransformation->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1];
+            this->affineTransformation->m[2][3] = floatingRealPosition[2] - referenceRealPosition[2];
         } else if (this->alignCentreMass == 2) {
             float referenceCentre[3] = { 0, 0, 0 };
             float referenceCount = 0;
@@ -325,10 +315,10 @@ void reg_aladin<T>::InitialiseRegistration() {
             float floCOM[3];
             if (this->inputFloating->sform_code > 0)
                 reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOM);
-            reg_mat44_eye(this->transformationMatrix);
-            this->transformationMatrix->m[0][3] = floCOM[0] - refCOM[0];
-            this->transformationMatrix->m[1][3] = floCOM[1] - refCOM[1];
-            this->transformationMatrix->m[2][3] = floCOM[2] - refCOM[2];
+            reg_mat44_eye(this->affineTransformation.get());
+            this->affineTransformation->m[0][3] = floCOM[0] - refCOM[0];
+            this->affineTransformation->m[1][3] = floCOM[1] - refCOM[1];
+            this->affineTransformation->m[2][3] = floCOM[2] - refCOM[2];
         }
     }
 }
@@ -378,7 +368,7 @@ void reg_aladin<T>::UpdateTransformationMatrix(int type) {
     this->optimiseKernel->template castTo<OptimiseKernel>()->Calculate(type);
 
 #ifndef NDEBUG
-    reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward matrix");
+    reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] updated forward matrix");
 #endif
 }
 /* *************************************************************** */
@@ -408,7 +398,7 @@ void reg_aladin<T>::ResolveMatrix(unsigned iterations, const unsigned optimizati
 #ifndef NDEBUG
         char text[255];
         sprintf(text, "%s - level: %i/%i - iteration %i/%i",
-                optimizationFlag ? (char *)"Affine" : (char *)"Rigid",
+                optimizationFlag ? (char*)"Affine" : (char*)"Rigid",
                 this->currentLevel + 1, this->numberOfLevels, iteration + 1, iterations);
         reg_print_msg_debug(text);
 #endif
@@ -426,7 +416,7 @@ void reg_aladin<T>::Run() {
     //Main loop over the levels:
     for (this->currentLevel = 0; this->currentLevel < this->levelsToPerform; this->currentLevel++) {
         this->InitAladinContent(this->referencePyramid[currentLevel], this->floatingPyramid[currentLevel],
-                                this->referenceMaskPyramid[currentLevel].get(), this->transformationMatrix, sizeof(T),
+                                this->referenceMaskPyramid[currentLevel].get(), this->affineTransformation.get(), sizeof(T),
                                 this->blockPercentage, this->inlierLts, this->blockStepSize);
         this->CreateKernels();
 
@@ -444,13 +434,13 @@ void reg_aladin<T>::Run() {
 
 #ifndef NDEBUG
         if (this->con->GetReference()->sform_code > 0)
-            reg_mat44_disp(&this->con->GetReference()->sto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)");
+            reg_mat44_disp(&this->con->GetReference()->sto_xyz, (char*)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)");
         else
-            reg_mat44_disp(&this->con->GetReference()->qto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)");
+            reg_mat44_disp(&this->con->GetReference()->qto_xyz, (char*)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)");
         if (this->con->GetFloating()->sform_code > 0)
-            reg_mat44_disp(&this->con->GetFloating()->sto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)");
+            reg_mat44_disp(&this->con->GetFloating()->sto_xyz, (char*)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)");
         else
-            reg_mat44_disp(&this->con->GetFloating()->qto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)");
+            reg_mat44_disp(&this->con->GetFloating()->qto_xyz, (char*)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)");
 #endif
 
         /* ****************** */
@@ -491,7 +481,7 @@ void reg_aladin<T>::Run() {
 template<class T>
 NiftiImage reg_aladin<T>::GetFinalWarpedImage() {
     // The initial images are used
-    if (!this->inputReference || !this->inputFloating || !this->transformationMatrix) {
+    if (!this->inputReference || !this->inputFloating || !this->affineTransformation) {
         reg_print_fct_error("reg_aladin::GetFinalWarpedImage()");
         reg_print_msg_error("The reference, floating images and the transformation have to be defined");
         reg_exit();
@@ -502,7 +492,7 @@ NiftiImage reg_aladin<T>::GetFinalWarpedImage() {
     reg_aladin<T>::InitAladinContent(this->inputReference,
                                      this->inputFloating,
                                      mask.get(),
-                                     this->transformationMatrix,
+                                     this->affineTransformation.get(),
                                      sizeof(T));
     reg_aladin<T>::CreateKernels();
 
@@ -548,12 +538,12 @@ void reg_aladin<T>::DebugPrintLevelInfoStart() {
     sprintf(text, "Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0],
             this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]);
     reg_print_info(this->executableName, text);
-    reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin] Initial transformation matrix:");
+    reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin] Initial transformation matrix:");
 }
 /* *************************************************************** */
 template<class T>
 void reg_aladin<T>::DebugPrintLevelInfoEnd() {
-    reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin] Final transformation matrix:");
+    reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin] Final transformation matrix:");
 }
 /* *************************************************************** */
 template class reg_aladin<float>;
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 3921d3d0..b1515195 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -71,7 +71,7 @@ class reg_aladin {
     vector<unique_ptr<int[]>> referenceMaskPyramid;
 
     char *inputTransformName;
-    mat44 *transformationMatrix;
+    unique_ptr<mat44> affineTransformation;
 
     bool verbose;
 
@@ -137,7 +137,6 @@ class reg_aladin {
     unique_ptr<AladinContent> con;
 
     reg_aladin();
-    virtual ~reg_aladin();
     GetStringMacro(ExecutableName, executableName);
 
     //No allocating of the images here...
@@ -166,8 +165,8 @@ class reg_aladin {
         return this->inputTransformName;
     }
 
-    mat44* GetTransformationMatrix() {
-        return this->transformationMatrix;
+    const mat44* GetTransformationMatrix() {
+        return this->affineTransformation.get();
     }
     NiftiImage GetFinalWarpedImage();
 
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index f131fea6..2cafb89e 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -7,7 +7,7 @@ reg_aladin_sym<T>::reg_aladin_sym()
     :reg_aladin<T>::reg_aladin() {
     this->executableName = (char*)"reg_aladin_sym";
 
-    this->backwardTransformationMatrix = new mat44;
+    this->affineTransformationBw.reset(new mat44);
 
     this->backwardBlockMatchingParams = nullptr;
 
@@ -17,16 +17,6 @@ reg_aladin_sym<T>::reg_aladin_sym()
 }
 /* *************************************************************** */
 template <class T>
-reg_aladin_sym<T>::~reg_aladin_sym() {
-    if (this->backwardTransformationMatrix)
-        delete this->backwardTransformationMatrix;
-
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_aladin_sym destructor called");
-#endif
-}
-/* *************************************************************** */
-template <class T>
 void reg_aladin_sym<T>::SetInputFloatingMask(NiftiImage inputFloatingMaskIn) {
     this->inputFloatingMask = inputFloatingMaskIn;
 }
@@ -125,12 +115,12 @@ void reg_aladin_sym<T>::InitialiseRegistration() {
         float floCOG[3];
         if (this->inputFloating->sform_code > 0)
             reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOG);
-        reg_mat44_eye(this->transformationMatrix);
-        this->transformationMatrix->m[0][3] = floCOG[0] - refCOG[0];
-        this->transformationMatrix->m[1][3] = floCOG[1] - refCOG[1];
-        this->transformationMatrix->m[2][3] = floCOG[2] - refCOG[2];
+        reg_mat44_eye(this->affineTransformation.get());
+        this->affineTransformation->m[0][3] = floCOG[0] - refCOG[0];
+        this->affineTransformation->m[1][3] = floCOG[1] - refCOG[1];
+        this->affineTransformation->m[2][3] = floCOG[2] - refCOG[2];
     }
-    *this->backwardTransformationMatrix = nifti_mat44_inverse(*this->transformationMatrix);
+    *this->affineTransformationBw = nifti_mat44_inverse(*this->affineTransformation);
 }
 /* *************************************************************** */
 template <class T>
@@ -154,26 +144,26 @@ void reg_aladin_sym<T>::UpdateTransformationMatrix(int type) {
     this->bOptimiseKernel->template castTo<OptimiseKernel>()->Calculate(type);
 
 #ifndef NDEBUG
-    reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated forward transformation matrix");
-    reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated backward transformation matrix");
+    reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] pre-updated forward transformation matrix");
+    reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[NiftyReg DEBUG] pre-updated backward transformation matrix");
 #endif
     // Forward and backward matrix are inverted
-    mat44 fInverted = nifti_mat44_inverse(*this->transformationMatrix);
-    mat44 bInverted = nifti_mat44_inverse(*this->backwardTransformationMatrix);
+    mat44 fInverted = nifti_mat44_inverse(*this->affineTransformation);
+    mat44 bInverted = nifti_mat44_inverse(*this->affineTransformationBw);
 
     // We average the forward and inverted backward matrix
-    *this->transformationMatrix = reg_mat44_avg2(this->transformationMatrix, &bInverted);
+    *this->affineTransformation = reg_mat44_avg2(this->affineTransformation.get(), &bInverted);
     // We average the inverted forward and backward matrix
-    *this->backwardTransformationMatrix = reg_mat44_avg2(&fInverted, this->backwardTransformationMatrix);
+    *this->affineTransformationBw = reg_mat44_avg2(&fInverted, this->affineTransformationBw.get());
     for (int i = 0; i < 3; ++i) {
-        this->transformationMatrix->m[3][i] = 0.f;
-        this->backwardTransformationMatrix->m[3][i] = 0.f;
+        this->affineTransformation->m[3][i] = 0.f;
+        this->affineTransformationBw->m[3][i] = 0.f;
     }
-    this->transformationMatrix->m[3][3] = 1.f;
-    this->backwardTransformationMatrix->m[3][3] = 1.f;
+    this->affineTransformation->m[3][3] = 1.f;
+    this->affineTransformationBw->m[3][3] = 1.f;
 #ifndef NDEBUG
-    reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward transformation matrix");
-    reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[NiftyReg DEBUG] updated backward transformation matrix");
+    reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] updated forward transformation matrix");
+    reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[NiftyReg DEBUG] updated backward transformation matrix");
 #endif
 }
 /* *************************************************************** */
@@ -188,7 +178,7 @@ void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
                                           unsigned blockStepSize) {
     reg_aladin<T>::InitAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize);
     unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(this->platform->CreateContentCreator(ContentType::Aladin)) };
-    this->backCon.reset(contentCreator->Create(flo, ref, this->floatingMaskPyramid[this->currentLevel].get(), this->backwardTransformationMatrix, bytes, blockPercentage, inlierLts, blockStepSize));
+    this->backCon.reset(contentCreator->Create(flo, ref, this->floatingMaskPyramid[this->currentLevel].get(), this->affineTransformationBw.get(), bytes, blockPercentage, inlierLts, blockStepSize));
     this->backwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
@@ -253,17 +243,17 @@ void reg_aladin_sym<T>::DebugPrintLevelInfoStart() {
     sprintf(text, "Backward Block number = [%i %i %i]", this->backwardBlockMatchingParams->blockNumber[0],
             this->backwardBlockMatchingParams->blockNumber[1], this->backwardBlockMatchingParams->blockNumber[2]);
     reg_print_info(this->executableName, text);
-    reg_mat44_disp(this->transformationMatrix,
-                   (char *)"[reg_aladin_sym] Initial forward transformation matrix:");
-    reg_mat44_disp(this->backwardTransformationMatrix,
-                   (char *)"[reg_aladin_sym] Initial backward transformation matrix:");
+    reg_mat44_disp(this->affineTransformation.get(),
+                   (char*)"[reg_aladin_sym] Initial forward transformation matrix:");
+    reg_mat44_disp(this->affineTransformationBw.get(),
+                   (char*)"[reg_aladin_sym] Initial backward transformation matrix:");
     reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 /* *************************************************************** */
 template <class T>
 void reg_aladin_sym<T>::DebugPrintLevelInfoEnd() {
-    reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin_sym] Final forward transformation matrix:");
-    reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[reg_aladin_sym] Final backward transformation matrix:");
+    reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin_sym] Final forward transformation matrix:");
+    reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[reg_aladin_sym] Final backward transformation matrix:");
 }
 /* *************************************************************** */
 template class reg_aladin_sym<float>;
diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h
index 405b4038..58e71378 100644
--- a/reg-lib/_reg_aladin_sym.h
+++ b/reg-lib/_reg_aladin_sym.h
@@ -39,7 +39,7 @@ class reg_aladin_sym: public reg_aladin<T> {
 
     _reg_blockMatchingParam *backwardBlockMatchingParams;
 
-    mat44 *backwardTransformationMatrix;
+    unique_ptr<mat44> affineTransformationBw;
 
     virtual void DeallocateCurrentInputImage();
     virtual void GetBackwardDeformationField();
@@ -52,6 +52,5 @@ class reg_aladin_sym: public reg_aladin<T> {
 
 public:
     reg_aladin_sym();
-    virtual ~reg_aladin_sym();
     virtual void SetInputFloatingMask(NiftiImage);
 };

From 570d7a99ac3729f0967b3671954550c1881014c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 11 Jul 2023 14:15:30 +0100
Subject: [PATCH 151/314] Fix CUDA 12 incompatibilities

---
 niftyreg_build_version.txt                    |    2 +-
 reg-lib/cuda/BlockSize.hpp                    |    3 -
 reg-lib/cuda/CudaBlockMatchingKernel.cpp      |   14 +-
 reg-lib/cuda/_reg_common_cuda_kernels.cu      |   71 +
 reg-lib/cuda/_reg_globalTransformation_gpu.cu |   54 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.h  |    7 +-
 .../cuda/_reg_globalTransformation_kernels.cu |   53 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |  982 +++++------
 reg-lib/cuda/_reg_localTransformation_gpu.h   |  101 +-
 .../cuda/_reg_localTransformation_kernels.cu  | 1432 ++++++++---------
 reg-lib/cuda/_reg_measure_gpu.h               |   10 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                  |  128 +-
 reg-lib/cuda/_reg_nmi_gpu.h                   |   25 +-
 reg-lib/cuda/_reg_nmi_kernels.cu              |  807 +++++-----
 reg-lib/cuda/_reg_resampling_gpu.cu           |  103 +-
 reg-lib/cuda/_reg_resampling_gpu.h            |   30 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |  161 +-
 reg-lib/cuda/_reg_ssd_gpu.h                   |   25 +-
 reg-lib/cuda/_reg_ssd_kernels.cu              |  205 ++-
 reg-lib/cuda/_reg_tools_gpu.cu                |  330 ++--
 reg-lib/cuda/_reg_tools_gpu.h                 |   36 +-
 reg-lib/cuda/_reg_tools_kernels.cu            |  237 ++-
 reg-lib/cuda/blockMatchingKernel.cu           | 1021 ++++++------
 reg-lib/cuda/blockMatchingKernel.h            |   32 +-
 24 files changed, 2704 insertions(+), 3165 deletions(-)
 create mode 100644 reg-lib/cuda/_reg_common_cuda_kernels.cu

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c48f9e04..67f3f23e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-269
+270
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index fe380b26..68880b58 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -52,7 +52,6 @@ struct BlockSize {
     unsigned reg_spline_approxCorrectFolding3D;
     unsigned reg_spline_correctFolding3D;
     unsigned reg_getDeformationFromDisplacement;
-    unsigned reg_getDisplacementFromDeformation;
     unsigned reg_defField_compose2D;
     unsigned reg_defField_compose3D;
     unsigned reg_defField_getJacobianMatrix;
@@ -118,7 +117,6 @@ struct BlockSize100: public BlockSize {
         reg_spline_approxCorrectFolding3D = 256; // 32 reg - 24 smem - 24 cmem
         reg_spline_correctFolding3D = 256; // 31 reg - 24 smem - 32 cmem
         reg_getDeformationFromDisplacement = 384; // 09 reg - 24 smem
-        reg_getDisplacementFromDeformation = 384; // 09 reg - 24 smem
         reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem
         reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem
         reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
@@ -188,7 +186,6 @@ struct BlockSize300: public BlockSize {
         reg_spline_approxCorrectFolding3D = 768; // 34 reg
         reg_spline_correctFolding3D = 768; // 34 reg
         reg_getDeformationFromDisplacement = 1024; // 18 reg
-        reg_getDisplacementFromDeformation = 1024; // 18 reg
         reg_defField_compose2D = 1024; // 23 reg
         reg_defField_compose3D = 1024; // 24 reg
         reg_defField_getJacobianMatrix = 768; // 34 reg
diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.cpp b/reg-lib/cuda/CudaBlockMatchingKernel.cpp
index 4cc7fe18..fe8b36a4 100644
--- a/reg-lib/cuda/CudaBlockMatchingKernel.cpp
+++ b/reg-lib/cuda/CudaBlockMatchingKernel.cpp
@@ -23,12 +23,12 @@ CudaBlockMatchingKernel::CudaBlockMatchingKernel(Content *conIn) : BlockMatching
 void CudaBlockMatchingKernel::Calculate() {
     block_matching_method_gpu(reference,
                               params,
-                              &referenceImageArray_d,
-                              &warpedImageArray_d,
-                              &referencePosition_d,
-                              &warpedPosition_d,
-                              &totalBlock_d,
-                              &mask_d,
-                              &referenceMat_d);
+                              referenceImageArray_d,
+                              warpedImageArray_d,
+                              referencePosition_d,
+                              warpedPosition_d,
+                              totalBlock_d,
+                              mask_d,
+                              referenceMat_d);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
new file mode 100644
index 00000000..8de94c04
--- /dev/null
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2009-2018, University College London
+ *  Copyright (c) 2018, NiftyReg Developers.
+ *  All rights reserved.
+ *  See the LICENSE.txt file in the nifty_reg root folder
+ */
+
+#pragma once
+
+/* *************************************************************** */
+__device__ __inline__ float2 operator*(float a, float2 b) {
+    return { a * b.x, a * b.y };
+}
+__device__ __inline__ float3 operator*(float a, float3 b) {
+    return { a * b.x, a * b.y, a * b.z };
+}
+__device__ __inline__ float3 operator*(float3 a, float3 b) {
+    return { a.x * b.x, a.y * b.y, a.z * b.z };
+}
+__device__ __inline__ float4 operator*(float4 a, float4 b) {
+    return { a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w };
+}
+__device__ __inline__ float4 operator*(float a, float4 b) {
+    return { a * b.x, a * b.y, a * b.z, 0.0f };
+}
+/* *************************************************************** */
+__device__ __inline__ float2 operator/(float2 a, float2 b) {
+    return { a.x / b.x, a.y / b.y };
+}
+__device__ __inline__ float3 operator/(float3 a, float b) {
+    return { a.x / b, a.y / b, a.z / b };
+}
+__device__ __inline__ float3 operator/(float3 a, float3 b) {
+    return { a.x / b.x, a.y / b.y, a.z / b.z };
+}
+/* *************************************************************** */
+__device__ __inline__ float2 operator+(float2 a, float2 b) {
+    return { a.x + b.x, a.y + b.y };
+}
+__device__ __inline__ float4 operator+(float4 a, float4 b) {
+    return { a.x + b.x, a.y + b.y, a.z + b.z, 0.0f };
+}
+__device__ __inline__ float3 operator+(float3 a, float3 b) {
+    return { a.x + b.x, a.y + b.y, a.z + b.z };
+}
+/* *************************************************************** */
+__device__ __inline__ float3 operator-(float3 a, float3 b) {
+    return { a.x - b.x, a.y - b.y, a.z - b.z };
+}
+__device__ __inline__ float4 operator-(float4 a, float4 b) {
+    return { a.x - b.x, a.y - b.y, a.z - b.z, 0.f };
+}
+/* *************************************************************** */
+__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) {
+    out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]);
+    out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]);
+    out[2] = is3d ? weight * (mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2]) : 0;
+}
+/* *************************************************************** */
+__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool& is3d) {
+    out[0] = mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2] + mat.m[0][3];
+    out[1] = mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2] + mat.m[1][3];
+    out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0;
+}
+/* *************************************************************** */
+__device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quot, int& rem) {
+    // This will be optimised by the compiler into a single div instruction
+    quot = num / denom;
+    rem = num % denom;
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index 71cd8df7..fcea21ea 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -14,48 +14,24 @@
 #include "_reg_globalTransformation_kernels.cu"
 
 /* *************************************************************** */
-void reg_affine_positionField_gpu(mat44 *affineMatrix,
-                                  nifti_image *targetImage,
-                                  float4 *array_d) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize, &imageSize, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &(targetImage->nvox), sizeof(int)));
+void reg_affine_positionField_gpu(const mat44 *affineMatrix,
+                                  const nifti_image *targetImage,
+                                  float4 *deformationFieldCuda) {
+    const int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz);
+    const size_t voxelNumber = targetImage->nvox;
 
     // If the target sform is defined, it is used. The qform is used otherwise
-    mat44 *targetMatrix;
-    if (targetImage->sform_code > 0)
-        targetMatrix = &(targetImage->sto_xyz);
-    else targetMatrix = &(targetImage->qto_xyz);
+    const mat44 *targetMatrix = targetImage->sform_code > 0 ? &targetImage->sto_xyz : &targetImage->qto_xyz;
 
-    // We here performed Affine * TargetMat * voxelIndex
+    // Affine * TargetMat * voxelIndex is performed
     // Affine * TargetMat is constant
-    mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix);
-
-    // The transformation matrix is bound to a texture
-    float4 *transformationMatrix_h;
-    NR_CUDA_SAFE_CALL(cudaMallocHost(&transformationMatrix_h, 3 * sizeof(float4)));
-    float4 *transformationMatrix_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&transformationMatrix_d, 3 * sizeof(float4)));
-    for (int i = 0; i < 3; i++) {
-        transformationMatrix_h[i].x = transformationMatrix.m[i][0];
-        transformationMatrix_h[i].y = transformationMatrix.m[i][1];
-        transformationMatrix_h[i].z = transformationMatrix.m[i][2];
-        transformationMatrix_h[i].w = transformationMatrix.m[i][3];
-    }
-    NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, transformationMatrix_h, 3 * sizeof(float4), cudaMemcpyHostToDevice));
-    cudaBindTexture(0, txAffineTransformation, transformationMatrix_d, 3 * sizeof(float4));
-    NR_CUDA_SAFE_CALL(cudaFreeHost(transformationMatrix_h));
-
-    const unsigned Grid_reg_affine_deformationField = (unsigned)ceil(sqrtf((float)targetImage->nvox / (float)blockSize->reg_affine_deformationField));
-    dim3 B1(blockSize->reg_affine_deformationField, 1, 1);
-    dim3 G1(Grid_reg_affine_deformationField, Grid_reg_affine_deformationField, 1);
-
-    reg_affine_deformationField_kernel<<<G1, B1>>>(array_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(txAffineTransformation));
-    NR_CUDA_SAFE_CALL(cudaFree(transformationMatrix_d));
+    const mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix);
+
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_affine_deformationField;
+    const unsigned grids = (unsigned)ceil(sqrtf((float)targetImage->nvox / (float)blocks));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+    reg_affine_deformationField_kernel<<<gridDims, blockDims>>>(deformationFieldCuda, transformationMatrix, imageSize, (unsigned)voxelNumber);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h
index 754f10e4..33efd396 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h
@@ -13,9 +13,8 @@
 #pragma once
 
 #include "_reg_common_cuda.h"
-// #include "_reg_globalTransformation.h"
 
 extern "C++"
-void reg_affine_positionField_gpu(mat44 *,
-                                  nifti_image *,
-                                  float4 *);
+void reg_affine_positionField_gpu(const mat44 *affineMatrix,
+                                  const nifti_image *targetImage,
+                                  float4 *deformationFieldCuda);
diff --git a/reg-lib/cuda/_reg_globalTransformation_kernels.cu b/reg-lib/cuda/_reg_globalTransformation_kernels.cu
index fcf00af6..bbb8b1ce 100755
--- a/reg-lib/cuda/_reg_globalTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_kernels.cu
@@ -10,45 +10,30 @@
  *
  */
 
-#include "_reg_common_cuda.h"
+#include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
-/* *************************************************************** */
-__device__ __constant__ int3 c_ImageSize;
-__device__ __constant__ int c_VoxelNumber;
-/* *************************************************************** */
-texture<float4, 1, cudaReadModeElementType> txAffineTransformation;
-/* *************************************************************** */
-/* *************************************************************** */
-__global__
-void reg_affine_deformationField_kernel(float4 *PositionFieldArray)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_VoxelNumber){
-
-        int3 imageSize = c_ImageSize;
-        short3 voxelIndex;
-        int tempIndex=tid;
-        voxelIndex.z=(int)(tempIndex/((imageSize.x)*(imageSize.y)));
-        tempIndex -= voxelIndex.z*(imageSize.x)*(imageSize.y);
-        voxelIndex.y=(int)(tempIndex/(imageSize.x));
-        voxelIndex.x = tempIndex - voxelIndex.y*(imageSize.x);
+__global__ void reg_affine_deformationField_kernel(float4 *deformationField,
+                                                   const mat44 affineMatrix,
+                                                   const int3 imageSize,
+                                                   const unsigned voxelNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < voxelNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, imageSize.x, quot, rem);
+        const int y = quot, x = rem;
 
         /* The transformation is applied */
-        float4 position;
-        float4 matrix = tex1Dfetch(txAffineTransformation,0);
-        position.x = 	matrix.x*voxelIndex.x + matrix.y*voxelIndex.y  +
-                        matrix.z*voxelIndex.z  +  matrix.w;
-        matrix = tex1Dfetch(txAffineTransformation,1);
-        position.y = 	matrix.x*voxelIndex.x + matrix.y*voxelIndex.y  +
-                        matrix.z*voxelIndex.z  +  matrix.w;
-        matrix = tex1Dfetch(txAffineTransformation,2);
-        position.z = 	matrix.x*voxelIndex.x + matrix.y*voxelIndex.y  +
-                        matrix.z*voxelIndex.z  +  matrix.w;
-        position.w=0.0f;
+        const float4 position = {
+            affineMatrix.m[0][0] * x + affineMatrix.m[0][1] * y + affineMatrix.m[0][2] * z + affineMatrix.m[0][3],
+            affineMatrix.m[1][0] * x + affineMatrix.m[1][1] * y + affineMatrix.m[1][2] * z + affineMatrix.m[1][3],
+            affineMatrix.m[2][0] * x + affineMatrix.m[2][1] * y + affineMatrix.m[2][2] * z + affineMatrix.m[2][3],
+            0.f
+        };
         /* the deformation field (real coordinates) is stored */
-        PositionFieldArray[tid] = position;
+        deformationField[tid] = position;
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 2b95f454..e1a251e7 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -14,729 +14,587 @@
 #include "_reg_localTransformation_kernels.cu"
 
 /* *************************************************************** */
-void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
-                                        nifti_image *reference,
-                                        float4 *controlPointImageArray_d,
-                                        float4 *positionFieldImageArray_d,
-                                        int *mask_d,
-                                        int activeVoxelNumber,
-                                        bool bspline) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    const int voxelNumber = CalcVoxelNumber(*reference);
-    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
-    const int3 referenceImageDim = make_int3(reference->nx, reference->ny, reference->nz);
+void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
+                                        const nifti_image *referenceImage,
+                                        const float4 *controlPointImageCuda,
+                                        float4 *deformationFieldCuda,
+                                        const int *maskCuda,
+                                        const size_t& activeVoxelNumber,
+                                        const bool& bspline) {
+    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
+    const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    const int useBSpline = static_cast<int>(bspline);
-
-    const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / reference->dx,
-                                                        controlPointImage->dy / reference->dy,
-                                                        controlPointImage->dz / reference->dz);
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_UseBSpline, &useBSpline, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
-
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber * sizeof(float4)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int)));
-
-    if (reference->nz > 1) {
-        const unsigned Grid_reg_spline_getDeformationField3D =
-            (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)(blockSize->reg_spline_getDeformationField3D)));
-        dim3 G1(Grid_reg_spline_getDeformationField3D, Grid_reg_spline_getDeformationField3D, 1);
-        dim3 B1(blockSize->reg_spline_getDeformationField3D, 1, 1);
+    const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
+                                                        controlPointImage->dy / referenceImage->dy,
+                                                        controlPointImage->dz / referenceImage->dz);
+
+    auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                              controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear,
+                                                      activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1);
+
+    if (referenceImage->nz > 1) {
+        const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_getDeformationField3D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
         // 8 floats of shared memory are allocated per thread
-        reg_spline_getDeformationField3D<<<G1, B1, blockSize->reg_spline_getDeformationField3D * 8 * sizeof(float)>>>(positionFieldImageArray_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        reg_spline_getDeformationField3D<<<gridDims, blockDims, blocks * 8 * sizeof(float)>>>(deformationFieldCuda,
+                                                                                              *controlPointTexture,
+                                                                                              *maskTexture,
+                                                                                              referenceImageDim,
+                                                                                              controlPointImageDim,
+                                                                                              controlPointVoxelSpacing,
+                                                                                              (unsigned)activeVoxelNumber,
+                                                                                              bspline);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned Grid_reg_spline_getDeformationField2D =
-            (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)(blockSize->reg_spline_getDeformationField2D)));
-        dim3 G1(Grid_reg_spline_getDeformationField2D, Grid_reg_spline_getDeformationField2D, 1);
-        dim3 B1(blockSize->reg_spline_getDeformationField2D, 1, 1);
+        const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_getDeformationField2D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
         // 4 floats of shared memory are allocated per thread
-        reg_spline_getDeformationField2D<<<G1, B1, blockSize->reg_spline_getDeformationField2D * 4 * sizeof(float)>>>(positionFieldImageArray_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        reg_spline_getDeformationField2D<<<gridDims, blockDims, blocks * 4 * sizeof(float)>>>(deformationFieldCuda,
+                                                                                              *controlPointTexture,
+                                                                                              *maskTexture,
+                                                                                              referenceImageDim,
+                                                                                              controlPointImageDim,
+                                                                                              controlPointVoxelSpacing,
+                                                                                              (unsigned)activeVoxelNumber,
+                                                                                              bspline);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
-
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
 }
 /* *************************************************************** */
-float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d) {
+float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    const int controlPointGridMem = controlPointNumber * sizeof(float4);
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem));
+    const size_t controlPointGridSize = controlPointNumber * sizeof(float4);
+    auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                              controlPointGridSize, cudaChannelFormatKindFloat, 4);
 
     // First compute all the second derivatives
-    float4 *secondDerivativeValues_d;
+    float4 *secondDerivativeValuesCuda;
+    size_t secondDerivativeValuesSize;
     if (controlPointImage->nz > 1) {
-        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6 * controlPointGridMem));
-        const unsigned Grid_bspline_getApproxSecondDerivatives =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives3D)));
-        dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1);
-        dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D, 1, 1);
-        reg_spline_getApproxSecondDerivatives3D<<<G1, B1>>>(secondDerivativeValues_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        secondDerivativeValuesSize = 6 * controlPointGridSize;
+        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
+        const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getApproxSecondDerivatives3D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
+                                                                         controlPointImageDim, (unsigned)controlPointNumber);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3 * controlPointGridMem));
-        const unsigned Grid_bspline_getApproxSecondDerivatives =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives2D)));
-        dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1);
-        dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D, 1, 1);
-        reg_spline_getApproxSecondDerivatives2D<<<G1, B1>>>(secondDerivativeValues_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        secondDerivativeValuesSize = 3 * controlPointGridSize;
+        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
+        const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getApproxSecondDerivatives2D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
+                                                                         controlPointImageDim, (unsigned)controlPointNumber);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
 
     // Compute the bending energy from the second derivatives
-    float *penaltyTerm_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTerm_d, controlPointNumber * sizeof(float)));
-
+    float *penaltyTermCuda;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTermCuda, controlPointNumber * sizeof(float)));
+    auto secondDerivativesTexture = cudaCommon_createTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear,
+                                                                   secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
     if (controlPointImage->nz > 1) {
-        NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 6 * controlPointGridMem));
-        const unsigned Grid_reg_spline_ApproxBendingEnergy =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergy3D)));
-        dim3 G2(Grid_reg_spline_ApproxBendingEnergy, Grid_reg_spline_ApproxBendingEnergy, 1);
-        dim3 B2(blockSize->reg_spline_getApproxBendingEnergy3D, 1, 1);
-        reg_spline_getApproxBendingEnergy3D_kernel<<<G2, B2>>>(penaltyTerm_d);
-        NR_CUDA_CHECK_KERNEL(G2, B2);
+        const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy3D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getApproxBendingEnergy3D_kernel<<<gridDims, blockDims>>>(penaltyTermCuda, *secondDerivativesTexture,
+                                                                            (unsigned)controlPointNumber);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 3 * controlPointGridMem));
-        const unsigned Grid_reg_spline_ApproxBendingEnergy =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergy2D)));
-        dim3 G2(Grid_reg_spline_ApproxBendingEnergy, Grid_reg_spline_ApproxBendingEnergy, 1);
-        dim3 B2(blockSize->reg_spline_getApproxBendingEnergy2D, 1, 1);
-        reg_spline_getApproxBendingEnergy2D_kernel<<<G2, B2>>>(penaltyTerm_d);
-        NR_CUDA_CHECK_KERNEL(G2, B2);
+        const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy2D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getApproxBendingEnergy2D_kernel<<<gridDims, blockDims>>>(penaltyTermCuda, *secondDerivativesTexture,
+                                                                            (unsigned)controlPointNumber);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture));
-    NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d));
+    NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValuesCuda));
 
     // Compute the mean bending energy value
-    double penaltyValue = reg_sumReduction_gpu(penaltyTerm_d, controlPointNumber);
-    NR_CUDA_SAFE_CALL(cudaFree(penaltyTerm_d));
+    double penaltyValue = reg_sumReduction_gpu(penaltyTermCuda, controlPointNumber);
+    NR_CUDA_SAFE_CALL(cudaFree(penaltyTermCuda));
 
     return (float)(penaltyValue / (double)controlPointImage->nvox);
 }
 /* *************************************************************** */
-void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
-                                                float4 *controlPointImageArray_d,
-                                                float4 *nodeGradientArray_d,
+void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointImage,
+                                                const float4 *controlPointImageCuda,
+                                                float4 *transGradientCuda,
                                                 float bendingEnergyWeight) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    const int controlPointGridMem = controlPointNumber * sizeof(float4);
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem));
+    const size_t controlPointGridSize = controlPointNumber * sizeof(float4);
+    auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                              controlPointGridSize, cudaChannelFormatKindFloat, 4);
 
     // First compute all the second derivatives
-    float4 *secondDerivativeValues_d;
+    float4 *secondDerivativeValuesCuda;
+    size_t secondDerivativeValuesSize;
     if (controlPointImage->nz > 1) {
-        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6 * controlPointNumber * sizeof(float4)));
-        const unsigned Grid_bspline_getApproxSecondDerivatives =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives3D)));
-        dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1);
-        dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D, 1, 1);
-        reg_spline_getApproxSecondDerivatives3D<<<G1, B1>>>(secondDerivativeValues_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        secondDerivativeValuesSize = 6 * controlPointGridSize * sizeof(float4);
+        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
+        const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getApproxSecondDerivatives3D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
+                                                                         controlPointImageDim, (unsigned)controlPointNumber);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3 * controlPointNumber * sizeof(float4)));
-        const unsigned Grid_bspline_getApproxSecondDerivatives =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives2D)));
-        dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1);
-        dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D, 1, 1);
-        reg_spline_getApproxSecondDerivatives2D<<<G1, B1>>>(secondDerivativeValues_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        secondDerivativeValuesSize = 3 * controlPointGridSize * sizeof(float4);
+        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
+        const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getApproxSecondDerivatives2D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
+                                                                         controlPointImageDim, (unsigned)controlPointNumber);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
 
     // Compute the gradient
     bendingEnergyWeight *= 1.f / (float)controlPointNumber;
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &bendingEnergyWeight, sizeof(float)));
+    auto secondDerivativesTexture = cudaCommon_createTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear,
+                                                                   secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
     if (controlPointImage->nz > 1) {
-        NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 6 * controlPointNumber * sizeof(float4)));
-        const unsigned Grid_reg_spline_getApproxBendingEnergyGradient =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergyGradient3D)));
-        dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient, Grid_reg_spline_getApproxBendingEnergyGradient, 1);
-        dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient3D, 1, 1);
-        reg_spline_getApproxBendingEnergyGradient3D_kernel<<<G2, B2>>>(nodeGradientArray_d);
-        NR_CUDA_CHECK_KERNEL(G2, B2);
+        const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient3D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getApproxBendingEnergyGradient3D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *secondDerivativesTexture,
+                                                                                    controlPointImageDim, (unsigned)controlPointNumber,
+                                                                                    bendingEnergyWeight);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 3 * controlPointNumber * sizeof(float4)));
-        const unsigned Grid_reg_spline_getApproxBendingEnergyGradient =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergyGradient2D)));
-        dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient, Grid_reg_spline_getApproxBendingEnergyGradient, 1);
-        dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient2D, 1, 1);
-        reg_spline_getApproxBendingEnergyGradient2D_kernel<<<G2, B2>>>(nodeGradientArray_d);
-        NR_CUDA_CHECK_KERNEL(G2, B2);
+        const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient2D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getApproxBendingEnergyGradient2D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *secondDerivativesTexture,
+                                                                                    controlPointImageDim, (unsigned)controlPointNumber,
+                                                                                    bendingEnergyWeight);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture));
-    NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d));
+    NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValuesCuda));
 }
 /* *************************************************************** */
-void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage,
-                                            float4 *controlPointImageArray_d,
-                                            float *jacobianMatrices_d,
-                                            float *jacobianDet_d) {
+void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage,
+                                            const float4 *controlPointImageCuda,
+                                            float *jacobianMatricesCuda,
+                                            float *jacobianDetCuda) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
+    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                              controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
-    mat33 reorientation;
-    if (controlPointImage->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&controlPointImage->sto_xyz);
-    else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_xyz);
-    float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3)));
-    temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3)));
-    temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3)));
-
-    // Bind some variables
-    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
-    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
-    const int controlPointGridMem = controlPointNumber * sizeof(float4);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem));
+    const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz);
 
     // The Jacobian matrix is computed for every control point
     if (controlPointImage->nz > 1) {
-        const unsigned Grid_reg_spline_getApproxJacobianValues3D =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxJacobianValues3D)));
-        dim3 G1(Grid_reg_spline_getApproxJacobianValues3D, Grid_reg_spline_getApproxJacobianValues3D, 1);
-        dim3 B1(blockSize->reg_spline_getApproxJacobianValues3D, 1, 1);
-        reg_spline_getApproxJacobianValues3D_kernel<<<G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues3D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getApproxJacobianValues3D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
+                                                                             controlPointImageDim, (unsigned)controlPointNumber, reorientation);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned Grid_reg_spline_getApproxJacobianValues2D =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxJacobianValues2D)));
-        dim3 G1(Grid_reg_spline_getApproxJacobianValues2D, Grid_reg_spline_getApproxJacobianValues2D, 1);
-        dim3 B1(blockSize->reg_spline_getApproxJacobianValues2D, 1, 1);
-        reg_spline_getApproxJacobianValues2D_kernel<<<G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues2D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getApproxJacobianValues2D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
+                                                                             controlPointImageDim, (unsigned)controlPointNumber, reorientation);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
 }
 /* *************************************************************** */
-void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage,
-                                      nifti_image *referenceImage,
-                                      float4 *controlPointImageArray_d,
-                                      float *jacobianMatrices_d,
-                                      float *jacobianDet_d) {
+void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage,
+                                      const nifti_image *referenceImage,
+                                      const float4 *controlPointImageCuda,
+                                      float *jacobianMatricesCuda,
+                                      float *jacobianDetCuda) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
-    mat33 reorientation;
-    if (controlPointImage->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&controlPointImage->sto_xyz);
-    else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_xyz);
-    float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3)));
-    temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3)));
-    temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3)));
-
-    // Bind some variables
-    const int voxelNumber = CalcVoxelNumber(*referenceImage);
-    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
-    const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
-                                                        controlPointImage->dy / referenceImage->dy,
-                                                        controlPointImage->dz / referenceImage->dz);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber * sizeof(float4)));
+    auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                              controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+
+    // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
+    const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz);
 
     // The Jacobian matrix is computed for every voxel
     if (controlPointImage->nz > 1) {
-        const unsigned Grid_reg_spline_getJacobianValues3D =
-            (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_spline_getJacobianValues3D)));
-        dim3 G1(Grid_reg_spline_getJacobianValues3D, Grid_reg_spline_getJacobianValues3D, 1);
-        dim3 B1(blockSize->reg_spline_getJacobianValues3D, 1, 1);
+        const unsigned blocks = blockSize->reg_spline_getJacobianValues3D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
         // 8 floats of shared memory are allocated per thread
-        reg_spline_getJacobianValues3D_kernel<<<G1, B1, blockSize->reg_spline_getJacobianValues3D * 8 * sizeof(float)>>>(jacobianMatrices_d, jacobianDet_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned sharedMemSize = blocks * 8 * sizeof(float);
+        reg_spline_getJacobianValues3D_kernel<<<gridDims, blockDims, sharedMemSize>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
+                                                                                      controlPointImageDim, controlPointSpacing, referenceImageDim,
+                                                                                      (unsigned)voxelNumber, reorientation);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned Grid_reg_spline_getJacobianValues2D =
-            (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_spline_getJacobianValues2D)));
-        dim3 G1(Grid_reg_spline_getJacobianValues2D, Grid_reg_spline_getJacobianValues2D, 1);
-        dim3 B1(blockSize->reg_spline_getJacobianValues2D, 1, 1);
-        reg_spline_getJacobianValues2D_kernel<<<G1, B1>>>(jacobianMatrices_d, jacobianDet_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_spline_getJacobianValues2D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_getJacobianValues2D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
+                                                                       controlPointImageDim, controlPointSpacing, referenceImageDim,
+                                                                       (unsigned)voxelNumber, reorientation);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture));
 }
 /* *************************************************************** */
-double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
-                                             nifti_image *controlPointImage,
-                                             float4 *controlPointImageArray_d,
-                                             bool approx) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
+double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage,
+                                             const nifti_image *controlPointImage,
+                                             const float4 *controlPointImageCuda,
+                                             const bool& approx) {
     // The Jacobian matrices and determinants are computed
-    float *jacobianMatrices_d;
-    float *jacobianDet_d;
-    int jacNumber;
-    double jacSum;
+    float *jacobianMatricesCuda, *jacobianDetCuda;
+    size_t jacNumber; double jacSum;
     if (approx) {
-        jacNumber = CalcVoxelNumber(*controlPointImage);
+        jacNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
         jacSum = (controlPointImage->nx - 2) * (controlPointImage->ny - 2);
-        if (controlPointImage->nz > 1) {
+        if (controlPointImage->nz > 1)
             jacSum *= controlPointImage->nz - 2;
-            // Allocate array for 3x3 matrices
-            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)));
-        } else {
-            // Allocate array for 2x2 matrices
-            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float)));
-        }
-        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
-        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+        // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float)));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float)));
+        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     } else {
-        jacNumber = CalcVoxelNumber(*referenceImage);
-        jacSum = jacNumber;
-        if (controlPointImage->nz > 1) {
-            // Allocate array for 3x3 matrices
-            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)));
-        } else {
-            // Allocate array for 2x2 matrices
-            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float)));
-        }
-        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
-        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+        jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+        jacSum = static_cast<double>(jacNumber);
+        // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float)));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float)));
+        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     }
-    NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda));
 
     // The Jacobian determinant are squared and logged (might not be english but will do)
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &jacNumber, sizeof(int)));
-    const unsigned Grid_reg_spline_logSquaredValues =
-        (unsigned)ceilf(sqrtf((float)jacNumber / (float)(blockSize->reg_spline_logSquaredValues)));
-    dim3 G1(Grid_reg_spline_logSquaredValues, Grid_reg_spline_logSquaredValues, 1);
-    dim3 B1(blockSize->reg_spline_logSquaredValues, 1, 1);
-    reg_spline_logSquaredValues_kernel<<<G1, B1>>>(jacobianDet_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_logSquaredValues;
+    const unsigned grids = (unsigned)ceilf(sqrtf((float)jacNumber / (float)blocks));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+    reg_spline_logSquaredValues_kernel<<<gridDims, blockDims>>>(jacobianDetCuda, (unsigned)jacNumber);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 
     // Perform the reduction
-    double penaltyTermValue = reg_sumReduction_gpu(jacobianDet_d, jacNumber);
-    NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
+    const double penaltyTermValue = reg_sumReduction_gpu(jacobianDetCuda, jacNumber);
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda));
     return penaltyTermValue / jacSum;
 }
 /* *************************************************************** */
-void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
-                                                   nifti_image *controlPointImage,
-                                                   float4 *controlPointImageArray_d,
-                                                   float4 *nodeGradientArray_d,
-                                                   float jacobianWeight,
-                                                   bool approx) {
+void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceImage,
+                                                   const nifti_image *controlPointImage,
+                                                   const float4 *controlPointImageCuda,
+                                                   float4 *transGradientCuda,
+                                                   const float& jacobianWeight,
+                                                   const bool& approx) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     // The Jacobian matrices and determinants are computed
-    float *jacobianMatrices_d;
-    float *jacobianDet_d;
-    int jacNumber;
+    float *jacobianMatricesCuda, *jacobianDetCuda;
+    size_t jacNumber;
     if (approx) {
-        jacNumber = CalcVoxelNumber(*controlPointImage);
-        if (controlPointImage->nz > 1)
-            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)))
-        else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float)));
-        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
-        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+        jacNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
+        // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float)));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float)));
+        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     } else {
-        jacNumber = CalcVoxelNumber(*referenceImage);
-        if (controlPointImage->nz > 1)
-            NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)))
-        else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float)));
-        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
-        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+        jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+        // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float)));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float)));
+        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     }
 
     // Need to disorient the Jacobian matrix using the header information - voxel to real conversion
-    mat33 reorientation;
-    if (controlPointImage->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&controlPointImage->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_ijk);
-    float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3)));
-    temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3)));
-    temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianDeterminantTexture, jacobianDet_d, jacNumber * sizeof(float)));
-    if (controlPointImage->nz > 1)
-        NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 9 * jacNumber * sizeof(float)))
-    else NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 4 * jacNumber * sizeof(float)));
-
-    // Bind some variables
-    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk);
+
+    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3)));
-    float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx),
-                                referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy),
-                                referenceImage->dz * jacobianWeight / ((float)jacNumber * controlPointImage->dz));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3, &weight, sizeof(float3)));
+    const float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx),
+                                      referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy),
+                                      referenceImage->dz * jacobianWeight / ((float)jacNumber * controlPointImage->dz));
+    auto jacobianDeterminantTexture = cudaCommon_createTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacNumber * sizeof(float),
+                                                                     cudaChannelFormatKindFloat, 1);
+    auto jacobianMatricesTexture = cudaCommon_createTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear,
+                                                                  (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float),
+                                                                  cudaChannelFormatKindFloat, 1);
     if (approx) {
         if (controlPointImage->nz > 1) {
-            const unsigned Grid_reg_spline_computeApproxJacGradient3D =
-                (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeApproxJacGradient3D)));
-            dim3 G1(Grid_reg_spline_computeApproxJacGradient3D, Grid_reg_spline_computeApproxJacGradient3D, 1);
-            dim3 B1(blockSize->reg_spline_computeApproxJacGradient3D, 1, 1);
-            reg_spline_computeApproxJacGradient3D_kernel<<<G1, B1>>>(nodeGradientArray_d);
-            NR_CUDA_CHECK_KERNEL(G1, B1);
+            const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient3D;
+            const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+            const dim3 gridDims(grids, grids, 1);
+            const dim3 blockDims(blocks, 1, 1);
+            reg_spline_computeApproxJacGradient3D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
+                                                                                  *jacobianMatricesTexture, controlPointImageDim,
+                                                                                  (unsigned)controlPointNumber, reorientation, weight);
+            NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         } else {
-            const unsigned Grid_reg_spline_computeApproxJacGradient2D =
-                (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeApproxJacGradient2D)));
-            dim3 G1(Grid_reg_spline_computeApproxJacGradient2D, Grid_reg_spline_computeApproxJacGradient2D, 1);
-            dim3 B1(blockSize->reg_spline_computeApproxJacGradient2D, 1, 1);
-            reg_spline_computeApproxJacGradient2D_kernel<<<G1, B1>>>(nodeGradientArray_d);
-            NR_CUDA_CHECK_KERNEL(G1, B1);
+            const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient2D;
+            const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+            const dim3 gridDims(grids, grids, 1);
+            const dim3 blockDims(blocks, 1, 1);
+            reg_spline_computeApproxJacGradient2D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
+                                                                                  *jacobianMatricesTexture, controlPointImageDim,
+                                                                                  (unsigned)controlPointNumber, reorientation, weight);
+            NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         }
     } else {
-        const int voxelNumber = CalcVoxelNumber(*referenceImage);
         const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
         const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
                                                             controlPointImage->dy / referenceImage->dy,
                                                             controlPointImage->dz / referenceImage->dz);
-        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3)));
-        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3)));
         if (controlPointImage->nz > 1) {
-            const unsigned Grid_reg_spline_computeJacGradient3D =
-                (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeJacGradient3D)));
-            dim3 G1(Grid_reg_spline_computeJacGradient3D, Grid_reg_spline_computeJacGradient3D, 1);
-            dim3 B1(blockSize->reg_spline_computeJacGradient3D, 1, 1);
-            reg_spline_computeJacGradient3D_kernel<<<G1, B1>>>(nodeGradientArray_d);
-            NR_CUDA_CHECK_KERNEL(G1, B1);
+            const unsigned blocks = blockSize->reg_spline_computeJacGradient3D;
+            const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+            const dim3 gridDims(grids, grids, 1);
+            const dim3 blockDims(blocks, 1, 1);
+            reg_spline_computeJacGradient3D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
+                                                                            *jacobianMatricesTexture, controlPointImageDim,
+                                                                            controlPointVoxelSpacing, (unsigned)controlPointNumber,
+                                                                            referenceImageDim, reorientation, weight);
+            NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         } else {
-            const unsigned Grid_reg_spline_computeJacGradient2D =
-                (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeJacGradient2D)));
-            dim3 G1(Grid_reg_spline_computeJacGradient2D, Grid_reg_spline_computeJacGradient2D, 1);
-            dim3 B1(blockSize->reg_spline_computeJacGradient2D, 1, 1);
-            reg_spline_computeJacGradient2D_kernel<<<G1, B1>>>(nodeGradientArray_d);
-            NR_CUDA_CHECK_KERNEL(G1, B1);
+            const unsigned blocks = blockSize->reg_spline_computeJacGradient2D;
+            const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+            const dim3 gridDims(grids, grids, 1);
+            const dim3 blockDims(blocks, 1, 1);
+            reg_spline_computeJacGradient2D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
+                                                                            *jacobianMatricesTexture, controlPointImageDim,
+                                                                            controlPointVoxelSpacing, (unsigned)controlPointNumber,
+                                                                            referenceImageDim, reorientation, weight);
+            NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         }
     }
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture));
-    NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
-    NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda));
 }
 /* *************************************************************** */
-double reg_spline_correctFolding_gpu(nifti_image *referenceImage,
-                                     nifti_image *controlPointImage,
-                                     float4 *controlPointImageArray_d,
-                                     bool approx) {
+double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
+                                     const nifti_image *controlPointImage,
+                                     float4 *controlPointImageCuda,
+                                     const bool& approx) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
 
     // The Jacobian matrices and determinants are computed
-    float *jacobianMatrices_d;
-    float *jacobianDet_d;
-    int jacNumber;
-    double jacSum;
+    float *jacobianMatricesCuda, *jacobianDetCuda;
+    size_t jacobianDetSize, jacobianMatricesSize;
+    size_t jacNumber; double jacSum;
     if (approx) {
-        jacNumber = CalcVoxelNumber(*controlPointImage);
+        jacNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
         jacSum = (controlPointImage->nx - 2) * (controlPointImage->ny - 2) * (controlPointImage->nz - 2);
-        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)));
-        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
-        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+        jacobianDetSize = jacNumber * sizeof(float);
+        jacobianMatricesSize = 9 * jacobianDetSize;
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, jacobianMatricesSize));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize));
+        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     } else {
-        jacSum = jacNumber = CalcVoxelNumber(*referenceImage);
-        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float)));
-        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float)));
-        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d);
+        jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+        jacSum = static_cast<double>(jacNumber);
+        jacobianDetSize = jacNumber * sizeof(float);
+        jacobianMatricesSize = 9 * jacobianDetSize;
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, jacobianMatricesSize));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize));
+        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     }
 
     // Check if the Jacobian determinant average
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &jacNumber, sizeof(int)));
-    float *jacobianDet2_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d, jacNumber * sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d, jacobianDet_d, jacNumber * sizeof(float), cudaMemcpyDeviceToDevice));
-    const unsigned Grid_reg_spline_logSquaredValues =
-        (unsigned)ceilf(sqrtf((float)jacNumber / (float)(blockSize->reg_spline_logSquaredValues)));
-    dim3 G1(Grid_reg_spline_logSquaredValues, Grid_reg_spline_logSquaredValues, 1);
-    dim3 B1(blockSize->reg_spline_logSquaredValues, 1, 1);
-    reg_spline_logSquaredValues_kernel<<<G1, B1>>>(jacobianDet2_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-    float *jacobianDet_h;
-    NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet_h, jacNumber * sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet_h, jacobianDet2_d, jacNumber * sizeof(float), cudaMemcpyDeviceToHost));
-    NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2_d));
+    float *jacobianDet2Cuda;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2Cuda, jacobianDetSize));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2Cuda, jacobianDetCuda, jacobianDetSize, cudaMemcpyDeviceToDevice));
+    const unsigned blocks = blockSize->reg_spline_logSquaredValues;
+    const unsigned grids = (unsigned)ceilf(sqrtf((float)jacNumber / (float)blocks));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+    reg_spline_logSquaredValues_kernel<<<gridDims, blockDims>>>(jacobianDet2Cuda, (unsigned)jacNumber);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    float *jacobianDet;
+    NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet, jacobianDetSize));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet, jacobianDet2Cuda, jacobianDetSize, cudaMemcpyDeviceToHost));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2Cuda));
     double penaltyTermValue = 0;
-    for (int i = 0; i < jacNumber; ++i) penaltyTermValue += jacobianDet_h[i];
-    NR_CUDA_SAFE_CALL(cudaFreeHost(jacobianDet_h));
+    for (int i = 0; i < jacNumber; ++i) penaltyTermValue += jacobianDet[i];
+    NR_CUDA_SAFE_CALL(cudaFreeHost(jacobianDet));
     penaltyTermValue /= jacSum;
     if (penaltyTermValue == penaltyTermValue) {
-        NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
-        NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
+        NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda));
+        NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda));
         return penaltyTermValue;
     }
 
     // Need to disorient the Jacobian matrix using the header information - voxel to real conversion
-    mat33 reorientation;
-    if (controlPointImage->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&controlPointImage->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_ijk);
-    float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3)));
-    temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3)));
-    temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3)));
-
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianDeterminantTexture, jacobianDet_d, jacNumber * sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 9 * jacNumber * sizeof(float)));
-
-    // Bind some variables
-    const int controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk);
+
+    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3)));
+    auto jacobianDeterminantTexture = cudaCommon_createTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacobianDetSize,
+                                                                     cudaChannelFormatKindFloat, 1);
+    auto jacobianMatricesTexture = cudaCommon_createTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, jacobianMatricesSize,
+                                                                  cudaChannelFormatKindFloat, 1);
     if (approx) {
-        const unsigned Grid_reg_spline_approxCorrectFolding =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_approxCorrectFolding3D)));
-        dim3 G1(Grid_reg_spline_approxCorrectFolding, Grid_reg_spline_approxCorrectFolding, 1);
-        dim3 B1(blockSize->reg_spline_approxCorrectFolding3D, 1, 1);
-        reg_spline_approxCorrectFolding3D_kernel<<<G1, B1>>>(controlPointImageArray_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_spline_approxCorrectFolding3D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_approxCorrectFolding3D_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
+                                                                          *jacobianMatricesTexture, controlPointImageDim,
+                                                                          controlPointSpacing, (unsigned)controlPointNumber, reorientation);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const int voxelNumber = CalcVoxelNumber(*referenceImage);
         const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
         const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
                                                             controlPointImage->dy / referenceImage->dy,
                                                             controlPointImage->dz / referenceImage->dz);
-        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3)));
-        NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3)));
-        const unsigned Grid_reg_spline_correctFolding =
-            (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_correctFolding3D)));
-        dim3 G1(Grid_reg_spline_correctFolding, Grid_reg_spline_correctFolding, 1);
-        dim3 B1(blockSize->reg_spline_correctFolding3D, 1, 1);
-        reg_spline_correctFolding3D_kernel<<<G1, B1>>>(controlPointImageArray_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_spline_correctFolding3D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_spline_correctFolding3D_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
+                                                                    *jacobianMatricesTexture, controlPointImageDim, controlPointSpacing,
+                                                                    controlPointVoxelSpacing, (unsigned)controlPointNumber,
+                                                                    referenceImageDim, reorientation);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture));
-    NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d));
-    NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda));
+    NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda));
     return std::numeric_limits<double>::quiet_NaN();
 }
 /* *************************************************************** */
-void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
+void reg_getDeformationFromDisplacement_gpu(const nifti_image *image, float4 *imageCuda, const bool& reverse = false) {
     // Bind the qform or sform
-    mat44 temp_mat = image->qto_xyz;
-    if (image->sform_code > 0) temp_mat = image->sto_xyz;
-    float4 temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4)));
-    temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4)));
-    temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4)));
-
-    const int voxelNumber = CalcVoxelNumber(*image);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-
+    const mat44 affineMatrix = image->sform_code > 0 ? image->sto_xyz : image->qto_xyz;
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &imageDim, sizeof(int3)));
-
-    const unsigned Grid_reg_getDeformationFromDisplacement =
-        (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_getDeformationFromDisplacement)));
-    dim3 G1(Grid_reg_getDeformationFromDisplacement, Grid_reg_getDeformationFromDisplacement, 1);
-    dim3 B1(blockSize->reg_getDeformationFromDisplacement, 1, 1);
-    reg_getDeformationFromDisplacement3D_kernel<<<G1, B1>>>(imageArray_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
+
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getDeformationFromDisplacement;
+    const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+    reg_getDeformationFromDisplacement3D_kernel<<<gridDims, blockDims>>>(imageCuda, imageDim, (unsigned)voxelNumber, affineMatrix, reverse);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    // Bind the qform or sform
-    mat44 temp_mat = image->qto_xyz;
-    if (image->sform_code > 0) temp_mat = image->sto_xyz;
-    float4 temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4)));
-    temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4)));
-    temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4)));
-
-    const int voxelNumber = CalcVoxelNumber(*image);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-
-    const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &imageDim, sizeof(int3)));
-
-    const unsigned Grid_reg_getDisplacementFromDeformation =
-        (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_getDisplacementFromDeformation)));
-    dim3 G1(Grid_reg_getDisplacementFromDeformation, Grid_reg_getDisplacementFromDeformation, 1);
-    dim3 B1(blockSize->reg_getDisplacementFromDeformation, 1, 1);
-    reg_getDisplacementFromDeformation3D_kernel<<<G1, B1>>>(imageArray_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
+void reg_getDisplacementFromDeformation_gpu(const nifti_image *image, float4 *imageCuda) {
+    reg_getDeformationFromDisplacement_gpu(image, imageCuda, true);
 }
 /* *************************************************************** */
-void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
-                                                 nifti_image *def_h,
-                                                 float4 *cpp_gpu,
-                                                 float4 *def_gpu) {
-    const int voxelNumber = CalcVoxelNumber(*def_h);
+void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPointImage,
+                                                 const nifti_image *deformationField,
+                                                 const float4 *controlPointImageCuda,
+                                                 float4 *deformationFieldCuda) {
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
 
     // Create a mask array where no voxel are excluded
-    int *mask_gpu = nullptr;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber * sizeof(int)));
-    reg_fillMaskArray_gpu(voxelNumber, mask_gpu);
+    int *maskCuda = nullptr;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&maskCuda, voxelNumber * sizeof(int)));
+    reg_fillMaskArray_gpu(maskCuda, voxelNumber);
 
     // Define some variables for the deformation fields
-    float4 *tempDef_gpu = nullptr;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu, voxelNumber * sizeof(float4)));
+    float4 *tempDefCuda = nullptr;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&tempDefCuda, voxelNumber * sizeof(float4)));
 
     // The deformation field is computed
-    reg_spline_getDeformationField_gpu(cpp_h, def_h, cpp_gpu, def_gpu, mask_gpu, voxelNumber, true); // non-interpolant spline are used
+    reg_spline_getDeformationField_gpu(controlPointImage, deformationField, controlPointImageCuda,
+                                       deformationFieldCuda, maskCuda, voxelNumber, true); // non-interpolant spline is used
 
     // The deformation field is converted into a displacement field
-    reg_getDisplacementFromDeformation_gpu(def_h, def_gpu);
+    reg_getDisplacementFromDeformation_gpu(deformationField, deformationFieldCuda);
 
     // Scaling of the deformation field
-    float scalingValue = pow(2.0f, fabs(cpp_h->intent_p1));
-    if (cpp_h->intent_p1 < 0)
-        // backward deformation field is scaled down
-        reg_multiplyValue_gpu(voxelNumber, def_gpu, -1.f / scalingValue);
-    else
-        // forward deformation field is scaled down
-        reg_multiplyValue_gpu(voxelNumber, def_gpu, 1.f / scalingValue);
+    const unsigned squaringNumber = (unsigned)fabs(controlPointImage->intent_p1);
+    const float scalingValue = pow(2.f, (float)squaringNumber);
+    // Backward/forward deformation field is scaled down
+    reg_multiplyValue_gpu((int)voxelNumber, deformationFieldCuda, (controlPointImage->intent_p1 < 0  ? -1.f : 1.f) / scalingValue);
 
     // The displacement field is converted back into a deformation field
-    reg_getDeformationFromDisplacement_gpu(def_h, def_gpu);
+    reg_getDeformationFromDisplacement_gpu(deformationField, deformationFieldCuda);
 
     // The deformation field is squared
-    unsigned squaringNumber = (unsigned)fabs(cpp_h->intent_p1);
     for (unsigned i = 0; i < squaringNumber; ++i) {
         // The deformation field arrays are updated
-        NR_CUDA_SAFE_CALL(cudaMemcpy(tempDef_gpu, def_gpu, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(tempDefCuda, deformationFieldCuda, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
 
         // The deformation fields are composed
-        reg_defField_compose_gpu(def_h, tempDef_gpu, def_gpu, mask_gpu, voxelNumber);
+        reg_defField_compose_gpu(deformationField, tempDefCuda, deformationFieldCuda, voxelNumber);
     }
 
-    NR_CUDA_SAFE_CALL(cudaFree(tempDef_gpu));
-    NR_CUDA_SAFE_CALL(cudaFree(mask_gpu));
+    NR_CUDA_SAFE_CALL(cudaFree(tempDefCuda));
+    NR_CUDA_SAFE_CALL(cudaFree(maskCuda));
 }
 /* *************************************************************** */
-void reg_defField_compose_gpu(nifti_image *def,
-                              float4 *def_gpu,
-                              float4 *defOut_gpu,
-                              int *mask_gpu,
-                              int activeVoxel) {
+void reg_defField_compose_gpu(const nifti_image *deformationField,
+                              const float4 *deformationFieldCuda,
+                              float4 *deformationFieldCudaOut,
+                              const size_t& activeVoxelNumber) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    const int voxelNumber = CalcVoxelNumber(*def);
-
-    // Bind the qform or sform
-    mat44 temp_mat = def->qto_ijk;
-    if (def->sform_code > 0) temp_mat = def->sto_ijk;
-    float4 temp;
-    temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4)));
-    temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4)));
-    temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4)));
-
-    temp_mat = def->qto_xyz;
-    if (def->sform_code > 0) temp_mat = def->sto_xyz;
-    temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0c, &temp, sizeof(float4)));
-    temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1c, &temp, sizeof(float4)));
-    temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2c, &temp, sizeof(float4)));
-
-    const int3 referenceImageDim = make_int3(def->nx, def->ny, def->nz);
-
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, voxelDeformationTexture, def_gpu, activeVoxel * sizeof(float4)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_gpu, activeVoxel * sizeof(int)));
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3)));
-
-    if (def->nz > 1) {
-        const unsigned Grid_reg_defField_compose3D =
-            (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_compose3D)));
-        dim3 G1(Grid_reg_defField_compose3D, Grid_reg_defField_compose3D, 1);
-        dim3 B1(blockSize->reg_defField_compose3D, 1, 1);
-        reg_defField_compose3D_kernel<<<G1, B1>>>(defOut_gpu);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
+    const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
+    const mat44 affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk;
+    const mat44 affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz;
+    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
+                                                                  activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+
+    if (deformationField->nz > 1) {
+        const unsigned blocks = blockSize->reg_defField_compose3D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_defField_compose3D_kernel<<<gridDims, blockDims>>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim,
+                                                               (unsigned)voxelNumber, affineMatrixB, affineMatrixC);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned Grid_reg_defField_compose2D =
-            (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_compose2D)));
-        dim3 G1(Grid_reg_defField_compose2D, Grid_reg_defField_compose2D, 1);
-        dim3 B1(blockSize->reg_defField_compose2D, 1, 1);
-        reg_defField_compose2D_kernel<<<G1, B1>>>(defOut_gpu);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_defField_compose2D;
+        const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_defField_compose2D_kernel<<<gridDims, blockDims>>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim,
+                                                               (unsigned)voxelNumber, affineMatrixB, affineMatrixC);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
-
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
 }
 /* *************************************************************** */
-void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
-                                        float4 **deformationField_gpu,
-                                        float **jacobianMatrices_gpu) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    const int3 referenceDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
-    const float3 referenceSpacing = make_float3(deformationField->dx, deformationField->dy, deformationField->dz);
-    const int voxelNumber = referenceDim.x * referenceDim.y * referenceDim.z;
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceSpacing, &referenceSpacing, sizeof(float3)));
-
-    mat33 reorientation;
-    if (deformationField->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&deformationField->sto_xyz);
-    else reorientation = reg_mat44_to_mat33(&deformationField->qto_xyz);
-    float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3)));
-    temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3)));
-    temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3)));
-
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, voxelDeformationTexture, *deformationField_gpu, voxelNumber * sizeof(float4)));
-
-    const unsigned Grid_reg_defField_getJacobianMatrix =
-        (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_getJacobianMatrix)));
-    dim3 G1(Grid_reg_defField_getJacobianMatrix, Grid_reg_defField_getJacobianMatrix, 1);
-    dim3 B1(blockSize->reg_defField_getJacobianMatrix);
-    reg_defField_getJacobianMatrix3D_kernel << <G1, B1>>>(*jacobianMatrices_gpu);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture));
+void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
+                                        const float4 *deformationFieldCuda,
+                                        float *jacobianMatricesCuda) {
+    const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
+    const mat33 reorientation = reg_mat44_to_mat33(deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz);
+    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
+                                                                  voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix;
+    const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+    reg_defField_getJacobianMatrix3D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, *deformationFieldTexture, referenceImageDim,
+                                                                     (unsigned)voxelNumber, reorientation);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index 167a1bc4..9f9c9084 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -17,67 +17,60 @@
 #include "_reg_tools_gpu.h"
 #include <limits>
 
+/* *************************************************************** */
 extern "C++"
-void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage,
-                                        nifti_image *targetImage,
-                                        float4 *controlPointImageArray_d,
-                                        float4 *positionFieldImageArray_d,
-                                        int *mask,
-                                        int activeVoxelNumber,
-                                        bool bspline);
-
-/* BE */
+void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
+                                        const nifti_image *referenceImage,
+                                        const float4 *controlPointImageCuda,
+                                        float4 *deformationFieldCuda,
+                                        const int *maskCuda,
+                                        const size_t& activeVoxelNumber,
+                                        const bool& bspline);
+/* *************************************************************** */
 extern "C++"
-float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d);
-
+float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage,
+                                         const float4 *controlPointImageCuda);
+/* *************************************************************** */
 extern "C++"
-void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
-                                                float4 *controlPointImageArray_d,
-                                                float4 *nodeGradientArray_d,
+void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointImage,
+                                                const float4 *controlPointImageCuda,
+                                                float4 *transGradientCuda,
                                                 float bendingEnergyWeight);
-
-/** Jacobian
- *
- */
+/* *************************************************************** */
 extern "C++"
-double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage,
-                                             nifti_image *controlPointImage,
-                                             float4 *controlPointImageArray_d,
-                                             bool approx);
-
+double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage,
+                                             const nifti_image *controlPointImage,
+                                             const float4 *controlPointImageCuda,
+                                             const bool& approx);
+/* *************************************************************** */
 extern "C++"
-void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage,
-                                                   nifti_image *controlPointImage,
-                                                   float4 *controlPointImageArray_d,
-                                                   float4 *nodeGradientArray_d,
-                                                   float jacobianWeight,
-                                                   bool approx);
-
+void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceImage,
+                                                   const nifti_image *controlPointImage,
+                                                   const float4 *controlPointImageCuda,
+                                                   float4 *transGradientCuda,
+                                                   const float& jacobianWeight,
+                                                   const bool& approx);
+/* *************************************************************** */
 extern "C++"
-double reg_spline_correctFolding_gpu(nifti_image *targetImage,
-                                     nifti_image *controlPointImage,
-                                     float4 *controlPointImageArray_d,
-                                     bool approx);
-
-extern "C++"
-void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h,
-                                                 nifti_image *def_h,
-                                                 float4 *cpp_gpu,
-                                                 float4 *def_gpu);
-
+double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
+                                     const nifti_image *controlPointImage,
+                                     float4 *controlPointImageCuda,
+                                     const bool& approx);
+/* *************************************************************** */
 extern "C++"
-void reg_defField_compose_gpu(nifti_image *def,
-                              float4 *def_gpu,
-                              float4 *defOut_gpu,
-                              int *mask_gpu,
-                              int activeVoxel);
-
-extern "C++"
-void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d);
+void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPointImage,
+                                                 const nifti_image *deformationField,
+                                                 const float4 *controlPointImageCuda,
+                                                 float4 *deformationFieldCuda);
+/* *************************************************************** */
 extern "C++"
-void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d);
-
+void reg_defField_compose_gpu(const nifti_image *deformationField,
+                              const float4 *deformationFieldCuda,
+                              float4 *deformationFieldOutCuda,
+                              const size_t& activeVoxelNumber);
+/* *************************************************************** */
 extern "C++"
-void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField,
-                                        float4 *deformationField_gpu,
-                                        float *jacobianMatrices_gpu);
+void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
+                                        const float4 *deformationFieldCuda,
+                                        float *jacobianMatricesCuda);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 54e8fb30..2a0a9f8c 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -1,5 +1,5 @@
 /*
- *  _reg_spline_kernels.cu
+ *  _reg_localTransformation_kernels.cu
  *
  *
  *  Created by Marc Modat on 24/03/2009.
@@ -10,79 +10,8 @@
  *
  */
 
-#include "_reg_common_cuda.h"
-
-__device__ __constant__ int c_UseBSpline;
-__device__ __constant__ int c_VoxelNumber;
-__device__ __constant__ int c_ControlPointNumber;
-__device__ __constant__ int3 c_ReferenceImageDim;
-__device__ __constant__ int3 c_ControlPointImageDim;
-__device__ __constant__ float3 c_ControlPointVoxelSpacing;
-__device__ __constant__ float3 c_ControlPointSpacing;
-__device__ __constant__ float3 c_ReferenceSpacing;
-__device__ __constant__ float c_Weight;
-__device__ __constant__ float3 c_Weight3;
-__device__ __constant__ int c_ActiveVoxelNumber;
-__device__ __constant__ bool c_Type;
-__device__ __constant__ float3 c_AffineMatrix0;
-__device__ __constant__ float3 c_AffineMatrix1;
-__device__ __constant__ float3 c_AffineMatrix2;
-__device__ __constant__ float4 c_AffineMatrix0b;
-__device__ __constant__ float4 c_AffineMatrix1b;
-__device__ __constant__ float4 c_AffineMatrix2b;
-__device__ __constant__ float4 c_AffineMatrix0c;
-__device__ __constant__ float4 c_AffineMatrix1c;
-__device__ __constant__ float4 c_AffineMatrix2c;
-/* *************************************************************** */
-texture<float4, 1, cudaReadModeElementType> controlPointTexture;
-texture<float4, 1, cudaReadModeElementType> secondDerivativesTexture;
-texture<float4, 1, cudaReadModeElementType> voxelDeformationTexture;
-texture<int, 1, cudaReadModeElementType> maskTexture;
-texture<float, 1, cudaReadModeElementType> jacobianDeterminantTexture;
-texture<float, 1, cudaReadModeElementType> jacobianMatricesTexture;
-/* *************************************************************** */
-__device__ float2 operator*(float a, float2 b) {
-    return make_float2(a * b.x, a * b.y);
-}
-__device__ float3 operator*(float a, float3 b) {
-    return make_float3(a * b.x, a * b.y, a * b.z);
-}
-__device__ float3 operator*(float3 a, float3 b) {
-    return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
-}
-__device__ float4 operator*(float4 a, float4 b) {
-    return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-}
-__device__ float4 operator*(float a, float4 b) {
-    return make_float4(a * b.x, a * b.y, a * b.z, 0.0f);
-}
-/* *************************************************************** */
-__device__ float2 operator/(float2 a, float2 b) {
-    return make_float2(a.x / b.x, a.y / b.y);
-}
-__device__ float3 operator/(float3 a, float b) {
-    return make_float3(a.x / b, a.y / b, a.z / b);
-}
-__device__ float3 operator/(float3 a, float3 b) {
-    return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
-}
-/* *************************************************************** */
-__device__ float2 operator+(float2 a, float2 b) {
-    return make_float2(a.x + b.x, a.y + b.y);
-}
-__device__ float4 operator+(float4 a, float4 b) {
-    return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, 0.0f);
-}
-__device__ float3 operator+(float3 a, float3 b) {
-    return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
-}
-/* *************************************************************** */
-__device__ float3 operator-(float3 a, float3 b) {
-    return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
-}
-__device__ float4 operator-(float4 a, float4 b) {
-    return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, 0.f);
-}
+#include "_reg_common_cuda_kernels.cu"
+
 /* *************************************************************** */
 __device__ void GetBasisBSplineValues(const double basis, float *values) {
     const double ff = basis * basis;
@@ -322,75 +251,86 @@ __device__ void GetSecondDerivativeBasisValues3D(const int& index,
     }
 }
 /* *************************************************************** */
-__device__ float4 GetSlidedValues(int x, int y) {
+__device__ float4 GetSlidedValues(int x, int y,
+                                  cudaTextureObject_t deformationFieldTexture,
+                                  const int3& referenceImageDim,
+                                  const mat44& affineMatrix) {
     int newX = x;
     int newY = y;
     if (x < 0) {
         newX = 0;
-    } else if (x >= c_ReferenceImageDim.x) {
-        newX = c_ReferenceImageDim.x - 1;
+    } else if (x >= referenceImageDim.x) {
+        newX = referenceImageDim.x - 1;
     }
     if (y < 0) {
         newY = 0;
-    } else if (y >= c_ReferenceImageDim.y) {
-        newY = c_ReferenceImageDim.y - 1;
+    } else if (y >= referenceImageDim.y) {
+        newY = referenceImageDim.y - 1;
     }
 
     x -= newX;
     y -= newY;
-    const float4 slidedValues = make_float4(x * c_AffineMatrix0c.x + y * c_AffineMatrix0c.y,
-                                            x * c_AffineMatrix1c.x + y * c_AffineMatrix1c.y,
+    const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1],
+                                            x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1],
                                             0.f, 0.f);
-    return slidedValues + tex1Dfetch(voxelDeformationTexture, newY * c_ReferenceImageDim.x + newX);
+    return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, newY * referenceImageDim.x + newX);
 }
 /* *************************************************************** */
-__device__ float4 GetSlidedValues(int x, int y, int z) {
+__device__ float4 GetSlidedValues(int x, int y, int z,
+                                  cudaTextureObject_t deformationFieldTexture,
+                                  const int3& referenceImageDim,
+                                  const mat44& affineMatrix) {
     int newX = x;
     int newY = y;
     int newZ = z;
     if (x < 0) {
         newX = 0;
-    } else if (x >= c_ReferenceImageDim.x) {
-        newX = c_ReferenceImageDim.x - 1;
+    } else if (x >= referenceImageDim.x) {
+        newX = referenceImageDim.x - 1;
     }
     if (y < 0) {
         newY = 0;
-    } else if (y >= c_ReferenceImageDim.y) {
-        newY = c_ReferenceImageDim.y - 1;
+    } else if (y >= referenceImageDim.y) {
+        newY = referenceImageDim.y - 1;
     }
     if (z < 0) {
         newZ = 0;
-    } else if (z >= c_ReferenceImageDim.z) {
-        newZ = c_ReferenceImageDim.z - 1;
+    } else if (z >= referenceImageDim.z) {
+        newZ = referenceImageDim.z - 1;
     }
 
     x -= newX;
     y -= newY;
     z -= newZ;
-    const float4 slidedValues = make_float4(x * c_AffineMatrix0c.x + y * c_AffineMatrix0c.y + z * c_AffineMatrix0c.z,
-                                            x * c_AffineMatrix1c.x + y * c_AffineMatrix1c.y + z * c_AffineMatrix1c.z,
-                                            x * c_AffineMatrix2c.x + y * c_AffineMatrix2c.y + z * c_AffineMatrix2c.z,
+    const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2],
+                                            x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2],
+                                            x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2],
                                             0.f);
-    return slidedValues + tex1Dfetch(voxelDeformationTexture, (newZ * c_ReferenceImageDim.y + newY) * c_ReferenceImageDim.x + newX);
+    return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX);
 }
 /* *************************************************************** */
-__global__ void reg_spline_getDeformationField3D(float4 *positionField) {
+__global__ void reg_spline_getDeformationField3D(float4 *deformationField,
+                                                 cudaTextureObject_t controlPointTexture,
+                                                 cudaTextureObject_t maskTexture,
+                                                 const int3 referenceImageDim,
+                                                 const int3 controlPointImageDim,
+                                                 const float3 controlPointVoxelSpacing,
+                                                 const unsigned activeVoxelNumber,
+                                                 const bool bspline) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ActiveVoxelNumber) {
-        const int3 imageSize = c_ReferenceImageDim;
-
-        int tempIndex = tex1Dfetch(maskTexture, tid);
-        const int z = tempIndex / (imageSize.x * imageSize.y);
-        tempIndex -= z * imageSize.x * imageSize.y;
-        const int y = tempIndex / imageSize.x;
-        const int x = tempIndex - y * imageSize.x;
+    if (tid < activeVoxelNumber) {
+        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
+        int quot, rem;
+        reg_div_cuda(tid2, referenceImageDim.x * referenceImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, referenceImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         // The "nearest previous" node is determined [0,0,0]
-        const float3 gridVoxelSpacing = c_ControlPointVoxelSpacing;
         const int3 nodeAnte = {
-            int((float)x / gridVoxelSpacing.x),
-            int((float)y / gridVoxelSpacing.y),
-            int((float)z / gridVoxelSpacing.z)
+            int((float)x / controlPointVoxelSpacing.x),
+            int((float)y / controlPointVoxelSpacing.y),
+            int((float)z / controlPointVoxelSpacing.z)
         };
 
         // Z basis values
@@ -398,36 +338,34 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField) {
         const unsigned sharedMemIndex = 4 * threadIdx.x;
         // Compute the shared memory offset which corresponds to four times the number of thread per block
         float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z];
-        float relative = (float)z / gridVoxelSpacing.z - (float)nodeAnte.z;
+        float relative = (float)z / controlPointVoxelSpacing.z - (float)nodeAnte.z;
         if (relative < 0) relative = 0; // rounding error
-        if (c_UseBSpline) GetBasisBSplineValues(relative, &zBasis[sharedMemIndex]);
+        if (bspline) GetBasisBSplineValues(relative, &zBasis[sharedMemIndex]);
         else GetBasisSplineValues(relative, &zBasis[sharedMemIndex]);
 
         // Y basis values
-        relative = (float)y / gridVoxelSpacing.y - (float)nodeAnte.y;
+        relative = (float)y / controlPointVoxelSpacing.y - (float)nodeAnte.y;
         if (relative < 0) relative = 0; // rounding error
-        if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]);
+        if (bspline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]);
         else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]);
 
         // X basis values
         float xBasis[4];
-        relative = (float)x / gridVoxelSpacing.x - (float)nodeAnte.x;
+        relative = (float)x / controlPointVoxelSpacing.x - (float)nodeAnte.x;
         if (relative < 0) relative = 0; // rounding error
-        if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis);
+        if (bspline) GetBasisBSplineValues(relative, xBasis);
         else GetBasisSplineValues(relative, xBasis);
 
-        const int3 controlPointImageDim = c_ControlPointImageDim;
         float4 displacement{};
-
         for (int c = 0; c < 4; c++) {
             float3 tempDisplacement{};
             int indexYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y) * controlPointImageDim.x;
             for (int b = 0; b < 4; b++) {
                 int indexXYZ = indexYZ + nodeAnte.x;
-                const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, indexXYZ++);
-                const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, indexXYZ++);
-                const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, indexXYZ++);
-                const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, indexXYZ);
+                const float4 nodeCoefficientA = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
+                const float4 nodeCoefficientB = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
+                const float4 nodeCoefficientC = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
+                const float4 nodeCoefficientD = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
 
                 const float& basis = yBasis[sharedMemIndex + b];
                 tempDisplacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
@@ -454,48 +392,51 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField) {
             displacement.z += basis * tempDisplacement.z;
         }
 
-        positionField[tid] = displacement;
+        deformationField[tid] = displacement;
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getDeformationField2D(float4 *positionField) {
+__global__ void reg_spline_getDeformationField2D(float4 *deformationField,
+                                                 cudaTextureObject_t controlPointTexture,
+                                                 cudaTextureObject_t maskTexture,
+                                                 const int3 referenceImageDim,
+                                                 const int3 controlPointImageDim,
+                                                 const float3 controlPointVoxelSpacing,
+                                                 const unsigned activeVoxelNumber,
+                                                 const bool bspline) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ActiveVoxelNumber) {
-        const int3 imageSize = c_ReferenceImageDim;
-
-        const int tempIndex = tex1Dfetch(maskTexture, tid);
-        const int y = tempIndex / imageSize.x;
-        const int x = tempIndex - y * imageSize.x;
+    if (tid < activeVoxelNumber) {
+        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
+        int quot, rem;
+        reg_div_cuda(tid2, referenceImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         // The "nearest previous" node is determined [0,0,0]
-        const float2 gridVoxelSpacing = { c_ControlPointVoxelSpacing.x, c_ControlPointVoxelSpacing.y };
-        const int2 nodeAnte = { int((float)x / gridVoxelSpacing.x), int((float)y / gridVoxelSpacing.y) };
+        const int2 nodeAnte = { int((float)x / controlPointVoxelSpacing.x), int((float)y / controlPointVoxelSpacing.y) };
 
         // Y basis values
         extern __shared__ float yBasis[];   // Shared memory
         const unsigned sharedMemIndex = 4 * threadIdx.x;
-        float relative = (float)y / gridVoxelSpacing.y - (float)nodeAnte.y;
+        float relative = (float)y / controlPointVoxelSpacing.y - (float)nodeAnte.y;
         if (relative < 0) relative = 0; // rounding error
-        if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]);
+        if (bspline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]);
         else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]);
 
         // X basis values
         float xBasis[4];
-        relative = (float)x / gridVoxelSpacing.x - (float)nodeAnte.x;
+        relative = (float)x / controlPointVoxelSpacing.x - (float)nodeAnte.x;
         if (relative < 0) relative = 0; // rounding error
-        if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis);
+        if (bspline) GetBasisBSplineValues(relative, xBasis);
         else GetBasisSplineValues(relative, xBasis);
 
-        const int2 controlPointImageDim = { c_ControlPointImageDim.x, c_ControlPointImageDim.y };
         float4 displacement{};
-
         for (int b = 0; b < 4; b++) {
             int index = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
 
-            const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, index++);
-            const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, index++);
-            const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, index++);
-            const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, index);
+            const float4 nodeCoefficientA = tex1Dfetch<float4>(controlPointTexture, index++);
+            const float4 nodeCoefficientB = tex1Dfetch<float4>(controlPointTexture, index++);
+            const float4 nodeCoefficientC = tex1Dfetch<float4>(controlPointTexture, index++);
+            const float4 nodeCoefficientD = tex1Dfetch<float4>(controlPointTexture, index);
 
             const float& basis = yBasis[sharedMemIndex + b];
             displacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
@@ -509,11 +450,14 @@ __global__ void reg_spline_getDeformationField2D(float4 *positionField) {
                                        nodeCoefficientD.y * xBasis[3]);
         }
 
-        positionField[tid] = displacement;
+        deformationField[tid] = displacement;
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivativeValues) {
+__global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivativeValues,
+                                                        cudaTextureObject_t controlPointTexture,
+                                                        const int3 controlPointImageDim,
+                                                        const unsigned controlPointNumber) {
     __shared__ float xxbasis[9];
     __shared__ float yybasis[9];
     __shared__ float xybasis[9];
@@ -523,30 +467,26 @@ __global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivative
     __syncthreads();
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        const int y = tid / gridSize.x;
-        const int x = tid - y * gridSize.x;
-
-        float4 xx{};
-        float4 yy{};
-        float4 xy{};
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
+        float4 xx{}, yy{}, xy{};
         unsigned tempIndex;
-        if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1) {
+        if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1) {
             tempIndex = 0;
             for (int b = y - 1; b < y + 2; ++b) {
                 for (int a = x - 1; a < x + 2; ++a) {
-                    const int indexXY = b * gridSize.x + a;
-                    float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXY);
+                    const int indexXY = b * controlPointImageDim.x + a;
+                    const float4 controlPointValues = tex1Dfetch<float4>(controlPointTexture, indexXY);
                     xx.x += xxbasis[tempIndex] * controlPointValues.x;
                     xx.y += xxbasis[tempIndex] * controlPointValues.y;
                     yy.x += yybasis[tempIndex] * controlPointValues.x;
                     yy.y += yybasis[tempIndex] * controlPointValues.y;
                     xy.x += xybasis[tempIndex] * controlPointValues.x;
                     xy.y += xybasis[tempIndex] * controlPointValues.y;
-                    ++tempIndex;
+                    tempIndex++;
                 }
             }
         }
@@ -558,7 +498,10 @@ __global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivative
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivativeValues) {
+__global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivativeValues,
+                                                        cudaTextureObject_t controlPointTexture,
+                                                        const int3 controlPointImageDim,
+                                                        const unsigned controlPointNumber) {
     __shared__ float xxbasis[27];
     __shared__ float yybasis[27];
     __shared__ float zzbasis[27];
@@ -571,29 +514,22 @@ __global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivative
     __syncthreads();
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
+    if (tid < controlPointNumber) {
         int tempIndex = tid;
-        const int z = tempIndex / (gridSize.x * gridSize.y);
-        tempIndex -= z * gridSize.x * gridSize.y;
-        const int y = tempIndex / gridSize.x;
-        const int x = tempIndex - y * gridSize.x;
-
-        float4 xx{};
-        float4 yy{};
-        float4 zz{};
-        float4 xy{};
-        float4 yz{};
-        float4 xz{};
-
-        if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1 && 0 < z && z < gridSize.z - 1) {
+        int quot, rem;
+        reg_div_cuda(tempIndex, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
+
+        float4 xx{}, yy{}, zz{}, xy{}, yz{}, xz{};
+        if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1 && 0 < z && z < controlPointImageDim.z - 1) {
             tempIndex = 0;
             for (int c = z - 1; c < z + 2; ++c) {
                 for (int b = y - 1; b < y + 2; ++b) {
                     for (int a = x - 1; a < x + 2; ++a) {
-                        int indexXYZ = (c * gridSize.y + b) * gridSize.x + a;
-                        float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXYZ);
+                        const int indexXYZ = (c * controlPointImageDim.y + b) * controlPointImageDim.x + a;
+                        const float4 controlPointValues = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
                         xx = xx + xxbasis[tempIndex] * controlPointValues;
                         yy = yy + yybasis[tempIndex] * controlPointValues;
                         zz = zz + zzbasis[tempIndex] * controlPointValues;
@@ -616,33 +552,41 @@ __global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivative
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergy2D_kernel(float *penaltyTerm) {
+__global__ void reg_spline_getApproxBendingEnergy2D_kernel(float *penaltyTerm,
+                                                           cudaTextureObject_t secondDerivativesTexture,
+                                                           const unsigned controlPointNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
+    if (tid < controlPointNumber) {
         unsigned index = tid * 3;
-        float4 xx = tex1Dfetch(secondDerivativesTexture, index++); xx = xx * xx;
-        float4 yy = tex1Dfetch(secondDerivativesTexture, index++); yy = yy * yy;
-        float4 xy = tex1Dfetch(secondDerivativesTexture, index++); xy = xy * xy;
+        float4 xx = tex1Dfetch<float4>(secondDerivativesTexture, index++);  xx = xx * xx;
+        float4 yy = tex1Dfetch<float4>(secondDerivativesTexture, index++);  yy = yy * yy;
+        float4 xy = tex1Dfetch<float4>(secondDerivativesTexture, index++);  xy = xy * xy;
         penaltyTerm[tid] = xx.x + xx.y + yy.x + yy.y + 2.f * (xy.x + xy.y);
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergy3D_kernel(float *penaltyTerm) {
+__global__ void reg_spline_getApproxBendingEnergy3D_kernel(float *penaltyTerm,
+                                                           cudaTextureObject_t secondDerivativesTexture,
+                                                           const unsigned controlPointNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
+    if (tid < controlPointNumber) {
         unsigned index = tid * 6;
-        float4 xx = tex1Dfetch(secondDerivativesTexture, index++); xx = xx * xx;
-        float4 yy = tex1Dfetch(secondDerivativesTexture, index++); yy = yy * yy;
-        float4 zz = tex1Dfetch(secondDerivativesTexture, index++); zz = zz * zz;
-        float4 xy = tex1Dfetch(secondDerivativesTexture, index++); xy = xy * xy;
-        float4 yz = tex1Dfetch(secondDerivativesTexture, index++); yz = yz * yz;
-        float4 xz = tex1Dfetch(secondDerivativesTexture, index); xz = xz * xz;
+        float4 xx = tex1Dfetch<float4>(secondDerivativesTexture, index++);  xx = xx * xx;
+        float4 yy = tex1Dfetch<float4>(secondDerivativesTexture, index++);  yy = yy * yy;
+        float4 zz = tex1Dfetch<float4>(secondDerivativesTexture, index++);  zz = zz * zz;
+        float4 xy = tex1Dfetch<float4>(secondDerivativesTexture, index++);  xy = xy * xy;
+        float4 yz = tex1Dfetch<float4>(secondDerivativesTexture, index++);  yz = yz * yz;
+        float4 xz = tex1Dfetch<float4>(secondDerivativesTexture, index);    xz = xz * xz;
         penaltyTerm[tid] = xx.x + xx.y + xx.z + yy.x + yy.y + yy.z + zz.x + zz.y + zz.z +
             2.f * (xy.x + xy.y + xy.z + yz.x + yz.y + yz.z + xz.x + xz.y + xz.z);
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeGradientArray) {
+__global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeGradient,
+                                                                   cudaTextureObject_t secondDerivativesTexture,
+                                                                   const int3 controlPointImageDim,
+                                                                   const unsigned controlPointNumber,
+                                                                   const float weight) {
     __shared__ float xxbasis[9];
     __shared__ float yybasis[9];
     __shared__ float xybasis[9];
@@ -652,27 +596,25 @@ __global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeG
     __syncthreads();
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        const int y = tid / gridSize.x;
-        const int x = tid - y * gridSize.x;
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         float2 gradientValue{};
         float4 secondDerivativeValues;
-
         int coord = 0;
         for (int b = y - 1; b < y + 2; ++b) {
             for (int a = x - 1; a < x + 2; ++a) {
-                if (-1 < a && -1 < b && a < gridSize.x && b < gridSize.y) {
-                    int indexXY = 3 * (b * gridSize.x + a);
-                    secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXY++); // XX
+                if (-1 < a && a < controlPointImageDim.x && -1 < b && b < controlPointImageDim.y) {
+                    int indexXY = 3 * (b * controlPointImageDim.x + a);
+                    secondDerivativeValues = tex1Dfetch<float4>(secondDerivativesTexture, indexXY++); // XX
                     gradientValue.x += secondDerivativeValues.x * xxbasis[coord];
                     gradientValue.y += secondDerivativeValues.y * xxbasis[coord];
-                    secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXY++); // YY
+                    secondDerivativeValues = tex1Dfetch<float4>(secondDerivativesTexture, indexXY++); // YY
                     gradientValue.x += secondDerivativeValues.x * yybasis[coord];
                     gradientValue.y += secondDerivativeValues.y * yybasis[coord];
-                    secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXY); // XY
+                    secondDerivativeValues = 2.f * tex1Dfetch<float4>(secondDerivativesTexture, indexXY); // XY
                     gradientValue.x += secondDerivativeValues.x * xybasis[coord];
                     gradientValue.y += secondDerivativeValues.y * xybasis[coord];
                 }
@@ -680,12 +622,16 @@ __global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeG
             }
         }
 
-        nodeGradientArray[tid].x += c_Weight * gradientValue.x;
-        nodeGradientArray[tid].y += c_Weight * gradientValue.y;
+        nodeGradient[tid].x += weight * gradientValue.x;
+        nodeGradient[tid].y += weight * gradientValue.y;
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeGradientArray) {
+__global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeGradient,
+                                                                   cudaTextureObject_t secondDerivativesTexture,
+                                                                   const int3 controlPointImageDim,
+                                                                   const unsigned controlPointNumber,
+                                                                   const float weight) {
     __shared__ float xxbasis[27];
     __shared__ float yybasis[27];
     __shared__ float zzbasis[27];
@@ -698,45 +644,42 @@ __global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeG
     __syncthreads();
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        int tempIndex = tid;
-        const int z = tempIndex / (gridSize.x * gridSize.y);
-        tempIndex -= z * gridSize.x * gridSize.y;
-        const int y = tempIndex / gridSize.x;
-        const int x = tempIndex - y * gridSize.x;
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         float3 gradientValue{};
         float4 secondDerivativeValues;
-
         int coord = 0;
         for (int c = z - 1; c < z + 2; ++c) {
             for (int b = y - 1; b < y + 2; ++b) {
                 for (int a = x - 1; a < x + 2; ++a) {
-                    if (-1 < a && -1 < b && -1 < c && a < gridSize.x && b < gridSize.y && c < gridSize.z) {
-                        unsigned indexXYZ = 6 * ((c * gridSize.y + b) * gridSize.x + a);
-                        secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // XX
+                    if (-1 < a && a < controlPointImageDim.x && -1 < b && b < controlPointImageDim.y && -1 < c && c < controlPointImageDim.z) {
+                        unsigned indexXYZ = 6 * ((c * controlPointImageDim.y + b) * controlPointImageDim.x + a);
+                        secondDerivativeValues = tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++); // XX
                         gradientValue.x += secondDerivativeValues.x * xxbasis[coord];
                         gradientValue.y += secondDerivativeValues.y * xxbasis[coord];
                         gradientValue.z += secondDerivativeValues.z * xxbasis[coord];
-                        secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // YY
+                        secondDerivativeValues = tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++); // YY
                         gradientValue.x += secondDerivativeValues.x * yybasis[coord];
                         gradientValue.y += secondDerivativeValues.y * yybasis[coord];
                         gradientValue.z += secondDerivativeValues.z * yybasis[coord];
-                        secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); //ZZ
+                        secondDerivativeValues = tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++); // ZZ
                         gradientValue.x += secondDerivativeValues.x * zzbasis[coord];
                         gradientValue.y += secondDerivativeValues.y * zzbasis[coord];
                         gradientValue.z += secondDerivativeValues.z * zzbasis[coord];
-                        secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ++); // XY
+                        secondDerivativeValues = 2.f * tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++); // XY
                         gradientValue.x += secondDerivativeValues.x * xybasis[coord];
                         gradientValue.y += secondDerivativeValues.y * xybasis[coord];
                         gradientValue.z += secondDerivativeValues.z * xybasis[coord];
-                        secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ++); // YZ
+                        secondDerivativeValues = 2.f * tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++); // YZ
                         gradientValue.x += secondDerivativeValues.x * yzbasis[coord];
                         gradientValue.y += secondDerivativeValues.y * yzbasis[coord];
                         gradientValue.z += secondDerivativeValues.z * yzbasis[coord];
-                        secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ); //XZ
+                        secondDerivativeValues = 2.f * tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ); // XZ
                         gradientValue.x += secondDerivativeValues.x * xzbasis[coord];
                         gradientValue.y += secondDerivativeValues.y * xzbasis[coord];
                         gradientValue.z += secondDerivativeValues.z * xzbasis[coord];
@@ -745,18 +688,22 @@ __global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeG
                 }
             }
         }
-        gradientValue = c_Weight * gradientValue;
+        gradientValue = weight * gradientValue;
 
-        float4 metricGradientValue;
-        metricGradientValue = nodeGradientArray[tid];
+        float4 metricGradientValue = nodeGradient[tid];
         metricGradientValue.x += gradientValue.x;
         metricGradientValue.y += gradientValue.y;
         metricGradientValue.z += gradientValue.z;
-        nodeGradientArray[tid] = metricGradientValue;
+        nodeGradient[tid] = metricGradientValue;
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatrices, float *jacobianDet) {
+__global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatrices,
+                                                            float *jacobianDet,
+                                                            cudaTextureObject_t controlPointTexture,
+                                                            const int3 controlPointImageDim,
+                                                            const unsigned controlPointNumber,
+                                                            const mat33 reorientation) {
     __shared__ float xbasis[9];
     __shared__ float ybasis[9];
 
@@ -765,57 +712,59 @@ __global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatri
     __syncthreads();
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        int tempIndex = tid;
-        const int y = tempIndex / gridSize.x;
-        const int x = tempIndex - y * gridSize.x;
-
-        if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1) {
-            float Tx_x = 0, Tx_y = 0;
-            float Ty_x = 0, Ty_y = 0;
-
-            tempIndex = 0;
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
+
+        if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1) {
+            float2 tx{}, ty{};
+            unsigned index = 0;
             for (int b = y - 1; b < y + 2; ++b) {
                 for (int a = x - 1; a < x + 2; ++a) {
-                    int indexXY = b * gridSize.x + a;
-                    float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXY);
-                    Tx_x += xbasis[tempIndex] * controlPointValues.x;
-                    Tx_y += ybasis[tempIndex] * controlPointValues.x;
-                    Ty_x += xbasis[tempIndex] * controlPointValues.y;
-                    Ty_y += ybasis[tempIndex] * controlPointValues.y;
-                    tempIndex++;
+                    const int indexXY = b * controlPointImageDim.x + a;
+                    const float4 controlPointValues = tex1Dfetch<float4>(controlPointTexture, indexXY);
+                    tx.x += xbasis[index] * controlPointValues.x;
+                    tx.y += ybasis[index] * controlPointValues.x;
+                    ty.x += xbasis[index] * controlPointValues.y;
+                    ty.y += ybasis[index] * controlPointValues.y;
+                    index++;
                 }
             }
 
             // The jacobian matrix is reoriented
-            float Tx_x2 = c_AffineMatrix0.x * Tx_x + c_AffineMatrix0.y * Ty_x;
-            float Tx_y2 = c_AffineMatrix0.x * Tx_y + c_AffineMatrix0.y * Ty_y;
-            float Ty_x2 = c_AffineMatrix1.x * Tx_x + c_AffineMatrix1.y * Ty_x;
-            float Ty_y2 = c_AffineMatrix1.x * Tx_y + c_AffineMatrix1.y * Ty_y;
+            float2 tx2, ty2;
+            tx2.x = reorientation.m[0][0] * tx.x + reorientation.m[0][1] * ty.x;
+            tx2.y = reorientation.m[0][0] * tx.y + reorientation.m[0][1] * ty.y;
+            ty2.x = reorientation.m[1][0] * tx.x + reorientation.m[1][1] * ty.x;
+            ty2.y = reorientation.m[1][0] * tx.y + reorientation.m[1][1] * ty.y;
 
             // The Jacobian matrix is stored
-            tempIndex = tid * 4;
-            jacobianMatrices[tempIndex++] = Tx_x2;
-            jacobianMatrices[tempIndex++] = Tx_y2;
-            jacobianMatrices[tempIndex++] = Ty_x2;
-            jacobianMatrices[tempIndex] = Ty_y2;
+            index = tid * 4;
+            jacobianMatrices[index++] = tx2.x;
+            jacobianMatrices[index++] = tx2.y;
+            jacobianMatrices[index++] = ty2.x;
+            jacobianMatrices[index] = ty2.y;
 
             // The Jacobian determinant is computed and stored
-            jacobianDet[tid] = Tx_x2 * Ty_y2 - Tx_y2 * Ty_x2;
+            jacobianDet[tid] = tx2.x * ty2.y - tx2.y * ty2.x;
         } else {
-            tempIndex = tid * 4;
-            jacobianMatrices[tempIndex++] = 1.f;
-            jacobianMatrices[tempIndex++] = 0.f;
-            jacobianMatrices[tempIndex++] = 0.f;
-            jacobianMatrices[tempIndex] = 1.f;
-            jacobianDet[tid] = 1.0f;
+            unsigned index = tid * 4;
+            jacobianMatrices[index++] = 1.f;
+            jacobianMatrices[index++] = 0.f;
+            jacobianMatrices[index++] = 0.f;
+            jacobianMatrices[index] = 1.f;
+            jacobianDet[tid] = 1.f;
         }
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatrices, float *jacobianDet) {
+__global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatrices,
+                                                            float *jacobianDet,
+                                                            cudaTextureObject_t controlPointTexture,
+                                                            const int3 controlPointImageDim,
+                                                            const unsigned controlPointNumber,
+                                                            const mat33 reorientation) {
     __shared__ float xbasis[27];
     __shared__ float ybasis[27];
     __shared__ float zbasis[27];
@@ -825,268 +774,264 @@ __global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatri
     __syncthreads();
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        int tempIndex = tid;
-        const int z = tempIndex / (gridSize.x * gridSize.y);
-        tempIndex -= z * gridSize.x * gridSize.y;
-        const int y = tempIndex / gridSize.x;
-        const int x = tempIndex - y * gridSize.x;
-
-        if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1 && 0 < z && z < gridSize.z - 1) {
-            float Tx_x = 0, Tx_y = 0, Tx_z = 0;
-            float Ty_x = 0, Ty_y = 0, Ty_z = 0;
-            float Tz_x = 0, Tz_y = 0, Tz_z = 0;
-
-            tempIndex = 0;
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
+
+        if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1 && 0 < z && z < controlPointImageDim.z - 1) {
+            float3 tx{}, ty{}, tz{};
+            unsigned index = 0;
             for (int c = z - 1; c < z + 2; ++c) {
                 for (int b = y - 1; b < y + 2; ++b) {
                     for (int a = x - 1; a < x + 2; ++a) {
-                        int indexXYZ = (c * gridSize.y + b) * gridSize.x + a;
-                        float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXYZ);
-                        Tx_x += xbasis[tempIndex] * controlPointValues.x;
-                        Tx_y += ybasis[tempIndex] * controlPointValues.x;
-                        Tx_z += zbasis[tempIndex] * controlPointValues.x;
-                        Ty_x += xbasis[tempIndex] * controlPointValues.y;
-                        Ty_y += ybasis[tempIndex] * controlPointValues.y;
-                        Ty_z += zbasis[tempIndex] * controlPointValues.y;
-                        Tz_x += xbasis[tempIndex] * controlPointValues.z;
-                        Tz_y += ybasis[tempIndex] * controlPointValues.z;
-                        Tz_z += zbasis[tempIndex] * controlPointValues.z;
-                        tempIndex++;
+                        const int indexXYZ = (c * controlPointImageDim.y + b) * controlPointImageDim.x + a;
+                        const float4 controlPointValues = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
+                        tx.x += xbasis[index] * controlPointValues.x;
+                        tx.y += ybasis[index] * controlPointValues.x;
+                        tx.z += zbasis[index] * controlPointValues.x;
+                        ty.x += xbasis[index] * controlPointValues.y;
+                        ty.y += ybasis[index] * controlPointValues.y;
+                        ty.z += zbasis[index] * controlPointValues.y;
+                        tz.x += xbasis[index] * controlPointValues.z;
+                        tz.y += ybasis[index] * controlPointValues.z;
+                        tz.z += zbasis[index] * controlPointValues.z;
+                        index++;
                     }
                 }
             }
 
             // The jacobian matrix is reoriented
-            float Tx_x2 = c_AffineMatrix0.x * Tx_x + c_AffineMatrix0.y * Ty_x + c_AffineMatrix0.z * Tz_x;
-            float Tx_y2 = c_AffineMatrix0.x * Tx_y + c_AffineMatrix0.y * Ty_y + c_AffineMatrix0.z * Tz_y;
-            float Tx_z2 = c_AffineMatrix0.x * Tx_z + c_AffineMatrix0.y * Ty_z + c_AffineMatrix0.z * Tz_z;
-            float Ty_x2 = c_AffineMatrix1.x * Tx_x + c_AffineMatrix1.y * Ty_x + c_AffineMatrix1.z * Tz_x;
-            float Ty_y2 = c_AffineMatrix1.x * Tx_y + c_AffineMatrix1.y * Ty_y + c_AffineMatrix1.z * Tz_y;
-            float Ty_z2 = c_AffineMatrix1.x * Tx_z + c_AffineMatrix1.y * Ty_z + c_AffineMatrix1.z * Tz_z;
-            float Tz_x2 = c_AffineMatrix2.x * Tx_x + c_AffineMatrix2.y * Ty_x + c_AffineMatrix2.z * Tz_x;
-            float Tz_y2 = c_AffineMatrix2.x * Tx_y + c_AffineMatrix2.y * Ty_y + c_AffineMatrix2.z * Tz_y;
-            float Tz_z2 = c_AffineMatrix2.x * Tx_z + c_AffineMatrix2.y * Ty_z + c_AffineMatrix2.z * Tz_z;
+            float3 tx2, ty2, tz2;
+            tx2.x = reorientation.m[0][0] * tx.x + reorientation.m[0][1] * ty.x + reorientation.m[0][2] * tz.x;
+            tx2.y = reorientation.m[0][0] * tx.y + reorientation.m[0][1] * ty.y + reorientation.m[0][2] * tz.y;
+            tx2.z = reorientation.m[0][0] * tx.z + reorientation.m[0][1] * ty.z + reorientation.m[0][2] * tz.z;
+            ty2.x = reorientation.m[1][0] * tx.x + reorientation.m[1][1] * ty.x + reorientation.m[1][2] * tz.x;
+            ty2.y = reorientation.m[1][0] * tx.y + reorientation.m[1][1] * ty.y + reorientation.m[1][2] * tz.y;
+            ty2.z = reorientation.m[1][0] * tx.z + reorientation.m[1][1] * ty.z + reorientation.m[1][2] * tz.z;
+            tz2.x = reorientation.m[2][0] * tx.x + reorientation.m[2][1] * ty.x + reorientation.m[2][2] * tz.x;
+            tz2.y = reorientation.m[2][0] * tx.y + reorientation.m[2][1] * ty.y + reorientation.m[2][2] * tz.y;
+            tz2.z = reorientation.m[2][0] * tx.z + reorientation.m[2][1] * ty.z + reorientation.m[2][2] * tz.z;
 
             // The Jacobian matrix is stored
-            tempIndex = tid * 9;
-            jacobianMatrices[tempIndex++] = Tx_x2;
-            jacobianMatrices[tempIndex++] = Tx_y2;
-            jacobianMatrices[tempIndex++] = Tx_z2;
-            jacobianMatrices[tempIndex++] = Ty_x2;
-            jacobianMatrices[tempIndex++] = Ty_y2;
-            jacobianMatrices[tempIndex++] = Ty_z2;
-            jacobianMatrices[tempIndex++] = Tz_x2;
-            jacobianMatrices[tempIndex++] = Tz_y2;
-            jacobianMatrices[tempIndex] = Tz_z2;
+            index = tid * 9;
+            jacobianMatrices[index++] = tx2.x;
+            jacobianMatrices[index++] = tx2.y;
+            jacobianMatrices[index++] = tx2.z;
+            jacobianMatrices[index++] = ty2.x;
+            jacobianMatrices[index++] = ty2.y;
+            jacobianMatrices[index++] = ty2.z;
+            jacobianMatrices[index++] = tz2.x;
+            jacobianMatrices[index++] = tz2.y;
+            jacobianMatrices[index] = tz2.z;
 
             // The Jacobian determinant is computed and stored
-            jacobianDet[tid] = Tx_x2 * Ty_y2 * Tz_z2
-                + Tx_y2 * Ty_z2 * Tz_x2
-                + Tx_z2 * Ty_x2 * Tz_y2
-                - Tx_x2 * Ty_z2 * Tz_y2
-                - Tx_y2 * Ty_x2 * Tz_z2
-                - Tx_z2 * Ty_y2 * Tz_x2;
+            jacobianDet[tid] = tx2.x * ty2.y * tz2.z
+                + tx2.y * ty2.z * tz2.x
+                + tx2.z * ty2.x * tz2.y
+                - tx2.x * ty2.z * tz2.y
+                - tx2.y * ty2.x * tz2.z
+                - tx2.z * ty2.y * tz2.x;
         } else {
-            tempIndex = tid * 9;
-            jacobianMatrices[tempIndex++] = 1.f;
-            jacobianMatrices[tempIndex++] = 0.f;
-            jacobianMatrices[tempIndex++] = 0.f;
-            jacobianMatrices[tempIndex++] = 0.f;
-            jacobianMatrices[tempIndex++] = 1.f;
-            jacobianMatrices[tempIndex++] = 0.f;
-            jacobianMatrices[tempIndex++] = 0.f;
-            jacobianMatrices[tempIndex++] = 0.f;
-            jacobianMatrices[tempIndex] = 1.f;
-            jacobianDet[tid] = 1.0f;
+            unsigned index = tid * 9;
+            jacobianMatrices[index++] = 1.f;
+            jacobianMatrices[index++] = 0.f;
+            jacobianMatrices[index++] = 0.f;
+            jacobianMatrices[index++] = 0.f;
+            jacobianMatrices[index++] = 1.f;
+            jacobianMatrices[index++] = 0.f;
+            jacobianMatrices[index++] = 0.f;
+            jacobianMatrices[index++] = 0.f;
+            jacobianMatrices[index] = 1.f;
+            jacobianDet[tid] = 1.f;
         }
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices, float *jacobianDet) {
+__global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices,
+                                                      float *jacobianDet,
+                                                      cudaTextureObject_t controlPointTexture,
+                                                      const int3 controlPointImageDim,
+                                                      const float3 controlPointSpacing,
+                                                      const int3 referenceImageDim,
+                                                      const unsigned voxelNumber,
+                                                      const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        const int2 imageSize = { c_ReferenceImageDim.x, c_ReferenceImageDim.y };
-
-        int tempIndex = tid;
-        const int y = tempIndex / imageSize.x;
-        const int x = tempIndex - y * imageSize.x;
+    if (tid < voxelNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, referenceImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         // the "nearest previous" node is determined [0,0,0]
-        const float2 gridVoxelSpacing = { c_ControlPointVoxelSpacing.x, c_ControlPointVoxelSpacing.y };
-        const int2 nodeAnte = { (int)floorf((float)x / gridVoxelSpacing.x), (int)floorf((float)y / gridVoxelSpacing.y) };
+        const int2 nodeAnte = { (int)floorf((float)x / controlPointSpacing.x), (int)floorf((float)y / controlPointSpacing.y) };
 
         float xBasis[4], yBasis[4], xFirst[4], yFirst[4], relative;
 
-        relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x);
+        relative = fabsf((float)x / controlPointSpacing.x - (float)nodeAnte.x);
         GetFirstBSplineValues(relative, xBasis, xFirst);
 
-        relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y);
+        relative = fabsf((float)y / controlPointSpacing.y - (float)nodeAnte.y);
         GetFirstBSplineValues(relative, yBasis, yFirst);
 
-        const int2 controlPointImageDim = { c_ControlPointImageDim.x, c_ControlPointImageDim.y };
-        float2 Tx{};
-        float2 Ty{};
-
+        float2 tx{}, ty{};
         for (int b = 0; b < 4; ++b) {
             int indexXY = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
 
-            float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++);
-            float2 tempBasis = make_float2(xFirst[0] * yBasis[b], xBasis[0] * yFirst[b]);
-            Tx = Tx + nodeCoefficient.x * tempBasis;
-            Ty = Ty + nodeCoefficient.y * tempBasis;
-
-            nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++);
-            tempBasis = make_float2(xFirst[1] * yBasis[b], xBasis[1] * yFirst[b]);
-            Tx = Tx + nodeCoefficient.x * tempBasis;
-            Ty = Ty + nodeCoefficient.y * tempBasis;
-
-            nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++);
-            tempBasis = make_float2(xFirst[2] * yBasis[b], xBasis[2] * yFirst[b]);
-            Tx = Tx + nodeCoefficient.x * tempBasis;
-            Ty = Ty + nodeCoefficient.y * tempBasis;
-
-            nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY);
-            tempBasis = make_float2(xFirst[3] * yBasis[b], xBasis[3] * yFirst[b]);
-            Tx = Tx + nodeCoefficient.x * tempBasis;
-            Ty = Ty + nodeCoefficient.y * tempBasis;
+            float4 nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXY++);
+            float2 basis = make_float2(xFirst[0] * yBasis[b], xBasis[0] * yFirst[b]);
+            tx = tx + nodeCoefficient.x * basis;
+            ty = ty + nodeCoefficient.y * basis;
+
+            nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXY++);
+            basis = make_float2(xFirst[1] * yBasis[b], xBasis[1] * yFirst[b]);
+            tx = tx + nodeCoefficient.x * basis;
+            ty = ty + nodeCoefficient.y * basis;
+
+            nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXY++);
+            basis = make_float2(xFirst[2] * yBasis[b], xBasis[2] * yFirst[b]);
+            tx = tx + nodeCoefficient.x * basis;
+            ty = ty + nodeCoefficient.y * basis;
+
+            nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXY);
+            basis = make_float2(xFirst[3] * yBasis[b], xBasis[3] * yFirst[b]);
+            tx = tx + nodeCoefficient.x * basis;
+            ty = ty + nodeCoefficient.y * basis;
         }
 
         // The jacobian matrix is reoriented
-        const float Tx_x2 = c_AffineMatrix0.x * Tx.x + c_AffineMatrix0.y * Ty.x;
-        const float Tx_y2 = c_AffineMatrix0.x * Tx.y + c_AffineMatrix0.y * Ty.y;
-        const float Ty_x2 = c_AffineMatrix1.x * Tx.x + c_AffineMatrix1.y * Ty.x;
-        const float Ty_y2 = c_AffineMatrix1.x * Tx.y + c_AffineMatrix1.y * Ty.y;
+        float2 tx2, ty2;
+        tx2.x = reorientation.m[0][0] * tx.x + reorientation.m[0][1] * ty.x;
+        tx2.y = reorientation.m[0][0] * tx.y + reorientation.m[0][1] * ty.y;
+        ty2.x = reorientation.m[1][0] * tx.x + reorientation.m[1][1] * ty.x;
+        ty2.y = reorientation.m[1][0] * tx.y + reorientation.m[1][1] * ty.y;
 
         // The Jacobian matrix is stored
-        tempIndex = tid * 4;
-        jacobianMatrices[tempIndex++] = Tx_x2;
-        jacobianMatrices[tempIndex++] = Tx_y2;
-        jacobianMatrices[tempIndex++] = Ty_x2;
-        jacobianMatrices[tempIndex] = Ty_y2;
+        unsigned index = tid * 4;
+        jacobianMatrices[index++] = tx2.x;
+        jacobianMatrices[index++] = tx2.y;
+        jacobianMatrices[index++] = ty2.x;
+        jacobianMatrices[index] = ty2.y;
 
         // The Jacobian determinant is computed and stored
-        jacobianDet[tid] = Tx_x2 * Ty_y2 - Tx_y2 * Ty_x2;
+        jacobianDet[tid] = tx2.x * ty2.y - tx2.y * ty2.x;
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, float *jacobianDet) {
+__global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices,
+                                                      float *jacobianDet,
+                                                      cudaTextureObject_t controlPointTexture,
+                                                      const int3 controlPointImageDim,
+                                                      const float3 controlPointSpacing,
+                                                      const int3 referenceImageDim,
+                                                      const unsigned voxelNumber,
+                                                      const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        const int3 imageSize = c_ReferenceImageDim;
-
-        int tempIndex = tid;
-        const int z = tempIndex / (imageSize.x * imageSize.y);
-        tempIndex -= z * imageSize.x * imageSize.y;
-        const int y = tempIndex / imageSize.x;
-        const int x = tempIndex - y * imageSize.x;
+    if (tid < voxelNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, referenceImageDim.x * referenceImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, referenceImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         // the "nearest previous" node is determined [0,0,0]
-        const float3 gridVoxelSpacing = c_ControlPointVoxelSpacing;
         const int3 nodeAnte = {
-            (int)floorf((float)x / gridVoxelSpacing.x),
-            (int)floorf((float)y / gridVoxelSpacing.y),
-            (int)floorf((float)z / gridVoxelSpacing.z)
+            (int)floorf((float)x / controlPointSpacing.x),
+            (int)floorf((float)y / controlPointSpacing.y),
+            (int)floorf((float)z / controlPointSpacing.z)
         };
 
         extern __shared__ float yFirst[];
         float *zFirst = &yFirst[4 * blockDim.x * blockDim.y * blockDim.z];
 
         float xBasis[4], yBasis[4], zBasis[4], xFirst[4], relative;
-
         const unsigned sharedMemIndex = 4 * threadIdx.x;
 
-        relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x);
+        relative = fabsf((float)x / controlPointSpacing.x - (float)nodeAnte.x);
         GetFirstBSplineValues(relative, xBasis, xFirst);
 
-        relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y);
+        relative = fabsf((float)y / controlPointSpacing.y - (float)nodeAnte.y);
         GetFirstBSplineValues(relative, yBasis, &yFirst[sharedMemIndex]);
 
-        relative = fabsf((float)z / gridVoxelSpacing.z - (float)nodeAnte.z);
+        relative = fabsf((float)z / controlPointSpacing.z - (float)nodeAnte.z);
         GetFirstBSplineValues(relative, zBasis, &zFirst[sharedMemIndex]);
 
-        const int3 controlPointImageDim = c_ControlPointImageDim;
-        float3 Tx{};
-        float3 Ty{};
-        float3 Tz{};
-
+        float3 tx{}, ty{}, tz{};
         for (int c = 0; c < 4; ++c) {
             for (int b = 0; b < 4; ++b) {
                 int indexXYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
-                float3 tempBasisXY = make_float3(yBasis[b] * zBasis[c],
-                                                 yFirst[sharedMemIndex + b] * zBasis[c],
-                                                 yBasis[b] * zFirst[sharedMemIndex + c]);
-
-                float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++);
-                float3 tempBasis = make_float3(xFirst[0], xBasis[0], xBasis[0]) * tempBasisXY;
-                Tx = Tx + nodeCoefficient.x * tempBasis;
-                Ty = Ty + nodeCoefficient.y * tempBasis;
-                Tz = Tz + nodeCoefficient.z * tempBasis;
-
-                nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++);
-                tempBasis = make_float3(xFirst[1], xBasis[1], xBasis[1]) * tempBasisXY;
-                Tx = Tx + nodeCoefficient.x * tempBasis;
-                Ty = Ty + nodeCoefficient.y * tempBasis;
-                Tz = Tz + nodeCoefficient.z * tempBasis;
-
-                nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++);
-                tempBasis = make_float3(xFirst[2], xBasis[2], xBasis[2]) * tempBasisXY;
-                Tx = Tx + nodeCoefficient.x * tempBasis;
-                Ty = Ty + nodeCoefficient.y * tempBasis;
-                Tz = Tz + nodeCoefficient.z * tempBasis;
-
-                nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ);
-                tempBasis = make_float3(xFirst[3], xBasis[3], xBasis[3]) * tempBasisXY;
-                Tx = Tx + nodeCoefficient.x * tempBasis;
-                Ty = Ty + nodeCoefficient.y * tempBasis;
-                Tz = Tz + nodeCoefficient.z * tempBasis;
+                float3 basisXY{ yBasis[b] * zBasis[c], yFirst[sharedMemIndex + b] * zBasis[c], yBasis[b] * zFirst[sharedMemIndex + c] };
+
+                float4 nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
+                float3 basis = make_float3(xFirst[0], xBasis[0], xBasis[0]) * basisXY;
+                tx = tx + nodeCoefficient.x * basis;
+                ty = ty + nodeCoefficient.y * basis;
+                tz = tz + nodeCoefficient.z * basis;
+
+                nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
+                basis = make_float3(xFirst[1], xBasis[1], xBasis[1]) * basisXY;
+                tx = tx + nodeCoefficient.x * basis;
+                ty = ty + nodeCoefficient.y * basis;
+                tz = tz + nodeCoefficient.z * basis;
+
+                nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
+                basis = make_float3(xFirst[2], xBasis[2], xBasis[2]) * basisXY;
+                tx = tx + nodeCoefficient.x * basis;
+                ty = ty + nodeCoefficient.y * basis;
+                tz = tz + nodeCoefficient.z * basis;
+
+                nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
+                basis = make_float3(xFirst[3], xBasis[3], xBasis[3]) * basisXY;
+                tx = tx + nodeCoefficient.x * basis;
+                ty = ty + nodeCoefficient.y * basis;
+                tz = tz + nodeCoefficient.z * basis;
             }
         }
 
         // The jacobian matrix is reoriented
-        const float Tx_x2 = c_AffineMatrix0.x * Tx.x + c_AffineMatrix0.y * Ty.x + c_AffineMatrix0.z * Tz.x;
-        const float Tx_y2 = c_AffineMatrix0.x * Tx.y + c_AffineMatrix0.y * Ty.y + c_AffineMatrix0.z * Tz.y;
-        const float Tx_z2 = c_AffineMatrix0.x * Tx.z + c_AffineMatrix0.y * Ty.z + c_AffineMatrix0.z * Tz.z;
-        const float Ty_x2 = c_AffineMatrix1.x * Tx.x + c_AffineMatrix1.y * Ty.x + c_AffineMatrix1.z * Tz.x;
-        const float Ty_y2 = c_AffineMatrix1.x * Tx.y + c_AffineMatrix1.y * Ty.y + c_AffineMatrix1.z * Tz.y;
-        const float Ty_z2 = c_AffineMatrix1.x * Tx.z + c_AffineMatrix1.y * Ty.z + c_AffineMatrix1.z * Tz.z;
-        const float Tz_x2 = c_AffineMatrix2.x * Tx.x + c_AffineMatrix2.y * Ty.x + c_AffineMatrix2.z * Tz.x;
-        const float Tz_y2 = c_AffineMatrix2.x * Tx.y + c_AffineMatrix2.y * Ty.y + c_AffineMatrix2.z * Tz.y;
-        const float Tz_z2 = c_AffineMatrix2.x * Tx.z + c_AffineMatrix2.y * Ty.z + c_AffineMatrix2.z * Tz.z;
+        float3 tx2, ty2, tz2;
+        tx2.x = reorientation.m[0][0] * tx.x + reorientation.m[0][1] * ty.x + reorientation.m[0][2] * tz.x;
+        tx2.y = reorientation.m[0][0] * tx.y + reorientation.m[0][1] * ty.y + reorientation.m[0][2] * tz.y;
+        tx2.z = reorientation.m[0][0] * tx.z + reorientation.m[0][1] * ty.z + reorientation.m[0][2] * tz.z;
+        ty2.x = reorientation.m[1][0] * tx.x + reorientation.m[1][1] * ty.x + reorientation.m[1][2] * tz.x;
+        ty2.y = reorientation.m[1][0] * tx.y + reorientation.m[1][1] * ty.y + reorientation.m[1][2] * tz.y;
+        ty2.z = reorientation.m[1][0] * tx.z + reorientation.m[1][1] * ty.z + reorientation.m[1][2] * tz.z;
+        tz2.x = reorientation.m[2][0] * tx.x + reorientation.m[2][1] * ty.x + reorientation.m[2][2] * tz.x;
+        tz2.y = reorientation.m[2][0] * tx.y + reorientation.m[2][1] * ty.y + reorientation.m[2][2] * tz.y;
+        tz2.z = reorientation.m[2][0] * tx.z + reorientation.m[2][1] * ty.z + reorientation.m[2][2] * tz.z;
 
         // The Jacobian matrix is stored
-        tempIndex = tid * 9;
-        jacobianMatrices[tempIndex++] = Tx_x2;
-        jacobianMatrices[tempIndex++] = Tx_y2;
-        jacobianMatrices[tempIndex++] = Tx_z2;
-        jacobianMatrices[tempIndex++] = Ty_x2;
-        jacobianMatrices[tempIndex++] = Ty_y2;
-        jacobianMatrices[tempIndex++] = Ty_z2;
-        jacobianMatrices[tempIndex++] = Tz_x2;
-        jacobianMatrices[tempIndex++] = Tz_y2;
-        jacobianMatrices[tempIndex] = Tz_z2;
+        unsigned index = tid * 9;
+        jacobianMatrices[index++] = tx2.x;
+        jacobianMatrices[index++] = tx2.y;
+        jacobianMatrices[index++] = tx2.z;
+        jacobianMatrices[index++] = ty2.x;
+        jacobianMatrices[index++] = ty2.y;
+        jacobianMatrices[index++] = ty2.z;
+        jacobianMatrices[index++] = tz2.x;
+        jacobianMatrices[index++] = tz2.y;
+        jacobianMatrices[index] = tz2.z;
 
         // The Jacobian determinant is computed and stored
-        jacobianDet[tid] = Tx_x2 * Ty_y2 * Tz_z2
-            + Tx_y2 * Ty_z2 * Tz_x2
-            + Tx_z2 * Ty_x2 * Tz_y2
-            - Tx_x2 * Ty_z2 * Tz_y2
-            - Tx_y2 * Ty_x2 * Tz_z2
-            - Tx_z2 * Ty_y2 * Tz_x2;
+        jacobianDet[tid] = tx2.x * ty2.y * tz2.z
+            + tx2.y * ty2.z * tz2.x
+            + tx2.z * ty2.x * tz2.y
+            - tx2.x * ty2.z * tz2.y
+            - tx2.y * ty2.x * tz2.z
+            - tx2.z * ty2.y * tz2.x;
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_logSquaredValues_kernel(float *det) {
+__global__ void reg_spline_logSquaredValues_kernel(float *det, const unsigned voxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
+    if (tid < voxelNumber) {
         const float val = logf(det[tid]);
         det[tid] = val * val;
     }
 }
 /* *************************************************************** */
-__device__ void getJacobianGradientValues2D(float *jacobianMatrix,
+__device__ void GetJacobianGradientValues2D(float *jacobianMatrix,
                                             float detJac,
                                             float basisX,
                                             float basisY,
@@ -1095,7 +1040,7 @@ __device__ void getJacobianGradientValues2D(float *jacobianMatrix,
     jacobianConstraint->y += detJac * (basisY * jacobianMatrix[0] - basisX * jacobianMatrix[1]);
 }
 /* *************************************************************** */
-__device__ void getJacobianGradientValues3D(float *jacobianMatrix,
+__device__ void GetJacobianGradientValues3D(float *jacobianMatrix,
                                             float detJac,
                                             float basisX,
                                             float basisY,
@@ -1117,7 +1062,13 @@ __device__ void getJacobianGradientValues3D(float *jacobianMatrix,
         basisZ * (jacobianMatrix[0] * jacobianMatrix[4] - jacobianMatrix[1] * jacobianMatrix[3]));
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient) {
+__global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient,
+                                                             cudaTextureObject_t jacobianDeterminantTexture,
+                                                             cudaTextureObject_t jacobianMatricesTexture,
+                                                             const int3 controlPointImageDim,
+                                                             const unsigned controlPointNumber,
+                                                             const mat33 reorientation,
+                                                             const float3 weight) {
     __shared__ float xbasis[9];
     __shared__ float ybasis[9];
 
@@ -1126,47 +1077,49 @@ __global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient) {
     __syncthreads();
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        int tempIndex = tid;
-        const int y = tempIndex / (gridSize.x);
-        const int x = tempIndex - y * gridSize.x;
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         float2 jacobianGradient{};
-        tempIndex = 8;
+        unsigned index = 8;
         for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) {
-            if (pixelY > 0 && pixelY < gridSize.y - 1) {
-                int jacIndex = pixelY * gridSize.x + x - 1;
+            if (0 < pixelY && pixelY < controlPointImageDim.y - 1) {
+                int jacIndex = pixelY * controlPointImageDim.x + x - 1;
                 for (int pixelX = (int)(x - 1); pixelX < (int)(x + 2); ++pixelX) {
-                    if (pixelX > 0 && pixelX < gridSize.x - 1) {
-                        float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
-
+                    if (0 < pixelX && pixelX < controlPointImageDim.x - 1) {
+                        float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
                         if (detJac > 0.f) {
                             detJac = 2.f * logf(detJac) / detJac;
                             float jacobianMatrix[4];
-                            jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4);
-                            jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 1);
-                            jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 2);
-                            jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 3);
-
-                            getJacobianGradientValues2D(jacobianMatrix, detJac, xbasis[tempIndex], ybasis[tempIndex], &jacobianGradient);
+                            jacobianMatrix[0] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 4);
+                            jacobianMatrix[1] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 4 + 1);
+                            jacobianMatrix[2] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 4 + 2);
+                            jacobianMatrix[3] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 4 + 3);
+                            GetJacobianGradientValues2D(jacobianMatrix, detJac, xbasis[index], ybasis[index], &jacobianGradient);
                         }
                     }
                     jacIndex++;
-                    tempIndex--;
+                    index--;
                 }
-            } else tempIndex -= 3;
+            } else index -= 3;
         }
 
         gradient[tid] = gradient[tid] + make_float4(
-            c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y),
-            c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y),
+            weight.x * (reorientation.m[0][0] * jacobianGradient.x + reorientation.m[0][1] * jacobianGradient.y),
+            weight.y * (reorientation.m[1][0] * jacobianGradient.x + reorientation.m[1][1] * jacobianGradient.y),
             0.f, 0.f);
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient) {
+__global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient,
+                                                             cudaTextureObject_t jacobianDeterminantTexture,
+                                                             cudaTextureObject_t jacobianMatricesTexture,
+                                                             const int3 controlPointImageDim,
+                                                             const unsigned controlPointNumber,
+                                                             const mat33 reorientation,
+                                                             const float3 weight) {
     __shared__ float xbasis[27];
     __shared__ float ybasis[27];
     __shared__ float zbasis[27];
@@ -1176,175 +1129,171 @@ __global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient) {
     __syncthreads();
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        int tempIndex = tid;
-        const int z = tempIndex / (gridSize.x * gridSize.y);
-        tempIndex -= z * gridSize.x * gridSize.y;
-        const int y = tempIndex / gridSize.x;
-        const int x = tempIndex - y * gridSize.x;
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         float3 jacobianGradient{};
-        tempIndex = 26;
+        unsigned index = 26;
         for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) {
-            if (pixelZ > 0 && pixelZ < gridSize.z - 1) {
+            if (0 < pixelZ && pixelZ < controlPointImageDim.z - 1) {
                 for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) {
-                    if (pixelY > 0 && pixelY < gridSize.y - 1) {
-                        int jacIndex = (pixelZ * gridSize.y + pixelY) * gridSize.x + x - 1;
+                    if (0 < pixelY && pixelY < controlPointImageDim.y - 1) {
+                        int jacIndex = (pixelZ * controlPointImageDim.y + pixelY) * controlPointImageDim.x + x - 1;
                         for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) {
-                            if (pixelX > 0 && pixelX < gridSize.x - 1) {
-                                float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+                            if (0 < pixelX && pixelX < controlPointImageDim.x - 1) {
+                                float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
                                 if (detJac > 0.f) {
                                     detJac = 2.f * logf(detJac) / detJac;
                                     float jacobianMatrix[9];
-                                    jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9);
-                                    jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 1);
-                                    jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 2);
-                                    jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 3);
-                                    jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 4);
-                                    jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 5);
-                                    jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 6);
-                                    jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 7);
-                                    jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 8);
-                                    getJacobianGradientValues3D(jacobianMatrix,
-                                                                detJac,
-                                                                xbasis[tempIndex],
-                                                                ybasis[tempIndex],
-                                                                zbasis[tempIndex],
-                                                                &jacobianGradient);
+                                    jacobianMatrix[0] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9);
+                                    jacobianMatrix[1] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 1);
+                                    jacobianMatrix[2] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 2);
+                                    jacobianMatrix[3] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 3);
+                                    jacobianMatrix[4] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 4);
+                                    jacobianMatrix[5] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 5);
+                                    jacobianMatrix[6] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 6);
+                                    jacobianMatrix[7] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 7);
+                                    jacobianMatrix[8] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 8);
+                                    GetJacobianGradientValues3D(jacobianMatrix, detJac, xbasis[index], ybasis[index], zbasis[index], &jacobianGradient);
                                 }
                             }
                             jacIndex++;
-                            tempIndex--;
+                            index--;
                         }
-                    } else tempIndex -= 3;
+                    } else index -= 3;
                 }
-            } else tempIndex -= 9;
+            } else index -= 9;
         }
 
         gradient[tid] = gradient[tid] + make_float4(
-            c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y + c_AffineMatrix0.z * jacobianGradient.z),
-            c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y + c_AffineMatrix1.z * jacobianGradient.z),
-            c_Weight3.z * (c_AffineMatrix2.x * jacobianGradient.x + c_AffineMatrix2.y * jacobianGradient.y + c_AffineMatrix2.z * jacobianGradient.z),
+            weight.x * (reorientation.m[0][0] * jacobianGradient.x + reorientation.m[0][1] * jacobianGradient.y + reorientation.m[0][2] * jacobianGradient.z),
+            weight.y * (reorientation.m[1][0] * jacobianGradient.x + reorientation.m[1][1] * jacobianGradient.y + reorientation.m[1][2] * jacobianGradient.z),
+            weight.z * (reorientation.m[2][0] * jacobianGradient.x + reorientation.m[2][1] * jacobianGradient.y + reorientation.m[2][2] * jacobianGradient.z),
             0.f);
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient) {
+__global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient,
+                                                       cudaTextureObject_t jacobianDeterminantTexture,
+                                                       cudaTextureObject_t jacobianMatricesTexture,
+                                                       const int3 controlPointImageDim,
+                                                       const float3 controlPointVoxelSpacing,
+                                                       const unsigned controlPointNumber,
+                                                       const int3 referenceImageDim,
+                                                       const mat33 reorientation,
+                                                       const float3 weight) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        int tempIndex = tid;
-        const int y = tempIndex / gridSize.x;
-        const int x = tempIndex - y * gridSize.x;
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         float2 jacobianGradient{};
-        const float3 spacingVoxel = c_ControlPointVoxelSpacing;
-
-        for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY <= (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) {
-            if (pixelY > -1 && pixelY < c_ReferenceImageDim.y) {
-                const int yPre = (int)((float)pixelY / spacingVoxel.y);
-                float basis = (float)pixelY / spacingVoxel.y - (float)yPre;
+        for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY <= (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
+            if (-1 < pixelY && pixelY < referenceImageDim.y) {
+                const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y);
+                float basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre;
                 float yBasis, yFirst;
                 GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst);
 
-                for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX <= (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) {
-                    if (pixelX > -1 && pixelX < c_ReferenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) {
-                        const int xPre = (int)((float)pixelX / spacingVoxel.x);
-                        basis = (float)pixelX / spacingVoxel.x - (float)xPre;
+                for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX <= (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
+                    if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) {
+                        const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x);
+                        basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre;
                         float xBasis, xFirst;
                         GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst);
 
-                        int jacIndex = pixelY * c_ReferenceImageDim.x + pixelX;
-                        float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+                        int jacIndex = pixelY * referenceImageDim.x + pixelX;
+                        float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
 
                         if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) {
                             detJac = 2.f * logf(detJac) / detJac;
                             float jacobianMatrix[4];
                             jacIndex *= 4;
-                            jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                            jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                            jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                            jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex);
+                            jacobianMatrix[0] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                            jacobianMatrix[1] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                            jacobianMatrix[2] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                            jacobianMatrix[3] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex);
                             const float2 basisValues = { xFirst * yBasis, xBasis * yFirst };
-                            getJacobianGradientValues2D(jacobianMatrix, detJac, basisValues.x, basisValues.y, &jacobianGradient);
+                            GetJacobianGradientValues2D(jacobianMatrix, detJac, basisValues.x, basisValues.y, &jacobianGradient);
                         }
                     }
                 }
             }
         }
         gradient[tid] = gradient[tid] + make_float4(
-            c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y),
-            c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y),
+            weight.x * (reorientation.m[0][0] * jacobianGradient.x + reorientation.m[0][1] * jacobianGradient.y),
+            weight.y * (reorientation.m[1][0] * jacobianGradient.x + reorientation.m[1][1] * jacobianGradient.y),
             0.f, 0.f);
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient) {
+__global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient,
+                                                       cudaTextureObject_t jacobianDeterminantTexture,
+                                                       cudaTextureObject_t jacobianMatricesTexture,
+                                                       const int3 controlPointImageDim,
+                                                       const float3 controlPointVoxelSpacing,
+                                                       const unsigned controlPointNumber,
+                                                       const int3 referenceImageDim,
+                                                       const mat33 reorientation,
+                                                       const float3 weight) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        int tempIndex = tid;
-        const int z = tempIndex / (gridSize.x * gridSize.y);
-        tempIndex -= z * gridSize.x * gridSize.y;
-        const int y = tempIndex / gridSize.x;
-        const int x = tempIndex - y * gridSize.x;
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         float3 jacobianGradient{};
-        const float3 spacingVoxel = c_ControlPointVoxelSpacing;
-
-        for (int pixelZ = (int)ceilf((z - 3) * spacingVoxel.z); pixelZ <= (int)ceilf((z + 1) * spacingVoxel.z); ++pixelZ) {
-            if (pixelZ > -1 && pixelZ < c_ReferenceImageDim.z) {
-                const int zPre = (int)((float)pixelZ / spacingVoxel.z);
-                float basis = (float)pixelZ / spacingVoxel.z - (float)zPre;
+        for (int pixelZ = (int)ceilf((z - 3) * controlPointVoxelSpacing.z); pixelZ <= (int)ceilf((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
+            if (-1 < pixelZ && pixelZ < referenceImageDim.z) {
+                const int zPre = (int)((float)pixelZ / controlPointVoxelSpacing.z);
+                float basis = (float)pixelZ / controlPointVoxelSpacing.z - (float)zPre;
                 float zBasis, zFirst;
                 GetBSplineBasisValue(basis, z - zPre, &zBasis, &zFirst);
 
-                for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY <= (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) {
-                    if (pixelY > -1 && pixelY < c_ReferenceImageDim.y && (zFirst != 0.f || zBasis != 0.f)) {
-                        const int yPre = (int)((float)pixelY / spacingVoxel.y);
-                        basis = (float)pixelY / spacingVoxel.y - (float)yPre;
+                for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY <= (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
+                    if (-1 < pixelY && pixelY < referenceImageDim.y && (zFirst != 0.f || zBasis != 0.f)) {
+                        const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y);
+                        basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre;
                         float yBasis, yFirst;
                         GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst);
 
-                        for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX <= (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) {
-                            if (pixelX > -1 && pixelX < c_ReferenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) {
-                                const int xPre = (int)((float)pixelX / spacingVoxel.x);
-                                basis = (float)pixelX / spacingVoxel.x - (float)xPre;
+                        for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX <= (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
+                            if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) {
+                                const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x);
+                                basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre;
                                 float xBasis, xFirst;
                                 GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst);
 
-                                int jacIndex = (pixelZ * c_ReferenceImageDim.y + pixelY) * c_ReferenceImageDim.x + pixelX;
-                                float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+                                int jacIndex = (pixelZ * referenceImageDim.y + pixelY) * referenceImageDim.x + pixelX;
+                                float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
 
                                 if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) {
                                     detJac = 2.f * logf(detJac) / detJac;
                                     float jacobianMatrix[9];
                                     jacIndex *= 9;
-                                    jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex);
+                                    jacobianMatrix[0] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[1] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[2] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[3] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[4] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[5] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[6] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[7] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[8] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex);
 
                                     const float3 basisValues = {
                                         xFirst * yBasis * zBasis,
                                         xBasis * yFirst * zBasis,
                                         xBasis * yBasis * zFirst
                                     };
-                                    getJacobianGradientValues3D(jacobianMatrix,
-                                                                detJac,
-                                                                basisValues.x,
-                                                                basisValues.y,
-                                                                basisValues.z,
-                                                                &jacobianGradient);
+                                    GetJacobianGradientValues3D(jacobianMatrix, detJac, basisValues.x, basisValues.y, basisValues.z, &jacobianGradient);
                                 }
                             }
                         }
@@ -1353,45 +1302,49 @@ __global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient) {
             }
         }
         gradient[tid] = gradient[tid] + make_float4(
-            c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y + c_AffineMatrix0.z * jacobianGradient.z),
-            c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y + c_AffineMatrix1.z * jacobianGradient.z),
-            c_Weight3.z * (c_AffineMatrix2.x * jacobianGradient.x + c_AffineMatrix2.y * jacobianGradient.y + c_AffineMatrix2.z * jacobianGradient.z),
+            weight.x * (reorientation.m[0][0] * jacobianGradient.x + reorientation.m[0][1] * jacobianGradient.y + reorientation.m[0][2] * jacobianGradient.z),
+            weight.y * (reorientation.m[1][0] * jacobianGradient.x + reorientation.m[1][1] * jacobianGradient.y + reorientation.m[1][2] * jacobianGradient.z),
+            weight.z * (reorientation.m[2][0] * jacobianGradient.x + reorientation.m[2][1] * jacobianGradient.y + reorientation.m[2][2] * jacobianGradient.z),
             0.f);
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGrid_d) {
+__global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGrid,
+                                                         cudaTextureObject_t jacobianDeterminantTexture,
+                                                         cudaTextureObject_t jacobianMatricesTexture,
+                                                         const int3 controlPointImageDim,
+                                                         const float3 controlPointSpacing,
+                                                         const unsigned controlPointNumber,
+                                                         const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        int tempIndex = tid;
-        const int z = tempIndex / (gridSize.x * gridSize.y);
-        tempIndex -= z * gridSize.x * gridSize.y;
-        const int y = tempIndex / gridSize.x;
-        const int x = tempIndex - y * gridSize.x;
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         float3 foldingCorrection{};
         for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) {
-            if (pixelZ > 0 && pixelZ < gridSize.z - 1) {
+            if (0 < pixelZ && pixelZ < controlPointImageDim.z - 1) {
                 for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) {
-                    if (pixelY > 0 && pixelY < gridSize.y - 1) {
+                    if (0 < pixelY && pixelY < controlPointImageDim.y - 1) {
                         for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) {
-                            if (pixelX > 0 && pixelX < gridSize.x - 1) {
-                                int jacIndex = (pixelZ * gridSize.y + pixelY) * gridSize.x + pixelX;
-                                float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+                            if (0 < pixelX && pixelX < controlPointImageDim.x - 1) {
+                                int jacIndex = (pixelZ * controlPointImageDim.y + pixelY) * controlPointImageDim.x + pixelX;
+                                float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
                                 if (detJac <= 0.f) {
                                     float jacobianMatrix[9];
                                     jacIndex *= 9;
-                                    jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex);
+                                    jacobianMatrix[0] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[1] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[2] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[3] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[4] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[5] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[6] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[7] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[8] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex);
 
                                     float xBasis, xFirst, yBasis, yFirst, zBasis, zFirst;
                                     GetBSplineBasisValue(0.f, x - pixelX + 1, &xBasis, &xFirst);
@@ -1403,12 +1356,7 @@ __global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGri
                                         xBasis * yFirst * zBasis,
                                         xBasis * yBasis * zFirst
                                     };
-                                    getJacobianGradientValues3D(jacobianMatrix,
-                                                                1.f,
-                                                                basisValue.x,
-                                                                basisValue.y,
-                                                                basisValue.z,
-                                                                &foldingCorrection);
+                                    GetJacobianGradientValues3D(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection);
                                 }
                             }
                         }
@@ -1418,63 +1366,66 @@ __global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGri
         }
         if (foldingCorrection.x != 0.f && foldingCorrection.y != 0.f && foldingCorrection.z != 0.f) {
             const float3 gradient = {
-                c_AffineMatrix0.x * foldingCorrection.x + c_AffineMatrix0.y * foldingCorrection.y + c_AffineMatrix0.z * foldingCorrection.z,
-                c_AffineMatrix1.x * foldingCorrection.x + c_AffineMatrix1.y * foldingCorrection.y + c_AffineMatrix1.z * foldingCorrection.z,
-                c_AffineMatrix2.x * foldingCorrection.x + c_AffineMatrix2.y * foldingCorrection.y + c_AffineMatrix2.z * foldingCorrection.z
+                reorientation.m[0][0] * foldingCorrection.x + reorientation.m[0][1] * foldingCorrection.y + reorientation.m[0][2] * foldingCorrection.z,
+                reorientation.m[1][0] * foldingCorrection.x + reorientation.m[1][1] * foldingCorrection.y + reorientation.m[1][2] * foldingCorrection.z,
+                reorientation.m[2][0] * foldingCorrection.x + reorientation.m[2][1] * foldingCorrection.y + reorientation.m[2][2] * foldingCorrection.z
             };
             const float norm = 5 * sqrtf(gradient.x * gradient.x + gradient.y * gradient.y + gradient.z * gradient.z);
-            controlPointGrid_d[tid] = controlPointGrid_d[tid] + make_float4(gradient.x * c_ControlPointSpacing.x / norm,
-                                                                            gradient.y * c_ControlPointSpacing.y / norm,
-                                                                            gradient.z * c_ControlPointSpacing.z / norm,
-                                                                            0.f);
+            controlPointGrid[tid] = controlPointGrid[tid] + make_float4(gradient.x * controlPointSpacing.x / norm,
+                                                                        gradient.y * controlPointSpacing.y / norm,
+                                                                        gradient.z * controlPointSpacing.z / norm, 0.f);
         }
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d) {
+__global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid,
+                                                   cudaTextureObject_t jacobianDeterminantTexture,
+                                                   cudaTextureObject_t jacobianMatricesTexture,
+                                                   const int3 controlPointImageDim,
+                                                   const float3 controlPointSpacing,
+                                                   const float3 controlPointVoxelSpacing,
+                                                   const unsigned controlPointNumber,
+                                                   const int3 referenceImageDim,
+                                                   const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ControlPointNumber) {
-        const int3 gridSize = c_ControlPointImageDim;
-
-        int tempIndex = tid;
-        const int z = tempIndex / (gridSize.x * gridSize.y);
-        tempIndex -= z * gridSize.x * gridSize.y;
-        const int y = tempIndex / gridSize.x;
-        const int x = tempIndex - y * gridSize.x;
+    if (tid < controlPointNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
-        const float3 spacingVoxel = c_ControlPointVoxelSpacing;
         float3 foldingCorrection{};
-
-        for (int pixelZ = (int)ceilf((z - 3) * spacingVoxel.z); pixelZ < (int)ceilf((z + 1) * spacingVoxel.z); ++pixelZ) {
-            if (pixelZ > -1 && pixelZ < c_ReferenceImageDim.z) {
-                for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY < (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) {
-                    if (pixelY > -1 && pixelY < c_ReferenceImageDim.y) {
-                        for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX < (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) {
-                            if (pixelX > -1 && pixelX < c_ReferenceImageDim.x) {
-                                int jacIndex = (pixelZ * c_ReferenceImageDim.y + pixelY) * c_ReferenceImageDim.x + pixelX;
-                                float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex);
+        for (int pixelZ = (int)ceilf((z - 3) * controlPointVoxelSpacing.z); pixelZ < (int)ceilf((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
+            if (-1 < pixelZ && pixelZ < referenceImageDim.z) {
+                for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY < (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
+                    if (-1 < pixelY && pixelY < referenceImageDim.y) {
+                        for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX < (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
+                            if (-1 < pixelX && pixelX < referenceImageDim.x) {
+                                int jacIndex = (pixelZ * referenceImageDim.y + pixelY) * referenceImageDim.x + pixelX;
+                                float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
                                 if (detJac <= 0.f) {
                                     float jacobianMatrix[9];
                                     jacIndex *= 9;
-                                    jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++);
-                                    jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex);
+                                    jacobianMatrix[0] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[1] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[2] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[3] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[4] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[5] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[6] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[7] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
+                                    jacobianMatrix[8] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex);
 
                                     float xBasis, xFirst, yBasis, yFirst, zBasis, zFirst;
-                                    int pre = (int)((float)pixelX / spacingVoxel.x);
-                                    float basis = (float)pixelX / spacingVoxel.x - (float)pre;
+                                    int pre = (int)((float)pixelX / controlPointVoxelSpacing.x);
+                                    float basis = (float)pixelX / controlPointVoxelSpacing.x - (float)pre;
                                     GetBSplineBasisValue(basis, x - pre, &xBasis, &xFirst);
-                                    pre = (int)((float)pixelY / spacingVoxel.y);
-                                    basis = (float)pixelY / spacingVoxel.y - (float)pre;
+                                    pre = (int)((float)pixelY / controlPointVoxelSpacing.y);
+                                    basis = (float)pixelY / controlPointVoxelSpacing.y - (float)pre;
                                     GetBSplineBasisValue(basis, y - pre, &yBasis, &yFirst);
-                                    pre = (int)((float)pixelZ / spacingVoxel.z);
-                                    basis = (float)pixelZ / spacingVoxel.z - (float)pre;
+                                    pre = (int)((float)pixelZ / controlPointVoxelSpacing.z);
+                                    basis = (float)pixelZ / controlPointVoxelSpacing.z - (float)pre;
                                     GetBSplineBasisValue(basis, z - pre, &zBasis, &zFirst);
 
                                     const float3 basisValue = {
@@ -1482,12 +1433,7 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d) {
                                         xBasis * yFirst * zBasis,
                                         xBasis * yBasis * zFirst
                                     };
-                                    getJacobianGradientValues3D(jacobianMatrix,
-                                                                1.f,
-                                                                basisValue.x,
-                                                                basisValue.y,
-                                                                basisValue.z,
-                                                                &foldingCorrection);
+                                    GetJacobianGradientValues3D(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection);
                                 }
                             }
                         }
@@ -1497,161 +1443,149 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d) {
         }
         if (foldingCorrection.x != 0.f && foldingCorrection.y != 0.f && foldingCorrection.z != 0.f) {
             const float3 gradient = {
-                c_AffineMatrix0.x * foldingCorrection.x + c_AffineMatrix0.y * foldingCorrection.y + c_AffineMatrix0.z * foldingCorrection.z,
-                c_AffineMatrix1.x * foldingCorrection.x + c_AffineMatrix1.y * foldingCorrection.y + c_AffineMatrix1.z * foldingCorrection.z,
-                c_AffineMatrix2.x * foldingCorrection.x + c_AffineMatrix2.y * foldingCorrection.y + c_AffineMatrix2.z * foldingCorrection.z
+                reorientation.m[0][0] * foldingCorrection.x + reorientation.m[0][1] * foldingCorrection.y + reorientation.m[0][2] * foldingCorrection.z,
+                reorientation.m[1][0] * foldingCorrection.x + reorientation.m[1][1] * foldingCorrection.y + reorientation.m[1][2] * foldingCorrection.z,
+                reorientation.m[2][0] * foldingCorrection.x + reorientation.m[2][1] * foldingCorrection.y + reorientation.m[2][2] * foldingCorrection.z
             };
             const float norm = 5.f * sqrtf(gradient.x * gradient.x + gradient.y * gradient.y + gradient.z * gradient.z);
-            controlPointGrid_d[tid] = controlPointGrid_d[tid] + make_float4(gradient.x * c_ControlPointSpacing.x / norm,
-                                                                            gradient.y * c_ControlPointSpacing.y / norm,
-                                                                            gradient.z * c_ControlPointSpacing.z / norm,
-                                                                            0.f);
+            controlPointGrid[tid] = controlPointGrid[tid] + make_float4(gradient.x * controlPointSpacing.x / norm,
+                                                                        gradient.y * controlPointSpacing.y / norm,
+                                                                        gradient.z * controlPointSpacing.z / norm, 0.f);
         }
     }
 }
 /* *************************************************************** */
-__global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *imageArray_d) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        const int3 imageSize = c_ReferenceImageDim;
-
-        int tempIndex = tid;
-        const int z = tempIndex / (imageSize.x * imageSize.y);
-        tempIndex -= z * imageSize.x * imageSize.y;
-        const int y = tempIndex / imageSize.x;
-        const int x = tempIndex - y * imageSize.x;
-
-        const float4 initialPosition = {
-            x * c_AffineMatrix0b.x + y * c_AffineMatrix0b.y + z * c_AffineMatrix0b.z + c_AffineMatrix0b.w,
-            x * c_AffineMatrix1b.x + y * c_AffineMatrix1b.y + z * c_AffineMatrix1b.z + c_AffineMatrix1b.w,
-            x * c_AffineMatrix2b.x + y * c_AffineMatrix2b.y + z * c_AffineMatrix2b.z + c_AffineMatrix2b.w,
-            0.f
-        };
-
-        imageArray_d[tid] = imageArray_d[tid] + initialPosition;
-    }
-}
-/* *************************************************************** */
-__global__ void reg_getDisplacementFromDeformation3D_kernel(float4 *imageArray_d) {
+__global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *image,
+                                                            const int3 imageDim,
+                                                            const unsigned voxelNumber,
+                                                            const mat44 affineMatrix,
+                                                            const bool reverse = false) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        const int3 imageSize = c_ReferenceImageDim;
-
-        int tempIndex = tid;
-        const int z = tempIndex / (imageSize.x * imageSize.y);
-        tempIndex -= z * imageSize.x * imageSize.y;
-        const int y = tempIndex / imageSize.x;
-        const int x = tempIndex - y * imageSize.x;
+    if (tid < voxelNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, imageDim.x * imageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, imageDim.x, quot, rem);
+        const int y = quot, x = rem;
 
         const float4 initialPosition = {
-            x * c_AffineMatrix0b.x + y * c_AffineMatrix0b.y + z * c_AffineMatrix0b.z + c_AffineMatrix0b.w,
-            x * c_AffineMatrix1b.x + y * c_AffineMatrix1b.y + z * c_AffineMatrix1b.z + c_AffineMatrix1b.w,
-            x * c_AffineMatrix2b.x + y * c_AffineMatrix2b.y + z * c_AffineMatrix2b.z + c_AffineMatrix2b.w,
+            x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2] + affineMatrix.m[0][3],
+            x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2] + affineMatrix.m[1][3],
+            x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2] + affineMatrix.m[2][3],
             0.f
         };
 
-        imageArray_d[tid] = imageArray_d[tid] - initialPosition;
+        // If reverse, gets displacement from deformation
+        image[tid] = image[tid] + (reverse ? -1 : 1) * initialPosition;
     }
 }
 /* *************************************************************** */
-__global__ void reg_defField_compose2D_kernel(float4 *outDef) {
+__global__ void reg_defField_compose2D_kernel(float4 *deformationField,
+                                              cudaTextureObject_t deformationFieldTexture,
+                                              const int3 referenceImageDim,
+                                              const unsigned voxelNumber,
+                                              const mat44 affineMatrixB,
+                                              const mat44 affineMatrixC) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
+    if (tid < voxelNumber) {
         // Extract the original voxel position
-        float4 position = outDef[tid];
+        float4 position = deformationField[tid];
 
         // Conversion from real position to voxel coordinate
         float4 voxelPosition = {
-            position.x * c_AffineMatrix0b.x + position.y * c_AffineMatrix0b.y + c_AffineMatrix0b.w,
-            position.x * c_AffineMatrix1b.x + position.y * c_AffineMatrix1b.y + c_AffineMatrix1b.w,
+            position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + affineMatrixB.m[0][3],
+            position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + affineMatrixB.m[1][3],
             0.f,
             0.f
         };
 
         // Linear interpolation
         const int2 ante = { (int)floorf(voxelPosition.x), (int)floorf(voxelPosition.y) };
-
         float relX[2], relY[2];
         relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1];
         relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1];
 
         position = make_float4(0.f, 0.f, 0.f, 0.f);
-
-        for (int b = 0; b < 2; ++b) {
-            for (int a = 0; a < 2; ++a) {
-                const int index = (ante.y + b) * c_ReferenceImageDim.x + ante.x + a;
+        for (short b = 0; b < 2; ++b) {
+            for (short a = 0; a < 2; ++a) {
                 float4 deformation;
-                if (-1 < ante.x + a && ante.x + a < c_ReferenceImageDim.x &&
-                    -1 < ante.y + b && ante.y + b < c_ReferenceImageDim.y) {
-                    deformation = tex1Dfetch(voxelDeformationTexture, index);
+                if (-1 < ante.x + a && ante.x + a < referenceImageDim.x &&
+                    -1 < ante.y + b && ante.y + b < referenceImageDim.y) {
+                    const int index = (ante.y + b) * referenceImageDim.x + ante.x + a;
+                    deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
                 } else {
-                    deformation = GetSlidedValues(ante.x + a, ante.y + b);
+                    deformation = GetSlidedValues(ante.x + a, ante.y + b, deformationFieldTexture, referenceImageDim, affineMatrixC);
                 }
                 const float basis = relX[a] * relY[b];
                 position = position + basis * deformation;
             }
         }
-        outDef[tid] = position;
+        deformationField[tid] = position;
     }
 }
 /* *************************************************************** */
-__global__ void reg_defField_compose3D_kernel(float4 *outDef) {
+__global__ void reg_defField_compose3D_kernel(float4 *deformationField,
+                                              cudaTextureObject_t deformationFieldTexture,
+                                              const int3 referenceImageDim,
+                                              const unsigned voxelNumber,
+                                              const mat44 affineMatrixB,
+                                              const mat44 affineMatrixC) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
+    if (tid < voxelNumber) {
         // Extract the original voxel position
-        float4 position = outDef[tid];
+        float4 position = deformationField[tid];
 
         // Conversion from real position to voxel coordinate
         const float4 voxelPosition = {
-            position.x * c_AffineMatrix0b.x + position.y * c_AffineMatrix0b.y + position.z * c_AffineMatrix0b.z + c_AffineMatrix0b.w,
-            position.x * c_AffineMatrix1b.x + position.y * c_AffineMatrix1b.y + position.z * c_AffineMatrix1b.z + c_AffineMatrix1b.w,
-            position.x * c_AffineMatrix2b.x + position.y * c_AffineMatrix2b.y + position.z * c_AffineMatrix2b.z + c_AffineMatrix2b.w,
+            position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + position.z * affineMatrixB.m[0][2] + affineMatrixB.m[0][3],
+            position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + position.z * affineMatrixB.m[1][2] + affineMatrixB.m[1][3],
+            position.x * affineMatrixB.m[2][0] + position.y * affineMatrixB.m[2][1] + position.z * affineMatrixB.m[2][2] + affineMatrixB.m[2][3],
             0.f
         };
 
         // Linear interpolation
         const int3 ante = { (int)floorf(voxelPosition.x), (int)floorf(voxelPosition.y), (int)floorf(voxelPosition.z) };
-
         float relX[2], relY[2], relZ[2];
         relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1];
         relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1];
         relZ[1] = voxelPosition.z - (float)ante.z; relZ[0] = 1.f - relZ[1];
 
         position = make_float4(0.f, 0.f, 0.f, 0.f);
-
-        for (int c = 0; c < 2; ++c) {
-            for (int b = 0; b < 2; ++b) {
-                for (int a = 0; a < 2; ++a) {
-                    const int index = ((ante.z + c) * c_ReferenceImageDim.y + ante.y + b) * c_ReferenceImageDim.x + ante.x + a;
+        for (short c = 0; c < 2; ++c) {
+            for (short b = 0; b < 2; ++b) {
+                for (short a = 0; a < 2; ++a) {
                     float4 deformation;
-                    if (-1 < ante.x + a && ante.x + a < c_ReferenceImageDim.x &&
-                        -1 < ante.y + b && ante.y + b < c_ReferenceImageDim.y &&
-                        -1 < ante.z + c && ante.z + c < c_ReferenceImageDim.z) {
-                        deformation = tex1Dfetch(voxelDeformationTexture, index);
+                    if (-1 < ante.x + a && ante.x + a < referenceImageDim.x &&
+                        -1 < ante.y + b && ante.y + b < referenceImageDim.y &&
+                        -1 < ante.z + c && ante.z + c < referenceImageDim.z) {
+                        const int index = ((ante.z + c) * referenceImageDim.y + ante.y + b) * referenceImageDim.x + ante.x + a;
+                        deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
                     } else {
-                        deformation = GetSlidedValues(ante.x + a, ante.y + b, ante.z + c);
+                        deformation = GetSlidedValues(ante.x + a, ante.y + b, ante.z + c, deformationFieldTexture, referenceImageDim, affineMatrixC);
                     }
                     const float basis = relX[a] * relY[b] * relZ[c];
                     position = position + basis * deformation;
                 }
             }
         }
-        outDef[tid] = position;
+        deformationField[tid] = position;
     }
 }
 /* *************************************************************** */
-__global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices) {
+__global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices,
+                                                        cudaTextureObject_t deformationFieldTexture,
+                                                        const int3 referenceImageDim,
+                                                        const unsigned voxelNumber,
+                                                        const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        const int3 imageSize = c_ReferenceImageDim;
-
-        int tempIndex = tid;
-        const int z = tempIndex / (imageSize.x * imageSize.y);
-        tempIndex -= z * imageSize.x * imageSize.y;
-        const int y = tempIndex / imageSize.x;
-        const int x = tempIndex - y * imageSize.x;
-
-        if (x == imageSize.x - 1 || y == imageSize.y - 1 || z == imageSize.z - 1) {
+    if (tid < voxelNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, referenceImageDim.x * referenceImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, referenceImageDim.x, quot, rem);
+        const int y = quot, x = rem;
+
+        if (x == referenceImageDim.x - 1 || y == referenceImageDim.y - 1 || z == referenceImageDim.z - 1) {
             int index = tid * 9;
             jacobianMatrices[index++] = 1;
             jacobianMatrices[index++] = 0;
@@ -1665,38 +1599,38 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices)
             return;
         }
 
-        int index = (z * imageSize.y + y) * imageSize.x + x;
-        float4 deformation = tex1Dfetch(voxelDeformationTexture, index);
+        int index = (z * referenceImageDim.y + y) * referenceImageDim.x + x;
+        float4 deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
         float matrix[9] = {
             -deformation.x, -deformation.x, -deformation.x,
             -deformation.y, -deformation.y, -deformation.y,
             -deformation.z, -deformation.z, -deformation.z
         };
-        deformation = tex1Dfetch(voxelDeformationTexture, index + 1);
+        deformation = tex1Dfetch<float4>(deformationFieldTexture, index + 1);
         matrix[0] += deformation.x;
         matrix[3] += deformation.y;
         matrix[6] += deformation.z;
-        index = (z * imageSize.y + y + 1) * imageSize.x + x;
-        deformation = tex1Dfetch(voxelDeformationTexture, index);
+        index = (z * referenceImageDim.y + y + 1) * referenceImageDim.x + x;
+        deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
         matrix[1] += deformation.x;
         matrix[4] += deformation.y;
         matrix[7] += deformation.z;
-        index = ((z + 1) * imageSize.y + y) * imageSize.x + x;
-        deformation = tex1Dfetch(voxelDeformationTexture, index);
+        index = ((z + 1) * referenceImageDim.y + y) * referenceImageDim.x + x;
+        deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
         matrix[2] += deformation.x;
         matrix[5] += deformation.y;
         matrix[8] += deformation.z;
 
         index = tid * 9;
-        jacobianMatrices[index++] = c_AffineMatrix0.x * matrix[0] + c_AffineMatrix0.y * matrix[3] + c_AffineMatrix0.z * matrix[6];
-        jacobianMatrices[index++] = c_AffineMatrix0.x * matrix[1] + c_AffineMatrix0.y * matrix[4] + c_AffineMatrix0.z * matrix[7];
-        jacobianMatrices[index++] = c_AffineMatrix0.x * matrix[2] + c_AffineMatrix0.y * matrix[5] + c_AffineMatrix0.z * matrix[8];
-        jacobianMatrices[index++] = c_AffineMatrix1.x * matrix[0] + c_AffineMatrix1.y * matrix[3] + c_AffineMatrix1.z * matrix[6];
-        jacobianMatrices[index++] = c_AffineMatrix1.x * matrix[1] + c_AffineMatrix1.y * matrix[4] + c_AffineMatrix1.z * matrix[7];
-        jacobianMatrices[index++] = c_AffineMatrix1.x * matrix[2] + c_AffineMatrix1.y * matrix[5] + c_AffineMatrix1.z * matrix[8];
-        jacobianMatrices[index++] = c_AffineMatrix2.x * matrix[0] + c_AffineMatrix2.y * matrix[3] + c_AffineMatrix2.z * matrix[6];
-        jacobianMatrices[index++] = c_AffineMatrix2.x * matrix[1] + c_AffineMatrix2.y * matrix[4] + c_AffineMatrix2.z * matrix[7];
-        jacobianMatrices[index] = c_AffineMatrix2.x * matrix[2] + c_AffineMatrix2.y * matrix[5] + c_AffineMatrix2.z * matrix[8];
+        jacobianMatrices[index++] = reorientation.m[0][0] * matrix[0] + reorientation.m[0][1] * matrix[3] + reorientation.m[0][2] * matrix[6];
+        jacobianMatrices[index++] = reorientation.m[0][0] * matrix[1] + reorientation.m[0][1] * matrix[4] + reorientation.m[0][2] * matrix[7];
+        jacobianMatrices[index++] = reorientation.m[0][0] * matrix[2] + reorientation.m[0][1] * matrix[5] + reorientation.m[0][2] * matrix[8];
+        jacobianMatrices[index++] = reorientation.m[1][0] * matrix[0] + reorientation.m[1][1] * matrix[3] + reorientation.m[1][2] * matrix[6];
+        jacobianMatrices[index++] = reorientation.m[1][0] * matrix[1] + reorientation.m[1][1] * matrix[4] + reorientation.m[1][2] * matrix[7];
+        jacobianMatrices[index++] = reorientation.m[1][0] * matrix[2] + reorientation.m[1][1] * matrix[5] + reorientation.m[1][2] * matrix[8];
+        jacobianMatrices[index++] = reorientation.m[2][0] * matrix[0] + reorientation.m[2][1] * matrix[3] + reorientation.m[2][2] * matrix[6];
+        jacobianMatrices[index++] = reorientation.m[2][0] * matrix[1] + reorientation.m[2][1] * matrix[4] + reorientation.m[2][2] * matrix[7];
+        jacobianMatrices[index] = reorientation.m[2][0] * matrix[2] + reorientation.m[2][1] * matrix[5] + reorientation.m[2][2] * matrix[8];
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index fc82d88f..c49df391 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -25,7 +25,7 @@ class reg_measure_gpu {
     virtual void InitialiseMeasure(nifti_image *refImgPtr,
                                    nifti_image *floImgPtr,
                                    int *maskRefPtr,
-                                   int activeVoxNum,
+                                   size_t activeVoxNum,
                                    nifti_image *warFloImgPtr,
                                    nifti_image *warFloGraPtr,
                                    nifti_image *forVoxBasedGraPtr,
@@ -41,7 +41,7 @@ class reg_measure_gpu {
     cudaArray *referenceDevicePointer;
     cudaArray *floatingDevicePointer;
     int *referenceMaskDevicePointer;
-    int activeVoxelNumber;
+    size_t activeVoxelNumber;
     float *warpedFloatingDevicePointer;
     float4 *warpedFloatingGradientDevicePointer;
     float4 *forwardVoxelBasedGradientDevicePointer;
@@ -61,7 +61,7 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
     virtual void InitialiseMeasure(nifti_image *refImgPtr,
                                    nifti_image *floImgPtr,
                                    int *maskRefPtr,
-                                   int activeVoxNum,
+                                   size_t activeVoxNum,
                                    nifti_image *warFloImgPtr,
                                    nifti_image *warFloGraPtr,
                                    nifti_image *forVoxBasedGraPtr,
@@ -92,7 +92,7 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
     virtual void InitialiseMeasure(nifti_image *refImgPtr,
                                    nifti_image *floImgPtr,
                                    int *maskRefPtr,
-                                   int activeVoxNum,
+                                   size_t activeVoxNum,
                                    nifti_image *warFloImgPtr,
                                    nifti_image *warFloGraPtr,
                                    nifti_image *forVoxBasedGraPtr,
@@ -123,7 +123,7 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
     virtual void InitialiseMeasure(nifti_image *refImgPtr,
                                    nifti_image *floImgPtr,
                                    int *maskRefPtr,
-                                   int activeVoxNum,
+                                   size_t activeVoxNum,
                                    nifti_image *warFloImgPtr,
                                    nifti_image *warFloGraPtr,
                                    nifti_image *forVoxBasedGraPtr,
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index b907d8bd..9aebb418 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -14,25 +14,22 @@
 #include "_reg_nmi_gpu.h"
 #include "_reg_nmi_kernels.cu"
 
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() {
     this->forwardJointHistogramLog_device = nullptr;
     //	this->backwardJointHistogramLog_device=nullptr;
-
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 reg_nmi_gpu::~reg_nmi_gpu() {
     this->DeallocateHistogram();
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_nmi_gpu::DeallocateHistogram() {
     if (this->forwardJointHistogramLog_device != nullptr) {
         cudaFree(this->forwardJointHistogramLog_device);
@@ -42,12 +39,11 @@ void reg_nmi_gpu::DeallocateHistogram() {
     printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
                                     nifti_image *floImgPtr,
                                     int *maskRefPtr,
-                                    int activeVoxNum,
+                                    size_t activeVoxNum,
                                     nifti_image *warFloImgPtr,
                                     nifti_image *warFloGraPtr,
                                     nifti_image *forVoxBasedGraPtr,
@@ -74,14 +70,14 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
     // Check if the input images have multiple timepoints
     if (this->referenceTimePoint > 1 || this->floatingImagePointer->nt > 1) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        fprintf(stderr, "[NiftyReg ERROR] This class can only be \n");
+        fprintf(stderr, "[NiftyReg ERROR] Multiple timepoints are not yet supported on the GPU\n");
         reg_exit();
     }
     // Check that the input image are of type float
     if (this->referenceImagePointer->datatype != NIFTI_TYPE_FLOAT32 ||
         this->warpedFloatingImagePointer->datatype != NIFTI_TYPE_FLOAT32) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        fprintf(stderr, "[NiftyReg ERROR] This class can only be \n");
+        fprintf(stderr, "[NiftyReg ERROR] Only single precision is supported on the GPU\n");
         reg_exit();
     }
     // Bind the required pointers
@@ -95,12 +91,12 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
     // The reference and floating images have to be updated on the device
     if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->referenceDevicePointer, this->referenceImagePointer)) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        printf("[NiftyReg ERROR] Error when transfering the reference image.\n");
+        printf("[NiftyReg ERROR] Error when transferring the reference image.\n");
         reg_exit();
     }
     if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->floatingDevicePointer, this->floatingImagePointer)) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        printf("[NiftyReg ERROR] Error when transfering the floating image.\n");
+        printf("[NiftyReg ERROR] Error when transferring the floating image.\n");
         reg_exit();
     }
     // Allocate the required joint histogram on the GPU
@@ -110,8 +106,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
     printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 double reg_nmi_gpu::GetSimilarityMeasureValue() {
     // The NMI computation is performed into the host for now
     // The relevant images have to be transferred from the device to the host
@@ -132,82 +127,68 @@ double reg_nmi_gpu::GetSimilarityMeasureValue() {
                            this->forwardEntropyValues,
                            this->referenceMaskPointer);
 
-    double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2];
+    const double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2];
 
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_nmi_gpu::GetSimilarityMeasureValue called\n");
 #endif
     return nmi_value;
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 /// Called when we only have one target and one source image
 void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
-                                      const cudaArray *referenceImageArray_d,
-                                      const float *warpedImageArray_d,
-                                      const float4 *warpedGradientArray_d,
-                                      const float *logJointHistogram_d,
-                                      float4 *voxelNMIGradientArray_d,
-                                      const int *mask_d,
-                                      const int activeVoxelNumber,
+                                      const cudaArray *referenceImageCuda,
+                                      const float *warpedImageCuda,
+                                      const float4 *warpedGradientCuda,
+                                      const float *logJointHistogramCuda,
+                                      float4 *voxelBasedGradientCuda,
+                                      const int *maskCuda,
+                                      const size_t& activeVoxelNumber,
                                       const double *entropies,
-                                      const int refBinning,
-                                      const int floBinning) {
+                                      const int& refBinning,
+                                      const int& floBinning) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    const int voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const int binNumber = refBinning * floBinning + refBinning + floBinning;
     const float normalisedJE = (float)(entropies[2] * entropies[3]);
-    const float NMI = (float)((entropies[0] + entropies[1]) / entropies[2]);
-
-    // Bind Symbols
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize, &imageSize, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstTargetBin, &refBinning, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstResultBin, &floBinning, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisedJE, &normalisedJE, sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NMI, &NMI, sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
+    const float nmi = (float)((entropies[0] + entropies[1]) / entropies[2]);
 
-    // Texture binding floating
-    //Bind target image array to a 3D texture
-    firstreferenceImageTexture.normalized = true;
-    firstreferenceImageTexture.filterMode = cudaFilterModeLinear;
-    firstreferenceImageTexture.addressMode[0] = cudaAddressModeWrap;
-    firstreferenceImageTexture.addressMode[1] = cudaAddressModeWrap;
-    firstreferenceImageTexture.addressMode[2] = cudaAddressModeWrap;
-    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(firstreferenceImageTexture, referenceImageArray_d, channelDesc));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageTexture, warpedImageArray_d, voxelNumber * sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageGradientTexture, warpedGradientArray_d, voxelNumber * sizeof(float4)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, histogramTexture, logJointHistogram_d, binNumber * sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemset(voxelNMIGradientArray_d, 0, voxelNumber * sizeof(float4)));
+    auto referenceImageTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
+                                                                cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
+    auto warpedImageTexture = cudaCommon_createTextureObject(warpedImageCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
+                                                             cudaChannelFormatKindFloat, 1);
+    auto warpedGradientTexture = cudaCommon_createTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
+                                                                cudaChannelFormatKindFloat, 4);
+    auto histogramTexture = cudaCommon_createTextureObject(logJointHistogramCuda, cudaResourceTypeLinear, binNumber * sizeof(float),
+                                                           cudaChannelFormatKindFloat, 1);
+    auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+                                                      cudaChannelFormatKindSigned, 1);
+    NR_CUDA_SAFE_CALL(cudaMemset(voxelBasedGradientCuda, 0, voxelNumber * sizeof(float4)));
 
     if (referenceImage->nz > 1) {
-        const unsigned Grid_reg_getVoxelBasedNMIGradientUsingPW3D =
-            (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getVoxelBasedNMIGradientUsingPW3D));
-        dim3 B1(blockSize->reg_getVoxelBasedNMIGradientUsingPW3D, 1, 1);
-        dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW3D, Grid_reg_getVoxelBasedNMIGradientUsingPW3D, 1);
-        reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW3D;
+        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_getVoxelBasedNMIGradientUsingPW3D_kernel<<<gridDims, blockDims>>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture,
+                                                                              *warpedGradientTexture, *histogramTexture, *maskTexture,
+                                                                              imageSize, refBinning, floBinning, normalisedJE, nmi,
+                                                                              (unsigned)activeVoxelNumber);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned Grid_reg_getVoxelBasedNMIGradientUsingPW2D =
-            (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getVoxelBasedNMIGradientUsingPW2D));
-        dim3 B1(blockSize->reg_getVoxelBasedNMIGradientUsingPW2D, 1, 1);
-        dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW2D, Grid_reg_getVoxelBasedNMIGradientUsingPW2D, 1);
-        reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW2D;
+        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_getVoxelBasedNMIGradientUsingPW2D_kernel<<<gridDims, blockDims>>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture,
+                                                                              *warpedGradientTexture, *histogramTexture, *maskTexture,
+                                                                              imageSize, refBinning, floBinning, normalisedJE, nmi,
+                                                                              (unsigned)activeVoxelNumber);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstreferenceImageTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageGradientTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(histogramTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     // The latest joint histogram is transferred onto the GPU
     float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float));
@@ -235,5 +216,4 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
     printf("[NiftyReg DEBUG] reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n");
 #endif
 }
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index dc6ccbe7..46aa61f0 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -15,8 +15,7 @@
 #include "_reg_nmi.h"
 #include "_reg_measure_gpu.h"
 
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 /// @brief NMI measure of similarity class - GPU based
 class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
 public:
@@ -29,7 +28,7 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
     virtual void InitialiseMeasure(nifti_image *refImgPtr,
                                    nifti_image *floImgPtr,
                                    int *maskRefPtr,
-                                   int activeVoxNum,
+                                   size_t activeVoxNum,
                                    nifti_image *warFloImgPtr,
                                    nifti_image *warFloGraPtr,
                                    nifti_image *forVoxBasedGraPtr,
@@ -50,15 +49,14 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
     // float **backwardJointHistogramLog_device;
     void DeallocateHistogram();
 };
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 /// @brief NMI measure of similarity class
 class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu {
 public:
     void InitialiseMeasure(nifti_image *refImgPtr,
                            nifti_image *floImgPtr,
                            int *maskRefPtr,
-                           int activeVoxNum,
+                           size_t activeVoxNum,
                            nifti_image *warFloImgPtr,
                            nifti_image *warFloGraPtr,
                            nifti_image *forVoxBasedGraPtr,
@@ -78,17 +76,4 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_
     /// @brief Compute the voxel based nmi gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-extern "C++"
-void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
-                                      const cudaArray *referenceImageArray_d,
-                                      const float *warpedImageArray_d,
-                                      const float4 *resultGradientArray_d,
-                                      const float *logJointHistogram_d,
-                                      float4 *voxelNMIGradientArray_d,
-                                      const int *targetMask_d,
-                                      const int activeVoxelNumber,
-                                      const double *entropies,
-                                      const int refBinning,
-                                      const int floBinning);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_kernels.cu b/reg-lib/cuda/_reg_nmi_kernels.cu
index d7108bb2..9218537c 100755
--- a/reg-lib/cuda/_reg_nmi_kernels.cu
+++ b/reg-lib/cuda/_reg_nmi_kernels.cu
@@ -10,280 +10,230 @@
  *
  */
 
-#include <stdio.h>
+#include "_reg_common_cuda_kernels.cu"
 
 #define COEFF_L 0.16666666f
 #define COEFF_C 0.66666666f
 #define COEFF_B 0.83333333f
 
-__device__ __constant__ int c_VoxelNumber;
-__device__ __constant__ int3 c_ImageSize;
-
-// Bins: Need 4 values for max 4 channels.
-__device__ __constant__ int c_firstTargetBin;
-__device__ __constant__ int c_secondTargetBin;
-__device__ __constant__ int c_firstResultBin;
-__device__ __constant__ int c_secondResultBin;
-
-__device__ __constant__ float c_NormalisedJE;
-__device__ __constant__ float c_NMI;
-__device__ __constant__ int c_ActiveVoxelNumber;
-
-texture<float, 3, cudaReadModeElementType> firstreferenceImageTexture;
-texture<float, 1, cudaReadModeElementType> firstwarpedImageTexture;
-texture<float4, 1, cudaReadModeElementType> firstwarpedImageGradientTexture;
-texture<float, 1, cudaReadModeElementType> histogramTexture;
-texture<float4, 1, cudaReadModeElementType> gradientImageTexture;
-texture<int, 1, cudaReadModeElementType> maskTexture;
-
-/// Added for the multichannel stuff. We currently only support 2 target and 2 source channels.
-/// So we need another texture for the second target and source channel respectively.
-texture<float, 3, cudaReadModeElementType> secondreferenceImageTexture;
-texture<float, 1, cudaReadModeElementType> secondwarpedImageTexture;
-texture<float4, 1, cudaReadModeElementType> secondwarpedImageGradientTexture;
-
 /* *************************************************************** */
-__device__ float GetBasisSplineValue(float x)
-{
-    x=fabsf(x);
-    float value=0.0f;
-    if(x<2.0f)
-        if(x<1.0f)
-            value = 2.0f/3.0f + (0.5f*x-1.0f)*x*x;
-        else{
-            x-=2.0f;
-            value = -x*x*x/6.0f;
-    }
+__device__ float GetBasisSplineValue(float x) {
+    x = fabsf(x);
+    float value = 0.0f;
+    if (x < 2.0f)
+        if (x < 1.0f)
+            value = 2.0f / 3.0f + (0.5f * x - 1.0f) * x * x;
+        else {
+            x -= 2.0f;
+            value = -x * x * x / 6.0f;
+        }
     return value;
 }
 /* *************************************************************** */
-__device__ float GetBasisSplineDerivativeValue(float ori)
-{
-    float x=fabsf(ori);
-    float value=0.0f;
-    if(x<2.0f)
-        if(x<1.0f)
-            value = (1.5f*x-2.0f)*ori;
-        else{
-            x-=2.0f;
+__device__ float GetBasisSplineDerivativeValue(const float& ori) {
+    float x = fabsf(ori);
+    float value = 0.0f;
+    if (x < 2.0f)
+        if (x < 1.0f)
+            value = (1.5f * x - 2.0f) * ori;
+        else {
+            x -= 2.0f;
             value = -0.5f * x * x;
-			if(ori<0.0f) value =-value;
-    }
+            if (ori < 0.0f) value = -value;
+        }
     return value;
 }
 /* *************************************************************** */
-__global__ void reg_getVoxelBasedNMIGradientUsingPW2D_kernel(float4 *voxelNMIGradientArray_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ActiveVoxelNumber){
-
-		const int targetIndex = tex1Dfetch(maskTexture,tid);
-		int tempIndex=targetIndex;
-		const int y = tempIndex/c_ImageSize.x;
-		const int x = tempIndex - y*c_ImageSize.x;
-
-		float referenceImageValue = tex3D(firstreferenceImageTexture,
-									   ((float)x+0.5f)/(float)c_ImageSize.x,
-									   ((float)y+0.5f)/(float)c_ImageSize.y,
-									   0.5f);
-		float warpedImageValue = tex1Dfetch(firstwarpedImageTexture,targetIndex);
-		float4 warpedImageGradient = tex1Dfetch(firstwarpedImageGradientTexture,tid);
-
-		float4 gradValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-		// No computation is performed if any of the point is part of the background
-		// The two is added because the image is resample between 2 and bin +2
-		// if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65
-		if( referenceImageValue>0.0f &&
-			warpedImageValue>0.0f &&
-			referenceImageValue<c_firstTargetBin &&
-			warpedImageValue<c_firstResultBin &&
-			referenceImageValue==referenceImageValue &&
-			warpedImageValue==warpedImageValue){
-
-//			referenceImageValue = floor(referenceImageValue);
-//			warpedImageValue = floor(warpedImageValue);
-
-			float2 resDeriv = make_float2(
-				warpedImageGradient.x,
-				warpedImageGradient.y);
-
-			if(resDeriv.x==resDeriv.x &&
-			   resDeriv.y==resDeriv.y){
-
-				float jointEntropyDerivative_X = 0.0f;
-				float warpedEntropyDerivative_X = 0.0f;
-				float referenceEntropyDerivative_X = 0.0f;
-
-				float jointEntropyDerivative_Y = 0.0f;
-				float warpedEntropyDerivative_Y = 0.0f;
-				float referenceEntropyDerivative_Y = 0.0f;
-
-				for(int r=static_cast<int>(referenceImageValue)-1; r<static_cast<int>(referenceImageValue)+3; ++r){
-					if(-1<r && r<c_firstTargetBin){
-						for(int w=static_cast<int>(warpedImageValue)-1; w<static_cast<int>(warpedImageValue)+3; ++w){
-							if(-1<w && w<c_firstResultBin){
-								float commonValue =
-										GetBasisSplineValue(referenceImageValue-(float)r) *
-										GetBasisSplineDerivativeValue(warpedImageValue-(float)w);
-
-								float jointLog =  tex1Dfetch(histogramTexture, w*c_firstResultBin+r);
-								float targetLog = tex1Dfetch(histogramTexture, c_firstTargetBin*c_firstResultBin+r);
-								float resultLog = tex1Dfetch(histogramTexture, c_firstTargetBin*c_firstResultBin+c_firstTargetBin+w);
-
-								float temp = commonValue * resDeriv.x;
-								jointEntropyDerivative_X += temp * jointLog;
-								referenceEntropyDerivative_X += temp * targetLog;
-								warpedEntropyDerivative_X += temp * resultLog;
-
-								temp = commonValue * resDeriv.y;
-								jointEntropyDerivative_Y += temp * jointLog;
-								referenceEntropyDerivative_Y += temp * targetLog;
-								warpedEntropyDerivative_Y += temp * resultLog;
-							} // O<t<bin
-						} // t
-					} // 0<r<bin
-				} // r
-
-				float NMI= c_NMI;
-				// (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
-				gradValue.x = (referenceEntropyDerivative_X + warpedEntropyDerivative_X - NMI * jointEntropyDerivative_X) / c_NormalisedJE;
-				gradValue.y = (referenceEntropyDerivative_Y + warpedEntropyDerivative_Y - NMI * jointEntropyDerivative_Y) / c_NormalisedJE;
-
-			}
-		}
-		voxelNMIGradientArray_d[targetIndex]=gradValue;
-
-	}
-	return;
+__global__ void reg_getVoxelBasedNMIGradientUsingPW2D_kernel(float4 *voxelBasedGradient,
+                                                             cudaTextureObject_t referenceImageTexture,
+                                                             cudaTextureObject_t warpedImageTexture,
+                                                             cudaTextureObject_t warpedGradientTexture,
+                                                             cudaTextureObject_t histogramTexture,
+                                                             cudaTextureObject_t maskTexture,
+                                                             const int3 imageSize,
+                                                             const int refBinning,
+                                                             const int floBinning,
+                                                             const float normalisedJE,
+                                                             const float nmi,
+                                                             const unsigned activeVoxelNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < activeVoxelNumber) {
+        const int targetIndex = tex1Dfetch<int>(maskTexture, tid);
+        int quot, rem;
+        reg_div_cuda(targetIndex, imageSize.x, quot, rem);
+        const int y = quot, x = rem;
+
+        const float referenceImageValue = tex3D<float>(referenceImageTexture,
+                                                       ((float)x + 0.5f) / (float)imageSize.x,
+                                                       ((float)y + 0.5f) / (float)imageSize.y,
+                                                       0.5f);
+        const float warpedImageValue = tex1Dfetch<float>(warpedImageTexture, targetIndex);
+        const float4 warpedImageGradient = tex1Dfetch<float4>(warpedGradientTexture, tid);
+
+        float4 gradValue{};
+
+        // No computation is performed if any of the point is part of the background
+        // The two is added because the image is resample between 2 and bin +2
+        // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65
+        if (0.f < referenceImageValue && referenceImageValue < refBinning &&
+            0.f < warpedImageValue && warpedImageValue < floBinning &&
+            referenceImageValue == referenceImageValue && warpedImageValue == warpedImageValue) {
+            const float2 resDeriv = make_float2(warpedImageGradient.x, warpedImageGradient.y);
+            if (resDeriv.x == resDeriv.x && resDeriv.y == resDeriv.y) {
+                float jointEntropyDerivative_X = 0.0f;
+                float warpedEntropyDerivative_X = 0.0f;
+                float referenceEntropyDerivative_X = 0.0f;
+                float jointEntropyDerivative_Y = 0.0f;
+                float warpedEntropyDerivative_Y = 0.0f;
+                float referenceEntropyDerivative_Y = 0.0f;
+                for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) {
+                    if (-1 < r && r < refBinning) {
+                        for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) {
+                            if (-1 < w && w < floBinning) {
+                                const float commonValue = (GetBasisSplineValue(referenceImageValue - (float)r) *
+                                                           GetBasisSplineDerivativeValue(warpedImageValue - (float)w));
+
+                                const float jointLog = tex1Dfetch<float>(histogramTexture, w * floBinning + r);
+                                const float targetLog = tex1Dfetch<float>(histogramTexture, refBinning * floBinning + r);
+                                const float resultLog = tex1Dfetch<float>(histogramTexture, refBinning * floBinning + refBinning + w);
+
+                                float temp = commonValue * resDeriv.x;
+                                jointEntropyDerivative_X += temp * jointLog;
+                                referenceEntropyDerivative_X += temp * targetLog;
+                                warpedEntropyDerivative_X += temp * resultLog;
+
+                                temp = commonValue * resDeriv.y;
+                                jointEntropyDerivative_Y += temp * jointLog;
+                                referenceEntropyDerivative_Y += temp * targetLog;
+                                warpedEntropyDerivative_Y += temp * resultLog;
+                            } // O<t<bin
+                        } // t
+                    } // 0<r<bin
+                } // r
+
+                // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
+                gradValue.x = (referenceEntropyDerivative_X + warpedEntropyDerivative_X - nmi * jointEntropyDerivative_X) / normalisedJE;
+                gradValue.y = (referenceEntropyDerivative_Y + warpedEntropyDerivative_Y - nmi * jointEntropyDerivative_Y) / normalisedJE;
+
+            }
+        }
+        voxelBasedGradient[targetIndex] = gradValue;
+    }
 }
 /* *************************************************************** */
-__global__ void reg_getVoxelBasedNMIGradientUsingPW3D_kernel(float4 *voxelNMIGradientArray_d)
-{
-	const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-	if(tid<c_ActiveVoxelNumber){
-
-		const int targetIndex = tex1Dfetch(maskTexture,tid);
-		int tempIndex=targetIndex;
-		const int z = tempIndex/(c_ImageSize.x*c_ImageSize.y);
-		tempIndex  -= z*c_ImageSize.x*c_ImageSize.y;
-		const int y = tempIndex/c_ImageSize.x;
-		const int x = tempIndex - y*c_ImageSize.x;
-
-		float referenceImageValue = tex3D(firstreferenceImageTexture,
-									   ((float)x+0.5f)/(float)c_ImageSize.x,
-									   ((float)y+0.5f)/(float)c_ImageSize.y,
-									   ((float)z+0.5f)/(float)c_ImageSize.z);
-		float warpedImageValue = tex1Dfetch(firstwarpedImageTexture,targetIndex);
-		float4 warpedImageGradient = tex1Dfetch(firstwarpedImageGradientTexture,tid);
-
-		float4 gradValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-		// No computation is performed if any of the point is part of the background
-		// The two is added because the image is resample between 2 and bin +2
-		// if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65
-		if( referenceImageValue>0.0f &&
-			warpedImageValue>0.0f &&
-			referenceImageValue<c_firstTargetBin &&
-			warpedImageValue<c_firstResultBin &&
-			referenceImageValue==referenceImageValue &&
-			warpedImageValue==warpedImageValue){
-
-//			referenceImageValue = floor(referenceImageValue);
-//			warpedImageValue = floor(warpedImageValue);
-
-			float3 resDeriv = make_float3(
-				warpedImageGradient.x,
-				warpedImageGradient.y,
-				warpedImageGradient.z);
-
-			if( resDeriv.x==resDeriv.x &&
-				resDeriv.y==resDeriv.y &&
-				resDeriv.z==resDeriv.z){
-
-				float jointEntropyDerivative_X = 0.0f;
-				float warpedEntropyDerivative_X = 0.0f;
-				float referenceEntropyDerivative_X = 0.0f;
-
-				float jointEntropyDerivative_Y = 0.0f;
-				float warpedEntropyDerivative_Y = 0.0f;
-				float referenceEntropyDerivative_Y = 0.0f;
-
-				float jointEntropyDerivative_Z = 0.0f;
-				float warpedEntropyDerivative_Z = 0.0f;
-				float referenceEntropyDerivative_Z = 0.0f;
-
-				for(int r=static_cast<int>(referenceImageValue)-1; r<static_cast<int>(referenceImageValue)+3; ++r){
-					if(-1<r && r<c_firstTargetBin){
-						for(int w=static_cast<int>(warpedImageValue)-1; w<static_cast<int>(warpedImageValue)+3; ++w){
-							if(-1<w && w<c_firstResultBin){
-								float commonValue =
-										GetBasisSplineValue(referenceImageValue-(float)r) *
-										GetBasisSplineDerivativeValue(warpedImageValue-(float)w);
-
-								float jointLog =  tex1Dfetch(histogramTexture, w*c_firstResultBin+r);
-								float targetLog = tex1Dfetch(histogramTexture, c_firstTargetBin*c_firstResultBin+r);
-								float resultLog = tex1Dfetch(histogramTexture, c_firstTargetBin*c_firstResultBin+c_firstTargetBin+w);
-
-								float temp = commonValue * resDeriv.x;
-								jointEntropyDerivative_X += temp * jointLog;
-								referenceEntropyDerivative_X += temp * targetLog;
-								warpedEntropyDerivative_X += temp * resultLog;
-
-								temp = commonValue * resDeriv.y;
-								jointEntropyDerivative_Y += temp * jointLog;
-								referenceEntropyDerivative_Y += temp * targetLog;
-								warpedEntropyDerivative_Y += temp * resultLog;
-
-								temp = commonValue * resDeriv.z;
-								jointEntropyDerivative_Z += temp * jointLog;
-								referenceEntropyDerivative_Z += temp * targetLog;
-								warpedEntropyDerivative_Z += temp * resultLog;
-							} // O<t<bin
-						} // t
-					} // 0<r<bin
-				} // r
-
-				float NMI= c_NMI;
-				// (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
-				gradValue.x = (referenceEntropyDerivative_X + warpedEntropyDerivative_X - NMI * jointEntropyDerivative_X) / c_NormalisedJE;
-				gradValue.y = (referenceEntropyDerivative_Y + warpedEntropyDerivative_Y - NMI * jointEntropyDerivative_Y) / c_NormalisedJE;
-				gradValue.z = (referenceEntropyDerivative_Z + warpedEntropyDerivative_Z - NMI * jointEntropyDerivative_Z) / c_NormalisedJE;
-
-			}
-		}
-		voxelNMIGradientArray_d[targetIndex]=gradValue;
-
-	}
-	return;
+__global__ void reg_getVoxelBasedNMIGradientUsingPW3D_kernel(float4 *voxelBasedGradient,
+                                                             cudaTextureObject_t referenceImageTexture,
+                                                             cudaTextureObject_t warpedImageTexture,
+                                                             cudaTextureObject_t warpedGradientTexture,
+                                                             cudaTextureObject_t histogramTexture,
+                                                             cudaTextureObject_t maskTexture,
+                                                             const int3 imageSize,
+                                                             const int refBinning,
+                                                             const int floBinning,
+                                                             const float normalisedJE,
+                                                             const float nmi,
+                                                             const unsigned activeVoxelNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < activeVoxelNumber) {
+        const int targetIndex = tex1Dfetch<int>(maskTexture, tid);
+        int quot, rem;
+        reg_div_cuda(targetIndex, imageSize.x * imageSize.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, imageSize.x, quot, rem);
+        const int y = quot, x = rem;
+
+        const float referenceImageValue = tex3D<float>(referenceImageTexture,
+                                                       ((float)x + 0.5f) / (float)imageSize.x,
+                                                       ((float)y + 0.5f) / (float)imageSize.y,
+                                                       ((float)z + 0.5f) / (float)imageSize.z);
+        const float warpedImageValue = tex1Dfetch<float>(warpedImageTexture, targetIndex);
+        const float4 warpedImageGradient = tex1Dfetch<float4>(warpedGradientTexture, tid);
+
+        float4 gradValue{};
+
+        // No computation is performed if any of the point is part of the background
+        // The two is added because the image is resample between 2 and bin +2
+        // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65
+        if (0.f < referenceImageValue && referenceImageValue < refBinning &&
+            0.f < warpedImageValue && warpedImageValue < floBinning &&
+            referenceImageValue == referenceImageValue && warpedImageValue == warpedImageValue) {
+            const float3 resDeriv = make_float3(warpedImageGradient.x, warpedImageGradient.y, warpedImageGradient.z);
+            if (resDeriv.x == resDeriv.x && resDeriv.y == resDeriv.y && resDeriv.z == resDeriv.z) {
+                float jointEntropyDerivative_X = 0.0f;
+                float warpedEntropyDerivative_X = 0.0f;
+                float referenceEntropyDerivative_X = 0.0f;
+                float jointEntropyDerivative_Y = 0.0f;
+                float warpedEntropyDerivative_Y = 0.0f;
+                float referenceEntropyDerivative_Y = 0.0f;
+                float jointEntropyDerivative_Z = 0.0f;
+                float warpedEntropyDerivative_Z = 0.0f;
+                float referenceEntropyDerivative_Z = 0.0f;
+                for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) {
+                    if (-1 < r && r < refBinning) {
+                        for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) {
+                            if (-1 < w && w < floBinning) {
+                                const float commonValue = (GetBasisSplineValue(referenceImageValue - (float)r) *
+                                                           GetBasisSplineDerivativeValue(warpedImageValue - (float)w));
+
+                                const float jointLog = tex1Dfetch<float>(histogramTexture, w * floBinning + r);
+                                const float targetLog = tex1Dfetch<float>(histogramTexture, refBinning * floBinning + r);
+                                const float resultLog = tex1Dfetch<float>(histogramTexture, refBinning * floBinning + refBinning + w);
+
+                                float temp = commonValue * resDeriv.x;
+                                jointEntropyDerivative_X += temp * jointLog;
+                                referenceEntropyDerivative_X += temp * targetLog;
+                                warpedEntropyDerivative_X += temp * resultLog;
+
+                                temp = commonValue * resDeriv.y;
+                                jointEntropyDerivative_Y += temp * jointLog;
+                                referenceEntropyDerivative_Y += temp * targetLog;
+                                warpedEntropyDerivative_Y += temp * resultLog;
+
+                                temp = commonValue * resDeriv.z;
+                                jointEntropyDerivative_Z += temp * jointLog;
+                                referenceEntropyDerivative_Z += temp * targetLog;
+                                warpedEntropyDerivative_Z += temp * resultLog;
+                            } // O<t<bin
+                        } // t
+                    } // 0<r<bin
+                } // r
+
+                // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
+                gradValue.x = (referenceEntropyDerivative_X + warpedEntropyDerivative_X - nmi * jointEntropyDerivative_X) / normalisedJE;
+                gradValue.y = (referenceEntropyDerivative_Y + warpedEntropyDerivative_Y - nmi * jointEntropyDerivative_Y) / normalisedJE;
+                gradValue.z = (referenceEntropyDerivative_Z + warpedEntropyDerivative_Z - nmi * jointEntropyDerivative_Z) / normalisedJE;
+
+            }
+        }
+        voxelBasedGradient[targetIndex] = gradValue;
+    }
 }
 /* *************************************************************** */
 // Multichannel NMI gradient. Hardcoded for 2x2 NMI channels.
-__global__ void reg_getVoxelBasedNMIGradientUsingPW2x2_kernel(float4 *voxelNMIGradientArray_d)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_ActiveVoxelNumber){
-        const int targetIndex = tex1Dfetch(maskTexture,tid);
-        int tempIndex=targetIndex;
-        const int z = tempIndex/(c_ImageSize.x*c_ImageSize.y);
-        tempIndex  -= z*c_ImageSize.x*c_ImageSize.y;
-        const int y = tempIndex/c_ImageSize.x;
-        const int x = tempIndex - y*c_ImageSize.x;
+/* __global__ void reg_getVoxelBasedNMIGradientUsingPW2x2_kernel(float4 *voxelBasedGradient) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_ActiveVoxelNumber) {
+        const int targetIndex = tex1Dfetch(maskTexture, tid);
+        int tempIndex = targetIndex;
+        const int z = tempIndex / (c_ImageSize.x * c_ImageSize.y);
+        tempIndex -= z * c_ImageSize.x * c_ImageSize.y;
+        const int y = tempIndex / c_ImageSize.x;
+        const int x = tempIndex - y * c_ImageSize.x;
 
         float4 voxelValues = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-		voxelValues.x = tex3D(firstreferenceImageTexture,
-                              ((float)x+0.5f)/(float)c_ImageSize.x,
-                              ((float)y+0.5f)/(float)c_ImageSize.y,
-                              ((float)z+0.5f)/(float)c_ImageSize.z);
-		voxelValues.y = tex3D(secondreferenceImageTexture,
-                              ((float)x+0.5f)/(float)c_ImageSize.x,
-                              ((float)y+0.5f)/(float)c_ImageSize.y,
-                              ((float)z+0.5f)/(float)c_ImageSize.z);
-		voxelValues.z = tex1Dfetch(firstwarpedImageTexture,targetIndex);
-		voxelValues.w = tex1Dfetch(secondwarpedImageTexture,targetIndex);
-
-		float4 firstwarpedImageGradient = tex1Dfetch(firstwarpedImageGradientTexture,tid);
-		float4 secondwarpedImageGradient = tex1Dfetch(secondwarpedImageGradientTexture,tid);
+        voxelValues.x = tex3D(firstreferenceImageTexture,
+                              ((float)x + 0.5f) / (float)c_ImageSize.x,
+                              ((float)y + 0.5f) / (float)c_ImageSize.y,
+                              ((float)z + 0.5f) / (float)c_ImageSize.z);
+        voxelValues.y = tex3D(secondreferenceImageTexture,
+                              ((float)x + 0.5f) / (float)c_ImageSize.x,
+                              ((float)y + 0.5f) / (float)c_ImageSize.y,
+                              ((float)z + 0.5f) / (float)c_ImageSize.z);
+        voxelValues.z = tex1Dfetch(firstwarpedImageTexture, targetIndex);
+        voxelValues.w = tex1Dfetch(secondwarpedImageTexture, targetIndex);
+
+        float4 firstwarpedImageGradient = tex1Dfetch(firstwarpedImageGradientTexture, tid);
+        float4 secondwarpedImageGradient = tex1Dfetch(secondwarpedImageGradientTexture, tid);
         float4 gradValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 
         // Could remove some tests (which are not really needed) to reduce register
@@ -299,31 +249,29 @@ __global__ void reg_getVoxelBasedNMIGradientUsingPW2x2_kernel(float4 *voxelNMIGr
             voxelValues.x < c_firstTargetBin &&
             voxelValues.y < c_secondTargetBin &&
             voxelValues.z < c_firstResultBin &&
-            voxelValues.w < c_secondResultBin)
-        {
+            voxelValues.w < c_secondResultBin) {
             voxelValues.x = (float)((int)voxelValues.x);
             voxelValues.y = (float)((int)voxelValues.y);
             voxelValues.z = (float)((int)voxelValues.z);
             voxelValues.w = (float)((int)voxelValues.w);
 
-			if( firstwarpedImageGradient.x==firstwarpedImageGradient.x &&
-				firstwarpedImageGradient.y==firstwarpedImageGradient.y &&
-				firstwarpedImageGradient.z==firstwarpedImageGradient.z &&
-				secondwarpedImageGradient.x==secondwarpedImageGradient.x &&
-				secondwarpedImageGradient.y==secondwarpedImageGradient.y &&
-				secondwarpedImageGradient.z==secondwarpedImageGradient.z)
-            {
+            if (firstwarpedImageGradient.x == firstwarpedImageGradient.x &&
+                firstwarpedImageGradient.y == firstwarpedImageGradient.y &&
+                firstwarpedImageGradient.z == firstwarpedImageGradient.z &&
+                secondwarpedImageGradient.x == secondwarpedImageGradient.x &&
+                secondwarpedImageGradient.y == secondwarpedImageGradient.y &&
+                secondwarpedImageGradient.z == secondwarpedImageGradient.z) {
                 float jointEntropyDerivative_X = 0.0f;
-				float warpedEntropyDerivative_X = 0.0f;
-				float referenceEntropyDerivative_X = 0.0f;
+                float warpedEntropyDerivative_X = 0.0f;
+                float referenceEntropyDerivative_X = 0.0f;
 
                 float jointEntropyDerivative_Y = 0.0f;
-				float warpedEntropyDerivative_Y = 0.0f;
-				float referenceEntropyDerivative_Y = 0.0f;
+                float warpedEntropyDerivative_Y = 0.0f;
+                float referenceEntropyDerivative_Y = 0.0f;
 
                 float jointEntropyDerivative_Z = 0.0f;
-				float warpedEntropyDerivative_Z = 0.0f;
-				float referenceEntropyDerivative_Z = 0.0f;
+                float warpedEntropyDerivative_Z = 0.0f;
+                float referenceEntropyDerivative_Z = 0.0f;
 
                 float jointLog, targetLog, resultLog, temp;
                 float4 relative_pos = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
@@ -331,26 +279,26 @@ __global__ void reg_getVoxelBasedNMIGradientUsingPW2x2_kernel(float4 *voxelNMIGr
                 float s_x, s_y, s_z, s_w;
                 float common_target_value = 0.0f;
                 int target_flat_index, result_flat_index, total_target_entries, num_probabilities;
-                for (int i=-1; i<2; ++i) {
-                    relative_pos.x = (int)(voxelValues.x+i);
-
-                    if (-1<relative_pos.x && relative_pos.x<c_firstTargetBin) {
-                        for (int j=-1; j<2; ++j) {
-                            relative_pos.y = (int)(voxelValues.y+j);
-
-                            if (-1<relative_pos.y && relative_pos.y<c_secondTargetBin) {
-                                s_x = GetBasisSplineValue(relative_pos.x-voxelValues.x);
-                                s_y = GetBasisSplineValue(relative_pos.y-voxelValues.y);
-                                common_target_value =  s_x * s_y;
-
-                                for (int k=-1; k<2; ++k) {
-                                    relative_pos.z = (int)(voxelValues.z+k);
-                                    if (-1<relative_pos.z && relative_pos.z<c_firstResultBin) {
-                                        s_x = GetBasisSplineDerivativeValue(relative_pos.z-voxelValues.z);
-                                        s_w = GetBasisSplineValue(relative_pos.z-voxelValues.z);
-                                        for (int l=-1; l<2; ++l) {
-                                            relative_pos.w = (int)(voxelValues.w+l);
-                                            if (-1<relative_pos.w && relative_pos.w<c_secondResultBin) {
+                for (int i = -1; i < 2; ++i) {
+                    relative_pos.x = (int)(voxelValues.x + i);
+
+                    if (-1 < relative_pos.x && relative_pos.x < c_firstTargetBin) {
+                        for (int j = -1; j < 2; ++j) {
+                            relative_pos.y = (int)(voxelValues.y + j);
+
+                            if (-1 < relative_pos.y && relative_pos.y < c_secondTargetBin) {
+                                s_x = GetBasisSplineValue(relative_pos.x - voxelValues.x);
+                                s_y = GetBasisSplineValue(relative_pos.y - voxelValues.y);
+                                common_target_value = s_x * s_y;
+
+                                for (int k = -1; k < 2; ++k) {
+                                    relative_pos.z = (int)(voxelValues.z + k);
+                                    if (-1 < relative_pos.z && relative_pos.z < c_firstResultBin) {
+                                        s_x = GetBasisSplineDerivativeValue(relative_pos.z - voxelValues.z);
+                                        s_w = GetBasisSplineValue(relative_pos.z - voxelValues.z);
+                                        for (int l = -1; l < 2; ++l) {
+                                            relative_pos.w = (int)(voxelValues.w + l);
+                                            if (-1 < relative_pos.w && relative_pos.w < c_secondResultBin) {
                                                 target_flat_index = relative_pos.x + relative_pos.y * c_firstTargetBin;
                                                 result_flat_index = relative_pos.z + relative_pos.w * c_firstResultBin;
                                                 total_target_entries = c_firstTargetBin * c_secondTargetBin;
@@ -362,29 +310,29 @@ __global__ void reg_getVoxelBasedNMIGradientUsingPW2x2_kernel(float4 *voxelNMIGr
 
                                                 // Contribution from floating images. These arithmetic operations use
                                                 // a lot of registers. Need to look into whether this can be reduced somehow.
-                                                s_y = GetBasisSplineValue(relative_pos.w-voxelValues.w);
-                                                s_z = GetBasisSplineDerivativeValue(relative_pos.w-voxelValues.w);
-												temp = (s_x * firstwarpedImageGradient.x * s_y) +
-													   (s_z * secondwarpedImageGradient.x * s_w);
+                                                s_y = GetBasisSplineValue(relative_pos.w - voxelValues.w);
+                                                s_z = GetBasisSplineDerivativeValue(relative_pos.w - voxelValues.w);
+                                                temp = (s_x * firstwarpedImageGradient.x * s_y) +
+                                                    (s_z * secondwarpedImageGradient.x * s_w);
                                                 temp *= common_target_value;
 
                                                 jointEntropyDerivative_X -= temp * jointLog;
-												referenceEntropyDerivative_X -= temp * targetLog;
-												warpedEntropyDerivative_X -= temp * resultLog;
+                                                referenceEntropyDerivative_X -= temp * targetLog;
+                                                warpedEntropyDerivative_X -= temp * resultLog;
 
-												temp = (s_x * firstwarpedImageGradient.y * s_y) +
-													   (s_z * secondwarpedImageGradient.y * s_w);
+                                                temp = (s_x * firstwarpedImageGradient.y * s_y) +
+                                                    (s_z * secondwarpedImageGradient.y * s_w);
                                                 temp *= common_target_value;
                                                 jointEntropyDerivative_Y -= temp * jointLog;
-												referenceEntropyDerivative_Y -= temp * targetLog;
-												warpedEntropyDerivative_Y -= temp * resultLog;
+                                                referenceEntropyDerivative_Y -= temp * targetLog;
+                                                warpedEntropyDerivative_Y -= temp * resultLog;
 
-												temp = (s_x * firstwarpedImageGradient.z * s_y) +
-													   (s_z * secondwarpedImageGradient.z * s_w);
+                                                temp = (s_x * firstwarpedImageGradient.z * s_y) +
+                                                    (s_z * secondwarpedImageGradient.z * s_w);
                                                 temp *= common_target_value;
                                                 jointEntropyDerivative_Z -= temp * jointLog;
-												referenceEntropyDerivative_Z -= temp * targetLog;
-												warpedEntropyDerivative_Z -= temp * resultLog;
+                                                referenceEntropyDerivative_Z -= temp * targetLog;
+                                                warpedEntropyDerivative_Z -= temp * resultLog;
                                             }
                                         }
                                     }
@@ -394,191 +342,178 @@ __global__ void reg_getVoxelBasedNMIGradientUsingPW2x2_kernel(float4 *voxelNMIGr
                     }
                 }
 
-				gradValue.x = (referenceEntropyDerivative_X + warpedEntropyDerivative_X - c_NMI * jointEntropyDerivative_X) / c_NormalisedJE;
-				gradValue.y = (referenceEntropyDerivative_Y + warpedEntropyDerivative_Y - c_NMI * jointEntropyDerivative_Y) / c_NormalisedJE;
-				gradValue.z = (referenceEntropyDerivative_Z + warpedEntropyDerivative_Z - c_NMI * jointEntropyDerivative_Z) / c_NormalisedJE;
+                gradValue.x = (referenceEntropyDerivative_X + warpedEntropyDerivative_X - c_NMI * jointEntropyDerivative_X) / c_NormalisedJE;
+                gradValue.y = (referenceEntropyDerivative_Y + warpedEntropyDerivative_Y - c_NMI * jointEntropyDerivative_Y) / c_NormalisedJE;
+                gradValue.z = (referenceEntropyDerivative_Z + warpedEntropyDerivative_Z - c_NMI * jointEntropyDerivative_Z) / c_NormalisedJE;
             }
         }
-        voxelNMIGradientArray_d[targetIndex]=gradValue;
+        voxelBasedGradient[targetIndex] = gradValue;
     }
-}
+} */
 /* *************************************************************** */
-__global__ void reg_smoothJointHistogramX_kernel(float *tempHistogram)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_secondTargetBin*c_firstResultBin*c_secondResultBin){
+/* __global__ void reg_smoothJointHistogramX_kernel(float *tempHistogram) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_secondTargetBin * c_firstResultBin * c_secondResultBin) {
         // The starting index is computed
-        unsigned startingPoint=tid*c_firstTargetBin;
-        unsigned finishPoint=startingPoint+c_firstTargetBin;
+        unsigned startingPoint = tid * c_firstTargetBin;
+        unsigned finishPoint = startingPoint + c_firstTargetBin;
 
         // The first point is computed
         tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
-                                       tex1Dfetch(histogramTexture, startingPoint+1) * COEFF_L) / COEFF_B;
+                                        tex1Dfetch(histogramTexture, startingPoint + 1) * COEFF_L) / COEFF_B;
         // The middle points are computed
-        for(unsigned i=startingPoint+1; i<finishPoint-1; ++i){
-            tempHistogram[i] = tex1Dfetch(histogramTexture, i-1) * COEFF_L +
-                               tex1Dfetch(histogramTexture, i) * COEFF_C +
-                               tex1Dfetch(histogramTexture, i+1) * COEFF_L;
+        for (unsigned i = startingPoint + 1; i < finishPoint - 1; ++i) {
+            tempHistogram[i] = tex1Dfetch(histogramTexture, i - 1) * COEFF_L +
+                tex1Dfetch(histogramTexture, i) * COEFF_C +
+                tex1Dfetch(histogramTexture, i + 1) * COEFF_L;
         }
         // The last point is computed
-        tempHistogram[finishPoint-1] = (tex1Dfetch(histogramTexture, finishPoint-2) * COEFF_L +
-                                       tex1Dfetch(histogramTexture, finishPoint-1) * COEFF_C) / COEFF_B;
+        tempHistogram[finishPoint - 1] = (tex1Dfetch(histogramTexture, finishPoint - 2) * COEFF_L +
+                                          tex1Dfetch(histogramTexture, finishPoint - 1) * COEFF_C) / COEFF_B;
     }
-    return;
-}
-
-__global__ void reg_smoothJointHistogramY_kernel(float *tempHistogram)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_firstTargetBin*c_firstResultBin*c_secondResultBin){
+} */
+/* *************************************************************** */
+/* __global__ void reg_smoothJointHistogramY_kernel(float *tempHistogram) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_firstTargetBin * c_firstResultBin * c_secondResultBin) {
         // The starting index is computed
-        unsigned startingPoint=tid + c_firstTargetBin*(c_secondTargetBin-1)*(c_firstResultBin*(int)(tid/(c_firstTargetBin*c_firstResultBin)) +
-                                   (int)(tid/c_firstTargetBin - c_firstResultBin * (int)(tid/(c_firstTargetBin*c_firstResultBin))));
+        unsigned startingPoint = tid + c_firstTargetBin * (c_secondTargetBin - 1) * (c_firstResultBin * (int)(tid / (c_firstTargetBin * c_firstResultBin)) +
+                                                                                     (int)(tid / c_firstTargetBin - c_firstResultBin * (int)(tid / (c_firstTargetBin * c_firstResultBin))));
         unsigned increment = c_firstTargetBin;
-        unsigned finishPoint=startingPoint+increment*c_secondTargetBin;
+        unsigned finishPoint = startingPoint + increment * c_secondTargetBin;
 
         // The first point is computed
         tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
-                                       tex1Dfetch(histogramTexture, startingPoint+increment) * COEFF_L) / COEFF_B;
+                                        tex1Dfetch(histogramTexture, startingPoint + increment) * COEFF_L) / COEFF_B;
         // The middle points are computed
-        for(unsigned i=startingPoint+increment; i<finishPoint-increment; i+=increment){
-            tempHistogram[i] = tex1Dfetch(histogramTexture, i-increment) * COEFF_L +
-                               tex1Dfetch(histogramTexture, i) * COEFF_C +
-                               tex1Dfetch(histogramTexture, i+increment) * COEFF_L;
+        for (unsigned i = startingPoint + increment; i < finishPoint - increment; i += increment) {
+            tempHistogram[i] = tex1Dfetch(histogramTexture, i - increment) * COEFF_L +
+                tex1Dfetch(histogramTexture, i) * COEFF_C +
+                tex1Dfetch(histogramTexture, i + increment) * COEFF_L;
         }
         // The last point is computed
-        tempHistogram[finishPoint-increment] = (tex1Dfetch(histogramTexture, finishPoint-2*increment) * COEFF_L +
-                                       tex1Dfetch(histogramTexture, finishPoint-increment) * COEFF_C) / COEFF_B;
+        tempHistogram[finishPoint - increment] = (tex1Dfetch(histogramTexture, finishPoint - 2 * increment) * COEFF_L +
+                                                  tex1Dfetch(histogramTexture, finishPoint - increment) * COEFF_C) / COEFF_B;
     }
-    return;
-}
-
-__global__ void reg_smoothJointHistogramZ_kernel(float *tempHistogram)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_firstTargetBin*c_secondTargetBin*c_secondResultBin){
+} */
+/* *************************************************************** */
+/* __global__ void reg_smoothJointHistogramZ_kernel(float *tempHistogram) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_firstTargetBin * c_secondTargetBin * c_secondResultBin) {
         // The starting index is computed
-        unsigned startingPoint=tid+c_firstTargetBin*c_secondTargetBin*(c_firstResultBin-1)*(int)(tid/(c_firstTargetBin*c_secondTargetBin));
-        unsigned increment = c_firstTargetBin*c_secondTargetBin;
-        unsigned finishPoint=startingPoint+increment*c_firstResultBin;
+        unsigned startingPoint = tid + c_firstTargetBin * c_secondTargetBin * (c_firstResultBin - 1) * (int)(tid / (c_firstTargetBin * c_secondTargetBin));
+        unsigned increment = c_firstTargetBin * c_secondTargetBin;
+        unsigned finishPoint = startingPoint + increment * c_firstResultBin;
 
         // The first point is computed
         tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
-                                       tex1Dfetch(histogramTexture, startingPoint+increment) * COEFF_L) / COEFF_B;
+                                        tex1Dfetch(histogramTexture, startingPoint + increment) * COEFF_L) / COEFF_B;
         // The middle points are computed
-        for(unsigned i=startingPoint+increment; i<finishPoint-increment; i+=increment){
-            tempHistogram[i] = tex1Dfetch(histogramTexture, i-increment) * COEFF_L +
-                               tex1Dfetch(histogramTexture, i) * COEFF_C +
-                               tex1Dfetch(histogramTexture, i+increment) * COEFF_L;
+        for (unsigned i = startingPoint + increment; i < finishPoint - increment; i += increment) {
+            tempHistogram[i] = tex1Dfetch(histogramTexture, i - increment) * COEFF_L +
+                tex1Dfetch(histogramTexture, i) * COEFF_C +
+                tex1Dfetch(histogramTexture, i + increment) * COEFF_L;
         }
         // The last point is computed
-        tempHistogram[finishPoint-increment] = (tex1Dfetch(histogramTexture, finishPoint-2*increment) * COEFF_L +
-                                       tex1Dfetch(histogramTexture, finishPoint-increment) * COEFF_C) / COEFF_B;
+        tempHistogram[finishPoint - increment] = (tex1Dfetch(histogramTexture, finishPoint - 2 * increment) * COEFF_L +
+                                                  tex1Dfetch(histogramTexture, finishPoint - increment) * COEFF_C) / COEFF_B;
     }
-    return;
-}
-
-__global__ void reg_smoothJointHistogramW_kernel(float *tempHistogram)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_firstTargetBin*c_secondTargetBin*c_firstResultBin){
+} */
+/* *************************************************************** */
+/* __global__ void reg_smoothJointHistogramW_kernel(float *tempHistogram) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_firstTargetBin * c_secondTargetBin * c_firstResultBin) {
         // The starting index is computed
-        unsigned startingPoint=tid;
-        unsigned increment = c_firstTargetBin*c_secondTargetBin*c_firstResultBin;
-        unsigned finishPoint=increment*c_secondResultBin;
+        unsigned startingPoint = tid;
+        unsigned increment = c_firstTargetBin * c_secondTargetBin * c_firstResultBin;
+        unsigned finishPoint = increment * c_secondResultBin;
 
         // The first point is computed
         tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
-                                       tex1Dfetch(histogramTexture, startingPoint+increment) * COEFF_L) / COEFF_B;
+                                        tex1Dfetch(histogramTexture, startingPoint + increment) * COEFF_L) / COEFF_B;
         // The middle points are computed
-        for(unsigned i=startingPoint+increment; i<finishPoint-increment; i+=increment){
-            tempHistogram[i] = tex1Dfetch(histogramTexture, i-increment) * COEFF_L +
-                               tex1Dfetch(histogramTexture, i) * COEFF_C +
-                               tex1Dfetch(histogramTexture, i+increment) * COEFF_L;
+        for (unsigned i = startingPoint + increment; i < finishPoint - increment; i += increment) {
+            tempHistogram[i] = tex1Dfetch(histogramTexture, i - increment) * COEFF_L +
+                tex1Dfetch(histogramTexture, i) * COEFF_C +
+                tex1Dfetch(histogramTexture, i + increment) * COEFF_L;
         }
         // The last point is computed
-        tempHistogram[finishPoint-increment] = (tex1Dfetch(histogramTexture, finishPoint-2*increment) * COEFF_L +
-                                       tex1Dfetch(histogramTexture, finishPoint-increment) * COEFF_C) / COEFF_B;
+        tempHistogram[finishPoint - increment] = (tex1Dfetch(histogramTexture, finishPoint - 2 * increment) * COEFF_L +
+                                                  tex1Dfetch(histogramTexture, finishPoint - increment) * COEFF_C) / COEFF_B;
     }
-    return;
-}
-
-/// Kernels for marginalisation along the different axes
-__global__ void reg_marginaliseTargetX_kernel(float *babyHisto)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_secondTargetBin*c_firstResultBin*c_secondResultBin){
-        unsigned startingPoint=tid*c_firstTargetBin;
-        unsigned finishPoint=startingPoint+c_firstTargetBin;
-
-        float sum=tex1Dfetch(histogramTexture, startingPoint);
-        float c=0.f,Y,t;
-        for(unsigned i=startingPoint+1; i<finishPoint; ++i){
+} */
+/* *************************************************************** */
+// Kernels for marginalisation along the different axes
+/* __global__ void reg_marginaliseTargetX_kernel(float *babyHisto) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_secondTargetBin * c_firstResultBin * c_secondResultBin) {
+        unsigned startingPoint = tid * c_firstTargetBin;
+        unsigned finishPoint = startingPoint + c_firstTargetBin;
+
+        float sum = tex1Dfetch(histogramTexture, startingPoint);
+        float c = 0.f, Y, t;
+        for (unsigned i = startingPoint + 1; i < finishPoint; ++i) {
             Y = tex1Dfetch(histogramTexture, i) - c;
             t = sum + Y;
-            c = (t-sum)-Y;
-            sum=t;
+            c = (t - sum) - Y;
+            sum = t;
         }
-        babyHisto[tid]=sum;
+        babyHisto[tid] = sum;
     }
-}
-
-__global__ void reg_marginaliseTargetXY_kernel(float *babyHisto)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_firstResultBin*c_secondResultBin){
-        unsigned startingPoint=tid*c_secondTargetBin;
-        unsigned finishPoint=startingPoint+c_secondTargetBin;
-
-        float sum=tex1Dfetch(histogramTexture, startingPoint);
-        float c=0.f,Y,t;
-        for(unsigned i=startingPoint+1; i<finishPoint; ++i){
+} */
+/* *************************************************************** */
+/* __global__ void reg_marginaliseTargetXY_kernel(float *babyHisto) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_firstResultBin * c_secondResultBin) {
+        unsigned startingPoint = tid * c_secondTargetBin;
+        unsigned finishPoint = startingPoint + c_secondTargetBin;
+
+        float sum = tex1Dfetch(histogramTexture, startingPoint);
+        float c = 0.f, Y, t;
+        for (unsigned i = startingPoint + 1; i < finishPoint; ++i) {
             Y = tex1Dfetch(histogramTexture, i) - c;
             t = sum + Y;
-            c = (t-sum)-Y;
-            sum=t;
+            c = (t - sum) - Y;
+            sum = t;
         }
-        babyHisto[tid]=sum;
+        babyHisto[tid] = sum;
     }
-}
-
-__global__ void reg_marginaliseResultX_kernel(float *babyHisto)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_firstTargetBin*c_secondTargetBin*c_firstResultBin){
+} */
+/* *************************************************************** */
+/* __global__ void reg_marginaliseResultX_kernel(float *babyHisto) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_firstTargetBin * c_secondTargetBin * c_firstResultBin) {
         unsigned startingPoint = tid;
-        float sum=tex1Dfetch(histogramTexture, startingPoint);
+        float sum = tex1Dfetch(histogramTexture, startingPoint);
         // increment by a the cube
-        unsigned increment = c_firstTargetBin*c_secondTargetBin*c_firstResultBin;
-        float c=0.f,Y,t;
+        unsigned increment = c_firstTargetBin * c_secondTargetBin * c_firstResultBin;
+        float c = 0.f, Y, t;
 
-        for (unsigned i = 1; i < c_secondResultBin; ++i)
-        {
-            Y = tex1Dfetch(histogramTexture, startingPoint + i *increment) - c;
+        for (unsigned i = 1; i < c_secondResultBin; ++i) {
+            Y = tex1Dfetch(histogramTexture, startingPoint + i * increment) - c;
             t = sum + Y;
-            c = (t-sum)-Y;
-            sum=t;
+            c = (t - sum) - Y;
+            sum = t;
         }
-        babyHisto[tid]=sum;
+        babyHisto[tid] = sum;
     }
-}
-
-__global__ void reg_marginaliseResultXY_kernel(float *babyHisto)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_firstTargetBin*c_secondTargetBin){
-        unsigned startingPoint=tid;
-        float sum=tex1Dfetch(histogramTexture, startingPoint);
+} */
+/* *************************************************************** */
+/* __global__ void reg_marginaliseResultXY_kernel(float *babyHisto) {
+    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < c_firstTargetBin * c_secondTargetBin) {
+        unsigned startingPoint = tid;
+        float sum = tex1Dfetch(histogramTexture, startingPoint);
         // increment by the plane.
-        unsigned increment = c_firstTargetBin*c_secondTargetBin;
-        float c=0.f,Y,t;
-        for (unsigned i = 1; i < c_firstResultBin; ++i)
-        {
-            Y = tex1Dfetch(histogramTexture, startingPoint + i *increment) - c;
+        unsigned increment = c_firstTargetBin * c_secondTargetBin;
+        float c = 0.f, Y, t;
+        for (unsigned i = 1; i < c_firstResultBin; ++i) {
+            Y = tex1Dfetch(histogramTexture, startingPoint + i * increment) - c;
             t = sum + Y;
-            c = (t-sum)-Y;
-            sum=t;
+            c = (t - sum) - Y;
+            sum = t;
         }
-        babyHisto[tid]=sum;
+        babyHisto[tid] = sum;
     }
-}
+} */
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index cc14aae4..2acccafa 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -14,84 +14,81 @@
 #include "_reg_resampling_kernels.cu"
 
 /* *************************************************************** */
-void reg_resampleImage_gpu(nifti_image *floatingImage,
-                           float *warpedImageArray_d,
-                           cudaArray *floatingImageArray_d,
-                           float4 *deformationFieldImageArray_d,
-                           int *mask_d,
-                           size_t activeVoxelNumber,
-                           float paddingValue) {
+void reg_resampleImage_gpu(const nifti_image *floatingImage,
+                           float *warpedImageCuda,
+                           const cudaArray *floatingImageCuda,
+                           const float4 *deformationFieldCuda,
+                           const int *maskCuda,
+                           const size_t& activeVoxelNumber,
+                           const float& paddingValue) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
+    const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray);
-
+    auto floatingTexture = cudaCommon_createTextureObject(floatingImageCuda, cudaResourceTypeArray);
     // Create the texture object for the deformation field
-    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
+    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
                                                                   activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
-
     // Create the texture object for the mask
-    auto maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+    auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
                                                       cudaChannelFormatKindSigned, 1);
 
     // Bind the real to voxel matrix to the texture
-    mat44 floatingMatrix;
-    if (floatingImage->sform_code > 0)
-        floatingMatrix = floatingImage->sto_ijk;
-    else floatingMatrix = floatingImage->qto_ijk;
+    const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
 
     if (floatingImage->nz > 1) {
-        const unsigned Grid_reg_resamplefloatingImage3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_resampleImage3D));
-        dim3 B1(blockSize->reg_resampleImage3D, 1, 1);
-        dim3 G1(Grid_reg_resamplefloatingImage3D, Grid_reg_resamplefloatingImage3D, 1);
-        reg_resampleImage3D_kernel<<<G1, B1>>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_resampleImage3D;
+        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_resampleImage3D_kernel<<<gridDims, blockDims>>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture,
+                                                            floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned Grid_reg_resamplefloatingImage2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_resampleImage2D));
-        dim3 B1(blockSize->reg_resampleImage2D, 1, 1);
-        dim3 G1(Grid_reg_resamplefloatingImage2D, Grid_reg_resamplefloatingImage2D, 1);
-        reg_resampleImage2D_kernel<<<G1, B1>>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_resampleImage2D;
+        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_resampleImage2D_kernel<<<gridDims, blockDims>>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture,
+                                                            floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
 }
 /* *************************************************************** */
-void reg_getImageGradient_gpu(nifti_image *floatingImage,
-                              cudaArray *floatingImageArray_d,
-                              float4 *deformationFieldImageArray_d,
-                              float4 *warpedGradientArray_d,
-                              size_t activeVoxelNumber,
-                              float paddingValue) {
+void reg_getImageGradient_gpu(const nifti_image *floatingImage,
+                              const cudaArray *floatingImageCuda,
+                              const float4 *deformationFieldCuda,
+                              float4 *warpedGradientCuda,
+                              const size_t& activeVoxelNumber,
+                              const float& paddingValue) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
+    const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray);
-
+    auto floatingTexture = cudaCommon_createTextureObject(floatingImageCuda, cudaResourceTypeArray);
     // Create the texture object for the deformation field
-    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear,
+    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
                                                                   activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     // Bind the real to voxel matrix to the texture
-    mat44 floatingMatrix;
-    if (floatingImage->sform_code > 0)
-        floatingMatrix = floatingImage->sto_ijk;
-    else floatingMatrix = floatingImage->qto_ijk;
+    const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
 
     if (floatingImage->nz > 1) {
-        const unsigned Grid_reg_getImageGradient3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getImageGradient3D));
-        dim3 B1(blockSize->reg_getImageGradient3D, 1, 1);
-        dim3 G1(Grid_reg_getImageGradient3D, Grid_reg_getImageGradient3D, 1);
-        reg_getImageGradient3D_kernel<<<G1, B1>>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_getImageGradient3D;
+        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_getImageGradient3D_kernel<<<gridDims, blockDims>>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture,
+                                                               floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned Grid_reg_getImageGradient2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getImageGradient2D));
-        dim3 B1(blockSize->reg_getImageGradient2D, 1, 1);
-        dim3 G1(Grid_reg_getImageGradient2D, Grid_reg_getImageGradient2D, 1);
-        reg_getImageGradient2D_kernel<<<G1, B1>>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
-        NR_CUDA_CHECK_KERNEL(G1, B1);
+        const unsigned blocks = blockSize->reg_getImageGradient2D;
+        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const dim3 gridDims(grids, grids, 1);
+        const dim3 blockDims(blocks, 1, 1);
+        reg_getImageGradient2D_kernel<<<gridDims, blockDims>>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture,
+                                                               floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h
index 7fcfe95f..5c3e15e7 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/_reg_resampling_gpu.h
@@ -14,19 +14,21 @@
 
 #include "_reg_common_cuda.h"
 
+/* *************************************************************** */
 extern "C++"
-void reg_resampleImage_gpu(nifti_image *sourceImage,
-                           float *resultImageArray_d,
-                           cudaArray *sourceImageArray_d,
-                           float4 *positionFieldImageArray_d,
-                           int *mask_d,
-                           size_t activeVoxelNumber,
-                           float paddingValue);
-
+void reg_resampleImage_gpu(const nifti_image *floatingImage,
+                           float *warpedImageCuda,
+                           const cudaArray *floatingImageCuda,
+                           const float4 *deformationFieldCuda,
+                           const int *maskCuda,
+                           const size_t& activeVoxelNumber,
+                           const float& paddingValue);
+/* *************************************************************** */
 extern "C++"
-void reg_getImageGradient_gpu(nifti_image *sourceImage,
-                              cudaArray *sourceImageArray_d,
-                              float4 *positionFieldImageArray_d,
-                              float4 *resultGradientArray_d,
-                              size_t activeVoxelNumber,
-                              float paddingValue);
+void reg_getImageGradient_gpu(const nifti_image *floatingImage,
+                              const cudaArray *floatingImageCuda,
+                              const float4 *deformationFieldCuda,
+                              float4 *warpedGradientCuda,
+                              const size_t& activeVoxelNumber,
+                              const float& paddingValue);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index dbf09b17..c126e4fa 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -13,7 +13,6 @@
 #include "_reg_ssd_gpu.h"
 #include "_reg_ssd_kernels.cu"
 
-/* *************************************************************** */
 /* *************************************************************** */
 reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() {
 #ifndef NDEBUG
@@ -21,11 +20,10 @@ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr,
                                     nifti_image *floImgPtr,
                                     int *maskRefPtr,
-                                    int activeVoxNum,
+                                    size_t activeVoxNum,
                                     nifti_image *warFloImgPtr,
                                     nifti_image *warFloGraPtr,
                                     nifti_image *forVoxBasedGraPtr,
@@ -75,117 +73,100 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr,
 #endif
 }
 /* *************************************************************** */
-float reg_getSSDValue_gpu(nifti_image *referenceImage,
-                          cudaArray **reference_d,
-                          float **warped_d,
-                          int **mask_d,
-                          int activeVoxelNumber) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
+double reg_getSSDValue_gpu(const nifti_image *referenceImage,
+                           const cudaArray *referenceImageCuda,
+                           const float *warpedCuda,
+                           const int *maskCuda,
+                           const size_t& activeVoxelNumber) {
     // Copy the constant memory variables
-    const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-    const int voxelNumber = CalcVoxelNumber(*referenceImage);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
-    // Bind the required textures
-    referenceTexture.normalized = true;
-    referenceTexture.filterMode = cudaFilterModeLinear;
-    referenceTexture.addressMode[0] = cudaAddressModeWrap;
-    referenceTexture.addressMode[1] = cudaAddressModeWrap;
-    referenceTexture.addressMode[2] = cudaAddressModeWrap;
-    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, *reference_d, channelDesc));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, *warped_d, voxelNumber * sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber * sizeof(int)));
+    const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+
+    auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
+                                                           cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
+    auto warpedTexture = cudaCommon_createTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
+                                                        cudaChannelFormatKindFloat, 1);
+    auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+                                                      cudaChannelFormatKindSigned, 1);
+
     // Create an array on the device to store the absolute difference values
-    float *absoluteValues_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValues_d, activeVoxelNumber * sizeof(float)));
+    float *absoluteValuesCuda;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValuesCuda, activeVoxelNumber * sizeof(float)));
+
     // Compute the absolute values
-    const unsigned Grid_reg_getSquaredDifference =
-        (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getSquaredDifference));
-    dim3 B1(blockSize->reg_getSquaredDifference, 1, 1);
-    dim3 G1(Grid_reg_getSquaredDifference, Grid_reg_getSquaredDifference, 1);
-    if (referenceDim.z > 1)
-        reg_getSquaredDifference3D_kernel <<< G1, B1 >>> (absoluteValues_d);
-    else reg_getSquaredDifference2D_kernel <<< G1, B1 >>> (absoluteValues_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-    // Unbind the textures
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getSquaredDifference;
+    const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+    if (referenceImageDim.z > 1)
+        reg_getSquaredDifference3D_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda, *referenceTexture, *warpedTexture, *maskTexture,
+                                                                   referenceImageDim, (unsigned)activeVoxelNumber);
+    else reg_getSquaredDifference2D_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda, *referenceTexture, *warpedTexture, *maskTexture,
+                                                                    referenceImageDim, (unsigned)activeVoxelNumber);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+
     // Perform a reduction on the absolute values
-    float ssd = (float)((double)reg_sumReduction_gpu(absoluteValues_d, activeVoxelNumber) / (double)activeVoxelNumber);
+    const double ssd = (double)reg_sumReduction_gpu(absoluteValuesCuda, activeVoxelNumber) / (double)activeVoxelNumber;
+
     // Free the absolute value array
-    NR_CUDA_SAFE_CALL(cudaFree(absoluteValues_d));
+    NR_CUDA_SAFE_CALL(cudaFree(absoluteValuesCuda));
 
     return ssd;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 double reg_ssd_gpu::GetSimilarityMeasureValue() {
-    double SSDValue = reg_getSSDValue_gpu(this->referenceImagePointer,
-                                          &this->referenceDevicePointer,
-                                          &this->warpedFloatingDevicePointer,
-                                          &this->referenceMaskDevicePointer,
-                                          this->activeVoxelNumber);
+    const double SSDValue = reg_getSSDValue_gpu(this->referenceImagePointer,
+                                                this->referenceDevicePointer,
+                                                this->warpedFloatingDevicePointer,
+                                                this->referenceMaskDevicePointer,
+                                                this->activeVoxelNumber);
     return -SSDValue;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
-                                      cudaArray *reference_d,
-                                      float *warped_d,
-                                      float4 *spaGradient_d,
-                                      float4 *ssdGradient_d,
-                                      float maxSD,
-                                      int *mask_d,
-                                      int activeVoxelNumber) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
+void reg_getVoxelBasedSSDGradient_gpu(const nifti_image *referenceImage,
+                                      const cudaArray *referenceImageCuda,
+                                      const float *warpedCuda,
+                                      const float4 *spaGradientCuda,
+                                      float4 *ssdGradientCuda,
+                                      const float& maxSD,
+                                      const int *maskCuda,
+                                      const size_t& activeVoxelNumber) {
     // Copy the constant memory variables
-    const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-    const int voxelNumber = CalcVoxelNumber(*referenceImage);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisationNumber, &maxSD, sizeof(float)));
-    // Bind the required textures
-    referenceTexture.normalized = true;
-    referenceTexture.filterMode = cudaFilterModeLinear;
-    referenceTexture.addressMode[0] = cudaAddressModeWrap;
-    referenceTexture.addressMode[1] = cudaAddressModeWrap;
-    referenceTexture.addressMode[2] = cudaAddressModeWrap;
-    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-    NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, reference_d, channelDesc));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, warped_d, voxelNumber * sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, spaGradient_d, voxelNumber * sizeof(float4)));
+    const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+
+    auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
+                                                           cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
+    auto warpedTexture = cudaCommon_createTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
+                                                        cudaChannelFormatKindFloat, 1);
+    auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+                                                      cudaChannelFormatKindSigned, 1);
+    auto spaGradientTexture = cudaCommon_createTextureObject(spaGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
+                                                             cudaChannelFormatKindFloat, 4);
+
     // Set the gradient image to zero
-    NR_CUDA_SAFE_CALL(cudaMemset(ssdGradient_d, 0, voxelNumber * sizeof(float4)))
-        const unsigned Grid_reg_getSSDGradient =
-        (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getSSDGradient));
-    dim3 B1(blockSize->reg_getSSDGradient, 1, 1);
-    dim3 G1(Grid_reg_getSSDGradient, Grid_reg_getSSDGradient, 1);
-    if (referenceDim.z > 1)
-        reg_getSSDGradient3D_kernel <<< G1, B1 >>> (ssdGradient_d);
-    else reg_getSSDGradient2D_kernel <<< G1, B1 >>> (ssdGradient_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-    // Unbind the textures
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture));
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(spaGradientTexture));
+    NR_CUDA_SAFE_CALL(cudaMemset(ssdGradientCuda, 0, voxelNumber * sizeof(float4)));
+
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getSSDGradient;
+    const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+    if (referenceImageDim.z > 1)
+        reg_getSSDGradient3D_kernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
+                                                             *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber);
+    else reg_getSSDGradient2D_kernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
+                                                              *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer,
                                      this->referenceDevicePointer,
                                      this->warpedFloatingDevicePointer,
                                      this->warpedFloatingGradientDevicePointer,
                                      this->forwardVoxelBasedGradientDevicePointer,
-                                     1.0f,
+                                     1.f,
                                      this->referenceMaskDevicePointer,
                                      this->activeVoxelNumber);
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 2f55dd21..69a6602b 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -16,8 +16,7 @@
 #include "_reg_measure_gpu.h"
 #include "_reg_ssd.h"
 
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 /// @brief SSD measure of similarity class on the device
 class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
 public:
@@ -30,7 +29,7 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
     virtual void InitialiseMeasure(nifti_image *refImgPtr,
                                    nifti_image *floImgPtr,
                                    int *maskRefPtr,
-                                   int activeVoxNum,
+                                   size_t activeVoxNum,
                                    nifti_image *warFloImgPtr,
                                    nifti_image *warFloGraPtr,
                                    nifti_image *forVoxBasedGraPtr,
@@ -46,22 +45,4 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
     /// @brief Compute the voxel based ssd gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
 };
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-extern "C++"
-float reg_getSSDValue_gpu(nifti_image *referenceImage,
-                          cudaArray **reference_d,
-                          float **warped_d,
-                          int **mask_d,
-                          int activeVoxelNumber);
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-extern "C++"
-void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage,
-                                      cudaArray *reference_d,
-                                      float *warped_d,
-                                      float4 *spaGradient_d,
-                                      float4 *ssdGradient_d,
-                                      float maxSD,
-                                      int *mask_d,
-                                      int activeVoxelNumber);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu
index d145915b..c3832e52 100755
--- a/reg-lib/cuda/_reg_ssd_kernels.cu
+++ b/reg-lib/cuda/_reg_ssd_kernels.cu
@@ -14,136 +14,125 @@
 
 #include "_reg_ssd_gpu.h"
 #include "_reg_ssd_kernels.cu"
+#include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
-texture<float, 3, cudaReadModeElementType> referenceTexture;
-texture<float, 1, cudaReadModeElementType> warpedTexture;
-texture<int, 1, cudaReadModeElementType> maskTexture;
-texture<float4, 1, cudaReadModeElementType> spaGradientTexture;
-/* *************************************************************** */
-__device__ __constant__ int c_ActiveVoxelNumber;
-__device__ __constant__ int3 c_ReferenceImageDim;
-__device__ __constant__ float c_NormalisationNumber;
-/* *************************************************************** */
-__global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_ActiveVoxelNumber){
-
-        int3 imageSize = c_ReferenceImageDim;
-        unsigned index=tex1Dfetch(maskTexture,tid);
-        const int z = index/(imageSize.x*imageSize.y);
-        const int tempIndex = index - z*imageSize.x*imageSize.y;
-        const int y = tempIndex/imageSize.x;
-        const int x = tempIndex - y*imageSize.x;
-
-        float difference = tex3D(referenceTexture,
-                                    ((float)x+0.5f)/(float)imageSize.x,
-                                    ((float)y+0.5f)/(float)imageSize.y,
-                                    ((float)z+0.5f)/(float)imageSize.z);
-        difference -= tex1Dfetch(warpedTexture,index);
-        if(difference==difference)
-            squaredDifference[tid]= difference*difference;
-        else squaredDifference[tid] = 0.f;
+__global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference,
+                                                  cudaTextureObject_t referenceTexture,
+                                                  cudaTextureObject_t warpedTexture,
+                                                  cudaTextureObject_t maskTexture,
+                                                  const int3 referenceImageDim,
+                                                  const unsigned activeVoxelNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < activeVoxelNumber) {
+        const unsigned index = tex1Dfetch<int>(maskTexture, tid);
+        int quot, rem;
+        reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, referenceImageDim.x, quot, rem);
+        const int y = quot, x = rem;
+
+        float difference = tex3D<float>(referenceTexture,
+                                        ((float)x + 0.5f) / (float)referenceImageDim.x,
+                                        ((float)y + 0.5f) / (float)referenceImageDim.y,
+                                        ((float)z + 0.5f) / (float)referenceImageDim.z);
+        difference -= tex1Dfetch<float>(warpedTexture, index);
+        squaredDifference[tid] = difference == difference ? difference * difference : 0;
     }
 }
 /* *************************************************************** */
-__global__ void reg_getSquaredDifference2D_kernel(float *squaredDifference)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_ActiveVoxelNumber){
-
-        int3 imageSize = c_ReferenceImageDim;
-        unsigned index=tex1Dfetch(maskTexture,tid);
-        const int y = index/imageSize.x;
-        const int x = index - y*imageSize.x;
-
-        float difference = tex3D(referenceTexture,
-                                    ((float)x+0.5f)/(float)imageSize.x,
-                                    ((float)y+0.5f)/(float)imageSize.y,
-                                    0.5f);
-        difference -= tex1Dfetch(warpedTexture,index);
-        if(difference==difference)
-            squaredDifference[tid]= difference*difference;
-        else squaredDifference[tid] = 0.f;
+__global__ void reg_getSquaredDifference2D_kernel(float *squaredDifference,
+                                                  cudaTextureObject_t referenceTexture,
+                                                  cudaTextureObject_t warpedTexture,
+                                                  cudaTextureObject_t maskTexture,
+                                                  const int3 referenceImageDim,
+                                                  const unsigned activeVoxelNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < activeVoxelNumber) {
+        const unsigned index = tex1Dfetch<int>(maskTexture, tid);
+        int quot, rem;
+        reg_div_cuda(index, referenceImageDim.x, quot, rem);
+        const int y = quot, x = rem;
+
+        float difference = tex3D<float>(referenceTexture,
+                                        ((float)x + 0.5f) / (float)referenceImageDim.x,
+                                        ((float)y + 0.5f) / (float)referenceImageDim.y,
+                                        0.5f);
+        difference -= tex1Dfetch<float>(warpedTexture, index);
+        squaredDifference[tid] = difference == difference ? difference * difference : 0;
     }
 }
 /* *************************************************************** */
-__global__ void reg_getSSDGradient2D_kernel(float4 *ssdGradient)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_ActiveVoxelNumber){
-
-        int3 imageSize = c_ReferenceImageDim;
-        unsigned index = tex1Dfetch(maskTexture,tid);
-        const int y = index/imageSize.x;
-        const int x = index - y*imageSize.x;
-
-        float refValue = tex3D(referenceTexture,
-                               ((float)x+0.5f)/(float)imageSize.x,
-                               ((float)y+0.5f)/(float)imageSize.y,
-                               0.5f);
-        if(refValue != refValue)
+__global__ void reg_getSSDGradient2D_kernel(float4 *ssdGradient,
+                                            cudaTextureObject_t referenceTexture,
+                                            cudaTextureObject_t warpedTexture,
+                                            cudaTextureObject_t maskTexture,
+                                            cudaTextureObject_t spaGradientTexture,
+                                            const int3 referenceImageDim,
+                                            const float maxSD,
+                                            const unsigned activeVoxelNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < activeVoxelNumber) {
+        const unsigned index = tex1Dfetch<int>(maskTexture, tid);
+        int quot, rem;
+        reg_div_cuda(index, referenceImageDim.x, quot, rem);
+        const int y = quot, x = rem;
+
+        const float refValue = tex3D<float>(referenceTexture,
+                                            ((float)x + 0.5f) / (float)referenceImageDim.x,
+                                            ((float)y + 0.5f) / (float)referenceImageDim.y,
+                                            0.5f);
+        if (refValue != refValue)
             return;
-        float warpValue = tex1Dfetch(warpedTexture,index);
-        if(warpValue != warpValue)
+        const float warpValue = tex1Dfetch<float>(warpedTexture, index);
+        if (warpValue != warpValue)
             return;
 
-        float4 spaGradientValue = tex1Dfetch(spaGradientTexture,tid);
-        if(spaGradientValue.x != spaGradientValue.x ||
-           spaGradientValue.y != spaGradientValue.y)
+        const float4 spaGradientValue = tex1Dfetch<float4>(spaGradientTexture, tid);
+        if (spaGradientValue.x != spaGradientValue.x || spaGradientValue.y != spaGradientValue.y)
             return;
 
-        float common = -2.f * (refValue - warpValue) /
-                (c_NormalisationNumber * (float)c_ActiveVoxelNumber);
-
-        ssdGradient[index] = make_float4(
-                    common * spaGradientValue.x,
-                    common * spaGradientValue.y,
-                    0.f,
-                    0.f
-                    );
+        const float common = -2.f * (refValue - warpValue) / (maxSD * (float)activeVoxelNumber);
+        ssdGradient[index] = make_float4(common * spaGradientValue.x, common * spaGradientValue.y, 0.f, 0.f);
     }
 }
 /* *************************************************************** */
-__global__ void reg_getSSDGradient3D_kernel(float4 *ssdGradient)
-{
-    const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x;
-    if(tid<c_ActiveVoxelNumber){
-
-        int3 imageSize = c_ReferenceImageDim;
-        unsigned index = tex1Dfetch(maskTexture,tid);
-        const int z = index/(imageSize.x*imageSize.y);
-        const int tempIndex = index - z*imageSize.x*imageSize.y;
-        const int y = tempIndex/imageSize.x;
-        const int x = tempIndex - y*imageSize.x;
-
-        float refValue = tex3D(referenceTexture,
-                               ((float)x+0.5f)/(float)imageSize.x,
-                               ((float)y+0.5f)/(float)imageSize.y,
-                               ((float)z+0.5f)/(float)imageSize.z);
-        if(refValue != refValue)
+__global__ void reg_getSSDGradient3D_kernel(float4 *ssdGradient,
+                                            cudaTextureObject_t referenceTexture,
+                                            cudaTextureObject_t warpedTexture,
+                                            cudaTextureObject_t maskTexture,
+                                            cudaTextureObject_t spaGradientTexture,
+                                            const int3 referenceImageDim,
+                                            const float maxSD,
+                                            const unsigned activeVoxelNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < activeVoxelNumber) {
+        const unsigned index = tex1Dfetch<int>(maskTexture, tid);
+        int quot, rem;
+        reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem);
+        const int z = quot;
+        reg_div_cuda(rem, referenceImageDim.x, quot, rem);
+        const int y = quot, x = rem;
+
+        const float refValue = tex3D<float>(referenceTexture,
+                                            ((float)x + 0.5f) / (float)referenceImageDim.x,
+                                            ((float)y + 0.5f) / (float)referenceImageDim.y,
+                                            ((float)z + 0.5f) / (float)referenceImageDim.z);
+        if (refValue != refValue)
             return;
 
-        float warpValue = tex1Dfetch(warpedTexture,index);
-        if(warpValue != warpValue)
+        const float warpValue = tex1Dfetch<float>(warpedTexture, index);
+        if (warpValue != warpValue)
             return;
 
-        float4 spaGradientValue = tex1Dfetch(spaGradientTexture,tid);
-        if(spaGradientValue.x != spaGradientValue.x ||
-           spaGradientValue.y != spaGradientValue.y ||
-           spaGradientValue.z != spaGradientValue.z)
+        const float4 spaGradientValue = tex1Dfetch<float4>(spaGradientTexture, tid);
+        if (spaGradientValue.x != spaGradientValue.x ||
+            spaGradientValue.y != spaGradientValue.y ||
+            spaGradientValue.z != spaGradientValue.z)
             return;
 
-        float common = -2.f * (refValue - warpValue) /
-                (c_NormalisationNumber * (float)c_ActiveVoxelNumber);
-
-        ssdGradient[index] = make_float4(
-                    common * spaGradientValue.x,
-                    common * spaGradientValue.y,
-                    common * spaGradientValue.z,
-                    0.f
-                    );
+        const float common = -2.f * (refValue - warpValue) / (maxSD * (float)activeVoxelNumber);
+        ssdGradient[index] = make_float4(common * spaGradientValue.x, common * spaGradientValue.y, common * spaGradientValue.z, 0.f);
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index fcb8d885..193c18eb 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -70,54 +70,33 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
 
     const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric;
     const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks));
-    const dim3 blockDims(blocks, 1, 1);
     const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
     reg_voxelCentric2NodeCentric_kernel<<<gridDims, blockDims>>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims,
                                                                  voxelImageDims, is3d, weight, transformation, reorientation);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
-                                                    nifti_image *controlPointImage,
-                                                    float4 *nodeNMIGradientArray_d) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    const int nodeNumber = CalcVoxelNumber(*controlPointImage);
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int)));
-
-    float4 *matrix_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3 * sizeof(float4)));
-    matrix_h[0] = make_float4(sourceMatrix_xyz->m[0][0], sourceMatrix_xyz->m[0][1], sourceMatrix_xyz->m[0][2], sourceMatrix_xyz->m[0][3]);
-    matrix_h[1] = make_float4(sourceMatrix_xyz->m[1][0], sourceMatrix_xyz->m[1][1], sourceMatrix_xyz->m[1][2], sourceMatrix_xyz->m[1][3]);
-    matrix_h[2] = make_float4(sourceMatrix_xyz->m[2][0], sourceMatrix_xyz->m[2][1], sourceMatrix_xyz->m[2][2], sourceMatrix_xyz->m[2][3]);
-    float4 *matrix_d;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3 * sizeof(float4)));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3 * sizeof(float4), cudaMemcpyHostToDevice));
-    NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h));
-    NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3 * sizeof(float4)));
-
-    const unsigned Grid_reg_convertNMIGradientFromVoxelToRealSpace =
-        (unsigned)ceil(sqrtf((float)nodeNumber / (float)blockSize->reg_convertNMIGradientFromVoxelToRealSpace));
-    dim3 G1(Grid_reg_convertNMIGradientFromVoxelToRealSpace, Grid_reg_convertNMIGradientFromVoxelToRealSpace, 1);
-    dim3 B1(blockSize->reg_convertNMIGradientFromVoxelToRealSpace, 1, 1);
-    _reg_convertNMIGradientFromVoxelToRealSpace_kernel<<<G1, B1>>>(nodeNMIGradientArray_d);
-    NR_CUDA_CHECK_KERNEL(G1, B1);
-
-    NR_CUDA_SAFE_CALL(cudaUnbindTexture(matrixTexture));
-    NR_CUDA_SAFE_CALL(cudaFree(matrix_d));
+void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ,
+                                                    const nifti_image *controlPointImage,
+                                                    float4 *nmiGradientCuda) {
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_convertNMIGradientFromVoxelToRealSpace;
+    const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+    reg_convertNMIGradientFromVoxelToRealSpace_kernel<<<gridDims, blockDims>>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_gaussianSmoothing_gpu(nifti_image *image,
-                               float4 *imageArray_d,
-                               float sigma,
-                               bool smoothXYZ[8]) {
+void reg_gaussianSmoothing_gpu(const nifti_image *image,
+                               float4 *imageCuda,
+                               const float& sigma,
+                               const bool smoothXYZ[8]) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    const int voxelNumber = CalcVoxelNumber(*image);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-
     bool axisToSmooth[8];
     if (smoothXYZ == nullptr) {
         for (int i = 0; i < 8; i++) axisToSmooth[i] = true;
@@ -130,220 +109,207 @@ void reg_gaussianSmoothing_gpu(nifti_image *image,
             float currentSigma;
             if (sigma > 0) currentSigma = sigma / image->pixdim[n];
             else currentSigma = fabs(sigma); // voxel based if negative value
-            int radius = (int)ceil(currentSigma * 3.0f);
+            const int radius = (int)ceil(currentSigma * 3.0f);
             if (radius > 0) {
-                int kernelSize = 1 + radius * 2;
-                float *kernel_h;
-                NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize * sizeof(float)));
+                const int kernelSize = 1 + radius * 2;
+                float *kernel;
+                NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel, kernelSize * sizeof(float)));
                 float kernelSum = 0;
                 for (int i = -radius; i <= radius; i++) {
-                    kernel_h[radius + i] = (float)(exp(-((float)i * (float)i) / (2.0 * currentSigma * currentSigma)) /
-                                                   (currentSigma * 2.506628274631));
+                    kernel[radius + i] = (float)(exp(-((float)i * (float)i) / (2.0 * currentSigma * currentSigma)) /
+                                                 (currentSigma * 2.506628274631));
                     // 2.506... = sqrt(2*pi)
-                    kernelSum += kernel_h[radius + i];
+                    kernelSum += kernel[radius + i];
                 }
                 for (int i = 0; i < kernelSize; i++)
-                    kernel_h[i] /= kernelSum;
+                    kernel[i] /= kernelSum;
 
-                float *kernel_d;
-                NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize * sizeof(float)));
-                NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize * sizeof(float), cudaMemcpyHostToDevice));
-                NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h));
+                float *kernelCuda;
+                NR_CUDA_SAFE_CALL(cudaMalloc(&kernelCuda, kernelSize * sizeof(float)));
+                NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice));
+                NR_CUDA_SAFE_CALL(cudaFreeHost(kernel));
 
                 float4 *smoothedImage;
                 NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4)));
-                NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize * sizeof(float)));
-                NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber * sizeof(float4)));
 
-                unsigned Grid_reg_ApplyConvolutionWindow;
-                dim3 B, G;
+                auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear,
+                                                                   voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+                auto kernelTexture = cudaCommon_createTextureObject(kernelCuda, cudaResourceTypeLinear,
+                                                                    kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1);
+
+                unsigned blocks, grids;
+                dim3 blockDims, gridDims;
                 switch (n) {
                 case 1:
-                    Grid_reg_ApplyConvolutionWindow =
-                        (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongX));
-                    B = dim3(blockSize->reg_ApplyConvolutionWindowAlongX, 1, 1);
-                    G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
-                    _reg_ApplyConvolutionWindowAlongX_kernel<<<G, B>>>(smoothedImage, kernelSize);
-                    NR_CUDA_CHECK_KERNEL(G, B);
+                    blocks = blockSize->reg_ApplyConvolutionWindowAlongX;
+                    grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                    gridDims = dim3(grids, grids, 1);
+                    blockDims = dim3(blocks, 1, 1);
+                    reg_applyConvolutionWindowAlongX_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                                                                                     kernelSize, imageDim, (unsigned)voxelNumber);
+                    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                     break;
                 case 2:
-                    Grid_reg_ApplyConvolutionWindow =
-                        (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongY));
-                    B = dim3(blockSize->reg_ApplyConvolutionWindowAlongY, 1, 1);
-                    G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
-                    _reg_ApplyConvolutionWindowAlongY_kernel<<<G, B>>>(smoothedImage, kernelSize);
-                    NR_CUDA_CHECK_KERNEL(G, B);
+                    blocks = blockSize->reg_ApplyConvolutionWindowAlongY;
+                    grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                    gridDims = dim3(grids, grids, 1);
+                    blockDims = dim3(blocks, 1, 1);
+                    reg_applyConvolutionWindowAlongY_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                                                                                     kernelSize, imageDim, (unsigned)voxelNumber);
+                    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                     break;
                 case 3:
-                    Grid_reg_ApplyConvolutionWindow =
-                        (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongZ));
-                    B = dim3(blockSize->reg_ApplyConvolutionWindowAlongZ, 1, 1);
-                    G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
-                    _reg_ApplyConvolutionWindowAlongZ_kernel<<<G, B>>>(smoothedImage, kernelSize);
-                    NR_CUDA_CHECK_KERNEL(G, B);
+                    blocks = blockSize->reg_ApplyConvolutionWindowAlongZ;
+                    grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                    gridDims = dim3(grids, grids, 1);
+                    blockDims = dim3(blocks, 1, 1);
+                    reg_applyConvolutionWindowAlongZ_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                                                                                     kernelSize, imageDim, (unsigned)voxelNumber);
+                    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                     break;
                 }
-                NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture));
-                NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
-                NR_CUDA_SAFE_CALL(cudaFree(kernel_d));
-                NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
+                NR_CUDA_SAFE_CALL(cudaFree(kernelCuda));
+                NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
                 NR_CUDA_SAFE_CALL(cudaFree(smoothedImage));
             }
         }
     }
 }
 /* *************************************************************** */
-void reg_smoothImageForCubicSpline_gpu(nifti_image *image,
-                                       float4 *imageArray_d,
-                                       float *spacingVoxel) {
+void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
+                                       float4 *imageCuda,
+                                       const float *spacingVoxel) {
     auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    const int voxelNumber = CalcVoxelNumber(*image);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim, sizeof(int3)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int)));
-
     for (int n = 0; n < 3; n++) {
         if (spacingVoxel[n] > 0 && image->dim[n + 1] > 1) {
             int radius = static_cast<int>(reg_ceil(2.0 * spacingVoxel[n]));
             int kernelSize = 1 + radius * 2;
 
-            float *kernel_h;
-            NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize * sizeof(float)));
+            float *kernel;
+            NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel, kernelSize * sizeof(float)));
 
             float coeffSum = 0;
             for (int it = -radius; it <= radius; it++) {
                 float coeff = (float)(fabs((float)(float)it / (float)spacingVoxel[0]));
-                if (coeff < 1.0) kernel_h[it + radius] = (float)(2.0 / 3.0 - coeff * coeff + 0.5 * coeff * coeff * coeff);
-                else if (coeff < 2.0) kernel_h[it + radius] = (float)(-(coeff - 2.0) * (coeff - 2.0) * (coeff - 2.0) / 6.0);
-                else kernel_h[it + radius] = 0;
-                coeffSum += kernel_h[it + radius];
+                if (coeff < 1.0) kernel[it + radius] = (float)(2.0 / 3.0 - coeff * coeff + 0.5 * coeff * coeff * coeff);
+                else if (coeff < 2.0) kernel[it + radius] = (float)(-(coeff - 2.0) * (coeff - 2.0) * (coeff - 2.0) / 6.0);
+                else kernel[it + radius] = 0;
+                coeffSum += kernel[it + radius];
             }
-            for (int it = 0; it < kernelSize; it++) kernel_h[it] /= coeffSum;
+            for (int it = 0; it < kernelSize; it++)
+                kernel[it] /= coeffSum;
+
+            float *kernelCuda;
+            NR_CUDA_SAFE_CALL(cudaMalloc(&kernelCuda, kernelSize * sizeof(float)));
+            NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice));
+            NR_CUDA_SAFE_CALL(cudaFreeHost(kernel));
 
-            float *kernel_d;
-            NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize * sizeof(float)));
-            NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize * sizeof(float), cudaMemcpyHostToDevice));
-            NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h));
-            NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize * sizeof(float)));
+            auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear,
+                                                               voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+            auto kernelTexture = cudaCommon_createTextureObject(kernelCuda, cudaResourceTypeLinear,
+                                                                kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1);
 
-            float4 *smoothedImage_d;
-            NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage_d, voxelNumber * sizeof(float4)));
-            NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber * sizeof(float4)));
+            float4 *smoothedImage;
+            NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4)));
 
-            unsigned Grid_reg_ApplyConvolutionWindow;
-            dim3 B, G;
+            unsigned grids, blocks;
+            dim3 blockDims, gridDims;
             switch (n) {
             case 0:
-                Grid_reg_ApplyConvolutionWindow =
-                    (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongX));
-                B = dim3(blockSize->reg_ApplyConvolutionWindowAlongX, 1, 1);
-                G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
-                _reg_ApplyConvolutionWindowAlongX_kernel<<<G, B>>>(smoothedImage_d, kernelSize);
-                NR_CUDA_CHECK_KERNEL(G, B);
+                blocks = blockSize->reg_ApplyConvolutionWindowAlongX;
+                grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                gridDims = dim3(grids, grids, 1);
+                blockDims = dim3(blocks, 1, 1);
+                reg_applyConvolutionWindowAlongX_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                                                                                 kernelSize, imageDim, (unsigned)voxelNumber);
+                NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                 break;
             case 1:
-                Grid_reg_ApplyConvolutionWindow =
-                    (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongY));
-                B = dim3(blockSize->reg_ApplyConvolutionWindowAlongY, 1, 1);
-                G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
-                _reg_ApplyConvolutionWindowAlongY_kernel<<<G, B>>>(smoothedImage_d, kernelSize);
-                NR_CUDA_CHECK_KERNEL(G, B);
+                blocks = blockSize->reg_ApplyConvolutionWindowAlongY;
+                grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                gridDims = dim3(grids, grids, 1);
+                blockDims = dim3(blocks, 1, 1);
+                reg_applyConvolutionWindowAlongY_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                                                                                 kernelSize, imageDim, (unsigned)voxelNumber);
+                NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                 break;
             case 2:
-                Grid_reg_ApplyConvolutionWindow =
-                    (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongZ));
-                B = dim3(blockSize->reg_ApplyConvolutionWindowAlongZ, 1, 1);
-                G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1);
-                _reg_ApplyConvolutionWindowAlongZ_kernel<<<G, B>>>(smoothedImage_d, kernelSize);
-                NR_CUDA_CHECK_KERNEL(G, B);
+                blocks = blockSize->reg_ApplyConvolutionWindowAlongZ;
+                grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                gridDims = dim3(grids, grids, 1);
+                blockDims = dim3(blocks, 1, 1);
+                reg_applyConvolutionWindowAlongZ_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                                                                                 kernelSize, imageDim, (unsigned)voxelNumber);
+                NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                 break;
             }
-
-            NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture));
-            NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture));
-            NR_CUDA_SAFE_CALL(cudaFree(kernel_d));
-            NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
-            NR_CUDA_SAFE_CALL(cudaFree(smoothedImage_d));
+            NR_CUDA_SAFE_CALL(cudaFree(kernelCuda));
+            NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
+            NR_CUDA_SAFE_CALL(cudaFree(smoothedImage));
         }
     }
 }
 /* *************************************************************** */
-void reg_multiplyValue_gpu(int num, float4 *array_d, float value) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &value, sizeof(float)));
-
-    const unsigned Grid_reg_multiplyValues = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic));
-    dim3 G = dim3(Grid_reg_multiplyValues, Grid_reg_multiplyValues, 1);
-    dim3 B = dim3(blockSize->reg_arithmetic, 1, 1);
-    reg_multiplyValue_kernel_float4<<<G, B>>>(array_d);
-    NR_CUDA_CHECK_KERNEL(G, B);
+void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
+    const dim3 gridDims = dim3(grids, grids, 1);
+    const dim3 blockDims = dim3(blocks, 1, 1);
+    reg_multiplyValue_kernel_float4<<<gridDims, blockDims>>>(arrayCuda, value, (unsigned)count);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_addValue_gpu(int num, float4 *array_d, float value) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &value, sizeof(float)));
-
-    const unsigned Grid_reg_addValues = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic));
-    dim3 G = dim3(Grid_reg_addValues, Grid_reg_addValues, 1);
-    dim3 B = dim3(blockSize->reg_arithmetic, 1, 1);
-    reg_addValue_kernel_float4<<<G, B>>>(array_d);
-    NR_CUDA_CHECK_KERNEL(G, B);
+void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
+    const dim3 gridDims = dim3(grids, grids, 1);
+    const dim3 blockDims = dim3(blocks, 1, 1);
+    reg_addValue_kernel_float4<<<gridDims, blockDims>>>(arrayCuda, value, (unsigned)count);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int)));
-
-    const unsigned Grid_reg_multiplyArrays = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic));
-    dim3 G = dim3(Grid_reg_multiplyArrays, Grid_reg_multiplyArrays, 1);
-    dim3 B = dim3(blockSize->reg_arithmetic, 1, 1);
-    reg_multiplyArrays_kernel_float4<<<G, B>>>(array1_d, array2_d);
-    NR_CUDA_CHECK_KERNEL(G, B);
+void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
+    const dim3 gridDims = dim3(grids, grids, 1);
+    const dim3 blockDims = dim3(blocks, 1, 1);
+    reg_multiplyArrays_kernel_float4<<<gridDims, blockDims>>>(array1Cuda, array2Cuda, (unsigned)count);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int)));
-
-    const unsigned Grid_reg_addArrays = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic));
-    dim3 G = dim3(Grid_reg_addArrays, Grid_reg_addArrays, 1);
-    dim3 B = dim3(blockSize->reg_arithmetic, 1, 1);
-    reg_addArrays_kernel_float4<<<G, B>>>(array1_d, array2_d);
-    NR_CUDA_CHECK_KERNEL(G, B);
+void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
+    const dim3 gridDims = dim3(grids, grids, 1);
+    const dim3 blockDims = dim3(blocks, 1, 1);
+    reg_addArrays_kernel_float4<<<gridDims, blockDims>>>(array1Cuda, array2Cuda, (unsigned)count);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_fillMaskArray_gpu(int num, int *array1_d) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
-
-    NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int)));
-
-    const unsigned Grid_reg_fillMaskArray = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic));
-    dim3 G = dim3(Grid_reg_fillMaskArray, Grid_reg_fillMaskArray, 1);
-    dim3 B = dim3(blockSize->reg_arithmetic, 1, 1);
-    reg_fillMaskArray_kernel<<<G, B>>>(array1_d);
-    NR_CUDA_CHECK_KERNEL(G, B);
+void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) {
+    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
+    const dim3 gridDims = dim3(grids, grids, 1);
+    const dim3 blockDims = dim3(blocks, 1, 1);
+    reg_fillMaskArray_kernel<<<gridDims, blockDims>>>(arrayCuda, (unsigned)count);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-float reg_sumReduction_gpu(float *array_d, size_t size) {
-    thrust::device_ptr<float> dptr(array_d);
+float reg_sumReduction_gpu(float *arrayCuda, const size_t& size) {
+    thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(dptr, dptr + size, 0.f, thrust::plus<float>());
 }
 /* *************************************************************** */
-float reg_maxReduction_gpu(float *array_d, size_t size) {
-    thrust::device_ptr<float> dptr(array_d);
+float reg_maxReduction_gpu(float *arrayCuda, const size_t& size) {
+    thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(dptr, dptr + size, 0.f, thrust::maximum<float>());
 }
 /* *************************************************************** */
-float reg_minReduction_gpu(float *array_d, size_t size) {
-    thrust::device_ptr<float> dptr(array_d);
+float reg_minReduction_gpu(float *arrayCuda, const size_t& size) {
+    thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(dptr, dptr + size, 0.f, thrust::minimum<float>());
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index cccd33ef..dbd43398 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -27,42 +27,42 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
                                       const mat44 *voxelToMillimetre);
 /* *************************************************************** */
 extern "C++"
-void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz,
-                                                    nifti_image *controlPointImage,
-                                                    float4 *nodeNMIGradientArray_d);
+void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ,
+                                                    const nifti_image *controlPointImage,
+                                                    float4 *nmiGradientCuda);
 /* *************************************************************** */
 extern "C++"
-void reg_gaussianSmoothing_gpu(nifti_image *image,
-                               float4 *imageArray_d,
-                               float sigma,
-                               bool axisToSmooth[8]);
+void reg_gaussianSmoothing_gpu(const nifti_image *image,
+                               float4 *imageCuda,
+                               const float& sigma,
+                               const bool axisToSmooth[8]);
 /* *************************************************************** */
 extern "C++"
-void reg_smoothImageForCubicSpline_gpu(nifti_image *resultImage,
-                                       float4 *voxelNMIGradientArray_d,
-                                       float *smoothingRadius);
+void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
+                                       float4 *imageCuda,
+                                       const float *smoothingRadius);
 /* *************************************************************** */
 extern "C++"
-void reg_multiplyValue_gpu(int num, float4 *array_d, float value);
+void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value);
 /* *************************************************************** */
 extern "C++"
-void reg_addValue_gpu(int num, float4 *array_d, float value);
+void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value);
 /* *************************************************************** */
 extern "C++"
-void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d);
+void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda);
 /* *************************************************************** */
 extern "C++"
-void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d);
+void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda);
 /* *************************************************************** */
 extern "C++"
-void reg_fillMaskArray_gpu(int num, int *array1_d);
+void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count);
 /* *************************************************************** */
 extern "C++"
-float reg_sumReduction_gpu(float *array_d, size_t size);
+float reg_sumReduction_gpu(float *arrayCuda, const size_t& size);
 /* *************************************************************** */
 extern "C++"
-float reg_maxReduction_gpu(float *array_d, size_t size);
+float reg_maxReduction_gpu(float *arrayCuda, const size_t& size);
 /* *************************************************************** */
 extern "C++"
-float reg_minReduction_gpu(float *array_d, size_t size);
+float reg_minReduction_gpu(float *arrayCuda, const size_t& size);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index 112ec7b3..ac06be23 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -8,37 +8,8 @@
  *  See the LICENSE.txt file in the nifty_reg root folder
  */
 
-/* *************************************************************** */
-__device__ __constant__ int c_NodeNumber;
-__device__ __constant__ int c_VoxelNumber;
-__device__ __constant__ int3 c_TargetImageDim;
-__device__ __constant__ float3 c_VoxelNodeRatio;
-__device__ __constant__ int3 c_ControlPointImageDim;
-__device__ __constant__ int3 c_ImageDim;
-__device__ __constant__ float c_Weight;
-/* *************************************************************** */
-texture<float4, 1, cudaReadModeElementType> controlPointTexture;
-texture<float4, 1, cudaReadModeElementType> gradientImageTexture;
-texture<float4, 1, cudaReadModeElementType> matrixTexture;
-texture<float, 1, cudaReadModeElementType> convolutionKernelTexture;
-/* *************************************************************** */
-__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) {
-    out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]);
-    out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]);
-    out[2] = is3d ? weight * (mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2]) : 0;
-}
-/* *************************************************************** */
-__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool& is3d) {
-    out[0] = mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2] + mat.m[0][3];
-    out[1] = mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2] + mat.m[1][3];
-    out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0;
-}
-/* *************************************************************** */
-__device__ __inline__ void div(const int num, const int denom, int& quot, int& rem) {
-    // This will be optimised by the compiler into a single div instruction
-    quot = num / denom;
-    rem = num % denom;
-}
+#include "_reg_common_cuda_kernels.cu"
+
 /* *************************************************************** */
 __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda,
                                                     cudaTextureObject_t voxelImageTexture,
@@ -54,9 +25,9 @@ __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda,
         float nodeCoord[3], voxelCoord[3], reorientedValue[3];
         // Calculate the node coordinates
         int quot, rem;
-        div(tid, nodeImageDims.x * nodeImageDims.y, quot, rem);
+        reg_div_cuda(tid, nodeImageDims.x * nodeImageDims.y, quot, rem);
         nodeCoord[2] = quot;
-        div(rem, nodeImageDims.x, quot, rem);
+        reg_div_cuda(rem, nodeImageDims.x, quot, rem);
         nodeCoord[1] = quot; nodeCoord[0] = rem;
         // Transform into voxel coordinates
         reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord, is3d);
@@ -100,46 +71,44 @@ __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda,
     }
 }
 /* *************************************************************** */
-__global__ void _reg_convertNMIGradientFromVoxelToRealSpace_kernel(float4 *gradient) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_NodeNumber) {
-        float4 voxelGradient = gradient[tid];
+__global__ void reg_convertNMIGradientFromVoxelToRealSpace_kernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < nodeNumber) {
+        const float4 voxelGradient = gradient[tid];
         float4 realGradient;
-        float4 matrix = tex1Dfetch(matrixTexture, 0);
-        realGradient.x = matrix.x * voxelGradient.x + matrix.y * voxelGradient.y + matrix.z * voxelGradient.z;
-        matrix = tex1Dfetch(matrixTexture, 1);
-        realGradient.y = matrix.x * voxelGradient.x + matrix.y * voxelGradient.y + matrix.z * voxelGradient.z;
-        matrix = tex1Dfetch(matrixTexture, 2);
-        realGradient.z = matrix.x * voxelGradient.x + matrix.y * voxelGradient.y + matrix.z * voxelGradient.z;
-
+        realGradient.x = matrix.m[0][0] * voxelGradient.x + matrix.m[0][1] * voxelGradient.y + matrix.m[0][2] * voxelGradient.z;
+        realGradient.y = matrix.m[1][0] * voxelGradient.x + matrix.m[1][1] * voxelGradient.y + matrix.m[1][2] * voxelGradient.z;
+        realGradient.z = matrix.m[2][0] * voxelGradient.x + matrix.m[2][1] * voxelGradient.y + matrix.m[2][2] * voxelGradient.z;
         gradient[tid] = realGradient;
     }
 }
 /* *************************************************************** */
-__global__ void _reg_ApplyConvolutionWindowAlongX_kernel(float4 *smoothedImage, int windowSize) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        int3 imageSize = c_ImageDim;
-
-        int temp = tid;
-        const short z = (int)(temp / (imageSize.x * imageSize.y));
-        temp -= z * imageSize.x * imageSize.y;
-        const short y = (int)(temp / (imageSize.x));
-        short x = temp - y * (imageSize.x);
+__global__ void reg_applyConvolutionWindowAlongX_kernel(float4 *smoothedImage,
+                                                        cudaTextureObject_t imageTexture,
+                                                        cudaTextureObject_t kernelTexture,
+                                                        const int kernelSize,
+                                                        const int3 imageSize,
+                                                        const unsigned voxelNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < voxelNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem);
+        reg_div_cuda(rem, imageSize.x, quot, rem);
+        int x = rem;
 
-        int radius = (windowSize - 1) / 2;
+        const int radius = (kernelSize - 1) / 2;
         int index = tid - radius;
         x -= radius;
 
-        float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        float4 finalValue{};
 
         // Kahan summation used here
-        float3 c = make_float3(0.f, 0.f, 0.f), Y, t;
+        float3 c{}, Y, t;
         float windowValue;
-        for (int i = 0; i < windowSize; i++) {
+        for (int i = 0; i < kernelSize; i++) {
             if (-1 < x && x < imageSize.x) {
-                float4 gradientValue = tex1Dfetch(gradientImageTexture, index);
-                windowValue = tex1Dfetch(convolutionKernelTexture, i);
+                float4 gradientValue = tex1Dfetch<float4>(imageTexture, index);
+                windowValue = tex1Dfetch<float>(kernelTexture, i);
 
                 Y.x = gradientValue.x * windowValue - c.x;
                 Y.y = gradientValue.y * windowValue - c.y;
@@ -159,28 +128,31 @@ __global__ void _reg_ApplyConvolutionWindowAlongX_kernel(float4 *smoothedImage,
     }
 }
 /* *************************************************************** */
-__global__ void _reg_ApplyConvolutionWindowAlongY_kernel(float4 *smoothedImage, int windowSize) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        int3 imageSize = c_ImageDim;
-
-        const short z = (int)(tid / (imageSize.x * imageSize.y));
-        int index = tid - z * imageSize.x * imageSize.y;
-        short y = (int)(index / imageSize.x);
+__global__ void reg_applyConvolutionWindowAlongY_kernel(float4 *smoothedImage,
+                                                        cudaTextureObject_t imageTexture,
+                                                        cudaTextureObject_t kernelTexture,
+                                                        const int kernelSize,
+                                                        const int3 imageSize,
+                                                        const unsigned voxelNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < voxelNumber) {
+        int quot, rem;
+        reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem);
+        int y = rem / imageSize.x;
 
-        int radius = (windowSize - 1) / 2;
-        index = tid - imageSize.x * radius;
+        const int radius = (kernelSize - 1) / 2;
+        int index = tid - imageSize.x * radius;
         y -= radius;
 
-        float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        float4 finalValue{};
 
         // Kahan summation used here
-        float3 c = make_float3(0.f, 0.f, 0.f), Y, t;
+        float3 c{}, Y, t;
         float windowValue;
-        for (int i = 0; i < windowSize; i++) {
+        for (int i = 0; i < kernelSize; i++) {
             if (-1 < y && y < imageSize.y) {
-                float4 gradientValue = tex1Dfetch(gradientImageTexture, index);
-                windowValue = tex1Dfetch(convolutionKernelTexture, i);
+                float4 gradientValue = tex1Dfetch<float4>(imageTexture, index);
+                windowValue = tex1Dfetch<float>(kernelTexture, i);
 
                 Y.x = gradientValue.x * windowValue - c.x;
                 Y.y = gradientValue.y * windowValue - c.y;
@@ -200,26 +172,29 @@ __global__ void _reg_ApplyConvolutionWindowAlongY_kernel(float4 *smoothedImage,
     }
 }
 /* *************************************************************** */
-__global__ void _reg_ApplyConvolutionWindowAlongZ_kernel(float4 *smoothedImage, int windowSize) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        int3 imageSize = c_ImageDim;
-
-        short z = (int)(tid / ((imageSize.x) * (imageSize.y)));
+__global__ void reg_applyConvolutionWindowAlongZ_kernel(float4 *smoothedImage,
+                                                        cudaTextureObject_t imageTexture,
+                                                        cudaTextureObject_t kernelTexture,
+                                                        const int kernelSize,
+                                                        const int3 imageSize,
+                                                        const unsigned voxelNumber) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < voxelNumber) {
+        int z = (int)tid / (imageSize.x * imageSize.y);
 
-        int radius = (windowSize - 1) / 2;
+        const int radius = (kernelSize - 1) / 2;
         int index = tid - imageSize.x * imageSize.y * radius;
         z -= radius;
 
-        float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        float4 finalValue{};
 
         // Kahan summation used here
-        float3 c = make_float3(0.f, 0.f, 0.f), Y, t;
+        float3 c{}, Y, t;
         float windowValue;
-        for (int i = 0; i < windowSize; i++) {
+        for (int i = 0; i < kernelSize; i++) {
             if (-1 < z && z < imageSize.z) {
-                float4 gradientValue = tex1Dfetch(gradientImageTexture, index);
-                windowValue = tex1Dfetch(convolutionKernelTexture, i);
+                float4 gradientValue = tex1Dfetch<float4>(imageTexture, index);
+                windowValue = tex1Dfetch<float>(kernelTexture, i);
 
                 Y.x = gradientValue.x * windowValue - c.x;
                 Y.y = gradientValue.y * windowValue - c.y;
@@ -239,71 +214,67 @@ __global__ void _reg_ApplyConvolutionWindowAlongZ_kernel(float4 *smoothedImage,
     }
 }
 /* *************************************************************** */
-__global__ void reg_multiplyValue_kernel_float(float *array_d) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        array_d[tid] *= c_Weight;
-    }
+__global__ void reg_multiplyValue_kernel_float(float *array, const float value, const unsigned count) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < count)
+        array[tid] *= value;
 }
 /* *************************************************************** */
-__global__ void reg_multiplyValue_kernel_float4(float4 *array_d) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        float4 temp = array_d[tid];
-        array_d[tid] = make_float4(temp.x * c_Weight, temp.y * c_Weight, temp.z * c_Weight, temp.w * c_Weight);
+__global__ void reg_multiplyValue_kernel_float4(float4 *array, const float value, const unsigned count) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < count) {
+        const float4 temp = array[tid];
+        array[tid] = make_float4(temp.x * value, temp.y * value, temp.z * value, temp.w * value);
     }
 }
 /* *************************************************************** */
-__global__ void reg_addValue_kernel_float(float *array_d) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        array_d[tid] += c_Weight;
-    }
+__global__ void reg_addValue_kernel_float(float *array, const float value, const unsigned count) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < count)
+        array[tid] += value;
 }
 /* *************************************************************** */
-__global__ void reg_addValue_kernel_float4(float4 *array_d) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        float4 temp = array_d[tid];
-        array_d[tid] = make_float4(temp.x + c_Weight, temp.y + c_Weight, temp.z + c_Weight, temp.w + c_Weight);
+__global__ void reg_addValue_kernel_float4(float4 *array, const float value, const unsigned count) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < count) {
+        const float4 temp = array[tid];
+        array[tid] = make_float4(temp.x + value, temp.y + value, temp.z + value, temp.w + value);
     }
 }
 /* *************************************************************** */
-__global__ void reg_multiplyArrays_kernel_float(float *array1_d, float *array2_d) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        array1_d[tid] *= array2_d[tid];
-    }
+__global__ void reg_multiplyArrays_kernel_float(float *array1, float *array2, const unsigned count) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < count)
+        array1[tid] *= array2[tid];
 }
 /* *************************************************************** */
-__global__ void reg_multiplyArrays_kernel_float4(float4 *array1_d, float4 *array2_d) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        float4 a = array1_d[tid];
-        float4 b = array1_d[tid];
-        array1_d[tid] = make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
+__global__ void reg_multiplyArrays_kernel_float4(float4 *array1, float4 *array2, const unsigned count) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < count) {
+        const float4 a = array1[tid];
+        const float4 b = array2[tid];
+        array1[tid] = make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
     }
 }
 /* *************************************************************** */
-__global__ void reg_addArrays_kernel_float(float *array1_d, float *array2_d) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        array1_d[tid] += array2_d[tid];
-    }
+__global__ void reg_addArrays_kernel_float(float *array1, float *array2, const unsigned count) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < count)
+        array1[tid] += array2[tid];
 }
 /* *************************************************************** */
-__global__ void reg_addArrays_kernel_float4(float4 *array1_d, float4 *array2_d) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber) {
-        float4 a = array1_d[tid];
-        float4 b = array1_d[tid];
-        array1_d[tid] = make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+__global__ void reg_addArrays_kernel_float4(float4 *array1, float4 *array2, const unsigned count) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < count) {
+        const float4 a = array1[tid];
+        const float4 b = array2[tid];
+        array1[tid] = make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
     }
 }
 /* *************************************************************** */
-__global__ void reg_fillMaskArray_kernel(int *array1_d) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_VoxelNumber)
-        array1_d[tid] = tid;
+__global__ void reg_fillMaskArray_kernel(int *array, const unsigned count) {
+    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (tid < count)
+        array[tid] = tid;
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 1947f066..7579d2fa 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -1,5 +1,5 @@
 /*
- *  _reg_blockMatching_gpu.cu
+ *  blockMatchingKernel.cu
  *
  *
  *  Created by Marc Modat and Pankaj Daga on 24/03/2009.
@@ -17,185 +17,161 @@
 #include <vector>
 #include "_reg_maths.h"
 
-//#define USE_TEST_KERNEL
+// #define USE_TEST_KERNEL
 ////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////
 /*
-*  before it was in the file _reg_blockMatching_kernels.cu
-*
-*
-*  Created by Marc Modat and Pankaj Daga on 24/03/2009.
+ *  before it was in the file _reg_blockMatching_kernels.cu
+ *
+ *
+ *  Created by Marc Modat and Pankaj Daga on 24/03/2009.
  *  Copyright (c) 2009-2018, University College London
  *  Copyright (c) 2018, NiftyReg Developers.
  *  All rights reserved.
-*  See the LICENSE.txt file in the nifty_reg root folder
-*
-*/
+ *  See the LICENSE.txt file in the nifty_reg root folder
+ *
+ */
 // Some parameters that we need for the kernel execution.
 // The caller is supposed to ensure that the values are set
 
-// Number of blocks in each dimension
-__device__ __constant__ int3 c_BlockDim;
-__device__ __constant__ uint3 c_ImageSize;
-
 // Transformation matrix from nifti header
-__device__          __constant__ float4 t_m_a;
-__device__          __constant__ float4 t_m_b;
-__device__          __constant__ float4 t_m_c;
+__device__ __constant__ float4 t_m_a;
+__device__ __constant__ float4 t_m_b;
+__device__ __constant__ float4 t_m_c;
 
-#define BLOCK_WIDTH 4
-#define BLOCK_SIZE 64
-#define OVERLAP_SIZE 3
-#define STEP_SIZE 1
+#define BLOCK_WIDTH   4
+#define BLOCK_SIZE    64
+#define OVERLAP_SIZE  3
+#define STEP_SIZE     1
 
-
-texture<float, 1, cudaReadModeElementType> referenceImageArray_texture;
-texture<float, 1, cudaReadModeElementType> warpedImageArray_texture;
-texture<int, 1, cudaReadModeElementType> totalBlock_texture;
 /* *************************************************************** */
 template<class DataType>
-__inline__ __device__
-void reg2D_mat44_mul_cuda(float* mat, DataType const* in, DataType *out)
-{
-   out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * 0 + (double)mat[0 * 4 + 3]);
-   out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * 0 + (double)mat[1 * 4 + 3]);
-   return;
+__device__ __inline__ void reg2D_mat44_mul_cuda(const float *mat, const DataType *in, DataType *out) {
+    out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * 0 + (double)mat[0 * 4 + 3]);
+    out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * 0 + (double)mat[1 * 4 + 3]);
 }
 template<class DataType>
-__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, DataType *out)
-{
-   out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
-   out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
-   out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
-   return;
+__device__ __inline__ void reg_mat44_mul_cuda(const float *mat, const DataType *in, DataType *out) {
+    out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
+    out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
+    out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
 }
 // Apply the transformation matrix
-__device__ inline void apply_affine(const float4 &pt, float * result)
-{
-   float4 mat = t_m_a;
-   result[0] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w);
-   mat = t_m_b;
-   result[1] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w);
-   mat = t_m_c;
-   result[2] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w);
+__device__ __inline__ void apply_affine(const float4& pt, float *result) {
+    float4 mat = t_m_a;
+    result[0] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w);
+    mat = t_m_b;
+    result[1] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w);
+    mat = t_m_c;
+    result[2] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w);
 }
 /* *************************************************************** */
-__inline__ __device__
-float blockReduce2DSum(float val, int tid)
-{
-   static __shared__ float shared[16];
-   shared[tid] = val;
-   __syncthreads();
-
-	for (unsigned i = 8; i > 0; i >>= 1){
-        if (tid < i) {
+__device__ __inline__ float blockReduce2DSum(float val, unsigned tid) {
+    static __shared__ float shared[16];
+    shared[tid] = val;
+    __syncthreads();
+
+    for (unsigned i = 8; i > 0; i >>= 1) {
+        if (tid < i)
             shared[tid] += shared[tid + i];
-        }
-		__syncthreads();
-	}
-	return shared[0];
+        __syncthreads();
+    }
+    return shared[0];
 }
 /* *************************************************************** */
-__inline__ __device__
-float blockReduceSum(float val, int tid)
-{
-   static __shared__ float shared[64];
-   shared[tid] = val;
-   __syncthreads();
-
-	for (unsigned i = 32; i > 0; i >>= 1){
-        if (tid < i) {
+__device__ __inline__ float blockReduceSum(float val, unsigned tid) {
+    static __shared__ float shared[64];
+    shared[tid] = val;
+    __syncthreads();
+
+    for (unsigned i = 32; i > 0; i >>= 1) {
+        if (tid < i)
             shared[tid] += shared[tid + i];
-        }
-		__syncthreads();
-	}
-	return shared[0];
+        __syncthreads();
+    }
+    return shared[0];
 }
 /* *************************************************************** */
 __global__ void blockMatchingKernel2D(float *warpedPosition,
                                       float *referencePosition,
-                                      int *mask,
-                                      float* referenceMatrix_xyz,
-                                      unsigned *definedBlock)
-{
-	extern __shared__ float sWarpedValues[];
-	// Compute the current block index
+                                      cudaTextureObject_t referenceTexture,
+                                      cudaTextureObject_t warpedTexture,
+                                      cudaTextureObject_t totalBlockTexture,
+                                      const int *mask,
+                                      const float* referenceMatrix,
+                                      unsigned *definedBlock,
+                                      const int3 imageSize,
+                                      const uint3 blockSize) {
+    extern __shared__ float sWarpedValues[];
+    // Compute the current block index
     const unsigned bid = blockIdx.y * gridDim.x + blockIdx.x;
 
-	const int currentBlockIndex = tex1Dfetch(totalBlock_texture, bid);
-	if (currentBlockIndex > -1) {
-
-		const unsigned idy = threadIdx.x;
-		const unsigned idx = threadIdx.y;
-		const unsigned tid = idy * 4 + idx;
-
-		const unsigned xImage = blockIdx.x * 4 + idx;
-		const unsigned yImage = blockIdx.y * 4 + idy;
-
-		//populate shared memory with resultImageArray's values
-		for (int y=-1; y<2; ++y) {
-			const int yImageIn = yImage + y * 4;
-			for (int x=-1; x<2; ++x) {
-				const int xImageIn = xImage + x * 4;
-
-				const int sharedIndex = ((y+1)*4+idy)*12+(x+1)*4+idx;
-
-				const int indexXYIn = yImageIn * c_ImageSize.x + xImageIn;
-
-				const bool valid =
-						(xImageIn > -1 && xImageIn < (int)c_ImageSize.x) &&
-						(yImageIn > -1 && yImageIn < (int)c_ImageSize.y);
-				sWarpedValues[sharedIndex] = (valid && mask[indexXYIn] > -1) ?
-							tex1Dfetch(warpedImageArray_texture, indexXYIn) : nanf("sNaN");
-			}
-		}
-
-		//for most cases we need this out of th loop
-		//value if the block is 4x4 NaN otherwise
-		const unsigned long voxIndex = yImage * c_ImageSize.x + xImage;
-		const bool referenceInBounds =
-				xImage < c_ImageSize.x &&
-				yImage < c_ImageSize.y;
-		float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ?
-					tex1Dfetch(referenceImageArray_texture, voxIndex) : nanf("sNaN");
-		const bool finiteReference = isfinite(rReferenceValue);
-		rReferenceValue = finiteReference ? rReferenceValue : 0.f;
-		const unsigned referenceSize = __syncthreads_count(finiteReference);
-
-        float bestDisplacement[2] = {nanf("sNaN"), 0.0f};
-        float bestCC = 0;
+    const int currentBlockIndex = tex1Dfetch<int>(totalBlockTexture, bid);
+    if (currentBlockIndex > -1) {
+        const unsigned idy = threadIdx.x;
+        const unsigned idx = threadIdx.y;
+        const unsigned tid = idy * 4 + idx;
+        const unsigned xImage = blockIdx.x * 4 + idx;
+        const unsigned yImage = blockIdx.y * 4 + idy;
+
+        //populate shared memory with resultImageArray's values
+        for (int y = -1; y < 2; ++y) {
+            const int yImageIn = yImage + y * 4;
+            for (int x = -1; x < 2; ++x) {
+                const int xImageIn = xImage + x * 4;
+                const int sharedIndex = ((y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx;
+                const int indexXYIn = yImageIn * imageSize.x + xImageIn;
+                const bool valid =
+                    (xImageIn > -1 && xImageIn < (int)imageSize.x) &&
+                    (yImageIn > -1 && yImageIn < (int)imageSize.y);
+                sWarpedValues[sharedIndex] = (valid && mask[indexXYIn] > -1) ?
+                    tex1Dfetch<float>(warpedTexture, indexXYIn) : nanf("sNaN");
+            }
+        }
 
-		if (referenceSize > 8) {
-			//the target values must remain constant throughout the block matching process
-			const float referenceMean = __fdividef(blockReduce2DSum(rReferenceValue, tid), referenceSize);
-			const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f;
-			const float referenceVar = blockReduce2DSum(referenceTemp * referenceTemp, tid);
-			// iteration over the result blocks (block matching part)
-			for (unsigned y=1; y<8; ++y) {
-				for (unsigned x=1; x<8; ++x) {
+        //for most cases we need this out of th loop
+        //value if the block is 4x4 NaN otherwise
+        const unsigned long voxIndex = yImage * imageSize.x + xImage;
+        const bool referenceInBounds = xImage < imageSize.x && yImage < imageSize.y;
+        float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ?
+            tex1Dfetch<float>(referenceTexture, voxIndex) : nanf("sNaN");
+        const bool finiteReference = isfinite(rReferenceValue);
+        rReferenceValue = finiteReference ? rReferenceValue : 0.f;
+        const unsigned referenceSize = __syncthreads_count(finiteReference);
+
+        float bestDisplacement[2] = { nanf("sNaN"), 0.0f };
+        float bestCC = 0;
 
-					const unsigned sharedIndex = ( y + idy ) * 12 + x + idx;
-					const float rWarpedValue = sWarpedValues[sharedIndex];
-					const bool overlap = isfinite(rWarpedValue) && finiteReference;
-					const unsigned warpedSize = __syncthreads_count(overlap);
+        if (referenceSize > 8) {
+            //the target values must remain constant throughout the block matching process
+            const float referenceMean = __fdividef(blockReduce2DSum(rReferenceValue, tid), referenceSize);
+            const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f;
+            const float referenceVar = blockReduce2DSum(referenceTemp * referenceTemp, tid);
+            // iteration over the result blocks (block matching part)
+            for (unsigned y = 1; y < 8; ++y) {
+                for (unsigned x = 1; x < 8; ++x) {
+                    const unsigned sharedIndex = (y + idy) * 12 + x + idx;
+                    const float rWarpedValue = sWarpedValues[sharedIndex];
+                    const bool overlap = isfinite(rWarpedValue) && finiteReference;
+                    const unsigned warpedSize = __syncthreads_count(overlap);
 
                     if (warpedSize > 8) {
                         //the reference values must remain intact at each loop, so please do not touch this!
-						float newreferenceTemp = referenceTemp;
-						float newreferenceVar = referenceVar;
-						if (warpedSize != referenceSize){
-							const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
-							const float newReferenceMean = __fdividef(blockReduce2DSum(newReferenceValue, tid), warpedSize);
-							newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f;
-							newreferenceVar = blockReduce2DSum(newreferenceTemp * newreferenceTemp, tid);
-						}
-
-						const float rChecked = overlap ? rWarpedValue : 0.0f;
-						const float warpedMean = __fdividef(blockReduce2DSum(rChecked, tid), warpedSize);
-						const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f;
-						const float warpedVar = blockReduce2DSum(warpedTemp * warpedTemp, tid);
-
-						const float sumTargetResult = blockReduce2DSum((newreferenceTemp)* (warpedTemp), tid);
+                        float newreferenceTemp = referenceTemp;
+                        float newreferenceVar = referenceVar;
+                        if (warpedSize != referenceSize) {
+                            const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
+                            const float newReferenceMean = __fdividef(blockReduce2DSum(newReferenceValue, tid), warpedSize);
+                            newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f;
+                            newreferenceVar = blockReduce2DSum(newreferenceTemp * newreferenceTemp, tid);
+                        }
+
+                        const float rChecked = overlap ? rWarpedValue : 0.0f;
+                        const float warpedMean = __fdividef(blockReduce2DSum(rChecked, tid), warpedSize);
+                        const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f;
+                        const float warpedVar = blockReduce2DSum(warpedTemp * warpedTemp, tid);
+
+                        const float sumTargetResult = blockReduce2DSum((newreferenceTemp) * (warpedTemp), tid);
                         const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0;
 
                         if (tid == 0 && localCC > bestCC) {
@@ -203,324 +179,292 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
                             bestDisplacement[0] = x - 4.f;
                             bestDisplacement[1] = y - 4.f;
                         }
-					}
-				}
-			}
-		}
-
-        if (tid==0){
-			const unsigned posIdx = 2 * currentBlockIndex;
-			const float referencePosition_temp[2] = {(float)xImage, (float)yImage};
-
-			bestDisplacement[0] += referencePosition_temp[0];
-			bestDisplacement[1] += referencePosition_temp[1];
-
-			reg2D_mat44_mul_cuda<float>(referenceMatrix_xyz, referencePosition_temp, &referencePosition[posIdx]);
-            reg2D_mat44_mul_cuda<float>(referenceMatrix_xyz, bestDisplacement, &warpedPosition[posIdx]);
-
-			if (isfinite(bestDisplacement[0])) {
-				atomicAdd(definedBlock, 1);
-			}
-		}
-	}
+                    }
+                }
+            }
+        }
+
+        if (tid == 0) {
+            const unsigned posIdx = 2 * currentBlockIndex;
+            const float referencePosition_temp[2] = { (float)xImage, (float)yImage };
+
+            bestDisplacement[0] += referencePosition_temp[0];
+            bestDisplacement[1] += referencePosition_temp[1];
+
+            reg2D_mat44_mul_cuda<float>(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]);
+            reg2D_mat44_mul_cuda<float>(referenceMatrix, bestDisplacement, &warpedPosition[posIdx]);
+
+            if (isfinite(bestDisplacement[0]))
+                atomicAdd(definedBlock, 1);
+        }
+    }
 }
 /* *************************************************************** */
 #ifdef USE_TEST_KERNEL
-__inline__ __device__
-float2 REDUCE_TEST(float* sData,
-                   float data,
-                   unsigned tid)
-{
-	sData[tid] = data;
-	__syncthreads();
-
-	bool seconHalf = tid > 63 ? true : false;
-	for (unsigned i = 32; i > 0; i >>= 1){
-		if (tid < i) sData[tid] += sData[tid + i];
-		if (seconHalf && tid < 64 + i) sData[tid] += sData[tid + i];
-		__syncthreads();
-	}
-
-	const float2 temp = make_float2(sData[0], sData[64]);
-	__syncthreads();
-	return temp;
+__device__ __inline__ float2 REDUCE_TEST(float* sData,
+                                         float data,
+                                         unsigned tid) {
+    sData[tid] = data;
+    __syncthreads();
+
+    bool seconHalf = tid > 63 ? true : false;
+    for (unsigned i = 32; i > 0; i >>= 1) {
+        if (tid < i) sData[tid] += sData[tid + i];
+        if (seconHalf && tid < 64 + i) sData[tid] += sData[tid + i];
+        __syncthreads();
+    }
+
+    const float2 temp = make_float2(sData[0], sData[64]);
+    __syncthreads();
+    return temp;
 }
 /* *************************************************************** */
 __global__ void blockMatchingKernel3D(float *warpedPosition,
                                       float *referencePosition,
-                                      int *mask,
-                                      float* referenceMatrix_xyz,
-                                      unsigned *definedBlock)
-{
-   extern __shared__ float sWarpedValues[];
-   float *sData = &sWarpedValues[12*12*16];
-
-   // Compute the current block index
-   const unsigned bid0 = (2*blockIdx.z * gridDim.y + blockIdx.y) *
-         gridDim.x + blockIdx.x;
-   const unsigned bid1 = bid0 + gridDim.x * gridDim.y;
-   int currentBlockIndex[2] = {tex1Dfetch(totalBlock_texture, bid0),
-                               tex1Dfetch(totalBlock_texture, bid1)};
-   currentBlockIndex[1] = (2*blockIdx.z+1)<c_BlockDim.z ? currentBlockIndex[1] : -1;
-   if (currentBlockIndex[0] > -1 || currentBlockIndex[1] > -1) {
-      const unsigned idx = threadIdx.x;
-      const unsigned idy = threadIdx.y;
-      const unsigned idz = threadIdx.z;
-      const unsigned tid = (idz*4+idy)*4+idx;
-      const unsigned xImage = blockIdx.x * 4 + idx;
-      const unsigned yImage = blockIdx.y * 4 + idy;
-      const unsigned zImage = blockIdx.z * 8 + idz;
-
-      //populate shared memory with resultImageArray's values
-      for (int z=-1 ; z<2; z+=2) {
-         const int zImageIn = zImage + z * 4;
-         for (int y=-1; y<2; ++y) {
-            const int yImageIn = yImage + y * 4;
-            for (int x=-1; x<2; ++x) {
-               const int xImageIn = xImage + x * 4;
-
-               const int sharedIndex = (((z+1)*4+idz)*12+(y+1)*4+idy)*12+(x+1)*4+idx;
+                                      cudaTextureObject_t referenceTexture,
+                                      cudaTextureObject_t warpedTexture,
+                                      cudaTextureObject_t totalBlockTexture,
+                                      const int *mask,
+                                      const float* referenceMatrix,
+                                      unsigned *definedBlock,
+                                      const int3 imageSize,
+                                      const uint3 blockSize) {
+    extern __shared__ float sWarpedValues[];
+    float *sData = &sWarpedValues[12 * 12 * 16];
+
+    // Compute the current block index
+    const unsigned bid0 = (2 * blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
+    const unsigned bid1 = bid0 + gridDim.x * gridDim.y;
+    int currentBlockIndex[2]{ tex1Dfetch<int>(totalBlockTexture, bid0), tex1Dfetch<int>(totalBlockTexture, bid1) };
+    currentBlockIndex[1] = (2 * blockIdx.z + 1) < blockSize.z ? currentBlockIndex[1] : -1;
+    if (currentBlockIndex[0] > -1 || currentBlockIndex[1] > -1) {
+        const unsigned idx = threadIdx.x;
+        const unsigned idy = threadIdx.y;
+        const unsigned idz = threadIdx.z;
+        const unsigned tid = (idz * 4 + idy) * 4 + idx;
+        const unsigned xImage = blockIdx.x * 4 + idx;
+        const unsigned yImage = blockIdx.y * 4 + idy;
+        const unsigned zImage = blockIdx.z * 8 + idz;
+
+        //populate shared memory with resultImageArray's values
+        for (int z = -1; z < 2; z += 2) {
+            const int zImageIn = zImage + z * 4;
+            for (int y = -1; y < 2; ++y) {
+                const int yImageIn = yImage + y * 4;
+                for (int x = -1; x < 2; ++x) {
+                    const int xImageIn = xImage + x * 4;
+                    const int sharedIndex = (((z + 1) * 4 + idz) * 12 + (y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx;
+                    const unsigned indexXYZIn = xImageIn + imageSize.x * (yImageIn + zImageIn * imageSize.y);
+                    const bool valid =
+                        (xImageIn > -1 && xImageIn < (int)imageSize.x) &&
+                        (yImageIn > -1 && yImageIn < (int)imageSize.y) &&
+                        (zImageIn > -1 && zImageIn < (int)imageSize.z);
+                    sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ?
+                        tex1Dfetch<float>(warpedTexture, indexXYZIn) : nanf("sNaN");
+                }
+            }
+        }
 
-               const unsigned indexXYZIn = xImageIn + c_ImageSize.x *
-                     (yImageIn + zImageIn * c_ImageSize.y);
+        const unsigned voxIndex = (zImage * imageSize.y + yImage) * imageSize.x + xImage;
+        const bool referenceInBounds = xImage < imageSize.x && yImage < imageSize.y && zImage < imageSize.z;
+        float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ?
+            tex1Dfetch<float>(referenceTexture, voxIndex) : nanf("sNaN");
+        const bool finiteReference = isfinite(rReferenceValue);
+        rReferenceValue = finiteReference ? rReferenceValue : 0.f;
+        float2 tempVal = REDUCE_TEST(sData, finiteReference ? 1.0f : 0.0f, tid);
+        const uint2 referenceSize = make_uint2((unsigned)tempVal.x, (unsigned)tempVal.y);
+
+        float2 bestValue{};
+        float bestDisp[2][3];
+        bestDisp[0][0] = bestDisp[1][0] = nanf("sNaN");
+        if (referenceSize.x > 32 || referenceSize.y > 32) {
+            float2 referenceMean = REDUCE_TEST(sData, rReferenceValue, tid);
+            referenceMean.x /= (float)referenceSize.x;
+            referenceMean.y /= (float)referenceSize.y;
+            float referenceTemp;
+            if (tid > 63)
+                referenceTemp = finiteReference ? rReferenceValue - referenceMean.y : 0.f;
+            else referenceTemp = finiteReference ? rReferenceValue - referenceMean.x : 0.f;
+            float2 referenceVar = REDUCE_TEST(sData, referenceTemp * referenceTemp, tid);
+
+            // iteration over the result blocks (block matching part)
+            for (unsigned z = 1; z < 8; ++z) {
+                for (unsigned y = 1; y < 8; ++y) {
+                    for (unsigned x = 1; x < 8; ++x) {
+                        const unsigned sharedIndex = ((z + idz) * 12 + y + idy) * 12 + x + idx;
+                        const float rWarpedValue = sWarpedValues[sharedIndex];
+                        const bool overlap = isfinite(rWarpedValue) && finiteReference;
+                        tempVal = REDUCE_TEST(sData, overlap ? 1.0f : 0.0f, tid);
+                        const uint2 warpedSize = make_uint2((unsigned)tempVal.x, (unsigned)tempVal.y);
+
+                        if (warpedSize.x > 32 || warpedSize.y > 32) {
+                            float newreferenceTemp = referenceTemp;
+                            float2 newreferenceVar = referenceVar;
+                            if (warpedSize.x != referenceSize.x || warpedSize.y != referenceSize.y) {
+                                const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
+                                float2 newReferenceMean = REDUCE_TEST(sData, newReferenceValue, tid);
+                                newReferenceMean.x /= (float)warpedSize.x;
+                                newReferenceMean.y /= (float)warpedSize.y;
+                                if (tid > 63)
+                                    referenceTemp = overlap ? newReferenceValue - newReferenceMean.y : 0.f;
+                                else referenceTemp = overlap ? newReferenceValue - newReferenceMean.x : 0.f;
+                                newreferenceVar = REDUCE_TEST(sData, newreferenceTemp * newreferenceTemp, tid);
+                            }
+                            const float rChecked = overlap ? rWarpedValue : 0.0f;
+                            float2 warpedMean = REDUCE_TEST(sData, rChecked, tid);
+                            warpedMean.x /= (float)warpedSize.x;
+                            warpedMean.y /= (float)warpedSize.y;
+                            float warpedTemp;
+                            if (tid > 63)
+                                warpedTemp = overlap ? rChecked - warpedMean.y : 0.f;
+                            else warpedTemp = overlap ? rChecked - warpedMean.x : 0.f;
+                            const float2 warpedVar = REDUCE_TEST(sData, warpedTemp * warpedTemp, tid);
+                            const float2 sumTargetResult = REDUCE_TEST(sData, newreferenceTemp * warpedTemp, tid);
+
+                            if (tid == 0 && warpedSize.x > 32) {
+                                const float localCC = fabs(sumTargetResult.x * rsqrtf(newreferenceVar.x * warpedVar.x));
+                                if (localCC > bestValue.x) {
+                                    bestValue.x = localCC;
+                                    bestDisp[0][0] = x - 4.f;
+                                    bestDisp[0][1] = y - 4.f;
+                                    bestDisp[0][2] = z - 4.f;
+                                }
+                            }
+                            if (tid == 64 && warpedSize.y > 32) {
+                                const float localCC = fabs(sumTargetResult.y * rsqrtf(newreferenceVar.y * warpedVar.y));
+                                if (localCC > bestValue.y) {
+                                    bestValue.y = localCC;
+                                    bestDisp[1][0] = x - 4.f;
+                                    bestDisp[1][1] = y - 4.f;
+                                    bestDisp[1][2] = z - 4.f;
+                                }
+                            }
+                            __syncthreads();
+                        }
+                    }
+                }
+            }
+        }
 
-               const bool valid =
-                     (xImageIn > -1 && xImageIn < (int)c_ImageSize.x) &&
-                     (yImageIn > -1 && yImageIn < (int)c_ImageSize.y) &&
-                     (zImageIn > -1 && zImageIn < (int)c_ImageSize.z);
-               sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ?
-                        tex1Dfetch(warpedImageArray_texture, indexXYZIn) : nanf("sNaN");
+        if (tid == 0 && currentBlockIndex[0] > -1) {
+            const unsigned posIdx = 3 * currentBlockIndex[0];
+            warpedPosition[posIdx] = NAN;
+            if (isfinite(bestDisp[0][0])) {
+                const float referencePosition_temp[3]{ (float)xImage, (float)yImage, (float)zImage };
+                bestDisp[0][0] += referencePosition_temp[0];
+                bestDisp[0][1] += referencePosition_temp[1];
+                bestDisp[0][2] += referencePosition_temp[2];
+                reg_mat44_mul_cuda<float>(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]);
+                reg_mat44_mul_cuda<float>(referenceMatrix, bestDisp[0], &warpedPosition[posIdx]);
+                atomicAdd(definedBlock, 1);
             }
-         }
-      }
-
-      const unsigned voxIndex = ( zImage * c_ImageSize.y + yImage ) *
-            c_ImageSize.x + xImage;
-      const bool referenceInBounds =
-            xImage < c_ImageSize.x &&
-            yImage < c_ImageSize.y &&
-            zImage < c_ImageSize.z;
-      float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ?
-               tex1Dfetch(referenceImageArray_texture, voxIndex) : nanf("sNaN");
-      const bool finiteReference = isfinite(rReferenceValue);
-      rReferenceValue = finiteReference ? rReferenceValue : 0.f;
-      float2 tempVal = REDUCE_TEST(sData, finiteReference ? 1.0f : 0.0f, tid);
-      const uint2 referenceSize = make_uint2((uint)tempVal.x, (uint)tempVal.y);
-
-      float2 bestValue = make_float2(0.f, 0.f);
-      float bestDisp[2][3];
-      bestDisp[0][0] = bestDisp[1][0] = nanf("sNaN");
-      if (referenceSize.x > 32 || referenceSize.y > 32) {
-         float2 referenceMean=REDUCE_TEST(sData, rReferenceValue, tid);
-         referenceMean.x /= (float)referenceSize.x;
-         referenceMean.y /= (float)referenceSize.y;
-         float referenceTemp;
-         if(tid>63)
-            referenceTemp = finiteReference ? rReferenceValue - referenceMean.y : 0.f;
-         else referenceTemp = finiteReference ? rReferenceValue - referenceMean.x : 0.f;
-         float2 referenceVar = REDUCE_TEST(sData, referenceTemp*referenceTemp, tid);
-
-         // iteration over the result blocks (block matching part)
-         for (unsigned z=1; z<8; ++z) {
-            for (unsigned y=1; y<8; ++y) {
-               for (unsigned x=1; x<8; ++x) {
-
-                  const unsigned sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx;
-                  const float rWarpedValue = sWarpedValues[sharedIndex];
-                  const bool overlap = isfinite(rWarpedValue) && finiteReference;
-                  tempVal = REDUCE_TEST(sData, overlap ? 1.0f : 0.0f, tid);
-                  const uint2 warpedSize = make_uint2((uint)tempVal.x, (uint)tempVal.y);
-
-                  if (warpedSize.x > 32 || warpedSize.y > 32) {
-
-                     float newreferenceTemp = referenceTemp;
-                     float2 newreferenceVar = referenceVar;
-                     if (warpedSize.x!=referenceSize.x || warpedSize.y!=referenceSize.y){
-                        const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
-                        float2 newReferenceMean = REDUCE_TEST(sData, newReferenceValue, tid);
-                        newReferenceMean.x /= (float)warpedSize.x;
-                        newReferenceMean.y /= (float)warpedSize.y;
-                        if(tid>63)
-                           referenceTemp = overlap ? newReferenceValue - newReferenceMean.y : 0.f;
-                        else referenceTemp = overlap ? newReferenceValue - newReferenceMean.x : 0.f;
-                        newreferenceVar = REDUCE_TEST(sData, newreferenceTemp * newreferenceTemp, tid);
-                     }
-                     const float rChecked = overlap ? rWarpedValue : 0.0f;
-                     float2 warpedMean = REDUCE_TEST(sData, rChecked, tid);
-                     warpedMean.x /= (float)warpedSize.x;
-                     warpedMean.y /= (float)warpedSize.y;
-                     float warpedTemp;
-                     if(tid>63)
-                        warpedTemp = overlap ? rChecked - warpedMean.y : 0.f;
-                     else warpedTemp = overlap ? rChecked - warpedMean.x : 0.f;
-                     const float2 warpedVar = REDUCE_TEST(sData, warpedTemp*warpedTemp, tid);
-                     const float2 sumTargetResult = REDUCE_TEST(sData, newreferenceTemp*warpedTemp, tid);
-
-                     if (tid==0 && warpedSize.x > 32 ){
-                        const float localCC = fabs(sumTargetResult.x *
-                                                   rsqrtf(newreferenceVar.x * warpedVar.x));
-                        if(localCC > bestValue.x) {
-                           bestValue.x = localCC;
-                           bestDisp[0][0] = x - 4.f;
-                           bestDisp[0][1] = y - 4.f;
-                           bestDisp[0][2] = z - 4.f;
-                        }
-                     }
-                     if (tid==64 && warpedSize.y > 32 ){
-                        const float localCC = fabs(sumTargetResult.y *
-                                                   rsqrtf(newreferenceVar.y * warpedVar.y));
-                        if(localCC > bestValue.y) {
-                           bestValue.y = localCC;
-                           bestDisp[1][0] = x - 4.f;
-                           bestDisp[1][1] = y - 4.f;
-                           bestDisp[1][2] = z - 4.f;
-                        }
-                     }
-                     __syncthreads();
-                  }
-               }
+        }
+        if (tid == 64 && currentBlockIndex[1] > -1) {
+            const unsigned posIdx = 3 * currentBlockIndex[1];
+            warpedPosition[posIdx] = NAN;
+            if (isfinite(bestDisp[1][0])) {
+                const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage };
+                bestDisp[1][0] += referencePosition_temp[0];
+                bestDisp[1][1] += referencePosition_temp[1];
+                bestDisp[1][2] += referencePosition_temp[2];
+                reg_mat44_mul_cuda<float>(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]);
+                reg_mat44_mul_cuda<float>(referenceMatrix, bestDisp[1], &warpedPosition[posIdx]);
+                atomicAdd(definedBlock, 1);
             }
-         }
-      }
-
-      if(tid==0 && currentBlockIndex[0]>-1){
-         const unsigned posIdx = 3 * currentBlockIndex[0];
-         warpedPosition[posIdx] = NAN;
-         if (isfinite(bestDisp[0][0])){
-            const float referencePosition_temp[3] = { (float)xImage,
-                                                      (float)yImage,
-                                                      (float)zImage};
-            bestDisp[0][0] += referencePosition_temp[0];
-            bestDisp[0][1] += referencePosition_temp[1];
-            bestDisp[0][2] += referencePosition_temp[2];
-            reg_mat44_mul_cuda<float>(referenceMatrix_xyz,
-                                      referencePosition_temp,
-                                      &referencePosition[posIdx]);
-            reg_mat44_mul_cuda<float>(referenceMatrix_xyz,
-                                      bestDisp[0],
-                  &warpedPosition[posIdx]);
-            atomicAdd(definedBlock, 1);
-         }
-      }
-      if(tid==64 && currentBlockIndex[1]>-1){
-         const unsigned posIdx = 3 * currentBlockIndex[1];
-         warpedPosition[posIdx] = NAN;
-         if (isfinite(bestDisp[1][0])){
-            const float referencePosition_temp[3] = {(float)xImage,
-                                                     (float)yImage,
-                                                     (float)zImage};
-            bestDisp[1][0] += referencePosition_temp[0];
-            bestDisp[1][1] += referencePosition_temp[1];
-            bestDisp[1][2] += referencePosition_temp[2];
-            reg_mat44_mul_cuda<float>(referenceMatrix_xyz,
-                                      referencePosition_temp,
-                                      &referencePosition[posIdx]);
-            reg_mat44_mul_cuda<float>(referenceMatrix_xyz,
-                                      bestDisp[1],
-                  &warpedPosition[posIdx]);
-            atomicAdd(definedBlock, 1);
-         }
-      }
-   }
+        }
+    }
 }
 #else
-
 /* *************************************************************** */
 __global__ void blockMatchingKernel3D(float *warpedPosition,
                                       float *referencePosition,
-                                      int *mask,
-                                      float* referenceMatrix_xyz,
-                                      unsigned *definedBlock)
-{
-	extern __shared__ float sWarpedValues[];
-	// Compute the current block index
-	const unsigned bid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x ;
-
-	const int currentBlockIndex = tex1Dfetch(totalBlock_texture, bid);
-	if (currentBlockIndex > -1) {
-		const unsigned idx = threadIdx.x;
-		const unsigned idy = threadIdx.y;
-		const unsigned idz = threadIdx.z;
-		const unsigned tid = (idz*4+idy)*4+idx;
-		const unsigned xImage = blockIdx.x * 4 + idx;
-		const unsigned yImage = blockIdx.y * 4 + idy;
-		const unsigned zImage = blockIdx.z * 4 + idz;
-
-		//populate shared memory with resultImageArray's values
-		for (int z=-1 ; z<2; ++z) {
-			const int zImageIn = zImage + z * 4;
-			for (int y=-1; y<2; ++y) {
-				const int yImageIn = yImage + y * 4;
-				for (int x=-1; x<2; ++x) {
-					const int xImageIn = xImage + x * 4;
-
-					const int sharedIndex = (((z+1)*4+idz)*12+(y+1)*4+idy)*12+(x+1)*4+idx;
-
-					const unsigned indexXYZIn = xImageIn + c_ImageSize.x *
-							(yImageIn + zImageIn * c_ImageSize.y);
-
-					const bool valid =
-							(xImageIn > -1 && xImageIn < (int)c_ImageSize.x) &&
-							(yImageIn > -1 && yImageIn < (int)c_ImageSize.y) &&
-							(zImageIn > -1 && zImageIn < (int)c_ImageSize.z);
-					sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ?
-								tex1Dfetch(warpedImageArray_texture, indexXYZIn) : nanf("sNaN");     //for some reason the mask here creates probs
-				}
-			}
-		}
-
-		//for most cases we need this out of th loop
-		//value if the block is 4x4x4 NaN otherwise
-		const unsigned voxIndex = ( zImage * c_ImageSize.y + yImage ) *
-				c_ImageSize.x + xImage;
-		const bool referenceInBounds =
-				xImage < c_ImageSize.x &&
-				yImage < c_ImageSize.y &&
-				zImage < c_ImageSize.z;
-		float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ?
-					tex1Dfetch(referenceImageArray_texture, voxIndex) : nanf("sNaN");
-		const bool finiteReference = isfinite(rReferenceValue);
-		rReferenceValue = finiteReference ? rReferenceValue : 0.f;
-		const unsigned referenceSize = __syncthreads_count(finiteReference);
-
-        float bestDisplacement[3] = {nanf("sNaN"), 0.0f, 0.0f };
+                                      cudaTextureObject_t referenceTexture,
+                                      cudaTextureObject_t warpedTexture,
+                                      cudaTextureObject_t totalBlockTexture,
+                                      const int *mask,
+                                      const float* referenceMatrix,
+                                      unsigned *definedBlock,
+                                      const int3 imageSize,
+                                      const uint3 blockSize) {
+    extern __shared__ float sWarpedValues[];
+    // Compute the current block index
+    const unsigned bid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
+
+    const int currentBlockIndex = tex1Dfetch<int>(totalBlockTexture, bid);
+    if (currentBlockIndex > -1) {
+        const unsigned idx = threadIdx.x;
+        const unsigned idy = threadIdx.y;
+        const unsigned idz = threadIdx.z;
+        const unsigned tid = (idz * 4 + idy) * 4 + idx;
+        const unsigned xImage = blockIdx.x * 4 + idx;
+        const unsigned yImage = blockIdx.y * 4 + idy;
+        const unsigned zImage = blockIdx.z * 4 + idz;
+
+        //populate shared memory with resultImageArray's values
+        for (int z = -1; z < 2; ++z) {
+            const int zImageIn = zImage + z * 4;
+            for (int y = -1; y < 2; ++y) {
+                const int yImageIn = yImage + y * 4;
+                for (int x = -1; x < 2; ++x) {
+                    const int xImageIn = xImage + x * 4;
+                    const int sharedIndex = (((z + 1) * 4 + idz) * 12 + (y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx;
+                    const unsigned indexXYZIn = xImageIn + imageSize.x * (yImageIn + zImageIn * imageSize.y);
+                    const bool valid =
+                        (xImageIn > -1 && xImageIn < (int)imageSize.x) &&
+                        (yImageIn > -1 && yImageIn < (int)imageSize.y) &&
+                        (zImageIn > -1 && zImageIn < (int)imageSize.z);
+                    sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ?
+                        tex1Dfetch<float>(warpedTexture, indexXYZIn) : nanf("sNaN");     //for some reason the mask here creates probs
+                }
+            }
+        }
+
+        //for most cases we need this out of th loop
+        //value if the block is 4x4x4 NaN otherwise
+        const unsigned voxIndex = (zImage * imageSize.y + yImage) * imageSize.x + xImage;
+        const bool referenceInBounds = xImage < imageSize.x && yImage < imageSize.y && zImage < imageSize.z;
+        float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ?
+            tex1Dfetch<float>(referenceTexture, voxIndex) : nanf("sNaN");
+        const bool finiteReference = isfinite(rReferenceValue);
+        rReferenceValue = finiteReference ? rReferenceValue : 0.f;
+        const unsigned referenceSize = __syncthreads_count(finiteReference);
+
+        float bestDisplacement[3] = { nanf("sNaN"), 0.0f, 0.0f };
         float bestCC = 0.0f;
 
-		if (referenceSize > 32) {
-			//the target values must remain constant throughout the block matching process
-			const float referenceMean = __fdividef(blockReduceSum(rReferenceValue, tid), referenceSize);
-			const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f;
-			const float referenceVar = blockReduceSum(referenceTemp * referenceTemp, tid);
-
-			// iteration over the result blocks (block matching part)
-			for (unsigned z=1; z<8; ++z) {
-				for (unsigned y=1; y<8; ++y) {
-					for (unsigned x=1; x<8; ++x) {
-
-						const unsigned sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx;
-						const float rWarpedValue = sWarpedValues[sharedIndex];
-						const bool overlap = isfinite(rWarpedValue) && finiteReference;
-						const unsigned warpedSize = __syncthreads_count(overlap);
-
-						if (warpedSize > 32) {
-
-							//the target values must remain intact at each loop, so please do not touch this!
-							float newreferenceTemp = referenceTemp;
-							float newreferenceVar = referenceVar;
-							if (warpedSize != referenceSize){
-								const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
-								const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), warpedSize);
-								newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f;
-								newreferenceVar = blockReduceSum(newreferenceTemp * newreferenceTemp, tid);
-							}
-
-							const float rChecked = overlap ? rWarpedValue : 0.0f;
-							const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), warpedSize);
-							const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f;
-							const float warpedVar = blockReduceSum(warpedTemp * warpedTemp, tid);
-
-							const float sumTargetResult = blockReduceSum((newreferenceTemp)* (warpedTemp), tid);
-                            const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0;
+        if (referenceSize > 32) {
+            //the target values must remain constant throughout the block matching process
+            const float referenceMean = __fdividef(blockReduceSum(rReferenceValue, tid), referenceSize);
+            const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f;
+            const float referenceVar = blockReduceSum(referenceTemp * referenceTemp, tid);
+
+            // iteration over the result blocks (block matching part)
+            for (unsigned z = 1; z < 8; ++z) {
+                for (unsigned y = 1; y < 8; ++y) {
+                    for (unsigned x = 1; x < 8; ++x) {
+                        const unsigned sharedIndex = ((z + idz) * 12 + y + idy) * 12 + x + idx;
+                        const float rWarpedValue = sWarpedValues[sharedIndex];
+                        const bool overlap = isfinite(rWarpedValue) && finiteReference;
+                        const unsigned warpedSize = __syncthreads_count(overlap);
+
+                        if (warpedSize > 32) {
+                            //the target values must remain intact at each loop, so please do not touch this!
+                            float newreferenceTemp = referenceTemp;
+                            float newreferenceVar = referenceVar;
+                            if (warpedSize != referenceSize) {
+                                const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
+                                const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), warpedSize);
+                                newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f;
+                                newreferenceVar = blockReduceSum(newreferenceTemp * newreferenceTemp, tid);
+                            }
+
+                            const float rChecked = overlap ? rWarpedValue : 0.0f;
+                            const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), warpedSize);
+                            const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f;
+                            const float warpedVar = blockReduceSum(warpedTemp * warpedTemp, tid);
+
+                            const float sumTargetResult = blockReduceSum(newreferenceTemp * warpedTemp, tid);
+                            const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs(sumTargetResult / sqrt(newreferenceVar * warpedVar)) : 0;
 
                             if (tid == 0 && localCC > bestCC) {
                                 bestCC = localCC + 1.0e-7f;
@@ -528,115 +472,98 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
                                 bestDisplacement[1] = y - 4.f;
                                 bestDisplacement[2] = z - 4.f;
                             }
-						}
-					}
-				}
-			}
-		}
-
-		if (tid==0) {
-			const unsigned posIdx = 3 * currentBlockIndex;
-			const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage };
-
-			bestDisplacement[0] += referencePosition_temp[0];
-			bestDisplacement[1] += referencePosition_temp[1];
-			bestDisplacement[2] += referencePosition_temp[2];
-
-			reg_mat44_mul_cuda<float>(referenceMatrix_xyz, referencePosition_temp, &referencePosition[posIdx]);
-			reg_mat44_mul_cuda<float>(referenceMatrix_xyz, bestDisplacement, &warpedPosition[posIdx]);
-			if (isfinite(bestDisplacement[0])) {
-				atomicAdd(definedBlock, 1);
-			}
-		}
-	}
+                        }
+                    }
+                }
+            }
+        }
+
+        if (tid == 0) {
+            const unsigned posIdx = 3 * currentBlockIndex;
+            const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage };
+
+            bestDisplacement[0] += referencePosition_temp[0];
+            bestDisplacement[1] += referencePosition_temp[1];
+            bestDisplacement[2] += referencePosition_temp[2];
+
+            reg_mat44_mul_cuda<float>(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]);
+            reg_mat44_mul_cuda<float>(referenceMatrix, bestDisplacement, &warpedPosition[posIdx]);
+            if (isfinite(bestDisplacement[0]))
+                atomicAdd(definedBlock, 1);
+        }
+    }
 }
 #endif
 /* *************************************************************** */
-void block_matching_method_gpu(nifti_image *targetImage,
-										 _reg_blockMatchingParam *params,
-										 float **targetImageArray_d,
-										 float **resultImageArray_d,
-										 float **referencePosition_d,
-										 float **warpedPosition_d,
-										 int **totalBlock_d,
-										 int **mask_d,
-										 float** referenceMat_d)
-{
-	// Copy some required parameters over to the device
-    uint3 imageSize = make_uint3(targetImage->nx,
-                                 targetImage->ny,
-                                 targetImage->nz);
-	uint3 blockSize = make_uint3(params->blockNumber[0],
-			params->blockNumber[1],
-			params->blockNumber[2]);
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize,&imageSize,sizeof(uint3)));
-	NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_BlockDim,&blockSize,sizeof(uint3)));
-
-	// Texture binding
-	const unsigned numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2];
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, referenceImageArray_texture, *targetImageArray_d, targetImage->nvox * sizeof(float)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedImageArray_texture, *resultImageArray_d, targetImage->nvox * sizeof(float)));
-	NR_CUDA_SAFE_CALL(cudaBindTexture(0, totalBlock_texture, *totalBlock_d, numBlocks * sizeof(int)));
-
-	unsigned *definedBlock_d;
-	unsigned *definedBlock_h = (unsigned*) malloc(sizeof(unsigned));
-	*definedBlock_h = 0;
-	NR_CUDA_SAFE_CALL(cudaMalloc((void** )(&definedBlock_d), sizeof(unsigned)));
-	NR_CUDA_SAFE_CALL(cudaMemcpy(definedBlock_d, definedBlock_h, sizeof(unsigned), cudaMemcpyHostToDevice));
-
-
-	if (params->stepSize!=1 || params->voxelCaptureRange!=3){
-        reg_print_msg_error("The block Mathching CUDA kernel supports only a stepsize of 1");
-		reg_exit();
-	}
+void block_matching_method_gpu(const nifti_image *referenceImage,
+                               _reg_blockMatchingParam *params,
+                               const float *referenceImageCuda,
+                               const float *warpedImageCuda,
+                               float *referencePositionCuda,
+                               float *warpedPositionCuda,
+                               const int *totalBlockCuda,
+                               const int *maskCuda,
+                               const float *refMatCuda) {
+    if (params->stepSize != 1 || params->voxelCaptureRange != 3) {
+        reg_print_msg_error("The block matching CUDA kernel supports only single step size!");
+        reg_exit();
+    }
+
+    const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const uint3 blockSize = make_uint3(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]);
+    const unsigned numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2];
+
+    auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float),
+                                                           cudaChannelFormatKindFloat, 1);
+    auto warpedTexture = cudaCommon_createTextureObject(warpedImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float),
+                                                        cudaChannelFormatKindFloat, 1);
+    auto totalBlockTexture = cudaCommon_createTextureObject(totalBlockCuda, cudaResourceTypeLinear, numBlocks * sizeof(int),
+                                                            cudaChannelFormatKindSigned, 1);
+
+    unsigned definedBlock = 0, *definedBlockCuda;
+    NR_CUDA_SAFE_CALL(cudaMalloc(&definedBlockCuda, sizeof(unsigned)));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(definedBlockCuda, &definedBlock, sizeof(unsigned), cudaMemcpyHostToDevice));
 
 #ifdef USE_TEST_KERNEL
-	dim3 BlockDims1D(4,4,8);
-	dim3 BlocksGrid3D(
-				params->blockNumber[0],
-			params->blockNumber[1],
-			(unsigned)reg_ceil((float)params->blockNumber[2]/2.f));
-	unsigned sMem = (128 + 4*3 * 4*3 * 4*4) * sizeof(float);
+    dim3 blockDims(4, 4, 8);
+    dim3 gridDims(params->blockNumber[0], params->blockNumber[1], (unsigned)reg_ceil((float)params->blockNumber[2] / 2.f));
+    unsigned sharedMemSize = (128 + 4 * 3 * 4 * 3 * 4 * 4) * sizeof(float);
 #else
-    dim3 BlockDims1D(4,4,4);
-    dim3 BlocksGrid3D(
-                params->blockNumber[0],
-            params->blockNumber[1],
-            params->blockNumber[2]);
-    unsigned sMem = (64 + 4*3 * 4*3 * 4*3) * sizeof(float); // (3*4)^3
+    dim3 blockDims(4, 4, 4);
+    dim3 gridDims(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]);
+    unsigned sharedMemSize = (64 + 4 * 3 * 4 * 3 * 4 * 3) * sizeof(float);  // (3*4)^3
 #endif
 
-	if (targetImage->nz == 1){
-		BlockDims1D.z=1;
-		BlocksGrid3D.z=1;
-		sMem = (16 + 144) * sizeof(float); // // (3*4)^2
-		blockMatchingKernel2D << <BlocksGrid3D, BlockDims1D, sMem >> >(*warpedPosition_d,
-																							*referencePosition_d,
-																							*mask_d,
-																							*referenceMat_d,
-																							definedBlock_d);
-	}
-	else {
-		blockMatchingKernel3D <<<BlocksGrid3D, BlockDims1D, sMem>>>(*warpedPosition_d,
-																						*referencePosition_d,
-																						*mask_d,
-																						*referenceMat_d,
-																						definedBlock_d);
-	}
-#ifndef NDEBUG
-    NR_CUDA_CHECK_KERNEL(BlocksGrid3D, BlockDims1D);
-        #else
-    NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
-#endif
-
-	NR_CUDA_SAFE_CALL(cudaMemcpy((void * )definedBlock_h, (void * )definedBlock_d, sizeof(unsigned), cudaMemcpyDeviceToHost));
-	params->definedActiveBlockNumber = *definedBlock_h;
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceImageArray_texture));
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedImageArray_texture));
-	NR_CUDA_SAFE_CALL(cudaUnbindTexture(totalBlock_texture));
-
-	free(definedBlock_h);
-	cudaFree(definedBlock_d);
-
+    if (referenceImage->nz == 1) {
+        blockDims.z = 1;
+        gridDims.z = 1;
+        sharedMemSize = (16 + 144) * sizeof(float);  // (3*4)^2
+        blockMatchingKernel2D<<<gridDims, blockDims, sharedMemSize>>>(warpedPositionCuda,
+                                                                      referencePositionCuda,
+                                                                      *referenceTexture,
+                                                                      *warpedTexture,
+                                                                      *totalBlockTexture,
+                                                                      maskCuda,
+                                                                      refMatCuda,
+                                                                      definedBlockCuda,
+                                                                      imageSize,
+                                                                      blockSize);
+    } else {
+        blockMatchingKernel3D<<<gridDims, blockDims, sharedMemSize>>>(warpedPositionCuda,
+                                                                      referencePositionCuda,
+                                                                      *referenceTexture,
+                                                                      *warpedTexture,
+                                                                      *totalBlockTexture,
+                                                                      maskCuda,
+                                                                      refMatCuda,
+                                                                      definedBlockCuda,
+                                                                      imageSize,
+                                                                      blockSize);
+    }
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+
+    NR_CUDA_SAFE_CALL(cudaMemcpy(&definedBlock, definedBlockCuda, sizeof(unsigned), cudaMemcpyDeviceToHost));
+    params->definedActiveBlockNumber = definedBlock;
+    NR_CUDA_SAFE_CALL(cudaFree(definedBlockCuda));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/blockMatchingKernel.h b/reg-lib/cuda/blockMatchingKernel.h
index dcf1452a..2692ab81 100644
--- a/reg-lib/cuda/blockMatchingKernel.h
+++ b/reg-lib/cuda/blockMatchingKernel.h
@@ -1,5 +1,5 @@
 /*
- *  _reg_blockMatching_gpu.h
+ *  blockMatchingKernel.h
  *
  *
  *  Created by Marc Modat and Pankaj Daga on 24/03/2009.
@@ -15,13 +15,25 @@
 #include "_reg_common_cuda.h"
 #include "_reg_blockMatching.h"
 
-// targetImage: The target/fixed/reference image.
-// resultImage: The warped/deformed/result image.
-// blockMatchingParam:
-// targetImageArray_d: The target/fixed/reference image on the device.
-// targetPosition_d: Output. The center of the blocks in the target image.
-// resultPosition_d: Output. The corresponding center of the blocks in the result.
-// activeBlock_d: Array specifying which blocks are active.
-
+/**
+ * @brief Block matching method
+ * @param referenceImage The reference image.
+ * @param params The block matching parameters.
+ * @param referenceImageCuda The reference image on the device.
+ * @param warpedImageCuda The warped image on the device.
+ * @param referencePositionCuda Output. The centre of the blocks in the reference image.
+ * @param warpedPositionCuda Output. The corresponding centre of the blocks in the result.
+ * @param totalBlockCuda Array specifying which blocks are active.
+ * @param maskCuda The mask image on the device.
+ * @param refMatCuda The reference image transformation matrix on the device.
+ */
 extern "C++"
-void block_matching_method_gpu(nifti_image *targetImage, _reg_blockMatchingParam *params, float **targetImageArray_d, float **resultImageArray_d, float **targetPosition_d, float **resultPosition_d, int **activeBlock_d, int **mask_d, float** targetMat_d);
+void block_matching_method_gpu(const nifti_image *referenceImage,
+                               _reg_blockMatchingParam *params,
+                               const float *referenceImageCuda,
+                               const float *warpedImageCuda,
+                               float *referencePositionCuda,
+                               float *warpedPositionCuda,
+                               const int *totalBlockCuda,
+                               const int *maskCuda,
+                               const float *refMatCuda);

From 46bb6c81f7ea57a53a891f2cb91b1d643f798cd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 11 Jul 2023 16:12:48 +0100
Subject: [PATCH 152/314] Fix a memory leak

---
 niftyreg_build_version.txt | 2 +-
 reg-lib/Content.cpp        | 7 ++++---
 reg-lib/Content.h          | 3 ++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 67f3f23e..e6a4f00b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-270
+271
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index 145c9e1e..7db0847a 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -19,14 +19,15 @@ Content::Content(nifti_image *referenceIn,
     AllocateWarped();
     AllocateDeformationField(bytesIn);
     activeVoxelNumber = reference->nvox;
-    if (!referenceMask)
-        referenceMask = (int*)calloc(activeVoxelNumber, sizeof(int));
+    if (!referenceMask) {
+        referenceMaskManaged.reset(new int[activeVoxelNumber]());
+        referenceMask = referenceMaskManaged.get();
+    }
 }
 /* *************************************************************** */
 Content::~Content() {
     DeallocateWarped();
     DeallocateDeformationField();
-    // free(referenceMask); // TODO Fix this with smart pointers
 }
 /* *************************************************************** */
 void Content::AllocateWarped() {
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index f4c8f86a..8883acba 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "_reg_maths.h"
+#include "_reg_tools.h"
 
 class Content {
 public:
@@ -41,6 +41,7 @@ class Content {
     nifti_image *floating = nullptr;
     nifti_image *deformationField = nullptr;
     int *referenceMask = nullptr;
+    unique_ptr<int[]> referenceMaskManaged;
     mat44 *transformationMatrix = nullptr;
     nifti_image *warped = nullptr;
 

From c4258837753316d789ff474219e6bac2aa9eea45 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Fri, 14 Jul 2023 14:11:07 +0100
Subject: [PATCH 153/314] #92: block match & LTS CPU/CUDA regression tests

---
 niftyreg_build_version.txt               |   2 +-
 reg-lib/cpu/_reg_blockMatching.cpp       |  32 +++-
 reg-lib/cpu/_reg_blockMatching.h         |  10 +-
 reg-lib/cuda/CudaAladinContent.cpp       |  22 +--
 reg-lib/cuda/_reg_common_cuda.cu         |   3 +-
 reg-test/CMakeLists.txt                  |   5 +
 reg-test/reg_test_regr_blockMatching.cpp | 175 ++++++++++++++++++++++
 reg-test/reg_test_regr_lts.cpp           | 178 +++++++++++++++++++++++
 8 files changed, 408 insertions(+), 19 deletions(-)
 create mode 100644 reg-test/reg_test_regr_blockMatching.cpp
 create mode 100644 reg-test/reg_test_regr_lts.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e6a4f00b..31e9cf9a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-271
+272
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index b54ac854..9b2b8e21 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -15,6 +15,36 @@
 #include <map>
 #include <iostream>
 #include <cmath>
+
+_reg_blockMatchingParam::_reg_blockMatchingParam(_reg_blockMatchingParam *in)
+{
+   this->totalBlockNumber=in->totalBlockNumber;
+   this->dim=in->dim;
+   this->percent_to_keep=in->percent_to_keep;
+   this->activeBlockNumber=in->activeBlockNumber;
+   this->definedActiveBlockNumber=in->definedActiveBlockNumber;
+   this->stepSize=in->stepSize;
+   this->voxelCaptureRange=in->voxelCaptureRange;
+   this->blockNumber[0]=in->blockNumber[0];
+   this->blockNumber[1]=in->blockNumber[1];
+   this->blockNumber[2]=in->blockNumber[2];
+   this->totalBlock = (int *)malloc(this->totalBlockNumber * sizeof(int));
+   for(int i=0; i<this->totalBlockNumber; ++i)
+      this->totalBlock[i] = in->totalBlock[i];
+
+   this->referencePosition = (float *)malloc(this->activeBlockNumber * this->dim * sizeof(float));
+   this->warpedPosition = (float *)malloc(this->activeBlockNumber * this->dim * sizeof(float));
+   for(int i=0; i<this->activeBlockNumber*this->dim ; ++i){
+      this->referencePosition[i] = in->referencePosition[i];
+      this->warpedPosition[i] = in->warpedPosition[i];
+   }
+}
+_reg_blockMatchingParam::~_reg_blockMatchingParam()
+{
+   if (referencePosition) free(referencePosition);
+   if (warpedPosition) free(warpedPosition);
+   if (totalBlock) free(totalBlock);
+}
 /* *************************************************************** */
 template<class DataType>
 void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam *params, int *mask, bool runningOnGPU) {
@@ -247,7 +277,7 @@ void initialise_block_matching_method(nifti_image * reference,
    reg_print_msg_debug(text)
       #endif
          //params->activeBlock = (int *)malloc(params->activeBlockNumber * sizeof(int));
-         params->referencePosition = (float *)malloc(params->activeBlockNumber * params->dim * sizeof(float));
+   params->referencePosition = (float *)malloc(params->activeBlockNumber * params->dim * sizeof(float));
    params->warpedPosition = (float *)malloc(params->activeBlockNumber * params->dim * sizeof(float));
 
 #ifndef NDEBUG
diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h
index 958c4bec..cedadd9b 100755
--- a/reg-lib/cpu/_reg_blockMatching.h
+++ b/reg-lib/cpu/_reg_blockMatching.h
@@ -69,12 +69,10 @@ struct _reg_blockMatchingParam
         stepSize(0)
    {}
 
-   ~_reg_blockMatchingParam()
-   {
-      if (referencePosition) free(referencePosition);
-      if (warpedPosition) free(warpedPosition);
-      if (totalBlock) free(totalBlock);
-   }
+   // Perform a deep copy
+   _reg_blockMatchingParam(_reg_blockMatchingParam *);
+
+   ~_reg_blockMatchingParam();
 };
 /* *************************************************************** */
 /** @brief This function initialise a _reg_blockMatchingParam structure
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index eccdb1ea..c389e367 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -35,6 +35,7 @@ CudaAladinContent::~CudaAladinContent() {
 void CudaAladinContent::InitVars() {
     referenceImageArray_d = nullptr;
     floatingImageArray_d = nullptr;
+    transformationMatrix_d = nullptr;
     warpedImageArray_d = nullptr;
     deformationFieldArray_d = nullptr;
     referencePosition_d = nullptr;
@@ -404,31 +405,33 @@ int* CudaAladinContent::GetFloatingDims() {
 }
 /* *************************************************************** */
 void CudaAladinContent::FreeCuPtrs() {
-    if (transformationMatrix != nullptr)
+    if (transformationMatrix_d != nullptr)
         cudaCommon_free(transformationMatrix_d);
 
-    if (reference != nullptr) {
+    if (referenceImageArray_d != nullptr)
         cudaCommon_free(referenceImageArray_d);
+    if (referenceMat_d != nullptr)
         cudaCommon_free(referenceMat_d);
-    }
 
-    if (floating != nullptr) {
+    if (floatingImageArray_d != nullptr)
         cudaCommon_free(floatingImageArray_d);
+    if (floIJKMat_d != nullptr)
         cudaCommon_free(floIJKMat_d);
-    }
 
-    if (warped != nullptr)
+    if (warpedImageArray_d != nullptr)
         cudaCommon_free(warpedImageArray_d);
 
-    if (deformationField != nullptr)
+    if (deformationFieldArray_d != nullptr)
         cudaCommon_free(deformationFieldArray_d);
 
-    if (referenceMask != nullptr)
+    if (mask_d != nullptr)
         cudaCommon_free(mask_d);
 
-    if (blockMatchingParams != nullptr) {
+    if (totalBlock_d != nullptr)
         cudaCommon_free(totalBlock_d);
+    if (referencePosition_d != nullptr)
         cudaCommon_free(referencePosition_d);
+    if (warpedPosition_d != nullptr)
         cudaCommon_free(warpedPosition_d);
         /*
         cudaCommon_free(AR_d);
@@ -438,7 +441,6 @@ void CudaAladinContent::FreeCuPtrs() {
         cudaCommon_free(lengths_d);
         cudaCommon_free(newWarpedPos_d);
         */
-    }
 }
 /* *************************************************************** */
 bool CudaAladinContent::IsCurrentComputationDoubleCapable() {
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 26eefc07..35ec2db1 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -577,7 +577,8 @@ void cudaCommon_free(cudaArray *cuArray_d) {
 /* *************************************************************** */
 template <class DataType>
 void cudaCommon_free(DataType *array_d) {
-    NR_CUDA_SAFE_CALL(cudaFree(array_d));
+    if (array_d != nullptr)
+        NR_CUDA_SAFE_CALL(cudaFree(array_d));
 }
 template void cudaCommon_free<int>(int*);
 template void cudaCommon_free<float>(float*);
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 09c72cd7..3745e97c 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -117,6 +117,11 @@ set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
+if(USE_CUDA)
+  set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST})
+  set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
+endif(USE_CUDA)
+
 
 foreach(EXEC ${EXEC_LIST})
   add_executable(${EXEC} ${EXEC}.cpp)
diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp
new file mode 100644
index 00000000..063b2b08
--- /dev/null
+++ b/reg-test/reg_test_regr_blockMatching.cpp
@@ -0,0 +1,175 @@
+#include "reg_test_common.h"
+#include "_reg_blockMatching.h"
+#include "CpuBlockMatchingKernel.h"
+#include "CudaBlockMatchingKernel.h"
+
+/*
+    This test file contains a regression test to ensure the CPU and GPU version yield the same output
+*/
+
+class BMTest {
+    /*
+    Class to call the block matching function
+    */
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, int>;
+    using TestCase = std::tuple<std::string, _reg_blockMatchingParam *, _reg_blockMatchingParam *>;
+    inline static vector<TestCase> testCases;
+    NiftiImage reference2d;
+    NiftiImage floating2d;
+    NiftiImage reference3d;
+    NiftiImage floating3d;
+public:
+    ~BMTest() {
+        std::cout << "Calling destructor" << std::endl;
+    }
+    BMTest() {
+        std::cout << "Calling constructor" << std::endl;
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create a reference and floating 2D images
+        NiftiImage::dim_t size = 64;
+        vector<NiftiImage::dim_t> dim{ size, size };
+        this->reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        this->floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+
+        // Create a reference 3D image
+        dim.push_back(size);
+        this->reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        this->floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+
+        // Fill images with random values
+        float *ref2dPtr = static_cast<float *>(reference2d->data);
+        float *flo2dPtr = static_cast<float *>(floating2d->data);
+        for (int y = 0; y < reference2d->ny; ++y)
+            for (int x = 0; x < reference2d->nx; ++x) {
+                *ref2dPtr++ = distr(gen);
+                *flo2dPtr++ = distr(gen);
+            }
+
+        // Fill images with random values
+        float *ref3dPtr = static_cast<float *>(reference3d->data);
+        float *flo3dPtr = static_cast<float *>(floating3d->data);
+        for (int z = 0; z < reference3d->nz; ++z)
+            for (int y = 0; y < reference3d->ny; ++y)
+                for (int x = 0; x < reference3d->nx; ++x) {
+                    *ref3dPtr++ = distr(gen);
+                    *flo3dPtr++ = distr(gen);
+                }
+
+
+        // Create the data container for the regression test
+        vector<TestData> testData;
+        for(int b=50; b<=100; b+=50){
+            testData.emplace_back(TestData(
+                "BlockMatching 2D block " + std::to_string(b),
+                std::move(NiftiImage(this->reference2d)),
+                std::move(NiftiImage(this->floating2d)),
+                b
+            ));
+            testData.emplace_back(TestData(
+                "BlockMatching 3D block " + std::to_string(b),
+                std::move(NiftiImage(this->reference3d)),
+                std::move(NiftiImage(this->floating3d)),
+                b
+            ));
+        }
+
+        for (auto&& data : testData) {
+            unique_ptr<Platform> platformCPU{ new Platform(PlatformType::Cpu) };
+            unique_ptr<Platform> platformCUDA{ new Platform(PlatformType::Cuda) };
+            // Make a copy of the test data
+            auto&& [testName, reference, floating, block] = data;
+            // Create content creator
+            unique_ptr<AladinContentCreator> contentCreatorCPU{
+                dynamic_cast<AladinContentCreator*>(platformCPU->CreateContentCreator(ContentType::Aladin))
+            };
+            unique_ptr<AladinContentCreator> contentCreatorCUDA{
+                dynamic_cast<AladinContentCreator*>(platformCUDA->CreateContentCreator(ContentType::Aladin))
+            };
+            // Create the contents
+            unique_ptr<AladinContent> contentCPU{ contentCreatorCPU->Create(
+                NiftiImage(reference).disown(),
+                NiftiImage(floating).disown(),
+                nullptr,
+                nullptr,
+                sizeof(float),
+                100,
+                block,
+                1
+            )};
+            unique_ptr<AladinContent> contentCUDA{ contentCreatorCUDA->Create(
+                NiftiImage(reference).disown(),
+                NiftiImage(floating).disown(),
+                nullptr,
+                nullptr,
+                sizeof(float),
+                100,
+                block,
+                1
+            )};
+            // Initialise the warped image
+            contentCPU->SetWarped(NiftiImage(floating).disown());
+            contentCUDA->SetWarped(NiftiImage(floating).disown());
+            // Initialise the block matching
+            std::unique_ptr<Kernel> kernelCPU = nullptr;
+            kernelCPU.reset(platformCPU->CreateKernel(BlockMatchingKernel::GetName(), contentCPU.get()));
+            std::unique_ptr<Kernel> kernelCUDA = nullptr;
+            kernelCUDA.reset(platformCUDA->CreateKernel(BlockMatchingKernel::GetName(), contentCUDA.get()));
+
+            // run the computation
+            kernelCPU->template castTo<CpuBlockMatchingKernel>()->Calculate();
+            kernelCUDA->template castTo<CudaBlockMatchingKernel>()->Calculate();
+
+            // Retrieve the information
+            _reg_blockMatchingParam *blockMatchingParamsCPU = new _reg_blockMatchingParam(contentCPU->GetBlockMatchingParams());
+            _reg_blockMatchingParam *blockMatchingParamsCUDA = new _reg_blockMatchingParam(contentCUDA->GetBlockMatchingParams());
+
+            testCases.push_back({
+                testName,
+                blockMatchingParamsCPU,
+                blockMatchingParamsCUDA
+            });
+            contentCPU.reset();
+            contentCUDA.reset();
+        }
+    }
+};
+
+TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : this->testCases) {
+        // Retrieve test information
+        auto&& [testName, blockMatchingParamsCPU, blockMatchingParamsCUDA] = testCase;
+
+        SECTION(testName) {
+
+            // Ensure both approaches retreive the same number of voxel
+            REQUIRE(blockMatchingParamsCPU->activeBlockNumber==blockMatchingParamsCUDA->activeBlockNumber);
+
+            // Loop over the block and ensure all values are identical
+            for(int b=0; b<blockMatchingParamsCPU->activeBlockNumber*blockMatchingParamsCPU->dim; ++b){
+                float delta = blockMatchingParamsCPU->referencePosition[b] - blockMatchingParamsCUDA->referencePosition[b];
+                if(fabs(delta) > EPS){
+                    std::cout << "HERE " << delta << std::endl;
+                    std::cout.flush();
+                }
+                REQUIRE(fabs(delta) < EPS);
+                delta = blockMatchingParamsCPU->warpedPosition[b] - blockMatchingParamsCUDA->warpedPosition[b];
+                if(fabs(delta) > EPS){
+                    std::cout << "HERE " << delta << std::endl;
+                    std::cout.flush();
+                }
+                REQUIRE(fabs(delta) < EPS);
+            }
+            delete blockMatchingParamsCPU;
+            delete blockMatchingParamsCUDA;
+        }
+    }
+}
\ No newline at end of file
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
new file mode 100644
index 00000000..b610b72d
--- /dev/null
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -0,0 +1,178 @@
+#include "reg_test_common.h"
+#include "_reg_blockMatching.h"
+#include "CpuBlockMatchingKernel.h"
+
+#include "OptimiseKernel.h"
+#include "CpuOptimiseKernel.h"
+#include "CudaOptimiseKernel.h"
+
+/*
+    This test file contains a regression test to ensure the CPU and GPU version yield the same output
+*/
+
+class LTSTest {
+    /*
+    Class to call the LTS function
+    */
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, int, int>;
+    using TestCase = std::tuple<std::string, mat44 *, mat44 *>;
+    inline static vector<TestCase> testCases;
+    NiftiImage reference2d;
+    NiftiImage floating2d;
+    NiftiImage reference3d;
+    NiftiImage floating3d;
+public:
+    ~LTSTest() {
+        std::cout << "Calling destructor" << std::endl;
+    }
+    LTSTest() {
+        std::cout << "Calling constructor" << std::endl;
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create a reference and floating 2D images
+        NiftiImage::dim_t size = 64;
+        vector<NiftiImage::dim_t> dim{ size, size };
+        this->reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        this->floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+
+        // Create a reference 3D image
+        dim.push_back(size);
+        this->reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        this->floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+
+        // Fill images with random values
+        float *ref2dPtr = static_cast<float *>(reference2d->data);
+        float *flo2dPtr = static_cast<float *>(floating2d->data);
+        for (int y = 0; y < reference2d->ny; ++y)
+            for (int x = 0; x < reference2d->nx; ++x) {
+                *ref2dPtr++ = distr(gen);
+                *flo2dPtr++ = distr(gen);
+            }
+
+        // Fill images with random values
+        float *ref3dPtr = static_cast<float *>(reference3d->data);
+        float *flo3dPtr = static_cast<float *>(floating3d->data);
+        for (int z = 0; z < reference3d->nz; ++z)
+            for (int y = 0; y < reference3d->ny; ++y)
+                for (int x = 0; x < reference3d->nx; ++x) {
+                    *ref3dPtr++ = distr(gen);
+                    *flo3dPtr++ = distr(gen);
+                }
+
+
+        // Create the data container for the regression test
+        vector<TestData> testData;
+        for(int t=0; t<=1; ++t){
+            for(int i=20; i<=100; i+=20){
+                testData.emplace_back(TestData(
+                    "BlockMatching 2D type " + std::to_string(t) + " inlier " + std::to_string(i),
+                    std::move(NiftiImage(this->reference2d)),
+                    std::move(NiftiImage(this->floating2d)),
+                    t,
+                    i
+                ));
+                testData.emplace_back(TestData(
+                    "BlockMatching 3D type " + std::to_string(t) + " inlier " + std::to_string(i),
+                    std::move(NiftiImage(this->reference3d)),
+                    std::move(NiftiImage(this->floating3d)),
+                    t,
+                    i
+                ));
+            }
+        }
+
+        for (auto&& data : testData) {
+            unique_ptr<Platform> platformCPU{ new Platform(PlatformType::Cpu) };
+            unique_ptr<Platform> platformCUDA{ new Platform(PlatformType::Cuda) };
+            // Make a copy of the test data
+            auto&& [testName, reference, floating, ttype, inlier] = data;
+            // Create content creator
+            unique_ptr<AladinContentCreator> contentCreatorCPU{
+                dynamic_cast<AladinContentCreator*>(platformCPU->CreateContentCreator(ContentType::Aladin))
+            };
+            unique_ptr<AladinContentCreator> contentCreatorCUDA{
+                dynamic_cast<AladinContentCreator*>(platformCUDA->CreateContentCreator(ContentType::Aladin))
+            };
+            // Create identity transformations
+            mat44 *matCPU = new mat44; reg_mat44_eye(matCPU);
+            mat44 *matCUDA = new mat44; reg_mat44_eye(matCUDA);
+            // Create the contents
+            unique_ptr<AladinContent> contentCPU{ contentCreatorCPU->Create(
+                NiftiImage(reference).disown(),
+                NiftiImage(floating).disown(),
+                nullptr,
+                matCPU,
+                sizeof(float),
+                inlier,
+                100,
+                1
+            )};
+            unique_ptr<AladinContent> contentCUDA{ contentCreatorCUDA->Create(
+                NiftiImage(reference).disown(),
+                NiftiImage(floating).disown(),
+                nullptr,
+                matCUDA,
+                sizeof(float),
+                inlier,
+                100,
+                1
+            )};
+            // Initialise the warped image
+            contentCPU->SetWarped(NiftiImage(floating).disown());
+            contentCUDA->SetWarped(NiftiImage(floating).disown());
+
+            // Initialise the block matching and run it on the CPU
+            std::unique_ptr<Kernel> BMKernelCPU = nullptr;
+            BMKernelCPU.reset(platformCPU->CreateKernel(BlockMatchingKernel::GetName(), contentCPU.get()));
+            BMKernelCPU->template castTo<CpuBlockMatchingKernel>()->Calculate();
+
+            // Set the CUDA block matching parameteters
+            _reg_blockMatchingParam *blockMatchingParamsCPU = new _reg_blockMatchingParam(contentCPU->GetBlockMatchingParams());
+            contentCUDA->SetBlockMatchingParams(blockMatchingParamsCPU);
+
+            // Compute a transformations
+            std::unique_ptr<Kernel> kernelCPU = nullptr;
+            kernelCPU.reset(platformCPU->CreateKernel(OptimiseKernel::GetName(), contentCPU.get()));
+            kernelCPU->template castTo<CpuOptimiseKernel>()->Calculate(ttype);
+            std::unique_ptr<Kernel> kernelCUDA = nullptr;
+            kernelCUDA.reset(platformCUDA->CreateKernel(OptimiseKernel::GetName(), contentCUDA.get()));
+            kernelCUDA->template castTo<CudaOptimiseKernel>()->Calculate(ttype);
+
+            // Save the matrices for testing
+            testCases.push_back({
+                testName,
+                matCPU,
+                matCUDA
+            });
+            contentCPU.reset();
+            contentCUDA.reset();
+        }
+    }
+};
+
+TEST_CASE_METHOD(LTSTest, "Regression LTS", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : this->testCases) {
+        // Retrieve test information
+        auto&& [testName, mat_cpu, mat_cuda] = testCase;
+
+        SECTION(testName) {
+
+            // Loop over the matrix values and ensure they are identical
+            for(int j=0; j<4; ++j){
+                for(int i=0; i<4; ++i){
+                    REQUIRE(fabs(mat_cpu->m[i][j] - mat_cuda->m[i][j]) < EPS);
+                }
+            }
+            delete mat_cpu;
+            delete mat_cuda;
+        }
+    }
+}
\ No newline at end of file

From 9506815644a889c4f769da1039d808d9605f805a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 14 Jul 2023 19:19:55 +0100
Subject: [PATCH 154/314] Refactor the LTS regression test

---
 niftyreg_build_version.txt     |   2 +-
 reg-test/reg_test_regr_lts.cpp | 166 +++++++++++++++------------------
 2 files changed, 78 insertions(+), 90 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 31e9cf9a..305aa985 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-272
+273
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
index b610b72d..162755b0 100644
--- a/reg-test/reg_test_regr_lts.cpp
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -6,28 +6,19 @@
 #include "CpuOptimiseKernel.h"
 #include "CudaOptimiseKernel.h"
 
-/*
-    This test file contains a regression test to ensure the CPU and GPU version yield the same output
-*/
+/**
+ *  LTS regression test to ensure the CPU and CUDA versions yield the same output
+ */
 
 class LTSTest {
-    /*
-    Class to call the LTS function
-    */
 protected:
     using TestData = std::tuple<std::string, NiftiImage, NiftiImage, int, int>;
-    using TestCase = std::tuple<std::string, mat44 *, mat44 *>;
+    using TestCase = std::tuple<std::string, unique_ptr<mat44>, unique_ptr<mat44>>;
+
     inline static vector<TestCase> testCases;
-    NiftiImage reference2d;
-    NiftiImage floating2d;
-    NiftiImage reference3d;
-    NiftiImage floating3d;
+
 public:
-    ~LTSTest() {
-        std::cout << "Calling destructor" << std::endl;
-    }
     LTSTest() {
-        std::cout << "Calling constructor" << std::endl;
         if (!testCases.empty())
             return;
 
@@ -37,51 +28,54 @@ class LTSTest {
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a reference and floating 2D images
-        NiftiImage::dim_t size = 64;
+        constexpr NiftiImage::dim_t size = 64;
         vector<NiftiImage::dim_t> dim{ size, size };
-        this->reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
-        this->floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
 
         // Create a reference 3D image
         dim.push_back(size);
-        this->reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
-        this->floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
 
         // Fill images with random values
-        float *ref2dPtr = static_cast<float *>(reference2d->data);
-        float *flo2dPtr = static_cast<float *>(floating2d->data);
+        const auto ref2dPtr = reference2d.data();
+        auto ref2dItr = ref2dPtr.begin();
+        const auto flo2dPtr = floating2d.data();
+        auto flo2dItr = flo2dPtr.begin();
         for (int y = 0; y < reference2d->ny; ++y)
             for (int x = 0; x < reference2d->nx; ++x) {
-                *ref2dPtr++ = distr(gen);
-                *flo2dPtr++ = distr(gen);
+                *ref2dItr++ = distr(gen);
+                *flo2dItr++ = distr(gen);
             }
 
         // Fill images with random values
-        float *ref3dPtr = static_cast<float *>(reference3d->data);
-        float *flo3dPtr = static_cast<float *>(floating3d->data);
+        const auto ref3dPtr = reference3d.data();
+        auto ref3dItr = ref3dPtr.begin();
+        const auto flo3dPtr = floating3d.data();
+        auto flo3dItr = flo3dPtr.begin();
         for (int z = 0; z < reference3d->nz; ++z)
             for (int y = 0; y < reference3d->ny; ++y)
                 for (int x = 0; x < reference3d->nx; ++x) {
-                    *ref3dPtr++ = distr(gen);
-                    *flo3dPtr++ = distr(gen);
+                    *ref3dItr++ = distr(gen);
+                    *flo3dItr++ = distr(gen);
                 }
 
-
         // Create the data container for the regression test
         vector<TestData> testData;
-        for(int t=0; t<=1; ++t){
-            for(int i=20; i<=100; i+=20){
+        for (int t = 0; t <= 1; ++t) {
+            for (int i = 20; i <= 100; i += 20) {
                 testData.emplace_back(TestData(
                     "BlockMatching 2D type " + std::to_string(t) + " inlier " + std::to_string(i),
-                    std::move(NiftiImage(this->reference2d)),
-                    std::move(NiftiImage(this->floating2d)),
+                    reference2d,
+                    floating2d,
                     t,
                     i
                 ));
                 testData.emplace_back(TestData(
                     "BlockMatching 3D type " + std::to_string(t) + " inlier " + std::to_string(i),
-                    std::move(NiftiImage(this->reference3d)),
-                    std::move(NiftiImage(this->floating3d)),
+                    reference3d,
+                    floating3d,
                     t,
                     i
                 ));
@@ -89,70 +83,62 @@ class LTSTest {
         }
 
         for (auto&& data : testData) {
-            unique_ptr<Platform> platformCPU{ new Platform(PlatformType::Cpu) };
-            unique_ptr<Platform> platformCUDA{ new Platform(PlatformType::Cuda) };
-            // Make a copy of the test data
+            // Get the test data
             auto&& [testName, reference, floating, ttype, inlier] = data;
-            // Create content creator
-            unique_ptr<AladinContentCreator> contentCreatorCPU{
-                dynamic_cast<AladinContentCreator*>(platformCPU->CreateContentCreator(ContentType::Aladin))
-            };
-            unique_ptr<AladinContentCreator> contentCreatorCUDA{
-                dynamic_cast<AladinContentCreator*>(platformCUDA->CreateContentCreator(ContentType::Aladin))
-            };
+
             // Create identity transformations
-            mat44 *matCPU = new mat44; reg_mat44_eye(matCPU);
-            mat44 *matCUDA = new mat44; reg_mat44_eye(matCUDA);
+            unique_ptr<mat44> matCpu{ new mat44 }; reg_mat44_eye(matCpu.get());
+            unique_ptr<mat44> matCuda{ new mat44 }; reg_mat44_eye(matCuda.get());
+
+            // Create images
+            NiftiImage referenceCpu(reference), referenceCuda(reference);
+            NiftiImage floatingCpu(floating), floatingCuda(floating);
+            NiftiImage warpedCpu(floating), warpedCuda(floating);
+
             // Create the contents
-            unique_ptr<AladinContent> contentCPU{ contentCreatorCPU->Create(
-                NiftiImage(reference).disown(),
-                NiftiImage(floating).disown(),
+            unique_ptr<AladinContent> contentCpu{ new AladinContent(
+                referenceCpu,
+                floatingCpu,
                 nullptr,
-                matCPU,
+                matCpu.get(),
                 sizeof(float),
                 inlier,
                 100,
                 1
-            )};
-            unique_ptr<AladinContent> contentCUDA{ contentCreatorCUDA->Create(
-                NiftiImage(reference).disown(),
-                NiftiImage(floating).disown(),
+            ) };
+            unique_ptr<AladinContent> contentCuda{ new CudaAladinContent(
+                referenceCuda,
+                floatingCuda,
                 nullptr,
-                matCUDA,
+                matCuda.get(),
                 sizeof(float),
                 inlier,
                 100,
                 1
-            )};
-            // Initialise the warped image
-            contentCPU->SetWarped(NiftiImage(floating).disown());
-            contentCUDA->SetWarped(NiftiImage(floating).disown());
+            ) };
+
+            // Initialise the warped images
+            contentCpu->SetWarped(warpedCpu.disown());
+            contentCuda->SetWarped(warpedCuda.disown());
 
             // Initialise the block matching and run it on the CPU
-            std::unique_ptr<Kernel> BMKernelCPU = nullptr;
-            BMKernelCPU.reset(platformCPU->CreateKernel(BlockMatchingKernel::GetName(), contentCPU.get()));
-            BMKernelCPU->template castTo<CpuBlockMatchingKernel>()->Calculate();
-
-            // Set the CUDA block matching parameteters
-            _reg_blockMatchingParam *blockMatchingParamsCPU = new _reg_blockMatchingParam(contentCPU->GetBlockMatchingParams());
-            contentCUDA->SetBlockMatchingParams(blockMatchingParamsCPU);
-
-            // Compute a transformations
-            std::unique_ptr<Kernel> kernelCPU = nullptr;
-            kernelCPU.reset(platformCPU->CreateKernel(OptimiseKernel::GetName(), contentCPU.get()));
-            kernelCPU->template castTo<CpuOptimiseKernel>()->Calculate(ttype);
-            std::unique_ptr<Kernel> kernelCUDA = nullptr;
-            kernelCUDA.reset(platformCUDA->CreateKernel(OptimiseKernel::GetName(), contentCUDA.get()));
-            kernelCUDA->template castTo<CudaOptimiseKernel>()->Calculate(ttype);
+            std::unique_ptr<BlockMatchingKernel> bmKernelCpu { new CpuBlockMatchingKernel(contentCpu.get()) };
+            bmKernelCpu->Calculate();
+
+            // Set the CUDA block matching parameters
+            _reg_blockMatchingParam *blockMatchingParamsCuda = new _reg_blockMatchingParam(contentCpu->GetBlockMatchingParams());
+            contentCuda->SetBlockMatchingParams(blockMatchingParamsCuda);
+
+            // Initialise the optimise kernels
+            std::unique_ptr<OptimiseKernel> kernelCpu{ new CpuOptimiseKernel(contentCpu.get()) };
+            std::unique_ptr<OptimiseKernel> kernelCuda{ new CudaOptimiseKernel(contentCuda.get()) };
+
+            // Compute the transformations
+            kernelCpu->Calculate(ttype);
+            kernelCuda->Calculate(ttype);
 
             // Save the matrices for testing
-            testCases.push_back({
-                testName,
-                matCPU,
-                matCUDA
-            });
-            contentCPU.reset();
-            contentCUDA.reset();
+            testCases.push_back({ testName, std::move(matCpu), std::move(matCuda) });
         }
     }
 };
@@ -161,18 +147,20 @@ TEST_CASE_METHOD(LTSTest, "Regression LTS", "[regression]") {
     // Loop over all generated test cases
     for (auto&& testCase : this->testCases) {
         // Retrieve test information
-        auto&& [testName, mat_cpu, mat_cuda] = testCase;
+        auto&& [testName, matCpu, matCuda] = testCase;
 
         SECTION(testName) {
+            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
 
             // Loop over the matrix values and ensure they are identical
-            for(int j=0; j<4; ++j){
-                for(int i=0; i<4; ++i){
-                    REQUIRE(fabs(mat_cpu->m[i][j] - mat_cuda->m[i][j]) < EPS);
+            for (int i = 0; i < 4; ++i) {
+                for (int j = 0; j < 4; ++j) {
+                    const auto mCpu = matCpu->m[i][j];
+                    const auto mCuda = matCuda->m[i][j];
+                    std::cout << i << " " << j << " " << mCpu << " " << mCuda << std::endl;
+                    REQUIRE(fabs(mCpu - mCuda) < EPS);
                 }
             }
-            delete mat_cpu;
-            delete mat_cuda;
         }
     }
-}
\ No newline at end of file
+}

From 9e85760519464fae50d8d32df1ea5b53a7da3299 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 14 Jul 2023 19:21:00 +0100
Subject: [PATCH 155/314] Refactor the BlockMatching regression test

---
 niftyreg_build_version.txt               |   2 +-
 reg-test/reg_test_regr_blockMatching.cpp | 162 ++++++++++-------------
 2 files changed, 72 insertions(+), 92 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 305aa985..d4d5a4b7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-273
+274
diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp
index 063b2b08..ca2392cf 100644
--- a/reg-test/reg_test_regr_blockMatching.cpp
+++ b/reg-test/reg_test_regr_blockMatching.cpp
@@ -3,28 +3,19 @@
 #include "CpuBlockMatchingKernel.h"
 #include "CudaBlockMatchingKernel.h"
 
-/*
-    This test file contains a regression test to ensure the CPU and GPU version yield the same output
-*/
+/**
+ *  Block matching regression test to ensure the CPU and CUDA versions yield the same output
+ */
 
 class BMTest {
-    /*
-    Class to call the block matching function
-    */
 protected:
     using TestData = std::tuple<std::string, NiftiImage, NiftiImage, int>;
-    using TestCase = std::tuple<std::string, _reg_blockMatchingParam *, _reg_blockMatchingParam *>;
+    using TestCase = std::tuple<std::string, unique_ptr<_reg_blockMatchingParam>, unique_ptr<_reg_blockMatchingParam>>;
+
     inline static vector<TestCase> testCases;
-    NiftiImage reference2d;
-    NiftiImage floating2d;
-    NiftiImage reference3d;
-    NiftiImage floating3d;
+
 public:
-    ~BMTest() {
-        std::cout << "Calling destructor" << std::endl;
-    }
     BMTest() {
-        std::cout << "Calling constructor" << std::endl;
         if (!testCases.empty())
             return;
 
@@ -34,110 +25,104 @@ class BMTest {
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a reference and floating 2D images
-        NiftiImage::dim_t size = 64;
+        constexpr NiftiImage::dim_t size = 64;
         vector<NiftiImage::dim_t> dim{ size, size };
-        this->reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
-        this->floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
 
         // Create a reference 3D image
         dim.push_back(size);
-        this->reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
-        this->floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
 
         // Fill images with random values
-        float *ref2dPtr = static_cast<float *>(reference2d->data);
-        float *flo2dPtr = static_cast<float *>(floating2d->data);
+        const auto ref2dPtr = reference2d.data();
+        auto ref2dItr = ref2dPtr.begin();
+        const auto flo2dPtr = floating2d.data();
+        auto flo2dItr = flo2dPtr.begin();
         for (int y = 0; y < reference2d->ny; ++y)
             for (int x = 0; x < reference2d->nx; ++x) {
-                *ref2dPtr++ = distr(gen);
-                *flo2dPtr++ = distr(gen);
+                *ref2dItr++ = distr(gen);
+                *flo2dItr++ = distr(gen);
             }
 
         // Fill images with random values
-        float *ref3dPtr = static_cast<float *>(reference3d->data);
-        float *flo3dPtr = static_cast<float *>(floating3d->data);
+        const auto ref3dPtr = reference3d.data();
+        auto ref3dItr = ref3dPtr.begin();
+        const auto flo3dPtr = floating3d.data();
+        auto flo3dItr = flo3dPtr.begin();
         for (int z = 0; z < reference3d->nz; ++z)
             for (int y = 0; y < reference3d->ny; ++y)
                 for (int x = 0; x < reference3d->nx; ++x) {
-                    *ref3dPtr++ = distr(gen);
-                    *flo3dPtr++ = distr(gen);
+                    *ref3dItr++ = distr(gen);
+                    *flo3dItr++ = distr(gen);
                 }
 
-
         // Create the data container for the regression test
         vector<TestData> testData;
-        for(int b=50; b<=100; b+=50){
+        for (int b = 50; b <= 100; b += 50) {
             testData.emplace_back(TestData(
                 "BlockMatching 2D block " + std::to_string(b),
-                std::move(NiftiImage(this->reference2d)),
-                std::move(NiftiImage(this->floating2d)),
+                reference2d,
+                floating2d,
                 b
             ));
             testData.emplace_back(TestData(
                 "BlockMatching 3D block " + std::to_string(b),
-                std::move(NiftiImage(this->reference3d)),
-                std::move(NiftiImage(this->floating3d)),
+                reference3d,
+                floating3d,
                 b
             ));
         }
 
         for (auto&& data : testData) {
-            unique_ptr<Platform> platformCPU{ new Platform(PlatformType::Cpu) };
-            unique_ptr<Platform> platformCUDA{ new Platform(PlatformType::Cuda) };
-            // Make a copy of the test data
+            // Get the test data
             auto&& [testName, reference, floating, block] = data;
-            // Create content creator
-            unique_ptr<AladinContentCreator> contentCreatorCPU{
-                dynamic_cast<AladinContentCreator*>(platformCPU->CreateContentCreator(ContentType::Aladin))
-            };
-            unique_ptr<AladinContentCreator> contentCreatorCUDA{
-                dynamic_cast<AladinContentCreator*>(platformCUDA->CreateContentCreator(ContentType::Aladin))
-            };
+
+            // Create images
+            NiftiImage referenceCpu(reference), referenceCuda(reference);
+            NiftiImage floatingCpu(floating), floatingCuda(floating);
+            NiftiImage warpedCpu(floating), warpedCuda(floating);
+
             // Create the contents
-            unique_ptr<AladinContent> contentCPU{ contentCreatorCPU->Create(
-                NiftiImage(reference).disown(),
-                NiftiImage(floating).disown(),
+            unique_ptr<AladinContent> contentCpu{ new AladinContent(
+                referenceCpu,
+                floatingCpu,
                 nullptr,
                 nullptr,
                 sizeof(float),
                 100,
                 block,
                 1
-            )};
-            unique_ptr<AladinContent> contentCUDA{ contentCreatorCUDA->Create(
-                NiftiImage(reference).disown(),
-                NiftiImage(floating).disown(),
+            ) };
+            unique_ptr<AladinContent> contentCuda{ new CudaAladinContent(
+                referenceCuda,
+                floatingCuda,
                 nullptr,
                 nullptr,
                 sizeof(float),
                 100,
                 block,
                 1
-            )};
-            // Initialise the warped image
-            contentCPU->SetWarped(NiftiImage(floating).disown());
-            contentCUDA->SetWarped(NiftiImage(floating).disown());
+            ) };
+
+            // Initialise the warped images
+            contentCpu->SetWarped(warpedCpu.disown());
+            contentCuda->SetWarped(warpedCuda.disown());
+
             // Initialise the block matching
-            std::unique_ptr<Kernel> kernelCPU = nullptr;
-            kernelCPU.reset(platformCPU->CreateKernel(BlockMatchingKernel::GetName(), contentCPU.get()));
-            std::unique_ptr<Kernel> kernelCUDA = nullptr;
-            kernelCUDA.reset(platformCUDA->CreateKernel(BlockMatchingKernel::GetName(), contentCUDA.get()));
+            std::unique_ptr<BlockMatchingKernel> kernelCpu{ new CpuBlockMatchingKernel(contentCpu.get()) };
+            std::unique_ptr<BlockMatchingKernel> kernelCuda{ new CudaBlockMatchingKernel(contentCuda.get()) };
 
-            // run the computation
-            kernelCPU->template castTo<CpuBlockMatchingKernel>()->Calculate();
-            kernelCUDA->template castTo<CudaBlockMatchingKernel>()->Calculate();
+            // Do the computation
+            kernelCpu->Calculate();
+            kernelCuda->Calculate();
 
             // Retrieve the information
-            _reg_blockMatchingParam *blockMatchingParamsCPU = new _reg_blockMatchingParam(contentCPU->GetBlockMatchingParams());
-            _reg_blockMatchingParam *blockMatchingParamsCUDA = new _reg_blockMatchingParam(contentCUDA->GetBlockMatchingParams());
-
-            testCases.push_back({
-                testName,
-                blockMatchingParamsCPU,
-                blockMatchingParamsCUDA
-            });
-            contentCPU.reset();
-            contentCUDA.reset();
+            unique_ptr<_reg_blockMatchingParam> blockMatchingParamsCpu{ new _reg_blockMatchingParam(contentCpu->GetBlockMatchingParams()) };
+            unique_ptr<_reg_blockMatchingParam> blockMatchingParamsCuda{ new _reg_blockMatchingParam(contentCuda->GetBlockMatchingParams()) };
+
+            testCases.push_back({ testName, std::move(blockMatchingParamsCpu), std::move(blockMatchingParamsCuda) });
         }
     }
 };
@@ -146,30 +131,25 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") {
     // Loop over all generated test cases
     for (auto&& testCase : this->testCases) {
         // Retrieve test information
-        auto&& [testName, blockMatchingParamsCPU, blockMatchingParamsCUDA] = testCase;
+        auto&& [testName, blockMatchingParamsCpu, blockMatchingParamsCuda] = testCase;
 
         SECTION(testName) {
+            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
 
-            // Ensure both approaches retreive the same number of voxel
-            REQUIRE(blockMatchingParamsCPU->activeBlockNumber==blockMatchingParamsCUDA->activeBlockNumber);
+            // Ensure both approaches retrieve the same number of voxels
+            REQUIRE(blockMatchingParamsCpu->activeBlockNumber == blockMatchingParamsCuda->activeBlockNumber);
 
             // Loop over the block and ensure all values are identical
-            for(int b=0; b<blockMatchingParamsCPU->activeBlockNumber*blockMatchingParamsCPU->dim; ++b){
-                float delta = blockMatchingParamsCPU->referencePosition[b] - blockMatchingParamsCUDA->referencePosition[b];
-                if(fabs(delta) > EPS){
-                    std::cout << "HERE " << delta << std::endl;
-                    std::cout.flush();
-                }
-                REQUIRE(fabs(delta) < EPS);
-                delta = blockMatchingParamsCPU->warpedPosition[b] - blockMatchingParamsCUDA->warpedPosition[b];
-                if(fabs(delta) > EPS){
-                    std::cout << "HERE " << delta << std::endl;
-                    std::cout.flush();
-                }
-                REQUIRE(fabs(delta) < EPS);
+            for (int b = 0; b < blockMatchingParamsCpu->activeBlockNumber * (int)blockMatchingParamsCpu->dim; ++b) {
+                const auto refPosCpu = blockMatchingParamsCpu->referencePosition[b];
+                const auto refPosCuda = blockMatchingParamsCuda->referencePosition[b];
+                std::cout << "referencePosition: " << b << " " << refPosCpu << " " << refPosCuda << std::endl;
+                REQUIRE(fabs(refPosCpu - refPosCuda) < EPS);
+                const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[b];
+                const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[b];
+                std::cout << "warpedPosition: " << b << " " << warPosCpu << " " << warPosCuda << std::endl;
+                REQUIRE(fabs(warPosCpu - warPosCuda) < EPS);
             }
-            delete blockMatchingParamsCPU;
-            delete blockMatchingParamsCUDA;
         }
     }
-}
\ No newline at end of file
+}

From fdf30a41c45072766b88278eb9920eafd1475067 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 17 Jul 2023 12:40:38 +0100
Subject: [PATCH 156/314] Refactor the LNCC test

---
 niftyreg_build_version.txt |   2 +-
 reg-test/reg_test_lncc.cpp | 350 +++++++++++++++++--------------------
 2 files changed, 162 insertions(+), 190 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index d4d5a4b7..4c738e3f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-274
+275
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index 1d84f86c..f6bd6cb3 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -7,15 +7,11 @@
 
 /*
     This test file contains the following unit tests:
-    test function: LNCC computation and its voxel wise gradient
+    test function: LNCC computation and its voxel-wise gradient
     In 2D and 3D
 */
 
 class LNCCTest {
-    /*
-    Class to compute the LNCC between two values without any convolution
-    Will take some time, don't judge me!!
-    */
 public:
     LNCCTest() {
         if (!testCases.empty())
@@ -26,156 +22,157 @@ class LNCCTest {
         std::mt19937 gen(rd());
         std::uniform_real_distribution<float> distr(0, 1);
 
-        // Create a reference and floating 2D images
+        // Create reference and floating 2D images
         vector<NiftiImage::dim_t> dim{ 16, 16 };
-        reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
-        floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
 
-        // Create a reference 3D image
+        // Create reference and floating 3D images
         dim.push_back(16);
-        reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
-        floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
 
         // Create corresponding identify control point grids
-        cpp2d = CreateControlPointGrid(reference2d);
-        cpp3d = CreateControlPointGrid(reference3d);
+        NiftiImage cpp2d(CreateControlPointGrid(reference2d));
+        NiftiImage cpp3d(CreateControlPointGrid(reference3d));
 
         // Fill images with random values
-        float *ref2dPtr = static_cast<float *>(reference2d->data);
-        float *flo2dPtr = static_cast<float *>(floating2d->data);
+        const auto ref2dPtr = reference2d.data();
+        auto ref2dItr = ref2dPtr.begin();
+        const auto flo2dPtr = floating2d.data();
+        auto flo2dItr = flo2dPtr.begin();
         for (int y = 0; y < reference2d->ny; ++y)
             for (int x = 0; x < reference2d->nx; ++x) {
-                *ref2dPtr++ = distr(gen);
-                *flo2dPtr++ = distr(gen);
+                *ref2dItr++ = distr(gen);
+                *flo2dItr++ = distr(gen);
             }
 
         // Fill images with random values
-        float *ref3dPtr = static_cast<float *>(reference3d->data);
-        float *flo3dPtr = static_cast<float *>(floating3d->data);
+        const auto ref3dPtr = reference3d.data();
+        auto ref3dItr = ref3dPtr.begin();
+        const auto flo3dPtr = floating3d.data();
+        auto flo3dItr = flo3dPtr.begin();
         for (int z = 0; z < reference3d->nz; ++z)
             for (int y = 0; y < reference3d->ny; ++y)
                 for (int x = 0; x < reference3d->nx; ++x) {
-                    *ref3dPtr++ = distr(gen);
-                    *flo3dPtr++ = distr(gen);
+                    *ref3dItr++ = distr(gen);
+                    *flo3dItr++ = distr(gen);
                 }
 
         // Create the object to compute the expected values
         vector<TestData> testData;
-        this->_ref = reference2d;
-        this->_flo = floating2d;
         testData.emplace_back(TestData(
             "LNCC 2D -1",
-            std::move(NiftiImage(reference2d)),
-            std::move(NiftiImage(floating2d)),
-            std::move(NiftiImage(cpp2d)),
+            reference2d,
+            floating2d,
+            cpp2d,
             -1.f,
-            this->GetLNCCNoConv(1)
+            GetLNCCNoConv(1, reference2d, floating2d)
         ));
         testData.emplace_back(TestData(
             "LNCC 2D -1 same image",
-            std::move(NiftiImage(reference2d)),
-            std::move(NiftiImage(reference2d)),
-            std::move(NiftiImage(cpp2d)),
+            reference2d,
+            reference2d,
+            cpp2d,
             -1.f,
-            1.f
+            1.0
         ));
         testData.emplace_back(TestData(
             "LNCC 2D -5",
-            std::move(NiftiImage(reference2d)),
-            std::move(NiftiImage(floating2d)),
-            std::move(NiftiImage(cpp2d)),
+            reference2d,
+            floating2d,
+            cpp2d,
             -5.f,
-            this->GetLNCCNoConv(5)
+            GetLNCCNoConv(5, reference2d, floating2d)
         ));
         testData.emplace_back(TestData(
             "LNCC 2D -5 same image",
-            std::move(NiftiImage(reference2d)),
-            std::move(NiftiImage(reference2d)),
-            std::move(NiftiImage(cpp2d)),
+            reference2d,
+            reference2d,
+            cpp2d,
             -5.f,
-            1.f
+            1.0
         ));
         reg_tools_multiplyValueToImage(reference2d, floating2d, -1.f);
         testData.emplace_back(TestData(
             "LNCC 2D -1 same image negated",
-            std::move(NiftiImage(reference2d)),
-            std::move(NiftiImage(floating2d)),
-            std::move(NiftiImage(cpp2d)),
+            reference2d,
+            floating2d,
+            cpp2d,
             -1.f,
-            1.f
+            1.0
         ));
         testData.emplace_back(TestData(
             "LNCC 2D -5 same image negated",
-            std::move(NiftiImage(reference2d)),
-            std::move(NiftiImage(floating2d)),
-            std::move(NiftiImage(cpp2d)),
+            reference2d,
+            floating2d,
+            cpp2d,
             -5.f,
-            1.f
+            1.0
         ));
-        this->_ref = reference3d;
-        this->_flo = floating3d;
         testData.emplace_back(TestData(
             "LNCC 3D -1",
-            std::move(NiftiImage(reference3d)),
-            std::move(NiftiImage(floating3d)),
-            std::move(NiftiImage(cpp3d)),
+            reference3d,
+            floating3d,
+            cpp3d,
             -1.f,
-            this->GetLNCCNoConv(1)
+            GetLNCCNoConv(1, reference3d, floating3d)
         ));
         testData.emplace_back(TestData(
             "LNCC 3D -1 same image",
-            std::move(NiftiImage(reference3d)),
-            std::move(NiftiImage(reference3d)),
-            std::move(NiftiImage(cpp3d)),
+            reference3d,
+            reference3d,
+            cpp3d,
             -1.f,
-            1.f
+            1.0
         ));
         testData.emplace_back(TestData(
             "LNCC 3D -5",
-            std::move(NiftiImage(reference3d)),
-            std::move(NiftiImage(floating3d)),
-            std::move(NiftiImage(cpp3d)),
+            reference3d,
+            floating3d,
+            cpp3d,
             -5.f,
-            this->GetLNCCNoConv(5)
+            GetLNCCNoConv(5, reference3d, floating3d)
         ));
         testData.emplace_back(TestData(
             "LNCC 3D -5 same image",
-            std::move(NiftiImage(reference3d)),
-            std::move(NiftiImage(reference3d)),
-            std::move(NiftiImage(cpp3d)),
+            reference3d,
+            reference3d,
+            cpp3d,
             -5.f,
-            1.f
+            1.0
         ));
         reg_tools_multiplyValueToImage(reference3d, floating3d, -1.f);
         testData.emplace_back(TestData(
             "LNCC 3D -1 same image negated",
-            std::move(NiftiImage(reference3d)),
-            std::move(NiftiImage(floating3d)),
-            std::move(NiftiImage(cpp3d)),
+            reference3d,
+            floating3d,
+            cpp3d,
             -1.f,
-            1.f
+            1.0
         ));
         testData.emplace_back(TestData(
             "LNCC 3D -5 same image negated",
-            std::move(NiftiImage(reference3d)),
-            std::move(NiftiImage(floating3d)),
-            std::move(NiftiImage(cpp3d)),
+            reference3d,
+            floating3d,
+            cpp3d,
             -5.f,
-            1.f
+            1.0
         ));
         for (auto&& data : testData) {
             for (auto&& platformType : PlatformTypes) {
+                // Create the platform
                 shared_ptr<Platform> platform{ new Platform(platformType) };
                 // Make a copy of the test data
                 auto td = data;
                 auto&& [testName, reference, floating, cpp, sigma, result] = td;
-                // Create content creator
+                // Create the content creator
                 unique_ptr<F3dContentCreator> contentCreator{
                     dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d))
                 };
                 // Create the content
                 unique_ptr<F3dContent> content{ contentCreator->Create(reference, floating, cpp) };
-                // Initialise the warped image using nearest neigh interpolation
+                // Initialise the warped image using the nearest-neighbour interpolation
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->ResampleImage(0, 0);
                 content->SetWarped(floating.disown());
@@ -192,40 +189,26 @@ class LNCCTest {
         }
     }
 
-    ~LNCCTest() {
-        if (this->_kernel != nullptr) delete[] this->_kernel;
-    }
-
 protected:
-    NiftiImage reference2d;
-    NiftiImage reference3d;
-    NiftiImage floating2d;
-    NiftiImage floating3d;
-    NiftiImage cpp2d;
-    NiftiImage cpp3d;
-    nifti_image *_ref = nullptr;
-    nifti_image *_flo = nullptr;
-    float *_kernel = nullptr;
-    float _kernelStdVoxel=5;
-    int _kernel_radius[3];
-    int _kernel_size[3];
-    using LocalStats = std::tuple<float, float>;
-    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, int, float>;
-    using TestCase = std::tuple<unique_ptr<Content>, unique_ptr<reg_lncc>, shared_ptr<Platform>, TestData>;
+    struct Kernel {
+        unique_ptr<float> ptr;
+        int radius[3];
+        int size[3];
+    };
 
+    using LocalStats = std::tuple<double, double>;
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, float, double>;
+    using TestCase = std::tuple<unique_ptr<Content>, unique_ptr<reg_lncc>, shared_ptr<Platform>, TestData>;
     inline static vector<TestCase> testCases;
 
-    float GetLNCCNoConv(int kernelStd) {
-        double lncc_value = 0;
+    double GetLNCCNoConv(int kernelStd, const NiftiImage& ref, const NiftiImage& flo) {
         // Compute the kernel
-        this->_kernelStdVoxel = fabs(kernelStd);
-        this->InitialiseKernel();
-        float lncc = 0;
-        float voxelNumber = 0;
-        for (int z = 0; z < this->_ref->nz; ++z) {
-            for (int y = 0; y < this->_ref->ny; ++y) {
-                for (int x = 0; x < this->_ref->nx; ++x) {
-                    lncc += fabs(this->GetLocalCC(x, y, z, this->GetLocalMeans(x, y, z)));
+        Kernel kernel = InitialiseKernel(ref, (float)abs(kernelStd));
+        double lncc = 0, voxelNumber = 0;
+        for (int z = 0; z < ref->nz; ++z) {
+            for (int y = 0; y < ref->ny; ++y) {
+                for (int x = 0; x < ref->nx; ++x) {
+                    lncc += abs(GetLocalCC(x, y, z, kernel, ref, flo, GetLocalMeans(x, y, z, kernel, ref, flo)));
                     voxelNumber++;
                 }
             }
@@ -233,128 +216,117 @@ class LNCCTest {
         return lncc / voxelNumber;
     }
 
-    void InitialiseKernel() {
-        if (this->_kernel != nullptr) {
-            delete[] this->_kernel;
-        }
-        this->_kernel_radius[0] = 3 * this->_kernelStdVoxel;
-        this->_kernel_radius[1] = 3 * this->_kernelStdVoxel;
-        this->_kernel_radius[2] = 0;
-        if (this->_ref->ndim > 2)
-            this->_kernel_radius[2] = 3 * this->_kernelStdVoxel;
-        this->_kernel_size[0] = this->_kernel_radius[0] * 2 + 1;
-        this->_kernel_size[1] = this->_kernel_radius[1] * 2 + 1;
-        this->_kernel_size[2] = this->_kernel_radius[2] * 2 + 1;
-        this->_kernel = new float[this->_kernel_size[0] *
-            this->_kernel_size[1] *
-            this->_kernel_size[2]];
-        float *kernelPtr = this->_kernel;
+    Kernel InitialiseKernel(const NiftiImage& ref, const float& kernelStdVoxel) {
+        Kernel kernel;
+        kernel.radius[0] = static_cast<int>(3.f * kernelStdVoxel);
+        kernel.radius[1] = static_cast<int>(3.f * kernelStdVoxel);
+        kernel.radius[2] = 0;
+        if (ref->ndim > 2)
+            kernel.radius[2] = static_cast<int>(3.f * kernelStdVoxel);
+        kernel.size[0] = kernel.radius[0] * 2 + 1;
+        kernel.size[1] = kernel.radius[1] * 2 + 1;
+        kernel.size[2] = kernel.radius[2] * 2 + 1;
+        kernel.ptr = unique_ptr<float>(new float[kernel.size[0] * kernel.size[1] * kernel.size[2]]);
+        float *kernelPtr = kernel.ptr.get();
 
-        for (int z = -this->_kernel_radius[2]; z <= this->_kernel_radius[2]; z++) {
-            float z_value = static_cast<float>(
-                exp(-(z * z) / (2.0 * reg_pow2(this->_kernelStdVoxel))) /
-                (this->_kernelStdVoxel * 2.506628274631)
-                );
-            for (int y = -this->_kernel_radius[1]; y <= this->_kernel_radius[1]; y++) {
-                float y_value = static_cast<float>(
-                    exp(-(y * y) / (2.0 * reg_pow2(this->_kernelStdVoxel))) /
-                    (this->_kernelStdVoxel * 2.506628274631)
-                    );
-                for (int x = -this->_kernel_radius[0]; x <= this->_kernel_radius[0]; x++) {
-                    float x_value = static_cast<float>(
-                        exp(-(x * x) / (2.0 * reg_pow2(this->_kernelStdVoxel))) /
-                        (this->_kernelStdVoxel * 2.506628274631)
-                        );
+        for (int z = -kernel.radius[2]; z <= kernel.radius[2]; z++) {
+            const float z_value = static_cast<float>(
+                exp(-(z * z) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631));
+            for (int y = -kernel.radius[1]; y <= kernel.radius[1]; y++) {
+                const float y_value = static_cast<float>(
+                    exp(-(y * y) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631));
+                for (int x = -kernel.radius[0]; x <= kernel.radius[0]; x++) {
+                    const float x_value = static_cast<float>(
+                        exp(-(x * x) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631));
                     *kernelPtr++ = x_value * y_value * z_value;
                 }
             }
         }
+
+        return kernel;
     }
 
-    LocalStats GetLocalMeans(int x, int y, int z) {
-        double mean_ref = 0.;
-        double mean_flo = 0.;
-        double sum_kernel = 0.;
-        float *kernelPtr = this->_kernel;
-        float *refPtr = static_cast<float *>(this->_ref->data);
-        float *floPtr = static_cast<float *>(this->_flo->data);
-        for (int k = -this->_kernel_radius[2]; k <= this->_kernel_radius[2]; k++) {
+    LocalStats GetLocalMeans(const int& x, const int& y, const int& z, const Kernel& kernel,
+                             const NiftiImage& ref, const NiftiImage& flo) {
+        double meanRef = 0, meanFlo = 0, kernelSum = 0;
+        const float *kernelPtr = kernel.ptr.get();
+        const auto refPtr = ref.data();
+        const auto floPtr = flo.data();
+        for (int k = -kernel.radius[2]; k <= kernel.radius[2]; k++) {
             int zz = z + k;
-            if (0 <= zz && zz < this->_ref->nz) {
-                for (int j = -this->_kernel_radius[1]; j <= this->_kernel_radius[1]; j++) {
+            if (0 <= zz && zz < ref->nz) {
+                for (int j = -kernel.radius[1]; j <= kernel.radius[1]; j++) {
                     int yy = y + j;
-                    if (0 <= yy && yy < this->_ref->ny) {
-                        for (int i = -this->_kernel_radius[0]; i <= this->_kernel_radius[0]; i++) {
+                    if (0 <= yy && yy < ref->ny) {
+                        for (int i = -kernel.radius[0]; i <= kernel.radius[0]; i++) {
                             int xx = x + i;
-                            if (0 <= xx && xx < this->_ref->nx) {
-                                double kernelValue = *kernelPtr;
-                                int index = (zz * this->_ref->ny + yy) * this->_ref->nx + xx;
-                                mean_ref += kernelValue * refPtr[index];
-                                mean_flo += kernelValue * floPtr[index];
-                                sum_kernel += kernelValue;
+                            if (0 <= xx && xx < ref->nx) {
+                                const double& kernelValue = *kernelPtr;
+                                const int index = (zz * ref->ny + yy) * ref->nx + xx;
+                                meanRef += kernelValue * static_cast<double>(refPtr[index]);
+                                meanFlo += kernelValue * static_cast<double>(floPtr[index]);
+                                kernelSum += kernelValue;
                             }
                             kernelPtr++;
                         }
-                    } else kernelPtr += this->_kernel_size[0];
+                    } else kernelPtr += kernel.size[0];
                 }
-            } else kernelPtr += this->_kernel_size[0] * this->_kernel_size[1];
+            } else kernelPtr += kernel.size[0] * kernel.size[1];
         }
-        return LocalStats(mean_ref / sum_kernel, mean_flo / sum_kernel);
+        return LocalStats(meanRef / kernelSum, meanFlo / kernelSum);
     }
 
-    float GetLocalCC(int x, int y, int z, LocalStats means) {
-        float *kernelPtr = this->_kernel;
-        float *refPtr = static_cast<float *>(this->_ref->data);
-        float *floPtr = static_cast<float *>(this->_flo->data);
-        auto &&[mean_ref, mean_flo] = means;
-        double var_ref = 0.;
-        double var_flo = 0.;
-        double wdiff = 0.;
-        double sum_kernel = 0.;
-        for (int k = -this->_kernel_radius[2]; k <= this->_kernel_radius[2]; k++) {
+    double GetLocalCC(const int& x, const int& y, const int& z, const Kernel& kernel,
+                      const NiftiImage& ref, const NiftiImage& flo, const LocalStats& means) {
+        const float *kernelPtr = kernel.ptr.get();
+        const auto refPtr = ref.data();
+        const auto floPtr = flo.data();
+        const auto& [meanRef, meanFlo] = means;
+        double varRef = 0, varFlo = 0, wdiff = 0, kernelSum = 0;
+        for (int k = -kernel.radius[2]; k <= kernel.radius[2]; k++) {
             int zz = z + k;
-            if (0 <= zz && zz < this->_ref->nz) {
-                for (int j = -this->_kernel_radius[1]; j <= this->_kernel_radius[1]; j++) {
+            if (0 <= zz && zz < ref->nz) {
+                for (int j = -kernel.radius[1]; j <= kernel.radius[1]; j++) {
                     int yy = y + j;
-                    if (0 <= yy && yy < this->_ref->ny) {
-                        for (int i = -this->_kernel_radius[0]; i <= this->_kernel_radius[0]; i++) {
+                    if (0 <= yy && yy < ref->ny) {
+                        for (int i = -kernel.radius[0]; i <= kernel.radius[0]; i++) {
                             int xx = x + i;
-                            if (0 <= xx && xx < this->_ref->nx) {
-                                int index = (zz * this->_ref->ny + yy) * this->_ref->nx + xx;
-                                float refValue = refPtr[index];
-                                float floValue = floPtr[index];
-                                float kernelValue = *kernelPtr;
-                                var_ref += kernelValue * (refValue - mean_ref) * (refValue - mean_ref);
-                                var_flo += kernelValue * (floValue - mean_flo) * (floValue - mean_flo);
-                                wdiff += kernelValue * (refValue - mean_ref) * (floValue - mean_flo);
-                                sum_kernel += kernelValue;
+                            if (0 <= xx && xx < ref->nx) {
+                                const int index = (zz * ref->ny + yy) * ref->nx + xx;
+                                const float refValue = refPtr[index];
+                                const float floValue = floPtr[index];
+                                const float kernelValue = *kernelPtr;
+                                varRef += kernelValue * (refValue - meanRef) * (refValue - meanRef);
+                                varFlo += kernelValue * (floValue - meanFlo) * (floValue - meanFlo);
+                                wdiff += kernelValue * (refValue - meanRef) * (floValue - meanFlo);
+                                kernelSum += kernelValue;
                             }
                             kernelPtr++;
                         }
-                    } else kernelPtr += this->_kernel_size[0];
+                    } else kernelPtr += kernel.size[0];
                 }
 
-            } else kernelPtr += this->_kernel_size[0] * this->_kernel_size[1];
+            } else kernelPtr += kernel.size[0] * kernel.size[1];
         }
-        var_ref /= sum_kernel;
-        var_flo /= sum_kernel;
-        wdiff /= sum_kernel;
-        return wdiff / (sqrtf(var_ref) * sqrtf(var_flo));
+        varRef /= kernelSum;
+        varFlo /= kernelSum;
+        wdiff /= kernelSum;
+        return wdiff / (sqrt(varRef) * sqrt(varFlo));
     }
 };
 
 TEST_CASE_METHOD(LNCCTest, "LNCC", "[GetSimilarityMeasureValue]") {
     // Loop over all generated test cases
-    for (auto&& testCase : this->testCases) {
+    for (auto&& testCase : testCases) {
         // Retrieve test information
         auto&& [content, measure, platform, testData] = testCase;
         auto&& [testName, reference, floating, cpp, sigma, value] = testData;
 
         SECTION(testName) {
-            float lncc = measure->GetSimilarityMeasureValue();
+            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            const double lncc = measure->GetSimilarityMeasureValue();
             std::cout << lncc << " " << value << std::endl;
             REQUIRE(fabs(lncc - value) < EPS);
-            content.reset();
         }
     }
-}
\ No newline at end of file
+}

From ab5ce4f9ea395a49cd5dd2a5ce394be065941a17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 17 Jul 2023 13:38:24 +0100
Subject: [PATCH 157/314] Rename OptimiseKernel to LtsKernel

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/AladinContent.h                       |   1 -
 reg-lib/CMakeLists.txt                        |   2 +-
 reg-lib/LtsKernel.h                           |  13 +++
 reg-lib/Measure.cpp                           |  90 ++++++++--------
 reg-lib/Measure.h                             |  24 ++---
 reg-lib/MeasureFactory.h                      |  16 +--
 reg-lib/OptimiseKernel.h                      |  13 ---
 reg-lib/_reg_aladin.cpp                       |   8 +-
 reg-lib/_reg_aladin.h                         |   4 +-
 reg-lib/_reg_aladin_sym.cpp                   |   6 +-
 reg-lib/_reg_aladin_sym.h                     |   2 +-
 reg-lib/_reg_base.cpp                         |  18 ++--
 reg-lib/_reg_base.h                           |   1 +
 reg-lib/_reg_f3d.cpp                          |   2 +
 reg-lib/cl/CMakeLists.txt                     |   2 +-
 reg-lib/cl/ClKernelFactory.cpp                |   4 +-
 .../{ClOptimiseKernel.cpp => ClLtsKernel.cpp} |   6 +-
 .../cl/{ClOptimiseKernel.h => ClLtsKernel.h}  |   8 +-
 reg-lib/cpu/CpuKernelFactory.cpp              |   4 +-
 ...CpuOptimiseKernel.cpp => CpuLtsKernel.cpp} |   6 +-
 .../{CpuOptimiseKernel.h => CpuLtsKernel.h}   |   6 +-
 reg-lib/cpu/_reg_nmi.cpp                      |  22 ++--
 reg-lib/cpu/_reg_optimiser.cpp                |   8 +-
 reg-lib/cpu/_reg_optimiser.h                  |   2 +-
 reg-lib/cuda/CMakeLists.txt                   |   2 +-
 reg-lib/cuda/CudaContext.cpp                  |   2 +-
 reg-lib/cuda/CudaKernelFactory.cpp            |   4 +-
 ...daOptimiseKernel.cpp => CudaLtsKernel.cpp} |   6 +-
 .../{CudaOptimiseKernel.h => CudaLtsKernel.h} |   6 +-
 reg-lib/cuda/CudaMeasure.cpp                  | 100 +++++++++---------
 reg-lib/cuda/CudaMeasure.h                    |  18 ++--
 reg-lib/cuda/CudaMeasureFactory.h             |  16 +--
 reg-lib/cuda/_reg_measure_gpu.h               |  15 +--
 reg-test/reg_test_regr_lts.cpp                |  10 +-
 35 files changed, 218 insertions(+), 231 deletions(-)
 create mode 100644 reg-lib/LtsKernel.h
 delete mode 100644 reg-lib/OptimiseKernel.h
 rename reg-lib/cl/{ClOptimiseKernel.cpp => ClLtsKernel.cpp} (79%)
 rename reg-lib/cl/{ClOptimiseKernel.h => ClLtsKernel.h} (56%)
 rename reg-lib/cpu/{CpuOptimiseKernel.cpp => CpuLtsKernel.cpp} (74%)
 rename reg-lib/cpu/{CpuOptimiseKernel.h => CpuLtsKernel.h} (68%)
 rename reg-lib/cuda/{CudaOptimiseKernel.cpp => CudaLtsKernel.cpp} (95%)
 rename reg-lib/cuda/{CudaOptimiseKernel.h => CudaLtsKernel.h} (84%)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 4c738e3f..15007f1b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-275
+276
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index ca7e6cd4..5444cfd0 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -19,7 +19,6 @@ class AladinContent: public Content {
                   const unsigned percentageOfBlocks = 0,
                   const unsigned inlierLts = 0,
                   int blockStepSize = 0);
-
     virtual ~AladinContent();
 
     // Getters
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 4b20646d..b3318053 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -128,7 +128,7 @@ add_library(_reg_kernels ${NIFTYREG_LIBRARY_TYPE}
   cpu/CpuAffineDeformationFieldKernel.cpp
   cpu/CpuBlockMatchingKernel.cpp
   cpu/CpuConvolutionKernel.cpp
-  cpu/CpuOptimiseKernel.cpp
+  cpu/CpuLtsKernel.cpp
   cpu/CpuResampleImageKernel.cpp
 )
 target_link_libraries(_reg_kernels
diff --git a/reg-lib/LtsKernel.h b/reg-lib/LtsKernel.h
new file mode 100644
index 00000000..139f6cf9
--- /dev/null
+++ b/reg-lib/LtsKernel.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include "Kernel.h"
+
+class LtsKernel: public Kernel {
+public:
+    static std::string GetName() {
+        return "LtsKernel";
+    }
+    LtsKernel() : Kernel() {}
+    virtual ~LtsKernel() {}
+    virtual void Calculate(bool affine) = 0;
+};
diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp
index e28b4314..4b463ba7 100644
--- a/reg-lib/Measure.cpp
+++ b/reg-lib/Measure.cpp
@@ -1,45 +1,45 @@
-#include "Measure.h"
-#include "_reg_nmi.h"
-#include "_reg_ssd.h"
-#include "_reg_dti.h"
-#include "_reg_lncc.h"
-#include "_reg_kld.h"
-#include "_reg_mind.h"
-
-/* *************************************************************** */
-reg_measure* Measure::Create(const MeasureType& measureType) {
-    switch (measureType) {
-    case MeasureType::Nmi:
-        return new reg_nmi();
-    case MeasureType::Ssd:
-        return new reg_ssd();
-    case MeasureType::Dti:
-        return new reg_dti();
-    case MeasureType::Lncc:
-        return new reg_lncc();
-    case MeasureType::Kld:
-        return new reg_kld();
-    case MeasureType::Mind:
-        return new reg_mind();
-    case MeasureType::Mindssc:
-        return new reg_mindssc();
-    }
-    reg_print_msg_error("Unsupported measure type");
-    reg_exit();
-    return nullptr;
-}
-/* *************************************************************** */
-void Measure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) {
-    measure.InitialiseMeasure(con.GetReference(),
-                              con.GetFloating(),
-                              con.GetReferenceMask(),
-                              con.GetWarped(),
-                              con.GetWarpedGradient(),
-                              con.GetVoxelBasedMeasureGradient(),
-                              con.GetLocalWeightSim(),
-                              conBw ? conBw->GetReferenceMask() : nullptr,
-                              conBw ? conBw->GetWarped() : nullptr,
-                              conBw ? conBw->GetWarpedGradient() : nullptr,
-                              conBw ? conBw->GetVoxelBasedMeasureGradient() : nullptr);
-}
-/* *************************************************************** */
+#include "Measure.h"
+#include "_reg_nmi.h"
+#include "_reg_ssd.h"
+#include "_reg_dti.h"
+#include "_reg_lncc.h"
+#include "_reg_kld.h"
+#include "_reg_mind.h"
+
+/* *************************************************************** */
+reg_measure* Measure::Create(const MeasureType& measureType) {
+    switch (measureType) {
+    case MeasureType::Nmi:
+        return new reg_nmi();
+    case MeasureType::Ssd:
+        return new reg_ssd();
+    case MeasureType::Dti:
+        return new reg_dti();
+    case MeasureType::Lncc:
+        return new reg_lncc();
+    case MeasureType::Kld:
+        return new reg_kld();
+    case MeasureType::Mind:
+        return new reg_mind();
+    case MeasureType::Mindssc:
+        return new reg_mindssc();
+    }
+    reg_print_msg_error("Unsupported measure type");
+    reg_exit();
+    return nullptr;
+}
+/* *************************************************************** */
+void Measure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) {
+    measure.InitialiseMeasure(con.GetReference(),
+                              con.GetFloating(),
+                              con.GetReferenceMask(),
+                              con.GetWarped(),
+                              con.GetWarpedGradient(),
+                              con.GetVoxelBasedMeasureGradient(),
+                              con.GetLocalWeightSim(),
+                              conBw ? conBw->GetReferenceMask() : nullptr,
+                              conBw ? conBw->GetWarped() : nullptr,
+                              conBw ? conBw->GetWarpedGradient() : nullptr,
+                              conBw ? conBw->GetVoxelBasedMeasureGradient() : nullptr);
+}
+/* *************************************************************** */
diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h
index d33c1757..04ff5bdd 100644
--- a/reg-lib/Measure.h
+++ b/reg-lib/Measure.h
@@ -1,12 +1,12 @@
-#pragma once
-
-#include "F3dContent.h"
-#include "_reg_measure.h"
-
-enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, Mindssc };
-
-class Measure {
-public:
-    virtual reg_measure* Create(const MeasureType& measureType);
-    virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr);
-};
+#pragma once
+
+#include "F3dContent.h"
+#include "_reg_measure.h"
+
+enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, Mindssc };
+
+class Measure {
+public:
+    virtual reg_measure* Create(const MeasureType& measureType);
+    virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr);
+};
diff --git a/reg-lib/MeasureFactory.h b/reg-lib/MeasureFactory.h
index f256794e..9c1927a9 100644
--- a/reg-lib/MeasureFactory.h
+++ b/reg-lib/MeasureFactory.h
@@ -1,8 +1,8 @@
-#pragma once
-
-#include "Measure.h"
-
-class MeasureFactory {
-public:
-    virtual Measure* Produce() { return new Measure(); }
-};
+#pragma once
+
+#include "Measure.h"
+
+class MeasureFactory {
+public:
+    virtual Measure* Produce() { return new Measure(); }
+};
diff --git a/reg-lib/OptimiseKernel.h b/reg-lib/OptimiseKernel.h
deleted file mode 100644
index 8c65d5de..00000000
--- a/reg-lib/OptimiseKernel.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#include "Kernel.h"
-
-class OptimiseKernel: public Kernel {
-public:
-    static std::string GetName() {
-        return "OptimiseKernel";
-    }
-    OptimiseKernel() : Kernel() {}
-    virtual ~OptimiseKernel() {}
-    virtual void Calculate(bool affine) = 0;
-};
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 01c8b13b..620ae212 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -336,10 +336,10 @@ void reg_aladin<T>::CreateKernels() {
     this->resamplingKernel.reset(platform->CreateKernel(ResampleImageKernel::GetName(), this->con.get()));
     if (this->blockMatchingParams) {
         this->blockMatchingKernel.reset(platform->CreateKernel(BlockMatchingKernel::GetName(), this->con.get()));
-        this->optimiseKernel.reset(platform->CreateKernel(OptimiseKernel::GetName(), this->con.get()));
+        this->ltsKernel.reset(platform->CreateKernel(LtsKernel::GetName(), this->con.get()));
     } else {
         this->blockMatchingKernel = nullptr;
-        this->optimiseKernel = nullptr;
+        this->ltsKernel = nullptr;
     }
 }
 /* *************************************************************** */
@@ -348,7 +348,7 @@ void reg_aladin<T>::DeallocateKernels() {
     this->affineTransformation3DKernel = nullptr;
     this->resamplingKernel = nullptr;
     this->blockMatchingKernel = nullptr;
-    this->optimiseKernel = nullptr;
+    this->ltsKernel = nullptr;
 }
 /* *************************************************************** */
 template<class T>
@@ -365,7 +365,7 @@ void reg_aladin<T>::GetWarpedImage(int interp, float padding) {
 template<class T>
 void reg_aladin<T>::UpdateTransformationMatrix(int type) {
     this->blockMatchingKernel->template castTo<BlockMatchingKernel>()->Calculate();
-    this->optimiseKernel->template castTo<OptimiseKernel>()->Calculate(type);
+    this->ltsKernel->template castTo<LtsKernel>()->Calculate(type);
 
 #ifndef NDEBUG
     reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] updated forward matrix");
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index b1515195..f34f91f9 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -29,7 +29,7 @@
 #include "AffineDeformationFieldKernel.h"
 #include "ResampleImageKernel.h"
 #include "BlockMatchingKernel.h"
-#include "OptimiseKernel.h"
+#include "LtsKernel.h"
 #include "ConvolutionKernel.h"
 #include "AladinContent.h"
 
@@ -263,6 +263,6 @@ class reg_aladin {
     }
 
 private:
-    unique_ptr<Kernel> affineTransformation3DKernel, blockMatchingKernel, optimiseKernel, resamplingKernel;
+    unique_ptr<Kernel> affineTransformation3DKernel, blockMatchingKernel, ltsKernel, resamplingKernel;
     void ResolveMatrix(unsigned iterations, const unsigned optimizationFlag);
 };
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index 2cafb89e..a29a772e 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -141,7 +141,7 @@ void reg_aladin_sym<T>::UpdateTransformationMatrix(int type) {
 
     // Update now the backward transformation matrix
     this->bBlockMatchingKernel->template castTo<BlockMatchingKernel>()->Calculate();
-    this->bOptimiseKernel->template castTo<OptimiseKernel>()->Calculate(type);
+    this->bLtsKernel->template castTo<LtsKernel>()->Calculate(type);
 
 #ifndef NDEBUG
     reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] pre-updated forward transformation matrix");
@@ -194,7 +194,7 @@ void reg_aladin_sym<T>::CreateKernels() {
     this->bAffineTransformation3DKernel.reset(this->platform->CreateKernel(AffineDeformationFieldKernel::GetName(), this->backCon.get()));
     this->bBlockMatchingKernel.reset(this->platform->CreateKernel(BlockMatchingKernel::GetName(), this->backCon.get()));
     this->bResamplingKernel.reset(this->platform->CreateKernel(ResampleImageKernel::GetName(), this->backCon.get()));
-    this->bOptimiseKernel.reset(this->platform->CreateKernel(OptimiseKernel::GetName(), this->backCon.get()));
+    this->bLtsKernel.reset(this->platform->CreateKernel(LtsKernel::GetName(), this->backCon.get()));
 }
 /* *************************************************************** */
 template <class T>
@@ -209,7 +209,7 @@ void reg_aladin_sym<T>::DeallocateKernels() {
     this->bResamplingKernel = nullptr;
     this->bAffineTransformation3DKernel = nullptr;
     this->bBlockMatchingKernel = nullptr;
-    this->bOptimiseKernel = nullptr;
+    this->bLtsKernel = nullptr;
 }
 /* *************************************************************** */
 template <class T>
diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h
index 58e71378..6da18e76 100644
--- a/reg-lib/_reg_aladin_sym.h
+++ b/reg-lib/_reg_aladin_sym.h
@@ -19,7 +19,7 @@ template <class T>
 class reg_aladin_sym: public reg_aladin<T> {
 private:
     unique_ptr<AladinContent> backCon;
-    unique_ptr<Kernel> bAffineTransformation3DKernel, bConvolutionKernel, bBlockMatchingKernel, bOptimiseKernel, bResamplingKernel;
+    unique_ptr<Kernel> bAffineTransformation3DKernel, bConvolutionKernel, bBlockMatchingKernel, bLtsKernel, bResamplingKernel;
 
     virtual void InitAladinContent(nifti_image *ref,
                                    nifti_image *flo,
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 308978df..87963d8a 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -293,7 +293,7 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::CheckParameters() {
-    // CHECK THAT BOTH INPUT IMAGES ARE DEFINED
+    // Check if both input images are defined
     if (!inputReference) {
         reg_print_fct_error("reg_base::CheckParameters()");
         reg_print_msg_error("The reference image is not defined");
@@ -305,7 +305,7 @@ void reg_base<T>::CheckParameters() {
         reg_exit();
     }
 
-    // CHECK THE MASK DIMENSION IF IT IS DEFINED
+    // Check the mask dimension if it is defined
     if (maskImage) {
         if (inputReference->nx != maskImage->nx ||
             inputReference->ny != maskImage->ny ||
@@ -316,14 +316,14 @@ void reg_base<T>::CheckParameters() {
         }
     }
 
-    // CHECK THE NUMBER OF LEVEL TO PERFORM
+    // Check the number of level to perform
     if (levelToPerform > 0) {
         levelToPerform = levelToPerform < levelNumber ? levelToPerform : levelNumber;
     } else levelToPerform = levelNumber;
     if (levelToPerform == 0 || levelToPerform > levelNumber)
         levelToPerform = levelNumber;
 
-    // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET
+    // Set the default similarity measure if none has been set
     if (!measure_nmi && !measure_ssd && !measure_dti && !measure_lncc &&
         !measure_kld && !measure_mind && !measure_mindssc) {
         measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
@@ -331,14 +331,14 @@ void reg_base<T>::CheckParameters() {
             measure_nmi->SetTimepointWeight(i, 1.0);
     }
 
-    // CHECK THAT IMAGES HAVE SAME NUMBER OF CHANNELS (TIMEPOINTS)
-    // THAT EACH CHANNEL HAS AT LEAST ONE SIMILARITY MEASURE ASSIGNED
-    // AND THAT EACH SIMILARITY MEASURE IS USED FOR AT LEAST ONE CHANNEL
-    // NORMALISE CHANNEL AND SIMILARITY WEIGHTS SO TOTAL = 1
+    // Check that images have same number of channels (timepoints)
+    // that each channel has at least one similarity measure assigned
+    // and that each similarity measure is used for at least one channel
+    // Normalise channel and similarity weights so total = 1
     //
     // NOTE - DTI currently ignored as needs fixing
     //
-    // tests ignored if using MIND or MINDSSC as they are not implemented for multi-channel or weighting
+    // Tests are ignored if using MIND or MINDSSC as they are not implemented for multi-channel or weighting
     if (!measure_mind && !measure_mindssc) {
         if (inputFloating->nt != inputReference->nt) {
             reg_print_fct_error("reg_base::CheckParameters()");
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index e912977b..7e62a37f 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -159,6 +159,7 @@ class reg_base: public InterfaceOptimiser {
     virtual void DoNotUseConjugateGradient();
     virtual void UseApproximatedGradient();
     virtual void DoNotUseApproximatedGradient();
+
     // Measure of similarity-related functions
     // virtual void ApproximateParzenWindow();
     // virtual void DoNotApproximateParzenWindow();
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index da1089f5..26530618 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -153,6 +153,7 @@ void reg_f3d<T>::DeinitCurrentLevel(int currentLevel) {
 template<class T>
 void reg_f3d<T>::CheckParameters() {
     reg_base<T>::CheckParameters();
+
     // Normalise the objective function weights
     if (strcmp(this->executableName, "NiftyReg F3D") == 0) {
         T penaltySum = bendingEnergyWeight + linearEnergyWeight + jacobianLogWeight + this->landmarkRegWeight;
@@ -164,6 +165,7 @@ void reg_f3d<T>::CheckParameters() {
             this->landmarkRegWeight /= penaltySum;
         } else this->similarityWeight = 1 - penaltySum;
     }
+
 #ifndef NDEBUG
     reg_print_fct_debug("reg_f3d<T>::CheckParameters");
 #endif
diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt
index c3ed44ad..b0328216 100755
--- a/reg-lib/cl/CMakeLists.txt
+++ b/reg-lib/cl/CMakeLists.txt
@@ -11,7 +11,7 @@ add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
   ClAffineDeformationFieldKernel.cpp
   ClBlockMatchingKernel.cpp
   ClConvolutionKernel.cpp
-  ClOptimiseKernel.cpp
+  ClLtsKernel.cpp
   ClResampleImageKernel.cpp
   ../AladinContent.cpp
 )
diff --git a/reg-lib/cl/ClKernelFactory.cpp b/reg-lib/cl/ClKernelFactory.cpp
index d7d4fdd7..3511db2f 100644
--- a/reg-lib/cl/ClKernelFactory.cpp
+++ b/reg-lib/cl/ClKernelFactory.cpp
@@ -3,7 +3,7 @@
 #include "ClConvolutionKernel.h"
 #include "ClBlockMatchingKernel.h"
 #include "ClResampleImageKernel.h"
-#include "ClOptimiseKernel.h"
+#include "ClLtsKernel.h"
 #include "AladinContent.h"
 
 Kernel* ClKernelFactory::Produce(std::string name, Content *con) const {
@@ -11,6 +11,6 @@ Kernel* ClKernelFactory::Produce(std::string name, Content *con) const {
 	else if (name == ConvolutionKernel::GetName()) return new ClConvolutionKernel();
 	else if (name == BlockMatchingKernel::GetName()) return new ClBlockMatchingKernel(con);
 	else if (name == ResampleImageKernel::GetName()) return new ClResampleImageKernel(con);
-	else if (name == OptimiseKernel::GetName()) return new ClOptimiseKernel(con);
+	else if (name == LtsKernel::GetName()) return new ClLtsKernel(con);
 	else return nullptr;
 }
diff --git a/reg-lib/cl/ClOptimiseKernel.cpp b/reg-lib/cl/ClLtsKernel.cpp
similarity index 79%
rename from reg-lib/cl/ClOptimiseKernel.cpp
rename to reg-lib/cl/ClLtsKernel.cpp
index a040e0f1..e15201e5 100644
--- a/reg-lib/cl/ClOptimiseKernel.cpp
+++ b/reg-lib/cl/ClLtsKernel.cpp
@@ -1,7 +1,7 @@
-#include "ClOptimiseKernel.h"
+#include "ClLtsKernel.h"
 
 /* *************************************************************** */
-ClOptimiseKernel::ClOptimiseKernel(Content *conIn) : OptimiseKernel() {
+ClLtsKernel::ClLtsKernel(Content *conIn) : LtsKernel() {
     //populate the ClAladinContent object ptr
     ClAladinContent *con = static_cast<ClAladinContent*>(conIn);
 
@@ -10,7 +10,7 @@ ClOptimiseKernel::ClOptimiseKernel(Content *conIn) : OptimiseKernel() {
     blockMatchingParams = con->AladinContent::GetBlockMatchingParams();
 }
 /* *************************************************************** */
-void ClOptimiseKernel::Calculate(bool affine) {
+void ClLtsKernel::Calculate(bool affine) {
     optimize(blockMatchingParams, transformationMatrix, affine);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClOptimiseKernel.h b/reg-lib/cl/ClLtsKernel.h
similarity index 56%
rename from reg-lib/cl/ClOptimiseKernel.h
rename to reg-lib/cl/ClLtsKernel.h
index 527a5bee..b0ce0b13 100644
--- a/reg-lib/cl/ClOptimiseKernel.h
+++ b/reg-lib/cl/ClLtsKernel.h
@@ -1,12 +1,12 @@
 #pragma once
 
-#include "OptimiseKernel.h"
+#include "LtsKernel.h"
 #include "ClAladinContent.h"
 
-class ClOptimiseKernel: public OptimiseKernel {
+class ClLtsKernel: public LtsKernel {
 public:
-    ClOptimiseKernel(Content *con);
-    ~ClOptimiseKernel() {}
+    ClLtsKernel(Content *con);
+    ~ClLtsKernel() {}
     void Calculate(bool affine);
 
 private:
diff --git a/reg-lib/cpu/CpuKernelFactory.cpp b/reg-lib/cpu/CpuKernelFactory.cpp
index 4ef1612c..e82255e2 100644
--- a/reg-lib/cpu/CpuKernelFactory.cpp
+++ b/reg-lib/cpu/CpuKernelFactory.cpp
@@ -3,7 +3,7 @@
 #include "CpuConvolutionKernel.h"
 #include "CpuBlockMatchingKernel.h"
 #include "CpuResampleImageKernel.h"
-#include "CpuOptimiseKernel.h"
+#include "CpuLtsKernel.h"
 #include "AladinContent.h"
 
 Kernel* CpuKernelFactory::Produce(std::string name, Content *con) const {
@@ -11,6 +11,6 @@ Kernel* CpuKernelFactory::Produce(std::string name, Content *con) const {
 	else if (name == ConvolutionKernel::GetName()) return new CpuConvolutionKernel();
 	else if (name == BlockMatchingKernel::GetName()) return new CpuBlockMatchingKernel(con);
 	else if (name == ResampleImageKernel::GetName()) return new CpuResampleImageKernel(con);
-	else if (name == OptimiseKernel::GetName()) return new CpuOptimiseKernel(con);
+	else if (name == LtsKernel::GetName()) return new CpuLtsKernel(con);
 	else return nullptr;
 }
diff --git a/reg-lib/cpu/CpuOptimiseKernel.cpp b/reg-lib/cpu/CpuLtsKernel.cpp
similarity index 74%
rename from reg-lib/cpu/CpuOptimiseKernel.cpp
rename to reg-lib/cpu/CpuLtsKernel.cpp
index f7874795..c66d2300 100644
--- a/reg-lib/cpu/CpuOptimiseKernel.cpp
+++ b/reg-lib/cpu/CpuLtsKernel.cpp
@@ -1,13 +1,13 @@
-#include "CpuOptimiseKernel.h"
+#include "CpuLtsKernel.h"
 
 /* *************************************************************** */
-CpuOptimiseKernel::CpuOptimiseKernel(Content *conIn) : OptimiseKernel() {
+CpuLtsKernel::CpuLtsKernel(Content *conIn) : LtsKernel() {
     AladinContent *con = static_cast<AladinContent*>(conIn);
     transformationMatrix = con->GetTransformationMatrix();
     blockMatchingParams = con->GetBlockMatchingParams();
 }
 /* *************************************************************** */
-void CpuOptimiseKernel::Calculate(bool affine) {
+void CpuLtsKernel::Calculate(bool affine) {
     optimize(blockMatchingParams, transformationMatrix, affine);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/CpuOptimiseKernel.h b/reg-lib/cpu/CpuLtsKernel.h
similarity index 68%
rename from reg-lib/cpu/CpuOptimiseKernel.h
rename to reg-lib/cpu/CpuLtsKernel.h
index df9865b2..8bb4c26e 100644
--- a/reg-lib/cpu/CpuOptimiseKernel.h
+++ b/reg-lib/cpu/CpuLtsKernel.h
@@ -1,13 +1,13 @@
 #pragma once
 
-#include "OptimiseKernel.h"
+#include "LtsKernel.h"
 #include "_reg_blockMatching.h"
 #include "niftilib/nifti1_io.h"
 #include "AladinContent.h"
 
-class CpuOptimiseKernel: public OptimiseKernel {
+class CpuLtsKernel: public LtsKernel {
 public:
-    CpuOptimiseKernel(Content *con);
+    CpuLtsKernel(Content *con);
     void Calculate(bool affine);
 
 private:
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 9ecdb6af..0eec06ed 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -12,8 +12,7 @@
 
 #include "_reg_nmi.h"
 
- /* *************************************************************** */
- /* *************************************************************** */
+/* *************************************************************** */
 reg_nmi::reg_nmi(): reg_measure() {
     this->forwardJointHistogramPro = nullptr;
     this->forwardJointHistogramLog = nullptr;
@@ -31,7 +30,6 @@ reg_nmi::reg_nmi(): reg_measure() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 reg_nmi::~reg_nmi() {
     this->DeallocateHistogram();
 #ifndef NDEBUG
@@ -103,7 +101,6 @@ void reg_nmi::DeallocateHistogram() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
                                 nifti_image *floImgPtr,
                                 int *maskRefPtr,
@@ -188,7 +185,6 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class PrecisionType>
 PrecisionType GetBasisSplineValue(PrecisionType x) {
     x = fabs(x);
@@ -220,7 +216,6 @@ PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) {
     return value;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
 void reg_getNMIValue(nifti_image *referenceImage,
                      nifti_image *warpedImage,
@@ -371,11 +366,9 @@ void reg_getNMIValue(nifti_image *referenceImage,
         } // if active time point
     } // iterate over all time point in the reference image
 }
-/* *************************************************************** */
 template void reg_getNMIValue<float>(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*);
 template void reg_getNMIValue<double>(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*);
 /* *************************************************************** */
-/* *************************************************************** */
 double reg_nmi::GetSimilarityMeasureValue() {
     // Check that all the specified image are of the same datatype
     if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) {
@@ -552,14 +545,13 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
                     }
                 }
                 measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
-                                                                  nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
+                                                                     nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
                 measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
-                                                                  nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
+                                                                     nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
     } // loop over all voxel
 }
-/* *************************************************************** */
 template void reg_getVoxelBasedNMIGradient2D<float>
 (const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
 template void reg_getVoxelBasedNMIGradient2D<double>
@@ -667,16 +659,15 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
                     }
                 }
                 measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
-                                                                  nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
+                                                                     nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
                 measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
-                                                                  nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
+                                                                     nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
                 measureGradPtrZ[i] += (DataType)(timepoint_weight * (refDeriv[2] + warDeriv[2] -
-                                                                  nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3]));
+                                                                     nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
     } // loop over all voxel
 }
-/* *************************************************************** */
 template void reg_getVoxelBasedNMIGradient3D<float>
 (const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
 template void reg_getVoxelBasedNMIGradient3D<double>
@@ -853,4 +844,3 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index 3acbb846..4b624b22 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -133,10 +133,10 @@ void reg_optimiser<T>::Perturbation(float length) {
 }
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::Optimise(T maxLength, T smallLength, T &startLength) {
+void reg_optimiser<T>::Optimise(T maxLength, T smallLength, T& startLength) {
     size_t lineIteration = 0;
     float addedLength = 0;
-    float currentLength = startLength;
+    float currentLength = static_cast<float>(startLength);
 
     // Start performing the line search
     while (currentLength > smallLength &&
@@ -343,7 +343,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
 #endif
         for (i = 0; i < num; i++) {
             array1Ptr[i] = -gradientPtr[i];
-            array2Ptr[i] = (array1Ptr[i] + gam * array2Ptr[i]);
+            array2Ptr[i] = static_cast<T>(array1Ptr[i] + gam * array2Ptr[i]);
             gradientPtr[i] = -array2Ptr[i];
         }
         if (this->dofNumberBw > 0) {
@@ -353,7 +353,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
 #endif
             for (i = 0; i < numBw; i++) {
                 array1PtrBw[i] = -gradientPtrBw[i];
-                array2PtrBw[i] = (array1PtrBw[i] + gam * array2PtrBw[i]);
+                array2PtrBw[i] = static_cast<T>(array1PtrBw[i] + gam * array2PtrBw[i]);
                 gradientPtrBw[i] = -array2PtrBw[i];
             }
         }
diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h
index 4cdb5eff..6f0b7835 100644
--- a/reg-lib/cpu/_reg_optimiser.h
+++ b/reg-lib/cpu/_reg_optimiser.h
@@ -137,7 +137,7 @@ class reg_optimiser {
                             T *gradDataBw = nullptr);
     virtual void Optimise(T maxLength,
                           T smallLength,
-                          T &startLength);
+                          T& startLength);
     virtual void Perturbation(float length);
 };
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index d56a72f6..41d6ae7c 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -74,7 +74,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaBlockMatchingKernel.cpp
     CudaConvolutionKernel.cpp
     CudaNormaliseGradient.cu
-    CudaOptimiseKernel.cpp
+    CudaLtsKernel.cpp
     CudaResampleImageKernel.cpp
     ../AladinContent.cpp
     _reg_resampling_gpu.cu
diff --git a/reg-lib/cuda/CudaContext.cpp b/reg-lib/cuda/CudaContext.cpp
index 70351a43..e0485ff0 100644
--- a/reg-lib/cuda/CudaContext.cpp
+++ b/reg-lib/cuda/CudaContext.cpp
@@ -121,4 +121,4 @@ CudaContext::~CudaContext() {
     cuCtxDestroy(cudaContext);
 }
 /* *************************************************************** */
-} // namespace NiftyReg::Cuda
+} // namespace NiftyReg
diff --git a/reg-lib/cuda/CudaKernelFactory.cpp b/reg-lib/cuda/CudaKernelFactory.cpp
index 57af4ae0..95bd23cb 100644
--- a/reg-lib/cuda/CudaKernelFactory.cpp
+++ b/reg-lib/cuda/CudaKernelFactory.cpp
@@ -3,7 +3,7 @@
 #include "CudaConvolutionKernel.h"
 #include "CudaBlockMatchingKernel.h"
 #include "CudaResampleImageKernel.h"
-#include "CudaOptimiseKernel.h"
+#include "CudaLtsKernel.h"
 #include "AladinContent.h"
 
 Kernel* CudaKernelFactory::Produce(std::string name, Content *con) const {
@@ -11,6 +11,6 @@ Kernel* CudaKernelFactory::Produce(std::string name, Content *con) const {
     else if (name == ConvolutionKernel::GetName()) return new CudaConvolutionKernel();
     else if (name == BlockMatchingKernel::GetName()) return new CudaBlockMatchingKernel(con);
     else if (name == ResampleImageKernel::GetName()) return new CudaResampleImageKernel(con);
-    else if (name == OptimiseKernel::GetName()) return new CudaOptimiseKernel(con);
+    else if (name == LtsKernel::GetName()) return new CudaLtsKernel(con);
     else return nullptr;
 }
diff --git a/reg-lib/cuda/CudaOptimiseKernel.cpp b/reg-lib/cuda/CudaLtsKernel.cpp
similarity index 95%
rename from reg-lib/cuda/CudaOptimiseKernel.cpp
rename to reg-lib/cuda/CudaLtsKernel.cpp
index bac2268f..aa5cd6fd 100644
--- a/reg-lib/cuda/CudaOptimiseKernel.cpp
+++ b/reg-lib/cuda/CudaLtsKernel.cpp
@@ -1,10 +1,10 @@
 #include <cuda_runtime.h>
 #include <cuda.h>
-#include "CudaOptimiseKernel.h"
+#include "CudaLtsKernel.h"
 #include "optimizeKernel.h"
 
 /* *************************************************************** */
-CudaOptimiseKernel::CudaOptimiseKernel(Content *conIn) : OptimiseKernel() {
+CudaLtsKernel::CudaLtsKernel(Content *conIn) : LtsKernel() {
     //get CudaAladinContent ptr
     con = static_cast<CudaAladinContent*>(conIn);
 
@@ -24,7 +24,7 @@ CudaOptimiseKernel::CudaOptimiseKernel(Content *conIn) : OptimiseKernel() {
 
 }
 /* *************************************************************** */
-void CudaOptimiseKernel::Calculate(bool affine) {
+void CudaLtsKernel::Calculate(bool affine) {
     /* // Removed until CUDA SVD is added back
  #if _WIN64 || __x86_64__ || __ppc64__
 
diff --git a/reg-lib/cuda/CudaOptimiseKernel.h b/reg-lib/cuda/CudaLtsKernel.h
similarity index 84%
rename from reg-lib/cuda/CudaOptimiseKernel.h
rename to reg-lib/cuda/CudaLtsKernel.h
index 62356876..605730bd 100644
--- a/reg-lib/cuda/CudaOptimiseKernel.h
+++ b/reg-lib/cuda/CudaLtsKernel.h
@@ -1,12 +1,12 @@
 #pragma once
 
-#include "OptimiseKernel.h"
+#include "LtsKernel.h"
 #include "CudaAladinContent.h"
 
 // Kernel functions for numerical optimisation
-class CudaOptimiseKernel: public OptimiseKernel {
+class CudaLtsKernel: public LtsKernel {
 public:
-    CudaOptimiseKernel(Content *conIn);
+    CudaLtsKernel(Content *conIn);
     void Calculate(bool affine);
 
 private:
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
index 549290d5..7ef87391 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -1,50 +1,50 @@
-#include "CudaMeasure.h"
-#include "CudaF3dContent.h"
-#include "_reg_nmi_gpu.h"
-#include "_reg_ssd_gpu.h"
-
-/* *************************************************************** */
-reg_measure* CudaMeasure::Create(const MeasureType& measureType) {
-    switch (measureType) {
-    case MeasureType::Nmi:
-        return new reg_nmi_gpu();
-    case MeasureType::Ssd:
-        return new reg_ssd_gpu();
-    case MeasureType::Dti:
-        return new reg_dti_gpu();
-    case MeasureType::Lncc:
-        return new reg_lncc_gpu();
-    case MeasureType::Kld:
-        return new reg_kld_gpu();
-    case MeasureType::Mind:
-        reg_print_msg_error("MIND measure type isn't implemented for GPU");
-        reg_exit();
-    case MeasureType::Mindssc:
-        reg_print_msg_error("MIND-SSC measure type isn't implemented for GPU");
-        reg_exit();
-    }
-    reg_print_msg_error("Unsupported measure type");
-    reg_exit();
-    return nullptr;
-}
-/* *************************************************************** */
-void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) {
-    // TODO Implement symmetric scheme for CUDA measure types
-    reg_measure_gpu& measureGpu = dynamic_cast<reg_measure_gpu&>(measure);
-    CudaF3dContent& cudaCon = dynamic_cast<CudaF3dContent&>(con);
-    measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(),
-                                 cudaCon.Content::GetFloating(),
-                                 cudaCon.Content::GetReferenceMask(),
-                                 cudaCon.GetActiveVoxelNumber(),
-                                 cudaCon.Content::GetWarped(),
-                                 cudaCon.F3dContent::GetWarpedGradient(),
-                                 cudaCon.F3dContent::GetVoxelBasedMeasureGradient(),
-                                 cudaCon.F3dContent::GetLocalWeightSim(),
-                                 cudaCon.GetReferenceCuda(),
-                                 cudaCon.GetFloatingCuda(),
-                                 cudaCon.GetReferenceMaskCuda(),
-                                 cudaCon.GetWarpedCuda(),
-                                 cudaCon.GetWarpedGradientCuda(),
-                                 cudaCon.GetVoxelBasedMeasureGradientCuda());
-}
-/* *************************************************************** */
+#include "CudaMeasure.h"
+#include "CudaF3dContent.h"
+#include "_reg_nmi_gpu.h"
+#include "_reg_ssd_gpu.h"
+
+/* *************************************************************** */
+reg_measure* CudaMeasure::Create(const MeasureType& measureType) {
+    switch (measureType) {
+    case MeasureType::Nmi:
+        return new reg_nmi_gpu();
+    case MeasureType::Ssd:
+        return new reg_ssd_gpu();
+    case MeasureType::Dti:
+        return new reg_dti_gpu();
+    case MeasureType::Lncc:
+        return new reg_lncc_gpu();
+    case MeasureType::Kld:
+        return new reg_kld_gpu();
+    case MeasureType::Mind:
+        reg_print_msg_error("MIND measure type isn't implemented for GPU");
+        reg_exit();
+    case MeasureType::Mindssc:
+        reg_print_msg_error("MIND-SSC measure type isn't implemented for GPU");
+        reg_exit();
+    }
+    reg_print_msg_error("Unsupported measure type");
+    reg_exit();
+    return nullptr;
+}
+/* *************************************************************** */
+void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) {
+    // TODO Implement symmetric scheme for CUDA measure types
+    reg_measure_gpu& measureGpu = dynamic_cast<reg_measure_gpu&>(measure);
+    CudaF3dContent& cudaCon = dynamic_cast<CudaF3dContent&>(con);
+    measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(),
+                                 cudaCon.Content::GetFloating(),
+                                 cudaCon.Content::GetReferenceMask(),
+                                 cudaCon.GetActiveVoxelNumber(),
+                                 cudaCon.Content::GetWarped(),
+                                 cudaCon.F3dContent::GetWarpedGradient(),
+                                 cudaCon.F3dContent::GetVoxelBasedMeasureGradient(),
+                                 cudaCon.F3dContent::GetLocalWeightSim(),
+                                 cudaCon.GetReferenceCuda(),
+                                 cudaCon.GetFloatingCuda(),
+                                 cudaCon.GetReferenceMaskCuda(),
+                                 cudaCon.GetWarpedCuda(),
+                                 cudaCon.GetWarpedGradientCuda(),
+                                 cudaCon.GetVoxelBasedMeasureGradientCuda());
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h
index 6b178611..76fb9983 100644
--- a/reg-lib/cuda/CudaMeasure.h
+++ b/reg-lib/cuda/CudaMeasure.h
@@ -1,9 +1,9 @@
-#pragma once
-
-#include "Measure.h"
-
-class CudaMeasure: public Measure {
-public:
-    virtual reg_measure* Create(const MeasureType& measureType) override;
-    virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr) override;
-};
+#pragma once
+
+#include "Measure.h"
+
+class CudaMeasure: public Measure {
+public:
+    virtual reg_measure* Create(const MeasureType& measureType) override;
+    virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr) override;
+};
diff --git a/reg-lib/cuda/CudaMeasureFactory.h b/reg-lib/cuda/CudaMeasureFactory.h
index 2f597e43..58061a23 100644
--- a/reg-lib/cuda/CudaMeasureFactory.h
+++ b/reg-lib/cuda/CudaMeasureFactory.h
@@ -1,8 +1,8 @@
-#pragma once
-
-#include "CudaMeasure.h"
-
-class CudaMeasureFactory: public MeasureFactory {
-public:
-    virtual Measure* Produce() override { return new CudaMeasure(); }
-};
+#pragma once
+
+#include "CudaMeasure.h"
+
+class CudaMeasureFactory: public MeasureFactory {
+public:
+    virtual Measure* Produce() override { return new CudaMeasure(); }
+};
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index c49df391..54a40264 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -12,8 +12,7 @@
 #include "_reg_common_cuda.h"
 #include "_reg_kld.h"
 
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 /// @brief Class that contains the GPU device pointers
 class reg_measure_gpu {
 public:
@@ -46,8 +45,7 @@ class reg_measure_gpu {
     float4 *warpedFloatingGradientDevicePointer;
     float4 *forwardVoxelBasedGradientDevicePointer;
 };
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
 public:
     /// @brief reg_lncc class constructor
@@ -77,8 +75,7 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
     /// @brief Compute the voxel based lncc gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
 public:
     /// @brief reg_kld_gpu class constructor
@@ -108,8 +105,7 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
     /// @brief Compute the voxel based kld gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
 class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
 public:
     /// @brief reg_dti_gpu class constructor
@@ -139,5 +135,4 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
     /// @brief Compute the voxel based dti gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
 };
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
+/* *************************************************************** */
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
index 162755b0..6639d40a 100644
--- a/reg-test/reg_test_regr_lts.cpp
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -2,9 +2,9 @@
 #include "_reg_blockMatching.h"
 #include "CpuBlockMatchingKernel.h"
 
-#include "OptimiseKernel.h"
-#include "CpuOptimiseKernel.h"
-#include "CudaOptimiseKernel.h"
+#include "LtsKernel.h"
+#include "CpuLtsKernel.h"
+#include "CudaLtsKernel.h"
 
 /**
  *  LTS regression test to ensure the CPU and CUDA versions yield the same output
@@ -130,8 +130,8 @@ class LTSTest {
             contentCuda->SetBlockMatchingParams(blockMatchingParamsCuda);
 
             // Initialise the optimise kernels
-            std::unique_ptr<OptimiseKernel> kernelCpu{ new CpuOptimiseKernel(contentCpu.get()) };
-            std::unique_ptr<OptimiseKernel> kernelCuda{ new CudaOptimiseKernel(contentCuda.get()) };
+            std::unique_ptr<LtsKernel> kernelCpu{ new CpuLtsKernel(contentCpu.get()) };
+            std::unique_ptr<LtsKernel> kernelCuda{ new CudaLtsKernel(contentCuda.get()) };
 
             // Compute the transformations
             kernelCpu->Calculate(ttype);

From 655c6fd3df54e85a5557249d2a23969e457476e0 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Tue, 18 Jul 2023 14:09:32 +0100
Subject: [PATCH 158/314] Issue#92: Added BM unit test and fix mask handling

---
 niftyreg_build_version.txt               |   2 +-
 reg-lib/cl/blockMatchingKernel.cl        |   8 +-
 reg-lib/cpu/_reg_blockMatching.cpp       |  28 ++--
 reg-lib/cuda/blockMatchingKernel.cu      | 200 ++--------------------
 reg-test/CMakeLists.txt                  |   1 +
 reg-test/reg_test_blockMatching.cpp      | 202 +++++++++++++++++++++++
 reg-test/reg_test_regr_blockMatching.cpp |  32 ++--
 7 files changed, 257 insertions(+), 216 deletions(-)
 create mode 100644 reg-test/reg_test_blockMatching.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 15007f1b..26817477 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-276
+277
diff --git a/reg-lib/cl/blockMatchingKernel.cl b/reg-lib/cl/blockMatchingKernel.cl
index f7a63a7e..d3f7b0d9 100755
--- a/reg-lib/cl/blockMatchingKernel.cl
+++ b/reg-lib/cl/blockMatchingKernel.cl
@@ -151,7 +151,9 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues,
 						(xImageIn > -1 && xImageIn < (int)c_ImageSize.x) &&
 						(yImageIn > -1 && yImageIn < (int)c_ImageSize.y);
 				// Copy the value from the global to the local shared memory
-				sWarpedValues[sharedIndex] = (valid && mask[indexXYIn] > -1) ?
+				//sWarpedValues[sharedIndex] = (valid && mask[indexXYIn] > -1) ?
+				//			warpedImageArray[indexXYIn] : NAN;
+				sWarpedValues[sharedIndex] = valid ?
 							warpedImageArray[indexXYIn] : NAN;
 			}
 		}
@@ -309,7 +311,9 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 							(yImageIn > -1 && yImageIn < (int)c_ImageSize.y) &&
 							(zImageIn > -1 && zImageIn < (int)c_ImageSize.z);
 					// Copy the value from the global to the local shared memory
-					sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ?
+					// sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ?
+					//			warpedImageArray[indexXYZIn] : NAN;
+					sWarpedValues[sharedIndex] = valid ?
 								warpedImageArray[indexXYZIn] : NAN;
 				}
 			}
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index 9b2b8e21..7091b22d 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -129,8 +129,8 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
    }
    else {
       // Version using 2D blocks
-      for (unsigned j = 0; j < params->blockNumber[1]; j++) {
-         for (unsigned i = 0; i < params->blockNumber[0]; i++) {
+      for (unsigned j = 0; j < params->blockNumber[1]; j++){
+         for (unsigned i = 0; i < params->blockNumber[0]; i++){
 
             for (unsigned n = 0; n < BLOCK_2D_SIZE; n++)
                referenceValues[n] = std::numeric_limits<DataType>::quiet_NaN();
@@ -162,9 +162,9 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
 
             //Let's calculate the variance of the block
             float variance = 0.0f;
-            for (int i = 0; i < BLOCK_2D_SIZE; i++) {
-               if (referenceValues[i] == referenceValues[i])
-                  variance += (mean - (float)referenceValues[i]) * (mean - (float)referenceValues[i]);
+            for (int ii = 0; ii < BLOCK_2D_SIZE; ii++) {
+               if (referenceValues[ii] == referenceValues[ii])
+                  variance += (mean - (float)referenceValues[ii]) * (mean - (float)referenceValues[ii]);
             }
             variance /= voxelNumber;
 
@@ -267,7 +267,7 @@ void initialise_block_matching_method(nifti_image * reference,
    }
    if (params->activeBlockNumber < 2) {
       reg_print_fct_error("initialise_block_matching_method()");
-      reg_print_msg_error("There are no active blocks");
+      reg_print_msg_error("There are less than 2 active blocks");
       reg_exit();
    }
 #ifndef NDEBUG
@@ -382,8 +382,9 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
                            if (-1 < x && x < warped->nx) {
                               warpedPtr_XY = &warpedPtr[index];
                               value = *warpedPtr_XY;
-                              maskPtr_XY = &mask[index];
-                              if (value == value && *maskPtr_XY > -1) {
+                              // maskPtr_XY = &mask[index];
+                              if (value == value) {
+                              // if (value == value && *maskPtr_XY > -1) {
                                  warpedValues[warpedIndex] = value;
                                  warpedOverlap[warpedIndex] = 1;
                               }
@@ -424,8 +425,8 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
                         }
                      }
 
-                     localCC = (referenceVar * warpedVar) > 0 ? fabs(localCC / sqrt(referenceVar * warpedVar)) : 0;
-                     //localCC = fabs(localCC / sqrt(referenceVar * warpedVar));
+                     localCC = (referenceVar * warpedVar) > 0 ? fabs(
+                        localCC / sqrt(referenceVar * warpedVar)) : 0;
 
                      if (localCC > bestCC) {
                         bestCC = localCC + 1.0e-7f;
@@ -601,16 +602,17 @@ void block_matching_method3D(nifti_image * reference,
                            if (-1 < z && z < warped->nz) {
                               index = z * warped->nx * warped->ny;
                               warpedPtr_Z = &warpedPtr[index];
-                              maskPtr_Z = &mask[index];
+                              // maskPtr_Z = &mask[index];
                               for (y = warpedIndex_start_y; y < warpedIndex_end_y; y++) {
                                  if (-1 < y && y < warped->ny) {
                                     index = y * warped->nx + warpedIndex_start_x;
                                     for (x = warpedIndex_start_x; x < warpedIndex_end_x; x++) {
                                        if (-1 < x && x < warped->nx) {
                                           warpedPtr_XYZ = &warpedPtr_Z[index];
-                                          maskPtr_XYZ = &maskPtr_Z[index];
+                                          // maskPtr_XYZ = &maskPtr_Z[index];
                                           value = *warpedPtr_XYZ;
-                                          if (value == value && *maskPtr_XYZ > -1) {
+                                          if (value == value) {
+                                          // if (value == value && *maskPtr_XYZ > -1) {
                                              warpedValues[tid][warpedIndex] = value;
                                              warpedOverlap[tid][warpedIndex] = 1;
                                           }
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 7579d2fa..05d005f8 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -17,7 +17,6 @@
 #include <vector>
 #include "_reg_maths.h"
 
-// #define USE_TEST_KERNEL
 ////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////
 /*
@@ -124,7 +123,7 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
                 const bool valid =
                     (xImageIn > -1 && xImageIn < (int)imageSize.x) &&
                     (yImageIn > -1 && yImageIn < (int)imageSize.y);
-                sWarpedValues[sharedIndex] = (valid && mask[indexXYIn] > -1) ?
+                sWarpedValues[sharedIndex] = valid  ?
                     tex1Dfetch<float>(warpedTexture, indexXYIn) : nanf("sNaN");
             }
         }
@@ -200,182 +199,6 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
     }
 }
 /* *************************************************************** */
-#ifdef USE_TEST_KERNEL
-__device__ __inline__ float2 REDUCE_TEST(float* sData,
-                                         float data,
-                                         unsigned tid) {
-    sData[tid] = data;
-    __syncthreads();
-
-    bool seconHalf = tid > 63 ? true : false;
-    for (unsigned i = 32; i > 0; i >>= 1) {
-        if (tid < i) sData[tid] += sData[tid + i];
-        if (seconHalf && tid < 64 + i) sData[tid] += sData[tid + i];
-        __syncthreads();
-    }
-
-    const float2 temp = make_float2(sData[0], sData[64]);
-    __syncthreads();
-    return temp;
-}
-/* *************************************************************** */
-__global__ void blockMatchingKernel3D(float *warpedPosition,
-                                      float *referencePosition,
-                                      cudaTextureObject_t referenceTexture,
-                                      cudaTextureObject_t warpedTexture,
-                                      cudaTextureObject_t totalBlockTexture,
-                                      const int *mask,
-                                      const float* referenceMatrix,
-                                      unsigned *definedBlock,
-                                      const int3 imageSize,
-                                      const uint3 blockSize) {
-    extern __shared__ float sWarpedValues[];
-    float *sData = &sWarpedValues[12 * 12 * 16];
-
-    // Compute the current block index
-    const unsigned bid0 = (2 * blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
-    const unsigned bid1 = bid0 + gridDim.x * gridDim.y;
-    int currentBlockIndex[2]{ tex1Dfetch<int>(totalBlockTexture, bid0), tex1Dfetch<int>(totalBlockTexture, bid1) };
-    currentBlockIndex[1] = (2 * blockIdx.z + 1) < blockSize.z ? currentBlockIndex[1] : -1;
-    if (currentBlockIndex[0] > -1 || currentBlockIndex[1] > -1) {
-        const unsigned idx = threadIdx.x;
-        const unsigned idy = threadIdx.y;
-        const unsigned idz = threadIdx.z;
-        const unsigned tid = (idz * 4 + idy) * 4 + idx;
-        const unsigned xImage = blockIdx.x * 4 + idx;
-        const unsigned yImage = blockIdx.y * 4 + idy;
-        const unsigned zImage = blockIdx.z * 8 + idz;
-
-        //populate shared memory with resultImageArray's values
-        for (int z = -1; z < 2; z += 2) {
-            const int zImageIn = zImage + z * 4;
-            for (int y = -1; y < 2; ++y) {
-                const int yImageIn = yImage + y * 4;
-                for (int x = -1; x < 2; ++x) {
-                    const int xImageIn = xImage + x * 4;
-                    const int sharedIndex = (((z + 1) * 4 + idz) * 12 + (y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx;
-                    const unsigned indexXYZIn = xImageIn + imageSize.x * (yImageIn + zImageIn * imageSize.y);
-                    const bool valid =
-                        (xImageIn > -1 && xImageIn < (int)imageSize.x) &&
-                        (yImageIn > -1 && yImageIn < (int)imageSize.y) &&
-                        (zImageIn > -1 && zImageIn < (int)imageSize.z);
-                    sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ?
-                        tex1Dfetch<float>(warpedTexture, indexXYZIn) : nanf("sNaN");
-                }
-            }
-        }
-
-        const unsigned voxIndex = (zImage * imageSize.y + yImage) * imageSize.x + xImage;
-        const bool referenceInBounds = xImage < imageSize.x && yImage < imageSize.y && zImage < imageSize.z;
-        float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ?
-            tex1Dfetch<float>(referenceTexture, voxIndex) : nanf("sNaN");
-        const bool finiteReference = isfinite(rReferenceValue);
-        rReferenceValue = finiteReference ? rReferenceValue : 0.f;
-        float2 tempVal = REDUCE_TEST(sData, finiteReference ? 1.0f : 0.0f, tid);
-        const uint2 referenceSize = make_uint2((unsigned)tempVal.x, (unsigned)tempVal.y);
-
-        float2 bestValue{};
-        float bestDisp[2][3];
-        bestDisp[0][0] = bestDisp[1][0] = nanf("sNaN");
-        if (referenceSize.x > 32 || referenceSize.y > 32) {
-            float2 referenceMean = REDUCE_TEST(sData, rReferenceValue, tid);
-            referenceMean.x /= (float)referenceSize.x;
-            referenceMean.y /= (float)referenceSize.y;
-            float referenceTemp;
-            if (tid > 63)
-                referenceTemp = finiteReference ? rReferenceValue - referenceMean.y : 0.f;
-            else referenceTemp = finiteReference ? rReferenceValue - referenceMean.x : 0.f;
-            float2 referenceVar = REDUCE_TEST(sData, referenceTemp * referenceTemp, tid);
-
-            // iteration over the result blocks (block matching part)
-            for (unsigned z = 1; z < 8; ++z) {
-                for (unsigned y = 1; y < 8; ++y) {
-                    for (unsigned x = 1; x < 8; ++x) {
-                        const unsigned sharedIndex = ((z + idz) * 12 + y + idy) * 12 + x + idx;
-                        const float rWarpedValue = sWarpedValues[sharedIndex];
-                        const bool overlap = isfinite(rWarpedValue) && finiteReference;
-                        tempVal = REDUCE_TEST(sData, overlap ? 1.0f : 0.0f, tid);
-                        const uint2 warpedSize = make_uint2((unsigned)tempVal.x, (unsigned)tempVal.y);
-
-                        if (warpedSize.x > 32 || warpedSize.y > 32) {
-                            float newreferenceTemp = referenceTemp;
-                            float2 newreferenceVar = referenceVar;
-                            if (warpedSize.x != referenceSize.x || warpedSize.y != referenceSize.y) {
-                                const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
-                                float2 newReferenceMean = REDUCE_TEST(sData, newReferenceValue, tid);
-                                newReferenceMean.x /= (float)warpedSize.x;
-                                newReferenceMean.y /= (float)warpedSize.y;
-                                if (tid > 63)
-                                    referenceTemp = overlap ? newReferenceValue - newReferenceMean.y : 0.f;
-                                else referenceTemp = overlap ? newReferenceValue - newReferenceMean.x : 0.f;
-                                newreferenceVar = REDUCE_TEST(sData, newreferenceTemp * newreferenceTemp, tid);
-                            }
-                            const float rChecked = overlap ? rWarpedValue : 0.0f;
-                            float2 warpedMean = REDUCE_TEST(sData, rChecked, tid);
-                            warpedMean.x /= (float)warpedSize.x;
-                            warpedMean.y /= (float)warpedSize.y;
-                            float warpedTemp;
-                            if (tid > 63)
-                                warpedTemp = overlap ? rChecked - warpedMean.y : 0.f;
-                            else warpedTemp = overlap ? rChecked - warpedMean.x : 0.f;
-                            const float2 warpedVar = REDUCE_TEST(sData, warpedTemp * warpedTemp, tid);
-                            const float2 sumTargetResult = REDUCE_TEST(sData, newreferenceTemp * warpedTemp, tid);
-
-                            if (tid == 0 && warpedSize.x > 32) {
-                                const float localCC = fabs(sumTargetResult.x * rsqrtf(newreferenceVar.x * warpedVar.x));
-                                if (localCC > bestValue.x) {
-                                    bestValue.x = localCC;
-                                    bestDisp[0][0] = x - 4.f;
-                                    bestDisp[0][1] = y - 4.f;
-                                    bestDisp[0][2] = z - 4.f;
-                                }
-                            }
-                            if (tid == 64 && warpedSize.y > 32) {
-                                const float localCC = fabs(sumTargetResult.y * rsqrtf(newreferenceVar.y * warpedVar.y));
-                                if (localCC > bestValue.y) {
-                                    bestValue.y = localCC;
-                                    bestDisp[1][0] = x - 4.f;
-                                    bestDisp[1][1] = y - 4.f;
-                                    bestDisp[1][2] = z - 4.f;
-                                }
-                            }
-                            __syncthreads();
-                        }
-                    }
-                }
-            }
-        }
-
-        if (tid == 0 && currentBlockIndex[0] > -1) {
-            const unsigned posIdx = 3 * currentBlockIndex[0];
-            warpedPosition[posIdx] = NAN;
-            if (isfinite(bestDisp[0][0])) {
-                const float referencePosition_temp[3]{ (float)xImage, (float)yImage, (float)zImage };
-                bestDisp[0][0] += referencePosition_temp[0];
-                bestDisp[0][1] += referencePosition_temp[1];
-                bestDisp[0][2] += referencePosition_temp[2];
-                reg_mat44_mul_cuda<float>(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]);
-                reg_mat44_mul_cuda<float>(referenceMatrix, bestDisp[0], &warpedPosition[posIdx]);
-                atomicAdd(definedBlock, 1);
-            }
-        }
-        if (tid == 64 && currentBlockIndex[1] > -1) {
-            const unsigned posIdx = 3 * currentBlockIndex[1];
-            warpedPosition[posIdx] = NAN;
-            if (isfinite(bestDisp[1][0])) {
-                const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage };
-                bestDisp[1][0] += referencePosition_temp[0];
-                bestDisp[1][1] += referencePosition_temp[1];
-                bestDisp[1][2] += referencePosition_temp[2];
-                reg_mat44_mul_cuda<float>(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]);
-                reg_mat44_mul_cuda<float>(referenceMatrix, bestDisp[1], &warpedPosition[posIdx]);
-                atomicAdd(definedBlock, 1);
-            }
-        }
-    }
-}
-#else
-/* *************************************************************** */
 __global__ void blockMatchingKernel3D(float *warpedPosition,
                                       float *referencePosition,
                                       cudaTextureObject_t referenceTexture,
@@ -413,7 +236,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
                         (xImageIn > -1 && xImageIn < (int)imageSize.x) &&
                         (yImageIn > -1 && yImageIn < (int)imageSize.y) &&
                         (zImageIn > -1 && zImageIn < (int)imageSize.z);
-                    sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ?
+                    sWarpedValues[sharedIndex] = valid ?
                         tex1Dfetch<float>(warpedTexture, indexXYZIn) : nanf("sNaN");     //for some reason the mask here creates probs
                 }
             }
@@ -434,7 +257,8 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
 
         if (referenceSize > 32) {
             //the target values must remain constant throughout the block matching process
-            const float referenceMean = __fdividef(blockReduceSum(rReferenceValue, tid), referenceSize);
+            // const float referenceMean = __fdividef(blockReduceSum(rReferenceValue, tid), referenceSize);
+            const float referenceMean = blockReduceSum(rReferenceValue, tid) / referenceSize;
             const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f;
             const float referenceVar = blockReduceSum(referenceTemp * referenceTemp, tid);
 
@@ -453,18 +277,21 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
                             float newreferenceVar = referenceVar;
                             if (warpedSize != referenceSize) {
                                 const float newReferenceValue = overlap ? rReferenceValue : 0.0f;
-                                const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), warpedSize);
+                                // const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), warpedSize);
+                                const float newReferenceMean = blockReduceSum(newReferenceValue, tid) / warpedSize;
                                 newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f;
                                 newreferenceVar = blockReduceSum(newreferenceTemp * newreferenceTemp, tid);
                             }
 
                             const float rChecked = overlap ? rWarpedValue : 0.0f;
-                            const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), warpedSize);
+                            // const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), warpedSize);
+                            const float warpedMean = blockReduceSum(rChecked, tid) / warpedSize;
                             const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f;
                             const float warpedVar = blockReduceSum(warpedTemp * warpedTemp, tid);
 
                             const float sumTargetResult = blockReduceSum(newreferenceTemp * warpedTemp, tid);
-                            const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs(sumTargetResult / sqrt(newreferenceVar * warpedVar)) : 0;
+                            const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs(
+                                sumTargetResult / sqrt(newreferenceVar * warpedVar)) : 0;
 
                             if (tid == 0 && localCC > bestCC) {
                                 bestCC = localCC + 1.0e-7f;
@@ -493,7 +320,6 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
         }
     }
 }
-#endif
 /* *************************************************************** */
 void block_matching_method_gpu(const nifti_image *referenceImage,
                                _reg_blockMatchingParam *params,
@@ -524,15 +350,9 @@ void block_matching_method_gpu(const nifti_image *referenceImage,
     NR_CUDA_SAFE_CALL(cudaMalloc(&definedBlockCuda, sizeof(unsigned)));
     NR_CUDA_SAFE_CALL(cudaMemcpy(definedBlockCuda, &definedBlock, sizeof(unsigned), cudaMemcpyHostToDevice));
 
-#ifdef USE_TEST_KERNEL
-    dim3 blockDims(4, 4, 8);
-    dim3 gridDims(params->blockNumber[0], params->blockNumber[1], (unsigned)reg_ceil((float)params->blockNumber[2] / 2.f));
-    unsigned sharedMemSize = (128 + 4 * 3 * 4 * 3 * 4 * 4) * sizeof(float);
-#else
     dim3 blockDims(4, 4, 4);
     dim3 gridDims(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]);
     unsigned sharedMemSize = (64 + 4 * 3 * 4 * 3 * 4 * 3) * sizeof(float);  // (3*4)^3
-#endif
 
     if (referenceImage->nz == 1) {
         blockDims.z = 1;
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 3745e97c..a332db78 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -117,6 +117,7 @@ set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
+set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST})
 if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
new file mode 100644
index 00000000..fcff3e96
--- /dev/null
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -0,0 +1,202 @@
+#include "reg_test_common.h"
+#include "_reg_blockMatching.h"
+#include "CpuBlockMatchingKernel.h"
+#include "CudaBlockMatchingKernel.h"
+#include "CpuAffineDeformationFieldKernel.h"
+#include "CpuResampleImageKernel.h"
+
+
+/**
+ *  Block matching regression test to ensure the CPU and CUDA versions yield the same output
+ */
+
+#define OFFSET 1
+
+class BMTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, int*>;
+    using TestCase = std::tuple<std::string, unique_ptr<_reg_blockMatchingParam>>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    BMTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create a reference 2D and 3D images
+        constexpr NiftiImage::dim_t size = 64;
+        vector<NiftiImage::dim_t> dim{ size, size };
+        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
+        dim.push_back(size);
+        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+
+        // Fill images with random values
+        const auto ref2dPtr = reference2d.data();
+        for(auto ref2dItr = ref2dPtr.begin(); ref2dItr!=ref2dPtr.end(); ++ref2dItr){
+            *ref2dItr = distr(gen);
+        }
+        const auto ref3dPtr = reference3d.data();
+        for(auto ref3dItr = ref3dPtr.begin(); ref3dItr!=ref3dPtr.end(); ++ref3dItr){
+            *ref3dItr = distr(gen);
+        }
+
+        // Create a translation matrix to apply OFFSET voxels along each axis
+        mat44 translationMatrix;
+        reg_mat44_eye(&translationMatrix);
+        translationMatrix.m[0][3] = -OFFSET;
+        translationMatrix.m[1][3] = -OFFSET;
+        translationMatrix.m[2][3] = -OFFSET;
+
+        // Create a mask so that voxel at the boundary are ignored
+        int *mask2D = new int[reference2d.nVoxels()];
+        int *mask3D = new int[reference3d.nVoxels()];
+        int *mask2dPtr = mask2D;
+        int *mask3dPtr = mask3D;
+        // set all values to -1
+        for(int y=0; y<reference2d->ny;++y)
+            for(int x=0; x<reference2d->nx;++x)
+                *mask2dPtr++ = -1;
+        for(int z=0; z<reference3d->nz;++z)
+            for(int y=0; y<reference3d->ny;++y)
+                for(int x=0; x<reference3d->nx;++x)
+                    *mask3dPtr++ = -1;
+        // Set the internal values to 1
+        for(int y=OFFSET; y<reference2d->ny-OFFSET;++y){
+            mask2dPtr = &mask2D[y*reference2d->nx+OFFSET];
+            for(int x=OFFSET; x<reference2d->nx-OFFSET;++x){
+                *mask2dPtr++ = 1;
+            }
+        }
+        for(int z=OFFSET; z<reference3d->nz-OFFSET;++z){
+            for(int y=OFFSET; y<reference3d->ny-OFFSET;++y){
+                mask3dPtr = &mask3D[(z*reference3d->ny+y)*reference3d->nx+OFFSET];
+                for(int x=OFFSET; x<reference3d->nx-OFFSET;++x){
+                    *mask3dPtr++ = 1;
+                }
+            }
+        }
+
+        // Apply the transformation in 2D
+        unique_ptr<AladinContent> contentResampling2D{ new AladinContent(
+                reference2d,
+                reference2d
+        ) };
+        contentResampling2D->SetTransformationMatrix(&translationMatrix);
+        std::unique_ptr<AffineDeformationFieldKernel> affineDeformKernel2D{
+            new CpuAffineDeformationFieldKernel(contentResampling2D.get())
+            };
+        affineDeformKernel2D->Calculate();
+        std::unique_ptr<ResampleImageKernel> resampleKernel2D{
+            new CpuResampleImageKernel(contentResampling2D.get())
+            };
+        resampleKernel2D->Calculate(0, std::numeric_limits<float>::quiet_NaN());
+
+        // Apply the transformation in 3D
+        unique_ptr<AladinContent> contentResampling3D{ new AladinContent(
+                reference3d,
+                reference3d
+        ) };
+        contentResampling3D->SetTransformationMatrix(&translationMatrix);
+        std::unique_ptr<AffineDeformationFieldKernel> affineDeformKernel3D{
+            new CpuAffineDeformationFieldKernel(contentResampling3D.get())
+            };
+        affineDeformKernel3D->Calculate();
+        std::unique_ptr<ResampleImageKernel> resampleKernel3D{
+            new CpuResampleImageKernel(contentResampling3D.get())
+            };
+        resampleKernel3D->Calculate(0, 0);
+
+        // Create the data container for the regression test
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "BlockMatching 2D",
+            reference2d,
+            NiftiImage(contentResampling2D->GetWarped()),
+            mask2D
+        ));
+        contentResampling2D.release();
+        testData.emplace_back(TestData(
+            "BlockMatching 3D",
+            reference3d,
+            NiftiImage(contentResampling3D->GetWarped()),
+            mask3D
+        ));
+        contentResampling3D.release();
+
+        for (auto&& data : testData) {
+            // Get the test data
+            auto&& [testName, reference, warped, mask] = data;
+
+            for (auto&& platformType : PlatformTypes) {
+
+                // Create images
+                NiftiImage referenceTest(reference);
+                NiftiImage warpedTest(warped);
+
+                // Create the contents
+                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<AladinContentCreator> contentCreator{
+                    dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin))
+                    };
+                unique_ptr<AladinContent> content{ contentCreator->Create(
+                    referenceTest,
+                    referenceTest,
+                    mask,
+                    nullptr,
+                    sizeof(float),
+                    100,
+                    100,
+                    1) };
+                content->SetWarped(warpedTest.disown());
+
+                // Inititialise the block matching
+                unique_ptr<Kernel> bmKernel{ platform->CreateKernel(
+                    BlockMatchingKernel::GetName(), content.get()
+                    ) };
+
+                // Do the computation
+                bmKernel->castTo<BlockMatchingKernel>()->Calculate();
+
+                // Retrieve the information
+                unique_ptr<_reg_blockMatchingParam> blockMatchingParams{
+                    new _reg_blockMatchingParam(content->GetBlockMatchingParams())
+                    };
+    
+                testCases.push_back({ testName + " " + platform->GetName(), std::move(blockMatchingParams) });
+            } // loop over platforms
+        }
+        delete mask2D;
+        delete mask3D;
+    }
+};
+
+TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : this->testCases) {
+        // Retrieve test information
+        auto&& [testName, blockMatchingParams] = testCase;
+
+        SECTION(testName) {
+            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+
+            // Loop over the block and ensure all values are identical
+            for (int b = 0; b < blockMatchingParams->activeBlockNumber; ++b) {
+                for(int d = 0; d<(int)blockMatchingParams->dim; ++d){
+                    const int i = b*(int)blockMatchingParams->dim+d;
+                    const auto diffPos = blockMatchingParams->warpedPosition[i] - blockMatchingParams->referencePosition[i];
+                    if(fabs(diffPos - OFFSET) > EPS){
+                        std::cout << "[" << b << "/" << blockMatchingParams->activeBlockNumber << ":" << d << "] ";
+                        std::cout << diffPos << std::endl; std::cout.flush();
+                    }
+                    REQUIRE(fabs(diffPos - OFFSET) < EPS);
+                }
+            }
+        }
+    }
+}
diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp
index ca2392cf..ee0a62d5 100644
--- a/reg-test/reg_test_regr_blockMatching.cpp
+++ b/reg-test/reg_test_regr_blockMatching.cpp
@@ -25,7 +25,7 @@ class BMTest {
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a reference and floating 2D images
-        constexpr NiftiImage::dim_t size = 64;
+        constexpr NiftiImage::dim_t size = 128;
         vector<NiftiImage::dim_t> dim{ size, size };
         NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
@@ -140,15 +140,27 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") {
             REQUIRE(blockMatchingParamsCpu->activeBlockNumber == blockMatchingParamsCuda->activeBlockNumber);
 
             // Loop over the block and ensure all values are identical
-            for (int b = 0; b < blockMatchingParamsCpu->activeBlockNumber * (int)blockMatchingParamsCpu->dim; ++b) {
-                const auto refPosCpu = blockMatchingParamsCpu->referencePosition[b];
-                const auto refPosCuda = blockMatchingParamsCuda->referencePosition[b];
-                std::cout << "referencePosition: " << b << " " << refPosCpu << " " << refPosCuda << std::endl;
-                REQUIRE(fabs(refPosCpu - refPosCuda) < EPS);
-                const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[b];
-                const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[b];
-                std::cout << "warpedPosition: " << b << " " << warPosCpu << " " << warPosCuda << std::endl;
-                REQUIRE(fabs(warPosCpu - warPosCuda) < EPS);
+            for (int b = 0; b < blockMatchingParamsCpu->activeBlockNumber; ++b) {
+                for(int d = 0; d<(int)blockMatchingParamsCpu->dim; ++d){
+
+                    const int i = b*(int)blockMatchingParamsCpu->dim+d;
+                    const auto refPosCpu = blockMatchingParamsCpu->referencePosition[i];
+                    const auto refPosCuda = blockMatchingParamsCuda->referencePosition[i];
+                    if(fabs(refPosCpu - refPosCuda) > EPS){
+                        std::cout << "Ref[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:";
+                        std::cout << refPosCpu << " | CUDA:" << refPosCuda << std::endl;
+                        std::cout.flush();
+                    }
+                    REQUIRE(fabs(refPosCpu - refPosCuda) < EPS);
+                    const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[i];
+                    const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[i];
+                    if(fabs(warPosCpu - warPosCuda) > EPS){
+                        std::cout << "War[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:";
+                        std::cout << warPosCpu << " | CUDA:" << warPosCuda << std::endl;
+                        std::cout.flush();
+                    }
+                    REQUIRE(fabs(warPosCpu - warPosCuda) < EPS);
+                }
             }
         }
     }

From 44447687c98fe88c592d89f2ef759b027b03ebfd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 18 Jul 2023 17:30:52 +0100
Subject: [PATCH 159/314] Refactorisations

---
 niftyreg_build_version.txt                |   2 +-
 reg-lib/cpu/_reg_dti.cpp                  | 148 +++--
 reg-lib/cpu/_reg_dti.h                    |  35 +-
 reg-lib/cpu/_reg_kld.cpp                  | 178 +++---
 reg-lib/cpu/_reg_kld.h                    |  28 +-
 reg-lib/cpu/_reg_lncc.cpp                 | 669 +++++++++++-----------
 reg-lib/cpu/_reg_lncc.h                   |  81 ++-
 reg-lib/cpu/_reg_measure.h                |  86 +--
 reg-lib/cpu/_reg_mind.cpp                 | 191 +++---
 reg-lib/cpu/_reg_mind.h                   |  32 +-
 reg-lib/cpu/_reg_nmi.cpp                  | 280 ++++-----
 reg-lib/cpu/_reg_nmi.h                    |  58 +-
 reg-lib/cpu/_reg_ssd.cpp                  | 224 ++++----
 reg-lib/cpu/_reg_ssd.h                    |  28 +-
 reg-lib/cuda/CudaContext.hpp              |   4 +-
 reg-lib/cuda/_reg_common_cuda.h           |   2 +-
 reg-lib/cuda/_reg_measure_gpu.h           | 122 ++--
 reg-lib/cuda/_reg_nmi_gpu.cu              |  88 +--
 reg-lib/cuda/_reg_nmi_gpu.h               |  56 +-
 reg-lib/cuda/_reg_ssd_gpu.cu              |  80 +--
 reg-lib/cuda/_reg_ssd_gpu.h               |  28 +-
 reg-test/CMakeLists.txt                   |   2 +-
 reg-test/reg_test_blockMatching.cpp       | 117 ++--
 reg-test/reg_test_conjugateGradient.cpp   |   8 +-
 reg-test/reg_test_getDeformationField.cpp |   4 +-
 reg-test/reg_test_imageGradient.cpp       |   8 +-
 reg-test/reg_test_interpolation.cpp       |   8 +-
 reg-test/reg_test_lncc.cpp                |  31 +-
 reg-test/reg_test_normaliseGradient.cpp   |   8 +-
 reg-test/reg_test_regr_blockMatching.cpp  |  43 +-
 reg-test/reg_test_regr_lts.cpp            |  37 +-
 31 files changed, 1313 insertions(+), 1373 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 26817477..3d242f55 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-277
+278
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index c81ab780..509b0939 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -12,7 +12,6 @@
 
 #include "_reg_dti.h"
 
-/* *************************************************************** */
 /* *************************************************************** */
 reg_dti::reg_dti()
    : reg_measure()
@@ -22,35 +21,34 @@ reg_dti::reg_dti()
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 // This function is directly the same as that used for reg_ssd
-void reg_dti::InitialiseMeasure(nifti_image *refImgPtr,
-                                nifti_image *floImgPtr,
-                                int *maskRefPtr,
-                                nifti_image *warFloImgPtr,
-                                nifti_image *warFloGraPtr,
-                                nifti_image *forVoxBasedGraPtr,
-                                nifti_image *localWeightSimPtr,
-                                int *maskFloPtr,
-                                nifti_image *warRefImgPtr,
-                                nifti_image *warRefGraPtr,
-                                nifti_image *bckVoxBasedGraPtr)
+void reg_dti::InitialiseMeasure(nifti_image *refImg,
+                                nifti_image *floImg,
+                                int *refMask,
+                                nifti_image *warpedImg,
+                                nifti_image *warpedGrad,
+                                nifti_image *voxelBasedGrad,
+                                nifti_image *localWeightSim,
+                                int *floMask,
+                                nifti_image *warpedImgBw,
+                                nifti_image *warpedGradBw,
+                                nifti_image *voxelBasedGradBw)
 {
    // Set the pointers using the parent class function
-   reg_measure::InitialiseMeasure(refImgPtr,
-                                  floImgPtr,
-                                  maskRefPtr,
-                                  warFloImgPtr,
-                                  warFloGraPtr,
-                                  forVoxBasedGraPtr,
-                                  localWeightSimPtr,
-                                  maskFloPtr,
-                                  warRefImgPtr,
-                                  warRefGraPtr,
-                                  bckVoxBasedGraPtr);
+   reg_measure::InitialiseMeasure(refImg,
+                                  floImg,
+                                  refMask,
+                                  warpedImg,
+                                  warpedGrad,
+                                  voxelBasedGrad,
+                                  localWeightSim,
+                                  floMask,
+                                  warpedImgBw,
+                                  warpedGradBw,
+                                  voxelBasedGradBw);
 
    // Check that the input images have the same number of time point
-   if(this->referenceImagePointer->nt != this->floatingImagePointer->nt)
+   if(this->referenceImage->nt != this->floatingImage->nt)
    {
       reg_print_fct_error("reg_dti::InitialiseMeasure");
       reg_print_msg_error("This number of time point should be the same for both input images");
@@ -58,7 +56,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImgPtr,
    }
 
    int j=0;
-   for(int i=0; i<refImgPtr->nt; ++i)
+   for(int i=0; i<refImg->nt; ++i)
    {
       //JM - note, the specific value of timePointWeight is not used for DTI images
       //any value > 0 indicates the 'time point' is active
@@ -73,7 +71,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImgPtr,
 #endif
       }
    }
-   if((refImgPtr->nz>1 && j!=6) && (refImgPtr->nz==1 && j!=3))
+   if((refImg->nz>1 && j!=6) && (refImg->nz==1 && j!=3))
    {
       reg_print_fct_error("reg_dti::InitialiseMeasure");
       reg_print_msg_error("Unexpected number of DTI components");
@@ -157,28 +155,28 @@ template double reg_getDTIMeasureValue<double>(nifti_image *,nifti_image *,int *
 double reg_dti::GetSimilarityMeasureValue()
 {
    // Check that all the specified image are of the same datatype
-   if(this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype)
+   if(this->warpedImage->datatype != this->referenceImage->datatype)
    {
       reg_print_fct_error("reg_dti::GetSimilarityMeasureValue");
       reg_print_msg_error("Both input images are expected to have the same type");
       reg_exit();
    }
    double DTIMeasureValue;
-   switch(this->referenceImagePointer->datatype)
+   switch(this->referenceImage->datatype)
    {
    case NIFTI_TYPE_FLOAT32:
       DTIMeasureValue = reg_getDTIMeasureValue<float>
-                        (this->referenceImagePointer,
-                         this->warpedFloatingImagePointer,
-                         this->referenceMaskPointer,
+                        (this->referenceImage,
+                         this->warpedImage,
+                         this->referenceMask,
                          this->dtIndicies
                         );
       break;
    case NIFTI_TYPE_FLOAT64:
       DTIMeasureValue = reg_getDTIMeasureValue<double>
-                        (this->referenceImagePointer,
-                         this->warpedFloatingImagePointer,
-                         this->referenceMaskPointer,
+                        (this->referenceImage,
+                         this->warpedImage,
+                         this->referenceMask,
                          this->dtIndicies
                         );
       break;
@@ -192,27 +190,27 @@ double reg_dti::GetSimilarityMeasureValue()
    if(this->isSymmetric)
    {
       // Check that all the specified image are of the same datatype
-      if(this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype)
+      if(this->warpedImageBw->datatype != this->floatingImage->datatype)
       {
          reg_print_fct_error("reg_dti::GetSimilarityMeasureValue");
          reg_print_msg_error("Both input images are expected to have the same type");
          reg_exit();
       }
-      switch(this->floatingImagePointer->datatype)
+      switch(this->floatingImage->datatype)
       {
       case NIFTI_TYPE_FLOAT32:
          DTIMeasureValue += reg_getDTIMeasureValue<float>
-                            (this->floatingImagePointer,
-                             this->warpedReferenceImagePointer,
-                             this->floatingMaskPointer,
+                            (this->floatingImage,
+                             this->warpedImageBw,
+                             this->floatingMask,
                              this->dtIndicies
                             );
          break;
       case NIFTI_TYPE_FLOAT64:
          DTIMeasureValue += reg_getDTIMeasureValue<double>
-                            (this->floatingImagePointer,
-                             this->warpedReferenceImagePointer,
-                             this->floatingMaskPointer,
+                            (this->floatingImage,
+                             this->warpedImageBw,
+                             this->floatingMask,
                              this->dtIndicies
                             );
          break;
@@ -225,7 +223,6 @@ double reg_dti::GetSimilarityMeasureValue()
    return DTIMeasureValue;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
 void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
       nifti_image *warpedImage,
@@ -331,18 +328,18 @@ template void reg_getVoxelBasedDTIMeasureGradient<float>
 template void reg_getVoxelBasedDTIMeasureGradient<double>
 (nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned *);
 /* *************************************************************** */
-void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
+void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint)
 {
    // Check if the specified time point exists and is active
-   reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-   if(this->timePointWeight[current_timepoint]==0)
+   reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
+   if(this->timePointWeight[currentTimepoint]==0)
       return;
 
    // Check if all required input images are of the same data type
-   int dtype = this->referenceImagePointer->datatype;
-   if(this->warpedFloatingImagePointer->datatype != dtype ||
-         this->warpedFloatingGradientImagePointer->datatype != dtype ||
-         this->forwardVoxelBasedGradientImagePointer->datatype != dtype
+   int dtype = this->referenceImage->datatype;
+   if(this->warpedImage->datatype != dtype ||
+         this->warpedGradient->datatype != dtype ||
+         this->voxelBasedGradient->datatype != dtype
      )
    {
       reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
@@ -354,21 +351,21 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
    {
    case NIFTI_TYPE_FLOAT32:
       reg_getVoxelBasedDTIMeasureGradient<float>
-      (this->referenceImagePointer,
-       this->warpedFloatingImagePointer,
-       this->warpedFloatingGradientImagePointer,
-       this->forwardVoxelBasedGradientImagePointer,
-       this->referenceMaskPointer,
+      (this->referenceImage,
+       this->warpedImage,
+       this->warpedGradient,
+       this->voxelBasedGradient,
+       this->referenceMask,
        this->dtIndicies
       );
       break;
    case NIFTI_TYPE_FLOAT64:
       reg_getVoxelBasedDTIMeasureGradient<double>
-      (this->referenceImagePointer,
-       this->warpedFloatingImagePointer,
-       this->warpedFloatingGradientImagePointer,
-       this->forwardVoxelBasedGradientImagePointer,
-       this->referenceMaskPointer,
+      (this->referenceImage,
+       this->warpedImage,
+       this->warpedGradient,
+       this->voxelBasedGradient,
+       this->referenceMask,
        this->dtIndicies
       );
       break;
@@ -380,10 +377,10 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
    // Compute the gradient of the ssd for the backward transformation
    if(this->isSymmetric)
    {
-      dtype = this->floatingImagePointer->datatype;
-      if(this->warpedReferenceImagePointer->datatype != dtype ||
-            this->warpedReferenceGradientImagePointer->datatype != dtype ||
-            this->backwardVoxelBasedGradientImagePointer->datatype != dtype
+      dtype = this->floatingImage->datatype;
+      if(this->warpedImageBw->datatype != dtype ||
+            this->warpedGradientBw->datatype != dtype ||
+            this->voxelBasedGradientBw->datatype != dtype
         )
       {
          reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
@@ -395,21 +392,21 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
       {
       case NIFTI_TYPE_FLOAT32:
          reg_getVoxelBasedDTIMeasureGradient<float>
-         (this->floatingImagePointer,
-          this->warpedReferenceImagePointer,
-          this->warpedReferenceGradientImagePointer,
-          this->backwardVoxelBasedGradientImagePointer,
-          this->floatingMaskPointer,
+         (this->floatingImage,
+          this->warpedImageBw,
+          this->warpedGradientBw,
+          this->voxelBasedGradientBw,
+          this->floatingMask,
           this->dtIndicies
          );
          break;
       case NIFTI_TYPE_FLOAT64:
          reg_getVoxelBasedDTIMeasureGradient<double>
-         (this->floatingImagePointer,
-          this->warpedReferenceImagePointer,
-          this->warpedReferenceGradientImagePointer,
-          this->backwardVoxelBasedGradientImagePointer,
-          this->floatingMaskPointer,
+         (this->floatingImage,
+          this->warpedImageBw,
+          this->warpedGradientBw,
+          this->voxelBasedGradientBw,
+          this->floatingMask,
           this->dtIndicies
          );
          break;
@@ -421,4 +418,3 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 6df167b6..580382af 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -16,7 +16,6 @@
 
 #include "_reg_ssd.h"
 
-/* *************************************************************** */
 /* *************************************************************** */
 /// @brief DTI related measure of similarity class
 class reg_dti: public reg_measure {
@@ -27,21 +26,21 @@ class reg_dti: public reg_measure {
     virtual ~reg_dti() {}
 
     /// @brief Initialise the reg_dti object
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr = nullptr,
-                                   int *maskFloPtr = nullptr,
-                                   nifti_image *warRefImgPtr = nullptr,
-                                   nifti_image *warRefGraPtr = nullptr,
-                                   nifti_image *bckVoxBasedGraPtr = nullptr) override;
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr) override;
     /// @brief Returns the value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based gradient for DTI images
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
 
 protected:
     // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ
@@ -49,8 +48,8 @@ class reg_dti: public reg_measure {
     float currentValue;
 };
 /* *************************************************************** */
-
-/** @brief Copmutes and returns the SSD between two input image
+/**
+ * @brief Computes and returns the SSD between two input image
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
  * @param mask Array that contains a mask to specify which voxel
@@ -62,8 +61,9 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
                               nifti_image *warpedImage,
                               int *mask,
                               unsigned *dtIndicies);
-
-/** @brief Compute a voxel based gradient of the sum squared difference.
+/* *************************************************************** */
+/**
+ * @brief Compute a voxel based gradient of the sum squared difference.
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
  * @param warpedImageGradient Spatial gradient of the input warped image
@@ -81,3 +81,4 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
                                          nifti_image *dtiMeasureGradientImage,
                                          int *mask,
                                          unsigned *dtIndicies);
+/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index f0a5b3af..eff52320 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -21,44 +21,44 @@ reg_kld::reg_kld(): reg_measure() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-void reg_kld::InitialiseMeasure(nifti_image *refImgPtr,
-                                nifti_image *floImgPtr,
-                                int *maskRefPtr,
-                                nifti_image *warFloImgPtr,
-                                nifti_image *warFloGraPtr,
-                                nifti_image *forVoxBasedGraPtr,
-                                nifti_image *localWeightSimPtr,
-                                int *maskFloPtr,
-                                nifti_image *warRefImgPtr,
-                                nifti_image *warRefGraPtr,
-                                nifti_image *bckVoxBasedGraPtr) {
+void reg_kld::InitialiseMeasure(nifti_image *refImg,
+                                nifti_image *floImg,
+                                int *refMask,
+                                nifti_image *warpedImg,
+                                nifti_image *warpedGrad,
+                                nifti_image *voxelBasedGrad,
+                                nifti_image *localWeightSim,
+                                int *floMask,
+                                nifti_image *warpedImgBw,
+                                nifti_image *warpedGradBw,
+                                nifti_image *voxelBasedGradBw) {
     // Set the pointers using the parent class function
-    reg_measure::InitialiseMeasure(refImgPtr,
-                                   floImgPtr,
-                                   maskRefPtr,
-                                   warFloImgPtr,
-                                   warFloGraPtr,
-                                   forVoxBasedGraPtr,
-                                   localWeightSimPtr,
-                                   maskFloPtr,
-                                   warRefImgPtr,
-                                   warRefGraPtr,
-                                   bckVoxBasedGraPtr);
+    reg_measure::InitialiseMeasure(refImg,
+                                   floImg,
+                                   refMask,
+                                   warpedImg,
+                                   warpedGrad,
+                                   voxelBasedGrad,
+                                   localWeightSim,
+                                   floMask,
+                                   warpedImgBw,
+                                   warpedGradBw,
+                                   voxelBasedGradBw);
 
     // Check that the input images have the same number of time point
-    if (this->referenceImagePointer->nt != this->floatingImagePointer->nt) {
+    if (this->referenceImage->nt != this->floatingImage->nt) {
         reg_print_fct_error("reg_kld::InitialiseMeasure");
         reg_print_msg_error("This number of time point should be the same for both input images");
         reg_exit();
     }
     // Input images are expected to be bounded between 0 and 1 as they
     // are meant to be probabilities
-    for (int t = 0; t < this->referenceImagePointer->nt; ++t) {
+    for (int t = 0; t < this->referenceImage->nt; ++t) {
         if (this->timePointWeight[t] > 0) {
-            float min_ref = reg_tools_getMinValue(this->referenceImagePointer, t);
-            float max_ref = reg_tools_getMaxValue(this->referenceImagePointer, t);
-            float min_flo = reg_tools_getMinValue(this->floatingImagePointer, t);
-            float max_flo = reg_tools_getMaxValue(this->floatingImagePointer, t);
+            float min_ref = reg_tools_getMinValue(this->referenceImage, t);
+            float max_ref = reg_tools_getMaxValue(this->referenceImage, t);
+            float min_flo = reg_tools_getMinValue(this->floatingImage, t);
+            float max_flo = reg_tools_getMaxValue(this->floatingImage, t);
             if (min_ref < 0.f || min_flo < 0.f || max_ref>1.f || max_flo>1.f) {
                 reg_print_msg_error("The input images are expected to be probabilities to use the kld measure");
                 reg_exit();
@@ -68,7 +68,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImgPtr,
 #ifndef NDEBUG
     char text[255];
     reg_print_msg_debug("reg_kld::InitialiseMeasure().");
-    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+    for (int i = 0; i < this->referenceImage->nt; ++i) {
         sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
         reg_print_msg_debug(text);
     }
@@ -145,26 +145,26 @@ template double reg_getKLDivergence<double>(nifti_image*, nifti_image*, double*,
 /* *************************************************************** */
 double reg_kld::GetSimilarityMeasureValue() {
     // Check that all the specified image are of the same datatype
-    if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) {
+    if (this->warpedImage->datatype != this->referenceImage->datatype) {
         reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
         reg_print_msg_error("Both input images are expected to have the same type");
         reg_exit();
     }
     double KLDValue;
-    switch (this->referenceImagePointer->datatype) {
+    switch (this->referenceImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        KLDValue = reg_getKLDivergence<float>(this->referenceImagePointer,
-                                              this->warpedFloatingImagePointer,
+        KLDValue = reg_getKLDivergence<float>(this->referenceImage,
+                                              this->warpedImage,
                                               this->timePointWeight,
                                               nullptr, // TODO this->forwardJacDetImagePointer,
-                                              this->referenceMaskPointer);
+                                              this->referenceMask);
         break;
     case NIFTI_TYPE_FLOAT64:
-        KLDValue = reg_getKLDivergence<double>(this->referenceImagePointer,
-                                               this->warpedFloatingImagePointer,
+        KLDValue = reg_getKLDivergence<double>(this->referenceImage,
+                                               this->warpedImage,
                                                this->timePointWeight,
                                                nullptr, // TODO this->forwardJacDetImagePointer,
-                                               this->referenceMaskPointer);
+                                               this->referenceMask);
         break;
     default:
         reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
@@ -175,25 +175,25 @@ double reg_kld::GetSimilarityMeasureValue() {
     // Backward computation
     if (this->isSymmetric) {
         // Check that all the specified image are of the same datatype
-        if (this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) {
+        if (this->warpedImageBw->datatype != this->floatingImage->datatype) {
             reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
             reg_print_msg_error("Both input images are expected to have the same type");
             reg_exit();
         }
-        switch (this->floatingImagePointer->datatype) {
+        switch (this->floatingImage->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            KLDValue += reg_getKLDivergence<float>(this->floatingImagePointer,
-                                                   this->warpedReferenceImagePointer,
+            KLDValue += reg_getKLDivergence<float>(this->floatingImage,
+                                                   this->warpedImageBw,
                                                    this->timePointWeight,
                                                    nullptr, // TODO this->backwardJacDetImagePointer,
-                                                   this->floatingMaskPointer);
+                                                   this->floatingMask);
             break;
         case NIFTI_TYPE_FLOAT64:
-            KLDValue += reg_getKLDivergence<double>(this->floatingImagePointer,
-                                                    this->warpedReferenceImagePointer,
+            KLDValue += reg_getKLDivergence<double>(this->floatingImage,
+                                                    this->warpedImageBw,
                                                     this->timePointWeight,
                                                     nullptr, // TODO this->backwardJacDetImagePointer,
-                                                    this->floatingMaskPointer);
+                                                    this->floatingMask);
             break;
         default:
             reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
@@ -212,8 +212,8 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
                                            nifti_image *measureGradient,
                                            nifti_image *jacobianDetImg,
                                            int *mask,
-                                           int current_timepoint,
-                                           double timepoint_weight) {
+                                           int currentTimepoint,
+                                           double timepointWeight) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
@@ -224,8 +224,8 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
 
     DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
     DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
-    DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
+    DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
+    DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
     int *maskPtr = nullptr;
     bool MrClean = false;
     if (mask == nullptr) {
@@ -260,7 +260,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
                 activeVoxel_num += 1.0;
         }
     }
-    double adjusted_weight = timepoint_weight / activeVoxel_num;
+    double adjusted_weight = timepointWeight / activeVoxel_num;
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -320,17 +320,17 @@ template void reg_getKLDivergenceVoxelBasedGradient<double>
 (nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double);
 /* *************************************************************** */
 /* *************************************************************** */
-void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-    if (this->timePointWeight[current_timepoint] == 0)
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
+    if (this->timePointWeight[currentTimepoint] == 0)
         return;
 
     // Check if all required input images are of the same data type
-    int dtype = this->referenceImagePointer->datatype;
-    if (this->warpedFloatingImagePointer->datatype != dtype ||
-        this->warpedFloatingGradientImagePointer->datatype != dtype ||
-        this->forwardVoxelBasedGradientImagePointer->datatype != dtype) {
+    int dtype = this->referenceImage->datatype;
+    if (this->warpedImage->datatype != dtype ||
+        this->warpedGradient->datatype != dtype ||
+        this->voxelBasedGradient->datatype != dtype) {
         reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
         reg_print_msg_error("Input images are expected to be of the same type");
         reg_exit();
@@ -338,24 +338,24 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     // Compute the gradient of the kld for the forward transformation
     switch (dtype) {
     case NIFTI_TYPE_FLOAT32:
-        reg_getKLDivergenceVoxelBasedGradient<float>(this->referenceImagePointer,
-                                                     this->warpedFloatingImagePointer,
-                                                     this->warpedFloatingGradientImagePointer,
-                                                     this->forwardVoxelBasedGradientImagePointer,
+        reg_getKLDivergenceVoxelBasedGradient<float>(this->referenceImage,
+                                                     this->warpedImage,
+                                                     this->warpedGradient,
+                                                     this->voxelBasedGradient,
                                                      nullptr, // TODO this->forwardJacDetImagePointer,
-                                                     this->referenceMaskPointer,
-                                                     current_timepoint,
-                                                     this->timePointWeight[current_timepoint]);
+                                                     this->referenceMask,
+                                                     currentTimepoint,
+                                                     this->timePointWeight[currentTimepoint]);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_getKLDivergenceVoxelBasedGradient<double>(this->referenceImagePointer,
-                                                      this->warpedFloatingImagePointer,
-                                                      this->warpedFloatingGradientImagePointer,
-                                                      this->forwardVoxelBasedGradientImagePointer,
+        reg_getKLDivergenceVoxelBasedGradient<double>(this->referenceImage,
+                                                      this->warpedImage,
+                                                      this->warpedGradient,
+                                                      this->voxelBasedGradient,
                                                       nullptr, // TODO this->forwardJacDetImagePointer,
-                                                      this->referenceMaskPointer,
-                                                      current_timepoint,
-                                                      this->timePointWeight[current_timepoint]);
+                                                      this->referenceMask,
+                                                      currentTimepoint,
+                                                      this->timePointWeight[currentTimepoint]);
         break;
     default:
         reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
@@ -364,10 +364,10 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     }
     // Compute the gradient of the kld for the backward transformation
     if (this->isSymmetric) {
-        dtype = this->floatingImagePointer->datatype;
-        if (this->warpedReferenceImagePointer->datatype != dtype ||
-            this->warpedReferenceGradientImagePointer->datatype != dtype ||
-            this->backwardVoxelBasedGradientImagePointer->datatype != dtype) {
+        dtype = this->floatingImage->datatype;
+        if (this->warpedImageBw->datatype != dtype ||
+            this->warpedGradientBw->datatype != dtype ||
+            this->voxelBasedGradientBw->datatype != dtype) {
             reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
             reg_print_msg_error("Input images are expected to be of the same type");
             reg_exit();
@@ -375,24 +375,24 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
         // Compute the gradient of the nmi for the backward transformation
         switch (dtype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_getKLDivergenceVoxelBasedGradient<float>(this->floatingImagePointer,
-                                                         this->warpedReferenceImagePointer,
-                                                         this->warpedReferenceGradientImagePointer,
-                                                         this->backwardVoxelBasedGradientImagePointer,
+            reg_getKLDivergenceVoxelBasedGradient<float>(this->floatingImage,
+                                                         this->warpedImageBw,
+                                                         this->warpedGradientBw,
+                                                         this->voxelBasedGradientBw,
                                                          nullptr, // TODO this->backwardJacDetImagePointer,
-                                                         this->floatingMaskPointer,
-                                                         current_timepoint,
-                                                         this->timePointWeight[current_timepoint]);
+                                                         this->floatingMask,
+                                                         currentTimepoint,
+                                                         this->timePointWeight[currentTimepoint]);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_getKLDivergenceVoxelBasedGradient<double>(this->floatingImagePointer,
-                                                          this->warpedReferenceImagePointer,
-                                                          this->warpedReferenceGradientImagePointer,
-                                                          this->backwardVoxelBasedGradientImagePointer,
+            reg_getKLDivergenceVoxelBasedGradient<double>(this->floatingImage,
+                                                          this->warpedImageBw,
+                                                          this->warpedGradientBw,
+                                                          this->voxelBasedGradientBw,
                                                           nullptr, // TODO this->backwardJacDetImagePointer,
-                                                          this->floatingMaskPointer,
-                                                          current_timepoint,
-                                                          this->timePointWeight[current_timepoint]);
+                                                          this->floatingMask,
+                                                          currentTimepoint,
+                                                          this->timePointWeight[currentTimepoint]);
             break;
         default:
             reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h
index fa84ef20..aaf70556 100755
--- a/reg-lib/cpu/_reg_kld.h
+++ b/reg-lib/cpu/_reg_kld.h
@@ -23,21 +23,21 @@ class reg_kld: public reg_measure {
     virtual ~reg_kld() {}
 
     /// @brief Initialise the reg_kld object
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr = nullptr,
-                                   int *maskFloPtr = nullptr,
-                                   nifti_image *warRefImgPtr = nullptr,
-                                   nifti_image *warRefGraPtr = nullptr,
-                                   nifti_image *bckVoxBasedGraPtr = nullptr) override;
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr) override;
     /// @brief Returns the kld value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based kld gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
 };
 /* *************************************************************** */
 
@@ -82,6 +82,6 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference,
                                            nifti_image *KLdivGradient,
                                            nifti_image *jacobianDeterminantImage,
                                            int *mask,
-                                           int current_timepoint,
-                                           double timepoint_weight);
+                                           int currentTimepoint,
+                                           double timepointWeight);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 8c9545cf..547f24af 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -12,21 +12,20 @@
 
 #include "_reg_lncc.h"
 
- /* *************************************************************** */
- /* *************************************************************** */
+/* *************************************************************** */
 reg_lncc::reg_lncc(): reg_measure() {
-    this->forwardCorrelationImage = nullptr;
-    this->referenceMeanImage = nullptr;
-    this->referenceSdevImage = nullptr;
-    this->warpedFloatingMeanImage = nullptr;
-    this->warpedFloatingSdevImage = nullptr;
+    this->correlationImage = nullptr;
+    this->meanImage = nullptr;
+    this->sdevImage = nullptr;
+    this->warpedMeanImage = nullptr;
+    this->warpedSdevImage = nullptr;
     this->forwardMask = nullptr;
 
-    this->backwardCorrelationImage = nullptr;
-    this->floatingMeanImage = nullptr;
-    this->floatingSdevImage = nullptr;
-    this->warpedReferenceMeanImage = nullptr;
-    this->warpedReferenceSdevImage = nullptr;
+    this->correlationImageBw = nullptr;
+    this->meanImageBw = nullptr;
+    this->sdevImageBw = nullptr;
+    this->warpedMeanImageBw = nullptr;
+    this->warpedSdevImageBw = nullptr;
     this->backwardMask = nullptr;
 
     // Gaussian kernel is used by default
@@ -39,58 +38,56 @@ reg_lncc::reg_lncc(): reg_measure() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 reg_lncc::~reg_lncc() {
-    if (this->forwardCorrelationImage != nullptr)
-        nifti_image_free(this->forwardCorrelationImage);
-    this->forwardCorrelationImage = nullptr;
-    if (this->referenceMeanImage != nullptr)
-        nifti_image_free(this->referenceMeanImage);
-    this->referenceMeanImage = nullptr;
-    if (this->referenceSdevImage != nullptr)
-        nifti_image_free(this->referenceSdevImage);
-    this->referenceSdevImage = nullptr;
-    if (this->warpedFloatingMeanImage != nullptr)
-        nifti_image_free(this->warpedFloatingMeanImage);
-    this->warpedFloatingMeanImage = nullptr;
-    if (this->warpedFloatingSdevImage != nullptr)
-        nifti_image_free(this->warpedFloatingSdevImage);
-    this->warpedFloatingSdevImage = nullptr;
+    if (this->correlationImage != nullptr)
+        nifti_image_free(this->correlationImage);
+    this->correlationImage = nullptr;
+    if (this->meanImage != nullptr)
+        nifti_image_free(this->meanImage);
+    this->meanImage = nullptr;
+    if (this->sdevImage != nullptr)
+        nifti_image_free(this->sdevImage);
+    this->sdevImage = nullptr;
+    if (this->warpedMeanImage != nullptr)
+        nifti_image_free(this->warpedMeanImage);
+    this->warpedMeanImage = nullptr;
+    if (this->warpedSdevImage != nullptr)
+        nifti_image_free(this->warpedSdevImage);
+    this->warpedSdevImage = nullptr;
     if (this->forwardMask != nullptr)
         free(this->forwardMask);
     this->forwardMask = nullptr;
 
-    if (this->backwardCorrelationImage != nullptr)
-        nifti_image_free(this->backwardCorrelationImage);
-    this->backwardCorrelationImage = nullptr;
-    if (this->floatingMeanImage != nullptr)
-        nifti_image_free(this->floatingMeanImage);
-    this->floatingMeanImage = nullptr;
-    if (this->floatingSdevImage != nullptr)
-        nifti_image_free(this->floatingSdevImage);
-    this->floatingSdevImage = nullptr;
-    if (this->warpedReferenceMeanImage != nullptr)
-        nifti_image_free(this->warpedReferenceMeanImage);
-    this->warpedReferenceMeanImage = nullptr;
-    if (this->warpedReferenceSdevImage != nullptr)
-        nifti_image_free(this->warpedReferenceSdevImage);
-    this->warpedReferenceSdevImage = nullptr;
+    if (this->correlationImageBw != nullptr)
+        nifti_image_free(this->correlationImageBw);
+    this->correlationImageBw = nullptr;
+    if (this->meanImageBw != nullptr)
+        nifti_image_free(this->meanImageBw);
+    this->meanImageBw = nullptr;
+    if (this->sdevImageBw != nullptr)
+        nifti_image_free(this->sdevImageBw);
+    this->sdevImageBw = nullptr;
+    if (this->warpedMeanImageBw != nullptr)
+        nifti_image_free(this->warpedMeanImageBw);
+    this->warpedMeanImageBw = nullptr;
+    if (this->warpedSdevImageBw != nullptr)
+        nifti_image_free(this->warpedSdevImageBw);
+    this->warpedSdevImageBw = nullptr;
     if (this->backwardMask != nullptr)
         free(this->backwardMask);
     this->backwardMask = nullptr;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
 void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
                                      nifti_image *warImage,
-                                     nifti_image *meanRefImage,
-                                     nifti_image *meanWarImage,
-                                     nifti_image *stdDevRefImage,
-                                     nifti_image *stdDevWarImage,
+                                     nifti_image *meanImage,
+                                     nifti_image *warpedMeanImage,
+                                     nifti_image *stdDevImage,
+                                     nifti_image *warpedSdevImage,
                                      int *refMask,
                                      int *combinedMask,
-                                     int current_timepoint) {
+                                     int currentTimepoint) {
     // Generate the forward mask to ignore all NaN values
 #ifdef _WIN32
     long voxel;
@@ -104,100 +101,99 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
     reg_tools_removeNanFromMask(warImage, combinedMask);
 
     DataType *origRefPtr = static_cast<DataType*>(refImage->data);
-    DataType *meanRefPtr = static_cast<DataType*>(meanRefImage->data);
-    DataType *sdevRefPtr = static_cast<DataType*>(stdDevRefImage->data);
-    memcpy(meanRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper);
-    memcpy(sdevRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper);
+    DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
+    DataType *sdevImgPtr = static_cast<DataType*>(stdDevImage->data);
+    memcpy(meanImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper);
+    memcpy(sdevImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper);
 
-    reg_tools_multiplyImageToImage(stdDevRefImage, stdDevRefImage, stdDevRefImage);
-    reg_tools_kernelConvolution(meanRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
-    reg_tools_kernelConvolution(stdDevRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
+    reg_tools_multiplyImageToImage(stdDevImage, stdDevImage, stdDevImage);
+    reg_tools_kernelConvolution(meanImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
+    reg_tools_kernelConvolution(stdDevImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
 
     DataType *origWarPtr = static_cast<DataType*>(warImage->data);
-    DataType *meanWarPtr = static_cast<DataType*>(meanWarImage->data);
-    DataType *sdevWarPtr = static_cast<DataType*>(stdDevWarImage->data);
-    memcpy(meanWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper);
-    memcpy(sdevWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper);
-
-    reg_tools_multiplyImageToImage(stdDevWarImage, stdDevWarImage, stdDevWarImage);
-    reg_tools_kernelConvolution(meanWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
-    reg_tools_kernelConvolution(stdDevWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
+    DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
+    DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
+    memcpy(warMeanPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper);
+    memcpy(warSdevPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper);
+
+    reg_tools_multiplyImageToImage(warpedSdevImage, warpedSdevImage, warpedSdevImage);
+    reg_tools_kernelConvolution(warpedMeanImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
+    reg_tools_kernelConvolution(warpedSdevImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr)
+    shared(voxelNumber, sdevImgPtr, meanImgPtr, warSdevPtr, warMeanPtr)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // G*(I^2) - (G*I)^2
-        sdevRefPtr[voxel] = sqrt(sdevRefPtr[voxel] - reg_pow2(meanRefPtr[voxel]));
-        sdevWarPtr[voxel] = sqrt(sdevWarPtr[voxel] - reg_pow2(meanWarPtr[voxel]));
+        sdevImgPtr[voxel] = sqrt(sdevImgPtr[voxel] - reg_pow2(meanImgPtr[voxel]));
+        warSdevPtr[voxel] = sqrt(warSdevPtr[voxel] - reg_pow2(warMeanPtr[voxel]));
         // Stabilise the computation
-        if (sdevRefPtr[voxel] < 1.e-06) sdevRefPtr[voxel] = 0;
-        if (sdevWarPtr[voxel] < 1.e-06) sdevWarPtr[voxel] = 0;
+        if (sdevImgPtr[voxel] < 1.e-06) sdevImgPtr[voxel] = 0;
+        if (warSdevPtr[voxel] < 1.e-06) warSdevPtr[voxel] = 0;
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
-                                 nifti_image *floImgPtr,
-                                 int *maskRefPtr,
-                                 nifti_image *warFloImgPtr,
-                                 nifti_image *warFloGraPtr,
-                                 nifti_image *forVoxBasedGraPtr,
-                                 nifti_image *localWeightSimPtr,
-                                 int *maskFloPtr,
-                                 nifti_image *warRefImgPtr,
-                                 nifti_image *warRefGraPtr,
-                                 nifti_image *bckVoxBasedGraPtr) {
-    reg_measure::InitialiseMeasure(refImgPtr,
-                                   floImgPtr,
-                                   maskRefPtr,
-                                   warFloImgPtr,
-                                   warFloGraPtr,
-                                   forVoxBasedGraPtr,
-                                   localWeightSimPtr,
-                                   maskFloPtr,
-                                   warRefImgPtr,
-                                   warRefGraPtr,
-                                   bckVoxBasedGraPtr);
-
-    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+void reg_lncc::InitialiseMeasure(nifti_image *refImg,
+                                 nifti_image *floImg,
+                                 int *refMask,
+                                 nifti_image *warpedImg,
+                                 nifti_image *warpedGrad,
+                                 nifti_image *voxelBasedGrad,
+                                 nifti_image *localWeightSim,
+                                 int *floMask,
+                                 nifti_image *warpedImgBw,
+                                 nifti_image *warpedGradBw,
+                                 nifti_image *voxelBasedGradBw) {
+    reg_measure::InitialiseMeasure(refImg,
+                                   floImg,
+                                   refMask,
+                                   warpedImg,
+                                   warpedGrad,
+                                   voxelBasedGrad,
+                                   localWeightSim,
+                                   floMask,
+                                   warpedImgBw,
+                                   warpedGradBw,
+                                   voxelBasedGradBw);
+
+    for (int i = 0; i < this->referenceImage->nt; ++i) {
         if (this->timePointWeight[i] > 0) {
-            reg_intensityRescale(this->referenceImagePointer, i, 0.f, 1.f);
-            reg_intensityRescale(this->floatingImagePointer, i, 0.f, 1.f);
+            reg_intensityRescale(this->referenceImage, i, 0.f, 1.f);
+            reg_intensityRescale(this->floatingImage, i, 0.f, 1.f);
         }
     }
 
     // Check that no images are already allocated
-    if (this->forwardCorrelationImage != nullptr)
-        nifti_image_free(this->forwardCorrelationImage);
-    this->forwardCorrelationImage = nullptr;
-    if (this->referenceMeanImage != nullptr)
-        nifti_image_free(this->referenceMeanImage);
-    this->referenceMeanImage = nullptr;
-    if (this->referenceSdevImage != nullptr)
-        nifti_image_free(this->referenceSdevImage);
-    this->referenceSdevImage = nullptr;
-    if (this->warpedFloatingMeanImage != nullptr)
-        nifti_image_free(this->warpedFloatingMeanImage);
-    this->warpedFloatingMeanImage = nullptr;
-    if (this->warpedFloatingSdevImage != nullptr)
-        nifti_image_free(this->warpedFloatingSdevImage);
-    this->warpedFloatingSdevImage = nullptr;
-    if (this->backwardCorrelationImage != nullptr)
-        nifti_image_free(this->backwardCorrelationImage);
-    this->backwardCorrelationImage = nullptr;
-    if (this->floatingMeanImage != nullptr)
-        nifti_image_free(this->floatingMeanImage);
-    this->floatingMeanImage = nullptr;
-    if (this->floatingSdevImage != nullptr)
-        nifti_image_free(this->floatingSdevImage);
-    this->floatingSdevImage = nullptr;
-    if (this->warpedReferenceMeanImage != nullptr)
-        nifti_image_free(this->warpedReferenceMeanImage);
-    this->warpedReferenceMeanImage = nullptr;
-    if (this->warpedReferenceSdevImage != nullptr)
-        nifti_image_free(this->warpedReferenceSdevImage);
-    this->warpedReferenceSdevImage = nullptr;
+    if (this->correlationImage != nullptr)
+        nifti_image_free(this->correlationImage);
+    this->correlationImage = nullptr;
+    if (this->meanImage != nullptr)
+        nifti_image_free(this->meanImage);
+    this->meanImage = nullptr;
+    if (this->sdevImage != nullptr)
+        nifti_image_free(this->sdevImage);
+    this->sdevImage = nullptr;
+    if (this->warpedMeanImage != nullptr)
+        nifti_image_free(this->warpedMeanImage);
+    this->warpedMeanImage = nullptr;
+    if (this->warpedSdevImage != nullptr)
+        nifti_image_free(this->warpedSdevImage);
+    this->warpedSdevImage = nullptr;
+    if (this->correlationImageBw != nullptr)
+        nifti_image_free(this->correlationImageBw);
+    this->correlationImageBw = nullptr;
+    if (this->meanImageBw != nullptr)
+        nifti_image_free(this->meanImageBw);
+    this->meanImageBw = nullptr;
+    if (this->sdevImageBw != nullptr)
+        nifti_image_free(this->sdevImageBw);
+    this->sdevImageBw = nullptr;
+    if (this->warpedMeanImageBw != nullptr)
+        nifti_image_free(this->warpedMeanImageBw);
+    this->warpedMeanImageBw = nullptr;
+    if (this->warpedSdevImageBw != nullptr)
+        nifti_image_free(this->warpedSdevImageBw);
+    this->warpedSdevImageBw = nullptr;
     if (this->forwardMask != nullptr)
         free(this->forwardMask);
     this->forwardMask = nullptr;
@@ -205,42 +201,42 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
         free(this->backwardMask);
     this->backwardMask = nullptr;
 
-    size_t voxelNumber = CalcVoxelNumber(*this->referenceImagePointer);
+    size_t voxelNumber = CalcVoxelNumber(*this->referenceImage);
 
     // Allocate the required image to store the correlation of the forward transformation
-    this->forwardCorrelationImage = nifti_copy_nim_info(this->referenceImagePointer);
-    this->forwardCorrelationImage->ndim = this->forwardCorrelationImage->dim[0] = this->referenceImagePointer->nz > 1 ? 3 : 2;
-    this->forwardCorrelationImage->nt = this->forwardCorrelationImage->dim[4] = 1;
-    this->forwardCorrelationImage->nvox = voxelNumber;
-    this->forwardCorrelationImage->data = malloc(voxelNumber * this->forwardCorrelationImage->nbyper);
+    this->correlationImage = nifti_copy_nim_info(this->referenceImage);
+    this->correlationImage->ndim = this->correlationImage->dim[0] = this->referenceImage->nz > 1 ? 3 : 2;
+    this->correlationImage->nt = this->correlationImage->dim[4] = 1;
+    this->correlationImage->nvox = voxelNumber;
+    this->correlationImage->data = malloc(voxelNumber * this->correlationImage->nbyper);
 
     // Allocate the required images to store mean and stdev of the reference image
-    this->referenceMeanImage = nifti_dup(*this->forwardCorrelationImage, false);
-    this->referenceSdevImage = nifti_dup(*this->forwardCorrelationImage, false);
+    this->meanImage = nifti_dup(*this->correlationImage, false);
+    this->sdevImage = nifti_dup(*this->correlationImage, false);
 
     // Allocate the required images to store mean and stdev of the warped floating image
-    this->warpedFloatingMeanImage = nifti_dup(*this->forwardCorrelationImage, false);
-    this->warpedFloatingSdevImage = nifti_dup(*this->forwardCorrelationImage, false);
+    this->warpedMeanImage = nifti_dup(*this->correlationImage, false);
+    this->warpedSdevImage = nifti_dup(*this->correlationImage, false);
 
     // Allocate the array to store the mask of the forward image
     this->forwardMask = (int*)malloc(voxelNumber * sizeof(int));
     if (this->isSymmetric) {
-        voxelNumber = CalcVoxelNumber(*floatingImagePointer);
+        voxelNumber = CalcVoxelNumber(*floatingImage);
 
         // Allocate the required image to store the correlation of the backward transformation
-        this->backwardCorrelationImage = nifti_copy_nim_info(this->floatingImagePointer);
-        this->backwardCorrelationImage->ndim = this->backwardCorrelationImage->dim[0] = this->floatingImagePointer->nz > 1 ? 3 : 2;
-        this->backwardCorrelationImage->nt = this->backwardCorrelationImage->dim[4] = 1;
-        this->backwardCorrelationImage->nvox = voxelNumber;
-        this->backwardCorrelationImage->data = malloc(voxelNumber * this->backwardCorrelationImage->nbyper);
+        this->correlationImageBw = nifti_copy_nim_info(this->floatingImage);
+        this->correlationImageBw->ndim = this->correlationImageBw->dim[0] = this->floatingImage->nz > 1 ? 3 : 2;
+        this->correlationImageBw->nt = this->correlationImageBw->dim[4] = 1;
+        this->correlationImageBw->nvox = voxelNumber;
+        this->correlationImageBw->data = malloc(voxelNumber * this->correlationImageBw->nbyper);
 
         // Allocate the required images to store mean and stdev of the floating image
-        this->floatingMeanImage = nifti_dup(*this->backwardCorrelationImage, false);
-        this->floatingSdevImage = nifti_dup(*this->backwardCorrelationImage, false);
+        this->meanImageBw = nifti_dup(*this->correlationImageBw, false);
+        this->sdevImageBw = nifti_dup(*this->correlationImageBw, false);
 
         // Allocate the required images to store mean and stdev of the warped reference image
-        this->warpedReferenceMeanImage = nifti_dup(*this->backwardCorrelationImage, false);
-        this->warpedReferenceSdevImage = nifti_dup(*this->backwardCorrelationImage, false);
+        this->warpedMeanImageBw = nifti_dup(*this->correlationImageBw, false);
+        this->warpedSdevImageBw = nifti_dup(*this->correlationImageBw, false);
 
         // Allocate the array to store the mask of the backward image
         this->backwardMask = (int*)malloc(voxelNumber * sizeof(int));
@@ -248,18 +244,17 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr,
 #ifndef NDEBUG
     char text[255];
     reg_print_msg_debug("reg_lncc::InitialiseMeasure().");
-    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+    for (int i = 0; i < this->referenceImage->nt; ++i) {
         sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
         reg_print_msg_debug(text);
     }
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DataType>
 double reg_getLNCCValue(nifti_image *referenceImage,
-                        nifti_image *referenceMeanImage,
-                        nifti_image *referenceSdevImage,
+                        nifti_image *meanImage,
+                        nifti_image *sdevImage,
                         nifti_image *warpedImage,
                         nifti_image *warpedMeanImage,
                         nifti_image *warpedSdevImage,
@@ -267,7 +262,7 @@ double reg_getLNCCValue(nifti_image *referenceImage,
                         float *kernelStandardDeviation,
                         nifti_image *correlationImage,
                         int kernelType,
-                        int current_timepoint) {
+                        int currentTimepoint) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
@@ -278,19 +273,19 @@ double reg_getLNCCValue(nifti_image *referenceImage,
 
     // Compute the local correlation
     DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
 
     DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
+    DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
 
-    DataType *refMeanPtr = static_cast<DataType*>(referenceMeanImage->data);
+    DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
     DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
-    DataType *refSdevPtr = static_cast<DataType*>(referenceSdevImage->data);
+    DataType *sdevImgPtr = static_cast<DataType*>(sdevImage->data);
     DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
-    DataType *correlaPtr = static_cast<DataType*>(correlationImage->data);
+    DataType *correlationPtr = static_cast<DataType*>(correlationImage->data);
 
     for (size_t i = 0; i < voxelNumber; ++i)
-        correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i];
+        correlationPtr[i] = currentRefPtr[i] * currentWarPtr[i];
 
     reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
 
@@ -300,8 +295,8 @@ double reg_getLNCCValue(nifti_image *referenceImage,
     // Iteration over all voxels
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \
-    refSdevPtr,warSdevPtr,correlaPtr) \
+    shared(voxelNumber,combinedMask,meanImgPtr,warMeanPtr, \
+    sdevImgPtr,warSdevPtr,correlationPtr) \
     private(lncc_value) \
     reduction(+:lncc_value_sum) \
     reduction(+:activeVoxel_num)
@@ -309,7 +304,7 @@ double reg_getLNCCValue(nifti_image *referenceImage,
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
-            lncc_value = (correlaPtr[voxel] - (refMeanPtr[voxel] * warMeanPtr[voxel])) / (refSdevPtr[voxel] * warSdevPtr[voxel]);
+            lncc_value = (correlationPtr[voxel] - (meanImgPtr[voxel] * warMeanPtr[voxel])) / (sdevImgPtr[voxel] * warSdevPtr[voxel]);
             if (lncc_value == lncc_value && isinf(lncc_value) == 0) {
                 lncc_value_sum += fabs(lncc_value);
                 ++activeVoxel_num;
@@ -319,135 +314,133 @@ double reg_getLNCCValue(nifti_image *referenceImage,
     return lncc_value_sum / activeVoxel_num;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 double reg_lncc::GetSimilarityMeasureValue() {
     double lncc_value = 0;
 
-    for (int current_timepoint = 0; current_timepoint < this->referenceImagePointer->nt; ++current_timepoint) {
-        if (this->timePointWeight[current_timepoint] > 0) {
+    for (int currentTimepoint = 0; currentTimepoint < this->referenceImage->nt; ++currentTimepoint) {
+        if (this->timePointWeight[currentTimepoint] > 0) {
             double tp_value = 0;
             // Compute the mean and variance of the reference and warped floating
-            switch (this->referenceImagePointer->datatype) {
+            switch (this->referenceImage->datatype) {
             case NIFTI_TYPE_FLOAT32:
-                this->UpdateLocalStatImages<float>(this->referenceImagePointer,
-                                                   this->warpedFloatingImagePointer,
-                                                   this->referenceMeanImage,
-                                                   this->warpedFloatingMeanImage,
-                                                   this->referenceSdevImage,
-                                                   this->warpedFloatingSdevImage,
-                                                   this->referenceMaskPointer,
+                this->UpdateLocalStatImages<float>(this->referenceImage,
+                                                   this->warpedImage,
+                                                   this->meanImage,
+                                                   this->warpedMeanImage,
+                                                   this->sdevImage,
+                                                   this->warpedSdevImage,
+                                                   this->referenceMask,
                                                    this->forwardMask,
-                                                   current_timepoint);
+                                                   currentTimepoint);
                 break;
             case NIFTI_TYPE_FLOAT64:
-                this->UpdateLocalStatImages<double>(this->referenceImagePointer,
-                                                    this->warpedFloatingImagePointer,
-                                                    this->referenceMeanImage,
-                                                    this->warpedFloatingMeanImage,
-                                                    this->referenceSdevImage,
-                                                    this->warpedFloatingSdevImage,
-                                                    this->referenceMaskPointer,
+                this->UpdateLocalStatImages<double>(this->referenceImage,
+                                                    this->warpedImage,
+                                                    this->meanImage,
+                                                    this->warpedMeanImage,
+                                                    this->sdevImage,
+                                                    this->warpedSdevImage,
+                                                    this->referenceMask,
                                                     this->forwardMask,
-                                                    current_timepoint);
+                                                    currentTimepoint);
                 break;
             }
 
             // Compute the LNCC - Forward
-            switch (this->referenceImagePointer->datatype) {
+            switch (this->referenceImage->datatype) {
             case NIFTI_TYPE_FLOAT32:
-                tp_value += reg_getLNCCValue<float>(this->referenceImagePointer,
-                                                    this->referenceMeanImage,
-                                                    this->referenceSdevImage,
-                                                    this->warpedFloatingImagePointer,
-                                                    this->warpedFloatingMeanImage,
-                                                    this->warpedFloatingSdevImage,
+                tp_value += reg_getLNCCValue<float>(this->referenceImage,
+                                                    this->meanImage,
+                                                    this->sdevImage,
+                                                    this->warpedImage,
+                                                    this->warpedMeanImage,
+                                                    this->warpedSdevImage,
                                                     this->forwardMask,
                                                     this->kernelStandardDeviation,
-                                                    this->forwardCorrelationImage,
+                                                    this->correlationImage,
                                                     this->kernelType,
-                                                    current_timepoint);
+                                                    currentTimepoint);
                 break;
             case NIFTI_TYPE_FLOAT64:
-                tp_value += reg_getLNCCValue<double>(this->referenceImagePointer,
-                                                     this->referenceMeanImage,
-                                                     this->referenceSdevImage,
-                                                     this->warpedFloatingImagePointer,
-                                                     this->warpedFloatingMeanImage,
-                                                     this->warpedFloatingSdevImage,
+                tp_value += reg_getLNCCValue<double>(this->referenceImage,
+                                                     this->meanImage,
+                                                     this->sdevImage,
+                                                     this->warpedImage,
+                                                     this->warpedMeanImage,
+                                                     this->warpedSdevImage,
                                                      this->forwardMask,
                                                      this->kernelStandardDeviation,
-                                                     this->forwardCorrelationImage,
+                                                     this->correlationImage,
                                                      this->kernelType,
-                                                     current_timepoint);
+                                                     currentTimepoint);
                 break;
             }
             if (this->isSymmetric) {
                 // Compute the mean and variance of the floating and warped reference
-                switch (this->floatingImagePointer->datatype) {
+                switch (this->floatingImage->datatype) {
                 case NIFTI_TYPE_FLOAT32:
-                    this->UpdateLocalStatImages<float>(this->floatingImagePointer,
-                                                       this->warpedReferenceImagePointer,
-                                                       this->floatingMeanImage,
-                                                       this->warpedReferenceMeanImage,
-                                                       this->floatingSdevImage,
-                                                       this->warpedReferenceSdevImage,
-                                                       this->floatingMaskPointer,
+                    this->UpdateLocalStatImages<float>(this->floatingImage,
+                                                       this->warpedImageBw,
+                                                       this->meanImageBw,
+                                                       this->warpedMeanImageBw,
+                                                       this->sdevImageBw,
+                                                       this->warpedSdevImageBw,
+                                                       this->floatingMask,
                                                        this->backwardMask,
-                                                       current_timepoint);
+                                                       currentTimepoint);
                     break;
                 case NIFTI_TYPE_FLOAT64:
-                    this->UpdateLocalStatImages<double>(this->floatingImagePointer,
-                                                        this->warpedReferenceImagePointer,
-                                                        this->floatingMeanImage,
-                                                        this->warpedReferenceMeanImage,
-                                                        this->floatingSdevImage,
-                                                        this->warpedReferenceSdevImage,
-                                                        this->floatingMaskPointer,
+                    this->UpdateLocalStatImages<double>(this->floatingImage,
+                                                        this->warpedImageBw,
+                                                        this->meanImageBw,
+                                                        this->warpedMeanImageBw,
+                                                        this->sdevImageBw,
+                                                        this->warpedSdevImageBw,
+                                                        this->floatingMask,
                                                         this->backwardMask,
-                                                        current_timepoint);
+                                                        currentTimepoint);
                     break;
                 }
                 // Compute the LNCC - Backward
-                switch (this->floatingImagePointer->datatype) {
+                switch (this->floatingImage->datatype) {
                 case NIFTI_TYPE_FLOAT32:
-                    tp_value += reg_getLNCCValue<float>(this->floatingImagePointer,
-                                                        this->floatingMeanImage,
-                                                        this->floatingSdevImage,
-                                                        this->warpedReferenceImagePointer,
-                                                        this->warpedReferenceMeanImage,
-                                                        this->warpedReferenceSdevImage,
+                    tp_value += reg_getLNCCValue<float>(this->floatingImage,
+                                                        this->meanImageBw,
+                                                        this->sdevImageBw,
+                                                        this->warpedImageBw,
+                                                        this->warpedMeanImageBw,
+                                                        this->warpedSdevImageBw,
                                                         this->backwardMask,
                                                         this->kernelStandardDeviation,
-                                                        this->backwardCorrelationImage,
+                                                        this->correlationImageBw,
                                                         this->kernelType,
-                                                        current_timepoint);
+                                                        currentTimepoint);
                     break;
                 case NIFTI_TYPE_FLOAT64:
-                    tp_value += reg_getLNCCValue<double>(this->floatingImagePointer,
-                                                         this->floatingMeanImage,
-                                                         this->floatingSdevImage,
-                                                         this->warpedReferenceImagePointer,
-                                                         this->warpedReferenceMeanImage,
-                                                         this->warpedReferenceSdevImage,
+                    tp_value += reg_getLNCCValue<double>(this->floatingImage,
+                                                         this->meanImageBw,
+                                                         this->sdevImageBw,
+                                                         this->warpedImageBw,
+                                                         this->warpedMeanImageBw,
+                                                         this->warpedSdevImageBw,
                                                          this->backwardMask,
                                                          this->kernelStandardDeviation,
-                                                         this->backwardCorrelationImage,
+                                                         this->correlationImageBw,
                                                          this->kernelType,
-                                                         current_timepoint);
+                                                         currentTimepoint);
                     break;
                 }
             }
-            lncc_value += tp_value * this->timePointWeight[current_timepoint];
+            lncc_value += tp_value * this->timePointWeight[currentTimepoint];
         }
     }
     return lncc_value;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
 void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
-                                   nifti_image *referenceMeanImage,
-                                   nifti_image *referenceSdevImage,
+                                   nifti_image *meanImage,
+                                   nifti_image *sdevImage,
                                    nifti_image *warpedImage,
                                    nifti_image *warpedMeanImage,
                                    nifti_image *warpedSdevImage,
@@ -457,8 +450,8 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    nifti_image *warpedGradient,
                                    nifti_image *measureGradientImage,
                                    int kernelType,
-                                   int current_timepoint,
-                                   double timepoint_weight) {
+                                   int currentTimepoint,
+                                   double timepointWeight) {
 #ifdef _WIN32
     long voxel;
     long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
@@ -469,19 +462,19 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
 
     // Compute the local correlation
     DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
 
     DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
+    DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
 
-    DataType *refMeanPtr = static_cast<DataType*>(referenceMeanImage->data);
+    DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
     DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
-    DataType *refSdevPtr = static_cast<DataType*>(referenceSdevImage->data);
+    DataType *sdevImgPtr = static_cast<DataType*>(sdevImage->data);
     DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
-    DataType *correlaPtr = static_cast<DataType*>(correlationImage->data);
+    DataType *correlationPtr = static_cast<DataType*>(correlationImage->data);
 
     for (size_t i = 0; i < voxelNumber; ++i)
-        correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i];
+        correlationPtr[i] = currentRefPtr[i] * currentWarPtr[i];
 
     reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
 
@@ -492,8 +485,8 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
     // Iteration over all voxels
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \
-    refSdevPtr,warSdevPtr,correlaPtr) \
+    shared(voxelNumber,combinedMask,meanImgPtr,warMeanPtr, \
+    sdevImgPtr,warSdevPtr,correlationPtr) \
     private(refMeanValue,warMeanValue,refSdevValue, \
     warSdevValue, correlaValue, temp1, temp2, temp3) \
     reduction(+:activeVoxel_num)
@@ -502,11 +495,11 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
 
-            refMeanValue = refMeanPtr[voxel];
+            refMeanValue = meanImgPtr[voxel];
             warMeanValue = warMeanPtr[voxel];
-            refSdevValue = refSdevPtr[voxel];
+            refSdevValue = sdevImgPtr[voxel];
             warSdevValue = warSdevPtr[voxel];
-            correlaValue = correlaPtr[voxel] - (refMeanValue * warMeanValue);
+            correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue);
 
             temp1 = 1.0 / (refSdevValue * warSdevValue);
             temp2 = correlaValue /
@@ -526,14 +519,14 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                 }
                 warMeanPtr[voxel] = static_cast<DataType>(temp1);
                 warSdevPtr[voxel] = static_cast<DataType>(temp2);
-                correlaPtr[voxel] = static_cast<DataType>(temp3);
+                correlationPtr[voxel] = static_cast<DataType>(temp3);
                 activeVoxel_num++;
-            } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0;
-        } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0;
+            } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlationPtr[voxel] = 0;
+        } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlationPtr[voxel] = 0;
     }
 
     //adjust weight for number of voxels
-    double adjusted_weight = timepoint_weight / activeVoxel_num;
+    double adjusted_weight = timepointWeight / activeVoxel_num;
 
     // Smooth the newly computed values
     reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
@@ -557,14 +550,14 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \
-    warMeanPtr,warSdevPtr,correlaPtr,measureGradPtrX,measureGradPtrY, \
+    warMeanPtr,warSdevPtr,correlationPtr,measureGradPtrX,measureGradPtrY, \
     measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjusted_weight) \
     private(common)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
-            common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlaPtr[voxel];
+            common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlationPtr[voxel];
             common *= adjusted_weight;
             measureGradPtrX[voxel] -= static_cast<DataType>(warpGradPtrX[voxel] * common);
             measureGradPtrY[voxel] -= static_cast<DataType>(warpGradPtrY[voxel] * common);
@@ -591,136 +584,134 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-    if (this->timePointWeight[current_timepoint] == 0)
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
+    if (this->timePointWeight[currentTimepoint] == 0)
         return;
 
     // Compute the mean and variance of the reference and warped floating
-    switch (this->referenceImagePointer->datatype) {
+    switch (this->referenceImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        this->UpdateLocalStatImages<float>(this->referenceImagePointer,
-                                           this->warpedFloatingImagePointer,
-                                           this->referenceMeanImage,
-                                           this->warpedFloatingMeanImage,
-                                           this->referenceSdevImage,
-                                           this->warpedFloatingSdevImage,
-                                           this->referenceMaskPointer,
+        this->UpdateLocalStatImages<float>(this->referenceImage,
+                                           this->warpedImage,
+                                           this->meanImage,
+                                           this->warpedMeanImage,
+                                           this->sdevImage,
+                                           this->warpedSdevImage,
+                                           this->referenceMask,
                                            this->forwardMask,
-                                           current_timepoint);
+                                           currentTimepoint);
         break;
     case NIFTI_TYPE_FLOAT64:
-        this->UpdateLocalStatImages<double>(this->referenceImagePointer,
-                                            this->warpedFloatingImagePointer,
-                                            this->referenceMeanImage,
-                                            this->warpedFloatingMeanImage,
-                                            this->referenceSdevImage,
-                                            this->warpedFloatingSdevImage,
-                                            this->referenceMaskPointer,
+        this->UpdateLocalStatImages<double>(this->referenceImage,
+                                            this->warpedImage,
+                                            this->meanImage,
+                                            this->warpedMeanImage,
+                                            this->sdevImage,
+                                            this->warpedSdevImage,
+                                            this->referenceMask,
                                             this->forwardMask,
-                                            current_timepoint);
+                                            currentTimepoint);
         break;
     }
 
     // Compute the LNCC gradient - Forward
-    switch (this->referenceImagePointer->datatype) {
+    switch (this->referenceImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        reg_getVoxelBasedLNCCGradient<float>(this->referenceImagePointer,
-                                             this->referenceMeanImage,
-                                             this->referenceSdevImage,
-                                             this->warpedFloatingImagePointer,
-                                             this->warpedFloatingMeanImage,
-                                             this->warpedFloatingSdevImage,
+        reg_getVoxelBasedLNCCGradient<float>(this->referenceImage,
+                                             this->meanImage,
+                                             this->sdevImage,
+                                             this->warpedImage,
+                                             this->warpedMeanImage,
+                                             this->warpedSdevImage,
                                              this->forwardMask,
                                              this->kernelStandardDeviation,
-                                             this->forwardCorrelationImage,
-                                             this->warpedFloatingGradientImagePointer,
-                                             this->forwardVoxelBasedGradientImagePointer,
+                                             this->correlationImage,
+                                             this->warpedGradient,
+                                             this->voxelBasedGradient,
                                              this->kernelType,
-                                             current_timepoint,
-                                             this->timePointWeight[current_timepoint]);
+                                             currentTimepoint,
+                                             this->timePointWeight[currentTimepoint]);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_getVoxelBasedLNCCGradient<double>(this->referenceImagePointer,
-                                              this->referenceMeanImage,
-                                              this->referenceSdevImage,
-                                              this->warpedFloatingImagePointer,
-                                              this->warpedFloatingMeanImage,
-                                              this->warpedFloatingSdevImage,
+        reg_getVoxelBasedLNCCGradient<double>(this->referenceImage,
+                                              this->meanImage,
+                                              this->sdevImage,
+                                              this->warpedImage,
+                                              this->warpedMeanImage,
+                                              this->warpedSdevImage,
                                               this->forwardMask,
                                               this->kernelStandardDeviation,
-                                              this->forwardCorrelationImage,
-                                              this->warpedFloatingGradientImagePointer,
-                                              this->forwardVoxelBasedGradientImagePointer,
+                                              this->correlationImage,
+                                              this->warpedGradient,
+                                              this->voxelBasedGradient,
                                               this->kernelType,
-                                              current_timepoint,
-                                              this->timePointWeight[current_timepoint]);
+                                              currentTimepoint,
+                                              this->timePointWeight[currentTimepoint]);
         break;
     }
     if (this->isSymmetric) {
         // Compute the mean and variance of the floating and warped reference
-        switch (this->floatingImagePointer->datatype) {
+        switch (this->floatingImage->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            this->UpdateLocalStatImages<float>(this->floatingImagePointer,
-                                               this->warpedReferenceImagePointer,
-                                               this->floatingMeanImage,
-                                               this->warpedReferenceMeanImage,
-                                               this->floatingSdevImage,
-                                               this->warpedReferenceSdevImage,
-                                               this->floatingMaskPointer,
+            this->UpdateLocalStatImages<float>(this->floatingImage,
+                                               this->warpedImageBw,
+                                               this->meanImageBw,
+                                               this->warpedMeanImageBw,
+                                               this->sdevImageBw,
+                                               this->warpedSdevImageBw,
+                                               this->floatingMask,
                                                this->backwardMask,
-                                               current_timepoint);
+                                               currentTimepoint);
             break;
         case NIFTI_TYPE_FLOAT64:
-            this->UpdateLocalStatImages<double>(this->floatingImagePointer,
-                                                this->warpedReferenceImagePointer,
-                                                this->floatingMeanImage,
-                                                this->warpedReferenceMeanImage,
-                                                this->floatingSdevImage,
-                                                this->warpedReferenceSdevImage,
-                                                this->floatingMaskPointer,
+            this->UpdateLocalStatImages<double>(this->floatingImage,
+                                                this->warpedImageBw,
+                                                this->meanImageBw,
+                                                this->warpedMeanImageBw,
+                                                this->sdevImageBw,
+                                                this->warpedSdevImageBw,
+                                                this->floatingMask,
                                                 this->backwardMask,
-                                                current_timepoint);
+                                                currentTimepoint);
             break;
         }
         // Compute the LNCC gradient - Backward
-        switch (this->floatingImagePointer->datatype) {
+        switch (this->floatingImage->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedLNCCGradient<float>(this->floatingImagePointer,
-                                                 this->floatingMeanImage,
-                                                 this->floatingSdevImage,
-                                                 this->warpedReferenceImagePointer,
-                                                 this->warpedReferenceMeanImage,
-                                                 this->warpedReferenceSdevImage,
+            reg_getVoxelBasedLNCCGradient<float>(this->floatingImage,
+                                                 this->meanImageBw,
+                                                 this->sdevImageBw,
+                                                 this->warpedImageBw,
+                                                 this->warpedMeanImageBw,
+                                                 this->warpedSdevImageBw,
                                                  this->backwardMask,
                                                  this->kernelStandardDeviation,
-                                                 this->backwardCorrelationImage,
-                                                 this->warpedReferenceGradientImagePointer,
-                                                 this->backwardVoxelBasedGradientImagePointer,
+                                                 this->correlationImageBw,
+                                                 this->warpedGradientBw,
+                                                 this->voxelBasedGradientBw,
                                                  this->kernelType,
-                                                 current_timepoint,
-                                                 this->timePointWeight[current_timepoint]);
+                                                 currentTimepoint,
+                                                 this->timePointWeight[currentTimepoint]);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedLNCCGradient<double>(this->floatingImagePointer,
-                                                  this->floatingMeanImage,
-                                                  this->floatingSdevImage,
-                                                  this->warpedReferenceImagePointer,
-                                                  this->warpedReferenceMeanImage,
-                                                  this->warpedReferenceSdevImage,
+            reg_getVoxelBasedLNCCGradient<double>(this->floatingImage,
+                                                  this->meanImageBw,
+                                                  this->sdevImageBw,
+                                                  this->warpedImageBw,
+                                                  this->warpedMeanImageBw,
+                                                  this->warpedSdevImageBw,
                                                   this->backwardMask,
                                                   this->kernelStandardDeviation,
-                                                  this->backwardCorrelationImage,
-                                                  this->warpedReferenceGradientImagePointer,
-                                                  this->backwardVoxelBasedGradientImagePointer,
+                                                  this->correlationImageBw,
+                                                  this->warpedGradientBw,
+                                                  this->voxelBasedGradientBw,
                                                   this->kernelType,
-                                                  current_timepoint,
-                                                  this->timePointWeight[current_timepoint]);
+                                                  currentTimepoint,
+                                                  this->timePointWeight[currentTimepoint]);
             break;
         }
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index 07f14eca..5a7b5ef0 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -14,8 +14,7 @@
 
 #include "_reg_measure.h"
 
- /* *************************************************************** */
- /* *************************************************************** */
+/* *************************************************************** */
 class reg_lncc: public reg_measure {
 public:
     /// @brief reg_lncc class constructor
@@ -24,21 +23,21 @@ class reg_lncc: public reg_measure {
     virtual ~reg_lncc();
 
     /// @brief Initialise the reg_lncc object
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr = nullptr,
-                                   int *maskFloPtr = nullptr,
-                                   nifti_image *warRefImgPtr = nullptr,
-                                   nifti_image *warRefGraPtr = nullptr,
-                                   nifti_image *bckVoxBasedGraPtr = nullptr) override;
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr) override;
     /// @brief Returns the lncc value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based lncc gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
     /// @brief Stuff
     virtual void SetKernelStandardDeviation(int t, float stddev) {
         this->kernelStandardDeviation[t] = stddev;
@@ -50,18 +49,18 @@ class reg_lncc: public reg_measure {
 
 protected:
     float kernelStandardDeviation[255];
-    nifti_image *forwardCorrelationImage;
-    nifti_image *referenceMeanImage;
-    nifti_image *referenceSdevImage;
-    nifti_image *warpedFloatingMeanImage;
-    nifti_image *warpedFloatingSdevImage;
+    nifti_image *correlationImage;
+    nifti_image *meanImage;
+    nifti_image *sdevImage;
+    nifti_image *warpedMeanImage;
+    nifti_image *warpedSdevImage;
     int *forwardMask;
 
-    nifti_image *backwardCorrelationImage;
-    nifti_image *floatingMeanImage;
-    nifti_image *floatingSdevImage;
-    nifti_image *warpedReferenceMeanImage;
-    nifti_image *warpedReferenceSdevImage;
+    nifti_image *correlationImageBw;
+    nifti_image *meanImageBw;
+    nifti_image *sdevImageBw;
+    nifti_image *warpedMeanImageBw;
+    nifti_image *warpedSdevImageBw;
     int *backwardMask;
 
     int kernelType;
@@ -69,17 +68,16 @@ class reg_lncc: public reg_measure {
     template <class DataType>
     void UpdateLocalStatImages(nifti_image *refImage,
                                nifti_image *warImage,
-                               nifti_image *meanRefImage,
-                               nifti_image *meanWarImage,
-                               nifti_image *stdDevRefImage,
-                               nifti_image *stdDevWarImage,
+                               nifti_image *meanImage,
+                               nifti_image *warpedMeanImage,
+                               nifti_image *stdDevImage,
+                               nifti_image *warpedSdevImage,
                                int *refMask,
                                int *mask,
-                               int current_timepoint);
+                               int currentTimepoint);
 };
 /* *************************************************************** */
-/* *************************************************************** */
-/** @brief Copmutes and returns the LNCC between two input image
+/** @brief Compute and return the LNCC between two input image
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
  * @param gaussianStandardDeviation Standard deviation of the Gaussian kernel
@@ -90,16 +88,16 @@ class reg_lncc: public reg_measure {
  */
 extern "C++" template<class DataType>
 double reg_getLNCCValue(nifti_image *referenceImage,
-                        nifti_image *referenceMeanImage,
-                        nifti_image *referenceStdDevImage,
+                        nifti_image *meanImage,
+                        nifti_image *sdevImage,
                         nifti_image *warpedImage,
                         nifti_image *warpedMeanImage,
-                        nifti_image *warpedStdDevImage,
+                        nifti_image *warpedSdevImage,
                         int *combinedMask,
-                        float *kernelStdDev,
+                        float *kernelStandardDeviation,
                         nifti_image *correlationImage,
-                        int kernelType);
-
+                        int kernelType,
+                        int currentTimepoint);
 /* *************************************************************** */
 /** @brief Compute a voxel based gradient of the LNCC.
  *  @param referenceImage First input image to use to compute the metric
@@ -114,8 +112,8 @@ double reg_getLNCCValue(nifti_image *referenceImage,
  */
 extern "C++" template <class DataType>
 void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
-                                   nifti_image *referenceMeanImage,
-                                   nifti_image *referenceStdDevImage,
+                                   nifti_image *meanImage,
+                                   nifti_image *sdevImage,
                                    nifti_image *warpedImage,
                                    nifti_image *warpedMeanImage,
                                    nifti_image *warpedStdDevImage,
@@ -125,5 +123,6 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    nifti_image *warpedGradient,
                                    nifti_image *lnccGradientImage,
                                    int kernelType,
-                                   int current_timepoint,
-                                   double timepoint_weight);
+                                   int currentTimepoint,
+                                   double timepointWeight);
+/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h
index dbe7a87d..ee2a2625 100755
--- a/reg-lib/cpu/_reg_measure.h
+++ b/reg-lib/cpu/_reg_measure.h
@@ -23,37 +23,37 @@ class reg_measure {
     virtual ~reg_measure() {}
 
     /// @brief Set the pointers to be used by the measure object
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr = nullptr,
-                                   int *maskFloPtr = nullptr,
-                                   nifti_image *warRefImgPtr = nullptr,
-                                   nifti_image *warRefGraPtr = nullptr,
-                                   nifti_image *bckVoxBasedGraPtr = nullptr) {
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr) {
         this->isSymmetric = false;
-        this->referenceImagePointer = refImgPtr;
-        this->referenceTimePoint = this->referenceImagePointer->nt;
-        this->floatingImagePointer = floImgPtr;
-        this->referenceMaskPointer = maskRefPtr;
-        this->warpedFloatingImagePointer = warFloImgPtr;
-        this->warpedFloatingGradientImagePointer = warFloGraPtr;
-        this->forwardVoxelBasedGradientImagePointer = forVoxBasedGraPtr;
-        this->forwardLocalWeightSimImagePointer = localWeightSimPtr;
-        if (maskFloPtr != nullptr && warRefImgPtr != nullptr && warRefGraPtr != nullptr && bckVoxBasedGraPtr != nullptr) {
+        this->referenceImage = refImg;
+        this->referenceTimePoint = this->referenceImage->nt;
+        this->floatingImage = floImg;
+        this->referenceMask = refMask;
+        this->warpedImage = warpedImg;
+        this->warpedGradient = warpedGrad;
+        this->voxelBasedGradient = voxelBasedGrad;
+        this->localWeightSim = localWeightSim;
+        if (floMask != nullptr && warpedImgBw != nullptr && warpedGradBw != nullptr && voxelBasedGradBw != nullptr) {
             this->isSymmetric = true;
-            this->floatingMaskPointer = maskFloPtr;
-            this->warpedReferenceImagePointer = warRefImgPtr;
-            this->warpedReferenceGradientImagePointer = warRefGraPtr;
-            this->backwardVoxelBasedGradientImagePointer = bckVoxBasedGraPtr;
+            this->floatingMask = floMask;
+            this->warpedImageBw = warpedImgBw;
+            this->warpedGradientBw = warpedGradBw;
+            this->voxelBasedGradientBw = voxelBasedGradBw;
         } else {
-            this->floatingMaskPointer = nullptr;
-            this->warpedReferenceImagePointer = nullptr;
-            this->warpedReferenceGradientImagePointer = nullptr;
-            this->backwardVoxelBasedGradientImagePointer = nullptr;
+            this->floatingMask = nullptr;
+            this->warpedImageBw = nullptr;
+            this->warpedGradientBw = nullptr;
+            this->voxelBasedGradientBw = nullptr;
         }
 #ifndef NDEBUG
         printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n");
@@ -64,8 +64,8 @@ class reg_measure {
     virtual double GetSimilarityMeasureValue() = 0;
 
     /// @brief Compute the voxel based measure of similarity gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
-        if (current_timepoint < 0 || current_timepoint >= this->referenceImagePointer->nt) {
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
+        if (currentTimepoint < 0 || currentTimepoint >= this->referenceImage->nt) {
             reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient");
             reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
             reg_exit();
@@ -79,26 +79,26 @@ class reg_measure {
         return this->timePointWeight;
     }
     virtual nifti_image* GetReferenceImage(void) {
-        return this->referenceImagePointer;
+        return this->referenceImage;
     }
     virtual int* GetReferenceMask(void) {
-        return this->referenceMaskPointer;
+        return this->referenceMask;
     }
 
 protected:
-    nifti_image *referenceImagePointer;
-    int *referenceMaskPointer;
-    nifti_image *warpedFloatingImagePointer;
-    nifti_image *warpedFloatingGradientImagePointer;
-    nifti_image *forwardVoxelBasedGradientImagePointer;
-    nifti_image *forwardLocalWeightSimImagePointer;
+    nifti_image *referenceImage;
+    int *referenceMask;
+    nifti_image *warpedImage;
+    nifti_image *warpedGradient;
+    nifti_image *voxelBasedGradient;
+    nifti_image *localWeightSim;
 
     bool isSymmetric;
-    nifti_image *floatingImagePointer;
-    int *floatingMaskPointer;
-    nifti_image *warpedReferenceImagePointer;
-    nifti_image *warpedReferenceGradientImagePointer;
-    nifti_image *backwardVoxelBasedGradientImagePointer;
+    nifti_image *floatingImage;
+    int *floatingMask;
+    nifti_image *warpedImageBw;
+    nifti_image *warpedGradientBw;
+    nifti_image *voxelBasedGradientBw;
 
     double timePointWeight[255] = {0};
     int referenceTimePoint;
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index cd4196d4..0335843b 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -12,7 +12,7 @@
 
 #include "_reg_mind.h"
 
- /* *************************************************************** */
+/* *************************************************************** */
 template <class DataType>
 void ShiftImage(nifti_image* inputImgPtr,
                 nifti_image* shiftedImgPtr,
@@ -69,7 +69,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
                                 nifti_image* MINDImage,
                                 int *maskPtr,
                                 int descriptorOffset,
-                                int current_timepoint) {
+                                int currentTimepoint) {
 #ifdef WIN32
     long voxelIndex;
     const long voxelNumber = (long)CalcVoxelNumber(*inputImage);
@@ -87,7 +87,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
     currentInputImage->nt = currentInputImage->dim[4] = 1;
     currentInputImage->nvox = voxelNumber;
     DataType *inputImagePtr = static_cast<DataType*>(inputImage->data);
-    currentInputImage->data = static_cast<void*>(&inputImagePtr[current_timepoint * voxelNumber]);
+    currentInputImage->data = static_cast<void*>(&inputImagePtr[currentTimepoint * voxelNumber]);
 
     // Allocate an image to store the mean image
     nifti_image *meanImage = nifti_dup(*currentInputImage, false);
@@ -169,7 +169,7 @@ void GetMINDImageDescriptor(nifti_image* inputImgPtr,
                            nifti_image* MINDImgPtr,
                            int *maskPtr,
                            int descriptorOffset,
-                           int current_timepoint) {
+                           int currentTimepoint) {
 #ifndef NDEBUG
     reg_print_fct_debug("GetMINDImageDescriptor()");
 #endif
@@ -181,10 +181,10 @@ void GetMINDImageDescriptor(nifti_image* inputImgPtr,
 
     switch (inputImgPtr->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        GetMINDImageDescriptor_core<float>(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint);
+        GetMINDImageDescriptor_core<float>(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, currentTimepoint);
         break;
     case NIFTI_TYPE_FLOAT64:
-        GetMINDImageDescriptor_core<double>(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint);
+        GetMINDImageDescriptor_core<double>(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, currentTimepoint);
         break;
     default:
         reg_print_fct_error("GetMINDImageDescriptor");
@@ -199,7 +199,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
                                    nifti_image* MINDSSCImage,
                                    int *maskPtr,
                                    int descriptorOffset,
-                                   int current_timepoint) {
+                                   int currentTimepoint) {
 #ifdef WIN32
     long voxelIndex;
     const long voxelNumber = (long)CalcVoxelNumber(*inputImage);
@@ -217,7 +217,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
     currentInputImage->nt = currentInputImage->dim[4] = 1;
     currentInputImage->nvox = voxelNumber;
     DataType *inputImagePtr = static_cast<DataType*>(inputImage->data);
-    currentInputImage->data = static_cast<void*>(&inputImagePtr[current_timepoint * voxelNumber]);
+    currentInputImage->data = static_cast<void*>(&inputImagePtr[currentTimepoint * voxelNumber]);
 
     // Allocate an image to store the mean image
     nifti_image *mean_img = nifti_dup(*currentInputImage, false);
@@ -322,7 +322,7 @@ void GetMINDSSCImageDescriptor(nifti_image* inputImgPtr,
                               nifti_image* MINDSSCImgPtr,
                               int *maskPtr,
                               int descriptorOffset,
-                              int current_timepoint) {
+                              int currentTimepoint) {
 #ifndef NDEBUG
     reg_print_fct_debug("GetMINDSSCImageDescriptor()");
 #endif
@@ -334,10 +334,10 @@ void GetMINDSSCImageDescriptor(nifti_image* inputImgPtr,
 
     switch (inputImgPtr->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        GetMINDSSCImageDescriptor_core<float>(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint);
+        GetMINDSSCImageDescriptor_core<float>(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, currentTimepoint);
         break;
     case NIFTI_TYPE_FLOAT64:
-        GetMINDSSCImageDescriptor_core<double>(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint);
+        GetMINDSSCImageDescriptor_core<double>(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, currentTimepoint);
         break;
     default:
         reg_print_fct_error("GetMINDSSCImageDescriptor");
@@ -386,45 +386,45 @@ reg_mind::~reg_mind() {
     }
 }
 /* *************************************************************** */
-void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
-                                 nifti_image *floImgPtr,
-                                 int *maskRefPtr,
-                                 nifti_image *warFloImgPtr,
-                                 nifti_image *warFloGraPtr,
-                                 nifti_image *forVoxBasedGraPtr,
-                                 nifti_image *localWeightSimPtr,
-                                 int *maskFloPtr,
-                                 nifti_image *warRefImgPtr,
-                                 nifti_image *warRefGraPtr,
-                                 nifti_image *bckVoxBasedGraPtr) {
+void reg_mind::InitialiseMeasure(nifti_image *refImg,
+                                 nifti_image *floImg,
+                                 int *refMask,
+                                 nifti_image *warpedImg,
+                                 nifti_image *warpedGrad,
+                                 nifti_image *voxelBasedGrad,
+                                 nifti_image *localWeightSim,
+                                 int *floMask,
+                                 nifti_image *warpedImgBw,
+                                 nifti_image *warpedGradBw,
+                                 nifti_image *voxelBasedGradBw) {
     // Set the pointers using the parent class function
-    reg_ssd::InitialiseMeasure(refImgPtr,
-                               floImgPtr,
-                               maskRefPtr,
-                               warFloImgPtr,
-                               warFloGraPtr,
-                               forVoxBasedGraPtr,
-                               localWeightSimPtr,
-                               maskFloPtr,
-                               warRefImgPtr,
-                               warRefGraPtr,
-                               bckVoxBasedGraPtr);
+    reg_ssd::InitialiseMeasure(refImg,
+                               floImg,
+                               refMask,
+                               warpedImg,
+                               warpedGrad,
+                               voxelBasedGrad,
+                               localWeightSim,
+                               floMask,
+                               warpedImgBw,
+                               warpedGradBw,
+                               voxelBasedGradBw);
 
     this->descriptor_number = 0;
     if (this->mind_type == MIND_TYPE) {
-        descriptor_number = this->referenceImagePointer->nz > 1 ? 6 : 4;
+        descriptor_number = this->referenceImage->nz > 1 ? 6 : 4;
     } else if (this->mind_type == MINDSSC_TYPE) {
-        descriptor_number = this->referenceImagePointer->nz > 1 ? 12 : 4;
+        descriptor_number = this->referenceImage->nz > 1 ? 12 : 4;
 
     }
     // Initialise the reference descriptor
-    this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer);
+    this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImage);
     this->referenceImageDescriptor->dim[0] = this->referenceImageDescriptor->ndim = 4;
     this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptor_number;
     this->referenceImageDescriptor->nvox = CalcVoxelNumber(*this->referenceImageDescriptor, this->referenceImageDescriptor->ndim);
     this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox * this->referenceImageDescriptor->nbyper);
     // Initialise the warped floating descriptor
-    this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer);
+    this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImage);
     this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4;
     this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptor_number;
     this->warpedFloatingImageDescriptor->nvox = CalcVoxelNumber(*this->warpedFloatingImageDescriptor,
@@ -433,12 +433,12 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
                                                        this->warpedFloatingImageDescriptor->nbyper);
 
     if (this->isSymmetric) {
-        if (this->floatingImagePointer->nt > 1 || this->warpedReferenceImagePointer->nt > 1) {
+        if (this->floatingImage->nt > 1 || this->warpedImageBw->nt > 1) {
             reg_print_msg_error("reg_mind does not support multiple time point image");
             reg_exit();
         }
         // Initialise the floating descriptor
-        this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer);
+        this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImage);
         this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4;
         this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptor_number;
         this->floatingImageDescriptor->nvox = CalcVoxelNumber(*this->floatingImageDescriptor,
@@ -446,7 +446,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
         this->floatingImageDescriptor->data = malloc(this->floatingImageDescriptor->nvox *
                                                      this->floatingImageDescriptor->nbyper);
         // Initialise the warped floating descriptor
-        this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer);
+        this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImage);
         this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4;
         this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptor_number;
         this->warpedReferenceImageDescriptor->nvox = CalcVoxelNumber(*this->warpedReferenceImageDescriptor,
@@ -472,32 +472,32 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr,
 /* *************************************************************** */
 double reg_mind::GetSimilarityMeasureValue() {
     double MINDValue = 0.;
-    for (int t = 0; t < this->referenceImagePointer->nt; ++t) {
+    for (int t = 0; t < this->referenceImage->nt; ++t) {
         if (this->timePointWeight[t] > 0) {
-            size_t voxelNumber = CalcVoxelNumber(*referenceImagePointer);
+            size_t voxelNumber = CalcVoxelNumber(*referenceImage);
             int *combinedMask = (int*)malloc(voxelNumber * sizeof(int));
-            memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int));
-            reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask);
-            reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask);
+            memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int));
+            reg_tools_removeNanFromMask(this->referenceImage, combinedMask);
+            reg_tools_removeNanFromMask(this->warpedImage, combinedMask);
 
             if (this->mind_type == MIND_TYPE) {
-                GetMINDImageDescriptor(this->referenceImagePointer,
+                GetMINDImageDescriptor(this->referenceImage,
                                       this->referenceImageDescriptor,
                                       combinedMask,
                                       this->descriptorOffset,
                                       t);
-                GetMINDImageDescriptor(this->warpedFloatingImagePointer,
+                GetMINDImageDescriptor(this->warpedImage,
                                       this->warpedFloatingImageDescriptor,
                                       combinedMask,
                                       this->descriptorOffset,
                                       t);
             } else if (this->mind_type == MINDSSC_TYPE) {
-                GetMINDSSCImageDescriptor(this->referenceImagePointer,
+                GetMINDSSCImageDescriptor(this->referenceImage,
                                          this->referenceImageDescriptor,
                                          combinedMask,
                                          this->descriptorOffset,
                                          t);
-                GetMINDSSCImageDescriptor(this->warpedFloatingImagePointer,
+                GetMINDSSCImageDescriptor(this->warpedImage,
                                          this->warpedFloatingImageDescriptor,
                                          combinedMask,
                                          this->descriptorOffset,
@@ -532,30 +532,30 @@ double reg_mind::GetSimilarityMeasureValue() {
 
             // Backward computation
             if (this->isSymmetric) {
-                voxelNumber = CalcVoxelNumber(*floatingImagePointer);
+                voxelNumber = CalcVoxelNumber(*floatingImage);
                 combinedMask = (int*)malloc(voxelNumber * sizeof(int));
-                memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int));
-                reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask);
-                reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask);
+                memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int));
+                reg_tools_removeNanFromMask(this->floatingImage, combinedMask);
+                reg_tools_removeNanFromMask(this->warpedImageBw, combinedMask);
 
                 if (this->mind_type == MIND_TYPE) {
-                    GetMINDImageDescriptor(this->floatingImagePointer,
+                    GetMINDImageDescriptor(this->floatingImage,
                                           this->floatingImageDescriptor,
                                           combinedMask,
                                           this->descriptorOffset,
                                           t);
-                    GetMINDImageDescriptor(this->warpedReferenceImagePointer,
+                    GetMINDImageDescriptor(this->warpedImageBw,
                                           this->warpedReferenceImageDescriptor,
                                           combinedMask,
                                           this->descriptorOffset,
                                           t);
                 } else if (this->mind_type == MINDSSC_TYPE) {
-                    GetMINDSSCImageDescriptor(this->floatingImagePointer,
+                    GetMINDSSCImageDescriptor(this->floatingImage,
                                              this->floatingImageDescriptor,
                                              combinedMask,
                                              this->descriptorOffset,
                                              t);
-                    GetMINDSSCImageDescriptor(this->warpedReferenceImagePointer,
+                    GetMINDSSCImageDescriptor(this->warpedImageBw,
                                              this->warpedReferenceImageDescriptor,
                                              combinedMask,
                                              this->descriptorOffset,
@@ -593,52 +593,52 @@ double reg_mind::GetSimilarityMeasureValue() {
     return MINDValue;   // (double) this->referenceImageDescriptor->nt;
 }
 /* *************************************************************** */
-void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-    if (this->timePointWeight[current_timepoint] == 0)
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
+    if (this->timePointWeight[currentTimepoint] == 0)
         return;
 
     // Create a combined mask to ignore masked and undefined values
-    size_t voxelNumber = CalcVoxelNumber(*this->referenceImagePointer);
+    size_t voxelNumber = CalcVoxelNumber(*this->referenceImage);
     int *combinedMask = (int*)malloc(voxelNumber * sizeof(int));
-    memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int));
-    reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask);
-    reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask);
+    memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int));
+    reg_tools_removeNanFromMask(this->referenceImage, combinedMask);
+    reg_tools_removeNanFromMask(this->warpedImage, combinedMask);
 
     if (this->mind_type == MIND_TYPE) {
         // Compute the reference image descriptors
-        GetMINDImageDescriptor(this->referenceImagePointer,
+        GetMINDImageDescriptor(this->referenceImage,
                               this->referenceImageDescriptor,
                               combinedMask,
                               this->descriptorOffset,
-                              current_timepoint);
+                              currentTimepoint);
         // Compute the warped floating image descriptors
-        GetMINDImageDescriptor(this->warpedFloatingImagePointer,
+        GetMINDImageDescriptor(this->warpedImage,
                               this->warpedFloatingImageDescriptor,
                               combinedMask,
                               this->descriptorOffset,
-                              current_timepoint);
+                              currentTimepoint);
     } else if (this->mind_type == MINDSSC_TYPE) {
         // Compute the reference image descriptors
-        GetMINDSSCImageDescriptor(this->referenceImagePointer,
+        GetMINDSSCImageDescriptor(this->referenceImage,
                                  this->referenceImageDescriptor,
                                  combinedMask,
                                  this->descriptorOffset,
-                                 current_timepoint);
+                                 currentTimepoint);
         // Compute the warped floating image descriptors
-        GetMINDSSCImageDescriptor(this->warpedFloatingImagePointer,
+        GetMINDSSCImageDescriptor(this->warpedImage,
                                  this->warpedFloatingImageDescriptor,
                                  combinedMask,
                                  this->descriptorOffset,
-                                 current_timepoint);
+                                 currentTimepoint);
     }
 
 
     for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) {
         // Compute the warped image descriptors gradient
         reg_getImageGradient_symDiff(this->warpedFloatingImageDescriptor,
-                                     this->warpedFloatingGradientImagePointer,
+                                     this->warpedGradient,
                                      combinedMask,
                                      std::numeric_limits<float>::quiet_NaN(),
                                      desc_index);
@@ -648,8 +648,8 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
         case NIFTI_TYPE_FLOAT32:
             reg_getVoxelBasedSSDGradient<float>(this->referenceImageDescriptor,
                                                 this->warpedFloatingImageDescriptor,
-                                                this->warpedFloatingGradientImagePointer,
-                                                this->forwardVoxelBasedGradientImagePointer,
+                                                this->warpedGradient,
+                                                this->voxelBasedGradient,
                                                 nullptr, // no Jacobian required here,
                                                 combinedMask,
                                                 desc_index,
@@ -659,8 +659,8 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
         case NIFTI_TYPE_FLOAT64:
             reg_getVoxelBasedSSDGradient<double>(this->referenceImageDescriptor,
                                                  this->warpedFloatingImageDescriptor,
-                                                 this->warpedFloatingGradientImagePointer,
-                                                 this->forwardVoxelBasedGradientImagePointer,
+                                                 this->warpedGradient,
+                                                 this->voxelBasedGradient,
                                                  nullptr, // no Jacobian required here,
                                                  combinedMask,
                                                  desc_index,
@@ -677,50 +677,50 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
 
     // Compute the gradient of the ssd for the backward transformation
     if (this->isSymmetric) {
-        voxelNumber = CalcVoxelNumber(*floatingImagePointer);
+        voxelNumber = CalcVoxelNumber(*floatingImage);
         combinedMask = (int*)malloc(voxelNumber * sizeof(int));
-        memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int));
-        reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask);
-        reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask);
+        memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int));
+        reg_tools_removeNanFromMask(this->floatingImage, combinedMask);
+        reg_tools_removeNanFromMask(this->warpedImageBw, combinedMask);
 
         if (this->mind_type == MIND_TYPE) {
-            GetMINDImageDescriptor(this->floatingImagePointer,
+            GetMINDImageDescriptor(this->floatingImage,
                                   this->floatingImageDescriptor,
                                   combinedMask,
                                   this->descriptorOffset,
-                                  current_timepoint);
-            GetMINDImageDescriptor(this->warpedReferenceImagePointer,
+                                  currentTimepoint);
+            GetMINDImageDescriptor(this->warpedImageBw,
                                   this->warpedReferenceImageDescriptor,
                                   combinedMask,
                                   this->descriptorOffset,
-                                  current_timepoint);
+                                  currentTimepoint);
         } else if (this->mind_type == MINDSSC_TYPE) {
-            GetMINDSSCImageDescriptor(this->floatingImagePointer,
+            GetMINDSSCImageDescriptor(this->floatingImage,
                                      this->floatingImageDescriptor,
                                      combinedMask,
                                      this->descriptorOffset,
-                                     current_timepoint);
-            GetMINDSSCImageDescriptor(this->warpedReferenceImagePointer,
+                                     currentTimepoint);
+            GetMINDSSCImageDescriptor(this->warpedImageBw,
                                      this->warpedReferenceImageDescriptor,
                                      combinedMask,
                                      this->descriptorOffset,
-                                     current_timepoint);
+                                     currentTimepoint);
         }
 
         for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) {
             reg_getImageGradient_symDiff(this->warpedReferenceImageDescriptor,
-                                         this->warpedReferenceGradientImagePointer,
+                                         this->warpedGradientBw,
                                          combinedMask,
                                          std::numeric_limits<float>::quiet_NaN(),
                                          desc_index);
 
             // Compute the gradient of the nmi for the backward transformation
-            switch (floatingImagePointer->datatype) {
+            switch (floatingImage->datatype) {
             case NIFTI_TYPE_FLOAT32:
                 reg_getVoxelBasedSSDGradient<float>(this->floatingImageDescriptor,
                                                     this->warpedReferenceImageDescriptor,
-                                                    this->warpedReferenceGradientImagePointer,
-                                                    this->backwardVoxelBasedGradientImagePointer,
+                                                    this->warpedGradientBw,
+                                                    this->voxelBasedGradientBw,
                                                     nullptr, // no Jacobian required here,
                                                     combinedMask,
                                                     desc_index,
@@ -730,8 +730,8 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
             case NIFTI_TYPE_FLOAT64:
                 reg_getVoxelBasedSSDGradient<double>(this->floatingImageDescriptor,
                                                      this->warpedReferenceImageDescriptor,
-                                                     this->warpedReferenceGradientImagePointer,
-                                                     this->backwardVoxelBasedGradientImagePointer,
+                                                     this->warpedGradientBw,
+                                                     this->voxelBasedGradientBw,
                                                      nullptr, // no Jacobian required here,
                                                      combinedMask,
                                                      desc_index,
@@ -748,7 +748,6 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 reg_mindssc::reg_mindssc(): reg_mind() {
     this->mind_type = MINDSSC_TYPE;
 #ifndef NDEBUG
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index 771cfd45..cf09a4a8 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -19,7 +19,6 @@
 #define MIND_TYPE 0
 #define MINDSSC_TYPE 1
 
-/* *************************************************************** */
 /* *************************************************************** */
 /// @brief MIND measure of similarity class
 class reg_mind: public reg_ssd {
@@ -30,21 +29,21 @@ class reg_mind: public reg_ssd {
     virtual ~reg_mind();
 
     /// @brief Initialise the reg_mind object
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                           nifti_image *floImgPtr,
-                           int *maskRefPtr,
-                           nifti_image *warFloImgPtr,
-                           nifti_image *warFloGraPtr,
-                           nifti_image *forVoxBasedGraPtr,
-                           nifti_image *localWeightSimPtr = nullptr,
-                           int *maskFloPtr = nullptr,
-                           nifti_image *warRefImgPtr = nullptr,
-                           nifti_image *warRefGraPtr = nullptr,
-                           nifti_image *bckVoxBasedGraPtr = nullptr) override;
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                           nifti_image *floImg,
+                           int *refMask,
+                           nifti_image *warpedImg,
+                           nifti_image *warpedGrad,
+                           nifti_image *voxelBasedGrad,
+                           nifti_image *localWeightSim = nullptr,
+                           int *floMask = nullptr,
+                           nifti_image *warpedImgBw = nullptr,
+                           nifti_image *warpedGradBw = nullptr,
+                           nifti_image *voxelBasedGradBw = nullptr) override;
     /// @brief Returns the mind based measure of similarity value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
     virtual void SetDescriptorOffset(int);
     virtual int GetDescriptorOffset();
 
@@ -69,16 +68,17 @@ class reg_mindssc: public reg_mind {
     virtual ~reg_mindssc();
 };
 /* *************************************************************** */
-
 extern "C++"
 void GetMINDImageDescriptor(nifti_image *inputImgPtr,
                            nifti_image *MINDImgPtr,
                            int *mask,
                            int descriptorOffset,
-                           int current_timepoint);
+                           int currentTimepoint);
+/* *************************************************************** */
 extern "C++"
 void GetMINDSSCImageDescriptor(nifti_image *inputImgPtr,
                               nifti_image *MINDSSCImgPtr,
                               int *mask,
                               int descriptorOffset,
-                              int current_timepoint);
+                              int currentTimepoint);
+/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 0eec06ed..40e69328 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -101,29 +101,29 @@ void reg_nmi::DeallocateHistogram() {
 #endif
 }
 /* *************************************************************** */
-void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
-                                nifti_image *floImgPtr,
-                                int *maskRefPtr,
-                                nifti_image *warFloImgPtr,
-                                nifti_image *warFloGraPtr,
-                                nifti_image *forVoxBasedGraPtr,
-                                nifti_image *localWeightSimPtr,
-                                int *maskFloPtr,
-                                nifti_image *warRefImgPtr,
-                                nifti_image *warRefGraPtr,
-                                nifti_image *bckVoxBasedGraPtr) {
+void reg_nmi::InitialiseMeasure(nifti_image *refImg,
+                                nifti_image *floImg,
+                                int *refMask,
+                                nifti_image *warpedImg,
+                                nifti_image *warpedGrad,
+                                nifti_image *voxelBasedGrad,
+                                nifti_image *localWeightSim,
+                                int *floMask,
+                                nifti_image *warpedImgBw,
+                                nifti_image *warpedGradBw,
+                                nifti_image *voxelBasedGradBw) {
     // Set the pointers using the parent class function
-    reg_measure::InitialiseMeasure(refImgPtr,
-                                   floImgPtr,
-                                   maskRefPtr,
-                                   warFloImgPtr,
-                                   warFloGraPtr,
-                                   forVoxBasedGraPtr,
-                                   localWeightSimPtr,
-                                   maskFloPtr,
-                                   warRefImgPtr,
-                                   warRefGraPtr,
-                                   bckVoxBasedGraPtr);
+    reg_measure::InitialiseMeasure(refImg,
+                                   floImg,
+                                   refMask,
+                                   warpedImg,
+                                   warpedGrad,
+                                   voxelBasedGrad,
+                                   localWeightSim,
+                                   floMask,
+                                   warpedImgBw,
+                                   warpedGradBw,
+                                   voxelBasedGradBw);
 
     // Deallocate all allocated arrays
     this->DeallocateHistogram();
@@ -132,11 +132,11 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
     // Reference and floating are resampled between 2 and bin-3
     for (int i = 0; i < timepoint; ++i) {
         if (this->timePointWeight[i] > 0) {
-            reg_intensityRescale(this->referenceImagePointer,
+            reg_intensityRescale(this->referenceImage,
                                  i,
                                  2.f,
                                  this->referenceBinNumber[i] - 3.f);
-            reg_intensityRescale(this->floatingImagePointer,
+            reg_intensityRescale(this->floatingImage,
                                  i,
                                  2.f,
                                  this->floatingBinNumber[i] - 3.f);
@@ -178,7 +178,7 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr,
 #ifndef NDEBUG
     char text[255];
     reg_print_msg_debug("reg_nmi::InitialiseMeasure().");
-    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+    for (int i = 0; i < this->referenceImage->nt; ++i) {
         sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
         reg_print_msg_debug(text);
     }
@@ -371,15 +371,15 @@ template void reg_getNMIValue<double>(nifti_image*, nifti_image*, double*, unsig
 /* *************************************************************** */
 double reg_nmi::GetSimilarityMeasureValue() {
     // Check that all the specified image are of the same datatype
-    if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) {
+    if (this->warpedImage->datatype != this->referenceImage->datatype) {
         reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
         reg_print_msg_error("Both input images are expected to have the same type");
         reg_exit();
     }
-    switch (this->referenceImagePointer->datatype) {
+    switch (this->referenceImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        reg_getNMIValue<float>(this->referenceImagePointer,
-                               this->warpedFloatingImagePointer,
+        reg_getNMIValue<float>(this->referenceImage,
+                               this->warpedImage,
                                this->timePointWeight,
                                this->referenceBinNumber,
                                this->floatingBinNumber,
@@ -387,11 +387,11 @@ double reg_nmi::GetSimilarityMeasureValue() {
                                this->forwardJointHistogramLog,
                                this->forwardJointHistogramPro,
                                this->forwardEntropyValues,
-                               this->referenceMaskPointer);
+                               this->referenceMask);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_getNMIValue<double>(this->referenceImagePointer,
-                                this->warpedFloatingImagePointer,
+        reg_getNMIValue<double>(this->referenceImage,
+                                this->warpedImage,
                                 this->timePointWeight,
                                 this->referenceBinNumber,
                                 this->floatingBinNumber,
@@ -399,7 +399,7 @@ double reg_nmi::GetSimilarityMeasureValue() {
                                 this->forwardJointHistogramLog,
                                 this->forwardJointHistogramPro,
                                 this->forwardEntropyValues,
-                                this->referenceMaskPointer);
+                                this->referenceMask);
         break;
     default:
         reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
@@ -409,15 +409,15 @@ double reg_nmi::GetSimilarityMeasureValue() {
 
     if (this->isSymmetric) {
         // Check that all the specified image are of the same datatype
-        if (this->floatingImagePointer->datatype != this->warpedReferenceImagePointer->datatype) {
+        if (this->floatingImage->datatype != this->warpedImageBw->datatype) {
             reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
             reg_print_msg_error("Both input images are expected to have the same type");
             reg_exit();
         }
-        switch (this->floatingImagePointer->datatype) {
+        switch (this->floatingImage->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_getNMIValue<float>(this->floatingImagePointer,
-                                   this->warpedReferenceImagePointer,
+            reg_getNMIValue<float>(this->floatingImage,
+                                   this->warpedImageBw,
                                    this->timePointWeight,
                                    this->floatingBinNumber,
                                    this->referenceBinNumber,
@@ -425,11 +425,11 @@ double reg_nmi::GetSimilarityMeasureValue() {
                                    this->backwardJointHistogramLog,
                                    this->backwardJointHistogramPro,
                                    this->backwardEntropyValues,
-                                   this->floatingMaskPointer);
+                                   this->floatingMask);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_getNMIValue<double>(this->floatingImagePointer,
-                                    this->warpedReferenceImagePointer,
+            reg_getNMIValue<double>(this->floatingImage,
+                                    this->warpedImageBw,
                                     this->timePointWeight,
                                     this->floatingBinNumber,
                                     this->referenceBinNumber,
@@ -437,7 +437,7 @@ double reg_nmi::GetSimilarityMeasureValue() {
                                     this->backwardJointHistogramLog,
                                     this->backwardJointHistogramPro,
                                     this->backwardEntropyValues,
-                                    this->floatingMaskPointer);
+                                    this->floatingMask);
             break;
         default:
             reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
@@ -477,9 +477,9 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
                                     const nifti_image *warpedGradient,
                                     nifti_image *measureGradientImage,
                                     const int *referenceMask,
-                                    const int& current_timepoint,
-                                    const double& timepoint_weight) {
-    if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) {
+                                    const int& currentTimepoint,
+                                    const double& timepointWeight) {
+    if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) {
         reg_print_fct_error("reg_getVoxelBasedNMIGradient2D");
         reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
         reg_exit();
@@ -488,9 +488,9 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
 
     // Pointers to the image data
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    const DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber];
+    const DataType *refPtr = &refImagePtr[currentTimepoint * voxelNumber];
     const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    const DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber];
+    const DataType *warPtr = &warImagePtr[currentTimepoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
     const DataType *warGradPtrX = static_cast<DataType*>(warpedGradient->data);
@@ -501,11 +501,11 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
     DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
 
     // Create pointers to the current joint histogram
-    const double *logHistoPtr = jointHistogramLog[current_timepoint];
-    const double *entropyPtr = entropyValues[current_timepoint];
+    const double *logHistoPtr = jointHistogramLog[currentTimepoint];
+    const double *entropyPtr = entropyValues[currentTimepoint];
     const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
-    const size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint];
-    const size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint];
+    const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint];
+    const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint];
     // Iterate over all voxel
     for (size_t i = 0; i < voxelNumber; ++i) {
         // Check if the voxel belongs to the image mask
@@ -521,13 +521,13 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
                 double warDeriv[2] = {0};
 
                 for (int r = (int)(refValue - 1.0); r < (int)(refValue + 3.0); ++r) {
-                    if (-1 < r && r < referenceBinNumber[current_timepoint]) {
+                    if (-1 < r && r < referenceBinNumber[currentTimepoint]) {
                         for (int w = (int)(warValue - 1.0); w < (int)(warValue + 3.0); ++w) {
-                            if (-1 < w && w < floatingBinNumber[current_timepoint]) {
+                            if (-1 < w && w < floatingBinNumber[currentTimepoint]) {
                                 double commun =
                                     GetBasisSplineValue((double)refValue - (double)r) *
                                     GetBasisSplineDerivativeValue((double)warValue - (double)w);
-                                double jointLog = logHistoPtr[r + w * referenceBinNumber[current_timepoint]];
+                                double jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
                                 double refLog = logHistoPtr[r + referenceOffset];
                                 double warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
@@ -544,9 +544,9 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
                         }
                     }
                 }
-                measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
+                measureGradPtrX[i] += (DataType)(timepointWeight * (refDeriv[0] + warDeriv[0] -
                                                                      nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
+                measureGradPtrY[i] += (DataType)(timepointWeight * (refDeriv[1] + warDeriv[1] -
                                                                      nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
@@ -567,9 +567,9 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
                                     const nifti_image *warpedGradient,
                                     nifti_image *measureGradientImage,
                                     const int *referenceMask,
-                                    const int& current_timepoint,
-                                    const double& timepoint_weight) {
-    if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) {
+                                    const int& currentTimepoint,
+                                    const double& timepointWeight) {
+    if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) {
         reg_print_fct_error("reg_getVoxelBasedNMIGradient3D");
         reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
         reg_exit();
@@ -584,9 +584,9 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
 #endif
     // Pointers to the image data
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    const DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber];
+    const DataType *refPtr = &refImagePtr[currentTimepoint * voxelNumber];
     const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    const DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber];
+    const DataType *warPtr = &warImagePtr[currentTimepoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
     const DataType *warGradPtrX = static_cast<DataType*>(warpedGradient->data);
@@ -599,11 +599,11 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
     DataType *measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
     // Create pointers to the current joint histogram
-    const double *logHistoPtr = jointHistogramLog[current_timepoint];
-    const double *entropyPtr = entropyValues[current_timepoint];
+    const double *logHistoPtr = jointHistogramLog[currentTimepoint];
+    const double *entropyPtr = entropyValues[currentTimepoint];
     const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
-    const size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint];
-    const size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint];
+    const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint];
+    const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint];
     int r, w;
     DataType refValue, warValue, gradX, gradY, gradZ;
     double jointDeriv[3], refDeriv[3], warDeriv[3], commun, jointLog, refLog, warLog;
@@ -614,7 +614,7 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
     jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \
     shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \
     logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \
-    warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,current_timepoint,timepoint_weight)
+    warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimepoint,timepointWeight)
 #endif // _OPENMP
     for (i = 0; i < voxelNumber; ++i) {
         // Check if the voxel belongs to the image mask
@@ -631,12 +631,12 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
                 warDeriv[0] = warDeriv[1] = warDeriv[2] = 0.f;
 
                 for (r = (int)(refValue - 1.0); r < (int)(refValue + 3.0); ++r) {
-                    if (-1 < r && r < referenceBinNumber[current_timepoint]) {
+                    if (-1 < r && r < referenceBinNumber[currentTimepoint]) {
                         for (w = (int)(warValue - 1.0); w < (int)(warValue + 3.0); ++w) {
-                            if (-1 < w && w < floatingBinNumber[current_timepoint]) {
+                            if (-1 < w && w < floatingBinNumber[currentTimepoint]) {
                                 commun = GetBasisSplineValue((double)refValue - (double)r) *
                                     GetBasisSplineDerivativeValue((double)warValue - (double)w);
-                                jointLog = logHistoPtr[r + w * referenceBinNumber[current_timepoint]];
+                                jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
                                 refLog = logHistoPtr[r + referenceOffset];
                                 warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
@@ -658,11 +658,11 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
                         }
                     }
                 }
-                measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] -
+                measureGradPtrX[i] += (DataType)(timepointWeight * (refDeriv[0] + warDeriv[0] -
                                                                      nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] -
+                measureGradPtrY[i] += (DataType)(timepointWeight * (refDeriv[1] + warDeriv[1] -
                                                                      nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrZ[i] += (DataType)(timepoint_weight * (refDeriv[2] + warDeriv[2] -
+                measureGradPtrZ[i] += (DataType)(timepointWeight * (refDeriv[2] + warDeriv[2] -
                                                                      nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
@@ -673,17 +673,17 @@ template void reg_getVoxelBasedNMIGradient3D<float>
 template void reg_getVoxelBasedNMIGradient3D<double>
 (const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
 /* *************************************************************** */
-void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-    if (this->timePointWeight[current_timepoint] == 0)
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
+    if (this->timePointWeight[currentTimepoint] == 0)
         return;
 
     // Check if all required input images are of the same data type
-    int dtype = this->referenceImagePointer->datatype;
-    if (this->warpedFloatingImagePointer->datatype != dtype ||
-        this->warpedFloatingGradientImagePointer->datatype != dtype ||
-        this->forwardVoxelBasedGradientImagePointer->datatype != dtype) {
+    int dtype = this->referenceImage->datatype;
+    if (this->warpedImage->datatype != dtype ||
+        this->warpedGradient->datatype != dtype ||
+        this->voxelBasedGradient->datatype != dtype) {
         reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
         reg_print_msg_error("Input images are expected to be of the same type");
         reg_exit();
@@ -693,33 +693,33 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     this->GetSimilarityMeasureValue();
 
     // Compute the gradient of the nmi for the forward transformation
-    if (this->referenceImagePointer->nz > 1) {  // 3D input images
+    if (this->referenceImage->nz > 1) {  // 3D input images
         switch (dtype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedNMIGradient3D<float>(this->referenceImagePointer,
-                                                  this->warpedFloatingImagePointer,
+            reg_getVoxelBasedNMIGradient3D<float>(this->referenceImage,
+                                                  this->warpedImage,
                                                   this->referenceBinNumber,
                                                   this->floatingBinNumber,
                                                   this->forwardJointHistogramLog,
                                                   this->forwardEntropyValues,
-                                                  this->warpedFloatingGradientImagePointer,
-                                                  this->forwardVoxelBasedGradientImagePointer,
-                                                  this->referenceMaskPointer,
-                                                  current_timepoint,
-                                                  this->timePointWeight[current_timepoint]);
+                                                  this->warpedGradient,
+                                                  this->voxelBasedGradient,
+                                                  this->referenceMask,
+                                                  currentTimepoint,
+                                                  this->timePointWeight[currentTimepoint]);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedNMIGradient3D<double>(this->referenceImagePointer,
-                                                   this->warpedFloatingImagePointer,
+            reg_getVoxelBasedNMIGradient3D<double>(this->referenceImage,
+                                                   this->warpedImage,
                                                    this->referenceBinNumber,
                                                    this->floatingBinNumber,
                                                    this->forwardJointHistogramLog,
                                                    this->forwardEntropyValues,
-                                                   this->warpedFloatingGradientImagePointer,
-                                                   this->forwardVoxelBasedGradientImagePointer,
-                                                   this->referenceMaskPointer,
-                                                   current_timepoint,
-                                                   this->timePointWeight[current_timepoint]);
+                                                   this->warpedGradient,
+                                                   this->voxelBasedGradient,
+                                                   this->referenceMask,
+                                                   currentTimepoint,
+                                                   this->timePointWeight[currentTimepoint]);
             break;
         default:
             reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
@@ -729,30 +729,30 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     } else { // 2D input images
         switch (dtype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedNMIGradient2D<float>(this->referenceImagePointer,
-                                                  this->warpedFloatingImagePointer,
+            reg_getVoxelBasedNMIGradient2D<float>(this->referenceImage,
+                                                  this->warpedImage,
                                                   this->referenceBinNumber,
                                                   this->floatingBinNumber,
                                                   this->forwardJointHistogramLog,
                                                   this->forwardEntropyValues,
-                                                  this->warpedFloatingGradientImagePointer,
-                                                  this->forwardVoxelBasedGradientImagePointer,
-                                                  this->referenceMaskPointer,
-                                                  current_timepoint,
-                                                  this->timePointWeight[current_timepoint]);
+                                                  this->warpedGradient,
+                                                  this->voxelBasedGradient,
+                                                  this->referenceMask,
+                                                  currentTimepoint,
+                                                  this->timePointWeight[currentTimepoint]);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedNMIGradient2D<double>(this->referenceImagePointer,
-                                                   this->warpedFloatingImagePointer,
+            reg_getVoxelBasedNMIGradient2D<double>(this->referenceImage,
+                                                   this->warpedImage,
                                                    this->referenceBinNumber,
                                                    this->floatingBinNumber,
                                                    this->forwardJointHistogramLog,
                                                    this->forwardEntropyValues,
-                                                   this->warpedFloatingGradientImagePointer,
-                                                   this->forwardVoxelBasedGradientImagePointer,
-                                                   this->referenceMaskPointer,
-                                                   current_timepoint,
-                                                   this->timePointWeight[current_timepoint]);
+                                                   this->warpedGradient,
+                                                   this->voxelBasedGradient,
+                                                   this->referenceMask,
+                                                   currentTimepoint,
+                                                   this->timePointWeight[currentTimepoint]);
             break;
         default:
             reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
@@ -762,42 +762,42 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     }
 
     if (this->isSymmetric) {
-        dtype = this->floatingImagePointer->datatype;
-        if (this->warpedReferenceImagePointer->datatype != dtype ||
-            this->warpedReferenceGradientImagePointer->datatype != dtype ||
-            this->backwardVoxelBasedGradientImagePointer->datatype != dtype) {
+        dtype = this->floatingImage->datatype;
+        if (this->warpedImageBw->datatype != dtype ||
+            this->warpedGradientBw->datatype != dtype ||
+            this->voxelBasedGradientBw->datatype != dtype) {
             reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
             reg_print_msg_error("Input images are expected to be of the same type");
             reg_exit();
         }
         // Compute the gradient of the nmi for the backward transformation
-        if (this->floatingImagePointer->nz > 1) {  // 3D input images
+        if (this->floatingImage->nz > 1) {  // 3D input images
             switch (dtype) {
             case NIFTI_TYPE_FLOAT32:
-                reg_getVoxelBasedNMIGradient3D<float>(this->floatingImagePointer,
-                                                      this->warpedReferenceImagePointer,
+                reg_getVoxelBasedNMIGradient3D<float>(this->floatingImage,
+                                                      this->warpedImageBw,
                                                       this->floatingBinNumber,
                                                       this->referenceBinNumber,
                                                       this->backwardJointHistogramLog,
                                                       this->backwardEntropyValues,
-                                                      this->warpedReferenceGradientImagePointer,
-                                                      this->backwardVoxelBasedGradientImagePointer,
-                                                      this->floatingMaskPointer,
-                                                      current_timepoint,
-                                                      this->timePointWeight[current_timepoint]);
+                                                      this->warpedGradientBw,
+                                                      this->voxelBasedGradientBw,
+                                                      this->floatingMask,
+                                                      currentTimepoint,
+                                                      this->timePointWeight[currentTimepoint]);
                 break;
             case NIFTI_TYPE_FLOAT64:
-                reg_getVoxelBasedNMIGradient3D<double>(this->floatingImagePointer,
-                                                       this->warpedReferenceImagePointer,
+                reg_getVoxelBasedNMIGradient3D<double>(this->floatingImage,
+                                                       this->warpedImageBw,
                                                        this->floatingBinNumber,
                                                        this->referenceBinNumber,
                                                        this->backwardJointHistogramLog,
                                                        this->backwardEntropyValues,
-                                                       this->warpedReferenceGradientImagePointer,
-                                                       this->backwardVoxelBasedGradientImagePointer,
-                                                       this->floatingMaskPointer,
-                                                       current_timepoint,
-                                                       this->timePointWeight[current_timepoint]);
+                                                       this->warpedGradientBw,
+                                                       this->voxelBasedGradientBw,
+                                                       this->floatingMask,
+                                                       currentTimepoint,
+                                                       this->timePointWeight[currentTimepoint]);
                 break;
             default:
                 reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
@@ -807,30 +807,30 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
         } else { // 2D input images
             switch (dtype) {
             case NIFTI_TYPE_FLOAT32:
-                reg_getVoxelBasedNMIGradient2D<float>(this->floatingImagePointer,
-                                                      this->warpedReferenceImagePointer,
+                reg_getVoxelBasedNMIGradient2D<float>(this->floatingImage,
+                                                      this->warpedImageBw,
                                                       this->floatingBinNumber,
                                                       this->referenceBinNumber,
                                                       this->backwardJointHistogramLog,
                                                       this->backwardEntropyValues,
-                                                      this->warpedReferenceGradientImagePointer,
-                                                      this->backwardVoxelBasedGradientImagePointer,
-                                                      this->floatingMaskPointer,
-                                                      current_timepoint,
-                                                      this->timePointWeight[current_timepoint]);
+                                                      this->warpedGradientBw,
+                                                      this->voxelBasedGradientBw,
+                                                      this->floatingMask,
+                                                      currentTimepoint,
+                                                      this->timePointWeight[currentTimepoint]);
                 break;
             case NIFTI_TYPE_FLOAT64:
-                reg_getVoxelBasedNMIGradient2D<double>(this->floatingImagePointer,
-                                                       this->warpedReferenceImagePointer,
+                reg_getVoxelBasedNMIGradient2D<double>(this->floatingImage,
+                                                       this->warpedImageBw,
                                                        this->floatingBinNumber,
                                                        this->referenceBinNumber,
                                                        this->backwardJointHistogramLog,
                                                        this->backwardEntropyValues,
-                                                       this->warpedReferenceGradientImagePointer,
-                                                       this->backwardVoxelBasedGradientImagePointer,
-                                                       this->floatingMaskPointer,
-                                                       current_timepoint,
-                                                       this->timePointWeight[current_timepoint]);
+                                                       this->warpedGradientBw,
+                                                       this->voxelBasedGradientBw,
+                                                       this->floatingMask,
+                                                       currentTimepoint,
+                                                       this->timePointWeight[currentTimepoint]);
                 break;
             default:
                 reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index a48583c8..8faafcee 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -18,7 +18,6 @@
 #include "omp.h"
 #endif
 
-/* *************************************************************** */
 /* *************************************************************** */
 /// @brief NMI measure of similarity class
 class reg_nmi: public reg_measure {
@@ -28,21 +27,21 @@ class reg_nmi: public reg_measure {
     /// @brief reg_nmi class destructor
     virtual ~reg_nmi();
 
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr = nullptr,
-                                   int *maskFloPtr = nullptr,
-                                   nifti_image *warRefImgPtr = nullptr,
-                                   nifti_image *warRefGraPtr = nullptr,
-                                   nifti_image *bckVoxBasedGraPtr = nullptr) override;
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr) override;
     /// @brief Returns the nmi value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based nmi gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
 
     virtual void SetRefAndFloatBinNumbers(unsigned short refBinNumber,
                                           unsigned short floBinNumber,
@@ -77,7 +76,6 @@ class reg_nmi: public reg_measure {
     void DeallocateHistogram();
 };
 /* *************************************************************** */
-/* *************************************************************** */
 extern "C++" template <class DataType>
 void reg_getNMIValue(nifti_image *referenceImage,
                      nifti_image *warpedImage,
@@ -101,8 +99,8 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
                                     const nifti_image *warpedGradient,
                                     nifti_image *nmiGradientImage,
                                     const int *referenceMask,
-                                    const int& current_timepoint,
-                                    const double& timepoint_weight
+                                    const int& currentTimepoint,
+                                    const double& timepointWeight
 );
 /* *************************************************************** */
 extern "C++" template <class DataType>
@@ -115,11 +113,10 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
                                     const nifti_image *warpedGradient,
                                     nifti_image *nmiGradientImage,
                                     const int *referenceMask,
-                                    const int& current_timepoint,
-                                    const double& timepoint_weight
+                                    const int& currentTimepoint,
+                                    const double& timepointWeight
 );
 /* *************************************************************** */
-/* *************************************************************** */
 // Simple class to dynamically manage an array of pointers
 // Needed for multi channel NMI
 template<class DataTYPE>
@@ -237,7 +234,6 @@ inline int previous(int current, int num_dims) {
     return num_dims - 1;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 /// @brief NMI measure of similarity class
 class reg_multichannel_nmi: public reg_measure {
 public:
@@ -250,10 +246,10 @@ class reg_multichannel_nmi: public reg_measure {
     virtual double GetSimilarityMeasureValue() override { return 0; }
 
     /// @brief Compute the voxel based nmi gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {
         // Check if the specified time point exists and is active
-        reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-        if (this->timePointWeight[current_timepoint] == 0)
+        reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
+        if (this->timePointWeight[currentTimepoint] == 0)
             return;
     }
 
@@ -273,37 +269,37 @@ class reg_multichannel_nmi: public reg_measure {
 extern "C++"
 void reg_getMultiChannelNMIValue(nifti_image *referenceImages,
                                  nifti_image *warpedImages,
-                                 unsigned *reference_bins, // should be an array of size num_reference_volumes
-                                 unsigned *warped_bins, // should be an array of size num_warped_volumes
+                                 unsigned *referenceBins, // should be an array of size num_reference_volumes
+                                 unsigned *warpedBins, // should be an array of size num_warped_volumes
                                  double *probaJointHistogram,
                                  double *logJointHistogram,
                                  double *entropies,
                                  int *mask,
                                  bool approx);
-
+/* *************************************************************** */
 /// Multi channel NMI version - Gradient
 extern "C++"
 void reg_getVoxelBasedMultiChannelNMIGradient2D(nifti_image *referenceImages,
                                                 nifti_image *warpedImages,
                                                 nifti_image *warpedImageGradient,
-                                                unsigned *reference_bins,
-                                                unsigned *warped_bins,
+                                                unsigned *referenceBins,
+                                                unsigned *warpedBins,
                                                 double *logJointHistogram,
                                                 double *entropies,
                                                 nifti_image *nmiGradientImage,
                                                 int *mask,
                                                 bool approx);
+/* *************************************************************** */
 /// Multi channel NMI version - Gradient
 extern "C++"
 void reg_getVoxelBasedMultiChannelNMIGradient3D(nifti_image *referenceImages,
                                                 nifti_image *warpedImages,
                                                 nifti_image *warpedImageGradient,
-                                                unsigned *reference_bins,
-                                                unsigned *warped_bins,
+                                                unsigned *referenceBins,
+                                                unsigned *warpedBins,
                                                 double *logJointHistogram,
                                                 double *entropies,
                                                 nifti_image *nmiGradientImage,
                                                 int *mask,
                                                 bool approx);
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index a89f0122..fc16cd64 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -25,53 +25,53 @@ reg_ssd::reg_ssd(): reg_measure() {
 }
 /* *************************************************************** */
 /* *************************************************************** */
-void reg_ssd::InitialiseMeasure(nifti_image *refImgPtr,
-                                nifti_image *floImgPtr,
-                                int *maskRefPtr,
-                                nifti_image *warFloImgPtr,
-                                nifti_image *warFloGraPtr,
-                                nifti_image *forVoxBasedGraPtr,
-                                nifti_image *localWeightSimPtr,
-                                int *maskFloPtr,
-                                nifti_image *warRefImgPtr,
-                                nifti_image *warRefGraPtr,
-                                nifti_image *bckVoxBasedGraPtr) {
+void reg_ssd::InitialiseMeasure(nifti_image *refImg,
+                                nifti_image *floImg,
+                                int *refMask,
+                                nifti_image *warpedImg,
+                                nifti_image *warpedGrad,
+                                nifti_image *voxelBasedGrad,
+                                nifti_image *localWeightSim,
+                                int *floMask,
+                                nifti_image *warpedImgBw,
+                                nifti_image *warpedGradBw,
+                                nifti_image *voxelBasedGradBw) {
     // Set the pointers using the parent class function
-    reg_measure::InitialiseMeasure(refImgPtr,
-                                   floImgPtr,
-                                   maskRefPtr,
-                                   warFloImgPtr,
-                                   warFloGraPtr,
-                                   forVoxBasedGraPtr,
-                                   localWeightSimPtr,
-                                   maskFloPtr,
-                                   warRefImgPtr,
-                                   warRefGraPtr,
-                                   bckVoxBasedGraPtr);
+    reg_measure::InitialiseMeasure(refImg,
+                                   floImg,
+                                   refMask,
+                                   warpedImg,
+                                   warpedGrad,
+                                   voxelBasedGrad,
+                                   localWeightSim,
+                                   floMask,
+                                   warpedImgBw,
+                                   warpedGradBw,
+                                   voxelBasedGradBw);
 
     // Check that the input images have the same number of time point
-    if (this->referenceImagePointer->nt != this->floatingImagePointer->nt) {
+    if (this->referenceImage->nt != this->floatingImage->nt) {
         reg_print_fct_error("reg_ssd::InitialiseMeasure");
         reg_print_msg_error("This number of time point should be the same for both input images");
         reg_exit();
     }
     // Input images are normalised between 0 and 1
-    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+    for (int i = 0; i < this->referenceImage->nt; ++i) {
         if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) {
             //sets max value over both images to be 1 and min value over both images to be 0
             //scales values such that identical values in the images are still identical after scaling
-            float maxF = reg_tools_getMaxValue(this->floatingImagePointer, i);
-            float maxR = reg_tools_getMaxValue(this->referenceImagePointer, i);
-            float minF = reg_tools_getMinValue(this->floatingImagePointer, i);
-            float minR = reg_tools_getMinValue(this->referenceImagePointer, i);
+            float maxF = reg_tools_getMaxValue(this->floatingImage, i);
+            float maxR = reg_tools_getMaxValue(this->referenceImage, i);
+            float minF = reg_tools_getMinValue(this->floatingImage, i);
+            float minR = reg_tools_getMinValue(this->referenceImage, i);
             float maxFR = fmax(maxF, maxR);
             float minFR = fmin(minF, minR);
             float rangeFR = maxFR - minFR;
-            reg_intensityRescale(this->referenceImagePointer,
+            reg_intensityRescale(this->referenceImage,
                                  i,
                                  (minR - minFR) / rangeFR,
                                  1 - ((maxFR - maxR) / rangeFR));
-            reg_intensityRescale(this->floatingImagePointer,
+            reg_intensityRescale(this->floatingImage,
                                  i,
                                  (minF - minFR) / rangeFR,
                                  1 - ((maxFR - maxF) / rangeFR));
@@ -83,12 +83,12 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImgPtr,
 #ifndef NDEBUG
     char text[255];
     reg_print_msg_debug("reg_ssd::InitialiseMeasure().");
-    for (int i = 0; i < this->referenceImagePointer->nt; ++i) {
+    for (int i = 0; i < this->referenceImage->nt; ++i) {
         sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
         reg_print_msg_debug(text);
     }
     sprintf(text, "Normalize time point:");
-    for (int i = 0; i < this->referenceImagePointer->nt; ++i)
+    for (int i = 0; i < this->referenceImage->nt; ++i)
         if (this->normaliseTimePoint[i])
             sprintf(text, "%s %i", text, i);
     reg_print_msg_debug(text);
@@ -187,30 +187,30 @@ template double reg_getSSDValue<double>(nifti_image*, nifti_image*, double*, nif
 /* *************************************************************** */
 double reg_ssd::GetSimilarityMeasureValue() {
     // Check that all the specified image are of the same datatype
-    if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) {
+    if (this->warpedImage->datatype != this->referenceImage->datatype) {
         reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
         reg_print_msg_error("Both input images are expected to have the same type");
         reg_exit();
     }
     double SSDValue = 0;
-    switch (this->referenceImagePointer->datatype) {
+    switch (this->referenceImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        SSDValue = reg_getSSDValue<float>(this->referenceImagePointer,
-                                          this->warpedFloatingImagePointer,
+        SSDValue = reg_getSSDValue<float>(this->referenceImage,
+                                          this->warpedImage,
                                           this->timePointWeight,
                                           nullptr, // TODO this->forwardJacDetImagePointer,
-                                          this->referenceMaskPointer,
+                                          this->referenceMask,
                                           this->currentValue,
-                                          this->forwardLocalWeightSimImagePointer);
+                                          this->localWeightSim);
         break;
     case NIFTI_TYPE_FLOAT64:
-        SSDValue = reg_getSSDValue<double>(this->referenceImagePointer,
-                                           this->warpedFloatingImagePointer,
+        SSDValue = reg_getSSDValue<double>(this->referenceImage,
+                                           this->warpedImage,
                                            this->timePointWeight,
                                            nullptr, // TODO this->forwardJacDetImagePointer,
-                                           this->referenceMaskPointer,
+                                           this->referenceMask,
                                            this->currentValue,
-                                           this->forwardLocalWeightSimImagePointer);
+                                           this->localWeightSim);
         break;
     default:
         reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
@@ -221,27 +221,27 @@ double reg_ssd::GetSimilarityMeasureValue() {
     // Backward computation
     if (this->isSymmetric) {
         // Check that all the specified image are of the same datatype
-        if (this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) {
+        if (this->warpedImageBw->datatype != this->floatingImage->datatype) {
             reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
             reg_print_msg_error("Both input images are expected to have the same type");
             reg_exit();
         }
-        switch (this->floatingImagePointer->datatype) {
+        switch (this->floatingImage->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            SSDValue += reg_getSSDValue<float>(this->floatingImagePointer,
-                                               this->warpedReferenceImagePointer,
+            SSDValue += reg_getSSDValue<float>(this->floatingImage,
+                                               this->warpedImageBw,
                                                this->timePointWeight,
                                                nullptr, // TODO this->backwardJacDetImagePointer,
-                                               this->floatingMaskPointer,
+                                               this->floatingMask,
                                                this->currentValue,
                                                nullptr);
             break;
         case NIFTI_TYPE_FLOAT64:
-            SSDValue += reg_getSSDValue<double>(this->floatingImagePointer,
-                                                this->warpedReferenceImagePointer,
+            SSDValue += reg_getSSDValue<double>(this->floatingImage,
+                                                this->warpedImageBw,
                                                 this->timePointWeight,
                                                 nullptr, // TODO this->backwardJacDetImagePointer,
-                                                this->floatingMaskPointer,
+                                                this->floatingMask,
                                                 this->currentValue,
                                                 nullptr);
             break;
@@ -262,10 +262,10 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                                   nifti_image *measureGradientImage,
                                   nifti_image *jacobianDetImage,
                                   int *mask,
-                                  int current_timepoint,
-                                  double timepoint_weight,
+                                  int currentTimepoint,
+                                  double timepointWeight,
                                   nifti_image *localWeightSimImage) {
-    if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) {
+    if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) {
         reg_print_fct_error("reg_getVoxelBasedNMIGradient2D");
         reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
         reg_exit();
@@ -280,9 +280,9 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
 #endif
     // Pointers to the image data
     DataType *refImagePtr = static_cast<DataType *>(referenceImage->data);
-    DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber];
+    DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
     DataType *warImagePtr = static_cast<DataType *>(warpedImage->data);
-    DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber];
+    DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
     DataType *spatialGradPtrX = static_cast<DataType *>(warpedGradient->data);
@@ -315,7 +315,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                 activeVoxel_num += 1.0;
         }
     }
-    double adjusted_weight = timepoint_weight / activeVoxel_num;
+    double adjusted_weight = timepointWeight / activeVoxel_num;
 
     double refValue, warValue, common;
 
@@ -364,17 +364,17 @@ template void reg_getVoxelBasedSSDGradient<float>
 template void reg_getVoxelBasedSSDGradient<double>
 (nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*);
 /* *************************************************************** */
-void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint);
-    if (this->timePointWeight[current_timepoint] == 0)
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
+    if (this->timePointWeight[currentTimepoint] == 0)
         return;
 
     // Check if all required input images are of the same data type
-    int dtype = this->referenceImagePointer->datatype;
-    if (this->warpedFloatingImagePointer->datatype != dtype ||
-        this->warpedFloatingGradientImagePointer->datatype != dtype ||
-        this->forwardVoxelBasedGradientImagePointer->datatype != dtype) {
+    int dtype = this->referenceImage->datatype;
+    if (this->warpedImage->datatype != dtype ||
+        this->warpedGradient->datatype != dtype ||
+        this->voxelBasedGradient->datatype != dtype) {
         reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
         reg_print_msg_error("Input images are expected to be of the same type");
         reg_exit();
@@ -382,26 +382,26 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     // Compute the gradient of the ssd for the forward transformation
     switch (dtype) {
     case NIFTI_TYPE_FLOAT32:
-        reg_getVoxelBasedSSDGradient<float>(this->referenceImagePointer,
-                                            this->warpedFloatingImagePointer,
-                                            this->warpedFloatingGradientImagePointer,
-                                            this->forwardVoxelBasedGradientImagePointer,
+        reg_getVoxelBasedSSDGradient<float>(this->referenceImage,
+                                            this->warpedImage,
+                                            this->warpedGradient,
+                                            this->voxelBasedGradient,
                                             nullptr, // TODO this->forwardJacDetImagePointer,
-                                            this->referenceMaskPointer,
-                                            current_timepoint,
-                                            this->timePointWeight[current_timepoint],
-                                            this->forwardLocalWeightSimImagePointer);
+                                            this->referenceMask,
+                                            currentTimepoint,
+                                            this->timePointWeight[currentTimepoint],
+                                            this->localWeightSim);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_getVoxelBasedSSDGradient<double>(this->referenceImagePointer,
-                                             this->warpedFloatingImagePointer,
-                                             this->warpedFloatingGradientImagePointer,
-                                             this->forwardVoxelBasedGradientImagePointer,
+        reg_getVoxelBasedSSDGradient<double>(this->referenceImage,
+                                             this->warpedImage,
+                                             this->warpedGradient,
+                                             this->voxelBasedGradient,
                                              nullptr, // TODO this->forwardJacDetImagePointer,
-                                             this->referenceMaskPointer,
-                                             current_timepoint,
-                                             this->timePointWeight[current_timepoint],
-                                             this->forwardLocalWeightSimImagePointer);
+                                             this->referenceMask,
+                                             currentTimepoint,
+                                             this->timePointWeight[currentTimepoint],
+                                             this->localWeightSim);
         break;
     default:
         reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
@@ -410,10 +410,10 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
     }
     // Compute the gradient of the ssd for the backward transformation
     if (this->isSymmetric) {
-        dtype = this->floatingImagePointer->datatype;
-        if (this->warpedReferenceImagePointer->datatype != dtype ||
-            this->warpedReferenceGradientImagePointer->datatype != dtype ||
-            this->backwardVoxelBasedGradientImagePointer->datatype != dtype) {
+        dtype = this->floatingImage->datatype;
+        if (this->warpedImageBw->datatype != dtype ||
+            this->warpedGradientBw->datatype != dtype ||
+            this->voxelBasedGradientBw->datatype != dtype) {
             reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
             reg_print_msg_error("Input images are expected to be of the same type");
             reg_exit();
@@ -421,25 +421,25 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
         // Compute the gradient of the nmi for the backward transformation
         switch (dtype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedSSDGradient<float>(this->floatingImagePointer,
-                                                this->warpedReferenceImagePointer,
-                                                this->warpedReferenceGradientImagePointer,
-                                                this->backwardVoxelBasedGradientImagePointer,
+            reg_getVoxelBasedSSDGradient<float>(this->floatingImage,
+                                                this->warpedImageBw,
+                                                this->warpedGradientBw,
+                                                this->voxelBasedGradientBw,
                                                 nullptr, // TODO this->backwardJacDetImagePointer,
-                                                this->floatingMaskPointer,
-                                                current_timepoint,
-                                                this->timePointWeight[current_timepoint],
+                                                this->floatingMask,
+                                                currentTimepoint,
+                                                this->timePointWeight[currentTimepoint],
                                                 nullptr);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedSSDGradient<double>(this->floatingImagePointer,
-                                                 this->warpedReferenceImagePointer,
-                                                 this->warpedReferenceGradientImagePointer,
-                                                 this->backwardVoxelBasedGradientImagePointer,
+            reg_getVoxelBasedSSDGradient<double>(this->floatingImage,
+                                                 this->warpedImageBw,
+                                                 this->warpedGradientBw,
+                                                 this->voxelBasedGradientBw,
                                                  nullptr, // TODO this->backwardJacDetImagePointer,
-                                                 this->floatingMaskPointer,
-                                                 current_timepoint,
-                                                 this->timePointWeight[current_timepoint],
+                                                 this->floatingMask,
+                                                 currentTimepoint,
+                                                 this->timePointWeight[currentTimepoint],
                                                  nullptr);
             break;
         default:
@@ -954,25 +954,25 @@ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage,
                                   float *discretisedValue,
                                   int discretise_radius,
                                   int discretise_step) {
-    if (referenceImagePointer->nz > 1) {
-        switch (this->referenceImagePointer->datatype) {
+    if (referenceImage->nz > 1) {
+        switch (this->referenceImage->datatype) {
         case NIFTI_TYPE_FLOAT32:
             GetDiscretisedValueSSD_core3D_2<float>(controlPointGridImage,
                                                    discretisedValue,
                                                    discretise_radius,
                                                    discretise_step,
-                                                   this->referenceImagePointer,
-                                                   this->warpedFloatingImagePointer,
-                                                   this->referenceMaskPointer);
+                                                   this->referenceImage,
+                                                   this->warpedImage,
+                                                   this->referenceMask);
             break;
         case NIFTI_TYPE_FLOAT64:
             GetDiscretisedValueSSD_core3D_2<double>(controlPointGridImage,
                                                     discretisedValue,
                                                     discretise_radius,
                                                     discretise_step,
-                                                    this->referenceImagePointer,
-                                                    this->warpedFloatingImagePointer,
-                                                    this->referenceMaskPointer);
+                                                    this->referenceImage,
+                                                    this->warpedImage,
+                                                    this->referenceMask);
             break;
         default:
             reg_print_fct_error("reg_ssd::GetDiscretisedValue");
@@ -983,24 +983,24 @@ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage,
         reg_print_fct_error("reg_ssd::GetDiscretisedValue");
         reg_print_msg_error("Not implemented in 2D yet");
         reg_exit();
-        // switch (this->referenceImagePointer->datatype) {
+        // switch (this->referenceImage->datatype) {
         // case NIFTI_TYPE_FLOAT32:
         //     GetDiscretisedValueSSD_core2D<float>(controlPointGridImage,
         //                                          discretisedValue,
         //                                          discretise_radius,
         //                                          discretise_step,
-        //                                          this->referenceImagePointer,
-        //                                          this->warpedFloatingImagePointer,
-        //                                          this->referenceMaskPointer);
+        //                                          this->referenceImage,
+        //                                          this->warpedImage,
+        //                                          this->referenceMask);
         //     break;
         // case NIFTI_TYPE_FLOAT64:
         //     GetDiscretisedValueSSD_core2D<double>(controlPointGridImage,
         //                                           discretisedValue,
         //                                           discretise_radius,
         //                                           discretise_step,
-        //                                           this->referenceImagePointer,
-        //                                           this->warpedFloatingImagePointer,
-        //                                           this->referenceMaskPointer);
+        //                                           this->referenceImage,
+        //                                           this->warpedImage,
+        //                                           this->referenceMask);
         //     break;
         // default:
         //     reg_print_fct_error("reg_ssd::GetDiscretisedValue");
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index c2ab3f99..5492f60c 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -27,23 +27,23 @@ class reg_ssd: public reg_measure {
     virtual ~reg_ssd() {}
 
     /// @brief Initialise the reg_ssd object
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr = nullptr,
-                                   int *maskFloPtr = nullptr,
-                                   nifti_image *warRefImgPtr = nullptr,
-                                   nifti_image *warRefGraPtr = nullptr,
-                                   nifti_image *bckVoxBasedGraPtr = nullptr) override;
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr) override;
     /// @brief Define if the specified time point should be normalised
     void SetNormaliseTimepoint(int timepoint, bool normalise);
     /// @brief Returns the ssd value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based ssd gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
     /// @brief Here
     virtual void GetDiscretisedValue(nifti_image *controlPointGridImage,
                                      float *discretisedValue,
@@ -99,6 +99,6 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                                   nifti_image *ssdGradientImage,
                                   nifti_image *jacobianDeterminantImage,
                                   int *mask,
-                                  int current_timepoint,
-                                  double timepoint_weight,
+                                  int currentTimepoint,
+                                  double timepointWeight,
                                   nifti_image *localWeightImage);
diff --git a/reg-lib/cuda/CudaContext.hpp b/reg-lib/cuda/CudaContext.hpp
index 0e4af74e..e9e9ca32 100644
--- a/reg-lib/cuda/CudaContext.hpp
+++ b/reg-lib/cuda/CudaContext.hpp
@@ -1,7 +1,7 @@
 #pragma once
 
 #include <cuda.h>
-#include "_reg_maths.h"
+#include "_reg_tools.h"
 #include "BlockSize.hpp"
 
 namespace NiftyReg {
@@ -33,7 +33,7 @@ class CudaContext {
     CUcontext cudaContext;
     unsigned numDevices;
     unsigned cudaIdx;
-    std::unique_ptr<BlockSize> blockSize;
+    unique_ptr<BlockSize> blockSize;
 
     void PickCard(unsigned deviceId);
     void SetBlockSize(int major);
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 31fc61dd..15886661 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -130,7 +130,7 @@ extern "C++"
 template <class DataType>
 int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned);
 /* *************************************************************** */
-using UniqueTextureObjectPtr = std::unique_ptr<cudaTextureObject_t, void(*)(cudaTextureObject_t*)>;
+using UniqueTextureObjectPtr = unique_ptr<cudaTextureObject_t, void(*)(cudaTextureObject_t*)>;
 /* *************************************************************** */
 extern "C++"
 UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 54a40264..f6c9615f 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -21,29 +21,29 @@ class reg_measure_gpu {
     /// @brief Measure class destructor
     virtual ~reg_measure_gpu() {}
 
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
                                    size_t activeVoxNum,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr,
-                                   cudaArray *refDevicePtr,
-                                   cudaArray *floDevicePtr,
-                                   int *refMskDevicePtr,
-                                   float *warFloDevicePtr,
-                                   float4 *warFloGradDevicePtr,
-                                   float4 *forVoxBasedGraDevicePtr) = 0;
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim,
+                                   cudaArray *refImgCuda,
+                                   cudaArray *floImgCuda,
+                                   int *refMaskCuda,
+                                   float *warpedImgCuda,
+                                   float4 *warpedGradCuda,
+                                   float4 *voxelBasedGradCuda) = 0;
 
 protected:
-    cudaArray *referenceDevicePointer;
-    cudaArray *floatingDevicePointer;
-    int *referenceMaskDevicePointer;
+    cudaArray *referenceImageCuda;
+    cudaArray *floatingImageCuda;
+    int *referenceMaskCuda;
     size_t activeVoxelNumber;
-    float *warpedFloatingDevicePointer;
-    float4 *warpedFloatingGradientDevicePointer;
-    float4 *forwardVoxelBasedGradientDevicePointer;
+    float *warpedImageCuda;
+    float4 *warpedGradientCuda;
+    float4 *voxelBasedGradientCuda;
 };
 /* *************************************************************** */
 class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
@@ -56,24 +56,24 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
     /// @brief reg_lncc class destructor
     virtual ~reg_lncc_gpu() {}
 
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
                                    size_t activeVoxNum,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr,
-                                   cudaArray *refDevicePtr,
-                                   cudaArray *floDevicePtr,
-                                   int *refMskDevicePtr,
-                                   float *warFloDevicePtr,
-                                   float4 *warFloGradDevicePtr,
-                                   float4 *forVoxBasedGraDevicePtr) override {}
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim,
+                                   cudaArray *refImgCuda,
+                                   cudaArray *floImgCuda,
+                                   int *refMaskCuda,
+                                   float *warpedImgCuda,
+                                   float4 *warpedGradCuda,
+                                   float4 *voxelBasedGradCuda) override {}
     /// @brief Returns the lncc value
     virtual double GetSimilarityMeasureValue() override { return 0; }
     /// @brief Compute the voxel based lncc gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
 };
 /* *************************************************************** */
 class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
@@ -86,24 +86,24 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
     /// @brief reg_kld_gpu class destructor
     virtual ~reg_kld_gpu() {}
 
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
                                    size_t activeVoxNum,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr,
-                                   cudaArray *refDevicePtr,
-                                   cudaArray *floDevicePtr,
-                                   int *refMskDevicePtr,
-                                   float *warFloDevicePtr,
-                                   float4 *warFloGradDevicePtr,
-                                   float4 *forVoxBasedGraDevicePtr) override {}
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim,
+                                   cudaArray *refImgCuda,
+                                   cudaArray *floImgCuda,
+                                   int *refMaskCuda,
+                                   float *warpedImgCuda,
+                                   float4 *warpedGradCuda,
+                                   float4 *voxelBasedGradCuda) override {}
     /// @brief Returns the kld value
     virtual double GetSimilarityMeasureValue() override { return 0; }
     /// @brief Compute the voxel based kld gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
 };
 /* *************************************************************** */
 class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
@@ -116,23 +116,23 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
     /// @brief reg_dti_gpu class destructor
     virtual ~reg_dti_gpu() {}
 
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
                                    size_t activeVoxNum,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr,
-                                   cudaArray *refDevicePtr,
-                                   cudaArray *floDevicePtr,
-                                   int *refMskDevicePtr,
-                                   float *warFloDevicePtr,
-                                   float4 *warFloGradDevicePtr,
-                                   float4 *forVoxBasedGraDevicePtr) override {}
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim,
+                                   cudaArray *refImgCuda,
+                                   cudaArray *floImgCuda,
+                                   int *refMaskCuda,
+                                   float *warpedImgCuda,
+                                   float4 *warpedGradCuda,
+                                   float4 *voxelBasedGradCuda) override {}
     /// @brief Returns the dti value
     virtual double GetSimilarityMeasureValue() override { return 0; }
     /// @brief Compute the voxel based dti gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
 };
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 9aebb418..1f5c1997 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -40,27 +40,27 @@ void reg_nmi_gpu::DeallocateHistogram() {
 #endif
 }
 /* *************************************************************** */
-void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
-                                    nifti_image *floImgPtr,
-                                    int *maskRefPtr,
+void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg,
+                                    nifti_image *floImg,
+                                    int *refMask,
                                     size_t activeVoxNum,
-                                    nifti_image *warFloImgPtr,
-                                    nifti_image *warFloGraPtr,
-                                    nifti_image *forVoxBasedGraPtr,
-                                    nifti_image *localWeightSimPtr,
-                                    cudaArray *refDevicePtr,
-                                    cudaArray *floDevicePtr,
-                                    int *refMskDevicePtr,
-                                    float *warFloDevicePtr,
-                                    float4 *warFloGradDevicePtr,
-                                    float4 *forVoxBasedGraDevicePtr) {
+                                    nifti_image *warpedImg,
+                                    nifti_image *warpedGrad,
+                                    nifti_image *voxelBasedGrad,
+                                    nifti_image *localWeightSim,
+                                    cudaArray *refImgCuda,
+                                    cudaArray *floImgCuda,
+                                    int *refMaskCuda,
+                                    float *warpedImgCuda,
+                                    float4 *warpedGradCuda,
+                                    float4 *voxelBasedGradCuda) {
     this->DeallocateHistogram();
-    reg_nmi::InitialiseMeasure(refImgPtr,
-                               floImgPtr,
-                               maskRefPtr,
-                               warFloImgPtr,
-                               warFloGraPtr,
-                               forVoxBasedGraPtr);
+    reg_nmi::InitialiseMeasure(refImg,
+                               floImg,
+                               refMask,
+                               warpedImg,
+                               warpedGrad,
+                               voxelBasedGrad);
     // Check if a symmetric measure is required
     if (this->isSymmetric) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
@@ -68,33 +68,33 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
         reg_exit();
     }
     // Check if the input images have multiple timepoints
-    if (this->referenceTimePoint > 1 || this->floatingImagePointer->nt > 1) {
+    if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
         fprintf(stderr, "[NiftyReg ERROR] Multiple timepoints are not yet supported on the GPU\n");
         reg_exit();
     }
     // Check that the input image are of type float
-    if (this->referenceImagePointer->datatype != NIFTI_TYPE_FLOAT32 ||
-        this->warpedFloatingImagePointer->datatype != NIFTI_TYPE_FLOAT32) {
+    if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 ||
+        this->warpedImage->datatype != NIFTI_TYPE_FLOAT32) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
         fprintf(stderr, "[NiftyReg ERROR] Only single precision is supported on the GPU\n");
         reg_exit();
     }
     // Bind the required pointers
-    this->referenceDevicePointer = refDevicePtr;
-    this->floatingDevicePointer = floDevicePtr;
-    this->referenceMaskDevicePointer = refMskDevicePtr;
+    this->referenceImageCuda = refImgCuda;
+    this->floatingImageCuda = floImgCuda;
+    this->referenceMaskCuda = refMaskCuda;
     this->activeVoxelNumber = activeVoxNum;
-    this->warpedFloatingDevicePointer = warFloDevicePtr;
-    this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr;
-    this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr;
+    this->warpedImageCuda = warpedImgCuda;
+    this->warpedGradientCuda = warpedGradCuda;
+    this->voxelBasedGradientCuda = voxelBasedGradCuda;
     // The reference and floating images have to be updated on the device
-    if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->referenceDevicePointer, this->referenceImagePointer)) {
+    if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->referenceImageCuda, this->referenceImage)) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
         printf("[NiftyReg ERROR] Error when transferring the reference image.\n");
         reg_exit();
     }
-    if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->floatingDevicePointer, this->floatingImagePointer)) {
+    if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->floatingImageCuda, this->floatingImage)) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
         printf("[NiftyReg ERROR] Error when transferring the floating image.\n");
         reg_exit();
@@ -110,14 +110,14 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr,
 double reg_nmi_gpu::GetSimilarityMeasureValue() {
     // The NMI computation is performed into the host for now
     // The relevant images have to be transferred from the device to the host
-    NR_CUDA_SAFE_CALL(cudaMemcpy(this->warpedFloatingImagePointer->data,
-                                 this->warpedFloatingDevicePointer,
-                                 this->warpedFloatingImagePointer->nvox *
-                                 this->warpedFloatingImagePointer->nbyper,
+    NR_CUDA_SAFE_CALL(cudaMemcpy(this->warpedImage->data,
+                                 this->warpedImageCuda,
+                                 this->warpedImage->nvox *
+                                 this->warpedImage->nbyper,
                                  cudaMemcpyDeviceToHost));
 
-    reg_getNMIValue<float>(this->referenceImagePointer,
-                           this->warpedFloatingImagePointer,
+    reg_getNMIValue<float>(this->referenceImage,
+                           this->warpedImage,
                            this->timePointWeight,
                            this->referenceBinNumber,
                            this->floatingBinNumber,
@@ -125,7 +125,7 @@ double reg_nmi_gpu::GetSimilarityMeasureValue() {
                            this->forwardJointHistogramLog,
                            this->forwardJointHistogramPro,
                            this->forwardEntropyValues,
-                           this->referenceMaskPointer);
+                           this->referenceMask);
 
     const double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2];
 
@@ -189,7 +189,7 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
     }
 }
 /* *************************************************************** */
-void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
+void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // The latest joint histogram is transferred onto the GPU
     float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float));
     for (unsigned short i = 0; i < this->totalBinNumber[0]; ++i)
@@ -201,13 +201,13 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint)
     free(temp);
 
     // The gradient of the NMI is computed on the GPU
-    reg_getVoxelBasedNMIGradient_gpu(this->referenceImagePointer,
-                                     this->referenceDevicePointer,
-                                     this->warpedFloatingDevicePointer,
-                                     this->warpedFloatingGradientDevicePointer,
+    reg_getVoxelBasedNMIGradient_gpu(this->referenceImage,
+                                     this->referenceImageCuda,
+                                     this->warpedImageCuda,
+                                     this->warpedGradientCuda,
                                      this->forwardJointHistogramLog_device,
-                                     this->forwardVoxelBasedGradientDevicePointer,
-                                     this->referenceMaskDevicePointer,
+                                     this->voxelBasedGradientCuda,
+                                     this->referenceMaskCuda,
                                      this->activeVoxelNumber,
                                      this->forwardEntropyValues[0],
                                      this->referenceBinNumber[0],
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index 46aa61f0..ea3da371 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -25,24 +25,24 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
     virtual ~reg_nmi_gpu();
 
     /// @brief Initialise the reg_nmi_gpu object
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
                                    size_t activeVoxNum,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr,
-                                   cudaArray *refDevicePtr,
-                                   cudaArray *floDevicePtr,
-                                   int *refMskDevicePtr,
-                                   float *warFloDevicePtr,
-                                   float4 *warFloGradDevicePtr,
-                                   float4 *forVoxBasedGraDevicePtr) override;
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim,
+                                   cudaArray *refImgCuda,
+                                   cudaArray *floImgCuda,
+                                   int *refMaskCuda,
+                                   float *warpedImgCuda,
+                                   float4 *warpedGradCuda,
+                                   float4 *voxelBasedGradCuda) override;
     /// @brief Returns the nmi value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based nmi gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
 
 protected:
     float *forwardJointHistogramLog_device;
@@ -53,20 +53,20 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
 /// @brief NMI measure of similarity class
 class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu {
 public:
-    void InitialiseMeasure(nifti_image *refImgPtr,
-                           nifti_image *floImgPtr,
-                           int *maskRefPtr,
+    void InitialiseMeasure(nifti_image *refImg,
+                           nifti_image *floImg,
+                           int *refMask,
                            size_t activeVoxNum,
-                           nifti_image *warFloImgPtr,
-                           nifti_image *warFloGraPtr,
-                           nifti_image *forVoxBasedGraPtr,
-                           nifti_image *localWeightSimPtr,
-                           cudaArray *refDevicePtr,
-                           cudaArray *floDevicePtr,
-                           int *refMskDevicePtr,
-                           float *warFloDevicePtr,
-                           float4 *warFloGradDevicePtr,
-                           float4 *forVoxBasedGraDevicePtr) override {}
+                           nifti_image *warpedImg,
+                           nifti_image *warpedGrad,
+                           nifti_image *voxelBasedGrad,
+                           nifti_image *localWeightSim,
+                           cudaArray *refImgCuda,
+                           cudaArray *floImgCuda,
+                           int *refMaskCuda,
+                           float *warpedImgCuda,
+                           float4 *warpedGradCuda,
+                           float4 *voxelBasedGradCuda) override {}
     /// @brief reg_nmi class constructor
     reg_multichannel_nmi_gpu() {}
     /// @brief reg_nmi class destructor
@@ -74,6 +74,6 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_
     /// @brief Returns the nmi value
     virtual double GetSimilarityMeasureValue() override { return 0; }
     /// @brief Compute the voxel based nmi gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
 };
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index c126e4fa..275fc7ef 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -20,27 +20,27 @@ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() {
 #endif
 }
 /* *************************************************************** */
-void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr,
-                                    nifti_image *floImgPtr,
-                                    int *maskRefPtr,
+void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg,
+                                    nifti_image *floImg,
+                                    int *refMask,
                                     size_t activeVoxNum,
-                                    nifti_image *warFloImgPtr,
-                                    nifti_image *warFloGraPtr,
-                                    nifti_image *forVoxBasedGraPtr,
-                                    nifti_image *localWeightSimPtr,
-                                    cudaArray *refDevicePtr,
-                                    cudaArray *floDevicePtr,
-                                    int *refMskDevicePtr,
-                                    float *warFloDevicePtr,
-                                    float4 *warFloGradDevicePtr,
-                                    float4 *forVoxBasedGraDevicePtr) {
-    reg_ssd::InitialiseMeasure(refImgPtr,
-                               floImgPtr,
-                               maskRefPtr,
-                               warFloImgPtr,
-                               warFloGraPtr,
-                               forVoxBasedGraPtr,
-                               localWeightSimPtr);
+                                    nifti_image *warpedImg,
+                                    nifti_image *warpedGrad,
+                                    nifti_image *voxelBasedGrad,
+                                    nifti_image *localWeightSim,
+                                    cudaArray *refImgCuda,
+                                    cudaArray *floImgCuda,
+                                    int *refMaskCuda,
+                                    float *warpedImgCuda,
+                                    float4 *warpedGradCuda,
+                                    float4 *voxelBasedGradCuda) {
+    reg_ssd::InitialiseMeasure(refImg,
+                               floImg,
+                               refMask,
+                               warpedImg,
+                               warpedGrad,
+                               voxelBasedGrad,
+                               localWeightSim);
     // Check if a symmetric measure is required
     if (this->isSymmetric) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
@@ -48,26 +48,26 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr,
         reg_exit();
     }
     // Check that the input image are of type float
-    if (this->referenceImagePointer->datatype != NIFTI_TYPE_FLOAT32 ||
-        this->warpedFloatingImagePointer->datatype != NIFTI_TYPE_FLOAT32) {
+    if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 ||
+        this->warpedImage->datatype != NIFTI_TYPE_FLOAT32) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
         fprintf(stderr, "[NiftyReg ERROR] The input images are expected to be float\n");
         reg_exit();
     }
     // Check that the input images have only one time point
-    if (this->referenceImagePointer->nt > 1 || this->floatingImagePointer->nt > 1) {
+    if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) {
         fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
         fprintf(stderr, "[NiftyReg ERROR] Both input images should have only one time point\n");
         reg_exit();
     }
     // Bind the required pointers
-    this->referenceDevicePointer = refDevicePtr;
-    this->floatingDevicePointer = floDevicePtr;
-    this->referenceMaskDevicePointer = refMskDevicePtr;
+    this->referenceImageCuda = refImgCuda;
+    this->floatingImageCuda = floImgCuda;
+    this->referenceMaskCuda = refMaskCuda;
     this->activeVoxelNumber = activeVoxNum;
-    this->warpedFloatingDevicePointer = warFloDevicePtr;
-    this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr;
-    this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr;
+    this->warpedImageCuda = warpedImgCuda;
+    this->warpedGradientCuda = warpedGradCuda;
+    this->voxelBasedGradientCuda = voxelBasedGradCuda;
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n");
 #endif
@@ -115,10 +115,10 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage,
 }
 /* *************************************************************** */
 double reg_ssd_gpu::GetSimilarityMeasureValue() {
-    const double SSDValue = reg_getSSDValue_gpu(this->referenceImagePointer,
-                                                this->referenceDevicePointer,
-                                                this->warpedFloatingDevicePointer,
-                                                this->referenceMaskDevicePointer,
+    const double SSDValue = reg_getSSDValue_gpu(this->referenceImage,
+                                                this->referenceImageCuda,
+                                                this->warpedImageCuda,
+                                                this->referenceMaskCuda,
                                                 this->activeVoxelNumber);
     return -SSDValue;
 }
@@ -159,14 +159,14 @@ void reg_getVoxelBasedSSDGradient_gpu(const nifti_image *referenceImage,
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) {
-    reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer,
-                                     this->referenceDevicePointer,
-                                     this->warpedFloatingDevicePointer,
-                                     this->warpedFloatingGradientDevicePointer,
-                                     this->forwardVoxelBasedGradientDevicePointer,
+void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
+    reg_getVoxelBasedSSDGradient_gpu(this->referenceImage,
+                                     this->referenceImageCuda,
+                                     this->warpedImageCuda,
+                                     this->warpedGradientCuda,
+                                     this->voxelBasedGradientCuda,
                                      1.f,
-                                     this->referenceMaskDevicePointer,
+                                     this->referenceMaskCuda,
                                      this->activeVoxelNumber);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 69a6602b..c95d4064 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -26,23 +26,23 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
     virtual ~reg_ssd_gpu() {}
 
     /// @brief Initialise the reg_ssd object
-    virtual void InitialiseMeasure(nifti_image *refImgPtr,
-                                   nifti_image *floImgPtr,
-                                   int *maskRefPtr,
+    virtual void InitialiseMeasure(nifti_image *refImg,
+                                   nifti_image *floImg,
+                                   int *refMask,
                                    size_t activeVoxNum,
-                                   nifti_image *warFloImgPtr,
-                                   nifti_image *warFloGraPtr,
-                                   nifti_image *forVoxBasedGraPtr,
-                                   nifti_image *localWeightSimPtr,
-                                   cudaArray *refDevicePtr,
-                                   cudaArray *floDevicePtr,
-                                   int *refMskDevicePtr,
-                                   float *warFloDevicePtr,
-                                   float4 *warFloGradDevicePtr,
-                                   float4 *forVoxBasedGraDevicePtr) override;
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim,
+                                   cudaArray *refImgCuda,
+                                   cudaArray *floImgCuda,
+                                   int *refMaskCuda,
+                                   float *warpedImgCuda,
+                                   float4 *warpedGradCuda,
+                                   float4 *voxelBasedGradCuda) override;
     /// @brief Returns the ssd value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based ssd gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
 };
 /* *************************************************************** */
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index a332db78..f9609036 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -110,6 +110,7 @@ include(Catch)
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
 set(EXEC_LIST reg_test_affineDeformationField)
+set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST})
 set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST})
 set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
@@ -117,7 +118,6 @@ set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
-set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST})
 if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index fcff3e96..f237a44d 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -1,7 +1,6 @@
 #include "reg_test_common.h"
 #include "_reg_blockMatching.h"
-#include "CpuBlockMatchingKernel.h"
-#include "CudaBlockMatchingKernel.h"
+#include "BlockMatchingKernel.h"
 #include "CpuAffineDeformationFieldKernel.h"
 #include "CpuResampleImageKernel.h"
 
@@ -29,7 +28,7 @@ class BMTest {
         std::mt19937 gen(rd());
         std::uniform_real_distribution<float> distr(0, 1);
 
-        // Create a reference 2D and 3D images
+        // Create 2D and 3D reference images
         constexpr NiftiImage::dim_t size = 64;
         vector<NiftiImage::dim_t> dim{ size, size };
         NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
@@ -38,11 +37,11 @@ class BMTest {
 
         // Fill images with random values
         const auto ref2dPtr = reference2d.data();
-        for(auto ref2dItr = ref2dPtr.begin(); ref2dItr!=ref2dPtr.end(); ++ref2dItr){
+        for (auto ref2dItr = ref2dPtr.begin(); ref2dItr != ref2dPtr.end(); ++ref2dItr) {
             *ref2dItr = distr(gen);
         }
         const auto ref3dPtr = reference3d.data();
-        for(auto ref3dItr = ref3dPtr.begin(); ref3dItr!=ref3dPtr.end(); ++ref3dItr){
+        for (auto ref3dItr = ref3dPtr.begin(); ref3dItr != ref3dPtr.end(); ++ref3dItr) {
             *ref3dItr = distr(gen);
         }
 
@@ -54,87 +53,72 @@ class BMTest {
         translationMatrix.m[2][3] = -OFFSET;
 
         // Create a mask so that voxel at the boundary are ignored
-        int *mask2D = new int[reference2d.nVoxels()];
-        int *mask3D = new int[reference3d.nVoxels()];
-        int *mask2dPtr = mask2D;
-        int *mask3dPtr = mask3D;
+        unique_ptr<int[]> mask2d{ new int[reference2d.nVoxels()] };
+        unique_ptr<int[]> mask3d{ new int[reference3d.nVoxels()] };
+        int *mask2dPtr = mask2d.get();
+        int *mask3dPtr = mask3d.get();
         // set all values to -1
-        for(int y=0; y<reference2d->ny;++y)
-            for(int x=0; x<reference2d->nx;++x)
+        for (int y = 0; y < reference2d->ny; ++y)
+            for (int x = 0; x < reference2d->nx; ++x)
                 *mask2dPtr++ = -1;
-        for(int z=0; z<reference3d->nz;++z)
-            for(int y=0; y<reference3d->ny;++y)
-                for(int x=0; x<reference3d->nx;++x)
+        for (int z = 0; z < reference3d->nz; ++z)
+            for (int y = 0; y < reference3d->ny; ++y)
+                for (int x = 0; x < reference3d->nx; ++x)
                     *mask3dPtr++ = -1;
         // Set the internal values to 1
-        for(int y=OFFSET; y<reference2d->ny-OFFSET;++y){
-            mask2dPtr = &mask2D[y*reference2d->nx+OFFSET];
-            for(int x=OFFSET; x<reference2d->nx-OFFSET;++x){
+        for (int y = OFFSET; y < reference2d->ny - OFFSET; ++y) {
+            mask2dPtr = &mask2d[y * reference2d->nx + OFFSET];
+            for (int x = OFFSET; x < reference2d->nx - OFFSET; ++x) {
                 *mask2dPtr++ = 1;
             }
         }
-        for(int z=OFFSET; z<reference3d->nz-OFFSET;++z){
-            for(int y=OFFSET; y<reference3d->ny-OFFSET;++y){
-                mask3dPtr = &mask3D[(z*reference3d->ny+y)*reference3d->nx+OFFSET];
-                for(int x=OFFSET; x<reference3d->nx-OFFSET;++x){
+        for (int z = OFFSET; z < reference3d->nz - OFFSET; ++z) {
+            for (int y = OFFSET; y < reference3d->ny - OFFSET; ++y) {
+                mask3dPtr = &mask3d[(z * reference3d->ny + y) * reference3d->nx + OFFSET];
+                for (int x = OFFSET; x < reference3d->nx - OFFSET; ++x) {
                     *mask3dPtr++ = 1;
                 }
             }
         }
 
         // Apply the transformation in 2D
-        unique_ptr<AladinContent> contentResampling2D{ new AladinContent(
-                reference2d,
-                reference2d
-        ) };
-        contentResampling2D->SetTransformationMatrix(&translationMatrix);
-        std::unique_ptr<AffineDeformationFieldKernel> affineDeformKernel2D{
-            new CpuAffineDeformationFieldKernel(contentResampling2D.get())
-            };
-        affineDeformKernel2D->Calculate();
-        std::unique_ptr<ResampleImageKernel> resampleKernel2D{
-            new CpuResampleImageKernel(contentResampling2D.get())
-            };
-        resampleKernel2D->Calculate(0, std::numeric_limits<float>::quiet_NaN());
+        unique_ptr<AladinContent> contentResampling2d{ new AladinContent(reference2d, reference2d) };
+        contentResampling2d->SetTransformationMatrix(&translationMatrix);
+        unique_ptr<AffineDeformationFieldKernel> affineDeformKernel2d{ new CpuAffineDeformationFieldKernel(contentResampling2d.get()) };
+        affineDeformKernel2d->Calculate();
+        unique_ptr<ResampleImageKernel> resampleKernel2d{ new CpuResampleImageKernel(contentResampling2d.get()) };
+        resampleKernel2d->Calculate(0, std::numeric_limits<float>::quiet_NaN());
 
         // Apply the transformation in 3D
-        unique_ptr<AladinContent> contentResampling3D{ new AladinContent(
-                reference3d,
-                reference3d
-        ) };
-        contentResampling3D->SetTransformationMatrix(&translationMatrix);
-        std::unique_ptr<AffineDeformationFieldKernel> affineDeformKernel3D{
-            new CpuAffineDeformationFieldKernel(contentResampling3D.get())
-            };
-        affineDeformKernel3D->Calculate();
-        std::unique_ptr<ResampleImageKernel> resampleKernel3D{
-            new CpuResampleImageKernel(contentResampling3D.get())
-            };
-        resampleKernel3D->Calculate(0, 0);
+        unique_ptr<AladinContent> contentResampling3d{ new AladinContent(reference3d, reference3d) };
+        contentResampling3d->SetTransformationMatrix(&translationMatrix);
+        unique_ptr<AffineDeformationFieldKernel> affineDeformKernel3d{ new CpuAffineDeformationFieldKernel(contentResampling3d.get()) };
+        affineDeformKernel3d->Calculate();
+        unique_ptr<ResampleImageKernel> resampleKernel3d{ new CpuResampleImageKernel(contentResampling3d.get()) };
+        resampleKernel3d->Calculate(0, 0);
 
         // Create the data container for the regression test
         vector<TestData> testData;
         testData.emplace_back(TestData(
             "BlockMatching 2D",
             reference2d,
-            NiftiImage(contentResampling2D->GetWarped()),
-            mask2D
+            NiftiImage(contentResampling2d->GetWarped()),
+            mask2d.get()
         ));
-        contentResampling2D.release();
+        contentResampling2d.release();
         testData.emplace_back(TestData(
             "BlockMatching 3D",
             reference3d,
-            NiftiImage(contentResampling3D->GetWarped()),
-            mask3D
+            NiftiImage(contentResampling3d->GetWarped()),
+            mask3d.get()
         ));
-        contentResampling3D.release();
+        contentResampling3d.release();
 
         for (auto&& data : testData) {
             // Get the test data
             auto&& [testName, reference, warped, mask] = data;
 
             for (auto&& platformType : PlatformTypes) {
-
                 // Create images
                 NiftiImage referenceTest(reference);
                 NiftiImage warpedTest(warped);
@@ -143,7 +127,7 @@ class BMTest {
                 shared_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<AladinContentCreator> contentCreator{
                     dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin))
-                    };
+                };
                 unique_ptr<AladinContent> content{ contentCreator->Create(
                     referenceTest,
                     referenceTest,
@@ -152,27 +136,22 @@ class BMTest {
                     sizeof(float),
                     100,
                     100,
-                    1) };
+                    1
+                ) };
                 content->SetWarped(warpedTest.disown());
 
-                // Inititialise the block matching
-                unique_ptr<Kernel> bmKernel{ platform->CreateKernel(
-                    BlockMatchingKernel::GetName(), content.get()
-                    ) };
+                // Initialise the block matching
+                unique_ptr<Kernel> bmKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), content.get()) };
 
                 // Do the computation
                 bmKernel->castTo<BlockMatchingKernel>()->Calculate();
 
                 // Retrieve the information
-                unique_ptr<_reg_blockMatchingParam> blockMatchingParams{
-                    new _reg_blockMatchingParam(content->GetBlockMatchingParams())
-                    };
-    
+                unique_ptr<_reg_blockMatchingParam> blockMatchingParams{ new _reg_blockMatchingParam(content->GetBlockMatchingParams()) };
+
                 testCases.push_back({ testName + " " + platform->GetName(), std::move(blockMatchingParams) });
             } // loop over platforms
         }
-        delete mask2D;
-        delete mask3D;
     }
 };
 
@@ -187,10 +166,10 @@ TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") {
 
             // Loop over the block and ensure all values are identical
             for (int b = 0; b < blockMatchingParams->activeBlockNumber; ++b) {
-                for(int d = 0; d<(int)blockMatchingParams->dim; ++d){
-                    const int i = b*(int)blockMatchingParams->dim+d;
+                for (int d = 0; d < (int)blockMatchingParams->dim; ++d) {
+                    const int i = b * (int)blockMatchingParams->dim + d;
                     const auto diffPos = blockMatchingParams->warpedPosition[i] - blockMatchingParams->referencePosition[i];
-                    if(fabs(diffPos - OFFSET) > EPS){
+                    if (fabs(diffPos - OFFSET) > EPS) {
                         std::cout << "[" << b << "/" << blockMatchingParams->activeBlockNumber << ":" << d << "] ";
                         std::cout << diffPos << std::endl; std::cout.flush();
                     }
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 5eebec14..d4b060f6 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -35,10 +35,10 @@ class ConjugateGradientTest: public InterfaceOptimiser {
 
         // Fill image with distance from identity
         const auto ref2dPtr = reference2d.data();
-        auto ref2dIt = ref2dPtr.begin();
+        auto ref2dItr = ref2dPtr.begin();
         for (int y = 0; y < reference2d->ny; ++y)
             for (int x = 0; x < reference2d->nx; ++x)
-                *ref2dIt++ = sqrtf(static_cast<float>(x * x + y * y));
+                *ref2dItr++ = sqrtf(static_cast<float>(x * x + y * y));
 
         // Create a reference 3D image
         dimFlo.push_back(4);
@@ -46,11 +46,11 @@ class ConjugateGradientTest: public InterfaceOptimiser {
 
         // Fill image with distance from identity
         const auto ref3dPtr = reference3d.data();
-        auto ref3dIt = ref3dPtr.begin();
+        auto ref3dItr = ref3dPtr.begin();
         for (int z = 0; z < reference3d->nz; ++z)
             for (int y = 0; y < reference3d->ny; ++y)
                 for (int x = 0; x < reference3d->nx; ++x)
-                    *ref3dIt++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
+                    *ref3dItr++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
 
         // Generate the different test cases
         // Test 2D
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index c1ceb951..03b17dd7 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -109,7 +109,7 @@ class GetDeformationFieldTest {
     void GetGridValues(const int& xPre, const int& yPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates) {
         const auto cppPtr = controlPointGrid.data();
         const auto cppPtrX = cppPtr.begin();
-        const auto cppPtrY = cppPtrX + NiftiImage::calcVoxelNumber(controlPointGrid, 2);
+        const auto cppPtrY = cppPtrX + controlPointGrid.nVoxelsPerSlice();
         size_t coord = 0;
         for (int y = yPre; y < yPre + 4; y++) {
             const bool in = -1 < y && y < controlPointGrid->ny;
@@ -128,7 +128,7 @@ class GetDeformationFieldTest {
     }
 
     void GetGridValues(const int& xPre, const int& yPre, const int& zPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates, float *zControlPointCoordinates) {
-        const size_t cppVoxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
+        const size_t cppVoxelNumber = controlPointGrid.nVoxelsPerVolume();
         const auto cppPtr = controlPointGrid.data();
         const auto cppPtrX = cppPtr.begin();
         const auto cppPtrY = cppPtrX + cppVoxelNumber;
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 3c99f312..a816daee 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -22,10 +22,10 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
 
     // Fill image with distance from identity
     const auto ref2dPtr = reference2d.data();
-    auto ref2dIt = ref2dPtr.begin();
+    auto ref2dItr = ref2dPtr.begin();
     for (int y = 0; y < reference2d->ny; ++y)
         for (int x = 0; x < reference2d->nx; ++x)
-            *ref2dIt++ = sqrtf(static_cast<float>(x * x + y * y));
+            *ref2dItr++ = sqrtf(static_cast<float>(x * x + y * y));
 
     // Create a corresponding 2D deformation field
     vector<NiftiImage::dim_t> dimDef{ 1, 1, 1, 1, 2 };
@@ -40,11 +40,11 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
 
     // Fill image with distance from identity
     const auto ref3dPtr = reference3d.data();
-    auto ref3dIt = ref3dPtr.begin();
+    auto ref3dItr = ref3dPtr.begin();
     for (int z = 0; z < reference3d->nz; ++z)
         for (int y = 0; y < reference3d->ny; ++y)
             for (int x = 0; x < reference3d->nx; ++x)
-                *ref3dIt++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
+                *ref3dItr++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
 
     // Create a corresponding 3D deformation field
     dimDef[4] = 3;
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 7587b499..57b0f6c8 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -23,10 +23,10 @@ TEST_CASE("Interpolation", "[Interpolation]") {
 
     // Fill image with distance from identity
     const auto ref2dPtr = reference2d.data();
-    auto ref2dIt = ref2dPtr.begin();
+    auto ref2dItr = ref2dPtr.begin();
     for (int y = 0; y < reference2d->ny; ++y)
         for (int x = 0; x < reference2d->nx; ++x)
-            *ref2dIt++ = sqrtf(static_cast<float>(x * x + y * y));
+            *ref2dItr++ = sqrtf(static_cast<float>(x * x + y * y));
 
     // Create a corresponding 2D deformation field
     vector<NiftiImage::dim_t> dimDef{ 1, 1, 1, 1, 2 };
@@ -41,11 +41,11 @@ TEST_CASE("Interpolation", "[Interpolation]") {
 
     // Fill image with distance from identity
     const auto ref3dPtr = reference3d.data();
-    auto ref3dIt = ref3dPtr.begin();
+    auto ref3dItr = ref3dPtr.begin();
     for (int z = 0; z < reference3d->nz; ++z)
         for (int y = 0; y < reference3d->ny; ++y)
             for (int x = 0; x < reference3d->nx; ++x)
-                *ref3dIt++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
+                *ref3dItr++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
 
     // Create a corresponding 3D deformation field
     dimDef[4] = 3;
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index f6bd6cb3..a6f1052b 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -37,27 +37,20 @@ class LNCCTest {
         NiftiImage cpp3d(CreateControlPointGrid(reference3d));
 
         // Fill images with random values
-        const auto ref2dPtr = reference2d.data();
-        auto ref2dItr = ref2dPtr.begin();
-        const auto flo2dPtr = floating2d.data();
-        auto flo2dItr = flo2dPtr.begin();
-        for (int y = 0; y < reference2d->ny; ++y)
-            for (int x = 0; x < reference2d->nx; ++x) {
-                *ref2dItr++ = distr(gen);
-                *flo2dItr++ = distr(gen);
-            }
+        auto ref2dPtr = reference2d.data();
+        auto flo2dPtr = floating2d.data();
+        for (size_t i = 0; i < reference2d.nVoxels(); ++i) {
+            ref2dPtr[i] = distr(gen);
+            flo2dPtr[i] = distr(gen);
+        }
 
         // Fill images with random values
-        const auto ref3dPtr = reference3d.data();
-        auto ref3dItr = ref3dPtr.begin();
-        const auto flo3dPtr = floating3d.data();
-        auto flo3dItr = flo3dPtr.begin();
-        for (int z = 0; z < reference3d->nz; ++z)
-            for (int y = 0; y < reference3d->ny; ++y)
-                for (int x = 0; x < reference3d->nx; ++x) {
-                    *ref3dItr++ = distr(gen);
-                    *flo3dItr++ = distr(gen);
-                }
+        auto ref3dPtr = reference3d.data();
+        auto flo3dPtr = floating3d.data();
+        for (size_t i = 0; i < reference3d.nVoxels(); ++i) {
+            ref3dPtr[i] = distr(gen);
+            flo3dPtr[i] = distr(gen);
+        }
 
         // Create the object to compute the expected values
         vector<TestData> testData;
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index fe59bec9..47876b7c 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -35,10 +35,10 @@ class NormaliseGradientTest {
 
         // Fill image with distance from identity
         const auto ref2dPtr = reference2d.data();
-        auto ref2dIt = ref2dPtr.begin();
+        auto ref2dItr = ref2dPtr.begin();
         for (int y = 0; y < reference2d->ny; ++y)
             for (int x = 0; x < reference2d->nx; ++x)
-                *ref2dIt++ = sqrtf(static_cast<float>(x * x + y * y));
+                *ref2dItr++ = sqrtf(static_cast<float>(x * x + y * y));
 
         // Create a reference 3D image
         dimFlo.push_back(4);
@@ -46,11 +46,11 @@ class NormaliseGradientTest {
 
         // Fill image with distance from identity
         const auto ref3dPtr = reference3d.data();
-        auto ref3dIt = ref3dPtr.begin();
+        auto ref3dItr = ref3dPtr.begin();
         for (int z = 0; z < reference3d->nz; ++z)
             for (int y = 0; y < reference3d->ny; ++y)
                 for (int x = 0; x < reference3d->nx; ++x)
-                    *ref3dIt++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
+                    *ref3dItr++ = sqrtf(static_cast<float>(x * x + y * y + z * z));
 
         // Generate the different test cases
         // Test 2D
diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp
index ee0a62d5..be362a51 100644
--- a/reg-test/reg_test_regr_blockMatching.cpp
+++ b/reg-test/reg_test_regr_blockMatching.cpp
@@ -36,27 +36,20 @@ class BMTest {
         NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
 
         // Fill images with random values
-        const auto ref2dPtr = reference2d.data();
-        auto ref2dItr = ref2dPtr.begin();
-        const auto flo2dPtr = floating2d.data();
-        auto flo2dItr = flo2dPtr.begin();
-        for (int y = 0; y < reference2d->ny; ++y)
-            for (int x = 0; x < reference2d->nx; ++x) {
-                *ref2dItr++ = distr(gen);
-                *flo2dItr++ = distr(gen);
-            }
+        auto ref2dPtr = reference2d.data();
+        auto flo2dPtr = floating2d.data();
+        for (size_t i = 0; i < reference2d.nVoxels(); ++i) {
+            ref2dPtr[i] = distr(gen);
+            flo2dPtr[i] = distr(gen);
+        }
 
         // Fill images with random values
-        const auto ref3dPtr = reference3d.data();
-        auto ref3dItr = ref3dPtr.begin();
-        const auto flo3dPtr = floating3d.data();
-        auto flo3dItr = flo3dPtr.begin();
-        for (int z = 0; z < reference3d->nz; ++z)
-            for (int y = 0; y < reference3d->ny; ++y)
-                for (int x = 0; x < reference3d->nx; ++x) {
-                    *ref3dItr++ = distr(gen);
-                    *flo3dItr++ = distr(gen);
-                }
+        auto ref3dPtr = reference3d.data();
+        auto flo3dPtr = floating3d.data();
+        for (size_t i = 0; i < reference3d.nVoxels(); ++i) {
+            ref3dPtr[i] = distr(gen);
+            flo3dPtr[i] = distr(gen);
+        }
 
         // Create the data container for the regression test
         vector<TestData> testData;
@@ -111,8 +104,8 @@ class BMTest {
             contentCuda->SetWarped(warpedCuda.disown());
 
             // Initialise the block matching
-            std::unique_ptr<BlockMatchingKernel> kernelCpu{ new CpuBlockMatchingKernel(contentCpu.get()) };
-            std::unique_ptr<BlockMatchingKernel> kernelCuda{ new CudaBlockMatchingKernel(contentCuda.get()) };
+            unique_ptr<BlockMatchingKernel> kernelCpu{ new CpuBlockMatchingKernel(contentCpu.get()) };
+            unique_ptr<BlockMatchingKernel> kernelCuda{ new CudaBlockMatchingKernel(contentCuda.get()) };
 
             // Do the computation
             kernelCpu->Calculate();
@@ -141,12 +134,12 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") {
 
             // Loop over the block and ensure all values are identical
             for (int b = 0; b < blockMatchingParamsCpu->activeBlockNumber; ++b) {
-                for(int d = 0; d<(int)blockMatchingParamsCpu->dim; ++d){
+                for (int d = 0; d < (int)blockMatchingParamsCpu->dim; ++d) {
 
-                    const int i = b*(int)blockMatchingParamsCpu->dim+d;
+                    const int i = b * (int)blockMatchingParamsCpu->dim + d;
                     const auto refPosCpu = blockMatchingParamsCpu->referencePosition[i];
                     const auto refPosCuda = blockMatchingParamsCuda->referencePosition[i];
-                    if(fabs(refPosCpu - refPosCuda) > EPS){
+                    if (fabs(refPosCpu - refPosCuda) > EPS) {
                         std::cout << "Ref[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:";
                         std::cout << refPosCpu << " | CUDA:" << refPosCuda << std::endl;
                         std::cout.flush();
@@ -154,7 +147,7 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") {
                     REQUIRE(fabs(refPosCpu - refPosCuda) < EPS);
                     const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[i];
                     const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[i];
-                    if(fabs(warPosCpu - warPosCuda) > EPS){
+                    if (fabs(warPosCpu - warPosCuda) > EPS) {
                         std::cout << "War[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:";
                         std::cout << warPosCpu << " | CUDA:" << warPosCuda << std::endl;
                         std::cout.flush();
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
index 6639d40a..c7c72ef1 100644
--- a/reg-test/reg_test_regr_lts.cpp
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -39,27 +39,20 @@ class LTSTest {
         NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
 
         // Fill images with random values
-        const auto ref2dPtr = reference2d.data();
-        auto ref2dItr = ref2dPtr.begin();
-        const auto flo2dPtr = floating2d.data();
-        auto flo2dItr = flo2dPtr.begin();
-        for (int y = 0; y < reference2d->ny; ++y)
-            for (int x = 0; x < reference2d->nx; ++x) {
-                *ref2dItr++ = distr(gen);
-                *flo2dItr++ = distr(gen);
-            }
+        auto ref2dPtr = reference2d.data();
+        auto flo2dPtr = floating2d.data();
+        for (size_t i = 0; i < reference2d.nVoxels(); ++i) {
+            ref2dPtr[i] = distr(gen);
+            flo2dPtr[i] = distr(gen);
+        }
 
         // Fill images with random values
-        const auto ref3dPtr = reference3d.data();
-        auto ref3dItr = ref3dPtr.begin();
-        const auto flo3dPtr = floating3d.data();
-        auto flo3dItr = flo3dPtr.begin();
-        for (int z = 0; z < reference3d->nz; ++z)
-            for (int y = 0; y < reference3d->ny; ++y)
-                for (int x = 0; x < reference3d->nx; ++x) {
-                    *ref3dItr++ = distr(gen);
-                    *flo3dItr++ = distr(gen);
-                }
+        auto ref3dPtr = reference3d.data();
+        auto flo3dPtr = floating3d.data();
+        for (size_t i = 0; i < reference3d.nVoxels(); ++i) {
+            ref3dPtr[i] = distr(gen);
+            flo3dPtr[i] = distr(gen);
+        }
 
         // Create the data container for the regression test
         vector<TestData> testData;
@@ -122,7 +115,7 @@ class LTSTest {
             contentCuda->SetWarped(warpedCuda.disown());
 
             // Initialise the block matching and run it on the CPU
-            std::unique_ptr<BlockMatchingKernel> bmKernelCpu { new CpuBlockMatchingKernel(contentCpu.get()) };
+            unique_ptr<BlockMatchingKernel> bmKernelCpu { new CpuBlockMatchingKernel(contentCpu.get()) };
             bmKernelCpu->Calculate();
 
             // Set the CUDA block matching parameters
@@ -130,8 +123,8 @@ class LTSTest {
             contentCuda->SetBlockMatchingParams(blockMatchingParamsCuda);
 
             // Initialise the optimise kernels
-            std::unique_ptr<LtsKernel> kernelCpu{ new CpuLtsKernel(contentCpu.get()) };
-            std::unique_ptr<LtsKernel> kernelCuda{ new CudaLtsKernel(contentCuda.get()) };
+            unique_ptr<LtsKernel> kernelCpu{ new CpuLtsKernel(contentCpu.get()) };
+            unique_ptr<LtsKernel> kernelCuda{ new CudaLtsKernel(contentCuda.get()) };
 
             // Compute the transformations
             kernelCpu->Calculate(ttype);

From 63bccddb9b7e631b841888f5779cc66d683f5c3a Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Wed, 19 Jul 2023 13:00:58 +0100
Subject: [PATCH 160/314] #92 fixed shared mem race condition in block match

---
 niftyreg_build_version.txt             | 2 +-
 reg-lib/cuda/CudaBlockMatchingKernel.h | 2 +-
 reg-lib/cuda/blockMatchingKernel.cu    | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3d242f55..bbb81cf1 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-278
+279
diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.h b/reg-lib/cuda/CudaBlockMatchingKernel.h
index 643d96f7..f917f85e 100644
--- a/reg-lib/cuda/CudaBlockMatchingKernel.h
+++ b/reg-lib/cuda/CudaBlockMatchingKernel.h
@@ -6,7 +6,7 @@
 //Kernel functions for block matching
 class CudaBlockMatchingKernel: public BlockMatchingKernel {
 public:
-    CudaBlockMatchingKernel(Content *conIn);
+    explicit CudaBlockMatchingKernel(Content *conIn);
     void Calculate();
 
 private:
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 05d005f8..874a20de 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -67,6 +67,7 @@ __device__ __inline__ void apply_affine(const float4& pt, float *result) {
 /* *************************************************************** */
 __device__ __inline__ float blockReduce2DSum(float val, unsigned tid) {
     static __shared__ float shared[16];
+    __syncthreads();
     shared[tid] = val;
     __syncthreads();
 
@@ -80,6 +81,7 @@ __device__ __inline__ float blockReduce2DSum(float val, unsigned tid) {
 /* *************************************************************** */
 __device__ __inline__ float blockReduceSum(float val, unsigned tid) {
     static __shared__ float shared[64];
+    __syncthreads();
     shared[tid] = val;
     __syncthreads();
 

From 8653742e7a7a60fe6a9ae7a0b22a54dd77189831 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 20 Jul 2023 15:35:35 +0100
Subject: [PATCH 161/314] Add NiftiImage::getDataType() returning the current
 data type

---
 niftyreg_build_version.txt |  2 +-
 reg-io/RNifti/NiftiImage.h | 62 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index bbb81cf1..1b1c1312 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-279
+280
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index d63f0f53..ce159bb2 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -27,6 +27,7 @@
 #include <map>
 #include <locale>
 #include <limits>
+#include <variant>
 
 #endif
 
@@ -1733,6 +1734,65 @@ class NiftiImage
     **/
     NiftiImage & changeDatatype (const std::string &datatype, const bool useSlope = false);
 
+    /// @brief  A variant type holding a NIfTI datatype
+    using DataType = std::variant<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t, float, double>;
+
+    /**
+     * Return the datatype of the image
+     * @return A variant holding a NIfTI datatype
+    */
+    static DataType getDataType(const nifti_image *image)
+    {
+        if (image == nullptr)
+            throw std::runtime_error("Cannot get datatype of null image");
+        switch (image->datatype)
+        {
+            case DT_UINT8:      return uint8_t();          break;
+            case DT_INT16:      return int16_t();          break;
+            case DT_INT32:      return int32_t();          break;
+            case DT_FLOAT32:    return float();            break;
+            case DT_FLOAT64:    return double();           break;
+            case DT_INT8:       return int8_t();           break;
+            case DT_UINT16:     return uint16_t();         break;
+            case DT_UINT32:     return uint32_t();         break;
+            case DT_INT64:      return int64_t();          break;
+            case DT_UINT64:     return uint64_t();         break;
+
+            default:
+            throw std::runtime_error("Unsupported data type (" + std::string(nifti_datatype_string(image->datatype)) + ")");
+        }
+    }
+
+    /**
+     * Return the datatype of the image
+     * @return A variant holding a NIfTI datatype
+    */
+    DataType getDataType() const { return getDataType(image); }
+
+    /**
+     * Return the datatype of the image, if it is a floating-point type
+     * @return A variant holding a NIfTI datatype
+    */
+    static std::variant<float, double> getFloatingDataType(const nifti_image *image)
+    {
+        if (image == nullptr)
+            throw std::runtime_error("Cannot get datatype of null image");
+        switch (image->datatype)
+        {
+            case DT_FLOAT32:    return float();            break;
+            case DT_FLOAT64:    return double();           break;
+
+            default:
+            throw std::runtime_error("Unsupported data type (" + std::string(nifti_datatype_string(image->datatype)) + ")");
+        }
+    }
+
+    /**
+     * Return the datatype of the image, if it is a floating-point type
+     * @return A variant holding a NIfTI datatype
+    */
+    std::variant<float, double> getFloatingDataType() const { return getFloatingDataType(image); }
+
     /**
      * Replace the pixel data in the image with the contents of a vector
      * @param data A data vector, whose elements will be used to replace the image data
@@ -1786,7 +1846,7 @@ class NiftiImage
         if (image->data)
             free(image->data);
         recalcVoxelNumber();
-        image->data = calloc(1, nifti_get_volsize(image));
+        image->data = calloc(1, totalBytes());
     }
 
     /**

From 2cd5ce4146be6e58ea72a610dc90abb8a86049ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 20 Jul 2023 15:36:45 +0100
Subject: [PATCH 162/314] Refactor _reg_resampling

---
 niftyreg_build_version.txt      |    2 +-
 reg-lib/cpu/_reg_resampling.cpp | 3919 +++++++++++++------------------
 reg-lib/cpu/_reg_resampling.h   |   72 +-
 3 files changed, 1642 insertions(+), 2351 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 1b1c1312..1473a88f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-280
+281
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index d8b12719..d881001b 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -19,174 +19,154 @@
 #define SINC_KERNEL_SIZE SINC_KERNEL_RADIUS*2
 
 /* *************************************************************** */
-void interpWindowedSincKernel(double relative, double *basis)
-{
-    if(relative<0) relative=0; //reg_rounding error
-    int j=0;
-    double sum=0.;
-    for(int i=-SINC_KERNEL_RADIUS; i<SINC_KERNEL_RADIUS; ++i)
-    {
-        double x=relative-static_cast<double>(i);
-        if(x==0)
-            basis[j]=1.0;
-        else if(fabs(x)>=static_cast<double>(SINC_KERNEL_RADIUS))
-            basis[j]=0;
-        else{
-            double pi_x=M_PI*x;
-            basis[j]=static_cast<double>(SINC_KERNEL_RADIUS) *
-                    sin(pi_x) *
-                    sin(pi_x/static_cast<double>(SINC_KERNEL_RADIUS)) /
-                    (pi_x*pi_x);
+void interpWindowedSincKernel(double relative, double *basis) {
+    if (relative < 0) relative = 0; //reg_rounding error
+    int j = 0;
+    double sum = 0.;
+    for (int i = -SINC_KERNEL_RADIUS; i < SINC_KERNEL_RADIUS; ++i) {
+        double x = relative - static_cast<double>(i);
+        if (x == 0)
+            basis[j] = 1.0;
+        else if (fabs(x) >= static_cast<double>(SINC_KERNEL_RADIUS))
+            basis[j] = 0;
+        else {
+            double pi_x = M_PI * x;
+            basis[j] = static_cast<double>(SINC_KERNEL_RADIUS) *
+                sin(pi_x) *
+                sin(pi_x / static_cast<double>(SINC_KERNEL_RADIUS)) /
+                (pi_x * pi_x);
         }
-        sum+=basis[j];
+        sum += basis[j];
         j++;
     }
-    for(int i=0;i<SINC_KERNEL_SIZE;++i)
-        basis[i]/=sum;
+    for (int i = 0; i < SINC_KERNEL_SIZE; ++i)
+        basis[i] /= sum;
 }
-
-/* *************************************************************** */
 /* *************************************************************** */
-double interpWindowedSincKernel_Samp(double x, double kernelsize)
-{
-    if(x==0)
+double interpWindowedSincKernel_Samp(double x, double kernelsize) {
+    if (x == 0)
         return 1.0;
-    else if(fabs(x)>=static_cast<double>(kernelsize))
+    else if (fabs(x) >= static_cast<double>(kernelsize))
         return 0;
-    else{
-        double pi_x=M_PI*fabs(x);
+    else {
+        double pi_x = M_PI * fabs(x);
         return static_cast<double>(kernelsize) *
-                sin(pi_x) *
-                sin(pi_x/static_cast<double>(kernelsize)) /
-                (pi_x*pi_x);
+            sin(pi_x) *
+            sin(pi_x / static_cast<double>(kernelsize)) /
+            (pi_x * pi_x);
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void interpCubicSplineKernel(double relative, double *basis)
-{
-    if(relative<0) relative=0; //reg_rounding error
-    double FF= relative*relative;
-    basis[0] = (relative * ((2.0-relative)*relative - 1.0))/2.0;
-    basis[1] = (FF * (3.0*relative-5.0) + 2.0)/2.0;
-    basis[2] = (relative * ((4.0-3.0*relative)*relative + 1.0))/2.0;
-    basis[3] = (relative-1.0) * FF/2.0;
+void interpCubicSplineKernel(double relative, double *basis) {
+    if (relative < 0) relative = 0; //reg_rounding error
+    double FF = relative * relative;
+    basis[0] = (relative * ((2.0 - relative) * relative - 1.0)) / 2.0;
+    basis[1] = (FF * (3.0 * relative - 5.0) + 2.0) / 2.0;
+    basis[2] = (relative * ((4.0 - 3.0 * relative) * relative + 1.0)) / 2.0;
+    basis[3] = (relative - 1.0) * FF / 2.0;
 }
 /* *************************************************************** */
-void interpCubicSplineKernel(double relative, double *basis, double *derivative)
-{
-    interpCubicSplineKernel(relative,basis);
-    if(relative<0) relative=0; //reg_rounding error
-    double FF= relative*relative;
-    derivative[0] = (4.0*relative - 3.0*FF - 1.0)/2.0;
-    derivative[1] = (9.0*relative - 10.0) * relative/2.0;
-    derivative[2] = (8.0*relative - 9.0*FF + 1.0)/2.0;
-    derivative[3] = (3.0*relative - 2.0) * relative/2.0;
+void interpCubicSplineKernel(double relative, double *basis, double *derivative) {
+    interpCubicSplineKernel(relative, basis);
+    if (relative < 0) relative = 0; //reg_rounding error
+    double FF = relative * relative;
+    derivative[0] = (4.0 * relative - 3.0 * FF - 1.0) / 2.0;
+    derivative[1] = (9.0 * relative - 10.0) * relative / 2.0;
+    derivative[2] = (8.0 * relative - 9.0 * FF + 1.0) / 2.0;
+    derivative[3] = (3.0 * relative - 2.0) * relative / 2.0;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void interpLinearKernel(double relative, double *basis)
-{
-    if(relative<0) relative=0; //reg_rounding error
-    basis[1]=relative;
-    basis[0]=1.0-relative;
+void interpLinearKernel(double relative, double *basis) {
+    if (relative < 0) relative = 0; //reg_rounding error
+    basis[1] = relative;
+    basis[0] = 1.0 - relative;
 }
 /* *************************************************************** */
-void interpLinearKernel(double relative, double *basis, double *derivative)
-{
-    interpLinearKernel(relative,basis);
-    derivative[1]=1;
-    derivative[0]=0;
+void interpLinearKernel(double relative, double *basis, double *derivative) {
+    interpLinearKernel(relative, basis);
+    derivative[1] = 1;
+    derivative[0] = 0;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void interpNearestNeighKernel(double relative, double *basis)
-{
-    if(relative<0) relative=0; //reg_rounding error
-    basis[0]=basis[1]=0;
-    if(relative>=0.5)
-        basis[1]=1;
-    else basis[0]=1;
+void interpNearestNeighKernel(double relative, double *basis) {
+    if (relative < 0) relative = 0; //reg_rounding error
+    basis[0] = basis[1] = 0;
+    if (relative >= 0.5)
+        basis[1] = 1;
+    else basis[0] = 1;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
 void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
                                       void **originalFloatingData,
-                                      int *dtIndicies)
-{
+                                      const int *dtIndicies) {
     // If we have some valid diffusion tensor indicies, we need to replace the tensor components
     // by the the log tensor components
-    if( dtIndicies[0] != -1 )
-    {
+    if (dtIndicies[0] != -1) {
 #ifndef NDEBUG
         char text[255];
         reg_print_msg_debug("DTI indices:");
         sprintf(text, "Active time point:");
-        for(unsigned i = 0; i < 6; i++ )
+        for (unsigned i = 0; i < 6; i++)
             sprintf(text, "%s %i", text, dtIndicies[i]);
         reg_print_msg_debug(text);
 #endif
 
 #ifdef WIN32
         long floatingIndex;
-        const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
+        const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3);
 #else
         size_t floatingIndex;
-        const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
+        const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
 #endif
 
-        *originalFloatingData=malloc(floatingImage->nvox*sizeof(DataType));
-        memcpy(*originalFloatingData,
-               floatingImage->data,
-               floatingImage->nvox*sizeof(DataType));
+        *originalFloatingData = malloc(floatingImage->nvox * sizeof(DataType));
+        memcpy(*originalFloatingData, floatingImage->data, floatingImage->nvox * sizeof(DataType));
 #ifndef NDEBUG
         reg_print_msg_debug("The floating image data has been copied");
 #endif
 
         /* As the tensor has 6 unique components that we need to worry about, read them out
       for the floating image. */
-        DataType *firstVox = static_cast<DataType *>(floatingImage->data);
+        DataType *firstVox = static_cast<DataType*>(floatingImage->data);
         // CAUTION: Here the tensor is assumed to be encoding in lower triangular order
-        DataType *floatingIntensityXX = &firstVox[floatingVoxelNumber*dtIndicies[0]];
-        DataType *floatingIntensityXY = &firstVox[floatingVoxelNumber*dtIndicies[1]];
-        DataType *floatingIntensityYY = &firstVox[floatingVoxelNumber*dtIndicies[2]];
-        DataType *floatingIntensityXZ = &firstVox[floatingVoxelNumber*dtIndicies[3]];
-        DataType *floatingIntensityYZ = &firstVox[floatingVoxelNumber*dtIndicies[4]];
-        DataType *floatingIntensityZZ = &firstVox[floatingVoxelNumber*dtIndicies[5]];
-
+        DataType *floatingIntensityXX = &firstVox[floatingVoxelNumber * dtIndicies[0]];
+        DataType *floatingIntensityXY = &firstVox[floatingVoxelNumber * dtIndicies[1]];
+        DataType *floatingIntensityYY = &firstVox[floatingVoxelNumber * dtIndicies[2]];
+        DataType *floatingIntensityXZ = &firstVox[floatingVoxelNumber * dtIndicies[3]];
+        DataType *floatingIntensityYZ = &firstVox[floatingVoxelNumber * dtIndicies[4]];
+        DataType *floatingIntensityZZ = &firstVox[floatingVoxelNumber * dtIndicies[5]];
 
         // Should log the tensor up front
         // We need to take the logarithm of the tensor for each voxel in the floating intensity
         // image, and replace the warped
-        int tid=0;
+        int tid = 0;
 #ifdef _OPENMP
         mat33 diffTensor[16];
         int max_thread_number = omp_get_max_threads();
-        if(max_thread_number>16) omp_set_num_threads(16);
+        if (max_thread_number > 16) omp_set_num_threads(16);
 #pragma omp parallel for default(none) \
-    private(floatingIndex, tid) \
+    private(tid) \
     shared(floatingVoxelNumber,floatingIntensityXX,floatingIntensityYY, \
     floatingIntensityZZ,floatingIntensityXY,floatingIntensityXZ, \
     floatingIntensityYZ, diffTensor)
 #else
         mat33 diffTensor[1];
 #endif
-        for(floatingIndex=0; floatingIndex<floatingVoxelNumber; ++floatingIndex)
-        {
+        for (floatingIndex = 0; floatingIndex < floatingVoxelNumber; ++floatingIndex) {
 #ifdef _OPENMP
-            tid=omp_get_thread_num();
+            tid = omp_get_thread_num();
 #endif
             // Fill a mat44 with the tensor components
-            diffTensor[tid].m[0][0] = floatingIntensityXX[floatingIndex];
-            diffTensor[tid].m[0][1] = floatingIntensityXY[floatingIndex];
+            diffTensor[tid].m[0][0] = static_cast<float>(floatingIntensityXX[floatingIndex]);
+            diffTensor[tid].m[0][1] = static_cast<float>(floatingIntensityXY[floatingIndex]);
             diffTensor[tid].m[1][0] = diffTensor[tid].m[0][1];
-            diffTensor[tid].m[1][1] = floatingIntensityYY[floatingIndex];
-            diffTensor[tid].m[0][2] = floatingIntensityXZ[floatingIndex];
+            diffTensor[tid].m[1][1] = static_cast<float>(floatingIntensityYY[floatingIndex]);
+            diffTensor[tid].m[0][2] = static_cast<float>(floatingIntensityXZ[floatingIndex]);
             diffTensor[tid].m[2][0] = diffTensor[tid].m[0][2];
-            diffTensor[tid].m[1][2] = floatingIntensityYZ[floatingIndex];
+            diffTensor[tid].m[1][2] = static_cast<float>(floatingIntensityYZ[floatingIndex]);
             diffTensor[tid].m[2][1] = diffTensor[tid].m[1][2];
-            diffTensor[tid].m[2][2] = floatingIntensityZZ[floatingIndex];
+            diffTensor[tid].m[2][2] = static_cast<float>(floatingIntensityZZ[floatingIndex]);
 
             // Compute the log of the diffusion tensor.
             reg_mat33_logm(&diffTensor[tid]);
@@ -210,59 +190,55 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
 /* *************************************************************** */
 template <class DataType>
 void reg_dti_resampling_postprocessing(nifti_image *inputImage,
-                                       int *mask,
-                                       mat33 *jacMat,
-                                       int *dtIndicies,
-                                       nifti_image *warpedImage = nullptr)
-{
+                                       const int *mask,
+                                       const mat33 *jacMat,
+                                       const int *dtIndicies,
+                                       const nifti_image *warpedImage = nullptr) {
     // If we have some valid diffusion tensor indicies, we need to exponentiate the previously logged tensor components
     // we also need to reorient the tensors based on the local transformation Jacobians
-    if(dtIndicies[0] != -1 )
-    {
+    if (dtIndicies[0] != -1) {
 #ifdef WIN32
         long warpedIndex;
-        const long voxelNumber = (long)CalcVoxelNumber(*inputImage);
+        const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
 #else
         size_t warpedIndex;
-        const size_t voxelNumber = CalcVoxelNumber(*inputImage);
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(inputImage, 3);
 #endif
-        DataType *warpVox,*warpedXX,*warpedXY,*warpedXZ,*warpedYY,*warpedYZ,*warpedZZ;
-        if(warpedImage!=nullptr)
-        {
-            warpVox = static_cast<DataType *>(warpedImage->data);
+        const DataType *warpVox, *warpedXX, *warpedXY, *warpedXZ, *warpedYY, *warpedYZ, *warpedZZ;
+        if (warpedImage != nullptr) {
+            warpVox = static_cast<DataType*>(warpedImage->data);
             // CAUTION: Here the tensor is assumed to be encoding in lower triangular order
-            warpedXX = &warpVox[voxelNumber*dtIndicies[0]];
-            warpedXY = &warpVox[voxelNumber*dtIndicies[1]];
-            warpedYY = &warpVox[voxelNumber*dtIndicies[2]];
-            warpedXZ = &warpVox[voxelNumber*dtIndicies[3]];
-            warpedYZ = &warpVox[voxelNumber*dtIndicies[4]];
-            warpedZZ = &warpVox[voxelNumber*dtIndicies[5]];
+            warpedXX = &warpVox[voxelNumber * dtIndicies[0]];
+            warpedXY = &warpVox[voxelNumber * dtIndicies[1]];
+            warpedYY = &warpVox[voxelNumber * dtIndicies[2]];
+            warpedXZ = &warpVox[voxelNumber * dtIndicies[3]];
+            warpedYZ = &warpVox[voxelNumber * dtIndicies[4]];
+            warpedZZ = &warpVox[voxelNumber * dtIndicies[5]];
         }
-        for(int u=0; u<inputImage->nu; ++u)
-        {
+        for (int u = 0; u < inputImage->nu; ++u) {
             // Now, we need to exponentiate the warped intensities back to give us a regular tensor
             // let's reorient each tensor based on the rigid component of the local warping
             /* As the tensor has 6 unique components that we need to worry about, read them out
          for the warped image. */
-            // CAUTION: Here the tensor is assumed to be encoding in lower triangular order
-            DataType *firstWarpVox = static_cast<DataType *>(inputImage->data);
-            DataType *inputIntensityXX = &firstWarpVox[voxelNumber*(dtIndicies[0]+inputImage->nt*u)];
-            DataType *inputIntensityXY = &firstWarpVox[voxelNumber*(dtIndicies[1]+inputImage->nt*u)];
-            DataType *inputIntensityYY = &firstWarpVox[voxelNumber*(dtIndicies[2]+inputImage->nt*u)];
-            DataType *inputIntensityXZ = &firstWarpVox[voxelNumber*(dtIndicies[3]+inputImage->nt*u)];
-            DataType *inputIntensityYZ = &firstWarpVox[voxelNumber*(dtIndicies[4]+inputImage->nt*u)];
-            DataType *inputIntensityZZ = &firstWarpVox[voxelNumber*(dtIndicies[5]+inputImage->nt*u)];
+         // CAUTION: Here the tensor is assumed to be encoding in lower triangular order
+            DataType *firstWarpVox = static_cast<DataType*>(inputImage->data);
+            DataType *inputIntensityXX = &firstWarpVox[voxelNumber * (dtIndicies[0] + inputImage->nt * u)];
+            DataType *inputIntensityXY = &firstWarpVox[voxelNumber * (dtIndicies[1] + inputImage->nt * u)];
+            DataType *inputIntensityYY = &firstWarpVox[voxelNumber * (dtIndicies[2] + inputImage->nt * u)];
+            DataType *inputIntensityXZ = &firstWarpVox[voxelNumber * (dtIndicies[3] + inputImage->nt * u)];
+            DataType *inputIntensityYZ = &firstWarpVox[voxelNumber * (dtIndicies[4] + inputImage->nt * u)];
+            DataType *inputIntensityZZ = &firstWarpVox[voxelNumber * (dtIndicies[5] + inputImage->nt * u)];
 
             // Step through each voxel in the warped image
-            double testSum=0;
+            double testSum = 0;
             int col, row;
-            int tid=0;
+            int tid = 0;
 #ifdef _OPENMP
             mat33 inputTensor[16], warpedTensor[16], RotMat[16], RotMatT[16];
             int max_thread_number = omp_get_max_threads();
-            if(max_thread_number>16) omp_set_num_threads(16);
+            if (max_thread_number > 16) omp_set_num_threads(16);
 #pragma omp parallel for default(none) \
-    private(warpedIndex, testSum, col, row, tid) \
+    private(testSum, col, row, tid) \
     shared(voxelNumber,inputIntensityXX,inputIntensityYY,inputIntensityZZ, \
     warpedXX, warpedXY, warpedXZ, warpedYY, warpedYZ, warpedZZ, warpedImage, \
     inputIntensityXY,inputIntensityXZ,inputIntensityYZ, jacMat, mask, \
@@ -270,57 +246,51 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
 #else
             mat33 inputTensor[1], warpedTensor[1], RotMat[1], RotMatT[1];
 #endif
-            for(warpedIndex=0; warpedIndex<voxelNumber; ++warpedIndex)
-            {
+            for (warpedIndex = 0; warpedIndex < voxelNumber; ++warpedIndex) {
 #ifdef _OPENMP
-                tid=omp_get_thread_num();
+                tid = omp_get_thread_num();
 #endif
-                if(mask[warpedIndex]>-1)
-                {
+                if (mask[warpedIndex] > -1) {
                     // Fill the rest of the mat44 with the tensor components
-                    inputTensor[tid].m[0][0] = static_cast<double>(inputIntensityXX[warpedIndex]);
-                    inputTensor[tid].m[0][1] = static_cast<double>(inputIntensityXY[warpedIndex]);
+                    inputTensor[tid].m[0][0] = static_cast<float>(inputIntensityXX[warpedIndex]);
+                    inputTensor[tid].m[0][1] = static_cast<float>(inputIntensityXY[warpedIndex]);
                     inputTensor[tid].m[1][0] = inputTensor[tid].m[0][1];
-                    inputTensor[tid].m[1][1] = static_cast<double>(inputIntensityYY[warpedIndex]);
-                    inputTensor[tid].m[0][2] = static_cast<double>(inputIntensityXZ[warpedIndex]);
+                    inputTensor[tid].m[1][1] = static_cast<float>(inputIntensityYY[warpedIndex]);
+                    inputTensor[tid].m[0][2] = static_cast<float>(inputIntensityXZ[warpedIndex]);
                     inputTensor[tid].m[2][0] = inputTensor[tid].m[0][2];
-                    inputTensor[tid].m[1][2] = static_cast<double>(inputIntensityYZ[warpedIndex]);
+                    inputTensor[tid].m[1][2] = static_cast<float>(inputIntensityYZ[warpedIndex]);
                     inputTensor[tid].m[2][1] = inputTensor[tid].m[1][2];
-                    inputTensor[tid].m[2][2] = static_cast<double>(inputIntensityZZ[warpedIndex]);
+                    inputTensor[tid].m[2][2] = static_cast<float>(inputIntensityZZ[warpedIndex]);
                     // Exponentiate the warped tensor
-                    if(warpedImage==nullptr)
-                    {
+                    if (warpedImage == nullptr) {
                         reg_mat33_expm(&inputTensor[tid]);
-                        testSum=0;
-                    }
-                    else
-                    {
+                        testSum = 0;
+                    } else {
                         reg_mat33_eye(&warpedTensor[tid]);
-                        warpedTensor[tid].m[0][0] = static_cast<double>(warpedXX[warpedIndex]);
-                        warpedTensor[tid].m[0][1] = static_cast<double>(warpedXY[warpedIndex]);
+                        warpedTensor[tid].m[0][0] = static_cast<float>(warpedXX[warpedIndex]);
+                        warpedTensor[tid].m[0][1] = static_cast<float>(warpedXY[warpedIndex]);
                         warpedTensor[tid].m[1][0] = warpedTensor[tid].m[0][1];
-                        warpedTensor[tid].m[1][1] = static_cast<double>(warpedYY[warpedIndex]);
-                        warpedTensor[tid].m[0][2] = static_cast<double>(warpedXZ[warpedIndex]);
+                        warpedTensor[tid].m[1][1] = static_cast<float>(warpedYY[warpedIndex]);
+                        warpedTensor[tid].m[0][2] = static_cast<float>(warpedXZ[warpedIndex]);
                         warpedTensor[tid].m[2][0] = warpedTensor[tid].m[0][2];
-                        warpedTensor[tid].m[1][2] = static_cast<double>(warpedYZ[warpedIndex]);
+                        warpedTensor[tid].m[1][2] = static_cast<float>(warpedYZ[warpedIndex]);
                         warpedTensor[tid].m[2][1] = warpedTensor[tid].m[1][2];
-                        warpedTensor[tid].m[2][2] = static_cast<double>(warpedZZ[warpedIndex]);
-                        inputTensor[tid] = nifti_mat33_mul(warpedTensor[tid],inputTensor[tid]);
-                        testSum=static_cast<double>(warpedTensor[tid].m[0][0]+warpedTensor[tid].m[0][1]+
-                                warpedTensor[tid].m[0][2]+warpedTensor[tid].m[1][0]+warpedTensor[tid].m[1][1]+
-                                warpedTensor[tid].m[1][2]+warpedTensor[tid].m[2][0]+warpedTensor[tid].m[2][1]+
-                                warpedTensor[tid].m[2][2]);
+                        warpedTensor[tid].m[2][2] = static_cast<float>(warpedZZ[warpedIndex]);
+                        inputTensor[tid] = nifti_mat33_mul(warpedTensor[tid], inputTensor[tid]);
+                        testSum = static_cast<double>(warpedTensor[tid].m[0][0] + warpedTensor[tid].m[0][1] +
+                                                      warpedTensor[tid].m[0][2] + warpedTensor[tid].m[1][0] + warpedTensor[tid].m[1][1] +
+                                                      warpedTensor[tid].m[1][2] + warpedTensor[tid].m[2][0] + warpedTensor[tid].m[2][1] +
+                                                      warpedTensor[tid].m[2][2]);
                     }
 
-                    if(testSum==testSum)
-                    {
+                    if (testSum == testSum) {
                         // Calculate the polar decomposition of the local Jacobian matrix, which
                         // tells us how to rotate the local tensor information
                         RotMat[tid] = nifti_mat33_polar(jacMat[warpedIndex]);
                         // We need both the rotation matrix, and it's transpose
-                        for(col=0; col<3; col++)
-                            for(row=0; row<3; row++)
-                                RotMatT[tid].m[col][row] = static_cast<double>(RotMat[tid].m[row][col]);
+                        for (col = 0; col < 3; col++)
+                            for (row = 0; row < 3; row++)
+                                RotMatT[tid].m[col][row] = RotMat[tid].m[row][col];
                         // As the mat44 multiplication uses pointers, do the multiplications separately
                         inputTensor[tid] = nifti_mat33_mul(nifti_mat33_mul(RotMatT[tid], inputTensor[tid]), RotMat[tid]);
 
@@ -331,9 +301,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
                         inputIntensityXY[warpedIndex] = static_cast<DataType>(inputTensor[tid].m[0][1]);
                         inputIntensityXZ[warpedIndex] = static_cast<DataType>(inputTensor[tid].m[0][2]);
                         inputIntensityYZ[warpedIndex] = static_cast<DataType>(inputTensor[tid].m[1][2]);
-                    }
-                    else
-                    {
+                    } else {
                         inputIntensityXX[warpedIndex] = std::numeric_limits<DataType>::quiet_NaN();
                         inputIntensityYY[warpedIndex] = std::numeric_limits<DataType>::quiet_NaN();
                         inputIntensityZZ[warpedIndex] = std::numeric_limits<DataType>::quiet_NaN();
@@ -353,99 +321,91 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
     }
 }
 /* *************************************************************** */
-template<class FloatingTYPE, class FieldTYPE>
-void ResampleImage3D(nifti_image *floatingImage,
-                     nifti_image *deformationField,
+template<class FloatingType, class FieldType>
+void ResampleImage3D(const nifti_image *floatingImage,
+                     const nifti_image *deformationField,
                      nifti_image *warpedImage,
-                     int *mask,
-                     FieldTYPE paddingValue,
-                     int kernel)
-{
+                     const int *mask,
+                     const FieldType& paddingValue,
+                     const int& kernel) {
 #ifdef _WIN32
     long  index;
-    const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage);
-    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
+    const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3);
+    const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3);
 #else
     size_t  index;
-    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
-    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
+    const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3);
+    const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
 #endif
-    FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
-    FloatingTYPE *warpedIntensityPtr = static_cast<FloatingTYPE *>(warpedImage->data);
-    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationField->data);
-    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber];
-    FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber];
+    const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
+    FloatingType *warpedIntensityPtr = static_cast<FloatingType*>(warpedImage->data);
+    const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
+    const FieldType *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber];
+    const FieldType *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber];
 
-    int *maskPtr = &mask[0];
-
-    mat44 *floatingIJKMatrix;
-    if(floatingImage->sform_code>0)
-        floatingIJKMatrix=&(floatingImage->sto_ijk);
-    else floatingIJKMatrix=&(floatingImage->qto_ijk);
+    const mat44 *floatingIJKMatrix;
+    if (floatingImage->sform_code > 0)
+        floatingIJKMatrix = &floatingImage->sto_ijk;
+    else floatingIJKMatrix = &floatingImage->qto_ijk;
 
     // Define the kernel to use
     int kernel_size;
-    int kernel_offset=0;
-    void (*kernelCompFctPtr)(double,double *);
-    switch(kernel){
+    int kernel_offset = 0;
+    void (*kernelCompFctPtr)(double, double *);
+    switch (kernel) {
     case 0:
-        kernel_size=2;
-        kernelCompFctPtr=&interpNearestNeighKernel;
-        kernel_offset=0;
+        kernel_size = 2;
+        kernelCompFctPtr = &interpNearestNeighKernel;
+        kernel_offset = 0;
         break; // nearest-neighbour interpolation
     case 1:
-        kernel_size=2;
-        kernelCompFctPtr=&interpLinearKernel;
-        kernel_offset=0;
+        kernel_size = 2;
+        kernelCompFctPtr = &interpLinearKernel;
+        kernel_offset = 0;
         break; // linear interpolation
     case 4:
-        kernel_size=SINC_KERNEL_SIZE;
-        kernelCompFctPtr=&interpWindowedSincKernel;
-        kernel_offset=SINC_KERNEL_RADIUS;
+        kernel_size = SINC_KERNEL_SIZE;
+        kernelCompFctPtr = &interpWindowedSincKernel;
+        kernel_offset = SINC_KERNEL_RADIUS;
         break; // sinc interpolation
     default:
-        kernel_size=4;
-        kernelCompFctPtr=&interpCubicSplineKernel;
-        kernel_offset=1;
+        kernel_size = 4;
+        kernelCompFctPtr = &interpCubicSplineKernel;
+        kernel_offset = 1;
         break; // cubic spline interpolation
     }
 
     // Iteration over the different volume along the 4th axis
-    for(size_t t=0; t<(size_t)warpedImage->nt*warpedImage->nu; t++)
-    {
+    for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) {
 #ifndef NDEBUG
         char text[255];
-        sprintf(text, "3D resampling of volume number %zu",t);
+        sprintf(text, "3D resampling of volume number %zu", t);
         reg_print_msg_debug(text);
 #endif
 
-        FloatingTYPE *warpedIntensity = &warpedIntensityPtr[t*warpedVoxelNumber];
-        FloatingTYPE *floatingIntensity = &floatingIntensityPtr[t*floatingVoxelNumber];
+        FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber];
+        const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber];
 
         int a, b, c, Y, Z, previous[3];
-
-        FloatingTYPE *zPointer, *xyzPointer;
+        const FloatingType *zPointer, *xyzPointer;
         double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], zBasis[SINC_KERNEL_SIZE], relative[3];
         double xTempNewValue, yTempNewValue, intensity;
         float world[3], position[3];
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(index, intensity, world, position, previous, xBasis, yBasis, zBasis, relative, \
+    private(intensity, world, position, previous, xBasis, yBasis, zBasis, relative, \
     a, b, c, Y, Z, zPointer, xyzPointer, xTempNewValue, yTempNewValue) \
     shared(floatingIntensity, warpedIntensity, warpedVoxelNumber, floatingVoxelNumber, \
-    deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, maskPtr, \
+    deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, mask, \
     floatingIJKMatrix, floatingImage, paddingValue, kernel_size, kernel_offset, kernelCompFctPtr)
 #endif // _OPENMP
-        for(index=0; index<warpedVoxelNumber; index++)
-        {
-
-            intensity=paddingValue;
+        for (index = 0; index < warpedVoxelNumber; index++) {
+            intensity = paddingValue;
 
-            if((maskPtr[index])>-1)
-            {
-                world[0]=static_cast<float>(deformationFieldPtrX[index]);
-                world[1]=static_cast<float>(deformationFieldPtrY[index]);
-                world[2]=static_cast<float>(deformationFieldPtrZ[index]);
+            if (mask[index] > -1) {
+                world[0] = static_cast<float>(deformationFieldPtrX[index]);
+                world[1] = static_cast<float>(deformationFieldPtrY[index]);
+                world[2] = static_cast<float>(deformationFieldPtrZ[index]);
 
                 // real -> voxel; floating space
                 reg_mat44_mul(floatingIJKMatrix, world, position);
@@ -454,196 +414,178 @@ void ResampleImage3D(nifti_image *floatingImage,
                 previous[1] = static_cast<int>(reg_floor(position[1]));
                 previous[2] = static_cast<int>(reg_floor(position[2]));
 
-                relative[0]=static_cast<double>(position[0])-static_cast<double>(previous[0]);
-                relative[1]=static_cast<double>(position[1])-static_cast<double>(previous[1]);
-                relative[2]=static_cast<double>(position[2])-static_cast<double>(previous[2]);
+                relative[0] = static_cast<double>(position[0]) - static_cast<double>(previous[0]);
+                relative[1] = static_cast<double>(position[1]) - static_cast<double>(previous[1]);
+                relative[2] = static_cast<double>(position[2]) - static_cast<double>(previous[2]);
 
                 (*kernelCompFctPtr)(relative[0], xBasis);
                 (*kernelCompFctPtr)(relative[1], yBasis);
                 (*kernelCompFctPtr)(relative[2], zBasis);
-                previous[0]-=kernel_offset;
-                previous[1]-=kernel_offset;
-                previous[2]-=kernel_offset;
-
-                intensity=0;
-                if(-1<(previous[0]) && (previous[0]+kernel_size-1)<floatingImage->nx &&
-                   -1<(previous[1]) && (previous[1]+kernel_size-1)<floatingImage->ny &&
-                   -1<(previous[2]) && (previous[2]+kernel_size-1)<floatingImage->nz){
-                   for(c=0; c<kernel_size; c++)
-                   {
-                      Z= previous[2]+c;
-                      zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                      yTempNewValue=0;
-                      for(b=0; b<kernel_size; b++)
-                      {
-                         Y= previous[1]+b;
-                         xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                         xTempNewValue=0;
-                         for(a=0; a<kernel_size; a++)
-                         {
-                            xTempNewValue +=  static_cast<double>(*xyzPointer++) * xBasis[a];
-                         }
-                         yTempNewValue += xTempNewValue * yBasis[b];
-                      }
-                      intensity += yTempNewValue * zBasis[c];
-                   }
-                }
-                else{
-                   for(c=0; c<kernel_size; c++)
-                   {
-                      Z= previous[2]+c;
-                      zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                      yTempNewValue=0;
-                      for(b=0; b<kernel_size; b++)
-                      {
-                         Y= previous[1]+b;
-                         xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                         xTempNewValue=0;
-                         for(a=0; a<kernel_size; a++)
-                         {
-                            if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx &&
-                               -1<Z && Z<floatingImage->nz &&
-                               -1<Y && Y<floatingImage->ny)
-                            {
-                               xTempNewValue +=  static_cast<double>(*xyzPointer) * xBasis[a];
+                previous[0] -= kernel_offset;
+                previous[1] -= kernel_offset;
+                previous[2] -= kernel_offset;
+
+                intensity = 0;
+                if (-1 < (previous[0]) && (previous[0] + kernel_size - 1) < floatingImage->nx &&
+                    -1 < (previous[1]) && (previous[1] + kernel_size - 1) < floatingImage->ny &&
+                    -1 < (previous[2]) && (previous[2] + kernel_size - 1) < floatingImage->nz) {
+                    for (c = 0; c < kernel_size; c++) {
+                        Z = previous[2] + c;
+                        zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny];
+                        yTempNewValue = 0;
+                        for (b = 0; b < kernel_size; b++) {
+                            Y = previous[1] + b;
+                            xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]];
+                            xTempNewValue = 0;
+                            for (a = 0; a < kernel_size; a++) {
+                                xTempNewValue += *xyzPointer++ * xBasis[a];
                             }
-                            else
-                            {
-                               // paddingValue
-                               xTempNewValue +=  static_cast<double>(paddingValue) * xBasis[a];
+                            yTempNewValue += xTempNewValue * yBasis[b];
+                        }
+                        intensity += yTempNewValue * zBasis[c];
+                    }
+                } else {
+                    for (c = 0; c < kernel_size; c++) {
+                        Z = previous[2] + c;
+                        zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny];
+                        yTempNewValue = 0;
+                        for (b = 0; b < kernel_size; b++) {
+                            Y = previous[1] + b;
+                            xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]];
+                            xTempNewValue = 0;
+                            for (a = 0; a < kernel_size; a++) {
+                                if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx &&
+                                    -1 < Z && Z < floatingImage->nz &&
+                                    -1 < Y && Y < floatingImage->ny) {
+                                    xTempNewValue += *xyzPointer * xBasis[a];
+                                } else {
+                                    // paddingValue
+                                    xTempNewValue += paddingValue * xBasis[a];
+                                }
+                                xyzPointer++;
                             }
-                            xyzPointer++;
-                         }
-                         yTempNewValue += xTempNewValue * yBasis[b];
-                      }
-                      intensity += yTempNewValue * zBasis[c];
-                   }
+                            yTempNewValue += xTempNewValue * yBasis[b];
+                        }
+                        intensity += yTempNewValue * zBasis[c];
+                    }
                 }
             }
 
-            switch(floatingImage->datatype)
-            {
+            switch (floatingImage->datatype) {
             case NIFTI_TYPE_FLOAT32:
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity);
+                warpedIntensity[index] = static_cast<FloatingType>(intensity);
                 break;
             case NIFTI_TYPE_FLOAT64:
-                warpedIntensity[index]=intensity;
+                warpedIntensity[index] = static_cast<FloatingType>(intensity);
                 break;
             case NIFTI_TYPE_UINT8:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=255?reg_round(intensity):255); // 255=2^8-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT16:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=65535?reg_round(intensity):65535); // 65535=2^16-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT32:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=4294967295?reg_round(intensity):4294967295); // 4294967295=2^32-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                 break;
             default:
-                if(intensity!=intensity)
-                    intensity=0;
-                warpedIntensity[index]=static_cast<FloatingTYPE>(reg_round(intensity));
+                if (intensity != intensity)
+                    intensity = 0;
+                warpedIntensity[index] = static_cast<FloatingType>(reg_round(intensity));
                 break;
             }
         }
     }
 }
 /* *************************************************************** */
-template<class FloatingTYPE, class FieldTYPE>
-void ResampleImage2D(nifti_image *floatingImage,
-                     nifti_image *deformationField,
+template<class FloatingType, class FieldType>
+void ResampleImage2D(const nifti_image *floatingImage,
+                     const nifti_image *deformationField,
                      nifti_image *warpedImage,
-                     int *mask,
-                     FieldTYPE paddingValue,
-                     int kernel)
-{
+                     const int *mask,
+                     const FieldType& paddingValue,
+                     const int& kernel) {
 #ifdef _WIN32
     long  index;
-    const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage, 2);
-    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2);
+    const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 2);
+    const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 2);
 #else
     size_t  index;
-    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage, 2);
-    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2);
+    const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 2);
+    const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 2);
 #endif
-    FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
-    FloatingTYPE *warpedIntensityPtr = static_cast<FloatingTYPE *>(warpedImage->data);
-    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationField->data);
-    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber];
-
-    int *maskPtr = &mask[0];
+    const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
+    FloatingType *warpedIntensityPtr = static_cast<FloatingType*>(warpedImage->data);
+    const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
+    const FieldType *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber];
 
-    mat44 *floatingIJKMatrix;
-    if(floatingImage->sform_code>0)
-        floatingIJKMatrix=&(floatingImage->sto_ijk);
-    else floatingIJKMatrix=&(floatingImage->qto_ijk);
+    const mat44 *floatingIJKMatrix;
+    if (floatingImage->sform_code > 0)
+        floatingIJKMatrix = &floatingImage->sto_ijk;
+    else floatingIJKMatrix = &floatingImage->qto_ijk;
 
     int kernel_size;
-    int kernel_offset=0;
-    void (*kernelCompFctPtr)(double,double *);
-    switch(kernel){
+    int kernel_offset = 0;
+    void (*kernelCompFctPtr)(double, double *);
+    switch (kernel) {
     case 0:
-        kernel_size=2;
-        kernelCompFctPtr=&interpNearestNeighKernel;
-        kernel_offset=0;
+        kernel_size = 2;
+        kernelCompFctPtr = &interpNearestNeighKernel;
+        kernel_offset = 0;
         break; // nearest-neighbour interpolation
     case 1:
-        kernel_size=2;
-        kernelCompFctPtr=&interpLinearKernel;
-        kernel_offset=0;
+        kernel_size = 2;
+        kernelCompFctPtr = &interpLinearKernel;
+        kernel_offset = 0;
         break; // linear interpolation
     case 4:
-        kernel_size=SINC_KERNEL_SIZE;
-        kernelCompFctPtr=&interpWindowedSincKernel;
-        kernel_offset=SINC_KERNEL_RADIUS;
+        kernel_size = SINC_KERNEL_SIZE;
+        kernelCompFctPtr = &interpWindowedSincKernel;
+        kernel_offset = SINC_KERNEL_RADIUS;
         break; // sinc interpolation
     default:
-        kernel_size=4;
-        kernelCompFctPtr=&interpCubicSplineKernel;
-        kernel_offset=1;
+        kernel_size = 4;
+        kernelCompFctPtr = &interpCubicSplineKernel;
+        kernel_offset = 1;
         break; // cubic spline interpolation
     }
 
     // Iteration over the different volume along the 4th axis
-    for(size_t t=0; t<(size_t)warpedImage->nt*warpedImage->nu; t++)
-    {
+    for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) {
 #ifndef NDEBUG
         char text[255];
-        sprintf(text, "2D resampling of volume number %zu",t);
+        sprintf(text, "2D resampling of volume number %zu", t);
         reg_print_msg_debug(text);
 #endif
-        FloatingTYPE *warpedIntensity = &warpedIntensityPtr[t*warpedVoxelNumber];
-        FloatingTYPE *floatingIntensity = &floatingIntensityPtr[t*floatingVoxelNumber];
+        FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber];
+        const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber];
 
         int a, b, Y, previous[2];
-
-        FloatingTYPE *xyzPointer;
+        const FloatingType *xyzPointer;
         double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], relative[2];
         double xTempNewValue, intensity;
-        float world[3] = {0, 0, 0};
-        float position[3] = {0, 0, 0};
+        float world[3] = { 0, 0, 0 };
+        float position[3] = { 0, 0, 0 };
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(index, intensity, world, position, previous, xBasis, yBasis, relative, \
+    private(intensity, world, position, previous, xBasis, yBasis, relative, \
     a, b, Y, xyzPointer, xTempNewValue) \
     shared(floatingIntensity, warpedIntensity, warpedVoxelNumber, floatingVoxelNumber, \
-    deformationFieldPtrX, deformationFieldPtrY, maskPtr, \
+    deformationFieldPtrX, deformationFieldPtrY, mask, \
     floatingIJKMatrix, floatingImage, paddingValue, kernel_size, kernel_offset, kernelCompFctPtr)
 #endif // _OPENMP
-        for(index=0; index<warpedVoxelNumber; index++)
-        {
+        for (index = 0; index < warpedVoxelNumber; index++) {
+            intensity = paddingValue;
 
-            intensity=paddingValue;
-            if((maskPtr[index])>-1)
-            {
+            if (mask[index] > -1) {
                 world[0] = static_cast<float>(deformationFieldPtrX[index]);
                 world[1] = static_cast<float>(deformationFieldPtrY[index]);
                 world[2] = 0;
@@ -654,59 +596,53 @@ void ResampleImage2D(nifti_image *floatingImage,
                 previous[0] = static_cast<int>(reg_floor(position[0]));
                 previous[1] = static_cast<int>(reg_floor(position[1]));
 
-                relative[0] = static_cast<double>(position[0])-static_cast<double>(previous[0]);
-                relative[1] = static_cast<double>(position[1])-static_cast<double>(previous[1]);
+                relative[0] = static_cast<double>(position[0]) - static_cast<double>(previous[0]);
+                relative[1] = static_cast<double>(position[1]) - static_cast<double>(previous[1]);
 
                 (*kernelCompFctPtr)(relative[0], xBasis);
                 (*kernelCompFctPtr)(relative[1], yBasis);
-                previous[0]-=kernel_offset;
-                previous[1]-=kernel_offset;
-
-                intensity=0;
-                for(b=0; b<kernel_size; b++)
-                {
-                    Y= previous[1]+b;
-                    xyzPointer = &floatingIntensity[Y*floatingImage->nx+previous[0]];
-                    xTempNewValue=0;
-                    for(a=0; a<kernel_size; a++)
-                    {
-                        if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx &&
-                                -1<Y && Y<floatingImage->ny)
-                        {
-                            xTempNewValue +=  static_cast<double>(*xyzPointer) * xBasis[a];
-                        }
-                        else
-                        {
+                previous[0] -= kernel_offset;
+                previous[1] -= kernel_offset;
+
+                intensity = 0;
+                for (b = 0; b < kernel_size; b++) {
+                    Y = previous[1] + b;
+                    xyzPointer = &floatingIntensity[Y * floatingImage->nx + previous[0]];
+                    xTempNewValue = 0;
+                    for (a = 0; a < kernel_size; a++) {
+                        if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx &&
+                            -1 < Y && Y < floatingImage->ny) {
+                            xTempNewValue += *xyzPointer * xBasis[a];
+                        } else {
                             // paddingValue
-                            xTempNewValue +=  static_cast<double>(paddingValue) * xBasis[a];
+                            xTempNewValue += paddingValue * xBasis[a];
                         }
                         xyzPointer++;
                     }
                     intensity += xTempNewValue * yBasis[b];
                 }
 
-                switch(floatingImage->datatype)
-                {
+                switch (floatingImage->datatype) {
                 case NIFTI_TYPE_FLOAT32:
-                    warpedIntensity[index]=static_cast<FloatingTYPE>(intensity);
+                    warpedIntensity[index] = static_cast<FloatingType>(intensity);
                     break;
                 case NIFTI_TYPE_FLOAT64:
-                    warpedIntensity[index]=intensity;
+                    warpedIntensity[index] = static_cast<FloatingType>(intensity);
                     break;
                 case NIFTI_TYPE_UINT8:
-                    intensity=(intensity<=255?reg_round(intensity):255); // 255=2^8-1
-                    warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                    intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
+                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                     break;
                 case NIFTI_TYPE_UINT16:
-                    intensity=(intensity<=65535?reg_round(intensity):65535); // 65535=2^16-1
-                    warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                    intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
+                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                     break;
                 case NIFTI_TYPE_UINT32:
-                    intensity=(intensity<=4294967295?reg_round(intensity):4294967295); // 4294967295=2^32-1
-                    warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                    intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
+                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                     break;
                 default:
-                    warpedIntensity[index]=static_cast<FloatingTYPE>(reg_round(intensity));
+                    warpedIntensity[index] = static_cast<FloatingType>(reg_round(intensity));
                     break;
                 }
             }
@@ -714,8 +650,6 @@ void ResampleImage2D(nifti_image *floatingImage,
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-
 /** This function resample a floating image into the referential
  * of a reference image by applying an affine transformation and
  * a deformation field. The affine transformation has to be in
@@ -728,99 +662,88 @@ void ResampleImage2D(nifti_image *floatingImage,
  * that provides the position of the DT components (if there are any)
  * these values are set to -1 if there are not
  */
-template <class FieldTYPE, class FloatingTYPE>
-void reg_resampleImage2(nifti_image *floatingImage,
-                        nifti_image *warpedImage,
-                        nifti_image *deformationFieldImage,
-                        int *mask,
-                        int interp,
-                        FieldTYPE paddingValue,
-                        int *dtIndicies,
-                        mat33 * jacMat)
-{
+template <class FieldType, class FloatingType>
+void reg_resampleImage(nifti_image *floatingImage,
+                       nifti_image *warpedImage,
+                       const nifti_image *deformationFieldImage,
+                       const int *mask,
+                       const int& interp,
+                       const FieldType& paddingValue,
+                       const int *dtIndicies,
+                       const mat33 *jacMat) {
     // The floating image data is copied in case one deal with DTI
-    void *originalFloatingData=nullptr;
+    void *originalFloatingData = nullptr;
     // The DTI are logged
-    reg_dti_resampling_preprocessing<FloatingTYPE>(floatingImage,
-                                                   &originalFloatingData,
-                                                   dtIndicies);
+    reg_dti_resampling_preprocessing<FloatingType>(floatingImage, &originalFloatingData, dtIndicies);
 
     // The deformation field contains the position in the real world
-    if(deformationFieldImage->nu>2)
-    {
-        ResampleImage3D<FloatingTYPE,FieldTYPE>(floatingImage,
-                                                deformationFieldImage,
-                                                warpedImage,
-                                                mask,
-                                                paddingValue,
-                                                interp);
-    }
-    else
-    {
-        ResampleImage2D<FloatingTYPE,FieldTYPE>(floatingImage,
-                                                deformationFieldImage,
-                                                warpedImage,
-                                                mask,
-                                                paddingValue,
-                                                interp);
+    if (deformationFieldImage->nu > 2) {
+        ResampleImage3D<FloatingType, FieldType>(floatingImage,
+                                                 deformationFieldImage,
+                                                 warpedImage,
+                                                 mask,
+                                                 paddingValue,
+                                                 interp);
+    } else {
+        ResampleImage2D<FloatingType, FieldType>(floatingImage,
+                                                 deformationFieldImage,
+                                                 warpedImage,
+                                                 mask,
+                                                 paddingValue,
+                                                 interp);
     }
     // The temporary logged floating array is deleted and the original restored
-    if(originalFloatingData!=nullptr)
-    {
+    if (originalFloatingData != nullptr) {
         free(floatingImage->data);
-        floatingImage->data=originalFloatingData;
-        originalFloatingData=nullptr;
+        floatingImage->data = originalFloatingData;
+        originalFloatingData = nullptr;
     }
 
     // The interpolated tensors are reoriented and exponentiated
-    reg_dti_resampling_postprocessing<FloatingTYPE>(warpedImage,
-                                                    mask,
-                                                    jacMat,
-                                                    dtIndicies);
+    reg_dti_resampling_postprocessing<FloatingType>(warpedImage, mask, jacMat, dtIndicies);
 }
 /* *************************************************************** */
 void reg_resampleImage(nifti_image *floatingImage,
                        nifti_image *warpedImage,
-                       nifti_image *deformationField,
-                       int *mask,
-                       int interp,
-                       float paddingValue,
-                       bool *dti_timepoint,
-                       mat33 * jacMat)
-{
-    if(floatingImage->datatype != warpedImage->datatype)
-    {
+                       const nifti_image *deformationField,
+                       const int *mask,
+                       const int& interp,
+                       const float& paddingValue,
+                       const bool *dtiTimepoint,
+                       const mat33 *jacMat) {
+    if (floatingImage->datatype != warpedImage->datatype) {
         reg_print_fct_error("reg_resampleImage");
         reg_print_msg_error("The floating and warped image should have the same data type");
         reg_exit();
     }
 
-    if(floatingImage->nt != warpedImage->nt)
-    {
+    if (floatingImage->nt != warpedImage->nt) {
         reg_print_fct_error("reg_resampleImage");
         reg_print_msg_error("The floating and warped images have different dimension along the time axis");
         reg_exit();
     }
+    if (deformationField->datatype != NIFTI_TYPE_FLOAT32 &&
+        deformationField->datatype != NIFTI_TYPE_FLOAT64) {
+        reg_print_fct_error("reg_resampleImage");
+        reg_print_msg_error("The deformation field image is expected to be of type float or double");
+        reg_exit();
+    }
 
     // Define the DTI indices if required
     int dtIndicies[6];
-    for(int i=0; i<6; ++i) dtIndicies[i]=-1;
-    if(dti_timepoint!=nullptr)
-    {
-        if(jacMat==nullptr)
-        {
+    for (int i = 0; i < 6; ++i) dtIndicies[i] = -1;
+    if (dtiTimepoint != nullptr) {
+        if (jacMat == nullptr) {
             reg_print_fct_error("reg_resampleImage");
             reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided");
             reg_exit();
         }
-        int j=0;
-        for(int i=0; i<floatingImage->nt; ++i)
-        {
-            if(dti_timepoint[i])
-                dtIndicies[j++]=i;
+        int j = 0;
+        for (int i = 0; i < floatingImage->nt; ++i) {
+            if (dtiTimepoint[i])
+                dtIndicies[j++] = i;
         }
-        if((floatingImage->nz>1 && j!=6) && (floatingImage->nz==1 && j!=3))
-        {
+        if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) {
             reg_print_fct_error("reg_resampleImage");
             reg_print_msg_error("DTI resampling: Unexpected number of DTI components");
             reg_exit();
@@ -829,275 +752,98 @@ void reg_resampleImage(nifti_image *floatingImage,
 
     // a mask array is created if no mask is specified
     bool MrPropreRules = false;
-    if(mask==nullptr)
-    {
+    if (mask == nullptr) {
         // voxels in the background are set to negative value so 0 corresponds to active voxel
-        mask = (int *)calloc(CalcVoxelNumber(*warpedImage), sizeof(int));
+        mask = (int*)calloc(NiftiImage::calcVoxelNumber(warpedImage, 3), sizeof(int));
         MrPropreRules = true;
     }
 
-    switch ( deformationField->datatype )
-    {
-    case NIFTI_TYPE_FLOAT32:
-        switch ( floatingImage->datatype )
-        {
-        case NIFTI_TYPE_UINT8:
-            reg_resampleImage2<float,unsigned char>(floatingImage,
-                                                    warpedImage,
-                                                    deformationField,
-                                                    mask,
-                                                    interp,
-                                                    paddingValue,
-                                                    dtIndicies,
-                                                    jacMat);
-            break;
-        case NIFTI_TYPE_INT8:
-            reg_resampleImage2<float,char>(floatingImage,
-                                           warpedImage,
-                                           deformationField,
-                                           mask,
-                                           interp,
-                                           paddingValue,
-                                           dtIndicies,
-                                           jacMat);
-            break;
-        case NIFTI_TYPE_UINT16:
-            reg_resampleImage2<float,unsigned short>(floatingImage,
-                                                     warpedImage,
-                                                     deformationField,
-                                                     mask,
-                                                     interp,
-                                                     paddingValue,
-                                                     dtIndicies,
-                                                     jacMat);
-            break;
-        case NIFTI_TYPE_INT16:
-            reg_resampleImage2<float,short>(floatingImage,
-                                            warpedImage,
-                                            deformationField,
-                                            mask,
-                                            interp,
-                                            paddingValue,
-                                            dtIndicies,
-                                            jacMat);
-            break;
-        case NIFTI_TYPE_UINT32:
-            reg_resampleImage2<float,unsigned>(floatingImage,
-                                                   warpedImage,
-                                                   deformationField,
-                                                   mask,
-                                                   interp,
-                                                   paddingValue,
-                                                   dtIndicies,
-                                                   jacMat);
-            break;
-        case NIFTI_TYPE_INT32:
-            reg_resampleImage2<float,int>(floatingImage,
-                                          warpedImage,
-                                          deformationField,
-                                          mask,
-                                          interp,
-                                          paddingValue,
-                                          dtIndicies,
-                                          jacMat);
-            break;
-        case NIFTI_TYPE_FLOAT32:
-            reg_resampleImage2<float,float>(floatingImage,
-                                            warpedImage,
-                                            deformationField,
-                                            mask,
-                                            interp,
-                                            paddingValue,
-                                            dtIndicies,
-                                            jacMat);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_resampleImage2<float,double>(floatingImage,
-                                             warpedImage,
-                                             deformationField,
-                                             mask,
-                                             interp,
-                                             paddingValue,
-                                             dtIndicies,
-                                             jacMat);
-            break;
-        default:
-            printf("floating pixel type unsupported.");
-            break;
-        }
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        switch ( floatingImage->datatype )
-        {
-        case NIFTI_TYPE_UINT8:
-            reg_resampleImage2<double,unsigned char>(floatingImage,
-                                                     warpedImage,
-                                                     deformationField,
-                                                     mask,
-                                                     interp,
-                                                     paddingValue,
-                                                     dtIndicies,
-                                                     jacMat);
-            break;
-        case NIFTI_TYPE_INT8:
-            reg_resampleImage2<double,char>(floatingImage,
-                                            warpedImage,
-                                            deformationField,
-                                            mask,
-                                            interp,
-                                            paddingValue,
-                                            dtIndicies,
-                                            jacMat);
-            break;
-        case NIFTI_TYPE_UINT16:
-            reg_resampleImage2<double,unsigned short>(floatingImage,
-                                                      warpedImage,
-                                                      deformationField,
-                                                      mask,
-                                                      interp,
-                                                      paddingValue,
-                                                      dtIndicies,
-                                                      jacMat);
-            break;
-        case NIFTI_TYPE_INT16:
-            reg_resampleImage2<double,short>(floatingImage,
-                                             warpedImage,
-                                             deformationField,
-                                             mask,
-                                             interp,
-                                             paddingValue,
-                                             dtIndicies,
-                                             jacMat);
-            break;
-        case NIFTI_TYPE_UINT32:
-            reg_resampleImage2<double,unsigned>(floatingImage,
-                                                    warpedImage,
-                                                    deformationField,
-                                                    mask,
-                                                    interp,
-                                                    paddingValue,
-                                                    dtIndicies,
-                                                    jacMat );
-            break;
-        case NIFTI_TYPE_INT32:
-            reg_resampleImage2<double,int>(floatingImage,
-                                           warpedImage,
-                                           deformationField,
-                                           mask,
-                                           interp,
-                                           paddingValue,
-                                           dtIndicies,
-                                           jacMat);
-            break;
-        case NIFTI_TYPE_FLOAT32:
-            reg_resampleImage2<double,float>(floatingImage,
-                                             warpedImage,
-                                             deformationField,
-                                             mask,
-                                             interp,
-                                             paddingValue,
-                                             dtIndicies,
-                                             jacMat);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_resampleImage2<double,double>(floatingImage,
-                                              warpedImage,
-                                              deformationField,
-                                              mask,
-                                              interp,
-                                              paddingValue,
-                                              dtIndicies,
-                                              jacMat);
-            break;
-        default:
-            printf("floating pixel type unsupported.");
-            break;
-        }
-        break;
-    default:
-        printf("Deformation field pixel type unsupported.");
-        break;
-    }
-    if(MrPropreRules)
-    {
-        free(mask);
-        mask=nullptr;
-    }
+    std::visit([&](auto&& defFieldDataType, auto&& floImgDataType) {
+        using DefFieldDataType = std::decay_t<decltype(defFieldDataType)>;
+        using FloImgDataType = std::decay_t<decltype(floImgDataType)>;
+        reg_resampleImage<DefFieldDataType, FloImgDataType>(floatingImage,
+                                                            warpedImage,
+                                                            deformationField,
+                                                            mask,
+                                                            interp,
+                                                            paddingValue,
+                                                            dtIndicies,
+                                                            jacMat);
+    }, NiftiImage::getFloatingDataType(deformationField), NiftiImage::getDataType(floatingImage));
+
+    if (MrPropreRules)
+        free(const_cast<int*>(mask));
 }
 /* *************************************************************** */
-
-template<class FloatingTYPE, class FieldTYPE>
-void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage,
-                              nifti_image *deformationField,
+template<class FloatingType, class FieldType>
+void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
+                              const nifti_image *deformationField,
                               nifti_image *warpedImage,
-                              int *mask,
-                              FieldTYPE paddingValue,
-                              int kernel)
-{
+                              const int *mask,
+                              const FieldType& paddingValue,
+                              const int& kernel) {
 #ifdef _WIN32
     long index;
-    const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage);
-    const long warpedPlaneNumber = (long)CalcVoxelNumber(*warpedImage, 2);
+    const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3);
+    const long warpedPlaneNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 2);
     const long warpedLineNumber = (long)warpedImage->nx;
-    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
+    const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3);
 #else
     size_t index;
-    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
-    const size_t warpedPlaneNumber = CalcVoxelNumber(*warpedImage, 2);
+    const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3);
+    const size_t warpedPlaneNumber = NiftiImage::calcVoxelNumber(warpedImage, 2);
     const size_t warpedLineNumber = (size_t)warpedImage->nx;
-    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
+    const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
 #endif
-    FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
-    FloatingTYPE *warpedIntensityPtr = static_cast<FloatingTYPE *>(warpedImage->data);
-    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationField->data);
-    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber];
-    FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber];
-    int *maskPtr = &mask[0];
-
-    mat44 *floatingIJKMatrix;
-    if(floatingImage->sform_code>0)
-        floatingIJKMatrix=&(floatingImage->sto_ijk);
-    else floatingIJKMatrix=&(floatingImage->qto_ijk);
+    const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
+    FloatingType *warpedIntensityPtr = static_cast<FloatingType*>(warpedImage->data);
+    const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
+    const FieldType *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber];
+    const FieldType *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber];
+
+    const mat44 *floatingIJKMatrix;
+    if (floatingImage->sform_code > 0)
+        floatingIJKMatrix = &floatingImage->sto_ijk;
+    else floatingIJKMatrix = &floatingImage->qto_ijk;
 
     // Define the kernel to use
     int kernel_size;
-    int kernel_offset=0;
-    void (*kernelCompFctPtr)(double,double *);
-    switch(kernel){
+    int kernel_offset = 0;
+    void (*kernelCompFctPtr)(double, double *);
+    switch (kernel) {
     case 0:
         reg_print_fct_error("ResampleImage3D_PSF");
         reg_print_msg_error("Not implemented for NN interpolation yet");
         reg_exit();
-        kernel_size=2;
-        kernelCompFctPtr=&interpNearestNeighKernel;
-        kernel_offset=0;
+        kernel_size = 2;
+        kernelCompFctPtr = &interpNearestNeighKernel;
+        kernel_offset = 0;
         break; // nearest-neighbour interpolation
     case 1:
-        kernel_size=2;
-        kernelCompFctPtr=&interpLinearKernel;
-        kernel_offset=0;
+        kernel_size = 2;
+        kernelCompFctPtr = &interpLinearKernel;
+        kernel_offset = 0;
         break; // linear interpolation
     case 4:
-        kernel_size=SINC_KERNEL_SIZE;
-        kernelCompFctPtr=&interpWindowedSincKernel;
-        kernel_offset=SINC_KERNEL_RADIUS;
+        kernel_size = SINC_KERNEL_SIZE;
+        kernelCompFctPtr = &interpWindowedSincKernel;
+        kernel_offset = SINC_KERNEL_RADIUS;
         break; // sinc interpolation
     default:
-        kernel_size=4;
-        kernelCompFctPtr=&interpCubicSplineKernel;
-        kernel_offset=1;
+        kernel_size = 4;
+        kernelCompFctPtr = &interpCubicSplineKernel;
+        kernel_offset = 1;
         break; // cubic spline interpolation
     }
 
     // Iteration over the different volume along the 4th axis
-    for(size_t t=0; t<(size_t)warpedImage->nt*warpedImage->nu; t++)
-    {
+    for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) {
 #ifndef NDEBUG
-        printf("[NiftyReg DEBUG] 3D resampling of volume number %zu\n",t);
+        printf("[NiftyReg DEBUG] 3D resampling of volume number %zu\n", t);
 #endif
 
-        FloatingTYPE *warpedIntensity = &warpedIntensityPtr[t*warpedVoxelNumber];
-        FloatingTYPE *floatingIntensity = &floatingIntensityPtr[t*floatingVoxelNumber];
+        FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber];
+        const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber];
 
         double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], zBasis[SINC_KERNEL_SIZE], relative[3];
         double xBasisSamp[SINC_KERNEL_SIZE], yBasisSamp[SINC_KERNEL_SIZE], zBasisSamp[SINC_KERNEL_SIZE];
@@ -1108,14 +854,14 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage,
         interpWindowedSincKernel(0.00001, zBasisSamp);
 
         float psfWeightSum;
-        FloatingTYPE *zPointer, *xyzPointer;
+        const FloatingType *zPointer, *xyzPointer;
         double xTempNewValue, yTempNewValue, intensity, psfIntensity, psfWorld[3], position[3];
         float currentA, currentB, currentC, psfWeight;
         float shiftSamp[3];
         float currentAPre, currentARel, currentBPre, currentBRel, currentCPre, currentCRel, resamplingWeightSum, resamplingWeight;
         size_t currentIndex;
 
-        /*
+/*
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(intensity, psfWeightSum, psfWeight, \
@@ -1124,98 +870,91 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage,
     resamplingWeightSum, resamplingWeight, currentIndex, previous, relative,\
     xBasis, yBasis, zBasis, xBasisSamp, yBasisSamp, zBasisSamp, relativeSamp, Y, Z, psfIntensity, yTempNewValue, xTempNewValue,\
     xyzPointer, zPointer) \
-    shared(warpedVoxelNumber, maskPtr, paddingValue,\
+    shared(warpedVoxelNumber, mask, paddingValue,\
     a, b, c , warpedPlaneNumber, warpedLineNumber, floatingIntensity,\
     deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, floatingIJKMatrix,\
     floatingImage, warpedImage, kernelCompFctPtr, kernel_offset, kernel_size, warpedIntensity,stderr)
 #endif // _OPENMP
 */
-        for(index=0; index<warpedVoxelNumber; index++)
-        {
-            intensity=paddingValue;
+        for (index = 0; index < warpedVoxelNumber; index++) {
+            intensity = paddingValue;
 
-            if((maskPtr[index])>-1)
-            {
+            if (mask[index] > -1) {
                 //initialise weights
-                psfWeightSum=0.0f;
-                intensity=0.0f;
-                currentC=index/warpedPlaneNumber;
-                currentB=(index-currentC*warpedPlaneNumber)/warpedLineNumber;
-                currentA=(index-currentB*warpedLineNumber-currentC*warpedPlaneNumber);
+                psfWeightSum = 0.0f;
+                intensity = 0.0f;
+                currentC = static_cast<float>(index / warpedPlaneNumber);
+                currentB = (index - currentC * warpedPlaneNumber) / warpedLineNumber;
+                currentA = (index - currentB * warpedLineNumber - currentC * warpedPlaneNumber);
 
                 // coordinates in eigen space
-                float shiftall=SINC_KERNEL_RADIUS;
-                float spacing=1.0f;
-                spacing=0.3f;
-                for(shiftSamp[0]=-shiftall;shiftSamp[0]<=shiftall; shiftSamp[0]+=spacing)
-                {
-                    for(shiftSamp[1]=-shiftall;shiftSamp[1]<=shiftall; shiftSamp[1]+=spacing)
-                    {
-                        for(shiftSamp[2]=-shiftall;shiftSamp[2]<=shiftall; shiftSamp[2]+=spacing)
-                        {
+                float shiftall = SINC_KERNEL_RADIUS;
+                float spacing = 1.0f;
+                spacing = 0.3f;
+                for (shiftSamp[0] = -shiftall; shiftSamp[0] <= shiftall; shiftSamp[0] += spacing) {
+                    for (shiftSamp[1] = -shiftall; shiftSamp[1] <= shiftall; shiftSamp[1] += spacing) {
+                        for (shiftSamp[2] = -shiftall; shiftSamp[2] <= shiftall; shiftSamp[2] += spacing) {
                             // Distance threshold (only interpolate if distance is below 3 std)
 
                             // Use the Eigen coordinates and convert them to XYZ
                             // The new lambda per coordinate is eige_coordinate*sqrt(eigenVal)
                             // as the sqrt(eigenVal) is equivalent to the STD
 
-
-                            psfWeight=interpWindowedSincKernel_Samp(shiftSamp[0],shiftall)*
-                                    interpWindowedSincKernel_Samp(shiftSamp[1],shiftall)*
-                                    interpWindowedSincKernel_Samp(shiftSamp[2],shiftall);
+                            psfWeight = static_cast<float>(interpWindowedSincKernel_Samp(shiftSamp[0], shiftall) *
+                                                           interpWindowedSincKernel_Samp(shiftSamp[1], shiftall) *
+                                                           interpWindowedSincKernel_Samp(shiftSamp[2], shiftall));
                             //  std::cout<<shiftSamp[0]<<", "<<shiftSamp[1]<<", "<<shiftSamp[2]<<", "<<psfWeight<<std::endl;
 
                             // Interpolate (trilinearly) the deformation field for non-integer positions
-                            float scalling=1.0f;
-                            currentAPre=(float)(reg_floor(currentA+(shiftSamp[0]/warpedImage->pixdim[1])*scalling));
-                            currentARel=currentA+(shiftSamp[0]/warpedImage->pixdim[1]*scalling)-(float)(currentAPre);
+                            float scalling = 1.0f;
+                            currentAPre = (float)(reg_floor(currentA + (shiftSamp[0] / warpedImage->pixdim[1]) * scalling));
+                            currentARel = currentA + (shiftSamp[0] / warpedImage->pixdim[1] * scalling) - (float)(currentAPre);
 
-                            currentBPre=(float)(reg_floor(currentB+(shiftSamp[1]/warpedImage->pixdim[2])));
-                            currentBRel=currentB+(shiftSamp[1]/warpedImage->pixdim[2]*scalling)-(float)(currentBPre);
-
-                            currentCPre=(float)(reg_floor(currentC+(shiftSamp[2]/warpedImage->pixdim[3]*scalling)));
-                            currentCRel=currentC+(shiftSamp[2]/warpedImage->pixdim[3]*scalling)-(float)(currentCPre);
+                            currentBPre = (float)(reg_floor(currentB + (shiftSamp[1] / warpedImage->pixdim[2])));
+                            currentBRel = currentB + (shiftSamp[1] / warpedImage->pixdim[2] * scalling) - (float)(currentBPre);
 
+                            currentCPre = (float)(reg_floor(currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling)));
+                            currentCRel = currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling) - (float)(currentCPre);
 
                             // Interpolate the PSF world coordinates
-                            psfWorld[0]=0.0f;
-                            psfWorld[1]=0.0f;
-                            psfWorld[2]=0.0f;
-                            if(psfWeight>0){
-                                resamplingWeightSum=0.0f;
-                                for (a=0;a<=1;a++){
-                                    for (b=0;b<=1;b++){
-                                        for (c=0;c<=1;c++){
-
-                                            if((currentAPre+a)>=0
-                                                    && (currentBPre+b)>=0
-                                                    && (currentCPre+c)>=0
-                                                    && (currentAPre+a)<warpedImage->nx
-                                                    && (currentBPre+b)<warpedImage->ny
-                                                    && (currentCPre+c)<warpedImage->nz){
-
-                                                currentIndex=(currentAPre+a)+
-                                                        (currentBPre+b)*warpedLineNumber+
-                                                        (currentCPre+c)*warpedPlaneNumber;
-
-                                                resamplingWeight=fabs((float)(1-a)-currentARel)*
-                                                        fabs((float)(1-b)-currentBRel)*
-                                                        fabs((float)(1-c)-currentCRel);
-
-                                                resamplingWeightSum+=resamplingWeight;
-
-                                                psfWorld[0]+=static_cast<double>(resamplingWeight*deformationFieldPtrX[currentIndex]);
-                                                psfWorld[1]+=static_cast<double>(resamplingWeight*deformationFieldPtrY[currentIndex]);
-                                                psfWorld[2]+=static_cast<double>(resamplingWeight*deformationFieldPtrZ[currentIndex]);
+                            psfWorld[0] = 0.0f;
+                            psfWorld[1] = 0.0f;
+                            psfWorld[2] = 0.0f;
+                            if (psfWeight > 0) {
+                                resamplingWeightSum = 0.0f;
+                                for (a = 0; a <= 1; a++) {
+                                    for (b = 0; b <= 1; b++) {
+                                        for (c = 0; c <= 1; c++) {
+
+                                            if ((currentAPre + a) >= 0
+                                                && (currentBPre + b) >= 0
+                                                && (currentCPre + c) >= 0
+                                                && (currentAPre + a) < warpedImage->nx
+                                                && (currentBPre + b) < warpedImage->ny
+                                                && (currentCPre + c) < warpedImage->nz) {
+
+                                                currentIndex = static_cast<size_t>((currentAPre + a) +
+                                                                                   (currentBPre + b) * warpedLineNumber +
+                                                                                   (currentCPre + c) * warpedPlaneNumber);
+
+                                                resamplingWeight = fabs((float)(1 - a) - currentARel) *
+                                                    fabs((float)(1 - b) - currentBRel) *
+                                                    fabs((float)(1 - c) - currentCRel);
+
+                                                resamplingWeightSum += resamplingWeight;
+
+                                                psfWorld[0] += static_cast<double>(resamplingWeight * deformationFieldPtrX[currentIndex]);
+                                                psfWorld[1] += static_cast<double>(resamplingWeight * deformationFieldPtrY[currentIndex]);
+                                                psfWorld[2] += static_cast<double>(resamplingWeight * deformationFieldPtrZ[currentIndex]);
                                             }
                                         }
                                     }
                                 }
 
-                                if(resamplingWeightSum>0){
-                                    psfWorld[0]/=resamplingWeightSum;
-                                    psfWorld[1]/=resamplingWeightSum;
-                                    psfWorld[2]/=resamplingWeightSum;
+                                if (resamplingWeightSum > 0) {
+                                    psfWorld[0] /= resamplingWeightSum;
+                                    psfWorld[1] /= resamplingWeightSum;
+                                    psfWorld[2] /= resamplingWeightSum;
 
                                     // real -> voxel; floating space
                                     reg_mat44_mul(floatingIJKMatrix, psfWorld, position);
@@ -1224,40 +963,34 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage,
                                     previous[1] = static_cast<int>(reg_floor(position[1]));
                                     previous[2] = static_cast<int>(reg_floor(position[2]));
 
-                                    relative[0]=position[0]-static_cast<double>(previous[0]);
-                                    relative[1]=position[1]-static_cast<double>(previous[1]);
-                                    relative[2]=position[2]-static_cast<double>(previous[2]);
+                                    relative[0] = position[0] - static_cast<double>(previous[0]);
+                                    relative[1] = position[1] - static_cast<double>(previous[1]);
+                                    relative[2] = position[2] - static_cast<double>(previous[2]);
 
                                     (*kernelCompFctPtr)(relative[0], xBasis);
                                     (*kernelCompFctPtr)(relative[1], yBasis);
                                     (*kernelCompFctPtr)(relative[2], zBasis);
-                                    previous[0]-=kernel_offset;
-                                    previous[1]-=kernel_offset;
-                                    previous[2]-=kernel_offset;
-
-                                    psfIntensity=0;
-                                    for(c=0; c<kernel_size; c++)
-                                    {
-                                        Z= previous[2]+c;
-                                        zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                                        yTempNewValue=0;
-                                        for(b=0; b<kernel_size; b++)
-                                        {
-                                            Y= previous[1]+b;
-                                            xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                                            xTempNewValue=0;
-                                            for(a=0; a<kernel_size; a++)
-                                            {
-                                                if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx &&
-                                                        -1<Z && Z<floatingImage->nz &&
-                                                        -1<Y && Y<floatingImage->ny)
-                                                {
-                                                    xTempNewValue +=  static_cast<double>(*xyzPointer) * xBasis[a];
-                                                }
-                                                else
-                                                {
-                                                    if(!(paddingValue!=paddingValue))// paddingValue
-                                                        xTempNewValue +=  paddingValue * xBasis[a];
+                                    previous[0] -= kernel_offset;
+                                    previous[1] -= kernel_offset;
+                                    previous[2] -= kernel_offset;
+
+                                    psfIntensity = 0;
+                                    for (c = 0; c < kernel_size; c++) {
+                                        Z = previous[2] + c;
+                                        zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny];
+                                        yTempNewValue = 0;
+                                        for (b = 0; b < kernel_size; b++) {
+                                            Y = previous[1] + b;
+                                            xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]];
+                                            xTempNewValue = 0;
+                                            for (a = 0; a < kernel_size; a++) {
+                                                if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx &&
+                                                    -1 < Z && Z < floatingImage->nz &&
+                                                    -1 < Y && Y < floatingImage->ny) {
+                                                    xTempNewValue += *xyzPointer * xBasis[a];
+                                                } else {
+                                                    if (!(paddingValue != paddingValue))// paddingValue
+                                                        xTempNewValue += paddingValue * xBasis[a];
                                                 }
                                                 xyzPointer++;
                                             }
@@ -1265,188 +998,177 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage,
                                         }
                                         psfIntensity += yTempNewValue * zBasis[c];
                                     }
-                                    if(!(psfIntensity!=psfIntensity)){
-                                        intensity+=psfWeight*psfIntensity;
-                                        psfWeightSum+=psfWeight;
+                                    if (!(psfIntensity != psfIntensity)) {
+                                        intensity += psfWeight * psfIntensity;
+                                        psfWeightSum += psfWeight;
                                     }
                                 }
                             }
                         }
                     }
                 }
-                if(psfWeightSum>0){
-                    intensity/=psfWeightSum;
-                }
-                else{
-                    intensity=paddingValue;
+                if (psfWeightSum > 0) {
+                    intensity /= psfWeightSum;
+                } else {
+                    intensity = paddingValue;
                 }
             } // if in mask
-            switch(floatingImage->datatype)
-            {
+            switch (floatingImage->datatype) {
             case NIFTI_TYPE_FLOAT32:
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity);
+                warpedIntensity[index] = static_cast<FloatingType>(intensity);
                 break;
             case NIFTI_TYPE_FLOAT64:
-                warpedIntensity[index]=intensity;
+                warpedIntensity[index] = static_cast<FloatingType>(intensity);
                 break;
             case NIFTI_TYPE_UINT8:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=255?reg_round(intensity):255); // 255=2^8-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT16:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=65535?reg_round(intensity):65535); // 65535=2^16-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT32:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=4294967295?reg_round(intensity):4294967295); // 4294967295=2^32-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                 break;
             default:
-                if(intensity!=intensity)
-                    intensity=0;
-                warpedIntensity[index]=static_cast<FloatingTYPE>(reg_round(intensity));
+                if (intensity != intensity)
+                    intensity = 0;
+                warpedIntensity[index] = static_cast<FloatingType>(reg_round(intensity));
                 break;
             }
         }
     }
 }
-
-/* *************************************************************** */
 /* *************************************************************** */
-template<class FloatingTYPE, class FieldTYPE>
-void ResampleImage3D_PSF(nifti_image *floatingImage,
-                         nifti_image *deformationField,
+template<class FloatingType, class FieldType>
+void ResampleImage3D_PSF(const nifti_image *floatingImage,
+                         const nifti_image *deformationField,
                          nifti_image *warpedImage,
-                         int *mask,
-                         FieldTYPE paddingValue,
-                         int kernel,
-                         mat33 * jacMat,
-                         char algorithm)
-{
+                         const int *mask,
+                         const FieldType& paddingValue,
+                         const int& kernel,
+                         const mat33 *jacMat,
+                         const char& algorithm) {
 #ifdef _WIN32
     long index;
-    const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage);
-    const long warpedPlaneNumber = (long)CalcVoxelNumber(*warpedImage, 2);
+    const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3);
+    const long warpedPlaneNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 2);
     const long warpedLineNumber = (long)warpedImage->nx;
-    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
+    const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3);
 #else
     size_t index;
-    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
-    const size_t warpedPlaneNumber = CalcVoxelNumber(*warpedImage, 2);
+    const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3);
+    const size_t warpedPlaneNumber = NiftiImage::calcVoxelNumber(warpedImage, 2);
     const size_t warpedLineNumber = (size_t)warpedImage->nx;
-    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
+    const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
 #endif
-    FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
-    FloatingTYPE *warpedIntensityPtr = static_cast<FloatingTYPE *>(warpedImage->data);
-    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationField->data);
-    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber];
-    FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber];
-
-    int *maskPtr = &mask[0];
-
-    mat44 *floatingIJKMatrix;
-    if(floatingImage->sform_code>0)
-        floatingIJKMatrix=&(floatingImage->sto_ijk);
-    else floatingIJKMatrix=&(floatingImage->qto_ijk);
-    mat44 *warpedMatrix = &(warpedImage->qto_xyz);
-    if(warpedImage->sform_code>0)
-        warpedMatrix = &(warpedImage->sto_xyz);
-    mat44 *floatingMatrix = &(floatingImage->qto_xyz);
-    if(floatingImage->sform_code>0)
-        floatingMatrix = &(floatingImage->sto_xyz);
-
-    float fwhmToStd=2.355f;
+    const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
+    FloatingType *warpedIntensityPtr = static_cast<FloatingType*>(warpedImage->data);
+    const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
+    const FieldType *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber];
+    const FieldType *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber];
+
+    const mat44 *floatingIJKMatrix;
+    if (floatingImage->sform_code > 0)
+        floatingIJKMatrix = &floatingImage->sto_ijk;
+    else floatingIJKMatrix = &floatingImage->qto_ijk;
+    mat44 *warpedMatrix = &warpedImage->qto_xyz;
+    if (warpedImage->sform_code > 0)
+        warpedMatrix = &warpedImage->sto_xyz;
+    const mat44 *floatingMatrix = &floatingImage->qto_xyz;
+    if (floatingImage->sform_code > 0)
+        floatingMatrix = &floatingImage->sto_xyz;
+
+    float fwhmToStd = 2.355f;
     // T is the reference PSF and S is the floating PSF
     mat33 T, S;
-    for(int j=0; j<3; j++){
-        for(int i=0; i<3; i++){
-            T.m[i][j]=0;
-            S.m[i][j]=0;
+    for (int j = 0; j < 3; j++) {
+        for (int i = 0; i < 3; i++) {
+            T.m[i][j] = 0;
+            S.m[i][j] = 0;
         }
     }
-    for(int j=0; j<3; j++){
-        for(int i=0; i<3; i++){
+    for (int j = 0; j < 3; j++) {
+        for (int i = 0; i < 3; i++) {
             T.m[j][j] += reg_pow2(warpedMatrix->m[i][j]);
             S.m[j][j] += reg_pow2(floatingMatrix->m[i][j]);
         }
-        T.m[j][j] = reg_pow2(sqrtf(T.m[j][j]) / fwhmToStd)/2.0f;
-        S.m[j][j] = reg_pow2(sqrtf(S.m[j][j]) / fwhmToStd)/2.0f;
+        T.m[j][j] = reg_pow2(sqrtf(T.m[j][j]) / fwhmToStd) / 2.0f;
+        S.m[j][j] = reg_pow2(sqrtf(S.m[j][j]) / fwhmToStd) / 2.0f;
     }
 
     // Define the kernel to use
     int kernel_size;
-    int kernel_offset=0;
-    void (*kernelCompFctPtr)(double,double *);
-    switch(kernel){
+    int kernel_offset = 0;
+    void (*kernelCompFctPtr)(double, double *);
+    switch (kernel) {
     case 0:
         reg_print_fct_error("ResampleImage3D_PSF");
         reg_print_msg_error("Not implemented for NN interpolation yet");
         reg_exit();
-        kernel_size=2;
-        kernelCompFctPtr=&interpNearestNeighKernel;
-        kernel_offset=0;
+        kernel_size = 2;
+        kernelCompFctPtr = &interpNearestNeighKernel;
+        kernel_offset = 0;
         break; // nearest-neighbour interpolation
     case 1:
-        kernel_size=2;
-        kernelCompFctPtr=&interpLinearKernel;
-        kernel_offset=0;
+        kernel_size = 2;
+        kernelCompFctPtr = &interpLinearKernel;
+        kernel_offset = 0;
         break; // linear interpolation
     case 4:
-        kernel_size=SINC_KERNEL_SIZE;
-        kernelCompFctPtr=&interpWindowedSincKernel;
-        kernel_offset=SINC_KERNEL_RADIUS;
+        kernel_size = SINC_KERNEL_SIZE;
+        kernelCompFctPtr = &interpWindowedSincKernel;
+        kernel_offset = SINC_KERNEL_RADIUS;
         break; // sinc interpolation
     default:
-        kernel_size=4;
-        kernelCompFctPtr=&interpCubicSplineKernel;
-        kernel_offset=1;
+        kernel_size = 4;
+        kernelCompFctPtr = &interpCubicSplineKernel;
+        kernel_offset = 1;
         break; // cubic spline interpolation
     }
 
     // Iteration over the different volume along the 4th axis
-    for(size_t t=0; t<(size_t)warpedImage->nt*warpedImage->nu; t++)
-    {
+    for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) {
 #ifndef NDEBUG
         char text[255];
-        sprintf(text,"PSF 3D resampling of volume number %zu\n",t);
+        sprintf(text, "PSF 3D resampling of volume number %zu\n", t);
         reg_print_msg_debug(text);
 #endif
 
-        FloatingTYPE *warpedIntensity = &warpedIntensityPtr[t*warpedVoxelNumber];
-        FloatingTYPE *floatingIntensity = &floatingIntensityPtr[t*floatingVoxelNumber];
+        FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber];
+        const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber];
 
         double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], zBasis[SINC_KERNEL_SIZE], relative[3];
         int Y, Z, previous[3];
 
         float psf_xyz[3];
 
-        mat33 P, invP, ASAt, A,TmS,TmS_EigVec,TmS_EigVec_trans,TmS_EigVal,TmS_EigVal_inv;
-        float currentDeterminant, psfKernelShift[3], psfSampleSpacing, psfWeightSum,curLambda;
+        mat33 P, invP, ASAt, A, TmS, TmS_EigVec, TmS_EigVec_trans, TmS_EigVal, TmS_EigVal_inv;
+        float currentDeterminant, psfKernelShift[3], psfSampleSpacing, psfWeightSum, curLambda;
         float psfNumbSamples;
 
-        FloatingTYPE *zPointer, *xyzPointer;
+        const FloatingType *zPointer, *xyzPointer;
         double xTempNewValue, yTempNewValue, intensity, psfIntensity, psfWorld[3], position[3];
         size_t currentA, currentB, currentC, currentAPre, currentBPre, currentCPre;
-        float  psf_eig[3],  mahal, psfWeight;
+        float  psf_eig[3], mahal, psfWeight;
         float currentARel, currentBRel, currentCRel, resamplingWeightSum, resamplingWeight;
         size_t currentIndex;
 
-        for(index=0; index<warpedVoxelNumber; index++)
-        {
-            intensity=paddingValue;
-
-            if((maskPtr[index])>-1)
-            {
-                if(algorithm==0){
+        for (index = 0; index < warpedVoxelNumber; index++) {
+            intensity = paddingValue;
 
+            if (mask[index] > -1) {
+                if (algorithm == 0) {
                     // T=P+A*S*At
-                    A=nifti_mat33_inverse(jacMat[index]);
+                    A = nifti_mat33_inverse(jacMat[index]);
 
                     ASAt = A * S * reg_mat33_trans(A);
 
@@ -1457,29 +1179,28 @@ void ResampleImage3D_PSF(nifti_image *floatingImage,
 
                     // If eigen values are less than 0, set them to 0.
                     // Also, invert the eigenvalues to estimate the inverse.
-                    for(int m=0;m<3;m++){
-                        for(int n=0;n<3;n++){
-                            if(m==n){ // Set diagonals to max(val,0)
-                                TmS_EigVal.m[m][n]=TmS_EigVal.m[m][n]>0.000001f?TmS_EigVal.m[m][n]:0.000001f;
-                                TmS_EigVal_inv.m[m][n]=1.0f/TmS_EigVal.m[m][n];
-                            }else{ // Set off-diagonal residuals to 0
-                                TmS_EigVal.m[m][n]=0;
-                                TmS_EigVal_inv.m[m][n]=0;
+                    for (int m = 0; m < 3; m++) {
+                        for (int n = 0; n < 3; n++) {
+                            if (m == n) { // Set diagonals to max(val,0)
+                                TmS_EigVal.m[m][n] = TmS_EigVal.m[m][n] > 0.000001f ? TmS_EigVal.m[m][n] : 0.000001f;
+                                TmS_EigVal_inv.m[m][n] = 1.0f / TmS_EigVal.m[m][n];
+                            } else { // Set off-diagonal residuals to 0
+                                TmS_EigVal.m[m][n] = 0;
+                                TmS_EigVal_inv.m[m][n] = 0;
                             }
                         }
                     }
 
-                    TmS_EigVec_trans=reg_mat33_trans(TmS_EigVec);
-                    P= TmS_EigVec * TmS_EigVal * TmS_EigVec_trans;
-                    invP= TmS_EigVec * TmS_EigVal_inv * TmS_EigVec_trans;
-                    currentDeterminant = TmS_EigVal.m[0][0]*TmS_EigVal.m[1][1]*TmS_EigVal.m[2][2];
-                    currentDeterminant=currentDeterminant<0.000001f?0.000001f:currentDeterminant;
-                }
-                else{
+                    TmS_EigVec_trans = reg_mat33_trans(TmS_EigVec);
+                    P = TmS_EigVec * TmS_EigVal * TmS_EigVec_trans;
+                    invP = TmS_EigVec * TmS_EigVal_inv * TmS_EigVec_trans;
+                    currentDeterminant = TmS_EigVal.m[0][0] * TmS_EigVal.m[1][1] * TmS_EigVal.m[2][2];
+                    currentDeterminant = currentDeterminant < 0.000001f ? 0.000001f : currentDeterminant;
+                } else {
 
-                    A=nifti_mat33_inverse(jacMat[index]);
+                    A = nifti_mat33_inverse(jacMat[index]);
 
-                    ASAt =  A * S * reg_mat33_trans(A);
+                    ASAt = A * S * reg_mat33_trans(A);
 
                     mat33 S_EigVec, S_EigVal;
 
@@ -1492,22 +1213,22 @@ void ResampleImage3D_PSF(nifti_image *floatingImage,
 
                     //                % Volume-preserving scale of S to make it isotropic
                     //                detS = prod(diag(DS));
-                    float detASAt = S_EigVal.m[0][0]*S_EigVal.m[1][1]*S_EigVal.m[2][2];
+                    float detASAt = S_EigVal.m[0][0] * S_EigVal.m[1][1] * S_EigVal.m[2][2];
 
                     //                factDetS = detS^(1/4);
-                    float factDetS=powf(detASAt,0.25);
+                    float factDetS = powf(detASAt, 0.25);
 
                     //                LambdaN = factDetS*diag(diag(DS).^(-1/2));
                     //                invLambdaN = diag(1./diag(LambdaN))
-                    mat33 LambdaN,invLambdaN;
-                    for(int m=0;m<3;m++){
-                        for(int n=0;n<3;n++){
-                            if(m==n){
-                                LambdaN.m[m][n]=factDetS*powf(S_EigVal.m[m][n],-0.5);
-                                invLambdaN.m[m][n]=1.0f/LambdaN.m[m][n];
-                            }else{ // Set off-diagonal to 0
-                                LambdaN.m[m][n]=0;
-                                invLambdaN.m[m][n]=0;
+                    mat33 LambdaN, invLambdaN;
+                    for (int m = 0; m < 3; m++) {
+                        for (int n = 0; n < 3; n++) {
+                            if (m == n) {
+                                LambdaN.m[m][n] = factDetS * powf(S_EigVal.m[m][n], -0.5);
+                                invLambdaN.m[m][n] = 1.0f / LambdaN.m[m][n];
+                            } else { // Set off-diagonal to 0
+                                LambdaN.m[m][n] = 0;
+                                invLambdaN.m[m][n] = 0;
                             }
                         }
                     }
@@ -1523,12 +1244,12 @@ void ResampleImage3D_PSF(nifti_image *floatingImage,
                     //                % Optimal solution in the transformed axis-aligned space
                     //                DP2 = diag(max(sqrt(detS),diag(DT2)));
                     mat33 DP2;
-                    for(int m=0;m<3;m++){
-                        for(int n=0;n<3;n++){
-                            if(m==n){
-                                DP2.m[m][n]= powf(factDetS,0.5)>(T2_EigVal.m[m][n])?powf(factDetS,0.5):(T2_EigVal.m[m][n]);
-                            }else{ // Set off-diagonal to 0
-                                DP2.m[m][n]=0;
+                    for (int m = 0; m < 3; m++) {
+                        for (int n = 0; n < 3; n++) {
+                            if (m == n) {
+                                DP2.m[m][n] = powf(factDetS, 0.5) > (T2_EigVal.m[m][n]) ? powf(factDetS, 0.5) : (T2_EigVal.m[m][n]);
+                            } else { // Set off-diagonal to 0
+                                DP2.m[m][n] = 0;
                             }
                         }
                     }
@@ -1538,115 +1259,110 @@ void ResampleImage3D_PSF(nifti_image *floatingImage,
                     mat33 Q = S_EigVec * invLambdaN * T2_EigVec * DP2 * reg_mat33_trans(T2_EigVec) * invLambdaN * reg_mat33_trans(S_EigVec);
                     //                P=Q-S
                     TmS = Q - S;
-                    invP=nifti_mat33_inverse(TmS);
+                    invP = nifti_mat33_inverse(TmS);
                     reg_mat33_diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal);
 
-                    currentDeterminant = TmS_EigVal.m[0][0]*TmS_EigVal.m[1][1]*TmS_EigVal.m[2][2];
-                    currentDeterminant=currentDeterminant<0.000001f?0.000001f:currentDeterminant;
+                    currentDeterminant = TmS_EigVal.m[0][0] * TmS_EigVal.m[1][1] * TmS_EigVal.m[2][2];
+                    currentDeterminant = currentDeterminant < 0.000001f ? 0.000001f : currentDeterminant;
                 }
 
                 // set sampling rate
-                psfNumbSamples=3; // in standard deviations mm
-                psfSampleSpacing=0.75; // in standard deviations mm
-                psfKernelShift[0]=TmS_EigVal.m[0][0]<0.01f?0.0f:(float)(psfNumbSamples)*psfSampleSpacing;
-                psfKernelShift[1]=TmS_EigVal.m[1][1]<0.01f?0.0f:(float)(psfNumbSamples)*psfSampleSpacing;
-                psfKernelShift[2]=TmS_EigVal.m[2][2]<0.01f?0.0f:(float)(psfNumbSamples)*psfSampleSpacing;
+                psfNumbSamples = 3; // in standard deviations mm
+                psfSampleSpacing = 0.75; // in standard deviations mm
+                psfKernelShift[0] = TmS_EigVal.m[0][0] < 0.01f ? 0.0f : (float)(psfNumbSamples)*psfSampleSpacing;
+                psfKernelShift[1] = TmS_EigVal.m[1][1] < 0.01f ? 0.0f : (float)(psfNumbSamples)*psfSampleSpacing;
+                psfKernelShift[2] = TmS_EigVal.m[2][2] < 0.01f ? 0.0f : (float)(psfNumbSamples)*psfSampleSpacing;
 
                 // Get image coordinates of the centre
-                currentC=index/warpedPlaneNumber;
-                currentB=(index-currentC*warpedPlaneNumber)/warpedLineNumber;
-                currentA=(index-currentB*warpedLineNumber-currentC*warpedPlaneNumber);
+                currentC = index / warpedPlaneNumber;
+                currentB = (index - currentC * warpedPlaneNumber) / warpedLineNumber;
+                currentA = (index - currentB * warpedLineNumber - currentC * warpedPlaneNumber);
 
                 //initialise weights
-                psfWeightSum=0.0f;
-                intensity=0.0f;
+                psfWeightSum = 0.0f;
+                intensity = 0.0f;
 
                 // coordinates in eigen space
-                for(psf_eig[0]=-psfKernelShift[0];psf_eig[0]<=(psfKernelShift[0]); psf_eig[0]+=psfSampleSpacing)
-                {
-                    for(psf_eig[1]=-psfKernelShift[1];psf_eig[1]<=(psfKernelShift[1]); psf_eig[1]+=psfSampleSpacing)
-                    {
-                        for(psf_eig[2]=-psfKernelShift[2];psf_eig[2]<=(psfKernelShift[2]); psf_eig[2]+=psfSampleSpacing)
-                        {
+                for (psf_eig[0] = -psfKernelShift[0]; psf_eig[0] <= (psfKernelShift[0]); psf_eig[0] += psfSampleSpacing) {
+                    for (psf_eig[1] = -psfKernelShift[1]; psf_eig[1] <= (psfKernelShift[1]); psf_eig[1] += psfSampleSpacing) {
+                        for (psf_eig[2] = -psfKernelShift[2]; psf_eig[2] <= (psfKernelShift[2]); psf_eig[2] += psfSampleSpacing) {
                             // Distance threshold (only interpolate if distance is below 3 std)
-                            if(sqrtf(psf_eig[0]*psf_eig[0]+psf_eig[1]*psf_eig[1]+psf_eig[2]*psf_eig[2])<=3){
+                            if (sqrtf(psf_eig[0] * psf_eig[0] + psf_eig[1] * psf_eig[1] + psf_eig[2] * psf_eig[2]) <= 3) {
                                 // Use the Eigen coordinates and convert them to XYZ
                                 // The new lambda per coordinate is eige_coordinate*sqrt(eigenVal)
                                 // as the sqrt(eigenVal) is equivalent to the STD
-                                psf_xyz[0]=0;
-                                psf_xyz[1]=0;
-                                psf_xyz[2]=0;
-                                for(int m=0;m<3;m++){
-                                    curLambda=(float)(psf_eig[m])*sqrt(TmS_EigVal.m[m][m]);
-                                    psf_xyz[0]+=curLambda*TmS_EigVec.m[0][m];
-                                    psf_xyz[1]+=curLambda*TmS_EigVec.m[1][m];
-                                    psf_xyz[2]+=curLambda*TmS_EigVec.m[2][m];
+                                psf_xyz[0] = 0;
+                                psf_xyz[1] = 0;
+                                psf_xyz[2] = 0;
+                                for (int m = 0; m < 3; m++) {
+                                    curLambda = (float)(psf_eig[m]) * sqrt(TmS_EigVal.m[m][m]);
+                                    psf_xyz[0] += curLambda * TmS_EigVec.m[0][m];
+                                    psf_xyz[1] += curLambda * TmS_EigVec.m[1][m];
+                                    psf_xyz[2] += curLambda * TmS_EigVec.m[2][m];
                                 }
 
                                 //mahal=0;
-                                mahal=psf_xyz[0]*invP.m[0][0]*psf_xyz[0]+
-                                        psf_xyz[0]*invP.m[1][0]*psf_xyz[1]+
-                                        psf_xyz[0]*invP.m[2][0]*psf_xyz[2]+
-                                        psf_xyz[1]*invP.m[0][1]*psf_xyz[0]+
-                                        psf_xyz[1]*invP.m[1][1]*psf_xyz[1]+
-                                        psf_xyz[1]*invP.m[2][1]*psf_xyz[2]+
-                                        psf_xyz[2]*invP.m[0][2]*psf_xyz[0]+
-                                        psf_xyz[2]*invP.m[1][2]*psf_xyz[1]+
-                                        psf_xyz[2]*invP.m[2][2]*psf_xyz[2];
-
-                                psfWeight=powf(2.f*M_PI,-3.f/2.f)*
-                                        pow(currentDeterminant,-0.5f)*
-                                        expf(-0.5f*mahal);
-
-                                if(psfWeight!=0.f){ // If the relative weight is above 0
+                                mahal = psf_xyz[0] * invP.m[0][0] * psf_xyz[0] +
+                                    psf_xyz[0] * invP.m[1][0] * psf_xyz[1] +
+                                    psf_xyz[0] * invP.m[2][0] * psf_xyz[2] +
+                                    psf_xyz[1] * invP.m[0][1] * psf_xyz[0] +
+                                    psf_xyz[1] * invP.m[1][1] * psf_xyz[1] +
+                                    psf_xyz[1] * invP.m[2][1] * psf_xyz[2] +
+                                    psf_xyz[2] * invP.m[0][2] * psf_xyz[0] +
+                                    psf_xyz[2] * invP.m[1][2] * psf_xyz[1] +
+                                    psf_xyz[2] * invP.m[2][2] * psf_xyz[2];
+
+                                psfWeight = powf(2.f * (float)M_PI, -3.f / 2.f) * powf(currentDeterminant, -0.5f) * expf(-0.5f * mahal);
+
+                                if (psfWeight != 0.f) { // If the relative weight is above 0
                                     // Interpolate (trilinearly) the deformation field for non-integer positions
-                                    currentAPre=(size_t)(currentA+(size_t)reg_floor(psf_xyz[0]/(float)warpedImage->pixdim[1]));
-                                    currentARel=(float)currentA+(float)(psf_xyz[0]/(float)warpedImage->pixdim[1])-(float)(currentAPre);
+                                    currentAPre = (size_t)(currentA + (size_t)reg_floor(psf_xyz[0] / (float)warpedImage->pixdim[1]));
+                                    currentARel = (float)currentA + (float)(psf_xyz[0] / (float)warpedImage->pixdim[1]) - (float)(currentAPre);
 
-                                    currentBPre=(size_t)(currentB+(size_t)reg_floor(psf_xyz[1]/(float)warpedImage->pixdim[2]));
-                                    currentBRel=(float)currentB+(float)(psf_xyz[1]/(float)warpedImage->pixdim[2])-(float)(currentBPre);
+                                    currentBPre = (size_t)(currentB + (size_t)reg_floor(psf_xyz[1] / (float)warpedImage->pixdim[2]));
+                                    currentBRel = (float)currentB + (float)(psf_xyz[1] / (float)warpedImage->pixdim[2]) - (float)(currentBPre);
 
-                                    currentCPre=(size_t)(currentC+(size_t)reg_floor(psf_xyz[2]/(float)warpedImage->pixdim[3]));
-                                    currentCRel=(float)currentC+(float)(psf_xyz[2]/(float)warpedImage->pixdim[3])-(float)(currentCPre);
+                                    currentCPre = (size_t)(currentC + (size_t)reg_floor(psf_xyz[2] / (float)warpedImage->pixdim[3]));
+                                    currentCRel = (float)currentC + (float)(psf_xyz[2] / (float)warpedImage->pixdim[3]) - (float)(currentCPre);
 
                                     // Interpolate the PSF world coordinates
-                                    psfWorld[0]=0.0f;
-                                    psfWorld[1]=0.0f;
-                                    psfWorld[2]=0.0f;
-                                    resamplingWeightSum=0.0f;
-                                    for (int a=0;a<=1;a++){
-                                        for (int b=0;b<=1;b++){
-                                            for (int c=0;c<=1;c++){
-
-                                                if(((int)currentAPre+a)>=0
-                                                        && ((int)currentBPre+b)>=0
-                                                        && ((int)currentCPre+c)>=0
-                                                        && ((int)currentAPre+a)<warpedImage->nx
-                                                        && ((int)currentBPre+b)<warpedImage->ny
-                                                        && ((int)currentCPre+c)<warpedImage->nz){
-
-                                                    currentIndex=((size_t)currentAPre+(size_t)a)+
-                                                            ((size_t)currentBPre+(size_t)b)*warpedLineNumber+
-                                                            ((size_t)currentCPre+(size_t)c)*warpedPlaneNumber;
-
-                                                    resamplingWeight=fabs((float)(1-a)-currentARel)*
-                                                            fabs((float)(1-b)-currentBRel)*
-                                                            fabs((float)(1-c)-currentCRel);
-
-                                                    resamplingWeightSum+=resamplingWeight;
-
-                                                    psfWorld[0]+=static_cast<double>(resamplingWeight*deformationFieldPtrX[currentIndex]);
-                                                    psfWorld[1]+=static_cast<double>(resamplingWeight*deformationFieldPtrY[currentIndex]);
-                                                    psfWorld[2]+=static_cast<double>(resamplingWeight*deformationFieldPtrZ[currentIndex]);
+                                    psfWorld[0] = 0.0f;
+                                    psfWorld[1] = 0.0f;
+                                    psfWorld[2] = 0.0f;
+                                    resamplingWeightSum = 0.0f;
+                                    for (int a = 0; a <= 1; a++) {
+                                        for (int b = 0; b <= 1; b++) {
+                                            for (int c = 0; c <= 1; c++) {
+
+                                                if (((int)currentAPre + a) >= 0
+                                                    && ((int)currentBPre + b) >= 0
+                                                    && ((int)currentCPre + c) >= 0
+                                                    && ((int)currentAPre + a) < warpedImage->nx
+                                                    && ((int)currentBPre + b) < warpedImage->ny
+                                                    && ((int)currentCPre + c) < warpedImage->nz) {
+
+                                                    currentIndex = ((size_t)currentAPre + (size_t)a) +
+                                                        ((size_t)currentBPre + (size_t)b) * warpedLineNumber +
+                                                        ((size_t)currentCPre + (size_t)c) * warpedPlaneNumber;
+
+                                                    resamplingWeight = fabs((float)(1 - a) - currentARel) *
+                                                        fabs((float)(1 - b) - currentBRel) *
+                                                        fabs((float)(1 - c) - currentCRel);
+
+                                                    resamplingWeightSum += resamplingWeight;
+
+                                                    psfWorld[0] += static_cast<double>(resamplingWeight * deformationFieldPtrX[currentIndex]);
+                                                    psfWorld[1] += static_cast<double>(resamplingWeight * deformationFieldPtrY[currentIndex]);
+                                                    psfWorld[2] += static_cast<double>(resamplingWeight * deformationFieldPtrZ[currentIndex]);
                                                 }
                                             }
                                         }
                                     }
 
-                                    if(resamplingWeightSum>0.0f){
-                                        psfWorld[0]/=resamplingWeightSum;
-                                        psfWorld[1]/=resamplingWeightSum;
-                                        psfWorld[2]/=resamplingWeightSum;
+                                    if (resamplingWeightSum > 0.0f) {
+                                        psfWorld[0] /= resamplingWeightSum;
+                                        psfWorld[1] /= resamplingWeightSum;
+                                        psfWorld[2] /= resamplingWeightSum;
 
                                         // real -> voxel; floating space
                                         reg_mat44_mul(floatingIJKMatrix, psfWorld, position);
@@ -1655,41 +1371,35 @@ void ResampleImage3D_PSF(nifti_image *floatingImage,
                                         previous[1] = static_cast<int>(reg_floor(position[1]));
                                         previous[2] = static_cast<int>(reg_floor(position[2]));
 
-                                        relative[0]=position[0]-static_cast<double>(previous[0]);
-                                        relative[1]=position[1]-static_cast<double>(previous[1]);
-                                        relative[2]=position[2]-static_cast<double>(previous[2]);
+                                        relative[0] = position[0] - static_cast<double>(previous[0]);
+                                        relative[1] = position[1] - static_cast<double>(previous[1]);
+                                        relative[2] = position[2] - static_cast<double>(previous[2]);
 
                                         (*kernelCompFctPtr)(relative[0], xBasis);
                                         (*kernelCompFctPtr)(relative[1], yBasis);
                                         (*kernelCompFctPtr)(relative[2], zBasis);
-                                        previous[0]-=kernel_offset;
-                                        previous[1]-=kernel_offset;
-                                        previous[2]-=kernel_offset;
-
-                                        psfIntensity=0;
-                                        for(int c=0; c<kernel_size; c++)
-                                        {
-                                            Z= previous[2]+c;
-                                            zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                                            yTempNewValue=0;
-                                            for(int b=0; b<kernel_size; b++)
-                                            {
-                                                Y= previous[1]+b;
-                                                xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                                                xTempNewValue=0;
-                                                for(int a=0; a<kernel_size; a++)
-                                                {
-                                                    if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx &&
-                                                            -1<Z && Z<floatingImage->nz &&
-                                                            -1<Y && Y<floatingImage->ny)
-                                                    {
-                                                        xTempNewValue +=  static_cast<double>(*xyzPointer) * xBasis[a];
-                                                    }
-                                                    else
-                                                    {
+                                        previous[0] -= kernel_offset;
+                                        previous[1] -= kernel_offset;
+                                        previous[2] -= kernel_offset;
+
+                                        psfIntensity = 0;
+                                        for (int c = 0; c < kernel_size; c++) {
+                                            Z = previous[2] + c;
+                                            zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny];
+                                            yTempNewValue = 0;
+                                            for (int b = 0; b < kernel_size; b++) {
+                                                Y = previous[1] + b;
+                                                xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]];
+                                                xTempNewValue = 0;
+                                                for (int a = 0; a < kernel_size; a++) {
+                                                    if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx &&
+                                                        -1 < Z && Z < floatingImage->nz &&
+                                                        -1 < Y && Y < floatingImage->ny) {
+                                                        xTempNewValue += *xyzPointer * xBasis[a];
+                                                    } else {
                                                         // paddingValue
-                                                        if(!(paddingValue!=paddingValue))// paddingValue
-                                                            xTempNewValue +=  paddingValue * xBasis[a];
+                                                        if (!(paddingValue != paddingValue))// paddingValue
+                                                            xTempNewValue += paddingValue * xBasis[a];
                                                     }
                                                     xyzPointer++;
                                                 }
@@ -1697,9 +1407,9 @@ void ResampleImage3D_PSF(nifti_image *floatingImage,
                                             }
                                             psfIntensity += yTempNewValue * zBasis[c];
                                         }
-                                        if(!(psfIntensity!=psfIntensity)){
-                                            intensity+=psfWeight*psfIntensity;
-                                            psfWeightSum+=psfWeight;
+                                        if (!(psfIntensity != psfIntensity)) {
+                                            intensity += psfWeight * psfIntensity;
+                                            psfWeightSum += psfWeight;
                                         }
                                     }
                                 }
@@ -1707,379 +1417,194 @@ void ResampleImage3D_PSF(nifti_image *floatingImage,
                         }
                     }
                 }
-                if(psfWeightSum>0){
-                    intensity/=psfWeightSum;
-                }
-                else{
-                    intensity=paddingValue;
+                if (psfWeightSum > 0) {
+                    intensity /= psfWeightSum;
+                } else {
+                    intensity = paddingValue;
                 }
             } // if in mask
-            switch(floatingImage->datatype)
-            {
+            switch (floatingImage->datatype) {
             case NIFTI_TYPE_FLOAT32:
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity);
+                warpedIntensity[index] = static_cast<FloatingType>(intensity);
                 break;
             case NIFTI_TYPE_FLOAT64:
-                warpedIntensity[index]=intensity;
+                warpedIntensity[index] = static_cast<FloatingType>(intensity);
                 break;
             case NIFTI_TYPE_UINT8:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=255?reg_round(intensity):255); // 255=2^8-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT16:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=65535?reg_round(intensity):65535); // 65535=2^16-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT32:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=4294967295?reg_round(intensity):4294967295); // 4294967295=2^32-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity>0?reg_round(intensity):0);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
                 break;
             case NIFTI_TYPE_INT16:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=32767?reg_round(intensity):32767); // 32767=2^15-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 32767 ? reg_round(intensity) : 32767); // 32767=2^15-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity);
                 break;
             case NIFTI_TYPE_INT32:
-                if(intensity!=intensity)
-                    intensity=0;
-                intensity=(intensity<=2147483647?reg_round(intensity):2147483647); // 2147483647=2^31-1
-                warpedIntensity[index]=static_cast<FloatingTYPE>(intensity);
+                if (intensity != intensity)
+                    intensity = 0;
+                intensity = (intensity <= 2147483647 ? reg_round(intensity) : 2147483647); // 2147483647=2^31-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity);
                 break;
             default:
-                if(intensity!=intensity)
-                    intensity=0;
-                warpedIntensity[index]=static_cast<FloatingTYPE>(reg_round(intensity));
+                if (intensity != intensity)
+                    intensity = 0;
+                warpedIntensity[index] = static_cast<FloatingType>(reg_round(intensity));
                 break;
             }
         }
     }
 }
-
 /* *************************************************************** */
-template <class FieldTYPE, class FloatingTYPE>
-void reg_resampleImage2_PSF(nifti_image *floatingImage,
+template <class FieldType, class FloatingType>
+void reg_resampleImage_PSF(const nifti_image *floatingImage,
                             nifti_image *warpedImage,
-                            nifti_image *deformationFieldImage,
-                            int *mask,
-                            int interp,
-                            FieldTYPE paddingValue,
-                            mat33 * jacMat,
-                            char algorithm)
-{
+                            const nifti_image *deformationFieldImage,
+                            const int *mask,
+                            const int& interp,
+                            const FieldType& paddingValue,
+                            const mat33 *jacMat,
+                            const char& algorithm) {
     // The deformation field contains the position in the real world
-    if(deformationFieldImage->nu>2)
-    {
-        if(algorithm==2){
+    if (deformationFieldImage->nu > 2) {
+        if (algorithm == 2) {
 #ifndef NDEBUG
-            std::cout<<"Running ResampleImage3D_PSF_Sinc 1"<<std::endl;
+            std::cout << "Running ResampleImage3D_PSF_Sinc 1" << std::endl;
 #endif
-            ResampleImage3D_PSF_Sinc<FloatingTYPE,FieldTYPE>(floatingImage,
-                                                             deformationFieldImage,
-                                                             warpedImage,
-                                                             mask,
-                                                             paddingValue,
-                                                             interp);
-        }
-        else{
+            ResampleImage3D_PSF_Sinc<FloatingType, FieldType>(floatingImage,
+                                                              deformationFieldImage,
+                                                              warpedImage,
+                                                              mask,
+                                                              paddingValue,
+                                                              interp);
+        } else {
 #ifndef NDEBUG
-            std::cout<<"Running ResampleImage3D_PSF"<<std::endl;
+            std::cout << "Running ResampleImage3D_PSF" << std::endl;
 #endif
-            ResampleImage3D_PSF<FloatingTYPE,FieldTYPE>(floatingImage,
-                                                        deformationFieldImage,
-                                                        warpedImage,
-                                                        mask,
-                                                        paddingValue,
-                                                        interp,
-                                                        jacMat,
-                                                        algorithm);
+            ResampleImage3D_PSF<FloatingType, FieldType>(floatingImage,
+                                                         deformationFieldImage,
+                                                         warpedImage,
+                                                         mask,
+                                                         paddingValue,
+                                                         interp,
+                                                         jacMat,
+                                                         algorithm);
         }
-    }
-    else
-    {
-        reg_print_fct_error("reg_resampleImage2_PSF");
+    } else {
+        reg_print_fct_error("reg_resampleImage_PSF");
         reg_print_msg_error("Not implemented for 2D images yet");
         reg_exit();
     }
-
 }
 /* *************************************************************** */
-void reg_resampleImage_PSF(nifti_image *floatingImage,
+void reg_resampleImage_PSF(const nifti_image *floatingImage,
                            nifti_image *warpedImage,
-                           nifti_image *deformationField,
-                           int *mask,
-                           int interp,
-                           float paddingValue,
-                           mat33 * jacMat,
-                           char algorithm)
-{
-    if(floatingImage->datatype != warpedImage->datatype)
-    {
+                           const nifti_image *deformationField,
+                           const int *mask,
+                           const int& interp,
+                           const float& paddingValue,
+                           const mat33 *jacMat,
+                           const char& algorithm) {
+    if (floatingImage->datatype != warpedImage->datatype) {
         reg_print_fct_error("reg_resampleImage");
         reg_print_msg_error("The floating and warped image should have the same data type");
         reg_exit();
     }
-
-    if(floatingImage->nt != warpedImage->nt)
-    {
+    if (floatingImage->nt != warpedImage->nt) {
         reg_print_fct_error("reg_resampleImage");
         reg_print_msg_error("The floating and warped images have different dimension along the time axis");
         reg_exit();
     }
+    if (deformationField->datatype != NIFTI_TYPE_FLOAT32 &&
+        deformationField->datatype != NIFTI_TYPE_FLOAT64) {
+        reg_print_fct_error("reg_resampleImage");
+        reg_print_msg_error("The deformation field image is expected to be of type float or double");
+        reg_exit();
+    }
 
     // a mask array is created if no mask is specified
     bool MrPropreRules = false;
-    if(mask==nullptr)
-    {
+    if (mask == nullptr) {
         // voxels in the background are set to negative value so 0 corresponds to active voxel
-        mask = (int *)calloc(CalcVoxelNumber(*warpedImage), sizeof(int));
+        mask = (int*)calloc(NiftiImage::calcVoxelNumber(warpedImage, 3), sizeof(int));
         MrPropreRules = true;
     }
 
-    switch ( deformationField->datatype )
-    {
-    case NIFTI_TYPE_FLOAT32:
-        switch ( floatingImage->datatype )
-        {
-        case NIFTI_TYPE_UINT8:
-            reg_resampleImage2_PSF<float,unsigned char>(floatingImage,
-                                                        warpedImage,
-                                                        deformationField,
-                                                        mask,
-                                                        interp,
-                                                        paddingValue,
-                                                        jacMat,
-                                                        algorithm);
-            break;
-        case NIFTI_TYPE_INT8:
-            reg_resampleImage2_PSF<float,char>(floatingImage,
-                                               warpedImage,
-                                               deformationField,
-                                               mask,
-                                               interp,
-                                               paddingValue,
-                                               jacMat,
-                                               algorithm);
-            break;
-        case NIFTI_TYPE_UINT16:
-            reg_resampleImage2_PSF<float,unsigned short>(floatingImage,
-                                                         warpedImage,
-                                                         deformationField,
-                                                         mask,
-                                                         interp,
-                                                         paddingValue,
-                                                         jacMat,
-                                                         algorithm);
-            break;
-        case NIFTI_TYPE_INT16:
-            reg_resampleImage2_PSF<float,short>(floatingImage,
-                                                warpedImage,
-                                                deformationField,
-                                                mask,
-                                                interp,
-                                                paddingValue,
-                                                jacMat,
-                                                algorithm);
-            break;
-        case NIFTI_TYPE_UINT32:
-            reg_resampleImage2_PSF<float,unsigned>(floatingImage,
-                                                       warpedImage,
-                                                       deformationField,
-                                                       mask,
-                                                       interp,
-                                                       paddingValue,
-                                                       jacMat,
-                                                       algorithm);
-            break;
-        case NIFTI_TYPE_INT32:
-            reg_resampleImage2_PSF<float,int>(floatingImage,
-                                              warpedImage,
-                                              deformationField,
-                                              mask,
-                                              interp,
-                                              paddingValue,
-                                              jacMat,
-                                              algorithm);
-            break;
-        case NIFTI_TYPE_FLOAT32:
-            reg_resampleImage2_PSF<float,float>(floatingImage,
-                                                warpedImage,
-                                                deformationField,
-                                                mask,
-                                                interp,
-                                                paddingValue,
-                                                jacMat,
-                                                algorithm);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_resampleImage2_PSF<float,double>(floatingImage,
-                                                 warpedImage,
-                                                 deformationField,
-                                                 mask,
-                                                 interp,
-                                                 paddingValue,
-                                                 jacMat,
-                                                 algorithm);
-            break;
-        default:
-            printf("floating pixel type unsupported.");
-            break;
-        }
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        switch ( floatingImage->datatype )
-        {
-        case NIFTI_TYPE_UINT8:
-            reg_resampleImage2_PSF<double,unsigned char>(floatingImage,
-                                                         warpedImage,
-                                                         deformationField,
-                                                         mask,
-                                                         interp,
-                                                         paddingValue,
-                                                         jacMat,
-                                                         algorithm);
-            break;
-        case NIFTI_TYPE_INT8:
-            reg_resampleImage2_PSF<double,char>(floatingImage,
-                                                warpedImage,
-                                                deformationField,
-                                                mask,
-                                                interp,
-                                                paddingValue,
-                                                jacMat,
-                                                algorithm);
-            break;
-        case NIFTI_TYPE_UINT16:
-            reg_resampleImage2_PSF<double,unsigned short>(floatingImage,
-                                                          warpedImage,
-                                                          deformationField,
-                                                          mask,
-                                                          interp,
-                                                          paddingValue,
-                                                          jacMat,
-                                                          algorithm);
-            break;
-        case NIFTI_TYPE_INT16:
-            reg_resampleImage2_PSF<double,short>(floatingImage,
-                                                 warpedImage,
-                                                 deformationField,
-                                                 mask,
-                                                 interp,
-                                                 paddingValue,
-                                                 jacMat,
-                                                 algorithm);
-            break;
-        case NIFTI_TYPE_UINT32:
-            reg_resampleImage2_PSF<double,unsigned>(floatingImage,
-                                                        warpedImage,
-                                                        deformationField,
-                                                        mask,
-                                                        interp,
-                                                        paddingValue,
-                                                        jacMat,
-                                                        algorithm);
-            break;
-        case NIFTI_TYPE_INT32:
-            reg_resampleImage2_PSF<double,int>(floatingImage,
-                                               warpedImage,
-                                               deformationField,
-                                               mask,
-                                               interp,
-                                               paddingValue,
-                                               jacMat,
-                                               algorithm);
-            break;
-        case NIFTI_TYPE_FLOAT32:
-            reg_resampleImage2_PSF<double,float>(floatingImage,
-                                                 warpedImage,
-                                                 deformationField,
-                                                 mask,
-                                                 interp,
-                                                 paddingValue,
-                                                 jacMat,
-                                                 algorithm);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_resampleImage2_PSF<double,double>(floatingImage,
-                                                  warpedImage,
-                                                  deformationField,
-                                                  mask,
-                                                  interp,
-                                                  paddingValue,
-                                                  jacMat,
-                                                  algorithm);
-            break;
-        default:
-            printf("floating pixel type unsupported.");
-            break;
-        }
-        break;
-    default:
-        printf("Deformation field pixel type unsupported.");
-        break;
-    }
-    if(MrPropreRules)
-    {
-        free(mask);
-        mask=nullptr;
-    }
+    std::visit([&](auto&& defFieldDataType, auto&& floImgDataType) {
+        using DefFieldDataType = std::decay_t<decltype(defFieldDataType)>;
+        using FloImgDataType = std::decay_t<decltype(floImgDataType)>;
+        reg_resampleImage_PSF<DefFieldDataType, FloImgDataType>(floatingImage,
+                                                                warpedImage,
+                                                                deformationField,
+                                                                mask,
+                                                                interp,
+                                                                paddingValue,
+                                                                jacMat,
+                                                                algorithm);
+    }, NiftiImage::getFloatingDataType(deformationField), NiftiImage::getDataType(floatingImage));
+
+    if (MrPropreRules)
+        free(const_cast<int*>(mask));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
-void reg_bilinearResampleGradient(nifti_image *floatingImage,
+void reg_bilinearResampleGradient(const nifti_image *floatingImage,
                                   nifti_image *warpedImage,
-                                  nifti_image *deformationField,
-                                  float paddingValue)
-{
-    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
-    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
-    DataType *floatingIntensityX = static_cast<DataType *>(floatingImage->data);
-    DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber];
-    DataType *warpedIntensityX = static_cast<DataType *>(warpedImage->data);
+                                  const nifti_image *deformationField,
+                                  const float& paddingValue) {
+    const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
+    const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3);
+    const DataType *floatingIntensityX = static_cast<DataType*>(floatingImage->data);
+    const DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber];
+    DataType *warpedIntensityX = static_cast<DataType*>(warpedImage->data);
     DataType *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber];
-    DataType *deformationFieldPtrX = static_cast<DataType *>(deformationField->data);
-    DataType *deformationFieldPtrY = &deformationFieldPtrX[CalcVoxelNumber(*deformationField)];
+    const DataType *deformationFieldPtrX = static_cast<DataType*>(deformationField->data);
+    const DataType *deformationFieldPtrY = &deformationFieldPtrX[NiftiImage::calcVoxelNumber(deformationField, 3)];
 
     // Extract the relevant affine matrix
-    mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk;
-    if(floatingImage->sform_code!=0)
+    const mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk;
+    if (floatingImage->sform_code != 0)
         floating_mm_to_voxel = &floatingImage->sto_ijk;
 
     // The spacing is computed in case the sform if defined
     float realSpacing[2];
-    if(warpedImage->sform_code>0)
-    {
-        reg_getRealImageSpacing(warpedImage,realSpacing);
-    }
-    else
-    {
-        realSpacing[0]=warpedImage->dx;
-        realSpacing[1]=warpedImage->dy;
+    if (warpedImage->sform_code > 0) {
+        reg_getRealImageSpacing(warpedImage, realSpacing);
+    } else {
+        realSpacing[0] = warpedImage->dx;
+        realSpacing[1] = warpedImage->dy;
     }
 
     // Reorientation matrix is assessed in order to remove the rigid component
-    mat33 reorient=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
+    mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
 
     // Some useful variables
     mat33 jacMat;
-    DataType defX,defY;
+    DataType defX, defY;
     DataType basisX[2], basisY[2], deriv[2], basis[2];
-    DataType xFloCoord,yFloCoord;
-    int anteIntX[2],anteIntY[2];
-    int x,y,a,b,defIndex,floIndex,warpedIndex;
-    DataType val_x,val_y,weight[2];
+    DataType xFloCoord, yFloCoord;
+    int anteIntX[2], anteIntY[2];
+    int x, y, a, b, defIndex, floIndex, warpedIndex;
+    DataType val_x, val_y, weight[2];
 
     // Loop over all voxel
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(x,y,a,b,val_x,val_y,defIndex,floIndex,warpedIndex, \
+    private(x,a,b,val_x,val_y,defIndex,floIndex,warpedIndex, \
     anteIntX,anteIntY,xFloCoord,yFloCoord, \
     basisX,basisY,deriv,basis,defX,defY,jacMat,weight) \
     shared(warpedImage,warpedIntensityX,warpedIntensityY, \
@@ -2087,126 +1612,114 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage,
     floatingImage,floatingIntensityX,floatingIntensityY,floating_mm_to_voxel, \
     paddingValue, reorient,realSpacing)
 #endif // _OPENMP
-    for(y=0; y<warpedImage->ny; ++y)
-    {
-        warpedIndex=y*warpedImage->nx;
-        deriv[0]=-1;
-        deriv[1]=1;
-        basis[0]=1;
-        basis[1]=0;
-        for(x=0; x<warpedImage->nx; ++x)
-        {
-            warpedIntensityX[warpedIndex]=paddingValue;
-            warpedIntensityY[warpedIndex]=paddingValue;
+    for (y = 0; y < warpedImage->ny; ++y) {
+        warpedIndex = y * warpedImage->nx;
+        deriv[0] = -1;
+        deriv[1] = 1;
+        basis[0] = 1;
+        basis[1] = 0;
+        for (x = 0; x < warpedImage->nx; ++x) {
+            warpedIntensityX[warpedIndex] = paddingValue;
+            warpedIntensityY[warpedIndex] = paddingValue;
 
             // Compute the index in the floating image
-            defX=deformationFieldPtrX[warpedIndex];
-            defY=deformationFieldPtrY[warpedIndex];
+            defX = deformationFieldPtrX[warpedIndex];
+            defY = deformationFieldPtrY[warpedIndex];
             xFloCoord =
-                    floating_mm_to_voxel->m[0][0] * defX +
-                    floating_mm_to_voxel->m[0][1] * defY +
-                    floating_mm_to_voxel->m[0][3];
+                floating_mm_to_voxel->m[0][0] * defX +
+                floating_mm_to_voxel->m[0][1] * defY +
+                floating_mm_to_voxel->m[0][3];
             yFloCoord =
-                    floating_mm_to_voxel->m[1][0] * defX +
-                    floating_mm_to_voxel->m[1][1] * defY +
-                    floating_mm_to_voxel->m[1][3];
+                floating_mm_to_voxel->m[1][0] * defX +
+                floating_mm_to_voxel->m[1][1] * defY +
+                floating_mm_to_voxel->m[1][3];
 
             // Extract the floating value using bilinear interpolation
-            anteIntX[0]=static_cast<int>(reg_floor(xFloCoord));
-            anteIntX[1]=static_cast<int>(reg_ceil(xFloCoord));
-            anteIntY[0]=static_cast<int>(reg_floor(yFloCoord));
-            anteIntY[1]=static_cast<int>(reg_ceil(yFloCoord));
-            val_x=0;
-            val_y=0;
-            basisX[1]=fabs(xFloCoord-(DataType)anteIntX[0]);
-            basisY[1]=fabs(yFloCoord-(DataType)anteIntY[0]);
-            basisX[0]=1.0-basisX[1];
-            basisY[0]=1.0-basisY[1];
-            for(b=0; b<2; ++b)
-            {
-                if(anteIntY[b]>-1 && anteIntY[b]<floatingImage->ny)
-                {
-                    for(a=0; a<2; ++a)
-                    {
-                        weight[0]=basisX[a] * basisY[b];
-                        if(anteIntX[a]>-1 && anteIntX[a]<floatingImage->nx)
-                        {
-                            floIndex = anteIntY[b]*floatingImage->nx+anteIntX[a];
+            anteIntX[0] = static_cast<int>(reg_floor(xFloCoord));
+            anteIntX[1] = static_cast<int>(reg_ceil(xFloCoord));
+            anteIntY[0] = static_cast<int>(reg_floor(yFloCoord));
+            anteIntY[1] = static_cast<int>(reg_ceil(yFloCoord));
+            val_x = 0;
+            val_y = 0;
+            basisX[1] = fabs(xFloCoord - (DataType)anteIntX[0]);
+            basisY[1] = fabs(yFloCoord - (DataType)anteIntY[0]);
+            basisX[0] = 1 - basisX[1];
+            basisY[0] = 1 - basisY[1];
+            for (b = 0; b < 2; ++b) {
+                if (anteIntY[b] > -1 && anteIntY[b] < floatingImage->ny) {
+                    for (a = 0; a < 2; ++a) {
+                        weight[0] = basisX[a] * basisY[b];
+                        if (anteIntX[a] > -1 && anteIntX[a] < floatingImage->nx) {
+                            floIndex = anteIntY[b] * floatingImage->nx + anteIntX[a];
                             val_x += floatingIntensityX[floIndex] * weight[0];
                             val_y += floatingIntensityY[floIndex] * weight[0];
                         } // anteIntX not in the floating image space
-                        else
-                        {
+                        else {
                             val_x += paddingValue * weight[0];
                             val_y += paddingValue * weight[0];
                         }
                     } // a
                 } // anteIntY not in the floating image space
-                else
-                {
+                else {
                     val_x += paddingValue * basisY[b];
                     val_y += paddingValue * basisY[b];
                 }
             } // b
 
             // Compute the Jacobian matrix
-            memset(&jacMat,0,sizeof(mat33));
-            jacMat.m[2][2]=1.;
-            for(b=0; b<2; ++b)
-            {
-                anteIntY[0]=y+b;
-                basisY[0]=basis[b];
-                basisY[1]=deriv[b];
+            memset(&jacMat, 0, sizeof(mat33));
+            jacMat.m[2][2] = 1.;
+            for (b = 0; b < 2; ++b) {
+                anteIntY[0] = y + b;
+                basisY[0] = basis[b];
+                basisY[1] = deriv[b];
                 // Boundary conditions along y - slidding
-                if(y==deformationField->ny-1)
-                {
-                    if(b==1)
-                        anteIntY[0]-=2;
-                    basisY[0]=fabs(basisY[0]-1.);
-                    basisY[1]*=-1.;
+                if (y == deformationField->ny - 1) {
+                    if (b == 1)
+                        anteIntY[0] -= 2;
+                    basisY[0] = fabs(basisY[0] - 1);
+                    basisY[1] *= -1;
                 }
-                for(a=0; a<2; ++a)
-                {
-                    anteIntX[0]=x+a;
-                    basisX[0]=basis[a];
-                    basisX[1]=deriv[a];
+                for (a = 0; a < 2; ++a) {
+                    anteIntX[0] = x + a;
+                    basisX[0] = basis[a];
+                    basisX[1] = deriv[a];
                     // Boundary conditions along x - slidding
-                    if(x==deformationField->nx-1)
-                    {
-                        if(a==1)
-                            anteIntX[0]-=2;
-                        basisX[0]=fabs(basisX[0]-1.);
-                        basisX[1]*=-1.;
+                    if (x == deformationField->nx - 1) {
+                        if (a == 1)
+                            anteIntX[0] -= 2;
+                        basisX[0] = fabs(basisX[0] - 1);
+                        basisX[1] *= -1;
                     }
 
                     // Compute the basis function values
-                    weight[0] = basisX[1]*basisY[0];
-                    weight[1] = basisX[0]*basisY[1];
+                    weight[0] = basisX[1] * basisY[0];
+                    weight[1] = basisX[0] * basisY[1];
 
                     // Get the deformation field index
-                    defIndex=anteIntY[0]*deformationField->nx+anteIntX[0];
+                    defIndex = anteIntY[0] * deformationField->nx + anteIntX[0];
 
                     // Get the deformation field values
-                    defX=deformationFieldPtrX[defIndex];
-                    defY=deformationFieldPtrY[defIndex];
+                    defX = deformationFieldPtrX[defIndex];
+                    defY = deformationFieldPtrY[defIndex];
 
                     // Symmetric difference to compute the derivatives
-                    jacMat.m[0][0] += weight[0]*defX;
-                    jacMat.m[0][1] += weight[1]*defX;
-                    jacMat.m[1][0] += weight[0]*defY;
-                    jacMat.m[1][1] += weight[1]*defY;
+                    jacMat.m[0][0] += static_cast<float>(weight[0] * defX);
+                    jacMat.m[0][1] += static_cast<float>(weight[1] * defX);
+                    jacMat.m[1][0] += static_cast<float>(weight[0] * defY);
+                    jacMat.m[1][1] += static_cast<float>(weight[1] * defY);
                 }
             }
             // reorient and scale the Jacobian matrix
-            jacMat=nifti_mat33_mul(reorient,jacMat);
+            jacMat = nifti_mat33_mul(reorient, jacMat);
             jacMat.m[0][0] /= realSpacing[0];
             jacMat.m[0][1] /= realSpacing[1];
             jacMat.m[1][0] /= realSpacing[0];
             jacMat.m[1][1] /= realSpacing[1];
 
             // Modulate the gradient scalar values
-            warpedIntensityX[warpedIndex]=jacMat.m[0][0]*val_x + jacMat.m[0][1]*val_y;
-            warpedIntensityY[warpedIndex]=jacMat.m[1][0]*val_x + jacMat.m[1][1]*val_y;
+            warpedIntensityX[warpedIndex] = jacMat.m[0][0] * val_x + jacMat.m[0][1] * val_y;
+            warpedIntensityY[warpedIndex] = jacMat.m[1][0] * val_x + jacMat.m[1][1] * val_y;
 
             ++warpedIndex;
         } // x
@@ -2214,58 +1727,54 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage,
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_trilinearResampleGradient(nifti_image *floatingImage,
+void reg_trilinearResampleGradient(const nifti_image *floatingImage,
                                    nifti_image *warpedImage,
-                                   nifti_image *deformationField,
-                                   float paddingValue)
-{
-    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
-    const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage);
-    const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField);
-    DataType *floatingIntensityX = static_cast<DataType *>(floatingImage->data);
-    DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber];
-    DataType *floatingIntensityZ = &floatingIntensityY[floatingVoxelNumber];
-    DataType *warpedIntensityX = static_cast<DataType *>(warpedImage->data);
+                                   const nifti_image *deformationField,
+                                   const float& paddingValue) {
+    const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
+    const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3);
+    const size_t deformationFieldVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
+    const DataType *floatingIntensityX = static_cast<DataType*>(floatingImage->data);
+    const DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber];
+    const DataType *floatingIntensityZ = &floatingIntensityY[floatingVoxelNumber];
+    DataType *warpedIntensityX = static_cast<DataType*>(warpedImage->data);
     DataType *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber];
     DataType *warpedIntensityZ = &warpedIntensityY[warpedVoxelNumber];
-    DataType *deformationFieldPtrX = static_cast<DataType *>(deformationField->data);
-    DataType *deformationFieldPtrY = &deformationFieldPtrX[deformationFieldVoxelNumber];
-    DataType *deformationFieldPtrZ = &deformationFieldPtrY[deformationFieldVoxelNumber];
+    const DataType *deformationFieldPtrX = static_cast<DataType*>(deformationField->data);
+    const DataType *deformationFieldPtrY = &deformationFieldPtrX[deformationFieldVoxelNumber];
+    const DataType *deformationFieldPtrZ = &deformationFieldPtrY[deformationFieldVoxelNumber];
 
     // Extract the relevant affine matrix
-    mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk;
-    if(floatingImage->sform_code!=0)
+    const mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk;
+    if (floatingImage->sform_code != 0)
         floating_mm_to_voxel = &floatingImage->sto_ijk;
 
     // The spacing is computed in case the sform if defined
     float realSpacing[3];
-    if(warpedImage->sform_code>0)
-    {
-        reg_getRealImageSpacing(warpedImage,realSpacing);
-    }
-    else
-    {
-        realSpacing[0]=warpedImage->dx;
-        realSpacing[1]=warpedImage->dy;
-        realSpacing[2]=warpedImage->dz;
+    if (warpedImage->sform_code > 0) {
+        reg_getRealImageSpacing(warpedImage, realSpacing);
+    } else {
+        realSpacing[0] = warpedImage->dx;
+        realSpacing[1] = warpedImage->dy;
+        realSpacing[2] = warpedImage->dz;
     }
 
     // Reorientation matrix is assessed in order to remove the rigid component
-    mat33 reorient=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
+    mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
 
     // Some useful variables
     mat33 jacMat;
-    DataType defX,defY,defZ;
+    DataType defX, defY, defZ;
     DataType basisX[2], basisY[2], basisZ[2], deriv[2], basis[2];
-    DataType xFloCoord,yFloCoord,zFloCoord;
-    int anteIntX[2],anteIntY[2],anteIntZ[2];
-    int x,y,z,a,b,c,defIndex,floIndex,warpedIndex;
-    DataType val_x,val_y,val_z,weight[3];
+    DataType xFloCoord, yFloCoord, zFloCoord;
+    int anteIntX[2], anteIntY[2], anteIntZ[2];
+    int x, y, z, a, b, c, defIndex, floIndex, warpedIndex;
+    DataType val_x, val_y, val_z, weight[3];
 
     // Loop over all voxel
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(x,y,z,a,b,c,val_x,val_y,val_z,defIndex,floIndex,warpedIndex, \
+    private(x,y,a,b,c,val_x,val_y,val_z,defIndex,floIndex,warpedIndex, \
     anteIntX,anteIntY,anteIntZ,xFloCoord,yFloCoord,zFloCoord, \
     basisX,basisY,basisZ,deriv,basis,defX,defY,defZ,jacMat,weight) \
     shared(warpedImage,warpedIntensityX,warpedIntensityY,warpedIntensityZ, \
@@ -2273,93 +1782,81 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage,
     floatingImage,floatingIntensityX,floatingIntensityY,floatingIntensityZ,floating_mm_to_voxel, \
     paddingValue, reorient, realSpacing)
 #endif // _OPENMP
-    for(z=0; z<warpedImage->nz; ++z)
-    {
-        warpedIndex=z*warpedImage->nx*warpedImage->ny;
-        deriv[0]=-1;
-        deriv[1]=1;
-        basis[0]=1;
-        basis[1]=0;
-        for(y=0; y<warpedImage->ny; ++y)
-        {
-            for(x=0; x<warpedImage->nx; ++x)
-            {
-                warpedIntensityX[warpedIndex]=paddingValue;
-                warpedIntensityY[warpedIndex]=paddingValue;
-                warpedIntensityZ[warpedIndex]=paddingValue;
+    for (z = 0; z < warpedImage->nz; ++z) {
+        warpedIndex = z * warpedImage->nx * warpedImage->ny;
+        deriv[0] = -1;
+        deriv[1] = 1;
+        basis[0] = 1;
+        basis[1] = 0;
+        for (y = 0; y < warpedImage->ny; ++y) {
+            for (x = 0; x < warpedImage->nx; ++x) {
+                warpedIntensityX[warpedIndex] = paddingValue;
+                warpedIntensityY[warpedIndex] = paddingValue;
+                warpedIntensityZ[warpedIndex] = paddingValue;
 
                 // Compute the index in the floating image
-                defX=deformationFieldPtrX[warpedIndex];
-                defY=deformationFieldPtrY[warpedIndex];
-                defZ=deformationFieldPtrZ[warpedIndex];
+                defX = deformationFieldPtrX[warpedIndex];
+                defY = deformationFieldPtrY[warpedIndex];
+                defZ = deformationFieldPtrZ[warpedIndex];
                 xFloCoord =
-                        floating_mm_to_voxel->m[0][0] * defX +
-                        floating_mm_to_voxel->m[0][1] * defY +
-                        floating_mm_to_voxel->m[0][2] * defZ +
-                        floating_mm_to_voxel->m[0][3];
+                    floating_mm_to_voxel->m[0][0] * defX +
+                    floating_mm_to_voxel->m[0][1] * defY +
+                    floating_mm_to_voxel->m[0][2] * defZ +
+                    floating_mm_to_voxel->m[0][3];
                 yFloCoord =
-                        floating_mm_to_voxel->m[1][0] * defX +
-                        floating_mm_to_voxel->m[1][1] * defY +
-                        floating_mm_to_voxel->m[1][2] * defZ +
-                        floating_mm_to_voxel->m[1][3];
+                    floating_mm_to_voxel->m[1][0] * defX +
+                    floating_mm_to_voxel->m[1][1] * defY +
+                    floating_mm_to_voxel->m[1][2] * defZ +
+                    floating_mm_to_voxel->m[1][3];
                 zFloCoord =
-                        floating_mm_to_voxel->m[2][0] * defX +
-                        floating_mm_to_voxel->m[2][1] * defY +
-                        floating_mm_to_voxel->m[2][2] * defZ +
-                        floating_mm_to_voxel->m[2][3];
+                    floating_mm_to_voxel->m[2][0] * defX +
+                    floating_mm_to_voxel->m[2][1] * defY +
+                    floating_mm_to_voxel->m[2][2] * defZ +
+                    floating_mm_to_voxel->m[2][3];
 
                 // Extract the floating value using bilinear interpolation
-                anteIntX[0]=static_cast<int>(reg_floor(xFloCoord));
-                anteIntX[1]=static_cast<int>(reg_ceil(xFloCoord));
-                anteIntY[0]=static_cast<int>(reg_floor(yFloCoord));
-                anteIntY[1]=static_cast<int>(reg_ceil(yFloCoord));
-                anteIntZ[0]=static_cast<int>(reg_floor(zFloCoord));
-                anteIntZ[1]=static_cast<int>(reg_ceil(zFloCoord));
-                val_x=0;
-                val_y=0;
-                val_z=0;
-                basisX[1]=fabs(xFloCoord-(DataType)anteIntX[0]);
-                basisY[1]=fabs(yFloCoord-(DataType)anteIntY[0]);
-                basisZ[1]=fabs(zFloCoord-(DataType)anteIntZ[0]);
-                basisX[0]=1.0-basisX[1];
-                basisY[0]=1.0-basisY[1];
-                basisZ[0]=1.0-basisZ[1];
-                for(c=0; c<2; ++c)
-                {
-                    if(anteIntZ[c]>-1 && anteIntZ[c]<floatingImage->nz)
-                    {
-                        for(b=0; b<2; ++b)
-                        {
-                            if(anteIntY[b]>-1 && anteIntY[b]<floatingImage->ny)
-                            {
-                                for(a=0; a<2; ++a)
-                                {
-                                    weight[0]=basisX[a] * basisY[b] * basisZ[c];
-                                    if(anteIntX[a]>-1 && anteIntX[a]<floatingImage->nx)
-                                    {
-                                        floIndex = (anteIntZ[c]*floatingImage->ny+anteIntY[b])*floatingImage->nx+anteIntX[a];
+                anteIntX[0] = static_cast<int>(reg_floor(xFloCoord));
+                anteIntX[1] = static_cast<int>(reg_ceil(xFloCoord));
+                anteIntY[0] = static_cast<int>(reg_floor(yFloCoord));
+                anteIntY[1] = static_cast<int>(reg_ceil(yFloCoord));
+                anteIntZ[0] = static_cast<int>(reg_floor(zFloCoord));
+                anteIntZ[1] = static_cast<int>(reg_ceil(zFloCoord));
+                val_x = 0;
+                val_y = 0;
+                val_z = 0;
+                basisX[1] = fabs(xFloCoord - (DataType)anteIntX[0]);
+                basisY[1] = fabs(yFloCoord - (DataType)anteIntY[0]);
+                basisZ[1] = fabs(zFloCoord - (DataType)anteIntZ[0]);
+                basisX[0] = 1 - basisX[1];
+                basisY[0] = 1 - basisY[1];
+                basisZ[0] = 1 - basisZ[1];
+                for (c = 0; c < 2; ++c) {
+                    if (anteIntZ[c] > -1 && anteIntZ[c] < floatingImage->nz) {
+                        for (b = 0; b < 2; ++b) {
+                            if (anteIntY[b] > -1 && anteIntY[b] < floatingImage->ny) {
+                                for (a = 0; a < 2; ++a) {
+                                    weight[0] = basisX[a] * basisY[b] * basisZ[c];
+                                    if (anteIntX[a] > -1 && anteIntX[a] < floatingImage->nx) {
+                                        floIndex = (anteIntZ[c] * floatingImage->ny + anteIntY[b]) * floatingImage->nx + anteIntX[a];
                                         val_x += floatingIntensityX[floIndex] * weight[0];
                                         val_y += floatingIntensityY[floIndex] * weight[0];
                                         val_z += floatingIntensityZ[floIndex] * weight[0];
                                     } // anteIntX not in the floating image space
-                                    else
-                                    {
+                                    else {
                                         val_x += paddingValue * weight[0];
                                         val_y += paddingValue * weight[0];
                                         val_z += paddingValue * weight[0];
                                     }
                                 } // a
                             } // anteIntY not in the floating image space
-                            else
-                            {
+                            else {
                                 val_x += paddingValue * basisY[b] * basisZ[c];
                                 val_y += paddingValue * basisY[b] * basisZ[c];
                                 val_z += paddingValue * basisY[b] * basisZ[c];
                             }
                         } // b
                     } // anteIntZ not in the floating image space
-                    else
-                    {
+                    else {
                         val_x += paddingValue * basisZ[c];
                         val_y += paddingValue * basisZ[c];
                         val_z += paddingValue * basisZ[c];
@@ -2367,76 +1864,70 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage,
                 } // c
 
                 // Compute the Jacobian matrix
-                memset(&jacMat,0,sizeof(mat33));
-                for(c=0; c<2; ++c)
-                {
-                    anteIntZ[0]=z+c;
-                    basisZ[0]=basis[c];
-                    basisZ[1]=deriv[c];
+                memset(&jacMat, 0, sizeof(mat33));
+                for (c = 0; c < 2; ++c) {
+                    anteIntZ[0] = z + c;
+                    basisZ[0] = basis[c];
+                    basisZ[1] = deriv[c];
                     // Boundary conditions along z - slidding
-                    if(z==deformationField->nz-1)
-                    {
-                        if(c==1)
-                            anteIntZ[0]-=2;
-                        basisZ[0]=fabs(basisZ[0]-1.);
-                        basisZ[1]*=-1.;
+                    if (z == deformationField->nz - 1) {
+                        if (c == 1)
+                            anteIntZ[0] -= 2;
+                        basisZ[0] = fabs(basisZ[0] - 1);
+                        basisZ[1] *= -1;
                     }
-                    for(b=0; b<2; ++b)
-                    {
-                        anteIntY[0]=y+b;
-                        basisY[0]=basis[b];
-                        basisY[1]=deriv[b];
+                    for (b = 0; b < 2; ++b) {
+                        anteIntY[0] = y + b;
+                        basisY[0] = basis[b];
+                        basisY[1] = deriv[b];
                         // Boundary conditions along y - slidding
-                        if(y==deformationField->ny-1)
-                        {
-                            if(b==1)
-                                anteIntY[0]-=2;
-                            basisY[0]=fabs(basisY[0]-1.);
-                            basisY[1]*=-1.;
+                        if (y == deformationField->ny - 1) {
+                            if (b == 1)
+                                anteIntY[0] -= 2;
+                            basisY[0] = fabs(basisY[0] - 1);
+                            basisY[1] *= -1;
                         }
-                        for(a=0; a<2; ++a)
-                        {
-                            anteIntX[0]=x+a;
-                            basisX[0]=basis[a];
-                            basisX[1]=deriv[a];
+                        for (a = 0; a < 2; ++a) {
+                            anteIntX[0] = x + a;
+                            basisX[0] = basis[a];
+                            basisX[1] = deriv[a];
                             // Boundary conditions along x - slidding
-                            if(x==deformationField->nx-1)
-                            {
-                                if(a==1)
-                                    anteIntX[0]-=2;
-                                basisX[0]=fabs(basisX[0]-1.);
-                                basisX[1]*=-1.;
+                            if (x == deformationField->nx - 1) {
+                                if (a == 1)
+                                    anteIntX[0] -= 2;
+                                basisX[0] = fabs(basisX[0] - 1);
+                                basisX[1] *= -1;
                             }
 
                             // Compute the basis function values
-                            weight[0] = basisX[1]*basisY[0]*basisZ[0];
-                            weight[1] = basisX[0]*basisY[1]*basisZ[0];
-                            weight[2] = basisX[0]*basisY[0]*basisZ[1];
+                            weight[0] = basisX[1] * basisY[0] * basisZ[0];
+                            weight[1] = basisX[0] * basisY[1] * basisZ[0];
+                            weight[2] = basisX[0] * basisY[0] * basisZ[1];
 
                             // Get the deformation field index
-                            defIndex=(anteIntZ[0]*deformationField->ny+anteIntY[0]) *
-                                    deformationField->nx+anteIntX[0];
+                            defIndex = (anteIntZ[0] * deformationField->ny + anteIntY[0]) *
+                                deformationField->nx + anteIntX[0];
 
                             // Get the deformation field values
-                            defX=deformationFieldPtrX[defIndex];
-                            defY=deformationFieldPtrY[defIndex];
-                            defZ=deformationFieldPtrZ[defIndex];
+                            defX = deformationFieldPtrX[defIndex];
+                            defY = deformationFieldPtrY[defIndex];
+                            defZ = deformationFieldPtrZ[defIndex];
 
                             // Symmetric difference to compute the derivatives
-                            jacMat.m[0][0] += weight[0]*defX;
-                            jacMat.m[0][1] += weight[1]*defX;
-                            jacMat.m[0][2] += weight[2]*defX;
-                            jacMat.m[1][0] += weight[0]*defY;
-                            jacMat.m[1][1] += weight[1]*defY;
-                            jacMat.m[1][2] += weight[2]*defY;
-                            jacMat.m[2][0] += weight[0]*defZ;
-                            jacMat.m[2][1] += weight[1]*defZ;
-                            jacMat.m[2][2] += weight[2]*defZ;
+                            jacMat.m[0][0] += static_cast<float>(weight[0] * defX);
+                            jacMat.m[0][1] += static_cast<float>(weight[1] * defX);
+                            jacMat.m[0][2] += static_cast<float>(weight[2] * defX);
+                            jacMat.m[1][0] += static_cast<float>(weight[0] * defY);
+                            jacMat.m[1][1] += static_cast<float>(weight[1] * defY);
+                            jacMat.m[1][2] += static_cast<float>(weight[2] * defY);
+                            jacMat.m[2][0] += static_cast<float>(weight[0] * defZ);
+                            jacMat.m[2][1] += static_cast<float>(weight[1] * defZ);
+                            jacMat.m[2][2] += static_cast<float>(weight[2] * defZ);
                         }
                     }
                 }
                 // reorient and scale the Jacobian matrix
-                jacMat=nifti_mat33_mul(reorient,jacMat);
+                jacMat = nifti_mat33_mul(reorient, jacMat);
                 jacMat.m[0][0] /= realSpacing[0];
                 jacMat.m[0][1] /= realSpacing[1];
                 jacMat.m[0][2] /= realSpacing[2];
@@ -2448,151 +1939,122 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage,
                 jacMat.m[2][2] /= realSpacing[2];
 
                 // Modulate the gradient scalar values
-                warpedIntensityX[warpedIndex]=jacMat.m[0][0]*val_x+jacMat.m[0][1]*val_y+jacMat.m[0][2]*val_z;
-                warpedIntensityY[warpedIndex]=jacMat.m[1][0]*val_x+jacMat.m[1][1]*val_y+jacMat.m[1][2]*val_z;
-                warpedIntensityZ[warpedIndex]=jacMat.m[2][0]*val_x+jacMat.m[2][1]*val_y+jacMat.m[2][2]*val_z;
+                warpedIntensityX[warpedIndex] = jacMat.m[0][0] * val_x + jacMat.m[0][1] * val_y + jacMat.m[0][2] * val_z;
+                warpedIntensityY[warpedIndex] = jacMat.m[1][0] * val_x + jacMat.m[1][1] * val_y + jacMat.m[1][2] * val_z;
+                warpedIntensityZ[warpedIndex] = jacMat.m[2][0] * val_x + jacMat.m[2][1] * val_y + jacMat.m[2][2] * val_z;
                 ++warpedIndex;
             } // x
         } // y
     } // z
 }
 /* *************************************************************** */
-void reg_resampleGradient(nifti_image *floatingImage,
+void reg_resampleGradient(const nifti_image *floatingImage,
                           nifti_image *warpedImage,
-                          nifti_image *deformationField,
-                          int interp,
-                          float paddingValue)
-{
-    if(interp!=1)
-    {
+                          const nifti_image *deformationField,
+                          const int& interp,
+                          const float& paddingValue) {
+    if (interp != 1) {
         reg_print_fct_error("reg_resampleGradient");
         reg_print_msg_error("Only linear interpolation is supported");
         reg_exit();
-
     }
-    if(floatingImage->datatype!=warpedImage->datatype ||
-            floatingImage->datatype!=deformationField->datatype)
-    {
+    if (floatingImage->datatype != warpedImage->datatype ||
+        floatingImage->datatype != deformationField->datatype) {
         reg_print_fct_error("reg_resampleGradient");
         reg_print_msg_error("Input images are expected to have the same type");
         reg_exit();
     }
-    switch(floatingImage->datatype)
-    {
-    case NIFTI_TYPE_FLOAT32:
-        if(warpedImage->nz>1)
-        {
-            reg_trilinearResampleGradient<float>(floatingImage,
-                                                 warpedImage,
-                                                 deformationField,
-                                                 paddingValue);
-        }
-        else
-        {
-            reg_bilinearResampleGradient<float>(floatingImage,
-                                                warpedImage,
-                                                deformationField,
-                                                paddingValue);
-        }
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        if(warpedImage->nz>1)
-        {
-            reg_trilinearResampleGradient<double>(floatingImage,
-                                                  warpedImage,
-                                                  deformationField,
-                                                  paddingValue);
-        }
-        else
-        {
-            reg_bilinearResampleGradient<double>(floatingImage,
-                                                 warpedImage,
-                                                 deformationField,
-                                                 paddingValue);
-        }
-        break;
-    default:
+    if (floatingImage->datatype != NIFTI_TYPE_FLOAT32 &&
+        floatingImage->datatype != NIFTI_TYPE_FLOAT64) {
         reg_print_fct_error("reg_resampleGradient");
-        reg_print_msg_error("Only single and double floating precision are supported");
+        reg_print_msg_error("Input images are expected to be of type float or double");
         reg_exit();
     }
+
+    std::visit([&](auto&& floImgDataType) {
+        using FloImgDataType = std::decay_t<decltype(floImgDataType)>;
+        if (warpedImage->nz > 1) {
+            reg_trilinearResampleGradient<FloImgDataType>(floatingImage,
+                                                          warpedImage,
+                                                          deformationField,
+                                                          paddingValue);
+        } else {
+            reg_bilinearResampleGradient<FloImgDataType>(floatingImage,
+                                                         warpedImage,
+                                                         deformationField,
+                                                         paddingValue);
+        }
+    }, NiftiImage::getFloatingDataType(floatingImage));
 }
 /* *************************************************************** */
-/* *************************************************************** */
-template<class FloatingTYPE, class GradientTYPE, class FieldTYPE>
-void TrilinearImageGradient(nifti_image *floatingImage,
-                            nifti_image *deformationField,
+template<class FloatingType, class GradientType, class FieldType>
+void TrilinearImageGradient(const nifti_image *floatingImage,
+                            const nifti_image *deformationField,
                             nifti_image *warpedGradient,
-                            int *mask,
-                            float paddingValue,
-                            int active_timepoint)
-{
-    if(active_timepoint<0 || active_timepoint>=floatingImage->nt){
+                            const int *mask,
+                            const float& paddingValue,
+                            const int& activeTimepoint) {
+    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) {
         reg_print_fct_error("TrilinearImageGradient");
         reg_print_msg_error("The specified active timepoint is not defined in the floating image");
         reg_exit();
     }
 #ifdef _WIN32
     long index;
-    const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient);
-    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
+    const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3);
+    const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3);
 #else
     size_t index;
-    const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient);
-    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
+    const size_t referenceVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 3);
+    const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
 #endif
-    FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
-    FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber];
-
-    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationField->data);
-    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
-    FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber];
+    const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
+    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber];
 
-    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warpedGradient->data);
-    GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
-    GradientTYPE *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber];
+    const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
+    const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
+    const FieldType *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber];
 
-    int *maskPtr = &mask[0];
+    GradientType *warpedGradientPtrX = static_cast<GradientType*>(warpedGradient->data);
+    GradientType *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
+    GradientType *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber];
 
-    mat44 *floatingIJKMatrix;
-    if(floatingImage->sform_code>0)
-        floatingIJKMatrix=&(floatingImage->sto_ijk);
-    else floatingIJKMatrix=&(floatingImage->qto_ijk);
+    const mat44 *floatingIJKMatrix;
+    if (floatingImage->sform_code > 0)
+        floatingIJKMatrix = &floatingImage->sto_ijk;
+    else floatingIJKMatrix = &floatingImage->qto_ijk;
 
 #ifndef NDEBUG
     char text[255];
-    sprintf(text, "3D linear gradient computation of volume number %i", active_timepoint);
+    sprintf(text, "3D linear gradient computation of volume number %i", activeTimepoint);
     reg_print_msg_debug(text);
 #endif
 
     int previous[3], a, b, c, X, Y, Z;
-    FieldTYPE position[3], xBasis[2], yBasis[2], zBasis[2];
-    FieldTYPE deriv[2];
-    deriv[0]=-1;
-    deriv[1]=1;
-    FieldTYPE relative, world[3], grad[3], coeff;
-    FieldTYPE xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue;
-    FloatingTYPE *zPointer, *xyzPointer;
+    FieldType position[3], xBasis[2], yBasis[2], zBasis[2];
+    FieldType deriv[2];
+    deriv[0] = -1;
+    deriv[1] = 1;
+    FieldType relative, world[3], grad[3], coeff;
+    FieldType xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue;
+    const FloatingType *zPointer, *xyzPointer;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(index, world, position, previous, xBasis, yBasis, zBasis, relative, grad, coeff, \
+    private(world, position, previous, xBasis, yBasis, zBasis, relative, grad, coeff, \
     a, b, c, X, Y, Z, zPointer, xyzPointer, xTempNewValue, yTempNewValue, xxTempNewValue, yyTempNewValue, zzTempNewValue) \
     shared(floatingIntensity, referenceVoxelNumber, floatingVoxelNumber, deriv, paddingValue, \
-    deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, maskPtr, \
+    deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, mask, \
     floatingIJKMatrix, floatingImage, warpedGradientPtrX, warpedGradientPtrY, warpedGradientPtrZ)
 #endif // _OPENMP
-    for(index=0; index<referenceVoxelNumber; index++)
-    {
-
-        grad[0]=0;
-        grad[1]=0;
-        grad[2]=0;
+    for (index = 0; index < referenceVoxelNumber; index++) {
+        grad[0] = 0;
+        grad[1] = 0;
+        grad[2] = 0;
 
-        if(maskPtr[index]>-1)
-        {
-            world[0]=(FieldTYPE) deformationFieldPtrX[index];
-            world[1]=(FieldTYPE) deformationFieldPtrY[index];
-            world[2]=(FieldTYPE) deformationFieldPtrZ[index];
+        if (mask[index] > -1) {
+            world[0] = (FieldType)deformationFieldPtrX[index];
+            world[1] = (FieldType)deformationFieldPtrY[index];
+            world[2] = (FieldType)deformationFieldPtrZ[index];
 
             /* real -> voxel; floating space */
             reg_mat44_mul(floatingIJKMatrix, world, position);
@@ -2601,51 +2063,43 @@ void TrilinearImageGradient(nifti_image *floatingImage,
             previous[1] = static_cast<int>(reg_floor(position[1]));
             previous[2] = static_cast<int>(reg_floor(position[2]));
             // basis values along the x axis
-            relative=position[0]-(FieldTYPE)previous[0];
-            xBasis[0]= (FieldTYPE)(1.0-relative);
-            xBasis[1]= relative;
+            relative = position[0] - (FieldType)previous[0];
+            xBasis[0] = (FieldType)(1.0 - relative);
+            xBasis[1] = relative;
             // basis values along the y axis
-            relative=position[1]-(FieldTYPE)previous[1];
-            yBasis[0]= (FieldTYPE)(1.0-relative);
-            yBasis[1]= relative;
+            relative = position[1] - (FieldType)previous[1];
+            yBasis[0] = (FieldType)(1.0 - relative);
+            yBasis[1] = relative;
             // basis values along the z axis
-            relative=position[2]-(FieldTYPE)previous[2];
-            zBasis[0]= (FieldTYPE)(1.0-relative);
-            zBasis[1]= relative;
+            relative = position[2] - (FieldType)previous[2];
+            zBasis[0] = (FieldType)(1.0 - relative);
+            zBasis[1] = relative;
 
             // The padding value is used for interpolation if it is different from NaN
-            if(paddingValue==paddingValue)
-            {
-                for(c=0; c<2; c++)
-                {
-                    Z=previous[2]+c;
-                    if(Z>-1 && Z<floatingImage->nz)
-                    {
-                        zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                        xxTempNewValue=0;
-                        yyTempNewValue=0;
-                        zzTempNewValue=0;
-                        for(b=0; b<2; b++)
-                        {
-                            Y=previous[1]+b;
-                            if(Y>-1 && Y<floatingImage->ny)
-                            {
-                                xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                                xTempNewValue=0;
-                                yTempNewValue=0;
-                                for(a=0; a<2; a++)
-                                {
-                                    X=previous[0]+a;
-                                    if(X>-1 && X<floatingImage->nx)
-                                    {
-                                        coeff = *xyzPointer;
-                                        xTempNewValue +=  coeff * deriv[a];
-                                        yTempNewValue +=  coeff * xBasis[a];
+            if (paddingValue == paddingValue) {
+                for (c = 0; c < 2; c++) {
+                    Z = previous[2] + c;
+                    if (Z > -1 && Z < floatingImage->nz) {
+                        zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny];
+                        xxTempNewValue = 0;
+                        yyTempNewValue = 0;
+                        zzTempNewValue = 0;
+                        for (b = 0; b < 2; b++) {
+                            Y = previous[1] + b;
+                            if (Y > -1 && Y < floatingImage->ny) {
+                                xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]];
+                                xTempNewValue = 0;
+                                yTempNewValue = 0;
+                                for (a = 0; a < 2; a++) {
+                                    X = previous[0] + a;
+                                    if (X > -1 && X < floatingImage->nx) {
+                                        coeff = static_cast<FieldType>(*xyzPointer);
+                                        xTempNewValue += coeff * deriv[a];
+                                        yTempNewValue += coeff * xBasis[a];
                                     } // end X in range
-                                    else
-                                    {
-                                        xTempNewValue +=  paddingValue * deriv[a];
-                                        yTempNewValue +=  paddingValue * xBasis[a];
+                                    else {
+                                        xTempNewValue += paddingValue * deriv[a];
+                                        yTempNewValue += paddingValue * xBasis[a];
                                     }
                                     xyzPointer++;
                                 } // end a
@@ -2653,8 +2107,7 @@ void TrilinearImageGradient(nifti_image *floatingImage,
                                 yyTempNewValue += yTempNewValue * deriv[b];
                                 zzTempNewValue += yTempNewValue * yBasis[b];
                             } // end Y in range
-                            else
-                            {
+                            else {
                                 xxTempNewValue += paddingValue * yBasis[b];
                                 yyTempNewValue += paddingValue * deriv[b];
                                 zzTempNewValue += paddingValue * yBasis[b];
@@ -2664,37 +2117,32 @@ void TrilinearImageGradient(nifti_image *floatingImage,
                         grad[1] += yyTempNewValue * zBasis[c];
                         grad[2] += zzTempNewValue * deriv[c];
                     } // end Z in range
-                    else
-                    {
+                    else {
                         grad[0] += paddingValue * zBasis[c];
                         grad[1] += paddingValue * zBasis[c];
                         grad[2] += paddingValue * deriv[c];
                     }
                 } // end c
             } // end padding value is different from NaN
-            else if(previous[0]>=0.f && previous[0]<(floatingImage->nx-1) &&
-                    previous[1]>=0.f && previous[1]<(floatingImage->ny-1) &&
-                    previous[2]>=0.f && previous[2]<(floatingImage->nz-1) )
-            {
-                for(c=0; c<2; c++)
-                {
-                    Z=previous[2]+c;
-                    zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                    xxTempNewValue=0;
-                    yyTempNewValue=0;
-                    zzTempNewValue=0;
-                    for(b=0; b<2; b++)
-                    {
-                        Y=previous[1]+b;
-                        xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]];
-                        xTempNewValue=0;
-                        yTempNewValue=0;
-                        for(a=0; a<2; a++)
-                        {
-                            X=previous[0]+a;
-                            coeff = *xyzPointer;
-                            xTempNewValue +=  coeff * deriv[a];
-                            yTempNewValue +=  coeff * xBasis[a];
+            else if (previous[0] >= 0.f && previous[0] < (floatingImage->nx - 1) &&
+                     previous[1] >= 0.f && previous[1] < (floatingImage->ny - 1) &&
+                     previous[2] >= 0.f && previous[2] < (floatingImage->nz - 1)) {
+                for (c = 0; c < 2; c++) {
+                    Z = previous[2] + c;
+                    zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny];
+                    xxTempNewValue = 0;
+                    yyTempNewValue = 0;
+                    zzTempNewValue = 0;
+                    for (b = 0; b < 2; b++) {
+                        Y = previous[1] + b;
+                        xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]];
+                        xTempNewValue = 0;
+                        yTempNewValue = 0;
+                        for (a = 0; a < 2; a++) {
+                            X = previous[0] + a;
+                            coeff = static_cast<FieldType>(*xyzPointer);
+                            xTempNewValue += coeff * deriv[a];
+                            yTempNewValue += coeff * xBasis[a];
                             xyzPointer++;
                         } // end a
                         xxTempNewValue += xTempNewValue * yBasis[b];
@@ -2706,222 +2154,198 @@ void TrilinearImageGradient(nifti_image *floatingImage,
                     grad[2] += zzTempNewValue * deriv[c];
                 } // end c
             } // end padding value is NaN
-            else grad[0]=grad[1]=grad[2]=0;
+            else grad[0] = grad[1] = grad[2] = 0;
         } // end mask
 
-        warpedGradientPtrX[index] = (GradientTYPE)grad[0];
-        warpedGradientPtrY[index] = (GradientTYPE)grad[1];
-        warpedGradientPtrZ[index] = (GradientTYPE)grad[2];
+        warpedGradientPtrX[index] = static_cast<GradientType>(grad[0]);
+        warpedGradientPtrY[index] = static_cast<GradientType>(grad[1]);
+        warpedGradientPtrZ[index] = static_cast<GradientType>(grad[2]);
     }
 }
 /* *************************************************************** */
-template<class FloatingTYPE, class GradientTYPE, class FieldTYPE>
-void BilinearImageGradient(nifti_image *floatingImage,
-                           nifti_image *deformationField,
+template<class FloatingType, class GradientType, class FieldType>
+void BilinearImageGradient(const nifti_image *floatingImage,
+                           const nifti_image *deformationField,
                            nifti_image *warpedGradient,
-                           int *mask,
-                           float paddingValue,
-                           int active_timepoint)
-{
-    if(active_timepoint<0 || active_timepoint>=floatingImage->nt){
+                           const int *mask,
+                           const float& paddingValue,
+                           const int& activeTimepoint) {
+    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) {
         reg_print_fct_error("TrilinearImageGradient");
         reg_print_msg_error("The specified active timepoint is not defined in the floating image");
         reg_exit();
     }
 #ifdef _WIN32
     long index;
-    const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient, 2);
-    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2);
+    const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2);
+    const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 2);
 #else
     size_t index;
-    const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient, 2);
-    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2);
+    const size_t referenceVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 2);
+    const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 2);
 #endif
+    const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
+    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber];
 
-    FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
-    FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber];
+    const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
+    const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
 
-    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationField->data);
-    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
+    GradientType *warpedGradientPtrX = static_cast<GradientType*>(warpedGradient->data);
+    GradientType *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
 
-    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warpedGradient->data);
-    GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
-
-    int *maskPtr = &mask[0];
-
-    mat44 floatingIJKMatrix;
-    if(floatingImage->sform_code>0)
-        floatingIJKMatrix=floatingImage->sto_ijk;
-    else floatingIJKMatrix=floatingImage->qto_ijk;
+    const mat44 *floatingIJKMatrix;
+    if (floatingImage->sform_code > 0)
+        floatingIJKMatrix = &floatingImage->sto_ijk;
+    else floatingIJKMatrix = &floatingImage->qto_ijk;
 
 #ifndef NDEBUG
     char text[255];
-    sprintf(text, "2D linear gradient computation of volume number %i",active_timepoint);
+    sprintf(text, "2D linear gradient computation of volume number %i", activeTimepoint);
     reg_print_msg_debug(text);
 #endif
 
-    FieldTYPE position[3], xBasis[2], yBasis[2], relative, world[2], grad[2];
-    FieldTYPE deriv[2];
-    deriv[0]=-1;
-    deriv[1]=1;
-    FieldTYPE coeff, xTempNewValue, yTempNewValue;
+    FieldType position[3], xBasis[2], yBasis[2], relative, world[2], grad[2];
+    FieldType deriv[2];
+    deriv[0] = -1;
+    deriv[1] = 1;
+    FieldType coeff, xTempNewValue, yTempNewValue;
 
     int previous[3], a, b, X, Y;
-    FloatingTYPE *xyPointer;
+    const FloatingType *xyPointer;
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(index, world, position, previous, xBasis, yBasis, relative, grad, coeff, \
+    private(world, position, previous, xBasis, yBasis, relative, grad, coeff, \
     a, b, X, Y, xyPointer, xTempNewValue, yTempNewValue) \
     shared(floatingIntensity, referenceVoxelNumber, floatingVoxelNumber, deriv, \
-    deformationFieldPtrX, deformationFieldPtrY, maskPtr, paddingValue, \
+    deformationFieldPtrX, deformationFieldPtrY, mask, paddingValue, \
     floatingIJKMatrix, floatingImage, warpedGradientPtrX, warpedGradientPtrY)
 #endif // _OPENMP
-    for(index=0; index<referenceVoxelNumber; index++)
-    {
-
-        grad[0]=0;
-        grad[1]=0;
+    for (index = 0; index < referenceVoxelNumber; index++) {
+        grad[0] = 0;
+        grad[1] = 0;
 
-        if(maskPtr[index]>-1)
-        {
-            world[0]=(FieldTYPE) deformationFieldPtrX[index];
-            world[1]=(FieldTYPE) deformationFieldPtrY[index];
+        if (mask[index] > -1) {
+            world[0] = (FieldType)deformationFieldPtrX[index];
+            world[1] = (FieldType)deformationFieldPtrY[index];
 
             /* real -> voxel; floating space */
-            position[0] = world[0]*floatingIJKMatrix.m[0][0] + world[1]*floatingIJKMatrix.m[0][1] +
-                    floatingIJKMatrix.m[0][3];
-            position[1] = world[0]*floatingIJKMatrix.m[1][0] + world[1]*floatingIJKMatrix.m[1][1] +
-                    floatingIJKMatrix.m[1][3];
+            position[0] = world[0] * floatingIJKMatrix->m[0][0] + world[1] * floatingIJKMatrix->m[0][1] + floatingIJKMatrix->m[0][3];
+            position[1] = world[0] * floatingIJKMatrix->m[1][0] + world[1] * floatingIJKMatrix->m[1][1] + floatingIJKMatrix->m[1][3];
 
             previous[0] = static_cast<int>(reg_floor(position[0]));
             previous[1] = static_cast<int>(reg_floor(position[1]));
             // basis values along the x axis
-            relative=position[0]-(FieldTYPE)previous[0];
-            relative=relative>0?relative:0;
-            xBasis[0]= (FieldTYPE)(1.0-relative);
-            xBasis[1]= relative;
+            relative = position[0] - (FieldType)previous[0];
+            relative = relative > 0 ? relative : 0;
+            xBasis[0] = (FieldType)(1.0 - relative);
+            xBasis[1] = relative;
             // basis values along the y axis
-            relative=position[1]-(FieldTYPE)previous[1];
-            relative=relative>0?relative:0;
-            yBasis[0]= (FieldTYPE)(1.0-relative);
-            yBasis[1]= relative;
-
-            for(b=0; b<2; b++)
-            {
-                Y= previous[1]+b;
-                if(Y>-1 && Y<floatingImage->ny)
-                {
-                    xyPointer = &floatingIntensity[Y*floatingImage->nx+previous[0]];
-                    xTempNewValue=0;
-                    yTempNewValue=0;
-                    for(a=0; a<2; a++)
-                    {
-                        X= previous[0]+a;
-                        if(X>-1 && X<floatingImage->nx)
-                        {
-                            coeff = *xyPointer;
-                            xTempNewValue +=  coeff * deriv[a];
-                            yTempNewValue +=  coeff * xBasis[a];
-                        }
-                        else
-                        {
-                            xTempNewValue +=  paddingValue * deriv[a];
-                            yTempNewValue +=  paddingValue * xBasis[a];
+            relative = position[1] - (FieldType)previous[1];
+            relative = relative > 0 ? relative : 0;
+            yBasis[0] = (FieldType)(1.0 - relative);
+            yBasis[1] = relative;
+
+            for (b = 0; b < 2; b++) {
+                Y = previous[1] + b;
+                if (Y > -1 && Y < floatingImage->ny) {
+                    xyPointer = &floatingIntensity[Y * floatingImage->nx + previous[0]];
+                    xTempNewValue = 0;
+                    yTempNewValue = 0;
+                    for (a = 0; a < 2; a++) {
+                        X = previous[0] + a;
+                        if (X > -1 && X < floatingImage->nx) {
+                            coeff = static_cast<FieldType>(*xyPointer);
+                            xTempNewValue += coeff * deriv[a];
+                            yTempNewValue += coeff * xBasis[a];
+                        } else {
+                            xTempNewValue += paddingValue * deriv[a];
+                            yTempNewValue += paddingValue * xBasis[a];
                         }
                         xyPointer++;
                     }
                     grad[0] += xTempNewValue * yBasis[b];
                     grad[1] += yTempNewValue * deriv[b];
-                }
-                else
-                {
+                } else {
                     grad[0] += paddingValue * yBasis[b];
                     grad[1] += paddingValue * deriv[b];
                 }
             }
-            if(grad[0]!=grad[0]) grad[0]=0;
-            if(grad[1]!=grad[1]) grad[1]=0;
+            if (grad[0] != grad[0]) grad[0] = 0;
+            if (grad[1] != grad[1]) grad[1] = 0;
         }// mask
 
-        warpedGradientPtrX[index] = (GradientTYPE)grad[0];
-        warpedGradientPtrY[index] = (GradientTYPE)grad[1];
+        warpedGradientPtrX[index] = static_cast<GradientType>(grad[0]);
+        warpedGradientPtrY[index] = static_cast<GradientType>(grad[1]);
     }
 }
 /* *************************************************************** */
-template<class FloatingTYPE, class GradientTYPE, class FieldTYPE>
-void CubicSplineImageGradient3D(nifti_image *floatingImage,
-                                nifti_image *deformationField,
+template<class FloatingType, class GradientType, class FieldType>
+void CubicSplineImageGradient3D(const nifti_image *floatingImage,
+                                const nifti_image *deformationField,
                                 nifti_image *warpedGradient,
-                                int *mask,
-                                float paddingValue,
-                                int active_timepoint)
-{
-    if(active_timepoint<0 || active_timepoint>=floatingImage->nt){
+                                const int *mask,
+                                const float& paddingValue,
+                                const int& activeTimepoint) {
+    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) {
         reg_print_fct_error("TrilinearImageGradient");
         reg_print_msg_error("The specified active timepoint is not defined in the floating image");
         reg_exit();
     }
 #ifdef _WIN32
     long index;
-    const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient);
-    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage);
+    const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3);
+    const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3);
 #else
     size_t index;
-    const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient);
-    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage);
+    const size_t referenceVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 3);
+    const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
 #endif
-    FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
-    FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber];
+    const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
+    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber];
 
-    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationField->data);
-    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
-    FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber];
+    const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
+    const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
+    const FieldType *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber];
 
-    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warpedGradient->data);
-    GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
-    GradientTYPE *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber];
+    GradientType *warpedGradientPtrX = static_cast<GradientType*>(warpedGradient->data);
+    GradientType *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
+    GradientType *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber];
 
-    int *maskPtr = &mask[0];
-
-    mat44 *floatingIJKMatrix;
-    if(floatingImage->sform_code>0)
-        floatingIJKMatrix=&(floatingImage->sto_ijk);
-    else floatingIJKMatrix=&(floatingImage->qto_ijk);
+    const mat44 *floatingIJKMatrix;
+    if (floatingImage->sform_code > 0)
+        floatingIJKMatrix = &floatingImage->sto_ijk;
+    else floatingIJKMatrix = &floatingImage->qto_ijk;
 
 #ifndef NDEBUG
     char text[255];
-    sprintf(text, "3D cubic spline gradient computation of volume number %i",active_timepoint);
+    sprintf(text, "3D cubic spline gradient computation of volume number %i", activeTimepoint);
     reg_print_msg_debug(text);
 #endif
 
     int previous[3], c, Z, b, Y, a;
 
     double xBasis[4], yBasis[4], zBasis[4], xDeriv[4], yDeriv[4], zDeriv[4], relative;
-    FieldTYPE coeff, position[3], world[3], grad[3];
-    FieldTYPE xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue;
-    FloatingTYPE *zPointer, *yzPointer, *xyzPointer;
+    FieldType coeff, position[3], world[3], grad[3];
+    FieldType xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue;
+    const FloatingType *zPointer, *yzPointer, *xyzPointer;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(index, world, position, previous, xBasis, yBasis, zBasis, xDeriv, yDeriv, zDeriv, relative, grad, coeff, \
+    private(world, position, previous, xBasis, yBasis, zBasis, xDeriv, yDeriv, zDeriv, relative, grad, coeff, \
     a, b, c, Y, Z, zPointer, yzPointer, xyzPointer, xTempNewValue, yTempNewValue, xxTempNewValue, yyTempNewValue, zzTempNewValue) \
     shared(floatingIntensity, referenceVoxelNumber, floatingVoxelNumber, paddingValue, \
-    deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, maskPtr, \
+    deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, mask, \
     floatingIJKMatrix, floatingImage, warpedGradientPtrX, warpedGradientPtrY, warpedGradientPtrZ)
 #endif // _OPENMP
-    for(index=0; index<referenceVoxelNumber; index++)
-    {
-
-        grad[0]=0;
-        grad[1]=0;
-        grad[2]=0;
-
-        if((*maskPtr++)>-1)
-        {
+    for (index = 0; index < referenceVoxelNumber; index++) {
+        grad[0] = 0;
+        grad[1] = 0;
+        grad[2] = 0;
 
-            world[0]=(FieldTYPE) deformationFieldPtrX[index];
-            world[1]=(FieldTYPE) deformationFieldPtrY[index];
-            world[2]=(FieldTYPE) deformationFieldPtrZ[index];
+        if (mask[index] > -1) {
+            world[0] = (FieldType)deformationFieldPtrX[index];
+            world[1] = (FieldType)deformationFieldPtrY[index];
+            world[2] = (FieldType)deformationFieldPtrZ[index];
 
             /* real -> voxel; floating space */
             reg_mat44_mul(floatingIJKMatrix, world, position);
@@ -2931,470 +2355,344 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage,
             previous[2] = static_cast<int>(reg_floor(position[2]));
 
             // basis values along the x axis
-            relative=position[0]-(FieldTYPE)previous[0];
+            relative = position[0] - (FieldType)previous[0];
             interpCubicSplineKernel(relative, xBasis, xDeriv);
 
             // basis values along the y axis
-            relative=position[1]-(FieldTYPE)previous[1];
+            relative = position[1] - (FieldType)previous[1];
             interpCubicSplineKernel(relative, yBasis, yDeriv);
 
             // basis values along the z axis
-            relative=position[2]-(FieldTYPE)previous[2];
+            relative = position[2] - (FieldType)previous[2];
             interpCubicSplineKernel(relative, zBasis, zDeriv);
 
             previous[0]--;
             previous[1]--;
             previous[2]--;
 
-            for(c=0; c<4; c++)
-            {
-                Z = previous[2]+c;
-                if(-1<Z && Z<floatingImage->nz)
-                {
-                    zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny];
-                    xxTempNewValue=0;
-                    yyTempNewValue=0;
-                    zzTempNewValue=0;
-                    for(b=0; b<4; b++)
-                    {
-                        Y= previous[1]+b;
-                        yzPointer = &zPointer[Y*floatingImage->nx];
-                        if(-1<Y && Y<floatingImage->ny)
-                        {
+            for (c = 0; c < 4; c++) {
+                Z = previous[2] + c;
+                if (-1 < Z && Z < floatingImage->nz) {
+                    zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny];
+                    xxTempNewValue = 0;
+                    yyTempNewValue = 0;
+                    zzTempNewValue = 0;
+                    for (b = 0; b < 4; b++) {
+                        Y = previous[1] + b;
+                        yzPointer = &zPointer[Y * floatingImage->nx];
+                        if (-1 < Y && Y < floatingImage->ny) {
                             xyzPointer = &yzPointer[previous[0]];
-                            xTempNewValue=0;
-                            yTempNewValue=0;
-                            for(a=0; a<4; a++)
-                            {
-                                if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx)
-                                {
-                                    coeff = *xyzPointer;
-                                    xTempNewValue +=  coeff * xDeriv[a];
-                                    yTempNewValue +=  coeff * xBasis[a];
+                            xTempNewValue = 0;
+                            yTempNewValue = 0;
+                            for (a = 0; a < 4; a++) {
+                                if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx) {
+                                    coeff = static_cast<FieldType>(*xyzPointer);
+                                    xTempNewValue += coeff * static_cast<FieldType>(xDeriv[a]);
+                                    yTempNewValue += coeff * static_cast<FieldType>(xBasis[a]);
                                 } // previous[0]+a in range
-                                else
-                                {
-                                    xTempNewValue +=  paddingValue * xDeriv[a];
-                                    yTempNewValue +=  paddingValue * xBasis[a];
+                                else {
+                                    xTempNewValue += static_cast<FieldType>(paddingValue * xDeriv[a]);
+                                    yTempNewValue += static_cast<FieldType>(paddingValue * xBasis[a]);
                                 }
                                 xyzPointer++;
                             } // a
-                            xxTempNewValue += xTempNewValue * yBasis[b];
-                            yyTempNewValue += yTempNewValue * yDeriv[b];
-                            zzTempNewValue += yTempNewValue * yBasis[b];
+                            xxTempNewValue += static_cast<FieldType>(xTempNewValue * yBasis[b]);
+                            yyTempNewValue += static_cast<FieldType>(yTempNewValue * yDeriv[b]);
+                            zzTempNewValue += static_cast<FieldType>(yTempNewValue * yBasis[b]);
                         } // Y in range
-                        else
-                        {
-                            xxTempNewValue += paddingValue * yBasis[b];
-                            yyTempNewValue += paddingValue * yDeriv[b];
-                            zzTempNewValue += paddingValue * yBasis[b];
+                        else {
+                            xxTempNewValue += static_cast<FieldType>(paddingValue * yBasis[b]);
+                            yyTempNewValue += static_cast<FieldType>(paddingValue * yDeriv[b]);
+                            zzTempNewValue += static_cast<FieldType>(paddingValue * yBasis[b]);
                         }
                     } // b
-                    grad[0] += xxTempNewValue * zBasis[c];
-                    grad[1] += yyTempNewValue * zBasis[c];
-                    grad[2] += zzTempNewValue * zDeriv[c];
+                    grad[0] += static_cast<FieldType>(xxTempNewValue * zBasis[c]);
+                    grad[1] += static_cast<FieldType>(yyTempNewValue * zBasis[c]);
+                    grad[2] += static_cast<FieldType>(zzTempNewValue * zDeriv[c]);
                 } // Z in range
-                else
-                {
-                    grad[0] += paddingValue * zBasis[c];
-                    grad[1] += paddingValue * zBasis[c];
-                    grad[2] += paddingValue * zDeriv[c];
+                else {
+                    grad[0] += static_cast<FieldType>(paddingValue * zBasis[c]);
+                    grad[1] += static_cast<FieldType>(paddingValue * zBasis[c]);
+                    grad[2] += static_cast<FieldType>(paddingValue * zDeriv[c]);
                 }
             } // c
 
-            grad[0]=grad[0]==grad[0]?grad[0]:0;
-            grad[1]=grad[1]==grad[1]?grad[1]:0;
-            grad[2]=grad[2]==grad[2]?grad[2]:0;
+            grad[0] = grad[0] == grad[0] ? grad[0] : 0;
+            grad[1] = grad[1] == grad[1] ? grad[1] : 0;
+            grad[2] = grad[2] == grad[2] ? grad[2] : 0;
         } // outside of the mask
 
-        warpedGradientPtrX[index] = (GradientTYPE)grad[0];
-        warpedGradientPtrY[index] = (GradientTYPE)grad[1];
-        warpedGradientPtrZ[index] = (GradientTYPE)grad[2];
+        warpedGradientPtrX[index] = static_cast<GradientType>(grad[0]);
+        warpedGradientPtrY[index] = static_cast<GradientType>(grad[1]);
+        warpedGradientPtrZ[index] = static_cast<GradientType>(grad[2]);
     }
 }
 /* *************************************************************** */
-template<class FloatingTYPE, class GradientTYPE, class FieldTYPE>
-void CubicSplineImageGradient2D(nifti_image *floatingImage,
-                                nifti_image *deformationField,
+template<class FloatingType, class GradientType, class FieldType>
+void CubicSplineImageGradient2D(const nifti_image *floatingImage,
+                                const nifti_image *deformationField,
                                 nifti_image *warpedGradient,
-                                int *mask,
-                                float paddingValue,
-                                int active_timepoint)
-{
-    if(active_timepoint<0 || active_timepoint>=floatingImage->nt){
+                                const int *mask,
+                                const float& paddingValue,
+                                const int& activeTimepoint) {
+    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) {
         reg_print_fct_error("TrilinearImageGradient");
         reg_print_msg_error("The specified active timepoint is not defined in the floating image");
         reg_exit();
     }
 #ifdef _WIN32
     long index;
-    const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient, 2);
-    const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2);
+    const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2);
+    const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 2);
 #else
     size_t index;
-    const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient, 2);
-    const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2);
+    const size_t referenceVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 2);
+    const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 2);
 #endif
-    FloatingTYPE *floatingIntensityPtr = static_cast<FloatingTYPE *>(floatingImage->data);
-    FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber];
-
-    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationField->data);
-    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
+    const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
+    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber];
 
-    GradientTYPE *warpedGradientPtrX = static_cast<GradientTYPE *>(warpedGradient->data);
-    GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
+    const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
+    const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
 
-    int *maskPtr = &mask[0];
+    GradientType *warpedGradientPtrX = static_cast<GradientType*>(warpedGradient->data);
+    GradientType *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber];
 
-    mat44 *floatingIJKMatrix;
-    if(floatingImage->sform_code>0)
-        floatingIJKMatrix=&(floatingImage->sto_ijk);
-    else floatingIJKMatrix=&(floatingImage->qto_ijk);
+    const mat44 *floatingIJKMatrix;
+    if (floatingImage->sform_code > 0)
+        floatingIJKMatrix = &floatingImage->sto_ijk;
+    else floatingIJKMatrix = &floatingImage->qto_ijk;
 
 #ifndef NDEBUG
     char text[255];
-    sprintf(text, "2D cubic spline gradient computation of volume number %i",active_timepoint);
+    sprintf(text, "2D cubic spline gradient computation of volume number %i", activeTimepoint);
     reg_print_msg_debug(text);
 #endif
     int previous[2], b, Y, a;
     double xBasis[4], yBasis[4], xDeriv[4], yDeriv[4], relative;
-    FieldTYPE coeff, position[3], world[3], grad[2];
-    FieldTYPE xTempNewValue, yTempNewValue;
-    FloatingTYPE *yPointer, *xyPointer;
+    FieldType coeff, position[3], world[3], grad[2];
+    FieldType xTempNewValue, yTempNewValue;
+    const FloatingType *yPointer, *xyPointer;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(index, world, position, previous, xBasis, yBasis, xDeriv, yDeriv, relative, grad, coeff, \
+    private(world, position, previous, xBasis, yBasis, xDeriv, yDeriv, relative, grad, coeff, \
     a, b, Y, yPointer, xyPointer, xTempNewValue, yTempNewValue) \
     shared(floatingIntensity, referenceVoxelNumber, floatingVoxelNumber, \
-    deformationFieldPtrX, deformationFieldPtrY, maskPtr, paddingValue, \
+    deformationFieldPtrX, deformationFieldPtrY, mask, paddingValue, \
     floatingIJKMatrix, floatingImage, warpedGradientPtrX, warpedGradientPtrY)
 #endif // _OPENMP
-    for(index=0; index<referenceVoxelNumber; index++)
-    {
+    for (index = 0; index < referenceVoxelNumber; index++) {
+        grad[0] = 0;
+        grad[1] = 0;
 
-        grad[0]=0;
-        grad[1]=0;
-
-        if(maskPtr[index]>-1)
-        {
-            world[0]=(FieldTYPE) deformationFieldPtrX[index];
-            world[1]=(FieldTYPE) deformationFieldPtrY[index];
+        if (mask[index] > -1) {
+            world[0] = (FieldType)deformationFieldPtrX[index];
+            world[1] = (FieldType)deformationFieldPtrY[index];
 
             /* real -> voxel; floating space */
-            position[0] = world[0]*floatingIJKMatrix->m[0][0] + world[1]*floatingIJKMatrix->m[0][1] +
-                    floatingIJKMatrix->m[0][3];
-            position[1] = world[0]*floatingIJKMatrix->m[1][0] + world[1]*floatingIJKMatrix->m[1][1] +
-                    floatingIJKMatrix->m[1][3];
+            position[0] = world[0] * floatingIJKMatrix->m[0][0] + world[1] * floatingIJKMatrix->m[0][1] + floatingIJKMatrix->m[0][3];
+            position[1] = world[0] * floatingIJKMatrix->m[1][0] + world[1] * floatingIJKMatrix->m[1][1] + floatingIJKMatrix->m[1][3];
 
             previous[0] = static_cast<int>(reg_floor(position[0]));
             previous[1] = static_cast<int>(reg_floor(position[1]));
             // basis values along the x axis
-            relative=position[0]-(FieldTYPE)previous[0];
-            relative=relative>0?relative:0;
+            relative = position[0] - (FieldType)previous[0];
+            relative = relative > 0 ? relative : 0;
             interpCubicSplineKernel(relative, xBasis, xDeriv);
             // basis values along the y axis
-            relative=position[1]-(FieldTYPE)previous[1];
-            relative=relative>0?relative:0;
+            relative = position[1] - (FieldType)previous[1];
+            relative = relative > 0 ? relative : 0;
             interpCubicSplineKernel(relative, yBasis, yDeriv);
 
             previous[0]--;
             previous[1]--;
 
-            for(b=0; b<4; b++)
-            {
-                Y= previous[1]+b;
-                yPointer = &floatingIntensity[Y*floatingImage->nx];
-                if(-1<Y && Y<floatingImage->ny)
-                {
+            for (b = 0; b < 4; b++) {
+                Y = previous[1] + b;
+                yPointer = &floatingIntensity[Y * floatingImage->nx];
+                if (-1 < Y && Y < floatingImage->ny) {
                     xyPointer = &yPointer[previous[0]];
-                    xTempNewValue=0;
-                    yTempNewValue=0;
-                    for(a=0; a<4; a++)
-                    {
-                        if(-1<(previous[0]+a) && (previous[0]+a)<floatingImage->nx)
-                        {
-                            coeff = *xyPointer;
-                            xTempNewValue +=  coeff * xDeriv[a];
-                            yTempNewValue +=  coeff * xBasis[a];
+                    xTempNewValue = 0;
+                    yTempNewValue = 0;
+                    for (a = 0; a < 4; a++) {
+                        if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx) {
+                            coeff = static_cast<FieldType>(*xyPointer);
+                            xTempNewValue += static_cast<FieldType>(coeff * xDeriv[a]);
+                            yTempNewValue += static_cast<FieldType>(coeff * xBasis[a]);
                         } // previous[0]+a in range
-                        else
-                        {
-                            xTempNewValue +=  paddingValue * xDeriv[a];
-                            yTempNewValue +=  paddingValue * xBasis[a];
+                        else {
+                            xTempNewValue += static_cast<FieldType>(paddingValue * xDeriv[a]);
+                            yTempNewValue += static_cast<FieldType>(paddingValue * xBasis[a]);
                         }
                         xyPointer++;
                     } // a
-                    grad[0] += xTempNewValue * yBasis[b];
-                    grad[1] += yTempNewValue * yDeriv[b];
+                    grad[0] += static_cast<FieldType>(xTempNewValue * yBasis[b]);
+                    grad[1] += static_cast<FieldType>(yTempNewValue * yDeriv[b]);
                 } // Y in range
-                else
-                {
-                    grad[0] += paddingValue * yBasis[b];
-                    grad[1] += paddingValue * yDeriv[b];
+                else {
+                    grad[0] += static_cast<FieldType>(paddingValue * yBasis[b]);
+                    grad[1] += static_cast<FieldType>(paddingValue * yDeriv[b]);
                 }
             } // b
 
-            grad[0]=grad[0]==grad[0]?grad[0]:0;
-            grad[1]=grad[1]==grad[1]?grad[1]:0;
+            grad[0] = grad[0] == grad[0] ? grad[0] : 0;
+            grad[1] = grad[1] == grad[1] ? grad[1] : 0;
         } // outside of the mask
 
-        warpedGradientPtrX[index] = (GradientTYPE)grad[0];
-        warpedGradientPtrY[index] = (GradientTYPE)grad[1];
+        warpedGradientPtrX[index] = static_cast<GradientType>(grad[0]);
+        warpedGradientPtrY[index] = static_cast<GradientType>(grad[1]);
     }
 }
 /* *************************************************************** */
-template <class FieldTYPE, class FloatingTYPE, class GradientTYPE>
-void reg_getImageGradient3(nifti_image *floatingImage,
-                           nifti_image *warpedGradient,
-                           nifti_image *deformationField,
-                           int *mask,
-                           int interp,
-                           float paddingValue,
-                           int active_timepoint,
-                           int *dtIndicies,
-                           mat33 *jacMat,
-                           nifti_image *warpedImage = nullptr
-        )
-{
+template <class FieldType, class FloatingType, class GradientType>
+void reg_getImageGradient(nifti_image *floatingImage,
+                          nifti_image *warpedGradient,
+                          const nifti_image *deformationField,
+                          const int *mask,
+                          const int& interp,
+                          const float& paddingValue,
+                          const int& activeTimepoint,
+                          const int *dtIndicies,
+                          const mat33 *jacMat,
+                          const nifti_image *warpedImage = nullptr) {
     // The floating image data is copied in case one deal with DTI
-    void *originalFloatingData=nullptr;
+    void *originalFloatingData = nullptr;
     // The DTI are logged
-    reg_dti_resampling_preprocessing<FloatingTYPE>(floatingImage,
-                                                   &originalFloatingData,
-                                                   dtIndicies);
+    reg_dti_resampling_preprocessing<FloatingType>(floatingImage, &originalFloatingData, dtIndicies);
     /* The deformation field contains the position in the real world */
-    if(interp==3)
-    {
-        if(deformationField->nu>2)
-        {
-            CubicSplineImageGradient3D
-                    <FloatingTYPE,GradientTYPE,FieldTYPE>(floatingImage,
-                                                          deformationField,
-                                                          warpedGradient,
-                                                          mask,
-                                                          paddingValue,
-                                                          active_timepoint);
-        }
-        else
-        {
-            CubicSplineImageGradient2D
-                    <FloatingTYPE,GradientTYPE,FieldTYPE>(floatingImage,
-                                                          deformationField,
-                                                          warpedGradient,
-                                                          mask,
-                                                          paddingValue,
-                                                          active_timepoint);
-        }
-    }
-    else  // trilinear interpolation [ by default ]
-    {
-        if(deformationField->nu>2)
-        {
-            TrilinearImageGradient
-                    <FloatingTYPE,GradientTYPE,FieldTYPE>(floatingImage,
-                                                          deformationField,
-                                                          warpedGradient,
-                                                          mask,
-                                                          paddingValue,
-                                                          active_timepoint);
+    if (interp == 3) {
+        if (deformationField->nu > 2) {
+            CubicSplineImageGradient3D<FloatingType, GradientType, FieldType>(floatingImage,
+                                                                              deformationField,
+                                                                              warpedGradient,
+                                                                              mask,
+                                                                              paddingValue,
+                                                                              activeTimepoint);
+        } else {
+            CubicSplineImageGradient2D<FloatingType, GradientType, FieldType>(floatingImage,
+                                                                              deformationField,
+                                                                              warpedGradient,
+                                                                              mask,
+                                                                              paddingValue,
+                                                                              activeTimepoint);
         }
-        else
-        {
-            BilinearImageGradient
-                    <FloatingTYPE,GradientTYPE,FieldTYPE>(floatingImage,
-                                                          deformationField,
-                                                          warpedGradient,
-                                                          mask,
-                                                          paddingValue,
-                                                          active_timepoint);
+    } else { // trilinear interpolation [ by default ]
+        if (deformationField->nu > 2) {
+            TrilinearImageGradient<FloatingType, GradientType, FieldType>(floatingImage,
+                                                                          deformationField,
+                                                                          warpedGradient,
+                                                                          mask,
+                                                                          paddingValue,
+                                                                          activeTimepoint);
+        } else {
+            BilinearImageGradient<FloatingType, GradientType, FieldType>(floatingImage,
+                                                                         deformationField,
+                                                                         warpedGradient,
+                                                                         mask,
+                                                                         paddingValue,
+                                                                         activeTimepoint);
         }
     }
     // The temporary logged floating array is deleted
-    if(originalFloatingData!=nullptr)
-    {
+    if (originalFloatingData != nullptr) {
         free(floatingImage->data);
-        floatingImage->data=originalFloatingData;
-        originalFloatingData=nullptr;
+        floatingImage->data = originalFloatingData;
+        originalFloatingData = nullptr;
     }
     // The interpolated tensors are reoriented and exponentiated
-    reg_dti_resampling_postprocessing<FloatingTYPE>(warpedGradient,
-                                                    mask,
-                                                    jacMat,
-                                                    dtIndicies,
-                                                    warpedImage
-                                                    );
+    reg_dti_resampling_postprocessing<FloatingType>(warpedGradient, mask, jacMat, dtIndicies, warpedImage);
 }
 /* *************************************************************** */
-template <class FieldTYPE, class FloatingTYPE>
-void reg_getImageGradient2(nifti_image *floatingImage,
-                           nifti_image *warpedGradient,
-                           nifti_image *deformationField,
-                           int *mask,
-                           int interp,
-                           float paddingValue,
-                           int active_timepoint,
-                           int *dtIndicies,
-                           mat33 *jacMat,
-                           nifti_image *warpedImage
-                           )
-{
-    switch(warpedGradient->datatype)
-    {
-    case NIFTI_TYPE_FLOAT32:
-        reg_getImageGradient3<FieldTYPE,FloatingTYPE,float>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_getImageGradient3<FieldTYPE,FloatingTYPE,double>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    default:
-        reg_print_fct_error("reg_getImageGradient2");
-        reg_print_msg_error("The warped image data type is not supported");
+void reg_getImageGradient(nifti_image *floatingImage,
+                          nifti_image *warpedGradient,
+                          const nifti_image *deformationField,
+                          const int *mask,
+                          const int& interp,
+                          const float& paddingValue,
+                          const int& activeTimepoint,
+                          const bool *dtiTimepoint,
+                          const mat33 *jacMat,
+                          const nifti_image *warpedImage) {
+    if (deformationField->datatype != NIFTI_TYPE_FLOAT32 &&
+        deformationField->datatype != NIFTI_TYPE_FLOAT64) {
+        reg_print_fct_error("reg_getImageGradient");
+        reg_print_msg_error("The deformation field image is expected to be of type float or double");
         reg_exit();
     }
-}
-/* *************************************************************** */
-template <class FieldTYPE>
-void reg_getImageGradient1(nifti_image *floatingImage,
-                           nifti_image *warpedGradient,
-                           nifti_image *deformationField,
-                           int *mask,
-                           int interp,
-                           float paddingValue,
-                           int active_timepoint,
-                           int *dtIndicies,
-                           mat33 *jacMat,
-                           nifti_image *warpedImage
-                           )
-{
-    switch(floatingImage->datatype)
-    {
-    case NIFTI_TYPE_UINT8:
-        reg_getImageGradient2<FieldTYPE,unsigned char>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    case NIFTI_TYPE_INT8:
-        reg_getImageGradient2<FieldTYPE,char>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    case NIFTI_TYPE_UINT16:
-        reg_getImageGradient2<FieldTYPE,unsigned short>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    case NIFTI_TYPE_INT16:
-        reg_getImageGradient2<FieldTYPE,short>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    case NIFTI_TYPE_UINT32:
-        reg_getImageGradient2<FieldTYPE,unsigned>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    case NIFTI_TYPE_INT32:
-        reg_getImageGradient2<FieldTYPE,int>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    case NIFTI_TYPE_FLOAT32:
-        reg_getImageGradient2<FieldTYPE,float>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_getImageGradient2<FieldTYPE,double>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    default:
-        reg_print_fct_error("reg_getImageGradient1");
-        reg_print_msg_error("Unsupported floating image datatype");
+    if (warpedGradient->datatype != NIFTI_TYPE_FLOAT32 &&
+        warpedGradient->datatype != NIFTI_TYPE_FLOAT64) {
+        reg_print_fct_error("reg_getImageGradient");
+        reg_print_msg_error("The warped gradient image is expected to be of type float or double");
         reg_exit();
     }
-}
-/* *************************************************************** */
-void reg_getImageGradient(nifti_image *floatingImage,
-                          nifti_image *warpedGradient,
-                          nifti_image *deformationField,
-                          int *mask,
-                          int interp,
-                          float paddingValue,
-                          int active_timepoint,
-                          bool *dti_timepoint,
-                          mat33 *jacMat,
-                          nifti_image *warpedImage
-                          )
-{
+
     // a mask array is created if no mask is specified
-    bool MrPropreRule=false;
-    if(mask==nullptr)
-    {
+    bool MrPropreRule = false;
+    if (mask == nullptr) {
         // voxels in the backgreg_round are set to -1 so 0 will do the job here
-        mask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int));
-        MrPropreRule=true;
+        mask = (int*)calloc(NiftiImage::calcVoxelNumber(deformationField, 3), sizeof(int));
+        MrPropreRule = true;
     }
 
     // Define the DTI indices if required
     int dtIndicies[6];
-    for(int i=0; i<6; ++i) dtIndicies[i]=-1;
-    if(dti_timepoint!=nullptr)
-    {
-
-        if(jacMat==nullptr)
-        {
+    for (int i = 0; i < 6; ++i) dtIndicies[i] = -1;
+    if (dtiTimepoint != nullptr) {
+        if (jacMat == nullptr) {
             reg_print_fct_error("reg_getImageGradient");
             reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided");
             reg_exit();
         }
-        int j=0;
-        for(int i=0; i<floatingImage->nt; ++i)
-        {
-            if(dti_timepoint[i])
-                dtIndicies[j++]=i;
+        int j = 0;
+        for (int i = 0; i < floatingImage->nt; ++i) {
+            if (dtiTimepoint[i])
+                dtIndicies[j++] = i;
         }
-        if((floatingImage->nz>1 && j!=6) && (floatingImage->nz==1 && j!=3))
-        {
+        if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) {
             reg_print_fct_error("reg_getImageGradient");
             reg_print_msg_error("DTI resampling: Unexpected number of DTI components");
             reg_exit();
         }
     }
 
-    switch(deformationField->datatype)
-    {
-    case NIFTI_TYPE_FLOAT32:
-        reg_getImageGradient1<float>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_getImageGradient1<double>
-                (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage);
-        break;
-    default:
-        reg_print_fct_error("reg_getImageGradient");
-        reg_print_msg_error("Unsupported deformation field image datatype");
-        reg_exit();
-        break;
-    }
-    if(MrPropreRule) free(mask);
+    std::visit([&](auto&& defFieldDataType, auto&& floImgDataType, auto&& warpedGradDataType) {
+        using DefFieldDataType = std::decay_t<decltype(defFieldDataType)>;
+        using FloImgDataType = std::decay_t<decltype(floImgDataType)>;
+        using WarpedGradDataType = std::decay_t<decltype(warpedGradDataType)>;
+        reg_getImageGradient<DefFieldDataType, FloImgDataType, WarpedGradDataType>(floatingImage,
+                                                                                   warpedGradient,
+                                                                                   deformationField,
+                                                                                   mask,
+                                                                                   interp,
+                                                                                   paddingValue,
+                                                                                   activeTimepoint,
+                                                                                   dtIndicies,
+                                                                                   jacMat,
+                                                                                   warpedImage);
+    }, NiftiImage::getFloatingDataType(deformationField), NiftiImage::getDataType(floatingImage), NiftiImage::getFloatingDataType(warpedGradient));
+
+    if (MrPropreRule)
+        free(const_cast<int*>(mask));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DataType>
-void reg_getImageGradient_symDiff_core(nifti_image *img,
-                                       nifti_image *gradImg,
-                                       int *mask,
-                                       float padding_value,
-                                       int timepoint)
-{
-    const size_t voxelNumber = CalcVoxelNumber(*img);
+void reg_getImageGradient_symDiff(const nifti_image *img,
+                                  nifti_image *gradImg,
+                                  const int *mask,
+                                  const float& paddingValue,
+                                  const int& timepoint) {
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
 
     int dimImg = img->nz > 1 ? 3 : 2;
     int x, y, z;
 
-    DataType *imgPtr = static_cast<DataType *>(img->data);
-    DataType *currentImgPtr = &imgPtr[timepoint*voxelNumber];
+    const DataType *imgPtr = static_cast<DataType*>(img->data);
+    const DataType *currentImgPtr = &imgPtr[timepoint * voxelNumber];
 
-    DataType *gradPtrX = static_cast<DataType *>(gradImg->data);
+    DataType *gradPtrX = static_cast<DataType*>(gradImg->data);
     DataType *gradPtrY = &gradPtrX[voxelNumber];
     DataType *gradPtrZ = nullptr;
-    if(dimImg==3)
+    if (dimImg == 3)
         gradPtrZ = &gradPtrY[voxelNumber];
 
     DataType valX, valY, valZ, pre, post;
@@ -3402,159 +2700,150 @@ void reg_getImageGradient_symDiff_core(nifti_image *img,
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(img, currentImgPtr, mask, \
-    gradPtrX, gradPtrY, gradPtrZ, padding_value) \
-    private(x, y, z, pre, post, valX, valY, valZ)
+    gradPtrX, gradPtrY, gradPtrZ, paddingValue) \
+    private(x, y, pre, post, valX, valY, valZ)
 #endif
-    for(z=0; z<img->nz; ++z){
-        size_t voxIndex=z*img->nx*img->ny;
-        for(y=0; y<img->ny; ++y){
-            for(x=0; x<img->nx; ++x){
+    for (z = 0; z < img->nz; ++z) {
+        size_t voxIndex = z * img->nx * img->ny;
+        for (y = 0; y < img->ny; ++y) {
+            for (x = 0; x < img->nx; ++x) {
                 valX = valY = valZ = 0;
-                if(mask[voxIndex]>-1){
-
-                    pre = post = padding_value;
-                    if(x<img->nx-1) post = currentImgPtr[voxIndex+1];
-                    if(x>0) pre = currentImgPtr[voxIndex-1];
-                    valX =  (post - pre) / 2.f;
-
-                    pre = post = padding_value;
-                    if(y<img->ny-1) post = currentImgPtr[voxIndex+img->nx];
-                    if(y>0) pre = currentImgPtr[voxIndex-img->nx];
-                    valY =  (post - pre) / 2.f;
-
-                    if(gradPtrZ!=nullptr){
-                        pre = post = padding_value;
-                        if(z<img->nz-1) post = currentImgPtr[voxIndex+img->nx*img->ny];
-                        if(z>0) pre = currentImgPtr[voxIndex-img->nx*img->ny];
-                        valZ =  (post - pre) / 2.f;
+                if (mask[voxIndex] > -1) {
+
+                    pre = post = paddingValue;
+                    if (x < img->nx - 1) post = currentImgPtr[voxIndex + 1];
+                    if (x > 0) pre = currentImgPtr[voxIndex - 1];
+                    valX = (post - pre) / 2.f;
+
+                    pre = post = paddingValue;
+                    if (y < img->ny - 1) post = currentImgPtr[voxIndex + img->nx];
+                    if (y > 0) pre = currentImgPtr[voxIndex - img->nx];
+                    valY = (post - pre) / 2.f;
+
+                    if (gradPtrZ != nullptr) {
+                        pre = post = paddingValue;
+                        if (z < img->nz - 1) post = currentImgPtr[voxIndex + img->nx * img->ny];
+                        if (z > 0) pre = currentImgPtr[voxIndex - img->nx * img->ny];
+                        valZ = (post - pre) / 2.f;
                     }
                 }
-                gradPtrX[voxIndex] = valX==valX?valX:0;
-                gradPtrY[voxIndex] = valY==valY?valY:0;
-                if(gradPtrZ!=nullptr)
-                    gradPtrZ[voxIndex] = valZ==valZ?valZ:0;
+                gradPtrX[voxIndex] = valX == valX ? valX : 0;
+                gradPtrY[voxIndex] = valY == valY ? valY : 0;
+                if (gradPtrZ != nullptr)
+                    gradPtrZ[voxIndex] = valZ == valZ ? valZ : 0;
                 ++voxIndex;
             } // x
         } // y
     } // z
 }
 /* *************************************************************** */
-void reg_getImageGradient_symDiff(nifti_image *img,
+void reg_getImageGradient_symDiff(const nifti_image *img,
                                   nifti_image *gradImg,
-                                  int *mask,
-                                  float padding_value,
-                                  int timepoint)
-{
-    if(img->datatype != gradImg->datatype){
+                                  const int *mask,
+                                  const float& paddingValue,
+                                  const int& timepoint) {
+    if (img->datatype != gradImg->datatype) {
         reg_print_fct_error("reg_getImageGradient_symDiff");
-        reg_print_msg_error("Input images are expected to be of the same type")
-                reg_exit();
+        reg_print_msg_error("Input images are expected to be of the same type");
+        reg_exit();
     }
-    switch(img->datatype){
-    case NIFTI_TYPE_FLOAT32: reg_getImageGradient_symDiff_core<float>(
-                    img, gradImg, mask, padding_value, timepoint);
-        break;
-    case NIFTI_TYPE_FLOAT64: reg_getImageGradient_symDiff_core<float>(
-                    img, gradImg, mask, padding_value, timepoint);
-        break;
-    default:
+    if (img->datatype != NIFTI_TYPE_FLOAT32 && img->datatype != NIFTI_TYPE_FLOAT64) {
         reg_print_fct_error("reg_getImageGradient_symDiff");
-        reg_print_msg_error("Input images are expected to be of floating precision type")
-                reg_exit();
+        reg_print_msg_error("Input images are expected to be of floating precision type");
+        reg_exit();
     }
+
+    std::visit([&](auto&& imgDataType) {
+        using ImgDataType = std::decay_t<decltype(imgDataType)>;
+        reg_getImageGradient_symDiff<ImgDataType>(img, gradImg, mask, paddingValue, timepoint);
+    }, NiftiImage::getFloatingDataType(img));
 }
 /* *************************************************************** */
-/* *************************************************************** */
-nifti_image *reg_makeIsotropic(nifti_image *img,
-                               int inter)
-{
+nifti_image* reg_makeIsotropic(nifti_image *img, int inter) {
     // Get the smallest voxel size
-    float smallestPixDim=img->pixdim[1];
-    for(size_t i=2; i<4; ++i)
-        if(i<static_cast<size_t>(img->dim[0]+2))
-            smallestPixDim=img->pixdim[i]<smallestPixDim?img->pixdim[i]:smallestPixDim;
+    float smallestPixDim = img->pixdim[1];
+    for (size_t i = 2; i < 4; ++i)
+        if (i < static_cast<size_t>(img->dim[0] + 2))
+            smallestPixDim = img->pixdim[i] < smallestPixDim ? img->pixdim[i] : smallestPixDim;
     // Define the size of the new image
     int newDim[8];
-    for(size_t i=0; i<8; ++i) newDim[i]=img->dim[i];
-    for(size_t i=1; i<4; ++i)
-    {
-        if(i<static_cast<size_t>(img->dim[0]+1))
-            newDim[i]=(int)ceilf(img->dim[i]*img->pixdim[i]/smallestPixDim);
+    for (size_t i = 0; i < 8; ++i) newDim[i] = img->dim[i];
+    for (size_t i = 1; i < 4; ++i) {
+        if (i < static_cast<size_t>(img->dim[0] + 1))
+            newDim[i] = (int)ceilf(img->dim[i] * img->pixdim[i] / smallestPixDim);
     }
     // Create the new image
-    nifti_image *newImg=nifti_make_new_nim(newDim,img->datatype,true);
-    newImg->pixdim[1]=newImg->dx=smallestPixDim;
-    newImg->pixdim[2]=newImg->dy=smallestPixDim;
-    newImg->pixdim[3]=newImg->dz=smallestPixDim;
-    newImg->qform_code=img->qform_code;
-    newImg->sform_code=img->sform_code;
+    nifti_image *newImg = nifti_make_new_nim(newDim, img->datatype, true);
+    newImg->pixdim[1] = newImg->dx = smallestPixDim;
+    newImg->pixdim[2] = newImg->dy = smallestPixDim;
+    newImg->pixdim[3] = newImg->dz = smallestPixDim;
+    newImg->qform_code = img->qform_code;
+    newImg->sform_code = img->sform_code;
     // Update the qform matrix
-    newImg->qfac=img->qfac;
-    newImg->quatern_b=img->quatern_b;
-    newImg->quatern_c=img->quatern_c;
-    newImg->quatern_d=img->quatern_d;
-    newImg->qoffset_x=img->qoffset_x+smallestPixDim/2.f-img->dx/2.f;
-    newImg->qoffset_y=img->qoffset_y+smallestPixDim/2.f-img->dy/2.f;
-    newImg->qoffset_z=img->qoffset_z+smallestPixDim/2.f-img->dz/2.f;
-    newImg->qto_xyz=nifti_quatern_to_mat44(newImg->quatern_b,
-                                           newImg->quatern_c,
-                                           newImg->quatern_d,
-                                           newImg->qoffset_x,
-                                           newImg->qoffset_y,
-                                           newImg->qoffset_z,
-                                           smallestPixDim,
-                                           smallestPixDim,
-                                           smallestPixDim,
-                                           newImg->qfac);
-    newImg->qto_ijk=nifti_mat44_inverse(newImg->qto_xyz);
-    if(newImg->sform_code>0)
-    {
+    newImg->qfac = img->qfac;
+    newImg->quatern_b = img->quatern_b;
+    newImg->quatern_c = img->quatern_c;
+    newImg->quatern_d = img->quatern_d;
+    newImg->qoffset_x = img->qoffset_x + smallestPixDim / 2.f - img->dx / 2.f;
+    newImg->qoffset_y = img->qoffset_y + smallestPixDim / 2.f - img->dy / 2.f;
+    newImg->qoffset_z = img->qoffset_z + smallestPixDim / 2.f - img->dz / 2.f;
+    newImg->qto_xyz = nifti_quatern_to_mat44(newImg->quatern_b,
+                                             newImg->quatern_c,
+                                             newImg->quatern_d,
+                                             newImg->qoffset_x,
+                                             newImg->qoffset_y,
+                                             newImg->qoffset_z,
+                                             smallestPixDim,
+                                             smallestPixDim,
+                                             smallestPixDim,
+                                             newImg->qfac);
+    newImg->qto_ijk = nifti_mat44_inverse(newImg->qto_xyz);
+    if (newImg->sform_code > 0) {
         // Compute the new sform
         float scalingRatio[3];
-        scalingRatio[0]= newImg->dx / img->dx;
-        scalingRatio[1]= newImg->dy / img->dy;
-        scalingRatio[2]= newImg->dz / img->dz;
-        newImg->sto_xyz.m[0][0]=img->sto_xyz.m[0][0] * scalingRatio[0];
-        newImg->sto_xyz.m[1][0]=img->sto_xyz.m[1][0] * scalingRatio[0];
-        newImg->sto_xyz.m[2][0]=img->sto_xyz.m[2][0] * scalingRatio[0];
-        newImg->sto_xyz.m[3][0]=img->sto_xyz.m[3][0];
-        newImg->sto_xyz.m[0][1]=img->sto_xyz.m[0][1] * scalingRatio[1];
-        newImg->sto_xyz.m[1][1]=img->sto_xyz.m[1][1] * scalingRatio[1];
-        newImg->sto_xyz.m[2][1]=img->sto_xyz.m[2][1] * scalingRatio[1];
-        newImg->sto_xyz.m[3][1]=img->sto_xyz.m[3][1];
-        newImg->sto_xyz.m[0][2]=img->sto_xyz.m[0][2] * scalingRatio[2];
-        newImg->sto_xyz.m[1][2]=img->sto_xyz.m[1][2] * scalingRatio[2];
-        newImg->sto_xyz.m[2][2]=img->sto_xyz.m[2][2] * scalingRatio[2];
-        newImg->sto_xyz.m[3][2]=img->sto_xyz.m[3][2];
-        newImg->sto_xyz.m[0][3]=img->sto_xyz.m[0][3]+smallestPixDim/2.f-img->dx/2.f;
-        newImg->sto_xyz.m[1][3]=img->sto_xyz.m[1][3]+smallestPixDim/2.f-img->dy/2.f;
-        newImg->sto_xyz.m[2][3]=img->sto_xyz.m[2][3]+smallestPixDim/2.f-img->dz/2.f;
-        newImg->sto_xyz.m[3][3]=img->sto_xyz.m[3][3];
-        newImg->sto_ijk=nifti_mat44_inverse(newImg->sto_xyz);
+        scalingRatio[0] = newImg->dx / img->dx;
+        scalingRatio[1] = newImg->dy / img->dy;
+        scalingRatio[2] = newImg->dz / img->dz;
+        newImg->sto_xyz.m[0][0] = img->sto_xyz.m[0][0] * scalingRatio[0];
+        newImg->sto_xyz.m[1][0] = img->sto_xyz.m[1][0] * scalingRatio[0];
+        newImg->sto_xyz.m[2][0] = img->sto_xyz.m[2][0] * scalingRatio[0];
+        newImg->sto_xyz.m[3][0] = img->sto_xyz.m[3][0];
+        newImg->sto_xyz.m[0][1] = img->sto_xyz.m[0][1] * scalingRatio[1];
+        newImg->sto_xyz.m[1][1] = img->sto_xyz.m[1][1] * scalingRatio[1];
+        newImg->sto_xyz.m[2][1] = img->sto_xyz.m[2][1] * scalingRatio[1];
+        newImg->sto_xyz.m[3][1] = img->sto_xyz.m[3][1];
+        newImg->sto_xyz.m[0][2] = img->sto_xyz.m[0][2] * scalingRatio[2];
+        newImg->sto_xyz.m[1][2] = img->sto_xyz.m[1][2] * scalingRatio[2];
+        newImg->sto_xyz.m[2][2] = img->sto_xyz.m[2][2] * scalingRatio[2];
+        newImg->sto_xyz.m[3][2] = img->sto_xyz.m[3][2];
+        newImg->sto_xyz.m[0][3] = img->sto_xyz.m[0][3] + smallestPixDim / 2.f - img->dx / 2.f;
+        newImg->sto_xyz.m[1][3] = img->sto_xyz.m[1][3] + smallestPixDim / 2.f - img->dy / 2.f;
+        newImg->sto_xyz.m[2][3] = img->sto_xyz.m[2][3] + smallestPixDim / 2.f - img->dz / 2.f;
+        newImg->sto_xyz.m[3][3] = img->sto_xyz.m[3][3];
+        newImg->sto_ijk = nifti_mat44_inverse(newImg->sto_xyz);
     }
     reg_checkAndCorrectDimension(newImg);
     // Create a deformation field
-    nifti_image *def=nifti_copy_nim_info(newImg);
-    def->dim[0]=def->ndim=5;
-    def->dim[4]=def->nt=1;
-    def->pixdim[4]=def->dt=1.0;
-    def->dim[5]=def->nu=newImg->nz>1?3:2;
-    def->pixdim[5]=def->du=1.0;
-    def->dim[6]=def->nv=1;
-    def->pixdim[6]=def->dv=1.0;
-    def->dim[7]=def->nw=1;
-    def->pixdim[7]=def->dw=1.0;
-    def->nvox = CalcVoxelNumber(*def, def->ndim);
+    nifti_image *def = nifti_copy_nim_info(newImg);
+    def->dim[0] = def->ndim = 5;
+    def->dim[4] = def->nt = 1;
+    def->pixdim[4] = def->dt = 1.0;
+    def->dim[5] = def->nu = newImg->nz > 1 ? 3 : 2;
+    def->pixdim[5] = def->du = 1.0;
+    def->dim[6] = def->nv = 1;
+    def->pixdim[6] = def->dv = 1.0;
+    def->dim[7] = def->nw = 1;
+    def->pixdim[7] = def->dw = 1.0;
+    def->nvox = NiftiImage::calcVoxelNumber(def, def->ndim);
     def->nbyper = sizeof(float);
     def->datatype = NIFTI_TYPE_FLOAT32;
-    def->data = calloc(def->nvox,def->nbyper);
+    def->data = calloc(def->nvox, def->nbyper);
     // Fill the deformation field with an identity transformation
     reg_getDeformationFromDisplacement(def);
     // resample the original image into the space of the new image
-    reg_resampleImage(img,newImg,def,nullptr,inter,0.f);
-    nifti_set_filenames(newImg,"tempIsotropicImage",0,0);
+    reg_resampleImage(img, newImg, def, nullptr, inter, 0.f);
+    nifti_set_filenames(newImg, "tempIsotropicImage", 0, 0);
     nifti_image_free(def);
     return newImg;
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h
index 3705e810..e4e88ac8 100755
--- a/reg-lib/cpu/_reg_resampling.h
+++ b/reg-lib/cpu/_reg_resampling.h
@@ -36,47 +36,49 @@
 extern "C++"
 void reg_resampleImage(nifti_image *floatingImage,
                        nifti_image *warpedImage,
-                       nifti_image *deformationField,
-                       int *mask,
-                       int interp,
-                       float paddingValue,
-                       bool *dti_timepoint = nullptr,
-                       mat33 * jacMat = nullptr);
+                       const nifti_image *deformationField,
+                       const int *mask,
+                       const int& interp,
+                       const float& paddingValue,
+                       const bool *dtiTimepoint = nullptr,
+                       const mat33 *jacMat = nullptr);
+/* *************************************************************** */
 extern "C++"
-void reg_resampleImage_PSF(nifti_image *floatingImage,
+void reg_resampleImage_PSF(const nifti_image *floatingImage,
                            nifti_image *warpedImage,
-                           nifti_image *deformationField,
-                           int *mask,
-                           int interp,
-                           float paddingValue,
-                           mat33 * jacMat,
-                           char algorithm);
-
-
+                           const nifti_image *deformationField,
+                           const int *mask,
+                           const int& interp,
+                           const float& paddingValue,
+                           const mat33 *jacMat,
+                           const char& algorithm);
+/* *************************************************************** */
 extern "C++"
-void reg_resampleGradient(nifti_image *gradientImage,
+void reg_resampleGradient(const nifti_image *gradientImage,
                           nifti_image *warpedGradient,
-                          nifti_image *deformationField,
-                          int interp,
-                          float paddingValue);
-
+                          const nifti_image *deformationField,
+                          const int& interp,
+                          const float& paddingValue);
+/* *************************************************************** */
 extern "C++"
 void reg_getImageGradient(nifti_image *floatingImage,
                           nifti_image *warpedGradient,
-                          nifti_image *deformationField,
-                          int *mask,
-                          int interp,
-                          float paddingValue,
-                          int active_timepoint,
-                          bool *dti_timepoint = nullptr,
-                          mat33 *jacMat = nullptr,
-                          nifti_image *warpedImage = nullptr);
-
+                          const nifti_image *deformationField,
+                          const int *mask,
+                          const int& interp,
+                          const float& paddingValue,
+                          const int& activeTimepoint,
+                          const bool *dtiTimepoint = nullptr,
+                          const mat33 *jacMat = nullptr,
+                          const nifti_image *warpedImage = nullptr);
+/* *************************************************************** */
 extern "C++"
-void reg_getImageGradient_symDiff(nifti_image* inputImg,
-                                  nifti_image* gradImg,
-                                  int *mask,
-                                  float padding_value,
-                                  int timepoint);
+void reg_getImageGradient_symDiff(const nifti_image *img,
+                                  nifti_image *gradImg,
+                                  const int *mask,
+                                  const float& paddingValue,
+                                  const int& timepoint);
+/* *************************************************************** */
 extern "C++"
-nifti_image *reg_makeIsotropic(nifti_image *, int);
+nifti_image* reg_makeIsotropic(nifti_image*, int);
+/* *************************************************************** */

From efb633151a3c4767dc96d529d0f16e5c23aa6876 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 20 Jul 2023 15:51:39 +0100
Subject: [PATCH 163/314] Fix memory leaks in Content

---
 niftyreg_build_version.txt | 2 +-
 reg-lib/Content.h          | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 1473a88f..e01062f1 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-281
+282
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index 8883acba..7beb9e4a 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -57,8 +57,8 @@ class Content {
 protected:
 #endif
     // Functions for testing
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) { deformationField = deformationFieldIn; }
+    virtual void SetDeformationField(nifti_image *deformationFieldIn) { DeallocateDeformationField(); deformationField = deformationFieldIn; }
     virtual void SetReferenceMask(int *referenceMaskIn) { referenceMask = referenceMaskIn; }
     virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { transformationMatrix = transformationMatrixIn; }
-    virtual void SetWarped(nifti_image *warpedIn) { warped = warpedIn; }
+    virtual void SetWarped(nifti_image *warpedIn) { DeallocateWarped(); warped = warpedIn; }
 };

From 4006362ab4e7b6a79cf93e5c501882247a7df1cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 20 Jul 2023 16:37:06 +0100
Subject: [PATCH 164/314] Refactor reg_nmi

---
 niftyreg_build_version.txt |   2 +-
 reg-lib/cpu/_reg_mind.cpp  |   8 +-
 reg-lib/cpu/_reg_nmi.cpp   | 483 ++++++++++++++-----------------------
 reg-lib/cpu/_reg_nmi.h     |  71 ++----
 reg-lib/cpu/_reg_ssd.cpp   |   2 +-
 reg-lib/cpu/_reg_tools.h   |   1 +
 6 files changed, 208 insertions(+), 359 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e01062f1..6d26270b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-282
+283
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 0335843b..59429ebb 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -32,7 +32,7 @@ void ShiftImage(nifti_image* inputImgPtr,
 #pragma omp parallel for default(none) \
     shared(inputData, shiftImageData, shiftedImgPtr, inputImgPtr, \
     maskPtr, tx, ty, tz) \
-    private(x, y, z, old_x, old_y, old_z, shiftedIndex, \
+    private(x, y, old_x, old_y, old_z, shiftedIndex, \
     currentIndex)
 #endif
     for (z = 0; z < shiftedImgPtr->nz; z++) {
@@ -130,10 +130,9 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
 #pragma omp parallel for default(none) \
     shared(voxelNumber, samplingNbr, maskPtr, meanImgDataPtr, \
     MINDImgDataPtr) \
-    private(voxelIndex, meanValue, max_desc, descValue, mindIndex)
+    private(meanValue, max_desc, descValue, mindIndex)
 #endif
     for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) {
-
         if (maskPtr[voxelIndex] > -1) {
             // Get the mean value for the current voxel
             meanValue = meanImgDataPtr[voxelIndex];
@@ -281,10 +280,9 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
 #pragma omp parallel for default(none) \
     shared(voxelNumber, lengthDescriptor, samplingNbr, maskPtr, meanImgDataPtr, \
     MINDSSCImgDataPtr) \
-    private(voxelIndex, meanValue, max_desc, descValue, mindIndex)
+    private(meanValue, max_desc, descValue, mindIndex)
 #endif
     for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) {
-
         if (maskPtr[voxelIndex] > -1) {
             // Get the mean value for the current voxel
             meanValue = meanImgDataPtr[voxelIndex];
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 40e69328..23288d73 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -14,12 +14,12 @@
 
 /* *************************************************************** */
 reg_nmi::reg_nmi(): reg_measure() {
-    this->forwardJointHistogramPro = nullptr;
-    this->forwardJointHistogramLog = nullptr;
-    this->forwardEntropyValues = nullptr;
-    this->backwardJointHistogramPro = nullptr;
-    this->backwardJointHistogramLog = nullptr;
-    this->backwardEntropyValues = nullptr;
+    this->jointHistogramPro = nullptr;
+    this->jointHistogramLog = nullptr;
+    this->entropyValues = nullptr;
+    this->jointHistogramProBw = nullptr;
+    this->jointHistogramLogBw = nullptr;
+    this->entropyValuesBw = nullptr;
 
     for (int i = 0; i < 255; ++i) {
         this->referenceBinNumber[i] = 68;
@@ -40,62 +40,62 @@ reg_nmi::~reg_nmi() {
 void reg_nmi::DeallocateHistogram() {
     int timepoint = this->referenceTimePoint;
     // Free the joint histograms and the entropy arrays
-    if (this->forwardJointHistogramPro != nullptr) {
+    if (this->jointHistogramPro != nullptr) {
         for (int i = 0; i < timepoint; ++i) {
-            if (this->forwardJointHistogramPro[i] != nullptr)
-                free(this->forwardJointHistogramPro[i]);
-            this->forwardJointHistogramPro[i] = nullptr;
+            if (this->jointHistogramPro[i] != nullptr)
+                free(this->jointHistogramPro[i]);
+            this->jointHistogramPro[i] = nullptr;
         }
-        free(this->forwardJointHistogramPro);
+        free(this->jointHistogramPro);
     }
-    this->forwardJointHistogramPro = nullptr;
-    if (this->backwardJointHistogramPro != nullptr) {
+    this->jointHistogramPro = nullptr;
+    if (this->jointHistogramProBw != nullptr) {
         for (int i = 0; i < timepoint; ++i) {
-            if (this->backwardJointHistogramPro[i] != nullptr)
-                free(this->backwardJointHistogramPro[i]);
-            this->backwardJointHistogramPro[i] = nullptr;
+            if (this->jointHistogramProBw[i] != nullptr)
+                free(this->jointHistogramProBw[i]);
+            this->jointHistogramProBw[i] = nullptr;
         }
-        free(this->backwardJointHistogramPro);
+        free(this->jointHistogramProBw);
     }
-    this->backwardJointHistogramPro = nullptr;
+    this->jointHistogramProBw = nullptr;
 
-    if (this->forwardJointHistogramLog != nullptr) {
+    if (this->jointHistogramLog != nullptr) {
         for (int i = 0; i < timepoint; ++i) {
-            if (this->forwardJointHistogramLog[i] != nullptr)
-                free(this->forwardJointHistogramLog[i]);
-            this->forwardJointHistogramLog[i] = nullptr;
+            if (this->jointHistogramLog[i] != nullptr)
+                free(this->jointHistogramLog[i]);
+            this->jointHistogramLog[i] = nullptr;
         }
-        free(this->forwardJointHistogramLog);
+        free(this->jointHistogramLog);
     }
-    this->forwardJointHistogramLog = nullptr;
-    if (this->backwardJointHistogramLog != nullptr) {
+    this->jointHistogramLog = nullptr;
+    if (this->jointHistogramLogBw != nullptr) {
         for (int i = 0; i < timepoint; ++i) {
-            if (this->backwardJointHistogramLog[i] != nullptr)
-                free(this->backwardJointHistogramLog[i]);
-            this->backwardJointHistogramLog[i] = nullptr;
+            if (this->jointHistogramLogBw[i] != nullptr)
+                free(this->jointHistogramLogBw[i]);
+            this->jointHistogramLogBw[i] = nullptr;
         }
-        free(this->backwardJointHistogramLog);
+        free(this->jointHistogramLogBw);
     }
-    this->backwardJointHistogramLog = nullptr;
+    this->jointHistogramLogBw = nullptr;
 
-    if (this->forwardEntropyValues != nullptr) {
+    if (this->entropyValues != nullptr) {
         for (int i = 0; i < timepoint; ++i) {
-            if (this->forwardEntropyValues[i] != nullptr)
-                free(this->forwardEntropyValues[i]);
-            this->forwardEntropyValues[i] = nullptr;
+            if (this->entropyValues[i] != nullptr)
+                free(this->entropyValues[i]);
+            this->entropyValues[i] = nullptr;
         }
-        free(this->forwardEntropyValues);
+        free(this->entropyValues);
     }
-    this->forwardEntropyValues = nullptr;
-    if (this->backwardEntropyValues != nullptr) {
+    this->entropyValues = nullptr;
+    if (this->entropyValuesBw != nullptr) {
         for (int i = 0; i < timepoint; ++i) {
-            if (this->backwardEntropyValues[i] != nullptr)
-                free(this->backwardEntropyValues[i]);
-            this->backwardEntropyValues[i] = nullptr;
+            if (this->entropyValuesBw[i] != nullptr)
+                free(this->entropyValuesBw[i]);
+            this->entropyValuesBw[i] = nullptr;
         }
-        free(this->backwardEntropyValues);
+        free(this->entropyValuesBw);
     }
-    this->backwardEntropyValues = nullptr;
+    this->entropyValuesBw = nullptr;
 #ifndef NDEBUG
     reg_print_msg_debug("reg_nmi::DeallocateHistogram called");
 #endif
@@ -127,10 +127,8 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
 
     // Deallocate all allocated arrays
     this->DeallocateHistogram();
-    // Extract the number of time point
-    int timepoint = this->referenceTimePoint;
     // Reference and floating are resampled between 2 and bin-3
-    for (int i = 0; i < timepoint; ++i) {
+    for (int i = 0; i < this->referenceTimePoint; ++i) {
         if (this->timePointWeight[i] > 0) {
             reg_intensityRescale(this->referenceImage,
                                  i,
@@ -143,41 +141,32 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
         }
     }
     // Create the joint histograms
-    this->forwardJointHistogramPro = (double**)malloc(255 * sizeof(double*));
-    this->forwardJointHistogramLog = (double**)malloc(255 * sizeof(double*));
-    this->forwardEntropyValues = (double**)malloc(255 * sizeof(double*));
+    this->jointHistogramPro = (double**)calloc(255, sizeof(double*));
+    this->jointHistogramLog = (double**)calloc(255, sizeof(double*));
+    this->entropyValues = (double**)calloc(255, sizeof(double*));
     if (this->isSymmetric) {
-        this->backwardJointHistogramPro = (double**)malloc(255 * sizeof(double*));
-        this->backwardJointHistogramLog = (double**)malloc(255 * sizeof(double*));
-        this->backwardEntropyValues = (double**)malloc(255 * sizeof(double*));
+        this->jointHistogramProBw = (double**)calloc(255, sizeof(double*));
+        this->jointHistogramLogBw = (double**)calloc(255, sizeof(double*));
+        this->entropyValuesBw = (double**)calloc(255, sizeof(double*));
     }
-    for (int i = 0; i < timepoint; ++i) {
+    for (int i = 0; i < this->referenceTimePoint; ++i) {
         if (this->timePointWeight[i] > 0) {
             // Compute the total number of bin
             this->totalBinNumber[i] = this->referenceBinNumber[i] * this->floatingBinNumber[i] +
                 this->referenceBinNumber[i] + this->floatingBinNumber[i];
-            this->forwardJointHistogramLog[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
-            this->forwardJointHistogramPro[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
-            this->forwardEntropyValues[i] = (double*)calloc(4, sizeof(double));
-            if (this->isSymmetric) {
-                this->backwardJointHistogramLog[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
-                this->backwardJointHistogramPro[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
-                this->backwardEntropyValues[i] = (double*)calloc(4, sizeof(double));
-            }
-        } else {
-            this->forwardJointHistogramLog[i] = nullptr;
-            this->forwardJointHistogramPro[i] = nullptr;
-            this->forwardEntropyValues[i] = nullptr;
+            this->jointHistogramLog[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
+            this->jointHistogramPro[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
+            this->entropyValues[i] = (double*)calloc(4, sizeof(double));
             if (this->isSymmetric) {
-                this->backwardJointHistogramLog[i] = nullptr;
-                this->backwardJointHistogramPro[i] = nullptr;
-                this->backwardEntropyValues[i] = nullptr;
+                this->jointHistogramLogBw[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
+                this->jointHistogramProBw[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double));
+                this->entropyValuesBw[i] = (double*)calloc(4, sizeof(double));
             }
         }
     }
 #ifndef NDEBUG
     char text[255];
-    reg_print_msg_debug("reg_nmi::InitialiseMeasure().");
+    reg_print_msg_debug("reg_nmi::InitialiseMeasure()");
     for (int i = 0; i < this->referenceImage->nt; ++i) {
         sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
         reg_print_msg_debug(text);
@@ -217,21 +206,21 @@ PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) {
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getNMIValue(nifti_image *referenceImage,
-                     nifti_image *warpedImage,
-                     double *timePointWeight,
-                     unsigned short *referenceBinNumber,
-                     unsigned short *floatingBinNumber,
-                     unsigned short *totalBinNumber,
+void reg_getNMIValue(const nifti_image *referenceImage,
+                     const nifti_image *warpedImage,
+                     const double *timePointWeight,
+                     const unsigned short *referenceBinNumber,
+                     const unsigned short *floatingBinNumber,
+                     const unsigned short *totalBinNumber,
                      double **jointHistogramLog,
                      double **jointhistogramPro,
                      double **entropyValues,
-                     int *referenceMask) {
+                     const int *referenceMask) {
     // Create pointers to the image data arrays
-    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
     // Useful variable
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     // Iterate over all active time points
     for (int t = 0; t < referenceImage->nt; ++t) {
         if (timePointWeight[t] > 0) {
@@ -246,16 +235,15 @@ void reg_getNMIValue(nifti_image *referenceImage,
             // Empty the joint histogram
             memset(jointHistoProPtr, 0, totalBinNumber[t] * sizeof(double));
             // Fill the joint histograms using an approximation
-            DataType *refPtr = &refImagePtr[t * voxelNumber];
-            DataType *warPtr = &warImagePtr[t * voxelNumber];
+            const DataType *refPtr = &refImagePtr[t * voxelNumber];
+            const DataType *warPtr = &warImagePtr[t * voxelNumber];
             for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
                 if (referenceMask[voxel] > -1) {
-                    DataType refValue = refPtr[voxel];
-                    DataType warValue = warPtr[voxel];
+                    const DataType& refValue = refPtr[voxel];
+                    const DataType& warValue = warPtr[voxel];
                     if (refValue == refValue && warValue == warValue &&
-                        refValue >= 0 && warValue >= 0 &&
-                        refValue < referenceBinNumber[t] &&
-                        warValue < floatingBinNumber[t]) {
+                        0 <= refValue && refValue < referenceBinNumber[t] &&
+                        0 <= warValue && warValue < floatingBinNumber[t]) {
                         ++jointHistoProPtr[static_cast<int>(refValue) + static_cast<int>(warValue) * referenceBinNumber[t]];
                     }
                 }
@@ -366,105 +354,72 @@ void reg_getNMIValue(nifti_image *referenceImage,
         } // if active time point
     } // iterate over all time point in the reference image
 }
-template void reg_getNMIValue<float>(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*);
-template void reg_getNMIValue<double>(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*);
 /* *************************************************************** */
 double reg_nmi::GetSimilarityMeasureValue() {
     // Check that all the specified image are of the same datatype
-    if (this->warpedImage->datatype != this->referenceImage->datatype) {
+    if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 && this->referenceImage->datatype != NIFTI_TYPE_FLOAT64) {
         reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-        reg_print_msg_error("Both input images are expected to have the same type");
+        reg_print_msg_error("Input images are expected to be of floating precision type");
         reg_exit();
     }
-    switch (this->referenceImage->datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        reg_getNMIValue<float>(this->referenceImage,
-                               this->warpedImage,
-                               this->timePointWeight,
-                               this->referenceBinNumber,
-                               this->floatingBinNumber,
-                               this->totalBinNumber,
-                               this->forwardJointHistogramLog,
-                               this->forwardJointHistogramPro,
-                               this->forwardEntropyValues,
-                               this->referenceMask);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_getNMIValue<double>(this->referenceImage,
-                                this->warpedImage,
-                                this->timePointWeight,
-                                this->referenceBinNumber,
-                                this->floatingBinNumber,
-                                this->totalBinNumber,
-                                this->forwardJointHistogramLog,
-                                this->forwardJointHistogramPro,
-                                this->forwardEntropyValues,
-                                this->referenceMask);
-        break;
-    default:
+    if (this->warpedImage->datatype != this->referenceImage->datatype) {
         reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-        reg_print_msg_error("Unsupported datatype");
+        reg_print_msg_error("Both input images are expected to have the same type");
         reg_exit();
     }
+    std::visit([&](auto&& refImgDataType) {
+        using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+        reg_getNMIValue<RefImgDataType>(this->referenceImage,
+                                        this->warpedImage,
+                                        this->timePointWeight,
+                                        this->referenceBinNumber,
+                                        this->floatingBinNumber,
+                                        this->totalBinNumber,
+                                        this->jointHistogramLog,
+                                        this->jointHistogramPro,
+                                        this->entropyValues,
+                                        this->referenceMask);
+    }, NiftiImage::getFloatingDataType(this->referenceImage));
 
     if (this->isSymmetric) {
         // Check that all the specified image are of the same datatype
-        if (this->floatingImage->datatype != this->warpedImageBw->datatype) {
+        if (this->floatingImage->datatype != NIFTI_TYPE_FLOAT32 && this->floatingImage->datatype != NIFTI_TYPE_FLOAT64) {
             reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-            reg_print_msg_error("Both input images are expected to have the same type");
+            reg_print_msg_error("Input images are expected to be of floating precision type");
             reg_exit();
         }
-        switch (this->floatingImage->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_getNMIValue<float>(this->floatingImage,
-                                   this->warpedImageBw,
-                                   this->timePointWeight,
-                                   this->floatingBinNumber,
-                                   this->referenceBinNumber,
-                                   this->totalBinNumber,
-                                   this->backwardJointHistogramLog,
-                                   this->backwardJointHistogramPro,
-                                   this->backwardEntropyValues,
-                                   this->floatingMask);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_getNMIValue<double>(this->floatingImage,
-                                    this->warpedImageBw,
-                                    this->timePointWeight,
-                                    this->floatingBinNumber,
-                                    this->referenceBinNumber,
-                                    this->totalBinNumber,
-                                    this->backwardJointHistogramLog,
-                                    this->backwardJointHistogramPro,
-                                    this->backwardEntropyValues,
-                                    this->floatingMask);
-            break;
-        default:
+        if (this->floatingImage->datatype != this->warpedImageBw->datatype) {
             reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-            reg_print_msg_error("Unsupported datatype");
+            reg_print_msg_error("Both input images are expected to have the same type");
             reg_exit();
         }
+        std::visit([&](auto&& floImgDataType) {
+            using FloImgDataType = std::decay_t<decltype(floImgDataType)>;
+            reg_getNMIValue<FloImgDataType>(this->floatingImage,
+                                            this->warpedImageBw,
+                                            this->timePointWeight,
+                                            this->floatingBinNumber,
+                                            this->referenceBinNumber,
+                                            this->totalBinNumber,
+                                            this->jointHistogramLogBw,
+                                            this->jointHistogramProBw,
+                                            this->entropyValuesBw,
+                                            this->floatingMask);
+        }, NiftiImage::getFloatingDataType(this->floatingImage));
     }
 
-    double nmi_value_forward = 0.;
-    double nmi_value_backward = 0.;
+    double nmiFw = 0, nmiBw = 0;
     for (int t = 0; t < this->referenceTimePoint; ++t) {
         if (this->timePointWeight[t] > 0) {
-            nmi_value_forward += timePointWeight[t] *
-                (this->forwardEntropyValues[t][0] +
-                 this->forwardEntropyValues[t][1]) /
-                this->forwardEntropyValues[t][2];
+            nmiFw += timePointWeight[t] * (this->entropyValues[t][0] + this->entropyValues[t][1]) / this->entropyValues[t][2];
             if (this->isSymmetric)
-                nmi_value_backward += timePointWeight[t] *
-                (this->backwardEntropyValues[t][0] +
-                 this->backwardEntropyValues[t][1]) /
-                this->backwardEntropyValues[t][2];
+                nmiBw += timePointWeight[t] * (this->entropyValuesBw[t][0] + this->entropyValuesBw[t][1]) / this->entropyValuesBw[t][2];
         }
     }
 #ifndef NDEBUG
     reg_print_msg_debug("reg_nmi::GetSimilarityMeasureValue called");
 #endif
-    return nmi_value_forward + nmi_value_backward;
+    return nmiFw + nmiBw;
 }
 /* *************************************************************** */
 template <class DataType>
@@ -484,7 +439,7 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
         reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
         reg_exit();
     }
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
     // Pointers to the image data
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
@@ -552,10 +507,6 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
         } // mask
     } // loop over all voxel
 }
-template void reg_getVoxelBasedNMIGradient2D<float>
-(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
-template void reg_getVoxelBasedNMIGradient2D<double>
-(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
 /* *************************************************************** */
 template <class DataType>
 void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
@@ -577,10 +528,10 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
 
 #ifdef WIN32
     long i;
-    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
     size_t i;
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
     // Pointers to the image data
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
@@ -659,19 +610,15 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
                     }
                 }
                 measureGradPtrX[i] += (DataType)(timepointWeight * (refDeriv[0] + warDeriv[0] -
-                                                                     nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
+                                                                    nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
                 measureGradPtrY[i] += (DataType)(timepointWeight * (refDeriv[1] + warDeriv[1] -
-                                                                     nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
+                                                                    nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
                 measureGradPtrZ[i] += (DataType)(timepointWeight * (refDeriv[2] + warDeriv[2] -
-                                                                     nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3]));
+                                                                    nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
     } // loop over all voxel
 }
-template void reg_getVoxelBasedNMIGradient3D<float>
-(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
-template void reg_getVoxelBasedNMIGradient3D<double>
-(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&);
 /* *************************************************************** */
 void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // Check if the specified time point exists and is active
@@ -681,6 +628,11 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
 
     // Check if all required input images are of the same data type
     int dtype = this->referenceImage->datatype;
+    if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) {
+        reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
+        reg_print_msg_error("Input images are expected to be of floating precision type");
+        reg_exit();
+    }
     if (this->warpedImage->datatype != dtype ||
         this->warpedGradient->datatype != dtype ||
         this->voxelBasedGradient->datatype != dtype) {
@@ -693,76 +645,42 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     this->GetSimilarityMeasureValue();
 
     // Compute the gradient of the nmi for the forward transformation
-    if (this->referenceImage->nz > 1) {  // 3D input images
-        switch (dtype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedNMIGradient3D<float>(this->referenceImage,
-                                                  this->warpedImage,
-                                                  this->referenceBinNumber,
-                                                  this->floatingBinNumber,
-                                                  this->forwardJointHistogramLog,
-                                                  this->forwardEntropyValues,
-                                                  this->warpedGradient,
-                                                  this->voxelBasedGradient,
-                                                  this->referenceMask,
-                                                  currentTimepoint,
-                                                  this->timePointWeight[currentTimepoint]);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedNMIGradient3D<double>(this->referenceImage,
-                                                   this->warpedImage,
-                                                   this->referenceBinNumber,
-                                                   this->floatingBinNumber,
-                                                   this->forwardJointHistogramLog,
-                                                   this->forwardEntropyValues,
-                                                   this->warpedGradient,
-                                                   this->voxelBasedGradient,
-                                                   this->referenceMask,
-                                                   currentTimepoint,
-                                                   this->timePointWeight[currentTimepoint]);
-            break;
-        default:
-            reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-            reg_print_msg_error("Unsupported datatype");
-            reg_exit();
-        }
-    } else { // 2D input images
-        switch (dtype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedNMIGradient2D<float>(this->referenceImage,
-                                                  this->warpedImage,
-                                                  this->referenceBinNumber,
-                                                  this->floatingBinNumber,
-                                                  this->forwardJointHistogramLog,
-                                                  this->forwardEntropyValues,
-                                                  this->warpedGradient,
-                                                  this->voxelBasedGradient,
-                                                  this->referenceMask,
-                                                  currentTimepoint,
-                                                  this->timePointWeight[currentTimepoint]);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedNMIGradient2D<double>(this->referenceImage,
-                                                   this->warpedImage,
-                                                   this->referenceBinNumber,
-                                                   this->floatingBinNumber,
-                                                   this->forwardJointHistogramLog,
-                                                   this->forwardEntropyValues,
-                                                   this->warpedGradient,
-                                                   this->voxelBasedGradient,
-                                                   this->referenceMask,
-                                                   currentTimepoint,
-                                                   this->timePointWeight[currentTimepoint]);
-            break;
-        default:
-            reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-            reg_print_msg_error("Unsupported datatype");
-            reg_exit();
+    std::visit([&](auto&& refImgDataType) {
+        using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+        if (this->referenceImage->nz > 1) {  // 3D input images
+            reg_getVoxelBasedNMIGradient3D<RefImgDataType>(this->referenceImage,
+                                                           this->warpedImage,
+                                                           this->referenceBinNumber,
+                                                           this->floatingBinNumber,
+                                                           this->jointHistogramLog,
+                                                           this->entropyValues,
+                                                           this->warpedGradient,
+                                                           this->voxelBasedGradient,
+                                                           this->referenceMask,
+                                                           currentTimepoint,
+                                                           this->timePointWeight[currentTimepoint]);
+        } else { // 2D input images
+            reg_getVoxelBasedNMIGradient2D<RefImgDataType>(this->referenceImage,
+                                                           this->warpedImage,
+                                                           this->referenceBinNumber,
+                                                           this->floatingBinNumber,
+                                                           this->jointHistogramLog,
+                                                           this->entropyValues,
+                                                           this->warpedGradient,
+                                                           this->voxelBasedGradient,
+                                                           this->referenceMask,
+                                                           currentTimepoint,
+                                                           this->timePointWeight[currentTimepoint]);
         }
-    }
+    }, NiftiImage::getFloatingDataType(this->referenceImage));
 
     if (this->isSymmetric) {
         dtype = this->floatingImage->datatype;
+        if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) {
+            reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
+            reg_print_msg_error("Input images are expected to be of floating precision type");
+            reg_exit();
+        }
         if (this->warpedImageBw->datatype != dtype ||
             this->warpedGradientBw->datatype != dtype ||
             this->voxelBasedGradientBw->datatype != dtype) {
@@ -771,73 +689,34 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
             reg_exit();
         }
         // Compute the gradient of the nmi for the backward transformation
-        if (this->floatingImage->nz > 1) {  // 3D input images
-            switch (dtype) {
-            case NIFTI_TYPE_FLOAT32:
-                reg_getVoxelBasedNMIGradient3D<float>(this->floatingImage,
-                                                      this->warpedImageBw,
-                                                      this->floatingBinNumber,
-                                                      this->referenceBinNumber,
-                                                      this->backwardJointHistogramLog,
-                                                      this->backwardEntropyValues,
-                                                      this->warpedGradientBw,
-                                                      this->voxelBasedGradientBw,
-                                                      this->floatingMask,
-                                                      currentTimepoint,
-                                                      this->timePointWeight[currentTimepoint]);
-                break;
-            case NIFTI_TYPE_FLOAT64:
-                reg_getVoxelBasedNMIGradient3D<double>(this->floatingImage,
-                                                       this->warpedImageBw,
-                                                       this->floatingBinNumber,
-                                                       this->referenceBinNumber,
-                                                       this->backwardJointHistogramLog,
-                                                       this->backwardEntropyValues,
-                                                       this->warpedGradientBw,
-                                                       this->voxelBasedGradientBw,
-                                                       this->floatingMask,
-                                                       currentTimepoint,
-                                                       this->timePointWeight[currentTimepoint]);
-                break;
-            default:
-                reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-                reg_print_msg_error("Unsupported datatype");
-                reg_exit();
+        std::visit([&](auto&& floImgDataType) {
+            using FloImgDataType = std::decay_t<decltype(floImgDataType)>;
+            if (this->floatingImage->nz > 1) {  // 3D input images
+                reg_getVoxelBasedNMIGradient3D<FloImgDataType>(this->floatingImage,
+                                                               this->warpedImageBw,
+                                                               this->floatingBinNumber,
+                                                               this->referenceBinNumber,
+                                                               this->jointHistogramLogBw,
+                                                               this->entropyValuesBw,
+                                                               this->warpedGradientBw,
+                                                               this->voxelBasedGradientBw,
+                                                               this->floatingMask,
+                                                               currentTimepoint,
+                                                               this->timePointWeight[currentTimepoint]);
+            } else { // 2D input images
+                reg_getVoxelBasedNMIGradient2D<FloImgDataType>(this->floatingImage,
+                                                               this->warpedImageBw,
+                                                               this->floatingBinNumber,
+                                                               this->referenceBinNumber,
+                                                               this->jointHistogramLogBw,
+                                                               this->entropyValuesBw,
+                                                               this->warpedGradientBw,
+                                                               this->voxelBasedGradientBw,
+                                                               this->floatingMask,
+                                                               currentTimepoint,
+                                                               this->timePointWeight[currentTimepoint]);
             }
-        } else { // 2D input images
-            switch (dtype) {
-            case NIFTI_TYPE_FLOAT32:
-                reg_getVoxelBasedNMIGradient2D<float>(this->floatingImage,
-                                                      this->warpedImageBw,
-                                                      this->floatingBinNumber,
-                                                      this->referenceBinNumber,
-                                                      this->backwardJointHistogramLog,
-                                                      this->backwardEntropyValues,
-                                                      this->warpedGradientBw,
-                                                      this->voxelBasedGradientBw,
-                                                      this->floatingMask,
-                                                      currentTimepoint,
-                                                      this->timePointWeight[currentTimepoint]);
-                break;
-            case NIFTI_TYPE_FLOAT64:
-                reg_getVoxelBasedNMIGradient2D<double>(this->floatingImage,
-                                                       this->warpedImageBw,
-                                                       this->floatingBinNumber,
-                                                       this->referenceBinNumber,
-                                                       this->backwardJointHistogramLog,
-                                                       this->backwardEntropyValues,
-                                                       this->warpedGradientBw,
-                                                       this->voxelBasedGradientBw,
-                                                       this->floatingMask,
-                                                       currentTimepoint,
-                                                       this->timePointWeight[currentTimepoint]);
-                break;
-            default:
-                reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-                reg_print_msg_error("Unsupported datatype");
-                reg_exit();
-            }
-        }
+        }, NiftiImage::getFloatingDataType(this->floatingImage));
     }
 #ifndef NDEBUG
     reg_print_msg_debug("reg_nmi::GetVoxelBasedSimilarityMeasureGradient called");
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 8faafcee..78cd06ad 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -66,56 +66,27 @@ class reg_nmi: public reg_measure {
     unsigned short referenceBinNumber[255];
     unsigned short floatingBinNumber[255];
     unsigned short totalBinNumber[255];
-    double **forwardJointHistogramPro;
-    double **forwardJointHistogramLog;
-    double **forwardEntropyValues;
-    double **backwardJointHistogramPro;
-    double **backwardJointHistogramLog;
-    double **backwardEntropyValues;
+    double **jointHistogramPro;
+    double **jointHistogramLog;
+    double **entropyValues;
+    double **jointHistogramProBw;
+    double **jointHistogramLogBw;
+    double **entropyValuesBw;
 
     void DeallocateHistogram();
 };
 /* *************************************************************** */
 extern "C++" template <class DataType>
-void reg_getNMIValue(nifti_image *referenceImage,
-                     nifti_image *warpedImage,
-                     double *timePointWeight,
-                     unsigned short *referenceBinNumber,
-                     unsigned short *floatingBinNumber,
-                     unsigned short *totalBinNumber,
+void reg_getNMIValue(const nifti_image *referenceImage,
+                     const nifti_image *warpedImage,
+                     const double *timePointWeight,
+                     const unsigned short *referenceBinNumber,
+                     const unsigned short *floatingBinNumber,
+                     const unsigned short *totalBinNumber,
                      double **jointHistogramLog,
                      double **jointhistogramPro,
                      double **entropyValues,
-                     int *referenceMask
-);
-/* *************************************************************** */
-extern "C++" template <class DataType>
-void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
-                                    const nifti_image *warpedImage,
-                                    const unsigned short *referenceBinNumber,
-                                    const unsigned short *floatingBinNumber,
-                                    const double *const *jointHistogramLog,
-                                    const double *const *entropyValues,
-                                    const nifti_image *warpedGradient,
-                                    nifti_image *nmiGradientImage,
-                                    const int *referenceMask,
-                                    const int& currentTimepoint,
-                                    const double& timepointWeight
-);
-/* *************************************************************** */
-extern "C++" template <class DataType>
-void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
-                                    const nifti_image *warpedImage,
-                                    const unsigned short *referenceBinNumber,
-                                    const unsigned short *floatingBinNumber,
-                                    const double *const *jointHistogramLog,
-                                    const double *const *entropyValues,
-                                    const nifti_image *warpedGradient,
-                                    nifti_image *nmiGradientImage,
-                                    const int *referenceMask,
-                                    const int& currentTimepoint,
-                                    const double& timepointWeight
-);
+                     const int *referenceMask);
 /* *************************************************************** */
 // Simple class to dynamically manage an array of pointers
 // Needed for multi channel NMI
@@ -237,9 +208,9 @@ inline int previous(int current, int num_dims) {
 /// @brief NMI measure of similarity class
 class reg_multichannel_nmi: public reg_measure {
 public:
-    /// @brief reg_nmi class constructor
+    /// @brief reg_multichannel_nmi class constructor
     reg_multichannel_nmi() {}
-    /// @brief reg_nmi class destructor
+    /// @brief reg_multichannel_nmi class destructor
     virtual ~reg_multichannel_nmi() {}
 
     /// @brief Returns the nmi value
@@ -257,12 +228,12 @@ class reg_multichannel_nmi: public reg_measure {
     unsigned short referenceBinNumber[255];
     unsigned short floatingBinNumber[255];
     unsigned short totalBinNumber[255];
-    double *forwardJointHistogramProp;
-    double *forwardJointHistogramLog;
-    double *forwardEntropyValues;
-    double *backwardJointHistogramProp;
-    double *backwardJointHistogramLog;
-    double *backwardEntropyValues;
+    double *jointHistogramProp;
+    double *jointHistogramLog;
+    double *entropyValues;
+    double *jointHistogramPropBw;
+    double *jointHistogramLogBw;
+    double *entropyValuesBw;
 };
 /* *************************************************************** */
 /// Multi channel NMI version - Entropy
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index fc16cd64..ddb2740e 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -266,7 +266,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                                   double timepointWeight,
                                   nifti_image *localWeightSimImage) {
     if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) {
-        reg_print_fct_error("reg_getVoxelBasedNMIGradient2D");
+        reg_print_fct_error("reg_getVoxelBasedSSDGradient");
         reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
         reg_exit();
     }
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 7470e788..f809fb67 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -22,6 +22,7 @@
 #include <functional>
 #include "_reg_maths.h"
 
+using namespace std::string_literals;
 using std::unique_ptr;
 using std::shared_ptr;
 using std::vector;

From 37c33703db82bfadda55a80a59761610c3d0fdd8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 20 Jul 2023 16:42:19 +0100
Subject: [PATCH 165/314] Add symmetric scheme support for reg_nmi_gpu #92

---
 niftyreg_build_version.txt      |   2 +-
 reg-lib/cuda/CudaMeasure.cpp    |  21 ++--
 reg-lib/cuda/_reg_measure_gpu.h | 135 +++++++++++++++++++------
 reg-lib/cuda/_reg_nmi_gpu.cu    | 172 +++++++++++++++-----------------
 reg-lib/cuda/_reg_nmi_gpu.h     |  57 ++++++-----
 reg-lib/cuda/_reg_ssd_gpu.cu    |  65 +++++-------
 reg-lib/cuda/_reg_ssd_gpu.h     |  24 +++--
 7 files changed, 277 insertions(+), 199 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6d26270b..c9716b72 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-283
+284
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
index 7ef87391..f94a06d1 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -32,19 +32,28 @@ void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *
     // TODO Implement symmetric scheme for CUDA measure types
     reg_measure_gpu& measureGpu = dynamic_cast<reg_measure_gpu&>(measure);
     CudaF3dContent& cudaCon = dynamic_cast<CudaF3dContent&>(con);
+    CudaF3dContent *cudaConBw = dynamic_cast<CudaF3dContent*>(conBw);
     measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(),
+                                 cudaCon.GetReferenceCuda(),
                                  cudaCon.Content::GetFloating(),
+                                 cudaCon.GetFloatingCuda(),
                                  cudaCon.Content::GetReferenceMask(),
+                                 cudaCon.GetReferenceMaskCuda(),
                                  cudaCon.GetActiveVoxelNumber(),
                                  cudaCon.Content::GetWarped(),
+                                 cudaCon.GetWarpedCuda(),
                                  cudaCon.F3dContent::GetWarpedGradient(),
+                                 cudaCon.GetWarpedGradientCuda(),
                                  cudaCon.F3dContent::GetVoxelBasedMeasureGradient(),
+                                 cudaCon.GetVoxelBasedMeasureGradientCuda(),
                                  cudaCon.F3dContent::GetLocalWeightSim(),
-                                 cudaCon.GetReferenceCuda(),
-                                 cudaCon.GetFloatingCuda(),
-                                 cudaCon.GetReferenceMaskCuda(),
-                                 cudaCon.GetWarpedCuda(),
-                                 cudaCon.GetWarpedGradientCuda(),
-                                 cudaCon.GetVoxelBasedMeasureGradientCuda());
+                                 cudaConBw ? cudaConBw->Content::GetReferenceMask() : nullptr,
+                                 cudaConBw ? cudaConBw->GetReferenceMaskCuda() : nullptr,
+                                 cudaConBw ? cudaConBw->Content::GetWarped() : nullptr,
+                                 cudaConBw ? cudaConBw->GetWarpedCuda() : nullptr,
+                                 cudaConBw ? cudaConBw->F3dContent::GetWarpedGradient() : nullptr,
+                                 cudaConBw ? cudaConBw->GetWarpedGradientCuda() : nullptr,
+                                 cudaConBw ? cudaConBw->F3dContent::GetVoxelBasedMeasureGradient() : nullptr,
+                                 cudaConBw ? cudaConBw->GetVoxelBasedMeasureGradientCuda() : nullptr);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index f6c9615f..d91c39d6 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -22,19 +22,63 @@ class reg_measure_gpu {
     virtual ~reg_measure_gpu() {}
 
     virtual void InitialiseMeasure(nifti_image *refImg,
+                                   cudaArray *refImgCuda,
                                    nifti_image *floImg,
+                                   cudaArray *floImgCuda,
                                    int *refMask,
+                                   int *refMaskCuda,
                                    size_t activeVoxNum,
                                    nifti_image *warpedImg,
-                                   nifti_image *warpedGrad,
-                                   nifti_image *voxelBasedGrad,
-                                   nifti_image *localWeightSim,
-                                   cudaArray *refImgCuda,
-                                   cudaArray *floImgCuda,
-                                   int *refMaskCuda,
                                    float *warpedImgCuda,
+                                   nifti_image *warpedGrad,
                                    float4 *warpedGradCuda,
-                                   float4 *voxelBasedGradCuda) = 0;
+                                   nifti_image *voxelBasedGrad,
+                                   float4 *voxelBasedGradCuda,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   int *floMaskCuda = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   float *warpedImgBwCuda = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   float4 *warpedGradBwCuda = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr,
+                                   float4 *voxelBasedGradBwCuda = nullptr) {
+        // Check that the input image are of type float
+        if (refImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImg->datatype != NIFTI_TYPE_FLOAT32) {
+            reg_print_fct_error("reg_measure_gpu::InitialiseMeasure");
+            reg_print_msg_error("Only single precision is supported on the GPU");
+            reg_exit();
+        }
+        // Bind the required pointers
+        this->referenceImageCuda = refImgCuda;
+        this->floatingImageCuda = floImgCuda;
+        this->referenceMaskCuda = refMaskCuda;
+        this->activeVoxelNumber = activeVoxNum;
+        this->warpedImageCuda = warpedImgCuda;
+        this->warpedGradientCuda = warpedGradCuda;
+        this->voxelBasedGradientCuda = voxelBasedGradCuda;
+        // Check if the symmetric mode is used
+        if (floMask != nullptr && warpedImgBw != nullptr && warpedGradBw != nullptr && voxelBasedGradBw != nullptr &&
+            floMaskCuda != nullptr && warpedImgBwCuda != nullptr && warpedGradBwCuda != nullptr && voxelBasedGradBwCuda != nullptr) {
+            if (floImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImgBw->datatype != NIFTI_TYPE_FLOAT32) {
+                reg_print_fct_error("reg_measure_gpu::InitialiseMeasure");
+                reg_print_msg_error("Only single precision is supported on the GPU");
+                reg_exit();
+            }
+            this->floatingMaskCuda = floMaskCuda;
+            this->warpedImageBwCuda = warpedImgBwCuda;
+            this->warpedGradientBwCuda = warpedGradBwCuda;
+            this->voxelBasedGradientBwCuda = voxelBasedGradBwCuda;
+        } else {
+            this->floatingMaskCuda = nullptr;
+            this->warpedImageBwCuda = nullptr;
+            this->warpedGradientBwCuda = nullptr;
+            this->voxelBasedGradientBwCuda = nullptr;
+        }
+#ifndef NDEBUG
+        reg_print_msg_debug("reg_measure_gpu::InitialiseMeasure() called");
+#endif
+    }
 
 protected:
     cudaArray *referenceImageCuda;
@@ -44,6 +88,11 @@ class reg_measure_gpu {
     float *warpedImageCuda;
     float4 *warpedGradientCuda;
     float4 *voxelBasedGradientCuda;
+
+    int *floatingMaskCuda;
+    float *warpedImageBwCuda;
+    float4 *warpedGradientBwCuda;
+    float4 *voxelBasedGradientBwCuda;
 };
 /* *************************************************************** */
 class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
@@ -57,19 +106,27 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
     virtual ~reg_lncc_gpu() {}
 
     virtual void InitialiseMeasure(nifti_image *refImg,
+                                   cudaArray *refImgCuda,
                                    nifti_image *floImg,
+                                   cudaArray *floImgCuda,
                                    int *refMask,
+                                   int *refMaskCuda,
                                    size_t activeVoxNum,
                                    nifti_image *warpedImg,
-                                   nifti_image *warpedGrad,
-                                   nifti_image *voxelBasedGrad,
-                                   nifti_image *localWeightSim,
-                                   cudaArray *refImgCuda,
-                                   cudaArray *floImgCuda,
-                                   int *refMaskCuda,
                                    float *warpedImgCuda,
+                                   nifti_image *warpedGrad,
                                    float4 *warpedGradCuda,
-                                   float4 *voxelBasedGradCuda) override {}
+                                   nifti_image *voxelBasedGrad,
+                                   float4 *voxelBasedGradCuda,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   int *floMaskCuda = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   float *warpedImgBwCuda = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   float4 *warpedGradBwCuda = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr,
+                                   float4 *voxelBasedGradBwCuda = nullptr) override {}
     /// @brief Returns the lncc value
     virtual double GetSimilarityMeasureValue() override { return 0; }
     /// @brief Compute the voxel based lncc gradient
@@ -80,26 +137,35 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
 public:
     /// @brief reg_kld_gpu class constructor
     reg_kld_gpu() {
-        fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH KLD YET\n");
+        reg_print_fct_error("reg_kld_gpu::reg_kld_gpu");
+        reg_print_msg_error("CUDA CANNOT BE USED WITH KLD YET");
         reg_exit();
     }
     /// @brief reg_kld_gpu class destructor
     virtual ~reg_kld_gpu() {}
 
     virtual void InitialiseMeasure(nifti_image *refImg,
+                                   cudaArray *refImgCuda,
                                    nifti_image *floImg,
+                                   cudaArray *floImgCuda,
                                    int *refMask,
+                                   int *refMaskCuda,
                                    size_t activeVoxNum,
                                    nifti_image *warpedImg,
-                                   nifti_image *warpedGrad,
-                                   nifti_image *voxelBasedGrad,
-                                   nifti_image *localWeightSim,
-                                   cudaArray *refImgCuda,
-                                   cudaArray *floImgCuda,
-                                   int *refMaskCuda,
                                    float *warpedImgCuda,
+                                   nifti_image *warpedGrad,
                                    float4 *warpedGradCuda,
-                                   float4 *voxelBasedGradCuda) override {}
+                                   nifti_image *voxelBasedGrad,
+                                   float4 *voxelBasedGradCuda,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   int *floMaskCuda = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   float *warpedImgBwCuda = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   float4 *warpedGradBwCuda = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr,
+                                   float4 *voxelBasedGradBwCuda = nullptr) override {}
     /// @brief Returns the kld value
     virtual double GetSimilarityMeasureValue() override { return 0; }
     /// @brief Compute the voxel based kld gradient
@@ -110,26 +176,35 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
 public:
     /// @brief reg_dti_gpu class constructor
     reg_dti_gpu() {
-        fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH DTI YET\n");
+        reg_print_fct_error("reg_dti_gpu::reg_dti_gpu");
+        reg_print_msg_error("CUDA CANNOT BE USED WITH DTI YET");
         reg_exit();
     }
     /// @brief reg_dti_gpu class destructor
     virtual ~reg_dti_gpu() {}
 
     virtual void InitialiseMeasure(nifti_image *refImg,
+                                   cudaArray *refImgCuda,
                                    nifti_image *floImg,
+                                   cudaArray *floImgCuda,
                                    int *refMask,
+                                   int *refMaskCuda,
                                    size_t activeVoxNum,
                                    nifti_image *warpedImg,
-                                   nifti_image *warpedGrad,
-                                   nifti_image *voxelBasedGrad,
-                                   nifti_image *localWeightSim,
-                                   cudaArray *refImgCuda,
-                                   cudaArray *floImgCuda,
-                                   int *refMaskCuda,
                                    float *warpedImgCuda,
+                                   nifti_image *warpedGrad,
                                    float4 *warpedGradCuda,
-                                   float4 *voxelBasedGradCuda) override {}
+                                   nifti_image *voxelBasedGrad,
+                                   float4 *voxelBasedGradCuda,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   int *floMaskCuda = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   float *warpedImgBwCuda = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   float4 *warpedGradBwCuda = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr,
+                                   float4 *voxelBasedGradBwCuda = nullptr) override {}
     /// @brief Returns the dti value
     virtual double GetSimilarityMeasureValue() override { return 0; }
     /// @brief Compute the voxel based dti gradient
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 1f5c1997..5efd0391 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -10,129 +10,101 @@
  *
  */
 
-#include "_reg_nmi.h"
 #include "_reg_nmi_gpu.h"
 #include "_reg_nmi_kernels.cu"
+#include <thrust/device_vector.h>
 
 /* *************************************************************** */
 reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() {
-    this->forwardJointHistogramLog_device = nullptr;
-    //	this->backwardJointHistogramLog_device=nullptr;
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n");
+    reg_print_msg_debug("reg_nmi_gpu constructor called");
 #endif
 }
 /* *************************************************************** */
 reg_nmi_gpu::~reg_nmi_gpu() {
-    this->DeallocateHistogram();
-#ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n");
-#endif
-}
-/* *************************************************************** */
-void reg_nmi_gpu::DeallocateHistogram() {
-    if (this->forwardJointHistogramLog_device != nullptr) {
-        cudaFree(this->forwardJointHistogramLog_device);
-        this->forwardJointHistogramLog_device = nullptr;
-    }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n");
+    reg_print_msg_debug("reg_nmi_gpu destructor called");
 #endif
 }
 /* *************************************************************** */
-void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg,
-                                    nifti_image *floImg,
-                                    int *refMask,
+void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
+                                    nifti_image *floImg, cudaArray *floImgCuda,
+                                    int *refMask, int *refMaskCuda,
                                     size_t activeVoxNum,
-                                    nifti_image *warpedImg,
-                                    nifti_image *warpedGrad,
-                                    nifti_image *voxelBasedGrad,
+                                    nifti_image *warpedImg, float *warpedImgCuda,
+                                    nifti_image *warpedGrad, float4 *warpedGradCuda,
+                                    nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda,
                                     nifti_image *localWeightSim,
-                                    cudaArray *refImgCuda,
-                                    cudaArray *floImgCuda,
-                                    int *refMaskCuda,
-                                    float *warpedImgCuda,
-                                    float4 *warpedGradCuda,
-                                    float4 *voxelBasedGradCuda) {
+                                    int *floMask, int *floMaskCuda,
+                                    nifti_image *warpedImgBw, float *warpedImgBwCuda,
+                                    nifti_image *warpedGradBw, float4 *warpedGradBwCuda,
+                                    nifti_image *voxelBasedGradBw, float4 *voxelBasedGradBwCuda) {
     this->DeallocateHistogram();
-    reg_nmi::InitialiseMeasure(refImg,
-                               floImg,
-                               refMask,
-                               warpedImg,
-                               warpedGrad,
-                               voxelBasedGrad);
-    // Check if a symmetric measure is required
-    if (this->isSymmetric) {
-        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        fprintf(stderr, "[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n");
-        reg_exit();
-    }
+    reg_nmi::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad,
+                               localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw);
+    reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda,
+                                       warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda,
+                                       warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
     // Check if the input images have multiple timepoints
     if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) {
-        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        fprintf(stderr, "[NiftyReg ERROR] Multiple timepoints are not yet supported on the GPU\n");
+        reg_print_fct_error("reg_nmi_gpu::InitialiseMeasure");
+        reg_print_msg_error("Multiple timepoints are not yet supported");
         reg_exit();
     }
-    // Check that the input image are of type float
-    if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 ||
-        this->warpedImage->datatype != NIFTI_TYPE_FLOAT32) {
-        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        fprintf(stderr, "[NiftyReg ERROR] Only single precision is supported on the GPU\n");
-        reg_exit();
-    }
-    // Bind the required pointers
-    this->referenceImageCuda = refImgCuda;
-    this->floatingImageCuda = floImgCuda;
-    this->referenceMaskCuda = refMaskCuda;
-    this->activeVoxelNumber = activeVoxNum;
-    this->warpedImageCuda = warpedImgCuda;
-    this->warpedGradientCuda = warpedGradCuda;
-    this->voxelBasedGradientCuda = voxelBasedGradCuda;
     // The reference and floating images have to be updated on the device
-    if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->referenceImageCuda, this->referenceImage)) {
-        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        printf("[NiftyReg ERROR] Error when transferring the reference image.\n");
-        reg_exit();
-    }
-    if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->floatingImageCuda, this->floatingImage)) {
-        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        printf("[NiftyReg ERROR] Error when transferring the floating image.\n");
+    if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->referenceImageCuda, this->referenceImage) ||
+        cudaCommon_transferNiftiToArrayOnDevice<float>(this->floatingImageCuda, this->floatingImage)) {
+        reg_print_fct_error("reg_nmi_gpu::InitialiseMeasure");
+        reg_print_msg_error("Error when transferring the reference or floating image");
         reg_exit();
     }
-    // Allocate the required joint histogram on the GPU
-    cudaMalloc(&this->forwardJointHistogramLog_device, this->totalBinNumber[0] * sizeof(float));
-
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n");
+    reg_print_msg_debug("reg_nmi_gpu::InitialiseMeasure called");
 #endif
 }
 /* *************************************************************** */
 double reg_nmi_gpu::GetSimilarityMeasureValue() {
     // The NMI computation is performed into the host for now
     // The relevant images have to be transferred from the device to the host
-    NR_CUDA_SAFE_CALL(cudaMemcpy(this->warpedImage->data,
-                                 this->warpedImageCuda,
-                                 this->warpedImage->nvox *
-                                 this->warpedImage->nbyper,
-                                 cudaMemcpyDeviceToHost));
-
+    cudaCommon_transferFromDeviceToNifti<float>(this->warpedImage, this->warpedImageCuda);
     reg_getNMIValue<float>(this->referenceImage,
                            this->warpedImage,
                            this->timePointWeight,
                            this->referenceBinNumber,
                            this->floatingBinNumber,
                            this->totalBinNumber,
-                           this->forwardJointHistogramLog,
-                           this->forwardJointHistogramPro,
-                           this->forwardEntropyValues,
+                           this->jointHistogramLog,
+                           this->jointHistogramPro,
+                           this->entropyValues,
                            this->referenceMask);
 
-    const double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2];
+    if (this->isSymmetric) {
+        cudaCommon_transferFromDeviceToNifti<float>(this->warpedImageBw, this->warpedImageBwCuda);
+        reg_getNMIValue<float>(this->floatingImage,
+                               this->warpedImageBw,
+                               this->timePointWeight,
+                               this->floatingBinNumber,
+                               this->referenceBinNumber,
+                               this->totalBinNumber,
+                               this->jointHistogramLogBw,
+                               this->jointHistogramProBw,
+                               this->entropyValuesBw,
+                               this->floatingMask);
+    }
+
+    double nmiFw = 0, nmiBw = 0;
+    for (int t = 0; t < this->referenceTimePoint; ++t) {
+        if (this->timePointWeight[t] > 0) {
+            nmiFw += timePointWeight[t] * (this->entropyValues[t][0] + this->entropyValues[t][1]) / this->entropyValues[t][2];
+            if (this->isSymmetric)
+                nmiBw += timePointWeight[t] * (this->entropyValuesBw[t][0] + this->entropyValuesBw[t][1]) / this->entropyValuesBw[t][2];
+        }
+    }
 
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_nmi_gpu::GetSimilarityMeasureValue called\n");
+    reg_print_msg_debug("reg_nmi_gpu::GetSimilarityMeasureValue called");
 #endif
-    return nmi_value;
+    return nmiFw + nmiBw;
 }
 /* *************************************************************** */
 /// Called when we only have one target and one source image
@@ -190,30 +162,46 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
 }
 /* *************************************************************** */
 void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
+    // Check if the specified time point exists and is active
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
+    if (this->timePointWeight[currentTimepoint] == 0)
+        return;
+
+    // Call compute similarity measure to calculate joint histogram
+    this->GetSimilarityMeasureValue();
+
     // The latest joint histogram is transferred onto the GPU
-    float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float));
-    for (unsigned short i = 0; i < this->totalBinNumber[0]; ++i)
-        temp[i] = static_cast<float>(this->forwardJointHistogramLog[0][i]);
-    cudaMemcpy(this->forwardJointHistogramLog_device,
-               temp,
-               this->totalBinNumber[0] * sizeof(float),
-               cudaMemcpyHostToDevice);
-    free(temp);
+    thrust::device_vector<float> jointHistogramLogCuda(this->jointHistogramLog[0], this->jointHistogramLog[0] + this->totalBinNumber[0]);
 
     // The gradient of the NMI is computed on the GPU
     reg_getVoxelBasedNMIGradient_gpu(this->referenceImage,
                                      this->referenceImageCuda,
                                      this->warpedImageCuda,
                                      this->warpedGradientCuda,
-                                     this->forwardJointHistogramLog_device,
+                                     jointHistogramLogCuda.data().get(),
                                      this->voxelBasedGradientCuda,
                                      this->referenceMaskCuda,
                                      this->activeVoxelNumber,
-                                     this->forwardEntropyValues[0],
+                                     this->entropyValues[0],
                                      this->referenceBinNumber[0],
                                      this->floatingBinNumber[0]);
+
+    if (this->isSymmetric) {
+        thrust::device_vector<float> jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]);
+        reg_getVoxelBasedNMIGradient_gpu(this->floatingImage,
+                                         this->floatingImageCuda,
+                                         this->warpedImageBwCuda,
+                                         this->warpedGradientBwCuda,
+                                         jointHistogramLogCudaBw.data().get(),
+                                         this->voxelBasedGradientBwCuda,
+                                         this->floatingMaskCuda,
+                                         this->activeVoxelNumber,
+                                         this->entropyValuesBw[0],
+                                         this->floatingBinNumber[0],
+                                         this->referenceBinNumber[0]);
+    }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n");
+    reg_print_msg_debug("reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n");
 #endif
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index ea3da371..ff24a676 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -19,57 +19,68 @@
 /// @brief NMI measure of similarity class - GPU based
 class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
 public:
-    /// @brief reg_nmi class constructor
+    /// @brief reg_nmi_gpu class constructor
     reg_nmi_gpu();
-    /// @brief reg_nmi class destructor
+    /// @brief reg_nmi_gpu class destructor
     virtual ~reg_nmi_gpu();
 
     /// @brief Initialise the reg_nmi_gpu object
     virtual void InitialiseMeasure(nifti_image *refImg,
+                                   cudaArray *refImgCuda,
                                    nifti_image *floImg,
+                                   cudaArray *floImgCuda,
                                    int *refMask,
+                                   int *refMaskCuda,
                                    size_t activeVoxNum,
                                    nifti_image *warpedImg,
-                                   nifti_image *warpedGrad,
-                                   nifti_image *voxelBasedGrad,
-                                   nifti_image *localWeightSim,
-                                   cudaArray *refImgCuda,
-                                   cudaArray *floImgCuda,
-                                   int *refMaskCuda,
                                    float *warpedImgCuda,
+                                   nifti_image *warpedGrad,
                                    float4 *warpedGradCuda,
-                                   float4 *voxelBasedGradCuda) override;
+                                   nifti_image *voxelBasedGrad,
+                                   float4 *voxelBasedGradCuda,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   int *floMaskCuda = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   float *warpedImgBwCuda = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   float4 *warpedGradBwCuda = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr,
+                                   float4 *voxelBasedGradBwCuda = nullptr) override;
     /// @brief Returns the nmi value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based nmi gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
-
-protected:
-    float *forwardJointHistogramLog_device;
-    // float **backwardJointHistogramLog_device;
-    void DeallocateHistogram();
 };
 /* *************************************************************** */
 /// @brief NMI measure of similarity class
 class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu {
 public:
     void InitialiseMeasure(nifti_image *refImg,
+                           cudaArray *refImgCuda,
                            nifti_image *floImg,
+                           cudaArray *floImgCuda,
                            int *refMask,
+                           int *refMaskCuda,
                            size_t activeVoxNum,
                            nifti_image *warpedImg,
-                           nifti_image *warpedGrad,
-                           nifti_image *voxelBasedGrad,
-                           nifti_image *localWeightSim,
-                           cudaArray *refImgCuda,
-                           cudaArray *floImgCuda,
-                           int *refMaskCuda,
                            float *warpedImgCuda,
+                           nifti_image *warpedGrad,
                            float4 *warpedGradCuda,
-                           float4 *voxelBasedGradCuda) override {}
-    /// @brief reg_nmi class constructor
+                           nifti_image *voxelBasedGrad,
+                           float4 *voxelBasedGradCuda,
+                           nifti_image *localWeightSim = nullptr,
+                           int *floMask = nullptr,
+                           int *floMaskCuda = nullptr,
+                           nifti_image *warpedImgBw = nullptr,
+                           float *warpedImgBwCuda = nullptr,
+                           nifti_image *warpedGradBw = nullptr,
+                           float4 *warpedGradBwCuda = nullptr,
+                           nifti_image *voxelBasedGradBw = nullptr,
+                           float4 *voxelBasedGradBwCuda = nullptr) override {}
+    /// @brief reg_multichannel_nmi_gpu class constructor
     reg_multichannel_nmi_gpu() {}
-    /// @brief reg_nmi class destructor
+    /// @brief reg_multichannel_nmi_gpu class destructor
     virtual ~reg_multichannel_nmi_gpu() {}
     /// @brief Returns the nmi value
     virtual double GetSimilarityMeasureValue() override { return 0; }
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 275fc7ef..58a3fcb8 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -16,58 +16,45 @@
 /* *************************************************************** */
 reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() {
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_ssd_gpu constructor called\n");
+    reg_print_msg_debug("reg_ssd_gpu constructor called");
 #endif
 }
 /* *************************************************************** */
-void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg,
-                                    nifti_image *floImg,
-                                    int *refMask,
+reg_ssd_gpu::~reg_ssd_gpu() {
+#ifndef NDEBUG
+    reg_print_msg_debug("reg_ssd_gpu destructor called");
+#endif
+}
+/* *************************************************************** */
+void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
+                                    nifti_image *floImg, cudaArray *floImgCuda,
+                                    int *refMask, int *refMaskCuda,
                                     size_t activeVoxNum,
-                                    nifti_image *warpedImg,
-                                    nifti_image *warpedGrad,
-                                    nifti_image *voxelBasedGrad,
+                                    nifti_image *warpedImg, float *warpedImgCuda,
+                                    nifti_image *warpedGrad, float4 *warpedGradCuda,
+                                    nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda,
                                     nifti_image *localWeightSim,
-                                    cudaArray *refImgCuda,
-                                    cudaArray *floImgCuda,
-                                    int *refMaskCuda,
-                                    float *warpedImgCuda,
-                                    float4 *warpedGradCuda,
-                                    float4 *voxelBasedGradCuda) {
-    reg_ssd::InitialiseMeasure(refImg,
-                               floImg,
-                               refMask,
-                               warpedImg,
-                               warpedGrad,
-                               voxelBasedGrad,
-                               localWeightSim);
+                                    int *floMask, int *floMaskCuda,
+                                    nifti_image *warpedImgBw, float *warpedImgBwCuda,
+                                    nifti_image *warpedGradBw, float4 *warpedGradBwCuda,
+                                    nifti_image *voxelBasedGradBw, float4 *voxelBasedGradBwCuda) {
+    reg_ssd::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad,
+                               localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw);
+    reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda,
+                                       warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda,
+                                       warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
     // Check if a symmetric measure is required
     if (this->isSymmetric) {
-        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        fprintf(stderr, "[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n");
-        reg_exit();
-    }
-    // Check that the input image are of type float
-    if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 ||
-        this->warpedImage->datatype != NIFTI_TYPE_FLOAT32) {
-        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        fprintf(stderr, "[NiftyReg ERROR] The input images are expected to be float\n");
+        reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure");
+        reg_print_msg_error("Symmetric scheme is not yet supported");
         reg_exit();
     }
     // Check that the input images have only one time point
     if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) {
-        fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n");
-        fprintf(stderr, "[NiftyReg ERROR] Both input images should have only one time point\n");
+        reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure");
+        reg_print_msg_error("Multiple timepoints are not yet supported");
         reg_exit();
     }
-    // Bind the required pointers
-    this->referenceImageCuda = refImgCuda;
-    this->floatingImageCuda = floImgCuda;
-    this->referenceMaskCuda = refMaskCuda;
-    this->activeVoxelNumber = activeVoxNum;
-    this->warpedImageCuda = warpedImgCuda;
-    this->warpedGradientCuda = warpedGradCuda;
-    this->voxelBasedGradientCuda = voxelBasedGradCuda;
 #ifndef NDEBUG
     printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n");
 #endif
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index c95d4064..34764df3 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -23,23 +23,31 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
     /// @brief reg_ssd class constructor
     reg_ssd_gpu();
     /// @brief Measure class destructor
-    virtual ~reg_ssd_gpu() {}
+    virtual ~reg_ssd_gpu();
 
     /// @brief Initialise the reg_ssd object
     virtual void InitialiseMeasure(nifti_image *refImg,
+                                   cudaArray *refImgCuda,
                                    nifti_image *floImg,
+                                   cudaArray *floImgCuda,
                                    int *refMask,
+                                   int *refMaskCuda,
                                    size_t activeVoxNum,
                                    nifti_image *warpedImg,
-                                   nifti_image *warpedGrad,
-                                   nifti_image *voxelBasedGrad,
-                                   nifti_image *localWeightSim,
-                                   cudaArray *refImgCuda,
-                                   cudaArray *floImgCuda,
-                                   int *refMaskCuda,
                                    float *warpedImgCuda,
+                                   nifti_image *warpedGrad,
                                    float4 *warpedGradCuda,
-                                   float4 *voxelBasedGradCuda) override;
+                                   nifti_image *voxelBasedGrad,
+                                   float4 *voxelBasedGradCuda,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   int *floMaskCuda = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   float *warpedImgBwCuda = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   float4 *warpedGradBwCuda = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr,
+                                   float4 *voxelBasedGradBwCuda = nullptr) override;
     /// @brief Returns the ssd value
     virtual double GetSimilarityMeasureValue() override;
     /// @brief Compute the voxel based ssd gradient

From 688d9ac37b95ba3eabd36d94dace05cd842e02bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 20 Jul 2023 16:45:31 +0100
Subject: [PATCH 166/314] Add NMI regression tests #92

---
 niftyreg_build_version.txt     |   2 +-
 reg-test/CMakeLists.txt        |   1 +
 reg-test/reg_test_regr_nmi.cpp | 246 +++++++++++++++++++++++++++++++++
 3 files changed, 248 insertions(+), 1 deletion(-)
 create mode 100644 reg-test/reg_test_regr_nmi.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c9716b72..6cf44528 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-284
+285
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index f9609036..53bd0607 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -121,6 +121,7 @@ set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
 if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
+  set(EXEC_LIST reg_test_regr_nmi ${EXEC_LIST})
 endif(USE_CUDA)
 
 
diff --git a/reg-test/reg_test_regr_nmi.cpp b/reg-test/reg_test_regr_nmi.cpp
new file mode 100644
index 00000000..842a46e3
--- /dev/null
+++ b/reg-test/reg_test_regr_nmi.cpp
@@ -0,0 +1,246 @@
+#include "reg_test_common.h"
+#include "_reg_nmi.h"
+#include "CudaF3dContent.h"
+#include "CudaMeasure.h"
+#include <iomanip>
+
+/**
+ *  NMI regression test to ensure the CPU and CUDA versions yield the same output
+ */
+
+class NmiTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, bool>;
+    using TestCase = std::tuple<std::string, double, double, NiftiImage, NiftiImage>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    NmiTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create 2D reference, floating and control point grid images
+        constexpr NiftiImage::dim_t size = 16;
+        vector<NiftiImage::dim_t> dim{ size, size };
+        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage controlPointGrid2d(CreateControlPointGrid(reference2d));
+
+        // Create 3D reference, floating and control point grid images
+        dim.push_back(size);
+        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage controlPointGrid3d(CreateControlPointGrid(reference3d));
+
+        // Fill images with random values
+        auto ref2dPtr = reference2d.data();
+        auto flo2dPtr = floating2d.data();
+        for (size_t i = 0; i < reference2d.nVoxels(); ++i) {
+            ref2dPtr[i] = distr(gen);
+            flo2dPtr[i] = distr(gen);
+        }
+
+        // Fill images with random values
+        auto ref3dPtr = reference3d.data();
+        auto flo3dPtr = floating3d.data();
+        for (size_t i = 0; i < reference3d.nVoxels(); ++i) {
+            ref3dPtr[i] = distr(gen);
+            flo3dPtr[i] = distr(gen);
+        }
+
+        // Create the data container for the regression test
+        vector<TestData> testData;
+        for (int sym = 0; sym < 2; ++sym) {
+            testData.emplace_back(TestData(
+                "2D"s + (sym ? " Symmetric" : ""),
+                reference2d,
+                floating2d,
+                controlPointGrid2d,
+                sym
+            ));
+            testData.emplace_back(TestData(
+                "3D"s + (sym ? " Symmetric" : ""),
+                reference3d,
+                floating3d,
+                controlPointGrid3d,
+                sym
+            ));
+        }
+
+        // Create the platforms
+        Platform platformCpu(PlatformType::Cpu);
+        Platform platformCuda(PlatformType::Cuda);
+
+        // Create the measures
+        unique_ptr<Measure> measureCpu{ new Measure() };
+        unique_ptr<Measure> measureCuda{ new CudaMeasure() };
+
+        for (auto&& testData : testData) {
+            // Get the test data
+            auto&& [testName, reference, floating, controlPointGrid, isSymmetric] = testData;
+
+            // Create images
+            NiftiImage referenceCpu(reference), referenceCuda(reference);
+            NiftiImage floatingCpu(floating), floatingCuda(floating);
+            NiftiImage controlPointGridCpu(controlPointGrid), controlPointGridCuda(controlPointGrid);
+            NiftiImage controlPointGridCpuBw(controlPointGrid), controlPointGridCudaBw(controlPointGrid);
+
+            // Create the contents
+            unique_ptr<F3dContent> contentCpu{ new F3dContent(
+                referenceCpu,
+                floatingCpu,
+                controlPointGridCpu,
+                nullptr,
+                nullptr,
+                nullptr,
+                sizeof(float)
+            ) };
+            unique_ptr<F3dContent> contentCuda{ new CudaF3dContent(
+                referenceCuda,
+                floatingCuda,
+                controlPointGridCuda,
+                nullptr,
+                nullptr,
+                nullptr,
+                sizeof(float)
+            ) };
+            unique_ptr<F3dContent> contentCpuBw, contentCudaBw;
+            if (isSymmetric) {
+                contentCpuBw.reset(new F3dContent(
+                    floatingCpu,
+                    referenceCpu,
+                    controlPointGridCpuBw,
+                    nullptr,
+                    nullptr,
+                    nullptr,
+                    sizeof(float)
+                ));
+                contentCudaBw.reset(new CudaF3dContent(
+                    floatingCuda,
+                    referenceCuda,
+                    controlPointGridCudaBw,
+                    nullptr,
+                    nullptr,
+                    nullptr,
+                    sizeof(float)
+                ));
+            }
+
+            // Create the computes
+            unique_ptr<Compute> computeCpu{ platformCpu.CreateCompute(*contentCpu) };
+            unique_ptr<Compute> computeCuda{ platformCuda.CreateCompute(*contentCuda) };
+            unique_ptr<Compute> computeCpuBw, computeCudaBw;
+            if (isSymmetric) {
+                computeCpuBw.reset(platformCpu.CreateCompute(*contentCpuBw));
+                computeCudaBw.reset(platformCuda.CreateCompute(*contentCudaBw));
+            }
+
+            // Create the NMI measures
+            unique_ptr<reg_nmi> nmiCpu{ dynamic_cast<reg_nmi*>(measureCpu->Create(MeasureType::Nmi)) };
+            unique_ptr<reg_nmi> nmiCuda{ dynamic_cast<reg_nmi*>(measureCuda->Create(MeasureType::Nmi)) };
+
+            // Initialise the measures
+            for (int i = 0; i < referenceCpu->nt; ++i) {
+                nmiCpu->SetTimepointWeight(i, 1.0);
+                nmiCuda->SetTimepointWeight(i, 1.0);
+            }
+            measureCpu->Initialise(*nmiCpu, *contentCpu, contentCpuBw.get());
+            measureCuda->Initialise(*nmiCuda, *contentCuda, contentCudaBw.get());
+
+            // Compute the similarity measure value for CPU
+            computeCpu->GetDeformationField(false, true);
+            computeCpu->ResampleImage(1, std::numeric_limits<float>::quiet_NaN());
+            if (isSymmetric) {
+                computeCpuBw->GetDeformationField(false, true);
+                computeCpuBw->ResampleImage(1, std::numeric_limits<float>::quiet_NaN());
+            }
+            const double simMeasureCpu = nmiCpu->GetSimilarityMeasureValue();
+
+            // Compute the similarity measure value for CUDA
+            NiftiImage warpedCuda(contentCuda->F3dContent::GetWarped());
+            warpedCuda.copyData(contentCpu->GetWarped());
+            warpedCuda.disown();
+            contentCuda->UpdateWarped();
+            // computeCuda->GetDeformationField(false, true);
+            // computeCuda->ResampleImage(1, std::numeric_limits<float>::quiet_NaN());
+            if (isSymmetric) {
+                NiftiImage warpedCudaBw(contentCudaBw->F3dContent::GetWarped());
+                warpedCudaBw.copyData(contentCpuBw->GetWarped());
+                warpedCudaBw.disown();
+                contentCudaBw->UpdateWarped();
+                // computeCudaBw->GetDeformationField(false, true);
+                // computeCudaBw->ResampleImage(1, std::numeric_limits<float>::quiet_NaN());
+            }
+            const double simMeasureCuda = nmiCuda->GetSimilarityMeasureValue();
+
+            // Compute the similarity measure gradient for CPU
+            int timepoint = 0;
+            contentCpu->ZeroVoxelBasedMeasureGradient();
+            computeCpu->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
+            if (isSymmetric) {
+                contentCpuBw->ZeroVoxelBasedMeasureGradient();
+                computeCpuBw->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
+            }
+            nmiCpu->GetVoxelBasedSimilarityMeasureGradient(timepoint);
+
+            // Compute the similarity measure gradient for CUDA
+            contentCuda->ZeroVoxelBasedMeasureGradient();
+            // computeCuda->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
+            NiftiImage warpedGradCuda(contentCuda->F3dContent::GetWarpedGradient());
+            warpedGradCuda.copyData(contentCpu->GetWarpedGradient());
+            warpedGradCuda.disown();
+            contentCuda->UpdateWarpedGradient();
+            if (isSymmetric) {
+                contentCudaBw->ZeroVoxelBasedMeasureGradient();
+                // computeCudaBw->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
+                NiftiImage warpedGradCudaBw(contentCudaBw->F3dContent::GetWarpedGradient());
+                warpedGradCudaBw.copyData(contentCpuBw->GetWarpedGradient());
+                warpedGradCudaBw.disown();
+                contentCudaBw->UpdateWarpedGradient();
+            }
+            nmiCuda->GetVoxelBasedSimilarityMeasureGradient(timepoint);
+
+            // Get the voxel-based similarity measure gradients
+            NiftiImage voxelBasedGradCpu(contentCpu->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
+            NiftiImage voxelBasedGradCuda(contentCuda->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
+
+            // Save for testing
+            testCases.push_back({ testName, simMeasureCpu, simMeasureCuda, std::move(voxelBasedGradCpu), std::move(voxelBasedGradCuda) });
+        }
+    }
+};
+
+TEST_CASE_METHOD(NmiTest, "Regression NMI", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, simMeasureCpu, simMeasureCuda, voxelBasedGradCpu, voxelBasedGradCuda] = testCase;
+
+        SECTION(testName) {
+            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            std::cout << std::fixed << std::setprecision(10);
+
+            // Check the similarity measure values
+            std::cout << "Similarity measure: " << simMeasureCpu << " " << simMeasureCuda << std::endl;
+            REQUIRE(fabs(simMeasureCpu - simMeasureCuda) < EPS);
+
+            // Check the voxel-based similarity measure gradients
+            const auto voxelBasedGradCpuPtr = voxelBasedGradCpu.data();
+            const auto voxelBasedGradCudaPtr = voxelBasedGradCuda.data();
+            for (size_t i = 0; i < voxelBasedGradCpu.nVoxels(); ++i) {
+                const float cpuVal = voxelBasedGradCpuPtr[i];
+                const float cudaVal = voxelBasedGradCudaPtr[i];
+                std::cout << i << " " << cpuVal << " " << cudaVal << std::endl;
+                REQUIRE(fabs(cpuVal - cudaVal) < EPS);
+            }
+        }
+    }
+}

From fb586dab8376e243874e99da44e0a4df39957562 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Mon, 24 Jul 2023 10:46:04 +0100
Subject: [PATCH 167/314] #92: Set the test seed to constant for
 reproducibility

---
 niftyreg_build_version.txt                      | 2 +-
 reg-lib/cuda/blockMatchingKernel.cu             | 6 ++++++
 reg-test/reg_test_blockMatching.cpp             | 3 +--
 reg-test/reg_test_conjugateGradient.cpp         | 3 +--
 reg-test/reg_test_getDeformationField.cpp       | 3 +--
 reg-test/reg_test_lncc.cpp                      | 3 +--
 reg-test/reg_test_normaliseGradient.cpp         | 3 +--
 reg-test/reg_test_regr_blockMatching.cpp        | 4 +---
 reg-test/reg_test_regr_lts.cpp                  | 3 +--
 reg-test/reg_test_regr_nmi.cpp                  | 3 +--
 reg-test/reg_test_voxelCentricToNodeCentric.cpp | 3 +--
 11 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6cf44528..209ac45b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-285
+287
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 874a20de..cd91c556 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -90,6 +90,12 @@ __device__ __inline__ float blockReduceSum(float val, unsigned tid) {
             shared[tid] += shared[tid + i];
         __syncthreads();
     }
+    // if (tid == 0){
+    //     for (unsigned i = 1; i < 64; ++i) {
+    //             shared[0] += shared[i];
+    //     }
+    // }
+    // __syncthreads();
     return shared[0];
 }
 /* *************************************************************** */
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index f237a44d..06ce0faf 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -24,8 +24,7 @@ class BMTest {
             return;
 
         // Create a random number generator
-        std::random_device rd;
-        std::mt19937 gen(rd());
+        std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create 2D and 3D reference images
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index d4b060f6..39e3195e 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -25,8 +25,7 @@ class ConjugateGradientTest: public InterfaceOptimiser {
             return;
 
         // Create a random number generator
-        std::random_device rd;
-        std::mt19937 gen(rd());
+        std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a reference 2D image
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index 03b17dd7..f9e15c86 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -25,8 +25,7 @@ class GetDeformationFieldTest {
             return;
 
         // Create a random number generator
-        std::random_device rd;
-        std::mt19937 gen(rd());
+        std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a 2D reference image
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index a6f1052b..4f0118c4 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -18,8 +18,7 @@ class LNCCTest {
             return;
 
         // Create a random number generator
-        std::random_device rd;
-        std::mt19937 gen(rd());
+        std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create reference and floating 2D images
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index 47876b7c..6b388e90 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -25,8 +25,7 @@ class NormaliseGradientTest {
             return;
 
         // Create a random number generator
-        std::random_device rd;
-        std::mt19937 gen(rd());
+        std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a reference 2D image
diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp
index be362a51..55b824b0 100644
--- a/reg-test/reg_test_regr_blockMatching.cpp
+++ b/reg-test/reg_test_regr_blockMatching.cpp
@@ -19,9 +19,7 @@ class BMTest {
         if (!testCases.empty())
             return;
 
-        // Create a random number generator
-        std::random_device rd;
-        std::mt19937 gen(rd());
+        std::mt19937 gen(0);        
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a reference and floating 2D images
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
index c7c72ef1..5a075ae8 100644
--- a/reg-test/reg_test_regr_lts.cpp
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -23,8 +23,7 @@ class LTSTest {
             return;
 
         // Create a random number generator
-        std::random_device rd;
-        std::mt19937 gen(rd());
+        std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a reference and floating 2D images
diff --git a/reg-test/reg_test_regr_nmi.cpp b/reg-test/reg_test_regr_nmi.cpp
index 842a46e3..c79f9e5b 100644
--- a/reg-test/reg_test_regr_nmi.cpp
+++ b/reg-test/reg_test_regr_nmi.cpp
@@ -21,8 +21,7 @@ class NmiTest {
             return;
 
         // Create a random number generator
-        std::random_device rd;
-        std::mt19937 gen(rd());
+        std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create 2D reference, floating and control point grid images
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index 027e5467..da95af28 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -23,8 +23,7 @@ class VoxelCentricToNodeCentricTest {
             return;
 
         // Create a random number generator
-        std::random_device rd;
-        std::mt19937 gen(rd());
+        std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a 2D reference image

From 4f298005fe57d92bd145e906e1f602538401197a Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@kcl.ac.uk>
Date: Mon, 24 Jul 2023 11:22:10 +0100
Subject: [PATCH 168/314] Issue #92: added a unit test for nmi

---
 .gitignore                 |   4 +
 niftyreg_build_version.txt |   2 +-
 reg-test/CMakeLists.txt    |   1 +
 reg-test/reg_test_nmi.cpp  | 181 +++++++++++++++++++++++++++++++++++++
 4 files changed, 187 insertions(+), 1 deletion(-)
 create mode 100644 reg-test/reg_test_nmi.cpp

diff --git a/.gitignore b/.gitignore
index 158e90bb..34fd63b5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,7 @@ CMakeSettings.json
 
 # Build
 build*
+
+# Doxygen
+html
+latex
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 209ac45b..ea809473 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-287
+288
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 53bd0607..2ac8c8ec 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -116,6 +116,7 @@ set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST})
 set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
+set(EXEC_LIST reg_test_nmi ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
 if(USE_CUDA)
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
new file mode 100644
index 00000000..ef30cdb0
--- /dev/null
+++ b/reg-test/reg_test_nmi.cpp
@@ -0,0 +1,181 @@
+// OpenCL and CUDA are not supported for this test yet
+#undef _USE_OPENCL
+#undef _USE_CUDA
+
+#include "reg_test_common.h"
+#include "_reg_tools.h"
+#include "_reg_nmi.h"
+
+/*
+    This test file contains the following unit tests:
+    test function: NMI computation
+*/
+
+class NMITest {
+public:
+    NMITest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a number generator
+        std::mt19937 gen(0);
+        // Images will be rescaled between 2 and bin-3
+        // Default bin value is 68 (64+4 for Parzen windowing)
+        std::uniform_real_distribution<float> distr(2, 65);
+
+        // Create reference and floating 2D images
+        vector<NiftiImage::dim_t> dim{ 16, 16 };
+        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
+
+        // Create reference and floating 3D images
+        dim.push_back(16);
+        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
+
+        // Fill images with random values
+        auto ref2dPtr = reference2d.data();
+        auto flo2dPtr = floating2d.data();
+        // Ensure at least one pixel contains the max and one the min
+        ref2dPtr[0] = flo2dPtr[0] = 2.f;
+        ref2dPtr[1] = flo2dPtr[1] = 65.f;
+        for (size_t i = 2; i < reference2d.nVoxels(); ++i)
+        {
+            ref2dPtr[i] = (int)distr(gen); // cast to integer to not use PW
+            flo2dPtr[i] = (int)distr(gen);
+        }
+
+        // Fill images with random values
+        auto ref3dPtr = reference3d.data();
+        auto flo3dPtr = floating3d.data();
+        // Ensure at least one pixel contains the max and one the min
+        ref3dPtr[0] = flo3dPtr[0] = 2.f;
+        ref3dPtr[1] = flo3dPtr[1] = 65.f;
+        for (size_t i = 2; i < reference3d.nVoxels(); ++i) {
+            ref3dPtr[i] = (int)distr(gen);
+            flo3dPtr[i] = (int)distr(gen);
+        }
+
+        // Create corresponding identify control point grids
+        NiftiImage cpp2d(CreateControlPointGrid(reference2d));
+        NiftiImage cpp3d(CreateControlPointGrid(reference3d));
+
+        // Create the object to compute the expected values
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "NMI 2D",
+            reference2d,
+            floating2d,
+            cpp2d,
+            GetNMIPW(reference2d, floating2d)
+        ));
+        testData.emplace_back(TestData(
+            "NMI 3D",
+            reference3d,
+            floating3d,
+            cpp3d,
+            GetNMIPW(reference3d, floating3d)
+        ));
+        for (auto&& data : testData) {
+            for (auto&& platformType : PlatformTypes) {
+                // Create the platform
+                shared_ptr<Platform> platform{ new Platform(platformType) };
+                // Make a copy of the test data
+                auto td = data;
+                auto&& [testName, reference, floating, cpp, expected] = td;
+                // Create the content creator
+                unique_ptr<F3dContentCreator> contentCreator{
+                    dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d))
+                };
+                // Create the content
+                unique_ptr<F3dContent> content{ contentCreator->Create(reference, floating, cpp) };
+                // Initialise the warped image using floating image
+                content->SetWarped(floating.disown());
+                // Create the measure
+                unique_ptr<Measure> measure{ platform->CreateMeasure() };
+                // Use NMI as a measure
+                unique_ptr<reg_nmi> measure_nmi{ dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)) };
+                measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0
+                measure->Initialise(*measure_nmi, *content);
+                double nmi = measure_nmi->GetSimilarityMeasureValue();
+
+                testCases.push_back({ testName, nmi, expected});
+            }
+        }
+    }
+
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, double>;
+    using TestCase = std::tuple<std::string, double, double>;
+    inline static vector<TestCase> testCases;
+
+    double GetNMIPW(const NiftiImage& ref, const NiftiImage& flo)
+    {   
+        // Allocate a joint histogram and fill it with zeros
+        double jh[68][68];
+        for (unsigned i = 0; i < 68; ++i)
+            for (unsigned j = 0; j < 68; ++j)
+                jh[i][j] = 0;
+        // Fill it with the intensity values
+        const auto refPtr = ref.data();
+        const auto floPtr = flo.data();
+        for (auto refItr = refPtr.begin(), floItr = floPtr.begin();
+            refItr != refPtr.end();
+            ++refItr, ++floItr)
+            jh[(int)*refItr][(int)*floItr]++;
+        // Convert the histogram into an image to later apply the convolution
+        vector<NiftiImage::dim_t> dim{ 68, 68 };
+        NiftiImage jointHistogram(dim, NIFTI_TYPE_FLOAT64);
+        double *jhPtr = static_cast<double *>(jointHistogram->data);
+        // Conver the occurances to probabilities
+        for (unsigned i = 0; i < 68; ++i)
+            for (unsigned j = 0; j < 68; ++j)
+                *jhPtr++ = jh[i][j] / ref.nVoxels();
+        // Apply a convolution to mimic the parzen windowing
+        float sigma[1] = {1.f};
+        reg_tools_kernelConvolution(jointHistogram, sigma, CUBIC_SPLINE_KERNEL);
+        // Restore the jh array
+        jhPtr = static_cast<double *>(jointHistogram->data);
+        for (unsigned i = 0; i < 68; ++i)
+            for (unsigned j = 0; j < 68; ++j)
+                jh[i][j] = *jhPtr++;
+        // Compute the entropies
+        double ref_ent = 0.;
+        double flo_ent = 0.;
+        double joi_ent = 0.;
+        for (unsigned i = 0; i < 68; ++i)
+        {
+            double ref_pro = 0.;
+            double flo_pro = 0.;
+            for (unsigned j = 0; j < 68; ++j)
+            {
+                flo_pro += jh[i][j];
+                ref_pro += jh[j][i];
+                if(jh[i][j]>0.)
+                    joi_ent -= jh[i][j] * log(jh[i][j]);
+            }
+            if (ref_pro>0)
+                ref_ent -= ref_pro * log(ref_pro);
+            if (flo_pro>0)
+                flo_ent -= flo_pro * log(flo_pro);
+        }
+        double nmi = (ref_ent + flo_ent) / joi_ent;
+        return nmi;
+    }
+};
+
+TEST_CASE_METHOD(NMITest, "NMI", "[unit]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, result, expected] = testCase;
+
+        SECTION(testName) {
+            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            if (fabs(result - expected) > EPS){
+                std::cout << "Result=" << result << " | Expected=" << expected << std::endl;
+            }
+            REQUIRE(fabs(result - expected) < EPS);
+        }
+    }
+}

From cd099269a1a0407a8818cd2e9251df5cb7cdf468 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Mon, 24 Jul 2023 11:26:55 +0100
Subject: [PATCH 169/314] Issue #92: activate CUDA for NMI test

---
 niftyreg_build_version.txt | 2 +-
 reg-test/reg_test_nmi.cpp  | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index ea809473..336dd5e3 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-288
+289
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index ef30cdb0..c5c887d4 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -1,6 +1,5 @@
 // OpenCL and CUDA are not supported for this test yet
 #undef _USE_OPENCL
-#undef _USE_CUDA
 
 #include "reg_test_common.h"
 #include "_reg_tools.h"
@@ -24,12 +23,12 @@ class NMITest {
         std::uniform_real_distribution<float> distr(2, 65);
 
         // Create reference and floating 2D images
-        vector<NiftiImage::dim_t> dim{ 16, 16 };
+        vector<NiftiImage::dim_t> dim{ 60, 62 };
         NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
 
         // Create reference and floating 3D images
-        dim.push_back(16);
+        dim.push_back(64);
         NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
 

From a10fe1d8cf49e07784d1bc64e37de70bb02e7e28 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Mon, 24 Jul 2023 13:38:49 +0100
Subject: [PATCH 170/314] #72 #92 Added an error message when ln is set to 0

---
 niftyreg_build_version.txt | 2 +-
 reg-lib/_reg_base.cpp      | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 336dd5e3..8408670a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-289
+290
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 87963d8a..19448a59 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -178,7 +178,12 @@ void reg_base<T>::SetWarpedPaddingValue(float warpedPaddingValueIn) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLevelNumber(unsigned levelNumberIn) {
+    if(levelNumberIn>0)
     levelNumber = levelNumberIn;
+    else{
+        reg_print_msg_error("The number of level is expected to be strictly positive. Exit");
+        reg_exit();
+    }
 #ifndef NDEBUG
     reg_print_fct_debug("reg_base<T>::SetLevelNumber");
 #endif

From 7204698fa74eab187e47138d8f467c16dc785129 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 24 Jul 2023 14:32:50 +0100
Subject: [PATCH 171/314] Add symmetric scheme support for reg_optimiser_gpu
 and reg_conjugateGradient_gpu #92

---
 niftyreg_build_version.txt               |   2 +-
 reg-lib/Platform.cpp                     |   2 +-
 reg-lib/cpu/_reg_optimiser.cpp           |  68 ++++----
 reg-lib/cpu/_reg_optimiser.h             |  34 ++--
 reg-lib/cuda/BlockSize.hpp               |  12 +-
 reg-lib/cuda/CMakeLists.txt              |   2 +-
 reg-lib/cuda/_reg_common_cuda_kernels.cu |  30 ++--
 reg-lib/cuda/_reg_optimiser_gpu.cu       | 190 +++++++++++++++--------
 reg-lib/cuda/_reg_optimiser_gpu.h        |  63 ++++----
 reg-lib/cuda/_reg_optimiser_kernels.cu   |   4 +-
 10 files changed, 235 insertions(+), 172 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 8408670a..8641ad81 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-290
+291
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 170101f4..86fc226f 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -163,7 +163,7 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
                           optimiseY,
                           optimiseZ,
                           maxIterationNumber,
-                          0, // currentIterationNumber,
+                          0,
                           &opt,
                           controlPointGridData,
                           transformationGradientData,
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index 4b624b22..db71d20f 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -18,7 +18,7 @@ reg_optimiser<T>::reg_optimiser() {
     this->currentDofBw = nullptr;
     this->bestDof = nullptr;
     this->bestDofBw = nullptr;
-    this->isBackwards = false;
+    this->isSymmetric = false;
     this->gradient = nullptr;
     this->currentIterationNumber = 0;
     this->currentObjFunctionValue = 0;
@@ -69,23 +69,21 @@ void reg_optimiser<T>::Initialise(size_t nvox,
     this->maxIterationNumber = maxIt;
     this->currentIterationNumber = startIt;
     this->currentDof = cppData;
-    if (this->bestDof != nullptr) free(this->bestDof);
+    this->gradient = gradData;
+
+    if (this->bestDof) free(this->bestDof);
     this->bestDof = (T*)malloc(this->dofNumber * sizeof(T));
-    memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T));
-    if (gradData)
-        this->gradient = gradData;
 
-    if (nvoxBw > 0)
+    this->isSymmetric = nvoxBw > 0 && cppDataBw && gradDataBw;
+    if (this->isSymmetric) {
         this->dofNumberBw = nvoxBw;
-    if (cppDataBw) {
         this->currentDofBw = cppDataBw;
-        this->isBackwards = true;
-        if (this->bestDofBw != nullptr) free(this->bestDofBw);
+        this->gradientBw = gradDataBw;
+        if (this->bestDofBw) free(this->bestDofBw);
         this->bestDofBw = (T*)malloc(this->dofNumberBw * sizeof(T));
-        memcpy(this->bestDofBw, this->currentDofBw, this->dofNumberBw * sizeof(T));
     }
-    if (gradDataBw)
-        this->gradientBw = gradDataBw;
+
+    this->StoreCurrentDof();
 
     this->intOpt = intOpt;
     this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
@@ -97,25 +95,25 @@ void reg_optimiser<T>::Initialise(size_t nvox,
 /* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::RestoreBestDof() {
-    // restore forward transformation
+    // Restore forward transformation
     memcpy(this->currentDof, this->bestDof, this->dofNumber * sizeof(T));
-    // restore backward transformation if required
-    if (this->currentDofBw && this->bestDofBw && this->dofNumberBw > 0)
+    // Restore backward transformation if required
+    if (this->isSymmetric)
         memcpy(this->currentDofBw, this->bestDofBw, this->dofNumberBw * sizeof(T));
 }
 /* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::StoreCurrentDof() {
-    // save forward transformation
+    // Save forward transformation
     memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T));
-    // save backward transformation if required
-    if (this->currentDofBw && this->bestDofBw && this->dofNumberBw > 0)
+    // Save backward transformation if required
+    if (this->isSymmetric)
         memcpy(this->bestDofBw, this->currentDofBw, this->dofNumberBw * sizeof(T));
 }
 /* *************************************************************** */
 template <class T>
 void reg_optimiser<T>::Perturbation(float length) {
-    // initialise the randomiser
+    // Initialise the randomiser
     srand((unsigned)time(nullptr));
     // Reset the number of iteration
     this->currentIterationNumber = 0;
@@ -123,7 +121,7 @@ void reg_optimiser<T>::Perturbation(float length) {
     for (size_t i = 0; i < this->dofNumber; ++i) {
         this->currentDof[i] = this->bestDof[i] + length * (float)(rand() - RAND_MAX / 2) / ((float)RAND_MAX / 2.0f);
     }
-    if (this->isBackwards) {
+    if (this->isSymmetric) {
         for (size_t i = 0; i < this->dofNumberBw; ++i) {
             this->currentDofBw[i] = this->bestDofBw[i] + length * (float)(rand() % 2001 - 1000) / 1000.f;
         }
@@ -195,10 +193,9 @@ void reg_optimiser<T>::Optimise(T maxLength, T smallLength, T& startLength) {
 template <class T>
 reg_conjugateGradient<T>::reg_conjugateGradient(): reg_optimiser<T>::reg_optimiser() {
     this->array1 = nullptr;
-    this->array2 = nullptr;
     this->array1Bw = nullptr;
+    this->array2 = nullptr;
     this->array2Bw = nullptr;
-
 #ifndef NDEBUG
     reg_print_msg_debug("reg_conjugateGradient<T>::reg_conjugateGradient() called");
 #endif
@@ -210,22 +207,18 @@ reg_conjugateGradient<T>::~reg_conjugateGradient() {
         free(this->array1);
         this->array1 = nullptr;
     }
-
-    if (this->array2) {
-        free(this->array2);
-        this->array2 = nullptr;
-    }
-
     if (this->array1Bw) {
         free(this->array1Bw);
         this->array1Bw = nullptr;
     }
-
+    if (this->array2) {
+        free(this->array2);
+        this->array2 = nullptr;
+    }
     if (this->array2Bw) {
         free(this->array2Bw);
         this->array2Bw = nullptr;
     }
-
 #ifndef NDEBUG
     reg_print_msg_debug("reg_conjugateGradient<T>::~reg_conjugateGradient() called");
 #endif
@@ -252,7 +245,7 @@ void reg_conjugateGradient<T>::Initialise(size_t nvox,
     this->array1 = (T*)malloc(this->dofNumber * sizeof(T));
     this->array2 = (T*)malloc(this->dofNumber * sizeof(T));
 
-    if (cppDataBw && gradDataBw && nvoxBw > 0) {
+    if (this->isSymmetric) {
         if (this->array1Bw) free(this->array1Bw);
         if (this->array2Bw) free(this->array2Bw);
         this->array1Bw = (T*)malloc(this->dofNumberBw * sizeof(T));
@@ -296,7 +289,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
         for (i = 0; i < num; i++) {
             array2Ptr[i] = array1Ptr[i] = -gradientPtr[i];
         }
-        if (this->dofNumberBw > 0) {
+        if (this->isSymmetric) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw)
@@ -323,7 +316,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
         }
         double gam = dgg / gg;
 
-        if (this->dofNumberBw > 0) {
+        if (this->isSymmetric) {
             double dggBw = 0, ggBw = 0;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -346,7 +339,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
             array2Ptr[i] = static_cast<T>(array1Ptr[i] + gam * array2Ptr[i]);
             gradientPtr[i] = -array2Ptr[i];
         }
-        if (this->dofNumberBw > 0) {
+        if (this->isSymmetric) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw,gam)
@@ -365,9 +358,7 @@ void reg_conjugateGradient<T>::Optimise(T maxLength,
                                         T smallLength,
                                         T &startLength) {
     this->UpdateGradientValues();
-    reg_optimiser<T>::Optimise(maxLength,
-                               smallLength,
-                               startLength);
+    reg_optimiser<T>::Optimise(maxLength, smallLength, startLength);
 }
 /* *************************************************************** */
 template <class T>
@@ -377,8 +368,7 @@ void reg_conjugateGradient<T>::Perturbation(float length) {
 }
 /* *************************************************************** */
 template <class T>
-reg_lbfgs<T>::reg_lbfgs()
-    :reg_optimiser<T>::reg_optimiser() {
+reg_lbfgs<T>::reg_lbfgs(): reg_optimiser<T>::reg_optimiser() {
     this->stepToKeep = 5;
     this->oldDof = nullptr;
     this->oldGrad = nullptr;
diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h
index 6f0b7835..6ada7867 100644
--- a/reg-lib/cpu/_reg_optimiser.h
+++ b/reg-lib/cpu/_reg_optimiser.h
@@ -29,7 +29,7 @@ class InterfaceOptimiser {
 template <class T>
 class reg_optimiser {
 protected:
-    bool isBackwards;
+    bool isSymmetric;
     size_t dofNumber;
     size_t dofNumberBw;
     size_t ndim;
@@ -131,10 +131,10 @@ class reg_optimiser {
                             size_t startIt,
                             InterfaceOptimiser *intOpt,
                             T *cppData,
-                            T *gradData = nullptr,
-                            size_t nvoxBw = 0,
-                            T *cppDataBw = nullptr,
-                            T *gradDataBw = nullptr);
+                            T *gradData,
+                            size_t nvoxBw,
+                            T *cppDataBw,
+                            T *gradDataBw);
     virtual void Optimise(T maxLength,
                           T smallLength,
                           T& startLength);
@@ -169,14 +169,14 @@ class reg_conjugateGradient: public reg_optimiser<T> {
                             size_t maxIt,
                             size_t startIt,
                             InterfaceOptimiser *intOpt,
-                            T *cppData = nullptr,
-                            T *gradData = nullptr,
-                            size_t nvoxBw = 0,
-                            T *cppDataBw = nullptr,
-                            T *gradDataBw = nullptr) override;
+                            T *cppData,
+                            T *gradData,
+                            size_t nvoxBw,
+                            T *cppDataBw,
+                            T *gradDataBw) override;
     virtual void Optimise(T maxLength,
                           T smallLength,
-                          T &startLength) override;
+                          T& startLength) override;
     virtual void Perturbation(float length) override;
 };
 /* *************************************************************** */
@@ -208,14 +208,14 @@ class reg_lbfgs: public reg_optimiser<T> {
                             size_t maxIt,
                             size_t startIt,
                             InterfaceOptimiser *intOpt,
-                            T *cppData = nullptr,
-                            T *gradData = nullptr,
-                            size_t nvoxBw = 0,
-                            T *cppDataBw = nullptr,
-                            T *gradDataBw = nullptr) override;
+                            T *cppData,
+                            T *gradData,
+                            size_t nvoxBw,
+                            T *cppDataBw,
+                            T *gradDataBw) override;
     virtual void Optimise(T maxLength,
                           T smallLength,
-                          T &startLength) override;
+                          T& startLength) override;
 };
 /* *************************************************************** */
 #include "_reg_optimiser.cpp"
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 68880b58..101ece57 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -57,8 +57,8 @@ struct BlockSize {
     unsigned reg_defField_getJacobianMatrix;
     /* _reg_optimiser_gpu */
     unsigned reg_initialiseConjugateGradient;
-    unsigned reg_GetConjugateGradient1;
-    unsigned reg_GetConjugateGradient2;
+    unsigned reg_getConjugateGradient1;
+    unsigned reg_getConjugateGradient2;
     unsigned GetMaximalLength;
     unsigned reg_updateControlPointPosition;
     /* _reg_ssd_gpu */
@@ -122,8 +122,8 @@ struct BlockSize100: public BlockSize {
         reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
         /* _reg_optimiser_gpu */
         reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem
-        reg_GetConjugateGradient1 = 320; // 12 reg - 24 smem
-        reg_GetConjugateGradient2 = 384; // 10 reg - 40 smem
+        reg_getConjugateGradient1 = 320; // 12 reg - 24 smem
+        reg_getConjugateGradient2 = 384; // 10 reg - 40 smem
         GetMaximalLength = 384; // 04 reg - 24 smem
         reg_updateControlPointPosition = 384; // 08 reg - 24 smem
         /* _reg_ssd_gpu */
@@ -191,8 +191,8 @@ struct BlockSize300: public BlockSize {
         reg_defField_getJacobianMatrix = 768; // 34 reg
         /* _reg_optimiser_gpu */
         reg_initialiseConjugateGradient = 1024; // 20 reg
-        reg_GetConjugateGradient1 = 1024; // 22 reg
-        reg_GetConjugateGradient2 = 1024; // 25 reg
+        reg_getConjugateGradient1 = 1024; // 22 reg
+        reg_getConjugateGradient2 = 1024; // 25 reg
         GetMaximalLength = 1024; // 20 reg
         reg_updateControlPointPosition = 1024; // 22 reg
         /* _reg_ssd_gpu */
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 41d6ae7c..28f46f4b 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -38,7 +38,7 @@ else(NOT COMPILE_RESULT_VAR)
     endif()
     #adjust for debug and release versions
     if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-v -g -G -lineinfo")
+        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-v -g -G")
     else(CMAKE_BUILD_TYPE STREQUAL "Debug")
         set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-O3")
     endif(CMAKE_BUILD_TYPE STREQUAL "Debug")
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index 8de94c04..2137a714 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -8,49 +8,53 @@
 #pragma once
 
 /* *************************************************************** */
-__device__ __inline__ float2 operator*(float a, float2 b) {
+__device__ __inline__ float2 operator*(const float& a, const float2& b) {
     return { a * b.x, a * b.y };
 }
-__device__ __inline__ float3 operator*(float a, float3 b) {
+__device__ __inline__ float3 operator*(const float& a, const float3& b) {
     return { a * b.x, a * b.y, a * b.z };
 }
-__device__ __inline__ float3 operator*(float3 a, float3 b) {
+__device__ __inline__ float3 operator*(const float3& a, const float3& b) {
     return { a.x * b.x, a.y * b.y, a.z * b.z };
 }
-__device__ __inline__ float4 operator*(float4 a, float4 b) {
+__device__ __inline__ float4 operator*(const float4& a, const float4& b) {
     return { a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w };
 }
-__device__ __inline__ float4 operator*(float a, float4 b) {
+__device__ __inline__ float4 operator*(const float& a, const float4& b) {
     return { a * b.x, a * b.y, a * b.z, 0.0f };
 }
 /* *************************************************************** */
-__device__ __inline__ float2 operator/(float2 a, float2 b) {
+__device__ __inline__ float2 operator/(const float2& a, const float2& b) {
     return { a.x / b.x, a.y / b.y };
 }
-__device__ __inline__ float3 operator/(float3 a, float b) {
+__device__ __inline__ float3 operator/(const float3& a, const float& b) {
     return { a.x / b, a.y / b, a.z / b };
 }
-__device__ __inline__ float3 operator/(float3 a, float3 b) {
+__device__ __inline__ float3 operator/(const float3& a, const float3& b) {
     return { a.x / b.x, a.y / b.y, a.z / b.z };
 }
 /* *************************************************************** */
-__device__ __inline__ float2 operator+(float2 a, float2 b) {
+__device__ __inline__ float2 operator+(const float2& a, const float2& b) {
     return { a.x + b.x, a.y + b.y };
 }
-__device__ __inline__ float4 operator+(float4 a, float4 b) {
+__device__ __inline__ float4 operator+(const float4& a, const float4& b) {
     return { a.x + b.x, a.y + b.y, a.z + b.z, 0.0f };
 }
-__device__ __inline__ float3 operator+(float3 a, float3 b) {
+__device__ __inline__ float3 operator+(const float3& a, const float3& b) {
     return { a.x + b.x, a.y + b.y, a.z + b.z };
 }
 /* *************************************************************** */
-__device__ __inline__ float3 operator-(float3 a, float3 b) {
+__device__ __inline__ float3 operator-(const float3& a, const float3& b) {
     return { a.x - b.x, a.y - b.y, a.z - b.z };
 }
-__device__ __inline__ float4 operator-(float4 a, float4 b) {
+__device__ __inline__ float4 operator-(const float4& a, const float4& b) {
     return { a.x - b.x, a.y - b.y, a.z - b.z, 0.f };
 }
 /* *************************************************************** */
+__device__ __inline__ double2 operator+(const double2& a, const double2& b) {
+    return { a.x + b.x, a.y + b.y };
+}
+/* *************************************************************** */
 __device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) {
     out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]);
     out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]);
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index ac1d1d79..fc4b9ead 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -1,14 +1,20 @@
 #include "_reg_optimiser_gpu.h"
 #include "_reg_optimiser_kernels.cu"
+#include "_reg_common_cuda_kernels.cu"
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include <thrust/inner_product.h>
 
 /* *************************************************************** */
 reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser<float>::reg_optimiser() {
     this->currentDofCuda = nullptr;
+    this->currentDofBwCuda = nullptr;
     this->bestDofCuda = nullptr;
+    this->bestDofBwCuda = nullptr;
     this->gradientCuda = nullptr;
-
+    this->gradientBwCuda = nullptr;
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_optimiser_gpu::reg_optimiser_gpu() called\n");
+    reg_print_msg_debug("reg_optimiser_gpu::reg_optimiser_gpu() called\n");
 #endif
 }
 /* *************************************************************** */
@@ -17,8 +23,12 @@ reg_optimiser_gpu::~reg_optimiser_gpu() {
         cudaCommon_free(this->bestDofCuda);
         this->bestDofCuda = nullptr;
     }
+    if (this->bestDofBwCuda) {
+        cudaCommon_free(this->bestDofBwCuda);
+        this->bestDofBwCuda = nullptr;
+    }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_optimiser_gpu::~reg_optimiser_gpu() called\n");
+    reg_print_msg_debug("reg_optimiser_gpu::~reg_optimiser_gpu() called\n");
 #endif
 }
 /* *************************************************************** */
@@ -42,45 +52,53 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
     this->optimiseZ = optZ;
     this->maxIterationNumber = maxIt;
     this->currentIterationNumber = startIt;
-
-    // Arrays are converted from float to float4
     this->currentDofCuda = reinterpret_cast<float4*>(cppData);
+    this->gradientCuda = reinterpret_cast<float4*>(gradData);
 
-    if (gradData)
-        this->gradientCuda = reinterpret_cast<float4*>(gradData);
-
-    if (this->bestDofCuda)
-        cudaCommon_free(this->bestDofCuda);
-
-    if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, (int)(this->GetVoxNumber()))) {
-        printf("[NiftyReg ERROR] Error when allocating the best control point array on the GPU.\n");
+    cudaCommon_free(this->bestDofCuda);
+    if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, (int)this->GetVoxNumber())) {
+        reg_print_fct_error("reg_optimiser_gpu::Initialise()");
+        reg_print_msg_error("Error when allocating the best control point array on the GPU");
         reg_exit();
     }
 
+    this->isSymmetric = nvoxBw > 0 && cppDataBw && gradDataBw;
+    if (this->isSymmetric) {
+        this->dofNumberBw = nvoxBw;
+        this->currentDofBwCuda = reinterpret_cast<float4*>(cppDataBw);
+        this->gradientBwCuda = reinterpret_cast<float4*>(gradDataBw);
+        cudaCommon_free(this->bestDofBwCuda);
+        if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, (int)this->GetVoxNumberBw())) {
+            reg_print_fct_error("reg_optimiser_gpu::Initialise()");
+            reg_print_msg_error("Error when allocating the best control point backwards array on the GPU");
+            reg_exit();
+        }
+    }
+
     this->StoreCurrentDof();
 
     this->intOpt = intOpt;
     this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
 
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_optimiser_gpu::Initialise() called\n");
+    reg_print_msg_debug("reg_optimiser_gpu::Initialise() called");
 #endif
 }
 /* *************************************************************** */
 void reg_optimiser_gpu::RestoreBestDof() {
-    // restore forward transformation
-    NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofCuda,
-                                 this->bestDofCuda,
-                                 this->GetVoxNumber() * sizeof(float4),
-                                 cudaMemcpyDeviceToDevice));
+    // Restore forward transformation
+    NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofCuda, this->bestDofCuda, this->GetVoxNumber() * sizeof(float4), cudaMemcpyDeviceToDevice));
+    // Restore backward transformation if required
+    if (this->isSymmetric)
+        NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofBwCuda, this->bestDofBwCuda, this->GetVoxNumberBw() * sizeof(float4), cudaMemcpyDeviceToDevice));
 }
 /* *************************************************************** */
 void reg_optimiser_gpu::StoreCurrentDof() {
     // Store forward transformation
-    NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofCuda,
-                                 this->currentDofCuda,
-                                 this->GetVoxNumber() * sizeof(float4),
-                                 cudaMemcpyDeviceToDevice));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofCuda, this->currentDofCuda, this->GetVoxNumber() * sizeof(float4), cudaMemcpyDeviceToDevice));
+    // Store backward transformation if required
+    if (this->isSymmetric)
+        NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofBwCuda, this->currentDofBwCuda, this->GetVoxNumberBw() * sizeof(float4), cudaMemcpyDeviceToDevice));
 }
 /* *************************************************************** */
 void reg_optimiser_gpu::Perturbation(float length) {
@@ -89,9 +107,11 @@ void reg_optimiser_gpu::Perturbation(float length) {
 /* *************************************************************** */
 reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_optimiser_gpu() {
     this->array1 = nullptr;
+    this->array1Bw = nullptr;
     this->array2 = nullptr;
+    this->array2Bw = nullptr;
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called\n");
+    reg_print_msg_debug("reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called");
 #endif
 }
 /* *************************************************************** */
@@ -100,13 +120,20 @@ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() {
         cudaCommon_free(this->array1);
         this->array1 = nullptr;
     }
-
+    if (this->array1Bw) {
+        cudaCommon_free(this->array1Bw);
+        this->array1Bw = nullptr;
+    }
     if (this->array2) {
         cudaCommon_free(this->array2);
         this->array2 = nullptr;
     }
+    if (this->array2Bw) {
+        cudaCommon_free(this->array2Bw);
+        this->array2Bw = nullptr;
+    }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called\n");
+    reg_print_msg_debug("reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called");
 #endif
 }
 /* *************************************************************** */
@@ -123,43 +150,46 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox,
                                            size_t nvoxBw,
                                            float *cppDataBw,
                                            float *gradDataBw) {
-    reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData);
+    reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
     this->firstCall = true;
-    if (cudaCommon_allocateArrayToDevice<float4>(&this->array1, (int)(this->GetVoxNumber()))) {
-        printf("[NiftyReg ERROR] Error when allocating the first conjugate gradient array on the GPU.\n");
+    cudaCommon_free(this->array1); cudaCommon_free(this->array2);
+    if (cudaCommon_allocateArrayToDevice<float4>(&this->array1, (int)this->GetVoxNumber()) ||
+        cudaCommon_allocateArrayToDevice<float4>(&this->array2, (int)this->GetVoxNumber())) {
+        reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()");
+        reg_print_msg_error("Error when allocating the conjugate gradient array on the GPU");
         reg_exit();
     }
-    if (cudaCommon_allocateArrayToDevice<float4>(&this->array2, (int)(this->GetVoxNumber()))) {
-        printf("[NiftyReg ERROR] Error when allocating the second conjugate gradient array on the GPU.\n");
-        reg_exit();
+    if (this->isSymmetric) {
+        cudaCommon_free(this->array1Bw); cudaCommon_free(this->array2Bw);
+        if (cudaCommon_allocateArrayToDevice<float4>(&this->array1Bw, (int)this->GetVoxNumberBw()) ||
+            cudaCommon_allocateArrayToDevice<float4>(&this->array2Bw, (int)this->GetVoxNumberBw())) {
+            reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()");
+            reg_print_msg_error("Error when allocating the conjugate gradient array backwards on the GPU");
+            reg_exit();
+        }
     }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::Initialise() called\n");
+    reg_print_msg_debug("reg_conjugateGradient_gpu::Initialise() called");
 #endif
 }
 /* *************************************************************** */
 void reg_conjugateGradient_gpu::UpdateGradientValues() {
     if (this->firstCall) {
-        reg_initialiseConjugateGradient_gpu(this->gradientCuda,
-                                            this->array1,
-                                            this->array2,
-                                            this->GetVoxNumber());
+        reg_initialiseConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber());
+        if (this->isSymmetric)
+            reg_initialiseConjugateGradient_gpu(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
         this->firstCall = false;
     } else {
-        reg_GetConjugateGradient_gpu(this->gradientCuda,
-                                     this->array1,
-                                     this->array2,
-                                     this->GetVoxNumber());
+        reg_getConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(),
+                                     this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
     }
 }
 /* *************************************************************** */
 void reg_conjugateGradient_gpu::Optimise(float maxLength,
                                          float smallLength,
-                                         float &startLength) {
+                                         float& startLength) {
     this->UpdateGradientValues();
-    reg_optimiser::Optimise(maxLength,
-                            smallLength,
-                            startLength);
+    reg_optimiser::Optimise(maxLength, smallLength, startLength);
 }
 /* *************************************************************** */
 void reg_conjugateGradient_gpu::Perturbation(float length) {
@@ -184,46 +214,78 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
     NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateHCuda, conjugateGCuda, nVoxels * sizeof(float4), cudaMemcpyDeviceToDevice));
 }
 /* *************************************************************** */
-void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda,
+struct Float2Sum {
+    __host__ __device__ double2 operator()(const float2& a, const float2& b) const {
+        return make_double2((double)a.x + (double)b.x, (double)a.y + (double)b.y);
+    }
+};
+/* *************************************************************** */
+void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
                                   float4 *conjugateGCuda,
                                   float4 *conjugateHCuda,
-                                  const size_t& nVoxels) {
+                                  const size_t& nVoxels,
+                                  const bool& isSymmetric,
+                                  float4 *gradientImageBwCuda,
+                                  float4 *conjugateGBwCuda,
+                                  float4 *conjugateHBwCuda,
+                                  const size_t& nVoxelsBw) {
     auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear,
                                                                nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
     auto conjugateGTexture = cudaCommon_createTextureObject(conjugateGCuda, cudaResourceTypeLinear,
                                                             nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
     auto conjugateHTexture = cudaCommon_createTextureObject(conjugateHCuda, cudaResourceTypeLinear,
                                                             nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    UniqueTextureObjectPtr gradientImageBwTexture(nullptr, nullptr), conjugateGBwTexture(nullptr, nullptr), conjugateHBwTexture(nullptr, nullptr);
+    if (isSymmetric) {
+        gradientImageBwTexture = std::move(cudaCommon_createTextureObject(gradientImageBwCuda, cudaResourceTypeLinear,
+                                                                          nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
+        conjugateGBwTexture = std::move(cudaCommon_createTextureObject(conjugateGBwCuda, cudaResourceTypeLinear,
+                                                                       nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
+        conjugateHBwTexture = std::move(cudaCommon_createTextureObject(conjugateHBwCuda, cudaResourceTypeLinear,
+                                                                       nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
+    }
 
     // gam = sum((grad+g)*grad)/sum(HxG);
-    unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_GetConjugateGradient1;
+    unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getConjugateGradient1;
     unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     dim3 blockDims(blocks, 1, 1);
     dim3 gridDims(grids, grids, 1);
 
-    float2 *sumsCuda;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&sumsCuda, nVoxels * sizeof(float2)));
-    reg_GetConjugateGradient1_kernel<<<gridDims, blockDims>>>(sumsCuda, *gradientImageTexture, *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels);
+    thrust::device_vector<float2> sumsCuda(nVoxels + nVoxels % 2);  // Make it even for thrust::inner_product
+    reg_getConjugateGradient1_kernel<<<gridDims, blockDims>>>(sumsCuda.data().get(), *gradientImageTexture,
+                                                              *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    float2 *sums;
-    NR_CUDA_SAFE_CALL(cudaMallocHost(&sums, nVoxels * sizeof(float2)));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(sums, sumsCuda, nVoxels * sizeof(float2), cudaMemcpyDeviceToHost));
-    NR_CUDA_SAFE_CALL(cudaFree(sumsCuda));
-    double dgg = 0;
-    double gg = 0;
-    for (size_t i = 0; i < nVoxels; i++) {
-        dgg += sums[i].x;
-        gg += sums[i].y;
+    const size_t sumsSizeHalf = sumsCuda.size() / 2;
+    const double2 gg = thrust::inner_product(sumsCuda.begin(), sumsCuda.begin() + sumsSizeHalf, sumsCuda.begin() + sumsSizeHalf,
+                                             make_double2(0, 0), thrust::plus<double2>(), Float2Sum());
+    float gam = static_cast<float>(gg.x / gg.y);
+    if (isSymmetric) {
+        grids = (unsigned)reg_ceil(sqrtf((float)nVoxelsBw / (float)blocks));
+        gridDims = dim3(blocks, 1, 1);
+        blockDims = dim3(grids, grids, 1);
+        thrust::device_vector<float2> sumsBwCuda(nVoxelsBw + nVoxelsBw % 2);  // Make it even for thrust::inner_product
+        reg_getConjugateGradient1_kernel<<<gridDims, blockDims>>>(sumsBwCuda.data().get(), *gradientImageBwTexture,
+                                                                  *conjugateGBwTexture, *conjugateHBwTexture, (unsigned)nVoxelsBw);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+        const size_t sumsBwSizeHalf = sumsBwCuda.size() / 2;
+        const double2 ggBw = thrust::inner_product(sumsBwCuda.begin(), sumsBwCuda.begin() + sumsBwSizeHalf, sumsBwCuda.begin() + sumsBwSizeHalf,
+                                                   make_double2(0, 0), thrust::plus<double2>(), Float2Sum());
+        gam = static_cast<float>((gg.x + ggBw.x) / (gg.y + ggBw.y));
     }
-    const float gam = (float)(dgg / gg);
-    NR_CUDA_SAFE_CALL(cudaFreeHost(sums));
 
-    blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_GetConjugateGradient2;
+    blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_getConjugateGradient2;
     grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     gridDims = dim3(blocks, 1, 1);
     blockDims = dim3(grids, grids, 1);
-    reg_GetConjugateGradient2_kernel<<<blockDims, gridDims>>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam);
+    reg_getConjugateGradient2_kernel<<<blockDims, gridDims>>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    if (isSymmetric) {
+        grids = (unsigned)reg_ceil(sqrtf((float)nVoxelsBw / (float)blocks));
+        gridDims = dim3(blocks, 1, 1);
+        blockDims = dim3(grids, grids, 1);
+        reg_getConjugateGradient2_kernel<<<blockDims, gridDims>>>(gradientImageBwCuda, conjugateGBwCuda, conjugateHBwCuda, (unsigned)nVoxelsBw, gam);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    }
 }
 /* *************************************************************** */
 void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index 3f602b17..de8d818f 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -10,27 +10,35 @@
  */
 class reg_optimiser_gpu: public reg_optimiser<float> {
 protected:
-    float4 *currentDofCuda; // pointers
-    float4 *gradientCuda; // pointers
-    float4 *bestDofCuda; // allocated here
+    float4 *currentDofCuda, *currentDofBwCuda;
+    float4 *bestDofCuda, *bestDofBwCuda;
+    float4 *gradientCuda, *gradientBwCuda;
 
 public:
     reg_optimiser_gpu();
     virtual ~reg_optimiser_gpu();
+    virtual void StoreCurrentDof() override;
+    virtual void RestoreBestDof() override;
 
-    // Float4 are casted to float for compatibility with the cpu class
+    // float4s are casted to floats for compatibility with the CPU class
     virtual float* GetCurrentDof() override {
         return reinterpret_cast<float*>(this->currentDofCuda);
     }
+    virtual float* GetCurrentDofBw() override {
+        return reinterpret_cast<float*>(this->currentDofBwCuda);
+    }
     virtual float* GetBestDof() override {
         return reinterpret_cast<float*>(this->bestDofCuda);
     }
+    virtual float* GetBestDofBw() override {
+        return reinterpret_cast<float*>(this->bestDofBwCuda);
+    }
     virtual float* GetGradient() override {
         return reinterpret_cast<float*>(this->gradientCuda);
     }
-
-    virtual void RestoreBestDof() override;
-    virtual void StoreCurrentDof() override;
+    virtual float* GetGradientBw() override {
+        return reinterpret_cast<float*>(this->gradientBwCuda);
+    }
 
     virtual void Initialise(size_t nvox,
                             int ndim,
@@ -38,13 +46,13 @@ class reg_optimiser_gpu: public reg_optimiser<float> {
                             bool optY,
                             bool optZ,
                             size_t maxIt,
-                            size_t start,
+                            size_t startIt,
                             InterfaceOptimiser *intOpt,
                             float *cppData,
-                            float *gradData = nullptr,
-                            size_t nvoxBw = 0,
-                            float *cppDataBw = nullptr,
-                            float *gradDataBw = nullptr) override;
+                            float *gradData,
+                            size_t nvoxBw,
+                            float *cppDataBw,
+                            float *gradDataBw) override;
     virtual void Perturbation(float length) override;
 };
 /* *************************************************************** */
@@ -53,8 +61,8 @@ class reg_optimiser_gpu: public reg_optimiser<float> {
  */
 class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
 protected:
-    float4 *array1;
-    float4 *array2;
+    float4 *array1, *array1Bw;
+    float4 *array2, *array2Bw;
     bool firstCall;
 
 #ifdef NR_TESTING
@@ -72,37 +80,36 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
                             bool optY,
                             bool optZ,
                             size_t maxIt,
-                            size_t start,
+                            size_t startIt,
                             InterfaceOptimiser *intOpt,
                             float *cppData,
-                            float *gradData = nullptr,
-                            size_t nvoxBw = 0,
-                            float *cppDataBw = nullptr,
-                            float *gradDataBw = nullptr) override;
+                            float *gradData,
+                            size_t nvoxBw,
+                            float *cppDataBw,
+                            float *gradDataBw) override;
     virtual void Optimise(float maxLength,
                           float smallLength,
-                          float &startLength) override;
+                          float& startLength) override;
     virtual void Perturbation(float length) override;
 };
 /* *************************************************************** */
-/** @brief
- */
 extern "C++"
 void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
                                          float4 *conjugateGCuda,
                                          float4 *conjugateHCuda,
                                          const size_t& nVoxels);
 /* *************************************************************** */
-/** @brief
- */
 extern "C++"
-void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda,
+void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
                                   float4 *conjugateGCuda,
                                   float4 *conjugateHCuda,
-                                  const size_t& nVoxels);
+                                  const size_t& nVoxels,
+                                  const bool& isSymmetric,
+                                  float4 *gradientImageBwCuda,
+                                  float4 *conjugateGBwCuda,
+                                  float4 *conjugateHBwCuda,
+                                  const size_t& nVoxelsBw);
 /* *************************************************************** */
-/** @brief
- */
 extern "C++"
 void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
                                         float4 *controlPointImageCuda,
diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu
index 33032095..2ebb18f5 100755
--- a/reg-lib/cuda/_reg_optimiser_kernels.cu
+++ b/reg-lib/cuda/_reg_optimiser_kernels.cu
@@ -9,7 +9,7 @@ __global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateGCuda,
     }
 }
 /* *************************************************************** */
-__global__ void reg_GetConjugateGradient1_kernel(float2 *sums,
+__global__ void reg_getConjugateGradient1_kernel(float2 *sums,
                                                  cudaTextureObject_t gradientImageTexture,
                                                  cudaTextureObject_t conjugateGTexture,
                                                  cudaTextureObject_t conjugateHTexture,
@@ -27,7 +27,7 @@ __global__ void reg_GetConjugateGradient1_kernel(float2 *sums,
     }
 }
 /* *************************************************************** */
-__global__ void reg_GetConjugateGradient2_kernel(float4 *gradientImageCuda,
+__global__ void reg_getConjugateGradient2_kernel(float4 *gradientImageCuda,
                                                  float4 *conjugateGCuda,
                                                  float4 *conjugateHCuda,
                                                  const unsigned nVoxels,

From 6b33dcef2eb0393226af41115f862fe59a7a300f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 24 Jul 2023 14:38:53 +0100
Subject: [PATCH 172/314] Add symmetric scheme support for
 reg_test_conjugateGradient #92

---
 niftyreg_build_version.txt              |   2 +-
 reg-test/reg_test_conjugateGradient.cpp | 169 +++++++++++++++++-------
 2 files changed, 124 insertions(+), 47 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 8641ad81..f20bd4ef 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-291
+292
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 39e3195e..a5ff8f44 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -14,8 +14,8 @@
 
 class ConjugateGradientTest: public InterfaceOptimiser {
 protected:
-    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, NiftiImage>;
-    using TestCase = std::tuple<shared_ptr<Platform>, unique_ptr<F3dContent>, TestData, bool, bool, bool, float>;
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, NiftiImage, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<shared_ptr<Platform>, unique_ptr<F3dContent>, unique_ptr<F3dContent>, TestData, bool, bool, bool, float>;
 
     inline static vector<TestCase> testCases;
 
@@ -54,13 +54,17 @@ class ConjugateGradientTest: public InterfaceOptimiser {
         // Generate the different test cases
         // Test 2D
         NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
+        NiftiImage controlPointGridBw2d(controlPointGrid2d);
         NiftiImage bestControlPointGrid2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData);
         NiftiImage transformationGradient2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData);
+        NiftiImage transformationGradientBw2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData);
         auto bestCpp2dPtr = bestControlPointGrid2d.data();
         auto transGrad2dPtr = transformationGradient2d.data();
+        auto transGradBw2dPtr = transformationGradientBw2d.data();
         for (size_t i = 0; i < transformationGradient2d.nVoxels(); ++i) {
             bestCpp2dPtr[i] = distr(gen);
             transGrad2dPtr[i] = distr(gen);
+            transGradBw2dPtr[i] = distr(gen);
         }
 
         // Add the test data
@@ -69,19 +73,25 @@ class ConjugateGradientTest: public InterfaceOptimiser {
             "2D",
             std::move(reference2d),
             std::move(controlPointGrid2d),
+            std::move(controlPointGridBw2d),
             std::move(bestControlPointGrid2d),
-            std::move(transformationGradient2d)
+            std::move(transformationGradient2d),
+            std::move(transformationGradientBw2d)
         ));
 
         // Test 3D
         NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+        NiftiImage controlPointGridBw3d(controlPointGrid3d);
         NiftiImage bestControlPointGrid3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData);
         NiftiImage transformationGradient3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData);
+        NiftiImage transformationGradientBw3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData);
         auto bestCpp3dPtr = bestControlPointGrid3d.data();
         auto transGrad3dPtr = transformationGradient3d.data();
+        auto transGradBw3dPtr = transformationGradientBw3d.data();
         for (size_t i = 0; i < transformationGradient3d.nVoxels(); ++i) {
             bestCpp3dPtr[i] = distr(gen);
             transGrad3dPtr[i] = distr(gen);
+            transGradBw3dPtr[i] = distr(gen);
         }
 
         // Add the test data
@@ -89,8 +99,10 @@ class ConjugateGradientTest: public InterfaceOptimiser {
             "3D",
             std::move(reference3d),
             std::move(controlPointGrid3d),
+            std::move(controlPointGridBw3d),
             std::move(bestControlPointGrid3d),
-            std::move(transformationGradient3d)
+            std::move(transformationGradient3d),
+            std::move(transformationGradientBw3d)
         ));
 
         // Add platforms, optimise*, and scale to the test data
@@ -104,10 +116,11 @@ class ConjugateGradientTest: public InterfaceOptimiser {
                         for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) {
                             // Make a copy of the test data
                             auto td = testData;
-                            auto&& [testName, reference, controlPointGrid, bestControlPointGrid, transGrad] = td;
+                            auto&& [testName, reference, controlPointGrid, controlPointGridBw, bestControlPointGrid, transGrad, transGradBw] = td;
                             // Add content
                             unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
-                            testCases.push_back({ platform, std::move(content), std::move(td), optimiseX, optimiseY, optimiseZ, distr(gen) });
+                            unique_ptr<F3dContent> contentBw{ contentCreator->Create(reference, reference, controlPointGridBw) };
+                            testCases.push_back({ platform, std::move(content), std::move(contentBw), std::move(td), optimiseX, optimiseY, optimiseZ, distr(gen) });
                         }
                     }
                 }
@@ -148,23 +161,34 @@ class ConjugateGradientTest: public InterfaceOptimiser {
         }
     }
 
-    void UpdateGradientValues(NiftiImage& gradient, const bool& firstCall) {
+    void UpdateGradientValues(NiftiImage& gradient, const bool& firstCall, const bool& isSymmetric, NiftiImage *gradientBw) {
         // Create array1 and array2
-        static NiftiImage array1;
-        static NiftiImage array2;
+        static NiftiImage array1, array1Bw;
+        static NiftiImage array2, array2Bw;
         if (firstCall) {
-            array1 = NiftiImage(gradient, NiftiImage::Copy::ImageInfoAndAllocData);
-            array2 = NiftiImage(gradient, NiftiImage::Copy::ImageInfoAndAllocData);
+            array1 = array2 = NiftiImage(gradient, NiftiImage::Copy::ImageInfoAndAllocData);
+            if (isSymmetric)
+                array1Bw = array2Bw = NiftiImage(*gradientBw, NiftiImage::Copy::ImageInfoAndAllocData);
         }
 
         auto gradientPtr = gradient.data();
         auto array1Ptr = array1.data();
         auto array2Ptr = array2.data();
+        NiftiImageData gradientBwPtr, array1BwPtr, array2BwPtr;
+        if (isSymmetric) {
+            gradientBwPtr = gradientBw->data();
+            array1BwPtr = array1Bw.data();
+            array2BwPtr = array2Bw.data();
+        }
 
         if (firstCall) {
             // Initialise array1 and array2
             for (size_t i = 0; i < gradient.nVoxels(); i++)
                 array2Ptr[i] = array1Ptr[i] = -static_cast<float>(gradientPtr[i]);
+            if (isSymmetric) {
+                for (size_t i = 0; i < gradientBw->nVoxels(); i++)
+                    array2BwPtr[i] = array1BwPtr[i] = -static_cast<float>(gradientBwPtr[i]);
+            }
         } else {
             // Calculate gam
             double dgg = 0, gg = 0;
@@ -172,7 +196,15 @@ class ConjugateGradientTest: public InterfaceOptimiser {
                 gg += static_cast<float>(array2Ptr[i]) * static_cast<float>(array1Ptr[i]);
                 dgg += (static_cast<float>(gradientPtr[i]) + static_cast<float>(array1Ptr[i])) * static_cast<float>(gradientPtr[i]);
             }
-            const double gam = dgg / gg;
+            double gam = dgg / gg;
+            if (isSymmetric) {
+                double dggBw = 0, ggBw = 0;
+                for (size_t i = 0; i < gradientBw->nVoxels(); i++) {
+                    ggBw += static_cast<float>(array2BwPtr[i]) * static_cast<float>(array1BwPtr[i]);
+                    dggBw += (static_cast<float>(gradientBwPtr[i]) + static_cast<float>(array1BwPtr[i])) * static_cast<float>(gradientBwPtr[i]);
+                }
+                gam = (dgg + dggBw) / (gg + ggBw);
+            }
 
             // Update gradient values
             for (size_t i = 0; i < gradient.nVoxels(); i++) {
@@ -180,6 +212,13 @@ class ConjugateGradientTest: public InterfaceOptimiser {
                 array2Ptr[i] = static_cast<float>(array1Ptr[i]) + gam * static_cast<float>(array2Ptr[i]);
                 gradientPtr[i] = -static_cast<float>(array2Ptr[i]);
             }
+            if (isSymmetric) {
+                for (size_t i = 0; i < gradientBw->nVoxels(); i++) {
+                    array1BwPtr[i] = -static_cast<float>(gradientBwPtr[i]);
+                    array2BwPtr[i] = static_cast<float>(array1BwPtr[i]) + gam * static_cast<float>(array2BwPtr[i]);
+                    gradientBwPtr[i] = -static_cast<float>(array2BwPtr[i]);
+                }
+            }
         }
     }
 
@@ -193,8 +232,8 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
-        auto&& [platform, content, testData, optimiseX, optimiseY, optimiseZ, scale] = testCase;
-        auto&& [testName, reference, controlPointGrid, bestControlPointGrid, transGrad] = testData;
+        auto&& [platform, content, contentBw, testData, optimiseX, optimiseY, optimiseZ, scale] = testCase;
+        auto&& [testName, reference, controlPointGrid, controlPointGridBw, bestControlPointGrid, transGrad, transGradBw] = testData;
         const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ") + " scale = " + std::to_string(scale);
 
         SECTION(sectionName) {
@@ -207,11 +246,15 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
             img.disown();
             content->UpdateControlPointGrid();
 
-            // Set the transformation gradient
+            // Set the transformation gradients
             img = content->GetTransformationGradient();
             img.copyData(transGrad);
             img.disown();
             content->UpdateTransformationGradient();
+            img = contentBw->GetTransformationGradient();
+            img.copyData(transGradBw);
+            img.disown();
+            contentBw->UpdateTransformationGradient();
 
             // Create a copy of the control point grid for expected results
             NiftiImage controlPointGridExpected = bestControlPointGrid;
@@ -237,41 +280,75 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
             // Update the gradient values
             // Only run once by discarding other optimiseX, optimiseY, optimiseZ combinations
             if (!optimiseX && !optimiseY && !optimiseZ) {
-                std::cout << "\n**************** UpdateGradientValues " << sectionName << " ****************" << std::endl;
-
-                // Initialise the conjugate gradient
-                optimiser->UpdateGradientValues();
-                UpdateGradientValues(transGrad, true);
-                // Fill the gradient with random values
-                std::random_device rd;
-                std::mt19937 gen(rd());
-                std::uniform_real_distribution<float> distr(0, 1);
-                auto gradientPtr = transGrad.data();
-                for (size_t i = 0; i < transGrad.nVoxels(); i++)
-                    gradientPtr[i] = distr(gen);
-                // Update the transformation gradient
-                img = content->GetTransformationGradient();
-                img.copyData(transGrad);
-                img.disown();
-                content->UpdateTransformationGradient();
-                // Get the gradient values
-                optimiser->UpdateGradientValues();
-                UpdateGradientValues(transGrad, false);
-
-                // Check the results
-                img = content->GetTransformationGradient();
-                const auto gradPtr = img.data();
-                const auto gradExpPtr = transGrad.data();
-                img.disown();
-                for (size_t i = 0; i < transGrad.nVoxels(); ++i) {
-                    const float gradVal = gradPtr[i];
-                    const float gradExpVal = gradExpPtr[i];
-                    std::cout << i << " " << gradVal << " " << gradExpVal << std::endl;
-                    REQUIRE(fabs(gradVal - gradExpVal) < EPS);
+                for (int isSymmetric = 0; isSymmetric < 2; isSymmetric++) {
+                    std::cout << "\n**************** UpdateGradientValues " << sectionName + (isSymmetric ? " Symmetric" : "") << " ****************" << std::endl;
+
+                    // Create a random number generator
+                    std::random_device rd;
+                    std::mt19937 gen(rd());
+                    std::uniform_real_distribution<float> distr(0, 1);
+
+                    // Create a symmetric optimiser if required
+                    if (isSymmetric)
+                        optimiser.reset(platform->template CreateOptimiser<float>(*content, *this, 0, true, optimiseX, optimiseY, optimiseZ, contentBw.get()));
+
+                    // Initialise the conjugate gradients
+                    optimiser->UpdateGradientValues();
+                    UpdateGradientValues(transGrad, true, isSymmetric, &transGradBw);
+
+                    // Fill the gradients with random values
+                    auto gradientPtr = transGrad.data();
+                    auto gradientBwPtr = transGradBw.data();
+                    for (size_t i = 0; i < transGrad.nVoxels(); i++) {
+                        gradientPtr[i] = distr(gen);
+                        if (isSymmetric)
+                            gradientBwPtr[i] = distr(gen);
+                    }
+                    // Update the transformation gradients
+                    img = content->GetTransformationGradient();
+                    img.copyData(transGrad);
+                    img.disown();
+                    content->UpdateTransformationGradient();
+                    if (isSymmetric) {
+                        img = contentBw->GetTransformationGradient();
+                        img.copyData(transGradBw);
+                        img.disown();
+                        contentBw->UpdateTransformationGradient();
+                    }
+
+                    // Get the gradient values
+                    optimiser->UpdateGradientValues();
+                    UpdateGradientValues(transGrad, false, isSymmetric, &transGradBw);
+
+                    // Check the results
+                    img = content->GetTransformationGradient();
+                    const auto gradPtr = img.data();
+                    const auto gradExpPtr = transGrad.data();
+                    img.disown();
+                    NiftiImageData gradBwPtr, gradExpBwPtr;
+                    if (isSymmetric) {
+                        img = contentBw->GetTransformationGradient();
+                        gradBwPtr = img.data();
+                        gradExpBwPtr = transGradBw.data();
+                        img.disown();
+                    }
+                    for (size_t i = 0; i < transGrad.nVoxels(); ++i) {
+                        const float gradVal = gradPtr[i];
+                        const float gradExpVal = gradExpPtr[i];
+                        std::cout << i << " " << gradVal << " " << gradExpVal << std::endl;
+                        REQUIRE(fabs(gradVal - gradExpVal) < EPS);
+                        if (isSymmetric) {
+                            const float gradBwVal = gradBwPtr[i];
+                            const float gradExpBwVal = gradExpBwPtr[i];
+                            std::cout << i << " " << gradBwVal << " " << gradExpBwVal << " backwards" << std::endl;
+                            REQUIRE(fabs(gradBwVal - gradExpBwVal) < EPS);
+                        }
+                    }
                 }
             }
             // Ensure the termination of content before CudaContext
             content.reset();
+            contentBw.reset();
         }
     }
 }

From da81948365c304c699716ae4d3cd669b2a3aee25 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Tue, 25 Jul 2023 15:52:03 +0100
Subject: [PATCH 173/314] #92: added bending energy tests

---
 niftyreg_build_version.txt                |   2 +-
 reg-test/CMakeLists.txt                   |   1 +
 reg-test/reg_test_be.cpp                  | 237 ++++++++++++++++++++++
 reg-test/reg_test_common.h                |  14 +-
 reg-test/reg_test_getDeformationField.cpp |   4 +-
 reg-test/reg_test_nmi.cpp                 |   2 +-
 6 files changed, 245 insertions(+), 15 deletions(-)
 create mode 100644 reg-test/reg_test_be.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f20bd4ef..4438e305 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-292
+293
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 2ac8c8ec..7d3faeef 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -117,6 +117,7 @@ set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
 set(EXEC_LIST reg_test_nmi ${EXEC_LIST})
+set(EXEC_LIST reg_test_be ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
 if(USE_CUDA)
diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp
new file mode 100644
index 00000000..92171dd3
--- /dev/null
+++ b/reg-test/reg_test_be.cpp
@@ -0,0 +1,237 @@
+// OpenCL is not supported for this test
+#undef _USE_OPENCL
+
+#include "reg_test_common.h"
+#include <iomanip>
+
+/*
+    This test file contains the following unit tests:
+    - BE computation for an identity transformation
+    - BE computation for an affine transformation
+    - BE computation for non-linear transformation
+*/
+
+
+class BendingEnergyTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, float>;
+    using TestCase = std::tuple<std::string, float, float>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    BendingEnergyTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::mt19937 gen(0);
+        std::uniform_real_distribution<float> distr(-1, 1);
+
+        // Create a 2D reference image
+        vector<NiftiImage::dim_t> dim{ 4, 4 };
+        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
+
+        // Create a 3D reference image
+        dim.push_back(4);
+        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+
+        // Create 2D and 3D control point grids
+        NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
+        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+
+        // Add the test data
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "BE identity 2D",
+            reference2d,
+            NiftiImage(controlPointGrid2d),
+            0.f
+        ));
+        testData.emplace_back(TestData(
+            "BE identity 3D",
+            reference3d,
+            NiftiImage(controlPointGrid3d),
+            0.f
+        ));
+        // Add random values to the control point grid coefficients
+        // No += or + operator for RNifti::NiftiImageData:Element
+        // so reverting to old school for now
+        float *cpp2dPtr = static_cast<float*>(controlPointGrid2d->data);
+        float *cpp3dPtr = static_cast<float *>(controlPointGrid3d->data);
+        for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i)
+            cpp2dPtr[i] += distr(gen);
+        for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i)
+            cpp3dPtr[i] += distr(gen);
+        // Add the test data
+        testData.emplace_back(TestData(
+            "BE random 2D",
+            reference2d,
+            NiftiImage(controlPointGrid2d),
+            this->GetBe2d(controlPointGrid2d)
+        ));
+        testData.emplace_back(TestData(
+            "BE random 3D",
+            reference3d,
+            NiftiImage(controlPointGrid3d),
+            this->GetBe3d(controlPointGrid3d)
+        ));
+
+        // Set some scaling transformation in the transformations
+        mat44 *affine2d = new mat44;
+        mat44 *affine3d = new mat44;
+        reg_mat44_eye(affine2d);
+        reg_mat44_eye(affine3d);
+        affine3d->m[0][0] = affine2d->m[0][0] = 0.8f;
+        affine3d->m[1][1] = affine2d->m[1][1] = 1.2f;
+        affine3d->m[2][2] = 1.1f;
+        reg_affine_getDeformationField(affine2d, controlPointGrid2d);
+        reg_affine_getDeformationField(affine3d, controlPointGrid3d);
+        delete affine2d, affine3d;
+
+        // Add the test data
+        testData.emplace_back(TestData(
+            "BE scaling 2D",
+            reference2d,
+            NiftiImage(controlPointGrid2d),
+            0.f
+        ));
+        testData.emplace_back(TestData(
+            "BE scaling 3D",
+            reference3d,
+            NiftiImage(controlPointGrid3d),
+            0.f
+        ));
+
+        // Compute the Bending energy for each use case
+        for (auto&& data : testData) {
+            for (auto&& platformType : PlatformTypes) {
+                // Make a copy of the test data
+                auto&& [testName, reference, controlPointGrid, expected] = data;
+                // Add content
+                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
+                unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
+                unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                float be = compute->ApproxBendingEnergy();
+                testCases.push_back({ testName + " " + platform->GetName(), be, expected });
+            }
+        }
+    }
+    float GetBe2d(NiftiImage cpp)
+    {
+        // variable to store the bending energy and the normalisation value
+        double be = 0;
+
+        // The BSpine basis values are known since the control points all have a relative position equal to 0
+        float basis[3], first[3], second[3];
+        basis[0]=1.f/6.f;basis[1]=4.f/6.f;basis[2]=1.f/6.f;
+        first[0]=-.5f; first[1]=0.f; first[2]=.5f;
+        second[0]=1.f; second[1]=-2.f;second[2]=1.f;
+
+        // the first and last control points along each axis are
+        // ignored for lack of support
+        auto cppPtr = cpp.data();
+        for(unsigned y=1; y<cpp->dim[2]-1;++y){
+            for(unsigned x=1; x<cpp->dim[1]-1;++x){
+                // The BE is computed as
+                // BE=dXX/dx^2 + dYY/dy^2 + dXX/dy^2 + dYY/dx^2 + 2 * [dXY/dx^2 + dXY/dy^2]
+                float XX_x=0,YY_x=0, XY_x=0;
+                float XX_y=0,YY_y=0, XY_y=0;
+                for(unsigned j=0; j<3;++j){
+                    for(unsigned i=0; i<3;++i){
+                        unsigned cpIndex = (y+j-1) * cpp->dim[1] + x+i-1;
+                        float x_val = cppPtr[cpIndex];
+                        float y_val = cppPtr[cpIndex + cpp.nVoxelsPerVolume()];
+                        XX_x += x_val * second[i] * basis[j];
+                        YY_x += x_val * basis[i] * second[j];
+                        XY_x += x_val * first[i] * first[j];
+                        XX_y += y_val * second[i] * basis[j];
+                        YY_y += y_val * basis[i] * second[j];
+                        XY_y += y_val * first[i] * first[j];
+                    }
+                }
+                be += XX_x*XX_x + YY_x*YY_x + XX_y*XX_y + YY_y*YY_y + \
+                    2.*XY_x*XY_x + 2.*XY_y*XY_y;
+            }
+        }
+        return (float)(be/(double)cpp.nVoxels());
+    }
+    float GetBe3d(NiftiImage cpp)
+    {
+        // variable to store the bending energy and the normalisation value
+        double be = 0;
+
+        // The BSpine basis values are known since the control points all have a relative position equal to 0
+        float basis[3], first[3], second[3];
+        basis[0]=1.f/6.f;basis[1]=4.f/6.f;basis[2]=1.f/6.f;
+        first[0]=-.5f; first[1]=0.f; first[2]=.5f;
+        second[0]=1.f; second[1]=-2.f;second[2]=1.f;
+
+        auto cppPtr = cpp.data();
+        // the first and last control points along each axis are
+        // ignored for lack of support
+        for(unsigned z=1; z<cpp->nz-1;++z){
+            for(unsigned y=1; y<cpp->ny-1;++y){
+                for(unsigned x=1; x<cpp->nx-1;++x){
+                    float XX_x=0, YY_x=0, ZZ_x=0, XY_x=0, YZ_x=0, XZ_x=0;
+                    float XX_y=0, YY_y=0, ZZ_y=0, XY_y=0, YZ_y=0, XZ_y=0;
+                    float XX_z=0, YY_z=0, ZZ_z=0, XY_z=0, YZ_z=0, XZ_z=0;
+                    for(unsigned k=0; k<3;++k){
+                        for(unsigned j=0; j<3;++j){
+                            for(unsigned i=0; i<3;++i){
+                                unsigned cpIndex = ((z+k-1) * cpp->ny + y+j-1 ) * cpp->nx + x+i-1;
+                                float x_val = cppPtr[cpIndex];
+                                float y_val = cppPtr[cpIndex + cpp.nVoxelsPerVolume()];
+                                float z_val = cppPtr[cpIndex + 2*cpp.nVoxelsPerVolume()];
+                                XX_x += x_val * second[i] * basis[j] * basis[k];
+                                YY_x += x_val * basis[i] * second[j] * basis[k];
+                                ZZ_x += x_val * basis[i] * basis[j] * second[k];
+                                XY_x += x_val * first[i] * first[j] * basis[k];
+                                YZ_x += x_val * basis[i] * first[j] * first[k];
+                                XZ_x += x_val * first[i] * basis[j] * first[k];
+
+                                XX_y += y_val * second[i] * basis[j] * basis[k];
+                                YY_y += y_val * basis[i] * second[j] * basis[k];
+                                ZZ_y += y_val * basis[i] * basis[j] * second[k];
+                                XY_y += y_val * first[i] * first[j] * basis[k];
+                                YZ_y += y_val * basis[i] * first[j] * first[k];
+                                XZ_y += y_val * first[i] * basis[j] * first[k];
+
+                                XX_z += z_val * second[i] * basis[j] * basis[k];
+                                YY_z += z_val * basis[i] * second[j] * basis[k];
+                                ZZ_z += z_val * basis[i] * basis[j] * second[k];
+                                XY_z += z_val * first[i] * first[j] * basis[k];
+                                YZ_z += z_val * basis[i] * first[j] * first[k];
+                                XZ_z += z_val * first[i] * basis[j] * first[k];
+                            }
+                        }
+                    }
+                    be += XX_x*XX_x + YY_x*YY_x + ZZ_x*ZZ_x + \
+                        XX_y*XX_y + YY_y*YY_y + ZZ_y*ZZ_y + \
+                        XX_z*XX_z + YY_z*YY_z + ZZ_z*ZZ_z + \
+                        2.*XY_x*XY_x + 2.*YZ_x*YZ_x + 2.*XZ_x*XZ_x + \
+                        2.*XY_y*XY_y + 2.*YZ_y*YZ_y + 2.*XZ_y*XZ_y + \
+                        2.*XY_z*XY_z + 2.*YZ_z*YZ_z + 2.*XZ_z*XZ_z;
+                }
+            }
+        }
+        return (float)(be/(double)cpp.nVoxels());
+    }
+};
+
+TEST_CASE_METHOD(BendingEnergyTest, "Bending Energy", "[unit]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, result, expected] = testCase;
+
+        SECTION(testName) {
+            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            // if (fabs(result - expected) > EPS){
+                std::cout << "Result=" << result << " | Expected=" << expected << std::endl;
+            // }
+            REQUIRE(fabs(result - expected) < EPS);
+        }
+    }
+}
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 9be31b61..8ace6470 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -33,22 +33,14 @@ void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) {
 }
 
 NiftiImage CreateControlPointGrid(const NiftiImage& reference) {
-    // Set the spacing for the control point grid
-    float spacingInMillimetre[3] = { reference->dx, reference->dy, reference->dz };
-
-    // Define the spacing for the first level
-    float gridSpacing[3];
-    gridSpacing[0] = spacingInMillimetre[0];
-    gridSpacing[1] = spacingInMillimetre[1];
-    gridSpacing[2] = 1;
-    if (reference->nz > 1)
-        gridSpacing[2] = spacingInMillimetre[2];
+    // Set the spacing for the control point grid to 2 voxel along each axis
+    float gridSpacing[3] = { reference->dx*2, reference->dy*2, reference->dz*2};
 
     // Create and allocate the control point image
     NiftiImage controlPointGrid;
     reg_createControlPointGrid<float>(controlPointGrid, reference, gridSpacing);
 
-    // The control point position image is initialised with the affine transformation
+    // The control point position image is initialised with an identity transformation
     reg_getDeformationFromDisplacement(controlPointGrid);
 
     return controlPointGrid;
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index f9e15c86..32ccd7c2 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -65,7 +65,7 @@ class GetDeformationFieldTest {
         ));
 
         // Add platforms, composition, and bspline to the test data
-        for (auto&& testData : testData) {
+        for (auto&& data : testData) {
             for (auto&& platformType : PlatformTypes) {
                 shared_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
@@ -74,7 +74,7 @@ class GetDeformationFieldTest {
                         continue;   // CUDA platform does not support composition
                     for (int bspline = 0; bspline < 2; bspline++) {
                         // Make a copy of the test data
-                        auto td = testData;
+                        auto td = data;
                         auto&& [testName, reference, controlPointGrid] = td;
                         // Add content
                         unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index c5c887d4..3957ef77 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -98,7 +98,7 @@ class NMITest {
                 measure->Initialise(*measure_nmi, *content);
                 double nmi = measure_nmi->GetSimilarityMeasureValue();
 
-                testCases.push_back({ testName, nmi, expected});
+                testCases.push_back({ testName + " " + platform->GetName(), nmi, expected});
             }
         }
     }

From 6e67a5bd0f7c2178e85443175ba1ea8f503799c0 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Tue, 25 Jul 2023 16:21:27 +0100
Subject: [PATCH 174/314] Issue #68: checks for exising folder before saving

---
 niftyreg_build_version.txt      | 2 +-
 reg-io/_reg_ReadWriteImage.cpp  | 9 +++++++++
 reg-io/_reg_ReadWriteMatrix.cpp | 9 +++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 4438e305..26f42e64 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-293
+294
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index a23f2c7f..a7fa689a 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -12,6 +12,7 @@
 #include "_reg_ReadWriteImage.h"
 #include "_reg_tools.h"
 #include "_reg_stringFormat.h"
+#include <filesystem>
 
 /* *************************************************************** */
 void reg_hack_filename(nifti_image *image, std::string filename) {
@@ -120,6 +121,14 @@ nifti_image* reg_io_ReadImageHeader(const char *filename) {
 }
 /* *************************************************************** */
 void reg_io_WriteImageFile(nifti_image *image, const char *filename) {
+    // Check if the specified directory exists
+    std::filesystem::path p(filename);
+    p = p.parent_path();
+    if(!std::filesystem::exists(p) && p!=std::filesystem::path()){
+        std::cerr << "The specified folder to save the following file does not exist:" << std::endl;
+        std::cerr << filename << std::endl;
+        reg_exit();
+    }
     // First read the file format in order to use the correct library
     int fileFormat = reg_io_checkFileFormat(filename);
 
diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp
index 7b420d2c..d2f7674a 100644
--- a/reg-io/_reg_ReadWriteMatrix.cpp
+++ b/reg-io/_reg_ReadWriteMatrix.cpp
@@ -1,6 +1,7 @@
 #include "_reg_ReadWriteMatrix.h"
 #include "_reg_maths.h"
 #include <string>
+#include <filesystem>
 
 /* *************************************************************** */
 void reg_tool_ReadAffineFile(mat44 *mat,
@@ -127,6 +128,14 @@ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) {
 }
 /* *************************************************************** */
 void reg_tool_WriteAffineFile(const mat44 *mat, const char *fileName) {
+        // Check if the specified directory exists
+    std::filesystem::path p(fileName);
+    p = p.parent_path();
+    if(!std::filesystem::exists(p) && p!=std::filesystem::path()){
+        std::cerr << "The specified folder to save the following file does not exist:" << std::endl;
+        std::cerr << fileName << std::endl;
+        reg_exit();
+    }
     FILE *affineFile;
     affineFile = fopen(fileName, "w");
     for (int i = 0; i < 4; i++)

From 76efc9fa951221325a0fa7da2b8b4b8cba5bd889 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 27 Jul 2023 11:46:04 +0100
Subject: [PATCH 175/314] Refactorisations

---
 niftyreg_build_version.txt                   |    2 +-
 reg-apps/reg_average.cpp                     |    4 +-
 reg-apps/reg_jacobian.cpp                    |    8 +-
 reg-apps/reg_measure.cpp                     |    4 +-
 reg-apps/reg_ppcnr.cpp                       |    8 +-
 reg-apps/reg_resample.cpp                    |    8 +-
 reg-apps/reg_tools.cpp                       |   12 +-
 reg-apps/reg_transform.cpp                   |   14 +-
 reg-io/_reg_ReadWriteImage.cpp               |    4 +-
 reg-io/_reg_ReadWriteMatrix.cpp              |    4 +-
 reg-io/nrrd/reg_nrrd.cpp                     |    2 +-
 reg-lib/Content.cpp                          |    4 +-
 reg-lib/F3dContent.cpp                       |    2 +-
 reg-lib/cl/ClAladinContent.cpp               |    2 +-
 reg-lib/cl/ClResampleImageKernel.cpp         |    4 +-
 reg-lib/cpu/_reg_blockMatching.cpp           |    2 +-
 reg-lib/cpu/_reg_discrete_init.cpp           |    2 +-
 reg-lib/cpu/_reg_dti.cpp                     |   19 +-
 reg-lib/cpu/_reg_femTrans.cpp                |   10 +-
 reg-lib/cpu/_reg_globalTrans.cpp             |   10 +-
 reg-lib/cpu/_reg_kld.cpp                     |   15 +-
 reg-lib/cpu/_reg_lncc.cpp                    |   16 +-
 reg-lib/cpu/_reg_localTrans.cpp              | 7839 ++++++++----------
 reg-lib/cpu/_reg_localTrans.h                |    2 +-
 reg-lib/cpu/_reg_localTrans_jac.cpp          |   84 +-
 reg-lib/cpu/_reg_localTrans_regul.cpp        |   71 +-
 reg-lib/cpu/_reg_maths_eigen.cpp             |   18 +-
 reg-lib/cpu/_reg_mind.cpp                    |  218 +-
 reg-lib/cpu/_reg_mrf.cpp                     |   10 +-
 reg-lib/cpu/_reg_ssd.cpp                     |  111 +-
 reg-lib/cpu/_reg_thinPlateSpline.cpp         |    2 +-
 reg-lib/cpu/_reg_tools.cpp                   |  146 +-
 reg-lib/cpu/_reg_tools.h                     |   17 +-
 reg-lib/cuda/CudaF3dContent.cpp              |    4 +-
 reg-lib/cuda/_reg_common_cuda.cu             |  470 +-
 reg-lib/cuda/_reg_common_cuda.h              |   32 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu           |   12 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                 |   19 +-
 reg-lib/cuda/_reg_ssd_kernels.cu             |    8 +-
 reg-lib/cuda/affineDeformationKernel.cu      |    2 +-
 reg-lib/cuda/resampleKernel.cu               |    4 +-
 reg-test/reg_test_affineDeformationField.cpp |    2 +-
 reg-test/reg_test_be.cpp                     |  102 +-
 reg-test/reg_test_nmi.cpp                    |   34 +-
 reg-test/reg_test_regr_blockMatching.cpp     |    2 +-
 reg-test/reg_test_regr_lts.cpp               |    2 +-
 46 files changed, 4372 insertions(+), 4995 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 26f42e64..594cd09d 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-294
+295
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index 2f95c3f8..2fc5cb40 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -368,7 +368,7 @@ int compute_average_image(nifti_image *averageImage,
       demeanField->ndim=demeanField->dim[0]=5;
       demeanField->nt=demeanField->dim[4]=1;
       demeanField->nu=demeanField->dim[5]=demeanField->nz>1?3:2;
-      demeanField->nvox=CalcVoxelNumber(*demeanField, demeanField->ndim);
+      demeanField->nvox=NiftiImage::calcVoxelNumber(demeanField, demeanField->ndim);
       demeanField->nbyper=sizeof(float);
       demeanField->datatype=NIFTI_TYPE_FLOAT32;
       demeanField->intent_code=NIFTI_INTENT_VECTOR;
@@ -395,7 +395,7 @@ int compute_average_image(nifti_image *averageImage,
       deformationField->ndim=deformationField->dim[0]=5;
       deformationField->nt=deformationField->dim[4]=1;
       deformationField->nu=deformationField->dim[5]=deformationField->nz>1?3:2;
-      deformationField->nvox=CalcVoxelNumber(*deformationField, deformationField->ndim);
+      deformationField->nvox=NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim);
       deformationField->nbyper=sizeof(float);
       deformationField->datatype=NIFTI_TYPE_FLOAT32;
       deformationField->intent_code=NIFTI_INTENT_VECTOR;
diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp
index e4eaa54f..d3cb4757 100644
--- a/reg-apps/reg_jacobian.cpp
+++ b/reg-apps/reg_jacobian.cpp
@@ -52,7 +52,7 @@ void reg_jacobian_computeLog(nifti_image *image)
 template <class DataType>
 void reg_jacobian_convertMat33ToNii(mat33 *array, nifti_image *image)
 {
-   const size_t voxelNumber=CalcVoxelNumber(*image);
+   const size_t voxelNumber=NiftiImage::calcVoxelNumber(image, 3);
    DataType *ptrXX=static_cast<DataType *>(image->data);
    if(image->nz>1)
    {
@@ -285,7 +285,7 @@ int main(int argc, char **argv)
       jacobianImage->ndim=jacobianImage->dim[0]=jacobianImage->nz>1?3:2;
       jacobianImage->nu=jacobianImage->dim[5]=1;
       jacobianImage->nt=jacobianImage->dim[4]=1;
-      jacobianImage->nvox=CalcVoxelNumber(*jacobianImage, jacobianImage->ndim);
+      jacobianImage->nvox=NiftiImage::calcVoxelNumber(jacobianImage, jacobianImage->ndim);
       jacobianImage->datatype = inputTransformation->datatype;
       jacobianImage->nbyper = inputTransformation->nbyper;
       jacobianImage->cal_min=0;
@@ -339,7 +339,7 @@ int main(int argc, char **argv)
       jacobianImage->ndim=jacobianImage->dim[0]=5;
       jacobianImage->nu=jacobianImage->dim[5]=jacobianImage->nz>1?9:4;
       jacobianImage->nt=jacobianImage->dim[4]=1;
-      jacobianImage->nvox=CalcVoxelNumber(*jacobianImage, jacobianImage->ndim);
+      jacobianImage->nvox=NiftiImage::calcVoxelNumber(jacobianImage, jacobianImage->ndim);
       jacobianImage->datatype = inputTransformation->datatype;
       jacobianImage->nbyper = inputTransformation->nbyper;
       jacobianImage->cal_min=0;
@@ -348,7 +348,7 @@ int main(int argc, char **argv)
       jacobianImage->scl_inter = 0.0f;
       jacobianImage->data = calloc(jacobianImage->nvox, jacobianImage->nbyper);
 
-      mat33 *jacobianMatriceArray = (mat33 *)malloc(CalcVoxelNumber(*jacobianImage) * sizeof(mat33));
+      mat33 *jacobianMatriceArray = (mat33 *)malloc(NiftiImage::calcVoxelNumber(jacobianImage, 3) * sizeof(mat33));
       // Compute the map of Jacobian matrices
       switch((int)inputTransformation->intent_p1){
       case DISP_FIELD:
diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp
index 97a127fc..dffc2f2b 100755
--- a/reg-apps/reg_measure.cpp
+++ b/reg-apps/reg_measure.cpp
@@ -255,7 +255,7 @@ int main(int argc, char **argv)
    warpedFloImage->ndim=warpedFloImage->dim[0]=floImage->ndim;
    warpedFloImage->nt=warpedFloImage->dim[4]=floImage->nt;
    warpedFloImage->nu=warpedFloImage->dim[5]=floImage->nu;
-   warpedFloImage->nvox=CalcVoxelNumber(*warpedFloImage, warpedFloImage->ndim);
+   warpedFloImage->nvox=NiftiImage::calcVoxelNumber(warpedFloImage, warpedFloImage->ndim);
    warpedFloImage->cal_min=floImage->cal_min;
    warpedFloImage->cal_max=floImage->cal_max;
    warpedFloImage->scl_inter=floImage->scl_inter;
@@ -269,7 +269,7 @@ int main(int argc, char **argv)
    defField->ndim=defField->dim[0]=5;
    defField->nt=defField->dim[4]=1;
    defField->nu=defField->dim[5]=refImage->nz>1?3:2;
-   defField->nvox=CalcVoxelNumber(*defField, defField->ndim);
+   defField->nvox=NiftiImage::calcVoxelNumber(defField, defField->ndim);
    defField->datatype=NIFTI_TYPE_FLOAT32;
    defField->nbyper=sizeof(float);
    defField->data=calloc(defField->nvox,defField->nbyper);
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index f7c2fa5f..125b6aaa 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -189,7 +189,7 @@ int main(int argc, char **argv)
          nifti_image_free(source);
          makesource->ndim=makesource->dim[0] = 4;
          makesource->nt = makesource->dim[4] = atoi(argv[++i]);
-         makesource->nvox = CalcVoxelNumber(*makesource->nx, makesource->ndim);
+         makesource->nvox = NiftiImage::calcVoxelNumber(makesource->nx, makesource->ndim);
          makesource->data = malloc(makesource->nvox * makesource->nbyper);
          char *temp_data = reinterpret_cast<char *>(makesource->data);
          for(int ii=0; ii<makesource->nt; ii++) // fill with file data
@@ -212,7 +212,7 @@ int main(int argc, char **argv)
          nifti_image *makesource = nifti_copy_nim_info(source);
          makesource->ndim=makesource->dim[0] = 3;
          makesource->nt = makesource->dim[4] = 1;
-         makesource->nvox = CalcVoxelNumber(*makesource, makesource->ndim);
+         makesource->nvox = NiftiImage::calcVoxelNumber(makesource, makesource->ndim);
          makesource->data = malloc(makesource->nvox * makesource->nbyper);
          char *temp_data = reinterpret_cast<char *>(source->data);
          for(int ii=0; ii<source->nt; ii++) // fill with file data
@@ -402,7 +402,7 @@ int main(int argc, char **argv)
       mask = nifti_copy_nim_info(image);
       mask->ndim=mask->dim[0]=3;
       mask->nt=mask->dim[4]=1;
-      mask->nvox = CalcVoxelNumber(*mask, mask->ndim);
+      mask->nvox = NiftiImage::calcVoxelNumber(mask, mask->ndim);
       mask->data = malloc(mask->nvox*mask->nbyper);
       PrecisionType *intensityPtrM = static_cast<PrecisionType *>(mask->data);
       for(size_t i=0; i<mask->nvox; i++) intensityPtrM[i]=1.0;
@@ -858,7 +858,7 @@ int main(int argc, char **argv)
             nifti_image *stores = nifti_copy_nim_info(images);
             stores->ndim=stores->dim[0]=3;
             stores->nt=stores->dim[4]=1;
-            stores->nvox = CalcVoxelNumber(*stores, stores->ndim);
+            stores->nvox = NiftiImage::calcVoxelNumber(stores, stores->ndim);
             stores->data = calloc(stores->nvox,images->nbyper);
 
             nifti_image *storet = nifti_dup(*stores, false);
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index 793a340f..2f5f3a93 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -331,7 +331,7 @@ int main(int argc, char **argv)
    deformationFieldImage->dim[5]=deformationFieldImage->nu=referenceImage->nz>1?3:2;
    deformationFieldImage->dim[6]=deformationFieldImage->nv=1;
    deformationFieldImage->dim[7]=deformationFieldImage->nw=1;
-   deformationFieldImage->nvox = CalcVoxelNumber(*deformationFieldImage, deformationFieldImage->ndim);
+   deformationFieldImage->nvox = NiftiImage::calcVoxelNumber(deformationFieldImage, deformationFieldImage->ndim);
    deformationFieldImage->scl_slope=1.f;
    deformationFieldImage->scl_inter=0.f;
    if(inputTransformationImage!=nullptr)
@@ -456,7 +456,7 @@ int main(int argc, char **argv)
          reg_print_msg_debug("DTI-based resampling\n");
 #endif
          // Compute first the Jacobian matrices
-         mat33 *jacobian = (mat33 *)malloc(CalcVoxelNumber(*deformationFieldImage) * sizeof(mat33));
+         mat33 *jacobian = (mat33 *)malloc(NiftiImage::calcVoxelNumber(deformationFieldImage, 3) * sizeof(mat33));
          reg_defField_getJacobianMatrix(deformationFieldImage, jacobian);
          // resample the DTI image
          bool timepoints[7];
@@ -475,7 +475,7 @@ int main(int argc, char **argv)
       else{
          if(flag->usePSF){
             // Compute first the Jacobian matrices
-            mat33 *jacobian = (mat33 *)malloc(CalcVoxelNumber(*deformationFieldImage) * sizeof(mat33));
+            mat33 *jacobian = (mat33 *)malloc(NiftiImage::calcVoxelNumber(deformationFieldImage, 3) * sizeof(mat33));
             reg_defField_getJacobianMatrix(deformationFieldImage, jacobian);
 
             reg_resampleImage_PSF(floatingImage,
@@ -530,7 +530,7 @@ int main(int argc, char **argv)
       gridImage->dim[3]=gridImage->nz=floatingImage->nz;
       gridImage->dim[4]=gridImage->nt=1;
       gridImage->dim[5]=gridImage->nu=1;
-      gridImage->nvox = CalcVoxelNumber(*gridImage, gridImage->ndim);
+      gridImage->nvox = NiftiImage::calcVoxelNumber(gridImage, gridImage->ndim);
       gridImage->datatype = NIFTI_TYPE_UINT8;
       gridImage->nbyper = sizeof(unsigned char);
       gridImage->data = calloc(gridImage->nvox, gridImage->nbyper);
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 8ddf43f2..4f2ea7b8 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -883,14 +883,14 @@ int main(int argc, char **argv)
         def->pixdim[6]=def->dv=1.f;
         def->dim[7]=def->nw=1;
         def->pixdim[7]=def->dw=1.f;
-        def->nvox = CalcVoxelNumber(*def, def->ndim);
+        def->nvox = NiftiImage::calcVoxelNumber(def, def->ndim);
         def->nbyper = sizeof(float);
         def->datatype = NIFTI_TYPE_FLOAT32;
         def->data = calloc(def->nvox,def->nbyper);
         // Fill the deformation field with an identity transformation
         reg_getDeformationFromDisplacement(def);
         // Allocate and compute the Jacobian matrices
-        const size_t jacobianVoxelNumber = CalcVoxelNumber(*def);
+        const size_t jacobianVoxelNumber = NiftiImage::calcVoxelNumber(def, 3);
         mat33 *jacobian = (mat33 *)malloc(jacobianVoxelNumber * sizeof(mat33));
         for (size_t i = 0; i < jacobianVoxelNumber; ++i)
             reg_mat33_eye(&jacobian[i]);
@@ -950,7 +950,7 @@ int main(int argc, char **argv)
         nifti_image *outputImage = nifti_copy_nim_info(image);
         outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1;
         outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2;
-        outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim);
+        outputImage->nvox = NiftiImage::calcVoxelNumber(outputImage, outputImage->ndim);
         outputImage->datatype = NIFTI_TYPE_RGB24;
         outputImage->nbyper = 3 * sizeof(unsigned char);
         outputImage->data = malloc(outputImage->nbyper*outputImage->nvox);
@@ -988,7 +988,7 @@ int main(int argc, char **argv)
         nifti_image *outputImage = nifti_copy_nim_info(image);
         outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1;
         outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2;
-        outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim);
+        outputImage->nvox = NiftiImage::calcVoxelNumber(outputImage, outputImage->ndim);
         outputImage->datatype = NIFTI_TYPE_RGB24;
         outputImage->nbyper = 3 * sizeof(unsigned char);
         outputImage->scl_slope = 1.f;
@@ -1079,7 +1079,7 @@ int main(int argc, char **argv)
         if(image->datatype!=NIFTI_TYPE_FLOAT32)
             reg_tools_changeDatatype<float>(image);
         // Create a temporary mask
-        const size_t voxelNumber = CalcVoxelNumber(*image);
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
         int *temp_mask = (int *)malloc(voxelNumber * sizeof(int));
         for (size_t i = 0; i < voxelNumber; ++i)
             temp_mask[i]=i;
@@ -1097,7 +1097,7 @@ int main(int argc, char **argv)
         nifti_image *outputImage = nifti_copy_nim_info(image);
         outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1;
         outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2;
-        outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim);
+        outputImage->nvox = NiftiImage::calcVoxelNumber(outputImage, outputImage->ndim);
         outputImage->cal_min=0;
         outputImage->data = calloc(outputImage->nbyper, outputImage->nvox);
         float *inPtr = static_cast<float *>(image->data);
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 174fe2fe..84702a09 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -388,7 +388,7 @@ int main(int argc, char **argv)
          outputTransformationImage->ndim=outputTransformationImage->dim[0]=5;
          outputTransformationImage->nt=outputTransformationImage->dim[4]=1;
          outputTransformationImage->nu=outputTransformationImage->dim[5]=outputTransformationImage->nz>1?3:2;
-         outputTransformationImage->nvox=CalcVoxelNumber(*outputTransformationImage, outputTransformationImage->ndim);
+         outputTransformationImage->nvox=NiftiImage::calcVoxelNumber(outputTransformationImage, outputTransformationImage->ndim);
          outputTransformationImage->nbyper=sizeof(float);
          outputTransformationImage->datatype=NIFTI_TYPE_FLOAT32;
          outputTransformationImage->intent_code=NIFTI_INTENT_VECTOR;
@@ -684,7 +684,7 @@ int main(int argc, char **argv)
             output1TransImage->ndim=output1TransImage->dim[0]=5;
             output1TransImage->nt=output1TransImage->dim[4]=1;
             output1TransImage->nu=output1TransImage->dim[5]=output1TransImage->nz>1?3:2;
-            output1TransImage->nvox=CalcVoxelNumber(*output1TransImage, output1TransImage->ndim);
+            output1TransImage->nvox=NiftiImage::calcVoxelNumber(output1TransImage, output1TransImage->ndim);
             output1TransImage->scl_slope=1.f;
             output1TransImage->scl_inter=0.f;
             if(referenceImage->datatype!=NIFTI_TYPE_FLOAT32)
@@ -824,7 +824,7 @@ int main(int argc, char **argv)
                output2TransImage->ndim=output2TransImage->dim[0]=5;
                output2TransImage->nt=output2TransImage->dim[4]=1;
                output2TransImage->nu=output2TransImage->dim[5]=output2TransImage->nz>1?3:2;
-               output2TransImage->nvox=CalcVoxelNumber(*output2TransImage, output2TransImage->ndim);
+               output2TransImage->nvox=NiftiImage::calcVoxelNumber(output2TransImage, output2TransImage->ndim);
                output2TransImage->nbyper=output1TransImage->nbyper;
                output2TransImage->datatype=output1TransImage->datatype;
                output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper);
@@ -956,7 +956,7 @@ int main(int argc, char **argv)
          deformationFieldImage->ndim=deformationFieldImage->dim[0]=5;
          deformationFieldImage->nt=deformationFieldImage->dim[4]=1;
          deformationFieldImage->nu=deformationFieldImage->dim[5]=deformationFieldImage->nz>1?3:2;
-         deformationFieldImage->nvox=CalcVoxelNumber(*deformationFieldImage, deformationFieldImage->ndim);
+         deformationFieldImage->nvox=NiftiImage::calcVoxelNumber(deformationFieldImage, deformationFieldImage->ndim);
          deformationFieldImage->nbyper=sizeof(float);
          deformationFieldImage->datatype=NIFTI_TYPE_FLOAT32;
          deformationFieldImage->intent_code=NIFTI_INTENT_VECTOR;
@@ -1085,7 +1085,7 @@ int main(int argc, char **argv)
       landmarkImage->nx=landmarkImage->dim[1]=1;
       landmarkImage->ny=landmarkImage->dim[2]=1;
       landmarkImage->nz=landmarkImage->dim[3]=1;
-      landmarkImage->nvox=CalcVoxelNumber(*landmarkImage, landmarkImage->ndim);
+      landmarkImage->nvox=NiftiImage::calcVoxelNumber(landmarkImage, landmarkImage->ndim);
       landmarkImage->data=malloc(landmarkImage->nvox*landmarkImage->nbyper);
       float *landmarkImagePtr = static_cast<float *>(landmarkImage->data);
       for(size_t l=0, index=0;l<landmarkNumber;++l){
@@ -1274,7 +1274,7 @@ int main(int argc, char **argv)
          tempField->ndim=tempField->dim[0]=5;
          tempField->nt=tempField->dim[4]=1;
          tempField->nu=tempField->dim[5]=tempField->nz>1?3:2;
-         tempField->nvox=CalcVoxelNumber(*tempField, tempField->ndim);
+         tempField->nvox=NiftiImage::calcVoxelNumber(tempField, tempField->ndim);
          tempField->nbyper=inputTransImage->nbyper;
          tempField->datatype=inputTransImage->datatype;
          tempField->intent_code=NIFTI_INTENT_VECTOR;
@@ -1311,7 +1311,7 @@ int main(int argc, char **argv)
      outputTransImage->ndim = outputTransImage->dim[0] = 5;
      outputTransImage->nt = outputTransImage->dim[4] = 1;
      outputTransImage->nu = outputTransImage->dim[5] = outputTransImage->nz>1 ? 3 : 2;
-     outputTransImage->nvox = CalcVoxelNumber(*outputTransImage, outputTransImage->ndim);
+     outputTransImage->nvox = NiftiImage::calcVoxelNumber(outputTransImage, outputTransImage->ndim);
      outputTransImage->nbyper = inputTransImage->nbyper;
      outputTransImage->datatype = inputTransImage->datatype;
      outputTransImage->intent_code = NIFTI_INTENT_VECTOR;
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index a7fa689a..6e6b0663 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -124,7 +124,7 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) {
     // Check if the specified directory exists
     std::filesystem::path p(filename);
     p = p.parent_path();
-    if(!std::filesystem::exists(p) && p!=std::filesystem::path()){
+    if (!std::filesystem::exists(p) && p != std::filesystem::path()) {
         std::cerr << "The specified folder to save the following file does not exist:" << std::endl;
         std::cerr << filename << std::endl;
         reg_exit();
@@ -183,7 +183,7 @@ void reg_io_displayImageData1(nifti_image *image) {
                 text = stringFormat("[%d - %d - %d] = [", x, y, z);
                 for (int tu = 0; tu < image->nt * image->nu; ++tu) {
                     text = stringFormat("%s%g ", text.c_str(),
-                                        static_cast<double>(data[voxelIndex + tu * CalcVoxelNumber(*image)]));
+                                        static_cast<double>(data[voxelIndex + tu * NiftiImage::calcVoxelNumber(image, 3)]));
                 }
                 text = stringFormat("%s]", text.c_str());
                 reg_print_msg_debug(text.c_str());
diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp
index d2f7674a..4881bedf 100644
--- a/reg-io/_reg_ReadWriteMatrix.cpp
+++ b/reg-io/_reg_ReadWriteMatrix.cpp
@@ -128,10 +128,10 @@ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) {
 }
 /* *************************************************************** */
 void reg_tool_WriteAffineFile(const mat44 *mat, const char *fileName) {
-        // Check if the specified directory exists
+    // Check if the specified directory exists
     std::filesystem::path p(fileName);
     p = p.parent_path();
-    if(!std::filesystem::exists(p) && p!=std::filesystem::path()){
+    if (!std::filesystem::exists(p) && p != std::filesystem::path()) {
         std::cerr << "The specified folder to save the following file does not exist:" << std::endl;
         std::cerr << fileName << std::endl;
         reg_exit();
diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp
index 20c89f2f..7d57f16b 100644
--- a/reg-io/nrrd/reg_nrrd.cpp
+++ b/reg-io/nrrd/reg_nrrd.cpp
@@ -17,7 +17,7 @@ template <class DataType>
 void reg_convertVectorField_nifti_to_nrrd(nifti_image *niiImage,
       Nrrd *nrrdImage)
 {
-   const size_t voxNumber = CalcVoxelNumber(*niiImage);
+   const size_t voxNumber = NiftiImage::calcVoxelNumber(niiImage, 3);
 
    DataType *inPtrX=static_cast<DataType *>(niiImage->data);
    DataType *inPtrY=&inPtrX[voxNumber];
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index 7db0847a..afd8b4ed 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -35,7 +35,7 @@ void Content::AllocateWarped() {
     warped->dim[0] = warped->ndim = floating->ndim;
     warped->dim[4] = warped->nt = floating->nt;
     warped->pixdim[4] = warped->dt = 1;
-    warped->nvox = CalcVoxelNumber(*warped, warped->ndim);
+    warped->nvox = NiftiImage::calcVoxelNumber(warped, warped->ndim);
     warped->datatype = floating->datatype;
     warped->nbyper = floating->nbyper;
     warped->data = calloc(warped->nvox, warped->nbyper);
@@ -61,7 +61,7 @@ void Content::AllocateDeformationField(size_t bytes) {
     deformationField->pixdim[6] = deformationField->dv = 1;
     deformationField->dim[7] = deformationField->nw = 1;
     deformationField->pixdim[7] = deformationField->dw = 1;
-    deformationField->nvox = CalcVoxelNumber(*deformationField, deformationField->ndim);
+    deformationField->nvox = NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim);
     deformationField->nbyper = (int)bytes;
     if (bytes == 4)
         deformationField->datatype = NIFTI_TYPE_FLOAT32;
diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp
index 0f474212..029d7ec0 100644
--- a/reg-lib/F3dContent.cpp
+++ b/reg-lib/F3dContent.cpp
@@ -36,7 +36,7 @@ void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) {
     localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0];
     localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4];
     localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5];
-    localWeightSim->nvox = CalcVoxelNumber(*localWeightSim, localWeightSim->ndim);
+    localWeightSim->nvox = NiftiImage::calcVoxelNumber(localWeightSim, localWeightSim->ndim);
     localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper);
     reg_getDeformationFromDisplacement(voxelBasedMeasureGradient);
     reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0);
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index 07b263ae..ccdb1238 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -105,7 +105,7 @@ void ClAladinContent::AllocateClPtrs() {
     }
     if (referenceMask != nullptr && reference != nullptr) {
         maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                   CalcVoxelNumber(*reference) * sizeof(int), referenceMask, &errNum);
+                                   NiftiImage::calcVoxelNumber(reference, 3) * sizeof(int), referenceMask, &errNum);
         sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): ");
     }
 }
diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp
index 29ff7f36..b22671b9 100644
--- a/reg-lib/cl/ClResampleImageKernel.cpp
+++ b/reg-lib/cl/ClResampleImageKernel.cpp
@@ -81,7 +81,7 @@ void ClResampleImageKernel::Calculate(int interp,
     }
     sContext->CheckErrNum(errNum, "Error setting kernel ResampleImage.");
 
-    const size_t targetVoxelNumber = CalcVoxelNumber(*this->warpedImage);
+    const size_t targetVoxelNumber = NiftiImage::calcVoxelNumber(this->warpedImage, 3);
     const unsigned maxThreads = sContext->GetMaxThreads();
     const unsigned maxBlocks = sContext->GetMaxBlocks();
 
@@ -95,7 +95,7 @@ void ClResampleImageKernel::Calculate(int interp,
     //    int numMats = 0; //needs to be a parameter
     //    float* jacMat_h = (float*) malloc(9 * numMats * sizeof(float));
 
-    cl_long2 voxelNumber = {{(cl_long)CalcVoxelNumber(*warpedImage), (cl_long)CalcVoxelNumber(*this->floatingImage)}};
+    cl_long2 voxelNumber = {{(cl_long)NiftiImage::calcVoxelNumber(warpedImage, 3), (cl_long)NiftiImage::calcVoxelNumber(this->floatingImage, 3)}};
     cl_uint3 fi_xyz = {{(cl_uint)floatingImage->nx, (cl_uint)floatingImage->ny, (cl_uint)floatingImage->nz}};
     cl_uint2 wi_tu = {{(cl_uint)warpedImage->nt, (cl_uint)warpedImage->nu}};
 
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index 7091b22d..907f932f 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -521,7 +521,7 @@ void block_matching_method3D(nifti_image * reference,
 #pragma omp parallel for default(none) \
    shared(params, reference, warped, referencePtr, warpedPtr, mask, referenceMatrix_xyz, \
    referenceOverlap, warpedOverlap, referenceValues, warpedValues) \
-   private(i, j, k, l, m, n, x, y, z, blockIndex, referenceIndex, \
+   private(i, j, l, m, n, x, y, z, blockIndex, referenceIndex, \
    index, tid, referencePtr_Z, referencePtr_XYZ, warpedPtr_Z, warpedPtr_XYZ, \
    maskPtr_Z, maskPtr_XYZ, value, bestCC, bestDisplacement, \
    referenceIndex_start_x, referenceIndex_start_y, referenceIndex_start_z, \
diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp
index 8c592e3c..d8ba9e84 100644
--- a/reg-lib/cpu/_reg_discrete_init.cpp
+++ b/reg-lib/cpu/_reg_discrete_init.cpp
@@ -26,7 +26,7 @@ reg_discrete_init::reg_discrete_init(reg_measure *_measure,
    this->image_dim = this->referenceImage->nz > 1 ? 3 :2;
    this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1;
    this->label_nD_num = static_cast<int>(std::pow((double) this->label_1D_num,this->image_dim));
-   this->node_number = CalcVoxelNumber(*this->controlPointImage);
+   this->node_number = NiftiImage::calcVoxelNumber(this->controlPointImage, 3);
 
    this->input_transformation=nifti_copy_nim_info(this->controlPointImage);
    this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float));
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index 509b0939..e9c99a2f 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -88,10 +88,10 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
 {
 #ifdef _WIN32
    long voxel;
-   const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
+   const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
    size_t voxel;
-   const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+   const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
 
    /* As the tensor has 6 unique components that we need to worry about, read them out
@@ -116,14 +116,13 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
    const double twoThirds = (2.0/3.0);
    DataType rXX, rXY, rYY, rXZ, rYZ, rZZ;
 #ifdef _OPENMP
-   #pragma omp parallel for default(none) \
+#pragma omp parallel for default(none) \
    shared(referenceImage, referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \
           referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ, \
           warpedIntensityXX,warpedIntensityXY,warpedIntensityXZ, \
           warpedIntensityYY,warpedIntensityYZ, warpedIntensityZZ, mask,voxelNumber) \
-   private(voxel, rXX, rXY, rYY, rXZ, rYZ, rZZ) \
-reduction(+:DTI_cost) \
-reduction(+:n)
+   private(rXX, rXY, rYY, rXZ, rYZ, rZZ) \
+   reduction(+:DTI_cost, n)
 #endif
    for(voxel=0; voxel<voxelNumber; ++voxel)
    {
@@ -234,10 +233,10 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
    // Create pointers to the reference and warped images
 #ifdef _WIN32
     long voxel;
-    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
     size_t voxel;
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
 
    /* As the tensor has 6 unique components that we need to worry about, read them out
@@ -280,13 +279,13 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
 
    DataType rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad;
 #ifdef _OPENMP
-   #pragma omp parallel for default(none) \
+#pragma omp parallel for default(none) \
    shared(referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \
           referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ,warpedIntensityXX, \
           warpedIntensityXY,warpedIntensityXZ ,warpedIntensityYY,warpedIntensityYZ, warpedIntensityZZ, \
           mask, spatialGradXX, spatialGradXY, spatialGradXZ, spatialGradYY, spatialGradYZ, spatialGradZZ, \
           dtiMeasureGradPtrX, dtiMeasureGradPtrY, dtiMeasureGradPtrZ, voxelNumber) \
-   private(voxel, rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad)
+   private(rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad)
 #endif
    for(voxel=0; voxel<voxelNumber; voxel++)
    {
diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp
index 51c22017..4e2dc22c 100644
--- a/reg-lib/cpu/_reg_femTrans.cpp
+++ b/reg-lib/cpu/_reg_femTrans.cpp
@@ -39,7 +39,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes,
                                      )
 {
    // Set all the closest nodes and coefficients to zero
-   for (int i = 0; i < 4 * CalcVoxelNumber(*deformationFieldImage); ++i)
+   for (int i = 0; i < 4 * NiftiImage::calcVoxelNumber(deformationFieldImage, 3); ++i)
    {
       closestNodes[i]=0;
       femInterpolationWeight[i]=0.f;
@@ -150,10 +150,10 @@ void reg_fem_getDeformationField(float *nodePositions,
 {
 #ifdef _WIN32
     long voxel;
-    const long voxelNumber = (long)CalcVoxelNumber(*deformationFieldImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(deformationFieldImage, 3);
 #else
     size_t voxel;
-    const size_t voxelNumber = CalcVoxelNumber(*deformationFieldImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationFieldImage, 3);
 #endif
 
    float *defPtrX = static_cast<float *>(deformationFieldImage->data);
@@ -166,7 +166,7 @@ void reg_fem_getDeformationField(float *nodePositions,
    #pragma omp parallel for default(none) \
    shared(defPtrX, defPtrY, defPtrZ, femInterpolationWeight, \
           nodePositions, closestNodes, voxelNumber) \
-   private(voxel, coefficients, positionA, positionB, positionC, positionD)
+   private(coefficients, positionA, positionB, positionC, positionD)
 #endif
    for(voxel=0; voxel<voxelNumber; ++voxel)
    {
@@ -215,7 +215,7 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
                                  unsigned nodeNumber,
                                  float *femBasedGradient)
 {
-   const size_t voxelNumber = CalcVoxelNumber(*voxelBasedGradient);
+   const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelBasedGradient, 3);
    float *voxGradPtrX = static_cast<float *>(voxelBasedGradient->data);
    float *voxGradPtrY = &voxGradPtrX[voxelNumber];
    float *voxGradPtrZ = &voxGradPtrY[voxelNumber];
diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp
index 3f27b7b7..444f273c 100755
--- a/reg-lib/cpu/_reg_globalTrans.cpp
+++ b/reg-lib/cpu/_reg_globalTrans.cpp
@@ -22,7 +22,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation,
                                    bool composition,
                                    int *mask)
 {
-   const size_t voxelNumber = CalcVoxelNumber(*deformationFieldImage, 2);
+   const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationFieldImage, 2);
    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationFieldImage->data);
    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[voxelNumber];
 
@@ -49,7 +49,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation,
 #pragma omp parallel for default(none) \
    shared(deformationFieldImage, transformationMatrix, affineTransformation, \
    deformationFieldPtrX, deformationFieldPtrY, mask, composition) \
-   private(voxel, position, x, y, index)
+   private(voxel, position, x, index)
 #endif
    for(y=0; y<deformationFieldImage->ny; y++)
    {
@@ -84,7 +84,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation,
                                    bool composition,
                                    int *mask)
 {
-   const size_t voxelNumber=CalcVoxelNumber(*deformationFieldImage);
+   const size_t voxelNumber=NiftiImage::calcVoxelNumber(deformationFieldImage, 3);
    FieldTYPE *deformationFieldPtrX = static_cast<FieldTYPE *>(deformationFieldImage->data);
    FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[voxelNumber];
    FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[voxelNumber];
@@ -112,7 +112,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation,
 #pragma omp parallel for default(none) \
    shared(deformationFieldImage, transformationMatrix, affineTransformation, \
    deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, mask, composition) \
-   private(voxel, position, x, y, z, index)
+   private(voxel, position, x, y, index)
 #endif
    for(z=0; z<deformationFieldImage->nz; z++)
    {
@@ -153,7 +153,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation,
    int *tempMask=mask;
    if(mask==nullptr)
    {
-      tempMask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int));
+      tempMask = (int *)calloc(NiftiImage::calcVoxelNumber(deformationField, 3), sizeof(int));
    }
    if(deformationField->nz==1)
    {
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index eff52320..39a8b84b 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -84,10 +84,10 @@ double reg_getKLDivergence(nifti_image *referenceImage,
                            int *mask) {
 #ifdef _WIN32
     long voxel;
-    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
     size_t voxel;
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
 
     DataType *refPtr = static_cast<DataType*>(referenceImage->data);
@@ -112,9 +112,8 @@ double reg_getKLDivergence(nifti_image *referenceImage,
 #pragma omp parallel for default(none) \
     shared(voxelNumber,currentRefPtr, currentWarPtr, \
     maskPtr, jacobianDetImg, jacPtr) \
-    private(voxel, tempRefValue, tempWarValue, tempValue) \
-    reduction(+:measure_tp) \
-    reduction(+:num)
+    private(tempRefValue, tempWarValue, tempValue) \
+    reduction(+:measure_tp, num)
 #endif
             for (voxel = 0; voxel < voxelNumber; ++voxel) {
                 if (maskPtr[voxel] > -1) {
@@ -216,10 +215,10 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
                                            double timepointWeight) {
 #ifdef _WIN32
     long voxel;
-    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
     size_t voxel;
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
 
     DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
@@ -268,7 +267,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
     maskPtr, jacobianDetImg, jacPtr, referenceImage, \
     measureGradPtrX, measureGradPtrY, measureGradPtrZ, \
     currentGradPtrX, currentGradPtrY, currentGradPtrZ, adjusted_weight) \
-    private(voxel, tempValue, tempGradX, tempGradY, tempGradZ, \
+    private(tempValue, tempGradX, tempGradY, tempGradZ, \
     tempRefValue, tempWarValue)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 547f24af..fca452e3 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -91,10 +91,10 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
     // Generate the forward mask to ignore all NaN values
 #ifdef _WIN32
     long voxel;
-    const long voxelNumber = (long)CalcVoxelNumber(*refImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(refImage, 3);
 #else
     size_t voxel;
-    const size_t voxelNumber = CalcVoxelNumber(*refImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3);
 #endif
     memcpy(combinedMask, refMask, voxelNumber * sizeof(int));
     reg_tools_removeNanFromMask(refImage, combinedMask);
@@ -201,7 +201,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg,
         free(this->backwardMask);
     this->backwardMask = nullptr;
 
-    size_t voxelNumber = CalcVoxelNumber(*this->referenceImage);
+    size_t voxelNumber = NiftiImage::calcVoxelNumber(this->referenceImage, 3);
 
     // Allocate the required image to store the correlation of the forward transformation
     this->correlationImage = nifti_copy_nim_info(this->referenceImage);
@@ -221,7 +221,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg,
     // Allocate the array to store the mask of the forward image
     this->forwardMask = (int*)malloc(voxelNumber * sizeof(int));
     if (this->isSymmetric) {
-        voxelNumber = CalcVoxelNumber(*floatingImage);
+        voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
 
         // Allocate the required image to store the correlation of the backward transformation
         this->correlationImageBw = nifti_copy_nim_info(this->floatingImage);
@@ -265,10 +265,10 @@ double reg_getLNCCValue(nifti_image *referenceImage,
                         int currentTimepoint) {
 #ifdef _WIN32
     long voxel;
-    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
     size_t voxel;
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
 
     // Compute the local correlation
@@ -454,10 +454,10 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                                    double timepointWeight) {
 #ifdef _WIN32
     long voxel;
-    long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
+    long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
     size_t voxel;
-    size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
 
     // Compute the local correlation
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index c09b15e3..35eb7c91 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -14,123 +14,115 @@
 #include "_reg_localTrans.h"
 #include "_reg_maths_eigen.h"
 
-/* *************************************************************** */
 /* *************************************************************** */
 template <class DataType>
 void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
                                 const NiftiImage& referenceImage,
-                                const float *spacing)
-{
-   // Define the control point grid dimension
-   vector<NiftiImage::dim_t> dims{
-      static_cast<int>(reg_ceil(referenceImage->nx*referenceImage->dx / spacing[0]) + 3.f),
-      static_cast<int>(reg_ceil(referenceImage->ny*referenceImage->dy / spacing[1]) + 3.f),
-      referenceImage->nz > 1 ? static_cast<int>(reg_ceil(referenceImage->nz * referenceImage->dz / spacing[2]) + 3.f) : 1,
-      1,
-      referenceImage->nz > 1 ? 3 : 2
-   };
-
-   // Create the new control point grid image and allocate its space
-   controlPointGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64);
-
-   // Fill the header information
-   controlPointGridImage->cal_min=0;
-   controlPointGridImage->cal_max=0;
-   controlPointGridImage->pixdim[0]=1.0f;
-   controlPointGridImage->pixdim[1]=controlPointGridImage->dx=spacing[0];
-   controlPointGridImage->pixdim[2]=controlPointGridImage->dy=spacing[1];
-   if(referenceImage->nz==1)
-   {
-      controlPointGridImage->pixdim[3]=controlPointGridImage->dz=1.0f;
-   }
-   else controlPointGridImage->pixdim[3]=controlPointGridImage->dz=spacing[2];
-   controlPointGridImage->pixdim[4]=controlPointGridImage->dt=1.0f;
-   controlPointGridImage->pixdim[5]=controlPointGridImage->du=1.0f;
-   controlPointGridImage->pixdim[6]=controlPointGridImage->dv=1.0f;
-   controlPointGridImage->pixdim[7]=controlPointGridImage->dw=1.0f;
-
-   // Reproduce the orientation of the reference image and add a one voxel shift
-   if(referenceImage->qform_code+referenceImage->sform_code>0)
-   {
-      controlPointGridImage->qform_code=referenceImage->qform_code;
-      controlPointGridImage->sform_code=referenceImage->sform_code;
-   }
-   else
-   {
-      controlPointGridImage->qform_code=1;
-      controlPointGridImage->sform_code=0;
-   }
-
-   // The qform (and sform) are set for the control point position image
-   controlPointGridImage->quatern_b=referenceImage->quatern_b;
-   controlPointGridImage->quatern_c=referenceImage->quatern_c;
-   controlPointGridImage->quatern_d=referenceImage->quatern_d;
-   controlPointGridImage->qoffset_x=referenceImage->qoffset_x;
-   controlPointGridImage->qoffset_y=referenceImage->qoffset_y;
-   controlPointGridImage->qoffset_z=referenceImage->qoffset_z;
-   controlPointGridImage->qfac=referenceImage->qfac;
-   controlPointGridImage->qto_xyz = nifti_quatern_to_mat44(controlPointGridImage->quatern_b,
-                                                           controlPointGridImage->quatern_c,
-                                                           controlPointGridImage->quatern_d,
-                                                           controlPointGridImage->qoffset_x,
-                                                           controlPointGridImage->qoffset_y,
-                                                           controlPointGridImage->qoffset_z,
-                                                           controlPointGridImage->dx,
-                                                           controlPointGridImage->dy,
-                                                           controlPointGridImage->dz,
-                                                           controlPointGridImage->qfac);
-
-   // Origin is shifted from 1 control point in the qform
-   float originIndex[3];
-   float originReal[3];
-   originIndex[0] = -1.0f;
-   originIndex[1] = -1.0f;
-   originIndex[2] = 0.0f;
-   if(referenceImage->nz>1) originIndex[2] = -1.0f;
-   reg_mat44_mul(&(controlPointGridImage->qto_xyz), originIndex, originReal);
-   controlPointGridImage->qto_xyz.m[0][3] = controlPointGridImage->qoffset_x = originReal[0];
-   controlPointGridImage->qto_xyz.m[1][3] = controlPointGridImage->qoffset_y = originReal[1];
-   controlPointGridImage->qto_xyz.m[2][3] = controlPointGridImage->qoffset_z = originReal[2];
-
-   controlPointGridImage->qto_ijk = nifti_mat44_inverse(controlPointGridImage->qto_xyz);
-
-   // Update the sform if required
-   if(controlPointGridImage->sform_code>0)
-   {
-      float scalingRatio[3];
-      scalingRatio[0]= controlPointGridImage->dx / referenceImage->dx;
-      scalingRatio[1]= controlPointGridImage->dy / referenceImage->dy;
-      scalingRatio[2]= controlPointGridImage->dz / referenceImage->dz;
-
-      controlPointGridImage->sto_xyz.m[0][0]=referenceImage->sto_xyz.m[0][0] * scalingRatio[0];
-      controlPointGridImage->sto_xyz.m[1][0]=referenceImage->sto_xyz.m[1][0] * scalingRatio[0];
-      controlPointGridImage->sto_xyz.m[2][0]=referenceImage->sto_xyz.m[2][0] * scalingRatio[0];
-      controlPointGridImage->sto_xyz.m[3][0]=referenceImage->sto_xyz.m[3][0];
-      controlPointGridImage->sto_xyz.m[0][1]=referenceImage->sto_xyz.m[0][1] * scalingRatio[1];
-      controlPointGridImage->sto_xyz.m[1][1]=referenceImage->sto_xyz.m[1][1] * scalingRatio[1];
-      controlPointGridImage->sto_xyz.m[2][1]=referenceImage->sto_xyz.m[2][1] * scalingRatio[1];
-      controlPointGridImage->sto_xyz.m[3][1]=referenceImage->sto_xyz.m[3][1];
-      controlPointGridImage->sto_xyz.m[0][2]=referenceImage->sto_xyz.m[0][2] * scalingRatio[2];
-      controlPointGridImage->sto_xyz.m[1][2]=referenceImage->sto_xyz.m[1][2] * scalingRatio[2];
-      controlPointGridImage->sto_xyz.m[2][2]=referenceImage->sto_xyz.m[2][2] * scalingRatio[2];
-      controlPointGridImage->sto_xyz.m[3][2]=referenceImage->sto_xyz.m[3][2];
-      controlPointGridImage->sto_xyz.m[0][3]=referenceImage->sto_xyz.m[0][3];
-      controlPointGridImage->sto_xyz.m[1][3]=referenceImage->sto_xyz.m[1][3];
-      controlPointGridImage->sto_xyz.m[2][3]=referenceImage->sto_xyz.m[2][3];
-      controlPointGridImage->sto_xyz.m[3][3]=referenceImage->sto_xyz.m[3][3];
-
-      // Origin is shifted from 1 control point in the sform
-      reg_mat44_mul(&(controlPointGridImage->sto_xyz), originIndex, originReal);
-      controlPointGridImage->sto_xyz.m[0][3] = originReal[0];
-      controlPointGridImage->sto_xyz.m[1][3] = originReal[1];
-      controlPointGridImage->sto_xyz.m[2][3] = originReal[2];
-      controlPointGridImage->sto_ijk = nifti_mat44_inverse(controlPointGridImage->sto_xyz);
-   }
-
-   controlPointGridImage->intent_code=NIFTI_INTENT_VECTOR;
-   memset(controlPointGridImage->intent_name, 0, 16);
-   strcpy(controlPointGridImage->intent_name,"NREG_TRANS");
-   controlPointGridImage->intent_p1=CUB_SPLINE_GRID;
+                                const float *spacing) {
+    // Define the control point grid dimensions
+    vector<NiftiImage::dim_t> dims{
+        static_cast<int>(reg_ceil(referenceImage->nx * referenceImage->dx / spacing[0]) + 3.f),
+        static_cast<int>(reg_ceil(referenceImage->ny * referenceImage->dy / spacing[1]) + 3.f),
+        referenceImage->nz > 1 ? static_cast<int>(reg_ceil(referenceImage->nz * referenceImage->dz / spacing[2]) + 3.f) : 1,
+        1,
+        referenceImage->nz > 1 ? 3 : 2
+    };
+
+    // Create the new control point grid image and allocate its space
+    controlPointGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64);
+
+    // Fill the header information
+    controlPointGridImage->cal_min = 0;
+    controlPointGridImage->cal_max = 0;
+    controlPointGridImage->pixdim[0] = 1.0f;
+    controlPointGridImage->pixdim[1] = controlPointGridImage->dx = spacing[0];
+    controlPointGridImage->pixdim[2] = controlPointGridImage->dy = spacing[1];
+    if (referenceImage->nz == 1) {
+        controlPointGridImage->pixdim[3] = controlPointGridImage->dz = 1.0f;
+    } else controlPointGridImage->pixdim[3] = controlPointGridImage->dz = spacing[2];
+    controlPointGridImage->pixdim[4] = controlPointGridImage->dt = 1.0f;
+    controlPointGridImage->pixdim[5] = controlPointGridImage->du = 1.0f;
+    controlPointGridImage->pixdim[6] = controlPointGridImage->dv = 1.0f;
+    controlPointGridImage->pixdim[7] = controlPointGridImage->dw = 1.0f;
+
+    // Reproduce the orientation of the reference image and add a one voxel shift
+    if (referenceImage->qform_code + referenceImage->sform_code > 0) {
+        controlPointGridImage->qform_code = referenceImage->qform_code;
+        controlPointGridImage->sform_code = referenceImage->sform_code;
+    } else {
+        controlPointGridImage->qform_code = 1;
+        controlPointGridImage->sform_code = 0;
+    }
+
+    // The qform (and sform) are set for the control point position image
+    controlPointGridImage->quatern_b = referenceImage->quatern_b;
+    controlPointGridImage->quatern_c = referenceImage->quatern_c;
+    controlPointGridImage->quatern_d = referenceImage->quatern_d;
+    controlPointGridImage->qoffset_x = referenceImage->qoffset_x;
+    controlPointGridImage->qoffset_y = referenceImage->qoffset_y;
+    controlPointGridImage->qoffset_z = referenceImage->qoffset_z;
+    controlPointGridImage->qfac = referenceImage->qfac;
+    controlPointGridImage->qto_xyz = nifti_quatern_to_mat44(controlPointGridImage->quatern_b,
+                                                            controlPointGridImage->quatern_c,
+                                                            controlPointGridImage->quatern_d,
+                                                            controlPointGridImage->qoffset_x,
+                                                            controlPointGridImage->qoffset_y,
+                                                            controlPointGridImage->qoffset_z,
+                                                            controlPointGridImage->dx,
+                                                            controlPointGridImage->dy,
+                                                            controlPointGridImage->dz,
+                                                            controlPointGridImage->qfac);
+
+    // Origin is shifted from 1 control point in the qform
+    float originIndex[3];
+    float originReal[3];
+    originIndex[0] = -1.0f;
+    originIndex[1] = -1.0f;
+    originIndex[2] = 0.0f;
+    if (referenceImage->nz > 1) originIndex[2] = -1.0f;
+    reg_mat44_mul(&(controlPointGridImage->qto_xyz), originIndex, originReal);
+    controlPointGridImage->qto_xyz.m[0][3] = controlPointGridImage->qoffset_x = originReal[0];
+    controlPointGridImage->qto_xyz.m[1][3] = controlPointGridImage->qoffset_y = originReal[1];
+    controlPointGridImage->qto_xyz.m[2][3] = controlPointGridImage->qoffset_z = originReal[2];
+
+    controlPointGridImage->qto_ijk = nifti_mat44_inverse(controlPointGridImage->qto_xyz);
+
+    // Update the sform if required
+    if (controlPointGridImage->sform_code > 0) {
+        float scalingRatio[3];
+        scalingRatio[0] = controlPointGridImage->dx / referenceImage->dx;
+        scalingRatio[1] = controlPointGridImage->dy / referenceImage->dy;
+        scalingRatio[2] = controlPointGridImage->dz / referenceImage->dz;
+
+        controlPointGridImage->sto_xyz.m[0][0] = referenceImage->sto_xyz.m[0][0] * scalingRatio[0];
+        controlPointGridImage->sto_xyz.m[1][0] = referenceImage->sto_xyz.m[1][0] * scalingRatio[0];
+        controlPointGridImage->sto_xyz.m[2][0] = referenceImage->sto_xyz.m[2][0] * scalingRatio[0];
+        controlPointGridImage->sto_xyz.m[3][0] = referenceImage->sto_xyz.m[3][0];
+        controlPointGridImage->sto_xyz.m[0][1] = referenceImage->sto_xyz.m[0][1] * scalingRatio[1];
+        controlPointGridImage->sto_xyz.m[1][1] = referenceImage->sto_xyz.m[1][1] * scalingRatio[1];
+        controlPointGridImage->sto_xyz.m[2][1] = referenceImage->sto_xyz.m[2][1] * scalingRatio[1];
+        controlPointGridImage->sto_xyz.m[3][1] = referenceImage->sto_xyz.m[3][1];
+        controlPointGridImage->sto_xyz.m[0][2] = referenceImage->sto_xyz.m[0][2] * scalingRatio[2];
+        controlPointGridImage->sto_xyz.m[1][2] = referenceImage->sto_xyz.m[1][2] * scalingRatio[2];
+        controlPointGridImage->sto_xyz.m[2][2] = referenceImage->sto_xyz.m[2][2] * scalingRatio[2];
+        controlPointGridImage->sto_xyz.m[3][2] = referenceImage->sto_xyz.m[3][2];
+        controlPointGridImage->sto_xyz.m[0][3] = referenceImage->sto_xyz.m[0][3];
+        controlPointGridImage->sto_xyz.m[1][3] = referenceImage->sto_xyz.m[1][3];
+        controlPointGridImage->sto_xyz.m[2][3] = referenceImage->sto_xyz.m[2][3];
+        controlPointGridImage->sto_xyz.m[3][3] = referenceImage->sto_xyz.m[3][3];
+
+        // Origin is shifted from 1 control point in the sform
+        reg_mat44_mul(&(controlPointGridImage->sto_xyz), originIndex, originReal);
+        controlPointGridImage->sto_xyz.m[0][3] = originReal[0];
+        controlPointGridImage->sto_xyz.m[1][3] = originReal[1];
+        controlPointGridImage->sto_xyz.m[2][3] = originReal[2];
+        controlPointGridImage->sto_ijk = nifti_mat44_inverse(controlPointGridImage->sto_xyz);
+    }
+
+    controlPointGridImage->intent_code = NIFTI_INTENT_VECTOR;
+    memset(controlPointGridImage->intent_name, 0, 16);
+    strcpy(controlPointGridImage->intent_name, "NREG_TRANS");
+    controlPointGridImage->intent_p1 = CUB_SPLINE_GRID;
 }
 template void reg_createControlPointGrid<float>(NiftiImage&, const NiftiImage&, const float*);
 template void reg_createControlPointGrid<double>(NiftiImage&, const NiftiImage&, const float*);
@@ -141,761 +133,678 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
                                           const NiftiImage& referenceImage,
                                           const NiftiImage& floatingImage,
                                           const mat44 *forwardAffineTrans,
-                                          const float *spacing)
-{
-   // We specified a space which is in-between both input images
-   // Get the reference image space
-   mat44 referenceImageSpace = referenceImage->qto_xyz;
-   if(referenceImage->sform_code>0)
-      referenceImageSpace = referenceImage->sto_xyz;
+                                          const float *spacing) {
+    // We specified a space which is in-between both input images
+    // Get the reference image space
+    mat44 referenceImageSpace = referenceImage->qto_xyz;
+    if (referenceImage->sform_code > 0)
+        referenceImageSpace = referenceImage->sto_xyz;
 #ifndef NDEBUG
-   reg_mat44_disp(&referenceImageSpace,(char *)"[NiftyReg DEBUG] Input reference image orientation");
+    reg_mat44_disp(&referenceImageSpace, (char*)"[NiftyReg DEBUG] Input reference image orientation");
 #endif
-   // // Get the floating image space
-   mat44 floatingImageSpace = floatingImage->qto_xyz;
-   if(floatingImage->sform_code>0)
-      floatingImageSpace = floatingImage->sto_xyz;
+    // // Get the floating image space
+    mat44 floatingImageSpace = floatingImage->qto_xyz;
+    if (floatingImage->sform_code > 0)
+        floatingImageSpace = floatingImage->sto_xyz;
 #ifndef NDEBUG
-   reg_mat44_disp(&floatingImageSpace,(char *)"[NiftyReg DEBUG] Input floating image orientation");
+    reg_mat44_disp(&floatingImageSpace, (char*)"[NiftyReg DEBUG] Input floating image orientation");
 #endif
-   // Check if an affine transformation is specified
-   mat44 halfForwardAffine, halfBackwardAffine;
-   if(forwardAffineTrans!=nullptr)
-   {
-      // Compute half of the affine transformation - ref to flo
-      halfForwardAffine = reg_mat44_logm(forwardAffineTrans);
-      halfForwardAffine = reg_mat44_mul(&halfForwardAffine,.5f);
-      halfForwardAffine = reg_mat44_expm(&halfForwardAffine);
-      // Compute half of the affine transformation - flo to ref
-      // Note that this is done twice for symmetry consideration
-      halfBackwardAffine = nifti_mat44_inverse(*forwardAffineTrans);
-      halfBackwardAffine = reg_mat44_logm(&halfBackwardAffine);
-      halfBackwardAffine = reg_mat44_mul(&halfBackwardAffine,.5f);
-      halfBackwardAffine = reg_mat44_expm(&halfBackwardAffine);
-      reg_print_msg_warn("Note that the symmetry of the registration is affected by the input affine transformation");
-   }
-   else
-   {
-      reg_mat44_eye(&halfForwardAffine);
-      reg_mat44_eye(&halfBackwardAffine);
-   }
-
-   // Update the reference and floating transformation to propagate to a mid space
-   referenceImageSpace = reg_mat44_mul(&halfForwardAffine,&referenceImageSpace);
-   floatingImageSpace = reg_mat44_mul(&halfBackwardAffine,&floatingImageSpace);
-
-   // Define the largest field of view in the mid space
-   float minPosition[3]={0,0,0}, maxPosition[3]={0,0,0};
-   if(referenceImage->nz>1)  // 3D
-   {
-      float referenceImageCorners[8][3]=
-      {
-         {0,0,0},
-         {float(referenceImage->nx),0,0},
-         {0,float(referenceImage->ny),0},
-         {float(referenceImage->nx),float(referenceImage->ny),0},
-         {0,0,float(referenceImage->nz)},
-         {float(referenceImage->nx),0,float(referenceImage->nz)},
-         {0,float(referenceImage->ny),float(referenceImage->nz)},
-         {float(referenceImage->nx),float(referenceImage->ny),float(referenceImage->nz)}
-      };
-      float floatingImageCorners[8][3]=
-      {
-         {0,0,0},
-         {float(floatingImage->nx),0,0},
-         {0,float(floatingImage->ny),0},
-         {float(floatingImage->nx),float(floatingImage->ny),0},
-         {0,0,float(floatingImage->nz)},
-         {float(floatingImage->nx),0,float(floatingImage->nz)},
-         {0,float(floatingImage->ny),float(floatingImage->nz)},
-         {float(floatingImage->nx),float(floatingImage->ny),float(floatingImage->nz)}
-      };
-      float out[3];
-      for(int c=0; c<8; ++c)
-      {
-         reg_mat44_mul(&referenceImageSpace,referenceImageCorners[c],out);
-         referenceImageCorners[c][0]=out[0];
-         referenceImageCorners[c][1]=out[1];
-         referenceImageCorners[c][2]=out[2];
-         reg_mat44_mul(&floatingImageSpace,floatingImageCorners[c],out);
-         floatingImageCorners[c][0]=out[0];
-         floatingImageCorners[c][1]=out[1];
-         floatingImageCorners[c][2]=out[2];
-
-      }
-      minPosition[0]=referenceImageCorners[0][0]<floatingImageCorners[0][0]?referenceImageCorners[0][0]:floatingImageCorners[0][0];
-      minPosition[1]=referenceImageCorners[0][1]<floatingImageCorners[0][1]?referenceImageCorners[0][1]:floatingImageCorners[0][1];
-      minPosition[2]=referenceImageCorners[0][2]<floatingImageCorners[0][2]?referenceImageCorners[0][2]:floatingImageCorners[0][2];
-      maxPosition[0]=referenceImageCorners[0][0]>floatingImageCorners[0][0]?referenceImageCorners[0][0]:floatingImageCorners[0][0];
-      maxPosition[1]=referenceImageCorners[0][1]>floatingImageCorners[0][1]?referenceImageCorners[0][1]:floatingImageCorners[0][1];
-      maxPosition[2]=referenceImageCorners[0][2]>floatingImageCorners[0][2]?referenceImageCorners[0][2]:floatingImageCorners[0][2];
-      for(int c=1; c<8; ++c)
-      {
-         minPosition[0]=minPosition[0]<referenceImageCorners[c][0]?minPosition[0]:referenceImageCorners[c][0];
-         minPosition[0]=minPosition[0]<floatingImageCorners[c][0]?minPosition[0]:floatingImageCorners[c][0];
-         minPosition[1]=minPosition[1]<referenceImageCorners[c][1]?minPosition[1]:referenceImageCorners[c][1];
-         minPosition[1]=minPosition[1]<floatingImageCorners[c][1]?minPosition[1]:floatingImageCorners[c][1];
-         minPosition[2]=minPosition[2]<referenceImageCorners[c][2]?minPosition[2]:referenceImageCorners[c][2];
-         minPosition[2]=minPosition[2]<floatingImageCorners[c][2]?minPosition[2]:floatingImageCorners[c][2];
-         maxPosition[0]=maxPosition[0]>referenceImageCorners[c][0]?maxPosition[0]:referenceImageCorners[c][0];
-         maxPosition[0]=maxPosition[0]>floatingImageCorners[c][0]?maxPosition[0]:floatingImageCorners[c][0];
-         maxPosition[1]=maxPosition[1]>referenceImageCorners[c][1]?maxPosition[1]:referenceImageCorners[c][1];
-         maxPosition[1]=maxPosition[1]>floatingImageCorners[c][1]?maxPosition[1]:floatingImageCorners[c][1];
-         maxPosition[2]=maxPosition[2]>referenceImageCorners[c][2]?maxPosition[2]:referenceImageCorners[c][2];
-         maxPosition[2]=maxPosition[2]>floatingImageCorners[c][2]?maxPosition[2]:floatingImageCorners[c][2];
-      }
-   }
-   else  // 2D
-   {
-      float referenceImageCorners[4][2]=
-      {
-         {0,0},
-         {float(referenceImage->nx),0},
-         {0,float(referenceImage->ny)},
-         {float(referenceImage->nx),float(referenceImage->ny)}
-      };
-      float floatingImageCorners[4][2]=
-      {
-         {0,0},
-         {float(floatingImage->nx),0},
-         {0,float(floatingImage->ny)},
-         {float(floatingImage->nx),float(floatingImage->ny)}
-      };
-      float out[2];
-      for(int c=0; c<4; ++c)
-      {
-         out[0]= referenceImageCorners[c][0] * referenceImageSpace.m[0][0]
-               +referenceImageCorners[c][1] * referenceImageSpace.m[0][1]
-               + referenceImageSpace.m[0][3];
-         out[1]= referenceImageCorners[c][0] * referenceImageSpace.m[1][0]
-               +referenceImageCorners[c][1] * referenceImageSpace.m[1][1]
-               + referenceImageSpace.m[1][3];
-         referenceImageCorners[c][0]=out[0];
-         referenceImageCorners[c][1]=out[1];
-         out[0]= floatingImageCorners[c][0] * floatingImageSpace.m[0][0]
-               +floatingImageCorners[c][1] * floatingImageSpace.m[0][1]
-               + floatingImageSpace.m[0][3];
-         out[1]= floatingImageCorners[c][0] * floatingImageSpace.m[1][0]
-               +floatingImageCorners[c][1] * floatingImageSpace.m[1][1]
-               + floatingImageSpace.m[1][3];
-         floatingImageCorners[c][0]=out[0];
-         floatingImageCorners[c][1]=out[1];
-
-      }
-      minPosition[0]=referenceImageCorners[0][0]<floatingImageCorners[0][0]?referenceImageCorners[0][0]:floatingImageCorners[0][0];
-      minPosition[1]=referenceImageCorners[0][1]<floatingImageCorners[0][1]?referenceImageCorners[0][1]:floatingImageCorners[0][1];
-      maxPosition[0]=referenceImageCorners[0][0]>floatingImageCorners[0][0]?referenceImageCorners[0][0]:floatingImageCorners[0][0];
-      maxPosition[1]=referenceImageCorners[0][1]>floatingImageCorners[0][1]?referenceImageCorners[0][1]:floatingImageCorners[0][1];
-      for(int c=1; c<4; ++c)
-      {
-         minPosition[0]=minPosition[0]<referenceImageCorners[c][0]?minPosition[0]:referenceImageCorners[c][0];
-         minPosition[0]=minPosition[0]<floatingImageCorners[c][0]?minPosition[0]:floatingImageCorners[c][0];
-         minPosition[1]=minPosition[1]<referenceImageCorners[c][1]?minPosition[1]:referenceImageCorners[c][1];
-         minPosition[1]=minPosition[1]<floatingImageCorners[c][1]?minPosition[1]:floatingImageCorners[c][1];
-         maxPosition[0]=maxPosition[0]>referenceImageCorners[c][0]?maxPosition[0]:referenceImageCorners[c][0];
-         maxPosition[0]=maxPosition[0]>floatingImageCorners[c][0]?maxPosition[0]:floatingImageCorners[c][0];
-         maxPosition[1]=maxPosition[1]>referenceImageCorners[c][1]?maxPosition[1]:referenceImageCorners[c][1];
-         maxPosition[1]=maxPosition[1]>floatingImageCorners[c][1]?maxPosition[1]:floatingImageCorners[c][1];
-      }
-   }
-
-   // Compute the dimension of the control point grids
-   const vector<NiftiImage::dim_t> dims{
-      static_cast<int>(reg_ceil((maxPosition[0] - minPosition[0]) / spacing[0]) + 3),
-      static_cast<int>(reg_ceil((maxPosition[1] - minPosition[1]) / spacing[1]) + 3),
-      referenceImage->nz > 1 ? static_cast<int>(reg_ceil((maxPosition[2] - minPosition[2]) / spacing[2]) + 3) : 1,
-      1,
-      referenceImage->nz > 1 ? 3 : 2
-   };
-
-   // Create the control point grid image
-   forwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64);
-   backwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64);
-
-   // Set the control point grid spacing
-   forwardGridImage->pixdim[1]=forwardGridImage->dx=backwardGridImage->pixdim[1]=backwardGridImage->dx=spacing[0];
-   forwardGridImage->pixdim[2]=forwardGridImage->dy=backwardGridImage->pixdim[2]=backwardGridImage->dy=spacing[1];
-   if(referenceImage->nz>1)
-      forwardGridImage->pixdim[3]=forwardGridImage->dz=backwardGridImage->pixdim[3]=backwardGridImage->dz=spacing[2];
-   // Set the control point grid image orientation
-   forwardGridImage->qform_code=backwardGridImage->qform_code=0;
-   forwardGridImage->sform_code=backwardGridImage->sform_code=1;
-   reg_mat44_eye(&forwardGridImage->sto_xyz);
-   reg_mat44_eye(&backwardGridImage->sto_xyz);
-   reg_mat44_eye(&forwardGridImage->sto_ijk);
-   reg_mat44_eye(&backwardGridImage->sto_ijk);
-   for(unsigned i=0; i<3; ++i)
-   {
-      if(referenceImage->nz>1 || i<2)
-      {
-         forwardGridImage->sto_xyz.m[i][i]=backwardGridImage->sto_xyz.m[i][i]=spacing[i];
-         forwardGridImage->sto_xyz.m[i][3]=backwardGridImage->sto_xyz.m[i][3]=minPosition[i]-spacing[i];
-      }
-      else
-      {
-         forwardGridImage->sto_xyz.m[i][i]=backwardGridImage->sto_xyz.m[i][i]=1.f;
-         forwardGridImage->sto_xyz.m[i][3]=backwardGridImage->sto_xyz.m[i][3]=0.f;
-      }
-   }
-   forwardGridImage->sto_ijk=backwardGridImage->sto_ijk=nifti_mat44_inverse(forwardGridImage->sto_xyz);
-   // Set the intent type
-   forwardGridImage->intent_code=backwardGridImage->intent_code=NIFTI_INTENT_VECTOR;
-   memset(forwardGridImage->intent_name, 0, 16);
-   memset(backwardGridImage->intent_name, 0, 16);
-   strcpy(forwardGridImage->intent_name,"NREG_TRANS");
-   strcpy(backwardGridImage->intent_name,"NREG_TRANS");
-   forwardGridImage->intent_p1=backwardGridImage->intent_p1=CUB_SPLINE_GRID;
-   // Set the affine matrices
-   mat44 identity;
-   reg_mat44_eye(&identity);
-   if(forwardGridImage->ext_list!=nullptr)
-      free(forwardGridImage->ext_list);
-   if(backwardGridImage->ext_list!=nullptr)
-      free(backwardGridImage->ext_list);
-   forwardGridImage->num_ext=0;
-   backwardGridImage->num_ext=0;
-   if(identity!=halfForwardAffine && identity!=halfBackwardAffine)
-   {
-      // Create extensions to store the affine parametrisations for the forward transformation
-      forwardGridImage->num_ext=2;
-      forwardGridImage->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension));
-      forwardGridImage->ext_list[0].esize=16*sizeof(float)+16;
-      forwardGridImage->ext_list[1].esize=16*sizeof(float)+16;
-      forwardGridImage->ext_list[0].ecode=NIFTI_ECODE_IGNORE;
-      forwardGridImage->ext_list[1].ecode=NIFTI_ECODE_IGNORE;
-      forwardGridImage->ext_list[0].edata=(char *)calloc(forwardGridImage->ext_list[0].esize-8,sizeof(float));
-      forwardGridImage->ext_list[1].edata=(char *)calloc(forwardGridImage->ext_list[1].esize-8,sizeof(float));
-      memcpy(forwardGridImage->ext_list[0].edata, &halfForwardAffine, sizeof(mat44));
-      memcpy(forwardGridImage->ext_list[1].edata, &halfForwardAffine, sizeof(mat44));
+    // Check if an affine transformation is specified
+    mat44 halfForwardAffine, halfBackwardAffine;
+    if (forwardAffineTrans != nullptr) {
+        // Compute half of the affine transformation - ref to flo
+        halfForwardAffine = reg_mat44_logm(forwardAffineTrans);
+        halfForwardAffine = reg_mat44_mul(&halfForwardAffine, .5f);
+        halfForwardAffine = reg_mat44_expm(&halfForwardAffine);
+        // Compute half of the affine transformation - flo to ref
+        // Note that this is done twice for symmetry consideration
+        halfBackwardAffine = nifti_mat44_inverse(*forwardAffineTrans);
+        halfBackwardAffine = reg_mat44_logm(&halfBackwardAffine);
+        halfBackwardAffine = reg_mat44_mul(&halfBackwardAffine, .5f);
+        halfBackwardAffine = reg_mat44_expm(&halfBackwardAffine);
+        reg_print_msg_warn("Note that the symmetry of the registration is affected by the input affine transformation");
+    } else {
+        reg_mat44_eye(&halfForwardAffine);
+        reg_mat44_eye(&halfBackwardAffine);
+    }
+
+    // Update the reference and floating transformation to propagate to a mid space
+    referenceImageSpace = reg_mat44_mul(&halfForwardAffine, &referenceImageSpace);
+    floatingImageSpace = reg_mat44_mul(&halfBackwardAffine, &floatingImageSpace);
+
+    // Define the largest field of view in the mid space
+    float minPosition[3] = { 0, 0, 0 }, maxPosition[3] = { 0, 0, 0 };
+    if (referenceImage->nz > 1)  // 3D
+    {
+        float referenceImageCorners[8][3] = {
+            { 0, 0, 0 },
+            { float(referenceImage->nx), 0, 0 },
+            { 0, float(referenceImage->ny), 0 },
+            { float(referenceImage->nx), float(referenceImage->ny), 0 },
+            { 0, 0, float(referenceImage->nz) },
+            { float(referenceImage->nx), 0, float(referenceImage->nz) },
+            { 0, float(referenceImage->ny), float(referenceImage->nz) },
+            { float(referenceImage->nx), float(referenceImage->ny), float(referenceImage->nz) }
+        };
+        float floatingImageCorners[8][3] = {
+            { 0, 0, 0 },
+            { float(floatingImage->nx), 0, 0 },
+            { 0, float(floatingImage->ny), 0 },
+            { float(floatingImage->nx), float(floatingImage->ny), 0 },
+            { 0, 0, float(floatingImage->nz) },
+            { float(floatingImage->nx), 0, float(floatingImage->nz) },
+            { 0, float(floatingImage->ny), float(floatingImage->nz) },
+            { float(floatingImage->nx), float(floatingImage->ny), float(floatingImage->nz) }
+        };
+        float out[3];
+        for (int c = 0; c < 8; ++c) {
+            reg_mat44_mul(&referenceImageSpace, referenceImageCorners[c], out);
+            referenceImageCorners[c][0] = out[0];
+            referenceImageCorners[c][1] = out[1];
+            referenceImageCorners[c][2] = out[2];
+            reg_mat44_mul(&floatingImageSpace, floatingImageCorners[c], out);
+            floatingImageCorners[c][0] = out[0];
+            floatingImageCorners[c][1] = out[1];
+            floatingImageCorners[c][2] = out[2];
+
+        }
+        minPosition[0] = std::min(referenceImageCorners[0][0], floatingImageCorners[0][0]);
+        minPosition[1] = std::min(referenceImageCorners[0][1], floatingImageCorners[0][1]);
+        minPosition[2] = std::min(referenceImageCorners[0][2], floatingImageCorners[0][2]);
+        maxPosition[0] = std::max(referenceImageCorners[0][0], floatingImageCorners[0][0]);
+        maxPosition[1] = std::max(referenceImageCorners[0][1], floatingImageCorners[0][1]);
+        maxPosition[2] = std::max(referenceImageCorners[0][2], floatingImageCorners[0][2]);
+        for (int c = 1; c < 8; ++c) {
+            minPosition[0] = std::min(minPosition[0], referenceImageCorners[c][0]);
+            minPosition[0] = std::min(minPosition[0], floatingImageCorners[c][0]);
+            minPosition[1] = std::min(minPosition[1], referenceImageCorners[c][1]);
+            minPosition[1] = std::min(minPosition[1], floatingImageCorners[c][1]);
+            minPosition[2] = std::min(minPosition[2], referenceImageCorners[c][2]);
+            minPosition[2] = std::min(minPosition[2], floatingImageCorners[c][2]);
+            maxPosition[0] = std::max(maxPosition[0], referenceImageCorners[c][0]);
+            maxPosition[0] = std::max(maxPosition[0], floatingImageCorners[c][0]);
+            maxPosition[1] = std::max(maxPosition[1], referenceImageCorners[c][1]);
+            maxPosition[1] = std::max(maxPosition[1], floatingImageCorners[c][1]);
+            maxPosition[2] = std::max(maxPosition[2], referenceImageCorners[c][2]);
+            maxPosition[2] = std::max(maxPosition[2], floatingImageCorners[c][2]);
+        }
+    } else { // 2D
+        float referenceImageCorners[4][2] = {
+            { 0, 0 },
+            { float(referenceImage->nx), 0 },
+            { 0, float(referenceImage->ny) },
+            { float(referenceImage->nx), float(referenceImage->ny) }
+        };
+        float floatingImageCorners[4][2] = {
+            { 0, 0 },
+            { float(floatingImage->nx), 0 },
+            { 0, float(floatingImage->ny) },
+            { float(floatingImage->nx), float(floatingImage->ny) }
+        };
+        float out[2];
+        for (int c = 0; c < 4; ++c) {
+            out[0] = referenceImageCorners[c][0] * referenceImageSpace.m[0][0]
+                + referenceImageCorners[c][1] * referenceImageSpace.m[0][1]
+                + referenceImageSpace.m[0][3];
+            out[1] = referenceImageCorners[c][0] * referenceImageSpace.m[1][0]
+                + referenceImageCorners[c][1] * referenceImageSpace.m[1][1]
+                + referenceImageSpace.m[1][3];
+            referenceImageCorners[c][0] = out[0];
+            referenceImageCorners[c][1] = out[1];
+            out[0] = floatingImageCorners[c][0] * floatingImageSpace.m[0][0]
+                + floatingImageCorners[c][1] * floatingImageSpace.m[0][1]
+                + floatingImageSpace.m[0][3];
+            out[1] = floatingImageCorners[c][0] * floatingImageSpace.m[1][0]
+                + floatingImageCorners[c][1] * floatingImageSpace.m[1][1]
+                + floatingImageSpace.m[1][3];
+            floatingImageCorners[c][0] = out[0];
+            floatingImageCorners[c][1] = out[1];
+
+        }
+        minPosition[0] = std::min(referenceImageCorners[0][0], floatingImageCorners[0][0]);
+        minPosition[1] = std::min(referenceImageCorners[0][1], floatingImageCorners[0][1]);
+        maxPosition[0] = std::max(referenceImageCorners[0][0], floatingImageCorners[0][0]);
+        maxPosition[1] = std::max(referenceImageCorners[0][1], floatingImageCorners[0][1]);
+        for (int c = 1; c < 4; ++c) {
+            minPosition[0] = std::min(minPosition[0], referenceImageCorners[c][0]);
+            minPosition[0] = std::min(minPosition[0], floatingImageCorners[c][0]);
+            minPosition[1] = std::min(minPosition[1], referenceImageCorners[c][1]);
+            minPosition[1] = std::min(minPosition[1], floatingImageCorners[c][1]);
+            maxPosition[0] = std::max(maxPosition[0], referenceImageCorners[c][0]);
+            maxPosition[0] = std::max(maxPosition[0], floatingImageCorners[c][0]);
+            maxPosition[1] = std::max(maxPosition[1], referenceImageCorners[c][1]);
+            maxPosition[1] = std::max(maxPosition[1], floatingImageCorners[c][1]);
+        }
+    }
+
+    // Compute the dimension of the control point grids
+    const vector<NiftiImage::dim_t> dims{
+        static_cast<int>(reg_ceil((maxPosition[0] - minPosition[0]) / spacing[0]) + 3),
+        static_cast<int>(reg_ceil((maxPosition[1] - minPosition[1]) / spacing[1]) + 3),
+        referenceImage->nz > 1 ? static_cast<int>(reg_ceil((maxPosition[2] - minPosition[2]) / spacing[2]) + 3) : 1,
+        1,
+        referenceImage->nz > 1 ? 3 : 2
+    };
+
+    // Create the control point grid image
+    forwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64);
+    backwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64);
+
+    // Set the control point grid spacing
+    forwardGridImage->pixdim[1] = forwardGridImage->dx = backwardGridImage->pixdim[1] = backwardGridImage->dx = spacing[0];
+    forwardGridImage->pixdim[2] = forwardGridImage->dy = backwardGridImage->pixdim[2] = backwardGridImage->dy = spacing[1];
+    if (referenceImage->nz > 1)
+        forwardGridImage->pixdim[3] = forwardGridImage->dz = backwardGridImage->pixdim[3] = backwardGridImage->dz = spacing[2];
+    // Set the control point grid image orientation
+    forwardGridImage->qform_code = backwardGridImage->qform_code = 0;
+    forwardGridImage->sform_code = backwardGridImage->sform_code = 1;
+    reg_mat44_eye(&forwardGridImage->sto_xyz);
+    reg_mat44_eye(&backwardGridImage->sto_xyz);
+    reg_mat44_eye(&forwardGridImage->sto_ijk);
+    reg_mat44_eye(&backwardGridImage->sto_ijk);
+    for (unsigned i = 0; i < 3; ++i) {
+        if (referenceImage->nz > 1 || i < 2) {
+            forwardGridImage->sto_xyz.m[i][i] = backwardGridImage->sto_xyz.m[i][i] = spacing[i];
+            forwardGridImage->sto_xyz.m[i][3] = backwardGridImage->sto_xyz.m[i][3] = minPosition[i] - spacing[i];
+        } else {
+            forwardGridImage->sto_xyz.m[i][i] = backwardGridImage->sto_xyz.m[i][i] = 1.f;
+            forwardGridImage->sto_xyz.m[i][3] = backwardGridImage->sto_xyz.m[i][3] = 0.f;
+        }
+    }
+    forwardGridImage->sto_ijk = backwardGridImage->sto_ijk = nifti_mat44_inverse(forwardGridImage->sto_xyz);
+    // Set the intent type
+    forwardGridImage->intent_code = backwardGridImage->intent_code = NIFTI_INTENT_VECTOR;
+    memset(forwardGridImage->intent_name, 0, 16);
+    memset(backwardGridImage->intent_name, 0, 16);
+    strcpy(forwardGridImage->intent_name, "NREG_TRANS");
+    strcpy(backwardGridImage->intent_name, "NREG_TRANS");
+    forwardGridImage->intent_p1 = backwardGridImage->intent_p1 = CUB_SPLINE_GRID;
+    // Set the affine matrices
+    mat44 identity;
+    reg_mat44_eye(&identity);
+    if (forwardGridImage->ext_list != nullptr)
+        free(forwardGridImage->ext_list);
+    if (backwardGridImage->ext_list != nullptr)
+        free(backwardGridImage->ext_list);
+    forwardGridImage->num_ext = 0;
+    backwardGridImage->num_ext = 0;
+    if (identity != halfForwardAffine && identity != halfBackwardAffine) {
+        // Create extensions to store the affine parametrisations for the forward transformation
+        forwardGridImage->num_ext = 2;
+        forwardGridImage->ext_list = (nifti1_extension*)malloc(2 * sizeof(nifti1_extension));
+        forwardGridImage->ext_list[0].esize = 16 * sizeof(float) + 16;
+        forwardGridImage->ext_list[1].esize = 16 * sizeof(float) + 16;
+        forwardGridImage->ext_list[0].ecode = NIFTI_ECODE_IGNORE;
+        forwardGridImage->ext_list[1].ecode = NIFTI_ECODE_IGNORE;
+        forwardGridImage->ext_list[0].edata = (char*)calloc(forwardGridImage->ext_list[0].esize - 8, sizeof(float));
+        forwardGridImage->ext_list[1].edata = (char*)calloc(forwardGridImage->ext_list[1].esize - 8, sizeof(float));
+        memcpy(forwardGridImage->ext_list[0].edata, &halfForwardAffine, sizeof(mat44));
+        memcpy(forwardGridImage->ext_list[1].edata, &halfForwardAffine, sizeof(mat44));
 #ifndef NDEBUG
-      reg_mat44_disp(&halfForwardAffine,(char *)"[NiftyReg DEBUG] Forward transformation half-affine");
+        reg_mat44_disp(&halfForwardAffine, (char*)"[NiftyReg DEBUG] Forward transformation half-affine");
 #endif
-      // Create extensions to store the affine parametrisations for the backward transformation
-      backwardGridImage->num_ext=2;
-      backwardGridImage->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension));
-      backwardGridImage->ext_list[0].esize=16*sizeof(float)+16;
-      backwardGridImage->ext_list[1].esize=16*sizeof(float)+16;
-      backwardGridImage->ext_list[0].ecode=NIFTI_ECODE_IGNORE;
-      backwardGridImage->ext_list[1].ecode=NIFTI_ECODE_IGNORE;
-      backwardGridImage->ext_list[0].edata=(char *)calloc(backwardGridImage->ext_list[0].esize-8,sizeof(float));
-      backwardGridImage->ext_list[1].edata=(char *)calloc(backwardGridImage->ext_list[1].esize-8,sizeof(float));
-      memcpy(backwardGridImage->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44));
-      memcpy(backwardGridImage->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44));
+        // Create extensions to store the affine parametrisations for the backward transformation
+        backwardGridImage->num_ext = 2;
+        backwardGridImage->ext_list = (nifti1_extension*)malloc(2 * sizeof(nifti1_extension));
+        backwardGridImage->ext_list[0].esize = 16 * sizeof(float) + 16;
+        backwardGridImage->ext_list[1].esize = 16 * sizeof(float) + 16;
+        backwardGridImage->ext_list[0].ecode = NIFTI_ECODE_IGNORE;
+        backwardGridImage->ext_list[1].ecode = NIFTI_ECODE_IGNORE;
+        backwardGridImage->ext_list[0].edata = (char*)calloc(backwardGridImage->ext_list[0].esize - 8, sizeof(float));
+        backwardGridImage->ext_list[1].edata = (char*)calloc(backwardGridImage->ext_list[1].esize - 8, sizeof(float));
+        memcpy(backwardGridImage->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44));
+        memcpy(backwardGridImage->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44));
 #ifndef NDEBUG
-      reg_mat44_disp(&halfBackwardAffine,(char *)"[NiftyReg DEBUG] Backward transformation half-affine");
+        reg_mat44_disp(&halfBackwardAffine, (char*)"[NiftyReg DEBUG] Backward transformation half-affine");
 #endif
-   }
-   // Initialise the grid with identity transformations
-   reg_tools_multiplyValueToImage(forwardGridImage,forwardGridImage,0.f);
-   reg_tools_multiplyValueToImage(backwardGridImage,backwardGridImage,0.f);
-   // Convert the parametrisations into deformation fields
-   reg_getDeformationFromDisplacement(forwardGridImage);
-   reg_getDeformationFromDisplacement(backwardGridImage);
+    }
+    // Initialise the grid with identity transformations
+    reg_tools_multiplyValueToImage(forwardGridImage, forwardGridImage, 0.f);
+    reg_tools_multiplyValueToImage(backwardGridImage, backwardGridImage, 0.f);
+    // Convert the parametrisations into deformation fields
+    reg_getDeformationFromDisplacement(forwardGridImage);
+    reg_getDeformationFromDisplacement(backwardGridImage);
 }
-/* *************************************************************** */
-template void reg_createSymmetricControlPointGrids<float>
-(NiftiImage&,NiftiImage&,const NiftiImage&,const NiftiImage&,const mat44*,const float*);
-template void reg_createSymmetricControlPointGrids<double>
-(NiftiImage&,NiftiImage&,const NiftiImage&,const NiftiImage&,const mat44*,const float*);
-/* *************************************************************** */
+template void reg_createSymmetricControlPointGrids<float>(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*);
+template void reg_createSymmetricControlPointGrids<double>(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*);
 /* *************************************************************** */
 template<class DataType>
 void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                              nifti_image *deformationField,
                                              int *mask,
-                                             bool composition)
-{
-   int coord;
-
-   const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint);
-   DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
-   DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber];
-   DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber];
-
-   const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField);
-   DataType *fieldPtrX=static_cast<DataType *>(deformationField->data);
-   DataType *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber];
-   DataType *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber];
-
-   int x, y, z, a, b, c, xPre, yPre, zPre, index;
-   DataType xBasis[2], yBasis[2], zBasis[2], real[3];
-
-   if(composition)  // Composition of deformation fields
-   {
-      // read the ijk sform or qform, as appropriate
-      mat44 referenceMatrix_real_to_voxel;
-      if(splineControlPoint->sform_code>0)
-         referenceMatrix_real_to_voxel=(splineControlPoint->sto_ijk);
-      else referenceMatrix_real_to_voxel=(splineControlPoint->qto_ijk);
-
-      DataType voxel[3];
-
-      for(z=0; z<deformationField->nz; z++)
-      {
-         index=z*deformationField->nx*deformationField->ny;
-         for(y=0; y<deformationField->ny; y++)
-         {
-            for(x=0; x<deformationField->nx; x++)
-            {
-               if(mask[index]>-1)
-               {
-                  // The previous position at the current pixel position is read
-                  real[0] = fieldPtrX[index];
-                  real[1] = fieldPtrY[index];
-                  real[2] = fieldPtrZ[index];
-
-                  // From real to pixel position in the control point space
-                  voxel[0] =
-                        referenceMatrix_real_to_voxel.m[0][0] * real[0] +
-                        referenceMatrix_real_to_voxel.m[0][1] * real[1] +
-                        referenceMatrix_real_to_voxel.m[0][2] * real[2] +
-                        referenceMatrix_real_to_voxel.m[0][3] ;
-                  voxel[1] =
-                        referenceMatrix_real_to_voxel.m[1][0] * real[0] +
-                        referenceMatrix_real_to_voxel.m[1][1] * real[1] +
-                        referenceMatrix_real_to_voxel.m[1][2] * real[2] +
-                        referenceMatrix_real_to_voxel.m[1][3] ;
-                  voxel[2] =
-                        referenceMatrix_real_to_voxel.m[2][0] * real[0] +
-                        referenceMatrix_real_to_voxel.m[2][1] * real[1] +
-                        referenceMatrix_real_to_voxel.m[2][2] * real[2] +
-                        referenceMatrix_real_to_voxel.m[2][3] ;
-
-                  // The spline coefficients are computed
-                  xPre=(int)reg_floor(voxel[0]);
-                  xBasis[1]=voxel[0]-static_cast<DataType>(xPre);
-                  if(xBasis[1]<0) xBasis[1]=0; //rounding error
-                  xBasis[0]=1.-xBasis[1];
-
-                  yPre=(int)reg_floor(voxel[1]);
-                  yBasis[1]=voxel[1]-static_cast<DataType>(yPre);
-                  if(yBasis[1]<0) yBasis[1]=0; //rounding error
-                  yBasis[0]=1.-yBasis[1];
-
-                  zPre=(int)reg_floor(voxel[2]);
-                  zBasis[1]=voxel[2]-static_cast<DataType>(zPre);
-                  if(zBasis[1]<0) zBasis[1]=0; //rounding error
-                  zBasis[0]=1.-zBasis[1];
-
-                  real[0]=0;
-                  real[1]=0;
-                  real[2]=0;
-                  for(c=0; c<2; c++){
-                     for(b=0; b<2; b++){
-                        for(a=0; a<2; a++){
-                           DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c];
-                           coord = ((zPre+c)*splineControlPoint->ny+yPre+b)*splineControlPoint->nx+xPre+a;
-                           real[0] += controlPointPtrX[coord] * tempValue;
-                           real[1] += controlPointPtrY[coord] * tempValue;
-                           real[2] += controlPointPtrZ[coord] * tempValue;
+                                             bool composition) {
+    int coord;
+
+    const size_t splineControlPointVoxelNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
+    DataType *controlPointPtrX = static_cast<DataType*>(splineControlPoint->data);
+    DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber];
+    DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber];
+
+    const size_t deformationFieldVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
+    DataType *fieldPtrX = static_cast<DataType*>(deformationField->data);
+    DataType *fieldPtrY = &fieldPtrX[deformationFieldVoxelNumber];
+    DataType *fieldPtrZ = &fieldPtrY[deformationFieldVoxelNumber];
+
+    int x, y, z, a, b, c, xPre, yPre, zPre, index;
+    DataType xBasis[2], yBasis[2], zBasis[2], real[3];
+
+    if (composition) { // Composition of deformation fields
+        // read the ijk sform or qform, as appropriate
+        mat44 referenceMatrix_real_to_voxel;
+        if (splineControlPoint->sform_code > 0)
+            referenceMatrix_real_to_voxel = splineControlPoint->sto_ijk;
+        else referenceMatrix_real_to_voxel = splineControlPoint->qto_ijk;
+
+        DataType voxel[3];
+
+        for (z = 0; z < deformationField->nz; z++) {
+            index = z * deformationField->nx * deformationField->ny;
+            for (y = 0; y < deformationField->ny; y++) {
+                for (x = 0; x < deformationField->nx; x++) {
+                    if (mask[index] > -1) {
+                        // The previous position at the current pixel position is read
+                        real[0] = fieldPtrX[index];
+                        real[1] = fieldPtrY[index];
+                        real[2] = fieldPtrZ[index];
+
+                        // From real to pixel position in the control point space
+                        voxel[0] =
+                            referenceMatrix_real_to_voxel.m[0][0] * real[0] +
+                            referenceMatrix_real_to_voxel.m[0][1] * real[1] +
+                            referenceMatrix_real_to_voxel.m[0][2] * real[2] +
+                            referenceMatrix_real_to_voxel.m[0][3];
+                        voxel[1] =
+                            referenceMatrix_real_to_voxel.m[1][0] * real[0] +
+                            referenceMatrix_real_to_voxel.m[1][1] * real[1] +
+                            referenceMatrix_real_to_voxel.m[1][2] * real[2] +
+                            referenceMatrix_real_to_voxel.m[1][3];
+                        voxel[2] =
+                            referenceMatrix_real_to_voxel.m[2][0] * real[0] +
+                            referenceMatrix_real_to_voxel.m[2][1] * real[1] +
+                            referenceMatrix_real_to_voxel.m[2][2] * real[2] +
+                            referenceMatrix_real_to_voxel.m[2][3];
+
+                        // The spline coefficients are computed
+                        xPre = (int)reg_floor(voxel[0]);
+                        xBasis[1] = voxel[0] - static_cast<DataType>(xPre);
+                        if (xBasis[1] < 0) xBasis[1] = 0; //rounding error
+                        xBasis[0] = 1.f - xBasis[1];
+
+                        yPre = (int)reg_floor(voxel[1]);
+                        yBasis[1] = voxel[1] - static_cast<DataType>(yPre);
+                        if (yBasis[1] < 0) yBasis[1] = 0; //rounding error
+                        yBasis[0] = 1.f - yBasis[1];
+
+                        zPre = (int)reg_floor(voxel[2]);
+                        zBasis[1] = voxel[2] - static_cast<DataType>(zPre);
+                        if (zBasis[1] < 0) zBasis[1] = 0; //rounding error
+                        zBasis[0] = 1.f - zBasis[1];
+
+                        real[0] = 0;
+                        real[1] = 0;
+                        real[2] = 0;
+                        for (c = 0; c < 2; c++) {
+                            for (b = 0; b < 2; b++) {
+                                for (a = 0; a < 2; a++) {
+                                    DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c];
+                                    coord = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a;
+                                    real[0] += controlPointPtrX[coord] * tempValue;
+                                    real[1] += controlPointPtrY[coord] * tempValue;
+                                    real[2] += controlPointPtrZ[coord] * tempValue;
+                                }
+                            }
                         }
-                     }
-                  }
-                  fieldPtrX[index] = real[0];
-                  fieldPtrY[index] = real[1];
-                  fieldPtrZ[index] = real[2];
-               } // mask
-               index++;
+                        fieldPtrX[index] = real[0];
+                        fieldPtrY[index] = real[1];
+                        fieldPtrZ[index] = real[2];
+                    } // mask
+                    index++;
+                }
             }
-         }
-      }
-   }//Composition of deformation
-   else  // !composition
-   {
-      DataType gridVoxelSpacing[3];
-      gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
-      gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy;
-      gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz;
-      DataType tempValue;
+        }
+    } else {  // !composition
+        DataType gridVoxelSpacing[3];
+        gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
+        gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy;
+        gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz;
+        DataType tempValue;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   private(x, y, z, a, b, c, xPre, yPre, zPre, xBasis, yBasis, zBasis, real, index, coord, tempValue) \
+   private(x, y, a, b, c, xPre, yPre, zPre, xBasis, yBasis, zBasis, real, index, coord, tempValue) \
    shared(deformationField, gridVoxelSpacing, mask, fieldPtrX, fieldPtrY, fieldPtrZ, \
    controlPointPtrX, controlPointPtrY, controlPointPtrZ, splineControlPoint)
 #endif // _OPENMP
-      for(z=0; z<deformationField->nz; z++)
-      {
-         index=z*deformationField->nx*deformationField->ny;
-
-         zPre=static_cast<int>(static_cast<DataType>(z)/gridVoxelSpacing[2]);
-         zBasis[1]=static_cast<DataType>(z)/gridVoxelSpacing[2]-static_cast<DataType>(zPre);
-         if(zBasis[1]<0) zBasis[1]=0; //rounding error
-         zBasis[0]=1.-zBasis[1];
-         zPre++;
-
-         for(y=0; y<deformationField->ny; y++)
-         {
-
-            yPre=static_cast<int>(static_cast<DataType>(y)/gridVoxelSpacing[1]);
-            yBasis[1]=static_cast<DataType>(y)/gridVoxelSpacing[1]-static_cast<DataType>(yPre);
-            if(yBasis[1]<0) yBasis[1]=0; //rounding error
-            yBasis[0]=1.-yBasis[1];
-            yPre++;
-
-            for(x=0; x<deformationField->nx; x++)
-            {
-               real[0]=0;
-               real[1]=0;
-               real[2]=0;
-
-               if(mask[index]>-1)
-               {
-                  xPre=static_cast<int>(static_cast<DataType>(x)/gridVoxelSpacing[0]);
-                  xBasis[1]=static_cast<DataType>(x)/gridVoxelSpacing[0]-static_cast<DataType>(xPre);
-                  if(xBasis[1]<0) xBasis[1]=0; //rounding error
-                  xBasis[0]=1.-xBasis[1];
-                  xPre++;
-                  real[0]=0;
-                  real[1]=0;
-                  real[2]=0;
-                  for(c=0; c<2; c++){
-                     for(b=0; b<2; b++){
-                        for(a=0; a<2; a++){
-                           tempValue = xBasis[a] * yBasis[b] * zBasis[c];
-                           coord = ((zPre+c)*splineControlPoint->ny+yPre+b)*splineControlPoint->nx+xPre+a;
-                           real[0] += controlPointPtrX[coord] * tempValue;
-                           real[1] += controlPointPtrY[coord] * tempValue;
-                           real[2] += controlPointPtrZ[coord] * tempValue;
+        for (z = 0; z < deformationField->nz; z++) {
+            index = z * deformationField->nx * deformationField->ny;
+
+            zPre = static_cast<int>(static_cast<DataType>(z) / gridVoxelSpacing[2]);
+            zBasis[1] = static_cast<DataType>(z) / gridVoxelSpacing[2] - static_cast<DataType>(zPre);
+            if (zBasis[1] < 0) zBasis[1] = 0; //rounding error
+            zBasis[0] = 1.f - zBasis[1];
+            zPre++;
+
+            for (y = 0; y < deformationField->ny; y++) {
+                yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
+                yBasis[1] = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
+                if (yBasis[1] < 0) yBasis[1] = 0; //rounding error
+                yBasis[0] = 1.f - yBasis[1];
+                yPre++;
+
+                for (x = 0; x < deformationField->nx; x++) {
+                    real[0] = 0;
+                    real[1] = 0;
+                    real[2] = 0;
+
+                    if (mask[index] > -1) {
+                        xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
+                        xBasis[1] = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
+                        if (xBasis[1] < 0) xBasis[1] = 0; //rounding error
+                        xBasis[0] = 1.f - xBasis[1];
+                        xPre++;
+                        real[0] = 0;
+                        real[1] = 0;
+                        real[2] = 0;
+                        for (c = 0; c < 2; c++) {
+                            for (b = 0; b < 2; b++) {
+                                for (a = 0; a < 2; a++) {
+                                    tempValue = xBasis[a] * yBasis[b] * zBasis[c];
+                                    coord = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a;
+                                    real[0] += controlPointPtrX[coord] * tempValue;
+                                    real[1] += controlPointPtrY[coord] * tempValue;
+                                    real[2] += controlPointPtrZ[coord] * tempValue;
+                                }
+                            }
                         }
-                     }
-                  }
-               }// mask
-               fieldPtrX[index] = real[0];
-               fieldPtrY[index] = real[1];
-               fieldPtrZ[index] = real[2];
-               index++;
-            } // x
-         } // y
-      } // z
-   }// from a deformation field
-
-   return;
+                    }// mask
+                    fieldPtrX[index] = real[0];
+                    fieldPtrY[index] = real[1];
+                    fieldPtrZ[index] = real[2];
+                    index++;
+                } // x
+            } // y
+        } // z
+    }// from a deformation field
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DataType>
 void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
-                                      nifti_image *deformationField,
-                                      int *mask,
-                                      bool composition,
-                                      bool bspline)
-{
-
+                                            nifti_image *deformationField,
+                                            int *mask,
+                                            bool composition,
+                                            bool bspline) {
 #if _USE_SSE
-   union
-   {
-      __m128 m;
-      float f[4];
-   } val;
-   __m128 tempCurrent, tempX, tempY;
+    union {
+        __m128 m;
+        float f[4];
+    } val;
+    __m128 tempCurrent, tempX, tempY;
 #ifdef _WIN32
-   __declspec(align(16)) DataType temp[4];
-   __declspec(align(16)) DataType yBasis[4];
-   union
-   {
-      __m128 m[16];
-      __declspec(align(16)) DataType f[16];
-   } xControlPointCoordinates;
-   union
-   {
-      __m128 m[16];
-      __declspec(align(16)) DataType f[16];
-   } yControlPointCoordinates;
-   union u1
-   {
-      __m128 m[4];
-      __declspec(align(16)) DataType f[16];
-   } xyBasis;
+    __declspec(align(16)) DataType temp[4];
+    __declspec(align(16)) DataType yBasis[4];
+    union {
+        __m128 m[16];
+        __declspec(align(16)) DataType f[16];
+    } xControlPointCoordinates;
+    union {
+        __m128 m[16];
+        __declspec(align(16)) DataType f[16];
+    } yControlPointCoordinates;
+    union u1 {
+        __m128 m[4];
+        __declspec(align(16)) DataType f[16];
+    } xyBasis;
 #else // _WIN32
-   DataType temp[4] __attribute__((aligned(16)));
-   DataType yBasis[4] __attribute__((aligned(16)));
-   union
-   {
-      __m128 m[16];
-      DataType f[16] __attribute__((aligned(16)));
-   } xControlPointCoordinates;
-   union
-   {
-      __m128 m[16];
-      DataType f[16] __attribute__((aligned(16)));
-   } yControlPointCoordinates;
-   union u1
-   {
-      __m128 m[4];
-      DataType f[16] __attribute__((aligned(16)));
-   } xyBasis;
+    DataType temp[4] __attribute__((aligned(16)));
+    DataType yBasis[4] __attribute__((aligned(16)));
+    union {
+        __m128 m[16];
+        DataType f[16] __attribute__((aligned(16)));
+    } xControlPointCoordinates;
+    union {
+        __m128 m[16];
+        DataType f[16] __attribute__((aligned(16)));
+    } yControlPointCoordinates;
+    union u1 {
+        __m128 m[4];
+        DataType f[16] __attribute__((aligned(16)));
+    } xyBasis;
 #endif // _WIN32
 #else // _USE_SSE
-   DataType temp[4];
-   DataType yBasis[4];
-   DataType xyBasis[16];
-   DataType xControlPointCoordinates[16];
-   DataType yControlPointCoordinates[16];
+    DataType temp[4];
+    DataType yBasis[4];
+    DataType xyBasis[16];
+    DataType xControlPointCoordinates[16];
+    DataType yControlPointCoordinates[16];
 #endif // _USE_SSE
 
-
-   DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
-   DataType *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*splineControlPoint, 2)];
-
-   DataType *fieldPtrX=static_cast<DataType *>(deformationField->data);
-   DataType *fieldPtrY = &fieldPtrX[CalcVoxelNumber(*deformationField)];
-
-   DataType gridVoxelSpacing[2];
-   gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
-   gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy;
-
-   DataType basis, xReal, yReal, xVoxel, yVoxel;
-   int x, y, a, b, xPre, yPre, oldXpre, oldYpre;
-   size_t index, coord;
-
-   if(composition)  // Composition of deformation fields
-   {
-
-      // read the ijk sform or qform, as appropriate
-      mat44 *referenceMatrix_real_to_voxel;
-      if(splineControlPoint->sform_code>0)
-         referenceMatrix_real_to_voxel=&(splineControlPoint->sto_ijk);
-      else referenceMatrix_real_to_voxel=&(splineControlPoint->qto_ijk);
-
-      for(y=0; y<deformationField->ny; y++)
-      {
-         index=y*deformationField->nx;
-         oldXpre=oldYpre=99999999;
-         for(x=0; x<deformationField->nx; x++)
-         {
-
-            // The previous position at the current pixel position is read
-            xReal = (DataType)(fieldPtrX[index]);
-            yReal = (DataType)(fieldPtrY[index]);
-
-            // From real to pixel position in the CPP
-            xVoxel = referenceMatrix_real_to_voxel->m[0][0]*xReal
-                  + referenceMatrix_real_to_voxel->m[0][1]*yReal
-                  + referenceMatrix_real_to_voxel->m[0][3];
-            yVoxel = referenceMatrix_real_to_voxel->m[1][0]*xReal
-                  + referenceMatrix_real_to_voxel->m[1][1]*yReal
-                  + referenceMatrix_real_to_voxel->m[1][3];
-
-            // The spline coefficients are computed
-            xPre=(int)reg_floor(xVoxel);
-            basis=xVoxel-(DataType)xPre;
-            --xPre;
-            if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
-            else get_SplineBasisValues<DataType>(basis, temp);
-
-            yPre=(int)reg_floor(yVoxel);
-            basis=yVoxel-(DataType)yPre;
-            --yPre;
-            if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
-            else get_SplineBasisValues<DataType>(basis, yBasis);
-
-
-            if(xVoxel>=0 && xVoxel<=deformationField->nx-1 &&
-                  yVoxel>=0 && yVoxel<=deformationField->ny-1)
-            {
-
-               // The control point positions are extracted
-               if(oldXpre!=xPre || oldYpre!=yPre)
-               {
+    DataType *controlPointPtrX = static_cast<DataType*>(splineControlPoint->data);
+    DataType *controlPointPtrY = &controlPointPtrX[NiftiImage::calcVoxelNumber(splineControlPoint, 2)];
+
+    DataType *fieldPtrX = static_cast<DataType*>(deformationField->data);
+    DataType *fieldPtrY = &fieldPtrX[NiftiImage::calcVoxelNumber(deformationField, 3)];
+
+    DataType gridVoxelSpacing[2];
+    gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
+    gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy;
+
+    DataType basis, xReal, yReal, xVoxel, yVoxel;
+    int x, y, a, b, xPre, yPre, oldXpre, oldYpre;
+    size_t index, coord;
+
+    if (composition) { // Composition of deformation fields
+        // read the ijk sform or qform, as appropriate
+        const mat44 *referenceMatrix_real_to_voxel;
+        if (splineControlPoint->sform_code > 0)
+            referenceMatrix_real_to_voxel = &splineControlPoint->sto_ijk;
+        else referenceMatrix_real_to_voxel = &splineControlPoint->qto_ijk;
+
+        for (y = 0; y < deformationField->ny; y++) {
+            index = y * deformationField->nx;
+            oldXpre = oldYpre = 99999999;
+            for (x = 0; x < deformationField->nx; x++) {
+
+                // The previous position at the current pixel position is read
+                xReal = static_cast<DataType>(fieldPtrX[index]);
+                yReal = static_cast<DataType>(fieldPtrY[index]);
+
+                // From real to pixel position in the CPP
+                xVoxel = referenceMatrix_real_to_voxel->m[0][0] * xReal
+                    + referenceMatrix_real_to_voxel->m[0][1] * yReal
+                    + referenceMatrix_real_to_voxel->m[0][3];
+                yVoxel = referenceMatrix_real_to_voxel->m[1][0] * xReal
+                    + referenceMatrix_real_to_voxel->m[1][1] * yReal
+                    + referenceMatrix_real_to_voxel->m[1][3];
+
+                // The spline coefficients are computed
+                xPre = (int)reg_floor(xVoxel);
+                basis = xVoxel - static_cast<DataType>(xPre--);
+                if (basis < 0) basis = 0; //rounding error
+                if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
+                else get_SplineBasisValues<DataType>(basis, temp);
+
+                yPre = (int)reg_floor(yVoxel);
+                basis = yVoxel - static_cast<DataType>(yPre--);
+                if (basis < 0) basis = 0; //rounding error
+                if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+                else get_SplineBasisValues<DataType>(basis, yBasis);
+
+                if (xVoxel >= 0 && xVoxel <= deformationField->nx - 1 &&
+                    yVoxel >= 0 && yVoxel <= deformationField->ny - 1) {
+                    // The control point positions are extracted
+                    if (oldXpre != xPre || oldYpre != yPre) {
 #ifdef _USE_SSE
-                  get_GridValues<DataType>(xPre,
-                                        yPre,
-                                        splineControlPoint,
-                                        controlPointPtrX,
-                                        controlPointPtrY,
-                                        xControlPointCoordinates.f,
-                                        yControlPointCoordinates.f,
-                                        false, // no approximation
-                                        false // not a displacement field
-                                        );
+                        get_GridValues<DataType>(xPre,
+                                                 yPre,
+                                                 splineControlPoint,
+                                                 controlPointPtrX,
+                                                 controlPointPtrY,
+                                                 xControlPointCoordinates.f,
+                                                 yControlPointCoordinates.f,
+                                                 false,  // no approximation
+                                                 false); // not a displacement field
 #else // _USE_SSE
-                  get_GridValues<DataType>(xPre,
-                                        yPre,
-                                        splineControlPoint,
-                                        controlPointPtrX,
-                                        controlPointPtrY,
-                                        xControlPointCoordinates,
-                                        yControlPointCoordinates,
-                                        false, // no approximation
-                                        false // not a displacement field
-                                        );
+                        get_GridValues<DataType>(xPre,
+                                                 yPre,
+                                                 splineControlPoint,
+                                                 controlPointPtrX,
+                                                 controlPointPtrY,
+                                                 xControlPointCoordinates,
+                                                 yControlPointCoordinates,
+                                                 false,  // no approximation
+                                                 false); // not a displacement field
 #endif // _USE_SSE
-                  oldXpre=xPre;
-                  oldYpre=yPre;
-               }
-               xReal=0;
-               yReal=0;
-
-               if(mask[index]>-1)
-               {
+                        oldXpre = xPre;
+                        oldYpre = yPre;
+                    }
+                    xReal = 0;
+                    yReal = 0;
+
+                    if (mask[index] > -1) {
 #if _USE_SSE
-                  coord=0;
-                  for(b=0; b<4; b++)
-                  {
-                     for(a=0; a<4; a++)
-                     {
-                        xyBasis.f[coord++] = temp[a] * yBasis[b];
-                     }
-                  }
-
-                  tempX =  _mm_set_ps1(0);
-                  tempY =  _mm_set_ps1(0);
-                  //addition and multiplication of the 16 basis value and CP position for each axis
-                  for(a=0; a<4; a++)
-                  {
-                     tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX );
-                     tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY );
-                  }
-                  //the values stored in SSE variables are transferred to normal float
-                  val.m = tempX;
-                  xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
-                  val.m = tempY;
-                  yReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
+                        coord = 0;
+                        for (b = 0; b < 4; b++) {
+                            for (a = 0; a < 4; a++) {
+                                xyBasis.f[coord++] = temp[a] * yBasis[b];
+                            }
+                        }
+
+                        tempX = _mm_set_ps1(0);
+                        tempY = _mm_set_ps1(0);
+                        //addition and multiplication of the 16 basis value and CP position for each axis
+                        for (a = 0; a < 4; a++) {
+                            tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX);
+                            tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY);
+                        }
+                        //the values stored in SSE variables are transferred to normal float
+                        val.m = tempX;
+                        xReal = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+                        val.m = tempY;
+                        yReal = val.f[0] + val.f[1] + val.f[2] + val.f[3];
 #else
-                  for(b=0; b<4; b++)
-                  {
-                     for(a=0; a<4; a++)
-                     {
-                        DataType tempValue = temp[a] * yBasis[b];
-                        xReal += xControlPointCoordinates[b*4+a] * tempValue;
-                        yReal += yControlPointCoordinates[b*4+a] * tempValue;
-                     }
-                  }
+                        for (b = 0; b < 4; b++) {
+                            for (a = 0; a < 4; a++) {
+                                DataType tempValue = temp[a] * yBasis[b];
+                                xReal += xControlPointCoordinates[b * 4 + a] * tempValue;
+                                yReal += yControlPointCoordinates[b * 4 + a] * tempValue;
+                            }
+                        }
 #endif
-               }
+                    }
 
-               fieldPtrX[index] = (DataType)xReal;
-               fieldPtrY[index] = (DataType)yReal;
+                    fieldPtrX[index] = (DataType)xReal;
+                    fieldPtrY[index] = (DataType)yReal;
+                }
+                index++;
             }
-            index++;
-         }
-      }
-   }
-   else  // starting deformation field is blank - !composition
-   {
-
+        }
+    } else { // starting deformation field is blank - !composition
 #ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma  omp parallel for default(none) \
    shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \
    controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \
-   private(x, y, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, \
+   private(x, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, \
    val, temp, yBasis, tempCurrent, xyBasis, tempX, tempY, \
    xControlPointCoordinates, yControlPointCoordinates)
 #else // _USE_SSE
 #pragma  omp parallel for default(none) \
    shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \
    controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \
-   private(x, y, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, coord, \
+   private(x, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, coord, \
    temp, yBasis, xyBasis, xControlPointCoordinates, yControlPointCoordinates)
 #endif // _USE_SEE
 #endif // _OPENMP
-      for( y=0; y<deformationField->ny; y++)
-      {
-         index=y*deformationField->nx;
-         oldXpre=oldYpre=9999999;
-
-         yPre=(int)((DataType)y/gridVoxelSpacing[1]);
-         basis=(DataType)y/gridVoxelSpacing[1]-(DataType)yPre;
-         if(basis<0) basis=0; //rounding error
-         if(bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
-         else get_SplineBasisValues<DataType>(basis, yBasis);
-
-         for(x=0; x<deformationField->nx; x++)
-         {
-
-            xPre=(int)((DataType)x/gridVoxelSpacing[0]);
-            basis=(DataType)x/gridVoxelSpacing[0]-(DataType)xPre;
-            if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
-            else get_SplineBasisValues<DataType>(basis, temp);
+        for (y = 0; y < deformationField->ny; y++) {
+            index = y * deformationField->nx;
+            oldXpre = oldYpre = 9999999;
+
+            yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
+            basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
+            if (basis < 0) basis = 0; //rounding error
+            if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+            else get_SplineBasisValues<DataType>(basis, yBasis);
+
+            for (x = 0; x < deformationField->nx; x++) {
+                xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
+                basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
+                if (basis < 0) basis = 0; //rounding error
+                if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
+                else get_SplineBasisValues<DataType>(basis, temp);
 #if _USE_SSE
-            val.f[0] = temp[0];
-            val.f[1] = temp[1];
-            val.f[2] = temp[2];
-            val.f[3] = temp[3];
-            tempCurrent=val.m;
-            for(a=0; a<4; a++)
-            {
-               val.m=_mm_set_ps1(yBasis[a]);
-               xyBasis.m[a]=_mm_mul_ps(tempCurrent,val.m);
-            }
+                val.f[0] = static_cast<float>(temp[0]);
+                val.f[1] = static_cast<float>(temp[1]);
+                val.f[2] = static_cast<float>(temp[2]);
+                val.f[3] = static_cast<float>(temp[3]);
+                tempCurrent = val.m;
+                for (a = 0; a < 4; a++) {
+                    val.m = _mm_set_ps1(static_cast<float>(yBasis[a]));
+                    xyBasis.m[a] = _mm_mul_ps(tempCurrent, val.m);
+                }
 #else
-            coord=0;
-            for(a=0; a<4; a++)
-            {
-               xyBasis[coord++]=temp[0]*yBasis[a];
-               xyBasis[coord++]=temp[1]*yBasis[a];
-               xyBasis[coord++]=temp[2]*yBasis[a];
-               xyBasis[coord++]=temp[3]*yBasis[a];
-            }
+                coord = 0;
+                for (a = 0; a < 4; a++) {
+                    xyBasis[coord++] = temp[0] * yBasis[a];
+                    xyBasis[coord++] = temp[1] * yBasis[a];
+                    xyBasis[coord++] = temp[2] * yBasis[a];
+                    xyBasis[coord++] = temp[3] * yBasis[a];
+                }
 #endif
-            if(oldXpre!=xPre || oldYpre!=yPre)
-            {
+                if (oldXpre != xPre || oldYpre != yPre) {
 #ifdef _USE_SSE
-               get_GridValues<DataType>(xPre,
-                                     yPre,
-                                     splineControlPoint,
-                                     controlPointPtrX,
-                                     controlPointPtrY,
-                                     xControlPointCoordinates.f,
-                                     yControlPointCoordinates.f,
-                                     false, // no approximation
-                                     false // not a deformation field
-                                     );
+                    get_GridValues<DataType>(xPre,
+                                             yPre,
+                                             splineControlPoint,
+                                             controlPointPtrX,
+                                             controlPointPtrY,
+                                             xControlPointCoordinates.f,
+                                             yControlPointCoordinates.f,
+                                             false,  // no approximation
+                                             false); // not a deformation field
 #else // _USE_SSE
-               get_GridValues<DataType>(xPre,
-                                     yPre,
-                                     splineControlPoint,
-                                     controlPointPtrX,
-                                     controlPointPtrY,
-                                     xControlPointCoordinates,
-                                     yControlPointCoordinates,
-                                     false, // no approximation
-                                     false // not a deformation field
-                                     );
+                    get_GridValues<DataType>(xPre,
+                                             yPre,
+                                             splineControlPoint,
+                                             controlPointPtrX,
+                                             controlPointPtrY,
+                                             xControlPointCoordinates,
+                                             yControlPointCoordinates,
+                                             false,  // no approximation
+                                             false); // not a deformation field
 #endif // _USE_SSE
-               oldXpre=xPre;
-               oldYpre=yPre;
-            }
+                    oldXpre = xPre;
+                    oldYpre = yPre;
+                }
 
-            xReal=0;
-            yReal=0;
+                xReal = 0;
+                yReal = 0;
 
-            if(mask[index]>-1)
-            {
+                if (mask[index] > -1) {
 #if _USE_SSE
-               tempX =  _mm_set_ps1(0);
-               tempY =  _mm_set_ps1(0);
-               //addition and multiplication of the 64 basis value and CP displacement for each axis
-               for(a=0; a<4; a++)
-               {
-                  tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX );
-                  tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY );
-               }
-               //the values stored in SSE variables are transferred to normal float
-               val.m=tempX;
-               xReal=val.f[0]+val.f[1]+val.f[2]+val.f[3];
-               val.m=tempY;
-               yReal= val.f[0]+val.f[1]+val.f[2]+val.f[3];
+                    tempX = _mm_set_ps1(0);
+                    tempY = _mm_set_ps1(0);
+                    //addition and multiplication of the 64 basis value and CP displacement for each axis
+                    for (a = 0; a < 4; a++) {
+                        tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX);
+                        tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY);
+                    }
+                    //the values stored in SSE variables are transferred to normal float
+                    val.m = tempX;
+                    xReal = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+                    val.m = tempY;
+                    yReal = val.f[0] + val.f[1] + val.f[2] + val.f[3];
 #else
-               for(a=0; a<16; a++)
-               {
-                  xReal += xControlPointCoordinates[a] * xyBasis[a];
-                  yReal += yControlPointCoordinates[a] * xyBasis[a];
-               }
+                    for (a = 0; a < 16; a++) {
+                        xReal += xControlPointCoordinates[a] * xyBasis[a];
+                        yReal += yControlPointCoordinates[a] * xyBasis[a];
+                    }
 #endif
-            }// mask
-            fieldPtrX[index] = (DataType)xReal;
-            fieldPtrY[index] = (DataType)yReal;
-            index++;
-         } // x
-      } // y
-   } // composition
-
-   return;
+                }// mask
+                fieldPtrX[index] = (DataType)xReal;
+                fieldPtrY[index] = (DataType)yReal;
+                index++;
+            } // x
+        } // y
+    } // composition
 }
 /* *************************************************************** */
 template<class DataType>
@@ -904,103 +813,94 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                             int *mask,
                                             bool composition,
                                             bool bspline,
-                                            bool force_no_lut=false)
-{
+                                            bool force_no_lut = false) {
 #if _USE_SSE
-   union
-   {
-      __m128 m;
-      float f[4];
-   } val;
-   __m128 tempX, tempY, tempZ, tempCurrent;
-   __m128 xBasis_sse, yBasis_sse, zBasis_sse, temp_basis_sse, basis_sse;
+    union {
+        __m128 m;
+        float f[4];
+    } val;
+    __m128 tempX, tempY, tempZ, tempCurrent;
+    __m128 xBasis_sse, yBasis_sse, zBasis_sse, temp_basis_sse, basis_sse;
 
 #ifdef _WIN32
-   __declspec(align(16)) DataType temp[4];
-   __declspec(align(16)) DataType zBasis[4];
-   union
-   {
-      __m128 m[16];
-      __declspec(align(16)) DataType f[16];
-   } xControlPointCoordinates;
-   union
-   {
-      __m128 m[16];
-      __declspec(align(16)) DataType f[16];
-   } yControlPointCoordinates;
-   union
-   {
-      __m128 m[16];
-      __declspec(align(16)) DataType f[16];
-   } zControlPointCoordinates;
+    __declspec(align(16)) DataType temp[4];
+    __declspec(align(16)) DataType zBasis[4];
+    union {
+        __m128 m[16];
+        __declspec(align(16)) DataType f[16];
+    } xControlPointCoordinates;
+    union {
+        __m128 m[16];
+        __declspec(align(16)) DataType f[16];
+    } yControlPointCoordinates;
+    union {
+        __m128 m[16];
+        __declspec(align(16)) DataType f[16];
+    } zControlPointCoordinates;
 #else // _WIN32
-   DataType temp[4] __attribute__((aligned(16)));
-   DataType zBasis[4] __attribute__((aligned(16)));
-   union
-   {
-      __m128 m[16];
-      DataType f[16] __attribute__((aligned(16)));
-   } xControlPointCoordinates;
-   union
-   {
-      __m128 m[16];
-      DataType f[16] __attribute__((aligned(16)));
-   } yControlPointCoordinates;
-   union
-   {
-      __m128 m[16];
-      DataType f[16] __attribute__((aligned(16)));
-   } zControlPointCoordinates;
+    DataType temp[4] __attribute__((aligned(16)));
+    DataType zBasis[4] __attribute__((aligned(16)));
+    union {
+        __m128 m[16];
+        DataType f[16] __attribute__((aligned(16)));
+    } xControlPointCoordinates;
+    union {
+        __m128 m[16];
+        DataType f[16] __attribute__((aligned(16)));
+    } yControlPointCoordinates;
+    union {
+        __m128 m[16];
+        DataType f[16] __attribute__((aligned(16)));
+    } zControlPointCoordinates;
 #endif // _WIN32
 #else // _USE_SSE
-   DataType temp[4];
-   DataType zBasis[4];
-   DataType xControlPointCoordinates[64];
-   DataType yControlPointCoordinates[64];
-   DataType zControlPointCoordinates[64];
-   int coord;
+    DataType temp[4];
+    DataType zBasis[4];
+    DataType xControlPointCoordinates[64];
+    DataType yControlPointCoordinates[64];
+    DataType zControlPointCoordinates[64];
+    int coord;
 #endif // _USE_SSE
 
-   const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint);
-   DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
-   DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber];
-   DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber];
+    const size_t splineControlPointVoxelNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
+    DataType *controlPointPtrX = static_cast<DataType*>(splineControlPoint->data);
+    DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber];
+    DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber];
 
-   const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField);
-   DataType *fieldPtrX=static_cast<DataType *>(deformationField->data);
-   DataType *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber];
-   DataType *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber];
+    const size_t deformationFieldVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
+    DataType *fieldPtrX = static_cast<DataType*>(deformationField->data);
+    DataType *fieldPtrY = &fieldPtrX[deformationFieldVoxelNumber];
+    DataType *fieldPtrZ = &fieldPtrY[deformationFieldVoxelNumber];
 
-   DataType basis, oldBasis=(DataType)(1.1);
+    DataType basis, oldBasis = 1.1f;
 
-   int x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, index;
-   DataType real[3];
+    int x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, index;
+    DataType real[3];
 
-   if(composition)  // Composition of deformation fields
-   {
-      // read the ijk sform or qform, as appropriate
-      mat44 referenceMatrix_real_to_voxel;
-      if(splineControlPoint->sform_code>0)
-         referenceMatrix_real_to_voxel=(splineControlPoint->sto_ijk);
-      else referenceMatrix_real_to_voxel=(splineControlPoint->qto_ijk);
+    if (composition) {  // Composition of deformation fields
+        // read the ijk sform or qform, as appropriate
+        mat44 referenceMatrix_real_to_voxel;
+        if (splineControlPoint->sform_code > 0)
+            referenceMatrix_real_to_voxel = splineControlPoint->sto_ijk;
+        else referenceMatrix_real_to_voxel = splineControlPoint->qto_ijk;
 #ifdef _USE_SSE
 #ifdef _WIN32
-      __declspec(align(16)) DataType xBasis[4];
-      __declspec(align(16)) DataType yBasis[4];
+        __declspec(align(16)) DataType xBasis[4];
+        __declspec(align(16)) DataType yBasis[4];
 #else
-      DataType xBasis[4] __attribute__((aligned(16)));
-      DataType yBasis[4] __attribute__((aligned(16)));
+        DataType xBasis[4] __attribute__((aligned(16)));
+        DataType yBasis[4] __attribute__((aligned(16)));
 #endif
 #else // _USE_SSE
-      DataType xBasis[4], yBasis[4];
+        DataType xBasis[4], yBasis[4];
 #endif // _USE_SSE
 
-      DataType voxel[3];
+        DataType voxel[3];
 
 #ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma omp parallel for default(none) \
-   private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \
+   private(x, y, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \
    index, voxel, basis, xBasis, yBasis, zBasis, xControlPointCoordinates, \
    yControlPointCoordinates, zControlPointCoordinates,  \
    tempX, tempY, tempZ, xBasis_sse, yBasis_sse, zBasis_sse, \
@@ -1010,7 +910,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
    splineControlPoint, mask)
 #else
 #pragma omp parallel for default(none) \
-   private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \
+   private(x, y, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \
    index, voxel, basis, xBasis, yBasis, zBasis, xControlPointCoordinates, \
    yControlPointCoordinates, zControlPointCoordinates, coord) \
    shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, referenceMatrix_real_to_voxel, \
@@ -1018,419 +918,374 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
    splineControlPoint, mask)
 #endif // _USE_SSE
 #endif // _OPENMP
-      for(z=0; z<deformationField->nz; z++)
-      {
-
-         index=z*deformationField->nx*deformationField->ny;
-         oldPreX=-99;
-         oldPreY=-99;
-         oldPreZ=-99;
-         for(y=0; y<deformationField->ny; y++)
-         {
-            for(x=0; x<deformationField->nx; x++)
-            {
-
-               if(mask[index]>-1)
-               {
-                  // The previous position at the current pixel position is read
-                  real[0] = fieldPtrX[index];
-                  real[1] = fieldPtrY[index];
-                  real[2] = fieldPtrZ[index];
-
-                  // From real to pixel position in the control point space
-                  voxel[0] =
-                        referenceMatrix_real_to_voxel.m[0][0] * real[0] +
-                        referenceMatrix_real_to_voxel.m[0][1] * real[1] +
-                        referenceMatrix_real_to_voxel.m[0][2] * real[2] +
-                        referenceMatrix_real_to_voxel.m[0][3] ;
-                  voxel[1] =
-                        referenceMatrix_real_to_voxel.m[1][0] * real[0] +
-                        referenceMatrix_real_to_voxel.m[1][1] * real[1] +
-                        referenceMatrix_real_to_voxel.m[1][2] * real[2] +
-                        referenceMatrix_real_to_voxel.m[1][3] ;
-                  voxel[2] =
-                        referenceMatrix_real_to_voxel.m[2][0] * real[0] +
-                        referenceMatrix_real_to_voxel.m[2][1] * real[1] +
-                        referenceMatrix_real_to_voxel.m[2][2] * real[2] +
-                        referenceMatrix_real_to_voxel.m[2][3] ;
-                  //                        reg_mat44_mul(referenceMatrix_real_to_voxel, real, voxel);
-
-                  // The spline coefficients are computed
-                  xPre=(int)reg_floor(voxel[0]);
-                  basis=voxel[0]-static_cast<DataType>(xPre);
-                  --xPre;
-                  if(basis<0) basis=0; //rounding error
-                  if(bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
-                  else get_SplineBasisValues<DataType>(basis, xBasis);
-
-                  yPre=(int)reg_floor(voxel[1]);
-                  basis=voxel[1]-static_cast<DataType>(yPre);
-                  --yPre;
-                  if(basis<0) basis=0; //rounding error
-                  if(bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
-                  else get_SplineBasisValues<DataType>(basis, yBasis);
-
-                  zPre=(int)reg_floor(voxel[2]);
-                  basis=voxel[2]-static_cast<DataType>(zPre);
-                  --zPre;
-                  if(basis<0) basis=0; //rounding error
-                  if(bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
-                  else get_SplineBasisValues<DataType>(basis, zBasis);
-
-                  // The control point postions are extracted
-                  if(xPre!=oldPreX || yPre!=oldPreY || zPre!=oldPreZ)
-                  {
+        for (z = 0; z < deformationField->nz; z++) {
+            index = z * deformationField->nx * deformationField->ny;
+            oldPreX = -99;
+            oldPreY = -99;
+            oldPreZ = -99;
+            for (y = 0; y < deformationField->ny; y++) {
+                for (x = 0; x < deformationField->nx; x++) {
+                    if (mask[index] > -1) {
+                        // The previous position at the current pixel position is read
+                        real[0] = fieldPtrX[index];
+                        real[1] = fieldPtrY[index];
+                        real[2] = fieldPtrZ[index];
+
+                        // From real to pixel position in the control point space
+                        voxel[0] =
+                            referenceMatrix_real_to_voxel.m[0][0] * real[0] +
+                            referenceMatrix_real_to_voxel.m[0][1] * real[1] +
+                            referenceMatrix_real_to_voxel.m[0][2] * real[2] +
+                            referenceMatrix_real_to_voxel.m[0][3];
+                        voxel[1] =
+                            referenceMatrix_real_to_voxel.m[1][0] * real[0] +
+                            referenceMatrix_real_to_voxel.m[1][1] * real[1] +
+                            referenceMatrix_real_to_voxel.m[1][2] * real[2] +
+                            referenceMatrix_real_to_voxel.m[1][3];
+                        voxel[2] =
+                            referenceMatrix_real_to_voxel.m[2][0] * real[0] +
+                            referenceMatrix_real_to_voxel.m[2][1] * real[1] +
+                            referenceMatrix_real_to_voxel.m[2][2] * real[2] +
+                            referenceMatrix_real_to_voxel.m[2][3];
+
+                        // The spline coefficients are computed
+                        xPre = (int)reg_floor(voxel[0]);
+                        basis = voxel[0] - static_cast<DataType>(xPre--);
+                        if (basis < 0) basis = 0; //rounding error
+                        if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
+                        else get_SplineBasisValues<DataType>(basis, xBasis);
+
+                        yPre = (int)reg_floor(voxel[1]);
+                        basis = voxel[1] - static_cast<DataType>(yPre--);
+                        if (basis < 0) basis = 0; //rounding error
+                        if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+                        else get_SplineBasisValues<DataType>(basis, yBasis);
+
+                        zPre = (int)reg_floor(voxel[2]);
+                        basis = voxel[2] - static_cast<DataType>(zPre--);
+                        if (basis < 0) basis = 0; //rounding error
+                        if (bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
+                        else get_SplineBasisValues<DataType>(basis, zBasis);
+
+                        // The control point positions are extracted
+                        if (xPre != oldPreX || yPre != oldPreY || zPre != oldPreZ) {
 #ifdef _USE_SSE
-                     get_GridValues<DataType>(xPre,
-                                           yPre,
-                                           zPre,
-                                           splineControlPoint,
-                                           controlPointPtrX,
-                                           controlPointPtrY,
-                                           controlPointPtrZ,
-                                           xControlPointCoordinates.f,
-                                           yControlPointCoordinates.f,
-                                           zControlPointCoordinates.f,
-                                           false, // no approximation
-                                           false // not a deformation field
-                                           );
+                            get_GridValues<DataType>(xPre,
+                                                     yPre,
+                                                     zPre,
+                                                     splineControlPoint,
+                                                     controlPointPtrX,
+                                                     controlPointPtrY,
+                                                     controlPointPtrZ,
+                                                     xControlPointCoordinates.f,
+                                                     yControlPointCoordinates.f,
+                                                     zControlPointCoordinates.f,
+                                                     false,  // no approximation
+                                                     false); // not a deformation field
 #else // _USE_SSE
-                     get_GridValues<DataType>(xPre,
-                                           yPre,
-                                           zPre,
-                                           splineControlPoint,
-                                           controlPointPtrX,
-                                           controlPointPtrY,
-                                           controlPointPtrZ,
-                                           xControlPointCoordinates,
-                                           yControlPointCoordinates,
-                                           zControlPointCoordinates,
-                                           false, // no approximation
-                                           false // not a deformation field
-                                           );
+                            get_GridValues<DataType>(xPre,
+                                                     yPre,
+                                                     zPre,
+                                                     splineControlPoint,
+                                                     controlPointPtrX,
+                                                     controlPointPtrY,
+                                                     controlPointPtrZ,
+                                                     xControlPointCoordinates,
+                                                     yControlPointCoordinates,
+                                                     zControlPointCoordinates,
+                                                     false,  // no approximation
+                                                     false); // not a deformation field
 #endif // _USE_SSE
-                     oldPreX=xPre;
-                     oldPreY=yPre;
-                     oldPreZ=zPre;
-                  }
+                            oldPreX = xPre;
+                            oldPreY = yPre;
+                            oldPreZ = zPre;
+                        }
 
 #if _USE_SSE
-                  tempX =  _mm_set_ps1(0);
-                  tempY =  _mm_set_ps1(0);
-                  tempZ =  _mm_set_ps1(0);
-                  val.f[0] = xBasis[0];
-                  val.f[1] = xBasis[1];
-                  val.f[2] = xBasis[2];
-                  val.f[3] = xBasis[3];
-                  xBasis_sse = val.m;
-
-                  //addition and multiplication of the 16 basis value and CP position for each axis
-                  for(c=0; c<4; c++)
-                  {
-                     for(b=0; b<4; b++)
-                     {
-                        yBasis_sse  = _mm_set_ps1(yBasis[b]);
-                        zBasis_sse  = _mm_set_ps1(zBasis[c]);
-                        temp_basis_sse = _mm_mul_ps(yBasis_sse, zBasis_sse);
-                        basis_sse = _mm_mul_ps(temp_basis_sse, xBasis_sse);
-
-                        tempX = _mm_add_ps(_mm_mul_ps(basis_sse, xControlPointCoordinates.m[c*4+b]), tempX );
-                        tempY = _mm_add_ps(_mm_mul_ps(basis_sse, yControlPointCoordinates.m[c*4+b]), tempY );
-                        tempZ = _mm_add_ps(_mm_mul_ps(basis_sse, zControlPointCoordinates.m[c*4+b]), tempZ );
-                     }
-                  }
-                  //the values stored in SSE variables are transferred to normal float
-                  val.m = tempX;
-                  real[0] = val.f[0]+val.f[1]+val.f[2]+val.f[3];
-                  val.m = tempY;
-                  real[1] = val.f[0]+val.f[1]+val.f[2]+val.f[3];
-                  val.m = tempZ;
-                  real[2] = val.f[0]+val.f[1]+val.f[2]+val.f[3];
+                        tempX = _mm_set_ps1(0);
+                        tempY = _mm_set_ps1(0);
+                        tempZ = _mm_set_ps1(0);
+                        val.f[0] = static_cast<float>(xBasis[0]);
+                        val.f[1] = static_cast<float>(xBasis[1]);
+                        val.f[2] = static_cast<float>(xBasis[2]);
+                        val.f[3] = static_cast<float>(xBasis[3]);
+                        xBasis_sse = val.m;
+
+                        //addition and multiplication of the 16 basis value and CP position for each axis
+                        for (c = 0; c < 4; c++) {
+                            for (b = 0; b < 4; b++) {
+                                yBasis_sse = _mm_set_ps1(static_cast<float>(yBasis[b]));
+                                zBasis_sse = _mm_set_ps1(static_cast<float>(zBasis[c]));
+                                temp_basis_sse = _mm_mul_ps(yBasis_sse, zBasis_sse);
+                                basis_sse = _mm_mul_ps(temp_basis_sse, xBasis_sse);
+
+                                tempX = _mm_add_ps(_mm_mul_ps(basis_sse, xControlPointCoordinates.m[c * 4 + b]), tempX);
+                                tempY = _mm_add_ps(_mm_mul_ps(basis_sse, yControlPointCoordinates.m[c * 4 + b]), tempY);
+                                tempZ = _mm_add_ps(_mm_mul_ps(basis_sse, zControlPointCoordinates.m[c * 4 + b]), tempZ);
+                            }
+                        }
+                        //the values stored in SSE variables are transferred to normal float
+                        val.m = tempX;
+                        real[0] = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+                        val.m = tempY;
+                        real[1] = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+                        val.m = tempZ;
+                        real[2] = val.f[0] + val.f[1] + val.f[2] + val.f[3];
 #else
-                  real[0]=0;
-                  real[1]=0;
-                  real[2]=0;
-                  coord=0;
-                  for(c=0; c<4; c++)
-                  {
-                     for(b=0; b<4; b++)
-                     {
-                        for(a=0; a<4; a++)
-                        {
-                           DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c];
-                           real[0] += xControlPointCoordinates[coord] * tempValue;
-                           real[1] += yControlPointCoordinates[coord] * tempValue;
-                           real[2] += zControlPointCoordinates[coord] * tempValue;
-                           coord++;
+                        real[0] = 0;
+                        real[1] = 0;
+                        real[2] = 0;
+                        coord = 0;
+                        for (c = 0; c < 4; c++) {
+                            for (b = 0; b < 4; b++) {
+                                for (a = 0; a < 4; a++) {
+                                    DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c];
+                                    real[0] += xControlPointCoordinates[coord] * tempValue;
+                                    real[1] += yControlPointCoordinates[coord] * tempValue;
+                                    real[2] += zControlPointCoordinates[coord] * tempValue;
+                                    coord++;
+                                }
+                            }
                         }
-                     }
-                  }
 #endif
-                  fieldPtrX[index] = real[0];
-                  fieldPtrY[index] = real[1];
-                  fieldPtrZ[index] = real[2];
-               }
-               index++;
+                        fieldPtrX[index] = real[0];
+                        fieldPtrY[index] = real[1];
+                        fieldPtrZ[index] = real[2];
+                    }
+                    index++;
+                }
             }
-         }
-      }
-   }//Composition of deformation
-   else  // !composition
-   {
-      DataType gridVoxelSpacing[3];
-      gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
-      gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy;
-      gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz;
+        }
+    } else { // !composition
+        DataType gridVoxelSpacing[3];
+        gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
+        gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy;
+        gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz;
 
 #ifdef _USE_SSE
 #ifdef _WIN32
-      union u1
-      {
-         __m128 m[4];
-         __declspec(align(16)) DataType f[16];
-      } yzBasis;
-      union u2
-      {
-         __m128 m[16];
-         __declspec(align(16)) DataType f[64];
-      } xyzBasis;
+        union u1 {
+            __m128 m[4];
+            __declspec(align(16)) DataType f[16];
+        } yzBasis;
+        union u2 {
+            __m128 m[16];
+            __declspec(align(16)) DataType f[64];
+        } xyzBasis;
 #else // _WIN32
-      union
-      {
-         __m128 m[4];
-         DataType f[16] __attribute__((aligned(16)));
-      } yzBasis;
-      union
-      {
-         __m128 m[16];
-         DataType f[64] __attribute__((aligned(16)));
-      } xyzBasis;
+        union {
+            __m128 m[4];
+            DataType f[16] __attribute__((aligned(16)));
+        } yzBasis;
+        union {
+            __m128 m[16];
+            DataType f[64] __attribute__((aligned(16)));
+        } xyzBasis;
 #endif // _WIN32
 #else // _USE_SSE
-      DataType yzBasis[16], xyzBasis[64];
+        DataType yzBasis[16], xyzBasis[64];
 #endif // _USE_SSE
 
-      // Assess if lookup table can be used
-      if(gridVoxelSpacing[0]==5. && gridVoxelSpacing[0]==5. && gridVoxelSpacing[0]==5. && force_no_lut==false){
-
-          // Assign a single array that will contain all coefficients
-         DataType *coefficients = (DataType *)malloc(125*64*sizeof(DataType));
-          // Compute and store all required coefficients
-          int coeff_index;
+        // Assess if lookup table can be used
+        if (gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && force_no_lut == false) {
+            // Assign a single array that will contain all coefficients
+            DataType *coefficients = (DataType*)malloc(125 * 64 * sizeof(DataType));
+            // Compute and store all required coefficients
+            int coeff_index;
 #ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma omp parallel for default(none) \
-    private(x, y, z, a, b, c, coeff_index, basis, zBasis, temp, \
-    val, tempCurrent, yzBasis) \
+    private(x, y, a, coeff_index, basis, zBasis, temp, val, tempCurrent, yzBasis) \
     shared(coefficients, bspline)
 #else //  _USE_SSE
 #pragma omp parallel for default(none) \
-    private(x, y, z, a, b, c, coeff_index, basis, zBasis, temp, \
-    yzBasis, coord) \
+    private(x, y, a, coeff_index, basis, zBasis, temp, yzBasis, coord) \
     shared(coefficients, bspline)
 #endif // _USE_SSE
 #endif // _OPENMP
-          for(z=0;z<5;++z){
-             coeff_index=z*5*5*64;
-              basis=(DataType)z/5.;
-              if(bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
-              else get_SplineBasisValues<DataType>(basis, zBasis);
-              for(y=0;y<5;++y){
-                  basis=(DataType)y/5.;
-                  if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
-                  else get_SplineBasisValues<DataType>(basis, temp);
+            for (z = 0; z < 5; ++z) {
+                coeff_index = z * 5 * 5 * 64;
+                basis = static_cast<DataType>(z) / 5.f;
+                if (bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
+                else get_SplineBasisValues<DataType>(basis, zBasis);
+                for (y = 0; y < 5; ++y) {
+                    basis = static_cast<DataType>(y) / 5.f;
+                    if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
+                    else get_SplineBasisValues<DataType>(basis, temp);
 #if _USE_SSE
-                  val.f[0] = temp[0];
-                  val.f[1] = temp[1];
-                  val.f[2] = temp[2];
-                  val.f[3] = temp[3];
-                  tempCurrent=val.m;
-                  for(a=0; a<4; a++)
-                  {
-                      val.m=_mm_set_ps1(zBasis[a]);
-                      yzBasis.m[a] = _mm_mul_ps(tempCurrent,val.m);
-                  }
+                    val.f[0] = static_cast<float>(temp[0]);
+                    val.f[1] = static_cast<float>(temp[1]);
+                    val.f[2] = static_cast<float>(temp[2]);
+                    val.f[3] = static_cast<float>(temp[3]);
+                    tempCurrent = val.m;
+                    for (a = 0; a < 4; a++) {
+                        val.m = _mm_set_ps1(static_cast<float>(zBasis[a]));
+                        yzBasis.m[a] = _mm_mul_ps(tempCurrent, val.m);
+                    }
 #else
-                  coord=0;
-                  for(a=0; a<4; a++)
-                  {
-                      yzBasis[coord++]=temp[0]*zBasis[a];
-                      yzBasis[coord++]=temp[1]*zBasis[a];
-                      yzBasis[coord++]=temp[2]*zBasis[a];
-                      yzBasis[coord++]=temp[3]*zBasis[a];
-                  }
+                    coord = 0;
+                    for (a = 0; a < 4; a++) {
+                        yzBasis[coord++] = temp[0] * zBasis[a];
+                        yzBasis[coord++] = temp[1] * zBasis[a];
+                        yzBasis[coord++] = temp[2] * zBasis[a];
+                        yzBasis[coord++] = temp[3] * zBasis[a];
+                    }
 #endif
 
-                  for(x=0;x<5;++x){
-                      basis=(DataType)x/5.;
-                      if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
-                      else get_SplineBasisValues<DataType>(basis, temp);
+                    for (x = 0; x < 5; ++x) {
+                        basis = static_cast<DataType>(x) / 5.f;
+                        if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
+                        else get_SplineBasisValues<DataType>(basis, temp);
 #if _USE_SSE
-
-                      val.f[0] = temp[0];
-                      val.f[1] = temp[1];
-                      val.f[2] = temp[2];
-                      val.f[3] = temp[3];
-                      tempCurrent=val.m;
-                      for(a=0; a<16; ++a)
-                      {
-                          val.m=_mm_set_ps1(yzBasis.f[a]);
-                          val.m=_mm_mul_ps(tempCurrent,val.m);
-                          coefficients[coeff_index++]=val.f[0];
-                          coefficients[coeff_index++]=val.f[1];
-                          coefficients[coeff_index++]=val.f[2];
-                          coefficients[coeff_index++]=val.f[3];
-                      }
+                        val.f[0] = static_cast<float>(temp[0]);
+                        val.f[1] = static_cast<float>(temp[1]);
+                        val.f[2] = static_cast<float>(temp[2]);
+                        val.f[3] = static_cast<float>(temp[3]);
+                        tempCurrent = val.m;
+                        for (a = 0; a < 16; ++a) {
+                            val.m = _mm_set_ps1(static_cast<float>(yzBasis.f[a]));
+                            val.m = _mm_mul_ps(tempCurrent, val.m);
+                            coefficients[coeff_index++] = val.f[0];
+                            coefficients[coeff_index++] = val.f[1];
+                            coefficients[coeff_index++] = val.f[2];
+                            coefficients[coeff_index++] = val.f[3];
+                        }
 #else
-                      for(a=0; a<16; a++)
-                      {
-                          coefficients[coeff_index++]=temp[0]*yzBasis[a];
-                          coefficients[coeff_index++]=temp[1]*yzBasis[a];
-                          coefficients[coeff_index++]=temp[2]*yzBasis[a];
-                          coefficients[coeff_index++]=temp[3]*yzBasis[a];
-                      }
+                        for (a = 0; a < 16; a++) {
+                            coefficients[coeff_index++] = temp[0] * yzBasis[a];
+                            coefficients[coeff_index++] = temp[1] * yzBasis[a];
+                            coefficients[coeff_index++] = temp[2] * yzBasis[a];
+                            coefficients[coeff_index++] = temp[3] * yzBasis[a];
+                        }
 #endif
-                  } //x
-              } // y
-          } // z
+                    } //x
+                } // y
+            } // z
 
-          // Loop over block of 5x5x5 voxels
+            // Loop over block of 5x5x5 voxels
 #if _USE_SSE
-          int coord;
+            int coord;
 #endif // USE_SSE
 #ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma omp parallel for default(none) \
-   private(x, y, z, a, b, c, xPre, yPre, zPre, real, \
-   index, xyzBasis, temp, coeff_index, coord, tempX, tempY, tempZ, val,\
+   private(x, y, z, a, b, c, xPre, yPre, real, \
+   index, coeff_index, coord, tempX, tempY, tempZ, val,\
    xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates) \
    shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \
    gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ, \
    coefficients)
 #else //  _USE_SSE
 #pragma omp parallel for default(none) \
-   private(x, y, z, a, b, c, xPre, yPre, zPre, real, \
-   index, xyzBasis, temp, coeff_index, coord, basis, \
+   private(x, y, z, a, b, c, xPre, yPre, real, \
+   index, coeff_index, coord, basis, \
    xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates) \
    shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \
    gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ, \
    coefficients)
 #endif // _USE_SSE
 #endif // _OPENMP
-          for(zPre=0; zPre<splineControlPoint->nz-3; zPre++)
-          {
-              for(yPre=0; yPre<splineControlPoint->ny-3; yPre++)
-              {
-                  for(xPre=0; xPre<splineControlPoint->nx-3; xPre++)
-                  {
+            for (zPre = 0; zPre < splineControlPoint->nz - 3; zPre++) {
+                for (yPre = 0; yPre < splineControlPoint->ny - 3; yPre++) {
+                    for (xPre = 0; xPre < splineControlPoint->nx - 3; xPre++) {
 #if _USE_SSE
-                      get_GridValues<DataType>(xPre,
-                                            yPre,
-                                            zPre,
-                                            splineControlPoint,
-                                            controlPointPtrX,
-                                            controlPointPtrY,
-                                            controlPointPtrZ,
-                                            xControlPointCoordinates.f,
-                                            yControlPointCoordinates.f,
-                                            zControlPointCoordinates.f,
-                                            false, // no approximation
-                                            false // not a deformation field
-                                            );
+                        get_GridValues<DataType>(xPre,
+                                                 yPre,
+                                                 zPre,
+                                                 splineControlPoint,
+                                                 controlPointPtrX,
+                                                 controlPointPtrY,
+                                                 controlPointPtrZ,
+                                                 xControlPointCoordinates.f,
+                                                 yControlPointCoordinates.f,
+                                                 zControlPointCoordinates.f,
+                                                 false,  // no approximation
+                                                 false); // not a deformation field
 #else // _USE_SSE
-                      get_GridValues<DataType>(xPre,
-                                            yPre,
-                                            zPre,
-                                            splineControlPoint,
-                                            controlPointPtrX,
-                                            controlPointPtrY,
-                                            controlPointPtrZ,
-                                            xControlPointCoordinates,
-                                            yControlPointCoordinates,
-                                            zControlPointCoordinates,
-                                            false, // no approximation
-                                            false // not a deformation field
-                                            );
+                        get_GridValues<DataType>(xPre,
+                                                 yPre,
+                                                 zPre,
+                                                 splineControlPoint,
+                                                 controlPointPtrX,
+                                                 controlPointPtrY,
+                                                 controlPointPtrZ,
+                                                 xControlPointCoordinates,
+                                                 yControlPointCoordinates,
+                                                 zControlPointCoordinates,
+                                                 false,  // no approximation
+                                                 false); // not a deformation field
 #endif // _USE_SSE
-                      coeff_index=0;
-                      for(c=0;c<5;++c){
-                          z = zPre*5+c;
-                          if(z<deformationField->nz){
-                              for(b=0;b<5;++b){
-                                  y = yPre*5+b;
-                                  if(y<deformationField->ny){
-                                      index = (z*deformationField->ny+y)*deformationField->nx+xPre*5;
-                                      for(a=0;a<5;++a){
-                                          x = xPre*5+a;
-                                          if(x<deformationField->nx && mask[index]>-1){
+                        coeff_index = 0;
+                        for (c = 0; c < 5; ++c) {
+                            z = zPre * 5 + c;
+                            if (z < deformationField->nz) {
+                                for (b = 0; b < 5; ++b) {
+                                    y = yPre * 5 + b;
+                                    if (y < deformationField->ny) {
+                                        index = (z * deformationField->ny + y) * deformationField->nx + xPre * 5;
+                                        for (a = 0; a < 5; ++a) {
+                                            x = xPre * 5 + a;
+                                            if (x<deformationField->nx && mask[index]>-1) {
 #if _USE_SSE
-                                              tempX =  _mm_set_ps1(0);
-                                              tempY =  _mm_set_ps1(0);
-                                              tempZ =  _mm_set_ps1(0);
-                                              for(coord=0;coord<16;++coord){
-                                                  val.m = _mm_set_ps(coefficients[coeff_index+3],
-                                                        coefficients[coeff_index+2],
-                                                        coefficients[coeff_index+1],
-                                                        coefficients[coeff_index]);
-                                                  coeff_index+=4;
-                                                  tempX = _mm_add_ps(_mm_mul_ps(val.m,
-                                                                                xControlPointCoordinates.m[coord]),
-                                                                     tempX );
-                                                  tempY = _mm_add_ps(_mm_mul_ps(val.m,
-                                                                                yControlPointCoordinates.m[coord]),
-                                                                     tempY );
-                                                  tempZ = _mm_add_ps(_mm_mul_ps(val.m,
-                                                                                zControlPointCoordinates.m[coord]),
-                                                                     tempZ );
-                                              }
-                                              //the values stored in SSE variables are transferred to normal float
+                                                tempX = _mm_set_ps1(0);
+                                                tempY = _mm_set_ps1(0);
+                                                tempZ = _mm_set_ps1(0);
+                                                for (coord = 0; coord < 16; ++coord) {
+                                                    val.m = _mm_set_ps(static_cast<float>(coefficients[coeff_index + 3]),
+                                                                       static_cast<float>(coefficients[coeff_index + 2]),
+                                                                       static_cast<float>(coefficients[coeff_index + 1]),
+                                                                       static_cast<float>(coefficients[coeff_index]));
+                                                    coeff_index += 4;
+                                                    tempX = _mm_add_ps(_mm_mul_ps(val.m, xControlPointCoordinates.m[coord]), tempX);
+                                                    tempY = _mm_add_ps(_mm_mul_ps(val.m, yControlPointCoordinates.m[coord]), tempY);
+                                                    tempZ = _mm_add_ps(_mm_mul_ps(val.m, zControlPointCoordinates.m[coord]), tempZ);
+                                                }
+                                                // The values stored in SSE variables are transferred to normal float
 #ifdef __SSE3__
-                                              val.m = _mm_hadd_ps(tempX, tempY);
-                                              val.m = _mm_hadd_ps(val.m, tempZ);
-                                              real[0] = val.f[0];
-                                              real[1] = val.f[1];
-                                              real[2] = val.f[2]+val.f[3];
+                                                val.m = _mm_hadd_ps(tempX, tempY);
+                                                val.m = _mm_hadd_ps(val.m, tempZ);
+                                                real[0] = val.f[0];
+                                                real[1] = val.f[1];
+                                                real[2] = val.f[2] + val.f[3];
 #else
-                                              val.m=tempX;
-                                              real[0]=val.f[0]+val.f[1]+val.f[2]+val.f[3];
-                                              val.m=tempY;
-                                              real[1]= val.f[0]+val.f[1]+val.f[2]+val.f[3];
-                                              val.m=tempZ;
-                                              real[2]= val.f[0]+val.f[1]+val.f[2]+val.f[3];
+                                                val.m = tempX;
+                                                real[0] = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+                                                val.m = tempY;
+                                                real[1] = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+                                                val.m = tempZ;
+                                                real[2] = val.f[0] + val.f[1] + val.f[2] + val.f[3];
 #endif
 #else // _USE_SSE
-                                              real[0]=real[1]=real[2]=0;
-                                              for(coord=0;coord<64;++coord){
-                                                  basis = coefficients[coeff_index++];
-                                                  real[0] += xControlPointCoordinates[coord] * basis;
-                                                  real[1] += yControlPointCoordinates[coord] * basis;
-                                                  real[2] += zControlPointCoordinates[coord] * basis;
-                                              }
+                                                real[0] = real[1] = real[2] = 0;
+                                                for (coord = 0; coord < 64; ++coord) {
+                                                    basis = coefficients[coeff_index++];
+                                                    real[0] += xControlPointCoordinates[coord] * basis;
+                                                    real[1] += yControlPointCoordinates[coord] * basis;
+                                                    real[2] += zControlPointCoordinates[coord] * basis;
+                                                }
 #endif // _USE_SSE
-                                              fieldPtrX[index] = real[0];
-                                              fieldPtrY[index] = real[1];
-                                              fieldPtrZ[index] = real[2];
-                                          } // x defined
-                                          else coeff_index += 64;
-                                          index++;
-                                      } // a
-                                  } // y defined
-                                  else coeff_index += 5*64;
-                              } // b
-                          } // z defined
-                          else coeff_index += 5*5*64;
-                      } // c
-                  } // xPre
-              } // yPre
-          } // zPre
-          free(coefficients);
-      } // if spacings==5 voxels
-      else{
-
+                                                fieldPtrX[index] = real[0];
+                                                fieldPtrY[index] = real[1];
+                                                fieldPtrZ[index] = real[2];
+                                            } // x defined
+                                            else coeff_index += 64;
+                                            index++;
+                                        } // a
+                                    } // y defined
+                                    else coeff_index += 5 * 64;
+                                } // b
+                            } // z defined
+                            else coeff_index += 5 * 5 * 64;
+                        } // c
+                    } // xPre
+                } // yPre
+            } // zPre
+            free(coefficients);
+        } else { // if spacings!=5 voxels
 #ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma omp parallel for default(none) \
-    private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \
+    private(x, y, a, xPre, yPre, zPre, real, \
     index, basis, xyzBasis, yzBasis, zBasis, temp, xControlPointCoordinates, \
     yControlPointCoordinates, zControlPointCoordinates, oldBasis, \
     tempX, tempY, tempZ, xBasis_sse, yBasis_sse, zBasis_sse, \
@@ -1439,163 +1294,143 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
     gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ)
 #else //  _USE_SSE
 #pragma omp parallel for default(none) \
-    private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \
+    private(x, y, a, xPre, yPre, zPre, real, \
     index, basis, xyzBasis, yzBasis, zBasis, temp, xControlPointCoordinates, \
     yControlPointCoordinates, zControlPointCoordinates, oldBasis, coord) \
     shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \
     gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ)
 #endif // _USE_SSE
 #endif // _OPENMP
-          for(z=0; z<deformationField->nz; z++)
-          {
-
-              index=z*deformationField->nx*deformationField->ny;
-              oldBasis=1.1;
-
-              zPre=static_cast<int>(static_cast<DataType>(z)/gridVoxelSpacing[2]);
-              basis=static_cast<DataType>(z)/gridVoxelSpacing[2]-static_cast<DataType>(zPre);
-              if(basis<0) basis=0; //rounding error
-              if(bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
-              else get_SplineBasisValues<DataType>(basis, zBasis);
-
-              for(y=0; y<deformationField->ny; y++)
-              {
-
-                  yPre=static_cast<int>(static_cast<DataType>(y)/gridVoxelSpacing[1]);
-                  basis=static_cast<DataType>(y)/gridVoxelSpacing[1]-static_cast<DataType>(yPre);
-                  if(basis<0) basis=0; //rounding error
-                  if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
-                  else get_SplineBasisValues<DataType>(basis, temp);
+            for (z = 0; z < deformationField->nz; z++) {
+                index = z * deformationField->nx * deformationField->ny;
+                oldBasis = 1.1f;
+
+                zPre = static_cast<int>(static_cast<DataType>(z) / gridVoxelSpacing[2]);
+                basis = static_cast<DataType>(z) / gridVoxelSpacing[2] - static_cast<DataType>(zPre);
+                if (basis < 0) basis = 0; //rounding error
+                if (bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
+                else get_SplineBasisValues<DataType>(basis, zBasis);
+
+                for (y = 0; y < deformationField->ny; y++) {
+                    yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
+                    basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
+                    if (basis < 0) basis = 0; //rounding error
+                    if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
+                    else get_SplineBasisValues<DataType>(basis, temp);
 #if _USE_SSE
-                  val.f[0] = temp[0];
-                  val.f[1] = temp[1];
-                  val.f[2] = temp[2];
-                  val.f[3] = temp[3];
-                  tempCurrent=val.m;
-                  for(a=0; a<4; a++)
-                  {
-                      val.m=_mm_set_ps1(zBasis[a]);
-                      yzBasis.m[a] = _mm_mul_ps(tempCurrent,val.m);
-                  }
+                    val.f[0] = static_cast<float>(temp[0]);
+                    val.f[1] = static_cast<float>(temp[1]);
+                    val.f[2] = static_cast<float>(temp[2]);
+                    val.f[3] = static_cast<float>(temp[3]);
+                    tempCurrent = val.m;
+                    for (a = 0; a < 4; a++) {
+                        val.m = _mm_set_ps1(static_cast<float>(zBasis[a]));
+                        yzBasis.m[a] = _mm_mul_ps(tempCurrent, val.m);
+                    }
 #else
-                  coord=0;
-                  for(a=0; a<4; a++)
-                  {
-                      yzBasis[coord++]=temp[0]*zBasis[a];
-                      yzBasis[coord++]=temp[1]*zBasis[a];
-                      yzBasis[coord++]=temp[2]*zBasis[a];
-                      yzBasis[coord++]=temp[3]*zBasis[a];
-                  }
+                    coord = 0;
+                    for (a = 0; a < 4; a++) {
+                        yzBasis[coord++] = temp[0] * zBasis[a];
+                        yzBasis[coord++] = temp[1] * zBasis[a];
+                        yzBasis[coord++] = temp[2] * zBasis[a];
+                        yzBasis[coord++] = temp[3] * zBasis[a];
+                    }
 #endif
-
-                  for(x=0; x<deformationField->nx; x++)
-                  {
-
-                      xPre=static_cast<int>(static_cast<DataType>(x)/gridVoxelSpacing[0]);
-                      basis=static_cast<DataType>(x)/gridVoxelSpacing[0]-static_cast<DataType>(xPre);
-                      if(basis<0) basis=0; //rounding error
-                      if(bspline) get_BSplineBasisValues<DataType>(basis, temp);
-                      else get_SplineBasisValues<DataType>(basis, temp);
+                    for (x = 0; x < deformationField->nx; x++) {
+                        xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
+                        basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
+                        if (basis < 0) basis = 0; //rounding error
+                        if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
+                        else get_SplineBasisValues<DataType>(basis, temp);
 #if _USE_SSE
-
-                      val.f[0] = temp[0];
-                      val.f[1] = temp[1];
-                      val.f[2] = temp[2];
-                      val.f[3] = temp[3];
-                      tempCurrent=val.m;
-                      for(a=0; a<16; ++a)
-                      {
-                          val.m=_mm_set_ps1(yzBasis.f[a]);
-                          xyzBasis.m[a]=_mm_mul_ps(tempCurrent,val.m);
-                      }
+                        val.f[0] = static_cast<float>(temp[0]);
+                        val.f[1] = static_cast<float>(temp[1]);
+                        val.f[2] = static_cast<float>(temp[2]);
+                        val.f[3] = static_cast<float>(temp[3]);
+                        tempCurrent = val.m;
+                        for (a = 0; a < 16; ++a) {
+                            val.m = _mm_set_ps1(static_cast<float>(yzBasis.f[a]));
+                            xyzBasis.m[a] = _mm_mul_ps(tempCurrent, val.m);
+                        }
 #else
-                      coord=0;
-                      for(a=0; a<16; a++)
-                      {
-                          xyzBasis[coord++]=temp[0]*yzBasis[a];
-                          xyzBasis[coord++]=temp[1]*yzBasis[a];
-                          xyzBasis[coord++]=temp[2]*yzBasis[a];
-                          xyzBasis[coord++]=temp[3]*yzBasis[a];
-                      }
+                        coord = 0;
+                        for (a = 0; a < 16; a++) {
+                            xyzBasis[coord++] = temp[0] * yzBasis[a];
+                            xyzBasis[coord++] = temp[1] * yzBasis[a];
+                            xyzBasis[coord++] = temp[2] * yzBasis[a];
+                            xyzBasis[coord++] = temp[3] * yzBasis[a];
+                        }
 #endif
-                      if(basis<=oldBasis || x==0)
-                      {
+                        if (basis <= oldBasis || x == 0) {
 #ifdef _USE_SSE
-                          get_GridValues<DataType>(xPre,
-                                                yPre,
-                                                zPre,
-                                                splineControlPoint,
-                                                controlPointPtrX,
-                                                controlPointPtrY,
-                                                controlPointPtrZ,
-                                                xControlPointCoordinates.f,
-                                                yControlPointCoordinates.f,
-                                                zControlPointCoordinates.f,
-                                                false, // no approximation
-                                                false // not a deformation field
-                                                );
+                            get_GridValues<DataType>(xPre,
+                                                     yPre,
+                                                     zPre,
+                                                     splineControlPoint,
+                                                     controlPointPtrX,
+                                                     controlPointPtrY,
+                                                     controlPointPtrZ,
+                                                     xControlPointCoordinates.f,
+                                                     yControlPointCoordinates.f,
+                                                     zControlPointCoordinates.f,
+                                                     false,  // no approximation
+                                                     false); // not a deformation field
 #else // _USE_SSE
-                          get_GridValues<DataType>(xPre,
-                                                yPre,
-                                                zPre,
-                                                splineControlPoint,
-                                                controlPointPtrX,
-                                                controlPointPtrY,
-                                                controlPointPtrZ,
-                                                xControlPointCoordinates,
-                                                yControlPointCoordinates,
-                                                zControlPointCoordinates,
-                                                false, // no approximation
-                                                false // not a deformation field
-                                                );
+                            get_GridValues<DataType>(xPre,
+                                                     yPre,
+                                                     zPre,
+                                                     splineControlPoint,
+                                                     controlPointPtrX,
+                                                     controlPointPtrY,
+                                                     controlPointPtrZ,
+                                                     xControlPointCoordinates,
+                                                     yControlPointCoordinates,
+                                                     zControlPointCoordinates,
+                                                     false,  // no approximation
+                                                     false); // not a deformation field
 #endif // _USE_SSE
-                      }
-                      oldBasis=basis;
+                        }
+                        oldBasis = basis;
 
-                      real[0]=0;
-                      real[1]=0;
-                      real[2]=0;
+                        real[0] = 0;
+                        real[1] = 0;
+                        real[2] = 0;
 
-                      if(mask[index]>-1)
-                      {
+                        if (mask[index] > -1) {
 #if _USE_SSE
-                          tempX =  _mm_set_ps1(0);
-                          tempY =  _mm_set_ps1(0);
-                          tempZ =  _mm_set_ps1(0);
-                          //addition and multiplication of the 64 basis value and CP displacement for each axis
-                          for(a=0; a<16; a++)
-                          {
-                              tempX = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], xControlPointCoordinates.m[a]), tempX );
-                              tempY = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], yControlPointCoordinates.m[a]), tempY );
-                              tempZ = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], zControlPointCoordinates.m[a]), tempZ );
-                          }
-                          //the values stored in SSE variables are transferred to normal float
-                          val.m=tempX;
-                          real[0]=val.f[0]+val.f[1]+val.f[2]+val.f[3];
-                          val.m=tempY;
-                          real[1]= val.f[0]+val.f[1]+val.f[2]+val.f[3];
-                          val.m=tempZ;
-                          real[2]= val.f[0]+val.f[1]+val.f[2]+val.f[3];
+                            tempX = _mm_set_ps1(0);
+                            tempY = _mm_set_ps1(0);
+                            tempZ = _mm_set_ps1(0);
+                            //addition and multiplication of the 64 basis value and CP displacement for each axis
+                            for (a = 0; a < 16; a++) {
+                                tempX = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], xControlPointCoordinates.m[a]), tempX);
+                                tempY = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], yControlPointCoordinates.m[a]), tempY);
+                                tempZ = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], zControlPointCoordinates.m[a]), tempZ);
+                            }
+                            //the values stored in SSE variables are transferred to normal float
+                            val.m = tempX;
+                            real[0] = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+                            val.m = tempY;
+                            real[1] = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+                            val.m = tempZ;
+                            real[2] = val.f[0] + val.f[1] + val.f[2] + val.f[3];
 #else
-                          for(a=0; a<64; a++)
-                          {
-                              real[0] += xControlPointCoordinates[a] * xyzBasis[a];
-                              real[1] += yControlPointCoordinates[a] * xyzBasis[a];
-                              real[2] += zControlPointCoordinates[a] * xyzBasis[a];
-                          }
+                            for (a = 0; a < 64; a++) {
+                                real[0] += xControlPointCoordinates[a] * xyzBasis[a];
+                                real[1] += yControlPointCoordinates[a] * xyzBasis[a];
+                                real[2] += zControlPointCoordinates[a] * xyzBasis[a];
+                            }
 #endif
-                      }// mask
-                      fieldPtrX[index] = real[0];
-                      fieldPtrY[index] = real[1];
-                      fieldPtrZ[index] = real[2];
-                      index++;
-                  } // x
-              } // y
-          } // z
-      } // else spacing==5
-   }// from a deformation field
-
-   return;
+                        }// mask
+                        fieldPtrX[index] = real[0];
+                        fieldPtrY[index] = real[1];
+                        fieldPtrZ[index] = real[2];
+                        index++;
+                    } // x
+                } // y
+            } // z
+        } // else spacing==5
+    }// from a deformation field
 }
 /* *************************************************************** */
 void reg_spline_getDeformationField(nifti_image *splineControlPoint,
@@ -1603,1227 +1438,1103 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
                                     int *mask,
                                     bool composition,
                                     bool bspline,
-                                    bool force_no_lut)
-{
-   if(splineControlPoint->datatype != deformationField->datatype)
-   {
-      reg_print_fct_error("reg_spline_getDeformationField");
-      reg_print_msg_error("The spline control point image and the deformation field image are expected to be the same type");
-      reg_exit();
-   }
+                                    bool force_no_lut) {
+    if (splineControlPoint->datatype != deformationField->datatype) {
+        reg_print_fct_error("reg_spline_getDeformationField");
+        reg_print_msg_error("The spline control point image and the deformation field image are expected to be the same type");
+        reg_exit();
+    }
 
 #if _USE_SSE
-   if(splineControlPoint->datatype != NIFTI_TYPE_FLOAT32)
-   {
-      reg_print_fct_error("reg_spline_getDeformationField");
-      reg_print_msg_error("SSE computation has only been implemented for single precision.");
-      reg_exit();
-   }
+    if (splineControlPoint->datatype != NIFTI_TYPE_FLOAT32) {
+        reg_print_fct_error("reg_spline_getDeformationField");
+        reg_print_msg_error("SSE computation has only been implemented for single precision");
+        reg_exit();
+    }
 #endif
 
-   bool MrPropre=false;
-   if(mask==nullptr)
-   {
-      // Active voxel are all superior to -1, 0 thus will do !
-      MrPropre=true;
-      mask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int));
-   }
-
-   // Check if an affine initialisation is required
-   if(splineControlPoint->num_ext>0)
-   {
-      if(splineControlPoint->ext_list[0].edata!=nullptr)
-      {
-         reg_affine_getDeformationField(reinterpret_cast<mat44 *>(splineControlPoint->ext_list[0].edata),
-               deformationField,
-               composition,
-               mask);
-         composition=true;
-      }
-   }
-
-   if(splineControlPoint->intent_p1==LIN_SPLINE_GRID){
-      if(splineControlPoint->nz==1)
-      {
-         reg_print_fct_error("reg_linear_spline_getDeformationField");
-         reg_print_msg_error("No 2D implementation yet.");
-         reg_exit();
-      }
-      else
-      {
-         switch(deformationField->datatype)
-         {
-         case NIFTI_TYPE_FLOAT32:
-            reg_linear_spline_getDeformationField3D<float>(splineControlPoint, deformationField, mask, composition);
-            break;
-         case NIFTI_TYPE_FLOAT64:
-            reg_linear_spline_getDeformationField3D<double>(splineControlPoint, deformationField, mask, composition);
-            break;
-         default:
+    bool MrPropre = false;
+    if (mask == nullptr) {
+        // Active voxel are all superior to -1, 0 thus will do !
+        MrPropre = true;
+        mask = (int*)calloc(NiftiImage::calcVoxelNumber(deformationField, 3), sizeof(int));
+    }
+
+    // Check if an affine initialisation is required
+    if (splineControlPoint->num_ext > 0) {
+        if (splineControlPoint->ext_list[0].edata != nullptr) {
+            reg_affine_getDeformationField(reinterpret_cast<mat44*>(splineControlPoint->ext_list[0].edata),
+                                           deformationField,
+                                           composition,
+                                           mask);
+            composition = true;
+        }
+    }
+
+    if (splineControlPoint->intent_p1 == LIN_SPLINE_GRID) {
+        if (splineControlPoint->nz == 1) {
             reg_print_fct_error("reg_linear_spline_getDeformationField");
-            reg_print_msg_error("Only single or double precision is implemented for deformation field");
-            reg_exit();
-         }
-      }
-   }
-   else{
-      if(splineControlPoint->nz==1)
-      {
-         switch(deformationField->datatype)
-         {
-         case NIFTI_TYPE_FLOAT32:
-            reg_cubic_spline_getDeformationField2D<float>(splineControlPoint, deformationField, mask, composition, bspline);
-            break;
-         case NIFTI_TYPE_FLOAT64:
-            reg_cubic_spline_getDeformationField2D<double>(splineControlPoint, deformationField, mask, composition, bspline);
-            break;
-         default:
-            reg_print_fct_error("reg_spline_getDeformationField");
-            reg_print_msg_error("Only single or double precision is implemented for deformation field");
+            reg_print_msg_error("No 2D implementation yet");
             reg_exit();
-         }
-      }
-      else
-      {
-         switch(deformationField->datatype)
-         {
-         case NIFTI_TYPE_FLOAT32:
-            reg_cubic_spline_getDeformationField3D<float>(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut);
-            break;
-         case NIFTI_TYPE_FLOAT64:
-            reg_cubic_spline_getDeformationField3D<double>(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut);
-            break;
-         default:
-            reg_print_fct_error("reg_spline_getDeformationField");
-            reg_print_msg_error("Only single or double precision is implemented for deformation field");
-            reg_exit();
-         }
-      }
-   }
-
-   if(splineControlPoint->num_ext>1)
-   {
-      if(splineControlPoint->ext_list[1].edata!=nullptr)
-      {
-         reg_affine_getDeformationField(reinterpret_cast<mat44 *>(splineControlPoint->ext_list[1].edata),
-               deformationField,
-               true, //composition
-               mask);
-      }
-   }
-   if(MrPropre)
-   {
-      free(mask);
-      mask=nullptr;
-   }
-
-   return;
+        } else {
+            switch (deformationField->datatype) {
+            case NIFTI_TYPE_FLOAT32:
+                reg_linear_spline_getDeformationField3D<float>(splineControlPoint, deformationField, mask, composition);
+                break;
+            case NIFTI_TYPE_FLOAT64:
+                reg_linear_spline_getDeformationField3D<double>(splineControlPoint, deformationField, mask, composition);
+                break;
+            default:
+                reg_print_fct_error("reg_linear_spline_getDeformationField");
+                reg_print_msg_error("Only single or double precision is implemented for deformation field");
+                reg_exit();
+            }
+        }
+    } else {
+        if (splineControlPoint->nz == 1) {
+            switch (deformationField->datatype) {
+            case NIFTI_TYPE_FLOAT32:
+                reg_cubic_spline_getDeformationField2D<float>(splineControlPoint, deformationField, mask, composition, bspline);
+                break;
+            case NIFTI_TYPE_FLOAT64:
+                reg_cubic_spline_getDeformationField2D<double>(splineControlPoint, deformationField, mask, composition, bspline);
+                break;
+            default:
+                reg_print_fct_error("reg_spline_getDeformationField");
+                reg_print_msg_error("Only single or double precision is implemented for deformation field");
+                reg_exit();
+            }
+        } else {
+            switch (deformationField->datatype) {
+            case NIFTI_TYPE_FLOAT32:
+                reg_cubic_spline_getDeformationField3D<float>(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut);
+                break;
+            case NIFTI_TYPE_FLOAT64:
+                reg_cubic_spline_getDeformationField3D<double>(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut);
+                break;
+            default:
+                reg_print_fct_error("reg_spline_getDeformationField");
+                reg_print_msg_error("Only single or double precision is implemented for deformation field");
+                reg_exit();
+            }
+        }
+    }
+
+    if (splineControlPoint->num_ext > 1) {
+        if (splineControlPoint->ext_list[1].edata != nullptr) {
+            reg_affine_getDeformationField(reinterpret_cast<mat44*>(splineControlPoint->ext_list[1].edata),
+                                           deformationField,
+                                           true, //composition
+                                           mask);
+        }
+    }
+    if (MrPropre)
+        free(mask);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DataType>
-void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage,
+void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
                                        nifti_image *voxelImage,
                                        float weight,
                                        bool update,
-                                       const mat44 *voxelToMillimetre)
-{
-   const size_t nodeNumber = CalcVoxelNumber(*nodeImage);
-   const size_t voxelNumber = CalcVoxelNumber(*voxelImage);
-   DataType *nodePtrX = static_cast<DataType *>(nodeImage->data);
-   DataType *nodePtrY = &nodePtrX[nodeNumber];
-   DataType *nodePtrZ = nullptr;
-
-   DataType *voxelPtrX = static_cast<DataType *>(voxelImage->data);
-   DataType *voxelPtrY = &voxelPtrX[voxelNumber];
-   DataType *voxelPtrZ = nullptr;
-
-   if(nodeImage->nz>1)
-   {
-      nodePtrZ = &nodePtrY[nodeNumber];
-      voxelPtrZ= &voxelPtrY[voxelNumber];
-   }
-
-   // The transformation between the image and the grid is used
-   mat44 transformation;
-   // voxel to millimetre in the grid image
-   if(nodeImage->sform_code>0)
-      transformation=nodeImage->sto_xyz;
-   else transformation=nodeImage->qto_xyz;
-   // Affine transformation between the grid and the reference image
-   if(nodeImage->num_ext>0)
-   {
-      if(nodeImage->ext_list[0].edata!=nullptr)
-      {
-         mat44 temp=*(reinterpret_cast<mat44 *>(nodeImage->ext_list[0].edata));
-         temp=nifti_mat44_inverse(temp);
-         transformation = reg_mat44_mul(&temp,&transformation);
-      }
-   }
-   // millimetre to voxel in the reference image
-   if(voxelImage->sform_code>0)
-      transformation = reg_mat44_mul(&voxelImage->sto_ijk,&transformation);
-   else transformation = reg_mat44_mul(&voxelImage->qto_ijk,&transformation);
-
-   // The information has to be reoriented
-   mat33 reorientation;
-   // Voxel to millimetre contains the orientation of the image that is used
-   // to compute the spatial gradient (floating image)
-   if(voxelToMillimetre!=nullptr)
-   {
-      reorientation=reg_mat44_to_mat33(voxelToMillimetre);
-      if(nodeImage->num_ext>0)
-      {
-         if(nodeImage->ext_list[0].edata!=nullptr)
-         {
-            mat33 temp = reg_mat44_to_mat33(reinterpret_cast<mat44 *>(nodeImage->ext_list[0].edata));
-            temp=nifti_mat33_inverse(temp);
-            reorientation = nifti_mat33_mul(temp,reorientation);
-         }
-      }
-   }
-   else reg_mat33_eye(&reorientation);
-   // The information has to be weighted
-   float ratio[3]= {nodeImage->dx,nodeImage->dy,nodeImage->dz};
-   for(int i=0; i<(nodeImage->nz>1?3:2); ++i)
-   {
-      if(nodeImage->sform_code>0)
-      {
-         ratio[i] = sqrt(
-                  reg_pow2(nodeImage->sto_xyz.m[i][0]) +
-               reg_pow2(nodeImage->sto_xyz.m[i][1]) +
-               reg_pow2(nodeImage->sto_xyz.m[i][2]) );
-      }
-      ratio[i] /= voxelImage->pixdim[i+1];
-      weight *= ratio[i];
-   }
-   // For each node, the corresponding voxel is computed
-   float nodeCoord[3];
-   float voxelCoord[3];
-   for(int z=0; z<nodeImage->nz; z++)
-   {
-      nodeCoord[2]=z;
-      for(int y=0; y<nodeImage->ny; y++)
-      {
-         nodeCoord[1]=y;
-         for(int x=0; x<nodeImage->nx; x++)
-         {
-            nodeCoord[0]=x;
-            reg_mat44_mul(&transformation,nodeCoord,voxelCoord);
-            // linear interpolation is performed
-            DataType basisX[2], basisY[2], basisZ[2]={0,0};
-            int pre[3]=
-            {
-               static_cast<int>(reg_floor(voxelCoord[0])),
-               static_cast<int>(reg_floor(voxelCoord[1])),
-               static_cast<int>(reg_floor(voxelCoord[2]))
-            };
-            basisX[1]=voxelCoord[0]-static_cast<DataType>(pre[0]);
-            basisX[0]=static_cast<DataType>(1) - basisX[1];
-            basisY[1]=voxelCoord[1]-static_cast<DataType>(pre[1]);
-            basisY[0]=static_cast<DataType>(1) - basisY[1];
-            if(voxelPtrZ!=nullptr)
-            {
-               basisZ[1]=voxelCoord[2]-static_cast<DataType>(pre[2]);
-               basisZ[0]=static_cast<DataType>(1) - basisZ[1];
+                                       const mat44 *voxelToMillimetre) {
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3);
+    DataType *nodePtrX = static_cast<DataType*>(nodeImage->data);
+    DataType *nodePtrY = &nodePtrX[nodeNumber];
+    DataType *nodePtrZ = nullptr;
+
+    DataType *voxelPtrX = static_cast<DataType*>(voxelImage->data);
+    DataType *voxelPtrY = &voxelPtrX[voxelNumber];
+    DataType *voxelPtrZ = nullptr;
+
+    if (nodeImage->nz > 1) {
+        nodePtrZ = &nodePtrY[nodeNumber];
+        voxelPtrZ = &voxelPtrY[voxelNumber];
+    }
+
+    // The transformation between the image and the grid is used
+    mat44 transformation;
+    // voxel to millimetre in the grid image
+    if (nodeImage->sform_code > 0)
+        transformation = nodeImage->sto_xyz;
+    else transformation = nodeImage->qto_xyz;
+    // Affine transformation between the grid and the reference image
+    if (nodeImage->num_ext > 0) {
+        if (nodeImage->ext_list[0].edata != nullptr) {
+            mat44 temp = *(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
+            temp = nifti_mat44_inverse(temp);
+            transformation = reg_mat44_mul(&temp, &transformation);
+        }
+    }
+    // millimetre to voxel in the reference image
+    if (voxelImage->sform_code > 0)
+        transformation = reg_mat44_mul(&voxelImage->sto_ijk, &transformation);
+    else transformation = reg_mat44_mul(&voxelImage->qto_ijk, &transformation);
+
+    // The information has to be reoriented
+    mat33 reorientation;
+    // Voxel to millimetre contains the orientation of the image that is used
+    // to compute the spatial gradient (floating image)
+    if (voxelToMillimetre != nullptr) {
+        reorientation = reg_mat44_to_mat33(voxelToMillimetre);
+        if (nodeImage->num_ext > 0) {
+            if (nodeImage->ext_list[0].edata != nullptr) {
+                mat33 temp = reg_mat44_to_mat33(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
+                temp = nifti_mat33_inverse(temp);
+                reorientation = nifti_mat33_mul(temp, reorientation);
             }
-            DataType interpolatedValue[3]= {0,0,0};
-            for(int c=0; c<2; ++c)
-            {
-               int indexZ=pre[2]+c;
-               if(indexZ>-1 && indexZ<voxelImage->nz)
-               {
-                  for(int b=0; b<2; ++b)
-                  {
-                     int indexY=pre[1]+b;
-                     if(indexY>-1 && indexY<voxelImage->ny)
-                     {
-                        for(int a=0; a<2; ++a)
-                        {
-                           int indexX=pre[0]+a;
-                           if(indexX>-1 && indexX<voxelImage->nx)
-                           {
-                              size_t index=(indexZ*voxelImage->ny+indexY) *
-                                    voxelImage->nx+indexX;
-                              DataType linearWeight = basisX[a] * basisY[b];
-                              if(voxelPtrZ!=nullptr) linearWeight *= basisZ[c];
-                              interpolatedValue[0] += linearWeight * voxelPtrX[index];
-                              interpolatedValue[1] += linearWeight * voxelPtrY[index];
-                              if(voxelPtrZ!=nullptr)
-                                 interpolatedValue[2] += linearWeight * voxelPtrZ[index];
-                           }
+        }
+    } else reg_mat33_eye(&reorientation);
+    // The information has to be weighted
+    float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz };
+    for (int i = 0; i < (nodeImage->nz > 1 ? 3 : 2); ++i) {
+        if (nodeImage->sform_code > 0) {
+            ratio[i] = sqrt(reg_pow2(nodeImage->sto_xyz.m[i][0]) +
+                            reg_pow2(nodeImage->sto_xyz.m[i][1]) +
+                            reg_pow2(nodeImage->sto_xyz.m[i][2]));
+        }
+        ratio[i] /= voxelImage->pixdim[i + 1];
+        weight *= ratio[i];
+    }
+    // For each node, the corresponding voxel is computed
+    float nodeCoord[3];
+    float voxelCoord[3];
+    for (int z = 0; z < nodeImage->nz; z++) {
+        nodeCoord[2] = static_cast<float>(z);
+        for (int y = 0; y < nodeImage->ny; y++) {
+            nodeCoord[1] = static_cast<float>(y);
+            for (int x = 0; x < nodeImage->nx; x++) {
+                nodeCoord[0] = static_cast<float>(x);
+                reg_mat44_mul(&transformation, nodeCoord, voxelCoord);
+                // linear interpolation is performed
+                DataType basisX[2], basisY[2], basisZ[2] = { 0, 0 };
+                int pre[3] = {
+                    static_cast<int>(reg_floor(voxelCoord[0])),
+                    static_cast<int>(reg_floor(voxelCoord[1])),
+                    static_cast<int>(reg_floor(voxelCoord[2]))
+                };
+                basisX[1] = voxelCoord[0] - static_cast<DataType>(pre[0]);
+                basisX[0] = static_cast<DataType>(1) - basisX[1];
+                basisY[1] = voxelCoord[1] - static_cast<DataType>(pre[1]);
+                basisY[0] = static_cast<DataType>(1) - basisY[1];
+                if (voxelPtrZ != nullptr) {
+                    basisZ[1] = voxelCoord[2] - static_cast<DataType>(pre[2]);
+                    basisZ[0] = static_cast<DataType>(1) - basisZ[1];
+                }
+                DataType interpolatedValue[3] = { 0, 0, 0 };
+                for (int c = 0; c < 2; ++c) {
+                    int indexZ = pre[2] + c;
+                    if (indexZ > -1 && indexZ < voxelImage->nz) {
+                        for (int b = 0; b < 2; ++b) {
+                            int indexY = pre[1] + b;
+                            if (indexY > -1 && indexY < voxelImage->ny) {
+                                for (int a = 0; a < 2; ++a) {
+                                    int indexX = pre[0] + a;
+                                    if (indexX > -1 && indexX < voxelImage->nx) {
+                                        size_t index = (indexZ * voxelImage->ny + indexY) *
+                                            voxelImage->nx + indexX;
+                                        DataType linearWeight = basisX[a] * basisY[b];
+                                        if (voxelPtrZ != nullptr) linearWeight *= basisZ[c];
+                                        interpolatedValue[0] += linearWeight * voxelPtrX[index];
+                                        interpolatedValue[1] += linearWeight * voxelPtrY[index];
+                                        if (voxelPtrZ != nullptr)
+                                            interpolatedValue[2] += linearWeight * voxelPtrZ[index];
+                                    }
+                                }
+                            }
                         }
-                     }
-                  }
-               }
-            }
-            DataType reorientedValue[3]={0,0,0};
-            reorientedValue[0] =
-                  reorientation.m[0][0] * interpolatedValue[0] +
-                  reorientation.m[1][0] * interpolatedValue[1] +
-                  reorientation.m[2][0] * interpolatedValue[2] ;
-            reorientedValue[1] =
-                  reorientation.m[0][1] * interpolatedValue[0] +
-                  reorientation.m[1][1] * interpolatedValue[1] +
-                  reorientation.m[2][1] * interpolatedValue[2] ;
-            if(voxelPtrZ!=nullptr)
-               reorientedValue[2] =
-                     reorientation.m[0][2] * interpolatedValue[0] +
-                     reorientation.m[1][2] * interpolatedValue[1] +
-                     reorientation.m[2][2] * interpolatedValue[2] ;
-            if(update)
-            {
-               *nodePtrX += reorientedValue[0]*static_cast<DataType>(weight);
-               *nodePtrY += reorientedValue[1]*static_cast<DataType>(weight);
-               if(voxelPtrZ!=nullptr)
-                  *nodePtrZ += reorientedValue[2]*static_cast<DataType>(weight);
-            }
-            else
-            {
-               *nodePtrX = reorientedValue[0]*static_cast<DataType>(weight);
-               *nodePtrY = reorientedValue[1]*static_cast<DataType>(weight);
-               if(voxelPtrZ!=nullptr)
-                  *nodePtrZ = reorientedValue[2]*static_cast<DataType>(weight);
-            }
-            ++nodePtrX;
-            ++nodePtrY;
-            if(voxelPtrZ!=nullptr)
-               ++nodePtrZ;
-         } // loop over
-      } // loop over y
-   } // loop over z
+                    }
+                }
+                DataType reorientedValue[3] = { 0, 0, 0 };
+                reorientedValue[0] =
+                    reorientation.m[0][0] * interpolatedValue[0] +
+                    reorientation.m[1][0] * interpolatedValue[1] +
+                    reorientation.m[2][0] * interpolatedValue[2];
+                reorientedValue[1] =
+                    reorientation.m[0][1] * interpolatedValue[0] +
+                    reorientation.m[1][1] * interpolatedValue[1] +
+                    reorientation.m[2][1] * interpolatedValue[2];
+                if (voxelPtrZ != nullptr)
+                    reorientedValue[2] =
+                    reorientation.m[0][2] * interpolatedValue[0] +
+                    reorientation.m[1][2] * interpolatedValue[1] +
+                    reorientation.m[2][2] * interpolatedValue[2];
+                if (update) {
+                    *nodePtrX += reorientedValue[0] * static_cast<DataType>(weight);
+                    *nodePtrY += reorientedValue[1] * static_cast<DataType>(weight);
+                    if (voxelPtrZ != nullptr)
+                        *nodePtrZ += reorientedValue[2] * static_cast<DataType>(weight);
+                } else {
+                    *nodePtrX = reorientedValue[0] * static_cast<DataType>(weight);
+                    *nodePtrY = reorientedValue[1] * static_cast<DataType>(weight);
+                    if (voxelPtrZ != nullptr)
+                        *nodePtrZ = reorientedValue[2] * static_cast<DataType>(weight);
+                }
+                ++nodePtrX;
+                ++nodePtrY;
+                if (voxelPtrZ != nullptr)
+                    ++nodePtrZ;
+            } // loop over
+        } // loop over y
+    } // loop over z
 }
 /* *************************************************************** */
 extern "C++"
-void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
-                                  nifti_image *voxelImage,
+void reg_voxelCentric2NodeCentric(nifti_image * nodeImage,
+                                  nifti_image * voxelImage,
                                   float weight,
                                   bool update,
-                                  const mat44 *voxelToMillimetre)
-{
-   if(nodeImage->datatype!=voxelImage->datatype)
-   {
-      reg_print_fct_error("reg_voxelCentric2NodeCentric");
-      reg_print_msg_error("Both input images do not have the same type");
-      reg_exit();
-   }
-
-   switch(nodeImage->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      reg_voxelCentric2NodeCentric_core<float>
-            (nodeImage, voxelImage, weight, update, voxelToMillimetre);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_voxelCentric2NodeCentric_core<double>
-            (nodeImage, voxelImage, weight, update, voxelToMillimetre);
-      break;
-   default:
-      reg_print_fct_error("reg_voxelCentric2NodeCentric");
-      reg_print_msg_error("Data type not supported");
-      reg_exit();
-   }
+                                  const mat44 * voxelToMillimetre) {
+    if (nodeImage->datatype != voxelImage->datatype) {
+        reg_print_fct_error("reg_voxelCentric2NodeCentric");
+        reg_print_msg_error("Both input images do not have the same type");
+        reg_exit();
+    }
+
+    switch (nodeImage->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_voxelCentric2NodeCentric<float>(nodeImage, voxelImage, weight, update, voxelToMillimetre);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_voxelCentric2NodeCentric<double>(nodeImage, voxelImage, weight, update, voxelToMillimetre);
+        break;
+    default:
+        reg_print_fct_error("reg_voxelCentric2NodeCentric");
+        reg_print_msg_error("Data type not supported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class SplineTYPE>
-SplineTYPE GetValue(SplineTYPE *array, int *dim, int x, int y, int z)
-{
-   if(x<0 || x>= dim[1] || y<0 || y>= dim[2] || z<0 || z>= dim[3])
-      return 0;
-   return array[(z*dim[2]+y)*dim[1]+x];
+SplineTYPE GetValue(SplineTYPE *array, int *dim, int x, int y, int z) {
+    if (x < 0 || x >= dim[1] || y < 0 || y >= dim[2] || z < 0 || z >= dim[3])
+        return 0;
+    return array[(z * dim[2] + y) * dim[1] + x];
 }
 /* *************************************************************** */
 template<class SplineTYPE>
-void SetValue(SplineTYPE *array, int *dim, int x, int y, int z, SplineTYPE value)
-{
-   if(x<0 || x>= dim[1] || y<0 || y>= dim[2] || z<0 || z>= dim[3])
-      return;
-   array[(z*dim[2]+y)*dim[1]+x] = value;
+void SetValue(SplineTYPE *array, int *dim, int x, int y, int z, SplineTYPE value) {
+    if (x < 0 || x >= dim[1] || y < 0 || y >= dim[2] || z < 0 || z >= dim[3])
+        return;
+    array[(z * dim[2] + y) * dim[1] + x] = value;
 }
 /* *************************************************************** */
 template<class SplineTYPE>
 void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint,
-                                         nifti_image *referenceImage)
-{
-   // The input grid is first saved
-   SplineTYPE *oldGrid = (SplineTYPE *)malloc(splineControlPoint->nvox*splineControlPoint->nbyper);
-   SplineTYPE *gridPtrX = static_cast<SplineTYPE *>(splineControlPoint->data);
-   memcpy(oldGrid, gridPtrX, splineControlPoint->nvox*splineControlPoint->nbyper);
-   if(splineControlPoint->data!=nullptr) free(splineControlPoint->data);
-   int oldDim[4];
-   oldDim[0]=splineControlPoint->dim[0];
-   oldDim[1]=splineControlPoint->dim[1];
-   oldDim[2]=splineControlPoint->dim[2];
-   oldDim[3]=splineControlPoint->dim[3];
-
-   splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f;
-   splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f;
-   splineControlPoint->dz = 1.0f;
-   if(referenceImage!=nullptr)
-   {
-      splineControlPoint->dim[1]=splineControlPoint->nx=static_cast<int>(reg_ceil(referenceImage->nx*referenceImage->dx/splineControlPoint->dx)+3.f);
-      splineControlPoint->dim[2]=splineControlPoint->ny=static_cast<int>(reg_ceil(referenceImage->ny*referenceImage->dy/splineControlPoint->dy)+3.f);
-   }
-   else
-   {
-      splineControlPoint->dim[1]=splineControlPoint->nx=(oldDim[1]-3)*2+3;
-      splineControlPoint->dim[2]=splineControlPoint->ny=(oldDim[2]-3)*2+3;
-   }
-   splineControlPoint->dim[3]=splineControlPoint->nz=1;
-
-   splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim);
-   splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
-   gridPtrX = static_cast<SplineTYPE *>(splineControlPoint->data);
-   SplineTYPE *gridPtrY = &gridPtrX[CalcVoxelNumber(*splineControlPoint, 2)];
-   SplineTYPE *oldGridPtrX = &oldGrid[0];
-   SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1]*oldDim[2]];
-
-   for(int y=0; y<oldDim[2]; y++)
-   {
-      int Y=2*y-1;
-      if(Y<splineControlPoint->ny)
-      {
-         for(int x=0; x<oldDim[1]; x++)
-         {
-            int X=2*x-1;
-            if(X<splineControlPoint->nx)
-            {
-
-               /* X Axis */
-               // 0 0
-               SetValue(gridPtrX, splineControlPoint->dim, X, Y, 0,
-                        (GetValue(oldGridPtrX,oldDim,x-1,y-1,0) + GetValue(oldGridPtrX,oldDim,x+1,y-1,0) +
-                         GetValue(oldGridPtrX,oldDim,x-1,y+1,0) + GetValue(oldGridPtrX,oldDim,x+1,y+1,0)
-                         + 6.0f * (GetValue(oldGridPtrX,oldDim,x-1,y,0) + GetValue(oldGridPtrX,oldDim,x+1,y,0) +
-                                   GetValue(oldGridPtrX,oldDim,x,y-1,0) + GetValue(oldGridPtrX,oldDim,x,y+1,0) )
-                         + 36.0f * GetValue(oldGridPtrX,oldDim,x,y,0) ) / 64.0f);
-               // 1 0
-               SetValue(gridPtrX, splineControlPoint->dim, X+1, Y, 0,
-                        (GetValue(oldGridPtrX,oldDim,x,y-1,0) + GetValue(oldGridPtrX,oldDim,x+1,y-1,0) +
-                         GetValue(oldGridPtrX,oldDim,x,y+1,0) + GetValue(oldGridPtrX,oldDim,x+1,y+1,0)
-                         + 6.0f * ( GetValue(oldGridPtrX,oldDim,x,y,0) + GetValue(oldGridPtrX,oldDim,x+1,y,0) ) ) / 16.0f);
-               // 0 1
-               SetValue(gridPtrX, splineControlPoint->dim, X, Y+1, 0,
-                        (GetValue(oldGridPtrX,oldDim,x-1,y,0) + GetValue(oldGridPtrX,oldDim,x-1,y+1,0) +
-                         GetValue(oldGridPtrX,oldDim,x+1,y,0) + GetValue(oldGridPtrX,oldDim,x+1,y+1,0)
-                         + 6.0f * ( GetValue(oldGridPtrX,oldDim,x,y,0) + GetValue(oldGridPtrX,oldDim,x,y+1,0) ) ) / 16.0f);
-               // 1 1
-               SetValue(gridPtrX, splineControlPoint->dim, X+1, Y+1, 0,
-                        (GetValue(oldGridPtrX,oldDim,x,y,0) + GetValue(oldGridPtrX,oldDim,x+1,y,0) +
-                         GetValue(oldGridPtrX,oldDim,x,y+1,0) + GetValue(oldGridPtrX,oldDim,x+1,y+1,0) ) / 4.0f);
-
-               /* Y Axis */
-               // 0 0
-               SetValue(gridPtrY, splineControlPoint->dim, X, Y, 0,
-                        (GetValue(oldGridPtrY,oldDim,x-1,y-1,0) + GetValue(oldGridPtrY,oldDim,x+1,y-1,0) +
-                         GetValue(oldGridPtrY,oldDim,x-1,y+1,0) + GetValue(oldGridPtrY,oldDim,x+1,y+1,0)
-                         + 6.0f * (GetValue(oldGridPtrY,oldDim,x-1,y,0) + GetValue(oldGridPtrY,oldDim,x+1,y,0) +
-                                   GetValue(oldGridPtrY,oldDim,x,y-1,0) + GetValue(oldGridPtrY,oldDim,x,y+1,0) )
-                         + 36.0f * GetValue(oldGridPtrY,oldDim,x,y,0) ) / 64.0f);
-               // 1 0
-               SetValue(gridPtrY, splineControlPoint->dim, X+1, Y, 0,
-                        (GetValue(oldGridPtrY,oldDim,x,y-1,0) + GetValue(oldGridPtrY,oldDim,x+1,y-1,0) +
-                         GetValue(oldGridPtrY,oldDim,x,y+1,0) + GetValue(oldGridPtrY,oldDim,x+1,y+1,0)
-                         + 6.0f * ( GetValue(oldGridPtrY,oldDim,x,y,0) + GetValue(oldGridPtrY,oldDim,x+1,y,0) ) ) / 16.0f);
-               // 0 1
-               SetValue(gridPtrY, splineControlPoint->dim, X, Y+1, 0,
-                        (GetValue(oldGridPtrY,oldDim,x-1,y,0) + GetValue(oldGridPtrY,oldDim,x-1,y+1,0) +
-                         GetValue(oldGridPtrY,oldDim,x+1,y,0) + GetValue(oldGridPtrY,oldDim,x+1,y+1,0)
-                         + 6.0f * ( GetValue(oldGridPtrY,oldDim,x,y,0) + GetValue(oldGridPtrY,oldDim,x,y+1,0) ) ) / 16.0f);
-               // 1 1
-               SetValue(gridPtrY, splineControlPoint->dim, X+1, Y+1, 0,
-                        (GetValue(oldGridPtrY,oldDim,x,y,0) + GetValue(oldGridPtrY,oldDim,x+1,y,0) +
-                         GetValue(oldGridPtrY,oldDim,x,y+1,0) + GetValue(oldGridPtrY,oldDim,x+1,y+1,0) ) / 4.0f);
-
+                                         nifti_image *referenceImage) {
+    // The input grid is first saved
+    SplineTYPE *oldGrid = (SplineTYPE*)malloc(splineControlPoint->nvox * splineControlPoint->nbyper);
+    SplineTYPE *gridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
+    memcpy(oldGrid, gridPtrX, splineControlPoint->nvox * splineControlPoint->nbyper);
+    if (splineControlPoint->data != nullptr) free(splineControlPoint->data);
+    int oldDim[4];
+    oldDim[0] = splineControlPoint->dim[0];
+    oldDim[1] = splineControlPoint->dim[1];
+    oldDim[2] = splineControlPoint->dim[2];
+    oldDim[3] = splineControlPoint->dim[3];
+
+    splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f;
+    splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f;
+    splineControlPoint->dz = 1.0f;
+    if (referenceImage != nullptr) {
+        splineControlPoint->dim[1] = splineControlPoint->nx = static_cast<int>(reg_ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx) + 3.f);
+        splineControlPoint->dim[2] = splineControlPoint->ny = static_cast<int>(reg_ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy) + 3.f);
+    } else {
+        splineControlPoint->dim[1] = splineControlPoint->nx = (oldDim[1] - 3) * 2 + 3;
+        splineControlPoint->dim[2] = splineControlPoint->ny = (oldDim[2] - 3) * 2 + 3;
+    }
+    splineControlPoint->dim[3] = splineControlPoint->nz = 1;
+
+    splineControlPoint->nvox = NiftiImage::calcVoxelNumber(splineControlPoint, splineControlPoint->ndim);
+    splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
+    gridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
+    SplineTYPE *gridPtrY = &gridPtrX[NiftiImage::calcVoxelNumber(splineControlPoint, 2)];
+    SplineTYPE *oldGridPtrX = &oldGrid[0];
+    SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2]];
+
+    for (int y = 0; y < oldDim[2]; y++) {
+        int Y = 2 * y - 1;
+        if (Y < splineControlPoint->ny) {
+            for (int x = 0; x < oldDim[1]; x++) {
+                int X = 2 * x - 1;
+                if (X < splineControlPoint->nx) {
+
+                    /* X Axis */
+                    // 0 0
+                    SetValue(gridPtrX, splineControlPoint->dim, X, Y, 0,
+                             (GetValue(oldGridPtrX, oldDim, x - 1, y - 1, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, 0) +
+                              GetValue(oldGridPtrX, oldDim, x - 1, y + 1, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, 0)
+                              + 6.0f * (GetValue(oldGridPtrX, oldDim, x - 1, y, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y, 0) +
+                                        GetValue(oldGridPtrX, oldDim, x, y - 1, 0) + GetValue(oldGridPtrX, oldDim, x, y + 1, 0))
+                              + 36.0f * GetValue(oldGridPtrX, oldDim, x, y, 0)) / 64.0f);
+                    // 1 0
+                    SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y, 0,
+                             (GetValue(oldGridPtrX, oldDim, x, y - 1, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, 0) +
+                              GetValue(oldGridPtrX, oldDim, x, y + 1, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, 0)
+                              + 6.0f * (GetValue(oldGridPtrX, oldDim, x, y, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y, 0))) / 16.0f);
+                    // 0 1
+                    SetValue(gridPtrX, splineControlPoint->dim, X, Y + 1, 0,
+                             (GetValue(oldGridPtrX, oldDim, x - 1, y, 0) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, 0) +
+                              GetValue(oldGridPtrX, oldDim, x + 1, y, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, 0)
+                              + 6.0f * (GetValue(oldGridPtrX, oldDim, x, y, 0) + GetValue(oldGridPtrX, oldDim, x, y + 1, 0))) / 16.0f);
+                    // 1 1
+                    SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y + 1, 0,
+                             (GetValue(oldGridPtrX, oldDim, x, y, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y, 0) +
+                              GetValue(oldGridPtrX, oldDim, x, y + 1, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, 0)) / 4.0f);
+
+                    /* Y Axis */
+                    // 0 0
+                    SetValue(gridPtrY, splineControlPoint->dim, X, Y, 0,
+                             (GetValue(oldGridPtrY, oldDim, x - 1, y - 1, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, 0) +
+                              GetValue(oldGridPtrY, oldDim, x - 1, y + 1, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, 0)
+                              + 6.0f * (GetValue(oldGridPtrY, oldDim, x - 1, y, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y, 0) +
+                                        GetValue(oldGridPtrY, oldDim, x, y - 1, 0) + GetValue(oldGridPtrY, oldDim, x, y + 1, 0))
+                              + 36.0f * GetValue(oldGridPtrY, oldDim, x, y, 0)) / 64.0f);
+                    // 1 0
+                    SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y, 0,
+                             (GetValue(oldGridPtrY, oldDim, x, y - 1, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, 0) +
+                              GetValue(oldGridPtrY, oldDim, x, y + 1, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, 0)
+                              + 6.0f * (GetValue(oldGridPtrY, oldDim, x, y, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y, 0))) / 16.0f);
+                    // 0 1
+                    SetValue(gridPtrY, splineControlPoint->dim, X, Y + 1, 0,
+                             (GetValue(oldGridPtrY, oldDim, x - 1, y, 0) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, 0) +
+                              GetValue(oldGridPtrY, oldDim, x + 1, y, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, 0)
+                              + 6.0f * (GetValue(oldGridPtrY, oldDim, x, y, 0) + GetValue(oldGridPtrY, oldDim, x, y + 1, 0))) / 16.0f);
+                    // 1 1
+                    SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y + 1, 0,
+                             (GetValue(oldGridPtrY, oldDim, x, y, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y, 0) +
+                              GetValue(oldGridPtrY, oldDim, x, y + 1, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, 0)) / 4.0f);
+
+                }
             }
-         }
-      }
-   }
+        }
+    }
 
-   free(oldGrid);
+    free(oldGrid);
 }
 /* *************************************************************** */
 template<class SplineTYPE>
-void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_image *referenceImage)
-{
-
-   // The input grid is first saved
-   SplineTYPE *oldGrid = (SplineTYPE *)malloc(splineControlPoint->nvox*splineControlPoint->nbyper);
-   SplineTYPE *gridPtrX = static_cast<SplineTYPE *>(splineControlPoint->data);
-   memcpy(oldGrid, gridPtrX, splineControlPoint->nvox*splineControlPoint->nbyper);
-   if(splineControlPoint->data!=nullptr) free(splineControlPoint->data);
-   int oldDim[4];
-   oldDim[0]=splineControlPoint->dim[0];
-   oldDim[1]=splineControlPoint->dim[1];
-   oldDim[2]=splineControlPoint->dim[2];
-   oldDim[3]=splineControlPoint->dim[3];
-
-   splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f;
-   splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f;
-   splineControlPoint->dz = splineControlPoint->pixdim[3] = splineControlPoint->dz / 2.0f;
-
-   if(referenceImage!=nullptr)
-   {
-      splineControlPoint->dim[1]=splineControlPoint->nx=static_cast<int>(reg_ceil(referenceImage->nx*referenceImage->dx/splineControlPoint->dx)+3.f);
-      splineControlPoint->dim[2]=splineControlPoint->ny=static_cast<int>(reg_ceil(referenceImage->ny*referenceImage->dy/splineControlPoint->dy)+3.f);
-      splineControlPoint->dim[3]=splineControlPoint->nz=static_cast<int>(reg_ceil(referenceImage->nz*referenceImage->dz/splineControlPoint->dz)+3.f);
-   }
-   else
-   {
-      splineControlPoint->dim[1]=splineControlPoint->nx=(oldDim[1]-3)*2+3;
-      splineControlPoint->dim[2]=splineControlPoint->ny=(oldDim[2]-3)*2+3;
-      splineControlPoint->dim[3]=splineControlPoint->nz=(oldDim[3]-3)*2+3;
-   }
-   splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim);
-   splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
-
-   const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint);
-   gridPtrX = static_cast<SplineTYPE *>(splineControlPoint->data);
-   SplineTYPE *gridPtrY = &gridPtrX[splineControlPointVoxelNumber];
-   SplineTYPE *gridPtrZ = &gridPtrY[splineControlPointVoxelNumber];
-   SplineTYPE *oldGridPtrX = &oldGrid[0];
-   SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1]*oldDim[2]*oldDim[3]];
-   SplineTYPE *oldGridPtrZ = &oldGridPtrY[oldDim[1]*oldDim[2]*oldDim[3]];
-
-   for(int z=0; z<oldDim[3]; z++)
-   {
-      int Z=2*z-1;
-      if(Z<splineControlPoint->nz)
-      {
-         for(int y=0; y<oldDim[2]; y++)
-         {
-            int Y=2*y-1;
-            if(Y<splineControlPoint->ny)
-            {
-               for(int x=0; x<oldDim[1]; x++)
-               {
-                  int X=2*x-1;
-                  if(X<splineControlPoint->nx)
-                  {
-
-                     /* X Axis */
-                     // 0 0 0
-                     SetValue(gridPtrX, splineControlPoint->dim, X, Y, Z,
-                              (GetValue(oldGridPtrX,oldDim,x-1,y-1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y-1,z-1) +
-                               GetValue(oldGridPtrX,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z-1) +
-                               GetValue(oldGridPtrX,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y-1,z+1)+
-                               GetValue(oldGridPtrX,oldDim,x-1,y+1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1)
-                               + 6.0f * (GetValue(oldGridPtrX,oldDim,x-1,y-1,z) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z) +
-                                         GetValue(oldGridPtrX,oldDim,x+1,y-1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) +
-                                         GetValue(oldGridPtrX,oldDim,x-1,y,z-1) + GetValue(oldGridPtrX,oldDim,x-1,y,z+1) +
-                                         GetValue(oldGridPtrX,oldDim,x+1,y,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) +
-                                         GetValue(oldGridPtrX,oldDim,x,y-1,z-1) + GetValue(oldGridPtrX,oldDim,x,y-1,z+1) +
-                                         GetValue(oldGridPtrX,oldDim,x,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x,y+1,z+1) )
-                               + 36.0f * (GetValue(oldGridPtrX,oldDim,x-1,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) +
-                                          GetValue(oldGridPtrX,oldDim,x,y-1,z) + GetValue(oldGridPtrX,oldDim,x,y+1,z) +
-                                          GetValue(oldGridPtrX,oldDim,x,y,z-1) + GetValue(oldGridPtrX,oldDim,x,y,z+1) )
-                               + 216.0f * GetValue(oldGridPtrX,oldDim,x,y,z) ) / 512.0f);
-
-                     // 1 0 0
-                     SetValue(gridPtrX, splineControlPoint->dim, X+1, Y, Z,
-                              ( GetValue(oldGridPtrX,oldDim,x,y-1,z-1) + GetValue(oldGridPtrX,oldDim,x,y-1,z+1) +
-                                GetValue(oldGridPtrX,oldDim,x,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x,y+1,z+1) +
-                                GetValue(oldGridPtrX,oldDim,x+1,y-1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y-1,z+1) +
-                                GetValue(oldGridPtrX,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) +
-                                6.0f * (GetValue(oldGridPtrX,oldDim,x,y-1,z) + GetValue(oldGridPtrX,oldDim,x,y+1,z) +
-                                        GetValue(oldGridPtrX,oldDim,x,y,z-1) + GetValue(oldGridPtrX,oldDim,x,y,z+1) +
-                                        GetValue(oldGridPtrX,oldDim,x+1,y-1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) +
-                                        GetValue(oldGridPtrX,oldDim,x+1,y,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1)) +
-                                36.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z)) ) / 128.0f);
-
-                     // 0 1 0
-                     SetValue(gridPtrX, splineControlPoint->dim, X, Y+1, Z,
-                              ( GetValue(oldGridPtrX,oldDim,x-1,y,z-1) + GetValue(oldGridPtrX,oldDim,x-1,y,z+1) +
-                                GetValue(oldGridPtrX,oldDim,x+1,y,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) +
-                                GetValue(oldGridPtrX,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z+1) +
-                                GetValue(oldGridPtrX,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) +
-                                6.0f * (GetValue(oldGridPtrX,oldDim,x-1,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) +
-                                        GetValue(oldGridPtrX,oldDim,x,y,z-1) + GetValue(oldGridPtrX,oldDim,x,y,z+1) +
-                                        GetValue(oldGridPtrX,oldDim,x-1,y+1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) +
-                                        GetValue(oldGridPtrX,oldDim,x,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x,y+1,z+1)) +
-                                36.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x,y+1,z)) ) / 128.0f);
-
-                     // 1 1 0
-                     SetValue(gridPtrX, splineControlPoint->dim, X+1, Y+1, Z,
-                              (GetValue(oldGridPtrX,oldDim,x,y,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y,z-1) +
-                               GetValue(oldGridPtrX,oldDim,x,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z-1) +
-                               GetValue(oldGridPtrX,oldDim,x,y,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) +
-                               GetValue(oldGridPtrX,oldDim,x,y+1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) +
-                               6.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) +
-                                       GetValue(oldGridPtrX,oldDim,x,y+1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) ) ) / 32.0f);
-
-                     // 0 0 1
-                     SetValue(gridPtrX, splineControlPoint->dim, X, Y, Z+1,
-                              ( GetValue(oldGridPtrX,oldDim,x-1,y-1,z) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z) +
-                                GetValue(oldGridPtrX,oldDim,x+1,y-1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) +
-                                GetValue(oldGridPtrX,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z+1) +
-                                GetValue(oldGridPtrX,oldDim,x+1,y-1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) +
-                                6.0f * (GetValue(oldGridPtrX,oldDim,x-1,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) +
-                                        GetValue(oldGridPtrX,oldDim,x,y-1,z) + GetValue(oldGridPtrX,oldDim,x,y+1,z) +
-                                        GetValue(oldGridPtrX,oldDim,x-1,y,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) +
-                                        GetValue(oldGridPtrX,oldDim,x,y-1,z+1) + GetValue(oldGridPtrX,oldDim,x,y+1,z+1)) +
-                                36.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x,y,z+1)) ) / 128.0f);
-
-                     // 1 0 1
-                     SetValue(gridPtrX, splineControlPoint->dim, X+1, Y, Z+1,
-                              (GetValue(oldGridPtrX,oldDim,x,y-1,z) + GetValue(oldGridPtrX,oldDim,x+1,y-1,z) +
-                               GetValue(oldGridPtrX,oldDim,x,y-1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y-1,z+1) +
-                               GetValue(oldGridPtrX,oldDim,x,y+1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) +
-                               GetValue(oldGridPtrX,oldDim,x,y+1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) +
-                               6.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) +
-                                       GetValue(oldGridPtrX,oldDim,x,y,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) ) ) / 32.0f);
-
-                     // 0 1 1
-                     SetValue(gridPtrX, splineControlPoint->dim, X, Y+1, Z+1,
-                              (GetValue(oldGridPtrX,oldDim,x-1,y,z) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z) +
-                               GetValue(oldGridPtrX,oldDim,x-1,y,z+1) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z+1) +
-                               GetValue(oldGridPtrX,oldDim,x+1,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) +
-                               GetValue(oldGridPtrX,oldDim,x+1,y,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) +
-                               6.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x,y+1,z) +
-                                       GetValue(oldGridPtrX,oldDim,x,y,z+1) + GetValue(oldGridPtrX,oldDim,x,y+1,z+1) ) ) / 32.0f);
-
-                     // 1 1 1
-                     SetValue(gridPtrX, splineControlPoint->dim, X+1, Y+1, Z+1,
-                              (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) +
-                               GetValue(oldGridPtrX,oldDim,x,y+1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) +
-                               GetValue(oldGridPtrX,oldDim,x,y,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) +
-                               GetValue(oldGridPtrX,oldDim,x,y+1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1)) / 8.0f);
-
-
-                     /* Y Axis */
-                     // 0 0 0
-                     SetValue(gridPtrY, splineControlPoint->dim, X, Y, Z,
-                              (GetValue(oldGridPtrY,oldDim,x-1,y-1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y-1,z-1) +
-                               GetValue(oldGridPtrY,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z-1) +
-                               GetValue(oldGridPtrY,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y-1,z+1)+
-                               GetValue(oldGridPtrY,oldDim,x-1,y+1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1)
-                               + 6.0f * (GetValue(oldGridPtrY,oldDim,x-1,y-1,z) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z) +
-                                         GetValue(oldGridPtrY,oldDim,x+1,y-1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) +
-                                         GetValue(oldGridPtrY,oldDim,x-1,y,z-1) + GetValue(oldGridPtrY,oldDim,x-1,y,z+1) +
-                                         GetValue(oldGridPtrY,oldDim,x+1,y,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) +
-                                         GetValue(oldGridPtrY,oldDim,x,y-1,z-1) + GetValue(oldGridPtrY,oldDim,x,y-1,z+1) +
-                                         GetValue(oldGridPtrY,oldDim,x,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x,y+1,z+1) )
-                               + 36.0f * (GetValue(oldGridPtrY,oldDim,x-1,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) +
-                                          GetValue(oldGridPtrY,oldDim,x,y-1,z) + GetValue(oldGridPtrY,oldDim,x,y+1,z) +
-                                          GetValue(oldGridPtrY,oldDim,x,y,z-1) + GetValue(oldGridPtrY,oldDim,x,y,z+1) )
-                               + 216.0f * GetValue(oldGridPtrY,oldDim,x,y,z) ) / 512.0f);
-
-                     // 1 0 0
-                     SetValue(gridPtrY, splineControlPoint->dim, X+1, Y, Z,
-                              ( GetValue(oldGridPtrY,oldDim,x,y-1,z-1) + GetValue(oldGridPtrY,oldDim,x,y-1,z+1) +
-                                GetValue(oldGridPtrY,oldDim,x,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x,y+1,z+1) +
-                                GetValue(oldGridPtrY,oldDim,x+1,y-1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y-1,z+1) +
-                                GetValue(oldGridPtrY,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) +
-                                6.0f * (GetValue(oldGridPtrY,oldDim,x,y-1,z) + GetValue(oldGridPtrY,oldDim,x,y+1,z) +
-                                        GetValue(oldGridPtrY,oldDim,x,y,z-1) + GetValue(oldGridPtrY,oldDim,x,y,z+1) +
-                                        GetValue(oldGridPtrY,oldDim,x+1,y-1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) +
-                                        GetValue(oldGridPtrY,oldDim,x+1,y,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1)) +
-                                36.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z)) ) / 128.0f);
-
-                     // 0 1 0
-                     SetValue(gridPtrY, splineControlPoint->dim, X, Y+1, Z,
-                              ( GetValue(oldGridPtrY,oldDim,x-1,y,z-1) + GetValue(oldGridPtrY,oldDim,x-1,y,z+1) +
-                                GetValue(oldGridPtrY,oldDim,x+1,y,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) +
-                                GetValue(oldGridPtrY,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z+1) +
-                                GetValue(oldGridPtrY,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) +
-                                6.0f * (GetValue(oldGridPtrY,oldDim,x-1,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) +
-                                        GetValue(oldGridPtrY,oldDim,x,y,z-1) + GetValue(oldGridPtrY,oldDim,x,y,z+1) +
-                                        GetValue(oldGridPtrY,oldDim,x-1,y+1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) +
-                                        GetValue(oldGridPtrY,oldDim,x,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x,y+1,z+1)) +
-                                36.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x,y+1,z)) ) / 128.0f);
-
-                     // 1 1 0
-                     SetValue(gridPtrY, splineControlPoint->dim, X+1, Y+1, Z,
-                              (GetValue(oldGridPtrY,oldDim,x,y,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y,z-1) +
-                               GetValue(oldGridPtrY,oldDim,x,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z-1) +
-                               GetValue(oldGridPtrY,oldDim,x,y,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) +
-                               GetValue(oldGridPtrY,oldDim,x,y+1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) +
-                               6.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) +
-                                       GetValue(oldGridPtrY,oldDim,x,y+1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) ) ) / 32.0f);
-
-                     // 0 0 1
-                     SetValue(gridPtrY, splineControlPoint->dim, X, Y, Z+1,
-                              ( GetValue(oldGridPtrY,oldDim,x-1,y-1,z) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z) +
-                                GetValue(oldGridPtrY,oldDim,x+1,y-1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) +
-                                GetValue(oldGridPtrY,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z+1) +
-                                GetValue(oldGridPtrY,oldDim,x+1,y-1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) +
-                                6.0f * (GetValue(oldGridPtrY,oldDim,x-1,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) +
-                                        GetValue(oldGridPtrY,oldDim,x,y-1,z) + GetValue(oldGridPtrY,oldDim,x,y+1,z) +
-                                        GetValue(oldGridPtrY,oldDim,x-1,y,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) +
-                                        GetValue(oldGridPtrY,oldDim,x,y-1,z+1) + GetValue(oldGridPtrY,oldDim,x,y+1,z+1)) +
-                                36.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x,y,z+1)) ) / 128.0f);
-
-                     // 1 0 1
-                     SetValue(gridPtrY, splineControlPoint->dim, X+1, Y, Z+1,
-                              (GetValue(oldGridPtrY,oldDim,x,y-1,z) + GetValue(oldGridPtrY,oldDim,x+1,y-1,z) +
-                               GetValue(oldGridPtrY,oldDim,x,y-1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y-1,z+1) +
-                               GetValue(oldGridPtrY,oldDim,x,y+1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) +
-                               GetValue(oldGridPtrY,oldDim,x,y+1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) +
-                               6.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) +
-                                       GetValue(oldGridPtrY,oldDim,x,y,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) ) ) / 32.0f);
-
-                     // 0 1 1
-                     SetValue(gridPtrY, splineControlPoint->dim, X, Y+1, Z+1,
-                              (GetValue(oldGridPtrY,oldDim,x-1,y,z) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z) +
-                               GetValue(oldGridPtrY,oldDim,x-1,y,z+1) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z+1) +
-                               GetValue(oldGridPtrY,oldDim,x+1,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) +
-                               GetValue(oldGridPtrY,oldDim,x+1,y,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) +
-                               6.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x,y+1,z) +
-                                       GetValue(oldGridPtrY,oldDim,x,y,z+1) + GetValue(oldGridPtrY,oldDim,x,y+1,z+1) ) ) / 32.0f);
-
-                     // 1 1 1
-                     SetValue(gridPtrY, splineControlPoint->dim, X+1, Y+1, Z+1,
-                              (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) +
-                               GetValue(oldGridPtrY,oldDim,x,y+1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) +
-                               GetValue(oldGridPtrY,oldDim,x,y,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) +
-                               GetValue(oldGridPtrY,oldDim,x,y+1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1)) / 8.0f);
-
-                     /* Z Axis */
-                     // 0 0 0
-                     SetValue(gridPtrZ, splineControlPoint->dim, X, Y, Z,
-                              (GetValue(oldGridPtrZ,oldDim,x-1,y-1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y-1,z-1) +
-                               GetValue(oldGridPtrZ,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z-1) +
-                               GetValue(oldGridPtrZ,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y-1,z+1)+
-                               GetValue(oldGridPtrZ,oldDim,x-1,y+1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1)
-                               + 6.0f * (GetValue(oldGridPtrZ,oldDim,x-1,y-1,z) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z) +
-                                         GetValue(oldGridPtrZ,oldDim,x+1,y-1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) +
-                                         GetValue(oldGridPtrZ,oldDim,x-1,y,z-1) + GetValue(oldGridPtrZ,oldDim,x-1,y,z+1) +
-                                         GetValue(oldGridPtrZ,oldDim,x+1,y,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) +
-                                         GetValue(oldGridPtrZ,oldDim,x,y-1,z-1) + GetValue(oldGridPtrZ,oldDim,x,y-1,z+1) +
-                                         GetValue(oldGridPtrZ,oldDim,x,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) )
-                               + 36.0f * (GetValue(oldGridPtrZ,oldDim,x-1,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) +
-                                          GetValue(oldGridPtrZ,oldDim,x,y-1,z) + GetValue(oldGridPtrZ,oldDim,x,y+1,z) +
-                                          GetValue(oldGridPtrZ,oldDim,x,y,z-1) + GetValue(oldGridPtrZ,oldDim,x,y,z+1) )
-                               + 216.0f * GetValue(oldGridPtrZ,oldDim,x,y,z) ) / 512.0f);
-
-                     // 1 0 0
-                     SetValue(gridPtrZ, splineControlPoint->dim, X+1, Y, Z,
-                              ( GetValue(oldGridPtrZ,oldDim,x,y-1,z-1) + GetValue(oldGridPtrZ,oldDim,x,y-1,z+1) +
-                                GetValue(oldGridPtrZ,oldDim,x,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) +
-                                GetValue(oldGridPtrZ,oldDim,x+1,y-1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y-1,z+1) +
-                                GetValue(oldGridPtrZ,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) +
-                                6.0f * (GetValue(oldGridPtrZ,oldDim,x,y-1,z) + GetValue(oldGridPtrZ,oldDim,x,y+1,z) +
-                                        GetValue(oldGridPtrZ,oldDim,x,y,z-1) + GetValue(oldGridPtrZ,oldDim,x,y,z+1) +
-                                        GetValue(oldGridPtrZ,oldDim,x+1,y-1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) +
-                                        GetValue(oldGridPtrZ,oldDim,x+1,y,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1)) +
-                                36.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z)) ) / 128.0f);
-
-                     // 0 1 0
-                     SetValue(gridPtrZ, splineControlPoint->dim, X, Y+1, Z,
-                              ( GetValue(oldGridPtrZ,oldDim,x-1,y,z-1) + GetValue(oldGridPtrZ,oldDim,x-1,y,z+1) +
-                                GetValue(oldGridPtrZ,oldDim,x+1,y,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) +
-                                GetValue(oldGridPtrZ,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z+1) +
-                                GetValue(oldGridPtrZ,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) +
-                                6.0f * (GetValue(oldGridPtrZ,oldDim,x-1,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) +
-                                        GetValue(oldGridPtrZ,oldDim,x,y,z-1) + GetValue(oldGridPtrZ,oldDim,x,y,z+1) +
-                                        GetValue(oldGridPtrZ,oldDim,x-1,y+1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) +
-                                        GetValue(oldGridPtrZ,oldDim,x,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x,y+1,z+1)) +
-                                36.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x,y+1,z)) ) / 128.0f);
-
-                     // 1 1 0
-                     SetValue(gridPtrZ, splineControlPoint->dim, X+1, Y+1, Z,
-                              (GetValue(oldGridPtrZ,oldDim,x,y,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z-1) +
-                               GetValue(oldGridPtrZ,oldDim,x,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z-1) +
-                               GetValue(oldGridPtrZ,oldDim,x,y,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) +
-                               GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) +
-                               6.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) +
-                                       GetValue(oldGridPtrZ,oldDim,x,y+1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) ) ) / 32.0f);
-
-                     // 0 0 1
-                     SetValue(gridPtrZ, splineControlPoint->dim, X, Y, Z+1,
-                              ( GetValue(oldGridPtrZ,oldDim,x-1,y-1,z) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z) +
-                                GetValue(oldGridPtrZ,oldDim,x+1,y-1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) +
-                                GetValue(oldGridPtrZ,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z+1) +
-                                GetValue(oldGridPtrZ,oldDim,x+1,y-1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) +
-                                6.0f * (GetValue(oldGridPtrZ,oldDim,x-1,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) +
-                                        GetValue(oldGridPtrZ,oldDim,x,y-1,z) + GetValue(oldGridPtrZ,oldDim,x,y+1,z) +
-                                        GetValue(oldGridPtrZ,oldDim,x-1,y,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) +
-                                        GetValue(oldGridPtrZ,oldDim,x,y-1,z+1) + GetValue(oldGridPtrZ,oldDim,x,y+1,z+1)) +
-                                36.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x,y,z+1)) ) / 128.0f);
-
-                     // 1 0 1
-                     SetValue(gridPtrZ, splineControlPoint->dim, X+1, Y, Z+1,
-                              (GetValue(oldGridPtrZ,oldDim,x,y-1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y-1,z) +
-                               GetValue(oldGridPtrZ,oldDim,x,y-1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y-1,z+1) +
-                               GetValue(oldGridPtrZ,oldDim,x,y+1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) +
-                               GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) +
-                               6.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) +
-                                       GetValue(oldGridPtrZ,oldDim,x,y,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) ) ) / 32.0f);
-
-                     // 0 1 1
-                     SetValue(gridPtrZ, splineControlPoint->dim, X, Y+1, Z+1,
-                              (GetValue(oldGridPtrZ,oldDim,x-1,y,z) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z) +
-                               GetValue(oldGridPtrZ,oldDim,x-1,y,z+1) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z+1) +
-                               GetValue(oldGridPtrZ,oldDim,x+1,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) +
-                               GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) +
-                               6.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x,y+1,z) +
-                                       GetValue(oldGridPtrZ,oldDim,x,y,z+1) + GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) ) ) / 32.0f);
-
-                     // 1 1 1
-                     SetValue(gridPtrZ, splineControlPoint->dim, X+1, Y+1, Z+1,
-                              (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) +
-                               GetValue(oldGridPtrZ,oldDim,x,y+1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) +
-                               GetValue(oldGridPtrZ,oldDim,x,y,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) +
-                               GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1)) / 8.0f);
-                  }
-               }
+void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_image *referenceImage) {
+    // The input grid is first saved
+    SplineTYPE *oldGrid = (SplineTYPE*)malloc(splineControlPoint->nvox * splineControlPoint->nbyper);
+    SplineTYPE *gridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
+    memcpy(oldGrid, gridPtrX, splineControlPoint->nvox * splineControlPoint->nbyper);
+    if (splineControlPoint->data != nullptr) free(splineControlPoint->data);
+    int oldDim[4];
+    oldDim[0] = splineControlPoint->dim[0];
+    oldDim[1] = splineControlPoint->dim[1];
+    oldDim[2] = splineControlPoint->dim[2];
+    oldDim[3] = splineControlPoint->dim[3];
+
+    splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f;
+    splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f;
+    splineControlPoint->dz = splineControlPoint->pixdim[3] = splineControlPoint->dz / 2.0f;
+
+    if (referenceImage != nullptr) {
+        splineControlPoint->dim[1] = splineControlPoint->nx = static_cast<int>(reg_ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx) + 3.f);
+        splineControlPoint->dim[2] = splineControlPoint->ny = static_cast<int>(reg_ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy) + 3.f);
+        splineControlPoint->dim[3] = splineControlPoint->nz = static_cast<int>(reg_ceil(referenceImage->nz * referenceImage->dz / splineControlPoint->dz) + 3.f);
+    } else {
+        splineControlPoint->dim[1] = splineControlPoint->nx = (oldDim[1] - 3) * 2 + 3;
+        splineControlPoint->dim[2] = splineControlPoint->ny = (oldDim[2] - 3) * 2 + 3;
+        splineControlPoint->dim[3] = splineControlPoint->nz = (oldDim[3] - 3) * 2 + 3;
+    }
+    splineControlPoint->nvox = NiftiImage::calcVoxelNumber(splineControlPoint, splineControlPoint->ndim);
+    splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
+
+    const size_t splineControlPointVoxelNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
+    gridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
+    SplineTYPE *gridPtrY = &gridPtrX[splineControlPointVoxelNumber];
+    SplineTYPE *gridPtrZ = &gridPtrY[splineControlPointVoxelNumber];
+    SplineTYPE *oldGridPtrX = &oldGrid[0];
+    SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2] * oldDim[3]];
+    SplineTYPE *oldGridPtrZ = &oldGridPtrY[oldDim[1] * oldDim[2] * oldDim[3]];
+
+    for (int z = 0; z < oldDim[3]; z++) {
+        int Z = 2 * z - 1;
+        if (Z < splineControlPoint->nz) {
+            for (int y = 0; y < oldDim[2]; y++) {
+                int Y = 2 * y - 1;
+                if (Y < splineControlPoint->ny) {
+                    for (int x = 0; x < oldDim[1]; x++) {
+                        int X = 2 * x - 1;
+                        if (X < splineControlPoint->nx) {
+
+                            /* X Axis */
+                            // 0 0 0
+                            SetValue(gridPtrX, splineControlPoint->dim, X, Y, Z,
+                                     (GetValue(oldGridPtrX, oldDim, x - 1, y - 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z - 1) +
+                                      GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z - 1) +
+                                      GetValue(oldGridPtrX, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1)
+                                      + 6.0f * (GetValue(oldGridPtrX, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z) +
+                                                GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) +
+                                                GetValue(oldGridPtrX, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrX, oldDim, x - 1, y, z + 1) +
+                                                GetValue(oldGridPtrX, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) +
+                                                GetValue(oldGridPtrX, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrX, oldDim, x, y - 1, z + 1) +
+                                                GetValue(oldGridPtrX, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1))
+                                      + 36.0f * (GetValue(oldGridPtrX, oldDim, x - 1, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) +
+                                                 GetValue(oldGridPtrX, oldDim, x, y - 1, z) + GetValue(oldGridPtrX, oldDim, x, y + 1, z) +
+                                                 GetValue(oldGridPtrX, oldDim, x, y, z - 1) + GetValue(oldGridPtrX, oldDim, x, y, z + 1))
+                                      + 216.0f * GetValue(oldGridPtrX, oldDim, x, y, z)) / 512.0f);
+
+                            // 1 0 0
+                            SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y, Z,
+                                     (GetValue(oldGridPtrX, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrX, oldDim, x, y - 1, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrX, oldDim, x, y - 1, z) + GetValue(oldGridPtrX, oldDim, x, y + 1, z) +
+                                              GetValue(oldGridPtrX, oldDim, x, y, z - 1) + GetValue(oldGridPtrX, oldDim, x, y, z + 1) +
+                                              GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) +
+                                              GetValue(oldGridPtrX, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1)) +
+                                      36.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z))) / 128.0f);
+
+                            // 0 1 0
+                            SetValue(gridPtrX, splineControlPoint->dim, X, Y + 1, Z,
+                                     (GetValue(oldGridPtrX, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrX, oldDim, x - 1, y, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrX, oldDim, x - 1, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrX, oldDim, x, y, z - 1) + GetValue(oldGridPtrX, oldDim, x, y, z + 1) +
+                                              GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) +
+                                              GetValue(oldGridPtrX, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1)) +
+                                      36.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x, y + 1, z))) / 128.0f);
+
+                            // 1 1 0
+                            SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y + 1, Z,
+                                     (GetValue(oldGridPtrX, oldDim, x, y, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z - 1) +
+                                      GetValue(oldGridPtrX, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z - 1) +
+                                      GetValue(oldGridPtrX, oldDim, x, y, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrX, oldDim, x, y + 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z))) / 32.0f);
+
+                            // 0 0 1
+                            SetValue(gridPtrX, splineControlPoint->dim, X, Y, Z + 1,
+                                     (GetValue(oldGridPtrX, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z) +
+                                      GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrX, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrX, oldDim, x - 1, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrX, oldDim, x, y - 1, z) + GetValue(oldGridPtrX, oldDim, x, y + 1, z) +
+                                              GetValue(oldGridPtrX, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) +
+                                              GetValue(oldGridPtrX, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1)) +
+                                      36.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x, y, z + 1))) / 128.0f);
+
+                            // 1 0 1
+                            SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y, Z + 1,
+                                     (GetValue(oldGridPtrX, oldDim, x, y - 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z) +
+                                      GetValue(oldGridPtrX, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x, y + 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrX, oldDim, x, y, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1))) / 32.0f);
+
+                            // 0 1 1
+                            SetValue(gridPtrX, splineControlPoint->dim, X, Y + 1, Z + 1,
+                                     (GetValue(oldGridPtrX, oldDim, x - 1, y, z) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z) +
+                                      GetValue(oldGridPtrX, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x + 1, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x, y + 1, z) +
+                                              GetValue(oldGridPtrX, oldDim, x, y, z + 1) + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1))) / 32.0f);
+
+                            // 1 1 1
+                            SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y + 1, Z + 1,
+                                     (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) +
+                                      GetValue(oldGridPtrX, oldDim, x, y + 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrX, oldDim, x, y, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) +
+                                      GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1)) / 8.0f);
+
+
+                            /* Y Axis */
+                            // 0 0 0
+                            SetValue(gridPtrY, splineControlPoint->dim, X, Y, Z,
+                                     (GetValue(oldGridPtrY, oldDim, x - 1, y - 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z - 1) +
+                                      GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z - 1) +
+                                      GetValue(oldGridPtrY, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1)
+                                      + 6.0f * (GetValue(oldGridPtrY, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z) +
+                                                GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) +
+                                                GetValue(oldGridPtrY, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrY, oldDim, x - 1, y, z + 1) +
+                                                GetValue(oldGridPtrY, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) +
+                                                GetValue(oldGridPtrY, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrY, oldDim, x, y - 1, z + 1) +
+                                                GetValue(oldGridPtrY, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1))
+                                      + 36.0f * (GetValue(oldGridPtrY, oldDim, x - 1, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) +
+                                                 GetValue(oldGridPtrY, oldDim, x, y - 1, z) + GetValue(oldGridPtrY, oldDim, x, y + 1, z) +
+                                                 GetValue(oldGridPtrY, oldDim, x, y, z - 1) + GetValue(oldGridPtrY, oldDim, x, y, z + 1))
+                                      + 216.0f * GetValue(oldGridPtrY, oldDim, x, y, z)) / 512.0f);
+
+                            // 1 0 0
+                            SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y, Z,
+                                     (GetValue(oldGridPtrY, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrY, oldDim, x, y - 1, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrY, oldDim, x, y - 1, z) + GetValue(oldGridPtrY, oldDim, x, y + 1, z) +
+                                              GetValue(oldGridPtrY, oldDim, x, y, z - 1) + GetValue(oldGridPtrY, oldDim, x, y, z + 1) +
+                                              GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) +
+                                              GetValue(oldGridPtrY, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1)) +
+                                      36.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z))) / 128.0f);
+
+                            // 0 1 0
+                            SetValue(gridPtrY, splineControlPoint->dim, X, Y + 1, Z,
+                                     (GetValue(oldGridPtrY, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrY, oldDim, x - 1, y, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrY, oldDim, x - 1, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrY, oldDim, x, y, z - 1) + GetValue(oldGridPtrY, oldDim, x, y, z + 1) +
+                                              GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) +
+                                              GetValue(oldGridPtrY, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1)) +
+                                      36.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x, y + 1, z))) / 128.0f);
+
+                            // 1 1 0
+                            SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y + 1, Z,
+                                     (GetValue(oldGridPtrY, oldDim, x, y, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z - 1) +
+                                      GetValue(oldGridPtrY, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z - 1) +
+                                      GetValue(oldGridPtrY, oldDim, x, y, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrY, oldDim, x, y + 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z))) / 32.0f);
+
+                            // 0 0 1
+                            SetValue(gridPtrY, splineControlPoint->dim, X, Y, Z + 1,
+                                     (GetValue(oldGridPtrY, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z) +
+                                      GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrY, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrY, oldDim, x - 1, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrY, oldDim, x, y - 1, z) + GetValue(oldGridPtrY, oldDim, x, y + 1, z) +
+                                              GetValue(oldGridPtrY, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) +
+                                              GetValue(oldGridPtrY, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1)) +
+                                      36.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x, y, z + 1))) / 128.0f);
+
+                            // 1 0 1
+                            SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y, Z + 1,
+                                     (GetValue(oldGridPtrY, oldDim, x, y - 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z) +
+                                      GetValue(oldGridPtrY, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x, y + 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrY, oldDim, x, y, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1))) / 32.0f);
+
+                            // 0 1 1
+                            SetValue(gridPtrY, splineControlPoint->dim, X, Y + 1, Z + 1,
+                                     (GetValue(oldGridPtrY, oldDim, x - 1, y, z) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z) +
+                                      GetValue(oldGridPtrY, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x + 1, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x, y + 1, z) +
+                                              GetValue(oldGridPtrY, oldDim, x, y, z + 1) + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1))) / 32.0f);
+
+                            // 1 1 1
+                            SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y + 1, Z + 1,
+                                     (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) +
+                                      GetValue(oldGridPtrY, oldDim, x, y + 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrY, oldDim, x, y, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) +
+                                      GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1)) / 8.0f);
+
+                            /* Z Axis */
+                            // 0 0 0
+                            SetValue(gridPtrZ, splineControlPoint->dim, X, Y, Z,
+                                     (GetValue(oldGridPtrZ, oldDim, x - 1, y - 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z - 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z - 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1)
+                                      + 6.0f * (GetValue(oldGridPtrZ, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z) +
+                                                GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) +
+                                                GetValue(oldGridPtrZ, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x - 1, y, z + 1) +
+                                                GetValue(oldGridPtrZ, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) +
+                                                GetValue(oldGridPtrZ, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y - 1, z + 1) +
+                                                GetValue(oldGridPtrZ, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1))
+                                      + 36.0f * (GetValue(oldGridPtrZ, oldDim, x - 1, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) +
+                                                 GetValue(oldGridPtrZ, oldDim, x, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) +
+                                                 GetValue(oldGridPtrZ, oldDim, x, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y, z + 1))
+                                      + 216.0f * GetValue(oldGridPtrZ, oldDim, x, y, z)) / 512.0f);
+
+                            // 1 0 0
+                            SetValue(gridPtrZ, splineControlPoint->dim, X + 1, Y, Z,
+                                     (GetValue(oldGridPtrZ, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y - 1, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrZ, oldDim, x, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) +
+                                              GetValue(oldGridPtrZ, oldDim, x, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y, z + 1) +
+                                              GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) +
+                                              GetValue(oldGridPtrZ, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1)) +
+                                      36.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z))) / 128.0f);
+
+                            // 0 1 0
+                            SetValue(gridPtrZ, splineControlPoint->dim, X, Y + 1, Z,
+                                     (GetValue(oldGridPtrZ, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x - 1, y, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrZ, oldDim, x - 1, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrZ, oldDim, x, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y, z + 1) +
+                                              GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) +
+                                              GetValue(oldGridPtrZ, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1)) +
+                                      36.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z))) / 128.0f);
+
+                            // 1 1 0
+                            SetValue(gridPtrZ, splineControlPoint->dim, X + 1, Y + 1, Z,
+                                     (GetValue(oldGridPtrZ, oldDim, x, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z - 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z - 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrZ, oldDim, x, y + 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z))) / 32.0f);
+
+                            // 0 0 1
+                            SetValue(gridPtrZ, splineControlPoint->dim, X, Y, Z + 1,
+                                     (GetValue(oldGridPtrZ, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z) +
+                                      GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrZ, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrZ, oldDim, x - 1, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrZ, oldDim, x, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) +
+                                              GetValue(oldGridPtrZ, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) +
+                                              GetValue(oldGridPtrZ, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1)) +
+                                      36.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x, y, z + 1))) / 128.0f);
+
+                            // 1 0 1
+                            SetValue(gridPtrZ, splineControlPoint->dim, X + 1, Y, Z + 1,
+                                     (GetValue(oldGridPtrZ, oldDim, x, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z) +
+                                      GetValue(oldGridPtrZ, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x, y + 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) +
+                                              GetValue(oldGridPtrZ, oldDim, x, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1))) / 32.0f);
+
+                            // 0 1 1
+                            SetValue(gridPtrZ, splineControlPoint->dim, X, Y + 1, Z + 1,
+                                     (GetValue(oldGridPtrZ, oldDim, x - 1, y, z) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z) +
+                                      GetValue(oldGridPtrZ, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x + 1, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) +
+                                      6.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) +
+                                              GetValue(oldGridPtrZ, oldDim, x, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1))) / 32.0f);
+
+                            // 1 1 1
+                            SetValue(gridPtrZ, splineControlPoint->dim, X + 1, Y + 1, Z + 1,
+                                     (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) +
+                                      GetValue(oldGridPtrZ, oldDim, x, y + 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) +
+                                      GetValue(oldGridPtrZ, oldDim, x, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) +
+                                      GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1)) / 8.0f);
+                        }
+                    }
+                }
             }
-         }
-      }
-   }
-   free(oldGrid);
+        }
+    }
+    free(oldGrid);
 }
 /* *************************************************************** */
 extern "C++"
-void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
-                                       nifti_image *referenceImage)
-{
+void reg_spline_refineControlPointGrid(nifti_image * controlPointGrid,
+                                       nifti_image * referenceImage) {
 #ifndef NDEBUG
-   reg_print_msg_debug("Starting the refine the control point grid");
+    reg_print_msg_debug("Starting the refine the control point grid");
 #endif
-   if(controlPointGrid->nz==1)
-   {
-      switch(controlPointGrid->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_refineControlPointGrid2D<float>(controlPointGrid,referenceImage);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_refineControlPointGrid2D<double>(controlPointGrid,referenceImage);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_refineControlPointGrid");
-         reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient");
-         reg_exit();
-      }
-   }
-   else
-   {
-      switch(controlPointGrid->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_refineControlPointGrid3D<float>(controlPointGrid,referenceImage);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_refineControlPointGrid3D<double>(controlPointGrid,referenceImage);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_refineControlPointGrid");
-         reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient");
-         reg_exit();
-      }
-   }
-   if(referenceImage!=nullptr)
-   {
-      // Compute the new control point header
-      // The qform (and sform) are set for the control point position image
-      controlPointGrid->quatern_b=referenceImage->quatern_b;
-      controlPointGrid->quatern_c=referenceImage->quatern_c;
-      controlPointGrid->quatern_d=referenceImage->quatern_d;
-      controlPointGrid->qoffset_x=referenceImage->qoffset_x;
-      controlPointGrid->qoffset_y=referenceImage->qoffset_y;
-      controlPointGrid->qoffset_z=referenceImage->qoffset_z;
-      controlPointGrid->qfac=referenceImage->qfac;
-      controlPointGrid->qto_xyz = nifti_quatern_to_mat44(controlPointGrid->quatern_b,
-                                                         controlPointGrid->quatern_c,
-                                                         controlPointGrid->quatern_d,
-                                                         controlPointGrid->qoffset_x,
-                                                         controlPointGrid->qoffset_y,
-                                                         controlPointGrid->qoffset_z,
-                                                         controlPointGrid->dx,
-                                                         controlPointGrid->dy,
-                                                         controlPointGrid->dz,
-                                                         controlPointGrid->qfac);
-
-      // Origin is shifted from 1 control point in the qform
-      float originIndex[3];
-      float originReal[3];
-      originIndex[0] = -1.0f;
-      originIndex[1] = -1.0f;
-      originIndex[2] = 0.0f;
-      if(referenceImage->nz>1) originIndex[2] = -1.0f;
-      reg_mat44_mul(&(controlPointGrid->qto_xyz), originIndex, originReal);
-      if(controlPointGrid->qform_code==0 && controlPointGrid->sform_code==0)
-         controlPointGrid->qform_code=1;
-      controlPointGrid->qto_xyz.m[0][3] = controlPointGrid->qoffset_x = originReal[0];
-      controlPointGrid->qto_xyz.m[1][3] = controlPointGrid->qoffset_y = originReal[1];
-      controlPointGrid->qto_xyz.m[2][3] = controlPointGrid->qoffset_z = originReal[2];
-
-      controlPointGrid->qto_ijk = nifti_mat44_inverse(controlPointGrid->qto_xyz);
-
-      if(controlPointGrid->sform_code>0)
-      {
-         float scalingRatio[3];
-         scalingRatio[0]= controlPointGrid->dx / referenceImage->dx;
-         scalingRatio[1]= controlPointGrid->dy / referenceImage->dy;
-         scalingRatio[2] = 1.f;
-         if(controlPointGrid->nz>1)
-            scalingRatio[2]= controlPointGrid->dz / referenceImage->dz;
-
-         controlPointGrid->sto_xyz.m[0][0]=referenceImage->sto_xyz.m[0][0] * scalingRatio[0];
-         controlPointGrid->sto_xyz.m[1][0]=referenceImage->sto_xyz.m[1][0] * scalingRatio[0];
-         controlPointGrid->sto_xyz.m[2][0]=referenceImage->sto_xyz.m[2][0] * scalingRatio[0];
-         controlPointGrid->sto_xyz.m[3][0]=0.f;
-         controlPointGrid->sto_xyz.m[0][1]=referenceImage->sto_xyz.m[0][1] * scalingRatio[1];
-         controlPointGrid->sto_xyz.m[1][1]=referenceImage->sto_xyz.m[1][1] * scalingRatio[1];
-         controlPointGrid->sto_xyz.m[2][1]=referenceImage->sto_xyz.m[2][1] * scalingRatio[1];
-         controlPointGrid->sto_xyz.m[3][1]=0.f;
-         controlPointGrid->sto_xyz.m[0][2]=referenceImage->sto_xyz.m[0][2] * scalingRatio[2];
-         controlPointGrid->sto_xyz.m[1][2]=referenceImage->sto_xyz.m[1][2] * scalingRatio[2];
-         controlPointGrid->sto_xyz.m[2][2]=referenceImage->sto_xyz.m[2][2] * scalingRatio[2];
-         controlPointGrid->sto_xyz.m[3][2]=0.f;
-         controlPointGrid->sto_xyz.m[0][3]=referenceImage->sto_xyz.m[0][3];
-         controlPointGrid->sto_xyz.m[1][3]=referenceImage->sto_xyz.m[1][3];
-         controlPointGrid->sto_xyz.m[2][3]=referenceImage->sto_xyz.m[2][3];
-         controlPointGrid->sto_xyz.m[3][3]=1.f;
-
-         // The origin is shifted by one compare to the reference image
-         float originIndex[3];
-         originIndex[0]=originIndex[1]=originIndex[2]=-1;
-         if(referenceImage->nz<=1) originIndex[2]=0;
-         reg_mat44_mul(&(controlPointGrid->sto_xyz), originIndex, originReal);
-         controlPointGrid->sto_xyz.m[0][3] = originReal[0];
-         controlPointGrid->sto_xyz.m[1][3] = originReal[1];
-         controlPointGrid->sto_xyz.m[2][3] = originReal[2];
-         controlPointGrid->sto_ijk = nifti_mat44_inverse(controlPointGrid->sto_xyz);
-      }
-   }
-   else
-   {
-      // The voxel spacing is reduced by two
-      for(unsigned i=0; i<3; ++i)
-      {
-         controlPointGrid->sto_xyz.m[0][i] /= 2.f;
-         controlPointGrid->sto_xyz.m[1][i] /= 2.f;
-         if(controlPointGrid->nz>1)
-            controlPointGrid->sto_xyz.m[2][i] /= 2.f;
-      }
-      // The origin is shifted by one node when compared to the previous origin
-      float nodeCoord[3]= {1,1,1};
-      float newOrigin[3];
-      reg_mat44_mul(&controlPointGrid->sto_xyz, nodeCoord, newOrigin);
-      controlPointGrid->sto_xyz.m[0][3]=newOrigin[0];
-      controlPointGrid->sto_xyz.m[1][3]=newOrigin[1];
-      if(controlPointGrid->nz>1)
-         controlPointGrid->sto_xyz.m[2][3]=newOrigin[2];
-      controlPointGrid->sto_ijk = nifti_mat44_inverse(controlPointGrid->sto_xyz);
-   }
+    if (controlPointGrid->nz == 1) {
+        switch (controlPointGrid->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_refineControlPointGrid2D<float>(controlPointGrid, referenceImage);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_refineControlPointGrid2D<double>(controlPointGrid, referenceImage);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_refineControlPointGrid");
+            reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient");
+            reg_exit();
+        }
+    } else {
+        switch (controlPointGrid->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_refineControlPointGrid3D<float>(controlPointGrid, referenceImage);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_refineControlPointGrid3D<double>(controlPointGrid, referenceImage);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_refineControlPointGrid");
+            reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient");
+            reg_exit();
+        }
+    }
+    if (referenceImage != nullptr) {
+        // Compute the new control point header
+        // The qform (and sform) are set for the control point position image
+        controlPointGrid->quatern_b = referenceImage->quatern_b;
+        controlPointGrid->quatern_c = referenceImage->quatern_c;
+        controlPointGrid->quatern_d = referenceImage->quatern_d;
+        controlPointGrid->qoffset_x = referenceImage->qoffset_x;
+        controlPointGrid->qoffset_y = referenceImage->qoffset_y;
+        controlPointGrid->qoffset_z = referenceImage->qoffset_z;
+        controlPointGrid->qfac = referenceImage->qfac;
+        controlPointGrid->qto_xyz = nifti_quatern_to_mat44(controlPointGrid->quatern_b,
+                                                           controlPointGrid->quatern_c,
+                                                           controlPointGrid->quatern_d,
+                                                           controlPointGrid->qoffset_x,
+                                                           controlPointGrid->qoffset_y,
+                                                           controlPointGrid->qoffset_z,
+                                                           controlPointGrid->dx,
+                                                           controlPointGrid->dy,
+                                                           controlPointGrid->dz,
+                                                           controlPointGrid->qfac);
+
+        // Origin is shifted from 1 control point in the qform
+        float originIndex[3];
+        float originReal[3];
+        originIndex[0] = -1.0f;
+        originIndex[1] = -1.0f;
+        originIndex[2] = 0.0f;
+        if (referenceImage->nz > 1) originIndex[2] = -1.0f;
+        reg_mat44_mul(&(controlPointGrid->qto_xyz), originIndex, originReal);
+        if (controlPointGrid->qform_code == 0 && controlPointGrid->sform_code == 0)
+            controlPointGrid->qform_code = 1;
+        controlPointGrid->qto_xyz.m[0][3] = controlPointGrid->qoffset_x = originReal[0];
+        controlPointGrid->qto_xyz.m[1][3] = controlPointGrid->qoffset_y = originReal[1];
+        controlPointGrid->qto_xyz.m[2][3] = controlPointGrid->qoffset_z = originReal[2];
+
+        controlPointGrid->qto_ijk = nifti_mat44_inverse(controlPointGrid->qto_xyz);
+
+        if (controlPointGrid->sform_code > 0) {
+            float scalingRatio[3];
+            scalingRatio[0] = controlPointGrid->dx / referenceImage->dx;
+            scalingRatio[1] = controlPointGrid->dy / referenceImage->dy;
+            scalingRatio[2] = 1.f;
+            if (controlPointGrid->nz > 1)
+                scalingRatio[2] = controlPointGrid->dz / referenceImage->dz;
+
+            controlPointGrid->sto_xyz.m[0][0] = referenceImage->sto_xyz.m[0][0] * scalingRatio[0];
+            controlPointGrid->sto_xyz.m[1][0] = referenceImage->sto_xyz.m[1][0] * scalingRatio[0];
+            controlPointGrid->sto_xyz.m[2][0] = referenceImage->sto_xyz.m[2][0] * scalingRatio[0];
+            controlPointGrid->sto_xyz.m[3][0] = 0.f;
+            controlPointGrid->sto_xyz.m[0][1] = referenceImage->sto_xyz.m[0][1] * scalingRatio[1];
+            controlPointGrid->sto_xyz.m[1][1] = referenceImage->sto_xyz.m[1][1] * scalingRatio[1];
+            controlPointGrid->sto_xyz.m[2][1] = referenceImage->sto_xyz.m[2][1] * scalingRatio[1];
+            controlPointGrid->sto_xyz.m[3][1] = 0.f;
+            controlPointGrid->sto_xyz.m[0][2] = referenceImage->sto_xyz.m[0][2] * scalingRatio[2];
+            controlPointGrid->sto_xyz.m[1][2] = referenceImage->sto_xyz.m[1][2] * scalingRatio[2];
+            controlPointGrid->sto_xyz.m[2][2] = referenceImage->sto_xyz.m[2][2] * scalingRatio[2];
+            controlPointGrid->sto_xyz.m[3][2] = 0.f;
+            controlPointGrid->sto_xyz.m[0][3] = referenceImage->sto_xyz.m[0][3];
+            controlPointGrid->sto_xyz.m[1][3] = referenceImage->sto_xyz.m[1][3];
+            controlPointGrid->sto_xyz.m[2][3] = referenceImage->sto_xyz.m[2][3];
+            controlPointGrid->sto_xyz.m[3][3] = 1.f;
+
+            // The origin is shifted by one compare to the reference image
+            float originIndex[3];
+            originIndex[0] = originIndex[1] = originIndex[2] = -1;
+            if (referenceImage->nz <= 1) originIndex[2] = 0;
+            reg_mat44_mul(&(controlPointGrid->sto_xyz), originIndex, originReal);
+            controlPointGrid->sto_xyz.m[0][3] = originReal[0];
+            controlPointGrid->sto_xyz.m[1][3] = originReal[1];
+            controlPointGrid->sto_xyz.m[2][3] = originReal[2];
+            controlPointGrid->sto_ijk = nifti_mat44_inverse(controlPointGrid->sto_xyz);
+        }
+    } else {
+        // The voxel spacing is reduced by two
+        for (unsigned i = 0; i < 3; ++i) {
+            controlPointGrid->sto_xyz.m[0][i] /= 2.f;
+            controlPointGrid->sto_xyz.m[1][i] /= 2.f;
+            if (controlPointGrid->nz > 1)
+                controlPointGrid->sto_xyz.m[2][i] /= 2.f;
+        }
+        // The origin is shifted by one node when compared to the previous origin
+        float nodeCoord[3] = { 1, 1, 1 };
+        float newOrigin[3];
+        reg_mat44_mul(&controlPointGrid->sto_xyz, nodeCoord, newOrigin);
+        controlPointGrid->sto_xyz.m[0][3] = newOrigin[0];
+        controlPointGrid->sto_xyz.m[1][3] = newOrigin[1];
+        if (controlPointGrid->nz > 1)
+            controlPointGrid->sto_xyz.m[2][3] = newOrigin[2];
+        controlPointGrid->sto_ijk = nifti_mat44_inverse(controlPointGrid->sto_xyz);
+    }
 #ifndef NDEBUG
-   reg_print_msg_debug("The control point grid has been refined");
+    reg_print_msg_debug("The control point grid has been refined");
 #endif
-   return;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
 void reg_defField_compose2D(nifti_image *deformationField,
                             nifti_image *dfToUpdate,
-                            int *mask)
-{
-   const size_t DFVoxelNumber = CalcVoxelNumber(*deformationField, 2);
+                            int *mask) {
+    const size_t DFVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2);
 #ifdef _WIN32
-   long i;
-   const long warVoxelNumber = (long)CalcVoxelNumber(*dfToUpdate, 2);
+    long i;
+    const long warVoxelNumber = (long)NiftiImage::calcVoxelNumber(dfToUpdate, 2);
 #else
-   size_t i;
-   const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate, 2);
+    size_t i;
+    const size_t warVoxelNumber = NiftiImage::calcVoxelNumber(dfToUpdate, 2);
 #endif
-   DataType *defPtrX = static_cast<DataType *>(deformationField->data);
-   DataType *defPtrY = &defPtrX[DFVoxelNumber];
-
-   DataType *resPtrX = static_cast<DataType *>(dfToUpdate->data);
-   DataType *resPtrY = &resPtrX[warVoxelNumber];
-
-   mat44 *df_real2Voxel=nullptr;
-   mat44 *df_voxel2Real=nullptr;
-   if(deformationField->sform_code>0)
-   {
-      df_real2Voxel=&(dfToUpdate->sto_ijk);
-      df_voxel2Real=&(deformationField->sto_xyz);
-   }
-   else
-   {
-      df_real2Voxel=&(dfToUpdate->qto_ijk);
-      df_voxel2Real=&(deformationField->qto_xyz);
-   }
-
-   size_t index;
-   int a, b, pre[2];
-   DataType realDefX, realDefY, voxelX, voxelY;
-   DataType defX, defY, relX[2], relY[2], basis;
+    DataType *defPtrX = static_cast<DataType*>(deformationField->data);
+    DataType *defPtrY = &defPtrX[DFVoxelNumber];
+
+    DataType *resPtrX = static_cast<DataType*>(dfToUpdate->data);
+    DataType *resPtrY = &resPtrX[warVoxelNumber];
+
+    const mat44 *df_real2Voxel;
+    mat44 *df_voxel2Real;
+    if (deformationField->sform_code > 0) {
+        df_real2Voxel = &dfToUpdate->sto_ijk;
+        df_voxel2Real = &deformationField->sto_xyz;
+    } else {
+        df_real2Voxel = &dfToUpdate->qto_ijk;
+        df_voxel2Real = &deformationField->qto_xyz;
+    }
+
+    size_t index;
+    int a, b, pre[2];
+    DataType realDefX, realDefY, voxelX, voxelY;
+    DataType defX, defY, relX[2], relY[2], basis;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, \
    deformationField, defPtrX, defPtrY, resPtrX, resPtrY) \
-   private(i, a, b, index, pre,realDefX, realDefY, voxelX, voxelY, \
+   private(a, b, index, pre,realDefX, realDefY, voxelX, voxelY, \
    defX, defY, relX, relY, basis)
 #endif
-   for(i=0; i<warVoxelNumber; ++i)
-   {
-      if(mask[i]>-1)
-      {
-         realDefX = resPtrX[i];
-         realDefY = resPtrY[i];
-
-         // Conversion from real to voxel in the deformation field
-         voxelX = realDefX * df_real2Voxel->m[0][0]
-               + realDefY * df_real2Voxel->m[0][1]
-               + df_real2Voxel->m[0][3];
-         voxelY = realDefX * df_real2Voxel->m[1][0]
-               + realDefY * df_real2Voxel->m[1][1]
-               + df_real2Voxel->m[1][3];
-
-         // Linear interpolation to compute the new deformation
-         pre[0]=(int)reg_floor(voxelX);
-         pre[1]=(int)reg_floor(voxelY);
-         relX[1]=voxelX-(DataType)pre[0];
-         relX[0]=1.f-relX[1];
-         relY[1]=voxelY-(DataType)pre[1];
-         relY[0]=1.f-relY[1];
-         realDefX=realDefY=0.f;
-         for(b=0; b<2; ++b)
-         {
-            for(a=0; a<2; ++a)
-            {
-               basis = relX[a] * relY[b];
-               if(pre[0]+a>-1 && pre[0]+a<deformationField->nx &&
-                     pre[1]+b>-1 && pre[1]+b<deformationField->ny)
-               {
-                  // Uses the deformation field if voxel is in its space
-                  index=(pre[1]+b)*deformationField->nx+pre[0]+a;
-                  defX = defPtrX[index];
-                  defY = defPtrY[index];
-               }
-               else
-               {
-                  // Uses a sliding effect
-                  get_SlidedValues<DataType>(defX,
-                                          defY,
-                                          pre[0]+a,
-                        pre[1]+b,
-                        defPtrX,
-                        defPtrY,
-                        df_voxel2Real,
-                        deformationField->dim,
-                        false // not a deformation field
-                        );
-               }
-               realDefX += defX * basis;
-               realDefY += defY * basis;
+    for (i = 0; i < warVoxelNumber; ++i) {
+        if (mask[i] > -1) {
+            realDefX = resPtrX[i];
+            realDefY = resPtrY[i];
+
+            // Conversion from real to voxel in the deformation field
+            voxelX = realDefX * df_real2Voxel->m[0][0]
+                + realDefY * df_real2Voxel->m[0][1]
+                + df_real2Voxel->m[0][3];
+            voxelY = realDefX * df_real2Voxel->m[1][0]
+                + realDefY * df_real2Voxel->m[1][1]
+                + df_real2Voxel->m[1][3];
+
+            // Linear interpolation to compute the new deformation
+            pre[0] = (int)reg_floor(voxelX);
+            pre[1] = (int)reg_floor(voxelY);
+            relX[1] = voxelX - static_cast<DataType>(pre[0]);
+            relX[0] = 1.f - relX[1];
+            relY[1] = voxelY - static_cast<DataType>(pre[1]);
+            relY[0] = 1.f - relY[1];
+            realDefX = realDefY = 0.f;
+            for (b = 0; b < 2; ++b) {
+                for (a = 0; a < 2; ++a) {
+                    basis = relX[a] * relY[b];
+                    if (pre[0] + a > -1 && pre[0] + a<deformationField->nx &&
+                        pre[1] + b>-1 && pre[1] + b < deformationField->ny) {
+                        // Uses the deformation field if voxel is in its space
+                        index = (pre[1] + b) * deformationField->nx + pre[0] + a;
+                        defX = defPtrX[index];
+                        defY = defPtrY[index];
+                    } else {
+                        // Uses a sliding effect
+                        get_SlidedValues<DataType>(defX,
+                                                   defY,
+                                                   pre[0] + a,
+                                                   pre[1] + b,
+                                                   defPtrX,
+                                                   defPtrY,
+                                                   df_voxel2Real,
+                                                   deformationField->dim,
+                                                   false); // not a deformation field
+                    }
+                    realDefX += defX * basis;
+                    realDefY += defY * basis;
+                }
             }
-         }
-         resPtrX[i]=realDefX;
-         resPtrY[i]=realDefY;
-      }// mask
-   }// loop over every voxel
+            resPtrX[i] = realDefX;
+            resPtrY[i] = realDefY;
+        }// mask
+    }// loop over every voxel
 }
 /* *************************************************************** */
 template <class DataType>
 void reg_defField_compose3D(nifti_image *deformationField,
                             nifti_image *dfToUpdate,
-                            int *mask)
-{
-   const int DefFieldDim[3]= {deformationField->nx,deformationField->ny,deformationField->nz};
-   const size_t DFVoxelNumber=(size_t)DefFieldDim[0]*DefFieldDim[1]*DefFieldDim[2];
+                            int *mask) {
+    const int DefFieldDim[3] = { deformationField->nx, deformationField->ny, deformationField->nz };
+    const size_t DFVoxelNumber = (size_t)DefFieldDim[0] * DefFieldDim[1] * DefFieldDim[2];
 #ifdef _WIN32
-   long i;
-   const long warVoxelNumber = (long)CalcVoxelNumber(*dfToUpdate);
+    long i;
+    const long warVoxelNumber = (long)NiftiImage::calcVoxelNumber(dfToUpdate, 3);
 #else
-   size_t i;
-   const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate);
+    size_t i;
+    const size_t warVoxelNumber = NiftiImage::calcVoxelNumber(dfToUpdate, 3);
 #endif
 
-   DataType *defPtrX = static_cast<DataType *>(deformationField->data);
-   DataType *defPtrY = &defPtrX[DFVoxelNumber];
-   DataType *defPtrZ = &defPtrY[DFVoxelNumber];
+    DataType *defPtrX = static_cast<DataType*>(deformationField->data);
+    DataType *defPtrY = &defPtrX[DFVoxelNumber];
+    DataType *defPtrZ = &defPtrY[DFVoxelNumber];
 
-   DataType *resPtrX = static_cast<DataType *>(dfToUpdate->data);
-   DataType *resPtrY = &resPtrX[warVoxelNumber];
-   DataType *resPtrZ = &resPtrY[warVoxelNumber];
+    DataType *resPtrX = static_cast<DataType*>(dfToUpdate->data);
+    DataType *resPtrY = &resPtrX[warVoxelNumber];
+    DataType *resPtrZ = &resPtrY[warVoxelNumber];
 
 #ifdef _WIN32
-   __declspec(align(16))mat44 df_real2Voxel;
+    __declspec(align(16))mat44 df_real2Voxel;
 #else
-   mat44 df_real2Voxel __attribute__((aligned(16)));
+    mat44 df_real2Voxel __attribute__((aligned(16)));
 #endif
-   mat44 *df_voxel2Real=nullptr;
-   if(deformationField->sform_code>0)
-   {
-      df_real2Voxel=deformationField->sto_ijk;
-      df_voxel2Real=&deformationField->sto_xyz;
-   }
-   else
-   {
-      df_real2Voxel=deformationField->qto_ijk;
-      df_voxel2Real=&deformationField->qto_xyz;
-   }
-
-   size_t tempIndex, index;
-   int a, b, c, currentX, currentY, currentZ, pre[3];
-   DataType realDef[3], voxel[3], basis, tempBasis;
-   DataType defX, defY, defZ, relX[2], relY[2], relZ[2];
-   bool inY, inZ;
+    mat44 *df_voxel2Real;
+    if (deformationField->sform_code > 0) {
+        df_real2Voxel = deformationField->sto_ijk;
+        df_voxel2Real = &deformationField->sto_xyz;
+    } else {
+        df_real2Voxel = deformationField->qto_ijk;
+        df_voxel2Real = &deformationField->qto_xyz;
+    }
+
+    size_t tempIndex, index;
+    int a, b, c, currentX, currentY, currentZ, pre[3];
+    DataType realDef[3], voxel[3], basis, tempBasis;
+    DataType defX, defY, defZ, relX[2], relY[2], relZ[2];
+    bool inY, inZ;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, DefFieldDim, \
    defPtrX, defPtrY, defPtrZ, resPtrX, resPtrY, resPtrZ, deformationField) \
-   private(i, a, b, c, currentX, currentY, currentZ, index, tempIndex, pre, \
+   private(a, b, c, currentX, currentY, currentZ, index, tempIndex, pre, \
    realDef, voxel, tempBasis, defX, defY, defZ, relX, relY, relZ, basis, inY, inZ)
 #endif
-   for(i=0; i<warVoxelNumber; ++i)
-   {
-      if(mask[i]>-1)
-      {
-         // Conversion from real to voxel in the deformation field
-         realDef[0] = resPtrX[i];
-         realDef[1] = resPtrY[i];
-         realDef[2] = resPtrZ[i];
-         voxel[0] =
-               df_real2Voxel.m[0][0] * realDef[0] +
-               df_real2Voxel.m[0][1] * realDef[1] +
-               df_real2Voxel.m[0][2] * realDef[2] +
-               df_real2Voxel.m[0][3] ;
-         voxel[1] =
-               df_real2Voxel.m[1][0] * realDef[0] +
-               df_real2Voxel.m[1][1] * realDef[1] +
-               df_real2Voxel.m[1][2] * realDef[2] +
-               df_real2Voxel.m[1][3] ;
-         voxel[2] =
-               df_real2Voxel.m[2][0] * realDef[0] +
-               df_real2Voxel.m[2][1] * realDef[1] +
-               df_real2Voxel.m[2][2] * realDef[2] +
-               df_real2Voxel.m[2][3] ;
-         //reg_mat44_mul(df_real2Voxel, realDef, voxel);
-
-         // Linear interpolation to compute the new deformation
-         pre[0]=static_cast<int>reg_floor(voxel[0]);
-         pre[1]=static_cast<int>reg_floor(voxel[1]);
-         pre[2]=static_cast<int>reg_floor(voxel[2]);
-         relX[1]=voxel[0]-static_cast<DataType>(pre[0]);
-         relX[0]=1.-relX[1];
-         relY[1]=voxel[1]-static_cast<DataType>(pre[1]);
-         relY[0]=1.-relY[1];
-         relZ[1]=voxel[2]-static_cast<DataType>(pre[2]);
-         relZ[0]=1.-relZ[1];
-         realDef[0]=realDef[1]=realDef[2]=0.;
-         for(c=0; c<2; ++c)
-         {
-            currentZ = pre[2]+c;
-            tempIndex=currentZ*DefFieldDim[0]*DefFieldDim[1];
-            if(currentZ>-1 && currentZ<DefFieldDim[2]) inZ=true;
-            else inZ=false;
-            for(b=0; b<2; ++b)
-            {
-               currentY = pre[1]+b;
-               index=tempIndex+currentY*DefFieldDim[0] + pre[0];
-               tempBasis= relY[b] * relZ[c];
-               if(currentY>-1 && currentY<DefFieldDim[1]) inY=true;
-               else inY=false;
-               for(a=0; a<2; ++a)
-               {
-                  currentX = pre[0]+a;
-                  if(currentX>-1 && currentX<DefFieldDim[0] && inY && inZ)
-                  {
-                     // Uses the deformation field if voxel is in its space
-                     defX = defPtrX[index];
-                     defY = defPtrY[index];
-                     defZ = defPtrZ[index];
-                  }
-                  else
-                  {
-                     // Uses a sliding effect
-                     get_SlidedValues<DataType>(defX,
-                                             defY,
-                                             defZ,
-                                             currentX,
-                                             currentY,
-                                             currentZ,
-                                             defPtrX,
-                                             defPtrY,
-                                             defPtrZ,
-                                             df_voxel2Real,
-                                             deformationField->dim,
-                                             false // not a displacement field
-                                             );
-                  }
-                  ++index;
-                  basis = relX[a] * tempBasis;
-                  realDef[0] += defX * basis;
-                  realDef[1] += defY * basis;
-                  realDef[2] += defZ * basis;
-               } // a loop
-            } // b loop
-         } // c loop
-         resPtrX[i] = realDef[0];
-         resPtrY[i] = realDef[1];
-         resPtrZ[i] = realDef[2];
-      }// mask
-   }// loop over every voxel
+    for (i = 0; i < warVoxelNumber; ++i) {
+        if (mask[i] > -1) {
+            // Conversion from real to voxel in the deformation field
+            realDef[0] = resPtrX[i];
+            realDef[1] = resPtrY[i];
+            realDef[2] = resPtrZ[i];
+            voxel[0] =
+                df_real2Voxel.m[0][0] * realDef[0] +
+                df_real2Voxel.m[0][1] * realDef[1] +
+                df_real2Voxel.m[0][2] * realDef[2] +
+                df_real2Voxel.m[0][3];
+            voxel[1] =
+                df_real2Voxel.m[1][0] * realDef[0] +
+                df_real2Voxel.m[1][1] * realDef[1] +
+                df_real2Voxel.m[1][2] * realDef[2] +
+                df_real2Voxel.m[1][3];
+            voxel[2] =
+                df_real2Voxel.m[2][0] * realDef[0] +
+                df_real2Voxel.m[2][1] * realDef[1] +
+                df_real2Voxel.m[2][2] * realDef[2] +
+                df_real2Voxel.m[2][3];
+            //reg_mat44_mul(df_real2Voxel, realDef, voxel);
+
+            // Linear interpolation to compute the new deformation
+            pre[0] = static_cast<int>reg_floor(voxel[0]);
+            pre[1] = static_cast<int>reg_floor(voxel[1]);
+            pre[2] = static_cast<int>reg_floor(voxel[2]);
+            relX[1] = voxel[0] - static_cast<DataType>(pre[0]);
+            relX[0] = 1.f - relX[1];
+            relY[1] = voxel[1] - static_cast<DataType>(pre[1]);
+            relY[0] = 1.f - relY[1];
+            relZ[1] = voxel[2] - static_cast<DataType>(pre[2]);
+            relZ[0] = 1.f - relZ[1];
+            realDef[0] = realDef[1] = realDef[2] = 0.;
+            for (c = 0; c < 2; ++c) {
+                currentZ = pre[2] + c;
+                tempIndex = currentZ * DefFieldDim[0] * DefFieldDim[1];
+                if (currentZ > -1 && currentZ < DefFieldDim[2]) inZ = true;
+                else inZ = false;
+                for (b = 0; b < 2; ++b) {
+                    currentY = pre[1] + b;
+                    index = tempIndex + currentY * DefFieldDim[0] + pre[0];
+                    tempBasis = relY[b] * relZ[c];
+                    if (currentY > -1 && currentY < DefFieldDim[1]) inY = true;
+                    else inY = false;
+                    for (a = 0; a < 2; ++a) {
+                        currentX = pre[0] + a;
+                        if (currentX > -1 && currentX < DefFieldDim[0] && inY && inZ) {
+                            // Uses the deformation field if voxel is in its space
+                            defX = defPtrX[index];
+                            defY = defPtrY[index];
+                            defZ = defPtrZ[index];
+                        } else {
+                            // Uses a sliding effect
+                            get_SlidedValues<DataType>(defX,
+                                                       defY,
+                                                       defZ,
+                                                       currentX,
+                                                       currentY,
+                                                       currentZ,
+                                                       defPtrX,
+                                                       defPtrY,
+                                                       defPtrZ,
+                                                       df_voxel2Real,
+                                                       deformationField->dim,
+                                                       false); // not a displacement field
+                        }
+                        ++index;
+                        basis = relX[a] * tempBasis;
+                        realDef[0] += defX * basis;
+                        realDef[1] += defY * basis;
+                        realDef[2] += defZ * basis;
+                    } // a loop
+                } // b loop
+            } // c loop
+            resPtrX[i] = realDef[0];
+            resPtrY[i] = realDef[1];
+            resPtrZ[i] = realDef[2];
+        }// mask
+    }// loop over every voxel
 }
 /* *************************************************************** */
 void reg_defField_compose(nifti_image *deformationField,
                           nifti_image *dfToUpdate,
-                          int *mask)
-{
-   if(deformationField->datatype != dfToUpdate->datatype)
-   {
-      reg_print_fct_error("reg_defField_compose");
-      reg_print_msg_error("Both deformation fields are expected to have the same type");
-      reg_exit();
-   }
-
-   bool freeMask=false;
-   if(mask==nullptr)
-   {
-      mask = (int *)calloc(CalcVoxelNumber(*dfToUpdate), sizeof(int));
-      freeMask=true;
-   }
-
-   if(dfToUpdate->nu==2)
-   {
-      switch(deformationField->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_defField_compose2D<float>(deformationField,dfToUpdate,mask);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_defField_compose2D<double>(deformationField,dfToUpdate,mask);
-         break;
-      default:
-         reg_print_fct_error("reg_defField_compose");
-         reg_print_msg_error("Deformation field pixel type unsupported");
-         reg_exit();
-      }
-   }
-   else
-   {
-      switch(deformationField->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_defField_compose3D<float>(deformationField,dfToUpdate,mask);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_defField_compose3D<double>(deformationField,dfToUpdate,mask);
-         break;
-      default:
-         reg_print_fct_error("reg_defField_compose");
-         reg_print_msg_error("Deformation field pixel type unsupported");
-         reg_exit();
-      }
-   }
-
-   if(freeMask) free(mask);
+                          int *mask) {
+    if (deformationField->datatype != dfToUpdate->datatype) {
+        reg_print_fct_error("reg_defField_compose");
+        reg_print_msg_error("Both deformation fields are expected to have the same type");
+        reg_exit();
+    }
+
+    bool freeMask = false;
+    if (mask == nullptr) {
+        mask = (int*)calloc(NiftiImage::calcVoxelNumber(dfToUpdate, 3), sizeof(int));
+        freeMask = true;
+    }
+
+    if (dfToUpdate->nu == 2) {
+        switch (deformationField->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_defField_compose2D<float>(deformationField, dfToUpdate, mask);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_defField_compose2D<double>(deformationField, dfToUpdate, mask);
+            break;
+        default:
+            reg_print_fct_error("reg_defField_compose");
+            reg_print_msg_error("Deformation field pixel type unsupported");
+            reg_exit();
+        }
+    } else {
+        switch (deformationField->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_defField_compose3D<float>(deformationField, dfToUpdate, mask);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_defField_compose3D<double>(deformationField, dfToUpdate, mask);
+            break;
+        default:
+            reg_print_fct_error("reg_defField_compose");
+            reg_print_msg_error("Deformation field pixel type unsupported");
+            reg_exit();
+        }
+    }
+
+    if (freeMask) free(mask);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 /// @brief Internal data structure to pass user data into optimizer that get passed to cost_function
-struct ddata
-{
-   nifti_image *deformationField;
-   double gx, gy, gz;
-   double *arrayy[4];
-   double values[4];
+struct ddata {
+    nifti_image *deformationField;
+    double gx, gy, gz;
+    double *arrayy[4];
+    double values[4];
 };
 
 /* ************************************************************************** */
@@ -2832,126 +2543,124 @@ struct ddata
 /* ************************************************************************** */
 
 template<class FieldTYPE>
-static int inline FastWarp(double x, double y, double z, nifti_image *deformationField, double *px, double *py, double *pz)
-{
-   double wax, wbx, wcx, wdx, wex, wfx, wgx, whx, wf3x;
-   FieldTYPE *wpx;
-   double way, wby, wcy, wdy, wey, wfy, wgy, why, wf3y;
-   FieldTYPE *wpy;
-   double waz, wbz, wcz, wdz, wez, wfz, wgz, whz, wf3z;
-   FieldTYPE *wpz;
-   int   xw, yw, zw, dxw, dyw, dxyw, dxyzw;
-   double wxf, wyf, wzf, wyzf;
-   double world[4], position[4];
-
-   FieldTYPE *warpdata = static_cast<FieldTYPE *>(deformationField->data);
-
-   mat44 *deformationFieldIJKMatrix;
-   if(deformationField->sform_code>0)
-      deformationFieldIJKMatrix=&(deformationField->sto_ijk);
-   else deformationFieldIJKMatrix=&(deformationField->qto_ijk);
-
-   dxw = deformationField->nx;
-   dyw = deformationField->ny;
-   dxyw = dxw * dyw;
-   dxyzw = dxw * dyw * deformationField->nz;
-
-   // first guess
-   *px = x;
-   *py = y;
-   *pz = z;
-
-   // detect NAN input
-   if (x!=x || y!=y || z!=z) return EXIT_FAILURE;
-
-   // convert x, y,z to indices in deformationField
-   world[0] = x;
-   world[1] = y;
-   world[2] = z;
-   world[3] = 1;
-   reg_mat44_mul(deformationFieldIJKMatrix, world, position);
-   x = position[0];
-   y = position[1];
-   z = position[2];
-
-   xw = (int)x;        /* get indices into DVF */
-   yw = (int)y;
-   zw = (int)z;
-
-   // if you block out the next three lines the routine will extrapolate indefinitively
+inline static int FastWarp(double x, double y, double z, nifti_image *deformationField, double *px, double *py, double *pz) {
+    double wax, wbx, wcx, wdx, wex, wfx, wgx, whx, wf3x;
+    FieldTYPE *wpx;
+    double way, wby, wcy, wdy, wey, wfy, wgy, why, wf3y;
+    FieldTYPE *wpy;
+    double waz, wbz, wcz, wdz, wez, wfz, wgz, whz, wf3z;
+    FieldTYPE *wpz;
+    int   xw, yw, zw, dxw, dyw, dxyw, dxyzw;
+    double wxf, wyf, wzf, wyzf;
+    double world[4], position[4];
+
+    FieldTYPE *warpdata = static_cast<FieldTYPE*>(deformationField->data);
+
+    const mat44 *deformationFieldIJKMatrix;
+    if (deformationField->sform_code > 0)
+        deformationFieldIJKMatrix = &deformationField->sto_ijk;
+    else deformationFieldIJKMatrix = &deformationField->qto_ijk;
+
+    dxw = deformationField->nx;
+    dyw = deformationField->ny;
+    dxyw = dxw * dyw;
+    dxyzw = dxw * dyw * deformationField->nz;
+
+    // first guess
+    *px = x;
+    *py = y;
+    *pz = z;
+
+    // detect NAN input
+    if (x != x || y != y || z != z) return EXIT_FAILURE;
+
+    // convert x, y,z to indices in deformationField
+    world[0] = x;
+    world[1] = y;
+    world[2] = z;
+    world[3] = 1;
+    reg_mat44_mul(deformationFieldIJKMatrix, world, position);
+    x = position[0];
+    y = position[1];
+    z = position[2];
+
+    xw = (int)x;        /* get indices into DVF */
+    yw = (int)y;
+    zw = (int)z;
+
+    // if you block out the next three lines the routine will extrapolate indefinitively
 #if 0
-   if (x<0 || x>=deformationField->nx-1) return ERROR;
-   if (y<0 || y>=deformationField->ny-1) return ERROR;
-   if (z<0 || z>=deformationField->nz-1) return ERROR;
+    if (x < 0 || x >= deformationField->nx - 1) return ERROR;
+    if (y < 0 || y >= deformationField->ny - 1) return ERROR;
+    if (z < 0 || z >= deformationField->nz - 1) return ERROR;
 #else
-   if (xw<0) xw=0;     /* clip */
-   if (yw<0) yw=0;
-   if (zw<0) zw=0;
-   if (xw>deformationField->nx-2) xw = deformationField->nx-2;
-   if (yw>deformationField->ny-2) yw = deformationField->ny-2;
-   if (zw>deformationField->nz-2) zw = deformationField->nz-2;
+    if (xw < 0) xw = 0;     /* clip */
+    if (yw < 0) yw = 0;
+    if (zw < 0) zw = 0;
+    if (xw > deformationField->nx - 2) xw = deformationField->nx - 2;
+    if (yw > deformationField->ny - 2) yw = deformationField->ny - 2;
+    if (zw > deformationField->nz - 2) zw = deformationField->nz - 2;
 #endif
 
-   wxf = x-xw;                  /* fractional coordinates */
-   wyf = y-yw;
-   wzf = z-zw;
-
-   /* cornerstone for warp coordinates */
-   wpx = warpdata + zw*dxyw + yw*dxw + xw;
-   wpy = wpx+dxyzw;
-   wpz = wpy+dxyzw;
-
-   wf3x = wpx[dxw+1];
-   wax  = wpx[0];
-   wbx  = wpx[1]      - wax;
-   wcx  = wpx[dxw]    - wax;
-   wdx  = wpx[dxyw]   - wax;
-   wex  = wpx[dxyw + dxw] - wax - wcx - wdx;
-   wfx  = wpx[dxyw + 1 ]  - wax - wbx - wdx;
-   wgx  = wf3x            - wax - wbx - wcx;
-   whx  = wpx[dxyw + dxw + 1] - wf3x - wdx - wex - wfx;
-
-   wf3y = wpy[dxw+1];
-   way  = wpy[0];
-   wby  = wpy[1]      - way;
-   wcy  = wpy[dxw]    - way;
-   wdy  = wpy[dxyw]   - way;
-   wey  = wpy[dxyw + dxw] - way - wcy - wdy;
-   wfy  = wpy[dxyw + 1 ]  - way - wby - wdy;
-   wgy  = wf3y            - way - wby - wcy;
-   why  = wpy[dxyw + dxw + 1] - wf3y - wdy - wey - wfy;
-
-   wf3z = wpz[dxw+1];
-   waz  = wpz[0];
-   wbz  = wpz[1]      - waz;
-   wcz  = wpz[dxw]    - waz;
-   wdz  = wpz[dxyw]   - waz;
-   wez  = wpz[dxyw + dxw] - waz - wcz - wdz;
-   wfz  = wpz[dxyw + 1 ]  - waz - wbz - wdz;
-   wgz  = wf3z            - waz - wbz - wcz;
-   whz  = wpz[dxyw + dxw + 1] - wf3z - wdz - wez - wfz;
-
-   wyzf = wyf * wzf;                   /* common term in interpolation     */
-
-   /* trilinear interpolation formulae  */
-   *px = wax + wbx*wxf + wcx*wyf + wdx*wzf + wex*wyzf + wfx*wxf*wzf + wgx*wxf*wyf + whx*wxf*wyzf;
-   *py = way + wby*wxf + wcy*wyf + wdy*wzf + wey*wyzf + wfy*wxf*wzf + wgy*wxf*wyf + why*wxf*wyzf;
-   *pz = waz + wbz*wxf + wcz*wyf + wdz*wzf + wez*wyzf + wfz*wxf*wzf + wgz*wxf*wyf + whz*wxf*wyzf;
-
-   return EXIT_SUCCESS;
+    wxf = x - xw;                  /* fractional coordinates */
+    wyf = y - yw;
+    wzf = z - zw;
+
+    /* cornerstone for warp coordinates */
+    wpx = warpdata + zw * dxyw + yw * dxw + xw;
+    wpy = wpx + dxyzw;
+    wpz = wpy + dxyzw;
+
+    wf3x = wpx[dxw + 1];
+    wax = wpx[0];
+    wbx = wpx[1] - wax;
+    wcx = wpx[dxw] - wax;
+    wdx = wpx[dxyw] - wax;
+    wex = wpx[dxyw + dxw] - wax - wcx - wdx;
+    wfx = wpx[dxyw + 1] - wax - wbx - wdx;
+    wgx = wf3x - wax - wbx - wcx;
+    whx = wpx[dxyw + dxw + 1] - wf3x - wdx - wex - wfx;
+
+    wf3y = wpy[dxw + 1];
+    way = wpy[0];
+    wby = wpy[1] - way;
+    wcy = wpy[dxw] - way;
+    wdy = wpy[dxyw] - way;
+    wey = wpy[dxyw + dxw] - way - wcy - wdy;
+    wfy = wpy[dxyw + 1] - way - wby - wdy;
+    wgy = wf3y - way - wby - wcy;
+    why = wpy[dxyw + dxw + 1] - wf3y - wdy - wey - wfy;
+
+    wf3z = wpz[dxw + 1];
+    waz = wpz[0];
+    wbz = wpz[1] - waz;
+    wcz = wpz[dxw] - waz;
+    wdz = wpz[dxyw] - waz;
+    wez = wpz[dxyw + dxw] - waz - wcz - wdz;
+    wfz = wpz[dxyw + 1] - waz - wbz - wdz;
+    wgz = wf3z - waz - wbz - wcz;
+    whz = wpz[dxyw + dxw + 1] - wf3z - wdz - wez - wfz;
+
+    wyzf = wyf * wzf;                   /* common term in interpolation     */
+
+    /* trilinear interpolation formulae  */
+    *px = wax + wbx * wxf + wcx * wyf + wdx * wzf + wex * wyzf + wfx * wxf * wzf + wgx * wxf * wyf + whx * wxf * wyzf;
+    *py = way + wby * wxf + wcy * wyf + wdy * wzf + wey * wyzf + wfy * wxf * wzf + wgy * wxf * wyf + why * wxf * wyzf;
+    *pz = waz + wbz * wxf + wcz * wyf + wdz * wzf + wez * wyzf + wfz * wxf * wzf + wgz * wxf * wyf + whz * wxf * wyzf;
+
+    return EXIT_SUCCESS;
 }
 
 /* Internal square distance cost function; supports NIFTI_TYPE_FLOAT32 and NIFTI_TYPE_FLOAT64 */
-static double cost_function(const double *vector, const void *data)
-{
-   struct ddata *dat = (struct ddata*) data;
-   double x, y, z;
-   if (dat->deformationField->datatype == NIFTI_TYPE_FLOAT64)
-      FastWarp<double>(vector[0], vector[1], vector[2], dat->deformationField, &x, &y, &z);
-   else
-      FastWarp<float>(vector[0], vector[1], vector[2], dat->deformationField, &x, &y, &z);
-
-   return (x-dat->gx)*(x-dat->gx) + (y-dat->gy)*(y-dat->gy) + (z-dat->gz)*(z-dat->gz);
+static double cost_function(const double *vector, const void *data) {
+    struct ddata *dat = (struct ddata*)data;
+    double x, y, z;
+    if (dat->deformationField->datatype == NIFTI_TYPE_FLOAT64)
+        FastWarp<double>(vector[0], vector[1], vector[2], dat->deformationField, &x, &y, &z);
+    else
+        FastWarp<float>(vector[0], vector[1], vector[2], dat->deformationField, &x, &y, &z);
+
+    return (x - dat->gx) * (x - dat->gx) + (y - dat->gy) * (y - dat->gy) + (z - dat->gz) * (z - dat->gz);
 }
 
 /* multimin/simplex.c
@@ -2973,629 +2682,565 @@ static double cost_function(const double *vector, const void *data)
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
-/*
-   - Originally written by Tuomo Keskitalo <tuomo.keskitalo@iki.fi>
-   - Corrections to nmsimplex_iterate and other functions
-     by Ivo Alxneit <ivo.alxneit@psi.ch>
-   - Additional help by Brian Gough <bjg@network-theory.co.uk>
-
-   Modified version by mvh to make it work standalone of GSL
-*/
-
-/* The Simplex method of Nelder and Mead,
-   also known as the polytope search alogorithm. Ref:
-   Nelder, J.A., Mead, R., Computer Journal 7 (1965) pp. 308-313.
-
-   This implementation uses 4 corner points in the simplex for a 3D search.
-*/
-
-typedef struct
-{
-   double x1[12];              /* simplex corner points nsimplex*nvec */
-   double y1[4];               /* function value at corner points */
-   double ws1[3];              /* workspace 1 for algorithm */
-   double ws2[3];              /* workspace 2 for algorithm */
-   int    nvec;
-   int    nsimplex;
+ /*
+    - Originally written by Tuomo Keskitalo <tuomo.keskitalo@iki.fi>
+    - Corrections to nmsimplex_iterate and other functions
+      by Ivo Alxneit <ivo.alxneit@psi.ch>
+    - Additional help by Brian Gough <bjg@network-theory.co.uk>
+
+    Modified version by mvh to make it work standalone of GSL
+ */
+
+ /* The Simplex method of Nelder and Mead,
+    also known as the polytope search alogorithm. Ref:
+    Nelder, J.A., Mead, R., Computer Journal 7 (1965) pp. 308-313.
+
+    This implementation uses 4 corner points in the simplex for a 3D search.
+ */
+
+typedef struct {
+    double x1[12];              /* simplex corner points nsimplex*nvec */
+    double y1[4];               /* function value at corner points */
+    double ws1[3];              /* workspace 1 for algorithm */
+    double ws2[3];              /* workspace 2 for algorithm */
+    int    nvec;
+    int    nsimplex;
 }
 nmsimplex_state_t;
 
-typedef double gsl_multimin_function(const double *, const void *);
+typedef double gsl_multimin_function(const double*, const void*);
 
 static double
-nmsimplex_move_corner (const double coeff, nmsimplex_state_t *state,
-                       size_t corner, double *xc,
-                       gsl_multimin_function *f, void *fdata)
-{
-   /* moves a simplex corner scaled by coeff (negative value represents
-    mirroring by the middle point of the "other" corner points)
-    and gives new corner in xc and function value at xc as a
-    return value
-   */
-
-   double *x1 = state->x1;
-
-   size_t i, j;
-   double newval, mp;
-
-   for (j = 0; j < (size_t)state->nvec; j++)
-   {
-      mp = 0;
-      for (i = 0; i < (size_t)state->nsimplex; i++)
-      {
-         if (i != corner)
-         {
-            mp += x1[i*state->nvec + j];
-         }
-      }
-      mp /= (double) (state->nsimplex - 1);
-      newval = mp - coeff * (mp - x1[corner*state->nvec + j]);
-      xc[j] = newval;
-   }
-
-   newval = f(xc, fdata);
-
-   return newval;
+nmsimplex_move_corner(const double coeff, nmsimplex_state_t *state,
+                      size_t corner, double *xc,
+                      gsl_multimin_function *f, void *fdata) {
+    /* moves a simplex corner scaled by coeff (negative value represents
+     mirroring by the middle point of the "other" corner points)
+     and gives new corner in xc and function value at xc as a
+     return value
+    */
+
+    double *x1 = state->x1;
+
+    size_t i, j;
+    double newval, mp;
+
+    for (j = 0; j < (size_t)state->nvec; j++) {
+        mp = 0;
+        for (i = 0; i < (size_t)state->nsimplex; i++) {
+            if (i != corner) {
+                mp += x1[i * state->nvec + j];
+            }
+        }
+        mp /= (double)(state->nsimplex - 1);
+        newval = mp - coeff * (mp - x1[corner * state->nvec + j]);
+        xc[j] = newval;
+    }
+
+    newval = f(xc, fdata);
+
+    return newval;
 }
 
 static void
-nmsimplex_contract_by_best (nmsimplex_state_t *state, size_t best,
-                            double *xc, gsl_multimin_function *f, void *fdata)
-{
-
-   /* Function contracts the simplex in respect to
-    best valued corner. That is, all corners besides the
-    best corner are moved. */
-
-   /* the xc vector is simply work space here */
-
-   double *x1 = state->x1;
-   double *y1 = state->y1;
-
-   size_t i, j;
-   double newval;
-
-   for (i = 0; i < (size_t)state->nsimplex; i++)
-   {
-      if (i != best)
-      {
-         for (j = 0; j < (size_t)state->nvec; j++)
-         {
-            newval = 0.5 * (x1[i*state->nvec + j] + x1[best*state->nvec + j]);
-            x1[i*state->nvec +  j] = newval;
-         }
-
-         /* evaluate function in the new point */
-
-         xc = x1 + i*state->nvec;
-         newval = f(xc, fdata);
-         y1[i] = newval;
-      }
-   }
+nmsimplex_contract_by_best(nmsimplex_state_t *state, size_t best,
+                           double *xc, gsl_multimin_function *f, void *fdata) {
+
+    /* Function contracts the simplex in respect to
+     best valued corner. That is, all corners besides the
+     best corner are moved. */
+
+     /* the xc vector is simply work space here */
+
+    double *x1 = state->x1;
+    double *y1 = state->y1;
+
+    size_t i, j;
+    double newval;
+
+    for (i = 0; i < (size_t)state->nsimplex; i++) {
+        if (i != best) {
+            for (j = 0; j < (size_t)state->nvec; j++) {
+                newval = 0.5 * (x1[i * state->nvec + j] + x1[best * state->nvec + j]);
+                x1[i * state->nvec + j] = newval;
+            }
+
+            /* evaluate function in the new point */
+
+            xc = x1 + i * state->nvec;
+            newval = f(xc, fdata);
+            y1[i] = newval;
+        }
+    }
 }
 
 static void
-nmsimplex_calc_center (const nmsimplex_state_t *state, double *mp)
-{
-   /* calculates the center of the simplex to mp */
-
-   const double *x1 = state->x1;
-
-   size_t i, j;
-   double val;
-
-   for (j = 0; j < (size_t)state->nvec; j++)
-   {
-      val = 0;
-      for (i = 0; i < (size_t)state->nsimplex; i++)
-      {
-         val += x1[i*state->nvec + j];
-      }
-      val /= state->nsimplex;
-      mp[j] = val;
-   }
+nmsimplex_calc_center(const nmsimplex_state_t *state, double *mp) {
+    /* calculates the center of the simplex to mp */
+
+    const double *x1 = state->x1;
+
+    size_t i, j;
+    double val;
+
+    for (j = 0; j < (size_t)state->nvec; j++) {
+        val = 0;
+        for (i = 0; i < (size_t)state->nsimplex; i++) {
+            val += x1[i * state->nvec + j];
+        }
+        val /= state->nsimplex;
+        mp[j] = val;
+    }
 }
 
 static double
-nmsimplex_size (nmsimplex_state_t *state)
-{
-   /* calculates simplex size as average sum of length of vectors
-    from simplex center to corner points:
+nmsimplex_size(nmsimplex_state_t *state) {
+    /* calculates simplex size as average sum of length of vectors
+     from simplex center to corner points:
 
-    ( sum ( || y - y_middlepoint || ) ) / n
-   */
+     ( sum ( || y - y_middlepoint || ) ) / n
+    */
 
-   double *s = state->ws1;
-   double *mp = state->ws2;
-   double *x1 = state->x1;
+    double *s = state->ws1;
+    double *mp = state->ws2;
+    double *x1 = state->x1;
 
-   size_t i, j;
+    size_t i, j;
 
-   double t, ss = 0;
+    double t, ss = 0;
 
-   /* Calculate middle point */
-   nmsimplex_calc_center (state, mp);
+    /* Calculate middle point */
+    nmsimplex_calc_center(state, mp);
 
-   for (i = 0; i < (size_t)state->nsimplex; i++)
-   {
-      for (j=0; j<(size_t)state->nvec; j++) s[j] = x1[i*state->nvec + j] - mp[j];
-      t = 0;
-      for (j=0; j<(size_t)state->nvec; j++) t += s[j]*s[j];
-      ss += sqrt(t);
-   }
+    for (i = 0; i < (size_t)state->nsimplex; i++) {
+        for (j = 0; j < (size_t)state->nvec; j++) s[j] = x1[i * state->nvec + j] - mp[j];
+        t = 0;
+        for (j = 0; j < (size_t)state->nvec; j++) t += s[j] * s[j];
+        ss += sqrt(t);
+    }
 
-   return ss / (double) (state->nsimplex);
+    return ss / (double)(state->nsimplex);
 }
 
 static void
-nmsimplex_set (void *vstate, gsl_multimin_function *f,
-               const double *x,
-               double *size, const double *step_size, void *fdata)
-{
-   size_t i, j;
-   double val;
+nmsimplex_set(void *vstate, gsl_multimin_function *f,
+              const double *x,
+              double *size, const double *step_size, void *fdata) {
+    size_t i, j;
+    double val;
 
-   nmsimplex_state_t *state = (nmsimplex_state_t *) vstate;
+    nmsimplex_state_t *state = (nmsimplex_state_t*)vstate;
 
-   double *xtemp = state->ws1;
+    double *xtemp = state->ws1;
 
-   /* first point is the original x0 */
+    /* first point is the original x0 */
 
-   val = f(x, fdata);
-   for (j=0; j<(size_t)state->nvec; j++) state->x1[j] = x[j];
-   state->y1[0] = val;
+    val = f(x, fdata);
+    for (j = 0; j < (size_t)state->nvec; j++) state->x1[j] = x[j];
+    state->y1[0] = val;
 
-   /* following points are initialized to x0 + step_size */
+    /* following points are initialized to x0 + step_size */
 
-   for (i = 0; i < (size_t)state->nvec; i++)
-   {
-      for (j=0; j<(size_t)state->nvec; j++) xtemp[j] = x[j];
+    for (i = 0; i < (size_t)state->nvec; i++) {
+        for (j = 0; j < (size_t)state->nvec; j++) xtemp[j] = x[j];
 
-      val = xtemp[i] + step_size[i];
-      xtemp[i] = val;
-      val = f(xtemp, fdata);
-      for (j=0; j<(size_t)state->nvec; j++)
-         state->x1[(i + 1)*state->nvec + j] = xtemp[j];
-      state->y1[i + 1] = val;
-   }
+        val = xtemp[i] + step_size[i];
+        xtemp[i] = val;
+        val = f(xtemp, fdata);
+        for (j = 0; j < (size_t)state->nvec; j++)
+            state->x1[(i + 1) * state->nvec + j] = xtemp[j];
+        state->y1[i + 1] = val;
+    }
 
-   /* Initialize simplex size */
+    /* Initialize simplex size */
 
-   *size = nmsimplex_size (state);
+    *size = nmsimplex_size(state);
 }
 
 static void
-nmsimplex_iterate (void *vstate, gsl_multimin_function *f,
-                   double *x, double *size, double *fval, void *fdata)
-{
-
-   /* Simplex iteration tries to minimize function f value */
-   /* Includes corrections from Ivo Alxneit <ivo.alxneit@psi.ch> */
-
-   nmsimplex_state_t *state = (nmsimplex_state_t *) vstate;
-
-   /* xc and xc2 vectors store tried corner point coordinates */
-
-   double *xc = state->ws1;
-   double *xc2 = state->ws2;
-   double *y1 = state->y1;
-   double *x1 = state->x1;
-
-   size_t n = state->nsimplex;
-   size_t i, j;
-   size_t hi = 0, s_hi = 0, lo = 0;
-   double dhi, ds_hi, dlo;
-   double val, val2;
-
-   /* get index of highest, second highest and lowest point */
-
-   dhi = ds_hi = dlo = y1[0];
-
-   for (i = 1; i < n; i++)
-   {
-      val = y1[i];
-      if (val < dlo)
-      {
-         dlo = val;
-         lo = i;
-      }
-      else if (val > dhi)
-      {
-         ds_hi = dhi;
-         s_hi = hi;
-         dhi = val;
-         hi = i;
-      }
-      else if (val > ds_hi)
-      {
-         ds_hi = val;
-         s_hi = i;
-      }
-   }
-
-   /* reflect the highest value */
-
-   val = nmsimplex_move_corner (-1.0, state, hi, xc, f, fdata);
-
-   if (val < y1[lo])
-   {
-
-      /* reflected point becomes lowest point, try expansion */
-
-      val2 = nmsimplex_move_corner (-2.0, state, hi, xc2, f, fdata);
-
-      if (val2 < y1[lo])
-      {
-         for (j=0; j<(size_t)state->nvec; j++) x1[hi*state->nvec+j] = xc2[j];
-         y1[hi] = val2;
-      }
-      else
-      {
-         for (j=0; j<(size_t)state->nvec; j++) x1[hi*state->nvec+j] = xc[j];
-         y1[hi] = val;
-      }
-   }
-
-   /* reflection does not improve things enough */
-
-   else if (val > y1[s_hi])
-   {
-      if (val <= y1[hi])
-      {
-
-         /* if trial point is better than highest point, replace
-          highest point */
-
-         for (j=0; j<(size_t)state->nvec; j++) x1[hi*state->nvec+j] = xc[j];
-         y1[hi] = val;
-      }
-
-      /* try one dimensional contraction */
-
-      val2 = nmsimplex_move_corner (0.5, state, hi, xc2, f, fdata);
-
-      if (val2 <= y1[hi])
-      {
-         for (j=0; j<(size_t)state->nvec; j++) x1[hi*state->nvec+j] = xc2[j];
-         y1[hi] = val2;
-      }
-
-      else
-      {
-         /* contract the whole simplex in respect to the best point */
-         nmsimplex_contract_by_best (state, lo, xc, f, fdata);
-      }
-   }
-   else
-   {
-
-      /* trial point is better than second highest point.
-       Replace highest point by it */
-
-      for (j=0; j<(size_t)state->nvec; j++) x1[hi*state->nvec+j] = xc[j];
-      y1[hi] = val;
-   }
-
-   /* return lowest point of simplex as x */
-
-   lo=0;
-   val=y1[0];
-   for (j=1; j<(size_t)state->nsimplex; j++) if (y1[j]<val) lo=j, val=y1[j];
-   for (j=0; j<(size_t)state->nvec; j++) x[j] = x1[lo*state->nvec+j];
-   *fval = y1[lo];
-
-
-   /* Update simplex size */
-
-   *size = nmsimplex_size (state);
+nmsimplex_iterate(void *vstate, gsl_multimin_function *f,
+                  double *x, double *size, double *fval, void *fdata) {
+
+    /* Simplex iteration tries to minimize function f value */
+    /* Includes corrections from Ivo Alxneit <ivo.alxneit@psi.ch> */
+
+    nmsimplex_state_t *state = (nmsimplex_state_t*)vstate;
+
+    /* xc and xc2 vectors store tried corner point coordinates */
+
+    double *xc = state->ws1;
+    double *xc2 = state->ws2;
+    double *y1 = state->y1;
+    double *x1 = state->x1;
+
+    size_t n = state->nsimplex;
+    size_t i, j;
+    size_t hi = 0, s_hi = 0, lo = 0;
+    double dhi, ds_hi, dlo;
+    double val, val2;
+
+    /* get index of highest, second highest and lowest point */
+
+    dhi = ds_hi = dlo = y1[0];
+
+    for (i = 1; i < n; i++) {
+        val = y1[i];
+        if (val < dlo) {
+            dlo = val;
+            lo = i;
+        } else if (val > dhi) {
+            ds_hi = dhi;
+            s_hi = hi;
+            dhi = val;
+            hi = i;
+        } else if (val > ds_hi) {
+            ds_hi = val;
+            s_hi = i;
+        }
+    }
+
+    /* reflect the highest value */
+
+    val = nmsimplex_move_corner(-1.0, state, hi, xc, f, fdata);
+
+    if (val < y1[lo]) {
+
+        /* reflected point becomes lowest point, try expansion */
+
+        val2 = nmsimplex_move_corner(-2.0, state, hi, xc2, f, fdata);
+
+        if (val2 < y1[lo]) {
+            for (j = 0; j < (size_t)state->nvec; j++) x1[hi * state->nvec + j] = xc2[j];
+            y1[hi] = val2;
+        } else {
+            for (j = 0; j < (size_t)state->nvec; j++) x1[hi * state->nvec + j] = xc[j];
+            y1[hi] = val;
+        }
+    }
+
+    /* reflection does not improve things enough */
+
+    else if (val > y1[s_hi]) {
+        if (val <= y1[hi]) {
+
+            /* if trial point is better than highest point, replace
+             highest point */
+
+            for (j = 0; j < (size_t)state->nvec; j++) x1[hi * state->nvec + j] = xc[j];
+            y1[hi] = val;
+        }
+
+        /* try one dimensional contraction */
+
+        val2 = nmsimplex_move_corner(0.5, state, hi, xc2, f, fdata);
+
+        if (val2 <= y1[hi]) {
+            for (j = 0; j < (size_t)state->nvec; j++) x1[hi * state->nvec + j] = xc2[j];
+            y1[hi] = val2;
+        }
+
+        else {
+            /* contract the whole simplex in respect to the best point */
+            nmsimplex_contract_by_best(state, lo, xc, f, fdata);
+        }
+    } else {
+
+        /* trial point is better than second highest point.
+         Replace highest point by it */
+
+        for (j = 0; j < (size_t)state->nvec; j++) x1[hi * state->nvec + j] = xc[j];
+        y1[hi] = val;
+    }
+
+    /* return lowest point of simplex as x */
+
+    lo = 0;
+    val = y1[0];
+    for (j = 1; j < (size_t)state->nsimplex; j++) if (y1[j] < val) lo = j, val = y1[j];
+    for (j = 0; j < (size_t)state->nvec; j++) x[j] = x1[lo * state->nvec + j];
+    *fval = y1[lo];
+
+
+    /* Update simplex size */
+
+    *size = nmsimplex_size(state);
 }
 
 /* Internal wrapper for nmsimplex_iterate */
-static void optimize(gsl_multimin_function *f, double *start, void *data, double tol)
-{
-   nmsimplex_state_t t;
-   double fval[4];
-   double offset[3] = {10, 10, 10};
-   double size;
-   int n=0;
-   t.nvec = 3;
-   t.nsimplex = 4;
-   nmsimplex_set (&t, f, start, &size, offset, data);
-   while (size>tol && n<300)
-   {
-      nmsimplex_iterate (&t, f, start, &size, fval, data);
-      n++;
-   }
-   nmsimplex_calc_center (&t, start);
+static void optimize(gsl_multimin_function *f, double *start, void *data, double tol) {
+    nmsimplex_state_t t;
+    double fval[4];
+    double offset[3] = { 10, 10, 10 };
+    double size;
+    int n = 0;
+    t.nvec = 3;
+    t.nsimplex = 4;
+    nmsimplex_set(&t, f, start, &size, offset, data);
+    while (size > tol && n < 300) {
+        nmsimplex_iterate(&t, f, start, &size, fval, data);
+        n++;
+    }
+    nmsimplex_calc_center(&t, start);
 }
 /* *************************************************************** */
 template <class DataType>
 void reg_defFieldInvert3D(nifti_image *inputDeformationField,
                           nifti_image *outputDeformationField,
-                          float tolerance)
-{
-   const size_t outputVoxelNumber = CalcVoxelNumber(*outputDeformationField);
-
-   mat44 *OutXYZMatrix;
-   if(outputDeformationField->sform_code>0)
-      OutXYZMatrix=&(outputDeformationField->sto_xyz);
-   else OutXYZMatrix=&(outputDeformationField->qto_xyz);
-
-   // added:
-   mat44 *InXYZMatrix;
-   if(inputDeformationField->sform_code>0)
-      InXYZMatrix=&(inputDeformationField->sto_xyz);
-   else InXYZMatrix=&(inputDeformationField->qto_xyz);
-   float center[4], center2[4];
-   double centerout[4], delta[4];
-   center[0] = inputDeformationField->nx / 2;
-   center[1] = inputDeformationField->ny / 2;
-   center[2] = inputDeformationField->nz / 2;
-   center[3] = 1;
-   reg_mat44_mul(InXYZMatrix, center, center2);
-   FastWarp<float>(center2[0], center2[1], center2[2], inputDeformationField, &centerout[0], &centerout[1], &centerout[2]);
-   delta[0] = center2[0]-centerout[0];
-   delta[1] = center2[1]-centerout[1];
-   delta[2] = center2[2]-centerout[2];
-   // end added
-
-
-   int i,x,y,z;
-   double position[4], pars[4], arrayy[4][3];
-   struct ddata dat;
-   DataType *outData;
+                          float tolerance) {
+    const size_t outputVoxelNumber = NiftiImage::calcVoxelNumber(outputDeformationField, 3);
+
+    const mat44 *OutXYZMatrix;
+    if (outputDeformationField->sform_code > 0)
+        OutXYZMatrix = &outputDeformationField->sto_xyz;
+    else OutXYZMatrix = &outputDeformationField->qto_xyz;
+
+    const mat44 *InXYZMatrix;
+    if (inputDeformationField->sform_code > 0)
+        InXYZMatrix = &inputDeformationField->sto_xyz;
+    else InXYZMatrix = &inputDeformationField->qto_xyz;
+    float center[4], center2[4];
+    double centerout[4], delta[4];
+    center[0] = static_cast<float>(inputDeformationField->nx / 2);
+    center[1] = static_cast<float>(inputDeformationField->ny / 2);
+    center[2] = static_cast<float>(inputDeformationField->nz / 2);
+    center[3] = 1;
+    reg_mat44_mul(InXYZMatrix, center, center2);
+    FastWarp<float>(center2[0], center2[1], center2[2], inputDeformationField, &centerout[0], &centerout[1], &centerout[2]);
+    delta[0] = center2[0] - centerout[0];
+    delta[1] = center2[1] - centerout[1];
+    delta[2] = center2[2] - centerout[2];
+    // end added
+
+
+    int i, x, y, z;
+    double position[4], pars[4], arrayy[4][3];
+    struct ddata dat;
+    DataType *outData;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(outputDeformationField,tolerance,outputVoxelNumber, \
    inputDeformationField, OutXYZMatrix, delta) \
-   private(i,x,y,z,dat,outData,position,pars,arrayy)
+   private(i, x, y, dat, outData, position, pars, arrayy)
 #endif
-   for (z=0; z<outputDeformationField->nz; ++z)
-   {
-      dat.deformationField = inputDeformationField;
-      for(i=0; i<4; ++i)              /* set up 2D array pointers */
-         dat.arrayy[i]= arrayy[i];
+    for (z = 0; z < outputDeformationField->nz; ++z) {
+        dat.deformationField = inputDeformationField;
+        for (i = 0; i < 4; ++i)              /* set up 2D array pointers */
+            dat.arrayy[i] = arrayy[i];
 
-      outData = (DataType *)(outputDeformationField->data) +
+        outData = (DataType*)(outputDeformationField->data) +
             outputDeformationField->nx * outputDeformationField->ny * z;
 
-      for(y=0; y<outputDeformationField->ny; ++y)
-      {
-         for(x=0; x<outputDeformationField->nx; ++x)
-         {
-
-            // convert x, y,z to world coordinates
-            position[0] = x;
-            position[1] = y;
-            position[2] = z;
-            position[3] = 1;
-            reg_mat44_mul(OutXYZMatrix, position, pars);
-            dat.gx = pars[0];
-            dat.gy = pars[1];
-            dat.gz = pars[2];
-
-            // added
-            pars[0] += delta[0];
-            pars[1] += delta[1];
-            pars[2] += delta[2];
-            // end added
-
-            optimize(cost_function, pars, &dat, tolerance);
-            // output = (warp-1)(input);
-
-            outData[0]        = pars[0];
-            outData[outputVoxelNumber]   = pars[1];
-            outData[outputVoxelNumber*2] = pars[2];
-            ++outData;
-         }
-      }
-   }
+        for (y = 0; y < outputDeformationField->ny; ++y) {
+            for (x = 0; x < outputDeformationField->nx; ++x) {
+
+                // convert x, y,z to world coordinates
+                position[0] = x;
+                position[1] = y;
+                position[2] = z;
+                position[3] = 1;
+                reg_mat44_mul(OutXYZMatrix, position, pars);
+                dat.gx = pars[0];
+                dat.gy = pars[1];
+                dat.gz = pars[2];
+
+                // added
+                pars[0] += delta[0];
+                pars[1] += delta[1];
+                pars[2] += delta[2];
+                // end added
+
+                optimize(cost_function, pars, &dat, tolerance);
+                // output = (warp-1)(input);
+
+                outData[0] = static_cast<DataType>(pars[0]);
+                outData[outputVoxelNumber] = static_cast<DataType>(pars[1]);
+                outData[outputVoxelNumber * 2] = static_cast<DataType>(pars[2]);
+                ++outData;
+            }
+        }
+    }
 }
 /* *************************************************************** */
 void reg_defFieldInvert(nifti_image *inputDeformationField,
                         nifti_image *outputDeformationField,
-                        float tolerance)
-{
-   // Check the input image data types
-   if(inputDeformationField->datatype!=outputDeformationField->datatype)
-   {
-      reg_print_fct_error("reg_defFieldInvert");
-      reg_print_msg_error("Both deformation fields are expected to have the same data type");
-      reg_exit();
-   }
-
-   if(inputDeformationField->nu!=3)
-   {
-      reg_print_fct_error("reg_defFieldInvert");
-      reg_print_msg_error("The function has only been implemented for 3D deformation field yet");
-      reg_exit();
-   }
-
-   switch(inputDeformationField->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      reg_defFieldInvert3D<float>
-            (inputDeformationField,outputDeformationField,tolerance);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_defFieldInvert3D<double>
-            (inputDeformationField,outputDeformationField,tolerance);
-   default:
-      reg_print_fct_error("reg_defFieldInvert");
-      reg_print_msg_error("Deformation field pixel type unsupported");
-      reg_exit();
-   }
+                        float tolerance) {
+    // Check the input image data types
+    if (inputDeformationField->datatype != outputDeformationField->datatype) {
+        reg_print_fct_error("reg_defFieldInvert");
+        reg_print_msg_error("Both deformation fields are expected to have the same data type");
+        reg_exit();
+    }
+
+    if (inputDeformationField->nu != 3) {
+        reg_print_fct_error("reg_defFieldInvert");
+        reg_print_msg_error("The function has only been implemented for 3D deformation field yet");
+        reg_exit();
+    }
+
+    switch (inputDeformationField->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_defFieldInvert3D<float>
+            (inputDeformationField, outputDeformationField, tolerance);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_defFieldInvert3D<double>
+            (inputDeformationField, outputDeformationField, tolerance);
+    default:
+        reg_print_fct_error("reg_defFieldInvert");
+        reg_print_msg_error("Deformation field pixel type unsupported");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
-//HAVE TO BE CHECKED
+// TODO: HAVE TO BE CHECKED
 template<class DataType>
 void reg_spline_cppComposition_2D(nifti_image *grid1,
                                   nifti_image *grid2,
                                   bool displacement1,
                                   bool displacement2,
-                                  bool bspline)
-{
-   // REMINDER Grid2(x)=Grid1(Grid2(x))
-
- #if _USE_SSE
-   union
-   {
-      __m128 m;
-      float f[4];
-   } val;
- #endif // _USE_SSE
-
-   DataType *outCPPPtrX = static_cast<DataType *>(grid2->data);
-   DataType *outCPPPtrY = &outCPPPtrX[CalcVoxelNumber(*grid2, 2)];
-
-   DataType *controlPointPtrX = static_cast<DataType *>(grid1->data);
-   DataType *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*grid1, 2)];
-
-   DataType basis;
-
- #ifdef _WIN32
-   __declspec(align(16)) DataType xBasis[4];
-   __declspec(align(16)) DataType yBasis[4];
- #if _USE_SSE
-   __declspec(align(16)) DataType xyBasis[16];
- #endif  //_USE_SSE
-
-   __declspec(align(16)) DataType xControlPointCoordinates[16];
-   __declspec(align(16)) DataType yControlPointCoordinates[16];
- #else // _WIN32
-   DataType xBasis[4] __attribute__((aligned(16)));
-   DataType yBasis[4] __attribute__((aligned(16)));
- #if _USE_SSE
-   DataType xyBasis[16] __attribute__((aligned(16)));
- #endif  //_USE_SSE
-
-   DataType xControlPointCoordinates[16] __attribute__((aligned(16)));
-   DataType yControlPointCoordinates[16] __attribute__((aligned(16)));
- #endif // _WIN32
-
-   size_t coord;
-
-   // read the xyz/ijk sform or qform, as appropriate
-   mat44 *matrix_real_to_voxel1=nullptr;
-   mat44 *matrix_voxel_to_real2=nullptr;
-   if(grid1->sform_code>0)
-      matrix_real_to_voxel1=&(grid1->sto_ijk);
-   else matrix_real_to_voxel1=&(grid1->qto_ijk);
-   if(grid2->sform_code>0)
-      matrix_voxel_to_real2=&(grid2->sto_xyz);
-   else matrix_voxel_to_real2=&(grid2->qto_xyz);
-
-   for(int y=0; y<grid2->ny; y++)
-   {
-      for(int x=0; x<grid2->nx; x++)
-      {
-
-         // Get the control point actual position
-         DataType xReal = *outCPPPtrX;
-         DataType yReal = *outCPPPtrY;
-         DataType initialX=xReal;
-         DataType initialY=yReal;
-         if(displacement2)
-         {
-            xReal +=
-                  matrix_voxel_to_real2->m[0][0]*x
-                  + matrix_voxel_to_real2->m[0][1]*y
-                  + matrix_voxel_to_real2->m[0][3];
-            yReal +=
-                  matrix_voxel_to_real2->m[1][0]*x
-                  + matrix_voxel_to_real2->m[1][1]*y
-                  + matrix_voxel_to_real2->m[1][3];
-         }
-
-         // Get the voxel based control point position in grid1
-         DataType xVoxel = matrix_real_to_voxel1->m[0][0]*xReal
-               + matrix_real_to_voxel1->m[0][1]*yReal
-               + matrix_real_to_voxel1->m[0][3];
-         DataType yVoxel = matrix_real_to_voxel1->m[1][0]*xReal
-               + matrix_real_to_voxel1->m[1][1]*yReal
-               + matrix_real_to_voxel1->m[1][3];
-
-         // The spline coefficients are computed
-         int xPre=(int)(reg_floor(xVoxel));
-         basis=(DataType)xVoxel-(DataType)xPre;
-         xPre--;
-         if(basis<0) basis=0; //rounding error
-         if(bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
-         else get_SplineBasisValues<DataType>(basis, xBasis);
-
-         int yPre=(int)(reg_floor(yVoxel));
-         basis=(DataType)yVoxel-(DataType)yPre;
-         yPre--;
-         if(basis<0) basis=0; //rounding error
-         if(bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
-         else get_SplineBasisValues<DataType>(basis, yBasis);
-
-         // The control points are stored
-         get_GridValues<DataType>(xPre,
-                               yPre,
-                               grid1,
-                               controlPointPtrX,
-                               controlPointPtrY,
-                               xControlPointCoordinates,
-                               yControlPointCoordinates,
-                               false, // no approximation
-                               displacement1 // displacement field?
-                               );
-         xReal=0;
-         yReal=0;
- #if _USE_SSE
-         coord=0;
-         for(unsigned b=0; b<4; b++)
-         {
-            for(unsigned a=0; a<4; a++)
-            {
-               xyBasis[coord++] = xBasis[a] * yBasis[b];
+                                  bool bspline) {
+    // REMINDER Grid2(x)=Grid1(Grid2(x))
+
+#if _USE_SSE
+    union {
+        __m128 m;
+        float f[4];
+    } val;
+#endif // _USE_SSE
+
+    DataType *outCPPPtrX = static_cast<DataType*>(grid2->data);
+    DataType *outCPPPtrY = &outCPPPtrX[NiftiImage::calcVoxelNumber(grid2, 2)];
+
+    DataType *controlPointPtrX = static_cast<DataType*>(grid1->data);
+    DataType *controlPointPtrY = &controlPointPtrX[NiftiImage::calcVoxelNumber(grid1, 2)];
+
+    DataType basis;
+
+#ifdef _WIN32
+    __declspec(align(16)) DataType xBasis[4];
+    __declspec(align(16)) DataType yBasis[4];
+#if _USE_SSE
+    __declspec(align(16)) DataType xyBasis[16];
+#endif  //_USE_SSE
+
+    __declspec(align(16)) DataType xControlPointCoordinates[16];
+    __declspec(align(16)) DataType yControlPointCoordinates[16];
+#else // _WIN32
+    DataType xBasis[4] __attribute__((aligned(16)));
+    DataType yBasis[4] __attribute__((aligned(16)));
+#if _USE_SSE
+    DataType xyBasis[16] __attribute__((aligned(16)));
+#endif  //_USE_SSE
+
+    DataType xControlPointCoordinates[16] __attribute__((aligned(16)));
+    DataType yControlPointCoordinates[16] __attribute__((aligned(16)));
+#endif // _WIN32
+
+    size_t coord;
+
+    // read the xyz/ijk sform or qform, as appropriate
+    const mat44 *matrix_real_to_voxel1, *matrix_voxel_to_real2;
+    if (grid1->sform_code > 0)
+        matrix_real_to_voxel1 = &grid1->sto_ijk;
+    else matrix_real_to_voxel1 = &grid1->qto_ijk;
+    if (grid2->sform_code > 0)
+        matrix_voxel_to_real2 = &grid2->sto_xyz;
+    else matrix_voxel_to_real2 = &grid2->qto_xyz;
+
+    for (int y = 0; y < grid2->ny; y++) {
+        for (int x = 0; x < grid2->nx; x++) {
+            // Get the control point actual position
+            DataType xReal = *outCPPPtrX;
+            DataType yReal = *outCPPPtrY;
+            DataType initialX = xReal;
+            DataType initialY = yReal;
+            if (displacement2) {
+                xReal +=
+                    matrix_voxel_to_real2->m[0][0] * x
+                    + matrix_voxel_to_real2->m[0][1] * y
+                    + matrix_voxel_to_real2->m[0][3];
+                yReal +=
+                    matrix_voxel_to_real2->m[1][0] * x
+                    + matrix_voxel_to_real2->m[1][1] * y
+                    + matrix_voxel_to_real2->m[1][3];
+            }
+
+            // Get the voxel based control point position in grid1
+            DataType xVoxel = matrix_real_to_voxel1->m[0][0] * xReal
+                + matrix_real_to_voxel1->m[0][1] * yReal
+                + matrix_real_to_voxel1->m[0][3];
+            DataType yVoxel = matrix_real_to_voxel1->m[1][0] * xReal
+                + matrix_real_to_voxel1->m[1][1] * yReal
+                + matrix_real_to_voxel1->m[1][3];
+
+            // The spline coefficients are computed
+            int xPre = (int)(reg_floor(xVoxel));
+            basis = xVoxel - static_cast<DataType>(xPre--);
+            if (basis < 0) basis = 0; //rounding error
+            if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
+            else get_SplineBasisValues<DataType>(basis, xBasis);
+
+            int yPre = (int)(reg_floor(yVoxel));
+            basis = yVoxel - static_cast<DataType>(yPre--);
+            if (basis < 0) basis = 0; //rounding error
+            if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+            else get_SplineBasisValues<DataType>(basis, yBasis);
+
+            // The control points are stored
+            get_GridValues<DataType>(xPre,
+                                     yPre,
+                                     grid1,
+                                     controlPointPtrX,
+                                     controlPointPtrY,
+                                     xControlPointCoordinates,
+                                     yControlPointCoordinates,
+                                     false, // no approximation
+                                     displacement1); // displacement field?
+            xReal = 0;
+            yReal = 0;
+#if _USE_SSE
+            coord = 0;
+            for (unsigned b = 0; b < 4; b++) {
+                for (unsigned a = 0; a < 4; a++) {
+                    xyBasis[coord++] = xBasis[a] * yBasis[b];
+                }
+            }
+
+            __m128 tempX = _mm_set_ps1(0);
+            __m128 tempY = _mm_set_ps1(0);
+            __m128 *ptrX = (__m128*)&xControlPointCoordinates[0];
+            __m128 *ptrY = (__m128*)&yControlPointCoordinates[0];
+            __m128 *ptrBasis = (__m128*)&xyBasis[0];
+            //addition and multiplication of the 16 basis value and CP position for each axis
+            for (unsigned a = 0; a < 4; a++) {
+                tempX = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrX++), tempX);
+                tempY = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrY++), tempY);
+                ptrBasis++;
+            }
+            //the values stored in SSE variables are transferred to normal float
+            val.m = tempX;
+            xReal = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+            val.m = tempY;
+            yReal = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+#else
+            coord = 0;
+            for (unsigned b = 0; b < 4; b++) {
+                for (unsigned a = 0; a < 4; a++) {
+                    DataType tempValue = xBasis[a] * yBasis[b];
+                    xReal += xControlPointCoordinates[coord] * tempValue;
+                    yReal += yControlPointCoordinates[coord] * tempValue;
+                    coord++;
+                }
             }
-         }
-
-         __m128 tempX =  _mm_set_ps1(0);
-         __m128 tempY =  _mm_set_ps1(0);
-         __m128 *ptrX = (__m128 *) &xControlPointCoordinates[0];
-         __m128 *ptrY = (__m128 *) &yControlPointCoordinates[0];
-         __m128 *ptrBasis   = (__m128 *) &xyBasis[0];
-         //addition and multiplication of the 16 basis value and CP position for each axis
-         for(unsigned a=0; a<4; a++)
-         {
-            tempX = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrX), tempX );
-            tempY = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrY), tempY );
-            ptrBasis++;
-            ptrX++;
-            ptrY++;
-         }
-         //the values stored in SSE variables are transferred to normal float
-         val.m = tempX;
-         xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
-         val.m = tempY;
-         yReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
- #else
-         coord=0;
-         for(unsigned b=0; b<4; b++)
-         {
-            for(unsigned a=0; a<4; a++)
-            {
-               DataType tempValue = xBasis[a] * yBasis[b];
-               xReal += xControlPointCoordinates[coord] * tempValue;
-               yReal += yControlPointCoordinates[coord] * tempValue;
-               coord++;
+#endif
+            if (displacement1) {
+                xReal += initialX;
+                yReal += initialY;
             }
-         }
- #endif
-         if(displacement1)
-         {
-            xReal += initialX;
-            yReal += initialY;
-         }
-         *outCPPPtrX++ = xReal;
-         *outCPPPtrY++ = yReal;
-      }
-   }
-   return;
+            *outCPPPtrX++ = xReal;
+            *outCPPPtrY++ = yReal;
+        }
+    }
 }
 /* *************************************************************** */
 //HAVE TO BE CHECKED
@@ -3604,1059 +3249,929 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
                                   nifti_image *grid2,
                                   bool displacement1,
                                   bool displacement2,
-                                  bool bspline)
-{
-   // REMINDER Grid2(x)=Grid1(Grid2(x))
- #if _USE_SSE
-   union
-   {
-      __m128 m;
-      float f[4];
-   } val;
-   __m128 _xBasis_sse;
-   __m128 tempX;
-   __m128 tempY;
-   __m128 tempZ;
-   __m128 *ptrX;
-   __m128 *ptrY;
-   __m128 *ptrZ;
-   __m128 _yBasis_sse;
-   __m128 _zBasis_sse;
-   __m128 _temp_basis;
-   __m128 _basis;
- #else
-   int a, b, c;
-   size_t coord;
-   DataType tempValue;
- #endif
-
-   const size_t grid2VoxelNumber = CalcVoxelNumber(*grid2);
-   DataType *outCPPPtrX = static_cast<DataType *>(grid2->data);
-   DataType *outCPPPtrY = &outCPPPtrX[grid2VoxelNumber];
-   DataType *outCPPPtrZ = &outCPPPtrY[grid2VoxelNumber];
-
-   const size_t grid1VoxelNumber = CalcVoxelNumber(*grid1);
-   DataType *controlPointPtrX = static_cast<DataType *>(grid1->data);
-   DataType *controlPointPtrY = &controlPointPtrX[grid1VoxelNumber];
-   DataType *controlPointPtrZ = &controlPointPtrY[grid1VoxelNumber];
-
-   DataType basis;
-
- #ifdef _WIN32
-   __declspec(align(16)) DataType xBasis[4];
-   __declspec(align(16)) DataType yBasis[4];
-   __declspec(align(16)) DataType zBasis[4];
-   __declspec(align(16)) DataType xControlPointCoordinates[64];
-   __declspec(align(16)) DataType yControlPointCoordinates[64];
-   __declspec(align(16)) DataType zControlPointCoordinates[64];
- #else
-   DataType xBasis[4] __attribute__((aligned(16)));
-   DataType yBasis[4] __attribute__((aligned(16)));
-   DataType zBasis[4] __attribute__((aligned(16)));
-   DataType xControlPointCoordinates[64] __attribute__((aligned(16)));
-   DataType yControlPointCoordinates[64] __attribute__((aligned(16)));
-   DataType zControlPointCoordinates[64] __attribute__((aligned(16)));
- #endif
-
-   int xPre, xPreOld, yPre, yPreOld, zPre, zPreOld;
-   int x, y, z;
-   size_t index;
-   DataType xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ;
-   DataType xVoxel, yVoxel, zVoxel;
-
-   // read the xyz/ijk sform or qform, as appropriate
-   mat44 *matrix_real_to_voxel1=nullptr;
-   mat44 *matrix_voxel_to_real2=nullptr;
-   if(grid1->sform_code>0)
-      matrix_real_to_voxel1=&(grid1->sto_ijk);
-   else matrix_real_to_voxel1=&(grid1->qto_ijk);
-   if(grid2->sform_code>0)
-      matrix_voxel_to_real2=&(grid2->sto_xyz);
-   else matrix_voxel_to_real2=&(grid2->qto_xyz);
-
- #ifdef _OPENMP
- #ifdef _USE_SSE
- #pragma omp parallel for default(none) \
+                                  bool bspline) {
+    // REMINDER Grid2(x)=Grid1(Grid2(x))
+#if _USE_SSE
+    union {
+        __m128 m;
+        float f[4];
+    } val;
+    __m128 _xBasis_sse;
+    __m128 tempX;
+    __m128 tempY;
+    __m128 tempZ;
+    __m128 *ptrX;
+    __m128 *ptrY;
+    __m128 *ptrZ;
+    __m128 _yBasis_sse;
+    __m128 _zBasis_sse;
+    __m128 _temp_basis;
+    __m128 _basis;
+#else
+    int a, b, c;
+    size_t coord;
+    DataType tempValue;
+#endif
+
+    const size_t grid2VoxelNumber = NiftiImage::calcVoxelNumber(grid2, 3);
+    DataType *outCPPPtrX = static_cast<DataType*>(grid2->data);
+    DataType *outCPPPtrY = &outCPPPtrX[grid2VoxelNumber];
+    DataType *outCPPPtrZ = &outCPPPtrY[grid2VoxelNumber];
+
+    const size_t grid1VoxelNumber = NiftiImage::calcVoxelNumber(grid1, 3);
+    DataType *controlPointPtrX = static_cast<DataType*>(grid1->data);
+    DataType *controlPointPtrY = &controlPointPtrX[grid1VoxelNumber];
+    DataType *controlPointPtrZ = &controlPointPtrY[grid1VoxelNumber];
+
+    DataType basis;
+
+#ifdef _WIN32
+    __declspec(align(16)) DataType xBasis[4];
+    __declspec(align(16)) DataType yBasis[4];
+    __declspec(align(16)) DataType zBasis[4];
+    __declspec(align(16)) DataType xControlPointCoordinates[64];
+    __declspec(align(16)) DataType yControlPointCoordinates[64];
+    __declspec(align(16)) DataType zControlPointCoordinates[64];
+#else
+    DataType xBasis[4] __attribute__((aligned(16)));
+    DataType yBasis[4] __attribute__((aligned(16)));
+    DataType zBasis[4] __attribute__((aligned(16)));
+    DataType xControlPointCoordinates[64] __attribute__((aligned(16)));
+    DataType yControlPointCoordinates[64] __attribute__((aligned(16)));
+    DataType zControlPointCoordinates[64] __attribute__((aligned(16)));
+#endif
+
+    int xPre, xPreOld, yPre, yPreOld, zPre, zPreOld;
+    int x, y, z;
+    size_t index;
+    DataType xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ;
+    DataType xVoxel, yVoxel, zVoxel;
+
+    // read the xyz/ijk sform or qform, as appropriate
+    const mat44 *matrix_real_to_voxel1, *matrix_voxel_to_real2;
+    if (grid1->sform_code > 0)
+        matrix_real_to_voxel1 = &grid1->sto_ijk;
+    else matrix_real_to_voxel1 = &grid1->qto_ijk;
+    if (grid2->sform_code > 0)
+        matrix_voxel_to_real2 = &grid2->sto_xyz;
+    else matrix_voxel_to_real2 = &grid2->qto_xyz;
+
+#ifdef _OPENMP
+#ifdef _USE_SSE
+#pragma omp parallel for default(none) \
    shared(grid1, grid2, displacement1, displacement2, matrix_voxel_to_real2, matrix_real_to_voxel1, \
    outCPPPtrX, outCPPPtrY, outCPPPtrZ, controlPointPtrX, controlPointPtrY, controlPointPtrZ, bspline) \
    private(xPre, xPreOld, yPre, yPreOld, zPre, zPreOld, val, index, \
-   x, y, z, xVoxel, yVoxel, zVoxel, basis, xBasis, yBasis, zBasis, \
+   x, y, xVoxel, yVoxel, zVoxel, basis, xBasis, yBasis, zBasis, \
    xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ, \
    _xBasis_sse, tempX, tempY, tempZ, ptrX, ptrY, ptrZ, _yBasis_sse, _zBasis_sse, _temp_basis, _basis, \
    xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates)
- #else
- #pragma omp parallel for default(none) \
+#else
+#pragma omp parallel for default(none) \
    shared(grid1, grid2, displacement1, displacement2, matrix_voxel_to_real2, matrix_real_to_voxel1, \
    outCPPPtrX, outCPPPtrY, outCPPPtrZ, controlPointPtrX, controlPointPtrY, controlPointPtrZ, bspline) \
    private(xPre, xPreOld, yPre, yPreOld, zPre, zPreOld, index, \
-   x, y, z, xVoxel, yVoxel, zVoxel, a, b, c, coord, basis, tempValue, xBasis, yBasis, zBasis, \
+   x, y, xVoxel, yVoxel, zVoxel, a, b, c, coord, basis, tempValue, xBasis, yBasis, zBasis, \
    xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ, \
    xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates)
- #endif
- #endif
-   for(z=0; z<grid2->nz; z++)
-   {
-      xPreOld=99999;
-      yPreOld=99999;
-      zPreOld=99999;
-      index=z*grid2->nx*grid2->ny;
-      for(y=0; y<grid2->ny; y++)
-      {
-         for(x=0; x<grid2->nx; x++)
-         {
-            // Get the control point actual position
-            xReal = outCPPPtrX[index];
-            yReal = outCPPPtrY[index];
-            zReal = outCPPPtrZ[index];
-            initialPositionX=0;
-            initialPositionY=0;
-            initialPositionZ=0;
-            if(displacement2)
-            {
-               xReal += initialPositionX =
-                     matrix_voxel_to_real2->m[0][0]*x
-                     + matrix_voxel_to_real2->m[0][1]*y
-                     + matrix_voxel_to_real2->m[0][2]*z
-                     + matrix_voxel_to_real2->m[0][3];
-               yReal += initialPositionY =
-                     matrix_voxel_to_real2->m[1][0]*x
-                     + matrix_voxel_to_real2->m[1][1]*y
-                     + matrix_voxel_to_real2->m[1][2]*z
-                     + matrix_voxel_to_real2->m[1][3];
-               zReal += initialPositionZ =
-                     matrix_voxel_to_real2->m[2][0]*x
-                     + matrix_voxel_to_real2->m[2][1]*y
-                     + matrix_voxel_to_real2->m[2][2]*z
-                     + matrix_voxel_to_real2->m[2][3];
-            }
-
-            // Get the voxel based control point position in grid1
-            xVoxel =
-                  matrix_real_to_voxel1->m[0][0]*xReal
-                  + matrix_real_to_voxel1->m[0][1]*yReal
-                  + matrix_real_to_voxel1->m[0][2]*zReal
-                  + matrix_real_to_voxel1->m[0][3];
-            yVoxel =
-                  matrix_real_to_voxel1->m[1][0]*xReal
-                  + matrix_real_to_voxel1->m[1][1]*yReal
-                  + matrix_real_to_voxel1->m[1][2]*zReal
-                  + matrix_real_to_voxel1->m[1][3];
-            zVoxel =
-                  matrix_real_to_voxel1->m[2][0]*xReal
-                  + matrix_real_to_voxel1->m[2][1]*yReal
-                  + matrix_real_to_voxel1->m[2][2]*zReal
-                  + matrix_real_to_voxel1->m[2][3];
-
-            // The spline coefficients are computed
-            xPre=(int)(reg_floor(xVoxel));
-            basis=(DataType)xVoxel-(DataType)xPre;
-            if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
-            else get_SplineBasisValues<DataType>(basis, xBasis);
-
-            yPre=(int)(reg_floor(yVoxel));
-            basis=(DataType)yVoxel-(DataType)yPre;
-            if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
-            else get_SplineBasisValues<DataType>(basis, yBasis);
-
-            zPre=(int)(reg_floor(zVoxel));
-            basis=(DataType)zVoxel-(DataType)zPre;
-            if(basis<0) basis=0; //rounding error
-            if(bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
-            else get_SplineBasisValues<DataType>(basis, zBasis);
-
-            --xPre;
-            --yPre;
-            --zPre;
-
-            // The control points are stored
-            if(xPre!=xPreOld || yPre!=yPreOld || zPre!=zPreOld)
-            {
-               get_GridValues(xPre,
-                              yPre,
-                              zPre,
-                              grid1,
-                              controlPointPtrX,
-                              controlPointPtrY,
-                              controlPointPtrZ,
-                              xControlPointCoordinates,
-                              yControlPointCoordinates,
-                              zControlPointCoordinates,
-                              false, // no approximation
-                              displacement1 // a displacement field?
-                              );
-               xPreOld=xPre;
-               yPreOld=yPre;
-               zPreOld=zPre;
-            }
-            xReal=0;
-            yReal=0;
-            zReal=0;
- #if _USE_SSE
-            val.f[0] = xBasis[0];
-            val.f[1] = xBasis[1];
-            val.f[2] = xBasis[2];
-            val.f[3] = xBasis[3];
-            _xBasis_sse = val.m;
-
-            tempX =  _mm_set_ps1(0);
-            tempY =  _mm_set_ps1(0);
-            tempZ =  _mm_set_ps1(0);
-            ptrX = (__m128 *) &xControlPointCoordinates[0];
-            ptrY = (__m128 *) &yControlPointCoordinates[0];
-            ptrZ = (__m128 *) &zControlPointCoordinates[0];
-
-            for(unsigned c=0; c<4; c++)
-            {
-               for(unsigned b=0; b<4; b++)
-               {
-                  _yBasis_sse  = _mm_set_ps1(yBasis[b]);
-                  _zBasis_sse  = _mm_set_ps1(zBasis[c]);
-                  _temp_basis   = _mm_mul_ps(_yBasis_sse, _zBasis_sse);
-                  _basis       = _mm_mul_ps(_temp_basis, _xBasis_sse);
-                  tempX = _mm_add_ps(_mm_mul_ps(_basis, *ptrX), tempX );
-                  tempY = _mm_add_ps(_mm_mul_ps(_basis, *ptrY), tempY );
-                  tempZ = _mm_add_ps(_mm_mul_ps(_basis, *ptrZ), tempZ );
-                  ptrX++;
-                  ptrY++;
-                  ptrZ++;
-               }
-            }
-            //the values stored in SSE variables are transferred to normal float
-            val.m = tempX;
-            xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
-            val.m = tempY;
-            yReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
-            val.m = tempZ;
-            zReal = val.f[0]+val.f[1]+val.f[2]+val.f[3];
- #else
-            coord=0;
-            for(c=0; c<4; c++)
-            {
-               for(b=0; b<4; b++)
-               {
-                  for(a=0; a<4; a++)
-                  {
-                     tempValue = xBasis[a] * yBasis[b] * zBasis[c];
-                     xReal += xControlPointCoordinates[coord] * tempValue;
-                     yReal += yControlPointCoordinates[coord] * tempValue;
-                     zReal += zControlPointCoordinates[coord] * tempValue;
-                     coord++;
-                  }
-               }
-            }
- #endif
-            if(displacement2)
-            {
-               xReal -= initialPositionX;
-               yReal -= initialPositionY;
-               zReal -= initialPositionZ;
+#endif
+#endif
+    for (z = 0; z < grid2->nz; z++) {
+        xPreOld = 99999;
+        yPreOld = 99999;
+        zPreOld = 99999;
+        index = z * grid2->nx * grid2->ny;
+        for (y = 0; y < grid2->ny; y++) {
+            for (x = 0; x < grid2->nx; x++) {
+                // Get the control point actual position
+                xReal = outCPPPtrX[index];
+                yReal = outCPPPtrY[index];
+                zReal = outCPPPtrZ[index];
+                initialPositionX = 0;
+                initialPositionY = 0;
+                initialPositionZ = 0;
+                if (displacement2) {
+                    xReal += initialPositionX =
+                        matrix_voxel_to_real2->m[0][0] * x
+                        + matrix_voxel_to_real2->m[0][1] * y
+                        + matrix_voxel_to_real2->m[0][2] * z
+                        + matrix_voxel_to_real2->m[0][3];
+                    yReal += initialPositionY =
+                        matrix_voxel_to_real2->m[1][0] * x
+                        + matrix_voxel_to_real2->m[1][1] * y
+                        + matrix_voxel_to_real2->m[1][2] * z
+                        + matrix_voxel_to_real2->m[1][3];
+                    zReal += initialPositionZ =
+                        matrix_voxel_to_real2->m[2][0] * x
+                        + matrix_voxel_to_real2->m[2][1] * y
+                        + matrix_voxel_to_real2->m[2][2] * z
+                        + matrix_voxel_to_real2->m[2][3];
+                }
+
+                // Get the voxel based control point position in grid1
+                xVoxel =
+                    matrix_real_to_voxel1->m[0][0] * xReal
+                    + matrix_real_to_voxel1->m[0][1] * yReal
+                    + matrix_real_to_voxel1->m[0][2] * zReal
+                    + matrix_real_to_voxel1->m[0][3];
+                yVoxel =
+                    matrix_real_to_voxel1->m[1][0] * xReal
+                    + matrix_real_to_voxel1->m[1][1] * yReal
+                    + matrix_real_to_voxel1->m[1][2] * zReal
+                    + matrix_real_to_voxel1->m[1][3];
+                zVoxel =
+                    matrix_real_to_voxel1->m[2][0] * xReal
+                    + matrix_real_to_voxel1->m[2][1] * yReal
+                    + matrix_real_to_voxel1->m[2][2] * zReal
+                    + matrix_real_to_voxel1->m[2][3];
+
+                // The spline coefficients are computed
+                xPre = (int)reg_floor(xVoxel);
+                basis = xVoxel - static_cast<DataType>(xPre--);
+                if (basis < 0) basis = 0; //rounding error
+                if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
+                else get_SplineBasisValues<DataType>(basis, xBasis);
+
+                yPre = (int)reg_floor(yVoxel);
+                basis = yVoxel - static_cast<DataType>(yPre--);
+                if (basis < 0) basis = 0; //rounding error
+                if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+                else get_SplineBasisValues<DataType>(basis, yBasis);
+
+                zPre = (int)reg_floor(zVoxel);
+                basis = zVoxel - static_cast<DataType>(zPre--);
+                if (basis < 0) basis = 0; //rounding error
+                if (bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
+                else get_SplineBasisValues<DataType>(basis, zBasis);
+
+                // The control points are stored
+                if (xPre != xPreOld || yPre != yPreOld || zPre != zPreOld) {
+                    get_GridValues(xPre,
+                                   yPre,
+                                   zPre,
+                                   grid1,
+                                   controlPointPtrX,
+                                   controlPointPtrY,
+                                   controlPointPtrZ,
+                                   xControlPointCoordinates,
+                                   yControlPointCoordinates,
+                                   zControlPointCoordinates,
+                                   false, // no approximation
+                                   displacement1); // a displacement field?
+                    xPreOld = xPre;
+                    yPreOld = yPre;
+                    zPreOld = zPre;
+                }
+                xReal = 0;
+                yReal = 0;
+                zReal = 0;
+#if _USE_SSE
+                val.f[0] = static_cast<float>(xBasis[0]);
+                val.f[1] = static_cast<float>(xBasis[1]);
+                val.f[2] = static_cast<float>(xBasis[2]);
+                val.f[3] = static_cast<float>(xBasis[3]);
+                _xBasis_sse = val.m;
+
+                tempX = _mm_set_ps1(0);
+                tempY = _mm_set_ps1(0);
+                tempZ = _mm_set_ps1(0);
+                ptrX = (__m128*)&xControlPointCoordinates[0];
+                ptrY = (__m128*)&yControlPointCoordinates[0];
+                ptrZ = (__m128*)&zControlPointCoordinates[0];
+
+                for (unsigned c = 0; c < 4; c++) {
+                    for (unsigned b = 0; b < 4; b++) {
+                        _yBasis_sse = _mm_set_ps1(static_cast<float>(yBasis[b]));
+                        _zBasis_sse = _mm_set_ps1(static_cast<float>(zBasis[c]));
+                        _temp_basis = _mm_mul_ps(_yBasis_sse, _zBasis_sse);
+                        _basis = _mm_mul_ps(_temp_basis, _xBasis_sse);
+                        tempX = _mm_add_ps(_mm_mul_ps(_basis, *ptrX++), tempX);
+                        tempY = _mm_add_ps(_mm_mul_ps(_basis, *ptrY++), tempY);
+                        tempZ = _mm_add_ps(_mm_mul_ps(_basis, *ptrZ++), tempZ);
+                    }
+                }
+                //the values stored in SSE variables are transferred to normal float
+                val.m = tempX;
+                xReal = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+                val.m = tempY;
+                yReal = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+                val.m = tempZ;
+                zReal = val.f[0] + val.f[1] + val.f[2] + val.f[3];
+#else
+                coord = 0;
+                for (c = 0; c < 4; c++) {
+                    for (b = 0; b < 4; b++) {
+                        for (a = 0; a < 4; a++) {
+                            tempValue = xBasis[a] * yBasis[b] * zBasis[c];
+                            xReal += xControlPointCoordinates[coord] * tempValue;
+                            yReal += yControlPointCoordinates[coord] * tempValue;
+                            zReal += zControlPointCoordinates[coord] * tempValue;
+                            coord++;
+                        }
+                    }
+                }
+#endif
+                if (displacement2) {
+                    xReal -= initialPositionX;
+                    yReal -= initialPositionY;
+                    zReal -= initialPositionZ;
+                }
+                outCPPPtrX[index] = xReal;
+                outCPPPtrY[index] = yReal;
+                outCPPPtrZ[index] = zReal;
+                index++;
             }
-            outCPPPtrX[index] = xReal;
-            outCPPPtrY[index] = yReal;
-            outCPPPtrZ[index] = zReal;
-            index++;
-         }
-      }
-   }
-   return;
+        }
+    }
 }
 /* *************************************************************** */
 int reg_spline_cppComposition(nifti_image *grid1,
                               nifti_image *grid2,
                               bool displacement1,
                               bool displacement2,
-                              bool bspline)
-{
-   // REMINDER Grid2(x)=Grid1(Grid2(x))
-
-   if(grid1->datatype != grid2->datatype)
-   {
-      reg_print_fct_error("reg_spline_cppComposition");
-      reg_print_msg_error("Both input images do not have the same type.");
-      reg_exit();
-   }
-
- #if _USE_SSE
-   if(grid1->datatype != NIFTI_TYPE_FLOAT32)
-   {
-      reg_print_fct_error("reg_spline_cppComposition");
-      reg_print_msg_error("SSE computation has only been implemented for single precision.");
-      reg_exit();
-   }
- #endif
-
-   if(grid1->nz>1)
-   {
-      switch(grid1->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_cppComposition_3D<float>
-               (grid1, grid2, displacement1, displacement2, bspline);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_cppComposition_3D<double>
-               (grid1, grid2, displacement1, displacement2, bspline);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_cppComposition");
-         reg_print_msg_error("Only implemented for single or double floating images");
-         reg_exit();
-      }
-   }
-   else
-   {
-      switch(grid1->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_spline_cppComposition_2D<float>
-               (grid1, grid2, displacement1, displacement2, bspline);
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_spline_cppComposition_2D<double>
-               (grid1, grid2, displacement1, displacement2, bspline);
-         break;
-      default:
-         reg_print_fct_error("reg_spline_cppComposition");
-         reg_print_msg_error("Only implemented for single or double floating images");
-         reg_exit();
-      }
-   }
-   return EXIT_SUCCESS;
+                              bool bspline) {
+    // REMINDER Grid2(x)=Grid1(Grid2(x))
+
+    if (grid1->datatype != grid2->datatype) {
+        reg_print_fct_error("reg_spline_cppComposition");
+        reg_print_msg_error("Both input images do not have the same type.");
+        reg_exit();
+    }
+
+#if _USE_SSE
+    if (grid1->datatype != NIFTI_TYPE_FLOAT32) {
+        reg_print_fct_error("reg_spline_cppComposition");
+        reg_print_msg_error("SSE computation has only been implemented for single precision.");
+        reg_exit();
+    }
+#endif
+
+    if (grid1->nz > 1) {
+        switch (grid1->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_cppComposition_3D<float>(grid1, grid2, displacement1, displacement2, bspline);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_cppComposition_3D<double>(grid1, grid2, displacement1, displacement2, bspline);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_cppComposition");
+            reg_print_msg_error("Only implemented for single or double floating images");
+            reg_exit();
+        }
+    } else {
+        switch (grid1->datatype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_spline_cppComposition_2D<float>(grid1, grid2, displacement1, displacement2, bspline);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_spline_cppComposition_2D<double>(grid1, grid2, displacement1, displacement2, bspline);
+            break;
+        default:
+            reg_print_fct_error("reg_spline_cppComposition");
+            reg_print_msg_error("Only implemented for single or double floating images");
+            reg_exit();
+        }
+    }
+    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
-                                             nifti_image *flowField)
-{
-   // Check first if the velocity field is actually a velocity field
-   if(velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID)
-   {
-      reg_print_fct_error("reg_spline_getFlowFieldFromVelocityGrid");
-      reg_print_msg_error("The provide grid is not a velocity field");
-      reg_exit();
-   }
-
-   // Initialise the flow field with an identity transformation
-   reg_tools_multiplyValueToImage(flowField, flowField, 0.f);
-   flowField->intent_p1=DISP_VEL_FIELD;
-   reg_getDeformationFromDisplacement(flowField);
-
-   // fake the number of extension here to avoid the second half of the affine
-   int oldNumExt = velocityFieldGrid->num_ext;
-   if(oldNumExt>1)
-      velocityFieldGrid->num_ext=1;
-
-
-   // Copy over the number of required squaring steps
-   flowField->intent_p2=velocityFieldGrid->intent_p2;
-   // The initial flow field is generated using cubic B-Spline interpolation/approximation
-   reg_spline_getDeformationField(velocityFieldGrid,
-                                  flowField,
-                                  nullptr, // mask
-                                  true, //composition
-                                  true // bspline
-                                  );
-
-   velocityFieldGrid->num_ext=oldNumExt;
+                                             nifti_image *flowField) {
+    // Check first if the velocity field is actually a velocity field
+    if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID) {
+        reg_print_fct_error("reg_spline_getFlowFieldFromVelocityGrid");
+        reg_print_msg_error("The provide grid is not a velocity field");
+        reg_exit();
+    }
+
+    // Initialise the flow field with an identity transformation
+    reg_tools_multiplyValueToImage(flowField, flowField, 0.f);
+    flowField->intent_p1 = DISP_VEL_FIELD;
+    reg_getDeformationFromDisplacement(flowField);
+
+    // fake the number of extension here to avoid the second half of the affine
+    int oldNumExt = velocityFieldGrid->num_ext;
+    if (oldNumExt > 1)
+        velocityFieldGrid->num_ext = 1;
+
+
+    // Copy over the number of required squaring steps
+    flowField->intent_p2 = velocityFieldGrid->intent_p2;
+    // The initial flow field is generated using cubic B-Spline interpolation/approximation
+    reg_spline_getDeformationField(velocityFieldGrid,
+                                   flowField,
+                                   nullptr, // mask
+                                   true,  //composition
+                                   true); // bspline
+
+    velocityFieldGrid->num_ext = oldNumExt;
 }
 /* *************************************************************** */
 void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
                                                    nifti_image *deformationFieldImage,
-                                                   bool updateStepNumber)
-{
-   // Check first if the velocity field is actually a velocity field
-   if(flowFieldImage->intent_p1 != DEF_VEL_FIELD)
-   {
-      reg_print_fct_error("reg_defField_getDeformationFieldFromFlowField");
-      reg_print_msg_error("The provide field is not a velocity field");
-      reg_exit();
-   }
-
-   // Remove the affine component from the flow field
-   nifti_image *affineOnly=nullptr;
-   if(flowFieldImage->num_ext>0)
-   {
-      if(flowFieldImage->ext_list[0].edata!=nullptr)
-      {
-         // Create a field that contains the affine component only
-         affineOnly = nifti_dup(*deformationFieldImage, false);
-         reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata),
-               affineOnly,
-               false);
-         reg_tools_subtractImageFromImage(flowFieldImage,affineOnly,flowFieldImage);
-      }
-   }
-   else reg_getDisplacementFromDeformation(flowFieldImage);
-
-   // Compute the number of scaling value to ensure unfolded transformation
-   int squaringNumber = 1;
-   if(updateStepNumber || flowFieldImage->intent_p2==0)
-   {
-      // Check the largest value
-      float extrema = fabsf(reg_tools_getMinValue(flowFieldImage, -1));
-      float temp = reg_tools_getMaxValue(flowFieldImage, -1);
-      extrema=extrema>temp?extrema:temp;
-      // Check the values for scaling purpose
-      float maxLength;
-      if(deformationFieldImage->nz>1)
-         // 0.2888675 = sqrt(0.5^2/3)
-         maxLength=0.28;
-      // 0.3535533 = sqrt(0.5^2/2)
-      else maxLength=0.35;
-      while(true)
-      {
-         if( (extrema/pow(2.0f,squaringNumber)) >= maxLength)
-            squaringNumber++;
-         else break;
-      }
-      // The minimal number of step is set to 6 by default
-      squaringNumber=squaringNumber<6?6:squaringNumber;
-      // Set the number of squaring step in the flow field
-      if(fabs(flowFieldImage->intent_p2)!=squaringNumber)
-      {
-         char text[255];
-         sprintf(text, "Changing from %i to %i squaring step (equivalent to scaling down by %i)",
-                static_cast<int>(reg_round(fabs(flowFieldImage->intent_p2))),
-                abs(squaringNumber),
-                (int)pow(2.0f,squaringNumber));
-         reg_print_msg_warn(text);
-      }
-      // Update the number of squaring step required
-      if(flowFieldImage->intent_p2>=0)
-         flowFieldImage->intent_p2 = squaringNumber;
-      else flowFieldImage->intent_p2 = -squaringNumber;
-   }
-   else squaringNumber=static_cast<int>(fabsf(flowFieldImage->intent_p2));
-
-   // The displacement field is scaled
-   float scalingValue = pow(2.0f,std::abs((float)squaringNumber));
-   if(flowFieldImage->intent_p2<0)
-      // backward deformation field is scaled down
-      reg_tools_divideValueToImage(flowFieldImage,
-                                   flowFieldImage,
-                                   -scalingValue); // (/-scalingValue)
-   else
-      // forward deformation field is scaled down
-      reg_tools_divideValueToImage(flowFieldImage,
-                                   flowFieldImage,
-                                   scalingValue); // (/scalingValue)
-
-   // Conversion from displacement to deformation
-   reg_getDeformationFromDisplacement(flowFieldImage);
-
-   // The computed scaled deformation field is copied over
-   memcpy(deformationFieldImage->data, flowFieldImage->data,
-          deformationFieldImage->nvox*deformationFieldImage->nbyper);
-
-   // The deformation field is squared
-   for(unsigned short i=0; i<squaringNumber; ++i)
-   {
-      // The deformation field is applied to itself
-      reg_defField_compose(deformationFieldImage,
-                           flowFieldImage,
-                           nullptr);
-      // The computed scaled deformation field is copied over
-      memcpy(deformationFieldImage->data, flowFieldImage->data,
-             deformationFieldImage->nvox*deformationFieldImage->nbyper);
+                                                   bool updateStepNumber) {
+    // Check first if the velocity field is actually a velocity field
+    if (flowFieldImage->intent_p1 != DEF_VEL_FIELD) {
+        reg_print_fct_error("reg_defField_getDeformationFieldFromFlowField");
+        reg_print_msg_error("The provide field is not a velocity field");
+        reg_exit();
+    }
+
+    // Remove the affine component from the flow field
+    nifti_image *affineOnly = nullptr;
+    if (flowFieldImage->num_ext > 0) {
+        if (flowFieldImage->ext_list[0].edata != nullptr) {
+            // Create a field that contains the affine component only
+            affineOnly = nifti_dup(*deformationFieldImage, false);
+            reg_affine_getDeformationField(reinterpret_cast<mat44*>(flowFieldImage->ext_list[0].edata),
+                                           affineOnly,
+                                           false);
+            reg_tools_subtractImageFromImage(flowFieldImage, affineOnly, flowFieldImage);
+        }
+    } else reg_getDisplacementFromDeformation(flowFieldImage);
+
+    // Compute the number of scaling value to ensure unfolded transformation
+    int squaringNumber = 1;
+    if (updateStepNumber || flowFieldImage->intent_p2 == 0) {
+        // Check the largest value
+        float extrema = fabsf(reg_tools_getMinValue(flowFieldImage, -1));
+        float temp = reg_tools_getMaxValue(flowFieldImage, -1);
+        extrema = extrema > temp ? extrema : temp;
+        // Check the values for scaling purpose
+        float maxLength;
+        if (deformationFieldImage->nz > 1)
+            // 0.2888675 = sqrt(0.5^2/3)
+            maxLength = 0.28f;
+        // 0.3535533 = sqrt(0.5^2/2)
+        else maxLength = 0.35f;
+        while (true) {
+            if ((extrema / pow(2.0f, squaringNumber)) >= maxLength)
+                squaringNumber++;
+            else break;
+        }
+        // The minimal number of step is set to 6 by default
+        squaringNumber = squaringNumber < 6 ? 6 : squaringNumber;
+        // Set the number of squaring step in the flow field
+        if (fabs(flowFieldImage->intent_p2) != squaringNumber) {
+            char text[255];
+            sprintf(text, "Changing from %i to %i squaring step (equivalent to scaling down by %i)",
+                    static_cast<int>(reg_round(fabs(flowFieldImage->intent_p2))),
+                    abs(squaringNumber),
+                    (int)pow(2.0f, squaringNumber));
+            reg_print_msg_warn(text);
+        }
+        // Update the number of squaring step required
+        if (flowFieldImage->intent_p2 >= 0)
+            flowFieldImage->intent_p2 = static_cast<float>(squaringNumber);
+        else flowFieldImage->intent_p2 = static_cast<float>(-squaringNumber);
+    } else squaringNumber = static_cast<int>(fabsf(flowFieldImage->intent_p2));
+
+    // The displacement field is scaled
+    float scalingValue = pow(2.0f, std::abs(static_cast<float>(squaringNumber)));
+    if (flowFieldImage->intent_p2 < 0)
+        // backward deformation field is scaled down
+        reg_tools_divideValueToImage(flowFieldImage,
+                                     flowFieldImage,
+                                     -scalingValue); // (/-scalingValue)
+    else
+        // forward deformation field is scaled down
+        reg_tools_divideValueToImage(flowFieldImage,
+                                     flowFieldImage,
+                                     scalingValue); // (/scalingValue)
+
+    // Conversion from displacement to deformation
+    reg_getDeformationFromDisplacement(flowFieldImage);
+
+    // The computed scaled deformation field is copied over
+    memcpy(deformationFieldImage->data, flowFieldImage->data,
+           deformationFieldImage->nvox * deformationFieldImage->nbyper);
+
+    // The deformation field is squared
+    for (unsigned short i = 0; i < squaringNumber; ++i) {
+        // The deformation field is applied to itself
+        reg_defField_compose(deformationFieldImage,
+                             flowFieldImage,
+                             nullptr);
+        // The computed scaled deformation field is copied over
+        memcpy(deformationFieldImage->data, flowFieldImage->data,
+               deformationFieldImage->nvox * deformationFieldImage->nbyper);
 #ifndef NDEBUG
-      char text[255];
-      sprintf(text, "Squaring (composition) step %u/%u", i+1, squaringNumber);
-      reg_print_msg_debug(text);
+        char text[255];
+        sprintf(text, "Squaring (composition) step %u/%u", i + 1, squaringNumber);
+        reg_print_msg_debug(text);
 #endif
-   }
-   // The affine conponent of the transformation is restored
-   if(affineOnly!=nullptr)
-   {
-      reg_getDisplacementFromDeformation(deformationFieldImage);
-      reg_tools_addImageToImage(deformationFieldImage,affineOnly,deformationFieldImage);
-      nifti_image_free(affineOnly);
-      affineOnly=nullptr;
-   }
-   deformationFieldImage->intent_p1=DEF_FIELD;
-   deformationFieldImage->intent_p2=0;
-   // If required an affine component is composed
-   if(flowFieldImage->num_ext>1)
-   {
-      reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[1].edata),
-            deformationFieldImage,
-            true);
-   }
+    }
+    // The affine conponent of the transformation is restored
+    if (affineOnly != nullptr) {
+        reg_getDisplacementFromDeformation(deformationFieldImage);
+        reg_tools_addImageToImage(deformationFieldImage, affineOnly, deformationFieldImage);
+        nifti_image_free(affineOnly);
+        affineOnly = nullptr;
+    }
+    deformationFieldImage->intent_p1 = DEF_FIELD;
+    deformationFieldImage->intent_p2 = 0;
+    // If required an affine component is composed
+    if (flowFieldImage->num_ext > 1) {
+        reg_affine_getDeformationField(reinterpret_cast<mat44*>(flowFieldImage->ext_list[1].edata), deformationFieldImage, true);
+    }
 }
 /* *************************************************************** */
 void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                             nifti_image *deformationFieldImage,
-                                            bool updateStepNumber)
-{
-   // Clean any extension in the deformation field as it is unexpected
-   nifti_free_extensions(deformationFieldImage);
-
-   // Check if the velocity field is actually a velocity field
-   if(velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID)
-   {
-      // Use the spline approximation to generate the deformation field
-      reg_spline_getDeformationField(velocityFieldGrid,
-                                     deformationFieldImage,
-                                     nullptr,
-                                     false, // composition
-                                     true // bspline
-                                     );
-   }
-   else if(velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID)
-   {
-      // Create an image to store the flow field
-      nifti_image *flowField = nifti_dup(*deformationFieldImage, false);
-      flowField->intent_code=NIFTI_INTENT_VECTOR;
-      memset(flowField->intent_name, 0, 16);
-      strcpy(flowField->intent_name,"NREG_TRANS");
-      flowField->intent_p1=DEF_VEL_FIELD;
-      flowField->intent_p2=velocityFieldGrid->intent_p2;
-      if(velocityFieldGrid->num_ext>0)
-         nifti_copy_extensions(flowField, velocityFieldGrid);
-
-      // Generate the velocity field
-      reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid,
-                                              flowField);
-      // Exponentiate the flow field
-      reg_defField_getDeformationFieldFromFlowField(flowField,
-                                                    deformationFieldImage,
-                                                    updateStepNumber);
-      // Update the number of step required. No action otherwise
-      velocityFieldGrid->intent_p2=flowField->intent_p2;
-      // Deallocate the allocated flow field
-      nifti_image_free(flowField);
-   }
-   else
-   {
-      reg_print_fct_error("reg_spline_getDeformationFieldFromVelocityGrid");
-      reg_print_msg_error("The provided input image is not a spline parametrised transformation");
-      reg_exit();
-   }
-   return;
+                                            bool updateStepNumber) {
+    // Clean any extension in the deformation field as it is unexpected
+    nifti_free_extensions(deformationFieldImage);
+
+    // Check if the velocity field is actually a velocity field
+    if (velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) {
+        // Use the spline approximation to generate the deformation field
+        reg_spline_getDeformationField(velocityFieldGrid,
+                                       deformationFieldImage,
+                                       nullptr,
+                                       false, // composition
+                                       true); // bspline
+    } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) {
+        // Create an image to store the flow field
+        nifti_image *flowField = nifti_dup(*deformationFieldImage, false);
+        flowField->intent_code = NIFTI_INTENT_VECTOR;
+        memset(flowField->intent_name, 0, 16);
+        strcpy(flowField->intent_name, "NREG_TRANS");
+        flowField->intent_p1 = DEF_VEL_FIELD;
+        flowField->intent_p2 = velocityFieldGrid->intent_p2;
+        if (velocityFieldGrid->num_ext > 0)
+            nifti_copy_extensions(flowField, velocityFieldGrid);
+
+        // Generate the velocity field
+        reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowField);
+        // Exponentiate the flow field
+        reg_defField_getDeformationFieldFromFlowField(flowField, deformationFieldImage, updateStepNumber);
+        // Update the number of step required. No action otherwise
+        velocityFieldGrid->intent_p2 = flowField->intent_p2;
+        // Deallocate the allocated flow field
+        nifti_image_free(flowField);
+    } else {
+        reg_print_fct_error("reg_spline_getDeformationFieldFromVelocityGrid");
+        reg_print_msg_error("The provided input image is not a spline parametrised transformation");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
-                                                   nifti_image **deformationFieldImage)
-{
-   // Check if the velocity field is actually a velocity field
-   if(velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID)
-   {
-      // Create an image to store the flow field
-      nifti_image *flowFieldImage = nifti_dup(*deformationFieldImage[0], false);
-      flowFieldImage->intent_code=NIFTI_INTENT_VECTOR;
-      memset(flowFieldImage->intent_name, 0, 16);
-      strcpy(flowFieldImage->intent_name,"NREG_TRANS");
-      flowFieldImage->intent_p1=DEF_VEL_FIELD;
-      flowFieldImage->intent_p2=velocityFieldGrid->intent_p2;
-      if(velocityFieldGrid->num_ext>0 && flowFieldImage->ext_list==nullptr)
-         nifti_copy_extensions(flowFieldImage, velocityFieldGrid);
-
-      // Generate the velocity field
-      reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid,
-                                              flowFieldImage);
-      // Remove the affine component from the flow field
-      nifti_image *affineOnly=nullptr;
-      if(flowFieldImage->num_ext>0)
-      {
-         if(flowFieldImage->ext_list[0].edata!=nullptr)
-         {
-            // Create a field that contains the affine component only
-            affineOnly = nifti_dup(*deformationFieldImage[0], false);
-            reg_affine_getDeformationField(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata),
-                  affineOnly,
-                  false);
-            reg_tools_subtractImageFromImage(flowFieldImage,affineOnly,flowFieldImage);
-         }
-      }
-      else reg_getDisplacementFromDeformation(flowFieldImage);
-
-      // Compute the number of scaling value to ensure unfolded transformation
-      int squaringNumber = static_cast<int>(fabsf(velocityFieldGrid->intent_p2));
-
-      // The displacement field is scaled
-      float scalingValue = pow(2.0f,std::abs((float)squaringNumber));
-      if(velocityFieldGrid->intent_p2<0)
-         // backward deformation field is scaled down
-         reg_tools_divideValueToImage(flowFieldImage,
-                                      deformationFieldImage[0],
-                                      -scalingValue); // (/-scalingValue)
-      else
-         // forward deformation field is scaled down
-         reg_tools_divideValueToImage(flowFieldImage,
-                                      deformationFieldImage[0],
-                                      scalingValue); // (/scalingValue)
-
-      // Deallocate the allocated flow field
-      nifti_image_free(flowFieldImage);
-      flowFieldImage=nullptr;
-
-      // Conversion from displacement to deformation
-      reg_getDeformationFromDisplacement(deformationFieldImage[0]);
-
-      // The deformation field is squared
-      for(unsigned short i=0; i<squaringNumber; ++i)
-      {
-         // The computed scaled deformation field is copied over
-         memcpy(deformationFieldImage[i+1]->data, deformationFieldImage[i]->data,
-               deformationFieldImage[i]->nvox*deformationFieldImage[i]->nbyper);
-         // The deformation field is applied to itself
-         reg_defField_compose(deformationFieldImage[i], // to apply
-                              deformationFieldImage[i+1], // to update
-               nullptr);
-   #ifndef NDEBUG
-         char text[255];
-         sprintf(text, "Squaring (composition) step %u/%u", i+1, squaringNumber);
-         reg_print_msg_debug(text);
-   #endif
-      }
-      // The affine conponent of the transformation is restored
-      if(affineOnly!=nullptr)
-      {
-         for(unsigned short i=0; i<=squaringNumber; ++i){
-            reg_getDisplacementFromDeformation(deformationFieldImage[i]);
-            reg_tools_addImageToImage(deformationFieldImage[i],affineOnly,deformationFieldImage[i]);
-            deformationFieldImage[i]->intent_p1=DEF_FIELD;
-            deformationFieldImage[i]->intent_p2=0;
-         }
-         nifti_image_free(affineOnly);
-         affineOnly=nullptr;
-      }
-      // If required an affine component is composed
-      if(velocityFieldGrid->num_ext>1)
-      {
-         for(unsigned short i=0; i<=squaringNumber; ++i){
-            reg_affine_getDeformationField(reinterpret_cast<mat44 *>(velocityFieldGrid->ext_list[1].edata),
-                  deformationFieldImage[i],
-                  true);
-         }
-      }
-   }
-   else
-   {
-      reg_print_fct_error("reg_spline_getIntermediateDefFieldFromVelGrid");
-      reg_print_msg_error("The provided input image is not a spline parametrised transformation");
-      reg_exit();
-   }
-   return;
+                                                   nifti_image **deformationFieldImage) {
+    // Check if the velocity field is actually a velocity field
+    if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) {
+        // Create an image to store the flow field
+        nifti_image *flowFieldImage = nifti_dup(*deformationFieldImage[0], false);
+        flowFieldImage->intent_code = NIFTI_INTENT_VECTOR;
+        memset(flowFieldImage->intent_name, 0, 16);
+        strcpy(flowFieldImage->intent_name, "NREG_TRANS");
+        flowFieldImage->intent_p1 = DEF_VEL_FIELD;
+        flowFieldImage->intent_p2 = velocityFieldGrid->intent_p2;
+        if (velocityFieldGrid->num_ext > 0 && flowFieldImage->ext_list == nullptr)
+            nifti_copy_extensions(flowFieldImage, velocityFieldGrid);
+
+        // Generate the velocity field
+        reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowFieldImage);
+        // Remove the affine component from the flow field
+        nifti_image *affineOnly = nullptr;
+        if (flowFieldImage->num_ext > 0) {
+            if (flowFieldImage->ext_list[0].edata != nullptr) {
+                // Create a field that contains the affine component only
+                affineOnly = nifti_dup(*deformationFieldImage[0], false);
+                reg_affine_getDeformationField(reinterpret_cast<mat44*>(flowFieldImage->ext_list[0].edata), affineOnly, false);
+                reg_tools_subtractImageFromImage(flowFieldImage, affineOnly, flowFieldImage);
+            }
+        } else reg_getDisplacementFromDeformation(flowFieldImage);
+
+        // Compute the number of scaling value to ensure unfolded transformation
+        int squaringNumber = static_cast<int>(fabsf(velocityFieldGrid->intent_p2));
+
+        // The displacement field is scaled
+        float scalingValue = pow(2.0f, std::abs((float)squaringNumber));
+        if (velocityFieldGrid->intent_p2 < 0)
+            // backward deformation field is scaled down
+            reg_tools_divideValueToImage(flowFieldImage, deformationFieldImage[0], -scalingValue);
+        else
+            // forward deformation field is scaled down
+            reg_tools_divideValueToImage(flowFieldImage, deformationFieldImage[0], scalingValue);
+
+        // Deallocate the allocated flow field
+        nifti_image_free(flowFieldImage);
+        flowFieldImage = nullptr;
+
+        // Conversion from displacement to deformation
+        reg_getDeformationFromDisplacement(deformationFieldImage[0]);
+
+        // The deformation field is squared
+        for (unsigned short i = 0; i < squaringNumber; ++i) {
+            // The computed scaled deformation field is copied over
+            memcpy(deformationFieldImage[i + 1]->data, deformationFieldImage[i]->data,
+                   deformationFieldImage[i]->nvox * deformationFieldImage[i]->nbyper);
+            // The deformation field is applied to itself
+            reg_defField_compose(deformationFieldImage[i], // to apply
+                                 deformationFieldImage[i + 1], // to update
+                                 nullptr);
+#ifndef NDEBUG
+            char text[255];
+            sprintf(text, "Squaring (composition) step %u/%u", i + 1, squaringNumber);
+            reg_print_msg_debug(text);
+#endif
+        }
+        // The affine conponent of the transformation is restored
+        if (affineOnly != nullptr) {
+            for (unsigned short i = 0; i <= squaringNumber; ++i) {
+                reg_getDisplacementFromDeformation(deformationFieldImage[i]);
+                reg_tools_addImageToImage(deformationFieldImage[i], affineOnly, deformationFieldImage[i]);
+                deformationFieldImage[i]->intent_p1 = DEF_FIELD;
+                deformationFieldImage[i]->intent_p2 = 0;
+            }
+            nifti_image_free(affineOnly);
+            affineOnly = nullptr;
+        }
+        // If required an affine component is composed
+        if (velocityFieldGrid->num_ext > 1) {
+            for (unsigned short i = 0; i <= squaringNumber; ++i) {
+                reg_affine_getDeformationField(reinterpret_cast<mat44*>(velocityFieldGrid->ext_list[1].edata),
+                                               deformationFieldImage[i],
+                                               true);
+            }
+        }
+    } else {
+        reg_print_fct_error("reg_spline_getIntermediateDefFieldFromVelGrid");
+        reg_print_msg_error("The provided input image is not a spline parametrised transformation");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
 void compute_lie_bracket(nifti_image *img1,
                          nifti_image *img2,
                          nifti_image *res,
-                         bool use_jac
-                         )
-{
-   reg_print_msg_error("The compute_lie_bracket function needs updating");
-   reg_exit();
- #ifdef _WIN32
-   long voxNumber=(long)CalcVoxelNumber(*img1);
- #else
-   size_t voxNumber=CalcVoxelNumber(*img1);
- #endif
-   // Lie bracket using Jacobian for testing
-   if(use_jac)
-   {
-      mat33 *jacImg1=(mat33 *)malloc(voxNumber*sizeof(mat33));
-      mat33 *jacImg2=(mat33 *)malloc(voxNumber*sizeof(mat33));
-
-      reg_getDeformationFromDisplacement(img1);
-      reg_getDeformationFromDisplacement(img2);
-      // HERE TO DO
-      reg_exit();
-      //        reg_spline_GetJacobianMatrixFull(img1,img1,jacImg1);
-      //        reg_spline_GetJacobianMatrixFull(img2,img2,jacImg2);
-      reg_getDisplacementFromDeformation(img1);
-      reg_getDisplacementFromDeformation(img2);
-
-      DataType *resPtrX=static_cast<DataType *>(res->data);
-      DataType *resPtrY=&resPtrX[voxNumber];
-      DataType *img1DispPtrX=static_cast<DataType *>(img1->data);
-      DataType *img1DispPtrY=&img1DispPtrX[voxNumber];
-      DataType *img2DispPtrX=static_cast<DataType *>(img2->data);
-      DataType *img2DispPtrY=&img1DispPtrX[voxNumber];
-      if(img1->nz>1)
-      {
-         DataType *resPtrZ=&resPtrY[voxNumber];
-         DataType *img1DispPtrZ=&img1DispPtrY[voxNumber];
-         DataType *img2DispPtrZ=&img1DispPtrY[voxNumber];
-
-         for(size_t i=0; i<voxNumber; ++i)
-         {
-            resPtrX[i]=
-                  (jacImg2[i].m[0][0]*img1DispPtrX[i] +
-                  jacImg2[i].m[0][1]*img1DispPtrY[i] +
-                  jacImg2[i].m[0][2]*img1DispPtrZ[i] )
-                  -
-                  (jacImg1[i].m[0][0]*img2DispPtrX[i] +
-                  jacImg1[i].m[0][1]*img2DispPtrY[i] +
-                  jacImg1[i].m[0][2]*img2DispPtrZ[i] );
-            resPtrY[i]=
-                  (jacImg2[i].m[1][0]*img1DispPtrX[i] +
-                  jacImg2[i].m[1][1]*img1DispPtrY[i] +
-                  jacImg2[i].m[1][2]*img1DispPtrZ[i] )
-                  -
-                  (jacImg1[i].m[1][0]*img2DispPtrX[i] +
-                  jacImg1[i].m[1][1]*img2DispPtrY[i] +
-                  jacImg1[i].m[1][2]*img2DispPtrZ[i] );
-            resPtrZ[i]=
-                  (jacImg2[i].m[2][0]*img1DispPtrX[i] +
-                  jacImg2[i].m[2][1]*img1DispPtrY[i] +
-                  jacImg2[i].m[2][2]*img1DispPtrZ[i] )
-                  -
-                  (jacImg1[i].m[2][0]*img2DispPtrX[i] +
-                  jacImg1[i].m[2][1]*img2DispPtrY[i] +
-                  jacImg1[i].m[2][2]*img2DispPtrZ[i] );
-         }
-      }
-      else
-      {
-         for(size_t i=0; i<voxNumber; ++i)
-         {
-            resPtrX[i]=
-                  (jacImg2[i].m[0][0]*img1DispPtrX[i] +
-                  jacImg2[i].m[0][1]*img1DispPtrY[i] )
-                  -
-                  (jacImg1[i].m[0][0]*img2DispPtrX[i] +
-                  jacImg1[i].m[0][1]*img2DispPtrY[i] );
-            resPtrY[i]=
-                  (jacImg2[i].m[1][0]*img1DispPtrX[i] +
-                  jacImg2[i].m[1][1]*img1DispPtrY[i] )
-                  -
-                  (jacImg1[i].m[1][0]*img2DispPtrX[i] +
-                  jacImg1[i].m[1][1]*img2DispPtrY[i] );
-         }
-      }
-      free(jacImg1);
-      free(jacImg2);
-      return;
-   }
-
-
-   // Allocate two temporary nifti images and set them to zero displacement
-   nifti_image *one_two = nifti_dup(*img2, false);
-   nifti_image *two_one = nifti_dup(*img1, false);
-   // Compute the displacement from img1
-   reg_spline_cppComposition(img1,
-                             two_one,
-                             true, // displacement1?
-                             true, // displacement2?
-                             true // bspline?
-                             );
-   // Compute the displacement from img2
-   reg_spline_cppComposition(img2,
-                             one_two,
-                             true, // displacement1?
-                             true, // displacement2?
-                             true // bspline?
-                             );
-   // Compose both transformations
-   reg_spline_cppComposition(img1,
-                             one_two,
-                             true, // displacement1?
-                             true, // displacement2?
-                             true // bspline?
-                             );
-   // Compose both transformations
-   reg_spline_cppComposition(img2,
-                             two_one,
-                             true, // displacement1?
-                             true, // displacement2?
-                             true // bspline?
-                             );
-   // Create the data pointers
-   DataType *resPtr=static_cast<DataType *>(res->data);
-   DataType *one_twoPtr=static_cast<DataType *>(one_two->data);
-   DataType *two_onePtr=static_cast<DataType *>(two_one->data);
-   // Compute the lie bracket value using difference of composition
-
- #ifdef _WIN32
-   long i;
-   voxNumber=(long)res->nvox;
- #else
-   size_t i;
-   voxNumber=res->nvox;
- #endif
-
- #ifdef _OPENMP
- #pragma omp parallel for default(none) \
-   shared(voxNumber, resPtr, one_twoPtr, two_onePtr) \
-   private(i)
- #endif
-   for(i=0; i<voxNumber; ++i)
-      resPtr[i]=two_onePtr[i]-one_twoPtr[i];
-   // Free the temporary nifti images
-   nifti_image_free(one_two);
-   nifti_image_free(two_one);
+                         bool use_jac) {
+    reg_print_msg_error("The compute_lie_bracket function needs updating");
+    reg_exit();
+#ifdef _WIN32
+    long voxNumber = (long)NiftiImage::calcVoxelNumber(img1, 3);
+#else
+    size_t voxNumber = NiftiImage::calcVoxelNumber(img1, 3);
+#endif
+    // Lie bracket using Jacobian for testing
+    if (use_jac) {
+        mat33 *jacImg1 = (mat33*)malloc(voxNumber * sizeof(mat33));
+        mat33 *jacImg2 = (mat33*)malloc(voxNumber * sizeof(mat33));
+
+        reg_getDeformationFromDisplacement(img1);
+        reg_getDeformationFromDisplacement(img2);
+        // HERE TO DO
+        reg_exit();
+        //        reg_spline_GetJacobianMatrixFull(img1,img1,jacImg1);
+        //        reg_spline_GetJacobianMatrixFull(img2,img2,jacImg2);
+        reg_getDisplacementFromDeformation(img1);
+        reg_getDisplacementFromDeformation(img2);
+
+        DataType *resPtrX = static_cast<DataType*>(res->data);
+        DataType *resPtrY = &resPtrX[voxNumber];
+        DataType *img1DispPtrX = static_cast<DataType*>(img1->data);
+        DataType *img1DispPtrY = &img1DispPtrX[voxNumber];
+        DataType *img2DispPtrX = static_cast<DataType*>(img2->data);
+        DataType *img2DispPtrY = &img1DispPtrX[voxNumber];
+        if (img1->nz > 1) {
+            DataType *resPtrZ = &resPtrY[voxNumber];
+            DataType *img1DispPtrZ = &img1DispPtrY[voxNumber];
+            DataType *img2DispPtrZ = &img1DispPtrY[voxNumber];
+
+            for (size_t i = 0; i < voxNumber; ++i) {
+                resPtrX[i] =
+                    (jacImg2[i].m[0][0] * img1DispPtrX[i] +
+                     jacImg2[i].m[0][1] * img1DispPtrY[i] +
+                     jacImg2[i].m[0][2] * img1DispPtrZ[i])
+                    -
+                    (jacImg1[i].m[0][0] * img2DispPtrX[i] +
+                     jacImg1[i].m[0][1] * img2DispPtrY[i] +
+                     jacImg1[i].m[0][2] * img2DispPtrZ[i]);
+                resPtrY[i] =
+                    (jacImg2[i].m[1][0] * img1DispPtrX[i] +
+                     jacImg2[i].m[1][1] * img1DispPtrY[i] +
+                     jacImg2[i].m[1][2] * img1DispPtrZ[i])
+                    -
+                    (jacImg1[i].m[1][0] * img2DispPtrX[i] +
+                     jacImg1[i].m[1][1] * img2DispPtrY[i] +
+                     jacImg1[i].m[1][2] * img2DispPtrZ[i]);
+                resPtrZ[i] =
+                    (jacImg2[i].m[2][0] * img1DispPtrX[i] +
+                     jacImg2[i].m[2][1] * img1DispPtrY[i] +
+                     jacImg2[i].m[2][2] * img1DispPtrZ[i])
+                    -
+                    (jacImg1[i].m[2][0] * img2DispPtrX[i] +
+                     jacImg1[i].m[2][1] * img2DispPtrY[i] +
+                     jacImg1[i].m[2][2] * img2DispPtrZ[i]);
+            }
+        } else {
+            for (size_t i = 0; i < voxNumber; ++i) {
+                resPtrX[i] =
+                    (jacImg2[i].m[0][0] * img1DispPtrX[i] +
+                     jacImg2[i].m[0][1] * img1DispPtrY[i])
+                    -
+                    (jacImg1[i].m[0][0] * img2DispPtrX[i] +
+                     jacImg1[i].m[0][1] * img2DispPtrY[i]);
+                resPtrY[i] =
+                    (jacImg2[i].m[1][0] * img1DispPtrX[i] +
+                     jacImg2[i].m[1][1] * img1DispPtrY[i])
+                    -
+                    (jacImg1[i].m[1][0] * img2DispPtrX[i] +
+                     jacImg1[i].m[1][1] * img2DispPtrY[i]);
+            }
+        }
+        free(jacImg1);
+        free(jacImg2);
+        return;
+    }
+
+
+    // Allocate two temporary nifti images and set them to zero displacement
+    nifti_image *one_two = nifti_dup(*img2, false);
+    nifti_image *two_one = nifti_dup(*img1, false);
+    // Compute the displacement from img1
+    reg_spline_cppComposition(img1,
+                              two_one,
+                              true,  // displacement1?
+                              true,  // displacement2?
+                              true); // bspline?
+    // Compute the displacement from img2
+    reg_spline_cppComposition(img2,
+                              one_two,
+                              true,  // displacement1?
+                              true,  // displacement2?
+                              true); // bspline?
+    // Compose both transformations
+    reg_spline_cppComposition(img1,
+                              one_two,
+                              true,  // displacement1?
+                              true,  // displacement2?
+                              true); // bspline?
+    // Compose both transformations
+    reg_spline_cppComposition(img2,
+                              two_one,
+                              true,  // displacement1?
+                              true,  // displacement2?
+                              true); // bspline?
+    // Create the data pointers
+    DataType *resPtr = static_cast<DataType*>(res->data);
+    DataType *one_twoPtr = static_cast<DataType*>(one_two->data);
+    DataType *two_onePtr = static_cast<DataType*>(two_one->data);
+    // Compute the lie bracket value using difference of composition
+
+#ifdef _WIN32
+    long i;
+    voxNumber = (long)res->nvox;
+#else
+    size_t i;
+    voxNumber = res->nvox;
+#endif
+
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+   shared(voxNumber, resPtr, one_twoPtr, two_onePtr)
+#endif
+    for (i = 0; i < voxNumber; ++i)
+        resPtr[i] = two_onePtr[i] - one_twoPtr[i];
+    // Free the temporary nifti images
+    nifti_image_free(one_two);
+    nifti_image_free(two_one);
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
-void compute_BCH_update1(nifti_image *img1, // current field
+void compute_BCH_update(nifti_image *img1, // current field
                          nifti_image *img2, // gradient
-                         int type)
-{
-   // To update
-   reg_print_msg_error("The compute_BCH_update function needs updating");
-   reg_exit();
-   DataType *res=(DataType *)malloc(img1->nvox*sizeof(DataType));
-
- #ifdef _WIN32
-   long i;
-   long voxelNumber=(long)img1->nvox;
- #else
-   size_t i;
-   size_t voxelNumber=img1->nvox;
- #endif
-
-   bool use_jac=false;
-
-   // r <- 2 + 1
-   DataType *img1Ptr=static_cast<DataType *>(img1->data);
-   DataType *img2Ptr=static_cast<DataType *>(img2->data);
- #ifdef _OPENMP
- #pragma omp parallel for default(none) \
-   shared(voxelNumber,img1Ptr,img2Ptr, res) \
-   private(i)
- #endif
-   for(i=0; i<voxelNumber; ++i)
-      res[i] = img1Ptr[i] + img2Ptr[i];
-
-   if(type>0)
-   {
-      // Convert the deformation field into a displacement field
-      reg_getDisplacementFromDeformation(img1);
-
-      // r <- 2 + 1 + 0.5[2,1]
-      nifti_image *lie_bracket_img2_img1 = nifti_dup(*img1, false);
-      compute_lie_bracket<DataType>(img2, img1, lie_bracket_img2_img1, use_jac);
-      DataType *lie_bracket_img2_img1Ptr=static_cast<DataType *>(lie_bracket_img2_img1->data);
- #ifdef _OPENMP
- #pragma omp parallel for default(none) \
-   shared(voxelNumber, res, lie_bracket_img2_img1Ptr) \
-   private(i)
- #endif
-      for(i=0; i<voxelNumber; ++i)
-         res[i] += 0.5 * lie_bracket_img2_img1Ptr[i];
-
-      if(type>1)
-      {
-         // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12
-         nifti_image *lie_bracket_img2_lie1 = nifti_dup(*lie_bracket_img2_img1, false);
-         compute_lie_bracket<DataType>(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac);
-         DataType *lie_bracket_img2_lie1Ptr=static_cast<DataType *>(lie_bracket_img2_lie1->data);
- #ifdef _OPENMP
- #pragma omp parallel for default(none) \
-   shared(voxelNumber, res, lie_bracket_img2_lie1Ptr) \
-   private(i)
- #endif
-         for(i=0; i<voxelNumber; ++i)
-            res[i] += lie_bracket_img2_lie1Ptr[i]/12.0;
-
-         if(type>2)
-         {
-            // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12
-            nifti_image *lie_bracket_img1_lie1 = nifti_dup(*lie_bracket_img2_img1, false);
-            compute_lie_bracket<DataType>(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac);
-            DataType *lie_bracket_img1_lie1Ptr=static_cast<DataType *>(lie_bracket_img1_lie1->data);
- #ifdef _OPENMP
- #pragma omp parallel for default(none) \
-   shared(voxelNumber, res, lie_bracket_img1_lie1Ptr) \
-   private(i)
- #endif
-            for(i=0; i<voxelNumber; ++i)
-               res[i] -= lie_bracket_img1_lie1Ptr[i]/12.0;
-            nifti_image_free(lie_bracket_img1_lie1);
-
-            if(type>3)
-            {
-               // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - [1,[2,[2,1]]]/24
-               nifti_image *lie_bracket_img1_lie2 = nifti_dup(*lie_bracket_img2_lie1, false);
-               compute_lie_bracket<DataType>(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac);
-               DataType *lie_bracket_img1_lie2Ptr=static_cast<DataType *>(lie_bracket_img1_lie2->data);
- #ifdef _OPENMP
- #pragma omp parallel for default(none) \
-   shared(voxelNumber, res, lie_bracket_img1_lie2Ptr) \
-   private(i)
- #endif
-               for(i=0; i<voxelNumber; ++i)
-                  res[i] -= lie_bracket_img1_lie2Ptr[i]/24.0;
-               nifti_image_free(lie_bracket_img1_lie2);
-            }// >3
-         }// >2
-         nifti_image_free(lie_bracket_img2_lie1);
-      }// >1
-      nifti_image_free(lie_bracket_img2_img1);
-   }// >0
-
-   // update the deformation field
-   memcpy(img1->data, res, img1->nvox*img1->nbyper);
-   free(res);
+                         int type) {
+    // To update
+    reg_print_msg_error("The compute_BCH_update function needs updating");
+    reg_exit();
+    DataType *res = (DataType*)malloc(img1->nvox * sizeof(DataType));
+
+#ifdef _WIN32
+    long i;
+    long voxelNumber = (long)img1->nvox;
+#else
+    size_t i;
+    size_t voxelNumber = img1->nvox;
+#endif
+
+    bool use_jac = false;
+
+    // r <- 2 + 1
+    DataType *img1Ptr = static_cast<DataType*>(img1->data);
+    DataType *img2Ptr = static_cast<DataType*>(img2->data);
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+   shared(voxelNumber,img1Ptr,img2Ptr, res)
+#endif
+    for (i = 0; i < voxelNumber; ++i)
+        res[i] = img1Ptr[i] + img2Ptr[i];
+
+    if (type > 0) {
+        // Convert the deformation field into a displacement field
+        reg_getDisplacementFromDeformation(img1);
+
+        // r <- 2 + 1 + 0.5[2,1]
+        nifti_image *lie_bracket_img2_img1 = nifti_dup(*img1, false);
+        compute_lie_bracket<DataType>(img2, img1, lie_bracket_img2_img1, use_jac);
+        DataType *lie_bracket_img2_img1Ptr = static_cast<DataType*>(lie_bracket_img2_img1->data);
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+   shared(voxelNumber, res, lie_bracket_img2_img1Ptr)
+#endif
+        for (i = 0; i < voxelNumber; ++i)
+            res[i] += 0.5f * lie_bracket_img2_img1Ptr[i];
+
+        if (type > 1) {
+            // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12
+            nifti_image *lie_bracket_img2_lie1 = nifti_dup(*lie_bracket_img2_img1, false);
+            compute_lie_bracket<DataType>(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac);
+            DataType *lie_bracket_img2_lie1Ptr = static_cast<DataType*>(lie_bracket_img2_lie1->data);
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+   shared(voxelNumber, res, lie_bracket_img2_lie1Ptr)
+#endif
+            for (i = 0; i < voxelNumber; ++i)
+                res[i] += lie_bracket_img2_lie1Ptr[i] / 12.f;
+
+            if (type > 2) {
+                // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12
+                nifti_image *lie_bracket_img1_lie1 = nifti_dup(*lie_bracket_img2_img1, false);
+                compute_lie_bracket<DataType>(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac);
+                DataType *lie_bracket_img1_lie1Ptr = static_cast<DataType*>(lie_bracket_img1_lie1->data);
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+   shared(voxelNumber, res, lie_bracket_img1_lie1Ptr)
+#endif
+                for (i = 0; i < voxelNumber; ++i)
+                    res[i] -= lie_bracket_img1_lie1Ptr[i] / 12.f;
+                nifti_image_free(lie_bracket_img1_lie1);
+
+                if (type > 3) {
+                    // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - [1,[2,[2,1]]]/24
+                    nifti_image *lie_bracket_img1_lie2 = nifti_dup(*lie_bracket_img2_lie1, false);
+                    compute_lie_bracket<DataType>(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac);
+                    DataType *lie_bracket_img1_lie2Ptr = static_cast<DataType*>(lie_bracket_img1_lie2->data);
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+   shared(voxelNumber, res, lie_bracket_img1_lie2Ptr)
+#endif
+                    for (i = 0; i < voxelNumber; ++i)
+                        res[i] -= lie_bracket_img1_lie2Ptr[i] / 24.f;
+                    nifti_image_free(lie_bracket_img1_lie2);
+                }// >3
+            }// >2
+            nifti_image_free(lie_bracket_img2_lie1);
+        }// >1
+        nifti_image_free(lie_bracket_img2_img1);
+    }// >0
+
+    // update the deformation field
+    memcpy(img1->data, res, img1->nvox * img1->nbyper);
+    free(res);
 }
 /* *************************************************************** */
 void compute_BCH_update(nifti_image *img1, // current field
                         nifti_image *img2, // gradient
-                        int type)
-{
-   if(img1->datatype!=img2->datatype)
-   {
-      reg_print_fct_error("compute_BCH_update");
-      reg_print_msg_error("Both input images are expected to be of similar type");
-      reg_exit();
-   }
-   switch(img1->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      compute_BCH_update1<float>(img1, img2, type);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      compute_BCH_update1<double>(img1, img2, type);
-      break;
-   default:
-      reg_print_fct_error("compute_BCH_update");
-      reg_print_msg_error("Only implemented for single or double precision images");
-      reg_exit();
-   }
+                        int type) {
+    if (img1->datatype != img2->datatype) {
+        reg_print_fct_error("compute_BCH_update");
+        reg_print_msg_error("Both input images are expected to be of similar type");
+        reg_exit();
+    }
+    switch (img1->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        compute_BCH_update<float>(img1, img2, type);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        compute_BCH_update<double>(img1, img2, type);
+        break;
+    default:
+        reg_print_fct_error("compute_BCH_update");
+        reg_print_msg_error("Only implemented for single or double precision images");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
-void extractLine(int start, int end, int increment,const DataType *image, DataType *values)
-{
-   size_t index = 0;
-   for(int i=start; i<end; i+=increment) values[index++] = image[i];
+void extractLine(int start, int end, int increment, const DataType *image, DataType *values) {
+    size_t index = 0;
+    for (int i = start; i < end; i += increment) values[index++] = image[i];
 }
 /* *************************************************************** */
 template <class DataType>
-void restoreLine(int start, int end, int increment, DataType *image, const DataType *values)
-{
-   size_t index = 0;
-   for(int i=start; i<end; i+=increment) image[i] = values[index++];
+void restoreLine(int start, int end, int increment, DataType *image, const DataType *values) {
+    size_t index = 0;
+    for (int i = start; i < end; i += increment) image[i] = values[index++];
 }
 /* *************************************************************** */
 template <class DataType>
-void intensitiesToSplineCoefficients(DataType *values, int number)
-{
-   // Border are set to zero
-   DataType pole = sqrt(3.0) - 2.0;
-   DataType currentPole = pole;
-   DataType currentOpposite = pow(pole,(DataType)(2.0*(DataType)number-1.0));
-   DataType sum=0;
-   for(int i=1; i<number; i++)
-   {
-      sum += (currentPole - currentOpposite) * values[i];
-      currentPole *= pole;
-      currentOpposite /= pole;
-   }
-   values[0] = (DataType)((values[0] - pole*pole*(values[0] + sum)) / (1.0 - pow(pole,(DataType)(2.0*(double)number+2.0))));
-
-   //other values forward
-   for(int i=1; i<number; i++)
-   {
-      values[i] += pole * values[i-1];
-   }
-
-   DataType ipp=(DataType)(1.0-pole);
-   ipp*=ipp;
-
-   //last value
-   values[number-1] = ipp * values[number-1];
-
-   //other values backward
-   for(int i=number-2; 0<=i; i--)
-   {
-      values[i] = pole * values[i+1] + ipp*values[i];
-   }
-   return;
+void intensitiesToSplineCoefficients(DataType *values, int number) {
+    // Border are set to zero
+    DataType pole = sqrt(3.0) - 2.0;
+    DataType currentPole = pole;
+    DataType currentOpposite = pow(pole, (DataType)(2.0 * (DataType)number - 1.0));
+    DataType sum = 0;
+    for (int i = 1; i < number; i++) {
+        sum += (currentPole - currentOpposite) * values[i];
+        currentPole *= pole;
+        currentOpposite /= pole;
+    }
+    values[0] = (DataType)((values[0] - pole * pole * (values[0] + sum)) / (1.0 - pow(pole, (DataType)(2.0 * (double)number + 2.0))));
+
+    //other values forward
+    for (int i = 1; i < number; i++) {
+        values[i] += pole * values[i - 1];
+    }
+
+    DataType ipp = (DataType)(1.0 - pole);
+    ipp *= ipp;
+
+    //last value
+    values[number - 1] = ipp * values[number - 1];
+
+    //other values backward
+    for (int i = number - 2; 0 <= i; i--) {
+        values[i] = pole * values[i + 1] + ipp * values[i];
+    }
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_spline_GetDeconvolvedCoefficents_core(nifti_image *img)
-{
-   double *coeff=(double *)malloc(img->nvox*sizeof(double));
-   DataType *imgPtr=static_cast<DataType *>(img->data);
-   for(size_t i=0; i<img->nvox; ++i)
-      coeff[i]=imgPtr[i];
-   for(int u=0; u<img->nu; ++u)
-   {
-      for(int t=0; t<img->nt; ++t)
-      {
-         double *coeffPtr=&coeff[(u*img->nt+t)*img->nx*img->ny*img->nz];
-
-         // Along the X axis
-         int number = img->nx;
-         double *values=new double[number];
-         int increment = 1;
-         for(int i=0; i<img->ny*img->nz; i++)
-         {
-            int start = i*img->nx;
-            int end = start + img->nx;
-            extractLine<double>(start,end,increment,coeffPtr,values);
-            intensitiesToSplineCoefficients<double>(values, number);
-            restoreLine<double>(start,end,increment,coeffPtr,values);
-         }
-         delete[] values;
-         values=nullptr;
-
-         // Along the Y axis
-         number = img->ny;
-         values=new double[number];
-         increment = img->nx;
-         for(int i=0; i<img->nx*img->nz; i++)
-         {
-            int start = i + i/img->nx * img->nx * (img->ny - 1);
-            int end = start + img->nx*img->ny;
-            extractLine<double>(start,end,increment,coeffPtr,values);
-            intensitiesToSplineCoefficients<double>(values, number);
-            restoreLine<double>(start,end,increment,coeffPtr,values);
-         }
-         delete[] values;
-         values=nullptr;
-
-         // Along the Z axis
-         if(img->nz>1)
-         {
-            number = img->nz;
-            values=new double[number];
-            increment = img->nx*img->ny;
-            for(int i=0; i<img->nx*img->ny; i++)
-            {
-               int start = i;
-               int end = start + img->nx*img->ny*img->nz;
-               extractLine<double>(start,end,increment,coeffPtr,values);
-               intensitiesToSplineCoefficients<double>(values, number);
-               restoreLine<double>(start,end,increment,coeffPtr,values);
+void reg_spline_getDeconvolvedCoefficents(nifti_image *img) {
+    double *coeff = (double*)malloc(img->nvox * sizeof(double));
+    DataType *imgPtr = static_cast<DataType*>(img->data);
+    for (size_t i = 0; i < img->nvox; ++i)
+        coeff[i] = imgPtr[i];
+    for (int u = 0; u < img->nu; ++u) {
+        for (int t = 0; t < img->nt; ++t) {
+            double *coeffPtr = &coeff[(u * img->nt + t) * img->nx * img->ny * img->nz];
+
+            // Along the X axis
+            int number = img->nx;
+            double *values = new double[number];
+            int increment = 1;
+            for (int i = 0; i < img->ny * img->nz; i++) {
+                int start = i * img->nx;
+                int end = start + img->nx;
+                extractLine<double>(start, end, increment, coeffPtr, values);
+                intensitiesToSplineCoefficients<double>(values, number);
+                restoreLine<double>(start, end, increment, coeffPtr, values);
             }
             delete[] values;
-            values=nullptr;
-         }
-      }//t
-   }//u
-
-   for(size_t i=0; i<img->nvox; ++i)
-      imgPtr[i]=coeff[i];
-   free(coeff);
+            values = nullptr;
+
+            // Along the Y axis
+            number = img->ny;
+            values = new double[number];
+            increment = img->nx;
+            for (int i = 0; i < img->nx * img->nz; i++) {
+                int start = i + i / img->nx * img->nx * (img->ny - 1);
+                int end = start + img->nx * img->ny;
+                extractLine<double>(start, end, increment, coeffPtr, values);
+                intensitiesToSplineCoefficients<double>(values, number);
+                restoreLine<double>(start, end, increment, coeffPtr, values);
+            }
+            delete[] values;
+            values = nullptr;
+
+            // Along the Z axis
+            if (img->nz > 1) {
+                number = img->nz;
+                values = new double[number];
+                increment = img->nx * img->ny;
+                for (int i = 0; i < img->nx * img->ny; i++) {
+                    int start = i;
+                    int end = start + img->nx * img->ny * img->nz;
+                    extractLine<double>(start, end, increment, coeffPtr, values);
+                    intensitiesToSplineCoefficients<double>(values, number);
+                    restoreLine<double>(start, end, increment, coeffPtr, values);
+                }
+                delete[] values;
+                values = nullptr;
+            }
+        }//t
+    }//u
+
+    for (size_t i = 0; i < img->nvox; ++i)
+        imgPtr[i] = static_cast<DataType>(coeff[i]);
+    free(coeff);
 }
 /* *************************************************************** */
-void reg_spline_GetDeconvolvedCoefficents(nifti_image *img)
-{
-
-   switch(img->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      reg_spline_GetDeconvolvedCoefficents_core<float>(img);
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_spline_GetDeconvolvedCoefficents_core<double>(img);
-      break;
-   default:
-      reg_print_fct_error("reg_spline_GetDeconvolvedCoefficents");
-      reg_print_msg_error("Only implemented for single or double precision images");
-      reg_exit();
-   }
+void reg_spline_getDeconvolvedCoefficents(nifti_image *img) {
+    switch (img->datatype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_spline_getDeconvolvedCoefficents<float>(img);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_spline_getDeconvolvedCoefficents<double>(img);
+        break;
+    default:
+        reg_print_fct_error("reg_spline_getDeconvolvedCoefficents");
+        reg_print_msg_error("Only implemented for single or double precision images");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index bff164f1..fd1ded7f 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -204,5 +204,5 @@ void compute_BCH_update(nifti_image *img1,
  * @param img Image to be deconvolved
  */
 extern "C++"
-void reg_spline_GetDeconvolvedCoefficents(nifti_image *img);
+void reg_spline_getDeconvolvedCoefficents(nifti_image *img);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 88262208..50dad457 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -75,7 +75,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
 
    }
    // Create some pointers towards to control point grid image data
-   const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+   const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
    DataType *coeffPtrX = static_cast<DataType *>(splineControlPoint->data);
    DataType *coeffPtrY = &coeffPtrX[nodeNumber];
    DataType *coeffPtrZ = &coeffPtrY[nodeNumber];
@@ -276,7 +276,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
 
    }
    // Create some pointers towards to control point grid image data
-   const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
+   const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2);
    DataType *coeffPtrX = static_cast<DataType *>(splineControlPoint->data);
    DataType *coeffPtrY = &coeffPtrX[nodeNumber];
 
@@ -313,8 +313,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
    shared(splineControlPoint, coeffPtrX, coeffPtrY, \
    basisX, basisY, reorientation, JacobianMatrices, JacobianDeterminants) \
-   private(x, y, incr0, coeffX, coeffY, \
-   jacobianMatrix, voxelIndex)
+   private(x, incr0, coeffX, coeffY, jacobianMatrix, voxelIndex)
 #endif
       for(y=1; y<splineControlPoint->ny-1; y++)
       {
@@ -559,7 +558,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
 
    }
    // Create some pointers towards to control point grid image data
-   const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+   const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
    DataType *coeffPtrX = static_cast<DataType *>(splineControlPoint->data);
    DataType *coeffPtrY = &coeffPtrX[nodeNumber];
    DataType *coeffPtrZ = &coeffPtrY[nodeNumber];
@@ -610,8 +609,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
    shared(splineControlPoint, coeffPtrX, coeffPtrY, coeffPtrZ, \
    basisX, basisY, basisZ, reorientation, JacobianMatrices, JacobianDeterminants) \
-   private(x, y, z, incr0, coeffX, coeffY, coeffZ, \
-   jacobianMatrix, voxelIndex)
+   private(x, y, incr0, coeffX, coeffY, coeffZ, jacobianMatrix, voxelIndex)
 #endif
       for(z=1; z<splineControlPoint->nz-1; z++)
       {
@@ -1007,7 +1005,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
    shared(referenceImage, gridVoxelSpacing, splineControlPoint, \
    coeffPtrX, coeffPtrY, coeffPtrZ,reorientation, JacobianMatrices, \
    JacobianDeterminants) \
-   private(x, y, z, pre, oldPre, basis, val, \
+   private(x, y, pre, oldPre, basis, val, \
    _xBasis, _xFirst, _yBasis, _yFirst, \
    tempX, tempY, tempZ, basisX, basisY, basisZ, \
    xBasis, xFirst, yBasis, yFirst, zBasis, zFirst, \
@@ -1019,10 +1017,10 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
    shared(referenceImage, gridVoxelSpacing, splineControlPoint, \
    coeffPtrX, coeffPtrY, coeffPtrZ, reorientation, JacobianMatrices, \
    JacobianDeterminants) \
-   private(x, y, z, pre, oldPre, basis, \
+   private(x, y, pre, oldPre, basis, \
    basisX, basisY, basisZ, coord, tempX, tempY, tempZ, \
    xBasis, xFirst, yBasis, yFirst, zBasis, zFirst, \
-   coeffX, coeffY, coeffZ, incr0, incr1, incr2, \
+   coeffX, coeffY, coeffZ, incr0, incr1, \
    jacobianMatrix, voxelIndex)
 #endif // _USE_SEE
 #endif // _USE_OPENMP
@@ -1245,7 +1243,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
       if(splineControlPoint->nz>1)
          detNumber *= (size_t)(splineControlPoint->nz-2);
    }
-   else detNumber = CalcVoxelNumber(*referenceImage);
+   else detNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
    void *JacobianDetermiantArray=malloc(detNumber*splineControlPoint->nbyper);
 
@@ -1356,7 +1354,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
    if(approximation)
       arraySize = (size_t)(splineControlPoint->nx-2) *
             (splineControlPoint->ny-2);
-   else arraySize = CalcVoxelNumber(*referenceImage, 2);
+   else arraySize = NiftiImage::calcVoxelNumber(referenceImage, 2);
    // Allocate arrays to store determinants and matrices
    mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33));
    DataType *jacobianDeterminant=(DataType *)malloc(arraySize * sizeof(DataType));
@@ -1371,7 +1369,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
 
    // The gradient are now computed for every control point
    DataType *gradientImagePtrX = static_cast<DataType *>(gradientImage->data);
-   DataType *gradientImagePtrY = &gradientImagePtrX[CalcVoxelNumber(*gradientImage, 2)];
+   DataType *gradientImagePtrY = &gradientImagePtrX[NiftiImage::calcVoxelNumber(gradientImage, 2)];
 
    // Matrices to be used to convert the gradient from voxel to mm
    mat33 jacobianMatrix, reorientation;
@@ -1382,7 +1380,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
    // Ratio to be used for normalisation
    size_t jacobianNumber;
    if(approximation)
-      jacobianNumber = CalcVoxelNumber(*splineControlPoint, 2);
+      jacobianNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2);
    else jacobianNumber = arraySize;
    DataType ratio[2] =
    {
@@ -1415,8 +1413,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
    shared(splineControlPoint, jacobianMatrices, jacobianDeterminant, basisX, basisY, \
    ratio, gradientImagePtrX, gradientImagePtrY, reorientation) \
-   private(x, y, index, jacobianConstraint, pixelX, pixelY, jacIndex, coord, \
-   detJac, jacobianMatrix)
+   private(x, index, jacobianConstraint, pixelX, pixelY, jacIndex, coord, detJac, jacobianMatrix)
 #endif
       for(y=0; y<splineControlPoint->ny; y++)
       {
@@ -1504,7 +1501,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
    shared(splineControlPoint, gridVoxelSpacing, referenceImage, jacobianDeterminant, ratio, \
    jacobianMatrices, gradientImagePtrX, gradientImagePtrY, reorientation) \
-   private(x, y, xPre, yPre, pixelX, pixelY, jacobianConstraint, \
+   private(x, xPre, yPre, pixelX, pixelY, jacobianConstraint, \
    basis, xBasis, yBasis, xFirst, yFirst, jacIndex, index, detJac, \
    jacobianMatrix, basisValues)
 #endif
@@ -1594,7 +1591,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
    if(approximation)
       arraySize = (size_t)(splineControlPoint->nx-2) *
             (splineControlPoint->ny-2) * (splineControlPoint->nz-2);
-   else arraySize = CalcVoxelNumber(*referenceImage);
+   else arraySize = NiftiImage::calcVoxelNumber(referenceImage, 3);
    // Allocate arrays to store determinants and matrices
    mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33));
    DataType *jacobianDeterminant=(DataType *)malloc(arraySize * sizeof(DataType));
@@ -1608,7 +1605,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                                 useHeaderInformation);
 
    // The gradient are now computed for every control point
-   const size_t voxelNumber = CalcVoxelNumber(*gradientImage);
+   const size_t voxelNumber = NiftiImage::calcVoxelNumber(gradientImage, 3);
    DataType *gradientImagePtrX = static_cast<DataType *>(gradientImage->data);
    DataType *gradientImagePtrY = &gradientImagePtrX[voxelNumber];
    DataType *gradientImagePtrZ = &gradientImagePtrY[voxelNumber];
@@ -1622,7 +1619,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
    // Ratio to be used for normalisation
    size_t jacobianNumber;
    if(approximation)
-      jacobianNumber = CalcVoxelNumber(*splineControlPoint);
+      jacobianNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
    else jacobianNumber = arraySize;
    DataType ratio[3] =
    {
@@ -1660,7 +1657,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
    shared(splineControlPoint, jacobianMatrices, jacobianDeterminant, basisX, basisY, basisZ, \
    ratio, gradientImagePtrX, gradientImagePtrY, gradientImagePtrZ, reorientation) \
-   private(x, y, z, index, jacobianConstraint, pixelX, pixelY, pixelZ, jacIndex, coord, \
+   private(x, y, index, jacobianConstraint, pixelX, pixelY, pixelZ, jacIndex, coord, \
    detJac, jacobianMatrix)
 #endif
       for(z=0; z<splineControlPoint->nz; z++)
@@ -1768,7 +1765,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
    shared(splineControlPoint, gridVoxelSpacing, referenceImage, jacobianDeterminant, ratio, \
    jacobianMatrices, gradientImagePtrX, gradientImagePtrY, gradientImagePtrZ, reorientation) \
-   private(x, y, z, xPre, yPre, zPre, pixelX, pixelY, pixelZ, jacobianConstraint, \
+   private(x, y, xPre, yPre, zPre, pixelX, pixelY, pixelZ, jacobianConstraint, \
    basis, xBasis, yBasis, zBasis, xFirst, yFirst, zFirst, jacIndex, index, detJac, \
    jacobianMatrix, basisValues)
 #endif
@@ -1949,13 +1946,13 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
    long jacobianNumber;
    if(approximation)
       jacobianNumber = (long)(splineControlPoint->nx-2)*(splineControlPoint->ny-2);
-   else jacobianNumber = (long)CalcVoxelNumber(*referenceImage, 2);
+   else jacobianNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 2);
 #else
    size_t i;
    size_t jacobianNumber;
    if(approximation)
       jacobianNumber = (size_t)(splineControlPoint->nx-2)*(splineControlPoint->ny-2);
-   else jacobianNumber = CalcVoxelNumber(*referenceImage, 2);
+   else jacobianNumber = NiftiImage::calcVoxelNumber(referenceImage, 2);
 #endif
    mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33));
    DataType *jacobianDeterminant=(DataType *)malloc(jacobianNumber*sizeof(DataType));
@@ -1972,7 +1969,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(jacobianNumber, jacobianDeterminant) \
-   private(i,logDet) \
+   private(logDet) \
    reduction(+:penaltyTerm)
 #endif
    for(i=0; i< jacobianNumber; i++)
@@ -1996,7 +1993,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
       reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz);
    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz);
 
-   const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+   const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
    DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
    DataType *controlPointPtrY = &controlPointPtrX[nodeNumber];
 
@@ -2014,7 +2011,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
    shared(splineControlPoint, jacobianDeterminant, jacobianMatrices, \
    controlPointPtrX, controlPointPtrY, reorientation) \
-   private(x, y, pixelX, pixelY, foldingCorrection, \
+   private(x, pixelX, pixelY, foldingCorrection, \
    xBasis, yBasis, xFirst, yFirst, jacIndex, detJac, \
    jacobianMatrix, basisValues, norm, correctFolding, id, gradient)
 #endif
@@ -2107,7 +2104,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
    shared(splineControlPoint, gridVoxelSpacing, referenceImage, jacobianDeterminant, \
    jacobianMatrices, controlPointPtrX, controlPointPtrY, reorientation) \
-   private(x, y, xPre, yPre, pixelX, pixelY, foldingCorrection, \
+   private(x, xPre, yPre, pixelX, pixelY, foldingCorrection, \
    basis, xBasis, yBasis, xFirst, yFirst, jacIndex, detJac, \
    jacobianMatrix, basisValues, norm, correctFolding, id, gradient)
 #endif
@@ -2198,13 +2195,13 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
    long jacobianNumber;
    if(approximation)
       jacobianNumber = (long)(splineControlPoint->nx-2)*(splineControlPoint->ny-2)*(splineControlPoint->nz-2);
-   else jacobianNumber = (long)CalcVoxelNumber(*referenceImage);
+   else jacobianNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
    size_t i;
    size_t jacobianNumber;
    if(approximation)
       jacobianNumber = (size_t)(splineControlPoint->nx-2)*(splineControlPoint->ny-2)*(splineControlPoint->nz-2);
-   else jacobianNumber = CalcVoxelNumber(*referenceImage);
+   else jacobianNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
    mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33));
    DataType *jacobianDeterminant=(DataType *)malloc(jacobianNumber*sizeof(DataType));
@@ -2221,7 +2218,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(jacobianNumber, jacobianDeterminant) \
-   private(i,logDet) \
+   private(logDet) \
    reduction(+:penaltyTerm)
 #endif
    for(i=0; i< jacobianNumber; i++)
@@ -2245,7 +2242,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
       reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz);
    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz);
 
-   const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+   const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
    DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
    DataType *controlPointPtrY = &controlPointPtrX[nodeNumber];
    DataType *controlPointPtrZ = &controlPointPtrY[nodeNumber];
@@ -2264,7 +2261,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
    shared(splineControlPoint, jacobianDeterminant, jacobianMatrices, \
    controlPointPtrX, controlPointPtrY, controlPointPtrZ, reorientation) \
-   private(x, y, z, pixelX, pixelY, pixelZ, foldingCorrection, \
+   private(x, y, pixelX, pixelY, pixelZ, foldingCorrection, \
    xBasis, yBasis, zBasis, xFirst, yFirst, zFirst, jacIndex, detJac, \
    jacobianMatrix, basisValues, norm, correctFolding, id, gradient)
 #endif
@@ -2378,7 +2375,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
    shared(splineControlPoint, gridVoxelSpacing, referenceImage, jacobianDeterminant, \
    jacobianMatrices, controlPointPtrX, controlPointPtrY, controlPointPtrZ, reorientation) \
-   private(x, y, z, xPre, yPre, zPre, pixelX, pixelY, pixelZ, foldingCorrection, \
+   private(x, y, xPre, yPre, zPre, pixelX, pixelY, pixelZ, foldingCorrection, \
    basis, xBasis, yBasis, zBasis, xFirst, yFirst, zFirst, jacIndex, detJac, \
    jacobianMatrix, basisValues, norm, correctFolding, id, gradient)
 #endif
@@ -2683,7 +2680,7 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField,
                                    nifti_image *jacobianDeterminant,
                                    mat33 *jacobianMatrices)
 {
-   const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2);
+   const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2);
 
    DataType *jacDetPtr=nullptr;
    if(jacobianDeterminant!=nullptr)
@@ -2716,8 +2713,7 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField,
 #pragma omp parallel for default(none) \
    shared(deformationField, jacobianDeterminant, jacobianMatrices, reorientation, \
    basis, first, jacDetPtr, deformationPtrX, deformationPtrY, spacing) \
-   private(currentIndex, x, y, a, b, index, \
-   jacobianMatrix, defX, defY, firstX, firstY)
+   private(currentIndex, x, a, b, index, jacobianMatrix, defX, defY, firstX, firstY)
 #endif
    for(y=0; y<deformationField->ny-1; ++y)
    {
@@ -2793,7 +2789,7 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField,
                                    nifti_image *jacobianDeterminant,
                                    mat33 *jacobianMatrices)
 {
-   const size_t voxelNumber = CalcVoxelNumber(*deformationField);
+   const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
 
    DataType *jacDetPtr=nullptr;
    if(jacobianDeterminant!=nullptr)
@@ -2828,7 +2824,7 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField,
 #pragma omp parallel for default(none) \
    shared(deformationField, jacobianDeterminant, jacobianMatrices, reorientation, \
    basis, first, jacDetPtr, deformationPtrX, deformationPtrY, deformationPtrZ, spacing) \
-   private(currentIndex, x, y, z, a, b, c, currentZ, index, \
+   private(currentIndex, x, y, a, b, c, currentZ, index, \
    jacobianMatrix, defX, defY, defZ, firstX, firstY, firstZ)
 #endif
    for(z=0; z<deformationField->nz-1; ++z)
@@ -3028,7 +3024,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
       }
       else reg_exit();
    }
-   const size_t voxelNumber = CalcVoxelNumber(*flowFieldImage);
+   const size_t voxelNumber = NiftiImage::calcVoxelNumber(flowFieldImage, 3);
    for(size_t i=0; i<voxelNumber; ++i)
       jacobianMatrices[i]=affineMatrix;
 
@@ -3078,7 +3074,7 @@ void reg_getDetArrayFromMatArray(nifti_image *jacobianDetImage,
                                  mat33 *jacobianMatrices
                                  )
 {
-   const size_t voxelNumber = CalcVoxelNumber(*jacobianDetImage);
+   const size_t voxelNumber = NiftiImage::calcVoxelNumber(jacobianDetImage, 3);
    DataType *jacDetPtr=static_cast<DataType *>(jacobianDetImage->data);
    if(jacobianDetImage->nz>1){
        for(size_t voxel=0; voxel<voxelNumber; ++voxel)
@@ -3127,7 +3123,7 @@ int reg_spline_GetJacobianMatFromVelocityGrid(mat33* jacobianMatrices,
    flowFieldImage->ndim=flowFieldImage->dim[0]=5;
    flowFieldImage->nt=flowFieldImage->dim[4]=1;
    flowFieldImage->nu=flowFieldImage->dim[5]=referenceImage->nz>1?3:2;
-   flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim);
+   flowFieldImage->nvox = NiftiImage::calcVoxelNumber(flowFieldImage, flowFieldImage->ndim);
    flowFieldImage->data=malloc(flowFieldImage->nvox*flowFieldImage->nbyper);
 
    // The velocity grid image is first converted into a flow field
@@ -3146,7 +3142,7 @@ int reg_defField_GetJacobianDetFromFlowField(nifti_image* jacobianDetImage,
                                              )
 {
    // create an array of mat33
-   const size_t voxelNumber = CalcVoxelNumber(*jacobianDetImage);
+   const size_t voxelNumber = NiftiImage::calcVoxelNumber(jacobianDetImage, 3);
    mat33 *jacobianMatrices=(mat33 *)malloc(voxelNumber*sizeof(mat33));
 
    // Compute the Jacobian matrice array
@@ -3184,7 +3180,7 @@ int reg_spline_GetJacobianDetFromVelocityGrid(nifti_image* jacobianDetImage,
    flowFieldImage->ndim=flowFieldImage->dim[0]=5;
    flowFieldImage->nt=flowFieldImage->dim[4]=1;
    flowFieldImage->nu=flowFieldImage->dim[5]=jacobianDetImage->nz>1?3:2;
-   flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim);
+   flowFieldImage->nvox = NiftiImage::calcVoxelNumber(flowFieldImage, flowFieldImage->ndim);
    flowFieldImage->data=malloc(flowFieldImage->nvox*flowFieldImage->nbyper);
 
    // The velocity grid image is first converted into a flow field
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 62ff07b4..4abf1081 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -15,7 +15,7 @@
 /* *************************************************************** */
 template<class DataType>
 double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoint) {
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2);
     int a, b, x, y, index, i;
 
     // Create pointers to the spline coefficients
@@ -37,8 +37,7 @@ double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoi
     shared(splineControlPoint, splinePtrX, splinePtrY, \
     basisXX, basisYY, basisXY) \
     private(XX_x, YY_x, XY_x, XX_y, YY_y, XY_y, \
-    x, y, a, b, index, i, \
-    splineCoeffX, splineCoeffY) \
+    x, a, b, index, i, splineCoeffX, splineCoeffY) \
     reduction(+:constraintValue)
 #endif
     for (y = 1; y < splineControlPoint->ny - 1; ++y) {
@@ -72,7 +71,7 @@ double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoi
 /* *************************************************************** */
 template<class DataType>
 double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoint) {
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     int a, b, c, x, y, z, index, i;
 
     // Create pointers to the spline coefficients
@@ -96,7 +95,7 @@ double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoi
     shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, \
     basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \
     private(XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y, \
-    XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z, x, y, z, a, b, c, index, i, \
+    XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z, x, y, a, b, c, index, i, \
     splineCoeffX, splineCoeffY, splineCoeffZ) \
     reduction(+:constraintValue)
 #endif
@@ -184,7 +183,7 @@ template<class DataType>
 void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
                                               nifti_image *gradientImage,
                                               float weight) {
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2);
     int a, b, x, y, X, Y, index, i;
 
     // Create pointers to the spline coefficients
@@ -210,7 +209,7 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
     shared(splineControlPoint,splinePtrX,splinePtrY, derivativeValues, \
     basisXX, basisYY, basisXY) \
-    private(a, b, i, index, x, y, derivativeValuesPtr, splineCoeffX, splineCoeffY, \
+    private(a, b, i, index, x, derivativeValuesPtr, splineCoeffX, splineCoeffY, \
     XX_x, YY_x, XY_x, XX_y, YY_y, XY_y)
 #endif
     for (y = 0; y < splineControlPoint->ny; y++) {
@@ -255,7 +254,7 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
     shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, \
     basisXX, basisYY, basisXY, approxRatio) \
-    private(index, a, X, Y, x, y, derivativeValuesPtr, gradientValue)
+    private(index, a, X, Y, x, derivativeValuesPtr, gradientValue)
 #endif
     for (y = 0; y < splineControlPoint->ny; y++) {
         index = y * splineControlPoint->nx;
@@ -291,7 +290,7 @@ template<class DataType>
 void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
                                               nifti_image *gradientImage,
                                               float weight) {
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     int a, b, c, x, y, z, X, Y, Z, index, i;
 
     // Create pointers to the spline coefficients
@@ -320,7 +319,7 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
     shared(splineControlPoint,splinePtrX,splinePtrY,splinePtrZ, derivativeValues, \
     basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \
-    private(a, b, c, i, index, x, y, z, derivativeValuesPtr, splineCoeffX, splineCoeffY, \
+    private(a, b, c, i, index, x, y, derivativeValuesPtr, splineCoeffX, splineCoeffY, \
     splineCoeffZ, XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, \
     ZZ_y, XY_y, YZ_y, XZ_y, XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z)
 #endif
@@ -402,7 +401,7 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
 #pragma omp parallel for default(none) \
     shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, gradientZPtr, \
     basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ, approxRatio) \
-    private(index, a, X, Y, Z, x, y, z, derivativeValuesPtr, gradientValue)
+    private(index, a, X, Y, Z, x, y, derivativeValuesPtr, gradientValue)
 #endif
     for (z = 0; z < splineControlPoint->nz; z++) {
         index = z * splineControlPoint->nx * splineControlPoint->ny;
@@ -494,7 +493,7 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint,
 /* *************************************************************** */
 template <class DataType>
 double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoint) {
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2);
     int a, b, x, y, i, index;
 
     double constraintValue = 0;
@@ -524,7 +523,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin
 #pragma omp parallel for default(none) \
     shared(splinePtrX, splinePtrY, splineControlPoint, \
     basisX, basisY, reorientation) \
-    private(x, y, a, b, i, index, matrix, R, \
+    private(x, a, b, i, index, matrix, R, \
     splineCoeffX, splineCoeffY, currentValue) \
     reduction(+:constraintValue)
 #endif
@@ -569,7 +568,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin
 /* *************************************************************** */
 template <class DataType>
 double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoint) {
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     int a, b, c, x, y, z, i, index;
 
     double constraintValue = 0;
@@ -601,7 +600,7 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin
 #pragma omp parallel for default(none) \
     shared(splinePtrX, splinePtrY, splinePtrZ, splineControlPoint, \
     basisX, basisY, basisZ, reorientation) \
-    private(x, y, z, a, b, c, i, index, matrix, R, \
+    private(x, y, a, b, c, i, index, matrix, R, \
     splineCoeffX, splineCoeffY, splineCoeffZ, currentValue) \
     reduction(+:constraintValue)
 #endif
@@ -686,7 +685,7 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) {
 template <class DataType>
 double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
                                       const nifti_image *splineControlPoint) {
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 2);
     int a, b, x, y, index, xPre, yPre;
     DataType basis;
 
@@ -699,7 +698,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
     double currentValue;
 
     // Create pointers to the spline coefficients
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
     const DataType *splinePtrY = &splinePtrX[nodeNumber];
     DataType splineCoeffX, splineCoeffY;
@@ -769,7 +768,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
 template <class DataType>
 double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
                                       const nifti_image *splineControlPoint) {
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     int a, b, c, x, y, z, index, xPre, yPre, zPre;
     DataType basis;
 
@@ -783,7 +782,7 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
     double currentValue;
 
     // Create pointers to the spline coefficients
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
     const DataType *splinePtrY = &splinePtrX[nodeNumber];
     const DataType *splinePtrZ = &splinePtrY[nodeNumber];
@@ -899,7 +898,7 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
                                        const nifti_image *splineControlPoint,
                                        nifti_image *gradientImage,
                                        float weight) {
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 2);
     int a, b, x, y, index, xPre, yPre;
     DataType basis;
 
@@ -909,7 +908,7 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
     };
 
     // Create pointers to the spline coefficients
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
     const DataType *splinePtrY = &splinePtrX[nodeNumber];
     DataType splineCoeffX, splineCoeffY;
@@ -990,7 +989,7 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
                                        const nifti_image *splineControlPoint,
                                        nifti_image *gradientImage,
                                        float weight) {
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     int a, b, c, x, y, z, index, xPre, yPre, zPre;
     DataType basis;
 
@@ -1001,7 +1000,7 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
     };
 
     // Create pointers to the spline coefficients
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
     const DataType *splinePtrY = &splinePtrX[nodeNumber];
     const DataType *splinePtrZ = &splinePtrY[nodeNumber];
@@ -1146,7 +1145,7 @@ template <class DataType>
 void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint,
                                              nifti_image *gradientImage,
                                              float weight) {
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2);
     int x, y, a, b, i, index;
 
     // Create pointers to the spline coefficients
@@ -1182,7 +1181,7 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi
     shared(splineControlPoint, splinePtrX, splinePtrY, \
     basisX, basisY, reorientation, inv_reorientation, \
     gradientXPtr, gradientYPtr, approxRatio) \
-    private(x, y, a, b, i, index, gradValues, \
+    private(x, a, b, i, index, gradValues, \
     splineCoeffX, splineCoeffY, matrix, R)
 #endif
     for (y = 1; y < splineControlPoint->ny - 1; y++) {
@@ -1241,7 +1240,7 @@ template <class DataType>
 void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint,
                                              nifti_image *gradientImage,
                                              float weight) {
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     int x, y, z, a, b, c, i, index;
 
     // Create pointers to the spline coefficients
@@ -1382,7 +1381,7 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint
 /* *************************************************************** */
 template <class DataType>
 double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
-    const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2);
     int a, b, x, y, X, Y, index;
     DataType basis[2] = {1, 0};
     DataType first[2] = {-1, 1};
@@ -1445,7 +1444,7 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
 /* *************************************************************** */
 template <class DataType>
 double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) {
-    const size_t voxelNumber = CalcVoxelNumber(*deformationField);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
     int a, b, c, x, y, z, X, Y, Z, index;
     DataType basis[2] = {1, 0};
     DataType first[2] = {-1, 1};
@@ -1551,7 +1550,7 @@ template <class DataType>
 void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField,
                                          nifti_image *gradientImage,
                                          float weight) {
-    const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2);
     int a, b, x, y, X, Y, index;
     DataType basis[2] = {1, 0};
     DataType first[2] = {-1, 1};
@@ -1623,7 +1622,7 @@ template <class DataType>
 void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField,
                                          nifti_image *gradientImage,
                                          float weight) {
-    const size_t voxelNumber = CalcVoxelNumber(*deformationField);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
     int a, b, c, x, y, z, X, Y, Z, index;
     DataType basis[2] = {1, 0};
     DataType first[2] = {-1, 1};
@@ -1752,7 +1751,7 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
                                            float *landmarkReference,
                                            float *landmarkFloating) {
     const int imageDim = controlPointImage->nz > 1 ? 3 : 2;
-    const size_t controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     double constraintValue = 0;
     size_t l, index;
     float ref_position[4];
@@ -1872,7 +1871,7 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
                                                  float *landmarkFloating,
                                                  float weight) {
     const int imageDim = controlPointImage->nz > 1 ? 3 : 2;
-    const size_t controlPointNumber = CalcVoxelNumber(*controlPointImage);
+    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     size_t l, index;
     float ref_position[3];
     float def_position[3];
@@ -2015,7 +2014,7 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage
 /* *************************************************************** */
 template <class DataType>
 double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) {
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     int x, y, z, index;
 
     // Create pointers to the spline coefficients
@@ -2029,7 +2028,7 @@ double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) {
     double constraintValue = 0;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(index, x, y, z, centralCP, neigbCP) \
+    private(index, x, y, centralCP, neigbCP) \
     shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ) \
     reduction(+:constraintValue)
 #endif // _OPENMP
@@ -2116,7 +2115,7 @@ template <class DataType>
 void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint,
                                                nifti_image *gradientImage,
                                                float weight) {
-    const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint);
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     int x, y, z, index;
 
     // Create pointers to the spline coefficients
@@ -2137,7 +2136,7 @@ void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint,
     DataType approxRatio = (DataType)weight / (DataType)nodeNumber;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(index, x, y, z, centralCP, neigbCP, grad_values) \
+    private(index, x, y, centralCP, neigbCP, grad_values) \
     shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, approxRatio, \
     gradPtrX, gradPtrY, gradPtrZ)
 #endif // _OPENMP
diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp
index 5a44ef0b..157344d5 100644
--- a/reg-lib/cpu/_reg_maths_eigen.cpp
+++ b/reg-lib/cpu/_reg_maths_eigen.cpp
@@ -39,7 +39,7 @@ void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(in,m, size__m, size__n) \
-   private(sm, sn)
+   private(sn)
 #endif
    for (sm = 0; sm < size__m; sm++)
    {
@@ -54,7 +54,7 @@ void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(in,svd,v,w, size__n,size__m) \
-   private(sn2, sn, sm)
+   private(sn2, sm)
 #endif
    for (sn = 0; sn < size__n; sn++) {
       w[sn] = static_cast<T>(svd.singularValues()(sn));
@@ -88,7 +88,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
    }
 
 #ifdef _WIN32
-   long sm, sn, sn2, min_dim, i, j;
+   long sm, sn, min_dim, i, j;
    long size__m = (long)size_m, size__n = (long)size_n;
 #else
    size_t sm, sn, min_dim, i, j;
@@ -100,7 +100,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(in, m, size__m, size__n) \
-   private(sm, sn)
+   private(sn)
 #endif
    for (sm = 0; sm < size__m; sm++)
    {
@@ -116,7 +116,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(svd, min_dim, S) \
-   private(i, j)
+   private(j)
 #endif
    //Convert to C matrix
    for (i = 0; i < min_dim; i++) {
@@ -134,7 +134,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(svd, min_dim, V) \
-   private(i, j)
+   private(j)
 #endif
       //Convert to C matrix
       for (i = 0; i < min_dim; i++) {
@@ -146,7 +146,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(svd, size__m, size__n, U) \
-   private(i, j)
+   private(j)
 #endif
       for (i = 0; i < size__m; i++) {
          for (j = 0; j < size__n; j++) {
@@ -158,7 +158,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(svd, min_dim, U) \
-   private(i, j)
+   private(j)
 #endif
       //Convert to C matrix
       for (i = 0; i < min_dim; i++) {
@@ -170,7 +170,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(svd, size__m, size__n, V) \
-   private(i, j)
+   private(j)
 #endif
       for (i = 0; i < size__n; i++) {
          for (j = 0; j < size__m; j++) {
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 59429ebb..29aa32c9 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -66,16 +66,16 @@ void ShiftImage(nifti_image* inputImgPtr,
 /* *************************************************************** */
 template <class DataType>
 void GetMINDImageDescriptor_core(nifti_image* inputImage,
-                                nifti_image* MINDImage,
-                                int *maskPtr,
-                                int descriptorOffset,
-                                int currentTimepoint) {
+                                 nifti_image* MINDImage,
+                                 int *maskPtr,
+                                 int descriptorOffset,
+                                 int currentTimepoint) {
 #ifdef WIN32
     long voxelIndex;
-    const long voxelNumber = (long)CalcVoxelNumber(*inputImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
 #else
     size_t voxelIndex;
-    const size_t voxelNumber = CalcVoxelNumber(*inputImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(inputImage, 3);
 #endif
 
     // Create a pointer to the descriptor image
@@ -104,20 +104,20 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
 
     //2D version
     int samplingNbr = (currentInputImage->nz > 1) ? 6 : 4;
-    int RSampling3D_x[6] = {-descriptorOffset, descriptorOffset, 0, 0, 0, 0};
-    int RSampling3D_y[6] = {0, 0, -descriptorOffset, descriptorOffset, 0, 0};
-    int RSampling3D_z[6] = {0, 0, 0, 0, -descriptorOffset, descriptorOffset};
+    int RSampling3D_x[6] = { -descriptorOffset, descriptorOffset, 0, 0, 0, 0 };
+    int RSampling3D_y[6] = { 0, 0, -descriptorOffset, descriptorOffset, 0, 0 };
+    int RSampling3D_z[6] = { 0, 0, 0, 0, -descriptorOffset, descriptorOffset };
 
     for (int i = 0; i < samplingNbr; i++) {
         ShiftImage<DataType>(currentInputImage, shiftedImage, maskPtr,
-                          RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
+                             RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
         reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image);
         reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
         reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr);
         reg_tools_addImageToImage(meanImage, diff_image, meanImage);
 
         // Store the current descriptor
-        unsigned index = i * diff_image->nvox;
+        const size_t index = i * diff_image->nvox;
         memcpy(&MINDImgDataPtr[index], diff_image->data, diff_image->nbyper * diff_image->nvox);
     }
     // Compute the mean over the number of sample
@@ -165,10 +165,10 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
 }
 /* *************************************************************** */
 void GetMINDImageDescriptor(nifti_image* inputImgPtr,
-                           nifti_image* MINDImgPtr,
-                           int *maskPtr,
-                           int descriptorOffset,
-                           int currentTimepoint) {
+                            nifti_image* MINDImgPtr,
+                            int *maskPtr,
+                            int descriptorOffset,
+                            int currentTimepoint) {
 #ifndef NDEBUG
     reg_print_fct_debug("GetMINDImageDescriptor()");
 #endif
@@ -195,16 +195,16 @@ void GetMINDImageDescriptor(nifti_image* inputImgPtr,
 /* *************************************************************** */
 template <class DataType>
 void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
-                                   nifti_image* MINDSSCImage,
-                                   int *maskPtr,
-                                   int descriptorOffset,
-                                   int currentTimepoint) {
+                                    nifti_image* MINDSSCImage,
+                                    int *maskPtr,
+                                    int descriptorOffset,
+                                    int currentTimepoint) {
 #ifdef WIN32
     long voxelIndex;
-    const long voxelNumber = (long)CalcVoxelNumber(*inputImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
 #else
     size_t voxelIndex;
-    const size_t voxelNumber = CalcVoxelNumber(*inputImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(inputImage, 3);
 #endif
 
     // Create a pointer to the descriptor image
@@ -241,18 +241,18 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
 
     nifti_image *diff_imageShifted = nifti_dup(*currentInputImage, false);
 
-    int RSampling3D_x[6] = {+descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0};
-    int RSampling3D_y[6] = {+descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset};
-    int RSampling3D_z[6] = {+0, +0, +descriptorOffset, +descriptorOffset, +descriptorOffset, +descriptorOffset};
+    int RSampling3D_x[6] = { +descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0 };
+    int RSampling3D_y[6] = { +descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset };
+    int RSampling3D_z[6] = { +0, +0, +descriptorOffset, +descriptorOffset, +descriptorOffset, +descriptorOffset };
 
-    int tx[12] = {-descriptorOffset, +0, -descriptorOffset, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset, +0, +0};
-    int ty[12] = {+0, -descriptorOffset, +0, +descriptorOffset, +0, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset};
-    int tz[12] = {+0, +0, +0, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0};
+    int tx[12] = { -descriptorOffset, +0, -descriptorOffset, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset, +0, +0 };
+    int ty[12] = { +0, -descriptorOffset, +0, +descriptorOffset, +0, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset };
+    int tz[12] = { +0, +0, +0, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0 };
     int compteurId = 0;
 
     for (int i = 0; i < samplingNbr; i++) {
         ShiftImage<DataType>(currentInputImage, shiftedImage, maskPtr,
-                          RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
+                             RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
         reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image);
         reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
         reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr);
@@ -260,11 +260,11 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
         for (int j = 0; j < 2; j++) {
 
             ShiftImage<DataType>(diff_image, diff_imageShifted, mask_diff_image,
-                              tx[compteurId], ty[compteurId], tz[compteurId]);
+                                 tx[compteurId], ty[compteurId], tz[compteurId]);
 
             reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img);
             // Store the current descriptor
-            unsigned index = compteurId * diff_imageShifted->nvox;
+            const size_t index = compteurId * diff_imageShifted->nvox;
             memcpy(&MINDSSCImgDataPtr[index], diff_imageShifted->data,
                    diff_imageShifted->nbyper * diff_imageShifted->nvox);
             compteurId++;
@@ -317,10 +317,10 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
 }
 /* *************************************************************** */
 void GetMINDSSCImageDescriptor(nifti_image* inputImgPtr,
-                              nifti_image* MINDSSCImgPtr,
-                              int *maskPtr,
-                              int descriptorOffset,
-                              int currentTimepoint) {
+                               nifti_image* MINDSSCImgPtr,
+                               int *maskPtr,
+                               int descriptorOffset,
+                               int currentTimepoint) {
 #ifndef NDEBUG
     reg_print_fct_debug("GetMINDSSCImageDescriptor()");
 #endif
@@ -419,14 +419,14 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg,
     this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImage);
     this->referenceImageDescriptor->dim[0] = this->referenceImageDescriptor->ndim = 4;
     this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptor_number;
-    this->referenceImageDescriptor->nvox = CalcVoxelNumber(*this->referenceImageDescriptor, this->referenceImageDescriptor->ndim);
+    this->referenceImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->referenceImageDescriptor, this->referenceImageDescriptor->ndim);
     this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox * this->referenceImageDescriptor->nbyper);
     // Initialise the warped floating descriptor
     this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImage);
     this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4;
     this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptor_number;
-    this->warpedFloatingImageDescriptor->nvox = CalcVoxelNumber(*this->warpedFloatingImageDescriptor,
-                                                                this->warpedFloatingImageDescriptor->ndim);
+    this->warpedFloatingImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->warpedFloatingImageDescriptor,
+                                                                            this->warpedFloatingImageDescriptor->ndim);
     this->warpedFloatingImageDescriptor->data = malloc(this->warpedFloatingImageDescriptor->nvox *
                                                        this->warpedFloatingImageDescriptor->nbyper);
 
@@ -439,16 +439,16 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg,
         this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImage);
         this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4;
         this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptor_number;
-        this->floatingImageDescriptor->nvox = CalcVoxelNumber(*this->floatingImageDescriptor,
-                                                              this->floatingImageDescriptor->ndim);
+        this->floatingImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->floatingImageDescriptor,
+                                                                          this->floatingImageDescriptor->ndim);
         this->floatingImageDescriptor->data = malloc(this->floatingImageDescriptor->nvox *
                                                      this->floatingImageDescriptor->nbyper);
         // Initialise the warped floating descriptor
         this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImage);
         this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4;
         this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptor_number;
-        this->warpedReferenceImageDescriptor->nvox = CalcVoxelNumber(*this->warpedReferenceImageDescriptor,
-                                                                     this->warpedReferenceImageDescriptor->ndim);
+        this->warpedReferenceImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->warpedReferenceImageDescriptor,
+                                                                                 this->warpedReferenceImageDescriptor->ndim);
         this->warpedReferenceImageDescriptor->data = malloc(this->warpedReferenceImageDescriptor->nvox *
                                                             this->warpedReferenceImageDescriptor->nbyper);
     }
@@ -472,7 +472,7 @@ double reg_mind::GetSimilarityMeasureValue() {
     double MINDValue = 0.;
     for (int t = 0; t < this->referenceImage->nt; ++t) {
         if (this->timePointWeight[t] > 0) {
-            size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+            size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
             int *combinedMask = (int*)malloc(voxelNumber * sizeof(int));
             memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int));
             reg_tools_removeNanFromMask(this->referenceImage, combinedMask);
@@ -480,26 +480,26 @@ double reg_mind::GetSimilarityMeasureValue() {
 
             if (this->mind_type == MIND_TYPE) {
                 GetMINDImageDescriptor(this->referenceImage,
-                                      this->referenceImageDescriptor,
-                                      combinedMask,
-                                      this->descriptorOffset,
-                                      t);
+                                       this->referenceImageDescriptor,
+                                       combinedMask,
+                                       this->descriptorOffset,
+                                       t);
                 GetMINDImageDescriptor(this->warpedImage,
-                                      this->warpedFloatingImageDescriptor,
-                                      combinedMask,
-                                      this->descriptorOffset,
-                                      t);
+                                       this->warpedFloatingImageDescriptor,
+                                       combinedMask,
+                                       this->descriptorOffset,
+                                       t);
             } else if (this->mind_type == MINDSSC_TYPE) {
                 GetMINDSSCImageDescriptor(this->referenceImage,
-                                         this->referenceImageDescriptor,
-                                         combinedMask,
-                                         this->descriptorOffset,
-                                         t);
+                                          this->referenceImageDescriptor,
+                                          combinedMask,
+                                          this->descriptorOffset,
+                                          t);
                 GetMINDSSCImageDescriptor(this->warpedImage,
-                                         this->warpedFloatingImageDescriptor,
-                                         combinedMask,
-                                         this->descriptorOffset,
-                                         t);
+                                          this->warpedFloatingImageDescriptor,
+                                          combinedMask,
+                                          this->descriptorOffset,
+                                          t);
             }
 
             switch (this->referenceImageDescriptor->datatype) {
@@ -530,7 +530,7 @@ double reg_mind::GetSimilarityMeasureValue() {
 
             // Backward computation
             if (this->isSymmetric) {
-                voxelNumber = CalcVoxelNumber(*floatingImage);
+                voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
                 combinedMask = (int*)malloc(voxelNumber * sizeof(int));
                 memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int));
                 reg_tools_removeNanFromMask(this->floatingImage, combinedMask);
@@ -538,26 +538,26 @@ double reg_mind::GetSimilarityMeasureValue() {
 
                 if (this->mind_type == MIND_TYPE) {
                     GetMINDImageDescriptor(this->floatingImage,
-                                          this->floatingImageDescriptor,
-                                          combinedMask,
-                                          this->descriptorOffset,
-                                          t);
+                                           this->floatingImageDescriptor,
+                                           combinedMask,
+                                           this->descriptorOffset,
+                                           t);
                     GetMINDImageDescriptor(this->warpedImageBw,
-                                          this->warpedReferenceImageDescriptor,
-                                          combinedMask,
-                                          this->descriptorOffset,
-                                          t);
+                                           this->warpedReferenceImageDescriptor,
+                                           combinedMask,
+                                           this->descriptorOffset,
+                                           t);
                 } else if (this->mind_type == MINDSSC_TYPE) {
                     GetMINDSSCImageDescriptor(this->floatingImage,
-                                             this->floatingImageDescriptor,
-                                             combinedMask,
-                                             this->descriptorOffset,
-                                             t);
+                                              this->floatingImageDescriptor,
+                                              combinedMask,
+                                              this->descriptorOffset,
+                                              t);
                     GetMINDSSCImageDescriptor(this->warpedImageBw,
-                                             this->warpedReferenceImageDescriptor,
-                                             combinedMask,
-                                             this->descriptorOffset,
-                                             t);
+                                              this->warpedReferenceImageDescriptor,
+                                              combinedMask,
+                                              this->descriptorOffset,
+                                              t);
                 }
 
                 switch (this->floatingImageDescriptor->datatype) {
@@ -598,7 +598,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
         return;
 
     // Create a combined mask to ignore masked and undefined values
-    size_t voxelNumber = CalcVoxelNumber(*this->referenceImage);
+    size_t voxelNumber = NiftiImage::calcVoxelNumber(this->referenceImage, 3);
     int *combinedMask = (int*)malloc(voxelNumber * sizeof(int));
     memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int));
     reg_tools_removeNanFromMask(this->referenceImage, combinedMask);
@@ -607,29 +607,29 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     if (this->mind_type == MIND_TYPE) {
         // Compute the reference image descriptors
         GetMINDImageDescriptor(this->referenceImage,
-                              this->referenceImageDescriptor,
-                              combinedMask,
-                              this->descriptorOffset,
-                              currentTimepoint);
+                               this->referenceImageDescriptor,
+                               combinedMask,
+                               this->descriptorOffset,
+                               currentTimepoint);
         // Compute the warped floating image descriptors
         GetMINDImageDescriptor(this->warpedImage,
-                              this->warpedFloatingImageDescriptor,
-                              combinedMask,
-                              this->descriptorOffset,
-                              currentTimepoint);
+                               this->warpedFloatingImageDescriptor,
+                               combinedMask,
+                               this->descriptorOffset,
+                               currentTimepoint);
     } else if (this->mind_type == MINDSSC_TYPE) {
         // Compute the reference image descriptors
         GetMINDSSCImageDescriptor(this->referenceImage,
-                                 this->referenceImageDescriptor,
-                                 combinedMask,
-                                 this->descriptorOffset,
-                                 currentTimepoint);
+                                  this->referenceImageDescriptor,
+                                  combinedMask,
+                                  this->descriptorOffset,
+                                  currentTimepoint);
         // Compute the warped floating image descriptors
         GetMINDSSCImageDescriptor(this->warpedImage,
-                                 this->warpedFloatingImageDescriptor,
-                                 combinedMask,
-                                 this->descriptorOffset,
-                                 currentTimepoint);
+                                  this->warpedFloatingImageDescriptor,
+                                  combinedMask,
+                                  this->descriptorOffset,
+                                  currentTimepoint);
     }
 
 
@@ -675,7 +675,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
 
     // Compute the gradient of the ssd for the backward transformation
     if (this->isSymmetric) {
-        voxelNumber = CalcVoxelNumber(*floatingImage);
+        voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
         combinedMask = (int*)malloc(voxelNumber * sizeof(int));
         memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int));
         reg_tools_removeNanFromMask(this->floatingImage, combinedMask);
@@ -683,26 +683,26 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
 
         if (this->mind_type == MIND_TYPE) {
             GetMINDImageDescriptor(this->floatingImage,
-                                  this->floatingImageDescriptor,
-                                  combinedMask,
-                                  this->descriptorOffset,
-                                  currentTimepoint);
+                                   this->floatingImageDescriptor,
+                                   combinedMask,
+                                   this->descriptorOffset,
+                                   currentTimepoint);
             GetMINDImageDescriptor(this->warpedImageBw,
-                                  this->warpedReferenceImageDescriptor,
-                                  combinedMask,
-                                  this->descriptorOffset,
-                                  currentTimepoint);
+                                   this->warpedReferenceImageDescriptor,
+                                   combinedMask,
+                                   this->descriptorOffset,
+                                   currentTimepoint);
         } else if (this->mind_type == MINDSSC_TYPE) {
             GetMINDSSCImageDescriptor(this->floatingImage,
-                                     this->floatingImageDescriptor,
-                                     combinedMask,
-                                     this->descriptorOffset,
-                                     currentTimepoint);
+                                      this->floatingImageDescriptor,
+                                      combinedMask,
+                                      this->descriptorOffset,
+                                      currentTimepoint);
             GetMINDSSCImageDescriptor(this->warpedImageBw,
-                                     this->warpedReferenceImageDescriptor,
-                                     combinedMask,
-                                     this->descriptorOffset,
-                                     currentTimepoint);
+                                      this->warpedReferenceImageDescriptor,
+                                      combinedMask,
+                                      this->descriptorOffset,
+                                      currentTimepoint);
         }
 
         for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) {
diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp
index eb75940c..87a1ed0f 100644
--- a/reg-lib/cpu/_reg_mrf.cpp
+++ b/reg-lib/cpu/_reg_mrf.cpp
@@ -59,7 +59,7 @@ reg_mrf::reg_mrf(reg_measure *_measure,
    this->image_dim = this->referenceImage->nz > 1 ? 3 :2;
    this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1;
    this->label_nD_num = static_cast<int>(std::pow((double) this->label_1D_num,this->image_dim));
-   this->node_number = CalcVoxelNumber(*this->controlPointImage);
+   this->node_number = NiftiImage::calcVoxelNumber(this->controlPointImage, 3);
 
    this->input_transformation=nifti_copy_nim_info(this->controlPointImage);
    this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float));
@@ -170,7 +170,7 @@ void reg_mrf::Initialise()
    for(int i =0;i<edge_number;i++) {
       index_neighbours[i]=-1;
    }
-   const size_t num_vertices = CalcVoxelNumber(*this->controlPointImage);
+   const size_t num_vertices = NiftiImage::calcVoxelNumber(this->controlPointImage, 3);
    const int num_neighbours=this->controlPointImage->nz > 1 ? 6 : 4;
 
    this->GetGraph(edgeWeightMatrix, index_neighbours);
@@ -358,7 +358,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
       image_mm2vox = &refImage->sto_ijk;
    mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm);
 
-   const size_t node_number = CalcVoxelNumber(*controlPointGridImage);
+   const size_t node_number = NiftiImage::calcVoxelNumber(controlPointGridImage, 3);
 
    // Compute the block size
    int blockSize[3]={
@@ -633,7 +633,7 @@ void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours)
 void reg_mrf::GetPrimsMST(float *edgeWeightMatrix,
                           int *index_neighbours, int num_vertices, int num_neighbours,bool norm)
 {
-   //size_t num_vertices = CalcVoxelNumber(*controlPointGridImage);
+   //size_t num_vertices = NiftiImage::calcVoxelNumber(controlPointGridImage, 3);
 
    //DEBUG
    //int blockSize[3]={
@@ -641,7 +641,7 @@ void reg_mrf::GetPrimsMST(float *edgeWeightMatrix,
    //    (int)reg_ceil(controlPointImage->dy / referenceImage->dy),
    //    (int)reg_ceil(controlPointImage->dz / referenceImage->dz),
    //};
-   //size_t sz=CalcVoxelNumber(*referenceImage);
+   //size_t sz=NiftiImage::calcVoxelNumber(referenceImage, 3);
    //int m=referenceImage->nx;
    //int n=referenceImage->ny;
    //int o=referenceImage->nz;
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index ddb2740e..ac3a3a4b 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -12,11 +12,10 @@
 
 #include "_reg_ssd.h"
 
- //#define USE_LOG_SSD
- //#define MRF_USE_SAD
+// #define USE_LOG_SSD
+// #define MRF_USE_SAD
 
- /* *************************************************************** */
- /* *************************************************************** */
+/* *************************************************************** */
 reg_ssd::reg_ssd(): reg_measure() {
     memset(this->normaliseTimePoint, 0, 255 * sizeof(bool));
 #ifndef NDEBUG
@@ -24,7 +23,6 @@ reg_ssd::reg_ssd(): reg_measure() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_ssd::InitialiseMeasure(nifti_image *refImg,
                                 nifti_image *floImg,
                                 int *refMask,
@@ -95,12 +93,10 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg,
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) {
     this->normaliseTimePoint[timepoint] = normalise;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DataType>
 double reg_getSSDValue(nifti_image *referenceImage,
                        nifti_image *warpedImage,
@@ -111,10 +107,10 @@ double reg_getSSDValue(nifti_image *referenceImage,
                        nifti_image *localWeightSimImage) {
 #ifdef _WIN32
     long voxel;
-    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
     size_t voxel;
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
     // Create pointers to the reference and warped image data
     DataType *referencePtr = static_cast<DataType*>(referenceImage->data);
@@ -143,7 +139,7 @@ double reg_getSSDValue(nifti_image *referenceImage,
 #pragma omp parallel for default(none) \
     shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \
     jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \
-    private(voxel, refValue, warValue, diff) \
+    private(refValue, warValue, diff) \
     reduction(+:SSD_local) \
     reduction(+:n)
 #endif
@@ -176,7 +172,7 @@ double reg_getSSDValue(nifti_image *referenceImage,
             }
 
             SSD_local *= timePointWeight[time];
-            currentValue[time] = -SSD_local;
+            currentValue[time] = static_cast<float>(-SSD_local);
             SSD_global -= SSD_local / n;
         }
     }
@@ -254,7 +250,6 @@ double reg_ssd::GetSimilarityMeasureValue() {
     return SSDValue;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
 void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                                   nifti_image *warpedImage,
@@ -273,10 +268,10 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
     // Create pointers to the reference and warped images
 #ifdef _WIN32
     long voxel;
-    const long voxelNumber = (long)CalcVoxelNumber(*referenceImage);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
     size_t voxel;
-    const size_t voxelNumber = CalcVoxelNumber(*referenceImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
     // Pointers to the image data
     DataType *refImagePtr = static_cast<DataType *>(referenceImage->data);
@@ -325,7 +320,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
     mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \
     measureGradPtrX, measureGradPtrY, measureGradPtrZ, voxelNumber, \
     localWeightPtr, adjusted_weight) \
-    private(voxel, refValue, warValue, common)
+    private(refValue, warValue, common)
 #endif
     for (voxel = 0; voxel < voxelNumber; voxel++) {
         if (mask[voxel] > -1) {
@@ -358,7 +353,6 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
         }
     }
 }
-/* *************************************************************** */
 template void reg_getVoxelBasedSSDGradient<float>
 (nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*);
 template void reg_getVoxelBasedSSDGradient<double>
@@ -450,7 +444,6 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
 void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                                    float *discretisedValue,
@@ -489,7 +482,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
     float *refBlockValue = (float*)malloc(voxelBlockNumber * sizeof(float));
 
     // Pointers to the input image
-    const size_t voxelNumber = CalcVoxelNumber(*refImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3);
     DataType *refImgPtr = static_cast<DataType*>(refImage->data);
     DataType *warImgPtr = static_cast<DataType*>(warImage->data);
 
@@ -640,7 +633,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
     free(paddedWarImgPtr);
     free(refBlockValue);
     // Deal with the labels that contains NaN values
-    for (size_t node = 0; node < CalcVoxelNumber(*controlPointGridImage); ++node) {
+    for (size_t node = 0; node < NiftiImage::calcVoxelNumber(controlPointGridImage, 3); ++node) {
         int definedValueNumber = 0;
         float *discretisedValuePtr = &discretisedValue[node * label_nD_number];
         float meanValue = 0;
@@ -694,7 +687,6 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
     } // node
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
 void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
                                      float *discretisedValue,
@@ -703,7 +695,6 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
                                      nifti_image *refImage,
                                      nifti_image *warImage,
                                      int *mask) {
-
     int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, blockIndex_t, discretisedIndex;
     size_t voxIndex, voxIndex_t;
     const int label_1D_number = (discretise_radius / discretise_step) * 2 + 1;
@@ -733,7 +724,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
     int currentControlPoint = 0;
 
     // Pointers to the input image
-    const size_t voxelNumber = CalcVoxelNumber(*refImage);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3);
     DataType *refImgPtr = static_cast<DataType*>(refImage->data);
     DataType *warImgPtr = static_cast<DataType*>(warImage->data);
 
@@ -758,7 +749,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
     shared(voxelBlockNumber_t, voxelNumber, voxelBlockNumber, label_nD_number, controlPointGridImage, refImage, warImage, grid2img_vox, blockSize, \
     padding_value, refBlockValue, mask, refImgPtr, warImgPtr, discretise_radius, \
     discretise_step, discretisedValue) \
-    private(cpx, cpy, cpz, x, y, z, a, b, c, t, currentControlPoint, gridVox, imageVox, \
+    private(cpx, cpy, x, y, z, a, b, c, t, currentControlPoint, gridVox, imageVox, \
     voxIndex, idBlock, blockIndex, definedValueNumber, tid, \
     timeV, voxIndex_t, blockIndex_t, discretisedIndex, currentSum, currentValue)
 #endif
@@ -766,38 +757,38 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
 #ifdef _OPENMP
         tid = omp_get_thread_num();
 #endif
-        gridVox[2] = cpz;
+        gridVox[2] = static_cast<float>(cpz);
         for (cpy = 0; cpy < controlPointGridImage->ny; ++cpy) {
-            gridVox[1] = cpy;
+            gridVox[1] = static_cast<float>(cpy);
             for (cpx = 0; cpx < controlPointGridImage->nx; ++cpx) {
-                gridVox[0] = cpx;
+                gridVox[0] = static_cast<float>(cpx);
                 currentControlPoint = controlPointGridImage->ny * controlPointGridImage->nx * cpz +
                     controlPointGridImage->nx * cpy + cpx;
 
                 // Compute the corresponding image voxel position
                 reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
-                imageVox[0] = reg_round(imageVox[0]);
-                imageVox[1] = reg_round(imageVox[1]);
-                imageVox[2] = reg_round(imageVox[2]);
+                imageVox[0] = static_cast<float>(reg_round(imageVox[0]));
+                imageVox[1] = static_cast<float>(reg_round(imageVox[1]));
+                imageVox[2] = static_cast<float>(reg_round(imageVox[2]));
 
                 //INIT
                 for (idBlock = 0; idBlock < voxelBlockNumber_t; idBlock++) {
-                    refBlockValue[tid][idBlock] = padding_value;
+                    refBlockValue[tid][idBlock] = static_cast<float>(padding_value);
                 }
 
                 // Extract the block in the reference image
                 blockIndex = 0;
                 definedValueNumber = 0;
-                for (z = imageVox[2] - blockSize[2] / 2; z < imageVox[2] + blockSize[2] / 2; ++z) {
-                    for (y = imageVox[1] - blockSize[1] / 2; y < imageVox[1] + blockSize[1] / 2; ++y) {
-                        for (x = imageVox[0] - blockSize[0] / 2; x < imageVox[0] + blockSize[0] / 2; ++x) {
+                for (z = int(imageVox[2] - blockSize[2] / 2); z < imageVox[2] + blockSize[2] / 2; ++z) {
+                    for (y = int(imageVox[1] - blockSize[1] / 2); y < imageVox[1] + blockSize[1] / 2; ++y) {
+                        for (x = int(imageVox[0] - blockSize[0] / 2); x < imageVox[0] + blockSize[0] / 2; ++x) {
                             if (x > -1 && x<refImage->nx && y>-1 && y<refImage->ny && z>-1 && z < refImage->nz) {
                                 voxIndex = refImage->ny * refImage->nx * z + refImage->nx * y + x;
                                 if (mask[voxIndex] > -1) {
                                     for (timeV = 0; timeV < refImage->nt; ++timeV) {
                                         voxIndex_t = timeV * voxelNumber + voxIndex;
                                         blockIndex_t = timeV * voxelBlockNumber + blockIndex;
-                                        refBlockValue[tid][blockIndex_t] = refImgPtr[voxIndex_t];
+                                        refBlockValue[tid][blockIndex_t] = static_cast<float>(refImgPtr[voxIndex_t]);
                                         if (refBlockValue[tid][blockIndex_t] == refBlockValue[tid][blockIndex_t]) {
                                             ++definedValueNumber;
                                         } else refBlockValue[tid][blockIndex_t] = 0;
@@ -812,9 +803,9 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
                 if (definedValueNumber > 0) {
 
                     discretisedIndex = 0;
-                    for (c = imageVox[2] - discretise_radius; c <= imageVox[2] + discretise_radius; c += discretise_step) {
-                        for (b = imageVox[1] - discretise_radius; b <= imageVox[1] + discretise_radius; b += discretise_step) {
-                            for (a = imageVox[0] - discretise_radius; a <= imageVox[0] + discretise_radius; a += discretise_step) {
+                    for (c = int(imageVox[2] - discretise_radius); c <= imageVox[2] + discretise_radius; c += discretise_step) {
+                        for (b = int(imageVox[1] - discretise_radius); b <= imageVox[1] + discretise_radius; b += discretise_step) {
+                            for (a = int(imageVox[0] - discretise_radius); a <= imageVox[0] + discretise_radius; a += discretise_step) {
 
                                 blockIndex = 0;
                                 currentSum = 0.;
@@ -833,7 +824,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
 #ifdef MRF_USE_SAD
                                                         currentValue = fabs(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]);
 #else
-                                                        currentValue = reg_pow2(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]);
+                                                        currentValue = static_cast<float>(reg_pow2(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]));
 #endif
                                                     } else {
 #ifdef MRF_USE_SAD
@@ -867,7 +858,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
                                         } // x
                                     } // y
                                 } // z
-                                discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] = currentSum;
+                                discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] = static_cast<float>(currentSum);
                                 ++discretisedIndex;
                             } // a
                         } // b
@@ -882,7 +873,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
     free(refBlockValue);
 
     // Deal with the labels that contains NaN values
-    for (size_t node = 0; node < CalcVoxelNumber(*controlPointGridImage); ++node) {
+    for (size_t node = 0; node < NiftiImage::calcVoxelNumber(controlPointGridImage, 3); ++node) {
         int definedValueNumber = 0;
         float *discretisedValuePtr = &discretisedValue[node * label_nD_number];
         float meanValue = 0;
@@ -916,7 +907,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
                                         // Check if the value is defined
                                         if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) {
                                             // compute the distance between label and label2
-                                            current_distance = reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z);
+                                            current_distance = static_cast<float>(reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z));
                                             if (current_distance < min_distance) {
                                                 min_distance = current_distance;
                                                 discretisedValuePtr[label] = discretisedValuePtr[label2];
@@ -936,20 +927,6 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
     } // node
 }
 /* *************************************************************** */
-//template <class DataType>
-//void GetDiscretisedValueSSD_core2D(nifti_image *controlPointGridImage,
-//                                   float *discretisedValue,
-//                                   int discretise_radius,
-//                                   int discretise_step,
-//                                   nifti_image *refImage,
-//                                   nifti_image *warImage,
-//                                   int *mask)
-//{
-//    reg_print_fct_warn("GetDiscretisedValue_core2D");
-//    reg_print_msg_warn("No yet implemented");
-//    reg_exit();
-//}
-/* *************************************************************** */
 void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage,
                                   float *discretisedValue,
                                   int discretise_radius,
@@ -983,30 +960,6 @@ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage,
         reg_print_fct_error("reg_ssd::GetDiscretisedValue");
         reg_print_msg_error("Not implemented in 2D yet");
         reg_exit();
-        // switch (this->referenceImage->datatype) {
-        // case NIFTI_TYPE_FLOAT32:
-        //     GetDiscretisedValueSSD_core2D<float>(controlPointGridImage,
-        //                                          discretisedValue,
-        //                                          discretise_radius,
-        //                                          discretise_step,
-        //                                          this->referenceImage,
-        //                                          this->warpedImage,
-        //                                          this->referenceMask);
-        //     break;
-        // case NIFTI_TYPE_FLOAT64:
-        //     GetDiscretisedValueSSD_core2D<double>(controlPointGridImage,
-        //                                           discretisedValue,
-        //                                           discretise_radius,
-        //                                           discretise_step,
-        //                                           this->referenceImage,
-        //                                           this->warpedImage,
-        //                                           this->referenceMask);
-        //     break;
-        // default:
-        //     reg_print_fct_error("reg_ssd::GetDiscretisedValue");
-        //     reg_print_msg_error("Unsupported datatype");
-        //     reg_exit();
-        // }
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_thinPlateSpline.cpp b/reg-lib/cpu/_reg_thinPlateSpline.cpp
index a6c28188..b43b857c 100644
--- a/reg-lib/cpu/_reg_thinPlateSpline.cpp
+++ b/reg-lib/cpu/_reg_thinPlateSpline.cpp
@@ -214,7 +214,7 @@ void reg_tps<T>::FillDeformationField(nifti_image *deformationField)
    if(this->initialised==false)
       this->InitialiseTPS();
 
-   const size_t voxelNumber = CalcVoxelNumber(*deformationField);
+   const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
    T *defX=static_cast<T *>(deformationField->data);
    T *defY=&defX[voxelNumber];
    T *defZ=nullptr;
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 9b4dc6f9..aea666bb 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -95,7 +95,7 @@ void reg_intensityRescale_core(nifti_image *image,
                                float newMin,
                                float newMax) {
     DataType *imagePtr = static_cast<DataType*>(image->data);
-    const size_t voxelNumber = CalcVoxelNumber(*image);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
 
     // The rescaling is done for each volume independently
     DataType *volumePtr = &imagePtr[timePoint * voxelNumber];
@@ -343,7 +343,7 @@ PrecisionType reg_getMaximalLength(const nifti_image *image,
                                    const bool& optimiseX,
                                    const bool& optimiseY,
                                    const bool& optimiseZ) {
-    const size_t voxelNumber = CalcVoxelNumber(*image);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const DataType *dataPtrX = static_cast<DataType*>(image->data);
     const DataType *dataPtrY = &dataPtrX[voxelNumber];
     const DataType *dataPtrZ = &dataPtrY[voxelNumber];
@@ -506,7 +506,6 @@ void reg_tools_operationImageToImage(const nifti_image *img1,
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   private(i) \
    shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2,operation)
 #endif
     for (i = 0; i < voxelNumber; i++)
@@ -721,7 +720,6 @@ void reg_tools_operationValueToImage(const nifti_image *img,
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   private(i) \
    shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope,operation)
 #endif
     for (i = 0; i < voxelNumber; i++)
@@ -913,23 +911,23 @@ void reg_tools_divideValueToImage(const nifti_image *img,
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_tools_kernelConvolution_core(nifti_image *image,
-                                      float *sigma,
-                                      int kernelType,
-                                      int *mask,
-                                      bool *timePoint,
-                                      bool *axis) {
+void reg_tools_kernelConvolution(nifti_image *image,
+                                 const float *sigma,
+                                 const int& kernelType,
+                                 const int *mask,
+                                 const bool *timePoint,
+                                 const bool *axis) {
     if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) {
-        reg_print_fct_error("reg_tools_kernelConvolution_core");
+        reg_print_fct_error("reg_tools_kernelConvolution");
         reg_print_msg_error("This function does not support images with dimension > 2048");
         reg_exit();
     }
 #ifdef WIN32
     long index;
-    const long voxelNumber = (long)CalcVoxelNumber(*image);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3);
 #else
     size_t index;
-    const size_t voxelNumber = CalcVoxelNumber(*image);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
 #endif
     DataType *imagePtr = static_cast<DataType*>(image->data);
     int imageDim[3] = { image->nx, image->ny, image->nz };
@@ -943,15 +941,14 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
             DataType *intensityPtr = &imagePtr[t * voxelNumber];
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber) \
-   private(index)
+   shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber)
 #endif
             for (index = 0; index < voxelNumber; index++) {
-                densityPtr[index] = (intensityPtr[index] == intensityPtr[index]) ? 1.f : 0;
-                densityPtr[index] *= (mask[index] >= 0) ? 1 : 0;
+                densityPtr[index] = intensityPtr[index] == intensityPtr[index] ? 1.f : 0;
+                densityPtr[index] *= mask[index] >= 0 ? 1 : 0;
                 nanImagePtr[index] = static_cast<bool>(densityPtr[index]);
                 if (nanImagePtr[index] == 0)
-                    intensityPtr[index] = static_cast<DataType>(0);
+                    intensityPtr[index] = 0;
             }
             // Loop over the x, y and z dimensions
             for (int n = 0; n < 3; n++) {
@@ -971,7 +968,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
                         // Spline kernel
                         radius = static_cast<int>(temp * 2.0f);
                     } else {
-                        reg_print_fct_error("reg_tools_kernelConvolution_core");
+                        reg_print_fct_error("reg_tools_kernelConvolution");
                         reg_print_msg_error("Unknown kernel type");
                         reg_exit();
                     }
@@ -1060,7 +1057,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
    planeNumber,kernelSum) \
    private(realIndex,currentIntensityPtr,currentDensityPtr,lineIndex,bufferIntensity, \
    bufferDensity,shiftPre,shiftPst,kernelPtr,kernelValue,densitySum,intensitySum, \
-   k, bufferIntensitycur,bufferDensitycur, planeIndex, \
+   k, bufferIntensitycur,bufferDensitycur, \
    kernel_sse, intensity_sse, density_sse, intensity_sum_sse, density_sum_sse)
 #else
 #pragma omp parallel for default(none) \
@@ -1068,7 +1065,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
    planeNumber,kernelSum) \
    private(realIndex,currentIntensityPtr,currentDensityPtr,lineIndex,bufferIntensity, \
    bufferDensity,shiftPre,shiftPst,kernelPtr,kernelValue,densitySum,intensitySum, \
-   k, bufferIntensitycur,bufferDensitycur, planeIndex)
+   k, bufferIntensitycur,bufferDensitycur)
 #endif
 #endif // _OPENMP
                         // Loop over the different voxel
@@ -1196,8 +1193,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image,
             // Normalise per timepoint
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr) \
-   private(index)
+   shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr)
 #endif
             for (index = 0; index < voxelNumber; ++index) {
                 if (nanImagePtr[index] != 0)
@@ -1224,10 +1220,10 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
     }
 #ifdef WIN32
     long index;
-    const long voxelNumber = (long)CalcVoxelNumber(*image);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3);
 #else
     size_t index;
-    const size_t voxelNumber = CalcVoxelNumber(*image);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
 #endif
     DataType *imagePtr = static_cast<DataType*>(image->data);
 
@@ -1408,48 +1404,43 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
 }
 /* *************************************************************** */
 void reg_tools_kernelConvolution(nifti_image *image,
-                                 float *sigma,
-                                 int kernelType,
-                                 int *mask,
-                                 bool *timePoint,
-                                 bool *axis) {
+                                 const float *sigma,
+                                 const int& kernelType,
+                                 const int *mask,
+                                 const bool *timePoint,
+                                 const bool *axis) {
+    if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64) {
+        reg_print_fct_error("reg_tools_kernelConvolution");
+        reg_print_msg_error("The image is expected to be of floating precision type");
+        reg_exit();
+    }
+
     if (image->nt <= 0) image->nt = image->dim[4] = 1;
     if (image->nu <= 0) image->nu = image->dim[5] = 1;
 
-    bool *axisToSmooth = new bool[3];
-    const int activeTimePointNumber = image->nt * image->nu;
-    bool *activeTimePoint = new bool[activeTimePointNumber];
+    unique_ptr<bool[]> axisToSmooth{ new bool[3] };
     if (axis == nullptr) {
         // All axis are smoothed by default
         for (int i = 0; i < 3; i++) axisToSmooth[i] = true;
     } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i];
 
+    const int activeTimePointNumber = image->nt * image->nu;
+    unique_ptr<bool[]> activeTimePoint{ new bool[activeTimePointNumber] };
     if (timePoint == nullptr) {
         // All time points are considered as active
         for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true;
     } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i];
 
-    int *currentMask = nullptr;
-    if (mask == nullptr) {
-        currentMask = (int*)calloc(CalcVoxelNumber(*image), sizeof(int));
-    } else currentMask = mask;
-
-    switch (image->datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        reg_tools_kernelConvolution_core<float>(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_tools_kernelConvolution_core<double>(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth);
-        break;
-    default:
-        reg_print_fct_error("reg_tools_kernelConvolution");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+    unique_ptr<int[]> currentMask;
+    if (!mask) {
+        currentMask.reset(new int[NiftiImage::calcVoxelNumber(image, 3)]());
+        mask = currentMask.get();
     }
 
-    if (mask == nullptr) free(currentMask);
-    delete[] axisToSmooth;
-    delete[] activeTimePoint;
+    std::visit([&](auto&& imgDataType) {
+        using ImgDataType = std::decay_t<decltype(imgDataType)>;
+        reg_tools_kernelConvolution<ImgDataType>(image, sigma, kernelType, mask, activeTimePoint.get(), axisToSmooth.get());
+    }, NiftiImage::getFloatingDataType(image));
 }
 /* *************************************************************** */
 template <class PrecisionType, class ImageType>
@@ -1530,7 +1521,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
     image->sto_ijk = nifti_mat44_inverse(image->sto_xyz);
 
     // Reallocate the image
-    image->nvox = CalcVoxelNumber(*image, 7);
+    image->nvox = NiftiImage::calcVoxelNumber(image, 7);
     image->data = calloc(image->nvox, image->nbyper);
     imagePtr = static_cast<ImageType*>(image->data);
 
@@ -1699,7 +1690,7 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) {
 template <class DataType>
 void reg_tools_binaryImage2int(const nifti_image *image, int *array) {
     const DataType *dataPtr = static_cast<DataType*>(image->data);
-    for (size_t i = 0; i < CalcVoxelNumber(*image); i++)
+    for (size_t i = 0; i < NiftiImage::calcVoxelNumber(image, 3); i++)
         array[i] = dataPtr[i] != 0 ? 1 : -1;
 }
 /* *************************************************************** */
@@ -1738,7 +1729,7 @@ void reg_tools_binaryImage2int(const nifti_image *image, int *array) {
 /* *************************************************************** */
 template <class AType, class BType>
 double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB) {
-    const size_t voxelNumber = CalcVoxelNumber(*imageA);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(imageA, 3);
     const AType *imageAPtrX = static_cast<AType*>(imageA->data);
     const BType *imageBPtrX = static_cast<BType*>(imageB->data);
     const AType *imageAPtrY = nullptr;
@@ -1977,7 +1968,7 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma
 /* *************************************************************** */
 template <class DataType>
 int reg_tools_removeNanFromMask_core(const nifti_image *image, int *mask) {
-    const size_t voxelNumber = CalcVoxelNumber(*image);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const DataType *imagePtr = static_cast<DataType*>(image->data);
     for (int t = 0; t < image->nt; ++t) {
         for (size_t i = 0; i < voxelNumber; ++i) {
@@ -2009,7 +2000,7 @@ DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool
 
     const DataType *imgPtr = static_cast<DataType*>(image->data);
     DataType retValue = calcMin ? std::numeric_limits<DataType>::max() : std::numeric_limits<DataType>::lowest();
-    const size_t voxelNumber = CalcVoxelNumber(*image);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
 
     for (int time = 0; time < image->nt; ++time) {
@@ -2161,7 +2152,7 @@ template <class DataType>
 void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd) {
     // Allocate the outputArray if it is not allocated yet
     if (*outputArray == nullptr)
-        *outputArray = malloc(CalcVoxelNumber(*image, 7) * sizeof(DataType));
+        *outputArray = malloc(NiftiImage::calcVoxelNumber(image, 7) * sizeof(DataType));
 
     // Parse the cmd to check which axis have to be flipped
     const char *axisName = "x\0y\0z\0t\0u\0v\0w\0";
@@ -2241,7 +2232,7 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin
 template<class DataType>
 void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
     DataType *ptrX = static_cast<DataType*>(field->data);
-    DataType *ptrY = &ptrX[CalcVoxelNumber(*field, 2)];
+    DataType *ptrY = &ptrX[NiftiImage::calcVoxelNumber(field, 2)];
 
     mat44 matrix;
     if (field->sform_code > 0)
@@ -2253,7 +2244,7 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(field, matrix, ptrX, ptrY) \
-   private(x, y, index, xInit, yInit)
+   private(x, index, xInit, yInit)
 #endif
     for (y = 0; y < field->ny; y++) {
         index = y * field->nx;
@@ -2276,7 +2267,7 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
 /* *************************************************************** */
 template<class DataType>
 void reg_getDisplacementFromDeformation_3D(nifti_image *field) {
-    const size_t voxelNumber = CalcVoxelNumber(*field);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(field, 3);
     DataType *ptrX = static_cast<DataType*>(field->data);
     DataType *ptrY = &ptrX[voxelNumber];
     DataType *ptrZ = &ptrY[voxelNumber];
@@ -2290,9 +2281,8 @@ void reg_getDisplacementFromDeformation_3D(nifti_image *field) {
     float xInit, yInit, zInit;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(field, matrix, \
-   ptrX, ptrY, ptrZ) \
-   private(x, y, z, index, xInit, yInit, zInit)
+   shared(field, matrix, ptrX, ptrY, ptrZ) \
+   private(x, y, index, xInit, yInit, zInit)
 #endif
     for (z = 0; z < field->nz; z++) {
         index = z * field->nx * field->ny;
@@ -2367,7 +2357,7 @@ int reg_getDisplacementFromDeformation(nifti_image *field) {
 template<class DataType>
 void reg_getDeformationFromDisplacement_2D(nifti_image *field) {
     DataType *ptrX = static_cast<DataType*>(field->data);
-    DataType *ptrY = &ptrX[CalcVoxelNumber(*field, 2)];
+    DataType *ptrY = &ptrX[NiftiImage::calcVoxelNumber(field, 2)];
 
     mat44 matrix;
     if (field->sform_code > 0)
@@ -2378,9 +2368,8 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) {
     DataType xInit, yInit;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(field, matrix, \
-   ptrX, ptrY) \
-   private(x, y, index, xInit, yInit)
+   shared(field, matrix, ptrX, ptrY) \
+   private(x, index, xInit, yInit)
 #endif
     for (y = 0; y < field->ny; y++) {
         index = y * field->nx;
@@ -2403,7 +2392,7 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) {
 /* *************************************************************** */
 template<class DataType>
 void reg_getDeformationFromDisplacement_3D(nifti_image *field) {
-    const size_t voxelNumber = CalcVoxelNumber(*field);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(field, 3);
     DataType *ptrX = static_cast<DataType*>(field->data);
     DataType *ptrY = &ptrX[voxelNumber];
     DataType *ptrZ = &ptrY[voxelNumber];
@@ -2418,7 +2407,7 @@ void reg_getDeformationFromDisplacement_3D(nifti_image *field) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(field, matrix, ptrX, ptrY, ptrZ) \
-   private(x, y, z, index, xInit, yInit, zInit)
+   private(x, y, index, xInit, yInit, zInit)
 #endif
     for (z = 0; z < field->nz; z++) {
         index = z * field->nx * field->ny;
@@ -2496,7 +2485,7 @@ void reg_setGradientToZero_core(nifti_image *image,
                                 bool xAxis,
                                 bool yAxis,
                                 bool zAxis) {
-    const size_t voxelNumber = CalcVoxelNumber(*image);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     DataType *ptr = static_cast<DataType*>(image->data);
     if (xAxis) {
         for (size_t i = 0; i < voxelNumber; ++i)
@@ -2717,21 +2706,6 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x
     z = index;
 }
 /* *************************************************************** */
-size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount) {
-    size_t voxelNumber = static_cast<size_t>(std::abs(image.nx)) * static_cast<size_t>(std::abs(image.ny));
-    if (dimCount > 2)
-        voxelNumber *= static_cast<size_t>(std::abs(image.nz));
-    if (dimCount > 3)
-        voxelNumber *= static_cast<size_t>(std::abs(image.nt));
-    if (dimCount > 4)
-        voxelNumber *= static_cast<size_t>(std::abs(image.nu));
-    if (dimCount > 5)
-        voxelNumber *= static_cast<size_t>(std::abs(image.nv));
-    if (dimCount > 6)
-        voxelNumber *= static_cast<size_t>(std::abs(image.nw));
-    return voxelNumber;
-}
-/* *************************************************************** */
 nifti_image* nifti_dup(const nifti_image& image, const bool& copyData) {
     nifti_image *newImage = nifti_copy_nim_info(&image);
     newImage->data = calloc(image.nvox, image.nbyper);
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index f809fb67..4392b8a7 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -93,11 +93,11 @@ void reg_getRealImageSpacing(nifti_image *image,
  */
 extern "C++"
 void reg_tools_kernelConvolution(nifti_image *image,
-                                 float *sigma,
-                                 int kernelType,
-                                 int *mask = nullptr,
-                                 bool *timePoints = nullptr,
-                                 bool *axis = nullptr);
+                                 const float *sigma,
+                                 const int& kernelType,
+                                 const int *mask = nullptr,
+                                 const bool *timePoints = nullptr,
+                                 const bool *axis = nullptr);
 /* *************************************************************** */
 /** @brief Smooth a label image using a Gaussian kernel
  * @param image Image to be smoothed
@@ -452,13 +452,6 @@ void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n);
 /* *************************************************************** */
 void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z);
 /* *************************************************************** */
-/** @brief Calculates the number of voxels in the image
- * @param image Input image
- * @param dimCount Number of dimensions to consider
- * @return The number of voxels in the image
- */
-size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount = 3);
-/* *************************************************************** */
 /** @brief Duplicates the nifti image
  * @param image Input image
  * @param copyData Boolean to specify if the image data should be copied
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index a8ea0241..fc4deb3f 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -107,10 +107,10 @@ void CudaF3dContent::UpdateWarpedGradient() {
 }
 /* *************************************************************** */
 void CudaF3dContent::ZeroTransformationGradient() {
-    cudaMemset(transformationGradientCuda, 0, CalcVoxelNumber(*transformationGradient) * sizeof(float4));
+    cudaMemset(transformationGradientCuda, 0, NiftiImage::calcVoxelNumber(transformationGradient, 3) * sizeof(float4));
 }
 /* *************************************************************** */
 void CudaF3dContent::ZeroVoxelBasedMeasureGradient() {
-    cudaMemset(voxelBasedMeasureGradientCuda, 0, CalcVoxelNumber(*voxelBasedMeasureGradient) * sizeof(float4));
+    cudaMemset(voxelBasedMeasureGradientCuda, 0, NiftiImage::calcVoxelNumber(voxelBasedMeasureGradient, 3) * sizeof(float4));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 35ec2db1..5b15a1a2 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -10,77 +10,63 @@
  */
 
 #include "_reg_common_cuda.h"
+#include <thrust/host_vector.h>
+#include <thrust/device_ptr.h>
 
 /* *************************************************************** */
 template <class NiftiType>
-int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *img) {
-    const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NiftiType);
-
-    int *g_dim;
-    float* g_pixdim;
-    NiftiType* g_data;
-
-    NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_dim, 8 * sizeof(int)));
-    NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_pixdim, 8 * sizeof(float)));
-    NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_data, memSize));
-
-    NiftiType *array_h = static_cast<NiftiType*>(img->data);
-    NR_CUDA_SAFE_CALL(cudaMemcpy(image_d, img, sizeof(nifti_image), cudaMemcpyHostToDevice));
-
-    NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->data, array_h, memSize, cudaMemcpyHostToDevice));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->dim, img->dim, 8 * sizeof(int), cudaMemcpyHostToDevice));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->pixdim, img->pixdim, 8 * sizeof(float), cudaMemcpyHostToDevice));
-
+int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *imageCuda, const nifti_image *img) {
+    const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType);
+    NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, img, sizeof(nifti_image), cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->data, img->data, memSize, cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->dim, img->dim, 8 * sizeof(int), cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->pixdim, img->pixdim, 8 * sizeof(float), cudaMemcpyHostToDevice));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToNiftiOnDevice1<float>(nifti_image*, nifti_image*);
-template int cudaCommon_transferNiftiToNiftiOnDevice1<double>(nifti_image*, nifti_image*);
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, nifti_image *img) {
+int cudaCommon_transferNiftiToArrayOnDevice1(DataType *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType);
-        NiftiType *array_h = static_cast<NiftiType*>(img->data);
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice));
+        const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType);
+        NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, memSize, cudaMemcpyHostToDevice));
     }
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, nifti_image *img) {
+int cudaCommon_transferNiftiToArrayOnDevice(DataType *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
         if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The specified image is not a single precision deformation field image");
             return EXIT_FAILURE;
         }
-        float *niftiImgValues = static_cast<float*>(img->data);
-        const size_t voxelNumber = CalcVoxelNumber(*img);
-        float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4));
+        const float *niftiImgValues = static_cast<float*>(img->data);
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        unique_ptr<float4[]> array(new float4[voxelNumber]());
         for (size_t i = 0; i < voxelNumber; i++)
-            array_h[i].x = *niftiImgValues++;
+            array[i].x = *niftiImgValues++;
         if (img->dim[5] >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].y = *niftiImgValues++;
+                array[i].y = *niftiImgValues++;
         }
         if (img->dim[5] >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].z = *niftiImgValues++;
+                array[i].z = *niftiImgValues++;
         }
         if (img->dim[5] >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].w = *niftiImgValues++;
+                array[i].w = *niftiImgValues++;
         }
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
-        free(array_h);
+        NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(array_d, img);
+            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(arrayCuda, img);
         default:
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The image data type is not supported");
@@ -89,69 +75,68 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, nifti_image *img)
     }
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<int>(int*, nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, const nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, const nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<int>(int*, const nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, const nifti_image*);
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, DataType *array2_d, nifti_image *img) {
+int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType);
-        NiftiType *array_h = static_cast<NiftiType*>(img->data);
-        NiftiType *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]];
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice));
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, memSize, cudaMemcpyHostToDevice));
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        const size_t memSize = voxelNumber * sizeof(DataType);
+        const NiftiType *array1 = static_cast<NiftiType*>(img->data);
+        const NiftiType *array2 = &array1[voxelNumber];
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1, memSize, cudaMemcpyHostToDevice));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice));
     }
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, DataType *array2_d, nifti_image *img) {
+int cudaCommon_transferNiftiToArrayOnDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
         if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The specified image is not a single precision deformation field image");
             return EXIT_FAILURE;
         }
-        float *niftiImgValues = static_cast<float *>(img->data);
-        const size_t voxelNumber = CalcVoxelNumber(*img);
-        float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4));
-        float4 *array2_h = (float4*)calloc(voxelNumber, sizeof(float4));
+        const float *niftiImgValues = static_cast<float*>(img->data);
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        unique_ptr<float4[]> array1(new float4[voxelNumber]());
+        unique_ptr<float4[]> array2(new float4[voxelNumber]());
         for (size_t i = 0; i < voxelNumber; i++)
-            array_h[i].x = *niftiImgValues++;
+            array1[i].x = *niftiImgValues++;
         for (size_t i = 0; i < voxelNumber; i++)
-            array2_h[i].x = *niftiImgValues++;
+            array2[i].x = *niftiImgValues++;
         if (img->dim[5] >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].y = *niftiImgValues++;
+                array1[i].y = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
-                array2_h[i].y = *niftiImgValues++;
+                array2[i].y = *niftiImgValues++;
         }
         if (img->dim[5] >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].z = *niftiImgValues++;
+                array1[i].z = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
-                array2_h[i].z = *niftiImgValues++;
+                array2[i].z = *niftiImgValues++;
         }
         if (img->dim[5] >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].w = *niftiImgValues++;
+                array1[i].w = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
-                array2_h[i].w = *niftiImgValues++;
+                array2[i].w = *niftiImgValues++;
         }
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
-        free(array_h);
-        free(array2_h);
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(array_d, array2_d, img);
+            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(array1Cuda, array2Cuda, img);
         default:
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The image data type is not supported");
@@ -160,26 +145,24 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, DataType *array2_
     }
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, float*, nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, double*, nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, float4*, nifti_image*); // for deformation field
+template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, float*, const nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, double*, const nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, float4*, const nifti_image*); // for deformation field
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *img) {
+int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        NiftiType *array_h = static_cast<NiftiType*>(img->data);
-
-        cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
-        copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
-        copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
+        cudaMemcpy3DParms copyParams{};
+        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
+        copyParams.srcPtr = make_cudaPitchedPtr(img->data,
                                                 copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
-        copyParams.dstArray = cuArray_d;
+        copyParams.dstArray = arrayCuda;
         copyParams.kind = cudaMemcpyHostToDevice;
         NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
     }
@@ -187,45 +170,43 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *img) {
+int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
         if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The specified image is not a single precision deformation field image");
             return EXIT_FAILURE;
         }
-        float *niftiImgValues = static_cast<float *>(img->data);
-        const size_t voxelNumber = CalcVoxelNumber(*img);
-        float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4));
-
+        const float *niftiImgValues = static_cast<float*>(img->data);
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        unique_ptr<float4[]> array(new float4[voxelNumber]());
         for (size_t i = 0; i < voxelNumber; i++)
-            array_h[i].x = *niftiImgValues++;
+            array[i].x = *niftiImgValues++;
         if (img->dim[5] >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].y = *niftiImgValues++;
+                array[i].y = *niftiImgValues++;
         }
         if (img->dim[5] >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].z = *niftiImgValues++;
+                array[i].z = *niftiImgValues++;
         }
         if (img->dim[5] == 3) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].w = *niftiImgValues++;
+                array[i].w = *niftiImgValues++;
         }
-        cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
-        copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
-        copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
+        cudaMemcpy3DParms copyParams{};
+        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
+        copyParams.srcPtr = make_cudaPitchedPtr(array.get(),
                                                 copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
-        copyParams.dstArray = cuArray_d;
+        copyParams.dstArray = arrayCuda;
         copyParams.kind = cudaMemcpyHostToDevice;
         NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-        free(array_h);
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(cuArray_d, img);
+            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(arrayCuda, img);
         default:
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
             reg_print_msg_error("The image data type is not supported");
@@ -234,104 +215,97 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *i
     }
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<int>(cudaArray*, nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, nifti_image*); // for deformation field
+template int cudaCommon_transferNiftiToArrayOnDevice<int>(cudaArray*, const nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, const nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, const nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, const nifti_image*); // for deformation field
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) {
+int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        NiftiType *array_h = static_cast<NiftiType*>(img->data);
-        NiftiType *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]];
-
-        cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
-        copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
+        NiftiType *array1 = static_cast<NiftiType*>(img->data);
+        NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)];
+        cudaMemcpy3DParms copyParams{};
+        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
         copyParams.kind = cudaMemcpyHostToDevice;
         // First timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
+        copyParams.srcPtr = make_cudaPitchedPtr(array1,
                                                 copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
-        copyParams.dstArray = cuArray_d;
+        copyParams.dstArray = array1Cuda;
         NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
         // Second timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h,
+        copyParams.srcPtr = make_cudaPitchedPtr(array2,
                                                 copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
-        copyParams.dstArray = cuArray2_d;
+        copyParams.dstArray = array2Cuda;
         NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
     }
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) {
+int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
         if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
             reg_print_msg_error("The specified image is not a single precision deformation field image");
             return EXIT_FAILURE;
         }
-        float *niftiImgValues = static_cast<float*>(img->data);
-        const size_t voxelNumber = CalcVoxelNumber(*img);
-        float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4));
-        float4 *array2_h = (float4*)calloc(voxelNumber, sizeof(float4));
-
+        const float *niftiImgValues = static_cast<float*>(img->data);
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        unique_ptr<float4[]> array1(new float4[voxelNumber]());
+        unique_ptr<float4[]> array2(new float4[voxelNumber]());
         for (size_t i = 0; i < voxelNumber; i++)
-            array_h[i].x = *niftiImgValues++;
+            array1[i].x = *niftiImgValues++;
         for (size_t i = 0; i < voxelNumber; i++)
-            array2_h[i].x = *niftiImgValues++;
-
+            array2[i].x = *niftiImgValues++;
         if (img->dim[5] >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].y = *niftiImgValues++;
+                array1[i].y = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
-                array2_h[i].y = *niftiImgValues++;
+                array2[i].y = *niftiImgValues++;
         }
-
         if (img->dim[5] >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].z = *niftiImgValues++;
+                array1[i].z = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
-                array2_h[i].z = *niftiImgValues++;
+                array2[i].z = *niftiImgValues++;
         }
-
         if (img->dim[5] == 3) {
             for (size_t i = 0; i < voxelNumber; i++)
-                array_h[i].w = *niftiImgValues++;
+                array1[i].w = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
-                array2_h[i].w = *niftiImgValues++;
+                array2[i].w = *niftiImgValues++;
         }
 
-        cudaMemcpy3DParms copyParams; memset(&copyParams, 0, sizeof(copyParams));
-        copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
+        cudaMemcpy3DParms copyParams{};
+        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
         copyParams.kind = cudaMemcpyHostToDevice;
         // First timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h,
+        copyParams.srcPtr = make_cudaPitchedPtr(array1.get(),
                                                 copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
-        copyParams.dstArray = cuArray_d;
+        copyParams.dstArray = array1Cuda;
         NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-        free(array_h);
         // Second timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h,
+        copyParams.srcPtr = make_cudaPitchedPtr(array2.get(),
                                                 copyParams.extent.width * sizeof(DataType),
                                                 copyParams.extent.width,
                                                 copyParams.extent.height);
-        copyParams.dstArray = cuArray2_d;
+        copyParams.dstArray = array2Cuda;
         NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-        free(array2_h);
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(cuArray_d, cuArray2_d, img);
+            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(array1Cuda, array2Cuda, img);
         default:
             reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
             reg_print_msg_error("The image data type is not supported");
@@ -340,91 +314,87 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA
     }
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, cudaArray*, nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, cudaArray*, nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, cudaArray*, nifti_image*); // for deformation field
+template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, cudaArray*, const nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, cudaArray*, const nifti_image*);
+template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, cudaArray*, const nifti_image*); // for deformation field
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) {
-    const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]);
+int cudaCommon_allocateArrayToDevice(cudaArray **arrayCuda, const int *dim) {
+    const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3]));
     cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DataType>();
-    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize));
+    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(arrayCuda, &texDesc, volumeSize));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, int*);
-template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, int*);
-template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, int*); // for deformation field
+template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, const int*);
+template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, const int*);
+template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, const int*); // for deformation field
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, int *dim) {
-    const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]);
+int cudaCommon_allocateArrayToDevice(cudaArray **array1Cuda, cudaArray **array2Cuda, const int *dim) {
+    const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3]));
     cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DataType>();
-    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize));
-    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray2_d, &texDesc, volumeSize));
+    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array1Cuda, &texDesc, volumeSize));
+    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array2Cuda, &texDesc, volumeSize));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, cudaArray**, int*);
-template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, cudaArray**, int*);
-template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, cudaArray**, int*); // for deformation field
+template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, cudaArray**, const int*);
+template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, cudaArray**, const int*);
+template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, cudaArray**, const int*); // for deformation field
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType **array_d, int *dim) {
-    const unsigned memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType);
-    NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
+int cudaCommon_allocateArrayToDevice(DataType **arrayCuda, const int *dim) {
+    const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType);
+    NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, memSize));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_allocateArrayToDevice<float>(float**, int*);
-template int cudaCommon_allocateArrayToDevice<double>(double**, int*);
-template int cudaCommon_allocateArrayToDevice<int>(int**, int*);
-template int cudaCommon_allocateArrayToDevice<float4>(float4**, int*); // for deformation field
+template int cudaCommon_allocateArrayToDevice<float>(float**, const int*);
+template int cudaCommon_allocateArrayToDevice<double>(double**, const int*);
+template int cudaCommon_allocateArrayToDevice<int>(int**, const int*);
+template int cudaCommon_allocateArrayToDevice<float4>(float4**, const int*); // for deformation field
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType **array_d, int vox) {
-    const unsigned memSize = vox * sizeof(DataType);
-    NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
+int cudaCommon_allocateArrayToDevice(DataType **arrayCuda, const size_t& nVoxels) {
+    NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, nVoxels * sizeof(DataType)));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_allocateArrayToDevice<float>(float**, int);
-template int cudaCommon_allocateArrayToDevice<double>(double**, int);
-template int cudaCommon_allocateArrayToDevice<int>(int**, int);
-template int cudaCommon_allocateArrayToDevice<float4>(float4**, int); // for deformation field
+template int cudaCommon_allocateArrayToDevice<float>(float**, const size_t&);
+template int cudaCommon_allocateArrayToDevice<double>(double**, const size_t&);
+template int cudaCommon_allocateArrayToDevice<int>(int**, const size_t&);
+template int cudaCommon_allocateArrayToDevice<float4>(float4**, const size_t&); // for deformation field
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType **array_d, DataType **array2_d, int *dim) {
-    const unsigned memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType);
-    NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize));
-    NR_CUDA_SAFE_CALL(cudaMalloc(array2_d, memSize));
+int cudaCommon_allocateArrayToDevice(DataType **array1Cuda, DataType **array2Cuda, const int *dim) {
+    const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType);
+    NR_CUDA_SAFE_CALL(cudaMalloc(array1Cuda, memSize));
+    NR_CUDA_SAFE_CALL(cudaMalloc(array2Cuda, memSize));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_allocateArrayToDevice<float>(float**, float**, int*);
-template int cudaCommon_allocateArrayToDevice<double>(double**, double**, int*);
-template int  cudaCommon_allocateArrayToDevice<float4>(float4**, float4**, int*); // for deformation field
+template int cudaCommon_allocateArrayToDevice<float>(float**, float**, const int*);
+template int cudaCommon_allocateArrayToDevice<double>(double**, double**, const int*);
+template int  cudaCommon_allocateArrayToDevice<float4>(float4**, float4**, const int*); // for deformation field
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, DataType *cuPtr, const unsigned nElements) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DataType), cudaMemcpyDeviceToHost));
+int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, const DataType *cuPtr, const size_t& nElements) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(cpuPtr, cuPtr, nElements * sizeof(DataType), cudaMemcpyDeviceToHost));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferFromDeviceToCpu<float>(float *cpuPtr, float *cuPtr, const unsigned nElements);
-template int cudaCommon_transferFromDeviceToCpu<double>(double *cpuPtr, double *cuPtr, const unsigned nElements);
+template int cudaCommon_transferFromDeviceToCpu<float>(float*, const float*, const size_t&);
+template int cudaCommon_transferFromDeviceToCpu<double>(double*, const double*, const size_t&);
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DataType *array_d) {
+int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *arrayCuda) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        NiftiType *array_h = static_cast<NiftiType*>(img->data);
-        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost));
     }
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferFromDeviceToNifti1<float, float>(nifti_image *img, float *array_d);
-template int cudaCommon_transferFromDeviceToNifti1<double, double>(nifti_image *img, double *array_d);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d) {
+int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) {
     if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
         if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) {
@@ -432,34 +402,29 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d) {
             reg_print_msg_error("The nifti image is not a 5D volume");
             return EXIT_FAILURE;
         }
-
-        float4 *array_h;
-        const size_t voxelNumber = CalcVoxelNumber(*img);
-        NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4)));
-        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost));
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        thrust::device_ptr<const float4> arrayCudaPtr(reinterpret_cast<const float4*>(arrayCuda));
+        const thrust::host_vector<float4> array(arrayCudaPtr, arrayCudaPtr + voxelNumber);
         float *niftiImgValues = static_cast<float*>(img->data);
-
         for (size_t i = 0; i < voxelNumber; i++)
-            *niftiImgValues++ = array_h[i].x;
+            *niftiImgValues++ = array[i].x;
         if (img->dim[5] >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
-                *niftiImgValues++ = array_h[i].y;
+                *niftiImgValues++ = array[i].y;
         }
         if (img->dim[5] >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
-                *niftiImgValues++ = array_h[i].z;
+                *niftiImgValues++ = array[i].z;
         }
         if (img->dim[5] >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
-                *niftiImgValues++ = array_h[i].w;
+                *niftiImgValues++ = array[i].w;
         }
-        NR_CUDA_SAFE_CALL(cudaFreeHost(array_h));
-
         return EXIT_SUCCESS;
     } else {
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferFromDeviceToNifti1<DataType, float>(img, array_d);
+            return cudaCommon_transferFromDeviceToNifti1<DataType, float>(img, arrayCuda);
         default:
             reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
             reg_print_msg_error("The image data type is not supported");
@@ -467,46 +432,47 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d) {
         }
     }
 }
-template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image*, float*);
-template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image*, double*);
-template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, float4*); // for deformation field
+template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image*, const float*);
+template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image*, const double*);
+template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, const float4*); // for deformation field
 /* *************************************************************** */
 template<>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray *cuArray_d) {
+int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) {
     if (img->datatype != NIFTI_TYPE_FLOAT32) {
         reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
         reg_print_msg_error("The image data type is not supported");
         return EXIT_FAILURE;
     }
-
-    cudaMemcpy3DParms copyParams = {0};
-    copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]);
-    copyParams.srcArray = cuArray_d;
-    copyParams.dstPtr = make_cudaPitchedPtr((void*)(img->data), copyParams.extent.width * sizeof(float),
-                                            copyParams.extent.width, copyParams.extent.height);
+    cudaMemcpy3DParms copyParams{};
+    copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
+    copyParams.srcArray = const_cast<cudaArray*>(arrayCuda);
+    copyParams.dstPtr = make_cudaPitchedPtr(img->data,
+                                            copyParams.extent.width * sizeof(float),
+                                            copyParams.extent.width,
+                                            copyParams.extent.height);
     copyParams.kind = cudaMemcpyDeviceToHost;
     NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DataType *array_d, DataType *array2_d) {
+int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1");
         reg_print_msg_error("The host and device arrays are of different types");
         return EXIT_FAILURE;
     } else {
-        const size_t voxelNumber = CalcVoxelNumber(*img);
-        NiftiType *array_h = static_cast<NiftiType*>(img->data);
-        NiftiType *array2_h = &array_h[voxelNumber];
-        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
-        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (void*)array2_d, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        NiftiType *array1 = static_cast<NiftiType*>(img->data);
+        NiftiType *array2 = &array1[voxelNumber];
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array1, array1Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array2, array2Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
     }
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d, DataType *array2_d) {
+int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
     if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
         if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) {
@@ -514,52 +480,47 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d, Da
             reg_print_msg_error("The nifti image is not a 5D volume");
             return EXIT_FAILURE;
         }
-        const size_t voxelNumber = CalcVoxelNumber(*img);
-        float4 *array_h = nullptr;
-        float4 *array2_h = nullptr;
-        NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4)));
-        NR_CUDA_SAFE_CALL(cudaMallocHost(&array2_h, voxelNumber * sizeof(float4)));
-        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost));
-        NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (const void*)array2_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost));
-        float *niftiImgValues = static_cast<float *>(img->data);
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        thrust::device_ptr<const float4> array1CudaPtr(reinterpret_cast<const float4*>(array1Cuda));
+        thrust::device_ptr<const float4> array2CudaPtr(reinterpret_cast<const float4*>(array2Cuda));
+        const thrust::host_vector<float4> array1(array1CudaPtr, array1CudaPtr + voxelNumber);
+        const thrust::host_vector<float4> array2(array2CudaPtr, array2CudaPtr + voxelNumber);
+        float *niftiImgValues = static_cast<float*>(img->data);
         for (size_t i = 0; i < voxelNumber; i++) {
-            *niftiImgValues++ = array_h[i].x;
+            *niftiImgValues++ = array1[i].x;
         }
         for (size_t i = 0; i < voxelNumber; i++) {
-            *niftiImgValues++ = array2_h[i].x;
+            *niftiImgValues++ = array2[i].x;
         }
         if (img->dim[5] >= 2) {
             for (size_t i = 0; i < voxelNumber; i++) {
-                *niftiImgValues++ = array_h[i].y;
+                *niftiImgValues++ = array1[i].y;
             }
             for (size_t i = 0; i < voxelNumber; i++) {
-                *niftiImgValues++ = array2_h[i].y;
+                *niftiImgValues++ = array2[i].y;
             }
         }
         if (img->dim[5] >= 3) {
             for (size_t i = 0; i < voxelNumber; i++) {
-                *niftiImgValues++ = array_h[i].z;
+                *niftiImgValues++ = array1[i].z;
             }
             for (size_t i = 0; i < voxelNumber; i++) {
-                *niftiImgValues++ = array2_h[i].z;
+                *niftiImgValues++ = array2[i].z;
             }
         }
         if (img->dim[5] >= 4) {
             for (size_t i = 0; i < voxelNumber; i++) {
-                *niftiImgValues++ = array_h[i].w;
+                *niftiImgValues++ = array1[i].w;
             }
             for (size_t i = 0; i < voxelNumber; i++) {
-                *niftiImgValues++ = array2_h[i].w;
+                *niftiImgValues++ = array2[i].w;
             }
         }
-        NR_CUDA_SAFE_CALL(cudaFreeHost(array_h));
-        NR_CUDA_SAFE_CALL(cudaFreeHost(array2_h));
-
         return EXIT_SUCCESS;
     } else {
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferFromDeviceToNifti1<DataType, float>(img, array_d, array2_d);
+            return cudaCommon_transferFromDeviceToNifti1<DataType, float>(img, array1Cuda, array2Cuda);
         default:
             reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
             reg_print_msg_error("The image data type is not supported");
@@ -567,18 +528,19 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d, Da
         }
     }
 }
-template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image*, float*, float*);
-template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image*, double*, double*);
-template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, float4*, float4*); // for deformation field
+template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image*, const float*, const float*);
+template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image*, const double*, const double*);
+template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, const float4*, const float4*); // for deformation field
 /* *************************************************************** */
-void cudaCommon_free(cudaArray *cuArray_d) {
-    NR_CUDA_SAFE_CALL(cudaFreeArray(cuArray_d));
+void cudaCommon_free(cudaArray *arrayCuda) {
+    if (arrayCuda != nullptr)
+        NR_CUDA_SAFE_CALL(cudaFreeArray(arrayCuda));
 }
 /* *************************************************************** */
 template <class DataType>
-void cudaCommon_free(DataType *array_d) {
-    if (array_d != nullptr)
-        NR_CUDA_SAFE_CALL(cudaFree(array_d));
+void cudaCommon_free(DataType *arrayCuda) {
+    if (arrayCuda != nullptr)
+        NR_CUDA_SAFE_CALL(cudaFree(arrayCuda));
 }
 template void cudaCommon_free<int>(int*);
 template void cudaCommon_free<float>(float*);
@@ -586,42 +548,40 @@ template void cudaCommon_free<double>(double*);
 template void cudaCommon_free<float4>(float4*);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToNiftiSimple(DataType *array_d, nifti_image *img) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice));
+int cudaCommon_transferFromDeviceToNiftiSimple(DataType *arrayCuda, const nifti_image *img) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferFromDeviceToNiftiSimple<int>(int*, nifti_image*);
-template int cudaCommon_transferFromDeviceToNiftiSimple<float>(float*, nifti_image*);
-template int cudaCommon_transferFromDeviceToNiftiSimple<double>(double*, nifti_image*);
+template int cudaCommon_transferFromDeviceToNiftiSimple<int>(int*, const nifti_image*);
+template int cudaCommon_transferFromDeviceToNiftiSimple<float>(float*, const nifti_image*);
+template int cudaCommon_transferFromDeviceToNiftiSimple<double>(double*, const nifti_image*);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *array_d, DataType *img, const unsigned nvox) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice));
+int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *arrayCuda, const DataType *img, const size_t& nvox) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferFromDeviceToNiftiSimple1<int>(int*, int*, const unsigned);
-template int cudaCommon_transferFromDeviceToNiftiSimple1<float>(float*, float*, const unsigned);
-template int cudaCommon_transferFromDeviceToNiftiSimple1<double>(double*, double*, const unsigned);
+template int cudaCommon_transferFromDeviceToNiftiSimple1<int>(int*, const int*, const size_t&);
+template int cudaCommon_transferFromDeviceToNiftiSimple1<float>(float*, const float*, const size_t&);
+template int cudaCommon_transferFromDeviceToNiftiSimple1<double>(double*, const double*, const size_t&);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferArrayFromCpuToDevice(DataType *array_d, DataType *array_cpu, const unsigned nElements) {
-    const unsigned memSize = nElements * sizeof(DataType);
-    NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_cpu, memSize, cudaMemcpyHostToDevice));
+int cudaCommon_transferArrayFromCpuToDevice(DataType *arrayCuda, const DataType *arrayCpu, const size_t& nElements) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, arrayCpu, nElements * sizeof(DataType), cudaMemcpyHostToDevice));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferArrayFromCpuToDevice<int>(int*, int*, const unsigned);
-template int cudaCommon_transferArrayFromCpuToDevice<float>(float*, float*, const unsigned);
-template int cudaCommon_transferArrayFromCpuToDevice<double>(double*, double*, const unsigned);
+template int cudaCommon_transferArrayFromCpuToDevice<int>(int*, const int*, const size_t&);
+template int cudaCommon_transferArrayFromCpuToDevice<float>(float*, const float*, const size_t&);
+template int cudaCommon_transferArrayFromCpuToDevice<double>(double*, const double*, const size_t&);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferArrayFromDeviceToCpu(DataType *array_cpu, DataType *array_d, const unsigned nElements) {
-    const unsigned memSize = nElements * sizeof(DataType);
-    NR_CUDA_SAFE_CALL(cudaMemcpy(array_cpu, array_d, memSize, cudaMemcpyDeviceToHost));
+int cudaCommon_transferArrayFromDeviceToCpu(DataType *arrayCpu, const DataType *arrayCuda, const size_t& nElements) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCpu, arrayCuda, nElements * sizeof(DataType), cudaMemcpyDeviceToHost));
     return EXIT_SUCCESS;
 }
-template int cudaCommon_transferArrayFromDeviceToCpu<int>(int*, int*, const unsigned);
-template int cudaCommon_transferArrayFromDeviceToCpu<float>(float*, float*, const unsigned);
-template int cudaCommon_transferArrayFromDeviceToCpu<double>(double*, double*, const unsigned);
+template int cudaCommon_transferArrayFromDeviceToCpu<int>(int*, const int*, const size_t&);
+template int cudaCommon_transferArrayFromDeviceToCpu<float>(float*, const float*, const size_t&);
+template int cudaCommon_transferArrayFromDeviceToCpu<double>(double*, const double*, const size_t&);
 /* *************************************************************** */
 void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) {
     NR_CUDA_SAFE_CALL(cudaDestroyTextureObject(*texObj));
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index 15886661..c74f8718 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -62,47 +62,47 @@ inline void CheckKernel(const char *file, const int& line, const dim3& grid, con
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(cudaArray**, int*);
+int cudaCommon_allocateArrayToDevice(cudaArray**, const int*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*);
+int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, const int*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType**, int);
+int cudaCommon_allocateArrayToDevice(DataType**, const size_t&);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType**, int*);
+int cudaCommon_allocateArrayToDevice(DataType**, const int*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType**, DataType**, int*);
+int cudaCommon_allocateArrayToDevice(DataType**, DataType**, const int*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*);
+int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*);
+int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, const nifti_image*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(DataType*, nifti_image*);
+int cudaCommon_transferNiftiToArrayOnDevice(DataType*, const nifti_image*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(DataType*, DataType*, nifti_image*);
+int cudaCommon_transferNiftiToArrayOnDevice(DataType*, DataType*, const nifti_image*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferFromDeviceToNifti(nifti_image*, DataType*);
+int cudaCommon_transferFromDeviceToNifti(nifti_image*, const DataType*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferFromDeviceToNifti(nifti_image*, DataType*, DataType*);
+int cudaCommon_transferFromDeviceToNifti(nifti_image*, const DataType*, const DataType*);
 /* *************************************************************** */
 extern "C++"
 void cudaCommon_free(cudaArray*);
@@ -112,23 +112,23 @@ void cudaCommon_free(DataType*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferFromDeviceToNiftiSimple(DataType*, nifti_image*);
+int cudaCommon_transferFromDeviceToNiftiSimple(DataType*, const nifti_image*);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferFromDeviceToNiftiSimple1(DataType*, DataType*, const unsigned);
+int cudaCommon_transferFromDeviceToNiftiSimple1(DataType*, const DataType*, const size_t&);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferFromDeviceToCpu(DataType*, DataType*, const unsigned);
+int cudaCommon_transferFromDeviceToCpu(DataType*, const DataType*, const size_t&);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferArrayFromCpuToDevice(DataType*, DataType*, const unsigned);
+int cudaCommon_transferArrayFromCpuToDevice(DataType*, const DataType*, const size_t&);
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
-int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned);
+int cudaCommon_transferArrayFromDeviceToCpu(DataType*, const DataType*, const size_t&);
 /* *************************************************************** */
 using UniqueTextureObjectPtr = unique_ptr<cudaTextureObject_t, void(*)(cudaTextureObject_t*)>;
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index fc4b9ead..db6cf562 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -56,7 +56,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
     this->gradientCuda = reinterpret_cast<float4*>(gradData);
 
     cudaCommon_free(this->bestDofCuda);
-    if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, (int)this->GetVoxNumber())) {
+    if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, this->GetVoxNumber())) {
         reg_print_fct_error("reg_optimiser_gpu::Initialise()");
         reg_print_msg_error("Error when allocating the best control point array on the GPU");
         reg_exit();
@@ -68,7 +68,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
         this->currentDofBwCuda = reinterpret_cast<float4*>(cppDataBw);
         this->gradientBwCuda = reinterpret_cast<float4*>(gradDataBw);
         cudaCommon_free(this->bestDofBwCuda);
-        if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, (int)this->GetVoxNumberBw())) {
+        if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, this->GetVoxNumberBw())) {
             reg_print_fct_error("reg_optimiser_gpu::Initialise()");
             reg_print_msg_error("Error when allocating the best control point backwards array on the GPU");
             reg_exit();
@@ -153,16 +153,16 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox,
     reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
     this->firstCall = true;
     cudaCommon_free(this->array1); cudaCommon_free(this->array2);
-    if (cudaCommon_allocateArrayToDevice<float4>(&this->array1, (int)this->GetVoxNumber()) ||
-        cudaCommon_allocateArrayToDevice<float4>(&this->array2, (int)this->GetVoxNumber())) {
+    if (cudaCommon_allocateArrayToDevice<float4>(&this->array1, this->GetVoxNumber()) ||
+        cudaCommon_allocateArrayToDevice<float4>(&this->array2, this->GetVoxNumber())) {
         reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()");
         reg_print_msg_error("Error when allocating the conjugate gradient array on the GPU");
         reg_exit();
     }
     if (this->isSymmetric) {
         cudaCommon_free(this->array1Bw); cudaCommon_free(this->array2Bw);
-        if (cudaCommon_allocateArrayToDevice<float4>(&this->array1Bw, (int)this->GetVoxNumberBw()) ||
-            cudaCommon_allocateArrayToDevice<float4>(&this->array2Bw, (int)this->GetVoxNumberBw())) {
+        if (cudaCommon_allocateArrayToDevice<float4>(&this->array1Bw, this->GetVoxNumberBw()) ||
+            cudaCommon_allocateArrayToDevice<float4>(&this->array2Bw, this->GetVoxNumberBw())) {
             reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()");
             reg_print_msg_error("Error when allocating the conjugate gradient array backwards on the GPU");
             reg_exit();
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 58a3fcb8..1ea2ba08 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -12,6 +12,7 @@
 
 #include "_reg_ssd_gpu.h"
 #include "_reg_ssd_kernels.cu"
+#include <thrust/device_vector.h>
 
 /* *************************************************************** */
 reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() {
@@ -56,7 +57,7 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
         reg_exit();
     }
 #ifndef NDEBUG
-    printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n");
+    reg_print_msg_debug("reg_ssd_gpu::InitialiseMeasure()");
 #endif
 }
 /* *************************************************************** */
@@ -77,8 +78,7 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage,
                                                       cudaChannelFormatKindSigned, 1);
 
     // Create an array on the device to store the absolute difference values
-    float *absoluteValuesCuda;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValuesCuda, activeVoxelNumber * sizeof(float)));
+    thrust::device_vector<float> absoluteValuesCuda(activeVoxelNumber);
 
     // Compute the absolute values
     const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getSquaredDifference;
@@ -86,17 +86,14 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage,
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     if (referenceImageDim.z > 1)
-        reg_getSquaredDifference3D_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda, *referenceTexture, *warpedTexture, *maskTexture,
-                                                                   referenceImageDim, (unsigned)activeVoxelNumber);
-    else reg_getSquaredDifference2D_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda, *referenceTexture, *warpedTexture, *maskTexture,
-                                                                    referenceImageDim, (unsigned)activeVoxelNumber);
+        reg_getSquaredDifference3D_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture,
+                                                                   *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber);
+    else reg_getSquaredDifference2D_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture,
+                                                                    *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 
     // Perform a reduction on the absolute values
-    const double ssd = (double)reg_sumReduction_gpu(absoluteValuesCuda, activeVoxelNumber) / (double)activeVoxelNumber;
-
-    // Free the absolute value array
-    NR_CUDA_SAFE_CALL(cudaFree(absoluteValuesCuda));
+    const double ssd = (double)reg_sumReduction_gpu(absoluteValuesCuda.data().get(), activeVoxelNumber) / (double)activeVoxelNumber;
 
     return ssd;
 }
diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu
index c3832e52..5a823634 100755
--- a/reg-lib/cuda/_reg_ssd_kernels.cu
+++ b/reg-lib/cuda/_reg_ssd_kernels.cu
@@ -25,7 +25,7 @@ __global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference,
                                                   const unsigned activeVoxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
-        const unsigned index = tex1Dfetch<int>(maskTexture, tid);
+        const int index = tex1Dfetch<int>(maskTexture, tid);
         int quot, rem;
         reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem);
         const int z = quot;
@@ -49,7 +49,7 @@ __global__ void reg_getSquaredDifference2D_kernel(float *squaredDifference,
                                                   const unsigned activeVoxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
-        const unsigned index = tex1Dfetch<int>(maskTexture, tid);
+        const int index = tex1Dfetch<int>(maskTexture, tid);
         int quot, rem;
         reg_div_cuda(index, referenceImageDim.x, quot, rem);
         const int y = quot, x = rem;
@@ -73,7 +73,7 @@ __global__ void reg_getSSDGradient2D_kernel(float4 *ssdGradient,
                                             const unsigned activeVoxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
-        const unsigned index = tex1Dfetch<int>(maskTexture, tid);
+        const int index = tex1Dfetch<int>(maskTexture, tid);
         int quot, rem;
         reg_div_cuda(index, referenceImageDim.x, quot, rem);
         const int y = quot, x = rem;
@@ -107,7 +107,7 @@ __global__ void reg_getSSDGradient3D_kernel(float4 *ssdGradient,
                                             const unsigned activeVoxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
-        const unsigned index = tex1Dfetch<int>(maskTexture, tid);
+        const int index = tex1Dfetch<int>(maskTexture, tid);
         int quot, rem;
         reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem);
         const int z = quot;
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index 0124a95c..e99ccf25 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -88,7 +88,7 @@ void launchAffine(mat44 *affineTransformation,
    free(trans);
 
    uint3 dims_d = make_uint3(deformationField->nx, deformationField->ny, deformationField->nz);
-   affineKernel << <G1_b, B1_b >> >(*trans_d, *def_d, *mask_d, dims_d, CalcVoxelNumber(*deformationField), compose);
+   affineKernel << <G1_b, B1_b >> >(*trans_d, *def_d, *mask_d, dims_d, NiftiImage::calcVoxelNumber(deformationField, 3), compose);
 
 #ifndef NDEBUG
    NR_CUDA_CHECK_KERNEL(G1_b, B1_b);
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index aa2b044c..eb3c7cb3 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -397,7 +397,7 @@ void launchResample(nifti_image *floatingImage,
 		reg_exit();
 	}
 
-	const size_t targetVoxelNumber = CalcVoxelNumber(*warpedImage);
+	const size_t targetVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3);
 
 	//the below lines need to be moved to cu common
 	cudaDeviceProp prop;
@@ -410,7 +410,7 @@ void launchResample(nifti_image *floatingImage,
 	dim3 mygrid(blocks, 1, 1);
 	dim3 myblocks(maxThreads, 1, 1);
 
-	ulong2 voxelNumber = make_ulong2(targetVoxelNumber, CalcVoxelNumber(*floatingImage));
+	ulong2 voxelNumber = make_ulong2(targetVoxelNumber, NiftiImage::calcVoxelNumber(floatingImage, 3));
 	uint3 fi_xyz = make_uint3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 	uint2 wi_tu = make_uint2(warpedImage->nt, warpedImage->nu);
 	 if (floatingImage->nz > 1) {
diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp
index 18e2a202..1d54e6b0 100644
--- a/reg-test/reg_test_affineDeformationField.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -165,7 +165,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
                 // Check all values
                 nifti_image *defField = content->GetDeformationField();
                 auto defFieldPtrX = static_cast<float*>(defField->data);
-                const size_t voxelNumber = CalcVoxelNumber(*defField);
+                const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
                 auto defFieldPtrY = &defFieldPtrX[voxelNumber];
                 auto defFieldPtrZ = &defFieldPtrY[voxelNumber];
                 for (size_t i = 0; i < voxelNumber; ++i) {
diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp
index 92171dd3..421f57ae 100644
--- a/reg-test/reg_test_be.cpp
+++ b/reg-test/reg_test_be.cpp
@@ -78,16 +78,14 @@ class BendingEnergyTest {
         ));
 
         // Set some scaling transformation in the transformations
-        mat44 *affine2d = new mat44;
-        mat44 *affine3d = new mat44;
-        reg_mat44_eye(affine2d);
-        reg_mat44_eye(affine3d);
-        affine3d->m[0][0] = affine2d->m[0][0] = 0.8f;
-        affine3d->m[1][1] = affine2d->m[1][1] = 1.2f;
-        affine3d->m[2][2] = 1.1f;
-        reg_affine_getDeformationField(affine2d, controlPointGrid2d);
-        reg_affine_getDeformationField(affine3d, controlPointGrid3d);
-        delete affine2d, affine3d;
+        mat44 affine2d, affine3d;
+        reg_mat44_eye(&affine2d);
+        reg_mat44_eye(&affine3d);
+        affine3d.m[0][0] = affine2d.m[0][0] = 0.8f;
+        affine3d.m[1][1] = affine2d.m[1][1] = 1.2f;
+        affine3d.m[2][2] = 1.1f;
+        reg_affine_getDeformationField(&affine2d, controlPointGrid2d);
+        reg_affine_getDeformationField(&affine3d, controlPointGrid3d);
 
         // Add the test data
         testData.emplace_back(TestData(
@@ -113,34 +111,34 @@ class BendingEnergyTest {
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
                 unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
-                float be = compute->ApproxBendingEnergy();
+                float be = static_cast<float>(compute->ApproxBendingEnergy());
                 testCases.push_back({ testName + " " + platform->GetName(), be, expected });
             }
         }
     }
-    float GetBe2d(NiftiImage cpp)
-    {
+
+    float GetBe2d(const NiftiImage& cpp) {
         // variable to store the bending energy and the normalisation value
         double be = 0;
 
         // The BSpine basis values are known since the control points all have a relative position equal to 0
         float basis[3], first[3], second[3];
-        basis[0]=1.f/6.f;basis[1]=4.f/6.f;basis[2]=1.f/6.f;
-        first[0]=-.5f; first[1]=0.f; first[2]=.5f;
-        second[0]=1.f; second[1]=-2.f;second[2]=1.f;
+        basis[0] = 1.f / 6.f; basis[1] = 4.f / 6.f; basis[2] = 1.f / 6.f;
+        first[0] = -.5f; first[1] = 0.f; first[2] = .5f;
+        second[0] = 1.f; second[1] = -2.f; second[2] = 1.f;
 
         // the first and last control points along each axis are
         // ignored for lack of support
-        auto cppPtr = cpp.data();
-        for(unsigned y=1; y<cpp->dim[2]-1;++y){
-            for(unsigned x=1; x<cpp->dim[1]-1;++x){
+        const auto cppPtr = cpp.data();
+        for (int y = 1; y < cpp->dim[2] - 1; ++y) {
+            for (int x = 1; x < cpp->dim[1] - 1; ++x) {
                 // The BE is computed as
                 // BE=dXX/dx^2 + dYY/dy^2 + dXX/dy^2 + dYY/dx^2 + 2 * [dXY/dx^2 + dXY/dy^2]
-                float XX_x=0,YY_x=0, XY_x=0;
-                float XX_y=0,YY_y=0, XY_y=0;
-                for(unsigned j=0; j<3;++j){
-                    for(unsigned i=0; i<3;++i){
-                        unsigned cpIndex = (y+j-1) * cpp->dim[1] + x+i-1;
+                float XX_x = 0, YY_x = 0, XY_x = 0;
+                float XX_y = 0, YY_y = 0, XY_y = 0;
+                for (unsigned j = 0; j < 3; ++j) {
+                    for (unsigned i = 0; i < 3; ++i) {
+                        unsigned cpIndex = (y + j - 1) * cpp->dim[1] + x + i - 1;
                         float x_val = cppPtr[cpIndex];
                         float y_val = cppPtr[cpIndex + cpp.nVoxelsPerVolume()];
                         XX_x += x_val * second[i] * basis[j];
@@ -151,39 +149,39 @@ class BendingEnergyTest {
                         XY_y += y_val * first[i] * first[j];
                     }
                 }
-                be += XX_x*XX_x + YY_x*YY_x + XX_y*XX_y + YY_y*YY_y + \
-                    2.*XY_x*XY_x + 2.*XY_y*XY_y;
+                be += XX_x * XX_x + YY_x * YY_x + XX_y * XX_y + YY_y * YY_y + \
+                    2. * XY_x * XY_x + 2. * XY_y * XY_y;
             }
         }
-        return (float)(be/(double)cpp.nVoxels());
+        return (float)(be / (double)cpp.nVoxels());
     }
-    float GetBe3d(NiftiImage cpp)
-    {
+
+    float GetBe3d(const NiftiImage& cpp) {
         // variable to store the bending energy and the normalisation value
         double be = 0;
 
         // The BSpine basis values are known since the control points all have a relative position equal to 0
         float basis[3], first[3], second[3];
-        basis[0]=1.f/6.f;basis[1]=4.f/6.f;basis[2]=1.f/6.f;
-        first[0]=-.5f; first[1]=0.f; first[2]=.5f;
-        second[0]=1.f; second[1]=-2.f;second[2]=1.f;
+        basis[0] = 1.f / 6.f; basis[1] = 4.f / 6.f; basis[2] = 1.f / 6.f;
+        first[0] = -.5f; first[1] = 0.f; first[2] = .5f;
+        second[0] = 1.f; second[1] = -2.f; second[2] = 1.f;
 
-        auto cppPtr = cpp.data();
+        const auto cppPtr = cpp.data();
         // the first and last control points along each axis are
         // ignored for lack of support
-        for(unsigned z=1; z<cpp->nz-1;++z){
-            for(unsigned y=1; y<cpp->ny-1;++y){
-                for(unsigned x=1; x<cpp->nx-1;++x){
-                    float XX_x=0, YY_x=0, ZZ_x=0, XY_x=0, YZ_x=0, XZ_x=0;
-                    float XX_y=0, YY_y=0, ZZ_y=0, XY_y=0, YZ_y=0, XZ_y=0;
-                    float XX_z=0, YY_z=0, ZZ_z=0, XY_z=0, YZ_z=0, XZ_z=0;
-                    for(unsigned k=0; k<3;++k){
-                        for(unsigned j=0; j<3;++j){
-                            for(unsigned i=0; i<3;++i){
-                                unsigned cpIndex = ((z+k-1) * cpp->ny + y+j-1 ) * cpp->nx + x+i-1;
+        for (int z = 1; z < cpp->nz - 1; ++z) {
+            for (int y = 1; y < cpp->ny - 1; ++y) {
+                for (int x = 1; x < cpp->nx - 1; ++x) {
+                    float XX_x = 0, YY_x = 0, ZZ_x = 0, XY_x = 0, YZ_x = 0, XZ_x = 0;
+                    float XX_y = 0, YY_y = 0, ZZ_y = 0, XY_y = 0, YZ_y = 0, XZ_y = 0;
+                    float XX_z = 0, YY_z = 0, ZZ_z = 0, XY_z = 0, YZ_z = 0, XZ_z = 0;
+                    for (unsigned k = 0; k < 3; ++k) {
+                        for (unsigned j = 0; j < 3; ++j) {
+                            for (unsigned i = 0; i < 3; ++i) {
+                                unsigned cpIndex = ((z + k - 1) * cpp->ny + y + j - 1) * cpp->nx + x + i - 1;
                                 float x_val = cppPtr[cpIndex];
                                 float y_val = cppPtr[cpIndex + cpp.nVoxelsPerVolume()];
-                                float z_val = cppPtr[cpIndex + 2*cpp.nVoxelsPerVolume()];
+                                float z_val = cppPtr[cpIndex + 2 * cpp.nVoxelsPerVolume()];
                                 XX_x += x_val * second[i] * basis[j] * basis[k];
                                 YY_x += x_val * basis[i] * second[j] * basis[k];
                                 ZZ_x += x_val * basis[i] * basis[j] * second[k];
@@ -207,16 +205,16 @@ class BendingEnergyTest {
                             }
                         }
                     }
-                    be += XX_x*XX_x + YY_x*YY_x + ZZ_x*ZZ_x + \
-                        XX_y*XX_y + YY_y*YY_y + ZZ_y*ZZ_y + \
-                        XX_z*XX_z + YY_z*YY_z + ZZ_z*ZZ_z + \
-                        2.*XY_x*XY_x + 2.*YZ_x*YZ_x + 2.*XZ_x*XZ_x + \
-                        2.*XY_y*XY_y + 2.*YZ_y*YZ_y + 2.*XZ_y*XZ_y + \
-                        2.*XY_z*XY_z + 2.*YZ_z*YZ_z + 2.*XZ_z*XZ_z;
+                    be += XX_x * XX_x + YY_x * YY_x + ZZ_x * ZZ_x + \
+                        XX_y * XX_y + YY_y * YY_y + ZZ_y * ZZ_y + \
+                        XX_z * XX_z + YY_z * YY_z + ZZ_z * ZZ_z + \
+                        2. * XY_x * XY_x + 2. * YZ_x * YZ_x + 2. * XZ_x * XZ_x + \
+                        2. * XY_y * XY_y + 2. * YZ_y * YZ_y + 2. * XZ_y * XZ_y + \
+                        2. * XY_z * XY_z + 2. * YZ_z * YZ_z + 2. * XZ_z * XZ_z;
                 }
             }
         }
-        return (float)(be/(double)cpp.nVoxels());
+        return (float)(be / (double)cpp.nVoxels());
     }
 };
 
@@ -229,7 +227,7 @@ TEST_CASE_METHOD(BendingEnergyTest, "Bending Energy", "[unit]") {
         SECTION(testName) {
             std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
             // if (fabs(result - expected) > EPS){
-                std::cout << "Result=" << result << " | Expected=" << expected << std::endl;
+            std::cout << "Result=" << result << " | Expected=" << expected << std::endl;
             // }
             REQUIRE(fabs(result - expected) < EPS);
         }
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index 3957ef77..7c6e1184 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -38,8 +38,7 @@ class NMITest {
         // Ensure at least one pixel contains the max and one the min
         ref2dPtr[0] = flo2dPtr[0] = 2.f;
         ref2dPtr[1] = flo2dPtr[1] = 65.f;
-        for (size_t i = 2; i < reference2d.nVoxels(); ++i)
-        {
+        for (size_t i = 2; i < reference2d.nVoxels(); ++i) {
             ref2dPtr[i] = (int)distr(gen); // cast to integer to not use PW
             flo2dPtr[i] = (int)distr(gen);
         }
@@ -98,7 +97,7 @@ class NMITest {
                 measure->Initialise(*measure_nmi, *content);
                 double nmi = measure_nmi->GetSimilarityMeasureValue();
 
-                testCases.push_back({ testName + " " + platform->GetName(), nmi, expected});
+                testCases.push_back({ testName + " " + platform->GetName(), nmi, expected });
             }
         }
     }
@@ -108,8 +107,7 @@ class NMITest {
     using TestCase = std::tuple<std::string, double, double>;
     inline static vector<TestCase> testCases;
 
-    double GetNMIPW(const NiftiImage& ref, const NiftiImage& flo)
-    {   
+    double GetNMIPW(const NiftiImage& ref, const NiftiImage& flo) {
         // Allocate a joint histogram and fill it with zeros
         double jh[68][68];
         for (unsigned i = 0; i < 68; ++i)
@@ -118,23 +116,21 @@ class NMITest {
         // Fill it with the intensity values
         const auto refPtr = ref.data();
         const auto floPtr = flo.data();
-        for (auto refItr = refPtr.begin(), floItr = floPtr.begin();
-            refItr != refPtr.end();
-            ++refItr, ++floItr)
+        for (auto refItr = refPtr.begin(), floItr = floPtr.begin(); refItr != refPtr.end(); ++refItr, ++floItr)
             jh[(int)*refItr][(int)*floItr]++;
         // Convert the histogram into an image to later apply the convolution
         vector<NiftiImage::dim_t> dim{ 68, 68 };
         NiftiImage jointHistogram(dim, NIFTI_TYPE_FLOAT64);
-        double *jhPtr = static_cast<double *>(jointHistogram->data);
-        // Conver the occurances to probabilities
+        double *jhPtr = static_cast<double*>(jointHistogram->data);
+        // Convert the occurrences to probabilities
         for (unsigned i = 0; i < 68; ++i)
             for (unsigned j = 0; j < 68; ++j)
                 *jhPtr++ = jh[i][j] / ref.nVoxels();
         // Apply a convolution to mimic the parzen windowing
-        float sigma[1] = {1.f};
+        float sigma[1] = { 1.f };
         reg_tools_kernelConvolution(jointHistogram, sigma, CUBIC_SPLINE_KERNEL);
         // Restore the jh array
-        jhPtr = static_cast<double *>(jointHistogram->data);
+        jhPtr = static_cast<double*>(jointHistogram->data);
         for (unsigned i = 0; i < 68; ++i)
             for (unsigned j = 0; j < 68; ++j)
                 jh[i][j] = *jhPtr++;
@@ -142,20 +138,18 @@ class NMITest {
         double ref_ent = 0.;
         double flo_ent = 0.;
         double joi_ent = 0.;
-        for (unsigned i = 0; i < 68; ++i)
-        {
+        for (unsigned i = 0; i < 68; ++i) {
             double ref_pro = 0.;
             double flo_pro = 0.;
-            for (unsigned j = 0; j < 68; ++j)
-            {
+            for (unsigned j = 0; j < 68; ++j) {
                 flo_pro += jh[i][j];
                 ref_pro += jh[j][i];
-                if(jh[i][j]>0.)
+                if (jh[i][j] > 0.)
                     joi_ent -= jh[i][j] * log(jh[i][j]);
             }
-            if (ref_pro>0)
+            if (ref_pro > 0)
                 ref_ent -= ref_pro * log(ref_pro);
-            if (flo_pro>0)
+            if (flo_pro > 0)
                 flo_ent -= flo_pro * log(flo_pro);
         }
         double nmi = (ref_ent + flo_ent) / joi_ent;
@@ -171,7 +165,7 @@ TEST_CASE_METHOD(NMITest, "NMI", "[unit]") {
 
         SECTION(testName) {
             std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
-            if (fabs(result - expected) > EPS){
+            if (fabs(result - expected) > EPS) {
                 std::cout << "Result=" << result << " | Expected=" << expected << std::endl;
             }
             REQUIRE(fabs(result - expected) < EPS);
diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp
index 55b824b0..4768d831 100644
--- a/reg-test/reg_test_regr_blockMatching.cpp
+++ b/reg-test/reg_test_regr_blockMatching.cpp
@@ -19,7 +19,7 @@ class BMTest {
         if (!testCases.empty())
             return;
 
-        std::mt19937 gen(0);        
+        std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a reference and floating 2D images
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
index 5a075ae8..a1ac51a3 100644
--- a/reg-test/reg_test_regr_lts.cpp
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -114,7 +114,7 @@ class LTSTest {
             contentCuda->SetWarped(warpedCuda.disown());
 
             // Initialise the block matching and run it on the CPU
-            unique_ptr<BlockMatchingKernel> bmKernelCpu { new CpuBlockMatchingKernel(contentCpu.get()) };
+            unique_ptr<BlockMatchingKernel> bmKernelCpu{ new CpuBlockMatchingKernel(contentCpu.get()) };
             bmKernelCpu->Calculate();
 
             // Set the CUDA block matching parameters

From 4a98c0863e21626e65c3814b153890934b381413 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 31 Jul 2023 14:06:30 +0100
Subject: [PATCH 176/314] Rearchitect reg_measure to handle forward and
 backward similarity measure values #92

---
 niftyreg_build_version.txt      |   2 +-
 reg-apps/reg_tools.cpp          |   4 +-
 reg-lib/cpu/_reg_dti.cpp        | 582 ++++++++++++++------------------
 reg-lib/cpu/_reg_dti.h          |  14 +-
 reg-lib/cpu/_reg_kld.cpp        | 154 +++------
 reg-lib/cpu/_reg_kld.h          |  18 +-
 reg-lib/cpu/_reg_lncc.cpp       | 391 ++++++++++-----------
 reg-lib/cpu/_reg_lncc.h         |  70 +---
 reg-lib/cpu/_reg_measure.h      |  41 ++-
 reg-lib/cpu/_reg_mind.cpp       | 487 ++++++++++++--------------
 reg-lib/cpu/_reg_mind.h         |  50 +--
 reg-lib/cpu/_reg_nmi.cpp        | 120 ++++---
 reg-lib/cpu/_reg_nmi.h          |  14 +-
 reg-lib/cpu/_reg_ssd.cpp        | 230 ++++++-------
 reg-lib/cpu/_reg_ssd.h          |  49 +--
 reg-lib/cuda/_reg_measure_gpu.h |  21 +-
 reg-lib/cuda/_reg_nmi_gpu.cu    | 101 +++---
 reg-lib/cuda/_reg_nmi_gpu.h     |  12 +-
 reg-lib/cuda/_reg_ssd_gpu.cu    |   6 +-
 reg-lib/cuda/_reg_ssd_gpu.h     |   6 +-
 20 files changed, 1071 insertions(+), 1301 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 594cd09d..9530e048 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-295
+296
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 4f2ea7b8..5c1d5eeb 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -1037,7 +1037,7 @@ int main(int argc, char **argv)
         outputImage->data = malloc(outputImage->nvox * outputImage->nbyper);
         // Compute the MIND descriptor
         int *mask = (int *)calloc(image->nvox, sizeof(int));
-        GetMINDImageDescriptor(image, outputImage, mask, 1, 0);
+        GetMindImageDescriptor(image, outputImage, mask, 1, 0);
         free(mask);
         // Save the MIND descriptor image
         if(flag->outputImageFlag)
@@ -1064,7 +1064,7 @@ int main(int argc, char **argv)
         outputImage->data = malloc(outputImage->nvox * outputImage->nbyper);
         // Compute the MIND-SSC descriptor
         int *mask = (int *)calloc(image->nvox, sizeof(int));
-        GetMINDSSCImageDescriptor(image, outputImage, mask, 1, 0);
+        GetMindSscImageDescriptor(image, outputImage, mask, 1, 0);
         free(mask);
         // Save the MIND descriptor image
         if(flag->outputImageFlag)
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index e9c99a2f..d4fa63be 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -13,11 +13,9 @@
 #include "_reg_dti.h"
 
 /* *************************************************************** */
-reg_dti::reg_dti()
-   : reg_measure()
-{
+reg_dti::reg_dti(): reg_measure() {
 #ifndef NDEBUG
-   reg_print_msg_debug("reg_dti constructor called");
+    reg_print_msg_debug("reg_dti constructor called");
 #endif
 }
 /* *************************************************************** */
@@ -32,89 +30,82 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg,
                                 int *floMask,
                                 nifti_image *warpedImgBw,
                                 nifti_image *warpedGradBw,
-                                nifti_image *voxelBasedGradBw)
-{
-   // Set the pointers using the parent class function
-   reg_measure::InitialiseMeasure(refImg,
-                                  floImg,
-                                  refMask,
-                                  warpedImg,
-                                  warpedGrad,
-                                  voxelBasedGrad,
-                                  localWeightSim,
-                                  floMask,
-                                  warpedImgBw,
-                                  warpedGradBw,
-                                  voxelBasedGradBw);
+                                nifti_image *voxelBasedGradBw) {
+    // Set the pointers using the parent class function
+    reg_measure::InitialiseMeasure(refImg,
+                                   floImg,
+                                   refMask,
+                                   warpedImg,
+                                   warpedGrad,
+                                   voxelBasedGrad,
+                                   localWeightSim,
+                                   floMask,
+                                   warpedImgBw,
+                                   warpedGradBw,
+                                   voxelBasedGradBw);
 
-   // Check that the input images have the same number of time point
-   if(this->referenceImage->nt != this->floatingImage->nt)
-   {
-      reg_print_fct_error("reg_dti::InitialiseMeasure");
-      reg_print_msg_error("This number of time point should be the same for both input images");
-      reg_exit();
-   }
+    // Check that the input images have the same number of time point
+    if (this->referenceImage->nt != this->floatingImage->nt) {
+        reg_print_fct_error("reg_dti::InitialiseMeasure");
+        reg_print_msg_error("This number of time point should be the same for both input images");
+        reg_exit();
+    }
 
-   int j=0;
-   for(int i=0; i<refImg->nt; ++i)
-   {
-      //JM - note, the specific value of timePointWeight is not used for DTI images
-      //any value > 0 indicates the 'time point' is active
-      if(this->timePointWeight[i]>0)
-      {
-         this->dtIndicies[j++]=i;
+    int j = 0;
+    for (int i = 0; i < refImg->nt; ++i) {
+        //JM - note, the specific value of timePointWeight is not used for DTI images
+        //any value > 0 indicates the 'time point' is active
+        if (this->timePointWeight[i] > 0) {
+            this->dtIndicies[j++] = i;
 #ifndef NDEBUG
-         reg_print_msg_debug("reg_dti::InitialiseMeasure().");
-         char text[255];
-         sprintf(text, "Active time point: %i", i);
-         reg_print_msg_debug(text);
+            reg_print_msg_debug("reg_dti::InitialiseMeasure()");
+            char text[255];
+            sprintf(text, "Active time point: %i", i);
+            reg_print_msg_debug(text);
 #endif
-      }
-   }
-   if((refImg->nz>1 && j!=6) && (refImg->nz==1 && j!=3))
-   {
-      reg_print_fct_error("reg_dti::InitialiseMeasure");
-      reg_print_msg_error("Unexpected number of DTI components");
-      reg_exit();
-   }
+        }
+    }
+    if ((refImg->nz > 1 && j != 6) && (refImg->nz == 1 && j != 3)) {
+        reg_print_fct_error("reg_dti::InitialiseMeasure");
+        reg_print_msg_error("Unexpected number of DTI components");
+        reg_exit();
+    }
 }
 /* *************************************************************** */
 template<class DataType>
-double reg_getDTIMeasureValue(nifti_image *referenceImage,
-                              nifti_image *warpedImage,
-                              int *mask,
-                              unsigned *dtIndicies
-                             )
-{
+double reg_getDTIMeasureValue(const nifti_image *referenceImage,
+                              const nifti_image *warpedImage,
+                              const int *mask,
+                              const unsigned *dtIndicies) {
 #ifdef _WIN32
-   long voxel;
-   const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
+    long voxel;
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
 #else
-   size_t voxel;
-   const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+    size_t voxel;
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
 
-   /* As the tensor has 6 unique components that we need to worry about, read them out
-   for the floating and reference images. */
-   DataType *firstWarpedVox = static_cast<DataType *>(warpedImage->data);
-   DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]];
-   DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]];
-   DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]];
-   DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]];
-   DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]];
-   DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]];
+    /* As the tensor has 6 unique components that we need to worry about, read them out
+    for the floating and reference images. */
+    const DataType *firstWarpedVox = static_cast<DataType*>(warpedImage->data);
+    const DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber * dtIndicies[0]];
+    const DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber * dtIndicies[1]];
+    const DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber * dtIndicies[2]];
+    const DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber * dtIndicies[3]];
+    const DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber * dtIndicies[4]];
+    const DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber * dtIndicies[5]];
 
-   DataType *firstRefVox = static_cast<DataType *>(referenceImage->data);
-   DataType *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]];
-   DataType *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]];
-   DataType *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]];
-   DataType *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]];
-   DataType *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]];
-   DataType *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]];
+    const DataType *firstRefVox = static_cast<DataType*>(referenceImage->data);
+    const DataType *referenceIntensityXX = &firstRefVox[voxelNumber * dtIndicies[0]];
+    const DataType *referenceIntensityXY = &firstRefVox[voxelNumber * dtIndicies[1]];
+    const DataType *referenceIntensityYY = &firstRefVox[voxelNumber * dtIndicies[2]];
+    const DataType *referenceIntensityXZ = &firstRefVox[voxelNumber * dtIndicies[3]];
+    const DataType *referenceIntensityYZ = &firstRefVox[voxelNumber * dtIndicies[4]];
+    const DataType *referenceIntensityZZ = &firstRefVox[voxelNumber * dtIndicies[5]];
 
-   double DTI_cost=0, n=0;
-   const double twoThirds = (2.0/3.0);
-   DataType rXX, rXY, rYY, rXZ, rYZ, rZZ;
+    double dtiCost = 0, n = 0;
+    constexpr double twoThirds = 2.0 / 3.0;
+    DataType rXX, rXY, rYY, rXZ, rYZ, rZZ;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(referenceImage, referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \
@@ -122,115 +113,65 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage,
           warpedIntensityXX,warpedIntensityXY,warpedIntensityXZ, \
           warpedIntensityYY,warpedIntensityYZ, warpedIntensityZZ, mask,voxelNumber) \
    private(rXX, rXY, rYY, rXZ, rYZ, rZZ) \
-   reduction(+:DTI_cost, n)
+   reduction(+:dtiCost, n)
 #endif
-   for(voxel=0; voxel<voxelNumber; ++voxel)
-   {
-      // Check if the current voxel belongs to the mask and the intensities are not nans
-      if(mask[voxel]>-1 )
-      {
-         if(referenceIntensityXX[voxel]==referenceIntensityXX[voxel] &&
-               warpedIntensityXX[voxel]==warpedIntensityXX[voxel])
-         {
-            // Calculate the elementwise residual of the diffusion tensor components
-            rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel];
-            rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel];
-            rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel];
-            rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel];
-            rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel];
-            rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel];
-            DTI_cost -= twoThirds * (reg_pow2(rXX) + reg_pow2(rYY) + reg_pow2(rZZ))
-                        + 2.0 * (reg_pow2(rXY) + reg_pow2(rXZ) + reg_pow2(rYZ))
-                        - twoThirds * (rXX*rYY+rXX*rZZ+rYY*rZZ);
-            n++;
-         } // check if values are defined
-      } // check if voxel belongs mask
-   } // loop over voxels
-   return DTI_cost/n;
+    for (voxel = 0; voxel < voxelNumber; ++voxel) {
+        // Check if the current voxel belongs to the mask and the intensities are not nans
+        if (mask[voxel] > -1) {
+            if (referenceIntensityXX[voxel] == referenceIntensityXX[voxel] &&
+                warpedIntensityXX[voxel] == warpedIntensityXX[voxel]) {
+                // Calculate the elementwise residual of the diffusion tensor components
+                rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel];
+                rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel];
+                rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel];
+                rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel];
+                rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel];
+                rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel];
+                dtiCost -= twoThirds * (reg_pow2(rXX) + reg_pow2(rYY) + reg_pow2(rZZ))
+                    + 2.0 * (reg_pow2(rXY) + reg_pow2(rXZ) + reg_pow2(rYZ))
+                    - twoThirds * (rXX * rYY + rXX * rZZ + rYY * rZZ);
+                n++;
+            } // check if values are defined
+        } // check if voxel belongs mask
+    } // loop over voxels
+    return dtiCost / n;
 }
-template double reg_getDTIMeasureValue<float>(nifti_image *,nifti_image *,int *, unsigned *);
-template double reg_getDTIMeasureValue<double>(nifti_image *,nifti_image *,int *, unsigned *);
 /* *************************************************************** */
-double reg_dti::GetSimilarityMeasureValue()
-{
-   // Check that all the specified image are of the same datatype
-   if(this->warpedImage->datatype != this->referenceImage->datatype)
-   {
-      reg_print_fct_error("reg_dti::GetSimilarityMeasureValue");
-      reg_print_msg_error("Both input images are expected to have the same type");
-      reg_exit();
-   }
-   double DTIMeasureValue;
-   switch(this->referenceImage->datatype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      DTIMeasureValue = reg_getDTIMeasureValue<float>
-                        (this->referenceImage,
-                         this->warpedImage,
-                         this->referenceMask,
-                         this->dtIndicies
-                        );
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      DTIMeasureValue = reg_getDTIMeasureValue<double>
-                        (this->referenceImage,
-                         this->warpedImage,
-                         this->referenceMask,
-                         this->dtIndicies
-                        );
-      break;
-   default:
-      reg_print_fct_error("reg_dti::GetSimilarityMeasureValue");
-      reg_print_msg_error("Result pixel type unsupported in the DTI computation function");
-      reg_exit();
-   }
-
-   // Backward computation
-   if(this->isSymmetric)
-   {
-      // Check that all the specified image are of the same datatype
-      if(this->warpedImageBw->datatype != this->floatingImage->datatype)
-      {
-         reg_print_fct_error("reg_dti::GetSimilarityMeasureValue");
-         reg_print_msg_error("Both input images are expected to have the same type");
-         reg_exit();
-      }
-      switch(this->floatingImage->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         DTIMeasureValue += reg_getDTIMeasureValue<float>
-                            (this->floatingImage,
-                             this->warpedImageBw,
-                             this->floatingMask,
-                             this->dtIndicies
-                            );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         DTIMeasureValue += reg_getDTIMeasureValue<double>
-                            (this->floatingImage,
-                             this->warpedImageBw,
-                             this->floatingMask,
-                             this->dtIndicies
-                            );
-         break;
-      default:
-         reg_print_fct_error("reg_dti::GetSimilarityMeasureValue");
-         reg_print_msg_error("Warped pixel type unsupported in the DTI computation function");
-         reg_exit();
-      }
-   }
-   return DTIMeasureValue;
+double GetSimilarityMeasureValue(const nifti_image *referenceImage,
+                                 const nifti_image *warpedImage,
+                                 const int *mask,
+                                 const unsigned *dtIndicies) {
+    return std::visit([&](auto&& refImgDataType) {
+        using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+        return reg_getDTIMeasureValue<RefImgDataType>(referenceImage,
+                                                      warpedImage,
+                                                      mask,
+                                                      dtIndicies);
+    }, NiftiImage::getFloatingDataType(referenceImage));
+}
+/* *************************************************************** */
+double reg_dti::GetSimilarityMeasureValueFw() {
+    return ::GetSimilarityMeasureValue(this->referenceImage,
+                                       this->warpedImage,
+                                       this->referenceMask,
+                                       this->dtIndicies);
+}
+/* *************************************************************** */
+double reg_dti::GetSimilarityMeasureValueBw() {
+    return ::GetSimilarityMeasureValue(this->floatingImage,
+                                       this->warpedImageBw,
+                                       this->floatingMask,
+                                       this->dtIndicies);
 }
 /* *************************************************************** */
 template <class DataType>
 void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
-      nifti_image *warpedImage,
-      nifti_image *warpedGradient,
-      nifti_image *dtiMeasureGradientImage,
-      int *mask,
-      unsigned *dtIndicies)
-{
-   // Create pointers to the reference and warped images
+                                         nifti_image *warpedImage,
+                                         nifti_image *warpedGradient,
+                                         nifti_image *dtiMeasureGradientImage,
+                                         int *mask,
+                                         unsigned *dtIndicies) {
+    // Create pointers to the reference and warped images
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -239,45 +180,45 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
 
-   /* As the tensor has 6 unique components that we need to worry about, read them out
-   for the floating and reference images. */
-   DataType *firstWarpedVox = static_cast<DataType *>(warpedImage->data);
-   DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]];
-   DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]];
-   DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]];
-   DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]];
-   DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]];
-   DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]];
+    /* As the tensor has 6 unique components that we need to worry about, read them out
+    for the floating and reference images. */
+    DataType *firstWarpedVox = static_cast<DataType*>(warpedImage->data);
+    DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber * dtIndicies[0]];
+    DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber * dtIndicies[1]];
+    DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber * dtIndicies[2]];
+    DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber * dtIndicies[3]];
+    DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber * dtIndicies[4]];
+    DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber * dtIndicies[5]];
 
-   DataType *firstRefVox = static_cast<DataType *>(referenceImage->data);
-   DataType *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]];
-   DataType *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]];
-   DataType *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]];
-   DataType *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]];
-   DataType *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]];
-   DataType *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]];
+    DataType *firstRefVox = static_cast<DataType*>(referenceImage->data);
+    DataType *referenceIntensityXX = &firstRefVox[voxelNumber * dtIndicies[0]];
+    DataType *referenceIntensityXY = &firstRefVox[voxelNumber * dtIndicies[1]];
+    DataType *referenceIntensityYY = &firstRefVox[voxelNumber * dtIndicies[2]];
+    DataType *referenceIntensityXZ = &firstRefVox[voxelNumber * dtIndicies[3]];
+    DataType *referenceIntensityYZ = &firstRefVox[voxelNumber * dtIndicies[4]];
+    DataType *referenceIntensityZZ = &firstRefVox[voxelNumber * dtIndicies[5]];
 
-   // THE FOLLOWING IS WRONG
-   reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX");
-   reg_exit();
-   unsigned gradientVoxels = warpedGradient->nu*voxelNumber;
-   DataType *firstGradVox = static_cast<DataType *>(warpedGradient->data);
-   DataType *spatialGradXX = &firstGradVox[gradientVoxels*dtIndicies[0]];
-   DataType *spatialGradXY = &firstGradVox[gradientVoxels*dtIndicies[1]];
-   DataType *spatialGradYY = &firstGradVox[gradientVoxels*dtIndicies[2]];
-   DataType *spatialGradXZ = &firstGradVox[gradientVoxels*dtIndicies[3]];
-   DataType *spatialGradYZ = &firstGradVox[gradientVoxels*dtIndicies[4]];
-   DataType *spatialGradZZ = &firstGradVox[gradientVoxels*dtIndicies[5]];
+    // THE FOLLOWING IS WRONG
+    reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX");
+    reg_exit();
+    unsigned gradientVoxels = warpedGradient->nu * voxelNumber;
+    DataType *firstGradVox = static_cast<DataType*>(warpedGradient->data);
+    DataType *spatialGradXX = &firstGradVox[gradientVoxels * dtIndicies[0]];
+    DataType *spatialGradXY = &firstGradVox[gradientVoxels * dtIndicies[1]];
+    DataType *spatialGradYY = &firstGradVox[gradientVoxels * dtIndicies[2]];
+    DataType *spatialGradXZ = &firstGradVox[gradientVoxels * dtIndicies[3]];
+    DataType *spatialGradYZ = &firstGradVox[gradientVoxels * dtIndicies[4]];
+    DataType *spatialGradZZ = &firstGradVox[gradientVoxels * dtIndicies[5]];
 
-   // Create an array to store the computed gradient per time point
-   DataType *dtiMeasureGradPtrX=static_cast<DataType *>(dtiMeasureGradientImage->data);
-   DataType *dtiMeasureGradPtrY = &dtiMeasureGradPtrX[voxelNumber];
-   DataType *dtiMeasureGradPtrZ = &dtiMeasureGradPtrY[voxelNumber];
+    // Create an array to store the computed gradient per time point
+    DataType *dtiMeasureGradPtrX = static_cast<DataType*>(dtiMeasureGradientImage->data);
+    DataType *dtiMeasureGradPtrY = &dtiMeasureGradPtrX[voxelNumber];
+    DataType *dtiMeasureGradPtrZ = &dtiMeasureGradPtrY[voxelNumber];
 
-   const double twoThirds = 2.0/3.0;
-   const double fourThirds = 4.0/3.0;
+    const double twoThirds = 2.0 / 3.0;
+    const double fourThirds = 4.0 / 3.0;
 
-   DataType rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad;
+    DataType rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \
@@ -287,133 +228,114 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
           dtiMeasureGradPtrX, dtiMeasureGradPtrY, dtiMeasureGradPtrZ, voxelNumber) \
    private(rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad)
 #endif
-   for(voxel=0; voxel<voxelNumber; voxel++)
-   {
-      if(mask[voxel]>-1 )
-      {
-         if(referenceIntensityXX[voxel]==referenceIntensityXX[voxel] &&
-               warpedIntensityXX[voxel]==warpedIntensityXX[voxel])
-         {
-            rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel];
-            rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel];
-            rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel];
-            rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel];
-            rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel];
-            rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel];
+    for (voxel = 0; voxel < voxelNumber; voxel++) {
+        if (mask[voxel] > -1) {
+            if (referenceIntensityXX[voxel] == referenceIntensityXX[voxel] &&
+                warpedIntensityXX[voxel] == warpedIntensityXX[voxel]) {
+                rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel];
+                rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel];
+                rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel];
+                rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel];
+                rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel];
+                rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel];
 
-            xxGrad = fourThirds*rXX-twoThirds*(rYY+rZZ);
-            yyGrad = fourThirds*rYY-twoThirds*(rXX+rZZ);
-            zzGrad = fourThirds*rZZ-twoThirds*(rYY+rXX);
-            xyGrad = 4.0*rXY;
-            xzGrad = 4.0*rXZ;
-            yzGrad = 4.0*rYZ;
+                xxGrad = static_cast<DataType>(fourThirds * rXX - twoThirds * (rYY + rZZ));
+                yyGrad = static_cast<DataType>(fourThirds * rYY - twoThirds * (rXX + rZZ));
+                zzGrad = static_cast<DataType>(fourThirds * rZZ - twoThirds * (rYY + rXX));
+                xyGrad = 4.f * rXY;
+                xzGrad = 4.f * rXZ;
+                yzGrad = 4.f * rYZ;
 
-            dtiMeasureGradPtrX[voxel] -= (spatialGradXX[voxel]*xxGrad+spatialGradYY[voxel]*yyGrad+spatialGradZZ[voxel]*zzGrad \
-                                          + spatialGradXY[voxel]*xyGrad + spatialGradXZ[voxel]*xzGrad + spatialGradYZ[voxel]*yzGrad);
+                dtiMeasureGradPtrX[voxel] -= (spatialGradXX[voxel] * xxGrad + spatialGradYY[voxel] * yyGrad + spatialGradZZ[voxel] * zzGrad
+                                              + spatialGradXY[voxel] * xyGrad + spatialGradXZ[voxel] * xzGrad + spatialGradYZ[voxel] * yzGrad);
 
-            dtiMeasureGradPtrY[voxel] -= (spatialGradXX[voxel+voxelNumber]*xxGrad+spatialGradYY[voxel+voxelNumber]*yyGrad+spatialGradZZ[voxel+voxelNumber]*zzGrad \
-                                          + spatialGradXY[voxel+voxelNumber]*xyGrad + spatialGradXZ[voxel+voxelNumber]*xzGrad + spatialGradYZ[voxel+voxelNumber]*yzGrad);
+                dtiMeasureGradPtrY[voxel] -= (spatialGradXX[voxel + voxelNumber] * xxGrad + spatialGradYY[voxel + voxelNumber] * yyGrad + spatialGradZZ[voxel + voxelNumber] * zzGrad
+                                              + spatialGradXY[voxel + voxelNumber] * xyGrad + spatialGradXZ[voxel + voxelNumber] * xzGrad + spatialGradYZ[voxel + voxelNumber] * yzGrad);
 
-            dtiMeasureGradPtrZ[voxel] -= (spatialGradXX[voxel+2*voxelNumber]*xxGrad+spatialGradYY[voxel+2*voxelNumber]*yyGrad \
-                                          + spatialGradZZ[voxel+2*voxelNumber]*zzGrad + spatialGradXY[voxel+2*voxelNumber]*xyGrad  \
-                                          + spatialGradXZ[voxel+2*voxelNumber]*xzGrad + spatialGradYZ[voxel+2*voxelNumber]*yzGrad);
-         }
-      }
-   }
+                dtiMeasureGradPtrZ[voxel] -= (spatialGradXX[voxel + 2 * voxelNumber] * xxGrad + spatialGradYY[voxel + 2 * voxelNumber] * yyGrad
+                                              + spatialGradZZ[voxel + 2 * voxelNumber] * zzGrad + spatialGradXY[voxel + 2 * voxelNumber] * xyGrad
+                                              + spatialGradXZ[voxel + 2 * voxelNumber] * xzGrad + spatialGradYZ[voxel + 2 * voxelNumber] * yzGrad);
+            }
+        }
+    }
 }
 /* *************************************************************** */
-template void reg_getVoxelBasedDTIMeasureGradient<float>
-(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned *);
-template void reg_getVoxelBasedDTIMeasureGradient<double>
-(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned *);
-/* *************************************************************** */
-void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint)
-{
-   // Check if the specified time point exists and is active
-   reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
-   if(this->timePointWeight[currentTimepoint]==0)
-      return;
+void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
+    // Check if the specified time point exists and is active
+    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
+    if (this->timePointWeight[currentTimepoint] == 0)
+        return;
 
-   // Check if all required input images are of the same data type
-   int dtype = this->referenceImage->datatype;
-   if(this->warpedImage->datatype != dtype ||
-         this->warpedGradient->datatype != dtype ||
-         this->voxelBasedGradient->datatype != dtype
-     )
-   {
-      reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
-      reg_print_msg_error("Input images are expected to be of the same type");
-      reg_exit();
-   }
-   // Compute the gradient of the ssd for the forward transformation
-   switch(dtype)
-   {
-   case NIFTI_TYPE_FLOAT32:
-      reg_getVoxelBasedDTIMeasureGradient<float>
-      (this->referenceImage,
-       this->warpedImage,
-       this->warpedGradient,
-       this->voxelBasedGradient,
-       this->referenceMask,
-       this->dtIndicies
-      );
-      break;
-   case NIFTI_TYPE_FLOAT64:
-      reg_getVoxelBasedDTIMeasureGradient<double>
-      (this->referenceImage,
-       this->warpedImage,
-       this->warpedGradient,
-       this->voxelBasedGradient,
-       this->referenceMask,
-       this->dtIndicies
-      );
-      break;
-   default:
-      reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
-      reg_print_msg_error("The input image data type is not supported");
-      reg_exit();
-   }
-   // Compute the gradient of the ssd for the backward transformation
-   if(this->isSymmetric)
-   {
-      dtype = this->floatingImage->datatype;
-      if(this->warpedImageBw->datatype != dtype ||
+    // Check if all required input images are of the same data type
+    int dtype = this->referenceImage->datatype;
+    if (this->warpedImage->datatype != dtype ||
+        this->warpedGradient->datatype != dtype ||
+        this->voxelBasedGradient->datatype != dtype
+        ) {
+        reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
+        reg_print_msg_error("Input images are expected to be of the same type");
+        reg_exit();
+    }
+    // Compute the gradient of the ssd for the forward transformation
+    switch (dtype) {
+    case NIFTI_TYPE_FLOAT32:
+        reg_getVoxelBasedDTIMeasureGradient<float>
+            (this->referenceImage,
+             this->warpedImage,
+             this->warpedGradient,
+             this->voxelBasedGradient,
+             this->referenceMask,
+             this->dtIndicies);
+        break;
+    case NIFTI_TYPE_FLOAT64:
+        reg_getVoxelBasedDTIMeasureGradient<double>
+            (this->referenceImage,
+             this->warpedImage,
+             this->warpedGradient,
+             this->voxelBasedGradient,
+             this->referenceMask,
+             this->dtIndicies);
+        break;
+    default:
+        reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
+        reg_print_msg_error("The input image data type is not supported");
+        reg_exit();
+    }
+    // Compute the gradient of the ssd for the backward transformation
+    if (this->isSymmetric) {
+        dtype = this->floatingImage->datatype;
+        if (this->warpedImageBw->datatype != dtype ||
             this->warpedGradientBw->datatype != dtype ||
-            this->voxelBasedGradientBw->datatype != dtype
-        )
-      {
-         reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
-         reg_print_msg_error("Input images are expected to be of the same type");
-         reg_exit();
-      }
-      // Compute the gradient of the nmi for the backward transformation
-      switch(dtype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         reg_getVoxelBasedDTIMeasureGradient<float>
-         (this->floatingImage,
-          this->warpedImageBw,
-          this->warpedGradientBw,
-          this->voxelBasedGradientBw,
-          this->floatingMask,
-          this->dtIndicies
-         );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         reg_getVoxelBasedDTIMeasureGradient<double>
-         (this->floatingImage,
-          this->warpedImageBw,
-          this->warpedGradientBw,
-          this->voxelBasedGradientBw,
-          this->floatingMask,
-          this->dtIndicies
-         );
-         break;
-      default:
-         reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
-         reg_print_msg_error("The input image data type is not supported");
-         reg_exit();
-      }
-   }
+            this->voxelBasedGradientBw->datatype != dtype) {
+            reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
+            reg_print_msg_error("Input images are expected to be of the same type");
+            reg_exit();
+        }
+        // Compute the gradient of the nmi for the backward transformation
+        switch (dtype) {
+        case NIFTI_TYPE_FLOAT32:
+            reg_getVoxelBasedDTIMeasureGradient<float>
+                (this->floatingImage,
+                 this->warpedImageBw,
+                 this->warpedGradientBw,
+                 this->voxelBasedGradientBw,
+                 this->floatingMask,
+                 this->dtIndicies);
+            break;
+        case NIFTI_TYPE_FLOAT64:
+            reg_getVoxelBasedDTIMeasureGradient<double>
+                (this->floatingImage,
+                 this->warpedImageBw,
+                 this->warpedGradientBw,
+                 this->voxelBasedGradientBw,
+                 this->floatingMask,
+                 this->dtIndicies);
+            break;
+        default:
+            reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
+            reg_print_msg_error("The input image data type is not supported");
+            reg_exit();
+        }
+    }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 580382af..0e6dc21c 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -37,8 +37,10 @@ class reg_dti: public reg_measure {
                                    nifti_image *warpedImgBw = nullptr,
                                    nifti_image *warpedGradBw = nullptr,
                                    nifti_image *voxelBasedGradBw = nullptr) override;
-    /// @brief Returns the value
-    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Returns the dti value forwards
+    virtual double GetSimilarityMeasureValueFw() override;
+    /// @brief Returns the dti value backwards
+    virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel based gradient for DTI images
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
 
@@ -57,10 +59,10 @@ class reg_dti: public reg_measure {
  * @return Returns an L2 measure of the distance between the anisotropic components of the diffusion tensors
  */
 extern "C++" template <class DataType>
-double reg_getDTIMeasureValue(nifti_image *referenceImage,
-                              nifti_image *warpedImage,
-                              int *mask,
-                              unsigned *dtIndicies);
+double reg_getDTIMeasureValue(const nifti_image *referenceImage,
+                              const nifti_image *warpedImage,
+                              const int *mask,
+                              const unsigned *dtIndicies);
 /* *************************************************************** */
 /**
  * @brief Compute a voxel based gradient of the sum squared difference.
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index 39a8b84b..01302e80 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -12,7 +12,6 @@
 
 #include "_reg_kld.h"
 
-/* *************************************************************** */
 /* *************************************************************** */
 reg_kld::reg_kld(): reg_measure() {
 #ifndef NDEBUG
@@ -20,7 +19,6 @@ reg_kld::reg_kld(): reg_measure() {
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_kld::InitialiseMeasure(nifti_image *refImg,
                                 nifti_image *floImg,
                                 int *refMask,
@@ -55,11 +53,12 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg,
     // are meant to be probabilities
     for (int t = 0; t < this->referenceImage->nt; ++t) {
         if (this->timePointWeight[t] > 0) {
-            float min_ref = reg_tools_getMinValue(this->referenceImage, t);
-            float max_ref = reg_tools_getMaxValue(this->referenceImage, t);
-            float min_flo = reg_tools_getMinValue(this->floatingImage, t);
-            float max_flo = reg_tools_getMaxValue(this->floatingImage, t);
-            if (min_ref < 0.f || min_flo < 0.f || max_ref>1.f || max_flo>1.f) {
+            const float minRef = reg_tools_getMinValue(this->referenceImage, t);
+            const float maxRef = reg_tools_getMaxValue(this->referenceImage, t);
+            const float minFlo = reg_tools_getMinValue(this->floatingImage, t);
+            const float maxFlo = reg_tools_getMaxValue(this->floatingImage, t);
+            if (minRef < 0.f || minFlo < 0.f || maxRef > 1.f || maxFlo > 1.f) {
+                reg_print_fct_error("reg_kld::InitialiseMeasure");
                 reg_print_msg_error("The input images are expected to be probabilities to use the kld measure");
                 reg_exit();
             }
@@ -67,7 +66,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg,
     }
 #ifndef NDEBUG
     char text[255];
-    reg_print_msg_debug("reg_kld::InitialiseMeasure().");
+    reg_print_msg_debug("reg_kld::InitialiseMeasure()");
     for (int i = 0; i < this->referenceImage->nt; ++i) {
         sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
         reg_print_msg_debug(text);
@@ -75,13 +74,12 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg,
 #endif
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template <class DataType>
-double reg_getKLDivergence(nifti_image *referenceImage,
-                           nifti_image *warpedImage,
-                           double *timePointWeight,
-                           nifti_image *jacobianDetImg,
-                           int *mask) {
+double reg_getKLDivergence(const nifti_image *referenceImage,
+                           const nifti_image *warpedImage,
+                           const double *timePointWeight,
+                           const nifti_image *jacobianDetImg,
+                           const int *mask) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -90,119 +88,77 @@ double reg_getKLDivergence(nifti_image *referenceImage,
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
 
-    DataType *refPtr = static_cast<DataType*>(referenceImage->data);
-    DataType *warPtr = static_cast<DataType*>(warpedImage->data);
-    int *maskPtr = nullptr;
-    bool MrClean = false;
-    if (mask == nullptr) {
-        maskPtr = (int*)calloc(voxelNumber, sizeof(int));
-        MrClean = true;
-    } else maskPtr = &mask[0];
-
-    DataType *jacPtr = nullptr;
+    const DataType *refPtr = static_cast<DataType*>(referenceImage->data);
+    const DataType *warPtr = static_cast<DataType*>(warpedImage->data);
+    const DataType *jacPtr = nullptr;
     if (jacobianDetImg != nullptr)
         jacPtr = static_cast<DataType*>(jacobianDetImg->data);
-    double measure = 0, measure_tp = 0, num = 0, tempRefValue, tempWarValue, tempValue;
+
+    double measure = 0, measureTp = 0, num = 0, tempRefValue, tempWarValue, tempValue;
 
     for (int time = 0; time < referenceImage->nt; ++time) {
         if (timePointWeight[time] > 0) {
-            DataType *currentRefPtr = &refPtr[time * voxelNumber];
-            DataType *currentWarPtr = &warPtr[time * voxelNumber];
+            const DataType *currentRefPtr = &refPtr[time * voxelNumber];
+            const DataType *currentWarPtr = &warPtr[time * voxelNumber];
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber,currentRefPtr, currentWarPtr, \
-    maskPtr, jacobianDetImg, jacPtr) \
+    shared(voxelNumber,currentRefPtr, currentWarPtr, mask, jacobianDetImg, jacPtr) \
     private(tempRefValue, tempWarValue, tempValue) \
-    reduction(+:measure_tp, num)
+    reduction(+:measureTp, num)
 #endif
             for (voxel = 0; voxel < voxelNumber; ++voxel) {
-                if (maskPtr[voxel] > -1) {
+                if (mask[voxel] > -1) {
                     tempRefValue = currentRefPtr[voxel] + 1e-16;
                     tempWarValue = currentWarPtr[voxel] + 1e-16;
                     tempValue = tempRefValue * log(tempRefValue / tempWarValue);
                     if (tempValue == tempValue &&
                         tempValue != std::numeric_limits<double>::infinity()) {
                         if (jacobianDetImg == nullptr) {
-                            measure_tp -= tempValue;
+                            measureTp -= tempValue;
                             num++;
                         } else {
-                            measure_tp -= tempValue * jacPtr[voxel];
+                            measureTp -= tempValue * jacPtr[voxel];
                             num += jacPtr[voxel];
                         }
                     }
                 }
             }
-            measure += measure_tp * timePointWeight[time] / num;
+            measure += measureTp * timePointWeight[time] / num;
         }
     }
-    if (MrClean) free(maskPtr);
     return measure;
 }
-template double reg_getKLDivergence<float>(nifti_image*, nifti_image*, double*, nifti_image*, int*);
-template double reg_getKLDivergence<double>(nifti_image*, nifti_image*, double*, nifti_image*, int*);
 /* *************************************************************** */
+double GetSimilarityMeasureValue(const nifti_image *referenceImage,
+                                 const nifti_image *warpedImage,
+                                 const double *timePointWeight,
+                                 const nifti_image *jacobianDetImg,
+                                 const int *mask) {
+    return std::visit([&](auto&& refImgDataType) {
+        using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+        return reg_getKLDivergence<RefImgDataType>(referenceImage,
+                                                   warpedImage,
+                                                   timePointWeight,
+                                                   jacobianDetImg,
+                                                   mask);
+    }, NiftiImage::getFloatingDataType(referenceImage));
+}
 /* *************************************************************** */
-double reg_kld::GetSimilarityMeasureValue() {
-    // Check that all the specified image are of the same datatype
-    if (this->warpedImage->datatype != this->referenceImage->datatype) {
-        reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
-        reg_print_msg_error("Both input images are expected to have the same type");
-        reg_exit();
-    }
-    double KLDValue;
-    switch (this->referenceImage->datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        KLDValue = reg_getKLDivergence<float>(this->referenceImage,
-                                              this->warpedImage,
-                                              this->timePointWeight,
-                                              nullptr, // TODO this->forwardJacDetImagePointer,
-                                              this->referenceMask);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        KLDValue = reg_getKLDivergence<double>(this->referenceImage,
-                                               this->warpedImage,
-                                               this->timePointWeight,
-                                               nullptr, // TODO this->forwardJacDetImagePointer,
-                                               this->referenceMask);
-        break;
-    default:
-        reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
-        reg_print_msg_error("Warped pixel type unsupported");
-        reg_exit();
-    }
-
-    // Backward computation
-    if (this->isSymmetric) {
-        // Check that all the specified image are of the same datatype
-        if (this->warpedImageBw->datatype != this->floatingImage->datatype) {
-            reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
-            reg_print_msg_error("Both input images are expected to have the same type");
-            reg_exit();
-        }
-        switch (this->floatingImage->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            KLDValue += reg_getKLDivergence<float>(this->floatingImage,
-                                                   this->warpedImageBw,
-                                                   this->timePointWeight,
-                                                   nullptr, // TODO this->backwardJacDetImagePointer,
-                                                   this->floatingMask);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            KLDValue += reg_getKLDivergence<double>(this->floatingImage,
-                                                    this->warpedImageBw,
-                                                    this->timePointWeight,
-                                                    nullptr, // TODO this->backwardJacDetImagePointer,
-                                                    this->floatingMask);
-            break;
-        default:
-            reg_print_fct_error("reg_kld::GetSimilarityMeasureValue");
-            reg_print_msg_error("Warped pixel type unsupported");
-            reg_exit();
-        }
-    }
-    return KLDValue;
+double reg_kld::GetSimilarityMeasureValueFw() {
+    return ::GetSimilarityMeasureValue(this->referenceImage,
+                                       this->warpedImage,
+                                       this->timePointWeight,
+                                       nullptr, // TODO this->forwardJacDetImagePointer,
+                                       this->referenceMask);
 }
 /* *************************************************************** */
+double reg_kld::GetSimilarityMeasureValueBw() {
+    return ::GetSimilarityMeasureValue(this->floatingImage,
+                                       this->warpedImageBw,
+                                       this->timePointWeight,
+                                       nullptr, // TODO this->backwardJacDetImagePointer,
+                                       this->floatingMask);
+}
 /* *************************************************************** */
 template <class DataType>
 void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
@@ -313,11 +269,6 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
     }
     if (MrClean) free(maskPtr);
 }
-template void reg_getKLDivergenceVoxelBasedGradient<float>
-(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double);
-template void reg_getKLDivergenceVoxelBasedGradient<double>
-(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double);
-/* *************************************************************** */
 /* *************************************************************** */
 void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // Check if the specified time point exists and is active
@@ -401,4 +352,3 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     }
 }
 /* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h
index aaf70556..ae5f4cb2 100755
--- a/reg-lib/cpu/_reg_kld.h
+++ b/reg-lib/cpu/_reg_kld.h
@@ -34,8 +34,10 @@ class reg_kld: public reg_measure {
                                    nifti_image *warpedImgBw = nullptr,
                                    nifti_image *warpedGradBw = nullptr,
                                    nifti_image *voxelBasedGradBw = nullptr) override;
-    /// @brief Returns the kld value
-    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Returns the kld value forwards
+    virtual double GetSimilarityMeasureValueFw() override;
+    /// @brief Returns the kld value backwards
+    virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel based kld gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
 };
@@ -50,15 +52,15 @@ class reg_kld: public reg_measure {
  * image is used to modulate the KLD. The argument is ignored if the
  * pointer is set to nullptr
  * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to nullptr, all voxels are considered
+ * should be considered
  * @return Returns the computed sum squared difference
  */
 extern "C++" template <class DataType>
-double reg_getKLDivergence(nifti_image *reference,
-                           nifti_image *warped,
-                           double *timePointWeight,
-                           nifti_image *jacobianDeterminantImage,
-                           int *mask);
+double reg_getKLDivergence(const nifti_image *reference,
+                           const nifti_image *warped,
+                           const double *timePointWeight,
+                           const nifti_image *jacobianDeterminantImage,
+                           const int *mask);
 /* *************************************************************** */
 
 /** @brief Compute a voxel based gradient of the sum squared difference.
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index fca452e3..2d1c3848 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -79,16 +79,18 @@ reg_lncc::~reg_lncc() {
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
-                                     nifti_image *warImage,
-                                     nifti_image *meanImage,
-                                     nifti_image *warpedMeanImage,
-                                     nifti_image *stdDevImage,
-                                     nifti_image *warpedSdevImage,
-                                     int *refMask,
-                                     int *combinedMask,
-                                     int currentTimepoint) {
-    // Generate the forward mask to ignore all NaN values
+void UpdateLocalStatImages(const nifti_image *refImage,
+                           const nifti_image *warImage,
+                           nifti_image *meanImage,
+                           nifti_image *warpedMeanImage,
+                           nifti_image *sdevImage,
+                           nifti_image *warpedSdevImage,
+                           const int *refMask,
+                           int *combinedMask,
+                           const float *kernelStandardDeviation,
+                           const int& kernelType,
+                           const int& currentTimepoint) {
+    // Generate the combined mask to ignore all NaN values
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(refImage, 3);
@@ -100,25 +102,25 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage,
     reg_tools_removeNanFromMask(refImage, combinedMask);
     reg_tools_removeNanFromMask(warImage, combinedMask);
 
-    DataType *origRefPtr = static_cast<DataType*>(refImage->data);
+    const DataType *origRefPtr = static_cast<DataType*>(refImage->data);
     DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
-    DataType *sdevImgPtr = static_cast<DataType*>(stdDevImage->data);
+    DataType *sdevImgPtr = static_cast<DataType*>(sdevImage->data);
     memcpy(meanImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper);
     memcpy(sdevImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper);
 
-    reg_tools_multiplyImageToImage(stdDevImage, stdDevImage, stdDevImage);
-    reg_tools_kernelConvolution(meanImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
-    reg_tools_kernelConvolution(stdDevImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
+    reg_tools_multiplyImageToImage(sdevImage, sdevImage, sdevImage);
+    reg_tools_kernelConvolution(meanImage, kernelStandardDeviation, kernelType, combinedMask);
+    reg_tools_kernelConvolution(sdevImage, kernelStandardDeviation, kernelType, combinedMask);
 
-    DataType *origWarPtr = static_cast<DataType*>(warImage->data);
+    const DataType *origWarPtr = static_cast<DataType*>(warImage->data);
     DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
     DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
     memcpy(warMeanPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper);
     memcpy(warSdevPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper);
 
     reg_tools_multiplyImageToImage(warpedSdevImage, warpedSdevImage, warpedSdevImage);
-    reg_tools_kernelConvolution(warpedMeanImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
-    reg_tools_kernelConvolution(warpedSdevImage, this->kernelStandardDeviation, this->kernelType, combinedMask);
+    reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
+    reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask);
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber, sdevImgPtr, meanImgPtr, warSdevPtr, warMeanPtr)
@@ -243,7 +245,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg,
     }
 #ifndef NDEBUG
     char text[255];
-    reg_print_msg_debug("reg_lncc::InitialiseMeasure().");
+    reg_print_msg_debug("reg_lncc::InitialiseMeasure()");
     for (int i = 0; i < this->referenceImage->nt; ++i) {
         sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
         reg_print_msg_debug(text);
@@ -252,17 +254,17 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg,
 }
 /* *************************************************************** */
 template<class DataType>
-double reg_getLNCCValue(nifti_image *referenceImage,
-                        nifti_image *meanImage,
-                        nifti_image *sdevImage,
-                        nifti_image *warpedImage,
-                        nifti_image *warpedMeanImage,
-                        nifti_image *warpedSdevImage,
-                        int *combinedMask,
-                        float *kernelStandardDeviation,
+double reg_getLnccValue(const nifti_image *referenceImage,
+                        const nifti_image *meanImage,
+                        const nifti_image *sdevImage,
+                        const nifti_image *warpedImage,
+                        const nifti_image *warpedMeanImage,
+                        const nifti_image *warpedSdevImage,
+                        const int *combinedMask,
+                        const float *kernelStandardDeviation,
                         nifti_image *correlationImage,
-                        int kernelType,
-                        int currentTimepoint) {
+                        const int& kernelType,
+                        const int& currentTimepoint) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -272,16 +274,16 @@ double reg_getLNCCValue(nifti_image *referenceImage,
 #endif
 
     // Compute the local correlation
-    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
+    const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
 
-    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
+    const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
 
-    DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
-    DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
-    DataType *sdevImgPtr = static_cast<DataType*>(sdevImage->data);
-    DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
+    const DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
+    const DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
+    const DataType *sdevImgPtr = static_cast<DataType*>(sdevImage->data);
+    const DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
     DataType *correlationPtr = static_cast<DataType*>(correlationImage->data);
 
     for (size_t i = 0; i < voxelNumber; ++i)
@@ -289,156 +291,113 @@ double reg_getLNCCValue(nifti_image *referenceImage,
 
     reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
 
-    double lncc_value_sum = 0., lncc_value;
-    double activeVoxel_num = 0.;
+    double lnccSum = 0, lncc;
+    size_t activeVoxelNumber = 0;
 
     // Iteration over all voxels
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,combinedMask,meanImgPtr,warMeanPtr, \
     sdevImgPtr,warSdevPtr,correlationPtr) \
-    private(lncc_value) \
-    reduction(+:lncc_value_sum) \
-    reduction(+:activeVoxel_num)
+    private(lncc) \
+    reduction(+:lnccSum, activeVoxelNumber)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
-            lncc_value = (correlationPtr[voxel] - (meanImgPtr[voxel] * warMeanPtr[voxel])) / (sdevImgPtr[voxel] * warSdevPtr[voxel]);
-            if (lncc_value == lncc_value && isinf(lncc_value) == 0) {
-                lncc_value_sum += fabs(lncc_value);
-                ++activeVoxel_num;
+            lncc = (correlationPtr[voxel] - (meanImgPtr[voxel] * warMeanPtr[voxel])) / (sdevImgPtr[voxel] * warSdevPtr[voxel]);
+            if (lncc == lncc && !isinf(lncc)) {
+                lnccSum += fabs(lncc);
+                ++activeVoxelNumber;
             }
         }
     }
-    return lncc_value_sum / activeVoxel_num;
+    return lnccSum / activeVoxelNumber;
 }
 /* *************************************************************** */
-double reg_lncc::GetSimilarityMeasureValue() {
-    double lncc_value = 0;
-
-    for (int currentTimepoint = 0; currentTimepoint < this->referenceImage->nt; ++currentTimepoint) {
-        if (this->timePointWeight[currentTimepoint] > 0) {
-            double tp_value = 0;
-            // Compute the mean and variance of the reference and warped floating
-            switch (this->referenceImage->datatype) {
-            case NIFTI_TYPE_FLOAT32:
-                this->UpdateLocalStatImages<float>(this->referenceImage,
-                                                   this->warpedImage,
-                                                   this->meanImage,
-                                                   this->warpedMeanImage,
-                                                   this->sdevImage,
-                                                   this->warpedSdevImage,
-                                                   this->referenceMask,
-                                                   this->forwardMask,
-                                                   currentTimepoint);
-                break;
-            case NIFTI_TYPE_FLOAT64:
-                this->UpdateLocalStatImages<double>(this->referenceImage,
-                                                    this->warpedImage,
-                                                    this->meanImage,
-                                                    this->warpedMeanImage,
-                                                    this->sdevImage,
-                                                    this->warpedSdevImage,
-                                                    this->referenceMask,
-                                                    this->forwardMask,
-                                                    currentTimepoint);
-                break;
-            }
-
-            // Compute the LNCC - Forward
-            switch (this->referenceImage->datatype) {
-            case NIFTI_TYPE_FLOAT32:
-                tp_value += reg_getLNCCValue<float>(this->referenceImage,
-                                                    this->meanImage,
-                                                    this->sdevImage,
-                                                    this->warpedImage,
-                                                    this->warpedMeanImage,
-                                                    this->warpedSdevImage,
-                                                    this->forwardMask,
-                                                    this->kernelStandardDeviation,
-                                                    this->correlationImage,
-                                                    this->kernelType,
-                                                    currentTimepoint);
-                break;
-            case NIFTI_TYPE_FLOAT64:
-                tp_value += reg_getLNCCValue<double>(this->referenceImage,
-                                                     this->meanImage,
-                                                     this->sdevImage,
-                                                     this->warpedImage,
-                                                     this->warpedMeanImage,
-                                                     this->warpedSdevImage,
-                                                     this->forwardMask,
-                                                     this->kernelStandardDeviation,
-                                                     this->correlationImage,
-                                                     this->kernelType,
-                                                     currentTimepoint);
-                break;
-            }
-            if (this->isSymmetric) {
-                // Compute the mean and variance of the floating and warped reference
-                switch (this->floatingImage->datatype) {
-                case NIFTI_TYPE_FLOAT32:
-                    this->UpdateLocalStatImages<float>(this->floatingImage,
-                                                       this->warpedImageBw,
-                                                       this->meanImageBw,
-                                                       this->warpedMeanImageBw,
-                                                       this->sdevImageBw,
-                                                       this->warpedSdevImageBw,
-                                                       this->floatingMask,
-                                                       this->backwardMask,
-                                                       currentTimepoint);
-                    break;
-                case NIFTI_TYPE_FLOAT64:
-                    this->UpdateLocalStatImages<double>(this->floatingImage,
-                                                        this->warpedImageBw,
-                                                        this->meanImageBw,
-                                                        this->warpedMeanImageBw,
-                                                        this->sdevImageBw,
-                                                        this->warpedSdevImageBw,
-                                                        this->floatingMask,
-                                                        this->backwardMask,
-                                                        currentTimepoint);
-                    break;
-                }
-                // Compute the LNCC - Backward
-                switch (this->floatingImage->datatype) {
-                case NIFTI_TYPE_FLOAT32:
-                    tp_value += reg_getLNCCValue<float>(this->floatingImage,
-                                                        this->meanImageBw,
-                                                        this->sdevImageBw,
-                                                        this->warpedImageBw,
-                                                        this->warpedMeanImageBw,
-                                                        this->warpedSdevImageBw,
-                                                        this->backwardMask,
-                                                        this->kernelStandardDeviation,
-                                                        this->correlationImageBw,
-                                                        this->kernelType,
+double GetSimilarityMeasureValue(const nifti_image *referenceImage,
+                                 nifti_image *meanImage,
+                                 nifti_image *sdevImage,
+                                 const nifti_image *warpedImage,
+                                 nifti_image *warpedMeanImage,
+                                 nifti_image *warpedSdevImage,
+                                 const int *referenceMask,
+                                 int *combinedMask,
+                                 const float *kernelStandardDeviation,
+                                 nifti_image *correlationImage,
+                                 const int& kernelType,
+                                 const int& referenceTimePoint,
+                                 const double *timePointWeight) {
+    double lncc = 0;
+    for (int currentTimepoint = 0; currentTimepoint < referenceTimePoint; ++currentTimepoint) {
+        if (timePointWeight[currentTimepoint] > 0) {
+            const double tp = std::visit([&](auto&& refImgDataType) {
+                using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+                // Compute the mean and variance of the reference and warped floating
+                UpdateLocalStatImages<RefImgDataType>(referenceImage,
+                                                      warpedImage,
+                                                      meanImage,
+                                                      warpedMeanImage,
+                                                      sdevImage,
+                                                      warpedSdevImage,
+                                                      referenceMask,
+                                                      combinedMask,
+                                                      kernelStandardDeviation,
+                                                      kernelType,
+                                                      currentTimepoint);
+                // Compute the LNCC value
+                return reg_getLnccValue<RefImgDataType>(referenceImage,
+                                                        meanImage,
+                                                        sdevImage,
+                                                        warpedImage,
+                                                        warpedMeanImage,
+                                                        warpedSdevImage,
+                                                        combinedMask,
+                                                        kernelStandardDeviation,
+                                                        correlationImage,
+                                                        kernelType,
                                                         currentTimepoint);
-                    break;
-                case NIFTI_TYPE_FLOAT64:
-                    tp_value += reg_getLNCCValue<double>(this->floatingImage,
-                                                         this->meanImageBw,
-                                                         this->sdevImageBw,
-                                                         this->warpedImageBw,
-                                                         this->warpedMeanImageBw,
-                                                         this->warpedSdevImageBw,
-                                                         this->backwardMask,
-                                                         this->kernelStandardDeviation,
-                                                         this->correlationImageBw,
-                                                         this->kernelType,
-                                                         currentTimepoint);
-                    break;
-                }
-            }
-            lncc_value += tp_value * this->timePointWeight[currentTimepoint];
+            }, NiftiImage::getFloatingDataType(referenceImage));
+            lncc += tp * timePointWeight[currentTimepoint];
         }
     }
-    return lncc_value;
+    return lncc;
+}
+/* *************************************************************** */
+double reg_lncc::GetSimilarityMeasureValueFw() {
+    return ::GetSimilarityMeasureValue(this->referenceImage,
+                                       this->meanImage,
+                                       this->sdevImage,
+                                       this->warpedImage,
+                                       this->warpedMeanImage,
+                                       this->warpedSdevImage,
+                                       this->referenceMask,
+                                       this->forwardMask,
+                                       this->kernelStandardDeviation,
+                                       this->correlationImage,
+                                       this->kernelType,
+                                       this->referenceTimePoint,
+                                       this->timePointWeight);
+}
+/* *************************************************************** */
+double reg_lncc::GetSimilarityMeasureValueBw() {
+    return ::GetSimilarityMeasureValue(this->floatingImage,
+                                       this->meanImageBw,
+                                       this->sdevImageBw,
+                                       this->warpedImageBw,
+                                       this->warpedMeanImageBw,
+                                       this->warpedSdevImageBw,
+                                       this->floatingMask,
+                                       this->backwardMask,
+                                       this->kernelStandardDeviation,
+                                       this->correlationImageBw,
+                                       this->kernelType,
+                                       this->referenceTimePoint,
+                                       this->timePointWeight);
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
+void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage,
                                    nifti_image *meanImage,
                                    nifti_image *sdevImage,
                                    nifti_image *warpedImage,
@@ -480,7 +439,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
 
     double refMeanValue, warMeanValue, refSdevValue, warSdevValue, correlaValue;
     double temp1, temp2, temp3;
-    double activeVoxel_num = 0;
+    size_t activeVoxelNumber = 0;
 
     // Iteration over all voxels
 #ifdef _OPENMP
@@ -489,12 +448,11 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
     sdevImgPtr,warSdevPtr,correlationPtr) \
     private(refMeanValue,warMeanValue,refSdevValue, \
     warSdevValue, correlaValue, temp1, temp2, temp3) \
-    reduction(+:activeVoxel_num)
+    reduction(+:activeVoxelNumber)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
-
             refMeanValue = meanImgPtr[voxel];
             warMeanValue = warMeanPtr[voxel];
             refSdevValue = sdevImgPtr[voxel];
@@ -502,8 +460,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
             correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue);
 
             temp1 = 1.0 / (refSdevValue * warSdevValue);
-            temp2 = correlaValue /
-                (refSdevValue * warSdevValue * warSdevValue * warSdevValue);
+            temp2 = correlaValue / (refSdevValue * warSdevValue * warSdevValue * warSdevValue);
             temp3 = (correlaValue * warMeanValue) /
                 (refSdevValue * warSdevValue * warSdevValue * warSdevValue)
                 -
@@ -520,13 +477,13 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
                 warMeanPtr[voxel] = static_cast<DataType>(temp1);
                 warSdevPtr[voxel] = static_cast<DataType>(temp2);
                 correlationPtr[voxel] = static_cast<DataType>(temp3);
-                activeVoxel_num++;
+                activeVoxelNumber++;
             } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlationPtr[voxel] = 0;
         } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlationPtr[voxel] = 0;
     }
 
     //adjust weight for number of voxels
-    double adjusted_weight = timepointWeight / activeVoxel_num;
+    double adjusted_weight = timepointWeight / activeVoxelNumber;
 
     // Smooth the newly computed values
     reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
@@ -593,33 +550,37 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // Compute the mean and variance of the reference and warped floating
     switch (this->referenceImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        this->UpdateLocalStatImages<float>(this->referenceImage,
-                                           this->warpedImage,
-                                           this->meanImage,
-                                           this->warpedMeanImage,
-                                           this->sdevImage,
-                                           this->warpedSdevImage,
-                                           this->referenceMask,
-                                           this->forwardMask,
-                                           currentTimepoint);
+        UpdateLocalStatImages<float>(this->referenceImage,
+                                     this->warpedImage,
+                                     this->meanImage,
+                                     this->warpedMeanImage,
+                                     this->sdevImage,
+                                     this->warpedSdevImage,
+                                     this->referenceMask,
+                                     this->forwardMask,
+                                     this->kernelStandardDeviation,
+                                     this->kernelType,
+                                     currentTimepoint);
         break;
     case NIFTI_TYPE_FLOAT64:
-        this->UpdateLocalStatImages<double>(this->referenceImage,
-                                            this->warpedImage,
-                                            this->meanImage,
-                                            this->warpedMeanImage,
-                                            this->sdevImage,
-                                            this->warpedSdevImage,
-                                            this->referenceMask,
-                                            this->forwardMask,
-                                            currentTimepoint);
+        UpdateLocalStatImages<double>(this->referenceImage,
+                                      this->warpedImage,
+                                      this->meanImage,
+                                      this->warpedMeanImage,
+                                      this->sdevImage,
+                                      this->warpedSdevImage,
+                                      this->referenceMask,
+                                      this->forwardMask,
+                                      this->kernelStandardDeviation,
+                                      this->kernelType,
+                                      currentTimepoint);
         break;
     }
 
     // Compute the LNCC gradient - Forward
     switch (this->referenceImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        reg_getVoxelBasedLNCCGradient<float>(this->referenceImage,
+        reg_getVoxelBasedLnccGradient<float>(this->referenceImage,
                                              this->meanImage,
                                              this->sdevImage,
                                              this->warpedImage,
@@ -635,7 +596,7 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                              this->timePointWeight[currentTimepoint]);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_getVoxelBasedLNCCGradient<double>(this->referenceImage,
+        reg_getVoxelBasedLnccGradient<double>(this->referenceImage,
                                               this->meanImage,
                                               this->sdevImage,
                                               this->warpedImage,
@@ -655,32 +616,36 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
         // Compute the mean and variance of the floating and warped reference
         switch (this->floatingImage->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            this->UpdateLocalStatImages<float>(this->floatingImage,
-                                               this->warpedImageBw,
-                                               this->meanImageBw,
-                                               this->warpedMeanImageBw,
-                                               this->sdevImageBw,
-                                               this->warpedSdevImageBw,
-                                               this->floatingMask,
-                                               this->backwardMask,
-                                               currentTimepoint);
+            UpdateLocalStatImages<float>(this->floatingImage,
+                                         this->warpedImageBw,
+                                         this->meanImageBw,
+                                         this->warpedMeanImageBw,
+                                         this->sdevImageBw,
+                                         this->warpedSdevImageBw,
+                                         this->floatingMask,
+                                         this->backwardMask,
+                                         this->kernelStandardDeviation,
+                                         this->kernelType,
+                                         currentTimepoint);
             break;
         case NIFTI_TYPE_FLOAT64:
-            this->UpdateLocalStatImages<double>(this->floatingImage,
-                                                this->warpedImageBw,
-                                                this->meanImageBw,
-                                                this->warpedMeanImageBw,
-                                                this->sdevImageBw,
-                                                this->warpedSdevImageBw,
-                                                this->floatingMask,
-                                                this->backwardMask,
-                                                currentTimepoint);
+            UpdateLocalStatImages<double>(this->floatingImage,
+                                          this->warpedImageBw,
+                                          this->meanImageBw,
+                                          this->warpedMeanImageBw,
+                                          this->sdevImageBw,
+                                          this->warpedSdevImageBw,
+                                          this->floatingMask,
+                                          this->backwardMask,
+                                          this->kernelStandardDeviation,
+                                          this->kernelType,
+                                          currentTimepoint);
             break;
         }
         // Compute the LNCC gradient - Backward
         switch (this->floatingImage->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedLNCCGradient<float>(this->floatingImage,
+            reg_getVoxelBasedLnccGradient<float>(this->floatingImage,
                                                  this->meanImageBw,
                                                  this->sdevImageBw,
                                                  this->warpedImageBw,
@@ -696,7 +661,7 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                                  this->timePointWeight[currentTimepoint]);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedLNCCGradient<double>(this->floatingImage,
+            reg_getVoxelBasedLnccGradient<double>(this->floatingImage,
                                                   this->meanImageBw,
                                                   this->sdevImageBw,
                                                   this->warpedImageBw,
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index 5a7b5ef0..6c7dda5a 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -34,15 +34,17 @@ class reg_lncc: public reg_measure {
                                    nifti_image *warpedImgBw = nullptr,
                                    nifti_image *warpedGradBw = nullptr,
                                    nifti_image *voxelBasedGradBw = nullptr) override;
-    /// @brief Returns the lncc value
-    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Returns the lncc value forwards
+    virtual double GetSimilarityMeasureValueFw() override;
+    /// @brief Returns the lncc value backwards
+    virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel based lncc gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
-    /// @brief Stuff
+    /// @brief Set the kernel standard deviation
     virtual void SetKernelStandardDeviation(int t, float stddev) {
         this->kernelStandardDeviation[t] = stddev;
     }
-    /// @brief Stuff
+    /// @brief Set the kernel type
     virtual void SetKernelType(int t) {
         this->kernelType = t;
     }
@@ -64,65 +66,5 @@ class reg_lncc: public reg_measure {
     int *backwardMask;
 
     int kernelType;
-
-    template <class DataType>
-    void UpdateLocalStatImages(nifti_image *refImage,
-                               nifti_image *warImage,
-                               nifti_image *meanImage,
-                               nifti_image *warpedMeanImage,
-                               nifti_image *stdDevImage,
-                               nifti_image *warpedSdevImage,
-                               int *refMask,
-                               int *mask,
-                               int currentTimepoint);
 };
 /* *************************************************************** */
-/** @brief Compute and return the LNCC between two input image
- * @param referenceImage First input image to use to compute the metric
- * @param warpedImage Second input image to use to compute the metric
- * @param gaussianStandardDeviation Standard deviation of the Gaussian kernel
- * to use.
- * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to nullptr, all voxels are considered
- * @return Returns the computed LNCC
- */
-extern "C++" template<class DataType>
-double reg_getLNCCValue(nifti_image *referenceImage,
-                        nifti_image *meanImage,
-                        nifti_image *sdevImage,
-                        nifti_image *warpedImage,
-                        nifti_image *warpedMeanImage,
-                        nifti_image *warpedSdevImage,
-                        int *combinedMask,
-                        float *kernelStandardDeviation,
-                        nifti_image *correlationImage,
-                        int kernelType,
-                        int currentTimepoint);
-/* *************************************************************** */
-/** @brief Compute a voxel based gradient of the LNCC.
- *  @param referenceImage First input image to use to compute the metric
- *  @param warpedImage Second input image to use to compute the metric
- *  @param warpedImageGradient Spatial gradient of the input warped image
- *  @param lnccGradientImage Output image that will be updated with the
- *  value of the LNCC gradient
- *  @param gaussianStandardDeviation Standard deviation of the Gaussian kernel
- *  to use.
- *  @param mask Array that contains a mask to specify which voxel
- *  should be considered. If set to nullptr, all voxels are considered
- */
-extern "C++" template <class DataType>
-void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage,
-                                   nifti_image *meanImage,
-                                   nifti_image *sdevImage,
-                                   nifti_image *warpedImage,
-                                   nifti_image *warpedMeanImage,
-                                   nifti_image *warpedStdDevImage,
-                                   int *combinedMask,
-                                   float *kernelStdDev,
-                                   nifti_image *correlationImage,
-                                   nifti_image *warpedGradient,
-                                   nifti_image *lnccGradientImage,
-                                   int kernelType,
-                                   int currentTimepoint,
-                                   double timepointWeight);
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h
index ee2a2625..56c42d50 100755
--- a/reg-lib/cpu/_reg_measure.h
+++ b/reg-lib/cpu/_reg_measure.h
@@ -16,7 +16,7 @@ class reg_measure {
     /// @brief Measure class constructor
     reg_measure() {
 #ifndef NDEBUG
-        printf("[NiftyReg DEBUG] reg_measure constructor called\n");
+        reg_print_msg_debug("reg_measure constructor called");
 #endif
     }
     /// @brief Measure class destructor
@@ -56,12 +56,47 @@ class reg_measure {
             this->voxelBasedGradientBw = nullptr;
         }
 #ifndef NDEBUG
-        printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n");
+        reg_print_msg_debug("reg_measure::InitialiseMeasure()");
 #endif
     }
 
+    /// @brief Returns the forward registration measure of similarity value
+    virtual double GetSimilarityMeasureValueFw() = 0;
+    /// @brief Returns the backward registration measure of similarity value
+    virtual double GetSimilarityMeasureValueBw() = 0;
     /// @brief Returns the registration measure of similarity value
-    virtual double GetSimilarityMeasureValue() = 0;
+    double GetSimilarityMeasureValue() {  // Do not override
+        // Check that all the specified image are of the same datatype
+        if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 && this->referenceImage->datatype != NIFTI_TYPE_FLOAT64) {
+            reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()");
+            reg_print_msg_error("Input images are expected to be of floating precision type");
+            reg_exit();
+        }
+        if (this->warpedImage->datatype != this->referenceImage->datatype) {
+            reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()");
+            reg_print_msg_error("Both input images are expected to have the same type");
+            reg_exit();
+        }
+        double sim = GetSimilarityMeasureValueFw();
+        if (this->isSymmetric) {
+            // Check that all the specified image are of the same datatype
+            if (this->floatingImage->datatype != NIFTI_TYPE_FLOAT32 && this->floatingImage->datatype != NIFTI_TYPE_FLOAT64) {
+                reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()");
+                reg_print_msg_error("Input images are expected to be of floating precision type");
+                reg_exit();
+            }
+            if (this->floatingImage->datatype != this->warpedImageBw->datatype) {
+                reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()");
+                reg_print_msg_error("Both input images are expected to have the same type");
+                reg_exit();
+            }
+            sim += GetSimilarityMeasureValueBw();
+        }
+#ifndef NDEBUG
+        reg_print_msg_debug("reg_measure::GetSimilarityMeasureValue called");
+#endif
+        return sim;
+    }
 
     /// @brief Compute the voxel based measure of similarity gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 29aa32c9..7b289c27 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -14,14 +14,14 @@
 
 /* *************************************************************** */
 template <class DataType>
-void ShiftImage(nifti_image* inputImgPtr,
-                nifti_image* shiftedImgPtr,
-                int *maskPtr,
-                int tx,
-                int ty,
-                int tz) {
-    DataType* inputData = static_cast<DataType*>(inputImgPtr->data);
-    DataType* shiftImageData = static_cast<DataType*>(shiftedImgPtr->data);
+void ShiftImage(const nifti_image *inputImage,
+                nifti_image *shiftedImage,
+                const int *mask,
+                const int& tx,
+                const int& ty,
+                const int& tz) {
+    const DataType* inputData = static_cast<DataType*>(inputImage->data);
+    DataType* shiftImageData = static_cast<DataType*>(shiftedImage->data);
 
     int currentIndex;
     int shiftedIndex;
@@ -30,23 +30,21 @@ void ShiftImage(nifti_image* inputImgPtr,
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(inputData, shiftImageData, shiftedImgPtr, inputImgPtr, \
-    maskPtr, tx, ty, tz) \
-    private(x, y, old_x, old_y, old_z, shiftedIndex, \
-    currentIndex)
+    shared(inputData, shiftImageData, shiftedImage, inputImage, mask, tx, ty, tz) \
+    private(x, y, old_x, old_y, old_z, shiftedIndex, currentIndex)
 #endif
-    for (z = 0; z < shiftedImgPtr->nz; z++) {
-        currentIndex = z * shiftedImgPtr->nx * shiftedImgPtr->ny;
+    for (z = 0; z < shiftedImage->nz; z++) {
+        currentIndex = z * shiftedImage->nx * shiftedImage->ny;
         old_z = z - tz;
-        for (y = 0; y < shiftedImgPtr->ny; y++) {
+        for (y = 0; y < shiftedImage->ny; y++) {
             old_y = y - ty;
-            for (x = 0; x < shiftedImgPtr->nx; x++) {
+            for (x = 0; x < shiftedImage->nx; x++) {
                 old_x = x - tx;
-                if (old_x > -1 && old_x<inputImgPtr->nx &&
-                    old_y>-1 && old_y<inputImgPtr->ny &&
-                    old_z>-1 && old_z < inputImgPtr->nz) {
-                    shiftedIndex = (old_z * inputImgPtr->ny + old_y) * inputImgPtr->nx + old_x;
-                    if (maskPtr[shiftedIndex] > -1) {
+                if (old_x > -1 && old_x < inputImage->nx &&
+                    old_y > -1 && old_y < inputImage->ny &&
+                    old_z > -1 && old_z < inputImage->nz) {
+                    shiftedIndex = (old_z * inputImage->ny + old_y) * inputImage->nx + old_x;
+                    if (mask[shiftedIndex] > -1) {
                         shiftImageData[currentIndex] = inputData[shiftedIndex];
                     } // mask is not defined
                     else {
@@ -65,11 +63,11 @@ void ShiftImage(nifti_image* inputImgPtr,
 }
 /* *************************************************************** */
 template <class DataType>
-void GetMINDImageDescriptor_core(nifti_image* inputImage,
-                                 nifti_image* MINDImage,
-                                 int *maskPtr,
-                                 int descriptorOffset,
-                                 int currentTimepoint) {
+void GetMindImageDescriptorCore(const nifti_image *inputImage,
+                                nifti_image *mindImage,
+                                const int *mask,
+                                const int& descriptorOffset,
+                                const int& currentTimepoint) {
 #ifdef WIN32
     long voxelIndex;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
@@ -79,7 +77,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
 #endif
 
     // Create a pointer to the descriptor image
-    DataType* MINDImgDataPtr = static_cast<DataType*>(MINDImage->data);
+    DataType* mindImgDataPtr = static_cast<DataType*>(mindImage->data);
 
     // Allocate an image to store the current timepoint reference image
     nifti_image *currentInputImage = nifti_copy_nim_info(inputImage);
@@ -87,7 +85,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
     currentInputImage->nt = currentInputImage->dim[4] = 1;
     currentInputImage->nvox = voxelNumber;
     DataType *inputImagePtr = static_cast<DataType*>(inputImage->data);
-    currentInputImage->data = static_cast<void*>(&inputImagePtr[currentTimepoint * voxelNumber]);
+    currentInputImage->data = &inputImagePtr[currentTimepoint * voxelNumber];
 
     // Allocate an image to store the mean image
     nifti_image *meanImage = nifti_dup(*currentInputImage, false);
@@ -97,96 +95,95 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage,
     nifti_image *shiftedImage = nifti_dup(*currentInputImage, false);
 
     // Allocation of the difference image
-    nifti_image *diff_image = nifti_dup(*currentInputImage, false);
+    nifti_image *diffImage = nifti_dup(*currentInputImage, false);
 
     // Define the sigma for the convolution
     float sigma = -0.5;// negative value denotes voxel width
 
     //2D version
     int samplingNbr = (currentInputImage->nz > 1) ? 6 : 4;
-    int RSampling3D_x[6] = { -descriptorOffset, descriptorOffset, 0, 0, 0, 0 };
-    int RSampling3D_y[6] = { 0, 0, -descriptorOffset, descriptorOffset, 0, 0 };
-    int RSampling3D_z[6] = { 0, 0, 0, 0, -descriptorOffset, descriptorOffset };
+    int rSamplingX[6] = { -descriptorOffset, descriptorOffset, 0, 0, 0, 0 };
+    int rSamplingY[6] = { 0, 0, -descriptorOffset, descriptorOffset, 0, 0 };
+    int rSamplingZ[6] = { 0, 0, 0, 0, -descriptorOffset, descriptorOffset };
 
     for (int i = 0; i < samplingNbr; i++) {
-        ShiftImage<DataType>(currentInputImage, shiftedImage, maskPtr,
-                             RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
-        reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image);
-        reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
-        reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr);
-        reg_tools_addImageToImage(meanImage, diff_image, meanImage);
+        ShiftImage<DataType>(currentInputImage, shiftedImage, mask, rSamplingX[i], rSamplingY[i], rSamplingZ[i]);
+        reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diffImage);
+        reg_tools_multiplyImageToImage(diffImage, diffImage, diffImage);
+        reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask);
+        reg_tools_addImageToImage(meanImage, diffImage, meanImage);
 
         // Store the current descriptor
-        const size_t index = i * diff_image->nvox;
-        memcpy(&MINDImgDataPtr[index], diff_image->data, diff_image->nbyper * diff_image->nvox);
+        const size_t index = i * diffImage->nvox;
+        memcpy(&mindImgDataPtr[index], diffImage->data, diffImage->nbyper * diffImage->nvox);
     }
     // Compute the mean over the number of sample
     reg_tools_divideValueToImage(meanImage, meanImage, samplingNbr);
 
     // Compute the MIND descriptor
     int mindIndex;
-    DataType meanValue, max_desc, descValue;
+    DataType meanValue, maxDesc, descValue;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber, samplingNbr, maskPtr, meanImgDataPtr, \
-    MINDImgDataPtr) \
-    private(meanValue, max_desc, descValue, mindIndex)
+    shared(voxelNumber, samplingNbr, mask, meanImgDataPtr, \
+    mindImgDataPtr) \
+    private(meanValue, maxDesc, descValue, mindIndex)
 #endif
     for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) {
-        if (maskPtr[voxelIndex] > -1) {
+        if (mask[voxelIndex] > -1) {
             // Get the mean value for the current voxel
             meanValue = meanImgDataPtr[voxelIndex];
             if (meanValue == 0) {
                 meanValue = std::numeric_limits<DataType>::epsilon();
             }
-            max_desc = 0;
+            maxDesc = 0;
             mindIndex = voxelIndex;
             for (int t = 0; t < samplingNbr; t++) {
-                descValue = (DataType)exp(-MINDImgDataPtr[mindIndex] / meanValue);
-                MINDImgDataPtr[mindIndex] = descValue;
-                max_desc = (std::max)(max_desc, descValue);
+                descValue = (DataType)exp(-mindImgDataPtr[mindIndex] / meanValue);
+                mindImgDataPtr[mindIndex] = descValue;
+                maxDesc = std::max(maxDesc, descValue);
                 mindIndex += voxelNumber;
             }
 
             mindIndex = voxelIndex;
             for (int t = 0; t < samplingNbr; t++) {
-                descValue = MINDImgDataPtr[mindIndex];
-                MINDImgDataPtr[mindIndex] = descValue / max_desc;
+                descValue = mindImgDataPtr[mindIndex];
+                mindImgDataPtr[mindIndex] = descValue / maxDesc;
                 mindIndex += voxelNumber;
             }
         } // mask
     } // voxIndex
     // Mr Propre
-    nifti_image_free(diff_image);
+    nifti_image_free(diffImage);
     nifti_image_free(shiftedImage);
     nifti_image_free(meanImage);
     currentInputImage->data = nullptr;
     nifti_image_free(currentInputImage);
 }
 /* *************************************************************** */
-void GetMINDImageDescriptor(nifti_image* inputImgPtr,
-                            nifti_image* MINDImgPtr,
-                            int *maskPtr,
-                            int descriptorOffset,
-                            int currentTimepoint) {
+void GetMindImageDescriptor(const nifti_image *inputImage,
+                            nifti_image *mindImage,
+                            const int *mask,
+                            const int& descriptorOffset,
+                            const int& currentTimepoint) {
 #ifndef NDEBUG
-    reg_print_fct_debug("GetMINDImageDescriptor()");
+    reg_print_fct_debug("GetMindImageDescriptor()");
 #endif
-    if (inputImgPtr->datatype != MINDImgPtr->datatype) {
-        reg_print_fct_error("reg_mind -- GetMINDImageDescriptor");
+    if (inputImage->datatype != mindImage->datatype) {
+        reg_print_fct_error("reg_mind::GetMindImageDescriptor");
         reg_print_msg_error("The input image and the MIND image must have the same datatype !");
         reg_exit();
     }
 
-    switch (inputImgPtr->datatype) {
+    switch (inputImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        GetMINDImageDescriptor_core<float>(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, currentTimepoint);
+        GetMindImageDescriptorCore<float>(inputImage, mindImage, mask, descriptorOffset, currentTimepoint);
         break;
     case NIFTI_TYPE_FLOAT64:
-        GetMINDImageDescriptor_core<double>(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, currentTimepoint);
+        GetMindImageDescriptorCore<double>(inputImage, mindImage, mask, descriptorOffset, currentTimepoint);
         break;
     default:
-        reg_print_fct_error("GetMINDImageDescriptor");
+        reg_print_fct_error("GetMindImageDescriptor");
         reg_print_msg_error("Input image datatype not supported");
         reg_exit();
         break;
@@ -194,11 +191,11 @@ void GetMINDImageDescriptor(nifti_image* inputImgPtr,
 }
 /* *************************************************************** */
 template <class DataType>
-void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
-                                    nifti_image* MINDSSCImage,
-                                    int *maskPtr,
-                                    int descriptorOffset,
-                                    int currentTimepoint) {
+void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
+                                   nifti_image *mindSscImage,
+                                   const int *mask,
+                                   const int& descriptorOffset,
+                                   const int& currentTimepoint) {
 #ifdef WIN32
     long voxelIndex;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
@@ -208,7 +205,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
 #endif
 
     // Create a pointer to the descriptor image
-    DataType* MINDSSCImgDataPtr = static_cast<DataType*>(MINDSSCImage->data);
+    DataType* mindSscImgDataPtr = static_cast<DataType*>(mindSscImage->data);
 
     // Allocate an image to store the current timepoint reference image
     nifti_image *currentInputImage = nifti_copy_nim_info(inputImage);
@@ -216,18 +213,17 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
     currentInputImage->nt = currentInputImage->dim[4] = 1;
     currentInputImage->nvox = voxelNumber;
     DataType *inputImagePtr = static_cast<DataType*>(inputImage->data);
-    currentInputImage->data = static_cast<void*>(&inputImagePtr[currentTimepoint * voxelNumber]);
+    currentInputImage->data = &inputImagePtr[currentTimepoint * voxelNumber];
 
     // Allocate an image to store the mean image
-    nifti_image *mean_img = nifti_dup(*currentInputImage, false);
-    DataType* meanImgDataPtr = static_cast<DataType*>(mean_img->data);
+    nifti_image *meanImg = nifti_dup(*currentInputImage, false);
+    DataType* meanImgDataPtr = static_cast<DataType*>(meanImg->data);
 
     // Allocate an image to store the warped image
     nifti_image *shiftedImage = nifti_dup(*currentInputImage, false);
 
     // Define the sigma for the convolution
-    float sigma = -0.5;// negative value denotes voxel width
-    //float sigma = -1.0;// negative value denotes voxel width
+    float sigma = -0.5; // negative value denotes voxel width
 
     //2D version
     int samplingNbr = (currentInputImage->nz > 1) ? 6 : 2;
@@ -236,14 +232,14 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
     // Allocation of the difference image
     //std::vector<nifti_image *> vectNiftiImage;
     //for(int i=0;i<samplingNbr;i++) {
-    nifti_image *diff_image = nifti_dup(*currentInputImage, false);
-    int *mask_diff_image = (int*)calloc(diff_image->nvox, sizeof(int));
+    nifti_image *diffImage = nifti_dup(*currentInputImage, false);
+    int *maskDiffImage = (int*)calloc(diffImage->nvox, sizeof(int));
 
-    nifti_image *diff_imageShifted = nifti_dup(*currentInputImage, false);
+    nifti_image *diffImageShifted = nifti_dup(*currentInputImage, false);
 
-    int RSampling3D_x[6] = { +descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0 };
-    int RSampling3D_y[6] = { +descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset };
-    int RSampling3D_z[6] = { +0, +0, +descriptorOffset, +descriptorOffset, +descriptorOffset, +descriptorOffset };
+    int rSamplingX[6] = { +descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0 };
+    int rSamplingY[6] = { +descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset };
+    int rSamplingZ[6] = { +0, +0, +descriptorOffset, +descriptorOffset, +descriptorOffset, +descriptorOffset };
 
     int tx[12] = { -descriptorOffset, +0, -descriptorOffset, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset, +0, +0 };
     int ty[12] = { +0, -descriptorOffset, +0, +descriptorOffset, +0, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset };
@@ -251,94 +247,91 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage,
     int compteurId = 0;
 
     for (int i = 0; i < samplingNbr; i++) {
-        ShiftImage<DataType>(currentInputImage, shiftedImage, maskPtr,
-                             RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]);
-        reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image);
-        reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image);
-        reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr);
+        ShiftImage<DataType>(currentInputImage, shiftedImage, mask, rSamplingX[i], rSamplingY[i], rSamplingZ[i]);
+        reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diffImage);
+        reg_tools_multiplyImageToImage(diffImage, diffImage, diffImage);
+        reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask);
 
         for (int j = 0; j < 2; j++) {
-
-            ShiftImage<DataType>(diff_image, diff_imageShifted, mask_diff_image,
+            ShiftImage<DataType>(diffImage, diffImageShifted, maskDiffImage,
                                  tx[compteurId], ty[compteurId], tz[compteurId]);
 
-            reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img);
+            reg_tools_addImageToImage(meanImg, diffImageShifted, meanImg);
             // Store the current descriptor
-            const size_t index = compteurId * diff_imageShifted->nvox;
-            memcpy(&MINDSSCImgDataPtr[index], diff_imageShifted->data,
-                   diff_imageShifted->nbyper * diff_imageShifted->nvox);
+            const size_t index = compteurId * diffImageShifted->nvox;
+            memcpy(&mindSscImgDataPtr[index], diffImageShifted->data,
+                   diffImageShifted->nbyper * diffImageShifted->nvox);
             compteurId++;
         }
     }
     // Compute the mean over the number of sample
-    reg_tools_divideValueToImage(mean_img, mean_img, lengthDescriptor);
+    reg_tools_divideValueToImage(meanImg, meanImg, lengthDescriptor);
 
-    // Compute the MINDSSC descriptor
+    // Compute the MIND-SSC descriptor
     int mindIndex;
-    DataType meanValue, max_desc, descValue;
+    DataType meanValue, maxDesc, descValue;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber, lengthDescriptor, samplingNbr, maskPtr, meanImgDataPtr, \
-    MINDSSCImgDataPtr) \
-    private(meanValue, max_desc, descValue, mindIndex)
+    shared(voxelNumber, lengthDescriptor, samplingNbr, mask, meanImgDataPtr, mindSscImgDataPtr) \
+    private(meanValue, maxDesc, descValue, mindIndex)
 #endif
     for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) {
-        if (maskPtr[voxelIndex] > -1) {
+        if (mask[voxelIndex] > -1) {
             // Get the mean value for the current voxel
             meanValue = meanImgDataPtr[voxelIndex];
             if (meanValue == 0) {
                 meanValue = std::numeric_limits<DataType>::epsilon();
             }
-            max_desc = 0;
+            maxDesc = 0;
             mindIndex = voxelIndex;
             for (int t = 0; t < lengthDescriptor; t++) {
-                descValue = (DataType)exp(-MINDSSCImgDataPtr[mindIndex] / meanValue);
-                MINDSSCImgDataPtr[mindIndex] = descValue;
-                max_desc = std::max(max_desc, descValue);
+                descValue = (DataType)exp(-mindSscImgDataPtr[mindIndex] / meanValue);
+                mindSscImgDataPtr[mindIndex] = descValue;
+                maxDesc = std::max(maxDesc, descValue);
                 mindIndex += voxelNumber;
             }
 
             mindIndex = voxelIndex;
             for (int t = 0; t < lengthDescriptor; t++) {
-                descValue = MINDSSCImgDataPtr[mindIndex];
-                MINDSSCImgDataPtr[mindIndex] = descValue / max_desc;
+                descValue = mindSscImgDataPtr[mindIndex];
+                mindSscImgDataPtr[mindIndex] = descValue / maxDesc;
                 mindIndex += voxelNumber;
             }
         } // mask
     } // voxIndex
     // Mr Propre
-    nifti_image_free(diff_imageShifted);
-    free(mask_diff_image);
-    nifti_image_free(diff_image);
+    nifti_image_free(diffImageShifted);
+    free(maskDiffImage);
+    nifti_image_free(diffImage);
     nifti_image_free(shiftedImage);
-    nifti_image_free(mean_img);
+    nifti_image_free(meanImg);
     currentInputImage->data = nullptr;
     nifti_image_free(currentInputImage);
 }
 /* *************************************************************** */
-void GetMINDSSCImageDescriptor(nifti_image* inputImgPtr,
-                               nifti_image* MINDSSCImgPtr,
-                               int *maskPtr,
-                               int descriptorOffset,
-                               int currentTimepoint) {
+void GetMindSscImageDescriptor(const nifti_image *inputImage,
+                               nifti_image *mindSscImage,
+                               const int *mask,
+                               const int& descriptorOffset,
+                               const int& currentTimepoint) {
 #ifndef NDEBUG
-    reg_print_fct_debug("GetMINDSSCImageDescriptor()");
+    reg_print_fct_debug("GetMindSscImageDescriptor()");
 #endif
-    if (inputImgPtr->datatype != MINDSSCImgPtr->datatype) {
-        reg_print_fct_error("reg_mindssc -- GetMINDSSCImageDescriptor");
-        reg_print_msg_error("The input image and the MINDSSC image must have the same datatype !");
+    if (inputImage->datatype != mindSscImage->datatype) {
+        reg_print_fct_error("reg_mindssc::GetMindSscImageDescriptor");
+        reg_print_msg_error("The input image and the MINDSSC image must have the same datatype!");
         reg_exit();
     }
 
-    switch (inputImgPtr->datatype) {
+    switch (inputImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        GetMINDSSCImageDescriptor_core<float>(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, currentTimepoint);
+        GetMindSscImageDescriptorCore<float>(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint);
         break;
     case NIFTI_TYPE_FLOAT64:
-        GetMINDSSCImageDescriptor_core<double>(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, currentTimepoint);
+        GetMindSscImageDescriptorCore<double>(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint);
         break;
     default:
-        reg_print_fct_error("GetMINDSSCImageDescriptor");
+        reg_print_fct_error("GetMindSscImageDescriptor");
         reg_print_msg_error("Input image datatype not supported");
         reg_exit();
         break;
@@ -350,7 +343,7 @@ reg_mind::reg_mind(): reg_ssd() {
     this->floatingImageDescriptor = nullptr;
     this->warpedFloatingImageDescriptor = nullptr;
     this->warpedReferenceImageDescriptor = nullptr;
-    this->mind_type = MIND_TYPE;
+    this->mindType = MIND_TYPE;
     this->descriptorOffset = 1;
 #ifndef NDEBUG
     reg_print_msg_debug("reg_mind constructor called");
@@ -408,23 +401,22 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg,
                                warpedGradBw,
                                voxelBasedGradBw);
 
-    this->descriptor_number = 0;
-    if (this->mind_type == MIND_TYPE) {
-        descriptor_number = this->referenceImage->nz > 1 ? 6 : 4;
-    } else if (this->mind_type == MINDSSC_TYPE) {
-        descriptor_number = this->referenceImage->nz > 1 ? 12 : 4;
-
+    this->descriptorNumber = 0;
+    if (this->mindType == MIND_TYPE) {
+        this->descriptorNumber = this->referenceImage->nz > 1 ? 6 : 4;
+    } else if (this->mindType == MINDSSC_TYPE) {
+        this->descriptorNumber = this->referenceImage->nz > 1 ? 12 : 4;
     }
     // Initialise the reference descriptor
     this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImage);
     this->referenceImageDescriptor->dim[0] = this->referenceImageDescriptor->ndim = 4;
-    this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptor_number;
+    this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptorNumber;
     this->referenceImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->referenceImageDescriptor, this->referenceImageDescriptor->ndim);
     this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox * this->referenceImageDescriptor->nbyper);
     // Initialise the warped floating descriptor
     this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImage);
     this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4;
-    this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptor_number;
+    this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptorNumber;
     this->warpedFloatingImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->warpedFloatingImageDescriptor,
                                                                             this->warpedFloatingImageDescriptor->ndim);
     this->warpedFloatingImageDescriptor->data = malloc(this->warpedFloatingImageDescriptor->nvox *
@@ -438,7 +430,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg,
         // Initialise the floating descriptor
         this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImage);
         this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4;
-        this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptor_number;
+        this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptorNumber;
         this->floatingImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->floatingImageDescriptor,
                                                                           this->floatingImageDescriptor->ndim);
         this->floatingImageDescriptor->data = malloc(this->floatingImageDescriptor->nvox *
@@ -446,7 +438,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg,
         // Initialise the warped floating descriptor
         this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImage);
         this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4;
-        this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptor_number;
+        this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptorNumber;
         this->warpedReferenceImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->warpedReferenceImageDescriptor,
                                                                                  this->warpedReferenceImageDescriptor->ndim);
         this->warpedReferenceImageDescriptor->data = malloc(this->warpedReferenceImageDescriptor->nvox *
@@ -459,7 +451,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg,
 
 #ifndef NDEBUG
     char text[255];
-    reg_print_msg_debug("reg_mind::InitialiseMeasure().");
+    reg_print_msg_debug("reg_mind::InitialiseMeasure()");
     sprintf(text, "Active time point:");
     for (int i = 0; i < this->referenceImageDescriptor->nt; ++i)
         if (this->timePointWeightDescriptor[i] > 0)
@@ -468,127 +460,82 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg,
 #endif
 }
 /* *************************************************************** */
-double reg_mind::GetSimilarityMeasureValue() {
-    double MINDValue = 0.;
-    for (int t = 0; t < this->referenceImage->nt; ++t) {
-        if (this->timePointWeight[t] > 0) {
-            size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
-            int *combinedMask = (int*)malloc(voxelNumber * sizeof(int));
-            memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int));
-            reg_tools_removeNanFromMask(this->referenceImage, combinedMask);
-            reg_tools_removeNanFromMask(this->warpedImage, combinedMask);
-
-            if (this->mind_type == MIND_TYPE) {
-                GetMINDImageDescriptor(this->referenceImage,
-                                       this->referenceImageDescriptor,
-                                       combinedMask,
-                                       this->descriptorOffset,
-                                       t);
-                GetMINDImageDescriptor(this->warpedImage,
-                                       this->warpedFloatingImageDescriptor,
-                                       combinedMask,
-                                       this->descriptorOffset,
-                                       t);
-            } else if (this->mind_type == MINDSSC_TYPE) {
-                GetMINDSSCImageDescriptor(this->referenceImage,
-                                          this->referenceImageDescriptor,
-                                          combinedMask,
-                                          this->descriptorOffset,
-                                          t);
-                GetMINDSSCImageDescriptor(this->warpedImage,
-                                          this->warpedFloatingImageDescriptor,
-                                          combinedMask,
-                                          this->descriptorOffset,
-                                          t);
-            }
-
-            switch (this->referenceImageDescriptor->datatype) {
-            case NIFTI_TYPE_FLOAT32:
-                MINDValue += reg_getSSDValue<float>(this->referenceImageDescriptor,
-                                                    this->warpedFloatingImageDescriptor,
-                                                    this->timePointWeightDescriptor,
-                                                    nullptr, // TODO this->forwardJacDetImagePointer,
-                                                    combinedMask,
-                                                    this->currentValue,
-                                                    nullptr);
-                break;
-            case NIFTI_TYPE_FLOAT64:
-                MINDValue += reg_getSSDValue<double>(this->referenceImageDescriptor,
-                                                     this->warpedFloatingImageDescriptor,
-                                                     this->timePointWeightDescriptor,
-                                                     nullptr, // TODO this->forwardJacDetImagePointer,
-                                                     combinedMask,
-                                                     this->currentValue,
-                                                     nullptr);
-                break;
-            default:
-                reg_print_fct_error("reg_mind::GetSimilarityMeasureValue");
-                reg_print_msg_error("Warped pixel type unsupported");
-                reg_exit();
-            }
-            free(combinedMask);
-
-            // Backward computation
-            if (this->isSymmetric) {
-                voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
-                combinedMask = (int*)malloc(voxelNumber * sizeof(int));
-                memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int));
-                reg_tools_removeNanFromMask(this->floatingImage, combinedMask);
-                reg_tools_removeNanFromMask(this->warpedImageBw, combinedMask);
-
-                if (this->mind_type == MIND_TYPE) {
-                    GetMINDImageDescriptor(this->floatingImage,
-                                           this->floatingImageDescriptor,
-                                           combinedMask,
-                                           this->descriptorOffset,
-                                           t);
-                    GetMINDImageDescriptor(this->warpedImageBw,
-                                           this->warpedReferenceImageDescriptor,
-                                           combinedMask,
-                                           this->descriptorOffset,
-                                           t);
-                } else if (this->mind_type == MINDSSC_TYPE) {
-                    GetMINDSSCImageDescriptor(this->floatingImage,
-                                              this->floatingImageDescriptor,
-                                              combinedMask,
-                                              this->descriptorOffset,
-                                              t);
-                    GetMINDSSCImageDescriptor(this->warpedImageBw,
-                                              this->warpedReferenceImageDescriptor,
-                                              combinedMask,
-                                              this->descriptorOffset,
-                                              t);
-                }
+double GetSimilarityMeasureValue(nifti_image *referenceImage,
+                                 nifti_image *referenceImageDescriptor,
+                                 const int *referenceMask,
+                                 nifti_image *warpedImage,
+                                 nifti_image *warpedFloatingImageDescriptor,
+                                 const double *timePointWeight,
+                                 double *timePointWeightDescriptor,
+                                 nifti_image *jacobianDetImage,
+                                 float *currentValue,
+                                 int descriptorOffset,
+                                 const int& referenceTimePoint,
+                                 const int& mindType) {
+    if (referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT32 &&
+        referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT64) {
+        reg_print_fct_error("reg_mind::GetSimilarityMeasureValue");
+        reg_print_msg_error("The reference image descriptor is expected to be of floating precision type");
+        reg_exit();
+    }
 
-                switch (this->floatingImageDescriptor->datatype) {
-                case NIFTI_TYPE_FLOAT32:
-                    MINDValue += reg_getSSDValue<float>(this->floatingImageDescriptor,
-                                                        this->warpedReferenceImageDescriptor,
-                                                        this->timePointWeightDescriptor,
-                                                        nullptr, // TODO this->backwardJacDetImagePointer,
-                                                        combinedMask,
-                                                        this->currentValue,
+    double mind = 0;
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+    unique_ptr<int[]> combinedMask(new int[voxelNumber]);
+    auto GetMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor;
+
+    for (int currentTimepoint = 0; currentTimepoint < referenceTimePoint; ++currentTimepoint) {
+        if (timePointWeight[currentTimepoint] > 0) {
+            memcpy(combinedMask.get(), referenceMask, voxelNumber * sizeof(int));
+            reg_tools_removeNanFromMask(referenceImage, combinedMask.get());
+            reg_tools_removeNanFromMask(warpedImage, combinedMask.get());
+
+            GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.get(), descriptorOffset, currentTimepoint);
+            GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.get(), descriptorOffset, currentTimepoint);
+
+            std::visit([&](auto&& refImgDataType) {
+                using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+                mind += reg_getSsdValue<RefImgDataType>(referenceImageDescriptor,
+                                                        warpedFloatingImageDescriptor,
+                                                        timePointWeightDescriptor,
+                                                        jacobianDetImage,
+                                                        combinedMask.get(),
+                                                        currentValue,
                                                         nullptr);
-                    break;
-                case NIFTI_TYPE_FLOAT64:
-                    MINDValue += reg_getSSDValue<double>(this->floatingImageDescriptor,
-                                                         this->warpedReferenceImageDescriptor,
-                                                         this->timePointWeightDescriptor,
-                                                         nullptr, // TODO this->backwardJacDetImagePointer,
-                                                         combinedMask,
-                                                         this->currentValue,
-                                                         nullptr);
-                    break;
-                default:
-                    reg_print_fct_error("reg_mind::GetSimilarityMeasureValue");
-                    reg_print_msg_error("Warped pixel type unsupported");
-                    reg_exit();
-                }
-                free(combinedMask);
-            }
+            }, NiftiImage::getFloatingDataType(referenceImageDescriptor));
         }
     }
-    return MINDValue;   // (double) this->referenceImageDescriptor->nt;
+    return mind;
+}
+/* *************************************************************** */
+double reg_mind::GetSimilarityMeasureValueFw() {
+    return ::GetSimilarityMeasureValue(this->referenceImage,
+                                       this->referenceImageDescriptor,
+                                       this->referenceMask,
+                                       this->warpedImage,
+                                       this->warpedFloatingImageDescriptor,
+                                       this->timePointWeight,
+                                       this->timePointWeightDescriptor,
+                                       nullptr, // TODO this->forwardJacDetImagePointer,
+                                       this->currentValue,
+                                       this->descriptorOffset,
+                                       this->referenceTimePoint,
+                                       this->mindType);
+}
+/* *************************************************************** */
+double reg_mind::GetSimilarityMeasureValueBw() {
+    return ::GetSimilarityMeasureValue(this->floatingImage,
+                                       this->floatingImageDescriptor,
+                                       this->floatingMask,
+                                       this->warpedImageBw,
+                                       this->warpedReferenceImageDescriptor,
+                                       this->timePointWeight,
+                                       this->timePointWeightDescriptor,
+                                       nullptr, // TODO this->backwardJacDetImagePointer,
+                                       this->currentValue,
+                                       this->descriptorOffset,
+                                       this->referenceTimePoint,
+                                       this->mindType);
 }
 /* *************************************************************** */
 void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
@@ -604,28 +551,28 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     reg_tools_removeNanFromMask(this->referenceImage, combinedMask);
     reg_tools_removeNanFromMask(this->warpedImage, combinedMask);
 
-    if (this->mind_type == MIND_TYPE) {
+    if (this->mindType == MIND_TYPE) {
         // Compute the reference image descriptors
-        GetMINDImageDescriptor(this->referenceImage,
+        GetMindImageDescriptor(this->referenceImage,
                                this->referenceImageDescriptor,
                                combinedMask,
                                this->descriptorOffset,
                                currentTimepoint);
         // Compute the warped floating image descriptors
-        GetMINDImageDescriptor(this->warpedImage,
+        GetMindImageDescriptor(this->warpedImage,
                                this->warpedFloatingImageDescriptor,
                                combinedMask,
                                this->descriptorOffset,
                                currentTimepoint);
-    } else if (this->mind_type == MINDSSC_TYPE) {
+    } else if (this->mindType == MINDSSC_TYPE) {
         // Compute the reference image descriptors
-        GetMINDSSCImageDescriptor(this->referenceImage,
+        GetMindSscImageDescriptor(this->referenceImage,
                                   this->referenceImageDescriptor,
                                   combinedMask,
                                   this->descriptorOffset,
                                   currentTimepoint);
         // Compute the warped floating image descriptors
-        GetMINDSSCImageDescriptor(this->warpedImage,
+        GetMindSscImageDescriptor(this->warpedImage,
                                   this->warpedFloatingImageDescriptor,
                                   combinedMask,
                                   this->descriptorOffset,
@@ -633,7 +580,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     }
 
 
-    for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) {
+    for (int desc_index = 0; desc_index < this->descriptorNumber; ++desc_index) {
         // Compute the warped image descriptors gradient
         reg_getImageGradient_symDiff(this->warpedFloatingImageDescriptor,
                                      this->warpedGradient,
@@ -644,7 +591,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
         // Compute the gradient of the ssd for the forward transformation
         switch (referenceImageDescriptor->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedSSDGradient<float>(this->referenceImageDescriptor,
+            reg_getVoxelBasedSsdGradient<float>(this->referenceImageDescriptor,
                                                 this->warpedFloatingImageDescriptor,
                                                 this->warpedGradient,
                                                 this->voxelBasedGradient,
@@ -655,7 +602,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                                 nullptr);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedSSDGradient<double>(this->referenceImageDescriptor,
+            reg_getVoxelBasedSsdGradient<double>(this->referenceImageDescriptor,
                                                  this->warpedFloatingImageDescriptor,
                                                  this->warpedGradient,
                                                  this->voxelBasedGradient,
@@ -681,31 +628,31 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
         reg_tools_removeNanFromMask(this->floatingImage, combinedMask);
         reg_tools_removeNanFromMask(this->warpedImageBw, combinedMask);
 
-        if (this->mind_type == MIND_TYPE) {
-            GetMINDImageDescriptor(this->floatingImage,
+        if (this->mindType == MIND_TYPE) {
+            GetMindImageDescriptor(this->floatingImage,
                                    this->floatingImageDescriptor,
                                    combinedMask,
                                    this->descriptorOffset,
                                    currentTimepoint);
-            GetMINDImageDescriptor(this->warpedImageBw,
+            GetMindImageDescriptor(this->warpedImageBw,
                                    this->warpedReferenceImageDescriptor,
                                    combinedMask,
                                    this->descriptorOffset,
                                    currentTimepoint);
-        } else if (this->mind_type == MINDSSC_TYPE) {
-            GetMINDSSCImageDescriptor(this->floatingImage,
+        } else if (this->mindType == MINDSSC_TYPE) {
+            GetMindSscImageDescriptor(this->floatingImage,
                                       this->floatingImageDescriptor,
                                       combinedMask,
                                       this->descriptorOffset,
                                       currentTimepoint);
-            GetMINDSSCImageDescriptor(this->warpedImageBw,
+            GetMindSscImageDescriptor(this->warpedImageBw,
                                       this->warpedReferenceImageDescriptor,
                                       combinedMask,
                                       this->descriptorOffset,
                                       currentTimepoint);
         }
 
-        for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) {
+        for (int desc_index = 0; desc_index < this->descriptorNumber; ++desc_index) {
             reg_getImageGradient_symDiff(this->warpedReferenceImageDescriptor,
                                          this->warpedGradientBw,
                                          combinedMask,
@@ -715,7 +662,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
             // Compute the gradient of the nmi for the backward transformation
             switch (floatingImage->datatype) {
             case NIFTI_TYPE_FLOAT32:
-                reg_getVoxelBasedSSDGradient<float>(this->floatingImageDescriptor,
+                reg_getVoxelBasedSsdGradient<float>(this->floatingImageDescriptor,
                                                     this->warpedReferenceImageDescriptor,
                                                     this->warpedGradientBw,
                                                     this->voxelBasedGradientBw,
@@ -726,7 +673,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                                     nullptr);
                 break;
             case NIFTI_TYPE_FLOAT64:
-                reg_getVoxelBasedSSDGradient<double>(this->floatingImageDescriptor,
+                reg_getVoxelBasedSsdGradient<double>(this->floatingImageDescriptor,
                                                      this->warpedReferenceImageDescriptor,
                                                      this->warpedGradientBw,
                                                      this->voxelBasedGradientBw,
@@ -747,7 +694,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
 }
 /* *************************************************************** */
 reg_mindssc::reg_mindssc(): reg_mind() {
-    this->mind_type = MINDSSC_TYPE;
+    this->mindType = MINDSSC_TYPE;
 #ifndef NDEBUG
     reg_print_msg_debug("reg_mindssc constructor called");
 #endif
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index cf09a4a8..9eb88336 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -30,18 +30,20 @@ class reg_mind: public reg_ssd {
 
     /// @brief Initialise the reg_mind object
     virtual void InitialiseMeasure(nifti_image *refImg,
-                           nifti_image *floImg,
-                           int *refMask,
-                           nifti_image *warpedImg,
-                           nifti_image *warpedGrad,
-                           nifti_image *voxelBasedGrad,
-                           nifti_image *localWeightSim = nullptr,
-                           int *floMask = nullptr,
-                           nifti_image *warpedImgBw = nullptr,
-                           nifti_image *warpedGradBw = nullptr,
-                           nifti_image *voxelBasedGradBw = nullptr) override;
-    /// @brief Returns the mind based measure of similarity value
-    virtual double GetSimilarityMeasureValue() override;
+                                   nifti_image *floImg,
+                                   int *refMask,
+                                   nifti_image *warpedImg,
+                                   nifti_image *warpedGrad,
+                                   nifti_image *voxelBasedGrad,
+                                   nifti_image *localWeightSim = nullptr,
+                                   int *floMask = nullptr,
+                                   nifti_image *warpedImgBw = nullptr,
+                                   nifti_image *warpedGradBw = nullptr,
+                                   nifti_image *voxelBasedGradBw = nullptr) override;
+    /// @brief Returns the forward mind-based measure of similarity value
+    virtual double GetSimilarityMeasureValueFw() override;
+    /// @brief Returns the backward mind-based measure of similarity value
+    virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel based gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
     virtual void SetDescriptorOffset(int);
@@ -55,8 +57,8 @@ class reg_mind: public reg_ssd {
     double timePointWeightDescriptor[255] = {0};
 
     int descriptorOffset;
-    int mind_type;
-    int descriptor_number;
+    int mindType;
+    int descriptorNumber;
 };
 /* *************************************************************** */
 /// @brief MIND-SSC measure of similarity class
@@ -69,16 +71,16 @@ class reg_mindssc: public reg_mind {
 };
 /* *************************************************************** */
 extern "C++"
-void GetMINDImageDescriptor(nifti_image *inputImgPtr,
-                           nifti_image *MINDImgPtr,
-                           int *mask,
-                           int descriptorOffset,
-                           int currentTimepoint);
+void GetMindImageDescriptor(const nifti_image *inputImage,
+                            nifti_image *mindImage,
+                            const int *mask,
+                            const int& descriptorOffset,
+                            const int& currentTimepoint);
 /* *************************************************************** */
 extern "C++"
-void GetMINDSSCImageDescriptor(nifti_image *inputImgPtr,
-                              nifti_image *MINDSSCImgPtr,
-                              int *mask,
-                              int descriptorOffset,
-                              int currentTimepoint);
+void GetMindSscImageDescriptor(const nifti_image *inputImage,
+                               nifti_image *mindSscImage,
+                               const int *mask,
+                               const int& descriptorOffset,
+                               const int& currentTimepoint);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 23288d73..4036cf08 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -213,7 +213,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                      const unsigned short *floatingBinNumber,
                      const unsigned short *totalBinNumber,
                      double **jointHistogramLog,
-                     double **jointhistogramPro,
+                     double **jointHistogramPro,
                      double **entropyValues,
                      const int *referenceMask) {
     // Create pointers to the image data arrays
@@ -230,7 +230,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             reg_print_msg_debug(text);
 #endif
             // Define some pointers to the current histograms
-            double *jointHistoProPtr = jointhistogramPro[t];
+            double *jointHistoProPtr = jointHistogramPro[t];
             double *jointHistoLogPtr = jointHistogramLog[t];
             // Empty the joint histogram
             memset(jointHistoProPtr, 0, totalBinNumber[t] * sizeof(double));
@@ -355,71 +355,65 @@ void reg_getNMIValue(const nifti_image *referenceImage,
     } // iterate over all time point in the reference image
 }
 /* *************************************************************** */
-double reg_nmi::GetSimilarityMeasureValue() {
-    // Check that all the specified image are of the same datatype
-    if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 && this->referenceImage->datatype != NIFTI_TYPE_FLOAT64) {
-        reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-        reg_print_msg_error("Input images are expected to be of floating precision type");
-        reg_exit();
-    }
-    if (this->warpedImage->datatype != this->referenceImage->datatype) {
-        reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-        reg_print_msg_error("Both input images are expected to have the same type");
-        reg_exit();
-    }
+double GetSimilarityMeasureValue(const nifti_image *referenceImage,
+                                 const nifti_image *warpedImage,
+                                 const double *timePointWeight,
+                                 const unsigned short *referenceBinNumber,
+                                 const unsigned short *floatingBinNumber,
+                                 const unsigned short *totalBinNumber,
+                                 double **jointHistogramLog,
+                                 double **jointHistogramPro,
+                                 double **entropyValues,
+                                 const int *referenceMask,
+                                 const int& referenceTimePoint) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
-        reg_getNMIValue<RefImgDataType>(this->referenceImage,
-                                        this->warpedImage,
-                                        this->timePointWeight,
-                                        this->referenceBinNumber,
-                                        this->floatingBinNumber,
-                                        this->totalBinNumber,
-                                        this->jointHistogramLog,
-                                        this->jointHistogramPro,
-                                        this->entropyValues,
-                                        this->referenceMask);
-    }, NiftiImage::getFloatingDataType(this->referenceImage));
-
-    if (this->isSymmetric) {
-        // Check that all the specified image are of the same datatype
-        if (this->floatingImage->datatype != NIFTI_TYPE_FLOAT32 && this->floatingImage->datatype != NIFTI_TYPE_FLOAT64) {
-            reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-            reg_print_msg_error("Input images are expected to be of floating precision type");
-            reg_exit();
-        }
-        if (this->floatingImage->datatype != this->warpedImageBw->datatype) {
-            reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()");
-            reg_print_msg_error("Both input images are expected to have the same type");
-            reg_exit();
-        }
-        std::visit([&](auto&& floImgDataType) {
-            using FloImgDataType = std::decay_t<decltype(floImgDataType)>;
-            reg_getNMIValue<FloImgDataType>(this->floatingImage,
-                                            this->warpedImageBw,
-                                            this->timePointWeight,
-                                            this->floatingBinNumber,
-                                            this->referenceBinNumber,
-                                            this->totalBinNumber,
-                                            this->jointHistogramLogBw,
-                                            this->jointHistogramProBw,
-                                            this->entropyValuesBw,
-                                            this->floatingMask);
-        }, NiftiImage::getFloatingDataType(this->floatingImage));
-    }
+        reg_getNMIValue<RefImgDataType>(referenceImage,
+                                        warpedImage,
+                                        timePointWeight,
+                                        referenceBinNumber,
+                                        floatingBinNumber,
+                                        totalBinNumber,
+                                        jointHistogramLog,
+                                        jointHistogramPro,
+                                        entropyValues,
+                                        referenceMask);
+    }, NiftiImage::getFloatingDataType(referenceImage));
 
-    double nmiFw = 0, nmiBw = 0;
-    for (int t = 0; t < this->referenceTimePoint; ++t) {
-        if (this->timePointWeight[t] > 0) {
-            nmiFw += timePointWeight[t] * (this->entropyValues[t][0] + this->entropyValues[t][1]) / this->entropyValues[t][2];
-            if (this->isSymmetric)
-                nmiBw += timePointWeight[t] * (this->entropyValuesBw[t][0] + this->entropyValuesBw[t][1]) / this->entropyValuesBw[t][2];
-        }
+    double nmi = 0;
+    for (int t = 0; t < referenceTimePoint; ++t) {
+        if (timePointWeight[t] > 0)
+            nmi += timePointWeight[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2];
     }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi::GetSimilarityMeasureValue called");
-#endif
-    return nmiFw + nmiBw;
+    return nmi;
+}
+/* *************************************************************** */
+double reg_nmi::GetSimilarityMeasureValueFw() {
+    return ::GetSimilarityMeasureValue(this->referenceImage,
+                                       this->warpedImage,
+                                       this->timePointWeight,
+                                       this->referenceBinNumber,
+                                       this->floatingBinNumber,
+                                       this->totalBinNumber,
+                                       this->jointHistogramLog,
+                                       this->jointHistogramPro,
+                                       this->entropyValues,
+                                       this->referenceMask,
+                                       this->referenceTimePoint);
+}
+/* *************************************************************** */
+double reg_nmi::GetSimilarityMeasureValueBw() {
+    return ::GetSimilarityMeasureValue(this->floatingImage,
+                                       this->warpedImageBw,
+                                       this->timePointWeight,
+                                       this->floatingBinNumber,
+                                       this->referenceBinNumber,
+                                       this->totalBinNumber,
+                                       this->jointHistogramLogBw,
+                                       this->jointHistogramProBw,
+                                       this->entropyValuesBw,
+                                       this->floatingMask,
+                                       this->referenceTimePoint);
 }
 /* *************************************************************** */
 template <class DataType>
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 78cd06ad..3f66e70e 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -38,8 +38,10 @@ class reg_nmi: public reg_measure {
                                    nifti_image *warpedImgBw = nullptr,
                                    nifti_image *warpedGradBw = nullptr,
                                    nifti_image *voxelBasedGradBw = nullptr) override;
-    /// @brief Returns the nmi value
-    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Returns the nmi value forwards
+    virtual double GetSimilarityMeasureValueFw() override;
+    /// @brief Returns the nmi value backwards
+    virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel based nmi gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
 
@@ -84,7 +86,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                      const unsigned short *floatingBinNumber,
                      const unsigned short *totalBinNumber,
                      double **jointHistogramLog,
-                     double **jointhistogramPro,
+                     double **jointHistogramPro,
                      double **entropyValues,
                      const int *referenceMask);
 /* *************************************************************** */
@@ -213,8 +215,10 @@ class reg_multichannel_nmi: public reg_measure {
     /// @brief reg_multichannel_nmi class destructor
     virtual ~reg_multichannel_nmi() {}
 
-    /// @brief Returns the nmi value
-    virtual double GetSimilarityMeasureValue() override { return 0; }
+    /// @brief Returns the nmi value forwards
+    virtual double GetSimilarityMeasureValueFw() override { return 0; }
+    /// @brief Returns the nmi value backwards
+    virtual double GetSimilarityMeasureValueBw() override { return 0; }
 
     /// @brief Compute the voxel based nmi gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index ac3a3a4b..19115e20 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -80,7 +80,7 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg,
 #endif
 #ifndef NDEBUG
     char text[255];
-    reg_print_msg_debug("reg_ssd::InitialiseMeasure().");
+    reg_print_msg_debug("reg_ssd::InitialiseMeasure()");
     for (int i = 0; i < this->referenceImage->nt; ++i) {
         sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
         reg_print_msg_debug(text);
@@ -98,13 +98,13 @@ void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) {
 }
 /* *************************************************************** */
 template<class DataType>
-double reg_getSSDValue(nifti_image *referenceImage,
-                       nifti_image *warpedImage,
-                       double *timePointWeight,
-                       nifti_image *jacobianDetImage,
-                       int *mask,
+double reg_getSsdValue(const nifti_image *referenceImage,
+                       const nifti_image *warpedImage,
+                       const double *timePointWeight,
+                       const nifti_image *jacobianDetImage,
+                       const int *mask,
                        float *currentValue,
-                       nifti_image *localWeightSimImage) {
+                       const nifti_image *localWeightSim) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -113,34 +113,34 @@ double reg_getSSDValue(nifti_image *referenceImage,
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
     // Create pointers to the reference and warped image data
-    DataType *referencePtr = static_cast<DataType*>(referenceImage->data);
-    DataType *warpedPtr = static_cast<DataType*>(warpedImage->data);
+    const DataType *referencePtr = static_cast<DataType*>(referenceImage->data);
+    const DataType *warpedPtr = static_cast<DataType*>(warpedImage->data);
     // Create a pointer to the Jacobian determinant image if defined
-    DataType *jacDetPtr = nullptr;
+    const DataType *jacDetPtr = nullptr;
     if (jacobianDetImage != nullptr)
         jacDetPtr = static_cast<DataType*>(jacobianDetImage->data);
     // Create a pointer to the local weight image if defined
-    DataType *localWeightPtr = nullptr;
-    if (localWeightSimImage != nullptr)
-        localWeightPtr = static_cast<DataType*>(localWeightSimImage->data);
+    const DataType *localWeightPtr = nullptr;
+    if (localWeightSim != nullptr)
+        localWeightPtr = static_cast<DataType*>(localWeightSim->data);
 
-    double SSD_global = 0;
+    double ssdGlobal = 0;
     double refValue, warValue, diff;
 
     // Loop over the different time points
     for (int time = 0; time < referenceImage->nt; ++time) {
         if (timePointWeight[time] > 0) {
             // Create pointers to the current time point of the reference and warped images
-            DataType *currentRefPtr = &referencePtr[time * voxelNumber];
-            DataType *currentWarPtr = &warpedPtr[time * voxelNumber];
+            const DataType *currentRefPtr = &referencePtr[time * voxelNumber];
+            const DataType *currentWarPtr = &warpedPtr[time * voxelNumber];
 
-            double SSD_local = 0., n = 0.;
+            double ssdLocal = 0, n = 0;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \
     jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \
     private(refValue, warValue, diff) \
-    reduction(+:SSD_local) \
+    reduction(+:ssdLocal) \
     reduction(+:n)
 #endif
             for (voxel = 0; voxel < voxelNumber; ++voxel) {
@@ -158,108 +158,78 @@ double reg_getSSDValue(nifti_image *referenceImage,
 #endif
                         // Jacobian determinant modulation of the ssd if required
                         if (jacDetPtr != nullptr) {
-                            SSD_local += diff * jacDetPtr[voxel];
+                            ssdLocal += diff * jacDetPtr[voxel];
                             n += jacDetPtr[voxel];
                         } else if (localWeightPtr != nullptr) {
-                            SSD_local += diff * localWeightPtr[voxel];
+                            ssdLocal += diff * localWeightPtr[voxel];
                             n += localWeightPtr[voxel];
                         } else {
-                            SSD_local += diff;
+                            ssdLocal += diff;
                             n += 1.0;
                         }
                     }
                 }
             }
 
-            SSD_local *= timePointWeight[time];
-            currentValue[time] = static_cast<float>(-SSD_local);
-            SSD_global -= SSD_local / n;
+            ssdLocal *= timePointWeight[time];
+            currentValue[time] = static_cast<float>(-ssdLocal);
+            ssdGlobal -= ssdLocal / n;
         }
     }
-    return SSD_global;
+    return ssdGlobal;
 }
-template double reg_getSSDValue<float>(nifti_image*, nifti_image*, double*, nifti_image*, int*, float*, nifti_image*);
-template double reg_getSSDValue<double>(nifti_image*, nifti_image*, double*, nifti_image*, int*, float*, nifti_image*);
+template double reg_getSsdValue<float>(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, float*, const nifti_image*);
+template double reg_getSsdValue<double>(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, float*, const nifti_image*);
 /* *************************************************************** */
-double reg_ssd::GetSimilarityMeasureValue() {
-    // Check that all the specified image are of the same datatype
-    if (this->warpedImage->datatype != this->referenceImage->datatype) {
-        reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
-        reg_print_msg_error("Both input images are expected to have the same type");
-        reg_exit();
-    }
-    double SSDValue = 0;
-    switch (this->referenceImage->datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        SSDValue = reg_getSSDValue<float>(this->referenceImage,
-                                          this->warpedImage,
-                                          this->timePointWeight,
-                                          nullptr, // TODO this->forwardJacDetImagePointer,
-                                          this->referenceMask,
-                                          this->currentValue,
-                                          this->localWeightSim);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        SSDValue = reg_getSSDValue<double>(this->referenceImage,
-                                           this->warpedImage,
-                                           this->timePointWeight,
-                                           nullptr, // TODO this->forwardJacDetImagePointer,
-                                           this->referenceMask,
-                                           this->currentValue,
-                                           this->localWeightSim);
-        break;
-    default:
-        reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
-        reg_print_msg_error("Warped pixel type unsupported");
-        reg_exit();
-    }
-
-    // Backward computation
-    if (this->isSymmetric) {
-        // Check that all the specified image are of the same datatype
-        if (this->warpedImageBw->datatype != this->floatingImage->datatype) {
-            reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
-            reg_print_msg_error("Both input images are expected to have the same type");
-            reg_exit();
-        }
-        switch (this->floatingImage->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            SSDValue += reg_getSSDValue<float>(this->floatingImage,
-                                               this->warpedImageBw,
-                                               this->timePointWeight,
-                                               nullptr, // TODO this->backwardJacDetImagePointer,
-                                               this->floatingMask,
-                                               this->currentValue,
-                                               nullptr);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            SSDValue += reg_getSSDValue<double>(this->floatingImage,
-                                                this->warpedImageBw,
-                                                this->timePointWeight,
-                                                nullptr, // TODO this->backwardJacDetImagePointer,
-                                                this->floatingMask,
-                                                this->currentValue,
-                                                nullptr);
-            break;
-        default:
-            reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue");
-            reg_print_msg_error("Warped pixel type unsupported");
-            reg_exit();
-        }
-    }
-    return SSDValue;
+double GetSimilarityMeasureValue(const nifti_image *referenceImage,
+                                 const nifti_image *warpedImage,
+                                 const double *timePointWeight,
+                                 const nifti_image *jacobianDetImage,
+                                 const int *mask,
+                                 float *currentValue,
+                                 const nifti_image *localWeightSim) {
+    return std::visit([&](auto&& refImgDataType) {
+        using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+        return reg_getSsdValue<RefImgDataType>(referenceImage,
+                                               warpedImage,
+                                               timePointWeight,
+                                               jacobianDetImage,
+                                               mask,
+                                               currentValue,
+                                               localWeightSim);
+    }, NiftiImage::getFloatingDataType(referenceImage));
+}
+/* *************************************************************** */
+double reg_ssd::GetSimilarityMeasureValueFw() {
+    return ::GetSimilarityMeasureValue(this->referenceImage,
+                                       this->warpedImage,
+                                       this->timePointWeight,
+                                       nullptr, // TODO this->forwardJacDetImagePointer,
+                                       this->referenceMask,
+                                       this->currentValue,
+                                       this->localWeightSim);
+}
+/* *************************************************************** */
+double reg_ssd::GetSimilarityMeasureValueBw() {
+    return ::GetSimilarityMeasureValue(this->floatingImage,
+                                       this->warpedImageBw,
+                                       this->timePointWeight,
+                                       nullptr, // TODO this->backwardJacDetImagePointer,
+                                       this->floatingMask,
+                                       this->currentValue,
+                                       nullptr);
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
-                                  nifti_image *warpedImage,
-                                  nifti_image *warpedGradient,
+void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
+                                  const nifti_image *warpedImage,
+                                  const nifti_image *warpedGradient,
                                   nifti_image *measureGradientImage,
-                                  nifti_image *jacobianDetImage,
-                                  int *mask,
-                                  int currentTimepoint,
-                                  double timepointWeight,
-                                  nifti_image *localWeightSimImage) {
+                                  const nifti_image *jacobianDetImage,
+                                  const int *mask,
+                                  const int& currentTimepoint,
+                                  const double& timepointWeight,
+                                  const nifti_image *localWeightSim) {
     if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) {
         reg_print_fct_error("reg_getVoxelBasedSSDGradient");
         reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
@@ -274,33 +244,33 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
     // Pointers to the image data
-    DataType *refImagePtr = static_cast<DataType *>(referenceImage->data);
-    DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
-    DataType *warImagePtr = static_cast<DataType *>(warpedImage->data);
-    DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
+    const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
+    const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
-    DataType *spatialGradPtrX = static_cast<DataType *>(warpedGradient->data);
-    DataType *spatialGradPtrY = &spatialGradPtrX[voxelNumber];
-    DataType *spatialGradPtrZ = nullptr;
+    const DataType *spatialGradPtrX = static_cast<DataType*>(warpedGradient->data);
+    const DataType *spatialGradPtrY = &spatialGradPtrX[voxelNumber];
+    const DataType *spatialGradPtrZ = nullptr;
     if (referenceImage->nz > 1)
         spatialGradPtrZ = &spatialGradPtrY[voxelNumber];
 
     // Pointers to the measure of similarity gradient
-    DataType *measureGradPtrX = static_cast<DataType *>(measureGradientImage->data);
+    DataType *measureGradPtrX = static_cast<DataType*>(measureGradientImage->data);
     DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
     DataType *measureGradPtrZ = nullptr;
     if (referenceImage->nz > 1)
         measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
     // Create a pointer to the Jacobian determinant values if defined
-    DataType *jacDetPtr = nullptr;
+    const DataType *jacDetPtr = nullptr;
     if (jacobianDetImage != nullptr)
-        jacDetPtr = static_cast<DataType *>(jacobianDetImage->data);
+        jacDetPtr = static_cast<DataType*>(jacobianDetImage->data);
     // Create a pointer to the local weight image if defined
-    DataType *localWeightPtr = nullptr;
-    if (localWeightSimImage != nullptr)
-        localWeightPtr = static_cast<DataType *>(localWeightSimImage->data);
+    const DataType *localWeightPtr = nullptr;
+    if (localWeightSim != nullptr)
+        localWeightPtr = static_cast<DataType*>(localWeightSim->data);
 
     // find number of active voxels and correct weight
     double activeVoxel_num = 0;
@@ -310,7 +280,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                 activeVoxel_num += 1.0;
         }
     }
-    double adjusted_weight = timepointWeight / activeVoxel_num;
+    double adjustedWeight = timepointWeight / activeVoxel_num;
 
     double refValue, warValue, common;
 
@@ -319,13 +289,13 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
     shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, \
     mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \
     measureGradPtrX, measureGradPtrY, measureGradPtrZ, voxelNumber, \
-    localWeightPtr, adjusted_weight) \
+    localWeightPtr, adjustedWeight) \
     private(refValue, warValue, common)
 #endif
     for (voxel = 0; voxel < voxelNumber; voxel++) {
         if (mask[voxel] > -1) {
-            refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter);
-            warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter);
+            refValue = currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter;
+            warValue = currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter;
             if (refValue == refValue && warValue == warValue) {
 #ifdef MRF_USE_SAD
                 common = refValue > warValue ? -1.f : 1.f;
@@ -338,25 +308,23 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
                 else if (localWeightPtr != nullptr)
                     common *= localWeightPtr[voxel];
 
-                common *= adjusted_weight;
+                common *= adjustedWeight;
 
                 if (spatialGradPtrX[voxel] == spatialGradPtrX[voxel])
-                    measureGradPtrX[voxel] += (DataType)(common * spatialGradPtrX[voxel]);
+                    measureGradPtrX[voxel] += static_cast<DataType>(common * spatialGradPtrX[voxel]);
                 if (spatialGradPtrY[voxel] == spatialGradPtrY[voxel])
-                    measureGradPtrY[voxel] += (DataType)(common * spatialGradPtrY[voxel]);
+                    measureGradPtrY[voxel] += static_cast<DataType>(common * spatialGradPtrY[voxel]);
 
                 if (measureGradPtrZ != nullptr) {
                     if (spatialGradPtrZ[voxel] == spatialGradPtrZ[voxel])
-                        measureGradPtrZ[voxel] += (DataType)(common * spatialGradPtrZ[voxel]);
+                        measureGradPtrZ[voxel] += static_cast<DataType>(common * spatialGradPtrZ[voxel]);
                 }
             }
         }
     }
 }
-template void reg_getVoxelBasedSSDGradient<float>
-(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*);
-template void reg_getVoxelBasedSSDGradient<double>
-(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*);
+template void reg_getVoxelBasedSsdGradient<float>(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*);
+template void reg_getVoxelBasedSsdGradient<double>(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*);
 /* *************************************************************** */
 void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // Check if the specified time point exists and is active
@@ -376,7 +344,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
     // Compute the gradient of the ssd for the forward transformation
     switch (dtype) {
     case NIFTI_TYPE_FLOAT32:
-        reg_getVoxelBasedSSDGradient<float>(this->referenceImage,
+        reg_getVoxelBasedSsdGradient<float>(this->referenceImage,
                                             this->warpedImage,
                                             this->warpedGradient,
                                             this->voxelBasedGradient,
@@ -387,7 +355,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                             this->localWeightSim);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_getVoxelBasedSSDGradient<double>(this->referenceImage,
+        reg_getVoxelBasedSsdGradient<double>(this->referenceImage,
                                              this->warpedImage,
                                              this->warpedGradient,
                                              this->voxelBasedGradient,
@@ -415,7 +383,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
         // Compute the gradient of the nmi for the backward transformation
         switch (dtype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedSSDGradient<float>(this->floatingImage,
+            reg_getVoxelBasedSsdGradient<float>(this->floatingImage,
                                                 this->warpedImageBw,
                                                 this->warpedGradientBw,
                                                 this->voxelBasedGradientBw,
@@ -426,7 +394,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                                 nullptr);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedSSDGradient<double>(this->floatingImage,
+            reg_getVoxelBasedSsdGradient<double>(this->floatingImage,
                                                  this->warpedImageBw,
                                                  this->warpedGradientBw,
                                                  this->voxelBasedGradientBw,
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index 5492f60c..43dbefe3 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -16,7 +16,6 @@
 
 #include "_reg_measure.h"
 
-/* *************************************************************** */
 /* *************************************************************** */
 /// @brief SSD measure of similarity class
 class reg_ssd: public reg_measure {
@@ -40,8 +39,10 @@ class reg_ssd: public reg_measure {
                                    nifti_image *voxelBasedGradBw = nullptr) override;
     /// @brief Define if the specified time point should be normalised
     void SetNormaliseTimepoint(int timepoint, bool normalise);
-    /// @brief Returns the ssd value
-    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Returns the ssd value forwards
+    virtual double GetSimilarityMeasureValueFw() override;
+    /// @brief Returns the ssd value backwards
+    virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel based ssd gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
     /// @brief Here
@@ -56,12 +57,11 @@ class reg_ssd: public reg_measure {
     bool normaliseTimePoint[255];
 };
 /* *************************************************************** */
-
 /** @brief Computes and returns the SSD between two input images
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
  * @param activeTimePoint Specified which time point volumes have to be considered
- * @param jacobianDeterminantImage Image that contains the Jacobian
+ * @param jacobianDetImage Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
  * image is used to modulate the SSD. The argument is ignored if the
  * pointer is set to nullptr
@@ -70,22 +70,22 @@ class reg_ssd: public reg_measure {
  * @return Returns the computed sum squared difference
  */
 extern "C++" template <class DataType>
-double reg_getSSDValue(nifti_image *referenceImage,
-                       nifti_image *warpedImage,
-                       double *timePointWeight,
-                       nifti_image *jacobianDeterminantImage,
-                       int *mask,
+double reg_getSsdValue(const nifti_image *referenceImage,
+                       const nifti_image *warpedImage,
+                       const double *timePointWeight,
+                       const nifti_image *jacobianDetImage,
+                       const int *mask,
                        float *currentValue,
-                       nifti_image *localWeightImage);
-
+                       const nifti_image *localWeightSim);
+/* *************************************************************** */
 /** @brief Compute a voxel based gradient of the sum squared difference.
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
  * @param activeTimePoint Specified which time point volumes have to be considered
- * @param warpedImageGradient Spatial gradient of the input warped image
- * @param ssdGradientImage Output image that will be updated with the
+ * @param warpedGradient Spatial gradient of the input warped image
+ * @param measureGradientImage Output image that will be updated with the
  * value of the SSD gradient
- * @param jacobianDeterminantImage Image that contains the Jacobian
+ * @param jacobianDetImage Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
  * image is used to modulate the SSD. The argument is ignored if the
  * pointer is set to nullptr
@@ -93,12 +93,13 @@ double reg_getSSDValue(nifti_image *referenceImage,
  * should be considered. If set to nullptr, all voxels are considered
  */
 extern "C++" template <class DataType>
-void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage,
-                                  nifti_image *warpedImage,
-                                  nifti_image *warpedImageGradient,
-                                  nifti_image *ssdGradientImage,
-                                  nifti_image *jacobianDeterminantImage,
-                                  int *mask,
-                                  int currentTimepoint,
-                                  double timepointWeight,
-                                  nifti_image *localWeightImage);
+void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
+                                  const nifti_image *warpedImage,
+                                  const nifti_image *warpedGradient,
+                                  nifti_image *measureGradientImage,
+                                  const nifti_image *jacobianDetImage,
+                                  const int *mask,
+                                  const int& currentTimepoint,
+                                  const double& timepointWeight,
+                                  const nifti_image *localWeightSim);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index d91c39d6..1ff52195 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -99,7 +99,8 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
 public:
     /// @brief reg_lncc class constructor
     reg_lncc_gpu() {
-        fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH LNCC YET\n");
+        reg_print_fct_error("reg_lncc_gpu::reg_lncc_gpu");
+        reg_print_msg_error("CUDA CANNOT BE USED WITH LNCC YET");
         reg_exit();
     }
     /// @brief reg_lncc class destructor
@@ -127,8 +128,10 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
                                    float4 *warpedGradBwCuda = nullptr,
                                    nifti_image *voxelBasedGradBw = nullptr,
                                    float4 *voxelBasedGradBwCuda = nullptr) override {}
-    /// @brief Returns the lncc value
-    virtual double GetSimilarityMeasureValue() override { return 0; }
+    /// @brief Returns the lncc value forwards
+    virtual double GetSimilarityMeasureValueFw() override { return 0; }
+    /// @brief Returns the lncc value backwards
+    virtual double GetSimilarityMeasureValueBw() override { return 0; }
     /// @brief Compute the voxel based lncc gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
 };
@@ -166,8 +169,10 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
                                    float4 *warpedGradBwCuda = nullptr,
                                    nifti_image *voxelBasedGradBw = nullptr,
                                    float4 *voxelBasedGradBwCuda = nullptr) override {}
-    /// @brief Returns the kld value
-    virtual double GetSimilarityMeasureValue() override { return 0; }
+    /// @brief Returns the kld value forwards
+    virtual double GetSimilarityMeasureValueFw() override { return 0; }
+    /// @brief Returns the kld value backwards
+    virtual double GetSimilarityMeasureValueBw() override { return 0; }
     /// @brief Compute the voxel based kld gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
 };
@@ -205,8 +210,10 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
                                    float4 *warpedGradBwCuda = nullptr,
                                    nifti_image *voxelBasedGradBw = nullptr,
                                    float4 *voxelBasedGradBwCuda = nullptr) override {}
-    /// @brief Returns the dti value
-    virtual double GetSimilarityMeasureValue() override { return 0; }
+    /// @brief Returns the dti value forwards
+    virtual double GetSimilarityMeasureValueFw() override { return 0; }
+    /// @brief Returns the dti value backwards
+    virtual double GetSimilarityMeasureValueBw() override { return 0; }
     /// @brief Compute the voxel based dti gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
 };
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 5efd0391..2e55b78b 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -63,48 +63,67 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
 #endif
 }
 /* *************************************************************** */
-double reg_nmi_gpu::GetSimilarityMeasureValue() {
-    // The NMI computation is performed into the host for now
-    // The relevant images have to be transferred from the device to the host
-    cudaCommon_transferFromDeviceToNifti<float>(this->warpedImage, this->warpedImageCuda);
-    reg_getNMIValue<float>(this->referenceImage,
-                           this->warpedImage,
-                           this->timePointWeight,
-                           this->referenceBinNumber,
-                           this->floatingBinNumber,
-                           this->totalBinNumber,
-                           this->jointHistogramLog,
-                           this->jointHistogramPro,
-                           this->entropyValues,
-                           this->referenceMask);
+double GetSimilarityMeasureValue(const nifti_image *referenceImage,
+                                 nifti_image *warpedImage,
+                                 const float *warpedImageCuda,
+                                 const double *timePointWeight,
+                                 const unsigned short *referenceBinNumber,
+                                 const unsigned short *floatingBinNumber,
+                                 const unsigned short *totalBinNumber,
+                                 double **jointHistogramLog,
+                                 double **jointHistogramPro,
+                                 double **entropyValues,
+                                 const int *referenceMask,
+                                 const int& referenceTimePoint) {
+    // The NMI computation is performed on the host for now
+    cudaCommon_transferFromDeviceToNifti<float>(warpedImage, warpedImageCuda);
+    reg_getNMIValue<float>(referenceImage,
+                           warpedImage,
+                           timePointWeight,
+                           referenceBinNumber,
+                           floatingBinNumber,
+                           totalBinNumber,
+                           jointHistogramLog,
+                           jointHistogramPro,
+                           entropyValues,
+                           referenceMask);
 
-    if (this->isSymmetric) {
-        cudaCommon_transferFromDeviceToNifti<float>(this->warpedImageBw, this->warpedImageBwCuda);
-        reg_getNMIValue<float>(this->floatingImage,
-                               this->warpedImageBw,
-                               this->timePointWeight,
-                               this->floatingBinNumber,
-                               this->referenceBinNumber,
-                               this->totalBinNumber,
-                               this->jointHistogramLogBw,
-                               this->jointHistogramProBw,
-                               this->entropyValuesBw,
-                               this->floatingMask);
-    }
-
-    double nmiFw = 0, nmiBw = 0;
-    for (int t = 0; t < this->referenceTimePoint; ++t) {
-        if (this->timePointWeight[t] > 0) {
-            nmiFw += timePointWeight[t] * (this->entropyValues[t][0] + this->entropyValues[t][1]) / this->entropyValues[t][2];
-            if (this->isSymmetric)
-                nmiBw += timePointWeight[t] * (this->entropyValuesBw[t][0] + this->entropyValuesBw[t][1]) / this->entropyValuesBw[t][2];
-        }
+    double nmi = 0;
+    for (int t = 0; t < referenceTimePoint; ++t) {
+        if (timePointWeight[t] > 0)
+            nmi += timePointWeight[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2];
     }
-
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi_gpu::GetSimilarityMeasureValue called");
-#endif
-    return nmiFw + nmiBw;
+    return nmi;
+}
+/* *************************************************************** */
+double reg_nmi_gpu::GetSimilarityMeasureValueFw() {
+    return ::GetSimilarityMeasureValue(this->referenceImage,
+                                       this->warpedImage,
+                                       this->warpedImageCuda,
+                                       this->timePointWeight,
+                                       this->referenceBinNumber,
+                                       this->floatingBinNumber,
+                                       this->totalBinNumber,
+                                       this->jointHistogramLog,
+                                       this->jointHistogramPro,
+                                       this->entropyValues,
+                                       this->referenceMask,
+                                       this->referenceTimePoint);
+}
+/* *************************************************************** */
+double reg_nmi_gpu::GetSimilarityMeasureValueBw() {
+    return ::GetSimilarityMeasureValue(this->floatingImage,
+                                       this->warpedImageBw,
+                                       this->warpedImageBwCuda,
+                                       this->timePointWeight,
+                                       this->floatingBinNumber,
+                                       this->referenceBinNumber,
+                                       this->totalBinNumber,
+                                       this->jointHistogramLogBw,
+                                       this->jointHistogramProBw,
+                                       this->entropyValuesBw,
+                                       this->floatingMask,
+                                       this->referenceTimePoint);
 }
 /* *************************************************************** */
 /// Called when we only have one target and one source image
@@ -201,7 +220,7 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                          this->referenceBinNumber[0]);
     }
 #ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n");
+    reg_print_msg_debug("reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called");
 #endif
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index ff24a676..2b55270b 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -47,8 +47,10 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
                                    float4 *warpedGradBwCuda = nullptr,
                                    nifti_image *voxelBasedGradBw = nullptr,
                                    float4 *voxelBasedGradBwCuda = nullptr) override;
-    /// @brief Returns the nmi value
-    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Returns the nmi value forwards
+    virtual double GetSimilarityMeasureValueFw() override;
+    /// @brief Returns the nmi value backwards
+    virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel based nmi gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
 };
@@ -82,8 +84,10 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_
     reg_multichannel_nmi_gpu() {}
     /// @brief reg_multichannel_nmi_gpu class destructor
     virtual ~reg_multichannel_nmi_gpu() {}
-    /// @brief Returns the nmi value
-    virtual double GetSimilarityMeasureValue() override { return 0; }
+    /// @brief Returns the nmi value forwards
+    virtual double GetSimilarityMeasureValueFw() override { return 0; }
+    /// @brief Returns the nmi value backwards
+    virtual double GetSimilarityMeasureValueBw() override { return 0; }
     /// @brief Compute the voxel based nmi gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
 };
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 1ea2ba08..dc62ea53 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -98,7 +98,7 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage,
     return ssd;
 }
 /* *************************************************************** */
-double reg_ssd_gpu::GetSimilarityMeasureValue() {
+double reg_ssd_gpu::GetSimilarityMeasureValueFw() {
     const double SSDValue = reg_getSSDValue_gpu(this->referenceImage,
                                                 this->referenceImageCuda,
                                                 this->warpedImageCuda,
@@ -107,6 +107,10 @@ double reg_ssd_gpu::GetSimilarityMeasureValue() {
     return -SSDValue;
 }
 /* *************************************************************** */
+double reg_ssd_gpu::GetSimilarityMeasureValueBw() {
+    return 0;
+}
+/* *************************************************************** */
 void reg_getVoxelBasedSSDGradient_gpu(const nifti_image *referenceImage,
                                       const cudaArray *referenceImageCuda,
                                       const float *warpedCuda,
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 34764df3..c0a994be 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -48,8 +48,10 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
                                    float4 *warpedGradBwCuda = nullptr,
                                    nifti_image *voxelBasedGradBw = nullptr,
                                    float4 *voxelBasedGradBwCuda = nullptr) override;
-    /// @brief Returns the ssd value
-    virtual double GetSimilarityMeasureValue() override;
+    /// @brief Returns the ssd value forwards
+    virtual double GetSimilarityMeasureValueFw() override;
+    /// @brief Returns the ssd value backwards
+    virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel based ssd gradient
     virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
 };

From c101e74c39c4089bfdcdf705f5c63c715f2cc6b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 31 Jul 2023 14:50:04 +0100
Subject: [PATCH 177/314] Disable OpenMP for coverage

---
 .github/workflows/coverage.yml | 2 +-
 niftyreg_build_version.txt     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index ebe51947..f90f1da2 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -28,7 +28,7 @@ jobs:
                 -DUSE_CUDA=OFF \
                 -DUSE_OPENCL=OFF \
                 -DUSE_SSE=ON \
-                -DUSE_OPENMP=ON \
+                -DUSE_OPENMP=OFF \
                 -DBUILD_TESTING=ON \
                 -DWITH_COVERAGE=ON \
                 ..
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9530e048..95de1eed 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-296
+297

From 8129f1af2558d2580b08b1e8ca022f0e4e8f0862 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 1 Aug 2023 14:50:03 +0100
Subject: [PATCH 178/314] Rearchitect reg_measure to handle forward and
 backward voxel-based similarity measure gradient computation #92

- Add symmetric scheme support for reg_ssd_gpu
---
 niftyreg_build_version.txt       |   2 +-
 reg-lib/cpu/_reg_dti.cpp         | 233 +++++++----------
 reg-lib/cpu/_reg_dti.h           |  22 +-
 reg-lib/cpu/_reg_kld.cpp         | 263 ++++++++-----------
 reg-lib/cpu/_reg_kld.h           |  51 +---
 reg-lib/cpu/_reg_lncc.cpp        | 419 +++++++++++++------------------
 reg-lib/cpu/_reg_lncc.h          |   6 +-
 reg-lib/cpu/_reg_measure.h       |  45 +++-
 reg-lib/cpu/_reg_mind.cpp        | 361 +++++++++-----------------
 reg-lib/cpu/_reg_mind.h          |  13 +-
 reg-lib/cpu/_reg_nmi.cpp         | 290 +++++++++------------
 reg-lib/cpu/_reg_nmi.h           |  23 +-
 reg-lib/cpu/_reg_ssd.cpp         | 277 +++++++-------------
 reg-lib/cpu/_reg_ssd.h           |  22 +-
 reg-lib/cuda/_reg_measure_gpu.h  |  18 +-
 reg-lib/cuda/_reg_nmi_gpu.cu     |  41 ++-
 reg-lib/cuda/_reg_nmi_gpu.h      |  12 +-
 reg-lib/cuda/_reg_ssd_gpu.cu     |  44 ++--
 reg-lib/cuda/_reg_ssd_gpu.h      |   6 +-
 reg-lib/cuda/_reg_ssd_kernels.cu |   8 +-
 20 files changed, 861 insertions(+), 1295 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 95de1eed..a1f7f63f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-297
+298
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index d4fa63be..1196f47b 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -53,8 +53,8 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg,
 
     int j = 0;
     for (int i = 0; i < refImg->nt; ++i) {
-        //JM - note, the specific value of timePointWeight is not used for DTI images
-        //any value > 0 indicates the 'time point' is active
+        // JM - note, the specific value of timePointWeight is not used for DTI images
+        // any value > 0 indicates the 'time point' is active
         if (this->timePointWeight[i] > 0) {
             this->dtIndicies[j++] = i;
 #ifndef NDEBUG
@@ -73,7 +73,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg,
 }
 /* *************************************************************** */
 template<class DataType>
-double reg_getDTIMeasureValue(const nifti_image *referenceImage,
+double reg_getDtiMeasureValue(const nifti_image *referenceImage,
                               const nifti_image *warpedImage,
                               const int *mask,
                               const unsigned *dtIndicies) {
@@ -84,9 +84,8 @@ double reg_getDTIMeasureValue(const nifti_image *referenceImage,
     size_t voxel;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
-
-    /* As the tensor has 6 unique components that we need to worry about, read them out
-    for the floating and reference images. */
+    // As the tensor has 6 unique components that we need to worry about
+    // Read them out for the floating and reference images
     const DataType *firstWarpedVox = static_cast<DataType*>(warpedImage->data);
     const DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber * dtIndicies[0]];
     const DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber * dtIndicies[1]];
@@ -105,14 +104,12 @@ double reg_getDTIMeasureValue(const nifti_image *referenceImage,
 
     double dtiCost = 0, n = 0;
     constexpr double twoThirds = 2.0 / 3.0;
-    DataType rXX, rXY, rYY, rXZ, rYZ, rZZ;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(referenceImage, referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \
           referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ, \
-          warpedIntensityXX,warpedIntensityXY,warpedIntensityXZ, \
-          warpedIntensityYY,warpedIntensityYZ, warpedIntensityZZ, mask,voxelNumber) \
-   private(rXX, rXY, rYY, rXZ, rYZ, rZZ) \
+          warpedIntensityXX, warpedIntensityXY, warpedIntensityXZ, \
+          warpedIntensityYY, warpedIntensityYZ, warpedIntensityZZ, mask, voxelNumber) \
    reduction(+:dtiCost, n)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
@@ -121,12 +118,12 @@ double reg_getDTIMeasureValue(const nifti_image *referenceImage,
             if (referenceIntensityXX[voxel] == referenceIntensityXX[voxel] &&
                 warpedIntensityXX[voxel] == warpedIntensityXX[voxel]) {
                 // Calculate the elementwise residual of the diffusion tensor components
-                rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel];
-                rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel];
-                rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel];
-                rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel];
-                rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel];
-                rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel];
+                const DataType rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel];
+                const DataType rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel];
+                const DataType rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel];
+                const DataType rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel];
+                const DataType rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel];
+                const DataType rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel];
                 dtiCost -= twoThirds * (reg_pow2(rXX) + reg_pow2(rYY) + reg_pow2(rZZ))
                     + 2.0 * (reg_pow2(rXY) + reg_pow2(rXZ) + reg_pow2(rYZ))
                     - twoThirds * (rXX * rYY + rXX * rZZ + rYY * rZZ);
@@ -143,7 +140,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  const unsigned *dtIndicies) {
     return std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
-        return reg_getDTIMeasureValue<RefImgDataType>(referenceImage,
+        return reg_getDtiMeasureValue<RefImgDataType>(referenceImage,
                                                       warpedImage,
                                                       mask,
                                                       dtIndicies);
@@ -165,13 +162,12 @@ double reg_dti::GetSimilarityMeasureValueBw() {
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
-                                         nifti_image *warpedImage,
-                                         nifti_image *warpedGradient,
+void reg_getVoxelBasedDtiMeasureGradient(const nifti_image *referenceImage,
+                                         const nifti_image *warpedImage,
+                                         const nifti_image *warpedGradient,
                                          nifti_image *dtiMeasureGradientImage,
-                                         int *mask,
-                                         unsigned *dtIndicies) {
-    // Create pointers to the reference and warped images
+                                         const int *mask,
+                                         const unsigned *dtIndicies) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -179,72 +175,69 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
     size_t voxel;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
+    // As the tensor has 6 unique components that we need to worry about
+    // Read them out for the floating and reference images
+    const DataType *firstWarpedVox = static_cast<DataType*>(warpedImage->data);
+    const DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber * dtIndicies[0]];
+    const DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber * dtIndicies[1]];
+    const DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber * dtIndicies[2]];
+    const DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber * dtIndicies[3]];
+    const DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber * dtIndicies[4]];
+    const DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber * dtIndicies[5]];
 
-    /* As the tensor has 6 unique components that we need to worry about, read them out
-    for the floating and reference images. */
-    DataType *firstWarpedVox = static_cast<DataType*>(warpedImage->data);
-    DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber * dtIndicies[0]];
-    DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber * dtIndicies[1]];
-    DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber * dtIndicies[2]];
-    DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber * dtIndicies[3]];
-    DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber * dtIndicies[4]];
-    DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber * dtIndicies[5]];
-
-    DataType *firstRefVox = static_cast<DataType*>(referenceImage->data);
-    DataType *referenceIntensityXX = &firstRefVox[voxelNumber * dtIndicies[0]];
-    DataType *referenceIntensityXY = &firstRefVox[voxelNumber * dtIndicies[1]];
-    DataType *referenceIntensityYY = &firstRefVox[voxelNumber * dtIndicies[2]];
-    DataType *referenceIntensityXZ = &firstRefVox[voxelNumber * dtIndicies[3]];
-    DataType *referenceIntensityYZ = &firstRefVox[voxelNumber * dtIndicies[4]];
-    DataType *referenceIntensityZZ = &firstRefVox[voxelNumber * dtIndicies[5]];
+    const DataType *firstRefVox = static_cast<DataType*>(referenceImage->data);
+    const DataType *referenceIntensityXX = &firstRefVox[voxelNumber * dtIndicies[0]];
+    const DataType *referenceIntensityXY = &firstRefVox[voxelNumber * dtIndicies[1]];
+    const DataType *referenceIntensityYY = &firstRefVox[voxelNumber * dtIndicies[2]];
+    const DataType *referenceIntensityXZ = &firstRefVox[voxelNumber * dtIndicies[3]];
+    const DataType *referenceIntensityYZ = &firstRefVox[voxelNumber * dtIndicies[4]];
+    const DataType *referenceIntensityZZ = &firstRefVox[voxelNumber * dtIndicies[5]];
 
     // THE FOLLOWING IS WRONG
     reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX");
     reg_exit();
-    unsigned gradientVoxels = warpedGradient->nu * voxelNumber;
-    DataType *firstGradVox = static_cast<DataType*>(warpedGradient->data);
-    DataType *spatialGradXX = &firstGradVox[gradientVoxels * dtIndicies[0]];
-    DataType *spatialGradXY = &firstGradVox[gradientVoxels * dtIndicies[1]];
-    DataType *spatialGradYY = &firstGradVox[gradientVoxels * dtIndicies[2]];
-    DataType *spatialGradXZ = &firstGradVox[gradientVoxels * dtIndicies[3]];
-    DataType *spatialGradYZ = &firstGradVox[gradientVoxels * dtIndicies[4]];
-    DataType *spatialGradZZ = &firstGradVox[gradientVoxels * dtIndicies[5]];
+    const size_t gradientVoxels = (size_t)warpedGradient->nu * voxelNumber;
+    const DataType *firstGradVox = static_cast<DataType*>(warpedGradient->data);
+    const DataType *spatialGradXX = &firstGradVox[gradientVoxels * dtIndicies[0]];
+    const DataType *spatialGradXY = &firstGradVox[gradientVoxels * dtIndicies[1]];
+    const DataType *spatialGradYY = &firstGradVox[gradientVoxels * dtIndicies[2]];
+    const DataType *spatialGradXZ = &firstGradVox[gradientVoxels * dtIndicies[3]];
+    const DataType *spatialGradYZ = &firstGradVox[gradientVoxels * dtIndicies[4]];
+    const DataType *spatialGradZZ = &firstGradVox[gradientVoxels * dtIndicies[5]];
 
     // Create an array to store the computed gradient per time point
     DataType *dtiMeasureGradPtrX = static_cast<DataType*>(dtiMeasureGradientImage->data);
     DataType *dtiMeasureGradPtrY = &dtiMeasureGradPtrX[voxelNumber];
     DataType *dtiMeasureGradPtrZ = &dtiMeasureGradPtrY[voxelNumber];
 
-    const double twoThirds = 2.0 / 3.0;
-    const double fourThirds = 4.0 / 3.0;
+    constexpr double twoThirds = 2.0 / 3.0;
+    constexpr double fourThirds = 4.0 / 3.0;
 
-    DataType rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \
           referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ,warpedIntensityXX, \
           warpedIntensityXY,warpedIntensityXZ ,warpedIntensityYY,warpedIntensityYZ, warpedIntensityZZ, \
           mask, spatialGradXX, spatialGradXY, spatialGradXZ, spatialGradYY, spatialGradYZ, spatialGradZZ, \
-          dtiMeasureGradPtrX, dtiMeasureGradPtrY, dtiMeasureGradPtrZ, voxelNumber) \
-   private(rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad)
+          dtiMeasureGradPtrX, dtiMeasureGradPtrY, dtiMeasureGradPtrZ, voxelNumber)
 #endif
     for (voxel = 0; voxel < voxelNumber; voxel++) {
         if (mask[voxel] > -1) {
             if (referenceIntensityXX[voxel] == referenceIntensityXX[voxel] &&
                 warpedIntensityXX[voxel] == warpedIntensityXX[voxel]) {
-                rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel];
-                rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel];
-                rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel];
-                rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel];
-                rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel];
-                rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel];
+                const DataType rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel];
+                const DataType rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel];
+                const DataType rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel];
+                const DataType rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel];
+                const DataType rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel];
+                const DataType rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel];
 
-                xxGrad = static_cast<DataType>(fourThirds * rXX - twoThirds * (rYY + rZZ));
-                yyGrad = static_cast<DataType>(fourThirds * rYY - twoThirds * (rXX + rZZ));
-                zzGrad = static_cast<DataType>(fourThirds * rZZ - twoThirds * (rYY + rXX));
-                xyGrad = 4.f * rXY;
-                xzGrad = 4.f * rXZ;
-                yzGrad = 4.f * rYZ;
+                const DataType xxGrad = static_cast<DataType>(fourThirds * rXX - twoThirds * (rYY + rZZ));
+                const DataType yyGrad = static_cast<DataType>(fourThirds * rYY - twoThirds * (rXX + rZZ));
+                const DataType zzGrad = static_cast<DataType>(fourThirds * rZZ - twoThirds * (rYY + rXX));
+                const DataType xyGrad = 4.f * rXY;
+                const DataType xzGrad = 4.f * rXZ;
+                const DataType yzGrad = 4.f * rYZ;
 
                 dtiMeasureGradPtrX[voxel] -= (spatialGradXX[voxel] * xxGrad + spatialGradYY[voxel] * yyGrad + spatialGradZZ[voxel] * zzGrad
                                               + spatialGradXY[voxel] * xyGrad + spatialGradXZ[voxel] * xzGrad + spatialGradYZ[voxel] * yzGrad);
@@ -260,82 +253,38 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
     }
 }
 /* *************************************************************** */
-void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
-    // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
-    if (this->timePointWeight[currentTimepoint] == 0)
-        return;
-
-    // Check if all required input images are of the same data type
-    int dtype = this->referenceImage->datatype;
-    if (this->warpedImage->datatype != dtype ||
-        this->warpedGradient->datatype != dtype ||
-        this->voxelBasedGradient->datatype != dtype
-        ) {
-        reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
-        reg_print_msg_error("Input images are expected to be of the same type");
-        reg_exit();
-    }
-    // Compute the gradient of the ssd for the forward transformation
-    switch (dtype) {
-    case NIFTI_TYPE_FLOAT32:
-        reg_getVoxelBasedDTIMeasureGradient<float>
-            (this->referenceImage,
-             this->warpedImage,
-             this->warpedGradient,
-             this->voxelBasedGradient,
-             this->referenceMask,
-             this->dtIndicies);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_getVoxelBasedDTIMeasureGradient<double>
-            (this->referenceImage,
-             this->warpedImage,
-             this->warpedGradient,
-             this->voxelBasedGradient,
-             this->referenceMask,
-             this->dtIndicies);
-        break;
-    default:
-        reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
-        reg_print_msg_error("The input image data type is not supported");
-        reg_exit();
-    }
-    // Compute the gradient of the ssd for the backward transformation
-    if (this->isSymmetric) {
-        dtype = this->floatingImage->datatype;
-        if (this->warpedImageBw->datatype != dtype ||
-            this->warpedGradientBw->datatype != dtype ||
-            this->voxelBasedGradientBw->datatype != dtype) {
-            reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
-            reg_print_msg_error("Input images are expected to be of the same type");
-            reg_exit();
-        }
-        // Compute the gradient of the nmi for the backward transformation
-        switch (dtype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedDTIMeasureGradient<float>
-                (this->floatingImage,
-                 this->warpedImageBw,
-                 this->warpedGradientBw,
-                 this->voxelBasedGradientBw,
-                 this->floatingMask,
-                 this->dtIndicies);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedDTIMeasureGradient<double>
-                (this->floatingImage,
-                 this->warpedImageBw,
-                 this->warpedGradientBw,
-                 this->voxelBasedGradientBw,
-                 this->floatingMask,
-                 this->dtIndicies);
-            break;
-        default:
-            reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient");
-            reg_print_msg_error("The input image data type is not supported");
-            reg_exit();
-        }
-    }
+void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
+                                            const nifti_image *warpedImage,
+                                            const nifti_image *warpedGradient,
+                                            nifti_image *voxelBasedGradient,
+                                            const int *referenceMask,
+                                            const unsigned *dtIndicies) {
+    std::visit([&](auto&& refImgDataType) {
+        using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+        reg_getVoxelBasedDtiMeasureGradient<RefImgDataType>(referenceImage,
+                                                            warpedImage,
+                                                            warpedGradient,
+                                                            voxelBasedGradient,
+                                                            referenceMask,
+                                                            dtIndicies);
+    }, NiftiImage::getFloatingDataType(referenceImage));
+}
+/* *************************************************************** */
+void reg_dti::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
+                                             this->warpedImage,
+                                             this->warpedGradient,
+                                             this->voxelBasedGradient,
+                                             this->referenceMask,
+                                             this->dtIndicies);
+}
+/* *************************************************************** */
+void reg_dti::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
+                                             this->warpedImageBw,
+                                             this->warpedGradientBw,
+                                             this->voxelBasedGradientBw,
+                                             this->floatingMask,
+                                             this->dtIndicies);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 0e6dc21c..3ef169e0 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -41,8 +41,10 @@ class reg_dti: public reg_measure {
     virtual double GetSimilarityMeasureValueFw() override;
     /// @brief Returns the dti value backwards
     virtual double GetSimilarityMeasureValueBw() override;
-    /// @brief Compute the voxel based gradient for DTI images
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
+    /// @brief Compute the voxel-based gradient for DTI images forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    /// @brief Compute the voxel-based gradient for DTI images backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
 
 protected:
     // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ
@@ -50,8 +52,7 @@ class reg_dti: public reg_measure {
     float currentValue;
 };
 /* *************************************************************** */
-/**
- * @brief Computes and returns the SSD between two input image
+/** @brief Computes and returns the SSD between two input image
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
  * @param mask Array that contains a mask to specify which voxel
@@ -59,25 +60,22 @@ class reg_dti: public reg_measure {
  * @return Returns an L2 measure of the distance between the anisotropic components of the diffusion tensors
  */
 extern "C++" template <class DataType>
-double reg_getDTIMeasureValue(const nifti_image *referenceImage,
+double reg_getDtiMeasureValue(const nifti_image *referenceImage,
                               const nifti_image *warpedImage,
                               const int *mask,
                               const unsigned *dtIndicies);
 /* *************************************************************** */
-/**
- * @brief Compute a voxel based gradient of the sum squared difference.
+/** @brief Compute a voxel based gradient of the sum squared difference.
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
- * @param warpedImageGradient Spatial gradient of the input warped image
- * @param dtiGradientImage Output image that will be updated with the
+ * @param warpedGradient Spatial gradient of the input warped image
+ * @param dtiMeasureGradientImage Output image that will be updated with the
  * value of the dti measure gradient
- * @param maxSD Input scalar that contain the difference value between
- * the highest and the lowest intensity.
  * @param mask Array that contains a mask to specify which voxel
  * should be considered. If set to nullptr, all voxels are considered
  */
 extern "C++" template <class DataType>
-void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage,
+void reg_getVoxelBasedDtiMeasureGradient(nifti_image *referenceImage,
                                          nifti_image *warpedImage,
                                          nifti_image *warpedGradient,
                                          nifti_image *dtiMeasureGradientImage,
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index 01302e80..f94846a5 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -74,6 +74,18 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg,
 #endif
 }
 /* *************************************************************** */
+/** @brief Computes and returns the KLD between two input image
+ * @param referenceImage First input image to use to compute the metric
+ * @param warpedImage Second input image to use to compute the metric
+ * @param timePointWeight Array that contains the weight of each time point
+ * @param jacobianDetImg Image that contains the Jacobian
+ * determinant of a transformation at every voxel position. This
+ * image is used to modulate the KLD. The argument is ignored if the
+ * pointer is set to nullptr
+ * @param mask Array that contains a mask to specify which voxel
+ * should be considered
+ * @return Returns the computed sum squared difference
+ */
 template <class DataType>
 double reg_getKLDivergence(const nifti_image *referenceImage,
                            const nifti_image *warpedImage,
@@ -87,14 +99,11 @@ double reg_getKLDivergence(const nifti_image *referenceImage,
     size_t voxel;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
-
     const DataType *refPtr = static_cast<DataType*>(referenceImage->data);
     const DataType *warPtr = static_cast<DataType*>(warpedImage->data);
-    const DataType *jacPtr = nullptr;
-    if (jacobianDetImg != nullptr)
-        jacPtr = static_cast<DataType*>(jacobianDetImg->data);
+    const DataType *jacPtr = jacobianDetImg ? static_cast<DataType*>(jacobianDetImg->data) : nullptr;
 
-    double measure = 0, measureTp = 0, num = 0, tempRefValue, tempWarValue, tempValue;
+    double measure = 0, measureTp = 0, num = 0;
 
     for (int time = 0; time < referenceImage->nt; ++time) {
         if (timePointWeight[time] > 0) {
@@ -103,23 +112,17 @@ double reg_getKLDivergence(const nifti_image *referenceImage,
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,currentRefPtr, currentWarPtr, mask, jacobianDetImg, jacPtr) \
-    private(tempRefValue, tempWarValue, tempValue) \
     reduction(+:measureTp, num)
 #endif
             for (voxel = 0; voxel < voxelNumber; ++voxel) {
                 if (mask[voxel] > -1) {
-                    tempRefValue = currentRefPtr[voxel] + 1e-16;
-                    tempWarValue = currentWarPtr[voxel] + 1e-16;
-                    tempValue = tempRefValue * log(tempRefValue / tempWarValue);
-                    if (tempValue == tempValue &&
-                        tempValue != std::numeric_limits<double>::infinity()) {
-                        if (jacobianDetImg == nullptr) {
-                            measureTp -= tempValue;
-                            num++;
-                        } else {
-                            measureTp -= tempValue * jacPtr[voxel];
-                            num += jacPtr[voxel];
-                        }
+                    const double tempRefValue = currentRefPtr[voxel] + 1e-16;
+                    const double tempWarValue = currentWarPtr[voxel] + 1e-16;
+                    const double tempValue = tempRefValue * log(tempRefValue / tempWarValue);
+                    if (tempValue == tempValue && tempValue != std::numeric_limits<double>::infinity()) {
+                        const DataType jacValue = jacPtr ? jacPtr[voxel] : 1;
+                        measureTp -= tempValue * jacValue;
+                        num += jacValue;
                     }
                 }
             }
@@ -160,15 +163,30 @@ double reg_kld::GetSimilarityMeasureValueBw() {
                                        this->floatingMask);
 }
 /* *************************************************************** */
+/** @brief Compute a voxel based gradient of the sum squared difference.
+ * @param referenceImage First input image to use to compute the metric
+ * @param warpedImage Second input image to use to compute the metric
+ * @param warpedGradient Spatial gradient of the input result image
+ * @param measureGradient Output image that will be updated with the
+ * value of the KLD gradient
+ * @param jacobianDetImg Image that contains the Jacobian
+ * determinant of a transformation at every voxel position. This
+ * image is used to modulate the KLD. The argument is ignored if the
+ * pointer is set to nullptr
+ * @param mask Array that contains a mask to specify which voxel
+ * should be considered
+ * @param currentTimepoint Specified which time point volumes have to be considered
+ * @param timepointWeight Weight of the current time point
+ */
 template <class DataType>
-void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
-                                           nifti_image *warpedImage,
-                                           nifti_image *warpedImageGradient,
+void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
+                                           const nifti_image *warpedImage,
+                                           const nifti_image *warpedGradient,
                                            nifti_image *measureGradient,
-                                           nifti_image *jacobianDetImg,
-                                           int *mask,
-                                           int currentTimepoint,
-                                           double timepointWeight) {
+                                           const nifti_image *jacobianDetImg,
+                                           const int *mask,
+                                           const int& currentTimepoint,
+                                           const double& timepointWeight) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -176,179 +194,120 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage,
     size_t voxel;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
-
-    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
-    DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
-    int *maskPtr = nullptr;
-    bool MrClean = false;
-    if (mask == nullptr) {
-        maskPtr = (int*)calloc(voxelNumber, sizeof(int));
-        MrClean = true;
-    } else maskPtr = &mask[0];
-
-    DataType *jacPtr = nullptr;
-    if (jacobianDetImg != nullptr)
-        jacPtr = static_cast<DataType*>(jacobianDetImg->data);
-    double tempValue, tempGradX, tempGradY, tempGradZ, tempRefValue, tempWarValue;
+    const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
+    const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
+    const DataType *jacPtr = jacobianDetImg ? static_cast<DataType*>(jacobianDetImg->data) : nullptr;
 
     // Create pointers to the spatial gradient of the current warped volume
-    DataType *currentGradPtrX = static_cast<DataType*>(warpedImageGradient->data);
-    DataType *currentGradPtrY = &currentGradPtrX[voxelNumber];
-    DataType *currentGradPtrZ = nullptr;
-    if (referenceImage->nz > 1)
-        currentGradPtrZ = &currentGradPtrY[voxelNumber];
+    const DataType *currentGradPtrX = static_cast<DataType*>(warpedGradient->data);
+    const DataType *currentGradPtrY = &currentGradPtrX[voxelNumber];
+    const DataType *currentGradPtrZ = referenceImage->nz > 1 ? &currentGradPtrY[voxelNumber] : nullptr;
 
     // Create pointers to the kld gradient image
     DataType *measureGradPtrX = static_cast<DataType*>(measureGradient->data);
     DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
-    DataType *measureGradPtrZ = nullptr;
-    if (referenceImage->nz > 1)
-        measureGradPtrZ = &measureGradPtrY[voxelNumber];
+    DataType *measureGradPtrZ = referenceImage->nz > 1 ? &measureGradPtrY[voxelNumber] : nullptr;
 
     // find number of active voxels and correct weight
-    double activeVoxel_num = 0;
+    size_t activeVoxelNumber = 0;
     for (voxel = 0; voxel < voxelNumber; voxel++) {
         if (mask[voxel] > -1) {
             if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel])
-                activeVoxel_num += 1.0;
+                activeVoxelNumber++;
         }
     }
-    double adjusted_weight = timepointWeight / activeVoxel_num;
+    const double adjustedWeight = timepointWeight / activeVoxelNumber;
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber,currentRefPtr, currentWarPtr, \
-    maskPtr, jacobianDetImg, jacPtr, referenceImage, \
-    measureGradPtrX, measureGradPtrY, measureGradPtrZ, \
-    currentGradPtrX, currentGradPtrY, currentGradPtrZ, adjusted_weight) \
-    private(tempValue, tempGradX, tempGradY, tempGradZ, \
-    tempRefValue, tempWarValue)
+    shared(voxelNumber,currentRefPtr, currentWarPtr, mask, jacobianDetImg, \
+    jacPtr, referenceImage, measureGradPtrX, measureGradPtrY, measureGradPtrZ, \
+    currentGradPtrX, currentGradPtrY, currentGradPtrZ, adjustedWeight)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel is in the mask
-        if (maskPtr[voxel] > -1) {
+        if (mask[voxel] > -1) {
             // Read referenceImage and warpedImage probabilities and compute the ratio
-            tempRefValue = currentRefPtr[voxel] + 1e-16;
-            tempWarValue = currentWarPtr[voxel] + 1e-16;
-            tempValue = (currentRefPtr[voxel] + 1e-16) / (currentWarPtr[voxel] + 1e-16);
+            const double tempRefValue = currentRefPtr[voxel] + 1e-16;
+            const double tempWarValue = currentWarPtr[voxel] + 1e-16;
+            double tempValue = (currentRefPtr[voxel] + 1e-16) / (currentWarPtr[voxel] + 1e-16);
             // Check if the intensity ratio is defined and different from zero
             if (tempValue == tempValue &&
                 tempValue != std::numeric_limits<double>::infinity() &&
                 tempValue > 0) {
-                tempValue = tempRefValue / tempWarValue;
-                tempValue *= adjusted_weight;
+                tempValue = (tempRefValue / tempWarValue) * adjustedWeight;
 
                 // Jacobian modulation if the Jacobian determinant image is defined
-                if (jacobianDetImg != nullptr)
+                if (jacPtr)
                     tempValue *= jacPtr[voxel];
 
                 // Ensure that gradient of the warpedImage image along x-axis is not NaN
-                tempGradX = currentGradPtrX[voxel];
+                const double& tempGradX = currentGradPtrX[voxel];
                 if (tempGradX == tempGradX)
                     // Update the gradient along the x-axis
-                    measureGradPtrX[voxel] -= (DataType)(tempValue * tempGradX);
+                    measureGradPtrX[voxel] -= static_cast<DataType>(tempValue * tempGradX);
 
                 // Ensure that gradient of the warpedImage image along y-axis is not NaN
-                tempGradY = currentGradPtrY[voxel];
+                const double& tempGradY = currentGradPtrY[voxel];
                 if (tempGradY == tempGradY)
                     // Update the gradient along the y-axis
-                    measureGradPtrY[voxel] -= (DataType)(tempValue * tempGradY);
+                    measureGradPtrY[voxel] -= static_cast<DataType>(tempValue * tempGradY);
 
                 // Check if the current images are 3D
                 if (referenceImage->nz > 1) {
                     // Ensure that gradient of the warpedImage image along z-axis is not NaN
-                    tempGradZ = currentGradPtrZ[voxel];
+                    const double& tempGradZ = currentGradPtrZ[voxel];
                     if (tempGradZ == tempGradZ)
                         // Update the gradient along the z-axis
-                        measureGradPtrZ[voxel] -= (DataType)(tempValue * tempGradZ);
+                        measureGradPtrZ[voxel] -= static_cast<DataType>(tempValue * tempGradZ);
                 }
             }
         }
     }
-    if (MrClean) free(maskPtr);
 }
 /* *************************************************************** */
-void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
-    // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
-    if (this->timePointWeight[currentTimepoint] == 0)
-        return;
-
-    // Check if all required input images are of the same data type
-    int dtype = this->referenceImage->datatype;
-    if (this->warpedImage->datatype != dtype ||
-        this->warpedGradient->datatype != dtype ||
-        this->voxelBasedGradient->datatype != dtype) {
-        reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
-        reg_print_msg_error("Input images are expected to be of the same type");
-        reg_exit();
-    }
-    // Compute the gradient of the kld for the forward transformation
-    switch (dtype) {
-    case NIFTI_TYPE_FLOAT32:
-        reg_getKLDivergenceVoxelBasedGradient<float>(this->referenceImage,
-                                                     this->warpedImage,
-                                                     this->warpedGradient,
-                                                     this->voxelBasedGradient,
-                                                     nullptr, // TODO this->forwardJacDetImagePointer,
-                                                     this->referenceMask,
-                                                     currentTimepoint,
-                                                     this->timePointWeight[currentTimepoint]);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_getKLDivergenceVoxelBasedGradient<double>(this->referenceImage,
-                                                      this->warpedImage,
-                                                      this->warpedGradient,
-                                                      this->voxelBasedGradient,
-                                                      nullptr, // TODO this->forwardJacDetImagePointer,
-                                                      this->referenceMask,
-                                                      currentTimepoint,
-                                                      this->timePointWeight[currentTimepoint]);
-        break;
-    default:
-        reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
-        reg_print_msg_error("Unsupported datatype");
-        reg_exit();
-    }
-    // Compute the gradient of the kld for the backward transformation
-    if (this->isSymmetric) {
-        dtype = this->floatingImage->datatype;
-        if (this->warpedImageBw->datatype != dtype ||
-            this->warpedGradientBw->datatype != dtype ||
-            this->voxelBasedGradientBw->datatype != dtype) {
-            reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
-            reg_print_msg_error("Input images are expected to be of the same type");
-            reg_exit();
-        }
-        // Compute the gradient of the nmi for the backward transformation
-        switch (dtype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_getKLDivergenceVoxelBasedGradient<float>(this->floatingImage,
-                                                         this->warpedImageBw,
-                                                         this->warpedGradientBw,
-                                                         this->voxelBasedGradientBw,
-                                                         nullptr, // TODO this->backwardJacDetImagePointer,
-                                                         this->floatingMask,
-                                                         currentTimepoint,
-                                                         this->timePointWeight[currentTimepoint]);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_getKLDivergenceVoxelBasedGradient<double>(this->floatingImage,
-                                                          this->warpedImageBw,
-                                                          this->warpedGradientBw,
-                                                          this->voxelBasedGradientBw,
-                                                          nullptr, // TODO this->backwardJacDetImagePointer,
-                                                          this->floatingMask,
-                                                          currentTimepoint,
-                                                          this->timePointWeight[currentTimepoint]);
-            break;
-        default:
-            reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient");
-            reg_print_msg_error("Unsupported datatype");
-            reg_exit();
-        }
-    }
+void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
+                                            nifti_image *warpedImage,
+                                            nifti_image *warpedGradient,
+                                            nifti_image *voxelBasedGradient,
+                                            nifti_image *jacobianDetImg,
+                                            int *mask,
+                                            int currentTimepoint,
+                                            double timepointWeight) {
+    std::visit([&](auto&& refImgDataType) {
+        using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+        reg_getKLDivergenceVoxelBasedGradient<RefImgDataType>(referenceImage,
+                                                              warpedImage,
+                                                              warpedGradient,
+                                                              voxelBasedGradient,
+                                                              jacobianDetImg,
+                                                              mask,
+                                                              currentTimepoint,
+                                                              timepointWeight);
+    }, NiftiImage::getFloatingDataType(referenceImage));
+}
+/* *************************************************************** */
+void reg_kld::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
+                                             this->warpedImage,
+                                             this->warpedGradient,
+                                             this->voxelBasedGradient,
+                                             nullptr, // TODO this->forwardJacDetImagePointer,
+                                             this->referenceMask,
+                                             currentTimepoint,
+                                             this->timePointWeight[currentTimepoint]);
+}
+/* *************************************************************** */
+void reg_kld::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
+                                             this->warpedImageBw,
+                                             this->warpedGradientBw,
+                                             this->voxelBasedGradientBw,
+                                             nullptr, // TODO this->backwardJacDetImagePointer,
+                                             this->floatingMask,
+                                             currentTimepoint,
+                                             this->timePointWeight[currentTimepoint]);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h
index ae5f4cb2..1f4b30de 100755
--- a/reg-lib/cpu/_reg_kld.h
+++ b/reg-lib/cpu/_reg_kld.h
@@ -38,52 +38,9 @@ class reg_kld: public reg_measure {
     virtual double GetSimilarityMeasureValueFw() override;
     /// @brief Returns the kld value backwards
     virtual double GetSimilarityMeasureValueBw() override;
-    /// @brief Compute the voxel based kld gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
+    /// @brief Compute the voxel-based kld gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    /// @brief Compute the voxel-based kld gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
 };
 /* *************************************************************** */
-
-/** @brief Computes and returns the KLD between two input image
- * @param reference First input image to use to compute the metric
- * @param warped Second input image to use to compute the metric
- * @param activeTimePoint Specified which time point volumes have to be considered
- * @param jacobianDeterminantImage Image that contains the Jacobian
- * determinant of a transformation at every voxel position. This
- * image is used to modulate the KLD. The argument is ignored if the
- * pointer is set to nullptr
- * @param mask Array that contains a mask to specify which voxel
- * should be considered
- * @return Returns the computed sum squared difference
- */
-extern "C++" template <class DataType>
-double reg_getKLDivergence(const nifti_image *reference,
-                           const nifti_image *warped,
-                           const double *timePointWeight,
-                           const nifti_image *jacobianDeterminantImage,
-                           const int *mask);
-/* *************************************************************** */
-
-/** @brief Compute a voxel based gradient of the sum squared difference.
- * @param reference First input image to use to compute the metric
- * @param warped Second input image to use to compute the metric
- * @param activeTimePoint Specified which time point volumes have to be considered
- * @param warpedGradient Spatial gradient of the input result image
- * @param KLdivGradient Output image that will be updated with the
- * value of the KLD gradient
- * @param jacobianDeterminantImage Image that contains the Jacobian
- * determinant of a transformation at every voxel position. This
- * image is used to modulate the KLD. The argument is ignored if the
- * pointer is set to nullptr
- * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to nullptr, all voxels are considered
- */
-extern "C++" template <class DataType>
-void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference,
-                                           nifti_image *warped,
-                                           nifti_image *warpedGradient,
-                                           nifti_image *KLdivGradient,
-                                           nifti_image *jacobianDeterminantImage,
-                                           int *mask,
-                                           int currentTimepoint,
-                                           double timepointWeight);
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 2d1c3848..f21fe4b3 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -78,63 +78,6 @@ reg_lncc::~reg_lncc() {
     this->backwardMask = nullptr;
 }
 /* *************************************************************** */
-template <class DataType>
-void UpdateLocalStatImages(const nifti_image *refImage,
-                           const nifti_image *warImage,
-                           nifti_image *meanImage,
-                           nifti_image *warpedMeanImage,
-                           nifti_image *sdevImage,
-                           nifti_image *warpedSdevImage,
-                           const int *refMask,
-                           int *combinedMask,
-                           const float *kernelStandardDeviation,
-                           const int& kernelType,
-                           const int& currentTimepoint) {
-    // Generate the combined mask to ignore all NaN values
-#ifdef _WIN32
-    long voxel;
-    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(refImage, 3);
-#else
-    size_t voxel;
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3);
-#endif
-    memcpy(combinedMask, refMask, voxelNumber * sizeof(int));
-    reg_tools_removeNanFromMask(refImage, combinedMask);
-    reg_tools_removeNanFromMask(warImage, combinedMask);
-
-    const DataType *origRefPtr = static_cast<DataType*>(refImage->data);
-    DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
-    DataType *sdevImgPtr = static_cast<DataType*>(sdevImage->data);
-    memcpy(meanImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper);
-    memcpy(sdevImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper);
-
-    reg_tools_multiplyImageToImage(sdevImage, sdevImage, sdevImage);
-    reg_tools_kernelConvolution(meanImage, kernelStandardDeviation, kernelType, combinedMask);
-    reg_tools_kernelConvolution(sdevImage, kernelStandardDeviation, kernelType, combinedMask);
-
-    const DataType *origWarPtr = static_cast<DataType*>(warImage->data);
-    DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
-    DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
-    memcpy(warMeanPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper);
-    memcpy(warSdevPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper);
-
-    reg_tools_multiplyImageToImage(warpedSdevImage, warpedSdevImage, warpedSdevImage);
-    reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
-    reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask);
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-    shared(voxelNumber, sdevImgPtr, meanImgPtr, warSdevPtr, warMeanPtr)
-#endif
-    for (voxel = 0; voxel < voxelNumber; ++voxel) {
-        // G*(I^2) - (G*I)^2
-        sdevImgPtr[voxel] = sqrt(sdevImgPtr[voxel] - reg_pow2(meanImgPtr[voxel]));
-        warSdevPtr[voxel] = sqrt(warSdevPtr[voxel] - reg_pow2(warMeanPtr[voxel]));
-        // Stabilise the computation
-        if (sdevImgPtr[voxel] < 1.e-06) sdevImgPtr[voxel] = 0;
-        if (warSdevPtr[voxel] < 1.e-06) warSdevPtr[voxel] = 0;
-    }
-}
-/* *************************************************************** */
 void reg_lncc::InitialiseMeasure(nifti_image *refImg,
                                  nifti_image *floImg,
                                  int *refMask,
@@ -253,6 +196,63 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg,
 #endif
 }
 /* *************************************************************** */
+template <class DataType>
+void UpdateLocalStatImages(const nifti_image *refImage,
+                           const nifti_image *warImage,
+                           nifti_image *meanImage,
+                           nifti_image *warpedMeanImage,
+                           nifti_image *sdevImage,
+                           nifti_image *warpedSdevImage,
+                           const int *refMask,
+                           int *combinedMask,
+                           const float *kernelStandardDeviation,
+                           const int& kernelType,
+                           const int& currentTimepoint) {
+    // Generate the combined mask to ignore all NaN values
+#ifdef _WIN32
+    long voxel;
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(refImage, 3);
+#else
+    size_t voxel;
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3);
+#endif
+    memcpy(combinedMask, refMask, voxelNumber * sizeof(int));
+    reg_tools_removeNanFromMask(refImage, combinedMask);
+    reg_tools_removeNanFromMask(warImage, combinedMask);
+
+    const DataType *origRefPtr = static_cast<DataType*>(refImage->data);
+    DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
+    DataType *sdevImgPtr = static_cast<DataType*>(sdevImage->data);
+    memcpy(meanImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper);
+    memcpy(sdevImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper);
+
+    reg_tools_multiplyImageToImage(sdevImage, sdevImage, sdevImage);
+    reg_tools_kernelConvolution(meanImage, kernelStandardDeviation, kernelType, combinedMask);
+    reg_tools_kernelConvolution(sdevImage, kernelStandardDeviation, kernelType, combinedMask);
+
+    const DataType *origWarPtr = static_cast<DataType*>(warImage->data);
+    DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
+    DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
+    memcpy(warMeanPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper);
+    memcpy(warSdevPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper);
+
+    reg_tools_multiplyImageToImage(warpedSdevImage, warpedSdevImage, warpedSdevImage);
+    reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
+    reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask);
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+    shared(voxelNumber, sdevImgPtr, meanImgPtr, warSdevPtr, warMeanPtr)
+#endif
+    for (voxel = 0; voxel < voxelNumber; ++voxel) {
+        // G*(I^2) - (G*I)^2
+        sdevImgPtr[voxel] = sqrt(sdevImgPtr[voxel] - reg_pow2(meanImgPtr[voxel]));
+        warSdevPtr[voxel] = sqrt(warSdevPtr[voxel] - reg_pow2(warMeanPtr[voxel]));
+        // Stabilise the computation
+        if (sdevImgPtr[voxel] < 1.e-06) sdevImgPtr[voxel] = 0;
+        if (warSdevPtr[voxel] < 1.e-06) warSdevPtr[voxel] = 0;
+    }
+}
+/* *************************************************************** */
 template<class DataType>
 double reg_getLnccValue(const nifti_image *referenceImage,
                         const nifti_image *meanImage,
@@ -272,7 +272,6 @@ double reg_getLnccValue(const nifti_image *referenceImage,
     size_t voxel;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
-
     // Compute the local correlation
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
     const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
@@ -291,7 +290,7 @@ double reg_getLnccValue(const nifti_image *referenceImage,
 
     reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
 
-    double lnccSum = 0, lncc;
+    double lnccSum = 0;
     size_t activeVoxelNumber = 0;
 
     // Iteration over all voxels
@@ -299,13 +298,12 @@ double reg_getLnccValue(const nifti_image *referenceImage,
 #pragma omp parallel for default(none) \
     shared(voxelNumber,combinedMask,meanImgPtr,warMeanPtr, \
     sdevImgPtr,warSdevPtr,correlationPtr) \
-    private(lncc) \
     reduction(+:lnccSum, activeVoxelNumber)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
-            lncc = (correlationPtr[voxel] - (meanImgPtr[voxel] * warMeanPtr[voxel])) / (sdevImgPtr[voxel] * warSdevPtr[voxel]);
+            const double lncc = (correlationPtr[voxel] - (meanImgPtr[voxel] * warMeanPtr[voxel])) / (sdevImgPtr[voxel] * warSdevPtr[voxel]);
             if (lncc == lncc && !isinf(lncc)) {
                 lnccSum += fabs(lncc);
                 ++activeVoxelNumber;
@@ -322,7 +320,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  nifti_image *warpedMeanImage,
                                  nifti_image *warpedSdevImage,
                                  const int *referenceMask,
-                                 int *combinedMask,
+                                 int *forwardMask,
                                  const float *kernelStandardDeviation,
                                  nifti_image *correlationImage,
                                  const int& kernelType,
@@ -341,7 +339,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                                       sdevImage,
                                                       warpedSdevImage,
                                                       referenceMask,
-                                                      combinedMask,
+                                                      forwardMask,
                                                       kernelStandardDeviation,
                                                       kernelType,
                                                       currentTimepoint);
@@ -352,7 +350,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                                         warpedImage,
                                                         warpedMeanImage,
                                                         warpedSdevImage,
-                                                        combinedMask,
+                                                        forwardMask,
                                                         kernelStandardDeviation,
                                                         correlationImage,
                                                         kernelType,
@@ -397,20 +395,20 @@ double reg_lncc::GetSimilarityMeasureValueBw() {
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage,
-                                   nifti_image *meanImage,
-                                   nifti_image *sdevImage,
-                                   nifti_image *warpedImage,
+void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage,
+                                   const nifti_image *meanImage,
+                                   const nifti_image *sdevImage,
+                                   const nifti_image *warpedImage,
                                    nifti_image *warpedMeanImage,
                                    nifti_image *warpedSdevImage,
-                                   int *combinedMask,
-                                   float *kernelStandardDeviation,
+                                   const int *combinedMask,
+                                   const float *kernelStandardDeviation,
                                    nifti_image *correlationImage,
-                                   nifti_image *warpedGradient,
-                                   nifti_image *measureGradientImage,
-                                   int kernelType,
-                                   int currentTimepoint,
-                                   double timepointWeight) {
+                                   const nifti_image *warpedGradient,
+                                   nifti_image *measureGradient,
+                                   const int& kernelType,
+                                   const int& currentTimepoint,
+                                   const double& timepointWeight) {
 #ifdef _WIN32
     long voxel;
     long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -418,17 +416,16 @@ void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage,
     size_t voxel;
     size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 #endif
-
     // Compute the local correlation
-    DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
+    const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
+    const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
 
-    DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
+    const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
+    const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
 
-    DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
+    const DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
     DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
-    DataType *sdevImgPtr = static_cast<DataType*>(sdevImage->data);
+    const DataType *sdevImgPtr = static_cast<DataType*>(sdevImage->data);
     DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
     DataType *correlationPtr = static_cast<DataType*>(correlationImage->data);
 
@@ -437,8 +434,6 @@ void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage,
 
     reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
 
-    double refMeanValue, warMeanValue, refSdevValue, warSdevValue, correlaValue;
-    double temp1, temp2, temp3;
     size_t activeVoxelNumber = 0;
 
     // Iteration over all voxels
@@ -446,28 +441,23 @@ void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage,
 #pragma omp parallel for default(none) \
     shared(voxelNumber,combinedMask,meanImgPtr,warMeanPtr, \
     sdevImgPtr,warSdevPtr,correlationPtr) \
-    private(refMeanValue,warMeanValue,refSdevValue, \
-    warSdevValue, correlaValue, temp1, temp2, temp3) \
     reduction(+:activeVoxelNumber)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
-            refMeanValue = meanImgPtr[voxel];
-            warMeanValue = warMeanPtr[voxel];
-            refSdevValue = sdevImgPtr[voxel];
-            warSdevValue = warSdevPtr[voxel];
-            correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue);
-
-            temp1 = 1.0 / (refSdevValue * warSdevValue);
-            temp2 = correlaValue / (refSdevValue * warSdevValue * warSdevValue * warSdevValue);
-            temp3 = (correlaValue * warMeanValue) /
-                (refSdevValue * warSdevValue * warSdevValue * warSdevValue)
-                -
-                refMeanValue / (refSdevValue * warSdevValue);
-            if (temp1 == temp1 && isinf(temp1) == 0 &&
-                temp2 == temp2 && isinf(temp2) == 0 &&
-                temp3 == temp3 && isinf(temp3) == 0) {
+            const double& refMeanValue = meanImgPtr[voxel];
+            const double& warMeanValue = warMeanPtr[voxel];
+            const double& refSdevValue = sdevImgPtr[voxel];
+            const double& warSdevValue = warSdevPtr[voxel];
+            const double correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue);
+            double temp1 = 1.0 / (refSdevValue * warSdevValue);
+            double temp2 = correlaValue / (refSdevValue * warSdevValue * warSdevValue * warSdevValue);
+            double temp3 = (correlaValue * warMeanValue) / (refSdevValue * warSdevValue * warSdevValue * warSdevValue)
+                - refMeanValue / (refSdevValue * warSdevValue);
+            if (temp1 == temp1 && !isinf(temp1) &&
+                temp2 == temp2 && !isinf(temp2) &&
+                temp3 == temp3 && !isinf(temp3)) {
                 // Derivative of the absolute function
                 if (correlaValue < 0) {
                     temp1 *= -1;
@@ -483,39 +473,32 @@ void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage,
     }
 
     //adjust weight for number of voxels
-    double adjusted_weight = timepointWeight / activeVoxelNumber;
+    const double adjustedWeight = timepointWeight / activeVoxelNumber;
 
     // Smooth the newly computed values
     reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
     reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask);
     reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask);
-    DataType *measureGradPtrX = static_cast<DataType*>(measureGradientImage->data);
+    DataType *measureGradPtrX = static_cast<DataType*>(measureGradient->data);
     DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
-    DataType *measureGradPtrZ = nullptr;
-    if (referenceImage->nz > 1)
-        measureGradPtrZ = &measureGradPtrY[voxelNumber];
+    DataType *measureGradPtrZ = referenceImage->nz > 1 ? &measureGradPtrY[voxelNumber] : nullptr;
 
     // Create pointers to the spatial gradient of the warped image
-    DataType *warpGradPtrX = static_cast<DataType*>(warpedGradient->data);
-    DataType *warpGradPtrY = &warpGradPtrX[voxelNumber];
-    DataType *warpGradPtrZ = nullptr;
-    if (referenceImage->nz > 1)
-        warpGradPtrZ = &warpGradPtrY[voxelNumber];
+    const DataType *warpGradPtrX = static_cast<DataType*>(warpedGradient->data);
+    const DataType *warpGradPtrY = &warpGradPtrX[voxelNumber];
+    const DataType *warpGradPtrZ = referenceImage->nz > 1 ? &warpGradPtrY[voxelNumber] : nullptr;
 
-    double common;
     // Iteration over all voxels
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \
     warMeanPtr,warSdevPtr,correlationPtr,measureGradPtrX,measureGradPtrY, \
-    measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjusted_weight) \
-    private(common)
+    measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjustedWeight)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
-            common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlationPtr[voxel];
-            common *= adjusted_weight;
+            const double common = (warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlationPtr[voxel]) * adjustedWeight;
             measureGradPtrX[voxel] -= static_cast<DataType>(warpGradPtrX[voxel] * common);
             measureGradPtrY[voxel] -= static_cast<DataType>(warpGradPtrY[voxel] * common);
             if (warpGradPtrZ != nullptr)
@@ -523,69 +506,77 @@ void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage,
         }
     }
     // Check for NaN
-    DataType val;
 #ifdef _WIN32
-    voxelNumber = (long)measureGradientImage->nvox;
+    voxelNumber = (long)measureGradient->nvox;
 #else
-    voxelNumber = measureGradientImage->nvox;
+    voxelNumber = measureGradient->nvox;
 #endif
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber,measureGradPtrX) \
-    private(val)
+    shared(voxelNumber, measureGradPtrX)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
-        val = measureGradPtrX[voxel];
-        if (val != val || isinf(val) != 0)
+        const DataType& val = measureGradPtrX[voxel];
+        if (val != val || isinf(val))
             measureGradPtrX[voxel] = 0;
     }
 }
 /* *************************************************************** */
-void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
-    // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
-    if (this->timePointWeight[currentTimepoint] == 0)
-        return;
-
-    // Compute the mean and variance of the reference and warped floating
-    switch (this->referenceImage->datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        UpdateLocalStatImages<float>(this->referenceImage,
-                                     this->warpedImage,
-                                     this->meanImage,
-                                     this->warpedMeanImage,
-                                     this->sdevImage,
-                                     this->warpedSdevImage,
-                                     this->referenceMask,
-                                     this->forwardMask,
-                                     this->kernelStandardDeviation,
-                                     this->kernelType,
-                                     currentTimepoint);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        UpdateLocalStatImages<double>(this->referenceImage,
-                                      this->warpedImage,
-                                      this->meanImage,
-                                      this->warpedMeanImage,
-                                      this->sdevImage,
-                                      this->warpedSdevImage,
-                                      this->referenceMask,
-                                      this->forwardMask,
-                                      this->kernelStandardDeviation,
-                                      this->kernelType,
-                                      currentTimepoint);
-        break;
-    }
-
-    // Compute the LNCC gradient - Forward
-    switch (this->referenceImage->datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        reg_getVoxelBasedLnccGradient<float>(this->referenceImage,
+void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
+                                            nifti_image *meanImage,
+                                            nifti_image *sdevImage,
+                                            const nifti_image *warpedImage,
+                                            nifti_image *warpedMeanImage,
+                                            nifti_image *warpedSdevImage,
+                                            const int *referenceMask,
+                                            int *forwardMask,
+                                            const float *kernelStandardDeviation,
+                                            nifti_image *correlationImage,
+                                            const nifti_image *warpedGradient,
+                                            nifti_image *measureGradient,
+                                            const int& kernelType,
+                                            const int& currentTimepoint,
+                                            const double& timepointWeight) {
+    std::visit([&](auto&& refImgDataType) {
+        using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+        // Compute the mean and variance of the reference and warped floating
+        UpdateLocalStatImages<RefImgDataType>(referenceImage,
+                                              warpedImage,
+                                              meanImage,
+                                              warpedMeanImage,
+                                              sdevImage,
+                                              warpedSdevImage,
+                                              referenceMask,
+                                              forwardMask,
+                                              kernelStandardDeviation,
+                                              kernelType,
+                                              currentTimepoint);
+        // Compute the LNCC gradient
+        reg_getVoxelBasedLnccGradient<RefImgDataType>(referenceImage,
+                                                      meanImage,
+                                                      sdevImage,
+                                                      warpedImage,
+                                                      warpedMeanImage,
+                                                      warpedSdevImage,
+                                                      forwardMask,
+                                                      kernelStandardDeviation,
+                                                      correlationImage,
+                                                      warpedGradient,
+                                                      measureGradient,
+                                                      kernelType,
+                                                      currentTimepoint,
+                                                      timepointWeight);
+    }, NiftiImage::getFloatingDataType(referenceImage));
+}
+/* *************************************************************** */
+void reg_lncc::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
                                              this->meanImage,
                                              this->sdevImage,
                                              this->warpedImage,
                                              this->warpedMeanImage,
                                              this->warpedSdevImage,
+                                             this->referenceMask,
                                              this->forwardMask,
                                              this->kernelStandardDeviation,
                                              this->correlationImage,
@@ -594,89 +585,23 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                              this->kernelType,
                                              currentTimepoint,
                                              this->timePointWeight[currentTimepoint]);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_getVoxelBasedLnccGradient<double>(this->referenceImage,
-                                              this->meanImage,
-                                              this->sdevImage,
-                                              this->warpedImage,
-                                              this->warpedMeanImage,
-                                              this->warpedSdevImage,
-                                              this->forwardMask,
-                                              this->kernelStandardDeviation,
-                                              this->correlationImage,
-                                              this->warpedGradient,
-                                              this->voxelBasedGradient,
-                                              this->kernelType,
-                                              currentTimepoint,
-                                              this->timePointWeight[currentTimepoint]);
-        break;
-    }
-    if (this->isSymmetric) {
-        // Compute the mean and variance of the floating and warped reference
-        switch (this->floatingImage->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            UpdateLocalStatImages<float>(this->floatingImage,
-                                         this->warpedImageBw,
-                                         this->meanImageBw,
-                                         this->warpedMeanImageBw,
-                                         this->sdevImageBw,
-                                         this->warpedSdevImageBw,
-                                         this->floatingMask,
-                                         this->backwardMask,
-                                         this->kernelStandardDeviation,
-                                         this->kernelType,
-                                         currentTimepoint);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            UpdateLocalStatImages<double>(this->floatingImage,
-                                          this->warpedImageBw,
-                                          this->meanImageBw,
-                                          this->warpedMeanImageBw,
-                                          this->sdevImageBw,
-                                          this->warpedSdevImageBw,
-                                          this->floatingMask,
-                                          this->backwardMask,
-                                          this->kernelStandardDeviation,
-                                          this->kernelType,
-                                          currentTimepoint);
-            break;
-        }
-        // Compute the LNCC gradient - Backward
-        switch (this->floatingImage->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedLnccGradient<float>(this->floatingImage,
-                                                 this->meanImageBw,
-                                                 this->sdevImageBw,
-                                                 this->warpedImageBw,
-                                                 this->warpedMeanImageBw,
-                                                 this->warpedSdevImageBw,
-                                                 this->backwardMask,
-                                                 this->kernelStandardDeviation,
-                                                 this->correlationImageBw,
-                                                 this->warpedGradientBw,
-                                                 this->voxelBasedGradientBw,
-                                                 this->kernelType,
-                                                 currentTimepoint,
-                                                 this->timePointWeight[currentTimepoint]);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedLnccGradient<double>(this->floatingImage,
-                                                  this->meanImageBw,
-                                                  this->sdevImageBw,
-                                                  this->warpedImageBw,
-                                                  this->warpedMeanImageBw,
-                                                  this->warpedSdevImageBw,
-                                                  this->backwardMask,
-                                                  this->kernelStandardDeviation,
-                                                  this->correlationImageBw,
-                                                  this->warpedGradientBw,
-                                                  this->voxelBasedGradientBw,
-                                                  this->kernelType,
-                                                  currentTimepoint,
-                                                  this->timePointWeight[currentTimepoint]);
-            break;
-        }
-    }
+}
+/* *************************************************************** */
+void reg_lncc::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
+                                             this->meanImageBw,
+                                             this->sdevImageBw,
+                                             this->warpedImageBw,
+                                             this->warpedMeanImageBw,
+                                             this->warpedSdevImageBw,
+                                             this->floatingMask,
+                                             this->backwardMask,
+                                             this->kernelStandardDeviation,
+                                             this->correlationImageBw,
+                                             this->warpedGradientBw,
+                                             this->voxelBasedGradientBw,
+                                             this->kernelType,
+                                             currentTimepoint,
+                                             this->timePointWeight[currentTimepoint]);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index 6c7dda5a..b59b48fd 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -38,8 +38,10 @@ class reg_lncc: public reg_measure {
     virtual double GetSimilarityMeasureValueFw() override;
     /// @brief Returns the lncc value backwards
     virtual double GetSimilarityMeasureValueBw() override;
-    /// @brief Compute the voxel based lncc gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
+    /// @brief Compute the voxel-based lncc gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    /// @brief Compute the voxel-based lncc gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
     /// @brief Set the kernel standard deviation
     virtual void SetKernelStandardDeviation(int t, float stddev) {
         this->kernelStandardDeviation[t] = stddev;
diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h
index 56c42d50..12876385 100755
--- a/reg-lib/cpu/_reg_measure.h
+++ b/reg-lib/cpu/_reg_measure.h
@@ -98,15 +98,54 @@ class reg_measure {
         return sim;
     }
 
-    /// @brief Compute the voxel based measure of similarity gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
+    /// @brief Compute the forward voxel-based measure of similarity gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) = 0;
+    /// @brief Compute the backward voxel-based measure of similarity gradient
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) = 0;
+    /// @brief Compute the voxel-based measure of similarity gradient
+    void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {  // Do not override
+        // Check if the specified time point exists and is active
         if (currentTimepoint < 0 || currentTimepoint >= this->referenceImage->nt) {
             reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient");
             reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
             reg_exit();
         }
+        if (this->timePointWeight[currentTimepoint] == 0)
+            return;
+        // Check if all required input images are of the same data type
+        int dtype = this->referenceImage->datatype;
+        if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) {
+            reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()");
+            reg_print_msg_error("Input images are expected to be of floating precision type");
+            reg_exit();
+        }
+        if (this->warpedImage->datatype != dtype ||
+            this->warpedGradient->datatype != dtype ||
+            this->voxelBasedGradient->datatype != dtype) {
+            reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()");
+            reg_print_msg_error("Input images are expected to be of the same type");
+            reg_exit();
+        }
+        // Compute the gradient
+        GetVoxelBasedSimilarityMeasureGradientFw(currentTimepoint);
+        if (this->isSymmetric) {
+            dtype = this->floatingImage->datatype;
+            if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) {
+                reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()");
+                reg_print_msg_error("Input images are expected to be of floating precision type");
+                reg_exit();
+            }
+            if (this->warpedImageBw->datatype != dtype ||
+                this->warpedGradientBw->datatype != dtype ||
+                this->voxelBasedGradientBw->datatype != dtype) {
+                reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()");
+                reg_print_msg_error("Input images are expected to be of the same type");
+                reg_exit();
+            }
+            GetVoxelBasedSimilarityMeasureGradientBw(currentTimepoint);
+        }
     }
-    virtual void GetDiscretisedValue(nifti_image *, float *, int, int) {}
+    virtual void GetDiscretisedValue(nifti_image*, float*, int, int) {}
     virtual void SetTimepointWeight(int timepoint, double weight) {
         this->timePointWeight[timepoint] = weight;
     }
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 7b289c27..abefc7f5 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -22,38 +22,29 @@ void ShiftImage(const nifti_image *inputImage,
                 const int& tz) {
     const DataType* inputData = static_cast<DataType*>(inputImage->data);
     DataType* shiftImageData = static_cast<DataType*>(shiftedImage->data);
-
-    int currentIndex;
-    int shiftedIndex;
-
-    int x, y, z, old_x, old_y, old_z;
-
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(inputData, shiftImageData, shiftedImage, inputImage, mask, tx, ty, tz) \
-    private(x, y, old_x, old_y, old_z, shiftedIndex, currentIndex)
+    shared(inputData, shiftImageData, shiftedImage, inputImage, mask, tx, ty, tz)
 #endif
-    for (z = 0; z < shiftedImage->nz; z++) {
-        currentIndex = z * shiftedImage->nx * shiftedImage->ny;
-        old_z = z - tz;
-        for (y = 0; y < shiftedImage->ny; y++) {
-            old_y = y - ty;
-            for (x = 0; x < shiftedImage->nx; x++) {
-                old_x = x - tx;
-                if (old_x > -1 && old_x < inputImage->nx &&
-                    old_y > -1 && old_y < inputImage->ny &&
-                    old_z > -1 && old_z < inputImage->nz) {
-                    shiftedIndex = (old_z * inputImage->ny + old_y) * inputImage->nx + old_x;
+    for (int z = 0; z < shiftedImage->nz; z++) {
+        int currentIndex = z * shiftedImage->nx * shiftedImage->ny;
+        const int oldZ = z - tz;
+        for (int y = 0; y < shiftedImage->ny; y++) {
+            const int oldY = y - ty;
+            for (int x = 0; x < shiftedImage->nx; x++) {
+                const int oldX = x - tx;
+                if (-1 < oldX && oldX < inputImage->nx &&
+                    -1 < oldY && oldY < inputImage->ny &&
+                    -1 < oldZ && oldZ < inputImage->nz) {
+                    const int shiftedIndex = (oldZ * inputImage->ny + oldY) * inputImage->nx + oldX;
                     if (mask[shiftedIndex] > -1) {
                         shiftImageData[currentIndex] = inputData[shiftedIndex];
                     } // mask is not defined
                     else {
-                        //shiftImageData[currentIndex]=std::numeric_limits<DataType>::quiet_NaN();
                         shiftImageData[currentIndex] = 0;
                     }
                 } // outside of the image
                 else {
-                    //shiftImageData[currentIndex]=std::numeric_limits<DataType>::quiet_NaN();
                     shiftImageData[currentIndex] = 0;
                 }
                 currentIndex++;
@@ -75,7 +66,6 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
     size_t voxelIndex;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(inputImage, 3);
 #endif
-
     // Create a pointer to the descriptor image
     DataType* mindImgDataPtr = static_cast<DataType*>(mindImage->data);
 
@@ -112,7 +102,6 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
         reg_tools_multiplyImageToImage(diffImage, diffImage, diffImage);
         reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask);
         reg_tools_addImageToImage(meanImage, diffImage, meanImage);
-
         // Store the current descriptor
         const size_t index = i * diffImage->nvox;
         memcpy(&mindImgDataPtr[index], diffImage->data, diffImage->nbyper * diffImage->nvox);
@@ -121,25 +110,20 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
     reg_tools_divideValueToImage(meanImage, meanImage, samplingNbr);
 
     // Compute the MIND descriptor
-    int mindIndex;
-    DataType meanValue, maxDesc, descValue;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber, samplingNbr, mask, meanImgDataPtr, \
-    mindImgDataPtr) \
-    private(meanValue, maxDesc, descValue, mindIndex)
+    shared(voxelNumber, samplingNbr, mask, meanImgDataPtr, mindImgDataPtr)
 #endif
     for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) {
         if (mask[voxelIndex] > -1) {
             // Get the mean value for the current voxel
-            meanValue = meanImgDataPtr[voxelIndex];
-            if (meanValue == 0) {
+            DataType meanValue = meanImgDataPtr[voxelIndex];
+            if (meanValue == 0)
                 meanValue = std::numeric_limits<DataType>::epsilon();
-            }
-            maxDesc = 0;
-            mindIndex = voxelIndex;
+            DataType maxDesc = 0;
+            int mindIndex = voxelIndex;
             for (int t = 0; t < samplingNbr; t++) {
-                descValue = (DataType)exp(-mindImgDataPtr[mindIndex] / meanValue);
+                const DataType descValue = exp(-mindImgDataPtr[mindIndex] / meanValue);
                 mindImgDataPtr[mindIndex] = descValue;
                 maxDesc = std::max(maxDesc, descValue);
                 mindIndex += voxelNumber;
@@ -147,13 +131,12 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
 
             mindIndex = voxelIndex;
             for (int t = 0; t < samplingNbr; t++) {
-                descValue = mindImgDataPtr[mindIndex];
+                const DataType& descValue = mindImgDataPtr[mindIndex];
                 mindImgDataPtr[mindIndex] = descValue / maxDesc;
                 mindIndex += voxelNumber;
             }
         } // mask
     } // voxIndex
-    // Mr Propre
     nifti_image_free(diffImage);
     nifti_image_free(shiftedImage);
     nifti_image_free(meanImage);
@@ -166,28 +149,18 @@ void GetMindImageDescriptor(const nifti_image *inputImage,
                             const int *mask,
                             const int& descriptorOffset,
                             const int& currentTimepoint) {
-#ifndef NDEBUG
-    reg_print_fct_debug("GetMindImageDescriptor()");
-#endif
     if (inputImage->datatype != mindImage->datatype) {
         reg_print_fct_error("reg_mind::GetMindImageDescriptor");
-        reg_print_msg_error("The input image and the MIND image must have the same datatype !");
+        reg_print_msg_error("The input image and the MIND image must have the same datatype");
         reg_exit();
     }
-
-    switch (inputImage->datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        GetMindImageDescriptorCore<float>(inputImage, mindImage, mask, descriptorOffset, currentTimepoint);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        GetMindImageDescriptorCore<double>(inputImage, mindImage, mask, descriptorOffset, currentTimepoint);
-        break;
-    default:
-        reg_print_fct_error("GetMindImageDescriptor");
-        reg_print_msg_error("Input image datatype not supported");
-        reg_exit();
-        break;
-    }
+    std::visit([&](auto&& imgType) {
+        using ImgType = std::decay_t<decltype(imgType)>;
+        GetMindImageDescriptorCore<ImgType>(inputImage, mindImage, mask, descriptorOffset, currentTimepoint);
+    }, NiftiImage::getFloatingDataType(inputImage));
+#ifndef NDEBUG
+    reg_print_fct_debug("GetMindImageDescriptor()");
+#endif
 }
 /* *************************************************************** */
 template <class DataType>
@@ -203,7 +176,6 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
     size_t voxelIndex;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(inputImage, 3);
 #endif
-
     // Create a pointer to the descriptor image
     DataType* mindSscImgDataPtr = static_cast<DataType*>(mindSscImage->data);
 
@@ -223,11 +195,11 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
     nifti_image *shiftedImage = nifti_dup(*currentInputImage, false);
 
     // Define the sigma for the convolution
-    float sigma = -0.5; // negative value denotes voxel width
+    const float sigma = -0.5; // negative value denotes voxel width
 
-    //2D version
-    int samplingNbr = (currentInputImage->nz > 1) ? 6 : 2;
-    int lengthDescriptor = (currentInputImage->nz > 1) ? 12 : 4;
+    // 2D version
+    const int samplingNbr = (currentInputImage->nz > 1) ? 6 : 2;
+    const int lengthDescriptor = (currentInputImage->nz > 1) ? 12 : 4;
 
     // Allocation of the difference image
     //std::vector<nifti_image *> vectNiftiImage;
@@ -253,14 +225,11 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
         reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask);
 
         for (int j = 0; j < 2; j++) {
-            ShiftImage<DataType>(diffImage, diffImageShifted, maskDiffImage,
-                                 tx[compteurId], ty[compteurId], tz[compteurId]);
-
+            ShiftImage<DataType>(diffImage, diffImageShifted, maskDiffImage, tx[compteurId], ty[compteurId], tz[compteurId]);
             reg_tools_addImageToImage(meanImg, diffImageShifted, meanImg);
             // Store the current descriptor
             const size_t index = compteurId * diffImageShifted->nvox;
-            memcpy(&mindSscImgDataPtr[index], diffImageShifted->data,
-                   diffImageShifted->nbyper * diffImageShifted->nvox);
+            memcpy(&mindSscImgDataPtr[index], diffImageShifted->data, diffImageShifted->nbyper * diffImageShifted->nvox);
             compteurId++;
         }
     }
@@ -268,24 +237,20 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
     reg_tools_divideValueToImage(meanImg, meanImg, lengthDescriptor);
 
     // Compute the MIND-SSC descriptor
-    int mindIndex;
-    DataType meanValue, maxDesc, descValue;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(voxelNumber, lengthDescriptor, samplingNbr, mask, meanImgDataPtr, mindSscImgDataPtr) \
-    private(meanValue, maxDesc, descValue, mindIndex)
+    shared(voxelNumber, lengthDescriptor, mask, meanImgDataPtr, mindSscImgDataPtr)
 #endif
     for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) {
         if (mask[voxelIndex] > -1) {
             // Get the mean value for the current voxel
-            meanValue = meanImgDataPtr[voxelIndex];
-            if (meanValue == 0) {
+            DataType meanValue = meanImgDataPtr[voxelIndex];
+            if (meanValue == 0)
                 meanValue = std::numeric_limits<DataType>::epsilon();
-            }
-            maxDesc = 0;
-            mindIndex = voxelIndex;
+            DataType maxDesc = 0;
+            int mindIndex = voxelIndex;
             for (int t = 0; t < lengthDescriptor; t++) {
-                descValue = (DataType)exp(-mindSscImgDataPtr[mindIndex] / meanValue);
+                const DataType descValue = exp(-mindSscImgDataPtr[mindIndex] / meanValue);
                 mindSscImgDataPtr[mindIndex] = descValue;
                 maxDesc = std::max(maxDesc, descValue);
                 mindIndex += voxelNumber;
@@ -293,13 +258,12 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
 
             mindIndex = voxelIndex;
             for (int t = 0; t < lengthDescriptor; t++) {
-                descValue = mindSscImgDataPtr[mindIndex];
+                const DataType& descValue = mindSscImgDataPtr[mindIndex];
                 mindSscImgDataPtr[mindIndex] = descValue / maxDesc;
                 mindIndex += voxelNumber;
             }
         } // mask
     } // voxIndex
-    // Mr Propre
     nifti_image_free(diffImageShifted);
     free(maskDiffImage);
     nifti_image_free(diffImage);
@@ -314,28 +278,18 @@ void GetMindSscImageDescriptor(const nifti_image *inputImage,
                                const int *mask,
                                const int& descriptorOffset,
                                const int& currentTimepoint) {
-#ifndef NDEBUG
-    reg_print_fct_debug("GetMindSscImageDescriptor()");
-#endif
     if (inputImage->datatype != mindSscImage->datatype) {
         reg_print_fct_error("reg_mindssc::GetMindSscImageDescriptor");
         reg_print_msg_error("The input image and the MINDSSC image must have the same datatype!");
         reg_exit();
     }
-
-    switch (inputImage->datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        GetMindSscImageDescriptorCore<float>(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        GetMindSscImageDescriptorCore<double>(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint);
-        break;
-    default:
-        reg_print_fct_error("GetMindSscImageDescriptor");
-        reg_print_msg_error("Input image datatype not supported");
-        reg_exit();
-        break;
-    }
+    std::visit([&](auto&& imgType) {
+        using ImgType = std::decay_t<decltype(imgType)>;
+        GetMindSscImageDescriptorCore<ImgType>(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint);
+    }, NiftiImage::getFloatingDataType(inputImage));
+#ifndef NDEBUG
+    reg_print_fct_debug("GetMindSscImageDescriptor()");
+#endif
 }
 /* *************************************************************** */
 reg_mind::reg_mind(): reg_ssd() {
@@ -350,14 +304,6 @@ reg_mind::reg_mind(): reg_ssd() {
 #endif
 }
 /* *************************************************************** */
-void reg_mind::SetDescriptorOffset(int val) {
-    this->descriptorOffset = val;
-}
-/* *************************************************************** */
-int reg_mind::GetDescriptorOffset() {
-    return this->descriptorOffset;
-}
-/* *************************************************************** */
 reg_mind::~reg_mind() {
     if (this->referenceImageDescriptor != nullptr) {
         nifti_image_free(this->referenceImageDescriptor);
@@ -469,7 +415,7 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage,
                                  double *timePointWeightDescriptor,
                                  nifti_image *jacobianDetImage,
                                  float *currentValue,
-                                 int descriptorOffset,
+                                 const int& descriptorOffset,
                                  const int& referenceTimePoint,
                                  const int& mindType) {
     if (referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT32 &&
@@ -538,161 +484,80 @@ double reg_mind::GetSimilarityMeasureValueBw() {
                                        this->mindType);
 }
 /* *************************************************************** */
-void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
-    // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
-    if (this->timePointWeight[currentTimepoint] == 0)
-        return;
-
-    // Create a combined mask to ignore masked and undefined values
-    size_t voxelNumber = NiftiImage::calcVoxelNumber(this->referenceImage, 3);
-    int *combinedMask = (int*)malloc(voxelNumber * sizeof(int));
-    memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int));
-    reg_tools_removeNanFromMask(this->referenceImage, combinedMask);
-    reg_tools_removeNanFromMask(this->warpedImage, combinedMask);
-
-    if (this->mindType == MIND_TYPE) {
-        // Compute the reference image descriptors
-        GetMindImageDescriptor(this->referenceImage,
-                               this->referenceImageDescriptor,
-                               combinedMask,
-                               this->descriptorOffset,
-                               currentTimepoint);
-        // Compute the warped floating image descriptors
-        GetMindImageDescriptor(this->warpedImage,
-                               this->warpedFloatingImageDescriptor,
-                               combinedMask,
-                               this->descriptorOffset,
-                               currentTimepoint);
-    } else if (this->mindType == MINDSSC_TYPE) {
-        // Compute the reference image descriptors
-        GetMindSscImageDescriptor(this->referenceImage,
-                                  this->referenceImageDescriptor,
-                                  combinedMask,
-                                  this->descriptorOffset,
-                                  currentTimepoint);
-        // Compute the warped floating image descriptors
-        GetMindSscImageDescriptor(this->warpedImage,
-                                  this->warpedFloatingImageDescriptor,
-                                  combinedMask,
-                                  this->descriptorOffset,
-                                  currentTimepoint);
-    }
+void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
+                                            nifti_image *referenceImageDescriptor,
+                                            const int *referenceMask,
+                                            nifti_image *warpedImage,
+                                            nifti_image *warpedGradient,
+                                            nifti_image *warpedFloatingImageDescriptor,
+                                            nifti_image *voxelBasedGradient,
+                                            const int& mindType,
+                                            const int& descriptorOffset,
+                                            const int& descriptorNumber,
+                                            const int& currentTimepoint) {
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+    vector<int> combinedMask(referenceMask, referenceMask + voxelNumber);
+    reg_tools_removeNanFromMask(referenceImage, combinedMask.data());
+    reg_tools_removeNanFromMask(warpedImage, combinedMask.data());
 
+    auto GetMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor;
+    // Compute the reference image descriptors
+    GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.data(), descriptorOffset, currentTimepoint);
+    // Compute the warped floating image descriptors
+    GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.data(), descriptorOffset, currentTimepoint);
 
-    for (int desc_index = 0; desc_index < this->descriptorNumber; ++desc_index) {
+    for (int descIndex = 0; descIndex < descriptorNumber; ++descIndex) {
         // Compute the warped image descriptors gradient
-        reg_getImageGradient_symDiff(this->warpedFloatingImageDescriptor,
-                                     this->warpedGradient,
-                                     combinedMask,
+        reg_getImageGradient_symDiff(warpedFloatingImageDescriptor,
+                                     warpedGradient,
+                                     combinedMask.data(),
                                      std::numeric_limits<float>::quiet_NaN(),
-                                     desc_index);
+                                     descIndex);
 
         // Compute the gradient of the ssd for the forward transformation
-        switch (referenceImageDescriptor->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedSsdGradient<float>(this->referenceImageDescriptor,
-                                                this->warpedFloatingImageDescriptor,
-                                                this->warpedGradient,
-                                                this->voxelBasedGradient,
-                                                nullptr, // no Jacobian required here,
-                                                combinedMask,
-                                                desc_index,
-                                                1.0, //all descriptors given weight of 1
-                                                nullptr);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedSsdGradient<double>(this->referenceImageDescriptor,
-                                                 this->warpedFloatingImageDescriptor,
-                                                 this->warpedGradient,
-                                                 this->voxelBasedGradient,
-                                                 nullptr, // no Jacobian required here,
-                                                 combinedMask,
-                                                 desc_index,
-                                                 1.0, //all descriptors given weight of 1
-                                                 nullptr);
-            break;
-        default:
-            reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient");
-            reg_print_msg_error("Unsupported datatype");
-            reg_exit();
-        }
-    }
-    free(combinedMask);
-
-    // Compute the gradient of the ssd for the backward transformation
-    if (this->isSymmetric) {
-        voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
-        combinedMask = (int*)malloc(voxelNumber * sizeof(int));
-        memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int));
-        reg_tools_removeNanFromMask(this->floatingImage, combinedMask);
-        reg_tools_removeNanFromMask(this->warpedImageBw, combinedMask);
-
-        if (this->mindType == MIND_TYPE) {
-            GetMindImageDescriptor(this->floatingImage,
-                                   this->floatingImageDescriptor,
-                                   combinedMask,
-                                   this->descriptorOffset,
-                                   currentTimepoint);
-            GetMindImageDescriptor(this->warpedImageBw,
-                                   this->warpedReferenceImageDescriptor,
-                                   combinedMask,
-                                   this->descriptorOffset,
-                                   currentTimepoint);
-        } else if (this->mindType == MINDSSC_TYPE) {
-            GetMindSscImageDescriptor(this->floatingImage,
-                                      this->floatingImageDescriptor,
-                                      combinedMask,
-                                      this->descriptorOffset,
-                                      currentTimepoint);
-            GetMindSscImageDescriptor(this->warpedImageBw,
-                                      this->warpedReferenceImageDescriptor,
-                                      combinedMask,
-                                      this->descriptorOffset,
-                                      currentTimepoint);
-        }
-
-        for (int desc_index = 0; desc_index < this->descriptorNumber; ++desc_index) {
-            reg_getImageGradient_symDiff(this->warpedReferenceImageDescriptor,
-                                         this->warpedGradientBw,
-                                         combinedMask,
-                                         std::numeric_limits<float>::quiet_NaN(),
-                                         desc_index);
-
-            // Compute the gradient of the nmi for the backward transformation
-            switch (floatingImage->datatype) {
-            case NIFTI_TYPE_FLOAT32:
-                reg_getVoxelBasedSsdGradient<float>(this->floatingImageDescriptor,
-                                                    this->warpedReferenceImageDescriptor,
-                                                    this->warpedGradientBw,
-                                                    this->voxelBasedGradientBw,
-                                                    nullptr, // no Jacobian required here,
-                                                    combinedMask,
-                                                    desc_index,
-                                                    1.0, //all descriptors given weight of 1
-                                                    nullptr);
-                break;
-            case NIFTI_TYPE_FLOAT64:
-                reg_getVoxelBasedSsdGradient<double>(this->floatingImageDescriptor,
-                                                     this->warpedReferenceImageDescriptor,
-                                                     this->warpedGradientBw,
-                                                     this->voxelBasedGradientBw,
-                                                     nullptr, // no Jacobian required here,
-                                                     combinedMask,
-                                                     desc_index,
-                                                     1.0, //all descriptors given weight of 1
-                                                     nullptr);
-                break;
-            default:
-                reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient");
-                reg_print_msg_error("Unsupported datatype");
-                reg_exit();
-            }
-        }
-        free(combinedMask);
+        std::visit([&](auto&& refDescDataType) {
+            using RefDescDataType = std::decay_t<decltype(refDescDataType)>;
+            reg_getVoxelBasedSsdGradient<RefDescDataType>(referenceImageDescriptor,
+                                                          warpedFloatingImageDescriptor,
+                                                          warpedGradient,
+                                                          voxelBasedGradient,
+                                                          nullptr, // no Jacobian required here
+                                                          combinedMask.data(),
+                                                          descIndex,
+                                                          1.0,  // all descriptors given weight of 1
+                                                          nullptr);
+        }, NiftiImage::getFloatingDataType(referenceImageDescriptor));
     }
 }
 /* *************************************************************** */
+void reg_mind::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
+                                             this->referenceImageDescriptor,
+                                             this->referenceMask,
+                                             this->warpedImage,
+                                             this->warpedGradient,
+                                             this->warpedFloatingImageDescriptor,
+                                             this->voxelBasedGradient,
+                                             this->mindType,
+                                             this->descriptorOffset,
+                                             this->descriptorNumber,
+                                             currentTimepoint);
+}
+/* *************************************************************** */
+void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
+                                             this->floatingImageDescriptor,
+                                             this->floatingMask,
+                                             this->warpedImageBw,
+                                             this->warpedGradientBw,
+                                             this->warpedReferenceImageDescriptor,
+                                             this->voxelBasedGradientBw,
+                                             this->mindType,
+                                             this->descriptorOffset,
+                                             this->descriptorNumber,
+                                             currentTimepoint);
+}
+/* *************************************************************** */
 reg_mindssc::reg_mindssc(): reg_mind() {
     this->mindType = MINDSSC_TYPE;
 #ifndef NDEBUG
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index 9eb88336..c1db52e6 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -44,18 +44,19 @@ class reg_mind: public reg_ssd {
     virtual double GetSimilarityMeasureValueFw() override;
     /// @brief Returns the backward mind-based measure of similarity value
     virtual double GetSimilarityMeasureValueBw() override;
-    /// @brief Compute the voxel based gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
-    virtual void SetDescriptorOffset(int);
-    virtual int GetDescriptorOffset();
+    /// @brief Compute the voxel-based mind gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    /// @brief Compute the voxel-based mind gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
+    virtual void SetDescriptorOffset(int val) { this->descriptorOffset = val; }
+    virtual int GetDescriptorOffset() { return this->descriptorOffset; }
 
 protected:
     nifti_image *referenceImageDescriptor;
     nifti_image *floatingImageDescriptor;
     nifti_image *warpedReferenceImageDescriptor;
     nifti_image *warpedFloatingImageDescriptor;
-    double timePointWeightDescriptor[255] = {0};
-
+    double timePointWeightDescriptor[255]{};
     int descriptorOffset;
     int mindType;
     int descriptorNumber;
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 4036cf08..b8ce5a55 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -174,32 +174,29 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
 #endif
 }
 /* *************************************************************** */
-template<class PrecisionType>
-PrecisionType GetBasisSplineValue(PrecisionType x) {
+static double GetBasisSplineValue(double x) {
     x = fabs(x);
-    PrecisionType value = 0;
+    double value = 0;
     if (x < 2.0) {
         if (x < 1.0)
-            value = (PrecisionType)(2.0f / 3.0f + (0.5f * x - 1.0) * x * x);
+            value = 2.0 / 3.0 + (0.5 * x - 1.0) * x * x;
         else {
-            x -= 2.0f;
-            value = -x * x * x / 6.0f;
+            x -= 2.0;
+            value = -x * x * x / 6.0;
         }
     }
     return value;
 }
 /* *************************************************************** */
-template<class PrecisionType>
-PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) {
-    PrecisionType x = fabs(ori);
-    PrecisionType value = 0;
+static double GetBasisSplineDerivativeValue(double ori) {
+    double x = fabs(ori), value = 0;
     if (x < 2.0) {
         if (x < 1.0)
-            value = (PrecisionType)((1.5f * x - 2.0) * ori);
+            value = (1.5 * x - 2.0) * ori;
         else {
-            x -= 2.0f;
-            value = -0.5f * x * x;
-            if (ori < 0.0f) value = -value;
+            x -= 2.0;
+            value = -0.5 * x * x;
+            if (ori < 0.0) value = -value;
         }
     }
     return value;
@@ -250,8 +247,8 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             }
             // Convolve the histogram with a cubic B-spline kernel
             double kernel[3];
-            kernel[0] = kernel[2] = GetBasisSplineValue(-1.);
-            kernel[1] = GetBasisSplineValue(0.);
+            kernel[0] = kernel[2] = GetBasisSplineValue(-1.0);
+            kernel[1] = GetBasisSplineValue(0.0);
             // Histogram is first smooth along the reference axis
             memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double));
             for (int f = 0; f < floatingBinNumber[t]; ++f) {
@@ -417,7 +414,7 @@ double reg_nmi::GetSimilarityMeasureValueBw() {
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
+void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                                     const nifti_image *warpedImage,
                                     const unsigned short *referenceBinNumber,
                                     const unsigned short *floatingBinNumber,
@@ -428,13 +425,13 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
                                     const int *referenceMask,
                                     const int& currentTimepoint,
                                     const double& timepointWeight) {
-    if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) {
-        reg_print_fct_error("reg_getVoxelBasedNMIGradient2D");
-        reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
-        reg_exit();
-    }
+#ifdef WIN32
+    long i;
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
+#else
+    size_t i;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
-
+#endif
     // Pointers to the image data
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
     const DataType *refPtr = &refImagePtr[currentTimepoint * voxelNumber];
@@ -456,29 +453,28 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
     const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint];
     const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint];
     // Iterate over all voxel
-    for (size_t i = 0; i < voxelNumber; ++i) {
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+    shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \
+    logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY, \
+    warGradPtrX,warGradPtrY,entropyPtr,nmi,currentTimepoint,timepointWeight)
+#endif // _OPENMP
+    for (i = 0; i < voxelNumber; ++i) {
         // Check if the voxel belongs to the image mask
         if (referenceMask[i] > -1) {
-            DataType refValue = refPtr[i];
-            DataType warValue = warPtr[i];
+            DataType refValue = refPtr[i], warValue = warPtr[i];
             if (refValue == refValue && warValue == warValue) {
-                DataType gradX = warGradPtrX[i];
-                DataType gradY = warGradPtrY[i];
-
-                double jointDeriv[2] = {0};
-                double refDeriv[2] = {0};
-                double warDeriv[2] = {0};
-
-                for (int r = (int)(refValue - 1.0); r < (int)(refValue + 3.0); ++r) {
+                DataType gradX = warGradPtrX[i], gradY = warGradPtrY[i];
+                double jointDeriv[2]{}, refDeriv[2]{}, warDeriv[2]{};
+                for (int r = int(refValue - 1.f); r < int(refValue + 3.f); ++r) {
                     if (-1 < r && r < referenceBinNumber[currentTimepoint]) {
-                        for (int w = (int)(warValue - 1.0); w < (int)(warValue + 3.0); ++w) {
+                        for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) {
                             if (-1 < w && w < floatingBinNumber[currentTimepoint]) {
-                                double commun =
-                                    GetBasisSplineValue((double)refValue - (double)r) *
-                                    GetBasisSplineDerivativeValue((double)warValue - (double)w);
-                                double jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
-                                double refLog = logHistoPtr[r + referenceOffset];
-                                double warLog = logHistoPtr[w + floatingOffset];
+                                const double commun = GetBasisSplineValue(refValue - r) *
+                                    GetBasisSplineDerivativeValue(warValue - w);
+                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
+                                const double& refLog = logHistoPtr[r + referenceOffset];
+                                const double& warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
                                     jointDeriv[0] += commun * gradX * jointLog;
                                     refDeriv[0] += commun * gradX * refLog;
@@ -493,17 +489,17 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage,
                         }
                     }
                 }
-                measureGradPtrX[i] += (DataType)(timepointWeight * (refDeriv[0] + warDeriv[0] -
-                                                                     nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrY[i] += (DataType)(timepointWeight * (refDeriv[1] + warDeriv[1] -
-                                                                     nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
+                measureGradPtrX[i] += static_cast<DataType>(timepointWeight * (refDeriv[0] + warDeriv[0] -
+                                                                               nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
+                measureGradPtrY[i] += static_cast<DataType>(timepointWeight * (refDeriv[1] + warDeriv[1] -
+                                                                               nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
     } // loop over all voxel
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
+void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
                                     const nifti_image *warpedImage,
                                     const unsigned short *referenceBinNumber,
                                     const unsigned short *floatingBinNumber,
@@ -514,12 +510,6 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
                                     const int *referenceMask,
                                     const int& currentTimepoint,
                                     const double& timepointWeight) {
-    if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) {
-        reg_print_fct_error("reg_getVoxelBasedNMIGradient3D");
-        reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
-        reg_exit();
-    }
-
 #ifdef WIN32
     long i;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -549,14 +539,9 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
     const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
     const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint];
     const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint];
-    int r, w;
-    DataType refValue, warValue, gradX, gradY, gradZ;
-    double jointDeriv[3], refDeriv[3], warDeriv[3], commun, jointLog, refLog, warLog;
     // Iterate over all voxel
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    private(r,w,refValue,warValue,gradX,gradY,gradZ, \
-    jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \
     shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \
     logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \
     warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimepoint,timepointWeight)
@@ -564,26 +549,19 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
     for (i = 0; i < voxelNumber; ++i) {
         // Check if the voxel belongs to the image mask
         if (referenceMask[i] > -1) {
-            refValue = refPtr[i];
-            warValue = warPtr[i];
+            DataType refValue = refPtr[i], warValue = warPtr[i];
             if (refValue == refValue && warValue == warValue) {
-                gradX = warGradPtrX[i];
-                gradY = warGradPtrY[i];
-                gradZ = warGradPtrZ[i];
-
-                jointDeriv[0] = jointDeriv[1] = jointDeriv[2] = 0.f;
-                refDeriv[0] = refDeriv[1] = refDeriv[2] = 0.f;
-                warDeriv[0] = warDeriv[1] = warDeriv[2] = 0.f;
-
-                for (r = (int)(refValue - 1.0); r < (int)(refValue + 3.0); ++r) {
+                DataType gradX = warGradPtrX[i], gradY = warGradPtrY[i], gradZ = warGradPtrZ[i];
+                double jointDeriv[3]{}, refDeriv[3]{}, warDeriv[3]{};
+                for (int r = int(refValue - 1.f); r < int(refValue + 3.f); ++r) {
                     if (-1 < r && r < referenceBinNumber[currentTimepoint]) {
-                        for (w = (int)(warValue - 1.0); w < (int)(warValue + 3.0); ++w) {
+                        for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) {
                             if (-1 < w && w < floatingBinNumber[currentTimepoint]) {
-                                commun = GetBasisSplineValue((double)refValue - (double)r) *
-                                    GetBasisSplineDerivativeValue((double)warValue - (double)w);
-                                jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
-                                refLog = logHistoPtr[r + referenceOffset];
-                                warLog = logHistoPtr[w + floatingOffset];
+                                const double commun = GetBasisSplineValue(refValue - r) *
+                                    GetBasisSplineDerivativeValue(warValue - w);
+                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
+                                const double& refLog = logHistoPtr[r + referenceOffset];
+                                const double& warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
                                     refDeriv[0] += commun * gradX * refLog;
                                     warDeriv[0] += commun * gradX * warLog;
@@ -603,117 +581,73 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage,
                         }
                     }
                 }
-                measureGradPtrX[i] += (DataType)(timepointWeight * (refDeriv[0] + warDeriv[0] -
-                                                                    nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrY[i] += (DataType)(timepointWeight * (refDeriv[1] + warDeriv[1] -
-                                                                    nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrZ[i] += (DataType)(timepointWeight * (refDeriv[2] + warDeriv[2] -
-                                                                    nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3]));
+                measureGradPtrX[i] += static_cast<DataType>(timepointWeight * (refDeriv[0] + warDeriv[0] -
+                                                                               nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
+                measureGradPtrY[i] += static_cast<DataType>(timepointWeight * (refDeriv[1] + warDeriv[1] -
+                                                                               nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
+                measureGradPtrZ[i] += static_cast<DataType>(timepointWeight * (refDeriv[2] + warDeriv[2] -
+                                                                               nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
     } // loop over all voxel
 }
 /* *************************************************************** */
-void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
-    // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
-    if (this->timePointWeight[currentTimepoint] == 0)
-        return;
-
-    // Check if all required input images are of the same data type
-    int dtype = this->referenceImage->datatype;
-    if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) {
-        reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-        reg_print_msg_error("Input images are expected to be of floating precision type");
-        reg_exit();
-    }
-    if (this->warpedImage->datatype != dtype ||
-        this->warpedGradient->datatype != dtype ||
-        this->voxelBasedGradient->datatype != dtype) {
-        reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-        reg_print_msg_error("Input images are expected to be of the same type");
-        reg_exit();
-    }
-
-    // Call compute similarity measure to calculate joint histogram
-    this->GetSimilarityMeasureValue();
-
-    // Compute the gradient of the nmi for the forward transformation
+void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
+                                            const nifti_image *warpedImage,
+                                            const unsigned short *referenceBinNumber,
+                                            const unsigned short *floatingBinNumber,
+                                            const double *const *jointHistogramLog,
+                                            const double *const *entropyValues,
+                                            const nifti_image *warpedGradient,
+                                            nifti_image *voxelBasedGradient,
+                                            const int *referenceMask,
+                                            const int& currentTimepoint,
+                                            const double& timepointWeight) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
-        if (this->referenceImage->nz > 1) {  // 3D input images
-            reg_getVoxelBasedNMIGradient3D<RefImgDataType>(this->referenceImage,
-                                                           this->warpedImage,
-                                                           this->referenceBinNumber,
-                                                           this->floatingBinNumber,
-                                                           this->jointHistogramLog,
-                                                           this->entropyValues,
-                                                           this->warpedGradient,
-                                                           this->voxelBasedGradient,
-                                                           this->referenceMask,
-                                                           currentTimepoint,
-                                                           this->timePointWeight[currentTimepoint]);
-        } else { // 2D input images
-            reg_getVoxelBasedNMIGradient2D<RefImgDataType>(this->referenceImage,
-                                                           this->warpedImage,
-                                                           this->referenceBinNumber,
-                                                           this->floatingBinNumber,
-                                                           this->jointHistogramLog,
-                                                           this->entropyValues,
-                                                           this->warpedGradient,
-                                                           this->voxelBasedGradient,
-                                                           this->referenceMask,
-                                                           currentTimepoint,
-                                                           this->timePointWeight[currentTimepoint]);
-        }
-    }, NiftiImage::getFloatingDataType(this->referenceImage));
+        auto GetVoxelBasedNmiGradient = referenceImage->nz > 1 ? reg_getVoxelBasedNmiGradient3d<RefImgDataType> : reg_getVoxelBasedNmiGradient2d<RefImgDataType>;
+        GetVoxelBasedNmiGradient(referenceImage,
+                                 warpedImage,
+                                 referenceBinNumber,
+                                 floatingBinNumber,
+                                 jointHistogramLog,
+                                 entropyValues,
+                                 warpedGradient,
+                                 voxelBasedGradient,
+                                 referenceMask,
+                                 currentTimepoint,
+                                 timepointWeight);
+    }, NiftiImage::getFloatingDataType(referenceImage));
+}
+/* *************************************************************** */
+void reg_nmi::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+    // Call compute similarity measure to calculate joint histogram
+    this->GetSimilarityMeasureValue();
 
-    if (this->isSymmetric) {
-        dtype = this->floatingImage->datatype;
-        if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) {
-            reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-            reg_print_msg_error("Input images are expected to be of floating precision type");
-            reg_exit();
-        }
-        if (this->warpedImageBw->datatype != dtype ||
-            this->warpedGradientBw->datatype != dtype ||
-            this->voxelBasedGradientBw->datatype != dtype) {
-            reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()");
-            reg_print_msg_error("Input images are expected to be of the same type");
-            reg_exit();
-        }
-        // Compute the gradient of the nmi for the backward transformation
-        std::visit([&](auto&& floImgDataType) {
-            using FloImgDataType = std::decay_t<decltype(floImgDataType)>;
-            if (this->floatingImage->nz > 1) {  // 3D input images
-                reg_getVoxelBasedNMIGradient3D<FloImgDataType>(this->floatingImage,
-                                                               this->warpedImageBw,
-                                                               this->floatingBinNumber,
-                                                               this->referenceBinNumber,
-                                                               this->jointHistogramLogBw,
-                                                               this->entropyValuesBw,
-                                                               this->warpedGradientBw,
-                                                               this->voxelBasedGradientBw,
-                                                               this->floatingMask,
-                                                               currentTimepoint,
-                                                               this->timePointWeight[currentTimepoint]);
-            } else { // 2D input images
-                reg_getVoxelBasedNMIGradient2D<FloImgDataType>(this->floatingImage,
-                                                               this->warpedImageBw,
-                                                               this->floatingBinNumber,
-                                                               this->referenceBinNumber,
-                                                               this->jointHistogramLogBw,
-                                                               this->entropyValuesBw,
-                                                               this->warpedGradientBw,
-                                                               this->voxelBasedGradientBw,
-                                                               this->floatingMask,
-                                                               currentTimepoint,
-                                                               this->timePointWeight[currentTimepoint]);
-            }
-        }, NiftiImage::getFloatingDataType(this->floatingImage));
-    }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi::GetVoxelBasedSimilarityMeasureGradient called");
-#endif
+    ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
+                                             this->warpedImage,
+                                             this->referenceBinNumber,
+                                             this->floatingBinNumber,
+                                             this->jointHistogramLog,
+                                             this->entropyValues,
+                                             this->warpedGradient,
+                                             this->voxelBasedGradient,
+                                             this->referenceMask,
+                                             currentTimepoint,
+                                             this->timePointWeight[currentTimepoint]);
+}
+/* *************************************************************** */
+void reg_nmi::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
+                                             this->warpedImageBw,
+                                             this->floatingBinNumber,
+                                             this->referenceBinNumber,
+                                             this->jointHistogramLogBw,
+                                             this->entropyValuesBw,
+                                             this->warpedGradientBw,
+                                             this->voxelBasedGradientBw,
+                                             this->floatingMask,
+                                             currentTimepoint,
+                                             this->timePointWeight[currentTimepoint]);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 3f66e70e..84ea55ba 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -42,8 +42,10 @@ class reg_nmi: public reg_measure {
     virtual double GetSimilarityMeasureValueFw() override;
     /// @brief Returns the nmi value backwards
     virtual double GetSimilarityMeasureValueBw() override;
-    /// @brief Compute the voxel based nmi gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
+    /// @brief Compute the voxel-based nmi gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    /// @brief Compute the voxel-based nmi gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
 
     virtual void SetRefAndFloatBinNumbers(unsigned short refBinNumber,
                                           unsigned short floBinNumber,
@@ -220,13 +222,10 @@ class reg_multichannel_nmi: public reg_measure {
     /// @brief Returns the nmi value backwards
     virtual double GetSimilarityMeasureValueBw() override { return 0; }
 
-    /// @brief Compute the voxel based nmi gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {
-        // Check if the specified time point exists and is active
-        reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
-        if (this->timePointWeight[currentTimepoint] == 0)
-            return;
-    }
+    /// @brief Compute the voxel-based nmi gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {}
+    /// @brief Compute the voxel-based nmi gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {}
 
 protected:
     unsigned short referenceBinNumber[255];
@@ -242,7 +241,7 @@ class reg_multichannel_nmi: public reg_measure {
 /* *************************************************************** */
 /// Multi channel NMI version - Entropy
 extern "C++"
-void reg_getMultiChannelNMIValue(nifti_image *referenceImages,
+void reg_getMultiChannelNmiValue(nifti_image *referenceImages,
                                  nifti_image *warpedImages,
                                  unsigned *referenceBins, // should be an array of size num_reference_volumes
                                  unsigned *warpedBins, // should be an array of size num_warped_volumes
@@ -254,7 +253,7 @@ void reg_getMultiChannelNMIValue(nifti_image *referenceImages,
 /* *************************************************************** */
 /// Multi channel NMI version - Gradient
 extern "C++"
-void reg_getVoxelBasedMultiChannelNMIGradient2D(nifti_image *referenceImages,
+void reg_getVoxelBasedMultiChannelNmiGradient2D(nifti_image *referenceImages,
                                                 nifti_image *warpedImages,
                                                 nifti_image *warpedImageGradient,
                                                 unsigned *referenceBins,
@@ -267,7 +266,7 @@ void reg_getVoxelBasedMultiChannelNMIGradient2D(nifti_image *referenceImages,
 /* *************************************************************** */
 /// Multi channel NMI version - Gradient
 extern "C++"
-void reg_getVoxelBasedMultiChannelNMIGradient3D(nifti_image *referenceImages,
+void reg_getVoxelBasedMultiChannelNmiGradient3D(nifti_image *referenceImages,
                                                 nifti_image *warpedImages,
                                                 nifti_image *warpedImageGradient,
                                                 unsigned *referenceBins,
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 19115e20..5fc84cb8 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -116,16 +116,11 @@ double reg_getSsdValue(const nifti_image *referenceImage,
     const DataType *referencePtr = static_cast<DataType*>(referenceImage->data);
     const DataType *warpedPtr = static_cast<DataType*>(warpedImage->data);
     // Create a pointer to the Jacobian determinant image if defined
-    const DataType *jacDetPtr = nullptr;
-    if (jacobianDetImage != nullptr)
-        jacDetPtr = static_cast<DataType*>(jacobianDetImage->data);
+    const DataType *jacDetPtr = jacobianDetImage ? static_cast<DataType*>(jacobianDetImage->data) : nullptr;
     // Create a pointer to the local weight image if defined
-    const DataType *localWeightPtr = nullptr;
-    if (localWeightSim != nullptr)
-        localWeightPtr = static_cast<DataType*>(localWeightSim->data);
+    const DataType *localWeightPtr = localWeightSim ? static_cast<DataType*>(localWeightSim->data) : nullptr;
 
     double ssdGlobal = 0;
-    double refValue, warValue, diff;
 
     // Loop over the different time points
     for (int time = 0; time < referenceImage->nt; ++time) {
@@ -133,40 +128,29 @@ double reg_getSsdValue(const nifti_image *referenceImage,
             // Create pointers to the current time point of the reference and warped images
             const DataType *currentRefPtr = &referencePtr[time * voxelNumber];
             const DataType *currentWarPtr = &warpedPtr[time * voxelNumber];
-
             double ssdLocal = 0, n = 0;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \
     jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \
-    private(refValue, warValue, diff) \
-    reduction(+:ssdLocal) \
-    reduction(+:n)
+    reduction(+:ssdLocal, n)
 #endif
             for (voxel = 0; voxel < voxelNumber; ++voxel) {
                 // Check if the current voxel belongs to the mask
                 if (mask[voxel] > -1) {
                     // Ensure that both ref and warped values are defined
-                    refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter);
-                    warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter);
-
+                    const double refValue = currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter;
+                    const double warValue = currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter;
                     if (refValue == refValue && warValue == warValue) {
 #ifdef MRF_USE_SAD
-                        diff = fabs(refValue - warValue);
+                        const double diff = fabs(refValue - warValue);
 #else
-                        diff = reg_pow2(refValue - warValue);
+                        const double diff = reg_pow2(refValue - warValue);
 #endif
                         // Jacobian determinant modulation of the ssd if required
-                        if (jacDetPtr != nullptr) {
-                            ssdLocal += diff * jacDetPtr[voxel];
-                            n += jacDetPtr[voxel];
-                        } else if (localWeightPtr != nullptr) {
-                            ssdLocal += diff * localWeightPtr[voxel];
-                            n += localWeightPtr[voxel];
-                        } else {
-                            ssdLocal += diff;
-                            n += 1.0;
-                        }
+                        const DataType& val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1);
+                        ssdLocal += diff * val;
+                        n += val;
                     }
                 }
             }
@@ -230,11 +214,6 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
                                   const int& currentTimepoint,
                                   const double& timepointWeight,
                                   const nifti_image *localWeightSim) {
-    if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) {
-        reg_print_fct_error("reg_getVoxelBasedSSDGradient");
-        reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
-        reg_exit();
-    }
     // Create pointers to the reference and warped images
 #ifdef _WIN32
     long voxel;
@@ -252,56 +231,45 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
     // Pointers to the spatial gradient of the warped image
     const DataType *spatialGradPtrX = static_cast<DataType*>(warpedGradient->data);
     const DataType *spatialGradPtrY = &spatialGradPtrX[voxelNumber];
-    const DataType *spatialGradPtrZ = nullptr;
-    if (referenceImage->nz > 1)
-        spatialGradPtrZ = &spatialGradPtrY[voxelNumber];
+    const DataType *spatialGradPtrZ = referenceImage->nz > 1 ? &spatialGradPtrY[voxelNumber] : nullptr;
 
     // Pointers to the measure of similarity gradient
     DataType *measureGradPtrX = static_cast<DataType*>(measureGradientImage->data);
     DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
-    DataType *measureGradPtrZ = nullptr;
-    if (referenceImage->nz > 1)
-        measureGradPtrZ = &measureGradPtrY[voxelNumber];
+    DataType *measureGradPtrZ = referenceImage->nz > 1 ? &measureGradPtrY[voxelNumber] : nullptr;
 
     // Create a pointer to the Jacobian determinant values if defined
-    const DataType *jacDetPtr = nullptr;
-    if (jacobianDetImage != nullptr)
-        jacDetPtr = static_cast<DataType*>(jacobianDetImage->data);
+    const DataType *jacDetPtr = jacobianDetImage ? static_cast<DataType*>(jacobianDetImage->data) : nullptr;
     // Create a pointer to the local weight image if defined
-    const DataType *localWeightPtr = nullptr;
-    if (localWeightSim != nullptr)
-        localWeightPtr = static_cast<DataType*>(localWeightSim->data);
+    const DataType *localWeightPtr = localWeightSim ? static_cast<DataType*>(localWeightSim->data) : nullptr;
 
     // find number of active voxels and correct weight
-    double activeVoxel_num = 0;
+    size_t activeVoxelNumber = 0;
     for (voxel = 0; voxel < voxelNumber; voxel++) {
         if (mask[voxel] > -1) {
             if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel])
-                activeVoxel_num += 1.0;
+                activeVoxelNumber++;
         }
     }
-    double adjustedWeight = timepointWeight / activeVoxel_num;
-
-    double refValue, warValue, common;
+    const double adjustedWeight = timepointWeight / activeVoxelNumber;
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, \
     mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \
     measureGradPtrX, measureGradPtrY, measureGradPtrZ, voxelNumber, \
-    localWeightPtr, adjustedWeight) \
-    private(refValue, warValue, common)
+    localWeightPtr, adjustedWeight)
 #endif
     for (voxel = 0; voxel < voxelNumber; voxel++) {
         if (mask[voxel] > -1) {
-            refValue = currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter;
-            warValue = currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter;
+            const double refValue = currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter;
+            const double warValue = currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter;
             if (refValue == refValue && warValue == warValue) {
 #ifdef MRF_USE_SAD
-                common = refValue > warValue ? -1.f : 1.f;
+                double common = refValue > warValue ? -1.f : 1.f;
                 common *= (refValue - warValue);
 #else
-                common = -2.0 * (refValue - warValue);
+                double common = -2.0 * (refValue - warValue);
 #endif
                 if (jacDetPtr != nullptr)
                     common *= jacDetPtr[voxel];
@@ -314,8 +282,7 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
                     measureGradPtrX[voxel] += static_cast<DataType>(common * spatialGradPtrX[voxel]);
                 if (spatialGradPtrY[voxel] == spatialGradPtrY[voxel])
                     measureGradPtrY[voxel] += static_cast<DataType>(common * spatialGradPtrY[voxel]);
-
-                if (measureGradPtrZ != nullptr) {
+                if (measureGradPtrZ) {
                     if (spatialGradPtrZ[voxel] == spatialGradPtrZ[voxel])
                         measureGradPtrZ[voxel] += static_cast<DataType>(common * spatialGradPtrZ[voxel]);
                 }
@@ -326,36 +293,31 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
 template void reg_getVoxelBasedSsdGradient<float>(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*);
 template void reg_getVoxelBasedSsdGradient<double>(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*);
 /* *************************************************************** */
-void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
-    // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
-    if (this->timePointWeight[currentTimepoint] == 0)
-        return;
-
-    // Check if all required input images are of the same data type
-    int dtype = this->referenceImage->datatype;
-    if (this->warpedImage->datatype != dtype ||
-        this->warpedGradient->datatype != dtype ||
-        this->voxelBasedGradient->datatype != dtype) {
-        reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
-        reg_print_msg_error("Input images are expected to be of the same type");
-        reg_exit();
-    }
-    // Compute the gradient of the ssd for the forward transformation
-    switch (dtype) {
-    case NIFTI_TYPE_FLOAT32:
-        reg_getVoxelBasedSsdGradient<float>(this->referenceImage,
-                                            this->warpedImage,
-                                            this->warpedGradient,
-                                            this->voxelBasedGradient,
-                                            nullptr, // TODO this->forwardJacDetImagePointer,
-                                            this->referenceMask,
-                                            currentTimepoint,
-                                            this->timePointWeight[currentTimepoint],
-                                            this->localWeightSim);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_getVoxelBasedSsdGradient<double>(this->referenceImage,
+void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
+                                            const nifti_image *warpedImage,
+                                            const nifti_image *warpedGradient,
+                                            nifti_image *voxelBasedGradient,
+                                            const nifti_image *jacobianDetImage,
+                                            const int *mask,
+                                            const int& currentTimepoint,
+                                            const double& timepointWeight,
+                                            const nifti_image *localWeightSim) {
+    std::visit([&](auto&& refImgDataType) {
+        using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+        reg_getVoxelBasedSsdGradient<RefImgDataType>(referenceImage,
+                                                     warpedImage,
+                                                     warpedGradient,
+                                                     voxelBasedGradient,
+                                                     jacobianDetImage,
+                                                     mask,
+                                                     currentTimepoint,
+                                                     timepointWeight,
+                                                     localWeightSim);
+    }, NiftiImage::getFloatingDataType(referenceImage));
+}
+/* *************************************************************** */
+void reg_ssd::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
                                              this->warpedImage,
                                              this->warpedGradient,
                                              this->voxelBasedGradient,
@@ -364,65 +326,31 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                              currentTimepoint,
                                              this->timePointWeight[currentTimepoint],
                                              this->localWeightSim);
-        break;
-    default:
-        reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
-        reg_print_msg_error("Unsupported datatype");
-        reg_exit();
-    }
-    // Compute the gradient of the ssd for the backward transformation
-    if (this->isSymmetric) {
-        dtype = this->floatingImage->datatype;
-        if (this->warpedImageBw->datatype != dtype ||
-            this->warpedGradientBw->datatype != dtype ||
-            this->voxelBasedGradientBw->datatype != dtype) {
-            reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
-            reg_print_msg_error("Input images are expected to be of the same type");
-            reg_exit();
-        }
-        // Compute the gradient of the nmi for the backward transformation
-        switch (dtype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_getVoxelBasedSsdGradient<float>(this->floatingImage,
-                                                this->warpedImageBw,
-                                                this->warpedGradientBw,
-                                                this->voxelBasedGradientBw,
-                                                nullptr, // TODO this->backwardJacDetImagePointer,
-                                                this->floatingMask,
-                                                currentTimepoint,
-                                                this->timePointWeight[currentTimepoint],
-                                                nullptr);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_getVoxelBasedSsdGradient<double>(this->floatingImage,
-                                                 this->warpedImageBw,
-                                                 this->warpedGradientBw,
-                                                 this->voxelBasedGradientBw,
-                                                 nullptr, // TODO this->backwardJacDetImagePointer,
-                                                 this->floatingMask,
-                                                 currentTimepoint,
-                                                 this->timePointWeight[currentTimepoint],
-                                                 nullptr);
-            break;
-        default:
-            reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient");
-            reg_print_msg_error("Unsupported datatype");
-            reg_exit();
-        }
-    }
+}
+/* *************************************************************** */
+void reg_ssd::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+    ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
+                                             this->warpedImageBw,
+                                             this->warpedGradientBw,
+                                             this->voxelBasedGradientBw,
+                                             nullptr, // TODO this->backwardJacDetImagePointer,
+                                             this->floatingMask,
+                                             currentTimepoint,
+                                             this->timePointWeight[currentTimepoint],
+                                             nullptr);
 }
 /* *************************************************************** */
 template <class DataType>
 void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                                    float *discretisedValue,
-                                   int discretise_radius,
-                                   int discretise_step,
+                                   int discretiseRadius,
+                                   int discretiseStep,
                                    nifti_image *refImage,
                                    nifti_image *warImage,
                                    int *mask) {
     int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, discretisedIndex;
     size_t voxIndex, voxIndex_t;
-    int label_1D_number = (discretise_radius / discretise_step) * 2 + 1;
+    int label_1D_number = (discretiseRadius / discretiseStep) * 2 + 1;
     int label_2D_number = label_1D_number * label_1D_number;
     int label_nD_number = label_2D_number * label_1D_number;
     //output matrix = discretisedValue (first dimension displacement label, second dim. control point)
@@ -456,9 +384,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
 
     // Create a padded version of the warped image to avoid boundary condition check
     int warPaddedOffset[3] = {
-        discretise_radius + blockSize[0],
-        discretise_radius + blockSize[1],
-        discretise_radius + blockSize[2],
+        discretiseRadius + blockSize[0],
+        discretiseRadius + blockSize[1],
+        discretiseRadius + blockSize[2],
     };
     int warPaddedDim[4] = {
         warImage->nx + 2 * warPaddedOffset[0] + blockSize[0],
@@ -467,11 +395,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
         warImage->nt
     };
 
-    //DataType padding_value = std::numeric_limits<DataType>::quiet_NaN();
     DataType padding_value = 0;
 
-    size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] *
-        warPaddedDim[1] * warPaddedDim[2];
+    size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] * warPaddedDim[1] * warPaddedDim[2];
     DataType *paddedWarImgPtr = (DataType*)calloc(warPaddedVoxelNumber * warPaddedDim[3], sizeof(DataType));
     for (voxIndex = 0; voxIndex < warPaddedVoxelNumber * warPaddedDim[3]; ++voxIndex)
         paddedWarImgPtr[voxIndex] = padding_value;
@@ -550,7 +476,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                     double currentSum;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-    shared(label_1D_number, label_2D_number, label_nD_number, discretise_step, discretise_radius, \
+    shared(label_1D_number, label_2D_number, label_nD_number, discretiseStep, discretiseRadius, \
     paddedImageVox, blockSize, warPaddedDim, paddedWarImgPtr, refBlockValue, warPaddedVoxelNumber, \
     discretisedValue, currentControlPoint, voxelBlockNumber) \
     private(a, b, c, cc, x, y, z, t, discretisedIndex, blockIndex, \
@@ -558,9 +484,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
 #endif
                     for (cc = 0; cc < label_1D_number; ++cc) {
                         discretisedIndex = cc * label_2D_number;
-                        c = paddedImageVox[2] - discretise_radius + cc * discretise_step;
-                        for (b = paddedImageVox[1] - discretise_radius; b <= paddedImageVox[1] + discretise_radius; b += discretise_step) {
-                            for (a = paddedImageVox[0] - discretise_radius; a <= paddedImageVox[0] + discretise_radius; a += discretise_step) {
+                        c = paddedImageVox[2] - discretiseRadius + cc * discretiseStep;
+                        for (b = paddedImageVox[1] - discretiseRadius; b <= paddedImageVox[1] + discretiseRadius; b += discretiseStep) {
+                            for (a = paddedImageVox[0] - discretiseRadius; a <= paddedImageVox[0] + discretiseRadius; a += discretiseStep) {
 
                                 blockIndex = 0;
                                 currentSum = 0.;
@@ -658,14 +584,14 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
 template <class DataType>
 void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
                                      float *discretisedValue,
-                                     int discretise_radius,
-                                     int discretise_step,
+                                     int discretiseRadius,
+                                     int discretiseStep,
                                      nifti_image *refImage,
                                      nifti_image *warImage,
                                      int *mask) {
     int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, blockIndex_t, discretisedIndex;
     size_t voxIndex, voxIndex_t;
-    const int label_1D_number = (discretise_radius / discretise_step) * 2 + 1;
+    const int label_1D_number = (discretiseRadius / discretiseStep) * 2 + 1;
     const int label_2D_number = label_1D_number * label_1D_number;
     int label_nD_number = label_2D_number * label_1D_number;
     //output matrix = discretisedValue (first dimension displacement label, second dim. control point)
@@ -715,8 +641,8 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelBlockNumber_t, voxelNumber, voxelBlockNumber, label_nD_number, controlPointGridImage, refImage, warImage, grid2img_vox, blockSize, \
-    padding_value, refBlockValue, mask, refImgPtr, warImgPtr, discretise_radius, \
-    discretise_step, discretisedValue) \
+    padding_value, refBlockValue, mask, refImgPtr, warImgPtr, discretiseRadius, \
+    discretiseStep, discretisedValue) \
     private(cpx, cpy, x, y, z, a, b, c, t, currentControlPoint, gridVox, imageVox, \
     voxIndex, idBlock, blockIndex, definedValueNumber, tid, \
     timeV, voxIndex_t, blockIndex_t, discretisedIndex, currentSum, currentValue)
@@ -769,11 +695,10 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
                 } // z
                 // Loop over the discretised value
                 if (definedValueNumber > 0) {
-
                     discretisedIndex = 0;
-                    for (c = int(imageVox[2] - discretise_radius); c <= imageVox[2] + discretise_radius; c += discretise_step) {
-                        for (b = int(imageVox[1] - discretise_radius); b <= imageVox[1] + discretise_radius; b += discretise_step) {
-                            for (a = int(imageVox[0] - discretise_radius); a <= imageVox[0] + discretise_radius; a += discretise_step) {
+                    for (c = int(imageVox[2] - discretiseRadius); c <= imageVox[2] + discretiseRadius; c += discretiseStep) {
+                        for (b = int(imageVox[1] - discretiseRadius); b <= imageVox[1] + discretiseRadius; b += discretiseStep) {
+                            for (a = int(imageVox[0] - discretiseRadius); a <= imageVox[0] + discretiseRadius; a += discretiseStep) {
 
                                 blockIndex = 0;
                                 currentSum = 0.;
@@ -897,37 +822,23 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
 /* *************************************************************** */
 void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage,
                                   float *discretisedValue,
-                                  int discretise_radius,
-                                  int discretise_step) {
-    if (referenceImage->nz > 1) {
-        switch (this->referenceImage->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            GetDiscretisedValueSSD_core3D_2<float>(controlPointGridImage,
-                                                   discretisedValue,
-                                                   discretise_radius,
-                                                   discretise_step,
-                                                   this->referenceImage,
-                                                   this->warpedImage,
-                                                   this->referenceMask);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            GetDiscretisedValueSSD_core3D_2<double>(controlPointGridImage,
-                                                    discretisedValue,
-                                                    discretise_radius,
-                                                    discretise_step,
-                                                    this->referenceImage,
-                                                    this->warpedImage,
-                                                    this->referenceMask);
-            break;
-        default:
+                                  int discretiseRadius,
+                                  int discretiseStep) {
+    std::visit([&](auto&& refImgDataType) {
+        using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
+        if (referenceImage->nz > 1) {
+            GetDiscretisedValueSSD_core3D_2<RefImgDataType>(controlPointGridImage,
+                                                            discretisedValue,
+                                                            discretiseRadius,
+                                                            discretiseStep,
+                                                            this->referenceImage,
+                                                            this->warpedImage,
+                                                            this->referenceMask);
+        } else {
             reg_print_fct_error("reg_ssd::GetDiscretisedValue");
-            reg_print_msg_error("Unsupported datatype");
+            reg_print_msg_error("Not implemented in 2D yet");
             reg_exit();
         }
-    } else {
-        reg_print_fct_error("reg_ssd::GetDiscretisedValue");
-        reg_print_msg_error("Not implemented in 2D yet");
-        reg_exit();
-    }
+    }, NiftiImage::getFloatingDataType(this->referenceImage));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index 43dbefe3..d685509f 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -43,13 +43,15 @@ class reg_ssd: public reg_measure {
     virtual double GetSimilarityMeasureValueFw() override;
     /// @brief Returns the ssd value backwards
     virtual double GetSimilarityMeasureValueBw() override;
-    /// @brief Compute the voxel based ssd gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
+    /// @brief Compute the voxel-based ssd gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    /// @brief Compute the voxel-based ssd gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
     /// @brief Here
     virtual void GetDiscretisedValue(nifti_image *controlPointGridImage,
                                      float *discretisedValue,
-                                     int discretise_radius,
-                                     int discretise_step) override;
+                                     int discretiseRadius,
+                                     int discretiseStep) override;
 protected:
     float currentValue[255];
 
@@ -60,13 +62,15 @@ class reg_ssd: public reg_measure {
 /** @brief Computes and returns the SSD between two input images
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
- * @param activeTimePoint Specified which time point volumes have to be considered
+ * @param timePointWeight Array that contains the weight of each time point
  * @param jacobianDetImage Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
  * image is used to modulate the SSD. The argument is ignored if the
  * pointer is set to nullptr
  * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to nullptr, all voxels are considered
+ * should be considered
+ * @param currentValue Array that contains the current values
+ * @param localWeightSim Image that contains the local weight similarity
  * @return Returns the computed sum squared difference
  */
 extern "C++" template <class DataType>
@@ -81,7 +85,6 @@ double reg_getSsdValue(const nifti_image *referenceImage,
 /** @brief Compute a voxel based gradient of the sum squared difference.
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
- * @param activeTimePoint Specified which time point volumes have to be considered
  * @param warpedGradient Spatial gradient of the input warped image
  * @param measureGradientImage Output image that will be updated with the
  * value of the SSD gradient
@@ -90,7 +93,10 @@ double reg_getSsdValue(const nifti_image *referenceImage,
  * image is used to modulate the SSD. The argument is ignored if the
  * pointer is set to nullptr
  * @param mask Array that contains a mask to specify which voxel
- * should be considered. If set to nullptr, all voxels are considered
+ * should be considered
+ * @param currentTimepoint Specifies which time point volumes have to be considered
+ * @param timepointWeight Weight of the specified time point
+ * @param localWeightSim Image that contains the local weight similarity
  */
 extern "C++" template <class DataType>
 void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 1ff52195..7e968bed 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -132,8 +132,10 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
     virtual double GetSimilarityMeasureValueFw() override { return 0; }
     /// @brief Returns the lncc value backwards
     virtual double GetSimilarityMeasureValueBw() override { return 0; }
-    /// @brief Compute the voxel based lncc gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
+    /// @brief Compute the voxel-based lncc gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {}
+    /// @brief Compute the voxel-based lncc gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {}
 };
 /* *************************************************************** */
 class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
@@ -173,8 +175,10 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
     virtual double GetSimilarityMeasureValueFw() override { return 0; }
     /// @brief Returns the kld value backwards
     virtual double GetSimilarityMeasureValueBw() override { return 0; }
-    /// @brief Compute the voxel based kld gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
+    /// @brief Compute the voxel-based kld gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {}
+    /// @brief Compute the voxel-based kld gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {}
 };
 /* *************************************************************** */
 class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
@@ -214,7 +218,9 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
     virtual double GetSimilarityMeasureValueFw() override { return 0; }
     /// @brief Returns the dti value backwards
     virtual double GetSimilarityMeasureValueBw() override { return 0; }
-    /// @brief Compute the voxel based dti gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
+    /// @brief Compute the voxel-based dti gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {}
+    /// @brief Compute the voxel-based dti gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {}
 };
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 2e55b78b..0c52ccc9 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -180,12 +180,7 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
     }
 }
 /* *************************************************************** */
-void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
-    // Check if the specified time point exists and is active
-    reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint);
-    if (this->timePointWeight[currentTimepoint] == 0)
-        return;
-
+void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
     // Call compute similarity measure to calculate joint histogram
     this->GetSimilarityMeasureValue();
 
@@ -204,23 +199,23 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                      this->entropyValues[0],
                                      this->referenceBinNumber[0],
                                      this->floatingBinNumber[0]);
+}
+/* *************************************************************** */
+void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+    // The latest joint histogram is transferred onto the GPU
+    thrust::device_vector<float> jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]);
 
-    if (this->isSymmetric) {
-        thrust::device_vector<float> jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]);
-        reg_getVoxelBasedNMIGradient_gpu(this->floatingImage,
-                                         this->floatingImageCuda,
-                                         this->warpedImageBwCuda,
-                                         this->warpedGradientBwCuda,
-                                         jointHistogramLogCudaBw.data().get(),
-                                         this->voxelBasedGradientBwCuda,
-                                         this->floatingMaskCuda,
-                                         this->activeVoxelNumber,
-                                         this->entropyValuesBw[0],
-                                         this->floatingBinNumber[0],
-                                         this->referenceBinNumber[0]);
-    }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called");
-#endif
+    // The gradient of the NMI is computed on the GPU
+    reg_getVoxelBasedNMIGradient_gpu(this->floatingImage,
+                                     this->floatingImageCuda,
+                                     this->warpedImageBwCuda,
+                                     this->warpedGradientBwCuda,
+                                     jointHistogramLogCudaBw.data().get(),
+                                     this->voxelBasedGradientBwCuda,
+                                     this->floatingMaskCuda,
+                                     this->activeVoxelNumber,
+                                     this->entropyValuesBw[0],
+                                     this->floatingBinNumber[0],
+                                     this->referenceBinNumber[0]);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index 2b55270b..0e8fe3ed 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -51,8 +51,10 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
     virtual double GetSimilarityMeasureValueFw() override;
     /// @brief Returns the nmi value backwards
     virtual double GetSimilarityMeasureValueBw() override;
-    /// @brief Compute the voxel based nmi gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
+    /// @brief Compute the voxel-based nmi gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    /// @brief Compute the voxel-based nmi gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
 };
 /* *************************************************************** */
 /// @brief NMI measure of similarity class
@@ -88,7 +90,9 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_
     virtual double GetSimilarityMeasureValueFw() override { return 0; }
     /// @brief Returns the nmi value backwards
     virtual double GetSimilarityMeasureValueBw() override { return 0; }
-    /// @brief Compute the voxel based nmi gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {}
+    /// @brief Compute the voxel-based nmi gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {}
+    /// @brief Compute the voxel-based nmi gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {}
 };
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index dc62ea53..14a4352f 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -61,7 +61,7 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
 #endif
 }
 /* *************************************************************** */
-double reg_getSSDValue_gpu(const nifti_image *referenceImage,
+double reg_getSsdValue_gpu(const nifti_image *referenceImage,
                            const cudaArray *referenceImageCuda,
                            const float *warpedCuda,
                            const int *maskCuda,
@@ -86,9 +86,9 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage,
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     if (referenceImageDim.z > 1)
-        reg_getSquaredDifference3D_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture,
+        reg_getSquaredDifference3d_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture,
                                                                    *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber);
-    else reg_getSquaredDifference2D_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture,
+    else reg_getSquaredDifference2d_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture,
                                                                     *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 
@@ -99,19 +99,22 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage,
 }
 /* *************************************************************** */
 double reg_ssd_gpu::GetSimilarityMeasureValueFw() {
-    const double SSDValue = reg_getSSDValue_gpu(this->referenceImage,
-                                                this->referenceImageCuda,
-                                                this->warpedImageCuda,
-                                                this->referenceMaskCuda,
-                                                this->activeVoxelNumber);
-    return -SSDValue;
+    return -reg_getSsdValue_gpu(this->referenceImage,
+                                this->referenceImageCuda,
+                                this->warpedImageCuda,
+                                this->referenceMaskCuda,
+                                this->activeVoxelNumber);
 }
 /* *************************************************************** */
 double reg_ssd_gpu::GetSimilarityMeasureValueBw() {
-    return 0;
+    return -reg_getSsdValue_gpu(this->floatingImage,
+                                this->floatingImageCuda,
+                                this->warpedImageBwCuda,
+                                this->floatingMaskCuda,
+                                this->activeVoxelNumber);
 }
 /* *************************************************************** */
-void reg_getVoxelBasedSSDGradient_gpu(const nifti_image *referenceImage,
+void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
                                       const cudaArray *referenceImageCuda,
                                       const float *warpedCuda,
                                       const float4 *spaGradientCuda,
@@ -140,15 +143,15 @@ void reg_getVoxelBasedSSDGradient_gpu(const nifti_image *referenceImage,
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     if (referenceImageDim.z > 1)
-        reg_getSSDGradient3D_kernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
+        reg_getSsdGradient3d_kernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
                                                              *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber);
-    else reg_getSSDGradient2D_kernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
+    else reg_getSsdGradient2d_kernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
                                                               *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
-    reg_getVoxelBasedSSDGradient_gpu(this->referenceImage,
+void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+    reg_getVoxelBasedSsdGradient_gpu(this->referenceImage,
                                      this->referenceImageCuda,
                                      this->warpedImageCuda,
                                      this->warpedGradientCuda,
@@ -158,3 +161,14 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {
                                      this->activeVoxelNumber);
 }
 /* *************************************************************** */
+void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+    reg_getVoxelBasedSsdGradient_gpu(this->floatingImage,
+                                     this->floatingImageCuda,
+                                     this->warpedImageBwCuda,
+                                     this->warpedGradientBwCuda,
+                                     this->voxelBasedGradientBwCuda,
+                                     1.f,
+                                     this->floatingMaskCuda,
+                                     this->activeVoxelNumber);
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index c0a994be..1214d8f2 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -52,7 +52,9 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
     virtual double GetSimilarityMeasureValueFw() override;
     /// @brief Returns the ssd value backwards
     virtual double GetSimilarityMeasureValueBw() override;
-    /// @brief Compute the voxel based ssd gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override;
+    /// @brief Compute the voxel-based ssd gradient forwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    /// @brief Compute the voxel-based ssd gradient backwards
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
 };
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu
index 5a823634..ea387250 100755
--- a/reg-lib/cuda/_reg_ssd_kernels.cu
+++ b/reg-lib/cuda/_reg_ssd_kernels.cu
@@ -17,7 +17,7 @@
 #include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
-__global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference,
+__global__ void reg_getSquaredDifference3d_kernel(float *squaredDifference,
                                                   cudaTextureObject_t referenceTexture,
                                                   cudaTextureObject_t warpedTexture,
                                                   cudaTextureObject_t maskTexture,
@@ -41,7 +41,7 @@ __global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference,
     }
 }
 /* *************************************************************** */
-__global__ void reg_getSquaredDifference2D_kernel(float *squaredDifference,
+__global__ void reg_getSquaredDifference2d_kernel(float *squaredDifference,
                                                   cudaTextureObject_t referenceTexture,
                                                   cudaTextureObject_t warpedTexture,
                                                   cudaTextureObject_t maskTexture,
@@ -63,7 +63,7 @@ __global__ void reg_getSquaredDifference2D_kernel(float *squaredDifference,
     }
 }
 /* *************************************************************** */
-__global__ void reg_getSSDGradient2D_kernel(float4 *ssdGradient,
+__global__ void reg_getSsdGradient2d_kernel(float4 *ssdGradient,
                                             cudaTextureObject_t referenceTexture,
                                             cudaTextureObject_t warpedTexture,
                                             cudaTextureObject_t maskTexture,
@@ -97,7 +97,7 @@ __global__ void reg_getSSDGradient2D_kernel(float4 *ssdGradient,
     }
 }
 /* *************************************************************** */
-__global__ void reg_getSSDGradient3D_kernel(float4 *ssdGradient,
+__global__ void reg_getSsdGradient3d_kernel(float4 *ssdGradient,
                                             cudaTextureObject_t referenceTexture,
                                             cudaTextureObject_t warpedTexture,
                                             cudaTextureObject_t maskTexture,

From 30c427dcf9f6143792eedb107ccc6b7310bf02cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 3 Aug 2023 12:20:31 +0100
Subject: [PATCH 179/314] Remove the symmetric scheme warning from reg_ssd_gpu

---
 niftyreg_build_version.txt   | 2 +-
 reg-lib/cuda/_reg_ssd_gpu.cu | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a1f7f63f..03a5b41d 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-298
+299
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 14a4352f..c9d91811 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -44,12 +44,6 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
     reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda,
                                        warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda,
                                        warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
-    // Check if a symmetric measure is required
-    if (this->isSymmetric) {
-        reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure");
-        reg_print_msg_error("Symmetric scheme is not yet supported");
-        reg_exit();
-    }
     // Check that the input images have only one time point
     if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) {
         reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure");

From 82fe6aae1e4cccd7c72b4b1395d42b69e260b754 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 3 Aug 2023 13:42:00 +0100
Subject: [PATCH 180/314] Rearchitect Content classes #92

---
 niftyreg_build_version.txt               |  2 +-
 reg-lib/AladinContent.cpp                |  4 --
 reg-lib/AladinContent.h                  |  2 +-
 reg-lib/CMakeLists.txt                   |  1 +
 reg-lib/Compute.cpp                      |  2 +-
 reg-lib/ContentCreatorFactory.h          |  5 +-
 reg-lib/DefContent.cpp                   | 67 ++++++++++++++++++++++++
 reg-lib/DefContent.h                     | 40 ++++++++++++++
 reg-lib/DefContentCreator.h              | 16 ++++++
 reg-lib/F3dContent.cpp                   | 54 +------------------
 reg-lib/F3dContent.h                     | 19 +------
 reg-lib/Measure.cpp                      |  5 +-
 reg-lib/Measure.h                        |  6 +--
 reg-lib/_reg_base.cpp                    |  8 ++-
 reg-lib/_reg_base.h                      |  1 -
 reg-lib/cuda/CMakeLists.txt              |  1 +
 reg-lib/cuda/CudaCompute.cpp             |  8 +--
 reg-lib/cuda/CudaContent.h               |  1 -
 reg-lib/cuda/CudaContentCreatorFactory.h |  3 ++
 reg-lib/cuda/CudaDefContent.cpp          | 65 +++++++++++++++++++++++
 reg-lib/cuda/CudaDefContent.h            | 39 ++++++++++++++
 reg-lib/cuda/CudaDefContentCreator.h     | 16 ++++++
 reg-lib/cuda/CudaF3dContent.cpp          | 50 +-----------------
 reg-lib/cuda/CudaF3dContent.h            | 17 +-----
 reg-lib/cuda/CudaMeasure.cpp             | 22 ++++----
 reg-lib/cuda/CudaMeasure.h               |  2 +-
 reg-test/reg_test_imageGradient.cpp      |  9 ++--
 reg-test/reg_test_lncc.cpp               | 28 +++-------
 reg-test/reg_test_nmi.cpp                | 16 ++----
 29 files changed, 301 insertions(+), 208 deletions(-)
 create mode 100644 reg-lib/DefContent.cpp
 create mode 100644 reg-lib/DefContent.h
 create mode 100644 reg-lib/DefContentCreator.h
 create mode 100644 reg-lib/cuda/CudaDefContent.cpp
 create mode 100644 reg-lib/cuda/CudaDefContent.h
 create mode 100644 reg-lib/cuda/CudaDefContentCreator.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 03a5b41d..697cb3a2 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-299
+300
diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp
index 30b4af23..ab1a07af 100755
--- a/reg-lib/AladinContent.cpp
+++ b/reg-lib/AladinContent.cpp
@@ -34,7 +34,3 @@ AladinContent::~AladinContent() {
         delete blockMatchingParams;
 }
 /* *************************************************************** */
-void AladinContent::SetCaptureRange(const int voxelCaptureRangeIn) {
-    blockMatchingParams->voxelCaptureRange = voxelCaptureRangeIn;
-}
-/* *************************************************************** */
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index 5444cfd0..bd71257a 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -36,6 +36,6 @@ class AladinContent: public Content {
 protected:
 #endif
     // Functions for testing
-    virtual void SetCaptureRange(const int captureRangeIn);
+    virtual void SetCaptureRange(const int& captureRangeIn) { blockMatchingParams->voxelCaptureRange = captureRangeIn; }
     virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; }
 };
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index b3318053..2d5428cb 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -111,6 +111,7 @@ add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE}
   Compute.cpp
   AladinContent.cpp
   Content.cpp
+  DefContent.cpp
   F3dContent.cpp
   Platform.cpp
   Measure.cpp
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 49bb2937..cfae476c 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -127,7 +127,7 @@ void Compute::UpdateControlPointPosition(float *currentDof,
 }
 /* *************************************************************** */
 void Compute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
-    F3dContent& con = dynamic_cast<F3dContent&>(this->con);
+    DefContent& con = dynamic_cast<DefContent&>(this->con);
     reg_getImageGradient(con.GetFloating(),
                          con.GetWarpedGradient(),
                          con.GetDeformationField(),
diff --git a/reg-lib/ContentCreatorFactory.h b/reg-lib/ContentCreatorFactory.h
index 575eb8c4..450b38b0 100644
--- a/reg-lib/ContentCreatorFactory.h
+++ b/reg-lib/ContentCreatorFactory.h
@@ -2,9 +2,10 @@
 
 #include "ContentCreator.h"
 #include "AladinContentCreator.h"
+#include "DefContentCreator.h"
 #include "F3dContentCreator.h"
 
-enum class ContentType { Base, Aladin, F3d };
+enum class ContentType { Base, Aladin, Def, F3d };
 
 class ContentCreatorFactory {
 public:
@@ -12,6 +13,8 @@ class ContentCreatorFactory {
         switch (conType) {
         case ContentType::Aladin:
             return new AladinContentCreator();
+        case ContentType::Def:
+            return new DefContentCreator();
         case ContentType::F3d:
             return new F3dContentCreator();
         default:
diff --git a/reg-lib/DefContent.cpp b/reg-lib/DefContent.cpp
new file mode 100644
index 00000000..6885153e
--- /dev/null
+++ b/reg-lib/DefContent.cpp
@@ -0,0 +1,67 @@
+#include "DefContent.h"
+#include "_reg_resampling.h"
+
+/* *************************************************************** */
+DefContent::DefContent(nifti_image *referenceIn,
+                       nifti_image *floatingIn,
+                       nifti_image *localWeightSimIn,
+                       int *referenceMaskIn,
+                       mat44 *transformationMatrixIn,
+                       size_t bytesIn):
+    Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn) {
+    AllocateWarpedGradient();
+    AllocateVoxelBasedMeasureGradient();
+    AllocateLocalWeightSim(localWeightSimIn);
+}
+/* *************************************************************** */
+DefContent::~DefContent() {
+    DeallocateWarpedGradient();
+    DeallocateVoxelBasedMeasureGradient();
+    DeallocateLocalWeightSim();
+}
+/* *************************************************************** */
+void DefContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) {
+    if (!localWeightSimIn) return;
+    localWeightSim = nifti_copy_nim_info(reference);
+    localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0];
+    localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4];
+    localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5];
+    localWeightSim->nvox = NiftiImage::calcVoxelNumber(localWeightSim, localWeightSim->ndim);
+    localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper);
+    reg_getDeformationFromDisplacement(voxelBasedMeasureGradient);
+    reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0);
+}
+/* *************************************************************** */
+void DefContent::DeallocateLocalWeightSim() {
+    if (localWeightSim) {
+        nifti_image_free(localWeightSim);
+        localWeightSim = nullptr;
+    }
+}
+/* *************************************************************** */
+void DefContent::AllocateWarpedGradient() {
+    warpedGradient = nifti_dup(*deformationField, false);
+}
+/* *************************************************************** */
+void DefContent::DeallocateWarpedGradient() {
+    if (warpedGradient) {
+        nifti_image_free(warpedGradient);
+        warpedGradient = nullptr;
+    }
+}
+/* *************************************************************** */
+void DefContent::AllocateVoxelBasedMeasureGradient() {
+    voxelBasedMeasureGradient = nifti_dup(*deformationField, false);
+}
+/* *************************************************************** */
+void DefContent::DeallocateVoxelBasedMeasureGradient() {
+    if (voxelBasedMeasureGradient) {
+        nifti_image_free(voxelBasedMeasureGradient);
+        voxelBasedMeasureGradient = nullptr;
+    }
+}
+/* *************************************************************** */
+void DefContent::ZeroVoxelBasedMeasureGradient() {
+    memset(voxelBasedMeasureGradient->data, 0, voxelBasedMeasureGradient->nvox * voxelBasedMeasureGradient->nbyper);
+}
+/* *************************************************************** */
diff --git a/reg-lib/DefContent.h b/reg-lib/DefContent.h
new file mode 100644
index 00000000..a5ccab6f
--- /dev/null
+++ b/reg-lib/DefContent.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "Content.h"
+
+class DefContent: public virtual Content {
+public:
+    DefContent() = delete;
+    DefContent(nifti_image *referenceIn,
+               nifti_image *floatingIn,
+               nifti_image *localWeightSimIn = nullptr,
+               int *referenceMaskIn = nullptr,
+               mat44 *transformationMatrixIn = nullptr,
+               size_t bytesIn = sizeof(float));
+    virtual ~DefContent();
+
+    // Getters
+    virtual nifti_image* GetLocalWeightSim() { return localWeightSim; }
+    virtual nifti_image* GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; }
+    virtual nifti_image* GetWarpedGradient() { return warpedGradient; }
+
+    // Methods for transferring data from nifti to device
+    virtual void UpdateVoxelBasedMeasureGradient() {}
+    virtual void UpdateWarpedGradient() {}
+
+    // Auxiliary methods
+    virtual void ZeroVoxelBasedMeasureGradient();
+
+protected:
+    nifti_image *localWeightSim = nullptr;
+    nifti_image *voxelBasedMeasureGradient = nullptr;
+    nifti_image *warpedGradient = nullptr;
+
+private:
+    void AllocateLocalWeightSim(nifti_image*);
+    void DeallocateLocalWeightSim();
+    void AllocateVoxelBasedMeasureGradient();
+    void DeallocateVoxelBasedMeasureGradient();
+    void AllocateWarpedGradient();
+    void DeallocateWarpedGradient();
+};
diff --git a/reg-lib/DefContentCreator.h b/reg-lib/DefContentCreator.h
new file mode 100644
index 00000000..dce3ba86
--- /dev/null
+++ b/reg-lib/DefContentCreator.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "ContentCreator.h"
+#include "DefContent.h"
+
+class DefContentCreator: public ContentCreator {
+public:
+    virtual DefContent* Create(nifti_image *reference,
+                               nifti_image *floating,
+                               nifti_image *localWeightSim = nullptr,
+                               int *referenceMask = nullptr,
+                               mat44 *transformationMatrix = nullptr,
+                               size_t bytes = sizeof(float)) {
+        return new DefContent(reference, floating, localWeightSim, referenceMask, transformationMatrix, bytes);
+    }
+};
diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp
index 029d7ec0..035da723 100644
--- a/reg-lib/F3dContent.cpp
+++ b/reg-lib/F3dContent.cpp
@@ -1,6 +1,4 @@
 #include "F3dContent.h"
-#include "_reg_tools.h"
-#include "_reg_resampling.h"
 
 /* *************************************************************** */
 F3dContent::F3dContent(nifti_image *referenceIn,
@@ -10,6 +8,7 @@ F3dContent::F3dContent(nifti_image *referenceIn,
                        int *referenceMaskIn,
                        mat44 *transformationMatrixIn,
                        size_t bytesIn):
+    DefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, bytesIn),
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn),
     controlPointGrid(controlPointGridIn) {
     if (!controlPointGridIn) {
@@ -17,47 +16,11 @@ F3dContent::F3dContent(nifti_image *referenceIn,
         reg_print_msg_error("controlPointGridIn can't be nullptr");
         reg_exit();
     }
-    AllocateWarpedGradient();
     AllocateTransformationGradient();
-    AllocateVoxelBasedMeasureGradient();
-    AllocateLocalWeightSim(localWeightSimIn);
 }
 /* *************************************************************** */
 F3dContent::~F3dContent() {
-    DeallocateWarpedGradient();
     DeallocateTransformationGradient();
-    DeallocateVoxelBasedMeasureGradient();
-    DeallocateLocalWeightSim();
-}
-/* *************************************************************** */
-void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) {
-    if (!localWeightSimIn) return;
-    localWeightSim = nifti_copy_nim_info(reference);
-    localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0];
-    localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4];
-    localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5];
-    localWeightSim->nvox = NiftiImage::calcVoxelNumber(localWeightSim, localWeightSim->ndim);
-    localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper);
-    reg_getDeformationFromDisplacement(voxelBasedMeasureGradient);
-    reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0);
-}
-/* *************************************************************** */
-void F3dContent::DeallocateLocalWeightSim() {
-    if (localWeightSim) {
-        nifti_image_free(localWeightSim);
-        localWeightSim = nullptr;
-    }
-}
-/* *************************************************************** */
-void F3dContent::AllocateWarpedGradient() {
-    warpedGradient = nifti_dup(*deformationField, false);
-}
-/* *************************************************************** */
-void F3dContent::DeallocateWarpedGradient() {
-    if (warpedGradient) {
-        nifti_image_free(warpedGradient);
-        warpedGradient = nullptr;
-    }
 }
 /* *************************************************************** */
 void F3dContent::AllocateTransformationGradient() {
@@ -71,22 +34,7 @@ void F3dContent::DeallocateTransformationGradient() {
     }
 }
 /* *************************************************************** */
-void F3dContent::AllocateVoxelBasedMeasureGradient() {
-    voxelBasedMeasureGradient = nifti_dup(*deformationField, false);
-}
-/* *************************************************************** */
-void F3dContent::DeallocateVoxelBasedMeasureGradient() {
-    if (voxelBasedMeasureGradient) {
-        nifti_image_free(voxelBasedMeasureGradient);
-        voxelBasedMeasureGradient = nullptr;
-    }
-}
-/* *************************************************************** */
 void F3dContent::ZeroTransformationGradient() {
     memset(transformationGradient->data, 0, transformationGradient->nvox * transformationGradient->nbyper);
 }
 /* *************************************************************** */
-void F3dContent::ZeroVoxelBasedMeasureGradient() {
-    memset(voxelBasedMeasureGradient->data, 0, voxelBasedMeasureGradient->nvox * voxelBasedMeasureGradient->nbyper);
-}
-/* *************************************************************** */
diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h
index 46d232a6..f09157c0 100644
--- a/reg-lib/F3dContent.h
+++ b/reg-lib/F3dContent.h
@@ -1,8 +1,8 @@
 #pragma once
 
-#include "Content.h"
+#include "DefContent.h"
 
-class F3dContent: public virtual Content {
+class F3dContent: public virtual DefContent {
 public:
     F3dContent() = delete;
     F3dContent(nifti_image *referenceIn,
@@ -16,35 +16,20 @@ class F3dContent: public virtual Content {
 
     // Getters
     virtual nifti_image* GetControlPointGrid() { return controlPointGrid; }
-    virtual nifti_image* GetLocalWeightSim() { return localWeightSim; }
     virtual nifti_image* GetTransformationGradient() { return transformationGradient; }
-    virtual nifti_image* GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; }
-    virtual nifti_image* GetWarpedGradient() { return warpedGradient; }
 
     // Methods for transferring data from nifti to device
     virtual void UpdateControlPointGrid() {}
     virtual void UpdateTransformationGradient() {}
-    virtual void UpdateVoxelBasedMeasureGradient() {}
-    virtual void UpdateWarpedGradient() {}
 
     // Auxiliary methods
     virtual void ZeroTransformationGradient();
-    virtual void ZeroVoxelBasedMeasureGradient();
 
 protected:
     nifti_image *controlPointGrid = nullptr;
-    nifti_image *localWeightSim = nullptr;
     nifti_image *transformationGradient = nullptr;
-    nifti_image *voxelBasedMeasureGradient = nullptr;
-    nifti_image *warpedGradient = nullptr;
 
 private:
-    void AllocateLocalWeightSim(nifti_image*);
-    void DeallocateLocalWeightSim();
-    void AllocateWarpedGradient();
-    void DeallocateWarpedGradient();
     void AllocateTransformationGradient();
     void DeallocateTransformationGradient();
-    void AllocateVoxelBasedMeasureGradient();
-    void DeallocateVoxelBasedMeasureGradient();
 };
\ No newline at end of file
diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp
index 4b463ba7..f7e077db 100644
--- a/reg-lib/Measure.cpp
+++ b/reg-lib/Measure.cpp
@@ -21,15 +21,16 @@ reg_measure* Measure::Create(const MeasureType& measureType) {
         return new reg_kld();
     case MeasureType::Mind:
         return new reg_mind();
-    case MeasureType::Mindssc:
+    case MeasureType::MindSsc:
         return new reg_mindssc();
     }
+    reg_print_fct_error("Measure::Create");
     reg_print_msg_error("Unsupported measure type");
     reg_exit();
     return nullptr;
 }
 /* *************************************************************** */
-void Measure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) {
+void Measure::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) {
     measure.InitialiseMeasure(con.GetReference(),
                               con.GetFloating(),
                               con.GetReferenceMask(),
diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h
index 04ff5bdd..f8527631 100644
--- a/reg-lib/Measure.h
+++ b/reg-lib/Measure.h
@@ -1,12 +1,12 @@
 #pragma once
 
-#include "F3dContent.h"
+#include "DefContent.h"
 #include "_reg_measure.h"
 
-enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, Mindssc };
+enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, MindSsc };
 
 class Measure {
 public:
     virtual reg_measure* Create(const MeasureType& measureType);
-    virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr);
+    virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr);
 };
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 19448a59..9e0988d5 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -458,8 +458,7 @@ void reg_base<T>::CheckParameters() {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::InitialiseSimilarity() {
-    // TODO Move this function to reg_f3d
-    F3dContent& con = dynamic_cast<F3dContent&>(*this->con);
+    DefContent& con = dynamic_cast<DefContent&>(*this->con);
 
     if (measure_nmi)
         measure->Initialise(*measure_nmi, con);
@@ -604,8 +603,7 @@ double reg_base<T>::ComputeSimilarityMeasure() {
 template<class T>
 void reg_base<T>::GetVoxelBasedGradient() {
     // The voxel based gradient image is filled with zeros
-    // TODO Temporarily call F3dContent. This function will be moved to reg_f3d
-    dynamic_cast<F3dContent&>(*con).ZeroVoxelBasedMeasureGradient();
+    dynamic_cast<DefContent&>(*con).ZeroVoxelBasedMeasureGradient();
 
     // The intensity gradient is first computed
     //   if(measure_nmi || measure_ssd ||
@@ -725,7 +723,7 @@ void reg_base<T>::UseMIND(int timepoint, int offset) {
 template<class T>
 void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
     if (!measure_mindssc)
-        measure_mindssc.reset(dynamic_cast<reg_mindssc*>(measure->Create(MeasureType::Mindssc)));
+        measure_mindssc.reset(dynamic_cast<reg_mindssc*>(measure->Create(MeasureType::MindSsc)));
     measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mindssc->SetDescriptorOffset(offset);
 #ifndef NDEBUG
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 7e62a37f..5fffdc56 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -82,7 +82,6 @@ class reg_base: public InterfaceOptimiser {
     unsigned levelToPerform;
     T gradientSmoothingSigma;
     T similarityWeight;
-    bool additive_mc_nmi;
     bool useConjGradient;
     bool useApproxGradient;
     bool verbose;
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 28f46f4b..a5696659 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -64,6 +64,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaCompute.cpp
     CudaContent.cpp
     CudaContext.cpp
+    CudaDefContent.cpp
     CudaF3dContent.cpp
     CudaKernelFactory.cpp
     CudaMeasure.cpp
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index ec1398b2..40702afa 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -115,8 +115,8 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
     // TODO Fix reg_getImageGradient_gpu to accept interpolation and activeTimepoint
-    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    reg_getImageGradient_gpu(con.F3dContent::GetFloating(),
+    CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
+    reg_getImageGradient_gpu(con.DefContent::GetFloating(),
                              con.GetFloatingCuda(),
                              con.GetDeformationFieldCuda(),
                              con.GetWarpedGradientCuda(),
@@ -179,7 +179,7 @@ void CudaCompute::VoxelCentricToNodeCentric(float weight) {
 void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
     // TODO Implement this for CUDA
     // Use CPU temporarily
-    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
     Compute::ConvolveImage(con.GetVoxelBasedMeasureGradient());
     // Transfer the data back to the CUDA device
     con.UpdateVoxelBasedMeasureGradient();
@@ -193,7 +193,7 @@ void CudaCompute::ExponentiateGradient(Content& conBwIn) {
     // Use CPU temporarily
     Compute::ExponentiateGradient(conBwIn);
     // Transfer the data back to the CUDA device
-    dynamic_cast<CudaF3dContent&>(con).UpdateVoxelBasedMeasureGradient();
+    dynamic_cast<CudaDefContent&>(con).UpdateVoxelBasedMeasureGradient();
 }
 /* *************************************************************** */
 void CudaCompute::UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index a9fd1f4f..16f8e7b2 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -2,7 +2,6 @@
 
 #include "Content.h"
 #include "_reg_common_cuda.h"
-#include "_reg_tools.h"
 
 class CudaContent: public virtual Content {
 public:
diff --git a/reg-lib/cuda/CudaContentCreatorFactory.h b/reg-lib/cuda/CudaContentCreatorFactory.h
index a70bbe57..5d89e839 100644
--- a/reg-lib/cuda/CudaContentCreatorFactory.h
+++ b/reg-lib/cuda/CudaContentCreatorFactory.h
@@ -3,6 +3,7 @@
 #include "ContentCreatorFactory.h"
 #include "CudaContentCreator.h"
 #include "CudaAladinContentCreator.h"
+#include "CudaDefContentCreator.h"
 #include "CudaF3dContentCreator.h"
 
 class CudaContentCreatorFactory: public ContentCreatorFactory {
@@ -11,6 +12,8 @@ class CudaContentCreatorFactory: public ContentCreatorFactory {
         switch (conType) {
         case ContentType::Aladin:
             return new CudaAladinContentCreator();
+        case ContentType::Def:
+            return new CudaDefContentCreator();
         case ContentType::F3d:
             return new CudaF3dContentCreator();
         default:
diff --git a/reg-lib/cuda/CudaDefContent.cpp b/reg-lib/cuda/CudaDefContent.cpp
new file mode 100644
index 00000000..a78b3447
--- /dev/null
+++ b/reg-lib/cuda/CudaDefContent.cpp
@@ -0,0 +1,65 @@
+#include "CudaDefContent.h"
+
+/* *************************************************************** */
+CudaDefContent::CudaDefContent(nifti_image *referenceIn,
+                               nifti_image *floatingIn,
+                               nifti_image *localWeightSimIn,
+                               int *referenceMaskIn,
+                               mat44 *transformationMatrixIn,
+                               size_t bytesIn):
+    DefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
+    CudaContent(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
+    Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) {
+    AllocateWarpedGradient();
+    AllocateVoxelBasedMeasureGradient();
+}
+/* *************************************************************** */
+CudaDefContent::~CudaDefContent() {
+    DeallocateWarpedGradient();
+    DeallocateVoxelBasedMeasureGradient();
+}
+/* *************************************************************** */
+void CudaDefContent::AllocateWarpedGradient() {
+    cudaCommon_allocateArrayToDevice(&warpedGradientCuda, warpedGradient->dim);
+}
+/* *************************************************************** */
+void CudaDefContent::DeallocateWarpedGradient() {
+    if (warpedGradientCuda != nullptr) {
+        cudaCommon_free(warpedGradientCuda);
+        warpedGradientCuda = nullptr;
+    }
+}
+/* *************************************************************** */
+void CudaDefContent::AllocateVoxelBasedMeasureGradient() {
+    cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient->dim);
+}
+/* *************************************************************** */
+void CudaDefContent::DeallocateVoxelBasedMeasureGradient() {
+    if (voxelBasedMeasureGradientCuda) {
+        cudaCommon_free(voxelBasedMeasureGradientCuda);
+        voxelBasedMeasureGradientCuda = nullptr;
+    }
+}
+/* *************************************************************** */
+nifti_image* CudaDefContent::GetVoxelBasedMeasureGradient() {
+    cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda);
+    return voxelBasedMeasureGradient;
+}
+/* *************************************************************** */
+void CudaDefContent::UpdateVoxelBasedMeasureGradient() {
+    cudaCommon_transferNiftiToArrayOnDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient);
+}
+/* *************************************************************** */
+nifti_image* CudaDefContent::GetWarpedGradient() {
+    cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda);
+    return warpedGradient;
+}
+/* *************************************************************** */
+void CudaDefContent::UpdateWarpedGradient() {
+    cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda, warpedGradient);
+}
+/* *************************************************************** */
+void CudaDefContent::ZeroVoxelBasedMeasureGradient() {
+    cudaMemset(voxelBasedMeasureGradientCuda, 0, NiftiImage::calcVoxelNumber(voxelBasedMeasureGradient, 3) * sizeof(float4));
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaDefContent.h b/reg-lib/cuda/CudaDefContent.h
new file mode 100644
index 00000000..eb6372a8
--- /dev/null
+++ b/reg-lib/cuda/CudaDefContent.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include "DefContent.h"
+#include "CudaContent.h"
+
+class CudaDefContent: public virtual DefContent, public virtual CudaContent {
+public:
+    CudaDefContent() = delete;
+    CudaDefContent(nifti_image *referenceIn,
+                   nifti_image *floatingIn,
+                   nifti_image *localWeightSimIn = nullptr,
+                   int *referenceMaskIn = nullptr,
+                   mat44 *transformationMatrixIn = nullptr,
+                   size_t bytesIn = sizeof(float));
+    virtual ~CudaDefContent();
+
+    // Getters
+    virtual nifti_image* GetVoxelBasedMeasureGradient() override;
+    virtual nifti_image* GetWarpedGradient() override;
+    virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; }
+    virtual float4* GetWarpedGradientCuda() { return warpedGradientCuda; }
+
+    // Methods for transferring data from nifti to device
+    virtual void UpdateVoxelBasedMeasureGradient() override;
+    virtual void UpdateWarpedGradient() override;
+
+    // Auxiliary methods
+    virtual void ZeroVoxelBasedMeasureGradient() override;
+
+protected:
+    float4 *voxelBasedMeasureGradientCuda = nullptr;
+    float4 *warpedGradientCuda = nullptr;
+
+private:
+    void AllocateWarpedGradient();
+    void DeallocateWarpedGradient();
+    void AllocateVoxelBasedMeasureGradient();
+    void DeallocateVoxelBasedMeasureGradient();
+};
diff --git a/reg-lib/cuda/CudaDefContentCreator.h b/reg-lib/cuda/CudaDefContentCreator.h
new file mode 100644
index 00000000..af3fb561
--- /dev/null
+++ b/reg-lib/cuda/CudaDefContentCreator.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "DefContentCreator.h"
+#include "CudaDefContent.h"
+
+class CudaDefContentCreator: public DefContentCreator {
+public:
+    virtual DefContent* Create(nifti_image *reference,
+                               nifti_image *floating,
+                               nifti_image *localWeightSim = nullptr,
+                               int *referenceMask = nullptr,
+                               mat44 *transformationMatrix = nullptr,
+                               size_t bytes = sizeof(float)) override {
+        return new CudaDefContent(reference, floating, localWeightSim, referenceMask, transformationMatrix, bytes);
+    }
+};
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index fc4deb3f..9e2f184f 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -9,20 +9,18 @@ CudaF3dContent::CudaF3dContent(nifti_image *referenceIn,
                                mat44 *transformationMatrixIn,
                                size_t bytesIn):
     F3dContent(referenceIn, floatingIn, controlPointGridIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
+    CudaDefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
+    DefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
     CudaContent(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)),
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) {
     AllocateControlPointGrid();
-    AllocateWarpedGradient();
     AllocateTransformationGradient();
-    AllocateVoxelBasedMeasureGradient();
 }
 /* *************************************************************** */
 CudaF3dContent::~CudaF3dContent() {
     GetControlPointGrid();  // Transfer device data back to nifti
     DeallocateControlPointGrid();
-    DeallocateWarpedGradient();
     DeallocateTransformationGradient();
-    DeallocateVoxelBasedMeasureGradient();
 }
 /* *************************************************************** */
 void CudaF3dContent::AllocateControlPointGrid() {
@@ -37,17 +35,6 @@ void CudaF3dContent::DeallocateControlPointGrid() {
     }
 }
 /* *************************************************************** */
-void CudaF3dContent::AllocateWarpedGradient() {
-    cudaCommon_allocateArrayToDevice(&warpedGradientCuda, warpedGradient->dim);
-}
-/* *************************************************************** */
-void CudaF3dContent::DeallocateWarpedGradient() {
-    if (warpedGradientCuda != nullptr) {
-        cudaCommon_free(warpedGradientCuda);
-        warpedGradientCuda = nullptr;
-    }
-}
-/* *************************************************************** */
 void CudaF3dContent::AllocateTransformationGradient() {
     cudaCommon_allocateArrayToDevice(&transformationGradientCuda, transformationGradient->dim);
 }
@@ -59,17 +46,6 @@ void CudaF3dContent::DeallocateTransformationGradient() {
     }
 }
 /* *************************************************************** */
-void CudaF3dContent::AllocateVoxelBasedMeasureGradient() {
-    cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient->dim);
-}
-/* *************************************************************** */
-void CudaF3dContent::DeallocateVoxelBasedMeasureGradient() {
-    if (voxelBasedMeasureGradientCuda) {
-        cudaCommon_free(voxelBasedMeasureGradientCuda);
-        voxelBasedMeasureGradientCuda = nullptr;
-    }
-}
-/* *************************************************************** */
 nifti_image* CudaF3dContent::GetControlPointGrid() {
     cudaCommon_transferFromDeviceToNifti(controlPointGrid, controlPointGridCuda);
     return controlPointGrid;
@@ -88,29 +64,7 @@ void CudaF3dContent::UpdateTransformationGradient() {
     cudaCommon_transferNiftiToArrayOnDevice(transformationGradientCuda, transformationGradient);
 }
 /* *************************************************************** */
-nifti_image* CudaF3dContent::GetVoxelBasedMeasureGradient() {
-    cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda);
-    return voxelBasedMeasureGradient;
-}
-/* *************************************************************** */
-void CudaF3dContent::UpdateVoxelBasedMeasureGradient() {
-    cudaCommon_transferNiftiToArrayOnDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient);
-}
-/* *************************************************************** */
-nifti_image* CudaF3dContent::GetWarpedGradient() {
-    cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda);
-    return warpedGradient;
-}
-/* *************************************************************** */
-void CudaF3dContent::UpdateWarpedGradient() {
-    cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda, warpedGradient);
-}
-/* *************************************************************** */
 void CudaF3dContent::ZeroTransformationGradient() {
     cudaMemset(transformationGradientCuda, 0, NiftiImage::calcVoxelNumber(transformationGradient, 3) * sizeof(float4));
 }
 /* *************************************************************** */
-void CudaF3dContent::ZeroVoxelBasedMeasureGradient() {
-    cudaMemset(voxelBasedMeasureGradientCuda, 0, NiftiImage::calcVoxelNumber(voxelBasedMeasureGradient, 3) * sizeof(float4));
-}
-/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h
index 0b6dc363..ca085945 100644
--- a/reg-lib/cuda/CudaF3dContent.h
+++ b/reg-lib/cuda/CudaF3dContent.h
@@ -1,9 +1,9 @@
 #pragma once
 
 #include "F3dContent.h"
-#include "CudaContent.h"
+#include "CudaDefContent.h"
 
-class CudaF3dContent: public F3dContent, public CudaContent {
+class CudaF3dContent: public F3dContent, public CudaDefContent {
 public:
     CudaF3dContent() = delete;
     CudaF3dContent(nifti_image *referenceIn,
@@ -18,36 +18,23 @@ class CudaF3dContent: public F3dContent, public CudaContent {
     // Getters
     virtual nifti_image* GetControlPointGrid() override;
     virtual nifti_image* GetTransformationGradient() override;
-    virtual nifti_image* GetVoxelBasedMeasureGradient() override;
-    virtual nifti_image* GetWarpedGradient() override;
     virtual float4* GetControlPointGridCuda() { return controlPointGridCuda; }
     virtual float4* GetTransformationGradientCuda() { return transformationGradientCuda; }
-    virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; }
-    virtual float4* GetWarpedGradientCuda() { return warpedGradientCuda; }
 
     // Methods for transferring data from nifti to device
     virtual void UpdateControlPointGrid() override;
     virtual void UpdateTransformationGradient() override;
-    virtual void UpdateVoxelBasedMeasureGradient() override;
-    virtual void UpdateWarpedGradient() override;
 
     // Auxiliary methods
     virtual void ZeroTransformationGradient() override;
-    virtual void ZeroVoxelBasedMeasureGradient() override;
 
 protected:
     float4 *controlPointGridCuda = nullptr;
     float4 *transformationGradientCuda = nullptr;
-    float4 *voxelBasedMeasureGradientCuda = nullptr;
-    float4 *warpedGradientCuda = nullptr;
 
 private:
     void AllocateControlPointGrid();
     void DeallocateControlPointGrid();
-    void AllocateWarpedGradient();
-    void DeallocateWarpedGradient();
     void AllocateTransformationGradient();
     void DeallocateTransformationGradient();
-    void AllocateVoxelBasedMeasureGradient();
-    void DeallocateVoxelBasedMeasureGradient();
 };
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
index f94a06d1..f6c973c3 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -1,5 +1,5 @@
 #include "CudaMeasure.h"
-#include "CudaF3dContent.h"
+#include "CudaDefContent.h"
 #include "_reg_nmi_gpu.h"
 #include "_reg_ssd_gpu.h"
 
@@ -19,20 +19,20 @@ reg_measure* CudaMeasure::Create(const MeasureType& measureType) {
     case MeasureType::Mind:
         reg_print_msg_error("MIND measure type isn't implemented for GPU");
         reg_exit();
-    case MeasureType::Mindssc:
+    case MeasureType::MindSsc:
         reg_print_msg_error("MIND-SSC measure type isn't implemented for GPU");
         reg_exit();
     }
+    reg_print_fct_error("CudaMeasure::Create");
     reg_print_msg_error("Unsupported measure type");
     reg_exit();
     return nullptr;
 }
 /* *************************************************************** */
-void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) {
-    // TODO Implement symmetric scheme for CUDA measure types
+void CudaMeasure::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) {
     reg_measure_gpu& measureGpu = dynamic_cast<reg_measure_gpu&>(measure);
-    CudaF3dContent& cudaCon = dynamic_cast<CudaF3dContent&>(con);
-    CudaF3dContent *cudaConBw = dynamic_cast<CudaF3dContent*>(conBw);
+    CudaDefContent& cudaCon = dynamic_cast<CudaDefContent&>(con);
+    CudaDefContent *cudaConBw = dynamic_cast<CudaDefContent*>(conBw);
     measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(),
                                  cudaCon.GetReferenceCuda(),
                                  cudaCon.Content::GetFloating(),
@@ -42,18 +42,18 @@ void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *
                                  cudaCon.GetActiveVoxelNumber(),
                                  cudaCon.Content::GetWarped(),
                                  cudaCon.GetWarpedCuda(),
-                                 cudaCon.F3dContent::GetWarpedGradient(),
+                                 cudaCon.DefContent::GetWarpedGradient(),
                                  cudaCon.GetWarpedGradientCuda(),
-                                 cudaCon.F3dContent::GetVoxelBasedMeasureGradient(),
+                                 cudaCon.DefContent::GetVoxelBasedMeasureGradient(),
                                  cudaCon.GetVoxelBasedMeasureGradientCuda(),
-                                 cudaCon.F3dContent::GetLocalWeightSim(),
+                                 cudaCon.DefContent::GetLocalWeightSim(),
                                  cudaConBw ? cudaConBw->Content::GetReferenceMask() : nullptr,
                                  cudaConBw ? cudaConBw->GetReferenceMaskCuda() : nullptr,
                                  cudaConBw ? cudaConBw->Content::GetWarped() : nullptr,
                                  cudaConBw ? cudaConBw->GetWarpedCuda() : nullptr,
-                                 cudaConBw ? cudaConBw->F3dContent::GetWarpedGradient() : nullptr,
+                                 cudaConBw ? cudaConBw->DefContent::GetWarpedGradient() : nullptr,
                                  cudaConBw ? cudaConBw->GetWarpedGradientCuda() : nullptr,
-                                 cudaConBw ? cudaConBw->F3dContent::GetVoxelBasedMeasureGradient() : nullptr,
+                                 cudaConBw ? cudaConBw->DefContent::GetVoxelBasedMeasureGradient() : nullptr,
                                  cudaConBw ? cudaConBw->GetVoxelBasedMeasureGradientCuda() : nullptr);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h
index 76fb9983..928f4fc4 100644
--- a/reg-lib/cuda/CudaMeasure.h
+++ b/reg-lib/cuda/CudaMeasure.h
@@ -5,5 +5,5 @@
 class CudaMeasure: public Measure {
 public:
     virtual reg_measure* Create(const MeasureType& measureType) override;
-    virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr) override;
+    virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr) override;
 };
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index a816daee..09ab1f96 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -13,7 +13,7 @@
 
 
 typedef std::tuple<std::string, NiftiImage, NiftiImage, int, float*> TestData;
-typedef std::tuple<unique_ptr<F3dContent>, unique_ptr<Platform>> ContentDesc;
+typedef std::tuple<unique_ptr<DefContent>, unique_ptr<Platform>> ContentDesc;
 
 TEST_CASE("Image gradient", "[ImageGradient]") {
     // Create a reference 2D image
@@ -157,17 +157,14 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
     for (auto&& testCase : testCases) {
         // Retrieve test information
         auto&& [testName, reference, defField, interp, testResult] = testCase;
-        // Create the control point grid
-        NiftiImage controlPointGrid(CreateControlPointGrid(reference));
-
         // Accumulate all required contents with a vector
         vector<ContentDesc> contentDescs;
         for (auto&& platformType : PlatformTypes) {
             if (platformType == PlatformType::Cuda && interp != 1)
                 continue;   // CUDA platform only supports linear interpolation
             unique_ptr<Platform> platform{ new Platform(platformType) };
-            unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
-            unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
+            unique_ptr<DefContentCreator> contentCreator{ dynamic_cast<DefContentCreator*>(platform->CreateContentCreator(ContentType::Def)) };
+            unique_ptr<DefContent> content{ contentCreator->Create(reference, reference) };
             contentDescs.push_back({ std::move(content), std::move(platform) });
         }
 
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index 4f0118c4..859bb2c8 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -31,10 +31,6 @@ class LNCCTest {
         NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
 
-        // Create corresponding identify control point grids
-        NiftiImage cpp2d(CreateControlPointGrid(reference2d));
-        NiftiImage cpp3d(CreateControlPointGrid(reference3d));
-
         // Fill images with random values
         auto ref2dPtr = reference2d.data();
         auto flo2dPtr = floating2d.data();
@@ -57,7 +53,6 @@ class LNCCTest {
             "LNCC 2D -1",
             reference2d,
             floating2d,
-            cpp2d,
             -1.f,
             GetLNCCNoConv(1, reference2d, floating2d)
         ));
@@ -65,7 +60,6 @@ class LNCCTest {
             "LNCC 2D -1 same image",
             reference2d,
             reference2d,
-            cpp2d,
             -1.f,
             1.0
         ));
@@ -73,7 +67,6 @@ class LNCCTest {
             "LNCC 2D -5",
             reference2d,
             floating2d,
-            cpp2d,
             -5.f,
             GetLNCCNoConv(5, reference2d, floating2d)
         ));
@@ -81,7 +74,6 @@ class LNCCTest {
             "LNCC 2D -5 same image",
             reference2d,
             reference2d,
-            cpp2d,
             -5.f,
             1.0
         ));
@@ -90,7 +82,6 @@ class LNCCTest {
             "LNCC 2D -1 same image negated",
             reference2d,
             floating2d,
-            cpp2d,
             -1.f,
             1.0
         ));
@@ -98,7 +89,6 @@ class LNCCTest {
             "LNCC 2D -5 same image negated",
             reference2d,
             floating2d,
-            cpp2d,
             -5.f,
             1.0
         ));
@@ -106,7 +96,6 @@ class LNCCTest {
             "LNCC 3D -1",
             reference3d,
             floating3d,
-            cpp3d,
             -1.f,
             GetLNCCNoConv(1, reference3d, floating3d)
         ));
@@ -114,7 +103,6 @@ class LNCCTest {
             "LNCC 3D -1 same image",
             reference3d,
             reference3d,
-            cpp3d,
             -1.f,
             1.0
         ));
@@ -122,7 +110,6 @@ class LNCCTest {
             "LNCC 3D -5",
             reference3d,
             floating3d,
-            cpp3d,
             -5.f,
             GetLNCCNoConv(5, reference3d, floating3d)
         ));
@@ -130,7 +117,6 @@ class LNCCTest {
             "LNCC 3D -5 same image",
             reference3d,
             reference3d,
-            cpp3d,
             -5.f,
             1.0
         ));
@@ -139,7 +125,6 @@ class LNCCTest {
             "LNCC 3D -1 same image negated",
             reference3d,
             floating3d,
-            cpp3d,
             -1.f,
             1.0
         ));
@@ -147,7 +132,6 @@ class LNCCTest {
             "LNCC 3D -5 same image negated",
             reference3d,
             floating3d,
-            cpp3d,
             -5.f,
             1.0
         ));
@@ -157,13 +141,13 @@ class LNCCTest {
                 shared_ptr<Platform> platform{ new Platform(platformType) };
                 // Make a copy of the test data
                 auto td = data;
-                auto&& [testName, reference, floating, cpp, sigma, result] = td;
+                auto&& [testName, reference, floating, sigma, result] = td;
                 // Create the content creator
-                unique_ptr<F3dContentCreator> contentCreator{
-                    dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d))
+                unique_ptr<DefContentCreator> contentCreator{
+                    dynamic_cast<DefContentCreator*>(platform->CreateContentCreator(ContentType::Def))
                 };
                 // Create the content
-                unique_ptr<F3dContent> content{ contentCreator->Create(reference, floating, cpp) };
+                unique_ptr<DefContent> content{ contentCreator->Create(reference, floating) };
                 // Initialise the warped image using the nearest-neighbour interpolation
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->ResampleImage(0, 0);
@@ -189,7 +173,7 @@ class LNCCTest {
     };
 
     using LocalStats = std::tuple<double, double>;
-    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, float, double>;
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, float, double>;
     using TestCase = std::tuple<unique_ptr<Content>, unique_ptr<reg_lncc>, shared_ptr<Platform>, TestData>;
     inline static vector<TestCase> testCases;
 
@@ -312,7 +296,7 @@ TEST_CASE_METHOD(LNCCTest, "LNCC", "[GetSimilarityMeasureValue]") {
     for (auto&& testCase : testCases) {
         // Retrieve test information
         auto&& [content, measure, platform, testData] = testCase;
-        auto&& [testName, reference, floating, cpp, sigma, value] = testData;
+        auto&& [testName, reference, floating, sigma, value] = testData;
 
         SECTION(testName) {
             std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index 7c6e1184..c18bdb94 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -54,24 +54,18 @@ class NMITest {
             flo3dPtr[i] = (int)distr(gen);
         }
 
-        // Create corresponding identify control point grids
-        NiftiImage cpp2d(CreateControlPointGrid(reference2d));
-        NiftiImage cpp3d(CreateControlPointGrid(reference3d));
-
         // Create the object to compute the expected values
         vector<TestData> testData;
         testData.emplace_back(TestData(
             "NMI 2D",
             reference2d,
             floating2d,
-            cpp2d,
             GetNMIPW(reference2d, floating2d)
         ));
         testData.emplace_back(TestData(
             "NMI 3D",
             reference3d,
             floating3d,
-            cpp3d,
             GetNMIPW(reference3d, floating3d)
         ));
         for (auto&& data : testData) {
@@ -80,13 +74,13 @@ class NMITest {
                 shared_ptr<Platform> platform{ new Platform(platformType) };
                 // Make a copy of the test data
                 auto td = data;
-                auto&& [testName, reference, floating, cpp, expected] = td;
+                auto&& [testName, reference, floating, expected] = td;
                 // Create the content creator
-                unique_ptr<F3dContentCreator> contentCreator{
-                    dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d))
+                unique_ptr<DefContentCreator> contentCreator{
+                    dynamic_cast<DefContentCreator*>(platform->CreateContentCreator(ContentType::Def))
                 };
                 // Create the content
-                unique_ptr<F3dContent> content{ contentCreator->Create(reference, floating, cpp) };
+                unique_ptr<DefContent> content{ contentCreator->Create(reference, floating) };
                 // Initialise the warped image using floating image
                 content->SetWarped(floating.disown());
                 // Create the measure
@@ -103,7 +97,7 @@ class NMITest {
     }
 
 protected:
-    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, double>;
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, double>;
     using TestCase = std::tuple<std::string, double, double>;
     inline static vector<TestCase> testCases;
 

From 4bbc8790216c024dd7ca10fcf1925bc936d8f82c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 23 Aug 2023 14:50:59 +0100
Subject: [PATCH 181/314] Add Debug header to handle errors and logging #92

 - Handle errors and throw exception instead of exiting the app
 - Print errors, warnings and infos with NR_ERROR, NR_WARN and NR_INFO macros respectively
 - Print verbose messages with NR_VERBOSE
 - Print function names with NR_FUNC_CALLED() macro
 - Convert all *printf to NR_COUT or NR_CERR
 - Get rid of NDEBUGs and use NR_DEBUG instead
 - Replace C-style string manipulation with C++-style
 - Ditch reg_stringFormat()
---
 CMakeLists.txt                                |   7 -
 cmake/NIFTYREGConfig.cmake.in                 |   7 +-
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_aladin.cpp                       | 244 ++++-----
 reg-apps/reg_average.cpp                      | 136 ++---
 reg-apps/reg_benchmark.cpp                    |   2 +-
 reg-apps/reg_f3d.cpp                          | 361 ++++++-------
 reg-apps/reg_gpuinfo.cpp                      |   5 +-
 reg-apps/reg_jacobian.cpp                     |  66 ++-
 reg-apps/reg_measure.cpp                      |  71 ++-
 reg-apps/reg_ppcnr.cpp                        | 418 ++++++---------
 reg-apps/reg_resample.cpp                     | 100 ++--
 reg-apps/reg_tools.cpp                        | 115 ++--
 reg-apps/reg_transform.cpp                    | 446 ++++++++--------
 reg-io/CMakeLists.txt                         |   2 +-
 reg-io/RNifti/NiftiImage.h                    |  20 +-
 reg-io/RNifti/NiftiImage_impl.h               |  41 +-
 reg-io/RNifti/NiftiImage_print.h              |   6 +
 reg-io/_reg_ReadWriteImage.cpp                |  40 +-
 reg-io/_reg_ReadWriteImage.h                  |   2 +-
 reg-io/_reg_ReadWriteMatrix.cpp               |  93 +---
 reg-io/_reg_ReadWriteMatrix.h                 |   5 +-
 reg-io/_reg_stringFormat.cpp                  |  43 --
 reg-io/_reg_stringFormat.h                    |  11 -
 reg-io/nrrd/reg_nrrd.cpp                      |  66 +--
 reg-io/nrrd/reg_nrrd.h                        |   3 -
 reg-io/png/reg_png.cpp                        |  82 +--
 reg-io/png/reg_png.h                          |   1 -
 reg-io/zlib/zutil.c                           |   2 +-
 reg-lib/Content.cpp                           |  14 +-
 reg-lib/ConvolutionKernel.h                   |   2 +-
 reg-lib/Debug.hpp                             |  81 +++
 reg-lib/F3dContent.cpp                        |   7 +-
 reg-lib/Measure.cpp                           |   7 +-
 reg-lib/Platform.cpp                          |  13 +-
 reg-lib/ResampleImageKernel.h                 |   2 +-
 reg-lib/_reg_aladin.cpp                       | 191 ++-----
 reg-lib/_reg_aladin.h                         |   6 +-
 reg-lib/_reg_aladin_sym.cpp                   |  86 ++-
 reg-lib/_reg_base.cpp                         | 374 ++++---------
 reg-lib/_reg_base.h                           |   1 -
 reg-lib/_reg_f3d.cpp                          | 493 ++++++------------
 reg-lib/_reg_f3d2.cpp                         | 304 +++--------
 reg-lib/_reg_polyAffine.cpp                   |   9 +-
 reg-lib/cl/ClAffineDeformationFieldKernel.cpp |   8 +-
 reg-lib/cl/ClAladinContent.cpp                |  22 +-
 reg-lib/cl/ClBlockMatchingKernel.cpp          |  21 +-
 reg-lib/cl/ClContentCreatorFactory.h          |   5 +-
 reg-lib/cl/ClContextSingleton.cpp             | 153 +++---
 reg-lib/cl/ClContextSingleton.h               |   4 +-
 reg-lib/cl/ClResampleImageKernel.cpp          |  31 +-
 reg-lib/cl/InfoDevice.h                       |  24 +-
 reg-lib/cl/_reg_openclinfo.cpp                |  15 +-
 reg-lib/cl/_reg_openclinfo.h                  |   5 +-
 reg-lib/cpu/CpuBlockMatchingKernel.h          |   2 -
 reg-lib/cpu/CpuLtsKernel.h                    |   2 -
 reg-lib/cpu/_reg_blockMatching.cpp            |  65 +--
 reg-lib/cpu/_reg_discrete_init.cpp            |  51 +-
 reg-lib/cpu/_reg_dti.cpp                      |  30 +-
 reg-lib/cpu/_reg_femTrans.cpp                 |   1 -
 reg-lib/cpu/_reg_femTrans.h                   |  10 +-
 reg-lib/cpu/_reg_globalTrans.cpp              |  16 +-
 reg-lib/cpu/_reg_globalTrans.h                |   2 +-
 reg-lib/cpu/_reg_kld.cpp                      |  34 +-
 reg-lib/cpu/_reg_lncc.cpp                     |  17 +-
 reg-lib/cpu/_reg_localTrans.cpp               | 203 ++------
 reg-lib/cpu/_reg_localTrans_jac.cpp           | 179 ++-----
 reg-lib/cpu/_reg_localTrans_regul.cpp         | 170 ++----
 reg-lib/cpu/_reg_maths.cpp                    |  86 ++-
 reg-lib/cpu/_reg_maths.h                      |  71 +--
 reg-lib/cpu/_reg_maths_eigen.cpp              |  26 +-
 reg-lib/cpu/_reg_maths_eigen.h                |   2 +-
 reg-lib/cpu/_reg_measure.h                    |  82 +--
 reg-lib/cpu/_reg_mind.cpp                     |  56 +-
 reg-lib/cpu/_reg_mrf.cpp                      |  27 +-
 reg-lib/cpu/_reg_nmi.cpp                      |  31 +-
 reg-lib/cpu/_reg_optimiser.cpp                |  68 +--
 reg-lib/cpu/_reg_polyAffine.cpp               |   9 +-
 reg-lib/cpu/_reg_resampling.cpp               | 246 +++------
 reg-lib/cpu/_reg_resampling.h                 |   2 +-
 reg-lib/cpu/_reg_ssd.cpp                      |  32 +-
 reg-lib/cpu/_reg_thinPlateSpline.cpp          |  45 +-
 reg-lib/cpu/_reg_tools.cpp                    | 371 +++++--------
 reg-lib/cpu/_reg_tools.h                      |   4 +
 reg-lib/cuda/BlockSize.hpp                    |  10 +-
 reg-lib/cuda/CudaAladinContent.cpp            |   9 +-
 reg-lib/cuda/CudaContent.cpp                  |   4 +-
 reg-lib/cuda/CudaContext.cpp                  |  49 +-
 reg-lib/cuda/CudaContext.hpp                  |   2 +-
 reg-lib/cuda/CudaLtsKernel.cpp                |   6 +-
 reg-lib/cuda/CudaMeasure.cpp                  |  13 +-
 reg-lib/cuda/CudaNormaliseGradient.hpp        |   2 +-
 reg-lib/cuda/CudaResampleImageKernel.cpp      |  14 +-
 reg-lib/cuda/_reg_common_cuda.cu              |  99 +---
 reg-lib/cuda/_reg_common_cuda.h               |  38 +-
 reg-lib/cuda/_reg_cudainfo.cpp                |  61 +--
 reg-lib/cuda/_reg_cudainfo.h                  |   2 +-
 .../cuda/_reg_localTransformation_kernels.cu  |   2 +-
 reg-lib/cuda/_reg_measure_gpu.h               |  30 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                  |  26 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu            |  52 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |  19 +-
 reg-lib/cuda/affineDeformationKernel.cu       |   7 +-
 reg-lib/cuda/affineDeformationKernel.h        |   5 +-
 reg-lib/cuda/blockMatchingKernel.cu           |   6 +-
 reg-lib/cuda/checkCudaCard.cpp                |  25 +-
 reg-lib/cuda/optimizeKernel.cu                |  46 +-
 reg-lib/cuda/optimizeKernel.h                 |   2 +-
 reg-lib/cuda/resampleKernel.cu                |  61 +--
 reg-lib/cuda/resampleKernel.h                 |   3 +-
 reg-test/reg_test_be.cpp                      |   4 +-
 reg-test/reg_test_blockMatching.cpp           |   6 +-
 reg-test/reg_test_conjugateGradient.cpp       |  10 +-
 reg-test/reg_test_getDeformationField.cpp     |   6 +-
 reg-test/reg_test_imageGradient.cpp           |   2 +-
 reg-test/reg_test_interpolation.cpp           |   2 +-
 reg-test/reg_test_lncc.cpp                    |   4 +-
 reg-test/reg_test_nmi.cpp                     |   4 +-
 reg-test/reg_test_normaliseGradient.cpp       |   4 +-
 reg-test/reg_test_regr_blockMatching.cpp      |  12 +-
 reg-test/reg_test_regr_lts.cpp                |   4 +-
 reg-test/reg_test_regr_nmi.cpp                |   8 +-
 .../reg_test_voxelCentricToNodeCentric.cpp    |   4 +-
 123 files changed, 2388 insertions(+), 4490 deletions(-)
 delete mode 100644 reg-io/_reg_stringFormat.cpp
 delete mode 100644 reg-io/_reg_stringFormat.h
 create mode 100644 reg-lib/Debug.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9e872c48..67368df2 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -73,9 +73,6 @@ option(USE_OPENCL "To use the OpenCL platform" OFF)
 option(USE_OPENMP "To use openMP for multi-CPU processing" ON)
 option(USE_SSE "To enable SEE computation in some case" ON)
 #-----------------------------------------------------------------------------
-option(USE_THROW_EXCEP "To throw exception rather than exit" OFF)
-mark_as_advanced(USE_THROW_EXCEP)
-#-----------------------------------------------------------------------------
 option(USE_NRRD "To use the NRRD file format" OFF)
 mark_as_advanced(USE_NRRD)
 #-----------------------------------------------------------------------------
@@ -207,10 +204,6 @@ else(BUILD_SHARED_LIBS)
   set(NIFTYREG_LIBRARY_TYPE STATIC)
 endif(BUILD_SHARED_LIBS)
 #-----------------------------------------------------------------------------
-if(USE_THROW_EXCEP)
-  add_definitions(-DNR_THROW_EXCEP)
-endif(USE_THROW_EXCEP)
-#-----------------------------------------------------------------------------
 add_subdirectory(third-party)
 add_subdirectory(reg-io)
 add_subdirectory(reg-lib)
diff --git a/cmake/NIFTYREGConfig.cmake.in b/cmake/NIFTYREGConfig.cmake.in
index 3decd74e..f41ef5ee 100644
--- a/cmake/NIFTYREGConfig.cmake.in
+++ b/cmake/NIFTYREGConfig.cmake.in
@@ -16,7 +16,7 @@
 #  limitations under the License.
 
 # This file sets NIFTYREG_INCLUDE_DIRS, NIFTYREG_LIBRARY_DIRS and NIFTYREG_LIBRARIES.
-@PACKAGE_INIT@ 
+@PACKAGE_INIT@
 
 # add folder where this file resides to the cmake path such that it can use our find_package modules and .cmake files
 set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR};${CMAKE_MODULE_PATH}")
@@ -55,9 +55,4 @@ endif()
 if (@USE_SSE@)
   set(NIFTYREG_BUILT_WITH_SSE TRUE)
   mark_as_advanced(NIFTYREG_BUILT_WITH_SSE)
-endif()
-# THROW_EXCEP
-if (@USE_THROW_EXCEP@)
-  set(NIFTYREG_BUILT_WITH_THROW_EXCEP TRUE)
-  mark_as_advanced(NIFTYREG_BUILT_WITH_THROW_EXCEP)
 endif()
\ No newline at end of file
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 697cb3a2..d8fc48a4 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-300
+301
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 26413b68..cb5f4162 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -24,64 +24,60 @@
 using PrecisionType = float;
 
 void PetitUsage(char *exec) {
-    char text[255];
-    reg_print_msg_error("");
-    reg_print_msg_error("reg_aladin");
-    sprintf(text, "Usage:\t%s -ref <referenceImageName> -flo <floatingImageName> [OPTIONS]", exec);
-    reg_print_msg_error(text);
-    reg_print_msg_error("\tSee the help for more details (-h).");
-    reg_print_msg_error("");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("reg_aladin");
+    NR_INFO("Usage:\t" << exec << " -ref <referenceImageName> -flo <floatingImageName> [OPTIONS]");
+    NR_INFO("\tSee the help for more details (-h).");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 void Usage(char *exec) {
-    char text[255];
-    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-    reg_print_info(exec, "Block Matching algorithm for global registration.");
-    reg_print_info(exec, "Based on Modat et al., \"Global image registration using a symmetric block-matching approach\"");
-    reg_print_info(exec, "J. Med. Img. 1(2) 024003, 2014, doi: 10.1117/1.JMI.1.2.024003");
-    reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)");
-    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-    sprintf(text, "Usage:\t%s -ref <filename> -flo <filename> [OPTIONS].", exec);
-    reg_print_info(exec, text);
-    reg_print_info(exec, "\t-ref <filename>\tReference image filename (also called Target or Fixed) (mandatory)");
-    reg_print_info(exec, "\t-flo <filename>\tFloating image filename (also called Source or moving) (mandatory)");
-    reg_print_info(exec, "");
-    reg_print_info(exec, "* * OPTIONS * *");
-    reg_print_info(exec, "\t-noSym \t\t\tThe symmetric version of the algorithm is used by default. Use this flag to disable it.");
-    reg_print_info(exec, "\t-rigOnly\t\tTo perform a rigid registration only. (Rigid+affine by default)");
-    reg_print_info(exec, "\t-affDirect\t\tDirectly optimize 12 DoF affine. (Default is rigid initially then affine)");
-
-    reg_print_info(exec, "\t-aff <filename>\t\tFilename which contains the output affine transformation. [outputAffine.txt]");
-    reg_print_info(exec, "\t-inaff <filename>\tFilename which contains an input affine transformation. (Affine*Reference=Floating) [none]");
-
-    reg_print_info(exec, "\t-rmask <filename>\tFilename of a mask image in the reference space.");
-    reg_print_info(exec, "\t-fmask <filename>\tFilename of a mask image in the floating space. (Only used when symmetric turned on)");
-    reg_print_info(exec, "\t-res <filename>\t\tFilename of the resampled image. [outputResult.nii.gz]");
-
-    reg_print_info(exec, "\t-maxit <int>\t\tMaximal number of iterations of the trimmed least square approach to perform per level. [5]");
-    reg_print_info(exec, "\t-ln <int>\t\tNumber of levels to use to generate the pyramids for the coarse-to-fine approach. [3]");
-    reg_print_info(exec, "\t-lp <int>\t\tNumber of levels to use to run the registration once the pyramids have been created. [ln]");
-
-    reg_print_info(exec, "\t-smooR <float>\t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Reference image. [0]");
-    reg_print_info(exec, "\t-smooF <float>\t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Floating image. [0]");
-    reg_print_info(exec, "\t-refLowThr <float>\tLower threshold value applied to the reference image. [0]");
-    reg_print_info(exec, "\t-refUpThr <float>\tUpper threshold value applied to the reference image. [0]");
-    reg_print_info(exec, "\t-floLowThr <float>\tLower threshold value applied to the floating image. [0]");
-    reg_print_info(exec, "\t-floUpThr <float>\tUpper threshold value applied to the floating image. [0]");
-    reg_print_info(exec, "\t-pad <float>\t\tPadding value [nan]");
-
-    reg_print_info(exec, "\t-nac\t\t\tUse the nifti header origin to initialise the transformation. (Image centres are used by default)");
-    reg_print_info(exec, "\t-comm\t\t\tUse the input masks centre of mass to initialise the transformation. (Image centres are used by default)");
-    reg_print_info(exec, "\t-comi\t\t\tUse the input images centre of mass to initialise the transformation. (Image centres are used by default)");
-    reg_print_info(exec, "\t-interp\t\t\tInterpolation order to use internally to warp the floating image.");
-    reg_print_info(exec, "\t-iso\t\t\tMake floating and reference images isotropic if required.");
-
-    reg_print_info(exec, "\t-pv <int>\t\tPercentage of blocks to use in the optimisation scheme. [50]");
-    reg_print_info(exec, "\t-pi <int>\t\tPercentage of blocks to consider as inlier in the optimisation scheme. [50]");
-    reg_print_info(exec, "\t-speeeeed\t\tGo faster");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("Block Matching algorithm for global registration.");
+    NR_INFO("Based on Modat et al., \"Global image registration using a symmetric block-matching approach\"");
+    NR_INFO("J. Med. Img. 1(2) 024003, 2014, doi: 10.1117/1.JMI.1.2.024003");
+    NR_INFO("For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("Usage:\t" << exec << " -ref <filename> -flo <filename> [OPTIONS]");
+    NR_INFO("\t-ref <filename>\tReference image filename (also called Target or Fixed) (mandatory)");
+    NR_INFO("\t-flo <filename>\tFloating image filename (also called Source or Moving) (mandatory)");
+    NR_INFO("");
+    NR_INFO("* * OPTIONS * *");
+    NR_INFO("\t-noSym \t\t\tThe symmetric version of the algorithm is used by default. Use this flag to disable it.");
+    NR_INFO("\t-rigOnly\t\tTo perform a rigid registration only. (Rigid+affine by default)");
+    NR_INFO("\t-affDirect\t\tDirectly optimize 12 DoF affine. (Default is rigid initially then affine)");
+
+    NR_INFO("\t-aff <filename>\t\tFilename which contains the output affine transformation. [outputAffine.txt]");
+    NR_INFO("\t-inaff <filename>\tFilename which contains an input affine transformation. (Affine*Reference=Floating) [none]");
+
+    NR_INFO("\t-rmask <filename>\tFilename of a mask image in the reference space.");
+    NR_INFO("\t-fmask <filename>\tFilename of a mask image in the floating space. (Only used when symmetric turned on)");
+    NR_INFO("\t-res <filename>\t\tFilename of the resampled image. [outputResult.nii.gz]");
+
+    NR_INFO("\t-maxit <int>\t\tMaximal number of iterations of the trimmed least square approach to perform per level. [5]");
+    NR_INFO("\t-ln <int>\t\tNumber of levels to use to generate the pyramids for the coarse-to-fine approach. [3]");
+    NR_INFO("\t-lp <int>\t\tNumber of levels to use to run the registration once the pyramids have been created. [ln]");
+
+    NR_INFO("\t-smooR <float>\t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Reference image. [0]");
+    NR_INFO("\t-smooF <float>\t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Floating image. [0]");
+    NR_INFO("\t-refLowThr <float>\tLower threshold value applied to the reference image. [0]");
+    NR_INFO("\t-refUpThr <float>\tUpper threshold value applied to the reference image. [0]");
+    NR_INFO("\t-floLowThr <float>\tLower threshold value applied to the floating image. [0]");
+    NR_INFO("\t-floUpThr <float>\tUpper threshold value applied to the floating image. [0]");
+    NR_INFO("\t-pad <float>\t\tPadding value [nan]");
+
+    NR_INFO("\t-nac\t\t\tUse the nifti header origin to initialise the transformation. (Image centres are used by default)");
+    NR_INFO("\t-comm\t\t\tUse the input masks centre of mass to initialise the transformation. (Image centres are used by default)");
+    NR_INFO("\t-comi\t\t\tUse the input images centre of mass to initialise the transformation. (Image centres are used by default)");
+    NR_INFO("\t-interp\t\t\tInterpolation order to use internally to warp the floating image.");
+    NR_INFO("\t-iso\t\t\tMake floating and reference images isotropic if required.");
+
+    NR_INFO("\t-pv <int>\t\tPercentage of blocks to use in the optimisation scheme. [50]");
+    NR_INFO("\t-pi <int>\t\tPercentage of blocks to consider as inlier in the optimisation scheme. [50]");
+    NR_INFO("\t-speeeeed\t\tGo faster");
 
     if (Platform::IsCudaEnabled() || Platform::IsOpenClEnabled()) {
-        reg_print_info(exec, "*** Platform options:");
+        NR_INFO("*** Platform options:");
         std::string platform = "\t-platf <uint>\t\tChoose platform: CPU=0 | ";
         if (Platform::IsCudaEnabled()) {
             platform += "Cuda=1";
@@ -91,38 +87,32 @@ void Usage(char *exec) {
         if (Platform::IsOpenClEnabled())
             platform += "OpenCL=2";
         platform += " [0]";
-        reg_print_info(exec, platform.c_str());
+        NR_INFO(platform);
 
-        reg_print_info(exec, "\t-gpuid <uint>\t\tChoose a custom gpu.");
-        reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
+        NR_INFO("\t-gpuid <uint>\t\tChoose a custom gpu.");
+        NR_INFO("\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
     }
 
-    //   reg_print_info(exec, "\t-crv\t\t\tChoose custom capture range for the block matching alg");
+    //   NR_INFO("\t-crv\t\t\tChoose custom capture range for the block matching alg");
 #ifdef _OPENMP
     int defaultOpenMPValue = omp_get_num_procs();
     if (getenv("OMP_NUM_THREADS") != nullptr)
         defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
-    sprintf(text, "\t-omp <int>\t\tNumber of thread to use with OpenMP. [%i/%i]",
-            defaultOpenMPValue, omp_get_num_procs());
-    reg_print_info(exec, text);
+    NR_INFO("\t-omp <int>\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]");
 #endif
-    reg_print_info(exec, "\t-voff\t\t\tTurns verbose off [on]");
-    reg_print_info(exec, "");
-    reg_print_info(exec, "\t--version\t\tPrint current version and exit");
-    sprintf(text, "\t\t\t\t(%s)", NR_VERSION);
-    reg_print_info(exec, text);
-    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("\t-voff\t\t\tTurns verbose off [on]");
+    NR_INFO("");
+    NR_INFO("\t--version\t\tPrint current version and exit");
+    NR_INFO("\t\t\t\t(" << NR_VERSION << ")");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 int main(int argc, char **argv) {
     if (argc == 1) {
-        //PetitUsage(basename(argv[0])); //DO NOT WORK ON WINDOWS !
         PetitUsage(argv[0]);
         return EXIT_FAILURE;
     }
 
-    char text[2048];
-
     time_t start;
     time(&start);
 
@@ -176,7 +166,7 @@ int main(int argc, char **argv) {
     unsigned gpuIdx = 999;
 
 #ifdef _OPENMP
-    // Set the default number of thread
+    // Set the default number of threads
     int defaultOpenMPValue = omp_get_num_procs();
     if (getenv("OMP_NUM_THREADS") != nullptr)
         defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
@@ -191,7 +181,7 @@ int main(int argc, char **argv) {
             Usage(argv[0]);
             return EXIT_SUCCESS;
         } else if (strcmp(argv[i], "--xml") == 0) {
-            printf("%s", xml_aladin);
+            NR_COUT << xml_aladin;
             return EXIT_SUCCESS;
         }
         if (strcmp(argv[i], "-version") == 0 ||
@@ -200,7 +190,7 @@ int main(int argc, char **argv) {
             strcmp(argv[i], "-v") == 0 ||
             strcmp(argv[i], "--v") == 0 ||
             strcmp(argv[i], "--version") == 0) {
-            printf("%s\n", NR_VERSION);
+            NR_COUT << NR_VERSION << std::endl;
             return EXIT_SUCCESS;
         } else if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 || strcmp(argv[i], "--ref") == 0) {
             referenceImageName = argv[++i];
@@ -256,15 +246,15 @@ int main(int argc, char **argv) {
             alignCentreOfMass = 2;
         } else if (strcmp(argv[i], "-%v") == 0 || strcmp(argv[i], "-pv") == 0 || strcmp(argv[i], "--pv") == 0) {
             int value = atoi(argv[++i]);
-            if (value < 1 || value>100) {
-                reg_print_msg_error("The variance argument is expected to be an integer between 1 and 100");
+            if (value < 1 || value > 100) {
+                NR_ERROR("The variance argument is expected to be an integer between 1 and 100");
                 return EXIT_FAILURE;
             }
             blockPercentage = value;
         } else if (strcmp(argv[i], "-%i") == 0 || strcmp(argv[i], "-pi") == 0 || strcmp(argv[i], "--pi") == 0) {
             int value = atoi(argv[++i]);
-            if (value < 1 || value>100) {
-                reg_print_msg_error("The inlier argument is expected to be an integer between 1 and 100");
+            if (value < 1 || value > 100) {
+                NR_ERROR("The inlier argument is expected to be an integer between 1 and 100");
                 return EXIT_FAILURE;
             }
             inlierLts = value;
@@ -287,21 +277,24 @@ int main(int argc, char **argv) {
         } else if (strcmp(argv[i], "-iso") == 0 || strcmp(argv[i], "--iso") == 0) {
             iso = true;
         } else if (strcmp(argv[i], "-voff") == 0 || strcmp(argv[i], "--voff") == 0) {
+            NR_DEBUG("The verbose cannot be switch off in debug");
+#ifdef NDEBUG
             verbose = false;
+#endif
         } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
             PlatformType value{ atoi(argv[++i]) };
             if (value < PlatformType::Cpu || value > PlatformType::OpenCl) {
-                reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL");
+                NR_ERROR("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL");
                 return EXIT_FAILURE;
             }
             if (value == PlatformType::Cuda && !Platform::IsCudaEnabled()) {
-                reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA");
-                reg_print_msg_warn("The CPU platform is used");
+                NR_WARN("The current install of NiftyReg has not been compiled with CUDA");
+                NR_WARN("The CPU platform is used");
                 value = PlatformType::Cpu;
             }
             if (value == PlatformType::OpenCl && !Platform::IsOpenClEnabled()) {
-                reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL");
-                reg_print_msg_warn("The CPU platform is used");
+                NR_WARN("The current install of NiftyReg has not been compiled with OpenCL");
+                NR_WARN("The CPU platform is used");
                 value = PlatformType::Cpu;
             }
             platformType = value;
@@ -313,67 +306,50 @@ int main(int argc, char **argv) {
 #ifdef _OPENMP
             omp_set_num_threads(atoi(argv[++i]));
 #else
-            reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
+            NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
             ++i;
 #endif
         } else {
-
-            sprintf(text, "Err:\tParameter %s unknown.", argv[i]);
-            reg_print_msg_error(text);
+            NR_ERROR("\tParameter " << argv[i] << " unknown!");
             PetitUsage(argv[0]);
             return EXIT_FAILURE;
         }
     }
 
     if (!referenceImageFlag || !floatingImageFlag) {
-        sprintf(text, "Err:\tThe reference and the floating image have to be defined.");
-        reg_print_msg_error(text);
+        NR_ERROR("The reference and the floating image have to be defined!");
         PetitUsage(argv[0]);
         return EXIT_FAILURE;
     }
 
     // Output the command line
-#ifdef NDEBUG
-    if (verbose) {
-#endif
-        reg_print_info((argv[0]), "");
-        reg_print_info((argv[0]), "Command line:");
-        sprintf(text, "\t");
-        for (int i = 0; i < argc; i++)
-            sprintf(text + strlen(text), " %s", argv[i]);
-        reg_print_info((argv[0]), text);
-        reg_print_info((argv[0]), "");
-#ifdef NDEBUG
-    }
-#endif
+    PrintCmdLine(argc, argv, verbose);
 
     unique_ptr<reg_aladin<PrecisionType>> reg;
     if (symFlag) {
         reg.reset(new reg_aladin_sym<PrecisionType>);
         if ((referenceMaskFlag && !floatingMaskName) || (!referenceMaskFlag && floatingMaskName)) {
-            reg_print_msg_warn("You have one image mask option turned on but not the other.");
-            reg_print_msg_warn("This will affect the degree of symmetry achieved.");
+            NR_WARN("You have one image mask option turned on but not the other.");
+            NR_WARN("This will affect the degree of symmetry achieved.");
         }
     } else {
         reg.reset(new reg_aladin<PrecisionType>);
         if (floatingMaskFlag) {
-            reg_print_msg_warn("Note: Floating mask flag only used in symmetric method. Ignoring this option");
+            NR_WARN("Note: Floating mask flag only used in symmetric method. Ignoring this option");
         }
     }
 
     /* Read the reference image and check its dimension */
     NiftiImage referenceHeader = reg_io_ReadImageFile(referenceImageName);
     if (!referenceHeader) {
-        sprintf(text, "Error when reading the reference image: %s", referenceImageName);
-        reg_print_msg_error(text);
+        NR_ERROR("Error when reading the reference image: " << referenceImageName);
         return EXIT_FAILURE;
     }
 
     /* Read the floating image and check its dimension */
     NiftiImage floatingHeader = reg_io_ReadImageFile(floatingImageName);
     if (!floatingHeader) {
-        sprintf(text, "Error when reading the floating image: %s", floatingImageName);
-        reg_print_msg_error(text);
+        NR_ERROR("Error when reading the floating image: " << floatingImageName);
         return EXIT_FAILURE;
     }
 
@@ -386,14 +362,13 @@ int main(int argc, char **argv) {
     if (referenceMaskFlag) {
         NiftiImage referenceMaskImage = reg_io_ReadImageFile(referenceMaskName);
         if (!referenceMaskImage) {
-            sprintf(text, "Error when reading the reference mask image: %s", referenceMaskName);
-            reg_print_msg_error(text);
+            NR_ERROR("Error when reading the reference mask image: " << referenceMaskName);
             return EXIT_FAILURE;
         }
         /* check the dimension */
         for (int i = 1; i <= referenceHeader->dim[0]; i++) {
             if (referenceHeader->dim[i] != referenceMaskImage->dim[i]) {
-                reg_print_msg_error("The reference image and its mask do not have the same dimension");
+                NR_ERROR("The reference image and its mask do not have the same dimension");
                 return EXIT_FAILURE;
             }
         }
@@ -404,14 +379,13 @@ int main(int argc, char **argv) {
     if (floatingMaskFlag && symFlag) {
         NiftiImage floatingMaskImage = reg_io_ReadImageFile(floatingMaskName);
         if (!floatingMaskImage) {
-            sprintf(text, "Error when reading the floating mask image: %s", floatingMaskName);
-            reg_print_msg_error(text);
+            NR_ERROR("Error when reading the floating mask image: " << floatingMaskName);
             return EXIT_FAILURE;
         }
         /* check the dimension */
         for (int i = 1; i <= floatingHeader->dim[0]; i++) {
             if (floatingHeader->dim[i] != floatingMaskImage->dim[i]) {
-                reg_print_msg_error("The floating image and its mask do not have the same dimension");
+                NR_ERROR("The floating image and its mask do not have the same dimension");
                 return EXIT_FAILURE;
             }
         }
@@ -458,23 +432,17 @@ int main(int argc, char **argv) {
     // Set the verbose type
     reg->SetVerbose(verbose);
 
-#ifndef NDEBUG
-    reg_print_msg_debug("*******************************************");
-    reg_print_msg_debug("*******************************************");
-    reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode");
-    reg_print_msg_debug("Please re-run cmake to set the variable");
-    reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required");
-    reg_print_msg_debug("*******************************************");
-    reg_print_msg_debug("*******************************************");
-#endif
+    NR_DEBUG("*******************************************");
+    NR_DEBUG("*******************************************");
+    NR_DEBUG("NiftyReg has been compiled in DEBUG mode");
+    NR_DEBUG("Please re-run cmake to set the variable");
+    NR_DEBUG("CMAKE_BUILD_TYPE to \"Release\" if required");
+    NR_DEBUG("*******************************************");
+    NR_DEBUG("*******************************************");
 
 #ifdef _OPENMP
-    if (verbose) {
-        int maxThreadNumber = omp_get_max_threads();
-        sprintf(text, "OpenMP is used with %i thread(s)", maxThreadNumber);
-        reg_print_info((argv[0]), text);
-    }
-#endif // _OPENMP
+    NR_VERBOSE_APP("OpenMP is used with " << omp_get_max_threads() << " threads");
+#endif
 
     // Run the registration
     reg->Run();
@@ -490,18 +458,12 @@ int main(int argc, char **argv) {
     /* The affine transformation is saved */
     reg_tool_WriteAffineFile(reg->GetTransformationMatrix(), outputAffineName);
 
-#ifdef NDEBUG
-    if (verbose) {
-#endif
-        time_t end;
-        time(&end);
-        float minutes = floorf((end - start) / 60.0f);
-        float seconds = (end - start - 60 * minutes);
-        sprintf(text, "Registration performed in %i min %i sec", (int)minutes, (int)seconds);
-        reg_print_info((argv[0]), text);
-        reg_print_info((argv[0]), "Have a good day !");
-#ifdef NDEBUG
-    }
-#endif
+    time_t end;
+    time(&end);
+    const int minutes = static_cast<int>(floorf((end - start) / 60.0f));
+    const int seconds = static_cast<int>(end - start) - 60 * minutes;
+    NR_VERBOSE_APP("Registration performed in " << minutes << " min " << seconds << " sec");
+    NR_VERBOSE_APP("Have a good day!");
+
     return EXIT_SUCCESS;
 }
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index 2fc5cb40..07f7d47c 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -32,46 +32,42 @@ typedef enum
 
 void usage(char *exec)
 {
-   char text[255];
-   reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   reg_print_info(exec, "usage:");
-   sprintf(text, "\t%s <outputFileName> [OPTIONS]", exec);
-   reg_print_info(exec, text);
-   reg_print_info(exec, "\t-avg <inputAffineName1> <inputAffineName2> ... <inputAffineNameN>");
-   reg_print_info(exec, "\t\tIf the input are images, the intensities are averaged");
-   reg_print_info(exec, "\t\tIf the input are affine matrices, out=expm((logm(M1)+logm(M2)+...+logm(MN))/N)");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "\t-avg_lts <AffineMat1> <AffineMat2> ... <AffineMatN> ");
-   reg_print_info(exec, "\t\tIt will estimate the robust average affine matrix by considering half of the matrices as ouliers.");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "\t-avg_tran <referenceImage> <transformationFileName1> <floatingImage1> ... <transformationFileNameN> <floatingImageN> ");
-   reg_print_info(exec, "\t\tAll input images are resampled into the space of <reference image> and averaged");
-   reg_print_info(exec, "\t\tA cubic spline interpolation scheme is used for resampling");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "\t-demean <referenceImage> <transformationFileName1> <floatingImage1> ...  <transformationFileNameN> <floatingImageN>");
-   reg_print_info(exec, "\t\tThe demean option enforces the mean of all transformations to be");
-   reg_print_info(exec, "\t\tidentity.");
-   reg_print_info(exec, "\t\tIf affine transformations are provided, only the non-rigid part is");
-   reg_print_info(exec, "\t\tconsidered after removing the rigid components.");
-   reg_print_info(exec, "\t\tIf non-linear transformation are provided the mean (euclidean) is ");
-   reg_print_info(exec, "\t\tremoved from all input transformations.");
-   reg_print_info(exec, "\t\tIf velocity field non-linear parametrisations are used, the affine");
-   reg_print_info(exec, "\t\tcomponent is discarded and the mean in the log space is removed.");
-   reg_print_info(exec, "");
-   reg_print_info(exec, "\t-demean_noaff <referenceImage> <AffineMat1> <NonRigidTrans1> <floatingImage1> ...  <AffineMatN> <NonRigidTransN> <floatingImageN>");
-   reg_print_info(exec, "\t\tSame as -demean expect that the specified affine is removed from the");
-   reg_print_info(exec, "\t\tnon-linear (euclidean) transformation.");
-   reg_print_info(exec, "\t--NN\t\tUse nearest neighbour interpolation - cubic is default");
-   reg_print_info(exec, "\t--LIN\t\tUse linear interpolation - cubic is default");
-   reg_print_info(exec, "\t--version\t\tPrint current version and exit");
-   sprintf(text, "\t\t\t\t(%s)",NR_VERSION);
-   reg_print_info(exec, text);
-   reg_print_info(exec, "");
-   reg_print_info(exec, "alternative usage:");
-   sprintf(text, "\t%s --cmd_file <textFile>", exec);
-   reg_print_info(exec, text);
-   reg_print_info(exec, "\t\tA text file that contains the full command is provided");
-   reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+   NR_INFO("usage:");
+   NR_INFO("\t" << exec << " <outputFileName> [OPTIONS]");
+   NR_INFO("\t-avg <inputAffineName1> <inputAffineName2> ... <inputAffineNameN>");
+   NR_INFO("\t\tIf the input are images, the intensities are averaged");
+   NR_INFO("\t\tIf the input are affine matrices, out=expm((logm(M1)+logm(M2)+...+logm(MN))/N)");
+   NR_INFO("");
+   NR_INFO("\t-avg_lts <AffineMat1> <AffineMat2> ... <AffineMatN> ");
+   NR_INFO("\t\tIt will estimate the robust average affine matrix by considering half of the matrices as ouliers.");
+   NR_INFO("");
+   NR_INFO("\t-avg_tran <referenceImage> <transformationFileName1> <floatingImage1> ... <transformationFileNameN> <floatingImageN> ");
+   NR_INFO("\t\tAll input images are resampled into the space of <reference image> and averaged");
+   NR_INFO("\t\tA cubic spline interpolation scheme is used for resampling");
+   NR_INFO("");
+   NR_INFO("\t-demean <referenceImage> <transformationFileName1> <floatingImage1> ...  <transformationFileNameN> <floatingImageN>");
+   NR_INFO("\t\tThe demean option enforces the mean of all transformations to be");
+   NR_INFO("\t\tidentity.");
+   NR_INFO("\t\tIf affine transformations are provided, only the non-rigid part is");
+   NR_INFO("\t\tconsidered after removing the rigid components.");
+   NR_INFO("\t\tIf non-linear transformation are provided the mean (euclidean) is ");
+   NR_INFO("\t\tremoved from all input transformations.");
+   NR_INFO("\t\tIf velocity field non-linear parametrisations are used, the affine");
+   NR_INFO("\t\tcomponent is discarded and the mean in the log space is removed.");
+   NR_INFO("");
+   NR_INFO("\t-demean_noaff <referenceImage> <AffineMat1> <NonRigidTrans1> <floatingImage1> ...  <AffineMatN> <NonRigidTransN> <floatingImageN>");
+   NR_INFO("\t\tSame as -demean expect that the specified affine is removed from the");
+   NR_INFO("\t\tnon-linear (euclidean) transformation.");
+   NR_INFO("\t--NN\t\tUse nearest neighbour interpolation - cubic is default");
+   NR_INFO("\t--LIN\t\tUse linear interpolation - cubic is default");
+   NR_INFO("\t--version\t\tPrint current version and exit");
+   NR_INFO("\t\t\t\t(" << NR_VERSION << ")");
+   NR_INFO("");
+   NR_INFO("alternative usage:");
+   NR_INFO("\t" << exec << " --cmd_file <textFile>");
+   NR_INFO("\t\tA text file that contains the full command is provided");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 void average_norm_intensity(nifti_image *image)
@@ -93,7 +89,7 @@ int remove_nan_and_add(nifti_image *averageImage,
 {
    if(averageImage->nvox!=toAddImage->nvox || averageImage->nvox!=definedNumImage->nvox)
    {
-      reg_print_msg_error(" All images must have the same size");
+      NR_ERROR("All images must have the same size");
       return EXIT_FAILURE;
    }
    PrecisionType *avgImgPtr = static_cast<PrecisionType *>(averageImage->data);
@@ -303,8 +299,7 @@ int compute_nrr_demean(nifti_image *demean_field,
          reg_spline_getFlowFieldFromVelocityGrid(transformation,deformationField);
          break;
       default:
-         reg_print_msg_error("Unsupported transformation parametrisation type:");
-         reg_print_msg_error(transformation->fname);
+         NR_ERROR("Unsupported transformation parametrisation type: " << transformation->fname);
          return EXIT_FAILURE;
       }
       // The affine component is removed
@@ -359,9 +354,7 @@ int compute_average_image(nifti_image *averageImage,
    nifti_image *demeanField = nullptr;
    if(demean && inputAffName!=nullptr && inputNRRName==nullptr){
       demeanMatrix = compute_affine_demean(imageNumber, inputAffName);
-#ifndef NDEBUG
-      reg_print_msg_debug("Matrix to use for demeaning computed");
-#endif
+      NR_DEBUG("Matrix to use for demeaning computed");
    }
    if(demean && inputNRRName!=nullptr){
       demeanField=nifti_copy_nim_info(averageImage);
@@ -379,9 +372,7 @@ int compute_average_image(nifti_image *averageImage,
       demeanField->intent_p1=DISP_FIELD;
       demeanField->data=calloc(demeanField->nvox, demeanField->nbyper);
       compute_nrr_demean(demeanField, imageNumber, inputNRRName, inputAffName);
-#ifndef NDEBUG
-      reg_print_msg_debug("Displacement field to use for demeaning computed");
-#endif
+      NR_DEBUG("Displacement field to use for demeaning computed");
    }
 
    // Set the average image to zero
@@ -430,8 +421,9 @@ int compute_average_image(nifti_image *averageImage,
          case DEF_VEL_FIELD:
             reg_defField_compose(current_transformation,deformationField,nullptr);
             break;
-         default: reg_print_msg_error("Unsupported transformation type")
-                  reg_exit();
+         default:
+            NR_ERROR("Unsupported transformation type");
+            return EXIT_FAILURE;
          }
          nifti_image_free(current_transformation);
          if(demeanField!=nullptr){
@@ -446,9 +438,7 @@ int compute_average_image(nifti_image *averageImage,
                nifti_image_free(tempDef);
             }
             else reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField);
-#ifndef NDEBUG
-            reg_print_msg_debug("Input non-linear transformation has been demeaned");
-#endif
+            NR_DEBUG("Input non-linear transformation has been demeaned");
          }
       }
       else if(inputAffName!=nullptr){
@@ -456,9 +446,7 @@ int compute_average_image(nifti_image *averageImage,
          reg_tool_ReadAffineFile(&current_affine,inputAffName[i]);
          if(demean && inputAffName!=nullptr && inputNRRName==nullptr){
             current_affine = demeanMatrix * current_affine;
-#ifndef NDEBUG
-      reg_print_msg_debug("Input affine transformation has been demeaned");
-#endif
+            NR_DEBUG("Input affine transformation has been demeaned");
          }
          reg_affine_getDeformationField(&current_affine, deformationField);
       }
@@ -500,7 +488,7 @@ int main(int argc, char **argv)
       return EXIT_FAILURE;
    }
 #ifdef _OPENMP
-   // Set the default number of thread
+   // Set the default number of threads
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -526,14 +514,14 @@ int main(int argc, char **argv)
       // Check if the --xml information is required
       else if(strcmp(argv[i], "--xml")==0)
       {
-         printf("%s",xml_average);
+         NR_COUT << xml_average;
          return EXIT_SUCCESS;
       }
       else if(strcmp(argv[i], "-version")==0 || strcmp(argv[i], "-Version")==0 ||
             strcmp(argv[i], "-V")==0 || strcmp(argv[i], "-v")==0 ||
             strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0)
       {
-         printf("%s\n",NR_VERSION);
+         NR_COUT << NR_VERSION << std::endl;
          return EXIT_SUCCESS;
       }
    }
@@ -545,9 +533,8 @@ int main(int argc, char **argv)
       char buffer[512];
       FILE *cmd_file = fopen(argv[2], "r+");
       if(cmd_file==nullptr){
-         reg_print_msg_error("Error when reading the provided command line file:");
-         reg_print_msg_error(argv[2]);
-         reg_exit();
+         NR_ERROR("Error when reading the provided command line file: " << argv[2]);
+         return EXIT_FAILURE;
       }
       // First path to extract the actual argument number
       while(fscanf(cmd_file," %511s", buffer)==1)
@@ -563,15 +550,7 @@ int main(int argc, char **argv)
             fscanf(cmd_file," %511s", buffer);
 #ifdef _OPENMP
             omp_set_num_threads(atoi(buffer));
-#else
-            reg_print_msg_warn("OpenMP flag detected and ignored.");
-#endif
-#ifndef NDEBUG
-            reg_print_msg_debug("OpenMP flag detected");
-#ifdef _OPENMP
-            reg_print_msg_debug("OpenMP core number set to:");
-            reg_print_msg_debug(buffer);
-#endif
+            NR_DEBUG("OpenMP core number set to: " << buffer);
 #endif
          }
          else{
@@ -587,13 +566,7 @@ int main(int argc, char **argv)
       arg_num_command = argc;
    }
 
-#ifndef NDEBUG
-   reg_print_msg_debug("command");
-   for(int i=0;i<arg_num_command;++i){
-      printf("%s ", pointer_to_command[i]);
-   }
-   printf("\n");
-#endif
+   PrintCmdLine(arg_num_command, pointer_to_command, true);
 
     // Set a variable to store the interpolation order, cubic is used by default
     int interpolation_order = 3;
@@ -663,9 +636,8 @@ int main(int argc, char **argv)
    }
    else
    {
-      reg_print_msg_error("unknow operation. Options are \"-avg\", \"-avg_lts\", \"-avg_tran\", ");
-      reg_print_msg_error("\"-demean\" or \"-demean_noaff\". Specified argument:");
-      reg_print_msg_error(pointer_to_command[2]);
+      NR_ERROR("Unknown operation. Options are \"-avg\", \"-avg_lts\", \"-avg_tran\", ");
+      NR_ERROR("\"-demean\" or \"-demean_noaff\". Specified argument:" << pointer_to_command[2]);
       usage(pointer_to_command[0]);
       return EXIT_FAILURE;
    }
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index c7e23e45..cf96b43f 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -74,7 +74,7 @@ int main(int argc, char **argv)
       }
       else
       {
-         fprintf(stderr,"Err:\tParameter %s unknown.\n",argv[i]);
+         NR_ERROR("Unknown parameter: " << argv[i]);
          Usage(argv[0]);
          return 1;
       }
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 7eb5b265..3ca1257c 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -27,115 +27,111 @@
 using PrecisionType = float;
 
 void PetitUsage(char *exec) {
-    char text[255];
-    reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-    reg_print_msg_error("Fast Free-Form Deformation algorithm for non-rigid registration");
-    sprintf(text, "Usage:\t%s -ref <referenceImageName> -flo <floatingImageName> [OPTIONS]", exec);
-    reg_print_msg_error(text);
-    reg_print_msg_error("\tSee the help for more details (-h)");
-    reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("Fast Free-Form Deformation algorithm for non-rigid registration");
+    NR_INFO("Usage:\t" << exec << " -ref <referenceImageName> -flo <floatingImageName> [OPTIONS]");
+    NR_INFO("\tSee the help for more details (-h)");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 void Usage(char *exec) {
-    char text[255];
-    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-    reg_print_info(exec, "Fast Free-Form Deformation (F3D) algorithm for non-rigid registration.");
-    reg_print_info(exec, "Based on Modat et al., \"Fast Free-Form Deformation using");
-    reg_print_info(exec, "graphics processing units\", CMPB, 2010");
-    reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)");
-    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-    sprintf(text, "Usage:\t%s -ref <filename> -flo <filename> [OPTIONS].", exec);
-    reg_print_info(exec, text);
-    reg_print_info(exec, "\t-ref <filename>\tFilename of the reference image (mandatory)");
-    reg_print_info(exec, "\t-flo <filename>\tFilename of the floating image (mandatory)");
-    reg_print_info(exec, "***************");
-    reg_print_info(exec, "*** OPTIONS ***");
-    reg_print_info(exec, "***************");
-    reg_print_info(exec, "*** Initial transformation options (One option will be considered):");
-    reg_print_info(exec, "\t-aff <filename>\t\tFilename which contains an affine transformation (Affine*Reference=Floating)");
-    reg_print_info(exec, "\t-incpp <filename>\tFilename of the control point grid input");
-    reg_print_info(exec, "\t\t\t\tThe coarse spacing is defined by this file.");
-    reg_print_info(exec, "");
-    reg_print_info(exec, "*** Output options:");
-    reg_print_info(exec, "\t-cpp <filename>\t\tFilename of control point grid [outputCPP.nii]");
-    reg_print_info(exec, "\t-res <filename> \tFilename of the resampled image [outputResult.nii]");
-    reg_print_info(exec, "");
-    reg_print_info(exec, "*** Input image options:");
-    reg_print_info(exec, "\t-rmask <filename>\t\tFilename of a mask image in the reference space");
-    reg_print_info(exec, "\t-smooR <float>\t\t\tSmooth the reference image using the specified sigma (mm) [0]");
-    reg_print_info(exec, "\t-smooF <float>\t\t\tSmooth the floating image using the specified sigma (mm) [0]");
-    reg_print_info(exec, "\t--rLwTh <float>\t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*");
-    reg_print_info(exec, "\t--rUpTh <float>\t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*");
-    reg_print_info(exec, "\t--fLwTh <float>\t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*");
-    reg_print_info(exec, "\t--fUpTh <float>\t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*");
-    reg_print_info(exec, "\t-rLwTh <timepoint> <float>\tLower threshold to apply to the reference image intensities [none]*");
-    reg_print_info(exec, "\t-rUpTh <timepoint> <float>\tUpper threshold to apply to the reference image intensities [none]*");
-    reg_print_info(exec, "\t-fLwTh <timepoint> <float>\tLower threshold to apply to the floating image intensities [none]*");
-    reg_print_info(exec, "\t-fUpTh <timepoint> <float>\tUpper threshold to apply to the floating image intensities [none]*");
-    reg_print_info(exec, "\t* The scl_slope and scl_inter from the nifti header are taken into account for the thresholds");
-    reg_print_info(exec, "");
-    reg_print_info(exec, "*** Spline options (All defined at full resolution):");
-    reg_print_info(exec, "\t-sx <float>\t\tFinal grid spacing along the x axis in mm (in voxel if negative value) [5 voxels]");
-    reg_print_info(exec, "\t-sy <float>\t\tFinal grid spacing along the y axis in mm (in voxel if negative value) [sx value]");
-    reg_print_info(exec, "\t-sz <float>\t\tFinal grid spacing along the z axis in mm (in voxel if negative value) [sx value]");
-    reg_print_info(exec, "");
-    reg_print_info(exec, "*** Regularisation options:");
-    reg_print_info(exec, "\t-be <float>\t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]");
-    reg_print_info(exec, "\t-le <float>\t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.01]");
-    reg_print_info(exec, "\t-jl <float>\t\tWeight of log of the Jacobian determinant penalty term [0.0]");
-    reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position");
-    reg_print_info(exec, "\t-land <float> <file>\tUse of a set of landmarks which distance should be minimised");
-    reg_print_info(exec, "\t\t\t\tThe first argument corresponds to the weight given to this regularisation (between 0 and 1)");
-    reg_print_info(exec, "\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimetre as");
-    reg_print_info(exec, "\t\t\t\t<refX> <refY> <refZ> <floX> <floY> <floZ>\\n for 3D images and");
-    reg_print_info(exec, "\t\t\t\t<refX> <refY> <floX> <floY>\\n for 2D images");
-    reg_print_info(exec, "");
-    reg_print_info(exec, "*** Measure of similarity options:");
-    reg_print_info(exec, "*** NMI with 64 bins is used except if specified otherwise");
-    reg_print_info(exec, "\t--nmi\t\t\tNMI. Used NMI even when one or several other measures are specified");
-    reg_print_info(exec, "\t--rbn <int>\t\tNMI. Number of bin to use for the reference image histogram. Identical value for every timepoint");
-    reg_print_info(exec, "\t--fbn <int>\t\tNMI. Number of bin to use for the floating image histogram. Identical value for every timepoint");
-    reg_print_info(exec, "\t-rbn <tp> <int>\t\tNMI. Number of bin to use for the reference image histogram for the specified time point");
-    reg_print_info(exec, "\t-fbn <tp> <int>\t\tNMI. Number of bin to use for the floating image histogram for the specified time point");
-    reg_print_info(exec, "\t--lncc <float>\t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every timepoint");
-    reg_print_info(exec, "\t-lncc <tp> <float>\tLNCC. Standard deviation of the Gaussian kernel for the specified timepoint");
-    reg_print_info(exec, "\t--ssd \t\t\tSSD. Used for all time points - images are normalized between 0 and 1 before computing the measure");
-    reg_print_info(exec, "\t-ssd <tp> \t\tSSD. Used for the specified timepoint - images are normalized between 0 and 1 before computing the measure");
-    reg_print_info(exec, "\t--ssdn \t\t\tSSD. Used for all time points - images are NOT normalized between 0 and 1 before computing the measure");
-    reg_print_info(exec, "\t-ssdn <tp> \t\tSSD. Used for the specified timepoint - images are NOT normalized between 0 and 1 before computing the measure");
-    reg_print_info(exec, "\t--mind <offset>\t\tMIND and the offset to use to compute the descriptor");
-    reg_print_info(exec, "\t--mindssc <offset>\tMIND-SCC and the offset to use to compute the descriptor");
-    reg_print_info(exec, "\t--kld\t\t\tKLD. Used for all time points");
-    reg_print_info(exec, "\t-kld <tp>\t\tKLD. Used for the specified timepoint");
-    reg_print_info(exec, "\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities");
-    reg_print_info(exec, "\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile");
-    reg_print_info(exec, "*** Options for setting the weights for each timepoint for each similarity");
-    reg_print_info(exec, "*** Note, the options above should be used first and will set a default weight of 1");
-    reg_print_info(exec, "*** The options below should be used afterwards to set the desired weight if different to 1");
-    reg_print_info(exec, "\t-nmiw <tp> <float>\tNMI Weight. Weight to use for the NMI similarity measure for the specified timepoint");
-    reg_print_info(exec, "\t-lnccw <tp> <float>\tLNCC Weight. Weight to use for the LNCC similarity measure for the specified timepoint");
-    reg_print_info(exec, "\t-ssdw <tp> <float>\tSSD Weight. Weight to use for the SSD similarity measure for the specified timepoint");
-    reg_print_info(exec, "\t-kldw <tp> <float>\tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint");
-    reg_print_info(exec, "\t-wSim <filename>\tWeight to apply to the measure of similarity at each voxel position");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("Fast Free-Form Deformation (F3D) algorithm for non-rigid registration.");
+    NR_INFO("Based on Modat et al., \"Fast Free-Form Deformation using");
+    NR_INFO("graphics processing units\", CMPB, 2010");
+    NR_INFO("For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("Usage:\t" << exec << " -ref <filename> -flo <filename> [OPTIONS]");
+    NR_INFO("\t-ref <filename>\tFilename of the reference image (mandatory)");
+    NR_INFO("\t-flo <filename>\tFilename of the floating image (mandatory)");
+    NR_INFO("***************");
+    NR_INFO("*** OPTIONS ***");
+    NR_INFO("***************");
+    NR_INFO("*** Initial transformation options (One option will be considered):");
+    NR_INFO("\t-aff <filename>\t\tFilename which contains an affine transformation (Affine*Reference=Floating)");
+    NR_INFO("\t-incpp <filename>\tFilename of the control point grid input");
+    NR_INFO("\t\t\t\tThe coarse spacing is defined by this file.");
+    NR_INFO("");
+    NR_INFO("*** Output options:");
+    NR_INFO("\t-cpp <filename>\t\tFilename of control point grid [outputCPP.nii]");
+    NR_INFO("\t-res <filename> \tFilename of the resampled image [outputResult.nii]");
+    NR_INFO("");
+    NR_INFO("*** Input image options:");
+    NR_INFO("\t-rmask <filename>\t\tFilename of a mask image in the reference space");
+    NR_INFO("\t-smooR <float>\t\t\tSmooth the reference image using the specified sigma (mm) [0]");
+    NR_INFO("\t-smooF <float>\t\t\tSmooth the floating image using the specified sigma (mm) [0]");
+    NR_INFO("\t--rLwTh <float>\t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*");
+    NR_INFO("\t--rUpTh <float>\t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*");
+    NR_INFO("\t--fLwTh <float>\t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*");
+    NR_INFO("\t--fUpTh <float>\t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*");
+    NR_INFO("\t-rLwTh <timepoint> <float>\tLower threshold to apply to the reference image intensities [none]*");
+    NR_INFO("\t-rUpTh <timepoint> <float>\tUpper threshold to apply to the reference image intensities [none]*");
+    NR_INFO("\t-fLwTh <timepoint> <float>\tLower threshold to apply to the floating image intensities [none]*");
+    NR_INFO("\t-fUpTh <timepoint> <float>\tUpper threshold to apply to the floating image intensities [none]*");
+    NR_INFO("\t* The scl_slope and scl_inter from the nifti header are taken into account for the thresholds");
+    NR_INFO("");
+    NR_INFO("*** Spline options (All defined at full resolution):");
+    NR_INFO("\t-sx <float>\t\tFinal grid spacing along the x axis in mm (in voxel if negative value) [5 voxels]");
+    NR_INFO("\t-sy <float>\t\tFinal grid spacing along the y axis in mm (in voxel if negative value) [sx value]");
+    NR_INFO("\t-sz <float>\t\tFinal grid spacing along the z axis in mm (in voxel if negative value) [sx value]");
+    NR_INFO("");
+    NR_INFO("*** Regularisation options:");
+    NR_INFO("\t-be <float>\t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]");
+    NR_INFO("\t-le <float>\t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.01]");
+    NR_INFO("\t-jl <float>\t\tWeight of log of the Jacobian determinant penalty term [0.0]");
+    NR_INFO("\t-noAppJL\t\tTo not approximate the JL value only at the control point position");
+    NR_INFO("\t-land <float> <file>\tUse of a set of landmarks which distance should be minimised");
+    NR_INFO("\t\t\t\tThe first argument corresponds to the weight given to this regularisation (between 0 and 1)");
+    NR_INFO("\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimetre as");
+    NR_INFO("\t\t\t\t<refX> <refY> <refZ> <floX> <floY> <floZ>\\n for 3D images and");
+    NR_INFO("\t\t\t\t<refX> <refY> <floX> <floY>\\n for 2D images");
+    NR_INFO("");
+    NR_INFO("*** Measure of similarity options:");
+    NR_INFO("*** NMI with 64 bins is used except if specified otherwise");
+    NR_INFO("\t--nmi\t\t\tNMI. Used NMI even when one or several other measures are specified");
+    NR_INFO("\t--rbn <int>\t\tNMI. Number of bin to use for the reference image histogram. Identical value for every timepoint");
+    NR_INFO("\t--fbn <int>\t\tNMI. Number of bin to use for the floating image histogram. Identical value for every timepoint");
+    NR_INFO("\t-rbn <tp> <int>\t\tNMI. Number of bin to use for the reference image histogram for the specified time point");
+    NR_INFO("\t-fbn <tp> <int>\t\tNMI. Number of bin to use for the floating image histogram for the specified time point");
+    NR_INFO("\t--lncc <float>\t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every timepoint");
+    NR_INFO("\t-lncc <tp> <float>\tLNCC. Standard deviation of the Gaussian kernel for the specified timepoint");
+    NR_INFO("\t--ssd \t\t\tSSD. Used for all time points - images are normalized between 0 and 1 before computing the measure");
+    NR_INFO("\t-ssd <tp> \t\tSSD. Used for the specified timepoint - images are normalized between 0 and 1 before computing the measure");
+    NR_INFO("\t--ssdn \t\t\tSSD. Used for all time points - images are NOT normalized between 0 and 1 before computing the measure");
+    NR_INFO("\t-ssdn <tp> \t\tSSD. Used for the specified timepoint - images are NOT normalized between 0 and 1 before computing the measure");
+    NR_INFO("\t--mind <offset>\t\tMIND and the offset to use to compute the descriptor");
+    NR_INFO("\t--mindssc <offset>\tMIND-SCC and the offset to use to compute the descriptor");
+    NR_INFO("\t--kld\t\t\tKLD. Used for all time points");
+    NR_INFO("\t-kld <tp>\t\tKLD. Used for the specified timepoint");
+    NR_INFO("\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities");
+    NR_INFO("\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile");
+    NR_INFO("*** Options for setting the weights for each timepoint for each similarity");
+    NR_INFO("*** Note, the options above should be used first and will set a default weight of 1");
+    NR_INFO("*** The options below should be used afterwards to set the desired weight if different to 1");
+    NR_INFO("\t-nmiw <tp> <float>\tNMI Weight. Weight to use for the NMI similarity measure for the specified timepoint");
+    NR_INFO("\t-lnccw <tp> <float>\tLNCC Weight. Weight to use for the LNCC similarity measure for the specified timepoint");
+    NR_INFO("\t-ssdw <tp> <float>\tSSD Weight. Weight to use for the SSD similarity measure for the specified timepoint");
+    NR_INFO("\t-kldw <tp> <float>\tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint");
+    NR_INFO("\t-wSim <filename>\tWeight to apply to the measure of similarity at each voxel position");
 
-    // reg_print_info(exec, "\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)");
-    reg_print_info(exec, "");
-    reg_print_info(exec, "*** Optimisation options:");
-    reg_print_info(exec, "\t-maxit <int>\t\tMaximal number of iteration at the final level [150]");
-    reg_print_info(exec, "\t-ln <int>\t\tNumber of level to perform [3]");
-    reg_print_info(exec, "\t-lp <int>\t\tOnly perform the first levels [ln]");
-    reg_print_info(exec, "\t-nopy\t\t\tDo not use a pyramidal approach");
-    reg_print_info(exec, "\t-noConj\t\t\tTo not use the conjugate gradient optimisation but a simple gradient ascent");
-    reg_print_info(exec, "\t-pert <int>\t\tTo add perturbation step(s) after each optimisation scheme");
-    reg_print_info(exec, "");
-    reg_print_info(exec, "*** F3D2 options:");
-    reg_print_info(exec, "\t-vel \t\t\tUse a velocity field integration to generate the deformation");
-    reg_print_info(exec, "\t-nogce \t\t\tDo not use the gradient accumulation through exponentiation");
-    reg_print_info(exec, "\t-fmask <filename>\tFilename of a mask image in the floating space");
-    reg_print_info(exec, "");
+    // NR_INFO("\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)");
+    NR_INFO("");
+    NR_INFO("*** Optimisation options:");
+    NR_INFO("\t-maxit <int>\t\tMaximal number of iteration at the final level [150]");
+    NR_INFO("\t-ln <int>\t\tNumber of level to perform [3]");
+    NR_INFO("\t-lp <int>\t\tOnly perform the first levels [ln]");
+    NR_INFO("\t-nopy\t\t\tDo not use a pyramidal approach");
+    NR_INFO("\t-noConj\t\t\tTo not use the conjugate gradient optimisation but a simple gradient ascent");
+    NR_INFO("\t-pert <int>\t\tTo add perturbation step(s) after each optimisation scheme");
+    NR_INFO("");
+    NR_INFO("*** F3D2 options:");
+    NR_INFO("\t-vel \t\t\tUse a velocity field integration to generate the deformation");
+    NR_INFO("\t-nogce \t\t\tDo not use the gradient accumulation through exponentiation");
+    NR_INFO("\t-fmask <filename>\tFilename of a mask image in the floating space");
+    NR_INFO("");
 
     if (Platform::IsCudaEnabled() || Platform::IsOpenClEnabled()) {
-        reg_print_info(exec, "*** Platform options:");
+        NR_INFO("*** Platform options:");
         std::string platform = "\t-platf <uint>\t\tChoose platform: CPU=0 | ";
         if (Platform::IsCudaEnabled()) {
             platform += "Cuda=1";
@@ -145,36 +141,33 @@ void Usage(char *exec) {
         if (Platform::IsOpenClEnabled())
             platform += "OpenCL=2";
         platform += " [0]";
-        reg_print_info(exec, platform.c_str());
+        NR_INFO(platform);
 
-        reg_print_info(exec, "\t-gpuid <uint>\t\tChoose a custom gpu.");
-        reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
+        NR_INFO("\t-gpuid <uint>\t\tChoose a custom gpu.");
+        NR_INFO("\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
     }
 
 #ifdef _OPENMP
-    reg_print_info(exec, "");
-    reg_print_info(exec, "*** OpenMP-related options:");
+    NR_INFO("");
+    NR_INFO("*** OpenMP-related options:");
     int defaultOpenMPValue = omp_get_num_procs();
     if (getenv("OMP_NUM_THREADS") != nullptr)
         defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
-    sprintf(text, "\t-omp <int>\t\tNumber of thread to use with OpenMP. [%i/%i]",
-            defaultOpenMPValue, omp_get_num_procs());
-    reg_print_info(exec, text);
+    NR_INFO("\t-omp <int>\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]");
 #endif
-    reg_print_info(exec, "");
-    reg_print_info(exec, "*** Other options:");
-    reg_print_info(exec, "\t-smoothGrad <float>\tTo smooth the metric derivative (in mm) [0]");
-    reg_print_info(exec, "\t-pad <float>\t\tPadding value [nan]");
-    reg_print_info(exec, "\t-voff\t\t\tTo turn verbose off");
-    reg_print_info(exec, "\t--version\t\tPrint current version and exit");
-    sprintf(text, "\t\t\t\t(%s)", NR_VERSION);
-    reg_print_info(exec, text);
-    reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("");
+    NR_INFO("*** Other options:");
+    NR_INFO("\t-smoothGrad <float>\tTo smooth the metric derivative (in mm) [0]");
+    NR_INFO("\t-pad <float>\t\tPadding value [nan]");
+    NR_INFO("\t-voff\t\t\tTo turn verbose off");
+    NR_INFO("\t--version\t\tPrint current version and exit");
+    NR_INFO("\t\t\t\t(" << NR_VERSION << ")");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 int main(int argc, char **argv) {
     if (argc == 1) {
-        PetitUsage((argv[0]));
+        PetitUsage(argv[0]);
         return EXIT_FAILURE;
     }
     time_t start;
@@ -182,7 +175,7 @@ int main(int argc, char **argv) {
     int verbose = true;
 
 #ifdef _OPENMP
-    // Set the default number of thread
+    // Set the default number of threads
     int defaultOpenMPValue = omp_get_num_procs();
     if (getenv("OMP_NUM_THREADS") != nullptr)
         defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
@@ -206,13 +199,12 @@ int main(int argc, char **argv) {
             return EXIT_SUCCESS;
         }
         if (strcmp(argv[i], "--xml") == 0) {
-            printf("%s", xml_f3d);
+            NR_COUT << xml_f3d;
             return EXIT_SUCCESS;
         }
         if (strcmp(argv[i], "-voff") == 0) {
-#ifndef NDEBUG
-            reg_print_msg_debug("The verbose cannot be switch off in debug");
-#else
+            NR_DEBUG("The verbose cannot be switch off in debug");
+#ifdef NDEBUG
             verbose = false;
 #endif
         }
@@ -222,26 +214,13 @@ int main(int argc, char **argv) {
             strcmp(argv[i], "-v") == 0 ||
             strcmp(argv[i], "--v") == 0 ||
             strcmp(argv[i], "--version") == 0) {
-            printf("%s\n", NR_VERSION);
+            NR_COUT << NR_VERSION << std::endl;
             return EXIT_SUCCESS;
         }
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
     // Output the command line
-#ifdef NDEBUG
-    if (verbose) {
-#endif
-        reg_print_info((argv[0]), "");
-        reg_print_info((argv[0]), "Command line:");
-        text = "\t";
-        for (int i = 0; i < argc; i++) {
-            text = stringFormat("%s %s", text.c_str(), argv[i]);
-        }
-        reg_print_info((argv[0]), text.c_str());
-        reg_print_info((argv[0]), "");
-#ifdef NDEBUG
-    }
-#endif
+    PrintCmdLine(argc, argv, verbose);
 
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
     // Read the reference and floating image
@@ -250,30 +229,28 @@ int main(int argc, char **argv) {
         if ((strcmp(argv[i], "-ref") == 0) || (strcmp(argv[i], "-target") == 0) || (strcmp(argv[i], "--ref") == 0)) {
             referenceImage = reg_io_ReadImageFile(argv[++i]);
             if (!referenceImage) {
-                reg_print_msg_error("Error when reading the reference image:");
-                reg_print_msg_error(argv[i - 1]);
+                NR_ERROR("Error when reading the reference image: " << argv[i - 1]);
                 return EXIT_FAILURE;
             }
         }
         if ((strcmp(argv[i], "-flo") == 0) || (strcmp(argv[i], "-source") == 0) || (strcmp(argv[i], "--flo") == 0)) {
             floatingImage = reg_io_ReadImageFile(argv[++i]);
             if (!floatingImage) {
-                reg_print_msg_error("Error when reading the floating image:");
-                reg_print_msg_error(argv[i - 1]);
+                NR_ERROR("Error when reading the floating image: " << argv[i - 1]);
                 return EXIT_FAILURE;
             }
         }
     }
     // Check that both reference and floating image have been defined
     if (!referenceImage) {
-        reg_print_msg_error("Error. No reference image has been defined");
-        PetitUsage((argv[0]));
+        NR_ERROR("Error. No reference image has been defined");
+        PetitUsage(argv[0]);
         return EXIT_FAILURE;
     }
     // Read the floating image
     if (!floatingImage) {
-        reg_print_msg_error("Error. No floating image has been defined");
-        PetitUsage((argv[0]));
+        NR_ERROR("Error. No floating image has been defined");
+        PetitUsage(argv[0]);
         return EXIT_FAILURE;
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/
@@ -287,17 +264,17 @@ int main(int argc, char **argv) {
         } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
             PlatformType value{ atoi(argv[++i]) };
             if (value < PlatformType::Cpu || value > PlatformType::Cuda) {
-                reg_print_msg_error("The platform argument is expected to be 0 or 1 | 0=CPU 1=CUDA");
+                NR_ERROR("The platform argument is expected to be 0 or 1 | 0=CPU 1=CUDA");
                 return EXIT_FAILURE;
             }
             if (value == PlatformType::Cuda && !Platform::IsCudaEnabled()) {
-                reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA");
-                reg_print_msg_warn("The CPU platform is used");
+                NR_WARN("The current install of NiftyReg has not been compiled with CUDA");
+                NR_WARN("The CPU platform is used");
                 value = PlatformType::Cpu;
             }
             if (value == PlatformType::OpenCl && !Platform::IsOpenClEnabled()) {
-                reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL");
-                reg_print_msg_warn("The CPU platform is used");
+                NR_WARN("The current install of NiftyReg has not been compiled with OpenCL");
+                NR_WARN("The CPU platform is used");
                 value = PlatformType::Cpu;
             }
             platformType = value;
@@ -336,8 +313,7 @@ int main(int argc, char **argv) {
             if (FILE *aff = fopen(affineTransformationName, "r")) {
                 fclose(aff);
             } else {
-                reg_print_msg_error("The specified input affine file can not be read:");
-                reg_print_msg_error(affineTransformationName);
+                NR_ERROR("The specified input affine file can not be read: " << affineTransformationName);
                 return EXIT_FAILURE;
             }
             // Read the affine matrix
@@ -348,16 +324,14 @@ int main(int argc, char **argv) {
         } else if (strcmp(argv[i], "-incpp") == 0 || (strcmp(argv[i], "--incpp") == 0)) {
             NiftiImage inputCCPImage = reg_io_ReadImageFile(argv[++i]);
             if (!inputCCPImage) {
-                reg_print_msg_error("Error when reading the input control point grid image:");
-                reg_print_msg_error(argv[i - 1]);
+                NR_ERROR("Error when reading the input control point grid image: " << argv[i - 1]);
                 return EXIT_FAILURE;
             }
             reg->SetControlPointGridImage(std::move(inputCCPImage));
         } else if ((strcmp(argv[i], "-rmask") == 0) || (strcmp(argv[i], "-tmask") == 0) || (strcmp(argv[i], "--rmask") == 0)) {
             NiftiImage referenceMaskImage = reg_io_ReadImageFile(argv[++i]);
             if (!referenceMaskImage) {
-                reg_print_msg_error("Error when reading the reference mask image:");
-                reg_print_msg_error(argv[i - 1]);
+                NR_ERROR("Error when reading the reference mask image: " << argv[i - 1]);
                 return EXIT_FAILURE;
             }
             reg->SetReferenceMask(std::move(referenceMaskImage));
@@ -423,13 +397,13 @@ int main(int argc, char **argv) {
             size_t landmarkNumber = inputMatrixSize.first;
             size_t n = inputMatrixSize.second;
             if (n == 4 && referenceImage->nz > 1) {
-                reg_print_msg_error("4 values per line are expected for 2D images");
+                NR_ERROR("4 values per line are expected for 2D images");
                 return EXIT_FAILURE;
             } else if (n == 6 && referenceImage->nz < 2) {
-                reg_print_msg_error("6 values per line are expected for 3D images");
+                NR_ERROR("6 values per line are expected for 3D images");
                 return EXIT_FAILURE;
             } else if (n != 4 && n != 6) {
-                reg_print_msg_error("4 or 6 values are expected per line");
+                NR_ERROR("4 or 6 values are expected per line");
                 return EXIT_FAILURE;
             }
             float **allLandmarks = reg_tool_ReadMatrixFile<float>(filename, landmarkNumber, n);
@@ -517,8 +491,8 @@ int main(int argc, char **argv) {
             int offset = atoi(argv[++i]);
             if (offset != -999999) { // Value specified by the CLI - to be ignored
                 if (referenceImage->nt > 1 || floatingImage->nt > 1) {
-                    reg_print_msg_error("reg_mind does not support multiple time point image");
-                    reg_exit();
+                    NR_ERROR("reg_mind does not support multiple time point image");
+                    return EXIT_FAILURE;
                 }
                 reg->UseMIND(0, offset);
             }
@@ -526,8 +500,8 @@ int main(int argc, char **argv) {
             int offset = atoi(argv[++i]);
             if (offset != -999999) { // Value specified by the CLI - to be ignored
                 if (referenceImage->nt > 1 || floatingImage->nt > 1) {
-                    reg_print_msg_error("reg_mindssc does not support multiple time point image");
-                    reg_exit();
+                    NR_ERROR("reg_mindssc does not support multiple time point image");
+                    return EXIT_FAILURE;
                 }
                 reg->UseMINDSSC(0, offset);
             }
@@ -607,8 +581,7 @@ int main(int argc, char **argv) {
                  (strcmp(argv[i], "--fmask") == 0) || (strcmp(argv[i], "--smask") == 0)) {
             NiftiImage floatingMaskImage = reg_io_ReadImageFile(argv[++i]);
             if (!floatingMaskImage) {
-                reg_print_msg_error("Error when reading the floating mask image:");
-                reg_print_msg_error(argv[i - 1]);
+                NR_ERROR("Error when reading the floating mask image: " << argv[i - 1]);
                 return EXIT_FAILURE;
             }
             reg->SetFloatingMask(std::move(floatingMaskImage));
@@ -633,7 +606,7 @@ int main(int argc, char **argv) {
 #ifdef _OPENMP
             omp_set_num_threads(atoi(argv[++i]));
 #else
-            reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
+            NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
             ++i;
 #endif
         }
@@ -646,32 +619,25 @@ int main(int argc, char **argv) {
                  strcmp(argv[i], "-v") != 0 && strcmp(argv[i], "--v") != 0 &&
                  strcmp(argv[i], "-platf") != 0 && strcmp(argv[i], "--platf") != 0 &&
                  strcmp(argv[i], "-vel") != 0) {
-            reg_print_msg_error("\tParameter unknown:");
-            reg_print_msg_error(argv[i]);
-            PetitUsage((argv[0]));
+            NR_ERROR("\tUnknown parameter: " << argv[i]);
+            PetitUsage(argv[0]);
             return EXIT_FAILURE;
         }
     }
     if (useMeanLNCC)
         reg->SetLNCCKernelType(2);
 
-#ifndef NDEBUG
-    reg_print_msg_debug("*******************************************");
-    reg_print_msg_debug("*******************************************");
-    reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode");
-    reg_print_msg_debug("Please re-run cmake to set the variable");
-    reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required");
-    reg_print_msg_debug("*******************************************");
-    reg_print_msg_debug("*******************************************");
-#endif
+    NR_DEBUG("*******************************************");
+    NR_DEBUG("*******************************************");
+    NR_DEBUG("NiftyReg has been compiled in DEBUG mode");
+    NR_DEBUG("Please re-run cmake to set the variable");
+    NR_DEBUG("CMAKE_BUILD_TYPE to \"Release\" if required");
+    NR_DEBUG("*******************************************");
+    NR_DEBUG("*******************************************");
 
 #ifdef _OPENMP
-    if (verbose) {
-        int maxThreadNumber = omp_get_max_threads();
-        text = stringFormat("OpenMP is used with %i thread(s)", maxThreadNumber);
-        reg_print_info((argv[0]), text.c_str());
-    }
-#endif // _OPENMP
+    NR_VERBOSE_APP("OpenMP is used with " << omp_get_max_threads() << " threads");
+#endif
 
     // Run the registration
     reg->Run();
@@ -742,19 +708,12 @@ int main(int argc, char **argv) {
     }
     reg_io_WriteImageFile(outputWarpedImages[0], outputWarpedImageName);
 
-#ifdef NDEBUG
-    if (verbose) {
-#endif
-        time_t end;
-        time(&end);
-        int minutes = (int)floorf((end - start) / 60.0f);
-        int seconds = ((int)(end - start) - 60 * minutes);
-        text = stringFormat("Registration performed in %i min %i sec", minutes, seconds);
-        reg_print_info((argv[0]), text.c_str());
-        reg_print_info((argv[0]), "Have a good day !");
-#ifdef NDEBUG
-    }
-#endif
+    time_t end;
+    time(&end);
+    const int minutes = static_cast<int>(floorf((end - start) / 60.0f));
+    const int seconds = static_cast<int>(end - start) - 60 * minutes;
+    NR_VERBOSE_APP("Registration performed in " << minutes << " min " << seconds << " sec");
+    NR_VERBOSE_APP("Have a good day!");
 
     return EXIT_SUCCESS;
 }
diff --git a/reg-apps/reg_gpuinfo.cpp b/reg-apps/reg_gpuinfo.cpp
index 5e3d768f..22008d4e 100644
--- a/reg-apps/reg_gpuinfo.cpp
+++ b/reg-apps/reg_gpuinfo.cpp
@@ -13,9 +13,10 @@ int main()
 {
 #ifdef _USE_CUDA
    showCUDAInfo();
+#else
 #ifndef _USE_OPENCL
-   reg_print_msg_warn("NiftyReg has not been compiled with CUDA or OpenCL");
-   reg_print_msg_warn("No GPU device information to display");
+   NR_WARN("NiftyReg has not been compiled with CUDA or OpenCL");
+   NR_WARN("No GPU device information to display");
 #endif
 #endif
 #ifdef _USE_OPENCL
diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp
index d3cb4757..27b517bf 100644
--- a/reg-apps/reg_jacobian.cpp
+++ b/reg-apps/reg_jacobian.cpp
@@ -97,36 +97,34 @@ void reg_jacobian_convertMat33ToNii(mat33 *array, nifti_image *image)
 
 void PetitUsage(char *exec)
 {
-   fprintf(stderr,"Usage:\t%s -ref <referenceImage> [OPTIONS].\n",exec);
-   fprintf(stderr,"\tSee the help for more details (-h).\n");
-   return;
+   NR_INFO("Usage:\t" << exec << " -ref <referenceImage> [OPTIONS]");
+   NR_INFO("\tSee the help for more details (-h)");
 }
+
 void Usage(char *exec)
 {
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   printf("Usage:\t%s [OPTIONS].\n",exec);
-   printf("* * INPUT * *\n");
-   printf("\t-trans <filename>\n");
-   printf("\t\tFilename of the file containing the transformation (mandatory).\n");
-   printf("\t-ref <filename>\n");
-   printf("\t\tFilename of the reference image (required if the transformation is a spline parametrisation)\n");
-   printf("\n* * OUTPUT * *\n");
-   printf("\t-jac <filename>\n");
-   printf("\t\tFilename of the Jacobian determinant map.\n");
-   printf("\t-jacM <filename>\n");
-   printf("\t\tFilename of the Jacobian matrix map. (9 or 4 values are stored as a 5D nifti).\n");
-   printf("\t-jacL <filename>\n");
-   printf("\t\tFilename of the Log of the Jacobian determinant map.\n");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+   NR_INFO("Usage:\t" << exec << " [OPTIONS]");
+   NR_INFO("* * INPUT * *");
+   NR_INFO("\t-trans <filename>");
+   NR_INFO("\t\tFilename of the file containing the transformation (mandatory)");
+   NR_INFO("\t-ref <filename>");
+   NR_INFO("\t\tFilename of the reference image (required if the transformation is a spline parametrisation)");
+   NR_INFO("\n* * OUTPUT * *");
+   NR_INFO("\t-jac <filename>");
+   NR_INFO("\t\tFilename of the Jacobian determinant map");
+   NR_INFO("\t-jacM <filename>");
+   NR_INFO("\t\tFilename of the Jacobian matrix map. (9 or 4 values are stored as a 5D nifti)");
+   NR_INFO("\t-jacL <filename>");
+   NR_INFO("\t\tFilename of the Log of the Jacobian determinant map");
 #ifdef _OPENMP
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   printf("\t-omp <int>\n\t\tNumber of thread to use with OpenMP. [%i/%i]\n",
-          defaultOpenMPValue, omp_get_num_procs());
+   NR_INFO("\t-omp <int>\n\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]");
 #endif
-   printf("\t--version\n\t\tPrint current version and exit (%s)\n",NR_VERSION);
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   return;
+   NR_INFO("\t--version\n\t\tPrint current version and exit (" << NR_VERSION << ")");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 int main(int argc, char **argv)
@@ -140,7 +138,7 @@ int main(int argc, char **argv)
    FLAG *flag = (FLAG *)calloc(1,sizeof(FLAG));
 
 #ifdef _OPENMP
-   // Set the default number of thread
+   // Set the default number of threads
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -165,7 +163,7 @@ int main(int argc, char **argv)
       }
       else if(strcmp(argv[i], "--xml")==0)
       {
-         printf("%s",xml_jacobian);
+         NR_COUT << xml_jacobian << std::endl;
          return EXIT_SUCCESS;
       }
       else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
@@ -173,7 +171,7 @@ int main(int argc, char **argv)
 #ifdef _OPENMP
          omp_set_num_threads(atoi(argv[++i]));
 #else
-         reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
+         NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
          ++i;
 #endif
       }
@@ -184,7 +182,7 @@ int main(int argc, char **argv)
           strcmp(argv[i], "--v")==0 ||
           strcmp(argv[i], "--version")==0)
       {
-         printf("%s\n",NR_VERSION);
+         NR_COUT << NR_VERSION << std::endl;
          return EXIT_SUCCESS;
       }
       else if((strcmp(argv[i],"-ref")==0) || (strcmp(argv[i],"-target")==0) ||
@@ -219,7 +217,7 @@ int main(int argc, char **argv)
       }
       else
       {
-         fprintf(stderr,"Err:\tParameter %s unknown.\n", argv[i]);
+         NR_ERROR("Parameter unknown: " << argv[i]);
          PetitUsage(argv[0]);
          return EXIT_FAILURE;
       }
@@ -235,20 +233,20 @@ int main(int argc, char **argv)
       if(!reg_isAnImageFileName(param->inputTransName)){
          mat44 *affineTransformation=(mat44 *)malloc(sizeof(mat44));
          reg_tool_ReadAffineFile(affineTransformation,param->inputTransName);
-         printf("%g\n", reg_mat44_det<double>(affineTransformation));
+         NR_COUT << reg_mat44_det<double>(affineTransformation) << std::endl;
          return EXIT_SUCCESS;
       }
 
       inputTransformation = reg_io_ReadImageFile(param->inputTransName);
       if(inputTransformation == nullptr)
       {
-         fprintf(stderr,"** ERROR Error when reading the transformation image: %s\n",param->inputTransName);
+         NR_ERROR("Error when reading the transformation image: " << param->inputTransName);
          return EXIT_FAILURE;
       }
    }
    else
    {
-      fprintf(stderr, "No transformation has been provided.\n");
+      NR_ERROR("No transformation has been provided");
       return EXIT_FAILURE;
    }
 
@@ -261,15 +259,15 @@ int main(int argc, char **argv)
          inputTransformation->intent_p1==CUB_SPLINE_GRID ||
          inputTransformation->intent_p1==SPLINE_VEL_GRID){
       if(!flag->refImageFlag){
-         reg_print_msg_error("A reference image has to be specified with a spline parametrisation.");
-         reg_exit();
+         NR_ERROR("A reference image has to be specified with a spline parametrisation.");
+         return EXIT_FAILURE;
       }
       // Read the reference image
       referenceImage = reg_io_ReadImageHeader(param->refImageName);
       if(referenceImage == nullptr)
       {
-         reg_print_msg_error("Error when reading the reference image.");
-         reg_exit();
+         NR_ERROR("Error when reading the reference image.");
+         return EXIT_FAILURE;
       }
    }
 
diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp
index dffc2f2b..df142de5 100755
--- a/reg-apps/reg_measure.cpp
+++ b/reg-apps/reg_measure.cpp
@@ -47,35 +47,33 @@ typedef struct
 
 void PetitUsage(char *exec)
 {
-   fprintf(stderr,"Usage:\t%s -ref <referenceImageName> -flo <floatingImageName> [OPTIONS].\n",exec);
-   fprintf(stderr,"\tSee the help for more details (-h).\n");
-   return;
+   NR_INFO("Usage:\t" << exec << " -ref <referenceImageName> -flo <floatingImageName> [OPTIONS]");
+   NR_INFO("\tSee the help for more details (-h)");
 }
+
 void Usage(char *exec)
 {
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   printf("Usage:\t%s -ref <filename> -flo <filename> [OPTIONS].\n",exec);
-   printf("\t-ref <filename>\tFilename of the reference image (mandatory)\n");
-   printf("\t-flo <filename>\tFilename of the floating image (mandatory)\n");
-   printf("\t\tNote that the floating image is resampled into the reference\n");
-   printf("\t\timage space using the header informations.\n");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+   NR_INFO("Usage:\t" << exec << " -ref <filename> -flo <filename> [OPTIONS]");
+   NR_INFO("\t-ref <filename>\tFilename of the reference image (mandatory)");
+   NR_INFO("\t-flo <filename>\tFilename of the floating image (mandatory)");
+   NR_INFO("\t\tNote that the floating image is resampled into the reference");
+   NR_INFO("\t\timage space using the header informations");
 
-   printf("* * OPTIONS * *\n");
-   printf("\t-ncc\t\tReturns the NCC value\n");
-   printf("\t-lncc\t\tReturns the LNCC value\n");
-   printf("\t-nmi\t\tReturns the NMI value (64 bins are used)\n");
-   printf("\t-ssd\t\tReturns the SSD value\n");
-   printf("\n\t-out\t\tText file output where to store the value(s).\n\t\t\tThe stdout is used by default\n");
+   NR_INFO("* * OPTIONS * *");
+   NR_INFO("\t-ncc\t\tReturns the NCC value");
+   NR_INFO("\t-lncc\t\tReturns the LNCC value");
+   NR_INFO("\t-nmi\t\tReturns the NMI value (64 bins are used)");
+   NR_INFO("\t-ssd\t\tReturns the SSD value");
+   NR_INFO("\n\t-out\t\tText file output where to store the value(s).\n\t\t\tThe stdout is used by default");
 #ifdef _OPENMP
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   printf("\t-omp <int>\tNumber of thread to use with OpenMP. [%i/%i]\n",
-          defaultOpenMPValue, omp_get_num_procs());
+   NR_INFO("\t-omp <int>\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]");
 #endif
-   printf("\t--version\tPrint current version and exit (%s)\n",NR_VERSION);
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   return;
+   NR_INFO("\t--version\tPrint current version and exit (" << NR_VERSION << ")");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 int main(int argc, char **argv)
@@ -87,7 +85,7 @@ int main(int argc, char **argv)
    param->paddingValue=std::numeric_limits<float>::quiet_NaN();
 
 #ifdef _OPENMP
-   // Set the default number of thread
+   // Set the default number of threads
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -110,17 +108,12 @@ int main(int argc, char **argv)
          Usage(argv[0]);
          return EXIT_SUCCESS;
       }
-//      else if(strcmp(argv[i], "--xml")==0)
-//      {
-//         printf("%s",xml_measure);
-//         return exit_success;
-//      }
       else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
       {
 #ifdef _OPENMP
          omp_set_num_threads(atoi(argv[++i]));
 #else
-         reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
+         NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
          ++i;
 #endif
       }
@@ -131,7 +124,7 @@ int main(int argc, char **argv)
             strcmp(argv[i], "--v")==0 ||
             strcmp(argv[i], "--version")==0)
       {
-         printf("%s\n",NR_VERSION);
+         NR_COUT << NR_VERSION << std::endl;
          return EXIT_SUCCESS;
       }
       else if((strcmp(argv[i],"-ref")==0) || (strcmp(argv[i],"-target")==0) ||
@@ -201,7 +194,7 @@ int main(int argc, char **argv)
       }
       else
       {
-         fprintf(stderr,"Err:\tParameter %s unknown.\n",argv[i]);
+         NR_ERROR("Parameter unknown: " << argv[i]);
          PetitUsage(argv[0]);
          return EXIT_FAILURE;
       }
@@ -209,7 +202,7 @@ int main(int argc, char **argv)
 
    if(!flag->refImageFlag || !flag->floImageFlag)
    {
-      fprintf(stderr,"[NiftyReg ERROR] The reference and the floating image have both to be defined.\n");
+      NR_ERROR("The reference and the floating image have both to be defined");
       PetitUsage(argv[0]);
       return EXIT_FAILURE;
    }
@@ -218,7 +211,7 @@ int main(int argc, char **argv)
    NiftiImage refImage = reg_io_ReadImageFile(param->refImageName);
    if(!refImage)
    {
-      fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n", param->refImageName);
+      NR_ERROR("Error when reading the reference image: " << param->refImageName);
       return EXIT_FAILURE;
    }
    reg_tools_changeDatatype<float>(refImage);
@@ -227,7 +220,7 @@ int main(int argc, char **argv)
    NiftiImage floImage = reg_io_ReadImageFile(param->floImageName);
    if(!floImage)
    {
-      fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n", param->floImageName);
+      NR_ERROR("Error when reading the floating image: " << param->floImageName);
       return EXIT_FAILURE;
    }
    reg_tools_changeDatatype<float>(floImage);
@@ -240,7 +233,7 @@ int main(int argc, char **argv)
       NiftiImage refMaskImage = reg_io_ReadImageFile(param->refMaskImageName);
       if(!refMaskImage)
       {
-         fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference mask image: %s\n", param->refMaskImageName);
+         NR_ERROR("Error when reading the reference mask image: " << param->refMaskImageName);
          return EXIT_FAILURE;
       }
       reg_createMaskPyramid<float>(refMaskImage, refMasks, 1, 1);
@@ -307,7 +300,7 @@ int main(int argc, char **argv)
          }
       }
       if(refMaskVoxNumber==0)
-         fprintf(stderr, "No active voxel\n");
+         NR_ERROR("No active voxel");
       refMeanValue /= (double)refMaskVoxNumber;
       warMeanValue /= (double)refMaskVoxNumber;
       double refSTDValue =0.;
@@ -327,7 +320,7 @@ int main(int argc, char **argv)
             (double)refMaskVoxNumber;
       if(outFile!=nullptr)
          fprintf(outFile, "%g\n", measure);
-      else printf("NCC: %g\n", measure);
+      else NR_COUT << "NCC: " << measure << std::endl;
    }
    /* Compute the LNCC if required */
    if(flag->returnLNCCFlag){
@@ -343,7 +336,7 @@ int main(int argc, char **argv)
       double measure=lncc_object->GetSimilarityMeasureValue();
       if(outFile!=nullptr)
          fprintf(outFile, "%g\n", measure);
-      else printf("LNCC: %g\n", measure);
+      else NR_COUT << "LNCC: " << measure << std::endl;
       delete lncc_object;
    }
    /* Compute the NMI if required */
@@ -360,7 +353,7 @@ int main(int argc, char **argv)
       double measure=nmi_object->GetSimilarityMeasureValue();
       if(outFile!=nullptr)
          fprintf(outFile, "%g\n", measure);
-      else printf("NMI: %g\n", measure);
+      else NR_COUT << "NMI: " << measure << std::endl;
       delete nmi_object;
    }
    /* Compute the SSD if required */
@@ -378,7 +371,7 @@ int main(int argc, char **argv)
       double measure=ssd_object->GetSimilarityMeasureValue();
       if(outFile!=nullptr)
          fprintf(outFile, "%g\n", measure);
-      else printf("SSD: %g\n", measure);
+      else NR_COUT << "SSD: " << measure << std::endl;
       delete ssd_object;
    }
    /* Compute the MIND SSD if required */
@@ -395,7 +388,7 @@ int main(int argc, char **argv)
       double measure=mind_object->GetSimilarityMeasureValue();
       if(outFile!=nullptr)
          fprintf(outFile, "%g\n", measure);
-      else printf("MIND: %g\n", measure);
+      else NR_COUT << "MIND: " << measure << std::endl;
       delete mind_object;
    }
 
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index 125b6aaa..efc7268a 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -40,7 +40,7 @@ typedef struct
    int prinComp;
    int tp;
    const char *outputResultName;
-   char *outputCPPName;
+   std::string outputCPPName;
 } PARAM;
 
 typedef struct
@@ -70,54 +70,52 @@ typedef struct
 
 void PetitUsage(char *exec)
 {
-   fprintf(stderr,"PROGRESSIVE PRINCIPAL COMPONENT REGISTRATION (PPCNR).\n");
-   fprintf(stderr,"Fast Free-Form Deformation algorithm for dynamic contrast enhanced (DCE) non-rigid registration.\n");
-   fprintf(stderr,"Usage:\t%s -source <sourceImageName> [OPTIONS].\n",exec);
-   fprintf(stderr,"\t\t\t\t*Note that no target image is needed!\n");
-   fprintf(stderr,"\tSee the help for more details (-h).\n");
-   return;
+   NR_INFO("PROGRESSIVE PRINCIPAL COMPONENT REGISTRATION (PPCNR)");
+   NR_INFO("Fast Free-Form Deformation algorithm for dynamic contrast enhanced (DCE) non-rigid registration");
+   NR_INFO("Usage:\t" << exec << " -source <sourceImageName> [OPTIONS]");
+   NR_INFO("\t\t\t\t*Note that no target image is needed!");
+   NR_INFO("\tSee the help for more details (-h)");
 }
+
 void Usage(char *exec)
 {
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   printf("PROGRESSIVE PRINCIPAL COMPONENT REGISTRATION (PPCNR).\n");
-   printf("Fast Free-Form Deformation algorithm for non-rigid DCE-MRI registration.\n");
-   printf("This implementation is a re-factoring of the PPCR algorithm in:\n");
-   printf("Melbourne et al., \"Registration of dynamic contrast-enhanced MRI using a \n");
-   printf(" progressive principal component registration (PPCR)\", Phys Med Biol, 2007.\n");
-   printf("This code has been written by Andrew Melbourne (a.melbourne@cs.ucl.ac.uk)\n");
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   printf("Usage:\t%s -source <filename> [OPTIONS].\n",exec);
-   printf("\t-source <filename>\tFilename of the source image (mandatory)\n");
-   printf("\t*Note that no target image is needed!\n\n");
-   printf("   Or   -makesource  <outputname> <n> <filenames> \tThis will generate a 4D volume from the n filenames (saved to <outputname>).\n");
-   printf("        -makesourcex <outputname> <n> <filenames> \tAs above but exits before registration step'.\n");
-   printf("        -distribute  <filename> <basename>\t\tThis will generate individual 3D volumes from the 4D filename (saved to '<basename>X.nii', 4D only).\n");
-   printf("\n*** Main Options:\n");
-   printf("\t-result <filename> \tFilename of the resampled image [outputResult.nii].\n");
-   printf("\t-pmask  <filename> \tFilename of the PCA mask region.\n");
-   printf("\t-cpp    <filename>\tFilename of final 5D control point grid (non-rigid registration only).\n");
-   printf("     Or -aff    <filename>\tFilename of final concatenated affine transformation (affine registration only).\n");
-   printf("\n*** Other Options:\n");
-   printf("\t-prinComp <int>\t\tNumber of principal component iterations to run [#timepoints/2].\n");
-   printf("\t-maxit    <int>\t\tNumber of registration iterations to run [max(400/prinComp,100)].\n");
-   printf("\t-autolevel \t\tAutomatically increase registration level during PPCR (switched off with -ln or -lp options).\n"); // not with -FLIRT
-   printf("\t-pca0 \t\t\tOutput pca images 1:prinComp without registration step [pcaX.nii].\n"); // i.e. just print out each PCA image.
-   printf("\t-pca1 \t\t\tOutput pca images 1:prinComp for inspection [pcaX.nii].\n");
-   printf("\t-pca2 \t\t\tOutput intermediate results 1:prinComp for inspection [outX.nii].\n");
-   printf("\t-pca3 \t\t\tSave current deformation result [cppX.nii].\n");
-   printf("\t-pca123 \t\tWrite out everything!.\n");
-   printf("\n*** Alternative Registration Options:\n");
-   printf("\t-mean \t\t\tIterative registration to the mean image only (no PPCR).\n"); // registration to the mean is quite inefficient as it uses the ppcr 4D->4D model.
-   printf("\t-locality <int>\t\tIterative registration to the local mean image (pm <int> images - no PPCR).\n");
-   printf("\t-tp       <int>\t\tIterative registration to single timepoint (no PPCR).\n");
-   printf("\t-noinit \t\tTurn off cpp initialisation from previous iteration.\n");
-   //printf("\t-flirt \t\t\tfor PPCNR using Flirt affine registration (not tested)\n");
-   printf("\n*** reg_f3d/reg_aladin options are carried through (use reg_f3d -h or reg_aladin -h to see these options).\n");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+   NR_INFO("PROGRESSIVE PRINCIPAL COMPONENT REGISTRATION (PPCNR).");
+   NR_INFO("Fast Free-Form Deformation algorithm for non-rigid DCE-MRI registration.");
+   NR_INFO("This implementation is a re-factoring of the PPCR algorithm in:");
+   NR_INFO("Melbourne et al., \"Registration of dynamic contrast-enhanced MRI using a ");
+   NR_INFO(" progressive principal component registration (PPCR)\", Phys Med Biol, 2007.");
+   NR_INFO("This code has been written by Andrew Melbourne (a.melbourne@cs.ucl.ac.uk)");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+   NR_INFO("Usage:\t" << exec << " -source <filename> [OPTIONS]");
+   NR_INFO("\t-source <filename>\tFilename of the source image (mandatory)");
+   NR_INFO("\t*Note that no target image is needed!\n");
+   NR_INFO("   Or   -makesource  <outputname> <n> <filenames> \tThis will generate a 4D volume from the n filenames (saved to <outputname>).");
+   NR_INFO("        -makesourcex <outputname> <n> <filenames> \tAs above but exits before registration step'.");
+   NR_INFO("        -distribute  <filename> <basename>\t\tThis will generate individual 3D volumes from the 4D filename (saved to '<basename>X.nii', 4D only).");
+   NR_INFO("\n*** Main Options:");
+   NR_INFO("\t-result <filename> \tFilename of the resampled image [outputResult.nii].");
+   NR_INFO("\t-pmask  <filename> \tFilename of the PCA mask region.");
+   NR_INFO("\t-cpp    <filename>\tFilename of final 5D control point grid (non-rigid registration only).");
+   NR_INFO("     Or -aff    <filename>\tFilename of final concatenated affine transformation (affine registration only).");
+   NR_INFO("\n*** Other Options:");
+   NR_INFO("\t-prinComp <int>\t\tNumber of principal component iterations to run [#timepoints/2].");
+   NR_INFO("\t-maxit    <int>\t\tNumber of registration iterations to run [max(400/prinComp,100)].");
+   NR_INFO("\t-autolevel \t\tAutomatically increase registration level during PPCR (switched off with -ln or -lp options)."); // not with -FLIRT
+   NR_INFO("\t-pca0 \t\t\tOutput pca images 1:prinComp without registration step [pcaX.nii]."); // i.e. just print out each PCA image.
+   NR_INFO("\t-pca1 \t\t\tOutput pca images 1:prinComp for inspection [pcaX.nii].");
+   NR_INFO("\t-pca2 \t\t\tOutput intermediate results 1:prinComp for inspection [outX.nii].");
+   NR_INFO("\t-pca3 \t\t\tSave current deformation result [cppX.nii].");
+   NR_INFO("\t-pca123 \t\tWrite out everything!.");
+   NR_INFO("\n*** Alternative Registration Options:");
+   NR_INFO("\t-mean \t\t\tIterative registration to the mean image only (no PPCR)."); // registration to the mean is quite inefficient as it uses the ppcr 4D->4D model.
+   NR_INFO("\t-locality <int>\t\tIterative registration to the local mean image (pm <int> images - no PPCR).");
+   NR_INFO("\t-tp       <int>\t\tIterative registration to single timepoint (no PPCR).");
+   NR_INFO("\t-noinit \t\tTurn off cpp initialisation from previous iteration.");
+   //NR_INFO("\t-flirt \t\t\tfor PPCNR using Flirt affine registration (not tested)");
+   NR_INFO("\n*** reg_f3d/reg_aladin options are carried through (use reg_f3d -h or reg_aladin -h to see these options).");
    //system("reg_f3d -h");
-
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   return;
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 
@@ -145,13 +143,10 @@ int main(int argc, char **argv)
    param->tp=0;
    param->maxIteration=-1;
 
-   char regCommandAll[1055]="";
-   char regCommand[1000]="";
-   strcat(regCommand,"-target anchorx.nii -source floatx.nii");
-   char regCommandF[1000]="";
-   strcat(regCommandF,"flirt -ref anchorx.nii -in floatx.nii -out outputResult.nii.gz");
-   char style[10]="";
-   char STYL3[10]="";
+   std::string regCommandAll;
+   std::string regCommand("-target anchorx.nii -source floatx.nii");
+   std::string regCommandF("flirt -ref anchorx.nii -in floatx.nii -out outputResult.nii.gz");
+   std::string style, STYL3;
 
    /* read the input parameters */
    for(int i=1; i<argc; i++)
@@ -168,7 +163,7 @@ int main(int argc, char **argv)
             strcmp(argv[i], "-V")==0 || strcmp(argv[i], "-v")==0 ||
             strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0)
       {
-         printf("%s\n",_GIT_HASH);
+         NR_COUT << _GIT_HASH << std::endl;
          return EXIT_SUCCESS;
       }
 #endif
@@ -194,7 +189,7 @@ int main(int argc, char **argv)
          char *temp_data = reinterpret_cast<char *>(makesource->data);
          for(int ii=0; ii<makesource->nt; ii++) // fill with file data
          {
-            printf("Reading '%s' (%i of %i)\n",argv[i+1],ii+1,makesource->nt);
+            NR_COUT << "Reading '" << argv[i+1] << "' (" << ii+1 << " of " << makesource->nt << ")" << std::endl;
             source = nifti_image_read(argv[++i],true);
             memcpy(&(temp_data[ii*source->nvox*source->nbyper]), source->data, source->nbyper*source->nvox);
             nifti_image_free(source);
@@ -218,9 +213,8 @@ int main(int argc, char **argv)
          for(int ii=0; ii<source->nt; ii++) // fill with file data
          {
             memcpy(makesource->data, &(temp_data[ii*makesource->nvox*source->nbyper]), makesource->nbyper*makesource->nvox);
-            char outname[100];
-            sprintf(outname,"%s%i.nii",param->finalResultName,ii);
-            printf("Writing '%s' (%i of %i)\n",outname,ii+1,source->nt);
+            const std::string outname=param->finalResultName + std::to_string(ii) + ".nii"s;
+            NR_COUT << "Writing '" << outname << "' (" << ii+1 << " of " << source->nt << ")" << std::endl;
             nifti_set_filenames(makesource,outname, 0, 0); // might want to set this
             nifti_image_write(makesource);
          }
@@ -235,7 +229,7 @@ int main(int argc, char **argv)
       }
       else if(strcmp(argv[i], "-target") == 0)
       {
-         printf("Target image is not necessary!");
+         NR_ERROR("Target image is not necessary!");
          PetitUsage(argv[0]);
       }
       else if(strcmp(argv[i], "-aff") == 0)  // use ppcnr affine
@@ -246,7 +240,7 @@ int main(int argc, char **argv)
       }
       else if(strcmp(argv[i], "-incpp") == 0)  // remove -incpp option
       {
-         printf("-incpp will not be used!");
+         NR_ERROR("-incpp will not be used!");
       }
       else if(strcmp(argv[i], "-result") == 0)
       {
@@ -331,34 +325,24 @@ int main(int argc, char **argv)
       else if(strcmp(argv[i], "-lp") == 0)   // force autolevel select off if lp or ln are present.
       {
          flag->autolevel=0;
-         strcat(regCommand," ");
-         strcat(regCommand,argv[i]);
-         strcat(regCommand," ");
-         strcat(regCommand,argv[i+1]);
+         regCommand += " "s + argv[i] + " "s + argv[i + 1];
          ++i;
       }
       else if(strcmp(argv[i], "-ln") == 0)   // force autolevel select off if lp or ln are present.
       {
          flag->autolevel=0;
-         strcat(regCommand," ");
-         strcat(regCommand,argv[i]);
-         strcat(regCommand," ");
-         strcat(regCommand,argv[i+1]);
+         regCommand += " "s + argv[i] + " "s + argv[i + 1];
          ++i;
       }
       else if(strcmp(argv[i], "-maxit") == 0)  // extract number of registration iterations for display
       {
          param->maxIteration=atoi(argv[i+1]);
-         strcat(regCommand," ");
-         strcat(regCommand,argv[i]);
-         strcat(regCommand," ");
-         strcat(regCommand,argv[i+1]);
+         regCommand += " "s + argv[i] + " "s + argv[i + 1];
          ++i;
       }
       else
       {
-         strcat(regCommand," ");
-         strcat(regCommand,argv[i]);
+         regCommand += " "s + argv[i];
       }
    }
    if(flag->makesourcex)
@@ -372,7 +356,7 @@ int main(int argc, char **argv)
 
    if(!flag->sourceImageFlag)
    {
-      fprintf(stderr,"Error:\tAt least define a source image!\n");
+      NR_ERROR("At least define a source image!");
       Usage(argv[0]);
       return EXIT_FAILURE;
    }
@@ -380,7 +364,7 @@ int main(int argc, char **argv)
    nifti_image *image = nifti_image_read(param->sourceImageName,true);
    if(image == nullptr)
    {
-      fprintf(stderr,"* ERROR Error when reading image: %s\n",param->sourceImageName);
+      NR_ERROR("Error when reading image: " << param->sourceImageName);
       return EXIT_FAILURE;
    }
    reg_tools_changeDatatype<PrecisionType>(image); // FIX DATA TYPE - DOES THIS WORK?
@@ -392,7 +376,7 @@ int main(int argc, char **argv)
       mask = nifti_image_read(param->pcaMaskName,true);
       if(mask == nullptr)
       {
-         fprintf(stderr,"* ERROR Error when reading image: %s\n",param->pcaMaskName);
+         NR_ERROR("Error when reading image: " << param->pcaMaskName);
          return EXIT_FAILURE;
       }
       reg_tools_changeDatatype<PrecisionType>(mask);
@@ -420,72 +404,48 @@ int main(int argc, char **argv)
    }
    if(param->prinComp>=image->nt) param->prinComp=image->nt-1;
    if(!flag->outputResultFlag) param->outputResultName="ppcnrfinal-img.nii";
-//	if(param->maxIteration<0) param->maxIteration=(int)(400/param->prinComp); // number of registraton iterations is automatically set here...
+//	if(param->maxIteration<0) param->maxIteration=(int)(400/param->prinComp); // number of registration iterations is automatically set here...
 //    param->maxIteration=(param->maxIteration<50)?50:param->maxIteration;
    if(param->tp>image->nt) param->tp=image->nt;
    if(flag->aladin)  // decide whether to use affine or free-form
    {
-      strcat(regCommandAll,"reg_aladin ");
-      strcat(style,"aff");
-      strcat(STYL3,"AFF");
+      regCommandAll += "reg_aladin ";
+      style += "aff";
+      STYL3 += "AFF";
    }
    else if(flag->flirt)
    {
-      strcat(style,"aff");
+      style += "aff";
    }
    else
    {
-      strcat(regCommandAll,"reg_f3d ");
-      strcat(style,"cpp");
-      strcat(STYL3,"CPP");
+      regCommandAll += "reg_f3d ";
+      style += "cpp";
+      STYL3 += "CPP";
    }
    if(!flag->outputCPPFlag)
-   {
-      char buffer[40];
-      sprintf(buffer,"ppcnrfinal-%s",style);
-      if(flag->aladin || flag->flirt)
-      {
-         strcat(buffer,".txt");
-      }
-      else
-      {
-         strcat(buffer,".nii");
-      }
-      param->outputCPPName=buffer;
-   }
-   strcat(regCommandAll,regCommand);
-   printf("%s\n",style);
+      param->outputCPPName = "ppcnrfinal-"s + style + (flag->aladin || flag->flirt ? ".txt"s : ".nii"s);
+   regCommandAll += regCommand;
+   NR_COUT << style << std::endl;
 
    /* ****************** */
    /* DISPLAY THE REGISTRATION PARAMETERS */
    /* ****************** */
+   PrintCmdLine(argc, argv, true);
 
-   printf("\n* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   printf("Command line:\n %s",argv[0]);
-   for(int i=1; i<argc; i++)
-      printf(" %s",argv[i]);
-   printf("\n\n");
    if(flag->meanonly && !flag->locality)
-   {
-      printf("Iterative registration to the mean only (Algorithm will ignore PCA results)----------------\n");
-   }
+      NR_COUT << "Iterative registration to the mean only (Algorithm will ignore PCA results)----------------" << std::endl;
    else if(flag->meanonly && flag->locality)
-   {
-      printf("Iterative registration to local mean only (pm%i) (Algorithm will ignore PCA results)----------------\n",param->locality);
-   }
+      NR_COUT << "Iterative registration to local mean only (pm" << param->locality << ") (Algorithm will ignore PCA results)----------------" << std::endl;
    else if(flag->tp)
-   {
-      printf("Iterative registration to single timepoint only (%i) (Algorithm will ignore PCA results)----------------\n",param->tp);
-   }
+      NR_COUT << "Iterative registration to single timepoint only (" << param->tp << ") (Algorithm will ignore PCA results)----------------" << std::endl;
    else
-   {
-      printf("PPCNR Parameters\n----------------\n");
-   }
-   printf("Source image name: %s\n",param->sourceImageName);
-   if(flag->pmask) printf("PCA Mask image name: %s\n",param->pcaMaskName);
-   printf("Number of timepoints: %i \n", image->nt);
-   printf("Number of principal components: %i\n",param->prinComp);
-   printf("Registration max iterations: %i\n",param->maxIteration);
+      NR_COUT << "PPCNR Parameters\n----------------" << std::endl;
+   NR_COUT << "Source image name: " << param->sourceImageName << std::endl;
+   if(flag->pmask) NR_COUT << "PCA Mask image name: " << param->pcaMaskName << std::endl;
+   NR_COUT << "Number of timepoints: " << image->nt << std::endl;
+   NR_COUT << "Number of principal components: " << param->prinComp << std::endl;
+   NR_COUT << "Registration max iterations: " << param->maxIteration << std::endl;
 
    /* ********************** */
    /* START THE REGISTRATION */
@@ -509,21 +469,17 @@ int main(int argc, char **argv)
    PrecisionType *Mean = new PrecisionType [image->nt];
    PrecisionType *Cov = new PrecisionType [image->nt*image->nt];
    PrecisionType cov;
-//   char pcaname[20];
-//   char outname[20];
 
    for(int prinCompNumber=1; prinCompNumber<=param->prinComp; prinCompNumber++)
    {
       param->spacing[0]=levels[(int)(3.0*prinCompNumber/(param->prinComp+1))]; // choose a reducing level number
-      printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-      printf("RUNNING ITERATION %i of %i \n",prinCompNumber, param->prinComp);
-      printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-      printf("Running component %i of %i \n", prinCompNumber, param->prinComp);
+      NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
+      NR_COUT << "RUNNING ITERATION " << prinCompNumber << " of " << param->prinComp << "\n";
+      NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
+      NR_COUT << "Running component " << prinCompNumber << " of " << param->prinComp << "\n";
       if(flag->autolevel)
-      {
-         printf("Running %i levels at %g spacing \n", levelNumber, param->spacing[0]);
-      }
-      printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
+         NR_COUT << "Running " << levelNumber << " levels at " << param->spacing[0] << " spacing\n";
+      NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
 
       // Read images and find image means
       unsigned voxelNumber = image->nvox/image->nt;
@@ -642,20 +598,16 @@ int main(int argc, char **argv)
          for (j=0; j<i; j++) z[j+n*i]=z[i+n*j]=0;
       }
 
-      printf("Image Means=[%g",Mean[0]);
+      NR_COUT << "Image Means=[" << Mean[0];
       for(int i=1; i<image->nt; i++)
-      {
-         printf(",%g",Mean[i]); // not sure it's quite right...
-      }
-      printf("]\n");
+         NR_COUT << "," << Mean[i]; // not sure it's quite right...
+      NR_COUT << "]\n";
       for(int i=0; i<image->nt; i++)
       {
-         printf("Cov=[%g",Cov[i+n*0]);
+         NR_COUT << "Cov=[" << Cov[i+n*0];
          for(int j=1; j<image->nt; j++)
-         {
-            printf(",%g",Cov[i+n*j]);
-         }
-         printf("]\n");
+            NR_COUT << "," << Cov[i+n*j];
+         NR_COUT << "]\n";
       }
 
       // 2. diagonalise
@@ -710,7 +662,6 @@ int main(int argc, char **argv)
                e[l]=g;
                e[m]=0;
             }
-            // printf("Iterations=%i\n",iter);
          }
          while(m!=l);
       } // Seems to be ok for an arbitrary covariance matrix.
@@ -734,51 +685,40 @@ int main(int argc, char **argv)
                }
          }
       }
-      printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
+      NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
       for(int i=0; i<image->nt; i++)
       {
-         printf("EVMatrix=[%g",z[i+n*0]);
+         NR_COUT << "EVMatrix=[" << z[i+n*0];
          for(int j=1; j<image->nt; j++)
-         {
-            printf(",%g",z[i+image->nt*j]);
-         }
-         printf("]\n");
+            NR_COUT << "," << z[i+image->nt*j];
+         NR_COUT << "]\n";
       }
-      printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-      printf("Eigenvalues=[%g",d[0]);
+      NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
+      NR_COUT << "Eigenvalues=[" << d[0];
       for(int i=0; i<image->nt; i++)
       {
          if(i>0)
-         {
-            printf(",%g",d[i]);
-         }
+            NR_COUT << "," << d[i];
          vsum[prinCompNumber-1]+=d[i];
          dall[i+image->nt*prinCompNumber-1]=d[i];
       }
-      printf("]\n");
+      NR_COUT << "]\n";
       for(j=0; j<prinCompNumber; j++)
       {
-         printf("Variances(%i)=[%g",j+1,100.0*dall[0+n*j]/vsum[j]);
+         NR_COUT << "Variances(" << j+1 << ")=[" << 100.0*dall[0+n*j]/vsum[j];
          for(int i=1; i<image->nt; i++)
-         {
-            printf(",%g",100.0*dall[i+image->nt*j]/vsum[j]);
-         }
-         printf("]\n");
+            NR_COUT << "," << 100.0*dall[i+image->nt*j]/vsum[j];
+         NR_COUT << "]\n";
       }
       if(flag->meanonly)
       {
-         printf("Iterative registration to mean only - eigenvector matrix overwritten.\n");
+         NR_COUT << "Iterative registration to mean only - eigenvector matrix overwritten.\n";
          for(int i=0; i<image->nt; i++)
-         {
             for(int j=0; j<image->nt; j++)
-            {
                z[i+image->nt*j]=1.0/sqrtf(image->nt*prinCompNumber); // is this right?! - if using NMI it's rather moot so I'm not too bothered at the moment...
-            }
-         }
       }
-      if(flag->locality) printf("Iterative registration to local mean only (pm %i images).\n",param->locality);
-      if(flag->tp) printf("Registration to single timepoint (%i).\n",param->tp);
-
+      if(flag->locality) NR_COUT << "Iterative registration to local mean only (pm " << param->locality << " images).\n";
+      if(flag->tp) NR_COUT << "Registration to single timepoint (" << param->tp << ").\n";
 
       // 4. rebuild images
       nifti_image *imagep=nifti_dup(*image, false); // Need to make a new image that has the same info as the original.
@@ -834,13 +774,9 @@ int main(int argc, char **argv)
             }
          }
       }
-      char pcaname[20];
-      n=sprintf(pcaname,"pca%i.nii",prinCompNumber);
-      nifti_set_filenames(imagep,pcaname, 0, 0);
+      nifti_set_filenames(imagep, ("pca"s + std::to_string(prinCompNumber) + ".nii"s).c_str(), 0, 0);
       if(flag->pca0 | flag->pca1)
-      {
          nifti_image_write(imagep);
-      }
 
       if(!flag->pca0)
       {
@@ -878,65 +814,40 @@ int main(int argc, char **argv)
             nifti_image_write(storet);
             nifti_image_free(storet);
 
-            char regCommandB[1055]="";
+            std::string regCommandB;
             if(!flag->flirt)
             {
-               sprintf(regCommandB,"%s -%s ",regCommandAll,style);
-               char buffer[20];
-               if(flag->aladin)
-               {
-                  n=sprintf(buffer,"float%s%i.txt", style,imageNumber+1);
-               }
-               else
-               {
-                  sprintf(buffer,"float%s%i.nii", style,imageNumber+1);
-               }
-               strcat(regCommandB,buffer);
-               char buffer2[30];
+               const std::string temp = "float"s + style + std::to_string(imageNumber + 1) + (flag->aladin ? ".txt"s : ".nii"s);
+               regCommandB = regCommandAll + " -"s + style + " "s + temp;
                if(flag->autolevel)
                {
-                  n=sprintf(buffer2," -ln %i",levelNumber);
-                  strcat(regCommandB,buffer2);
-                  char buffer3[20];
-                  if(!flag->aladin) n=sprintf(buffer3," -sx %g",param->spacing[0]);
-                  strcat(regCommandB,buffer3);
+                  regCommandB += " -ln "s + std::to_string(levelNumber);
+                  if(!flag->aladin)
+                     regCommandB += " -sx "s + std::to_string(param->spacing[0]);
                }
                if(prinCompNumber>1 && !flag->noinit)
-               {
-                  char buffer4[8];
-                  n=sprintf(buffer4," -in%s ",style);
-                  strcat(regCommandB,buffer4);
-                  strcat(regCommandB,buffer);
-               }
+                  regCommandB += " -in"s + style + temp;
             }
             else  // flirt -ref -in -out -omat -init
             {
-               n=sprintf(regCommandB,"%s -omat ",regCommandF);
-               char buffer[20];
-               n=sprintf(buffer,"float%s%i.txt", style,imageNumber+1);
-               strcat(regCommandB,buffer);
+               const std::string temp = "float"s + style + std::to_string(imageNumber + 1) + ".txt"s;
+               regCommandB = regCommandF + " -omat "s + temp;
                if(prinCompNumber>1 && !flag->noinit)
-               {
-                  char buffer3[8];
-                  n=sprintf(buffer3," -init ");
-                  strcat(regCommandB,buffer3);
-                  strcat(regCommandB,buffer);
-                  strcat(regCommandB,";gunzip -f outputResult.nii.gz");
-               }
+                  regCommandB += " -init "s + temp + ";gunzip -f outputResult.nii.gz";
             }
 
             // DO REGISTRATION
-            printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-            printf("RUNNING ITERATION %i of %i \n",prinCompNumber, param->prinComp);
-            printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-            printf("Registering image %i of %i \n", imageNumber+1,images->nt);
-            printf("'%s' \n",regCommandB);
+            NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
+            NR_COUT << "RUNNING ITERATION " << prinCompNumber << " of " << param->prinComp << "\n";
+            NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
+            NR_COUT << "Registering image " << imageNumber+1 << " of " << images->nt << "\n";
+            NR_COUT << "'" << regCommandB << "'\n";
             //system(regCommandB);
 
             if(system(regCommandB))
             {
-               fprintf(stderr, "Error while running the following command:\n%s\n",regCommandB);
-               reg_exit(1);
+               NR_ERROR("Error while running the following command: "s + regCommandB);
+               return EXIT_FAILURE;
             }
 
             // READ IN RESULT AND MAKE A NEW CURRENT IMAGE 'image'
@@ -947,22 +858,15 @@ int main(int argc, char **argv)
          }
       }
       nifti_image_free(imagep);
-      char outname[20];
-      n=sprintf(outname,"out%i.nii",prinCompNumber);
-      nifti_set_filenames(image,outname, 0, 0);
+      nifti_set_filenames(image, ("out"s + std::to_string(prinCompNumber) + ".nii"s).c_str(), 0, 0);
       if(flag->pca2)
-      {
          nifti_image_write(image);
-      }
       if(flag->pca3)
       {
-         char cppname[20];
-         sprintf(cppname,"cpp%i.nii",prinCompNumber);
+         const std::string cppname = "cpp"s + std::to_string(prinCompNumber) + ".nii"s;
          if(!flag->aladin & !flag->flirt)
          {
-            char buffer[20];
-            sprintf(buffer,"float%s1.nii",style);
-            nifti_image *dof = nifti_image_read(buffer,true);
+            nifti_image *dof = nifti_image_read(("float"s + style + "1.nii"s).c_str(), true);
             nifti_image *dofs = nifti_copy_nim_info(dof);
             dofs->nt = dofs->dim[4] = images->nt;
             dofs->nvox = dof->nvox*images->nt;
@@ -970,9 +874,7 @@ int main(int argc, char **argv)
             PrecisionType *intensityPtrD = static_cast<PrecisionType *>(dofs->data);
             for(int t=0; t<images->nt; t++)
             {
-               char buffer[20];
-               sprintf(buffer,"float%s%i.nii",style, t+1);
-               nifti_image *dof = nifti_image_read(buffer,true);
+               nifti_image *dof = nifti_image_read(("float"s + style + std::to_string(t + 1) + ".nii"s).c_str(), true);
                PrecisionType *intensityPtrDD = static_cast<PrecisionType *>(dof->data);
                int r=dof->nvox/3.0;
                for(int i=0; i<3; i++)
@@ -981,7 +883,7 @@ int main(int argc, char **argv)
                }
                nifti_image_free(dof);
             }
-            nifti_set_filenames(dofs,cppname, 0, 0); // TODO NAME 	// write final dof data
+            nifti_set_filenames(dofs,cppname.c_str(), 0, 0); // TODO NAME 	// write final dof data
             nifti_image_write(dofs);
             nifti_image_free(dofs);
          }
@@ -990,20 +892,18 @@ int main(int argc, char **argv)
             std::string final_string = "";
             for(int t=0; t<images->nt; t++)
             {
-               char buffer[20];
-               sprintf(buffer,"float%s%i.txt",style,t+1);
-               std::ifstream ifs(buffer);
+               std::ifstream ifs("float"s + style + std::to_string(t + 1) + ".txt"s);
                std::string str((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
-               final_string+=str;
+               final_string += str;
             }
             std::ofstream ofs(cppname);
-            ofs<<final_string.c_str();
+            ofs << final_string;
          }
 
       }
    } // End PC's
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   printf("Finished Iterations and now writing outputs...\n");
+   NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
+   NR_COUT << "Finished Iterations and now writing outputs...\n";
 
    // WRITE OUT RESULT IMAGE AND RESULT DOF
    // Read in images and put into single object
@@ -1011,9 +911,7 @@ int main(int argc, char **argv)
    {
       if(!flag->aladin & !flag->flirt)
       {
-         char buffer[20];
-         sprintf(buffer,"float%s1.nii",style);
-         nifti_image *dof = nifti_image_read(buffer,true);
+         nifti_image *dof = nifti_image_read(("float"s + style + "1.nii"s).c_str(),true);
          nifti_image *dofs = nifti_copy_nim_info(dof);
          dofs->nt = dofs->dim[4] = images->nt;
          dofs->nvox = dof->nvox*images->nt;
@@ -1021,36 +919,32 @@ int main(int argc, char **argv)
          PrecisionType *intensityPtrD = static_cast<PrecisionType *>(dofs->data);
          for(int t=0; t<images->nt; t++)
          {
-            char buffer[20];
-            sprintf(buffer,"float%s%i.nii",style, t+1);
-            nifti_image *dof = nifti_image_read(buffer,true);
+            const std::string filename = "float"s + style + std::to_string(t + 1) + ".nii"s;
+            nifti_image *dof = nifti_image_read(filename.c_str(),true);
             PrecisionType *intensityPtrDD = static_cast<PrecisionType *>(dof->data);
             int r=dof->nvox/3.0;
             for(int i=0; i<3; i++)
-            {
                memcpy(&intensityPtrD[i*image->nt*r+t*r], &intensityPtrDD[i*r], dof->nbyper*r);
-            }
             nifti_image_free(dof);
-            remove(buffer); // delete spare floatcpp files
+            remove(filename.c_str()); // delete spare floatcpp files
          }
-         nifti_set_filenames(dofs,param->outputCPPName, 0, 0); // TODO NAME 	// write final dof data
+         nifti_set_filenames(dofs,param->outputCPPName.c_str(), 0, 0); // TODO NAME 	// write final dof data
          nifti_image_write(dofs);
          nifti_image_free(dofs);
       }
       else
       {
-         std::string final_string = "";
+         std::string final_string;
          for(int t=0; t<images->nt; t++)
          {
-            char buffer[20];
-            sprintf(buffer,"float%s%i.txt",style,t+1);
-            std::ifstream ifs(buffer);
+            const std::string filename = "float"s + style + std::to_string(t + 1) + ".txt"s;
+            std::ifstream ifs(filename);
             std::string str((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());
-            final_string+=str;
-            remove(buffer);
+            final_string += str;
+            remove(filename.c_str());
          }
          std::ofstream ofs(param->outputCPPName);
-         ofs<<final_string.c_str();
+         ofs << final_string;
       }
 
       // DELETE
@@ -1071,24 +965,16 @@ int main(int argc, char **argv)
    time( &end );
    int minutes = (int)floorf(float(end-start)/60.0f);
    int seconds = (int)(end-start - 60*minutes);
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
+   NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
    if(flag->locality)
-   {
-      printf("Registration to %i-local mean with %i iterations performed in %i min %i sec\n", param->locality, param->prinComp, minutes, seconds);
-   }
+      NR_COUT << "Registration to " << param->locality << "-local mean with " << param->prinComp << " iterations performed in " << minutes << " min " << seconds << " sec\n";
    if(flag->tp)
-   {
-      printf("Single timepoint registration to image %i performed in %i min %i sec\n", param->tp, minutes, seconds);
-   }
+      NR_COUT << "Single timepoint registration to image " << param->tp << " performed in " << minutes << " min " << seconds << " sec\n";
    if(flag->meanonly & !flag->locality)
-   {
-      printf("Registration to mean image with %i iterations performed in %i min %i sec\n", param->prinComp, minutes, seconds);
-   }
+      NR_COUT << "Registration to mean image with " << param->prinComp << " iterations performed in " << minutes << " min " << seconds << " sec\n";
    if(!flag->locality & !flag->meanonly & !flag->tp)
-   {
-      printf("PPCNR registration with %i iterations performed in %i min %i sec\n", param->prinComp, minutes, seconds);
-   }
-   printf("Have a good day !\n");
+      NR_COUT << "PPCNR registration with " << param->prinComp << " iterations performed in " << minutes << " min " << seconds << " sec\n";
+   NR_COUT << "Have a good day!" << std::endl;
 
    // CHECK CLEAN-UP
    free( flag );
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index 2f5f3a93..366cb4d5 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -49,36 +49,34 @@ typedef struct
 
 void PetitUsage(char *exec)
 {
-   fprintf(stderr,"Usage:\t%s -ref <referenceImageName> -flo <floatingImageName> [OPTIONS].\n",exec);
-   fprintf(stderr,"\tSee the help for more details (-h).\n");
-   return;
+   NR_INFO("Usage:\t" << exec << " -ref <referenceImageName> -flo <floatingImageName> [OPTIONS]");
+   NR_INFO("\tSee the help for more details (-h)");
 }
+
 void Usage(char *exec)
 {
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   printf("Usage:\t%s -ref <filename> -flo <filename> [OPTIONS].\n",exec);
-   printf("\t-ref <filename>\n\t\tFilename of the reference image (mandatory)\n");
-   printf("\t-flo <filename>\n\t\tFilename of the floating image (mandatory)\n\n");
-   printf("* * OPTIONS * *\n");
-   printf("\t-trans <filename>\n\t\tFilename of the file containing the transformation parametrisation (from reg_aladin, reg_f3d or reg_transform)\n");
-   printf("\t-res <filename>\n\t\tFilename of the resampled image [none]\n");
-   printf("\t-blank <filename>\n\t\tFilename of the resampled blank grid [none]\n");
-   printf("\t-inter <int>\n\t\tInterpolation order (0, 1, 3, 4)[3] (0=NN, 1=LIN; 3=CUB, 4=SINC)\n");
-   printf("\t-pad <int>\n\t\tInterpolation padding value [0]\n");
-   printf("\t-tensor\n\t\tThe last six timepoints of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ [off]\n");
-   printf("\t-psf\n\t\tPerform the resampling in two steps to resample an image to a lower resolution [off]\n");
-   printf("\t-psf_alg <0/1>\n\t\tMinimise the matrix metric (0) or the determinant (1) when estimating the PSF [0]\n");
-   printf("\t-voff\n\t\tTurns verbose off [on]\n");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+   NR_INFO("Usage:\t" << exec << " -ref <filename> -flo <filename> [OPTIONS]");
+   NR_INFO("\t-ref <filename>\n\t\tFilename of the reference image (mandatory)");
+   NR_INFO("\t-flo <filename>\n\t\tFilename of the floating image (mandatory)\n");
+   NR_INFO("* * OPTIONS * *");
+   NR_INFO("\t-trans <filename>\n\t\tFilename of the file containing the transformation parametrisation (from reg_aladin, reg_f3d or reg_transform)");
+   NR_INFO("\t-res <filename>\n\t\tFilename of the resampled image [none]");
+   NR_INFO("\t-blank <filename>\n\t\tFilename of the resampled blank grid [none]");
+   NR_INFO("\t-inter <int>\n\t\tInterpolation order (0, 1, 3, 4)[3] (0=NN, 1=LIN; 3=CUB, 4=SINC)");
+   NR_INFO("\t-pad <int>\n\t\tInterpolation padding value [0]");
+   NR_INFO("\t-tensor\n\t\tThe last six timepoints of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ [off]");
+   NR_INFO("\t-psf\n\t\tPerform the resampling in two steps to resample an image to a lower resolution [off]");
+   NR_INFO("\t-psf_alg <0/1>\n\t\tMinimise the matrix metric (0) or the determinant (1) when estimating the PSF [0]");
+   NR_INFO("\t-voff\n\t\tTurns verbose off [on]");
 #ifdef _OPENMP
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   printf("\t-omp <int>\n\t\tNumber of thread to use with OpenMP. [%i/%i]\n",
-          defaultOpenMPValue, omp_get_num_procs());
+   NR_INFO("\t-omp <int>\n\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]");
 #endif
-   printf("\t--version\n\t\tPrint current version and exit (%s)\n",NR_VERSION);
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   return;
+   NR_INFO("\t--version\n\t\tPrint current version and exit (" << NR_VERSION << ")");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 int main(int argc, char **argv)
@@ -92,7 +90,7 @@ int main(int argc, char **argv)
    bool verbose=true;
 
 #ifdef _OPENMP
-   // Set the default number of thread
+   // Set the default number of threads
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -117,7 +115,7 @@ int main(int argc, char **argv)
       }
       else if(strcmp(argv[i], "--xml")==0)
       {
-         printf("%s",xml_resample);
+         NR_COUT << xml_resample << std::endl;
          return EXIT_SUCCESS;
       }
       else if(strcmp(argv[i], "-voff")==0)
@@ -129,7 +127,7 @@ int main(int argc, char **argv)
 #ifdef _OPENMP
          omp_set_num_threads(atoi(argv[++i]));
 #else
-         reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
+         NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
          ++i;
 #endif
       }
@@ -140,7 +138,7 @@ int main(int argc, char **argv)
                strcmp(argv[i], "--v")==0 ||
                strcmp(argv[i], "--version")==0)
       {
-         printf("%s\n",NR_VERSION);
+         NR_COUT << NR_VERSION << std::endl;
          return EXIT_SUCCESS;
       }
       else if((strcmp(argv[i],"-ref")==0) || (strcmp(argv[i],"-target")==0) ||
@@ -239,7 +237,7 @@ int main(int argc, char **argv)
       }
       else
       {
-         fprintf(stderr,"Err:\tParameter %s unknown.\n",argv[i]);
+         NR_ERROR("Unknown parameter: " << argv[i]);
          PetitUsage(argv[0]);
          return EXIT_FAILURE;
       }
@@ -247,7 +245,7 @@ int main(int argc, char **argv)
 
    if(!flag->referenceImageFlag || !flag->floatingImageFlag)
    {
-      fprintf(stderr,"[NiftyReg ERROR] The reference and the floating image have both to be defined.\n");
+      NR_ERROR("The reference and the floating image have both to be defined");
       PetitUsage(argv[0]);
       return EXIT_FAILURE;
    }
@@ -256,8 +254,7 @@ int main(int argc, char **argv)
    nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
    if(referenceImage == nullptr)
    {
-      fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n",
-              param->referenceImageName);
+      NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
       return EXIT_FAILURE;
    }
 
@@ -265,28 +262,22 @@ int main(int argc, char **argv)
    nifti_image *floatingImage = reg_io_ReadImageFile(param->floatingImageName);
    if(floatingImage == nullptr)
    {
-      fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n",
-              param->floatingImageName);
+      NR_ERROR("Error when reading the floating image: " << param->floatingImageName);
       return EXIT_FAILURE;
    }
 
    /* *********************************** */
    /* DISPLAY THE RESAMPLING PARAMETERS */
    /* *********************************** */
-   if(verbose){
-      printf("\n* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-      printf("Command line:\n");
-      for(int i=0; i<argc; i++) printf(" %s", argv[i]);
-      printf("\n");
-      printf("Parameters\n");
-      printf("Reference image name: %s\n",referenceImage->fname);
-      printf("\t%ix%ix%i voxels, %i volumes\n",referenceImage->nx,referenceImage->ny,referenceImage->nz,referenceImage->nt);
-      printf("\t%gx%gx%g mm\n",referenceImage->dx,referenceImage->dy,referenceImage->dz);
-      printf("Floating image name: %s\n",floatingImage->fname);
-      printf("\t%ix%ix%i voxels, %i volumes\n",floatingImage->nx,floatingImage->ny,floatingImage->nz,floatingImage->nt);
-      printf("\t%gx%gx%g mm\n",floatingImage->dx,floatingImage->dy,floatingImage->dz);
-      printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n\n");
-   }
+   PrintCmdLine(argc, argv, verbose);
+   NR_VERBOSE_APP("Parameters");
+   NR_VERBOSE_APP("Reference image name: " << referenceImage->fname);
+   NR_VERBOSE_APP("\t" << referenceImage->nx << "x" << referenceImage->ny << "x" << referenceImage->nz << " voxels, " << referenceImage->nt << " volumes");
+   NR_VERBOSE_APP("\t" << referenceImage->dx << "x" << referenceImage->dy << "x" << referenceImage->dz << " mm");
+   NR_VERBOSE_APP("Floating image name: " << floatingImage->fname);
+   NR_VERBOSE_APP("\t" << floatingImage->nx << "x" << floatingImage->ny << "x" << floatingImage->nz << " voxels, " << floatingImage->nt << " volumes");
+   NR_VERBOSE_APP("\t" << floatingImage->dx << "x" << floatingImage->dy << "x" << floatingImage->dz << " mm");
+   NR_VERBOSE_APP("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
 
    /* *********************** */
    /* READ THE TRANSFORMATION */
@@ -302,8 +293,7 @@ int main(int argc, char **argv)
          inputTransformationImage=reg_io_ReadImageFile(param->inputTransName);
          if(inputTransformationImage==nullptr)
          {
-            fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n",
-                    param->inputTransName);
+            NR_ERROR("Error when reading the provided transformation: " << param->inputTransName);
             return EXIT_FAILURE;
          }
       }
@@ -452,9 +442,7 @@ int main(int argc, char **argv)
 
       if((floatingImage->dim[4]==6 || floatingImage->dim[4]==7) && flag->isTensor)
       {
-#ifndef NDEBUG
-         reg_print_msg_debug("DTI-based resampling\n");
-#endif
+         NR_DEBUG("DTI-based resampling");
          // Compute first the Jacobian matrices
          mat33 *jacobian = (mat33 *)malloc(NiftiImage::calcVoxelNumber(deformationFieldImage, 3) * sizeof(mat33));
          reg_defField_getJacobianMatrix(deformationFieldImage, jacobian);
@@ -486,9 +474,7 @@ int main(int argc, char **argv)
                                   param->paddingValue,
                                   jacobian,
                                   (char)round(param->PSF_Algorithm));
-#ifndef NDEBUG
-            reg_print_msg_debug("PSF resampling completed\n");
-#endif
+            NR_DEBUG("PSF resampling completed");
             free(jacobian);
          }
          else
@@ -506,8 +492,7 @@ int main(int argc, char **argv)
       strcpy (warpedImage->descrip,"Warped image using NiftyReg (reg_resample)");
       reg_io_WriteImageFile(warpedImage,param->outputResultName);
 
-      if(verbose)
-         printf("[NiftyReg] Resampled image has been saved: %s\n", param->outputResultName);
+      NR_VERBOSE_APP("Resampled image has been saved: " << param->outputResultName);
       nifti_image_free(warpedImage);
    }
 
@@ -599,8 +584,7 @@ int main(int argc, char **argv)
       reg_io_WriteImageFile(warpedImage,param->outputBlankName);
       nifti_image_free(warpedImage);
       nifti_image_free(gridImage);
-      if(verbose)
-         printf("[NiftyReg] Resampled grid has been saved: %s\n", param->outputBlankName);
+      NR_VERBOSE_APP("Resampled grid has been saved: " << param->outputBlankName);
    }
 
    //   // Tell the CLI that we finished
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 5c1d5eeb..5a1f6f30 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -95,49 +95,47 @@ typedef struct
 
 void PetitUsage(char *exec)
 {
-    fprintf(stderr,"Usage:\t%s -in  <filename> [OPTIONS].\n",exec);
-    fprintf(stderr,"\tSee the help for more details (-h).\n");
-    return;
+    NR_INFO("Usage:\t" << exec << " -in  <filename> [OPTIONS]");
+    NR_INFO("\tSee the help for more details (-h)");
 }
+
 void Usage(char *exec)
 {
-    printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-    printf("Usage:\t%s -in <filename> -out <filename> [OPTIONS].\n",exec);
-    printf("\t-in <filename>\tFilename of the input image image (mandatory)\n");
-    printf("* * OPTIONS * *\n");
-    printf("\t-out <filename>\t\tFilename out the output image [output.nii]\n");
-    printf("\t-float\t\t\tThe input image is converted to float\n");
-    printf("\t-down\t\t\tThe input image is downsampled 2 times\n");
-    printf("\t-smoS <float> <float> <float>\n\t\t\t\tThe input image is smoothed using a cubic b-spline kernel\n");
-    printf("\t-smoG <float> <float> <float>\n\t\t\t\tThe input image is smoothed using Gaussian kernel\n");
-    printf("\t-smoL <float> <float> <float>\n\t\t\t\tThe input label image is smoothed using Gaussian kernel\n");
-    printf("\t-add <filename/float>\tThis image (or value) is added to the input\n");
-    printf("\t-sub <filename/float>\tThis image (or value) is subtracted to the input\n");
-    printf("\t-mul <filename/float>\tThis image (or value) is multiplied to the input\n");
-    printf("\t-div <filename/float>\tThis image (or value) is divided to the input\n");
-    printf("\t-rms <filename>\t\tCompute the mean rms between both image\n");
-    printf("\t-bin \t\t\tBinarise the input image (val!=0?val=1:val=0)\n");
-    printf("\t-thr <float>\t\tThreshold the input image (val<thr?val=0:val=1)\n");
-    printf("\t-nan <filename>\t\tThis image is used to mask the input image.\n\t\t\t\tVoxels outside of the mask are set to nan\n");
-    printf("\t-iso\t\t\tThe resulting image is made isotropic\n");
-    printf("\t-chgres <float> <float> <float>\n\t\t\t\tResample the input image to the specified resolution (in mm)\n");
-    printf("\t-noscl\t\t\tThe scl_slope and scl_inter are set to 1 and 0 respectively\n");
-    printf("\t-rmNanInf <float>\tRemove the nan and inf from the input image and replace them by the specified value\n");
-    printf("\t-4d2rgb\t\t\tConvert a 4D (or 5D) to rgb nifti file\n");
-    printf("\t-testActiveBlocks\tGenerate an image highlighting the active blocks for reg_aladin (block variance is shown)\n");
-    printf("\t-mind\t\t\tCreate a MIND descriptor image\n");
-    printf("\t-mindssc\t\tCreate a MIND-SSC descriptor image\n");
-    printf("\t-interp\t\t\tInterpolation order to use to warp the floating image\n");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("Usage:\t" << exec << " -in <filename> -out <filename> [OPTIONS]");
+    NR_INFO("\t-in <filename>\tFilename of the input image image (mandatory)");
+    NR_INFO("* * OPTIONS * *");
+    NR_INFO("\t-out <filename>\t\tFilename out the output image [output.nii]");
+    NR_INFO("\t-float\t\t\tThe input image is converted to float");
+    NR_INFO("\t-down\t\t\tThe input image is downsampled 2 times");
+    NR_INFO("\t-smoS <float> <float> <float>\n\t\t\t\tThe input image is smoothed using a cubic b-spline kernel");
+    NR_INFO("\t-smoG <float> <float> <float>\n\t\t\t\tThe input image is smoothed using Gaussian kernel");
+    NR_INFO("\t-smoL <float> <float> <float>\n\t\t\t\tThe input label image is smoothed using Gaussian kernel");
+    NR_INFO("\t-add <filename/float>\tThis image (or value) is added to the input");
+    NR_INFO("\t-sub <filename/float>\tThis image (or value) is subtracted to the input");
+    NR_INFO("\t-mul <filename/float>\tThis image (or value) is multiplied to the input");
+    NR_INFO("\t-div <filename/float>\tThis image (or value) is divided to the input");
+    NR_INFO("\t-rms <filename>\t\tCompute the mean rms between both image");
+    NR_INFO("\t-bin \t\t\tBinarise the input image (val!=0?val=1:val=0)");
+    NR_INFO("\t-thr <float>\t\tThreshold the input image (val<thr?val=0:val=1)");
+    NR_INFO("\t-nan <filename>\t\tThis image is used to mask the input image.\n\t\t\t\tVoxels outside of the mask are set to nan");
+    NR_INFO("\t-iso\t\t\tThe resulting image is made isotropic");
+    NR_INFO("\t-chgres <float> <float> <float>\n\t\t\t\tResample the input image to the specified resolution (in mm)");
+    NR_INFO("\t-noscl\t\t\tThe scl_slope and scl_inter are set to 1 and 0 respectively");
+    NR_INFO("\t-rmNanInf <float>\tRemove the nan and inf from the input image and replace them by the specified value");
+    NR_INFO("\t-4d2rgb\t\t\tConvert a 4D (or 5D) to rgb nifti file");
+    NR_INFO("\t-testActiveBlocks\tGenerate an image highlighting the active blocks for reg_aladin (block variance is shown)");
+    NR_INFO("\t-mind\t\t\tCreate a MIND descriptor image");
+    NR_INFO("\t-mindssc\t\tCreate a MIND-SSC descriptor image");
+    NR_INFO("\t-interp\t\t\tInterpolation order to use to warp the floating image");
 #ifdef _OPENMP
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   printf("\t-omp <int>\t\tNumber of thread to use with OpenMP. [%i/%i]\n",
-          defaultOpenMPValue, omp_get_num_procs());
+   NR_INFO("\t-omp <int>\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]");
 #endif
-   printf("\t--version\t\tPrint current version and exit (%s)\n",NR_VERSION);
-    printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-    return;
+   NR_INFO("\t--version\t\tPrint current version and exit (" << NR_VERSION << ")");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 int main(int argc, char **argv)
@@ -153,7 +151,7 @@ int main(int argc, char **argv)
     }
 
 #ifdef _OPENMP
-    // Set the default number of thread
+    // Set the default number of threads
     int defaultOpenMPValue=omp_get_num_procs();
     if(getenv("OMP_NUM_THREADS")!=nullptr)
         defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -178,7 +176,7 @@ int main(int argc, char **argv)
         }
         else if(strcmp(argv[i], "--xml")==0)
         {
-            printf("%s",xml_tools);
+            NR_COUT << xml_tools << std::endl;
             return EXIT_SUCCESS;
         }
         else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
@@ -186,7 +184,7 @@ int main(int argc, char **argv)
 #ifdef _OPENMP
             omp_set_num_threads(atoi(argv[++i]));
 #else
-            reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
+            NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
             ++i;
 #endif
         }
@@ -194,7 +192,7 @@ int main(int argc, char **argv)
                 strcmp(argv[i], "-V")==0 || strcmp(argv[i], "-v")==0 ||
                 strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0)
         {
-            printf("%s\n",NR_VERSION);
+            NR_COUT << NR_VERSION << std::endl;
             return EXIT_SUCCESS;
         }
         else if(strcmp(argv[i], "-in") == 0 || strcmp(argv[i], "--in") == 0)
@@ -454,7 +452,7 @@ int main(int argc, char **argv)
         }
         else
         {
-            fprintf(stderr, "Err:\tParameter %s unknown.\n", argv[i]);
+            NR_ERROR("Unknown parameter: " << argv[i]);
             PetitUsage(argv[0]);
             return EXIT_FAILURE;
         }
@@ -466,7 +464,7 @@ int main(int argc, char **argv)
     nifti_image *image = reg_io_ReadImageFile(param->inputImageName);
     if(image == nullptr)
     {
-        fprintf(stderr,"** ERROR Error when reading the input image: %s\n",param->inputImageName);
+        NR_ERROR("Error when reading the input image: " << param->inputImageName);
         return EXIT_FAILURE;
     }
 
@@ -579,7 +577,7 @@ int main(int argc, char **argv)
             image2 = reg_io_ReadImageFile(param->operationImageName);
             if(image2 == nullptr)
             {
-                fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->operationImageName);
+                NR_ERROR("Error when reading the image: " << param->operationImageName);
                 return EXIT_FAILURE;
             }
         }
@@ -620,8 +618,8 @@ int main(int argc, char **argv)
                 reg_tools_changeDatatype<double>(image2,NIFTI_TYPE_FLOAT64);
                 break;
             default:
-                reg_print_msg_error("Unsupported data type.");
-                reg_exit();
+                NR_ERROR("Unsupported data type!");
+                return EXIT_FAILURE;
             }
         }
 
@@ -678,7 +676,7 @@ int main(int argc, char **argv)
         nifti_image *image2 = reg_io_ReadImageFile(param->rmsImageName);
         if(image2 == nullptr)
         {
-            fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->rmsImageName);
+            NR_ERROR("Error when reading the image: " << param->rmsImageName);
             return EXIT_FAILURE;
         }
         // Check image dimension
@@ -691,12 +689,12 @@ int main(int argc, char **argv)
                 image->dim[6]!=image2->dim[6] ||
                 image->dim[7]!=image2->dim[7])
         {
-            fprintf(stderr,"Both images do not have the same dimension\n");
+            NR_ERROR("Both images do not have the same dimension");
             return EXIT_FAILURE;
         }
 
         double meanRMSerror = reg_tools_getMeanRMS(image, image2);
-        printf("%g\n", meanRMSerror);
+        NR_COUT << "Mean RMS error: " << meanRMSerror << std::endl;
         nifti_image_free(image2);
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
@@ -723,7 +721,7 @@ int main(int argc, char **argv)
         nifti_image *maskImage = reg_io_ReadImageFile(param->operationImageName);
         if(maskImage == nullptr)
         {
-            fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->operationImageName);
+            NR_ERROR("Error when reading the image: " << param->operationImageName);
             return EXIT_FAILURE;
         }
 
@@ -792,11 +790,10 @@ int main(int argc, char **argv)
             }
         }
         else{
-            reg_print_msg_error("Nan and Inf value can only be removed when the input image is of float or double datatype");
+            NR_ERROR("Nan and Inf value can only be removed when the input image is of float or double datatype");
             return EXIT_FAILURE;
         }
-        printf("The input image contained %zu NaN, %zu Inf and %zu finite values\n",
-               nanNumber, infNumber, finNumber);
+        NR_COUT << "The input image contained " << nanNumber << " NaN, " << infNumber << " Inf and " << finNumber << " finite values" << std::endl;
         if(flag->outputImageFlag)
             reg_io_WriteImageFile(image,param->outputImageName);
         else reg_io_WriteImageFile(image,"output.nii");
@@ -910,9 +907,7 @@ int main(int argc, char **argv)
                                   0.f,
                                   jacobian,
                                   0);
-#ifndef NDEBUG
-        reg_print_msg_debug("PSF resampling completed\n");
-#endif
+            NR_DEBUG("PSF resampling completed");
         }
         else{
             reg_resampleImage(image,
@@ -921,9 +916,7 @@ int main(int argc, char **argv)
                               nullptr,
                               param->interpOrder,
                               0.f);
-#ifndef NDEBUG
-        reg_print_msg_debug("Resampling completed\n");
-#endif
+            NR_DEBUG("Resampling completed");
         }
         free(jacobian);
         nifti_image_free(def);
@@ -1023,8 +1016,8 @@ int main(int argc, char **argv)
     if(flag->mindFlag)
     {
         if(image->ndim>3){
-            reg_print_msg_error("MIND only support 2D or 3D image for now");
-            reg_exit();
+            NR_ERROR("MIND only support 2D or 3D image for now");
+            return EXIT_FAILURE;
         }
         // Convert the input image to float if needed
         if(image->datatype!=NIFTI_TYPE_FLOAT32)
@@ -1050,8 +1043,8 @@ int main(int argc, char **argv)
     if(flag->mindSSCFlag)
     {
         if(image->ndim>3){
-            reg_print_msg_error("MIND-SSC only support 2D or 3D image for now");
-            reg_exit();
+            NR_ERROR("MIND-SSC only support 2D or 3D image for now");
+            return EXIT_FAILURE;
         }
         // Convert the input image to float if needed
         if(image->datatype!=NIFTI_TYPE_FLOAT32)
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 84702a09..5c992b69 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -56,110 +56,108 @@ typedef struct
 
 void PetitUsage(char *exec)
 {
-   fprintf(stderr,"Usage:\t%s [OPTIONS].\n",exec);
-   fprintf(stderr,"\tSee the help for more details (-h).\n");
-   return;
+   NR_INFO("Usage:\t" << exec << " [OPTIONS]");
+   NR_INFO("\tSee the help for more details (-h)");
 }
+
 void Usage(char *exec)
 {
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   printf("Usage:\t%s [OPTIONS].\n",exec);
-   printf("* * OPTIONS * *\n\n");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+   NR_INFO("Usage:\t" << exec << " [OPTIONS]");
+   NR_INFO("* * OPTIONS * *\n");
 
-   printf("\t-ref <filename>\n");
-   printf("\t\tFilename of the reference image\n");
-   printf("\t\tThe Reference image has to be specified when a cubic B-Spline parametrised control point grid is used*.\n");
-   printf("\t-ref2 <filename>\n");
-   printf("\t\tFilename of the second reference image to be used when dealing with composition\n\n");
+   NR_INFO("\t-ref <filename>");
+   NR_INFO("\t\tFilename of the reference image");
+   NR_INFO("\t\tThe Reference image has to be specified when a cubic B-Spline parametrised control point grid is used*.");
+   NR_INFO("\t-ref2 <filename>");
+   NR_INFO("\t\tFilename of the second reference image to be used when dealing with composition\n");
 
-   printf("\t-def <filename1> <filename2>\n");
-   printf("\t\tTake a transformation of any recognised type* and compute the corresponding deformation field\n");
-   printf("\t\tfilename1 - Input transformation file name\n");
-   printf("\t\tfilename2 - Output deformation field file name\n\n");
+   NR_INFO("\t-def <filename1> <filename2>");
+   NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding deformation field");
+   NR_INFO("\t\tfilename1 - Input transformation file name");
+   NR_INFO("\t\tfilename2 - Output deformation field file name\n");
 
-   printf("\t-disp <filename1> <filename2>\n");
-   printf("\t\tTake a transformation of any recognised type* and compute the corresponding displacement field\n");
-   printf("\t\tfilename1 - Input transformation file name\n");
-   printf("\t\tfilename2 - Output displacement field file name\n\n");
+   NR_INFO("\t-disp <filename1> <filename2>");
+   NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding displacement field");
+   NR_INFO("\t\tfilename1 - Input transformation file name");
+   NR_INFO("\t\tfilename2 - Output displacement field file name\n");
 
-   printf("\t-flow <filename1> <filename2>\n");
-   printf("\t\tTake a spline parametrised SVF and compute the corresponding flow field\n");
-   printf("\t\tfilename1 - Input transformation file name\n");
-   printf("\t\tfilename2 - Output flow field file name\n\n");
+   NR_INFO("\t-flow <filename1> <filename2>");
+   NR_INFO("\t\tTake a spline parametrised SVF and compute the corresponding flow field");
+   NR_INFO("\t\tfilename1 - Input transformation file name");
+   NR_INFO("\t\tfilename2 - Output flow field file name\n");
 
-   printf("\t-comp <filename1> <filename2> <filename3>\n");
-   printf("\t\tCompose two transformations of any recognised type* and returns a deformation field.\n");
-   printf("\t\tTrans3(x) = Trans2(Trans1(x)).\n");
-   printf("\t\tfilename1 - Input transformation 1 file name (associated with -ref if required)\n");
-   printf("\t\tfilename2 - Input transformation 2 file name (associated with -ref2 if required)\n");
-   printf("\t\tfilename3 - Output deformation field file name\n\n");
+   NR_INFO("\t-comp <filename1> <filename2> <filename3>");
+   NR_INFO("\t\tCompose two transformations of any recognised type* and returns a deformation field.");
+   NR_INFO("\t\tTrans3(x) = Trans2(Trans1(x)).");
+   NR_INFO("\t\tfilename1 - Input transformation 1 file name (associated with -ref if required)");
+   NR_INFO("\t\tfilename2 - Input transformation 2 file name (associated with -ref2 if required)");
+   NR_INFO("\t\tfilename3 - Output deformation field file name\n");
 
-   printf("\t-land <filename1> <filename2> <filename3>\n");
-   printf("\t\tApply a transformation to a set of landmark(s).\n");
-   printf("\t\tLandmarks are encoded in a text file with one landmark position (mm) per line:\n");
-   printf("\t\t\t<key1_x> <key1_y> <key1_z>\n");
-   printf("\t\t\t<key2_x> <key2_y> <key2_z>\n");
-   printf("\t\tfilename1 - Input transformation file name\n");
-   printf("\t\tfilename2 - Input landmark file name.\n");
-   printf("\t\tfilename3 - Output landmark file name\n\n");
+   NR_INFO("\t-land <filename1> <filename2> <filename3>");
+   NR_INFO("\t\tApply a transformation to a set of landmark(s).");
+   NR_INFO("\t\tLandmarks are encoded in a text file with one landmark position (mm) per line:");
+   NR_INFO("\t\t\t<key1_x> <key1_y> <key1_z>");
+   NR_INFO("\t\t\t<key2_x> <key2_y> <key2_z>");
+   NR_INFO("\t\tfilename1 - Input transformation file name");
+   NR_INFO("\t\tfilename2 - Input landmark file name.");
+   NR_INFO("\t\tfilename3 - Output landmark file name\n");
 
-   printf("\t-updSform <filename1> <filename2> <filename3>\n");
-   printf("\t\tUpdate the sform of an image using an affine transformation.\n");
-   printf("\t\tFilename1 - Image to be updated\n");
-   printf("\t\tFilename2 - Affine transformation defined as Affine x Reference = Floating\n");
-   printf("\t\tFilename3 - Updated image.\n\n");
+   NR_INFO("\t-updSform <filename1> <filename2> <filename3>");
+   NR_INFO("\t\tUpdate the sform of an image using an affine transformation.");
+   NR_INFO("\t\tFilename1 - Image to be updated");
+   NR_INFO("\t\tFilename2 - Affine transformation defined as Affine x Reference = Floating");
+   NR_INFO("\t\tFilename3 - Updated image.\n");
 
-   printf("\t-invAff <filename1> <filename2>\n");
-   printf("\t\tInvert an affine matrix.\n");
-   printf("\t\tfilename1 - Input affine transformation file name\n");
-   printf("\t\tfilename2 - Output inverted affine transformation file name\n\n");
+   NR_INFO("\t-invAff <filename1> <filename2>");
+   NR_INFO("\t\tInvert an affine matrix.");
+   NR_INFO("\t\tfilename1 - Input affine transformation file name");
+   NR_INFO("\t\tfilename2 - Output inverted affine transformation file name\n");
 
-   printf("\t-invNrr <filename1> <filename2> <filename3>\n");
-   printf("\t\tInvert a non-rigid transformation and save the result as a deformation field.\n");
-   printf("\t\tfilename1 - Input transformation file name\n");
-   printf("\t\tfilename2 - Input floating image where the inverted transformation is defined\n");
-   printf("\t\tfilename3 - Output inverted transformation file name\n");
-   printf("\t\tNote that the cubic b-spline grid parametrisations can not be inverted without approximation,\n");
-   printf("\t\tas a result, they are converted into deformation fields before inversion.\n\n");
+   NR_INFO("\t-invNrr <filename1> <filename2> <filename3>");
+   NR_INFO("\t\tInvert a non-rigid transformation and save the result as a deformation field.");
+   NR_INFO("\t\tfilename1 - Input transformation file name");
+   NR_INFO("\t\tfilename2 - Input floating image where the inverted transformation is defined");
+   NR_INFO("\t\tfilename3 - Output inverted transformation file name");
+   NR_INFO("\t\tNote that the cubic b-spline grid parametrisations can not be inverted without approximation,");
+   NR_INFO("\t\tas a result, they are converted into deformation fields before inversion.\n");
 
-   printf("\t-half <filename1> <filename2>\n");
-   printf("\t\tThe input transformation is halfed and stored using the same transformation type.\n");
-   printf("\t\tfilename1 - Input transformation file name\n");
-   printf("\t\tfilename2 - Output transformation file name\n\n");
+   NR_INFO("\t-half <filename1> <filename2>");
+   NR_INFO("\t\tThe input transformation is halfed and stored using the same transformation type.");
+   NR_INFO("\t\tfilename1 - Input transformation file name");
+   NR_INFO("\t\tfilename2 - Output transformation file name\n");
 
-   printf("\t-makeAff <rx> <ry> <rz> <tx> <ty> <tz> <sx> <sy> <sz> <shx> <shy> <shz> <outputFilename>\n");
-   printf("\t\tCreate an affine transformation matrix\n\n");
+   NR_INFO("\t-makeAff <rx> <ry> <rz> <tx> <ty> <tz> <sx> <sy> <sz> <shx> <shy> <shz> <outputFilename>");
+   NR_INFO("\t\tCreate an affine transformation matrix\n");
 
-   printf("\t-aff2rig <filename1> <filename2>\n");
-   printf("\t\tExtract the rigid component from an affine transformation matrix\n");
-   printf("\t\tfilename1 - Input transformation file name\n");
-   printf("\t\tfilename2 - Output transformation file name\n\n");
+   NR_INFO("\t-aff2rig <filename1> <filename2>");
+   NR_INFO("\t\tExtract the rigid component from an affine transformation matrix");
+   NR_INFO("\t\tfilename1 - Input transformation file name");
+   NR_INFO("\t\tfilename2 - Output transformation file name\n");
 
-   printf("\t-flirtAff2NR <filename1> <filename2> <filename3> <filename4>\n");
-   printf("\t\tConvert a flirt (FSL) affine transformation to a NiftyReg affine transformation\n");
-   printf("\t\tfilename1 - Input FLIRT (FSL) affine transformation file name\n");
-   printf("\t\tfilename2 - Image used as a reference (-ref arg in FLIRT)\n");
-   printf("\t\tfilename3 - Image used as a floating (-in arg in FLIRT)\n");
-   printf("\t\tfilename4 - Output affine transformation file name\n\n");
+   NR_INFO("\t-flirtAff2NR <filename1> <filename2> <filename3> <filename4>");
+   NR_INFO("\t\tConvert a flirt (FSL) affine transformation to a NiftyReg affine transformation");
+   NR_INFO("\t\tfilename1 - Input FLIRT (FSL) affine transformation file name");
+   NR_INFO("\t\tfilename2 - Image used as a reference (-ref arg in FLIRT)");
+   NR_INFO("\t\tfilename3 - Image used as a floating (-in arg in FLIRT)");
+   NR_INFO("\t\tfilename4 - Output affine transformation file name\n");
 #ifdef _OPENMP
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   printf("\t-omp <int>\n\t\tNumber of thread to use with OpenMP. [%i/%i]\n",
-          defaultOpenMPValue, omp_get_num_procs());
+   NR_INFO("\t-omp <int>\n\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]");
 #endif
-   printf("\t--version\n\t\tPrint current version and exit (%s)\n",NR_VERSION);
+   NR_INFO("\t--version\n\t\tPrint current version and exit (" << NR_VERSION << ")");
 
-   printf("\n\t* The supported transformation types are:\n");
-   printf("\t\t- cubic B-Spline parametrised grid (reference image is required)\n");
-   printf("\t\t- a dense deformation field\n");
-   printf("\t\t- a dense displacement field\n");
-   printf("\t\t- a cubic B-Spline parametrised stationary velocity field (reference image is required)\n");
-   printf("\t\t- a stationary velocity deformation field\n");
-   printf("\t\t- a stationary velocity displacement field\n");
-   printf("\t\t- an affine matrix\n\n");
-   printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
-   return;
+   NR_INFO("\n\t* The supported transformation types are:");
+   NR_INFO("\t\t- cubic B-Spline parametrised grid (reference image is required)");
+   NR_INFO("\t\t- a dense deformation field");
+   NR_INFO("\t\t- a dense displacement field");
+   NR_INFO("\t\t- a cubic B-Spline parametrised stationary velocity field (reference image is required)");
+   NR_INFO("\t\t- a stationary velocity deformation field");
+   NR_INFO("\t\t- a stationary velocity displacement field");
+   NR_INFO("\t\t- an affine matrix\n");
+   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
 int main(int argc, char **argv)
@@ -176,7 +174,7 @@ int main(int argc, char **argv)
    FLAG *flag = (FLAG *)calloc(1,sizeof(FLAG));
 
 #ifdef _OPENMP
-   // Set the default number of thread
+   // Set the default number of threads
    int defaultOpenMPValue=omp_get_num_procs();
    if(getenv("OMP_NUM_THREADS")!=nullptr)
       defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
@@ -206,7 +204,7 @@ int main(int argc, char **argv)
 #ifdef _OPENMP
          omp_set_num_threads(atoi(argv[++i]));
 #else
-         reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
+         NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
          ++i;
 #endif
       }
@@ -214,7 +212,7 @@ int main(int argc, char **argv)
             strcmp(argv[i], "-V")==0 || strcmp(argv[i], "-v")==0 ||
             strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0)
       {
-         printf("%s\n",NR_VERSION);
+         NR_COUT << NR_VERSION << std::endl;
          return EXIT_SUCCESS;
       }
       else if(strcmp(argv[i],"-ref")==0 || strcmp(argv[i],"--ref")==0 || strcmp(argv[i],"-target")==0)
@@ -310,8 +308,7 @@ int main(int argc, char **argv)
       }
       else
       {
-         fprintf(stderr, "[NiftyReg ERROR] Unrecognised argument: %s\n",
-                 argv[i]);
+         NR_ERROR("Unrecognised argument: " << argv[i]);
          return EXIT_FAILURE;
       }
    }
@@ -332,8 +329,7 @@ int main(int argc, char **argv)
          inputTransformationImage=reg_io_ReadImageFile(param->inputTransName);
          if(inputTransformationImage==nullptr)
          {
-            fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n",
-                    param->inputTransName);
+            NR_ERROR("Error when reading the provided transformation: " << param->inputTransName);
             return EXIT_FAILURE;
          }
          // If the input transformation is a grid, check that the reference image has been specified
@@ -343,16 +339,14 @@ int main(int argc, char **argv)
          {
             if(!flag->referenceImageFlag)
             {
-               fprintf(stderr, "[NiftyReg ERROR] When using a control point grid parametrisation (%s),",
-                       param->inputTransName);
-               fprintf(stderr, " a reference image shoud be specified (-ref flag).\n");
+               NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," <<
+                        " a reference image should be specified (-ref flag)");
                return EXIT_FAILURE;
             }
             referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
             if(referenceImage==nullptr)
             {
-               fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
-                       param->referenceImageName);
+               NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                return EXIT_FAILURE;
             }
          }
@@ -364,16 +358,14 @@ int main(int argc, char **argv)
          reg_tool_ReadAffineFile(affineTransformation,param->inputTransName);
          if(!flag->referenceImageFlag)
          {
-            fprintf(stderr, "[NiftyReg ERROR] When using an affine transformation (%s),",
-                    param->inputTransName);
-            fprintf(stderr, " a reference image shoud be specified (-ref flag).\n");
+            NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," <<
+                     " a reference image should be specified (-ref flag)");
             return EXIT_FAILURE;
          }
          referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
          if(referenceImage==nullptr)
          {
-            fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
-                    param->referenceImageName);
+            NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
             return EXIT_FAILURE;
          }
       }
@@ -409,55 +401,55 @@ int main(int argc, char **argv)
       {
          if(affineTransformation!=nullptr)
          {
-            fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from an affine transformation\n");
+            NR_ERROR("A flow field transformation can not be generated from an affine transformation");
             return EXIT_FAILURE;
          }
          if(inputTransformationImage->intent_p1==LIN_SPLINE_GRID)
          {
-            fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from a linear spline grid\n");
+            NR_ERROR("A flow field transformation can not be generated from a linear spline grid");
             return EXIT_FAILURE;
          }
          if(inputTransformationImage->intent_p1==CUB_SPLINE_GRID)
          {
-            fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from a cubic spline grid\n");
+            NR_ERROR("A flow field transformation can not be generated from a cubic spline grid");
             return EXIT_FAILURE;
          }
          if(inputTransformationImage->intent_p1==DEF_FIELD)
          {
-            fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from a deformation field\n");
+            NR_ERROR("A flow field transformation can not be generated from a deformation field");
             return EXIT_FAILURE;
          }
          if(inputTransformationImage->intent_p1==DISP_FIELD)
          {
-            fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from a displacement field\n");
+            NR_ERROR("A flow field transformation can not be generated from a displacement field");
             return EXIT_FAILURE;
          }
          switch(static_cast<int>(inputTransformationImage->intent_p1))
          {
             break;
          case DEF_VEL_FIELD:
-            printf("[NiftyReg] The specified transformation is a deformation velocity field:\n[NiftyReg] %s\n",
-                   inputTransformationImage->fname);
+            NR_INFO("The specified transformation is a deformation velocity field:");
+            NR_INFO(inputTransformationImage->fname);
             // The current input transformation is copied
             memcpy(outputTransformationImage->data,inputTransformationImage->data,
                    outputTransformationImage->nvox*outputTransformationImage->nbyper);
             break;
          case DISP_VEL_FIELD:
-            printf("[NiftyReg] The specified transformation is a displacement velocity field:\n[NiftyReg] %s\n",
-                   inputTransformationImage->fname);
+            NR_INFO("The specified transformation is a displacement velocity field:");
+            NR_INFO(inputTransformationImage->fname);
             // The current input transformation is copied and converted
             memcpy(outputTransformationImage->data,inputTransformationImage->data,
                    outputTransformationImage->nvox*outputTransformationImage->nbyper);
             reg_getDisplacementFromDeformation(outputTransformationImage);
             break;
          case SPLINE_VEL_GRID:
-            printf("[NiftyReg] The specified transformation is a spline velocity parametrisation:\n[NiftyReg] %s\n",
-                   inputTransformationImage->fname);
+            NR_INFO("The specified transformation is a spline velocity parametrisation:");
+            NR_INFO(inputTransformationImage->fname);
             reg_spline_getFlowFieldFromVelocityGrid(inputTransformationImage,
                                                     outputTransformationImage);
             break;
          default:
-            fprintf(stderr,"[NiftyReg ERROR] Unknown input transformation type\n");
+            NR_ERROR("Unknown input transformation type");
             return EXIT_FAILURE;
          }
          outputTransformationImage->intent_p1=DEF_VEL_FIELD;
@@ -475,15 +467,15 @@ int main(int argc, char **argv)
             switch(static_cast<int>(reg_round(inputTransformationImage->intent_p1)))
             {
             case DEF_FIELD:
-               printf("[NiftyReg] The specified transformation is a deformation field:\n[NiftyReg] %s\n",
-                      inputTransformationImage->fname);
+               NR_INFO("The specified transformation is a deformation field:");
+               NR_INFO(inputTransformationImage->fname);
                // the current in transformation is copied
                memcpy(outputTransformationImage->data,inputTransformationImage->data,
                       outputTransformationImage->nvox*outputTransformationImage->nbyper);
                break;
             case DISP_FIELD:
-               printf("[NiftyReg] The specified transformation is a displacement field:\n[NiftyReg] %s\n",
-                      inputTransformationImage->fname);
+               NR_INFO("The specified transformation is a displacement field:");
+               NR_INFO(inputTransformationImage->fname);
                // the current in transformation is copied and converted
                memcpy(outputTransformationImage->data,inputTransformationImage->data,
                       outputTransformationImage->nvox*outputTransformationImage->nbyper);
@@ -491,8 +483,8 @@ int main(int argc, char **argv)
                break;
             case LIN_SPLINE_GRID:
             case CUB_SPLINE_GRID:
-               printf("[NiftyReg] The specified transformation is a spline parametrisation:\n[NiftyReg] %s\n",
-                      inputTransformationImage->fname);
+               NR_INFO("The specified transformation is a spline parametrisation:");
+               NR_INFO(inputTransformationImage->fname);
                // The output field is filled with an identity deformation field
                memset(outputTransformationImage->data,
                       0,
@@ -507,8 +499,8 @@ int main(int argc, char **argv)
                                              );
                break;
             case DEF_VEL_FIELD:
-               printf("[NiftyReg] The specified transformation is a deformation velocity field:\n[NiftyReg] %s\n",
-                      inputTransformationImage->fname);
+               NR_INFO("The specified transformation is a deformation velocity field:");
+               NR_INFO(inputTransformationImage->fname);
                // The flow field is exponentiated
                reg_defField_getDeformationFieldFromFlowField(inputTransformationImage,
                      outputTransformationImage,
@@ -516,8 +508,8 @@ int main(int argc, char **argv)
                                                             );
                break;
             case DISP_VEL_FIELD:
-               printf("[NiftyReg] The specified transformation is a displacement velocity field:\n[NiftyReg] %s\n",
-                      inputTransformationImage->fname);
+               NR_INFO("The specified transformation is a displacement velocity field:");
+               NR_INFO(inputTransformationImage->fname);
                // The input transformation is converted into a def flow
                reg_getDeformationFromDisplacement(outputTransformationImage);
                // The flow field is exponentiated
@@ -527,16 +519,15 @@ int main(int argc, char **argv)
                                                             );
                break;
             case SPLINE_VEL_GRID:
-               printf("[NiftyReg] The specified transformation is a spline velocity parametrisation:\n[NiftyReg] %s\n",
-                      inputTransformationImage->fname);
+               NR_INFO("The specified transformation is a spline velocity parametrisation:");
+               NR_INFO(inputTransformationImage->fname);
                // The spline parametrisation is converted into a dense flow and exponentiated
                reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage,
                      outputTransformationImage,
-                     false // step number is not updated
-                                                             );
+                     false); // step number is not updated
                break;
             default:
-               fprintf(stderr,"[NiftyReg ERROR] Unknown input transformation type\n");
+               NR_ERROR("Unknown input transformation type");
                return EXIT_FAILURE;
             }
          }
@@ -550,16 +541,16 @@ int main(int argc, char **argv)
       switch(static_cast<int>(round(outputTransformationImage->intent_p1)))
       {
       case DEF_FIELD:
-         printf("[NiftyReg] The deformation field has been saved as:\n[NiftyReg] %s\n",
-                param->outputTransName);
+         NR_INFO("The deformation field has been saved as:");
+         NR_INFO(param->outputTransName);
          break;
       case DISP_FIELD:
-         printf("[NiftyReg] The displacement field has been saved as:\n[NiftyReg] %s\n",
-                param->outputTransName);
+         NR_INFO("The displacement field has been saved as:");
+         NR_INFO(param->outputTransName);
          break;
       case DEF_VEL_FIELD:
-         printf("[NiftyReg] The flow field has been saved as:\n[NiftyReg] %s\n",
-                param->outputTransName);
+         NR_INFO("The flow field has been saved as:");
+         NR_INFO(param->outputTransName);
          break;
       }
       // Free the allocated images and arrays
@@ -574,7 +565,7 @@ int main(int argc, char **argv)
    /* ************************************ */
    if(flag->outputCompFlag)
    {
-      printf("[NiftyReg] Starting the composition of two transformations\n");
+      NR_INFO("Starting the composition of two transformations");
       // Create some variables
       mat44 *affine1Trans=nullptr;
       mat44 *affine2Trans=nullptr;
@@ -589,16 +580,15 @@ int main(int argc, char **argv)
       {
          affine1Trans=(mat44 *)malloc(sizeof(mat44));
          reg_tool_ReadAffineFile(affine1Trans,param->inputTransName);
-         printf("[NiftyReg] Transformation 1 is an affine parametrisation:\n[NiftyReg] %s\n",
-                param->inputTransName);
+         NR_INFO("Transformation 1 is an affine parametrisation:");
+         NR_INFO(param->inputTransName);
       }
       else
       {
          input1TransImage = reg_io_ReadImageFile(param->inputTransName);
          if(input1TransImage==nullptr)
          {
-            fprintf(stderr, "[NiftyReg ERROR] Error when reading the transformation image: %s\n",
-                    param->inputTransName);
+            NR_ERROR("Error when reading the transformation image: " << param->inputTransName);
             return EXIT_FAILURE;
          }
       }
@@ -613,16 +603,15 @@ int main(int argc, char **argv)
          input2TransImage = reg_io_ReadImageFile(param->input2TransName);
          if(input2TransImage==nullptr)
          {
-            fprintf(stderr, "[NiftyReg ERROR] Error when reading the transformation image: %s\n",
-                    param->input2TransName);
+            NR_ERROR("Error when reading the transformation image: " << param->input2TransName);
             return EXIT_FAILURE;
          }
       }
       // Check if the two input transformations are affine transformation
       if(affine1Trans!=nullptr && affine2Trans!=nullptr)
       {
-         printf("[NiftyReg] Transformation 2 is an affine parametrisation:\n[NiftyReg] %s\n",
-                param->input2TransName);
+         NR_INFO("Transformation 2 is an affine parametrisation:");
+         NR_INFO(param->input2TransName);
          *affine1Trans=reg_mat44_mul(affine2Trans,affine1Trans);
          reg_tool_WriteAffineFile(affine1Trans,param->outputTransName);
       }
@@ -633,16 +622,14 @@ int main(int argc, char **argv)
          {
             if(!flag->referenceImageFlag)
             {
-               fprintf(stderr, "[NiftyReg ERROR] When using an affine transformation (%s),",
-                       param->inputTransName);
-               fprintf(stderr, " a reference image shoud be specified (-res flag).\n");
+               NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," <<
+                        " a reference image should be specified (-res flag).");
                return EXIT_FAILURE;
             }
             referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
             if(referenceImage==nullptr)
             {
-               fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
-                       param->referenceImageName);
+               NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                return EXIT_FAILURE;
             }
          }
@@ -652,16 +639,14 @@ int main(int argc, char **argv)
          {
             if(!flag->referenceImageFlag)
             {
-               fprintf(stderr, "[NiftyReg ERROR] When using an cubic b-spline parametrisation (%s),",
-                       param->inputTransName);
-               fprintf(stderr, " a reference image shoud be specified (-ref flag).\n");
+               NR_ERROR("When using an cubic b-spline parametrisation (" << param->inputTransName << ")," <<
+                        " a reference image should be specified (-ref flag).");
                return EXIT_FAILURE;
             }
             referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
             if(referenceImage==nullptr)
             {
-               fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
-                       param->referenceImageName);
+               NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                return EXIT_FAILURE;
             }
          }
@@ -671,8 +656,7 @@ int main(int argc, char **argv)
             referenceImage2=reg_io_ReadImageHeader(param->referenceImage2Name);
             if(referenceImage2==nullptr)
             {
-               fprintf(stderr, "[NiftyReg ERROR] Error when reading the second reference image: %s\n",
-                       param->referenceImage2Name);
+               NR_ERROR("Error when reading the second reference image: " << param->referenceImage2Name);
                return EXIT_FAILURE;
             }
          }
@@ -692,8 +676,8 @@ int main(int argc, char **argv)
                output1TransImage->nbyper=sizeof(float);
                output1TransImage->datatype=NIFTI_TYPE_FLOAT32;
             }
-            printf("[NiftyReg] Transformation 1 is defined in the space of image:\n[NiftyReg] %s\n",
-                   referenceImage->fname);
+            NR_INFO("Transformation 1 is defined in the space of image:");
+            NR_INFO(referenceImage->fname);
          }
          else
          {
@@ -713,8 +697,8 @@ int main(int argc, char **argv)
          {
          case LIN_SPLINE_GRID:
          case CUB_SPLINE_GRID:
-               printf("[NiftyReg] Transformation 1 is a spline parametrisation:\n[NiftyReg] %s\n",
-                      input1TransImage->fname);
+               NR_INFO("Transformation 1 is a spline parametrisation:");
+               NR_INFO(input1TransImage->fname);
                reg_tools_multiplyValueToImage(output1TransImage,output1TransImage,0.f);
                output1TransImage->intent_p1=DISP_FIELD;
                reg_getDeformationFromDisplacement(output1TransImage);
@@ -725,52 +709,48 @@ int main(int argc, char **argv)
                                               true);
                break;
             case DEF_FIELD:
-               printf("[NiftyReg] Transformation 1 is a deformation field:\n[NiftyReg] %s\n",
-                      input1TransImage->fname);
+               NR_INFO("Transformation 1 is a deformation field:");
+               NR_INFO(input1TransImage->fname);
                memcpy(output1TransImage->data,input1TransImage->data,
                       output1TransImage->nbyper*output1TransImage->nvox);
                break;
             case DISP_FIELD:
-               printf("[NiftyReg] Transformation 1 is a displacement field:\n[NiftyReg] %s\n",
-                      input1TransImage->fname);
+               NR_INFO("Transformation 1 is a displacement field:");
+               NR_INFO(input1TransImage->fname);
                memcpy(output1TransImage->data,input1TransImage->data,
                       output1TransImage->nbyper*output1TransImage->nvox);
                reg_getDeformationFromDisplacement(output1TransImage);
                break;
             case SPLINE_VEL_GRID:
-               printf("[NiftyReg] Transformation 1 is a spline velocity field parametrisation:\n[NiftyReg] %s\n",
-                      input1TransImage->fname);
+               NR_INFO("Transformation 1 is a spline velocity field parametrisation:");
+               NR_INFO(input1TransImage->fname);
                reg_spline_getDefFieldFromVelocityGrid(input1TransImage,
                      output1TransImage,
-                     false // the number of step is not automatically updated
-                                                             );
+                     false); // the number of step is not automatically updated
                break;
             case DEF_VEL_FIELD:
-               printf("[NiftyReg] Transformation 1 is a deformation field velocity:\n[NiftyReg] %s\n",
-                      input1TransImage->fname);
+               NR_INFO("Transformation 1 is a deformation field velocity:");
+               NR_INFO(input1TransImage->fname);
                reg_defField_getDeformationFieldFromFlowField(input1TransImage,
                      output1TransImage,
-                     false // the number of step is not automatically updated
-                                                            );
+                     false); // the number of step is not automatically updated
                break;
             case DISP_VEL_FIELD:
-               printf("[NiftyReg] Transformation 1 is a displacement field velocity:\n[NiftyReg] %s\n",
-                      input1TransImage->fname);
+               NR_INFO("Transformation 1 is a displacement field velocity:");
+               NR_INFO(input1TransImage->fname);
                reg_getDeformationFromDisplacement(output1TransImage);
                reg_defField_getDeformationFieldFromFlowField(input1TransImage,
                      output1TransImage,
-                     false // the number of step is not automatically updated
-                                                            );
+                     false); // the number of step is not automatically updated
                break;
             default:
-               fprintf(stderr,"[NiftyReg ERROR] The specified first input transformation type is not recognised: %s\n",
-                       param->input2TransName);
+               NR_ERROR("The specified first input transformation type is not recognised: " << param->input2TransName);
                return EXIT_FAILURE;
             }
          if(affine2Trans!=nullptr)
          {
-            printf("[NiftyReg] Transformation 2 is an affine parametrisation:\n[NiftyReg] %s\n",
-                   param->input2TransName);
+            NR_INFO("Transformation 2 is an affine parametrisation:");
+            NR_INFO(param->input2TransName);
             // The field is created using the previous image space
             output2TransImage=nifti_copy_nim_info(output1TransImage);
             output2TransImage->intent_code=NIFTI_INTENT_VECTOR;
@@ -787,8 +767,8 @@ int main(int argc, char **argv)
             {
             case LIN_SPLINE_GRID:
             case CUB_SPLINE_GRID:
-               printf("[NiftyReg] Transformation 2 is a spline parametrisation:\n[NiftyReg] %s\n",
-                      input2TransImage->fname);
+               NR_INFO("Transformation 2 is a spline parametrisation:");
+               NR_INFO(input2TransImage->fname);
                reg_spline_getDeformationField(input2TransImage,
                                               output1TransImage,
                                               nullptr,
@@ -797,13 +777,13 @@ int main(int argc, char **argv)
                                              );
                break;
             case DEF_FIELD:
-               printf("[NiftyReg] Transformation 2 is a deformation field:\n[NiftyReg] %s\n",
-                      input2TransImage->fname);
+               NR_INFO("Transformation 2 is a deformation field:");
+               NR_INFO(input2TransImage->fname);
                reg_defField_compose(input2TransImage,output1TransImage,nullptr);
                break;
             case DISP_FIELD:
-               printf("[NiftyReg] Transformation 2 is a displacement field:\n[NiftyReg] %s\n",
-                      input2TransImage->fname);
+               NR_INFO("Transformation 2 is a displacement field:");
+               NR_INFO(input2TransImage->fname);
                reg_getDeformationFromDisplacement(input2TransImage);
                reg_defField_compose(input2TransImage,output1TransImage,nullptr);
                break;
@@ -814,8 +794,8 @@ int main(int argc, char **argv)
                   output2TransImage=nifti_copy_nim_info(referenceImage2);
                   output2TransImage->scl_slope=1.f;
                   output2TransImage->scl_inter=0.f;
-                  printf("[NiftyReg] Transformation 2 is defined in the space of image:\n[NiftyReg] %s\n",
-                         referenceImage2->fname);
+                  NR_INFO("Transformation 2 is defined in the space of image:");
+                  NR_INFO(referenceImage2->fname);
                }
                else
                {
@@ -828,8 +808,8 @@ int main(int argc, char **argv)
                output2TransImage->nbyper=output1TransImage->nbyper;
                output2TransImage->datatype=output1TransImage->datatype;
                output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper);
-               printf("[NiftyReg] Transformation 2 is a spline velocity field parametrisation:\n[NiftyReg] %s\n",
-                      input2TransImage->fname);
+               NR_INFO("Transformation 2 is a spline velocity field parametrisation:");
+               NR_INFO(input2TransImage->fname);
                reg_spline_getDefFieldFromVelocityGrid(input2TransImage,
                      output2TransImage,
                      false // the number of step is not automatically updated
@@ -837,8 +817,8 @@ int main(int argc, char **argv)
                reg_defField_compose(output2TransImage,output1TransImage,nullptr);
                break;
             case DEF_VEL_FIELD:
-               printf("[NiftyReg] Transformation 2 is a deformation field velocity:\n[NiftyReg] %s\n",
-                      input2TransImage->fname);
+               NR_INFO("Transformation 2 is a deformation field velocity:");
+               NR_INFO(input2TransImage->fname);
                output2TransImage = nifti_dup(*input2TransImage, false);
                output2TransImage->intent_p1=DEF_FIELD;
                reg_defField_getDeformationFieldFromFlowField(input2TransImage,
@@ -848,8 +828,8 @@ int main(int argc, char **argv)
                reg_defField_compose(output2TransImage,output1TransImage,nullptr);
                break;
             case DISP_VEL_FIELD:
-               printf("[NiftyReg] Transformation 2 is a displacement field velocity:\n[NiftyReg] %s\n",
-                      input2TransImage->fname);
+               NR_INFO("Transformation 2 is a displacement field velocity:");
+               NR_INFO(input2TransImage->fname);
                output2TransImage = nifti_dup(*input2TransImage, false);
                output2TransImage->intent_p1=DEF_FIELD;
                reg_getDeformationFromDisplacement(input2TransImage);
@@ -860,8 +840,7 @@ int main(int argc, char **argv)
                reg_defField_compose(output2TransImage,output1TransImage,nullptr);
                break;
             default:
-               fprintf(stderr,"[NiftyReg ERROR] The specified second input transformation type is not recognised: %s\n",
-                       param->input2TransName);
+               NR_ERROR("The specified second input transformation type is not recognised: " << param->input2TransName);
                return EXIT_FAILURE;
             }
          }
@@ -869,8 +848,8 @@ int main(int argc, char **argv)
          memset(output1TransImage->descrip, 0, 80);
          strcpy(output1TransImage->descrip, "Deformation field from NiftyReg (reg_transform -comp)");
          reg_io_WriteImageFile(output1TransImage,param->outputTransName);
-         printf("[NiftyReg] The final deformation field has been saved as:\n[NiftyReg] %s\n",
-                param->outputTransName);
+         NR_INFO("The final deformation field has been saved as:");
+         NR_INFO(param->outputTransName);
       }
       // Free allocated object
       if(affine1Trans!=nullptr) free(affine1Trans);
@@ -900,8 +879,7 @@ int main(int argc, char **argv)
          inputTransformationImage=reg_io_ReadImageFile(param->inputTransName);
          if(inputTransformationImage==nullptr)
          {
-            fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n",
-                    param->inputTransName);
+            NR_ERROR("Error when reading the provided transformation: " << param->inputTransName);
             return EXIT_FAILURE;
          }
          // If the input transformation is a grid, check that the reference image has been specified
@@ -911,16 +889,14 @@ int main(int argc, char **argv)
          {
             if(!flag->referenceImageFlag)
             {
-               fprintf(stderr, "[NiftyReg ERROR] When using a control point grid parametrisation (%s),",
-                       param->inputTransName);
-               fprintf(stderr, " a reference image shoud be specified (-ref flag).\n");
+               NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," <<
+                        " a reference image should be specified (-ref flag).");
                return EXIT_FAILURE;
             }
             referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
             if(referenceImage==nullptr)
             {
-               fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
-                       param->referenceImageName);
+               NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                return EXIT_FAILURE;
             }
          }
@@ -932,16 +908,14 @@ int main(int argc, char **argv)
          reg_tool_ReadAffineFile(affineTransformation,param->inputTransName);
          if(!flag->referenceImageFlag)
          {
-            fprintf(stderr, "[NiftyReg ERROR] When using an affine transformation (%s),",
-                    param->inputTransName);
-            fprintf(stderr, " a reference image shoud be specified (-ref flag).\n");
+            NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," <<
+                     " a reference image should be specified (-ref flag).");
             return EXIT_FAILURE;
          }
          referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
          if(referenceImage==nullptr)
          {
-            fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
-                    param->referenceImageName);
+            NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
             return EXIT_FAILURE;
          }
       }
@@ -982,15 +956,15 @@ int main(int argc, char **argv)
          switch(static_cast<int>(reg_round(inputTransformationImage->intent_p1)))
          {
          case DEF_FIELD:
-            printf("[NiftyReg] The specified transformation is a deformation field:\n[NiftyReg] %s\n",
-                   inputTransformationImage->fname);
+            NR_INFO("The specified transformation is a deformation field:");
+            NR_INFO(inputTransformationImage->fname);
             // the current in transformation is copied
             memcpy(deformationFieldImage->data,inputTransformationImage->data,
                    deformationFieldImage->nvox*deformationFieldImage->nbyper);
             break;
          case DISP_FIELD:
-            printf("[NiftyReg] The specified transformation is a displacement field:\n[NiftyReg] %s\n",
-                   inputTransformationImage->fname);
+            NR_INFO("The specified transformation is a displacement field:");
+            NR_INFO(inputTransformationImage->fname);
             // the current in transformation is copied and converted
             memcpy(deformationFieldImage->data,inputTransformationImage->data,
                    deformationFieldImage->nvox*deformationFieldImage->nbyper);
@@ -998,8 +972,8 @@ int main(int argc, char **argv)
             break;
          case LIN_SPLINE_GRID:
          case CUB_SPLINE_GRID:
-            printf("[NiftyReg] The specified transformation is a spline parametrisation:\n[NiftyReg] %s\n",
-                   inputTransformationImage->fname);
+            NR_INFO("The specified transformation is a spline parametrisation:");
+            NR_INFO(inputTransformationImage->fname);
             // The deformation field is filled with an identity deformation field
             memset(deformationFieldImage->data,
                    0,
@@ -1014,8 +988,8 @@ int main(int argc, char **argv)
                                            );
             break;
          case DEF_VEL_FIELD:
-            printf("[NiftyReg] The specified transformation is a deformation velocity field:\n[NiftyReg] %s\n",
-                   inputTransformationImage->fname);
+            NR_INFO("The specified transformation is a deformation velocity field:");
+            NR_INFO(inputTransformationImage->fname);
             // The flow field is exponentiated
             reg_defField_getDeformationFieldFromFlowField(inputTransformationImage,
                                                           deformationFieldImage,
@@ -1023,8 +997,8 @@ int main(int argc, char **argv)
                                                           );
             break;
          case DISP_VEL_FIELD:
-            printf("[NiftyReg] The specified transformation is a displacement velocity field:\n[NiftyReg] %s\n",
-                   inputTransformationImage->fname);
+            NR_INFO("The specified transformation is a displacement velocity field:");
+            NR_INFO(inputTransformationImage->fname);
             // The input transformation is converted into a def flow
             reg_getDeformationFromDisplacement(deformationFieldImage);
             // The flow field is exponentiated
@@ -1034,8 +1008,8 @@ int main(int argc, char **argv)
                                                           );
             break;
          case SPLINE_VEL_GRID:
-            printf("[NiftyReg] The specified transformation is a spline velocity parametrisation:\n[NiftyReg] %s\n",
-                   inputTransformationImage->fname);
+            NR_INFO("The specified transformation is a spline velocity parametrisation:");
+            NR_INFO(inputTransformationImage->fname);
             // The spline parametrisation is converted into a dense flow and exponentiated
             reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage,
                                                    deformationFieldImage,
@@ -1043,7 +1017,7 @@ int main(int argc, char **argv)
                                                    );
             break;
          default:
-            fprintf(stderr,"[NiftyReg ERROR] Unknown input transformation type\n");
+            NR_ERROR("Unknown input transformation type");
             return EXIT_FAILURE;
          }
       }
@@ -1065,15 +1039,15 @@ int main(int argc, char **argv)
       size_t landmarkNumber = inputMatrixSize.first;
       size_t n = inputMatrixSize.second;
       if(n==2 && deformationFieldImage->nz>1){
-         reg_print_msg_error("2 values per line are expected for 2D images");
+         NR_ERROR("2 values per line are expected for 2D images");
          return EXIT_FAILURE;
       }
       else if(n==3 && deformationFieldImage->nz<2){
-         reg_print_msg_error("3 values per line are expected for 3D images");
+         NR_ERROR("3 values per line are expected for 3D images");
          return EXIT_FAILURE;
       }
       else if(n!=2 && n!=3){
-         reg_print_msg_error("2 or 3 values are expected per line");
+         NR_ERROR("2 or 3 values are expected per line");
          return EXIT_FAILURE;
       }
       float **allLandmarks = reg_tool_ReadMatrixFile<float>(param->inputLandmarkName,
@@ -1124,8 +1098,7 @@ int main(int argc, char **argv)
       nifti_image *image = reg_io_ReadImageFile(param->inputTransName);
       if(image==nullptr)
       {
-         fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n",
-                 param->inputTransName);
+         NR_ERROR("Error when reading the input image: " << param->inputTransName);
          return EXIT_FAILURE;
       }
       // Read the affine transformation
@@ -1179,8 +1152,7 @@ int main(int argc, char **argv)
          inputTransImage = reg_io_ReadImageFile(param->inputTransName);
          if(inputTransImage==nullptr)
          {
-            fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n",
-                    param->inputTransName);
+            NR_ERROR("Error when reading the input image: " << param->inputTransName);
             return EXIT_FAILURE;
          }
          switch(reg_round(inputTransImage->intent_p1))
@@ -1218,8 +1190,7 @@ int main(int argc, char **argv)
             --inputTransImage->intent_p2;
             break;
          default:
-            fprintf(stderr,"[NiftyReg ERROR] The specified input transformation type is not recognised: %s\n",
-                    param->inputTransName);
+            NR_ERROR("The specified input transformation type is not recognised: " << param->inputTransName);
             return EXIT_FAILURE;
          }
          // Save the image
@@ -1237,16 +1208,14 @@ int main(int argc, char **argv)
       nifti_image *inputTransImage = reg_io_ReadImageFile(param->inputTransName);
       if(inputTransImage==nullptr)
       {
-         fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n",
-                 param->inputTransName);
+         NR_ERROR("Error when reading the input image: " << param->inputTransName);
          return EXIT_FAILURE;
       }
       // Read the provided floating space image
       nifti_image *floatingImage = reg_io_ReadImageFile(param->input2TransName);
       if(floatingImage==nullptr)
       {
-         fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n",
-                 param->input2TransName);
+         NR_ERROR("Error when reading the input image: " << param->input2TransName);
          return EXIT_FAILURE;
       }
       // Convert the spline parametrisation into a dense deformation parametrisation
@@ -1257,16 +1226,14 @@ int main(int argc, char **argv)
          // Read the reference image
          if(!flag->referenceImageFlag)
          {
-            fprintf(stderr, "[NiftyReg ERROR] When using an spline parametrisation transformation (%s),",
-                    param->inputTransName);
-            fprintf(stderr, " a reference image shoud be specified (-res flag).\n");
+            NR_ERROR("When using an spline parametrisation transformation (" << param->inputTransName << ")," <<
+                     " a reference image should be specified (-ref flag).");
             return EXIT_FAILURE;
          }
          nifti_image *referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
          if(referenceImage==nullptr)
          {
-            fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n",
-                    param->referenceImageName);
+            NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
             return EXIT_FAILURE;
          }
          // Create a deformation field or a flow field
@@ -1374,8 +1341,7 @@ int main(int argc, char **argv)
          break;
       }
       default:
-         fprintf(stderr,"[NiftyReg ERROR] The specified input transformation type is not recognised: %s\n",
-                 param->inputTransName);
+         NR_ERROR("The specified input transformation type is not recognised: " << param->inputTransName);
          return EXIT_FAILURE;
       }
       // Save the inverted transformation
diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt
index b546a992..1a3bda5a 100644
--- a/reg-io/CMakeLists.txt
+++ b/reg-io/CMakeLists.txt
@@ -25,7 +25,7 @@ endif(USE_NRRD)
 SET(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${LIBRARIES}")
 
 # Create the reg_io library
-add_library(_reg_ReadWriteImage _reg_ReadWriteImage.cpp _reg_ReadWriteMatrix.cpp _reg_ReadWriteBinary.cpp _reg_stringFormat.cpp)
+add_library(_reg_ReadWriteImage _reg_ReadWriteImage.cpp _reg_ReadWriteMatrix.cpp _reg_ReadWriteBinary.cpp)
 target_link_libraries(_reg_ReadWriteImage ${LIBRARIES})
 install(TARGETS _reg_ReadWriteImage
         RUNTIME DESTINATION bin COMPONENT Development
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index ce159bb2..0c568c05 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1411,9 +1411,7 @@ class NiftiImage
             refCount = source.refCount;
             acquire(source.image);
         }
-#ifndef NDEBUG
-        Rc_printf("Creating NiftiImage (v%d) with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+        RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from NiftiImage)", RNIFTI_NIFTILIB_VERSION, this->image);
     }
 
     /**
@@ -1424,9 +1422,7 @@ class NiftiImage
         : NiftiImage()
     {
         swap(*this, source);
-#ifndef NDEBUG
-        Rc_printf("Acquiring NiftiImage (v%d) with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+        RN_DEBUG("Acquiring NiftiImage (v%d) with pointer %p (from NiftiImage)", RNIFTI_NIFTILIB_VERSION, this->image);
     }
 
     /**
@@ -1437,9 +1433,7 @@ class NiftiImage
         : NiftiImage()
     {
         this->copy(source);
-#ifndef NDEBUG
-        Rc_printf("Creating NiftiImage (v%d) with pointer %p (from Block)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+        RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from Block)", RNIFTI_NIFTILIB_VERSION, this->image);
     }
 
     /**
@@ -1454,9 +1448,7 @@ class NiftiImage
             this->copy(image, copy);
         else
             acquire(image);
-#ifndef NDEBUG
-        Rc_printf("Creating NiftiImage (v%d) with pointer %p (from pointer)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+        RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from pointer)", RNIFTI_NIFTILIB_VERSION, this->image);
     }
 
     /**
@@ -1559,9 +1551,7 @@ class NiftiImage
     NiftiImage & operator= (const Block &source)
     {
         copy(source);
-#ifndef NDEBUG
-        Rc_printf("Creating NiftiImage (v%d) with pointer %p (from Block)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+        RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from Block)", RNIFTI_NIFTILIB_VERSION, this->image);
         return *this;
     }
 
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index 6d12dbbc..44085013 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -121,9 +121,7 @@ inline nifti1_image * convertImageV2to1 (nifti2_image *image)
 
     nifti1_image *result = (nifti1_image *) calloc(1, sizeof(nifti1_image));
 
-#ifndef NDEBUG
-    Rc_printf("Converting v2 image with pointer %p to v1 image with pointer %p\n", image, result);
-#endif
+    RN_DEBUG("Converting v2 image with pointer %p to v1 image with pointer %p", image, result);
 
     // We assume that each block of a given type is stored contiguously like an array - this should be the case, but may not be guaranteed
     std::transform(&image->ndim, &image->ndim + 16, &result->ndim, ElementConverter<int>());
@@ -177,9 +175,7 @@ inline nifti2_image * convertImageV1to2 (nifti1_image *image)
 
     nifti2_image *result = (nifti2_image *) calloc(1, sizeof(nifti2_image));
 
-#ifndef NDEBUG
-    Rc_printf("Converting v1 image with pointer %p to v2 image with pointer %p\n", image, result);
-#endif
+    RN_DEBUG("Converting v1 image with pointer %p to v2 image with pointer %p", image, result);
 
     std::transform(&image->ndim, &image->ndim + 16, &result->ndim, ElementConverter<int64_t>());
     result->nvox = static_cast<int64_t>(image->nvox);
@@ -403,7 +399,7 @@ inline void addAttributes (const SEXP pointer, const NiftiImage &source, const b
 
 #endif  // USING_R
 
-}       // internal namespace
+} // internal namespace
 
 template <typename Type, bool alpha>
 inline void NiftiImageData::ConcreteTypeHandler<Type,alpha>::minmax (void *ptr, const size_t length, double *min, double *max) const
@@ -729,10 +725,7 @@ inline void NiftiImage::acquire (nifti_image * const image)
             this->refCount = new int(1);
         else
             (*this->refCount)++;
-
-#ifndef NDEBUG
-        Rc_printf("Acquiring pointer %p (v%d; reference count is %d)\n", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount);
-#endif
+        RN_DEBUG("Acquiring pointer %p (v%d; reference count is %d)", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount);
     }
 }
 
@@ -743,9 +736,7 @@ inline void NiftiImage::release ()
         if (this->refCount != nullptr)
         {
             (*this->refCount)--;
-#ifndef NDEBUG
-            Rc_printf("Releasing pointer %p (v%d; reference count is %d)\n", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount);
-#endif
+            RN_DEBUG("Releasing pointer %p (v%d; reference count is %d)", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount);
             if (*this->refCount < 1)
             {
 #if RNIFTI_NIFTILIB_VERSION == 1
@@ -759,7 +750,7 @@ inline void NiftiImage::release ()
             }
         }
         else
-            Rc_printf("Releasing untracked object %p", this->image);
+            RN_DEBUG("Releasing untracked object %p", this->image);
     }
 }
 
@@ -1163,9 +1154,7 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo
 #endif
     }
 
-#ifndef NDEBUG
-    Rc_printf("Creating NiftiImage (v%d) with pointer %p (from SEXP)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+    RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from SEXP)", RNIFTI_NIFTILIB_VERSION, this->image);
 }
 
 #endif // USING_R
@@ -1248,18 +1237,14 @@ inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const int datatype
     : NiftiImage()
 {
     initFromDims(dim, datatype);
-#ifndef NDEBUG
-    Rc_printf("Creating NiftiImage (v%d) with pointer %p (from dims)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+    RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from dims)", RNIFTI_NIFTILIB_VERSION, this->image);
 }
 
 inline NiftiImage::NiftiImage (const std::vector<dim_t> &dim, const std::string &datatype)
     : NiftiImage()
 {
     initFromDims(dim, internal::stringToDatatype(datatype));
-#ifndef NDEBUG
-    Rc_printf("Creating NiftiImage (v%d) with pointer %p (from dims)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+    RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from dims)", RNIFTI_NIFTILIB_VERSION, this->image);
 }
 
 inline NiftiImage::NiftiImage (const std::string &path, const bool readData)
@@ -1276,9 +1261,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const bool readData)
 
     correctDimensions();
 
-#ifndef NDEBUG
-    Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+    RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from string)", RNIFTI_NIFTILIB_VERSION, this->image);
 }
 
 inline NiftiImage::NiftiImage (const std::string &path, const std::vector<dim_t> &volumes)
@@ -1317,9 +1300,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector<dim_t>
 
     correctDimensions();
 
-#ifndef NDEBUG
-    Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string and volume vector)\n", RNIFTI_NIFTILIB_VERSION, this->image);
-#endif
+    RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from string and volume vector)", RNIFTI_NIFTILIB_VERSION, this->image);
 }
 
 inline void NiftiImage::updatePixdim (const std::vector<pixdim_t> &pixdim)
diff --git a/reg-io/RNifti/NiftiImage_print.h b/reg-io/RNifti/NiftiImage_print.h
index 8d8bc42e..2390a2ee 100644
--- a/reg-io/RNifti/NiftiImage_print.h
+++ b/reg-io/RNifti/NiftiImage_print.h
@@ -31,3 +31,9 @@
 #define Rprintf(...) fprintf(stderr, __VA_ARGS__)
 
 #endif // USING_R
+
+#ifndef NDEBUG
+#define RN_DEBUG(format,...) Rc_printf("[RNifti DEBUG] " format "\n", __VA_ARGS__)
+#else
+#define RN_DEBUG(format,...)
+#endif
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index 6e6b0663..4902881d 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -11,7 +11,6 @@
 
 #include "_reg_ReadWriteImage.h"
 #include "_reg_tools.h"
-#include "_reg_stringFormat.h"
 #include <filesystem>
 
 /* *************************************************************** */
@@ -50,8 +49,7 @@ int reg_io_checkFileFormat(const std::string& filename) {
         return NR_NRRD_FORMAT;
 #endif
     else {
-        reg_print_fct_warn("reg_io_checkFileFormat");
-        reg_print_msg_warn("No filename extension provided - the Nifti library is used by default");
+        NR_WARN_WFCT("No filename extension provided - the Nifti library is used by default");
     }
 
     return NR_NII_FORMAT;
@@ -124,11 +122,9 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) {
     // Check if the specified directory exists
     std::filesystem::path p(filename);
     p = p.parent_path();
-    if (!std::filesystem::exists(p) && p != std::filesystem::path()) {
-        std::cerr << "The specified folder to save the following file does not exist:" << std::endl;
-        std::cerr << filename << std::endl;
-        reg_exit();
-    }
+    if (!std::filesystem::exists(p) && p != std::filesystem::path())
+        NR_FATAL_ERROR("The specified folder to save the following file does not exist: "s + filename);
+
     // First read the file format in order to use the correct library
     int fileFormat = reg_io_checkFileFormat(filename);
 
@@ -144,9 +140,7 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) {
         // the filename is converted to nifti
         fname = filename;
         fname.replace(fname.find(".png"), 4, ".nii.gz");
-        reg_print_msg_warn("The file can not be saved as png and is converted to nifti");
-        char text[255]; sprintf(text, "%s -> %s", filename, fname.c_str());
-        reg_print_msg_warn(text);
+        NR_WARN("The file can not be saved as png and is converted to nifti " << filename << " -> " << fname);
         filename = fname.c_str();
         fileFormat = NR_NII_FORMAT;
     }
@@ -172,21 +166,21 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) {
 /* *************************************************************** */
 template <class DataType>
 void reg_io_displayImageData1(nifti_image *image) {
-    reg_print_msg_debug("image values:");
-    DataType *data = static_cast<DataType *>(image->data);
-    std::string text;
+    NR_DEBUG("Image values:");
+    const DataType *data = static_cast<DataType*>(image->data);
+    const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(image, 3);
 
     size_t voxelIndex = 0;
     for (int z = 0; z < image->nz; z++) {
         for (int y = 0; y < image->ny; y++) {
             for (int x = 0; x < image->nx; x++) {
-                text = stringFormat("[%d - %d - %d] = [", x, y, z);
-                for (int tu = 0; tu < image->nt * image->nu; ++tu) {
-                    text = stringFormat("%s%g ", text.c_str(),
-                                        static_cast<double>(data[voxelIndex + tu * NiftiImage::calcVoxelNumber(image, 3)]));
-                }
-                text = stringFormat("%s]", text.c_str());
-                reg_print_msg_debug(text.c_str());
+                std::string text = "[" + std::to_string(x) + " - " + std::to_string(y) + " - " + std::to_string(z) + "] = [";
+                for (int tu = 0; tu < image->nt * image->nu; ++tu)
+                    text += std::to_string(static_cast<double>(data[voxelIndex + tu * nVoxelsPerVolume])) + " ";
+                if (text.back() == ' ')
+                    text.pop_back();
+                text += "]";
+                NR_DEBUG(text);
             }
         }
     }
@@ -219,9 +213,7 @@ void reg_io_displayImageData(nifti_image *image) {
         reg_io_displayImageData1<double>(image);
         break;
     default:
-        reg_print_fct_error("reg_io_displayImageData");
-        reg_print_msg_error("Unsupported datatype");
-        reg_exit();
+        NR_FATAL_ERROR("Unsupported datatype");
     }
 }
 /* *************************************************************** */
diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h
index 1c39bfdb..a012f6c0 100644
--- a/reg-io/_reg_ReadWriteImage.h
+++ b/reg-io/_reg_ReadWriteImage.h
@@ -14,8 +14,8 @@
 
 #pragma once
 
-#include "niftilib/nifti1_io.h"
 #include <string>
+#include "_reg_tools.h"
 
 #include "reg_png.h"
 #ifdef _USE_NRRD
diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp
index 4881bedf..48f8316d 100644
--- a/reg-io/_reg_ReadWriteMatrix.cpp
+++ b/reg-io/_reg_ReadWriteMatrix.cpp
@@ -1,5 +1,4 @@
 #include "_reg_ReadWriteMatrix.h"
-#include "_reg_maths.h"
 #include <string>
 #include <filesystem>
 
@@ -24,16 +23,11 @@ void reg_tool_ReadAffineFile(mat44 *mat,
             if (i > 3) break;
         }
     } else {
-        char text[255]; sprintf(text, "The affine file can not be read: %s", fileName);
-        reg_print_fct_error("reg_tool_ReadAffineFile");
-        reg_print_msg_error(text);
-        reg_exit();
+        NR_FATAL_ERROR("The affine file can not be read: "s + fileName);
     }
     affineFile.close();
 
-#ifndef NDEBUG
-    reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] Read affine transformation");
-#endif
+    NR_MAT44(*mat, "Read affine transformation");
 
     if (flirtFile) {
         mat44 absoluteReference;
@@ -46,19 +40,15 @@ void reg_tool_ReadAffineFile(mat44 *mat,
         //If the reference sform is defined, it is used; qform otherwise;
         mat44 *referenceMatrix;
         if (referenceImage->sform_code > 0) {
-            referenceMatrix = &(referenceImage->sto_xyz);
-#ifndef NDEBUG
-            reg_print_msg_debug("The reference sform matrix is defined and used");
-#endif
-        } else referenceMatrix = &(referenceImage->qto_xyz);
+            referenceMatrix = &referenceImage->sto_xyz;
+            NR_DEBUG("The reference sform matrix is defined and used");
+        } else referenceMatrix = &referenceImage->qto_xyz;
         //If the floating sform is defined, it is used; qform otherwise;
         mat44 *floatingMatrix;
         if (floatingImage->sform_code > 0) {
-#ifndef NDEBUG
-            reg_print_msg_debug(" The floating sform matrix is defined and used");
-#endif
-            floatingMatrix = &(floatingImage->sto_xyz);
-        } else floatingMatrix = &(floatingImage->qto_xyz);
+            NR_DEBUG("The floating sform matrix is defined and used");
+            floatingMatrix = &floatingImage->sto_xyz;
+        } else floatingMatrix = &floatingImage->qto_xyz;
 
         for (int i = 0; i < 3; i++) {
             absoluteReference.m[i][i] = sqrt(referenceMatrix->m[0][i] * referenceMatrix->m[0][i]
@@ -69,14 +59,13 @@ void reg_tool_ReadAffineFile(mat44 *mat,
                                             + floatingMatrix->m[2][i] * floatingMatrix->m[2][i]);
         }
         absoluteReference.m[3][3] = absoluteFloating.m[3][3] = 1.0;
-#ifndef NDEBUG
-        reg_print_msg_debug("An flirt affine file is assumed and is converted to a real word affine matrix");
-        reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] Matrix read from the input file");
-        reg_mat44_disp(referenceMatrix, (char *)"[NiftyReg DEBUG] Reference Matrix");
-        reg_mat44_disp(floatingMatrix, (char *)"[NiftyReg DEBUG] Floating Matrix");
-        reg_mat44_disp(&(absoluteReference), (char *)"[NiftyReg DEBUG] Reference absolute Matrix");
-        reg_mat44_disp(&(absoluteFloating), (char *)"[NiftyReg DEBUG] Floating absolute Matrix");
-#endif
+
+        NR_DEBUG("An flirt affine file is assumed and is converted to a real word affine matrix");
+        NR_MAT44(*mat, "Matrix read from the input file");
+        NR_MAT44(*referenceMatrix, "Reference Matrix");
+        NR_MAT44(*floatingMatrix, "Floating Matrix");
+        NR_MAT44(absoluteReference, "Reference absolute Matrix");
+        NR_MAT44(absoluteFloating, "Floating absolute Matrix");
 
         absoluteFloating = nifti_mat44_inverse(absoluteFloating);
         *mat = nifti_mat44_inverse(*mat);
@@ -88,9 +77,7 @@ void reg_tool_ReadAffineFile(mat44 *mat,
         *mat = reg_mat44_mul(mat, &tmp);
     }
 
-#ifndef NDEBUG
-    reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] Affine matrix");
-#endif
+    NR_MAT44(*mat, "Affine matrix");
 }
 /* *************************************************************** */
 void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) {
@@ -99,18 +86,10 @@ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) {
     if (affineFile.is_open()) {
         int i = 0;
         double value1, value2, value3, value4;
-#ifndef NDEBUG
-        char text_header[255];
-        sprintf(text_header, "Affine matrix values:");
-        reg_print_msg_debug(text_header);
-#endif
+        NR_DEBUG("Affine matrix values:");
         while (!affineFile.eof()) {
             affineFile >> value1 >> value2 >> value3 >> value4;
-#ifndef NDEBUG
-            char text[255];
-            sprintf(text, "%f - %f - %f - %f", value1, value2, value3, value4);
-            reg_print_msg_debug(text);
-#endif
+            NR_DEBUG(value1 << " - " << value2 << " - " << value3 << " - " << value4);
             mat->m[i][0] = (float)value1;
             mat->m[i][1] = (float)value2;
             mat->m[i][2] = (float)value3;
@@ -119,10 +98,7 @@ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) {
             if (i > 3) break;
         }
     } else {
-        char text[255]; sprintf(text, "The affine file can not be read: %s", fileName);
-        reg_print_fct_error("reg_tool_ReadAffineFile");
-        reg_print_msg_error(text);
-        reg_exit();
+        NR_FATAL_ERROR("The affine file can not be read: "s + fileName);
     }
     affineFile.close();
 }
@@ -131,13 +107,9 @@ void reg_tool_WriteAffineFile(const mat44 *mat, const char *fileName) {
     // Check if the specified directory exists
     std::filesystem::path p(fileName);
     p = p.parent_path();
-    if (!std::filesystem::exists(p) && p != std::filesystem::path()) {
-        std::cerr << "The specified folder to save the following file does not exist:" << std::endl;
-        std::cerr << fileName << std::endl;
-        reg_exit();
-    }
-    FILE *affineFile;
-    affineFile = fopen(fileName, "w");
+    if (!std::filesystem::exists(p) && p != std::filesystem::path())
+        NR_FATAL_ERROR("The specified folder to save the following file does not exist: "s + fileName);
+    FILE *affineFile = fopen(fileName, "w");
     for (int i = 0; i < 4; i++)
         fprintf(affineFile, "%.7g %.7g %.7g %.7g\n", mat->m[i][0], mat->m[i][1], mat->m[i][2], mat->m[i][3]);
     fclose(affineFile);
@@ -169,11 +141,7 @@ std::pair<size_t, size_t> reg_tool_sizeInputMatrixFile(char *filename) {
         //
         matrixFile.close();
     } else {
-        char text[255];
-        sprintf(text, "The file can not be read: %s", filename);
-        reg_print_fct_error("reg_tool_ReadMatrixFile");
-        reg_print_msg_error(text);
-        reg_exit();
+        NR_FATAL_ERROR("The file can not be read: "s + filename);
     }
     return { nbLine, nbColumn };
 }
@@ -225,11 +193,7 @@ T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) {
         }
         matrixFile.close();
     } else {
-        char text[255];
-        sprintf(text, "The matrix file can not be read: %s", filename);
-        reg_print_fct_error("reg_tool_ReadMatrixFile");
-        reg_print_msg_error(text);
-        reg_exit();
+        NR_FATAL_ERROR("The matrix file can not be read: "s + filename);
     }
 
     return mat;
@@ -255,16 +219,11 @@ mat44* reg_tool_ReadMat44File(char *fileName) {
             if (i > 3) break;
         }
     } else {
-        char text[255]; sprintf(text, "The mat44 file can not be read: %s", fileName);
-        reg_print_fct_error("reg_tool_ReadMat44File");
-        reg_print_msg_error(text);
-        reg_exit();
+        NR_FATAL_ERROR("The mat44 file can not be read: "s + fileName);
     }
     matrixFile.close();
 
-#ifndef NDEBUG
-    reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] mat44 matrix");
-#endif
+    NR_MAT44(*mat, "mat44 matrix");
 
     return mat;
 }
diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h
index ce314ba5..01e6a5b2 100644
--- a/reg-io/_reg_ReadWriteMatrix.h
+++ b/reg-io/_reg_ReadWriteMatrix.h
@@ -14,10 +14,7 @@
 
 #pragma once
 
-#include "niftilib/nifti1_io.h"
-//STD
-#include <fstream>
-#include <utility>
+#include "_reg_tools.h"
 
 /** @brief Read a text file that contains a affine transformation
  * and store it into a mat44 structure. This function can also read
diff --git a/reg-io/_reg_stringFormat.cpp b/reg-io/_reg_stringFormat.cpp
deleted file mode 100644
index ddf5e0c6..00000000
--- a/reg-io/_reg_stringFormat.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * @file _reg_stringFormat.h
- * @author Marc Modat
- * @date 13/03/2017
- * @brief Simple function for safer formatted string use..
- *
- *  Created by Ian Malone on 13/03/2017.
- *  Copyright (c) 2017-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-
-/**
- * http://stackoverflow.com/a/26221725
- * but re-written with variadic arguments from C (better supported prior to 
- * C++11 than the C++ form) and avoid unique_ptr use, at the cost of
- * copying the string a second time.
- */
-#include "_reg_stringFormat.h"
-
-#include <string>
-#include <cstdio>
-#include <stdarg.h>
-
-std::string stringFormat( const std::string format, ... )
-{
-  using namespace std;
-  va_list ap, ap2;
-  va_start(ap, format);
-  va_copy(ap2,ap);
-  size_t size = vsnprintf( (char*)0, 0, format.c_str(), ap ) + 1; // Extra space for '\0'
-  va_end(ap);
-  char *buffer = 0;
-  buffer = new char[size];
-  vsnprintf( buffer, size, format.c_str(), ap2 );
-  string result(buffer);
-  delete[] buffer;
-  va_end(ap2);
-  return result;
-}
diff --git a/reg-io/_reg_stringFormat.h b/reg-io/_reg_stringFormat.h
deleted file mode 100644
index 57b72c4f..00000000
--- a/reg-io/_reg_stringFormat.h
+++ /dev/null
@@ -1,11 +0,0 @@
-// http://stackoverflow.com/a/26221725
-// but re-written with variadic arguments from C (better supported prior to 
-// C++11 than the C++ form) and avoid unique_ptr use.
-#include <string>
-#include <stdarg.h>
-
-/*
-template<typename ... Args>
-std::string stringFormat( const std::string& format, Args ... args )
-*/
-std::string stringFormat( const std::string format, ... );
diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp
index 7d57f16b..60b79416 100644
--- a/reg-io/nrrd/reg_nrrd.cpp
+++ b/reg-io/nrrd/reg_nrrd.cpp
@@ -83,11 +83,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
 {
    // Check if the file can be converted
    if(nrrdImage->dim>7)
-   {
-      reg_print_fct_error("reg_io_nrdd2nifti");
-      reg_print_msg_error("The Nifti format only support 7 dimensions");
-      reg_exit();
-   }
+      NR_FATAL_ERROR("The Nifti format only support 7 dimensions");
 
    // Need first to extract the input image dimension
    int dim[8]= {1,1,1,1,1,1,1,1};
@@ -138,9 +134,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
       niiImage=nifti_make_new_nim(dim,NIFTI_TYPE_FLOAT64,true);
       break;
    default:
-      reg_print_fct_error("reg_io_nrdd2nifti");
-      reg_print_msg_error("The data type is not supported");
-      reg_exit();
+      NR_FATAL_ERROR("The data type is not supported");
    }
 
    // The data are copied over from the nrrd to the nifti structure
@@ -198,8 +192,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
            nrrdImage->space!=nrrdSpaceScannerXYZTime )
    {
       niiImage->qform_code=0;
-      reg_print_fct_warn("reg_io_nrdd2nifti");
-      reg_print_msg_warn("nrrd space value unrecognised: the Nifti qform is set to identity");
+      NR_WARN_WFCT("nrrd space value unrecognised: the Nifti qform is set to identity");
    }
    if(niiImage->qform_code>0)
    {
@@ -312,9 +305,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
          reg_convertVectorField_nrrd_to_nifti<double>(nrrdImage,niiImage);
          break;
       default:
-         reg_print_fct_error("reg_convertVectorField_nrrd_to_nifti");
-         reg_print_msg_error("Unsupported datatype. Exit");
-         reg_exit();
+         NR_FATAL_ERROR("Unsupported datatype");
       }
       // The orientation flag are re-organised
       niiImage->ndim=5;
@@ -385,9 +376,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
       nrrdAlloc_nva(nrrdImage,nrrdTypeDouble,niiImage->ndim,size);
       break;
    default:
-      reg_print_fct_error("reg_io_nifti2nrrd");
-      reg_print_msg_error("The data type is not supported. Exit");
-      reg_exit();
+      NR_FATAL_ERROR("The data type is not supported");
    }
 
    // Rescale the nii image intensity if required
@@ -437,8 +426,8 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
 //            }
 //            else{
 //                nrrdImage->space=nrrdSpaceUnknown;
-//                fprintf(stderr, "[NiftyReg WARNING] reg_io_nifti2nrrd - The nifti qform information can be stored in the space variable.\n");
-//                fprintf(stderr, "[NiftyReg WARNING] reg_io_nifti2nrrd - The space direction will be used.\n");
+//                NR_WARN_WFCT("The nifti qform information can be stored in the space variable\n"
+//                             "The space direction will be used");
 //            }
          nrrdImage->space=nrrdSpaceUnknown;
       }
@@ -491,7 +480,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
       for(int i=0; i<(niiImage->ndim<3?niiImage->ndim:3); ++i)
       {
          nrrdImage->spaceUnits[i]=(char *)malloc(200);
-         sprintf(nrrdImage->spaceUnits[i],"m");
+         strcpy(nrrdImage->spaceUnits[i], "m");
          nrrdImage->axis[i].kind=nrrdKindDomain;
       }
       break;
@@ -499,7 +488,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
       for(int i=0; i<(niiImage->ndim<3?niiImage->ndim:3); ++i)
       {
          nrrdImage->spaceUnits[i]=(char *)malloc(200);
-         sprintf(nrrdImage->spaceUnits[i],"mm");
+         strcpy(nrrdImage->spaceUnits[i],"mm");
          nrrdImage->axis[i].kind=nrrdKindDomain;
       }
       break;
@@ -507,7 +496,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
       for(int i=0; i<(niiImage->ndim<3?niiImage->ndim:3); ++i)
       {
          nrrdImage->spaceUnits[i]=(char *)malloc(200);
-         sprintf(nrrdImage->spaceUnits[i],"um");
+         strcpy(nrrdImage->spaceUnits[i], "um");
          nrrdImage->axis[i].kind=nrrdKindDomain;
       }
       break;
@@ -542,9 +531,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
          reg_convertVectorField_nifti_to_nrrd<double>(niiImage,nrrdImage);
          break;
       default:
-         reg_print_fct_error("reg_convertVectorField_nifti_to_nrrd");
-         reg_print_msg_error("The data type is not supported. Exit");
-         reg_exit();
+         NR_FATAL_ERROR("The data type is not supported");
       }
 
       // The orientation flag are re-organised
@@ -572,9 +559,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage)
       if(strcmp(niiImage->intent_name,"NREG_VEL_STEP")==0)
       {
          // The number of step is store in the nrrdImage->axis[0].label pointer
-         char temp[64];
-         sprintf(temp,"NREG_VEL_STEP %f",niiImage->intent_p1);
-         std::string str=temp;
+         const std::string str="NREG_VEL_STEP " + std::to_string(niiImage->intent_p1);
          if(nrrdImage->axis[0].label!=nullptr) free(nrrdImage->axis[0].label);
          nrrdImage->axis[0].label=(char *)malloc(str.length()*sizeof(char));
          strcpy(nrrdImage->axis[0].label,str.c_str());
@@ -601,19 +586,11 @@ Nrrd *reg_io_readNRRDfile(const char *filename)
 {
    /* create a nrrd; at this point this is just an empty container */
    Nrrd *nrrdImage = nrrdNew();
-   char *err;
 
    /* read in the nrrd from file */
    if (nrrdLoad(nrrdImage, filename, nullptr))
-   {
-      err = biffGetDone(NRRD);
-      char text[255];
-      sprintf(text, "Can not read the file \"%s\":%s\n", filename, err);
-      reg_print_fct_error("reg_io_readNRRDfile");
-      reg_print_msg_error(text);
-      free(err);
-      reg_exit();
-   }
+      NR_FATAL_ERROR("Can not read the file \""s + filename + "\": "s + biffGetDone(NRRD));
+
    return nrrdImage;
 }
 /* *************************************************************** */
@@ -628,21 +605,10 @@ void reg_io_writeNRRDfile(Nrrd *image, const char *filename)
    }
    else
    {
-      char text[255];
-      sprintf(text, "Can not compress the file: \"%s\"", filename);
-      reg_print_fct_error("reg_io_writeNRRDfile");
-      reg_print_msg_error(text);
-      reg_exit();
+      NR_FATAL_ERROR("Can not compress the file: "s + filename);
    }
 
    if (nrrdSave(filename, image, nio))
-   {
-      char text[255];
-      sprintf(text, "Can not write the file \"%s\"", filename);
-      reg_print_fct_error("reg_io_readNRRDfile");
-      reg_print_msg_error(text);
-      reg_exit();
-   }
-   return;
+      NR_FATAL_ERROR("Can not write the file: "s + filename);
 }
 /* *************************************************************** */
diff --git a/reg-io/nrrd/reg_nrrd.h b/reg-io/nrrd/reg_nrrd.h
index 5caa648b..3aac5f6c 100644
--- a/reg-io/nrrd/reg_nrrd.h
+++ b/reg-io/nrrd/reg_nrrd.h
@@ -14,11 +14,8 @@
 
 #pragma once
 
-#include "niftilib/nifti1_io.h"
 #include "NrrdIO.h"
 #include "_reg_tools.h"
-#include "_reg_maths.h"
-#include <limits>
 
 /* *************************************************************** */
 /** @brief Convert a NRRD image into a nifti image
diff --git a/reg-io/png/reg_png.cpp b/reg-io/png/reg_png.cpp
index 8c266d03..53c28b1b 100644
--- a/reg-io/png/reg_png.cpp
+++ b/reg-io/png/reg_png.cpp
@@ -20,36 +20,24 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
    FILE *pngFile=nullptr;
    pngFile = fopen(pngFileName, "rb");
    if(pngFile==nullptr)
-   {
-      char text[255];
-      sprintf(text, "Can not open the png file %s", pngFileName);
-      reg_print_fct_error("reg_io_readPNGfile");
-      reg_print_msg_error(text);
-      reg_exit();
-   }
+      NR_FATAL_ERROR("Can not open the png file: "s + pngFileName);
 
    uch sig[8];
    if (!fread(sig, 1, 8, pngFile))
-      reg_exit();
+      NR_FATAL_ERROR("Error when reading the png file: "s + pngFileName);
    if (!png_check_sig(sig, 8))
-      reg_exit();
+      NR_FATAL_ERROR("The png file is corrupted: "s + pngFileName);
    rewind(pngFile);
 
    png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
    if (!png_ptr)
-   {
-      reg_print_fct_error("reg_io_readPNGfile");
-      reg_print_msg_error("Error when reading the png file - out of memory");
-      reg_exit();
-   }
+      NR_FATAL_ERROR("Error when reading the png file - out of memory");
 
    png_infop info_ptr = png_create_info_struct(png_ptr);
    if (!info_ptr)
    {
       png_destroy_read_struct(&png_ptr, nullptr, nullptr);
-      reg_print_fct_error("reg_io_readPNGfile");
-      reg_print_msg_error("Error when reading the png file - out of memory");
-      reg_exit();
+      NR_FATAL_ERROR("Error when reading the png file - out of memory");
    }
 
    png_init_io(png_ptr, pngFile);
@@ -84,17 +72,9 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
    Channels = (int)png_get_channels(png_ptr, info_ptr);
 
    if(Channels > 3)
-   {
-      char text[255];
-      sprintf(text, "The PNG file has %i channels. Only the first three are considered for RGB to gray conversion.", Channels);
-      reg_print_fct_warn("reg_io_readPNGfile");
-      reg_print_msg_warn(text);
-   }
-   if(Channels == 2)
-   {
-      reg_print_fct_warn("reg_io_readPNGfile");
-      reg_print_msg_warn("The PNG file has 2 channels. They will be average into one single channel");
-   }
+      NR_WARN_WFCT("The PNG file has " << Channels << " channels. Only the first three are considered for RGB to gray conversion.");
+   else if(Channels == 2)
+      NR_WARN_WFCT("The PNG file has 2 channels. They will be average into one single channel");
 
    int dim[8]= {2,static_cast<int>(Width),static_cast<int>(Height),1,1,1,1,1};
    nifti_image *niiImage=nullptr;
@@ -103,7 +83,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
 
       uch *image_data;
       if ((image_data = (uch *)malloc(Width*Height*Channels*sizeof(uch))) == nullptr)
-         reg_exit();
+         NR_FATAL_ERROR("Error while allocating memory for the png file: "s + pngFileName);
 
       for (png_uint_32 i=0; i<Height; ++i)
       {
@@ -161,30 +141,17 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename)
 {
    // We first check the nifti image dimension
    if(image->nz>1 || image->nt>1 || image->nu>1 || image->nv>1 || image->nw>1)
-   {
-      reg_print_fct_error("reg_io_writePNGfile");
-      reg_print_msg_error("Image with dimension larger than 2 can be saved as png");
-      reg_exit();
-   }
+      NR_FATAL_ERROR("Image with dimension larger than 2 can be saved as png");
 
    // Check the min and max values of the nifti image
    float minValue = reg_tools_getMinValue(image, -1);
    float maxValue = reg_tools_getMaxValue(image, -1);
 
-   // Rescale the image intensites if  they are outside of the range
+   // Rescale the image intensities if they are outside of the range
    if(minValue<0 || maxValue>255)
    {
-      float newMinValue=0;
-      float newMaxValue=255;
-      reg_intensityRescale(image,
-                           0,
-                           newMinValue,
-                           newMaxValue);
-      char text[255];
-      sprintf(text, "The image intensities have been rescaled from [%g %g] to [0 255].",
-             minValue, maxValue);
-      reg_print_fct_warn("reg_io_writePNGfile");
-      reg_print_msg_warn(text);
+      reg_intensityRescale(image, 0, 0, 255);
+      NR_WARN_WFCT("The image intensities have been rescaled from [" << minValue << " " << maxValue << "] to [0 255].");
    }
 
    // The nifti image is converted as unsigned char if required
@@ -197,28 +164,17 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename)
    // Check first if the png file can be writen
    FILE *fp=fopen(filename, "wb");
    if(!fp)
-   {
-      char text[255];
-      sprintf(text,"The png file can not be written: %s", filename);
-      reg_print_fct_error("reg_io_writePNGfile");
-      reg_print_msg_error(text);
-      reg_exit();
-   }
+      NR_FATAL_ERROR("The png file can not be written: "s + filename);
+
    // The png file structures are created
    png_structp png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
    if (png_ptr==nullptr)
-   {
-      reg_print_fct_error("reg_io_writePNGfile");
-      reg_print_msg_error("The png pointer could not be created");
-      reg_exit();
-   }
+      NR_FATAL_ERROR("The png pointer could not be created");
+
    png_infop info_ptr = png_create_info_struct (png_ptr);
    if(info_ptr==nullptr)
-   {
-      reg_print_fct_error("reg_io_writePNGfile");
-      reg_print_msg_error("The png structure could not be created");
-      reg_exit();
-   }
+      NR_FATAL_ERROR("The png structure could not be created");
+
    // Set the png header information
    png_set_IHDR (png_ptr,
                  info_ptr,
diff --git a/reg-io/png/reg_png.h b/reg-io/png/reg_png.h
index d6d2a543..ad94cc21 100644
--- a/reg-io/png/reg_png.h
+++ b/reg-io/png/reg_png.h
@@ -14,7 +14,6 @@
 
 #pragma once
 
-#include "niftilib/nifti1_io.h"
 #include "_reg_tools.h"
 
 /* *************************************************************** */
diff --git a/reg-io/zlib/zutil.c b/reg-io/zlib/zutil.c
index b1c9a2e3..d55f5948 100644
--- a/reg-io/zlib/zutil.c
+++ b/reg-io/zlib/zutil.c
@@ -123,7 +123,7 @@ void z_error (m)
     char *m;
 {
     fprintf(stderr, "%s\n", m);
-    reg_exit();
+    exit(1);
 }
 #endif
 
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index afd8b4ed..265f329a 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -11,11 +11,8 @@ Content::Content(nifti_image *referenceIn,
     floating(floatingIn),
     referenceMask(referenceMaskIn),
     transformationMatrix(transformationMatrixIn) {
-    if (!referenceIn || !floatingIn) {
-        reg_print_fct_error("Content::Content()");
-        reg_print_msg_error("referenceIn or floatingIn can't be nullptr");
-        reg_exit();
-    }
+    if (!referenceIn || !floatingIn)
+        NR_FATAL_ERROR("referenceIn or floatingIn can't be nullptr");
     AllocateWarped();
     AllocateDeformationField(bytesIn);
     activeVoxelNumber = reference->nvox;
@@ -67,11 +64,8 @@ void Content::AllocateDeformationField(size_t bytes) {
         deformationField->datatype = NIFTI_TYPE_FLOAT32;
     else if (bytes == 8)
         deformationField->datatype = NIFTI_TYPE_FLOAT64;
-    else {
-        reg_print_fct_error("Content::AllocateDeformationField()");
-        reg_print_msg_error("Only float or double are expected for the deformation field");
-        reg_exit();
-    }
+    else
+        NR_FATAL_ERROR("Only float or double are expected for the deformation field");
     deformationField->intent_code = NIFTI_INTENT_VECTOR;
     memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name));
     strcpy(deformationField->intent_name, "NREG_TRANS");
diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h
index 995f1b2d..9acc6446 100644
--- a/reg-lib/ConvolutionKernel.h
+++ b/reg-lib/ConvolutionKernel.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "Kernel.h"
-#include "niftilib/nifti1_io.h"
+#include "RNifti.h"
 
 class ConvolutionKernel: public Kernel {
 public:
diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp
new file mode 100644
index 00000000..c58bd383
--- /dev/null
+++ b/reg-lib/Debug.hpp
@@ -0,0 +1,81 @@
+#pragma once
+
+#include <stdexcept>
+#include <iostream>
+#include "RNifti.h"
+
+/* *************************************************************** */
+#ifdef RNIFTYREG
+#include <R.h>  // This may have to be changed to Rcpp.h or RcppEigen.h later
+#define NR_COUT     Rcout
+#define NR_CERR     Rcerr
+#else
+#define NR_COUT     std::cout
+#define NR_CERR     std::cerr
+#endif
+/* *************************************************************** */
+namespace NiftyReg::Internal {
+/* *************************************************************** */
+inline void FatalError(const std::string& fileName, const int& line, const std::string& funcName, const std::string& msg) {
+    const std::string errMsg = "[NiftyReg ERROR] File: " + fileName + ":" + std::to_string(line) + "\n" +
+                               "[NiftyReg ERROR] Function: " + funcName + "\n" +
+                               "[NiftyReg ERROR] " + msg + "\n";
+#ifdef RNIFTYREG
+    error(errMsg.c_str());
+#else
+#ifndef __linux__
+    NR_CERR << errMsg << std::endl;
+#endif
+    throw std::runtime_error(errMsg);
+#endif
+}
+/* *************************************************************** */
+inline std::string StripFunctionName(const std::string& funcName) {
+    const size_t end = funcName.find("(");
+    if (end == std::string::npos)
+        return funcName;
+    const size_t start = funcName.rfind(" ", end);
+    if (start == std::string::npos)
+        return funcName.substr(0, end);
+    return funcName.substr(start + 1, end - start - 1);
+}
+/* *************************************************************** */
+} // namespace NiftyReg::Internal
+/* *************************************************************** */
+#ifdef _WIN32
+#define NR_FUNCTION         NiftyReg::Internal::StripFunctionName(__FUNCSIG__)
+#else
+#define NR_FUNCTION         NiftyReg::Internal::StripFunctionName(__PRETTY_FUNCTION__)
+#endif
+#define NR_ERROR(msg)       NR_CERR << "[NiftyReg ERROR] " << msg << std::endl
+#define NR_FATAL_ERROR(msg) NiftyReg::Internal::FatalError(__FILE__, __LINE__, NR_FUNCTION, msg)
+/* *************************************************************** */
+#ifndef NDEBUG
+#define NR_FUNC_CALLED()    NR_COUT << "[NiftyReg DEBUG] Function " << NR_FUNCTION << " called" << std::endl
+#define NR_DEBUG(msg)       NR_COUT << "[NiftyReg DEBUG] " << msg << std::endl
+#define NR_VERBOSE(msg)     NR_DEBUG(msg)
+#define NR_VERBOSE_APP(msg) NR_DEBUG(msg)
+#else
+#define NR_FUNC_CALLED()
+#define NR_DEBUG(msg)
+#define NR_VERBOSE(msg)     if (this->verbose) NR_COUT << "[NiftyReg DEBUG] " << msg << std::endl
+#define NR_VERBOSE_APP(msg) if (verbose) NR_COUT << "[NiftyReg DEBUG] " << msg << std::endl
+#endif
+/* *************************************************************** */
+#define NR_WARN(msg)        NR_COUT << "[NiftyReg WARNING] " << msg << std::endl
+#define NR_WARN_WFCT(msg)   NR_COUT << "[NiftyReg WARNING] Function: " << NR_FUNCTION << "\n[NiftyReg WARNING] " << msg << std::endl
+/* *************************************************************** */
+#define NR_INFO(msg)        NR_COUT << "[NiftyReg INFO] " << msg << std::endl
+/* *************************************************************** */
+#ifndef NDEBUG
+#define NR_MAT33(mat, title)            reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title))
+#define NR_MAT33_VERBOSE(mat, title)    NR_MAT33(mat, title)
+#define NR_MAT44(mat, title)            reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title))
+#define NR_MAT44_VERBOSE(mat, title)    NR_MAT44(mat, title)
+#else
+#define NR_MAT33(mat, title)
+#define NR_MAT33_VERBOSE(mat, title)    if (this->verbose) reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title))
+#define NR_MAT44(mat, title)
+#define NR_MAT44_VERBOSE(mat, title)    if (this->verbose) reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title))
+#endif
+/* *************************************************************** */
diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp
index 035da723..6dee6030 100644
--- a/reg-lib/F3dContent.cpp
+++ b/reg-lib/F3dContent.cpp
@@ -11,11 +11,8 @@ F3dContent::F3dContent(nifti_image *referenceIn,
     DefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, bytesIn),
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn),
     controlPointGrid(controlPointGridIn) {
-    if (!controlPointGridIn) {
-        reg_print_fct_error("F3dContent::F3dContent()");
-        reg_print_msg_error("controlPointGridIn can't be nullptr");
-        reg_exit();
-    }
+    if (!controlPointGridIn)
+        NR_FATAL_ERROR("controlPointGridIn can't be nullptr");
     AllocateTransformationGradient();
 }
 /* *************************************************************** */
diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp
index f7e077db..e61a7ce1 100644
--- a/reg-lib/Measure.cpp
+++ b/reg-lib/Measure.cpp
@@ -23,11 +23,10 @@ reg_measure* Measure::Create(const MeasureType& measureType) {
         return new reg_mind();
     case MeasureType::MindSsc:
         return new reg_mindssc();
+    default:
+        NR_FATAL_ERROR("Unsupported measure type");
+        return nullptr;
     }
-    reg_print_fct_error("Measure::Create");
-    reg_print_msg_error("Unsupported measure type");
-    reg_exit();
-    return nullptr;
 }
 /* *************************************************************** */
 void Measure::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) {
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 86fc226f..23c3a081 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -45,11 +45,7 @@ Platform::Platform(const PlatformType& platformTypeIn) {
         kernelFactory = new ClKernelFactory();
     }
 #endif
-    else {
-        reg_print_fct_error("Platform::Platform");
-        reg_print_msg_error("Unsupported platform type");
-        reg_exit();
-    }
+    else NR_FATAL_ERROR("Unsupported platform type");
 }
 /* *************************************************************** */
 Platform::~Platform() {
@@ -96,11 +92,8 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) {
         clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
         cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize);
         clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
-        if (CL_DEVICE_TYPE_CPU == *field) {
-            reg_print_fct_error("Platform::SetGpuIdx");
-            reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit");
-            reg_exit();
-        }
+        if (CL_DEVICE_TYPE_CPU == *field)
+            NR_FATAL_ERROR("The OpenCL kernels only support GPU devices for now");
     }
 #endif
 }
diff --git a/reg-lib/ResampleImageKernel.h b/reg-lib/ResampleImageKernel.h
index 83853cfc..d4c32991 100644
--- a/reg-lib/ResampleImageKernel.h
+++ b/reg-lib/ResampleImageKernel.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "Kernel.h"
-#include "niftilib/nifti1_io.h"
+#include "RNifti.h"
 
 class ResampleImageKernel: public Kernel {
 public:
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 620ae212..60543ebe 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -47,9 +47,7 @@ reg_aladin<T>::reg_aladin() {
     this->currentLevel = 0;
     this->gpuIdx = 999;
 
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_aladin constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -94,67 +92,37 @@ void reg_aladin<T>::SetVerbose(bool _verbose) {
 template<class T>
 int reg_aladin<T>::Check() {
     //This does all the initial checking
-    if (!this->inputReference) {
-        reg_print_fct_error("reg_aladin<T>::Check()");
-        reg_print_msg_error("No reference image has been specified or it can not be read");
-        return EXIT_FAILURE;
-    }
+    if (!this->inputReference)
+        NR_FATAL_ERROR("No reference image has been specified or it can not be read");
 
-    if (!this->inputFloating) {
-        reg_print_fct_error("reg_aladin<T>::Check()");
-        reg_print_msg_error("No floating image has been specified or it can not be read");
-        return EXIT_FAILURE;
-    }
+    if (!this->inputFloating)
+        NR_FATAL_ERROR("No floating image has been specified or it can not be read");
 
     return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template<class T>
-int reg_aladin<T>::Print() {
-    if (!this->inputReference) {
-        reg_print_fct_error("reg_aladin<T>::Print()");
-        reg_print_msg_error("No reference image has been specified");
-        return EXIT_FAILURE;
-    }
-    if (!this->inputFloating) {
-        reg_print_fct_error("reg_aladin<T>::Print()");
-        reg_print_msg_error("No floating image has been specified");
-        return EXIT_FAILURE;
-    }
+void reg_aladin<T>::Print() {
+    if (!this->inputReference)
+        NR_FATAL_ERROR("No reference image has been specified");
+    if (!this->inputFloating)
+        NR_FATAL_ERROR("No floating image has been specified");
 
     /* *********************************** */
     /* DISPLAY THE REGISTRATION PARAMETERS */
     /* *********************************** */
-#ifdef NDEBUG
-    if (this->verbose) {
-#endif
-        std::string text;
-        reg_print_info(this->executableName, "Parameters");
-        text = stringFormat("Platform: %s", this->platform->GetName().c_str());
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("Reference image name: %s", this->inputReference->fname);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t%ix%ix%i voxels", this->inputReference->nx, this->inputReference->ny, this->inputReference->nz);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t%gx%gx%g mm", this->inputReference->dx, this->inputReference->dy, this->inputReference->dz);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("Floating image name: %s", this->inputFloating->fname);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t%ix%ix%i voxels", this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t%gx%gx%g mm", this->inputFloating->dx, this->inputFloating->dy, this->inputFloating->dz);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("Maximum iteration number: %i", this->maxIterations);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t(%i during the first level)", 2 * this->maxIterations);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("Percentage of blocks: %i %%", this->blockPercentage);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-#ifdef NDEBUG
-    }
-#endif
-    return EXIT_SUCCESS;
+    NR_VERBOSE("Parameters");
+    NR_VERBOSE("Platform: " << this->platform->GetName());
+    NR_VERBOSE("Reference image name: " << this->inputReference->fname);
+    NR_VERBOSE("\t" << this->inputReference->nx << "x" << this->inputReference->ny << "x" << this->inputReference->nz << " voxels");
+    NR_VERBOSE("\t" << this->inputReference->dx << "x" << this->inputReference->dy << "x" << this->inputReference->dz << " mm");
+    NR_VERBOSE("Floating image name: " << this->inputFloating->fname);
+    NR_VERBOSE("\t" << this->inputFloating->nx << "x" << this->inputFloating->ny << "x" << this->inputFloating->nz << " voxels");
+    NR_VERBOSE("\t" << this->inputFloating->dx << "x" << this->inputFloating->dy << "x" << this->inputFloating->dz << " mm");
+    NR_VERBOSE("Maximum iteration number: " << this->maxIterations);
+    NR_VERBOSE("\t(" << this->maxIterations * 2 << " during the first level)");
+    NR_VERBOSE("Percentage of blocks: " << this->blockPercentage << "%");
+    NR_VERBOSE("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 /* *************************************************************** */
 template<class T>
@@ -164,9 +132,7 @@ void reg_aladin<T>::SetInputTransform(const char *filename) {
 /* *************************************************************** */
 template<class T>
 void reg_aladin<T>::InitialiseRegistration() {
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_aladin::InitialiseRegistration()");
-#endif
+    NR_FUNC_CALLED();
 
     this->platform.reset(new Platform(this->platformType));
     this->platform->SetGpuIdx(this->gpuIdx);
@@ -233,10 +199,7 @@ void reg_aladin<T>::InitialiseRegistration() {
         if (FILE *aff = fopen(this->inputTransformName, "r")) {
             fclose(aff);
         } else {
-            std::string text = stringFormat("The specified input affine file (%s) can not be read", this->inputTransformName);
-            reg_print_fct_error("reg_aladin<T>::InitialiseRegistration()");
-            reg_print_msg_error(text.c_str());
-            reg_exit();
+            NR_FATAL_ERROR("The specified input affine file ("s + this->inputTransformName + ") can not be read");
         }
         reg_tool_ReadAffineFile(this->affineTransformation.get(), this->inputTransformName);
     } else { // No input affine transformation
@@ -366,10 +329,7 @@ template<class T>
 void reg_aladin<T>::UpdateTransformationMatrix(int type) {
     this->blockMatchingKernel->template castTo<BlockMatchingKernel>()->Calculate();
     this->ltsKernel->template castTo<LtsKernel>()->Calculate(type);
-
-#ifndef NDEBUG
-    reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] updated forward matrix");
-#endif
+    NR_MAT44(*this->affineTransformation, "The updated forward matrix");
 }
 /* *************************************************************** */
 template<class T>
@@ -395,16 +355,10 @@ template<class T>
 void reg_aladin<T>::ResolveMatrix(unsigned iterations, const unsigned optimizationFlag) {
     unsigned iteration = 0;
     while (iteration < iterations) {
-#ifndef NDEBUG
-        char text[255];
-        sprintf(text, "%s - level: %i/%i - iteration %i/%i",
-                optimizationFlag ? (char*)"Affine" : (char*)"Rigid",
-                this->currentLevel + 1, this->numberOfLevels, iteration + 1, iterations);
-        reg_print_msg_debug(text);
-#endif
+        NR_DEBUG((optimizationFlag ? "Affine" : "Rigid") << " - level: " << this->currentLevel + 1 << "/" << this->numberOfLevels
+                 << " - iteration " << iteration + 1 << "/" << iterations);
         this->GetWarpedImage(this->interpolation, this->warpedPaddingValue);
         this->UpdateTransformationMatrix(optimizationFlag);
-
         iteration++;
     }
 }
@@ -424,24 +378,14 @@ void reg_aladin<T>::Run() {
         // All the blocks are used during the first level
         const unsigned maxNumberOfIterationToPerform = (currentLevel == 0) ? this->maxIterations * 2 : this->maxIterations;
 
-#ifdef NDEBUG
-        if (this->verbose) {
-#endif
-            this->DebugPrintLevelInfoStart();
-#ifdef NDEBUG
-        }
-#endif
-
-#ifndef NDEBUG
-        if (this->con->GetReference()->sform_code > 0)
-            reg_mat44_disp(&this->con->GetReference()->sto_xyz, (char*)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)");
-        else
-            reg_mat44_disp(&this->con->GetReference()->qto_xyz, (char*)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)");
-        if (this->con->GetFloating()->sform_code > 0)
-            reg_mat44_disp(&this->con->GetFloating()->sto_xyz, (char*)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)");
-        else
-            reg_mat44_disp(&this->con->GetFloating()->qto_xyz, (char*)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)");
-#endif
+        this->DebugPrintLevelInfoStart();
+
+        if (this->con->Content::GetReference()->sform_code > 0)
+            NR_MAT44(this->con->Content::GetReference()->sto_xyz, "Reference image matrix (sform sto_xyz)");
+        else NR_MAT44(this->con->Content::GetReference()->qto_xyz, "Reference image matrix (qform qto_xyz)");
+        if (this->con->Content::GetFloating()->sform_code > 0)
+            NR_MAT44(this->con->Content::GetFloating()->sto_xyz, "Floating image matrix (sform sto_xyz)");
+        else NR_MAT44(this->con->Content::GetFloating()->qto_xyz, "Floating image matrix (qform qto_xyz)");
 
         /* ****************** */
         /* Rigid registration */
@@ -462,30 +406,18 @@ void reg_aladin<T>::Run() {
         this->DeinitAladinContent();
         this->DeallocateCurrentInputImage();
 
-#ifdef NDEBUG
-        if (this->verbose) {
-#endif
-            this->DebugPrintLevelInfoEnd();
-            reg_print_info(this->executableName, "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -");
-#ifdef NDEBUG
-        }
-#endif
-
+        this->DebugPrintLevelInfoEnd();
+        NR_VERBOSE("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -");
     }
 
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_aladin::Run() done");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 NiftiImage reg_aladin<T>::GetFinalWarpedImage() {
     // The initial images are used
-    if (!this->inputReference || !this->inputFloating || !this->affineTransformation) {
-        reg_print_fct_error("reg_aladin::GetFinalWarpedImage()");
-        reg_print_msg_error("The reference, floating images and the transformation have to be defined");
-        reg_exit();
-    }
+    if (!this->inputReference || !this->inputFloating || !this->affineTransformation)
+        NR_FATAL_ERROR("The reference, floating images and the transformation have to be defined");
 
     unique_ptr<int[]> mask(new int[this->inputReference.nVoxelsPerVolume()]());
 
@@ -511,39 +443,24 @@ NiftiImage reg_aladin<T>::GetFinalWarpedImage() {
 /* *************************************************************** */
 template<class T>
 void reg_aladin<T>::DebugPrintLevelInfoStart() {
-    /* Display some parameters specific to the current level */
-    char text[255];
-    sprintf(text, "Current level %i / %i", this->currentLevel + 1, this->numberOfLevels);
-    reg_print_info(this->executableName, text);
-    sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-            this->con->GetReference()->nx,
-            this->con->GetReference()->ny,
-            this->con->GetReference()->nz,
-            this->con->GetReference()->dx,
-            this->con->GetReference()->dy,
-            this->con->GetReference()->dz);
-    reg_print_info(this->executableName, text);
-    sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-            this->con->GetFloating()->nx,
-            this->con->GetFloating()->ny,
-            this->con->GetFloating()->nz,
-            this->con->GetFloating()->dx,
-            this->con->GetFloating()->dy,
-            this->con->GetFloating()->dz);
-    reg_print_info(this->executableName, text);
-    if (this->con->GetReference()->nz == 1) {
-        reg_print_info(this->executableName, "Block size = [4 4 1]");
-    } else reg_print_info(this->executableName, "Block size = [4 4 4]");
-    reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-    sprintf(text, "Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0],
-            this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]);
-    reg_print_info(this->executableName, text);
-    reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin] Initial transformation matrix:");
+    const nifti_image *ref = this->con->Content::GetReference();
+    const nifti_image *flo = this->con->Content::GetFloating();
+    NR_VERBOSE("Current level " << this->currentLevel + 1 << " / " << this->numberOfLevels);
+    NR_VERBOSE("Reference image size:\t" << ref->nx << "x" << ref->ny << "x" << ref->nz << " voxels\t" <<
+               ref->dx << "x" << ref->dy << "x" << ref->dz << " mm");
+    NR_VERBOSE("Floating image size:\t" << flo->nx << "x" << flo->ny << "x" << flo->nz << " voxels\t" <<
+               flo->dx << "x" << flo->dy << "x" << flo->dz << " mm");
+    NR_VERBOSE("Block size = [4 4 " << (ref->nz == 1 ? 1 : 4) << "]");
+    NR_VERBOSE("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_VERBOSE("Block number = [" << this->blockMatchingParams->blockNumber[0] << " " <<
+               this->blockMatchingParams->blockNumber[1] << " " << this->blockMatchingParams->blockNumber[2] << "]");
+    NR_MAT44_VERBOSE(*this->affineTransformation, "Initial transformation matrix:");
+    NR_VERBOSE("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 /* *************************************************************** */
 template<class T>
 void reg_aladin<T>::DebugPrintLevelInfoEnd() {
-    reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin] Final transformation matrix:");
+    NR_MAT44_VERBOSE(*this->affineTransformation, "Final transformation matrix:");
 }
 /* *************************************************************** */
 template class reg_aladin<float>;
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index f34f91f9..8f47979b 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -24,7 +24,6 @@
 #include "_reg_ssd.h"
 #include "_reg_tools.h"
 #include "_reg_ReadWriteMatrix.h"
-#include "_reg_stringFormat.h"
 #include "Platform.h"
 #include "AffineDeformationFieldKernel.h"
 #include "ResampleImageKernel.h"
@@ -232,8 +231,7 @@ class reg_aladin {
     GetMacro(Interpolation, interpolation, int);
 
     virtual void SetInputFloatingMask(nifti_image*) {
-        reg_print_fct_warn("reg_aladin::SetInputFloatingMask()");
-        reg_print_msg_warn("Floating mask not used in the asymmetric global registration");
+        NR_WARN_WFCT("Floating mask not used in the asymmetric global registration");
     }
     void SetInterpolationToNearestNeighbor() {
         this->SetInterpolation(0);
@@ -249,7 +247,7 @@ class reg_aladin {
     }
 
     virtual int Check();
-    virtual int Print();
+    virtual void Print();
     virtual void Run();
 
     virtual void DebugPrintLevelInfoStart();
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index a29a772e..1d4bfbd4 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -6,14 +6,9 @@ template <class T>
 reg_aladin_sym<T>::reg_aladin_sym()
     :reg_aladin<T>::reg_aladin() {
     this->executableName = (char*)"reg_aladin_sym";
-
     this->affineTransformationBw.reset(new mat44);
-
     this->backwardBlockMatchingParams = nullptr;
-
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_aladin_sym constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -23,9 +18,7 @@ void reg_aladin_sym<T>::SetInputFloatingMask(NiftiImage inputFloatingMaskIn) {
 /* *************************************************************** */
 template <class T>
 void reg_aladin_sym<T>::InitialiseRegistration() {
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_aladin_sym::InitialiseRegistration() called");
-#endif
+    NR_FUNC_CALLED();
 
     reg_aladin<T>::InitialiseRegistration();
 
@@ -62,10 +55,9 @@ void reg_aladin_sym<T>::InitialiseRegistration() {
     }
 
     if (this->alignCentreMass == 1 && this->inputTransformName == nullptr) {
-        if (!this->inputReferenceMask && !this->inputFloatingMask) {
-            reg_print_msg_error("The masks' centre of mass can only be used when two masks are specified");
-            reg_exit();
-        }
+        if (!this->inputReferenceMask && !this->inputFloatingMask)
+            NR_FATAL_ERROR("The masks' centre of mass can only be used when two masks are specified");
+
         float referenceCentre[3] = { 0, 0, 0 };
         float referenceCount = 0;
         reg_tools_changeDatatype<float>(this->inputReferenceMask);
@@ -143,10 +135,9 @@ void reg_aladin_sym<T>::UpdateTransformationMatrix(int type) {
     this->bBlockMatchingKernel->template castTo<BlockMatchingKernel>()->Calculate();
     this->bLtsKernel->template castTo<LtsKernel>()->Calculate(type);
 
-#ifndef NDEBUG
-    reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] pre-updated forward transformation matrix");
-    reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[NiftyReg DEBUG] pre-updated backward transformation matrix");
-#endif
+    NR_MAT44_VERBOSE(*this->affineTransformation, "The pre-updated forward transformation matrix");
+    NR_MAT44_VERBOSE(*this->affineTransformationBw, "The pre-updated backward transformation matrix");
+
     // Forward and backward matrix are inverted
     mat44 fInverted = nifti_mat44_inverse(*this->affineTransformation);
     mat44 bInverted = nifti_mat44_inverse(*this->affineTransformationBw);
@@ -161,10 +152,9 @@ void reg_aladin_sym<T>::UpdateTransformationMatrix(int type) {
     }
     this->affineTransformation->m[3][3] = 1.f;
     this->affineTransformationBw->m[3][3] = 1.f;
-#ifndef NDEBUG
-    reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] updated forward transformation matrix");
-    reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[NiftyReg DEBUG] updated backward transformation matrix");
-#endif
+
+    NR_MAT44_VERBOSE(*this->affineTransformation, "The updated forward transformation matrix");
+    NR_MAT44_VERBOSE(*this->affineTransformationBw, "The updated backward transformation matrix");
 }
 /* *************************************************************** */
 template <class T>
@@ -214,46 +204,28 @@ void reg_aladin_sym<T>::DeallocateKernels() {
 /* *************************************************************** */
 template <class T>
 void reg_aladin_sym<T>::DebugPrintLevelInfoStart() {
-    char text[255];
-    sprintf(text, "Current level %i / %i", this->currentLevel + 1, this->numberOfLevels);
-    reg_print_info(this->executableName, text);
-    sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-            this->con->GetReference()->nx,
-            this->con->GetReference()->ny,
-            this->con->GetReference()->nz,
-            this->con->GetReference()->dx,
-            this->con->GetReference()->dy,
-            this->con->GetReference()->dz);
-    reg_print_info(this->executableName, text);
-    sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm",
-            this->con->GetFloating()->nx,
-            this->con->GetFloating()->ny,
-            this->con->GetFloating()->nz,
-            this->con->GetFloating()->dx,
-            this->con->GetFloating()->dy,
-            this->con->GetFloating()->dz);
-    reg_print_info(this->executableName, text);
-    if (this->con->GetReference()->nz == 1) {
-        reg_print_info(this->executableName, "Block size = [4 4 1]");
-    } else reg_print_info(this->executableName, "Block size = [4 4 4]");
-    reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-    sprintf(text, "Forward Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0],
-            this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]);
-    reg_print_info(this->executableName, text);
-    sprintf(text, "Backward Block number = [%i %i %i]", this->backwardBlockMatchingParams->blockNumber[0],
-            this->backwardBlockMatchingParams->blockNumber[1], this->backwardBlockMatchingParams->blockNumber[2]);
-    reg_print_info(this->executableName, text);
-    reg_mat44_disp(this->affineTransformation.get(),
-                   (char*)"[reg_aladin_sym] Initial forward transformation matrix:");
-    reg_mat44_disp(this->affineTransformationBw.get(),
-                   (char*)"[reg_aladin_sym] Initial backward transformation matrix:");
-    reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    const nifti_image *ref = this->con->Content::GetReference();
+    const nifti_image *flo = this->con->Content::GetFloating();
+    NR_VERBOSE("Current level " << this->currentLevel + 1 << " / " << this->numberOfLevels);
+    NR_VERBOSE("Reference image size:\t" << ref->nx << "x" << ref->ny << "x" << ref->nz << " voxels\t" <<
+               ref->dx << "x" << ref->dy << "x" << ref->dz << " mm");
+    NR_VERBOSE("Floating image size:\t" << flo->nx << "x" << flo->ny << "x" << flo->nz << " voxels\t" <<
+               flo->dx << "x" << flo->dy << "x" << flo->dz << " mm");
+    NR_VERBOSE("Block size = [4 4 " << (ref->nz == 1 ? 1 : 4) << "]");
+    NR_VERBOSE("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_VERBOSE("Forward Block number = [" << this->blockMatchingParams->blockNumber[0] << " " <<
+               this->blockMatchingParams->blockNumber[1] << " " << this->blockMatchingParams->blockNumber[2] << "]");
+    NR_VERBOSE("Backward Block number = [" << this->backwardBlockMatchingParams->blockNumber[0] << " " <<
+               this->backwardBlockMatchingParams->blockNumber[1] << " " << this->backwardBlockMatchingParams->blockNumber[2] << "]");
+    NR_MAT44_VERBOSE(*this->affineTransformation, "Initial forward transformation matrix:");
+    NR_MAT44_VERBOSE(*this->affineTransformationBw, "Initial backward transformation matrix:");
+    NR_VERBOSE("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 /* *************************************************************** */
 template <class T>
 void reg_aladin_sym<T>::DebugPrintLevelInfoEnd() {
-    reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin_sym] Final forward transformation matrix:");
-    reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[reg_aladin_sym] Final backward transformation matrix:");
+    NR_MAT44_VERBOSE(*this->affineTransformation, "Final forward transformation matrix:");
+    NR_MAT44_VERBOSE(*this->affineTransformationBw, "Final backward transformation matrix:");
 }
 /* *************************************************************** */
 template class reg_aladin_sym<float>;
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 9e0988d5..9293ecee 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -59,230 +59,172 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     landmarkReference = nullptr;
     landmarkFloating = nullptr;
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::reg_base");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceImage(NiftiImage inputReferenceIn) {
     inputReference = inputReferenceIn;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetReferenceImage");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetFloatingImage(NiftiImage inputFloatingIn) {
     inputFloating = inputFloatingIn;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetFloatingImage");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetMaximalIterationNumber(unsigned iter) {
     maxIterationNumber = iter;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetMaximalIterationNumber");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceMask(NiftiImage maskImageIn) {
     maskImage = maskImageIn;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetReferenceMask");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetAffineTransformation(const mat44& affineTransformationIn) {
     affineTransformation.reset(new mat44(affineTransformationIn));
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetAffineTransformation");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceSmoothingSigma(T referenceSmoothingSigmaIn) {
     referenceSmoothingSigma = referenceSmoothingSigmaIn;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetReferenceSmoothingSigma");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetFloatingSmoothingSigma(T floatingSmoothingSigmaIn) {
     floatingSmoothingSigma = floatingSmoothingSigmaIn;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetFloatingSmoothingSigma");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceThresholdUp(unsigned i, T t) {
     referenceThresholdUp[i] = t;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetReferenceThresholdUp");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetReferenceThresholdLow(unsigned i, T t) {
     referenceThresholdLow[i] = t;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetReferenceThresholdLow");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetFloatingThresholdUp(unsigned i, T t) {
     floatingThresholdUp[i] = t;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetFloatingThresholdUp");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetFloatingThresholdLow(unsigned i, T t) {
     floatingThresholdLow[i] = t;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetFloatingThresholdLow");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseRobustRange() {
     robustRange = true;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseRobustRange");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::DoNotUseRobustRange() {
     robustRange = false;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseRobustRange");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetWarpedPaddingValue(float warpedPaddingValueIn) {
     warpedPaddingValue = warpedPaddingValueIn;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetWarpedPaddingValue");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLevelNumber(unsigned levelNumberIn) {
-    if(levelNumberIn>0)
-    levelNumber = levelNumberIn;
-    else{
-        reg_print_msg_error("The number of level is expected to be strictly positive. Exit");
-        reg_exit();
-    }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetLevelNumber");
-#endif
+    if (levelNumberIn > 0)
+        levelNumber = levelNumberIn;
+    else
+        NR_FATAL_ERROR("The number of level is expected to be strictly positive!");
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLevelToPerform(unsigned levelToPerformIn) {
     levelToPerform = levelToPerformIn;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetLevelToPerform");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetGradientSmoothingSigma(T gradientSmoothingSigmaIn) {
     gradientSmoothingSigma = gradientSmoothingSigmaIn;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetGradientSmoothingSigma");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseConjugateGradient() {
     useConjGradient = true;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseConjugateGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::DoNotUseConjugateGradient() {
     useConjGradient = false;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::DoNotUseConjugateGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseApproximatedGradient() {
     useApproxGradient = true;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseApproximatedGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::DoNotUseApproximatedGradient() {
     useApproxGradient = false;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::DoNotUseApproximatedGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::PrintOutInformation() {
     verbose = true;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::PrintOutInformation");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::DoNotPrintOutInformation() {
     verbose = false;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::DoNotPrintOutInformation");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::DoNotUsePyramidalApproach() {
     usePyramid = false;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::DoNotUsePyramidalApproach");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseNearestNeighborInterpolation() {
     interpolation = 0;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseNearestNeighborInterpolation");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseLinearInterpolation() {
     interpolation = 1;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseLinearInterpolation");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseCubicSplineInterpolation() {
     interpolation = 3;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseCubicSplineInterpolation");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -291,35 +233,22 @@ void reg_base<T>::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f
     landmarkReference = r;
     landmarkFloating = f;
     landmarkRegWeight = w;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetLandmarkRegularisationParam");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::CheckParameters() {
     // Check if both input images are defined
-    if (!inputReference) {
-        reg_print_fct_error("reg_base::CheckParameters()");
-        reg_print_msg_error("The reference image is not defined");
-        reg_exit();
-    }
-    if (!inputFloating) {
-        reg_print_fct_error("reg_base::CheckParameters()");
-        reg_print_msg_error("The floating image is not defined");
-        reg_exit();
-    }
+    if (!inputReference)
+        NR_FATAL_ERROR("The reference image is not defined");
+    if (!inputFloating)
+        NR_FATAL_ERROR("The floating image is not defined");
 
     // Check the mask dimension if it is defined
-    if (maskImage) {
-        if (inputReference->nx != maskImage->nx ||
-            inputReference->ny != maskImage->ny ||
-            inputReference->nz != maskImage->nz) {
-            reg_print_fct_error("reg_base::CheckParameters()");
-            reg_print_msg_error("The reference and mask images have different dimension");
-            reg_exit();
-        }
-    }
+    if (maskImage && (inputReference->nx != maskImage->nx ||
+                      inputReference->ny != maskImage->ny ||
+                      inputReference->nz != maskImage->nz))
+        NR_FATAL_ERROR("The reference and mask images have different dimension");
 
     // Check the number of level to perform
     if (levelToPerform > 0) {
@@ -345,11 +274,8 @@ void reg_base<T>::CheckParameters() {
     //
     // Tests are ignored if using MIND or MINDSSC as they are not implemented for multi-channel or weighting
     if (!measure_mind && !measure_mindssc) {
-        if (inputFloating->nt != inputReference->nt) {
-            reg_print_fct_error("reg_base::CheckParameters()");
-            reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)");
-            reg_exit();
-        }
+        if (inputFloating->nt != inputReference->nt)
+            NR_FATAL_ERROR("The reference and floating images have different numbers of channels (timepoints)");
         unique_ptr<double[]> chanWeightSum(new double[inputReference->nt]());
         double simWeightSum, totWeightSum = 0.;
         double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr;
@@ -357,89 +283,57 @@ void reg_base<T>::CheckParameters() {
             nmiWeights = measure_nmi->GetTimepointsWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
-                if (nmiWeights[n] < 0) {
-                    char text[255];
-                    sprintf(text, "The NMI weight for timepoint %d has a negative value - weights must be positive", n);
-                    reg_print_fct_error("reg_base::CheckParameters()");
-                    reg_print_msg_error(text);
-                    reg_exit();
-                }
+                if (nmiWeights[n] < 0)
+                    NR_FATAL_ERROR("The NMI weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive");
                 chanWeightSum[n] += nmiWeights[n];
                 simWeightSum += nmiWeights[n];
                 totWeightSum += nmiWeights[n];
             }
-            if (simWeightSum == 0) {
-                reg_print_fct_warn("reg_base::CheckParameters()");
-                reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored");
-            }
+            if (simWeightSum == 0)
+                NR_WARN_WFCT("The NMI similarity measure has a weight of 0 for all channels so will be ignored");
         }
         if (measure_ssd) {
             ssdWeights = measure_ssd->GetTimepointsWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
-                if (ssdWeights[n] < 0) {
-                    char text[255];
-                    sprintf(text, "The SSD weight for timepoint %d has a negative value - weights must be positive", n);
-                    reg_print_fct_error("reg_base::CheckParameters()");
-                    reg_print_msg_error(text);
-                    reg_exit();
-                }
+                if (ssdWeights[n] < 0)
+                    NR_FATAL_ERROR("The SSD weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive");
                 chanWeightSum[n] += ssdWeights[n];
                 simWeightSum += ssdWeights[n];
                 totWeightSum += ssdWeights[n];
             }
-            if (simWeightSum == 0) {
-                reg_print_fct_warn("reg_base::CheckParameters()");
-                reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored");
-            }
+            if (simWeightSum == 0)
+                NR_WARN_WFCT("The SSD similarity measure has a weight of 0 for all channels so will be ignored");
         }
         if (measure_kld) {
             kldWeights = measure_kld->GetTimepointsWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
-                if (kldWeights[n] < 0) {
-                    char text[255];
-                    sprintf(text, "The KLD weight for timepoint %d has a negative value - weights must be positive", n);
-                    reg_print_fct_error("reg_base::CheckParameters()");
-                    reg_print_msg_error(text);
-                    reg_exit();
-                }
+                if (kldWeights[n] < 0)
+                    NR_FATAL_ERROR("The KLD weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive");
                 chanWeightSum[n] += kldWeights[n];
                 simWeightSum += kldWeights[n];
                 totWeightSum += kldWeights[n];
             }
-            if (simWeightSum == 0) {
-                reg_print_fct_warn("reg_base::CheckParameters()");
-                reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored");
-            }
+            if (simWeightSum == 0)
+                NR_WARN_WFCT("The KLD similarity measure has a weight of 0 for all channels so will be ignored");
         }
         if (measure_lncc) {
             lnccWeights = measure_lncc->GetTimepointsWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
-                if (lnccWeights[n] < 0) {
-                    char text[255];
-                    sprintf(text, "The LNCC weight for timepoint %d has a negative value - weights must be positive", n);
-                    reg_print_fct_error("reg_base::CheckParameters()");
-                    reg_print_msg_error(text);
-                    reg_exit();
-                }
+                if (lnccWeights[n] < 0)
+                    NR_FATAL_ERROR("The LNCC weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive");
                 chanWeightSum[n] += lnccWeights[n];
                 simWeightSum += lnccWeights[n];
                 totWeightSum += lnccWeights[n];
             }
-            if (simWeightSum == 0) {
-                reg_print_fct_warn("reg_base::CheckParameters()");
-                reg_print_msg_warn("The LNCC similarity measure has a weight of 0 for all channels so will be ignored");
-            }
+            if (simWeightSum == 0)
+                NR_WARN_WFCT("The LNCC similarity measure has a weight of 0 for all channels so will be ignored");
         }
         for (int n = 0; n < inputReference->nt; n++) {
-            if (chanWeightSum[n] == 0) {
-                char text[255];
-                sprintf(text, "Channel %d has a weight of 0 for all similarity measures so will be ignored", n);
-                reg_print_fct_warn("reg_base::CheckParameters()");
-                reg_print_msg_warn(text);
-            }
+            if (chanWeightSum[n] == 0)
+                NR_WARN_WFCT("Channel " << n << " has a weight of 0 for all similarity measures so will be ignored");
             if (measure_nmi)
                 measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum);
             if (measure_ssd)
@@ -451,9 +345,7 @@ void reg_base<T>::CheckParameters() {
         }
     }
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::CheckParameters");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -481,9 +373,7 @@ void reg_base<T>::InitialiseSimilarity() {
     if (measure_mindssc)
         measure->Initialise(*measure_mindssc, con);
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::InitialiseSimilarity");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -565,9 +455,7 @@ void reg_base<T>::Initialise() {
     }
 
     initialised = true;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::Initialise");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -594,9 +482,7 @@ double reg_base<T>::ComputeSimilarityMeasure() {
     if (measure_mindssc)
         measure += measure_mindssc->GetSimilarityMeasureValue();
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::ComputeSimilarityMeasure");
-#endif
+    NR_FUNC_CALLED();
     return similarityWeight * measure;
 }
 /* *************************************************************** */
@@ -651,9 +537,7 @@ void reg_base<T>::GetVoxelBasedGradient() {
             measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t);
     }
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::GetVoxelBasedGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 //template<class T>
@@ -680,9 +564,7 @@ void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
     measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseNMISetReferenceBinNumber");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -693,9 +575,7 @@ void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
     measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseNMISetFloatingBinNumber");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -704,9 +584,7 @@ void reg_base<T>::UseSSD(int timepoint, bool normalise) {
         measure_ssd.reset(dynamic_cast<reg_ssd*>(measure->Create(MeasureType::Ssd)));
     measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     measure_ssd->SetNormaliseTimepoint(timepoint, normalise);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseSSD");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -715,9 +593,7 @@ void reg_base<T>::UseMIND(int timepoint, int offset) {
         measure_mind.reset(dynamic_cast<reg_mind*>(measure->Create(MeasureType::Mind)));
     measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mind->SetDescriptorOffset(offset);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseMIND");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -726,9 +602,7 @@ void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
         measure_mindssc.reset(dynamic_cast<reg_mindssc*>(measure->Create(MeasureType::MindSsc)));
     measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mindssc->SetDescriptorOffset(offset);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseMINDSSC");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -736,9 +610,7 @@ void reg_base<T>::UseKLDivergence(int timepoint) {
     if (!measure_kld)
         measure_kld.reset(dynamic_cast<reg_kld*>(measure->Create(MeasureType::Kld)));
     measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseKLDivergence");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -747,28 +619,20 @@ void reg_base<T>::UseLNCC(int timepoint, float stddev) {
         measure_lncc.reset(dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc)));
     measure_lncc->SetKernelStandardDeviation(timepoint, stddev);
     measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseLNCC");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLNCCKernelType(int type) {
-    if (!measure_lncc) {
-        reg_print_fct_error("reg_base<T>::SetLNCCKernelType");
-        reg_print_msg_error("The LNCC object has to be created first");
-        reg_exit();
-    }
+    if (!measure_lncc)
+        NR_FATAL_ERROR("The LNCC object has to be created first");
     measure_lncc->SetKernelType(type);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::SetLNCCKernelType");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseDTI(bool *timepoint) {
-    reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring");
-    reg_exit();
+    NR_FATAL_ERROR("The use of DTI has been deactivated as it requires some refactoring");
 
     if (!measure_dti)
         measure_dti.reset(dynamic_cast<reg_dti*>(measure->Create(MeasureType::Dti)));
@@ -776,48 +640,34 @@ void reg_base<T>::UseDTI(bool *timepoint) {
         if (timepoint[i])
             measure_dti->SetTimepointWeight(i, 1.0);  // weight set to 1.0 to indicate timepoint is active
     }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::UseDTI");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetNMIWeight(int timepoint, double weight) {
-    if (!measure_nmi) {
-        reg_print_fct_error("reg_base<T>::SetNMIWeight");
-        reg_print_msg_error("The NMI object has to be created before the timepoint weights can be set");
-        reg_exit();
-    }
+    if (!measure_nmi)
+        NR_FATAL_ERROR("The NMI object has to be created before the timepoint weights can be set");
     measure_nmi->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLNCCWeight(int timepoint, double weight) {
-    if (!measure_lncc) {
-        reg_print_fct_error("reg_base<T>::SetLNCCWeight");
-        reg_print_msg_error("The LNCC object has to be created before the timepoint weights can be set");
-        reg_exit();
-    }
+    if (!measure_lncc)
+        NR_FATAL_ERROR("The LNCC object has to be created before the timepoint weights can be set");
     measure_lncc->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetSSDWeight(int timepoint, double weight) {
-    if (!measure_ssd) {
-        reg_print_fct_error("reg_base<T>::SetSSDWeight");
-        reg_print_msg_error("The SSD object has to be created before the timepoint weights can be set");
-        reg_exit();
-    }
+    if (!measure_ssd)
+        NR_FATAL_ERROR("The SSD object has to be created before the timepoint weights can be set");
     measure_ssd->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetKLDWeight(int timepoint, double weight) {
-    if (!measure_kld) {
-        reg_print_fct_error("reg_base<T>::SetKLDWeight");
-        reg_print_msg_error("The KLD object has to be created before the timepoint weights can be set");
-        reg_exit();
-    }
+    if (!measure_kld)
+        NR_FATAL_ERROR("The KLD object has to be created before the timepoint weights can be set");
     measure_kld->SetTimepointWeight(timepoint, weight);
 }
 /* *************************************************************** */
@@ -847,9 +697,7 @@ void reg_base<T>::WarpFloatingImage(int inter) {
                           measure_dti->GetActiveTimepoints(),
                           forwardJacobianMatrix);*/
     }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::WarpFloatingImage");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -870,23 +718,14 @@ void reg_base<T>::DeinitCurrentLevel(int currentLevel) {
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::Run() {
-#ifndef NDEBUG
-    char text[255];
-    sprintf(text, "%s::Run() called", executableName);
-    reg_print_msg_debug(text);
-#endif
+    NR_DEBUG(executableName << "::Run() called");
 
     Initialise();
-#ifdef NDEBUG
-    if (verbose) {
-#endif
-        reg_print_info(executableName, "***********************************************************");
-#ifdef NDEBUG
-    }
-#endif
+
+    NR_VERBOSE("***********************************************************");
 
     // Update the maximal number of iteration to perform per level
-    maxIterationNumber = maxIterationNumber * pow(2, levelToPerform - 1);
+    maxIterationNumber *= pow(2, levelToPerform - 1);
 
     // Loop over the different resolution level to perform
     for (int currentLevel = 0; currentLevel < levelToPerform; currentLevel++) {
@@ -912,7 +751,7 @@ void reg_base<T>::Run() {
             // Iterate until convergence or until the max number of iteration is reach
             while (currentSize) {
                 if (optimiser->GetCurrentIterationNumber() >= optimiser->GetMaxIterationNumber()) {
-                    reg_print_msg_warn("The current level reached the maximum number of iteration");
+                    NR_WARN("The current level reached the maximum number of iteration");
                     break;
                 }
 
@@ -935,18 +774,8 @@ void reg_base<T>::Run() {
             if (perturbation < perturbationNumber) {
                 optimiser->Perturbation(smallestSize);
                 currentSize = maxStepSize;
-#ifdef NDEBUG
-                if (verbose) {
-#endif
-                    char text[255];
-                    reg_print_info(executableName, "Perturbation Step - The number of iteration is reset to 0");
-                    sprintf(text, "Perturbation Step - Every control point positions is altered by [-%g %g]",
-                            smallestSize, smallestSize);
-                    reg_print_info(executableName, text);
-
-#ifdef NDEBUG
-                }
-#endif
+                NR_VERBOSE("Perturbation Step - The number of iteration is reset to 0");
+                NR_VERBOSE("Perturbation Step - Every control point positions is altered by [-" << smallestSize << " " << smallestSize << "]");
             }
         } // perturbation loop
 
@@ -956,21 +785,14 @@ void reg_base<T>::Run() {
         // Some cleaning is performed
         DeinitCurrentLevel(currentLevel);
 
-#ifdef NDEBUG
-        if (verbose) {
-#endif
-            reg_print_info(executableName, "Current registration level done");
-            reg_print_info(executableName, "***********************************************************");
-#ifdef NDEBUG
-        }
-#endif
+        NR_VERBOSE("Current registration level done");
+        NR_VERBOSE("***********************************************************");
+
         // Update the number of level for the next level
         maxIterationNumber /= 2;
     } // level levelToPerform
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_base<T>::Run");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template class reg_base<float>;
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 5fffdc56..eb5d4d3d 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -25,7 +25,6 @@
 #include "_reg_lncc.h"
 #include "_reg_tools.h"
 #include "_reg_ReadWriteImage.h"
-#include "_reg_stringFormat.h"
 #include "_reg_optimiser.h"
 #include "Platform.h"
 
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 26530618..e8207c16 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -30,65 +30,49 @@ reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint):
     this->useApproxGradient = false;
     gridRefinement = true;
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::reg_f3d");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetControlPointGridImage(NiftiImage inputControlPointGridIn) {
     inputControlPointGrid = inputControlPointGridIn;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::SetControlPointGridImage");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetBendingEnergyWeight(T be) {
     bendingEnergyWeight = be;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::SetBendingEnergyWeight");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetLinearEnergyWeight(T le) {
     linearEnergyWeight = le;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::SetLinearEnergyWeight");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetJacobianLogWeight(T j) {
     jacobianLogWeight = j;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::SetJacobianLogWeight");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::ApproximateJacobianLog() {
     jacobianLogApproximation = true;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::ApproximateJacobianLog");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::DoNotApproximateJacobianLog() {
     jacobianLogApproximation = false;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::DoNotApproximateJacobianLog");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SetSpacing(unsigned i, T s) {
     spacing[i] = s;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::SetSpacing");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -137,9 +121,7 @@ T reg_f3d<T>::InitCurrentLevel(int currentLevel) {
 
     InitContent(reference, floating, mask);
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::InitCurrentLevel");
-#endif
+    NR_FUNC_CALLED();
     return maxStepSize;
 }
 /* *************************************************************** */
@@ -166,9 +148,7 @@ void reg_f3d<T>::CheckParameters() {
         } else this->similarityWeight = 1 - penaltySum;
     }
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::CheckParameters");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -212,141 +192,101 @@ void reg_f3d<T>::Initialise() {
         if (controlPointGrid->nz > 1)
             spacing[2] = controlPointGrid->dz / powf(2, this->levelNumber - 1);
     }
-#ifdef NDEBUG
-    if (this->verbose) {
-#endif
-        std::string text;
-        // Print out some global information about the registration
-        reg_print_info(this->executableName, "***********************************************************");
-        reg_print_info(this->executableName, "INPUT PARAMETERS");
-        reg_print_info(this->executableName, "***********************************************************");
-        reg_print_info(this->executableName, "Reference image:");
-        text = stringFormat("\t* name: %s", this->inputReference->fname);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t* image dimension: %i x %i x %i x %i",
-                            this->inputReference->nx, this->inputReference->ny,
-                            this->inputReference->nz, this->inputReference->nt);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t* image spacing: %g x %g x %g mm",
-                            this->inputReference->dx, this->inputReference->dy, this->inputReference->dz);
-        reg_print_info(this->executableName, text.c_str());
-        for (int i = 0; i < this->inputReference->nt; i++) {
-            text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
-                                i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]);
-            reg_print_info(this->executableName, text.c_str());
-            if (this->measure_nmi) {
-                if (this->measure_nmi->GetTimepointsWeights()[i] > 0) {
-                    text = stringFormat("\t* binning size for timepoint %i/%i: %i",
-                                        i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4);
-                    reg_print_info(this->executableName, text.c_str());
-                }
-            }
-        }
-        text = stringFormat("\t* gaussian smoothing sigma: %g", this->referenceSmoothingSigma);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-        reg_print_info(this->executableName, "Floating image:");
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t* name: %s", this->inputFloating->fname);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t* image dimension: %i x %i x %i x %i",
-                            this->inputFloating->nx, this->inputFloating->ny,
-                            this->inputFloating->nz, this->inputFloating->nt);
-        reg_print_info(this->executableName, text.c_str());
-        text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx,
-                            this->inputFloating->dy, this->inputFloating->dz);
-        reg_print_info(this->executableName, text.c_str());
-        for (int i = 0; i < this->inputFloating->nt; i++) {
-            text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]",
-                                i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]);
-            reg_print_info(this->executableName, text.c_str());
-            if (this->measure_nmi) {
-                if (this->measure_nmi->GetTimepointsWeights()[i] > 0) {
-                    text = stringFormat("\t* binning size for timepoint %i/%i: %i",
-                                        i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4);
-                    reg_print_info(this->executableName, text.c_str());
-                }
+
+    // Print out some global information about the registration
+    NR_VERBOSE("***********************************************************");
+    NR_VERBOSE("INPUT PARAMETERS");
+    NR_VERBOSE("***********************************************************");
+    NR_VERBOSE("Reference image:");
+    NR_VERBOSE("\t* name: " << this->inputReference->fname);
+    NR_VERBOSE("\t* image dimension: " << this->inputReference->nx << " x " << this->inputReference->ny << " x " <<
+               this->inputReference->nz << " x " << this->inputReference->nt);
+    NR_VERBOSE("\t* image spacing: " << this->inputReference->dx << " x " << this->inputReference->dy << " x " <<
+               this->inputReference->dz << " mm");
+    for (int i = 0; i < this->inputReference->nt; i++) {
+        NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputReference->nt - 1 << ": [" <<
+                   this->referenceThresholdLow[i] << " " << this->referenceThresholdUp[i] << "]");
+        if (this->measure_nmi) {
+            if (this->measure_nmi->GetTimepointsWeights()[i] > 0) {
+                NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputReference->nt - 1 << ": " <<
+                           this->measure_nmi->GetReferenceBinNumber()[i] - 4);
             }
         }
-        text = stringFormat("\t* gaussian smoothing sigma: %g", this->floatingSmoothingSigma);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-        text = stringFormat("Warped image padding value: %g", this->warpedPaddingValue);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-        text = stringFormat("Level number: %i", this->levelNumber);
-        reg_print_info(this->executableName, text.c_str());
-        if (this->levelNumber != this->levelToPerform) {
-            text = stringFormat("\t* Level to perform: %i", this->levelToPerform);
-            reg_print_info(this->executableName, text.c_str());
-        }
-        reg_print_info(this->executableName, "");
-        text = stringFormat("Maximum iteration number during the last level: %i", (int)this->maxIterationNumber);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-
-        text = stringFormat("Final spacing in mm: %g %g %g", spacing[0], spacing[1], spacing[2]);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-        if (this->measure_ssd)
-            reg_print_info(this->executableName, "The SSD is used as a similarity measure.");
-        if (this->measure_kld)
-            reg_print_info(this->executableName, "The KL divergence is used as a similarity measure.");
-        if (this->measure_lncc)
-            reg_print_info(this->executableName, "The LNCC is used as a similarity measure.");
-        if (this->measure_dti)
-            reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure.");
-        if (this->measure_mind)
-            reg_print_info(this->executableName, "MIND is used as a similarity measure.");
-        if (this->measure_mindssc)
-            reg_print_info(this->executableName, "MINDSSC is used as a similarity measure.");
-        if (this->measure_nmi || (!this->measure_dti && !this->measure_kld && !this->measure_lncc &&
-                                  !this->measure_nmi && !this->measure_ssd && !this->measure_mind && !this->measure_mindssc))
-            reg_print_info(this->executableName, "The NMI is used as a similarity measure.");
-        text = stringFormat("Similarity measure term weight: %g", this->similarityWeight);
-        reg_print_info(this->executableName, text.c_str());
-        reg_print_info(this->executableName, "");
-        if (bendingEnergyWeight > 0) {
-            text = stringFormat("Bending energy penalty term weight: %g", bendingEnergyWeight);
-            reg_print_info(this->executableName, text.c_str());
-            reg_print_info(this->executableName, "");
-        }
-        if ((linearEnergyWeight) > 0) {
-            text = stringFormat("Linear energy penalty term weight: %g", linearEnergyWeight);
-            reg_print_info(this->executableName, text.c_str());
-            reg_print_info(this->executableName, "");
-        }
-        if (jacobianLogWeight > 0) {
-            text = stringFormat("Jacobian-based penalty term weight: %g", jacobianLogWeight);
-            reg_print_info(this->executableName, text.c_str());
-            if (jacobianLogApproximation) {
-                reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated");
-            } else {
-                reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated");
+    }
+    NR_VERBOSE("\t* gaussian smoothing sigma: " << this->referenceSmoothingSigma);
+    NR_VERBOSE("");
+    NR_VERBOSE("Floating image:");
+    NR_VERBOSE("\t* name: " << this->inputFloating->fname);
+    NR_VERBOSE("\t* image dimension: " << this->inputFloating->nx << " x " << this->inputFloating->ny << " x " <<
+               this->inputFloating->nz << " x " << this->inputFloating->nt);
+    NR_VERBOSE("\t* image spacing: " << this->inputFloating->dx << " x " << this->inputFloating->dy << " x " <<
+               this->inputFloating->dz << " mm");
+    for (int i = 0; i < this->inputFloating->nt; i++) {
+        NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": [" <<
+                   this->floatingThresholdLow[i] << " " << this->floatingThresholdUp[i] << "]");
+        if (this->measure_nmi) {
+            if (this->measure_nmi->GetTimepointsWeights()[i] > 0) {
+                NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": " <<
+                           this->measure_nmi->GetFloatingBinNumber()[i] - 4);
             }
-            reg_print_info(this->executableName, "");
         }
-        if (this->landmarkRegWeight > 0) {
-            text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight);
-            reg_print_info(this->executableName, text.c_str());
-            reg_print_info(this->executableName, "");
-        }
-#ifdef NDEBUG
     }
-#endif
+    NR_VERBOSE("\t* gaussian smoothing sigma: " << this->floatingSmoothingSigma);
+    NR_VERBOSE("");
+    NR_VERBOSE("Warped image padding value: " << this->warpedPaddingValue);
+    NR_VERBOSE("");
+    NR_VERBOSE("Level number: " << this->levelNumber);
+    if (this->levelNumber != this->levelToPerform)
+        NR_VERBOSE("\t* Level to perform: " << this->levelToPerform);
+    NR_VERBOSE("");
+    NR_VERBOSE("Maximum iteration number during the last level: " << this->maxIterationNumber);
+    NR_VERBOSE("");
+
+    NR_VERBOSE("Final spacing in mm: " << spacing[0] << " " << spacing[1] << " " << spacing[2]);
+    NR_VERBOSE("");
+    if (this->measure_ssd)
+        NR_VERBOSE("The SSD is used as a similarity measure.");
+    if (this->measure_kld)
+        NR_VERBOSE("The KL divergence is used as a similarity measure.");
+    if (this->measure_lncc)
+        NR_VERBOSE("The LNCC is used as a similarity measure.");
+    if (this->measure_dti)
+        NR_VERBOSE("A DTI based measure is used as a similarity measure.");
+    if (this->measure_mind)
+        NR_VERBOSE("MIND is used as a similarity measure.");
+    if (this->measure_mindssc)
+        NR_VERBOSE("MINDSSC is used as a similarity measure.");
+    if (this->measure_nmi || (!this->measure_dti && !this->measure_kld && !this->measure_lncc &&
+                              !this->measure_nmi && !this->measure_ssd && !this->measure_mind && !this->measure_mindssc))
+        NR_VERBOSE("The NMI is used as a similarity measure.");
+    NR_VERBOSE("Similarity measure term weight: " << this->similarityWeight);
+    NR_VERBOSE("");
+    if (bendingEnergyWeight > 0) {
+        NR_VERBOSE("Bending energy penalty term weight: " << bendingEnergyWeight);
+        NR_VERBOSE("");
+    }
+    if (linearEnergyWeight > 0) {
+        NR_VERBOSE("Linear energy penalty term weight: " << linearEnergyWeight);
+        NR_VERBOSE("");
+    }
+    if (jacobianLogWeight > 0) {
+        NR_VERBOSE("Jacobian-based penalty term weight: " << jacobianLogWeight);
+        NR_VERBOSE("\t* Jacobian-based penalty term is " << (jacobianLogApproximation ? "approximated" : "not approximated"));
+        NR_VERBOSE("");
+    }
+    if (this->landmarkRegWeight > 0) {
+        NR_VERBOSE("Landmark distance regularisation term weight: " << this->landmarkRegWeight);
+        NR_VERBOSE("");
+    }
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::Initialise");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetDeformationField() {
     this->compute->GetDeformationField(false, // Composition
                                        true); // bspline
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetDeformationField");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -362,64 +302,44 @@ double reg_f3d<T>::ComputeJacobianBasedPenaltyTerm(int type) {
     unsigned it = 0;
     while (value != value && it < maxit) {
         value = this->compute->CorrectFolding(approx);
-#ifndef NDEBUG
-        reg_print_msg_debug("Folding correction");
-#endif
+        NR_DEBUG("Folding correction");
         it++;
     }
     if (type > 0) {
         if (value != value) {
             this->optimiser->RestoreBestDof();
-            reg_print_fct_warn("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm()");
-            reg_print_msg_warn("The folding correction scheme failed");
-        } else {
-#ifndef NDEBUG
-            if (it > 0) {
-                char text[255];
-                sprintf(text, "Folding correction, %i step(s)", it);
-                reg_print_msg_debug(text);
-            }
-#endif
+            NR_WARN_WFCT("The folding correction scheme failed");
+        } else if (it > 0) {
+            NR_DEBUG("Folding correction, " << it << " step(s)");
         }
     }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::ComputeJacobianBasedPenaltyTerm");
-#endif
+    NR_FUNC_CALLED();
     return jacobianLogWeight * value;
 }
 /* *************************************************************** */
 template<class T>
 double reg_f3d<T>::ComputeBendingEnergyPenaltyTerm() {
     if (bendingEnergyWeight <= 0) return 0;
-
-    double value = this->compute->ApproxBendingEnergy();
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::ComputeBendingEnergyPenaltyTerm");
-#endif
+    const double value = this->compute->ApproxBendingEnergy();
+    NR_FUNC_CALLED();
     return bendingEnergyWeight * value;
 }
 /* *************************************************************** */
 template<class T>
 double reg_f3d<T>::ComputeLinearEnergyPenaltyTerm() {
     if (linearEnergyWeight <= 0) return 0;
-
-    double value = this->compute->ApproxLinearEnergy();
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::ComputeLinearEnergyPenaltyTerm");
-#endif
+    const double value = this->compute->ApproxLinearEnergy();
+    NR_FUNC_CALLED();
     return linearEnergyWeight * value;
 }
 /* *************************************************************** */
 template<class T>
 double reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm() {
     if (this->landmarkRegWeight <= 0) return 0;
-
-    double value = this->compute->GetLandmarkDistance(this->landmarkRegNumber,
-                                                      this->landmarkReference,
-                                                      this->landmarkFloating);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm");
-#endif
+    const double value = this->compute->GetLandmarkDistance(this->landmarkRegNumber,
+                                                            this->landmarkReference,
+                                                            this->landmarkFloating);
+    NR_FUNC_CALLED();
     return this->landmarkRegWeight * value;
 }
 /* *************************************************************** */
@@ -431,52 +351,38 @@ void reg_f3d<T>::GetSimilarityMeasureGradient() {
     // And the node-based NMI gradient is extracted
     this->compute->ConvolveVoxelBasedMeasureGradient(this->similarityWeight);
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetSimilarityMeasureGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetBendingEnergyGradient() {
     if (bendingEnergyWeight <= 0) return;
-
     this->compute->ApproxBendingEnergyGradient(bendingEnergyWeight);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetBendingEnergyGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetLinearEnergyGradient() {
     if (linearEnergyWeight <= 0) return;
-
     this->compute->ApproxLinearEnergyGradient(linearEnergyWeight);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetLinearEnergyGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetJacobianBasedGradient() {
     if (jacobianLogWeight <= 0) return;
-
     this->compute->JacobianPenaltyTermGradient(jacobianLogWeight, jacobianLogApproximation);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetJacobianBasedGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetLandmarkDistanceGradient() {
     if (this->landmarkRegWeight <= 0) return;
-
     this->compute->LandmarkDistanceGradient(this->landmarkRegNumber,
                                             this->landmarkReference,
                                             this->landmarkFloating,
                                             this->landmarkRegWeight);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetLandmarkDistanceGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -488,15 +394,10 @@ T reg_f3d<T>::NormaliseGradient() {
         // The gradient is normalised if we are running f3d
         // It will be normalised later when running f3d2
         this->compute->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
-#ifndef NDEBUG
-        char text[255];
-        sprintf(text, "Objective function gradient maximal length: %g", maxGradLength);
-        reg_print_msg_debug(text);
-#endif
+        NR_DEBUG("Objective function gradient maximal length: " << maxGradLength);
     }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::NormaliseGradient");
-#endif
+
+    NR_FUNC_CALLED();
 
     // Returns the largest gradient distance
     return maxGradLength;
@@ -504,61 +405,38 @@ T reg_f3d<T>::NormaliseGradient() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::DisplayCurrentLevelParameters(int currentLevel) {
-#ifdef NDEBUG
-    if (this->verbose) {
-#endif
-        nifti_image *reference = this->con->Content::GetReference();
-        nifti_image *floating = this->con->Content::GetFloating();
-        char text[255];
-        sprintf(text, "Current level: %i / %i", currentLevel + 1, this->levelNumber);
-        reg_print_info(this->executableName, text);
-        sprintf(text, "Maximum iteration number: %i", (int)this->maxIterationNumber);
-        reg_print_info(this->executableName, text);
-        reg_print_info(this->executableName, "Current reference image");
-        sprintf(text, "\t* image dimension: %i x %i x %i x %i", reference->nx, reference->ny, reference->nz, reference->nt);
-        reg_print_info(this->executableName, text);
-        sprintf(text, "\t* image spacing: %g x %g x %g mm", reference->dx, reference->dy, reference->dz);
-        reg_print_info(this->executableName, text);
-        reg_print_info(this->executableName, "Current floating image");
-        sprintf(text, "\t* image dimension: %i x %i x %i x %i", floating->nx, floating->ny, floating->nz, floating->nt);
-        reg_print_info(this->executableName, text);
-        sprintf(text, "\t* image spacing: %g x %g x %g mm", floating->dx, floating->dy, floating->dz);
-        reg_print_info(this->executableName, text);
-        reg_print_info(this->executableName, "Current control point image");
-        sprintf(text, "\t* image dimension: %i x %i x %i", controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz);
-        reg_print_info(this->executableName, text);
-        sprintf(text, "\t* image spacing: %g x %g x %g mm", controlPointGrid->dx, controlPointGrid->dy, controlPointGrid->dz);
-        reg_print_info(this->executableName, text);
-#ifdef NDEBUG
-    }
-#endif
+    const nifti_image *reference = this->con->Content::GetReference();
+    const nifti_image *floating = this->con->Content::GetFloating();
+    NR_VERBOSE("Current level: " << currentLevel + 1 << " / " << this->levelNumber);
+    NR_VERBOSE("Maximum iteration number: " << this->maxIterationNumber);
+    NR_VERBOSE("Current reference image");
+    NR_VERBOSE("\t* image dimension: " << reference->nx << " x " << reference->ny << " x " << reference->nz << " x " << reference->nt);
+    NR_VERBOSE("\t* image spacing: " << reference->dx << " x " << reference->dy << " x " << reference->dz << " mm");
+    NR_VERBOSE("Current floating image");
+    NR_VERBOSE("\t* image dimension: " << floating->nx << " x " << floating->ny << " x " << floating->nz << " x " << floating->nt);
+    NR_VERBOSE("\t* image spacing: " << floating->dx << " x " << floating->dy << " x " << floating->dz << " mm");
+    NR_VERBOSE("Current control point image");
+    NR_VERBOSE("\t* image dimension: " << controlPointGrid->nx << " x " << controlPointGrid->ny << " x " << controlPointGrid->nz);
+    NR_VERBOSE("\t* image spacing: " << controlPointGrid->dx << " x " << controlPointGrid->dy << " x " << controlPointGrid->dz << " mm");
 
-#ifndef NDEBUG
     if (reference->sform_code > 0)
-        reg_mat44_disp(&(reference->sto_xyz), (char *)"[NiftyReg DEBUG] Reference sform");
-    else reg_mat44_disp(&(reference->qto_xyz), (char *)"[NiftyReg DEBUG] Reference qform");
-
+        NR_MAT44_VERBOSE(reference->sto_xyz, "Reference sform");
+    else NR_MAT44_VERBOSE(reference->qto_xyz, "Reference qform");
     if (floating->sform_code > 0)
-        reg_mat44_disp(&(floating->sto_xyz), (char *)"[NiftyReg DEBUG] Floating sform");
-    else reg_mat44_disp(&(floating->qto_xyz), (char *)"[NiftyReg DEBUG] Floating qform");
-
+        NR_MAT44_VERBOSE(floating->sto_xyz, "Floating sform");
+    else NR_MAT44_VERBOSE(floating->qto_xyz, "Floating qform");
     if (controlPointGrid->sform_code > 0)
-        reg_mat44_disp(&(controlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] CPP sform");
-    else reg_mat44_disp(&(controlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] CPP qform");
-#endif
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::DisplayCurrentLevelParameters");
-#endif
+        NR_MAT44_VERBOSE(controlPointGrid->sto_xyz, "CPP sform");
+    else NR_MAT44_VERBOSE(controlPointGrid->qto_xyz, "CPP qform");
+
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 double reg_f3d<T>::GetObjectiveFunctionValue() {
     currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations
-
     currentWBE = ComputeBendingEnergyPenaltyTerm();
-
     currentWLE = ComputeLinearEnergyPenaltyTerm();
-
     this->currentWLand = ComputeLandmarkDistancePenaltyTerm();
 
     // Compute initial similarity measure
@@ -567,16 +445,10 @@ double reg_f3d<T>::GetObjectiveFunctionValue() {
         this->WarpFloatingImage(this->interpolation);
         this->currentWMeasure = this->ComputeSimilarityMeasure();
     }
-#ifndef NDEBUG
-    char text[255];
-    sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g",
-            this->currentWMeasure, currentWBE, currentWLE, currentWJac, this->currentWLand);
-    reg_print_msg_debug(text);
-#endif
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetObjectiveFunctionValue");
-#endif
+    NR_DEBUG("(wMeasure) " << this->currentWMeasure << " | (wBE) " << currentWBE << " | (wLE) " << currentWLE <<
+             " | (wJac) " << currentWJac << " | (wLan) " << this->currentWLand);
+    NR_FUNC_CALLED();
 
     // Store the global objective function value
     return this->currentWMeasure - currentWBE - currentWLE - currentWJac - this->currentWLand;
@@ -591,9 +463,7 @@ void reg_f3d<T>::UpdateParameters(float scale) {
                                               this->optimiseX,
                                               this->optimiseY,
                                               this->optimiseZ);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::UpdateParameters");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -605,55 +475,40 @@ void reg_f3d<T>::SetOptimiser() {
                                                                       this->optimiseX,
                                                                       this->optimiseY,
                                                                       this->optimiseZ));
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::SetOptimiser");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::SmoothGradient() {
     // The gradient is smoothed using a Gaussian kernel if it is required
     this->compute->SmoothGradient(this->gradientSmoothingSigma);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::SmoothGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::GetApproximatedGradient() {
     this->compute->GetApproximatedGradient(*this);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetApproximatedGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 vector<NiftiImage> reg_f3d<T>::GetWarpedImage() {
     // The initial images are used
-    if (!this->inputReference || !this->inputFloating || !controlPointGrid) {
-        reg_print_fct_error("reg_f3d<T>::GetWarpedImage()");
-        reg_print_msg_error("The reference, floating and control point grid images have to be defined");
-        reg_exit();
-    }
+    if (!this->inputReference || !this->inputFloating || !controlPointGrid)
+        NR_FATAL_ERROR("The reference, floating and control point grid images have to be defined");
 
     InitCurrentLevel(-1);
-
     this->WarpFloatingImage(3); // cubic spline interpolation
-
     NiftiImage warpedImage = NiftiImage(this->con->GetWarped(), NiftiImage::Copy::Image);
-
     DeinitCurrentLevel(-1);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetWarpedImage");
-#endif
+
+    NR_FUNC_CALLED();
     return { warpedImage };
 }
 /* *************************************************************** */
 template<class T>
 NiftiImage reg_f3d<T>::GetControlPointPositionImage() {
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetControlPointPositionImage");
-#endif
+    NR_FUNC_CALLED();
     return controlPointGrid;
 }
 /* *************************************************************** */
@@ -664,48 +519,26 @@ void reg_f3d<T>::UpdateBestObjFunctionValue() {
     bestWLE = currentWLE;
     bestWJac = currentWJac;
     this->bestWLand = this->currentWLand;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::UpdateBestObjFunctionValue");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::PrintInitialObjFunctionValue() {
-    if (!this->verbose) return;
-
-    double bestValue = this->optimiser->GetBestObjFunctionValue();
-
-    char text[255];
-    sprintf(text, "Initial objective function: %g = (wSIM)%g - (wBE)%g - (wLE)%g - (wJAC)%g - (wLAN)%g",
-            bestValue, this->bestWMeasure, bestWBE, bestWLE, bestWJac, this->bestWLand);
-    reg_print_info(this->executableName, text);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::PrintInitialObjFunctionValue");
-#endif
+    NR_VERBOSE("Initial objective function: " << this->optimiser->GetBestObjFunctionValue() << " = (wSIM)" << this->bestWMeasure <<
+               " - (wBE)" << bestWBE << " - (wLE)" << bestWLE << " - (wJAC)" << bestWJac << " - (wLAN)" << this->bestWLand);
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::PrintCurrentObjFunctionValue(T currentSize) {
-    if (!this->verbose) return;
-
-    char text[255];
-    sprintf(text, "[%i] Current objective function: %g",
-            (int)this->optimiser->GetCurrentIterationNumber(),
-            this->optimiser->GetBestObjFunctionValue());
-    sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure);
-    if (bendingEnergyWeight > 0)
-        sprintf(text + strlen(text), " - (wBE)%.2e", bestWBE);
-    if (linearEnergyWeight > 0)
-        sprintf(text + strlen(text), " - (wLE)%.2e", bestWLE);
-    if (jacobianLogWeight > 0)
-        sprintf(text + strlen(text), " - (wJAC)%.2e", bestWJac);
-    if (this->landmarkRegWeight > 0)
-        sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand);
-    sprintf(text + strlen(text), " [+ %g mm]", currentSize);
-    reg_print_info(this->executableName, text);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::PrintCurrentObjFunctionValue");
-#endif
+    NR_VERBOSE("[" << this->optimiser->GetCurrentIterationNumber() << "] Current objective function: " <<
+               this->optimiser->GetBestObjFunctionValue() << " = (wSIM)" << this->bestWMeasure <<
+               (bendingEnergyWeight > 0 ? " - (wBE)"s + std::to_string(bestWBE) : "") <<
+               (linearEnergyWeight > 0 ? " - (wLE)"s + std::to_string(bestWLE) : "") <<
+               (jacobianLogWeight > 0 ? " - (wJAC)"s + std::to_string(bestWJac) : "") <<
+               (this->landmarkRegWeight > 0 ? " - (wLAN)"s + std::to_string(this->bestWLand) : "") <<
+               " [+ " << currentSize << " mm]");
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -731,18 +564,14 @@ void reg_f3d<T>::GetObjectiveFunctionGradient() {
 
     // Smooth the gradient if require
     SmoothGradient();
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::GetObjectiveFunctionGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::CorrectTransformation() {
     if (jacobianLogWeight > 0 && jacobianLogApproximation)
         ComputeJacobianBasedPenaltyTerm(2); // 20 iterations without approximation
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d<T>::CorrectTransformation");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template class reg_f3d<float>;
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index f56d6a48..70ede1f8 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -22,26 +22,19 @@ reg_f3d2<T>::reg_f3d2(int refTimePoint, int floTimePoint):
     bchUpdate = false;
     useGradientCumulativeExp = true;
     bchUpdateValue = 0;
-
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_f3d2 constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d2<T>::SetFloatingMask(NiftiImage floatingMaskImageIn) {
     floatingMaskImage = floatingMaskImageIn;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::~SetFloatingMask");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d2<T>::SetInverseConsistencyWeight(T w) {
     inverseConsistencyWeight = w;
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::SetInverseConsistencyWeight");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
@@ -100,9 +93,7 @@ T reg_f3d2<T>::InitCurrentLevel(int currentLevel) {
     reg_f3d<T>::InitContent(reference, floating, referenceMask);
     InitContent(reference, floating, floatingMask);
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::InitCurrentLevel");
-#endif
+    NR_FUNC_CALLED();
     return maxStepSize;
 }
 /* *************************************************************** */
@@ -125,15 +116,10 @@ void reg_f3d2<T>::CheckParameters() {
     reg_f3d<T>::CheckParameters();
 
     // CHECK THE FLOATING MASK DIMENSION IF IT IS DEFINED
-    if (floatingMaskImage) {
-        if (this->inputFloating->nx != floatingMaskImage->nx ||
-            this->inputFloating->ny != floatingMaskImage->ny ||
-            this->inputFloating->nz != floatingMaskImage->nz) {
-            reg_print_fct_error("reg_f3d2<T>::CheckParameters()");
-            reg_print_msg_error("The floating image and its mask have different dimension");
-            reg_exit();
-        }
-    }
+    if (floatingMaskImage && (this->inputFloating->nx != floatingMaskImage->nx ||
+                              this->inputFloating->ny != floatingMaskImage->ny ||
+                              this->inputFloating->nz != floatingMaskImage->nz))
+        NR_FATAL_ERROR("The floating image and its mask have different dimension");
 
     // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS
     T penaltySum = (this->bendingEnergyWeight + this->linearEnergyWeight + this->jacobianLogWeight +
@@ -147,9 +133,7 @@ void reg_f3d2<T>::CheckParameters() {
         this->landmarkRegWeight /= penaltySum;
     } else this->similarityWeight = 1 - penaltySum;
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::CheckParameters");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -159,18 +143,11 @@ void reg_f3d2<T>::GetDeformationField() {
     if (!this->optimiser)
         updateStepNumber = false;
 
-#ifndef NDEBUG
-    char text[255];
-    sprintf(text, "Velocity integration forward. Step number update=%i", updateStepNumber);
-    reg_print_msg_debug(text);
-#endif
+    NR_DEBUG("Velocity integration forward. Step number update=" << updateStepNumber);
     // The forward transformation is computed using the scaling-and-squaring approach
     this->compute->GetDefFieldFromVelocityGrid(updateStepNumber);
 
-#ifndef NDEBUG
-    sprintf(text, "Velocity integration backward. Step number update=%i", updateStepNumber);
-    reg_print_msg_debug(text);
-#endif
+    NR_DEBUG("Velocity integration backward. Step number update=" << updateStepNumber);
     // The number of step number is copied over from the forward transformation
     controlPointGridBw->intent_p2 = this->controlPointGrid->intent_p2;
     // The backward transformation is computed using the scaling-and-squaring approach
@@ -196,9 +173,7 @@ void reg_f3d2<T>::WarpFloatingImage(int inter) {
                           this->measure_dti->GetActiveTimepoints(),
                           backwardJacobianMatrix);*/
     }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::WarpFloatingImage");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -216,76 +191,49 @@ double reg_f3d2<T>::ComputeJacobianBasedPenaltyTerm(int type) {
     unsigned it = 0;
     while (backwardPenaltyTerm != backwardPenaltyTerm && it < maxit) {
         backwardPenaltyTerm = computeBw->CorrectFolding(approx);
-#ifndef NDEBUG
-        reg_print_msg_debug("Folding correction - Backward transformation");
-#endif
+        NR_DEBUG("Folding correction - Backward transformation");
         it++;
     }
     if (type > 0 && it > 0) {
         if (backwardPenaltyTerm != backwardPenaltyTerm) {
             this->optimiser->RestoreBestDof();
-#ifndef NDEBUG
-            reg_print_fct_warn("reg_f3d2<T>::ComputeJacobianBasedPenaltyTerm()");
-            reg_print_msg_warn("The backward transformation folding correction scheme failed");
-#endif
+            NR_DEBUG("The backward transformation folding correction scheme failed");
         } else {
-#ifdef NDEBUG
-            if (this->verbose) {
-#endif
-                char text[255];
-                sprintf(text, "Backward transformation folding correction, %i step(s)", it);
-                reg_print_msg_debug(text);
-#ifdef NDEBUG
-            }
-#endif
+            NR_VERBOSE("Backward transformation folding correction, " << it << " step(s)");
         }
     }
     backwardPenaltyTerm *= this->jacobianLogWeight;
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::ComputeJacobianBasedPenaltyTerm");
-#endif
+    NR_FUNC_CALLED();
     return forwardPenaltyTerm + backwardPenaltyTerm;
 }
 /* *************************************************************** */
 template <class T>
 double reg_f3d2<T>::ComputeBendingEnergyPenaltyTerm() {
     if (this->bendingEnergyWeight <= 0) return 0;
-
-    double forwardPenaltyTerm = reg_f3d<T>::ComputeBendingEnergyPenaltyTerm();
-    double backwardPenaltyTerm = this->bendingEnergyWeight * computeBw->ApproxBendingEnergy();
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::ComputeBendingEnergyPenaltyTerm");
-#endif
+    const double forwardPenaltyTerm = reg_f3d<T>::ComputeBendingEnergyPenaltyTerm();
+    const double backwardPenaltyTerm = this->bendingEnergyWeight * computeBw->ApproxBendingEnergy();
+    NR_FUNC_CALLED();
     return forwardPenaltyTerm + backwardPenaltyTerm;
 }
 /* *************************************************************** */
 template <class T>
 double reg_f3d2<T>::ComputeLinearEnergyPenaltyTerm() {
     if (this->linearEnergyWeight <= 0) return 0;
-
-    double forwardPenaltyTerm = reg_f3d<T>::ComputeLinearEnergyPenaltyTerm();
-    double backwardPenaltyTerm = this->linearEnergyWeight * computeBw->ApproxLinearEnergy();
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::ComputeLinearEnergyPenaltyTerm");
-#endif
+    const double forwardPenaltyTerm = reg_f3d<T>::ComputeLinearEnergyPenaltyTerm();
+    const double backwardPenaltyTerm = this->linearEnergyWeight * computeBw->ApproxLinearEnergy();
+    NR_FUNC_CALLED();
     return forwardPenaltyTerm + backwardPenaltyTerm;
 }
 /* *************************************************************** */
 template <class T>
 double reg_f3d2<T>::ComputeLandmarkDistancePenaltyTerm() {
     if (this->landmarkRegWeight <= 0) return 0;
-
-    double forwardPenaltyTerm = reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm();
-    double backwardPenaltyTerm = this->landmarkRegWeight * computeBw->GetLandmarkDistance(this->landmarkRegNumber,
-                                                                                          this->landmarkFloating,
-                                                                                          this->landmarkReference);
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::ComputeLandmarkDistancePenaltyTerm");
-#endif
+    const double forwardPenaltyTerm = reg_f3d<T>::ComputeLandmarkDistancePenaltyTerm();
+    const double backwardPenaltyTerm = this->landmarkRegWeight * computeBw->GetLandmarkDistance(this->landmarkRegNumber,
+                                                                                                this->landmarkFloating,
+                                                                                                this->landmarkReference);
+    NR_FUNC_CALLED();
     return forwardPenaltyTerm + backwardPenaltyTerm;
 }
 /* *************************************************************** */
@@ -349,9 +297,7 @@ void reg_f3d2<T>::GetVoxelBasedGradient() {
     // Exponentiate the gradients if required
     ExponentiateGradient();
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetVoxelBasedGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -362,59 +308,42 @@ void reg_f3d2<T>::GetSimilarityMeasureGradient() {
     // And the backward-node-based NMI gradient is extracted
     computeBw->ConvolveVoxelBasedMeasureGradient(this->similarityWeight);
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetSimilarityMeasureGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::GetJacobianBasedGradient() {
     if (this->jacobianLogWeight <= 0) return;
-
     reg_f3d<T>::GetJacobianBasedGradient();
     computeBw->JacobianPenaltyTermGradient(this->jacobianLogWeight, this->jacobianLogApproximation);
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetJacobianBasedGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::GetBendingEnergyGradient() {
     if (this->bendingEnergyWeight <= 0) return;
-
     reg_f3d<T>::GetBendingEnergyGradient();
     computeBw->ApproxBendingEnergyGradient(this->bendingEnergyWeight);
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetBendingEnergyGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::GetLinearEnergyGradient() {
     if (this->linearEnergyWeight <= 0) return;
-
     reg_f3d<T>::GetLinearEnergyGradient();
     computeBw->ApproxLinearEnergyGradient(this->linearEnergyWeight);
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetLinearEnergyGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::GetLandmarkDistanceGradient() {
     if (this->landmarkRegWeight <= 0) return;
-
     reg_f3d<T>::GetLandmarkDistanceGradient();
     computeBw->LandmarkDistanceGradient(this->landmarkRegNumber,
                                         this->landmarkFloating,
                                         this->landmarkReference,
                                         this->landmarkRegWeight);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetLandmarkDistanceGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -424,20 +353,14 @@ void reg_f3d2<T>::SmoothGradient() {
     // The gradient is smoothed using a Gaussian kernel if it is required
     computeBw->SmoothGradient(this->gradientSmoothingSigma);
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::SmoothGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::GetApproximatedGradient() {
     reg_f3d<T>::GetApproximatedGradient();
-
     computeBw->GetApproximatedGradient(*this);
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetApproximatedGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -452,21 +375,14 @@ T reg_f3d2<T>::NormaliseGradient() {
 
     // The largest value between the forward and backward gradient is kept
     const T maxGradLength = std::max(backwardMaxGradLength, forwardMaxGradLength);
-
-#ifndef NDEBUG
-    char text[255];
-    sprintf(text, "Objective function gradient maximal length: %g", maxGradLength);
-    reg_print_msg_debug(text);
-#endif
+    NR_DEBUG("Objective function gradient maximal length: " << maxGradLength);
 
     // The forward gradient is normalised
     this->compute->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
     // The backward gradient is normalised
     computeBw->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ);
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::NormaliseGradient");
-#endif
+    NR_FUNC_CALLED();
     // Returns the largest gradient distance
     return maxGradLength;
 }
@@ -495,37 +411,21 @@ void reg_f3d2<T>::GetObjectiveFunctionGradient() {
         GetLinearEnergyGradient();
         GetLandmarkDistanceGradient();
     }
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetObjectiveFunctionGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::DisplayCurrentLevelParameters(int currentLevel) {
     reg_f3d<T>::DisplayCurrentLevelParameters(currentLevel);
-#ifdef NDEBUG
-    if (this->verbose) {
-#endif
-        char text[255];
-        reg_print_info(this->executableName, "Current backward control point image");
-        sprintf(text, "\t* image dimension: %i x %i x %i",
-                controlPointGridBw->nx, controlPointGridBw->ny, controlPointGridBw->nz);
-        reg_print_info(this->executableName, text);
-        sprintf(text, "\t* image spacing: %g x %g x %g mm",
-                controlPointGridBw->dx, controlPointGridBw->dy, controlPointGridBw->dz);
-        reg_print_info(this->executableName, text);
-#ifdef NDEBUG
-    }
-#endif
+    NR_VERBOSE("Current backward control point image");
+    NR_VERBOSE("\t* image dimension: " << controlPointGridBw->nx << " x " << controlPointGridBw->ny << " x " << controlPointGridBw->nz);
+    NR_VERBOSE("\t* image spacing: " << controlPointGridBw->dx << " x " << controlPointGridBw->dy << " x " << controlPointGridBw->dz << " mm");
 
-#ifndef NDEBUG
     if (controlPointGridBw->sform_code > 0)
-        reg_mat44_disp(&controlPointGridBw->sto_xyz, (char*)"[NiftyReg DEBUG] Backward CPP sform");
-    else reg_mat44_disp(&controlPointGridBw->qto_xyz, (char*)"[NiftyReg DEBUG] Backward CPP qform");
-#endif
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::DisplayCurrentLevelParameters");
-#endif
+        NR_MAT44_VERBOSE(controlPointGridBw->sto_xyz, "Backward CPP sform");
+    else NR_MAT44_VERBOSE(controlPointGridBw->qto_xyz, "Backward CPP qform");
+
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -538,60 +438,32 @@ void reg_f3d2<T>::SetOptimiser() {
                                                                       this->optimiseY,
                                                                       this->optimiseZ,
                                                                       conBw.get()));
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::SetOptimiser");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d2<T>::PrintCurrentObjFunctionValue(T currentSize) {
-    if (!this->verbose) return;
-
-    char text[255];
-    sprintf(text, "[%i] Current objective function: %g",
-            (int)this->optimiser->GetCurrentIterationNumber(),
-            this->optimiser->GetBestObjFunctionValue());
-    sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure);
-    if (this->bendingEnergyWeight > 0)
-        sprintf(text + strlen(text), " - (wBE)%.2e", this->bestWBE);
-    if (this->linearEnergyWeight)
-        sprintf(text + strlen(text), " - (wLE)%.2e", this->bestWLE);
-    if (this->jacobianLogWeight > 0)
-        sprintf(text + strlen(text), " - (wJAC)%.2e", this->bestWJac);
-    if (this->landmarkRegWeight > 0)
-        sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand);
-    sprintf(text + strlen(text), " [+ %g mm]", currentSize);
-    reg_print_info(this->executableName, text);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::PrintCurrentObjFunctionValue");
-#endif
+    reg_f3d<T>::PrintCurrentObjFunctionValue(currentSize);
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d2<T>::UpdateBestObjFunctionValue() {
     reg_f3d<T>::UpdateBestObjFunctionValue();
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::UpdateBestObjFunctionValue");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 void reg_f3d2<T>::PrintInitialObjFunctionValue() {
-    if (!this->verbose) return;
     reg_f3d<T>::PrintInitialObjFunctionValue();
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::PrintInitialObjFunctionValue");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
 double reg_f3d2<T>::GetObjectiveFunctionValue() {
     this->currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations
-
     this->currentWBE = ComputeBendingEnergyPenaltyTerm();
-
     this->currentWLE = ComputeLinearEnergyPenaltyTerm();
-
     this->currentWLand = ComputeLandmarkDistancePenaltyTerm();
 
     // Compute initial similarity measure
@@ -601,17 +473,10 @@ double reg_f3d2<T>::GetObjectiveFunctionValue() {
         this->currentWMeasure = this->ComputeSimilarityMeasure();
     }
 
-#ifndef NDEBUG
-    char text[255];
-    sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g",
-            this->currentWMeasure, this->currentWBE, this->currentWLE,
-            this->currentWJac, this->currentWLand);
-    reg_print_msg_debug(text);
-#endif
-
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetObjectiveFunctionValue");
-#endif
+    NR_DEBUG("(wMeasure) " << this->currentWMeasure << " | (wBE) " << this->currentWBE << " | (wLE) " << this->currentWLE <<
+             " | (wJac) " << this->currentWJac << " | (wLan) " << this->currentWLand);
+    NR_FUNC_CALLED();
+
     // Store the global objective function value
     return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac;
 }
@@ -641,16 +506,12 @@ void reg_f3d2<T>::InitialiseSimilarity() {
     if (this->measure_mindssc)
         this->measure->Initialise(*this->measure_mindssc, con, conBw.get());
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::InitialiseSimilarity");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
 NiftiImage reg_f3d2<T>::GetBackwardControlPointPositionImage() {
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetBackwardControlPointPositionImage");
-#endif
+    NR_FUNC_CALLED();
     return controlPointGridBw;
 }
 /* *************************************************************** */
@@ -726,17 +587,8 @@ void reg_f3d2<T>::Initialise() {
         for (unsigned l = 0; l < imageCount; ++l)
             floatingMaskPyramid[l].reset(new int[this->floatingPyramid[l].nVoxelsPerVolume()]());
 
-#ifdef NDEBUG
-    if (this->verbose) {
-#endif
-        if (inverseConsistencyWeight > 0) {
-            char text[255];
-            sprintf(text, "Inverse consistency error penalty term weight: %g", inverseConsistencyWeight);
-            reg_print_info(this->executableName, text);
-        }
-#ifdef NDEBUG
-    }
-#endif
+    if (inverseConsistencyWeight > 0)
+        NR_VERBOSE("Inverse consistency error penalty term weight: "s + std::to_string(inverseConsistencyWeight));
 
     // Convert the control point grid into velocity field parametrisation
     this->controlPointGrid->intent_p1 = SPLINE_VEL_GRID;
@@ -747,9 +599,7 @@ void reg_f3d2<T>::Initialise() {
     if (this->affineTransformation)
         affineTransformationBw.reset(new mat44(nifti_mat44_inverse(*this->affineTransformation)));
 
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_f3d2::Initialise() done");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -757,20 +607,14 @@ void reg_f3d2<T>::ExponentiateGradient() {
     if (!useGradientCumulativeExp) return;
 
     // Exponentiate the forward gradient using the backward transformation
-#ifndef NDEBUG
-    reg_print_msg_debug("Update the forward measure gradient using a Dartel like approach");
-#endif
+    NR_DEBUG("Update the forward measure gradient using a Dartel like approach");
     this->compute->ExponentiateGradient(*conBw);
 
     /* Exponentiate the backward gradient using the forward transformation */
-#ifndef NDEBUG
-    reg_print_msg_debug("Update the backward measure gradient using a Dartel like approach");
-#endif
+    NR_DEBUG("Update the backward measure gradient using a Dartel like approach");
     computeBw->ExponentiateGradient(*this->con);
 
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::ExponentiateGradient");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -783,17 +627,13 @@ void reg_f3d2<T>::UpdateParameters(float scale) {
     // Note that the gradient has been integrated over the path of transformation previously
     if (bchUpdate) {
         // Forward update
-        reg_print_msg_warn("USING BCH FORWARD - TESTING ONLY");
-#ifndef NDEBUG
-        reg_print_msg_debug("Update the forward control point grid using BCH approximation");
-#endif
+        NR_WARN("USING BCH FORWARD - TESTING ONLY");
+        NR_DEBUG("Update the forward control point grid using BCH approximation");
         this->compute->BchUpdate(scale, bchUpdateValue);
 
         // Backward update
-        reg_print_msg_warn("USING BCH BACKWARD - TESTING ONLY");
-#ifndef NDEBUG
-        reg_print_msg_debug("Update the backward control point grid using BCH approximation");
-#endif
+        NR_WARN("USING BCH BACKWARD - TESTING ONLY");
+        NR_DEBUG("Update the backward control point grid using BCH approximation");
         computeBw->BchUpdate(scale, bchUpdateValue);
     } else {
         // Forward update
@@ -815,11 +655,8 @@ void reg_f3d2<T>::UpdateParameters(float scale) {
 template<class T>
 vector<NiftiImage> reg_f3d2<T>::GetWarpedImage() {
     // The initial images are used
-    if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !controlPointGridBw) {
-        reg_print_fct_error("reg_f3d2<T>::GetWarpedImage()");
-        reg_print_msg_error("The reference, floating and control point grid images have to be defined");
-        reg_exit();
-    }
+    if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !controlPointGridBw)
+        NR_FATAL_ERROR("The reference, floating and control point grid images have to be defined");
 
     InitCurrentLevel(-1);
 
@@ -832,9 +669,8 @@ vector<NiftiImage> reg_f3d2<T>::GetWarpedImage() {
     };
 
     DeinitCurrentLevel(-1);
-#ifndef NDEBUG
-    reg_print_fct_debug("reg_f3d2<T>::GetWarpedImage");
-#endif
+
+    NR_FUNC_CALLED();
     return warpedImage;
 }
 /* *************************************************************** */
diff --git a/reg-lib/_reg_polyAffine.cpp b/reg-lib/_reg_polyAffine.cpp
index dd01abca..27569d2c 100644
--- a/reg-lib/_reg_polyAffine.cpp
+++ b/reg-lib/_reg_polyAffine.cpp
@@ -19,19 +19,14 @@ reg_polyAffine<T>::reg_polyAffine(int refTimePoint,int floTimePoint)
    : reg_base<T>::reg_base(refTimePoint,floTimePoint)
 {
    this->executableName=(char *)"NiftyReg PolyAffine";
-
-#ifndef NDEBUG
-   reg_print_msg_debug("reg_polyAffine constructor called");
-#endif
+   NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
 reg_polyAffine<T>::~reg_polyAffine()
 {
-#ifndef NDEBUG
-   reg_print_msg_debug("reg_polyAffine destructor called");
-#endif
+   NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
index 0ffd4234..a7c33a51 100644
--- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
@@ -15,15 +15,11 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) :
     std::string clSrcPath;
     //src dir
     if (niftyreg_src_dir != nullptr) {
-        char opencl_kernel_path[255];
-        sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir);
-        clSrcPath = opencl_kernel_path;
+        clSrcPath = niftyreg_src_dir + "/reg-lib/cl/"s;
     } else clSrcPath = CL_KERNELS_SRC_PATH;
     //install dir
     if (niftyreg_install_dir != nullptr) {
-        char opencl_kernel_path[255];
-        sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir);
-        clInstallPath = opencl_kernel_path;
+        clInstallPath = niftyreg_install_dir + "/include/cl/"s;
     } else clInstallPath = CL_KERNELS_PATH;
 
     std::string clKernel("affineDeformationKernel.cl");
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index ccdb1238..f15eee35 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -236,34 +236,28 @@ template<class DataType>
 DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) {
     switch (datatype) {
     case NIFTI_TYPE_FLOAT32:
-        return static_cast<float>(intensity);
-        break;
+        return static_cast<DataType>(intensity);
     case NIFTI_TYPE_FLOAT64:
-        return static_cast<double>(intensity);
-        break;
+        return static_cast<DataType>(intensity);
     case NIFTI_TYPE_UINT8:
         if (intensity != intensity)
             intensity = 0;
         intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
         return static_cast<unsigned char>(intensity > 0 ? reg_round(intensity) : 0);
-        break;
     case NIFTI_TYPE_UINT16:
         if (intensity != intensity)
             intensity = 0;
         intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
         return static_cast<unsigned short>(intensity > 0 ? reg_round(intensity) : 0);
-        break;
     case NIFTI_TYPE_UINT32:
         if (intensity != intensity)
             intensity = 0;
         intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
         return static_cast<unsigned>(intensity > 0 ? reg_round(intensity) : 0);
-        break;
     default:
         if (intensity != intensity)
             intensity = 0;
         return static_cast<DataType>(reg_round(intensity));
-        break;
     }
 }
 /* *************************************************************** */
@@ -272,11 +266,8 @@ void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int
     size_t size = image->nvox;
     float* buffer = nullptr;
     buffer = (float*)malloc(size * sizeof(float));
-    if (buffer == nullptr) {
-        reg_print_fct_error("ClAladinContent::FillImageData");
-        reg_print_msg_error("Memory allocation did not complete successfully. Exit.");
-        reg_exit();
-    }
+    if (buffer == nullptr)
+        NR_FATAL_ERROR("Memory allocation did not complete successfully");
 
     errNum = clEnqueueReadBuffer(commandQueue, memoryObject, CL_TRUE, 0,
                                  size * sizeof(float), buffer, 0, nullptr, nullptr);
@@ -319,10 +310,7 @@ void ClAladinContent::DownloadImage(nifti_image *image, cl_mem memoryObject, int
         FillImageData<int>(image, memoryObject, datatype);
         break;
     default:
-        reg_print_fct_error("ClAladinContent::DownloadImage");
-        reg_print_msg_error("Unsupported type");
-        reg_exit();
-        break;
+        NR_FATAL_ERROR("Unsupported type");
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp
index 9cea76c7..06002aa9 100644
--- a/reg-lib/cl/ClBlockMatchingKernel.cpp
+++ b/reg-lib/cl/ClBlockMatchingKernel.cpp
@@ -15,15 +15,11 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern
    std::string clSrcPath;
    //src dir
    if (niftyreg_src_dir != nullptr) {
-      char opencl_kernel_path[255];
-      sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir);
-      clSrcPath = opencl_kernel_path;
+      clSrcPath = niftyreg_src_dir + "/reg-lib/cl/"s;
    } else clSrcPath = CL_KERNELS_SRC_PATH;
    //install dir
    if (niftyreg_install_dir != nullptr) {
-      char opencl_kernel_path[255];
-      sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir);
-      clInstallPath = opencl_kernel_path;
+      clInstallPath = niftyreg_install_dir + "/include/cl/"s;
    } else clInstallPath = CL_KERNELS_PATH;
    std::string clKernel("blockMatchingKernel.cl");
    //Let's check if we did an install
@@ -65,10 +61,8 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern
 }
 /* *************************************************************** */
 void ClBlockMatchingKernel::Calculate() {
-   if (params->stepSize != 1 || params->voxelCaptureRange != 3) {
-      reg_print_msg_error("The block Mathching OpenCL kernel supports only a stepsize of 1");
-      reg_exit();
-   }
+   if (params->stepSize != 1 || params->voxelCaptureRange != 3)
+      NR_FATAL_ERROR("The block matching OpenCL kernel supports only a single step size");
    cl_int errNum;
    params->definedActiveBlockNumber = 0;
    cl_mem cldefinedBlock = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
@@ -123,10 +117,9 @@ void ClBlockMatchingKernel::Calculate() {
                                 &(params->definedActiveBlockNumber), 0, nullptr, nullptr);
    sContext->CheckErrNum(errNum, "Error reading  var after ClBlockMatchingKernel execution ");
 
-   if (params->definedActiveBlockNumber == 0) {
-      reg_print_msg_error("Unexpected error in the ClBlockMatchingKernel execution");
-      reg_exit();
-   }
+   if (params->definedActiveBlockNumber == 0)
+      NR_FATAL_ERROR("Unexpected error in the ClBlockMatchingKernel execution");
+
    clReleaseMemObject(cldefinedBlock);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClContentCreatorFactory.h b/reg-lib/cl/ClContentCreatorFactory.h
index b80c687e..cc6f8620 100644
--- a/reg-lib/cl/ClContentCreatorFactory.h
+++ b/reg-lib/cl/ClContentCreatorFactory.h
@@ -10,9 +10,8 @@ class ClContentCreatorFactory: public ContentCreatorFactory {
         case ContentType::Aladin:
             return new ClAladinContentCreator();
         default:
-            reg_print_fct_error("ClContentFactory::Produce");
-            reg_print_msg_error("Unsupported content type");
-            reg_exit();
+            NR_FATAL_ERROR("Unsupported content type");
+            return nullptr;
         }
     }
 };
diff --git a/reg-lib/cl/ClContextSingleton.cpp b/reg-lib/cl/ClContextSingleton.cpp
index c3d3d1fc..c9deb205 100644
--- a/reg-lib/cl/ClContextSingleton.cpp
+++ b/reg-lib/cl/ClContextSingleton.cpp
@@ -29,10 +29,10 @@ void ClContextSingleton::Init() {
     this->context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &errNum);
 
     if (errNum != CL_SUCCESS) {
-        std::cout << "Could not create GPU context, trying CPU..." << std::endl;
+        NR_WARN("Could not create GPU context, trying CPU...");
         context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, nullptr, nullptr, &errNum);
         if (errNum != CL_SUCCESS) {
-            std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
+            NR_ERROR("Failed to create an OpenCL GPU or CPU context");
             return;
         }
     }
@@ -101,11 +101,8 @@ void ClContextSingleton::PickCard(cl_uint deviceId) {
             this->isCardDoubleCapable = false;
         }
         return;
-    } else if (deviceId != 999) {
-        reg_print_msg_error("The specified opencl card id is not defined");
-        reg_print_msg_error("Run reg_gpuinfo to get the proper id");
-        reg_exit();
-    }
+    } else if (deviceId != 999)
+        NR_FATAL_ERROR("The specified OpenCL card ID is not defined! Run reg_gpuinfo to get the proper ID.");
 
     for (cl_uint i = 0; i < this->numDevices; ++i) {
         cl_device_type dev_type;
@@ -151,7 +148,7 @@ cl_program ClContextSingleton::CreateProgram(const char *fileName) {
     cl_program program;
     std::ifstream kernelFile(fileName, std::ios::in);
     if (!kernelFile.is_open()) {
-        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
+        NR_ERROR("Failed to open file for reading: " << fileName);
         return nullptr;
     }
     std::ostringstream oss;
@@ -163,97 +160,93 @@ cl_program ClContextSingleton::CreateProgram(const char *fileName) {
 
     errNum = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
     if (errNum != CL_SUCCESS) {
-        CheckDebugKernelInfo(program, this->deviceId, (char*)"Errors in kernel: ");
+        CheckDebugKernelInfo(program, this->deviceId, "Errors in kernel: ");
         //create log
-        size_t length;
         char buffer[2048];
-        clGetProgramBuildInfo(program, this->devices[this->clIdx], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &length);
-        std::cout << "--- Build log ---\n " << buffer << std::endl;
-        reg_exit();
+        clGetProgramBuildInfo(program, this->devices[this->clIdx], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr);
+        NR_FATAL_ERROR("--- Build log ---\n"s + buffer);
     }
 
     return program;
 }
 /* *************************************************************** */
 ClContextSingleton::~ClContextSingleton() {
-    /*std::cout << "Shutting down cl" << std::endl;*/
     if (this->context != 0) clReleaseContext(this->context);
     if (this->commandQueue != 0) clReleaseCommandQueue(this->commandQueue);
     delete[] this->devices;
 }
 /* *************************************************************** */
-void ClContextSingleton::CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message) {
+void ClContextSingleton::CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, const char *message) {
     char buffer[10240];
     clGetProgramBuildInfo(program, devIdIn, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr);
-    reg_print_fct_error(message);
-    reg_print_fct_error(buffer);
+    NR_ERROR(message);
+    NR_ERROR(buffer);
 }
 /* *************************************************************** */
 void ClContextSingleton::CheckErrNum(cl_int errNum, std::string message) {
     if (errNum != CL_SUCCESS) {
-        reg_print_msg_error(message.c_str());
+        NR_ERROR(message);
         switch (errNum) {
-        case -1: reg_print_msg_error("CL_DEVICE_NOT_FOUND"); break;
-        case -2: reg_print_msg_error("CL_DEVICE_NOT_AVAILABLE"); break;
-        case -3: reg_print_msg_error("CL_COMPILER_NOT_AVAILABLE"); break;
-        case -4: reg_print_msg_error("CL_MEM_OBJECT_ALLOCATION_FAILURE"); break;
-        case -5: reg_print_msg_error("CL_OUT_OF_RESOURCES"); break;
-        case -6: reg_print_msg_error("CL_OUT_OF_HOST_MEMORY"); break;
-        case -7: reg_print_msg_error("CL_PROFILING_INFO_NOT_AVAILABLE"); break;
-        case -8: reg_print_msg_error("CL_MEM_COPY_OVERLAP"); break;
-        case -9: reg_print_msg_error("CL_IMAGE_FORMAT_MISMATCH"); break;
-        case -10: reg_print_msg_error("CL_IMAGE_FORMAT_NOT_SUPPORTED"); break;
-        case -11: reg_print_msg_error("CL_BUILD_PROGRAM_FAILURE"); break;
-        case -12: reg_print_msg_error("CL_MAP_FAILURE"); break;
-        case -13: reg_print_msg_error("CL_MISALIGNED_SUB_BUFFER_OFFSET"); break;
-        case -14: reg_print_msg_error("CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"); break;
-        case -15: reg_print_msg_error("CL_COMPILE_PROGRAM_FAILURE"); break;
-        case -16: reg_print_msg_error("CL_LINKER_NOT_AVAILABLE"); break;
-        case -17: reg_print_msg_error("CL_LINK_PROGRAM_FAILURE"); break;
-        case -18: reg_print_msg_error("CL_DEVICE_PARTITION_FAILED"); break;
-        case -19: reg_print_msg_error("CL_KERNEL_ARG_INFO_NOT_AVAILABLE"); break;
-        case -30: reg_print_msg_error("CL_INVALID_VALUE"); break;
-        case -31: reg_print_msg_error("CL_INVALID_DEVICE_TYPE"); break;
-        case -32: reg_print_msg_error("CL_INVALID_PLATFORM"); break;
-        case -33: reg_print_msg_error("CL_INVALID_DEVICE"); break;
-        case -34: reg_print_msg_error("CL_INVALID_CONTEXT"); break;
-        case -35: reg_print_msg_error("CL_INVALID_QUEUE_PROPERTIES"); break;
-        case -36: reg_print_msg_error("CL_INVALID_COMMAND_QUEUE"); break;
-        case -37: reg_print_msg_error("CL_INVALID_HOST_PTR"); break;
-        case -38: reg_print_msg_error("CL_INVALID_MEM_OBJECT"); break;
-        case -39: reg_print_msg_error("CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"); break;
-        case -40: reg_print_msg_error("CL_INVALID_IMAGE_SIZE"); break;
-        case -41: reg_print_msg_error("CL_INVALID_SAMPLER"); break;
-        case -42: reg_print_msg_error("CL_INVALID_BINARY"); break;
-        case -43: reg_print_msg_error("CL_INVALID_BUILD_OPTIONS"); break;
-        case -44: reg_print_msg_error("CL_INVALID_PROGRAM"); break;
-        case -45: reg_print_msg_error("CL_INVALID_PROGRAM_EXECUTABLE"); break;
-        case -46: reg_print_msg_error("CL_INVALID_KERNEL_NAME"); break;
-        case -47: reg_print_msg_error("CL_INVALID_KERNEL_DEFINITION"); break;
-        case -48: reg_print_msg_error("CL_INVALID_KERNEL"); break;
-        case -49: reg_print_msg_error("CL_INVALID_ARG_INDEX"); break;
-        case -50: reg_print_msg_error("CL_INVALID_ARG_VALUE"); break;
-        case -51: reg_print_msg_error("CL_INVALID_ARG_SIZE"); break;
-        case -52: reg_print_msg_error("CL_INVALID_KERNEL_ARGS"); break;
-        case -53: reg_print_msg_error("CL_INVALID_WORK_DIMENSION"); break;
-        case -54: reg_print_msg_error("CL_INVALID_WORK_GROUP_SIZE"); break;
-        case -55: reg_print_msg_error("CL_INVALID_WORK_ITEM_SIZE"); break;
-        case -56: reg_print_msg_error("CL_INVALID_GLOBAL_OFFSET"); break;
-        case -57: reg_print_msg_error("CL_INVALID_EVENT_WAIT_LIST"); break;
-        case -58: reg_print_msg_error("CL_INVALID_EVENT"); break;
-        case -59: reg_print_msg_error("CL_INVALID_OPERATION"); break;
-        case -60: reg_print_msg_error("CL_INVALID_GL_OBJECT"); break;
-        case -61: reg_print_msg_error("CL_INVALID_BUFFER_SIZE"); break;
-        case -62: reg_print_msg_error("CL_INVALID_MIP_LEVEL"); break;
-        case -63: reg_print_msg_error("CL_INVALID_GLOBAL_WORK_SIZE"); break;
-        case -64: reg_print_msg_error("CL_INVALID_PROPERTY"); break;
-        case -65: reg_print_msg_error("CL_INVALID_IMAGE_DESCRIPTOR"); break;
-        case -66: reg_print_msg_error("CL_INVALID_COMPILER_OPTIONS"); break;
-        case -67: reg_print_msg_error("CL_INVALID_LINKER_OPTIONS"); break;
-        case -68: reg_print_msg_error("CL_INVALID_DEVICE_PARTITION_COUNT"); break;
-        default: reg_print_msg_error("Unknown error type"); break;
+        case -1: NR_FATAL_ERROR("CL_DEVICE_NOT_FOUND");
+        case -2: NR_FATAL_ERROR("CL_DEVICE_NOT_AVAILABLE");
+        case -3: NR_FATAL_ERROR("CL_COMPILER_NOT_AVAILABLE");
+        case -4: NR_FATAL_ERROR("CL_MEM_OBJECT_ALLOCATION_FAILURE");
+        case -5: NR_FATAL_ERROR("CL_OUT_OF_RESOURCES");
+        case -6: NR_FATAL_ERROR("CL_OUT_OF_HOST_MEMORY");
+        case -7: NR_FATAL_ERROR("CL_PROFILING_INFO_NOT_AVAILABLE");
+        case -8: NR_FATAL_ERROR("CL_MEM_COPY_OVERLAP");
+        case -9: NR_FATAL_ERROR("CL_IMAGE_FORMAT_MISMATCH");
+        case -10: NR_FATAL_ERROR("CL_IMAGE_FORMAT_NOT_SUPPORTED");
+        case -11: NR_FATAL_ERROR("CL_BUILD_PROGRAM_FAILURE");
+        case -12: NR_FATAL_ERROR("CL_MAP_FAILURE");
+        case -13: NR_FATAL_ERROR("CL_MISALIGNED_SUB_BUFFER_OFFSET");
+        case -14: NR_FATAL_ERROR("CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST");
+        case -15: NR_FATAL_ERROR("CL_COMPILE_PROGRAM_FAILURE");
+        case -16: NR_FATAL_ERROR("CL_LINKER_NOT_AVAILABLE");
+        case -17: NR_FATAL_ERROR("CL_LINK_PROGRAM_FAILURE");
+        case -18: NR_FATAL_ERROR("CL_DEVICE_PARTITION_FAILED");
+        case -19: NR_FATAL_ERROR("CL_KERNEL_ARG_INFO_NOT_AVAILABLE");
+        case -30: NR_FATAL_ERROR("CL_INVALID_VALUE");
+        case -31: NR_FATAL_ERROR("CL_INVALID_DEVICE_TYPE");
+        case -32: NR_FATAL_ERROR("CL_INVALID_PLATFORM");
+        case -33: NR_FATAL_ERROR("CL_INVALID_DEVICE");
+        case -34: NR_FATAL_ERROR("CL_INVALID_CONTEXT");
+        case -35: NR_FATAL_ERROR("CL_INVALID_QUEUE_PROPERTIES");
+        case -36: NR_FATAL_ERROR("CL_INVALID_COMMAND_QUEUE");
+        case -37: NR_FATAL_ERROR("CL_INVALID_HOST_PTR");
+        case -38: NR_FATAL_ERROR("CL_INVALID_MEM_OBJECT");
+        case -39: NR_FATAL_ERROR("CL_INVALID_IMAGE_FORMAT_DESCRIPTOR");
+        case -40: NR_FATAL_ERROR("CL_INVALID_IMAGE_SIZE");
+        case -41: NR_FATAL_ERROR("CL_INVALID_SAMPLER");
+        case -42: NR_FATAL_ERROR("CL_INVALID_BINARY");
+        case -43: NR_FATAL_ERROR("CL_INVALID_BUILD_OPTIONS");
+        case -44: NR_FATAL_ERROR("CL_INVALID_PROGRAM");
+        case -45: NR_FATAL_ERROR("CL_INVALID_PROGRAM_EXECUTABLE");
+        case -46: NR_FATAL_ERROR("CL_INVALID_KERNEL_NAME");
+        case -47: NR_FATAL_ERROR("CL_INVALID_KERNEL_DEFINITION");
+        case -48: NR_FATAL_ERROR("CL_INVALID_KERNEL");
+        case -49: NR_FATAL_ERROR("CL_INVALID_ARG_INDEX");
+        case -50: NR_FATAL_ERROR("CL_INVALID_ARG_VALUE");
+        case -51: NR_FATAL_ERROR("CL_INVALID_ARG_SIZE");
+        case -52: NR_FATAL_ERROR("CL_INVALID_KERNEL_ARGS");
+        case -53: NR_FATAL_ERROR("CL_INVALID_WORK_DIMENSION");
+        case -54: NR_FATAL_ERROR("CL_INVALID_WORK_GROUP_SIZE");
+        case -55: NR_FATAL_ERROR("CL_INVALID_WORK_ITEM_SIZE");
+        case -56: NR_FATAL_ERROR("CL_INVALID_GLOBAL_OFFSET");
+        case -57: NR_FATAL_ERROR("CL_INVALID_EVENT_WAIT_LIST");
+        case -58: NR_FATAL_ERROR("CL_INVALID_EVENT");
+        case -59: NR_FATAL_ERROR("CL_INVALID_OPERATION");
+        case -60: NR_FATAL_ERROR("CL_INVALID_GL_OBJECT");
+        case -61: NR_FATAL_ERROR("CL_INVALID_BUFFER_SIZE");
+        case -62: NR_FATAL_ERROR("CL_INVALID_MIP_LEVEL");
+        case -63: NR_FATAL_ERROR("CL_INVALID_GLOBAL_WORK_SIZE");
+        case -64: NR_FATAL_ERROR("CL_INVALID_PROPERTY");
+        case -65: NR_FATAL_ERROR("CL_INVALID_IMAGE_DESCRIPTOR");
+        case -66: NR_FATAL_ERROR("CL_INVALID_COMPILER_OPTIONS");
+        case -67: NR_FATAL_ERROR("CL_INVALID_LINKER_OPTIONS");
+        case -68: NR_FATAL_ERROR("CL_INVALID_DEVICE_PARTITION_COUNT");
+        default: NR_FATAL_ERROR("Unknown error type");
         }
-        reg_exit();
     }
 }
 /* *************************************************************** */
@@ -322,12 +315,12 @@ cl_kernel ClContextSingleton::DummyKernel(cl_device_id deviceIdIn) {
     cl_program program = clCreateProgramWithSource(this->context, 1, (const char **)&source, nullptr, &err);
     CheckErrNum(err, "Failed to create CL program");
     err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
-    if (err != CL_SUCCESS) CheckDebugKernelInfo(program, deviceIdIn, (char *)"Errors in kernel: ");
+    if (err != CL_SUCCESS) CheckDebugKernelInfo(program, deviceIdIn, "Errors in kernel: ");
 
     // Create the compute kernel in the program we wish to run
     cl_kernel kernel = clCreateKernel(program, "dummy", &err);
     if (!kernel || err != CL_SUCCESS) {
-        reg_print_fct_error("Error: Failed to create compute kernel!");
+        NR_ERROR("Failed to create the compute kernel!");
         return nullptr;
     }
     return kernel;
diff --git a/reg-lib/cl/ClContextSingleton.h b/reg-lib/cl/ClContextSingleton.h
index c574933d..2da4247e 100644
--- a/reg-lib/cl/ClContextSingleton.h
+++ b/reg-lib/cl/ClContextSingleton.h
@@ -6,7 +6,7 @@
 #include <CL/cl.h>
 #endif
 
-#include "_reg_maths.h"
+#include "_reg_tools.h"
 
 #include <string>
 #include <iostream>
@@ -48,7 +48,7 @@ class ClContextSingleton {
 
     void Init();
     void PickCard(cl_uint deviceId);
-    void CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, char *message);
+    void CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, const char *message);
     void QueryGridDims();
 
     cl_context context;
diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp
index b22671b9..4867af20 100644
--- a/reg-lib/cl/ClResampleImageKernel.cpp
+++ b/reg-lib/cl/ClResampleImageKernel.cpp
@@ -9,30 +9,26 @@ ClResampleImageKernel::ClResampleImageKernel(Content *conIn) : ResampleImageKern
     ClAladinContent *con = static_cast<ClAladinContent*>(conIn);
 
     //path to kernel file
-    const char *niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR");
-    const char *niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR");
+    const char *nrInstallDir = getenv("NIFTYREG_INSTALL_DIR");
+    const char *nrSrcDir = getenv("NIFTYREG_SRC_DIR");
 
     std::string clInstallPath;
     std::string clSrcPath;
     //src dir
-    if (niftyreg_src_dir != nullptr) {
-        char opencl_kernel_path[255];
-        sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir);
-        clSrcPath = opencl_kernel_path;
+    if (nrSrcDir != nullptr) {
+        clSrcPath = nrSrcDir + "/reg-lib/cl/"s;
     } else clSrcPath = CL_KERNELS_SRC_PATH;
     //install dir
-    if (niftyreg_install_dir != nullptr) {
-        char opencl_kernel_path[255];
-        sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir);
-        clInstallPath = opencl_kernel_path;
+    if (nrInstallDir != nullptr) {
+        clInstallPath = nrInstallDir + "/include/cl/"s;
     } else clInstallPath = CL_KERNELS_PATH;
     std::string clKernel("resampleKernel.cl");
     //Let's check if we did an install
     std::string clKernelPath = (clInstallPath + clKernel);
     std::ifstream kernelFile(clKernelPath.c_str(), std::ios::in);
     if (kernelFile.is_open() == 0) {
-        //"clKernel.cl propbably not installed - let's use the src location"
-        clKernelPath = (clSrcPath + clKernel);
+        //"clKernel.cl probably not installed - let's use the src location"
+        clKernelPath = clSrcPath + clKernel;
     }
 
     //get opencl context params
@@ -63,11 +59,8 @@ void ClResampleImageKernel::Calculate(int interp,
                                       mat33 *jacMat) {
     cl_int errNum;
     // Define the DTI indices if required
-    if (dti_timepoint != nullptr || jacMat != nullptr) {
-        reg_print_fct_error("ClResampleImageKernel::calculate");
-        reg_print_msg_error("The DTI resampling has not yet been implemented with the OpenCL platform. Exit.");
-        reg_exit();
-    }
+    if (dti_timepoint != nullptr || jacMat != nullptr)
+        NR_FATAL_ERROR("The DTI resampling has not yet been implemented with the OpenCL platform");
 
     if (this->floatingImage->nz > 1) {
         this->kernel = clCreateKernel(program, "ResampleImage3D", &errNum);
@@ -75,9 +68,7 @@ void ClResampleImageKernel::Calculate(int interp,
         //2D case
         this->kernel = clCreateKernel(program, "ResampleImage2D", &errNum);
     } else {
-        reg_print_fct_error("ClResampleImageKernel::calculate");
-        reg_print_msg_error("The image dimension is not supported. Exit.");
-        reg_exit();
+        NR_FATAL_ERROR("The image dimension is not supported");
     }
     sContext->CheckErrNum(errNum, "Error setting kernel ResampleImage.");
 
diff --git a/reg-lib/cl/InfoDevice.h b/reg-lib/cl/InfoDevice.h
index a4831445..a4f7a70f 100644
--- a/reg-lib/cl/InfoDevice.h
+++ b/reg-lib/cl/InfoDevice.h
@@ -36,7 +36,7 @@ class DeviceLog {
 				appendToString(deviceType & CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU", clInfo);
 				appendToString(deviceType & CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR", clInfo);
 				appendToString(deviceType & CL_DEVICE_TYPE_DEFAULT, "CL_DEVICE_TYPE_DEFAULT", clInfo);
-				std::cout << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
+				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
 			}
 			break;
 		case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: {
@@ -45,7 +45,7 @@ class DeviceLog {
 				appendToString(cacheType & CL_READ_ONLY_CACHE, "CL_READ_ONLY_CACHE", clInfo);
 				appendToString(cacheType & CL_READ_WRITE_CACHE, "CL_READ_WRITE_CACHE", clInfo);
 
-				std::cout << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
+				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
 			}
 			break;
 		case CL_DEVICE_LOCAL_MEM_TYPE: {
@@ -53,7 +53,7 @@ class DeviceLog {
 				appendToString(localMemType & CL_LOCAL, "CL_LOCAL", clInfo);
 				appendToString(localMemType & CL_GLOBAL, "CL_GLOBAL", clInfo);
 
-				std::cout << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
+				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
 			}
 			break;
 		case CL_DEVICE_EXECUTION_CAPABILITIES: {
@@ -63,7 +63,7 @@ class DeviceLog {
 				appendToString(execCapabilities & CL_EXEC_KERNEL, "CL_EXEC_KERNEL", clInfo);
 				appendToString(execCapabilities & CL_EXEC_NATIVE_KERNEL, "CL_EXEC_NATIVE_KERNEL", clInfo);
 
-				std::cout << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
+				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
 			}
 			break;
 		case CL_DEVICE_QUEUE_PROPERTIES: {
@@ -71,17 +71,17 @@ class DeviceLog {
 				appendToString(*(reinterpret_cast<cl_device_exec_capabilities*>(field)) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", clInfo);
 				appendToString(*(reinterpret_cast<cl_device_exec_capabilities*>(field)) & CL_QUEUE_PROFILING_ENABLE, "CL_QUEUE_PROFILING_ENABLE", clInfo);
 
-				std::cout << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
+				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
 			}
 			break;
 		case CL_DEVICE_MAX_WORK_ITEM_SIZES: {
 				cl_uint maxWorkItemDimensions;
 
 				sContext->CheckErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.");
-				std::cout << str << ":\t";
+				NR_COUT << str << ":\t";
 				for (cl_uint i = 0; i < maxWorkItemDimensions; i++)
-					std::cout << field[i] << " ";
-				std::cout << std::endl;
+					NR_COUT << field[i] << " ";
+				NR_COUT << std::endl;
 			}
 			break;
 
@@ -89,11 +89,11 @@ class DeviceLog {
 		case CL_DEVICE_VENDOR:
 		case CL_DRIVER_VERSION:
 		case CL_DEVICE_VERSION: {
-				std::cout << "[NiftyReg OPENCL] " << str << ": " << field << std::endl;
+				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << field << std::endl;
 			}
 			break;
 		default:
-			std::cout << "[NiftyReg OPENCL] " << str << ": " << *field << std::endl;
+			NR_COUT << "[NiftyReg OPENCL] " << str << ": " << *field << std::endl;
 			break;
 		}
 	}
@@ -108,12 +108,12 @@ class DeviceLog {
 		switch (name) {
 		case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
 				if (errNum != CL_SUCCESS)  local = 1;
-				std::cout << "[NiftyReg OPENCL] Warp / wavefront" << ": " << local << std::endl;
+				NR_COUT << "[NiftyReg OPENCL] Warp / wavefront" << ": " << local << std::endl;
 			}
 			break;
 			break;
 		default:
-			std::cout << "[NiftyReg OPENCL] " << str << ": " << local << std::endl;
+			NR_COUT << "[NiftyReg OPENCL] " << str << ": " << local << std::endl;
 			break;
 		}
 	}
diff --git a/reg-lib/cl/_reg_openclinfo.cpp b/reg-lib/cl/_reg_openclinfo.cpp
index ee0d9671..c5cf382a 100644
--- a/reg-lib/cl/_reg_openclinfo.cpp
+++ b/reg-lib/cl/_reg_openclinfo.cpp
@@ -1,6 +1,7 @@
-#include "_reg_openclinfo.h"
+#include <iostream>
+#include "InfoDevice.h"
 
-void showCLInfo(void)
+void showCLInfo()
 {
    ClContextSingleton *sContext = &ClContextSingleton::GetInstance();
    cl_uint numPlatforms = sContext->GetNumPlatforms();
@@ -9,13 +10,13 @@ void showCLInfo(void)
    {
       cl_uint numDevices = sContext->GetNumDevices();
       cl_device_id * devices = sContext->GetDevices();
-      printf("-----------------------------------\n");
-      printf("[NiftyReg OPENCL] %i device(s) detected\n", numDevices);
-      printf("-----------------------------------\n");
+      NR_COUT << "-----------------------------------" << std::endl;
+      NR_COUT << "[NiftyReg OPENCL] " << numDevices << "device(s) detected" << std::endl;
+      NR_COUT << "-----------------------------------" << std::endl;
       // Iterate through each device, displaying associated information
       for (cl_uint j = 0; j < numDevices; j++)
       {
-         printf("[NiftyReg OPENCL] Device id [%u]\n", (unsigned)j);
+         NR_COUT << "[NiftyReg OPENCL] Device id " << j << std::endl;
          DeviceLog<char >::show(devices[j], CL_DEVICE_NAME, "Device Name");
 //         DeviceLog<char >::show(devices[j], CL_DEVICE_VENDOR, "**** CL_DEVICE_VENDOR");
 //         DeviceLog<char >::show(devices[j], CL_DRIVER_VERSION, "**** CL_DRIVER_VERSION");
@@ -43,7 +44,7 @@ void showCLInfo(void)
 #else
          DeviceLog<int>::show(devices[j], CL_DEVICE_SINGLE_FP_CONFIG, "Device single config only");
 #endif
-         printf("-----------------------------------\n");
+         NR_COUT << "-----------------------------------" << std::endl;
       }
    }
 }
diff --git a/reg-lib/cl/_reg_openclinfo.h b/reg-lib/cl/_reg_openclinfo.h
index 56f895e9..50a1b5c2 100644
--- a/reg-lib/cl/_reg_openclinfo.h
+++ b/reg-lib/cl/_reg_openclinfo.h
@@ -1,6 +1,3 @@
 #pragma once
 
-#include <iostream>
-#include "InfoDevice.h"
-
-void showCLInfo(void);
+void showCLInfo();
diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.h b/reg-lib/cpu/CpuBlockMatchingKernel.h
index d923f5ed..3626d908 100644
--- a/reg-lib/cpu/CpuBlockMatchingKernel.h
+++ b/reg-lib/cpu/CpuBlockMatchingKernel.h
@@ -1,8 +1,6 @@
 #pragma once
 
 #include "BlockMatchingKernel.h"
-#include "_reg_blockMatching.h"
-#include "niftilib/nifti1_io.h"
 #include "AladinContent.h"
 
 class CpuBlockMatchingKernel: public BlockMatchingKernel {
diff --git a/reg-lib/cpu/CpuLtsKernel.h b/reg-lib/cpu/CpuLtsKernel.h
index 8bb4c26e..4f808dff 100644
--- a/reg-lib/cpu/CpuLtsKernel.h
+++ b/reg-lib/cpu/CpuLtsKernel.h
@@ -1,8 +1,6 @@
 #pragma once
 
 #include "LtsKernel.h"
-#include "_reg_blockMatching.h"
-#include "niftilib/nifti1_io.h"
 #include "AladinContent.h"
 
 class CpuLtsKernel: public LtsKernel {
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index 907f932f..8e70f957 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -260,29 +260,16 @@ void initialise_block_matching_method(nifti_image * reference,
       _reg_set_active_blocks<double>(reference, params, mask, runningOnGPU);
       break;
    default:
-      reg_print_fct_error("initialise_block_matching_method()");
-      reg_print_msg_error("The reference image data type is not supported");
-      reg_exit();
-      ;
+      NR_FATAL_ERROR("The reference image data type is not supported");
    }
-   if (params->activeBlockNumber < 2) {
-      reg_print_fct_error("initialise_block_matching_method()");
-      reg_print_msg_error("There are less than 2 active blocks");
-      reg_exit();
-   }
-#ifndef NDEBUG
-   char text[255];
-   sprintf(text, "There are %i active block(s) out of %i.",
-           params->activeBlockNumber, params->totalBlockNumber);
-   reg_print_msg_debug(text)
-      #endif
+   if (params->activeBlockNumber < 2)
+      NR_FATAL_ERROR("There are less than 2 active blocks");
+   NR_DEBUG("There are " << params->activeBlockNumber << " active block(s) out of " << params->totalBlockNumber);
          //params->activeBlock = (int *)malloc(params->activeBlockNumber * sizeof(int));
    params->referencePosition = (float *)malloc(params->activeBlockNumber * params->dim * sizeof(float));
    params->warpedPosition = (float *)malloc(params->activeBlockNumber * params->dim * sizeof(float));
 
-#ifndef NDEBUG
-   reg_print_msg_debug("block matching initialisation done.");
-#endif
+   NR_DEBUG("Block matching initialisation done");
 }
 /* *************************************************************** */
 /* *************************************************************** */
@@ -704,10 +691,8 @@ void block_matching_method3D(nifti_image * reference,
 /* *************************************************************** */
 // Block matching interface function
 void block_matching_method(nifti_image * reference, nifti_image * warped, _reg_blockMatchingParam *params, int *mask) {
-   if (reference->datatype != warped->datatype) {
-      reg_print_fct_error("block_matching_method");
-      reg_print_msg_error("Both input images are expected to be of the same type");
-   }
+   if (reference->datatype != warped->datatype)
+      NR_FATAL_ERROR("Both input images are expected to be of the same type");
    if (reference->nz == 1) {
       switch (reference->datatype) {
       case NIFTI_TYPE_FLOAT64:
@@ -717,9 +702,7 @@ void block_matching_method(nifti_image * reference, nifti_image * warped, _reg_b
          block_matching_method2D<float>(reference, warped, params, mask);
          break;
       default:
-         reg_print_fct_error("block_matching_method");
-         reg_print_msg_error("The reference image data type is not supported");
-         reg_exit();
+         NR_FATAL_ERROR("The reference image data type is not supported");
       }
    } else {
       switch (reference->datatype) {
@@ -730,9 +713,7 @@ void block_matching_method(nifti_image * reference, nifti_image * warped, _reg_b
          block_matching_method3D<float>(reference, warped, params, mask);
          break;
       default:
-         reg_print_fct_error("block_matching_method");
-         reg_print_msg_error("The reference image data type is not supported");
-         reg_exit();
+         NR_FATAL_ERROR("The reference image data type is not supported");
       }
    }
 }
@@ -753,20 +734,14 @@ void optimize(_reg_blockMatchingParam *params,
          //3 = minimum number of correspondences needed
          if(params->definedActiveBlockNumber < 6)
          {
-            char text[255];
-            sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber);
-            reg_print_msg_error(text);
-            reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate an affine transformation");
-            reg_exit();
+            NR_ERROR(std::to_string(params->definedActiveBlockNumber) + " correspondences between blocks were found");
+            NR_FATAL_ERROR("Not enough correspondences were found - it is impossible to estimate an affine transformation");
          }
       } else {
          if(params->definedActiveBlockNumber < 4)
          {
-            char text[255];
-            sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber);
-            reg_print_msg_error(text);
-            reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate a rigid transformation");
-            reg_exit();
+            NR_ERROR(std::to_string(params->definedActiveBlockNumber) + " correspondences between blocks were found");
+            NR_FATAL_ERROR("Not enough correspondences were found - it is impossible to estimate a rigid transformation");
          }
       }
 
@@ -803,20 +778,14 @@ void optimize(_reg_blockMatchingParam *params,
          //4 = minimum number of correspondences needed
          if(params->definedActiveBlockNumber < 8)
          {
-            char text[255];
-            sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber);
-            reg_print_msg_error(text);
-            reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate an affine transformation");
-            reg_exit();
+            NR_ERROR(std::to_string(params->definedActiveBlockNumber) + " correspondences between blocks were found");
+            NR_FATAL_ERROR("Not enough correspondences were found - it is impossible to estimate an affine transformation");
          }
       } else {
          if(params->definedActiveBlockNumber < 4)
          {
-            char text[255];
-            sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber);
-            reg_print_msg_error(text);
-            reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate a rigid transformation");
-            reg_exit();
+            NR_ERROR(std::to_string(params->definedActiveBlockNumber) + " correspondences between blocks were found");
+            NR_FATAL_ERROR("Not enough correspondences were found - it is impossible to estimate a rigid transformation");
          }
       }
 
diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp
index d8ba9e84..93ed99b8 100644
--- a/reg-lib/cpu/_reg_discrete_init.cpp
+++ b/reg-lib/cpu/_reg_discrete_init.cpp
@@ -17,11 +17,9 @@ reg_discrete_init::reg_discrete_init(reg_measure *_measure,
    this->regularisation_weight = _reg_weight;
    this->reg_max_it = _reg_max_it;
 
-   if(this->discrete_radius/this->discrete_increment !=
-      (float)this->discrete_radius/(float)this->discrete_increment){
-      reg_print_fct_error("reg_discrete_init:reg_discrete_init()");
-      reg_print_msg_error("The discrete_radius is expected to be a multiple of discretise_increment");
-   }
+   if (this->discrete_radius / this->discrete_increment !=
+       (float)this->discrete_radius / (float)this->discrete_increment)
+      NR_FATAL_ERROR("The discrete_radius is expected to be a multiple of discretise_increment");
 
    this->image_dim = this->referenceImage->nz > 1 ? 3 :2;
    this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1;
@@ -136,9 +134,7 @@ void reg_discrete_init::GetDiscretisedMeasure()
                                 this->discretised_measures,
                                 this->discrete_radius,
                                 this->discrete_increment);
-#ifndef NDEBUG
-   reg_print_msg_debug("reg_discrete_init::GetDiscretisedMeasure done");
-#endif
+   NR_FUNC_CALLED();
 }
 /*****************************************************/
 /*****************************************************/
@@ -156,9 +152,7 @@ void reg_discrete_init::GetOptimalLabel()
       if(current_optimal != opt_label)
          ++this->regularisation_convergence;
    }
-#ifndef NDEBUG
-   reg_print_msg_debug("reg_discrete_init::getOptimalLabel done");
-#endif
+   NR_FUNC_CALLED();
 }
 /*****************************************************/
 /*****************************************************/
@@ -190,9 +184,7 @@ void reg_discrete_init::UpdateTransformation()
       }
    }
 
-#ifndef NDEBUG
-   reg_print_msg_debug("reg_discrete_init::UpdateTransformation done");
-#endif
+   NR_FUNC_CALLED();
 }
 /*****************************************************/
 /*****************************************************/
@@ -363,24 +355,17 @@ void reg_discrete_init::GetRegularisedMeasure()
    } // z
    reg_getDeformationFromDisplacement(this->controlPointImage);
    reg_getDeformationFromDisplacement(this->input_transformation);
-#ifndef NDEBUG
-   reg_print_msg_debug("reg_discrete_init::GetRegularisedMeasure done");
-#endif
+   NR_FUNC_CALLED();
 }
 /*****************************************************/
 /*****************************************************/
 void reg_discrete_init::Run()
 {
-   char text[255];
-   sprintf(text, "Control point number = %lu", this->node_number);
-   reg_print_info("reg_discrete_init", text);
-   sprintf(text, "Discretised radius (voxel) = %i", this->discrete_radius);
-   reg_print_info("reg_discrete_init", text);
-   sprintf(text, "Discretised step (voxel) = %i", this->discrete_increment);
-   reg_print_info("reg_discrete_init", text);
-   sprintf(text, "Discretised label number = %i", this->label_nD_num);
-   reg_print_info("reg_discrete_init", text);
-   // Store the intial transformation parametrisation
+   NR_VERBOSE("Control point number = " << this->node_number);
+   NR_VERBOSE("Discretised radius (voxel) = " << this->discrete_radius);
+   NR_VERBOSE("Discretised step (voxel) = " << this->discrete_increment);
+   NR_VERBOSE("Discretised label number = " << this->label_nD_num);
+   // Store the initial transformation parametrisation
    memcpy(this->input_transformation->data, this->controlPointImage->data,
           this->node_number*this->image_dim*sizeof(float));
    // Compute the discretised data term values
@@ -400,17 +385,13 @@ void reg_discrete_init::Run()
       this->GetRegularisedMeasure();
       this->GetOptimalLabel();
       this->UpdateTransformation();
-      sprintf(text, "Regularisation %i/%i - BE=%.2f - [%2.2f%%]",
-             i+1, this->reg_max_it,
-             reg_spline_approxBendingEnergy(this->controlPointImage),
-             100.f*(float)this->regularisation_convergence/this->node_number);
-      reg_print_info("reg_discrete_init", text);
+      NR_VERBOSE("Regularisation " << i+1 << "/" << this->reg_max_it <<
+                 " - BE=" << reg_spline_approxBendingEnergy(this->controlPointImage) <<
+                 " - [" << 100.f*(float)this->regularisation_convergence/this->node_number << "%]");
       //if(this->regularisation_convergence<this->node_number/100)
       //   break;
    }
-#ifndef NDEBUG
-   reg_print_msg_debug("reg_discrete_init::Run done");
-#endif
+   NR_FUNC_CALLED();
 }
 /*****************************************************/
 /*****************************************************/
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index 1196f47b..a197b559 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -14,9 +14,7 @@
 
 /* *************************************************************** */
 reg_dti::reg_dti(): reg_measure() {
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_dti constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 // This function is directly the same as that used for reg_ssd
@@ -45,11 +43,8 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg,
                                    voxelBasedGradBw);
 
     // Check that the input images have the same number of time point
-    if (this->referenceImage->nt != this->floatingImage->nt) {
-        reg_print_fct_error("reg_dti::InitialiseMeasure");
-        reg_print_msg_error("This number of time point should be the same for both input images");
-        reg_exit();
-    }
+    if (this->referenceImage->nt != this->floatingImage->nt)
+        NR_FATAL_ERROR("This number of time point should be the same for both input images");
 
     int j = 0;
     for (int i = 0; i < refImg->nt; ++i) {
@@ -57,19 +52,13 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg,
         // any value > 0 indicates the 'time point' is active
         if (this->timePointWeight[i] > 0) {
             this->dtIndicies[j++] = i;
-#ifndef NDEBUG
-            reg_print_msg_debug("reg_dti::InitialiseMeasure()");
-            char text[255];
-            sprintf(text, "Active time point: %i", i);
-            reg_print_msg_debug(text);
-#endif
+            NR_DEBUG("Active time point: " << i);
         }
     }
-    if ((refImg->nz > 1 && j != 6) && (refImg->nz == 1 && j != 3)) {
-        reg_print_fct_error("reg_dti::InitialiseMeasure");
-        reg_print_msg_error("Unexpected number of DTI components");
-        reg_exit();
-    }
+    if ((refImg->nz > 1 && j != 6) && (refImg->nz == 1 && j != 3))
+        NR_FATAL_ERROR("Unexpected number of DTI components");
+
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class DataType>
@@ -194,8 +183,7 @@ void reg_getVoxelBasedDtiMeasureGradient(const nifti_image *referenceImage,
     const DataType *referenceIntensityZZ = &firstRefVox[voxelNumber * dtIndicies[5]];
 
     // THE FOLLOWING IS WRONG
-    reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX");
-    reg_exit();
+    NR_FATAL_ERROR("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX");
     const size_t gradientVoxels = (size_t)warpedGradient->nu * voxelNumber;
     const DataType *firstGradVox = static_cast<DataType*>(warpedGradient->data);
     const DataType *spatialGradXX = &firstGradVox[gradientVoxels * dtIndicies[0]];
diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp
index 4e2dc22c..ccf9b6cc 100644
--- a/reg-lib/cpu/_reg_femTrans.cpp
+++ b/reg-lib/cpu/_reg_femTrans.cpp
@@ -11,7 +11,6 @@
  */
 
 #include "_reg_femTrans.h"
-#include "_reg_tools.h"
 
 float reg_getTetrahedronVolume(float *node1,float *node2,float *node3,float *node4)
 {
diff --git a/reg-lib/cpu/_reg_femTrans.h b/reg-lib/cpu/_reg_femTrans.h
index 3c0802d5..d9ee6861 100644
--- a/reg-lib/cpu/_reg_femTrans.h
+++ b/reg-lib/cpu/_reg_femTrans.h
@@ -15,9 +15,7 @@
 
 #pragma once
 
-#include "niftilib/nifti1_io.h"
-#include <fstream>
-#include "_reg_maths.h"
+#include "_reg_tools.h"
 
 /** @brief Initialise multiples arrays to populate a dense deformation
  * field from a FEM parametrisation
@@ -36,8 +34,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes,
                                       float *nodePositions,
                                       nifti_image *deformationFieldImage,
                                       unsigned *closestNodes,
-                                      float *femInterpolationWeight
-                                     );
+                                      float *femInterpolationWeight);
 
 /** @brief A dense deformation field is filled using interpolation
  * from a coarse mesh
@@ -52,8 +49,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes,
 void reg_fem_getDeformationField(float *nodePositions,
                                  nifti_image *deformationFieldImage,
                                  unsigned *closestNodes,
-                                 float *femInterpolationWeight
-                                );
+                                 float *femInterpolationWeight);
 
 /** @brief Convert a dense gradient image into a mesh based gradient image
  * @param voxelBasedGradient Image that contains the gradient image
diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp
index 444f273c..e8988b75 100755
--- a/reg-lib/cpu/_reg_globalTrans.cpp
+++ b/reg-lib/cpu/_reg_globalTrans.cpp
@@ -38,9 +38,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation,
       transformationMatrix = *affineTransformation;
    else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix);
 
-#ifndef NDEBUG
-   reg_mat44_disp(&transformationMatrix, (char *)"[NiftyReg DEBUG] Global affine transformation");
-#endif
+   NR_MAT44(transformationMatrix, "Global affine transformation");
 
    double voxel[3]={0,0,0}, position[3]={0,0,0};
    int x=0, y=0;
@@ -101,9 +99,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation,
       transformationMatrix = *affineTransformation;
    else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix);
 
-#ifndef NDEBUG
-   reg_mat44_disp(&transformationMatrix, (char *)"[NiftyReg DEBUG] Global affine transformation");
-#endif
+   NR_MAT44(transformationMatrix, "Global affine transformation");
 
    double voxel[3]={0,0,0}, position[3]={0,0,0};
    int x=0, y=0, z=0;
@@ -166,9 +162,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation,
          reg_affine_deformationField2D<double>(affineTransformation, deformationField, compose, tempMask);
          break;
       default:
-         reg_print_fct_error("reg_affine_getDeformationField");
-         reg_print_msg_error("The deformation field data type is not supported");
-         reg_exit();
+         NR_FATAL_ERROR("The deformation field data type is not supported");
       }
    }
    else
@@ -182,9 +176,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation,
          reg_affine_deformationField3D<double>(affineTransformation, deformationField, compose, tempMask);
          break;
       default:
-         reg_print_fct_error("reg_affine_getDeformationField");
-         reg_print_msg_error("The deformation field data type is not supported");
-         reg_exit();
+         NR_FATAL_ERROR("The deformation field data type is not supported");
       }
    }
    if(mask==nullptr)
diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h
index 06c47bbc..dd771a3b 100755
--- a/reg-lib/cpu/_reg_globalTrans.h
+++ b/reg-lib/cpu/_reg_globalTrans.h
@@ -14,8 +14,8 @@
 
 #pragma once
 
-#include "niftilib/nifti1_io.h"
 #include "_reg_tools.h"
+
 /* *************************************************************** */
 /// @brief Structure that is used to store the distance between two corresponding voxel
 struct _reg_sorted_point3D
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index f94846a5..c202d0a5 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -14,9 +14,7 @@
 
 /* *************************************************************** */
 reg_kld::reg_kld(): reg_measure() {
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_kld constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 void reg_kld::InitialiseMeasure(nifti_image *refImg,
@@ -44,34 +42,24 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg,
                                    voxelBasedGradBw);
 
     // Check that the input images have the same number of time point
-    if (this->referenceImage->nt != this->floatingImage->nt) {
-        reg_print_fct_error("reg_kld::InitialiseMeasure");
-        reg_print_msg_error("This number of time point should be the same for both input images");
-        reg_exit();
-    }
-    // Input images are expected to be bounded between 0 and 1 as they
-    // are meant to be probabilities
+    if (this->referenceImage->nt != this->floatingImage->nt)
+        NR_FATAL_ERROR("This number of time point should be the same for both input images");
+
+    // Input images are expected to be bounded between 0 and 1 as they are meant to be probabilities
     for (int t = 0; t < this->referenceImage->nt; ++t) {
         if (this->timePointWeight[t] > 0) {
             const float minRef = reg_tools_getMinValue(this->referenceImage, t);
             const float maxRef = reg_tools_getMaxValue(this->referenceImage, t);
             const float minFlo = reg_tools_getMinValue(this->floatingImage, t);
             const float maxFlo = reg_tools_getMaxValue(this->floatingImage, t);
-            if (minRef < 0.f || minFlo < 0.f || maxRef > 1.f || maxFlo > 1.f) {
-                reg_print_fct_error("reg_kld::InitialiseMeasure");
-                reg_print_msg_error("The input images are expected to be probabilities to use the kld measure");
-                reg_exit();
-            }
+            if (minRef < 0.f || minFlo < 0.f || maxRef > 1.f || maxFlo > 1.f)
+                NR_FATAL_ERROR("The input images are expected to be probabilities to use the kld measure");
         }
     }
-#ifndef NDEBUG
-    char text[255];
-    reg_print_msg_debug("reg_kld::InitialiseMeasure()");
-    for (int i = 0; i < this->referenceImage->nt; ++i) {
-        sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
-        reg_print_msg_debug(text);
-    }
-#endif
+
+    for (int i = 0; i < this->referenceImage->nt; ++i)
+        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]);
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 /** @brief Computes and returns the KLD between two input image
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index f21fe4b3..dc61d5b7 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -33,9 +33,8 @@ reg_lncc::reg_lncc(): reg_measure() {
 
     for (int i = 0; i < 255; ++i)
         kernelStandardDeviation[i] = -5.f;
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_lncc constructor called");
-#endif
+
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 reg_lncc::~reg_lncc() {
@@ -186,14 +185,10 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg,
         // Allocate the array to store the mask of the backward image
         this->backwardMask = (int*)malloc(voxelNumber * sizeof(int));
     }
-#ifndef NDEBUG
-    char text[255];
-    reg_print_msg_debug("reg_lncc::InitialiseMeasure()");
-    for (int i = 0; i < this->referenceImage->nt; ++i) {
-        sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
-        reg_print_msg_debug(text);
-    }
-#endif
+
+    for (int i = 0; i < this->referenceImage->nt; ++i)
+        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]);
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class DataType>
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 35eb7c91..88088b73 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -139,16 +139,12 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
     mat44 referenceImageSpace = referenceImage->qto_xyz;
     if (referenceImage->sform_code > 0)
         referenceImageSpace = referenceImage->sto_xyz;
-#ifndef NDEBUG
-    reg_mat44_disp(&referenceImageSpace, (char*)"[NiftyReg DEBUG] Input reference image orientation");
-#endif
+    NR_MAT44(referenceImageSpace, "Input reference image orientation");
     // // Get the floating image space
     mat44 floatingImageSpace = floatingImage->qto_xyz;
     if (floatingImage->sform_code > 0)
         floatingImageSpace = floatingImage->sto_xyz;
-#ifndef NDEBUG
-    reg_mat44_disp(&floatingImageSpace, (char*)"[NiftyReg DEBUG] Input floating image orientation");
-#endif
+    NR_MAT44(floatingImageSpace, "Input floating image orientation");
     // Check if an affine transformation is specified
     mat44 halfForwardAffine, halfBackwardAffine;
     if (forwardAffineTrans != nullptr) {
@@ -162,7 +158,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
         halfBackwardAffine = reg_mat44_logm(&halfBackwardAffine);
         halfBackwardAffine = reg_mat44_mul(&halfBackwardAffine, .5f);
         halfBackwardAffine = reg_mat44_expm(&halfBackwardAffine);
-        reg_print_msg_warn("Note that the symmetry of the registration is affected by the input affine transformation");
+        NR_WARN("Note that the symmetry of the registration is affected by the input affine transformation");
     } else {
         reg_mat44_eye(&halfForwardAffine);
         reg_mat44_eye(&halfBackwardAffine);
@@ -340,9 +336,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
         forwardGridImage->ext_list[1].edata = (char*)calloc(forwardGridImage->ext_list[1].esize - 8, sizeof(float));
         memcpy(forwardGridImage->ext_list[0].edata, &halfForwardAffine, sizeof(mat44));
         memcpy(forwardGridImage->ext_list[1].edata, &halfForwardAffine, sizeof(mat44));
-#ifndef NDEBUG
-        reg_mat44_disp(&halfForwardAffine, (char*)"[NiftyReg DEBUG] Forward transformation half-affine");
-#endif
+        NR_MAT44(halfForwardAffine, "Forward transformation half-affine");
         // Create extensions to store the affine parametrisations for the backward transformation
         backwardGridImage->num_ext = 2;
         backwardGridImage->ext_list = (nifti1_extension*)malloc(2 * sizeof(nifti1_extension));
@@ -354,9 +348,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
         backwardGridImage->ext_list[1].edata = (char*)calloc(backwardGridImage->ext_list[1].esize - 8, sizeof(float));
         memcpy(backwardGridImage->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44));
         memcpy(backwardGridImage->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44));
-#ifndef NDEBUG
-        reg_mat44_disp(&halfBackwardAffine, (char*)"[NiftyReg DEBUG] Backward transformation half-affine");
-#endif
+        NR_MAT44(halfBackwardAffine, "Backward transformation half-affine");
     }
     // Initialise the grid with identity transformations
     reg_tools_multiplyValueToImage(forwardGridImage, forwardGridImage, 0.f);
@@ -1439,18 +1431,12 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
                                     bool composition,
                                     bool bspline,
                                     bool force_no_lut) {
-    if (splineControlPoint->datatype != deformationField->datatype) {
-        reg_print_fct_error("reg_spline_getDeformationField");
-        reg_print_msg_error("The spline control point image and the deformation field image are expected to be the same type");
-        reg_exit();
-    }
+    if (splineControlPoint->datatype != deformationField->datatype)
+        NR_FATAL_ERROR("The spline control point image and the deformation field image are expected to be of the same type");
 
 #if _USE_SSE
-    if (splineControlPoint->datatype != NIFTI_TYPE_FLOAT32) {
-        reg_print_fct_error("reg_spline_getDeformationField");
-        reg_print_msg_error("SSE computation has only been implemented for single precision");
-        reg_exit();
-    }
+    if (splineControlPoint->datatype != NIFTI_TYPE_FLOAT32)
+        NR_FATAL_ERROR("SSE computation has only been implemented for single precision");
 #endif
 
     bool MrPropre = false;
@@ -1473,9 +1459,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
 
     if (splineControlPoint->intent_p1 == LIN_SPLINE_GRID) {
         if (splineControlPoint->nz == 1) {
-            reg_print_fct_error("reg_linear_spline_getDeformationField");
-            reg_print_msg_error("No 2D implementation yet");
-            reg_exit();
+            NR_FATAL_ERROR("No 2D implementation yet");
         } else {
             switch (deformationField->datatype) {
             case NIFTI_TYPE_FLOAT32:
@@ -1485,9 +1469,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
                 reg_linear_spline_getDeformationField3D<double>(splineControlPoint, deformationField, mask, composition);
                 break;
             default:
-                reg_print_fct_error("reg_linear_spline_getDeformationField");
-                reg_print_msg_error("Only single or double precision is implemented for deformation field");
-                reg_exit();
+                NR_FATAL_ERROR("Only single or double precision is implemented for deformation field");
             }
         }
     } else {
@@ -1500,9 +1482,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
                 reg_cubic_spline_getDeformationField2D<double>(splineControlPoint, deformationField, mask, composition, bspline);
                 break;
             default:
-                reg_print_fct_error("reg_spline_getDeformationField");
-                reg_print_msg_error("Only single or double precision is implemented for deformation field");
-                reg_exit();
+                NR_FATAL_ERROR("Only single or double precision is implemented for deformation field");
             }
         } else {
             switch (deformationField->datatype) {
@@ -1513,9 +1493,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
                 reg_cubic_spline_getDeformationField3D<double>(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut);
                 break;
             default:
-                reg_print_fct_error("reg_spline_getDeformationField");
-                reg_print_msg_error("Only single or double precision is implemented for deformation field");
-                reg_exit();
+                NR_FATAL_ERROR("Only single or double precision is implemented for deformation field");
             }
         }
     }
@@ -1686,11 +1664,8 @@ void reg_voxelCentric2NodeCentric(nifti_image * nodeImage,
                                   float weight,
                                   bool update,
                                   const mat44 * voxelToMillimetre) {
-    if (nodeImage->datatype != voxelImage->datatype) {
-        reg_print_fct_error("reg_voxelCentric2NodeCentric");
-        reg_print_msg_error("Both input images do not have the same type");
-        reg_exit();
-    }
+    if (nodeImage->datatype != voxelImage->datatype)
+        NR_FATAL_ERROR("Both input images are expected to have the same data type");
 
     switch (nodeImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
@@ -1700,9 +1675,7 @@ void reg_voxelCentric2NodeCentric(nifti_image * nodeImage,
         reg_voxelCentric2NodeCentric<double>(nodeImage, voxelImage, weight, update, voxelToMillimetre);
         break;
     default:
-        reg_print_fct_error("reg_voxelCentric2NodeCentric");
-        reg_print_msg_error("Data type not supported");
-        reg_exit();
+        NR_FATAL_ERROR("Data type not supported");
     }
 }
 /* *************************************************************** */
@@ -2135,11 +2108,9 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_
 }
 /* *************************************************************** */
 extern "C++"
-void reg_spline_refineControlPointGrid(nifti_image * controlPointGrid,
-                                       nifti_image * referenceImage) {
-#ifndef NDEBUG
-    reg_print_msg_debug("Starting the refine the control point grid");
-#endif
+void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
+                                       nifti_image *referenceImage) {
+    NR_DEBUG("Starting the refine the control point grid");
     if (controlPointGrid->nz == 1) {
         switch (controlPointGrid->datatype) {
         case NIFTI_TYPE_FLOAT32:
@@ -2149,9 +2120,7 @@ void reg_spline_refineControlPointGrid(nifti_image * controlPointGrid,
             reg_spline_refineControlPointGrid2D<double>(controlPointGrid, referenceImage);
             break;
         default:
-            reg_print_fct_error("reg_spline_refineControlPointGrid");
-            reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient");
-            reg_exit();
+            NR_FATAL_ERROR("Only single or double precision is implemented for the bending energy gradient");
         }
     } else {
         switch (controlPointGrid->datatype) {
@@ -2162,9 +2131,7 @@ void reg_spline_refineControlPointGrid(nifti_image * controlPointGrid,
             reg_spline_refineControlPointGrid3D<double>(controlPointGrid, referenceImage);
             break;
         default:
-            reg_print_fct_error("reg_spline_refineControlPointGrid");
-            reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient");
-            reg_exit();
+            NR_FATAL_ERROR("Only single or double precision is implemented for the bending energy gradient");
         }
     }
     if (referenceImage != nullptr) {
@@ -2257,9 +2224,7 @@ void reg_spline_refineControlPointGrid(nifti_image * controlPointGrid,
             controlPointGrid->sto_xyz.m[2][3] = newOrigin[2];
         controlPointGrid->sto_ijk = nifti_mat44_inverse(controlPointGrid->sto_xyz);
     }
-#ifndef NDEBUG
-    reg_print_msg_debug("The control point grid has been refined");
-#endif
+    NR_DEBUG("The control point grid has been refined");
 }
 /* *************************************************************** */
 template <class DataType>
@@ -2486,11 +2451,8 @@ void reg_defField_compose3D(nifti_image *deformationField,
 void reg_defField_compose(nifti_image *deformationField,
                           nifti_image *dfToUpdate,
                           int *mask) {
-    if (deformationField->datatype != dfToUpdate->datatype) {
-        reg_print_fct_error("reg_defField_compose");
-        reg_print_msg_error("Both deformation fields are expected to have the same type");
-        reg_exit();
-    }
+    if (deformationField->datatype != dfToUpdate->datatype)
+        NR_FATAL_ERROR("Both deformation fields are expected to have the same type");
 
     bool freeMask = false;
     if (mask == nullptr) {
@@ -2507,9 +2469,7 @@ void reg_defField_compose(nifti_image *deformationField,
             reg_defField_compose2D<double>(deformationField, dfToUpdate, mask);
             break;
         default:
-            reg_print_fct_error("reg_defField_compose");
-            reg_print_msg_error("Deformation field pixel type unsupported");
-            reg_exit();
+            NR_FATAL_ERROR("Deformation field pixel type is unsupported");
         }
     } else {
         switch (deformationField->datatype) {
@@ -2520,9 +2480,7 @@ void reg_defField_compose(nifti_image *deformationField,
             reg_defField_compose3D<double>(deformationField, dfToUpdate, mask);
             break;
         default:
-            reg_print_fct_error("reg_defField_compose");
-            reg_print_msg_error("Deformation field pixel type unsupported");
-            reg_exit();
+            NR_FATAL_ERROR("Deformation field pixel type is unsupported");
         }
     }
 
@@ -3065,17 +3023,11 @@ void reg_defFieldInvert(nifti_image *inputDeformationField,
                         nifti_image *outputDeformationField,
                         float tolerance) {
     // Check the input image data types
-    if (inputDeformationField->datatype != outputDeformationField->datatype) {
-        reg_print_fct_error("reg_defFieldInvert");
-        reg_print_msg_error("Both deformation fields are expected to have the same data type");
-        reg_exit();
-    }
+    if (inputDeformationField->datatype != outputDeformationField->datatype)
+        NR_FATAL_ERROR("Both deformation fields are expected to have the same data type");
 
-    if (inputDeformationField->nu != 3) {
-        reg_print_fct_error("reg_defFieldInvert");
-        reg_print_msg_error("The function has only been implemented for 3D deformation field yet");
-        reg_exit();
-    }
+    if (inputDeformationField->nu != 3)
+        NR_FATAL_ERROR("The function has only been implemented for 3D deformation field yet");
 
     switch (inputDeformationField->datatype) {
     case NIFTI_TYPE_FLOAT32:
@@ -3086,9 +3038,7 @@ void reg_defFieldInvert(nifti_image *inputDeformationField,
         reg_defFieldInvert3D<double>
             (inputDeformationField, outputDeformationField, tolerance);
     default:
-        reg_print_fct_error("reg_defFieldInvert");
-        reg_print_msg_error("Deformation field pixel type unsupported");
-        reg_exit();
+        NR_FATAL_ERROR("Deformation field pixel type is unsupported");
     }
 }
 /* *************************************************************** */
@@ -3492,18 +3442,12 @@ int reg_spline_cppComposition(nifti_image *grid1,
                               bool bspline) {
     // REMINDER Grid2(x)=Grid1(Grid2(x))
 
-    if (grid1->datatype != grid2->datatype) {
-        reg_print_fct_error("reg_spline_cppComposition");
-        reg_print_msg_error("Both input images do not have the same type.");
-        reg_exit();
-    }
+    if (grid1->datatype != grid2->datatype)
+        NR_FATAL_ERROR("Both input images are expected to have the same data type");
 
 #if _USE_SSE
-    if (grid1->datatype != NIFTI_TYPE_FLOAT32) {
-        reg_print_fct_error("reg_spline_cppComposition");
-        reg_print_msg_error("SSE computation has only been implemented for single precision.");
-        reg_exit();
-    }
+    if (grid1->datatype != NIFTI_TYPE_FLOAT32)
+        NR_FATAL_ERROR("SSE computation has only been implemented for single precision");
 #endif
 
     if (grid1->nz > 1) {
@@ -3515,9 +3459,7 @@ int reg_spline_cppComposition(nifti_image *grid1,
             reg_spline_cppComposition_3D<double>(grid1, grid2, displacement1, displacement2, bspline);
             break;
         default:
-            reg_print_fct_error("reg_spline_cppComposition");
-            reg_print_msg_error("Only implemented for single or double floating images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double floating images");
         }
     } else {
         switch (grid1->datatype) {
@@ -3528,9 +3470,7 @@ int reg_spline_cppComposition(nifti_image *grid1,
             reg_spline_cppComposition_2D<double>(grid1, grid2, displacement1, displacement2, bspline);
             break;
         default:
-            reg_print_fct_error("reg_spline_cppComposition");
-            reg_print_msg_error("Only implemented for single or double floating images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double floating images");
         }
     }
     return EXIT_SUCCESS;
@@ -3539,11 +3479,8 @@ int reg_spline_cppComposition(nifti_image *grid1,
 void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                              nifti_image *flowField) {
     // Check first if the velocity field is actually a velocity field
-    if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID) {
-        reg_print_fct_error("reg_spline_getFlowFieldFromVelocityGrid");
-        reg_print_msg_error("The provide grid is not a velocity field");
-        reg_exit();
-    }
+    if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID)
+        NR_FATAL_ERROR("The provide grid is not a velocity field");
 
     // Initialise the flow field with an identity transformation
     reg_tools_multiplyValueToImage(flowField, flowField, 0.f);
@@ -3572,11 +3509,8 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
                                                    nifti_image *deformationFieldImage,
                                                    bool updateStepNumber) {
     // Check first if the velocity field is actually a velocity field
-    if (flowFieldImage->intent_p1 != DEF_VEL_FIELD) {
-        reg_print_fct_error("reg_defField_getDeformationFieldFromFlowField");
-        reg_print_msg_error("The provide field is not a velocity field");
-        reg_exit();
-    }
+    if (flowFieldImage->intent_p1 != DEF_VEL_FIELD)
+        NR_FATAL_ERROR("The provide field is not a velocity field");
 
     // Remove the affine component from the flow field
     nifti_image *affineOnly = nullptr;
@@ -3614,12 +3548,8 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
         squaringNumber = squaringNumber < 6 ? 6 : squaringNumber;
         // Set the number of squaring step in the flow field
         if (fabs(flowFieldImage->intent_p2) != squaringNumber) {
-            char text[255];
-            sprintf(text, "Changing from %i to %i squaring step (equivalent to scaling down by %i)",
-                    static_cast<int>(reg_round(fabs(flowFieldImage->intent_p2))),
-                    abs(squaringNumber),
-                    (int)pow(2.0f, squaringNumber));
-            reg_print_msg_warn(text);
+            NR_WARN("Changing from " << (int)reg_round(fabs(flowFieldImage->intent_p2)) << " to " << abs(squaringNumber) <<
+                    " squaring step (equivalent to scaling down by " << (int)pow(2.0f, squaringNumber) << ")");
         }
         // Update the number of squaring step required
         if (flowFieldImage->intent_p2 >= 0)
@@ -3656,11 +3586,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
         // The computed scaled deformation field is copied over
         memcpy(deformationFieldImage->data, flowFieldImage->data,
                deformationFieldImage->nvox * deformationFieldImage->nbyper);
-#ifndef NDEBUG
-        char text[255];
-        sprintf(text, "Squaring (composition) step %u/%u", i + 1, squaringNumber);
-        reg_print_msg_debug(text);
-#endif
+        NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
     }
     // The affine conponent of the transformation is restored
     if (affineOnly != nullptr) {
@@ -3710,11 +3636,7 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
         velocityFieldGrid->intent_p2 = flowField->intent_p2;
         // Deallocate the allocated flow field
         nifti_image_free(flowField);
-    } else {
-        reg_print_fct_error("reg_spline_getDeformationFieldFromVelocityGrid");
-        reg_print_msg_error("The provided input image is not a spline parametrised transformation");
-        reg_exit();
-    }
+    } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation");
 }
 /* *************************************************************** */
 void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
@@ -3772,11 +3694,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
             reg_defField_compose(deformationFieldImage[i], // to apply
                                  deformationFieldImage[i + 1], // to update
                                  nullptr);
-#ifndef NDEBUG
-            char text[255];
-            sprintf(text, "Squaring (composition) step %u/%u", i + 1, squaringNumber);
-            reg_print_msg_debug(text);
-#endif
+            NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
         }
         // The affine conponent of the transformation is restored
         if (affineOnly != nullptr) {
@@ -3797,11 +3715,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
                                                true);
             }
         }
-    } else {
-        reg_print_fct_error("reg_spline_getIntermediateDefFieldFromVelGrid");
-        reg_print_msg_error("The provided input image is not a spline parametrised transformation");
-        reg_exit();
-    }
+    } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation");
 }
 /* *************************************************************** */
 template <class DataType>
@@ -3809,8 +3723,7 @@ void compute_lie_bracket(nifti_image *img1,
                          nifti_image *img2,
                          nifti_image *res,
                          bool use_jac) {
-    reg_print_msg_error("The compute_lie_bracket function needs updating");
-    reg_exit();
+    NR_FATAL_ERROR("The compute_lie_bracket function needs updating");
 #ifdef _WIN32
     long voxNumber = (long)NiftiImage::calcVoxelNumber(img1, 3);
 #else
@@ -3824,7 +3737,7 @@ void compute_lie_bracket(nifti_image *img1,
         reg_getDeformationFromDisplacement(img1);
         reg_getDeformationFromDisplacement(img2);
         // HERE TO DO
-        reg_exit();
+        NR_FATAL_ERROR("The function needs updating");
         //        reg_spline_GetJacobianMatrixFull(img1,img1,jacImg1);
         //        reg_spline_GetJacobianMatrixFull(img2,img2,jacImg2);
         reg_getDisplacementFromDeformation(img1);
@@ -3946,8 +3859,7 @@ void compute_BCH_update(nifti_image *img1, // current field
                          nifti_image *img2, // gradient
                          int type) {
     // To update
-    reg_print_msg_error("The compute_BCH_update function needs updating");
-    reg_exit();
+    NR_FATAL_ERROR("The compute_BCH_update function needs updating");
     DataType *res = (DataType*)malloc(img1->nvox * sizeof(DataType));
 
 #ifdef _WIN32
@@ -4037,11 +3949,8 @@ void compute_BCH_update(nifti_image *img1, // current field
 void compute_BCH_update(nifti_image *img1, // current field
                         nifti_image *img2, // gradient
                         int type) {
-    if (img1->datatype != img2->datatype) {
-        reg_print_fct_error("compute_BCH_update");
-        reg_print_msg_error("Both input images are expected to be of similar type");
-        reg_exit();
-    }
+    if (img1->datatype != img2->datatype)
+        NR_FATAL_ERROR("Both input images are expected to be of same type");
     switch (img1->datatype) {
     case NIFTI_TYPE_FLOAT32:
         compute_BCH_update<float>(img1, img2, type);
@@ -4050,9 +3959,7 @@ void compute_BCH_update(nifti_image *img1, // current field
         compute_BCH_update<double>(img1, img2, type);
         break;
     default:
-        reg_print_fct_error("compute_BCH_update");
-        reg_print_msg_error("Only implemented for single or double precision images");
-        reg_exit();
+        NR_FATAL_ERROR("Only implemented for single or double precision images");
     }
 }
 /* *************************************************************** */
@@ -4169,9 +4076,7 @@ void reg_spline_getDeconvolvedCoefficents(nifti_image *img) {
         reg_spline_getDeconvolvedCoefficents<double>(img);
         break;
     default:
-        reg_print_fct_error("reg_spline_getDeconvolvedCoefficents");
-        reg_print_msg_error("Only implemented for single or double precision images");
-        reg_exit();
+        NR_FATAL_ERROR("Only implemented for single or double precision images");
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 50dad457..8eba7987 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -61,26 +61,17 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                                   bool useHeaderInformation)
 {
    if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr)
-   {
-      reg_print_fct_error("reg_spline_jacobian3D");
-      reg_print_msg_error("Both output pointers are nullptr");
-      reg_print_msg_error("Nothing to be done");
-      reg_exit();
-   }
+      NR_FATAL_ERROR("Both output pointers are nullptr");
    if(referenceImage==nullptr && approximation==false)
-   {
-      reg_print_fct_error("reg_spline_jacobian3D");
-      reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position");
-      reg_exit();
+      NR_FATAL_ERROR("The reference image is required to compute the Jacobian at voxel position");
 
-   }
    // Create some pointers towards to control point grid image data
    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
    DataType *coeffPtrX = static_cast<DataType *>(splineControlPoint->data);
    DataType *coeffPtrY = &coeffPtrX[nodeNumber];
    DataType *coeffPtrZ = &coeffPtrY[nodeNumber];
 
-   // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing
+   // Define a matrix to reorient the Jacobian matrices and normalise them by the grid spacing
    mat33 reorientation,jacobianMatrix;
    if(splineControlPoint->sform_code>0)
       reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
@@ -262,19 +253,10 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                            bool useHeaderInformation)
 {
    if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr)
-   {
-      reg_print_fct_error("reg_spline_jacobian2D");
-      reg_print_msg_error("Both output pointers are nullptr");
-      reg_print_msg_error("Nothing to be done");
-      reg_exit();
-   }
+      NR_FATAL_ERROR("Both output pointers are nullptr");
    if(referenceImage==nullptr && approximation==false)
-   {
-      reg_print_fct_error("reg_spline_jacobian2D");
-      reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position");
-      reg_exit();
+      NR_FATAL_ERROR("The reference image is required to compute the Jacobian at voxel position");
 
-   }
    // Create some pointers towards to control point grid image data
    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2);
    DataType *coeffPtrX = static_cast<DataType *>(splineControlPoint->data);
@@ -544,19 +526,10 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                            bool useHeaderInformation)
 {
    if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr)
-   {
-      reg_print_fct_error("reg_spline_jacobian3D");
-      reg_print_msg_error("Both output pointers are nullptr");
-      reg_print_msg_error("Nothing to be done");
-      reg_exit();
-   }
+      NR_FATAL_ERROR("Both output pointers are nullptr");
    if(referenceImage==nullptr && approximation==false)
-   {
-      reg_print_fct_error("reg_spline_jacobian3D");
-      reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position");
-      reg_exit();
+      NR_FATAL_ERROR("The reference image is required to compute the Jacobian at voxel position");
 
-   }
    // Create some pointers towards to control point grid image data
    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
    DataType *coeffPtrX = static_cast<DataType *>(splineControlPoint->data);
@@ -1269,9 +1242,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
                                        useHeaderInformation);
          break;
       default:
-         reg_print_fct_error("reg_spline_getJacobianPenaltyTerm");
-         reg_print_fct_error("Only single or double precision has been implemented");
-         reg_exit();
+         NR_FATAL_ERROR("Only single or double precision has been implemented");
       }
    }
    else
@@ -1295,9 +1266,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
                                        useHeaderInformation);
          break;
       default:
-         reg_print_fct_error("reg_spline_getJacobianPenaltyTerm");
-         reg_print_fct_error("Only single or double precision has been implemented");
-         reg_exit();
+         NR_FATAL_ERROR("Only single or double precision has been implemented");
       }
    }
    // The jacobian determinant are averaged
@@ -1479,8 +1448,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
       if(useHeaderInformation)
       {
          // The header information is considered
-         reg_exit();
-
+         NR_FATAL_ERROR("Not implemented yet");
       } // end if use header information
       else
       {
@@ -1742,8 +1710,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
       if(useHeaderInformation)
       {
          // The header information is considered
-         reg_exit();
-
+         NR_FATAL_ERROR("Not implemented yet");
       } // end if use header information
       else
       {
@@ -1873,12 +1840,7 @@ void reg_spline_getJacobianPenaltyTermGradient(nifti_image *splineControlPoint,
                                                bool useHeaderInformation)
 {
    if(splineControlPoint->datatype != gradientImage->datatype)
-   {
-      reg_print_fct_error("reg_spline_getJacobianPenaltyTermGradient");
-      reg_print_msg_error("The input images are expected to be of the same type");
-      reg_exit();
-   }
-
+      NR_FATAL_ERROR("The input images are expected to be of the same type");
 
    if(splineControlPoint->nz==1)
    {
@@ -1901,9 +1863,7 @@ void reg_spline_getJacobianPenaltyTermGradient(nifti_image *splineControlPoint,
                                                   useHeaderInformation);
          break;
       default:
-         reg_print_fct_error("reg_spline_getJacobianPenaltyTermGradient");
-         reg_print_msg_error("Function only usable with single or double floating precision");
-         reg_exit();
+         NR_FATAL_ERROR("Function only usable with single or double floating precision");
       }
    }
    else
@@ -1927,9 +1887,7 @@ void reg_spline_getJacobianPenaltyTermGradient(nifti_image *splineControlPoint,
                                                   useHeaderInformation);
          break;
       default:
-         reg_print_fct_error("reg_spline_getJacobianPenaltyTermGradient");
-         reg_print_msg_error("Function only usable with single or double floating precision");
-         reg_exit();
+         NR_FATAL_ERROR("Function only usable with single or double floating precision");
       }
    }
 }
@@ -2091,7 +2049,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
       if(useHeaderInformation)
       {
          // The grid and reference image are not aligned
-         reg_exit();
+         NR_FATAL_ERROR("Not implemented yet");
       }
       else
       {
@@ -2361,7 +2319,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
       if(useHeaderInformation)
       {
          // The grid and reference image are not aligned
-         reg_exit();
+         NR_FATAL_ERROR("Not implemented yet");
       }
       else
       {
@@ -2487,17 +2445,14 @@ double reg_spline_correctFolding(nifti_image *splineControlPoint,
       switch(splineControlPoint->datatype)
       {
       case NIFTI_TYPE_FLOAT32:
-         return reg_spline_correctFolding2D<float>
-               (splineControlPoint, referenceImage, approx, false);
+         return reg_spline_correctFolding2D<float>(splineControlPoint, referenceImage, approx, false);
          break;
       case NIFTI_TYPE_FLOAT64:
-         return reg_spline_correctFolding2D<double>
-               (splineControlPoint, referenceImage, approx, false);
+         return reg_spline_correctFolding2D<double>(splineControlPoint, referenceImage, approx, false);
          break;
       default:
-         reg_print_fct_error("reg_spline_correctFolding");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
+         NR_FATAL_ERROR("Only implemented for single or double precision images");
+         return 0;
       }
    }
    else
@@ -2505,17 +2460,14 @@ double reg_spline_correctFolding(nifti_image *splineControlPoint,
       switch(splineControlPoint->datatype)
       {
       case NIFTI_TYPE_FLOAT32:
-         return reg_spline_correctFolding3D<float>
-               (splineControlPoint, referenceImage, approx, false);
+         return reg_spline_correctFolding3D<float>(splineControlPoint, referenceImage, approx, false);
          break;
       case NIFTI_TYPE_FLOAT64:
-         return reg_spline_correctFolding3D<double>
-               (splineControlPoint, referenceImage, approx, false);
+         return reg_spline_correctFolding3D<double>(splineControlPoint, referenceImage, approx, false);
          break;
       default:
-         reg_print_fct_error("reg_spline_correctFolding");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
+         NR_FATAL_ERROR("Only implemented for single or double precision images");
+         return 0;
       }
    }
 }
@@ -2527,9 +2479,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint,
    if(splineControlPoint->intent_p1==LIN_SPLINE_GRID){
       if(splineControlPoint->nz==1)
       {
-         reg_print_fct_error("reg_spline_GetJacobianMap");
-         reg_print_msg_error("No 2D implementation for the linear spline yet");
-         reg_exit();
+         NR_FATAL_ERROR("No 2D implementation for the linear spline yet");
       }
       else
       {
@@ -2552,9 +2502,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint,
                                                 true);
             break;
          default:
-            reg_print_fct_error("reg_spline_GetJacobianMap");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
          }
       }
 
@@ -2581,9 +2529,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint,
                                                 true);
             break;
          default:
-            reg_print_fct_error("reg_spline_GetJacobianMap");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
          }
       }
       else
@@ -2607,9 +2553,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint,
                                                 true);
             break;
          default:
-            reg_print_fct_error("reg_spline_GetJacobianMap");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
          }
       }
    }
@@ -2641,9 +2585,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage,
                                        true);
          break;
       default:
-         reg_print_fct_error("reg_spline_GetJacobianMatrix");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
+         NR_FATAL_ERROR("Only implemented for single or double precision images");
       }
    }
    else
@@ -2667,9 +2609,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage,
                                        true);
          break;
       default:
-         reg_print_fct_error("reg_spline_GetJacobianMatrix");
-         reg_print_msg_error("Only implemented for single or double precision images");
-         reg_exit();
+         NR_FATAL_ERROR("Only implemented for single or double precision images");
       }
    }
 }
@@ -2923,11 +2863,8 @@ void reg_defField_getJacobianMap(nifti_image *deformationField,
                                  nifti_image *jacobianImage)
 {
    if(deformationField->datatype!=jacobianImage->datatype)
-   {
-      reg_print_fct_error("reg_defField_getJacobianMap");
-      reg_print_msg_error("Both input images have different datatype");
-      reg_exit();
-   }
+      NR_FATAL_ERROR("Both input images are expected to have the same datatype");
+
    switch(deformationField->datatype)
    {
    case NIFTI_TYPE_FLOAT32:
@@ -2941,9 +2878,7 @@ void reg_defField_getJacobianMap(nifti_image *deformationField,
       else reg_defField_getJacobianMap2D<double>(deformationField,jacobianImage,nullptr);
       break;
    default:
-      reg_print_fct_error("reg_defField_getJacobianMap");
-      reg_print_msg_error("Only implemented for single or double precision images");
-      reg_exit();
+      NR_FATAL_ERROR("Only implemented for single or double precision images");
    }
 }
 /* *************************************************************** */
@@ -2964,18 +2899,14 @@ void reg_defField_getJacobianMatrix(nifti_image *deformationField,
       else reg_defField_getJacobianMap2D<double>(deformationField,nullptr,jacobianMatrices);
       break;
    default:
-      reg_print_fct_error("reg_defField_getJacobianMatrix");
-      reg_print_msg_error("Only implemented for single or double precision images");
-      reg_exit();
+      NR_FATAL_ERROR("Only implemented for single or double precision images");
    }
 }
 /* *************************************************************** */
 template <class DataType>
 void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
-                                                   nifti_image* flowFieldImage
-                                                   )
+                                                   nifti_image* flowFieldImage)
 {
-
    // A second field is allocated to store the deformation
    nifti_image *defFieldImage = nifti_dup(*flowFieldImage, false);
 
@@ -3019,10 +2950,8 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
    if(flowFieldImage->num_ext>0)
    {
       if(flowFieldImage->ext_list[0].edata!=nullptr)
-      {
          affineMatrix = reg_mat44_to_mat33(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata));
-      }
-      else reg_exit();
+      else NR_FATAL_ERROR("The affine matrix is expected to be stored in the flow field");
    }
    const size_t voxelNumber = NiftiImage::calcVoxelNumber(flowFieldImage, 3);
    for(size_t i=0; i<voxelNumber; ++i)
@@ -3047,10 +2976,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
       // The computed scaled deformation field is copied over
       memcpy(defFieldImage->data, flowFieldImage->data,
              defFieldImage->nvox*defFieldImage->nbyper);
-#ifndef NDEBUG
-      reg_print_fct_debug("reg_defField_GetJacobianMatFromFlowField_core");
-      printf("[NiftyReg DEBUG] Squaring (composition) step %i/%i\n", (int)step+1, (int)fabs(flowFieldImage->intent_p2));
-#endif
+      NR_DEBUG("Squaring (composition) step " << int(step + 1) << "/" << int(fabs(flowFieldImage->intent_p2)));
    }
    // Allocated arrays and images are free'ed
    nifti_image_free(defFieldImage);
@@ -3059,10 +2985,8 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
    if(flowFieldImage->num_ext>1)
    {
       if(flowFieldImage->ext_list[1].edata!=nullptr)
-      {
          affineMatrix = reg_mat44_to_mat33(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[1].edata));
-      }
-      else reg_exit();
+      else NR_FATAL_ERROR("The affine matrix is expected to be stored in the flow field");
       for(size_t i=0; i<voxelNumber; ++i)
          jacobianMatrices[i]=nifti_mat33_mul(affineMatrix,jacobianMatrices[i]);
    }
@@ -3096,18 +3020,13 @@ int reg_defField_GetJacobianMatFromFlowField(mat33* jacobianMatrices,
    switch(flowFieldImage->datatype)
    {
    case NIFTI_TYPE_FLOAT32:
-      reg_defField_GetJacobianMatFromFlowField_core<float>
-            (jacobianMatrices,flowFieldImage);
+      reg_defField_GetJacobianMatFromFlowField_core<float>(jacobianMatrices,flowFieldImage);
       break;
    case NIFTI_TYPE_FLOAT64:
-      reg_defField_GetJacobianMatFromFlowField_core<double>
-            (jacobianMatrices,flowFieldImage);
+      reg_defField_GetJacobianMatFromFlowField_core<double>(jacobianMatrices,flowFieldImage);
       break;
    default:
-      reg_print_fct_error("reg_defField_GetJacobianMatFromFlowField");
-      reg_print_msg_error("Unsupported data type");
-      reg_exit();
-      break;
+      NR_FATAL_ERROR("Unsupported data type");
    }
    return 0;
 }
@@ -3138,32 +3057,26 @@ int reg_spline_GetJacobianMatFromVelocityGrid(mat33* jacobianMatrices,
 }
 /* *************************************************************** */
 int reg_defField_GetJacobianDetFromFlowField(nifti_image* jacobianDetImage,
-                                             nifti_image* flowFieldImage
-                                             )
+                                             nifti_image* flowFieldImage)
 {
    // create an array of mat33
    const size_t voxelNumber = NiftiImage::calcVoxelNumber(jacobianDetImage, 3);
    mat33 *jacobianMatrices=(mat33 *)malloc(voxelNumber*sizeof(mat33));
 
    // Compute the Jacobian matrice array
-   reg_defField_GetJacobianMatFromFlowField(jacobianMatrices,
-                                            flowFieldImage);
+   reg_defField_GetJacobianMatFromFlowField(jacobianMatrices, flowFieldImage);
 
    // Compute and store all determinant
    switch(jacobianDetImage->datatype)
    {
    case NIFTI_TYPE_FLOAT32:
-      reg_getDetArrayFromMatArray<float>
-            (jacobianDetImage,jacobianMatrices);
+      reg_getDetArrayFromMatArray<float>(jacobianDetImage,jacobianMatrices);
       break;
    case NIFTI_TYPE_FLOAT64:
-      reg_getDetArrayFromMatArray<double>
-            (jacobianDetImage,jacobianMatrices);
+      reg_getDetArrayFromMatArray<double>(jacobianDetImage,jacobianMatrices);
       break;
    default:
-      reg_print_fct_error("reg_defField_GetJacobianDetFromFlowField");
-      reg_print_msg_error("Unsupported data type");
-      break;
+      NR_FATAL_ERROR("Unsupported data type");
    }
    free(jacobianMatrices);
    return 0;
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 4abf1081..41e9311c 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -161,9 +161,8 @@ double reg_spline_approxBendingEnergy(const nifti_image *splineControlPoint) {
         case NIFTI_TYPE_FLOAT64:
             return reg_spline_approxBendingEnergyValue2D<double>(splineControlPoint);
         default:
-            reg_print_fct_error("reg_spline_approxBendingEnergy");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
+            return 0;
         }
     } else {
         switch (splineControlPoint->datatype) {
@@ -172,9 +171,8 @@ double reg_spline_approxBendingEnergy(const nifti_image *splineControlPoint) {
         case NIFTI_TYPE_FLOAT64:
             return reg_spline_approxBendingEnergyValue3D<double>(splineControlPoint);
         default:
-            reg_print_fct_error("reg_spline_approxBendingEnergy");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
+            return 0;
         }
     }
 }
@@ -457,11 +455,9 @@ extern "C++"
 void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint,
                                             nifti_image *gradientImage,
                                             float weight) {
-    if (splineControlPoint->datatype != gradientImage->datatype) {
-        reg_print_fct_error("reg_spline_approxBendingEnergyGradient");
-        reg_print_msg_error("The input images are expected to have the same type");
-        reg_exit();
-    }
+    if (splineControlPoint->datatype != gradientImage->datatype)
+        NR_FATAL_ERROR("The input images are expected to have the same type");
+
     if (splineControlPoint->nz == 1) {
         switch (splineControlPoint->datatype) {
         case NIFTI_TYPE_FLOAT32:
@@ -471,9 +467,7 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint,
             reg_spline_approxBendingEnergyGradient2D<double>(splineControlPoint, gradientImage, weight);
             break;
         default:
-            reg_print_fct_error("reg_spline_approxBendingEnergyGradient");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
         }
     } else {
         switch (splineControlPoint->datatype) {
@@ -484,9 +478,7 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint,
             reg_spline_approxBendingEnergyGradient3D<double>(splineControlPoint, gradientImage, weight);
             break;
         default:
-            reg_print_fct_error("reg_spline_approxBendingEnergyGradient");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
         }
     }
 }
@@ -664,9 +656,8 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) {
         case NIFTI_TYPE_FLOAT64:
             return reg_spline_approxLinearEnergyValue3D<double>(splineControlPoint);
         default:
-            reg_print_fct_error("reg_spline_approxLinearEnergyValue3D");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
+            return 0;
         }
     } else {
         switch (splineControlPoint->datatype) {
@@ -675,9 +666,8 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) {
         case NIFTI_TYPE_FLOAT64:
             return reg_spline_approxLinearEnergyValue2D<double>(splineControlPoint);
         default:
-            reg_print_fct_error("reg_spline_approxLinearEnergyValue2D");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
+            return 0;
         }
     }
 }
@@ -875,9 +865,8 @@ double reg_spline_linearEnergy(const nifti_image *referenceImage,
         case NIFTI_TYPE_FLOAT64:
             return reg_spline_linearEnergyValue3D<double>(referenceImage, splineControlPoint);
         default:
-            reg_print_fct_error("reg_spline_linearEnergyValue3D");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
+            return 0;
         }
     } else {
         switch (splineControlPoint->datatype) {
@@ -886,9 +875,8 @@ double reg_spline_linearEnergy(const nifti_image *referenceImage,
         case NIFTI_TYPE_FLOAT64:
             return reg_spline_linearEnergyValue2D<double>(referenceImage, splineControlPoint);
         default:
-            reg_print_fct_error("reg_spline_approxLinearEnergyValue2D");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
+            return 0;
         }
     }
 }
@@ -1107,11 +1095,9 @@ void reg_spline_linearEnergyGradient(const nifti_image *referenceImage,
                                      const nifti_image *splineControlPoint,
                                      nifti_image *gradientImage,
                                      float weight) {
-    if (splineControlPoint->datatype != gradientImage->datatype) {
-        reg_print_fct_error("reg_spline_linearEnergyGradient");
-        reg_print_msg_error("Input images are expected to have the same datatype");
-        reg_exit();
-    }
+    if (splineControlPoint->datatype != gradientImage->datatype)
+        NR_FATAL_ERROR("Input images are expected to have the same datatype");
+
     if (splineControlPoint->nz > 1) {
         switch (splineControlPoint->datatype) {
         case NIFTI_TYPE_FLOAT32:
@@ -1121,9 +1107,7 @@ void reg_spline_linearEnergyGradient(const nifti_image *referenceImage,
             reg_spline_linearEnergyGradient3D<double>(referenceImage, splineControlPoint, gradientImage, weight);
             break;
         default:
-            reg_print_fct_error("reg_spline_linearEnergyGradient3D");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
         }
     } else {
         switch (splineControlPoint->datatype) {
@@ -1134,9 +1118,7 @@ void reg_spline_linearEnergyGradient(const nifti_image *referenceImage,
             reg_spline_linearEnergyGradient2D<double>(referenceImage, splineControlPoint, gradientImage, weight);
             break;
         default:
-            reg_print_fct_error("reg_spline_linearEnergyGradient2D");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
         }
     }
 }
@@ -1345,11 +1327,9 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi
 void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint,
                                            nifti_image *gradientImage,
                                            float weight) {
-    if (splineControlPoint->datatype != gradientImage->datatype) {
-        reg_print_fct_error("reg_spline_linearEnergyGradient");
-        reg_print_msg_error("Input images are expected to have the same datatype");
-        reg_exit();
-    }
+    if (splineControlPoint->datatype != gradientImage->datatype)
+        NR_FATAL_ERROR("Input images are expected to have the same datatype");
+
     if (splineControlPoint->nz > 1) {
         switch (splineControlPoint->datatype) {
         case NIFTI_TYPE_FLOAT32:
@@ -1359,9 +1339,7 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint
             reg_spline_approxLinearEnergyGradient3D<double>(splineControlPoint, gradientImage, weight);
             break;
         default:
-            reg_print_fct_error("reg_spline_linearEnergyGradient");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
         }
     } else {
         switch (splineControlPoint->datatype) {
@@ -1372,9 +1350,7 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint
             reg_spline_approxLinearEnergyGradient2D<double>(splineControlPoint, gradientImage, weight);
             break;
         default:
-            reg_print_fct_error("reg_spline_linearEnergyGradient");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
         }
     }
 }
@@ -1528,9 +1504,8 @@ double reg_defField_linearEnergy(const nifti_image *deformationField) {
         case NIFTI_TYPE_FLOAT64:
             return reg_defField_linearEnergyValue3D<double>(deformationField);
         default:
-            reg_print_fct_error("reg_defField_linearEnergyValue3D");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
+            return 0;
         }
     } else {
         switch (deformationField->datatype) {
@@ -1539,9 +1514,8 @@ double reg_defField_linearEnergy(const nifti_image *deformationField) {
         case NIFTI_TYPE_FLOAT64:
             return reg_defField_linearEnergyValue2D<double>(deformationField);
         default:
-            reg_print_fct_error("reg_defField_linearEnergyValue2D");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
+            return 0;
         }
     }
 }
@@ -1725,9 +1699,7 @@ void reg_defField_linearEnergyGradient(const nifti_image *deformationField,
             reg_defField_linearEnergyGradient3D<double>(deformationField, gradientImage, weight);
             break;
         default:
-            reg_print_fct_error("reg_defField_linearEnergyGradient3D");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
         }
     } else {
         switch (deformationField->datatype) {
@@ -1738,9 +1710,7 @@ void reg_defField_linearEnergyGradient(const nifti_image *deformationField,
             reg_defField_linearEnergyGradient2D<double>(deformationField, gradientImage, weight);
             break;
         default:
-            reg_print_fct_error("reg_defField_linearEnergyGradient2D");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
         }
     }
 }
@@ -1826,15 +1796,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
             if (imageDim > 2)
                 constraintValue += reg_pow2(flo_position[2] - def_position[2]);
         } else {
-            char warning_text[255];
-            if (imageDim > 2)
-                sprintf(warning_text, "The current landmark at position %g %g %g is ignored",
-                        ref_position[0], ref_position[1], ref_position[2]);
-            else
-                sprintf(warning_text, "The current landmark at position %g %g is ignored",
-                        ref_position[0], ref_position[1]);
-            reg_print_msg_warn(warning_text);
-            reg_print_msg_warn("as it is not in the space of the reference image");
+            NR_WARN("The current landmark at position " << ref_position[0] << " " <<
+                    ref_position[1] << (imageDim > 2 ? " "s + std::to_string(ref_position[2]) : "") <<
+                    " is ignored as it is not in the space of the reference image");
         }
     }
     return constraintValue;
@@ -1844,11 +1808,8 @@ double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage,
                                       size_t landmarkNumber,
                                       float *landmarkReference,
                                       float *landmarkFloating) {
-    if (controlPointImage->intent_p1 != CUB_SPLINE_GRID) {
-        reg_print_fct_error("reg_spline_getLandmarkDistance");
-        reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now");
-        reg_exit();
-    }
+    if (controlPointImage->intent_p1 != CUB_SPLINE_GRID)
+        NR_FATAL_ERROR("This function is only implemented for control point grid within an Euclidean setting for now");
     switch (controlPointImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
         return reg_spline_getLandmarkDistance_core<float>(controlPointImage, landmarkNumber, landmarkReference, landmarkFloating);
@@ -1857,9 +1818,8 @@ double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage,
         return reg_spline_getLandmarkDistance_core<double>(controlPointImage, landmarkNumber, landmarkReference, landmarkFloating);
         break;
     default:
-        reg_print_fct_error("reg_spline_getLandmarkDistance_core");
-        reg_print_msg_error("Only implemented for single or double precision images");
-        reg_exit();
+        NR_FATAL_ERROR("Only implemented for single or double precision images");
+        return 0;
     }
 }
 /* *************************************************************** */
@@ -1972,15 +1932,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
                 }
             }
         } else {
-            char warning_text[255];
-            if (imageDim > 2)
-                sprintf(warning_text, "The current landmark at position %g %g %g is ignored",
-                        ref_position[0], ref_position[1], ref_position[2]);
-            else
-                sprintf(warning_text, "The current landmark at position %g %g is ignored",
-                        ref_position[0], ref_position[1]);
-            reg_print_msg_warn(warning_text);
-            reg_print_msg_warn("as it is not in the space of the reference image");
+            NR_WARN("The current landmark at position " << ref_position[0] << " " <<
+                    ref_position[1] << (imageDim > 2 ? " "s + std::to_string(ref_position[2]) : "") <<
+                    " is ignored as it is not in the space of the reference image");
         }
     }
 }
@@ -1991,11 +1945,9 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage
                                             float *landmarkReference,
                                             float *landmarkFloating,
                                             float weight) {
-    if (controlPointImage->intent_p1 != CUB_SPLINE_GRID) {
-        reg_print_fct_error("reg_spline_getLandmarkDistanceGradient");
-        reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now");
-        reg_exit();
-    }
+    if (controlPointImage->intent_p1 != CUB_SPLINE_GRID)
+        NR_FATAL_ERROR("This function is only implemented for control point grid within an Euclidean setting for now");
+
     switch (controlPointImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
         reg_spline_getLandmarkDistanceGradient_core<float>
@@ -2006,9 +1958,7 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage
             (controlPointImage, gradientImage, landmarkNumber, landmarkReference, landmarkFloating, weight);
         break;
     default:
-        reg_print_fct_error("reg_spline_getLandmarkDistanceGradient_core");
-        reg_print_msg_error("Only implemented for single or double precision images");
-        reg_exit();
+        NR_FATAL_ERROR("Only implemented for single or double precision images");
     }
 }
 /* *************************************************************** */
@@ -2100,14 +2050,12 @@ double reg_spline_approxLinearPairwise(nifti_image *splineControlPoint) {
         case NIFTI_TYPE_FLOAT64:
             return reg_spline_approxLinearPairwise3D<double>(splineControlPoint);
         default:
-            reg_print_fct_error("reg_spline_approxLinearPairwise");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
+            return 0;
         }
     } else {
-        reg_print_fct_error("reg_spline_approxLinearPairwise");
-        reg_print_msg_error("Not implemented in 2D yet");
-        reg_exit();
+        NR_FATAL_ERROR("Not implemented in 2D yet");
+        return 0;
     }
 }
 /* *************************************************************** */
@@ -2215,11 +2163,9 @@ void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint,
 void reg_spline_approxLinearPairwiseGradient(nifti_image *splineControlPoint,
                                              nifti_image *gradientImage,
                                              float weight) {
-    if (splineControlPoint->datatype != gradientImage->datatype) {
-        reg_print_fct_error("reg_spline_approxLinearPairwiseGradient");
-        reg_print_msg_error("Input images are expected to have the same datatype");
-        reg_exit();
-    }
+    if (splineControlPoint->datatype != gradientImage->datatype)
+        NR_FATAL_ERROR("Input images are expected to have the same datatype");
+
     if (splineControlPoint->nz > 1) {
         switch (splineControlPoint->datatype) {
         case NIFTI_TYPE_FLOAT32:
@@ -2229,14 +2175,10 @@ void reg_spline_approxLinearPairwiseGradient(nifti_image *splineControlPoint,
             reg_spline_approxLinearPairwiseGradient3D<double>(splineControlPoint, gradientImage, weight);
             break;
         default:
-            reg_print_fct_error("reg_spline_linearEnergyGradient");
-            reg_print_msg_error("Only implemented for single or double precision images");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for single or double precision images");
         }
     } else {
-        reg_print_fct_error("reg_spline_approxLinearPairwiseGradient");
-        reg_print_msg_error("Not implemented for 2D images yet");
-        reg_exit();
+        NR_FATAL_ERROR("Not implemented for 2D images yet");
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp
index 7ca78285..07dbf3bd 100644
--- a/reg-lib/cpu/_reg_maths.cpp
+++ b/reg-lib/cpu/_reg_maths.cpp
@@ -1,7 +1,5 @@
 #include "_reg_maths.h"
-//STD
-#include <map>
-#include <vector>
+#include "Debug.hpp"
 
 #define mat(i,j,dim) mat[i*dim+j]
 
@@ -23,11 +21,7 @@ void reg_LUdecomposition(T *mat,
             if ((temp = fabs(mat(i, j, dim)))>big)
                 big = temp;
         if (big == 0.f)
-        {
-            reg_print_fct_error("reg_LUdecomposition");
-            reg_print_msg_error("Singular matrix");
-            reg_exit();
-        }
+            NR_FATAL_ERROR("Singular matrix");
         vv[i] = 1.0 / big;
     }
     for (j = 0; j < dim; ++j)
@@ -120,13 +114,8 @@ void reg_matrixMultiply(T *mat1,
 {
     // First check that the dimension are appropriate
     if (dim1[1] != dim2[0])
-    {
-        char text[255]; sprintf(text, "Matrices can not be multiplied due to their size: [%zu %zu] [%zu %zu]",
-            dim1[0], dim1[1], dim2[0], dim2[1]);
-        reg_print_fct_error("reg_matrixMultiply");
-        reg_print_msg_error(text);
-        reg_exit();
-    }
+        NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(dim1[0]) + " " +
+                       std::to_string(dim1[1]) + "] [" + std::to_string(dim2[0]) + " " + std::to_string(dim2[1]) + "]");
     size_t resDim[2] = {dim1[0], dim2[1]};
     // Allocate the result matrix
     if (res != nullptr)
@@ -233,13 +222,9 @@ template<class T>
 T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2) {
     if (transposeMat2 == false) {
         // First check that the dimension are appropriate
-        if (mat1Y != mat2X) {
-            char text[255]; sprintf(text, "Matrices can not be multiplied due to their size: [%zu %zu] [%zu %zu]",
-                mat1X, mat1Y, mat2X, mat2Y);
-            reg_print_fct_error("reg_matrix2DMultiply");
-            reg_print_msg_error(text);
-            reg_exit();
-        }
+        if (mat1Y != mat2X)
+            NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " +
+                           std::to_string(mat1Y) + "] [" + std::to_string(mat2X) + " " + std::to_string(mat2Y) + "]");
 
         size_t nbElement = mat1Y;
         double resTemp = 0;
@@ -259,13 +244,10 @@ T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t
     }
     else {
         // First check that the dimension are appropriate
-        if (mat1Y != mat2Y) {
-            char text[255]; sprintf(text, "Matrices can not be multiplied due to their size: [%zu %zu] [%zu %zu]",
-                mat1X, mat1Y, mat2Y, mat2X);
-            reg_print_fct_error("reg_matrix2DMultiply");
-            reg_print_msg_error(text);
-            reg_exit();
-        }
+        if (mat1Y != mat2Y)
+            NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " +
+                           std::to_string(mat1Y) + "] [" + std::to_string(mat2Y) + " " + std::to_string(mat2X) + "]");
+
         size_t nbElement = mat1Y;
         double resTemp = 0;
         T** res = reg_matrix2DAllocate<T>(mat1X,mat2X);
@@ -290,13 +272,10 @@ template<class T>
 void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, T** resT, bool transposeMat2) {
     if (transposeMat2 == false) {
         // First check that the dimension are appropriate
-        if (mat1Y != mat2X) {
-            char text[255]; sprintf(text, "Matrices can not be multiplied due to their size: [%zu %zu] [%zu %zu]",
-                mat1X, mat1Y, mat2X, mat2Y);
-            reg_print_fct_error("reg_matrix2DMultiply");
-            reg_print_msg_error(text);
-            reg_exit();
-        }
+        if (mat1Y != mat2X)
+            NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " +
+                           std::to_string(mat1Y) + "] [" + std::to_string(mat2X) + " " + std::to_string(mat2Y) + "]");
+
         size_t nbElement = mat1Y;
         double resTemp;
 
@@ -312,13 +291,10 @@ void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t
     }
     else {
         // First check that the dimension are appropriate
-        if (mat1Y != mat2Y) {
-            char text[255]; sprintf(text, "Matrices can not be multiplied due to their size: [%zu %zu] [%zu %zu]",
-                mat1X, mat1Y, mat2Y, mat2X);
-            reg_print_fct_error("reg_matrix2DMultiply");
-            reg_print_msg_error(text);
-            reg_exit();
-        }
+        if (mat1Y != mat2Y)
+            NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " +
+                           std::to_string(mat1Y) + "] [" + std::to_string(mat2Y) + " " + std::to_string(mat2X) + "]");
+
         size_t nbElement = mat1Y;
         double resTemp;
 
@@ -943,21 +919,19 @@ mat44 reg_mat44_mul(mat44 const* A, double scalar)
     return out;
 }
 /* *************************************************************** */
-void reg_mat44_disp(mat44 *mat, char * title){
-    printf("%s:\n%.7g\t%.7g\t%.7g\t%.7g\n%.7g\t%.7g\t%.7g\t%.7g\n%.7g\t%.7g\t%.7g\t%.7g\n%.7g\t%.7g\t%.7g\t%.7g\n", title,
-        mat->m[0][0], mat->m[0][1], mat->m[0][2], mat->m[0][3],
-        mat->m[1][0], mat->m[1][1], mat->m[1][2], mat->m[1][3],
-        mat->m[2][0], mat->m[2][1], mat->m[2][2], mat->m[2][3],
-        mat->m[3][0], mat->m[3][1], mat->m[3][2], mat->m[3][3]);
+void reg_mat44_disp(const mat44& mat, const std::string& title) {
+    NR_COUT << title << ":\n"
+            << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\t" << mat.m[0][3] << "\n"
+            << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\t" << mat.m[1][3] << "\n"
+            << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << "\t" << mat.m[2][3] << "\n"
+            << mat.m[3][0] << "\t" << mat.m[3][1] << "\t" << mat.m[3][2] << "\t" << mat.m[3][3] << std::endl;
 }
-
-/* *************************************************************** */
 /* *************************************************************** */
-void reg_mat33_disp(mat33 *mat, char * title){
-    printf("%s:\n%g\t%g\t%g\n%g\t%g\t%g\n%g\t%g\t%g\n", title,
-        mat->m[0][0], mat->m[0][1], mat->m[0][2],
-        mat->m[1][0], mat->m[1][1], mat->m[1][2],
-        mat->m[2][0], mat->m[2][1], mat->m[2][2]);
+void reg_mat33_disp(const mat33& mat, const std::string& title){
+    NR_COUT << title << ":\n"
+            << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\n"
+            << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\n"
+            << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << std::endl;
 }
 /* *************************************************************** */
 //is it square distance or just distance?
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index 7787e3c1..c983340f 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -14,12 +14,6 @@
 
 #pragma once
 
-#include <limits>
-#include <stdio.h>
-#include <math.h>
-#include <iostream>
-#include <vector>
-#include <stdexcept>
 #include "RNifti.h"
 
 #ifdef _OPENMP
@@ -34,15 +28,14 @@
 #endif
 #endif
 
-typedef enum
-{
-   DEF_FIELD,
-   DISP_FIELD,
-   CUB_SPLINE_GRID,
-   DEF_VEL_FIELD,
-   DISP_VEL_FIELD,
-   SPLINE_VEL_GRID,
-   LIN_SPLINE_GRID
+typedef enum {
+    DEF_FIELD,
+    DISP_FIELD,
+    CUB_SPLINE_GRID,
+    DEF_VEL_FIELD,
+    DISP_VEL_FIELD,
+    SPLINE_VEL_GRID,
+    LIN_SPLINE_GRID
 } NREG_TRANS_TYPE;
 
 /* *************************************************************** */
@@ -60,36 +53,6 @@ typedef enum
 #define IMIN(a,b) (a < b ? a : b)
 #define SQR(a) (a==0.0 ? 0.0 : a*a)
 /* *************************************************************** */
-#ifdef RNIFTYREG
-#include <R.h>  // This may have to change to Rcpp.h or RcppEigen.h later
-#define reg_exit(){error("[NiftyReg] Fatal error");}
-#define reg_print_info(executable,text){Rprintf("[%s] %s\n", executable, text);}
-#define reg_print_fct_debug(text){Rprintf("[NiftyReg DEBUG] Function: %s called\n", text);}
-#define reg_print_msg_debug(text){Rprintf("[NiftyReg DEBUG] %s\n", text);}
-#define reg_print_fct_warn(text){REprintf("[NiftyReg WARNING] Function: %s\n", text);}
-#define reg_print_msg_warn(text){REprintf("[NiftyReg WARNING] %s\n", text);}
-#define reg_print_fct_error(text){REprintf("[NiftyReg ERROR] Function: %s\n", text);}
-#define reg_print_msg_error(text){REprintf("[NiftyReg ERROR] %s\n", text);}
-#else
-#ifdef NR_THROW_EXCEP
-#define reg_exit(){ \
-    throw std::runtime_error("[NiftyReg] Exception"); \
-}
-#else // NR_THROW_EXCEP
-#define reg_exit(){ \
-    fprintf(stderr,"[NiftyReg] Exit here. File: %s:%i\n",__FILE__, __LINE__); \
-    exit(1); \
-}
-#endif // NR_THROW_EXCEP
-#define reg_print_info(executable,text){printf("[%s] %s\n", executable, text);}
-#define reg_print_fct_debug(text){printf("[NiftyReg DEBUG] Function: %s called\n", text);}
-#define reg_print_msg_debug(text){printf("[NiftyReg DEBUG] %s\n", text);}
-#define reg_print_fct_warn(text){printf("[NiftyReg WARNING] Function: %s\n", text);}
-#define reg_print_msg_warn(text){printf("[NiftyReg WARNING] %s\n", text);}
-#define reg_print_fct_error(text){fprintf(stderr,"[NiftyReg ERROR] Function: %s\n", text);}
-#define reg_print_msg_error(text){fprintf(stderr,"[NiftyReg ERROR] %s\n", text);}
-#endif
-/* *************************************************************** */
 #if defined(_WIN32) && !defined(__CYGWIN__)
 #include <float.h>
 #include <time.h>
@@ -180,7 +143,7 @@ void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res);
 mat33 reg_mat33_add(mat33 const* A, mat33 const* B);
 mat33 operator+(mat33 A, mat33 B);
 /* *************************************************************** */
-/** @brief Multipy two 3-by-3 matrices
+/** @brief Multiply two 3-by-3 matrices
 */
 mat33 reg_mat33_mul(mat33 const* A,
     mat33 const* B);
@@ -191,7 +154,7 @@ mat33 operator*(mat33 A,
 void reg_mat33_mul(mat44 const* mat, float const* in, float *out);
 void reg_mat33_mul(mat33 const* mat, float const* in, float *out);
 /* *************************************************************** */
-/** @brief Substract two 3-by-3 matrices
+/** @brief Subtract two 3-by-3 matrices
 */
 mat33 reg_mat33_minus(mat33 const* A, mat33 const* B);
 mat33 operator-(mat33 A, mat33 B);
@@ -230,14 +193,14 @@ bool operator==(mat44 A,mat44 B);
 /* *************************************************************** */
 bool operator!=(mat44 A,mat44 B);
 /* *************************************************************** */
-/** @brief Multipy two 4-by-4 matrices
+/** @brief Multiply two 4-by-4 matrices
  */
 mat44 reg_mat44_mul(mat44 const* A,
                     mat44 const* B);
 mat44 operator*(mat44 A,
                 mat44 B);
 /* *************************************************************** */
-/** @brief Multipy a vector with a 4-by-4 matrix
+/** @brief Multiply a vector with a 4-by-4 matrix
  */
 void reg_mat44_mul(mat44 const* mat,
                    float const* in,
@@ -247,7 +210,7 @@ void reg_mat44_mul(mat44 const* mat,
                    double const* in,
                    double *out);
 /* *************************************************************** */
-/** @brief Multipy a 4-by-4 matrix with a scalar
+/** @brief Multiply a 4-by-4 matrix with a scalar
  */
 mat44 reg_mat44_mul(mat44 const* mat,
                     double scalar);
@@ -257,7 +220,7 @@ mat44 reg_mat44_mul(mat44 const* mat,
 mat44 reg_mat44_add(mat44 const* A, mat44 const* B);
 mat44 operator+(mat44 A,mat44 B);
 /* *************************************************************** */
-/** @brief Substract two 4-by-4 matrices
+/** @brief Subtract two 4-by-4 matrices
  */
 mat44 reg_mat44_minus(mat44 const* A, mat44 const* B);
 mat44 operator-(mat44 A,mat44 B);
@@ -274,13 +237,11 @@ float reg_mat44_norm_inf(mat44 const* mat);
 /* *************************************************************** */
 /** @brief Display a mat44 matrix
  */
-void reg_mat44_disp(mat44 *mat,
-                    char * title);
+void reg_mat44_disp(const mat44& mat, const std::string& title);
 /* *************************************************************** */
 /** @brief Display a mat33 matrix
  */
-void reg_mat33_disp(mat33 *mat,
-                    char * title);
+void reg_mat33_disp(const mat33& mat, const std::string& title);
 /* *************************************************************** */
 double get_square_distance3D(float * first_point3D, float * second_point3D);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp
index 157344d5..0ad50020 100644
--- a/reg-lib/cpu/_reg_maths_eigen.cpp
+++ b/reg-lib/cpu/_reg_maths_eigen.cpp
@@ -2,7 +2,7 @@
 
 #include "_reg_maths_eigen.h"
 #include "_reg_maths.h"
-#include "niftilib/nifti1_io.h"
+#include "Debug.hpp"
 
 // Eigen headers are in there because of the nvcc preprocessing step
 #include "Eigen/Core"
@@ -20,11 +20,8 @@
 */
 template<class T>
 void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) {
-   if (size_m == 0 || size_n == 0) {
-      reg_print_fct_error("svd");
-      reg_print_msg_error("The specified matrix is empty");
-      reg_exit();
-   }
+   if (size_m == 0 || size_n == 0)
+      NR_FATAL_ERROR("The specified matrix is empty");
 
 #ifdef _WIN32
    long sm, sn, sn2;
@@ -81,11 +78,8 @@ template void svd<double>(double **in, size_t m, size_t n, double * w, double **
 */
 template<class T>
 void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
-   if (in == nullptr) {
-      reg_print_fct_error("svd");
-      reg_print_msg_error("The specified matrix is empty");
-      reg_exit();
-   }
+   if (in == nullptr)
+      NR_FATAL_ERROR("The specified matrix is empty");
 
 #ifdef _WIN32
    long sm, sn, min_dim, i, j;
@@ -185,13 +179,9 @@ template void svd<double>(double **in, size_t size_m, size_t size_n, double ***U
 /* *************************************************************** */
 template<class T>
 T reg_matrix2DDet(T** mat, size_t m, size_t n) {
-   if (m != n) {
-      char text[255]; sprintf(text, "The matrix have to be square: [%zu %zu]",
-                              m, n);
-      reg_print_fct_error("reg_matrix2DDeterminant");
-      reg_print_msg_error(text);
-      reg_exit();
-   }
+   if (m != n)
+      NR_FATAL_ERROR("The matrix have to be square: [" + std::to_string(m) + " " + std::to_string(n) + "]");
+
    double res;
    if (m == 2) {
       res = static_cast<double>(mat[0][0]) * static_cast<double>(mat[1][1]) - static_cast<double>(mat[1][0]) * static_cast<double>(mat[0][1]);
diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h
index 8b3239cb..16c079c4 100644
--- a/reg-lib/cpu/_reg_maths_eigen.h
+++ b/reg-lib/cpu/_reg_maths_eigen.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "niftilib/nifti1_io.h"
+#include "RNifti.h"
 
 /* *************************************************************** */
 /* Functions calling the Eigen library                             */
diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h
index 12876385..68277bdb 100755
--- a/reg-lib/cpu/_reg_measure.h
+++ b/reg-lib/cpu/_reg_measure.h
@@ -15,9 +15,7 @@ class reg_measure {
 public:
     /// @brief Measure class constructor
     reg_measure() {
-#ifndef NDEBUG
-        reg_print_msg_debug("reg_measure constructor called");
-#endif
+        NR_FUNC_CALLED();
     }
     /// @brief Measure class destructor
     virtual ~reg_measure() {}
@@ -55,9 +53,7 @@ class reg_measure {
             this->warpedGradientBw = nullptr;
             this->voxelBasedGradientBw = nullptr;
         }
-#ifndef NDEBUG
-        reg_print_msg_debug("reg_measure::InitialiseMeasure()");
-#endif
+        NR_FUNC_CALLED();
     }
 
     /// @brief Returns the forward registration measure of similarity value
@@ -67,34 +63,20 @@ class reg_measure {
     /// @brief Returns the registration measure of similarity value
     double GetSimilarityMeasureValue() {  // Do not override
         // Check that all the specified image are of the same datatype
-        if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 && this->referenceImage->datatype != NIFTI_TYPE_FLOAT64) {
-            reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()");
-            reg_print_msg_error("Input images are expected to be of floating precision type");
-            reg_exit();
-        }
-        if (this->warpedImage->datatype != this->referenceImage->datatype) {
-            reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()");
-            reg_print_msg_error("Both input images are expected to have the same type");
-            reg_exit();
-        }
+        if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 && this->referenceImage->datatype != NIFTI_TYPE_FLOAT64)
+            NR_FATAL_ERROR("Input images are expected to be of floating precision type");
+        if (this->warpedImage->datatype != this->referenceImage->datatype)
+            NR_FATAL_ERROR("Both input images are expected to have the same type");
         double sim = GetSimilarityMeasureValueFw();
         if (this->isSymmetric) {
             // Check that all the specified image are of the same datatype
-            if (this->floatingImage->datatype != NIFTI_TYPE_FLOAT32 && this->floatingImage->datatype != NIFTI_TYPE_FLOAT64) {
-                reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()");
-                reg_print_msg_error("Input images are expected to be of floating precision type");
-                reg_exit();
-            }
-            if (this->floatingImage->datatype != this->warpedImageBw->datatype) {
-                reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()");
-                reg_print_msg_error("Both input images are expected to have the same type");
-                reg_exit();
-            }
+            if (this->floatingImage->datatype != NIFTI_TYPE_FLOAT32 && this->floatingImage->datatype != NIFTI_TYPE_FLOAT64)
+                NR_FATAL_ERROR("Input images are expected to be of floating precision type");
+            if (this->floatingImage->datatype != this->warpedImageBw->datatype)
+                NR_FATAL_ERROR("Both input images are expected to have the same type");
             sim += GetSimilarityMeasureValueBw();
         }
-#ifndef NDEBUG
-        reg_print_msg_debug("reg_measure::GetSimilarityMeasureValue called");
-#endif
+        NR_FUNC_CALLED();
         return sim;
     }
 
@@ -105,57 +87,43 @@ class reg_measure {
     /// @brief Compute the voxel-based measure of similarity gradient
     void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {  // Do not override
         // Check if the specified time point exists and is active
-        if (currentTimepoint < 0 || currentTimepoint >= this->referenceImage->nt) {
-            reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient");
-            reg_print_msg_error("The specified active timepoint is not defined in the ref/war images");
-            reg_exit();
-        }
+        if (currentTimepoint < 0 || currentTimepoint >= this->referenceImage->nt)
+            NR_FATAL_ERROR("The specified active timepoint is not defined in the ref/war images");
         if (this->timePointWeight[currentTimepoint] == 0)
             return;
         // Check if all required input images are of the same data type
         int dtype = this->referenceImage->datatype;
-        if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) {
-            reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()");
-            reg_print_msg_error("Input images are expected to be of floating precision type");
-            reg_exit();
-        }
+        if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64)
+            NR_FATAL_ERROR("Input images are expected to be of floating precision type");
         if (this->warpedImage->datatype != dtype ||
             this->warpedGradient->datatype != dtype ||
-            this->voxelBasedGradient->datatype != dtype) {
-            reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()");
-            reg_print_msg_error("Input images are expected to be of the same type");
-            reg_exit();
-        }
+            this->voxelBasedGradient->datatype != dtype)
+            NR_FATAL_ERROR("Input images are expected to be of the same type");
         // Compute the gradient
         GetVoxelBasedSimilarityMeasureGradientFw(currentTimepoint);
         if (this->isSymmetric) {
             dtype = this->floatingImage->datatype;
-            if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) {
-                reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()");
-                reg_print_msg_error("Input images are expected to be of floating precision type");
-                reg_exit();
-            }
+            if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64)
+                NR_FATAL_ERROR("Input images are expected to be of floating precision type");
             if (this->warpedImageBw->datatype != dtype ||
                 this->warpedGradientBw->datatype != dtype ||
-                this->voxelBasedGradientBw->datatype != dtype) {
-                reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()");
-                reg_print_msg_error("Input images are expected to be of the same type");
-                reg_exit();
-            }
+                this->voxelBasedGradientBw->datatype != dtype)
+                NR_FATAL_ERROR("Input images are expected to be of the same type");
             GetVoxelBasedSimilarityMeasureGradientBw(currentTimepoint);
         }
+        NR_FUNC_CALLED();
     }
     virtual void GetDiscretisedValue(nifti_image*, float*, int, int) {}
     virtual void SetTimepointWeight(int timepoint, double weight) {
         this->timePointWeight[timepoint] = weight;
     }
-    virtual double* GetTimepointsWeights(void) {
+    virtual double* GetTimepointsWeights() {
         return this->timePointWeight;
     }
-    virtual nifti_image* GetReferenceImage(void) {
+    virtual nifti_image* GetReferenceImage() {
         return this->referenceImage;
     }
-    virtual int* GetReferenceMask(void) {
+    virtual int* GetReferenceMask() {
         return this->referenceMask;
     }
 
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index abefc7f5..92a37b35 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -149,18 +149,13 @@ void GetMindImageDescriptor(const nifti_image *inputImage,
                             const int *mask,
                             const int& descriptorOffset,
                             const int& currentTimepoint) {
-    if (inputImage->datatype != mindImage->datatype) {
-        reg_print_fct_error("reg_mind::GetMindImageDescriptor");
-        reg_print_msg_error("The input image and the MIND image must have the same datatype");
-        reg_exit();
-    }
+    if (inputImage->datatype != mindImage->datatype)
+        NR_FATAL_ERROR("The input image and the MIND image must have the same datatype");
     std::visit([&](auto&& imgType) {
         using ImgType = std::decay_t<decltype(imgType)>;
         GetMindImageDescriptorCore<ImgType>(inputImage, mindImage, mask, descriptorOffset, currentTimepoint);
     }, NiftiImage::getFloatingDataType(inputImage));
-#ifndef NDEBUG
-    reg_print_fct_debug("GetMindImageDescriptor()");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class DataType>
@@ -278,18 +273,13 @@ void GetMindSscImageDescriptor(const nifti_image *inputImage,
                                const int *mask,
                                const int& descriptorOffset,
                                const int& currentTimepoint) {
-    if (inputImage->datatype != mindSscImage->datatype) {
-        reg_print_fct_error("reg_mindssc::GetMindSscImageDescriptor");
-        reg_print_msg_error("The input image and the MINDSSC image must have the same datatype!");
-        reg_exit();
-    }
+    if (inputImage->datatype != mindSscImage->datatype)
+        NR_FATAL_ERROR("The input image and the MINDSSC image must have the same datatype!");
     std::visit([&](auto&& imgType) {
         using ImgType = std::decay_t<decltype(imgType)>;
         GetMindSscImageDescriptorCore<ImgType>(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint);
     }, NiftiImage::getFloatingDataType(inputImage));
-#ifndef NDEBUG
-    reg_print_fct_debug("GetMindSscImageDescriptor()");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 reg_mind::reg_mind(): reg_ssd() {
@@ -299,9 +289,7 @@ reg_mind::reg_mind(): reg_ssd() {
     this->warpedReferenceImageDescriptor = nullptr;
     this->mindType = MIND_TYPE;
     this->descriptorOffset = 1;
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_mind constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 reg_mind::~reg_mind() {
@@ -369,10 +357,8 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg,
                                                        this->warpedFloatingImageDescriptor->nbyper);
 
     if (this->isSymmetric) {
-        if (this->floatingImage->nt > 1 || this->warpedImageBw->nt > 1) {
-            reg_print_msg_error("reg_mind does not support multiple time point image");
-            reg_exit();
-        }
+        if (this->floatingImage->nt > 1 || this->warpedImageBw->nt > 1)
+            NR_FATAL_ERROR("reg_mind does not support multiple time point image");
         // Initialise the floating descriptor
         this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImage);
         this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4;
@@ -396,13 +382,12 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg,
     }
 
 #ifndef NDEBUG
-    char text[255];
-    reg_print_msg_debug("reg_mind::InitialiseMeasure()");
-    sprintf(text, "Active time point:");
+    std::string msg = "Active time point:";
     for (int i = 0; i < this->referenceImageDescriptor->nt; ++i)
         if (this->timePointWeightDescriptor[i] > 0)
-            sprintf(text, "%s %i", text, i);
-    reg_print_msg_debug(text);
+            msg += " " + std::to_string(i);
+    NR_DEBUG(msg);
+    NR_FUNC_CALLED();
 #endif
 }
 /* *************************************************************** */
@@ -419,11 +404,8 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage,
                                  const int& referenceTimePoint,
                                  const int& mindType) {
     if (referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT32 &&
-        referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT64) {
-        reg_print_fct_error("reg_mind::GetSimilarityMeasureValue");
-        reg_print_msg_error("The reference image descriptor is expected to be of floating precision type");
-        reg_exit();
-    }
+        referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT64)
+        NR_FATAL_ERROR("The reference image descriptor is expected to be of floating precision type");
 
     double mind = 0;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -560,14 +542,10 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
 /* *************************************************************** */
 reg_mindssc::reg_mindssc(): reg_mind() {
     this->mindType = MINDSSC_TYPE;
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_mindssc constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 reg_mindssc::~reg_mindssc() {
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_mindssc destructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp
index 87a1ed0f..7ba015ae 100644
--- a/reg-lib/cpu/_reg_mrf.cpp
+++ b/reg-lib/cpu/_reg_mrf.cpp
@@ -178,9 +178,7 @@ void reg_mrf::Initialise()
    free(edgeWeightMatrix);
    free(index_neighbours);
    this->initialised = true;
-#ifndef NDEBUG
-   reg_print_msg_debug("reg_mrf::Initilisation done.");
-#endif
+   NR_FUNC_CALLED();
 }
 /*****************************************************/
 float* reg_mrf::GetDiscretisedMeasurePtr()
@@ -255,7 +253,7 @@ void reg_mrf::GetDiscretisedMeasure()
    //
    if (myfile.is_open()) {
        // ok, proceed with output
-       std::cout<<"OK - file opened"<<std::endl;
+       NR_COUT<<"OK - file opened"<<std::endl;
        for(int i=0;i<32388174;i++){
            myfile.read(buffer, sizeof(float));
            this->discretised_measures[i]=atof(buffer);
@@ -275,9 +273,7 @@ for(int i=0;i<32388174;i++){
 }
 */
 //DEBUG
- #ifndef NDEBUG
-   reg_print_msg_debug("reg_mrf::GetDiscretisedMeasure done");
-#endif
+   NR_FUNC_CALLED();
 }
 /*****************************************************/
 void reg_mrf::GetOptimalLabel()
@@ -314,9 +310,7 @@ void reg_mrf::UpdateNodePositions()
          }
       }
    }
-#ifndef NDEBUG
-  reg_print_msg_debug("reg_mrf::Optimise done");
-#endif
+   NR_FUNC_CALLED();
 }
 /*****************************************************/
 void reg_mrf::Run()
@@ -565,9 +559,7 @@ void GetGraph_core2D(nifti_image* controlPointGridImage,
                      nifti_image *refImage,
                      int *mask)
 {
-   reg_print_fct_warn("GetGraph_core2D");
-   reg_print_msg_warn("No yet implemented");
-   reg_exit();
+   NR_ERROR("Not yet implemented");
 }
 /* *************************************************************** */
 void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours)
@@ -594,9 +586,7 @@ void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours)
                 );
          break;
       default:
-         reg_print_fct_error("reg_mrf::GetGraph");
-         reg_print_msg_error("Unsupported datatype");
-         reg_exit();
+         NR_FATAL_ERROR("Unsupported datatype");
       }
    } else {
       switch(this->referenceImage->datatype)
@@ -620,9 +610,7 @@ void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours)
                 );
          break;
       default:
-         reg_print_fct_error("reg_mrf::GetGraph");
-         reg_print_msg_error("Unsupported datatype");
-         reg_exit();
+         NR_FATAL_ERROR("Unsupported datatype");
       }
    }
 }
@@ -708,7 +696,6 @@ void reg_mrf::GetPrimsMST(float *edgeWeightMatrix,
    }
    //generate list of nodes ordered by tree depth
    std::sort(treeLevel,treeLevel+num_vertices);
-   //printf("max tree depth: %d, mincost: %f\n",treeLevel[num_vertices-1].first,mincost);
    for(int i=0;i<num_vertices;i++){
       orderedList[i]=treeLevel[i].second;
    }
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index b8ce5a55..db281f08 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -20,21 +20,16 @@ reg_nmi::reg_nmi(): reg_measure() {
     this->jointHistogramProBw = nullptr;
     this->jointHistogramLogBw = nullptr;
     this->entropyValuesBw = nullptr;
-
     for (int i = 0; i < 255; ++i) {
         this->referenceBinNumber[i] = 68;
         this->floatingBinNumber[i] = 68;
     }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 reg_nmi::~reg_nmi() {
     this->DeallocateHistogram();
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi destructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 void reg_nmi::DeallocateHistogram() {
@@ -96,9 +91,7 @@ void reg_nmi::DeallocateHistogram() {
         free(this->entropyValuesBw);
     }
     this->entropyValuesBw = nullptr;
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi::DeallocateHistogram called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 void reg_nmi::InitialiseMeasure(nifti_image *refImg,
@@ -164,14 +157,10 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
             }
         }
     }
-#ifndef NDEBUG
-    char text[255];
-    reg_print_msg_debug("reg_nmi::InitialiseMeasure()");
-    for (int i = 0; i < this->referenceImage->nt; ++i) {
-        sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
-        reg_print_msg_debug(text);
-    }
-#endif
+
+    for (int i = 0; i < this->referenceImage->nt; ++i)
+        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]);
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 static double GetBasisSplineValue(double x) {
@@ -221,11 +210,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
     // Iterate over all active time points
     for (int t = 0; t < referenceImage->nt; ++t) {
         if (timePointWeight[t] > 0) {
-#ifndef NDEBUG
-            char text[255];
-            sprintf(text, "Computing NMI for time point %i", t);
-            reg_print_msg_debug(text);
-#endif
+            NR_DEBUG("Computing NMI for time point " << t);
             // Define some pointers to the current histograms
             double *jointHistoProPtr = jointHistogramPro[t];
             double *jointHistoLogPtr = jointHistogramLog[t];
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index db71d20f..c25ef7e4 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -26,10 +26,7 @@ reg_optimiser<T>::reg_optimiser() {
     this->bestObjFunctionValue = 0;
     this->intOpt = nullptr;
     this->gradientBw = nullptr;
-
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_optimiser<T>::reg_optimiser() called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -42,9 +39,7 @@ reg_optimiser<T>::~reg_optimiser() {
         free(this->bestDofBw);
         this->bestDofBw = nullptr;
     }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_optimiser<T>::~reg_optimiser() called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -88,9 +83,7 @@ void reg_optimiser<T>::Initialise(size_t nvox,
     this->intOpt = intOpt;
     this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
 
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_optimiser<T>::Initialise called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -150,15 +143,10 @@ void reg_optimiser<T>::Optimise(T maxLength, T smallLength, T& startLength) {
         this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
 
         // Check if the update lead to an improvement of the objective function
-        if (this->currentObjFunctionValue > this->bestObjFunctionValue) {
-#ifndef NDEBUG
-            char text[255];
-            sprintf(text, "[%i] objective function: %g | Increment %g | ACCEPTED",
-                    (int)this->currentIterationNumber,
-                    this->currentObjFunctionValue,
-                    currentLength);
-            reg_print_msg_debug(text);
-#endif
+        const bool isImproved = this->currentObjFunctionValue > this->bestObjFunctionValue;
+        NR_DEBUG("[" << this->currentIterationNumber << "] objective function: " << this->currentObjFunctionValue <<
+                 " | Increment " << currentLength << " | " << (isImproved ? "ACCEPTED" : "REJECTED"));
+        if (isImproved) {
             // Improvement - Save the new objective function value
             this->intOpt->UpdateBestObjFunctionValue();
             this->bestObjFunctionValue = this->currentObjFunctionValue;
@@ -170,14 +158,6 @@ void reg_optimiser<T>::Optimise(T maxLength, T smallLength, T& startLength) {
             // Save the current deformation parametrisation
             this->StoreCurrentDof();
         } else {
-#ifndef NDEBUG
-            char text[255];
-            sprintf(text, "[%i] objective function: %g | Increment %g | REJECTED",
-                    (int)this->currentIterationNumber,
-                    this->currentObjFunctionValue,
-                    currentLength);
-            reg_print_msg_debug(text);
-#endif
             // No improvement - Decrease the step size
             currentLength *= 0.5;
         }
@@ -196,9 +176,7 @@ reg_conjugateGradient<T>::reg_conjugateGradient(): reg_optimiser<T>::reg_optimis
     this->array1Bw = nullptr;
     this->array2 = nullptr;
     this->array2Bw = nullptr;
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_conjugateGradient<T>::reg_conjugateGradient() called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -219,9 +197,7 @@ reg_conjugateGradient<T>::~reg_conjugateGradient() {
         free(this->array2Bw);
         this->array2Bw = nullptr;
     }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_conjugateGradient<T>::~reg_conjugateGradient() called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -252,9 +228,7 @@ void reg_conjugateGradient<T>::Initialise(size_t nvox,
         this->array2Bw = (T*)malloc(this->dofNumberBw * sizeof(T));
     }
 
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_conjugateGradient<T>::Initialise called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template <class T>
@@ -278,9 +252,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
     T *array2PtrBw = this->array2Bw;
 
     if (this->firstCall) {
-#ifndef NDEBUG
-        reg_print_msg_debug("Conjugate gradient initialisation");
-#endif
+        NR_DEBUG("Conjugate gradient initialisation");
         // first conjugate gradient iteration
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -300,9 +272,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
         }
         this->firstCall = false;
     } else {
-#ifndef NDEBUG
-        reg_print_msg_debug("Conjugate gradient update");
-#endif
+        NR_DEBUG("Conjugate gradient update");
         double dgg = 0, gg = 0;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -427,19 +397,13 @@ void reg_lbfgs<T>::Initialise(size_t nvox,
     for (size_t i = 0; i < this->stepToKeep; ++i) {
         this->diffDof[i] = (T*)malloc(this->dofNumber * sizeof(T));
         this->diffGrad[i] = (T*)malloc(this->dofNumber * sizeof(T));
-        if (this->diffDof[i] == nullptr || this->diffGrad[i] == nullptr) {
-            reg_print_fct_error("reg_lbfgs<T>::Initialise");
-            reg_print_msg_error("Out of memory");
-            reg_exit();
-        }
+        if (this->diffDof[i] == nullptr || this->diffGrad[i] == nullptr)
+            NR_FATAL_ERROR("Out of memory");
     }
     this->oldDof = (T*)malloc(this->dofNumber * sizeof(T));
     this->oldGrad = (T*)malloc(this->dofNumber * sizeof(T));
-    if (this->oldDof == nullptr || this->oldGrad == nullptr) {
-        reg_print_fct_error("reg_lbfgs<T>::Initialise");
-        reg_print_msg_error("Out of memory");
-        reg_exit();
-    }
+    if (this->oldDof == nullptr || this->oldGrad == nullptr)
+        NR_FATAL_ERROR("Out of memory");
 }
 /* *************************************************************** */
 template <class T>
diff --git a/reg-lib/cpu/_reg_polyAffine.cpp b/reg-lib/cpu/_reg_polyAffine.cpp
index 4560f990..ddd2a8aa 100644
--- a/reg-lib/cpu/_reg_polyAffine.cpp
+++ b/reg-lib/cpu/_reg_polyAffine.cpp
@@ -19,19 +19,14 @@ reg_polyAffine<T>::reg_polyAffine(int refTimePoint,int floTimePoint)
    : reg_base<T>::reg_base(refTimePoint,floTimePoint)
 {
    this->executableName=(char *)"NiftyReg PolyAffine";
-
-#ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_polyAffine constructor called\n");
-#endif
+   NR_FUNC_CALLED();
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 template <class T>
 reg_polyAffine<T>::~reg_polyAffine()
 {
-#ifndef NDEBUG
-   printf("[NiftyReg DEBUG] reg_polyAffine destructor called\n");
-#endif
+   NR_FUNC_CALLED();
 }
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index d881001b..a74772bf 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -104,12 +104,10 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
     // by the the log tensor components
     if (dtIndicies[0] != -1) {
 #ifndef NDEBUG
-        char text[255];
-        reg_print_msg_debug("DTI indices:");
-        sprintf(text, "Active time point:");
+        std::string msg = "DTI indices: Active time point:";
         for (unsigned i = 0; i < 6; i++)
-            sprintf(text, "%s %i", text, dtIndicies[i]);
-        reg_print_msg_debug(text);
+            msg += " " + std::to_string(dtIndicies[i]);
+        NR_DEBUG(msg);
 #endif
 
 #ifdef WIN32
@@ -122,9 +120,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
 
         *originalFloatingData = malloc(floatingImage->nvox * sizeof(DataType));
         memcpy(*originalFloatingData, floatingImage->data, floatingImage->nvox * sizeof(DataType));
-#ifndef NDEBUG
-        reg_print_msg_debug("The floating image data has been copied");
-#endif
+        NR_DEBUG("The floating image data has been copied");
 
         /* As the tensor has 6 unique components that we need to worry about, read them out
       for the floating image. */
@@ -182,9 +178,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
 #ifdef _OPENMP
         omp_set_num_threads(max_thread_number);
 #endif
-#ifndef NDEBUG
-        reg_print_msg_debug("Tensors have been logged");
-#endif
+        NR_DEBUG("Tensors have been logged");
     }
 }
 /* *************************************************************** */
@@ -315,9 +309,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
             omp_set_num_threads(max_thread_number);
 #endif
         }
-#ifndef NDEBUG
-        reg_print_msg_debug("Exponentiated and rotated all voxels");
-#endif
+        NR_DEBUG("Exponentiated and rotated all voxels");
     }
 }
 /* *************************************************************** */
@@ -377,11 +369,7 @@ void ResampleImage3D(const nifti_image *floatingImage,
 
     // Iteration over the different volume along the 4th axis
     for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) {
-#ifndef NDEBUG
-        char text[255];
-        sprintf(text, "3D resampling of volume number %zu", t);
-        reg_print_msg_debug(text);
-#endif
+        NR_DEBUG("3D resampling of volume number " << t);
 
         FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber];
         const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber];
@@ -560,11 +548,8 @@ void ResampleImage2D(const nifti_image *floatingImage,
 
     // Iteration over the different volume along the 4th axis
     for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) {
-#ifndef NDEBUG
-        char text[255];
-        sprintf(text, "2D resampling of volume number %zu", t);
-        reg_print_msg_debug(text);
-#endif
+        NR_DEBUG("2D resampling of volume number " << t);
+
         FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber];
         const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber];
 
@@ -711,43 +696,26 @@ void reg_resampleImage(nifti_image *floatingImage,
                        const float& paddingValue,
                        const bool *dtiTimepoint,
                        const mat33 *jacMat) {
-    if (floatingImage->datatype != warpedImage->datatype) {
-        reg_print_fct_error("reg_resampleImage");
-        reg_print_msg_error("The floating and warped image should have the same data type");
-        reg_exit();
-    }
-
-    if (floatingImage->nt != warpedImage->nt) {
-        reg_print_fct_error("reg_resampleImage");
-        reg_print_msg_error("The floating and warped images have different dimension along the time axis");
-        reg_exit();
-    }
-    if (deformationField->datatype != NIFTI_TYPE_FLOAT32 &&
-        deformationField->datatype != NIFTI_TYPE_FLOAT64) {
-        reg_print_fct_error("reg_resampleImage");
-        reg_print_msg_error("The deformation field image is expected to be of type float or double");
-        reg_exit();
-    }
+    if (floatingImage->datatype != warpedImage->datatype)
+        NR_FATAL_ERROR("The floating and warped image should have the same data type");
+    if (floatingImage->nt != warpedImage->nt)
+        NR_FATAL_ERROR("The floating and warped images have different dimensions along the time axis");
+    if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && deformationField->datatype != NIFTI_TYPE_FLOAT64)
+        NR_FATAL_ERROR("The deformation field image is expected to be of type float or double");
 
     // Define the DTI indices if required
     int dtIndicies[6];
     for (int i = 0; i < 6; ++i) dtIndicies[i] = -1;
     if (dtiTimepoint != nullptr) {
-        if (jacMat == nullptr) {
-            reg_print_fct_error("reg_resampleImage");
-            reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided");
-            reg_exit();
-        }
+        if (jacMat == nullptr)
+            NR_FATAL_ERROR("DTI resampling: No Jacobian matrix array has been provided");
         int j = 0;
         for (int i = 0; i < floatingImage->nt; ++i) {
             if (dtiTimepoint[i])
                 dtIndicies[j++] = i;
         }
-        if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) {
-            reg_print_fct_error("reg_resampleImage");
-            reg_print_msg_error("DTI resampling: Unexpected number of DTI components");
-            reg_exit();
-        }
+        if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3))
+            NR_FATAL_ERROR("DTI resampling: Unexpected number of DTI components");
     }
 
     // a mask array is created if no mask is specified
@@ -812,9 +780,7 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
     void (*kernelCompFctPtr)(double, double *);
     switch (kernel) {
     case 0:
-        reg_print_fct_error("ResampleImage3D_PSF");
-        reg_print_msg_error("Not implemented for NN interpolation yet");
-        reg_exit();
+        NR_FATAL_ERROR("Not implemented for NN interpolation yet");
         kernel_size = 2;
         kernelCompFctPtr = &interpNearestNeighKernel;
         kernel_offset = 0;
@@ -838,9 +804,7 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
 
     // Iteration over the different volume along the 4th axis
     for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) {
-#ifndef NDEBUG
-        printf("[NiftyReg DEBUG] 3D resampling of volume number %zu\n", t);
-#endif
+        NR_DEBUG("3D resampling of volume number " << t);
 
         FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber];
         const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber];
@@ -873,7 +837,7 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
     shared(warpedVoxelNumber, mask, paddingValue,\
     a, b, c , warpedPlaneNumber, warpedLineNumber, floatingIntensity,\
     deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, floatingIJKMatrix,\
-    floatingImage, warpedImage, kernelCompFctPtr, kernel_offset, kernel_size, warpedIntensity,stderr)
+    floatingImage, warpedImage, kernelCompFctPtr, kernel_offset, kernel_size, warpedIntensity)
 #endif // _OPENMP
 */
         for (index = 0; index < warpedVoxelNumber; index++) {
@@ -903,7 +867,7 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
                             psfWeight = static_cast<float>(interpWindowedSincKernel_Samp(shiftSamp[0], shiftall) *
                                                            interpWindowedSincKernel_Samp(shiftSamp[1], shiftall) *
                                                            interpWindowedSincKernel_Samp(shiftSamp[2], shiftall));
-                            //  std::cout<<shiftSamp[0]<<", "<<shiftSamp[1]<<", "<<shiftSamp[2]<<", "<<psfWeight<<std::endl;
+                            //  NR_COUT<<shiftSamp[0]<<", "<<shiftSamp[1]<<", "<<shiftSamp[2]<<", "<<psfWeight<<std::endl;
 
                             // Interpolate (trilinearly) the deformation field for non-integer positions
                             float scalling = 1.0f;
@@ -1111,9 +1075,7 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
     void (*kernelCompFctPtr)(double, double *);
     switch (kernel) {
     case 0:
-        reg_print_fct_error("ResampleImage3D_PSF");
-        reg_print_msg_error("Not implemented for NN interpolation yet");
-        reg_exit();
+        NR_FATAL_ERROR("Not implemented for NN interpolation yet");
         kernel_size = 2;
         kernelCompFctPtr = &interpNearestNeighKernel;
         kernel_offset = 0;
@@ -1137,11 +1099,7 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
 
     // Iteration over the different volume along the 4th axis
     for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) {
-#ifndef NDEBUG
-        char text[255];
-        sprintf(text, "PSF 3D resampling of volume number %zu\n", t);
-        reg_print_msg_debug(text);
-#endif
+        NR_DEBUG("PSF 3D resampling of volume number " << t);
 
         FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber];
         const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber];
@@ -1482,9 +1440,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
     // The deformation field contains the position in the real world
     if (deformationFieldImage->nu > 2) {
         if (algorithm == 2) {
-#ifndef NDEBUG
-            std::cout << "Running ResampleImage3D_PSF_Sinc 1" << std::endl;
-#endif
+            NR_DEBUG("Running ResampleImage3D_PSF_Sinc 1");
             ResampleImage3D_PSF_Sinc<FloatingType, FieldType>(floatingImage,
                                                               deformationFieldImage,
                                                               warpedImage,
@@ -1492,9 +1448,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
                                                               paddingValue,
                                                               interp);
         } else {
-#ifndef NDEBUG
-            std::cout << "Running ResampleImage3D_PSF" << std::endl;
-#endif
+            NR_DEBUG("Running ResampleImage3D_PSF");
             ResampleImage3D_PSF<FloatingType, FieldType>(floatingImage,
                                                          deformationFieldImage,
                                                          warpedImage,
@@ -1505,9 +1459,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
                                                          algorithm);
         }
     } else {
-        reg_print_fct_error("reg_resampleImage_PSF");
-        reg_print_msg_error("Not implemented for 2D images yet");
-        reg_exit();
+        NR_FATAL_ERROR("Not implemented for 2D images yet");
     }
 }
 /* *************************************************************** */
@@ -1519,22 +1471,12 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
                            const float& paddingValue,
                            const mat33 *jacMat,
                            const char& algorithm) {
-    if (floatingImage->datatype != warpedImage->datatype) {
-        reg_print_fct_error("reg_resampleImage");
-        reg_print_msg_error("The floating and warped image should have the same data type");
-        reg_exit();
-    }
-    if (floatingImage->nt != warpedImage->nt) {
-        reg_print_fct_error("reg_resampleImage");
-        reg_print_msg_error("The floating and warped images have different dimension along the time axis");
-        reg_exit();
-    }
-    if (deformationField->datatype != NIFTI_TYPE_FLOAT32 &&
-        deformationField->datatype != NIFTI_TYPE_FLOAT64) {
-        reg_print_fct_error("reg_resampleImage");
-        reg_print_msg_error("The deformation field image is expected to be of type float or double");
-        reg_exit();
-    }
+    if (floatingImage->datatype != warpedImage->datatype)
+        NR_FATAL_ERROR("The floating and warped image should have the same data type");
+    if (floatingImage->nt != warpedImage->nt)
+        NR_FATAL_ERROR("The floating and warped images have different dimension along the time axis");
+    if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && deformationField->datatype != NIFTI_TYPE_FLOAT64)
+        NR_FATAL_ERROR("The deformation field image is expected to be of type float or double");
 
     // a mask array is created if no mask is specified
     bool MrPropreRules = false;
@@ -1953,23 +1895,12 @@ void reg_resampleGradient(const nifti_image *floatingImage,
                           const nifti_image *deformationField,
                           const int& interp,
                           const float& paddingValue) {
-    if (interp != 1) {
-        reg_print_fct_error("reg_resampleGradient");
-        reg_print_msg_error("Only linear interpolation is supported");
-        reg_exit();
-    }
-    if (floatingImage->datatype != warpedImage->datatype ||
-        floatingImage->datatype != deformationField->datatype) {
-        reg_print_fct_error("reg_resampleGradient");
-        reg_print_msg_error("Input images are expected to have the same type");
-        reg_exit();
-    }
-    if (floatingImage->datatype != NIFTI_TYPE_FLOAT32 &&
-        floatingImage->datatype != NIFTI_TYPE_FLOAT64) {
-        reg_print_fct_error("reg_resampleGradient");
-        reg_print_msg_error("Input images are expected to be of type float or double");
-        reg_exit();
-    }
+    if (interp != 1)
+        NR_FATAL_ERROR("Only linear interpolation is supported");
+    if (floatingImage->datatype != warpedImage->datatype || floatingImage->datatype != deformationField->datatype)
+        NR_FATAL_ERROR("Input images are expected to have the same type");
+    if (floatingImage->datatype != NIFTI_TYPE_FLOAT32 && floatingImage->datatype != NIFTI_TYPE_FLOAT64)
+        NR_FATAL_ERROR("Input images are expected to be of type float or double");
 
     std::visit([&](auto&& floImgDataType) {
         using FloImgDataType = std::decay_t<decltype(floImgDataType)>;
@@ -1994,11 +1925,8 @@ void TrilinearImageGradient(const nifti_image *floatingImage,
                             const int *mask,
                             const float& paddingValue,
                             const int& activeTimepoint) {
-    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) {
-        reg_print_fct_error("TrilinearImageGradient");
-        reg_print_msg_error("The specified active timepoint is not defined in the floating image");
-        reg_exit();
-    }
+    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
+        NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
 #ifdef _WIN32
     long index;
     const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3);
@@ -2024,11 +1952,7 @@ void TrilinearImageGradient(const nifti_image *floatingImage,
         floatingIJKMatrix = &floatingImage->sto_ijk;
     else floatingIJKMatrix = &floatingImage->qto_ijk;
 
-#ifndef NDEBUG
-    char text[255];
-    sprintf(text, "3D linear gradient computation of volume number %i", activeTimepoint);
-    reg_print_msg_debug(text);
-#endif
+    NR_DEBUG("3D linear gradient computation of volume number " << activeTimepoint);
 
     int previous[3], a, b, c, X, Y, Z;
     FieldType position[3], xBasis[2], yBasis[2], zBasis[2];
@@ -2170,11 +2094,8 @@ void BilinearImageGradient(const nifti_image *floatingImage,
                            const int *mask,
                            const float& paddingValue,
                            const int& activeTimepoint) {
-    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) {
-        reg_print_fct_error("TrilinearImageGradient");
-        reg_print_msg_error("The specified active timepoint is not defined in the floating image");
-        reg_exit();
-    }
+    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
+        NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
 #ifdef _WIN32
     long index;
     const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2);
@@ -2198,11 +2119,7 @@ void BilinearImageGradient(const nifti_image *floatingImage,
         floatingIJKMatrix = &floatingImage->sto_ijk;
     else floatingIJKMatrix = &floatingImage->qto_ijk;
 
-#ifndef NDEBUG
-    char text[255];
-    sprintf(text, "2D linear gradient computation of volume number %i", activeTimepoint);
-    reg_print_msg_debug(text);
-#endif
+    NR_DEBUG("2D linear gradient computation of volume number " << activeTimepoint);
 
     FieldType position[3], xBasis[2], yBasis[2], relative, world[2], grad[2];
     FieldType deriv[2];
@@ -2287,11 +2204,8 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage,
                                 const int *mask,
                                 const float& paddingValue,
                                 const int& activeTimepoint) {
-    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) {
-        reg_print_fct_error("TrilinearImageGradient");
-        reg_print_msg_error("The specified active timepoint is not defined in the floating image");
-        reg_exit();
-    }
+    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
+        NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
 #ifdef _WIN32
     long index;
     const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3);
@@ -2317,11 +2231,7 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage,
         floatingIJKMatrix = &floatingImage->sto_ijk;
     else floatingIJKMatrix = &floatingImage->qto_ijk;
 
-#ifndef NDEBUG
-    char text[255];
-    sprintf(text, "3D cubic spline gradient computation of volume number %i", activeTimepoint);
-    reg_print_msg_debug(text);
-#endif
+    NR_DEBUG("3D cubic spline gradient computation of volume number " << activeTimepoint);
 
     int previous[3], c, Z, b, Y, a;
 
@@ -2435,11 +2345,8 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage,
                                 const int *mask,
                                 const float& paddingValue,
                                 const int& activeTimepoint) {
-    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) {
-        reg_print_fct_error("TrilinearImageGradient");
-        reg_print_msg_error("The specified active timepoint is not defined in the floating image");
-        reg_exit();
-    }
+    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
+        NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
 #ifdef _WIN32
     long index;
     const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2);
@@ -2463,11 +2370,8 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage,
         floatingIJKMatrix = &floatingImage->sto_ijk;
     else floatingIJKMatrix = &floatingImage->qto_ijk;
 
-#ifndef NDEBUG
-    char text[255];
-    sprintf(text, "2D cubic spline gradient computation of volume number %i", activeTimepoint);
-    reg_print_msg_debug(text);
-#endif
+    NR_DEBUG("2D cubic spline gradient computation of volume number " << activeTimepoint);
+
     int previous[2], b, Y, a;
     double xBasis[4], yBasis[4], xDeriv[4], yDeriv[4], relative;
     FieldType coeff, position[3], world[3], grad[2];
@@ -2613,18 +2517,10 @@ void reg_getImageGradient(nifti_image *floatingImage,
                           const bool *dtiTimepoint,
                           const mat33 *jacMat,
                           const nifti_image *warpedImage) {
-    if (deformationField->datatype != NIFTI_TYPE_FLOAT32 &&
-        deformationField->datatype != NIFTI_TYPE_FLOAT64) {
-        reg_print_fct_error("reg_getImageGradient");
-        reg_print_msg_error("The deformation field image is expected to be of type float or double");
-        reg_exit();
-    }
-    if (warpedGradient->datatype != NIFTI_TYPE_FLOAT32 &&
-        warpedGradient->datatype != NIFTI_TYPE_FLOAT64) {
-        reg_print_fct_error("reg_getImageGradient");
-        reg_print_msg_error("The warped gradient image is expected to be of type float or double");
-        reg_exit();
-    }
+    if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && deformationField->datatype != NIFTI_TYPE_FLOAT64)
+        NR_FATAL_ERROR("The deformation field image is expected to be of type float or double");
+    if (warpedGradient->datatype != NIFTI_TYPE_FLOAT32 && warpedGradient->datatype != NIFTI_TYPE_FLOAT64)
+        NR_FATAL_ERROR("The warped gradient image is expected to be of type float or double");
 
     // a mask array is created if no mask is specified
     bool MrPropreRule = false;
@@ -2638,21 +2534,15 @@ void reg_getImageGradient(nifti_image *floatingImage,
     int dtIndicies[6];
     for (int i = 0; i < 6; ++i) dtIndicies[i] = -1;
     if (dtiTimepoint != nullptr) {
-        if (jacMat == nullptr) {
-            reg_print_fct_error("reg_getImageGradient");
-            reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided");
-            reg_exit();
-        }
+        if (jacMat == nullptr)
+            NR_FATAL_ERROR("DTI resampling: No Jacobian matrix array has been provided");
         int j = 0;
         for (int i = 0; i < floatingImage->nt; ++i) {
             if (dtiTimepoint[i])
                 dtIndicies[j++] = i;
         }
-        if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) {
-            reg_print_fct_error("reg_getImageGradient");
-            reg_print_msg_error("DTI resampling: Unexpected number of DTI components");
-            reg_exit();
-        }
+        if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3))
+            NR_FATAL_ERROR("DTI resampling: Unexpected number of DTI components");
     }
 
     std::visit([&](auto&& defFieldDataType, auto&& floImgDataType, auto&& warpedGradDataType) {
@@ -2742,16 +2632,10 @@ void reg_getImageGradient_symDiff(const nifti_image *img,
                                   const int *mask,
                                   const float& paddingValue,
                                   const int& timepoint) {
-    if (img->datatype != gradImg->datatype) {
-        reg_print_fct_error("reg_getImageGradient_symDiff");
-        reg_print_msg_error("Input images are expected to be of the same type");
-        reg_exit();
-    }
-    if (img->datatype != NIFTI_TYPE_FLOAT32 && img->datatype != NIFTI_TYPE_FLOAT64) {
-        reg_print_fct_error("reg_getImageGradient_symDiff");
-        reg_print_msg_error("Input images are expected to be of floating precision type");
-        reg_exit();
-    }
+    if (img->datatype != gradImg->datatype)
+        NR_FATAL_ERROR("Input images are expected to be of the same type");
+    if (img->datatype != NIFTI_TYPE_FLOAT32 && img->datatype != NIFTI_TYPE_FLOAT64)
+        NR_FATAL_ERROR("Input images are expected to be of floating precision type");
 
     std::visit([&](auto&& imgDataType) {
         using ImgDataType = std::decay_t<decltype(imgDataType)>;
diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h
index e4e88ac8..f69c4cf4 100755
--- a/reg-lib/cpu/_reg_resampling.h
+++ b/reg-lib/cpu/_reg_resampling.h
@@ -13,7 +13,7 @@
 
 #pragma once
 
-#include "niftilib/nifti1_io.h"
+#include "RNifti.h"
 
 /** @brief This function resample a floating image into the space of a reference/warped image.
  * The deformation is provided by a 4D nifti image which is in the space of the reference image.
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 5fc84cb8..78c9fe54 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -18,9 +18,7 @@
 /* *************************************************************** */
 reg_ssd::reg_ssd(): reg_measure() {
     memset(this->normaliseTimePoint, 0, 255 * sizeof(bool));
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_ssd constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 void reg_ssd::InitialiseMeasure(nifti_image *refImg,
@@ -48,11 +46,8 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg,
                                    voxelBasedGradBw);
 
     // Check that the input images have the same number of time point
-    if (this->referenceImage->nt != this->floatingImage->nt) {
-        reg_print_fct_error("reg_ssd::InitialiseMeasure");
-        reg_print_msg_error("This number of time point should be the same for both input images");
-        reg_exit();
-    }
+    if (this->referenceImage->nt != this->floatingImage->nt)
+        NR_FATAL_ERROR("This number of time point should be the same for both input images");
     // Input images are normalised between 0 and 1
     for (int i = 0; i < this->referenceImage->nt; ++i) {
         if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) {
@@ -76,20 +71,17 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg,
         }
     }
 #ifdef MRF_USE_SAD
-    reg_print_msg_warn("SAD is used instead of SSD");
+    NR_WARN("SAD is used instead of SSD");
 #endif
 #ifndef NDEBUG
-    char text[255];
-    reg_print_msg_debug("reg_ssd::InitialiseMeasure()");
-    for (int i = 0; i < this->referenceImage->nt; ++i) {
-        sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]);
-        reg_print_msg_debug(text);
-    }
-    sprintf(text, "Normalize time point:");
+    for (int i = 0; i < this->referenceImage->nt; ++i)
+        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]);
+    std::string msg = "Normalize time point:";
     for (int i = 0; i < this->referenceImage->nt; ++i)
         if (this->normaliseTimePoint[i])
-            sprintf(text, "%s %i", text, i);
-    reg_print_msg_debug(text);
+            msg += " " + std::to_string(i);
+    NR_DEBUG(msg);
+    NR_FUNC_CALLED();
 #endif
 }
 /* *************************************************************** */
@@ -835,9 +827,7 @@ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage,
                                                             this->warpedImage,
                                                             this->referenceMask);
         } else {
-            reg_print_fct_error("reg_ssd::GetDiscretisedValue");
-            reg_print_msg_error("Not implemented in 2D yet");
-            reg_exit();
+            NR_FATAL_ERROR("Not implemented in 2D yet");
         }
     }, NiftiImage::getFloatingDataType(this->referenceImage));
 }
diff --git a/reg-lib/cpu/_reg_thinPlateSpline.cpp b/reg-lib/cpu/_reg_thinPlateSpline.cpp
index b43b857c..186349a2 100644
--- a/reg-lib/cpu/_reg_thinPlateSpline.cpp
+++ b/reg-lib/cpu/_reg_thinPlateSpline.cpp
@@ -140,19 +140,12 @@ T reg_tps<T>::GetTPSweight(T dist)
 template <class T>
 void reg_tps<T>::InitialiseTPS()
 {
-   size_t matrix_side=this->number + this->dim + 1;
-   T *matrixL=(T *)calloc(matrix_side*matrix_side,sizeof(T));
+   const size_t matrixSide=this->number + this->dim + 1;
+   T *matrixL=(T*)calloc(matrixSide*matrixSide,sizeof(T));
    if(matrixL==nullptr)
-   {
-      char text[255];
-      sprintf(text,"Size should be %g GB (%i x %i)",
-              (T)(matrix_side*matrix_side)*sizeof(T)/1000000000.f,
-              (int)matrix_side,(int)matrix_side);
-      reg_print_fct_error("reg_tps<T>::InitialiseTPS()");
-      reg_print_msg_error("Calloc failed, the TPS distance matrix is too large");
-      reg_print_msg_error(text);
-      reg_exit();
-   }
+      NR_FATAL_ERROR("Calloc failed, the TPS distance matrix is too large! Size should be " +
+                     std::to_string(matrixSide * matrixSide * sizeof(T) / 1000000000.f) + " GB (" +
+                     std::to_string(matrixSide) + " x " + std::to_string(matrixSide) + ")");
 
    // Distance matrix is computed
    double a=0.;
@@ -163,42 +156,42 @@ void reg_tps<T>::InitialiseTPS()
          T distance = this->GetTPSEuclideanDistance(i,j);
          a += distance * 2.;
          distance = this->GetTPSweight(distance);
-         matrixL[i*matrix_side+j]=matrixL[j*matrix_side+i]=distance;
+         matrixL[i*matrixSide+j]=matrixL[j*matrixSide+i]=distance;
       }
    }
    a/=(double)(this->number*this->number);
    a=(double)this->approxInter*a*a;
    for(size_t i=0; i<this->number; ++i)
    {
-      matrixL[i*matrix_side+i]=a;
+      matrixL[i*matrixSide+i]=a;
    }
    for(size_t i=0; i<this->number; ++i)
    {
-      matrixL[i*matrix_side+this->number]=matrixL[(this->number)*matrix_side+i]=1;
-      matrixL[i*matrix_side+this->number+1]=matrixL[(this->number+1)*matrix_side+i]=this->positionX[i];
-      matrixL[i*matrix_side+this->number+2]=matrixL[(this->number+2)*matrix_side+i]=this->positionY[i];
+      matrixL[i*matrixSide+this->number]=matrixL[(this->number)*matrixSide+i]=1;
+      matrixL[i*matrixSide+this->number+1]=matrixL[(this->number+1)*matrixSide+i]=this->positionX[i];
+      matrixL[i*matrixSide+this->number+2]=matrixL[(this->number+2)*matrixSide+i]=this->positionY[i];
       if(this->dim==3)
-         matrixL[i*matrix_side+this->number+3]=matrixL[(this->number+3)*matrix_side+i]=this->positionZ[i];
+         matrixL[i*matrixSide+this->number+3]=matrixL[(this->number+3)*matrixSide+i]=this->positionZ[i];
 
    }
-   for(size_t i=this->number; i<matrix_side; ++i)
+   for(size_t i=this->number; i<matrixSide; ++i)
    {
-      for(size_t j=this->number; j<matrix_side; ++j)
+      for(size_t j=this->number; j<matrixSide; ++j)
       {
-         matrixL[i*matrix_side+j]=0;
+         matrixL[i*matrixSide+j]=0;
       }
    }
 
    // Run the LU decomposition
-   size_t *index=(size_t *)calloc(matrix_side,sizeof(size_t));
-   reg_LUdecomposition<T>(matrixL, matrix_side, index);
+   size_t *index=(size_t *)calloc(matrixSide,sizeof(size_t));
+   reg_LUdecomposition<T>(matrixL, matrixSide, index);
 
    // Perform the multiplications
-   reg_matrixInvertMultiply<T>(matrixL, matrix_side, index, this->coefficientX);
-   reg_matrixInvertMultiply<T>(matrixL, matrix_side, index, this->coefficientY);
+   reg_matrixInvertMultiply<T>(matrixL, matrixSide, index, this->coefficientX);
+   reg_matrixInvertMultiply<T>(matrixL, matrixSide, index, this->coefficientY);
    if(this->dim==3)
    {
-      reg_matrixInvertMultiply<T>(matrixL, matrix_side, index, this->coefficientZ);
+      reg_matrixInvertMultiply<T>(matrixL, matrixSide, index, this->coefficientZ);
    }
 
    free(index);
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index aea666bb..35bfebd1 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -203,9 +203,7 @@ void reg_intensityRescale(nifti_image *image,
         reg_intensityRescale_core<double>(image, timepoint, newMin, newMax);
         break;
     default:
-        reg_print_fct_error("reg_intensityRescale");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
     }
 }
 /* *************************************************************** */
@@ -248,9 +246,7 @@ void reg_tools_removeSCLInfo(nifti_image *image) {
         reg_tools_removeSCLInfo<double>(image);
         break;
     default:
-        reg_print_fct_error("reg_tools_removeSCLInfo");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
     }
 }
 /* *************************************************************** */
@@ -330,9 +326,7 @@ void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) {
         reg_thresholdImage<T, double>(image, lowThr, upThr);
         break;
     default:
-        reg_print_fct_error("reg_thresholdImage");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
     }
 }
 template void reg_thresholdImage<float>(nifti_image*, float, float);
@@ -388,23 +382,15 @@ void reg_tools_changeDatatype(nifti_image *image, int type) {
     } else {
         if (sizeof(NewType) == sizeof(unsigned char)) {
             image->datatype = NIFTI_TYPE_UINT8;
-#ifndef NDEBUG
-            reg_print_msg_debug("new datatype is NIFTI_TYPE_UINT8");
-#endif
+            NR_DEBUG("new datatype is NIFTI_TYPE_UINT8");
         } else if (sizeof(NewType) == sizeof(float)) {
             image->datatype = NIFTI_TYPE_FLOAT32;
-#ifndef NDEBUG
-            reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT32");
-#endif
+            NR_DEBUG("new datatype is NIFTI_TYPE_FLOAT32");
         } else if (sizeof(NewType) == sizeof(double)) {
             image->datatype = NIFTI_TYPE_FLOAT64;
-#ifndef NDEBUG
-            reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT64");
-#endif
+            NR_DEBUG("new datatype is NIFTI_TYPE_FLOAT64");
         } else {
-            reg_print_fct_error("reg_tools_changeDatatype");
-            reg_print_msg_error("Only change to unsigned char, float or double are supported");
-            reg_exit();
+            NR_FATAL_ERROR("Only change to unsigned char, float or double are supported");
         }
     }
     free(image->data);
@@ -445,9 +431,7 @@ void reg_tools_changeDatatype(nifti_image *image, int type) {
         reg_tools_changeDatatype<NewType, double>(image, type);
         break;
     default:
-        reg_print_fct_error("reg_tools_changeDatatype");
-        reg_print_msg_error("Unsupported datatype");
-        reg_exit();
+        NR_FATAL_ERROR("Unsupported datatype");
     }
 }
 template void reg_tools_changeDatatype<unsigned char>(nifti_image*, int);
@@ -473,9 +457,7 @@ struct Operation {
         case Type::Divide:
             return lhs / rhs;
         default:
-            reg_print_fct_error("Operation::operator()");
-            reg_print_msg_error("Unsupported operation");
-            reg_exit();
+            NR_FATAL_ERROR("Unsupported operation");
             return 0;
         }
     }
@@ -516,16 +498,10 @@ void reg_tools_operationImageToImage(const nifti_image *img1,
 void reg_tools_addImageToImage(const nifti_image *img1,
                                const nifti_image *img2,
                                nifti_image *res) {
-    if (img1->datatype != res->datatype || img2->datatype != res->datatype) {
-        reg_print_fct_error("reg_tools_addImageToImage");
-        reg_print_msg_error("Input images are expected to be of the same type");
-        reg_exit();
-    }
-    if (img1->nvox != res->nvox || img2->nvox != res->nvox) {
-        reg_print_fct_error("reg_tools_addImageToImage");
-        reg_print_msg_error("Input images are expected to have the same size");
-        reg_exit();
-    }
+    if (img1->datatype != res->datatype || img2->datatype != res->datatype)
+        NR_FATAL_ERROR("Input images are expected to be of the same type");
+    if (img1->nvox != res->nvox || img2->nvox != res->nvox)
+        NR_FATAL_ERROR("Input images are expected to have the same size");
     Operation operation(Operation::Type::Add);
     switch (img1->datatype) {
     case NIFTI_TYPE_UINT8:
@@ -553,25 +529,17 @@ void reg_tools_addImageToImage(const nifti_image *img1,
         reg_tools_operationImageToImage<double>(img1, img2, res, operation);
         break;
     default:
-        reg_print_fct_error("reg_tools_addImageToImage");
-        reg_print_msg_error("Unsupported datatype");
-        reg_exit();
+        NR_FATAL_ERROR("Unsupported datatype");
     }
 }
 /* *************************************************************** */
 void reg_tools_subtractImageFromImage(const nifti_image *img1,
                                       const nifti_image *img2,
                                       nifti_image *res) {
-    if (img1->datatype != res->datatype || img2->datatype != res->datatype) {
-        reg_print_fct_error("reg_tools_subtractImageFromImage");
-        reg_print_msg_error("Input images are expected to be of the same type");
-        reg_exit();
-    }
-    if (img1->nvox != res->nvox || img2->nvox != res->nvox) {
-        reg_print_fct_error("reg_tools_subtractImageFromImage");
-        reg_print_msg_error("Input images are expected to have the same size");
-        reg_exit();
-    }
+    if (img1->datatype != res->datatype || img2->datatype != res->datatype)
+        NR_FATAL_ERROR("Input images are expected to be of the same type");
+    if (img1->nvox != res->nvox || img2->nvox != res->nvox)
+        NR_FATAL_ERROR("Input images are expected to have the same size");
     Operation operation(Operation::Type::Subtract);
     switch (img1->datatype) {
     case NIFTI_TYPE_UINT8:
@@ -599,25 +567,17 @@ void reg_tools_subtractImageFromImage(const nifti_image *img1,
         reg_tools_operationImageToImage<double>(img1, img2, res, operation);
         break;
     default:
-        reg_print_fct_error("reg_tools_subtractImageFromImage");
-        reg_print_msg_error("Unsupported datatype");
-        reg_exit();
+        NR_FATAL_ERROR("Unsupported datatype");
     }
 }
 /* *************************************************************** */
 void reg_tools_multiplyImageToImage(const nifti_image *img1,
                                     const nifti_image *img2,
                                     nifti_image *res) {
-    if (img1->datatype != res->datatype || img2->datatype != res->datatype) {
-        reg_print_fct_error("reg_tools_multiplyImageToImage");
-        reg_print_msg_error("Input images are expected to have the same size");
-        reg_exit();
-    }
-    if (img1->nvox != res->nvox || img2->nvox != res->nvox) {
-        reg_print_fct_error("reg_tools_multiplyImageToImage");
-        reg_print_msg_error("Input images are expected to have the same size");
-        reg_exit();
-    }
+    if (img1->datatype != res->datatype || img2->datatype != res->datatype)
+        NR_FATAL_ERROR("Input images are expected to be of the same type");
+    if (img1->nvox != res->nvox || img2->nvox != res->nvox)
+        NR_FATAL_ERROR("Input images are expected to have the same size");
     Operation operation(Operation::Type::Multiply);
     switch (img1->datatype) {
     case NIFTI_TYPE_UINT8:
@@ -645,25 +605,17 @@ void reg_tools_multiplyImageToImage(const nifti_image *img1,
         reg_tools_operationImageToImage<double>(img1, img2, res, operation);
         break;
     default:
-        reg_print_fct_error("reg_tools_multiplyImageToImage");
-        reg_print_msg_error("Unsupported datatype");
-        reg_exit();
+        NR_FATAL_ERROR("Unsupported datatype");
     }
 }
 /* *************************************************************** */
 void reg_tools_divideImageToImage(const nifti_image *img1,
                                   const nifti_image *img2,
                                   nifti_image *res) {
-    if (img1->datatype != res->datatype || img2->datatype != res->datatype) {
-        reg_print_fct_error("reg_tools_divideImageToImage");
-        reg_print_msg_error("Input images are expected to have the same size");
-        reg_exit();
-    }
-    if (img1->nvox != res->nvox || img2->nvox != res->nvox) {
-        reg_print_fct_error("reg_tools_divideImageToImage");
-        reg_print_msg_error("Input images are expected to have the same size");
-        reg_exit();
-    }
+    if (img1->datatype != res->datatype || img2->datatype != res->datatype)
+        NR_FATAL_ERROR("Input images are expected to be of the same type");
+    if (img1->nvox != res->nvox || img2->nvox != res->nvox)
+        NR_FATAL_ERROR("Input images are expected to have the same size");
     Operation operation(Operation::Type::Divide);
     switch (img1->datatype) {
     case NIFTI_TYPE_UINT8:
@@ -691,9 +643,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1,
         reg_tools_operationImageToImage<double>(img1, img2, res, operation);
         break;
     default:
-        reg_print_fct_error("reg_tools_divideImageToImage");
-        reg_print_msg_error("Unsupported datatype");
-        reg_exit();
+        NR_FATAL_ERROR("Unsupported datatype");
     }
 }
 /* *************************************************************** */
@@ -729,16 +679,10 @@ void reg_tools_operationValueToImage(const nifti_image *img,
 void reg_tools_addValueToImage(const nifti_image *img,
                                nifti_image *res,
                                const double& val) {
-    if (img->datatype != res->datatype) {
-        reg_print_fct_error("reg_tools_addValueToImage");
-        reg_print_msg_error("Input and output image do not have the same data type");
-        reg_exit();
-    }
-    if (img->nvox != res->nvox) {
-        reg_print_fct_error("reg_tools_addValueToImage");
-        reg_print_msg_error("Input images are expected to have the same size");
-        reg_exit();
-    }
+    if (img->datatype != res->datatype)
+        NR_FATAL_ERROR("Input and output image are expected to be of the same type");
+    if (img->nvox != res->nvox)
+        NR_FATAL_ERROR("Input images are expected to have the same size");
     Operation operation(Operation::Type::Add);
     switch (img->datatype) {
     case NIFTI_TYPE_UINT8:
@@ -766,25 +710,17 @@ void reg_tools_addValueToImage(const nifti_image *img,
         reg_tools_operationValueToImage<double>(img, res, val, operation);
         break;
     default:
-        reg_print_fct_error("reg_tools_addValueToImage");
-        reg_print_msg_error("Image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("Image data type is not supported");
     }
 }
 /* *************************************************************** */
 void reg_tools_subtractValueFromImage(const nifti_image *img,
                                       nifti_image *res,
                                       const double& val) {
-    if (img->datatype != res->datatype) {
-        reg_print_fct_error("reg_tools_subtractValueFromImage");
-        reg_print_msg_error("Input and output image do not have the same data type");
-        reg_exit();
-    }
-    if (img->nvox != res->nvox) {
-        reg_print_fct_error("reg_tools_subtractValueFromImage");
-        reg_print_msg_error("Input images are expected to have the same size");
-        reg_exit();
-    }
+    if (img->datatype != res->datatype)
+        NR_FATAL_ERROR("Input and output image are expected to be of the same type");
+    if (img->nvox != res->nvox)
+        NR_FATAL_ERROR("Input images are expected to have the same size");
     Operation operation(Operation::Type::Subtract);
     switch (img->datatype) {
     case NIFTI_TYPE_UINT8:
@@ -812,25 +748,17 @@ void reg_tools_subtractValueFromImage(const nifti_image *img,
         reg_tools_operationValueToImage<double>(img, res, val, operation);
         break;
     default:
-        reg_print_fct_error("reg_tools_subtractValueFromImage");
-        reg_print_msg_error("Image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("Image data type is not supported");
     }
 }
 /* *************************************************************** */
 void reg_tools_multiplyValueToImage(const nifti_image *img,
                                     nifti_image *res,
                                     const double& val) {
-    if (img->datatype != res->datatype) {
-        reg_print_fct_error("reg_tools_multiplyValueToImage");
-        reg_print_msg_error("Input and output image do not have the same data type");
-        reg_exit();
-    }
-    if (img->nvox != res->nvox) {
-        reg_print_fct_error("reg_tools_multiplyValueToImage");
-        reg_print_msg_error("Input images are expected to have the same size");
-        reg_exit();
-    }
+    if (img->datatype != res->datatype)
+        NR_FATAL_ERROR("Input and output image are expected to be of the same type");
+    if (img->nvox != res->nvox)
+        NR_FATAL_ERROR("Input images are expected to have the same size");
     Operation operation(Operation::Type::Multiply);
     switch (img->datatype) {
     case NIFTI_TYPE_UINT8:
@@ -858,25 +786,17 @@ void reg_tools_multiplyValueToImage(const nifti_image *img,
         reg_tools_operationValueToImage<double>(img, res, val, operation);
         break;
     default:
-        reg_print_fct_error("reg_tools_multiplyValueToImage");
-        reg_print_msg_error("Image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("Image data type is not supported");
     }
 }
 /* *************************************************************** */
 void reg_tools_divideValueToImage(const nifti_image *img,
                                   nifti_image *res,
                                   const double& val) {
-    if (img->datatype != res->datatype) {
-        reg_print_fct_error("reg_tools_divideValueToImage");
-        reg_print_msg_error("Input and output image do not have the same data type");
-        reg_exit();
-    }
-    if (img->nvox != res->nvox) {
-        reg_print_fct_error("reg_tools_divideValueToImage");
-        reg_print_msg_error("Input images are expected to have the same size");
-        reg_exit();
-    }
+    if (img->datatype != res->datatype)
+        NR_FATAL_ERROR("Input and output image are expected to be of the same type");
+    if (img->nvox != res->nvox)
+        NR_FATAL_ERROR("Input images are expected to have the same size");
     Operation operation(Operation::Type::Divide);
     switch (img->datatype) {
     case NIFTI_TYPE_UINT8:
@@ -904,9 +824,7 @@ void reg_tools_divideValueToImage(const nifti_image *img,
         reg_tools_operationValueToImage<double>(img, res, val, operation);
         break;
     default:
-        reg_print_fct_error("reg_tools_divideValueToImage");
-        reg_print_msg_error("Image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("Image data type is not supported");
     }
 }
 /* *************************************************************** */
@@ -917,11 +835,8 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                  const int *mask,
                                  const bool *timePoint,
                                  const bool *axis) {
-    if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) {
-        reg_print_fct_error("reg_tools_kernelConvolution");
-        reg_print_msg_error("This function does not support images with dimension > 2048");
-        reg_exit();
-    }
+    if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048)
+        NR_FATAL_ERROR("This function does not support images with dimension > 2048");
 #ifdef WIN32
     long index;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3);
@@ -968,9 +883,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                         // Spline kernel
                         radius = static_cast<int>(temp * 2.0f);
                     } else {
-                        reg_print_fct_error("reg_tools_kernelConvolution");
-                        reg_print_msg_error("Unknown kernel type");
-                        reg_exit();
+                        NR_FATAL_ERROR("Unknown kernel type");
                     }
                     if (radius > 0) {
                         // Allocate the kernel
@@ -1010,11 +923,8 @@ void reg_tools_kernelConvolution(nifti_image *image,
                         }
                         // No kernel is required for the mean filtering
                         // No need for kernel normalisation as this is handle by the density function
-#ifndef NDEBUG
-                        char text[255];
-                        sprintf(text, "Convolution type[%i] dim[%i] tp[%i] radius[%i] kernelSum[%g]", kernelType, n, t, radius, kernelSum);
-                        reg_print_msg_debug(text);
-#endif
+                        NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]");
+
                         int planeNumber, planeIndex, lineOffset;
                         int lineIndex, shiftPre, shiftPst, k;
                         switch (n) {
@@ -1213,11 +1123,8 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
                                            float varianceZ,
                                            int *mask,
                                            bool *timePoint) {
-    if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) {
-        reg_print_fct_error("reg_tools_labelKernelConvolution_core");
-        reg_print_msg_error("This function does not support images with dimension > 2048");
-        reg_exit();
-    }
+    if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048)
+        NR_FATAL_ERROR("This function does not support images with dimension > 2048");
 #ifdef WIN32
     long index;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3);
@@ -1397,9 +1304,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
         reg_tools_labelKernelConvolution_core<double>(image, varianceX, varianceY, varianceZ, mask, timePoint);
         break;
     default:
-        reg_print_fct_error("reg_tools_labelKernelConvolution");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
     }
 }
 /* *************************************************************** */
@@ -1409,11 +1314,8 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                  const int *mask,
                                  const bool *timePoint,
                                  const bool *axis) {
-    if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64) {
-        reg_print_fct_error("reg_tools_kernelConvolution");
-        reg_print_msg_error("The image is expected to be of floating precision type");
-        reg_exit();
-    }
+    if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64)
+        NR_FATAL_ERROR("The image is expected to be of floating precision type");
 
     if (image->nt <= 0) image->nt = image->dim[4] = 1;
     if (image->nu <= 0) image->nu = image->dim[5] = 1;
@@ -1597,9 +1499,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
         reg_downsampleImage<PrecisionType, double>(image, type, downsampleAxis);
         break;
     default:
-        reg_print_fct_error("reg_downsampleImage");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
     }
 }
 template void reg_downsampleImage<float>(nifti_image*, int, bool*);
@@ -1641,9 +1541,7 @@ void reg_tools_binarise_image(nifti_image *image) {
         reg_tools_binarise_image<double>(image);
         break;
     default:
-        reg_print_fct_error("reg_tools_binarise_image");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
     }
 }
 /* *************************************************************** */
@@ -1681,9 +1579,7 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) {
         reg_tools_binarise_image<double>(image, threshold);
         break;
     default:
-        reg_print_fct_error("reg_tools_binarise_image");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
     }
 }
 /* *************************************************************** */
@@ -1721,9 +1617,7 @@ void reg_tools_binaryImage2int(const nifti_image *image, int *array) {
         reg_tools_binaryImage2int<double>(image, array);
         break;
     default:
-        reg_print_fct_error("reg_tools_binaryImage2int");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
     }
 }
 /* *************************************************************** */
@@ -1784,9 +1678,8 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB
     case NIFTI_TYPE_FLOAT64:
         return reg_tools_getMeanRMS<AType, double>(imageA, imageB);
     default:
-        reg_print_fct_error("reg_tools_getMeanRMS");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
+        return 0;
     }
 }
 /* *************************************************************** */
@@ -1809,9 +1702,8 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB
     case NIFTI_TYPE_FLOAT64:
         return reg_tools_getMeanRMS<double>(imageA, imageB);
     default:
-        reg_print_fct_error("reg_tools_getMeanRMS");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
+        return 0;
     }
 }
 /* *************************************************************** */
@@ -1923,25 +1815,18 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma
     case NIFTI_TYPE_FLOAT64:
         return reg_tools_nanMask_image<ImageType, double>(image, maskImage, outputImage);
     default:
-        reg_print_fct_error("reg_tools_nanMask_image");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
+        return 0;
     }
 }
 /* *************************************************************** */
 int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) {
     // Check dimension
-    if (image->nvox != maskImage->nvox || image->nvox != outputImage->nvox) {
-        reg_print_fct_error("reg_tools_nanMask_image");
-        reg_print_msg_error("Input images have different size");
-        reg_exit();
-    }
+    if (image->nvox != maskImage->nvox || image->nvox != outputImage->nvox)
+        NR_FATAL_ERROR("Input images have different size");
     // Check output data type
-    if (image->datatype != outputImage->datatype) {
-        reg_print_fct_error("reg_tools_nanMask_image");
-        reg_print_msg_error("Input and output images have different data type");
-        reg_exit();
-    }
+    if (image->datatype != outputImage->datatype)
+        NR_FATAL_ERROR("Input and output images have different data type");
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
         return reg_tools_nanMask_image<unsigned char>(image, maskImage, outputImage);
@@ -1960,9 +1845,8 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma
     case NIFTI_TYPE_FLOAT64:
         return reg_tools_nanMask_image<double>(image, maskImage, outputImage);
     default:
-        reg_print_fct_error("reg_tools_nanMask_image");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
+        return 0;
     }
 }
 /* *************************************************************** */
@@ -1987,16 +1871,15 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask) {
     case NIFTI_TYPE_FLOAT64:
         return reg_tools_removeNanFromMask_core<double>(image, mask);
     default:
-        reg_print_fct_error("reg_tools_removeNanFromMask");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
+        return 0;
     }
 }
 /* *************************************************************** */
 template <class DataType>
 DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool calcMin = true) {
     if (timepoint < -1 || timepoint >= image->nt)
-        reg_print_msg_error("reg_tools_getMinMaxValue. The required time point does not exists");
+        NR_FATAL_ERROR("The required time point does not exist");
 
     const DataType *imgPtr = static_cast<DataType*>(image->data);
     DataType retValue = calcMin ? std::numeric_limits<DataType>::max() : std::numeric_limits<DataType>::lowest();
@@ -2037,9 +1920,8 @@ float reg_tools_getMinValue(const nifti_image *image, int timepoint) {
     case NIFTI_TYPE_FLOAT64:
         return (float)reg_tools_getMinMaxValue<double>(image, timepoint);
     default:
-        reg_print_fct_error("reg_tools_getMinValue");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
+        return 0;
     }
 }
 /* *************************************************************** */
@@ -2063,9 +1945,8 @@ float reg_tools_getMaxValue(const nifti_image *image, int timepoint) {
     case NIFTI_TYPE_FLOAT64:
         return (float)reg_tools_getMinMaxValue<double>(image, timepoint, false);
     default:
-        reg_print_fct_error("reg_tools_getMaxValue");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
+        return 0;
     }
 }
 /* *************************************************************** */
@@ -2102,9 +1983,8 @@ float reg_tools_getMeanValue(const nifti_image *image) {
     case NIFTI_TYPE_FLOAT64:
         return reg_tools_getMeanValue<double>(image);
     default:
-        reg_print_fct_error("reg_tools_getMeanValue");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
+        return 0;
     }
 }
 /* *************************************************************** */
@@ -2142,9 +2022,8 @@ float reg_tools_getSTDValue(const nifti_image *image) {
     case NIFTI_TYPE_FLOAT64:
         return reg_tools_getSTDValue<double>(image);
     default:
-        reg_print_fct_error("reg_tools_getSTDValue");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
+        return 0;
     }
 }
 /* *************************************************************** */
@@ -2223,9 +2102,7 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin
         reg_flipAxis<double>(image, outputArray, cmd);
         break;
     default:
-        reg_print_fct_error("reg_flipAxis");
-        reg_print_msg_error("The image data type is not supported");
-        reg_exit();
+        NR_FATAL_ERROR("The image data type is not supported");
     }
 }
 /* *************************************************************** */
@@ -2322,9 +2199,7 @@ int reg_getDisplacementFromDeformation(nifti_image *field) {
             reg_getDisplacementFromDeformation_3D<float>(field);
             break;
         default:
-            reg_print_fct_error("reg_getDisplacementFromDeformation");
-            reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for 5D image with 2 or 3 components in the fifth dimension");
         }
     } else if (field->datatype == NIFTI_TYPE_FLOAT64) {
         switch (field->nu) {
@@ -2335,14 +2210,10 @@ int reg_getDisplacementFromDeformation(nifti_image *field) {
             reg_getDisplacementFromDeformation_3D<double>(field);
             break;
         default:
-            reg_print_fct_error("reg_getDisplacementFromDeformation");
-            reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for 5D image with 2 or 3 components in the fifth dimension");
         }
     } else {
-        reg_print_fct_error("reg_getDisplacementFromDeformation");
-        reg_print_msg_error("Only single or double floating precision have been implemented");
-        reg_exit();
+        NR_FATAL_ERROR("Only single or double floating precision have been implemented");
     }
     field->intent_code = NIFTI_INTENT_VECTOR;
     memset(field->intent_name, 0, 16);
@@ -2447,9 +2318,7 @@ int reg_getDeformationFromDisplacement(nifti_image *field) {
             reg_getDeformationFromDisplacement_3D<float>(field);
             break;
         default:
-            reg_print_fct_error("reg_getDeformationFromDisplacement");
-            reg_print_msg_error("Only implemented for 2 or 3D deformation fields");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields");
         }
     } else if (field->datatype == NIFTI_TYPE_FLOAT64) {
         switch (field->nu) {
@@ -2460,14 +2329,10 @@ int reg_getDeformationFromDisplacement(nifti_image *field) {
             reg_getDeformationFromDisplacement_3D<double>(field);
             break;
         default:
-            reg_print_fct_error("reg_getDeformationFromDisplacement");
-            reg_print_msg_error("Only implemented for 2 or 3D deformation fields");
-            reg_exit();
+            NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields");
         }
     } else {
-        reg_print_fct_error("reg_getDeformationFromDisplacement");
-        reg_print_msg_error("Only single or double floating precision have been implemented");
-        reg_exit();
+        NR_FATAL_ERROR("Only single or double floating precision have been implemented");
     }
 
     field->intent_code = NIFTI_INTENT_VECTOR;
@@ -2506,11 +2371,8 @@ void reg_setGradientToZero(nifti_image *image,
                            bool yAxis,
                            bool zAxis = false) {
     // Ensure that the specified image is a 5D image
-    if (image->ndim != 5) {
-        reg_print_fct_error("reg_setGradientToZero");
-        reg_print_msg_error("Input image is expected to be a 5D image");
-        reg_exit();
-    }
+    if (image->ndim != 5)
+        NR_FATAL_ERROR("Input image is expected to be a 5D image");
     switch (image->datatype) {
     case NIFTI_TYPE_FLOAT32:
         reg_setGradientToZero_core<float>(image, xAxis, yAxis, zAxis);
@@ -2519,9 +2381,7 @@ void reg_setGradientToZero(nifti_image *image,
         reg_setGradientToZero_core<double>(image, xAxis, yAxis, zAxis);
         break;
     default:
-        reg_print_fct_error("reg_setGradientToZero");
-        reg_print_msg_error("Input image is expected to be float or double");
-        reg_exit();
+        NR_FATAL_ERROR("Input image is expected to be float or double");
     }
 }
 /* *************************************************************** */
@@ -2536,8 +2396,7 @@ double reg_test_compare_arrays(const DataType *ptrA,
         const double valB = (double)ptrB[i];
         if (valA != valA || valB != valB) {
             if (valA == valA || valB == valB) {
-                reg_print_fct_warn("reg_test_compare_arrays");
-                reg_print_msg_warn("Unexpected NaN in only one of the array");
+                NR_WARN_WFCT("Unexpected NaN in only one of the array");
                 return std::numeric_limits<float>::max();
             }
         } else {
@@ -2568,16 +2427,10 @@ double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB)
 }
 /* *************************************************************** */
 double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) {
-    if (imgA->datatype != imgB->datatype) {
-        reg_print_fct_error("reg_test_compare_images");
-        reg_print_msg_error("Input images have different datatype");
-        reg_exit();
-    }
-    if (imgA->nvox != imgB->nvox) {
-        reg_print_fct_error("reg_test_compare_images");
-        reg_print_msg_error("Input images have different size");
-        reg_exit();
-    }
+    if (imgA->datatype != imgB->datatype)
+        NR_FATAL_ERROR("Input images have different datatype");
+    if (imgA->nvox != imgB->nvox)
+        NR_FATAL_ERROR("Input images have different size");
     switch (imgA->datatype) {
     case NIFTI_TYPE_UINT8:
         return reg_test_compare_images<unsigned char>(imgA, imgB);
@@ -2596,9 +2449,8 @@ double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB)
     case NIFTI_TYPE_FLOAT64:
         return reg_test_compare_images<double>(imgA, imgB);
     default:
-        reg_print_fct_error("reg_test_compare_images");
-        reg_print_msg_error("Unsupported data type");
-        reg_exit();
+        NR_FATAL_ERROR("Unsupported data type");
+        return 0;
     }
 }
 /* *************************************************************** */
@@ -2636,9 +2488,7 @@ void reg_tools_abs_image(nifti_image *img) {
         reg_tools_abs_image<double>(img);
         break;
     default:
-        reg_print_fct_error("reg_tools_abs_image");
-        reg_print_msg_error("Unsupported data type");
-        reg_exit();
+        NR_FATAL_ERROR("Unsupported data type");
     }
 }
 /* *************************************************************** */
@@ -2714,3 +2564,16 @@ nifti_image* nifti_dup(const nifti_image& image, const bool& copyData) {
     return newImage;
 }
 /* *************************************************************** */
+void PrintCmdLine(const int& argc, const char * const *argv, const bool& verbose) {
+#ifdef NDEBUG
+    if (!verbose) return;
+#endif
+    NR_INFO("");
+    NR_INFO("Command line:");
+    std::string text("\t");
+    for (int i = 0; i < argc; i++)
+        text += " "s + argv[i];
+    NR_INFO(text);
+    NR_INFO("");
+}
+/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 4392b8a7..84f28bcb 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -21,6 +21,7 @@
 #include <algorithm>
 #include <functional>
 #include "_reg_maths.h"
+#include "Debug.hpp"
 
 using namespace std::string_literals;
 using std::unique_ptr;
@@ -459,3 +460,6 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x
  */
 nifti_image* nifti_dup(const nifti_image& image, const bool& copyData = true);
 /* *************************************************************** */
+/// @brief Prints the command line
+void PrintCmdLine(const int& argc, const char * const *argv, const bool& verbose);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 101ece57..c173148f 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -141,9 +141,7 @@ struct BlockSize100: public BlockSize {
         reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem
         reg_getImageGradient2D = 512; // 16 reg - 24 smem - 20 cmem - 24 lmem
         reg_getImageGradient3D = 320; // 24 reg - 24 smem - 16 cmem - 32 lmem
-#ifndef NDEBUG
-        printf("[NiftyReg DEBUG] NiftyReg_CudaBlock100 constructor called\n");
-#endif
+        NR_FUNC_CALLED();
     }
 };
 /* *************************************************************** */
@@ -210,10 +208,8 @@ struct BlockSize300: public BlockSize {
         reg_resampleImage3D = 1024; // 24 reg
         reg_getImageGradient2D = 768; // 34 reg
         reg_getImageGradient3D = 768; // 34 reg
-#ifndef NDEBUG
-        printf("[NiftyReg DEBUG] BlockSize300 constructor called\n");
-#endif
+        NR_FUNC_CALLED();
     }
 };
 /* *************************************************************** */
-} // End namespace NiftyReg::Cuda
+} // namespace NiftyReg
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index c389e367..64ecfcd8 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -20,10 +20,8 @@ CudaAladinContent::CudaAladinContent(nifti_image *referenceIn,
                   percentageOfBlocks,
                   inlierLts,
                   blockStepSize) {
-    if (bytesIn != sizeof(float)) {
-        reg_print_fct_warn("CudaAladinContent::CudaAladinContent");
-        reg_print_msg_warn("Datatype has been forced to float");
-    }
+    if (bytesIn != sizeof(float))
+        NR_WARN_WFCT("Datatype has been forced to float");
     InitVars();
     AllocateCuPtrs();
 }
@@ -305,8 +303,7 @@ void CudaAladinContent::DownloadImage(nifti_image *image, float *memoryObject, i
         FillImageData<int>(image, memoryObject, datatype);
         break;
     default:
-        std::cout << "CUDA: unsupported type" << std::endl;
-        break;
+        NR_FATAL_ERROR("CUDA: unsupported type");
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 997676ca..ab0eed9a 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -221,9 +221,7 @@ void CudaContent::DownloadImage(nifti_image *image, float *memoryObject, int dat
         FillImageData<int>(image, memoryObject, datatype);
         break;
     default:
-        reg_print_fct_error("CudaContent::DownloadImage()");
-        reg_print_msg_error("Unsupported type");
-        break;
+        NR_FATAL_ERROR("Unsupported type");
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaContext.cpp b/reg-lib/cuda/CudaContext.cpp
index e0485ff0..f0fb9f06 100644
--- a/reg-lib/cuda/CudaContext.cpp
+++ b/reg-lib/cuda/CudaContext.cpp
@@ -6,25 +6,17 @@ namespace NiftyReg {
 CudaContext::CudaContext() {
     // The CUDA card is setup
     cuInit(0);
-    int device_count = 0;
-    cudaGetDeviceCount(&device_count);
-#ifndef NDEBUG
-    char text[255];
-    sprintf(text, "[NiftyReg CUDA] %i card(s) detected\n", device_count);
-    reg_print_msg_debug(text);
-#endif
+    numDevices = 0;
+    cudaGetDeviceCount((int*)&numDevices);
+    NR_DEBUG(numDevices << " CUDA card(s) detected");
     cudaContext = nullptr;
-    numDevices = device_count;
     cudaIdx = 999;
     PickCard(cudaIdx);
 }
 /* *************************************************************** */
 void CudaContext::SetCudaIdx(unsigned cudaIdxIn) {
-    if (cudaIdxIn >= numDevices) {
-        reg_print_msg_error("The specified cuda card id is not defined");
-        reg_print_msg_error("Run reg_gpuinfo to get the proper id");
-        reg_exit();
-    }
+    if (cudaIdxIn >= numDevices)
+        NR_FATAL_ERROR("The specified CUDA card ID is not defined! Run reg_gpuinfo to get the proper id.");
     cudaIdx = cudaIdxIn;
     PickCard(cudaIdx);
 }
@@ -77,29 +69,22 @@ void CudaContext::PickCard(unsigned deviceId = 999) {
     NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
 
     if (deviceProp.major < 1) {
-        reg_print_msg_error("[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n");
-        reg_exit();
+        NR_FATAL_ERROR("The specified graphics card does not exist");
     } else {
         size_t free = 0;
         size_t total = 0;
         cuMemGetInfo(&free, &total);
-        if (deviceProp.totalGlobalMem != total) {
-            fprintf(stderr, "[NiftyReg CUDA ERROR] The CUDA card %s does not seem to be available\n",
-                    deviceProp.name);
-            fprintf(stderr, "[NiftyReg CUDA ERROR] Expected total memory: %zu Mb - Recovered total memory: %zu Mb\n",
-                    deviceProp.totalGlobalMem / (1024 * 1024), total / (1024 * 1024));
-            reg_exit();
-        }
-#ifndef NDEBUG
-        printf("[NiftyReg CUDA] The following device is used: %s\n", deviceProp.name);
-        printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n",
-               (unsigned long)(free / (1024 * 1024)), (unsigned long)(total / (1024 * 1024)));
-        printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", deviceProp.major, deviceProp.minor);
-        printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n", deviceProp.sharedMemPerBlock);
-        printf("[NiftyReg CUDA] CUDA version %i\n", CUDART_VERSION);
-        printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", deviceProp.clockRate / 1000);
-        printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", deviceProp.multiProcessorCount);
-#endif
+        if (deviceProp.totalGlobalMem != total)
+            NR_FATAL_ERROR("The CUDA card "s + deviceProp.name + " does not seem to be available\n"s +
+                           "Expected total memory: "s + std::to_string(deviceProp.totalGlobalMem / (1024 * 1024)) +
+                           " MB - Recovered total memory: "s + std::to_string(total / (1024 * 1024)) + " MB");
+        NR_DEBUG("The following device is used: "s + deviceProp.name);
+        NR_DEBUG("It has "s + std::to_string(free / (1024 * 1024)) + " MB free out of "s + std::to_string(total / (1024 * 1024)) + " MB");
+        NR_DEBUG("The CUDA compute capability is "s + std::to_string(deviceProp.major) + "."s + std::to_string(deviceProp.minor));
+        NR_DEBUG("The shared memory size in bytes: "s + std::to_string(deviceProp.sharedMemPerBlock));
+        NR_DEBUG("The CUDA version is "s + std::to_string(CUDART_VERSION));
+        NR_DEBUG("The card clock rate is "s + std::to_string(deviceProp.clockRate / 1000) + " MHz");
+        NR_DEBUG("The card has "s + std::to_string(deviceProp.multiProcessorCount) + " multiprocessors");
         cudaIdx = max_gflops_device;
         cudaGetDeviceProperties(&deviceProp, cudaIdx);
         if (deviceProp.major > 1) {
diff --git a/reg-lib/cuda/CudaContext.hpp b/reg-lib/cuda/CudaContext.hpp
index e9e9ca32..e5d5f396 100644
--- a/reg-lib/cuda/CudaContext.hpp
+++ b/reg-lib/cuda/CudaContext.hpp
@@ -39,4 +39,4 @@ class CudaContext {
     void SetBlockSize(int major);
 };
 /* *************************************************************** */
-}   // namespace NiftyReg
+} // namespace NiftyReg
diff --git a/reg-lib/cuda/CudaLtsKernel.cpp b/reg-lib/cuda/CudaLtsKernel.cpp
index aa5cd6fd..a0993fe9 100644
--- a/reg-lib/cuda/CudaLtsKernel.cpp
+++ b/reg-lib/cuda/CudaLtsKernel.cpp
@@ -34,10 +34,8 @@ void CudaLtsKernel::Calculate(bool affine) {
      cudaRuntimeGetVersion(cudaRunTimeVersion);
      cudaDriverGetVersion(cudaDriverVersion);
 
- #ifndef DEBUG
-     printf("CUDA RUNTIME VERSION=%i\n", *cudaRunTimeVersion);
-     printf("CUDA DRIVER VERSION=%i\n", *cudaDriverVersion);
- #endif
+     NR_DEBUG("CUDA runtime version=" << *cudaRunTimeVersion);
+     NR_DEBUG("CUDA driver version=" << *cudaDriverVersion);
 
      if (*cudaRunTimeVersion < 7050) {
          blockMatchingParams = con->GetBlockMatchingParams();
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
index f6c973c3..3d1325e7 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -17,16 +17,13 @@ reg_measure* CudaMeasure::Create(const MeasureType& measureType) {
     case MeasureType::Kld:
         return new reg_kld_gpu();
     case MeasureType::Mind:
-        reg_print_msg_error("MIND measure type isn't implemented for GPU");
-        reg_exit();
+        NR_FATAL_ERROR("MIND measure type isn't implemented for GPU");
     case MeasureType::MindSsc:
-        reg_print_msg_error("MIND-SSC measure type isn't implemented for GPU");
-        reg_exit();
+        NR_FATAL_ERROR("MIND-SSC measure type isn't implemented for GPU");
+    default:
+        NR_FATAL_ERROR("Unsupported measure type");
+        return nullptr;
     }
-    reg_print_fct_error("CudaMeasure::Create");
-    reg_print_msg_error("Unsupported measure type");
-    reg_exit();
-    return nullptr;
 }
 /* *************************************************************** */
 void CudaMeasure::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) {
diff --git a/reg-lib/cuda/CudaNormaliseGradient.hpp b/reg-lib/cuda/CudaNormaliseGradient.hpp
index 7b7c8ce8..45c1f204 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.hpp
+++ b/reg-lib/cuda/CudaNormaliseGradient.hpp
@@ -35,4 +35,4 @@ void NormaliseGradient(float4 *imageCuda,
                        const bool& optimiseY,
                        const bool& optimiseZ);
 /* *************************************************************** */
-}   // namespace NiftyReg::Cuda
\ No newline at end of file
+} // namespace NiftyReg::Cuda
diff --git a/reg-lib/cuda/CudaResampleImageKernel.cpp b/reg-lib/cuda/CudaResampleImageKernel.cpp
index 8f28948f..e17b22da 100644
--- a/reg-lib/cuda/CudaResampleImageKernel.cpp
+++ b/reg-lib/cuda/CudaResampleImageKernel.cpp
@@ -15,17 +15,11 @@ CudaResampleImageKernel::CudaResampleImageKernel(Content *conIn) : ResampleImage
     mask_d = con->GetMask_d();
     floIJKMat_d = con->GetFloIJKMat_d();
 
-    if (floatingImage->datatype != warpedImage->datatype) {
-        reg_print_fct_error("CudaResampleImageKernel::CudaResampleImageKernel");
-        reg_print_msg_error("Floating and warped images should have the same data type. Exit.");
-        reg_exit();
-    }
+    if (floatingImage->datatype != warpedImage->datatype)
+        NR_FATAL_ERROR("Floating and warped images should have the same data type");
 
-    if (floatingImage->nt != warpedImage->nt) {
-        reg_print_fct_error("CudaResampleImageKernel::CudaResampleImageKernel");
-        reg_print_msg_error("Floating and warped images have different dimension along the time axis. Exit.");
-        reg_exit();
-    }
+    if (floatingImage->nt != warpedImage->nt)
+        NR_FATAL_ERROR("Floating and warped images have different dimensions along the time axis");
 }
 /* *************************************************************** */
 void CudaResampleImageKernel::Calculate(int interp,
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu
index 5b15a1a2..464535bb 100755
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/_reg_common_cuda.cu
@@ -27,9 +27,7 @@ int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *imageCuda, const nifti
 template <class DataType, class NiftiType>
 int cudaCommon_transferNiftiToArrayOnDevice1(DataType *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
-        reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-        reg_print_msg_error("The host and device arrays are of different types");
-        return EXIT_FAILURE;
+        NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
         const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType);
         NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, memSize, cudaMemcpyHostToDevice));
@@ -40,11 +38,8 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DataType *arrayCuda, const nifti_im
 template <class DataType>
 int cudaCommon_transferNiftiToArrayOnDevice(DataType *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
-        if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
-            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-            reg_print_msg_error("The specified image is not a single precision deformation field image");
-            return EXIT_FAILURE;
-        }
+        if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
+            NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
         const float *niftiImgValues = static_cast<float*>(img->data);
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
         unique_ptr<float4[]> array(new float4[voxelNumber]());
@@ -68,9 +63,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *arrayCuda, const nifti_ima
         case NIFTI_TYPE_FLOAT32:
             return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(arrayCuda, img);
         default:
-            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-            reg_print_msg_error("The image data type is not supported");
-            return EXIT_FAILURE;
+            NR_FATAL_ERROR("The image data type is not supported");
         }
     }
     return EXIT_SUCCESS;
@@ -83,9 +76,7 @@ template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, const nift
 template <class DataType, class NiftiType>
 int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
-        reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-        reg_print_msg_error("The host and device arrays are of different types");
-        return EXIT_FAILURE;
+        NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
         const size_t memSize = voxelNumber * sizeof(DataType);
@@ -100,11 +91,8 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array1Cuda, DataType *arr
 template <class DataType>
 int cudaCommon_transferNiftiToArrayOnDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
-        if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
-            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-            reg_print_msg_error("The specified image is not a single precision deformation field image");
-            return EXIT_FAILURE;
-        }
+        if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
+            NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
         const float *niftiImgValues = static_cast<float*>(img->data);
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
         unique_ptr<float4[]> array1(new float4[voxelNumber]());
@@ -138,9 +126,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *array1Cuda, DataType *arra
         case NIFTI_TYPE_FLOAT32:
             return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(array1Cuda, array2Cuda, img);
         default:
-            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-            reg_print_msg_error("The image data type is not supported");
-            return EXIT_FAILURE;
+            NR_FATAL_ERROR("The image data type is not supported");
         }
     }
     return EXIT_SUCCESS;
@@ -152,9 +138,7 @@ template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, float4*, c
 template <class DataType, class NiftiType>
 int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
-        reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-        reg_print_msg_error("The host and device arrays are of different types");
-        return EXIT_FAILURE;
+        NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
         cudaMemcpy3DParms copyParams{};
         copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
@@ -172,11 +156,8 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *arrayCuda, const nifti_i
 template <class DataType>
 int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
-        if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
-            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-            reg_print_msg_error("The specified image is not a single precision deformation field image");
-            return EXIT_FAILURE;
-        }
+        if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
+            NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
         const float *niftiImgValues = static_cast<float*>(img->data);
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
         unique_ptr<float4[]> array(new float4[voxelNumber]());
@@ -208,9 +189,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *arrayCuda, const nifti_im
         case NIFTI_TYPE_FLOAT32:
             return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(arrayCuda, img);
         default:
-            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice");
-            reg_print_msg_error("The image data type is not supported");
-            return EXIT_FAILURE;
+            NR_FATAL_ERROR("The image data type is not supported");
         }
     }
     return EXIT_SUCCESS;
@@ -223,9 +202,7 @@ template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, const n
 template <class DataType, class NiftiType>
 int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
-        reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-        reg_print_msg_error("The host and device arrays are of different types");
-        return EXIT_FAILURE;
+        NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
         NiftiType *array1 = static_cast<NiftiType*>(img->data);
         NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)];
@@ -253,11 +230,8 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *array1Cuda, cudaArray *a
 template <class DataType>
 int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
-        if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) {
-            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-            reg_print_msg_error("The specified image is not a single precision deformation field image");
-            return EXIT_FAILURE;
-        }
+        if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
+            NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
         const float *niftiImgValues = static_cast<float*>(img->data);
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
         unique_ptr<float4[]> array1(new float4[voxelNumber]());
@@ -307,9 +281,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *array1Cuda, cudaArray *ar
         case NIFTI_TYPE_FLOAT32:
             return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(array1Cuda, array2Cuda, img);
         default:
-            reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1");
-            reg_print_msg_error("The image data type is not supported");
-            return EXIT_FAILURE;
+            NR_FATAL_ERROR("The image data type is not supported");
         }
     }
     return EXIT_SUCCESS;
@@ -384,9 +356,7 @@ template int cudaCommon_transferFromDeviceToCpu<double>(double*, const double*,
 template <class DataType, class NiftiType>
 int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *arrayCuda) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
-        reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1");
-        reg_print_msg_error("The host and device arrays are of different types");
-        return EXIT_FAILURE;
+        NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
         NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost));
     }
@@ -397,11 +367,8 @@ template <class DataType>
 int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) {
     if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
-        if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) {
-            reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
-            reg_print_msg_error("The nifti image is not a 5D volume");
-            return EXIT_FAILURE;
-        }
+        if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32)
+            NR_FATAL_ERROR("The nifti image is not a 5D volume");
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
         thrust::device_ptr<const float4> arrayCudaPtr(reinterpret_cast<const float4*>(arrayCuda));
         const thrust::host_vector<float4> array(arrayCudaPtr, arrayCudaPtr + voxelNumber);
@@ -426,8 +393,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array
         case NIFTI_TYPE_FLOAT32:
             return cudaCommon_transferFromDeviceToNifti1<DataType, float>(img, arrayCuda);
         default:
-            reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
-            reg_print_msg_error("The image data type is not supported");
+            NR_FATAL_ERROR("The image data type is not supported");
             return EXIT_FAILURE;
         }
     }
@@ -438,11 +404,8 @@ template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, const fl
 /* *************************************************************** */
 template<>
 int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) {
-    if (img->datatype != NIFTI_TYPE_FLOAT32) {
-        reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
-        reg_print_msg_error("The image data type is not supported");
-        return EXIT_FAILURE;
-    }
+    if (img->datatype != NIFTI_TYPE_FLOAT32)
+        NR_FATAL_ERROR("The image data type is not supported");
     cudaMemcpy3DParms copyParams{};
     copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
     copyParams.srcArray = const_cast<cudaArray*>(arrayCuda);
@@ -458,9 +421,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const cudaArray *arra
 template <class DataType, class NiftiType>
 int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
-        reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1");
-        reg_print_msg_error("The host and device arrays are of different types");
-        return EXIT_FAILURE;
+        NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
         NiftiType *array1 = static_cast<NiftiType*>(img->data);
@@ -475,11 +436,8 @@ template <class DataType>
 int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
     if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
-        if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) {
-            reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
-            reg_print_msg_error("The nifti image is not a 5D volume");
-            return EXIT_FAILURE;
-        }
+        if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32)
+            NR_FATAL_ERROR("The nifti image is not a 5D volume");
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
         thrust::device_ptr<const float4> array1CudaPtr(reinterpret_cast<const float4*>(array1Cuda));
         thrust::device_ptr<const float4> array2CudaPtr(reinterpret_cast<const float4*>(array2Cuda));
@@ -522,8 +480,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array
         case NIFTI_TYPE_FLOAT32:
             return cudaCommon_transferFromDeviceToNifti1<DataType, float>(img, array1Cuda, array2Cuda);
         default:
-            reg_print_fct_error("cudaCommon_transferFromDeviceToNifti");
-            reg_print_msg_error("The image data type is not supported");
+            NR_FATAL_ERROR("The image data type is not supported");
             return EXIT_FAILURE;
         }
     }
@@ -615,9 +572,7 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
         resDesc.res.array.array = static_cast<cudaArray*>(const_cast<void*>(devPtr));
         break;
     default:
-        reg_print_fct_error("cudaCommon_createTextureObject");
-        reg_print_msg_error("Unsupported resource type");
-        reg_exit();
+        NR_FATAL_ERROR("Unsupported resource type");
     }
 
     // Specify texture object parameters
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h
index c74f8718..688cb6da 100755
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/_reg_common_cuda.h
@@ -24,41 +24,37 @@ struct __attribute__((aligned(4))) float4 {
 /* *************************************************************** */
 namespace NiftyReg::Cuda::Internal {
 /* *************************************************************** */
-inline void SafeCall(const char *file, const int& line) {
+inline void SafeCall(const std::string& file, const int& line, const std::string& funcName) {
 #if CUDART_VERSION >= 3200
-	cudaError_t err = cudaPeekAtLastError();
+	const cudaError_t err = cudaPeekAtLastError();
 #else
-	cudaError_t err = cudaDeviceSynchronize();
+	const cudaError_t err = cudaDeviceSynchronize();
 #endif
-	if (err != cudaSuccess) {
-		fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err));
-		reg_exit();
-	}
+	if (err != cudaSuccess)
+        NiftyReg::Internal::FatalError(file, line, funcName, "CUDA error: "s + cudaGetErrorString(err));
 }
 /* *************************************************************** */
-inline void CheckKernel(const char *file, const int& line, const dim3& grid, const dim3& block) {
+inline void CheckKernel(const std::string& file, const int& line, const std::string& funcName, const dim3& grid, const dim3& block) {
 #if CUDART_VERSION >= 3200
 	cudaDeviceSynchronize();
-	cudaError_t err = cudaPeekAtLastError();
+	const cudaError_t err = cudaPeekAtLastError();
 #else
-	cudaError_t err = cudaDeviceSynchronize();
+	const cudaError_t err = cudaDeviceSynchronize();
 #endif
 	if (err != cudaSuccess) {
-		fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err));
-		fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", grid.x, grid.y, grid.z, block.x, block.y, block.z);
-		reg_exit();
+        NiftyReg::Internal::FatalError(file, line, funcName, "CUDA error: "s + cudaGetErrorString(err) +
+                "\n\tGrid size ["s + std::to_string(grid.x) + " "s + std::to_string(grid.y) + " "s + std::to_string(grid.z) +
+                "] - Block size ["s + std::to_string(block.x) + " "s + std::to_string(block.y) + " "s + std::to_string(block.z) + "]");
+	} else {
+        NR_DEBUG("CUDA kernel: "s + cudaGetErrorString(err) +
+                 " - Grid size ["s + std::to_string(grid.x) + " "s + std::to_string(grid.y) + " "s + std::to_string(grid.z) +
+                 "] - Block size ["s + std::to_string(block.x) + " "s + std::to_string(block.y) + " "s + std::to_string(block.z) + "]");
 	}
-#ifndef NDEBUG
-	else {
-		printf("[NiftyReg CUDA DEBUG] kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n",
-			cudaGetErrorString(cudaGetLastError()), grid.x, grid.y, grid.z, block.x, block.y, block.z);
-	}
-#endif
 }
 /* *************************************************************** */
 } // namespace NiftyReg::Cuda::Internal
-#define NR_CUDA_SAFE_CALL(call) { call; NiftyReg::Cuda::Internal::SafeCall(__FILE__, __LINE__); }
-#define NR_CUDA_CHECK_KERNEL(grid, block) NiftyReg::Cuda::Internal::CheckKernel(__FILE__, __LINE__, grid, block)
+#define NR_CUDA_SAFE_CALL(call)             { call; NiftyReg::Cuda::Internal::SafeCall(__FILE__, __LINE__, NR_FUNCTION); }
+#define NR_CUDA_CHECK_KERNEL(grid, block)   NiftyReg::Cuda::Internal::CheckKernel(__FILE__, __LINE__, NR_FUNCTION, grid, block)
 /* *************************************************************** */
 extern "C++"
 template <class DataType>
diff --git a/reg-lib/cuda/_reg_cudainfo.cpp b/reg-lib/cuda/_reg_cudainfo.cpp
index 7d52161f..ea58f824 100644
--- a/reg-lib/cuda/_reg_cudainfo.cpp
+++ b/reg-lib/cuda/_reg_cudainfo.cpp
@@ -1,51 +1,38 @@
-#include <iostream>
 #include "_reg_common_cuda.h"
 #include "_reg_tools.h"
 
-void showCUDAInfo(void) {
+void showCUDAInfo() {
     // The CUDA card is setup
     cuInit(0);
 
-    int device_count = 0;
-    cudaGetDeviceCount(&device_count);
-    printf("-----------------------------------\n");
-    printf("[NiftyReg CUDA] %i device(s) detected\n", device_count);
-    printf("-----------------------------------\n");
-
-    CUcontext cucontext;
+    int numDevices = 0;
+    cudaGetDeviceCount(&numDevices);
+    NR_COUT << "-----------------------------------" << std::endl;
+    NR_COUT << "[NiftyReg CUDA] " << numDevices << " device(s) detected" << std::endl;
+    NR_COUT << "-----------------------------------" << std::endl;
 
+    CUcontext cuContext;
     struct cudaDeviceProp deviceProp;
     // following code is from cutGetMaxGflopsDeviceId()
-    int current_device = 0;
-    while (current_device < device_count) {
-        cudaGetDeviceProperties(&deviceProp, current_device);
+    int currentDevice = 0;
+    while (currentDevice < numDevices) {
+        cudaGetDeviceProperties(&deviceProp, currentDevice);
         if (deviceProp.major > 0) {
-
-            NR_CUDA_SAFE_CALL(cudaSetDevice(current_device));
-            NR_CUDA_SAFE_CALL(cuCtxCreate(&cucontext, CU_CTX_SCHED_SPIN, current_device));
-
-            printf("[NiftyReg CUDA] Device id [%i]\n", current_device);
-            printf("[NiftyReg CUDA] Device name: %s\n", deviceProp.name);
-            size_t free = 0;
-            size_t total = 0;
+            NR_CUDA_SAFE_CALL(cudaSetDevice(currentDevice));
+            NR_CUDA_SAFE_CALL(cuCtxCreate(&cuContext, CU_CTX_SCHED_SPIN, currentDevice));
+            NR_COUT << "[NiftyReg CUDA] Device ID: " << currentDevice << std::endl;
+            NR_COUT << "[NiftyReg CUDA] Device name: " << deviceProp.name << std::endl;
+            size_t free = 0, total = 0;
             cuMemGetInfo(&free, &total);
-            printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n",
-                   (unsigned long int)(free / (1024 * 1024)),
-                   (unsigned long int)(total / (1024 * 1024)));
-            printf("[NiftyReg CUDA] Card compute capability: %i.%i\n",
-                   deviceProp.major,
-                   deviceProp.minor);
-            printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n",
-                   deviceProp.sharedMemPerBlock);
-            printf("[NiftyReg CUDA] CUDA version %i\n",
-                   CUDART_VERSION);
-            printf("[NiftyReg CUDA] Card clock rate (Mhz): %i\n",
-                   deviceProp.clockRate / 1000);
-            printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n",
-                   deviceProp.multiProcessorCount);
+            NR_COUT << "[NiftyReg CUDA] It has " << free / (1024 * 1024) << " MB free out of " << total / (1024 * 1024) << " MB" << std::endl;
+            NR_COUT << "[NiftyReg CUDA] Card compute capability: " << deviceProp.major << "." << deviceProp.minor << std::endl;
+            NR_COUT << "[NiftyReg CUDA] Shared memory size in bytes: " << deviceProp.sharedMemPerBlock << std::endl;
+            NR_COUT << "[NiftyReg CUDA] CUDA version " << CUDART_VERSION << std::endl;
+            NR_COUT << "[NiftyReg CUDA] Card clock rate (Mhz): " << deviceProp.clockRate / 1000 << std::endl;
+            NR_COUT << "[NiftyReg CUDA] Card has " << deviceProp.multiProcessorCount << " multiprocessor(s)" << std::endl;
         }
-        cuCtxDestroy(cucontext);
-        ++current_device;
-        printf("-----------------------------------\n");
+        cuCtxDestroy(cuContext);
+        ++currentDevice;
+        NR_COUT << "-----------------------------------" << std::endl;
     }
 }
diff --git a/reg-lib/cuda/_reg_cudainfo.h b/reg-lib/cuda/_reg_cudainfo.h
index 889b396e..ee5baa69 100644
--- a/reg-lib/cuda/_reg_cudainfo.h
+++ b/reg-lib/cuda/_reg_cudainfo.h
@@ -1,3 +1,3 @@
 #pragma once
 
-void showCUDAInfo(void);
+void showCUDAInfo();
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 2a0a9f8c..7dbb89cf 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -336,7 +336,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
         // Z basis values
         extern __shared__ float yBasis[];   // Shared memory
         const unsigned sharedMemIndex = 4 * threadIdx.x;
-        // Compute the shared memory offset which corresponds to four times the number of thread per block
+        // Compute the shared memory offset which corresponds to four times the number of threads per block
         float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z];
         float relative = (float)z / controlPointVoxelSpacing.z - (float)nodeAnte.z;
         if (relative < 0) relative = 0; // rounding error
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 7e968bed..1bed83a2 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -44,11 +44,8 @@ class reg_measure_gpu {
                                    nifti_image *voxelBasedGradBw = nullptr,
                                    float4 *voxelBasedGradBwCuda = nullptr) {
         // Check that the input image are of type float
-        if (refImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImg->datatype != NIFTI_TYPE_FLOAT32) {
-            reg_print_fct_error("reg_measure_gpu::InitialiseMeasure");
-            reg_print_msg_error("Only single precision is supported on the GPU");
-            reg_exit();
-        }
+        if (refImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImg->datatype != NIFTI_TYPE_FLOAT32)
+            NR_FATAL_ERROR("Only single precision is supported on the GPU");
         // Bind the required pointers
         this->referenceImageCuda = refImgCuda;
         this->floatingImageCuda = floImgCuda;
@@ -60,11 +57,8 @@ class reg_measure_gpu {
         // Check if the symmetric mode is used
         if (floMask != nullptr && warpedImgBw != nullptr && warpedGradBw != nullptr && voxelBasedGradBw != nullptr &&
             floMaskCuda != nullptr && warpedImgBwCuda != nullptr && warpedGradBwCuda != nullptr && voxelBasedGradBwCuda != nullptr) {
-            if (floImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImgBw->datatype != NIFTI_TYPE_FLOAT32) {
-                reg_print_fct_error("reg_measure_gpu::InitialiseMeasure");
-                reg_print_msg_error("Only single precision is supported on the GPU");
-                reg_exit();
-            }
+            if (floImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImgBw->datatype != NIFTI_TYPE_FLOAT32)
+                NR_FATAL_ERROR("Only single precision is supported on the GPU");
             this->floatingMaskCuda = floMaskCuda;
             this->warpedImageBwCuda = warpedImgBwCuda;
             this->warpedGradientBwCuda = warpedGradBwCuda;
@@ -75,9 +69,7 @@ class reg_measure_gpu {
             this->warpedGradientBwCuda = nullptr;
             this->voxelBasedGradientBwCuda = nullptr;
         }
-#ifndef NDEBUG
-        reg_print_msg_debug("reg_measure_gpu::InitialiseMeasure() called");
-#endif
+        NR_FUNC_CALLED();
     }
 
 protected:
@@ -99,9 +91,7 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
 public:
     /// @brief reg_lncc class constructor
     reg_lncc_gpu() {
-        reg_print_fct_error("reg_lncc_gpu::reg_lncc_gpu");
-        reg_print_msg_error("CUDA CANNOT BE USED WITH LNCC YET");
-        reg_exit();
+        NR_FATAL_ERROR("CUDA CANNOT BE USED WITH LNCC YET");
     }
     /// @brief reg_lncc class destructor
     virtual ~reg_lncc_gpu() {}
@@ -142,9 +132,7 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
 public:
     /// @brief reg_kld_gpu class constructor
     reg_kld_gpu() {
-        reg_print_fct_error("reg_kld_gpu::reg_kld_gpu");
-        reg_print_msg_error("CUDA CANNOT BE USED WITH KLD YET");
-        reg_exit();
+        NR_FATAL_ERROR("CUDA CANNOT BE USED WITH KLD YET");
     }
     /// @brief reg_kld_gpu class destructor
     virtual ~reg_kld_gpu() {}
@@ -185,9 +173,7 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
 public:
     /// @brief reg_dti_gpu class constructor
     reg_dti_gpu() {
-        reg_print_fct_error("reg_dti_gpu::reg_dti_gpu");
-        reg_print_msg_error("CUDA CANNOT BE USED WITH DTI YET");
-        reg_exit();
+        NR_FATAL_ERROR("CUDA CANNOT BE USED WITH DTI YET");
     }
     /// @brief reg_dti_gpu class destructor
     virtual ~reg_dti_gpu() {}
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 0c52ccc9..459da264 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -16,15 +16,11 @@
 
 /* *************************************************************** */
 reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() {
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi_gpu constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 reg_nmi_gpu::~reg_nmi_gpu() {
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi_gpu destructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
@@ -46,21 +42,13 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
                                        warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda,
                                        warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
     // Check if the input images have multiple timepoints
-    if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) {
-        reg_print_fct_error("reg_nmi_gpu::InitialiseMeasure");
-        reg_print_msg_error("Multiple timepoints are not yet supported");
-        reg_exit();
-    }
+    if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1)
+        NR_FATAL_ERROR("Multiple timepoints are not yet supported");
     // The reference and floating images have to be updated on the device
     if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->referenceImageCuda, this->referenceImage) ||
-        cudaCommon_transferNiftiToArrayOnDevice<float>(this->floatingImageCuda, this->floatingImage)) {
-        reg_print_fct_error("reg_nmi_gpu::InitialiseMeasure");
-        reg_print_msg_error("Error when transferring the reference or floating image");
-        reg_exit();
-    }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_nmi_gpu::InitialiseMeasure called");
-#endif
+        cudaCommon_transferNiftiToArrayOnDevice<float>(this->floatingImageCuda, this->floatingImage))
+        NR_FATAL_ERROR("Error when transferring the reference or floating image");
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 double GetSimilarityMeasureValue(const nifti_image *referenceImage,
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index db6cf562..d7a9796c 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -13,9 +13,7 @@ reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser<float>::reg_optimiser() {
     this->bestDofBwCuda = nullptr;
     this->gradientCuda = nullptr;
     this->gradientBwCuda = nullptr;
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_optimiser_gpu::reg_optimiser_gpu() called\n");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 reg_optimiser_gpu::~reg_optimiser_gpu() {
@@ -27,9 +25,7 @@ reg_optimiser_gpu::~reg_optimiser_gpu() {
         cudaCommon_free(this->bestDofBwCuda);
         this->bestDofBwCuda = nullptr;
     }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_optimiser_gpu::~reg_optimiser_gpu() called\n");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 void reg_optimiser_gpu::Initialise(size_t nvox,
@@ -56,11 +52,8 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
     this->gradientCuda = reinterpret_cast<float4*>(gradData);
 
     cudaCommon_free(this->bestDofCuda);
-    if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, this->GetVoxNumber())) {
-        reg_print_fct_error("reg_optimiser_gpu::Initialise()");
-        reg_print_msg_error("Error when allocating the best control point array on the GPU");
-        reg_exit();
-    }
+    if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, this->GetVoxNumber()))
+        NR_FATAL_ERROR("Error when allocating the best control point array on the GPU");
 
     this->isSymmetric = nvoxBw > 0 && cppDataBw && gradDataBw;
     if (this->isSymmetric) {
@@ -68,11 +61,8 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
         this->currentDofBwCuda = reinterpret_cast<float4*>(cppDataBw);
         this->gradientBwCuda = reinterpret_cast<float4*>(gradDataBw);
         cudaCommon_free(this->bestDofBwCuda);
-        if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, this->GetVoxNumberBw())) {
-            reg_print_fct_error("reg_optimiser_gpu::Initialise()");
-            reg_print_msg_error("Error when allocating the best control point backwards array on the GPU");
-            reg_exit();
-        }
+        if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, this->GetVoxNumberBw()))
+            NR_FATAL_ERROR("Error when allocating the best control point backwards array on the GPU");
     }
 
     this->StoreCurrentDof();
@@ -80,9 +70,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
     this->intOpt = intOpt;
     this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
 
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_optimiser_gpu::Initialise() called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 void reg_optimiser_gpu::RestoreBestDof() {
@@ -110,9 +98,7 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_o
     this->array1Bw = nullptr;
     this->array2 = nullptr;
     this->array2Bw = nullptr;
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() {
@@ -132,9 +118,7 @@ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() {
         cudaCommon_free(this->array2Bw);
         this->array2Bw = nullptr;
     }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 void reg_conjugateGradient_gpu::Initialise(size_t nvox,
@@ -154,23 +138,15 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox,
     this->firstCall = true;
     cudaCommon_free(this->array1); cudaCommon_free(this->array2);
     if (cudaCommon_allocateArrayToDevice<float4>(&this->array1, this->GetVoxNumber()) ||
-        cudaCommon_allocateArrayToDevice<float4>(&this->array2, this->GetVoxNumber())) {
-        reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()");
-        reg_print_msg_error("Error when allocating the conjugate gradient array on the GPU");
-        reg_exit();
-    }
+        cudaCommon_allocateArrayToDevice<float4>(&this->array2, this->GetVoxNumber()))
+        NR_FATAL_ERROR("Error when allocating the conjugate gradient array on the GPU");
     if (this->isSymmetric) {
         cudaCommon_free(this->array1Bw); cudaCommon_free(this->array2Bw);
         if (cudaCommon_allocateArrayToDevice<float4>(&this->array1Bw, this->GetVoxNumberBw()) ||
-            cudaCommon_allocateArrayToDevice<float4>(&this->array2Bw, this->GetVoxNumberBw())) {
-            reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()");
-            reg_print_msg_error("Error when allocating the conjugate gradient array backwards on the GPU");
-            reg_exit();
-        }
+            cudaCommon_allocateArrayToDevice<float4>(&this->array2Bw, this->GetVoxNumberBw()))
+            NR_FATAL_ERROR("Error when allocating the conjugate gradient array backwards on the GPU");
     }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_conjugateGradient_gpu::Initialise() called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 void reg_conjugateGradient_gpu::UpdateGradientValues() {
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index c9d91811..4f3b6c77 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -16,15 +16,11 @@
 
 /* *************************************************************** */
 reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() {
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_ssd_gpu constructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 reg_ssd_gpu::~reg_ssd_gpu() {
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_ssd_gpu destructor called");
-#endif
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
@@ -45,14 +41,9 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
                                        warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda,
                                        warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
     // Check that the input images have only one time point
-    if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) {
-        reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure");
-        reg_print_msg_error("Multiple timepoints are not yet supported");
-        reg_exit();
-    }
-#ifndef NDEBUG
-    reg_print_msg_debug("reg_ssd_gpu::InitialiseMeasure()");
-#endif
+    if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1)
+        NR_FATAL_ERROR("Multiple timepoints are not yet supported");
+    NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 double reg_getSsdValue_gpu(const nifti_image *referenceImage,
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index e99ccf25..3dbc4f71 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -88,11 +88,6 @@ void launchAffine(mat44 *affineTransformation,
    free(trans);
 
    uint3 dims_d = make_uint3(deformationField->nx, deformationField->ny, deformationField->nz);
-   affineKernel << <G1_b, B1_b >> >(*trans_d, *def_d, *mask_d, dims_d, NiftiImage::calcVoxelNumber(deformationField, 3), compose);
-
-#ifndef NDEBUG
+   affineKernel<<<G1_b, B1_b>>>(*trans_d, *def_d, *mask_d, dims_d, NiftiImage::calcVoxelNumber(deformationField, 3), compose);
    NR_CUDA_CHECK_KERNEL(G1_b, B1_b);
-#else
-   NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
-#endif
 }
diff --git a/reg-lib/cuda/affineDeformationKernel.h b/reg-lib/cuda/affineDeformationKernel.h
index a2455525..80466e59 100644
--- a/reg-lib/cuda/affineDeformationKernel.h
+++ b/reg-lib/cuda/affineDeformationKernel.h
@@ -1,4 +1,5 @@
 #pragma once
-#include "niftilib/nifti1_io.h"
-//
+
+#include "RNifti.h"
+
 void launchAffine(mat44 *affineTransformation, nifti_image *deformationField, float** def_d, int** mask_d, float** trans_d, bool compose = false);
\ No newline at end of file
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index cd91c556..81f5ad1a 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -338,10 +338,8 @@ void block_matching_method_gpu(const nifti_image *referenceImage,
                                const int *totalBlockCuda,
                                const int *maskCuda,
                                const float *refMatCuda) {
-    if (params->stepSize != 1 || params->voxelCaptureRange != 3) {
-        reg_print_msg_error("The block matching CUDA kernel supports only single step size!");
-        reg_exit();
-    }
+    if (params->stepSize != 1 || params->voxelCaptureRange != 3)
+        NR_FATAL_ERROR("The block matching CUDA kernel supports only single step size!");
 
     const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const uint3 blockSize = make_uint3(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]);
diff --git a/reg-lib/cuda/checkCudaCard.cpp b/reg-lib/cuda/checkCudaCard.cpp
index 9ca46a7d..b278076e 100755
--- a/reg-lib/cuda/checkCudaCard.cpp
+++ b/reg-lib/cuda/checkCudaCard.cpp
@@ -1,37 +1,34 @@
 #include <cuda_runtime.h>
 #include <cuda.h>
-#include <stdio.h>
+#include <iostream>
 #include <algorithm>
 
 int main() {
-
-    int deviceCount = 0;
-    int output = 0;
-    cudaError_t cudaResultCode = cudaGetDeviceCount(&deviceCount);
-
+    int deviceCount = 0, output = 0;
+    const cudaError_t cudaResultCode = cudaGetDeviceCount(&deviceCount);
 
     // Error when running cudaGetDeviceCount
-    if(cudaResultCode != cudaSuccess){
-        fprintf(stderr, "%s (CUDA error Code=%d)\n", cudaGetErrorString(cudaResultCode), (int)cudaResultCode);
+    if (cudaResultCode != cudaSuccess) {
+        std::cerr << cudaGetErrorString(cudaResultCode) << " (CUDA Error Code=" << cudaResultCode << ")" << std::endl;
         return EXIT_FAILURE;
     }
 
     // Error when running cudaGetDeviceCount
-    if(deviceCount == 0){
-        fprintf(stderr, "No device detected\n");
+    if (deviceCount == 0) {
+        std::cerr << "No device detected" << std::endl;
         return EXIT_FAILURE;
     }
 
-    //detects device capability and picks the best
-    for( unsigned i = 0; i < deviceCount; ++i ) {
+    // Detect device capability and picks the best
+    for (unsigned i = 0; i < deviceCount; ++i) {
         cudaSetDevice(i);
         cudaDeviceProp deviceProp;
         cudaGetDeviceProperties(&deviceProp, i);
         output = std::max(output, deviceProp.major * 10 + deviceProp.minor);
     }
 
-    //	output for device capability
-    printf("%i", output);
+    // Output for device capability
+    std::cout << output;
 
     return EXIT_SUCCESS;
 }
diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu
index 47615c5f..82cb3c89 100644
--- a/reg-lib/cuda/optimizeKernel.cu
+++ b/reg-lib/cuda/optimizeKernel.cu
@@ -32,37 +32,30 @@ __device__ double getSquareDistance3Dcu(float * first_point3D, float * second_po
                 ((double)first_point3D[2] - (double)second_point3D[2]));
 }
 /* *************************************************************** */
-void checkCublasStatus(cublasStatus_t status)
-{
-    if (status != CUBLAS_STATUS_SUCCESS) {
-        reg_print_fct_error("checkCublasStatus");
-        reg_print_msg_error("!!!! CUBLAS  error");
-        reg_exit(0);
-    }
+void checkCublasStatus(cublasStatus_t status) {
+    if (status != CUBLAS_STATUS_SUCCESS)
+        NR_FATAL_ERROR("CUBLAS error");
 }
 /* *************************************************************** */
 void checkCUSOLVERStatus(cusolverStatus_t status, char* msg) {
-
     if (status != CUSOLVER_STATUS_SUCCESS) {
-        if (status == CUSOLVER_STATUS_NOT_INITIALIZED) {
-            reg_print_fct_error("the library was not initialized.");
-        }
-        else if (status == CUSOLVER_STATUS_INTERNAL_ERROR) {
-            reg_print_fct_error(" an internal operation failed.");
-        }
-        reg_exit(0);
+        if (status == CUSOLVER_STATUS_NOT_INITIALIZED)
+            NR_FATAL_ERROR("The library was not initialized");
+        else if (status == CUSOLVER_STATUS_INTERNAL_ERROR)
+            NR_FATAL_ERROR("An internal operation failed");
+        NR_FATAL_ERROR("CUSOLVER error");
     }
 }
 /* *************************************************************** */
 void checkDevInfo(int *devInfo) {
-    int * hostDevInfo = (int*)malloc(sizeof(int));
+    int *hostDevInfo = (int*)malloc(sizeof(int));
     cudaMemcpy(hostDevInfo, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
     if (hostDevInfo < 0)
-        printf("parameter: %d is wrong\n", hostDevInfo);
+        NR_ERROR("Parameter " << hostDevInfo << " is wrong");
     if (hostDevInfo > 0)
-        printf("%d superdiagonals of an intermediate bidiagonal form B did not converge to zero.\n", hostDevInfo);
+        NR_ERROR(hostDevInfo << " superdiagonals of an intermediate bidiagonal form B did not converge to zero");
     else
-        printf(" %d: operation successful\n", hostDevInfo);
+        NR_INFO(hostDevInfo << ": operation successful");
     free(hostDevInfo);
 }
 /* *************************************************************** */
@@ -172,21 +165,20 @@ __global__ void populateLengthsKernel(float* lengths, float* warped_d, float* ne
 __global__ void outputMatFlat(float* mat, const unsigned ldm, const unsigned n, char* msg)
 {
     for (int i = 0; i < ldm * n; ++i)
-        printf("%f | ", mat[i]);
-    printf("\n");
+        NR_COUT << mat[i] << " | ";
+    NR_COUT << std::endl;
 }
 /* *************************************************************** */
 //launched as 1 block 1 thread
 __global__ void outputMat(float* mat, const unsigned ldm, const unsigned n, char* msg)
 {
     for (int i = 0; i < ldm; ++i) {
-        printf("%d ", i);
-        for (int j = 0; j < n; ++j) {
-            printf("%f ", mat[IDX2C(i, j, ldm)]);
-        }
-        printf("\n");
+        NR_COUT << i << " ";
+        for (int j = 0; j < n; ++j)
+            NR_COUT << mat[IDX2C(i, j, ldm)] << " ";
+        NR_COUT << "\n";
     }
-    printf("\n");
+    NR_COUT << std::endl;
 }
 /* *************************************************************** */
 /*
diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h
index 7e7926b4..cfb7cb2c 100644
--- a/reg-lib/cuda/optimizeKernel.h
+++ b/reg-lib/cuda/optimizeKernel.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "niftilib/nifti1_io.h"
+#include "RNifti.h"
 
 /*
 extern "C++"
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index eb3c7cb3..40633392 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -389,13 +389,9 @@ void launchResample(nifti_image *floatingImage,
 						  float **deformationFieldImage_d,
 						  int **mask_d,
 						  float **sourceIJKMatrix_d) {
-
 	// Define the DTI indices if required
-	if(dti_timepoint!=nullptr || jacMat!=nullptr){
-		reg_print_fct_error("launchResample");
-		reg_print_msg_error("The DTI resampling has not yet been implemented with the CUDA platform. Exit.");
-		reg_exit();
-	}
+	if (dti_timepoint != nullptr || jacMat != nullptr)
+		NR_FATAL_ERROR("The DTI resampling has not yet been implemented with the CUDA platform");
 
 	const size_t targetVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3);
 
@@ -413,35 +409,30 @@ void launchResample(nifti_image *floatingImage,
 	ulong2 voxelNumber = make_ulong2(targetVoxelNumber, NiftiImage::calcVoxelNumber(floatingImage, 3));
 	uint3 fi_xyz = make_uint3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 	uint2 wi_tu = make_uint2(warpedImage->nt, warpedImage->nu);
-	 if (floatingImage->nz > 1) {
-		  ResampleImage3D <<<mygrid, myblocks >>>(*floatingImage_d,
-																*deformationFieldImage_d,
-																*warpedImage_d,
-																*mask_d,
-																*sourceIJKMatrix_d,
-																voxelNumber,
-																fi_xyz,
-																wi_tu,
-																paddingValue,
-																interp);
-	 }
-	 else{
-		  ResampleImage2D <<<mygrid, myblocks >>>(*floatingImage_d,
-																*deformationFieldImage_d,
-																*warpedImage_d,
-																*mask_d,
-																*sourceIJKMatrix_d,
-																voxelNumber,
-																fi_xyz,
-																wi_tu,
-																paddingValue,
-																interp);
-	 }
-#ifndef NDEBUG
-	NR_CUDA_CHECK_KERNEL(mygrid, myblocks);
-#else
-	NR_CUDA_SAFE_CALL(cudaDeviceSynchronize());
-#endif
+    if (floatingImage->nz > 1) {
+        ResampleImage3D<<<mygrid, myblocks>>>(*floatingImage_d,
+                                              *deformationFieldImage_d,
+                                              *warpedImage_d,
+                                              *mask_d,
+                                              *sourceIJKMatrix_d,
+                                              voxelNumber,
+                                              fi_xyz,
+                                              wi_tu,
+                                              paddingValue,
+                                              interp);
+    } else {
+        ResampleImage2D<<<mygrid, myblocks>>>(*floatingImage_d,
+                                              *deformationFieldImage_d,
+                                              *warpedImage_d,
+                                              *mask_d,
+                                              *sourceIJKMatrix_d,
+                                              voxelNumber,
+                                              fi_xyz,
+                                              wi_tu,
+                                              paddingValue,
+                                              interp);
+    }
+    NR_CUDA_CHECK_KERNEL(mygrid, myblocks);
 }
 /* *************************************************************** */
 void identityConst()
diff --git a/reg-lib/cuda/resampleKernel.h b/reg-lib/cuda/resampleKernel.h
index c1055f59..758a38ed 100644
--- a/reg-lib/cuda/resampleKernel.h
+++ b/reg-lib/cuda/resampleKernel.h
@@ -1,5 +1,6 @@
 #pragma once
-#include "niftilib/nifti1_io.h"
+
+#include "RNifti.h"
 
 void launchConvolution(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoint, bool *axis);
 void launchResample(nifti_image *floatingImage, nifti_image *warpedImage,  int interp, float paddingValue, bool *dti_timepoint, mat33 * jacMat, float** floatingImage_d, float** warpedImage_d, float** deformationFieldImage_d, int** mask_d, float** floMat_d);
diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp
index 421f57ae..9025d893 100644
--- a/reg-test/reg_test_be.cpp
+++ b/reg-test/reg_test_be.cpp
@@ -225,9 +225,9 @@ TEST_CASE_METHOD(BendingEnergyTest, "Bending Energy", "[unit]") {
         auto&& [testName, result, expected] = testCase;
 
         SECTION(testName) {
-            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
             // if (fabs(result - expected) > EPS){
-            std::cout << "Result=" << result << " | Expected=" << expected << std::endl;
+            NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl;
             // }
             REQUIRE(fabs(result - expected) < EPS);
         }
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index 06ce0faf..a314e376 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -161,7 +161,7 @@ TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") {
         auto&& [testName, blockMatchingParams] = testCase;
 
         SECTION(testName) {
-            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
 
             // Loop over the block and ensure all values are identical
             for (int b = 0; b < blockMatchingParams->activeBlockNumber; ++b) {
@@ -169,8 +169,8 @@ TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") {
                     const int i = b * (int)blockMatchingParams->dim + d;
                     const auto diffPos = blockMatchingParams->warpedPosition[i] - blockMatchingParams->referencePosition[i];
                     if (fabs(diffPos - OFFSET) > EPS) {
-                        std::cout << "[" << b << "/" << blockMatchingParams->activeBlockNumber << ":" << d << "] ";
-                        std::cout << diffPos << std::endl; std::cout.flush();
+                        NR_COUT << "[" << b << "/" << blockMatchingParams->activeBlockNumber << ":" << d << "] ";
+                        NR_COUT << diffPos << std::endl;
                     }
                     REQUIRE(fabs(diffPos - OFFSET) < EPS);
                 }
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index a5ff8f44..bb2d4e63 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -237,7 +237,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
         const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ") + " scale = " + std::to_string(scale);
 
         SECTION(sectionName) {
-            std::cout << "\n**************** UpdateControlPointPosition " << sectionName << " ****************" << std::endl;
+            NR_COUT << "\n**************** UpdateControlPointPosition " << sectionName << " ****************" << std::endl;
 
             // Set the control point grid
             NiftiImage img = content->GetControlPointGrid();
@@ -273,7 +273,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
             for (size_t i = 0; i < controlPointGridExpected.nVoxels(); ++i) {
                 const float cppVal = cppPtr[i];
                 const float cppExpVal = cppExpPtr[i];
-                std::cout << i << " " << cppVal << " " << cppExpVal << std::endl;
+                NR_COUT << i << " " << cppVal << " " << cppExpVal << std::endl;
                 REQUIRE(fabs(cppVal - cppExpVal) < EPS);
             }
 
@@ -281,7 +281,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
             // Only run once by discarding other optimiseX, optimiseY, optimiseZ combinations
             if (!optimiseX && !optimiseY && !optimiseZ) {
                 for (int isSymmetric = 0; isSymmetric < 2; isSymmetric++) {
-                    std::cout << "\n**************** UpdateGradientValues " << sectionName + (isSymmetric ? " Symmetric" : "") << " ****************" << std::endl;
+                    NR_COUT << "\n**************** UpdateGradientValues " << sectionName + (isSymmetric ? " Symmetric" : "") << " ****************" << std::endl;
 
                     // Create a random number generator
                     std::random_device rd;
@@ -335,12 +335,12 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
                     for (size_t i = 0; i < transGrad.nVoxels(); ++i) {
                         const float gradVal = gradPtr[i];
                         const float gradExpVal = gradExpPtr[i];
-                        std::cout << i << " " << gradVal << " " << gradExpVal << std::endl;
+                        NR_COUT << i << " " << gradVal << " " << gradExpVal << std::endl;
                         REQUIRE(fabs(gradVal - gradExpVal) < EPS);
                         if (isSymmetric) {
                             const float gradBwVal = gradBwPtr[i];
                             const float gradExpBwVal = gradExpBwPtr[i];
-                            std::cout << i << " " << gradBwVal << " " << gradExpBwVal << " backwards" << std::endl;
+                            NR_COUT << i << " " << gradBwVal << " " << gradExpBwVal << " backwards" << std::endl;
                             REQUIRE(fabs(gradBwVal - gradExpBwVal) < EPS);
                         }
                     }
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index 32ccd7c2..9a93e705 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -445,7 +445,7 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Get deformation field", "[GetDeformat
         const std::string sectionName = testName + " " + platform->GetName() + " composition=" + std::to_string(composition) + " bspline=" + std::to_string(bspline);
 
         SECTION(sectionName) {
-            std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
 
             // Compute the deformation field
             unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
@@ -459,11 +459,11 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Get deformation field", "[GetDeformat
             const auto defFieldExpPtr = defFieldExp.data();
             defField.disown();
             // Increase the precision for the output
-            std::cout << std::fixed << std::setprecision(10);
+            NR_COUT << std::fixed << std::setprecision(10);
             for (size_t i = 0; i < defFieldExp.nVoxels(); ++i) {
                 const double defFieldVal = defFieldPtr[i];
                 const double defFieldExpVal = defFieldExpPtr[i];
-                std::cout << i << " " << defFieldVal << " " << defFieldExpVal << std::endl;
+                NR_COUT << i << " " << defFieldVal << " " << defFieldExpVal << std::endl;
                 REQUIRE(fabs(defFieldVal - defFieldExpVal) < EPS);
             }
             // Ensure the termination of content before CudaContext
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 09ab1f96..8689954a 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -196,7 +196,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
                 warpedGradient.disown();
                 for (size_t i = 0; i < nVoxels; ++i) {
                     const float warpedGradVal = warpedGradPtr[i];
-                    std::cout << i << " " << warpedGradVal << " " << testResult[i] << std::endl;
+                    NR_COUT << i << " " << warpedGradVal << " " << testResult[i] << std::endl;
                     REQUIRE(fabs(warpedGradVal - testResult[i]) < EPS);
                 }
             }
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 57b0f6c8..3de5aae3 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -223,7 +223,7 @@ TEST_CASE("Interpolation", "[Interpolation]") {
                 warped.disown();
                 for (size_t i = 0; i < nVoxels; ++i) {
                     const float warpedValue = warpedPtr[i];
-                    std::cout << i << " " << warpedValue << " " << testResult[i] << std::endl;
+                    NR_COUT << i << " " << warpedValue << " " << testResult[i] << std::endl;
                     REQUIRE(fabs(warpedValue - testResult[i]) < EPS);
                 }
             }
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index 859bb2c8..592ee238 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -299,9 +299,9 @@ TEST_CASE_METHOD(LNCCTest, "LNCC", "[GetSimilarityMeasureValue]") {
         auto&& [testName, reference, floating, sigma, value] = testData;
 
         SECTION(testName) {
-            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
             const double lncc = measure->GetSimilarityMeasureValue();
-            std::cout << lncc << " " << value << std::endl;
+            NR_COUT << lncc << " " << value << std::endl;
             REQUIRE(fabs(lncc - value) < EPS);
         }
     }
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index c18bdb94..5f9c66b4 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -158,9 +158,9 @@ TEST_CASE_METHOD(NMITest, "NMI", "[unit]") {
         auto&& [testName, result, expected] = testCase;
 
         SECTION(testName) {
-            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
             if (fabs(result - expected) > EPS) {
-                std::cout << "Result=" << result << " | Expected=" << expected << std::endl;
+                NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl;
             }
             REQUIRE(fabs(result - expected) < EPS);
         }
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index 6b388e90..d56cd356 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -181,7 +181,7 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
         const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ");
 
         SECTION(sectionName) {
-            std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
 
             // Set the transformation gradient image to host the computation
             NiftiImage transGrad = content->GetTransformationGradient();
@@ -208,7 +208,7 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
             for (size_t i = 0; i < testGrad.nVoxels(); ++i) {
                 const float transGradVal = transGradPtr[i];
                 const float testGradVal = testGradPtr[i];
-                std::cout << i << " " << transGradVal << " " << testGradVal << std::endl;
+                NR_COUT << i << " " << transGradVal << " " << testGradVal << std::endl;
                 REQUIRE(fabs(transGradVal - testGradVal) < EPS);
             }
             // Ensure the termination of content before CudaContext
diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp
index 4768d831..5bb9e8e0 100644
--- a/reg-test/reg_test_regr_blockMatching.cpp
+++ b/reg-test/reg_test_regr_blockMatching.cpp
@@ -125,7 +125,7 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") {
         auto&& [testName, blockMatchingParamsCpu, blockMatchingParamsCuda] = testCase;
 
         SECTION(testName) {
-            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
 
             // Ensure both approaches retrieve the same number of voxels
             REQUIRE(blockMatchingParamsCpu->activeBlockNumber == blockMatchingParamsCuda->activeBlockNumber);
@@ -138,17 +138,15 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") {
                     const auto refPosCpu = blockMatchingParamsCpu->referencePosition[i];
                     const auto refPosCuda = blockMatchingParamsCuda->referencePosition[i];
                     if (fabs(refPosCpu - refPosCuda) > EPS) {
-                        std::cout << "Ref[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:";
-                        std::cout << refPosCpu << " | CUDA:" << refPosCuda << std::endl;
-                        std::cout.flush();
+                        NR_COUT << "Ref[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:";
+                        NR_COUT << refPosCpu << " | CUDA:" << refPosCuda << std::endl;
                     }
                     REQUIRE(fabs(refPosCpu - refPosCuda) < EPS);
                     const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[i];
                     const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[i];
                     if (fabs(warPosCpu - warPosCuda) > EPS) {
-                        std::cout << "War[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:";
-                        std::cout << warPosCpu << " | CUDA:" << warPosCuda << std::endl;
-                        std::cout.flush();
+                        NR_COUT << "War[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:";
+                        NR_COUT << warPosCpu << " | CUDA:" << warPosCuda << std::endl;
                     }
                     REQUIRE(fabs(warPosCpu - warPosCuda) < EPS);
                 }
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
index a1ac51a3..58cd390d 100644
--- a/reg-test/reg_test_regr_lts.cpp
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -142,14 +142,14 @@ TEST_CASE_METHOD(LTSTest, "Regression LTS", "[regression]") {
         auto&& [testName, matCpu, matCuda] = testCase;
 
         SECTION(testName) {
-            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
 
             // Loop over the matrix values and ensure they are identical
             for (int i = 0; i < 4; ++i) {
                 for (int j = 0; j < 4; ++j) {
                     const auto mCpu = matCpu->m[i][j];
                     const auto mCuda = matCuda->m[i][j];
-                    std::cout << i << " " << j << " " << mCpu << " " << mCuda << std::endl;
+                    NR_COUT << i << " " << j << " " << mCpu << " " << mCuda << std::endl;
                     REQUIRE(fabs(mCpu - mCuda) < EPS);
                 }
             }
diff --git a/reg-test/reg_test_regr_nmi.cpp b/reg-test/reg_test_regr_nmi.cpp
index c79f9e5b..5fed6b15 100644
--- a/reg-test/reg_test_regr_nmi.cpp
+++ b/reg-test/reg_test_regr_nmi.cpp
@@ -222,13 +222,13 @@ TEST_CASE_METHOD(NmiTest, "Regression NMI", "[regression]") {
         auto&& [testName, simMeasureCpu, simMeasureCuda, voxelBasedGradCpu, voxelBasedGradCuda] = testCase;
 
         SECTION(testName) {
-            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
 
             // Increase the precision for the output
-            std::cout << std::fixed << std::setprecision(10);
+            NR_COUT << std::fixed << std::setprecision(10);
 
             // Check the similarity measure values
-            std::cout << "Similarity measure: " << simMeasureCpu << " " << simMeasureCuda << std::endl;
+            NR_COUT << "Similarity measure: " << simMeasureCpu << " " << simMeasureCuda << std::endl;
             REQUIRE(fabs(simMeasureCpu - simMeasureCuda) < EPS);
 
             // Check the voxel-based similarity measure gradients
@@ -237,7 +237,7 @@ TEST_CASE_METHOD(NmiTest, "Regression NMI", "[regression]") {
             for (size_t i = 0; i < voxelBasedGradCpu.nVoxels(); ++i) {
                 const float cpuVal = voxelBasedGradCpuPtr[i];
                 const float cudaVal = voxelBasedGradCudaPtr[i];
-                std::cout << i << " " << cpuVal << " " << cudaVal << std::endl;
+                NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl;
                 REQUIRE(fabs(cpuVal - cudaVal) < EPS);
             }
         }
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index da95af28..c23d95ac 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -223,7 +223,7 @@ TEST_CASE_METHOD(VoxelCentricToNodeCentricTest, "Voxel centric to node centric",
         const std::string sectionName = testName + " " + platform->GetName() + " weight=" + std::to_string(weight);
 
         SECTION(sectionName) {
-            std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
             // Set the matrices required for computation
             nifti_image *floating = content->Content::GetFloating();
             if (floating->sform_code > 0)
@@ -261,7 +261,7 @@ TEST_CASE_METHOD(VoxelCentricToNodeCentricTest, "Voxel centric to node centric",
             for (size_t i = 0; i < transGradExp.nVoxels(); ++i) {
                 const float transGradVal = transGradPtr[i];
                 const float transGradExpVal = transGradExpPtr[i];
-                std::cout << i << " " << transGradVal << " " << transGradExpVal << std::endl;
+                NR_COUT << i << " " << transGradVal << " " << transGradExpVal << std::endl;
                 REQUIRE(fabs(transGradVal - transGradExpVal) < EPS);
             }
             // Ensure the termination of content before CudaContext

From 846b2f123a0c3599695b5d03cd3035ca18007719 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 24 Aug 2023 12:05:12 +0100
Subject: [PATCH 182/314] Refactor _reg_common_cuda #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_benchmark.cpp                    |  50 +-
 reg-lib/CMakeLists.txt                        |   2 +-
 reg-lib/Platform.cpp                          |   2 +-
 reg-lib/cpu/_reg_tools.h                      |   1 +
 reg-lib/cuda/CMakeLists.txt                   |   4 +-
 reg-lib/cuda/CudaAladinContent.cpp            | 150 +++---
 .../{_reg_common_cuda.cu => CudaCommon.cu}    | 492 ++++++++----------
 .../{_reg_common_cuda.h => CudaCommon.hpp}    |  81 ++-
 reg-lib/cuda/CudaCompute.cpp                  |   4 +-
 reg-lib/cuda/CudaContent.cpp                  |  40 +-
 reg-lib/cuda/CudaContent.h                    |   2 +-
 reg-lib/cuda/CudaContext.cpp                  |   2 +-
 reg-lib/cuda/CudaDefContent.cpp               |  16 +-
 reg-lib/cuda/CudaF3dContent.cpp               |  18 +-
 reg-lib/cuda/CudaNormaliseGradient.cu         |   8 +-
 reg-lib/cuda/CudaNormaliseGradient.hpp        |   2 +-
 reg-lib/cuda/_reg_cudainfo.cpp                |   2 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.cu |   2 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.h  |   2 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |  82 +--
 reg-lib/cuda/_reg_localTransformation_gpu.h   |   2 +-
 reg-lib/cuda/_reg_measure_gpu.h               |   2 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                  |  29 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu            |  82 ++-
 reg-lib/cuda/_reg_optimiser_gpu.h             |   2 +-
 reg-lib/cuda/_reg_resampling_gpu.cu           |  20 +-
 reg-lib/cuda/_reg_resampling_gpu.h            |   2 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |  32 +-
 reg-lib/cuda/_reg_tools_gpu.cu                |  40 +-
 reg-lib/cuda/_reg_tools_gpu.h                 |   2 +-
 reg-lib/cuda/affineDeformationKernel.cu       |   2 +-
 reg-lib/cuda/blockMatchingKernel.cu           |  12 +-
 reg-lib/cuda/blockMatchingKernel.h            |   2 +-
 reg-lib/cuda/resampleKernel.cu                |   2 +-
 35 files changed, 565 insertions(+), 630 deletions(-)
 rename reg-lib/cuda/{_reg_common_cuda.cu => CudaCommon.cu} (64%)
 mode change 100755 => 100644
 rename reg-lib/cuda/{_reg_common_cuda.h => CudaCommon.hpp} (68%)
 mode change 100755 => 100644

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index d8fc48a4..274f7143 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-301
+302
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index cf96b43f..18393378 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -186,10 +186,10 @@ int main(int argc, char **argv)
    float4 *deformationFieldImageArray_d;
    if(runGPU)
    {
-      if(cudaCommon_allocateArrayToDevice<float>(&targetImageArray_d, targetImage->dim)) return 1;
-      if(cudaCommon_transferNiftiToArrayOnDevice<float>(targetImageArray_d, targetImage)) return 1;
-      if(cudaCommon_allocateArrayToDevice<float>(&sourceImageArray_d, sourceImage->dim)) return 1;
-      if(cudaCommon_transferNiftiToArrayOnDevice<float>(sourceImageArray_d,sourceImage)) return 1;
+      Cuda::Allocate<float>(&targetImageArray_d, targetImage->dim);
+      Cuda::TransferNiftiToDevice<float>(targetImageArray_d, targetImage);
+      Cuda::Allocate<float>(&sourceImageArray_d, sourceImage->dim);
+      Cuda::TransferNiftiToDevice<float>(sourceImageArray_d,sourceImage);
       CUDA_SAFE_CALL(cudaMalloc((void **)&targetMask_d, targetImage->nvox*sizeof(int)));
       CUDA_SAFE_CALL(cudaMemcpy(targetMask_d, maskImage, targetImage->nvox*sizeof(int), cudaMemcpyHostToDevice));
       CUDA_SAFE_CALL(cudaMalloc((void **)&deformationFieldImageArray_d, targetImage->nvox*sizeof(float4)));
@@ -277,8 +277,8 @@ int main(int argc, char **argv)
    float4 *controlPointImageArray_d;
    if(runGPU)
    {
-      if(cudaCommon_allocateArrayToDevice<float4>(&controlPointImageArray_d, controlPointImage->dim)) return 1;
-      if(cudaCommon_transferNiftiToArrayOnDevice<float4>(controlPointImageArray_d,controlPointImage)) return 1;
+      Cuda::Allocate<float4>(&controlPointImageArray_d, controlPointImage->dim);
+      Cuda::TransferNiftiToDevice<float4>(controlPointImageArray_d,controlPointImage);
    }
 #endif
    {
@@ -330,8 +330,8 @@ int main(int argc, char **argv)
    float4 *velocityFieldImageArray_d;
    if(runGPU)
    {
-      if(cudaCommon_allocateArrayToDevice<float4>(&velocityFieldImageArray_d, velocityFieldImage->dim)) return 1;
-      if(cudaCommon_transferNiftiToArrayOnDevice<float4>(velocityFieldImageArray_d,velocityFieldImage)) return 1;
+      Cuda::Allocate<float4>(&velocityFieldImageArray_d, velocityFieldImage->dim);
+      Cuda::TransferNiftiToDevice<float4>(velocityFieldImageArray_d,velocityFieldImage);
    }
 #endif
    {
@@ -377,7 +377,7 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    float *resultImageArray_d;
    if(runGPU)
-      if(cudaCommon_allocateArrayToDevice<float>(&resultImageArray_d, targetImage->dim)) return 1;
+      Cuda::Allocate<float>(&resultImageArray_d, targetImage->dim);
 #endif
    {
       maxIt=100000 / dimension;
@@ -472,7 +472,7 @@ int main(int argc, char **argv)
          fprintf(outputFile, "GPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          printf("Spatial gradient ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime);
          fprintf(outputFile, "Spatial gradient ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime);
-         cudaCommon_free(sourceImageArray_d);
+         Cuda::Free(sourceImageArray_d);
       }
 #endif
       printf("Spatial gradient done\n\n");
@@ -482,7 +482,7 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    if(runGPU)
    {
-      cudaCommon_free(deformationFieldImageArray_d);
+      Cuda::Free(deformationFieldImageArray_d);
    }
 #endif
 
@@ -504,9 +504,7 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    float4 *voxelNMIGradientArray_d;
    if(runGPU)
-   {
-      if(cudaCommon_allocateArrayToDevice(&voxelNMIGradientArray_d, resultImage->dim)) return 1;
-   }
+      Cuda::Allocate(&voxelNMIGradientArray_d, resultImage->dim);
 #endif
    {
       maxIt=100000 / dimension;
@@ -566,7 +564,7 @@ int main(int argc, char **argv)
          fprintf(outputFile, "GPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          printf("Voxel-based NMI gradient ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime);
          fprintf(outputFile, "Voxel-based NMI gradient ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime);
-         cudaCommon_free(logJointHistogram_d);
+         Cuda::Free(logJointHistogram_d);
       }
       CUDA_SAFE_CALL(cudaFree(targetMask_d));
 #endif
@@ -576,7 +574,7 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    if(runGPU)
    {
-      cudaCommon_free(resultGradientArray_d);
+      Cuda::Free(resultGradientArray_d);
    }
 #endif
 
@@ -584,9 +582,7 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    float4 *nodeNMIGradientArray_d;
    if(runGPU)
-   {
-      if(cudaCommon_allocateArrayToDevice(&nodeNMIGradientArray_d, controlPointImage->dim)) return 1;
-   }
+      Cuda::Allocate(&nodeNMIGradientArray_d, controlPointImage->dim);
 #endif
    {
       maxIt=10000 / dimension;
@@ -638,8 +634,8 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    if(runGPU)
    {
-      cudaCommon_free(voxelNMIGradientArray_d);
-      cudaCommon_free(nodeNMIGradientArray_d);
+      Cuda::Free(voxelNMIGradientArray_d);
+      Cuda::Free(nodeNMIGradientArray_d);
    }
 #endif
 
@@ -796,7 +792,7 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    if(runGPU)
    {
-      cudaCommon_free(controlPointImageArray_d );
+      Cuda::Free(controlPointImageArray_d );
    }
 #endif
 
@@ -862,9 +858,9 @@ int main(int argc, char **argv)
          fprintf(outputFile, "GPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
          printf("Block-Matching ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime);
          fprintf(outputFile, "Block-Matching ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime);
-         cudaCommon_free(targetPosition_d);
-         cudaCommon_free(resultPosition_d);
-         cudaCommon_free(activeBlock_d);
+         Cuda::Free(targetPosition_d);
+         Cuda::Free(resultPosition_d);
+         Cuda::Free(activeBlock_d);
       }
 #endif
       printf("Block-matching done\n");
@@ -887,8 +883,8 @@ int main(int argc, char **argv)
 #ifdef _USE_CUDA
    if(runGPU)
    {
-      cudaCommon_free(targetImageArray_d);
-      cudaCommon_free(resultImageArray_d);
+      Cuda::Free(targetImageArray_d);
+      Cuda::Free(resultImageArray_d);
    }
 #endif
 
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 2d5428cb..3b0c528e 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -2,7 +2,7 @@
 if(USE_CUDA)
   add_subdirectory(cuda)
   set(NR_CUDA_LIBRARIES
-    _reg_common_cuda
+    CudaCommon
     _reg_cuda_kernels
   )
 endif(USE_CUDA)
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 23c3a081..271273f4 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -73,7 +73,7 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) {
     }
 #ifdef _USE_CUDA
     else if (platformType == PlatformType::Cuda) {
-        NiftyReg::CudaContext& cudaContext = NiftyReg::CudaContext::GetInstance();
+        CudaContext& cudaContext = CudaContext::GetInstance();
         if (gpuIdxIn != 999) {
             gpuIdx = gpuIdxIn;
             cudaContext.SetCudaIdx(gpuIdxIn);
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 84f28bcb..8b246513 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -23,6 +23,7 @@
 #include "_reg_maths.h"
 #include "Debug.hpp"
 
+using namespace NiftyReg;
 using namespace std::string_literals;
 using std::unique_ptr;
 using std::shared_ptr;
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index a5696659..7acea9e9 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -48,7 +48,7 @@ else(NOT COMPILE_RESULT_VAR)
     endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
 endif(NOT COMPILE_RESULT_VAR)
 #-----------------------------------------------------------------------------
-set(NAME _reg_common_cuda)
+set(NAME CudaCommon)
 cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cu)
 target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
 install(TARGETS ${NAME}
@@ -85,7 +85,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     _reg_ssd_gpu.cu
     _reg_optimiser_gpu.cu
 )
-target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda)
+target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} CudaCommon)
 install(TARGETS ${NAME}
     RUNTIME DESTINATION lib
     LIBRARY DESTINATION lib
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index 64ecfcd8..489bdf6c 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -1,5 +1,5 @@
 #include "CudaAladinContent.h"
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 #include "_reg_tools.h"
 #include <algorithm>
 
@@ -55,7 +55,7 @@ void CudaAladinContent::InitVars() {
 /* *************************************************************** */
 void CudaAladinContent::AllocateCuPtrs() {
     if (transformationMatrix != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&transformationMatrix_d, sizeof(mat44) / sizeof(float));
+        Cuda::Allocate<float>(&transformationMatrix_d, sizeof(mat44) / sizeof(float));
 
         float *tmpMat_h = (float*)malloc(sizeof(mat44));
         mat44ToCptr(*(transformationMatrix), tmpMat_h);
@@ -64,33 +64,33 @@ void CudaAladinContent::AllocateCuPtrs() {
         free(tmpMat_h);
     }
     if (referenceMask != nullptr) {
-        cudaCommon_allocateArrayToDevice<int>(&mask_d, reference->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple1<int>(mask_d, referenceMask, reference->nvox);
+        Cuda::Allocate<int>(&mask_d, reference->nvox);
+        Cuda::TransferNiftiToDeviceSimple<int>(mask_d, referenceMask, reference->nvox);
     }
     if (reference != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&referenceImageArray_d, reference->nvox);
-        cudaCommon_allocateArrayToDevice<float>(&referenceMat_d, sizeof(mat44) / sizeof(float));
+        Cuda::Allocate<float>(&referenceImageArray_d, reference->nvox);
+        Cuda::Allocate<float>(&referenceMat_d, sizeof(mat44) / sizeof(float));
 
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(referenceImageArray_d, reference);
+        Cuda::TransferNiftiToDeviceSimple<float>(referenceImageArray_d, reference);
 
         float* targetMat = (float *)malloc(sizeof(mat44)); //freed
         mat44ToCptr(*GetXYZMatrix(*reference), targetMat);
-        cudaCommon_transferFromDeviceToNiftiSimple1<float>(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float));
+        Cuda::TransferNiftiToDeviceSimple<float>(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float));
         free(targetMat);
     }
     if (warped != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, warped->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(warpedImageArray_d, warped);
+        Cuda::Allocate<float>(&warpedImageArray_d, warped->nvox);
+        Cuda::TransferNiftiToDeviceSimple<float>(warpedImageArray_d, warped);
     }
     if (deformationField != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, deformationField->nvox);
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(deformationFieldArray_d, deformationField);
+        Cuda::Allocate<float>(&deformationFieldArray_d, deformationField->nvox);
+        Cuda::TransferNiftiToDeviceSimple<float>(deformationFieldArray_d, deformationField);
     }
     if (floating != nullptr) {
-        cudaCommon_allocateArrayToDevice<float>(&floatingImageArray_d, floating->nvox);
-        cudaCommon_allocateArrayToDevice<float>(&floIJKMat_d, sizeof(mat44) / sizeof(float));
+        Cuda::Allocate<float>(&floatingImageArray_d, floating->nvox);
+        Cuda::Allocate<float>(&floIJKMat_d, sizeof(mat44) / sizeof(float));
 
-        cudaCommon_transferFromDeviceToNiftiSimple<float>(floatingImageArray_d, floating);
+        Cuda::TransferNiftiToDeviceSimple<float>(floatingImageArray_d, floating);
 
         float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44));
         mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h);
@@ -100,16 +100,16 @@ void CudaAladinContent::AllocateCuPtrs() {
 
     if (blockMatchingParams != nullptr) {
         if (blockMatchingParams->referencePosition != nullptr) {
-            cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-            cudaCommon_transferArrayFromCpuToDevice<float>(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+            Cuda::Allocate<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+            Cuda::TransferFromHostToDevice<float>(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         }
         if (blockMatchingParams->warpedPosition != nullptr) {
-            cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-            cudaCommon_transferArrayFromCpuToDevice<float>(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+            Cuda::Allocate<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+            Cuda::TransferFromHostToDevice<float>(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         }
         if (blockMatchingParams->totalBlock != nullptr) {
-            cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
-            cudaCommon_transferFromDeviceToNiftiSimple1<int>(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
+            Cuda::Allocate<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
+            Cuda::TransferNiftiToDeviceSimple<int>(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
         }
         /* // Removed until CUDA SVD is added back
         if (blockMatchingParams->activeBlockNumber > 0 ) {
@@ -123,12 +123,12 @@ void CudaAladinContent::AllocateCuPtrs() {
               n = 12;
            }
 
-           cudaCommon_allocateArrayToDevice<float>(&AR_d, m * n);
-           cudaCommon_allocateArrayToDevice<float>(&U_d, m * m); //only the singular vectors output is needed
-           cudaCommon_allocateArrayToDevice<float>(&VT_d, n * n);
-           cudaCommon_allocateArrayToDevice<float>(&Sigma_d, std::min(m, n));
-           cudaCommon_allocateArrayToDevice<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
-           cudaCommon_allocateArrayToDevice<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+           Cuda::Allocate<float>(&AR_d, m * n);
+           Cuda::Allocate<float>(&U_d, m * m); //only the singular vectors output is needed
+           Cuda::Allocate<float>(&VT_d, n * n);
+           Cuda::Allocate<float>(&Sigma_d, std::min(m, n));
+           Cuda::Allocate<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
+           Cuda::Allocate<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         }
         */
     }
@@ -140,75 +140,75 @@ nifti_image* CudaAladinContent::GetWarped() {
 }
 /* *************************************************************** */
 nifti_image* CudaAladinContent::GetDeformationField() {
-    cudaCommon_transferFromDeviceToCpu<float>((float*)deformationField->data, deformationFieldArray_d, deformationField->nvox);
+    Cuda::TransferFromDeviceToHost<float>((float*)deformationField->data, deformationFieldArray_d, deformationField->nvox);
     return deformationField;
 }
 /* *************************************************************** */
 _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() {
-    cudaCommon_transferFromDeviceToCpu<float>(blockMatchingParams->warpedPosition, warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-    cudaCommon_transferFromDeviceToCpu<float>(blockMatchingParams->referencePosition, referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+    Cuda::TransferFromDeviceToHost<float>(blockMatchingParams->warpedPosition, warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+    Cuda::TransferFromDeviceToHost<float>(blockMatchingParams->referencePosition, referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     return blockMatchingParams;
 }
 /* *************************************************************** */
 void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     if (transformationMatrix != nullptr)
-        cudaCommon_free(transformationMatrix_d);
+        Cuda::Free(transformationMatrix_d);
 
     AladinContent::SetTransformationMatrix(transformationMatrixIn);
     float *tmpMat_h = (float*)malloc(sizeof(mat44));
     mat44ToCptr(*transformationMatrix, tmpMat_h);
 
-    cudaCommon_allocateArrayToDevice<float>(&transformationMatrix_d, sizeof(mat44) / sizeof(float));
+    Cuda::Allocate<float>(&transformationMatrix_d, sizeof(mat44) / sizeof(float));
     NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, tmpMat_h, sizeof(mat44), cudaMemcpyHostToDevice));
     free(tmpMat_h);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetDeformationField(nifti_image *deformationFieldIn) {
     if (deformationField != nullptr)
-        cudaCommon_free(deformationFieldArray_d);
+        Cuda::Free(deformationFieldArray_d);
     AladinContent::SetDeformationField(deformationFieldIn);
 
-    cudaCommon_allocateArrayToDevice<float>(&deformationFieldArray_d, deformationField->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple<float>(deformationFieldArray_d, deformationField);
+    Cuda::Allocate<float>(&deformationFieldArray_d, deformationField->nvox);
+    Cuda::TransferNiftiToDeviceSimple<float>(deformationFieldArray_d, deformationField);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) {
     if (referenceMask != nullptr)
-        cudaCommon_free(mask_d);
+        Cuda::Free(mask_d);
     AladinContent::SetReferenceMask(referenceMaskIn);
-    cudaCommon_allocateArrayToDevice<int>(&mask_d, reference->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple1<int>(mask_d, referenceMaskIn, reference->nvox);
+    Cuda::Allocate<int>(&mask_d, reference->nvox);
+    Cuda::TransferNiftiToDeviceSimple<int>(mask_d, referenceMaskIn, reference->nvox);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetWarped(nifti_image *warped) {
     if (warped != nullptr)
-        cudaCommon_free(warpedImageArray_d);
+        Cuda::Free(warpedImageArray_d);
     AladinContent::SetWarped(warped);
     reg_tools_changeDatatype<float>(warped);
 
-    cudaCommon_allocateArrayToDevice<float>(&warpedImageArray_d, warped->nvox);
-    cudaCommon_transferFromDeviceToNiftiSimple<float>(warpedImageArray_d, warped);
+    Cuda::Allocate<float>(&warpedImageArray_d, warped->nvox);
+    Cuda::TransferNiftiToDeviceSimple<float>(warpedImageArray_d, warped);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
     AladinContent::SetBlockMatchingParams(bmp);
     if (blockMatchingParams->referencePosition != nullptr) {
-        cudaCommon_free(referencePosition_d);
+        Cuda::Free(referencePosition_d);
         //referencePosition
-        cudaCommon_allocateArrayToDevice<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-        cudaCommon_transferArrayFromCpuToDevice<float>(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+        Cuda::Allocate<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+        Cuda::TransferFromHostToDevice<float>(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     }
     if (blockMatchingParams->warpedPosition != nullptr) {
-        cudaCommon_free(warpedPosition_d);
+        Cuda::Free(warpedPosition_d);
         //warpedPosition
-        cudaCommon_allocateArrayToDevice<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-        cudaCommon_transferArrayFromCpuToDevice<float>(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+        Cuda::Allocate<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+        Cuda::TransferFromHostToDevice<float>(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     }
     if (blockMatchingParams->totalBlock != nullptr) {
-        cudaCommon_free(totalBlock_d);
+        Cuda::Free(totalBlock_d);
         //activeBlock
-        cudaCommon_allocateArrayToDevice<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
-        cudaCommon_transferArrayFromCpuToDevice<int>(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
+        Cuda::Allocate<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
+        Cuda::TransferFromHostToDevice<int>(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
     }
     /* // Removed until CUDA SVD is added back
      if (blockMatchingParams->activeBlockNumber > 0) {
@@ -222,12 +222,12 @@ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
              n = 12;
          }
 
-         cudaCommon_allocateArrayToDevice<float>(&AR_d, m * n);
-         cudaCommon_allocateArrayToDevice<float>(&U_d, m * m); //only the singular vectors output is needed
-         cudaCommon_allocateArrayToDevice<float>(&VT_d, n * n);
-         cudaCommon_allocateArrayToDevice<float>(&Sigma_d, std::min(m, n));
-         cudaCommon_allocateArrayToDevice<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
-         cudaCommon_allocateArrayToDevice<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
+         Cuda::Allocate<float>(&AR_d, m * n);
+         Cuda::Allocate<float>(&U_d, m * m); //only the singular vectors output is needed
+         Cuda::Allocate<float>(&VT_d, n * n);
+         Cuda::Allocate<float>(&Sigma_d, std::min(m, n));
+         Cuda::Allocate<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
+         Cuda::Allocate<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
      }
      */
 }
@@ -264,7 +264,7 @@ void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, i
     size_t size = image->nvox;
     float *buffer = (float*)malloc(size * sizeof(float));
 
-    cudaCommon_transferFromDeviceToCpu<float>(buffer, memoryObject, size);
+    Cuda::TransferFromDeviceToHost<float>(buffer, memoryObject, size);
 
     free(image->data);
     image->datatype = type;
@@ -403,44 +403,44 @@ int* CudaAladinContent::GetFloatingDims() {
 /* *************************************************************** */
 void CudaAladinContent::FreeCuPtrs() {
     if (transformationMatrix_d != nullptr)
-        cudaCommon_free(transformationMatrix_d);
+        Cuda::Free(transformationMatrix_d);
 
     if (referenceImageArray_d != nullptr)
-        cudaCommon_free(referenceImageArray_d);
+        Cuda::Free(referenceImageArray_d);
     if (referenceMat_d != nullptr)
-        cudaCommon_free(referenceMat_d);
+        Cuda::Free(referenceMat_d);
 
     if (floatingImageArray_d != nullptr)
-        cudaCommon_free(floatingImageArray_d);
+        Cuda::Free(floatingImageArray_d);
     if (floIJKMat_d != nullptr)
-        cudaCommon_free(floIJKMat_d);
+        Cuda::Free(floIJKMat_d);
 
     if (warpedImageArray_d != nullptr)
-        cudaCommon_free(warpedImageArray_d);
+        Cuda::Free(warpedImageArray_d);
 
     if (deformationFieldArray_d != nullptr)
-        cudaCommon_free(deformationFieldArray_d);
+        Cuda::Free(deformationFieldArray_d);
 
     if (mask_d != nullptr)
-        cudaCommon_free(mask_d);
+        Cuda::Free(mask_d);
 
     if (totalBlock_d != nullptr)
-        cudaCommon_free(totalBlock_d);
+        Cuda::Free(totalBlock_d);
     if (referencePosition_d != nullptr)
-        cudaCommon_free(referencePosition_d);
+        Cuda::Free(referencePosition_d);
     if (warpedPosition_d != nullptr)
-        cudaCommon_free(warpedPosition_d);
+        Cuda::Free(warpedPosition_d);
         /*
-        cudaCommon_free(AR_d);
-        cudaCommon_free(U_d);
-        cudaCommon_free(VT_d);
-        cudaCommon_free(Sigma_d);
-        cudaCommon_free(lengths_d);
-        cudaCommon_free(newWarpedPos_d);
+        Cuda::Free(AR_d);
+        Cuda::Free(U_d);
+        Cuda::Free(VT_d);
+        Cuda::Free(Sigma_d);
+        Cuda::Free(lengths_d);
+        Cuda::Free(newWarpedPos_d);
         */
 }
 /* *************************************************************** */
 bool CudaAladinContent::IsCurrentComputationDoubleCapable() {
-    return NiftyReg::CudaContext::GetInstance().IsCardDoubleCapable();
+    return CudaContext::GetInstance().IsCardDoubleCapable();
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/CudaCommon.cu
old mode 100755
new mode 100644
similarity index 64%
rename from reg-lib/cuda/_reg_common_cuda.cu
rename to reg-lib/cuda/CudaCommon.cu
index 464535bb..bf6bee75
--- a/reg-lib/cuda/_reg_common_cuda.cu
+++ b/reg-lib/cuda/CudaCommon.cu
@@ -1,5 +1,5 @@
 /**
- * @file _reg_common_cuda.cu
+ * @file CudaCommon.cu
  * @author Marc Modat
  * @date 25/03/2009
  *  Copyright (c) 2009-2018, University College London
@@ -9,34 +9,82 @@
  *
  */
 
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 #include <thrust/host_vector.h>
 #include <thrust/device_ptr.h>
 
 /* *************************************************************** */
-template <class NiftiType>
-int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *imageCuda, const nifti_image *img) {
-    const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType);
-    NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, img, sizeof(nifti_image), cudaMemcpyHostToDevice));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->data, img->data, memSize, cudaMemcpyHostToDevice));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->dim, img->dim, 8 * sizeof(int), cudaMemcpyHostToDevice));
-    NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->pixdim, img->pixdim, 8 * sizeof(float), cudaMemcpyHostToDevice));
-    return EXIT_SUCCESS;
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+template <class DataType>
+void Allocate(cudaArray **arrayCuda, const int *dim) {
+    const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3]));
+    const cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DataType>();
+    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(arrayCuda, &texDesc, volumeSize));
+}
+template void Allocate<float>(cudaArray**, const int*);
+template void Allocate<double>(cudaArray**, const int*);
+template void Allocate<float4>(cudaArray**, const int*); // for deformation field
+/* *************************************************************** */
+template <class DataType>
+void Allocate(cudaArray **array1Cuda, cudaArray **array2Cuda, const int *dim) {
+    const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3]));
+    const cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DataType>();
+    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array1Cuda, &texDesc, volumeSize));
+    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array2Cuda, &texDesc, volumeSize));
+}
+template void Allocate<float>(cudaArray**, cudaArray**, const int*);
+template void Allocate<double>(cudaArray**, cudaArray**, const int*);
+template void Allocate<float4>(cudaArray**, cudaArray**, const int*); // for deformation field
+/* *************************************************************** */
+template <class DataType>
+void Allocate(DataType **arrayCuda, const size_t& nVoxels) {
+    NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, nVoxels * sizeof(DataType)));
+}
+template void Allocate<float>(float**, const size_t&);
+template void Allocate<double>(double**, const size_t&);
+template void Allocate<int>(int**, const size_t&);
+template void Allocate<float4>(float4**, const size_t&); // for deformation field
+/* *************************************************************** */
+template <class DataType>
+void Allocate(DataType **arrayCuda, const int *dim) {
+    const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType);
+    NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, memSize));
 }
+template void Allocate<float>(float**, const int*);
+template void Allocate<double>(double**, const int*);
+template void Allocate<int>(int**, const int*);
+template void Allocate<float4>(float4**, const int*); // for deformation field
+/* *************************************************************** */
+template <class DataType>
+void Allocate(DataType **array1Cuda, DataType **array2Cuda, const int *dim) {
+    const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType);
+    NR_CUDA_SAFE_CALL(cudaMalloc(array1Cuda, memSize));
+    NR_CUDA_SAFE_CALL(cudaMalloc(array2Cuda, memSize));
+}
+template void Allocate<float>(float**, float**, const int*);
+template void Allocate<double>(double**, double**, const int*);
+template void Allocate<float4>(float4**, float4**, const int*); // for deformation field
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferNiftiToArrayOnDevice1(DataType *arrayCuda, const nifti_image *img) {
+void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
-        const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType);
-        NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, memSize, cudaMemcpyHostToDevice));
+        cudaMemcpy3DParms copyParams{};
+        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
+        copyParams.srcPtr = make_cudaPitchedPtr(img->data,
+                                                copyParams.extent.width * sizeof(DataType),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = arrayCuda;
+        copyParams.kind = cudaMemcpyHostToDevice;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
     }
-    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(DataType *arrayCuda, const nifti_image *img) {
+void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
         if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
             NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
@@ -57,39 +105,59 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *arrayCuda, const nifti_ima
             for (size_t i = 0; i < voxelNumber; i++)
                 array[i].w = *niftiImgValues++;
         }
-        NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
+        cudaMemcpy3DParms copyParams{};
+        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
+        copyParams.srcPtr = make_cudaPitchedPtr(array.get(),
+                                                copyParams.extent.width * sizeof(DataType),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = arrayCuda;
+        copyParams.kind = cudaMemcpyHostToDevice;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(arrayCuda, img);
+            TransferNiftiToDevice<DataType, float>(arrayCuda, img);
+            break;
         default:
             NR_FATAL_ERROR("The image data type is not supported");
         }
     }
-    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, const nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, const nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<int>(int*, const nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, const nifti_image*);
+template void TransferNiftiToDevice<int>(cudaArray*, const nifti_image*);
+template void TransferNiftiToDevice<float>(cudaArray*, const nifti_image*);
+template void TransferNiftiToDevice<double>(cudaArray*, const nifti_image*);
+template void TransferNiftiToDevice<float4>(cudaArray*, const nifti_image*); // for deformation field
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
+void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
-        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
-        const size_t memSize = voxelNumber * sizeof(DataType);
-        const NiftiType *array1 = static_cast<NiftiType*>(img->data);
-        const NiftiType *array2 = &array1[voxelNumber];
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1, memSize, cudaMemcpyHostToDevice));
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice));
+        NiftiType *array1 = static_cast<NiftiType*>(img->data);
+        NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)];
+        cudaMemcpy3DParms copyParams{};
+        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
+        copyParams.kind = cudaMemcpyHostToDevice;
+        // First timepoint
+        copyParams.srcPtr = make_cudaPitchedPtr(array1,
+                                                copyParams.extent.width * sizeof(DataType),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = array1Cuda;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+        // Second timepoint
+        copyParams.srcPtr = make_cudaPitchedPtr(array2,
+                                                copyParams.extent.width * sizeof(DataType),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = array2Cuda;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
     }
-    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
+void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
         if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
             NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
@@ -119,42 +187,50 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *array1Cuda, DataType *arra
             for (size_t i = 0; i < voxelNumber; i++)
                 array2[i].w = *niftiImgValues++;
         }
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
+
+        cudaMemcpy3DParms copyParams{};
+        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
+        copyParams.kind = cudaMemcpyHostToDevice;
+        // First timepoint
+        copyParams.srcPtr = make_cudaPitchedPtr(array1.get(),
+                                                copyParams.extent.width * sizeof(DataType),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = array1Cuda;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+        // Second timepoint
+        copyParams.srcPtr = make_cudaPitchedPtr(array2.get(),
+                                                copyParams.extent.width * sizeof(DataType),
+                                                copyParams.extent.width,
+                                                copyParams.extent.height);
+        copyParams.dstArray = array2Cuda;
+        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(array1Cuda, array2Cuda, img);
+            TransferNiftiToDevice<DataType, float>(array1Cuda, array2Cuda, img);
+            break;
         default:
             NR_FATAL_ERROR("The image data type is not supported");
         }
     }
-    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(float*, float*, const nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(double*, double*, const nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(float4*, float4*, const nifti_image*); // for deformation field
+template void TransferNiftiToDevice<float>(cudaArray*, cudaArray*, const nifti_image*);
+template void TransferNiftiToDevice<double>(cudaArray*, cudaArray*, const nifti_image*);
+template void TransferNiftiToDevice<float4>(cudaArray*, cudaArray*, const nifti_image*); // for deformation field
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *arrayCuda, const nifti_image *img) {
+void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
-        cudaMemcpy3DParms copyParams{};
-        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-        copyParams.srcPtr = make_cudaPitchedPtr(img->data,
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = arrayCuda;
-        copyParams.kind = cudaMemcpyHostToDevice;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+        const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType);
+        NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, memSize, cudaMemcpyHostToDevice));
     }
-    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *arrayCuda, const nifti_image *img) {
+void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
         if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
             NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
@@ -171,64 +247,42 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *arrayCuda, const nifti_im
             for (size_t i = 0; i < voxelNumber; i++)
                 array[i].z = *niftiImgValues++;
         }
-        if (img->dim[5] == 3) {
+        if (img->dim[5] >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array[i].w = *niftiImgValues++;
         }
-        cudaMemcpy3DParms copyParams{};
-        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-        copyParams.srcPtr = make_cudaPitchedPtr(array.get(),
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = arrayCuda;
-        copyParams.kind = cudaMemcpyHostToDevice;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(arrayCuda, img);
+            TransferNiftiToDevice<DataType, float>(arrayCuda, img);
+            break;
         default:
             NR_FATAL_ERROR("The image data type is not supported");
         }
     }
-    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<int>(cudaArray*, const nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, const nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, const nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, const nifti_image*); // for deformation field
+template void TransferNiftiToDevice<double>(double*, const nifti_image*);
+template void TransferNiftiToDevice<float>(float*, const nifti_image*);
+template void TransferNiftiToDevice<int>(int*, const nifti_image*);
+template void TransferNiftiToDevice<float4>(float4*, const nifti_image*);
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
+void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
-        NiftiType *array1 = static_cast<NiftiType*>(img->data);
-        NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)];
-        cudaMemcpy3DParms copyParams{};
-        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-        copyParams.kind = cudaMemcpyHostToDevice;
-        // First timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr(array1,
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = array1Cuda;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-        // Second timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr(array2,
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = array2Cuda;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        const size_t memSize = voxelNumber * sizeof(DataType);
+        const NiftiType *array1 = static_cast<NiftiType*>(img->data);
+        const NiftiType *array2 = &array1[voxelNumber];
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1, memSize, cudaMemcpyHostToDevice));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice));
     }
-    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
+void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
         if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
             NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
@@ -252,119 +306,53 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *array1Cuda, cudaArray *ar
             for (size_t i = 0; i < voxelNumber; i++)
                 array2[i].z = *niftiImgValues++;
         }
-        if (img->dim[5] == 3) {
+        if (img->dim[5] >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array1[i].w = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
                 array2[i].w = *niftiImgValues++;
         }
-
-        cudaMemcpy3DParms copyParams{};
-        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-        copyParams.kind = cudaMemcpyHostToDevice;
-        // First timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr(array1.get(),
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = array1Cuda;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-        // Second timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr(array2.get(),
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = array2Cuda;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
+        NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
     } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferNiftiToArrayOnDevice1<DataType, float>(array1Cuda, array2Cuda, img);
+            TransferNiftiToDevice<DataType, float>(array1Cuda, array2Cuda, img);
+            break;
         default:
             NR_FATAL_ERROR("The image data type is not supported");
         }
     }
-    return EXIT_SUCCESS;
 }
-template int cudaCommon_transferNiftiToArrayOnDevice<float>(cudaArray*, cudaArray*, const nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<double>(cudaArray*, cudaArray*, const nifti_image*);
-template int cudaCommon_transferNiftiToArrayOnDevice<float4>(cudaArray*, cudaArray*, const nifti_image*); // for deformation field
+template void TransferNiftiToDevice<float>(float*, float*, const nifti_image*);
+template void TransferNiftiToDevice<double>(double*, double*, const nifti_image*);
+template void TransferNiftiToDevice<float4>(float4*, float4*, const nifti_image*); // for deformation field
 /* *************************************************************** */
-template <class DataType>
-int cudaCommon_allocateArrayToDevice(cudaArray **arrayCuda, const int *dim) {
-    const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3]));
-    cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DataType>();
-    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(arrayCuda, &texDesc, volumeSize));
-    return EXIT_SUCCESS;
-}
-template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, const int*);
-template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, const int*);
-template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, const int*); // for deformation field
-/* *************************************************************** */
-template <class DataType>
-int cudaCommon_allocateArrayToDevice(cudaArray **array1Cuda, cudaArray **array2Cuda, const int *dim) {
-    const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3]));
-    cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DataType>();
-    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array1Cuda, &texDesc, volumeSize));
-    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array2Cuda, &texDesc, volumeSize));
-    return EXIT_SUCCESS;
-}
-template int cudaCommon_allocateArrayToDevice<float>(cudaArray**, cudaArray**, const int*);
-template int cudaCommon_allocateArrayToDevice<double>(cudaArray**, cudaArray**, const int*);
-template int cudaCommon_allocateArrayToDevice<float4>(cudaArray**, cudaArray**, const int*); // for deformation field
-/* *************************************************************** */
-template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType **arrayCuda, const int *dim) {
-    const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType);
-    NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, memSize));
-    return EXIT_SUCCESS;
-}
-template int cudaCommon_allocateArrayToDevice<float>(float**, const int*);
-template int cudaCommon_allocateArrayToDevice<double>(double**, const int*);
-template int cudaCommon_allocateArrayToDevice<int>(int**, const int*);
-template int cudaCommon_allocateArrayToDevice<float4>(float4**, const int*); // for deformation field
-/* *************************************************************** */
-template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType **arrayCuda, const size_t& nVoxels) {
-    NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, nVoxels * sizeof(DataType)));
-    return EXIT_SUCCESS;
-}
-template int cudaCommon_allocateArrayToDevice<float>(float**, const size_t&);
-template int cudaCommon_allocateArrayToDevice<double>(double**, const size_t&);
-template int cudaCommon_allocateArrayToDevice<int>(int**, const size_t&);
-template int cudaCommon_allocateArrayToDevice<float4>(float4**, const size_t&); // for deformation field
-/* *************************************************************** */
-template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType **array1Cuda, DataType **array2Cuda, const int *dim) {
-    const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType);
-    NR_CUDA_SAFE_CALL(cudaMalloc(array1Cuda, memSize));
-    NR_CUDA_SAFE_CALL(cudaMalloc(array2Cuda, memSize));
-    return EXIT_SUCCESS;
-}
-template int cudaCommon_allocateArrayToDevice<float>(float**, float**, const int*);
-template int cudaCommon_allocateArrayToDevice<double>(double**, double**, const int*);
-template int  cudaCommon_allocateArrayToDevice<float4>(float4**, float4**, const int*); // for deformation field
-/* *************************************************************** */
-template <class DataType>
-int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, const DataType *cuPtr, const size_t& nElements) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(cpuPtr, cuPtr, nElements * sizeof(DataType), cudaMemcpyDeviceToHost));
-    return EXIT_SUCCESS;
+void TransferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) {
+    if (img->datatype != NIFTI_TYPE_FLOAT32)
+        NR_FATAL_ERROR("The image data type is not supported");
+    cudaMemcpy3DParms copyParams{};
+    copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
+    copyParams.srcArray = const_cast<cudaArray*>(arrayCuda);
+    copyParams.dstPtr = make_cudaPitchedPtr(img->data,
+                                            copyParams.extent.width * sizeof(float),
+                                            copyParams.extent.width,
+                                            copyParams.extent.height);
+    copyParams.kind = cudaMemcpyDeviceToHost;
+    NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
 }
-template int cudaCommon_transferFromDeviceToCpu<float>(float*, const float*, const size_t&);
-template int cudaCommon_transferFromDeviceToCpu<double>(double*, const double*, const size_t&);
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *arrayCuda) {
+void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
         NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost));
     }
-    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) {
+void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) {
     if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
         if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32)
@@ -387,39 +375,22 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array
             for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array[i].w;
         }
-        return EXIT_SUCCESS;
     } else {
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferFromDeviceToNifti1<DataType, float>(img, arrayCuda);
+            TransferFromDeviceToNifti<DataType, float>(img, arrayCuda);
+            break;
         default:
             NR_FATAL_ERROR("The image data type is not supported");
-            return EXIT_FAILURE;
         }
     }
 }
-template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image*, const float*);
-template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image*, const double*);
-template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, const float4*); // for deformation field
-/* *************************************************************** */
-template<>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) {
-    if (img->datatype != NIFTI_TYPE_FLOAT32)
-        NR_FATAL_ERROR("The image data type is not supported");
-    cudaMemcpy3DParms copyParams{};
-    copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-    copyParams.srcArray = const_cast<cudaArray*>(arrayCuda);
-    copyParams.dstPtr = make_cudaPitchedPtr(img->data,
-                                            copyParams.extent.width * sizeof(float),
-                                            copyParams.extent.width,
-                                            copyParams.extent.height);
-    copyParams.kind = cudaMemcpyDeviceToHost;
-    NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-    return EXIT_SUCCESS;
-}
+template void TransferFromDeviceToNifti<float>(nifti_image*, const float*);
+template void TransferFromDeviceToNifti<double>(nifti_image*, const double*);
+template void TransferFromDeviceToNifti<float4>(nifti_image*, const float4*); // for deformation field
 /* *************************************************************** */
 template <class DataType, class NiftiType>
-int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
+void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
     if (sizeof(DataType) != sizeof(NiftiType)) {
         NR_FATAL_ERROR("The host and device arrays are of different types");
     } else {
@@ -429,11 +400,10 @@ int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *arra
         NR_CUDA_SAFE_CALL(cudaMemcpy(array1, array1Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
         NR_CUDA_SAFE_CALL(cudaMemcpy(array2, array2Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
     }
-    return EXIT_SUCCESS;
 }
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
+void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
     if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
         if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32)
@@ -444,114 +414,100 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array
         const thrust::host_vector<float4> array1(array1CudaPtr, array1CudaPtr + voxelNumber);
         const thrust::host_vector<float4> array2(array2CudaPtr, array2CudaPtr + voxelNumber);
         float *niftiImgValues = static_cast<float*>(img->data);
-        for (size_t i = 0; i < voxelNumber; i++) {
+        for (size_t i = 0; i < voxelNumber; i++)
             *niftiImgValues++ = array1[i].x;
-        }
-        for (size_t i = 0; i < voxelNumber; i++) {
+        for (size_t i = 0; i < voxelNumber; i++)
             *niftiImgValues++ = array2[i].x;
-        }
         if (img->dim[5] >= 2) {
-            for (size_t i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array1[i].y;
-            }
-            for (size_t i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array2[i].y;
-            }
         }
         if (img->dim[5] >= 3) {
-            for (size_t i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array1[i].z;
-            }
-            for (size_t i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array2[i].z;
-            }
         }
         if (img->dim[5] >= 4) {
-            for (size_t i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array1[i].w;
-            }
-            for (size_t i = 0; i < voxelNumber; i++) {
+            for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array2[i].w;
-            }
         }
-        return EXIT_SUCCESS;
     } else {
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            return cudaCommon_transferFromDeviceToNifti1<DataType, float>(img, array1Cuda, array2Cuda);
+            TransferFromDeviceToNifti<DataType, float>(img, array1Cuda, array2Cuda);
+            break;
         default:
             NR_FATAL_ERROR("The image data type is not supported");
-            return EXIT_FAILURE;
         }
     }
 }
-template int cudaCommon_transferFromDeviceToNifti<float>(nifti_image*, const float*, const float*);
-template int cudaCommon_transferFromDeviceToNifti<double>(nifti_image*, const double*, const double*);
-template int cudaCommon_transferFromDeviceToNifti<float4>(nifti_image*, const float4*, const float4*); // for deformation field
+template void TransferFromDeviceToNifti<float>(nifti_image*, const float*, const float*);
+template void TransferFromDeviceToNifti<double>(nifti_image*, const double*, const double*);
+template void TransferFromDeviceToNifti<float4>(nifti_image*, const float4*, const float4*); // for deformation field
 /* *************************************************************** */
-void cudaCommon_free(cudaArray *arrayCuda) {
-    if (arrayCuda != nullptr)
-        NR_CUDA_SAFE_CALL(cudaFreeArray(arrayCuda));
+template <class DataType>
+void TransferNiftiToDeviceSimple(DataType *arrayCuda, const nifti_image *img) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice));
 }
+template void TransferNiftiToDeviceSimple<int>(int*, const nifti_image*);
+template void TransferNiftiToDeviceSimple<float>(float*, const nifti_image*);
+template void TransferNiftiToDeviceSimple<double>(double*, const nifti_image*);
 /* *************************************************************** */
 template <class DataType>
-void cudaCommon_free(DataType *arrayCuda) {
-    if (arrayCuda != nullptr)
-        NR_CUDA_SAFE_CALL(cudaFree(arrayCuda));
+void TransferNiftiToDeviceSimple(DataType *arrayCuda, const DataType *img, const size_t& nvox) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice));
 }
-template void cudaCommon_free<int>(int*);
-template void cudaCommon_free<float>(float*);
-template void cudaCommon_free<double>(double*);
-template void cudaCommon_free<float4>(float4*);
+template void TransferNiftiToDeviceSimple<int>(int*, const int*, const size_t&);
+template void TransferNiftiToDeviceSimple<float>(float*, const float*, const size_t&);
+template void TransferNiftiToDeviceSimple<double>(double*, const double*, const size_t&);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToNiftiSimple(DataType *arrayCuda, const nifti_image *img) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice));
-    return EXIT_SUCCESS;
+void TransferFromDeviceToHost(DataType *array, const DataType *arrayCuda, const size_t& nElements) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array, arrayCuda, nElements * sizeof(DataType), cudaMemcpyDeviceToHost));
 }
-template int cudaCommon_transferFromDeviceToNiftiSimple<int>(int*, const nifti_image*);
-template int cudaCommon_transferFromDeviceToNiftiSimple<float>(float*, const nifti_image*);
-template int cudaCommon_transferFromDeviceToNiftiSimple<double>(double*, const nifti_image*);
+template void TransferFromDeviceToHost<float>(float*, const float*, const size_t&);
+template void TransferFromDeviceToHost<double>(double*, const double*, const size_t&);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *arrayCuda, const DataType *img, const size_t& nvox) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice));
-    return EXIT_SUCCESS;
+void TransferFromHostToDevice(DataType *arrayCuda, const DataType *array, const size_t& nElements) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array, nElements * sizeof(DataType), cudaMemcpyHostToDevice));
 }
-template int cudaCommon_transferFromDeviceToNiftiSimple1<int>(int*, const int*, const size_t&);
-template int cudaCommon_transferFromDeviceToNiftiSimple1<float>(float*, const float*, const size_t&);
-template int cudaCommon_transferFromDeviceToNiftiSimple1<double>(double*, const double*, const size_t&);
+template void TransferFromHostToDevice<int>(int*, const int*, const size_t&);
+template void TransferFromHostToDevice<float>(float*, const float*, const size_t&);
+template void TransferFromHostToDevice<double>(double*, const double*, const size_t&);
 /* *************************************************************** */
-template <class DataType>
-int cudaCommon_transferArrayFromCpuToDevice(DataType *arrayCuda, const DataType *arrayCpu, const size_t& nElements) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, arrayCpu, nElements * sizeof(DataType), cudaMemcpyHostToDevice));
-    return EXIT_SUCCESS;
+void Free(cudaArray *arrayCuda) {
+    if (arrayCuda != nullptr)
+        NR_CUDA_SAFE_CALL(cudaFreeArray(arrayCuda));
 }
-template int cudaCommon_transferArrayFromCpuToDevice<int>(int*, const int*, const size_t&);
-template int cudaCommon_transferArrayFromCpuToDevice<float>(float*, const float*, const size_t&);
-template int cudaCommon_transferArrayFromCpuToDevice<double>(double*, const double*, const size_t&);
 /* *************************************************************** */
 template <class DataType>
-int cudaCommon_transferArrayFromDeviceToCpu(DataType *arrayCpu, const DataType *arrayCuda, const size_t& nElements) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCpu, arrayCuda, nElements * sizeof(DataType), cudaMemcpyDeviceToHost));
-    return EXIT_SUCCESS;
+void Free(DataType *arrayCuda) {
+    if (arrayCuda != nullptr)
+        NR_CUDA_SAFE_CALL(cudaFree(arrayCuda));
 }
-template int cudaCommon_transferArrayFromDeviceToCpu<int>(int*, const int*, const size_t&);
-template int cudaCommon_transferArrayFromDeviceToCpu<float>(float*, const float*, const size_t&);
-template int cudaCommon_transferArrayFromDeviceToCpu<double>(double*, const double*, const size_t&);
+template void Free<int>(int*);
+template void Free<float>(float*);
+template void Free<double>(double*);
+template void Free<float4>(float4*);
 /* *************************************************************** */
-void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) {
+void DestroyTextureObject(cudaTextureObject_t *texObj) {
     NR_CUDA_SAFE_CALL(cudaDestroyTextureObject(*texObj));
     delete texObj;
 }
 /* *************************************************************** */
-UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
-                                                      const cudaResourceType& resType,
-                                                      const size_t& size,
-                                                      const cudaChannelFormatKind& channelFormat,
-                                                      const unsigned& channelCount,
-                                                      const cudaTextureFilterMode& filterMode,
-                                                      const bool& normalizedCoordinates) {
+UniqueTextureObjectPtr CreateTextureObject(const void *devPtr,
+                                           const cudaResourceType& resType,
+                                           const size_t& size,
+                                           const cudaChannelFormatKind& channelFormat,
+                                           const unsigned& channelCount,
+                                           const cudaTextureFilterMode& filterMode,
+                                           const bool& normalizedCoordinates) {
     // Specify texture
     cudaResourceDesc resDesc{};
     resDesc.resType = resType;
@@ -585,9 +541,11 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
     texDesc.normalizedCoords = normalizedCoordinates;
 
     // Create texture object
-    UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), cudaCommon_destroyTextureObject);
+    UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), DestroyTextureObject);
     NR_CUDA_SAFE_CALL(cudaCreateTextureObject(texObj.get(), &resDesc, &texDesc, nullptr));
 
     return texObj;
 }
 /* *************************************************************** */
+} // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/CudaCommon.hpp
old mode 100755
new mode 100644
similarity index 68%
rename from reg-lib/cuda/_reg_common_cuda.h
rename to reg-lib/cuda/CudaCommon.hpp
index 688cb6da..f8319b79
--- a/reg-lib/cuda/_reg_common_cuda.h
+++ b/reg-lib/cuda/CudaCommon.hpp
@@ -1,4 +1,4 @@
-/** @file _reg_common_cuda.h
+/** @file CudaCommon.hpp
  * @author Marc Modat
  * @date 25/03/2009.
  *  Copyright (c) 2009-2018, University College London
@@ -22,7 +22,9 @@ struct __attribute__((aligned(4))) float4 {
 };
 #endif
 /* *************************************************************** */
-namespace NiftyReg::Cuda::Internal {
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+namespace Internal {
 /* *************************************************************** */
 inline void SafeCall(const std::string& file, const int& line, const std::string& funcName) {
 #if CUDART_VERSION >= 3200
@@ -56,84 +58,67 @@ inline void CheckKernel(const std::string& file, const int& line, const std::str
 #define NR_CUDA_SAFE_CALL(call)             { call; NiftyReg::Cuda::Internal::SafeCall(__FILE__, __LINE__, NR_FUNCTION); }
 #define NR_CUDA_CHECK_KERNEL(grid, block)   NiftyReg::Cuda::Internal::CheckKernel(__FILE__, __LINE__, NR_FUNCTION, grid, block)
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(cudaArray**, const int*);
+void Allocate(cudaArray**, const int*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, const int*);
+void Allocate(cudaArray**, cudaArray**, const int*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType**, const size_t&);
+void Allocate(DataType**, const size_t&);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType**, const int*);
+void Allocate(DataType**, const int*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_allocateArrayToDevice(DataType**, DataType**, const int*);
+void Allocate(DataType**, DataType**, const int*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*);
+void TransferNiftiToDevice(cudaArray*, const nifti_image*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, const nifti_image*);
+void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_image*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(DataType*, const nifti_image*);
+void TransferNiftiToDevice(DataType*, const nifti_image*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_transferNiftiToArrayOnDevice(DataType*, DataType*, const nifti_image*);
+void TransferNiftiToDevice(DataType*, DataType*, const nifti_image*);
 /* *************************************************************** */
-extern "C++"
-template <class DataType>
-int cudaCommon_transferFromDeviceToNifti(nifti_image*, const DataType*);
+void TransferFromDeviceToNifti(nifti_image*, const cudaArray*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_transferFromDeviceToNifti(nifti_image*, const DataType*, const DataType*);
-/* *************************************************************** */
-extern "C++"
-void cudaCommon_free(cudaArray*);
+void TransferFromDeviceToNifti(nifti_image*, const DataType*);
 /* *************************************************************** */
-extern "C++" template <class DataType>
-void cudaCommon_free(DataType*);
+template <class DataType>
+void TransferFromDeviceToNifti(nifti_image*, const DataType*, const DataType*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_transferFromDeviceToNiftiSimple(DataType*, const nifti_image*);
+void TransferNiftiToDeviceSimple(DataType*, const nifti_image*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_transferFromDeviceToNiftiSimple1(DataType*, const DataType*, const size_t&);
+void TransferNiftiToDeviceSimple(DataType*, const DataType*, const size_t&);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_transferFromDeviceToCpu(DataType*, const DataType*, const size_t&);
+void TransferFromDeviceToHost(DataType*, const DataType*, const size_t&);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_transferArrayFromCpuToDevice(DataType*, const DataType*, const size_t&);
+void TransferFromHostToDevice(DataType*, const DataType*, const size_t&);
+/* *************************************************************** */
+void Free(cudaArray*);
 /* *************************************************************** */
-extern "C++"
 template <class DataType>
-int cudaCommon_transferArrayFromDeviceToCpu(DataType*, const DataType*, const size_t&);
+void Free(DataType*);
 /* *************************************************************** */
 using UniqueTextureObjectPtr = unique_ptr<cudaTextureObject_t, void(*)(cudaTextureObject_t*)>;
 /* *************************************************************** */
-extern "C++"
-UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr,
-													  const cudaResourceType& resType,
-													  const size_t& size = 0,
-													  const cudaChannelFormatKind& channelFormat = cudaChannelFormatKindNone,
-													  const unsigned& channelCount = 1,
-													  const cudaTextureFilterMode& filterMode = cudaFilterModePoint,
-													  const bool& normalizedCoordinates = false);
+UniqueTextureObjectPtr CreateTextureObject(const void *devPtr,
+                                           const cudaResourceType& resType,
+                                           const size_t& size = 0,
+                                           const cudaChannelFormatKind& channelFormat = cudaChannelFormatKindNone,
+                                           const unsigned& channelCount = 1,
+                                           const cudaTextureFilterMode& filterMode = cudaFilterModePoint,
+                                           const bool& normalizedCoordinates = false);
+/* *************************************************************** */
+} // namespace NiftyReg::Cuda
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 40702afa..8ebdb816 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -128,14 +128,14 @@ double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimi
     if (!optimiseX && !optimiseY && !optimiseZ) return 0;
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3);
-    return NiftyReg::Cuda::GetMaximalLength(con.GetTransformationGradientCuda(), voxelsPerVolume, optimiseX, optimiseY, optimiseZ);
+    return Cuda::GetMaximalLength(con.GetTransformationGradientCuda(), voxelsPerVolume, optimiseX, optimiseY, optimiseZ);
 }
 /* *************************************************************** */
 void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
     if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return;
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3);
-    NiftyReg::Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, static_cast<float>(maxGradLength), optimiseX, optimiseY, optimiseZ);
+    Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, static_cast<float>(maxGradLength), optimiseX, optimiseY, optimiseZ);
 }
 /* *************************************************************** */
 void CudaCompute::SmoothGradient(float sigma) {
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index ab0eed9a..72db366d 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -27,51 +27,51 @@ void CudaContent::AllocateImages() {
         reg_tools_changeDatatype<float>(reference);
     if (floating->nbyper != NIFTI_TYPE_FLOAT32)
         reg_tools_changeDatatype<float>(floating);
-    cudaCommon_allocateArrayToDevice<float>(&referenceCuda, reference->dim);
-    cudaCommon_transferNiftiToArrayOnDevice<float>(referenceCuda, reference);
-    cudaCommon_allocateArrayToDevice<float>(&floatingCuda, floating->dim);
-    cudaCommon_transferNiftiToArrayOnDevice<float>(floatingCuda, floating);
+    Cuda::Allocate<float>(&referenceCuda, reference->dim);
+    Cuda::TransferNiftiToDevice<float>(referenceCuda, reference);
+    Cuda::Allocate<float>(&floatingCuda, floating->dim);
+    Cuda::TransferNiftiToDevice<float>(floatingCuda, floating);
 }
 /* *************************************************************** */
 void CudaContent::DeallocateImages() {
     if (referenceCuda) {
-        cudaCommon_free(referenceCuda);
+        Cuda::Free(referenceCuda);
         referenceCuda = nullptr;
     }
     if (floatingCuda) {
-        cudaCommon_free(floatingCuda);
+        Cuda::Free(floatingCuda);
         floatingCuda = nullptr;
     }
 }
 /* *************************************************************** */
 void CudaContent::AllocateDeformationField() {
-    cudaCommon_allocateArrayToDevice(&deformationFieldCuda, deformationField->dim);
+    Cuda::Allocate(&deformationFieldCuda, deformationField->dim);
 }
 /* *************************************************************** */
 void CudaContent::DeallocateDeformationField() {
     if (deformationFieldCuda) {
-        cudaCommon_free(deformationFieldCuda);
+        Cuda::Free(deformationFieldCuda);
         deformationFieldCuda = nullptr;
     }
 }
 /* *************************************************************** */
 void CudaContent::AllocateWarped() {
-    cudaCommon_allocateArrayToDevice<float>(&warpedCuda, warped->dim);
+    Cuda::Allocate<float>(&warpedCuda, warped->dim);
 }
 /* *************************************************************** */
 void CudaContent::DeallocateWarped() {
     if (warpedCuda) {
-        cudaCommon_free(warpedCuda);
+        Cuda::Free(warpedCuda);
         warpedCuda = nullptr;
     }
 }
 /* *************************************************************** */
 bool CudaContent::IsCurrentComputationDoubleCapable() {
-    return NiftyReg::CudaContext::GetInstance().IsCardDoubleCapable();
+    return CudaContext::GetInstance().IsCardDoubleCapable();
 }
 /* *************************************************************** */
 nifti_image* CudaContent::GetDeformationField() {
-    cudaCommon_transferFromDeviceToNifti(deformationField, deformationFieldCuda);
+    Cuda::TransferFromDeviceToNifti(deformationField, deformationFieldCuda);
     return deformationField;
 }
 /* *************************************************************** */
@@ -81,18 +81,18 @@ void CudaContent::SetDeformationField(nifti_image *deformationFieldIn) {
     if (!deformationField) return;
 
     AllocateDeformationField();
-    cudaCommon_transferNiftiToArrayOnDevice(deformationFieldCuda, deformationField);
+    Cuda::TransferNiftiToDevice(deformationFieldCuda, deformationField);
 }
 /* *************************************************************** */
 void CudaContent::UpdateDeformationField() {
-    cudaCommon_transferNiftiToArrayOnDevice(deformationFieldCuda, deformationField);
+    Cuda::TransferNiftiToDevice(deformationFieldCuda, deformationField);
 }
 /* *************************************************************** */
 void CudaContent::SetReferenceMask(int *referenceMaskIn) {
     Content::SetReferenceMask(referenceMaskIn);
 
     if (referenceMaskCuda) {
-        cudaCommon_free(referenceMaskCuda);
+        Cuda::Free(referenceMaskCuda);
         referenceMaskCuda = nullptr;
     }
 
@@ -109,7 +109,7 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) {
         }
     }
 
-    cudaCommon_allocateArrayToDevice(&referenceMaskCuda, activeVoxelNumber);
+    Cuda::Allocate(&referenceMaskCuda, activeVoxelNumber);
     NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, targetMask, activeVoxelNumber * sizeof(*targetMask), cudaMemcpyHostToDevice));
     NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask));
 }
@@ -118,7 +118,7 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     Content::SetTransformationMatrix(transformationMatrixIn);
 
     if (transformationMatrixCuda) {
-        cudaCommon_free(transformationMatrixCuda);
+        Cuda::Free(transformationMatrixCuda);
         transformationMatrixCuda = nullptr;
     }
 
@@ -143,11 +143,11 @@ void CudaContent::SetWarped(nifti_image *warpedIn) {
 
     reg_tools_changeDatatype<float>(warped);
     AllocateWarped();
-    cudaCommon_transferNiftiToArrayOnDevice(warpedCuda, warped);
+    Cuda::TransferNiftiToDevice(warpedCuda, warped);
 }
 /* *************************************************************** */
 void CudaContent::UpdateWarped() {
-    cudaCommon_transferNiftiToArrayOnDevice(warpedCuda, warped);
+    Cuda::TransferNiftiToDevice(warpedCuda, warped);
 }
 /* *************************************************************** */
 template<class DataType>
@@ -182,7 +182,7 @@ void CudaContent::FillImageData(nifti_image *image, float *memoryObject, int dat
     size_t size = image->nvox;
     float *buffer = (float*)malloc(size * sizeof(float));
 
-    cudaCommon_transferFromDeviceToCpu(buffer, memoryObject, size);
+    Cuda::TransferFromDeviceToHost(buffer, memoryObject, size);
 
     free(image->data);
     image->datatype = datatype;
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index 16f8e7b2..d914bbc2 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "Content.h"
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 
 class CudaContent: public virtual Content {
 public:
diff --git a/reg-lib/cuda/CudaContext.cpp b/reg-lib/cuda/CudaContext.cpp
index f0fb9f06..9be42aca 100644
--- a/reg-lib/cuda/CudaContext.cpp
+++ b/reg-lib/cuda/CudaContext.cpp
@@ -1,5 +1,5 @@
 #include "CudaContext.hpp"
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 
 namespace NiftyReg {
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaDefContent.cpp b/reg-lib/cuda/CudaDefContent.cpp
index a78b3447..44ce96ed 100644
--- a/reg-lib/cuda/CudaDefContent.cpp
+++ b/reg-lib/cuda/CudaDefContent.cpp
@@ -20,43 +20,43 @@ CudaDefContent::~CudaDefContent() {
 }
 /* *************************************************************** */
 void CudaDefContent::AllocateWarpedGradient() {
-    cudaCommon_allocateArrayToDevice(&warpedGradientCuda, warpedGradient->dim);
+    Cuda::Allocate(&warpedGradientCuda, warpedGradient->dim);
 }
 /* *************************************************************** */
 void CudaDefContent::DeallocateWarpedGradient() {
     if (warpedGradientCuda != nullptr) {
-        cudaCommon_free(warpedGradientCuda);
+        Cuda::Free(warpedGradientCuda);
         warpedGradientCuda = nullptr;
     }
 }
 /* *************************************************************** */
 void CudaDefContent::AllocateVoxelBasedMeasureGradient() {
-    cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient->dim);
+    Cuda::Allocate(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient->dim);
 }
 /* *************************************************************** */
 void CudaDefContent::DeallocateVoxelBasedMeasureGradient() {
     if (voxelBasedMeasureGradientCuda) {
-        cudaCommon_free(voxelBasedMeasureGradientCuda);
+        Cuda::Free(voxelBasedMeasureGradientCuda);
         voxelBasedMeasureGradientCuda = nullptr;
     }
 }
 /* *************************************************************** */
 nifti_image* CudaDefContent::GetVoxelBasedMeasureGradient() {
-    cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda);
+    Cuda::TransferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda);
     return voxelBasedMeasureGradient;
 }
 /* *************************************************************** */
 void CudaDefContent::UpdateVoxelBasedMeasureGradient() {
-    cudaCommon_transferNiftiToArrayOnDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient);
+    Cuda::TransferNiftiToDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient);
 }
 /* *************************************************************** */
 nifti_image* CudaDefContent::GetWarpedGradient() {
-    cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda);
+    Cuda::TransferFromDeviceToNifti(warpedGradient, warpedGradientCuda);
     return warpedGradient;
 }
 /* *************************************************************** */
 void CudaDefContent::UpdateWarpedGradient() {
-    cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda, warpedGradient);
+    Cuda::TransferNiftiToDevice(warpedGradientCuda, warpedGradient);
 }
 /* *************************************************************** */
 void CudaDefContent::ZeroVoxelBasedMeasureGradient() {
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index 9e2f184f..1ea4efa8 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -24,44 +24,44 @@ CudaF3dContent::~CudaF3dContent() {
 }
 /* *************************************************************** */
 void CudaF3dContent::AllocateControlPointGrid() {
-    cudaCommon_allocateArrayToDevice(&controlPointGridCuda, controlPointGrid->dim);
-    cudaCommon_transferNiftiToArrayOnDevice(controlPointGridCuda, controlPointGrid);
+    Cuda::Allocate(&controlPointGridCuda, controlPointGrid->dim);
+    Cuda::TransferNiftiToDevice(controlPointGridCuda, controlPointGrid);
 }
 /* *************************************************************** */
 void CudaF3dContent::DeallocateControlPointGrid() {
     if (controlPointGridCuda) {
-        cudaCommon_free(controlPointGridCuda);
+        Cuda::Free(controlPointGridCuda);
         controlPointGridCuda = nullptr;
     }
 }
 /* *************************************************************** */
 void CudaF3dContent::AllocateTransformationGradient() {
-    cudaCommon_allocateArrayToDevice(&transformationGradientCuda, transformationGradient->dim);
+    Cuda::Allocate(&transformationGradientCuda, transformationGradient->dim);
 }
 /* *************************************************************** */
 void CudaF3dContent::DeallocateTransformationGradient() {
     if (transformationGradientCuda) {
-        cudaCommon_free(transformationGradientCuda);
+        Cuda::Free(transformationGradientCuda);
         transformationGradientCuda = nullptr;
     }
 }
 /* *************************************************************** */
 nifti_image* CudaF3dContent::GetControlPointGrid() {
-    cudaCommon_transferFromDeviceToNifti(controlPointGrid, controlPointGridCuda);
+    Cuda::TransferFromDeviceToNifti(controlPointGrid, controlPointGridCuda);
     return controlPointGrid;
 }
 /* *************************************************************** */
 void CudaF3dContent::UpdateControlPointGrid() {
-    cudaCommon_transferNiftiToArrayOnDevice(controlPointGridCuda, controlPointGrid);
+    Cuda::TransferNiftiToDevice(controlPointGridCuda, controlPointGrid);
 }
 /* *************************************************************** */
 nifti_image* CudaF3dContent::GetTransformationGradient() {
-    cudaCommon_transferFromDeviceToNifti(transformationGradient, transformationGradientCuda);
+    Cuda::TransferFromDeviceToNifti(transformationGradient, transformationGradientCuda);
     return transformationGradient;
 }
 /* *************************************************************** */
 void CudaF3dContent::UpdateTransformationGradient() {
-    cudaCommon_transferNiftiToArrayOnDevice(transformationGradientCuda, transformationGradient);
+    Cuda::TransferNiftiToDevice(transformationGradientCuda, transformationGradient);
 }
 /* *************************************************************** */
 void CudaF3dContent::ZeroTransformationGradient() {
diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
index 96810cfe..8516a148 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -23,13 +23,13 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
                                        const bool& optimiseY,
                                        const bool& optimiseZ) {
     // Create a texture object for the imageCuda
-    auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear,
-                                                       nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
+                                                  nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     float *dists = nullptr;
     NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float)));
 
-    const unsigned threads = NiftyReg::CudaContext::GetBlockSize()->GetMaximalLength;
+    const unsigned threads = CudaContext::GetBlockSize()->GetMaximalLength;
     const unsigned blocks = static_cast<unsigned>(reg_ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
     dim3 blockDims(threads, 1, 1);
     dim3 gridDims(blocks, blocks, 1);
@@ -64,7 +64,7 @@ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda,
                                        const bool& optimiseX,
                                        const bool& optimiseY,
                                        const bool& optimiseZ) {
-    const unsigned threads = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned threads = CudaContext::GetBlockSize()->reg_arithmetic;
     const unsigned blocks = static_cast<unsigned>(ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
     const dim3 blockDims(threads, 1, 1);
     const dim3 gridDims(blocks, blocks, 1);
diff --git a/reg-lib/cuda/CudaNormaliseGradient.hpp b/reg-lib/cuda/CudaNormaliseGradient.hpp
index 45c1f204..5d619d2f 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.hpp
+++ b/reg-lib/cuda/CudaNormaliseGradient.hpp
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 
 namespace NiftyReg::Cuda {
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_cudainfo.cpp b/reg-lib/cuda/_reg_cudainfo.cpp
index ea58f824..3d7c9c54 100644
--- a/reg-lib/cuda/_reg_cudainfo.cpp
+++ b/reg-lib/cuda/_reg_cudainfo.cpp
@@ -1,4 +1,4 @@
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 #include "_reg_tools.h"
 
 void showCUDAInfo() {
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index fcea21ea..820cffe8 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -27,7 +27,7 @@ void reg_affine_positionField_gpu(const mat44 *affineMatrix,
     // Affine * TargetMat is constant
     const mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix);
 
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_affine_deformationField;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_affine_deformationField;
     const unsigned grids = (unsigned)ceil(sqrtf((float)targetImage->nvox / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h
index 33efd396..3c748bfd 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 
 extern "C++"
 void reg_affine_positionField_gpu(const mat44 *affineMatrix,
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index e1a251e7..476b69b6 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -28,13 +28,13 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
                                                         controlPointImage->dy / referenceImage->dy,
                                                         controlPointImage->dz / referenceImage->dz);
 
-    auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                              controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear,
-                                                      activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1);
+    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                         controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear,
+                                                 activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1);
 
     if (referenceImage->nz > 1) {
-        const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_getDeformationField3D;
+        const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField3D;
         const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
@@ -49,7 +49,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
                                                                                               bspline);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_getDeformationField2D;
+        const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField2D;
         const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
@@ -67,12 +67,12 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
 }
 /* *************************************************************** */
 float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const size_t controlPointGridSize = controlPointNumber * sizeof(float4);
-    auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                              controlPointGridSize, cudaChannelFormatKindFloat, 4);
+    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                         controlPointGridSize, cudaChannelFormatKindFloat, 4);
 
     // First compute all the second derivatives
     float4 *secondDerivativeValuesCuda;
@@ -102,8 +102,8 @@ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, c
     // Compute the bending energy from the second derivatives
     float *penaltyTermCuda;
     NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTermCuda, controlPointNumber * sizeof(float)));
-    auto secondDerivativesTexture = cudaCommon_createTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear,
-                                                                   secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
+    auto secondDerivativesTexture = Cuda::CreateTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear,
+                                                              secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
     if (controlPointImage->nz > 1) {
         const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy3D;
         const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
@@ -134,12 +134,12 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI
                                                 const float4 *controlPointImageCuda,
                                                 float4 *transGradientCuda,
                                                 float bendingEnergyWeight) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const size_t controlPointGridSize = controlPointNumber * sizeof(float4);
-    auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                              controlPointGridSize, cudaChannelFormatKindFloat, 4);
+    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                         controlPointGridSize, cudaChannelFormatKindFloat, 4);
 
     // First compute all the second derivatives
     float4 *secondDerivativeValuesCuda;
@@ -168,8 +168,8 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI
 
     // Compute the gradient
     bendingEnergyWeight *= 1.f / (float)controlPointNumber;
-    auto secondDerivativesTexture = cudaCommon_createTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear,
-                                                                   secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
+    auto secondDerivativesTexture = Cuda::CreateTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear,
+                                                              secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
     if (controlPointImage->nz > 1) {
         const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient3D;
         const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
@@ -196,11 +196,11 @@ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage
                                             const float4 *controlPointImageCuda,
                                             float *jacobianMatricesCuda,
                                             float *jacobianDetCuda) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                              controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                         controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
     const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz);
@@ -230,14 +230,14 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage,
                                       const float4 *controlPointImageCuda,
                                       float *jacobianMatricesCuda,
                                       float *jacobianDetCuda) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
-    auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                              controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                         controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
     const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz);
@@ -293,7 +293,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage,
     NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda));
 
     // The Jacobian determinant are squared and logged (might not be english but will do)
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_logSquaredValues;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_logSquaredValues;
     const unsigned grids = (unsigned)ceilf(sqrtf((float)jacNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
@@ -312,7 +312,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
                                                    float4 *transGradientCuda,
                                                    const float& jacobianWeight,
                                                    const bool& approx) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
 
     // The Jacobian matrices and determinants are computed
     float *jacobianMatricesCuda, *jacobianDetCuda;
@@ -340,11 +340,11 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
     const float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx),
                                       referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy),
                                       referenceImage->dz * jacobianWeight / ((float)jacNumber * controlPointImage->dz));
-    auto jacobianDeterminantTexture = cudaCommon_createTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacNumber * sizeof(float),
-                                                                     cudaChannelFormatKindFloat, 1);
-    auto jacobianMatricesTexture = cudaCommon_createTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear,
-                                                                  (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float),
-                                                                  cudaChannelFormatKindFloat, 1);
+    auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacNumber * sizeof(float),
+                                                                cudaChannelFormatKindFloat, 1);
+    auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear,
+                                                             (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float),
+                                                             cudaChannelFormatKindFloat, 1);
     if (approx) {
         if (controlPointImage->nz > 1) {
             const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient3D;
@@ -400,7 +400,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
                                      const nifti_image *controlPointImage,
                                      float4 *controlPointImageCuda,
                                      const bool& approx) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
 
     // The Jacobian matrices and determinants are computed
     float *jacobianMatricesCuda, *jacobianDetCuda;
@@ -454,10 +454,10 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
-    auto jacobianDeterminantTexture = cudaCommon_createTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacobianDetSize,
-                                                                     cudaChannelFormatKindFloat, 1);
-    auto jacobianMatricesTexture = cudaCommon_createTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, jacobianMatricesSize,
-                                                                  cudaChannelFormatKindFloat, 1);
+    auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacobianDetSize,
+                                                                cudaChannelFormatKindFloat, 1);
+    auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, jacobianMatricesSize,
+                                                             cudaChannelFormatKindFloat, 1);
     if (approx) {
         const unsigned blocks = blockSize->reg_spline_approxCorrectFolding3D;
         const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
@@ -493,7 +493,7 @@ void reg_getDeformationFromDisplacement_gpu(const nifti_image *image, float4 *im
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getDeformationFromDisplacement;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_getDeformationFromDisplacement;
     const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
@@ -553,13 +553,13 @@ void reg_defField_compose_gpu(const nifti_image *deformationField,
                               const float4 *deformationFieldCuda,
                               float4 *deformationFieldCudaOut,
                               const size_t& activeVoxelNumber) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
     const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
     const mat44 affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk;
     const mat44 affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz;
-    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
-                                                                  activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
+                                                             activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     if (deformationField->nz > 1) {
         const unsigned blocks = blockSize->reg_defField_compose3D;
@@ -586,10 +586,10 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
     const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
     const mat33 reorientation = reg_mat44_to_mat33(deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz);
-    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
-                                                                  voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
+                                                             voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix;
     const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index 9f9c9084..40cfd892 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 #include "_reg_maths.h"
 #include "_reg_tools_gpu.h"
 #include <limits>
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 1bed83a2..19f88644 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -7,9 +7,9 @@
 
 #pragma once
 
+#include "CudaCommon.hpp"
 #include "_reg_lncc.h"
 #include "_reg_dti.h"
-#include "_reg_common_cuda.h"
 #include "_reg_kld.h"
 
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 459da264..2a8ba350 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -45,9 +45,8 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
     if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1)
         NR_FATAL_ERROR("Multiple timepoints are not yet supported");
     // The reference and floating images have to be updated on the device
-    if (cudaCommon_transferNiftiToArrayOnDevice<float>(this->referenceImageCuda, this->referenceImage) ||
-        cudaCommon_transferNiftiToArrayOnDevice<float>(this->floatingImageCuda, this->floatingImage))
-        NR_FATAL_ERROR("Error when transferring the reference or floating image");
+    Cuda::TransferNiftiToDevice<float>(this->referenceImageCuda, this->referenceImage);
+    Cuda::TransferNiftiToDevice<float>(this->floatingImageCuda, this->floatingImage);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
@@ -64,7 +63,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  const int *referenceMask,
                                  const int& referenceTimePoint) {
     // The NMI computation is performed on the host for now
-    cudaCommon_transferFromDeviceToNifti<float>(warpedImage, warpedImageCuda);
+    Cuda::TransferFromDeviceToNifti<float>(warpedImage, warpedImageCuda);
     reg_getNMIValue<float>(referenceImage,
                            warpedImage,
                            timePointWeight,
@@ -126,23 +125,23 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
                                       const double *entropies,
                                       const int& refBinning,
                                       const int& floBinning) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const int binNumber = refBinning * floBinning + refBinning + floBinning;
     const float normalisedJE = (float)(entropies[2] * entropies[3]);
     const float nmi = (float)((entropies[0] + entropies[1]) / entropies[2]);
 
-    auto referenceImageTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
-                                                                cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
-    auto warpedImageTexture = cudaCommon_createTextureObject(warpedImageCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
-                                                             cudaChannelFormatKindFloat, 1);
-    auto warpedGradientTexture = cudaCommon_createTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
-                                                                cudaChannelFormatKindFloat, 4);
-    auto histogramTexture = cudaCommon_createTextureObject(logJointHistogramCuda, cudaResourceTypeLinear, binNumber * sizeof(float),
-                                                           cudaChannelFormatKindFloat, 1);
-    auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
-                                                      cudaChannelFormatKindSigned, 1);
+    auto referenceImageTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
+                                                           cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
+    auto warpedImageTexture = Cuda::CreateTextureObject(warpedImageCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
+                                                        cudaChannelFormatKindFloat, 1);
+    auto warpedGradientTexture = Cuda::CreateTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
+                                                           cudaChannelFormatKindFloat, 4);
+    auto histogramTexture = Cuda::CreateTextureObject(logJointHistogramCuda, cudaResourceTypeLinear, binNumber * sizeof(float),
+                                                      cudaChannelFormatKindFloat, 1);
+    auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+                                                 cudaChannelFormatKindSigned, 1);
     NR_CUDA_SAFE_CALL(cudaMemset(voxelBasedGradientCuda, 0, voxelNumber * sizeof(float4)));
 
     if (referenceImage->nz > 1) {
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index d7a9796c..7f971b20 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -18,11 +18,11 @@ reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser<float>::reg_optimiser() {
 /* *************************************************************** */
 reg_optimiser_gpu::~reg_optimiser_gpu() {
     if (this->bestDofCuda) {
-        cudaCommon_free(this->bestDofCuda);
+        Cuda::Free(this->bestDofCuda);
         this->bestDofCuda = nullptr;
     }
     if (this->bestDofBwCuda) {
-        cudaCommon_free(this->bestDofBwCuda);
+        Cuda::Free(this->bestDofBwCuda);
         this->bestDofBwCuda = nullptr;
     }
     NR_FUNC_CALLED();
@@ -51,18 +51,16 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
     this->currentDofCuda = reinterpret_cast<float4*>(cppData);
     this->gradientCuda = reinterpret_cast<float4*>(gradData);
 
-    cudaCommon_free(this->bestDofCuda);
-    if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, this->GetVoxNumber()))
-        NR_FATAL_ERROR("Error when allocating the best control point array on the GPU");
+    Cuda::Free(this->bestDofCuda);
+    Cuda::Allocate(&this->bestDofCuda, this->GetVoxNumber());
 
     this->isSymmetric = nvoxBw > 0 && cppDataBw && gradDataBw;
     if (this->isSymmetric) {
         this->dofNumberBw = nvoxBw;
         this->currentDofBwCuda = reinterpret_cast<float4*>(cppDataBw);
         this->gradientBwCuda = reinterpret_cast<float4*>(gradDataBw);
-        cudaCommon_free(this->bestDofBwCuda);
-        if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, this->GetVoxNumberBw()))
-            NR_FATAL_ERROR("Error when allocating the best control point backwards array on the GPU");
+        Cuda::Free(this->bestDofBwCuda);
+        Cuda::Allocate(&this->bestDofBwCuda, this->GetVoxNumberBw());
     }
 
     this->StoreCurrentDof();
@@ -103,19 +101,19 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_o
 /* *************************************************************** */
 reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() {
     if (this->array1) {
-        cudaCommon_free(this->array1);
+        Cuda::Free(this->array1);
         this->array1 = nullptr;
     }
     if (this->array1Bw) {
-        cudaCommon_free(this->array1Bw);
+        Cuda::Free(this->array1Bw);
         this->array1Bw = nullptr;
     }
     if (this->array2) {
-        cudaCommon_free(this->array2);
+        Cuda::Free(this->array2);
         this->array2 = nullptr;
     }
     if (this->array2Bw) {
-        cudaCommon_free(this->array2Bw);
+        Cuda::Free(this->array2Bw);
         this->array2Bw = nullptr;
     }
     NR_FUNC_CALLED();
@@ -136,15 +134,13 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox,
                                            float *gradDataBw) {
     reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
     this->firstCall = true;
-    cudaCommon_free(this->array1); cudaCommon_free(this->array2);
-    if (cudaCommon_allocateArrayToDevice<float4>(&this->array1, this->GetVoxNumber()) ||
-        cudaCommon_allocateArrayToDevice<float4>(&this->array2, this->GetVoxNumber()))
-        NR_FATAL_ERROR("Error when allocating the conjugate gradient array on the GPU");
+    Cuda::Free(this->array1); Cuda::Free(this->array2);
+    Cuda::Allocate<float4>(&this->array1, this->GetVoxNumber());
+    Cuda::Allocate<float4>(&this->array2, this->GetVoxNumber());
     if (this->isSymmetric) {
-        cudaCommon_free(this->array1Bw); cudaCommon_free(this->array2Bw);
-        if (cudaCommon_allocateArrayToDevice<float4>(&this->array1Bw, this->GetVoxNumberBw()) ||
-            cudaCommon_allocateArrayToDevice<float4>(&this->array2Bw, this->GetVoxNumberBw()))
-            NR_FATAL_ERROR("Error when allocating the conjugate gradient array backwards on the GPU");
+        Cuda::Free(this->array1Bw); Cuda::Free(this->array2Bw);
+        Cuda::Allocate<float4>(&this->array1Bw, this->GetVoxNumberBw());
+        Cuda::Allocate<float4>(&this->array2Bw, this->GetVoxNumberBw());
     }
     NR_FUNC_CALLED();
 }
@@ -177,10 +173,10 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
                                          float4 *conjugateGCuda,
                                          float4 *conjugateHCuda,
                                          const size_t& nVoxels) {
-    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear,
-                                                               nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear,
+                                                          nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_initialiseConjugateGradient;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_initialiseConjugateGradient;
     const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
@@ -205,24 +201,24 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
                                   float4 *conjugateGBwCuda,
                                   float4 *conjugateHBwCuda,
                                   const size_t& nVoxelsBw) {
-    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear,
-                                                               nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    auto conjugateGTexture = cudaCommon_createTextureObject(conjugateGCuda, cudaResourceTypeLinear,
-                                                            nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    auto conjugateHTexture = cudaCommon_createTextureObject(conjugateHCuda, cudaResourceTypeLinear,
-                                                            nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    UniqueTextureObjectPtr gradientImageBwTexture(nullptr, nullptr), conjugateGBwTexture(nullptr, nullptr), conjugateHBwTexture(nullptr, nullptr);
+    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear,
+                                                          nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, cudaResourceTypeLinear,
+                                                       nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto conjugateHTexture = Cuda::CreateTextureObject(conjugateHCuda, cudaResourceTypeLinear,
+                                                       nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    Cuda::UniqueTextureObjectPtr gradientImageBwTexture(nullptr, nullptr), conjugateGBwTexture(nullptr, nullptr), conjugateHBwTexture(nullptr, nullptr);
     if (isSymmetric) {
-        gradientImageBwTexture = std::move(cudaCommon_createTextureObject(gradientImageBwCuda, cudaResourceTypeLinear,
-                                                                          nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
-        conjugateGBwTexture = std::move(cudaCommon_createTextureObject(conjugateGBwCuda, cudaResourceTypeLinear,
-                                                                       nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
-        conjugateHBwTexture = std::move(cudaCommon_createTextureObject(conjugateHBwCuda, cudaResourceTypeLinear,
-                                                                       nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
+        gradientImageBwTexture = std::move(Cuda::CreateTextureObject(gradientImageBwCuda, cudaResourceTypeLinear,
+                                                                     nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
+        conjugateGBwTexture = std::move(Cuda::CreateTextureObject(conjugateGBwCuda, cudaResourceTypeLinear,
+                                                                  nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
+        conjugateHBwTexture = std::move(Cuda::CreateTextureObject(conjugateHBwCuda, cudaResourceTypeLinear,
+                                                                  nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
     }
 
     // gam = sum((grad+g)*grad)/sum(HxG);
-    unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getConjugateGradient1;
+    unsigned blocks = CudaContext::GetBlockSize()->reg_getConjugateGradient1;
     unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     dim3 blockDims(blocks, 1, 1);
     dim3 gridDims(grids, grids, 1);
@@ -249,7 +245,7 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
         gam = static_cast<float>((gg.x + ggBw.x) / (gg.y + ggBw.y));
     }
 
-    blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_getConjugateGradient2;
+    blocks = (unsigned)CudaContext::GetBlockSize()->reg_getConjugateGradient2;
     grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     gridDims = dim3(blocks, 1, 1);
     blockDims = dim3(grids, grids, 1);
@@ -272,12 +268,12 @@ void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
                                         const bool& optimiseX,
                                         const bool& optimiseY,
                                         const bool& optimiseZ) {
-    auto bestControlPointTexture = cudaCommon_createTextureObject(bestControlPointCuda, cudaResourceTypeLinear,
-                                                                  nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear,
-                                                               nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, cudaResourceTypeLinear,
+                                                             nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear,
+                                                          nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
-    const unsigned blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_updateControlPointPosition;
+    const unsigned blocks = (unsigned)CudaContext::GetBlockSize()->reg_updateControlPointPosition;
     const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
     const dim3 blockDims(blocks, 1, 1);
     const dim3 gridDims(grids, grids, 1);
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index de8d818f..69e20f19 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 #include "_reg_optimiser.h"
 #include "_reg_tools_gpu.h"
 
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index 2acccafa..7f81bad9 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -21,17 +21,17 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage,
                            const int *maskCuda,
                            const size_t& activeVoxelNumber,
                            const float& paddingValue) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto floatingTexture = cudaCommon_createTextureObject(floatingImageCuda, cudaResourceTypeArray);
+    auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, cudaResourceTypeArray);
     // Create the texture object for the deformation field
-    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
-                                                                  activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
+                                                             activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
     // Create the texture object for the mask
-    auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
-                                                      cudaChannelFormatKindSigned, 1);
+    auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+                                                 cudaChannelFormatKindSigned, 1);
 
     // Bind the real to voxel matrix to the texture
     const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
@@ -61,14 +61,14 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage,
                               float4 *warpedGradientCuda,
                               const size_t& activeVoxelNumber,
                               const float& paddingValue) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto floatingTexture = cudaCommon_createTextureObject(floatingImageCuda, cudaResourceTypeArray);
+    auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, cudaResourceTypeArray);
     // Create the texture object for the deformation field
-    auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
-                                                                  activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
+                                                             activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     // Bind the real to voxel matrix to the texture
     const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h
index 5c3e15e7..9d720006 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/_reg_resampling_gpu.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 
 /* *************************************************************** */
 extern "C++"
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 4f3b6c77..7ac8a625 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -55,18 +55,18 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage,
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
-    auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
-                                                           cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
-    auto warpedTexture = cudaCommon_createTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
-                                                        cudaChannelFormatKindFloat, 1);
-    auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
-                                                      cudaChannelFormatKindSigned, 1);
+    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
+                                                      cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
+    auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
+                                                   cudaChannelFormatKindFloat, 1);
+    auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+                                                 cudaChannelFormatKindSigned, 1);
 
     // Create an array on the device to store the absolute difference values
     thrust::device_vector<float> absoluteValuesCuda(activeVoxelNumber);
 
     // Compute the absolute values
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getSquaredDifference;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_getSquaredDifference;
     const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
@@ -111,19 +111,19 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
-    auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
-                                                           cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
-    auto warpedTexture = cudaCommon_createTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
-                                                        cudaChannelFormatKindFloat, 1);
-    auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
-                                                      cudaChannelFormatKindSigned, 1);
-    auto spaGradientTexture = cudaCommon_createTextureObject(spaGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
-                                                             cudaChannelFormatKindFloat, 4);
+    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
+                                                      cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
+    auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
+                                                   cudaChannelFormatKindFloat, 1);
+    auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+                                                 cudaChannelFormatKindSigned, 1);
+    auto spaGradientTexture = Cuda::CreateTextureObject(spaGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
+                                                        cudaChannelFormatKindFloat, 4);
 
     // Set the gradient image to zero
     NR_CUDA_SAFE_CALL(cudaMemset(ssdGradientCuda, 0, voxelNumber * sizeof(float4)));
 
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getSSDGradient;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_getSSDGradient;
     const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 193c18eb..46f6417b 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -10,7 +10,7 @@
  *
  */
 
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 #include "_reg_tools_gpu.h"
 #include "_reg_tools_kernels.cu"
 
@@ -27,8 +27,8 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
     const int3 nodeImageDims = make_int3(nodeImage->nx, nodeImage->ny, nodeImage->nz);
     const int3 voxelImageDims = make_int3(voxelImage->nx, voxelImage->ny, voxelImage->nz);
 
-    auto voxelImageTexture = cudaCommon_createTextureObject(voxelImageCuda, cudaResourceTypeLinear,
-                                                            voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto voxelImageTexture = Cuda::CreateTextureObject(voxelImageCuda, cudaResourceTypeLinear,
+                                                       voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     // The transformation between the image and the grid
     mat44 transformation;
@@ -68,7 +68,7 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
         weight *= ratio[i];
     }
 
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric;
     const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
@@ -81,7 +81,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ
                                                     const nifti_image *controlPointImage,
                                                     float4 *nmiGradientCuda) {
     const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_convertNMIGradientFromVoxelToRealSpace;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_convertNMIGradientFromVoxelToRealSpace;
     const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
@@ -93,7 +93,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image,
                                float4 *imageCuda,
                                const float& sigma,
                                const bool smoothXYZ[8]) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
@@ -132,10 +132,10 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image,
                 float4 *smoothedImage;
                 NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4)));
 
-                auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear,
-                                                                   voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
-                auto kernelTexture = cudaCommon_createTextureObject(kernelCuda, cudaResourceTypeLinear,
-                                                                    kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1);
+                auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
+                                                              voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+                auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, cudaResourceTypeLinear,
+                                                               kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1);
 
                 unsigned blocks, grids;
                 dim3 blockDims, gridDims;
@@ -179,7 +179,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image,
 void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
                                        float4 *imageCuda,
                                        const float *spacingVoxel) {
-    auto blockSize = NiftyReg::CudaContext::GetBlockSize();
+    auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
 
@@ -207,10 +207,10 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
             NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice));
             NR_CUDA_SAFE_CALL(cudaFreeHost(kernel));
 
-            auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear,
-                                                               voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
-            auto kernelTexture = cudaCommon_createTextureObject(kernelCuda, cudaResourceTypeLinear,
-                                                                kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1);
+            auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
+                                                          voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+            auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, cudaResourceTypeLinear,
+                                                           kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1);
 
             float4 *smoothedImage;
             NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4)));
@@ -254,7 +254,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
 }
 /* *************************************************************** */
 void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
     const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
@@ -263,7 +263,7 @@ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float&
 }
 /* *************************************************************** */
 void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
     const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
@@ -272,7 +272,7 @@ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value
 }
 /* *************************************************************** */
 void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
     const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
@@ -281,7 +281,7 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr
 }
 /* *************************************************************** */
 void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
     const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
@@ -290,7 +290,7 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu
 }
 /* *************************************************************** */
 void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) {
-    const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
     const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index dbd43398..947d8065 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 #include "_reg_tools.h"
 #include <thrust/device_ptr.h>
 #include <thrust/reduce.h>
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index 3dbc4f71..d6cddd0b 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -4,7 +4,7 @@
 #include <cuda.h>
 #include"_reg_resampling.h"
 #include"_reg_maths.h"
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 #include"_reg_tools.h"
 #include"_reg_ReadWriteImage.h"
 #include <thrust/sort.h>
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 81f5ad1a..d638755d 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -345,12 +345,12 @@ void block_matching_method_gpu(const nifti_image *referenceImage,
     const uint3 blockSize = make_uint3(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]);
     const unsigned numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2];
 
-    auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float),
-                                                           cudaChannelFormatKindFloat, 1);
-    auto warpedTexture = cudaCommon_createTextureObject(warpedImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float),
-                                                        cudaChannelFormatKindFloat, 1);
-    auto totalBlockTexture = cudaCommon_createTextureObject(totalBlockCuda, cudaResourceTypeLinear, numBlocks * sizeof(int),
-                                                            cudaChannelFormatKindSigned, 1);
+    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float),
+                                                      cudaChannelFormatKindFloat, 1);
+    auto warpedTexture = Cuda::CreateTextureObject(warpedImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float),
+                                                   cudaChannelFormatKindFloat, 1);
+    auto totalBlockTexture = Cuda::CreateTextureObject(totalBlockCuda, cudaResourceTypeLinear, numBlocks * sizeof(int),
+                                                       cudaChannelFormatKindSigned, 1);
 
     unsigned definedBlock = 0, *definedBlockCuda;
     NR_CUDA_SAFE_CALL(cudaMalloc(&definedBlockCuda, sizeof(unsigned)));
diff --git a/reg-lib/cuda/blockMatchingKernel.h b/reg-lib/cuda/blockMatchingKernel.h
index 2692ab81..f341ff81 100644
--- a/reg-lib/cuda/blockMatchingKernel.h
+++ b/reg-lib/cuda/blockMatchingKernel.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 #include "_reg_blockMatching.h"
 
 /**
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index 40633392..c7e7d230 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -5,7 +5,7 @@
 #include"_reg_resampling.h"
 #include"_reg_maths.h"
 #include "resampleKernel.h"
-#include "_reg_common_cuda.h"
+#include "CudaCommon.hpp"
 #include"_reg_tools.h"
 #include"_reg_ReadWriteImage.h"
 

From 3516bfec566d5f5ee001ec4cd720d295c42ebb48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 24 Aug 2023 14:02:40 +0100
Subject: [PATCH 183/314] Convert NMI regression test to a multi-measure
 regression test #92

---
 niftyreg_build_version.txt                    |  2 +-
 reg-test/CMakeLists.txt                       |  2 +-
 reg-test/reg_test_lncc.cpp                    |  6 +-
 reg-test/reg_test_nmi.cpp                     |  6 +-
 reg-test/reg_test_regr_lts.cpp                |  6 +-
 ...regr_nmi.cpp => reg_test_regr_measure.cpp} | 77 +++++++++++--------
 6 files changed, 54 insertions(+), 45 deletions(-)
 rename reg-test/{reg_test_regr_nmi.cpp => reg_test_regr_measure.cpp} (80%)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 274f7143..81606223 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-302
+303
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 7d3faeef..4d518ef8 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -123,7 +123,7 @@ set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
 if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
-  set(EXEC_LIST reg_test_regr_nmi ${EXEC_LIST})
+  set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST})
 endif(USE_CUDA)
 
 
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index 592ee238..00a0f5a6 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -11,9 +11,9 @@
     In 2D and 3D
 */
 
-class LNCCTest {
+class LnccTest {
 public:
-    LNCCTest() {
+    LnccTest() {
         if (!testCases.empty())
             return;
 
@@ -291,7 +291,7 @@ class LNCCTest {
     }
 };
 
-TEST_CASE_METHOD(LNCCTest, "LNCC", "[GetSimilarityMeasureValue]") {
+TEST_CASE_METHOD(LnccTest, "LNCC", "[GetSimilarityMeasureValue]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index 5f9c66b4..7d03e3ee 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -10,9 +10,9 @@
     test function: NMI computation
 */
 
-class NMITest {
+class NmiTest {
 public:
-    NMITest() {
+    NmiTest() {
         if (!testCases.empty())
             return;
 
@@ -151,7 +151,7 @@ class NMITest {
     }
 };
 
-TEST_CASE_METHOD(NMITest, "NMI", "[unit]") {
+TEST_CASE_METHOD(NmiTest, "NMI", "[unit]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
index 58cd390d..16547d70 100644
--- a/reg-test/reg_test_regr_lts.cpp
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -10,7 +10,7 @@
  *  LTS regression test to ensure the CPU and CUDA versions yield the same output
  */
 
-class LTSTest {
+class LtsTest {
 protected:
     using TestData = std::tuple<std::string, NiftiImage, NiftiImage, int, int>;
     using TestCase = std::tuple<std::string, unique_ptr<mat44>, unique_ptr<mat44>>;
@@ -18,7 +18,7 @@ class LTSTest {
     inline static vector<TestCase> testCases;
 
 public:
-    LTSTest() {
+    LtsTest() {
         if (!testCases.empty())
             return;
 
@@ -135,7 +135,7 @@ class LTSTest {
     }
 };
 
-TEST_CASE_METHOD(LTSTest, "Regression LTS", "[regression]") {
+TEST_CASE_METHOD(LtsTest, "Regression LTS", "[regression]") {
     // Loop over all generated test cases
     for (auto&& testCase : this->testCases) {
         // Retrieve test information
diff --git a/reg-test/reg_test_regr_nmi.cpp b/reg-test/reg_test_regr_measure.cpp
similarity index 80%
rename from reg-test/reg_test_regr_nmi.cpp
rename to reg-test/reg_test_regr_measure.cpp
index 5fed6b15..8a472bac 100644
--- a/reg-test/reg_test_regr_nmi.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -5,18 +5,21 @@
 #include <iomanip>
 
 /**
- *  NMI regression test to ensure the CPU and CUDA versions yield the same output
+ *  Measure regression tests to ensure the CPU and CUDA versions yield the same output
+ *  Test classes:
+ *   - NMI
+ *   - SSD
  */
 
-class NmiTest {
+class MeasureTest {
 protected:
-    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, bool>;
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, MeasureType, bool>;
     using TestCase = std::tuple<std::string, double, double, NiftiImage, NiftiImage>;
 
     inline static vector<TestCase> testCases;
 
 public:
-    NmiTest() {
+    MeasureTest() {
         if (!testCases.empty())
             return;
 
@@ -54,22 +57,28 @@ class NmiTest {
         }
 
         // Create the data container for the regression test
+        const std::string measureNames[]{ "NMI"s, "SSD"s, "DTI"s, "LNCC"s, "KLD"s, "MIND"s, "MINDSSC"s };
+        const MeasureType testMeasures[]{ MeasureType::Nmi, MeasureType::Ssd };
         vector<TestData> testData;
-        for (int sym = 0; sym < 2; ++sym) {
-            testData.emplace_back(TestData(
-                "2D"s + (sym ? " Symmetric" : ""),
-                reference2d,
-                floating2d,
-                controlPointGrid2d,
-                sym
-            ));
-            testData.emplace_back(TestData(
-                "3D"s + (sym ? " Symmetric" : ""),
-                reference3d,
-                floating3d,
-                controlPointGrid3d,
-                sym
-            ));
+        for (auto&& measure : testMeasures) {
+            for (int sym = 0; sym < 2; ++sym) {
+                testData.emplace_back(TestData(
+                    measureNames[(int)measure] + " 2D"s + (sym ? " Symmetric" : ""),
+                    reference2d,
+                    floating2d,
+                    controlPointGrid2d,
+                    measure,
+                    sym
+                ));
+                testData.emplace_back(TestData(
+                    measureNames[(int)measure] + " 3D"s + (sym ? " Symmetric" : ""),
+                    reference3d,
+                    floating3d,
+                    controlPointGrid3d,
+                    measure,
+                    sym
+                ));
+            }
         }
 
         // Create the platforms
@@ -77,12 +86,12 @@ class NmiTest {
         Platform platformCuda(PlatformType::Cuda);
 
         // Create the measures
-        unique_ptr<Measure> measureCpu{ new Measure() };
-        unique_ptr<Measure> measureCuda{ new CudaMeasure() };
+        unique_ptr<Measure> measureCreatorCpu{ new Measure() };
+        unique_ptr<Measure> measureCreatorCuda{ new CudaMeasure() };
 
         for (auto&& testData : testData) {
             // Get the test data
-            auto&& [testName, reference, floating, controlPointGrid, isSymmetric] = testData;
+            auto&& [testName, reference, floating, controlPointGrid, measureType, isSymmetric] = testData;
 
             // Create images
             NiftiImage referenceCpu(reference), referenceCuda(reference);
@@ -140,17 +149,17 @@ class NmiTest {
                 computeCudaBw.reset(platformCuda.CreateCompute(*contentCudaBw));
             }
 
-            // Create the NMI measures
-            unique_ptr<reg_nmi> nmiCpu{ dynamic_cast<reg_nmi*>(measureCpu->Create(MeasureType::Nmi)) };
-            unique_ptr<reg_nmi> nmiCuda{ dynamic_cast<reg_nmi*>(measureCuda->Create(MeasureType::Nmi)) };
+            // Create the measures
+            unique_ptr<reg_measure> measureCpu{ measureCreatorCpu->Create(measureType) };
+            unique_ptr<reg_measure> measureCuda{ measureCreatorCuda->Create(measureType) };
 
             // Initialise the measures
             for (int i = 0; i < referenceCpu->nt; ++i) {
-                nmiCpu->SetTimepointWeight(i, 1.0);
-                nmiCuda->SetTimepointWeight(i, 1.0);
+                measureCpu->SetTimepointWeight(i, 1.0);
+                measureCuda->SetTimepointWeight(i, 1.0);
             }
-            measureCpu->Initialise(*nmiCpu, *contentCpu, contentCpuBw.get());
-            measureCuda->Initialise(*nmiCuda, *contentCuda, contentCudaBw.get());
+            measureCreatorCpu->Initialise(*measureCpu, *contentCpu, contentCpuBw.get());
+            measureCreatorCuda->Initialise(*measureCuda, *contentCuda, contentCudaBw.get());
 
             // Compute the similarity measure value for CPU
             computeCpu->GetDeformationField(false, true);
@@ -159,7 +168,7 @@ class NmiTest {
                 computeCpuBw->GetDeformationField(false, true);
                 computeCpuBw->ResampleImage(1, std::numeric_limits<float>::quiet_NaN());
             }
-            const double simMeasureCpu = nmiCpu->GetSimilarityMeasureValue();
+            const double simMeasureCpu = measureCpu->GetSimilarityMeasureValue();
 
             // Compute the similarity measure value for CUDA
             NiftiImage warpedCuda(contentCuda->F3dContent::GetWarped());
@@ -176,7 +185,7 @@ class NmiTest {
                 // computeCudaBw->GetDeformationField(false, true);
                 // computeCudaBw->ResampleImage(1, std::numeric_limits<float>::quiet_NaN());
             }
-            const double simMeasureCuda = nmiCuda->GetSimilarityMeasureValue();
+            const double simMeasureCuda = measureCuda->GetSimilarityMeasureValue();
 
             // Compute the similarity measure gradient for CPU
             int timepoint = 0;
@@ -186,7 +195,7 @@ class NmiTest {
                 contentCpuBw->ZeroVoxelBasedMeasureGradient();
                 computeCpuBw->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
             }
-            nmiCpu->GetVoxelBasedSimilarityMeasureGradient(timepoint);
+            measureCpu->GetVoxelBasedSimilarityMeasureGradient(timepoint);
 
             // Compute the similarity measure gradient for CUDA
             contentCuda->ZeroVoxelBasedMeasureGradient();
@@ -203,7 +212,7 @@ class NmiTest {
                 warpedGradCudaBw.disown();
                 contentCudaBw->UpdateWarpedGradient();
             }
-            nmiCuda->GetVoxelBasedSimilarityMeasureGradient(timepoint);
+            measureCuda->GetVoxelBasedSimilarityMeasureGradient(timepoint);
 
             // Get the voxel-based similarity measure gradients
             NiftiImage voxelBasedGradCpu(contentCpu->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
@@ -215,7 +224,7 @@ class NmiTest {
     }
 };
 
-TEST_CASE_METHOD(NmiTest, "Regression NMI", "[regression]") {
+TEST_CASE_METHOD(MeasureTest, "Regression Measure", "[regression]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information

From cece094e02ffce56e4fa5b17a5b2bda3ed91e095 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 29 Aug 2023 15:06:43 +0100
Subject: [PATCH 184/314] Refactor Cuda::TransferNiftiToDevice and remove
 Cuda::TransferNiftiToDeviceSimple

---
 niftyreg_build_version.txt         |   2 +-
 reg-apps/reg_benchmark.cpp         |   4 +-
 reg-lib/cpu/_reg_maths.h           |   3 -
 reg-lib/cuda/CudaAladinContent.cpp |  20 ++---
 reg-lib/cuda/CudaCommon.cu         | 139 ++++++++++++-----------------
 reg-lib/cuda/CudaCommon.hpp        |   9 +-
 reg-lib/cuda/CudaContent.cpp       |   2 +-
 7 files changed, 76 insertions(+), 103 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 81606223..873b744b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-303
+304
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index 18393378..fe90b400 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -186,9 +186,9 @@ int main(int argc, char **argv)
    float4 *deformationFieldImageArray_d;
    if(runGPU)
    {
-      Cuda::Allocate<float>(&targetImageArray_d, targetImage->dim);
+      Cuda::Allocate<float>(&targetImageArray_d, targetImage->nvox);
       Cuda::TransferNiftiToDevice<float>(targetImageArray_d, targetImage);
-      Cuda::Allocate<float>(&sourceImageArray_d, sourceImage->dim);
+      Cuda::Allocate<float>(&sourceImageArray_d, sourceImage->nvox);
       Cuda::TransferNiftiToDevice<float>(sourceImageArray_d,sourceImage);
       CUDA_SAFE_CALL(cudaMalloc((void **)&targetMask_d, targetImage->nvox*sizeof(int)));
       CUDA_SAFE_CALL(cudaMemcpy(targetMask_d, maskImage, targetImage->nvox*sizeof(int), cudaMemcpyHostToDevice));
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index c983340f..ea14462d 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -59,9 +59,6 @@ typedef enum {
 #ifndef M_PI
 #define M_PI 3.14159265358979323846
 #endif
-#ifndef isnan
-#define isnan(_X) _isnan(_X)
-#endif
 #if (_MSC_VER < 1900)
 #ifndef strtof
 #define strtof(_s, _t) (float) strtod(_s, _t)
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index 489bdf6c..7f08b840 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -65,32 +65,32 @@ void CudaAladinContent::AllocateCuPtrs() {
     }
     if (referenceMask != nullptr) {
         Cuda::Allocate<int>(&mask_d, reference->nvox);
-        Cuda::TransferNiftiToDeviceSimple<int>(mask_d, referenceMask, reference->nvox);
+        Cuda::TransferNiftiToDevice(mask_d, referenceMask, reference->nvox);
     }
     if (reference != nullptr) {
         Cuda::Allocate<float>(&referenceImageArray_d, reference->nvox);
         Cuda::Allocate<float>(&referenceMat_d, sizeof(mat44) / sizeof(float));
 
-        Cuda::TransferNiftiToDeviceSimple<float>(referenceImageArray_d, reference);
+        Cuda::TransferNiftiToDevice(referenceImageArray_d, reference);
 
         float* targetMat = (float *)malloc(sizeof(mat44)); //freed
         mat44ToCptr(*GetXYZMatrix(*reference), targetMat);
-        Cuda::TransferNiftiToDeviceSimple<float>(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float));
+        Cuda::TransferNiftiToDevice(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float));
         free(targetMat);
     }
     if (warped != nullptr) {
         Cuda::Allocate<float>(&warpedImageArray_d, warped->nvox);
-        Cuda::TransferNiftiToDeviceSimple<float>(warpedImageArray_d, warped);
+        Cuda::TransferNiftiToDevice(warpedImageArray_d, warped);
     }
     if (deformationField != nullptr) {
         Cuda::Allocate<float>(&deformationFieldArray_d, deformationField->nvox);
-        Cuda::TransferNiftiToDeviceSimple<float>(deformationFieldArray_d, deformationField);
+        Cuda::TransferNiftiToDevice(deformationFieldArray_d, deformationField);
     }
     if (floating != nullptr) {
         Cuda::Allocate<float>(&floatingImageArray_d, floating->nvox);
         Cuda::Allocate<float>(&floIJKMat_d, sizeof(mat44) / sizeof(float));
 
-        Cuda::TransferNiftiToDeviceSimple<float>(floatingImageArray_d, floating);
+        Cuda::TransferNiftiToDevice(floatingImageArray_d, floating);
 
         float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44));
         mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h);
@@ -109,7 +109,7 @@ void CudaAladinContent::AllocateCuPtrs() {
         }
         if (blockMatchingParams->totalBlock != nullptr) {
             Cuda::Allocate<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
-            Cuda::TransferNiftiToDeviceSimple<int>(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
+            Cuda::TransferNiftiToDevice(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
         }
         /* // Removed until CUDA SVD is added back
         if (blockMatchingParams->activeBlockNumber > 0 ) {
@@ -169,7 +169,7 @@ void CudaAladinContent::SetDeformationField(nifti_image *deformationFieldIn) {
     AladinContent::SetDeformationField(deformationFieldIn);
 
     Cuda::Allocate<float>(&deformationFieldArray_d, deformationField->nvox);
-    Cuda::TransferNiftiToDeviceSimple<float>(deformationFieldArray_d, deformationField);
+    Cuda::TransferNiftiToDevice(deformationFieldArray_d, deformationField);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) {
@@ -177,7 +177,7 @@ void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) {
         Cuda::Free(mask_d);
     AladinContent::SetReferenceMask(referenceMaskIn);
     Cuda::Allocate<int>(&mask_d, reference->nvox);
-    Cuda::TransferNiftiToDeviceSimple<int>(mask_d, referenceMaskIn, reference->nvox);
+    Cuda::TransferNiftiToDevice(mask_d, referenceMaskIn, reference->nvox);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetWarped(nifti_image *warped) {
@@ -187,7 +187,7 @@ void CudaAladinContent::SetWarped(nifti_image *warped) {
     reg_tools_changeDatatype<float>(warped);
 
     Cuda::Allocate<float>(&warpedImageArray_d, warped->nvox);
-    Cuda::TransferNiftiToDeviceSimple<float>(warpedImageArray_d, warped);
+    Cuda::TransferNiftiToDevice(warpedImageArray_d, warped);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
diff --git a/reg-lib/cuda/CudaCommon.cu b/reg-lib/cuda/CudaCommon.cu
index bf6bee75..870091c3 100644
--- a/reg-lib/cuda/CudaCommon.cu
+++ b/reg-lib/cuda/CudaCommon.cu
@@ -41,9 +41,9 @@ template <class DataType>
 void Allocate(DataType **arrayCuda, const size_t& nVoxels) {
     NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, nVoxels * sizeof(DataType)));
 }
+template void Allocate<int>(int**, const size_t&);
 template void Allocate<float>(float**, const size_t&);
 template void Allocate<double>(double**, const size_t&);
-template void Allocate<int>(int**, const size_t&);
 template void Allocate<float4>(float4**, const size_t&); // for deformation field
 /* *************************************************************** */
 template <class DataType>
@@ -51,9 +51,9 @@ void Allocate(DataType **arrayCuda, const int *dim) {
     const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, memSize));
 }
+template void Allocate<int>(int**, const int*);
 template void Allocate<float>(float**, const int*);
 template void Allocate<double>(double**, const int*);
-template void Allocate<int>(int**, const int*);
 template void Allocate<float4>(float4**, const int*); // for deformation field
 /* *************************************************************** */
 template <class DataType>
@@ -68,19 +68,17 @@ template void Allocate<float4>(float4**, float4**, const int*); // for deformati
 /* *************************************************************** */
 template <class DataType, class NiftiType>
 void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) {
-    if (sizeof(DataType) != sizeof(NiftiType)) {
+    if (sizeof(DataType) != sizeof(NiftiType))
         NR_FATAL_ERROR("The host and device arrays are of different types");
-    } else {
-        cudaMemcpy3DParms copyParams{};
-        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-        copyParams.srcPtr = make_cudaPitchedPtr(img->data,
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = arrayCuda;
-        copyParams.kind = cudaMemcpyHostToDevice;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-    }
+    cudaMemcpy3DParms copyParams{};
+    copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
+    copyParams.srcPtr = make_cudaPitchedPtr(img->data,
+                                            copyParams.extent.width * sizeof(DataType),
+                                            copyParams.extent.width,
+                                            copyParams.extent.height);
+    copyParams.dstArray = arrayCuda;
+    copyParams.kind = cudaMemcpyHostToDevice;
+    NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
 }
 /* *************************************************************** */
 template <class DataType>
@@ -131,29 +129,27 @@ template void TransferNiftiToDevice<float4>(cudaArray*, const nifti_image*); //
 /* *************************************************************** */
 template <class DataType, class NiftiType>
 void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
-    if (sizeof(DataType) != sizeof(NiftiType)) {
+    if (sizeof(DataType) != sizeof(NiftiType))
         NR_FATAL_ERROR("The host and device arrays are of different types");
-    } else {
-        NiftiType *array1 = static_cast<NiftiType*>(img->data);
-        NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)];
-        cudaMemcpy3DParms copyParams{};
-        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-        copyParams.kind = cudaMemcpyHostToDevice;
-        // First timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr(array1,
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = array1Cuda;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-        // Second timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr(array2,
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = array2Cuda;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-    }
+    NiftiType *array1 = static_cast<NiftiType*>(img->data);
+    NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)];
+    cudaMemcpy3DParms copyParams{};
+    copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
+    copyParams.kind = cudaMemcpyHostToDevice;
+    // First timepoint
+    copyParams.srcPtr = make_cudaPitchedPtr(array1,
+                                            copyParams.extent.width * sizeof(DataType),
+                                            copyParams.extent.width,
+                                            copyParams.extent.height);
+    copyParams.dstArray = array1Cuda;
+    NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
+    // Second timepoint
+    copyParams.srcPtr = make_cudaPitchedPtr(array2,
+                                            copyParams.extent.width * sizeof(DataType),
+                                            copyParams.extent.width,
+                                            copyParams.extent.height);
+    copyParams.dstArray = array2Cuda;
+    NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
 }
 /* *************************************************************** */
 template <class DataType>
@@ -221,12 +217,9 @@ template void TransferNiftiToDevice<float4>(cudaArray*, cudaArray*, const nifti_
 /* *************************************************************** */
 template <class DataType, class NiftiType>
 void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) {
-    if (sizeof(DataType) != sizeof(NiftiType)) {
+    if (sizeof(DataType) != sizeof(NiftiType))
         NR_FATAL_ERROR("The host and device arrays are of different types");
-    } else {
-        const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType);
-        NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, memSize, cudaMemcpyHostToDevice));
-    }
+    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(NiftiType), cudaMemcpyHostToDevice));
 }
 /* *************************************************************** */
 template <class DataType>
@@ -262,23 +255,21 @@ void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) {
         }
     }
 }
-template void TransferNiftiToDevice<double>(double*, const nifti_image*);
-template void TransferNiftiToDevice<float>(float*, const nifti_image*);
 template void TransferNiftiToDevice<int>(int*, const nifti_image*);
+template void TransferNiftiToDevice<float>(float*, const nifti_image*);
+template void TransferNiftiToDevice<double>(double*, const nifti_image*);
 template void TransferNiftiToDevice<float4>(float4*, const nifti_image*);
 /* *************************************************************** */
 template <class DataType, class NiftiType>
 void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
-    if (sizeof(DataType) != sizeof(NiftiType)) {
+    if (sizeof(DataType) != sizeof(NiftiType))
         NR_FATAL_ERROR("The host and device arrays are of different types");
-    } else {
-        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
-        const size_t memSize = voxelNumber * sizeof(DataType);
-        const NiftiType *array1 = static_cast<NiftiType*>(img->data);
-        const NiftiType *array2 = &array1[voxelNumber];
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1, memSize, cudaMemcpyHostToDevice));
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice));
-    }
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+    const size_t memSize = voxelNumber * sizeof(DataType);
+    const NiftiType *array1 = static_cast<NiftiType*>(img->data);
+    const NiftiType *array2 = &array1[voxelNumber];
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1, memSize, cudaMemcpyHostToDevice));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice));
 }
 /* *************************************************************** */
 template <class DataType>
@@ -328,6 +319,14 @@ template void TransferNiftiToDevice<float>(float*, float*, const nifti_image*);
 template void TransferNiftiToDevice<double>(double*, double*, const nifti_image*);
 template void TransferNiftiToDevice<float4>(float4*, float4*, const nifti_image*); // for deformation field
 /* *************************************************************** */
+template <class DataType>
+void TransferNiftiToDevice(DataType *arrayCuda, const DataType *img, const size_t& nvox) {
+    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice));
+}
+template void TransferNiftiToDevice<int>(int*, const int*, const size_t&);
+template void TransferNiftiToDevice<float>(float*, const float*, const size_t&);
+template void TransferNiftiToDevice<double>(double*, const double*, const size_t&);
+/* *************************************************************** */
 void TransferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) {
     if (img->datatype != NIFTI_TYPE_FLOAT32)
         NR_FATAL_ERROR("The image data type is not supported");
@@ -344,11 +343,9 @@ void TransferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) {
 /* *************************************************************** */
 template <class DataType, class NiftiType>
 void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) {
-    if (sizeof(DataType) != sizeof(NiftiType)) {
+    if (sizeof(DataType) != sizeof(NiftiType))
         NR_FATAL_ERROR("The host and device arrays are of different types");
-    } else {
-        NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost));
-    }
+    NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost));
 }
 /* *************************************************************** */
 template <class DataType>
@@ -391,15 +388,13 @@ template void TransferFromDeviceToNifti<float4>(nifti_image*, const float4*); //
 /* *************************************************************** */
 template <class DataType, class NiftiType>
 void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
-    if (sizeof(DataType) != sizeof(NiftiType)) {
+    if (sizeof(DataType) != sizeof(NiftiType))
         NR_FATAL_ERROR("The host and device arrays are of different types");
-    } else {
-        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
-        NiftiType *array1 = static_cast<NiftiType*>(img->data);
-        NiftiType *array2 = &array1[voxelNumber];
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array1, array1Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
-        NR_CUDA_SAFE_CALL(cudaMemcpy(array2, array2Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
-    }
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+    NiftiType *array1 = static_cast<NiftiType*>(img->data);
+    NiftiType *array2 = &array1[voxelNumber];
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array1, array1Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
+    NR_CUDA_SAFE_CALL(cudaMemcpy(array2, array2Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
 }
 /* *************************************************************** */
 template <class DataType>
@@ -451,22 +446,6 @@ template void TransferFromDeviceToNifti<double>(nifti_image*, const double*, con
 template void TransferFromDeviceToNifti<float4>(nifti_image*, const float4*, const float4*); // for deformation field
 /* *************************************************************** */
 template <class DataType>
-void TransferNiftiToDeviceSimple(DataType *arrayCuda, const nifti_image *img) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice));
-}
-template void TransferNiftiToDeviceSimple<int>(int*, const nifti_image*);
-template void TransferNiftiToDeviceSimple<float>(float*, const nifti_image*);
-template void TransferNiftiToDeviceSimple<double>(double*, const nifti_image*);
-/* *************************************************************** */
-template <class DataType>
-void TransferNiftiToDeviceSimple(DataType *arrayCuda, const DataType *img, const size_t& nvox) {
-    NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice));
-}
-template void TransferNiftiToDeviceSimple<int>(int*, const int*, const size_t&);
-template void TransferNiftiToDeviceSimple<float>(float*, const float*, const size_t&);
-template void TransferNiftiToDeviceSimple<double>(double*, const double*, const size_t&);
-/* *************************************************************** */
-template <class DataType>
 void TransferFromDeviceToHost(DataType *array, const DataType *arrayCuda, const size_t& nElements) {
     NR_CUDA_SAFE_CALL(cudaMemcpy(array, arrayCuda, nElements * sizeof(DataType), cudaMemcpyDeviceToHost));
 }
diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp
index f8319b79..088b11f2 100644
--- a/reg-lib/cuda/CudaCommon.hpp
+++ b/reg-lib/cuda/CudaCommon.hpp
@@ -85,6 +85,9 @@ void TransferNiftiToDevice(DataType*, const nifti_image*);
 template <class DataType>
 void TransferNiftiToDevice(DataType*, DataType*, const nifti_image*);
 /* *************************************************************** */
+template <class DataType>
+void TransferNiftiToDevice(DataType*, const DataType*, const size_t&);
+/* *************************************************************** */
 void TransferFromDeviceToNifti(nifti_image*, const cudaArray*);
 /* *************************************************************** */
 template <class DataType>
@@ -94,12 +97,6 @@ template <class DataType>
 void TransferFromDeviceToNifti(nifti_image*, const DataType*, const DataType*);
 /* *************************************************************** */
 template <class DataType>
-void TransferNiftiToDeviceSimple(DataType*, const nifti_image*);
-/* *************************************************************** */
-template <class DataType>
-void TransferNiftiToDeviceSimple(DataType*, const DataType*, const size_t&);
-/* *************************************************************** */
-template <class DataType>
 void TransferFromDeviceToHost(DataType*, const DataType*, const size_t&);
 /* *************************************************************** */
 template <class DataType>
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 72db366d..63cc488f 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -56,7 +56,7 @@ void CudaContent::DeallocateDeformationField() {
 }
 /* *************************************************************** */
 void CudaContent::AllocateWarped() {
-    Cuda::Allocate<float>(&warpedCuda, warped->dim);
+    Cuda::Allocate(&warpedCuda, warped->nvox);
 }
 /* *************************************************************** */
 void CudaContent::DeallocateWarped() {

From 48751ff981d7ece8ca2712d789c9a4fd28f5c672 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 29 Aug 2023 15:14:27 +0100
Subject: [PATCH 185/314] Make SSD GPU on a par with the CPU version #92

---
 niftyreg_build_version.txt               |   2 +-
 reg-lib/cpu/_reg_mind.cpp                |   4 -
 reg-lib/cpu/_reg_ssd.cpp                 |  15 +--
 reg-lib/cpu/_reg_ssd.h                   |   6 +-
 reg-lib/cuda/BlockSize.hpp               |  12 +-
 reg-lib/cuda/CMakeLists.txt              |   4 +-
 reg-lib/cuda/CudaDefContent.cpp          |  20 ++++
 reg-lib/cuda/CudaDefContent.h            |   5 +
 reg-lib/cuda/CudaMeasure.cpp             |   1 +
 reg-lib/cuda/_reg_common_cuda_kernels.cu |  11 ++
 reg-lib/cuda/_reg_measure_gpu.h          |   6 +
 reg-lib/cuda/_reg_nmi_gpu.cu             |   9 +-
 reg-lib/cuda/_reg_nmi_gpu.h              |   2 +
 reg-lib/cuda/_reg_ssd_gpu.cu             | 116 +++++++++++--------
 reg-lib/cuda/_reg_ssd_gpu.h              |   1 +
 reg-lib/cuda/_reg_ssd_kernels.cu         | 135 +++++++----------------
 reg-test/reg_test_regr_measure.cpp       |  21 +++-
 17 files changed, 191 insertions(+), 179 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 873b744b..67d04b9f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-304
+305
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 92a37b35..3fa94f11 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -399,7 +399,6 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage,
                                  const double *timePointWeight,
                                  double *timePointWeightDescriptor,
                                  nifti_image *jacobianDetImage,
-                                 float *currentValue,
                                  const int& descriptorOffset,
                                  const int& referenceTimePoint,
                                  const int& mindType) {
@@ -428,7 +427,6 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage,
                                                         timePointWeightDescriptor,
                                                         jacobianDetImage,
                                                         combinedMask.get(),
-                                                        currentValue,
                                                         nullptr);
             }, NiftiImage::getFloatingDataType(referenceImageDescriptor));
         }
@@ -445,7 +443,6 @@ double reg_mind::GetSimilarityMeasureValueFw() {
                                        this->timePointWeight,
                                        this->timePointWeightDescriptor,
                                        nullptr, // TODO this->forwardJacDetImagePointer,
-                                       this->currentValue,
                                        this->descriptorOffset,
                                        this->referenceTimePoint,
                                        this->mindType);
@@ -460,7 +457,6 @@ double reg_mind::GetSimilarityMeasureValueBw() {
                                        this->timePointWeight,
                                        this->timePointWeightDescriptor,
                                        nullptr, // TODO this->backwardJacDetImagePointer,
-                                       this->currentValue,
                                        this->descriptorOffset,
                                        this->referenceTimePoint,
                                        this->mindType);
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 78c9fe54..b3d805a2 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -12,7 +12,6 @@
 
 #include "_reg_ssd.h"
 
-// #define USE_LOG_SSD
 // #define MRF_USE_SAD
 
 /* *************************************************************** */
@@ -95,7 +94,6 @@ double reg_getSsdValue(const nifti_image *referenceImage,
                        const double *timePointWeight,
                        const nifti_image *jacobianDetImage,
                        const int *mask,
-                       float *currentValue,
                        const nifti_image *localWeightSim) {
 #ifdef _WIN32
     long voxel;
@@ -137,7 +135,7 @@ double reg_getSsdValue(const nifti_image *referenceImage,
 #ifdef MRF_USE_SAD
                         const double diff = fabs(refValue - warValue);
 #else
-                        const double diff = reg_pow2(refValue - warValue);
+                        const double diff = std::pow(refValue - warValue, 2.0);
 #endif
                         // Jacobian determinant modulation of the ssd if required
                         const DataType& val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1);
@@ -148,21 +146,19 @@ double reg_getSsdValue(const nifti_image *referenceImage,
             }
 
             ssdLocal *= timePointWeight[time];
-            currentValue[time] = static_cast<float>(-ssdLocal);
             ssdGlobal -= ssdLocal / n;
         }
     }
     return ssdGlobal;
 }
-template double reg_getSsdValue<float>(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, float*, const nifti_image*);
-template double reg_getSsdValue<double>(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, float*, const nifti_image*);
+template double reg_getSsdValue<float>(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, const nifti_image*);
+template double reg_getSsdValue<double>(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, const nifti_image*);
 /* *************************************************************** */
 double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  const nifti_image *warpedImage,
                                  const double *timePointWeight,
                                  const nifti_image *jacobianDetImage,
                                  const int *mask,
-                                 float *currentValue,
                                  const nifti_image *localWeightSim) {
     return std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
@@ -171,7 +167,6 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                                timePointWeight,
                                                jacobianDetImage,
                                                mask,
-                                               currentValue,
                                                localWeightSim);
     }, NiftiImage::getFloatingDataType(referenceImage));
 }
@@ -182,7 +177,6 @@ double reg_ssd::GetSimilarityMeasureValueFw() {
                                        this->timePointWeight,
                                        nullptr, // TODO this->forwardJacDetImagePointer,
                                        this->referenceMask,
-                                       this->currentValue,
                                        this->localWeightSim);
 }
 /* *************************************************************** */
@@ -192,7 +186,6 @@ double reg_ssd::GetSimilarityMeasureValueBw() {
                                        this->timePointWeight,
                                        nullptr, // TODO this->backwardJacDetImagePointer,
                                        this->floatingMask,
-                                       this->currentValue,
                                        nullptr);
 }
 /* *************************************************************** */
@@ -235,7 +228,7 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
     // Create a pointer to the local weight image if defined
     const DataType *localWeightPtr = localWeightSim ? static_cast<DataType*>(localWeightSim->data) : nullptr;
 
-    // find number of active voxels and correct weight
+    // Find number of active voxels and correct weight
     size_t activeVoxelNumber = 0;
     for (voxel = 0; voxel < voxelNumber; voxel++) {
         if (mask[voxel] > -1) {
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index d685509f..9a27c185 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -52,10 +52,8 @@ class reg_ssd: public reg_measure {
                                      float *discretisedValue,
                                      int discretiseRadius,
                                      int discretiseStep) override;
-protected:
-    float currentValue[255];
 
-private:
+protected:
     bool normaliseTimePoint[255];
 };
 /* *************************************************************** */
@@ -69,7 +67,6 @@ class reg_ssd: public reg_measure {
  * pointer is set to nullptr
  * @param mask Array that contains a mask to specify which voxel
  * should be considered
- * @param currentValue Array that contains the current values
  * @param localWeightSim Image that contains the local weight similarity
  * @return Returns the computed sum squared difference
  */
@@ -79,7 +76,6 @@ double reg_getSsdValue(const nifti_image *referenceImage,
                        const double *timePointWeight,
                        const nifti_image *jacobianDetImage,
                        const int *mask,
-                       float *currentValue,
                        const nifti_image *localWeightSim);
 /* *************************************************************** */
 /** @brief Compute a voxel based gradient of the sum squared difference.
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index c173148f..0970e365 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -62,8 +62,8 @@ struct BlockSize {
     unsigned GetMaximalLength;
     unsigned reg_updateControlPointPosition;
     /* _reg_ssd_gpu */
-    unsigned reg_getSquaredDifference;
-    unsigned reg_getSSDGradient;
+    unsigned GetSsdValue;
+    unsigned GetSsdGradient;
     /* _reg_tools_gpu */
     unsigned reg_voxelCentric2NodeCentric;
     unsigned reg_convertNMIGradientFromVoxelToRealSpace;
@@ -127,8 +127,8 @@ struct BlockSize100: public BlockSize {
         GetMaximalLength = 384; // 04 reg - 24 smem
         reg_updateControlPointPosition = 384; // 08 reg - 24 smem
         /* _reg_ssd_gpu */
-        reg_getSquaredDifference = 320; // 12 reg - 24 smem - 08 cmem
-        reg_getSSDGradient = 320; // 12 reg - 24 smem - 08 cmem
+        GetSsdValue = 320; // 12 reg - 24 smem - 08 cmem
+        GetSsdGradient = 320; // 12 reg - 24 smem - 08 cmem
         /* _reg_tools_gpu */
         reg_voxelCentric2NodeCentric = 320; // 11 reg - 24 smem - 16 cmem
         reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
@@ -194,8 +194,8 @@ struct BlockSize300: public BlockSize {
         GetMaximalLength = 1024; // 20 reg
         reg_updateControlPointPosition = 1024; // 22 reg
         /* _reg_ssd_gpu */
-        reg_getSquaredDifference = 768; // 34 reg
-        reg_getSSDGradient = 768; // 34 reg
+        GetSsdValue = 768; // 34 reg
+        GetSsdGradient = 768; // 34 reg
         /* _reg_tools_gpu */
         reg_voxelCentric2NodeCentric = 1024; // 23 reg
         reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 7acea9e9..9c66607d 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -21,8 +21,8 @@ elseif(RUN_RESULT_VAR)
     return()
 else(NOT COMPILE_RESULT_VAR)
     message(STATUS "Found CUDA (v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}) and a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})")
-    # Set C++ standard version for CUDA
-    set(CUDA_NVCC_FLAGS "-std=c++17")
+    # Set C++ standard version for CUDA and enable extended lambdas
+    set(CUDA_NVCC_FLAGS "-std=c++17 --extended-lambda")
     #check cuda version and adjust compile flags
     if("${RUN_OUTPUT_VAR}" LESS "30")
         set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
diff --git a/reg-lib/cuda/CudaDefContent.cpp b/reg-lib/cuda/CudaDefContent.cpp
index 44ce96ed..72f1c88c 100644
--- a/reg-lib/cuda/CudaDefContent.cpp
+++ b/reg-lib/cuda/CudaDefContent.cpp
@@ -12,11 +12,26 @@ CudaDefContent::CudaDefContent(nifti_image *referenceIn,
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) {
     AllocateWarpedGradient();
     AllocateVoxelBasedMeasureGradient();
+    AllocateLocalWeightSim();
 }
 /* *************************************************************** */
 CudaDefContent::~CudaDefContent() {
     DeallocateWarpedGradient();
     DeallocateVoxelBasedMeasureGradient();
+    DeallocateLocalWeightSim();
+}
+/* *************************************************************** */
+void CudaDefContent::AllocateLocalWeightSim() {
+    if (!localWeightSim) return;
+    Cuda::Allocate(&localWeightSimCuda, localWeightSim->nvox);
+    Cuda::TransferNiftiToDevice(localWeightSimCuda, localWeightSim);
+}
+/* *************************************************************** */
+void CudaDefContent::DeallocateLocalWeightSim() {
+    if (localWeightSimCuda != nullptr) {
+        Cuda::Free(localWeightSimCuda);
+        localWeightSimCuda = nullptr;
+    }
 }
 /* *************************************************************** */
 void CudaDefContent::AllocateWarpedGradient() {
@@ -41,6 +56,11 @@ void CudaDefContent::DeallocateVoxelBasedMeasureGradient() {
     }
 }
 /* *************************************************************** */
+nifti_image* CudaDefContent::GetLocalWeightSim() {
+    Cuda::TransferFromDeviceToNifti(localWeightSim, localWeightSimCuda);
+    return localWeightSim;
+}
+/* *************************************************************** */
 nifti_image* CudaDefContent::GetVoxelBasedMeasureGradient() {
     Cuda::TransferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda);
     return voxelBasedMeasureGradient;
diff --git a/reg-lib/cuda/CudaDefContent.h b/reg-lib/cuda/CudaDefContent.h
index eb6372a8..76e09b21 100644
--- a/reg-lib/cuda/CudaDefContent.h
+++ b/reg-lib/cuda/CudaDefContent.h
@@ -15,8 +15,10 @@ class CudaDefContent: public virtual DefContent, public virtual CudaContent {
     virtual ~CudaDefContent();
 
     // Getters
+    virtual nifti_image* GetLocalWeightSim() override;
     virtual nifti_image* GetVoxelBasedMeasureGradient() override;
     virtual nifti_image* GetWarpedGradient() override;
+    virtual float* GetLocalWeightSimCuda() { return localWeightSimCuda; }
     virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; }
     virtual float4* GetWarpedGradientCuda() { return warpedGradientCuda; }
 
@@ -28,10 +30,13 @@ class CudaDefContent: public virtual DefContent, public virtual CudaContent {
     virtual void ZeroVoxelBasedMeasureGradient() override;
 
 protected:
+    float *localWeightSimCuda = nullptr;
     float4 *voxelBasedMeasureGradientCuda = nullptr;
     float4 *warpedGradientCuda = nullptr;
 
 private:
+    void AllocateLocalWeightSim();
+    void DeallocateLocalWeightSim();
     void AllocateWarpedGradient();
     void DeallocateWarpedGradient();
     void AllocateVoxelBasedMeasureGradient();
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
index 3d1325e7..4cdfbdc8 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -44,6 +44,7 @@ void CudaMeasure::Initialise(reg_measure& measure, DefContent& con, DefContent *
                                  cudaCon.DefContent::GetVoxelBasedMeasureGradient(),
                                  cudaCon.GetVoxelBasedMeasureGradientCuda(),
                                  cudaCon.DefContent::GetLocalWeightSim(),
+                                 cudaCon.GetLocalWeightSimCuda(),
                                  cudaConBw ? cudaConBw->Content::GetReferenceMask() : nullptr,
                                  cudaConBw ? cudaConBw->GetReferenceMaskCuda() : nullptr,
                                  cudaConBw ? cudaConBw->Content::GetWarped() : nullptr,
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index 2137a714..3a30f9af 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -73,3 +73,14 @@ __device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quo
     rem = num % denom;
 }
 /* *************************************************************** */
+__device__ __inline__ int3 reg_indexToDims_cuda(const int& index, const int3& dims) {
+    int quot = 0, rem;
+    if (dims.z > 1)
+        reg_div_cuda(index, dims.x * dims.y, quot, rem);
+    else rem = index;
+    const int z = quot;
+    reg_div_cuda(rem, dims.x, quot, rem);
+    const int y = quot, x = rem;
+    return { x, y, z };
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 19f88644..7055465e 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -35,6 +35,7 @@ class reg_measure_gpu {
                                    nifti_image *voxelBasedGrad,
                                    float4 *voxelBasedGradCuda,
                                    nifti_image *localWeightSim = nullptr,
+                                   float *localWeightSimCuda = nullptr,
                                    int *floMask = nullptr,
                                    int *floMaskCuda = nullptr,
                                    nifti_image *warpedImgBw = nullptr,
@@ -54,6 +55,7 @@ class reg_measure_gpu {
         this->warpedImageCuda = warpedImgCuda;
         this->warpedGradientCuda = warpedGradCuda;
         this->voxelBasedGradientCuda = voxelBasedGradCuda;
+        this->localWeightSimCuda = localWeightSimCuda;
         // Check if the symmetric mode is used
         if (floMask != nullptr && warpedImgBw != nullptr && warpedGradBw != nullptr && voxelBasedGradBw != nullptr &&
             floMaskCuda != nullptr && warpedImgBwCuda != nullptr && warpedGradBwCuda != nullptr && voxelBasedGradBwCuda != nullptr) {
@@ -80,6 +82,7 @@ class reg_measure_gpu {
     float *warpedImageCuda;
     float4 *warpedGradientCuda;
     float4 *voxelBasedGradientCuda;
+    float *localWeightSimCuda;
 
     int *floatingMaskCuda;
     float *warpedImageBwCuda;
@@ -110,6 +113,7 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
                                    nifti_image *voxelBasedGrad,
                                    float4 *voxelBasedGradCuda,
                                    nifti_image *localWeightSim = nullptr,
+                                   float *localWeightSimCuda = nullptr,
                                    int *floMask = nullptr,
                                    int *floMaskCuda = nullptr,
                                    nifti_image *warpedImgBw = nullptr,
@@ -151,6 +155,7 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
                                    nifti_image *voxelBasedGrad,
                                    float4 *voxelBasedGradCuda,
                                    nifti_image *localWeightSim = nullptr,
+                                   float *localWeightSimCuda = nullptr,
                                    int *floMask = nullptr,
                                    int *floMaskCuda = nullptr,
                                    nifti_image *warpedImgBw = nullptr,
@@ -192,6 +197,7 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
                                    nifti_image *voxelBasedGrad,
                                    float4 *voxelBasedGradCuda,
                                    nifti_image *localWeightSim = nullptr,
+                                   float *localWeightSimCuda = nullptr,
                                    int *floMask = nullptr,
                                    int *floMaskCuda = nullptr,
                                    nifti_image *warpedImgBw = nullptr,
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 2a8ba350..a91b8f9b 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -30,7 +30,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
                                     nifti_image *warpedImg, float *warpedImgCuda,
                                     nifti_image *warpedGrad, float4 *warpedGradCuda,
                                     nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda,
-                                    nifti_image *localWeightSim,
+                                    nifti_image *localWeightSim, float *localWeightSimCuda,
                                     int *floMask, int *floMaskCuda,
                                     nifti_image *warpedImgBw, float *warpedImgBwCuda,
                                     nifti_image *warpedGradBw, float4 *warpedGradBwCuda,
@@ -38,9 +38,10 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
     this->DeallocateHistogram();
     reg_nmi::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad,
                                localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw);
-    reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda,
-                                       warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda,
-                                       warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
+    reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum,
+                                       warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda,
+                                       localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda,
+                                       warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
     // Check if the input images have multiple timepoints
     if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1)
         NR_FATAL_ERROR("Multiple timepoints are not yet supported");
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index 0e8fe3ed..be6479ec 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -39,6 +39,7 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
                                    nifti_image *voxelBasedGrad,
                                    float4 *voxelBasedGradCuda,
                                    nifti_image *localWeightSim = nullptr,
+                                   float *localWeightSimCuda = nullptr,
                                    int *floMask = nullptr,
                                    int *floMaskCuda = nullptr,
                                    nifti_image *warpedImgBw = nullptr,
@@ -74,6 +75,7 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_
                            nifti_image *voxelBasedGrad,
                            float4 *voxelBasedGradCuda,
                            nifti_image *localWeightSim = nullptr,
+                           float *localWeightSimCuda = nullptr,
                            int *floMask = nullptr,
                            int *floMaskCuda = nullptr,
                            nifti_image *warpedImgBw = nullptr,
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 7ac8a625..2a3e853b 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -30,108 +30,134 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
                                     nifti_image *warpedImg, float *warpedImgCuda,
                                     nifti_image *warpedGrad, float4 *warpedGradCuda,
                                     nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda,
-                                    nifti_image *localWeightSim,
+                                    nifti_image *localWeightSim, float *localWeightSimCuda,
                                     int *floMask, int *floMaskCuda,
                                     nifti_image *warpedImgBw, float *warpedImgBwCuda,
                                     nifti_image *warpedGradBw, float4 *warpedGradBwCuda,
                                     nifti_image *voxelBasedGradBw, float4 *voxelBasedGradBwCuda) {
     reg_ssd::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad,
                                localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw);
-    reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda,
-                                       warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda,
-                                       warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
+    reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum,
+                                       warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda,
+                                       localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda,
+                                       warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
     // Check that the input images have only one time point
     if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1)
         NR_FATAL_ERROR("Multiple timepoints are not yet supported");
+    // Check if the reference and floating images need to be updated
+    for (int i = 0; i < this->referenceImage->nt; ++i)
+        if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) {
+            Cuda::TransferNiftiToDevice<float>(this->referenceImageCuda, this->referenceImage);
+            Cuda::TransferNiftiToDevice<float>(this->floatingImageCuda, this->floatingImage);
+            break;
+        }
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 double reg_getSsdValue_gpu(const nifti_image *referenceImage,
                            const cudaArray *referenceImageCuda,
                            const float *warpedCuda,
+                           const float *localWeightSimCuda,
                            const int *maskCuda,
                            const size_t& activeVoxelNumber) {
     // Copy the constant memory variables
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
-    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
-                                                      cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
+    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray);
     auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
                                                    cudaChannelFormatKindFloat, 1);
     auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
                                                  cudaChannelFormatKindSigned, 1);
+    Cuda::UniqueTextureObjectPtr localWeightSimTexture(nullptr, nullptr);
+    if (localWeightSimCuda)
+        localWeightSimTexture = std::move(Cuda::CreateTextureObject(localWeightSimCuda, cudaResourceTypeLinear,
+                                                                    voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1));
 
     // Create an array on the device to store the absolute difference values
-    thrust::device_vector<float> absoluteValuesCuda(activeVoxelNumber);
+    thrust::device_vector<float> ssdSum(1), ssdCount(1);
 
     // Compute the absolute values
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_getSquaredDifference;
+    const unsigned blocks = CudaContext::GetBlockSize()->GetSsdValue;
     const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    if (referenceImageDim.z > 1)
-        reg_getSquaredDifference3d_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture,
-                                                                   *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber);
-    else reg_getSquaredDifference2d_kernel<<<gridDims, blockDims>>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture,
-                                                                    *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber);
+    Cuda::GetSsdValueKernel<<<gridDims, blockDims>>>(ssdSum.data().get(), ssdCount.data().get(), *referenceTexture,
+                                                     *warpedTexture, localWeightSimCuda ? *localWeightSimTexture : 0,
+                                                     *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 
-    // Perform a reduction on the absolute values
-    const double ssd = (double)reg_sumReduction_gpu(absoluteValuesCuda.data().get(), activeVoxelNumber) / (double)activeVoxelNumber;
+    // Calculate the SSD
+    const float ssd = ssdSum[0] / ssdCount[0];
 
-    return ssd;
+    return -ssd;
 }
 /* *************************************************************** */
 double reg_ssd_gpu::GetSimilarityMeasureValueFw() {
-    return -reg_getSsdValue_gpu(this->referenceImage,
-                                this->referenceImageCuda,
-                                this->warpedImageCuda,
-                                this->referenceMaskCuda,
-                                this->activeVoxelNumber);
+    return reg_getSsdValue_gpu(this->referenceImage,
+                               this->referenceImageCuda,
+                               this->warpedImageCuda,
+                               this->localWeightSimCuda,
+                               this->referenceMaskCuda,
+                               this->activeVoxelNumber);
 }
 /* *************************************************************** */
 double reg_ssd_gpu::GetSimilarityMeasureValueBw() {
-    return -reg_getSsdValue_gpu(this->floatingImage,
-                                this->floatingImageCuda,
-                                this->warpedImageBwCuda,
-                                this->floatingMaskCuda,
-                                this->activeVoxelNumber);
+    return reg_getSsdValue_gpu(this->floatingImage,
+                               this->floatingImageCuda,
+                               this->warpedImageBwCuda,
+                               nullptr,
+                               this->floatingMaskCuda,
+                               this->activeVoxelNumber);
 }
 /* *************************************************************** */
 void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
                                       const cudaArray *referenceImageCuda,
                                       const float *warpedCuda,
-                                      const float4 *spaGradientCuda,
+                                      const float4 *spatialGradCuda,
+                                      const float *localWeightSimCuda,
                                       float4 *ssdGradientCuda,
-                                      const float& maxSD,
                                       const int *maskCuda,
-                                      const size_t& activeVoxelNumber) {
+                                      const size_t& activeVoxelNumber,
+                                      const float& timepointWeight) {
     // Copy the constant memory variables
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
-    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
-                                                      cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
+    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray);
     auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
                                                    cudaChannelFormatKindFloat, 1);
     auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
                                                  cudaChannelFormatKindSigned, 1);
-    auto spaGradientTexture = Cuda::CreateTextureObject(spaGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
+    auto spatialGradTexture = Cuda::CreateTextureObject(spatialGradCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
                                                         cudaChannelFormatKindFloat, 4);
+    Cuda::UniqueTextureObjectPtr localWeightSimTexture(nullptr, nullptr);
+    if (localWeightSimCuda)
+        localWeightSimTexture = std::move(Cuda::CreateTextureObject(localWeightSimCuda, cudaResourceTypeLinear,
+                                                                    voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1));
 
-    // Set the gradient image to zero
-    NR_CUDA_SAFE_CALL(cudaMemset(ssdGradientCuda, 0, voxelNumber * sizeof(float4)));
+    // Find number of valid voxels and correct weight
+    const cudaTextureObject_t referenceTextureObject = *referenceTexture;
+    const cudaTextureObject_t warpedTextureObject = *warpedTexture;
+    const size_t validVoxelNumber = thrust::count_if(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int& index) {
+        const float warValue = tex1Dfetch<float>(warpedTextureObject, index);
+        if (warValue != warValue) return false;
 
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_getSSDGradient;
+        const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim);
+        const float refValue = tex3D<float>(referenceTextureObject, x, y, z);
+        if (refValue != refValue) return false;
+
+        return true;
+    });
+    const float adjustedWeight = timepointWeight / static_cast<float>(validVoxelNumber);
+
+    const unsigned blocks = CudaContext::GetBlockSize()->GetSsdGradient;
     const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    if (referenceImageDim.z > 1)
-        reg_getSsdGradient3d_kernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
-                                                             *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber);
-    else reg_getSsdGradient2d_kernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
-                                                              *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber);
+    Cuda::GetSsdGradientKernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
+                                                        *spatialGradTexture, localWeightSimCuda ? *localWeightSimTexture : 0,
+                                                        referenceImageDim, adjustedWeight, (unsigned)activeVoxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
@@ -140,10 +166,11 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint)
                                      this->referenceImageCuda,
                                      this->warpedImageCuda,
                                      this->warpedGradientCuda,
+                                     this->localWeightSimCuda,
                                      this->voxelBasedGradientCuda,
-                                     1.f,
                                      this->referenceMaskCuda,
-                                     this->activeVoxelNumber);
+                                     this->activeVoxelNumber,
+                                     static_cast<float>(this->timePointWeight[currentTimepoint]));
 }
 /* *************************************************************** */
 void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
@@ -151,9 +178,10 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint)
                                      this->floatingImageCuda,
                                      this->warpedImageBwCuda,
                                      this->warpedGradientBwCuda,
+                                     nullptr,
                                      this->voxelBasedGradientBwCuda,
-                                     1.f,
                                      this->floatingMaskCuda,
-                                     this->activeVoxelNumber);
+                                     this->activeVoxelNumber,
+                                     static_cast<float>(this->timePointWeight[currentTimepoint]));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 1214d8f2..9dfd2960 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -40,6 +40,7 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
                                    nifti_image *voxelBasedGrad,
                                    float4 *voxelBasedGradCuda,
                                    nifti_image *localWeightSim = nullptr,
+                                   float *localWeightSimCuda = nullptr,
                                    int *floMask = nullptr,
                                    int *floMaskCuda = nullptr,
                                    nifti_image *warpedImgBw = nullptr,
diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu
index ea387250..794c3a23 100755
--- a/reg-lib/cuda/_reg_ssd_kernels.cu
+++ b/reg-lib/cuda/_reg_ssd_kernels.cu
@@ -17,122 +17,65 @@
 #include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
-__global__ void reg_getSquaredDifference3d_kernel(float *squaredDifference,
-                                                  cudaTextureObject_t referenceTexture,
-                                                  cudaTextureObject_t warpedTexture,
-                                                  cudaTextureObject_t maskTexture,
-                                                  const int3 referenceImageDim,
-                                                  const unsigned activeVoxelNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        const int index = tex1Dfetch<int>(maskTexture, tid);
-        int quot, rem;
-        reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem);
-        const int z = quot;
-        reg_div_cuda(rem, referenceImageDim.x, quot, rem);
-        const int y = quot, x = rem;
-
-        float difference = tex3D<float>(referenceTexture,
-                                        ((float)x + 0.5f) / (float)referenceImageDim.x,
-                                        ((float)y + 0.5f) / (float)referenceImageDim.y,
-                                        ((float)z + 0.5f) / (float)referenceImageDim.z);
-        difference -= tex1Dfetch<float>(warpedTexture, index);
-        squaredDifference[tid] = difference == difference ? difference * difference : 0;
-    }
-}
+namespace NiftyReg::Cuda {
 /* *************************************************************** */
-__global__ void reg_getSquaredDifference2d_kernel(float *squaredDifference,
-                                                  cudaTextureObject_t referenceTexture,
-                                                  cudaTextureObject_t warpedTexture,
-                                                  cudaTextureObject_t maskTexture,
-                                                  const int3 referenceImageDim,
-                                                  const unsigned activeVoxelNumber) {
+__global__ void GetSsdValueKernel(float *ssdSum,
+                                  float *ssdCount,
+                                  cudaTextureObject_t referenceTexture,
+                                  cudaTextureObject_t warpedTexture,
+                                  cudaTextureObject_t localWeightSimTexture,
+                                  cudaTextureObject_t maskTexture,
+                                  const int3 referenceImageDim,
+                                  const unsigned activeVoxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
         const int index = tex1Dfetch<int>(maskTexture, tid);
-        int quot, rem;
-        reg_div_cuda(index, referenceImageDim.x, quot, rem);
-        const int y = quot, x = rem;
 
-        float difference = tex3D<float>(referenceTexture,
-                                        ((float)x + 0.5f) / (float)referenceImageDim.x,
-                                        ((float)y + 0.5f) / (float)referenceImageDim.y,
-                                        0.5f);
-        difference -= tex1Dfetch<float>(warpedTexture, index);
-        squaredDifference[tid] = difference == difference ? difference * difference : 0;
-    }
-}
-/* *************************************************************** */
-__global__ void reg_getSsdGradient2d_kernel(float4 *ssdGradient,
-                                            cudaTextureObject_t referenceTexture,
-                                            cudaTextureObject_t warpedTexture,
-                                            cudaTextureObject_t maskTexture,
-                                            cudaTextureObject_t spaGradientTexture,
-                                            const int3 referenceImageDim,
-                                            const float maxSD,
-                                            const unsigned activeVoxelNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        const int index = tex1Dfetch<int>(maskTexture, tid);
-        int quot, rem;
-        reg_div_cuda(index, referenceImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const float warValue = tex1Dfetch<float>(warpedTexture, index);
+        if (warValue != warValue) return;
 
-        const float refValue = tex3D<float>(referenceTexture,
-                                            ((float)x + 0.5f) / (float)referenceImageDim.x,
-                                            ((float)y + 0.5f) / (float)referenceImageDim.y,
-                                            0.5f);
-        if (refValue != refValue)
-            return;
-        const float warpValue = tex1Dfetch<float>(warpedTexture, index);
-        if (warpValue != warpValue)
-            return;
-
-        const float4 spaGradientValue = tex1Dfetch<float4>(spaGradientTexture, tid);
-        if (spaGradientValue.x != spaGradientValue.x || spaGradientValue.y != spaGradientValue.y)
-            return;
+        const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim);
+        const float refValue = tex3D<float>(referenceTexture, x, y, z);
+        if (refValue != refValue) return;
 
-        const float common = -2.f * (refValue - warpValue) / (maxSD * (float)activeVoxelNumber);
-        ssdGradient[index] = make_float4(common * spaGradientValue.x, common * spaGradientValue.y, 0.f, 0.f);
+        const float val = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
+        const float diff = refValue - warValue;
+        atomicAdd(ssdSum, diff * diff * val);
+        atomicAdd(ssdCount, val);
     }
 }
 /* *************************************************************** */
-__global__ void reg_getSsdGradient3d_kernel(float4 *ssdGradient,
-                                            cudaTextureObject_t referenceTexture,
-                                            cudaTextureObject_t warpedTexture,
-                                            cudaTextureObject_t maskTexture,
-                                            cudaTextureObject_t spaGradientTexture,
-                                            const int3 referenceImageDim,
-                                            const float maxSD,
-                                            const unsigned activeVoxelNumber) {
+__global__ void GetSsdGradientKernel(float4 *ssdGradient,
+                                     cudaTextureObject_t referenceTexture,
+                                     cudaTextureObject_t warpedTexture,
+                                     cudaTextureObject_t maskTexture,
+                                     cudaTextureObject_t spatialGradTexture,
+                                     cudaTextureObject_t localWeightSimTexture,
+                                     const int3 referenceImageDim,
+                                     const float adjustedWeight,
+                                     const unsigned activeVoxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < activeVoxelNumber) {
         const int index = tex1Dfetch<int>(maskTexture, tid);
-        int quot, rem;
-        reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem);
-        const int z = quot;
-        reg_div_cuda(rem, referenceImageDim.x, quot, rem);
-        const int y = quot, x = rem;
 
-        const float refValue = tex3D<float>(referenceTexture,
-                                            ((float)x + 0.5f) / (float)referenceImageDim.x,
-                                            ((float)y + 0.5f) / (float)referenceImageDim.y,
-                                            ((float)z + 0.5f) / (float)referenceImageDim.z);
-        if (refValue != refValue)
-            return;
-
-        const float warpValue = tex1Dfetch<float>(warpedTexture, index);
-        if (warpValue != warpValue)
-            return;
+        const float warValue = tex1Dfetch<float>(warpedTexture, index);
+        if (warValue != warValue) return;
 
-        const float4 spaGradientValue = tex1Dfetch<float4>(spaGradientTexture, tid);
+        const float4 spaGradientValue = tex1Dfetch<float4>(spatialGradTexture, tid);
         if (spaGradientValue.x != spaGradientValue.x ||
             spaGradientValue.y != spaGradientValue.y ||
             spaGradientValue.z != spaGradientValue.z)
             return;
 
-        const float common = -2.f * (refValue - warpValue) / (maxSD * (float)activeVoxelNumber);
-        ssdGradient[index] = make_float4(common * spaGradientValue.x, common * spaGradientValue.y, common * spaGradientValue.z, 0.f);
+        const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim);
+        const float refValue = tex3D<float>(referenceTexture, x, y, z);
+        if (refValue != refValue) return;
+
+        const float val = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
+        const float common = -2.f * (refValue - warValue) * adjustedWeight * val;
+        ssdGradient[index] = ssdGradient[index] + make_float4(common * spaGradientValue.x, common * spaGradientValue.y, common * spaGradientValue.z, 0.f);
     }
 }
 /* *************************************************************** */
+} // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 8a472bac..16d3040e 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -13,7 +13,7 @@
 
 class MeasureTest {
 protected:
-    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, MeasureType, bool>;
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, NiftiImage, MeasureType, bool>;
     using TestCase = std::tuple<std::string, double, double, NiftiImage, NiftiImage>;
 
     inline static vector<TestCase> testCases;
@@ -27,33 +27,39 @@ class MeasureTest {
         std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
-        // Create 2D reference, floating and control point grid images
+        // Create 2D reference, floating, control point grid and local weight similarity images
         constexpr NiftiImage::dim_t size = 16;
         vector<NiftiImage::dim_t> dim{ size, size };
         NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage controlPointGrid2d(CreateControlPointGrid(reference2d));
+        NiftiImage localWeightSim2d(dim, NIFTI_TYPE_FLOAT32);
 
-        // Create 3D reference, floating and control point grid images
+        // Create 3D reference, floating, control point grid and local weight similarity images
         dim.push_back(size);
         NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage controlPointGrid3d(CreateControlPointGrid(reference3d));
+        NiftiImage localWeightSim3d(dim, NIFTI_TYPE_FLOAT32);
 
         // Fill images with random values
         auto ref2dPtr = reference2d.data();
         auto flo2dPtr = floating2d.data();
+        auto localWeightSim2dPtr = localWeightSim2d.data();
         for (size_t i = 0; i < reference2d.nVoxels(); ++i) {
             ref2dPtr[i] = distr(gen);
             flo2dPtr[i] = distr(gen);
+            localWeightSim2dPtr[i] = distr(gen);
         }
 
         // Fill images with random values
         auto ref3dPtr = reference3d.data();
         auto flo3dPtr = floating3d.data();
+        auto localWeightSim3dPtr = localWeightSim3d.data();
         for (size_t i = 0; i < reference3d.nVoxels(); ++i) {
             ref3dPtr[i] = distr(gen);
             flo3dPtr[i] = distr(gen);
+            localWeightSim3dPtr[i] = distr(gen);
         }
 
         // Create the data container for the regression test
@@ -67,6 +73,7 @@ class MeasureTest {
                     reference2d,
                     floating2d,
                     controlPointGrid2d,
+                    localWeightSim2d,
                     measure,
                     sym
                 ));
@@ -75,6 +82,7 @@ class MeasureTest {
                     reference3d,
                     floating3d,
                     controlPointGrid3d,
+                    localWeightSim3d,
                     measure,
                     sym
                 ));
@@ -91,20 +99,21 @@ class MeasureTest {
 
         for (auto&& testData : testData) {
             // Get the test data
-            auto&& [testName, reference, floating, controlPointGrid, measureType, isSymmetric] = testData;
+            auto&& [testName, reference, floating, controlPointGrid, localWeightSim, measureType, isSymmetric] = testData;
 
             // Create images
             NiftiImage referenceCpu(reference), referenceCuda(reference);
             NiftiImage floatingCpu(floating), floatingCuda(floating);
             NiftiImage controlPointGridCpu(controlPointGrid), controlPointGridCuda(controlPointGrid);
             NiftiImage controlPointGridCpuBw(controlPointGrid), controlPointGridCudaBw(controlPointGrid);
+            NiftiImage localWeightSimCpu(localWeightSim), localWeightSimCuda(localWeightSim);
 
             // Create the contents
             unique_ptr<F3dContent> contentCpu{ new F3dContent(
                 referenceCpu,
                 floatingCpu,
                 controlPointGridCpu,
-                nullptr,
+                localWeightSimCpu,
                 nullptr,
                 nullptr,
                 sizeof(float)
@@ -113,7 +122,7 @@ class MeasureTest {
                 referenceCuda,
                 floatingCuda,
                 controlPointGridCuda,
-                nullptr,
+                localWeightSimCuda,
                 nullptr,
                 nullptr,
                 sizeof(float)

From b1036bef11fa4483517dd7e1d94c85230958d969 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Wed, 30 Aug 2023 10:42:25 +0100
Subject: [PATCH 186/314] #92 Added nmi gradient test

---
 niftyreg_build_version.txt         |   2 +-
 reg-apps/reg_tools.cpp             |   2 +
 reg-lib/Content.cpp                |   6 +-
 reg-lib/cpu/_reg_nmi.cpp           |  68 ++++--------
 reg-test/CMakeLists.txt            |   1 +
 reg-test/reg_test_nmi_gradient.cpp | 172 +++++++++++++++++++++++++++++
 6 files changed, 200 insertions(+), 51 deletions(-)
 create mode 100644 reg-test/reg_test_nmi_gradient.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 697cb3a2..d8fc48a4 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-300
+301
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 5c1d5eeb..7fcbdc29 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -149,6 +149,8 @@ int main(int argc, char **argv)
     if (argc < 2)
     {
         PetitUsage(argv[0]);
+        free(param);
+        free(flag);
         return EXIT_FAILURE;
     }
 
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index afd8b4ed..5b3f0080 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -28,6 +28,9 @@ Content::Content(nifti_image *referenceIn,
 Content::~Content() {
     DeallocateWarped();
     DeallocateDeformationField();
+#ifndef NDEBUG
+    reg_print_msg_debug("Content destructor called");
+#endif
 }
 /* *************************************************************** */
 void Content::AllocateWarped() {
@@ -75,10 +78,11 @@ void Content::AllocateDeformationField(size_t bytes) {
     deformationField->intent_code = NIFTI_INTENT_VECTOR;
     memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name));
     strcpy(deformationField->intent_name, "NREG_TRANS");
-    deformationField->intent_p1 = DEF_FIELD;
+    deformationField->intent_p1 = DISP_FIELD;
     deformationField->scl_slope = 1;
     deformationField->scl_inter = 0;
     deformationField->data = calloc(deformationField->nvox, deformationField->nbyper);
+    reg_getDeformationFromDisplacement(deformationField);
 }
 /* *************************************************************** */
 void Content::DeallocateDeformationField() {
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index b8ce5a55..b5a14594 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -236,52 +236,21 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             const DataType *warPtr = &warImagePtr[t * voxelNumber];
             for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
                 if (referenceMask[voxel] > -1) {
-                    const DataType& refValue = refPtr[voxel];
-                    const DataType& warValue = warPtr[voxel];
-                    if (refValue == refValue && warValue == warValue &&
-                        0 <= refValue && refValue < referenceBinNumber[t] &&
-                        0 <= warValue && warValue < floatingBinNumber[t]) {
-                        ++jointHistoProPtr[static_cast<int>(refValue) + static_cast<int>(warValue) * referenceBinNumber[t]];
-                    }
-                }
-            }
-            // Convolve the histogram with a cubic B-spline kernel
-            double kernel[3];
-            kernel[0] = kernel[2] = GetBasisSplineValue(-1.0);
-            kernel[1] = GetBasisSplineValue(0.0);
-            // Histogram is first smooth along the reference axis
-            memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double));
-            for (int f = 0; f < floatingBinNumber[t]; ++f) {
-                for (int r = 0; r < referenceBinNumber[t]; ++r) {
-                    double value = 0;
-                    int index = r - 1;
-                    double *ptrHisto = &jointHistoProPtr[index + referenceBinNumber[t] * f];
-
-                    for (int it = 0; it < 3; it++) {
-                        if (-1 < index && index < referenceBinNumber[t]) {
-                            value += *ptrHisto * kernel[it];
-                        }
-                        ++ptrHisto;
-                        ++index;
-                    }
-                    jointHistoLogPtr[r + referenceBinNumber[t] * f] = value;
-                }
-            }
-            // Histogram is then smooth along the warped floating axis
-            for (int r = 0; r < referenceBinNumber[t]; ++r) {
-                for (int f = 0; f < floatingBinNumber[t]; ++f) {
-                    double value = 0.;
-                    int index = f - 1;
-                    double *ptrHisto = &jointHistoLogPtr[r + referenceBinNumber[t] * index];
-
-                    for (int it = 0; it < 3; it++) {
-                        if (-1 < index && index < floatingBinNumber[t]) {
-                            value += *ptrHisto * kernel[it];
+                    const DataType refValue = refPtr[voxel];
+                    const DataType warValue = warPtr[voxel];
+                    if (refValue == refValue && warValue == warValue){
+                        for(int r = int(refValue-1); r < int(refValue+3); ++r){
+                            if( 0 <= r && r < referenceBinNumber[t]){
+                                const double refBasis = GetBasisSplineValue(refValue - r);
+                                for(int w = int(warValue-1); w < int(warValue+3); ++w){
+                                    if( 0 <= w && w < floatingBinNumber[t]){
+                                        const double warBasis = GetBasisSplineValue(warValue - w);
+                                        jointHistoProPtr[r + w * referenceBinNumber[t]] += refBasis * warBasis;
+                                    }
+                                }
+                            }
                         }
-                        ptrHisto += referenceBinNumber[t];
-                        ++index;
                     }
-                    jointHistoProPtr[r + referenceBinNumber[t] * f] = value;
                 }
             }
             // Normalise the histogram
@@ -427,10 +396,10 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                                     const double& timepointWeight) {
 #ifdef WIN32
     long i;
-    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
+    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 2);
 #else
     size_t i;
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 2);
 #endif
     // Pointers to the image data
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
@@ -452,6 +421,7 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
     const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
     const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint];
     const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint];
+
     // Iterate over all voxel
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -472,9 +442,9 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                             if (-1 < w && w < floatingBinNumber[currentTimepoint]) {
                                 const double commun = GetBasisSplineValue(refValue - r) *
                                     GetBasisSplineDerivativeValue(warValue - w);
-                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
-                                const double& refLog = logHistoPtr[r + referenceOffset];
-                                const double& warLog = logHistoPtr[w + floatingOffset];
+                                const double &jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
+                                const double &refLog = logHistoPtr[r + referenceOffset];
+                                const double &warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
                                     jointDeriv[0] += commun * gradX * jointLog;
                                     refDeriv[0] += commun * gradX * refLog;
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 7d3faeef..27364cfc 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -118,6 +118,7 @@ set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
 set(EXEC_LIST reg_test_nmi ${EXEC_LIST})
 set(EXEC_LIST reg_test_be ${EXEC_LIST})
+set(EXEC_LIST reg_test_nmi_gradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
 if(USE_CUDA)
diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp
new file mode 100644
index 00000000..134d0e69
--- /dev/null
+++ b/reg-test/reg_test_nmi_gradient.cpp
@@ -0,0 +1,172 @@
+// OpenCL and CUDA are not supported for this test yet
+#undef _USE_OPENCL
+#undef _USE_CUDA
+
+#include "reg_test_common.h"
+#include "_reg_tools.h"
+#include "_reg_ReadWriteImage.h"
+#include "_reg_nmi.h"
+
+/*
+    This test file contains the following unit tests:
+    test function: NMI gradient.
+    The anylitical formulation is compared against an approximation
+*/
+
+class NMIGradientTest {
+public:
+    NMIGradientTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a number generator
+        std::mt19937 gen(0);
+        // Images will be rescaled between 2 and bin-3
+        // Default bin value is 68 (64+4 for Parzen windowing)
+        const unsigned binNumber = 8;
+        const float padding = 2; //std::numeric_limits<float>::quiet_NaN();
+        std::uniform_real_distribution<float> distr(2, binNumber-3);
+
+        // Create reference and floating 2D images
+        vector<NiftiImage::dim_t> dim{ 4, 4 };
+        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
+
+        // Create reference and floating 3D images
+        dim.push_back(4);
+        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
+
+        // Fill images with random values
+        auto ref2dPtr = static_cast<float *>(reference2d->data);
+        auto flo2dPtr = static_cast<float *>(floating2d->data);
+        // Ensure at least one pixel contains the max and one the min
+        ref2dPtr[0] = flo2dPtr[1] = 2.f;
+        ref2dPtr[1] = flo2dPtr[0] = binNumber-3;
+        for (size_t i = 2; i < reference2d.nVoxels(); ++i)
+        {
+            ref2dPtr[i] = distr(gen);
+            flo2dPtr[i] = distr(gen);
+        }
+
+        // Fill images with random values
+        auto ref3dPtr = reference3d.data();
+        auto flo3dPtr = floating3d.data();
+        // Ensure at least one pixel contains the max and one the min
+        ref3dPtr[0] = flo3dPtr[1] = 2.f;
+        ref3dPtr[1] = flo3dPtr[0] = binNumber-3;
+        for (size_t i = 2; i < reference3d.nVoxels(); ++i) {
+            ref3dPtr[i] = distr(gen);
+            flo3dPtr[i] = distr(gen);
+        }
+
+        // Create the object to compute the expected values
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "NMI 2D",
+            reference2d,
+            floating2d
+        ));
+        testData.emplace_back(TestData(
+            "NMI 3D",
+            reference3d,
+            floating3d
+        ));
+        for (auto&& data : testData) {
+            for (auto&& platformType : PlatformTypes) {
+                // Create the platform
+                shared_ptr<Platform> platform{ new Platform(platformType) };
+                auto td = data;
+                auto&& [testName, reference, floating] = td;
+                // Create the content creator
+                unique_ptr<DefContentCreator> contentCreator{
+                    dynamic_cast<DefContentCreator*>(platform->CreateContentCreator(ContentType::Def))
+                };
+                // Create the content
+                unique_ptr<DefContent> content{ contentCreator->Create(reference, floating) };
+                // Add some displacements to the deformation field to avoid grid effect
+                float *defPtr = static_cast<float *>(content->GetDeformationField()->data);
+                for(unsigned index=0; index<content->GetDeformationField()->nvox;++index)
+                    defPtr[index] += 0.1f;
+                // Compute the warped image given the current transformation
+                unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                compute->ResampleImage(1, padding);
+                compute->GetImageGradient(1, padding, 0);
+                // Create the measure
+                unique_ptr<Measure> measure{ platform->CreateMeasure() };
+                // Use NMI as a measure
+                unique_ptr<reg_nmi> measure_nmi{ dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)) };
+                measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0
+                measure_nmi->SetRefAndFloatBinNumbers(binNumber, binNumber, 0);
+                measure->Initialise(*measure_nmi, *content);
+                // Compute the NMI gradient
+                measure_nmi->GetVoxelBasedSimilarityMeasureGradient(0);
+                // Create an image to store the gradient values
+                NiftiImage gradientImage(content->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
+                // Create an image to store the expected gradient values
+                NiftiImage expectedGradientImage(content->GetDeformationField(), NiftiImage::Copy::Image);
+                // Apply perturbations to each value in the deformation field
+                float *gradPtr = static_cast<float *>(expectedGradientImage->data);
+                const float delta = 0.00001;
+                for(unsigned index=0; index<expectedGradientImage.nVoxels();++index){
+                    float current_value = defPtr[index];
+                    // compute the NMI when removing delta(s)
+                    defPtr[index] = current_value - delta;
+                    compute->ResampleImage(1, padding);
+                    const double nmi_pre = measure_nmi->GetSimilarityMeasureValue();
+                    // compute the NMI when adding delta(s)
+                    defPtr[index] = current_value + delta;
+                    compute->ResampleImage(1, padding);
+                    const double nmi_post = measure_nmi->GetSimilarityMeasureValue();
+                    // Compute the difference
+                    gradPtr[index] = -(nmi_post - nmi_pre) / (2. * delta);
+                    defPtr[index] = current_value;
+                }
+                testCases.push_back({testName + " " + platform->GetName(),
+                                     std::move(gradientImage), std::move(expectedGradientImage)});
+            }
+        }
+    }
+
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
+    inline static vector<TestCase> testCases;
+};
+
+TEST_CASE_METHOD(NMIGradientTest, "NMI Gradient", "[unit]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, result, expected] = testCase;
+
+        SECTION(testName) {
+            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+
+            float *resPtr = static_cast<float *>(result->data);
+            float *expPtr = static_cast<float *>(expected->data);
+            float resMean = reg_tools_getMeanValue(result);
+            float expMean = reg_tools_getMeanValue(expected);
+            float resStdd = reg_tools_getSTDValue(result);
+            float expStdd = reg_tools_getSTDValue(expected);
+            double corr = 0;
+            for(unsigned i=0; i<expected.nVoxels();++i)
+                corr += (resPtr[i]-resMean)*(expPtr[i]-expMean);
+            
+            corr /= resStdd*expStdd*result.nVoxels();
+            std::cout << "Correlation = " << corr << std::endl;
+            const double norm = std::max(fabs(reg_tools_getMinValue(expected, 0)),
+                                         fabs(reg_tools_getMaxValue(expected, 0)));
+            for(unsigned i=0; i<expected.nVoxels();++i){
+                const double ratio = fabs(resPtr[i] - expPtr[i])/norm;
+                if (ratio > .1){
+                    std::cout << "[i]=" << i;
+                    std::cout << " | ratio=" << ratio;
+                    std::cout << " | Result=" << resPtr[i];
+                    std::cout << " | Expected=" << expPtr[i] << std::endl;
+                }
+            }
+            REQUIRE(corr > 0.99);
+        }
+    }
+}

From 6fc1323a5b3cfdd03c80395765eb77379f5512ee Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Wed, 30 Aug 2023 10:59:29 +0100
Subject: [PATCH 187/314] #92 removed readwritebinary as not needed

---
 niftyreg_build_version.txt      |  2 +-
 reg-io/CMakeLists.txt           |  2 +-
 reg-io/_reg_ReadWriteBinary.cpp | 21 ---------------------
 reg-io/_reg_ReadWriteBinary.h   |  8 --------
 reg-lib/cpu/_reg_mrf.cpp        |  1 -
 5 files changed, 2 insertions(+), 32 deletions(-)
 delete mode 100644 reg-io/_reg_ReadWriteBinary.cpp
 delete mode 100644 reg-io/_reg_ReadWriteBinary.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index cd307095..ae4cf41b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-306
+307
diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt
index 1a3bda5a..82a541ca 100644
--- a/reg-io/CMakeLists.txt
+++ b/reg-io/CMakeLists.txt
@@ -25,7 +25,7 @@ endif(USE_NRRD)
 SET(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${LIBRARIES}")
 
 # Create the reg_io library
-add_library(_reg_ReadWriteImage _reg_ReadWriteImage.cpp _reg_ReadWriteMatrix.cpp _reg_ReadWriteBinary.cpp)
+add_library(_reg_ReadWriteImage _reg_ReadWriteImage.cpp _reg_ReadWriteMatrix.cpp)
 target_link_libraries(_reg_ReadWriteImage ${LIBRARIES})
 install(TARGETS _reg_ReadWriteImage
         RUNTIME DESTINATION bin COMPONENT Development
diff --git a/reg-io/_reg_ReadWriteBinary.cpp b/reg-io/_reg_ReadWriteBinary.cpp
deleted file mode 100644
index 8aa6aa81..00000000
--- a/reg-io/_reg_ReadWriteBinary.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#include "_reg_ReadWriteBinary.h"
-
-void readFloatBinaryArray(const char* fileName, int lengthArray, float* outputArray) {
-    FILE* infile;
-    infile=fopen(fileName,"rb");
-    float currentValue;
-    for (int i =0;i<lengthArray;i++) {
-        fread((void*)(&currentValue), sizeof(currentValue), 1, infile);
-        outputArray[i]=currentValue;
-    }
-}
-//
-void readIntBinaryArray(const char* fileName, int lengthArray, int* outputArray) {
-    FILE* infile;
-    infile=fopen(fileName,"rb");
-    int currentValue;
-    for (int i =0;i<lengthArray;i++) {
-        fread((void*)(&currentValue), sizeof(currentValue), 1, infile);
-        outputArray[i]=currentValue;
-    }
-}
diff --git a/reg-io/_reg_ReadWriteBinary.h b/reg-io/_reg_ReadWriteBinary.h
deleted file mode 100644
index 4bc0da83..00000000
--- a/reg-io/_reg_ReadWriteBinary.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#pragma once
-
-#include <fstream>      // std::ifstream
-#include <stdlib.h>
-
-extern "C++"
-void readFloatBinaryArray(const char* fileName, int lengthArray, float* outputArray);
-void readIntBinaryArray(const char* fileName, int lengthArray, int* outputArray);
diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp
index 7ba015ae..83ea45ee 100644
--- a/reg-lib/cpu/_reg_mrf.cpp
+++ b/reg-lib/cpu/_reg_mrf.cpp
@@ -3,7 +3,6 @@
 //DEBUG
 #include <iostream>
 #include <fstream>
-#include "_reg_ReadWriteBinary.h"
 //DEBUG
 /*****************************************************/
 reg_mrf::reg_mrf(int _discrete_radius,

From 7d96dfdb5c2a5696e2341554aa1dc4d7e90317a9 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Wed, 30 Aug 2023 12:28:52 +0100
Subject: [PATCH 188/314] #92 changed the def field test to be a unit test
 against known output

---
 niftyreg_build_version.txt                |   2 +-
 reg-lib/Content.cpp                       |   3 -
 reg-lib/cpu/_reg_localTrans.cpp           |   2 +
 reg-test/reg_test_common.h                |  31 +-
 reg-test/reg_test_getDeformationField.cpp | 509 ++++------------------
 reg-test/reg_test_regr_measure.cpp        |   6 +-
 6 files changed, 131 insertions(+), 422 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index ae4cf41b..33a21f83 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-307
+308
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index 43566458..3ce854b4 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -25,9 +25,6 @@ Content::Content(nifti_image *referenceIn,
 Content::~Content() {
     DeallocateWarped();
     DeallocateDeformationField();
-#ifndef NDEBUG
-    reg_print_msg_debug("Content destructor called");
-#endif
 }
 /* *************************************************************** */
 void Content::AllocateWarped() {
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 88088b73..bbe1e4f7 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -122,6 +122,8 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
     controlPointGridImage->intent_code = NIFTI_INTENT_VECTOR;
     memset(controlPointGridImage->intent_name, 0, 16);
     strcpy(controlPointGridImage->intent_name, "NREG_TRANS");
+    // Set to be the identity transformation by default
+    reg_getDeformationFromDisplacement(controlPointGridImage);
     controlPointGridImage->intent_p1 = CUB_SPLINE_GRID;
 }
 template void reg_createControlPointGrid<float>(NiftiImage&, const NiftiImage&, const float*);
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 8ace6470..4d5a1256 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -40,8 +40,33 @@ NiftiImage CreateControlPointGrid(const NiftiImage& reference) {
     NiftiImage controlPointGrid;
     reg_createControlPointGrid<float>(controlPointGrid, reference, gridSpacing);
 
-    // The control point position image is initialised with an identity transformation
-    reg_getDeformationFromDisplacement(controlPointGrid);
-
     return controlPointGrid;
 }
+
+NiftiImage CreateDeformationField(const NiftiImage &reference) {
+    // Create and allocate a deformation field
+    NiftiImage deformationField;
+    deformationField = nifti_copy_nim_info(reference);
+    deformationField->dim[0] = deformationField->ndim = 5;
+    if (reference->dim[0] == 2)
+        deformationField->dim[3] = deformationField->nz = 1;
+    deformationField->dim[4] = deformationField->nt = 1;
+    deformationField->pixdim[4] = deformationField->dt = 1;
+    deformationField->dim[5] = deformationField->nu = reference->nz > 1 ? 3 : 2;
+    deformationField->pixdim[5] = deformationField->du = 1;
+    deformationField->dim[6] = deformationField->nv = 1;
+    deformationField->pixdim[6] = deformationField->dv = 1;
+    deformationField->dim[7] = deformationField->nw = 1;
+    deformationField->pixdim[7] = deformationField->dw = 1;
+    deformationField->nvox = NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim);
+    deformationField->datatype = NIFTI_TYPE_FLOAT32;
+    deformationField->intent_code = NIFTI_INTENT_VECTOR;
+    memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name));
+    strcpy(deformationField->intent_name, "NREG_TRANS");
+    deformationField->intent_p1 = DISP_FIELD;
+    deformationField->scl_slope = 1;
+    deformationField->scl_inter = 0;
+    deformationField->data = calloc(deformationField->nvox, deformationField->nbyper);
+    reg_getDeformationFromDisplacement(deformationField);
+    return deformationField;
+}
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index 9a93e705..797d0959 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -14,8 +14,8 @@
 
 class GetDeformationFieldTest {
 protected:
-    using TestData = std::tuple<std::string, NiftiImage, NiftiImage>;
-    using TestCase = std::tuple<shared_ptr<Platform>, unique_ptr<F3dContent>, TestData, bool, bool>;
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
 
     inline static vector<TestCase> testCases;
 
@@ -29,39 +29,85 @@ class GetDeformationFieldTest {
         std::uniform_real_distribution<float> distr(0, 1);
 
         // Create a 2D reference image
-        vector<NiftiImage::dim_t> dimFlo{ 4, 4 };
+        NiftiImage::dim_t size = 5;
+        vector<NiftiImage::dim_t> dimFlo{ size, size };
         NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
 
         // Create a 3D reference image
-        dimFlo.push_back(4);
+        dimFlo.push_back(size);
         NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32);
 
-        // Generate the different test cases
-        // Test 2D
+        // Data container for the test data
+        vector<TestData> testData;
+
+        // Identity transformation tests
+        // Create an affine transformation b-spline parametrisation
         NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
-        auto cpp2dPtr = controlPointGrid2d.data();
-        for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i)
-            cpp2dPtr[i] = distr(gen);
+        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+        // Create the expected deformation field result with an identity
+        NiftiImage deformationField2d = CreateDeformationField(reference2d);
+        NiftiImage deformationField3d = CreateDeformationField(reference3d);
+        testData.emplace_back(TestData(
+            "2D ID",
+            reference2d,
+            NiftiImage(controlPointGrid2d),
+            NiftiImage(deformationField2d)
+        ));
+        testData.emplace_back(TestData(
+            "3D ID",
+            reference3d,
+            NiftiImage(controlPointGrid3d),
+            NiftiImage(deformationField3d)
+        ));
 
-        // Add the test data
-        vector<TestData> testData;
+        // Translation transformation tests - translation of 2 along each axis
+        float * cpp2dPtr = static_cast<float *>(controlPointGrid2d->data);
+        float * cpp3dPtr = static_cast<float *>(controlPointGrid3d->data);
+        float * def2dPtr = static_cast<float *>(deformationField2d->data);
+        float * def3dPtr = static_cast<float *>(deformationField3d->data);
+        for(size_t i=0; i<controlPointGrid2d.nVoxels(); i++)
+            cpp2dPtr[i] += 2.f;
+        for(size_t i=0; i<controlPointGrid3d.nVoxels(); i++)
+            cpp3dPtr[i] += 2.f;
+        for(size_t i=0; i<deformationField2d.nVoxels(); i++)
+            def2dPtr[i] += 2.f;
+        for(size_t i=0; i<deformationField3d.nVoxels(); i++)
+            def3dPtr[i] += 2.f;
+        
+        testData.emplace_back(TestData(
+            "2D Trans",
+            reference2d,
+            NiftiImage(controlPointGrid2d),
+            NiftiImage(deformationField2d)
+        ));
         testData.emplace_back(TestData(
-            "2D",
-            std::move(reference2d),
-            std::move(controlPointGrid2d)
+            "3D Trans",
+            reference3d,
+            NiftiImage(controlPointGrid3d),
+            NiftiImage(deformationField3d)
         ));
 
-        // Test 3D
-        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
-        auto cpp3dPtr = controlPointGrid3d.data();
-        for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i)
-            cpp3dPtr[i] = distr(gen);
+        // Scaling transformation tests
+        for(size_t i=0; i<controlPointGrid2d.nVoxels(); i++)
+            cpp2dPtr[i] = (cpp2dPtr[i]-2.f) * 1.1f;
+        for(size_t i=0; i<controlPointGrid3d.nVoxels(); i++)
+            cpp3dPtr[i] = (cpp3dPtr[i]-2.f) * 1.1f;
+        for(size_t i=0; i<deformationField2d.nVoxels(); i++)
+            def2dPtr[i] = (def2dPtr[i]-2.f) * 1.1f;
+        for(size_t i=0; i<deformationField3d.nVoxels(); i++)
+            def3dPtr[i] = (def3dPtr[i]-2.f) * 1.1f;
 
-        // Add the test data
         testData.emplace_back(TestData(
-            "3D",
-            std::move(reference3d),
-            std::move(controlPointGrid3d)
+            "2D scaling",
+            reference2d,
+            NiftiImage(controlPointGrid2d),
+            NiftiImage(deformationField2d)
+        ));
+        testData.emplace_back(TestData(
+            "3D scaling",
+            reference3d,
+            NiftiImage(controlPointGrid3d),
+            NiftiImage(deformationField3d)
         ));
 
         // Add platforms, composition, and bspline to the test data
@@ -69,405 +115,42 @@ class GetDeformationFieldTest {
             for (auto&& platformType : PlatformTypes) {
                 shared_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
-                for (int composition = 0; composition < 2; composition++) {
-                    if (platformType == PlatformType::Cuda && composition)
-                        continue;   // CUDA platform does not support composition
-                    for (int bspline = 0; bspline < 2; bspline++) {
-                        // Make a copy of the test data
-                        auto td = data;
-                        auto&& [testName, reference, controlPointGrid] = td;
-                        // Add content
-                        unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
-                        testCases.push_back({ platform, std::move(content), std::move(td), composition, bspline });
-                    }
-                }
-            }
-        }
-    }
-
-    template<class DataType>
-    void GetBSplineBasisValues(const DataType basis, DataType (&values)[4]) {
-        const DataType ff = basis * basis;
-        const DataType fff = ff * basis;
-        const DataType mf = static_cast<DataType>(1.0 - basis);
-        values[0] = static_cast<DataType>(mf * mf * mf / 6.0);
-        values[1] = static_cast<DataType>((3.0 * fff - 6.0 * ff + 4.0) / 6.0);
-        values[2] = static_cast<DataType>((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0);
-        values[3] = static_cast<DataType>(fff / 6.0);
-    }
-
-    template<class DataType>
-    void GetSplineBasisValues(const DataType basis, DataType(&values)[4]) {
-        const DataType ff = basis * basis;
-        values[0] = static_cast<DataType>((basis * ((2.0 - basis) * basis - 1.0)) / 2.0);
-        values[1] = static_cast<DataType>((ff * (3.0 * basis - 5.0) + 2.0) / 2.0);
-        values[2] = static_cast<DataType>((basis * ((4.0 - 3.0 * basis) * basis + 1.0)) / 2.0);
-        values[3] = static_cast<DataType>((basis - 1.0) * ff / 2.0);
-    }
-
-    void GetGridValues(const int& xPre, const int& yPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates) {
-        const auto cppPtr = controlPointGrid.data();
-        const auto cppPtrX = cppPtr.begin();
-        const auto cppPtrY = cppPtrX + controlPointGrid.nVoxelsPerSlice();
-        size_t coord = 0;
-        for (int y = yPre; y < yPre + 4; y++) {
-            const bool in = -1 < y && y < controlPointGrid->ny;
-            const size_t index = y * controlPointGrid->nx;
-            for (int x = xPre; x < xPre + 4; x++) {
-                if (in && -1 < x && x < controlPointGrid->nx) {
-                    xControlPointCoordinates[coord] = cppPtrX[index + x];
-                    yControlPointCoordinates[coord] = cppPtrY[index + x];
-                } else {
-                    xControlPointCoordinates[coord] = 0;
-                    yControlPointCoordinates[coord] = 0;
-                }
-                coord++;
-            }
-        }
-    }
-
-    void GetGridValues(const int& xPre, const int& yPre, const int& zPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates, float *zControlPointCoordinates) {
-        const size_t cppVoxelNumber = controlPointGrid.nVoxelsPerVolume();
-        const auto cppPtr = controlPointGrid.data();
-        const auto cppPtrX = cppPtr.begin();
-        const auto cppPtrY = cppPtrX + cppVoxelNumber;
-        const auto cppPtrZ = cppPtrY + cppVoxelNumber;
-        size_t coord = 0, yIndex, zIndex;
-        for (int z = zPre; z < zPre + 4; z++) {
-            bool in = true;
-            if (-1 < z && z < controlPointGrid->nz)
-                zIndex = z * controlPointGrid->nx * controlPointGrid->ny;
-            else in = false;
-            for (int y = yPre; y < yPre + 4; y++) {
-                if (in && -1 < y && y < controlPointGrid->ny)
-                    yIndex = y * controlPointGrid->nx;
-                else in = false;
-                for (int x = xPre; x < xPre + 4; x++) {
-                    if (in && -1 < x && x < controlPointGrid->nx) {
-                        xControlPointCoordinates[coord] = cppPtrX[zIndex + yIndex + x];
-                        yControlPointCoordinates[coord] = cppPtrY[zIndex + yIndex + x];
-                        zControlPointCoordinates[coord] = cppPtrZ[zIndex + yIndex + x];
-                    } else {
-                        xControlPointCoordinates[coord] = 0;
-                        yControlPointCoordinates[coord] = 0;
-                        zControlPointCoordinates[coord] = 0;
-                    }
-                    coord++;
-                }
-            }
-        }
-    }
-
-    template<class DataType>
-    void GetDeformationField(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) {
-        if (controlPointGrid->nz > 1)
-            GetDeformationField3D<DataType>(controlPointGrid, defField, mask, composition, bspline);
-        else
-            GetDeformationField2D<DataType>(controlPointGrid, defField, mask, composition, bspline);
-    }
-
-    template<class DataType>
-    void GetDeformationField2D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) {
-        auto defFieldPtr = defField.data();
-        auto defFieldPtrX = defFieldPtr.begin();
-        auto defFieldPtrY = defFieldPtrX + NiftiImage::calcVoxelNumber(defField, 3);
-
-        const DataType gridVoxelSpacing[2] = { controlPointGrid->dx / defField->dx, controlPointGrid->dy / defField->dy };
-        DataType xBasis[4], yBasis[4], xyBasis[16], xControlPointCoordinates[16], yControlPointCoordinates[16];
-        int oldXPre = -1, oldYPre = -1;
-
-        if (composition) {  // Composition of deformation fields
-            // Read the ijk sform or qform, as appropriate
-            const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk;
-
-            for (int y = 0; y < defField->ny; y++) {
-                size_t index = y * defField->nx;
-                for (int x = 0; x < defField->nx; x++) {
-                    // The previous position at the current pixel position is read
-                    DataType xReal = defFieldPtrX[index];
-                    DataType yReal = defFieldPtrY[index];
-
-                    // From real to pixel position in the CPP
-                    const DataType xVoxel = realToVoxel->m[0][0] * xReal + realToVoxel->m[0][1] * yReal + realToVoxel->m[0][3];
-                    const DataType yVoxel = realToVoxel->m[1][0] * xReal + realToVoxel->m[1][1] * yReal + realToVoxel->m[1][3];
-
-                    // The spline coefficients are computed
-                    int xPre = reg_floor(xVoxel);
-                    DataType basis = xVoxel - (DataType)xPre--;
-                    if (basis < 0) basis = 0; // rounding error
-                    if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
-                    else GetSplineBasisValues<DataType>(basis, xBasis);
-
-                    int yPre = reg_floor(yVoxel);
-                    basis = yVoxel - (DataType)yPre--;
-                    if (basis < 0) basis = 0; // rounding error
-                    if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
-                    else GetSplineBasisValues<DataType>(basis, yBasis);
-
-                    if (xVoxel >= 0 && xVoxel <= defField->nx - 1 &&
-                        yVoxel >= 0 && yVoxel <= defField->ny - 1) {
-                        // The control point positions are extracted
-                        if (oldXPre != xPre || oldYPre != yPre) {
-                            GetGridValues(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates);
-                            oldXPre = xPre;
-                            oldYPre = yPre;
-                        }
-
-                        xReal = 0; yReal = 0;
-                        if (mask[index] > -1) {
-                            for (int b = 0; b < 4; b++) {
-                                for (int a = 0; a < 4; a++) {
-                                    const DataType xyBasis = xBasis[a] * yBasis[b];
-                                    xReal += xControlPointCoordinates[b * 4 + a] * xyBasis;
-                                    yReal += yControlPointCoordinates[b * 4 + a] * xyBasis;
-                                }
-                            }
-                        }
-
-                        defFieldPtrX[index] = xReal;
-                        defFieldPtrY[index] = yReal;
-                    }
-                    index++;
-                }
-            }
-        } else {    // If the deformation field is blank - !composition
-            for (int y = 0; y < defField->ny; y++) {
-                size_t index = y * defField->nx;
-
-                int yPre = (int)((DataType)y / gridVoxelSpacing[1]);
-                DataType basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre;
-                if (basis < 0) basis = 0; // rounding error
-                if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
-                else GetSplineBasisValues<DataType>(basis, yBasis);
-
-                for (int x = 0; x < defField->nx; x++) {
-                    int xPre = (int)((DataType)x / gridVoxelSpacing[0]);
-                    basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre;
-                    if (basis < 0) basis = 0; // rounding error
-                    if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
-                    else GetSplineBasisValues<DataType>(basis, xBasis);
-
-                    size_t coord = 0;
-                    for (int a = 0; a < 4; a++) {
-                        xyBasis[coord++] = xBasis[0] * yBasis[a];
-                        xyBasis[coord++] = xBasis[1] * yBasis[a];
-                        xyBasis[coord++] = xBasis[2] * yBasis[a];
-                        xyBasis[coord++] = xBasis[3] * yBasis[a];
-                    }
-
-                    if (oldXPre != xPre || oldYPre != yPre) {
-                        GetGridValues(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates);
-                        oldXPre = xPre;
-                        oldYPre = yPre;
-                    }
-
-                    DataType xReal = 0, yReal = 0;
-                    if (mask[index] > -1) {
-                        for (int a = 0; a < 16; a++) {
-                            xReal += xControlPointCoordinates[a] * xyBasis[a];
-                            yReal += yControlPointCoordinates[a] * xyBasis[a];
-                        }
-                    }
-                    defFieldPtrX[index] = xReal;
-                    defFieldPtrY[index] = yReal;
-                    index++;
-                }
+                auto&& [testName, reference, controlPointGrid, expectedField] = data;
+                // Add content
+                unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
+                // Add compute
+                unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                // Compute the deformation field
+                compute->GetDeformationField(false, true); // no composition - use bspline
+                // Retrieve the deformation field
+                NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::Image);
+                // Check the results
+                testCases.push_back({testName + " " + platform->GetName(), defFieldExp, expectedField});
             }
         }
     }
-
-    template<class DataType>
-    void GetDeformationField3D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) {
-        DataType xBasis[4], yBasis[4], zBasis[4];
-        DataType xControlPointCoordinates[64];
-        DataType yControlPointCoordinates[64];
-        DataType zControlPointCoordinates[64];
-
-        const size_t defFieldVoxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
-        auto defFieldPtr = defField.data();
-        auto defFieldPtrX = defFieldPtr.begin();
-        auto defFieldPtrY = defFieldPtrX + defFieldVoxelNumber;
-        auto defFieldPtrZ = defFieldPtrY + defFieldVoxelNumber;
-
-        if (composition) {  // Composition of deformation fields
-            // Read the ijk sform or qform, as appropriate
-            const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk;
-            for (int z = 0; z < defField->nz; z++) {
-                size_t index = z * defField->nx * defField->ny;
-                int oldPreX = -99; int oldPreY = -99; int oldPreZ = -99;
-                for (int y = 0; y < defField->ny; y++) {
-                    for (int x = 0; x < defField->nx; x++) {
-                        if (mask[index] > -1) {
-                            // The previous position at the current pixel position is read
-                            DataType real[] = { defFieldPtrX[index], defFieldPtrY[index], defFieldPtrZ[index] };
-
-                            // From real to pixel position in the control point space
-                            DataType voxel[3];
-                            voxel[0] =
-                                realToVoxel->m[0][0] * real[0] +
-                                realToVoxel->m[0][1] * real[1] +
-                                realToVoxel->m[0][2] * real[2] +
-                                realToVoxel->m[0][3];
-                            voxel[1] =
-                                realToVoxel->m[1][0] * real[0] +
-                                realToVoxel->m[1][1] * real[1] +
-                                realToVoxel->m[1][2] * real[2] +
-                                realToVoxel->m[1][3];
-                            voxel[2] =
-                                realToVoxel->m[2][0] * real[0] +
-                                realToVoxel->m[2][1] * real[1] +
-                                realToVoxel->m[2][2] * real[2] +
-                                realToVoxel->m[2][3];
-
-                            // The spline coefficients are computed
-                            int xPre = reg_floor(voxel[0]);
-                            DataType basis = voxel[0] - (DataType)xPre--;
-                            if (basis < 0) basis = 0; // rounding error
-                            if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
-                            else GetSplineBasisValues<DataType>(basis, xBasis);
-
-                            int yPre = reg_floor(voxel[1]);
-                            basis = voxel[1] - (DataType)yPre--;
-                            if (basis < 0) basis = 0; // rounding error
-                            if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
-                            else GetSplineBasisValues<DataType>(basis, yBasis);
-
-                            int zPre = reg_floor(voxel[2]);
-                            basis = voxel[2] - (DataType)zPre--;
-                            if (basis < 0) basis = 0; // rounding error
-                            if (bspline) GetBSplineBasisValues<DataType>(basis, zBasis);
-                            else GetSplineBasisValues<DataType>(basis, zBasis);
-
-                            // The control point positions are extracted
-                            if (xPre != oldPreX || yPre != oldPreY || zPre != oldPreZ) {
-                                GetGridValues(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates);
-                                oldPreX = xPre;
-                                oldPreY = yPre;
-                                oldPreZ = zPre;
-                            }
-
-                            real[0] = real[1] = real[2] = 0;
-                            int coord = 0;
-                            for (int c = 0; c < 4; c++) {
-                                for (int b = 0; b < 4; b++) {
-                                    for (int a = 0; a < 4; a++) {
-                                        DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c];
-                                        real[0] += xControlPointCoordinates[coord] * tempValue;
-                                        real[1] += yControlPointCoordinates[coord] * tempValue;
-                                        real[2] += zControlPointCoordinates[coord] * tempValue;
-                                        coord++;
-                                    }
-                                }
-                            }
-                            defFieldPtrX[index] = real[0];
-                            defFieldPtrY[index] = real[1];
-                            defFieldPtrZ[index] = real[2];
-                        }
-                        index++;
-                    }
-                }
-            }
-        } else {    // If the deformation field is blank - !composition
-            const DataType gridVoxelSpacing[3] = {
-                controlPointGrid->dx / defField->dx,
-                controlPointGrid->dy / defField->dy,
-                controlPointGrid->dz / defField->dz
-            };
-
-            for (int z = 0; z < defField->nz; z++) {
-                size_t index = z * defField->nx * defField->ny;
-                DataType oldBasis = DataType(1.1);
-
-                int zPre = int(DataType(z) / gridVoxelSpacing[2]);
-                DataType basis = (DataType)z / gridVoxelSpacing[2] - (DataType)zPre;
-                if (basis < 0) basis = 0; // rounding error
-                if (bspline) GetBSplineBasisValues<DataType>(basis, zBasis);
-                else GetSplineBasisValues<DataType>(basis, zBasis);
-
-                for (int y = 0; y < defField->ny; y++) {
-                    int yPre = int(DataType(y) / gridVoxelSpacing[1]);
-                    basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre;
-                    if (basis < 0) basis = 0; // rounding error
-                    if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
-                    else GetSplineBasisValues<DataType>(basis, yBasis);
-                    int coord = 0;
-                    DataType yzBasis[16];
-                    for (int a = 0; a < 4; a++) {
-                        yzBasis[coord++] = yBasis[0] * zBasis[a];
-                        yzBasis[coord++] = yBasis[1] * zBasis[a];
-                        yzBasis[coord++] = yBasis[2] * zBasis[a];
-                        yzBasis[coord++] = yBasis[3] * zBasis[a];
-                    }
-
-                    for (int x = 0; x < defField->nx; x++) {
-                        int xPre = int(DataType(x) / gridVoxelSpacing[0]);
-                        basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre;
-                        if (basis < 0) basis = 0; // rounding error
-                        if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
-                        else GetSplineBasisValues<DataType>(basis, xBasis);
-                        coord = 0;
-                        DataType xyzBasis[64];
-                        for (int a = 0; a < 16; a++) {
-                            xyzBasis[coord++] = xBasis[0] * yzBasis[a];
-                            xyzBasis[coord++] = xBasis[1] * yzBasis[a];
-                            xyzBasis[coord++] = xBasis[2] * yzBasis[a];
-                            xyzBasis[coord++] = xBasis[3] * yzBasis[a];
-                        }
-                        if (basis <= oldBasis || x == 0)
-                            GetGridValues(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates);
-                        oldBasis = basis;
-
-                        DataType real[3]{};
-                        if (mask[index] > -1) {
-                            for (int a = 0; a < 64; a++) {
-                                real[0] += xControlPointCoordinates[a] * xyzBasis[a];
-                                real[1] += yControlPointCoordinates[a] * xyzBasis[a];
-                                real[2] += zControlPointCoordinates[a] * xyzBasis[a];
-                            }
-                        }// mask
-                        defFieldPtrX[index] = real[0];
-                        defFieldPtrY[index] = real[1];
-                        defFieldPtrZ[index] = real[2];
-                        index++;
-                    } // x
-                } // y
-            } // z
-        } // composition
-    }
 };
 
-TEST_CASE_METHOD(GetDeformationFieldTest, "Get deformation field", "[GetDeformationField]") {
+TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation field from b-spline grid", "[unit]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
-        auto&& [platform, content, testData, composition, bspline] = testCase;
-        auto&& [testName, reference, controlPointGrid] = testData;
-        const std::string sectionName = testName + " " + platform->GetName() + " composition=" + std::to_string(composition) + " bspline=" + std::to_string(bspline);
-
-        SECTION(sectionName) {
-            NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
-
-            // Compute the deformation field
-            unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
-            compute->GetDeformationField(composition, bspline);
-            NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::ImageInfoAndAllocData);
-            GetDeformationField<float>(controlPointGrid, defFieldExp, content->GetReferenceMask(), composition, bspline);
-
-            // Check the results
-            NiftiImage defField = content->GetDeformationField();
-            const auto defFieldPtr = defField.data();
-            const auto defFieldExpPtr = defFieldExp.data();
-            defField.disown();
-            // Increase the precision for the output
-            NR_COUT << std::fixed << std::setprecision(10);
-            for (size_t i = 0; i < defFieldExp.nVoxels(); ++i) {
-                const double defFieldVal = defFieldPtr[i];
-                const double defFieldExpVal = defFieldExpPtr[i];
-                NR_COUT << i << " " << defFieldVal << " " << defFieldExpVal << std::endl;
-                REQUIRE(fabs(defFieldVal - defFieldExpVal) < EPS);
+        auto&& [testName, result, expected] = testCase;
+
+        SECTION(testName) {
+            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            float *resPtr = static_cast<float *>(result->data);
+            float *expPtr = static_cast<float *>(expected->data);
+            for(unsigned i=0; i<expected.nVoxels();++i){
+                const double diff = fabs(resPtr[i] - expPtr[i]);
+                if (diff > EPS){
+                    std::cout << "[i]=" << i;
+                    std::cout << " | diff=" << diff;
+                    std::cout << " | Result=" << resPtr[i];
+                    std::cout << " | Expected=" << expPtr[i] << std::endl;
+                }
+                REQUIRE(diff < EPS);
             }
-            // Ensure the termination of content before CudaContext
-            content.reset();
         }
     }
 }
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 16d3040e..895cec69 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -255,8 +255,10 @@ TEST_CASE_METHOD(MeasureTest, "Regression Measure", "[regression]") {
             for (size_t i = 0; i < voxelBasedGradCpu.nVoxels(); ++i) {
                 const float cpuVal = voxelBasedGradCpuPtr[i];
                 const float cudaVal = voxelBasedGradCudaPtr[i];
-                NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl;
-                REQUIRE(fabs(cpuVal - cudaVal) < EPS);
+                const double diff = fabs(cpuVal - cudaVal);
+                if(diff>EPS)
+                    NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl;
+                REQUIRE(diff < EPS);
             }
         }
     }

From 758024969bcb84bc6a39fecd09db3b3387bdbe31 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Wed, 30 Aug 2023 15:21:06 +0100
Subject: [PATCH 189/314] #92 Added spine composition to the unit test

---
 niftyreg_build_version.txt                |   2 +-
 reg-test/reg_test_getDeformationField.cpp | 102 +++++++++++++++++++---
 2 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 33a21f83..7536e3d3 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-308
+309
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index 797d0959..9cbcaf47 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -15,6 +15,7 @@
 class GetDeformationFieldTest {
 protected:
     using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
+    using TestDataComp = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, NiftiImage>;
     using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
 
     inline static vector<TestCase> testCases;
@@ -50,14 +51,14 @@ class GetDeformationFieldTest {
         testData.emplace_back(TestData(
             "2D ID",
             reference2d,
-            NiftiImage(controlPointGrid2d),
-            NiftiImage(deformationField2d)
+            controlPointGrid2d,
+            deformationField2d
         ));
         testData.emplace_back(TestData(
             "3D ID",
             reference3d,
-            NiftiImage(controlPointGrid3d),
-            NiftiImage(deformationField3d)
+            controlPointGrid3d,
+            deformationField3d
         ));
 
         // Translation transformation tests - translation of 2 along each axis
@@ -77,14 +78,14 @@ class GetDeformationFieldTest {
         testData.emplace_back(TestData(
             "2D Trans",
             reference2d,
-            NiftiImage(controlPointGrid2d),
-            NiftiImage(deformationField2d)
+            controlPointGrid2d,
+            deformationField2d
         ));
         testData.emplace_back(TestData(
             "3D Trans",
             reference3d,
-            NiftiImage(controlPointGrid3d),
-            NiftiImage(deformationField3d)
+            controlPointGrid3d,
+            deformationField3d
         ));
 
         // Scaling transformation tests
@@ -100,17 +101,17 @@ class GetDeformationFieldTest {
         testData.emplace_back(TestData(
             "2D scaling",
             reference2d,
-            NiftiImage(controlPointGrid2d),
-            NiftiImage(deformationField2d)
+            (controlPointGrid2d),
+            (deformationField2d)
         ));
         testData.emplace_back(TestData(
             "3D scaling",
             reference3d,
-            NiftiImage(controlPointGrid3d),
-            NiftiImage(deformationField3d)
+            controlPointGrid3d,
+            deformationField3d
         ));
 
-        // Add platforms, composition, and bspline to the test data
+        // Run the actual computation with the provided input data
         for (auto&& data : testData) {
             for (auto&& platformType : PlatformTypes) {
                 shared_ptr<Platform> platform{ new Platform(platformType) };
@@ -128,6 +129,81 @@ class GetDeformationFieldTest {
                 testCases.push_back({testName + " " + platform->GetName(), defFieldExp, expectedField});
             }
         }
+
+        // Data container for the test data related to composition
+        vector<TestDataComp> testDataComp;
+
+        // Ensures composition of identity transformation yield identity
+        NiftiImage deformationFieldInput2d = CreateDeformationField(reference2d);
+        NiftiImage deformationFieldInput3d = CreateDeformationField(reference3d);
+        reg_tools_multiplyValueToImage(deformationField2d, deformationField2d, 0.f);
+        reg_tools_multiplyValueToImage(deformationField3d, deformationField3d, 0.f);
+        reg_tools_multiplyValueToImage(controlPointGrid2d, controlPointGrid2d, 0.f);
+        reg_tools_multiplyValueToImage(controlPointGrid3d, controlPointGrid3d, 0.f);
+        reg_getDeformationFromDisplacement(deformationField2d);
+        reg_getDeformationFromDisplacement(deformationField3d);
+        reg_getDeformationFromDisplacement(controlPointGrid2d);
+        reg_getDeformationFromDisplacement(controlPointGrid3d);
+        testDataComp.emplace_back(TestDataComp(
+            "2D composition ID",
+            reference3d,
+            controlPointGrid2d,
+            deformationFieldInput2d,
+            deformationField2d
+        ));
+        testDataComp.emplace_back(TestDataComp(
+            "3D composition ID",
+            reference3d,
+            controlPointGrid3d,
+            deformationFieldInput3d,
+            deformationField3d
+        ));
+
+        // Ensures composition from zooming and and out goes back identity ID
+        float * def2dInPtr = static_cast<float *>(deformationFieldInput2d->data);
+        float * def3dInPtr = static_cast<float *>(deformationFieldInput3d->data);
+        for(size_t i=0; i<controlPointGrid2d.nVoxels(); i++)
+            cpp2dPtr[i] *= 1.1f;
+        for(size_t i=0; i<controlPointGrid3d.nVoxels(); i++)
+            cpp3dPtr[i] *= 1.1f;
+        for(size_t i=0; i<deformationFieldInput2d.nVoxels(); i++)
+            def2dInPtr[i] /= 1.1f;
+        for(size_t i=0; i<deformationFieldInput3d.nVoxels(); i++)
+            def3dInPtr[i] /= 1.1f;
+        testDataComp.emplace_back(TestDataComp(
+            "2D composition scaling",
+            reference3d,
+            controlPointGrid2d,
+            deformationFieldInput2d,
+            deformationField2d
+        ));
+        testDataComp.emplace_back(TestDataComp(
+            "3D composition scaling",
+            reference3d,
+            controlPointGrid3d,
+            deformationFieldInput3d,
+            deformationField3d
+        ));
+
+        for (auto&& data : testDataComp) {
+            for (auto&& platformType : {PlatformType::Cpu}) {
+                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
+                auto&& [testName, reference, controlPointGrid, inputField, expectedField] = data;
+                // Add content
+                unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
+                content->SetDeformationField(NiftiImage(inputField).disown());
+                // Add compute
+                unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                // Compute the deformation field
+                compute->GetDeformationField(true, true); // with composition - use bspline
+                // Retrieve the deformation field
+                NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::Image);
+                // Check the results
+                testCases.push_back({testName + " " + platform->GetName(), defFieldExp, expectedField});
+            }
+        }
+
     }
 };
 

From eba4a9fa142d4be17e65abc82bdaa7ba936d35cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 30 Aug 2023 18:38:09 +0100
Subject: [PATCH 190/314] Add NiftiImage::setPixDim()

---
 niftyreg_build_version.txt |  2 +-
 reg-io/RNifti/NiftiImage.h | 36 +++++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 7536e3d3..54ea97e9 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-309
+310
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 0c568c05..308b814d 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1656,7 +1656,7 @@ class NiftiImage
      * Return the dimensions of the pixels or voxels in the image
      * @return A vector of floating-point values giving the pixel width in each dimension
     **/
-    std::vector<pixdim_t> pixdim () const
+    std::vector<pixdim_t> pixDim () const
     {
         if (image == nullptr)
             return std::vector<pixdim_t>();
@@ -1664,6 +1664,40 @@ class NiftiImage
             return std::vector<pixdim_t>(image->pixdim+1, image->pixdim+image->ndim+1);
     }
 
+    /**
+     * Set a pixel dimension of the image
+     * @param dim The dimension to set
+     * @param value The new value of the dimension
+    */
+    void setPixDim (const Dim dim, const pixdim_t value)
+    {
+        if (image == nullptr)
+            return;
+        switch (dim) {
+        case Dim::X:
+            image->pixdim[1] = image->dx = value;
+            break;
+        case Dim::Y:
+            image->pixdim[2] = image->dy = value;
+            break;
+        case Dim::Z:
+            image->pixdim[3] = image->dz = value;
+            break;
+        case Dim::T:
+            image->pixdim[4] = image->dt = value;
+            break;
+        case Dim::U:
+            image->pixdim[5] = image->du = value;
+            break;
+        case Dim::V:
+            image->pixdim[6] = image->dv = value;
+            break;
+        case Dim::W:
+            image->pixdim[7] = image->dw = value;
+            break;
+        }
+    }
+
     /**
      * Drop unitary dimensions
      * @return Self, after possibly reducing the dimensionality of the image

From 2ec3de1741ccbf490f0f124f56785dcc7638dc39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 30 Aug 2023 18:38:55 +0100
Subject: [PATCH 191/314] Fix test errors

---
 niftyreg_build_version.txt                |  2 +-
 reg-test/reg_test_be.cpp                  |  2 +-
 reg-test/reg_test_common.h                | 30 ++++----
 reg-test/reg_test_getDeformationField.cpp | 86 ++++++++++++-----------
 reg-test/reg_test_nmi.cpp                 |  3 +-
 reg-test/reg_test_nmi_gradient.cpp        | 62 ++++++++--------
 6 files changed, 91 insertions(+), 94 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 54ea97e9..b661fff6 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-310
+311
diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp
index 9025d893..44c85e71 100644
--- a/reg-test/reg_test_be.cpp
+++ b/reg-test/reg_test_be.cpp
@@ -105,7 +105,7 @@ class BendingEnergyTest {
         for (auto&& data : testData) {
             for (auto&& platformType : PlatformTypes) {
                 // Make a copy of the test data
-                auto&& [testName, reference, controlPointGrid, expected] = data;
+                auto [testName, reference, controlPointGrid, expected] = data;
                 // Add content
                 shared_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 4d5a1256..3437eb3e 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -34,7 +34,7 @@ void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) {
 
 NiftiImage CreateControlPointGrid(const NiftiImage& reference) {
     // Set the spacing for the control point grid to 2 voxel along each axis
-    float gridSpacing[3] = { reference->dx*2, reference->dy*2, reference->dz*2};
+    float gridSpacing[3] = { reference->dx * 2, reference->dy * 2, reference->dz * 2 };
 
     // Create and allocate the control point image
     NiftiImage controlPointGrid;
@@ -43,22 +43,20 @@ NiftiImage CreateControlPointGrid(const NiftiImage& reference) {
     return controlPointGrid;
 }
 
-NiftiImage CreateDeformationField(const NiftiImage &reference) {
+NiftiImage CreateDeformationField(const NiftiImage& reference) {
     // Create and allocate a deformation field
-    NiftiImage deformationField;
-    deformationField = nifti_copy_nim_info(reference);
-    deformationField->dim[0] = deformationField->ndim = 5;
+    NiftiImage deformationField(reference, NiftiImage::Copy::ImageInfo);
+    deformationField.setDim(NiftiDim::NDim, 5);
     if (reference->dim[0] == 2)
-        deformationField->dim[3] = deformationField->nz = 1;
-    deformationField->dim[4] = deformationField->nt = 1;
-    deformationField->pixdim[4] = deformationField->dt = 1;
-    deformationField->dim[5] = deformationField->nu = reference->nz > 1 ? 3 : 2;
-    deformationField->pixdim[5] = deformationField->du = 1;
-    deformationField->dim[6] = deformationField->nv = 1;
-    deformationField->pixdim[6] = deformationField->dv = 1;
-    deformationField->dim[7] = deformationField->nw = 1;
-    deformationField->pixdim[7] = deformationField->dw = 1;
-    deformationField->nvox = NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim);
+        deformationField.setDim(NiftiDim::Z, 1);
+    deformationField.setDim(NiftiDim::T, 1);
+    deformationField.setPixDim(NiftiDim::T, 1);
+    deformationField.setDim(NiftiDim::U, reference->nz > 1 ? 3 : 2);
+    deformationField.setPixDim(NiftiDim::U, 1);
+    deformationField.setDim(NiftiDim::V, 1);
+    deformationField.setPixDim(NiftiDim::V, 1);
+    deformationField.setDim(NiftiDim::W, 1);
+    deformationField.setPixDim(NiftiDim::W, 1);
     deformationField->datatype = NIFTI_TYPE_FLOAT32;
     deformationField->intent_code = NIFTI_INTENT_VECTOR;
     memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name));
@@ -66,7 +64,7 @@ NiftiImage CreateDeformationField(const NiftiImage &reference) {
     deformationField->intent_p1 = DISP_FIELD;
     deformationField->scl_slope = 1;
     deformationField->scl_inter = 0;
-    deformationField->data = calloc(deformationField->nvox, deformationField->nbyper);
+    deformationField.realloc();
     reg_getDeformationFromDisplacement(deformationField);
     return deformationField;
 }
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index 9cbcaf47..17bb21e5 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -62,19 +62,19 @@ class GetDeformationFieldTest {
         ));
 
         // Translation transformation tests - translation of 2 along each axis
-        float * cpp2dPtr = static_cast<float *>(controlPointGrid2d->data);
-        float * cpp3dPtr = static_cast<float *>(controlPointGrid3d->data);
-        float * def2dPtr = static_cast<float *>(deformationField2d->data);
-        float * def3dPtr = static_cast<float *>(deformationField3d->data);
-        for(size_t i=0; i<controlPointGrid2d.nVoxels(); i++)
+        float *cpp2dPtr = static_cast<float*>(controlPointGrid2d->data);
+        float *cpp3dPtr = static_cast<float*>(controlPointGrid3d->data);
+        float *def2dPtr = static_cast<float*>(deformationField2d->data);
+        float *def3dPtr = static_cast<float*>(deformationField3d->data);
+        for (size_t i = 0; i < controlPointGrid2d.nVoxels(); i++)
             cpp2dPtr[i] += 2.f;
-        for(size_t i=0; i<controlPointGrid3d.nVoxels(); i++)
+        for (size_t i = 0; i < controlPointGrid3d.nVoxels(); i++)
             cpp3dPtr[i] += 2.f;
-        for(size_t i=0; i<deformationField2d.nVoxels(); i++)
+        for (size_t i = 0; i < deformationField2d.nVoxels(); i++)
             def2dPtr[i] += 2.f;
-        for(size_t i=0; i<deformationField3d.nVoxels(); i++)
+        for (size_t i = 0; i < deformationField3d.nVoxels(); i++)
             def3dPtr[i] += 2.f;
-        
+
         testData.emplace_back(TestData(
             "2D Trans",
             reference2d,
@@ -89,14 +89,14 @@ class GetDeformationFieldTest {
         ));
 
         // Scaling transformation tests
-        for(size_t i=0; i<controlPointGrid2d.nVoxels(); i++)
-            cpp2dPtr[i] = (cpp2dPtr[i]-2.f) * 1.1f;
-        for(size_t i=0; i<controlPointGrid3d.nVoxels(); i++)
-            cpp3dPtr[i] = (cpp3dPtr[i]-2.f) * 1.1f;
-        for(size_t i=0; i<deformationField2d.nVoxels(); i++)
-            def2dPtr[i] = (def2dPtr[i]-2.f) * 1.1f;
-        for(size_t i=0; i<deformationField3d.nVoxels(); i++)
-            def3dPtr[i] = (def3dPtr[i]-2.f) * 1.1f;
+        for (size_t i = 0; i < controlPointGrid2d.nVoxels(); i++)
+            cpp2dPtr[i] = (cpp2dPtr[i] - 2.f) * 1.1f;
+        for (size_t i = 0; i < controlPointGrid3d.nVoxels(); i++)
+            cpp3dPtr[i] = (cpp3dPtr[i] - 2.f) * 1.1f;
+        for (size_t i = 0; i < deformationField2d.nVoxels(); i++)
+            def2dPtr[i] = (def2dPtr[i] - 2.f) * 1.1f;
+        for (size_t i = 0; i < deformationField3d.nVoxels(); i++)
+            def3dPtr[i] = (def3dPtr[i] - 2.f) * 1.1f;
 
         testData.emplace_back(TestData(
             "2D scaling",
@@ -116,7 +116,8 @@ class GetDeformationFieldTest {
             for (auto&& platformType : PlatformTypes) {
                 shared_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
-                auto&& [testName, reference, controlPointGrid, expectedField] = data;
+                // Make a copy of the test data
+                auto [testName, reference, controlPointGrid, defFieldExp] = data;
                 // Add content
                 unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
                 // Add compute
@@ -124,9 +125,9 @@ class GetDeformationFieldTest {
                 // Compute the deformation field
                 compute->GetDeformationField(false, true); // no composition - use bspline
                 // Retrieve the deformation field
-                NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::Image);
-                // Check the results
-                testCases.push_back({testName + " " + platform->GetName(), defFieldExp, expectedField});
+                NiftiImage defField(content->GetDeformationField(), NiftiImage::Copy::Image);
+                // Save for testing
+                testCases.push_back({ testName + " " + platform->GetName(), std::move(defField), std::move(defFieldExp) });
             }
         }
 
@@ -160,15 +161,15 @@ class GetDeformationFieldTest {
         ));
 
         // Ensures composition from zooming and and out goes back identity ID
-        float * def2dInPtr = static_cast<float *>(deformationFieldInput2d->data);
-        float * def3dInPtr = static_cast<float *>(deformationFieldInput3d->data);
-        for(size_t i=0; i<controlPointGrid2d.nVoxels(); i++)
+        float *def2dInPtr = static_cast<float*>(deformationFieldInput2d->data);
+        float *def3dInPtr = static_cast<float*>(deformationFieldInput3d->data);
+        for (size_t i = 0; i < controlPointGrid2d.nVoxels(); i++)
             cpp2dPtr[i] *= 1.1f;
-        for(size_t i=0; i<controlPointGrid3d.nVoxels(); i++)
+        for (size_t i = 0; i < controlPointGrid3d.nVoxels(); i++)
             cpp3dPtr[i] *= 1.1f;
-        for(size_t i=0; i<deformationFieldInput2d.nVoxels(); i++)
+        for (size_t i = 0; i < deformationFieldInput2d.nVoxels(); i++)
             def2dInPtr[i] /= 1.1f;
-        for(size_t i=0; i<deformationFieldInput3d.nVoxels(); i++)
+        for (size_t i = 0; i < deformationFieldInput3d.nVoxels(); i++)
             def3dInPtr[i] /= 1.1f;
         testDataComp.emplace_back(TestDataComp(
             "2D composition scaling",
@@ -186,21 +187,22 @@ class GetDeformationFieldTest {
         ));
 
         for (auto&& data : testDataComp) {
-            for (auto&& platformType : {PlatformType::Cpu}) {
+            for (auto&& platformType : { PlatformType::Cpu }) {
                 shared_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
-                auto&& [testName, reference, controlPointGrid, inputField, expectedField] = data;
+                // Make a copy of the test data
+                auto [testName, reference, controlPointGrid, defField, defFieldExp] = data;
                 // Add content
                 unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
-                content->SetDeformationField(NiftiImage(inputField).disown());
+                content->SetDeformationField(defField.disown());
                 // Add compute
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 // Compute the deformation field
                 compute->GetDeformationField(true, true); // with composition - use bspline
                 // Retrieve the deformation field
-                NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::Image);
-                // Check the results
-                testCases.push_back({testName + " " + platform->GetName(), defFieldExp, expectedField});
+                defField = NiftiImage(content->GetDeformationField(), NiftiImage::Copy::Image);
+                // Save for testing
+                testCases.push_back({ testName + " " + platform->GetName(), std::move(defField), std::move(defFieldExp) });
             }
         }
 
@@ -214,16 +216,16 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation field from b-spline grid"
         auto&& [testName, result, expected] = testCase;
 
         SECTION(testName) {
-            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
-            float *resPtr = static_cast<float *>(result->data);
-            float *expPtr = static_cast<float *>(expected->data);
-            for(unsigned i=0; i<expected.nVoxels();++i){
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
+            float *resPtr = static_cast<float*>(result->data);
+            float *expPtr = static_cast<float*>(expected->data);
+            for (unsigned i = 0; i < expected.nVoxels(); ++i) {
                 const double diff = fabs(resPtr[i] - expPtr[i]);
-                if (diff > EPS){
-                    std::cout << "[i]=" << i;
-                    std::cout << " | diff=" << diff;
-                    std::cout << " | Result=" << resPtr[i];
-                    std::cout << " | Expected=" << expPtr[i] << std::endl;
+                if (diff > EPS) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | diff=" << diff;
+                    NR_COUT << " | Result=" << resPtr[i];
+                    NR_COUT << " | Expected=" << expPtr[i] << std::endl;
                 }
                 REQUIRE(diff < EPS);
             }
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index 7d03e3ee..39841b80 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -73,8 +73,7 @@ class NmiTest {
                 // Create the platform
                 shared_ptr<Platform> platform{ new Platform(platformType) };
                 // Make a copy of the test data
-                auto td = data;
-                auto&& [testName, reference, floating, expected] = td;
+                auto [testName, reference, floating, expected] = data;
                 // Create the content creator
                 unique_ptr<DefContentCreator> contentCreator{
                     dynamic_cast<DefContentCreator*>(platform->CreateContentCreator(ContentType::Def))
diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp
index 134d0e69..860e2520 100644
--- a/reg-test/reg_test_nmi_gradient.cpp
+++ b/reg-test/reg_test_nmi_gradient.cpp
@@ -10,7 +10,7 @@
 /*
     This test file contains the following unit tests:
     test function: NMI gradient.
-    The anylitical formulation is compared against an approximation
+    The analytical formulation is compared against an approximation
 */
 
 class NMIGradientTest {
@@ -25,7 +25,7 @@ class NMIGradientTest {
         // Default bin value is 68 (64+4 for Parzen windowing)
         const unsigned binNumber = 8;
         const float padding = 2; //std::numeric_limits<float>::quiet_NaN();
-        std::uniform_real_distribution<float> distr(2, binNumber-3);
+        std::uniform_real_distribution<float> distr(2, binNumber - 3);
 
         // Create reference and floating 2D images
         vector<NiftiImage::dim_t> dim{ 4, 4 };
@@ -38,13 +38,12 @@ class NMIGradientTest {
         NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
 
         // Fill images with random values
-        auto ref2dPtr = static_cast<float *>(reference2d->data);
-        auto flo2dPtr = static_cast<float *>(floating2d->data);
+        auto ref2dPtr = reference2d.data();
+        auto flo2dPtr = floating2d.data();
         // Ensure at least one pixel contains the max and one the min
         ref2dPtr[0] = flo2dPtr[1] = 2.f;
-        ref2dPtr[1] = flo2dPtr[0] = binNumber-3;
-        for (size_t i = 2; i < reference2d.nVoxels(); ++i)
-        {
+        ref2dPtr[1] = flo2dPtr[0] = binNumber - 3;
+        for (size_t i = 2; i < reference2d.nVoxels(); ++i) {
             ref2dPtr[i] = distr(gen);
             flo2dPtr[i] = distr(gen);
         }
@@ -54,7 +53,7 @@ class NMIGradientTest {
         auto flo3dPtr = floating3d.data();
         // Ensure at least one pixel contains the max and one the min
         ref3dPtr[0] = flo3dPtr[1] = 2.f;
-        ref3dPtr[1] = flo3dPtr[0] = binNumber-3;
+        ref3dPtr[1] = flo3dPtr[0] = binNumber - 3;
         for (size_t i = 2; i < reference3d.nVoxels(); ++i) {
             ref3dPtr[i] = distr(gen);
             flo3dPtr[i] = distr(gen);
@@ -76,8 +75,8 @@ class NMIGradientTest {
             for (auto&& platformType : PlatformTypes) {
                 // Create the platform
                 shared_ptr<Platform> platform{ new Platform(platformType) };
-                auto td = data;
-                auto&& [testName, reference, floating] = td;
+                // Make a copy of the test data
+                auto [testName, reference, floating] = data;
                 // Create the content creator
                 unique_ptr<DefContentCreator> contentCreator{
                     dynamic_cast<DefContentCreator*>(platform->CreateContentCreator(ContentType::Def))
@@ -85,8 +84,8 @@ class NMIGradientTest {
                 // Create the content
                 unique_ptr<DefContent> content{ contentCreator->Create(reference, floating) };
                 // Add some displacements to the deformation field to avoid grid effect
-                float *defPtr = static_cast<float *>(content->GetDeformationField()->data);
-                for(unsigned index=0; index<content->GetDeformationField()->nvox;++index)
+                float *defPtr = static_cast<float*>(content->GetDeformationField()->data);
+                for (size_t index = 0; index < content->GetDeformationField()->nvox; ++index)
                     defPtr[index] += 0.1f;
                 // Compute the warped image given the current transformation
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
@@ -108,7 +107,7 @@ class NMIGradientTest {
                 // Apply perturbations to each value in the deformation field
                 float *gradPtr = static_cast<float *>(expectedGradientImage->data);
                 const float delta = 0.00001;
-                for(unsigned index=0; index<expectedGradientImage.nVoxels();++index){
+                for (unsigned index = 0; index < expectedGradientImage.nVoxels(); ++index) {
                     float current_value = defPtr[index];
                     // compute the NMI when removing delta(s)
                     defPtr[index] = current_value - delta;
@@ -122,8 +121,7 @@ class NMIGradientTest {
                     gradPtr[index] = -(nmi_post - nmi_pre) / (2. * delta);
                     defPtr[index] = current_value;
                 }
-                testCases.push_back({testName + " " + platform->GetName(),
-                                     std::move(gradientImage), std::move(expectedGradientImage)});
+                testCases.push_back({ testName + " " + platform->GetName(), std::move(gradientImage), std::move(expectedGradientImage) });
             }
         }
     }
@@ -141,29 +139,29 @@ TEST_CASE_METHOD(NMIGradientTest, "NMI Gradient", "[unit]") {
         auto&& [testName, result, expected] = testCase;
 
         SECTION(testName) {
-            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
 
-            float *resPtr = static_cast<float *>(result->data);
-            float *expPtr = static_cast<float *>(expected->data);
+            float *resPtr = static_cast<float*>(result->data);
+            float *expPtr = static_cast<float*>(expected->data);
             float resMean = reg_tools_getMeanValue(result);
             float expMean = reg_tools_getMeanValue(expected);
-            float resStdd = reg_tools_getSTDValue(result);
-            float expStdd = reg_tools_getSTDValue(expected);
+            float resStd = reg_tools_getSTDValue(result);
+            float expStd = reg_tools_getSTDValue(expected);
             double corr = 0;
-            for(unsigned i=0; i<expected.nVoxels();++i)
-                corr += (resPtr[i]-resMean)*(expPtr[i]-expMean);
-            
-            corr /= resStdd*expStdd*result.nVoxels();
-            std::cout << "Correlation = " << corr << std::endl;
+            for (size_t i = 0; i < expected.nVoxels(); ++i)
+                corr += (resPtr[i] - resMean) * (expPtr[i] - expMean);
+
+            corr /= resStd * expStd * result.nVoxels();
+            NR_COUT << "Correlation = " << corr << std::endl;
             const double norm = std::max(fabs(reg_tools_getMinValue(expected, 0)),
                                          fabs(reg_tools_getMaxValue(expected, 0)));
-            for(unsigned i=0; i<expected.nVoxels();++i){
-                const double ratio = fabs(resPtr[i] - expPtr[i])/norm;
-                if (ratio > .1){
-                    std::cout << "[i]=" << i;
-                    std::cout << " | ratio=" << ratio;
-                    std::cout << " | Result=" << resPtr[i];
-                    std::cout << " | Expected=" << expPtr[i] << std::endl;
+            for (size_t i = 0; i < expected.nVoxels(); ++i) {
+                const double ratio = fabs(resPtr[i] - expPtr[i]) / norm;
+                if (ratio > .1) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | ratio=" << ratio;
+                    NR_COUT << " | Result=" << resPtr[i];
+                    NR_COUT << " | Expected=" << expPtr[i] << std::endl;
                 }
             }
             REQUIRE(corr > 0.99);

From aa472f8473def9f7881602976064bcc4f035288e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 30 Aug 2023 18:58:10 +0100
Subject: [PATCH 192/314] Turn math macros into inline functions #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_measure.cpp                      |   4 +-
 reg-apps/reg_tools.cpp                        |  12 +-
 reg-apps/reg_transform.cpp                    |  12 +-
 reg-lib/_reg_base.cpp                         |   8 +-
 reg-lib/cl/ClAladinContent.cpp                |  14 +-
 reg-lib/cpu/_reg_blockMatching.cpp            |   6 +-
 reg-lib/cpu/_reg_discrete_init.cpp            |  36 ++---
 reg-lib/cpu/_reg_dti.cpp                      |   4 +-
 reg-lib/cpu/_reg_femTrans.cpp                 |  18 +--
 reg-lib/cpu/_reg_lncc.cpp                     |   4 +-
 reg-lib/cpu/_reg_localTrans.cpp               |  72 ++++-----
 reg-lib/cpu/_reg_localTrans_jac.cpp           |  40 ++---
 reg-lib/cpu/_reg_localTrans_regul.cpp         |  54 +++----
 reg-lib/cpu/_reg_maths.cpp                    |  17 +--
 reg-lib/cpu/_reg_maths.h                      |  73 ++++-----
 reg-lib/cpu/_reg_mrf.cpp                      |  24 +--
 reg-lib/cpu/_reg_resampling.cpp               | 142 +++++++++---------
 reg-lib/cpu/_reg_ssd.cpp                      |  36 ++---
 reg-lib/cpu/_reg_tools.cpp                    |  16 +-
 reg-lib/cuda/CudaAladinContent.cpp            |  14 +-
 reg-lib/cuda/CudaContent.cpp                  |  14 +-
 reg-lib/cuda/CudaNormaliseGradient.cu         |   4 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.cu |   2 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                  |   4 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu            |  12 +-
 reg-lib/cuda/_reg_resampling_gpu.cu           |   8 +-
 reg-lib/cuda/_reg_resampling_kernels.cu       |   8 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |   4 +-
 reg-lib/cuda/_reg_tools_gpu.cu                |  36 ++---
 reg-lib/cuda/_reg_tools_kernels.cu            |   2 +-
 reg-test/reg_test_lncc.cpp                    |   6 +-
 .../reg_test_voxelCentricToNodeCentric.cpp    |   8 +-
 33 files changed, 350 insertions(+), 366 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index b661fff6..a1e0432c 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-311
+312
diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp
index df142de5..07f6a60f 100755
--- a/reg-apps/reg_measure.cpp
+++ b/reg-apps/reg_measure.cpp
@@ -308,8 +308,8 @@ int main(int argc, char **argv)
       double measure=0.;
       for(size_t i=0; i<refImage->nvox; ++i){
          if(refMask[i]>-1 && refPtr[i]==refPtr[i] && warPtr[i]==warPtr[i]){
-            refSTDValue += reg_pow2((double)refPtr[i] - refMeanValue);
-            warSTDValue += reg_pow2((double)warPtr[i] - warMeanValue);
+            refSTDValue += Square((double)refPtr[i] - refMeanValue);
+            warSTDValue += Square((double)warPtr[i] - warMeanValue);
             measure += ((double)refPtr[i] - refMeanValue) *
                   ((double)warPtr[i] - warMeanValue);
          }
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 1efab016..3bcac2f6 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -499,8 +499,8 @@ int main(int argc, char **argv)
         reg_tools_changeDatatype<float>(image);
         nifti_image *normImage = nifti_dup(*image);
         reg_heapSort(static_cast<float *>(normImage->data), normImage->nvox);
-        float minValue = static_cast<float *>(normImage->data)[static_cast<int>(reg_floor(03*(int)normImage->nvox/100))];
-        float maxValue = static_cast<float *>(normImage->data)[static_cast<int>(reg_floor(97*(int)normImage->nvox/100))];
+        float minValue = static_cast<float *>(normImage->data)[Floor(03*(int)normImage->nvox/100)];
+        float maxValue = static_cast<float *>(normImage->data)[Floor(97*(int)normImage->nvox/100)];
         reg_tools_subtractValueFromImage(image,normImage,minValue);
         reg_tools_divideValueToImage(normImage,normImage,maxValue-minValue);
         if(flag->outputImageFlag)
@@ -957,7 +957,7 @@ int main(int argc, char **argv)
                 for(int y=0; y<image->ny; ++y){
                     for(int x=0; x<image->nx; ++x){
                         size_t outIndex = ((z*image->ny+y)*image->nx+x)*image->nt*image->nu+t;
-                        outPtr[outIndex] = reg_round(*inPtr);
+                        outPtr[outIndex] = Round(*inPtr);
                         ++inPtr;
                     }
                 }
@@ -1000,8 +1000,8 @@ int main(int argc, char **argv)
                     float value = *inPtr * 255.f;
                     size_t outIndex = ((z*image->ny+y)*image->nx+x)*3;
                     if (value > 0)
-                        outPtr[outIndex] = static_cast<unsigned char>(reg_round(value>255?255:value));
-                    else outPtr[outIndex+1] = static_cast<unsigned char>(reg_round(-value<-255?-255:-value));
+                        outPtr[outIndex] = static_cast<unsigned char>(Round(value>255?255:value));
+                    else outPtr[outIndex+1] = static_cast<unsigned char>(Round(-value<-255?-255:-value));
                     outPtr[outIndex+2] = 0;
                     ++inPtr;
                 }
@@ -1133,7 +1133,7 @@ int main(int argc, char **argv)
                                         size_t voxelIndex = (z*outputImage->ny+y)*outputImage->nx+vx;
                                         for(size_t x=vx;x<vx+4;++x){
                                             if(x<(size_t)outputImage->nx){
-                                                variance += reg_pow2(meanValue - inPtr[voxelIndex]);
+                                                variance += Square(meanValue - inPtr[voxelIndex]);
                                             }
                                             voxelIndex++;
                                         } // x
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 5c992b69..fa91583f 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -464,7 +464,7 @@ int main(int argc, char **argv)
          }
          else
          {
-            switch(static_cast<int>(reg_round(inputTransformationImage->intent_p1)))
+            switch(Round(inputTransformationImage->intent_p1))
             {
             case DEF_FIELD:
                NR_INFO("The specified transformation is a deformation field:");
@@ -693,7 +693,7 @@ int main(int argc, char **argv)
          {
             reg_affine_getDeformationField(affine1Trans,output1TransImage);
          }
-         else switch(reg_round(input1TransImage->intent_p1))
+         else switch(Round(input1TransImage->intent_p1))
          {
          case LIN_SPLINE_GRID:
          case CUB_SPLINE_GRID:
@@ -763,7 +763,7 @@ int main(int argc, char **argv)
          }
          else
          {
-            switch(reg_round(input2TransImage->intent_p1))
+            switch(Round(input2TransImage->intent_p1))
             {
             case LIN_SPLINE_GRID:
             case CUB_SPLINE_GRID:
@@ -953,7 +953,7 @@ int main(int argc, char **argv)
       }
       else
       {
-         switch(static_cast<int>(reg_round(inputTransformationImage->intent_p1)))
+         switch(Round(inputTransformationImage->intent_p1))
          {
          case DEF_FIELD:
             NR_INFO("The specified transformation is a deformation field:");
@@ -1155,7 +1155,7 @@ int main(int argc, char **argv)
             NR_ERROR("Error when reading the input image: " << param->inputTransName);
             return EXIT_FAILURE;
          }
-         switch(reg_round(inputTransImage->intent_p1))
+         switch(Round(inputTransImage->intent_p1))
          {
          case LIN_SPLINE_GRID:
          case CUB_SPLINE_GRID:
@@ -1290,7 +1290,7 @@ int main(int argc, char **argv)
      outputTransImage->scl_inter = 0.f;
      outputTransImage->data = malloc(outputTransImage->nvox*outputTransImage->nbyper);
       // Invert the provided
-      switch(reg_round(inputTransImage->intent_p1))
+      switch(Round(inputTransImage->intent_p1))
       {
       case DEF_FIELD:
          reg_defFieldInvert(inputTransImage,outputTransImage,1.0e-6f);
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 9293ecee..80882617 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -398,9 +398,9 @@ void reg_base<T>::Initialise() {
         reg_heapSort(refDataPtr, tmpReference->nvox);
         // Update the reference threshold values if no value has been setup by the user
         if (referenceThresholdLow[0] == std::numeric_limits<T>::lowest())
-            referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.02f)];
+            referenceThresholdLow[0] = refDataPtr[Round((float)tmpReference->nvox * 0.02f)];
         if (referenceThresholdUp[0] == std::numeric_limits<T>::max())
-            referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.98f)];
+            referenceThresholdUp[0] = refDataPtr[Round((float)tmpReference->nvox * 0.98f)];
 
         // Create a copy of the floating image to extract the robust range
         NiftiImage tmpFloating = inputFloating;
@@ -410,9 +410,9 @@ void reg_base<T>::Initialise() {
         reg_heapSort(floDataPtr, tmpFloating->nvox);
         // Update the floating threshold values if no value has been setup by the user
         if (floatingThresholdLow[0] == std::numeric_limits<T>::lowest())
-            floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.02f)];
+            floatingThresholdLow[0] = floDataPtr[Round((float)tmpFloating->nvox * 0.02f)];
         if (floatingThresholdUp[0] == std::numeric_limits<T>::max())
-            floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.98f)];
+            floatingThresholdUp[0] = floDataPtr[Round((float)tmpFloating->nvox * 0.98f)];
     }
 
     // FINEST LEVEL OF REGISTRATION
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index f15eee35..bff1e4c6 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -242,22 +242,22 @@ DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) {
     case NIFTI_TYPE_UINT8:
         if (intensity != intensity)
             intensity = 0;
-        intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
-        return static_cast<unsigned char>(intensity > 0 ? reg_round(intensity) : 0);
+        intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
+        return static_cast<unsigned char>(intensity > 0 ? Round(intensity) : 0);
     case NIFTI_TYPE_UINT16:
         if (intensity != intensity)
             intensity = 0;
-        intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
-        return static_cast<unsigned short>(intensity > 0 ? reg_round(intensity) : 0);
+        intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
+        return static_cast<unsigned short>(intensity > 0 ? Round(intensity) : 0);
     case NIFTI_TYPE_UINT32:
         if (intensity != intensity)
             intensity = 0;
-        intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-        return static_cast<unsigned>(intensity > 0 ? reg_round(intensity) : 0);
+        intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
+        return static_cast<unsigned>(intensity > 0 ? Round(intensity) : 0);
     default:
         if (intensity != intensity)
             intensity = 0;
-        return static_cast<DataType>(reg_round(intensity));
+        return static_cast<DataType>(Round(intensity));
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index 8e70f957..e91ef03a 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -232,10 +232,10 @@ void initialise_block_matching_method(nifti_image * reference,
    }
 
    params->voxelCaptureRange = 3;
-   params->blockNumber[0] = (int)std::ceil((double)reference->nx / (double)BLOCK_WIDTH);
-   params->blockNumber[1] = (int)std::ceil((double)reference->ny / (double)BLOCK_WIDTH);
+   params->blockNumber[0] = Ceil((double)reference->nx / (double)BLOCK_WIDTH);
+   params->blockNumber[1] = Ceil((double)reference->ny / (double)BLOCK_WIDTH);
    if (reference->nz > 1) {
-      params->blockNumber[2] = (int)std::ceil((double)reference->nz / (double)BLOCK_WIDTH);
+      params->blockNumber[2] = Ceil((double)reference->nz / (double)BLOCK_WIDTH);
       params->dim = 3;
    }
    else {
diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp
index 93ed99b8..a35fa85a 100644
--- a/reg-lib/cpu/_reg_discrete_init.cpp
+++ b/reg-lib/cpu/_reg_discrete_init.cpp
@@ -329,24 +329,24 @@ void reg_discrete_init::GetRegularisedMeasure()
                this->regularised_measures[measure_index] =
                      (1.f-this->regularisation_weight-this->l2_weight) * this->discretised_measures[measure_index] -
                      this->regularisation_weight * (
-                     reg_pow2(XX_x + valX * _basisXX) +
-                     reg_pow2(XX_y + valY * _basisXX) +
-                     reg_pow2(XX_z + valZ * _basisXX) +
-                     reg_pow2(YY_x + valX * _basisYY) +
-                     reg_pow2(YY_y + valY * _basisYY) +
-                     reg_pow2(YY_z + valZ * _basisYY) +
-                     reg_pow2(ZZ_x + valX * _basisZZ) +
-                     reg_pow2(ZZ_y + valY * _basisZZ) +
-                     reg_pow2(ZZ_z + valZ * _basisZZ) + 2.0 * (
-                     reg_pow2(XY_x + valX * _basisXY) +
-                     reg_pow2(XY_y + valY * _basisXY) +
-                     reg_pow2(XY_z + valZ * _basisXY) +
-                     reg_pow2(XZ_x + valX * _basisXZ) +
-                     reg_pow2(XZ_y + valY * _basisXZ) +
-                     reg_pow2(XZ_z + valZ * _basisXZ) +
-                     reg_pow2(YZ_x + valX * _basisYZ) +
-                     reg_pow2(YZ_y + valY * _basisYZ) +
-                     reg_pow2(YZ_z + valZ * _basisYZ)
+                     Square(XX_x + valX * _basisXX) +
+                     Square(XX_y + valY * _basisXX) +
+                     Square(XX_z + valZ * _basisXX) +
+                     Square(YY_x + valX * _basisYY) +
+                     Square(YY_y + valY * _basisYY) +
+                     Square(YY_z + valZ * _basisYY) +
+                     Square(ZZ_x + valX * _basisZZ) +
+                     Square(ZZ_y + valY * _basisZZ) +
+                     Square(ZZ_z + valZ * _basisZZ) + 2.0 * (
+                     Square(XY_x + valX * _basisXY) +
+                     Square(XY_y + valY * _basisXY) +
+                     Square(XY_z + valZ * _basisXY) +
+                     Square(XZ_x + valX * _basisXZ) +
+                     Square(XZ_y + valY * _basisXZ) +
+                     Square(XZ_z + valZ * _basisXZ) +
+                     Square(YZ_x + valX * _basisYZ) +
+                     Square(YZ_y + valY * _basisYZ) +
+                     Square(YZ_z + valZ * _basisYZ)
                      ) ) - this->l2_weight * this->l2_penalisation[label];
             } // label
             ++node;
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index a197b559..7e563abe 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -113,8 +113,8 @@ double reg_getDtiMeasureValue(const nifti_image *referenceImage,
                 const DataType rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel];
                 const DataType rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel];
                 const DataType rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel];
-                dtiCost -= twoThirds * (reg_pow2(rXX) + reg_pow2(rYY) + reg_pow2(rZZ))
-                    + 2.0 * (reg_pow2(rXY) + reg_pow2(rXZ) + reg_pow2(rYZ))
+                dtiCost -= twoThirds * (Square(rXX) + Square(rYY) + Square(rZZ))
+                    + 2.0 * (Square(rXY) + Square(rXZ) + Square(rYZ))
                     - twoThirds * (rXX * rYY + rXX * rZZ + rYY * rZZ);
                 n++;
             } // check if values are defined
diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp
index ccf9b6cc..04cb40bd 100644
--- a/reg-lib/cpu/_reg_femTrans.cpp
+++ b/reg-lib/cpu/_reg_femTrans.cpp
@@ -70,17 +70,17 @@ void reg_fem_InitialiseTransformation(int *elementNodes,
          reg_mat44_mul(realToVoxel, nodeRealPosition, nodeVoxelIndices[i]);
       }
 
-      int xRange[2]= {(int)reg_ceil(nodeVoxelIndices[0][0]), (int)reg_floor(nodeVoxelIndices[0][0])};
-      int yRange[2]= {(int)reg_ceil(nodeVoxelIndices[0][1]), (int)reg_floor(nodeVoxelIndices[0][1])};
-      int zRange[2]= {(int)reg_ceil(nodeVoxelIndices[0][2]), (int)reg_floor(nodeVoxelIndices[0][2])};
+      int xRange[2]= {Ceil(nodeVoxelIndices[0][0]), Floor(nodeVoxelIndices[0][0])};
+      int yRange[2]= {Ceil(nodeVoxelIndices[0][1]), Floor(nodeVoxelIndices[0][1])};
+      int zRange[2]= {Ceil(nodeVoxelIndices[0][2]), Floor(nodeVoxelIndices[0][2])};
       for(unsigned i=1; i<4; ++i)
       {
-         xRange[0]=xRange[0]<(int)reg_ceil(nodeVoxelIndices[i][0])?xRange[0]:(int)reg_ceil(nodeVoxelIndices[i][0]);
-         xRange[1]=xRange[1]>(int)reg_floor(nodeVoxelIndices[i][0])?xRange[1]:(int)reg_floor(nodeVoxelIndices[i][0]);
-         yRange[0]=yRange[0]<(int)reg_ceil(nodeVoxelIndices[i][1])?yRange[0]:(int)reg_ceil(nodeVoxelIndices[i][1]);
-         yRange[1]=yRange[1]>(int)reg_floor(nodeVoxelIndices[i][1])?yRange[1]:(int)reg_floor(nodeVoxelIndices[i][1]);
-         zRange[0]=zRange[0]<(int)reg_ceil(nodeVoxelIndices[i][2])?zRange[0]:(int)reg_ceil(nodeVoxelIndices[i][2]);
-         zRange[1]=zRange[1]>(int)reg_floor(nodeVoxelIndices[i][2])?zRange[1]:(int)reg_floor(nodeVoxelIndices[i][2]);
+         xRange[0]=xRange[0]<Ceil(nodeVoxelIndices[i][0])?xRange[0]:Ceil(nodeVoxelIndices[i][0]);
+         xRange[1]=xRange[1]>Floor(nodeVoxelIndices[i][0])?xRange[1]:Floor(nodeVoxelIndices[i][0]);
+         yRange[0]=yRange[0]<Ceil(nodeVoxelIndices[i][1])?yRange[0]:Ceil(nodeVoxelIndices[i][1]);
+         yRange[1]=yRange[1]>Floor(nodeVoxelIndices[i][1])?yRange[1]:Floor(nodeVoxelIndices[i][1]);
+         zRange[0]=zRange[0]<Ceil(nodeVoxelIndices[i][2])?zRange[0]:Ceil(nodeVoxelIndices[i][2]);
+         zRange[1]=zRange[1]>Floor(nodeVoxelIndices[i][2])?zRange[1]:Floor(nodeVoxelIndices[i][2]);
       }
 
       xRange[0]=xRange[0]<0?0:xRange[0];
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index dc61d5b7..51e4c82b 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -240,8 +240,8 @@ void UpdateLocalStatImages(const nifti_image *refImage,
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // G*(I^2) - (G*I)^2
-        sdevImgPtr[voxel] = sqrt(sdevImgPtr[voxel] - reg_pow2(meanImgPtr[voxel]));
-        warSdevPtr[voxel] = sqrt(warSdevPtr[voxel] - reg_pow2(warMeanPtr[voxel]));
+        sdevImgPtr[voxel] = sqrt(sdevImgPtr[voxel] - Square(meanImgPtr[voxel]));
+        warSdevPtr[voxel] = sqrt(warSdevPtr[voxel] - Square(warMeanPtr[voxel]));
         // Stabilise the computation
         if (sdevImgPtr[voxel] < 1.e-06) sdevImgPtr[voxel] = 0;
         if (warSdevPtr[voxel] < 1.e-06) warSdevPtr[voxel] = 0;
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index bbe1e4f7..87d42be9 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -21,9 +21,9 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
                                 const float *spacing) {
     // Define the control point grid dimensions
     vector<NiftiImage::dim_t> dims{
-        static_cast<int>(reg_ceil(referenceImage->nx * referenceImage->dx / spacing[0]) + 3.f),
-        static_cast<int>(reg_ceil(referenceImage->ny * referenceImage->dy / spacing[1]) + 3.f),
-        referenceImage->nz > 1 ? static_cast<int>(reg_ceil(referenceImage->nz * referenceImage->dz / spacing[2]) + 3.f) : 1,
+        Ceil(referenceImage->nx * referenceImage->dx / spacing[0] + 3.f),
+        Ceil(referenceImage->ny * referenceImage->dy / spacing[1] + 3.f),
+        referenceImage->nz > 1 ? Ceil(referenceImage->nz * referenceImage->dz / spacing[2] + 3.f) : 1,
         1,
         referenceImage->nz > 1 ? 3 : 2
     };
@@ -277,9 +277,9 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
 
     // Compute the dimension of the control point grids
     const vector<NiftiImage::dim_t> dims{
-        static_cast<int>(reg_ceil((maxPosition[0] - minPosition[0]) / spacing[0]) + 3),
-        static_cast<int>(reg_ceil((maxPosition[1] - minPosition[1]) / spacing[1]) + 3),
-        referenceImage->nz > 1 ? static_cast<int>(reg_ceil((maxPosition[2] - minPosition[2]) / spacing[2]) + 3) : 1,
+        Ceil((maxPosition[0] - minPosition[0]) / spacing[0] + 3.f),
+        Ceil((maxPosition[1] - minPosition[1]) / spacing[1] + 3.f),
+        referenceImage->nz > 1 ? Ceil((maxPosition[2] - minPosition[2]) / spacing[2] + 3.f) : 1,
         1,
         referenceImage->nz > 1 ? 3 : 2
     };
@@ -419,17 +419,17 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
                             referenceMatrix_real_to_voxel.m[2][3];
 
                         // The spline coefficients are computed
-                        xPre = (int)reg_floor(voxel[0]);
+                        xPre = Floor(voxel[0]);
                         xBasis[1] = voxel[0] - static_cast<DataType>(xPre);
                         if (xBasis[1] < 0) xBasis[1] = 0; //rounding error
                         xBasis[0] = 1.f - xBasis[1];
 
-                        yPre = (int)reg_floor(voxel[1]);
+                        yPre = Floor(voxel[1]);
                         yBasis[1] = voxel[1] - static_cast<DataType>(yPre);
                         if (yBasis[1] < 0) yBasis[1] = 0; //rounding error
                         yBasis[0] = 1.f - yBasis[1];
 
-                        zPre = (int)reg_floor(voxel[2]);
+                        zPre = Floor(voxel[2]);
                         zBasis[1] = voxel[2] - static_cast<DataType>(zPre);
                         if (zBasis[1] < 0) zBasis[1] = 0; //rounding error
                         zBasis[0] = 1.f - zBasis[1];
@@ -610,13 +610,13 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                     + referenceMatrix_real_to_voxel->m[1][3];
 
                 // The spline coefficients are computed
-                xPre = (int)reg_floor(xVoxel);
+                xPre = Floor(xVoxel);
                 basis = xVoxel - static_cast<DataType>(xPre--);
                 if (basis < 0) basis = 0; //rounding error
                 if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
                 else get_SplineBasisValues<DataType>(basis, temp);
 
-                yPre = (int)reg_floor(yVoxel);
+                yPre = Floor(yVoxel);
                 basis = yVoxel - static_cast<DataType>(yPre--);
                 if (basis < 0) basis = 0; //rounding error
                 if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
@@ -943,19 +943,19 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                             referenceMatrix_real_to_voxel.m[2][3];
 
                         // The spline coefficients are computed
-                        xPre = (int)reg_floor(voxel[0]);
+                        xPre = Floor(voxel[0]);
                         basis = voxel[0] - static_cast<DataType>(xPre--);
                         if (basis < 0) basis = 0; //rounding error
                         if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
                         else get_SplineBasisValues<DataType>(basis, xBasis);
 
-                        yPre = (int)reg_floor(voxel[1]);
+                        yPre = Floor(voxel[1]);
                         basis = voxel[1] - static_cast<DataType>(yPre--);
                         if (basis < 0) basis = 0; //rounding error
                         if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
                         else get_SplineBasisValues<DataType>(basis, yBasis);
 
-                        zPre = (int)reg_floor(voxel[2]);
+                        zPre = Floor(voxel[2]);
                         basis = voxel[2] - static_cast<DataType>(zPre--);
                         if (basis < 0) basis = 0; //rounding error
                         if (bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
@@ -1570,9 +1570,9 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
     float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz };
     for (int i = 0; i < (nodeImage->nz > 1 ? 3 : 2); ++i) {
         if (nodeImage->sform_code > 0) {
-            ratio[i] = sqrt(reg_pow2(nodeImage->sto_xyz.m[i][0]) +
-                            reg_pow2(nodeImage->sto_xyz.m[i][1]) +
-                            reg_pow2(nodeImage->sto_xyz.m[i][2]));
+            ratio[i] = sqrt(Square(nodeImage->sto_xyz.m[i][0]) +
+                            Square(nodeImage->sto_xyz.m[i][1]) +
+                            Square(nodeImage->sto_xyz.m[i][2]));
         }
         ratio[i] /= voxelImage->pixdim[i + 1];
         weight *= ratio[i];
@@ -1590,9 +1590,9 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
                 // linear interpolation is performed
                 DataType basisX[2], basisY[2], basisZ[2] = { 0, 0 };
                 int pre[3] = {
-                    static_cast<int>(reg_floor(voxelCoord[0])),
-                    static_cast<int>(reg_floor(voxelCoord[1])),
-                    static_cast<int>(reg_floor(voxelCoord[2]))
+                    Floor(voxelCoord[0]),
+                    Floor(voxelCoord[1]),
+                    Floor(voxelCoord[2])
                 };
                 basisX[1] = voxelCoord[0] - static_cast<DataType>(pre[0]);
                 basisX[0] = static_cast<DataType>(1) - basisX[1];
@@ -1713,8 +1713,8 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint,
     splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f;
     splineControlPoint->dz = 1.0f;
     if (referenceImage != nullptr) {
-        splineControlPoint->dim[1] = splineControlPoint->nx = static_cast<int>(reg_ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx) + 3.f);
-        splineControlPoint->dim[2] = splineControlPoint->ny = static_cast<int>(reg_ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy) + 3.f);
+        splineControlPoint->dim[1] = splineControlPoint->nx = Ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx + 3.f);
+        splineControlPoint->dim[2] = splineControlPoint->ny = Ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy + 3.f);
     } else {
         splineControlPoint->dim[1] = splineControlPoint->nx = (oldDim[1] - 3) * 2 + 3;
         splineControlPoint->dim[2] = splineControlPoint->ny = (oldDim[2] - 3) * 2 + 3;
@@ -1807,9 +1807,9 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_
     splineControlPoint->dz = splineControlPoint->pixdim[3] = splineControlPoint->dz / 2.0f;
 
     if (referenceImage != nullptr) {
-        splineControlPoint->dim[1] = splineControlPoint->nx = static_cast<int>(reg_ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx) + 3.f);
-        splineControlPoint->dim[2] = splineControlPoint->ny = static_cast<int>(reg_ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy) + 3.f);
-        splineControlPoint->dim[3] = splineControlPoint->nz = static_cast<int>(reg_ceil(referenceImage->nz * referenceImage->dz / splineControlPoint->dz) + 3.f);
+        splineControlPoint->dim[1] = splineControlPoint->nx = Ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx + 3.f);
+        splineControlPoint->dim[2] = splineControlPoint->ny = Ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy + 3.f);
+        splineControlPoint->dim[3] = splineControlPoint->nz = Ceil(referenceImage->nz * referenceImage->dz / splineControlPoint->dz + 3.f);
     } else {
         splineControlPoint->dim[1] = splineControlPoint->nx = (oldDim[1] - 3) * 2 + 3;
         splineControlPoint->dim[2] = splineControlPoint->ny = (oldDim[2] - 3) * 2 + 3;
@@ -2282,8 +2282,8 @@ void reg_defField_compose2D(nifti_image *deformationField,
                 + df_real2Voxel->m[1][3];
 
             // Linear interpolation to compute the new deformation
-            pre[0] = (int)reg_floor(voxelX);
-            pre[1] = (int)reg_floor(voxelY);
+            pre[0] = Floor(voxelX);
+            pre[1] = Floor(voxelY);
             relX[1] = voxelX - static_cast<DataType>(pre[0]);
             relX[0] = 1.f - relX[1];
             relY[1] = voxelY - static_cast<DataType>(pre[1]);
@@ -2392,9 +2392,9 @@ void reg_defField_compose3D(nifti_image *deformationField,
             //reg_mat44_mul(df_real2Voxel, realDef, voxel);
 
             // Linear interpolation to compute the new deformation
-            pre[0] = static_cast<int>reg_floor(voxel[0]);
-            pre[1] = static_cast<int>reg_floor(voxel[1]);
-            pre[2] = static_cast<int>reg_floor(voxel[2]);
+            pre[0] = Floor(voxel[0]);
+            pre[1] = Floor(voxel[1]);
+            pre[2] = Floor(voxel[2]);
             relX[1] = voxel[0] - static_cast<DataType>(pre[0]);
             relX[0] = 1.f - relX[1];
             relY[1] = voxel[1] - static_cast<DataType>(pre[1]);
@@ -3126,13 +3126,13 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
                 + matrix_real_to_voxel1->m[1][3];
 
             // The spline coefficients are computed
-            int xPre = (int)(reg_floor(xVoxel));
+            int xPre = Floor(xVoxel);
             basis = xVoxel - static_cast<DataType>(xPre--);
             if (basis < 0) basis = 0; //rounding error
             if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
             else get_SplineBasisValues<DataType>(basis, xBasis);
 
-            int yPre = (int)(reg_floor(yVoxel));
+            int yPre = Floor(yVoxel);
             basis = yVoxel - static_cast<DataType>(yPre--);
             if (basis < 0) basis = 0; //rounding error
             if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
@@ -3338,19 +3338,19 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
                     + matrix_real_to_voxel1->m[2][3];
 
                 // The spline coefficients are computed
-                xPre = (int)reg_floor(xVoxel);
+                xPre = Floor(xVoxel);
                 basis = xVoxel - static_cast<DataType>(xPre--);
                 if (basis < 0) basis = 0; //rounding error
                 if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
                 else get_SplineBasisValues<DataType>(basis, xBasis);
 
-                yPre = (int)reg_floor(yVoxel);
+                yPre = Floor(yVoxel);
                 basis = yVoxel - static_cast<DataType>(yPre--);
                 if (basis < 0) basis = 0; //rounding error
                 if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
                 else get_SplineBasisValues<DataType>(basis, yBasis);
 
-                zPre = (int)reg_floor(zVoxel);
+                zPre = Floor(zVoxel);
                 basis = zVoxel - static_cast<DataType>(zPre--);
                 if (basis < 0) basis = 0; //rounding error
                 if (bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
@@ -3550,7 +3550,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
         squaringNumber = squaringNumber < 6 ? 6 : squaringNumber;
         // Set the number of squaring step in the flow field
         if (fabs(flowFieldImage->intent_p2) != squaringNumber) {
-            NR_WARN("Changing from " << (int)reg_round(fabs(flowFieldImage->intent_p2)) << " to " << abs(squaringNumber) <<
+            NR_WARN("Changing from " << Round(fabs(flowFieldImage->intent_p2)) << " to " << abs(squaringNumber) <<
                     " squaring step (equivalent to scaling down by " << (int)pow(2.0f, squaringNumber) << ")");
         }
         // Update the number of squaring step required
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 8eba7987..62acf252 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -165,9 +165,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                   // Compute the position in the grid
                   reg_mat44_mul(&transformation,imageCoord,gridCoord);
                   // Compute the anterior node coord
-                  pre[0]=static_cast<int>(reg_floor(gridCoord[0]));
-                  pre[1]=static_cast<int>(reg_floor(gridCoord[1]));
-                  pre[2]=static_cast<int>(reg_floor(gridCoord[2]));
+                  pre[0]=Floor(gridCoord[0]);
+                  pre[1]=Floor(gridCoord[1]);
+                  pre[2]=Floor(gridCoord[2]);
                   int controlPoint_index=(pre[2]*splineControlPoint->ny+pre[1])*splineControlPoint->nx+pre[0];
 
                   jacobianMatrix.m[0][0] = (coeffPtrX[controlPoint_index+1] - coeffPtrX[controlPoint_index]);
@@ -382,8 +382,8 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                // Compute the position in the grid
                reg_mat44_mul(&transformation,imageCoord,gridCoord);
                // Compute the anterior node coord
-               pre[0]=static_cast<int>(reg_floor(gridCoord[0]));
-               pre[1]=static_cast<int>(reg_floor(gridCoord[1]));
+               pre[0]=Floor(gridCoord[0]);
+               pre[1]=Floor(gridCoord[1]);
                // Compute the basis values and their first derivatives
                basis = gridCoord[0] - pre[0];
                get_BSplineBasisValues<DataType>(basis, xBasis, xFirst);
@@ -794,9 +794,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   // Compute the position in the grid
                   reg_mat44_mul(&transformation,imageCoord,gridCoord);
                   // Compute the anterior node coord
-                  pre[0]=static_cast<int>(reg_floor(gridCoord[0]));
-                  pre[1]=static_cast<int>(reg_floor(gridCoord[1]));
-                  pre[2]=static_cast<int>(reg_floor(gridCoord[2]));
+                  pre[0]=Floor(gridCoord[0]);
+                  pre[1]=Floor(gridCoord[1]);
+                  pre[2]=Floor(gridCoord[2]);
                   // Compute the basis values and their first derivatives
                   basis = gridCoord[0] - pre[0];
                   get_BSplineBasisValues<DataType>(basis, xBasis, xFirst);
@@ -1483,7 +1483,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
 
                // Loop over all the control points in the surrounding area
 
-               for(pixelY=(int)reg_ceil((y-3)*gridVoxelSpacing[1]); pixelY<=(int)reg_ceil((y+1)*gridVoxelSpacing[1]); pixelY++)
+               for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY<=Ceil((y+1)*gridVoxelSpacing[1]); pixelY++)
                {
                   if(pixelY>-1 && pixelY<referenceImage->ny)
                   {
@@ -1492,9 +1492,9 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
                      basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre;
                      get_BSplineBasisValue<DataType>(basis,y-yPre,yBasis,yFirst);
 
-                     jacIndex = pixelY*referenceImage->nx+(int)reg_ceil((x-3)*gridVoxelSpacing[0]);
+                     jacIndex = pixelY*referenceImage->nx+Ceil((x-3)*gridVoxelSpacing[0]);
 
-                     for(pixelX=(int)reg_ceil((x-3)*gridVoxelSpacing[0]); pixelX<=(int)reg_ceil((x+1)*gridVoxelSpacing[0]); pixelX++)
+                     for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX<=Ceil((x+1)*gridVoxelSpacing[0]); pixelX++)
                      {
                         if(pixelX>-1 && pixelX<referenceImage->nx && (yFirst!=0 || yBasis!=0))
                         {
@@ -1747,7 +1747,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                   jacobianConstraint[0]=jacobianConstraint[1]=jacobianConstraint[2]=0.;
 
                   // Loop over all the control points in the surrounding area
-                  for(pixelZ=(int)reg_ceil((z-3)*gridVoxelSpacing[2]); pixelZ<=(int)reg_ceil((z+1)*gridVoxelSpacing[2]); pixelZ++)
+                  for(pixelZ=Ceil((z-3)*gridVoxelSpacing[2]); pixelZ<=Ceil((z+1)*gridVoxelSpacing[2]); pixelZ++)
                   {
                      if(pixelZ>-1 && pixelZ<referenceImage->nz)
                      {
@@ -1756,7 +1756,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                         basis=(DataType)pixelZ/gridVoxelSpacing[2]-(DataType)zPre;
                         get_BSplineBasisValue<DataType>(basis,z-zPre,zBasis,zFirst);
 
-                        for(pixelY=(int)reg_ceil((y-3)*gridVoxelSpacing[1]); pixelY<=(int)reg_ceil((y+1)*gridVoxelSpacing[1]); pixelY++)
+                        for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY<=Ceil((y+1)*gridVoxelSpacing[1]); pixelY++)
                         {
                            if(pixelY>-1 && pixelY<referenceImage->ny && (zFirst!=0 || zBasis!=0))
                            {
@@ -1765,9 +1765,9 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                               basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre;
                               get_BSplineBasisValue<DataType>(basis,y-yPre,yBasis,yFirst);
 
-                              jacIndex = (pixelZ*referenceImage->ny+pixelY)*referenceImage->nx+(int)reg_ceil((x-3)*gridVoxelSpacing[0]);
+                              jacIndex = (pixelZ*referenceImage->ny+pixelY)*referenceImage->nx+Ceil((x-3)*gridVoxelSpacing[0]);
 
-                              for(pixelX=(int)reg_ceil((x-3)*gridVoxelSpacing[0]); pixelX<=(int)reg_ceil((x+1)*gridVoxelSpacing[0]); pixelX++)
+                              for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX<=Ceil((x+1)*gridVoxelSpacing[0]); pixelX++)
                               {
                                  if(pixelX>-1 && pixelX<referenceImage->nx && (yFirst!=0 || yBasis!=0))
                                  {
@@ -2076,12 +2076,12 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
 
                // Loop over all the control points in the surrounding area
 
-               for(pixelY=(int)reg_ceil((y-3)*gridVoxelSpacing[1]); pixelY<(int)reg_floor((y+1)*gridVoxelSpacing[1]); pixelY++)
+               for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY<Floor((y+1)*gridVoxelSpacing[1]); pixelY++)
                {
                   if(pixelY>-1 && pixelY<referenceImage->ny)
                   {
 
-                     for(pixelX=(int)reg_ceil((x-3)*gridVoxelSpacing[0]); pixelX<(int)reg_floor((x+1)*gridVoxelSpacing[0]); pixelX++)
+                     for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX<Floor((x+1)*gridVoxelSpacing[0]); pixelX++)
                      {
                         if(pixelX>-1 && pixelX<referenceImage->nx)
                         {
@@ -2348,17 +2348,17 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                   correctFolding=false;
 
                   // Loop over all the control points in the surrounding area
-                  for(pixelZ=(int)reg_ceil((z-3)*gridVoxelSpacing[2]); pixelZ<(int)reg_floor((z+1)*gridVoxelSpacing[2]); pixelZ++)
+                  for(pixelZ=Ceil((z-3)*gridVoxelSpacing[2]); pixelZ<Floor((z+1)*gridVoxelSpacing[2]); pixelZ++)
                   {
                      if(pixelZ>-1 && pixelZ<referenceImage->nz)
                      {
 
-                        for(pixelY=(int)reg_ceil((y-3)*gridVoxelSpacing[1]); pixelY<(int)reg_floor((y+1)*gridVoxelSpacing[1]); pixelY++)
+                        for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY<Floor((y+1)*gridVoxelSpacing[1]); pixelY++)
                         {
                            if(pixelY>-1 && pixelY<referenceImage->ny)
                            {
 
-                              for(pixelX=(int)reg_ceil((x-3)*gridVoxelSpacing[0]); pixelX<(int)reg_floor((x+1)*gridVoxelSpacing[0]); pixelX++)
+                              for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX<Floor((x+1)*gridVoxelSpacing[0]); pixelX++)
                               {
                                  if(pixelX>-1 && pixelX<referenceImage->nx)
                                  {
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 41e9311c..4ecd3c77 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -549,7 +549,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin
             currentValue = 0;
             for (b = 0; b < 2; b++) {
                 for (a = 0; a < 2; a++) {
-                    currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                    currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
                 }
             }
             constraintValue += currentValue;
@@ -638,7 +638,7 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin
                 currentValue = 0;
                 for (b = 0; b < 3; b++) {
                     for (a = 0; a < 3; a++) {
-                        currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                        currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
                     }
                 }
                 constraintValue += currentValue;
@@ -746,7 +746,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
             currentValue = 0;
             for (b = 0; b < 2; b++) {
                 for (a = 0; a < 2; a++) {
-                    currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                    currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
                 }
             }
             constraintValue += currentValue;
@@ -846,7 +846,7 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
                 currentValue = 0;
                 for (b = 0; b < 3; b++) {
                     for (a = 0; a < 3; a++) {
-                        currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                        currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
                     }
                 }
                 constraintValue += currentValue;
@@ -1409,7 +1409,7 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
             currentValue = 0;
             for (b = 0; b < 2; b++) {
                 for (a = 0; a < 2; a++) {
-                    currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                    currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
                 }
             }
             constraintValue += currentValue;
@@ -1486,7 +1486,7 @@ double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) {
                 currentValue = 0;
                 for (b = 0; b < 3; b++) {
                     for (a = 0; a < 3; a++) {
-                        currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
+                        currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
                     }
                 }
                 constraintValue += currentValue;
@@ -1754,9 +1754,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
         reg_mat44_mul(gridRealToVox, ref_position, def_position);
 
         // Extract the corresponding nodes
-        previous[0] = static_cast<int>(reg_floor(def_position[0])) - 1;
-        previous[1] = static_cast<int>(reg_floor(def_position[1])) - 1;
-        previous[2] = static_cast<int>(reg_floor(def_position[2])) - 1;
+        previous[0] = Floor(def_position[0]) - 1;
+        previous[1] = Floor(def_position[1]) - 1;
+        previous[2] = Floor(def_position[2]) - 1;
         // Check that the specified landmark belongs to the input image
         if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx &&
             previous[1] > -1 && previous[1] + 3 < controlPointImage->ny &&
@@ -1791,10 +1791,10 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
                     }
                 }
             }
-            constraintValue += reg_pow2(flo_position[0] - def_position[0]);
-            constraintValue += reg_pow2(flo_position[1] - def_position[1]);
+            constraintValue += Square(flo_position[0] - def_position[0]);
+            constraintValue += Square(flo_position[1] - def_position[1]);
             if (imageDim > 2)
-                constraintValue += reg_pow2(flo_position[2] - def_position[2]);
+                constraintValue += Square(flo_position[2] - def_position[2]);
         } else {
             NR_WARN("The current landmark at position " << ref_position[0] << " " <<
                     ref_position[1] << (imageDim > 2 ? " "s + std::to_string(ref_position[2]) : "") <<
@@ -1867,9 +1867,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
         reg_mat44_mul(gridRealToVox, ref_position, def_position);
         if (imageDim == 2) def_position[2] = 0;
         // Extract the corresponding nodes
-        previous[0] = static_cast<int>(reg_floor(def_position[0])) - 1;
-        previous[1] = static_cast<int>(reg_floor(def_position[1])) - 1;
-        previous[2] = static_cast<int>(reg_floor(def_position[2])) - 1;
+        previous[0] = Floor(def_position[0]) - 1;
+        previous[1] = Floor(def_position[1]) - 1;
+        previous[2] = Floor(def_position[2]) - 1;
         // Check that the specified landmark belongs to the input image
         if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx &&
             previous[1] > -1 && previous[1] + 3 < controlPointImage->ny &&
@@ -1994,45 +1994,45 @@ double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) {
                     neigbCP[0] = splinePtrX[index - 1];
                     neigbCP[1] = splinePtrY[index - 1];
                     neigbCP[2] = splinePtrZ[index - 1];
-                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
-                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dx;
+                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
+                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dx;
                 }
                 if (x < splineControlPoint->nx - 1) {
                     neigbCP[0] = splinePtrX[index + 1];
                     neigbCP[1] = splinePtrY[index + 1];
                     neigbCP[2] = splinePtrZ[index + 1];
-                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
-                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dx;
+                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
+                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dx;
                 }
 
                 if (y > 0) {
                     neigbCP[0] = splinePtrX[index - splineControlPoint->nx];
                     neigbCP[1] = splinePtrY[index - splineControlPoint->nx];
                     neigbCP[2] = splinePtrZ[index - splineControlPoint->nx];
-                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
-                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dy;
+                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
+                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dy;
                 }
                 if (y < splineControlPoint->ny - 1) {
                     neigbCP[0] = splinePtrX[index + splineControlPoint->nx];
                     neigbCP[1] = splinePtrY[index + splineControlPoint->nx];
                     neigbCP[2] = splinePtrZ[index + splineControlPoint->nx];
-                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
-                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dy;
+                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
+                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dy;
                 }
 
                 if (z > 0) {
                     neigbCP[0] = splinePtrX[index - splineControlPoint->nx * splineControlPoint->ny];
                     neigbCP[1] = splinePtrY[index - splineControlPoint->nx * splineControlPoint->ny];
                     neigbCP[2] = splinePtrZ[index - splineControlPoint->nx * splineControlPoint->ny];
-                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
-                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dz;
+                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
+                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dz;
                 }
                 if (z < splineControlPoint->nz - 1) {
                     neigbCP[0] = splinePtrX[index + splineControlPoint->nx * splineControlPoint->ny];
                     neigbCP[1] = splinePtrY[index + splineControlPoint->nx * splineControlPoint->ny];
                     neigbCP[2] = splinePtrZ[index + splineControlPoint->nx * splineControlPoint->ny];
-                    constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) +
-                                        reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dz;
+                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
+                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dz;
                 }
                 index++;
             } // x
diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp
index 07dbf3bd..45d6a8b7 100644
--- a/reg-lib/cpu/_reg_maths.cpp
+++ b/reg-lib/cpu/_reg_maths.cpp
@@ -1,5 +1,4 @@
-#include "_reg_maths.h"
-#include "Debug.hpp"
+#include "_reg_tools.h"
 
 #define mat(i,j,dim) mat[i*dim+j]
 
@@ -937,15 +936,15 @@ void reg_mat33_disp(const mat33& mat, const std::string& title){
 //is it square distance or just distance?
 // Helper function: Get the square of the Euclidean distance
 double get_square_distance3D(float * first_point3D, float * second_point3D) {
-    return sqrt(reg_pow2(first_point3D[0] - second_point3D[0]) +
-          reg_pow2(first_point3D[1] - second_point3D[1]) +
-          reg_pow2(first_point3D[2] - second_point3D[2]));
+    return sqrt(Square(first_point3D[0] - second_point3D[0]) +
+          Square(first_point3D[1] - second_point3D[1]) +
+          Square(first_point3D[2] - second_point3D[2]));
 }
 /* *************************************************************** */
 //is it square distance or just distance?
 double get_square_distance2D(float * first_point2D, float * second_point2D) {
-    return sqrt(reg_pow2(first_point2D[0] - second_point2D[0]) +
-          reg_pow2(first_point2D[1] - second_point2D[1]));
+    return sqrt(Square(first_point2D[0] - second_point2D[0]) +
+          Square(first_point2D[1] - second_point2D[1]));
 }
 /* *************************************************************** */
 // Calculate pythagorean distance
@@ -957,7 +956,7 @@ T pythag(T a, T b)
     absb = fabs(b);
 
     if (absa > absb)
-        return (T)(absa * sqrt(1.0f + SQR(absb / absa)));
+        return (T)(absa * sqrt(1.0f + Square(absb / absa)));
     else
-        return (absb == 0.0f ? 0.0f : (T)(absb * sqrt(1.0f + SQR(absa / absb))));
+        return (absb == 0.0f ? 0.0f : (T)(absb * sqrt(1.0f + Square(absa / absb))));
 }
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index ea14462d..2eac28f9 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -28,6 +28,15 @@
 #endif
 #endif
 
+#define _USE_MATH_DEFINES
+#include <math.h>
+
+#ifdef __CUDACC__
+#define DEVICE  __host__ __device__
+#else
+#define DEVICE
+#endif
+
 typedef enum {
     DEF_FIELD,
     DISP_FIELD,
@@ -39,46 +48,29 @@ typedef enum {
 } NREG_TRANS_TYPE;
 
 /* *************************************************************** */
-#define reg_pow2(a) ((a)*(a))
-#define reg_ceil(a) (ceil(a))
-#define reg_round(a) ((a)>0.0 ?(int)((a)+0.5):(int)((a)-0.5))
-#ifdef _WIN32
-#define reg_floor(a) ((a)>0?(int)(a):(int)((a)-1))
-#define reg_floor_size_t(a) ((a)>0?(long)(a):(long)((a)-1))
-#else
-#define reg_floor(a) ((a)>=0?(int)(a):floor(a))
-#endif
-#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
-#define FMAX(a,b) (a > b ? a : b)
-#define IMIN(a,b) (a < b ? a : b)
-#define SQR(a) (a==0.0 ? 0.0 : a*a)
-/* *************************************************************** */
-#if defined(_WIN32) && !defined(__CYGWIN__)
-#include <float.h>
-#include <time.h>
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-#if (_MSC_VER < 1900)
-#ifndef strtof
-#define strtof(_s, _t) (float) strtod(_s, _t)
-#endif
-#endif
-template<class PrecisionType> inline int round(PrecisionType x)
-{
-   return int(x > 0.0 ? (x + 0.5) : (x - 0.5));
+namespace NiftyReg {
+/* *************************************************************** */
+// The functions in the standard library are slower; so, these are implemented
+template<typename T>
+DEVICE inline T Square(const T& x) {
+    return x * x;
 }
-#if _MSC_VER < 1800 //test if visual studio version older than 2013
-template<typename T>inline bool isinf(T value)
-{
-   return std::numeric_limits<T>::has_infinity && value == std::numeric_limits<T>::infinity();
+template<typename T>
+DEVICE inline int Floor(const T& x) {
+    const int i = static_cast<int>(x);
+    return i - (x < i);
 }
-#endif
-inline int fabs(int _x)
-{
-   return (int)fabs((float)(_x));
+template<typename T>
+DEVICE inline int Ceil(const T& x) {
+    const int i = static_cast<int>(x);
+    return i + (x > i);
+}
+template<typename T>
+DEVICE inline int Round(const T& x) {
+    return static_cast<int>(x + (x >= 0 ? 0.5 : -0.5));
 }
-#endif // If on windows...
+/* *************************************************************** */
+} // namespace NiftyReg
 /* *************************************************************** */
 extern "C++" template <class T>
 void reg_LUdecomposition(T *inputMatrix,
@@ -98,9 +90,6 @@ void reg_matrixInvertMultiply(T *mat,
                               size_t *index,
                               T *vec);
 /* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
 extern "C++" template<class T>
 T* reg_matrix1DAllocate(size_t arraySize);
 /* *************************************************************** */
@@ -132,9 +121,6 @@ T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect);
 extern "C++" template<class T>
 void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res);
 /* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
 /** @brief Add two 3-by-3 matrices
 */
 mat33 reg_mat33_add(mat33 const* A, mat33 const* B);
@@ -185,7 +171,6 @@ void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum);
 extern "C++" template <class T>
 void reg_heapSort(T *array_tmp,int blockNum);
 /* *************************************************************** */
-/* *************************************************************** */
 bool operator==(mat44 A,mat44 B);
 /* *************************************************************** */
 bool operator!=(mat44 A,mat44 B);
diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp
index 83ea45ee..2ed3463f 100644
--- a/reg-lib/cpu/_reg_mrf.cpp
+++ b/reg-lib/cpu/_reg_mrf.cpp
@@ -355,9 +355,9 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
 
    // Compute the block size
    int blockSize[3]={
-      (int)reg_ceil(controlPointGridImage->dx / refImage->dx),
-      (int)reg_ceil(controlPointGridImage->dy / refImage->dy),
-      (int)reg_ceil(controlPointGridImage->dz / refImage->dz),
+      Ceil(controlPointGridImage->dx / refImage->dx),
+      Ceil(controlPointGridImage->dy / refImage->dy),
+      Ceil(controlPointGridImage->dz / refImage->dz),
    };
    int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt;
    // Allocate some static memory
@@ -378,9 +378,9 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
             gridVox[0] = cpx;
             // Compute the corresponding image voxel position
             reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
-            imageVox[0]=reg_round(imageVox[0]);
-            imageVox[1]=reg_round(imageVox[1]);
-            imageVox[2]=reg_round(imageVox[2]);
+            imageVox[0]=Round(imageVox[0]);
+            imageVox[1]=Round(imageVox[1]);
+            imageVox[2]=Round(imageVox[2]);
             //DEBUG
             //imageVox[0]=gridVox[0]*controlPointGridImage->dx / refImage->dx;
             //imageVox[1]=gridVox[1]*controlPointGridImage->dy / refImage->dy;
@@ -436,9 +436,9 @@ void GetGraph_core3D(nifti_image* controlPointGridImage,
                   //DEBUG
                   // Compute the corresponding image voxel position
                   reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
-                  imageVox[0]=reg_round(imageVox[0]);
-                  imageVox[1]=reg_round(imageVox[1]);
-                  imageVox[2]=reg_round(imageVox[2]);
+                  imageVox[0]=Round(imageVox[0]);
+                  imageVox[1]=Round(imageVox[1]);
+                  imageVox[2]=Round(imageVox[2]);
                   //DEBUG
                   //imageVox[0]=gridVox[0]*controlPointGridImage->dx / refImage->dx;
                   //imageVox[1]=gridVox[1]*controlPointGridImage->dy / refImage->dy;
@@ -624,9 +624,9 @@ void reg_mrf::GetPrimsMST(float *edgeWeightMatrix,
 
    //DEBUG
    //int blockSize[3]={
-   //    (int)reg_ceil(controlPointImage->dx / referenceImage->dx),
-   //    (int)reg_ceil(controlPointImage->dy / referenceImage->dy),
-   //    (int)reg_ceil(controlPointImage->dz / referenceImage->dz),
+   //    Ceil(controlPointImage->dx / referenceImage->dx),
+   //    Ceil(controlPointImage->dy / referenceImage->dy),
+   //    Ceil(controlPointImage->dz / referenceImage->dz),
    //};
    //size_t sz=NiftiImage::calcVoxelNumber(referenceImage, 3);
    //int m=referenceImage->nx;
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index a74772bf..1408df36 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -398,9 +398,9 @@ void ResampleImage3D(const nifti_image *floatingImage,
                 // real -> voxel; floating space
                 reg_mat44_mul(floatingIJKMatrix, world, position);
 
-                previous[0] = static_cast<int>(reg_floor(position[0]));
-                previous[1] = static_cast<int>(reg_floor(position[1]));
-                previous[2] = static_cast<int>(reg_floor(position[2]));
+                previous[0] = Floor(position[0]);
+                previous[1] = Floor(position[1]);
+                previous[2] = Floor(position[2]);
 
                 relative[0] = static_cast<double>(position[0]) - static_cast<double>(previous[0]);
                 relative[1] = static_cast<double>(position[1]) - static_cast<double>(previous[1]);
@@ -469,25 +469,25 @@ void ResampleImage3D(const nifti_image *floatingImage,
             case NIFTI_TYPE_UINT8:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT16:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT32:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                 break;
             default:
                 if (intensity != intensity)
                     intensity = 0;
-                warpedIntensity[index] = static_cast<FloatingType>(reg_round(intensity));
+                warpedIntensity[index] = static_cast<FloatingType>(Round(intensity));
                 break;
             }
         }
@@ -578,8 +578,8 @@ void ResampleImage2D(const nifti_image *floatingImage,
                 // real -> voxel; floating space
                 reg_mat44_mul(floatingIJKMatrix, world, position);
 
-                previous[0] = static_cast<int>(reg_floor(position[0]));
-                previous[1] = static_cast<int>(reg_floor(position[1]));
+                previous[0] = Floor(position[0]);
+                previous[1] = Floor(position[1]);
 
                 relative[0] = static_cast<double>(position[0]) - static_cast<double>(previous[0]);
                 relative[1] = static_cast<double>(position[1]) - static_cast<double>(previous[1]);
@@ -615,19 +615,19 @@ void ResampleImage2D(const nifti_image *floatingImage,
                     warpedIntensity[index] = static_cast<FloatingType>(intensity);
                     break;
                 case NIFTI_TYPE_UINT8:
-                    intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
-                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                    intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
+                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                     break;
                 case NIFTI_TYPE_UINT16:
-                    intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
-                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                    intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
+                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                     break;
                 case NIFTI_TYPE_UINT32:
-                    intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                    intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
+                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                     break;
                 default:
-                    warpedIntensity[index] = static_cast<FloatingType>(reg_round(intensity));
+                    warpedIntensity[index] = static_cast<FloatingType>(Round(intensity));
                     break;
                 }
             }
@@ -871,13 +871,13 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
 
                             // Interpolate (trilinearly) the deformation field for non-integer positions
                             float scalling = 1.0f;
-                            currentAPre = (float)(reg_floor(currentA + (shiftSamp[0] / warpedImage->pixdim[1]) * scalling));
+                            currentAPre = (float)Floor(currentA + (shiftSamp[0] / warpedImage->pixdim[1]) * scalling);
                             currentARel = currentA + (shiftSamp[0] / warpedImage->pixdim[1] * scalling) - (float)(currentAPre);
 
-                            currentBPre = (float)(reg_floor(currentB + (shiftSamp[1] / warpedImage->pixdim[2])));
+                            currentBPre = (float)Floor(currentB + (shiftSamp[1] / warpedImage->pixdim[2]));
                             currentBRel = currentB + (shiftSamp[1] / warpedImage->pixdim[2] * scalling) - (float)(currentBPre);
 
-                            currentCPre = (float)(reg_floor(currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling)));
+                            currentCPre = (float)Floor(currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling));
                             currentCRel = currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling) - (float)(currentCPre);
 
                             // Interpolate the PSF world coordinates
@@ -923,9 +923,9 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
                                     // real -> voxel; floating space
                                     reg_mat44_mul(floatingIJKMatrix, psfWorld, position);
 
-                                    previous[0] = static_cast<int>(reg_floor(position[0]));
-                                    previous[1] = static_cast<int>(reg_floor(position[1]));
-                                    previous[2] = static_cast<int>(reg_floor(position[2]));
+                                    previous[0] = Floor(position[0]);
+                                    previous[1] = Floor(position[1]);
+                                    previous[2] = Floor(position[2]);
 
                                     relative[0] = position[0] - static_cast<double>(previous[0]);
                                     relative[1] = position[1] - static_cast<double>(previous[1]);
@@ -987,25 +987,25 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
             case NIFTI_TYPE_UINT8:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT16:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT32:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                 break;
             default:
                 if (intensity != intensity)
                     intensity = 0;
-                warpedIntensity[index] = static_cast<FloatingType>(reg_round(intensity));
+                warpedIntensity[index] = static_cast<FloatingType>(Round(intensity));
                 break;
             }
         }
@@ -1062,11 +1062,11 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
     }
     for (int j = 0; j < 3; j++) {
         for (int i = 0; i < 3; i++) {
-            T.m[j][j] += reg_pow2(warpedMatrix->m[i][j]);
-            S.m[j][j] += reg_pow2(floatingMatrix->m[i][j]);
+            T.m[j][j] += Square(warpedMatrix->m[i][j]);
+            S.m[j][j] += Square(floatingMatrix->m[i][j]);
         }
-        T.m[j][j] = reg_pow2(sqrtf(T.m[j][j]) / fwhmToStd) / 2.0f;
-        S.m[j][j] = reg_pow2(sqrtf(S.m[j][j]) / fwhmToStd) / 2.0f;
+        T.m[j][j] = Square(sqrtf(T.m[j][j]) / fwhmToStd) / 2.0f;
+        S.m[j][j] = Square(sqrtf(S.m[j][j]) / fwhmToStd) / 2.0f;
     }
 
     // Define the kernel to use
@@ -1274,13 +1274,13 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
 
                                 if (psfWeight != 0.f) { // If the relative weight is above 0
                                     // Interpolate (trilinearly) the deformation field for non-integer positions
-                                    currentAPre = (size_t)(currentA + (size_t)reg_floor(psf_xyz[0] / (float)warpedImage->pixdim[1]));
+                                    currentAPre = (size_t)(currentA + (size_t)Floor(psf_xyz[0] / (float)warpedImage->pixdim[1]));
                                     currentARel = (float)currentA + (float)(psf_xyz[0] / (float)warpedImage->pixdim[1]) - (float)(currentAPre);
 
-                                    currentBPre = (size_t)(currentB + (size_t)reg_floor(psf_xyz[1] / (float)warpedImage->pixdim[2]));
+                                    currentBPre = (size_t)(currentB + (size_t)Floor(psf_xyz[1] / (float)warpedImage->pixdim[2]));
                                     currentBRel = (float)currentB + (float)(psf_xyz[1] / (float)warpedImage->pixdim[2]) - (float)(currentBPre);
 
-                                    currentCPre = (size_t)(currentC + (size_t)reg_floor(psf_xyz[2] / (float)warpedImage->pixdim[3]));
+                                    currentCPre = (size_t)(currentC + (size_t)Floor(psf_xyz[2] / (float)warpedImage->pixdim[3]));
                                     currentCRel = (float)currentC + (float)(psf_xyz[2] / (float)warpedImage->pixdim[3]) - (float)(currentCPre);
 
                                     // Interpolate the PSF world coordinates
@@ -1325,9 +1325,9 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
                                         // real -> voxel; floating space
                                         reg_mat44_mul(floatingIJKMatrix, psfWorld, position);
 
-                                        previous[0] = static_cast<int>(reg_floor(position[0]));
-                                        previous[1] = static_cast<int>(reg_floor(position[1]));
-                                        previous[2] = static_cast<int>(reg_floor(position[2]));
+                                        previous[0] = Floor(position[0]);
+                                        previous[1] = Floor(position[1]);
+                                        previous[2] = Floor(position[2]);
 
                                         relative[0] = position[0] - static_cast<double>(previous[0]);
                                         relative[1] = position[1] - static_cast<double>(previous[1]);
@@ -1391,37 +1391,37 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
             case NIFTI_TYPE_UINT8:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT16:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                 break;
             case NIFTI_TYPE_UINT32:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? reg_round(intensity) : 0);
+                intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
+                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
                 break;
             case NIFTI_TYPE_INT16:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 32767 ? reg_round(intensity) : 32767); // 32767=2^15-1
+                intensity = (intensity <= 32767 ? Round(intensity) : 32767); // 32767=2^15-1
                 warpedIntensity[index] = static_cast<FloatingType>(intensity);
                 break;
             case NIFTI_TYPE_INT32:
                 if (intensity != intensity)
                     intensity = 0;
-                intensity = (intensity <= 2147483647 ? reg_round(intensity) : 2147483647); // 2147483647=2^31-1
+                intensity = (intensity <= 2147483647 ? Round(intensity) : 2147483647); // 2147483647=2^31-1
                 warpedIntensity[index] = static_cast<FloatingType>(intensity);
                 break;
             default:
                 if (intensity != intensity)
                     intensity = 0;
-                warpedIntensity[index] = static_cast<FloatingType>(reg_round(intensity));
+                warpedIntensity[index] = static_cast<FloatingType>(Round(intensity));
                 break;
             }
         }
@@ -1577,10 +1577,10 @@ void reg_bilinearResampleGradient(const nifti_image *floatingImage,
                 floating_mm_to_voxel->m[1][3];
 
             // Extract the floating value using bilinear interpolation
-            anteIntX[0] = static_cast<int>(reg_floor(xFloCoord));
-            anteIntX[1] = static_cast<int>(reg_ceil(xFloCoord));
-            anteIntY[0] = static_cast<int>(reg_floor(yFloCoord));
-            anteIntY[1] = static_cast<int>(reg_ceil(yFloCoord));
+            anteIntX[0] = Floor(xFloCoord);
+            anteIntX[1] = Ceil(xFloCoord);
+            anteIntY[0] = Floor(yFloCoord);
+            anteIntY[1] = Ceil(yFloCoord);
             val_x = 0;
             val_y = 0;
             basisX[1] = fabs(xFloCoord - (DataType)anteIntX[0]);
@@ -1757,12 +1757,12 @@ void reg_trilinearResampleGradient(const nifti_image *floatingImage,
                     floating_mm_to_voxel->m[2][3];
 
                 // Extract the floating value using bilinear interpolation
-                anteIntX[0] = static_cast<int>(reg_floor(xFloCoord));
-                anteIntX[1] = static_cast<int>(reg_ceil(xFloCoord));
-                anteIntY[0] = static_cast<int>(reg_floor(yFloCoord));
-                anteIntY[1] = static_cast<int>(reg_ceil(yFloCoord));
-                anteIntZ[0] = static_cast<int>(reg_floor(zFloCoord));
-                anteIntZ[1] = static_cast<int>(reg_ceil(zFloCoord));
+                anteIntX[0] = Floor(xFloCoord);
+                anteIntX[1] = Ceil(xFloCoord);
+                anteIntY[0] = Floor(yFloCoord);
+                anteIntY[1] = Ceil(yFloCoord);
+                anteIntZ[0] = Floor(zFloCoord);
+                anteIntZ[1] = Ceil(zFloCoord);
                 val_x = 0;
                 val_y = 0;
                 val_z = 0;
@@ -1983,9 +1983,9 @@ void TrilinearImageGradient(const nifti_image *floatingImage,
             /* real -> voxel; floating space */
             reg_mat44_mul(floatingIJKMatrix, world, position);
 
-            previous[0] = static_cast<int>(reg_floor(position[0]));
-            previous[1] = static_cast<int>(reg_floor(position[1]));
-            previous[2] = static_cast<int>(reg_floor(position[2]));
+            previous[0] = Floor(position[0]);
+            previous[1] = Floor(position[1]);
+            previous[2] = Floor(position[2]);
             // basis values along the x axis
             relative = position[0] - (FieldType)previous[0];
             xBasis[0] = (FieldType)(1.0 - relative);
@@ -2150,8 +2150,8 @@ void BilinearImageGradient(const nifti_image *floatingImage,
             position[0] = world[0] * floatingIJKMatrix->m[0][0] + world[1] * floatingIJKMatrix->m[0][1] + floatingIJKMatrix->m[0][3];
             position[1] = world[0] * floatingIJKMatrix->m[1][0] + world[1] * floatingIJKMatrix->m[1][1] + floatingIJKMatrix->m[1][3];
 
-            previous[0] = static_cast<int>(reg_floor(position[0]));
-            previous[1] = static_cast<int>(reg_floor(position[1]));
+            previous[0] = Floor(position[0]);
+            previous[1] = Floor(position[1]);
             // basis values along the x axis
             relative = position[0] - (FieldType)previous[0];
             relative = relative > 0 ? relative : 0;
@@ -2260,9 +2260,9 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage,
             /* real -> voxel; floating space */
             reg_mat44_mul(floatingIJKMatrix, world, position);
 
-            previous[0] = static_cast<int>(reg_floor(position[0]));
-            previous[1] = static_cast<int>(reg_floor(position[1]));
-            previous[2] = static_cast<int>(reg_floor(position[2]));
+            previous[0] = Floor(position[0]);
+            previous[1] = Floor(position[1]);
+            previous[2] = Floor(position[2]);
 
             // basis values along the x axis
             relative = position[0] - (FieldType)previous[0];
@@ -2397,8 +2397,8 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage,
             position[0] = world[0] * floatingIJKMatrix->m[0][0] + world[1] * floatingIJKMatrix->m[0][1] + floatingIJKMatrix->m[0][3];
             position[1] = world[0] * floatingIJKMatrix->m[1][0] + world[1] * floatingIJKMatrix->m[1][1] + floatingIJKMatrix->m[1][3];
 
-            previous[0] = static_cast<int>(reg_floor(position[0]));
-            previous[1] = static_cast<int>(reg_floor(position[1]));
+            previous[0] = Floor(position[0]);
+            previous[1] = Floor(position[1]);
             // basis values along the x axis
             relative = position[0] - (FieldType)previous[0];
             relative = relative > 0 ? relative : 0;
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index b3d805a2..3c8d912e 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -352,9 +352,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
 
     // Compute the block size
     int blockSize[3] = {
-        (int)reg_ceil(controlPointGridImage->dx / refImage->dx),
-        (int)reg_ceil(controlPointGridImage->dy / refImage->dy),
-        (int)reg_ceil(controlPointGridImage->dz / refImage->dz),
+        Ceil(controlPointGridImage->dx / refImage->dx),
+        Ceil(controlPointGridImage->dy / refImage->dy),
+        Ceil(controlPointGridImage->dz / refImage->dz),
     };
     int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt;
     int currentControlPoint = 0;
@@ -413,9 +413,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                 gridVox[0] = cpx;
                 // Compute the corresponding image voxel position
                 reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
-                imageVox[0] = reg_round(imageVox[0]);
-                imageVox[1] = reg_round(imageVox[1]);
-                imageVox[2] = reg_round(imageVox[2]);
+                imageVox[0] = Round(imageVox[0]);
+                imageVox[1] = Round(imageVox[1]);
+                imageVox[2] = Round(imageVox[2]);
 
                 // Extract the block in the reference image
                 blockIndex = 0;
@@ -487,7 +487,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
 #ifdef MRF_USE_SAD
                                                 currentValue = fabs(warpedValue - refBlockValue[blockIndex]);
 #else
-                                                currentValue = reg_pow2(warpedValue - refBlockValue[blockIndex]);
+                                                currentValue = Square(warpedValue - refBlockValue[blockIndex]);
 #endif
                                                 if (currentValue == currentValue) {
                                                     currentSum -= currentValue;
@@ -546,7 +546,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                                         // Check if the value is defined
                                         if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) {
                                             // compute the distance between label and label2
-                                            current_distance = reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z);
+                                            current_distance = Square(label_x - label2_x) + Square(label_y - label2_y) + Square(label_z - label2_z);
                                             if (current_distance < min_distance) {
                                                 min_distance = current_distance;
                                                 discretisedValuePtr[label] = discretisedValuePtr[label2];
@@ -594,9 +594,9 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
 
     // Compute the block size
     const int blockSize[3] = {
-        (int)reg_ceil(controlPointGridImage->dx / refImage->dx),
-        (int)reg_ceil(controlPointGridImage->dy / refImage->dy),
-        (int)reg_ceil(controlPointGridImage->dz / refImage->dz),
+        Ceil(controlPointGridImage->dx / refImage->dx),
+        Ceil(controlPointGridImage->dy / refImage->dy),
+        Ceil(controlPointGridImage->dz / refImage->dz),
     };
     int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2];
     int voxelBlockNumber_t = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt;
@@ -646,9 +646,9 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
 
                 // Compute the corresponding image voxel position
                 reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
-                imageVox[0] = static_cast<float>(reg_round(imageVox[0]));
-                imageVox[1] = static_cast<float>(reg_round(imageVox[1]));
-                imageVox[2] = static_cast<float>(reg_round(imageVox[2]));
+                imageVox[0] = static_cast<float>(Round(imageVox[0]));
+                imageVox[1] = static_cast<float>(Round(imageVox[1]));
+                imageVox[2] = static_cast<float>(Round(imageVox[2]));
 
                 //INIT
                 for (idBlock = 0; idBlock < voxelBlockNumber_t; idBlock++) {
@@ -702,13 +702,13 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
 #ifdef MRF_USE_SAD
                                                         currentValue = fabs(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]);
 #else
-                                                        currentValue = static_cast<float>(reg_pow2(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]));
+                                                        currentValue = static_cast<float>(Square(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]));
 #endif
                                                     } else {
 #ifdef MRF_USE_SAD
                                                         currentValue = fabs(0 - refBlockValue[tid][blockIndex_t]);
 #else
-                                                        currentValue = reg_pow2(0 - refBlockValue[tid][blockIndex_t]);
+                                                        currentValue = Square(0 - refBlockValue[tid][blockIndex_t]);
 #endif
                                                     }
 
@@ -724,7 +724,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
 #ifdef MRF_USE_SAD
                                                     currentValue = fabs(0 - refBlockValue[tid][blockIndex_t]);
 #else
-                                                    currentValue = reg_pow2(0 - refBlockValue[tid][blockIndex_t]);
+                                                    currentValue = Square(0 - refBlockValue[tid][blockIndex_t]);
 #endif
                                                     if (currentValue == currentValue) {
                                                         currentSum -= currentValue;
@@ -785,7 +785,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
                                         // Check if the value is defined
                                         if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) {
                                             // compute the distance between label and label2
-                                            current_distance = static_cast<float>(reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z));
+                                            current_distance = static_cast<float>(Square(label_x - label2_x) + Square(label_y - label2_y) + Square(label_z - label2_z));
                                             if (current_distance < min_distance) {
                                                 min_distance = current_distance;
                                                 discretisedValuePtr[label] = discretisedValuePtr[label2];
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 35bfebd1..753e158c 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -258,18 +258,18 @@ void reg_getRealImageSpacing(nifti_image *image, float *spacingValues) {
     indexVoxel2[1] = indexVoxel2[2] = 0;
     indexVoxel2[0] = 1;
     reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
-    spacingValues[0] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2]));
+    spacingValues[0] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
 
     indexVoxel2[0] = indexVoxel2[2] = 0;
     indexVoxel2[1] = 1;
     reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
-    spacingValues[1] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2]));
+    spacingValues[1] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
 
     if (image->nz > 1) {
         indexVoxel2[0] = indexVoxel2[1] = 0;
         indexVoxel2[2] = 1;
         reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
-        spacingValues[2] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2]));
+        spacingValues[2] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
     }
 }
 /* *************************************************************** */
@@ -905,7 +905,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                             for (int i = -radius; i <= radius; i++) {
                                 // 2.506... = sqrt(2*pi)
                                 // temp contains the sigma in voxel
-                                kernel[radius + i] = static_cast<float>(exp(-(i * i) / (2.0 * reg_pow2(temp))) / (temp * 2.506628274631));
+                                kernel[radius + i] = static_cast<float>(exp(-(i * i) / (2.0 * Square(temp))) / (temp * 2.506628274631));
                                 kernelSum += kernel[radius + i];
                             }
                         } else if (kernelType == LINEAR_KERNEL) {
@@ -1373,7 +1373,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
     int oldDim[4];
     for (int i = 1; i < 4; i++) {
         oldDim[i] = image->dim[i];
-        if (image->dim[i] > 1 && downsampleAxis[i]) image->dim[i] = static_cast<int>(reg_ceil(image->dim[i] / 2.0));
+        if (image->dim[i] > 1 && downsampleAxis[i]) image->dim[i] = Ceil(image->dim[i] / 2.0);
         if (image->pixdim[i] > 0 && downsampleAxis[i]) image->pixdim[i] = image->pixdim[i] * 2.0f;
     }
     image->nx = image->dim[1];
@@ -1451,9 +1451,9 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
                         z * image->qto_xyz.m[2][2] +
                         image->qto_xyz.m[2][3];
                     // Extract the position in voxel in the old image;
-                    position[0] = (int)reg_round(real[0] * real2Voxel_qform.m[0][0] + real[1] * real2Voxel_qform.m[0][1] + real[2] * real2Voxel_qform.m[0][2] + real2Voxel_qform.m[0][3]);
-                    position[1] = (int)reg_round(real[0] * real2Voxel_qform.m[1][0] + real[1] * real2Voxel_qform.m[1][1] + real[2] * real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]);
-                    position[2] = (int)reg_round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]);
+                    position[0] = Round(real[0] * real2Voxel_qform.m[0][0] + real[1] * real2Voxel_qform.m[0][1] + real[2] * real2Voxel_qform.m[0][2] + real2Voxel_qform.m[0][3]);
+                    position[1] = Round(real[0] * real2Voxel_qform.m[1][0] + real[1] * real2Voxel_qform.m[1][1] + real[2] * real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]);
+                    position[2] = Round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]);
                     if (oldDim[3] == 1) position[2] = 0;
                     // Nearest neighbour is used as downsampling ratio is constant
                     intensity = std::numeric_limits<ImageType>::quiet_NaN();
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index 7f08b840..d91d7cf2 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -242,19 +242,19 @@ DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) {
         return static_cast<double>(intensity);
         break;
     case NIFTI_TYPE_UINT8:
-        intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
-        return static_cast<unsigned char>(intensity > 0 ? reg_round(intensity) : 0);
+        intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
+        return static_cast<unsigned char>(intensity > 0 ? Round(intensity) : 0);
         break;
     case NIFTI_TYPE_UINT16:
-        intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
-        return static_cast<unsigned short>(intensity > 0 ? reg_round(intensity) : 0);
+        intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
+        return static_cast<unsigned short>(intensity > 0 ? Round(intensity) : 0);
         break;
     case NIFTI_TYPE_UINT32:
-        intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-        return static_cast<unsigned>(intensity > 0 ? reg_round(intensity) : 0);
+        intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
+        return static_cast<unsigned>(intensity > 0 ? Round(intensity) : 0);
         break;
     default:
-        return static_cast<DataType>(reg_round(intensity));
+        return static_cast<DataType>(Round(intensity));
         break;
     }
 }
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 63cc488f..1d485af8 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -160,19 +160,19 @@ DataType CudaContent::CastImageData(float intensity, int datatype) {
         return static_cast<double>(intensity);
         break;
     case NIFTI_TYPE_UINT8:
-        intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1
-        return static_cast<unsigned char>(intensity > 0 ? reg_round(intensity) : 0);
+        intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
+        return static_cast<unsigned char>(intensity > 0 ? Round(intensity) : 0);
         break;
     case NIFTI_TYPE_UINT16:
-        intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1
-        return static_cast<unsigned short>(intensity > 0 ? reg_round(intensity) : 0);
+        intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
+        return static_cast<unsigned short>(intensity > 0 ? Round(intensity) : 0);
         break;
     case NIFTI_TYPE_UINT32:
-        intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1
-        return static_cast<unsigned>(intensity > 0 ? reg_round(intensity) : 0);
+        intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
+        return static_cast<unsigned>(intensity > 0 ? Round(intensity) : 0);
         break;
     default:
-        return static_cast<DataType>(reg_round(intensity));
+        return static_cast<DataType>(Round(intensity));
         break;
     }
 }
diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
index 8516a148..d4444b06 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -30,7 +30,7 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
     NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float)));
 
     const unsigned threads = CudaContext::GetBlockSize()->GetMaximalLength;
-    const unsigned blocks = static_cast<unsigned>(reg_ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
+    const unsigned blocks = static_cast<unsigned>(Ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
     dim3 blockDims(threads, 1, 1);
     dim3 gridDims(blocks, blocks, 1);
     GetMaximalLengthKernel<<<gridDims, blockDims>>>(dists, *imageTexture, static_cast<unsigned>(nVoxels), optimiseX, optimiseY, optimiseZ);
@@ -65,7 +65,7 @@ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda,
                                        const bool& optimiseY,
                                        const bool& optimiseZ) {
     const unsigned threads = CudaContext::GetBlockSize()->reg_arithmetic;
-    const unsigned blocks = static_cast<unsigned>(ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
+    const unsigned blocks = static_cast<unsigned>(Ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
     const dim3 blockDims(threads, 1, 1);
     const dim3 gridDims(blocks, blocks, 1);
     NormaliseGradientKernel<<<gridDims, blockDims>>>(imageCuda, static_cast<unsigned>(nVoxels), 1 / maxGradLength, optimiseX, optimiseY, optimiseZ);
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index 820cffe8..cb7127bd 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -28,7 +28,7 @@ void reg_affine_positionField_gpu(const mat44 *affineMatrix,
     const mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix);
 
     const unsigned blocks = CudaContext::GetBlockSize()->reg_affine_deformationField;
-    const unsigned grids = (unsigned)ceil(sqrtf((float)targetImage->nvox / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)targetImage->nvox / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     reg_affine_deformationField_kernel<<<gridDims, blockDims>>>(deformationFieldCuda, transformationMatrix, imageSize, (unsigned)voxelNumber);
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index a91b8f9b..3538edf4 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -147,7 +147,7 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
 
     if (referenceImage->nz > 1) {
         const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW3D;
-        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_getVoxelBasedNMIGradientUsingPW3D_kernel<<<gridDims, blockDims>>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture,
@@ -157,7 +157,7 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW2D;
-        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_getVoxelBasedNMIGradientUsingPW2D_kernel<<<gridDims, blockDims>>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture,
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 7f971b20..2c1bcf0b 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -177,7 +177,7 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
                                                           nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     const unsigned blocks = CudaContext::GetBlockSize()->reg_initialiseConjugateGradient;
-    const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
 
@@ -219,7 +219,7 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
 
     // gam = sum((grad+g)*grad)/sum(HxG);
     unsigned blocks = CudaContext::GetBlockSize()->reg_getConjugateGradient1;
-    unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
     dim3 blockDims(blocks, 1, 1);
     dim3 gridDims(grids, grids, 1);
 
@@ -232,7 +232,7 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
                                              make_double2(0, 0), thrust::plus<double2>(), Float2Sum());
     float gam = static_cast<float>(gg.x / gg.y);
     if (isSymmetric) {
-        grids = (unsigned)reg_ceil(sqrtf((float)nVoxelsBw / (float)blocks));
+        grids = (unsigned)Ceil(sqrtf((float)nVoxelsBw / (float)blocks));
         gridDims = dim3(blocks, 1, 1);
         blockDims = dim3(grids, grids, 1);
         thrust::device_vector<float2> sumsBwCuda(nVoxelsBw + nVoxelsBw % 2);  // Make it even for thrust::inner_product
@@ -246,13 +246,13 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
     }
 
     blocks = (unsigned)CudaContext::GetBlockSize()->reg_getConjugateGradient2;
-    grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
     gridDims = dim3(blocks, 1, 1);
     blockDims = dim3(grids, grids, 1);
     reg_getConjugateGradient2_kernel<<<blockDims, gridDims>>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     if (isSymmetric) {
-        grids = (unsigned)reg_ceil(sqrtf((float)nVoxelsBw / (float)blocks));
+        grids = (unsigned)Ceil(sqrtf((float)nVoxelsBw / (float)blocks));
         gridDims = dim3(blocks, 1, 1);
         blockDims = dim3(grids, grids, 1);
         reg_getConjugateGradient2_kernel<<<blockDims, gridDims>>>(gradientImageBwCuda, conjugateGBwCuda, conjugateHBwCuda, (unsigned)nVoxelsBw, gam);
@@ -274,7 +274,7 @@ void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
                                                           nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     const unsigned blocks = (unsigned)CudaContext::GetBlockSize()->reg_updateControlPointPosition;
-    const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
     const dim3 blockDims(blocks, 1, 1);
     const dim3 gridDims(grids, grids, 1);
     reg_updateControlPointPosition_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ);
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index 7f81bad9..a03688af 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -38,7 +38,7 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage,
 
     if (floatingImage->nz > 1) {
         const unsigned blocks = blockSize->reg_resampleImage3D;
-        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_resampleImage3D_kernel<<<gridDims, blockDims>>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture,
@@ -46,7 +46,7 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage,
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->reg_resampleImage2D;
-        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_resampleImage2D_kernel<<<gridDims, blockDims>>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture,
@@ -75,7 +75,7 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage,
 
     if (floatingImage->nz > 1) {
         const unsigned blocks = blockSize->reg_getImageGradient3D;
-        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_getImageGradient3D_kernel<<<gridDims, blockDims>>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture,
@@ -83,7 +83,7 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage,
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->reg_getImageGradient2D;
-        const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_getImageGradient2D_kernel<<<gridDims, blockDims>>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture,
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 07506c8d..8a853da9 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -42,7 +42,7 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray,
                               floatingMatrix.m[1][3]);
 
         // Compute the linear interpolation
-        const int2 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y) };
+        const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) };
         const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
         float xBasis[2], yBasis[2];
         InterpLinearKernel(relative.x, xBasis);
@@ -99,7 +99,7 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray,
                               floatingMatrix.m[2][3]);
 
         // Compute the linear interpolation
-        const int3 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y), reg_floor(voxelDeformation.z) };
+        const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) };
         const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z };
         float xBasis[2], yBasis[2], zBasis[2];
         InterpLinearKernel(relative.x, xBasis);
@@ -153,7 +153,7 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
                               floatingMatrix.m[1][3]);
 
         // Compute the gradient
-        const int2 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y) };
+        const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) };
         float xBasis[2], yBasis[2];
         const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
         InterpLinearKernel(relative.x, xBasis);
@@ -210,7 +210,7 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
                               floatingMatrix.m[2][3]);
 
         // Compute the gradient
-        const int3 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y), reg_floor(voxelDeformation.z) };
+        const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) };
         float xBasis[2], yBasis[2], zBasis[2];
         const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z };
         InterpLinearKernel(relative.x, xBasis);
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 2a3e853b..33973c5e 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -79,7 +79,7 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage,
 
     // Compute the absolute values
     const unsigned blocks = CudaContext::GetBlockSize()->GetSsdValue;
-    const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     Cuda::GetSsdValueKernel<<<gridDims, blockDims>>>(ssdSum.data().get(), ssdCount.data().get(), *referenceTexture,
@@ -152,7 +152,7 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
     const float adjustedWeight = timepointWeight / static_cast<float>(validVoxelNumber);
 
     const unsigned blocks = CudaContext::GetBlockSize()->GetSsdGradient;
-    const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     Cuda::GetSsdGradientKernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 46f6417b..181b66f7 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -60,16 +60,16 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
     float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz };
     for (int i = 0; i < (is3d ? 3 : 2); ++i) {
         if (nodeImage->sform_code > 0) {
-            ratio[i] = sqrt(reg_pow2(nodeImage->sto_xyz.m[i][0]) +
-                            reg_pow2(nodeImage->sto_xyz.m[i][1]) +
-                            reg_pow2(nodeImage->sto_xyz.m[i][2]));
+            ratio[i] = sqrt(Square(nodeImage->sto_xyz.m[i][0]) +
+                            Square(nodeImage->sto_xyz.m[i][1]) +
+                            Square(nodeImage->sto_xyz.m[i][2]));
         }
         ratio[i] /= voxelImage->pixdim[i + 1];
         weight *= ratio[i];
     }
 
     const unsigned blocks = CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric;
-    const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     reg_voxelCentric2NodeCentric_kernel<<<gridDims, blockDims>>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims,
@@ -82,7 +82,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ
                                                     float4 *nmiGradientCuda) {
     const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const unsigned blocks = CudaContext::GetBlockSize()->reg_convertNMIGradientFromVoxelToRealSpace;
-    const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     reg_convertNMIGradientFromVoxelToRealSpace_kernel<<<gridDims, blockDims>>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber);
@@ -109,7 +109,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image,
             float currentSigma;
             if (sigma > 0) currentSigma = sigma / image->pixdim[n];
             else currentSigma = fabs(sigma); // voxel based if negative value
-            const int radius = (int)ceil(currentSigma * 3.0f);
+            const int radius = (int)Ceil(currentSigma * 3.0f);
             if (radius > 0) {
                 const int kernelSize = 1 + radius * 2;
                 float *kernel;
@@ -142,7 +142,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image,
                 switch (n) {
                 case 1:
                     blocks = blockSize->reg_ApplyConvolutionWindowAlongX;
-                    grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                    grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                     gridDims = dim3(grids, grids, 1);
                     blockDims = dim3(blocks, 1, 1);
                     reg_applyConvolutionWindowAlongX_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
@@ -151,7 +151,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image,
                     break;
                 case 2:
                     blocks = blockSize->reg_ApplyConvolutionWindowAlongY;
-                    grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                    grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                     gridDims = dim3(grids, grids, 1);
                     blockDims = dim3(blocks, 1, 1);
                     reg_applyConvolutionWindowAlongY_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
@@ -160,7 +160,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image,
                     break;
                 case 3:
                     blocks = blockSize->reg_ApplyConvolutionWindowAlongZ;
-                    grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                    grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                     gridDims = dim3(grids, grids, 1);
                     blockDims = dim3(blocks, 1, 1);
                     reg_applyConvolutionWindowAlongZ_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
@@ -185,7 +185,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
 
     for (int n = 0; n < 3; n++) {
         if (spacingVoxel[n] > 0 && image->dim[n + 1] > 1) {
-            int radius = static_cast<int>(reg_ceil(2.0 * spacingVoxel[n]));
+            int radius = Ceil(2.0 * spacingVoxel[n]);
             int kernelSize = 1 + radius * 2;
 
             float *kernel;
@@ -220,7 +220,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
             switch (n) {
             case 0:
                 blocks = blockSize->reg_ApplyConvolutionWindowAlongX;
-                grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                 gridDims = dim3(grids, grids, 1);
                 blockDims = dim3(blocks, 1, 1);
                 reg_applyConvolutionWindowAlongX_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
@@ -229,7 +229,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
                 break;
             case 1:
                 blocks = blockSize->reg_ApplyConvolutionWindowAlongY;
-                grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                 gridDims = dim3(grids, grids, 1);
                 blockDims = dim3(blocks, 1, 1);
                 reg_applyConvolutionWindowAlongY_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
@@ -238,7 +238,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
                 break;
             case 2:
                 blocks = blockSize->reg_ApplyConvolutionWindowAlongZ;
-                grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks));
+                grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                 gridDims = dim3(grids, grids, 1);
                 blockDims = dim3(blocks, 1, 1);
                 reg_applyConvolutionWindowAlongZ_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
@@ -255,7 +255,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
 /* *************************************************************** */
 void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
     const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
-    const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
     reg_multiplyValue_kernel_float4<<<gridDims, blockDims>>>(arrayCuda, value, (unsigned)count);
@@ -264,7 +264,7 @@ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float&
 /* *************************************************************** */
 void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
     const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
-    const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
     reg_addValue_kernel_float4<<<gridDims, blockDims>>>(arrayCuda, value, (unsigned)count);
@@ -273,7 +273,7 @@ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value
 /* *************************************************************** */
 void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
     const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
-    const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
     reg_multiplyArrays_kernel_float4<<<gridDims, blockDims>>>(array1Cuda, array2Cuda, (unsigned)count);
@@ -282,7 +282,7 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr
 /* *************************************************************** */
 void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
     const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
-    const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
     reg_addArrays_kernel_float4<<<gridDims, blockDims>>>(array1Cuda, array2Cuda, (unsigned)count);
@@ -291,7 +291,7 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu
 /* *************************************************************** */
 void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) {
     const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
-    const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
     reg_fillMaskArray_kernel<<<gridDims, blockDims>>>(arrayCuda, (unsigned)count);
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index ac06be23..0f033d2d 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -34,7 +34,7 @@ __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda,
 
         // Linear interpolation
         float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{};
-        const int pre[3] = { reg_floor(voxelCoord[0]), reg_floor(voxelCoord[1]), reg_floor(voxelCoord[2]) };
+        const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) };
         basisX[1] = voxelCoord[0] - static_cast<float>(pre[0]);
         basisX[0] = 1.f - basisX[1];
         basisY[1] = voxelCoord[1] - static_cast<float>(pre[1]);
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index 00a0f5a6..6e45e6d1 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -207,13 +207,13 @@ class LnccTest {
 
         for (int z = -kernel.radius[2]; z <= kernel.radius[2]; z++) {
             const float z_value = static_cast<float>(
-                exp(-(z * z) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631));
+                exp(-(z * z) / (2.0 * Square(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631));
             for (int y = -kernel.radius[1]; y <= kernel.radius[1]; y++) {
                 const float y_value = static_cast<float>(
-                    exp(-(y * y) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631));
+                    exp(-(y * y) / (2.0 * Square(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631));
                 for (int x = -kernel.radius[0]; x <= kernel.radius[0]; x++) {
                     const float x_value = static_cast<float>(
-                        exp(-(x * x) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631));
+                        exp(-(x * x) / (2.0 * Square(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631));
                     *kernelPtr++ = x_value * y_value * z_value;
                 }
             }
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index c23d95ac..47f56f90 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -141,9 +141,9 @@ class VoxelCentricToNodeCentricTest {
         float ratio[3] = { nodeGrad->dx, nodeGrad->dy, nodeGrad->dz };
         for (int i = 0; i < (is3d ? 3 : 2); ++i) {
             if (nodeGrad->sform_code > 0) {
-                ratio[i] = sqrt(reg_pow2(nodeGrad->sto_xyz.m[i][0]) +
-                                reg_pow2(nodeGrad->sto_xyz.m[i][1]) +
-                                reg_pow2(nodeGrad->sto_xyz.m[i][2]));
+                ratio[i] = sqrt(Square(nodeGrad->sto_xyz.m[i][0]) +
+                                Square(nodeGrad->sto_xyz.m[i][1]) +
+                                Square(nodeGrad->sto_xyz.m[i][2]));
             }
             ratio[i] /= voxelGrad->pixdim[i + 1];
             weight *= ratio[i];
@@ -159,7 +159,7 @@ class VoxelCentricToNodeCentricTest {
                     reg_mat44_mul(&transformation, nodeCoord, voxelCoord);
                     // Linear interpolation
                     DataType basisX[2], basisY[2], basisZ[2];
-                    const int pre[3] = { (int)reg_floor(voxelCoord[0]), (int)reg_floor(voxelCoord[1]), (int)reg_floor(voxelCoord[2]) };
+                    const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) };
                     basisX[1] = voxelCoord[0] - static_cast<DataType>(pre[0]);
                     basisX[0] = static_cast<DataType>(1) - basisX[1];
                     basisY[1] = voxelCoord[1] - static_cast<DataType>(pre[1]);

From c9540be577bb9b99c14a01c6bdbd0af24412d152 Mon Sep 17 00:00:00 2001
From: Marc Modat <marc.modat@gmail.com>
Date: Thu, 31 Aug 2023 10:15:06 +0100
Subject: [PATCH 193/314] #92 addded test for composition of DF. CPU only for
 now

---
 niftyreg_build_version.txt         |   2 +-
 reg-test/CMakeLists.txt            |   1 +
 reg-test/reg_test_composeField.cpp | 168 +++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 reg-test/reg_test_composeField.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a1e0432c..5478c714 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-312
+313
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 033ab263..aa400b40 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -113,6 +113,7 @@ set(EXEC_LIST reg_test_affineDeformationField)
 set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST})
 set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST})
+set(EXEC_LIST reg_test_composeField ${EXEC_LIST})
 set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp
new file mode 100644
index 00000000..354f6c83
--- /dev/null
+++ b/reg-test/reg_test_composeField.cpp
@@ -0,0 +1,168 @@
+// OpenCL is not supported for this test
+#undef _USE_OPENCL
+
+#include "reg_test_common.h"
+#include <iomanip>
+
+/*
+    This test file contains the following unit tests:
+    test functions: composition of deformation field
+*/
+
+
+class ComposeDeformationFieldTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    ComposeDeformationFieldTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a 2D reference image
+        NiftiImage::dim_t size = 5;
+        vector<NiftiImage::dim_t> dimFlo{ size, size };
+        NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
+
+        // Create a 3D reference image
+        dimFlo.push_back(size);
+        NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32);
+
+        // Data container for the test data
+        vector<TestData> testData;
+
+        // Create affine deformation fields
+        NiftiImage inDefField2d = CreateDeformationField(reference2d);
+        NiftiImage inDefField3d = CreateDeformationField(reference3d);
+        NiftiImage defField2d = CreateDeformationField(reference2d);
+        NiftiImage defField3d = CreateDeformationField(reference3d);
+        NiftiImage outDefField2d = CreateDeformationField(reference2d);
+        NiftiImage outDefField3d = CreateDeformationField(reference3d);
+
+        // Identity transformation tests
+        testData.emplace_back(TestData(
+            "2D ID",
+            reference2d,
+            inDefField2d,
+            defField2d,
+            outDefField2d
+        ));
+        testData.emplace_back(TestData(
+            "3D ID",
+            reference3d,
+            inDefField3d,
+            defField3d,
+            outDefField3d
+        ));
+
+        // Scaling transformation tests
+        float * inDefField2dPtr = static_cast<float *>(inDefField2d->data);
+        float * inDefField3dPtr = static_cast<float *>(inDefField3d->data);
+        float * def2dPtr = static_cast<float *>(defField2d->data);
+        float * def3dPtr = static_cast<float *>(defField3d->data);
+        for(size_t i=0; i<inDefField2d.nVoxels(); i++)
+            inDefField2dPtr[i] /= 1.11f;
+        for(size_t i=0; i<inDefField3d.nVoxels(); i++)
+            inDefField3dPtr[i] /= 1.11f;
+        for(size_t i=0; i<defField2d.nVoxels(); i++)
+            def2dPtr[i] *= 1.11f;
+        for(size_t i=0; i<defField3d.nVoxels(); i++)
+            def3dPtr[i] *= 1.11f;
+
+        testData.emplace_back(TestData(
+            "2D scaling",
+            reference2d,
+            inDefField2d,
+            defField2d,
+            outDefField2d
+        ));
+        testData.emplace_back(TestData(
+            "3D scaling",
+            reference3d,
+            inDefField3d,
+            defField3d,
+            outDefField3d
+        ));
+
+        // Check boundary conditions. The default behavior is to use the embedded
+        // affine transformation in the deformation field and shift the boundary
+        // transformation for padding.
+        reg_tools_multiplyValueToImage(defField2d, defField2d, 0.f);
+        reg_tools_multiplyValueToImage(defField3d, defField3d, 0.f);
+        reg_tools_multiplyValueToImage(inDefField2d, inDefField2d, 0.f);
+        reg_tools_multiplyValueToImage(inDefField3d, inDefField3d, 0.f);
+        reg_tools_multiplyValueToImage(outDefField2d, outDefField2d, 0.f);
+        reg_tools_multiplyValueToImage(outDefField3d, outDefField3d, 0.f);
+        reg_getDeformationFromDisplacement(defField2d);
+        reg_getDeformationFromDisplacement(defField3d);
+        reg_getDeformationFromDisplacement(inDefField2d);
+        reg_getDeformationFromDisplacement(inDefField3d);
+        reg_getDeformationFromDisplacement(outDefField2d);
+        reg_getDeformationFromDisplacement(outDefField3d);
+        float * outDefField2dPtr = static_cast<float *>(outDefField2d->data);
+        float * outDefField3dPtr = static_cast<float *>(outDefField3d->data);
+        for(size_t i=0; i<inDefField2d.nVoxels(); i++)
+            inDefField2dPtr[i] += 3.f;
+        for(size_t i=0; i<inDefField3d.nVoxels(); i++)
+            inDefField3dPtr[i] += 3.f;
+        for(size_t i=0; i<defField2d.nVoxels(); i++)
+            def2dPtr[i] += 1.f;
+        for(size_t i=0; i<defField3d.nVoxels(); i++)
+            def3dPtr[i] += 1.f;
+        for(size_t i=0; i<outDefField2d.nVoxels(); i++)
+            outDefField2dPtr[i] += 4.f;
+        for(size_t i=0; i<outDefField3d.nVoxels(); i++)
+            outDefField3dPtr[i] += 4.f;
+        testData.emplace_back(TestData(
+            "2D padding",
+            reference2d,
+            inDefField2d,
+            defField2d,
+            outDefField2d
+        ));
+        testData.emplace_back(TestData(
+            "3D padding",
+            reference3d,
+            inDefField3d,
+            defField3d,
+            outDefField3d
+        ));
+
+        // Run the actual computation with the provided input data
+        for (auto&& data : testData) {
+            auto&& [testName, reference, inDefField, defField, expectedField] = data;
+            // Run the compose on CPU only for now
+            reg_defField_compose(defField, inDefField, nullptr);
+            // Check the results
+            testCases.push_back({testName + " CPU", inDefField, expectedField});
+        }
+
+    }
+};
+
+TEST_CASE_METHOD(ComposeDeformationFieldTest, "Compose deformation field", "[unit]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, result, expected] = testCase;
+
+        SECTION(testName) {
+            std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
+            float *resPtr = static_cast<float *>(result->data);
+            float *expPtr = static_cast<float *>(expected->data);
+            for(unsigned i=0; i<expected.nVoxels();++i){
+                const double diff = fabs(resPtr[i] - expPtr[i]);
+                if (diff > EPS){
+                    std::cout << "[i]=" << i;
+                    std::cout << " | diff=" << diff;
+                    std::cout << " | Result=" << resPtr[i];
+                    std::cout << " | Expected=" << expPtr[i] << std::endl;
+                }
+                REQUIRE(diff < EPS);
+            }
+        }
+    }
+}

From a269f0596ca2b136df50681e5344977d617ceb6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 31 Aug 2023 13:09:12 +0100
Subject: [PATCH 194/314] Fix a bug causing shrunk output images

---
 niftyreg_build_version.txt      | 2 +-
 reg-lib/Content.cpp             | 3 +--
 reg-lib/cpu/_reg_localTrans.cpp | 2 --
 reg-test/reg_test_common.h      | 3 +++
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 5478c714..9346fabb 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-313
+314
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index 3ce854b4..265f329a 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -69,11 +69,10 @@ void Content::AllocateDeformationField(size_t bytes) {
     deformationField->intent_code = NIFTI_INTENT_VECTOR;
     memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name));
     strcpy(deformationField->intent_name, "NREG_TRANS");
-    deformationField->intent_p1 = DISP_FIELD;
+    deformationField->intent_p1 = DEF_FIELD;
     deformationField->scl_slope = 1;
     deformationField->scl_inter = 0;
     deformationField->data = calloc(deformationField->nvox, deformationField->nbyper);
-    reg_getDeformationFromDisplacement(deformationField);
 }
 /* *************************************************************** */
 void Content::DeallocateDeformationField() {
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 87d42be9..8c9d099e 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -122,8 +122,6 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
     controlPointGridImage->intent_code = NIFTI_INTENT_VECTOR;
     memset(controlPointGridImage->intent_name, 0, 16);
     strcpy(controlPointGridImage->intent_name, "NREG_TRANS");
-    // Set to be the identity transformation by default
-    reg_getDeformationFromDisplacement(controlPointGridImage);
     controlPointGridImage->intent_p1 = CUB_SPLINE_GRID;
 }
 template void reg_createControlPointGrid<float>(NiftiImage&, const NiftiImage&, const float*);
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 3437eb3e..5e1c99c3 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -40,6 +40,9 @@ NiftiImage CreateControlPointGrid(const NiftiImage& reference) {
     NiftiImage controlPointGrid;
     reg_createControlPointGrid<float>(controlPointGrid, reference, gridSpacing);
 
+    // The control point position image is initialised with an identity transformation
+    reg_getDeformationFromDisplacement(controlPointGrid);
+
     return controlPointGrid;
 }
 

From 004414ef260da0dba2055d3ff46822b1c6861d02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 1 Sep 2023 16:27:21 +0100
Subject: [PATCH 195/314] Refactor Compute::GetApproximatedGradient()

---
 niftyreg_build_version.txt |  2 +-
 reg-lib/Compute.cpp        | 53 ++++++++++++++++----------------------
 reg-lib/Compute.h          |  1 -
 3 files changed, 23 insertions(+), 33 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9346fabb..66953656 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-314
+315
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index cfae476c..0433ee2c 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -191,44 +191,35 @@ void Compute::SmoothGradient(float sigma) {
     }
 }
 /* *************************************************************** */
-template<typename Type>
 void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
     nifti_image *controlPointGrid = con.GetControlPointGrid();
     nifti_image *transformationGradient = con.GetTransformationGradient();
+    std::visit([&](auto&& cppDataType) {
+        using Type = std::decay_t<decltype(cppDataType)>;
+
+        // Loop over every control point
+        Type *gridPtr = static_cast<Type*>(controlPointGrid->data);
+        Type *gradPtr = static_cast<Type*>(transformationGradient->data);
+        const Type eps = controlPointGrid->dx / Type(100);
+        for (size_t i = 0; i < controlPointGrid->nvox; ++i) {
+            const Type currentValue = gridPtr[i];
+            gridPtr[i] = currentValue + eps;
+            // Update the changes for GPU
+            con.UpdateControlPointGrid();
+            double valPlus = opt.GetObjectiveFunctionValue();
+            gridPtr[i] = currentValue - eps;
+            // Update the changes for GPU
+            con.UpdateControlPointGrid();
+            double valMinus = opt.GetObjectiveFunctionValue();
+            gridPtr[i] = currentValue;
+            gradPtr[i] = -Type((valPlus - valMinus) / (2 * eps));
+        }
 
-    // Loop over every control point
-    Type *gridPtr = static_cast<Type*>(controlPointGrid->data);
-    Type *gradPtr = static_cast<Type*>(transformationGradient->data);
-    const Type eps = controlPointGrid->dx / Type(100);
-    for (size_t i = 0; i < controlPointGrid->nvox; ++i) {
-        const Type currentValue = gridPtr[i];
-        gridPtr[i] = currentValue + eps;
-        // Update the changes for GPU
-        con.UpdateControlPointGrid();
-        double valPlus = opt.GetObjectiveFunctionValue();
-        gridPtr[i] = currentValue - eps;
         // Update the changes for GPU
         con.UpdateControlPointGrid();
-        double valMinus = opt.GetObjectiveFunctionValue();
-        gridPtr[i] = currentValue;
-        gradPtr[i] = -Type((valPlus - valMinus) / (2 * eps));
-    }
-
-    // Update the changes for GPU
-    con.UpdateControlPointGrid();
-    con.UpdateTransformationGradient();
-}
-/* *************************************************************** */
-void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) {
-    switch (dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid()->datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        GetApproximatedGradient<float>(opt);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        GetApproximatedGradient<double>(opt);
-        break;
-    }
+        con.UpdateTransformationGradient();
+    }, NiftiImage::getFloatingDataType(controlPointGrid));
 }
 /* *************************************************************** */
 void Compute::GetDefFieldFromVelocityGrid(bool updateStepNumber) {
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index 3038bf85..b44063f3 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -43,6 +43,5 @@ class Compute {
     virtual void VoxelCentricToNodeCentric(float weight);
 
 private:
-    template<typename Type> void GetApproximatedGradient(InterfaceOptimiser&);
     nifti_image* ScaleGradient(const nifti_image&, float);
 };

From 97966ff82c650c7906caa4fc9eac465b3ba2ca1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 4 Sep 2023 16:57:53 +0100
Subject: [PATCH 196/314] Add NiftiImage::setIntentName()

---
 reg-io/RNifti/NiftiImage.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 308b814d..aacc0bab 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -2143,6 +2143,19 @@ class NiftiImage
         return *this;
     }
 
+    /**
+     * Set the intent name of the image
+     * @param name A string giving the new intent name
+    **/
+    void setIntentName(const std::string& name) {
+        if (image != nullptr)
+        {
+            constexpr size_t intentNameLength = sizeof(image->intent_name) / sizeof(*image->intent_name);
+            std::fill_n(image->intent_name, intentNameLength, 0);
+            std::copy_n(name.begin(), std::min(name.length(), intentNameLength - 1), image->intent_name);
+        }
+    }
+
     /**
      * Write the image to a NIfTI-1 file
      * @param fileName The file name to write to, with appropriate suffix (e.g. ".nii.gz")

From e1b7ad8331771884d0d265ea7c8af0a19a73a943 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 4 Sep 2023 16:50:49 +0100
Subject: [PATCH 197/314] Refactorisations

---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_aladin.cpp                       |   2 +-
 reg-apps/reg_benchmark.cpp                    |  66 +++----
 reg-apps/reg_f3d.cpp                          |   2 +-
 reg-apps/reg_ppcnr.cpp                        |   2 +-
 reg-apps/reg_resample.cpp                     |   2 +-
 reg-apps/reg_tools.cpp                        |   6 +-
 reg-apps/reg_transform.cpp                    |   2 +-
 reg-io/RNifti/NiftiImage.h                    |  10 +-
 reg-io/RNifti/NiftiImage_impl.h               |  44 ++---
 reg-io/nrrd/reg_nrrd.cpp                      |   2 +-
 reg-lib/Compute.cpp                           |   2 +-
 reg-lib/Compute.h                             |   2 +-
 reg-lib/cl/resampleKernel.cl                  |  16 +-
 reg-lib/cpu/_reg_localTrans.cpp               | 180 ++++++++----------
 reg-lib/cpu/_reg_localTrans.h                 |   8 +-
 reg-lib/cpu/_reg_resampling.cpp               |   2 +-
 reg-lib/cpu/_reg_tools.cpp                    |  11 +-
 reg-lib/cuda/BlockSize.hpp                    |  12 +-
 reg-lib/cuda/CMakeLists.txt                   |   1 +
 .../cuda/CudaAffineDeformationFieldKernel.cpp |   6 +-
 reg-lib/cuda/CudaCommon.cu                    |   2 -
 reg-lib/cuda/CudaCommon.hpp                   |   9 +
 reg-lib/cuda/CudaCompute.h                    |   2 +-
 reg-lib/cuda/CudaNormaliseGradient.cu         |   2 +-
 reg-lib/cuda/_reg_common_cuda_kernels.cu      |   6 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.cu |  12 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.h  |   9 +-
 .../cuda/_reg_globalTransformation_kernels.cu |  12 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |  78 ++++----
 reg-lib/cuda/_reg_localTransformation_gpu.h   |  26 +--
 .../cuda/_reg_localTransformation_kernels.cu  |  28 +--
 reg-lib/cuda/_reg_nmi_gpu.cu                  |   1 -
 reg-lib/cuda/_reg_optimiser_gpu.cu            |  24 ++-
 reg-lib/cuda/_reg_optimiser_gpu.h             |  21 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |   1 -
 reg-lib/cuda/_reg_tools_gpu.cu                |  16 +-
 reg-lib/cuda/_reg_tools_gpu.h                 |  14 --
 reg-lib/cuda/affineDeformationKernel.cu       |  47 ++---
 reg-lib/cuda/affineDeformationKernel.h        |   2 +-
 reg-lib/cuda/optimizeKernel.cu                |   4 -
 reg-lib/cuda/resampleKernel.cu                |  15 +-
 42 files changed, 321 insertions(+), 390 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 66953656..47eb669b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-315
+316
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index cb5f4162..9619dcec 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -460,7 +460,7 @@ int main(int argc, char **argv) {
 
     time_t end;
     time(&end);
-    const int minutes = static_cast<int>(floorf((end - start) / 60.0f));
+    const int minutes = Floor((end - start) / 60.0f);
     const int seconds = static_cast<int>(end - start) - 60 * minutes;
     NR_VERBOSE_APP("Registration performed in " << minutes << " min " << seconds << " sec");
     NR_VERBOSE_APP("Have a good day!");
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index fe90b400..8606f563 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -122,9 +122,9 @@ int main(int argc, char **argv)
 
    // A control point image is created
    dim_img[0]=5;
-   dim_img[1]=(int)floor(targetImage->nx*targetImage->dx/gridSpacing)+4;
-   dim_img[2]=(int)floor(targetImage->ny*targetImage->dy/gridSpacing)+4;
-   dim_img[3]=(int)floor(targetImage->nz*targetImage->dz/gridSpacing)+4;
+   dim_img[1]=Floor(targetImage->nx*targetImage->dx/gridSpacing)+4;
+   dim_img[2]=Floor(targetImage->ny*targetImage->dy/gridSpacing)+4;
+   dim_img[3]=Floor(targetImage->nz*targetImage->dz/gridSpacing)+4;
    dim_img[5]=3;
    dim_img[4]=dim_img[6]=dim_img[7]=1;
    nifti_image *controlPointImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
@@ -245,7 +245,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf( "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -255,13 +255,13 @@ int main(int argc, char **argv)
          time(&start);
          for(int i=0; i<maxIt; ++i)
          {
-            reg_affine_positionField_gpu(   affineTransformation,
-                                            targetImage,
-                                            &deformationFieldImageArray_d);
+            reg_affine_getDeformationField_gpu(affineTransformation,
+                                               targetImage,
+                                               &deformationFieldImageArray_d);
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -295,7 +295,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -314,7 +314,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -345,7 +345,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i scaling-and-squaring - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i scaling-and-squarings - %i min %i sec\n", maxIt, minutes, seconds);
@@ -362,7 +362,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i scaling-and-squaring - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i scaling-and-squarings - %i min %i sec\n", maxIt, minutes, seconds);
@@ -395,7 +395,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -416,7 +416,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -447,7 +447,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -466,7 +466,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -525,7 +525,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -558,7 +558,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -588,9 +588,9 @@ int main(int argc, char **argv)
       maxIt=10000 / dimension;
 //        maxIt=1;
       int smoothingRadius[3];
-      smoothingRadius[0] = (int)floor( 2.0*controlPointImage->dx/targetImage->dx );
-      smoothingRadius[1] = (int)floor( 2.0*controlPointImage->dy/targetImage->dy );
-      smoothingRadius[2] = (int)floor( 2.0*controlPointImage->dz/targetImage->dz );
+      smoothingRadius[0] = Floor( 2.0*controlPointImage->dx/targetImage->dx );
+      smoothingRadius[1] = Floor( 2.0*controlPointImage->dy/targetImage->dy );
+      smoothingRadius[2] = Floor( 2.0*controlPointImage->dz/targetImage->dz );
       time(&start);
       for(int i=0; i<maxIt; ++i)
       {
@@ -599,7 +599,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -620,7 +620,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -650,7 +650,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -665,7 +665,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -690,7 +690,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -708,7 +708,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -729,7 +729,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -743,7 +743,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -764,7 +764,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -778,7 +778,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -831,7 +831,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = (int)floorf(float(cpuTime)/60.0f);
+      minutes = Floor(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -852,7 +852,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = (int)floorf(float(gpuTime)/60.0f);
+         minutes = Floor(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 3ca1257c..28fc968c 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -710,7 +710,7 @@ int main(int argc, char **argv) {
 
     time_t end;
     time(&end);
-    const int minutes = static_cast<int>(floorf((end - start) / 60.0f));
+    const int minutes = Floor((end - start) / 60.0f);
     const int seconds = static_cast<int>(end - start) - 60 * minutes;
     NR_VERBOSE_APP("Registration performed in " << minutes << " min " << seconds << " sec");
     NR_VERBOSE_APP("Have a good day!");
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index efc7268a..760a4d45 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -963,7 +963,7 @@ int main(int argc, char **argv)
 
    time_t end;
    time( &end );
-   int minutes = (int)floorf(float(end-start)/60.0f);
+   int minutes = Floor(float(end-start)/60.0f);
    int seconds = (int)(end-start - 60*minutes);
    NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
    if(flag->locality)
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index 366cb4d5..9f42b089 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -473,7 +473,7 @@ int main(int argc, char **argv)
                                   param->interpolation,
                                   param->paddingValue,
                                   jacobian,
-                                  (char)round(param->PSF_Algorithm));
+                                  (char)Round(param->PSF_Algorithm));
             NR_DEBUG("PSF resampling completed");
             free(jacobian);
          }
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 3bcac2f6..002686c6 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -806,10 +806,10 @@ int main(int argc, char **argv)
         // Define the size of the new image
         int newDim[8];
         for(size_t i=0; i<8; ++i) newDim[i]=image->dim[i];
-        newDim[1]=(int)ceilf((float)image->dim[1]*image->pixdim[1]/param->pixdimX);
-        newDim[2]=(int)ceilf((float)image->dim[2]*image->pixdim[2]/param->pixdimY);
+        newDim[1]=Ceil((float)image->dim[1]*image->pixdim[1]/param->pixdimX);
+        newDim[2]=Ceil((float)image->dim[2]*image->pixdim[2]/param->pixdimY);
         if(image->nz>1)
-            newDim[3]=(int)ceilf((float)image->dim[3]*image->pixdim[3]/param->pixdimZ);
+            newDim[3]=Ceil((float)image->dim[3]*image->pixdim[3]/param->pixdimZ);
         // Create the new image
         nifti_image *newImg=nifti_make_new_nim(newDim,image->datatype,true);
         newImg->pixdim[1]=newImg->dx=param->pixdimX;
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index fa91583f..0bf20051 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -538,7 +538,7 @@ int main(int argc, char **argv)
       }
       // Save the generated transformation
       reg_io_WriteImageFile(outputTransformationImage,param->outputTransName);
-      switch(static_cast<int>(round(outputTransformationImage->intent_p1)))
+      switch(Round(outputTransformationImage->intent_p1))
       {
       case DEF_FIELD:
          NR_INFO("The deformation field has been saved as:");
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index aacc0bab..68dfcceb 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1365,15 +1365,15 @@ class NiftiImage
 
     /**
      * Modify the pixel dimensions, and potentially the xform matrices to match
-     * @param pixdim Vector of new pixel dimensions
+     * @param pixDims Vector of new pixel dimensions
     **/
-    void updatePixdim (const std::vector<pixdim_t> &pixdim);
+    void updatePixDim (const std::vector<pixdim_t> &pixDims);
 
     /**
      * Modify the pixel dimension units
-     * @param pixunits Vector of new pixel units, specified using their standard abbreviations
+     * @param pixUnits Vector of new pixel units, specified using their standard abbreviations
     **/
-    void setPixunits (const std::vector<std::string> &pixunits);
+    void setPixUnits (const std::vector<std::string> &pixUnits);
 
 public:
     /**
@@ -1668,7 +1668,7 @@ class NiftiImage
      * Set a pixel dimension of the image
      * @param dim The dimension to set
      * @param value The new value of the dimension
-    */
+    **/
     void setPixDim (const Dim dim, const pixdim_t value)
     {
         if (image == nullptr)
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index 44085013..6ae2866c 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -989,7 +989,7 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo
         this->image->pixdim[i+1] = std::abs(pixdimVector[i]);
 
     const std::vector<std::string> pixunitsVector = mriImage.field("voxelDimUnits");
-    setPixunits(pixunitsVector);
+    setPixUnits(pixunitsVector);
 
     if (xform.rows() != 4 || xform.cols() != 4)
         this->image->qform_code = this->image->sform_code = 0;
@@ -1074,7 +1074,7 @@ inline void NiftiImage::initFromArray (const Rcpp::RObject &object, const bool c
     if (object.hasAttribute("pixunits"))
     {
         const std::vector<std::string> pixunitsVector = object.attr("pixunits");
-        setPixunits(pixunitsVector);
+        setPixUnits(pixunitsVector);
     }
 }
 
@@ -1303,23 +1303,23 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector<dim_t>
     RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from string and volume vector)", RNIFTI_NIFTILIB_VERSION, this->image);
 }
 
-inline void NiftiImage::updatePixdim (const std::vector<pixdim_t> &pixdim)
+inline void NiftiImage::updatePixDim (const std::vector<pixdim_t> &pixDims)
 {
     const int nDims = image->dim[0];
-    const std::vector<pixdim_t> origPixdim(image->pixdim+1, image->pixdim+4);
+    const std::vector<pixdim_t> origPixDims(image->pixdim+1, image->pixdim+4);
 
     for (int i=1; i<8; i++)
         image->pixdim[i] = 0.0;
 
-    const int pixdimLength = static_cast<int>(pixdim.size());
+    const int pixdimLength = static_cast<int>(pixDims.size());
     for (int i=0; i<std::min(pixdimLength,nDims); i++)
-        image->pixdim[i+1] = pixdim[i];
+        image->pixdim[i+1] = pixDims[i];
 
-    if (!std::equal(origPixdim.begin(), origPixdim.begin() + std::min(3,nDims), pixdim.begin()))
+    if (!std::equal(origPixDims.begin(), origPixDims.begin() + std::min(3,nDims), pixDims.begin()))
     {
         Xform::Matrix scaleMatrix = Xform::Matrix::eye();
         for (int i=0; i<std::min(pixdimLength,3); i++)
-            scaleMatrix(i,i) = pixdim[i] / origPixdim[i];
+            scaleMatrix(i,i) = pixDims[i] / origPixDims[i];
 
         if (image->qform_code > 0)
             this->qform() = qform().matrix() * scaleMatrix;
@@ -1328,27 +1328,27 @@ inline void NiftiImage::updatePixdim (const std::vector<pixdim_t> &pixdim)
     }
 }
 
-inline void NiftiImage::setPixunits (const std::vector<std::string> &pixunits)
+inline void NiftiImage::setPixUnits (const std::vector<std::string> &pixUnits)
 {
-    for (size_t i=0; i<pixunits.size(); i++)
+    for (size_t i=0; i<pixUnits.size(); i++)
     {
-        if (pixunits[i] == "m")
+        if (pixUnits[i] == "m")
             image->xyz_units = NIFTI_UNITS_METER;
-        else if (pixunits[i] == "mm")
+        else if (pixUnits[i] == "mm")
             image->xyz_units = NIFTI_UNITS_MM;
-        else if (pixunits[i] == "um")
+        else if (pixUnits[i] == "um")
             image->xyz_units = NIFTI_UNITS_MICRON;
-        else if (pixunits[i] == "s")
+        else if (pixUnits[i] == "s")
             image->time_units = NIFTI_UNITS_SEC;
-        else if (pixunits[i] == "ms")
+        else if (pixUnits[i] == "ms")
             image->time_units = NIFTI_UNITS_MSEC;
-        else if (pixunits[i] == "us")
+        else if (pixUnits[i] == "us")
             image->time_units = NIFTI_UNITS_USEC;
-        else if (pixunits[i] == "Hz")
+        else if (pixUnits[i] == "Hz")
             image->time_units = NIFTI_UNITS_HZ;
-        else if (pixunits[i] == "ppm")
+        else if (pixUnits[i] == "ppm")
             image->time_units = NIFTI_UNITS_PPM;
-        else if (pixunits[i] == "rad/s")
+        else if (pixUnits[i] == "rad/s")
             image->time_units = NIFTI_UNITS_RADS;
     }
 }
@@ -1366,7 +1366,7 @@ inline NiftiImage & NiftiImage::rescale (const std::vector<pixdim_t> &scales)
         }
     }
 
-    updatePixdim(pixdim);
+    updatePixDim(pixdim);
 
     // Data vector is now the wrong size, so drop it
 #if RNIFTI_NIFTILIB_VERSION == 1
@@ -1685,13 +1685,13 @@ inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object)
         if (object.hasAttribute("pixdim"))
         {
             const std::vector<pixdim_t> pixdimVector = object.attr("pixdim");
-            updatePixdim(pixdimVector);
+            updatePixDim(pixdimVector);
         }
 
         if (object.hasAttribute("pixunits"))
         {
             const std::vector<std::string> pixunitsVector = object.attr("pixunits");
-            setPixunits(pixunitsVector);
+            setPixUnits(pixunitsVector);
         }
 
         // This library function clobbers dim[0] if the last dimension is unitary; we undo that here
diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp
index 60b79416..225d6f11 100644
--- a/reg-io/nrrd/reg_nrrd.cpp
+++ b/reg-io/nrrd/reg_nrrd.cpp
@@ -205,7 +205,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
       if(niiImage->ndim>=3)
          qform_orientation_matrix.m[2][3]=niiImage->qoffset_z=nrrdImage->spaceOrigin[2];
 
-      // Flipp the orientation to fit ITK's filters
+      // Flip the orientation to fit ITK's filters
       qform_orientation_matrix.m[0][0] *= -1.0f;
       qform_orientation_matrix.m[1][1] *= -1.0f;
 
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 0433ee2c..6d3d02a5 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -222,7 +222,7 @@ void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) {
     }, NiftiImage::getFloatingDataType(controlPointGrid));
 }
 /* *************************************************************** */
-void Compute::GetDefFieldFromVelocityGrid(bool updateStepNumber) {
+void Compute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
     reg_spline_getDefFieldFromVelocityGrid(con.GetControlPointGrid(),
                                            con.GetDeformationField(),
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index b44063f3..3cef7df7 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -25,7 +25,7 @@ class Compute {
     virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void SmoothGradient(float sigma);
     virtual void GetApproximatedGradient(InterfaceOptimiser& opt);
-    virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber);
+    virtual void GetDefFieldFromVelocityGrid(const bool updateStepNumber);
     virtual void ConvolveVoxelBasedMeasureGradient(float weight);
     virtual void ExponentiateGradient(Content& conBw);
     virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ);
diff --git a/reg-lib/cl/resampleKernel.cl b/reg-lib/cl/resampleKernel.cl
index b1c1a468..3157c3cd 100755
--- a/reg-lib/cl/resampleKernel.cl
+++ b/reg-lib/cl/resampleKernel.cl
@@ -168,12 +168,6 @@ __inline real_t interpLoop3D(__global float* floatingIntensity,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-__inline int cl_reg_floor(real_t a)
-{
-    return a > 0.0 ? (int)a : (int)(a - 1);
-}
-/* *************************************************************** */
-/* *************************************************************** */
 __inline void reg_mat44_mul_cl(__global float const* mat,
     float const* in,
     float *out)
@@ -241,8 +235,8 @@ __kernel void ResampleImage2D(__global float* floatingImage,
                 // real -> voxel; floating space
                 reg_mat44_mul_cl(sourceIJKMatrix, world, position);
 
-                previous[0] = cl_reg_floor(position[0]);
-                previous[1] = cl_reg_floor(position[1]);
+                previous[0] = Floor(position[0]);
+                previous[1] = Floor(position[1]);
 
                 relative[0] = (real_t)position[0] - (real_t)(previous[0]);
                 relative[1] = (real_t)position[1] - (real_t)(previous[1]);
@@ -333,9 +327,9 @@ __kernel void ResampleImage3D(__global float* floatingImage,
                 // real -> voxel; floating space
                 reg_mat44_mul_cl(sourceIJKMatrix, world, position);
 
-                previous[0] = cl_reg_floor(position[0]);
-                previous[1] = cl_reg_floor(position[1]);
-                previous[2] = cl_reg_floor(position[2]);
+                previous[0] = Floor(position[0]);
+                previous[1] = Floor(position[1]);
+                previous[2] = Floor(position[2]);
 
                 relative[0] = (real_t)position[0] - (real_t)(previous[0]);
                 relative[1] = (real_t)position[1] - (real_t)(previous[1]);
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 8c9d099e..82c622ab 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -3480,7 +3480,7 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                              nifti_image *flowField) {
     // Check first if the velocity field is actually a velocity field
     if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID)
-        NR_FATAL_ERROR("The provide grid is not a velocity field");
+        NR_FATAL_ERROR("The provided grid is not a velocity field");
 
     // Initialise the flow field with an identity transformation
     reg_tools_multiplyValueToImage(flowField, flowField, 0.f);
@@ -3492,7 +3492,6 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
     if (oldNumExt > 1)
         velocityFieldGrid->num_ext = 1;
 
-
     // Copy over the number of required squaring steps
     flowField->intent_p2 = velocityFieldGrid->intent_p2;
     // The initial flow field is generated using cubic B-Spline interpolation/approximation
@@ -3505,124 +3504,115 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
     velocityFieldGrid->num_ext = oldNumExt;
 }
 /* *************************************************************** */
-void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
-                                                   nifti_image *deformationFieldImage,
-                                                   bool updateStepNumber) {
+void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowField,
+                                                   nifti_image *deformationField,
+                                                   const bool updateStepNumber) {
     // Check first if the velocity field is actually a velocity field
-    if (flowFieldImage->intent_p1 != DEF_VEL_FIELD)
-        NR_FATAL_ERROR("The provide field is not a velocity field");
+    if (flowField->intent_p1 != DEF_VEL_FIELD)
+        NR_FATAL_ERROR("The provided field is not a velocity field");
 
     // Remove the affine component from the flow field
-    nifti_image *affineOnly = nullptr;
-    if (flowFieldImage->num_ext > 0) {
-        if (flowFieldImage->ext_list[0].edata != nullptr) {
+    NiftiImage affineOnly;
+    if (flowField->num_ext > 0) {
+        if (flowField->ext_list[0].edata != nullptr) {
             // Create a field that contains the affine component only
-            affineOnly = nifti_dup(*deformationFieldImage, false);
-            reg_affine_getDeformationField(reinterpret_cast<mat44*>(flowFieldImage->ext_list[0].edata),
+            affineOnly = NiftiImage(deformationField, NiftiImage::Copy::ImageInfoAndAllocData);
+            reg_affine_getDeformationField(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
                                            affineOnly,
                                            false);
-            reg_tools_subtractImageFromImage(flowFieldImage, affineOnly, flowFieldImage);
+            reg_tools_subtractImageFromImage(flowField, affineOnly, flowField);
         }
-    } else reg_getDisplacementFromDeformation(flowFieldImage);
+    } else reg_getDisplacementFromDeformation(flowField);
 
     // Compute the number of scaling value to ensure unfolded transformation
     int squaringNumber = 1;
-    if (updateStepNumber || flowFieldImage->intent_p2 == 0) {
+    if (updateStepNumber || flowField->intent_p2 == 0) {
         // Check the largest value
-        float extrema = fabsf(reg_tools_getMinValue(flowFieldImage, -1));
-        float temp = reg_tools_getMaxValue(flowFieldImage, -1);
+        float extrema = fabsf(reg_tools_getMinValue(flowField, -1));
+        float temp = reg_tools_getMaxValue(flowField, -1);
         extrema = extrema > temp ? extrema : temp;
         // Check the values for scaling purpose
         float maxLength;
-        if (deformationFieldImage->nz > 1)
-            // 0.2888675 = sqrt(0.5^2/3)
-            maxLength = 0.28f;
-        // 0.3535533 = sqrt(0.5^2/2)
-        else maxLength = 0.35f;
-        while (true) {
-            if ((extrema / pow(2.0f, squaringNumber)) >= maxLength)
-                squaringNumber++;
-            else break;
-        }
+        if (deformationField->nz > 1)
+            maxLength = 0.28f;  // sqrt(0.5^2/3)
+        else maxLength = 0.35f; // sqrt(0.5^2/2)
+        while (extrema / pow(2.0f, squaringNumber) >= maxLength)
+            squaringNumber++;
         // The minimal number of step is set to 6 by default
         squaringNumber = squaringNumber < 6 ? 6 : squaringNumber;
         // Set the number of squaring step in the flow field
-        if (fabs(flowFieldImage->intent_p2) != squaringNumber) {
-            NR_WARN("Changing from " << Round(fabs(flowFieldImage->intent_p2)) << " to " << abs(squaringNumber) <<
+        if (fabs(flowField->intent_p2) != squaringNumber) {
+            NR_WARN("Changing from " << Round(fabs(flowField->intent_p2)) << " to " << abs(squaringNumber) <<
                     " squaring step (equivalent to scaling down by " << (int)pow(2.0f, squaringNumber) << ")");
         }
         // Update the number of squaring step required
-        if (flowFieldImage->intent_p2 >= 0)
-            flowFieldImage->intent_p2 = static_cast<float>(squaringNumber);
-        else flowFieldImage->intent_p2 = static_cast<float>(-squaringNumber);
-    } else squaringNumber = static_cast<int>(fabsf(flowFieldImage->intent_p2));
+        if (flowField->intent_p2 >= 0)
+            flowField->intent_p2 = static_cast<float>(squaringNumber);
+        else flowField->intent_p2 = static_cast<float>(-squaringNumber);
+    } else squaringNumber = static_cast<int>(fabsf(flowField->intent_p2));
 
     // The displacement field is scaled
-    float scalingValue = pow(2.0f, std::abs(static_cast<float>(squaringNumber)));
-    if (flowFieldImage->intent_p2 < 0)
+    float scalingValue = pow(2.0f, static_cast<float>(std::abs(squaringNumber)));
+    if (flowField->intent_p2 < 0)
         // backward deformation field is scaled down
-        reg_tools_divideValueToImage(flowFieldImage,
-                                     flowFieldImage,
+        reg_tools_divideValueToImage(flowField,
+                                     flowField,
                                      -scalingValue); // (/-scalingValue)
     else
         // forward deformation field is scaled down
-        reg_tools_divideValueToImage(flowFieldImage,
-                                     flowFieldImage,
+        reg_tools_divideValueToImage(flowField,
+                                     flowField,
                                      scalingValue); // (/scalingValue)
 
     // Conversion from displacement to deformation
-    reg_getDeformationFromDisplacement(flowFieldImage);
+    reg_getDeformationFromDisplacement(flowField);
 
     // The computed scaled deformation field is copied over
-    memcpy(deformationFieldImage->data, flowFieldImage->data,
-           deformationFieldImage->nvox * deformationFieldImage->nbyper);
+    memcpy(deformationField->data, flowField->data,
+           deformationField->nvox * deformationField->nbyper);
 
     // The deformation field is squared
     for (unsigned short i = 0; i < squaringNumber; ++i) {
         // The deformation field is applied to itself
-        reg_defField_compose(deformationFieldImage,
-                             flowFieldImage,
+        reg_defField_compose(deformationField,
+                             flowField,
                              nullptr);
         // The computed scaled deformation field is copied over
-        memcpy(deformationFieldImage->data, flowFieldImage->data,
-               deformationFieldImage->nvox * deformationFieldImage->nbyper);
+        memcpy(deformationField->data, flowField->data,
+               deformationField->nvox * deformationField->nbyper);
         NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
     }
     // The affine conponent of the transformation is restored
-    if (affineOnly != nullptr) {
-        reg_getDisplacementFromDeformation(deformationFieldImage);
-        reg_tools_addImageToImage(deformationFieldImage, affineOnly, deformationFieldImage);
-        nifti_image_free(affineOnly);
-        affineOnly = nullptr;
+    if (affineOnly) {
+        reg_getDisplacementFromDeformation(deformationField);
+        reg_tools_addImageToImage(deformationField, affineOnly, deformationField);
     }
-    deformationFieldImage->intent_p1 = DEF_FIELD;
-    deformationFieldImage->intent_p2 = 0;
+    deformationField->intent_p1 = DEF_FIELD;
+    deformationField->intent_p2 = 0;
     // If required an affine component is composed
-    if (flowFieldImage->num_ext > 1) {
-        reg_affine_getDeformationField(reinterpret_cast<mat44*>(flowFieldImage->ext_list[1].edata), deformationFieldImage, true);
-    }
+    if (flowField->num_ext > 1)
+        reg_affine_getDeformationField(reinterpret_cast<mat44*>(flowField->ext_list[1].edata), deformationField, true);
 }
 /* *************************************************************** */
 void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
-                                            nifti_image *deformationFieldImage,
-                                            bool updateStepNumber) {
+                                            nifti_image *deformationField,
+                                            const bool updateStepNumber) {
     // Clean any extension in the deformation field as it is unexpected
-    nifti_free_extensions(deformationFieldImage);
+    nifti_free_extensions(deformationField);
 
     // Check if the velocity field is actually a velocity field
     if (velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) {
         // Use the spline approximation to generate the deformation field
         reg_spline_getDeformationField(velocityFieldGrid,
-                                       deformationFieldImage,
+                                       deformationField,
                                        nullptr,
                                        false, // composition
                                        true); // bspline
     } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) {
         // Create an image to store the flow field
-        nifti_image *flowField = nifti_dup(*deformationFieldImage, false);
+        NiftiImage flowField(deformationField, NiftiImage::Copy::ImageInfoAndAllocData);
+        flowField.setIntentName("NREG_TRANS"s);
         flowField->intent_code = NIFTI_INTENT_VECTOR;
-        memset(flowField->intent_name, 0, 16);
-        strcpy(flowField->intent_name, "NREG_TRANS");
         flowField->intent_p1 = DEF_VEL_FIELD;
         flowField->intent_p2 = velocityFieldGrid->intent_p2;
         if (velocityFieldGrid->num_ext > 0)
@@ -3631,40 +3621,38 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
         // Generate the velocity field
         reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowField);
         // Exponentiate the flow field
-        reg_defField_getDeformationFieldFromFlowField(flowField, deformationFieldImage, updateStepNumber);
+        reg_defField_getDeformationFieldFromFlowField(flowField, deformationField, updateStepNumber);
         // Update the number of step required. No action otherwise
         velocityFieldGrid->intent_p2 = flowField->intent_p2;
-        // Deallocate the allocated flow field
-        nifti_image_free(flowField);
     } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation");
 }
 /* *************************************************************** */
 void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
-                                                   nifti_image **deformationFieldImage) {
+                                                   nifti_image **deformationField) {
     // Check if the velocity field is actually a velocity field
     if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) {
         // Create an image to store the flow field
-        nifti_image *flowFieldImage = nifti_dup(*deformationFieldImage[0], false);
-        flowFieldImage->intent_code = NIFTI_INTENT_VECTOR;
-        memset(flowFieldImage->intent_name, 0, 16);
-        strcpy(flowFieldImage->intent_name, "NREG_TRANS");
-        flowFieldImage->intent_p1 = DEF_VEL_FIELD;
-        flowFieldImage->intent_p2 = velocityFieldGrid->intent_p2;
-        if (velocityFieldGrid->num_ext > 0 && flowFieldImage->ext_list == nullptr)
-            nifti_copy_extensions(flowFieldImage, velocityFieldGrid);
+        nifti_image *flowField = nifti_dup(*deformationField[0], false);
+        flowField->intent_code = NIFTI_INTENT_VECTOR;
+        memset(flowField->intent_name, 0, 16);
+        strcpy(flowField->intent_name, "NREG_TRANS");
+        flowField->intent_p1 = DEF_VEL_FIELD;
+        flowField->intent_p2 = velocityFieldGrid->intent_p2;
+        if (velocityFieldGrid->num_ext > 0 && flowField->ext_list == nullptr)
+            nifti_copy_extensions(flowField, velocityFieldGrid);
 
         // Generate the velocity field
-        reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowFieldImage);
+        reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowField);
         // Remove the affine component from the flow field
         nifti_image *affineOnly = nullptr;
-        if (flowFieldImage->num_ext > 0) {
-            if (flowFieldImage->ext_list[0].edata != nullptr) {
+        if (flowField->num_ext > 0) {
+            if (flowField->ext_list[0].edata != nullptr) {
                 // Create a field that contains the affine component only
-                affineOnly = nifti_dup(*deformationFieldImage[0], false);
-                reg_affine_getDeformationField(reinterpret_cast<mat44*>(flowFieldImage->ext_list[0].edata), affineOnly, false);
-                reg_tools_subtractImageFromImage(flowFieldImage, affineOnly, flowFieldImage);
+                affineOnly = nifti_dup(*deformationField[0], false);
+                reg_affine_getDeformationField(reinterpret_cast<mat44*>(flowField->ext_list[0].edata), affineOnly, false);
+                reg_tools_subtractImageFromImage(flowField, affineOnly, flowField);
             }
-        } else reg_getDisplacementFromDeformation(flowFieldImage);
+        } else reg_getDisplacementFromDeformation(flowField);
 
         // Compute the number of scaling value to ensure unfolded transformation
         int squaringNumber = static_cast<int>(fabsf(velocityFieldGrid->intent_p2));
@@ -3673,36 +3661,36 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
         float scalingValue = pow(2.0f, std::abs((float)squaringNumber));
         if (velocityFieldGrid->intent_p2 < 0)
             // backward deformation field is scaled down
-            reg_tools_divideValueToImage(flowFieldImage, deformationFieldImage[0], -scalingValue);
+            reg_tools_divideValueToImage(flowField, deformationField[0], -scalingValue);
         else
             // forward deformation field is scaled down
-            reg_tools_divideValueToImage(flowFieldImage, deformationFieldImage[0], scalingValue);
+            reg_tools_divideValueToImage(flowField, deformationField[0], scalingValue);
 
         // Deallocate the allocated flow field
-        nifti_image_free(flowFieldImage);
-        flowFieldImage = nullptr;
+        nifti_image_free(flowField);
+        flowField = nullptr;
 
         // Conversion from displacement to deformation
-        reg_getDeformationFromDisplacement(deformationFieldImage[0]);
+        reg_getDeformationFromDisplacement(deformationField[0]);
 
         // The deformation field is squared
         for (unsigned short i = 0; i < squaringNumber; ++i) {
             // The computed scaled deformation field is copied over
-            memcpy(deformationFieldImage[i + 1]->data, deformationFieldImage[i]->data,
-                   deformationFieldImage[i]->nvox * deformationFieldImage[i]->nbyper);
+            memcpy(deformationField[i + 1]->data, deformationField[i]->data,
+                   deformationField[i]->nvox * deformationField[i]->nbyper);
             // The deformation field is applied to itself
-            reg_defField_compose(deformationFieldImage[i], // to apply
-                                 deformationFieldImage[i + 1], // to update
+            reg_defField_compose(deformationField[i], // to apply
+                                 deformationField[i + 1], // to update
                                  nullptr);
             NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
         }
         // The affine conponent of the transformation is restored
         if (affineOnly != nullptr) {
             for (unsigned short i = 0; i <= squaringNumber; ++i) {
-                reg_getDisplacementFromDeformation(deformationFieldImage[i]);
-                reg_tools_addImageToImage(deformationFieldImage[i], affineOnly, deformationFieldImage[i]);
-                deformationFieldImage[i]->intent_p1 = DEF_FIELD;
-                deformationFieldImage[i]->intent_p2 = 0;
+                reg_getDisplacementFromDeformation(deformationField[i]);
+                reg_tools_addImageToImage(deformationField[i], affineOnly, deformationField[i]);
+                deformationField[i]->intent_p1 = DEF_FIELD;
+                deformationField[i]->intent_p2 = 0;
             }
             nifti_image_free(affineOnly);
             affineOnly = nullptr;
@@ -3711,7 +3699,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
         if (velocityFieldGrid->num_ext > 1) {
             for (unsigned short i = 0; i <= squaringNumber; ++i) {
                 reg_affine_getDeformationField(reinterpret_cast<mat44*>(velocityFieldGrid->ext_list[1].edata),
-                                               deformationFieldImage[i],
+                                               deformationField[i],
                                                true);
             }
         }
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index fd1ded7f..6a2a7a69 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -152,7 +152,7 @@ void reg_defFieldInvert(nifti_image *inputDeformationField,
 extern "C++"
 void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
                                                    nifti_image *deformationFieldImage,
-                                                   bool updateStepNumber);
+                                                   const bool updateStepNumber);
 /* *************************************************************** */
 /** @brief The deformation field (img2) is computed by integrating
  * a velocity Grid (img1)
@@ -164,7 +164,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
 extern "C++"
 void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                             nifti_image *deformationFieldImage,
-                                            bool updateStepNumber);
+                                            const bool updateStepNumber);
 /* *************************************************************** */
 extern "C++"
 void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
@@ -181,12 +181,12 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
 /* *********************************************** */
 
 /* *************************************************************** */
-/** @brief This function compute the BCH update using an initial verlocity field
+/** @brief This function compute the BCH update using an initial velocity field
  * and its gradient.
  * @param img1 Image that contains the velocity field parametrisation
  * This image is updated
  * @param img2 This image contains the gradient to use
- * @param type The type encodes the number of component of the serie
+ * @param type The type encodes the number of component of the series
  * to be considered:
  * 0 - w=u+v
  * 1 - w=u+v+0.5*[u,v]
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 1408df36..48c89449 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -2654,7 +2654,7 @@ nifti_image* reg_makeIsotropic(nifti_image *img, int inter) {
     for (size_t i = 0; i < 8; ++i) newDim[i] = img->dim[i];
     for (size_t i = 1; i < 4; ++i) {
         if (i < static_cast<size_t>(img->dim[0] + 1))
-            newDim[i] = (int)ceilf(img->dim[i] * img->pixdim[i] / smallestPixDim);
+            newDim[i] = Ceil(img->dim[i] * img->pixdim[i] / smallestPixDim);
     }
     // Create the new image
     nifti_image *newImg = nifti_make_new_nim(newDim, img->datatype, true);
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 753e158c..59aa73ba 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -1886,13 +1886,18 @@ DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
 
+    // The min/max function
+    const DataType& (*minMax)(const DataType&, const DataType&);
+    if (calcMin) minMax = std::min<DataType>;
+    else minMax = std::max<DataType>;
+
     for (int time = 0; time < image->nt; ++time) {
         if (time == timepoint || timepoint == -1) {
             for (int u = 0; u < image->nu; ++u) {
                 const DataType *currentVolumePtr = &imgPtr[(u * image->nt + time) * voxelNumber];
                 for (size_t i = 0; i < voxelNumber; ++i) {
                     DataType currentVal = (DataType)((float)currentVolumePtr[i] * sclSlope + image->scl_inter);
-                    retValue = calcMin ? std::min(currentVal, retValue) : std::max(currentVal, retValue);
+                    retValue = minMax(currentVal, retValue);
                 }
             }
         }
@@ -2045,11 +2050,11 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin
         }
     }
 
-    // Define the reading and writting pointers
+    // Define the reading and writing pointers
     const DataType *inputPtr = static_cast<const DataType*>(image->data);
     DataType *outputPtr = static_cast<DataType*>(*outputArray);
 
-    // Copy the data and flipp axis if required
+    // Copy the data and flip axis if required
     for (int w = 0, w2 = start[6]; w < image->nw; ++w, w2 += increment[6]) {
         size_t index_w = w2 * image->nx * image->ny * image->nz * image->nt * image->nu * image->nv;
         for (int v = 0, v2 = start[5]; v < image->nv; ++v, v2 += increment[5]) {
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 0970e365..46a880b3 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -30,7 +30,7 @@ struct BlockSize {
     unsigned reg_getVoxelBasedNMIGradientUsingPW3D;
     unsigned reg_getVoxelBasedNMIGradientUsingPW2x2;
     /* _reg_globalTransformation_gpu */
-    unsigned reg_affine_deformationField;
+    unsigned reg_affine_getDeformationField;
     /* _reg_localTransformation_gpu */
     unsigned reg_spline_getDeformationField2D;
     unsigned reg_spline_getDeformationField3D;
@@ -70,7 +70,7 @@ struct BlockSize {
     unsigned reg_ApplyConvolutionWindowAlongX;
     unsigned reg_ApplyConvolutionWindowAlongY;
     unsigned reg_ApplyConvolutionWindowAlongZ;
-    unsigned reg_arithmetic;
+    unsigned Arithmetic;
     /* _reg_resampling_gpu */
     unsigned reg_resampleImage2D;
     unsigned reg_resampleImage3D;
@@ -95,7 +95,7 @@ struct BlockSize100: public BlockSize {
         reg_getVoxelBasedNMIGradientUsingPW3D = 320; // 25 reg - 24 smem - 32 cmem
         reg_getVoxelBasedNMIGradientUsingPW2x2 = 192; // 42 reg - 24 smem - 36 cmem
         /* _reg_globalTransformation_gpu */
-        reg_affine_deformationField = 512; // 16 reg - 24 smem
+        reg_affine_getDeformationField = 512; // 16 reg - 24 smem
         /* _reg_localTransformation_gpu */
         reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem
         reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem
@@ -135,7 +135,7 @@ struct BlockSize100: public BlockSize {
         reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
         reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
         reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem
-        reg_arithmetic = 384; // 5 reg - 24 smem
+        Arithmetic = 384; // 5 reg - 24 smem
         /* _reg_resampling_gpu */
         reg_resampleImage2D = 320; // 10 reg - 24 smem - 12 cmem
         reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem
@@ -162,7 +162,7 @@ struct BlockSize300: public BlockSize {
         reg_getVoxelBasedNMIGradientUsingPW3D = 640; // 45 reg
         reg_getVoxelBasedNMIGradientUsingPW2x2 = 576; // 55 reg
         /* _reg_globalTransformation_gpu */
-        reg_affine_deformationField = 1024; // 23 reg
+        reg_affine_getDeformationField = 1024; // 23 reg
         /* _reg_localTransformation_gpu */
         reg_spline_getDeformationField2D = 768; // 34 reg
         reg_spline_getDeformationField3D = 768; // 34 reg
@@ -202,7 +202,7 @@ struct BlockSize300: public BlockSize {
         reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg
         reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg
         reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg
-        reg_arithmetic = 1024; //
+        Arithmetic = 1024; //
         /* _reg_resampling_gpu */
         reg_resampleImage2D = 1024; // 23 reg
         reg_resampleImage3D = 1024; // 24 reg
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 9c66607d..ccedd8ff 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -80,6 +80,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     ../AladinContent.cpp
     _reg_resampling_gpu.cu
     _reg_tools_gpu.cu
+    _reg_globalTransformation_gpu.cu
     _reg_localTransformation_gpu.cu
     _reg_nmi_gpu.cu
     _reg_ssd_gpu.cu
diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
index 5912fc96..e1d5f0a0 100644
--- a/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp
@@ -18,9 +18,9 @@ CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(Content *conI
 void CudaAffineDeformationFieldKernel::Calculate(bool compose) {
    launchAffine(this->affineTransformation,
                 this->deformationFieldImage,
-                &deformationFieldArray_d,
-                &mask_d,
-                &transformationMatrix_d,
+                deformationFieldArray_d,
+                mask_d,
+                transformationMatrix_d,
                 compose);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCommon.cu b/reg-lib/cuda/CudaCommon.cu
index 870091c3..387dabad 100644
--- a/reg-lib/cuda/CudaCommon.cu
+++ b/reg-lib/cuda/CudaCommon.cu
@@ -10,8 +10,6 @@
  */
 
 #include "CudaCommon.hpp"
-#include <thrust/host_vector.h>
-#include <thrust/device_ptr.h>
 
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp
index 088b11f2..c94b6313 100644
--- a/reg-lib/cuda/CudaCommon.hpp
+++ b/reg-lib/cuda/CudaCommon.hpp
@@ -11,6 +11,15 @@
 
 #include <cuda_runtime.h>
 #include <cuda.h>
+#include <thrust/device_ptr.h>
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+#include <thrust/gather.h>
+#include <thrust/host_vector.h>
+#include <thrust/inner_product.h>
+#include <thrust/reduce.h>
+#include <thrust/sequence.h>
+#include <thrust/sort.h>
 #include "_reg_tools.h"
 #include "CudaContext.hpp"
 
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 0e71b10e..eee743c0 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -23,7 +23,7 @@ class CudaCompute: public Compute {
     virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void SmoothGradient(float sigma) override;
     virtual void GetApproximatedGradient(InterfaceOptimiser& opt) override;
-    virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber) override;
+    virtual void GetDefFieldFromVelocityGrid(const bool updateStepNumber) override;
     virtual void ConvolveVoxelBasedMeasureGradient(float weight) override;
     virtual void ExponentiateGradient(Content& conBw) override;
     virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override;
diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
index d4444b06..61d5e626 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -64,7 +64,7 @@ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda,
                                        const bool& optimiseX,
                                        const bool& optimiseY,
                                        const bool& optimiseZ) {
-    const unsigned threads = CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned threads = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned blocks = static_cast<unsigned>(Ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
     const dim3 blockDims(threads, 1, 1);
     const dim3 gridDims(blocks, blocks, 1);
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index 3a30f9af..6d9a4361 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -21,7 +21,7 @@ __device__ __inline__ float4 operator*(const float4& a, const float4& b) {
     return { a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w };
 }
 __device__ __inline__ float4 operator*(const float& a, const float4& b) {
-    return { a * b.x, a * b.y, a * b.z, 0.0f };
+    return { a * b.x, a * b.y, a * b.z, a * b.w };
 }
 /* *************************************************************** */
 __device__ __inline__ float2 operator/(const float2& a, const float2& b) {
@@ -38,7 +38,7 @@ __device__ __inline__ float2 operator+(const float2& a, const float2& b) {
     return { a.x + b.x, a.y + b.y };
 }
 __device__ __inline__ float4 operator+(const float4& a, const float4& b) {
-    return { a.x + b.x, a.y + b.y, a.z + b.z, 0.0f };
+    return { a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w };
 }
 __device__ __inline__ float3 operator+(const float3& a, const float3& b) {
     return { a.x + b.x, a.y + b.y, a.z + b.z };
@@ -48,7 +48,7 @@ __device__ __inline__ float3 operator-(const float3& a, const float3& b) {
     return { a.x - b.x, a.y - b.y, a.z - b.z };
 }
 __device__ __inline__ float4 operator-(const float4& a, const float4& b) {
-    return { a.x - b.x, a.y - b.y, a.z - b.z, 0.f };
+    return { a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w };
 }
 /* *************************************************************** */
 __device__ __inline__ double2 operator+(const double2& a, const double2& b) {
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index cb7127bd..d42ff980 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -1,5 +1,5 @@
 /*
- *  _reg_affineTransformation_gpu.cu
+ *  _reg_globalTransformation_gpu.cu
  *
  *
  *  Created by Marc Modat on 25/03/2009.
@@ -14,9 +14,9 @@
 #include "_reg_globalTransformation_kernels.cu"
 
 /* *************************************************************** */
-void reg_affine_positionField_gpu(const mat44 *affineMatrix,
-                                  const nifti_image *targetImage,
-                                  float4 *deformationFieldCuda) {
+void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix,
+                                        const nifti_image *targetImage,
+                                        float4 *deformationFieldCuda) {
     const int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz);
     const size_t voxelNumber = targetImage->nvox;
 
@@ -27,11 +27,11 @@ void reg_affine_positionField_gpu(const mat44 *affineMatrix,
     // Affine * TargetMat is constant
     const mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix);
 
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_affine_deformationField;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_affine_getDeformationField;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)targetImage->nvox / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    reg_affine_deformationField_kernel<<<gridDims, blockDims>>>(deformationFieldCuda, transformationMatrix, imageSize, (unsigned)voxelNumber);
+    reg_affine_getDeformationField_kernel<<<gridDims, blockDims>>>(deformationFieldCuda, transformationMatrix, imageSize, (unsigned)voxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h
index 3c748bfd..5d33b155 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h
@@ -1,5 +1,5 @@
 /*
- *  _reg_affineTransformation.h
+ *  _reg_globalTransformation_gpu.h
  *
  *
  *  Created by Marc Modat on 25/03/2009.
@@ -14,7 +14,6 @@
 
 #include "CudaCommon.hpp"
 
-extern "C++"
-void reg_affine_positionField_gpu(const mat44 *affineMatrix,
-                                  const nifti_image *targetImage,
-                                  float4 *deformationFieldCuda);
+void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix,
+                                        const nifti_image *targetImage,
+                                        float4 *deformationFieldCuda);
diff --git a/reg-lib/cuda/_reg_globalTransformation_kernels.cu b/reg-lib/cuda/_reg_globalTransformation_kernels.cu
index bbb8b1ce..e74b7119 100755
--- a/reg-lib/cuda/_reg_globalTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_kernels.cu
@@ -13,10 +13,10 @@
 #include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
-__global__ void reg_affine_deformationField_kernel(float4 *deformationField,
-                                                   const mat44 affineMatrix,
-                                                   const int3 imageSize,
-                                                   const unsigned voxelNumber) {
+__global__ void reg_affine_getDeformationField_kernel(float4 *deformationField,
+                                                      const mat44 affineMatrix,
+                                                      const int3 imageSize,
+                                                      const unsigned voxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         int quot, rem;
@@ -25,14 +25,14 @@ __global__ void reg_affine_deformationField_kernel(float4 *deformationField,
         reg_div_cuda(rem, imageSize.x, quot, rem);
         const int y = quot, x = rem;
 
-        /* The transformation is applied */
+        // The transformation is applied
         const float4 position = {
             affineMatrix.m[0][0] * x + affineMatrix.m[0][1] * y + affineMatrix.m[0][2] * z + affineMatrix.m[0][3],
             affineMatrix.m[1][0] * x + affineMatrix.m[1][1] * y + affineMatrix.m[1][2] * z + affineMatrix.m[1][3],
             affineMatrix.m[2][0] * x + affineMatrix.m[2][1] * y + affineMatrix.m[2][2] * z + affineMatrix.m[2][3],
             0.f
         };
-        /* the deformation field (real coordinates) is stored */
+        // The deformation field (real coordinates) is stored
         deformationField[tid] = position;
     }
 }
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 476b69b6..be602b82 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -19,8 +19,8 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
                                         const float4 *controlPointImageCuda,
                                         float4 *deformationFieldCuda,
                                         const int *maskCuda,
-                                        const size_t& activeVoxelNumber,
-                                        const bool& bspline) {
+                                        const size_t activeVoxelNumber,
+                                        const bool bspline) {
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
@@ -35,7 +35,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
 
     if (referenceImage->nz > 1) {
         const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField3D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         // 8 floats of shared memory are allocated per thread
@@ -50,7 +50,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField2D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         // 4 floats of shared memory are allocated per thread
@@ -81,7 +81,7 @@ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, c
         secondDerivativeValuesSize = 6 * controlPointGridSize;
         NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
         const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getApproxSecondDerivatives3D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
@@ -91,7 +91,7 @@ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, c
         secondDerivativeValuesSize = 3 * controlPointGridSize;
         NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
         const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getApproxSecondDerivatives2D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
@@ -106,7 +106,7 @@ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, c
                                                               secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
     if (controlPointImage->nz > 1) {
         const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy3D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getApproxBendingEnergy3D_kernel<<<gridDims, blockDims>>>(penaltyTermCuda, *secondDerivativesTexture,
@@ -114,7 +114,7 @@ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, c
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy2D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getApproxBendingEnergy2D_kernel<<<gridDims, blockDims>>>(penaltyTermCuda, *secondDerivativesTexture,
@@ -148,7 +148,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI
         secondDerivativeValuesSize = 6 * controlPointGridSize * sizeof(float4);
         NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
         const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getApproxSecondDerivatives3D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
@@ -158,7 +158,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI
         secondDerivativeValuesSize = 3 * controlPointGridSize * sizeof(float4);
         NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
         const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getApproxSecondDerivatives2D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
@@ -172,7 +172,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI
                                                               secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
     if (controlPointImage->nz > 1) {
         const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient3D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getApproxBendingEnergyGradient3D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *secondDerivativesTexture,
@@ -181,7 +181,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient2D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getApproxBendingEnergyGradient2D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *secondDerivativesTexture,
@@ -208,7 +208,7 @@ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage
     // The Jacobian matrix is computed for every control point
     if (controlPointImage->nz > 1) {
         const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues3D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getApproxJacobianValues3D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
@@ -216,7 +216,7 @@ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues2D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getApproxJacobianValues2D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
@@ -245,7 +245,7 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage,
     // The Jacobian matrix is computed for every voxel
     if (controlPointImage->nz > 1) {
         const unsigned blocks = blockSize->reg_spline_getJacobianValues3D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         // 8 floats of shared memory are allocated per thread
@@ -256,7 +256,7 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage,
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->reg_spline_getJacobianValues2D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_getJacobianValues2D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
@@ -269,7 +269,7 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage,
 double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage,
                                              const nifti_image *controlPointImage,
                                              const float4 *controlPointImageCuda,
-                                             const bool& approx) {
+                                             const bool approx) {
     // The Jacobian matrices and determinants are computed
     float *jacobianMatricesCuda, *jacobianDetCuda;
     size_t jacNumber; double jacSum;
@@ -294,7 +294,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage,
 
     // The Jacobian determinant are squared and logged (might not be english but will do)
     const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_logSquaredValues;
-    const unsigned grids = (unsigned)ceilf(sqrtf((float)jacNumber / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)jacNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     reg_spline_logSquaredValues_kernel<<<gridDims, blockDims>>>(jacobianDetCuda, (unsigned)jacNumber);
@@ -310,8 +310,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
                                                    const nifti_image *controlPointImage,
                                                    const float4 *controlPointImageCuda,
                                                    float4 *transGradientCuda,
-                                                   const float& jacobianWeight,
-                                                   const bool& approx) {
+                                                   const float jacobianWeight,
+                                                   const bool approx) {
     auto blockSize = CudaContext::GetBlockSize();
 
     // The Jacobian matrices and determinants are computed
@@ -348,7 +348,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
     if (approx) {
         if (controlPointImage->nz > 1) {
             const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient3D;
-            const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+            const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             reg_spline_computeApproxJacGradient3D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
@@ -357,7 +357,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         } else {
             const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient2D;
-            const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+            const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             reg_spline_computeApproxJacGradient2D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
@@ -372,7 +372,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
                                                             controlPointImage->dz / referenceImage->dz);
         if (controlPointImage->nz > 1) {
             const unsigned blocks = blockSize->reg_spline_computeJacGradient3D;
-            const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+            const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             reg_spline_computeJacGradient3D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
@@ -382,7 +382,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         } else {
             const unsigned blocks = blockSize->reg_spline_computeJacGradient2D;
-            const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+            const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             reg_spline_computeJacGradient2D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
@@ -399,7 +399,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
 double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
                                      const nifti_image *controlPointImage,
                                      float4 *controlPointImageCuda,
-                                     const bool& approx) {
+                                     const bool approx) {
     auto blockSize = CudaContext::GetBlockSize();
 
     // The Jacobian matrices and determinants are computed
@@ -429,7 +429,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
     NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2Cuda, jacobianDetSize));
     NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2Cuda, jacobianDetCuda, jacobianDetSize, cudaMemcpyDeviceToDevice));
     const unsigned blocks = blockSize->reg_spline_logSquaredValues;
-    const unsigned grids = (unsigned)ceilf(sqrtf((float)jacNumber / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)jacNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     reg_spline_logSquaredValues_kernel<<<gridDims, blockDims>>>(jacobianDet2Cuda, (unsigned)jacNumber);
@@ -460,7 +460,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
                                                              cudaChannelFormatKindFloat, 1);
     if (approx) {
         const unsigned blocks = blockSize->reg_spline_approxCorrectFolding3D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_approxCorrectFolding3D_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
@@ -473,7 +473,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
                                                             controlPointImage->dy / referenceImage->dy,
                                                             controlPointImage->dz / referenceImage->dz);
         const unsigned blocks = blockSize->reg_spline_correctFolding3D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_spline_correctFolding3D_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
@@ -487,14 +487,14 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
     return std::numeric_limits<double>::quiet_NaN();
 }
 /* *************************************************************** */
-void reg_getDeformationFromDisplacement_gpu(const nifti_image *image, float4 *imageCuda, const bool& reverse = false) {
+void reg_getDeformationFromDisplacement_gpu(const nifti_image *image, float4 *imageCuda, const bool reverse = false) {
     // Bind the qform or sform
-    const mat44 affineMatrix = image->sform_code > 0 ? image->sto_xyz : image->qto_xyz;
+    const mat44& affineMatrix = image->sform_code > 0 ? image->sto_xyz : image->qto_xyz;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
-    const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
+    const int3 imageDim{ image->nx, image->ny, image->nz };
 
     const unsigned blocks = CudaContext::GetBlockSize()->reg_getDeformationFromDisplacement;
-    const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     reg_getDeformationFromDisplacement3D_kernel<<<gridDims, blockDims>>>(imageCuda, imageDim, (unsigned)voxelNumber, affineMatrix, reverse);
@@ -552,18 +552,18 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPoint
 void reg_defField_compose_gpu(const nifti_image *deformationField,
                               const float4 *deformationFieldCuda,
                               float4 *deformationFieldCudaOut,
-                              const size_t& activeVoxelNumber) {
+                              const size_t activeVoxelNumber) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
-    const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
-    const mat44 affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk;
-    const mat44 affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz;
+    const int3 referenceImageDim{ deformationField->nx, deformationField->ny, deformationField->nz };
+    const mat44& affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk;
+    const mat44& affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz;
     auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
                                                              activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     if (deformationField->nz > 1) {
         const unsigned blocks = blockSize->reg_defField_compose3D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_defField_compose3D_kernel<<<gridDims, blockDims>>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim,
@@ -571,7 +571,7 @@ void reg_defField_compose_gpu(const nifti_image *deformationField,
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->reg_defField_compose2D;
-        const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+        const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         reg_defField_compose2D_kernel<<<gridDims, blockDims>>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim,
@@ -590,7 +590,7 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
                                                              voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     const unsigned blocks = CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix;
-    const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks));
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     reg_defField_getJacobianMatrix3D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, *deformationFieldTexture, referenceImageDim,
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index 40cfd892..aa8aee88 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -12,64 +12,52 @@
 
 #pragma once
 
-#include "CudaCommon.hpp"
-#include "_reg_maths.h"
 #include "_reg_tools_gpu.h"
-#include <limits>
 
 /* *************************************************************** */
-extern "C++"
 void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
                                         const nifti_image *referenceImage,
                                         const float4 *controlPointImageCuda,
                                         float4 *deformationFieldCuda,
                                         const int *maskCuda,
-                                        const size_t& activeVoxelNumber,
-                                        const bool& bspline);
+                                        const size_t activeVoxelNumber,
+                                        const bool bspline);
 /* *************************************************************** */
-extern "C++"
 float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage,
                                          const float4 *controlPointImageCuda);
 /* *************************************************************** */
-extern "C++"
 void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointImage,
                                                 const float4 *controlPointImageCuda,
                                                 float4 *transGradientCuda,
                                                 float bendingEnergyWeight);
 /* *************************************************************** */
-extern "C++"
 double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage,
                                              const nifti_image *controlPointImage,
                                              const float4 *controlPointImageCuda,
-                                             const bool& approx);
+                                             const bool approx);
 /* *************************************************************** */
-extern "C++"
 void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceImage,
                                                    const nifti_image *controlPointImage,
                                                    const float4 *controlPointImageCuda,
                                                    float4 *transGradientCuda,
-                                                   const float& jacobianWeight,
-                                                   const bool& approx);
+                                                   const float jacobianWeight,
+                                                   const bool approx);
 /* *************************************************************** */
-extern "C++"
 double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
                                      const nifti_image *controlPointImage,
                                      float4 *controlPointImageCuda,
-                                     const bool& approx);
+                                     const bool approx);
 /* *************************************************************** */
-extern "C++"
 void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPointImage,
                                                  const nifti_image *deformationField,
                                                  const float4 *controlPointImageCuda,
                                                  float4 *deformationFieldCuda);
 /* *************************************************************** */
-extern "C++"
 void reg_defField_compose_gpu(const nifti_image *deformationField,
                               const float4 *deformationFieldCuda,
                               float4 *deformationFieldOutCuda,
-                              const size_t& activeVoxelNumber);
+                              const size_t activeVoxelNumber);
 /* *************************************************************** */
-extern "C++"
 void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
                                         const float4 *deformationFieldCuda,
                                         float *jacobianMatricesCuda);
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 7dbb89cf..0a6719fe 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -865,7 +865,7 @@ __global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices,
         const int y = quot, x = rem;
 
         // the "nearest previous" node is determined [0,0,0]
-        const int2 nodeAnte = { (int)floorf((float)x / controlPointSpacing.x), (int)floorf((float)y / controlPointSpacing.y) };
+        const int2 nodeAnte = { Floor((float)x / controlPointSpacing.x), Floor((float)y / controlPointSpacing.y) };
 
         float xBasis[4], yBasis[4], xFirst[4], yFirst[4], relative;
 
@@ -937,9 +937,9 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices,
 
         // the "nearest previous" node is determined [0,0,0]
         const int3 nodeAnte = {
-            (int)floorf((float)x / controlPointSpacing.x),
-            (int)floorf((float)y / controlPointSpacing.y),
-            (int)floorf((float)z / controlPointSpacing.z)
+            Floor((float)x / controlPointSpacing.x),
+            Floor((float)y / controlPointSpacing.y),
+            Floor((float)z / controlPointSpacing.z)
         };
 
         extern __shared__ float yFirst[];
@@ -1193,14 +1193,14 @@ __global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient,
         const int y = quot, x = rem;
 
         float2 jacobianGradient{};
-        for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY <= (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
+        for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
             if (-1 < pixelY && pixelY < referenceImageDim.y) {
                 const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y);
                 float basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre;
                 float yBasis, yFirst;
                 GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst);
 
-                for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX <= (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
+                for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
                     if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) {
                         const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x);
                         basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre;
@@ -1250,21 +1250,21 @@ __global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient,
         const int y = quot, x = rem;
 
         float3 jacobianGradient{};
-        for (int pixelZ = (int)ceilf((z - 3) * controlPointVoxelSpacing.z); pixelZ <= (int)ceilf((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
+        for (int pixelZ = Ceil((z - 3) * controlPointVoxelSpacing.z); pixelZ <= Ceil((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
             if (-1 < pixelZ && pixelZ < referenceImageDim.z) {
                 const int zPre = (int)((float)pixelZ / controlPointVoxelSpacing.z);
                 float basis = (float)pixelZ / controlPointVoxelSpacing.z - (float)zPre;
                 float zBasis, zFirst;
                 GetBSplineBasisValue(basis, z - zPre, &zBasis, &zFirst);
 
-                for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY <= (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
+                for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
                     if (-1 < pixelY && pixelY < referenceImageDim.y && (zFirst != 0.f || zBasis != 0.f)) {
                         const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y);
                         basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre;
                         float yBasis, yFirst;
                         GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst);
 
-                        for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX <= (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
+                        for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
                             if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) {
                                 const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x);
                                 basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre;
@@ -1396,11 +1396,11 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid,
         const int y = quot, x = rem;
 
         float3 foldingCorrection{};
-        for (int pixelZ = (int)ceilf((z - 3) * controlPointVoxelSpacing.z); pixelZ < (int)ceilf((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
+        for (int pixelZ = Ceil((z - 3) * controlPointVoxelSpacing.z); pixelZ < Ceil((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
             if (-1 < pixelZ && pixelZ < referenceImageDim.z) {
-                for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY < (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
+                for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY < Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
                     if (-1 < pixelY && pixelY < referenceImageDim.y) {
-                        for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX < (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
+                        for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX < Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
                             if (-1 < pixelX && pixelX < referenceImageDim.x) {
                                 int jacIndex = (pixelZ * referenceImageDim.y + pixelY) * referenceImageDim.x + pixelX;
                                 float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
@@ -1500,7 +1500,7 @@ __global__ void reg_defField_compose2D_kernel(float4 *deformationField,
         };
 
         // Linear interpolation
-        const int2 ante = { (int)floorf(voxelPosition.x), (int)floorf(voxelPosition.y) };
+        const int2 ante = { Floor(voxelPosition.x), Floor(voxelPosition.y) };
         float relX[2], relY[2];
         relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1];
         relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1];
@@ -1544,7 +1544,7 @@ __global__ void reg_defField_compose3D_kernel(float4 *deformationField,
         };
 
         // Linear interpolation
-        const int3 ante = { (int)floorf(voxelPosition.x), (int)floorf(voxelPosition.y), (int)floorf(voxelPosition.z) };
+        const int3 ante = { Floor(voxelPosition.x), Floor(voxelPosition.y), Floor(voxelPosition.z) };
         float relX[2], relY[2], relZ[2];
         relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1];
         relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1];
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 3538edf4..d6d3d7b8 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -12,7 +12,6 @@
 
 #include "_reg_nmi_gpu.h"
 #include "_reg_nmi_kernels.cu"
-#include <thrust/device_vector.h>
 
 /* *************************************************************** */
 reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() {
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 2c1bcf0b..903ac197 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -1,9 +1,6 @@
 #include "_reg_optimiser_gpu.h"
 #include "_reg_optimiser_kernels.cu"
 #include "_reg_common_cuda_kernels.cu"
-#include <thrust/device_vector.h>
-#include <thrust/host_vector.h>
-#include <thrust/inner_product.h>
 
 /* *************************************************************** */
 reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser<float>::reg_optimiser() {
@@ -172,7 +169,7 @@ void reg_conjugateGradient_gpu::Perturbation(float length) {
 void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
                                          float4 *conjugateGCuda,
                                          float4 *conjugateHCuda,
-                                         const size_t& nVoxels) {
+                                         const size_t nVoxels) {
     auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear,
                                                           nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
@@ -195,12 +192,12 @@ struct Float2Sum {
 void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
                                   float4 *conjugateGCuda,
                                   float4 *conjugateHCuda,
-                                  const size_t& nVoxels,
-                                  const bool& isSymmetric,
+                                  const size_t nVoxels,
+                                  const bool isSymmetric,
                                   float4 *gradientImageBwCuda,
                                   float4 *conjugateGBwCuda,
                                   float4 *conjugateHBwCuda,
-                                  const size_t& nVoxelsBw) {
+                                  const size_t nVoxelsBw) {
     auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear,
                                                           nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
     auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, cudaResourceTypeLinear,
@@ -260,14 +257,14 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
     }
 }
 /* *************************************************************** */
-void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
+void reg_updateControlPointPosition_gpu(const size_t nVoxels,
                                         float4 *controlPointImageCuda,
                                         const float4 *bestControlPointCuda,
                                         const float4 *gradientImageCuda,
-                                        const float& scale,
-                                        const bool& optimiseX,
-                                        const bool& optimiseY,
-                                        const bool& optimiseZ) {
+                                        const float scale,
+                                        const bool optimiseX,
+                                        const bool optimiseY,
+                                        const bool optimiseZ) {
     auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, cudaResourceTypeLinear,
                                                              nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
     auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear,
@@ -277,7 +274,8 @@ void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
     const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
     const dim3 blockDims(blocks, 1, 1);
     const dim3 gridDims(grids, grids, 1);
-    reg_updateControlPointPosition_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ);
+    reg_updateControlPointPosition_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture,
+                                                                   (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h
index 69e20f19..1950b463 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/_reg_optimiser_gpu.h
@@ -93,30 +93,27 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
     virtual void Perturbation(float length) override;
 };
 /* *************************************************************** */
-extern "C++"
 void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
                                          float4 *conjugateGCuda,
                                          float4 *conjugateHCuda,
-                                         const size_t& nVoxels);
+                                         const size_t nVoxels);
 /* *************************************************************** */
-extern "C++"
 void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
                                   float4 *conjugateGCuda,
                                   float4 *conjugateHCuda,
-                                  const size_t& nVoxels,
-                                  const bool& isSymmetric,
+                                  const size_t nVoxels,
+                                  const bool isSymmetric,
                                   float4 *gradientImageBwCuda,
                                   float4 *conjugateGBwCuda,
                                   float4 *conjugateHBwCuda,
-                                  const size_t& nVoxelsBw);
+                                  const size_t nVoxelsBw);
 /* *************************************************************** */
-extern "C++"
-void reg_updateControlPointPosition_gpu(const size_t& nVoxels,
+void reg_updateControlPointPosition_gpu(const size_t nVoxels,
                                         float4 *controlPointImageCuda,
                                         const float4 *bestControlPointCuda,
                                         const float4 *gradientImageCuda,
-                                        const float& scale,
-                                        const bool& optimiseX,
-                                        const bool& optimiseY,
-                                        const bool& optimiseZ);
+                                        const float scale,
+                                        const bool optimiseX,
+                                        const bool optimiseY,
+                                        const bool optimiseZ);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 33973c5e..77dd8318 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -12,7 +12,6 @@
 
 #include "_reg_ssd_gpu.h"
 #include "_reg_ssd_kernels.cu"
-#include <thrust/device_vector.h>
 
 /* *************************************************************** */
 reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() {
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 181b66f7..6fef3795 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -254,7 +254,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
 }
 /* *************************************************************** */
 void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
@@ -263,7 +263,7 @@ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float&
 }
 /* *************************************************************** */
 void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
@@ -272,7 +272,7 @@ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value
 }
 /* *************************************************************** */
 void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
@@ -281,7 +281,7 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr
 }
 /* *************************************************************** */
 void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
@@ -290,7 +290,7 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu
 }
 /* *************************************************************** */
 void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) {
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic;
+    const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
     const dim3 blockDims = dim3(blocks, 1, 1);
@@ -300,16 +300,16 @@ void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) {
 /* *************************************************************** */
 float reg_sumReduction_gpu(float *arrayCuda, const size_t& size) {
     thrust::device_ptr<float> dptr(arrayCuda);
-    return thrust::reduce(dptr, dptr + size, 0.f, thrust::plus<float>());
+    return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus<float>());
 }
 /* *************************************************************** */
 float reg_maxReduction_gpu(float *arrayCuda, const size_t& size) {
     thrust::device_ptr<float> dptr(arrayCuda);
-    return thrust::reduce(dptr, dptr + size, 0.f, thrust::maximum<float>());
+    return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::maximum<float>());
 }
 /* *************************************************************** */
 float reg_minReduction_gpu(float *arrayCuda, const size_t& size) {
     thrust::device_ptr<float> dptr(arrayCuda);
-    return thrust::reduce(dptr, dptr + size, 0.f, thrust::minimum<float>());
+    return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum<float>());
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 947d8065..8872a365 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -14,11 +14,8 @@
 
 #include "CudaCommon.hpp"
 #include "_reg_tools.h"
-#include <thrust/device_ptr.h>
-#include <thrust/reduce.h>
 
 /* *************************************************************** */
-extern "C++"
 void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
                                       const nifti_image *voxelImage,
                                       float4 *nodeImageCuda,
@@ -26,43 +23,32 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
                                       float weight,
                                       const mat44 *voxelToMillimetre);
 /* *************************************************************** */
-extern "C++"
 void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ,
                                                     const nifti_image *controlPointImage,
                                                     float4 *nmiGradientCuda);
 /* *************************************************************** */
-extern "C++"
 void reg_gaussianSmoothing_gpu(const nifti_image *image,
                                float4 *imageCuda,
                                const float& sigma,
                                const bool axisToSmooth[8]);
 /* *************************************************************** */
-extern "C++"
 void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
                                        float4 *imageCuda,
                                        const float *smoothingRadius);
 /* *************************************************************** */
-extern "C++"
 void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value);
 /* *************************************************************** */
-extern "C++"
 void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value);
 /* *************************************************************** */
-extern "C++"
 void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda);
 /* *************************************************************** */
-extern "C++"
 void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda);
 /* *************************************************************** */
-extern "C++"
 void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count);
 /* *************************************************************** */
-extern "C++"
 float reg_sumReduction_gpu(float *arrayCuda, const size_t& size);
 /* *************************************************************** */
-extern "C++"
 float reg_maxReduction_gpu(float *arrayCuda, const size_t& size);
 /* *************************************************************** */
-extern "C++"
 float reg_minReduction_gpu(float *arrayCuda, const size_t& size);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index d6cddd0b..9c3a5937 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -1,26 +1,5 @@
-#include <stdio.h>
-#include <assert.h>
-#include <cuda_runtime.h>
-#include <cuda.h>
-#include"_reg_resampling.h"
-#include"_reg_maths.h"
-#include "CudaCommon.hpp"
-#include"_reg_tools.h"
-#include"_reg_ReadWriteImage.h"
-#include <thrust/sort.h>
-#include <thrust/device_vector.h>
-#include <thrust/device_ptr.h>
-#include <thrust/gather.h>
-#include "affineDeformationKernel.h"
-//CUDA affine kernel
-/* *************************************************************** */
-__device__ __inline__ void getPosition(float* position, float* matrix, double* voxel, const unsigned idx)
-{
-   position[idx] = (float) ((double) matrix[idx * 4 + 0] * voxel[0] +
-         (double) matrix[idx * 4 + 1] * voxel[1] +
-         (double) matrix[idx * 4 + 2] * voxel[2] +
-         (double) matrix[idx * 4 + 3]);
-}
+#include"_reg_tools_gpu.h"
+
 /* *************************************************************** */
 __device__ __inline__ double getPosition(float* matrix, double* voxel, const unsigned idx)
 {
@@ -31,20 +10,20 @@ __device__ __inline__ double getPosition(float* matrix, double* voxel, const uns
           (double)matrix[index];
 }
 /* *************************************************************** */
-__global__ void affineKernel(float* transformationMatrix,
-                             float* defField,
-                             int* mask,
+__global__ void affineKernel(float *transformationMatrix,
+                             float *defField,
+                             const int *mask,
                              const uint3 dims,
-                             const unsigned long voxelNumber,
+                             const unsigned voxelNumber,
                              const bool composition)
 {
    // Get the current coordinate
    const unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned z = blockIdx.z * blockDim.z + threadIdx.z;
-   const unsigned long index = x + dims.x * (y + z * dims.y);
+   const unsigned index = x + dims.x * (y + z * dims.y);
 
-   if (z<dims.z && y<dims.y && x<dims.x &&  mask[index] >= 0)
+   if (z<dims.z && y<dims.y && x<dims.x && mask[index] >= 0)
    {
       double voxel[3];
       float *deformationFieldPtrX = &defField[index];
@@ -64,9 +43,9 @@ __global__ void affineKernel(float* transformationMatrix,
 /* *************************************************************** */
 void launchAffine(mat44 *affineTransformation,
                   nifti_image *deformationField,
-                  float **def_d,
-                  int **mask_d,
-                  float **trans_d,
+                  float *def_d,
+                  const int *mask_d,
+                  float *trans_d,
                   bool compose) {
 
    const unsigned xThreads = 8;
@@ -84,10 +63,10 @@ void launchAffine(mat44 *affineTransformation,
    const mat44 *targetMatrix = (deformationField->sform_code > 0) ? &(deformationField->sto_xyz) : &(deformationField->qto_xyz);
    mat44 transformationMatrix = compose ? *affineTransformation : reg_mat44_mul(affineTransformation, targetMatrix);
    mat44ToCptr(transformationMatrix, trans);
-   NR_CUDA_SAFE_CALL(cudaMemcpy(*trans_d, trans, 16 * sizeof(float), cudaMemcpyHostToDevice));
+   NR_CUDA_SAFE_CALL(cudaMemcpy(trans_d, trans, 16 * sizeof(float), cudaMemcpyHostToDevice));
    free(trans);
 
    uint3 dims_d = make_uint3(deformationField->nx, deformationField->ny, deformationField->nz);
-   affineKernel<<<G1_b, B1_b>>>(*trans_d, *def_d, *mask_d, dims_d, NiftiImage::calcVoxelNumber(deformationField, 3), compose);
+   affineKernel<<<G1_b, B1_b>>>(trans_d, def_d, mask_d, dims_d, (unsigned)NiftiImage::calcVoxelNumber(deformationField, 3), compose);
    NR_CUDA_CHECK_KERNEL(G1_b, B1_b);
 }
diff --git a/reg-lib/cuda/affineDeformationKernel.h b/reg-lib/cuda/affineDeformationKernel.h
index 80466e59..ad55e735 100644
--- a/reg-lib/cuda/affineDeformationKernel.h
+++ b/reg-lib/cuda/affineDeformationKernel.h
@@ -2,4 +2,4 @@
 
 #include "RNifti.h"
 
-void launchAffine(mat44 *affineTransformation, nifti_image *deformationField, float** def_d, int** mask_d, float** trans_d, bool compose = false);
\ No newline at end of file
+void launchAffine(mat44 *affineTransformation, nifti_image *deformationField, float* def_d, const int* mask_d, float* trans_d, bool compose = false);
\ No newline at end of file
diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu
index 82cb3c89..bc609b6b 100644
--- a/reg-lib/cuda/optimizeKernel.cu
+++ b/reg-lib/cuda/optimizeKernel.cu
@@ -3,10 +3,6 @@
 #include "cublas_v2.h"
 #include "cusolverDn.h"
 
-#include <thrust/device_vector.h>
-#include <thrust/device_ptr.h>
-#include <thrust/gather.h>
-#include <thrust/sort.h>
 #include <cmath>
 
 #include "_reg_maths.h"
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index c7e7d230..85656322 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -47,11 +47,6 @@ __device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, Da
    return;
 }
 /* *************************************************************** */
-__device__ __inline__ int cuda_reg_floor(double a)
-{
-   return (int) (floor(a));
-}
-/* *************************************************************** */
 template<class FieldTYPE>
 __device__ __inline__ void interpolantCubicSpline(FieldTYPE ratio, FieldTYPE *basis)
 {
@@ -231,8 +226,8 @@ __global__ void ResampleImage2D(float* floatingImage,
                 // real -> voxel; floating space
                 reg_mat44_mul_cuda<float>(sourceIJKMatrix, world, position);
 
-                previous[0] = cuda_reg_floor(position[0]);
-                previous[1] = cuda_reg_floor(position[1]);
+                previous[0] = Floor(position[0]);
+                previous[1] = Floor(position[1]);
 
                 relative[0] = (double)(position[0]) - (double)(previous[0]);
                 relative[1] = (double)(position[1]) - (double)(previous[1]);
@@ -324,9 +319,9 @@ __global__ void ResampleImage3D(float* floatingImage,
 				// real -> voxel; floating space
 				reg_mat44_mul_cuda<float>(sourceIJKMatrix, world, position);
 
-				previous[0] = cuda_reg_floor(position[0]);
-				previous[1] = cuda_reg_floor(position[1]);
-				previous[2] = cuda_reg_floor(position[2]);
+				previous[0] = Floor(position[0]);
+				previous[1] = Floor(position[1]);
+				previous[2] = Floor(position[2]);
 
                 relative[0] = (double)(position[0]) - (double)(previous[0]);
                 relative[1] = (double)(position[1]) - (double)(previous[1]);

From 43181d503aa34f68d1e19ffe86dc0846ff3aa718 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 8 Sep 2023 11:52:52 +0100
Subject: [PATCH 198/314] Add more float* operations

---
 niftyreg_build_version.txt               |   2 +-
 reg-lib/cuda/CudaCommon.hpp              |   1 +
 reg-lib/cuda/FloatOps.hpp                | 158 +++++++++++++++++++++++
 reg-lib/cuda/_reg_common_cuda_kernels.cu |  47 -------
 4 files changed, 160 insertions(+), 48 deletions(-)
 create mode 100644 reg-lib/cuda/FloatOps.hpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 47eb669b..4dab36bb 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-316
+317
diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp
index c94b6313..65d8b9b1 100644
--- a/reg-lib/cuda/CudaCommon.hpp
+++ b/reg-lib/cuda/CudaCommon.hpp
@@ -22,6 +22,7 @@
 #include <thrust/sort.h>
 #include "_reg_tools.h"
 #include "CudaContext.hpp"
+#include "FloatOps.hpp"
 
 /* *************************************************************** */
 #ifndef __VECTOR_TYPES_H__
diff --git a/reg-lib/cuda/FloatOps.hpp b/reg-lib/cuda/FloatOps.hpp
new file mode 100644
index 00000000..23f8b8de
--- /dev/null
+++ b/reg-lib/cuda/FloatOps.hpp
@@ -0,0 +1,158 @@
+/*
+ *  Copyright (c) 2009-2018, University College London
+ *  Copyright (c) 2018, NiftyReg Developers.
+ *  All rights reserved.
+ *  See the LICENSE.txt file in the root folder
+ */
+
+#pragma once
+
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float2 operator*(const T& a, const float2& b) {
+    return { static_cast<float>(a) * b.x, static_cast<float>(a) * b.y };
+}
+template<typename T>
+__device__ __inline__ float2 operator*(const float2& a, const T& b) {
+    return b * a;
+}
+__device__ __inline__ float2 operator*(const float2& a, const float2& b) {
+    return { a.x * b.x, a.y * b.y };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float3 operator*(const T& a, const float3& b) {
+    return { static_cast<float>(a) * b.x, static_cast<float>(a) * b.y, static_cast<float>(a) * b.z };
+}
+template<typename T>
+__device__ __inline__ float3 operator*(const float3& a, const T& b) {
+    return b * a;
+}
+__device__ __inline__ float3 operator*(const float3& a, const float3& b) {
+    return { a.x * b.x, a.y * b.y, a.z * b.z };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float4 operator*(const T& a, const float4& b) {
+    return { static_cast<float>(a) * b.x, static_cast<float>(a) * b.y, static_cast<float>(a) * b.z, static_cast<float>(a) * b.w };
+}
+template<typename T>
+__device__ __inline__ float4 operator*(const float4& a, const T& b) {
+    return b * a;
+}
+__device__ __inline__ float4 operator*(const float4& a, const float4& b) {
+    return { a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float2 operator/(const T& a, const float2& b) {
+    return { static_cast<float>(a) / b.x, static_cast<float>(a) / b.y };
+}
+template<typename T>
+__device__ __inline__ float2 operator/(const float2& a, const T& b) {
+    return { a.x / static_cast<float>(b), a.y / static_cast<float>(b) };
+}
+__device__ __inline__ float2 operator/(const float2& a, const float2& b) {
+    return { a.x / b.x, a.y / b.y };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float3 operator/(const T& a, const float3& b) {
+    return { static_cast<float>(a) / b.x, static_cast<float>(a) / b.y, static_cast<float>(a) / b.z };
+}
+template<typename T>
+__device__ __inline__ float3 operator/(const float3& a, const T& b) {
+    return { a.x / static_cast<float>(b), a.y / static_cast<float>(b), a.z / static_cast<float>(b) };
+}
+__device__ __inline__ float3 operator/(const float3& a, const float3& b) {
+    return { a.x / b.x, a.y / b.y, a.z / b.z };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float4 operator/(const T& a, const float4& b) {
+    return { static_cast<float>(a) / b.x, static_cast<float>(a) / b.y, static_cast<float>(a) / b.z, static_cast<float>(a) / b.w };
+}
+template<typename T>
+__device__ __inline__ float4 operator/(const float4& a, const T& b) {
+    return { a.x / static_cast<float>(b), a.y / static_cast<float>(b), a.z / static_cast<float>(b), a.w / static_cast<float>(b) };
+}
+__device__ __inline__ float4 operator/(const float4& a, const float4& b) {
+    return { a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float2 operator+(const T& a, const float2& b) {
+    return { static_cast<float>(a) + b.x, static_cast<float>(a) + b.y };
+}
+template<typename T>
+__device__ __inline__ float2 operator+(const float2& a, const T& b) {
+    return b + a;
+}
+__device__ __inline__ float2 operator+(const float2& a, const float2& b) {
+    return { a.x + b.x, a.y + b.y };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float3 operator+(const T& a, const float3& b) {
+    return { static_cast<float>(a) + b.x, static_cast<float>(a) + b.y, static_cast<float>(a) + b.z };
+}
+template<typename T>
+__device__ __inline__ float3 operator+(const float3& a, const T& b) {
+    return b + a;
+}
+__device__ __inline__ float3 operator+(const float3& a, const float3& b) {
+    return { a.x + b.x, a.y + b.y, a.z + b.z };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float4 operator+(const T& a, const float4& b) {
+    return { static_cast<float>(a) + b.x, static_cast<float>(a) + b.y, static_cast<float>(a) + b.z, static_cast<float>(a) + b.w };
+}
+template<typename T>
+__device__ __inline__ float4 operator+(const float4& a, const T& b) {
+    return b + a;
+}
+__device__ __inline__ float4 operator+(const float4& a, const float4& b) {
+    return { a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float2 operator-(const T& a, const float2& b) {
+    return { static_cast<float>(a) - b.x, static_cast<float>(a) - b.y };
+}
+template<typename T>
+__device__ __inline__ float2 operator-(const float2& a, const T& b) {
+    return { a.x - static_cast<float>(b), a.y - static_cast<float>(b) };
+}
+__device__ __inline__ float2 operator-(const float2& a, const float2& b) {
+    return { a.x - b.x, a.y - b.y };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float3 operator-(const T& a, const float3& b) {
+    return { static_cast<float>(a) - b.x, static_cast<float>(a) - b.y, static_cast<float>(a) - b.z };
+}
+template<typename T>
+__device__ __inline__ float3 operator-(const float3& a, const T& b) {
+    return { a.x - static_cast<float>(b), a.y - static_cast<float>(b), a.z - static_cast<float>(b) };
+}
+__device__ __inline__ float3 operator-(const float3& a, const float3& b) {
+    return { a.x - b.x, a.y - b.y, a.z - b.z };
+}
+/* *************************************************************** */
+template<typename T>
+__device__ __inline__ float4 operator-(const T& a, const float4& b) {
+    return { static_cast<float>(a) - b.x, static_cast<float>(a) - b.y, static_cast<float>(a) - b.z, static_cast<float>(a) - b.w };
+}
+template<typename T>
+__device__ __inline__ float4 operator-(const float4& a, const T& b) {
+    return { a.x - static_cast<float>(b), a.y - static_cast<float>(b), a.z - static_cast<float>(b), a.w - static_cast<float>(b) };
+}
+__device__ __inline__ float4 operator-(const float4& a, const float4& b) {
+    return { a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w };
+}
+/* *************************************************************** */
+__device__ __inline__ double2 operator+(const double2& a, const double2& b) {
+    return { a.x + b.x, a.y + b.y };
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index 6d9a4361..7e944323 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -7,53 +7,6 @@
 
 #pragma once
 
-/* *************************************************************** */
-__device__ __inline__ float2 operator*(const float& a, const float2& b) {
-    return { a * b.x, a * b.y };
-}
-__device__ __inline__ float3 operator*(const float& a, const float3& b) {
-    return { a * b.x, a * b.y, a * b.z };
-}
-__device__ __inline__ float3 operator*(const float3& a, const float3& b) {
-    return { a.x * b.x, a.y * b.y, a.z * b.z };
-}
-__device__ __inline__ float4 operator*(const float4& a, const float4& b) {
-    return { a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w };
-}
-__device__ __inline__ float4 operator*(const float& a, const float4& b) {
-    return { a * b.x, a * b.y, a * b.z, a * b.w };
-}
-/* *************************************************************** */
-__device__ __inline__ float2 operator/(const float2& a, const float2& b) {
-    return { a.x / b.x, a.y / b.y };
-}
-__device__ __inline__ float3 operator/(const float3& a, const float& b) {
-    return { a.x / b, a.y / b, a.z / b };
-}
-__device__ __inline__ float3 operator/(const float3& a, const float3& b) {
-    return { a.x / b.x, a.y / b.y, a.z / b.z };
-}
-/* *************************************************************** */
-__device__ __inline__ float2 operator+(const float2& a, const float2& b) {
-    return { a.x + b.x, a.y + b.y };
-}
-__device__ __inline__ float4 operator+(const float4& a, const float4& b) {
-    return { a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w };
-}
-__device__ __inline__ float3 operator+(const float3& a, const float3& b) {
-    return { a.x + b.x, a.y + b.y, a.z + b.z };
-}
-/* *************************************************************** */
-__device__ __inline__ float3 operator-(const float3& a, const float3& b) {
-    return { a.x - b.x, a.y - b.y, a.z - b.z };
-}
-__device__ __inline__ float4 operator-(const float4& a, const float4& b) {
-    return { a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w };
-}
-/* *************************************************************** */
-__device__ __inline__ double2 operator+(const double2& a, const double2& b) {
-    return { a.x + b.x, a.y + b.y };
-}
 /* *************************************************************** */
 __device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) {
     out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]);

From 9fe839991e8ff2ec20b0294cedcb7a888ecd1bc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 8 Sep 2023 11:58:12 +0100
Subject: [PATCH 199/314] Add CUDA image operations

---
 niftyreg_build_version.txt     |  2 +-
 reg-lib/cuda/_reg_tools_gpu.cu | 22 ++++++++++++++++++++++
 reg-lib/cuda/_reg_tools_gpu.h  |  8 ++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 4dab36bb..dda3451c 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-317
+318
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 6fef3795..08089854 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -313,3 +313,25 @@ float reg_minReduction_gpu(float *arrayCuda, const size_t& size) {
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum<float>());
 }
 /* *************************************************************** */
+template<typename Operation>
+void reg_operationOnImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda, Operation operation) {
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+    thrust::transform(thrust::device, img1Cuda, img1Cuda + voxelNumber, img2Cuda, img1Cuda, operation);
+}
+/* *************************************************************** */
+void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
+    reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::plus<float4>());
+}
+/* *************************************************************** */
+void reg_subtractImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
+    reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::minus<float4>());
+}
+/* *************************************************************** */
+void reg_multiplyImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
+    reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::multiplies<float4>());
+}
+/* *************************************************************** */
+void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
+    reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::divides<float4>());
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 8872a365..967d6afb 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -52,3 +52,11 @@ float reg_maxReduction_gpu(float *arrayCuda, const size_t& size);
 /* *************************************************************** */
 float reg_minReduction_gpu(float *arrayCuda, const size_t& size);
 /* *************************************************************** */
+void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
+/* *************************************************************** */
+void reg_subtractImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
+/* *************************************************************** */
+void reg_multiplyImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
+/* *************************************************************** */
+void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
+/* *************************************************************** */

From 557cc057231aa4f85ad4da02bf650669e3e8894d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 8 Sep 2023 12:09:45 +0100
Subject: [PATCH 200/314] Add reg_getMinValue_gpu() and reg_getMaxValue_gpu()

---
 niftyreg_build_version.txt     |  2 +-
 reg-lib/cuda/_reg_tools_gpu.cu | 53 ++++++++++++++++++++++++++++++++++
 reg-lib/cuda/_reg_tools_gpu.h  |  4 +++
 3 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index dda3451c..9b5c4542 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-318
+319
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 08089854..0b6fbd56 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -335,3 +335,56 @@ void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4
     reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::divides<float4>());
 }
 /* *************************************************************** */
+DEVICE static float Min(const float& lhs, const float& rhs) {
+    return lhs < rhs ? lhs : rhs;
+}
+DEVICE static float Max(const float& lhs, const float& rhs) {
+    return lhs > rhs ? lhs : rhs;
+}
+using MinMaxFunc = decltype(&Min);
+__device__ static MinMaxFunc minCuda = Min;
+__device__ static MinMaxFunc maxCuda = Max;
+/* *************************************************************** */
+float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint, const bool calcMin) {
+    if (timePoint < -1 || timePoint >= img->nt)
+        NR_FATAL_ERROR("The required time point does not exist");
+
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+    const int timePoints = std::clamp(timePoint > -1 ? timePoint : int(NiftiImage::calcVoxelNumber(img, 7) / voxelNumber), 1, 4);
+    const float initValue = calcMin ? std::numeric_limits<float>::max() : std::numeric_limits<float>::lowest();
+    float4 result{ initValue, initValue, initValue, initValue };
+
+    // Set the min/max functions
+    MinMaxFunc minMaxCuda, minMax = calcMin ? Min : Max;
+    cudaMemcpyFromSymbol(&minMaxCuda, calcMin ? minCuda : maxCuda, sizeof(MinMaxFunc));
+
+    result = thrust::reduce(thrust::device, imgCuda, imgCuda + voxelNumber, make_float4(initValue, initValue, initValue, initValue),
+                            [=]__device__(const float4& lhs, const float4& rhs) {
+        float4 result{ initValue, initValue, initValue, initValue };
+        switch (timePoints) {
+        case 4:
+            result.w = minMaxCuda(lhs.w, rhs.w);
+            if (timePoint > -1) break;
+        case 3:
+            result.z = minMaxCuda(lhs.z, rhs.z);
+            if (timePoint > -1) break;
+        case 2:
+            result.y = minMaxCuda(lhs.y, rhs.y);
+            if (timePoint > -1) break;
+        case 1:
+            result.x = minMaxCuda(lhs.x, rhs.x);
+        }
+        return result;
+    });
+
+    return minMax(minMax(result.x, result.y), minMax(result.z, result.w));
+}
+/* *************************************************************** */
+float reg_getMinValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
+    return reg_getMinMaxValue_gpu(img, imgCuda, timePoint, true);
+}
+/* *************************************************************** */
+float reg_getMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
+    return reg_getMinMaxValue_gpu(img, imgCuda, timePoint, false);
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 967d6afb..4532afab 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -60,3 +60,7 @@ void reg_multiplyImages_gpu(const nifti_image *img, float4 *img1Cuda, const floa
 /* *************************************************************** */
 void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
 /* *************************************************************** */
+float reg_getMinValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1);
+/* *************************************************************** */
+float reg_getMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1);
+/* *************************************************************** */

From 754814745b7b2735875be4b547d96c51f7151f91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 8 Sep 2023 12:11:36 +0100
Subject: [PATCH 201/314] Implement CudaCompute::GetDefFieldFromVelocityGrid()
 #92

---
 niftyreg_build_version.txt                   |   2 +-
 reg-lib/cuda/CudaCompute.cpp                 |  14 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu | 204 +++++++++++++++----
 reg-lib/cuda/_reg_localTransformation_gpu.h  |  11 +-
 reg-lib/cuda/_reg_tools_gpu.cu               |   9 -
 reg-lib/cuda/_reg_tools_gpu.h                |   2 -
 reg-lib/cuda/_reg_tools_kernels.cu           |   6 -
 7 files changed, 175 insertions(+), 73 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9b5c4542..18eed135 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-319
+320
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 8ebdb816..e1d6d4df 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -154,15 +154,13 @@ void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) {
     Compute::GetApproximatedGradient(opt);
 }
 /* *************************************************************** */
-void CudaCompute::GetDefFieldFromVelocityGrid(bool updateStepNumber) {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
-    Compute::GetDefFieldFromVelocityGrid(updateStepNumber);
-    // Transfer the data back to the CUDA device
+void CudaCompute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    // TODO update only the required ones
-    con.UpdateControlPointGrid();
-    con.UpdateDeformationField();
+    reg_spline_getDefFieldFromVelocityGrid_gpu(con.F3dContent::GetControlPointGrid(),
+                                               con.F3dContent::GetDeformationField(),
+                                               con.GetControlPointGridCuda(),
+                                               con.GetDeformationFieldCuda(),
+                                               updateStepNumber);
 }
 /* *************************************************************** */
 void CudaCompute::VoxelCentricToNodeCentric(float weight) {
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index be602b82..573eacd5 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -12,6 +12,7 @@
 
 #include "_reg_localTransformation_gpu.h"
 #include "_reg_localTransformation_kernels.cu"
+#include "_reg_globalTransformation_gpu.h"
 
 /* *************************************************************** */
 void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
@@ -505,48 +506,36 @@ void reg_getDisplacementFromDeformation_gpu(const nifti_image *image, float4 *im
     reg_getDeformationFromDisplacement_gpu(image, imageCuda, true);
 }
 /* *************************************************************** */
-void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPointImage,
-                                                 const nifti_image *deformationField,
-                                                 const float4 *controlPointImageCuda,
-                                                 float4 *deformationFieldCuda) {
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
-
-    // Create a mask array where no voxel are excluded
-    int *maskCuda = nullptr;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&maskCuda, voxelNumber * sizeof(int)));
-    reg_fillMaskArray_gpu(maskCuda, voxelNumber);
-
-    // Define some variables for the deformation fields
-    float4 *tempDefCuda = nullptr;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&tempDefCuda, voxelNumber * sizeof(float4)));
-
-    // The deformation field is computed
-    reg_spline_getDeformationField_gpu(controlPointImage, deformationField, controlPointImageCuda,
-                                       deformationFieldCuda, maskCuda, voxelNumber, true); // non-interpolant spline is used
-
-    // The deformation field is converted into a displacement field
-    reg_getDisplacementFromDeformation_gpu(deformationField, deformationFieldCuda);
-
-    // Scaling of the deformation field
-    const unsigned squaringNumber = (unsigned)fabs(controlPointImage->intent_p1);
-    const float scalingValue = pow(2.f, (float)squaringNumber);
-    // Backward/forward deformation field is scaled down
-    reg_multiplyValue_gpu((int)voxelNumber, deformationFieldCuda, (controlPointImage->intent_p1 < 0  ? -1.f : 1.f) / scalingValue);
-
-    // The displacement field is converted back into a deformation field
-    reg_getDeformationFromDisplacement_gpu(deformationField, deformationFieldCuda);
-
-    // The deformation field is squared
-    for (unsigned i = 0; i < squaringNumber; ++i) {
-        // The deformation field arrays are updated
-        NR_CUDA_SAFE_CALL(cudaMemcpy(tempDefCuda, deformationFieldCuda, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
-
-        // The deformation fields are composed
-        reg_defField_compose_gpu(deformationField, tempDefCuda, deformationFieldCuda, voxelNumber);
-    }
-
-    NR_CUDA_SAFE_CALL(cudaFree(tempDefCuda));
-    NR_CUDA_SAFE_CALL(cudaFree(maskCuda));
+void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
+                                                 const nifti_image *flowField,
+                                                 float4 *velocityFieldGridCuda,
+                                                 float4 *flowFieldCuda,
+                                                 const int *maskCuda,
+                                                 const size_t activeVoxelNumber) {
+    // Check first if the velocity field is actually a velocity field
+    if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID)
+        NR_FATAL_ERROR("The provided grid is not a velocity field");
+
+    // Initialise the flow field with an identity transformation
+    reg_getDeformationFromDisplacement_gpu(flowField, flowFieldCuda);
+
+    // fake the number of extension here to avoid the second half of the affine
+    const auto oldNumExt = velocityFieldGrid->num_ext;
+    if (oldNumExt > 1)
+        velocityFieldGrid->num_ext = 1;
+
+    // Copy over the number of required squaring steps
+    // The initial flow field is generated using cubic B-Spline interpolation/approximation
+    // TODO Composition is needed
+    reg_spline_getDeformationField_gpu(velocityFieldGrid,
+                                       flowField,
+                                       velocityFieldGridCuda,
+                                       flowFieldCuda,
+                                       maskCuda,
+                                       activeVoxelNumber,
+                                       true); // bspline
+
+    velocityFieldGrid->num_ext = oldNumExt;
 }
 /* *************************************************************** */
 void reg_defField_compose_gpu(const nifti_image *deformationField,
@@ -580,6 +569,137 @@ void reg_defField_compose_gpu(const nifti_image *deformationField,
     }
 }
 /* *************************************************************** */
+void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField,
+                                                       nifti_image *deformationField,
+                                                       float4 *flowFieldCuda,
+                                                       float4 *deformationFieldCuda,
+                                                       const int *maskCuda,
+                                                       const bool updateStepNumber) {
+    // Check first if the velocity field is actually a velocity field
+    if (flowField->intent_p1 != DEF_VEL_FIELD)
+        NR_FATAL_ERROR("The provided field is not a velocity field");
+
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
+
+    // Remove the affine component from the flow field
+    NiftiImage affineOnly;
+    thrust::device_vector<float4> affineOnlyCuda;
+    if (flowField->num_ext > 0) {
+        if (flowField->ext_list[0].edata != nullptr) {
+            // Create a field that contains the affine component only
+            affineOnly = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo);
+            affineOnlyCuda.resize(voxelNumber);
+            reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
+                                               affineOnly, affineOnlyCuda.data().get());
+            reg_subtractImages_gpu(flowField, flowFieldCuda, affineOnlyCuda.data().get());
+        }
+    } else reg_getDisplacementFromDeformation_gpu(flowField, flowFieldCuda);
+
+    // Compute the number of scaling value to ensure unfolded transformation
+    int squaringNumber = 1;
+    if (updateStepNumber || flowField->intent_p2 == 0) {
+        // Check the largest value
+        float extrema = fabsf(reg_getMinValue_gpu(flowField, flowFieldCuda, -1));
+        const float temp = reg_getMaxValue_gpu(flowField, flowFieldCuda, -1);
+        extrema = std::max(extrema, temp);
+        // Check the values for scaling purpose
+        float maxLength;
+        if (deformationField->nz > 1)
+            maxLength = 0.28f;  // sqrt(0.5^2/3)
+        else maxLength = 0.35f; // sqrt(0.5^2/2)
+        while (extrema / pow(2.0f, squaringNumber) >= maxLength)
+            squaringNumber++;
+        // The minimal number of step is set to 6 by default
+        squaringNumber = squaringNumber < 6 ? 6 : squaringNumber;
+        // Set the number of squaring step in the flow field
+        if (fabs(flowField->intent_p2) != squaringNumber)
+            NR_WARN("Changing from " << Round(fabs(flowField->intent_p2)) << " to " << abs(squaringNumber) <<
+                    " squaring step (equivalent to scaling down by " << (int)pow(2.0f, squaringNumber) << ")");
+        // Update the number of squaring step required
+        flowField->intent_p2 = static_cast<float>(flowField->intent_p2 >= 0 ? squaringNumber : -squaringNumber);
+    } else squaringNumber = static_cast<int>(fabsf(flowField->intent_p2));
+
+    // The displacement field is scaled
+    const float scalingValue = 1.f / pow(2.f, static_cast<float>(std::abs(squaringNumber)));
+    // Backward/forward deformation field is scaled down
+    reg_multiplyValue_gpu(voxelNumber, flowFieldCuda, flowField->intent_p2 < 0 ? -scalingValue : scalingValue);
+
+    // Conversion from displacement to deformation
+    reg_getDeformationFromDisplacement_gpu(flowField, flowFieldCuda);
+
+    // The computed scaled deformation field is copied over
+    thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda);
+
+    // The deformation field is squared
+    for (int i = 0; i < squaringNumber; ++i) {
+        // The deformation field is applied to itself
+        reg_defField_compose_gpu(deformationField, deformationFieldCuda, flowFieldCuda, voxelNumber);
+        // The computed scaled deformation field is copied over
+        thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda);
+        NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
+    }
+    // The affine component of the transformation is restored
+    if (affineOnly) {
+        reg_getDisplacementFromDeformation_gpu(deformationField, deformationFieldCuda);
+        reg_addImages_gpu(deformationField, deformationFieldCuda, affineOnlyCuda.data().get());
+    }
+    deformationField->intent_p1 = DEF_FIELD;
+    deformationField->intent_p2 = 0;
+    // If required an affine component is composed
+    // TODO Composition is needed
+    if (flowField->num_ext > 1)
+        reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[1].edata),
+                                           deformationField, deformationFieldCuda);
+}
+/* *************************************************************** */
+void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
+                                                nifti_image *deformationField,
+                                                float4 *velocityFieldGridCuda,
+                                                float4 *deformationFieldCuda,
+                                                const bool updateStepNumber) {
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
+
+    // Create a mask array where no voxel is excluded
+    thrust::device_vector<int> maskCuda(voxelNumber);
+    thrust::sequence(maskCuda.begin(), maskCuda.end());
+
+    // Clean any extension in the deformation field as it is unexpected
+    nifti_free_extensions(deformationField);
+
+    // Check if the velocity field is actually a velocity field
+    if (velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) {
+        // Use the spline approximation to generate the deformation field
+        reg_spline_getDeformationField_gpu(velocityFieldGrid,
+                                           deformationField,
+                                           velocityFieldGridCuda,
+                                           deformationFieldCuda,
+                                           maskCuda.data().get(),
+                                           voxelNumber,
+                                           true); // bspline
+    } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) {
+        // Create an image to store the flow field
+        NiftiImage flowField(deformationField, NiftiImage::Copy::ImageInfo);
+        flowField.setIntentName("NREG_TRANS"s);
+        flowField->intent_code = NIFTI_INTENT_VECTOR;
+        flowField->intent_p1 = DEF_VEL_FIELD;
+        flowField->intent_p2 = velocityFieldGrid->intent_p2;
+        if (velocityFieldGrid->num_ext > 0)
+            nifti_copy_extensions(flowField, velocityFieldGrid);
+
+        // Allocate CUDA memory for the flow field
+        thrust::device_vector<float4> flowFieldCuda(flowField.nVoxelsPerVolume());
+
+        // Generate the velocity field
+        reg_spline_getFlowFieldFromVelocityGrid_gpu(velocityFieldGrid, flowField, velocityFieldGridCuda,
+                                                    flowFieldCuda.data().get(), maskCuda.data().get(), voxelNumber);
+        // Exponentiate the flow field
+        reg_defField_getDeformationFieldFromFlowField_gpu(flowField, deformationField, flowFieldCuda.data().get(),
+                                                          deformationFieldCuda, maskCuda.data().get(), updateStepNumber);
+        // Update the number of step required. No action otherwise
+        velocityFieldGrid->intent_p2 = flowField->intent_p2;
+    } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation");
+}
+/* *************************************************************** */
 void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
                                         const float4 *deformationFieldCuda,
                                         float *jacobianMatricesCuda) {
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index aa8aee88..b55f97df 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -48,16 +48,17 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
                                      float4 *controlPointImageCuda,
                                      const bool approx);
 /* *************************************************************** */
-void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPointImage,
-                                                 const nifti_image *deformationField,
-                                                 const float4 *controlPointImageCuda,
-                                                 float4 *deformationFieldCuda);
-/* *************************************************************** */
 void reg_defField_compose_gpu(const nifti_image *deformationField,
                               const float4 *deformationFieldCuda,
                               float4 *deformationFieldOutCuda,
                               const size_t activeVoxelNumber);
 /* *************************************************************** */
+void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
+                                                nifti_image *deformationField,
+                                                float4 *velocityFieldGridCuda,
+                                                float4 *deformationFieldCuda,
+                                                const bool updateStepNumber);
+/* *************************************************************** */
 void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
                                         const float4 *deformationFieldCuda,
                                         float *jacobianMatricesCuda);
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 0b6fbd56..7bc83d81 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -289,15 +289,6 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) {
-    const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
-    const dim3 gridDims = dim3(grids, grids, 1);
-    const dim3 blockDims = dim3(blocks, 1, 1);
-    reg_fillMaskArray_kernel<<<gridDims, blockDims>>>(arrayCuda, (unsigned)count);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-}
-/* *************************************************************** */
 float reg_sumReduction_gpu(float *arrayCuda, const size_t& size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus<float>());
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 4532afab..4444e7e8 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -44,8 +44,6 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr
 /* *************************************************************** */
 void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda);
 /* *************************************************************** */
-void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count);
-/* *************************************************************** */
 float reg_sumReduction_gpu(float *arrayCuda, const size_t& size);
 /* *************************************************************** */
 float reg_maxReduction_gpu(float *arrayCuda, const size_t& size);
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index 0f033d2d..a571970b 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -272,9 +272,3 @@ __global__ void reg_addArrays_kernel_float4(float4 *array1, float4 *array2, cons
     }
 }
 /* *************************************************************** */
-__global__ void reg_fillMaskArray_kernel(int *array, const unsigned count) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < count)
-        array[tid] = tid;
-}
-/* *************************************************************** */

From 3557636de01adf1a035ddab8014a1b72f22af936 Mon Sep 17 00:00:00 2001
From: mmodat <marc.modat@gmail.com>
Date: Fri, 8 Sep 2023 12:46:35 +0100
Subject: [PATCH 202/314] #92: fix issue in reg_resample - fix failing test -
 fix output display

---
 .gitignore                         |  1 +
 niftyreg_build_version.txt         |  2 +-
 reg-apps/reg_average.cpp           | 20 +-----
 reg-apps/reg_resample.cpp          | 76 +++++++++++++----------
 reg-lib/Content.cpp                |  6 +-
 reg-lib/Debug.hpp                  | 25 +++++---
 reg-lib/_reg_aladin_sym.cpp        |  8 +--
 reg-lib/_reg_f3d.cpp               | 24 ++++----
 reg-lib/_reg_f3d2.cpp              |  4 +-
 reg-lib/cpu/_reg_globalTrans.cpp   |  4 --
 reg-lib/cpu/_reg_localTrans.cpp    | 41 +++++++++++++
 reg-lib/cpu/_reg_localTrans.h      | 10 +++
 reg-lib/cpu/_reg_nmi.cpp           | 99 ++++++++++++++++++++++++------
 reg-lib/cpu/_reg_nmi.h             | 13 +++-
 reg-test/reg_test_common.h         | 29 ++-------
 reg-test/reg_test_nmi_gradient.cpp |  1 +
 16 files changed, 237 insertions(+), 126 deletions(-)

diff --git a/.gitignore b/.gitignore
index 34fd63b5..de49771d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,7 @@ CMakeSettings.json
 
 # Build
 build*
+out*
 
 # Doxygen
 html
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 66953656..47eb669b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-315
+316
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index 07f7d47c..d4bea706 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -382,23 +382,8 @@ int compute_average_image(nifti_image *averageImage,
    // Loop over all input images
    for(size_t i=0; i<imageNumber; ++i){
       // Generate a deformation field defined by the average final
-      nifti_image *deformationField=nifti_copy_nim_info(averageImage);
-      deformationField->ndim=deformationField->dim[0]=5;
-      deformationField->nt=deformationField->dim[4]=1;
-      deformationField->nu=deformationField->dim[5]=deformationField->nz>1?3:2;
-      deformationField->nvox=NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim);
-      deformationField->nbyper=sizeof(float);
-      deformationField->datatype=NIFTI_TYPE_FLOAT32;
-      deformationField->intent_code=NIFTI_INTENT_VECTOR;
-      memset(deformationField->intent_name, 0, 16);
-      strcpy(deformationField->intent_name,"NREG_TRANS");
-      deformationField->scl_slope=1.f;
-      deformationField->scl_inter=0.f;
-      deformationField->intent_p1=DISP_FIELD;
-      deformationField->data=calloc(deformationField->nvox, deformationField->nbyper);
-      reg_tools_multiplyValueToImage(deformationField,deformationField,0.f);
-      // Set the transformation to identity
-      reg_getDeformationFromDisplacement(deformationField);
+      NiftiImage deformationField;
+      reg_createDeformationField<float>(deformationField, averageImage);
       // Compute the transformation if required
       if(inputNRRName!=nullptr){
          nifti_image *current_transformation = reg_io_ReadImageFile(inputNRRName[i]);
@@ -465,7 +450,6 @@ int compute_average_image(nifti_image *averageImage,
                         nullptr,
                         interpolation_order,
                         std::numeric_limits<float>::quiet_NaN());
-      nifti_image_free(deformationField);
       nifti_image_free(current_input_image);
       // Add the image to the average
       remove_nan_and_add(averageImage, warpedImage, definedValue);
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index 366cb4d5..630a46cc 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -277,7 +277,7 @@ int main(int argc, char **argv)
    NR_VERBOSE_APP("Floating image name: " << floatingImage->fname);
    NR_VERBOSE_APP("\t" << floatingImage->nx << "x" << floatingImage->ny << "x" << floatingImage->nz << " voxels, " << floatingImage->nt << " volumes");
    NR_VERBOSE_APP("\t" << floatingImage->dx << "x" << floatingImage->dy << "x" << floatingImage->dz << " mm");
-   NR_VERBOSE_APP("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n");
+   NR_VERBOSE_APP("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 
    /* *********************** */
    /* READ THE TRANSFORMATION */
@@ -313,9 +313,6 @@ int main(int argc, char **argv)
    // Create a deformation field
    nifti_image *deformationFieldImage = nifti_copy_nim_info(referenceImage);
    deformationFieldImage->dim[0]=deformationFieldImage->ndim=5;
-   deformationFieldImage->dim[1]=deformationFieldImage->nx=referenceImage->nx;
-   deformationFieldImage->dim[2]=deformationFieldImage->ny=referenceImage->ny;
-   deformationFieldImage->dim[3]=deformationFieldImage->nz=referenceImage->nz;
    deformationFieldImage->dim[4]=deformationFieldImage->nt=1;
    deformationFieldImage->pixdim[4]=deformationFieldImage->dt=1.0;
    deformationFieldImage->dim[5]=deformationFieldImage->nu=referenceImage->nz>1?3:2;
@@ -336,10 +333,14 @@ int main(int argc, char **argv)
    }
    deformationFieldImage->data = calloc(deformationFieldImage->nvox, deformationFieldImage->nbyper);
 
-   // Initialise the deformation field with an identity transformation
+   // Initialise as a displacement field with an identity transformation
+   deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR;
+   memset(deformationFieldImage->intent_name, 0, 16);
+   strcpy(deformationFieldImage->intent_name, "NREG_TRANS");
+   deformationFieldImage->intent_p1 = DISP_FIELD;
    reg_tools_multiplyValueToImage(deformationFieldImage,deformationFieldImage,0.f);
+   // Convert it then to an deformation field with identity
    reg_getDeformationFromDisplacement(deformationFieldImage);
-   deformationFieldImage->intent_p1=DEF_FIELD;
 
    // Compute the transformation to apply
    if(inputTransformationImage!=nullptr)
@@ -348,40 +349,51 @@ int main(int argc, char **argv)
       {
       case LIN_SPLINE_GRID:
       case CUB_SPLINE_GRID:
-         reg_spline_getDeformationField(inputTransformationImage,
-                                        deformationFieldImage,
-                                        nullptr,
-                                        false,
-                                        true);
-         break;
+          NR_VERBOSE_APP("Input transformation is a cubic spline grid");
+          reg_spline_getDeformationField(inputTransformationImage,
+              deformationFieldImage,
+              nullptr, // no mask
+              true, // composition is used,
+              true); // b-spline are used
+          NR_VERBOSE_APP("Input transformation is converted to a deformation field");
+          break;
       case DISP_VEL_FIELD:
-         reg_getDeformationFromDisplacement(inputTransformationImage);
+          NR_VERBOSE_APP("Input transformation is a displacement velocity field");
+          reg_getDeformationFromDisplacement(inputTransformationImage);
+          NR_VERBOSE_APP("Input transformation is converted to a deformation velocity field");
       case DEF_VEL_FIELD:
          {
-            nifti_image *tempFlowField = nifti_dup(*deformationFieldImage);
-            reg_defField_compose(inputTransformationImage,
-                                 tempFlowField,
-                                 nullptr);
-            tempFlowField->intent_p1=inputTransformationImage->intent_p1;
-            tempFlowField->intent_p2=inputTransformationImage->intent_p2;
-            reg_defField_getDeformationFieldFromFlowField(tempFlowField,
-                                                          deformationFieldImage,
-                                                          false);
-            nifti_image_free(tempFlowField);
-         }
-         break;
+          NR_VERBOSE_APP("Input transformation is a deformation velocity field");
+          nifti_image *tempFlowField = nifti_dup(*deformationFieldImage);
+          reg_defField_compose(inputTransformationImage,
+                               tempFlowField,
+                               nullptr);
+          tempFlowField->intent_p1=inputTransformationImage->intent_p1;
+          tempFlowField->intent_p2=inputTransformationImage->intent_p2;
+          reg_defField_getDeformationFieldFromFlowField(tempFlowField,
+                                                        deformationFieldImage,
+                                                        false);
+          nifti_image_free(tempFlowField);
+          NR_VERBOSE_APP("Input transformation is converted to a deformation field");
+          }
+          break;
       case SPLINE_VEL_GRID:
-         reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage,
+          NR_VERBOSE_APP("Input transformation is a spine velocity grid");
+          reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage,
                                                 deformationFieldImage,
                                                 false);
-         break;
+          NR_VERBOSE_APP("Input transformation is converted to a deformation field");
+          break;
       case DISP_FIELD:
-         reg_getDeformationFromDisplacement(inputTransformationImage);
+          NR_VERBOSE_APP("Input transformation is a displacement field");
+          reg_getDeformationFromDisplacement(inputTransformationImage);
+          NR_VERBOSE_APP("Input transformation is converted to a deformation field");
       default:
-         reg_defField_compose(inputTransformationImage,
-                              deformationFieldImage,
-                              nullptr);
-         break;
+          NR_VERBOSE_APP("Input transformation is a deformation field");
+          reg_defField_compose(inputTransformationImage,
+                               deformationFieldImage,
+                               nullptr);
+          break;
       }
       nifti_image_free(inputTransformationImage);
       inputTransformationImage=nullptr;
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index 265f329a..0ecbce6f 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -69,10 +69,14 @@ void Content::AllocateDeformationField(size_t bytes) {
     deformationField->intent_code = NIFTI_INTENT_VECTOR;
     memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name));
     strcpy(deformationField->intent_name, "NREG_TRANS");
-    deformationField->intent_p1 = DEF_FIELD;
+    // First create a displacement field filled with 0 to obtain an identity disp
+    deformationField->intent_p1 = DISP_FIELD;
     deformationField->scl_slope = 1;
     deformationField->scl_inter = 0;
     deformationField->data = calloc(deformationField->nvox, deformationField->nbyper);
+    reg_tools_multiplyValueToImage(deformationField, deformationField, 0.f);
+    // Convert to an identity deformation field
+    reg_getDeformationFromDisplacement(deformationField);
 }
 /* *************************************************************** */
 void Content::DeallocateDeformationField() {
diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp
index c58bd383..6980a3cb 100644
--- a/reg-lib/Debug.hpp
+++ b/reg-lib/Debug.hpp
@@ -58,8 +58,8 @@ inline std::string StripFunctionName(const std::string& funcName) {
 #else
 #define NR_FUNC_CALLED()
 #define NR_DEBUG(msg)
-#define NR_VERBOSE(msg)     if (this->verbose) NR_COUT << "[NiftyReg DEBUG] " << msg << std::endl
-#define NR_VERBOSE_APP(msg) if (verbose) NR_COUT << "[NiftyReg DEBUG] " << msg << std::endl
+#define NR_VERBOSE(msg)     if (this->verbose) NR_COUT << "[NiftyReg INFO] " << msg << std::endl
+#define NR_VERBOSE_APP(msg) if (verbose) NR_COUT << "[NiftyReg INFO] " << msg << std::endl
 #endif
 /* *************************************************************** */
 #define NR_WARN(msg)        NR_COUT << "[NiftyReg WARNING] " << msg << std::endl
@@ -68,14 +68,19 @@ inline std::string StripFunctionName(const std::string& funcName) {
 #define NR_INFO(msg)        NR_COUT << "[NiftyReg INFO] " << msg << std::endl
 /* *************************************************************** */
 #ifndef NDEBUG
-#define NR_MAT33(mat, title)            reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title))
-#define NR_MAT33_VERBOSE(mat, title)    NR_MAT33(mat, title)
-#define NR_MAT44(mat, title)            reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title))
-#define NR_MAT44_VERBOSE(mat, title)    NR_MAT44(mat, title)
+#define NR_MAT33(mat, title)          reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title))
+#define NR_MAT44(mat, title)          reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title))
+#define NR_MAT33_DEBUG(mat, title)    NR_MAT33(mat, title)
+#define NR_MAT44_DEBUG(mat, title)    NR_MAT44(mat, title)
+#define NR_MAT33_VERBOSE(mat, title)  NR_MAT33(mat, title)
+#define NR_MAT44_VERBOSE(mat, title)  NR_MAT44(mat, title)
 #else
-#define NR_MAT33(mat, title)
-#define NR_MAT33_VERBOSE(mat, title)    if (this->verbose) reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title))
-#define NR_MAT44(mat, title)
-#define NR_MAT44_VERBOSE(mat, title)    if (this->verbose) reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title))
+#define NR_MAT33(mat, title)          reg_mat33_disp(mat, title)
+#define NR_MAT44(mat, title)          reg_mat44_disp(mat, title)
+#define NR_MAT33_DEBUG(mat, title)    
+#define NR_MAT44_DEBUG(mat, title)    
+#define NR_MAT33_VERBOSE(mat, title)  if (this->verbose) NR_MAT33(mat, "[NiftyReg INFO] "s + (title))
+#define NR_MAT44_VERBOSE(mat, title)  if (this->verbose) NR_MAT44(mat, "[NiftyReg INFO] "s + (title))
+
 #endif
 /* *************************************************************** */
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index 1d4bfbd4..381ca144 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -135,8 +135,8 @@ void reg_aladin_sym<T>::UpdateTransformationMatrix(int type) {
     this->bBlockMatchingKernel->template castTo<BlockMatchingKernel>()->Calculate();
     this->bLtsKernel->template castTo<LtsKernel>()->Calculate(type);
 
-    NR_MAT44_VERBOSE(*this->affineTransformation, "The pre-updated forward transformation matrix");
-    NR_MAT44_VERBOSE(*this->affineTransformationBw, "The pre-updated backward transformation matrix");
+    NR_MAT44_DEBUG(*this->affineTransformation, "The pre-updated forward transformation matrix");
+    NR_MAT44_DEBUG(*this->affineTransformationBw, "The pre-updated backward transformation matrix");
 
     // Forward and backward matrix are inverted
     mat44 fInverted = nifti_mat44_inverse(*this->affineTransformation);
@@ -153,8 +153,8 @@ void reg_aladin_sym<T>::UpdateTransformationMatrix(int type) {
     this->affineTransformation->m[3][3] = 1.f;
     this->affineTransformationBw->m[3][3] = 1.f;
 
-    NR_MAT44_VERBOSE(*this->affineTransformation, "The updated forward transformation matrix");
-    NR_MAT44_VERBOSE(*this->affineTransformationBw, "The updated backward transformation matrix");
+    NR_MAT44_DEBUG(*this->affineTransformation, "The updated forward transformation matrix");
+    NR_MAT44_DEBUG(*this->affineTransformationBw, "The updated backward transformation matrix");
 }
 /* *************************************************************** */
 template <class T>
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index e8207c16..c17acd9a 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -176,13 +176,14 @@ void reg_f3d<T>::Initialise() {
         if (this->referencePyramid[0]->nz > 1)
             gridSpacing[2] = spacingInMillimetre[2] * powf(2, this->levelNumber - 1);
 
-        // Create and allocate the control point image
+        // Create and allocate the control point image - by default the transformation is initialised
+        // to an identity transformation
         reg_createControlPointGrid<T>(controlPointGrid, this->referencePyramid[0], gridSpacing);
 
-        // The control point position image is initialised with the affine transformation
-        if (!this->affineTransformation) {
-            reg_getDeformationFromDisplacement(controlPointGrid);
-        } else reg_affine_getDeformationField(this->affineTransformation.get(), controlPointGrid);
+        // The control point grid is updated with an identity transformation
+        if (this->affineTransformation) {
+            reg_affine_getDeformationField(this->affineTransformation.get(), controlPointGrid);
+        }
     } else {
         // The control point grid image is initialised with the provided grid
         controlPointGrid = inputControlPointGrid;
@@ -419,15 +420,16 @@ void reg_f3d<T>::DisplayCurrentLevelParameters(int currentLevel) {
     NR_VERBOSE("\t* image dimension: " << controlPointGrid->nx << " x " << controlPointGrid->ny << " x " << controlPointGrid->nz);
     NR_VERBOSE("\t* image spacing: " << controlPointGrid->dx << " x " << controlPointGrid->dy << " x " << controlPointGrid->dz << " mm");
 
+    // Input matrices are only printed out in debug
     if (reference->sform_code > 0)
-        NR_MAT44_VERBOSE(reference->sto_xyz, "Reference sform");
-    else NR_MAT44_VERBOSE(reference->qto_xyz, "Reference qform");
+        NR_MAT44_DEBUG(reference->sto_xyz, "Reference sform");
+    else NR_MAT44_DEBUG(reference->qto_xyz, "Reference qform");
     if (floating->sform_code > 0)
-        NR_MAT44_VERBOSE(floating->sto_xyz, "Floating sform");
-    else NR_MAT44_VERBOSE(floating->qto_xyz, "Floating qform");
+        NR_MAT44_DEBUG(floating->sto_xyz, "Floating sform");
+    else NR_MAT44_DEBUG(floating->qto_xyz, "Floating qform");
     if (controlPointGrid->sform_code > 0)
-        NR_MAT44_VERBOSE(controlPointGrid->sto_xyz, "CPP sform");
-    else NR_MAT44_VERBOSE(controlPointGrid->qto_xyz, "CPP qform");
+        NR_MAT44_DEBUG(controlPointGrid->sto_xyz, "CPP sform");
+    else NR_MAT44_DEBUG(controlPointGrid->qto_xyz, "CPP qform");
 
     NR_FUNC_CALLED();
 }
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 70ede1f8..ea0f0d56 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -422,8 +422,8 @@ void reg_f3d2<T>::DisplayCurrentLevelParameters(int currentLevel) {
     NR_VERBOSE("\t* image spacing: " << controlPointGridBw->dx << " x " << controlPointGridBw->dy << " x " << controlPointGridBw->dz << " mm");
 
     if (controlPointGridBw->sform_code > 0)
-        NR_MAT44_VERBOSE(controlPointGridBw->sto_xyz, "Backward CPP sform");
-    else NR_MAT44_VERBOSE(controlPointGridBw->qto_xyz, "Backward CPP qform");
+        NR_MAT44_DEBUG(controlPointGridBw->sto_xyz, "Backward CPP sform");
+    else NR_MAT44_DEBUG(controlPointGridBw->qto_xyz, "Backward CPP qform");
 
     NR_FUNC_CALLED();
 }
diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp
index e8988b75..a2e8ef60 100755
--- a/reg-lib/cpu/_reg_globalTrans.cpp
+++ b/reg-lib/cpu/_reg_globalTrans.cpp
@@ -38,8 +38,6 @@ void reg_affine_deformationField2D(mat44 *affineTransformation,
       transformationMatrix = *affineTransformation;
    else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix);
 
-   NR_MAT44(transformationMatrix, "Global affine transformation");
-
    double voxel[3]={0,0,0}, position[3]={0,0,0};
    int x=0, y=0;
    size_t index=0;
@@ -99,8 +97,6 @@ void reg_affine_deformationField3D(mat44 *affineTransformation,
       transformationMatrix = *affineTransformation;
    else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix);
 
-   NR_MAT44(transformationMatrix, "Global affine transformation");
-
    double voxel[3]={0,0,0}, position[3]={0,0,0};
    int x=0, y=0, z=0;
    size_t index=0;
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 8c9d099e..c86550ad 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -119,6 +119,9 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
         controlPointGridImage->sto_ijk = nifti_mat44_inverse(controlPointGridImage->sto_xyz);
     }
 
+    // The grid is initialised with an identity transformation
+    reg_tools_multiplyValueToImage(controlPointGridImage, controlPointGridImage, 0.f);
+    reg_getDeformationFromDisplacement(controlPointGridImage);
     controlPointGridImage->intent_code = NIFTI_INTENT_VECTOR;
     memset(controlPointGridImage->intent_name, 0, 16);
     strcpy(controlPointGridImage->intent_name, "NREG_TRANS");
@@ -360,6 +363,44 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
 template void reg_createSymmetricControlPointGrids<float>(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*);
 template void reg_createSymmetricControlPointGrids<double>(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*);
 /* *************************************************************** */
+extern "C++" template <class DataType>
+void reg_createDeformationField(NiftiImage & deformationFieldImage,
+                                const NiftiImage & referenceImage) {
+    // The header information from the reference image are copied over
+    deformationFieldImage = nifti_copy_nim_info(referenceImage);
+    // The dimension are updated to store the deformation vector along U index
+    // in a 5D image
+    deformationFieldImage.setDim(NiftiDim::NDim, 5);
+    if (referenceImage->dim[0] == 2)
+        deformationFieldImage.setDim(NiftiDim::Z, 1);
+    deformationFieldImage.setDim(NiftiDim::T, 1);
+    deformationFieldImage.setPixDim(NiftiDim::T, 1);
+    deformationFieldImage.setDim(NiftiDim::U, referenceImage->nz > 1 ? 3 : 2);
+    deformationFieldImage.setPixDim(NiftiDim::U, 1);
+    deformationFieldImage.setDim(NiftiDim::V, 1);
+    deformationFieldImage.setPixDim(NiftiDim::V, 1);
+    deformationFieldImage.setDim(NiftiDim::W, 1);
+    deformationFieldImage.setPixDim(NiftiDim::W, 1);
+    // The deformation stores floating scalar
+    deformationFieldImage->datatype = sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64;
+    deformationFieldImage->nbyper = sizeof(DataType);
+    deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR;
+    memset(deformationFieldImage->intent_name, 0, sizeof(deformationFieldImage->intent_name));
+    strcpy(deformationFieldImage->intent_name, "NREG_TRANS");
+    deformationFieldImage->scl_slope = 1;
+    deformationFieldImage->scl_inter = 0;
+
+    // The data is allocated given the new size
+    deformationFieldImage.realloc();
+    // The image is filled in with zero to represent an identity displacement field
+    reg_tools_multiplyValueToImage(deformationFieldImage, deformationFieldImage, 0.f);
+    deformationFieldImage->intent_p1 = DISP_FIELD;
+    // The displacement field is converted into a deformation field
+    reg_getDeformationFromDisplacement(deformationFieldImage);
+}
+template void reg_createDeformationField<float>(NiftiImage&, const NiftiImage&);
+template void reg_createDeformationField<double>(NiftiImage&, const NiftiImage&);
+/* *************************************************************** */
 template<class DataType>
 void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                              nifti_image *deformationField,
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index fd1ded7f..91cd3a23 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -48,6 +48,16 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
                                           const mat44 *forwardAffineTrans,
                                           const float *spacing);
 /* *************************************************************** */
+/** @brief Create a deformation field given a provided reference image.
+* @param deformationFieldImage Pointer to the newly created deformation
+* field
+* @param referenceImage Image used to specify the deformation field
+* size and orientation.
+*/
+extern "C++" template <class DataType>
+void reg_createDeformationField(NiftiImage & deformationFieldImage,
+                                const NiftiImage & referenceImage);
+/* *************************************************************** */
 /** @brief Compute a dense deformation field in the space of a reference
  * image from a grid of control point.
  * @param controlPointGridImage Control point grid that contains the deformation
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 3a4654f1..e3eee290 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -20,6 +20,7 @@ reg_nmi::reg_nmi(): reg_measure() {
     this->jointHistogramProBw = nullptr;
     this->jointHistogramLogBw = nullptr;
     this->entropyValuesBw = nullptr;
+    this->approximatePW = true;
     for (int i = 0; i < 255; ++i) {
         this->referenceBinNumber[i] = 68;
         this->floatingBinNumber[i] = 68;
@@ -201,7 +202,8 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                      double **jointHistogramLog,
                      double **jointHistogramPro,
                      double **entropyValues,
-                     const int *referenceMask) {
+                     const int *referenceMask,
+                     const bool approximation) {
     // Create pointers to the image data arrays
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
     const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
@@ -216,21 +218,24 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             double *jointHistoLogPtr = jointHistogramLog[t];
             // Empty the joint histogram
             memset(jointHistoProPtr, 0, totalBinNumber[t] * sizeof(double));
-            // Fill the joint histograms using an approximation
+            // Fill the joint histograms
             const DataType *refPtr = &refImagePtr[t * voxelNumber];
             const DataType *warPtr = &warImagePtr[t * voxelNumber];
-            for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
-                if (referenceMask[voxel] > -1) {
-                    const DataType refValue = refPtr[voxel];
-                    const DataType warValue = warPtr[voxel];
-                    if (refValue == refValue && warValue == warValue){
-                        for(int r = int(refValue-1); r < int(refValue+3); ++r){
-                            if( 0 <= r && r < referenceBinNumber[t]){
-                                const double refBasis = GetBasisSplineValue(refValue - r);
-                                for(int w = int(warValue-1); w < int(warValue+3); ++w){
-                                    if( 0 <= w && w < floatingBinNumber[t]){
-                                        const double warBasis = GetBasisSplineValue(warValue - w);
-                                        jointHistoProPtr[r + w * referenceBinNumber[t]] += refBasis * warBasis;
+            if (approximation == false) {
+                // No approximation is used for the Parzen windowing
+                for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
+                    if (referenceMask[voxel] > -1) {
+                        const DataType refValue = refPtr[voxel];
+                        const DataType warValue = warPtr[voxel];
+                        if (refValue == refValue && warValue == warValue) {
+                            for (int r = int(refValue - 1); r < int(refValue + 3); ++r) {
+                                if (0 <= r && r < referenceBinNumber[t]) {
+                                    const double refBasis = GetBasisSplineValue(refValue - r);
+                                    for (int w = int(warValue - 1); w < int(warValue + 3); ++w) {
+                                        if (0 <= w && w < floatingBinNumber[t]) {
+                                            const double warBasis = GetBasisSplineValue(warValue - w);
+                                            jointHistoProPtr[r + w * referenceBinNumber[t]] += refBasis * warBasis;
+                                        }
                                     }
                                 }
                             }
@@ -238,6 +243,60 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                     }
                 }
             }
+            else {
+                // An approximation is used for the Parzen windowing. First intensities are binarised then
+                // the histogram is convolved with a spine kernel function.
+                for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
+                    if (referenceMask[voxel] > -1) {
+                        const DataType& refValue = refPtr[voxel];
+                        const DataType& warValue = warPtr[voxel];
+                        if (refValue == refValue && warValue == warValue &&
+                            0 <= refValue && refValue < referenceBinNumber[t] &&
+                            0 <= warValue && warValue < floatingBinNumber[t]) {
+                            ++jointHistoProPtr[static_cast<int>(refValue) + static_cast<int>(warValue) * referenceBinNumber[t]];
+                        }
+                    }
+                }
+                // Convolve the histogram with a cubic B-spline kernel
+                double kernel[3];
+                kernel[0] = kernel[2] = GetBasisSplineValue(-1.0);
+                kernel[1] = GetBasisSplineValue(0.0);
+                // Histogram is first smooth along the reference axis
+                memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double));
+                for (int f = 0; f < floatingBinNumber[t]; ++f) {
+                    for (int r = 0; r < referenceBinNumber[t]; ++r) {
+                        double value = 0;
+                        int index = r - 1;
+                        double* ptrHisto = &jointHistoProPtr[index + referenceBinNumber[t] * f];
+
+                        for (int it = 0; it < 3; it++) {
+                            if (-1 < index && index < referenceBinNumber[t]) {
+                                value += *ptrHisto * kernel[it];
+                            }
+                            ++ptrHisto;
+                            ++index;
+                        }
+                        jointHistoLogPtr[r + referenceBinNumber[t] * f] = value;
+                    }
+                }
+                // Histogram is then smooth along the warped floating axis
+                for (int r = 0; r < referenceBinNumber[t]; ++r) {
+                    for (int f = 0; f < floatingBinNumber[t]; ++f) {
+                        double value = 0.;
+                        int index = f - 1;
+                        double* ptrHisto = &jointHistoLogPtr[r + referenceBinNumber[t] * index];
+
+                        for (int it = 0; it < 3; it++) {
+                            if (-1 < index && index < floatingBinNumber[t]) {
+                                value += *ptrHisto * kernel[it];
+                            }
+                            ptrHisto += referenceBinNumber[t];
+                            ++index;
+                        }
+                        jointHistoProPtr[r + referenceBinNumber[t] * f] = value;
+                    }
+                }
+            }
             // Normalise the histogram
             double activeVoxel = 0.f;
             for (int i = 0; i < totalBinNumber[t]; ++i)
@@ -316,7 +375,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  double **jointHistogramPro,
                                  double **entropyValues,
                                  const int *referenceMask,
-                                 const int& referenceTimePoint) {
+                                 const int& referenceTimePoint,
+                                 const bool approximatePW) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         reg_getNMIValue<RefImgDataType>(referenceImage,
@@ -328,7 +388,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                         jointHistogramLog,
                                         jointHistogramPro,
                                         entropyValues,
-                                        referenceMask);
+                                        referenceMask,
+                                        approximatePW);
     }, NiftiImage::getFloatingDataType(referenceImage));
 
     double nmi = 0;
@@ -350,7 +411,8 @@ double reg_nmi::GetSimilarityMeasureValueFw() {
                                        this->jointHistogramPro,
                                        this->entropyValues,
                                        this->referenceMask,
-                                       this->referenceTimePoint);
+                                       this->referenceTimePoint,
+                                       this->approximatePW);
 }
 /* *************************************************************** */
 double reg_nmi::GetSimilarityMeasureValueBw() {
@@ -364,7 +426,8 @@ double reg_nmi::GetSimilarityMeasureValueBw() {
                                        this->jointHistogramProBw,
                                        this->entropyValuesBw,
                                        this->floatingMask,
-                                       this->referenceTimePoint);
+                                       this->referenceTimePoint,
+                                       this->approximatePW);
 }
 /* *************************************************************** */
 template <class DataType>
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 84ea55ba..0599a70b 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -65,11 +65,21 @@ class reg_nmi: public reg_measure {
     virtual unsigned short* GetFloatingBinNumber() {
         return this->floatingBinNumber;
     }
+    virtual void SetApproximatePW(bool val) {
+        this->approximatePW = val;
+    }
+    virtual void ApproximatePW() {
+        this->approximatePW = true;
+    }
+    virtual void DoNotApproximatePW() {
+        this->approximatePW = false;
+    }
 
 protected:
     unsigned short referenceBinNumber[255];
     unsigned short floatingBinNumber[255];
     unsigned short totalBinNumber[255];
+    bool approximatePW;
     double **jointHistogramPro;
     double **jointHistogramLog;
     double **entropyValues;
@@ -90,7 +100,8 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                      double **jointHistogramLog,
                      double **jointHistogramPro,
                      double **entropyValues,
-                     const int *referenceMask);
+                     const int *referenceMask,
+                     const bool approximation=true);
 /* *************************************************************** */
 // Simple class to dynamically manage an array of pointers
 // Needed for multi channel NMI
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 5e1c99c3..1a55b523 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -37,37 +37,18 @@ NiftiImage CreateControlPointGrid(const NiftiImage& reference) {
     float gridSpacing[3] = { reference->dx * 2, reference->dy * 2, reference->dz * 2 };
 
     // Create and allocate the control point image
+    // It is initialised with an identity transformation by default
     NiftiImage controlPointGrid;
     reg_createControlPointGrid<float>(controlPointGrid, reference, gridSpacing);
 
-    // The control point position image is initialised with an identity transformation
-    reg_getDeformationFromDisplacement(controlPointGrid);
-
     return controlPointGrid;
 }
 
 NiftiImage CreateDeformationField(const NiftiImage& reference) {
     // Create and allocate a deformation field
-    NiftiImage deformationField(reference, NiftiImage::Copy::ImageInfo);
-    deformationField.setDim(NiftiDim::NDim, 5);
-    if (reference->dim[0] == 2)
-        deformationField.setDim(NiftiDim::Z, 1);
-    deformationField.setDim(NiftiDim::T, 1);
-    deformationField.setPixDim(NiftiDim::T, 1);
-    deformationField.setDim(NiftiDim::U, reference->nz > 1 ? 3 : 2);
-    deformationField.setPixDim(NiftiDim::U, 1);
-    deformationField.setDim(NiftiDim::V, 1);
-    deformationField.setPixDim(NiftiDim::V, 1);
-    deformationField.setDim(NiftiDim::W, 1);
-    deformationField.setPixDim(NiftiDim::W, 1);
-    deformationField->datatype = NIFTI_TYPE_FLOAT32;
-    deformationField->intent_code = NIFTI_INTENT_VECTOR;
-    memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name));
-    strcpy(deformationField->intent_name, "NREG_TRANS");
-    deformationField->intent_p1 = DISP_FIELD;
-    deformationField->scl_slope = 1;
-    deformationField->scl_inter = 0;
-    deformationField.realloc();
-    reg_getDeformationFromDisplacement(deformationField);
+    // It is initialised with an identity transformation by default
+    NiftiImage deformationField;
+    reg_createDeformationField<float>(deformationField, reference);
+
     return deformationField;
 }
diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp
index 860e2520..0e85de0c 100644
--- a/reg-test/reg_test_nmi_gradient.cpp
+++ b/reg-test/reg_test_nmi_gradient.cpp
@@ -95,6 +95,7 @@ class NMIGradientTest {
                 unique_ptr<Measure> measure{ platform->CreateMeasure() };
                 // Use NMI as a measure
                 unique_ptr<reg_nmi> measure_nmi{ dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)) };
+                measure_nmi->DoNotApproximatePW();
                 measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0
                 measure_nmi->SetRefAndFloatBinNumbers(binNumber, binNumber, 0);
                 measure->Initialise(*measure_nmi, *content);

From ef4f55b4495bbeb7ce4b0e3fd6e2f6a39cd2aadc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 8 Sep 2023 18:54:36 +0100
Subject: [PATCH 203/314] Fix a bug causing early freeing of the image data

Passing a nifti_image pointer to a NiftiImage causes unwanted freeing of the nifti_image pointer
---
 niftyreg_build_version.txt      | 2 +-
 reg-lib/cpu/_reg_localTrans.cpp | 8 ++++----
 reg-lib/cpu/_reg_localTrans.h   | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3ae0b938..18fdcb2a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-321
+322
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 88402798..8fc4871f 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -364,8 +364,8 @@ template void reg_createSymmetricControlPointGrids<float>(NiftiImage&, NiftiImag
 template void reg_createSymmetricControlPointGrids<double>(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*);
 /* *************************************************************** */
 extern "C++" template <class DataType>
-void reg_createDeformationField(NiftiImage & deformationFieldImage,
-                                const NiftiImage & referenceImage) {
+void reg_createDeformationField(NiftiImage& deformationFieldImage,
+                                const nifti_image *referenceImage) {
     // The header information from the reference image are copied over
     deformationFieldImage = nifti_copy_nim_info(referenceImage);
     // The dimension are updated to store the deformation vector along U index
@@ -398,8 +398,8 @@ void reg_createDeformationField(NiftiImage & deformationFieldImage,
     // The displacement field is converted into a deformation field
     reg_getDeformationFromDisplacement(deformationFieldImage);
 }
-template void reg_createDeformationField<float>(NiftiImage&, const NiftiImage&);
-template void reg_createDeformationField<double>(NiftiImage&, const NiftiImage&);
+template void reg_createDeformationField<float>(NiftiImage&, const nifti_image*);
+template void reg_createDeformationField<double>(NiftiImage&, const nifti_image*);
 /* *************************************************************** */
 template<class DataType>
 void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index e3c3008b..3e719aa0 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -55,8 +55,8 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
 * size and orientation.
 */
 extern "C++" template <class DataType>
-void reg_createDeformationField(NiftiImage & deformationFieldImage,
-                                const NiftiImage & referenceImage);
+void reg_createDeformationField(NiftiImage& deformationFieldImage,
+                                const nifti_image *referenceImage);
 /* *************************************************************** */
 /** @brief Compute a dense deformation field in the space of a reference
  * image from a grid of control point.

From 327d516b1afef595ef3c332d41fe4eec500e0ef5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 8 Sep 2023 18:59:59 +0100
Subject: [PATCH 204/314] Refactorisations

---
 niftyreg_build_version.txt            |  2 +-
 reg-io/_reg_ReadWriteMatrix.cpp       | 16 +++----
 reg-io/_reg_ReadWriteMatrix.h         |  9 +---
 reg-lib/Content.cpp                   |  1 -
 reg-lib/Debug.hpp                     |  5 +--
 reg-lib/_reg_aladin.cpp               | 10 ++---
 reg-lib/_reg_f3d.cpp                  |  3 +-
 reg-lib/cpu/_reg_blockMatching.h      |  2 -
 reg-lib/cpu/_reg_dti.h                |  4 +-
 reg-lib/cpu/_reg_globalTrans.h        |  1 -
 reg-lib/cpu/_reg_localTrans.cpp       | 61 +++++++++++----------------
 reg-lib/cpu/_reg_localTrans.h         | 35 ++++-----------
 reg-lib/cpu/_reg_localTrans_jac.cpp   |  3 --
 reg-lib/cpu/_reg_localTrans_jac.h     | 34 ++++-----------
 reg-lib/cpu/_reg_localTrans_regul.cpp |  2 -
 reg-lib/cpu/_reg_localTrans_regul.h   | 12 ------
 reg-lib/cpu/_reg_maths.h              | 31 +++++++-------
 reg-lib/cpu/_reg_maths_eigen.h        |  6 +--
 reg-lib/cpu/_reg_mind.h               |  2 -
 reg-lib/cpu/_reg_mrf.h                |  5 ---
 reg-lib/cpu/_reg_nmi.cpp              | 45 +++++++++-----------
 reg-lib/cpu/_reg_nmi.h                | 10 +----
 reg-lib/cpu/_reg_resampling.h         |  6 ---
 reg-lib/cpu/_reg_splineBasis.h        | 34 +++++++--------
 reg-lib/cpu/_reg_ssd.h                |  4 +-
 reg-lib/cpu/_reg_tools.h              | 53 ++++-------------------
 reg-lib/cuda/_reg_resampling_gpu.h    |  2 -
 reg-lib/cuda/blockMatchingKernel.h    |  1 -
 reg-lib/cuda/optimizeKernel.h         |  7 ---
 29 files changed, 132 insertions(+), 274 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 18fdcb2a..3860ed91 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-322
+323
diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp
index 48f8316d..baf0a6f5 100644
--- a/reg-io/_reg_ReadWriteMatrix.cpp
+++ b/reg-io/_reg_ReadWriteMatrix.cpp
@@ -27,7 +27,7 @@ void reg_tool_ReadAffineFile(mat44 *mat,
     }
     affineFile.close();
 
-    NR_MAT44(*mat, "Read affine transformation");
+    NR_MAT44_DEBUG(*mat, "Read affine transformation");
 
     if (flirtFile) {
         mat44 absoluteReference;
@@ -61,11 +61,11 @@ void reg_tool_ReadAffineFile(mat44 *mat,
         absoluteReference.m[3][3] = absoluteFloating.m[3][3] = 1.0;
 
         NR_DEBUG("An flirt affine file is assumed and is converted to a real word affine matrix");
-        NR_MAT44(*mat, "Matrix read from the input file");
-        NR_MAT44(*referenceMatrix, "Reference Matrix");
-        NR_MAT44(*floatingMatrix, "Floating Matrix");
-        NR_MAT44(absoluteReference, "Reference absolute Matrix");
-        NR_MAT44(absoluteFloating, "Floating absolute Matrix");
+        NR_MAT44_DEBUG(*mat, "Matrix read from the input file");
+        NR_MAT44_DEBUG(*referenceMatrix, "Reference Matrix");
+        NR_MAT44_DEBUG(*floatingMatrix, "Floating Matrix");
+        NR_MAT44_DEBUG(absoluteReference, "Reference absolute Matrix");
+        NR_MAT44_DEBUG(absoluteFloating, "Floating absolute Matrix");
 
         absoluteFloating = nifti_mat44_inverse(absoluteFloating);
         *mat = nifti_mat44_inverse(*mat);
@@ -77,7 +77,7 @@ void reg_tool_ReadAffineFile(mat44 *mat,
         *mat = reg_mat44_mul(mat, &tmp);
     }
 
-    NR_MAT44(*mat, "Affine matrix");
+    NR_MAT44_DEBUG(*mat, "Affine matrix");
 }
 /* *************************************************************** */
 void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) {
@@ -223,7 +223,7 @@ mat44* reg_tool_ReadMat44File(char *fileName) {
     }
     matrixFile.close();
 
-    NR_MAT44(*mat, "mat44 matrix");
+    NR_MAT44_DEBUG(*mat, "mat44 matrix");
 
     return mat;
 }
diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h
index 01e6a5b2..7ad758e8 100644
--- a/reg-io/_reg_ReadWriteMatrix.h
+++ b/reg-io/_reg_ReadWriteMatrix.h
@@ -31,7 +31,6 @@
  * @param flirtFile If this flag is set to true the matrix is converted
  * from a Flirt (FSL) parametrisation to a standard parametrisation
  */
-extern "C++"
 void reg_tool_ReadAffineFile(mat44 *mat,
                              nifti_image *referenceImage,
                              nifti_image *floatingImage,
@@ -44,7 +43,6 @@ void reg_tool_ReadAffineFile(mat44 *mat,
 * @param mat structure that store the affine transformation matrix
 * @param filename Filename of the text file that contains the matrix to read
 **/
-extern "C++"
 void reg_tool_ReadAffineFile(mat44 *mat,
                              char *filename);
 
@@ -54,14 +52,12 @@ void reg_tool_ReadAffineFile(mat44 *mat,
 * @param filename Filename of the text file that contains the matrix to read
 * @return mat44 structure that store the matrix
 **/
-extern "C++"
 mat44* reg_tool_ReadMat44File(char *fileName);
 
 /** @brief This function save a 4-by-4 matrix to the disk as a text file
  * @param mat Matrix to be saved on the disk
  * @param filename Name of the text file to save on the disk
  */
-extern "C++"
 void reg_tool_WriteAffineFile(const mat44 *mat,
                               const char *fileName);
 
@@ -70,7 +66,6 @@ void reg_tool_WriteAffineFile(const mat44 *mat,
 * @param filename Filename of the text file that contains the matrix to read
 * @return pair of values that contains the matrix size
 **/
-extern "C++"
 std::pair<size_t, size_t> reg_tool_sizeInputMatrixFile(char *filename);
 /**
 * @brief Read a file that contains a m-by-n matrix and store it into
@@ -80,7 +75,7 @@ std::pair<size_t, size_t> reg_tool_sizeInputMatrixFile(char *filename);
 * @param nbColumn number of column of the input matrix
 * @return a pointer to a 2D array that points the read matrix
 **/
-extern "C++" template <class T>
+template <class T>
 T** reg_tool_ReadMatrixFile(char *filename,
                             size_t nbLine,
                             size_t nbColumn);
@@ -92,7 +87,7 @@ T** reg_tool_ReadMatrixFile(char *filename,
 * @param nbLine number of line of the input matrix
 * @param nbColumn number of column of the input matrix
 **/
-extern "C++" template <class T>
+template <class T>
 void reg_tool_WriteMatrixFile(char *filename,
                               T **mat,
                               size_t nbLine,
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index 0ecbce6f..ca340144 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -74,7 +74,6 @@ void Content::AllocateDeformationField(size_t bytes) {
     deformationField->scl_slope = 1;
     deformationField->scl_inter = 0;
     deformationField->data = calloc(deformationField->nvox, deformationField->nbyper);
-    reg_tools_multiplyValueToImage(deformationField, deformationField, 0.f);
     // Convert to an identity deformation field
     reg_getDeformationFromDisplacement(deformationField);
 }
diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp
index 6980a3cb..95d1292a 100644
--- a/reg-lib/Debug.hpp
+++ b/reg-lib/Debug.hpp
@@ -77,10 +77,9 @@ inline std::string StripFunctionName(const std::string& funcName) {
 #else
 #define NR_MAT33(mat, title)          reg_mat33_disp(mat, title)
 #define NR_MAT44(mat, title)          reg_mat44_disp(mat, title)
-#define NR_MAT33_DEBUG(mat, title)    
-#define NR_MAT44_DEBUG(mat, title)    
+#define NR_MAT33_DEBUG(mat, title)
+#define NR_MAT44_DEBUG(mat, title)
 #define NR_MAT33_VERBOSE(mat, title)  if (this->verbose) NR_MAT33(mat, "[NiftyReg INFO] "s + (title))
 #define NR_MAT44_VERBOSE(mat, title)  if (this->verbose) NR_MAT44(mat, "[NiftyReg INFO] "s + (title))
-
 #endif
 /* *************************************************************** */
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 60543ebe..37e3619c 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -329,7 +329,7 @@ template<class T>
 void reg_aladin<T>::UpdateTransformationMatrix(int type) {
     this->blockMatchingKernel->template castTo<BlockMatchingKernel>()->Calculate();
     this->ltsKernel->template castTo<LtsKernel>()->Calculate(type);
-    NR_MAT44(*this->affineTransformation, "The updated forward matrix");
+    NR_MAT44_DEBUG(*this->affineTransformation, "The updated forward matrix");
 }
 /* *************************************************************** */
 template<class T>
@@ -381,11 +381,11 @@ void reg_aladin<T>::Run() {
         this->DebugPrintLevelInfoStart();
 
         if (this->con->Content::GetReference()->sform_code > 0)
-            NR_MAT44(this->con->Content::GetReference()->sto_xyz, "Reference image matrix (sform sto_xyz)");
-        else NR_MAT44(this->con->Content::GetReference()->qto_xyz, "Reference image matrix (qform qto_xyz)");
+            NR_MAT44_DEBUG(this->con->Content::GetReference()->sto_xyz, "Reference image matrix (sform sto_xyz)");
+        else NR_MAT44_DEBUG(this->con->Content::GetReference()->qto_xyz, "Reference image matrix (qform qto_xyz)");
         if (this->con->Content::GetFloating()->sform_code > 0)
-            NR_MAT44(this->con->Content::GetFloating()->sto_xyz, "Floating image matrix (sform sto_xyz)");
-        else NR_MAT44(this->con->Content::GetFloating()->qto_xyz, "Floating image matrix (qform qto_xyz)");
+            NR_MAT44_DEBUG(this->con->Content::GetFloating()->sto_xyz, "Floating image matrix (sform sto_xyz)");
+        else NR_MAT44_DEBUG(this->con->Content::GetFloating()->qto_xyz, "Floating image matrix (qform qto_xyz)");
 
         /* ****************** */
         /* Rigid registration */
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index c17acd9a..9c4722c0 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -181,9 +181,8 @@ void reg_f3d<T>::Initialise() {
         reg_createControlPointGrid<T>(controlPointGrid, this->referencePyramid[0], gridSpacing);
 
         // The control point grid is updated with an identity transformation
-        if (this->affineTransformation) {
+        if (this->affineTransformation)
             reg_affine_getDeformationField(this->affineTransformation.get(), controlPointGrid);
-        }
     } else {
         // The control point grid image is initialised with the provided grid
         controlPointGrid = inputControlPointGrid;
diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h
index cedadd9b..9639f43c 100755
--- a/reg-lib/cpu/_reg_blockMatching.h
+++ b/reg-lib/cpu/_reg_blockMatching.h
@@ -88,7 +88,6 @@ struct _reg_blockMatchingParam
  * image to consider for the registration
  * @param runningOnGPU Has to be set to true if the registration has to be performed on the GPU
  */
-extern "C++"
 void initialise_block_matching_method(nifti_image * referenceImage,
                                       _reg_blockMatchingParam *params,
                                       int percentToKeep_block,
@@ -104,7 +103,6 @@ void initialise_block_matching_method(nifti_image * referenceImage,
  * relevant information
  * @param mask Mask array where only voxel defined as active are considered
  */
-extern "C++"
 void block_matching_method(nifti_image * referenceImage,
                            nifti_image * warpedImage,
                            _reg_blockMatchingParam *params,
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 3ef169e0..1f96c167 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -59,7 +59,7 @@ class reg_dti: public reg_measure {
  * should be considered. If set to nullptr, all voxels are considered
  * @return Returns an L2 measure of the distance between the anisotropic components of the diffusion tensors
  */
-extern "C++" template <class DataType>
+template <class DataType>
 double reg_getDtiMeasureValue(const nifti_image *referenceImage,
                               const nifti_image *warpedImage,
                               const int *mask,
@@ -74,7 +74,7 @@ double reg_getDtiMeasureValue(const nifti_image *referenceImage,
  * @param mask Array that contains a mask to specify which voxel
  * should be considered. If set to nullptr, all voxels are considered
  */
-extern "C++" template <class DataType>
+template <class DataType>
 void reg_getVoxelBasedDtiMeasureGradient(nifti_image *referenceImage,
                                          nifti_image *warpedImage,
                                          nifti_image *warpedGradient,
diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h
index dd771a3b..591ec0ca 100755
--- a/reg-lib/cpu/_reg_globalTrans.h
+++ b/reg-lib/cpu/_reg_globalTrans.h
@@ -75,7 +75,6 @@ typedef struct _reg_sorted_point2D _reg_sorted_point2D;
  * @param deformationField Image that contains the deformation field
  * that is being updated
  */
-extern "C++"
 void reg_affine_getDeformationField(mat44 *affine,
                                     nifti_image *deformationField,
                                     bool compose=false,
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 8fc4871f..41d8a6f5 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -35,15 +35,13 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
     controlPointGridImage->cal_min = 0;
     controlPointGridImage->cal_max = 0;
     controlPointGridImage->pixdim[0] = 1.0f;
-    controlPointGridImage->pixdim[1] = controlPointGridImage->dx = spacing[0];
-    controlPointGridImage->pixdim[2] = controlPointGridImage->dy = spacing[1];
-    if (referenceImage->nz == 1) {
-        controlPointGridImage->pixdim[3] = controlPointGridImage->dz = 1.0f;
-    } else controlPointGridImage->pixdim[3] = controlPointGridImage->dz = spacing[2];
-    controlPointGridImage->pixdim[4] = controlPointGridImage->dt = 1.0f;
-    controlPointGridImage->pixdim[5] = controlPointGridImage->du = 1.0f;
-    controlPointGridImage->pixdim[6] = controlPointGridImage->dv = 1.0f;
-    controlPointGridImage->pixdim[7] = controlPointGridImage->dw = 1.0f;
+    controlPointGridImage.setPixDim(NiftiDim::X, spacing[0]);
+    controlPointGridImage.setPixDim(NiftiDim::Y, spacing[1]);
+    controlPointGridImage.setPixDim(NiftiDim::Z, referenceImage->nz > 1 ? spacing[2] : 1.0f);
+    controlPointGridImage.setPixDim(NiftiDim::T, 1.0f);
+    controlPointGridImage.setPixDim(NiftiDim::U, 1.0f);
+    controlPointGridImage.setPixDim(NiftiDim::V, 1.0f);
+    controlPointGridImage.setPixDim(NiftiDim::W, 1.0f);
 
     // Reproduce the orientation of the reference image and add a one voxel shift
     if (referenceImage->qform_code + referenceImage->sform_code > 0) {
@@ -80,7 +78,7 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
     originIndex[1] = -1.0f;
     originIndex[2] = 0.0f;
     if (referenceImage->nz > 1) originIndex[2] = -1.0f;
-    reg_mat44_mul(&(controlPointGridImage->qto_xyz), originIndex, originReal);
+    reg_mat44_mul(&controlPointGridImage->qto_xyz, originIndex, originReal);
     controlPointGridImage->qto_xyz.m[0][3] = controlPointGridImage->qoffset_x = originReal[0];
     controlPointGridImage->qto_xyz.m[1][3] = controlPointGridImage->qoffset_y = originReal[1];
     controlPointGridImage->qto_xyz.m[2][3] = controlPointGridImage->qoffset_z = originReal[2];
@@ -112,7 +110,7 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
         controlPointGridImage->sto_xyz.m[3][3] = referenceImage->sto_xyz.m[3][3];
 
         // Origin is shifted from 1 control point in the sform
-        reg_mat44_mul(&(controlPointGridImage->sto_xyz), originIndex, originReal);
+        reg_mat44_mul(&controlPointGridImage->sto_xyz, originIndex, originReal);
         controlPointGridImage->sto_xyz.m[0][3] = originReal[0];
         controlPointGridImage->sto_xyz.m[1][3] = originReal[1];
         controlPointGridImage->sto_xyz.m[2][3] = originReal[2];
@@ -120,11 +118,9 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
     }
 
     // The grid is initialised with an identity transformation
-    reg_tools_multiplyValueToImage(controlPointGridImage, controlPointGridImage, 0.f);
     reg_getDeformationFromDisplacement(controlPointGridImage);
     controlPointGridImage->intent_code = NIFTI_INTENT_VECTOR;
-    memset(controlPointGridImage->intent_name, 0, 16);
-    strcpy(controlPointGridImage->intent_name, "NREG_TRANS");
+    controlPointGridImage.setIntentName("NREG_TRANS"s);
     controlPointGridImage->intent_p1 = CUB_SPLINE_GRID;
 }
 template void reg_createControlPointGrid<float>(NiftiImage&, const NiftiImage&, const float*);
@@ -142,12 +138,12 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
     mat44 referenceImageSpace = referenceImage->qto_xyz;
     if (referenceImage->sform_code > 0)
         referenceImageSpace = referenceImage->sto_xyz;
-    NR_MAT44(referenceImageSpace, "Input reference image orientation");
+    NR_MAT44_DEBUG(referenceImageSpace, "Input reference image orientation");
     // // Get the floating image space
     mat44 floatingImageSpace = floatingImage->qto_xyz;
     if (floatingImage->sform_code > 0)
         floatingImageSpace = floatingImage->sto_xyz;
-    NR_MAT44(floatingImageSpace, "Input floating image orientation");
+    NR_MAT44_DEBUG(floatingImageSpace, "Input floating image orientation");
     // Check if an affine transformation is specified
     mat44 halfForwardAffine, halfBackwardAffine;
     if (forwardAffineTrans != nullptr) {
@@ -290,10 +286,12 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
     backwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64);
 
     // Set the control point grid spacing
-    forwardGridImage->pixdim[1] = forwardGridImage->dx = backwardGridImage->pixdim[1] = backwardGridImage->dx = spacing[0];
-    forwardGridImage->pixdim[2] = forwardGridImage->dy = backwardGridImage->pixdim[2] = backwardGridImage->dy = spacing[1];
-    if (referenceImage->nz > 1)
-        forwardGridImage->pixdim[3] = forwardGridImage->dz = backwardGridImage->pixdim[3] = backwardGridImage->dz = spacing[2];
+    forwardGridImage.setPixDim(NiftiDim::X, spacing[0]);
+    backwardGridImage.setPixDim(NiftiDim::X, spacing[0]);
+    forwardGridImage.setPixDim(NiftiDim::Y, spacing[1]);
+    backwardGridImage.setPixDim(NiftiDim::Y, spacing[1]);
+    forwardGridImage.setPixDim(NiftiDim::Z, referenceImage->nz > 1 ? spacing[2] : 1.0f);
+    backwardGridImage.setPixDim(NiftiDim::Z, referenceImage->nz > 1 ? spacing[2] : 1.0f);
     // Set the control point grid image orientation
     forwardGridImage->qform_code = backwardGridImage->qform_code = 0;
     forwardGridImage->sform_code = backwardGridImage->sform_code = 1;
@@ -313,10 +311,8 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
     forwardGridImage->sto_ijk = backwardGridImage->sto_ijk = nifti_mat44_inverse(forwardGridImage->sto_xyz);
     // Set the intent type
     forwardGridImage->intent_code = backwardGridImage->intent_code = NIFTI_INTENT_VECTOR;
-    memset(forwardGridImage->intent_name, 0, 16);
-    memset(backwardGridImage->intent_name, 0, 16);
-    strcpy(forwardGridImage->intent_name, "NREG_TRANS");
-    strcpy(backwardGridImage->intent_name, "NREG_TRANS");
+    forwardGridImage.setIntentName("NREG_TRANS"s);
+    backwardGridImage.setIntentName("NREG_TRANS"s);
     forwardGridImage->intent_p1 = backwardGridImage->intent_p1 = CUB_SPLINE_GRID;
     // Set the affine matrices
     mat44 identity;
@@ -339,7 +335,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
         forwardGridImage->ext_list[1].edata = (char*)calloc(forwardGridImage->ext_list[1].esize - 8, sizeof(float));
         memcpy(forwardGridImage->ext_list[0].edata, &halfForwardAffine, sizeof(mat44));
         memcpy(forwardGridImage->ext_list[1].edata, &halfForwardAffine, sizeof(mat44));
-        NR_MAT44(halfForwardAffine, "Forward transformation half-affine");
+        NR_MAT44_DEBUG(halfForwardAffine, "Forward transformation half-affine");
         // Create extensions to store the affine parametrisations for the backward transformation
         backwardGridImage->num_ext = 2;
         backwardGridImage->ext_list = (nifti1_extension*)malloc(2 * sizeof(nifti1_extension));
@@ -351,11 +347,8 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
         backwardGridImage->ext_list[1].edata = (char*)calloc(backwardGridImage->ext_list[1].esize - 8, sizeof(float));
         memcpy(backwardGridImage->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44));
         memcpy(backwardGridImage->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44));
-        NR_MAT44(halfBackwardAffine, "Backward transformation half-affine");
+        NR_MAT44_DEBUG(halfBackwardAffine, "Backward transformation half-affine");
     }
-    // Initialise the grid with identity transformations
-    reg_tools_multiplyValueToImage(forwardGridImage, forwardGridImage, 0.f);
-    reg_tools_multiplyValueToImage(backwardGridImage, backwardGridImage, 0.f);
     // Convert the parametrisations into deformation fields
     reg_getDeformationFromDisplacement(forwardGridImage);
     reg_getDeformationFromDisplacement(backwardGridImage);
@@ -363,11 +356,11 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
 template void reg_createSymmetricControlPointGrids<float>(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*);
 template void reg_createSymmetricControlPointGrids<double>(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*);
 /* *************************************************************** */
-extern "C++" template <class DataType>
+template <class DataType>
 void reg_createDeformationField(NiftiImage& deformationFieldImage,
                                 const nifti_image *referenceImage) {
     // The header information from the reference image are copied over
-    deformationFieldImage = nifti_copy_nim_info(referenceImage);
+    deformationFieldImage = NiftiImage(const_cast<nifti_image*>(referenceImage), NiftiImage::Copy::ImageInfo);
     // The dimension are updated to store the deformation vector along U index
     // in a 5D image
     deformationFieldImage.setDim(NiftiDim::NDim, 5);
@@ -390,10 +383,8 @@ void reg_createDeformationField(NiftiImage& deformationFieldImage,
     deformationFieldImage->scl_slope = 1;
     deformationFieldImage->scl_inter = 0;
 
-    // The data is allocated given the new size
+    // The data is allocated given the new size and filled in with zero to represent an identity displacement field
     deformationFieldImage.realloc();
-    // The image is filled in with zero to represent an identity displacement field
-    reg_tools_multiplyValueToImage(deformationFieldImage, deformationFieldImage, 0.f);
     deformationFieldImage->intent_p1 = DISP_FIELD;
     // The displacement field is converted into a deformation field
     reg_getDeformationFromDisplacement(deformationFieldImage);
@@ -1699,7 +1690,6 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
     } // loop over z
 }
 /* *************************************************************** */
-extern "C++"
 void reg_voxelCentric2NodeCentric(nifti_image * nodeImage,
                                   nifti_image * voxelImage,
                                   float weight,
@@ -2148,7 +2138,6 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_
     free(oldGrid);
 }
 /* *************************************************************** */
-extern "C++"
 void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
                                        nifti_image *referenceImage) {
     NR_DEBUG("Starting the refine the control point grid");
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index 3e719aa0..d3d8d28c 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -16,14 +16,12 @@
 
 #pragma once
 
-#include "float.h"
 #include "_reg_globalTrans.h"
 #include "_reg_splineBasis.h"
 
-/* *********************************************** */
-/* ****      CUBIC SPLINE BASED FUNCTIONS     **** */
-/* *********************************************** */
-
+/* *************************************************************** */
+/* ****              CUBIC SPLINE BASED FUNCTIONS             **** */
+/* *************************************************************** */
 /* *************************************************************** */
 /** @brief Generate a control point grid image based on the dimension of a
  * reference image and on a spacing.
@@ -35,12 +33,12 @@
  * define the control point grid image space
  * @param spacing Control point spacing along each axis
  */
-extern "C++" template <class DataType>
+template <class DataType>
 void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
                                 const NiftiImage& referenceImage,
                                 const float *spacing);
 
-extern "C++" template <class DataType>
+template <class DataType>
 void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
                                           NiftiImage& backwardGridImage,
                                           const NiftiImage& referenceImage,
@@ -54,7 +52,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
 * @param referenceImage Image used to specify the deformation field
 * size and orientation.
 */
-extern "C++" template <class DataType>
+template <class DataType>
 void reg_createDeformationField(NiftiImage& deformationFieldImage,
                                 const nifti_image *referenceImage);
 /* *************************************************************** */
@@ -70,7 +68,6 @@ void reg_createDeformationField(NiftiImage& deformationFieldImage,
  * @param bspline A cubic B-Spline scheme is used if the value is set to true,
  * a cubic spline scheme is used otherwise (interpolant spline).
  */
-extern "C++"
 void reg_spline_getDeformationField(nifti_image *controlPointGridImage,
                                     nifti_image *deformationField,
                                     int *mask = nullptr,
@@ -90,7 +87,6 @@ void reg_spline_getDeformationField(nifti_image *controlPointGridImage,
  * @param update The values in node image will be incremented if
  * update is set to true; a blank node image is considered otherwise
  */
-extern "C++"
 void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
                                   nifti_image *voxelImage,
                                   float weight,
@@ -103,7 +99,6 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
  * @param controlPointGridImage This control point grid will be refined
  * by dividing the control point spacing by a ratio of 2
  */
-extern "C++"
 void reg_spline_refineControlPointGrid(nifti_image *controlPointGridImage,
                                        nifti_image *referenceImage = nullptr);
 /* *************************************************************** */
@@ -121,7 +116,6 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGridImage,
  * @param Cubic B-Spline can be used (bspline==true)
  * or cubic Spline (bspline==false)
  */
-extern "C++"
 int reg_spline_cppComposition(nifti_image *grid1,
                               nifti_image *grid2,
                               bool displacement1,
@@ -140,7 +134,6 @@ int reg_spline_cppComposition(nifti_image *grid1,
  * within the mask will be updated. All positive values in the maks
  * are considered as belonging to the mask.
  */
-extern "C++"
 void reg_defField_compose(nifti_image *deformationField,
                           nifti_image *dfToUpdate,
                           int *mask);
@@ -154,12 +147,10 @@ void reg_defField_compose(nifti_image *deformationField,
  * @param tolerance Tolerance value for the optimisation. Set to nan
  * for the default value.
  */
-extern "C++"
 void reg_defFieldInvert(nifti_image *inputDeformationField,
                         nifti_image *outputDeformationField,
                         float tolerance);
 /* *************************************************************** */
-extern "C++"
 void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
                                                    nifti_image *deformationFieldImage,
                                                    const bool updateStepNumber);
@@ -171,25 +162,19 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage,
  * @param deformationFieldImage Deformation field image that will
  * be filled using the exponentiation of the velocity field.
  */
-extern "C++"
 void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                             nifti_image *deformationFieldImage,
                                             const bool updateStepNumber);
 /* *************************************************************** */
-extern "C++"
 void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
                                                    nifti_image **deformationFieldImage);
 /* *************************************************************** */
-extern "C++"
 void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                              nifti_image *flowField);
 /* *************************************************************** */
-
-
-/* *********************************************** */
-/* ****            OTHER FUNCTIONS            **** */
-/* *********************************************** */
-
+/* *************************************************************** */
+/* ****                    OTHER FUNCTIONS                    **** */
+/* *************************************************************** */
 /* *************************************************************** */
 /** @brief This function compute the BCH update using an initial velocity field
  * and its gradient.
@@ -204,7 +189,6 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
  * 3 - w=u+v+0.5*[u,v]+[u,[u,v]]/12-[v,[u,v]]/12
  * 4 - w=u+v+0.5*[u,v]+[u,[u,v]]/12-[v,[u,v]]/12-[v,[u,[u,g]]]/24
  */
-extern "C++"
 void compute_BCH_update(nifti_image *img1,
                         nifti_image *img2,
                         int type);
@@ -213,6 +197,5 @@ void compute_BCH_update(nifti_image *img1,
  * in order to get cubic B-Spline coefficient
  * @param img Image to be deconvolved
  */
-extern "C++"
 void reg_spline_getDeconvolvedCoefficents(nifti_image *img);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 62acf252..26678dde 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -1200,7 +1200,6 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
    return;
 }
 /* *************************************************************** */
-extern "C++"
 double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
                                          nifti_image *referenceImage,
                                          bool approximation,
@@ -1831,7 +1830,6 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
    free(jacobianDeterminant);
 }
 /* *************************************************************** */
-extern "C++"
 void reg_spline_getJacobianPenaltyTermGradient(nifti_image *splineControlPoint,
                                                nifti_image *referenceImage,
                                                nifti_image *gradientImage,
@@ -2435,7 +2433,6 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
    return std::numeric_limits<double>::quiet_NaN();
 }
 /* *************************************************************** */
-extern "C++"
 double reg_spline_correctFolding(nifti_image *splineControlPoint,
                                  nifti_image *referenceImage,
                                  bool approx)
diff --git a/reg-lib/cpu/_reg_localTrans_jac.h b/reg-lib/cpu/_reg_localTrans_jac.h
index 0db8d485..990f3b92 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.h
+++ b/reg-lib/cpu/_reg_localTrans_jac.h
@@ -20,12 +20,10 @@
  * @param controlPointGridImage Image that contains the transformation
  * parametrisation.
  * @param jacobianImage Image that will be populated with the determinant
- * of the Jacobian matrix of the transformation at every voxel posision.
+ * of the Jacobian matrix of the transformation at every voxel position.
  */
-extern "C++"
 void reg_spline_GetJacobianMap(nifti_image *controlPointGridImage,
-                               nifti_image *jacobianImage
-                               );
+                               nifti_image *jacobianImage);
 /* *************************************************************** */
 /** @brief Compute the average Jacobian determinant
  * @param controlPointGridImage Image that contains the transformation
@@ -36,12 +34,10 @@ void reg_spline_GetJacobianMap(nifti_image *controlPointGridImage,
  * only the information from the control point if the value is set to true;
  * all voxels are considered if the value is set to false.
  */
-extern "C++"
 double reg_spline_getJacobianPenaltyTerm(nifti_image *controlPointGridImage,
                                          nifti_image *referenceImage,
                                          bool approx,
-                                         bool useHeaderInformation=false
-      );
+                                         bool useHeaderInformation=false);
 /* *************************************************************** */
 /** @brief Compute the gradient at every control point position of the
  * Jacobian determinant based penalty term
@@ -59,14 +55,12 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *controlPointGridImage,
  * from the control point if the value is set to true; all voxels are
  * considered if the value is set to false.
  */
-extern "C++"
 void reg_spline_getJacobianPenaltyTermGradient(nifti_image *controlPointGridImage,
                                                nifti_image *referenceImage,
                                                nifti_image *gradientImage,
                                                float weight,
                                                bool approx,
-                                               bool useHeaderInformation=false
-      );
+                                               bool useHeaderInformation=false);
 /* *************************************************************** */
 /** @brief Compute the Jacobian matrix at every voxel position
  * using a cubic b-spline parametrisation. This function does require
@@ -78,11 +72,9 @@ void reg_spline_getJacobianPenaltyTermGradient(nifti_image *controlPointGridImag
  * @param jacobianImage Array that is filled with the Jacobian matrices
  * for every voxel.
  */
-extern "C++"
 void reg_spline_GetJacobianMatrix(nifti_image *referenceImage,
                                   nifti_image *controlPointGridImage,
-                                  mat33 *jacobianImage
-                                  );
+                                  mat33 *jacobianImage);
 /* *************************************************************** */
 /** @brief Correct the folding in the transformation parametrised through
  * cubic B-Spline
@@ -92,11 +84,9 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage,
  * @param approx The function can be run be considering only the control
  * point position (approx==false) or every voxel (approx==true)
  */
-extern "C++"
 double reg_spline_correctFolding(nifti_image *controlPointGridImage,
                                  nifti_image *referenceImage,
-                                 bool approx
-                                 );
+                                 bool approx);
 /* *************************************************************** */
 /** @brief Compute the Jacobian determinant at every voxel position
  * from a deformation field. A linear interpolation is
@@ -105,7 +95,6 @@ double reg_spline_correctFolding(nifti_image *controlPointGridImage,
  * @param jacobianImage This image will be fill with the Jacobian
  * determinant of the transformation of every voxel.
  */
-extern "C++"
 void reg_defField_getJacobianMap(nifti_image *deformationField,
                                  nifti_image *jacobianImage);
 /* *************************************************************** */
@@ -116,7 +105,6 @@ void reg_defField_getJacobianMap(nifti_image *deformationField,
  * @param jacobianMatrices This array will be fill with the Jacobian
  * matrices of the transformation of every voxel.
  */
-extern "C++"
 void reg_defField_getJacobianMatrix(nifti_image *deformationField,
                                     mat33 *jacobianMatrices);
 /* *************************************************************** */
@@ -129,14 +117,11 @@ void reg_defField_getJacobianMatrix(nifti_image *deformationField,
  * @param jacobianMatrices Array of matrices that will be filled with
  * the Jacobian matrices of the transformation
  */
-extern "C++"
 int reg_defField_GetJacobianMatFromFlowField(mat33* jacobianMatrices,
                                              nifti_image *flowFieldImage);
-extern "C++"
 int reg_spline_GetJacobianMatFromVelocityGrid(mat33* jacobianMatrices,
                                               nifti_image *velocityGridImage,
-                                              nifti_image *referenceImage
-                                              );
+                                              nifti_image *referenceImage);
 /* *************************************************************** */
 /** @brief This function computed a Jacobian determinant map by integrating
  * the velocity grid
@@ -145,11 +130,8 @@ int reg_spline_GetJacobianMatFromVelocityGrid(mat33* jacobianMatrices,
  * @param velocityFieldImage Image that contains a velocity field
  * parametrised using a grid of control points
  */
-extern "C++"
 int reg_defField_GetJacobianDetFromFlowField(nifti_image *jacobianDetImage,
-                                             nifti_image *flowFieldImage
-                                             );
-extern "C++"
+                                             nifti_image *flowFieldImage);
 int reg_spline_GetJacobianDetFromVelocityGrid(nifti_image *jacobianDetImage,
                                               nifti_image *velocityGridImage);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 4ecd3c77..8edc51be 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -152,7 +152,6 @@ double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoi
     return constraintValue / (double)splineControlPoint->nvox;
 }
 /* *************************************************************** */
-extern "C++"
 double reg_spline_approxBendingEnergy(const nifti_image *splineControlPoint) {
     if (splineControlPoint->nz == 1) {
         switch (splineControlPoint->datatype) {
@@ -451,7 +450,6 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
     reg_getDeformationFromDisplacement(splineControlPoint);
 }
 /* *************************************************************** */
-extern "C++"
 void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint,
                                             nifti_image *gradientImage,
                                             float weight) {
diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h
index 237a06c1..1c929167 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.h
+++ b/reg-lib/cpu/_reg_localTrans_regul.h
@@ -23,7 +23,6 @@
  * parametrisation
  * @return The normalised bending energy. Normalised by the number of voxel
  */
-extern "C++"
 double reg_spline_approxBendingEnergy(const nifti_image *controlPointGridImage);
 /* *************************************************************** */
 /** @brief Compute and return the approximated (at the control point position)
@@ -35,7 +34,6 @@ double reg_spline_approxBendingEnergy(const nifti_image *controlPointGridImage);
  * at every control point position.
  * @param weight Scalar which will be multiplied by the bending-energy gradient
  */
-extern "C++"
 void reg_spline_approxBendingEnergyGradient(nifti_image *controlPointGridImage,
                                             nifti_image *gradientImage,
                                             float weight);
@@ -45,7 +43,6 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *controlPointGridImage,
  * parametrisation
  * @return The normalised linear energy. Normalised by the number of voxel
  */
-extern "C++"
 double reg_spline_linearEnergy(const nifti_image *referenceImage,
                                const nifti_image *controlPointGridImage);
 /* *************************************************************** */
@@ -55,7 +52,6 @@ double reg_spline_linearEnergy(const nifti_image *referenceImage,
  * parametrisation
  * @return The normalised linear energy. Normalised by the number of voxel
  */
-extern "C++"
 double reg_spline_approxLinearEnergy(const nifti_image *controlPointGridImage);
 /* *************************************************************** */
 /** @brief Compute the gradient of the linear elastic energy terms
@@ -69,7 +65,6 @@ double reg_spline_approxLinearEnergy(const nifti_image *controlPointGridImage);
  * current values
  * @param weight Weight to apply to the term of the penalty
  */
-extern "C++"
 void reg_spline_linearEnergyGradient(const nifti_image *referenceImage,
                                      const nifti_image *controlPointGridImage,
                                      nifti_image *gradientImage,
@@ -85,7 +80,6 @@ void reg_spline_linearEnergyGradient(const nifti_image *referenceImage,
  * current values
  * @param weight Weight to apply to the term of the penalty
  */
-extern "C++"
 void reg_spline_approxLinearEnergyGradient(const nifti_image *controlPointGridImage,
                                            nifti_image *gradientImage,
                                            float weight);
@@ -94,14 +88,12 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *controlPointGridIm
  * @param deformationField Image that contains the transformation.
  * @return The normalised linear energy. Normalised by the number of voxel
  */
-extern "C++"
 double reg_defField_linearEnergy(const nifti_image *deformationField);
 /* *************************************************************** */
 /** @brief Compute and return the linear elastic energy terms.
  * @param deformationField Image that contains the transformation.
  * @param weight Weight to apply to the term of the penalty
  */
-extern "C++"
 void reg_defField_linearEnergyGradient(const nifti_image *deformationField,
                                        nifti_image *gradientImage,
                                        float weight);
@@ -114,7 +106,6 @@ void reg_defField_linearEnergyGradient(const nifti_image *deformationField,
  * @param landmarkReference Landmark in the reference image
  * @param landmarkFloating Landmark in the floating image
  */
-extern "C++"
 double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage,
                                       size_t landmarkNumber,
                                       float *landmarkReference,
@@ -131,7 +122,6 @@ double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage,
  * @param landmarkFloating Landmark in the floating image
  * @param weight weight to apply to the gradient
  */
-extern "C++"
 void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage,
                                             nifti_image *gradientImage,
                                             size_t landmarkNumber,
@@ -144,11 +134,9 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage
  * parametrisation
  * @return The normalised pairwise energy. Normalised by the number of voxel
  */
-extern "C++"
 void reg_spline_approxLinearPairwiseGradient(nifti_image *controlPointGridImage,
                                              nifti_image *gradientImage,
                                              float weight);
 /* *************************************************************** */
-extern "C++"
 double reg_spline_approxLinearPairwise(nifti_image *controlPointGridImage);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index 2eac28f9..6a35bd6d 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -72,53 +72,53 @@ DEVICE inline int Round(const T& x) {
 /* *************************************************************** */
 } // namespace NiftyReg
 /* *************************************************************** */
-extern "C++" template <class T>
+template <class T>
 void reg_LUdecomposition(T *inputMatrix,
                          size_t dim,
                          size_t *index);
 /* *************************************************************** */
-extern "C++" template <class T>
+template <class T>
 void reg_matrixMultiply(T *mat1,
                         T *mat2,
                         size_t *dim1,
                         size_t *dim2,
                         T * &res);
 /* *************************************************************** */
-extern "C++" template <class T>
+template <class T>
 void reg_matrixInvertMultiply(T *mat,
                               size_t dim,
                               size_t *index,
                               T *vec);
 /* *************************************************************** */
-extern "C++" template<class T>
+template<class T>
 T* reg_matrix1DAllocate(size_t arraySize);
 /* *************************************************************** */
-extern "C++" template<class T>
+template<class T>
 T* reg_matrix1DAllocateAndInitToZero(size_t arraySize);
 /* *************************************************************** */
-extern "C++" template<class T>
+template<class T>
 void reg_matrix1DDeallocate(T* mat);
 /* *************************************************************** */
-extern "C++" template<class T>
+template<class T>
 T** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY);
 /* *************************************************************** */
-extern "C++" template<class T>
+template<class T>
 T** reg_matrix2DAllocateAndInitToZero(size_t arraySizeX, size_t arraySizeY);
 /* *************************************************************** */
-extern "C++" template<class T>
+template<class T>
 void reg_matrix2DDeallocate(size_t arraySizeX, T** mat);
 /* *************************************************************** */
-extern "C++" template<class T>
+template<class T>
 T** reg_matrix2DTranspose(T** mat, size_t arraySizeX, size_t arraySizeY);
 /* *************************************************************** */
-extern "C++" template<class T>
+template<class T>
 T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2);
-extern "C++" template<class T>
+template<class T>
 void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, T** res, bool transposeMat2);
 /* *************************************************************** */
-extern "C++" template<class T>
+template<class T>
 T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect);
-extern "C++" template<class T>
+template<class T>
 void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res);
 /* *************************************************************** */
 /** @brief Add two 3-by-3 matrices
@@ -165,10 +165,9 @@ void reg_mat33_to_nan(mat33 *A);
 /** @brief Transform a mat44 to a mat33 matrix
 */
 mat33 reg_mat44_to_mat33(mat44 const* A);
-extern "C++"
 void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum);
 /* *************************************************************** */
-extern "C++" template <class T>
+template <class T>
 void reg_heapSort(T *array_tmp,int blockNum);
 /* *************************************************************** */
 bool operator==(mat44 A,mat44 B);
diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h
index 16c079c4..ce326b47 100644
--- a/reg-lib/cpu/_reg_maths_eigen.h
+++ b/reg-lib/cpu/_reg_maths_eigen.h
@@ -8,13 +8,13 @@
 /* *************************************************************** */
 
 /* *************************************************************** */
-extern "C++" template <class T>
+template <class T>
 void svd(T **in, size_t m, size_t n, T * w, T **v);
 /* *************************************************************** */
-extern "C++" template <class T>
+template <class T>
 void svd(T **in, size_t m, size_t n, T ***U, T ***S, T ***V);
 /* *************************************************************** */
-extern "C++" template<class T>
+template<class T>
 T reg_matrix2DDet(T** mat, size_t m, size_t n);
 /* *************************************************************** */
 /** @brief Compute the inverse of a  4-by-4 matrix
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index c1db52e6..92e08eeb 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -71,14 +71,12 @@ class reg_mindssc: public reg_mind {
     virtual ~reg_mindssc();
 };
 /* *************************************************************** */
-extern "C++"
 void GetMindImageDescriptor(const nifti_image *inputImage,
                             nifti_image *mindImage,
                             const int *mask,
                             const int& descriptorOffset,
                             const int& currentTimepoint);
 /* *************************************************************** */
-extern "C++"
 void GetMindSscImageDescriptor(const nifti_image *inputImage,
                                nifti_image *mindSscImage,
                                const int *mask,
diff --git a/reg-lib/cpu/_reg_mrf.h b/reg-lib/cpu/_reg_mrf.h
index 75a91ea4..9471d41a 100644
--- a/reg-lib/cpu/_reg_mrf.h
+++ b/reg-lib/cpu/_reg_mrf.h
@@ -102,23 +102,18 @@ class reg_mrf
    bool initialised; ///< Variable to access if the object has been initialised
 };
 /********************************************************************************************************/
-extern "C++"
 template <class DataType>
 void GetGraph_core3D(nifti_image* controlPointGridImage,
                      float* edgeWeightMatrix,
                      float* index_neighbours,
                      nifti_image *refImage,
                      int *mask);
-extern "C++"
 template <class DataType>
 void GetGraph_core2D(nifti_image* controlPointGridImage,
                      float* edgeWeightMatrix,
                      float* index_neighbours,
                      nifti_image *refImage,
                      int *mask);
-
-extern "C++"
 void dt1sq(float *val,int* ind,int len,float offset,int k,int* v,float* z,float* f,int* ind1);
-extern "C++"
 void dt3x(float* r,int* indr,int rl,float dx,float dy,float dz);
 /********************************************************************************************************/
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index e3eee290..cd309712 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -225,8 +225,8 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                 // No approximation is used for the Parzen windowing
                 for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
                     if (referenceMask[voxel] > -1) {
-                        const DataType refValue = refPtr[voxel];
-                        const DataType warValue = warPtr[voxel];
+                        const DataType& refValue = refPtr[voxel];
+                        const DataType& warValue = warPtr[voxel];
                         if (refValue == refValue && warValue == warValue) {
                             for (int r = int(refValue - 1); r < int(refValue + 3); ++r) {
                                 if (0 <= r && r < referenceBinNumber[t]) {
@@ -242,8 +242,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                         }
                     }
                 }
-            }
-            else {
+            } else {
                 // An approximation is used for the Parzen windowing. First intensities are binarised then
                 // the histogram is convolved with a spine kernel function.
                 for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
@@ -267,12 +266,11 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                     for (int r = 0; r < referenceBinNumber[t]; ++r) {
                         double value = 0;
                         int index = r - 1;
-                        double* ptrHisto = &jointHistoProPtr[index + referenceBinNumber[t] * f];
+                        double *ptrHisto = &jointHistoProPtr[index + referenceBinNumber[t] * f];
 
                         for (int it = 0; it < 3; it++) {
-                            if (-1 < index && index < referenceBinNumber[t]) {
+                            if (-1 < index && index < referenceBinNumber[t])
                                 value += *ptrHisto * kernel[it];
-                            }
                             ++ptrHisto;
                             ++index;
                         }
@@ -282,14 +280,13 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                 // Histogram is then smooth along the warped floating axis
                 for (int r = 0; r < referenceBinNumber[t]; ++r) {
                     for (int f = 0; f < floatingBinNumber[t]; ++f) {
-                        double value = 0.;
+                        double value = 0;
                         int index = f - 1;
-                        double* ptrHisto = &jointHistoLogPtr[r + referenceBinNumber[t] * index];
+                        double *ptrHisto = &jointHistoLogPtr[r + referenceBinNumber[t] * index];
 
                         for (int it = 0; it < 3; it++) {
-                            if (-1 < index && index < floatingBinNumber[t]) {
+                            if (-1 < index && index < floatingBinNumber[t])
                                 value += *ptrHisto * kernel[it];
-                            }
                             ptrHisto += referenceBinNumber[t];
                             ++index;
                         }
@@ -298,7 +295,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                 }
             }
             // Normalise the histogram
-            double activeVoxel = 0.f;
+            double activeVoxel = 0;
             for (int i = 0; i < totalBinNumber[t]; ++i)
                 activeVoxel += jointHistoProPtr[i];
             entropyValues[t][3] = activeVoxel;
@@ -306,7 +303,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                 jointHistoProPtr[i] /= activeVoxel;
             // Marginalise over the reference axis
             for (int r = 0; r < referenceBinNumber[t]; ++r) {
-                double sum = 0.;
+                double sum = 0;
                 int index = r;
                 for (int f = 0; f < floatingBinNumber[t]; ++f) {
                     sum += jointHistoProPtr[index];
@@ -317,7 +314,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             }
             // Marginalise over the warped floating axis
             for (int f = 0; f < floatingBinNumber[t]; ++f) {
-                double sum = 0.;
+                double sum = 0;
                 int index = referenceBinNumber[t] * f;
                 for (int r = 0; r < referenceBinNumber[t]; ++r) {
                     sum += jointHistoProPtr[index];
@@ -328,7 +325,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             // Set the log values to zero
             memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double));
             // Compute the entropy of the reference image
-            double referenceEntropy = 0.;
+            double referenceEntropy = 0;
             for (int r = 0; r < referenceBinNumber[t]; ++r) {
                 double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r];
                 if (valPro > 0) {
@@ -339,7 +336,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             }
             entropyValues[t][0] = referenceEntropy;
             // Compute the entropy of the warped floating image
-            double warpedEntropy = 0.;
+            double warpedEntropy = 0;
             for (int f = 0; f < floatingBinNumber[t]; ++f) {
                 double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] +
                     referenceBinNumber[t] + f];
@@ -351,7 +348,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             }
             entropyValues[t][1] = warpedEntropy;
             // Compute the joint entropy
-            double jointEntropy = 0.;
+            double jointEntropy = 0;
             for (int i = 0; i < referenceBinNumber[t] * floatingBinNumber[t]; ++i) {
                 double valPro = jointHistoProPtr[i];
                 if (valPro > 0) {
@@ -375,7 +372,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  double **jointHistogramPro,
                                  double **entropyValues,
                                  const int *referenceMask,
-                                 const int& referenceTimePoint,
+                                 const int referenceTimePoint,
                                  const bool approximatePW) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
@@ -440,8 +437,8 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                                     const nifti_image *warpedGradient,
                                     nifti_image *measureGradientImage,
                                     const int *referenceMask,
-                                    const int& currentTimepoint,
-                                    const double& timepointWeight) {
+                                    const int currentTimepoint,
+                                    const double timepointWeight) {
 #ifdef WIN32
     long i;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 2);
@@ -526,8 +523,8 @@ void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
                                     const nifti_image *warpedGradient,
                                     nifti_image *measureGradientImage,
                                     const int *referenceMask,
-                                    const int& currentTimepoint,
-                                    const double& timepointWeight) {
+                                    const int currentTimepoint,
+                                    const double timepointWeight) {
 #ifdef WIN32
     long i;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -619,8 +616,8 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                             const nifti_image *warpedGradient,
                                             nifti_image *voxelBasedGradient,
                                             const int *referenceMask,
-                                            const int& currentTimepoint,
-                                            const double& timepointWeight) {
+                                            const int currentTimepoint,
+                                            const double timepointWeight) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         auto GetVoxelBasedNmiGradient = referenceImage->nz > 1 ? reg_getVoxelBasedNmiGradient3d<RefImgDataType> : reg_getVoxelBasedNmiGradient2d<RefImgDataType>;
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 0599a70b..41040e48 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -65,9 +65,6 @@ class reg_nmi: public reg_measure {
     virtual unsigned short* GetFloatingBinNumber() {
         return this->floatingBinNumber;
     }
-    virtual void SetApproximatePW(bool val) {
-        this->approximatePW = val;
-    }
     virtual void ApproximatePW() {
         this->approximatePW = true;
     }
@@ -76,10 +73,10 @@ class reg_nmi: public reg_measure {
     }
 
 protected:
+    bool approximatePW;
     unsigned short referenceBinNumber[255];
     unsigned short floatingBinNumber[255];
     unsigned short totalBinNumber[255];
-    bool approximatePW;
     double **jointHistogramPro;
     double **jointHistogramLog;
     double **entropyValues;
@@ -90,7 +87,7 @@ class reg_nmi: public reg_measure {
     void DeallocateHistogram();
 };
 /* *************************************************************** */
-extern "C++" template <class DataType>
+template <class DataType>
 void reg_getNMIValue(const nifti_image *referenceImage,
                      const nifti_image *warpedImage,
                      const double *timePointWeight,
@@ -251,7 +248,6 @@ class reg_multichannel_nmi: public reg_measure {
 };
 /* *************************************************************** */
 /// Multi channel NMI version - Entropy
-extern "C++"
 void reg_getMultiChannelNmiValue(nifti_image *referenceImages,
                                  nifti_image *warpedImages,
                                  unsigned *referenceBins, // should be an array of size num_reference_volumes
@@ -263,7 +259,6 @@ void reg_getMultiChannelNmiValue(nifti_image *referenceImages,
                                  bool approx);
 /* *************************************************************** */
 /// Multi channel NMI version - Gradient
-extern "C++"
 void reg_getVoxelBasedMultiChannelNmiGradient2D(nifti_image *referenceImages,
                                                 nifti_image *warpedImages,
                                                 nifti_image *warpedImageGradient,
@@ -276,7 +271,6 @@ void reg_getVoxelBasedMultiChannelNmiGradient2D(nifti_image *referenceImages,
                                                 bool approx);
 /* *************************************************************** */
 /// Multi channel NMI version - Gradient
-extern "C++"
 void reg_getVoxelBasedMultiChannelNmiGradient3D(nifti_image *referenceImages,
                                                 nifti_image *warpedImages,
                                                 nifti_image *warpedImageGradient,
diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h
index f69c4cf4..ab39078f 100755
--- a/reg-lib/cpu/_reg_resampling.h
+++ b/reg-lib/cpu/_reg_resampling.h
@@ -33,7 +33,6 @@
  * @param dtIndicies Array of 6 integers that correspond to the "time" indicies of the diffusion tensor
  * components in the order xx,yy,zz,xy,xz,yz. If there are no DT images, pass an array of -1's
  */
-extern "C++"
 void reg_resampleImage(nifti_image *floatingImage,
                        nifti_image *warpedImage,
                        const nifti_image *deformationField,
@@ -43,7 +42,6 @@ void reg_resampleImage(nifti_image *floatingImage,
                        const bool *dtiTimepoint = nullptr,
                        const mat33 *jacMat = nullptr);
 /* *************************************************************** */
-extern "C++"
 void reg_resampleImage_PSF(const nifti_image *floatingImage,
                            nifti_image *warpedImage,
                            const nifti_image *deformationField,
@@ -53,14 +51,12 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
                            const mat33 *jacMat,
                            const char& algorithm);
 /* *************************************************************** */
-extern "C++"
 void reg_resampleGradient(const nifti_image *gradientImage,
                           nifti_image *warpedGradient,
                           const nifti_image *deformationField,
                           const int& interp,
                           const float& paddingValue);
 /* *************************************************************** */
-extern "C++"
 void reg_getImageGradient(nifti_image *floatingImage,
                           nifti_image *warpedGradient,
                           const nifti_image *deformationField,
@@ -72,13 +68,11 @@ void reg_getImageGradient(nifti_image *floatingImage,
                           const mat33 *jacMat = nullptr,
                           const nifti_image *warpedImage = nullptr);
 /* *************************************************************** */
-extern "C++"
 void reg_getImageGradient_symDiff(const nifti_image *img,
                                   nifti_image *gradImg,
                                   const int *mask,
                                   const float& paddingValue,
                                   const int& timepoint);
 /* *************************************************************** */
-extern "C++"
 nifti_image* reg_makeIsotropic(nifti_image*, int);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_splineBasis.h b/reg-lib/cpu/_reg_splineBasis.h
index 5436ea7e..8a0afe2d 100755
--- a/reg-lib/cpu/_reg_splineBasis.h
+++ b/reg-lib/cpu/_reg_splineBasis.h
@@ -16,50 +16,50 @@
 #include "_reg_tools.h"
 
 
-extern "C++" template<class DataType>
+template<class DataType>
 void get_BSplineBasisValues(DataType basis,
                             DataType *values);
-extern "C++" template<class DataType>
+template<class DataType>
 void get_BSplineBasisValues(DataType basis,
                             DataType *values,
                             DataType *first);
-extern "C++" template<class DataType>
+template<class DataType>
 void get_BSplineBasisValues(DataType basis,
                             DataType *values,
                             DataType *first,
                             DataType *second);
 
 
-extern "C++" template<class DataType>
+template<class DataType>
 void get_BSplineBasisValue(DataType basis,
                            int index,
                            DataType &value);
-extern "C++" template<class DataType>
+template<class DataType>
 void get_BSplineBasisValue(DataType basis,
                            int index,
                            DataType &value,
                            DataType &first);
-extern "C++" template<class DataType>
+template<class DataType>
 void get_BSplineBasisValue(DataType basis,
                            int index,
                            DataType &value,
                            DataType &first,
                            DataType &second);
 
-extern "C++" template <class DataType>
+template <class DataType>
 void set_first_order_basis_values(DataType *basisX,
                                   DataType *basisY);
 
-extern "C++" template <class DataType>
+template <class DataType>
 void set_first_order_basis_values(DataType *basisX,
                                   DataType *basisY,
                                   DataType *basisZ);
 
-extern "C++" template <class DataType>
+template <class DataType>
 void set_second_order_bspline_basis_values(DataType *basisXX,
                                            DataType *basisYY,
                                            DataType *basisXY);
-extern "C++" template <class DataType>
+template <class DataType>
 void set_second_order_bspline_basis_values(DataType *basisXX,
                                            DataType *basisYY,
                                            DataType *basisZZ,
@@ -68,20 +68,20 @@ void set_second_order_bspline_basis_values(DataType *basisXX,
                                            DataType *basisXZ);
 
 
-extern "C++" template<class DataType>
+template<class DataType>
 void get_SplineBasisValues(DataType basis,
                            DataType *values);
-extern "C++" template<class DataType>
+template<class DataType>
 void get_SplineBasisValues(DataType basis,
                            DataType *values,
                            DataType *first);
-extern "C++" template<class DataType>
+template<class DataType>
 void get_SplineBasisValues(DataType basis,
                            DataType *values,
                            DataType *first,
                            DataType *second);
 
-extern "C++" template <class DataType>
+template <class DataType>
 void get_SlidedValues(DataType &defX,
                       DataType &defY,
                       int X,
@@ -91,7 +91,7 @@ void get_SlidedValues(DataType &defX,
                       mat44 *df_voxel2Real,
                       int *dim,
                       bool displacement);
-extern "C++" template <class DataType>
+template <class DataType>
 void get_SlidedValues(DataType &defX,
                       DataType &defY,
                       DataType &defZ,
@@ -106,7 +106,7 @@ void get_SlidedValues(DataType &defX,
                       bool displacement);
 
 
-extern "C++" template <class DataType>
+template <class DataType>
 void get_GridValues(int startX,
                     int startY,
                     nifti_image *splineControlPoint,
@@ -116,7 +116,7 @@ void get_GridValues(int startX,
                     DataType *dispY,
                     bool approx,
                     bool displacement);
-extern "C++" template <class DataType>
+template <class DataType>
 void get_GridValues(int startX,
                     int startY,
                     int startZ,
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index 9a27c185..008178a4 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -70,7 +70,7 @@ class reg_ssd: public reg_measure {
  * @param localWeightSim Image that contains the local weight similarity
  * @return Returns the computed sum squared difference
  */
-extern "C++" template <class DataType>
+template <class DataType>
 double reg_getSsdValue(const nifti_image *referenceImage,
                        const nifti_image *warpedImage,
                        const double *timePointWeight,
@@ -94,7 +94,7 @@ double reg_getSsdValue(const nifti_image *referenceImage,
  * @param timepointWeight Weight of the specified time point
  * @param localWeightSim Image that contains the local weight similarity
  */
-extern "C++" template <class DataType>
+template <class DataType>
 void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
                                   const nifti_image *warpedImage,
                                   const nifti_image *warpedGradient,
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 8b246513..d776017f 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -46,7 +46,6 @@ typedef enum {
  * both qform_code and sform_code are set to zero.
  * @param image Input image to check and correct if necessary
  */
-extern "C++"
 void reg_checkAndCorrectDimension(nifti_image *image);
 /* *************************************************************** */
 /** @brief Check if the specified filename corresponds to an image.
@@ -54,7 +53,6 @@ void reg_checkAndCorrectDimension(nifti_image *image);
  * @return True is the specified filename corresponds to an image,
  * false otherwise.
  */
-extern "C++"
 bool reg_isAnImageFileName(const char *name);
 /* *************************************************************** */
 /** @brief Rescale an input image between two user-defined values.
@@ -65,7 +63,6 @@ bool reg_isAnImageFileName(const char *name);
  * @param lowThr Intensity to use as lower threshold
  * @param upThr Intensity to use as higher threshold
  */
-extern "C++"
 void reg_intensityRescale(nifti_image *image,
                           int timepoint,
                           float newMin,
@@ -75,14 +72,12 @@ void reg_intensityRescale(nifti_image *image,
  * the intensity values
  * @param image Image to be updated
  */
-extern "C++"
 void reg_tools_removeSCLInfo(nifti_image *img);
 /* *************************************************************** */
 /** @brief reg_getRealImageSpacing
  * @param image image
  * @param spacingValues spacingValues
  */
-extern "C++"
 void reg_getRealImageSpacing(nifti_image *image,
                              float *spacingValues);
 /* *************************************************************** */
@@ -93,7 +88,6 @@ void reg_getRealImageSpacing(nifti_image *image,
  * @param axis Boolean array to specify which axis have to be
  * smoothed. The array follow the dim array of the nifti header.
  */
-extern "C++"
 void reg_tools_kernelConvolution(nifti_image *image,
                                  const float *sigma,
                                  const int& kernelType,
@@ -110,7 +104,6 @@ void reg_tools_kernelConvolution(nifti_image *image,
  * @param timePoint Boolean array to specify which timepoints have to be
  * smoothed.
  */
-extern "C++"
 void reg_tools_labelKernelConvolution(nifti_image *image,
                                       float varianceX,
                                       float varianceY,
@@ -126,7 +119,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
  * @param axis Boolean array to specify which axis have to be
  * downsampled. The array follow the dim array of the nifti header.
  */
-extern "C++" template <class PrecisionType>
+template <class PrecisionType>
 void reg_downsampleImage(nifti_image *image,
                          int type,
                          bool *axis);
@@ -137,7 +130,7 @@ void reg_downsampleImage(nifti_image *image,
  * @return Scalar value that corresponds to the longest
  * euclidean distance
  */
-extern "C++" template <class PrecisionType>
+template <class PrecisionType>
 PrecisionType reg_getMaximalLength(const nifti_image *image,
                                    const bool& optimiseX,
                                    const bool& optimiseY,
@@ -146,7 +139,7 @@ PrecisionType reg_getMaximalLength(const nifti_image *image,
 /** @brief Change the datatype of a nifti image
  * @param image Image to be updated.
  */
-extern "C++" template <class NewType>
+template <class NewType>
 void reg_tools_changeDatatype(nifti_image *image,
                               int type = -1);
 /* *************************************************************** */
@@ -156,7 +149,6 @@ void reg_tools_changeDatatype(nifti_image *image,
  * @param out Result image that contains the result of the operation
  * between the first and second image.
  */
-extern "C++"
 void reg_tools_addImageToImage(const nifti_image *img1,
                                const nifti_image *img2,
                                nifti_image *out);
@@ -167,7 +159,6 @@ void reg_tools_addImageToImage(const nifti_image *img1,
  * @param out Result image that contains the result of the operation
  * between the first and second image.
  */
-extern "C++"
 void reg_tools_subtractImageFromImage(const nifti_image *img1,
                                       const nifti_image *img2,
                                       nifti_image *out);
@@ -178,7 +169,6 @@ void reg_tools_subtractImageFromImage(const nifti_image *img1,
  * @param out Result image that contains the result of the operation
  * between the first and second image.
  */
-extern "C++"
 void reg_tools_multiplyImageToImage(const nifti_image *img1,
                                     const nifti_image *img2,
                                     nifti_image *out);
@@ -189,7 +179,6 @@ void reg_tools_multiplyImageToImage(const nifti_image *img1,
  * @param out Result image that contains the result of the operation
  * between the first and second image.
  */
-extern "C++"
 void reg_tools_divideImageToImage(const nifti_image *img1,
                                   const nifti_image *img2,
                                   nifti_image *out);
@@ -199,7 +188,6 @@ void reg_tools_divideImageToImage(const nifti_image *img1,
  * @param out Result image that contains the result of the operation.
  * @param val Value to be added to input image
  */
-extern "C++"
 void reg_tools_addValueToImage(const nifti_image *img,
                                nifti_image *out,
                                const double& val);
@@ -209,7 +197,6 @@ void reg_tools_addValueToImage(const nifti_image *img,
  * @param out Result image that contains the result of the operation.
  * @param val Value to be subtracted from input image
  */
-extern "C++"
 void reg_tools_subtractValueFromImage(const nifti_image *img,
                                       nifti_image *out,
                                       const double& val);
@@ -219,7 +206,6 @@ void reg_tools_subtractValueFromImage(const nifti_image *img,
  * @param out Result image that contains the result of the operation.
  * @param val Value to be multiplied to input image
  */
-extern "C++"
 void reg_tools_multiplyValueToImage(const nifti_image *img,
                                     nifti_image *out,
                                     const double& val);
@@ -229,7 +215,6 @@ void reg_tools_multiplyValueToImage(const nifti_image *img,
  * @param out Result image that contains the result of the operation.
  * @param val Value to be divided to input image
  */
-extern "C++"
 void reg_tools_divideValueToImage(const nifti_image *img,
                                   nifti_image *out,
                                   const double& val);
@@ -238,7 +223,6 @@ void reg_tools_divideValueToImage(const nifti_image *img,
  * from 0 are set to 1, 0 otherwise.
  * @param img Image that will be binarise inline
  */
-extern "C++"
 void reg_tools_binarise_image(nifti_image *img);
 /* *************************************************************** */
 /** @brief Binarise an input image. The binarisation is
@@ -249,7 +233,6 @@ void reg_tools_binarise_image(nifti_image *img);
  * All values bellow thr are set to 0. All values equal
  * or bellow thr are set to 1
  */
-extern "C++"
 void reg_tools_binarise_image(nifti_image *img,
                               float thr);
 /* *************************************************************** */
@@ -260,7 +243,6 @@ void reg_tools_binarise_image(nifti_image *img,
  * @param array The data array from the input nifti image
  * is binarised and stored in this array.
  */
-extern "C++"
 void reg_tools_binaryImage2int(const nifti_image *img,
                                int *array);
 /* *************************************************************** */
@@ -270,7 +252,6 @@ void reg_tools_binaryImage2int(const nifti_image *img,
  * @param imgB Input vector image
  * @return Mean root mean squared error values returned
  */
-extern "C++"
 double reg_tools_getMeanRMS(const nifti_image *imgA,
                             const nifti_image *imgB);
 /* *************************************************************** */
@@ -281,7 +262,6 @@ double reg_tools_getMeanRMS(const nifti_image *imgA,
  * have to be set to NaN
  * @param res Output image
  */
-extern "C++"
 int reg_tools_nanMask_image(const nifti_image *img,
                             const nifti_image *mask,
                             nifti_image *res);
@@ -291,7 +271,6 @@ int reg_tools_nanMask_image(const nifti_image *img,
  * @param img Input image
  * @param mask Input mask which is updated in place
  */
-extern "C++"
 int reg_tools_removeNanFromMask(const nifti_image *image, int *mask);
 /* *************************************************************** */
 /** @brief Get the minimal value of an image
@@ -299,7 +278,6 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask);
  * @param timepoint active time point. All time points are used if set to -1
  * @return min value
  */
-extern "C++"
 float reg_tools_getMinValue(const nifti_image *img, int timepoint);
 /* *************************************************************** */
 /** @brief Get the maximal value of an image
@@ -307,21 +285,18 @@ float reg_tools_getMinValue(const nifti_image *img, int timepoint);
  * @param timepoint active time point. All time points are used if set to -1
  * @return max value
  */
-extern "C++"
 float reg_tools_getMaxValue(const nifti_image *img, int timepoint);
 /* *************************************************************** */
 /** @brief Get the mean value of an image
  * @param img Input image
  * @return mean value
  */
-extern "C++"
 float reg_tools_getMeanValue(const nifti_image *img);
 /* *************************************************************** */
 /** @brief Get the std value of an image
  * @param img Input image
  * @return std value
  */
-extern "C++"
 float reg_tools_getSTDValue(const nifti_image *img);
 /* *************************************************************** */
 /** @brief Generate a pyramid from an input image.
@@ -333,7 +308,7 @@ float reg_tools_getSTDValue(const nifti_image *img);
  * @param levelToPerform Number to level that will be perform during
  * the registration.
  */
-extern "C++" template<class DataType>
+template<class DataType>
 void reg_createImagePyramid(const NiftiImage& input,
                             vector<NiftiImage>& pyramid,
                             unsigned levelNumber,
@@ -348,7 +323,7 @@ void reg_createImagePyramid(const NiftiImage& input,
  * @param levelToPerform Number to level that will be perform during
  * the registration.
  */
-extern "C++" template<class DataType>
+template<class DataType>
 void reg_createMaskPyramid(const NiftiImage& input,
                            vector<unique_ptr<int[]>>& pyramid,
                            unsigned levelNumber,
@@ -364,7 +339,7 @@ void reg_createMaskPyramid(const NiftiImage& input,
  * @param upThr Upper threshold value. All Value above the threshold
  * are set to the threshold value.
  */
-extern "C++" template<class T>
+template<class T>
 void reg_thresholdImage(nifti_image *image,
                         T lowThr,
                         T upThr);
@@ -376,7 +351,6 @@ void reg_thresholdImage(nifti_image *image,
  * @param cmd String that contains the letter(s) of the axis
  * to flip (xyztuvw)
  */
-extern "C++"
 void reg_flipAxis(const nifti_image *image,
                   void **outputArray,
                   const std::string& cmd);
@@ -387,7 +361,6 @@ void reg_flipAxis(const nifti_image *image,
  * @param image Image that contains a deformation field and will be
  * converted into a displacement field
  */
-extern "C++"
 int reg_getDisplacementFromDeformation(nifti_image *image);
 /* *************************************************************** */
 /** @brief This function converts an image containing a displacement field
@@ -396,7 +369,6 @@ int reg_getDisplacementFromDeformation(nifti_image *image);
  * @param image Image that contains a deformation field and will be
  * converted into a displacement field
  */
-extern "C++"
 int reg_getDeformationFromDisplacement(nifti_image *image);
 /* *************************************************************** */
 /** @brief Set the gradient value along specified direction to zero
@@ -405,7 +377,6 @@ int reg_getDeformationFromDisplacement(nifti_image *image);
  * @param yAxis Boolean to specified if the y-axis has to be zeroed
  * @param zAxis Boolean to specified if the z-axis has to be zeroed
  */
-extern "C++"
 void reg_setGradientToZero(nifti_image *image,
                            bool xAxis,
                            bool yAxis,
@@ -416,7 +387,7 @@ void reg_setGradientToZero(nifti_image *image,
  * The returned value is the largest value computed as ((A/B)-1)
  * If A or B are zeros then the (A-B) value is returned.
  */
-extern "C++" template<class DataType>
+template<class DataType>
 double reg_test_compare_arrays(const DataType *ptrA,
                                const DataType *ptrB,
                                size_t nvox);
@@ -425,31 +396,25 @@ double reg_test_compare_arrays(const DataType *ptrA,
  * The returned value is the largest value computed as ((A/B)-1)
  * If A or B are zeros then the (A-B) value is returned.
  */
-extern "C++"
 double reg_test_compare_images(const nifti_image *imgA,
                                const nifti_image *imgB);
 /* *************************************************************** */
 /** @brief The absolute operator is applied to the input image
  */
-extern "C++"
 void reg_tools_abs_image(nifti_image *img);
 /* *************************************************************** */
-extern "C++"
 void mat44ToCptr(const mat44& mat, float *cMat);
 /* *************************************************************** */
-extern "C++"
 void cPtrToMat44(mat44 *mat, const float *cMat);
 /* *************************************************************** */
-extern "C++"
 void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned numMats);
 /* *************************************************************** */
-extern "C++"
 void cPtrToMat33(mat33 *mat, const float *cMat);
 /* *************************************************************** */
-extern "C++" template<typename T>
+template<typename T>
 void matmnToCptr(const T **mat, T *cMat, unsigned m, unsigned n);
 /* *************************************************************** */
-extern "C++" template<typename T>
+template<typename T>
 void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n);
 /* *************************************************************** */
 void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z);
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h
index 9d720006..0fe28ea4 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/_reg_resampling_gpu.h
@@ -15,7 +15,6 @@
 #include "CudaCommon.hpp"
 
 /* *************************************************************** */
-extern "C++"
 void reg_resampleImage_gpu(const nifti_image *floatingImage,
                            float *warpedImageCuda,
                            const cudaArray *floatingImageCuda,
@@ -24,7 +23,6 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage,
                            const size_t& activeVoxelNumber,
                            const float& paddingValue);
 /* *************************************************************** */
-extern "C++"
 void reg_getImageGradient_gpu(const nifti_image *floatingImage,
                               const cudaArray *floatingImageCuda,
                               const float4 *deformationFieldCuda,
diff --git a/reg-lib/cuda/blockMatchingKernel.h b/reg-lib/cuda/blockMatchingKernel.h
index f341ff81..f1eb0943 100644
--- a/reg-lib/cuda/blockMatchingKernel.h
+++ b/reg-lib/cuda/blockMatchingKernel.h
@@ -27,7 +27,6 @@
  * @param maskCuda The mask image on the device.
  * @param refMatCuda The reference image transformation matrix on the device.
  */
-extern "C++"
 void block_matching_method_gpu(const nifti_image *referenceImage,
                                _reg_blockMatchingParam *params,
                                const float *referenceImageCuda,
diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h
index cfb7cb2c..c2d95bbc 100644
--- a/reg-lib/cuda/optimizeKernel.h
+++ b/reg-lib/cuda/optimizeKernel.h
@@ -3,28 +3,21 @@
 #include "RNifti.h"
 
 /*
-extern "C++"
 void optimize_gpu(_reg_blockMatchingParam *blockMatchingParams,
                     mat44 *updateAffineMatrix,
                     float **targetPosition_d,
                     float **resultPosition_d,
                     bool affine = true);
 
-extern "C++"
 void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *A_d, float* Sigma_d, float* U_d, float* VT_d, float * newResultPos_d, float* targetPos_d, float* resultPos_d, float* lengths_d, const unsigned numBlocks, const unsigned num_to_keep, const unsigned m, const unsigned n);
 */
-extern "C++"
 void cusolverSVD(float* A_d, unsigned m, unsigned n, float* S_d, float* VT_d, float* U_d);
 
-extern "C++"
 void optimize_affine3D_cuda(mat44* cpuMat, float* final_d, float* A_d, float* U_d, float* Sigma_d, float* VT_d, float* lengths_d, float* reference_d, float* warped_d, float* newWarped_d, unsigned m, unsigned n, const unsigned numToKeep, bool ilsIn, bool isAffine);
 /*
-extern "C++"
 void getAffineMat3D(float* A_d, float* Sigma_d, float* VT_d, float* U_d, float* target_d, float* result_d, float* r_d, float *transformation, const unsigned numBlocks, unsigned m, unsigned n);
 
-extern "C++"
 void downloadMat44(mat44 *lastTransformation, float* transform_d);
 
-extern "C++"
 void uploadMat44(mat44 lastTransformation, float* transform_d);
 */

From 4aa2734426fb8b20e309375b273d00af20690069 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 12 Sep 2023 18:15:30 +0100
Subject: [PATCH 205/314] Fix precision of GetBasisSplineValue() and
 GetBasisSplineDerivativeValue()

---
 niftyreg_build_version.txt |  2 +-
 reg-lib/cpu/_reg_nmi.cpp   | 47 ++++++++++++++++++++------------------
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3860ed91..d3824c29 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-323
+324
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index cd309712..c2b5f998 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -164,29 +164,32 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-static double GetBasisSplineValue(double x) {
+template<class PrecisionType>
+PrecisionType GetBasisSplineValue(PrecisionType x) {
     x = fabs(x);
-    double value = 0;
-    if (x < 2.0) {
-        if (x < 1.0)
-            value = 2.0 / 3.0 + (0.5 * x - 1.0) * x * x;
+    PrecisionType value = 0;
+    if (x < 2.f) {
+        if (x < 1.f)
+            value = 2.f / 3.f + (0.5f * x - 1.f) * x * x;
         else {
-            x -= 2.0;
-            value = -x * x * x / 6.0;
+            x -= 2.f;
+            value = -x * x * x / 6.f;
         }
     }
     return value;
 }
 /* *************************************************************** */
-static double GetBasisSplineDerivativeValue(double ori) {
-    double x = fabs(ori), value = 0;
-    if (x < 2.0) {
-        if (x < 1.0)
-            value = (1.5 * x - 2.0) * ori;
+template<class PrecisionType>
+PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) {
+    PrecisionType x = fabs(ori);
+    PrecisionType value = 0;
+    if (x < 2.f) {
+        if (x < 1.f)
+            value = (1.5f * x - 2.f) * ori;
         else {
-            x -= 2.0;
-            value = -0.5 * x * x;
-            if (ori < 0.0) value = -value;
+            x -= 2.f;
+            value = -0.5f * x * x;
+            if (ori < 0) value = -value;
         }
     }
     return value;
@@ -485,11 +488,11 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                     if (-1 < r && r < referenceBinNumber[currentTimepoint]) {
                         for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) {
                             if (-1 < w && w < floatingBinNumber[currentTimepoint]) {
-                                const double commun = GetBasisSplineValue(refValue - r) *
-                                    GetBasisSplineDerivativeValue(warValue - w);
-                                const double &jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
-                                const double &refLog = logHistoPtr[r + referenceOffset];
-                                const double &warLog = logHistoPtr[w + floatingOffset];
+                                const double commun = GetBasisSplineValue<double>(refValue - r) *
+                                    GetBasisSplineDerivativeValue<double>(warValue - w);
+                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
+                                const double& refLog = logHistoPtr[r + referenceOffset];
+                                const double& warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
                                     jointDeriv[0] += commun * gradX * jointLog;
                                     refDeriv[0] += commun * gradX * refLog;
@@ -572,8 +575,8 @@ void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
                     if (-1 < r && r < referenceBinNumber[currentTimepoint]) {
                         for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) {
                             if (-1 < w && w < floatingBinNumber[currentTimepoint]) {
-                                const double commun = GetBasisSplineValue(refValue - r) *
-                                    GetBasisSplineDerivativeValue(warValue - w);
+                                const double commun = GetBasisSplineValue<double>(refValue - r) *
+                                    GetBasisSplineDerivativeValue<double>(warValue - w);
                                 const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
                                 const double& refLog = logHistoPtr[r + referenceOffset];
                                 const double& warLog = logHistoPtr[w + floatingOffset];

From 1b8600c60712a447e6ef60942a72f42b436735aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 14 Sep 2023 12:30:14 +0100
Subject: [PATCH 206/314] Fix a bug causing inconsistent results in successive
 runs of 2D f3d registration #92

---
 niftyreg_build_version.txt            |  2 +-
 reg-lib/cpu/_reg_localTrans_regul.cpp | 94 ++++++++++-----------------
 2 files changed, 34 insertions(+), 62 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index d3824c29..6ac793b4 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-324
+325
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 8edc51be..c384718c 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -1126,89 +1126,61 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi
                                              nifti_image *gradientImage,
                                              float weight) {
     const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2);
-    int x, y, a, b, i, index;
 
-    // Create pointers to the spline coefficients
+    // Create the pointers
     const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
     const DataType *splinePtrY = &splinePtrX[nodeNumber];
+    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
+    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
 
     // Store the basis values since they are constant as the value is approximated
     // at the control point positions only
-    DataType basisX[9];
-    DataType basisY[9];
+    DataType basisX[9], basisY[9];
     set_first_order_basis_values(basisX, basisY);
 
     // Matrix to use to convert the gradient from mm to voxel
-    mat33 reorientation;
-    if (splineControlPoint->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
-
-    DataType splineCoeffX;
-    DataType splineCoeffY;
+    const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk);
+    const mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
-    mat33 matrix, R;
+    const DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
 
-    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
-    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
+    for (int y = 1; y < splineControlPoint->ny - 1; y++) {
+        for (int x = 1; x < splineControlPoint->nx - 1; x++) {
+            mat33 matrix{ 0, 0, 0, 0, 0, 0, 0, 0, 1 };
 
-    DataType approxRatio = (DataType)weight / (DataType)nodeNumber;
-    DataType gradValues[2];
+            int i = 0;
+            for (int b = -1; b < 2; b++) {
+                for (int a = -1; a < 2; a++) {
+                    const int index = (y + b) * splineControlPoint->nx + x + a;
+                    const DataType& splineCoeffX = splinePtrX[index];
+                    const DataType& splineCoeffY = splinePtrY[index];
 
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-    shared(splineControlPoint, splinePtrX, splinePtrY, \
-    basisX, basisY, reorientation, inv_reorientation, \
-    gradientXPtr, gradientYPtr, approxRatio) \
-    private(x, a, b, i, index, gradValues, \
-    splineCoeffX, splineCoeffY, matrix, R)
-#endif
-    for (y = 1; y < splineControlPoint->ny - 1; y++) {
-        for (x = 1; x < splineControlPoint->nx - 1; x++) {
-            memset(&matrix, 0, sizeof(mat33));
-            matrix.m[2][2] = 1;
+                    matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
+                    matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
 
-            i = 0;
-            for (b = -1; b < 2; b++) {
-                for (a = -1; a < 2; a++) {
-                    index = (y + b) * splineControlPoint->nx + x + a;
-                    splineCoeffX = splinePtrX[index];
-                    splineCoeffY = splinePtrY[index];
-
-                    matrix.m[0][0] += basisX[i] * splineCoeffX;
-                    matrix.m[1][0] += basisY[i] * splineCoeffX;
-
-                    matrix.m[0][1] += basisX[i] * splineCoeffY;
-                    matrix.m[1][1] += basisY[i] * splineCoeffY;
+                    matrix.m[0][1] += static_cast<float>(basisX[i] * splineCoeffY);
+                    matrix.m[1][1] += static_cast<float>(basisY[i] * splineCoeffY);
                     ++i;
                 } // a
             } // b
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
             // Removing the rotation component
-            R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-            matrix = nifti_mat33_mul(R, matrix);
+            const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+            matrix = nifti_mat33_mul(r, matrix);
             // Convert to displacement
-            --matrix.m[0][0];
-            --matrix.m[1][1];
+            matrix.m[0][0]--;
+            matrix.m[1][1]--;
             i = 8;
-            for (b = -1; b < 2; b++) {
-                for (a = -1; a < 2; a++) {
-                    index = (y + b) * splineControlPoint->nx + x + a;
-                    gradValues[0] = -2.0 * matrix.m[0][0] * basisX[i];
-                    gradValues[1] = -2.0 * matrix.m[1][1] * basisY[i];
-
-#ifdef _OPENMP
-#pragma omp atomic
-#endif
-                    gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
-                                                          inv_reorientation.m[0][1] * gradValues[1]);
-#ifdef _OPENMP
-#pragma omp atomic
-#endif
-                    gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
-                                                          inv_reorientation.m[1][1] * gradValues[1]);
+            for (int b = -1; b < 2; b++) {
+                for (int a = -1; a < 2; a++) {
+                    const DataType gradValues[2]{ -2.f * matrix.m[0][0] * basisX[i], -2.f * matrix.m[1][1] * basisY[i] };
+                    const int index = (y + b) * splineControlPoint->nx + x + a;
+
+                    gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                                          invReorientation.m[0][1] * gradValues[1]);
+                    gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                                          invReorientation.m[1][1] * gradValues[1]);
                     --i;
                 } // a
             } // b

From 242049a8cfa649abefecdfd5b02990384c40e852 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 14 Sep 2023 12:33:42 +0100
Subject: [PATCH 207/314] Refactorisations

---
 niftyreg_build_version.txt               |   2 +-
 reg-lib/cpu/_reg_localTrans_regul.cpp    | 551 +++++++++++------------
 reg-lib/cpu/_reg_localTrans_regul.h      |   2 +-
 reg-lib/cpu/_reg_nmi.cpp                 |   4 +-
 reg-lib/cuda/_reg_common_cuda_kernels.cu |   6 +-
 5 files changed, 274 insertions(+), 291 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6ac793b4..d1e85f89 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-325
+326
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index c384718c..33dcfcee 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -237,15 +237,15 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint,
             *derivativeValuesPtr++ = XX_y;
             *derivativeValuesPtr++ = YY_x;
             *derivativeValuesPtr++ = YY_y;
-            *derivativeValuesPtr++ = (DataType)(2.0 * XY_x);
-            *derivativeValuesPtr++ = (DataType)(2.0 * XY_y);
+            *derivativeValuesPtr++ = 2.f * XY_x;
+            *derivativeValuesPtr++ = 2.f * XY_y;
         }
     }
 
     DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
     DataType *gradientYPtr = &gradientXPtr[nodeNumber];
 
-    DataType approxRatio = (DataType)weight / (DataType)nodeNumber;
+    DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
     DataType gradientValue[2];
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -392,7 +392,7 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint,
     DataType *gradientYPtr = &gradientXPtr[nodeNumber];
     DataType *gradientZPtr = &gradientYPtr[nodeNumber];
 
-    DataType approxRatio = (DataType)weight / (DataType)nodeNumber;
+    DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
     DataType gradientValue[3];
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -501,7 +501,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin
     DataType splineCoeffX;
     DataType splineCoeffY;
 
-    mat33 matrix, R;
+    mat33 matrix, r;
 
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
@@ -513,7 +513,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin
 #pragma omp parallel for default(none) \
     shared(splinePtrX, splinePtrY, splineControlPoint, \
     basisX, basisY, reorientation) \
-    private(x, a, b, i, index, matrix, R, \
+    private(x, a, b, i, index, matrix, r, \
     splineCoeffX, splineCoeffY, currentValue) \
     reduction(+:constraintValue)
 #endif
@@ -528,18 +528,18 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin
                     index = (y + b) * splineControlPoint->nx + x + a;
                     splineCoeffX = splinePtrX[index];
                     splineCoeffY = splinePtrY[index];
-                    matrix.m[0][0] += basisX[i] * splineCoeffX;
-                    matrix.m[1][0] += basisY[i] * splineCoeffX;
-                    matrix.m[0][1] += basisX[i] * splineCoeffY;
-                    matrix.m[1][1] += basisY[i] * splineCoeffY;
+                    matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
+                    matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
+                    matrix.m[0][1] += static_cast<float>(basisX[i] * splineCoeffY);
+                    matrix.m[1][1] += static_cast<float>(basisY[i] * splineCoeffY);
                     ++i;
                 }
             }
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
             // Removing the rotation component
-            R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-            matrix = nifti_mat33_mul(R, matrix);
+            r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+            matrix = nifti_mat33_mul(r, matrix);
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
@@ -578,7 +578,7 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin
     DataType splineCoeffY;
     DataType splineCoeffZ;
 
-    mat33 matrix, R;
+    mat33 matrix, r;
 
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
@@ -590,7 +590,7 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin
 #pragma omp parallel for default(none) \
     shared(splinePtrX, splinePtrY, splinePtrZ, splineControlPoint, \
     basisX, basisY, basisZ, reorientation) \
-    private(x, y, a, b, c, i, index, matrix, R, \
+    private(x, y, a, b, c, i, index, matrix, r, \
     splineCoeffX, splineCoeffY, splineCoeffZ, currentValue) \
     reduction(+:constraintValue)
 #endif
@@ -608,17 +608,17 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin
                             splineCoeffY = splinePtrY[index];
                             splineCoeffZ = splinePtrZ[index];
 
-                            matrix.m[0][0] += basisX[i] * splineCoeffX;
-                            matrix.m[1][0] += basisY[i] * splineCoeffX;
-                            matrix.m[2][0] += basisZ[i] * splineCoeffX;
+                            matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
+                            matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
+                            matrix.m[2][0] += static_cast<float>(basisZ[i] * splineCoeffX);
 
-                            matrix.m[0][1] += basisX[i] * splineCoeffY;
-                            matrix.m[1][1] += basisY[i] * splineCoeffY;
-                            matrix.m[2][1] += basisZ[i] * splineCoeffY;
+                            matrix.m[0][1] += static_cast<float>(basisX[i] * splineCoeffY);
+                            matrix.m[1][1] += static_cast<float>(basisY[i] * splineCoeffY);
+                            matrix.m[2][1] += static_cast<float>(basisZ[i] * splineCoeffY);
 
-                            matrix.m[0][2] += basisX[i] * splineCoeffZ;
-                            matrix.m[1][2] += basisY[i] * splineCoeffZ;
-                            matrix.m[2][2] += basisZ[i] * splineCoeffZ;
+                            matrix.m[0][2] += static_cast<float>(basisX[i] * splineCoeffZ);
+                            matrix.m[1][2] += static_cast<float>(basisY[i] * splineCoeffZ);
+                            matrix.m[2][2] += static_cast<float>(basisZ[i] * splineCoeffZ);
                             ++i;
                         }
                     }
@@ -626,8 +626,8 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin
                 // Convert from mm to voxel
                 matrix = nifti_mat33_mul(reorientation, matrix);
                 // Removing the rotation component
-                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-                matrix = nifti_mat33_mul(R, matrix);
+                r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(r, matrix);
                 // Convert to displacement
                 --matrix.m[0][0];
                 --matrix.m[1][1];
@@ -696,7 +696,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
     DataType basisX[4], basisY[4];
     DataType firstX[4], firstY[4];
 
-    mat33 matrix, R;
+    mat33 matrix, r;
 
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
@@ -725,18 +725,18 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
                     splineCoeffX = splinePtrX[index];
                     splineCoeffY = splinePtrY[index];
 
-                    matrix.m[0][0] += firstX[a] * basisY[b] * splineCoeffX;
-                    matrix.m[1][0] += basisX[a] * firstY[b] * splineCoeffX;
+                    matrix.m[0][0] += static_cast<float>(firstX[a] * basisY[b] * splineCoeffX);
+                    matrix.m[1][0] += static_cast<float>(basisX[a] * firstY[b] * splineCoeffX);
 
-                    matrix.m[0][1] += firstX[a] * basisY[b] * splineCoeffY;
-                    matrix.m[1][1] += basisX[a] * firstY[b] * splineCoeffY;
+                    matrix.m[0][1] += static_cast<float>(firstX[a] * basisY[b] * splineCoeffY);
+                    matrix.m[1][1] += static_cast<float>(basisX[a] * firstY[b] * splineCoeffY);
                 }
             }
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
             // Removing the rotation component
-            R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-            matrix = nifti_mat33_mul(R, matrix);
+            r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+            matrix = nifti_mat33_mul(r, matrix);
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
@@ -781,7 +781,7 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
     DataType basisX[4], basisY[4], basisZ[4];
     DataType firstX[4], firstY[4], firstZ[4];
 
-    mat33 matrix, R;
+    mat33 matrix, r;
 
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
@@ -817,25 +817,25 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
                             splineCoeffY = splinePtrY[index];
                             splineCoeffZ = splinePtrZ[index];
 
-                            matrix.m[0][0] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffX;
-                            matrix.m[1][0] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffX;
-                            matrix.m[2][0] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffX;
+                            matrix.m[0][0] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffX);
+                            matrix.m[1][0] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffX);
+                            matrix.m[2][0] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffX);
 
-                            matrix.m[0][1] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffY;
-                            matrix.m[1][1] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffY;
-                            matrix.m[2][1] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffY;
+                            matrix.m[0][1] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffY);
+                            matrix.m[1][1] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffY);
+                            matrix.m[2][1] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffY);
 
-                            matrix.m[0][2] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ;
-                            matrix.m[1][2] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ;
-                            matrix.m[2][2] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ;
+                            matrix.m[0][2] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ);
+                            matrix.m[1][2] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ);
+                            matrix.m[2][2] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ);
                         }
                     }
                 }
                 // Convert from mm to voxel
                 matrix = nifti_mat33_mul(reorientation, matrix);
                 // Removing the rotation component
-                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-                matrix = nifti_mat33_mul(R, matrix);
+                r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(r, matrix);
                 // Convert to displacement
                 --matrix.m[0][0];
                 --matrix.m[1][1];
@@ -904,12 +904,12 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
     DataType basisX[4], basisY[4];
     DataType firstX[4], firstY[4];
 
-    mat33 matrix, R;
+    mat33 matrix, r;
 
     DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
     DataType *gradientYPtr = &gradientXPtr[nodeNumber];
 
-    DataType approxRatio = (DataType)weight / (DataType)voxelNumber;
+    DataType approxRatio = weight / static_cast<DataType>(voxelNumber);
     DataType gradValues[2];
 
     // Matrix to use to convert the gradient from mm to voxel
@@ -917,7 +917,7 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
     if (splineControlPoint->sform_code > 0)
         reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
     else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
+    mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
     // Loop over all voxels
     for (y = 0; y < referenceImage->ny; ++y) {
@@ -940,30 +940,30 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
                     splineCoeffX = splinePtrX[index];
                     splineCoeffY = splinePtrY[index];
 
-                    matrix.m[0][0] += firstX[a] * basisY[b] * splineCoeffX;
-                    matrix.m[1][0] += basisX[a] * firstY[b] * splineCoeffX;
+                    matrix.m[0][0] += static_cast<float>(firstX[a] * basisY[b] * splineCoeffX);
+                    matrix.m[1][0] += static_cast<float>(basisX[a] * firstY[b] * splineCoeffX);
 
-                    matrix.m[0][1] += firstX[a] * basisY[b] * splineCoeffY;
-                    matrix.m[1][1] += basisX[a] * firstY[b] * splineCoeffY;
+                    matrix.m[0][1] += static_cast<float>(firstX[a] * basisY[b] * splineCoeffY);
+                    matrix.m[1][1] += static_cast<float>(basisX[a] * firstY[b] * splineCoeffY);
                 }
             }
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
             // Removing the rotation component
-            R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-            matrix = nifti_mat33_mul(R, matrix);
+            r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+            matrix = nifti_mat33_mul(r, matrix);
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
             for (b = 0; b < 4; b++) {
                 for (a = 0; a < 4; a++) {
                     index = (yPre + b) * splineControlPoint->nx + xPre + a;
-                    gradValues[0] = -2.0 * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b];
-                    gradValues[1] = -2.0 * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b];
-                    gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
-                                                          inv_reorientation.m[0][1] * gradValues[1]);
-                    gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
-                                                          inv_reorientation.m[1][1] * gradValues[1]);
+                    gradValues[0] = -2.f * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b];
+                    gradValues[1] = -2.f * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b];
+                    gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                                          invReorientation.m[0][1] * gradValues[1]);
+                    gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                                          invReorientation.m[1][1] * gradValues[1]);
                 } // a
             } // b
         }
@@ -997,13 +997,13 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
     DataType basisX[4], basisY[4], basisZ[4];
     DataType firstX[4], firstY[4], firstZ[4];
 
-    mat33 matrix, R;
+    mat33 matrix, r;
 
     DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
     DataType *gradientYPtr = &gradientXPtr[nodeNumber];
     DataType *gradientZPtr = &gradientYPtr[nodeNumber];
 
-    DataType approxRatio = (DataType)weight / (DataType)voxelNumber;
+    DataType approxRatio = weight / static_cast<DataType>(voxelNumber);
     DataType gradValues[3];
 
     // Matrix to use to convert the gradient from mm to voxel
@@ -1011,7 +1011,7 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
     if (splineControlPoint->sform_code > 0)
         reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
     else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
+    mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
     // Loop over all voxels
     for (z = 0; z < referenceImage->nz; ++z) {
@@ -1042,25 +1042,25 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
                             splineCoeffY = splinePtrY[index];
                             splineCoeffZ = splinePtrZ[index];
 
-                            matrix.m[0][0] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffX;
-                            matrix.m[1][0] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffX;
-                            matrix.m[2][0] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffX;
+                            matrix.m[0][0] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffX);
+                            matrix.m[1][0] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffX);
+                            matrix.m[2][0] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffX);
 
-                            matrix.m[0][1] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffY;
-                            matrix.m[1][1] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffY;
-                            matrix.m[2][1] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffY;
+                            matrix.m[0][1] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffY);
+                            matrix.m[1][1] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffY);
+                            matrix.m[2][1] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffY);
 
-                            matrix.m[0][2] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ;
-                            matrix.m[1][2] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ;
-                            matrix.m[2][2] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ;
+                            matrix.m[0][2] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ);
+                            matrix.m[1][2] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ);
+                            matrix.m[2][2] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ);
                         }
                     }
                 }
                 // Convert from mm to voxel
                 matrix = nifti_mat33_mul(reorientation, matrix);
                 // Removing the rotation component
-                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-                matrix = nifti_mat33_mul(R, matrix);
+                r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(r, matrix);
                 // Convert to displacement
                 --matrix.m[0][0];
                 --matrix.m[1][1];
@@ -1069,18 +1069,18 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
                     for (b = 0; b < 4; b++) {
                         for (a = 0; a < 4; a++) {
                             index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a;
-                            gradValues[0] = -2.0 * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b] * basisZ[3 - c];
-                            gradValues[1] = -2.0 * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b] * basisZ[3 - c];
-                            gradValues[2] = -2.0 * matrix.m[2][2] * basisX[3 - a] * basisY[3 - b] * firstZ[3 - c];
-                            gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
-                                                                  inv_reorientation.m[0][1] * gradValues[1] +
-                                                                  inv_reorientation.m[0][2] * gradValues[2]);
-                            gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
-                                                                  inv_reorientation.m[1][1] * gradValues[1] +
-                                                                  inv_reorientation.m[1][2] * gradValues[2]);
-                            gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] +
-                                                                  inv_reorientation.m[2][1] * gradValues[1] +
-                                                                  inv_reorientation.m[2][2] * gradValues[2]);
+                            gradValues[0] = -2.f * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b] * basisZ[3 - c];
+                            gradValues[1] = -2.f * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b] * basisZ[3 - c];
+                            gradValues[2] = -2.f * matrix.m[2][2] * basisX[3 - a] * basisY[3 - b] * firstZ[3 - c];
+                            gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                                                  invReorientation.m[0][1] * gradValues[1] +
+                                                                  invReorientation.m[0][2] * gradValues[2]);
+                            gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                                                  invReorientation.m[1][1] * gradValues[1] +
+                                                                  invReorientation.m[1][2] * gradValues[2]);
+                            gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
+                                                                  invReorientation.m[2][1] * gradValues[1] +
+                                                                  invReorientation.m[2][2] * gradValues[2]);
                         } // a
                     } // b
                 } // c
@@ -1193,65 +1193,50 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi
                                              nifti_image *gradientImage,
                                              float weight) {
     const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
-    int x, y, z, a, b, c, i, index;
 
-    // Create pointers to the spline coefficients
+    // Create the pointers
     const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
     const DataType *splinePtrY = &splinePtrX[nodeNumber];
     const DataType *splinePtrZ = &splinePtrY[nodeNumber];
+    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
+    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
+    DataType *gradientZPtr = &gradientYPtr[nodeNumber];
 
     // Store the basis values since they are constant as the value is approximated
     // at the control point positions only
-    DataType basisX[27];
-    DataType basisY[27];
-    DataType basisZ[27];
+    DataType basisX[27], basisY[27], basisZ[27];
     set_first_order_basis_values(basisX, basisY, basisZ);
 
     // Matrix to use to convert the gradient from mm to voxel
-    mat33 reorientation;
-    if (splineControlPoint->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
-
-    DataType splineCoeffX;
-    DataType splineCoeffY;
-    DataType splineCoeffZ;
-
-    mat33 matrix, R;
-
-    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
-    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
-    DataType *gradientZPtr = &gradientYPtr[nodeNumber];
-
-    DataType approxRatio = (DataType)weight / (DataType)(nodeNumber);
-    DataType gradValues[3];
-
-    for (z = 1; z < splineControlPoint->nz - 1; z++) {
-        for (y = 1; y < splineControlPoint->ny - 1; y++) {
-            for (x = 1; x < splineControlPoint->nx - 1; x++) {
-                memset(&matrix, 0, sizeof(mat33));
-
-                i = 0;
-                for (c = -1; c < 2; c++) {
-                    for (b = -1; b < 2; b++) {
-                        for (a = -1; a < 2; a++) {
-                            index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
-                            splineCoeffX = splinePtrX[index];
-                            splineCoeffY = splinePtrY[index];
-                            splineCoeffZ = splinePtrZ[index];
-
-                            matrix.m[0][0] += basisX[i] * splineCoeffX;
-                            matrix.m[1][0] += basisY[i] * splineCoeffX;
-                            matrix.m[2][0] += basisZ[i] * splineCoeffX;
+    const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk);
+    const mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
-                            matrix.m[0][1] += basisX[i] * splineCoeffY;
-                            matrix.m[1][1] += basisY[i] * splineCoeffY;
-                            matrix.m[2][1] += basisZ[i] * splineCoeffY;
+    const DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
 
-                            matrix.m[0][2] += basisX[i] * splineCoeffZ;
-                            matrix.m[1][2] += basisY[i] * splineCoeffZ;
-                            matrix.m[2][2] += basisZ[i] * splineCoeffZ;
+    for (int z = 1; z < splineControlPoint->nz - 1; z++) {
+        for (int y = 1; y < splineControlPoint->ny - 1; y++) {
+            for (int x = 1; x < splineControlPoint->nx - 1; x++) {
+                mat33 matrix{};
+                int i = 0;
+                for (int c = -1; c < 2; c++) {
+                    for (int b = -1; b < 2; b++) {
+                        for (int a = -1; a < 2; a++) {
+                            const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
+                            const DataType& splineCoeffX = splinePtrX[index];
+                            const DataType& splineCoeffY = splinePtrY[index];
+                            const DataType& splineCoeffZ = splinePtrZ[index];
+
+                            matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
+                            matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
+                            matrix.m[2][0] += static_cast<float>(basisZ[i] * splineCoeffX);
+
+                            matrix.m[0][1] += static_cast<float>(basisX[i] * splineCoeffY);
+                            matrix.m[1][1] += static_cast<float>(basisY[i] * splineCoeffY);
+                            matrix.m[2][1] += static_cast<float>(basisZ[i] * splineCoeffY);
+
+                            matrix.m[0][2] += static_cast<float>(basisX[i] * splineCoeffZ);
+                            matrix.m[1][2] += static_cast<float>(basisY[i] * splineCoeffZ);
+                            matrix.m[2][2] += static_cast<float>(basisZ[i] * splineCoeffZ);
                             ++i;
                         }
                     }
@@ -1259,32 +1244,30 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi
                 // Convert from mm to voxel
                 matrix = nifti_mat33_mul(reorientation, matrix);
                 // Removing the rotation component
-                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-                matrix = nifti_mat33_mul(R, matrix);
+                const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(r, matrix);
                 // Convert to displacement
-                --matrix.m[0][0];
-                --matrix.m[1][1];
-                --matrix.m[2][2];
+                matrix.m[0][0]--;
+                matrix.m[1][1]--;
+                matrix.m[2][2]--;
                 i = 26;
-                for (c = -1; c < 2; c++) {
-                    for (b = -1; b < 2; b++) {
-                        for (a = -1; a < 2; a++) {
-                            index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
-                            gradValues[0] = -2.0 * matrix.m[0][0] * basisX[i];
-                            gradValues[1] = -2.0 * matrix.m[1][1] * basisY[i];
-                            gradValues[2] = -2.0 * matrix.m[2][2] * basisZ[i];
-
-                            gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
-                                                                  inv_reorientation.m[0][1] * gradValues[1] +
-                                                                  inv_reorientation.m[0][2] * gradValues[2]);
-
-                            gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
-                                                                  inv_reorientation.m[1][1] * gradValues[1] +
-                                                                  inv_reorientation.m[1][2] * gradValues[2]);
-
-                            gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] +
-                                                                  inv_reorientation.m[2][1] * gradValues[1] +
-                                                                  inv_reorientation.m[2][2] * gradValues[2]);
+                for (int c = -1; c < 2; c++) {
+                    for (int b = -1; b < 2; b++) {
+                        for (int a = -1; a < 2; a++) {
+                            const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
+                            const DataType gradValues[3]{ -2.f * matrix.m[0][0] * basisX[i],
+                                                          -2.f * matrix.m[1][1] * basisY[i],
+                                                          -2.f * matrix.m[2][2] * basisZ[i] };
+
+                            gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                                                  invReorientation.m[0][1] * gradValues[1] +
+                                                                  invReorientation.m[0][2] * gradValues[2]);
+                            gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                                                  invReorientation.m[1][1] * gradValues[1] +
+                                                                  invReorientation.m[1][2] * gradValues[2]);
+                            gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
+                                                                  invReorientation.m[2][1] * gradValues[1] +
+                                                                  invReorientation.m[2][2] * gradValues[2]);
                             --i;
                         } // a
                     } // b
@@ -1340,7 +1323,7 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
     const DataType *defPtrY = &defPtrX[voxelNumber];
     DataType defX, defY;
 
-    mat33 matrix, R;
+    mat33 matrix, r;
 
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
@@ -1361,17 +1344,17 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
                     defX = defPtrX[index];
                     defY = defPtrY[index];
 
-                    matrix.m[0][0] += first[a] * basis[b] * defX;
-                    matrix.m[1][0] += basis[a] * first[b] * defX;
-                    matrix.m[0][1] += first[a] * basis[b] * defY;
-                    matrix.m[1][1] += basis[a] * first[b] * defY;
+                    matrix.m[0][0] += static_cast<float>(first[a] * basis[b] * defX);
+                    matrix.m[1][0] += static_cast<float>(basis[a] * first[b] * defX);
+                    matrix.m[0][1] += static_cast<float>(first[a] * basis[b] * defY);
+                    matrix.m[1][1] += static_cast<float>(basis[a] * first[b] * defY);
                 }
             }
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
             // Removing the rotation component
-            R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-            matrix = nifti_mat33_mul(R, matrix);
+            r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+            matrix = nifti_mat33_mul(r, matrix);
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
@@ -1404,7 +1387,7 @@ double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) {
     const DataType *defPtrZ = &defPtrY[voxelNumber];
     DataType defX, defY, defZ;
 
-    mat33 matrix, R;
+    mat33 matrix, r;
 
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
@@ -1429,25 +1412,25 @@ double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) {
                             defY = defPtrY[index];
                             defZ = defPtrZ[index];
 
-                            matrix.m[0][0] += first[a] * basis[b] * basis[c] * defX;
-                            matrix.m[1][0] += basis[a] * first[b] * basis[c] * defX;
-                            matrix.m[2][0] += basis[a] * basis[b] * first[c] * defX;
+                            matrix.m[0][0] += static_cast<float>(first[a] * basis[b] * basis[c] * defX);
+                            matrix.m[1][0] += static_cast<float>(basis[a] * first[b] * basis[c] * defX);
+                            matrix.m[2][0] += static_cast<float>(basis[a] * basis[b] * first[c] * defX);
 
-                            matrix.m[0][1] += first[a] * basis[b] * basis[c] * defY;
-                            matrix.m[1][1] += basis[a] * first[b] * basis[c] * defY;
-                            matrix.m[2][1] += basis[a] * basis[b] * first[c] * defY;
+                            matrix.m[0][1] += static_cast<float>(first[a] * basis[b] * basis[c] * defY);
+                            matrix.m[1][1] += static_cast<float>(basis[a] * first[b] * basis[c] * defY);
+                            matrix.m[2][1] += static_cast<float>(basis[a] * basis[b] * first[c] * defY);
 
-                            matrix.m[0][2] += first[a] * basis[b] * basis[c] * defZ;
-                            matrix.m[1][2] += basis[a] * first[b] * basis[c] * defZ;
-                            matrix.m[2][2] += basis[a] * basis[b] * first[c] * defZ;
+                            matrix.m[0][2] += static_cast<float>(first[a] * basis[b] * basis[c] * defZ);
+                            matrix.m[1][2] += static_cast<float>(basis[a] * first[b] * basis[c] * defZ);
+                            matrix.m[2][2] += static_cast<float>(basis[a] * basis[b] * first[c] * defZ);
                         }
                     }
                 }
                 // Convert from mm to voxel
                 matrix = nifti_mat33_mul(reorientation, matrix);
                 // Removing the rotation component
-                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-                matrix = nifti_mat33_mul(R, matrix);
+                r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(r, matrix);
                 // Convert to displacement
                 --matrix.m[0][0];
                 --matrix.m[1][1];
@@ -1504,12 +1487,12 @@ void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField,
     const DataType *defPtrY = &defPtrX[voxelNumber];
     DataType defX, defY;
 
-    mat33 matrix, R;
+    mat33 matrix, r;
 
     DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
     DataType *gradientYPtr = &gradientXPtr[voxelNumber];
 
-    DataType approxRatio = (DataType)weight / (DataType)voxelNumber;
+    DataType approxRatio = weight / static_cast<DataType>(voxelNumber);
     DataType gradValues[2];
 
     // Matrix to use to convert the gradient from mm to voxel
@@ -1517,7 +1500,7 @@ void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField,
     if (deformationField->sform_code > 0)
         reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
     else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
-    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
+    mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
     for (y = 0; y < deformationField->ny; ++y) {
         Y = (y != deformationField->ny - 1) ? y : y - 1;
@@ -1532,17 +1515,17 @@ void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField,
                     defX = defPtrX[index];
                     defY = defPtrY[index];
 
-                    matrix.m[0][0] += first[a] * basis[b] * defX;
-                    matrix.m[1][0] += basis[a] * first[b] * defX;
-                    matrix.m[0][1] += first[a] * basis[b] * defY;
-                    matrix.m[1][1] += basis[a] * first[b] * defY;
+                    matrix.m[0][0] += static_cast<float>(first[a] * basis[b] * defX);
+                    matrix.m[1][0] += static_cast<float>(basis[a] * first[b] * defX);
+                    matrix.m[0][1] += static_cast<float>(first[a] * basis[b] * defY);
+                    matrix.m[1][1] += static_cast<float>(basis[a] * first[b] * defY);
                 }
             }
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
             // Removing the rotation component
-            R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-            matrix = nifti_mat33_mul(R, matrix);
+            r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+            matrix = nifti_mat33_mul(r, matrix);
             // Convert to displacement
             --matrix.m[0][0];
             --matrix.m[1][1];
@@ -1550,12 +1533,12 @@ void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField,
             for (b = 0; b < 2; b++) {
                 for (a = 0; a < 2; a++) {
                     index = (Y + b) * deformationField->nx + X + a;
-                    gradValues[0] = -2.0 * matrix.m[0][0] * first[1 - a] * basis[1 - b];
-                    gradValues[1] = -2.0 * matrix.m[1][1] * basis[1 - a] * first[1 - b];
-                    gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
-                                                          inv_reorientation.m[0][1] * gradValues[1]);
-                    gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
-                                                          inv_reorientation.m[1][1] * gradValues[1]);
+                    gradValues[0] = -2.f * matrix.m[0][0] * first[1 - a] * basis[1 - b];
+                    gradValues[1] = -2.f * matrix.m[1][1] * basis[1 - a] * first[1 - b];
+                    gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                                          invReorientation.m[0][1] * gradValues[1]);
+                    gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                                          invReorientation.m[1][1] * gradValues[1]);
                 } // a
             } // b
         }
@@ -1577,13 +1560,13 @@ void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField,
     const DataType *defPtrZ = &defPtrY[voxelNumber];
     DataType defX, defY, defZ;
 
-    mat33 matrix, R;
+    mat33 matrix, r;
 
     DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
     DataType *gradientYPtr = &gradientXPtr[voxelNumber];
     DataType *gradientZPtr = &gradientYPtr[voxelNumber];
 
-    DataType approxRatio = (DataType)weight / (DataType)voxelNumber;
+    DataType approxRatio = weight / static_cast<DataType>(voxelNumber);
     DataType gradValues[3];
 
     // Matrix to use to convert the gradient from mm to voxel
@@ -1591,7 +1574,7 @@ void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField,
     if (deformationField->sform_code > 0)
         reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
     else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
-    mat33 inv_reorientation = nifti_mat33_inverse(reorientation);
+    mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
     for (z = 0; z < deformationField->nz; ++z) {
         Z = (z != deformationField->nz - 1) ? z : z - 1;
@@ -1610,25 +1593,25 @@ void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField,
                             defY = defPtrY[index];
                             defZ = defPtrZ[index];
 
-                            matrix.m[0][0] += first[a] * basis[b] * basis[c] * defX;
-                            matrix.m[1][0] += basis[a] * first[b] * basis[c] * defX;
-                            matrix.m[2][0] += basis[a] * basis[b] * first[c] * defX;
+                            matrix.m[0][0] += static_cast<float>(first[a] * basis[b] * basis[c] * defX);
+                            matrix.m[1][0] += static_cast<float>(basis[a] * first[b] * basis[c] * defX);
+                            matrix.m[2][0] += static_cast<float>(basis[a] * basis[b] * first[c] * defX);
 
-                            matrix.m[0][1] += first[a] * basis[b] * basis[c] * defY;
-                            matrix.m[1][1] += basis[a] * first[b] * basis[c] * defY;
-                            matrix.m[2][1] += basis[a] * basis[b] * first[c] * defY;
+                            matrix.m[0][1] += static_cast<float>(first[a] * basis[b] * basis[c] * defY);
+                            matrix.m[1][1] += static_cast<float>(basis[a] * first[b] * basis[c] * defY);
+                            matrix.m[2][1] += static_cast<float>(basis[a] * basis[b] * first[c] * defY);
 
-                            matrix.m[0][2] += first[a] * basis[b] * basis[c] * defZ;
-                            matrix.m[1][2] += basis[a] * first[b] * basis[c] * defZ;
-                            matrix.m[2][2] += basis[a] * basis[b] * first[c] * defZ;
+                            matrix.m[0][2] += static_cast<float>(first[a] * basis[b] * basis[c] * defZ);
+                            matrix.m[1][2] += static_cast<float>(basis[a] * first[b] * basis[c] * defZ);
+                            matrix.m[2][2] += static_cast<float>(basis[a] * basis[b] * first[c] * defZ);
                         }
                     }
                 }
                 // Convert from mm to voxel
                 matrix = nifti_mat33_mul(reorientation, matrix);
                 // Removing the rotation component
-                R = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-                matrix = nifti_mat33_mul(R, matrix);
+                r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                matrix = nifti_mat33_mul(r, matrix);
                 // Convert to displacement
                 --matrix.m[0][0];
                 --matrix.m[1][1];
@@ -1637,18 +1620,18 @@ void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField,
                     for (b = 0; b < 2; b++) {
                         for (a = 0; a < 2; a++) {
                             index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a;
-                            gradValues[0] = -2.0 * matrix.m[0][0] * first[1 - a] * basis[1 - b] * basis[1 - c];
-                            gradValues[1] = -2.0 * matrix.m[1][1] * basis[1 - a] * first[1 - b] * basis[1 - c];
-                            gradValues[2] = -2.0 * matrix.m[2][2] * basis[1 - a] * basis[1 - b] * first[1 - c];
-                            gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] +
-                                                                  inv_reorientation.m[0][1] * gradValues[1] +
-                                                                  inv_reorientation.m[0][2] * gradValues[2]);
-                            gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] +
-                                                                  inv_reorientation.m[1][1] * gradValues[1] +
-                                                                  inv_reorientation.m[1][2] * gradValues[2]);
-                            gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] +
-                                                                  inv_reorientation.m[2][1] * gradValues[1] +
-                                                                  inv_reorientation.m[2][2] * gradValues[2]);
+                            gradValues[0] = -2.f * matrix.m[0][0] * first[1 - a] * basis[1 - b] * basis[1 - c];
+                            gradValues[1] = -2.f * matrix.m[1][1] * basis[1 - a] * first[1 - b] * basis[1 - c];
+                            gradValues[2] = -2.f * matrix.m[2][2] * basis[1 - a] * basis[1 - b] * first[1 - c];
+                            gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                                                  invReorientation.m[0][1] * gradValues[1] +
+                                                                  invReorientation.m[0][2] * gradValues[2]);
+                            gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                                                  invReorientation.m[1][1] * gradValues[1] +
+                                                                  invReorientation.m[1][2] * gradValues[2]);
+                            gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
+                                                                  invReorientation.m[2][1] * gradValues[1] +
+                                                                  invReorientation.m[2][2] * gradValues[2]);
                         } // a
                     } // b
                 } // c
@@ -1694,9 +1677,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     double constraintValue = 0;
     size_t l, index;
-    float ref_position[4];
-    float def_position[4];
-    float flo_position[4];
+    float refPosition[4];
+    float defPosition[4];
+    float floPosition[4];
     int previous[3], a, b, c;
     DataType basisX[4], basisY[4], basisZ[4], basis;
     const mat44 *gridRealToVox = &(controlPointImage->qto_ijk);
@@ -1711,33 +1694,33 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
     // Loop over all landmarks
     for (l = 0; l < landmarkNumber; ++l) {
         // fetch the initial positions
-        ref_position[0] = landmarkReference[l * imageDim];
-        flo_position[0] = landmarkFloating[l * imageDim];
-        ref_position[1] = landmarkReference[l * imageDim + 1];
-        flo_position[1] = landmarkFloating[l * imageDim + 1];
+        refPosition[0] = landmarkReference[l * imageDim];
+        floPosition[0] = landmarkFloating[l * imageDim];
+        refPosition[1] = landmarkReference[l * imageDim + 1];
+        floPosition[1] = landmarkFloating[l * imageDim + 1];
         if (imageDim > 2) {
-            ref_position[2] = landmarkReference[l * imageDim + 2];
-            flo_position[2] = landmarkFloating[l * imageDim + 2];
-        } else ref_position[2] = flo_position[2] = 0;
-        ref_position[3] = flo_position[3] = 1;
+            refPosition[2] = landmarkReference[l * imageDim + 2];
+            floPosition[2] = landmarkFloating[l * imageDim + 2];
+        } else refPosition[2] = floPosition[2] = 0;
+        refPosition[3] = floPosition[3] = 1;
         // Convert the reference position to voxel in the control point grid space
-        reg_mat44_mul(gridRealToVox, ref_position, def_position);
+        reg_mat44_mul(gridRealToVox, refPosition, defPosition);
 
         // Extract the corresponding nodes
-        previous[0] = Floor(def_position[0]) - 1;
-        previous[1] = Floor(def_position[1]) - 1;
-        previous[2] = Floor(def_position[2]) - 1;
+        previous[0] = Floor(defPosition[0]) - 1;
+        previous[1] = Floor(defPosition[1]) - 1;
+        previous[2] = Floor(defPosition[2]) - 1;
         // Check that the specified landmark belongs to the input image
         if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx &&
             previous[1] > -1 && previous[1] + 3 < controlPointImage->ny &&
             ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) {
             // Extract the corresponding basis values
-            get_BSplineBasisValues<DataType>(def_position[0] - 1 - (DataType)previous[0], basisX);
-            get_BSplineBasisValues<DataType>(def_position[1] - 1 - (DataType)previous[1], basisY);
-            get_BSplineBasisValues<DataType>(def_position[2] - 1 - (DataType)previous[2], basisZ);
-            def_position[0] = 0;
-            def_position[1] = 0;
-            def_position[2] = 0;
+            get_BSplineBasisValues<DataType>(defPosition[0] - 1 - (DataType)previous[0], basisX);
+            get_BSplineBasisValues<DataType>(defPosition[1] - 1 - (DataType)previous[1], basisY);
+            get_BSplineBasisValues<DataType>(defPosition[2] - 1 - (DataType)previous[2], basisZ);
+            defPosition[0] = 0;
+            defPosition[1] = 0;
+            defPosition[2] = 0;
             if (imageDim > 2) {
                 for (c = 0; c < 4; ++c) {
                     for (b = 0; b < 4; ++b) {
@@ -1745,9 +1728,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
                             index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) *
                                 controlPointImage->nx + previous[0] + a;
                             basis = basisX[a] * basisY[b] * basisZ[c];
-                            def_position[0] += gridPtrX[index] * basis;
-                            def_position[1] += gridPtrY[index] * basis;
-                            def_position[2] += gridPtrZ[index] * basis;
+                            defPosition[0] += static_cast<float>(gridPtrX[index] * basis);
+                            defPosition[1] += static_cast<float>(gridPtrY[index] * basis);
+                            defPosition[2] += static_cast<float>(gridPtrZ[index] * basis);
                         }
                     }
                 }
@@ -1756,18 +1739,18 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
                     for (a = 0; a < 4; ++a) {
                         index = (previous[1] + b) * controlPointImage->nx + previous[0] + a;
                         basis = basisX[a] * basisY[b];
-                        def_position[0] += gridPtrX[index] * basis;
-                        def_position[1] += gridPtrY[index] * basis;
+                        defPosition[0] += static_cast<float>(gridPtrX[index] * basis);
+                        defPosition[1] += static_cast<float>(gridPtrY[index] * basis);
                     }
                 }
             }
-            constraintValue += Square(flo_position[0] - def_position[0]);
-            constraintValue += Square(flo_position[1] - def_position[1]);
+            constraintValue += Square(floPosition[0] - defPosition[0]);
+            constraintValue += Square(floPosition[1] - defPosition[1]);
             if (imageDim > 2)
-                constraintValue += Square(flo_position[2] - def_position[2]);
+                constraintValue += Square(floPosition[2] - defPosition[2]);
         } else {
-            NR_WARN("The current landmark at position " << ref_position[0] << " " <<
-                    ref_position[1] << (imageDim > 2 ? " "s + std::to_string(ref_position[2]) : "") <<
+            NR_WARN("The current landmark at position " << refPosition[0] << " " <<
+                    refPosition[1] << (imageDim > 2 ? " "s + std::to_string(refPosition[2]) : "") <<
                     " is ignored as it is not in the space of the reference image");
         }
     }
@@ -1803,9 +1786,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
     const int imageDim = controlPointImage->nz > 1 ? 3 : 2;
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     size_t l, index;
-    float ref_position[3];
-    float def_position[3];
-    float flo_position[3];
+    float refPosition[3];
+    float defPosition[3];
+    float floPosition[3];
     int previous[3], a, b, c;
     DataType basisX[4], basisY[4], basisZ[4], basis;
     const mat44 *gridRealToVox = &(controlPointImage->qto_ijk);
@@ -1825,32 +1808,32 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
     // Loop over all landmarks
     for (l = 0; l < landmarkNumber; ++l) {
         // fetch the initial positions
-        ref_position[0] = landmarkReference[l * imageDim];
-        flo_position[0] = landmarkFloating[l * imageDim];
-        ref_position[1] = landmarkReference[l * imageDim + 1];
-        flo_position[1] = landmarkFloating[l * imageDim + 1];
+        refPosition[0] = landmarkReference[l * imageDim];
+        floPosition[0] = landmarkFloating[l * imageDim];
+        refPosition[1] = landmarkReference[l * imageDim + 1];
+        floPosition[1] = landmarkFloating[l * imageDim + 1];
         if (imageDim > 2) {
-            ref_position[2] = landmarkReference[l * imageDim + 2];
-            flo_position[2] = landmarkFloating[l * imageDim + 2];
-        } else ref_position[2] = flo_position[2] = 0;
+            refPosition[2] = landmarkReference[l * imageDim + 2];
+            floPosition[2] = landmarkFloating[l * imageDim + 2];
+        } else refPosition[2] = floPosition[2] = 0;
         // Convert the reference position to voxel in the control point grid space
-        reg_mat44_mul(gridRealToVox, ref_position, def_position);
-        if (imageDim == 2) def_position[2] = 0;
+        reg_mat44_mul(gridRealToVox, refPosition, defPosition);
+        if (imageDim == 2) defPosition[2] = 0;
         // Extract the corresponding nodes
-        previous[0] = Floor(def_position[0]) - 1;
-        previous[1] = Floor(def_position[1]) - 1;
-        previous[2] = Floor(def_position[2]) - 1;
+        previous[0] = Floor(defPosition[0]) - 1;
+        previous[1] = Floor(defPosition[1]) - 1;
+        previous[2] = Floor(defPosition[2]) - 1;
         // Check that the specified landmark belongs to the input image
         if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx &&
             previous[1] > -1 && previous[1] + 3 < controlPointImage->ny &&
             ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) {
             // Extract the corresponding basis values
-            get_BSplineBasisValues<DataType>(def_position[0] - 1 - (DataType)previous[0], basisX);
-            get_BSplineBasisValues<DataType>(def_position[1] - 1 - (DataType)previous[1], basisY);
-            get_BSplineBasisValues<DataType>(def_position[2] - 1 - (DataType)previous[2], basisZ);
-            def_position[0] = 0;
-            def_position[1] = 0;
-            def_position[2] = 0;
+            get_BSplineBasisValues<DataType>(defPosition[0] - 1 - (DataType)previous[0], basisX);
+            get_BSplineBasisValues<DataType>(defPosition[1] - 1 - (DataType)previous[1], basisY);
+            get_BSplineBasisValues<DataType>(defPosition[2] - 1 - (DataType)previous[2], basisZ);
+            defPosition[0] = 0;
+            defPosition[1] = 0;
+            defPosition[2] = 0;
             if (imageDim > 2) {
                 for (c = 0; c < 4; ++c) {
                     for (b = 0; b < 4; ++b) {
@@ -1858,9 +1841,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
                             index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) *
                                 controlPointImage->nx + previous[0] + a;
                             basis = basisX[a] * basisY[b] * basisZ[c];
-                            def_position[0] += gridPtrX[index] * basis;
-                            def_position[1] += gridPtrY[index] * basis;
-                            def_position[2] += gridPtrZ[index] * basis;
+                            defPosition[0] += static_cast<float>(gridPtrX[index] * basis);
+                            defPosition[1] += static_cast<float>(gridPtrY[index] * basis);
+                            defPosition[2] += static_cast<float>(gridPtrZ[index] * basis);
                         }
                     }
                 }
@@ -1869,15 +1852,15 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
                     for (a = 0; a < 4; ++a) {
                         index = (previous[1] + b) * controlPointImage->nx + previous[0] + a;
                         basis = basisX[a] * basisY[b];
-                        def_position[0] += gridPtrX[index] * basis;
-                        def_position[1] += gridPtrY[index] * basis;
+                        defPosition[0] += static_cast<float>(gridPtrX[index] * basis);
+                        defPosition[1] += static_cast<float>(gridPtrY[index] * basis);
                     }
                 }
             }
-            def_position[0] = flo_position[0] - def_position[0];
-            def_position[1] = flo_position[1] - def_position[1];
+            defPosition[0] = floPosition[0] - defPosition[0];
+            defPosition[1] = floPosition[1] - defPosition[1];
             if (imageDim > 2)
-                def_position[2] = flo_position[2] - def_position[2];
+                defPosition[2] = floPosition[2] - defPosition[2];
             if (imageDim > 2) {
                 for (c = 0; c < 4; ++c) {
                     for (b = 0; b < 4; ++b) {
@@ -1885,9 +1868,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
                             index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) *
                                 controlPointImage->nx + previous[0] + a;
                             basis = basisX[a] * basisY[b] * basisZ[c] * weight;
-                            gradPtrX[index] -= def_position[0] * basis;
-                            gradPtrY[index] -= def_position[1] * basis;
-                            gradPtrZ[index] -= def_position[2] * basis;
+                            gradPtrX[index] -= defPosition[0] * basis;
+                            gradPtrY[index] -= defPosition[1] * basis;
+                            gradPtrZ[index] -= defPosition[2] * basis;
                         }
                     }
                 }
@@ -1896,14 +1879,14 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
                     for (a = 0; a < 4; ++a) {
                         index = (previous[1] + b) * controlPointImage->nx + previous[0] + a;
                         basis = basisX[a] * basisY[b] * weight;
-                        gradPtrX[index] -= def_position[0] * basis;
-                        gradPtrY[index] -= def_position[1] * basis;
+                        gradPtrX[index] -= defPosition[0] * basis;
+                        gradPtrY[index] -= defPosition[1] * basis;
                     }
                 }
             }
         } else {
-            NR_WARN("The current landmark at position " << ref_position[0] << " " <<
-                    ref_position[1] << (imageDim > 2 ? " "s + std::to_string(ref_position[2]) : "") <<
+            NR_WARN("The current landmark at position " << refPosition[0] << " " <<
+                    refPosition[1] << (imageDim > 2 ? " "s + std::to_string(refPosition[2]) : "") <<
                     " is ignored as it is not in the space of the reference image");
         }
     }
@@ -2051,7 +2034,7 @@ void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint,
 
     double grad_values[3];
 
-    DataType approxRatio = (DataType)weight / (DataType)nodeNumber;
+    DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     private(index, x, y, centralCP, neigbCP, grad_values) \
diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h
index 1c929167..864bc9c7 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.h
+++ b/reg-lib/cpu/_reg_localTrans_regul.h
@@ -61,7 +61,7 @@ double reg_spline_approxLinearEnergy(const nifti_image *controlPointGridImage);
  * parametrisation
  * @param gradientImage Image of similar size than the control point
  * grid and that contains the gradient of the objective function.
- * The gradient of the linear elasticily terms are added to the
+ * The gradient of the linear elasticity terms are added to the
  * current values
  * @param weight Weight to apply to the term of the penalty
  */
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index c2b5f998..19e001d6 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -165,7 +165,7 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
 }
 /* *************************************************************** */
 template<class PrecisionType>
-PrecisionType GetBasisSplineValue(PrecisionType x) {
+static PrecisionType GetBasisSplineValue(PrecisionType x) {
     x = fabs(x);
     PrecisionType value = 0;
     if (x < 2.f) {
@@ -180,7 +180,7 @@ PrecisionType GetBasisSplineValue(PrecisionType x) {
 }
 /* *************************************************************** */
 template<class PrecisionType>
-PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) {
+static PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) {
     PrecisionType x = fabs(ori);
     PrecisionType value = 0;
     if (x < 2.f) {
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index 7e944323..5c440afa 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -8,13 +8,13 @@
 #pragma once
 
 /* *************************************************************** */
-__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) {
+__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool is3d) {
     out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]);
     out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]);
     out[2] = is3d ? weight * (mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2]) : 0;
 }
 /* *************************************************************** */
-__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool& is3d) {
+__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool is3d) {
     out[0] = mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2] + mat.m[0][3];
     out[1] = mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2] + mat.m[1][3];
     out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0;
@@ -26,7 +26,7 @@ __device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quo
     rem = num % denom;
 }
 /* *************************************************************** */
-__device__ __inline__ int3 reg_indexToDims_cuda(const int& index, const int3& dims) {
+__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dims) {
     int quot = 0, rem;
     if (dims.z > 1)
         reg_div_cuda(index, dims.x * dims.y, quot, rem);

From 80cecbda81e8d3857ab294b6ef6fe7b464b760c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 14 Sep 2023 14:10:26 +0100
Subject: [PATCH 208/314] Fix wrong calculation of CUDA
 ApproxLinearEnergyGradient(), GetLandmarkDistance(), and
 LandmarkDistanceGradient() #92

---
 niftyreg_build_version.txt | 2 +-
 reg-lib/Compute.cpp        | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index d1e85f89..27a69f60 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-326
+327
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 6d3d02a5..5409042a 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -56,14 +56,14 @@ double Compute::ApproxLinearEnergy() {
 /* *************************************************************** */
 void Compute::ApproxLinearEnergyGradient(float weight) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    reg_spline_approxLinearEnergyGradient(con.F3dContent::GetControlPointGrid(),
+    reg_spline_approxLinearEnergyGradient(con.GetControlPointGrid(),
                                           con.GetTransformationGradient(),
                                           weight);
 }
 /* *************************************************************** */
 double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    return reg_spline_getLandmarkDistance(con.F3dContent::GetControlPointGrid(),
+    return reg_spline_getLandmarkDistance(con.GetControlPointGrid(),
                                           landmarkNumber,
                                           landmarkReference,
                                           landmarkFloating);
@@ -71,7 +71,7 @@ double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkRefere
 /* *************************************************************** */
 void Compute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    reg_spline_getLandmarkDistanceGradient(con.F3dContent::GetControlPointGrid(),
+    reg_spline_getLandmarkDistanceGradient(con.GetControlPointGrid(),
                                            con.GetTransformationGradient(),
                                            landmarkNumber,
                                            landmarkReference,

From bc7ab04aeff37ec64e8bdd4cf44da32dca97a6d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 18 Sep 2023 15:17:18 +0100
Subject: [PATCH 209/314] Refactorisations

---
 niftyreg_build_version.txt                |    2 +-
 reg-lib/cpu/_reg_localTrans_regul.cpp     |    7 +-
 reg-lib/cpu/_reg_splineBasis.cpp          | 1187 ++++++++++-----------
 reg-test/reg_test_be.cpp                  |    1 -
 reg-test/reg_test_common.h                |    1 +
 reg-test/reg_test_composeField.cpp        |    1 -
 reg-test/reg_test_getDeformationField.cpp |    1 -
 reg-test/reg_test_regr_measure.cpp        |    3 +-
 8 files changed, 561 insertions(+), 642 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 27a69f60..86619979 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-327
+328
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 33dcfcee..46a3928c 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -1169,8 +1169,7 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi
             const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
             matrix = nifti_mat33_mul(r, matrix);
             // Convert to displacement
-            matrix.m[0][0]--;
-            matrix.m[1][1]--;
+            matrix.m[0][0]--; matrix.m[1][1]--;
             i = 8;
             for (int b = -1; b < 2; b++) {
                 for (int a = -1; a < 2; a++) {
@@ -1247,9 +1246,7 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi
                 const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
                 matrix = nifti_mat33_mul(r, matrix);
                 // Convert to displacement
-                matrix.m[0][0]--;
-                matrix.m[1][1]--;
-                matrix.m[2][2]--;
+                matrix.m[0][0]--; matrix.m[1][1]--; matrix.m[2][2]--;
                 i = 26;
                 for (int c = -1; c < 2; c++) {
                     for (int b = -1; b < 2; b++) {
diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp
index 911c5487..a47a635b 100755
--- a/reg-lib/cpu/_reg_splineBasis.cpp
+++ b/reg-lib/cpu/_reg_splineBasis.cpp
@@ -15,527 +15,491 @@
 
 /* *************************************************************** */
 template<class DataType>
-void get_BSplineBasisValues(DataType basis, DataType *values)
-{
-   DataType FF= basis*basis;
-   DataType FFF= FF*basis;
-   DataType MF=static_cast<DataType>(1.0-basis);
-   values[0] = static_cast<DataType>((MF)*(MF)*(MF)/(6.0));
-   values[1] = static_cast<DataType>((3.0*FFF - 6.0*FF + 4.0)/6.0);
-   values[2] = static_cast<DataType>((-3.0*FFF + 3.0*FF + 3.0*basis + 1.0)/6.0);
-   values[3] = static_cast<DataType>(FFF/6.0);
+void get_BSplineBasisValues(DataType basis, DataType *values) {
+    DataType FF = basis * basis;
+    DataType FFF = FF * basis;
+    DataType MF = static_cast<DataType>(1.0 - basis);
+    values[0] = static_cast<DataType>((MF) * (MF) * (MF) / (6.0));
+    values[1] = static_cast<DataType>((3.0 * FFF - 6.0 * FF + 4.0) / 6.0);
+    values[2] = static_cast<DataType>((-3.0 * FFF + 3.0 * FF + 3.0 * basis + 1.0) / 6.0);
+    values[3] = static_cast<DataType>(FFF / 6.0);
 }
 template void get_BSplineBasisValues<float>(float, float *);
 template void get_BSplineBasisValues<double>(double, double *);
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DataType>
-void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first)
-{
-   get_BSplineBasisValues<DataType>(basis, values);
-   first[3]= static_cast<DataType>(basis * basis / 2.0);
-   first[0]= static_cast<DataType>(basis - 1.0/2.0 - first[3]);
-   first[2]= static_cast<DataType>(1.0 + first[0] - 2.0*first[3]);
-   first[1]= - first[0] - first[2] - first[3];
+void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first) {
+    get_BSplineBasisValues<DataType>(basis, values);
+    first[3] = static_cast<DataType>(basis * basis / 2.0);
+    first[0] = static_cast<DataType>(basis - 1.0 / 2.0 - first[3]);
+    first[2] = static_cast<DataType>(1.0 + first[0] - 2.0 * first[3]);
+    first[1] = -first[0] - first[2] - first[3];
 }
-template void get_BSplineBasisValues<float>(float, float *, float *);
-template void get_BSplineBasisValues<double>(double, double *, double *);
-/* *************************************************************** */
+template void get_BSplineBasisValues<float>(float, float*, float *);
+template void get_BSplineBasisValues<double>(double, double*, double *);
 /* *************************************************************** */
 template<class DataType>
-void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second)
-{
-   get_BSplineBasisValues<DataType>(basis, values, first);
-   second[3]= basis;
-   second[0]= static_cast<DataType>(1.0 - second[3]);
-   second[2]= static_cast<DataType>(second[0] - 2.0*second[3]);
-   second[1]= - second[0] - second[2] - second[3];
+void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second) {
+    get_BSplineBasisValues<DataType>(basis, values, first);
+    second[3] = basis;
+    second[0] = static_cast<DataType>(1.0 - second[3]);
+    second[2] = static_cast<DataType>(second[0] - 2.0 * second[3]);
+    second[1] = -second[0] - second[2] - second[3];
 }
-template void get_BSplineBasisValues<float>(float, float *, float *, float *);
-template void get_BSplineBasisValues<double>(double, double *, double *, double *);
-/* *************************************************************** */
+template void get_BSplineBasisValues<float>(float, float*, float*, float *);
+template void get_BSplineBasisValues<double>(double, double*, double*, double *);
 /* *************************************************************** */
 template<class DataType>
-void get_BSplineBasisValue(DataType basis, int index, DataType &value)
-{
-   switch(index)
-   {
-   case 0:
-      value = (DataType)((1.0-basis)*(1.0-basis)*(1.0-basis)/6.0);
-      break;
-   case 1:
-      value = (DataType)((3.0*basis*basis*basis - 6.0*basis*basis + 4.0)/6.0);
-      break;
-   case 2:
-      value = (DataType)((3.0*basis*basis - 3.0*basis*basis*basis + 3.0*basis + 1.0)/6.0);
-      break;
-   case 3:
-      value = (DataType)(basis*basis*basis/6.0);
-      break;
-   default:
-      value = (DataType)0;
-      break;
-   }
+void get_BSplineBasisValue(DataType basis, int index, DataType& value) {
+    switch (index) {
+    case 0:
+        value = (DataType)((1.0 - basis) * (1.0 - basis) * (1.0 - basis) / 6.0);
+        break;
+    case 1:
+        value = (DataType)((3.0 * basis * basis * basis - 6.0 * basis * basis + 4.0) / 6.0);
+        break;
+    case 2:
+        value = (DataType)((3.0 * basis * basis - 3.0 * basis * basis * basis + 3.0 * basis + 1.0) / 6.0);
+        break;
+    case 3:
+        value = (DataType)(basis * basis * basis / 6.0);
+        break;
+    default:
+        value = (DataType)0;
+        break;
+    }
 }
-template void get_BSplineBasisValue<float>(float, int, float &);
-template void get_BSplineBasisValue<double>(double, int, double &);
-/* *************************************************************** */
+template void get_BSplineBasisValue<float>(float, int, float&);
+template void get_BSplineBasisValue<double>(double, int, double&);
 /* *************************************************************** */
 template<class DataType>
-void get_BSplineBasisValue(DataType basis, int index, DataType &value, DataType &first)
-{
-   get_BSplineBasisValue<DataType>(basis, index, value);
-   switch(index)
-   {
-   case 0:
-      first = (DataType)((2.0*basis - basis*basis - 1.0)/2.0);
-      break;
-   case 1:
-      first = (DataType)((3.0*basis*basis - 4.0*basis)/2.0);
-      break;
-   case 2:
-      first = (DataType)((2.0*basis - 3.0*basis*basis + 1.0)/2.0);
-      break;
-   case 3:
-      first = (DataType)(basis*basis/2.0);
-      break;
-   default:
-      first = (DataType)0;
-      break;
-   }
+void get_BSplineBasisValue(DataType basis, int index, DataType& value, DataType& first) {
+    get_BSplineBasisValue<DataType>(basis, index, value);
+    switch (index) {
+    case 0:
+        first = (DataType)((2.0 * basis - basis * basis - 1.0) / 2.0);
+        break;
+    case 1:
+        first = (DataType)((3.0 * basis * basis - 4.0 * basis) / 2.0);
+        break;
+    case 2:
+        first = (DataType)((2.0 * basis - 3.0 * basis * basis + 1.0) / 2.0);
+        break;
+    case 3:
+        first = (DataType)(basis * basis / 2.0);
+        break;
+    default:
+        first = (DataType)0;
+        break;
+    }
 }
-template void get_BSplineBasisValue<float>(float, int, float &, float &);
-template void get_BSplineBasisValue<double>(double, int, double &, double &);
-/* *************************************************************** */
+template void get_BSplineBasisValue<float>(float, int, float&, float&);
+template void get_BSplineBasisValue<double>(double, int, double&, double&);
 /* *************************************************************** */
 template<class DataType>
-void get_BSplineBasisValue(DataType basis, int index, DataType &value, DataType &first, DataType &second)
-{
-   get_BSplineBasisValue<DataType>(basis, index, value, first);
-   switch(index)
-   {
-   case 0:
-      second = (DataType)(1.0 - basis);
-      break;
-   case 1:
-      second = (DataType)(3.0*basis -2.0);
-      break;
-   case 2:
-      second = (DataType)(1.0 - 3.0*basis);
-      break;
-   case 3:
-      second = (DataType)(basis);
-      break;
-   default:
-      second = (DataType)0;
-      break;
-   }
+void get_BSplineBasisValue(DataType basis, int index, DataType& value, DataType& first, DataType& second) {
+    get_BSplineBasisValue<DataType>(basis, index, value, first);
+    switch (index) {
+    case 0:
+        second = (DataType)(1.0 - basis);
+        break;
+    case 1:
+        second = (DataType)(3.0 * basis - 2.0);
+        break;
+    case 2:
+        second = (DataType)(1.0 - 3.0 * basis);
+        break;
+    case 3:
+        second = (DataType)(basis);
+        break;
+    default:
+        second = (DataType)0;
+        break;
+    }
 }
-template void get_BSplineBasisValue<float>(float, int, float &, float &, float &);
-template void get_BSplineBasisValue<double>(double, int, double &, double &, double &);
-/* *************************************************************** */
+template void get_BSplineBasisValue<float>(float, int, float&, float&, float&);
+template void get_BSplineBasisValue<double>(double, int, double&, double&, double&);
 /* *************************************************************** */
 template<class DataType>
-void get_SplineBasisValues(DataType basis, DataType *values)
-{
-   DataType FF= basis*basis;
-   values[0] = static_cast<DataType>((basis * ((2.0-basis)*basis - 1.0))/2.0);
-   values[1] = static_cast<DataType>((FF * (3.0*basis-5.0) + 2.0)/2.0);
-   values[2] = static_cast<DataType>((basis * ((4.0-3.0*basis)*basis + 1.0))/2.0);
-   values[3] = static_cast<DataType>((basis-1.0) * FF/2.0);
+void get_SplineBasisValues(DataType basis, DataType *values) {
+    DataType FF = basis * basis;
+    values[0] = static_cast<DataType>((basis * ((2.0 - basis) * basis - 1.0)) / 2.0);
+    values[1] = static_cast<DataType>((FF * (3.0 * basis - 5.0) + 2.0) / 2.0);
+    values[2] = static_cast<DataType>((basis * ((4.0 - 3.0 * basis) * basis + 1.0)) / 2.0);
+    values[3] = static_cast<DataType>((basis - 1.0) * FF / 2.0);
 }
 template void get_SplineBasisValues<float>(float, float *);
 template void get_SplineBasisValues<double>(double, double *);
 /* *************************************************************** */
-/* *************************************************************** */
 template<class DataType>
-void get_SplineBasisValues(DataType basis, DataType *values, DataType *first)
-{
-   get_SplineBasisValues<DataType>(basis,values);
-   DataType FF= basis*basis;
-   first[0] = static_cast<DataType>((4.0*basis - 3.0*FF - 1.0)/2.0);
-   first[1] = static_cast<DataType>((9.0*basis - 10.0) * basis/2.0);
-   first[2] = static_cast<DataType>((8.0*basis - 9.0*FF + 1.0)/2.0);
-   first[3] = static_cast<DataType>((3.0*basis - 2.0) * basis/2.0);
+void get_SplineBasisValues(DataType basis, DataType *values, DataType *first) {
+    get_SplineBasisValues<DataType>(basis, values);
+    DataType FF = basis * basis;
+    first[0] = static_cast<DataType>((4.0 * basis - 3.0 * FF - 1.0) / 2.0);
+    first[1] = static_cast<DataType>((9.0 * basis - 10.0) * basis / 2.0);
+    first[2] = static_cast<DataType>((8.0 * basis - 9.0 * FF + 1.0) / 2.0);
+    first[3] = static_cast<DataType>((3.0 * basis - 2.0) * basis / 2.0);
 }
-template void get_SplineBasisValues<float>(float, float *, float *);
-template void get_SplineBasisValues<double>(double, double *, double *);
-/* *************************************************************** */
+template void get_SplineBasisValues<float>(float, float*, float *);
+template void get_SplineBasisValues<double>(double, double*, double *);
 /* *************************************************************** */
 template<class DataType>
-void get_SplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second)
-{
-   get_SplineBasisValues<DataType>(basis, values, first);
-   second[0] = static_cast<DataType>(2.0 - 3.0*basis);
-   second[1] = static_cast<DataType>(9.0*basis - 5.0);
-   second[2] = static_cast<DataType>(4.0 - 9.0*basis);
-   second[3] = static_cast<DataType>(3.0*basis - 1.0);
+void get_SplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second) {
+    get_SplineBasisValues<DataType>(basis, values, first);
+    second[0] = static_cast<DataType>(2.0 - 3.0 * basis);
+    second[1] = static_cast<DataType>(9.0 * basis - 5.0);
+    second[2] = static_cast<DataType>(4.0 - 9.0 * basis);
+    second[3] = static_cast<DataType>(3.0 * basis - 1.0);
 }
-template void get_SplineBasisValues<float>(float, float *, float *, float *);
-template void get_SplineBasisValues<double>(double, double *, double *, double *);
-/* *************************************************************** */
+template void get_SplineBasisValues<float>(float, float*, float*, float *);
+template void get_SplineBasisValues<double>(double, double*, double*, double *);
 /* *************************************************************** */
 template <class DataType>
-void set_first_order_basis_values(DataType *basisX, DataType *basisY)
-{
-   double BASIS[4], FIRST[4];get_BSplineBasisValues<double>(0, BASIS, FIRST);
-   int index=0;
-   for(int y=0;y<3;++y){
-      for(int x=0;x<3;++x){
-         basisX[index] = FIRST[x] * BASIS[y];
-         basisY[index] = BASIS[x] * FIRST[y];
-         index++;
-      }
-   }
+void set_first_order_basis_values(DataType *basisX, DataType *basisY) {
+    double BASIS[4], FIRST[4]; get_BSplineBasisValues<double>(0, BASIS, FIRST);
+    int index = 0;
+    for (int y = 0; y < 3; ++y) {
+        for (int x = 0; x < 3; ++x) {
+            basisX[index] = FIRST[x] * BASIS[y];
+            basisY[index] = BASIS[x] * FIRST[y];
+            index++;
+        }
+    }
 }
-template void set_first_order_basis_values<float>(float *, float *);
-template void set_first_order_basis_values<double>(double *, double *);
+template void set_first_order_basis_values<float>(float*, float *);
+template void set_first_order_basis_values<double>(double*, double *);
 /* *************************************************************** */
 template <class DataType>
-void set_first_order_basis_values(DataType *basisX, DataType *basisY, DataType *basisZ)
-{
-      basisX[0]=static_cast<DataType>(-0.0138889);
-      basisY[0]=static_cast<DataType>(-0.0138889);
-      basisZ[0]=static_cast<DataType>(-0.0138889);
-      basisX[1]=static_cast<DataType>(0);
-      basisY[1]=static_cast<DataType>(-0.0555556);
-      basisZ[1]=static_cast<DataType>(-0.0555556);
-      basisX[2]=static_cast<DataType>(0.0138889);
-      basisY[2]=static_cast<DataType>(-0.0138889);
-      basisZ[2]=static_cast<DataType>(-0.0138889);
-      basisX[3]=static_cast<DataType>(-0.0555556);
-      basisY[3]=static_cast<DataType>(0);
-      basisZ[3]=static_cast<DataType>(-0.0555556);
-      basisX[4]=static_cast<DataType>(0);
-      basisY[4]=static_cast<DataType>(0);
-      basisZ[4]=static_cast<DataType>(-0.222222);
-      basisX[5]=static_cast<DataType>(0.0555556);
-      basisY[5]=static_cast<DataType>(0);
-      basisZ[5]=static_cast<DataType>(-0.0555556);
-      basisX[6]=static_cast<DataType>(-0.0138889);
-      basisY[6]=static_cast<DataType>(0.0138889);
-      basisZ[6]=static_cast<DataType>(-0.0138889);
-      basisX[7]=static_cast<DataType>(0);
-      basisY[7]=static_cast<DataType>(0.0555556);
-      basisZ[7]=static_cast<DataType>(-0.0555556);
-      basisX[8]=static_cast<DataType>(0.0138889);
-      basisY[8]=static_cast<DataType>(0.0138889);
-      basisZ[8]=static_cast<DataType>(-0.0138889);
-      basisX[9]=static_cast<DataType>(-0.0555556);
-      basisY[9]=static_cast<DataType>(-0.0555556);
-      basisZ[9]=static_cast<DataType>(0);
-      basisX[10]=static_cast<DataType>(0);
-      basisY[10]=static_cast<DataType>(-0.222222);
-      basisZ[10]=static_cast<DataType>(0);
-      basisX[11]=static_cast<DataType>(0.0555556);
-      basisY[11]=static_cast<DataType>(-0.0555556);
-      basisZ[11]=static_cast<DataType>(0);
-      basisX[12]=static_cast<DataType>(-0.222222);
-      basisY[12]=static_cast<DataType>(0);
-      basisZ[12]=static_cast<DataType>(0);
-      basisX[13]=static_cast<DataType>(0);
-      basisY[13]=static_cast<DataType>(0);
-      basisZ[13]=static_cast<DataType>(0);
-      basisX[14]=static_cast<DataType>(0.222222);
-      basisY[14]=static_cast<DataType>(0);
-      basisZ[14]=static_cast<DataType>(0);
-      basisX[15]=static_cast<DataType>(-0.0555556);
-      basisY[15]=static_cast<DataType>(0.0555556);
-      basisZ[15]=static_cast<DataType>(0);
-      basisX[16]=static_cast<DataType>(0);
-      basisY[16]=static_cast<DataType>(0.222222);
-      basisZ[16]=static_cast<DataType>(0);
-      basisX[17]=static_cast<DataType>(0.0555556);
-      basisY[17]=static_cast<DataType>(0.0555556);
-      basisZ[17]=static_cast<DataType>(0);
-      basisX[18]=static_cast<DataType>(-0.0138889);
-      basisY[18]=static_cast<DataType>(-0.0138889);
-      basisZ[18]=static_cast<DataType>(0.0138889);
-      basisX[19]=static_cast<DataType>(0);
-      basisY[19]=static_cast<DataType>(-0.0555556);
-      basisZ[19]=static_cast<DataType>(0.0555556);
-      basisX[20]=static_cast<DataType>(0.0138889);
-      basisY[20]=static_cast<DataType>(-0.0138889);
-      basisZ[20]=static_cast<DataType>(0.0138889);
-      basisX[21]=static_cast<DataType>(-0.0555556);
-      basisY[21]=static_cast<DataType>(0);
-      basisZ[21]=static_cast<DataType>(0.0555556);
-      basisX[22]=static_cast<DataType>(0);
-      basisY[22]=static_cast<DataType>(0);
-      basisZ[22]=static_cast<DataType>(0.222222);
-      basisX[23]=static_cast<DataType>(0.0555556);
-      basisY[23]=static_cast<DataType>(0);
-      basisZ[23]=static_cast<DataType>(0.0555556);
-      basisX[24]=static_cast<DataType>(-0.0138889);
-      basisY[24]=static_cast<DataType>(0.0138889);
-      basisZ[24]=static_cast<DataType>(0.0138889);
-      basisX[25]=static_cast<DataType>(0);
-      basisY[25]=static_cast<DataType>(0.0555556);
-      basisZ[25]=static_cast<DataType>(0.0555556);
-      basisX[26]=static_cast<DataType>(0.0138889);
-      basisY[26]=static_cast<DataType>(0.0138889);
-      basisZ[26]=static_cast<DataType>(0.0138889);
+void set_first_order_basis_values(DataType *basisX, DataType *basisY, DataType *basisZ) {
+    basisX[0] = static_cast<DataType>(-0.0138889);
+    basisY[0] = static_cast<DataType>(-0.0138889);
+    basisZ[0] = static_cast<DataType>(-0.0138889);
+    basisX[1] = static_cast<DataType>(0);
+    basisY[1] = static_cast<DataType>(-0.0555556);
+    basisZ[1] = static_cast<DataType>(-0.0555556);
+    basisX[2] = static_cast<DataType>(0.0138889);
+    basisY[2] = static_cast<DataType>(-0.0138889);
+    basisZ[2] = static_cast<DataType>(-0.0138889);
+    basisX[3] = static_cast<DataType>(-0.0555556);
+    basisY[3] = static_cast<DataType>(0);
+    basisZ[3] = static_cast<DataType>(-0.0555556);
+    basisX[4] = static_cast<DataType>(0);
+    basisY[4] = static_cast<DataType>(0);
+    basisZ[4] = static_cast<DataType>(-0.222222);
+    basisX[5] = static_cast<DataType>(0.0555556);
+    basisY[5] = static_cast<DataType>(0);
+    basisZ[5] = static_cast<DataType>(-0.0555556);
+    basisX[6] = static_cast<DataType>(-0.0138889);
+    basisY[6] = static_cast<DataType>(0.0138889);
+    basisZ[6] = static_cast<DataType>(-0.0138889);
+    basisX[7] = static_cast<DataType>(0);
+    basisY[7] = static_cast<DataType>(0.0555556);
+    basisZ[7] = static_cast<DataType>(-0.0555556);
+    basisX[8] = static_cast<DataType>(0.0138889);
+    basisY[8] = static_cast<DataType>(0.0138889);
+    basisZ[8] = static_cast<DataType>(-0.0138889);
+    basisX[9] = static_cast<DataType>(-0.0555556);
+    basisY[9] = static_cast<DataType>(-0.0555556);
+    basisZ[9] = static_cast<DataType>(0);
+    basisX[10] = static_cast<DataType>(0);
+    basisY[10] = static_cast<DataType>(-0.222222);
+    basisZ[10] = static_cast<DataType>(0);
+    basisX[11] = static_cast<DataType>(0.0555556);
+    basisY[11] = static_cast<DataType>(-0.0555556);
+    basisZ[11] = static_cast<DataType>(0);
+    basisX[12] = static_cast<DataType>(-0.222222);
+    basisY[12] = static_cast<DataType>(0);
+    basisZ[12] = static_cast<DataType>(0);
+    basisX[13] = static_cast<DataType>(0);
+    basisY[13] = static_cast<DataType>(0);
+    basisZ[13] = static_cast<DataType>(0);
+    basisX[14] = static_cast<DataType>(0.222222);
+    basisY[14] = static_cast<DataType>(0);
+    basisZ[14] = static_cast<DataType>(0);
+    basisX[15] = static_cast<DataType>(-0.0555556);
+    basisY[15] = static_cast<DataType>(0.0555556);
+    basisZ[15] = static_cast<DataType>(0);
+    basisX[16] = static_cast<DataType>(0);
+    basisY[16] = static_cast<DataType>(0.222222);
+    basisZ[16] = static_cast<DataType>(0);
+    basisX[17] = static_cast<DataType>(0.0555556);
+    basisY[17] = static_cast<DataType>(0.0555556);
+    basisZ[17] = static_cast<DataType>(0);
+    basisX[18] = static_cast<DataType>(-0.0138889);
+    basisY[18] = static_cast<DataType>(-0.0138889);
+    basisZ[18] = static_cast<DataType>(0.0138889);
+    basisX[19] = static_cast<DataType>(0);
+    basisY[19] = static_cast<DataType>(-0.0555556);
+    basisZ[19] = static_cast<DataType>(0.0555556);
+    basisX[20] = static_cast<DataType>(0.0138889);
+    basisY[20] = static_cast<DataType>(-0.0138889);
+    basisZ[20] = static_cast<DataType>(0.0138889);
+    basisX[21] = static_cast<DataType>(-0.0555556);
+    basisY[21] = static_cast<DataType>(0);
+    basisZ[21] = static_cast<DataType>(0.0555556);
+    basisX[22] = static_cast<DataType>(0);
+    basisY[22] = static_cast<DataType>(0);
+    basisZ[22] = static_cast<DataType>(0.222222);
+    basisX[23] = static_cast<DataType>(0.0555556);
+    basisY[23] = static_cast<DataType>(0);
+    basisZ[23] = static_cast<DataType>(0.0555556);
+    basisX[24] = static_cast<DataType>(-0.0138889);
+    basisY[24] = static_cast<DataType>(0.0138889);
+    basisZ[24] = static_cast<DataType>(0.0138889);
+    basisX[25] = static_cast<DataType>(0);
+    basisY[25] = static_cast<DataType>(0.0555556);
+    basisZ[25] = static_cast<DataType>(0.0555556);
+    basisX[26] = static_cast<DataType>(0.0138889);
+    basisY[26] = static_cast<DataType>(0.0138889);
+    basisZ[26] = static_cast<DataType>(0.0138889);
 }
-template void set_first_order_basis_values<float>(float *, float *, float *);
-template void set_first_order_basis_values<double>(double *, double *, double *);
+template void set_first_order_basis_values<float>(float*, float*, float *);
+template void set_first_order_basis_values<double>(double*, double*, double *);
 /* *************************************************************** */
 template <class DataType>
-void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisXY)
-{
-   basisXX[0]=0.166667f;
-   basisYY[0]=0.166667f;
-   basisXY[0]=0.25f;
-   basisXX[1]=-0.333333f;
-   basisYY[1]=0.666667f;
-   basisXY[1]=-0.f;
-   basisXX[2]=0.166667f;
-   basisYY[2]=0.166667f;
-   basisXY[2]=-0.25f;
-   basisXX[3]=0.666667f;
-   basisYY[3]=-0.333333f;
-   basisXY[3]=-0.f;
-   basisXX[4]=-1.33333f;
-   basisYY[4]=-1.33333f;
-   basisXY[4]=0.f;
-   basisXX[5]=0.666667f;
-   basisYY[5]=-0.333333f;
-   basisXY[5]=0.f;
-   basisXX[6]=0.166667f;
-   basisYY[6]=0.166667f;
-   basisXY[6]=-0.25f;
-   basisXX[7]=-0.333333f;
-   basisYY[7]=0.666667f;
-   basisXY[7]=0.f;
-   basisXX[8]=0.166667f;
-   basisYY[8]=0.166667f;
-   basisXY[8]=0.25f;
+void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisXY) {
+    basisXX[0] = 0.166667f;
+    basisYY[0] = 0.166667f;
+    basisXY[0] = 0.25f;
+    basisXX[1] = -0.333333f;
+    basisYY[1] = 0.666667f;
+    basisXY[1] = -0.f;
+    basisXX[2] = 0.166667f;
+    basisYY[2] = 0.166667f;
+    basisXY[2] = -0.25f;
+    basisXX[3] = 0.666667f;
+    basisYY[3] = -0.333333f;
+    basisXY[3] = -0.f;
+    basisXX[4] = -1.33333f;
+    basisYY[4] = -1.33333f;
+    basisXY[4] = 0.f;
+    basisXX[5] = 0.666667f;
+    basisYY[5] = -0.333333f;
+    basisXY[5] = 0.f;
+    basisXX[6] = 0.166667f;
+    basisYY[6] = 0.166667f;
+    basisXY[6] = -0.25f;
+    basisXX[7] = -0.333333f;
+    basisYY[7] = 0.666667f;
+    basisXY[7] = 0.f;
+    basisXX[8] = 0.166667f;
+    basisYY[8] = 0.166667f;
+    basisXY[8] = 0.25f;
 }
-template void set_second_order_bspline_basis_values<float>(float *, float *, float *);
-template void set_second_order_bspline_basis_values<double>(double *, double *, double *);
+template void set_second_order_bspline_basis_values<float>(float*, float*, float *);
+template void set_second_order_bspline_basis_values<double>(double*, double*, double *);
 /* *************************************************************** */
 template <class DataType>
-void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisZZ, DataType *basisXY, DataType *basisYZ, DataType *basisXZ)
-{
-   basisXX[0]=0.027778f;
-   basisYY[0]=0.027778f;
-   basisZZ[0]=0.027778f;
-   basisXY[0]=0.041667f;
-   basisYZ[0]=0.041667f;
-   basisXZ[0]=0.041667f;
-   basisXX[1]=-0.055556f;
-   basisYY[1]=0.111111f;
-   basisZZ[1]=0.111111f;
-   basisXY[1]=-0.000000f;
-   basisYZ[1]=0.166667f;
-   basisXZ[1]=-0.000000f;
-   basisXX[2]=0.027778f;
-   basisYY[2]=0.027778f;
-   basisZZ[2]=0.027778f;
-   basisXY[2]=-0.041667f;
-   basisYZ[2]=0.041667f;
-   basisXZ[2]=-0.041667f;
-   basisXX[3]=0.111111f;
-   basisYY[3]=-0.055556f;
-   basisZZ[3]=0.111111f;
-   basisXY[3]=-0.000000f;
-   basisYZ[3]=-0.000000f;
-   basisXZ[3]=0.166667f;
-   basisXX[4]=-0.222222f;
-   basisYY[4]=-0.222222f;
-   basisZZ[4]=0.444444f;
-   basisXY[4]=0.000000f;
-   basisYZ[4]=-0.000000f;
-   basisXZ[4]=-0.000000f;
-   basisXX[5]=0.111111f;
-   basisYY[5]=-0.055556f;
-   basisZZ[5]=0.111111f;
-   basisXY[5]=0.000000f;
-   basisYZ[5]=-0.000000f;
-   basisXZ[5]=-0.166667f;
-   basisXX[6]=0.027778f;
-   basisYY[6]=0.027778f;
-   basisZZ[6]=0.027778f;
-   basisXY[6]=-0.041667f;
-   basisYZ[6]=-0.041667f;
-   basisXZ[6]=0.041667f;
-   basisXX[7]=-0.055556f;
-   basisYY[7]=0.111111f;
-   basisZZ[7]=0.111111f;
-   basisXY[7]=0.000000f;
-   basisYZ[7]=-0.166667f;
-   basisXZ[7]=-0.000000f;
-   basisXX[8]=0.027778f;
-   basisYY[8]=0.027778f;
-   basisZZ[8]=0.027778f;
-   basisXY[8]=0.041667f;
-   basisYZ[8]=-0.041667f;
-   basisXZ[8]=-0.041667f;
-   basisXX[9]=0.111111f;
-   basisYY[9]=0.111111f;
-   basisZZ[9]=-0.055556f;
-   basisXY[9]=0.166667f;
-   basisYZ[9]=-0.000000f;
-   basisXZ[9]=-0.000000f;
-   basisXX[10]=-0.222222f;
-   basisYY[10]=0.444444f;
-   basisZZ[10]=-0.222222f;
-   basisXY[10]=-0.000000f;
-   basisYZ[10]=-0.000000f;
-   basisXZ[10]=0.000000f;
-   basisXX[11]=0.111111f;
-   basisYY[11]=0.111111f;
-   basisZZ[11]=-0.055556f;
-   basisXY[11]=-0.166667f;
-   basisYZ[11]=-0.000000f;
-   basisXZ[11]=0.000000f;
-   basisXX[12]=0.444444f;
-   basisYY[12]=-0.222222f;
-   basisZZ[12]=-0.222222f;
-   basisXY[12]=-0.000000f;
-   basisYZ[12]=0.000000f;
-   basisXZ[12]=-0.000000f;
-   basisXX[13]=-0.888889f;
-   basisYY[13]=-0.888889f;
-   basisZZ[13]=-0.888889f;
-   basisXY[13]=0.000000f;
-   basisYZ[13]=0.000000f;
-   basisXZ[13]=0.000000f;
-   basisXX[14]=0.444444f;
-   basisYY[14]=-0.222222f;
-   basisZZ[14]=-0.222222f;
-   basisXY[14]=0.000000f;
-   basisYZ[14]=0.000000f;
-   basisXZ[14]=0.000000f;
-   basisXX[15]=0.111111f;
-   basisYY[15]=0.111111f;
-   basisZZ[15]=-0.055556f;
-   basisXY[15]=-0.166667f;
-   basisYZ[15]=0.000000f;
-   basisXZ[15]=-0.000000f;
-   basisXX[16]=-0.222222f;
-   basisYY[16]=0.444444f;
-   basisZZ[16]=-0.222222f;
-   basisXY[16]=0.000000f;
-   basisYZ[16]=0.000000f;
-   basisXZ[16]=0.000000f;
-   basisXX[17]=0.111111f;
-   basisYY[17]=0.111111f;
-   basisZZ[17]=-0.055556f;
-   basisXY[17]=0.166667f;
-   basisYZ[17]=0.000000f;
-   basisXZ[17]=0.000000f;
-   basisXX[18]=0.027778f;
-   basisYY[18]=0.027778f;
-   basisZZ[18]=0.027778f;
-   basisXY[18]=0.041667f;
-   basisYZ[18]=-0.041667f;
-   basisXZ[18]=-0.041667f;
-   basisXX[19]=-0.055556f;
-   basisYY[19]=0.111111f;
-   basisZZ[19]=0.111111f;
-   basisXY[19]=-0.000000f;
-   basisYZ[19]=-0.166667f;
-   basisXZ[19]=0.000000f;
-   basisXX[20]=0.027778f;
-   basisYY[20]=0.027778f;
-   basisZZ[20]=0.027778f;
-   basisXY[20]=-0.041667f;
-   basisYZ[20]=-0.041667f;
-   basisXZ[20]=0.041667f;
-   basisXX[21]=0.111111f;
-   basisYY[21]=-0.055556f;
-   basisZZ[21]=0.111111f;
-   basisXY[21]=-0.000000f;
-   basisYZ[21]=0.000000f;
-   basisXZ[21]=-0.166667f;
-   basisXX[22]=-0.222222f;
-   basisYY[22]=-0.222222f;
-   basisZZ[22]=0.444444f;
-   basisXY[22]=0.000000f;
-   basisYZ[22]=0.000000f;
-   basisXZ[22]=0.000000f;
-   basisXX[23]=0.111111f;
-   basisYY[23]=-0.055556f;
-   basisZZ[23]=0.111111f;
-   basisXY[23]=0.000000f;
-   basisYZ[23]=0.000000f;
-   basisXZ[23]=0.166667f;
-   basisXX[24]=0.027778f;
-   basisYY[24]=0.027778f;
-   basisZZ[24]=0.027778f;
-   basisXY[24]=-0.041667f;
-   basisYZ[24]=0.041667f;
-   basisXZ[24]=-0.041667f;
-   basisXX[25]=-0.055556f;
-   basisYY[25]=0.111111f;
-   basisZZ[25]=0.111111f;
-   basisXY[25]=0.000000f;
-   basisYZ[25]=0.166667f;
-   basisXZ[25]=0.000000f;
-   basisXX[26]=0.027778f;
-   basisYY[26]=0.027778f;
-   basisZZ[26]=0.027778f;
-   basisXY[26]=0.041667f;
-   basisYZ[26]=0.041667f;
-   basisXZ[26]=0.041667f;
+void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisZZ, DataType *basisXY, DataType *basisYZ, DataType *basisXZ) {
+    basisXX[0] = 0.027778f;
+    basisYY[0] = 0.027778f;
+    basisZZ[0] = 0.027778f;
+    basisXY[0] = 0.041667f;
+    basisYZ[0] = 0.041667f;
+    basisXZ[0] = 0.041667f;
+    basisXX[1] = -0.055556f;
+    basisYY[1] = 0.111111f;
+    basisZZ[1] = 0.111111f;
+    basisXY[1] = -0.000000f;
+    basisYZ[1] = 0.166667f;
+    basisXZ[1] = -0.000000f;
+    basisXX[2] = 0.027778f;
+    basisYY[2] = 0.027778f;
+    basisZZ[2] = 0.027778f;
+    basisXY[2] = -0.041667f;
+    basisYZ[2] = 0.041667f;
+    basisXZ[2] = -0.041667f;
+    basisXX[3] = 0.111111f;
+    basisYY[3] = -0.055556f;
+    basisZZ[3] = 0.111111f;
+    basisXY[3] = -0.000000f;
+    basisYZ[3] = -0.000000f;
+    basisXZ[3] = 0.166667f;
+    basisXX[4] = -0.222222f;
+    basisYY[4] = -0.222222f;
+    basisZZ[4] = 0.444444f;
+    basisXY[4] = 0.000000f;
+    basisYZ[4] = -0.000000f;
+    basisXZ[4] = -0.000000f;
+    basisXX[5] = 0.111111f;
+    basisYY[5] = -0.055556f;
+    basisZZ[5] = 0.111111f;
+    basisXY[5] = 0.000000f;
+    basisYZ[5] = -0.000000f;
+    basisXZ[5] = -0.166667f;
+    basisXX[6] = 0.027778f;
+    basisYY[6] = 0.027778f;
+    basisZZ[6] = 0.027778f;
+    basisXY[6] = -0.041667f;
+    basisYZ[6] = -0.041667f;
+    basisXZ[6] = 0.041667f;
+    basisXX[7] = -0.055556f;
+    basisYY[7] = 0.111111f;
+    basisZZ[7] = 0.111111f;
+    basisXY[7] = 0.000000f;
+    basisYZ[7] = -0.166667f;
+    basisXZ[7] = -0.000000f;
+    basisXX[8] = 0.027778f;
+    basisYY[8] = 0.027778f;
+    basisZZ[8] = 0.027778f;
+    basisXY[8] = 0.041667f;
+    basisYZ[8] = -0.041667f;
+    basisXZ[8] = -0.041667f;
+    basisXX[9] = 0.111111f;
+    basisYY[9] = 0.111111f;
+    basisZZ[9] = -0.055556f;
+    basisXY[9] = 0.166667f;
+    basisYZ[9] = -0.000000f;
+    basisXZ[9] = -0.000000f;
+    basisXX[10] = -0.222222f;
+    basisYY[10] = 0.444444f;
+    basisZZ[10] = -0.222222f;
+    basisXY[10] = -0.000000f;
+    basisYZ[10] = -0.000000f;
+    basisXZ[10] = 0.000000f;
+    basisXX[11] = 0.111111f;
+    basisYY[11] = 0.111111f;
+    basisZZ[11] = -0.055556f;
+    basisXY[11] = -0.166667f;
+    basisYZ[11] = -0.000000f;
+    basisXZ[11] = 0.000000f;
+    basisXX[12] = 0.444444f;
+    basisYY[12] = -0.222222f;
+    basisZZ[12] = -0.222222f;
+    basisXY[12] = -0.000000f;
+    basisYZ[12] = 0.000000f;
+    basisXZ[12] = -0.000000f;
+    basisXX[13] = -0.888889f;
+    basisYY[13] = -0.888889f;
+    basisZZ[13] = -0.888889f;
+    basisXY[13] = 0.000000f;
+    basisYZ[13] = 0.000000f;
+    basisXZ[13] = 0.000000f;
+    basisXX[14] = 0.444444f;
+    basisYY[14] = -0.222222f;
+    basisZZ[14] = -0.222222f;
+    basisXY[14] = 0.000000f;
+    basisYZ[14] = 0.000000f;
+    basisXZ[14] = 0.000000f;
+    basisXX[15] = 0.111111f;
+    basisYY[15] = 0.111111f;
+    basisZZ[15] = -0.055556f;
+    basisXY[15] = -0.166667f;
+    basisYZ[15] = 0.000000f;
+    basisXZ[15] = -0.000000f;
+    basisXX[16] = -0.222222f;
+    basisYY[16] = 0.444444f;
+    basisZZ[16] = -0.222222f;
+    basisXY[16] = 0.000000f;
+    basisYZ[16] = 0.000000f;
+    basisXZ[16] = 0.000000f;
+    basisXX[17] = 0.111111f;
+    basisYY[17] = 0.111111f;
+    basisZZ[17] = -0.055556f;
+    basisXY[17] = 0.166667f;
+    basisYZ[17] = 0.000000f;
+    basisXZ[17] = 0.000000f;
+    basisXX[18] = 0.027778f;
+    basisYY[18] = 0.027778f;
+    basisZZ[18] = 0.027778f;
+    basisXY[18] = 0.041667f;
+    basisYZ[18] = -0.041667f;
+    basisXZ[18] = -0.041667f;
+    basisXX[19] = -0.055556f;
+    basisYY[19] = 0.111111f;
+    basisZZ[19] = 0.111111f;
+    basisXY[19] = -0.000000f;
+    basisYZ[19] = -0.166667f;
+    basisXZ[19] = 0.000000f;
+    basisXX[20] = 0.027778f;
+    basisYY[20] = 0.027778f;
+    basisZZ[20] = 0.027778f;
+    basisXY[20] = -0.041667f;
+    basisYZ[20] = -0.041667f;
+    basisXZ[20] = 0.041667f;
+    basisXX[21] = 0.111111f;
+    basisYY[21] = -0.055556f;
+    basisZZ[21] = 0.111111f;
+    basisXY[21] = -0.000000f;
+    basisYZ[21] = 0.000000f;
+    basisXZ[21] = -0.166667f;
+    basisXX[22] = -0.222222f;
+    basisYY[22] = -0.222222f;
+    basisZZ[22] = 0.444444f;
+    basisXY[22] = 0.000000f;
+    basisYZ[22] = 0.000000f;
+    basisXZ[22] = 0.000000f;
+    basisXX[23] = 0.111111f;
+    basisYY[23] = -0.055556f;
+    basisZZ[23] = 0.111111f;
+    basisXY[23] = 0.000000f;
+    basisYZ[23] = 0.000000f;
+    basisXZ[23] = 0.166667f;
+    basisXX[24] = 0.027778f;
+    basisYY[24] = 0.027778f;
+    basisZZ[24] = 0.027778f;
+    basisXY[24] = -0.041667f;
+    basisYZ[24] = 0.041667f;
+    basisXZ[24] = -0.041667f;
+    basisXX[25] = -0.055556f;
+    basisYY[25] = 0.111111f;
+    basisZZ[25] = 0.111111f;
+    basisXY[25] = 0.000000f;
+    basisYZ[25] = 0.166667f;
+    basisXZ[25] = 0.000000f;
+    basisXX[26] = 0.027778f;
+    basisYY[26] = 0.027778f;
+    basisZZ[26] = 0.027778f;
+    basisXY[26] = 0.041667f;
+    basisYZ[26] = 0.041667f;
+    basisXZ[26] = 0.041667f;
 }
-template void set_second_order_bspline_basis_values<float>(float *, float *, float *, float *, float *, float *);
-template void set_second_order_bspline_basis_values<double>(double *, double *, double *, double *, double *, double *);
-/* *************************************************************** */
+template void set_second_order_bspline_basis_values<float>(float*, float*, float*, float*, float*, float*);
+template void set_second_order_bspline_basis_values<double>(double*, double*, double*, double*, double*, double*);
 /* *************************************************************** */
 template <class DataType>
-void get_SlidedValues(DataType &defX,
-                      DataType &defY,
+void get_SlidedValues(DataType& defX,
+                      DataType& defY,
                       int X,
                       int Y,
                       DataType *defPtrX,
                       DataType *defPtrY,
                       mat44 *df_voxel2Real,
                       int *dim,
-                      bool displacement)
-{
-   int newX=X;
-   int newY=Y;
-   if(X<0)
-   {
-      newX=0;
-   }
-   else if(X>=dim[1])
-   {
-      newX=dim[1]-1;
-   }
-   if(Y<0)
-   {
-      newY=0;
-   }
-   else if(Y>=dim[2])
-   {
-      newY=dim[2]-1;
-   }
-   DataType shiftValueX = 0;
-   DataType shiftValueY = 0;
-   if(!displacement)
-   {
-      int shiftIndexX=X-newX;
-      int shiftIndexY=Y-newY;
-      shiftValueX = shiftIndexX * df_voxel2Real->m[0][0] +
+                      bool displacement) {
+    int newX = X;
+    int newY = Y;
+    if (X < 0) {
+        newX = 0;
+    } else if (X >= dim[1]) {
+        newX = dim[1] - 1;
+    }
+    if (Y < 0) {
+        newY = 0;
+    } else if (Y >= dim[2]) {
+        newY = dim[2] - 1;
+    }
+    DataType shiftValueX = 0;
+    DataType shiftValueY = 0;
+    if (!displacement) {
+        int shiftIndexX = X - newX;
+        int shiftIndexY = Y - newY;
+        shiftValueX = shiftIndexX * df_voxel2Real->m[0][0] +
             shiftIndexY * df_voxel2Real->m[0][1];
-      shiftValueY = shiftIndexX * df_voxel2Real->m[1][0] +
+        shiftValueY = shiftIndexX * df_voxel2Real->m[1][0] +
             shiftIndexY * df_voxel2Real->m[1][1];
-   }
-   size_t index=newY*dim[1]+newX;
-   defX = defPtrX[index] + shiftValueX;
-   defY = defPtrY[index] + shiftValueY;
+    }
+    size_t index = newY * dim[1] + newX;
+    defX = defPtrX[index] + shiftValueX;
+    defY = defPtrY[index] + shiftValueY;
 }
-template void get_SlidedValues<float>(float &, float &, int, int,
-float *, float *, mat44 *, int *, bool);
-template void get_SlidedValues<double>(double &, double &, int, int,
-double *, double *, mat44 *, int *, bool);
+template void get_SlidedValues<float>(float&, float&, int, int, float*, float*, mat44*, int*, bool);
+template void get_SlidedValues<double>(double&, double&, int, int, double*, double*, mat44*, int*, bool);
 /* *************************************************************** */
 template <class DataType>
-void get_SlidedValues(DataType &defX,
-                      DataType &defY,
-                      DataType &defZ,
+void get_SlidedValues(DataType& defX,
+                      DataType& defY,
+                      DataType& defZ,
                       int X,
                       int Y,
                       int Z,
@@ -544,66 +508,52 @@ void get_SlidedValues(DataType &defX,
                       DataType *defPtrZ,
                       mat44 *df_voxel2Real,
                       int *dim,
-                      bool displacement)
-{
-   int newX=X;
-   int newY=Y;
-   int newZ=Z;
-   if(X<0)
-   {
-      newX=0;
-   }
-   else if(X>=dim[1])
-   {
-      newX=dim[1]-1;
-   }
-   if(Y<0)
-   {
-      newY=0;
-   }
-   else if(Y>=dim[2])
-   {
-      newY=dim[2]-1;
-   }
-   if(Z<0)
-   {
-      newZ=0;
-   }
-   else if(Z>=dim[3])
-   {
-      newZ=dim[3]-1;
-   }
-   DataType shiftValueX=0;
-   DataType shiftValueY=0;
-   DataType shiftValueZ=0;
-   if(!displacement)
-   {
-      int shiftIndexX=X-newX;
-      int shiftIndexY=Y-newY;
-      int shiftIndexZ=Z-newZ;
-      shiftValueX =
+                      bool displacement) {
+    int newX = X;
+    int newY = Y;
+    int newZ = Z;
+    if (X < 0) {
+        newX = 0;
+    } else if (X >= dim[1]) {
+        newX = dim[1] - 1;
+    }
+    if (Y < 0) {
+        newY = 0;
+    } else if (Y >= dim[2]) {
+        newY = dim[2] - 1;
+    }
+    if (Z < 0) {
+        newZ = 0;
+    } else if (Z >= dim[3]) {
+        newZ = dim[3] - 1;
+    }
+    DataType shiftValueX = 0;
+    DataType shiftValueY = 0;
+    DataType shiftValueZ = 0;
+    if (!displacement) {
+        int shiftIndexX = X - newX;
+        int shiftIndexY = Y - newY;
+        int shiftIndexZ = Z - newZ;
+        shiftValueX =
             shiftIndexX * df_voxel2Real->m[0][0] +
             shiftIndexY * df_voxel2Real->m[0][1] +
             shiftIndexZ * df_voxel2Real->m[0][2];
-      shiftValueY =
+        shiftValueY =
             shiftIndexX * df_voxel2Real->m[1][0] +
             shiftIndexY * df_voxel2Real->m[1][1] +
             shiftIndexZ * df_voxel2Real->m[1][2];
-      shiftValueZ =
+        shiftValueZ =
             shiftIndexX * df_voxel2Real->m[2][0] +
             shiftIndexY * df_voxel2Real->m[2][1] +
             shiftIndexZ * df_voxel2Real->m[2][2];
-   }
-   size_t index=(newZ*dim[2]+newY)*dim[1]+newX;
-   defX = defPtrX[index] + shiftValueX;
-   defY = defPtrY[index] + shiftValueY;
-   defZ = defPtrZ[index] + shiftValueZ;
+    }
+    size_t index = (newZ * dim[2] + newY) * dim[1] + newX;
+    defX = defPtrX[index] + shiftValueX;
+    defY = defPtrY[index] + shiftValueY;
+    defZ = defPtrZ[index] + shiftValueZ;
 }
-template void get_SlidedValues<float>(float &, float &, float &, int, int, int,
-float *, float *, float *, mat44 *, int *, bool);
-template void get_SlidedValues<double>(double &, double &, double &, int, int, int,
-double *, double *, double *, mat44 *, int *, bool);
-/* *************************************************************** */
+template void get_SlidedValues<float>(float&, float&, float&, int, int, int, float*, float*, float*, mat44*, int*, bool);
+template void get_SlidedValues<double>(double&, double&, double&, int, int, int, double*, double*, double*, mat44*, int*, bool);
 /* *************************************************************** */
 template <class DataType>
 void get_GridValues(int startX,
@@ -614,58 +564,47 @@ void get_GridValues(int startX,
                     DataType *dispX,
                     DataType *dispY,
                     bool approx,
-                    bool displacement)
-
-{
-   int range=4;
-   if(approx) range=3;
+                    bool displacement) {
+    int range = 4;
+    if (approx) range = 3;
 
-   size_t index;
-   size_t coord=0;
-   DataType *xxPtr=nullptr, *yyPtr=nullptr;
+    size_t index;
+    size_t coord = 0;
+    DataType *xxPtr = nullptr, *yyPtr = nullptr;
 
-   mat44 *voxel2realMatrix=nullptr;
-   if(splineControlPoint->sform_code>0)
-      voxel2realMatrix=&(splineControlPoint->sto_xyz);
-   else voxel2realMatrix=&(splineControlPoint->qto_xyz);
+    mat44 *voxel2realMatrix = nullptr;
+    if (splineControlPoint->sform_code > 0)
+        voxel2realMatrix = &splineControlPoint->sto_xyz;
+    else voxel2realMatrix = &splineControlPoint->qto_xyz;
 
-   for(int Y=startY; Y<startY+range; Y++)
-   {
-      bool out=false;
-      if(Y>-1 && Y<splineControlPoint->ny)
-      {
-         index = Y*splineControlPoint->nx;
-         xxPtr = &splineX[index];
-         yyPtr = &splineY[index];
-      }
-      else out=true;
-      for(int X=startX; X<startX+range; X++)
-      {
-         if(X>-1 && X<splineControlPoint->nx && out==false)
-         {
-            dispX[coord] = xxPtr[X];
-            dispY[coord] = yyPtr[X];
-         }
-         else
-         {
-            get_SlidedValues<DataType>(dispX[coord],
-                                    dispY[coord],
-                                    X,
-                                    Y,
-                                    splineX,
-                                    splineY,
-                                    voxel2realMatrix,
-                                    splineControlPoint->dim,
-                                    displacement);
-         }
-         coord++;
-      }
-   }
+    for (int Y = startY; Y < startY + range; Y++) {
+        bool out = false;
+        if (Y > -1 && Y < splineControlPoint->ny) {
+            index = Y * splineControlPoint->nx;
+            xxPtr = &splineX[index];
+            yyPtr = &splineY[index];
+        } else out = true;
+        for (int X = startX; X < startX + range; X++) {
+            if (X > -1 && X < splineControlPoint->nx && out == false) {
+                dispX[coord] = xxPtr[X];
+                dispY[coord] = yyPtr[X];
+            } else {
+                get_SlidedValues<DataType>(dispX[coord],
+                                           dispY[coord],
+                                           X,
+                                           Y,
+                                           splineX,
+                                           splineY,
+                                           voxel2realMatrix,
+                                           splineControlPoint->dim,
+                                           displacement);
+            }
+            coord++;
+        }
+    }
 }
-template void get_GridValues<float>(int, int, nifti_image *,
-float *, float *, float *, float *, bool, bool);
-template void get_GridValues<double>(int, int, nifti_image *,
-double *, double *, double *, double *, bool, bool);
+template void get_GridValues<float>(int, int, nifti_image*, float*, float*, float*, float*, bool, bool);
+template void get_GridValues<double>(int, int, nifti_image*, double*, double*, double*, double*, bool, bool);
 /* *************************************************************** */
 template <class DataType>
 void get_GridValues(int startX,
@@ -679,74 +618,60 @@ void get_GridValues(int startX,
                     DataType *dispY,
                     DataType *dispZ,
                     bool approx,
-                    bool displacement)
-{
-   int range=4;
-   if(approx)
-      range=3;
+                    bool displacement) {
+    int range = 4;
+    if (approx)
+        range = 3;
 
-   size_t index;
-   size_t coord=0;
-   DataType *xPtr=nullptr, *yPtr=nullptr, *zPtr=nullptr;
-   DataType *xxPtr=nullptr, *yyPtr=nullptr, *zzPtr=nullptr;
+    size_t index;
+    size_t coord = 0;
+    DataType *xPtr = nullptr, *yPtr = nullptr, *zPtr = nullptr;
+    DataType *xxPtr = nullptr, *yyPtr = nullptr, *zzPtr = nullptr;
 
-   mat44 *voxel2realMatrix=nullptr;
-   if(splineControlPoint->sform_code>0)
-      voxel2realMatrix=&(splineControlPoint->sto_xyz);
-   else voxel2realMatrix=&(splineControlPoint->qto_xyz);
+    mat44 *voxel2realMatrix = nullptr;
+    if (splineControlPoint->sform_code > 0)
+        voxel2realMatrix = &splineControlPoint->sto_xyz;
+    else voxel2realMatrix = &splineControlPoint->qto_xyz;
 
-   for(int Z=startZ; Z<startZ+range; Z++)
-   {
-      bool out=false;
-      if(Z>-1 && Z<splineControlPoint->nz)
-      {
-         index=Z*splineControlPoint->nx*splineControlPoint->ny;
-         xPtr = &splineX[index];
-         yPtr = &splineY[index];
-         zPtr = &splineZ[index];
-      }
-      else out=true;
-      for(int Y=startY; Y<startY+range; Y++)
-      {
-         if(Y>-1 && Y<splineControlPoint->ny && out==false)
-         {
-            index = Y*splineControlPoint->nx;
-            xxPtr = &xPtr[index];
-            yyPtr = &yPtr[index];
-            zzPtr = &zPtr[index];
-         }
-         else out=true;
-         for(int X=startX; X<startX+range; X++)
-         {
-            if(X>-1 && X<splineControlPoint->nx && out==false)
-            {
-               dispX[coord] = xxPtr[X];
-               dispY[coord] = yyPtr[X];
-               dispZ[coord] = zzPtr[X];
-            }
-            else
-            {
-               get_SlidedValues<DataType>(dispX[coord],
-                                       dispY[coord],
-                                       dispZ[coord],
-                                       X,
-                                       Y,
-                                       Z,
-                                       splineX,
-                                       splineY,
-                                       splineZ,
-                                       voxel2realMatrix,
-                                       splineControlPoint->dim,
-                                       displacement);
-            }
-            coord++;
-         } // X
-      } // Y
-   } // Z
+    for (int Z = startZ; Z < startZ + range; Z++) {
+        bool out = false;
+        if (Z > -1 && Z < splineControlPoint->nz) {
+            index = Z * splineControlPoint->nx * splineControlPoint->ny;
+            xPtr = &splineX[index];
+            yPtr = &splineY[index];
+            zPtr = &splineZ[index];
+        } else out = true;
+        for (int Y = startY; Y < startY + range; Y++) {
+            if (Y > -1 && Y < splineControlPoint->ny && out == false) {
+                index = Y * splineControlPoint->nx;
+                xxPtr = &xPtr[index];
+                yyPtr = &yPtr[index];
+                zzPtr = &zPtr[index];
+            } else out = true;
+            for (int X = startX; X < startX + range; X++) {
+                if (X > -1 && X < splineControlPoint->nx && out == false) {
+                    dispX[coord] = xxPtr[X];
+                    dispY[coord] = yyPtr[X];
+                    dispZ[coord] = zzPtr[X];
+                } else {
+                    get_SlidedValues<DataType>(dispX[coord],
+                                               dispY[coord],
+                                               dispZ[coord],
+                                               X,
+                                               Y,
+                                               Z,
+                                               splineX,
+                                               splineY,
+                                               splineZ,
+                                               voxel2realMatrix,
+                                               splineControlPoint->dim,
+                                               displacement);
+                }
+                coord++;
+            } // X
+        } // Y
+    } // Z
 }
-template void get_GridValues<float>(int, int, int, nifti_image *,
-float *, float *, float *, float *, float *, float *, bool, bool);
-template void get_GridValues<double>(int, int, int, nifti_image *,
-double *, double *, double *, double *, double *, double *, bool, bool);
-/* *************************************************************** */
+template void get_GridValues<float>(int, int, int, nifti_image*, float*, float*, float*, float*, float*, float*, bool, bool);
+template void get_GridValues<double>(int, int, int, nifti_image*, double*, double*, double*, double*, double*, double*, bool, bool);
 /* *************************************************************** */
diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp
index 44c85e71..445d3959 100644
--- a/reg-test/reg_test_be.cpp
+++ b/reg-test/reg_test_be.cpp
@@ -2,7 +2,6 @@
 #undef _USE_OPENCL
 
 #include "reg_test_common.h"
-#include <iomanip>
 
 /*
     This test file contains the following unit tests:
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 1a55b523..9f6c192c 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -3,6 +3,7 @@
 
 #include <array>
 #include <random>
+#include <iomanip>
 #include <catch2/catch_test_macros.hpp>
 #include "_reg_lncc.h"
 #include "_reg_localTrans.h"
diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp
index 354f6c83..740a7a31 100644
--- a/reg-test/reg_test_composeField.cpp
+++ b/reg-test/reg_test_composeField.cpp
@@ -2,7 +2,6 @@
 #undef _USE_OPENCL
 
 #include "reg_test_common.h"
-#include <iomanip>
 
 /*
     This test file contains the following unit tests:
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index 17bb21e5..0a912881 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -2,7 +2,6 @@
 #undef _USE_OPENCL
 
 #include "reg_test_common.h"
-#include <iomanip>
 
 /*
     This test file contains the following unit tests:
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 895cec69..29d95559 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -2,7 +2,6 @@
 #include "_reg_nmi.h"
 #include "CudaF3dContent.h"
 #include "CudaMeasure.h"
-#include <iomanip>
 
 /**
  *  Measure regression tests to ensure the CPU and CUDA versions yield the same output
@@ -256,7 +255,7 @@ TEST_CASE_METHOD(MeasureTest, "Regression Measure", "[regression]") {
                 const float cpuVal = voxelBasedGradCpuPtr[i];
                 const float cudaVal = voxelBasedGradCudaPtr[i];
                 const double diff = fabs(cpuVal - cudaVal);
-                if(diff>EPS)
+                if (diff > EPS)
                     NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl;
                 REQUIRE(diff < EPS);
             }

From 336eec51d6e0e848ce030ae8f1ca9080b6d11854 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 18 Sep 2023 16:36:52 +0100
Subject: [PATCH 210/314] Implement CudaCompute::ApproxLinearEnergyGradient()
 #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/cuda/BlockSize.hpp                    |   3 +
 reg-lib/cuda/CudaCompute.cpp                  |  10 +-
 reg-lib/cuda/_reg_common_cuda_kernels.cu      | 114 ++++++++++++
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |  53 ++++++
 reg-lib/cuda/_reg_localTransformation_gpu.h   |   5 +
 .../cuda/_reg_localTransformation_kernels.cu  | 167 ++++++++++++++++++
 7 files changed, 348 insertions(+), 6 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 86619979..6489928e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-328
+329
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 46a880b3..ed4d0c6d 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -42,6 +42,7 @@ struct BlockSize {
     unsigned reg_spline_getApproxBendingEnergyGradient3D;
     unsigned reg_spline_getApproxJacobianValues2D;
     unsigned reg_spline_getApproxJacobianValues3D;
+    unsigned reg_spline_approxLinearEnergyGradient;
     unsigned reg_spline_getJacobianValues2D;
     unsigned reg_spline_getJacobianValues3D;
     unsigned reg_spline_logSquaredValues;
@@ -107,6 +108,7 @@ struct BlockSize100: public BlockSize {
         reg_spline_getApproxBendingEnergyGradient3D = 256; // 27 reg - 672 smem - 108 cmem
         reg_spline_getApproxJacobianValues2D = 384; // 17 reg - 104 smem - 36 cmem
         reg_spline_getApproxJacobianValues3D = 256; // 27 reg - 356 smem - 108 cmem
+        reg_spline_approxLinearEnergyGradient = 384; // 40 reg
         reg_spline_getJacobianValues2D = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem
         reg_spline_getJacobianValues3D = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem
         reg_spline_logSquaredValues = 384; // 07 reg - 24 smem - 36 cmem
@@ -174,6 +176,7 @@ struct BlockSize300: public BlockSize {
         reg_spline_getApproxBendingEnergyGradient3D = 768; // 33 reg
         reg_spline_getApproxJacobianValues2D = 768; // 34 reg
         reg_spline_getApproxJacobianValues3D = 640; // 46 reg
+        reg_spline_approxLinearEnergyGradient = 768; // 40 reg
         reg_spline_getJacobianValues2D = 768; // 34 reg
         reg_spline_getJacobianValues3D = 768; // 34 reg
         reg_spline_logSquaredValues = 1024; // 23 reg
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index e1d6d4df..8838c0e2 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -63,11 +63,11 @@ double CudaCompute::ApproxLinearEnergy() {
 }
 /* *************************************************************** */
 void CudaCompute::ApproxLinearEnergyGradient(float weight) {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
-    Compute::ApproxLinearEnergyGradient(weight);
-    // Transfer the data back to the CUDA device
-    dynamic_cast<CudaF3dContent&>(con).UpdateTransformationGradient();
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    reg_spline_approxLinearEnergyGradient_gpu(con.F3dContent::GetControlPointGrid(),
+                                              con.GetControlPointGridCuda(),
+                                              con.GetTransformationGradientCuda(),
+                                              weight);
 }
 /* *************************************************************** */
 double CudaCompute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) {
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index 5c440afa..af5d1b9c 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -20,6 +20,120 @@ __device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in
     out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0;
 }
 /* *************************************************************** */
+__device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33& a, const mat33& b) {
+    mat33 c;
+    for (int i = 0; i < 3; i++)
+        for (int j = 0; j < 3; j++)
+            c.m[i][j] = a.m[i][0] * b.m[0][j] + a.m[i][1] * b.m[1][j] + a.m[i][2] * b.m[2][j];
+    return c;
+}
+/* *************************************************************** */
+__device__ __inline__ mat33 reg_mat33_inverse_cuda(const mat33& r) {
+    double r11, r12, r13, r21, r22, r23, r31, r32, r33, deti;
+    mat33 q;
+    /*  INPUT MATRIX:  */
+    r11 = r.m[0][0]; r12 = r.m[0][1]; r13 = r.m[0][2];  /* [ r11 r12 r13 ] */
+    r21 = r.m[1][0]; r22 = r.m[1][1]; r23 = r.m[1][2];  /* [ r21 r22 r23 ] */
+    r31 = r.m[2][0]; r32 = r.m[2][1]; r33 = r.m[2][2];  /* [ r31 r32 r33 ] */
+
+    deti = r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33
+        + r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13;
+
+    if (deti != 0.0) deti = 1.0 / deti;
+
+    q.m[0][0] = (float)(deti * (r22 * r33 - r32 * r23));
+    q.m[0][1] = (float)(deti * (-r12 * r33 + r32 * r13));
+    q.m[0][2] = (float)(deti * (r12 * r23 - r22 * r13));
+
+    q.m[1][0] = (float)(deti * (-r21 * r33 + r31 * r23));
+    q.m[1][1] = (float)(deti * (r11 * r33 - r31 * r13));
+    q.m[1][2] = (float)(deti * (-r11 * r23 + r21 * r13));
+
+    q.m[2][0] = (float)(deti * (r21 * r32 - r31 * r22));
+    q.m[2][1] = (float)(deti * (-r11 * r32 + r31 * r12));
+    q.m[2][2] = (float)(deti * (r11 * r22 - r21 * r12));
+
+    return q;
+}
+/* *************************************************************** */
+__device__ __inline__ float reg_mat33_determ_cuda(const mat33& r) {
+    double r11, r12, r13, r21, r22, r23, r31, r32, r33;
+    /*  INPUT MATRIX:  */
+    r11 = r.m[0][0]; r12 = r.m[0][1]; r13 = r.m[0][2];  /* [ r11 r12 r13 ] */
+    r21 = r.m[1][0]; r22 = r.m[1][1]; r23 = r.m[1][2];  /* [ r21 r22 r23 ] */
+    r31 = r.m[2][0]; r32 = r.m[2][1]; r33 = r.m[2][2];  /* [ r31 r32 r33 ] */
+
+    return float(r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 +
+                 r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13);
+}
+/* *************************************************************** */
+__device__ __inline__ float reg_mat33_rownorm_cuda(const mat33& a) {
+    float r1 = fabs(a.m[0][0]) + fabs(a.m[0][1]) + fabs(a.m[0][2]);
+    float r2 = fabs(a.m[1][0]) + fabs(a.m[1][1]) + fabs(a.m[1][2]);
+    float r3 = fabs(a.m[2][0]) + fabs(a.m[2][1]) + fabs(a.m[2][2]);
+    if (r1 < r2) r1 = r2;
+    if (r1 < r3) r1 = r3;
+    return r1;
+}
+/* *************************************************************** */
+__device__ __inline__ float reg_mat33_colnorm_cuda(const mat33& A) {
+    float r1 = fabs(A.m[0][0]) + fabs(A.m[1][0]) + fabs(A.m[2][0]);
+    float r2 = fabs(A.m[0][1]) + fabs(A.m[1][1]) + fabs(A.m[2][1]);
+    float r3 = fabs(A.m[0][2]) + fabs(A.m[1][2]) + fabs(A.m[2][2]);
+    if (r1 < r2) r1 = r2;
+    if (r1 < r3) r1 = r3;
+    return r1;
+}
+/* *************************************************************** */
+__device__ __inline__ mat33 reg_mat33_polar_cuda(const mat33& a) {
+    mat33 x, y, z;
+    float alp, bet, gam, gmi, dif = 1.0f;
+    int k = 0;
+
+    x = a;
+
+    // Force matrix to be nonsingular
+    gam = reg_mat33_determ_cuda(x);
+    while (gam == 0.0) {        // Perturb matrix
+        gam = 0.00001f * (0.001f + reg_mat33_rownorm_cuda(x));
+        x.m[0][0] += gam; x.m[1][1] += gam; x.m[2][2] += gam;
+        gam = reg_mat33_determ_cuda(x);
+    }
+
+    while (1) {
+        y = reg_mat33_inverse_cuda(x);
+        if (dif > 0.3) {     // Far from convergence
+            alp = sqrt(reg_mat33_rownorm_cuda(x) * reg_mat33_colnorm_cuda(x));
+            bet = sqrt(reg_mat33_rownorm_cuda(y) * reg_mat33_colnorm_cuda(y));
+            gam = sqrt(bet / alp);
+            gmi = 1.f / gam;
+        } else {
+            gam = gmi = 1.0f;  // Close to convergence
+        }
+        z.m[0][0] = 0.5f * (gam * x.m[0][0] + gmi * y.m[0][0]);
+        z.m[0][1] = 0.5f * (gam * x.m[0][1] + gmi * y.m[1][0]);
+        z.m[0][2] = 0.5f * (gam * x.m[0][2] + gmi * y.m[2][0]);
+        z.m[1][0] = 0.5f * (gam * x.m[1][0] + gmi * y.m[0][1]);
+        z.m[1][1] = 0.5f * (gam * x.m[1][1] + gmi * y.m[1][1]);
+        z.m[1][2] = 0.5f * (gam * x.m[1][2] + gmi * y.m[2][1]);
+        z.m[2][0] = 0.5f * (gam * x.m[2][0] + gmi * y.m[0][2]);
+        z.m[2][1] = 0.5f * (gam * x.m[2][1] + gmi * y.m[1][2]);
+        z.m[2][2] = 0.5f * (gam * x.m[2][2] + gmi * y.m[2][2]);
+
+        dif = (fabs(z.m[0][0] - x.m[0][0]) + fabs(z.m[0][1] - x.m[0][1]) +
+               fabs(z.m[0][2] - x.m[0][2]) + fabs(z.m[1][0] - x.m[1][0]) +
+               fabs(z.m[1][1] - x.m[1][1]) + fabs(z.m[1][2] - x.m[1][2]) +
+               fabs(z.m[2][0] - x.m[2][0]) + fabs(z.m[2][1] - x.m[2][1]) +
+               fabs(z.m[2][2] - x.m[2][2]));
+
+        k = k + 1;
+        if (k > 100 || dif < 3.e-6) break;  // Convergence or exhaustion
+        x = z;
+    }
+
+    return z;
+}
+/* *************************************************************** */
 __device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quot, int& rem) {
     // This will be optimised by the compiler into a single div instruction
     quot = num / denom;
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 573eacd5..923fa7a7 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -13,6 +13,7 @@
 #include "_reg_localTransformation_gpu.h"
 #include "_reg_localTransformation_kernels.cu"
 #include "_reg_globalTransformation_gpu.h"
+#include "_reg_splineBasis.h"
 
 /* *************************************************************** */
 void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
@@ -718,3 +719,55 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
+void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid,
+                                               const float4 *controlPointGridCuda,
+                                               float4 *transGradCuda,
+                                               const float weight) {
+    const int3 cppDims = make_int3(controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
+    const float approxRatio = weight / static_cast<float>(voxelNumber);
+
+    // Matrix to use to convert the gradient from mm to voxel
+    const mat33 reorientation = reg_mat44_to_mat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk);
+    const mat33 invReorientation = nifti_mat33_inverse(reorientation);
+
+    // Store the basis values since they are constant as the value is approximated at the control point positions only
+    Basis2d basis2d; Basis3d basis3d;
+    if (controlPointGrid->nz > 1)
+        set_first_order_basis_values(basis3d.x, basis3d.y, basis3d.z);
+    else
+        set_first_order_basis_values(basis2d.x, basis2d.y);
+
+    // Kernel dims
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_approxLinearEnergyGradient;
+    const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
+    const dim3 gridDims(grids, grids, 1);
+    const dim3 blockDims(blocks, 1, 1);
+
+    // Create the variable to store the displacement matrices
+    thrust::device_vector<mat33> dispMatricesCuda(voxelNumber);
+
+    // Create the textures
+    auto controlPointTexture = Cuda::CreateTextureObject(controlPointGridCuda, cudaResourceTypeLinear,
+                                                         voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), cudaResourceTypeLinear,
+                                                         voxelNumber * sizeof(mat33), cudaChannelFormatKindFloat, 1);
+
+    if (controlPointGrid->nz > 1) {
+        // Create the displacement matrices
+        reg_spline_createDisplacementMatrices3d_kernel<<<gridDims, blockDims>>>(dispMatricesCuda.data().get(), *controlPointTexture,
+                                                                                cppDims, basis3d, reorientation);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+        reg_spline_approxLinearEnergyGradient3d_kernel<<<gridDims, blockDims>>>(transGradCuda, *dispMatricesTexture, cppDims,
+                                                                                approxRatio, basis3d, invReorientation);
+    } else {
+        // Create the displacement matrices
+        reg_spline_createDisplacementMatrices2d_kernel<<<gridDims, blockDims>>>(dispMatricesCuda.data().get(), *controlPointTexture,
+                                                                                cppDims, basis2d, reorientation);
+        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+        reg_spline_approxLinearEnergyGradient2d_kernel<<<gridDims, blockDims>>>(transGradCuda, *dispMatricesTexture, cppDims,
+                                                                                approxRatio, basis2d, invReorientation);
+    }
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index b55f97df..f15361e7 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -63,3 +63,8 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
                                         const float4 *deformationFieldCuda,
                                         float *jacobianMatricesCuda);
 /* *************************************************************** */
+void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid,
+                                               const float4 *controlPointGridCuda,
+                                               float4 *transGradCuda,
+                                               const float weight);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 0a6719fe..b5dd95ed 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -1634,3 +1634,170 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices,
     }
 }
 /* *************************************************************** */
+struct Basis2d {
+    float x[9], y[9];
+};
+struct Basis3d {
+    float x[27], y[27], z[27];
+};
+/* *************************************************************** */
+__global__ void reg_spline_createDisplacementMatrices2d_kernel(mat33 *dispMatrices,
+                                                               cudaTextureObject_t controlPointGridTexture,
+                                                               const int3 cppDims,
+                                                               const Basis2d basis,
+                                                               const mat33 reorientation) {
+    const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
+    if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1) return;
+
+    mat33 matrix{ 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+    for (int b = -1, basInd = 0; b < 2; b++) {
+        const int yInd = (y + b) * cppDims.x;
+        for (int a = -1; a < 2; a++, basInd++) {
+            const int index = yInd + x + a;
+            const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
+
+            matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
+            matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
+
+            matrix.m[0][1] += basis.x[basInd] * splineCoeff.y;
+            matrix.m[1][1] += basis.y[basInd] * splineCoeff.y;
+        }
+    }
+    // Convert from mm to voxel
+    matrix = reg_mat33_mul_cuda(reorientation, matrix);
+    // Removing the rotation component
+    const mat33 r = reg_mat33_inverse_cuda(reg_mat33_polar_cuda(matrix));
+    matrix = reg_mat33_mul_cuda(r, matrix);
+    // Convert to displacement
+    matrix.m[0][0]--; matrix.m[1][1]--;
+    dispMatrices[index] = matrix;
+}
+/* *************************************************************** */
+__global__ void reg_spline_createDisplacementMatrices3d_kernel(mat33 *dispMatrices,
+                                                               cudaTextureObject_t controlPointGridTexture,
+                                                               const int3 cppDims,
+                                                               const Basis3d basis,
+                                                               const mat33 reorientation) {
+    const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
+    if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 || z < 1 || z >= cppDims.z - 1) return;
+
+    mat33 matrix{};
+    for (int c = -1, basInd = 0; c < 2; c++) {
+        const int zInd = (z + c) * cppDims.y;
+        for (int b = -1; b < 2; b++) {
+            const int yInd = (zInd + y + b) * cppDims.x;
+            for (int a = -1; a < 2; a++, basInd++) {
+                const int index = yInd + x + a;
+                const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
+
+                matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
+                matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
+                matrix.m[2][0] += basis.z[basInd] * splineCoeff.x;
+
+                matrix.m[0][1] += basis.x[basInd] * splineCoeff.y;
+                matrix.m[1][1] += basis.y[basInd] * splineCoeff.y;
+                matrix.m[2][1] += basis.z[basInd] * splineCoeff.y;
+
+                matrix.m[0][2] += basis.x[basInd] * splineCoeff.z;
+                matrix.m[1][2] += basis.y[basInd] * splineCoeff.z;
+                matrix.m[2][2] += basis.z[basInd] * splineCoeff.z;
+            }
+        }
+    }
+    // Convert from mm to voxel
+    matrix = reg_mat33_mul_cuda(reorientation, matrix);
+    // Removing the rotation component
+    const mat33 r = reg_mat33_inverse_cuda(reg_mat33_polar_cuda(matrix));
+    matrix = reg_mat33_mul_cuda(r, matrix);
+    // Convert to displacement
+    matrix.m[0][0]--; matrix.m[1][1]--; matrix.m[2][2]--;
+    dispMatrices[index] = matrix;
+}
+/* *************************************************************** */
+__global__ void reg_spline_approxLinearEnergyGradient2d_kernel(float4 *transGradient,
+                                                               cudaTextureObject_t dispMatricesTexture,
+                                                               const int3 cppDims,
+                                                               const float approxRatio,
+                                                               const Basis2d basis,
+                                                               const mat33 invReorientation) {
+    const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
+    auto gradVal = transGradient[index];
+
+    for (int b = -1, basInd = 0; b < 2; b++) {
+        int yInd = y + b;
+        if (yInd < 1 || yInd >= cppDims.y - 1) {
+            basInd += 3;
+            continue;
+        }
+        yInd *= cppDims.x;
+        for (int a = -1; a < 2; a++, basInd++) {
+            const int xInd = x + a;
+            if (xInd < 1 || xInd >= cppDims.x - 1) continue;
+            const int matInd = (yInd + xInd) * 9;   // Multiply with the item count of mat33
+            const float dispMatrix[2]{ tex1Dfetch<float>(dispMatricesTexture, matInd),       // m[0][0]
+                                       tex1Dfetch<float>(dispMatricesTexture, matInd + 4) }; // m[1][1]
+            const float gradValues[2]{ -2.f * dispMatrix[0] * basis.x[basInd],
+                                       -2.f * dispMatrix[1] * basis.y[basInd] };
+
+            gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                        invReorientation.m[0][1] * gradValues[1]);
+            gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                        invReorientation.m[1][1] * gradValues[1]);
+        }
+    }
+    transGradient[index] = gradVal;
+}
+/* *************************************************************** */
+__global__ void reg_spline_approxLinearEnergyGradient3d_kernel(float4 *transGradient,
+                                                               cudaTextureObject_t dispMatricesTexture,
+                                                               const int3 cppDims,
+                                                               const float approxRatio,
+                                                               const Basis3d basis,
+                                                               const mat33 invReorientation) {
+    const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
+    auto gradVal = transGradient[index];
+
+    for (int c = -1, basInd = 0; c < 2; c++) {
+        int zInd = z + c;
+        if (zInd < 1 || zInd >= cppDims.z - 1) {
+            basInd += 9;
+            continue;
+        }
+        zInd *= cppDims.y;
+        for (int b = -1; b < 2; b++) {
+            int yInd = y + b;
+            if (yInd < 1 || yInd >= cppDims.y - 1) {
+                basInd += 3;
+                continue;
+            }
+            yInd = (zInd + yInd) * cppDims.x;
+            for (int a = -1; a < 2; a++, basInd++) {
+                const int xInd = x + a;
+                if (xInd < 1 || xInd >= cppDims.x - 1) continue;
+                const int matInd = (yInd + xInd) * 9;   // Multiply with the item count of mat33
+                const float dispMatrix[3]{ tex1Dfetch<float>(dispMatricesTexture, matInd),       // m[0][0]
+                                           tex1Dfetch<float>(dispMatricesTexture, matInd + 4),   // m[1][1]
+                                           tex1Dfetch<float>(dispMatricesTexture, matInd + 8) }; // m[2][2]
+                const float gradValues[3]{ -2.f * dispMatrix[0] * basis.x[basInd],
+                                           -2.f * dispMatrix[1] * basis.y[basInd],
+                                           -2.f * dispMatrix[2] * basis.z[basInd] };
+
+                gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                            invReorientation.m[0][1] * gradValues[1] +
+                                            invReorientation.m[0][2] * gradValues[2]);
+                gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                            invReorientation.m[1][1] * gradValues[1] +
+                                            invReorientation.m[1][2] * gradValues[2]);
+                gradVal.z += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
+                                            invReorientation.m[2][1] * gradValues[1] +
+                                            invReorientation.m[2][2] * gradValues[2]);
+            }
+        }
+    }
+    transGradient[index] = gradVal;
+}
+/* *************************************************************** */

From 8b0df8b310a3259eccbe16dfac9b30dd242d712a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 18 Sep 2023 19:24:35 +0100
Subject: [PATCH 211/314] Fixes for CUDA 12

---
 niftyreg_build_version.txt     | 2 +-
 reg-lib/cuda/CudaCommon.hpp    | 1 +
 reg-lib/cuda/_reg_tools_gpu.cu | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6489928e..db2cef56 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-329
+330
diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp
index 65d8b9b1..9c0ee6d8 100644
--- a/reg-lib/cuda/CudaCommon.hpp
+++ b/reg-lib/cuda/CudaCommon.hpp
@@ -11,6 +11,7 @@
 
 #include <cuda_runtime.h>
 #include <cuda.h>
+#include <thrust/count.h>
 #include <thrust/device_ptr.h>
 #include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 7bc83d81..e41d9815 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -350,7 +350,7 @@ float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, cons
     cudaMemcpyFromSymbol(&minMaxCuda, calcMin ? minCuda : maxCuda, sizeof(MinMaxFunc));
 
     result = thrust::reduce(thrust::device, imgCuda, imgCuda + voxelNumber, make_float4(initValue, initValue, initValue, initValue),
-                            [=]__device__(const float4& lhs, const float4& rhs) {
+                            [=]DEVICE(const float4& lhs, const float4& rhs) {
         float4 result{ initValue, initValue, initValue, initValue };
         switch (timePoints) {
         case 4:

From 422f69b3f683de71b4fd8eebfa4d27d9018db653 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 18 Sep 2023 19:27:11 +0100
Subject: [PATCH 212/314] Add approximate linear energy gradient regression
 test #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-test/CMakeLists.txt                       |   1 +
 ...g_test_regr_approxLinearEnergyGradient.cpp | 146 ++++++++++++++++++
 3 files changed, 148 insertions(+), 1 deletion(-)
 create mode 100644 reg-test/reg_test_regr_approxLinearEnergyGradient.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index db2cef56..ec6cab01 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-330
+331
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index aa400b40..e857a818 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -123,6 +123,7 @@ set(EXEC_LIST reg_test_nmi_gradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
 if(USE_CUDA)
+  set(EXEC_LIST reg_test_regr_approxLinearEnergyGradient ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST})
diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
new file mode 100644
index 00000000..a0647844
--- /dev/null
+++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
@@ -0,0 +1,146 @@
+#include "reg_test_common.h"
+#include "_reg_nmi.h"
+#include "CudaF3dContent.h"
+
+/**
+ *  Approximate linear energy gradient regression test to ensure the CPU and CUDA versions yield the same output
+**/
+
+class ApproxLinearEnergyGradient {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage&, NiftiImage&, NiftiImage&, float>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    ApproxLinearEnergyGradient() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::mt19937 gen(0);
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create 2D reference, floating, control point grid and local weight similarity images
+        constexpr NiftiImage::dim_t size = 16;
+        vector<NiftiImage::dim_t> dim{ size, size };
+        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage controlPointGrid2d(CreateControlPointGrid(reference2d));
+
+        // Create 3D reference, floating, control point grid and local weight similarity images
+        dim.push_back(size);
+        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage controlPointGrid3d(CreateControlPointGrid(reference3d));
+
+        // Fill the control point grid 2d with random values
+        auto controlPointGrid2dPtr = controlPointGrid2d.data();
+        for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i) {
+            controlPointGrid2dPtr[i] = distr(gen);
+        }
+
+        // Fill the control point grid 3d with random values
+        auto controlPointGrid3dPtr = controlPointGrid3d.data();
+        for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i) {
+            controlPointGrid3dPtr[i] = distr(gen);
+        }
+
+        // Create the data container for the regression test
+        vector<TestData> testData;
+        for (int i = 0; i < 5; i++) {
+            const float weight = distr(gen);
+            testData.emplace_back(TestData(
+                "2D weight: "s + std::to_string(weight),
+                reference2d,
+                floating2d,
+                controlPointGrid2d,
+                weight
+            ));
+            testData.emplace_back(TestData(
+                "3D weight: "s + std::to_string(weight),
+                reference3d,
+                floating3d,
+                controlPointGrid3d,
+                weight
+            ));
+        }
+
+        // Create the platforms
+        Platform platformCpu(PlatformType::Cpu);
+        Platform platformCuda(PlatformType::Cuda);
+
+        for (auto&& testData : testData) {
+            // Get the test data
+            auto&& [testName, reference, floating, controlPointGrid, weight] = testData;
+
+            // Create images
+            NiftiImage referenceCpu(reference), referenceCuda(reference);
+            NiftiImage floatingCpu(floating), floatingCuda(floating);
+            NiftiImage controlPointGridCpu(controlPointGrid), controlPointGridCuda(controlPointGrid);
+
+            // Create the contents
+            unique_ptr<F3dContent> contentCpu{ new F3dContent(
+                referenceCpu,
+                floatingCpu,
+                controlPointGridCpu,
+                nullptr,
+                nullptr,
+                nullptr,
+                sizeof(float)
+            ) };
+            unique_ptr<F3dContent> contentCuda{ new CudaF3dContent(
+                referenceCuda,
+                floatingCuda,
+                controlPointGridCuda,
+                nullptr,
+                nullptr,
+                nullptr,
+                sizeof(float)
+            ) };
+
+            // Create the computes
+            unique_ptr<Compute> computeCpu{ platformCpu.CreateCompute(*contentCpu) };
+            unique_ptr<Compute> computeCuda{ platformCuda.CreateCompute(*contentCuda) };
+
+            // Compute the approximate linear energy gradient for CPU and CUDA
+            computeCpu->ApproxLinearEnergyGradient(weight);
+            computeCuda->ApproxLinearEnergyGradient(weight);
+
+            // Get the transformation gradients
+            NiftiImage transGradCpu(contentCpu->GetTransformationGradient(), NiftiImage::Copy::Image);
+            NiftiImage transGradCuda(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image);
+
+            // Save for testing
+            testCases.push_back({ testName, std::move(transGradCpu), std::move(transGradCuda) });
+        }
+    }
+};
+
+TEST_CASE_METHOD(ApproxLinearEnergyGradient, "Regression Approximate Linear Energy Gradient", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, transGradCpu, transGradCuda] = testCase;
+
+        SECTION(testName) {
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            // Check the transformation gradients
+            const auto transGradCpuPtr = transGradCpu.data();
+            const auto transGradCudaPtr = transGradCuda.data();
+            for (size_t i = 0; i < transGradCpu.nVoxels(); ++i) {
+                const float cpuVal = transGradCpuPtr[i];
+                const float cudaVal = transGradCudaPtr[i];
+                const double diff = fabs(cpuVal - cudaVal);
+                if (diff > EPS)
+                    NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl;
+                REQUIRE(diff < EPS);
+            }
+        }
+    }
+}

From 0ce908b7b2c8cb3a80dd3d06422aa4c044817b24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 21 Sep 2023 12:44:29 +0100
Subject: [PATCH 213/314] Implement CudaCompute::ApproxLinearEnergy() #92

Also refactor CudaCompute::ApproxLinearEnergyGradient()
---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/cuda/CudaCompute.cpp                  |  16 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |  69 ++++--
 reg-lib/cuda/_reg_localTransformation_gpu.h   |   5 +
 .../cuda/_reg_localTransformation_kernels.cu  | 212 ++++++++----------
 5 files changed, 154 insertions(+), 150 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index ec6cab01..6f96da66 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-331
+332
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 8838c0e2..f9f81a84 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -57,17 +57,19 @@ void CudaCompute::ApproxBendingEnergyGradient(float weight) {
 }
 /* *************************************************************** */
 double CudaCompute::ApproxLinearEnergy() {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
-    return Compute::ApproxLinearEnergy();
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    auto approxLinearEnergy = controlPointGrid->nz > 1 ? reg_spline_approxLinearEnergy_gpu<true> :
+                                                         reg_spline_approxLinearEnergy_gpu<false>;
+    return approxLinearEnergy(controlPointGrid, con.GetControlPointGridCuda());
 }
 /* *************************************************************** */
 void CudaCompute::ApproxLinearEnergyGradient(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    reg_spline_approxLinearEnergyGradient_gpu(con.F3dContent::GetControlPointGrid(),
-                                              con.GetControlPointGridCuda(),
-                                              con.GetTransformationGradientCuda(),
-                                              weight);
+    const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    auto approxLinearEnergyGradient = controlPointGrid->nz > 1 ? reg_spline_approxLinearEnergyGradient_gpu<true> :
+                                                                 reg_spline_approxLinearEnergyGradient_gpu<false>;
+    approxLinearEnergyGradient(controlPointGrid, con.GetControlPointGridCuda(), con.GetTransformationGradientCuda(), weight);
 }
 /* *************************************************************** */
 double CudaCompute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) {
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 923fa7a7..422694c2 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -719,6 +719,42 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
+template<bool is3d>
+double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid,
+                                         const float4 *controlPointGridCuda) {
+    const int3 cppDims = make_int3(controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
+
+    // Matrix to use to convert the gradient from mm to voxel
+    const mat33 reorientation = reg_mat44_to_mat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk);
+
+    // Store the basis values since they are constant as the value is approximated at the control point positions only
+    Basis basis;
+    if constexpr (is3d)
+        set_first_order_basis_values(basis.x, basis.y, basis.z);
+    else
+        set_first_order_basis_values(basis.x, basis.y);
+
+    // Create the control point texture
+    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointGridCuda, cudaResourceTypeLinear,
+                                                            voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto controlPointTexture = *controlPointTexturePtr;
+
+    constexpr int matSize = is3d ? 3 : 2;
+    thrust::counting_iterator<unsigned> index(0);
+    return thrust::transform_reduce(thrust::device, index, index + voxelNumber, [=]__device__(const unsigned index) {
+        const mat33 matrix = CreateDisplacementMatrix<is3d>(index, controlPointTexture, cppDims, basis, reorientation);
+        double currentValue = 0;
+        for (int b = 0; b < matSize; b++)
+            for (int a = 0; a < matSize; a++)
+                currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a]));
+        return currentValue;
+    }, 0.0, thrust::plus<double>()) / static_cast<double>(controlPointGrid->nvox);
+}
+template double reg_spline_approxLinearEnergy_gpu<false>(const nifti_image*, const float4*);
+template double reg_spline_approxLinearEnergy_gpu<true>(const nifti_image*, const float4*);
+/* *************************************************************** */
+template<bool is3d>
 void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid,
                                                const float4 *controlPointGridCuda,
                                                float4 *transGradCuda,
@@ -732,11 +768,11 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr
     const mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
     // Store the basis values since they are constant as the value is approximated at the control point positions only
-    Basis2d basis2d; Basis3d basis3d;
-    if (controlPointGrid->nz > 1)
-        set_first_order_basis_values(basis3d.x, basis3d.y, basis3d.z);
+    Basis basis;
+    if constexpr (is3d)
+        set_first_order_basis_values(basis.x, basis.y, basis.z);
     else
-        set_first_order_basis_values(basis2d.x, basis2d.y);
+        set_first_order_basis_values(basis.x, basis.y);
 
     // Kernel dims
     const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_approxLinearEnergyGradient;
@@ -753,21 +789,16 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr
     auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), cudaResourceTypeLinear,
                                                          voxelNumber * sizeof(mat33), cudaChannelFormatKindFloat, 1);
 
-    if (controlPointGrid->nz > 1) {
-        // Create the displacement matrices
-        reg_spline_createDisplacementMatrices3d_kernel<<<gridDims, blockDims>>>(dispMatricesCuda.data().get(), *controlPointTexture,
-                                                                                cppDims, basis3d, reorientation);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-        reg_spline_approxLinearEnergyGradient3d_kernel<<<gridDims, blockDims>>>(transGradCuda, *dispMatricesTexture, cppDims,
-                                                                                approxRatio, basis3d, invReorientation);
-    } else {
-        // Create the displacement matrices
-        reg_spline_createDisplacementMatrices2d_kernel<<<gridDims, blockDims>>>(dispMatricesCuda.data().get(), *controlPointTexture,
-                                                                                cppDims, basis2d, reorientation);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-        reg_spline_approxLinearEnergyGradient2d_kernel<<<gridDims, blockDims>>>(transGradCuda, *dispMatricesTexture, cppDims,
-                                                                                approxRatio, basis2d, invReorientation);
-    }
+    // Create the displacement matrices
+    reg_spline_createDisplacementMatrices_kernel<is3d><<<gridDims, blockDims>>>(dispMatricesCuda.data().get(), *controlPointTexture,
+                                                                                cppDims, basis, reorientation);
+    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+
+    // Compute the gradient
+    reg_spline_approxLinearEnergyGradient_kernel<is3d><<<gridDims, blockDims>>>(transGradCuda, *dispMatricesTexture, cppDims,
+                                                                                approxRatio, basis, invReorientation);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
+template void reg_spline_approxLinearEnergyGradient_gpu<false>(const nifti_image*, const float4*, float4*, const float);
+template void reg_spline_approxLinearEnergyGradient_gpu<true>(const nifti_image*, const float4*, float4*, const float);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index f15361e7..63ae7107 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -63,6 +63,11 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
                                         const float4 *deformationFieldCuda,
                                         float *jacobianMatricesCuda);
 /* *************************************************************** */
+template<bool is3d>
+double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid,
+                                         const float4 *controlPointGridCuda);
+/* *************************************************************** */
+template<bool is3d>
 void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid,
                                                const float4 *controlPointGridCuda,
                                                float4 *transGradCuda,
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index b5dd95ed..7226cd8a 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -1634,75 +1634,57 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices,
     }
 }
 /* *************************************************************** */
-struct Basis2d {
-    float x[9], y[9];
-};
-struct Basis3d {
+struct Basis {
     float x[27], y[27], z[27];
 };
 /* *************************************************************** */
-__global__ void reg_spline_createDisplacementMatrices2d_kernel(mat33 *dispMatrices,
-                                                               cudaTextureObject_t controlPointGridTexture,
-                                                               const int3 cppDims,
-                                                               const Basis2d basis,
-                                                               const mat33 reorientation) {
-    const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
-    if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1) return;
-
-    mat33 matrix{ 0, 0, 0, 0, 0, 0, 0, 0, 1 };
-    for (int b = -1, basInd = 0; b < 2; b++) {
-        const int yInd = (y + b) * cppDims.x;
-        for (int a = -1; a < 2; a++, basInd++) {
-            const int index = yInd + x + a;
-            const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
-
-            matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
-            matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
-
-            matrix.m[0][1] += basis.x[basInd] * splineCoeff.y;
-            matrix.m[1][1] += basis.y[basInd] * splineCoeff.y;
-        }
-    }
-    // Convert from mm to voxel
-    matrix = reg_mat33_mul_cuda(reorientation, matrix);
-    // Removing the rotation component
-    const mat33 r = reg_mat33_inverse_cuda(reg_mat33_polar_cuda(matrix));
-    matrix = reg_mat33_mul_cuda(r, matrix);
-    // Convert to displacement
-    matrix.m[0][0]--; matrix.m[1][1]--;
-    dispMatrices[index] = matrix;
-}
-/* *************************************************************** */
-__global__ void reg_spline_createDisplacementMatrices3d_kernel(mat33 *dispMatrices,
-                                                               cudaTextureObject_t controlPointGridTexture,
-                                                               const int3 cppDims,
-                                                               const Basis3d basis,
-                                                               const mat33 reorientation) {
-    const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+template<bool is3d>
+__device__ static mat33 CreateDisplacementMatrix(const unsigned index,
+                                                 cudaTextureObject_t controlPointGridTexture,
+                                                 const int3& cppDims,
+                                                 const Basis& basis,
+                                                 const mat33& reorientation) {
     const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
-    if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 || z < 1 || z >= cppDims.z - 1) return;
+    if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 ||
+        (is3d && (z < 1 || z >= cppDims.z - 1))) return {};
 
     mat33 matrix{};
-    for (int c = -1, basInd = 0; c < 2; c++) {
-        const int zInd = (z + c) * cppDims.y;
-        for (int b = -1; b < 2; b++) {
-            const int yInd = (zInd + y + b) * cppDims.x;
+    if constexpr (is3d) {
+        for (int c = -1, basInd = 0; c < 2; c++) {
+            const int zInd = (z + c) * cppDims.y;
+            for (int b = -1; b < 2; b++) {
+                const int yInd = (zInd + y + b) * cppDims.x;
+                for (int a = -1; a < 2; a++, basInd++) {
+                    const int index = yInd + x + a;
+                    const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
+
+                    matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
+                    matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
+                    matrix.m[2][0] += basis.z[basInd] * splineCoeff.x;
+
+                    matrix.m[0][1] += basis.x[basInd] * splineCoeff.y;
+                    matrix.m[1][1] += basis.y[basInd] * splineCoeff.y;
+                    matrix.m[2][1] += basis.z[basInd] * splineCoeff.y;
+
+                    matrix.m[0][2] += basis.x[basInd] * splineCoeff.z;
+                    matrix.m[1][2] += basis.y[basInd] * splineCoeff.z;
+                    matrix.m[2][2] += basis.z[basInd] * splineCoeff.z;
+                }
+            }
+        }
+    } else {
+        matrix.m[2][2] = 1;
+        for (int b = -1, basInd = 0; b < 2; b++) {
+            const int yInd = (y + b) * cppDims.x;
             for (int a = -1; a < 2; a++, basInd++) {
                 const int index = yInd + x + a;
                 const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
 
                 matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
                 matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
-                matrix.m[2][0] += basis.z[basInd] * splineCoeff.x;
 
                 matrix.m[0][1] += basis.x[basInd] * splineCoeff.y;
                 matrix.m[1][1] += basis.y[basInd] * splineCoeff.y;
-                matrix.m[2][1] += basis.z[basInd] * splineCoeff.y;
-
-                matrix.m[0][2] += basis.x[basInd] * splineCoeff.z;
-                matrix.m[1][2] += basis.y[basInd] * splineCoeff.z;
-                matrix.m[2][2] += basis.z[basInd] * splineCoeff.z;
             }
         }
     }
@@ -1712,92 +1694,76 @@ __global__ void reg_spline_createDisplacementMatrices3d_kernel(mat33 *dispMatric
     const mat33 r = reg_mat33_inverse_cuda(reg_mat33_polar_cuda(matrix));
     matrix = reg_mat33_mul_cuda(r, matrix);
     // Convert to displacement
-    matrix.m[0][0]--; matrix.m[1][1]--; matrix.m[2][2]--;
-    dispMatrices[index] = matrix;
+    matrix.m[0][0]--; matrix.m[1][1]--;
+    if constexpr (is3d) matrix.m[2][2]--;
+    return matrix;
 }
 /* *************************************************************** */
-__global__ void reg_spline_approxLinearEnergyGradient2d_kernel(float4 *transGradient,
-                                                               cudaTextureObject_t dispMatricesTexture,
-                                                               const int3 cppDims,
-                                                               const float approxRatio,
-                                                               const Basis2d basis,
-                                                               const mat33 invReorientation) {
+template<bool is3d>
+__global__ void reg_spline_createDisplacementMatrices_kernel(mat33 *dispMatrices,
+                                                             cudaTextureObject_t controlPointGridTexture,
+                                                             const int3 cppDims,
+                                                             const Basis basis,
+                                                             const mat33 reorientation) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
-    auto gradVal = transGradient[index];
-
-    for (int b = -1, basInd = 0; b < 2; b++) {
-        int yInd = y + b;
-        if (yInd < 1 || yInd >= cppDims.y - 1) {
-            basInd += 3;
-            continue;
-        }
-        yInd *= cppDims.x;
-        for (int a = -1; a < 2; a++, basInd++) {
-            const int xInd = x + a;
-            if (xInd < 1 || xInd >= cppDims.x - 1) continue;
-            const int matInd = (yInd + xInd) * 9;   // Multiply with the item count of mat33
-            const float dispMatrix[2]{ tex1Dfetch<float>(dispMatricesTexture, matInd),       // m[0][0]
-                                       tex1Dfetch<float>(dispMatricesTexture, matInd + 4) }; // m[1][1]
-            const float gradValues[2]{ -2.f * dispMatrix[0] * basis.x[basInd],
-                                       -2.f * dispMatrix[1] * basis.y[basInd] };
-
-            gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
-                                        invReorientation.m[0][1] * gradValues[1]);
-            gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
-                                        invReorientation.m[1][1] * gradValues[1]);
-        }
-    }
-    transGradient[index] = gradVal;
+    dispMatrices[index] = CreateDisplacementMatrix<is3d>(index, controlPointGridTexture, cppDims, basis, reorientation);
 }
 /* *************************************************************** */
-__global__ void reg_spline_approxLinearEnergyGradient3d_kernel(float4 *transGradient,
-                                                               cudaTextureObject_t dispMatricesTexture,
-                                                               const int3 cppDims,
-                                                               const float approxRatio,
-                                                               const Basis3d basis,
-                                                               const mat33 invReorientation) {
+template<bool is3d>
+__global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradient,
+                                                             cudaTextureObject_t dispMatricesTexture,
+                                                             const int3 cppDims,
+                                                             const float approxRatio,
+                                                             const Basis basis,
+                                                             const mat33 invReorientation) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
     auto gradVal = transGradient[index];
 
-    for (int c = -1, basInd = 0; c < 2; c++) {
-        int zInd = z + c;
-        if (zInd < 1 || zInd >= cppDims.z - 1) {
-            basInd += 9;
-            continue;
-        }
-        zInd *= cppDims.y;
-        for (int b = -1; b < 2; b++) {
-            int yInd = y + b;
-            if (yInd < 1 || yInd >= cppDims.y - 1) {
-                basInd += 3;
-                continue;
+    if constexpr (is3d) {
+        for (int c = -1, basInd = 0; c < 2; c++) {
+            const int zInd = (z + c) * cppDims.y;
+            for (int b = -1; b < 2; b++) {
+                const int yInd = (zInd + y + b) * cppDims.x;
+                for (int a = -1; a < 2; a++, basInd++) {
+                    const int matInd = (yInd + x + a) * 9;   // Multiply with the item count of mat33
+                    const float dispMatrix[3]{ tex1Dfetch<float>(dispMatricesTexture, matInd),       // m[0][0]
+                                               tex1Dfetch<float>(dispMatricesTexture, matInd + 4),   // m[1][1]
+                                               tex1Dfetch<float>(dispMatricesTexture, matInd + 8) }; // m[2][2]
+                    const float gradValues[3]{ -2.f * dispMatrix[0] * basis.x[basInd],
+                                               -2.f * dispMatrix[1] * basis.y[basInd],
+                                               -2.f * dispMatrix[2] * basis.z[basInd] };
+
+                    gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                                invReorientation.m[0][1] * gradValues[1] +
+                                                invReorientation.m[0][2] * gradValues[2]);
+                    gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                                invReorientation.m[1][1] * gradValues[1] +
+                                                invReorientation.m[1][2] * gradValues[2]);
+                    gradVal.z += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
+                                                invReorientation.m[2][1] * gradValues[1] +
+                                                invReorientation.m[2][2] * gradValues[2]);
+                }
             }
-            yInd = (zInd + yInd) * cppDims.x;
+        }
+    } else {
+        for (int b = -1, basInd = 0; b < 2; b++) {
+            const int yInd = (y + b) * cppDims.x;
             for (int a = -1; a < 2; a++, basInd++) {
-                const int xInd = x + a;
-                if (xInd < 1 || xInd >= cppDims.x - 1) continue;
-                const int matInd = (yInd + xInd) * 9;   // Multiply with the item count of mat33
-                const float dispMatrix[3]{ tex1Dfetch<float>(dispMatricesTexture, matInd),       // m[0][0]
-                                           tex1Dfetch<float>(dispMatricesTexture, matInd + 4),   // m[1][1]
-                                           tex1Dfetch<float>(dispMatricesTexture, matInd + 8) }; // m[2][2]
-                const float gradValues[3]{ -2.f * dispMatrix[0] * basis.x[basInd],
-                                           -2.f * dispMatrix[1] * basis.y[basInd],
-                                           -2.f * dispMatrix[2] * basis.z[basInd] };
+                const int matInd = (yInd + x + a) * 9;   // Multiply with the item count of mat33
+                const float dispMatrix[2]{ tex1Dfetch<float>(dispMatricesTexture, matInd),       // m[0][0]
+                                           tex1Dfetch<float>(dispMatricesTexture, matInd + 4) }; // m[1][1]
+                const float gradValues[2]{ -2.f * dispMatrix[0] * basis.x[basInd],
+                                           -2.f * dispMatrix[1] * basis.y[basInd] };
 
                 gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
-                                            invReorientation.m[0][1] * gradValues[1] +
-                                            invReorientation.m[0][2] * gradValues[2]);
+                                            invReorientation.m[0][1] * gradValues[1]);
                 gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
-                                            invReorientation.m[1][1] * gradValues[1] +
-                                            invReorientation.m[1][2] * gradValues[2]);
-                gradVal.z += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
-                                            invReorientation.m[2][1] * gradValues[1] +
-                                            invReorientation.m[2][2] * gradValues[2]);
+                                            invReorientation.m[1][1] * gradValues[1]);
             }
         }
     }
+
     transGradient[index] = gradVal;
 }
 /* *************************************************************** */

From 60939a38a6dd7a22d8b15cf87e104de6dab374c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 21 Sep 2023 12:47:11 +0100
Subject: [PATCH 214/314] Add approximate linear energy regression test #92

---
 niftyreg_build_version.txt                    |  2 +-
 ...g_test_regr_approxLinearEnergyGradient.cpp | 56 +++++++++++--------
 2 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6f96da66..55bd0ac4 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-332
+333
diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
index a0647844..d0fb7543 100644
--- a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
+++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
@@ -3,13 +3,14 @@
 #include "CudaF3dContent.h"
 
 /**
- *  Approximate linear energy gradient regression test to ensure the CPU and CUDA versions yield the same output
+ *  Approximate linear energy and approximate linear energy gradient regression tests
+ *  to ensure the CPU and CUDA versions yield the same output
 **/
 
 class ApproxLinearEnergyGradient {
 protected:
     using TestData = std::tuple<std::string, NiftiImage&, NiftiImage&, NiftiImage&, float>;
-    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<std::string, double, double, NiftiImage, NiftiImage>;
 
     inline static vector<TestCase> testCases;
 
@@ -19,50 +20,51 @@ class ApproxLinearEnergyGradient {
             return;
 
         // Create a random number generator
-        std::mt19937 gen(0);
+        std::random_device rd;
+        std::mt19937 gen(rd());
         std::uniform_real_distribution<float> distr(0, 1);
 
-        // Create 2D reference, floating, control point grid and local weight similarity images
+        // Create 2D reference, floating and control point grid images
         constexpr NiftiImage::dim_t size = 16;
         vector<NiftiImage::dim_t> dim{ size, size };
         NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
-        NiftiImage controlPointGrid2d(CreateControlPointGrid(reference2d));
+        NiftiImage controlPointGrid = CreateControlPointGrid(reference2d);
+        NiftiImage controlPointGrid2d[3]{ controlPointGrid, controlPointGrid, controlPointGrid };
 
-        // Create 3D reference, floating, control point grid and local weight similarity images
+        // Create 3D reference, floating and control point grid images
         dim.push_back(size);
         NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
-        NiftiImage controlPointGrid3d(CreateControlPointGrid(reference3d));
-
-        // Fill the control point grid 2d with random values
-        auto controlPointGrid2dPtr = controlPointGrid2d.data();
-        for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i) {
-            controlPointGrid2dPtr[i] = distr(gen);
-        }
-
-        // Fill the control point grid 3d with random values
-        auto controlPointGrid3dPtr = controlPointGrid3d.data();
-        for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i) {
-            controlPointGrid3dPtr[i] = distr(gen);
+        controlPointGrid = CreateControlPointGrid(reference3d);
+        NiftiImage controlPointGrid3d[3]{ controlPointGrid, controlPointGrid, controlPointGrid };
+
+        // Fill control point grids with random values
+        for (int i = 0; i < 3; i++) {
+            auto controlPointGridPtr = controlPointGrid2d[i].data();
+            for (size_t j = 0; j < controlPointGrid2d[i].nVoxels(); j++)
+                controlPointGridPtr[j] = distr(gen);
+            controlPointGridPtr = controlPointGrid3d[i].data();
+            for (size_t j = 0; j < controlPointGrid3d[i].nVoxels(); j++)
+                controlPointGridPtr[j] = distr(gen);
         }
 
         // Create the data container for the regression test
         vector<TestData> testData;
-        for (int i = 0; i < 5; i++) {
+        for (int i = 0; i < 3; i++) {
             const float weight = distr(gen);
             testData.emplace_back(TestData(
                 "2D weight: "s + std::to_string(weight),
                 reference2d,
                 floating2d,
-                controlPointGrid2d,
+                controlPointGrid2d[i],
                 weight
             ));
             testData.emplace_back(TestData(
                 "3D weight: "s + std::to_string(weight),
                 reference3d,
                 floating3d,
-                controlPointGrid3d,
+                controlPointGrid3d[i],
                 weight
             ));
         }
@@ -104,6 +106,10 @@ class ApproxLinearEnergyGradient {
             unique_ptr<Compute> computeCpu{ platformCpu.CreateCompute(*contentCpu) };
             unique_ptr<Compute> computeCuda{ platformCuda.CreateCompute(*contentCuda) };
 
+            // Compute the approximate linear energy for CPU and CUDA
+            const double approxLinearEnergyCpu = computeCpu->ApproxLinearEnergy();
+            const double approxLinearEnergyCuda = computeCuda->ApproxLinearEnergy();
+
             // Compute the approximate linear energy gradient for CPU and CUDA
             computeCpu->ApproxLinearEnergyGradient(weight);
             computeCuda->ApproxLinearEnergyGradient(weight);
@@ -113,7 +119,7 @@ class ApproxLinearEnergyGradient {
             NiftiImage transGradCuda(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image);
 
             // Save for testing
-            testCases.push_back({ testName, std::move(transGradCpu), std::move(transGradCuda) });
+            testCases.push_back({ testName, approxLinearEnergyCpu, approxLinearEnergyCuda, std::move(transGradCpu), std::move(transGradCuda) });
         }
     }
 };
@@ -122,7 +128,7 @@ TEST_CASE_METHOD(ApproxLinearEnergyGradient, "Regression Approximate Linear Ener
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
-        auto&& [testName, transGradCpu, transGradCuda] = testCase;
+        auto&& [testName, approxLinearEnergyCpu, approxLinearEnergyCuda, transGradCpu, transGradCuda] = testCase;
 
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
@@ -130,6 +136,10 @@ TEST_CASE_METHOD(ApproxLinearEnergyGradient, "Regression Approximate Linear Ener
             // Increase the precision for the output
             NR_COUT << std::fixed << std::setprecision(10);
 
+            // Check the approximate linear energy
+            NR_COUT << "Approx Linear Energy: " << approxLinearEnergyCpu << " " << approxLinearEnergyCuda << std::endl;
+            REQUIRE(fabs(approxLinearEnergyCpu - approxLinearEnergyCuda) < EPS);
+
             // Check the transformation gradients
             const auto transGradCpuPtr = transGradCpu.data();
             const auto transGradCudaPtr = transGradCuda.data();

From 118e1da1c4fbc1ff892cefd9b9d230c1cb8769f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 5 Oct 2023 20:51:18 +0100
Subject: [PATCH 215/314] Fix a bug in
 reg_spline_approxLinearEnergyGradient_gpu()

---
 niftyreg_build_version.txt                       |  2 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu     |  4 ++--
 reg-lib/cuda/_reg_localTransformation_kernels.cu | 10 +++++++---
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 55bd0ac4..0ae9d1ef 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-333
+334
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 422694c2..0bfcdcb2 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -791,12 +791,12 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr
 
     // Create the displacement matrices
     reg_spline_createDisplacementMatrices_kernel<is3d><<<gridDims, blockDims>>>(dispMatricesCuda.data().get(), *controlPointTexture,
-                                                                                cppDims, basis, reorientation);
+                                                                                cppDims, basis, reorientation, (unsigned)voxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 
     // Compute the gradient
     reg_spline_approxLinearEnergyGradient_kernel<is3d><<<gridDims, blockDims>>>(transGradCuda, *dispMatricesTexture, cppDims,
-                                                                                approxRatio, basis, invReorientation);
+                                                                                approxRatio, basis, invReorientation, (unsigned)voxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 template void reg_spline_approxLinearEnergyGradient_gpu<false>(const nifti_image*, const float4*, float4*, const float);
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 7226cd8a..a95f4bba 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -1704,9 +1704,11 @@ __global__ void reg_spline_createDisplacementMatrices_kernel(mat33 *dispMatrices
                                                              cudaTextureObject_t controlPointGridTexture,
                                                              const int3 cppDims,
                                                              const Basis basis,
-                                                             const mat33 reorientation) {
+                                                             const mat33 reorientation,
+                                                             const unsigned voxelNumber) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    dispMatrices[index] = CreateDisplacementMatrix<is3d>(index, controlPointGridTexture, cppDims, basis, reorientation);
+    if (index < voxelNumber)
+        dispMatrices[index] = CreateDisplacementMatrix<is3d>(index, controlPointGridTexture, cppDims, basis, reorientation);
 }
 /* *************************************************************** */
 template<bool is3d>
@@ -1715,8 +1717,10 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie
                                                              const int3 cppDims,
                                                              const float approxRatio,
                                                              const Basis basis,
-                                                             const mat33 invReorientation) {
+                                                             const mat33 invReorientation,
+                                                             const unsigned voxelNumber) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
+    if (index >= voxelNumber) return;
     const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
     auto gradVal = transGradient[index];
 

From e1ec1f4244ae5fb9cc6575eaabc44d836f428a6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 5 Oct 2023 20:53:24 +0100
Subject: [PATCH 216/314] Refactorisations

---
 CMakeLists.txt                                |   2 +-
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/_reg_f3d.cpp                          |   1 +
 reg-lib/_reg_f3d2.cpp                         |   5 +-
 reg-lib/cpu/_reg_maths.h                      |   4 +
 reg-lib/cpu/_reg_tools.cpp                    | 173 +++++++++---------
 reg-lib/cpu/_reg_tools.h                      |  16 +-
 reg-lib/cuda/CudaCommon.cu                    |  66 ++++---
 ...g_test_regr_approxLinearEnergyGradient.cpp |   6 +-
 9 files changed, 142 insertions(+), 133 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 67368df2..87ee07e6 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -71,7 +71,7 @@ option(BUILD_TESTING "To build the unit tests" OFF)
 option(USE_CUDA "To use the CUDA platform" OFF)
 option(USE_OPENCL "To use the OpenCL platform" OFF)
 option(USE_OPENMP "To use openMP for multi-CPU processing" ON)
-option(USE_SSE "To enable SEE computation in some case" ON)
+option(USE_SSE "To enable SSE computation in some case" ON)
 #-----------------------------------------------------------------------------
 option(USE_NRRD "To use the NRRD file format" OFF)
 mark_as_advanced(USE_NRRD)
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 0ae9d1ef..3d9988ad 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-334
+335
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 9c4722c0..6eedbba3 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -482,6 +482,7 @@ void reg_f3d<T>::SetOptimiser() {
 template<class T>
 void reg_f3d<T>::SmoothGradient() {
     // The gradient is smoothed using a Gaussian kernel if it is required
+    if (this->gradientSmoothingSigma == 0) return;
     this->compute->SmoothGradient(this->gradientSmoothingSigma);
     NR_FUNC_CALLED();
 }
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index ea0f0d56..79317999 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -348,11 +348,10 @@ void reg_f3d2<T>::GetLandmarkDistanceGradient() {
 /* *************************************************************** */
 template <class T>
 void reg_f3d2<T>::SmoothGradient() {
-    reg_f3d<T>::SmoothGradient();
-
     // The gradient is smoothed using a Gaussian kernel if it is required
+    if (this->gradientSmoothingSigma == 0) return;
+    reg_f3d<T>::SmoothGradient();
     computeBw->SmoothGradient(this->gradientSmoothingSigma);
-
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index 6a35bd6d..93151883 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -56,6 +56,10 @@ DEVICE inline T Square(const T& x) {
     return x * x;
 }
 template<typename T>
+DEVICE inline T Cube(const T& x) {
+    return x * x * x;
+}
+template<typename T>
 DEVICE inline int Floor(const T& x) {
     const int i = static_cast<int>(x);
     return i - (x < i);
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 59aa73ba..fbd7798d 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -833,10 +833,11 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                  const float *sigma,
                                  const int& kernelType,
                                  const int *mask,
-                                 const bool *timePoint,
+                                 const bool *timePoints,
                                  const bool *axis) {
     if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048)
-        NR_FATAL_ERROR("This function does not support images with dimension > 2048");
+        NR_FATAL_ERROR("This function does not support images with dimensions larger than 2048");
+
 #ifdef WIN32
     long index;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3);
@@ -844,37 +845,36 @@ void reg_tools_kernelConvolution(nifti_image *image,
     size_t index;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
 #endif
+
     DataType *imagePtr = static_cast<DataType*>(image->data);
-    int imageDim[3] = { image->nx, image->ny, image->nz };
+    const int imageDims[3]{ image->nx, image->ny, image->nz };
 
-    bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool));
-    float *densityPtr = (float*)calloc(voxelNumber, sizeof(float));
+    unique_ptr<bool[]> nanImagePtr{ new bool[voxelNumber]() };
+    unique_ptr<float[]> densityPtr{ new float[voxelNumber]() };
 
     // Loop over the dimension higher than 3
     for (int t = 0; t < image->nt * image->nu; t++) {
-        if (timePoint[t]) {
+        if (timePoints[t]) {
             DataType *intensityPtr = &imagePtr[t * voxelNumber];
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber)
 #endif
             for (index = 0; index < voxelNumber; index++) {
-                densityPtr[index] = intensityPtr[index] == intensityPtr[index] ? 1.f : 0;
-                densityPtr[index] *= mask[index] >= 0 ? 1 : 0;
-                nanImagePtr[index] = static_cast<bool>(densityPtr[index]);
-                if (nanImagePtr[index] == 0)
-                    intensityPtr[index] = 0;
+                densityPtr[index] = mask[index] >= 0 && intensityPtr[index] == intensityPtr[index] ? 1.f : 0;
+                nanImagePtr[index] = !static_cast<bool>(densityPtr[index]);
+                if (nanImagePtr[index]) intensityPtr[index] = 0;
             }
             // Loop over the x, y and z dimensions
             for (int n = 0; n < 3; n++) {
                 if (axis[n] && image->dim[n] > 1) {
                     double temp;
                     if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel
-                    else temp = fabs(sigma[t]); // voxel based if negative value
+                    else temp = fabs(sigma[t]); // voxel-based if negative value
                     int radius = 0;
                     // Define the kernel size
                     if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) {
-                        // Mean  or linear filtering
+                        // Mean or linear filtering
                         radius = static_cast<int>(temp);
                     } else if (kernelType == GAUSSIAN_KERNEL) {
                         // Gaussian kernel
@@ -895,8 +895,10 @@ void reg_tools_kernelConvolution(nifti_image *image,
                             for (int i = -radius; i <= radius; i++) {
                                 // temp contains the kernel node spacing
                                 double relative = fabs(i / temp);
-                                if (relative < 1.0) kernel[i + radius] = static_cast<float>(2.0 / 3.0 - relative * relative + 0.5 * relative * relative * relative);
-                                else if (relative < 2.0) kernel[i + radius] = static_cast<float>(-(relative - 2.0) * (relative - 2.0) * (relative - 2.0) / 6.0);
+                                if (relative < 1.0)
+                                    kernel[i + radius] = static_cast<float>(2.0 / 3.0 - Square(relative) + 0.5 * Cube(relative));
+                                else if (relative < 2.0)
+                                    kernel[i + radius] = static_cast<float>(-Cube(relative - 2.0) / 6.0);
                                 else kernel[i + radius] = 0;
                                 kernelSum += kernel[i + radius];
                             }
@@ -905,7 +907,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                             for (int i = -radius; i <= radius; i++) {
                                 // 2.506... = sqrt(2*pi)
                                 // temp contains the sigma in voxel
-                                kernel[radius + i] = static_cast<float>(exp(-(i * i) / (2.0 * Square(temp))) / (temp * 2.506628274631));
+                                kernel[radius + i] = static_cast<float>(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631));
                                 kernelSum += kernel[radius + i];
                             }
                         } else if (kernelType == LINEAR_KERNEL) {
@@ -914,7 +916,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                 kernel[radius + i] = 1.f - fabs(i / static_cast<float>(radius));
                                 kernelSum += kernel[radius + i];
                             }
-                        } else if (kernelType == MEAN_KERNEL && imageDim[2] == 1) {
+                        } else if (kernelType == MEAN_KERNEL && imageDims[2] == 1) {
                             // Compute the mean kernel
                             for (int i = -radius; i <= radius; i++) {
                                 kernel[radius + i] = 1.f;
@@ -922,22 +924,22 @@ void reg_tools_kernelConvolution(nifti_image *image,
                             }
                         }
                         // No kernel is required for the mean filtering
-                        // No need for kernel normalisation as this is handle by the density function
+                        // No need for kernel normalisation as this is handled by the density function
                         NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]");
 
                         int planeNumber, planeIndex, lineOffset;
                         int lineIndex, shiftPre, shiftPst, k;
                         switch (n) {
                         case 0:
-                            planeNumber = imageDim[1] * imageDim[2];
+                            planeNumber = imageDims[1] * imageDims[2];
                             lineOffset = 1;
                             break;
                         case 1:
-                            planeNumber = imageDim[0] * imageDim[2];
-                            lineOffset = imageDim[0];
+                            planeNumber = imageDims[0] * imageDims[2];
+                            lineOffset = imageDims[0];
                             break;
                         case 2:
-                            planeNumber = imageDim[0] * imageDim[1];
+                            planeNumber = imageDims[0] * imageDims[1];
                             lineOffset = planeNumber;
                             break;
                         }
@@ -949,8 +951,8 @@ void reg_tools_kernelConvolution(nifti_image *image,
                         float *currentDensityPtr = nullptr;
                         DataType bufferIntensity[2048];
                         float bufferDensity[2048];
-                        double bufferIntensitycur = 0;
-                        double bufferDensitycur = 0;
+                        double bufferIntensityCur = 0;
+                        double bufferDensityCur = 0;
 
 #ifdef _USE_SSE
                         union {
@@ -963,31 +965,27 @@ void reg_tools_kernelConvolution(nifti_image *image,
 #ifdef _OPENMP
 #ifdef _USE_SSE
 #pragma omp parallel for default(none) \
-   shared(imageDim, intensityPtr, densityPtr, radius, kernel, lineOffset, n, \
-   planeNumber,kernelSum) \
-   private(realIndex,currentIntensityPtr,currentDensityPtr,lineIndex,bufferIntensity, \
-   bufferDensity,shiftPre,shiftPst,kernelPtr,kernelValue,densitySum,intensitySum, \
-   k, bufferIntensitycur,bufferDensitycur, \
+   shared(imageDims, intensityPtr, densityPtr, radius, kernel, lineOffset, n, planeNumber, kernelSum) \
+   private(realIndex, currentIntensityPtr, currentDensityPtr, lineIndex, bufferIntensity, \
+   bufferDensity, shiftPre, shiftPst, kernelPtr, kernelValue, densitySum, intensitySum, \
+   k, bufferIntensityCur, bufferDensityCur, \
    kernel_sse, intensity_sse, density_sse, intensity_sum_sse, density_sum_sse)
 #else
 #pragma omp parallel for default(none) \
-   shared(imageDim, intensityPtr, densityPtr, radius, kernel, lineOffset, n, \
-   planeNumber,kernelSum) \
-   private(realIndex,currentIntensityPtr,currentDensityPtr,lineIndex,bufferIntensity, \
-   bufferDensity,shiftPre,shiftPst,kernelPtr,kernelValue,densitySum,intensitySum, \
-   k, bufferIntensitycur,bufferDensitycur)
+   shared(imageDims, intensityPtr, densityPtr, radius, kernel, lineOffset, n, planeNumber, kernelSum) \
+   private(realIndex, currentIntensityPtr, currentDensityPtr, lineIndex, bufferIntensity, \
+   bufferDensity, shiftPre, shiftPst, kernelPtr, kernelValue, densitySum, intensitySum, \
+   k, bufferIntensityCur, bufferDensityCur)
 #endif
 #endif // _OPENMP
                         // Loop over the different voxel
                         for (planeIndex = 0; planeIndex < planeNumber; ++planeIndex) {
                             switch (n) {
                             case 0:
-                                realIndex = planeIndex * imageDim[0];
+                                realIndex = planeIndex * imageDims[0];
                                 break;
                             case 1:
-                                realIndex = (planeIndex / imageDim[0]) *
-                                    imageDim[0] * imageDim[1] +
-                                    planeIndex % imageDim[0];
+                                realIndex = (planeIndex / imageDims[0]) * imageDims[0] * imageDims[1] + planeIndex % imageDims[0];
                                 break;
                             case 2:
                                 realIndex = planeIndex;
@@ -998,15 +996,15 @@ void reg_tools_kernelConvolution(nifti_image *image,
                             // Fetch the current line into a stack buffer
                             currentIntensityPtr = &intensityPtr[realIndex];
                             currentDensityPtr = &densityPtr[realIndex];
-                            for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex) {
+                            for (lineIndex = 0; lineIndex < imageDims[n]; ++lineIndex) {
                                 bufferIntensity[lineIndex] = *currentIntensityPtr;
                                 bufferDensity[lineIndex] = *currentDensityPtr;
                                 currentIntensityPtr += lineOffset;
                                 currentDensityPtr += lineOffset;
                             }
                             if (kernelSum > 0) {
-                                // Perform the kernel convolution along 1 line
-                                for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex) {
+                                // Perform the kernel convolution along one line
+                                for (lineIndex = 0; lineIndex < imageDims[n]; ++lineIndex) {
                                     // Define the kernel boundaries
                                     shiftPre = lineIndex - radius;
                                     shiftPst = lineIndex + radius + 1;
@@ -1014,7 +1012,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                         kernelPtr = &kernel[-shiftPre];
                                         shiftPre = 0;
                                     } else kernelPtr = &kernel[0];
-                                    if (shiftPst > imageDim[n]) shiftPst = imageDim[n];
+                                    if (shiftPst > imageDims[n]) shiftPst = imageDims[n];
                                     // Set the current values to zero
                                     // Increment the current value by performing the weighted sum
 #ifdef _USE_SSE
@@ -1066,33 +1064,32 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                 } // line convolution
                             } // kernel sum
                             else {
-                                for (lineIndex = 1; lineIndex < imageDim[n]; ++lineIndex) {
+                                for (lineIndex = 1; lineIndex < imageDims[n]; ++lineIndex) {
                                     bufferIntensity[lineIndex] += bufferIntensity[lineIndex - 1];
                                     bufferDensity[lineIndex] += bufferDensity[lineIndex - 1];
                                 }
                                 shiftPre = -radius - 1;
                                 shiftPst = radius;
-                                for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex, ++shiftPre, ++shiftPst) {
+                                for (lineIndex = 0; lineIndex < imageDims[n]; ++lineIndex, ++shiftPre, ++shiftPst) {
                                     if (shiftPre > -1) {
-                                        if (shiftPst < imageDim[n]) {
-                                            bufferIntensitycur = bufferIntensity[shiftPre] - bufferIntensity[shiftPst];
-                                            bufferDensitycur = bufferDensity[shiftPre] - bufferDensity[shiftPst];
+                                        if (shiftPst < imageDims[n]) {
+                                            bufferIntensityCur = bufferIntensity[shiftPre] - bufferIntensity[shiftPst];
+                                            bufferDensityCur = bufferDensity[shiftPre] - bufferDensity[shiftPst];
                                         } else {
-                                            bufferIntensitycur = bufferIntensity[shiftPre] - bufferIntensity[imageDim[n] - 1];
-                                            bufferDensitycur = bufferDensity[shiftPre] - bufferDensity[imageDim[n] - 1];
+                                            bufferIntensityCur = bufferIntensity[shiftPre] - bufferIntensity[imageDims[n] - 1];
+                                            bufferDensityCur = bufferDensity[shiftPre] - bufferDensity[imageDims[n] - 1];
                                         }
                                     } else {
-                                        if (shiftPst < imageDim[n]) {
-                                            bufferIntensitycur = -bufferIntensity[shiftPst];
-                                            bufferDensitycur = -bufferDensity[shiftPst];
+                                        if (shiftPst < imageDims[n]) {
+                                            bufferIntensityCur = -bufferIntensity[shiftPst];
+                                            bufferDensityCur = -bufferDensity[shiftPst];
                                         } else {
-                                            bufferIntensitycur = 0;
-                                            bufferDensitycur = 0;
+                                            bufferIntensityCur = 0;
+                                            bufferDensityCur = 0;
                                         }
                                     }
-                                    intensityPtr[realIndex] = static_cast<DataType>(bufferIntensitycur);
-                                    densityPtr[realIndex] = static_cast<float>(bufferDensitycur);
-
+                                    intensityPtr[realIndex] = static_cast<DataType>(bufferIntensityCur);
+                                    densityPtr[realIndex] = static_cast<float>(bufferDensityCur);
                                     realIndex += lineOffset;
                                 } // line convolution of mean filter
                             } // No kernel computation
@@ -1106,14 +1103,12 @@ void reg_tools_kernelConvolution(nifti_image *image,
    shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr)
 #endif
             for (index = 0; index < voxelNumber; ++index) {
-                if (nanImagePtr[index] != 0)
-                    intensityPtr[index] = static_cast<DataType>((float)intensityPtr[index] / densityPtr[index]);
-                else intensityPtr[index] = std::numeric_limits<DataType>::quiet_NaN();
+                if (nanImagePtr[index])
+                    intensityPtr[index] = std::numeric_limits<DataType>::quiet_NaN();
+                else intensityPtr[index] = static_cast<DataType>(intensityPtr[index] / densityPtr[index]);
             }
         } // check if the time point is active
     } // loop over the time points
-    free(nanImagePtr);
-    free(densityPtr);
 }
 /* *************************************************************** */
 template <class DataType>
@@ -1122,7 +1117,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
                                            float varianceY,
                                            float varianceZ,
                                            int *mask,
-                                           bool *timePoint) {
+                                           bool *timePoints) {
     if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048)
         NR_FATAL_ERROR("This function does not support images with dimension > 2048");
 #ifdef WIN32
@@ -1134,13 +1129,13 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
 #endif
     DataType *imagePtr = static_cast<DataType*>(image->data);
 
-    const int activeTimePointNumber = image->nt * image->nu;
-    bool *activeTimePoint = (bool*)calloc(activeTimePointNumber, sizeof(bool));
+    const int activeTimePointCount = image->nt * image->nu;
+    bool *activeTimePoints = (bool*)calloc(activeTimePointCount, sizeof(bool));
     // Check if input time points and masks are nullptr
-    if (timePoint == nullptr) {
+    if (timePoints == nullptr) {
         // All time points are considered as active
-        for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true;
-    } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i];
+        for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = true;
+    } else for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = timePoints[i];
 
     int *currentMask = nullptr;
     if (mask == nullptr) {
@@ -1156,8 +1151,8 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
     typedef typename std::map<DataType, float>::iterator DataPointMapIt;
 
     // Loop over the dimension higher than 3
-    for (int t = 0; t < activeTimePointNumber; t++) {
-        if (activeTimePoint[t]) {
+    for (int t = 0; t < activeTimePointCount; t++) {
+        if (activeTimePoints[t]) {
             DataType *intensityPtr = &imagePtr[t * voxelNumber];
             for (index = 0; index < voxelNumber; index++) {
                 nanImagePtr[index] = (intensityPtr[index] == intensityPtr[index]) ? true : false;
@@ -1268,7 +1263,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
 
     free(tmpImagePtr);
     free(currentMask);
-    free(activeTimePoint);
+    free(activeTimePoints);
     free(nanImagePtr);
 }
 /* *************************************************************** */
@@ -1277,31 +1272,31 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
                                       float varianceY,
                                       float varianceZ,
                                       int *mask,
-                                      bool *timePoint) {
+                                      bool *timePoints) {
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_tools_labelKernelConvolution_core<unsigned char>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        reg_tools_labelKernelConvolution_core<unsigned char>(image, varianceX, varianceY, varianceZ, mask, timePoints);
         break;
     case NIFTI_TYPE_INT8:
-        reg_tools_labelKernelConvolution_core<char>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        reg_tools_labelKernelConvolution_core<char>(image, varianceX, varianceY, varianceZ, mask, timePoints);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_tools_labelKernelConvolution_core<unsigned short>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        reg_tools_labelKernelConvolution_core<unsigned short>(image, varianceX, varianceY, varianceZ, mask, timePoints);
         break;
     case NIFTI_TYPE_INT16:
-        reg_tools_labelKernelConvolution_core<short>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        reg_tools_labelKernelConvolution_core<short>(image, varianceX, varianceY, varianceZ, mask, timePoints);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_tools_labelKernelConvolution_core<unsigned>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        reg_tools_labelKernelConvolution_core<unsigned>(image, varianceX, varianceY, varianceZ, mask, timePoints);
         break;
     case NIFTI_TYPE_INT32:
-        reg_tools_labelKernelConvolution_core<int>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        reg_tools_labelKernelConvolution_core<int>(image, varianceX, varianceY, varianceZ, mask, timePoints);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_tools_labelKernelConvolution_core<float>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        reg_tools_labelKernelConvolution_core<float>(image, varianceX, varianceY, varianceZ, mask, timePoints);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_tools_labelKernelConvolution_core<double>(image, varianceX, varianceY, varianceZ, mask, timePoint);
+        reg_tools_labelKernelConvolution_core<double>(image, varianceX, varianceY, varianceZ, mask, timePoints);
         break;
     default:
         NR_FATAL_ERROR("The image data type is not supported");
@@ -1312,7 +1307,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                  const float *sigma,
                                  const int& kernelType,
                                  const int *mask,
-                                 const bool *timePoint,
+                                 const bool *timePoints,
                                  const bool *axis) {
     if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64)
         NR_FATAL_ERROR("The image is expected to be of floating precision type");
@@ -1320,18 +1315,18 @@ void reg_tools_kernelConvolution(nifti_image *image,
     if (image->nt <= 0) image->nt = image->dim[4] = 1;
     if (image->nu <= 0) image->nu = image->dim[5] = 1;
 
-    unique_ptr<bool[]> axisToSmooth{ new bool[3] };
+    bool axisToSmooth[3];
     if (axis == nullptr) {
         // All axis are smoothed by default
-        for (int i = 0; i < 3; i++) axisToSmooth[i] = true;
+        axisToSmooth[0] = axisToSmooth[1] = axisToSmooth[2] = true;
     } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i];
 
-    const int activeTimePointNumber = image->nt * image->nu;
-    unique_ptr<bool[]> activeTimePoint{ new bool[activeTimePointNumber] };
-    if (timePoint == nullptr) {
+    const int activeTimePointCount = image->nt * image->nu;
+    unique_ptr<bool[]> activeTimePoints{ new bool[activeTimePointCount] };
+    if (timePoints == nullptr) {
         // All time points are considered as active
-        for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true;
-    } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i];
+        for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = true;
+    } else for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = timePoints[i];
 
     unique_ptr<int[]> currentMask;
     if (!mask) {
@@ -1341,7 +1336,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
 
     std::visit([&](auto&& imgDataType) {
         using ImgDataType = std::decay_t<decltype(imgDataType)>;
-        reg_tools_kernelConvolution<ImgDataType>(image, sigma, kernelType, mask, activeTimePoint.get(), axisToSmooth.get());
+        reg_tools_kernelConvolution<ImgDataType>(image, sigma, kernelType, mask, activeTimePoints.get(), axisToSmooth);
     }, NiftiImage::getFloatingDataType(image));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index d776017f..d392d9c1 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -81,10 +81,14 @@ void reg_tools_removeSCLInfo(nifti_image *img);
 void reg_getRealImageSpacing(nifti_image *image,
                              float *spacingValues);
 /* *************************************************************** */
-/** @brief Smooth an image using a Gaussian kernel
+/** @brief Smooth an image using a specified kernel
  * @param image Image to be smoothed
- * @param sigma Standard deviation of the Gaussian kernel
- * to use. The kernel is bounded between +/- 3 sigma.
+ * @param sigma Standard deviation of the kernel to use.
+ * The kernel is bounded between +/- 3 sigma.
+ * @param kernelType Type of kernel to use.
+ * @param mask An integer mask over which the smoothing should occur.
+ * @param timePoints Boolean array to specify which time points have to be
+ * smoothed. The array follow the dim array of the nifti header.
  * @param axis Boolean array to specify which axis have to be
  * smoothed. The array follow the dim array of the nifti header.
  */
@@ -100,8 +104,8 @@ void reg_tools_kernelConvolution(nifti_image *image,
  * @param varianceX The variance of the Gaussian kernel in X
  * @param varianceY The variance of the Gaussian kernel in Y
  * @param varianceZ The variance of the Gaussian kernel in Z
- * @param mask An integer mask over which the Gaussian smoothing should occur
- * @param timePoint Boolean array to specify which timepoints have to be
+ * @param mask An integer mask over which the Gaussian smoothing should occur.
+ * @param timePoints Boolean array to specify which time points have to be
  * smoothed.
  */
 void reg_tools_labelKernelConvolution(nifti_image *image,
@@ -109,7 +113,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
                                       float varianceY,
                                       float varianceZ,
                                       int *mask = nullptr,
-                                      bool *timePoint = nullptr);
+                                      bool *timePoints = nullptr);
 /* *************************************************************** */
 /** @brief Downsample an image by a ratio of two
  * @param image Image to be downsampled
diff --git a/reg-lib/cuda/CudaCommon.cu b/reg-lib/cuda/CudaCommon.cu
index 387dabad..27804dcb 100644
--- a/reg-lib/cuda/CudaCommon.cu
+++ b/reg-lib/cuda/CudaCommon.cu
@@ -82,22 +82,23 @@ void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) {
 template <class DataType>
 void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
-        if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
-            NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
+        if (img->datatype != NIFTI_TYPE_FLOAT32)
+            NR_FATAL_ERROR("The specified image is not a single precision image");
         const float *niftiImgValues = static_cast<float*>(img->data);
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        const auto timePointCount = img->dim[4] * img->dim[5];
         unique_ptr<float4[]> array(new float4[voxelNumber]());
         for (size_t i = 0; i < voxelNumber; i++)
             array[i].x = *niftiImgValues++;
-        if (img->dim[5] >= 2) {
+        if (timePointCount >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array[i].y = *niftiImgValues++;
         }
-        if (img->dim[5] >= 3) {
+        if (timePointCount >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array[i].z = *niftiImgValues++;
         }
-        if (img->dim[5] >= 4) {
+        if (timePointCount >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array[i].w = *niftiImgValues++;
         }
@@ -153,29 +154,30 @@ void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const n
 template <class DataType>
 void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
-        if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
-            NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
+        if (img->datatype != NIFTI_TYPE_FLOAT32)
+            NR_FATAL_ERROR("The specified image is not a single precision image");
         const float *niftiImgValues = static_cast<float*>(img->data);
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        const auto timePointCount = img->dim[4] * img->dim[5];
         unique_ptr<float4[]> array1(new float4[voxelNumber]());
         unique_ptr<float4[]> array2(new float4[voxelNumber]());
         for (size_t i = 0; i < voxelNumber; i++)
             array1[i].x = *niftiImgValues++;
         for (size_t i = 0; i < voxelNumber; i++)
             array2[i].x = *niftiImgValues++;
-        if (img->dim[5] >= 2) {
+        if (timePointCount >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array1[i].y = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
                 array2[i].y = *niftiImgValues++;
         }
-        if (img->dim[5] >= 3) {
+        if (timePointCount >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array1[i].z = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
                 array2[i].z = *niftiImgValues++;
         }
-        if (img->dim[5] >= 4) {
+        if (timePointCount >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array1[i].w = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
@@ -223,22 +225,23 @@ void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) {
 template <class DataType>
 void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
-        if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
-            NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
+        if (img->datatype != NIFTI_TYPE_FLOAT32)
+            NR_FATAL_ERROR("The specified image is not a single precision image");
         const float *niftiImgValues = static_cast<float*>(img->data);
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        const auto timePointCount = img->dim[4] * img->dim[5];
         unique_ptr<float4[]> array(new float4[voxelNumber]());
         for (size_t i = 0; i < voxelNumber; i++)
             array[i].x = *niftiImgValues++;
-        if (img->dim[5] >= 2) {
+        if (timePointCount >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array[i].y = *niftiImgValues++;
         }
-        if (img->dim[5] >= 3) {
+        if (timePointCount >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array[i].z = *niftiImgValues++;
         }
-        if (img->dim[5] >= 4) {
+        if (timePointCount >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array[i].w = *niftiImgValues++;
         }
@@ -273,29 +276,30 @@ void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nif
 template <class DataType>
 void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
-        if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1)
-            NR_FATAL_ERROR("The specified image is not a single precision deformation field image");
+        if (img->datatype != NIFTI_TYPE_FLOAT32)
+            NR_FATAL_ERROR("The specified image is not a single precision image");
         const float *niftiImgValues = static_cast<float*>(img->data);
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        const auto timePointCount = img->dim[4] * img->dim[5];
         unique_ptr<float4[]> array1(new float4[voxelNumber]());
         unique_ptr<float4[]> array2(new float4[voxelNumber]());
         for (size_t i = 0; i < voxelNumber; i++)
             array1[i].x = *niftiImgValues++;
         for (size_t i = 0; i < voxelNumber; i++)
             array2[i].x = *niftiImgValues++;
-        if (img->dim[5] >= 2) {
+        if (timePointCount >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array1[i].y = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
                 array2[i].y = *niftiImgValues++;
         }
-        if (img->dim[5] >= 3) {
+        if (timePointCount >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array1[i].z = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
                 array2[i].z = *niftiImgValues++;
         }
-        if (img->dim[5] >= 4) {
+        if (timePointCount >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
                 array1[i].w = *niftiImgValues++;
             for (size_t i = 0; i < voxelNumber; i++)
@@ -350,23 +354,24 @@ template <class DataType>
 void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) {
     if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
-        if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32)
-            NR_FATAL_ERROR("The nifti image is not a 5D volume");
+        if (img->datatype != NIFTI_TYPE_FLOAT32)
+            NR_FATAL_ERROR("The specified image is not a single precision image");
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        const auto timePointCount = img->dim[4] * img->dim[5];
         thrust::device_ptr<const float4> arrayCudaPtr(reinterpret_cast<const float4*>(arrayCuda));
         const thrust::host_vector<float4> array(arrayCudaPtr, arrayCudaPtr + voxelNumber);
         float *niftiImgValues = static_cast<float*>(img->data);
         for (size_t i = 0; i < voxelNumber; i++)
             *niftiImgValues++ = array[i].x;
-        if (img->dim[5] >= 2) {
+        if (timePointCount >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array[i].y;
         }
-        if (img->dim[5] >= 3) {
+        if (timePointCount >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array[i].z;
         }
-        if (img->dim[5] >= 4) {
+        if (timePointCount >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array[i].w;
         }
@@ -399,9 +404,10 @@ template <class DataType>
 void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
     if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
-        if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32)
-            NR_FATAL_ERROR("The nifti image is not a 5D volume");
+        if (img->datatype != NIFTI_TYPE_FLOAT32)
+            NR_FATAL_ERROR("The specified image is not a single precision image");
         const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
+        const auto timePointCount = img->dim[4] * img->dim[5];
         thrust::device_ptr<const float4> array1CudaPtr(reinterpret_cast<const float4*>(array1Cuda));
         thrust::device_ptr<const float4> array2CudaPtr(reinterpret_cast<const float4*>(array2Cuda));
         const thrust::host_vector<float4> array1(array1CudaPtr, array1CudaPtr + voxelNumber);
@@ -411,19 +417,19 @@ void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, con
             *niftiImgValues++ = array1[i].x;
         for (size_t i = 0; i < voxelNumber; i++)
             *niftiImgValues++ = array2[i].x;
-        if (img->dim[5] >= 2) {
+        if (timePointCount >= 2) {
             for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array1[i].y;
             for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array2[i].y;
         }
-        if (img->dim[5] >= 3) {
+        if (timePointCount >= 3) {
             for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array1[i].z;
             for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array2[i].z;
         }
-        if (img->dim[5] >= 4) {
+        if (timePointCount >= 4) {
             for (size_t i = 0; i < voxelNumber; i++)
                 *niftiImgValues++ = array1[i].w;
             for (size_t i = 0; i < voxelNumber; i++)
diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
index d0fb7543..8d982112 100644
--- a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
+++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
@@ -7,7 +7,7 @@
  *  to ensure the CPU and CUDA versions yield the same output
 **/
 
-class ApproxLinearEnergyGradient {
+class ApproxLinearEnergyGradientTest {
 protected:
     using TestData = std::tuple<std::string, NiftiImage&, NiftiImage&, NiftiImage&, float>;
     using TestCase = std::tuple<std::string, double, double, NiftiImage, NiftiImage>;
@@ -15,7 +15,7 @@ class ApproxLinearEnergyGradient {
     inline static vector<TestCase> testCases;
 
 public:
-    ApproxLinearEnergyGradient() {
+    ApproxLinearEnergyGradientTest() {
         if (!testCases.empty())
             return;
 
@@ -124,7 +124,7 @@ class ApproxLinearEnergyGradient {
     }
 };
 
-TEST_CASE_METHOD(ApproxLinearEnergyGradient, "Regression Approximate Linear Energy Gradient", "[regression]") {
+TEST_CASE_METHOD(ApproxLinearEnergyGradientTest, "Regression Approximate Linear Energy Gradient", "[regression]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information

From 47c4a84ba61a03b4ff33ce3da9bfb44cad58316e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 6 Oct 2023 16:27:40 +0100
Subject: [PATCH 217/314] Implement the kernel convolution for CUDA #92

---
 niftyreg_build_version.txt             |   2 +-
 reg-lib/Compute.cpp                    |   3 +-
 reg-lib/Compute.h                      |   3 +-
 reg-lib/cuda/CMakeLists.txt            |   5 +-
 reg-lib/cuda/CudaCompute.cpp           |  55 ++++--
 reg-lib/cuda/CudaCompute.h             |   4 +
 reg-lib/cuda/CudaKernelConvolution.cu  | 227 +++++++++++++++++++++++++
 reg-lib/cuda/CudaKernelConvolution.hpp |  27 +++
 8 files changed, 306 insertions(+), 20 deletions(-)
 create mode 100644 reg-lib/cuda/CudaKernelConvolution.cu
 create mode 100644 reg-lib/cuda/CudaKernelConvolution.hpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3d9988ad..e64f24d5 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-335
+336
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 5409042a..68397be8 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -231,7 +231,7 @@ void Compute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) {
 /* *************************************************************** */
 void Compute::ConvolveImage(nifti_image *image) {
     const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
-    const int kernelType = CUBIC_SPLINE_KERNEL;
+    constexpr int kernelType = CUBIC_SPLINE_KERNEL;
     float currentNodeSpacing[3];
     currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
     bool activeAxis[3] = { 1, 0, 0 };
@@ -278,7 +278,6 @@ void Compute::VoxelCentricToNodeCentric(float weight) {
 void Compute::ConvolveVoxelBasedMeasureGradient(float weight) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
     ConvolveImage(con.GetVoxelBasedMeasureGradient());
-
     // The node-based NMI gradient is extracted from the voxel-based gradient
     VoxelCentricToNodeCentric(weight);
 }
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index 3cef7df7..821103d3 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -35,13 +35,12 @@ class Compute {
 protected:
     Content& con;
 
-    void ConvolveImage(nifti_image*);
-
 #ifdef NR_TESTING
 public:
 #endif
     virtual void VoxelCentricToNodeCentric(float weight);
 
 private:
+    void ConvolveImage(nifti_image*);
     nifti_image* ScaleGradient(const nifti_image&, float);
 };
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index ccedd8ff..18f68628 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -21,8 +21,8 @@ elseif(RUN_RESULT_VAR)
     return()
 else(NOT COMPILE_RESULT_VAR)
     message(STATUS "Found CUDA (v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}) and a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})")
-    # Set C++ standard version for CUDA and enable extended lambdas
-    set(CUDA_NVCC_FLAGS "-std=c++17 --extended-lambda")
+    # Set C++ standard version for CUDA, and enable extended lambdas and relaxed constexpr support
+    set(CUDA_NVCC_FLAGS "-std=c++17 --extended-lambda --expt-relaxed-constexpr")
     #check cuda version and adjust compile flags
     if("${RUN_OUTPUT_VAR}" LESS "30")
         set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
@@ -66,6 +66,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaContext.cpp
     CudaDefContent.cpp
     CudaF3dContent.cpp
+    CudaKernelConvolution.cu
     CudaKernelFactory.cpp
     CudaMeasure.cpp
     affineDeformationKernel.cu
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index f9f81a84..53e54d04 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -1,5 +1,6 @@
 #include "CudaCompute.h"
 #include "CudaF3dContent.h"
+#include "CudaKernelConvolution.hpp"
 #include "CudaNormaliseGradient.hpp"
 #include "_reg_resampling_gpu.h"
 #include "_reg_localTransformation_gpu.h"
@@ -141,13 +142,10 @@ void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool o
 }
 /* *************************************************************** */
 void CudaCompute::SmoothGradient(float sigma) {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
-    if (sigma != 0) {
-        Compute::SmoothGradient(sigma);
-        // Update the changes for GPU
-        dynamic_cast<CudaF3dContent&>(con).UpdateTransformationGradient();
-    }
+    if (sigma == 0) return;
+    sigma = fabs(sigma);
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    Cuda::KernelConvolution(con.F3dContent::GetTransformationGradient(), con.GetTransformationGradientCuda(), &sigma, GAUSSIAN_KERNEL);
 }
 /* *************************************************************** */
 void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) {
@@ -165,6 +163,42 @@ void CudaCompute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) {
                                                updateStepNumber);
 }
 /* *************************************************************** */
+void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) {
+    const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
+    constexpr int kernelType = CUBIC_SPLINE_KERNEL;
+    float currentNodeSpacing[3];
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
+    bool activeAxis[3] = { 1, 0, 0 };
+    Cuda::KernelConvolution(image,
+                            imageCuda,
+                            currentNodeSpacing,
+                            kernelType,
+                            nullptr, // all volumes are considered as active
+                            activeAxis);
+    // Convolution along the y axis
+    currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dy;
+    activeAxis[0] = 0;
+    activeAxis[1] = 1;
+    Cuda::KernelConvolution(image,
+                            imageCuda,
+                            currentNodeSpacing,
+                            kernelType,
+                            nullptr, // all volumes are considered as active
+                            activeAxis);
+    // Convolution along the z axis if required
+    if (image->nz > 1) {
+        currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dz;
+        activeAxis[1] = 0;
+        activeAxis[2] = 1;
+        Cuda::KernelConvolution(image,
+                                imageCuda,
+                                currentNodeSpacing,
+                                kernelType,
+                                nullptr, // all volumes are considered as active
+                                activeAxis);
+    }
+}
+/* *************************************************************** */
 void CudaCompute::VoxelCentricToNodeCentric(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating());
@@ -177,13 +211,8 @@ void CudaCompute::VoxelCentricToNodeCentric(float weight) {
 }
 /* *************************************************************** */
 void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
     CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
-    Compute::ConvolveImage(con.GetVoxelBasedMeasureGradient());
-    // Transfer the data back to the CUDA device
-    con.UpdateVoxelBasedMeasureGradient();
-
+    ConvolveImage(con.DefContent::GetVoxelBasedMeasureGradient(), con.GetVoxelBasedMeasureGradientCuda());
     // The node-based NMI gradient is extracted from the voxel-based gradient
     VoxelCentricToNodeCentric(weight);
 }
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index eee743c0..9779f805 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "Compute.h"
+#include "CudaCommon.hpp"
 
 class CudaCompute: public Compute {
 public:
@@ -34,4 +35,7 @@ class CudaCompute: public Compute {
 protected:
 #endif
     virtual void VoxelCentricToNodeCentric(float weight) override;
+
+private:
+    void ConvolveImage(const nifti_image*, float4*);
 };
diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu
new file mode 100644
index 00000000..ae562206
--- /dev/null
+++ b/reg-lib/cuda/CudaKernelConvolution.cu
@@ -0,0 +1,227 @@
+#include "CudaKernelConvolution.hpp"
+
+/* *************************************************************** */
+void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
+                                       float4 *imageCuda,
+                                       const float *sigma,
+                                       const int kernelType,
+                                       const bool *timePoints,
+                                       const bool *axis) {
+    if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048)
+        NR_FATAL_ERROR("This function does not support images with dimensions larger than 2048");
+
+    bool axisToSmooth[3];
+    if (axis == nullptr) {
+        // All axis are smoothed by default
+        axisToSmooth[0] = axisToSmooth[1] = axisToSmooth[2] = true;
+    } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i];
+
+    const auto activeTimePointCount = std::min(image->nt * image->nu, 4);
+    bool activeTimePoints[4]{}; // 4 is the maximum number of time points
+    if (timePoints == nullptr) {
+        // All time points are considered as active
+        for (auto i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = true;
+    } else for (auto i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = timePoints[i];
+
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
+    const int3 imageDims = make_int3(image->nx, image->ny, image->nz);
+
+    thrust::device_vector<float> densityCuda(voxelNumber);
+    thrust::device_vector<bool> nanImageCuda(voxelNumber);
+    thrust::device_vector<float> bufferIntensityCuda(voxelNumber);
+    thrust::device_vector<float> bufferDensityCuda(voxelNumber);
+    float *densityCudaPtr = densityCuda.data().get();
+    bool *nanImageCudaPtr = nanImageCuda.data().get();
+    float *bufferIntensityCudaPtr = bufferIntensityCuda.data().get();
+    float *bufferDensityCudaPtr = bufferDensityCuda.data().get();
+
+    for (int t = 0; t < activeTimePointCount; t++) {
+        if (!activeTimePoints[t]) continue;
+
+        thrust::for_each_n(thrust::device, thrust::make_counting_iterator<size_t>(0), voxelNumber, [=]__device__(const size_t index) {
+            float& intensityVal = reinterpret_cast<float*>(&imageCuda[index])[t];
+            float& densityVal = densityCudaPtr[index];
+            bool& nanImageVal = nanImageCudaPtr[index];
+            densityVal = intensityVal == intensityVal ? 1.f : 0;
+            nanImageVal = !static_cast<bool>(densityVal);
+            if (nanImageVal) intensityVal = 0;
+        });
+
+        // Loop over the x, y and z dimensions
+        for (int n = 0; n < 3; n++) {
+            if (!axisToSmooth[n] || image->dim[n] <= 1) continue;
+
+            double temp;
+            if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel
+            else temp = fabs(sigma[t]); // voxel-based if negative value
+            int radius = 0;
+            // Define the kernel size
+            if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) {
+                // Mean or linear filtering
+                radius = static_cast<int>(temp);
+            } else if (kernelType == GAUSSIAN_KERNEL) {
+                // Gaussian kernel
+                radius = static_cast<int>(temp * 3.0);
+            } else if (kernelType == CUBIC_SPLINE_KERNEL) {
+                // Spline kernel
+                radius = static_cast<int>(temp * 2.0);
+            } else {
+                NR_FATAL_ERROR("Unknown kernel type");
+            }
+            if (radius <= 0) continue;
+
+            // Allocate the kernel
+            vector<float> kernel(2 * radius + 1);
+            double kernelSum = 0;
+            // Fill the kernel
+            if (kernelType == CUBIC_SPLINE_KERNEL) {
+                // Compute the Cubic Spline kernel
+                for (int i = -radius; i <= radius; i++) {
+                    // temp contains the kernel node spacing
+                    double relative = fabs(i / temp);
+                    if (relative < 1.0)
+                        kernel[i + radius] = static_cast<float>(2.0 / 3.0 - Square(relative) + 0.5 * Cube(relative));
+                    else if (relative < 2.0)
+                        kernel[i + radius] = static_cast<float>(-Cube(relative - 2.0) / 6.0);
+                    else kernel[i + radius] = 0;
+                    kernelSum += kernel[i + radius];
+                }
+            } else if (kernelType == GAUSSIAN_KERNEL) {
+                // Compute the Gaussian kernel
+                for (int i = -radius; i <= radius; i++) {
+                    // 2.506... = sqrt(2*pi)
+                    // temp contains the sigma in voxel
+                    kernel[i + radius] = static_cast<float>(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631));
+                    kernelSum += kernel[i + radius];
+                }
+            } else if (kernelType == LINEAR_KERNEL) {
+                // Compute the linear kernel
+                for (int i = -radius; i <= radius; i++) {
+                    kernel[i + radius] = 1.f - fabs(i / static_cast<float>(radius));
+                    kernelSum += kernel[i + radius];
+                }
+            } else if (kernelType == MEAN_KERNEL && imageDims.z == 1) {
+                // Compute the mean kernel
+                for (int i = -radius; i <= radius; i++) {
+                    kernel[i + radius] = 1.f;
+                    kernelSum += kernel[i + radius];
+                }
+            }
+            // No kernel is required for the mean filtering
+            // No need for kernel normalisation as this is handled by the density function
+            NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]");
+
+            int planeCount, lineOffset;
+            switch (n) {
+            case 0:
+                planeCount = imageDims.y * imageDims.z;
+                lineOffset = 1;
+                break;
+            case 1:
+                planeCount = imageDims.x * imageDims.z;
+                lineOffset = imageDims.x;
+                break;
+            case 2:
+                planeCount = imageDims.x * imageDims.y;
+                lineOffset = planeCount;
+                break;
+            }
+
+            thrust::device_vector<float> kernelCuda(kernel.begin(), kernel.end());
+            float *kernelCudaPtr = kernelCuda.data().get();
+            const int imageDim = reinterpret_cast<const int*>(&imageDims)[n];
+
+            // Loop over the different voxel
+            thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), planeCount, [=]__device__(const int planeIndex) {
+                int realIndex = 0;
+                switch (n) {
+                case 0:
+                    realIndex = planeIndex * imageDims.x;
+                    break;
+                case 1:
+                    realIndex = (planeIndex / imageDims.x) * imageDims.x * imageDims.y + planeIndex % imageDims.x;
+                    break;
+                case 2:
+                    realIndex = planeIndex;
+                    break;
+                }
+                // Fetch the current line into a stack buffer
+                float *bufferIntensityPtr = &bufferIntensityCudaPtr[planeIndex * imageDim];
+                float *bufferDensityPtr = &bufferDensityCudaPtr[planeIndex * imageDim];
+                float4 *currentIntensityPtr = &imageCuda[realIndex];
+                float *currentDensityPtr = &densityCudaPtr[realIndex];
+                for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex) {
+                    bufferIntensityPtr[lineIndex] = reinterpret_cast<float*>(currentIntensityPtr)[t];
+                    bufferDensityPtr[lineIndex] = *currentDensityPtr;
+                    currentIntensityPtr += lineOffset;
+                    currentDensityPtr += lineOffset;
+                }
+                if (kernelSum > 0) {
+                    // Perform the kernel convolution along 1 line
+                    for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex) {
+                        // Define the kernel boundaries
+                        int shiftPre = lineIndex - radius;
+                        int shiftPst = lineIndex + radius + 1;
+                        float *kernelPtr;
+                        if (shiftPre < 0) {
+                            kernelPtr = &kernelCudaPtr[-shiftPre];
+                            shiftPre = 0;
+                        } else kernelPtr = kernelCudaPtr;
+                        if (shiftPst > imageDim) shiftPst = imageDim;
+                        // Set the current values to zero
+                        // Increment the current value by performing the weighted sum
+                        double intensitySum = 0, densitySum = 0;
+                        for (int k = shiftPre; k < shiftPst; ++k) {
+                            float& kernelValue = *kernelPtr++;
+                            intensitySum += kernelValue * bufferIntensityPtr[k];
+                            densitySum += kernelValue * bufferDensityPtr[k];
+                        }
+                        // Store the computed value in place
+                        reinterpret_cast<float*>(&imageCuda[realIndex])[t] = static_cast<float>(intensitySum);
+                        densityCudaPtr[realIndex] = static_cast<float>(densitySum);
+                        realIndex += lineOffset;
+                    } // line convolution
+                } else { // kernelSum <= 0
+                    for (int lineIndex = 1; lineIndex < imageDim; ++lineIndex) {
+                        bufferIntensityPtr[lineIndex] += bufferIntensityPtr[lineIndex - 1];
+                        bufferDensityPtr[lineIndex] += bufferDensityPtr[lineIndex - 1];
+                    }
+                    int shiftPre = -radius - 1;
+                    int shiftPst = radius;
+                    for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex, ++shiftPre, ++shiftPst) {
+                        float bufferIntensityCur, bufferDensityCur;
+                        if (shiftPre > -1) {
+                            if (shiftPst < imageDim) {
+                                bufferIntensityCur = bufferIntensityPtr[shiftPre] - bufferIntensityPtr[shiftPst];
+                                bufferDensityCur = bufferDensityPtr[shiftPre] - bufferDensityPtr[shiftPst];
+                            } else {
+                                bufferIntensityCur = bufferIntensityPtr[shiftPre] - bufferIntensityPtr[imageDim - 1];
+                                bufferDensityCur = bufferDensityPtr[shiftPre] - bufferDensityPtr[imageDim - 1];
+                            }
+                        } else {
+                            if (shiftPst < imageDim) {
+                                bufferIntensityCur = -bufferIntensityPtr[shiftPst];
+                                bufferDensityCur = -bufferDensityPtr[shiftPst];
+                            } else {
+                                bufferIntensityCur = 0;
+                                bufferDensityCur = 0;
+                            }
+                        }
+                        reinterpret_cast<float*>(&imageCuda[realIndex])[t] = bufferIntensityCur;
+                        densityCudaPtr[realIndex] = bufferDensityCur;
+                        realIndex += lineOffset;
+                    } // line convolution of mean filter
+                } // No kernel computation
+            }); // pixel in starting plane
+        } // axes
+
+        // Normalise per time point
+        thrust::for_each_n(thrust::device, thrust::make_counting_iterator<size_t>(0), voxelNumber, [=]__device__(const size_t index) {
+            float& intensityVal = reinterpret_cast<float*>(&imageCuda[index])[t];
+            const float& densityVal = densityCudaPtr[index];
+            const bool& nanImageVal = nanImageCudaPtr[index];
+            intensityVal = nanImageVal ? std::numeric_limits<float>::quiet_NaN() : intensityVal / densityVal;
+        });
+    } // check if the time point is active
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaKernelConvolution.hpp b/reg-lib/cuda/CudaKernelConvolution.hpp
new file mode 100644
index 00000000..5388861d
--- /dev/null
+++ b/reg-lib/cuda/CudaKernelConvolution.hpp
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "_reg_tools_gpu.h"
+
+/* *************************************************************** */
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+/** @brief Smooth an image using a specified kernel
+ * @param image Image to be smoothed
+ * @param imageCuda Image to be smoothed
+ * @param sigma Standard deviation of the kernel to use.
+ * The kernel is bounded between +/- 3 sigma.
+ * @param kernelType Type of kernel to use.
+ * @param timePoints Boolean array to specify which time points have to be
+ * smoothed. The array follow the dim array of the nifti header.
+ * @param axis Boolean array to specify which axis have to be
+ * smoothed. The array follow the dim array of the nifti header.
+ */
+void KernelConvolution(const nifti_image *image,
+                       float4 *imageCuda,
+                       const float *sigma,
+                       const int kernelType,
+                       const bool *timePoints = nullptr,
+                       const bool *axis = nullptr);
+/* *************************************************************** */
+}
+/* *************************************************************** */

From 9f24fa19edcba03365d1e794c626f73e02c72787 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 9 Oct 2023 13:36:23 +0100
Subject: [PATCH 218/314] Add regression tests for kernel convolution #92

---
 niftyreg_build_version.txt                   |   2 +-
 reg-lib/cpu/_reg_tools.cpp                   |  18 +-
 reg-lib/cpu/_reg_tools.h                     |   8 +-
 reg-lib/cuda/CudaKernelConvolution.cu        |  14 +-
 reg-lib/cuda/CudaKernelConvolution.hpp       |   4 +-
 reg-test/CMakeLists.txt                      |   1 +
 reg-test/reg_test_common.h                   |   1 +
 reg-test/reg_test_regr_kernelConvolution.cpp | 168 +++++++++++++++++++
 8 files changed, 193 insertions(+), 23 deletions(-)
 create mode 100644 reg-test/reg_test_regr_kernelConvolution.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e64f24d5..f59a90f3 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-336
+337
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index fbd7798d..ae9d6e2a 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -834,7 +834,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                  const int& kernelType,
                                  const int *mask,
                                  const bool *timePoints,
-                                 const bool *axis) {
+                                 const bool *axes) {
     if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048)
         NR_FATAL_ERROR("This function does not support images with dimensions larger than 2048");
 
@@ -867,7 +867,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
             }
             // Loop over the x, y and z dimensions
             for (int n = 0; n < 3; n++) {
-                if (axis[n] && image->dim[n] > 1) {
+                if (axes[n] && image->dim[n] > 1) {
                     double temp;
                     if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel
                     else temp = fabs(sigma[t]); // voxel-based if negative value
@@ -1308,18 +1308,18 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                  const int& kernelType,
                                  const int *mask,
                                  const bool *timePoints,
-                                 const bool *axis) {
+                                 const bool *axes) {
     if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64)
         NR_FATAL_ERROR("The image is expected to be of floating precision type");
 
     if (image->nt <= 0) image->nt = image->dim[4] = 1;
     if (image->nu <= 0) image->nu = image->dim[5] = 1;
 
-    bool axisToSmooth[3];
-    if (axis == nullptr) {
-        // All axis are smoothed by default
-        axisToSmooth[0] = axisToSmooth[1] = axisToSmooth[2] = true;
-    } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i];
+    bool axesToSmooth[3];
+    if (axes == nullptr) {
+        // All axes are smoothed by default
+        axesToSmooth[0] = axesToSmooth[1] = axesToSmooth[2] = true;
+    } else for (int i = 0; i < 3; i++) axesToSmooth[i] = axes[i];
 
     const int activeTimePointCount = image->nt * image->nu;
     unique_ptr<bool[]> activeTimePoints{ new bool[activeTimePointCount] };
@@ -1336,7 +1336,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
 
     std::visit([&](auto&& imgDataType) {
         using ImgDataType = std::decay_t<decltype(imgDataType)>;
-        reg_tools_kernelConvolution<ImgDataType>(image, sigma, kernelType, mask, activeTimePoints.get(), axisToSmooth);
+        reg_tools_kernelConvolution<ImgDataType>(image, sigma, kernelType, mask, activeTimePoints.get(), axesToSmooth);
     }, NiftiImage::getFloatingDataType(image));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index d392d9c1..77d01e55 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -89,7 +89,7 @@ void reg_getRealImageSpacing(nifti_image *image,
  * @param mask An integer mask over which the smoothing should occur.
  * @param timePoints Boolean array to specify which time points have to be
  * smoothed. The array follow the dim array of the nifti header.
- * @param axis Boolean array to specify which axis have to be
+ * @param axes Boolean array to specify which axes have to be
  * smoothed. The array follow the dim array of the nifti header.
  */
 void reg_tools_kernelConvolution(nifti_image *image,
@@ -97,7 +97,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                  const int& kernelType,
                                  const int *mask = nullptr,
                                  const bool *timePoints = nullptr,
-                                 const bool *axis = nullptr);
+                                 const bool *axes = nullptr);
 /* *************************************************************** */
 /** @brief Smooth a label image using a Gaussian kernel
  * @param image Image to be smoothed
@@ -120,13 +120,13 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
  * @param type The image is first smoothed  using a Gaussian
  * kernel of 0.7 voxel standard deviation before being downsample
  * if type is set to true.
- * @param axis Boolean array to specify which axis have to be
+ * @param axes Boolean array to specify which axes have to be
  * downsampled. The array follow the dim array of the nifti header.
  */
 template <class PrecisionType>
 void reg_downsampleImage(nifti_image *image,
                          int type,
-                         bool *axis);
+                         bool *axes);
 /* *************************************************************** */
 /** @brief Returns the maximal euclidean distance from a
  * deformation field image
diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu
index ae562206..7f446c53 100644
--- a/reg-lib/cuda/CudaKernelConvolution.cu
+++ b/reg-lib/cuda/CudaKernelConvolution.cu
@@ -6,15 +6,15 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                                        const float *sigma,
                                        const int kernelType,
                                        const bool *timePoints,
-                                       const bool *axis) {
+                                       const bool *axes) {
     if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048)
         NR_FATAL_ERROR("This function does not support images with dimensions larger than 2048");
 
-    bool axisToSmooth[3];
-    if (axis == nullptr) {
-        // All axis are smoothed by default
-        axisToSmooth[0] = axisToSmooth[1] = axisToSmooth[2] = true;
-    } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i];
+    bool axesToSmooth[3];
+    if (axes == nullptr) {
+        // All axes are smoothed by default
+        axesToSmooth[0] = axesToSmooth[1] = axesToSmooth[2] = true;
+    } else for (int i = 0; i < 3; i++) axesToSmooth[i] = axes[i];
 
     const auto activeTimePointCount = std::min(image->nt * image->nu, 4);
     bool activeTimePoints[4]{}; // 4 is the maximum number of time points
@@ -49,7 +49,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
 
         // Loop over the x, y and z dimensions
         for (int n = 0; n < 3; n++) {
-            if (!axisToSmooth[n] || image->dim[n] <= 1) continue;
+            if (!axesToSmooth[n] || image->dim[n] <= 1) continue;
 
             double temp;
             if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel
diff --git a/reg-lib/cuda/CudaKernelConvolution.hpp b/reg-lib/cuda/CudaKernelConvolution.hpp
index 5388861d..7d74c944 100644
--- a/reg-lib/cuda/CudaKernelConvolution.hpp
+++ b/reg-lib/cuda/CudaKernelConvolution.hpp
@@ -13,7 +13,7 @@ namespace NiftyReg::Cuda {
  * @param kernelType Type of kernel to use.
  * @param timePoints Boolean array to specify which time points have to be
  * smoothed. The array follow the dim array of the nifti header.
- * @param axis Boolean array to specify which axis have to be
+ * @param axes Boolean array to specify which axes have to be
  * smoothed. The array follow the dim array of the nifti header.
  */
 void KernelConvolution(const nifti_image *image,
@@ -21,7 +21,7 @@ void KernelConvolution(const nifti_image *image,
                        const float *sigma,
                        const int kernelType,
                        const bool *timePoints = nullptr,
-                       const bool *axis = nullptr);
+                       const bool *axes = nullptr);
 /* *************************************************************** */
 }
 /* *************************************************************** */
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index e857a818..c86af8a0 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -125,6 +125,7 @@ set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
 if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_approxLinearEnergyGradient ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST})
+  set(EXEC_LIST reg_test_regr_kernelConvolution ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST})
 endif(USE_CUDA)
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 9f6c192c..c05cc586 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -4,6 +4,7 @@
 #include <array>
 #include <random>
 #include <iomanip>
+#include <numeric>
 #include <catch2/catch_test_macros.hpp>
 #include "_reg_lncc.h"
 #include "_reg_localTrans.h"
diff --git a/reg-test/reg_test_regr_kernelConvolution.cpp b/reg-test/reg_test_regr_kernelConvolution.cpp
new file mode 100644
index 00000000..c4fe1bd8
--- /dev/null
+++ b/reg-test/reg_test_regr_kernelConvolution.cpp
@@ -0,0 +1,168 @@
+#include "reg_test_common.h"
+#include "CudaContent.h"
+#include "CudaKernelConvolution.hpp"
+
+/**
+ *  Kernel convolution regression test to ensure the CPU and CUDA versions yield the same output
+**/
+
+class KernelConvolutionTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage&, vector<float>, int, bool*, bool*>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    KernelConvolutionTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::mt19937 gen(0);
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create images
+        constexpr int imageCount = 8;
+        constexpr NiftiImage::dim_t size = 16;
+        vector<NiftiImage::dim_t> dims[imageCount]{ { size, size },
+                                                   { size, size, 1, 1, 2 },
+                                                   { size, size, 1, 1, 3 },
+                                                   { size, size, 1, 2, 2 },
+                                                   { size, size, size },
+                                                   { size, size, size, 2, 1 },
+                                                   { size, size, size, 3, 1 },
+                                                   { size, size, size, 2, 2 } };
+        NiftiImage images[imageCount];
+
+        // Fill images with random values
+        for (int i = 0; i < imageCount; i++) {
+            images[i] = NiftiImage(dims[i], NIFTI_TYPE_FLOAT32);
+            auto imagePtr = images[i].data();
+            for (size_t j = 0; j < images[i].nVoxels(); j++)
+                imagePtr[j] = distr(gen);
+        }
+
+        // Create a lambda to concatenate strings for std::accumulate
+        auto strConcat = [](const std::string& str, const auto& val) { return str + " "s + std::to_string(val); };
+
+        // Create the data container for the regression test
+        constexpr int kernelTypeCount = 4;
+        distr.param(std::uniform_real_distribution<float>::param_type(1, 10));  // Change the range of the distribution
+        vector<TestData> testData;
+        for (int i = 0; i < imageCount; i++) {
+            for (int kernelType = 0; kernelType < kernelTypeCount; kernelType++) {
+                vector<float> sigmaValues(images[i]->nt * images[i]->nu);
+                std::generate(sigmaValues.begin(), sigmaValues.end(), [&]() { return distr(gen); });
+                const std::string sigmaStr = std::accumulate(sigmaValues.begin(), sigmaValues.end(), ""s, strConcat);
+                const std::string dimsStr = std::accumulate(dims[i].begin(), dims[i].end(), ""s, strConcat);
+                testData.emplace_back(TestData(
+                    "Kernel: "s + std::to_string(kernelType) + " Sigma:"s + sigmaStr + " Dims:"s + dimsStr,
+                    images[i],
+                    std::move(sigmaValues),
+                    kernelType,
+                    nullptr,
+                    nullptr
+                ));
+            }
+        }
+
+        // Define time points and axes to smooth
+        constexpr auto timePointCount = 4;
+        bool timePoints[timePointCount][4]{ { true, false, false, false },
+                                           { false, true, false, false },
+                                           { false, false, true, false },
+                                           { false, false, false, true } };
+        bool axes[timePointCount][3]{ { true, false, false },
+                                     { false, true, false },
+                                     { false, false, true },
+                                     { true, true, true } };
+
+        // Add the time points and axes to the latest test data
+        auto latestTestData = testData.end() - timePointCount;
+        for (int i = 0; i < timePointCount; i++) {
+            auto&& [testName, image, sigmaValues, kernelType, activeTimePoints, activeAxes] = latestTestData[i];
+            const std::string timePointsStr = std::accumulate(timePoints[i], timePoints[i] + 4, ""s, strConcat);
+            const std::string axesStr = std::accumulate(axes[i], axes[i] + 3, ""s, strConcat);
+            testData.emplace_back(TestData(
+                testName + " TimePoints:"s + timePointsStr + " Axes:"s + axesStr,
+                image,
+                sigmaValues,
+                kernelType,
+                timePoints[i],
+                axes[i]
+            ));
+        }
+
+        // Create the platforms
+        Platform platformCpu(PlatformType::Cpu);
+        Platform platformCuda(PlatformType::Cuda);
+
+        for (auto&& testData : testData) {
+            // Get the test data
+            auto&& [testName, image, sigmaValues, kernelType, activeTimePoints, activeAxes] = testData;
+
+            // Create images
+            NiftiImage imageCpu(image), imageCuda(image);
+
+            // Create the contents
+            unique_ptr<Content> contentCpu{ new Content(
+                imageCpu,
+                imageCpu,
+                nullptr,
+                nullptr,
+                sizeof(float)
+            ) };
+            unique_ptr<CudaContent> contentCuda{ new CudaContent(
+                imageCuda,
+                imageCuda,
+                nullptr,
+                nullptr,
+                sizeof(float)
+            ) };
+
+            // Use deformation fields to store images
+            contentCpu->SetDeformationField(imageCpu.disown());
+            contentCuda->SetDeformationField(imageCuda.disown());
+
+            // Compute the kernel convolution for CPU and CUDA
+            reg_tools_kernelConvolution(contentCpu->GetDeformationField(), sigmaValues.data(), kernelType, nullptr, activeTimePoints, activeAxes);
+            Cuda::KernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), kernelType, activeTimePoints, activeAxes);
+
+            // Get the images
+            imageCpu = NiftiImage(contentCpu->GetDeformationField(), NiftiImage::Copy::Image);
+            imageCuda = NiftiImage(contentCuda->GetDeformationField(), NiftiImage::Copy::Image);
+
+            // Save for testing
+            testCases.push_back({ testName, std::move(imageCpu), std::move(imageCuda) });
+        }
+    }
+};
+
+TEST_CASE_METHOD(KernelConvolutionTest, "Regression Kernel Convolution", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, imageCpu, imageCuda] = testCase;
+
+        SECTION(testName) {
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            // Check the images
+            const auto imageCpuPtr = imageCpu.data();
+            const auto imageCudaPtr = imageCuda.data();
+            for (size_t i = 0; i < imageCpu.nVoxels(); ++i) {
+                const float cpuVal = imageCpuPtr[i];
+                const float cudaVal = imageCudaPtr[i];
+                if (cpuVal != cpuVal && cudaVal != cudaVal) continue;  // Skip NaN values
+                const float diff = fabs(cpuVal - cudaVal);
+                if (diff > EPS)
+                    NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl;
+                REQUIRE(diff < EPS);
+            }
+        }
+    }
+}

From 9b326322acc6e8dad3645b4b357d9d27669e0950 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 10 Oct 2023 13:20:28 +0100
Subject: [PATCH 219/314] Use ConvKernelType instead of NREG_CONV_KERNEL_TYPE

---
 niftyreg_build_version.txt                   |  2 +-
 reg-apps/reg_f3d.cpp                         |  2 +-
 reg-apps/reg_tools.cpp                       | 18 +++----
 reg-lib/Compute.cpp                          |  4 +-
 reg-lib/ConvolutionKernel.h                  |  4 +-
 reg-lib/_reg_aladin.cpp                      |  4 +-
 reg-lib/_reg_base.cpp                        |  6 +--
 reg-lib/_reg_base.h                          |  2 +-
 reg-lib/cl/ClConvolutionKernel.cpp           |  2 +-
 reg-lib/cl/ClConvolutionKernel.h             |  2 +-
 reg-lib/cpu/CpuConvolutionKernel.cpp         |  2 +-
 reg-lib/cpu/CpuConvolutionKernel.h           |  2 +-
 reg-lib/cpu/_reg_lncc.cpp                    | 26 +++++-----
 reg-lib/cpu/_reg_lncc.h                      |  4 +-
 reg-lib/cpu/_reg_mind.cpp                    |  4 +-
 reg-lib/cpu/_reg_tools.cpp                   | 52 ++++++++++----------
 reg-lib/cpu/_reg_tools.h                     | 27 +++++-----
 reg-lib/cuda/CudaCompute.cpp                 |  4 +-
 reg-lib/cuda/CudaConvolutionKernel.cpp       |  2 +-
 reg-lib/cuda/CudaConvolutionKernel.h         |  2 +-
 reg-lib/cuda/CudaKernelConvolution.cu        | 18 +++----
 reg-lib/cuda/CudaKernelConvolution.hpp       |  2 +-
 reg-test/reg_test_nmi.cpp                    |  2 +-
 reg-test/reg_test_regr_kernelConvolution.cpp |  4 +-
 24 files changed, 96 insertions(+), 101 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f59a90f3..87537f49 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-337
+338
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 28fc968c..104803a4 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -625,7 +625,7 @@ int main(int argc, char **argv) {
         }
     }
     if (useMeanLNCC)
-        reg->SetLNCCKernelType(2);
+        reg->SetLNCCKernelType(ConvKernelType::Gaussian);
 
     NR_DEBUG("*******************************************");
     NR_DEBUG("*******************************************");
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 002686c6..70ff5741 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -520,24 +520,24 @@ int main(int argc, char **argv)
         bool boolX[3]= {1,0,0};
         for(int i=0; i<smoothImg->nt*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueX;
         if(flag->smoothMeanFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolX);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Mean,nullptr,timePoint,boolX);
         else if(flag->smoothSplineFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolX);
-        else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolX);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Cubic,nullptr,timePoint,boolX);
+        else reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Gaussian,nullptr,timePoint,boolX);
         bool boolY[3]= {0,1,0};
         for(int i=0; i<smoothImg->nt*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueY;
         if(flag->smoothMeanFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolY);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Mean,nullptr,timePoint,boolY);
         else if(flag->smoothSplineFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolY);
-        else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolY);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Cubic,nullptr,timePoint,boolY);
+        else reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Gaussian,nullptr,timePoint,boolY);
         bool boolZ[3]= {0,0,1};
         for(int i=0; i<smoothImg->nt*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueZ;
         if(flag->smoothMeanFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolZ);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Mean,nullptr,timePoint,boolZ);
         else if(flag->smoothSplineFlag)
-            reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolZ);
-        else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolZ);
+            reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Cubic,nullptr,timePoint,boolZ);
+        else reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Gaussian,nullptr,timePoint,boolZ);
         delete []kernelSize;
         delete []timePoint;
         if(flag->outputImageFlag)
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 68397be8..6814785d 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -187,7 +187,7 @@ void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optim
 void Compute::SmoothGradient(float sigma) {
     if (sigma != 0) {
         sigma = fabs(sigma);
-        reg_tools_kernelConvolution(dynamic_cast<F3dContent&>(con).GetTransformationGradient(), &sigma, GAUSSIAN_KERNEL);
+        reg_tools_kernelConvolution(dynamic_cast<F3dContent&>(con).GetTransformationGradient(), &sigma, ConvKernelType::Gaussian);
     }
 }
 /* *************************************************************** */
@@ -231,7 +231,7 @@ void Compute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) {
 /* *************************************************************** */
 void Compute::ConvolveImage(nifti_image *image) {
     const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
-    constexpr int kernelType = CUBIC_SPLINE_KERNEL;
+    constexpr ConvKernelType kernelType = ConvKernelType::Cubic;
     float currentNodeSpacing[3];
     currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
     bool activeAxis[3] = { 1, 0, 0 };
diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h
index 9acc6446..8d4fdd52 100644
--- a/reg-lib/ConvolutionKernel.h
+++ b/reg-lib/ConvolutionKernel.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "Kernel.h"
-#include "RNifti.h"
+#include "_reg_tools.h"
 
 class ConvolutionKernel: public Kernel {
 public:
@@ -10,5 +10,5 @@ class ConvolutionKernel: public Kernel {
     }
     ConvolutionKernel() : Kernel() {}
     virtual ~ConvolutionKernel() {}
-    virtual void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr) = 0;
+    virtual void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr) = 0;
 };
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 37e3619c..f8445e3f 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -174,7 +174,7 @@ void reg_aladin<T>::InitialiseRegistration() {
             for (int i = 1; i < this->referencePyramid[l]->nt; ++i)
                 active[i] = false;
             sigma[0] = this->referenceSigma;
-            convolutionKernel->castTo<ConvolutionKernel>()->Calculate(this->referencePyramid[l], sigma.get(), 0, nullptr, active.get());
+            convolutionKernel->castTo<ConvolutionKernel>()->Calculate(this->referencePyramid[l], sigma.get(), ConvKernelType::Mean, nullptr, active.get());
         }
         if (this->floatingSigma != 0) {
             // Only the first image is smoothed
@@ -184,7 +184,7 @@ void reg_aladin<T>::InitialiseRegistration() {
             for (int i = 1; i < this->floatingPyramid[l]->nt; ++i)
                 active[i] = false;
             sigma[0] = this->floatingSigma;
-            convolutionKernel->castTo<ConvolutionKernel>()->Calculate(this->floatingPyramid[l], sigma.get(), 0, nullptr, active.get());
+            convolutionKernel->castTo<ConvolutionKernel>()->Calculate(this->floatingPyramid[l], sigma.get(), ConvKernelType::Mean, nullptr, active.get());
         }
     }
 
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 80882617..903f3731 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -434,7 +434,7 @@ void reg_base<T>::Initialise() {
             for (int i = 1; i < referencePyramid[l]->nt; ++i)
                 active[i] = false;
             sigma[0] = referenceSmoothingSigma;
-            reg_tools_kernelConvolution(referencePyramid[l], sigma.get(), GAUSSIAN_KERNEL, nullptr, active.get());
+            reg_tools_kernelConvolution(referencePyramid[l], sigma.get(), ConvKernelType::Gaussian, nullptr, active.get());
         }
         if (floatingSmoothingSigma != 0) {
             // Only the first image is smoothed
@@ -444,7 +444,7 @@ void reg_base<T>::Initialise() {
             for (int i = 1; i < floatingPyramid[l]->nt; ++i)
                 active[i] = false;
             sigma[0] = floatingSmoothingSigma;
-            reg_tools_kernelConvolution(floatingPyramid[l], sigma.get(), GAUSSIAN_KERNEL, nullptr, active.get());
+            reg_tools_kernelConvolution(floatingPyramid[l], sigma.get(), ConvKernelType::Gaussian, nullptr, active.get());
         }
     }
 
@@ -623,7 +623,7 @@ void reg_base<T>::UseLNCC(int timepoint, float stddev) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLNCCKernelType(int type) {
+void reg_base<T>::SetLNCCKernelType(ConvKernelType type) {
     if (!measure_lncc)
         NR_FATAL_ERROR("The LNCC object has to be created first");
     measure_lncc->SetKernelType(type);
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index eb5d4d3d..01155ebe 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -169,7 +169,7 @@ class reg_base: public InterfaceOptimiser {
     virtual void UseKLDivergence(int);
     virtual void UseDTI(bool*);
     virtual void UseLNCC(int, float);
-    virtual void SetLNCCKernelType(int type);
+    virtual void SetLNCCKernelType(ConvKernelType type);
     virtual void SetLocalWeightSim(NiftiImage);
 
     virtual void SetNMIWeight(int, double);
diff --git a/reg-lib/cl/ClConvolutionKernel.cpp b/reg-lib/cl/ClConvolutionKernel.cpp
index 299cef9c..1fb8932a 100644
--- a/reg-lib/cl/ClConvolutionKernel.cpp
+++ b/reg-lib/cl/ClConvolutionKernel.cpp
@@ -2,7 +2,7 @@
 #include "_reg_tools.h"
 
 /* *************************************************************** */
-void ClConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) {
+void ClConvolutionKernel::Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask, bool *timePoints, bool *axis) {
     reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cl/ClConvolutionKernel.h b/reg-lib/cl/ClConvolutionKernel.h
index 4d1b31d1..824578d5 100644
--- a/reg-lib/cl/ClConvolutionKernel.h
+++ b/reg-lib/cl/ClConvolutionKernel.h
@@ -7,5 +7,5 @@ class ClConvolutionKernel: public ConvolutionKernel {
 public:
     ClConvolutionKernel() : ConvolutionKernel() {}
     ~ClConvolutionKernel() {}
-    void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr);
+    void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr);
 };
diff --git a/reg-lib/cpu/CpuConvolutionKernel.cpp b/reg-lib/cpu/CpuConvolutionKernel.cpp
index f91b3133..57b78b48 100644
--- a/reg-lib/cpu/CpuConvolutionKernel.cpp
+++ b/reg-lib/cpu/CpuConvolutionKernel.cpp
@@ -2,7 +2,7 @@
 #include "_reg_globalTrans.h"
 
 /* *************************************************************** */
-void CpuConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) {
+void CpuConvolutionKernel::Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask, bool *timePoints, bool *axis) {
     reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/CpuConvolutionKernel.h b/reg-lib/cpu/CpuConvolutionKernel.h
index 49e2b333..3e960308 100644
--- a/reg-lib/cpu/CpuConvolutionKernel.h
+++ b/reg-lib/cpu/CpuConvolutionKernel.h
@@ -6,5 +6,5 @@
 class CpuConvolutionKernel: public ConvolutionKernel {
 public:
     CpuConvolutionKernel() : ConvolutionKernel() {}
-    void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr);
+    void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr);
 };
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 51e4c82b..76145602 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -29,7 +29,7 @@ reg_lncc::reg_lncc(): reg_measure() {
     this->backwardMask = nullptr;
 
     // Gaussian kernel is used by default
-    this->kernelType = GAUSSIAN_KERNEL;
+    this->kernelType = ConvKernelType::Gaussian;
 
     for (int i = 0; i < 255; ++i)
         kernelStandardDeviation[i] = -5.f;
@@ -201,8 +201,8 @@ void UpdateLocalStatImages(const nifti_image *refImage,
                            const int *refMask,
                            int *combinedMask,
                            const float *kernelStandardDeviation,
-                           const int& kernelType,
-                           const int& currentTimepoint) {
+                           const ConvKernelType kernelType,
+                           const int currentTimepoint) {
     // Generate the combined mask to ignore all NaN values
 #ifdef _WIN32
     long voxel;
@@ -258,8 +258,8 @@ double reg_getLnccValue(const nifti_image *referenceImage,
                         const int *combinedMask,
                         const float *kernelStandardDeviation,
                         nifti_image *correlationImage,
-                        const int& kernelType,
-                        const int& currentTimepoint) {
+                        const ConvKernelType kernelType,
+                        const int currentTimepoint) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -318,8 +318,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  int *forwardMask,
                                  const float *kernelStandardDeviation,
                                  nifti_image *correlationImage,
-                                 const int& kernelType,
-                                 const int& referenceTimePoint,
+                                 const ConvKernelType kernelType,
+                                 const int referenceTimePoint,
                                  const double *timePointWeight) {
     double lncc = 0;
     for (int currentTimepoint = 0; currentTimepoint < referenceTimePoint; ++currentTimepoint) {
@@ -401,9 +401,9 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage,
                                    nifti_image *correlationImage,
                                    const nifti_image *warpedGradient,
                                    nifti_image *measureGradient,
-                                   const int& kernelType,
-                                   const int& currentTimepoint,
-                                   const double& timepointWeight) {
+                                   const ConvKernelType kernelType,
+                                   const int currentTimepoint,
+                                   const double timepointWeight) {
 #ifdef _WIN32
     long voxel;
     long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -529,9 +529,9 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                             nifti_image *correlationImage,
                                             const nifti_image *warpedGradient,
                                             nifti_image *measureGradient,
-                                            const int& kernelType,
-                                            const int& currentTimepoint,
-                                            const double& timepointWeight) {
+                                            const ConvKernelType kernelType,
+                                            const int currentTimepoint,
+                                            const double timepointWeight) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         // Compute the mean and variance of the reference and warped floating
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index b59b48fd..fea5e464 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -47,7 +47,7 @@ class reg_lncc: public reg_measure {
         this->kernelStandardDeviation[t] = stddev;
     }
     /// @brief Set the kernel type
-    virtual void SetKernelType(int t) {
+    virtual void SetKernelType(ConvKernelType t) {
         this->kernelType = t;
     }
 
@@ -67,6 +67,6 @@ class reg_lncc: public reg_measure {
     nifti_image *warpedSdevImageBw;
     int *backwardMask;
 
-    int kernelType;
+    ConvKernelType kernelType;
 };
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 3fa94f11..b620e9e6 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -100,7 +100,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
         ShiftImage<DataType>(currentInputImage, shiftedImage, mask, rSamplingX[i], rSamplingY[i], rSamplingZ[i]);
         reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diffImage);
         reg_tools_multiplyImageToImage(diffImage, diffImage, diffImage);
-        reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask);
+        reg_tools_kernelConvolution(diffImage, &sigma, ConvKernelType::Gaussian, mask);
         reg_tools_addImageToImage(meanImage, diffImage, meanImage);
         // Store the current descriptor
         const size_t index = i * diffImage->nvox;
@@ -217,7 +217,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
         ShiftImage<DataType>(currentInputImage, shiftedImage, mask, rSamplingX[i], rSamplingY[i], rSamplingZ[i]);
         reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diffImage);
         reg_tools_multiplyImageToImage(diffImage, diffImage, diffImage);
-        reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask);
+        reg_tools_kernelConvolution(diffImage, &sigma, ConvKernelType::Gaussian, mask);
 
         for (int j = 0; j < 2; j++) {
             ShiftImage<DataType>(diffImage, diffImageShifted, maskDiffImage, tx[compteurId], ty[compteurId], tz[compteurId]);
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index ae9d6e2a..a0255b23 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -334,9 +334,9 @@ template void reg_thresholdImage<double>(nifti_image*, double, double);
 /* *************************************************************** */
 template <class PrecisionType, class DataType>
 PrecisionType reg_getMaximalLength(const nifti_image *image,
-                                   const bool& optimiseX,
-                                   const bool& optimiseY,
-                                   const bool& optimiseZ) {
+                                   const bool optimiseX,
+                                   const bool optimiseY,
+                                   const bool optimiseZ) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const DataType *dataPtrX = static_cast<DataType*>(image->data);
     const DataType *dataPtrY = &dataPtrX[voxelNumber];
@@ -354,9 +354,9 @@ PrecisionType reg_getMaximalLength(const nifti_image *image,
 /* *************************************************************** */
 template <class PrecisionType>
 PrecisionType reg_getMaximalLength(const nifti_image *image,
-                                   const bool& optimiseX,
-                                   const bool& optimiseY,
-                                   const bool& optimiseZ) {
+                                   const bool optimiseX,
+                                   const bool optimiseY,
+                                   const bool optimiseZ) {
     switch (image->datatype) {
     case NIFTI_TYPE_FLOAT32:
         return reg_getMaximalLength<PrecisionType, float>(image, optimiseX, optimiseY, image->nz > 1 ? optimiseZ : false);
@@ -367,8 +367,8 @@ PrecisionType reg_getMaximalLength(const nifti_image *image,
     }
     return EXIT_SUCCESS;
 }
-template float reg_getMaximalLength<float>(const nifti_image*, const bool&, const bool&, const bool&);
-template double reg_getMaximalLength<double>(const nifti_image*, const bool&, const bool&, const bool&);
+template float reg_getMaximalLength<float>(const nifti_image*, const bool, const bool, const bool);
+template double reg_getMaximalLength<double>(const nifti_image*, const bool, const bool, const bool);
 /* *************************************************************** */
 template <class NewType, class DataType>
 void reg_tools_changeDatatype(nifti_image *image, int type) {
@@ -650,7 +650,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1,
 template <class Type>
 void reg_tools_operationValueToImage(const nifti_image *img,
                                      nifti_image *res,
-                                     const double& val,
+                                     const double val,
                                      const Operation& operation) {
     const Type *imgPtr = static_cast<Type*>(img->data);
     Type *resPtr = static_cast<Type*>(res->data);
@@ -678,7 +678,7 @@ void reg_tools_operationValueToImage(const nifti_image *img,
 /* *************************************************************** */
 void reg_tools_addValueToImage(const nifti_image *img,
                                nifti_image *res,
-                               const double& val) {
+                               const double val) {
     if (img->datatype != res->datatype)
         NR_FATAL_ERROR("Input and output image are expected to be of the same type");
     if (img->nvox != res->nvox)
@@ -716,7 +716,7 @@ void reg_tools_addValueToImage(const nifti_image *img,
 /* *************************************************************** */
 void reg_tools_subtractValueFromImage(const nifti_image *img,
                                       nifti_image *res,
-                                      const double& val) {
+                                      const double val) {
     if (img->datatype != res->datatype)
         NR_FATAL_ERROR("Input and output image are expected to be of the same type");
     if (img->nvox != res->nvox)
@@ -754,7 +754,7 @@ void reg_tools_subtractValueFromImage(const nifti_image *img,
 /* *************************************************************** */
 void reg_tools_multiplyValueToImage(const nifti_image *img,
                                     nifti_image *res,
-                                    const double& val) {
+                                    const double val) {
     if (img->datatype != res->datatype)
         NR_FATAL_ERROR("Input and output image are expected to be of the same type");
     if (img->nvox != res->nvox)
@@ -792,7 +792,7 @@ void reg_tools_multiplyValueToImage(const nifti_image *img,
 /* *************************************************************** */
 void reg_tools_divideValueToImage(const nifti_image *img,
                                   nifti_image *res,
-                                  const double& val) {
+                                  const double val) {
     if (img->datatype != res->datatype)
         NR_FATAL_ERROR("Input and output image are expected to be of the same type");
     if (img->nvox != res->nvox)
@@ -831,7 +831,7 @@ void reg_tools_divideValueToImage(const nifti_image *img,
 template <class DataType>
 void reg_tools_kernelConvolution(nifti_image *image,
                                  const float *sigma,
-                                 const int& kernelType,
+                                 const ConvKernelType kernelType,
                                  const int *mask,
                                  const bool *timePoints,
                                  const bool *axes) {
@@ -873,13 +873,13 @@ void reg_tools_kernelConvolution(nifti_image *image,
                     else temp = fabs(sigma[t]); // voxel-based if negative value
                     int radius = 0;
                     // Define the kernel size
-                    if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) {
+                    if (kernelType == ConvKernelType::Mean || kernelType == ConvKernelType::Linear) {
                         // Mean or linear filtering
                         radius = static_cast<int>(temp);
-                    } else if (kernelType == GAUSSIAN_KERNEL) {
+                    } else if (kernelType == ConvKernelType::Gaussian) {
                         // Gaussian kernel
                         radius = static_cast<int>(temp * 3.0f);
-                    } else if (kernelType == CUBIC_SPLINE_KERNEL) {
+                    } else if (kernelType == ConvKernelType::Cubic) {
                         // Spline kernel
                         radius = static_cast<int>(temp * 2.0f);
                     } else {
@@ -890,7 +890,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                         float kernel[4096];
                         double kernelSum = 0;
                         // Fill the kernel
-                        if (kernelType == CUBIC_SPLINE_KERNEL) {
+                        if (kernelType == ConvKernelType::Cubic) {
                             // Compute the Cubic Spline kernel
                             for (int i = -radius; i <= radius; i++) {
                                 // temp contains the kernel node spacing
@@ -902,7 +902,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                 else kernel[i + radius] = 0;
                                 kernelSum += kernel[i + radius];
                             }
-                        } else if (kernelType == GAUSSIAN_KERNEL) {
+                        } else if (kernelType == ConvKernelType::Gaussian) {
                             // Compute the Gaussian kernel
                             for (int i = -radius; i <= radius; i++) {
                                 // 2.506... = sqrt(2*pi)
@@ -910,13 +910,13 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                 kernel[radius + i] = static_cast<float>(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631));
                                 kernelSum += kernel[radius + i];
                             }
-                        } else if (kernelType == LINEAR_KERNEL) {
+                        } else if (kernelType == ConvKernelType::Linear) {
                             // Compute the linear kernel
                             for (int i = -radius; i <= radius; i++) {
                                 kernel[radius + i] = 1.f - fabs(i / static_cast<float>(radius));
                                 kernelSum += kernel[radius + i];
                             }
-                        } else if (kernelType == MEAN_KERNEL && imageDims[2] == 1) {
+                        } else if (kernelType == ConvKernelType::Mean && imageDims[2] == 1) {
                             // Compute the mean kernel
                             for (int i = -radius; i <= radius; i++) {
                                 kernel[radius + i] = 1.f;
@@ -925,7 +925,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                         }
                         // No kernel is required for the mean filtering
                         // No need for kernel normalisation as this is handled by the density function
-                        NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]");
+                        NR_DEBUG("Convolution type[" << int(kernelType) << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]");
 
                         int planeNumber, planeIndex, lineOffset;
                         int lineIndex, shiftPre, shiftPst, k;
@@ -1305,7 +1305,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image,
 /* *************************************************************** */
 void reg_tools_kernelConvolution(nifti_image *image,
                                  const float *sigma,
-                                 const int& kernelType,
+                                 const ConvKernelType kernelType,
                                  const int *mask,
                                  const bool *timePoints,
                                  const bool *axes) {
@@ -1346,7 +1346,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
         /* the input image is first smooth */
         float *sigma = new float[image->nt];
         for (int i = 0; i < image->nt; ++i) sigma[i] = -0.7355f;
-        reg_tools_kernelConvolution(image, sigma, GAUSSIAN_KERNEL);
+        reg_tools_kernelConvolution(image, sigma, ConvKernelType::Gaussian);
         delete[] sigma;
     }
 
@@ -2556,7 +2556,7 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x
     z = index;
 }
 /* *************************************************************** */
-nifti_image* nifti_dup(const nifti_image& image, const bool& copyData) {
+nifti_image* nifti_dup(const nifti_image& image, const bool copyData) {
     nifti_image *newImage = nifti_copy_nim_info(&image);
     newImage->data = calloc(image.nvox, image.nbyper);
     if (copyData)
@@ -2564,7 +2564,7 @@ nifti_image* nifti_dup(const nifti_image& image, const bool& copyData) {
     return newImage;
 }
 /* *************************************************************** */
-void PrintCmdLine(const int& argc, const char * const *argv, const bool& verbose) {
+void PrintCmdLine(const int& argc, const char * const *argv, const bool verbose) {
 #ifdef NDEBUG
     if (!verbose) return;
 #endif
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 77d01e55..c014e6d1 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -32,12 +32,7 @@ using RNifti::NiftiImage;
 using RNifti::NiftiImageData;
 using NiftiDim = NiftiImage::Dim;
 
-typedef enum {
-    MEAN_KERNEL,
-    LINEAR_KERNEL,
-    GAUSSIAN_KERNEL,
-    CUBIC_SPLINE_KERNEL
-} NREG_CONV_KERNEL_TYPE;
+enum class ConvKernelType { Mean, Linear, Gaussian, Cubic };
 
 /* *************************************************************** */
 /** @brief This function check some header parameters and correct them in
@@ -94,7 +89,7 @@ void reg_getRealImageSpacing(nifti_image *image,
  */
 void reg_tools_kernelConvolution(nifti_image *image,
                                  const float *sigma,
-                                 const int& kernelType,
+                                 const ConvKernelType kernelType,
                                  const int *mask = nullptr,
                                  const bool *timePoints = nullptr,
                                  const bool *axes = nullptr);
@@ -136,9 +131,9 @@ void reg_downsampleImage(nifti_image *image,
  */
 template <class PrecisionType>
 PrecisionType reg_getMaximalLength(const nifti_image *image,
-                                   const bool& optimiseX,
-                                   const bool& optimiseY,
-                                   const bool& optimiseZ);
+                                   const bool optimiseX,
+                                   const bool optimiseY,
+                                   const bool optimiseZ);
 /* *************************************************************** */
 /** @brief Change the datatype of a nifti image
  * @param image Image to be updated.
@@ -194,7 +189,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1,
  */
 void reg_tools_addValueToImage(const nifti_image *img,
                                nifti_image *out,
-                               const double& val);
+                               const double val);
 /* *************************************************************** */
 /** @brief Subtract a scalar from all image intensity
  * @param img Input image
@@ -203,7 +198,7 @@ void reg_tools_addValueToImage(const nifti_image *img,
  */
 void reg_tools_subtractValueFromImage(const nifti_image *img,
                                       nifti_image *out,
-                                      const double& val);
+                                      const double val);
 /* *************************************************************** */
 /** @brief Multiply a scalar to all image intensity
  * @param img Input image
@@ -212,7 +207,7 @@ void reg_tools_subtractValueFromImage(const nifti_image *img,
  */
 void reg_tools_multiplyValueToImage(const nifti_image *img,
                                     nifti_image *out,
-                                    const double& val);
+                                    const double val);
 /* *************************************************************** */
 /** @brief Divide a scalar to all image intensity
  * @param img Input image
@@ -221,7 +216,7 @@ void reg_tools_multiplyValueToImage(const nifti_image *img,
  */
 void reg_tools_divideValueToImage(const nifti_image *img,
                                   nifti_image *out,
-                                  const double& val);
+                                  const double val);
 /* *************************************************************** */
 /** @brief Binarise an input image. All values different
  * from 0 are set to 1, 0 otherwise.
@@ -428,8 +423,8 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x
  * @param copyData Boolean to specify if the image data should be copied
  * @return The duplicated image
  */
-nifti_image* nifti_dup(const nifti_image& image, const bool& copyData = true);
+nifti_image* nifti_dup(const nifti_image& image, const bool copyData = true);
 /* *************************************************************** */
 /// @brief Prints the command line
-void PrintCmdLine(const int& argc, const char * const *argv, const bool& verbose);
+void PrintCmdLine(const int& argc, const char * const *argv, const bool verbose);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index 53e54d04..dc573e42 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -145,7 +145,7 @@ void CudaCompute::SmoothGradient(float sigma) {
     if (sigma == 0) return;
     sigma = fabs(sigma);
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    Cuda::KernelConvolution(con.F3dContent::GetTransformationGradient(), con.GetTransformationGradientCuda(), &sigma, GAUSSIAN_KERNEL);
+    Cuda::KernelConvolution(con.F3dContent::GetTransformationGradient(), con.GetTransformationGradientCuda(), &sigma, ConvKernelType::Gaussian);
 }
 /* *************************************************************** */
 void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) {
@@ -165,7 +165,7 @@ void CudaCompute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) {
 /* *************************************************************** */
 void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) {
     const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
-    constexpr int kernelType = CUBIC_SPLINE_KERNEL;
+    constexpr ConvKernelType kernelType = ConvKernelType::Cubic;
     float currentNodeSpacing[3];
     currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
     bool activeAxis[3] = { 1, 0, 0 };
diff --git a/reg-lib/cuda/CudaConvolutionKernel.cpp b/reg-lib/cuda/CudaConvolutionKernel.cpp
index 60d7b9cd..9e0882a6 100644
--- a/reg-lib/cuda/CudaConvolutionKernel.cpp
+++ b/reg-lib/cuda/CudaConvolutionKernel.cpp
@@ -4,7 +4,7 @@
 /* *************************************************************** */
 void CudaConvolutionKernel::Calculate(nifti_image *image,
                                       float *sigma,
-                                      int kernelType,
+                                      ConvKernelType kernelType,
                                       int *mask,
                                       bool *timePoint,
                                       bool *axis) {
diff --git a/reg-lib/cuda/CudaConvolutionKernel.h b/reg-lib/cuda/CudaConvolutionKernel.h
index 832ec853..f0d9ca74 100644
--- a/reg-lib/cuda/CudaConvolutionKernel.h
+++ b/reg-lib/cuda/CudaConvolutionKernel.h
@@ -9,7 +9,7 @@ class CudaConvolutionKernel: public ConvolutionKernel {
     CudaConvolutionKernel() : ConvolutionKernel() {}
     void Calculate(nifti_image *image,
                    float *sigma,
-                   int kernelType,
+                   ConvKernelType kernelType,
                    int *mask = nullptr,
                    bool *timePoints = nullptr,
                    bool *axis = nullptr);
diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu
index 7f446c53..2f8ddcaf 100644
--- a/reg-lib/cuda/CudaKernelConvolution.cu
+++ b/reg-lib/cuda/CudaKernelConvolution.cu
@@ -4,7 +4,7 @@
 void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                                        float4 *imageCuda,
                                        const float *sigma,
-                                       const int kernelType,
+                                       const ConvKernelType kernelType,
                                        const bool *timePoints,
                                        const bool *axes) {
     if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048)
@@ -56,13 +56,13 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
             else temp = fabs(sigma[t]); // voxel-based if negative value
             int radius = 0;
             // Define the kernel size
-            if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) {
+            if (kernelType == ConvKernelType::Mean || kernelType == ConvKernelType::Linear) {
                 // Mean or linear filtering
                 radius = static_cast<int>(temp);
-            } else if (kernelType == GAUSSIAN_KERNEL) {
+            } else if (kernelType == ConvKernelType::Gaussian) {
                 // Gaussian kernel
                 radius = static_cast<int>(temp * 3.0);
-            } else if (kernelType == CUBIC_SPLINE_KERNEL) {
+            } else if (kernelType == ConvKernelType::Cubic) {
                 // Spline kernel
                 radius = static_cast<int>(temp * 2.0);
             } else {
@@ -74,7 +74,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
             vector<float> kernel(2 * radius + 1);
             double kernelSum = 0;
             // Fill the kernel
-            if (kernelType == CUBIC_SPLINE_KERNEL) {
+            if (kernelType == ConvKernelType::Cubic) {
                 // Compute the Cubic Spline kernel
                 for (int i = -radius; i <= radius; i++) {
                     // temp contains the kernel node spacing
@@ -86,7 +86,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                     else kernel[i + radius] = 0;
                     kernelSum += kernel[i + radius];
                 }
-            } else if (kernelType == GAUSSIAN_KERNEL) {
+            } else if (kernelType == ConvKernelType::Gaussian) {
                 // Compute the Gaussian kernel
                 for (int i = -radius; i <= radius; i++) {
                     // 2.506... = sqrt(2*pi)
@@ -94,13 +94,13 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                     kernel[i + radius] = static_cast<float>(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631));
                     kernelSum += kernel[i + radius];
                 }
-            } else if (kernelType == LINEAR_KERNEL) {
+            } else if (kernelType == ConvKernelType::Linear) {
                 // Compute the linear kernel
                 for (int i = -radius; i <= radius; i++) {
                     kernel[i + radius] = 1.f - fabs(i / static_cast<float>(radius));
                     kernelSum += kernel[i + radius];
                 }
-            } else if (kernelType == MEAN_KERNEL && imageDims.z == 1) {
+            } else if (kernelType == ConvKernelType::Mean && imageDims.z == 1) {
                 // Compute the mean kernel
                 for (int i = -radius; i <= radius; i++) {
                     kernel[i + radius] = 1.f;
@@ -109,7 +109,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
             }
             // No kernel is required for the mean filtering
             // No need for kernel normalisation as this is handled by the density function
-            NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]");
+            NR_DEBUG("Convolution type[" << int(kernelType) << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]");
 
             int planeCount, lineOffset;
             switch (n) {
diff --git a/reg-lib/cuda/CudaKernelConvolution.hpp b/reg-lib/cuda/CudaKernelConvolution.hpp
index 7d74c944..de1a3c0c 100644
--- a/reg-lib/cuda/CudaKernelConvolution.hpp
+++ b/reg-lib/cuda/CudaKernelConvolution.hpp
@@ -19,7 +19,7 @@ namespace NiftyReg::Cuda {
 void KernelConvolution(const nifti_image *image,
                        float4 *imageCuda,
                        const float *sigma,
-                       const int kernelType,
+                       const ConvKernelType kernelType,
                        const bool *timePoints = nullptr,
                        const bool *axes = nullptr);
 /* *************************************************************** */
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index 39841b80..1ad4bd2c 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -121,7 +121,7 @@ class NmiTest {
                 *jhPtr++ = jh[i][j] / ref.nVoxels();
         // Apply a convolution to mimic the parzen windowing
         float sigma[1] = { 1.f };
-        reg_tools_kernelConvolution(jointHistogram, sigma, CUBIC_SPLINE_KERNEL);
+        reg_tools_kernelConvolution(jointHistogram, sigma, ConvKernelType::Cubic);
         // Restore the jh array
         jhPtr = static_cast<double*>(jointHistogram->data);
         for (unsigned i = 0; i < 68; ++i)
diff --git a/reg-test/reg_test_regr_kernelConvolution.cpp b/reg-test/reg_test_regr_kernelConvolution.cpp
index c4fe1bd8..034a9fd4 100644
--- a/reg-test/reg_test_regr_kernelConvolution.cpp
+++ b/reg-test/reg_test_regr_kernelConvolution.cpp
@@ -126,8 +126,8 @@ class KernelConvolutionTest {
             contentCuda->SetDeformationField(imageCuda.disown());
 
             // Compute the kernel convolution for CPU and CUDA
-            reg_tools_kernelConvolution(contentCpu->GetDeformationField(), sigmaValues.data(), kernelType, nullptr, activeTimePoints, activeAxes);
-            Cuda::KernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), kernelType, activeTimePoints, activeAxes);
+            reg_tools_kernelConvolution(contentCpu->GetDeformationField(), sigmaValues.data(), ConvKernelType(kernelType), nullptr, activeTimePoints, activeAxes);
+            Cuda::KernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), ConvKernelType(kernelType), activeTimePoints, activeAxes);
 
             // Get the images
             imageCpu = NiftiImage(contentCpu->GetDeformationField(), NiftiImage::Copy::Image);

From 230c6b936842037a4155e5bf44c87e8968b48e72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 11 Oct 2023 12:51:31 +0100
Subject: [PATCH 220/314] Fix a bug in the kernel convolution regression test

---
 niftyreg_build_version.txt                   | 2 +-
 reg-test/CMakeLists.txt                      | 2 +-
 reg-test/reg_test_regr_kernelConvolution.cpp | 5 ++---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 87537f49..1ce6b02d 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-338
+339
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index c86af8a0..e999620b 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -110,6 +110,7 @@ include(Catch)
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
 set(EXEC_LIST reg_test_affineDeformationField)
+set(EXEC_LIST reg_test_be ${EXEC_LIST})
 set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST})
 set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST})
@@ -118,7 +119,6 @@ set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})
 set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
 set(EXEC_LIST reg_test_nmi ${EXEC_LIST})
-set(EXEC_LIST reg_test_be ${EXEC_LIST})
 set(EXEC_LIST reg_test_nmi_gradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
diff --git a/reg-test/reg_test_regr_kernelConvolution.cpp b/reg-test/reg_test_regr_kernelConvolution.cpp
index 034a9fd4..342ca9ee 100644
--- a/reg-test/reg_test_regr_kernelConvolution.cpp
+++ b/reg-test/reg_test_regr_kernelConvolution.cpp
@@ -79,9 +79,8 @@ class KernelConvolutionTest {
                                      { true, true, true } };
 
         // Add the time points and axes to the latest test data
-        auto latestTestData = testData.end() - timePointCount;
-        for (int i = 0; i < timePointCount; i++) {
-            auto&& [testName, image, sigmaValues, kernelType, activeTimePoints, activeAxes] = latestTestData[i];
+        for (int i = 0, latestIndex = int(testData.size()) - timePointCount; i < timePointCount; i++, latestIndex++) {
+            auto&& [testName, image, sigmaValues, kernelType, activeTimePoints, activeAxes] = testData[latestIndex];
             const std::string timePointsStr = std::accumulate(timePoints[i], timePoints[i] + 4, ""s, strConcat);
             const std::string axesStr = std::accumulate(axes[i], axes[i] + 3, ""s, strConcat);
             testData.emplace_back(TestData(

From e04dacd52f0a40438cd0228dd77f51c3edf401de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 11 Oct 2023 14:21:34 +0100
Subject: [PATCH 221/314] Optimise Cuda::KernelConvolution()

---
 niftyreg_build_version.txt                   |   2 +-
 reg-lib/cuda/CudaCompute.cpp                 |  35 +++---
 reg-lib/cuda/CudaKernelConvolution.cu        | 109 +++++++++++--------
 reg-lib/cuda/CudaKernelConvolution.hpp       |   2 +-
 reg-test/reg_test_regr_kernelConvolution.cpp |  10 +-
 5 files changed, 91 insertions(+), 67 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 1ce6b02d..51272bac 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-339
+340
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp
index dc573e42..928faa87 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cpp
@@ -145,7 +145,7 @@ void CudaCompute::SmoothGradient(float sigma) {
     if (sigma == 0) return;
     sigma = fabs(sigma);
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    Cuda::KernelConvolution(con.F3dContent::GetTransformationGradient(), con.GetTransformationGradientCuda(), &sigma, ConvKernelType::Gaussian);
+    Cuda::KernelConvolution<ConvKernelType::Gaussian>(con.F3dContent::GetTransformationGradient(), con.GetTransformationGradientCuda(), &sigma);
 }
 /* *************************************************************** */
 void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) {
@@ -169,33 +169,30 @@ void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) {
     float currentNodeSpacing[3];
     currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
     bool activeAxis[3] = { 1, 0, 0 };
-    Cuda::KernelConvolution(image,
-                            imageCuda,
-                            currentNodeSpacing,
-                            kernelType,
-                            nullptr, // all volumes are considered as active
-                            activeAxis);
+    Cuda::KernelConvolution<kernelType>(image,
+                                        imageCuda,
+                                        currentNodeSpacing,
+                                        nullptr, // all volumes are considered as active
+                                        activeAxis);
     // Convolution along the y axis
     currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dy;
     activeAxis[0] = 0;
     activeAxis[1] = 1;
-    Cuda::KernelConvolution(image,
-                            imageCuda,
-                            currentNodeSpacing,
-                            kernelType,
-                            nullptr, // all volumes are considered as active
-                            activeAxis);
+    Cuda::KernelConvolution<kernelType>(image,
+                                        imageCuda,
+                                        currentNodeSpacing,
+                                        nullptr, // all volumes are considered as active
+                                        activeAxis);
     // Convolution along the z axis if required
     if (image->nz > 1) {
         currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dz;
         activeAxis[1] = 0;
         activeAxis[2] = 1;
-        Cuda::KernelConvolution(image,
-                                imageCuda,
-                                currentNodeSpacing,
-                                kernelType,
-                                nullptr, // all volumes are considered as active
-                                activeAxis);
+        Cuda::KernelConvolution<kernelType>(image,
+                                            imageCuda,
+                                            currentNodeSpacing,
+                                            nullptr, // all volumes are considered as active
+                                            activeAxis);
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu
index 2f8ddcaf..a9b9ece2 100644
--- a/reg-lib/cuda/CudaKernelConvolution.cu
+++ b/reg-lib/cuda/CudaKernelConvolution.cu
@@ -1,10 +1,10 @@
 #include "CudaKernelConvolution.hpp"
 
 /* *************************************************************** */
+template<ConvKernelType kernelType>
 void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                                        float4 *imageCuda,
                                        const float *sigma,
-                                       const ConvKernelType kernelType,
                                        const bool *timePoints,
                                        const bool *axes) {
     if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048)
@@ -35,16 +35,27 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
     float *bufferIntensityCudaPtr = bufferIntensityCuda.data().get();
     float *bufferDensityCudaPtr = bufferDensityCuda.data().get();
 
+    // Create texture objects
+    auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
+                                                     voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 1);
+    auto densityTexturePtr = Cuda::CreateTextureObject(densityCudaPtr, cudaResourceTypeLinear,
+                                                       voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1);
+    auto nanImageTexturePtr = Cuda::CreateTextureObject(nanImageCudaPtr, cudaResourceTypeLinear,
+                                                        voxelNumber * sizeof(bool), cudaChannelFormatKindUnsigned, 1);
+    auto imageTexture = *imageTexturePtr;
+    auto densityTexture = *densityTexturePtr;
+    auto nanImageTexture = *nanImageTexturePtr;
+
     for (int t = 0; t < activeTimePointCount; t++) {
         if (!activeTimePoints[t]) continue;
 
         thrust::for_each_n(thrust::device, thrust::make_counting_iterator<size_t>(0), voxelNumber, [=]__device__(const size_t index) {
-            float& intensityVal = reinterpret_cast<float*>(&imageCuda[index])[t];
+            const float& intensityVal = tex1Dfetch<float>(imageTexture, index * 4 + t);
             float& densityVal = densityCudaPtr[index];
             bool& nanImageVal = nanImageCudaPtr[index];
             densityVal = intensityVal == intensityVal ? 1.f : 0;
             nanImageVal = !static_cast<bool>(densityVal);
-            if (nanImageVal) intensityVal = 0;
+            if (nanImageVal) reinterpret_cast<float*>(&imageCuda[index])[t] = 0;
         });
 
         // Loop over the x, y and z dimensions
@@ -56,25 +67,20 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
             else temp = fabs(sigma[t]); // voxel-based if negative value
             int radius = 0;
             // Define the kernel size
-            if (kernelType == ConvKernelType::Mean || kernelType == ConvKernelType::Linear) {
-                // Mean or linear filtering
+            if constexpr (kernelType == ConvKernelType::Mean || kernelType == ConvKernelType::Linear)
                 radius = static_cast<int>(temp);
-            } else if (kernelType == ConvKernelType::Gaussian) {
-                // Gaussian kernel
+            else if constexpr (kernelType == ConvKernelType::Gaussian)
                 radius = static_cast<int>(temp * 3.0);
-            } else if (kernelType == ConvKernelType::Cubic) {
-                // Spline kernel
+            else if constexpr (kernelType == ConvKernelType::Cubic)
                 radius = static_cast<int>(temp * 2.0);
-            } else {
-                NR_FATAL_ERROR("Unknown kernel type");
-            }
+            else NR_FATAL_ERROR("Unknown kernel type");
             if (radius <= 0) continue;
 
             // Allocate the kernel
             vector<float> kernel(2 * radius + 1);
             double kernelSum = 0;
             // Fill the kernel
-            if (kernelType == ConvKernelType::Cubic) {
+            if constexpr (kernelType == ConvKernelType::Cubic) {
                 // Compute the Cubic Spline kernel
                 for (int i = -radius; i <= radius; i++) {
                     // temp contains the kernel node spacing
@@ -86,7 +92,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                     else kernel[i + radius] = 0;
                     kernelSum += kernel[i + radius];
                 }
-            } else if (kernelType == ConvKernelType::Gaussian) {
+            } else if constexpr (kernelType == ConvKernelType::Gaussian) {
                 // Compute the Gaussian kernel
                 for (int i = -radius; i <= radius; i++) {
                     // 2.506... = sqrt(2*pi)
@@ -94,17 +100,19 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                     kernel[i + radius] = static_cast<float>(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631));
                     kernelSum += kernel[i + radius];
                 }
-            } else if (kernelType == ConvKernelType::Linear) {
+            } else if constexpr (kernelType == ConvKernelType::Linear) {
                 // Compute the linear kernel
                 for (int i = -radius; i <= radius; i++) {
                     kernel[i + radius] = 1.f - fabs(i / static_cast<float>(radius));
                     kernelSum += kernel[i + radius];
                 }
-            } else if (kernelType == ConvKernelType::Mean && imageDims.z == 1) {
-                // Compute the mean kernel
-                for (int i = -radius; i <= radius; i++) {
-                    kernel[i + radius] = 1.f;
-                    kernelSum += kernel[i + radius];
+            } else if constexpr (kernelType == ConvKernelType::Mean) {
+                if (imageDims.z == 1) {
+                    // Compute the mean kernel
+                    for (int i = -radius; i <= radius; i++) {
+                        kernel[i + radius] = 1.f;
+                        kernelSum += kernel[i + radius];
+                    }
                 }
             }
             // No kernel is required for the mean filtering
@@ -127,9 +135,17 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                 break;
             }
 
-            thrust::device_vector<float> kernelCuda(kernel.begin(), kernel.end());
-            float *kernelCudaPtr = kernelCuda.data().get();
             const int imageDim = reinterpret_cast<const int*>(&imageDims)[n];
+            // Create the kernel texture
+            thrust::device_vector<float> kernelCuda;
+            Cuda::UniqueTextureObjectPtr kernelTexturePtr(nullptr, nullptr);
+            cudaTextureObject_t kernelTexture = 0;
+            if (kernelSum > 0) {
+                kernelCuda = kernel;
+                kernelTexturePtr = std::move(Cuda::CreateTextureObject(kernelCuda.data().get(), cudaResourceTypeLinear,
+                                                                       kernel.size() * sizeof(float), cudaChannelFormatKindFloat, 1));
+                kernelTexture = *kernelTexturePtr;
+            }
 
             // Loop over the different voxel
             thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), planeCount, [=]__device__(const int planeIndex) {
@@ -146,49 +162,45 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                     break;
                 }
                 // Fetch the current line into a stack buffer
-                float *bufferIntensityPtr = &bufferIntensityCudaPtr[planeIndex * imageDim];
-                float *bufferDensityPtr = &bufferDensityCudaPtr[planeIndex * imageDim];
-                float4 *currentIntensityPtr = &imageCuda[realIndex];
-                float *currentDensityPtr = &densityCudaPtr[realIndex];
-                for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex) {
-                    bufferIntensityPtr[lineIndex] = reinterpret_cast<float*>(currentIntensityPtr)[t];
-                    bufferDensityPtr[lineIndex] = *currentDensityPtr;
-                    currentIntensityPtr += lineOffset;
-                    currentDensityPtr += lineOffset;
+                const auto bufferIndex = planeIndex * imageDim;
+                float *bufferIntensityPtr = &bufferIntensityCudaPtr[bufferIndex];
+                float *bufferDensityPtr = &bufferDensityCudaPtr[bufferIndex];
+                for (int lineIndex = 0, index = realIndex; lineIndex < imageDim; lineIndex++, index += lineOffset) {
+                    bufferIntensityPtr[lineIndex] = tex1Dfetch<float>(imageTexture, index * 4 + t);
+                    bufferDensityPtr[lineIndex] = tex1Dfetch<float>(densityTexture, index);
                 }
                 if (kernelSum > 0) {
                     // Perform the kernel convolution along 1 line
-                    for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex) {
+                    for (int lineIndex = 0; lineIndex < imageDim; lineIndex++, realIndex += lineOffset) {
                         // Define the kernel boundaries
                         int shiftPre = lineIndex - radius;
                         int shiftPst = lineIndex + radius + 1;
-                        float *kernelPtr;
+                        int kernelIndex = 0;
                         if (shiftPre < 0) {
-                            kernelPtr = &kernelCudaPtr[-shiftPre];
+                            kernelIndex = -shiftPre;
                             shiftPre = 0;
-                        } else kernelPtr = kernelCudaPtr;
+                        }
                         if (shiftPst > imageDim) shiftPst = imageDim;
                         // Set the current values to zero
                         // Increment the current value by performing the weighted sum
                         double intensitySum = 0, densitySum = 0;
-                        for (int k = shiftPre; k < shiftPst; ++k) {
-                            float& kernelValue = *kernelPtr++;
+                        for (int k = shiftPre; k < shiftPst; k++, kernelIndex++) {
+                            const float& kernelValue = tex1Dfetch<float>(kernelTexture, kernelIndex);
                             intensitySum += kernelValue * bufferIntensityPtr[k];
                             densitySum += kernelValue * bufferDensityPtr[k];
                         }
                         // Store the computed value in place
                         reinterpret_cast<float*>(&imageCuda[realIndex])[t] = static_cast<float>(intensitySum);
                         densityCudaPtr[realIndex] = static_cast<float>(densitySum);
-                        realIndex += lineOffset;
                     } // line convolution
                 } else { // kernelSum <= 0
-                    for (int lineIndex = 1; lineIndex < imageDim; ++lineIndex) {
+                    for (int lineIndex = 1; lineIndex < imageDim; lineIndex++) {
                         bufferIntensityPtr[lineIndex] += bufferIntensityPtr[lineIndex - 1];
                         bufferDensityPtr[lineIndex] += bufferDensityPtr[lineIndex - 1];
                     }
                     int shiftPre = -radius - 1;
                     int shiftPst = radius;
-                    for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex, ++shiftPre, ++shiftPst) {
+                    for (int lineIndex = 0; lineIndex < imageDim; lineIndex++, shiftPre++, shiftPst++, realIndex += lineOffset) {
                         float bufferIntensityCur, bufferDensityCur;
                         if (shiftPre > -1) {
                             if (shiftPst < imageDim) {
@@ -209,7 +221,6 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                         }
                         reinterpret_cast<float*>(&imageCuda[realIndex])[t] = bufferIntensityCur;
                         densityCudaPtr[realIndex] = bufferDensityCur;
-                        realIndex += lineOffset;
                     } // line convolution of mean filter
                 } // No kernel computation
             }); // pixel in starting plane
@@ -217,11 +228,19 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
 
         // Normalise per time point
         thrust::for_each_n(thrust::device, thrust::make_counting_iterator<size_t>(0), voxelNumber, [=]__device__(const size_t index) {
-            float& intensityVal = reinterpret_cast<float*>(&imageCuda[index])[t];
-            const float& densityVal = densityCudaPtr[index];
-            const bool& nanImageVal = nanImageCudaPtr[index];
-            intensityVal = nanImageVal ? std::numeric_limits<float>::quiet_NaN() : intensityVal / densityVal;
+            const bool& nanImageVal = tex1Dfetch<char>(nanImageTexture, index);
+            if (nanImageVal) {
+                reinterpret_cast<float*>(&imageCuda[index])[t] = std::numeric_limits<float>::quiet_NaN();
+            } else {
+                const float& intensityVal = tex1Dfetch<float>(imageTexture, index * 4 + t);
+                const float& densityVal = tex1Dfetch<float>(densityTexture, index);
+                reinterpret_cast<float*>(&imageCuda[index])[t] = intensityVal / densityVal;
+            }
         });
     } // check if the time point is active
 }
+template void NiftyReg::Cuda::KernelConvolution<ConvKernelType::Mean>(const nifti_image*, float4*, const float*, const bool*, const bool*);
+template void NiftyReg::Cuda::KernelConvolution<ConvKernelType::Linear>(const nifti_image*, float4*, const float*, const bool*, const bool*);
+template void NiftyReg::Cuda::KernelConvolution<ConvKernelType::Gaussian>(const nifti_image*, float4*, const float*, const bool*, const bool*);
+template void NiftyReg::Cuda::KernelConvolution<ConvKernelType::Cubic>(const nifti_image*, float4*, const float*, const bool*, const bool*);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaKernelConvolution.hpp b/reg-lib/cuda/CudaKernelConvolution.hpp
index de1a3c0c..a4b703b0 100644
--- a/reg-lib/cuda/CudaKernelConvolution.hpp
+++ b/reg-lib/cuda/CudaKernelConvolution.hpp
@@ -16,10 +16,10 @@ namespace NiftyReg::Cuda {
  * @param axes Boolean array to specify which axes have to be
  * smoothed. The array follow the dim array of the nifti header.
  */
+template<ConvKernelType kernelType>
 void KernelConvolution(const nifti_image *image,
                        float4 *imageCuda,
                        const float *sigma,
-                       const ConvKernelType kernelType,
                        const bool *timePoints = nullptr,
                        const bool *axes = nullptr);
 /* *************************************************************** */
diff --git a/reg-test/reg_test_regr_kernelConvolution.cpp b/reg-test/reg_test_regr_kernelConvolution.cpp
index 342ca9ee..a65e4879 100644
--- a/reg-test/reg_test_regr_kernelConvolution.cpp
+++ b/reg-test/reg_test_regr_kernelConvolution.cpp
@@ -124,9 +124,17 @@ class KernelConvolutionTest {
             contentCpu->SetDeformationField(imageCpu.disown());
             contentCuda->SetDeformationField(imageCuda.disown());
 
+            // Create the kernel convolution function for CUDA
+            auto cudaKernelConvolution = Cuda::KernelConvolution<ConvKernelType(0)>;
+            switch (kernelType) {
+            case 1: cudaKernelConvolution = Cuda::KernelConvolution<ConvKernelType(1)>; break;
+            case 2: cudaKernelConvolution = Cuda::KernelConvolution<ConvKernelType(2)>; break;
+            case 3: cudaKernelConvolution = Cuda::KernelConvolution<ConvKernelType(3)>; break;
+            }
+
             // Compute the kernel convolution for CPU and CUDA
             reg_tools_kernelConvolution(contentCpu->GetDeformationField(), sigmaValues.data(), ConvKernelType(kernelType), nullptr, activeTimePoints, activeAxes);
-            Cuda::KernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), ConvKernelType(kernelType), activeTimePoints, activeAxes);
+            cudaKernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), activeTimePoints, activeAxes);
 
             // Get the images
             imageCpu = NiftiImage(contentCpu->GetDeformationField(), NiftiImage::Copy::Image);

From b6d5097272627f18537fc1be78f75ba59766c793 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 12 Oct 2023 11:00:35 +0100
Subject: [PATCH 222/314] Optimise reg_getMinMaxValue_gpu()

---
 niftyreg_build_version.txt     |  2 +-
 reg-lib/cpu/_reg_tools.cpp     |  6 +--
 reg-lib/cuda/_reg_tools_gpu.cu | 80 ++++++++++++++++++++--------------
 3 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 51272bac..947e93bc 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-340
+341
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index a0255b23..93a0a76c 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -1872,18 +1872,18 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask) {
 }
 /* *************************************************************** */
 template <class DataType>
-DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool calcMin = true) {
+DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool isMin = true) {
     if (timepoint < -1 || timepoint >= image->nt)
         NR_FATAL_ERROR("The required time point does not exist");
 
     const DataType *imgPtr = static_cast<DataType*>(image->data);
-    DataType retValue = calcMin ? std::numeric_limits<DataType>::max() : std::numeric_limits<DataType>::lowest();
+    DataType retValue = isMin ? std::numeric_limits<DataType>::max() : std::numeric_limits<DataType>::lowest();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope;
 
     // The min/max function
     const DataType& (*minMax)(const DataType&, const DataType&);
-    if (calcMin) minMax = std::min<DataType>;
+    if (isMin) minMax = std::min<DataType>;
     else minMax = std::max<DataType>;
 
     for (int time = 0; time < image->nt; ++time) {
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index e41d9815..7e39c3ec 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -326,56 +326,72 @@ void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4
     reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::divides<float4>());
 }
 /* *************************************************************** */
-DEVICE static float Min(const float& lhs, const float& rhs) {
-    return lhs < rhs ? lhs : rhs;
+template<bool isMin>
+DEVICE static inline float MinMax(const float& lhs, const float& rhs) {
+    if constexpr (isMin) return lhs < rhs ? lhs : rhs;
+    else return lhs > rhs ? lhs : rhs;
 }
-DEVICE static float Max(const float& lhs, const float& rhs) {
-    return lhs > rhs ? lhs : rhs;
-}
-using MinMaxFunc = decltype(&Min);
-__device__ static MinMaxFunc minCuda = Min;
-__device__ static MinMaxFunc maxCuda = Max;
 /* *************************************************************** */
-float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint, const bool calcMin) {
-    if (timePoint < -1 || timePoint >= img->nt)
-        NR_FATAL_ERROR("The required time point does not exist");
-
+template<bool isMin, bool isSingleTimePoint, int timePoints>
+inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
-    const int timePoints = std::clamp(timePoint > -1 ? timePoint : int(NiftiImage::calcVoxelNumber(img, 7) / voxelNumber), 1, 4);
-    const float initValue = calcMin ? std::numeric_limits<float>::max() : std::numeric_limits<float>::lowest();
-    float4 result{ initValue, initValue, initValue, initValue };
-
-    // Set the min/max functions
-    MinMaxFunc minMaxCuda, minMax = calcMin ? Min : Max;
-    cudaMemcpyFromSymbol(&minMaxCuda, calcMin ? minCuda : maxCuda, sizeof(MinMaxFunc));
+    constexpr float initVal = isMin ? std::numeric_limits<float>::max() : std::numeric_limits<float>::lowest();
 
-    result = thrust::reduce(thrust::device, imgCuda, imgCuda + voxelNumber, make_float4(initValue, initValue, initValue, initValue),
-                            [=]DEVICE(const float4& lhs, const float4& rhs) {
-        float4 result{ initValue, initValue, initValue, initValue };
+    const float4 result = thrust::reduce(thrust::device, imgCuda, imgCuda + voxelNumber, make_float4(initVal, initVal, initVal, initVal),
+                                         [=]DEVICE(const float4& lhs, const float4& rhs) {
+        float4 result{ initVal, initVal, initVal, initVal };
         switch (timePoints) {
         case 4:
-            result.w = minMaxCuda(lhs.w, rhs.w);
-            if (timePoint > -1) break;
+            result.w = MinMax<isMin>(lhs.w, rhs.w);
+            if constexpr (isSingleTimePoint) break;
         case 3:
-            result.z = minMaxCuda(lhs.z, rhs.z);
-            if (timePoint > -1) break;
+            result.z = MinMax<isMin>(lhs.z, rhs.z);
+            if constexpr (isSingleTimePoint) break;
         case 2:
-            result.y = minMaxCuda(lhs.y, rhs.y);
-            if (timePoint > -1) break;
+            result.y = MinMax<isMin>(lhs.y, rhs.y);
+            if constexpr (isSingleTimePoint) break;
         case 1:
-            result.x = minMaxCuda(lhs.x, rhs.x);
+            result.x = MinMax<isMin>(lhs.x, rhs.x);
         }
         return result;
     });
 
-    return minMax(minMax(result.x, result.y), minMax(result.z, result.w));
+    return MinMax<isMin>(MinMax<isMin>(result.x, result.y), MinMax<isMin>(result.z, result.w));
+}
+/* *************************************************************** */
+template<bool isMin, bool isSingleTimePoint>
+inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoints) {
+    auto getMinMaxValue = reg_getMinMaxValue_gpu<isMin, isSingleTimePoint, 1>;
+    switch (timePoints) {
+    case 2:
+        getMinMaxValue = reg_getMinMaxValue_gpu<isMin, isSingleTimePoint, 2>;
+        break;
+    case 3:
+        getMinMaxValue = reg_getMinMaxValue_gpu<isMin, isSingleTimePoint, 3>;
+        break;
+    case 4:
+        getMinMaxValue = reg_getMinMaxValue_gpu<isMin, isSingleTimePoint, 4>;
+        break;
+    }
+    return getMinMaxValue(img, imgCuda);
+}
+/* *************************************************************** */
+template<bool isMin>
+inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
+    if (timePoint < -1 || timePoint >= img->nt)
+        NR_FATAL_ERROR("The required time point does not exist");
+    const bool isSingleTimePoint = timePoint > -1;
+    const int timePoints = std::clamp(isSingleTimePoint ? timePoint + 1 : img->nt * img->nu, 1, 4);
+    auto getMinMaxValue = reg_getMinMaxValue_gpu<isMin, false>;
+    if (isSingleTimePoint) getMinMaxValue = reg_getMinMaxValue_gpu<isMin, true>;
+    return getMinMaxValue(img, imgCuda, timePoints);
 }
 /* *************************************************************** */
 float reg_getMinValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
-    return reg_getMinMaxValue_gpu(img, imgCuda, timePoint, true);
+    return reg_getMinMaxValue_gpu<true>(img, imgCuda, timePoint);
 }
 /* *************************************************************** */
 float reg_getMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
-    return reg_getMinMaxValue_gpu(img, imgCuda, timePoint, false);
+    return reg_getMinMaxValue_gpu<false>(img, imgCuda, timePoint);
 }
 /* *************************************************************** */

From a8f12326319ad2bc554b95bf21771d0107b805d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 12 Oct 2023 14:38:31 +0100
Subject: [PATCH 223/314] Update compose deformation field test to include CUDA

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/Compute.cpp                           |   4 +
 reg-lib/Compute.h                             |   1 +
 reg-lib/cpu/_reg_localTrans.cpp               | 104 +++++-------
 reg-lib/cpu/_reg_localTrans.h                 |   6 +-
 reg-lib/cpu/_reg_splineBasis.cpp              |  40 ++---
 reg-lib/cpu/_reg_splineBasis.h                |  32 ++--
 reg-lib/cuda/CMakeLists.txt                   |   2 +-
 .../cuda/{CudaCompute.cpp => CudaCompute.cu}  |   8 +
 reg-lib/cuda/CudaCompute.h                    |   1 +
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |   7 +-
 reg-lib/cuda/_reg_localTransformation_gpu.h   |   3 +-
 reg-test/reg_test_composeField.cpp            | 153 ++++++++++--------
 reg-test/reg_test_getDeformationField.cpp     | 139 ++++++++--------
 14 files changed, 251 insertions(+), 251 deletions(-)
 rename reg-lib/cuda/{CudaCompute.cpp => CudaCompute.cu} (96%)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 947e93bc..c9693eb7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-341
+342
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 6814785d..42fa2ed1 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -409,3 +409,7 @@ void Compute::SymmetriseVelocityFields(Content& conBwIn) {
     nifti_image_free(warpedTransBw);
 }
 /* *************************************************************** */
+void Compute::DefFieldCompose(const nifti_image *defField) {
+    reg_defField_compose(defField, con.GetDeformationField(), nullptr);
+}
+/* *************************************************************** */
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index 821103d3..a810ceaf 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -38,6 +38,7 @@ class Compute {
 #ifdef NR_TESTING
 public:
 #endif
+    virtual void DefFieldCompose(const nifti_image *defField);
     virtual void VoxelCentricToNodeCentric(float weight);
 
 private:
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 41d8a6f5..685ab580 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -2258,10 +2258,10 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_defField_compose2D(nifti_image *deformationField,
+void reg_defField_compose2D(const nifti_image *deformationField,
                             nifti_image *dfToUpdate,
-                            int *mask) {
-    const size_t DFVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2);
+                            const int *mask) {
+    const size_t dfVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2);
 #ifdef _WIN32
     long i;
     const long warVoxelNumber = (long)NiftiImage::calcVoxelNumber(dfToUpdate, 2);
@@ -2269,14 +2269,14 @@ void reg_defField_compose2D(nifti_image *deformationField,
     size_t i;
     const size_t warVoxelNumber = NiftiImage::calcVoxelNumber(dfToUpdate, 2);
 #endif
-    DataType *defPtrX = static_cast<DataType*>(deformationField->data);
-    DataType *defPtrY = &defPtrX[DFVoxelNumber];
+    const DataType *defPtrX = static_cast<DataType*>(deformationField->data);
+    const DataType *defPtrY = &defPtrX[dfVoxelNumber];
 
     DataType *resPtrX = static_cast<DataType*>(dfToUpdate->data);
     DataType *resPtrY = &resPtrX[warVoxelNumber];
 
     const mat44 *df_real2Voxel;
-    mat44 *df_voxel2Real;
+    const mat44 *df_voxel2Real;
     if (deformationField->sform_code > 0) {
         df_real2Voxel = &dfToUpdate->sto_ijk;
         df_voxel2Real = &deformationField->sto_xyz;
@@ -2302,12 +2302,14 @@ void reg_defField_compose2D(nifti_image *deformationField,
             realDefY = resPtrY[i];
 
             // Conversion from real to voxel in the deformation field
-            voxelX = realDefX * df_real2Voxel->m[0][0]
-                + realDefY * df_real2Voxel->m[0][1]
-                + df_real2Voxel->m[0][3];
-            voxelY = realDefX * df_real2Voxel->m[1][0]
-                + realDefY * df_real2Voxel->m[1][1]
-                + df_real2Voxel->m[1][3];
+            voxelX =
+                realDefX * df_real2Voxel->m[0][0] +
+                realDefY * df_real2Voxel->m[0][1] +
+                df_real2Voxel->m[0][3];
+            voxelY =
+                realDefX * df_real2Voxel->m[1][0] +
+                realDefY * df_real2Voxel->m[1][1] +
+                df_real2Voxel->m[1][3];
 
             // Linear interpolation to compute the new deformation
             pre[0] = Floor(voxelX);
@@ -2316,12 +2318,12 @@ void reg_defField_compose2D(nifti_image *deformationField,
             relX[0] = 1.f - relX[1];
             relY[1] = voxelY - static_cast<DataType>(pre[1]);
             relY[0] = 1.f - relY[1];
-            realDefX = realDefY = 0.f;
+            realDefX = realDefY = 0;
             for (b = 0; b < 2; ++b) {
                 for (a = 0; a < 2; ++a) {
                     basis = relX[a] * relY[b];
-                    if (pre[0] + a > -1 && pre[0] + a<deformationField->nx &&
-                        pre[1] + b>-1 && pre[1] + b < deformationField->ny) {
+                    if (pre[0] + a > -1 && pre[0] + a < deformationField->nx &&
+                        pre[1] + b > -1 && pre[1] + b < deformationField->ny) {
                         // Uses the deformation field if voxel is in its space
                         index = (pre[1] + b) * deformationField->nx + pre[0] + a;
                         defX = defPtrX[index];
@@ -2349,11 +2351,10 @@ void reg_defField_compose2D(nifti_image *deformationField,
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_defField_compose3D(nifti_image *deformationField,
+void reg_defField_compose3D(const nifti_image *deformationField,
                             nifti_image *dfToUpdate,
-                            int *mask) {
-    const int DefFieldDim[3] = { deformationField->nx, deformationField->ny, deformationField->nz };
-    const size_t DFVoxelNumber = (size_t)DefFieldDim[0] * DefFieldDim[1] * DefFieldDim[2];
+                            const int *mask) {
+    const size_t dfVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
 #ifdef _WIN32
     long i;
     const long warVoxelNumber = (long)NiftiImage::calcVoxelNumber(dfToUpdate, 3);
@@ -2361,10 +2362,9 @@ void reg_defField_compose3D(nifti_image *deformationField,
     size_t i;
     const size_t warVoxelNumber = NiftiImage::calcVoxelNumber(dfToUpdate, 3);
 #endif
-
-    DataType *defPtrX = static_cast<DataType*>(deformationField->data);
-    DataType *defPtrY = &defPtrX[DFVoxelNumber];
-    DataType *defPtrZ = &defPtrY[DFVoxelNumber];
+    const DataType *defPtrX = static_cast<DataType*>(deformationField->data);
+    const DataType *defPtrY = &defPtrX[dfVoxelNumber];
+    const DataType *defPtrZ = &defPtrY[dfVoxelNumber];
 
     DataType *resPtrX = static_cast<DataType*>(dfToUpdate->data);
     DataType *resPtrY = &resPtrX[warVoxelNumber];
@@ -2375,7 +2375,7 @@ void reg_defField_compose3D(nifti_image *deformationField,
 #else
     mat44 df_real2Voxel __attribute__((aligned(16)));
 #endif
-    mat44 *df_voxel2Real;
+    const mat44 *df_voxel2Real;
     if (deformationField->sform_code > 0) {
         df_real2Voxel = deformationField->sto_ijk;
         df_voxel2Real = &deformationField->sto_xyz;
@@ -2391,7 +2391,7 @@ void reg_defField_compose3D(nifti_image *deformationField,
     bool inY, inZ;
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
-   shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, DefFieldDim, \
+   shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, \
    defPtrX, defPtrY, defPtrZ, resPtrX, resPtrY, resPtrZ, deformationField) \
    private(a, b, c, currentX, currentY, currentZ, index, tempIndex, pre, \
    realDef, voxel, tempBasis, defX, defY, defZ, relX, relY, relZ, basis, inY, inZ)
@@ -2429,21 +2429,21 @@ void reg_defField_compose3D(nifti_image *deformationField,
             relY[0] = 1.f - relY[1];
             relZ[1] = voxel[2] - static_cast<DataType>(pre[2]);
             relZ[0] = 1.f - relZ[1];
-            realDef[0] = realDef[1] = realDef[2] = 0.;
+            realDef[0] = realDef[1] = realDef[2] = 0;
             for (c = 0; c < 2; ++c) {
                 currentZ = pre[2] + c;
-                tempIndex = currentZ * DefFieldDim[0] * DefFieldDim[1];
-                if (currentZ > -1 && currentZ < DefFieldDim[2]) inZ = true;
+                tempIndex = currentZ * deformationField->nx * deformationField->ny;
+                if (currentZ > -1 && currentZ < deformationField->nz) inZ = true;
                 else inZ = false;
                 for (b = 0; b < 2; ++b) {
                     currentY = pre[1] + b;
-                    index = tempIndex + currentY * DefFieldDim[0] + pre[0];
+                    index = tempIndex + currentY * deformationField->nx + pre[0];
                     tempBasis = relY[b] * relZ[c];
-                    if (currentY > -1 && currentY < DefFieldDim[1]) inY = true;
+                    if (currentY > -1 && currentY < deformationField->ny) inY = true;
                     else inY = false;
                     for (a = 0; a < 2; ++a) {
                         currentX = pre[0] + a;
-                        if (currentX > -1 && currentX < DefFieldDim[0] && inY && inZ) {
+                        if (currentX > -1 && currentX < deformationField->nx && inY && inZ) {
                             // Uses the deformation field if voxel is in its space
                             defX = defPtrX[index];
                             defY = defPtrY[index];
@@ -2478,43 +2478,23 @@ void reg_defField_compose3D(nifti_image *deformationField,
     }// loop over every voxel
 }
 /* *************************************************************** */
-void reg_defField_compose(nifti_image *deformationField,
+void reg_defField_compose(const nifti_image *deformationField,
                           nifti_image *dfToUpdate,
-                          int *mask) {
+                          const int *mask) {
     if (deformationField->datatype != dfToUpdate->datatype)
         NR_FATAL_ERROR("Both deformation fields are expected to have the same type");
 
-    bool freeMask = false;
-    if (mask == nullptr) {
-        mask = (int*)calloc(NiftiImage::calcVoxelNumber(dfToUpdate, 3), sizeof(int));
-        freeMask = true;
-    }
-
-    if (dfToUpdate->nu == 2) {
-        switch (deformationField->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_defField_compose2D<float>(deformationField, dfToUpdate, mask);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_defField_compose2D<double>(deformationField, dfToUpdate, mask);
-            break;
-        default:
-            NR_FATAL_ERROR("Deformation field pixel type is unsupported");
-        }
-    } else {
-        switch (deformationField->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_defField_compose3D<float>(deformationField, dfToUpdate, mask);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_defField_compose3D<double>(deformationField, dfToUpdate, mask);
-            break;
-        default:
-            NR_FATAL_ERROR("Deformation field pixel type is unsupported");
-        }
+    unique_ptr<int[]> currentMask;
+    if (!mask) {
+        currentMask.reset(new int[NiftiImage::calcVoxelNumber(dfToUpdate, 3)]());
+        mask = currentMask.get();
     }
 
-    if (freeMask) free(mask);
+    std::visit([&](auto&& defFieldDataType) {
+        using DefFieldDataType = std::decay_t<decltype(defFieldDataType)>;
+        auto defFieldCompose = dfToUpdate->nu == 2 ? reg_defField_compose2D<DefFieldDataType> : reg_defField_compose3D<DefFieldDataType>;
+        defFieldCompose(deformationField, dfToUpdate, mask);
+    }, NiftiImage::getFloatingDataType(deformationField));
 }
 /* *************************************************************** */
 /// @brief Internal data structure to pass user data into optimizer that get passed to cost_function
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index d3d8d28c..ad1f0daf 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -131,12 +131,12 @@ int reg_spline_cppComposition(nifti_image *grid1,
  * @param dfToUpdate Image that contains the deformation field that
  * is being updated
  * @param mask Mask overlaid on the dfToUpdate field where only voxel
- * within the mask will be updated. All positive values in the maks
+ * within the mask will be updated. All positive values in the mask
  * are considered as belonging to the mask.
  */
-void reg_defField_compose(nifti_image *deformationField,
+void reg_defField_compose(const nifti_image *deformationField,
                           nifti_image *dfToUpdate,
-                          int *mask);
+                          const int *mask);
 /* *************************************************************** */
 /** @brief Compute the inverse of a deformation field
  * @author Marcel van Herk (CMIC / NKI / AVL)
diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp
index a47a635b..6565cb83 100755
--- a/reg-lib/cpu/_reg_splineBasis.cpp
+++ b/reg-lib/cpu/_reg_splineBasis.cpp
@@ -460,13 +460,13 @@ template void set_second_order_bspline_basis_values<double>(double*, double*, do
 template <class DataType>
 void get_SlidedValues(DataType& defX,
                       DataType& defY,
-                      int X,
-                      int Y,
-                      DataType *defPtrX,
-                      DataType *defPtrY,
-                      mat44 *df_voxel2Real,
-                      int *dim,
-                      bool displacement) {
+                      const int X,
+                      const int Y,
+                      const DataType *defPtrX,
+                      const DataType *defPtrY,
+                      const mat44 *df_voxel2Real,
+                      const int *dim,
+                      const bool displacement) {
     int newX = X;
     int newY = Y;
     if (X < 0) {
@@ -493,22 +493,22 @@ void get_SlidedValues(DataType& defX,
     defX = defPtrX[index] + shiftValueX;
     defY = defPtrY[index] + shiftValueY;
 }
-template void get_SlidedValues<float>(float&, float&, int, int, float*, float*, mat44*, int*, bool);
-template void get_SlidedValues<double>(double&, double&, int, int, double*, double*, mat44*, int*, bool);
+template void get_SlidedValues<float>(float&, float&, const int, const int, const float*, const float*, const mat44*, const int*, const bool);
+template void get_SlidedValues<double>(double&, double&, const int, const int, const double*, const double*, const mat44*, const int*, const bool);
 /* *************************************************************** */
 template <class DataType>
 void get_SlidedValues(DataType& defX,
                       DataType& defY,
                       DataType& defZ,
-                      int X,
-                      int Y,
-                      int Z,
-                      DataType *defPtrX,
-                      DataType *defPtrY,
-                      DataType *defPtrZ,
-                      mat44 *df_voxel2Real,
-                      int *dim,
-                      bool displacement) {
+                      const int X,
+                      const int Y,
+                      const int Z,
+                      const DataType *defPtrX,
+                      const DataType *defPtrY,
+                      const DataType *defPtrZ,
+                      const mat44 *df_voxel2Real,
+                      const int *dim,
+                      const bool displacement) {
     int newX = X;
     int newY = Y;
     int newZ = Z;
@@ -552,8 +552,8 @@ void get_SlidedValues(DataType& defX,
     defY = defPtrY[index] + shiftValueY;
     defZ = defPtrZ[index] + shiftValueZ;
 }
-template void get_SlidedValues<float>(float&, float&, float&, int, int, int, float*, float*, float*, mat44*, int*, bool);
-template void get_SlidedValues<double>(double&, double&, double&, int, int, int, double*, double*, double*, mat44*, int*, bool);
+template void get_SlidedValues<float>(float&, float&, float&, const int, const int, const int, const float*, const float*, const float*, const mat44*, const int*, const bool);
+template void get_SlidedValues<double>(double&, double&, double&, const int, const int, const int, const double*, const double*, const double*, const mat44*, const int*, const bool);
 /* *************************************************************** */
 template <class DataType>
 void get_GridValues(int startX,
diff --git a/reg-lib/cpu/_reg_splineBasis.h b/reg-lib/cpu/_reg_splineBasis.h
index 8a0afe2d..77cd6dd8 100755
--- a/reg-lib/cpu/_reg_splineBasis.h
+++ b/reg-lib/cpu/_reg_splineBasis.h
@@ -84,26 +84,26 @@ void get_SplineBasisValues(DataType basis,
 template <class DataType>
 void get_SlidedValues(DataType &defX,
                       DataType &defY,
-                      int X,
-                      int Y,
-                      DataType *defPtrX,
-                      DataType *defPtrY,
-                      mat44 *df_voxel2Real,
-                      int *dim,
-                      bool displacement);
+                      const int X,
+                      const int Y,
+                      const DataType *defPtrX,
+                      const DataType *defPtrY,
+                      const mat44 *df_voxel2Real,
+                      const int *dim,
+                      const bool displacement);
 template <class DataType>
 void get_SlidedValues(DataType &defX,
                       DataType &defY,
                       DataType &defZ,
-                      int X,
-                      int Y,
-                      int Z,
-                      DataType *defPtrX,
-                      DataType *defPtrY,
-                      DataType *defPtrZ,
-                      mat44 *df_voxel2Real,
-                      int *dim,
-                      bool displacement);
+                      const int X,
+                      const int Y,
+                      const int Z,
+                      const DataType *defPtrX,
+                      const DataType *defPtrY,
+                      const DataType *defPtrZ,
+                      const mat44 *df_voxel2Real,
+                      const int *dim,
+                      const bool displacement);
 
 
 template <class DataType>
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 18f68628..d4fb3af0 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -61,7 +61,7 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 set(NAME _reg_cuda_kernels)
 cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaAladinContent.cpp
-    CudaCompute.cpp
+    CudaCompute.cu
     CudaContent.cpp
     CudaContext.cpp
     CudaDefContent.cpp
diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cu
similarity index 96%
rename from reg-lib/cuda/CudaCompute.cpp
rename to reg-lib/cuda/CudaCompute.cu
index 928faa87..202eaa76 100644
--- a/reg-lib/cuda/CudaCompute.cpp
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -247,3 +247,11 @@ void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) {
     dynamic_cast<CudaF3dContent&>(conBwIn).UpdateControlPointGrid();
 }
 /* *************************************************************** */
+void CudaCompute::DefFieldCompose(const nifti_image *defField) {
+    CudaContent& con = dynamic_cast<CudaContent&>(this->con);
+    const size_t& voxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
+    thrust::device_vector<float4> defFieldCuda(voxelNumber);
+    Cuda::TransferNiftiToDevice(defFieldCuda.data().get(), defField);
+    reg_defField_compose_gpu(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda());
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 9779f805..ed0514e1 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -34,6 +34,7 @@ class CudaCompute: public Compute {
 #ifndef NR_TESTING
 protected:
 #endif
+    virtual void DefFieldCompose(const nifti_image *defField) override;
     virtual void VoxelCentricToNodeCentric(float weight) override;
 
 private:
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 0bfcdcb2..9ce6ec2c 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -541,15 +541,14 @@ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
 /* *************************************************************** */
 void reg_defField_compose_gpu(const nifti_image *deformationField,
                               const float4 *deformationFieldCuda,
-                              float4 *deformationFieldCudaOut,
-                              const size_t activeVoxelNumber) {
+                              float4 *deformationFieldCudaOut) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
     const int3 referenceImageDim{ deformationField->nx, deformationField->ny, deformationField->nz };
     const mat44& affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk;
     const mat44& affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz;
     auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
-                                                             activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+                                                             voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
     if (deformationField->nz > 1) {
         const unsigned blocks = blockSize->reg_defField_compose3D;
@@ -634,7 +633,7 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField,
     // The deformation field is squared
     for (int i = 0; i < squaringNumber; ++i) {
         // The deformation field is applied to itself
-        reg_defField_compose_gpu(deformationField, deformationFieldCuda, flowFieldCuda, voxelNumber);
+        reg_defField_compose_gpu(deformationField, deformationFieldCuda, flowFieldCuda);
         // The computed scaled deformation field is copied over
         thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda);
         NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index 63ae7107..0c0e80a7 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -50,8 +50,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
 /* *************************************************************** */
 void reg_defField_compose_gpu(const nifti_image *deformationField,
                               const float4 *deformationFieldCuda,
-                              float4 *deformationFieldOutCuda,
-                              const size_t activeVoxelNumber);
+                              float4 *deformationFieldOutCuda);
 /* *************************************************************** */
 void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
                                                 nifti_image *deformationField,
diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp
index 740a7a31..6bd7662e 100644
--- a/reg-test/reg_test_composeField.cpp
+++ b/reg-test/reg_test_composeField.cpp
@@ -11,7 +11,7 @@
 
 class ComposeDeformationFieldTest {
 protected:
-    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, NiftiImage>;
+    using TestData = std::tuple<std::string, NiftiImage&, NiftiImage, NiftiImage, NiftiImage>;
     using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
 
     inline static vector<TestCase> testCases;
@@ -21,69 +21,65 @@ class ComposeDeformationFieldTest {
         if (!testCases.empty())
             return;
 
-        // Create a 2D reference image
-        NiftiImage::dim_t size = 5;
-        vector<NiftiImage::dim_t> dimFlo{ size, size };
-        NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
-
-        // Create a 3D reference image
-        dimFlo.push_back(size);
-        NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32);
+        // Create reference images
+        constexpr NiftiImage::dim_t size = 5;
+        NiftiImage reference2d({ size, size }, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d({ size, size, size }, NIFTI_TYPE_FLOAT32);
 
         // Data container for the test data
         vector<TestData> testData;
 
         // Create affine deformation fields
-        NiftiImage inDefField2d = CreateDeformationField(reference2d);
-        NiftiImage inDefField3d = CreateDeformationField(reference3d);
         NiftiImage defField2d = CreateDeformationField(reference2d);
         NiftiImage defField3d = CreateDeformationField(reference3d);
         NiftiImage outDefField2d = CreateDeformationField(reference2d);
         NiftiImage outDefField3d = CreateDeformationField(reference3d);
+        NiftiImage expDefField2d = CreateDeformationField(reference2d);
+        NiftiImage expDefField3d = CreateDeformationField(reference3d);
 
         // Identity transformation tests
         testData.emplace_back(TestData(
             "2D ID",
             reference2d,
-            inDefField2d,
             defField2d,
-            outDefField2d
+            outDefField2d,
+            expDefField2d
         ));
         testData.emplace_back(TestData(
             "3D ID",
             reference3d,
-            inDefField3d,
             defField3d,
-            outDefField3d
+            outDefField3d,
+            expDefField3d
         ));
 
         // Scaling transformation tests
-        float * inDefField2dPtr = static_cast<float *>(inDefField2d->data);
-        float * inDefField3dPtr = static_cast<float *>(inDefField3d->data);
-        float * def2dPtr = static_cast<float *>(defField2d->data);
-        float * def3dPtr = static_cast<float *>(defField3d->data);
-        for(size_t i=0; i<inDefField2d.nVoxels(); i++)
-            inDefField2dPtr[i] /= 1.11f;
-        for(size_t i=0; i<inDefField3d.nVoxels(); i++)
-            inDefField3dPtr[i] /= 1.11f;
-        for(size_t i=0; i<defField2d.nVoxels(); i++)
-            def2dPtr[i] *= 1.11f;
-        for(size_t i=0; i<defField3d.nVoxels(); i++)
-            def3dPtr[i] *= 1.11f;
+        float *defField2dPtr = static_cast<float*>(defField2d->data);
+        float *defField3dPtr = static_cast<float*>(defField3d->data);
+        float *outDefField2dPtr = static_cast<float*>(outDefField2d->data);
+        float *outDefField3dPtr = static_cast<float*>(outDefField3d->data);
+        for (size_t i = 0; i < defField2d.nVoxels(); i++)
+            defField2dPtr[i] *= 1.11f;
+        for (size_t i = 0; i < defField3d.nVoxels(); i++)
+            defField3dPtr[i] *= 1.11f;
+        for (size_t i = 0; i < outDefField2d.nVoxels(); i++)
+            outDefField2dPtr[i] /= 1.11f;
+        for (size_t i = 0; i < outDefField3d.nVoxels(); i++)
+            outDefField3dPtr[i] /= 1.11f;
 
         testData.emplace_back(TestData(
-            "2D scaling",
+            "2D Scaling",
             reference2d,
-            inDefField2d,
             defField2d,
-            outDefField2d
+            outDefField2d,
+            expDefField2d
         ));
         testData.emplace_back(TestData(
-            "3D scaling",
+            "3D Scaling",
             reference3d,
-            inDefField3d,
             defField3d,
-            outDefField3d
+            outDefField3d,
+            expDefField3d
         ));
 
         // Check boundary conditions. The default behavior is to use the embedded
@@ -91,54 +87,64 @@ class ComposeDeformationFieldTest {
         // transformation for padding.
         reg_tools_multiplyValueToImage(defField2d, defField2d, 0.f);
         reg_tools_multiplyValueToImage(defField3d, defField3d, 0.f);
-        reg_tools_multiplyValueToImage(inDefField2d, inDefField2d, 0.f);
-        reg_tools_multiplyValueToImage(inDefField3d, inDefField3d, 0.f);
         reg_tools_multiplyValueToImage(outDefField2d, outDefField2d, 0.f);
         reg_tools_multiplyValueToImage(outDefField3d, outDefField3d, 0.f);
+        reg_tools_multiplyValueToImage(expDefField2d, expDefField2d, 0.f);
+        reg_tools_multiplyValueToImage(expDefField3d, expDefField3d, 0.f);
         reg_getDeformationFromDisplacement(defField2d);
         reg_getDeformationFromDisplacement(defField3d);
-        reg_getDeformationFromDisplacement(inDefField2d);
-        reg_getDeformationFromDisplacement(inDefField3d);
         reg_getDeformationFromDisplacement(outDefField2d);
         reg_getDeformationFromDisplacement(outDefField3d);
-        float * outDefField2dPtr = static_cast<float *>(outDefField2d->data);
-        float * outDefField3dPtr = static_cast<float *>(outDefField3d->data);
-        for(size_t i=0; i<inDefField2d.nVoxels(); i++)
-            inDefField2dPtr[i] += 3.f;
-        for(size_t i=0; i<inDefField3d.nVoxels(); i++)
-            inDefField3dPtr[i] += 3.f;
-        for(size_t i=0; i<defField2d.nVoxels(); i++)
-            def2dPtr[i] += 1.f;
-        for(size_t i=0; i<defField3d.nVoxels(); i++)
-            def3dPtr[i] += 1.f;
-        for(size_t i=0; i<outDefField2d.nVoxels(); i++)
-            outDefField2dPtr[i] += 4.f;
-        for(size_t i=0; i<outDefField3d.nVoxels(); i++)
-            outDefField3dPtr[i] += 4.f;
+        reg_getDeformationFromDisplacement(expDefField2d);
+        reg_getDeformationFromDisplacement(expDefField3d);
+        float *expDefField2dPtr = static_cast<float*>(expDefField2d->data);
+        float *expDefField3dPtr = static_cast<float*>(expDefField3d->data);
+        for (size_t i = 0; i < defField2d.nVoxels(); i++)
+            defField2dPtr[i] += 1.f;
+        for (size_t i = 0; i < defField3d.nVoxels(); i++)
+            defField3dPtr[i] += 1.f;
+        for (size_t i = 0; i < outDefField2d.nVoxels(); i++)
+            outDefField2dPtr[i] += 3.f;
+        for (size_t i = 0; i < outDefField3d.nVoxels(); i++)
+            outDefField3dPtr[i] += 3.f;
+        for (size_t i = 0; i < expDefField2d.nVoxels(); i++)
+            expDefField2dPtr[i] += 4.f;
+        for (size_t i = 0; i < expDefField3d.nVoxels(); i++)
+            expDefField3dPtr[i] += 4.f;
         testData.emplace_back(TestData(
-            "2D padding",
+            "2D Padding",
             reference2d,
-            inDefField2d,
             defField2d,
-            outDefField2d
+            outDefField2d,
+            expDefField2d
         ));
         testData.emplace_back(TestData(
-            "3D padding",
+            "3D Padding",
             reference3d,
-            inDefField3d,
             defField3d,
-            outDefField3d
+            outDefField3d,
+            expDefField3d
         ));
 
         // Run the actual computation with the provided input data
         for (auto&& data : testData) {
-            auto&& [testName, reference, inDefField, defField, expectedField] = data;
-            // Run the compose on CPU only for now
-            reg_defField_compose(defField, inDefField, nullptr);
-            // Check the results
-            testCases.push_back({testName + " CPU", inDefField, expectedField});
+            // Get the test data
+            auto&& [testName, reference, defField, outDefField, expDefField] = data;
+            for (auto&& platformType : PlatformTypes) {
+                unique_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<ContentCreator> contentCreator{ dynamic_cast<ContentCreator*>(platform->CreateContentCreator()) };
+                // Create the content and the compute
+                unique_ptr<Content> content{ contentCreator->Create(reference, reference) };
+                unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                // Run the compose
+                content->SetDeformationField(NiftiImage(outDefField).disown());
+                compute->DefFieldCompose(defField);
+                // Get the result
+                NiftiImage resDefField(content->GetDeformationField(), NiftiImage::Copy::Image);
+                // Save for testing
+                testCases.push_back({ testName + " "s + platform->GetName(), std::move(resDefField), expDefField });
+            }
         }
-
     }
 };
 
@@ -150,15 +156,22 @@ TEST_CASE_METHOD(ComposeDeformationFieldTest, "Compose deformation field", "[uni
 
         SECTION(testName) {
             std::cout << "\n**************** Section " << testName << " ****************" << std::endl;
-            float *resPtr = static_cast<float *>(result->data);
-            float *expPtr = static_cast<float *>(expected->data);
-            for(unsigned i=0; i<expected.nVoxels();++i){
-                const double diff = fabs(resPtr[i] - expPtr[i]);
-                if (diff > EPS){
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            // Check the deformation fields
+            const auto resPtr = result.data();
+            const auto expPtr = expected.data();
+            for (auto i = 0; i < expected.nVoxels(); i++) {
+                const float resVal = resPtr[i];
+                const float expVal = expPtr[i];
+                const float diff = abs(resVal - expVal);
+                if (diff > EPS) {
                     std::cout << "[i]=" << i;
                     std::cout << " | diff=" << diff;
-                    std::cout << " | Result=" << resPtr[i];
-                    std::cout << " | Expected=" << expPtr[i] << std::endl;
+                    std::cout << " | Result=" << resVal;
+                    std::cout << " | Expected=" << expVal << std::endl;
                 }
                 REQUIRE(diff < EPS);
             }
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index 0a912881..444e6025 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -28,14 +28,10 @@ class GetDeformationFieldTest {
         std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(0, 1);
 
-        // Create a 2D reference image
-        NiftiImage::dim_t size = 5;
-        vector<NiftiImage::dim_t> dimFlo{ size, size };
-        NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
-
-        // Create a 3D reference image
-        dimFlo.push_back(size);
-        NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32);
+        // Create reference images
+        constexpr NiftiImage::dim_t size = 5;
+        NiftiImage reference2d({ size, size }, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d({ size, size, size }, NIFTI_TYPE_FLOAT32);
 
         // Data container for the test data
         vector<TestData> testData;
@@ -45,46 +41,46 @@ class GetDeformationFieldTest {
         NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
         NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
         // Create the expected deformation field result with an identity
-        NiftiImage deformationField2d = CreateDeformationField(reference2d);
-        NiftiImage deformationField3d = CreateDeformationField(reference3d);
+        NiftiImage expDefField2d = CreateDeformationField(reference2d);
+        NiftiImage expDefField3d = CreateDeformationField(reference3d);
         testData.emplace_back(TestData(
             "2D ID",
             reference2d,
             controlPointGrid2d,
-            deformationField2d
+            expDefField2d
         ));
         testData.emplace_back(TestData(
             "3D ID",
             reference3d,
             controlPointGrid3d,
-            deformationField3d
+            expDefField3d
         ));
 
         // Translation transformation tests - translation of 2 along each axis
         float *cpp2dPtr = static_cast<float*>(controlPointGrid2d->data);
         float *cpp3dPtr = static_cast<float*>(controlPointGrid3d->data);
-        float *def2dPtr = static_cast<float*>(deformationField2d->data);
-        float *def3dPtr = static_cast<float*>(deformationField3d->data);
+        float *expDefField2dPtr = static_cast<float*>(expDefField2d->data);
+        float *expDefField3dPtr = static_cast<float*>(expDefField3d->data);
         for (size_t i = 0; i < controlPointGrid2d.nVoxels(); i++)
             cpp2dPtr[i] += 2.f;
         for (size_t i = 0; i < controlPointGrid3d.nVoxels(); i++)
             cpp3dPtr[i] += 2.f;
-        for (size_t i = 0; i < deformationField2d.nVoxels(); i++)
-            def2dPtr[i] += 2.f;
-        for (size_t i = 0; i < deformationField3d.nVoxels(); i++)
-            def3dPtr[i] += 2.f;
+        for (size_t i = 0; i < expDefField2d.nVoxels(); i++)
+            expDefField2dPtr[i] += 2.f;
+        for (size_t i = 0; i < expDefField3d.nVoxels(); i++)
+            expDefField3dPtr[i] += 2.f;
 
         testData.emplace_back(TestData(
             "2D Trans",
             reference2d,
             controlPointGrid2d,
-            deformationField2d
+            expDefField2d
         ));
         testData.emplace_back(TestData(
             "3D Trans",
             reference3d,
             controlPointGrid3d,
-            deformationField3d
+            expDefField3d
         ));
 
         // Scaling transformation tests
@@ -92,41 +88,40 @@ class GetDeformationFieldTest {
             cpp2dPtr[i] = (cpp2dPtr[i] - 2.f) * 1.1f;
         for (size_t i = 0; i < controlPointGrid3d.nVoxels(); i++)
             cpp3dPtr[i] = (cpp3dPtr[i] - 2.f) * 1.1f;
-        for (size_t i = 0; i < deformationField2d.nVoxels(); i++)
-            def2dPtr[i] = (def2dPtr[i] - 2.f) * 1.1f;
-        for (size_t i = 0; i < deformationField3d.nVoxels(); i++)
-            def3dPtr[i] = (def3dPtr[i] - 2.f) * 1.1f;
+        for (size_t i = 0; i < expDefField2d.nVoxels(); i++)
+            expDefField2dPtr[i] = (expDefField2dPtr[i] - 2.f) * 1.1f;
+        for (size_t i = 0; i < expDefField3d.nVoxels(); i++)
+            expDefField3dPtr[i] = (expDefField3dPtr[i] - 2.f) * 1.1f;
 
         testData.emplace_back(TestData(
-            "2D scaling",
+            "2D Scaling",
             reference2d,
-            (controlPointGrid2d),
-            (deformationField2d)
+            controlPointGrid2d,
+            expDefField2d
         ));
         testData.emplace_back(TestData(
-            "3D scaling",
+            "3D Scaling",
             reference3d,
             controlPointGrid3d,
-            deformationField3d
+            expDefField3d
         ));
 
         // Run the actual computation with the provided input data
         for (auto&& data : testData) {
             for (auto&& platformType : PlatformTypes) {
-                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
                 // Make a copy of the test data
-                auto [testName, reference, controlPointGrid, defFieldExp] = data;
-                // Add content
+                auto [testName, reference, controlPointGrid, expDefField] = data;
+                // Create the content and the compute
                 unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
-                // Add compute
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 // Compute the deformation field
                 compute->GetDeformationField(false, true); // no composition - use bspline
                 // Retrieve the deformation field
                 NiftiImage defField(content->GetDeformationField(), NiftiImage::Copy::Image);
                 // Save for testing
-                testCases.push_back({ testName + " " + platform->GetName(), std::move(defField), std::move(defFieldExp) });
+                testCases.push_back({ testName + " "s + platform->GetName(), std::move(defField), std::move(expDefField) });
             }
         }
 
@@ -134,77 +129,75 @@ class GetDeformationFieldTest {
         vector<TestDataComp> testDataComp;
 
         // Ensures composition of identity transformation yield identity
-        NiftiImage deformationFieldInput2d = CreateDeformationField(reference2d);
-        NiftiImage deformationFieldInput3d = CreateDeformationField(reference3d);
-        reg_tools_multiplyValueToImage(deformationField2d, deformationField2d, 0.f);
-        reg_tools_multiplyValueToImage(deformationField3d, deformationField3d, 0.f);
+        NiftiImage defField2d = CreateDeformationField(reference2d);
+        NiftiImage defField3d = CreateDeformationField(reference3d);
+        reg_tools_multiplyValueToImage(expDefField2d, expDefField2d, 0.f);
+        reg_tools_multiplyValueToImage(expDefField3d, expDefField3d, 0.f);
         reg_tools_multiplyValueToImage(controlPointGrid2d, controlPointGrid2d, 0.f);
         reg_tools_multiplyValueToImage(controlPointGrid3d, controlPointGrid3d, 0.f);
-        reg_getDeformationFromDisplacement(deformationField2d);
-        reg_getDeformationFromDisplacement(deformationField3d);
+        reg_getDeformationFromDisplacement(expDefField2d);
+        reg_getDeformationFromDisplacement(expDefField3d);
         reg_getDeformationFromDisplacement(controlPointGrid2d);
         reg_getDeformationFromDisplacement(controlPointGrid3d);
         testDataComp.emplace_back(TestDataComp(
-            "2D composition ID",
+            "2D Composition ID",
             reference3d,
             controlPointGrid2d,
-            deformationFieldInput2d,
-            deformationField2d
+            defField2d,
+            expDefField2d
         ));
         testDataComp.emplace_back(TestDataComp(
-            "3D composition ID",
+            "3D Composition ID",
             reference3d,
             controlPointGrid3d,
-            deformationFieldInput3d,
-            deformationField3d
+            defField3d,
+            expDefField3d
         ));
 
         // Ensures composition from zooming and and out goes back identity ID
-        float *def2dInPtr = static_cast<float*>(deformationFieldInput2d->data);
-        float *def3dInPtr = static_cast<float*>(deformationFieldInput3d->data);
+        float *defField2dPtr = static_cast<float*>(defField2d->data);
+        float *defField3dPtr = static_cast<float*>(defField3d->data);
         for (size_t i = 0; i < controlPointGrid2d.nVoxels(); i++)
             cpp2dPtr[i] *= 1.1f;
         for (size_t i = 0; i < controlPointGrid3d.nVoxels(); i++)
             cpp3dPtr[i] *= 1.1f;
-        for (size_t i = 0; i < deformationFieldInput2d.nVoxels(); i++)
-            def2dInPtr[i] /= 1.1f;
-        for (size_t i = 0; i < deformationFieldInput3d.nVoxels(); i++)
-            def3dInPtr[i] /= 1.1f;
+        for (size_t i = 0; i < defField2d.nVoxels(); i++)
+            defField2dPtr[i] /= 1.1f;
+        for (size_t i = 0; i < defField3d.nVoxels(); i++)
+            defField3dPtr[i] /= 1.1f;
         testDataComp.emplace_back(TestDataComp(
-            "2D composition scaling",
+            "2D Composition Scaling",
             reference3d,
             controlPointGrid2d,
-            deformationFieldInput2d,
-            deformationField2d
+            defField2d,
+            expDefField2d
         ));
         testDataComp.emplace_back(TestDataComp(
-            "3D composition scaling",
+            "3D Composition Scaling",
             reference3d,
             controlPointGrid3d,
-            deformationFieldInput3d,
-            deformationField3d
+            defField3d,
+            expDefField3d
         ));
 
         for (auto&& data : testDataComp) {
-            for (auto&& platformType : { PlatformType::Cpu }) {
-                shared_ptr<Platform> platform{ new Platform(platformType) };
+            for (auto&& platformType : { PlatformType::Cpu }) { // Test only on CPU
+                unique_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
                 // Make a copy of the test data
-                auto [testName, reference, controlPointGrid, defField, defFieldExp] = data;
-                // Add content
+                auto [testName, reference, controlPointGrid, defField, expDefField] = data;
+                // Create the content and the compute
                 unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
-                content->SetDeformationField(defField.disown());
-                // Add compute
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 // Compute the deformation field
+                content->SetDeformationField(defField.disown());
                 compute->GetDeformationField(true, true); // with composition - use bspline
                 // Retrieve the deformation field
                 defField = NiftiImage(content->GetDeformationField(), NiftiImage::Copy::Image);
                 // Save for testing
-                testCases.push_back({ testName + " " + platform->GetName(), std::move(defField), std::move(defFieldExp) });
+                testCases.push_back({ testName + " "s + platform->GetName(), std::move(defField), std::move(expDefField) });
             }
         }
-
     }
 };
 
@@ -216,15 +209,17 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation field from b-spline grid"
 
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
-            float *resPtr = static_cast<float*>(result->data);
-            float *expPtr = static_cast<float*>(expected->data);
-            for (unsigned i = 0; i < expected.nVoxels(); ++i) {
-                const double diff = fabs(resPtr[i] - expPtr[i]);
+            const auto resPtr = result.data();
+            const auto expPtr = expected.data();
+            for (auto i = 0; i < expected.nVoxels(); i++) {
+                const float resVal = resPtr[i];
+                const float expVal = expPtr[i];
+                const float diff = abs(resVal - expVal);
                 if (diff > EPS) {
                     NR_COUT << "[i]=" << i;
                     NR_COUT << " | diff=" << diff;
-                    NR_COUT << " | Result=" << resPtr[i];
-                    NR_COUT << " | Expected=" << expPtr[i] << std::endl;
+                    NR_COUT << " | Result=" << resVal;
+                    NR_COUT << " | Expected=" << expVal << std::endl;
                 }
                 REQUIRE(diff < EPS);
             }

From d925b8c99fdc4c66033a3ccdd2d881d06cb5ea7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 13 Oct 2023 20:17:48 +0100
Subject: [PATCH 224/314] Add composition support for
 CudaCompute::GetDeformationField() #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/cpu/_reg_localTrans.cpp               |  69 ++--
 reg-lib/cpu/_reg_localTrans.h                 |   2 +-
 reg-lib/cpu/_reg_splineBasis.cpp              |  92 +++--
 reg-lib/cpu/_reg_splineBasis.h                |  14 +-
 reg-lib/cuda/CudaCompute.cu                   |   2 +-
 reg-lib/cuda/_reg_common_cuda_kernels.cu      |   9 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |  15 +-
 reg-lib/cuda/_reg_localTransformation_gpu.h   |   1 +
 .../cuda/_reg_localTransformation_kernels.cu  | 341 ++++++++++--------
 reg-test/reg_test_getDeformationField.cpp     |   6 +-
 11 files changed, 302 insertions(+), 251 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c9693eb7..fe2cd8b0 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-342
+343
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 685ab580..2dac9946 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -563,7 +563,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
     } val;
     __m128 tempCurrent, tempX, tempY;
 #ifdef _WIN32
-    __declspec(align(16)) DataType temp[4];
+    __declspec(align(16)) DataType xBasis[4];
     __declspec(align(16)) DataType yBasis[4];
     union {
         __m128 m[16];
@@ -578,7 +578,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
         __declspec(align(16)) DataType f[16];
     } xyBasis;
 #else // _WIN32
-    DataType temp[4] __attribute__((aligned(16)));
+    DataType xBasis[4] __attribute__((aligned(16)));
     DataType yBasis[4] __attribute__((aligned(16)));
     union {
         __m128 m[16];
@@ -594,7 +594,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
     } xyBasis;
 #endif // _WIN32
 #else // _USE_SSE
-    DataType temp[4];
+    DataType xBasis[4];
     DataType yBasis[4];
     DataType xyBasis[16];
     DataType xControlPointCoordinates[16];
@@ -626,7 +626,6 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
             index = y * deformationField->nx;
             oldXpre = oldYpre = 99999999;
             for (x = 0; x < deformationField->nx; x++) {
-
                 // The previous position at the current pixel position is read
                 xReal = static_cast<DataType>(fieldPtrX[index]);
                 yReal = static_cast<DataType>(fieldPtrY[index]);
@@ -643,8 +642,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                 xPre = Floor(xVoxel);
                 basis = xVoxel - static_cast<DataType>(xPre--);
                 if (basis < 0) basis = 0; //rounding error
-                if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
-                else get_SplineBasisValues<DataType>(basis, temp);
+                if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
+                else get_SplineBasisValues<DataType>(basis, xBasis);
 
                 yPre = Floor(yVoxel);
                 basis = yVoxel - static_cast<DataType>(yPre--);
@@ -688,7 +687,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                         coord = 0;
                         for (b = 0; b < 4; b++) {
                             for (a = 0; a < 4; a++) {
-                                xyBasis.f[coord++] = temp[a] * yBasis[b];
+                                xyBasis.f[coord++] = xBasis[a] * yBasis[b];
                             }
                         }
 
@@ -707,7 +706,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 #else
                         for (b = 0; b < 4; b++) {
                             for (a = 0; a < 4; a++) {
-                                DataType tempValue = temp[a] * yBasis[b];
+                                DataType tempValue = xBasis[a] * yBasis[b];
                                 xReal += xControlPointCoordinates[b * 4 + a] * tempValue;
                                 yReal += yControlPointCoordinates[b * 4 + a] * tempValue;
                             }
@@ -728,14 +727,14 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
    shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \
    controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \
    private(x, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, \
-   val, temp, yBasis, tempCurrent, xyBasis, tempX, tempY, \
+   val, xBasis, yBasis, tempCurrent, xyBasis, tempX, tempY, \
    xControlPointCoordinates, yControlPointCoordinates)
 #else // _USE_SSE
 #pragma  omp parallel for default(none) \
    shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \
    controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \
    private(x, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, coord, \
-   temp, yBasis, xyBasis, xControlPointCoordinates, yControlPointCoordinates)
+   xBasis, yBasis, xyBasis, xControlPointCoordinates, yControlPointCoordinates)
 #endif // _USE_SEE
 #endif // _OPENMP
         for (y = 0; y < deformationField->ny; y++) {
@@ -744,21 +743,21 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 
             yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
             basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
-            if (basis < 0) basis = 0; //rounding error
+            if (basis < 0) basis = 0; // rounding error
             if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
             else get_SplineBasisValues<DataType>(basis, yBasis);
 
             for (x = 0; x < deformationField->nx; x++) {
                 xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
                 basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
-                if (basis < 0) basis = 0; //rounding error
-                if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
-                else get_SplineBasisValues<DataType>(basis, temp);
+                if (basis < 0) basis = 0; // rounding error
+                if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
+                else get_SplineBasisValues<DataType>(basis, xBasis);
 #if _USE_SSE
-                val.f[0] = static_cast<float>(temp[0]);
-                val.f[1] = static_cast<float>(temp[1]);
-                val.f[2] = static_cast<float>(temp[2]);
-                val.f[3] = static_cast<float>(temp[3]);
+                val.f[0] = static_cast<float>(xBasis[0]);
+                val.f[1] = static_cast<float>(xBasis[1]);
+                val.f[2] = static_cast<float>(xBasis[2]);
+                val.f[3] = static_cast<float>(xBasis[3]);
                 tempCurrent = val.m;
                 for (a = 0; a < 4; a++) {
                     val.m = _mm_set_ps1(static_cast<float>(yBasis[a]));
@@ -767,10 +766,10 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 #else
                 coord = 0;
                 for (a = 0; a < 4; a++) {
-                    xyBasis[coord++] = temp[0] * yBasis[a];
-                    xyBasis[coord++] = temp[1] * yBasis[a];
-                    xyBasis[coord++] = temp[2] * yBasis[a];
-                    xyBasis[coord++] = temp[3] * yBasis[a];
+                    xyBasis[coord++] = xBasis[0] * yBasis[a];
+                    xyBasis[coord++] = xBasis[1] * yBasis[a];
+                    xyBasis[coord++] = xBasis[2] * yBasis[a];
+                    xyBasis[coord++] = xBasis[3] * yBasis[a];
                 }
 #endif
                 if (oldXpre != xPre || oldYpre != yPre) {
@@ -837,7 +836,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                             int *mask,
                                             bool composition,
                                             bool bspline,
-                                            bool force_no_lut = false) {
+                                            bool forceNoLut = false) {
 #if _USE_SSE
     union {
         __m128 m;
@@ -1111,7 +1110,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 #endif // _USE_SSE
 
         // Assess if lookup table can be used
-        if (gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && force_no_lut == false) {
+        if (gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && forceNoLut == false) {
             // Assign a single array that will contain all coefficients
             DataType *coefficients = (DataType*)malloc(125 * 64 * sizeof(DataType));
             // Compute and store all required coefficients
@@ -1462,7 +1461,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
                                     int *mask,
                                     bool composition,
                                     bool bspline,
-                                    bool force_no_lut) {
+                                    bool forceNoLut) {
     if (splineControlPoint->datatype != deformationField->datatype)
         NR_FATAL_ERROR("The spline control point image and the deformation field image are expected to be of the same type");
 
@@ -1471,11 +1470,11 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
         NR_FATAL_ERROR("SSE computation has only been implemented for single precision");
 #endif
 
-    bool MrPropre = false;
-    if (mask == nullptr) {
+    unique_ptr<int[]> currentMask;
+    if (!mask) {
         // Active voxel are all superior to -1, 0 thus will do !
-        MrPropre = true;
-        mask = (int*)calloc(NiftiImage::calcVoxelNumber(deformationField, 3), sizeof(int));
+        currentMask.reset(new int[NiftiImage::calcVoxelNumber(deformationField, 3)]());
+        mask = currentMask.get();
     }
 
     // Check if an affine initialisation is required
@@ -1519,10 +1518,10 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
         } else {
             switch (deformationField->datatype) {
             case NIFTI_TYPE_FLOAT32:
-                reg_cubic_spline_getDeformationField3D<float>(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut);
+                reg_cubic_spline_getDeformationField3D<float>(splineControlPoint, deformationField, mask, composition, bspline, forceNoLut);
                 break;
             case NIFTI_TYPE_FLOAT64:
-                reg_cubic_spline_getDeformationField3D<double>(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut);
+                reg_cubic_spline_getDeformationField3D<double>(splineControlPoint, deformationField, mask, composition, bspline, forceNoLut);
                 break;
             default:
                 NR_FATAL_ERROR("Only single or double precision is implemented for deformation field");
@@ -1534,12 +1533,10 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
         if (splineControlPoint->ext_list[1].edata != nullptr) {
             reg_affine_getDeformationField(reinterpret_cast<mat44*>(splineControlPoint->ext_list[1].edata),
                                            deformationField,
-                                           true, //composition
+                                           true, // composition
                                            mask);
         }
     }
-    if (MrPropre)
-        free(mask);
 }
 /* *************************************************************** */
 template<class DataType>
@@ -3497,7 +3494,7 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
     flowField->intent_p1 = DISP_VEL_FIELD;
     reg_getDeformationFromDisplacement(flowField);
 
-    // fake the number of extension here to avoid the second half of the affine
+    // Fake the number of extension here to avoid the second half of the affine
     int oldNumExt = velocityFieldGrid->num_ext;
     if (oldNumExt > 1)
         velocityFieldGrid->num_ext = 1;
@@ -3508,7 +3505,7 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
     reg_spline_getDeformationField(velocityFieldGrid,
                                    flowField,
                                    nullptr, // mask
-                                   true,  //composition
+                                   true,  // composition
                                    true); // bspline
 
     velocityFieldGrid->num_ext = oldNumExt;
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index ad1f0daf..ad6f930d 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -73,7 +73,7 @@ void reg_spline_getDeformationField(nifti_image *controlPointGridImage,
                                     int *mask = nullptr,
                                     bool composition = false,
                                     bool bspline = true,
-                                    bool force_no_lut = false);
+                                    bool forceNoLut = false);
 /* *************************************************************** */
 /** @brief Upsample an image from voxel space to node space using
  * millimetre correspondences.
diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp
index 6565cb83..244bf4c0 100755
--- a/reg-lib/cpu/_reg_splineBasis.cpp
+++ b/reg-lib/cpu/_reg_splineBasis.cpp
@@ -460,36 +460,34 @@ template void set_second_order_bspline_basis_values<double>(double*, double*, do
 template <class DataType>
 void get_SlidedValues(DataType& defX,
                       DataType& defY,
-                      const int X,
-                      const int Y,
+                      const int x,
+                      const int y,
                       const DataType *defPtrX,
                       const DataType *defPtrY,
-                      const mat44 *df_voxel2Real,
+                      const mat44 *dfVoxel2Real,
                       const int *dim,
                       const bool displacement) {
-    int newX = X;
-    int newY = Y;
-    if (X < 0) {
+    int newX = x;
+    if (x < 0)
         newX = 0;
-    } else if (X >= dim[1]) {
+    else if (x >= dim[1])
         newX = dim[1] - 1;
-    }
-    if (Y < 0) {
+
+    int newY = y;
+    if (y < 0)
         newY = 0;
-    } else if (Y >= dim[2]) {
+    else if (y >= dim[2])
         newY = dim[2] - 1;
-    }
+
     DataType shiftValueX = 0;
     DataType shiftValueY = 0;
     if (!displacement) {
-        int shiftIndexX = X - newX;
-        int shiftIndexY = Y - newY;
-        shiftValueX = shiftIndexX * df_voxel2Real->m[0][0] +
-            shiftIndexY * df_voxel2Real->m[0][1];
-        shiftValueY = shiftIndexX * df_voxel2Real->m[1][0] +
-            shiftIndexY * df_voxel2Real->m[1][1];
+        const int shiftIndexX = x - newX;
+        const int shiftIndexY = y - newY;
+        shiftValueX = shiftIndexX * dfVoxel2Real->m[0][0] + shiftIndexY * dfVoxel2Real->m[0][1];
+        shiftValueY = shiftIndexX * dfVoxel2Real->m[1][0] + shiftIndexY * dfVoxel2Real->m[1][1];
     }
-    size_t index = newY * dim[1] + newX;
+    const int index = newY * dim[1] + newX;
     defX = defPtrX[index] + shiftValueX;
     defY = defPtrY[index] + shiftValueY;
 }
@@ -500,54 +498,54 @@ template <class DataType>
 void get_SlidedValues(DataType& defX,
                       DataType& defY,
                       DataType& defZ,
-                      const int X,
-                      const int Y,
-                      const int Z,
+                      const int x,
+                      const int y,
+                      const int z,
                       const DataType *defPtrX,
                       const DataType *defPtrY,
                       const DataType *defPtrZ,
-                      const mat44 *df_voxel2Real,
+                      const mat44 *dfVoxel2Real,
                       const int *dim,
                       const bool displacement) {
-    int newX = X;
-    int newY = Y;
-    int newZ = Z;
-    if (X < 0) {
+    int newX = x;
+    if (x < 0)
         newX = 0;
-    } else if (X >= dim[1]) {
+    else if (x >= dim[1])
         newX = dim[1] - 1;
-    }
-    if (Y < 0) {
+
+    int newY = y;
+    if (y < 0)
         newY = 0;
-    } else if (Y >= dim[2]) {
+    else if (y >= dim[2])
         newY = dim[2] - 1;
-    }
-    if (Z < 0) {
+
+    int newZ = z;
+    if (z < 0)
         newZ = 0;
-    } else if (Z >= dim[3]) {
+    else if (z >= dim[3])
         newZ = dim[3] - 1;
-    }
+
     DataType shiftValueX = 0;
     DataType shiftValueY = 0;
     DataType shiftValueZ = 0;
     if (!displacement) {
-        int shiftIndexX = X - newX;
-        int shiftIndexY = Y - newY;
-        int shiftIndexZ = Z - newZ;
+        const int shiftIndexX = x - newX;
+        const int shiftIndexY = y - newY;
+        const int shiftIndexZ = z - newZ;
         shiftValueX =
-            shiftIndexX * df_voxel2Real->m[0][0] +
-            shiftIndexY * df_voxel2Real->m[0][1] +
-            shiftIndexZ * df_voxel2Real->m[0][2];
+            shiftIndexX * dfVoxel2Real->m[0][0] +
+            shiftIndexY * dfVoxel2Real->m[0][1] +
+            shiftIndexZ * dfVoxel2Real->m[0][2];
         shiftValueY =
-            shiftIndexX * df_voxel2Real->m[1][0] +
-            shiftIndexY * df_voxel2Real->m[1][1] +
-            shiftIndexZ * df_voxel2Real->m[1][2];
+            shiftIndexX * dfVoxel2Real->m[1][0] +
+            shiftIndexY * dfVoxel2Real->m[1][1] +
+            shiftIndexZ * dfVoxel2Real->m[1][2];
         shiftValueZ =
-            shiftIndexX * df_voxel2Real->m[2][0] +
-            shiftIndexY * df_voxel2Real->m[2][1] +
-            shiftIndexZ * df_voxel2Real->m[2][2];
+            shiftIndexX * dfVoxel2Real->m[2][0] +
+            shiftIndexY * dfVoxel2Real->m[2][1] +
+            shiftIndexZ * dfVoxel2Real->m[2][2];
     }
-    size_t index = (newZ * dim[2] + newY) * dim[1] + newX;
+    const int index = (newZ * dim[2] + newY) * dim[1] + newX;
     defX = defPtrX[index] + shiftValueX;
     defY = defPtrY[index] + shiftValueY;
     defZ = defPtrZ[index] + shiftValueZ;
diff --git a/reg-lib/cpu/_reg_splineBasis.h b/reg-lib/cpu/_reg_splineBasis.h
index 77cd6dd8..9c645a26 100755
--- a/reg-lib/cpu/_reg_splineBasis.h
+++ b/reg-lib/cpu/_reg_splineBasis.h
@@ -84,24 +84,24 @@ void get_SplineBasisValues(DataType basis,
 template <class DataType>
 void get_SlidedValues(DataType &defX,
                       DataType &defY,
-                      const int X,
-                      const int Y,
+                      const int x,
+                      const int y,
                       const DataType *defPtrX,
                       const DataType *defPtrY,
-                      const mat44 *df_voxel2Real,
+                      const mat44 *dfVoxel2Real,
                       const int *dim,
                       const bool displacement);
 template <class DataType>
 void get_SlidedValues(DataType &defX,
                       DataType &defY,
                       DataType &defZ,
-                      const int X,
-                      const int Y,
-                      const int Z,
+                      const int x,
+                      const int y,
+                      const int z,
                       const DataType *defPtrX,
                       const DataType *defPtrY,
                       const DataType *defPtrZ,
-                      const mat44 *df_voxel2Real,
+                      const mat44 *dfVoxel2Real,
                       const int *dim,
                       const bool displacement);
 
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 202eaa76..6a7d53a2 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -88,7 +88,6 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar
 }
 /* *************************************************************** */
 void CudaCompute::GetDeformationField(bool composition, bool bspline) {
-    // TODO Fix reg_spline_getDeformationField_gpu to accept composition
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     reg_spline_getDeformationField_gpu(con.F3dContent::GetControlPointGrid(),
                                        con.F3dContent::GetReference(),
@@ -96,6 +95,7 @@ void CudaCompute::GetDeformationField(bool composition, bool bspline) {
                                        con.GetDeformationFieldCuda(),
                                        con.GetReferenceMaskCuda(),
                                        con.GetActiveVoxelNumber(),
+                                       composition,
                                        bspline);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index af5d1b9c..87e1f975 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -140,14 +140,19 @@ __device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quo
     rem = num % denom;
 }
 /* *************************************************************** */
+template<bool is3d>
 __device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dims) {
     int quot = 0, rem;
-    if (dims.z > 1)
+    if constexpr (is3d)
         reg_div_cuda(index, dims.x * dims.y, quot, rem);
     else rem = index;
     const int z = quot;
     reg_div_cuda(rem, dims.x, quot, rem);
-    const int y = quot, x = rem;
+    const int& y = quot, &x = rem;
     return { x, y, z };
 }
 /* *************************************************************** */
+__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dims) {
+    return dims.z > 1 ? reg_indexToDims_cuda<true>(index, dims) : reg_indexToDims_cuda<false>(index, dims);
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 9ce6ec2c..f221a67d 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -22,6 +22,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
                                         float4 *deformationFieldCuda,
                                         const int *maskCuda,
                                         const size_t activeVoxelNumber,
+                                        const bool composition,
                                         const bool bspline) {
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
@@ -35,6 +36,13 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
     auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear,
                                                  activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1);
 
+    // Get the reference matrix if composition is required
+    thrust::device_vector<mat44> referenceMatrix;
+    if (composition) {
+        const mat44 *refMatPtr = controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk;
+        referenceMatrix = thrust::device_vector<mat44>(refMatPtr, refMatPtr + 1);
+    }
+
     if (referenceImage->nz > 1) {
         const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField3D;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
@@ -44,10 +52,12 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
         reg_spline_getDeformationField3D<<<gridDims, blockDims, blocks * 8 * sizeof(float)>>>(deformationFieldCuda,
                                                                                               *controlPointTexture,
                                                                                               *maskTexture,
+                                                                                              referenceMatrix.data().get(),
                                                                                               referenceImageDim,
                                                                                               controlPointImageDim,
                                                                                               controlPointVoxelSpacing,
                                                                                               (unsigned)activeVoxelNumber,
+                                                                                              composition,
                                                                                               bspline);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
@@ -59,10 +69,12 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
         reg_spline_getDeformationField2D<<<gridDims, blockDims, blocks * 4 * sizeof(float)>>>(deformationFieldCuda,
                                                                                               *controlPointTexture,
                                                                                               *maskTexture,
+                                                                                              referenceMatrix.data().get(),
                                                                                               referenceImageDim,
                                                                                               controlPointImageDim,
                                                                                               controlPointVoxelSpacing,
                                                                                               (unsigned)activeVoxelNumber,
+                                                                                              composition,
                                                                                               bspline);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
@@ -527,13 +539,13 @@ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
 
     // Copy over the number of required squaring steps
     // The initial flow field is generated using cubic B-Spline interpolation/approximation
-    // TODO Composition is needed
     reg_spline_getDeformationField_gpu(velocityFieldGrid,
                                        flowField,
                                        velocityFieldGridCuda,
                                        flowFieldCuda,
                                        maskCuda,
                                        activeVoxelNumber,
+                                       true,  // composition
                                        true); // bspline
 
     velocityFieldGrid->num_ext = oldNumExt;
@@ -675,6 +687,7 @@ void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
                                            deformationFieldCuda,
                                            maskCuda.data().get(),
                                            voxelNumber,
+                                           false, // composition
                                            true); // bspline
     } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) {
         // Create an image to store the flow field
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index 0c0e80a7..d3432ca1 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -21,6 +21,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
                                         float4 *deformationFieldCuda,
                                         const int *maskCuda,
                                         const size_t activeVoxelNumber,
+                                        const bool composition,
                                         const bool bspline);
 /* *************************************************************** */
 float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage,
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index a95f4bba..05644a08 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -256,23 +256,22 @@ __device__ float4 GetSlidedValues(int x, int y,
                                   const int3& referenceImageDim,
                                   const mat44& affineMatrix) {
     int newX = x;
-    int newY = y;
-    if (x < 0) {
+    if (x < 0)
         newX = 0;
-    } else if (x >= referenceImageDim.x) {
+    else if (x >= referenceImageDim.x)
         newX = referenceImageDim.x - 1;
-    }
-    if (y < 0) {
+
+    int newY = y;
+    if (y < 0)
         newY = 0;
-    } else if (y >= referenceImageDim.y) {
+    else if (y >= referenceImageDim.y)
         newY = referenceImageDim.y - 1;
-    }
 
     x -= newX;
     y -= newY;
-    const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1],
-                                            x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1],
-                                            0.f, 0.f);
+    const float4& slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1],
+                                             x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1],
+                                             0.f, 0.f);
     return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, newY * referenceImageDim.x + newX);
 }
 /* *************************************************************** */
@@ -281,177 +280,215 @@ __device__ float4 GetSlidedValues(int x, int y, int z,
                                   const int3& referenceImageDim,
                                   const mat44& affineMatrix) {
     int newX = x;
-    int newY = y;
-    int newZ = z;
-    if (x < 0) {
+    if (x < 0)
         newX = 0;
-    } else if (x >= referenceImageDim.x) {
+    else if (x >= referenceImageDim.x)
         newX = referenceImageDim.x - 1;
-    }
-    if (y < 0) {
+
+    int newY = y;
+    if (y < 0)
         newY = 0;
-    } else if (y >= referenceImageDim.y) {
+    else if (y >= referenceImageDim.y)
         newY = referenceImageDim.y - 1;
-    }
-    if (z < 0) {
+
+    int newZ = z;
+    if (z < 0)
         newZ = 0;
-    } else if (z >= referenceImageDim.z) {
+    else if (z >= referenceImageDim.z)
         newZ = referenceImageDim.z - 1;
-    }
 
     x -= newX;
     y -= newY;
     z -= newZ;
-    const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2],
-                                            x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2],
-                                            x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2],
-                                            0.f);
+    const float4& slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2],
+                                             x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2],
+                                             x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2],
+                                             0.f);
     return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX);
 }
 /* *************************************************************** */
 __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
                                                  cudaTextureObject_t controlPointTexture,
                                                  cudaTextureObject_t maskTexture,
+                                                 const mat44 *referenceMatrix,
                                                  const int3 referenceImageDim,
                                                  const int3 controlPointImageDim,
                                                  const float3 controlPointVoxelSpacing,
                                                  const unsigned activeVoxelNumber,
+                                                 const bool composition,
                                                  const bool bspline) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-        int quot, rem;
-        reg_div_cuda(tid2, referenceImageDim.x * referenceImageDim.y, quot, rem);
-        const int z = quot;
-        reg_div_cuda(rem, referenceImageDim.x, quot, rem);
-        const int y = quot, x = rem;
-
+    if (tid >= activeVoxelNumber) return;
+    const int tid2 = tex1Dfetch<int>(maskTexture, tid);
+    const auto&& [x, y, z] = reg_indexToDims_cuda<true>(tid2, referenceImageDim);
+    int3 nodePre;
+    float3 basis;
+
+    if (composition) { // Composition of deformation fields
+        // The previous position at the current pixel position is read
+        const float4 node = deformationField[tid];
+
+        // From real to pixel position in the CPP
+        const float xVoxel = (referenceMatrix->m[0][0] * node.x +
+                              referenceMatrix->m[0][1] * node.y +
+                              referenceMatrix->m[0][2] * node.z +
+                              referenceMatrix->m[0][3]);
+        const float yVoxel = (referenceMatrix->m[1][0] * node.x +
+                              referenceMatrix->m[1][1] * node.y +
+                              referenceMatrix->m[1][2] * node.z +
+                              referenceMatrix->m[1][3]);
+        const float zVoxel = (referenceMatrix->m[2][0] * node.x +
+                              referenceMatrix->m[2][1] * node.y +
+                              referenceMatrix->m[2][2] * node.z +
+                              referenceMatrix->m[2][3]);
+
+        if (xVoxel < 0 || xVoxel >= referenceImageDim.x ||
+            yVoxel < 0 || yVoxel >= referenceImageDim.y ||
+            zVoxel < 0 || zVoxel >= referenceImageDim.z) return;
+
+        nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) };
+        basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) };
+    } else { // starting deformation field is blank - !composition
         // The "nearest previous" node is determined [0,0,0]
-        const int3 nodeAnte = {
-            int((float)x / controlPointVoxelSpacing.x),
-            int((float)y / controlPointVoxelSpacing.y),
-            int((float)z / controlPointVoxelSpacing.z)
-        };
-
-        // Z basis values
-        extern __shared__ float yBasis[];   // Shared memory
-        const unsigned sharedMemIndex = 4 * threadIdx.x;
-        // Compute the shared memory offset which corresponds to four times the number of threads per block
-        float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z];
-        float relative = (float)z / controlPointVoxelSpacing.z - (float)nodeAnte.z;
-        if (relative < 0) relative = 0; // rounding error
-        if (bspline) GetBasisBSplineValues(relative, &zBasis[sharedMemIndex]);
-        else GetBasisSplineValues(relative, &zBasis[sharedMemIndex]);
-
-        // Y basis values
-        relative = (float)y / controlPointVoxelSpacing.y - (float)nodeAnte.y;
-        if (relative < 0) relative = 0; // rounding error
-        if (bspline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]);
-        else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]);
-
-        // X basis values
-        float xBasis[4];
-        relative = (float)x / controlPointVoxelSpacing.x - (float)nodeAnte.x;
-        if (relative < 0) relative = 0; // rounding error
-        if (bspline) GetBasisBSplineValues(relative, xBasis);
-        else GetBasisSplineValues(relative, xBasis);
-
-        float4 displacement{};
-        for (int c = 0; c < 4; c++) {
-            float3 tempDisplacement{};
-            int indexYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y) * controlPointImageDim.x;
-            for (int b = 0; b < 4; b++) {
-                int indexXYZ = indexYZ + nodeAnte.x;
-                const float4 nodeCoefficientA = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
-                const float4 nodeCoefficientB = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
-                const float4 nodeCoefficientC = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
-                const float4 nodeCoefficientD = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
-
-                const float& basis = yBasis[sharedMemIndex + b];
-                tempDisplacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
-                                               nodeCoefficientB.x * xBasis[1] +
-                                               nodeCoefficientC.x * xBasis[2] +
-                                               nodeCoefficientD.x * xBasis[3]);
-
-                tempDisplacement.y += basis * (nodeCoefficientA.y * xBasis[0] +
-                                               nodeCoefficientB.y * xBasis[1] +
-                                               nodeCoefficientC.y * xBasis[2] +
-                                               nodeCoefficientD.y * xBasis[3]);
-
-                tempDisplacement.z += basis * (nodeCoefficientA.z * xBasis[0] +
-                                               nodeCoefficientB.z * xBasis[1] +
-                                               nodeCoefficientC.z * xBasis[2] +
-                                               nodeCoefficientD.z * xBasis[3]);
-
-                indexYZ += controlPointImageDim.x;
-            }
+        const float xVoxel = float(x) / controlPointVoxelSpacing.x;
+        const float yVoxel = float(y) / controlPointVoxelSpacing.y;
+        const float zVoxel = float(z) / controlPointVoxelSpacing.z;
+        nodePre = { int(xVoxel), int(yVoxel), int(zVoxel) };
+        basis = { xVoxel - float(nodePre.x), yVoxel - float(nodePre.y), zVoxel - float(nodePre.z) };
+    }
+    // Z basis values
+    extern __shared__ float yBasis[];   // Shared memory
+    const unsigned sharedMemIndex = 4 * threadIdx.x;
+    // Compute the shared memory offset which corresponds to four times the number of threads per block
+    float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z];
+    if (basis.z < 0) basis.z = 0; // rounding error
+    if (bspline) GetBasisBSplineValues(basis.z, &zBasis[sharedMemIndex]);
+    else GetBasisSplineValues(basis.z, &zBasis[sharedMemIndex]);
+
+    // Y basis values
+    if (basis.y < 0) basis.y = 0; // rounding error
+    if (bspline) GetBasisBSplineValues(basis.y, &yBasis[sharedMemIndex]);
+    else GetBasisSplineValues(basis.y, &yBasis[sharedMemIndex]);
+
+    // X basis values
+    float xBasis[4];
+    if (basis.x < 0) basis.x = 0; // rounding error
+    if (bspline) GetBasisBSplineValues(basis.x, xBasis);
+    else GetBasisSplineValues(basis.x, xBasis);
+
+    float4 displacement{};
+    for (int c = 0; c < 4; c++) {
+        float3 tempDisplacement{};
+        int indexYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y) * controlPointImageDim.x;
+        for (int b = 0; b < 4; b++) {
+            int indexXYZ = indexYZ + nodePre.x;
+            const float4& nodeCoefficientA = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
+            const float4& nodeCoefficientB = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
+            const float4& nodeCoefficientC = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
+            const float4& nodeCoefficientD = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
 
-            const float& basis = zBasis[sharedMemIndex + c];
-            displacement.x += basis * tempDisplacement.x;
-            displacement.y += basis * tempDisplacement.y;
-            displacement.z += basis * tempDisplacement.z;
+            const float& basis = yBasis[sharedMemIndex + b];
+            tempDisplacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
+                                           nodeCoefficientB.x * xBasis[1] +
+                                           nodeCoefficientC.x * xBasis[2] +
+                                           nodeCoefficientD.x * xBasis[3]);
+
+            tempDisplacement.y += basis * (nodeCoefficientA.y * xBasis[0] +
+                                           nodeCoefficientB.y * xBasis[1] +
+                                           nodeCoefficientC.y * xBasis[2] +
+                                           nodeCoefficientD.y * xBasis[3]);
+
+            tempDisplacement.z += basis * (nodeCoefficientA.z * xBasis[0] +
+                                           nodeCoefficientB.z * xBasis[1] +
+                                           nodeCoefficientC.z * xBasis[2] +
+                                           nodeCoefficientD.z * xBasis[3]);
+
+            indexYZ += controlPointImageDim.x;
         }
 
-        deformationField[tid] = displacement;
+        const float& basis = zBasis[sharedMemIndex + c];
+        displacement.x += basis * tempDisplacement.x;
+        displacement.y += basis * tempDisplacement.y;
+        displacement.z += basis * tempDisplacement.z;
     }
+    deformationField[tid] = displacement;
 }
 /* *************************************************************** */
 __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
                                                  cudaTextureObject_t controlPointTexture,
                                                  cudaTextureObject_t maskTexture,
+                                                 const mat44 *referenceMatrix,
                                                  const int3 referenceImageDim,
                                                  const int3 controlPointImageDim,
                                                  const float3 controlPointVoxelSpacing,
                                                  const unsigned activeVoxelNumber,
+                                                 const bool composition,
                                                  const bool bspline) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-        int quot, rem;
-        reg_div_cuda(tid2, referenceImageDim.x, quot, rem);
-        const int y = quot, x = rem;
-
+    if (tid >= activeVoxelNumber) return;
+    const int tid2 = tex1Dfetch<int>(maskTexture, tid);
+    const auto&& [x, y, z] = reg_indexToDims_cuda<false>(tid2, referenceImageDim);
+    int2 nodePre;
+    float2 basis;
+
+    if (composition) { // Composition of deformation fields
+        // The previous position at the current pixel position is read
+        const float4 node = deformationField[tid];
+
+        // From real to pixel position in the CPP
+        const float xVoxel = (referenceMatrix->m[0][0] * node.x +
+                              referenceMatrix->m[0][1] * node.y +
+                              referenceMatrix->m[0][3]);
+        const float yVoxel = (referenceMatrix->m[1][0] * node.x +
+                              referenceMatrix->m[1][1] * node.y +
+                              referenceMatrix->m[1][3]);
+
+        if (xVoxel < 0 || xVoxel >= referenceImageDim.x ||
+            yVoxel < 0 || yVoxel >= referenceImageDim.y) return;
+
+        nodePre = { Floor(xVoxel), Floor(yVoxel) };
+        basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) };
+    } else { // starting deformation field is blank - !composition
         // The "nearest previous" node is determined [0,0,0]
-        const int2 nodeAnte = { int((float)x / controlPointVoxelSpacing.x), int((float)y / controlPointVoxelSpacing.y) };
-
-        // Y basis values
-        extern __shared__ float yBasis[];   // Shared memory
-        const unsigned sharedMemIndex = 4 * threadIdx.x;
-        float relative = (float)y / controlPointVoxelSpacing.y - (float)nodeAnte.y;
-        if (relative < 0) relative = 0; // rounding error
-        if (bspline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]);
-        else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]);
-
-        // X basis values
-        float xBasis[4];
-        relative = (float)x / controlPointVoxelSpacing.x - (float)nodeAnte.x;
-        if (relative < 0) relative = 0; // rounding error
-        if (bspline) GetBasisBSplineValues(relative, xBasis);
-        else GetBasisSplineValues(relative, xBasis);
-
-        float4 displacement{};
-        for (int b = 0; b < 4; b++) {
-            int index = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
-
-            const float4 nodeCoefficientA = tex1Dfetch<float4>(controlPointTexture, index++);
-            const float4 nodeCoefficientB = tex1Dfetch<float4>(controlPointTexture, index++);
-            const float4 nodeCoefficientC = tex1Dfetch<float4>(controlPointTexture, index++);
-            const float4 nodeCoefficientD = tex1Dfetch<float4>(controlPointTexture, index);
-
-            const float& basis = yBasis[sharedMemIndex + b];
-            displacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
-                                       nodeCoefficientB.x * xBasis[1] +
-                                       nodeCoefficientC.x * xBasis[2] +
-                                       nodeCoefficientD.x * xBasis[3]);
-
-            displacement.y += basis * (nodeCoefficientA.y * xBasis[0] +
-                                       nodeCoefficientB.y * xBasis[1] +
-                                       nodeCoefficientC.y * xBasis[2] +
-                                       nodeCoefficientD.y * xBasis[3]);
-        }
-
-        deformationField[tid] = displacement;
+        const float xVoxel = float(x) / controlPointVoxelSpacing.x;
+        const float yVoxel = float(y) / controlPointVoxelSpacing.y;
+        nodePre = { int(xVoxel), int(yVoxel) };
+        basis = { xVoxel - float(nodePre.x), yVoxel - float(nodePre.y) };
+    }
+    // Y basis values
+    extern __shared__ float yBasis[]; // Shared memory
+    const unsigned sharedMemIndex = 4 * threadIdx.x;
+    if (basis.y < 0) basis.y = 0; // rounding error
+    if (bspline) GetBasisBSplineValues(basis.y, &yBasis[sharedMemIndex]);
+    else GetBasisSplineValues(basis.y, &yBasis[sharedMemIndex]);
+
+    // X basis values
+    float xBasis[4];
+    if (basis.x < 0) basis.x = 0; // rounding error
+    if (bspline) GetBasisBSplineValues(basis.x, xBasis);
+    else GetBasisSplineValues(basis.x, xBasis);
+
+    float4 displacement{};
+    for (int b = 0; b < 4; b++) {
+        int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x;
+
+        const float4& nodeCoefficientA = tex1Dfetch<float4>(controlPointTexture, index++);
+        const float4& nodeCoefficientB = tex1Dfetch<float4>(controlPointTexture, index++);
+        const float4& nodeCoefficientC = tex1Dfetch<float4>(controlPointTexture, index++);
+        const float4& nodeCoefficientD = tex1Dfetch<float4>(controlPointTexture, index);
+
+        const float& basis = yBasis[sharedMemIndex + b];
+        displacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
+                                   nodeCoefficientB.x * xBasis[1] +
+                                   nodeCoefficientC.x * xBasis[2] +
+                                   nodeCoefficientD.x * xBasis[3]);
+
+        displacement.y += basis * (nodeCoefficientA.y * xBasis[0] +
+                                   nodeCoefficientB.y * xBasis[1] +
+                                   nodeCoefficientC.y * xBasis[2] +
+                                   nodeCoefficientD.y * xBasis[3]);
     }
+    deformationField[tid] = displacement;
 }
 /* *************************************************************** */
 __global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivativeValues,
@@ -865,19 +902,19 @@ __global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices,
         const int y = quot, x = rem;
 
         // the "nearest previous" node is determined [0,0,0]
-        const int2 nodeAnte = { Floor((float)x / controlPointSpacing.x), Floor((float)y / controlPointSpacing.y) };
+        const int2 nodePre = { Floor((float)x / controlPointSpacing.x), Floor((float)y / controlPointSpacing.y) };
 
         float xBasis[4], yBasis[4], xFirst[4], yFirst[4], relative;
 
-        relative = fabsf((float)x / controlPointSpacing.x - (float)nodeAnte.x);
+        relative = fabsf((float)x / controlPointSpacing.x - (float)nodePre.x);
         GetFirstBSplineValues(relative, xBasis, xFirst);
 
-        relative = fabsf((float)y / controlPointSpacing.y - (float)nodeAnte.y);
+        relative = fabsf((float)y / controlPointSpacing.y - (float)nodePre.y);
         GetFirstBSplineValues(relative, yBasis, yFirst);
 
         float2 tx{}, ty{};
         for (int b = 0; b < 4; ++b) {
-            int indexXY = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
+            int indexXY = (nodePre.y + b) * controlPointImageDim.x + nodePre.x;
 
             float4 nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXY++);
             float2 basis = make_float2(xFirst[0] * yBasis[b], xBasis[0] * yFirst[b]);
@@ -936,7 +973,7 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices,
         const int y = quot, x = rem;
 
         // the "nearest previous" node is determined [0,0,0]
-        const int3 nodeAnte = {
+        const int3 nodePre = {
             Floor((float)x / controlPointSpacing.x),
             Floor((float)y / controlPointSpacing.y),
             Floor((float)z / controlPointSpacing.z)
@@ -948,19 +985,19 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices,
         float xBasis[4], yBasis[4], zBasis[4], xFirst[4], relative;
         const unsigned sharedMemIndex = 4 * threadIdx.x;
 
-        relative = fabsf((float)x / controlPointSpacing.x - (float)nodeAnte.x);
+        relative = fabsf((float)x / controlPointSpacing.x - (float)nodePre.x);
         GetFirstBSplineValues(relative, xBasis, xFirst);
 
-        relative = fabsf((float)y / controlPointSpacing.y - (float)nodeAnte.y);
+        relative = fabsf((float)y / controlPointSpacing.y - (float)nodePre.y);
         GetFirstBSplineValues(relative, yBasis, &yFirst[sharedMemIndex]);
 
-        relative = fabsf((float)z / controlPointSpacing.z - (float)nodeAnte.z);
+        relative = fabsf((float)z / controlPointSpacing.z - (float)nodePre.z);
         GetFirstBSplineValues(relative, zBasis, &zFirst[sharedMemIndex]);
 
         float3 tx{}, ty{}, tz{};
         for (int c = 0; c < 4; ++c) {
             for (int b = 0; b < 4; ++b) {
-                int indexXYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x;
+                int indexXYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y + b) * controlPointImageDim.x + nodePre.x;
                 float3 basisXY{ yBasis[b] * zBasis[c], yFirst[sharedMemIndex + b] * zBasis[c], yBasis[b] * zFirst[sharedMemIndex + c] };
 
                 float4 nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
@@ -1644,7 +1681,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
                                                  const int3& cppDims,
                                                  const Basis& basis,
                                                  const mat33& reorientation) {
-    const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
+    const auto&& [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
     if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 ||
         (is3d && (z < 1 || z >= cppDims.z - 1))) return {};
 
@@ -1721,7 +1758,7 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie
                                                              const unsigned voxelNumber) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (index >= voxelNumber) return;
-    const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims);
+    const auto&& [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
     auto gradVal = transGradient[index];
 
     if constexpr (is3d) {
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index 444e6025..c49a1a24 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -141,7 +141,7 @@ class GetDeformationFieldTest {
         reg_getDeformationFromDisplacement(controlPointGrid3d);
         testDataComp.emplace_back(TestDataComp(
             "2D Composition ID",
-            reference3d,
+            reference2d,
             controlPointGrid2d,
             defField2d,
             expDefField2d
@@ -167,7 +167,7 @@ class GetDeformationFieldTest {
             defField3dPtr[i] /= 1.1f;
         testDataComp.emplace_back(TestDataComp(
             "2D Composition Scaling",
-            reference3d,
+            reference2d,
             controlPointGrid2d,
             defField2d,
             expDefField2d
@@ -181,7 +181,7 @@ class GetDeformationFieldTest {
         ));
 
         for (auto&& data : testDataComp) {
-            for (auto&& platformType : { PlatformType::Cpu }) { // Test only on CPU
+            for (auto&& platformType : PlatformTypes) {
                 unique_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
                 // Make a copy of the test data

From 563a84291a97fe4b939184966d28e7dbfc41c590 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 18 Oct 2023 15:17:25 +0100
Subject: [PATCH 225/314] Refactorisations

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/Compute.cpp                           |   4 +-
 reg-lib/Compute.h                             |   2 +-
 reg-lib/cpu/_reg_kld.cpp                      |   4 +-
 reg-lib/cpu/_reg_mind.cpp                     |  14 +--
 reg-lib/cpu/_reg_nmi.cpp                      |   4 +-
 reg-lib/cpu/_reg_nmi.h                        |   2 +-
 reg-lib/cpu/_reg_resampling.cpp               | 106 +++++++++---------
 reg-lib/cpu/_reg_resampling.h                 |  26 ++---
 reg-lib/cpu/_reg_splineBasis.cpp              |  55 ++++-----
 reg-lib/cpu/_reg_ssd.cpp                      |  12 +-
 reg-lib/cpu/_reg_ssd.h                        |   4 +-
 reg-lib/cuda/CudaCompute.cu                   |   6 +-
 reg-lib/cuda/CudaCompute.h                    |   2 +-
 reg-lib/cuda/CudaContent.cpp                  |   1 +
 reg-lib/cuda/CudaF3dContent.cpp               |   2 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  |  10 +-
 .../cuda/_reg_localTransformation_kernels.cu  |  40 +++----
 reg-lib/cuda/_reg_nmi_gpu.cu                  |  19 ++--
 reg-lib/cuda/_reg_resampling_gpu.cu           |  17 ++-
 reg-lib/cuda/_reg_resampling_gpu.h            |  10 +-
 reg-lib/cuda/_reg_resampling_kernels.cu       |  18 ++-
 reg-test/reg_test_common.h                    |   5 +-
 reg-test/reg_test_nmi.cpp                     |   8 +-
 reg-test/reg_test_nmi_gradient.cpp            |  18 ++-
 reg-test/reg_test_regr_measure.cpp            |   6 +-
 26 files changed, 209 insertions(+), 188 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index fe2cd8b0..4772052f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-343
+344
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 42fa2ed1..64e73969 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -5,12 +5,12 @@
 #include "_reg_localTrans_regul.h"
 
 /* *************************************************************** */
-void Compute::ResampleImage(int inter, float paddingValue) {
+void Compute::ResampleImage(int interpolation, float paddingValue) {
     reg_resampleImage(con.GetFloating(),
                       con.GetWarped(),
                       con.GetDeformationField(),
                       con.GetReferenceMask(),
-                      inter,
+                      interpolation,
                       paddingValue);
 }
 /* *************************************************************** */
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index a810ceaf..a4137f5b 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -8,7 +8,7 @@ class Compute {
     Compute() = delete;
     Compute(Content& conIn): con(conIn) {}
 
-    virtual void ResampleImage(int inter, float paddingValue);
+    virtual void ResampleImage(int interpolation, float paddingValue);
     virtual double GetJacobianPenaltyTerm(bool approx);
     virtual void JacobianPenaltyTermGradient(float weight, bool approx);
     virtual double CorrectFolding(bool approx);
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index c202d0a5..cf3f5deb 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -173,8 +173,8 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
                                            nifti_image *measureGradient,
                                            const nifti_image *jacobianDetImg,
                                            const int *mask,
-                                           const int& currentTimepoint,
-                                           const double& timepointWeight) {
+                                           const int currentTimepoint,
+                                           const double timepointWeight) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index b620e9e6..375bc917 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -399,9 +399,9 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage,
                                  const double *timePointWeight,
                                  double *timePointWeightDescriptor,
                                  nifti_image *jacobianDetImage,
-                                 const int& descriptorOffset,
-                                 const int& referenceTimePoint,
-                                 const int& mindType) {
+                                 const int descriptorOffset,
+                                 const int referenceTimePoint,
+                                 const int mindType) {
     if (referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT32 &&
         referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT64)
         NR_FATAL_ERROR("The reference image descriptor is expected to be of floating precision type");
@@ -469,10 +469,10 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
                                             nifti_image *warpedGradient,
                                             nifti_image *warpedFloatingImageDescriptor,
                                             nifti_image *voxelBasedGradient,
-                                            const int& mindType,
-                                            const int& descriptorOffset,
-                                            const int& descriptorNumber,
-                                            const int& currentTimepoint) {
+                                            const int mindType,
+                                            const int descriptorOffset,
+                                            const int descriptorNumber,
+                                            const int currentTimepoint) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     vector<int> combinedMask(referenceMask, referenceMask + voxelNumber);
     reg_tools_removeNanFromMask(referenceImage, combinedMask.data());
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 19e001d6..240c9e3d 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -376,7 +376,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  double **entropyValues,
                                  const int *referenceMask,
                                  const int referenceTimePoint,
-                                 const bool approximatePW) {
+                                 const bool approximation) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         reg_getNMIValue<RefImgDataType>(referenceImage,
@@ -389,7 +389,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                         jointHistogramPro,
                                         entropyValues,
                                         referenceMask,
-                                        approximatePW);
+                                        approximation);
     }, NiftiImage::getFloatingDataType(referenceImage));
 
     double nmi = 0;
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 41040e48..063bf8f8 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -98,7 +98,7 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                      double **jointHistogramPro,
                      double **entropyValues,
                      const int *referenceMask,
-                     const bool approximation=true);
+                     const bool approximation);
 /* *************************************************************** */
 // Simple class to dynamically manage an array of pointers
 // Needed for multi channel NMI
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 48c89449..4b316d95 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -122,8 +122,8 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
         memcpy(*originalFloatingData, floatingImage->data, floatingImage->nvox * sizeof(DataType));
         NR_DEBUG("The floating image data has been copied");
 
-        /* As the tensor has 6 unique components that we need to worry about, read them out
-      for the floating image. */
+        // As the tensor has 6 unique components that we need to worry about, read them out
+        // for the floating image.
         DataType *firstVox = static_cast<DataType*>(floatingImage->data);
         // CAUTION: Here the tensor is assumed to be encoding in lower triangular order
         DataType *floatingIntensityXX = &firstVox[floatingVoxelNumber * dtIndicies[0]];
@@ -318,8 +318,8 @@ void ResampleImage3D(const nifti_image *floatingImage,
                      const nifti_image *deformationField,
                      nifti_image *warpedImage,
                      const int *mask,
-                     const FieldType& paddingValue,
-                     const int& kernel) {
+                     const FieldType paddingValue,
+                     const int kernel) {
 #ifdef _WIN32
     long  index;
     const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3);
@@ -499,8 +499,8 @@ void ResampleImage2D(const nifti_image *floatingImage,
                      const nifti_image *deformationField,
                      nifti_image *warpedImage,
                      const int *mask,
-                     const FieldType& paddingValue,
-                     const int& kernel) {
+                     const FieldType paddingValue,
+                     const int kernel) {
 #ifdef _WIN32
     long  index;
     const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 2);
@@ -640,7 +640,7 @@ void ResampleImage2D(const nifti_image *floatingImage,
  * a deformation field. The affine transformation has to be in
  * real coordinate and the deformation field is in mm in the space
  * of the reference image.
- * interp can be either 0, 1 or 3 meaning nearest neighbor, linear
+ * interpolation can be either 0, 1 or 3 meaning nearest neighbor, linear
  * or cubic spline interpolation.
  * every voxel which is not fully in the floating image takes the
  * backgreg_round value. The dtIndicies are an array of size 6
@@ -652,8 +652,8 @@ void reg_resampleImage(nifti_image *floatingImage,
                        nifti_image *warpedImage,
                        const nifti_image *deformationFieldImage,
                        const int *mask,
-                       const int& interp,
-                       const FieldType& paddingValue,
+                       const int interpolation,
+                       const FieldType paddingValue,
                        const int *dtIndicies,
                        const mat33 *jacMat) {
     // The floating image data is copied in case one deal with DTI
@@ -668,14 +668,14 @@ void reg_resampleImage(nifti_image *floatingImage,
                                                  warpedImage,
                                                  mask,
                                                  paddingValue,
-                                                 interp);
+                                                 interpolation);
     } else {
         ResampleImage2D<FloatingType, FieldType>(floatingImage,
                                                  deformationFieldImage,
                                                  warpedImage,
                                                  mask,
                                                  paddingValue,
-                                                 interp);
+                                                 interpolation);
     }
     // The temporary logged floating array is deleted and the original restored
     if (originalFloatingData != nullptr) {
@@ -692,8 +692,8 @@ void reg_resampleImage(nifti_image *floatingImage,
                        nifti_image *warpedImage,
                        const nifti_image *deformationField,
                        const int *mask,
-                       const int& interp,
-                       const float& paddingValue,
+                       const int interpolation,
+                       const float paddingValue,
                        const bool *dtiTimepoint,
                        const mat33 *jacMat) {
     if (floatingImage->datatype != warpedImage->datatype)
@@ -733,7 +733,7 @@ void reg_resampleImage(nifti_image *floatingImage,
                                                             warpedImage,
                                                             deformationField,
                                                             mask,
-                                                            interp,
+                                                            interpolation,
                                                             paddingValue,
                                                             dtIndicies,
                                                             jacMat);
@@ -748,8 +748,8 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
                               const nifti_image *deformationField,
                               nifti_image *warpedImage,
                               const int *mask,
-                              const FieldType& paddingValue,
-                              const int& kernel) {
+                              const FieldType paddingValue,
+                              const int kernel) {
 #ifdef _WIN32
     long index;
     const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3);
@@ -1017,10 +1017,10 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
                          const nifti_image *deformationField,
                          nifti_image *warpedImage,
                          const int *mask,
-                         const FieldType& paddingValue,
-                         const int& kernel,
+                         const FieldType paddingValue,
+                         const int kernel,
                          const mat33 *jacMat,
-                         const char& algorithm) {
+                         const char algorithm) {
 #ifdef _WIN32
     long index;
     const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3);
@@ -1433,10 +1433,10 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
                             nifti_image *warpedImage,
                             const nifti_image *deformationFieldImage,
                             const int *mask,
-                            const int& interp,
-                            const FieldType& paddingValue,
+                            const int interpolation,
+                            const FieldType paddingValue,
                             const mat33 *jacMat,
-                            const char& algorithm) {
+                            const char algorithm) {
     // The deformation field contains the position in the real world
     if (deformationFieldImage->nu > 2) {
         if (algorithm == 2) {
@@ -1446,7 +1446,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
                                                               warpedImage,
                                                               mask,
                                                               paddingValue,
-                                                              interp);
+                                                              interpolation);
         } else {
             NR_DEBUG("Running ResampleImage3D_PSF");
             ResampleImage3D_PSF<FloatingType, FieldType>(floatingImage,
@@ -1454,7 +1454,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
                                                          warpedImage,
                                                          mask,
                                                          paddingValue,
-                                                         interp,
+                                                         interpolation,
                                                          jacMat,
                                                          algorithm);
         }
@@ -1467,10 +1467,10 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
                            nifti_image *warpedImage,
                            const nifti_image *deformationField,
                            const int *mask,
-                           const int& interp,
-                           const float& paddingValue,
+                           const int interpolation,
+                           const float paddingValue,
                            const mat33 *jacMat,
-                           const char& algorithm) {
+                           const char algorithm) {
     if (floatingImage->datatype != warpedImage->datatype)
         NR_FATAL_ERROR("The floating and warped image should have the same data type");
     if (floatingImage->nt != warpedImage->nt)
@@ -1493,7 +1493,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
                                                                 warpedImage,
                                                                 deformationField,
                                                                 mask,
-                                                                interp,
+                                                                interpolation,
                                                                 paddingValue,
                                                                 jacMat,
                                                                 algorithm);
@@ -1507,7 +1507,7 @@ template <class DataType>
 void reg_bilinearResampleGradient(const nifti_image *floatingImage,
                                   nifti_image *warpedImage,
                                   const nifti_image *deformationField,
-                                  const float& paddingValue) {
+                                  const float paddingValue) {
     const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
     const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3);
     const DataType *floatingIntensityX = static_cast<DataType*>(floatingImage->data);
@@ -1672,7 +1672,7 @@ template <class DataType>
 void reg_trilinearResampleGradient(const nifti_image *floatingImage,
                                    nifti_image *warpedImage,
                                    const nifti_image *deformationField,
-                                   const float& paddingValue) {
+                                   const float paddingValue) {
     const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
     const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3);
     const size_t deformationFieldVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
@@ -1893,9 +1893,9 @@ void reg_trilinearResampleGradient(const nifti_image *floatingImage,
 void reg_resampleGradient(const nifti_image *floatingImage,
                           nifti_image *warpedImage,
                           const nifti_image *deformationField,
-                          const int& interp,
-                          const float& paddingValue) {
-    if (interp != 1)
+                          const int interpolation,
+                          const float paddingValue) {
+    if (interpolation != 1)
         NR_FATAL_ERROR("Only linear interpolation is supported");
     if (floatingImage->datatype != warpedImage->datatype || floatingImage->datatype != deformationField->datatype)
         NR_FATAL_ERROR("Input images are expected to have the same type");
@@ -1923,8 +1923,8 @@ void TrilinearImageGradient(const nifti_image *floatingImage,
                             const nifti_image *deformationField,
                             nifti_image *warpedGradient,
                             const int *mask,
-                            const float& paddingValue,
-                            const int& activeTimepoint) {
+                            const float paddingValue,
+                            const int activeTimepoint) {
     if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
         NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
 #ifdef _WIN32
@@ -2092,8 +2092,8 @@ void BilinearImageGradient(const nifti_image *floatingImage,
                            const nifti_image *deformationField,
                            nifti_image *warpedGradient,
                            const int *mask,
-                           const float& paddingValue,
-                           const int& activeTimepoint) {
+                           const float paddingValue,
+                           const int activeTimepoint) {
     if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
         NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
 #ifdef _WIN32
@@ -2202,8 +2202,8 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage,
                                 const nifti_image *deformationField,
                                 nifti_image *warpedGradient,
                                 const int *mask,
-                                const float& paddingValue,
-                                const int& activeTimepoint) {
+                                const float paddingValue,
+                                const int activeTimepoint) {
     if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
         NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
 #ifdef _WIN32
@@ -2343,8 +2343,8 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage,
                                 const nifti_image *deformationField,
                                 nifti_image *warpedGradient,
                                 const int *mask,
-                                const float& paddingValue,
-                                const int& activeTimepoint) {
+                                const float paddingValue,
+                                const int activeTimepoint) {
     if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
         NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
 #ifdef _WIN32
@@ -2453,9 +2453,9 @@ void reg_getImageGradient(nifti_image *floatingImage,
                           nifti_image *warpedGradient,
                           const nifti_image *deformationField,
                           const int *mask,
-                          const int& interp,
-                          const float& paddingValue,
-                          const int& activeTimepoint,
+                          const int interpolation,
+                          const float paddingValue,
+                          const int activeTimepoint,
                           const int *dtIndicies,
                           const mat33 *jacMat,
                           const nifti_image *warpedImage = nullptr) {
@@ -2464,7 +2464,7 @@ void reg_getImageGradient(nifti_image *floatingImage,
     // The DTI are logged
     reg_dti_resampling_preprocessing<FloatingType>(floatingImage, &originalFloatingData, dtIndicies);
     /* The deformation field contains the position in the real world */
-    if (interp == 3) {
+    if (interpolation == 3) {
         if (deformationField->nu > 2) {
             CubicSplineImageGradient3D<FloatingType, GradientType, FieldType>(floatingImage,
                                                                               deformationField,
@@ -2511,9 +2511,9 @@ void reg_getImageGradient(nifti_image *floatingImage,
                           nifti_image *warpedGradient,
                           const nifti_image *deformationField,
                           const int *mask,
-                          const int& interp,
-                          const float& paddingValue,
-                          const int& activeTimepoint,
+                          const int interpolation,
+                          const float paddingValue,
+                          const int activeTimepoint,
                           const bool *dtiTimepoint,
                           const mat33 *jacMat,
                           const nifti_image *warpedImage) {
@@ -2553,7 +2553,7 @@ void reg_getImageGradient(nifti_image *floatingImage,
                                                                                    warpedGradient,
                                                                                    deformationField,
                                                                                    mask,
-                                                                                   interp,
+                                                                                   interpolation,
                                                                                    paddingValue,
                                                                                    activeTimepoint,
                                                                                    dtIndicies,
@@ -2569,8 +2569,8 @@ template<class DataType>
 void reg_getImageGradient_symDiff(const nifti_image *img,
                                   nifti_image *gradImg,
                                   const int *mask,
-                                  const float& paddingValue,
-                                  const int& timepoint) {
+                                  const float paddingValue,
+                                  const int timepoint) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
 
     int dimImg = img->nz > 1 ? 3 : 2;
@@ -2630,8 +2630,8 @@ void reg_getImageGradient_symDiff(const nifti_image *img,
 void reg_getImageGradient_symDiff(const nifti_image *img,
                                   nifti_image *gradImg,
                                   const int *mask,
-                                  const float& paddingValue,
-                                  const int& timepoint) {
+                                  const float paddingValue,
+                                  const int timepoint) {
     if (img->datatype != gradImg->datatype)
         NR_FATAL_ERROR("Input images are expected to be of the same type");
     if (img->datatype != NIFTI_TYPE_FLOAT32 && img->datatype != NIFTI_TYPE_FLOAT64)
diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h
index ab39078f..04b59979 100755
--- a/reg-lib/cpu/_reg_resampling.h
+++ b/reg-lib/cpu/_reg_resampling.h
@@ -26,7 +26,7 @@
  * @param deformationField Vector field image that contains the dense correspondences
  * @param mask Array that contains information about the mask. Only voxel with mask value different
  * from zero are being considered. If nullptr, all voxels are considered
- * @param interp Interpolation type. 0, 1 or 3 correspond to nearest neighbor, linear or cubic
+ * @param interpolation Interpolation type. 0, 1 or 3 correspond to nearest neighbor, linear or cubic
  * interpolation
  * @param paddingValue Value to be used for padding when the correspondences are outside of the
  * reference image space.
@@ -37,8 +37,8 @@ void reg_resampleImage(nifti_image *floatingImage,
                        nifti_image *warpedImage,
                        const nifti_image *deformationField,
                        const int *mask,
-                       const int& interp,
-                       const float& paddingValue,
+                       const int interpolation,
+                       const float paddingValue,
                        const bool *dtiTimepoint = nullptr,
                        const mat33 *jacMat = nullptr);
 /* *************************************************************** */
@@ -46,24 +46,24 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage,
                            nifti_image *warpedImage,
                            const nifti_image *deformationField,
                            const int *mask,
-                           const int& interp,
-                           const float& paddingValue,
+                           const int interpolation,
+                           const float paddingValue,
                            const mat33 *jacMat,
-                           const char& algorithm);
+                           const char algorithm);
 /* *************************************************************** */
 void reg_resampleGradient(const nifti_image *gradientImage,
                           nifti_image *warpedGradient,
                           const nifti_image *deformationField,
-                          const int& interp,
-                          const float& paddingValue);
+                          const int interpolation,
+                          const float paddingValue);
 /* *************************************************************** */
 void reg_getImageGradient(nifti_image *floatingImage,
                           nifti_image *warpedGradient,
                           const nifti_image *deformationField,
                           const int *mask,
-                          const int& interp,
-                          const float& paddingValue,
-                          const int& activeTimepoint,
+                          const int interpolation,
+                          const float paddingValue,
+                          const int activeTimepoint,
                           const bool *dtiTimepoint = nullptr,
                           const mat33 *jacMat = nullptr,
                           const nifti_image *warpedImage = nullptr);
@@ -71,8 +71,8 @@ void reg_getImageGradient(nifti_image *floatingImage,
 void reg_getImageGradient_symDiff(const nifti_image *img,
                                   nifti_image *gradImg,
                                   const int *mask,
-                                  const float& paddingValue,
-                                  const int& timepoint);
+                                  const float paddingValue,
+                                  const int timepoint);
 /* *************************************************************** */
 nifti_image* reg_makeIsotropic(nifti_image*, int);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp
index 244bf4c0..ee01efbb 100755
--- a/reg-lib/cpu/_reg_splineBasis.cpp
+++ b/reg-lib/cpu/_reg_splineBasis.cpp
@@ -158,11 +158,10 @@ template <class DataType>
 void set_first_order_basis_values(DataType *basisX, DataType *basisY) {
     double BASIS[4], FIRST[4]; get_BSplineBasisValues<double>(0, BASIS, FIRST);
     int index = 0;
-    for (int y = 0; y < 3; ++y) {
-        for (int x = 0; x < 3; ++x) {
-            basisX[index] = FIRST[x] * BASIS[y];
-            basisY[index] = BASIS[x] * FIRST[y];
-            index++;
+    for (int y = 0; y < 3; y++) {
+        for (int x = 0; x < 3; x++, index++) {
+            basisX[index] = static_cast<DataType>(FIRST[x] * BASIS[y]);
+            basisY[index] = static_cast<DataType>(BASIS[x] * FIRST[y]);
         }
     }
 }
@@ -464,7 +463,7 @@ void get_SlidedValues(DataType& defX,
                       const int y,
                       const DataType *defPtrX,
                       const DataType *defPtrY,
-                      const mat44 *dfVoxel2Real,
+                      const mat44 *dfVoxelToReal,
                       const int *dim,
                       const bool displacement) {
     int newX = x;
@@ -484,8 +483,8 @@ void get_SlidedValues(DataType& defX,
     if (!displacement) {
         const int shiftIndexX = x - newX;
         const int shiftIndexY = y - newY;
-        shiftValueX = shiftIndexX * dfVoxel2Real->m[0][0] + shiftIndexY * dfVoxel2Real->m[0][1];
-        shiftValueY = shiftIndexX * dfVoxel2Real->m[1][0] + shiftIndexY * dfVoxel2Real->m[1][1];
+        shiftValueX = shiftIndexX * dfVoxelToReal->m[0][0] + shiftIndexY * dfVoxelToReal->m[0][1];
+        shiftValueY = shiftIndexX * dfVoxelToReal->m[1][0] + shiftIndexY * dfVoxelToReal->m[1][1];
     }
     const int index = newY * dim[1] + newX;
     defX = defPtrX[index] + shiftValueX;
@@ -504,7 +503,7 @@ void get_SlidedValues(DataType& defX,
                       const DataType *defPtrX,
                       const DataType *defPtrY,
                       const DataType *defPtrZ,
-                      const mat44 *dfVoxel2Real,
+                      const mat44 *dfVoxelToReal,
                       const int *dim,
                       const bool displacement) {
     int newX = x;
@@ -533,17 +532,17 @@ void get_SlidedValues(DataType& defX,
         const int shiftIndexY = y - newY;
         const int shiftIndexZ = z - newZ;
         shiftValueX =
-            shiftIndexX * dfVoxel2Real->m[0][0] +
-            shiftIndexY * dfVoxel2Real->m[0][1] +
-            shiftIndexZ * dfVoxel2Real->m[0][2];
+            shiftIndexX * dfVoxelToReal->m[0][0] +
+            shiftIndexY * dfVoxelToReal->m[0][1] +
+            shiftIndexZ * dfVoxelToReal->m[0][2];
         shiftValueY =
-            shiftIndexX * dfVoxel2Real->m[1][0] +
-            shiftIndexY * dfVoxel2Real->m[1][1] +
-            shiftIndexZ * dfVoxel2Real->m[1][2];
+            shiftIndexX * dfVoxelToReal->m[1][0] +
+            shiftIndexY * dfVoxelToReal->m[1][1] +
+            shiftIndexZ * dfVoxelToReal->m[1][2];
         shiftValueZ =
-            shiftIndexX * dfVoxel2Real->m[2][0] +
-            shiftIndexY * dfVoxel2Real->m[2][1] +
-            shiftIndexZ * dfVoxel2Real->m[2][2];
+            shiftIndexX * dfVoxelToReal->m[2][0] +
+            shiftIndexY * dfVoxelToReal->m[2][1] +
+            shiftIndexZ * dfVoxelToReal->m[2][2];
     }
     const int index = (newZ * dim[2] + newY) * dim[1] + newX;
     defX = defPtrX[index] + shiftValueX;
@@ -570,10 +569,7 @@ void get_GridValues(int startX,
     size_t coord = 0;
     DataType *xxPtr = nullptr, *yyPtr = nullptr;
 
-    mat44 *voxel2realMatrix = nullptr;
-    if (splineControlPoint->sform_code > 0)
-        voxel2realMatrix = &splineControlPoint->sto_xyz;
-    else voxel2realMatrix = &splineControlPoint->qto_xyz;
+    const mat44 *voxelToReal = splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_xyz : &splineControlPoint->qto_xyz;
 
     for (int Y = startY; Y < startY + range; Y++) {
         bool out = false;
@@ -582,7 +578,7 @@ void get_GridValues(int startX,
             xxPtr = &splineX[index];
             yyPtr = &splineY[index];
         } else out = true;
-        for (int X = startX; X < startX + range; X++) {
+        for (int X = startX; X < startX + range; X++, coord++) {
             if (X > -1 && X < splineControlPoint->nx && out == false) {
                 dispX[coord] = xxPtr[X];
                 dispY[coord] = yyPtr[X];
@@ -593,11 +589,10 @@ void get_GridValues(int startX,
                                            Y,
                                            splineX,
                                            splineY,
-                                           voxel2realMatrix,
+                                           voxelToReal,
                                            splineControlPoint->dim,
                                            displacement);
             }
-            coord++;
         }
     }
 }
@@ -626,10 +621,7 @@ void get_GridValues(int startX,
     DataType *xPtr = nullptr, *yPtr = nullptr, *zPtr = nullptr;
     DataType *xxPtr = nullptr, *yyPtr = nullptr, *zzPtr = nullptr;
 
-    mat44 *voxel2realMatrix = nullptr;
-    if (splineControlPoint->sform_code > 0)
-        voxel2realMatrix = &splineControlPoint->sto_xyz;
-    else voxel2realMatrix = &splineControlPoint->qto_xyz;
+    const mat44 *voxelToReal = splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_xyz : &splineControlPoint->qto_xyz;
 
     for (int Z = startZ; Z < startZ + range; Z++) {
         bool out = false;
@@ -646,7 +638,7 @@ void get_GridValues(int startX,
                 yyPtr = &yPtr[index];
                 zzPtr = &zPtr[index];
             } else out = true;
-            for (int X = startX; X < startX + range; X++) {
+            for (int X = startX; X < startX + range; X++, coord++) {
                 if (X > -1 && X < splineControlPoint->nx && out == false) {
                     dispX[coord] = xxPtr[X];
                     dispY[coord] = yyPtr[X];
@@ -661,11 +653,10 @@ void get_GridValues(int startX,
                                                splineX,
                                                splineY,
                                                splineZ,
-                                               voxel2realMatrix,
+                                               voxelToReal,
                                                splineControlPoint->dim,
                                                displacement);
                 }
-                coord++;
             } // X
         } // Y
     } // Z
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 3c8d912e..aecab542 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -196,8 +196,8 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
                                   nifti_image *measureGradientImage,
                                   const nifti_image *jacobianDetImage,
                                   const int *mask,
-                                  const int& currentTimepoint,
-                                  const double& timepointWeight,
+                                  const int currentTimepoint,
+                                  const double timepointWeight,
                                   const nifti_image *localWeightSim) {
     // Create pointers to the reference and warped images
 #ifdef _WIN32
@@ -275,8 +275,8 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
         }
     }
 }
-template void reg_getVoxelBasedSsdGradient<float>(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*);
-template void reg_getVoxelBasedSsdGradient<double>(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*);
+template void reg_getVoxelBasedSsdGradient<float>(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int, const double, const nifti_image*);
+template void reg_getVoxelBasedSsdGradient<double>(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int, const double, const nifti_image*);
 /* *************************************************************** */
 void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                             const nifti_image *warpedImage,
@@ -284,8 +284,8 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                             nifti_image *voxelBasedGradient,
                                             const nifti_image *jacobianDetImage,
                                             const int *mask,
-                                            const int& currentTimepoint,
-                                            const double& timepointWeight,
+                                            const int currentTimepoint,
+                                            const double timepointWeight,
                                             const nifti_image *localWeightSim) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index 008178a4..f840e1c6 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -101,7 +101,7 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
                                   nifti_image *measureGradientImage,
                                   const nifti_image *jacobianDetImage,
                                   const int *mask,
-                                  const int& currentTimepoint,
-                                  const double& timepointWeight,
+                                  const int currentTimepoint,
+                                  const double timepointWeight,
                                   const nifti_image *localWeightSim);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 6a7d53a2..a5877a43 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -7,7 +7,7 @@
 #include "_reg_optimiser_gpu.h"
 
 /* *************************************************************** */
-void CudaCompute::ResampleImage(int inter, float paddingValue) {
+void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
     CudaContent& con = dynamic_cast<CudaContent&>(this->con);
     reg_resampleImage_gpu(con.Content::GetFloating(),
                           con.GetWarpedCuda(),
@@ -15,6 +15,7 @@ void CudaCompute::ResampleImage(int inter, float paddingValue) {
                           con.GetDeformationFieldCuda(),
                           con.GetReferenceMaskCuda(),
                           con.GetActiveVoxelNumber(),
+                          interpolation,
                           paddingValue);
 }
 /* *************************************************************** */
@@ -117,13 +118,14 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
 }
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
-    // TODO Fix reg_getImageGradient_gpu to accept interpolation and activeTimepoint
+    // TODO Fix reg_getImageGradient_gpu to accept activeTimepoint
     CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
     reg_getImageGradient_gpu(con.DefContent::GetFloating(),
                              con.GetFloatingCuda(),
                              con.GetDeformationFieldCuda(),
                              con.GetWarpedGradientCuda(),
                              con.GetActiveVoxelNumber(),
+                             interpolation,
                              paddingValue);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index ed0514e1..4a8bef91 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -7,7 +7,7 @@ class CudaCompute: public Compute {
 public:
     CudaCompute(Content& con): Compute(con) {}
 
-    virtual void ResampleImage(int inter, float paddingValue) override;
+    virtual void ResampleImage(int interpolation, float paddingValue) override;
     virtual double GetJacobianPenaltyTerm(bool approx) override;
     virtual void JacobianPenaltyTermGradient(float weight, bool approx) override;
     virtual double CorrectFolding(bool approx) override;
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 1d485af8..abfc980c 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -46,6 +46,7 @@ void CudaContent::DeallocateImages() {
 /* *************************************************************** */
 void CudaContent::AllocateDeformationField() {
     Cuda::Allocate(&deformationFieldCuda, deformationField->dim);
+    UpdateDeformationField();
 }
 /* *************************************************************** */
 void CudaContent::DeallocateDeformationField() {
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index 1ea4efa8..6c73f9cd 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -25,7 +25,7 @@ CudaF3dContent::~CudaF3dContent() {
 /* *************************************************************** */
 void CudaF3dContent::AllocateControlPointGrid() {
     Cuda::Allocate(&controlPointGridCuda, controlPointGrid->dim);
-    Cuda::TransferNiftiToDevice(controlPointGridCuda, controlPointGrid);
+    UpdateControlPointGrid();
 }
 /* *************************************************************** */
 void CudaF3dContent::DeallocateControlPointGrid() {
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index f221a67d..b7c03485 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -37,10 +37,10 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
                                                  activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1);
 
     // Get the reference matrix if composition is required
-    thrust::device_vector<mat44> referenceMatrix;
+    thrust::device_vector<mat44> realToVoxel;
     if (composition) {
-        const mat44 *refMatPtr = controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk;
-        referenceMatrix = thrust::device_vector<mat44>(refMatPtr, refMatPtr + 1);
+        const mat44 *matPtr = controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk;
+        realToVoxel = thrust::device_vector<mat44>(matPtr, matPtr + 1);
     }
 
     if (referenceImage->nz > 1) {
@@ -52,7 +52,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
         reg_spline_getDeformationField3D<<<gridDims, blockDims, blocks * 8 * sizeof(float)>>>(deformationFieldCuda,
                                                                                               *controlPointTexture,
                                                                                               *maskTexture,
-                                                                                              referenceMatrix.data().get(),
+                                                                                              realToVoxel.data().get(),
                                                                                               referenceImageDim,
                                                                                               controlPointImageDim,
                                                                                               controlPointVoxelSpacing,
@@ -69,7 +69,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
         reg_spline_getDeformationField2D<<<gridDims, blockDims, blocks * 4 * sizeof(float)>>>(deformationFieldCuda,
                                                                                               *controlPointTexture,
                                                                                               *maskTexture,
-                                                                                              referenceMatrix.data().get(),
+                                                                                              realToVoxel.data().get(),
                                                                                               referenceImageDim,
                                                                                               controlPointImageDim,
                                                                                               controlPointVoxelSpacing,
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 05644a08..86395269 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -310,7 +310,7 @@ __device__ float4 GetSlidedValues(int x, int y, int z,
 __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
                                                  cudaTextureObject_t controlPointTexture,
                                                  cudaTextureObject_t maskTexture,
-                                                 const mat44 *referenceMatrix,
+                                                 const mat44 *realToVoxel,
                                                  const int3 referenceImageDim,
                                                  const int3 controlPointImageDim,
                                                  const float3 controlPointVoxelSpacing,
@@ -329,18 +329,18 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
         const float4 node = deformationField[tid];
 
         // From real to pixel position in the CPP
-        const float xVoxel = (referenceMatrix->m[0][0] * node.x +
-                              referenceMatrix->m[0][1] * node.y +
-                              referenceMatrix->m[0][2] * node.z +
-                              referenceMatrix->m[0][3]);
-        const float yVoxel = (referenceMatrix->m[1][0] * node.x +
-                              referenceMatrix->m[1][1] * node.y +
-                              referenceMatrix->m[1][2] * node.z +
-                              referenceMatrix->m[1][3]);
-        const float zVoxel = (referenceMatrix->m[2][0] * node.x +
-                              referenceMatrix->m[2][1] * node.y +
-                              referenceMatrix->m[2][2] * node.z +
-                              referenceMatrix->m[2][3]);
+        const float xVoxel = (realToVoxel->m[0][0] * node.x +
+                              realToVoxel->m[0][1] * node.y +
+                              realToVoxel->m[0][2] * node.z +
+                              realToVoxel->m[0][3]);
+        const float yVoxel = (realToVoxel->m[1][0] * node.x +
+                              realToVoxel->m[1][1] * node.y +
+                              realToVoxel->m[1][2] * node.z +
+                              realToVoxel->m[1][3]);
+        const float zVoxel = (realToVoxel->m[2][0] * node.x +
+                              realToVoxel->m[2][1] * node.y +
+                              realToVoxel->m[2][2] * node.z +
+                              realToVoxel->m[2][3]);
 
         if (xVoxel < 0 || xVoxel >= referenceImageDim.x ||
             yVoxel < 0 || yVoxel >= referenceImageDim.y ||
@@ -417,7 +417,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
 __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
                                                  cudaTextureObject_t controlPointTexture,
                                                  cudaTextureObject_t maskTexture,
-                                                 const mat44 *referenceMatrix,
+                                                 const mat44 *realToVoxel,
                                                  const int3 referenceImageDim,
                                                  const int3 controlPointImageDim,
                                                  const float3 controlPointVoxelSpacing,
@@ -436,12 +436,12 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
         const float4 node = deformationField[tid];
 
         // From real to pixel position in the CPP
-        const float xVoxel = (referenceMatrix->m[0][0] * node.x +
-                              referenceMatrix->m[0][1] * node.y +
-                              referenceMatrix->m[0][3]);
-        const float yVoxel = (referenceMatrix->m[1][0] * node.x +
-                              referenceMatrix->m[1][1] * node.y +
-                              referenceMatrix->m[1][3]);
+        const float xVoxel = (realToVoxel->m[0][0] * node.x +
+                              realToVoxel->m[0][1] * node.y +
+                              realToVoxel->m[0][3]);
+        const float yVoxel = (realToVoxel->m[1][0] * node.x +
+                              realToVoxel->m[1][1] * node.y +
+                              realToVoxel->m[1][3]);
 
         if (xVoxel < 0 || xVoxel >= referenceImageDim.x ||
             yVoxel < 0 || yVoxel >= referenceImageDim.y) return;
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index d6d3d7b8..873102df 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -61,7 +61,9 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  double **jointHistogramPro,
                                  double **entropyValues,
                                  const int *referenceMask,
-                                 const int& referenceTimePoint) {
+                                 const int referenceTimePoint,
+                                 const bool approximation) {
+    // TODO: Implement the NMI computation for CUDA
     // The NMI computation is performed on the host for now
     Cuda::TransferFromDeviceToNifti<float>(warpedImage, warpedImageCuda);
     reg_getNMIValue<float>(referenceImage,
@@ -73,7 +75,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                            jointHistogramLog,
                            jointHistogramPro,
                            entropyValues,
-                           referenceMask);
+                           referenceMask,
+                           approximation);
 
     double nmi = 0;
     for (int t = 0; t < referenceTimePoint; ++t) {
@@ -95,7 +98,8 @@ double reg_nmi_gpu::GetSimilarityMeasureValueFw() {
                                        this->jointHistogramPro,
                                        this->entropyValues,
                                        this->referenceMask,
-                                       this->referenceTimePoint);
+                                       this->referenceTimePoint,
+                                       this->approximatePW);
 }
 /* *************************************************************** */
 double reg_nmi_gpu::GetSimilarityMeasureValueBw() {
@@ -110,7 +114,8 @@ double reg_nmi_gpu::GetSimilarityMeasureValueBw() {
                                        this->jointHistogramProBw,
                                        this->entropyValuesBw,
                                        this->floatingMask,
-                                       this->referenceTimePoint);
+                                       this->referenceTimePoint,
+                                       this->approximatePW);
 }
 /* *************************************************************** */
 /// Called when we only have one target and one source image
@@ -121,10 +126,10 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
                                       const float *logJointHistogramCuda,
                                       float4 *voxelBasedGradientCuda,
                                       const int *maskCuda,
-                                      const size_t& activeVoxelNumber,
+                                      const size_t activeVoxelNumber,
                                       const double *entropies,
-                                      const int& refBinning,
-                                      const int& floBinning) {
+                                      const int refBinning,
+                                      const int floBinning) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index a03688af..6eb684ff 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -19,8 +19,12 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage,
                            const cudaArray *floatingImageCuda,
                            const float4 *deformationFieldCuda,
                            const int *maskCuda,
-                           const size_t& activeVoxelNumber,
-                           const float& paddingValue) {
+                           const size_t activeVoxelNumber,
+                           const int interpolation,
+                           const float paddingValue) {
+    if (interpolation != 1)
+        NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");
+
     auto blockSize = CudaContext::GetBlockSize();
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
@@ -59,10 +63,15 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage,
                               const cudaArray *floatingImageCuda,
                               const float4 *deformationFieldCuda,
                               float4 *warpedGradientCuda,
-                              const size_t& activeVoxelNumber,
-                              const float& paddingValue) {
+                              const size_t activeVoxelNumber,
+                              const int interpolation,
+                              float paddingValue) {
+    if (interpolation != 1)
+        NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");
+
     auto blockSize = CudaContext::GetBlockSize();
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
+    if (paddingValue != paddingValue) paddingValue = 0;
 
     // Create the texture object for the floating image
     auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, cudaResourceTypeArray);
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h
index 0fe28ea4..6afd287a 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/_reg_resampling_gpu.h
@@ -20,13 +20,15 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage,
                            const cudaArray *floatingImageCuda,
                            const float4 *deformationFieldCuda,
                            const int *maskCuda,
-                           const size_t& activeVoxelNumber,
-                           const float& paddingValue);
+                           const size_t activeVoxelNumber,
+                           const int interpolation,
+                           const float paddingValue);
 /* *************************************************************** */
 void reg_getImageGradient_gpu(const nifti_image *floatingImage,
                               const cudaArray *floatingImageCuda,
                               const float4 *deformationFieldCuda,
                               float4 *warpedGradientCuda,
-                              const size_t& activeVoxelNumber,
-                              const float& paddingValue);
+                              const size_t activeVoxelNumber,
+                              const int interpolation,
+                              float paddingValue);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 8a853da9..8180ca1d 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -144,7 +144,7 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
         float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
         // Get the voxel-based deformation in the floating space
-        float3 voxelDeformation;
+        float2 voxelDeformation;
         voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
                               floatingMatrix.m[0][1] * realDeformation.y +
                               floatingMatrix.m[0][3]);
@@ -158,7 +158,7 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
         const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
         InterpLinearKernel(relative.x, xBasis);
         InterpLinearKernel(relative.y, yBasis);
-        const float deriv[] = { -1.0f, 1.0f };
+        constexpr float deriv[] = { -1.0f, 1.0f };
 
         float4 gradientValue{};
         for (short b = 0; b < 2; b++) {
@@ -178,6 +178,11 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
             gradientValue.y += tempValueX.y * deriv[b];
         }
 
+        if (gradientValue.x != gradientValue.x)
+            gradientValue.x = 0;
+        if (gradientValue.y != gradientValue.y)
+            gradientValue.y = 0;
+
         gradientArray[tid] = gradientValue;
     }
 }
@@ -216,7 +221,7 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
         InterpLinearKernel(relative.x, xBasis);
         InterpLinearKernel(relative.y, yBasis);
         InterpLinearKernel(relative.z, zBasis);
-        const float deriv[] = { -1.0f, 1.0f };
+        constexpr float deriv[] = { -1.0f, 1.0f };
 
         float4 gradientValue{};
         for (short c = 0; c < 2; c++) {
@@ -244,6 +249,13 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
             gradientValue.z += tempValueY.z * deriv[c];
         }
 
+        if (gradientValue.x != gradientValue.x)
+            gradientValue.x = 0;
+        if (gradientValue.y != gradientValue.y)
+            gradientValue.y = 0;
+        if (gradientValue.z != gradientValue.z)
+            gradientValue.z = 0;
+
         gradientArray[tid] = gradientValue;
     }
 }
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index c05cc586..69dd285b 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -8,9 +8,10 @@
 #include <catch2/catch_test_macros.hpp>
 #include "_reg_lncc.h"
 #include "_reg_localTrans.h"
+#include "_reg_nmi.h"
+#include "AffineDeformationFieldKernel.h"
 #include "Platform.h"
 #include "ResampleImageKernel.h"
-#include "AffineDeformationFieldKernel.h"
 
 
 template<typename T>
@@ -36,7 +37,7 @@ void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) {
 
 NiftiImage CreateControlPointGrid(const NiftiImage& reference) {
     // Set the spacing for the control point grid to 2 voxel along each axis
-    float gridSpacing[3] = { reference->dx * 2, reference->dy * 2, reference->dz * 2 };
+    const float gridSpacing[3] = { reference->dx * 2, reference->dy * 2, reference->dz * 2 };
 
     // Create and allocate the control point image
     // It is initialised with an identity transformation by default
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index 1ad4bd2c..1b61ac39 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -1,4 +1,4 @@
-// OpenCL and CUDA are not supported for this test yet
+// OpenCL is not supported for this test yet
 #undef _USE_OPENCL
 
 #include "reg_test_common.h"
@@ -158,10 +158,10 @@ TEST_CASE_METHOD(NmiTest, "NMI", "[unit]") {
 
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
-            if (fabs(result - expected) > EPS) {
+            const auto diff = abs(result - expected);
+            if (diff > EPS)
                 NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl;
-            }
-            REQUIRE(fabs(result - expected) < EPS);
+            REQUIRE(diff < EPS);
         }
     }
 }
diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp
index 0e85de0c..0f5e19cf 100644
--- a/reg-test/reg_test_nmi_gradient.cpp
+++ b/reg-test/reg_test_nmi_gradient.cpp
@@ -3,9 +3,6 @@
 #undef _USE_CUDA
 
 #include "reg_test_common.h"
-#include "_reg_tools.h"
-#include "_reg_ReadWriteImage.h"
-#include "_reg_nmi.h"
 
 /*
     This test file contains the following unit tests:
@@ -23,17 +20,18 @@ class NMIGradientTest {
         std::mt19937 gen(0);
         // Images will be rescaled between 2 and bin-3
         // Default bin value is 68 (64+4 for Parzen windowing)
-        const unsigned binNumber = 8;
-        const float padding = 2; //std::numeric_limits<float>::quiet_NaN();
+        constexpr unsigned binNumber = 8;
+        constexpr float padding = 2; //std::numeric_limits<float>::quiet_NaN();
         std::uniform_real_distribution<float> distr(2, binNumber - 3);
 
         // Create reference and floating 2D images
-        vector<NiftiImage::dim_t> dim{ 4, 4 };
+        constexpr NiftiImage::dim_t dimSize = 4;
+        vector<NiftiImage::dim_t> dim{ dimSize, dimSize };
         NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
 
         // Create reference and floating 3D images
-        dim.push_back(4);
+        dim.push_back(dimSize);
         NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
 
@@ -74,7 +72,7 @@ class NMIGradientTest {
         for (auto&& data : testData) {
             for (auto&& platformType : PlatformTypes) {
                 // Create the platform
-                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<Platform> platform{ new Platform(platformType) };
                 // Make a copy of the test data
                 auto [testName, reference, floating] = data;
                 // Create the content creator
@@ -122,7 +120,7 @@ class NMIGradientTest {
                     gradPtr[index] = -(nmi_post - nmi_pre) / (2. * delta);
                     defPtr[index] = current_value;
                 }
-                testCases.push_back({ testName + " " + platform->GetName(), std::move(gradientImage), std::move(expectedGradientImage) });
+                testCases.push_back({ testName + " "s + platform->GetName(), std::move(gradientImage), std::move(expectedGradientImage) });
             }
         }
     }
@@ -157,7 +155,7 @@ TEST_CASE_METHOD(NMIGradientTest, "NMI Gradient", "[unit]") {
             const double norm = std::max(fabs(reg_tools_getMinValue(expected, 0)),
                                          fabs(reg_tools_getMaxValue(expected, 0)));
             for (size_t i = 0; i < expected.nVoxels(); ++i) {
-                const double ratio = fabs(resPtr[i] - expPtr[i]) / norm;
+                const double ratio = abs(resPtr[i] - expPtr[i]) / norm;
                 if (ratio > .1) {
                     NR_COUT << "[i]=" << i;
                     NR_COUT << " | ratio=" << ratio;
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 29d95559..49020304 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -68,7 +68,7 @@ class MeasureTest {
         for (auto&& measure : testMeasures) {
             for (int sym = 0; sym < 2; ++sym) {
                 testData.emplace_back(TestData(
-                    measureNames[(int)measure] + " 2D"s + (sym ? " Symmetric" : ""),
+                    measureNames[int(measure)] + " 2D"s + (sym ? " Symmetric" : ""),
                     reference2d,
                     floating2d,
                     controlPointGrid2d,
@@ -77,7 +77,7 @@ class MeasureTest {
                     sym
                 ));
                 testData.emplace_back(TestData(
-                    measureNames[(int)measure] + " 3D"s + (sym ? " Symmetric" : ""),
+                    measureNames[int(measure)] + " 3D"s + (sym ? " Symmetric" : ""),
                     reference3d,
                     floating3d,
                     controlPointGrid3d,
@@ -196,7 +196,7 @@ class MeasureTest {
             const double simMeasureCuda = measureCuda->GetSimilarityMeasureValue();
 
             // Compute the similarity measure gradient for CPU
-            int timepoint = 0;
+            constexpr int timepoint = 0;
             contentCpu->ZeroVoxelBasedMeasureGradient();
             computeCpu->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
             if (isSymmetric) {

From 4c1bc6a6c1e8c82ef0c278b60d14bb0346e11928 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 19 Oct 2023 13:56:47 +0100
Subject: [PATCH 226/314] Enable CUDA for NMI gradient unit test

---
 niftyreg_build_version.txt         |  2 +-
 reg-test/reg_test_nmi_gradient.cpp | 35 ++++++++++++++++--------------
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 4772052f..51b40081 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-344
+345
diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp
index 0f5e19cf..f19ac9bd 100644
--- a/reg-test/reg_test_nmi_gradient.cpp
+++ b/reg-test/reg_test_nmi_gradient.cpp
@@ -1,6 +1,5 @@
-// OpenCL and CUDA are not supported for this test yet
+// OpenCL is not supported for this test yet
 #undef _USE_OPENCL
-#undef _USE_CUDA
 
 #include "reg_test_common.h"
 
@@ -82,9 +81,11 @@ class NMIGradientTest {
                 // Create the content
                 unique_ptr<DefContent> content{ contentCreator->Create(reference, floating) };
                 // Add some displacements to the deformation field to avoid grid effect
-                float *defPtr = static_cast<float*>(content->GetDeformationField()->data);
-                for (size_t index = 0; index < content->GetDeformationField()->nvox; ++index)
+                nifti_image *defField = content->Content::GetDeformationField();
+                float *defPtr = static_cast<float*>(defField->data);
+                for (size_t index = 0; index < defField->nvox; ++index)
                     defPtr[index] += 0.1f;
+                content->UpdateDeformationField();
                 // Compute the warped image given the current transformation
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->ResampleImage(1, padding);
@@ -104,21 +105,23 @@ class NMIGradientTest {
                 // Create an image to store the expected gradient values
                 NiftiImage expectedGradientImage(content->GetDeformationField(), NiftiImage::Copy::Image);
                 // Apply perturbations to each value in the deformation field
-                float *gradPtr = static_cast<float *>(expectedGradientImage->data);
-                const float delta = 0.00001;
-                for (unsigned index = 0; index < expectedGradientImage.nVoxels(); ++index) {
-                    float current_value = defPtr[index];
-                    // compute the NMI when removing delta(s)
-                    defPtr[index] = current_value - delta;
+                float *gradPtr = static_cast<float*>(expectedGradientImage->data);
+                constexpr float delta = 0.00001f;
+                for (auto index = 0; index < expectedGradientImage.nVoxels(); ++index) {
+                    const float orgDefValue = defPtr[index];
+                    // Compute the NMI when removing delta(s)
+                    defPtr[index] = orgDefValue - delta;
+                    content->UpdateDeformationField();
                     compute->ResampleImage(1, padding);
-                    const double nmi_pre = measure_nmi->GetSimilarityMeasureValue();
-                    // compute the NMI when adding delta(s)
-                    defPtr[index] = current_value + delta;
+                    const double nmiPre = measure_nmi->GetSimilarityMeasureValue();
+                    // Compute the NMI when adding delta(s)
+                    defPtr[index] = orgDefValue + delta;
+                    content->UpdateDeformationField();
                     compute->ResampleImage(1, padding);
-                    const double nmi_post = measure_nmi->GetSimilarityMeasureValue();
+                    const double nmiPost = measure_nmi->GetSimilarityMeasureValue();
                     // Compute the difference
-                    gradPtr[index] = -(nmi_post - nmi_pre) / (2. * delta);
-                    defPtr[index] = current_value;
+                    gradPtr[index] = float(-(nmiPost - nmiPre) / (2.0 * delta));
+                    defPtr[index] = orgDefValue;
                 }
                 testCases.push_back({ testName + " "s + platform->GetName(), std::move(gradientImage), std::move(expectedGradientImage) });
             }

From a39e68545347a1a80698823d03a3d32391b1c1ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 19 Oct 2023 14:11:39 +0100
Subject: [PATCH 227/314] Add content creator for f3d2 #92

This fixes incorrect NMI calculation for CUDA by sharing reference and floating CUDA arrays between contents
---
 niftyreg_build_version.txt               |  2 +-
 reg-lib/ContentCreatorFactory.h          | 10 ++++-
 reg-lib/F3d2ContentCreator.h             | 22 ++++++++++
 reg-lib/_reg_f3d2.cpp                    | 16 +++++---
 reg-lib/_reg_f3d2.h                      |  2 +-
 reg-lib/cuda/CudaCommon.hpp              |  8 ++++
 reg-lib/cuda/CudaContent.cpp             | 28 +++++--------
 reg-lib/cuda/CudaContent.h               | 11 ++++-
 reg-lib/cuda/CudaContentCreatorFactory.h | 52 +++++++++++++-----------
 reg-lib/cuda/CudaF3d2ContentCreator.h    | 24 +++++++++++
 reg-test/reg_test_regr_measure.cpp       | 51 ++++++-----------------
 11 files changed, 136 insertions(+), 90 deletions(-)
 create mode 100644 reg-lib/F3d2ContentCreator.h
 create mode 100644 reg-lib/cuda/CudaF3d2ContentCreator.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 51b40081..99ca0d5f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-345
+346
diff --git a/reg-lib/ContentCreatorFactory.h b/reg-lib/ContentCreatorFactory.h
index 450b38b0..ca1001f9 100644
--- a/reg-lib/ContentCreatorFactory.h
+++ b/reg-lib/ContentCreatorFactory.h
@@ -4,21 +4,27 @@
 #include "AladinContentCreator.h"
 #include "DefContentCreator.h"
 #include "F3dContentCreator.h"
+#include "F3d2ContentCreator.h"
 
-enum class ContentType { Base, Aladin, Def, F3d };
+enum class ContentType { Base, Aladin, Def, F3d, F3d2 };
 
 class ContentCreatorFactory {
 public:
     virtual ContentCreator* Produce(const ContentType& conType) {
         switch (conType) {
+        case ContentType::Base:
+            return new ContentCreator();
         case ContentType::Aladin:
             return new AladinContentCreator();
         case ContentType::Def:
             return new DefContentCreator();
         case ContentType::F3d:
             return new F3dContentCreator();
+        case ContentType::F3d2:
+            return new F3d2ContentCreator();
         default:
-            return new ContentCreator();
+            NR_FATAL_ERROR("Unsupported content type");
+            return nullptr;
         }
     }
 };
diff --git a/reg-lib/F3d2ContentCreator.h b/reg-lib/F3d2ContentCreator.h
new file mode 100644
index 00000000..106b5ede
--- /dev/null
+++ b/reg-lib/F3d2ContentCreator.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "ContentCreator.h"
+#include "F3dContent.h"
+
+class F3d2ContentCreator: public ContentCreator {
+public:
+    virtual std::pair<F3dContent*, F3dContent*> Create(nifti_image *reference,
+                                                       nifti_image *floating,
+                                                       nifti_image *controlPointGrid,
+                                                       nifti_image *controlPointGridBw,
+                                                       nifti_image *localWeightSim = nullptr,
+                                                       int *referenceMask = nullptr,
+                                                       int *floatingMask = nullptr,
+                                                       mat44 *transformationMatrix = nullptr,
+                                                       mat44 *transformationMatrixBw = nullptr,
+                                                       size_t bytes = sizeof(float)) {
+        auto con = new F3dContent(reference, floating, controlPointGrid, localWeightSim, referenceMask, transformationMatrix, bytes);
+        auto conBw = new F3dContent(floating, reference, controlPointGridBw, nullptr, floatingMask, transformationMatrixBw, bytes);
+        return { con, conBw };
+    }
+};
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 79317999..9df66103 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -38,10 +38,15 @@ void reg_f3d2<T>::SetInverseConsistencyWeight(T w) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d2<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
-    unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
-    conBw.reset(contentCreator->Create(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw.get(), sizeof(T)));
-    computeBw.reset(this->platform->CreateCompute(*conBw));
+void reg_f3d2<T>::InitContent(nifti_image *reference, nifti_image *floating, int *referenceMask, int *floatingMask) {
+    unique_ptr<F3d2ContentCreator> contentCreator{ dynamic_cast<F3d2ContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d2)) };
+    auto&& [con, conBw] = contentCreator->Create(reference, floating, this->controlPointGrid, controlPointGridBw,
+                                                 this->localWeightSimInput, referenceMask, floatingMask,
+                                                 this->affineTransformation.get(), affineTransformationBw.get(), sizeof(T));
+    this->con.reset(con);
+    this->conBw.reset(conBw);
+    this->compute.reset(this->platform->CreateCompute(*con));
+    this->computeBw.reset(this->platform->CreateCompute(*conBw));
 }
 /* *************************************************************** */
 template <class T>
@@ -90,8 +95,7 @@ T reg_f3d2<T>::InitCurrentLevel(int currentLevel) {
         }
     }
 
-    reg_f3d<T>::InitContent(reference, floating, referenceMask);
-    InitContent(reference, floating, floatingMask);
+    InitContent(reference, floating, referenceMask, floatingMask);
 
     NR_FUNC_CALLED();
     return maxStepSize;
diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h
index e8d6fdec..a231ec46 100644
--- a/reg-lib/_reg_f3d2.h
+++ b/reg-lib/_reg_f3d2.h
@@ -55,7 +55,7 @@ class reg_f3d2: public reg_f3d<T> {
     virtual void PrintCurrentObjFunctionValue(T) override;
     virtual void UpdateBestObjFunctionValue() override;
     virtual double GetObjectiveFunctionValue() override;
-    void InitContent(nifti_image*, nifti_image*, int*);
+    void InitContent(nifti_image*, nifti_image*, int*, int*);
     virtual T InitCurrentLevel(int) override;
     virtual void DeinitCurrentLevel(int) override;
     virtual void UpdateParameters(float) override;
diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp
index 9c0ee6d8..ad6ff06d 100644
--- a/reg-lib/cuda/CudaCommon.hpp
+++ b/reg-lib/cuda/CudaCommon.hpp
@@ -118,6 +118,14 @@ void Free(cudaArray*);
 template <class DataType>
 void Free(DataType*);
 /* *************************************************************** */
+namespace Internal {
+template <class T>
+struct UniquePtrDeleter { void operator()(T *ptr) const { Free(ptr); } };
+}
+/* *************************************************************** */
+template <class T>
+using UniquePtr = unique_ptr<T, Internal::UniquePtrDeleter<T>>;
+/* *************************************************************** */
 using UniqueTextureObjectPtr = unique_ptr<cudaTextureObject_t, void(*)(cudaTextureObject_t*)>;
 /* *************************************************************** */
 UniqueTextureObjectPtr CreateTextureObject(const void *devPtr,
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index abfc980c..37df05ab 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -7,7 +7,8 @@ CudaContent::CudaContent(nifti_image *referenceIn,
                          mat44 *transformationMatrixIn,
                          size_t bytesIn):
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) {
-    AllocateImages();
+    AllocateReference();
+    AllocateFloating();
     AllocateWarped();
     AllocateDeformationField();
     SetReferenceMask(referenceMask);
@@ -15,33 +16,26 @@ CudaContent::CudaContent(nifti_image *referenceIn,
 }
 /* *************************************************************** */
 CudaContent::~CudaContent() {
-    DeallocateImages();
     DeallocateWarped();
     DeallocateDeformationField();
     SetReferenceMask(nullptr);
     SetTransformationMatrix(nullptr);
 }
 /* *************************************************************** */
-void CudaContent::AllocateImages() {
+void CudaContent::AllocateReference() {
     if (reference->nbyper != NIFTI_TYPE_FLOAT32)
         reg_tools_changeDatatype<float>(reference);
-    if (floating->nbyper != NIFTI_TYPE_FLOAT32)
-        reg_tools_changeDatatype<float>(floating);
     Cuda::Allocate<float>(&referenceCuda, reference->dim);
+    referenceCudaManaged.reset(referenceCuda);
     Cuda::TransferNiftiToDevice<float>(referenceCuda, reference);
-    Cuda::Allocate<float>(&floatingCuda, floating->dim);
-    Cuda::TransferNiftiToDevice<float>(floatingCuda, floating);
 }
 /* *************************************************************** */
-void CudaContent::DeallocateImages() {
-    if (referenceCuda) {
-        Cuda::Free(referenceCuda);
-        referenceCuda = nullptr;
-    }
-    if (floatingCuda) {
-        Cuda::Free(floatingCuda);
-        floatingCuda = nullptr;
-    }
+void CudaContent::AllocateFloating() {
+    if (floating->nbyper != NIFTI_TYPE_FLOAT32)
+        reg_tools_changeDatatype<float>(floating);
+    Cuda::Allocate<float>(&floatingCuda, floating->dim);
+    floatingCudaManaged.reset(floatingCuda);
+    Cuda::TransferNiftiToDevice<float>(floatingCuda, floating);
 }
 /* *************************************************************** */
 void CudaContent::AllocateDeformationField() {
@@ -99,7 +93,7 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) {
 
     if (!referenceMask) return;
 
-    int *targetMask;
+    decltype(referenceMask) targetMask;
     NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask, reference->nvox * sizeof(*targetMask)));
     int *targetMaskPtr = targetMask;
     activeVoxelNumber = 0;
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index d914bbc2..f308ec1b 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -31,15 +31,17 @@ class CudaContent: public virtual Content {
 
 protected:
     cudaArray *referenceCuda = nullptr;
+    Cuda::UniquePtr<cudaArray> referenceCudaManaged;
     cudaArray *floatingCuda = nullptr;
+    Cuda::UniquePtr<cudaArray> floatingCudaManaged;
     float4 *deformationFieldCuda = nullptr;
     int *referenceMaskCuda = nullptr;
     float *transformationMatrixCuda = nullptr;
     float *warpedCuda = nullptr;
 
 private:
-    void AllocateImages();
-    void DeallocateImages();
+    void AllocateReference();
+    void AllocateFloating();
     void AllocateDeformationField();
     void DeallocateDeformationField();
     void AllocateWarped();
@@ -47,6 +49,11 @@ class CudaContent: public virtual Content {
     template<class DataType> DataType CastImageData(float intensity, int datatype);
     template<class DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
     void DownloadImage(nifti_image *image, float *memoryObject, int datatype);
+    void SetReferenceCuda(cudaArray *referenceCudaIn) { referenceCudaManaged = nullptr; referenceCuda = referenceCudaIn; }
+    void SetFloatingCuda(cudaArray *floatingCudaIn) { floatingCudaManaged = nullptr; floatingCuda = floatingCudaIn; }
+
+    // Friend classes
+    friend class CudaF3d2ContentCreator;
 
 #ifdef NR_TESTING
 public:
diff --git a/reg-lib/cuda/CudaContentCreatorFactory.h b/reg-lib/cuda/CudaContentCreatorFactory.h
index 5d89e839..a42360a3 100644
--- a/reg-lib/cuda/CudaContentCreatorFactory.h
+++ b/reg-lib/cuda/CudaContentCreatorFactory.h
@@ -1,23 +1,29 @@
-#pragma once
-
-#include "ContentCreatorFactory.h"
-#include "CudaContentCreator.h"
-#include "CudaAladinContentCreator.h"
-#include "CudaDefContentCreator.h"
-#include "CudaF3dContentCreator.h"
-
-class CudaContentCreatorFactory: public ContentCreatorFactory {
-public:
-    virtual ContentCreator* Produce(const ContentType& conType) override {
-        switch (conType) {
-        case ContentType::Aladin:
-            return new CudaAladinContentCreator();
-        case ContentType::Def:
-            return new CudaDefContentCreator();
-        case ContentType::F3d:
-            return new CudaF3dContentCreator();
-        default:
-            return new CudaContentCreator();
-        }
-    }
-};
+#pragma once
+
+#include "ContentCreatorFactory.h"
+#include "CudaContentCreator.h"
+#include "CudaAladinContentCreator.h"
+#include "CudaDefContentCreator.h"
+#include "CudaF3dContentCreator.h"
+#include "CudaF3d2ContentCreator.h"
+
+class CudaContentCreatorFactory: public ContentCreatorFactory {
+public:
+    virtual ContentCreator* Produce(const ContentType& conType) override {
+        switch (conType) {
+        case ContentType::Base:
+            return new CudaContentCreator();
+        case ContentType::Aladin:
+            return new CudaAladinContentCreator();
+        case ContentType::Def:
+            return new CudaDefContentCreator();
+        case ContentType::F3d:
+            return new CudaF3dContentCreator();
+        case ContentType::F3d2:
+            return new CudaF3d2ContentCreator();
+        default:
+            NR_FATAL_ERROR("Unsupported content type");
+            return nullptr;
+        }
+    }
+};
diff --git a/reg-lib/cuda/CudaF3d2ContentCreator.h b/reg-lib/cuda/CudaF3d2ContentCreator.h
new file mode 100644
index 00000000..347e07cc
--- /dev/null
+++ b/reg-lib/cuda/CudaF3d2ContentCreator.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "F3d2ContentCreator.h"
+#include "CudaF3dContent.h"
+
+class CudaF3d2ContentCreator: public F3d2ContentCreator {
+public:
+    virtual std::pair<F3dContent*, F3dContent*> Create(nifti_image *reference,
+                                                       nifti_image *floating,
+                                                       nifti_image *controlPointGrid,
+                                                       nifti_image *controlPointGridBw,
+                                                       nifti_image *localWeightSim = nullptr,
+                                                       int *referenceMask = nullptr,
+                                                       int *floatingMask = nullptr,
+                                                       mat44 *transformationMatrix = nullptr,
+                                                       mat44 *transformationMatrixBw = nullptr,
+                                                       size_t bytes = sizeof(float)) override {
+        auto con = new CudaF3dContent(reference, floating, controlPointGrid, localWeightSim, referenceMask, transformationMatrix, bytes);
+        auto conBw = new CudaF3dContent(floating, reference, controlPointGridBw, nullptr, floatingMask, transformationMatrixBw, bytes);
+        conBw->SetReferenceCuda(con->GetFloatingCuda());
+        conBw->SetFloatingCuda(con->GetReferenceCuda());
+        return { con, conBw };
+    }
+};
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 49020304..89b5627e 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -96,6 +96,10 @@ class MeasureTest {
         unique_ptr<Measure> measureCreatorCpu{ new Measure() };
         unique_ptr<Measure> measureCreatorCuda{ new CudaMeasure() };
 
+        // Create the content creators
+        unique_ptr<F3d2ContentCreator> contentCreatorCpu{ dynamic_cast<F3d2ContentCreator*>(platformCpu.CreateContentCreator(ContentType::F3d2)) };
+        unique_ptr<F3d2ContentCreator> contentCreatorCuda{ dynamic_cast<F3d2ContentCreator*>(platformCuda.CreateContentCreator(ContentType::F3d2)) };
+
         for (auto&& testData : testData) {
             // Get the test data
             auto&& [testName, reference, floating, controlPointGrid, localWeightSim, measureType, isSymmetric] = testData;
@@ -108,45 +112,16 @@ class MeasureTest {
             NiftiImage localWeightSimCpu(localWeightSim), localWeightSimCuda(localWeightSim);
 
             // Create the contents
-            unique_ptr<F3dContent> contentCpu{ new F3dContent(
-                referenceCpu,
-                floatingCpu,
-                controlPointGridCpu,
-                localWeightSimCpu,
-                nullptr,
-                nullptr,
-                sizeof(float)
-            ) };
-            unique_ptr<F3dContent> contentCuda{ new CudaF3dContent(
-                referenceCuda,
-                floatingCuda,
-                controlPointGridCuda,
-                localWeightSimCuda,
-                nullptr,
-                nullptr,
-                sizeof(float)
-            ) };
-            unique_ptr<F3dContent> contentCpuBw, contentCudaBw;
-            if (isSymmetric) {
-                contentCpuBw.reset(new F3dContent(
-                    floatingCpu,
-                    referenceCpu,
-                    controlPointGridCpuBw,
-                    nullptr,
-                    nullptr,
-                    nullptr,
-                    sizeof(float)
-                ));
-                contentCudaBw.reset(new CudaF3dContent(
-                    floatingCuda,
-                    referenceCuda,
-                    controlPointGridCudaBw,
-                    nullptr,
-                    nullptr,
-                    nullptr,
-                    sizeof(float)
-                ));
+            auto contentsCpu = contentCreatorCpu->Create(referenceCpu, floatingCpu, controlPointGridCpu, controlPointGridCpuBw, localWeightSimCpu, nullptr, nullptr, nullptr, nullptr, sizeof(float));
+            auto contentsCuda = contentCreatorCuda->Create(referenceCuda, floatingCuda, controlPointGridCuda, controlPointGridCudaBw, localWeightSimCuda, nullptr, nullptr, nullptr, nullptr, sizeof(float));
+            if (!isSymmetric) {
+                delete contentsCpu.second;
+                delete contentsCuda.second;
+                contentsCpu.second = nullptr;
+                contentsCuda.second = nullptr;
             }
+            unique_ptr<F3dContent> contentCpu{ contentsCpu.first }, contentCpuBw{ contentsCpu.second };
+            unique_ptr<F3dContent> contentCuda{ contentsCuda.first }, contentCudaBw{ contentsCuda.second };
 
             // Create the computes
             unique_ptr<Compute> computeCpu{ platformCpu.CreateCompute(*contentCpu) };

From a4e191827928820a533922a0ca853e2a415940b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 23 Oct 2023 16:40:35 +0100
Subject: [PATCH 228/314] Refactorisations

---
 CMakeLists.txt                                |   8 +-
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/CMakeLists.txt                        |   5 +-
 reg-lib/cpu/_reg_localTrans.cpp               | 118 +++++++++---------
 reg-lib/cpu/_reg_nmi.cpp                      |   2 +
 reg-lib/cuda/CMakeLists.txt                   |  17 +--
 .../cuda/_reg_localTransformation_kernels.cu  |  28 ++---
 reg-lib/cuda/_reg_resampling_kernels.cu       |   3 +-
 8 files changed, 85 insertions(+), 98 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 87ee07e6..14b7b278 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,12 +1,12 @@
-project(NiftyReg)
-#-----------------------------------------------------------------------------
 cmake_minimum_required(VERSION 3.2.2)
 if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
- mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY)
+  mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY)
 else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
- mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY)
+  mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY)
 endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
 #-----------------------------------------------------------------------------
+project(NiftyReg)
+#-----------------------------------------------------------------------------
 # Set C++ standard version
 set(CMAKE_CXX_STANDARD 17)
 #-----------------------------------------------------------------------------
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 99ca0d5f..538ad4bc 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-346
+347
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 3b0c528e..658fe990 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -1,10 +1,7 @@
 #-----------------------------------------------------------------------------
 if(USE_CUDA)
   add_subdirectory(cuda)
-  set(NR_CUDA_LIBRARIES
-    CudaCommon
-    _reg_cuda_kernels
-  )
+  set(NR_CUDA_LIBRARIES _reg_cuda_kernels)
 endif(USE_CUDA)
 #-----------------------------------------------------------------------------
 if(USE_OPENCL)
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 2dac9946..77b21238 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -10,7 +10,6 @@
  *
  */
 
-#include <cmath>
 #include "_reg_localTrans.h"
 #include "_reg_maths_eigen.h"
 
@@ -605,7 +604,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
     DataType *controlPointPtrY = &controlPointPtrX[NiftiImage::calcVoxelNumber(splineControlPoint, 2)];
 
     DataType *fieldPtrX = static_cast<DataType*>(deformationField->data);
-    DataType *fieldPtrY = &fieldPtrX[NiftiImage::calcVoxelNumber(deformationField, 3)];
+    DataType *fieldPtrY = &fieldPtrX[NiftiImage::calcVoxelNumber(deformationField, 2)];
 
     DataType gridVoxelSpacing[2];
     gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx;
@@ -624,65 +623,62 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 
         for (y = 0; y < deformationField->ny; y++) {
             index = y * deformationField->nx;
-            oldXpre = oldYpre = 99999999;
+            oldXpre = oldYpre = -99;
             for (x = 0; x < deformationField->nx; x++) {
-                // The previous position at the current pixel position is read
-                xReal = static_cast<DataType>(fieldPtrX[index]);
-                yReal = static_cast<DataType>(fieldPtrY[index]);
-
-                // From real to pixel position in the CPP
-                xVoxel = referenceMatrix_real_to_voxel->m[0][0] * xReal
-                    + referenceMatrix_real_to_voxel->m[0][1] * yReal
-                    + referenceMatrix_real_to_voxel->m[0][3];
-                yVoxel = referenceMatrix_real_to_voxel->m[1][0] * xReal
-                    + referenceMatrix_real_to_voxel->m[1][1] * yReal
-                    + referenceMatrix_real_to_voxel->m[1][3];
-
-                // The spline coefficients are computed
-                xPre = Floor(xVoxel);
-                basis = xVoxel - static_cast<DataType>(xPre--);
-                if (basis < 0) basis = 0; //rounding error
-                if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
-                else get_SplineBasisValues<DataType>(basis, xBasis);
+                if (mask[index] > -1) {
+                    // The previous position at the current pixel position is read
+                    xReal = fieldPtrX[index];
+                    yReal = fieldPtrY[index];
+
+                    // From real to pixel position in the CPP
+                    xVoxel = referenceMatrix_real_to_voxel->m[0][0] * xReal
+                        + referenceMatrix_real_to_voxel->m[0][1] * yReal
+                        + referenceMatrix_real_to_voxel->m[0][3];
+                    yVoxel = referenceMatrix_real_to_voxel->m[1][0] * xReal
+                        + referenceMatrix_real_to_voxel->m[1][1] * yReal
+                        + referenceMatrix_real_to_voxel->m[1][3];
+
+                    // The spline coefficients are computed
+                    xPre = Floor(xVoxel);
+                    basis = xVoxel - static_cast<DataType>(xPre--);
+                    if (basis < 0) basis = 0; //rounding error
+                    if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
+                    else get_SplineBasisValues<DataType>(basis, xBasis);
 
-                yPre = Floor(yVoxel);
-                basis = yVoxel - static_cast<DataType>(yPre--);
-                if (basis < 0) basis = 0; //rounding error
-                if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
-                else get_SplineBasisValues<DataType>(basis, yBasis);
+                    yPre = Floor(yVoxel);
+                    basis = yVoxel - static_cast<DataType>(yPre--);
+                    if (basis < 0) basis = 0; //rounding error
+                    if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
+                    else get_SplineBasisValues<DataType>(basis, yBasis);
 
-                if (xVoxel >= 0 && xVoxel <= deformationField->nx - 1 &&
-                    yVoxel >= 0 && yVoxel <= deformationField->ny - 1) {
-                    // The control point positions are extracted
-                    if (oldXpre != xPre || oldYpre != yPre) {
+                    if (xVoxel >= 0 && xVoxel <= deformationField->nx - 1 &&
+                        yVoxel >= 0 && yVoxel <= deformationField->ny - 1) {
+                        // The control point positions are extracted
+                        if (oldXpre != xPre || oldYpre != yPre) {
 #ifdef _USE_SSE
-                        get_GridValues<DataType>(xPre,
-                                                 yPre,
-                                                 splineControlPoint,
-                                                 controlPointPtrX,
-                                                 controlPointPtrY,
-                                                 xControlPointCoordinates.f,
-                                                 yControlPointCoordinates.f,
-                                                 false,  // no approximation
-                                                 false); // not a displacement field
+                            get_GridValues<DataType>(xPre,
+                                                     yPre,
+                                                     splineControlPoint,
+                                                     controlPointPtrX,
+                                                     controlPointPtrY,
+                                                     xControlPointCoordinates.f,
+                                                     yControlPointCoordinates.f,
+                                                     false,  // no approximation
+                                                     false); // not a displacement field
 #else // _USE_SSE
-                        get_GridValues<DataType>(xPre,
-                                                 yPre,
-                                                 splineControlPoint,
-                                                 controlPointPtrX,
-                                                 controlPointPtrY,
-                                                 xControlPointCoordinates,
-                                                 yControlPointCoordinates,
-                                                 false,  // no approximation
-                                                 false); // not a displacement field
+                            get_GridValues<DataType>(xPre,
+                                                     yPre,
+                                                     splineControlPoint,
+                                                     controlPointPtrX,
+                                                     controlPointPtrY,
+                                                     xControlPointCoordinates,
+                                                     yControlPointCoordinates,
+                                                     false,  // no approximation
+                                                     false); // not a displacement field
 #endif // _USE_SSE
-                        oldXpre = xPre;
-                        oldYpre = yPre;
-                    }
-                    xReal = 0;
-                    yReal = 0;
-
-                    if (mask[index] > -1) {
+                            oldXpre = xPre;
+                            oldYpre = yPre;
+                        }
 #if _USE_SSE
                         coord = 0;
                         for (b = 0; b < 4; b++) {
@@ -704,6 +700,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                         val.m = tempY;
                         yReal = val.f[0] + val.f[1] + val.f[2] + val.f[3];
 #else
+                        xReal = 0;
+                        yReal = 0;
                         for (b = 0; b < 4; b++) {
                             for (a = 0; a < 4; a++) {
                                 DataType tempValue = xBasis[a] * yBasis[b];
@@ -714,8 +712,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 #endif
                     }
 
-                    fieldPtrX[index] = (DataType)xReal;
-                    fieldPtrY[index] = (DataType)yReal;
+                    fieldPtrX[index] = xReal;
+                    fieldPtrY[index] = yReal;
                 }
                 index++;
             }
@@ -739,7 +737,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
 #endif // _OPENMP
         for (y = 0; y < deformationField->ny; y++) {
             index = y * deformationField->nx;
-            oldXpre = oldYpre = 9999999;
+            oldXpre = oldYpre = -99;
 
             yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
             basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
@@ -943,9 +941,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 #endif // _OPENMP
         for (z = 0; z < deformationField->nz; z++) {
             index = z * deformationField->nx * deformationField->ny;
-            oldPreX = -99;
-            oldPreY = -99;
-            oldPreZ = -99;
+            oldPreX = oldPreY = oldPreZ = -99;
             for (y = 0; y < deformationField->ny; y++) {
                 for (x = 0; x < deformationField->nx; x++) {
                     if (mask[index] > -1) {
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 240c9e3d..bd3fda06 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -364,6 +364,8 @@ void reg_getNMIValue(const nifti_image *referenceImage,
         } // if active time point
     } // iterate over all time point in the reference image
 }
+template void reg_getNMIValue<float>(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool);
+template void reg_getNMIValue<double>(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool);
 /* *************************************************************** */
 double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  const nifti_image *warpedImage,
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index d4fb3af0..0ddb1e93 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -33,10 +33,10 @@ else(NOT COMPILE_RESULT_VAR)
     set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_${CAPABILITY_CODE},code=sm_${CAPABILITY_CODE}")
     # If desired, add PIC flags
     if(CMAKE_POSITION_INDEPENDENT_CODE AND DEFINED CMAKE_C_COMPILE_OPTIONS_PIC)
-        # add (undocumented) CMake flag that should tell the host compiler to generate position independent code
+        # Add (undocumented) CMake flag that should tell the host compiler to generate position independent code
         set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}")
     endif()
-    #adjust for debug and release versions
+    # Adjust for debug and release versions
     if(CMAKE_BUILD_TYPE STREQUAL "Debug")
         set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-v -g -G")
     else(CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -48,19 +48,10 @@ else(NOT COMPILE_RESULT_VAR)
     endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
 endif(NOT COMPILE_RESULT_VAR)
 #-----------------------------------------------------------------------------
-set(NAME CudaCommon)
-cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cu)
-target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
-install(TARGETS ${NAME}
-    RUNTIME DESTINATION bin
-    LIBRARY DESTINATION lib
-    ARCHIVE DESTINATION lib
-)
-set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
-#-----------------------------------------------------------------------------
 set(NAME _reg_cuda_kernels)
 cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaAladinContent.cpp
+    CudaCommon.cu
     CudaCompute.cu
     CudaContent.cpp
     CudaContext.cpp
@@ -87,7 +78,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     _reg_ssd_gpu.cu
     _reg_optimiser_gpu.cu
 )
-target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} CudaCommon)
+target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
 install(TARGETS ${NAME}
     RUNTIME DESTINATION lib
     LIBRARY DESTINATION lib
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 86395269..69e44967 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -14,40 +14,40 @@
 
 /* *************************************************************** */
 __device__ void GetBasisBSplineValues(const double basis, float *values) {
-    const double ff = basis * basis;
-    const double fff = basis * basis * basis;
+    const double ff = Square(basis);
+    const double fff = Cube(basis);
     const double mf = 1.0 - basis;
-    values[0] = static_cast<float>(mf * mf * mf / 6.0);
+    values[0] = static_cast<float>(Cube(mf) / 6.0);
     values[1] = static_cast<float>((3.0 * fff - 6.0 * ff + 4.0) / 6.0);
     values[2] = static_cast<float>((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0);
     values[3] = static_cast<float>(fff / 6.0);
 }
 /* *************************************************************** */
-__device__ void GetFirstBSplineValues(const float& basis, float *values, float *first) {
+__device__ void GetFirstBSplineValues(const float basis, float *values, float *first) {
     GetBasisBSplineValues(basis, values);
-    first[3] = basis * basis / 2.f;
+    first[3] = Square(basis) / 2.f;
     first[0] = basis - 0.5f - first[3];
     first[2] = 1.f + first[0] - 2.f * first[3];
     first[1] = -first[0] - first[2] - first[3];
 }
 /* *************************************************************** */
-__device__ void GetBasisSplineValues(const float& basis, float *values) {
-    const float ff = basis * basis;
+__device__ void GetBasisSplineValues(const float basis, float *values) {
+    const float ff = Square(basis);
     values[0] = (basis * ((2.f - basis) * basis - 1.f)) / 2.f;
     values[1] = (ff * (3.f * basis - 5.f) + 2.f) / 2.f;
     values[2] = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f;
     values[3] = (basis - 1.f) * ff / 2.f;
 }
 /* *************************************************************** */
-__device__ void GetBasisSplineValuesX(const float& basis, float4 *values) {
-    const float ff = basis * basis;
+__device__ void GetBasisSplineValuesX(const float basis, float4 *values) {
+    const float ff = Square(basis);
     values->x = (basis * ((2.f - basis) * basis - 1.f)) / 2.f;
     values->y = (ff * (3.f * basis - 5.f) + 2.f) / 2.f;
     values->z = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f;
     values->w = (basis - 1.f) * ff / 2.f;
 }
 /* *************************************************************** */
-__device__ void GetBSplineBasisValue(const float& basis, const int& index, float *value, float *first) {
+__device__ void GetBSplineBasisValue(const float basis, const int index, float *value, float *first) {
     switch (index) {
     case 0:
         *value = (1.f - basis) * (1.f - basis) * (1.f - basis) / 6.f;
@@ -72,7 +72,7 @@ __device__ void GetBSplineBasisValue(const float& basis, const int& index, float
     }
 }
 /* *************************************************************** */
-__device__ void GetFirstDerivativeBasisValues2D(const int& index, float *xBasis, float *yBasis) {
+__device__ void GetFirstDerivativeBasisValues2D(const int index, float *xBasis, float *yBasis) {
     switch (index) {
     case 0: xBasis[0] = -0.0833333f; yBasis[0] = -0.0833333f; break;
     case 1: xBasis[1] = 0.f; yBasis[1] = -0.333333f; break;
@@ -86,7 +86,7 @@ __device__ void GetFirstDerivativeBasisValues2D(const int& index, float *xBasis,
     }
 }
 /* *************************************************************** */
-__device__ void GetFirstDerivativeBasisValues3D(const int& index, float *xBasis, float *yBasis, float *zBasis) {
+__device__ void GetFirstDerivativeBasisValues3D(const int index, float *xBasis, float *yBasis, float *zBasis) {
     switch (index) {
     case 0: xBasis[0] = -0.013889f; yBasis[0] = -0.013889f; zBasis[0] = -0.013889f; break;
     case 1: xBasis[1] = 0.000000f; yBasis[1] = -0.055556f; zBasis[1] = -0.055556f; break;
@@ -118,7 +118,7 @@ __device__ void GetFirstDerivativeBasisValues3D(const int& index, float *xBasis,
     }
 }
 /* *************************************************************** */
-__device__ void GetSecondDerivativeBasisValues2D(const int& index, float *xxBasis, float *yyBasis, float *xyBasis) {
+__device__ void GetSecondDerivativeBasisValues2D(const int index, float *xxBasis, float *yyBasis, float *xyBasis) {
     switch (index) {
     case 0: xxBasis[0] = 0.166667f; yyBasis[0] = 0.166667f; xyBasis[0] = 0.25f; break;
     case 1: xxBasis[1] = -0.333333f; yyBasis[1] = 0.666667f; xyBasis[1] = -0.f; break;
@@ -132,7 +132,7 @@ __device__ void GetSecondDerivativeBasisValues2D(const int& index, float *xxBasi
     }
 }
 /* *************************************************************** */
-__device__ void GetSecondDerivativeBasisValues3D(const int& index,
+__device__ void GetSecondDerivativeBasisValues3D(const int index,
                                                  float *xxBasis,
                                                  float *yyBasis,
                                                  float *zzBasis,
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 8180ca1d..1c14369c 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -11,7 +11,8 @@
  */
 
 /* *************************************************************** */
-__inline__ __device__ void InterpLinearKernel(float relative, float (&basis)[2]) {
+template<typename T>
+__inline__ __device__ void InterpLinearKernel(T relative, T (&basis)[2]) {
     if (relative < 0)
         relative = 0;  // reg_rounding error
     basis[1] = relative;

From 67cc12337afebb9f6968932c0d4227178804a246 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 23 Oct 2023 18:04:40 +0100
Subject: [PATCH 229/314] Add regression tests for
 Compute::GetDeformationField() #92

---
 CMakeLists.txt                                |   7 +-
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/cpu/_reg_localTrans.cpp               |  10 +-
 reg-test/CMakeLists.txt                       |   2 +-
 reg-test/reg_test_common.h                    |   2 +-
 .../reg_test_regr_getDeformationField.cpp     | 574 ++++++++++++++++++
 6 files changed, 588 insertions(+), 9 deletions(-)
 create mode 100644 reg-test/reg_test_regr_getDeformationField.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 14b7b278..4d7122ef 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -204,14 +204,17 @@ else(BUILD_SHARED_LIBS)
   set(NIFTYREG_LIBRARY_TYPE STATIC)
 endif(BUILD_SHARED_LIBS)
 #-----------------------------------------------------------------------------
+if(BUILD_TESTING)
+  enable_testing()
+  add_definitions(-DBUILD_TESTS)
+endif(BUILD_TESTING)
+#-----------------------------------------------------------------------------
 add_subdirectory(third-party)
 add_subdirectory(reg-io)
 add_subdirectory(reg-lib)
 add_subdirectory(reg-apps)
 add_subdirectory(cmake)
-#-----------------------------------------------------------------------------
 if(BUILD_TESTING)
-  enable_testing()
   add_subdirectory(${CMAKE_SOURCE_DIR}/reg-test)
 endif(BUILD_TESTING)
 #-----------------------------------------------------------------------------
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 538ad4bc..71627d71 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-347
+348
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 77b21238..15185c8a 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -13,6 +13,10 @@
 #include "_reg_localTrans.h"
 #include "_reg_maths_eigen.h"
 
+#ifdef BUILD_TESTS
+#undef _USE_SSE
+#endif
+
 /* *************************************************************** */
 template <class DataType>
 void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
@@ -681,11 +685,9 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                         }
 #if _USE_SSE
                         coord = 0;
-                        for (b = 0; b < 4; b++) {
-                            for (a = 0; a < 4; a++) {
+                        for (b = 0; b < 4; b++)
+                            for (a = 0; a < 4; a++)
                                 xyBasis.f[coord++] = xBasis[a] * yBasis[b];
-                            }
-                        }
 
                         tempX = _mm_set_ps1(0);
                         tempY = _mm_set_ps1(0);
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index e999620b..a2e304f6 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -121,6 +121,7 @@ set(EXEC_LIST reg_test_lncc ${EXEC_LIST})
 set(EXEC_LIST reg_test_nmi ${EXEC_LIST})
 set(EXEC_LIST reg_test_nmi_gradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
+set(EXEC_LIST reg_test_regr_getDeformationField ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
 if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_approxLinearEnergyGradient ${EXEC_LIST})
@@ -130,7 +131,6 @@ if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST})
 endif(USE_CUDA)
 
-
 foreach(EXEC ${EXEC_LIST})
   add_executable(${EXEC} ${EXEC}.cpp)
   target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain _reg_aladin _reg_f3d)
diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h
index 69dd285b..ab4cdded 100644
--- a/reg-test/reg_test_common.h
+++ b/reg-test/reg_test_common.h
@@ -1,5 +1,5 @@
 #define NR_TESTING  // Enable testing
-#define EPS     0.000001
+#define EPS     0.000001f
 
 #include <array>
 #include <random>
diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp
new file mode 100644
index 00000000..d5c0a8de
--- /dev/null
+++ b/reg-test/reg_test_regr_getDeformationField.cpp
@@ -0,0 +1,574 @@
+// OpenCL is not supported for this test
+#undef _USE_OPENCL
+
+#include "reg_test_common.h"
+
+/*
+    This test file contains the following regression tests:
+    test functions: creation of a deformation field from a control point grid
+    In 2D and 3D
+    Cubic spline
+*/
+
+
+class GetDeformationFieldTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    GetDeformationFieldTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::mt19937 gen(0);
+        std::uniform_real_distribution<float> distr(0, 1);
+
+        // Create reference images
+        constexpr NiftiImage::dim_t size = 5;
+        NiftiImage reference2d({ size, size }, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d({ size, size, size }, NIFTI_TYPE_FLOAT32);
+
+        // Generate the different test cases
+        // Test 2D
+        NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
+        auto cpp2dPtr = controlPointGrid2d.data();
+        for (auto i = 0; i < controlPointGrid2d.nVoxels(); ++i)
+            cpp2dPtr[i] = distr(gen);
+
+        // Add the test data
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "2D"s,
+            std::move(reference2d),
+            std::move(controlPointGrid2d)
+        ));
+
+        // Test 3D
+        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+        auto cpp3dPtr = controlPointGrid3d.data();
+        for (auto i = 0; i < controlPointGrid3d.nVoxels(); ++i)
+            cpp3dPtr[i] = distr(gen);
+
+        // Add the test data
+        testData.emplace_back(TestData(
+            "3D"s,
+            std::move(reference3d),
+            std::move(controlPointGrid3d)
+        ));
+
+        // Add platforms, composition, and bspline to the test data
+        for (auto&& testData : testData) {
+            for (auto&& platformType : PlatformTypes) {
+                unique_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
+                for (int composition = 0; composition < 2; composition++) {
+                    for (int bspline = 0; bspline < 2; bspline++) {
+                        // Make a copy of the test data
+                        auto [testName, reference, controlPointGrid] = testData;
+                        testName += " "s + platform->GetName() + " Composition="s + std::to_string(composition) + " Bspline="s + std::to_string(bspline);
+                        unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
+                        unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                        NiftiImage expDefField(content->GetDeformationField(), NiftiImage::Copy::Image);
+                        // Compute the deformation field
+                        compute->GetDeformationField(composition, bspline);
+                        NiftiImage defField(content->GetDeformationField(), NiftiImage::Copy::Image);
+                        // Compute the expected deformation field
+                        GetDeformationField<float>(controlPointGrid, expDefField, content->GetReferenceMask(), composition, bspline);
+                        // Save for testing
+                        testCases.push_back({ std::move(testName), std::move(defField), std::move(expDefField) });
+                    }
+                }
+            }
+        }
+    }
+
+    template<class DataType>
+    void GetBSplineBasisValues(const DataType basis, DataType (&values)[4]) {
+        const DataType ff = basis * basis;
+        const DataType fff = ff * basis;
+        const DataType mf = static_cast<DataType>(1.0 - basis);
+        values[0] = static_cast<DataType>(mf * mf * mf / 6.0);
+        values[1] = static_cast<DataType>((3.0 * fff - 6.0 * ff + 4.0) / 6.0);
+        values[2] = static_cast<DataType>((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0);
+        values[3] = static_cast<DataType>(fff / 6.0);
+    }
+
+    template<class DataType>
+    void GetSplineBasisValues(const DataType basis, DataType(&values)[4]) {
+        const DataType ff = basis * basis;
+        values[0] = static_cast<DataType>((basis * ((2.0 - basis) * basis - 1.0)) / 2.0);
+        values[1] = static_cast<DataType>((ff * (3.0 * basis - 5.0) + 2.0) / 2.0);
+        values[2] = static_cast<DataType>((basis * ((4.0 - 3.0 * basis) * basis + 1.0)) / 2.0);
+        values[3] = static_cast<DataType>((basis - 1.0) * ff / 2.0);
+    }
+
+    template <class DataType>
+    void GetSlidedValues(DataType defX,
+                         DataType defY,
+                         const int x,
+                         const int y,
+                         const NiftiImageData::Iterator& defPtrX,
+                         const NiftiImageData::Iterator& defPtrY,
+                         const mat44 *dfVoxel2Real,
+                         const int *dim,
+                         const bool displacement) {
+        int newX = x;
+        if (x < 0)
+            newX = 0;
+        else if (x >= dim[1])
+            newX = dim[1] - 1;
+
+        int newY = y;
+        if (y < 0)
+            newY = 0;
+        else if (y >= dim[2])
+            newY = dim[2] - 1;
+
+        DataType shiftValueX = 0;
+        DataType shiftValueY = 0;
+        if (!displacement) {
+            const int shiftIndexX = x - newX;
+            const int shiftIndexY = y - newY;
+            shiftValueX = shiftIndexX * dfVoxel2Real->m[0][0] + shiftIndexY * dfVoxel2Real->m[0][1];
+            shiftValueY = shiftIndexX * dfVoxel2Real->m[1][0] + shiftIndexY * dfVoxel2Real->m[1][1];
+        }
+        const int index = newY * dim[1] + newX;
+        defX = DataType(defPtrX[index]) + shiftValueX;
+        defY = DataType(defPtrY[index]) + shiftValueY;
+    }
+
+    template <class DataType>
+    void GetSlidedValues(DataType defX,
+                         DataType defY,
+                         DataType defZ,
+                         const int x,
+                         const int y,
+                         const int z,
+                         const NiftiImageData::Iterator& defPtrX,
+                         const NiftiImageData::Iterator& defPtrY,
+                         const NiftiImageData::Iterator& defPtrZ,
+                         const mat44 *dfVoxel2Real,
+                         const int *dim,
+                         const bool displacement) {
+        int newX = x;
+        if (x < 0)
+            newX = 0;
+        else if (x >= dim[1])
+            newX = dim[1] - 1;
+
+        int newY = y;
+        if (y < 0)
+            newY = 0;
+        else if (y >= dim[2])
+            newY = dim[2] - 1;
+
+        int newZ = z;
+        if (z < 0)
+            newZ = 0;
+        else if (z >= dim[3])
+            newZ = dim[3] - 1;
+
+        DataType shiftValueX = 0;
+        DataType shiftValueY = 0;
+        DataType shiftValueZ = 0;
+        if (!displacement) {
+            const int shiftIndexX = x - newX;
+            const int shiftIndexY = y - newY;
+            const int shiftIndexZ = z - newZ;
+            shiftValueX =
+                shiftIndexX * dfVoxel2Real->m[0][0] +
+                shiftIndexY * dfVoxel2Real->m[0][1] +
+                shiftIndexZ * dfVoxel2Real->m[0][2];
+            shiftValueY =
+                shiftIndexX * dfVoxel2Real->m[1][0] +
+                shiftIndexY * dfVoxel2Real->m[1][1] +
+                shiftIndexZ * dfVoxel2Real->m[1][2];
+            shiftValueZ =
+                shiftIndexX * dfVoxel2Real->m[2][0] +
+                shiftIndexY * dfVoxel2Real->m[2][1] +
+                shiftIndexZ * dfVoxel2Real->m[2][2];
+        }
+        const int index = (newZ * dim[2] + newY) * dim[1] + newX;
+        defX = DataType(defPtrX[index]) + shiftValueX;
+        defY = DataType(defPtrY[index]) + shiftValueY;
+        defZ = DataType(defPtrZ[index]) + shiftValueZ;
+    }
+
+    template <class DataType>
+    void GetGridValues(const int xPre, const int yPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates) {
+        const auto cppPtr = controlPointGrid.data();
+        const auto cppPtrX = cppPtr.begin();
+        const auto cppPtrY = cppPtrX + controlPointGrid.nVoxelsPerSlice();
+        const mat44 *voxelToRealMatrix = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_xyz : &controlPointGrid->qto_xyz;
+        size_t coord = 0;
+        for (int y = yPre; y < yPre + 4; y++) {
+            const bool in = -1 < y && y < controlPointGrid->ny;
+            const size_t index = y * controlPointGrid->nx;
+            for (int x = xPre; x < xPre + 4; x++, coord++) {
+                if (in && -1 < x && x < controlPointGrid->nx) {
+                    xControlPointCoordinates[coord] = cppPtrX[index + x];
+                    yControlPointCoordinates[coord] = cppPtrY[index + x];
+                } else {
+                    GetSlidedValues<DataType>(xControlPointCoordinates[coord],
+                                              yControlPointCoordinates[coord],
+                                              x,
+                                              y,
+                                              cppPtrX,
+                                              cppPtrY,
+                                              voxelToRealMatrix,
+                                              controlPointGrid->dim,
+                                              false);
+                }
+            }
+        }
+    }
+
+    template <class DataType>
+    void GetGridValues(const int xPre, const int yPre, const int zPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates, float *zControlPointCoordinates) {
+        const size_t cppVoxelNumber = controlPointGrid.nVoxelsPerVolume();
+        const auto cppPtr = controlPointGrid.data();
+        const auto cppPtrX = cppPtr.begin();
+        const auto cppPtrY = cppPtrX + cppVoxelNumber;
+        const auto cppPtrZ = cppPtrY + cppVoxelNumber;
+        const mat44 *voxelToRealMatrix = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_xyz : &controlPointGrid->qto_xyz;
+        size_t coord = 0, yIndex, zIndex;
+        for (int z = zPre; z < zPre + 4; z++) {
+            bool in = true;
+            if (-1 < z && z < controlPointGrid->nz)
+                zIndex = z * controlPointGrid->nx * controlPointGrid->ny;
+            else in = false;
+            for (int y = yPre; y < yPre + 4; y++) {
+                if (in && -1 < y && y < controlPointGrid->ny)
+                    yIndex = y * controlPointGrid->nx;
+                else in = false;
+                for (int x = xPre; x < xPre + 4; x++, coord++) {
+                    if (in && -1 < x && x < controlPointGrid->nx) {
+                        xControlPointCoordinates[coord] = cppPtrX[zIndex + yIndex + x];
+                        yControlPointCoordinates[coord] = cppPtrY[zIndex + yIndex + x];
+                        zControlPointCoordinates[coord] = cppPtrZ[zIndex + yIndex + x];
+                    } else {
+                        GetSlidedValues<DataType>(xControlPointCoordinates[coord],
+                                                  yControlPointCoordinates[coord],
+                                                  zControlPointCoordinates[coord],
+                                                  x,
+                                                  y,
+                                                  z,
+                                                  cppPtrX,
+                                                  cppPtrY,
+                                                  cppPtrZ,
+                                                  voxelToRealMatrix,
+                                                  controlPointGrid->dim,
+                                                  false);
+                    }
+                }
+            }
+        }
+    }
+
+    template<class DataType>
+    void GetDeformationField(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) {
+        if (controlPointGrid->nz > 1)
+            GetDeformationField3D<DataType>(controlPointGrid, defField, mask, composition, bspline);
+        else
+            GetDeformationField2D<DataType>(controlPointGrid, defField, mask, composition, bspline);
+    }
+
+    template<class DataType>
+    void GetDeformationField2D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) {
+        auto defFieldPtr = defField.data();
+        auto defFieldPtrX = defFieldPtr.begin();
+        auto defFieldPtrY = defFieldPtrX + defField.nVoxelsPerSlice();
+
+        const DataType gridVoxelSpacing[2] = { controlPointGrid->dx / defField->dx, controlPointGrid->dy / defField->dy };
+        DataType xBasis[4], yBasis[4], xyBasis[16], xControlPointCoordinates[16], yControlPointCoordinates[16];
+        int oldXPre = -1, oldYPre = -1;
+
+        if (composition) {  // Composition of deformation fields
+            // Read the ijk sform or qform, as appropriate
+            const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk;
+
+            for (int y = 0; y < defField->ny; y++) {
+                size_t index = y * defField->nx;
+                for (int x = 0; x < defField->nx; x++, index++) {
+                    // The previous position at the current pixel position is read
+                    DataType xReal = defFieldPtrX[index];
+                    DataType yReal = defFieldPtrY[index];
+
+                    // From real to pixel position in the CPP
+                    const DataType xVoxel = realToVoxel->m[0][0] * xReal + realToVoxel->m[0][1] * yReal + realToVoxel->m[0][3];
+                    const DataType yVoxel = realToVoxel->m[1][0] * xReal + realToVoxel->m[1][1] * yReal + realToVoxel->m[1][3];
+
+                    // The spline coefficients are computed
+                    int xPre = int(std::floor(xVoxel));
+                    DataType basis = xVoxel - (DataType)xPre--;
+                    if (basis < 0) basis = 0; // rounding error
+                    if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
+                    else GetSplineBasisValues<DataType>(basis, xBasis);
+
+                    int yPre = int(std::floor(yVoxel));
+                    basis = yVoxel - (DataType)yPre--;
+                    if (basis < 0) basis = 0; // rounding error
+                    if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
+                    else GetSplineBasisValues<DataType>(basis, yBasis);
+
+                    if (xVoxel >= 0 && xVoxel <= defField->nx - 1 &&
+                        yVoxel >= 0 && yVoxel <= defField->ny - 1) {
+                        // The control point positions are extracted
+                        if (oldXPre != xPre || oldYPre != yPre) {
+                            GetGridValues<DataType>(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates);
+                            oldXPre = xPre;
+                            oldYPre = yPre;
+                        }
+
+                        xReal = 0; yReal = 0;
+                        if (mask[index] > -1) {
+                            for (int b = 0; b < 4; b++) {
+                                for (int a = 0; a < 4; a++) {
+                                    const DataType xyBasis = xBasis[a] * yBasis[b];
+                                    xReal += xControlPointCoordinates[b * 4 + a] * xyBasis;
+                                    yReal += yControlPointCoordinates[b * 4 + a] * xyBasis;
+                                }
+                            }
+                        }
+
+                        defFieldPtrX[index] = xReal;
+                        defFieldPtrY[index] = yReal;
+                    }
+                }
+            }
+        } else {    // If the deformation field is blank - !composition
+            for (int y = 0; y < defField->ny; y++) {
+                size_t index = y * defField->nx;
+
+                int yPre = (int)((DataType)y / gridVoxelSpacing[1]);
+                DataType basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre;
+                if (basis < 0) basis = 0; // rounding error
+                if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
+                else GetSplineBasisValues<DataType>(basis, yBasis);
+
+                for (int x = 0; x < defField->nx; x++, index++) {
+                    int xPre = (int)((DataType)x / gridVoxelSpacing[0]);
+                    basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre;
+                    if (basis < 0) basis = 0; // rounding error
+                    if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
+                    else GetSplineBasisValues<DataType>(basis, xBasis);
+
+                    size_t coord = 0;
+                    for (int a = 0; a < 4; a++) {
+                        xyBasis[coord++] = xBasis[0] * yBasis[a];
+                        xyBasis[coord++] = xBasis[1] * yBasis[a];
+                        xyBasis[coord++] = xBasis[2] * yBasis[a];
+                        xyBasis[coord++] = xBasis[3] * yBasis[a];
+                    }
+
+                    if (oldXPre != xPre || oldYPre != yPre) {
+                        GetGridValues<DataType>(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates);
+                        oldXPre = xPre;
+                        oldYPre = yPre;
+                    }
+
+                    DataType xReal = 0, yReal = 0;
+                    if (mask[index] > -1) {
+                        for (int a = 0; a < 16; a++) {
+                            xReal += xControlPointCoordinates[a] * xyBasis[a];
+                            yReal += yControlPointCoordinates[a] * xyBasis[a];
+                        }
+                    }
+                    defFieldPtrX[index] = xReal;
+                    defFieldPtrY[index] = yReal;
+                }
+            }
+        }
+    }
+
+    template<class DataType>
+    void GetDeformationField3D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) {
+        DataType xBasis[4], yBasis[4], zBasis[4];
+        DataType xControlPointCoordinates[64];
+        DataType yControlPointCoordinates[64];
+        DataType zControlPointCoordinates[64];
+
+        const size_t defFieldVoxelNumber = defField.nVoxelsPerVolume();
+        auto defFieldPtr = defField.data();
+        auto defFieldPtrX = defFieldPtr.begin();
+        auto defFieldPtrY = defFieldPtrX + defFieldVoxelNumber;
+        auto defFieldPtrZ = defFieldPtrY + defFieldVoxelNumber;
+
+        if (composition) {  // Composition of deformation fields
+            // Read the ijk sform or qform, as appropriate
+            const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk;
+            for (int z = 0; z < defField->nz; z++) {
+                size_t index = z * defField->nx * defField->ny;
+                int oldPreX = -99; int oldPreY = -99; int oldPreZ = -99;
+                for (int y = 0; y < defField->ny; y++) {
+                    for (int x = 0; x < defField->nx; x++, index++) {
+                        if (mask[index] > -1) {
+                            // The previous position at the current pixel position is read
+                            DataType real[] = { defFieldPtrX[index], defFieldPtrY[index], defFieldPtrZ[index] };
+
+                            // From real to pixel position in the control point space
+                            DataType voxel[3];
+                            voxel[0] =
+                                realToVoxel->m[0][0] * real[0] +
+                                realToVoxel->m[0][1] * real[1] +
+                                realToVoxel->m[0][2] * real[2] +
+                                realToVoxel->m[0][3];
+                            voxel[1] =
+                                realToVoxel->m[1][0] * real[0] +
+                                realToVoxel->m[1][1] * real[1] +
+                                realToVoxel->m[1][2] * real[2] +
+                                realToVoxel->m[1][3];
+                            voxel[2] =
+                                realToVoxel->m[2][0] * real[0] +
+                                realToVoxel->m[2][1] * real[1] +
+                                realToVoxel->m[2][2] * real[2] +
+                                realToVoxel->m[2][3];
+
+                            // The spline coefficients are computed
+                            int xPre = int(std::floor(voxel[0]));
+                            DataType basis = voxel[0] - (DataType)xPre--;
+                            if (basis < 0) basis = 0; // rounding error
+                            if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
+                            else GetSplineBasisValues<DataType>(basis, xBasis);
+
+                            int yPre = int(std::floor(voxel[1]));
+                            basis = voxel[1] - (DataType)yPre--;
+                            if (basis < 0) basis = 0; // rounding error
+                            if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
+                            else GetSplineBasisValues<DataType>(basis, yBasis);
+
+                            int zPre = int(std::floor(voxel[2]));
+                            basis = voxel[2] - (DataType)zPre--;
+                            if (basis < 0) basis = 0; // rounding error
+                            if (bspline) GetBSplineBasisValues<DataType>(basis, zBasis);
+                            else GetSplineBasisValues<DataType>(basis, zBasis);
+
+                            // The control point positions are extracted
+                            if (xPre != oldPreX || yPre != oldPreY || zPre != oldPreZ) {
+                                GetGridValues<DataType>(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates);
+                                oldPreX = xPre;
+                                oldPreY = yPre;
+                                oldPreZ = zPre;
+                            }
+
+                            real[0] = real[1] = real[2] = 0;
+                            int coord = 0;
+                            for (int c = 0; c < 4; c++) {
+                                for (int b = 0; b < 4; b++) {
+                                    for (int a = 0; a < 4; a++, coord++) {
+                                        DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c];
+                                        real[0] += xControlPointCoordinates[coord] * tempValue;
+                                        real[1] += yControlPointCoordinates[coord] * tempValue;
+                                        real[2] += zControlPointCoordinates[coord] * tempValue;
+                                    }
+                                }
+                            }
+                            defFieldPtrX[index] = real[0];
+                            defFieldPtrY[index] = real[1];
+                            defFieldPtrZ[index] = real[2];
+                        }
+                    }
+                }
+            }
+        } else {    // If the deformation field is blank - !composition
+            const DataType gridVoxelSpacing[3] = {
+                controlPointGrid->dx / defField->dx,
+                controlPointGrid->dy / defField->dy,
+                controlPointGrid->dz / defField->dz
+            };
+
+            for (int z = 0; z < defField->nz; z++) {
+                size_t index = z * defField->nx * defField->ny;
+                DataType oldBasis = DataType(1.1);
+
+                int zPre = int(DataType(z) / gridVoxelSpacing[2]);
+                DataType basis = (DataType)z / gridVoxelSpacing[2] - (DataType)zPre;
+                if (basis < 0) basis = 0; // rounding error
+                if (bspline) GetBSplineBasisValues<DataType>(basis, zBasis);
+                else GetSplineBasisValues<DataType>(basis, zBasis);
+
+                for (int y = 0; y < defField->ny; y++) {
+                    int yPre = int(DataType(y) / gridVoxelSpacing[1]);
+                    basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre;
+                    if (basis < 0) basis = 0; // rounding error
+                    if (bspline) GetBSplineBasisValues<DataType>(basis, yBasis);
+                    else GetSplineBasisValues<DataType>(basis, yBasis);
+                    int coord = 0;
+                    DataType yzBasis[16];
+                    for (int a = 0; a < 4; a++) {
+                        yzBasis[coord++] = yBasis[0] * zBasis[a];
+                        yzBasis[coord++] = yBasis[1] * zBasis[a];
+                        yzBasis[coord++] = yBasis[2] * zBasis[a];
+                        yzBasis[coord++] = yBasis[3] * zBasis[a];
+                    }
+
+                    for (int x = 0; x < defField->nx; x++, index++) {
+                        int xPre = int(DataType(x) / gridVoxelSpacing[0]);
+                        basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre;
+                        if (basis < 0) basis = 0; // rounding error
+                        if (bspline) GetBSplineBasisValues<DataType>(basis, xBasis);
+                        else GetSplineBasisValues<DataType>(basis, xBasis);
+                        coord = 0;
+                        DataType xyzBasis[64];
+                        for (int a = 0; a < 16; a++) {
+                            xyzBasis[coord++] = xBasis[0] * yzBasis[a];
+                            xyzBasis[coord++] = xBasis[1] * yzBasis[a];
+                            xyzBasis[coord++] = xBasis[2] * yzBasis[a];
+                            xyzBasis[coord++] = xBasis[3] * yzBasis[a];
+                        }
+                        if (basis <= oldBasis || x == 0)
+                            GetGridValues<DataType>(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates);
+                        oldBasis = basis;
+
+                        DataType real[3]{};
+                        if (mask[index] > -1) {
+                            for (int a = 0; a < 64; a++) {
+                                real[0] += xControlPointCoordinates[a] * xyzBasis[a];
+                                real[1] += yControlPointCoordinates[a] * xyzBasis[a];
+                                real[2] += zControlPointCoordinates[a] * xyzBasis[a];
+                            }
+                        }// mask
+                        defFieldPtrX[index] = real[0];
+                        defFieldPtrY[index] = real[1];
+                        defFieldPtrZ[index] = real[2];
+                    } // x
+                } // y
+            } // z
+        } // composition
+    }
+};
+
+TEST_CASE_METHOD(GetDeformationFieldTest, "Regression Deformation Field from B-spline Grid", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, defField, expDefField] = testCase;
+
+        SECTION(testName) {
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            // Check the results
+            const auto defFieldPtr = defField.data();
+            const auto defFieldExpPtr = expDefField.data();
+            for (auto i = 0; i < expDefField.nVoxels(); i++) {
+                const float defFieldVal = defFieldPtr[i];
+                const float expDefFieldVal = defFieldExpPtr[i];
+                const float diff = abs(defFieldVal - expDefFieldVal);
+                if (diff > 0) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | diff=" << diff;
+                    NR_COUT << " | Result=" << defFieldVal;
+                    NR_COUT << " | Expected=" << expDefFieldVal << std::endl;
+                }
+                REQUIRE(diff < EPS);
+            }
+        }
+    }
+}

From 694ec8760b8b30545998e1cce2a05919225c8729 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 23 Oct 2023 18:22:43 +0100
Subject: [PATCH 230/314] Remove identifiers starting with a single underscore

Identifiers in global scope starting with a single underscore are reserved and isn't allowed to use
---
 CMakeLists.txt                                |   6 +-
 Doxyfile.in                                   |   2 +-
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_benchmark.cpp                    |  54 +++----
 reg-apps/reg_f3d.cpp                          |   2 +-
 reg-apps/reg_gpuinfo.cpp                      |  10 +-
 reg-io/CMakeLists.txt                         |   2 +-
 reg-io/_reg_ReadWriteImage.cpp                |   8 +-
 reg-io/_reg_ReadWriteImage.h                  |   4 +-
 reg-io/niftilib/nifti1_io.c                   |   6 +-
 reg-io/niftilib/nifti1_io.h                   |   6 +-
 reg-io/niftilib/nifti2_io.c                   |   4 +-
 reg-io/niftilib/nifti2_io.h                   |   6 +-
 reg-io/nrrd/NrrdIO/mangle.pl                  |  14 +-
 reg-lib/Platform.cpp                          |  14 +-
 reg-lib/Platform.h                            |   8 +-
 reg-lib/cpu/_reg_localTrans.cpp               | 132 +++++++++---------
 reg-lib/cpu/_reg_localTrans_jac.cpp           |  44 +++---
 reg-lib/cpu/_reg_maths.h                      |   2 +-
 reg-lib/cpu/_reg_tools.cpp                    |   6 +-
 reg-test/reg_test_be.cpp                      |   2 +-
 reg-test/reg_test_composeField.cpp            |   2 +-
 reg-test/reg_test_conjugateGradient.cpp       |   2 +-
 reg-test/reg_test_getDeformationField.cpp     |   2 +-
 reg-test/reg_test_imageGradient.cpp           |   2 +-
 reg-test/reg_test_interpolation.cpp           |   2 +-
 reg-test/reg_test_lncc.cpp                    |   4 +-
 reg-test/reg_test_nmi.cpp                     |   2 +-
 reg-test/reg_test_nmi_gradient.cpp            |   2 +-
 reg-test/reg_test_normaliseGradient.cpp       |   2 +-
 .../reg_test_regr_getDeformationField.cpp     |   2 +-
 .../reg_test_voxelCentricToNodeCentric.cpp    |   2 +-
 32 files changed, 178 insertions(+), 180 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4d7122ef..a5aa1fc3 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -152,7 +152,7 @@ if(USE_OPENCL)
     message(STATUS "Found OpenCL")
     include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cl)
     include_directories(${OpenCL_INCLUDE_DIRS})
-    add_definitions(-D_USE_OPENCL)
+    add_definitions(-DUSE_OPENCL)
   endif(NOT OpenCL_FOUND)
 endif(USE_OPENCL)
 #-----------------------------------------------------------------------------
@@ -168,7 +168,7 @@ if(USE_CUDA)
   else(NOT CUDA_FOUND)
     include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda)
     include_directories(${CUDA_INCLUDE_DIRS})
-    add_definitions(-D_USE_CUDA)
+    add_definitions(-DUSE_CUDA)
   endif(NOT CUDA_FOUND)
 endif(USE_CUDA)
 #-----------------------------------------------------------------------------
@@ -177,7 +177,7 @@ if(USE_SSE)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
   endif(NOT MSVC)
-  add_definitions(-D_USE_SSE)
+  add_definitions(-DUSE_SSE)
 endif(USE_SSE)
 #-----------------------------------------------------------------------------
 if(USE_OPENMP)
diff --git a/Doxyfile.in b/Doxyfile.in
index 8257d784..df013886 100644
--- a/Doxyfile.in
+++ b/Doxyfile.in
@@ -1449,7 +1449,7 @@ INCLUDE_FILE_PATTERNS  =
 # undefined via #undef or recursively expanded use the := operator
 # instead of the = operator.
 
-PREDEFINED             = _USE_CUDA
+PREDEFINED             = USE_CUDA
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
 # this tag can be used to specify a list of macro names that should be expanded.
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 71627d71..aef2e272 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-348
+349
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index 8606f563..aab0086c 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -19,7 +19,7 @@
 #include "_reg_tools.h"
 #include "_reg_blockMatching.h"
 
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
 #include "_reg_cudaCommon.h"
 #include "_reg_resampling_gpu.h"
 #include "_reg_affineTransformation_gpu.h"
@@ -179,7 +179,7 @@ int main(int argc, char **argv)
    nodeNMIGradientImage->nbyper = sizeof(float);
    nodeNMIGradientImage->data = calloc(nodeNMIGradientImage->nvox, nodeNMIGradientImage->nbyper);
 
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    float *targetImageArray_d;
    cudaArray *sourceImageArray_d;
    int *targetMask_d;
@@ -198,7 +198,7 @@ int main(int argc, char **argv)
 
    time_t start,end;
    int minutes, seconds, cpuTime, maxIt;
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    int gpuTime
 #endif
 
@@ -249,7 +249,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf( "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          time(&start);
@@ -273,7 +273,7 @@ int main(int argc, char **argv)
    }
 
    // SPLINE DEFORMATION FIELD CREATION
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    float4 *controlPointImageArray_d;
    if(runGPU)
    {
@@ -299,7 +299,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          time(&start);
@@ -326,7 +326,7 @@ int main(int argc, char **argv)
    }
 
    // SCALING-AND-SQUARING APPROACH
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    float4 *velocityFieldImageArray_d;
    if(runGPU)
    {
@@ -350,7 +350,7 @@ int main(int argc, char **argv)
       printf("CPU - %i scaling-and-squaring - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i scaling-and-squarings - %i min %i sec\n", maxIt, minutes, seconds);
       time(&start);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          for(int i=0; i<maxIt; ++i)
@@ -374,7 +374,7 @@ int main(int argc, char **argv)
    }
 
    // LINEAR INTERPOLATION
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    float *resultImageArray_d;
    if(runGPU)
       Cuda::Allocate<float>(&resultImageArray_d, targetImage->dim);
@@ -399,7 +399,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          time(&start);
@@ -428,7 +428,7 @@ int main(int argc, char **argv)
    }
 
    // SPATIAL GRADIENT COMPUTATION
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    float4 *resultGradientArray_d;
    CUDA_SAFE_CALL(cudaMalloc((void **)&resultGradientArray_d, targetImage->nvox*sizeof(float4)));
 #endif
@@ -451,7 +451,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          time(&start);
@@ -479,7 +479,7 @@ int main(int argc, char **argv)
    }
    nifti_image_free(sourceImage);
 
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    if(runGPU)
    {
       Cuda::Free(deformationFieldImageArray_d);
@@ -501,7 +501,7 @@ int main(int argc, char **argv)
    }
 
    // VOXEL-BASED NMI GRADIENT COMPUTATION
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    float4 *voxelNMIGradientArray_d;
    if(runGPU)
       Cuda::Allocate(&voxelNMIGradientArray_d, resultImage->dim);
@@ -529,7 +529,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       float *logJointHistogram_d;
       if(runGPU)
       {
@@ -571,7 +571,7 @@ int main(int argc, char **argv)
       printf("Voxel-based NMI gradient done\n\n");
    }
 
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    if(runGPU)
    {
       Cuda::Free(resultGradientArray_d);
@@ -579,7 +579,7 @@ int main(int argc, char **argv)
 #endif
 
    // NODE-BASED NMI GRADIENT COMPUTATION
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    float4 *nodeNMIGradientArray_d;
    if(runGPU)
       Cuda::Allocate(&nodeNMIGradientArray_d, controlPointImage->dim);
@@ -603,7 +603,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          time(&start);
@@ -631,7 +631,7 @@ int main(int argc, char **argv)
       printf("Node-based NMI gradient done\n\n");
    }
 
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    if(runGPU)
    {
       Cuda::Free(voxelNMIGradientArray_d);
@@ -654,7 +654,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          time(&start);
@@ -694,7 +694,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          time(&start);
@@ -733,7 +733,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          time(&start);
@@ -768,7 +768,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          time(&start);
@@ -789,7 +789,7 @@ int main(int argc, char **argv)
       printf("Approx. |Jac| penalty term done\n\n");
    }
 
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    if(runGPU)
    {
       Cuda::Free(controlPointImageArray_d );
@@ -806,7 +806,7 @@ int main(int argc, char **argv)
                                           100,    // percentage of block kept
                                           50,     // percentage of inlier in the optimisation process
                                           maskImage);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       int *activeBlock_d;
       float *targetPosition_d;
       float *resultPosition_d;
@@ -835,7 +835,7 @@ int main(int argc, char **argv)
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
       if(runGPU)
       {
          time(&start);
@@ -880,7 +880,7 @@ int main(int argc, char **argv)
    free(probaJointHistogram);
    free(logJointHistogram);
 
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    if(runGPU)
    {
       Cuda::Free(targetImageArray_d);
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 104803a4..ad804dcd 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -11,7 +11,7 @@
  */
 
 // OpenCL isn't supported!
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "_reg_ReadWriteImage.h"
 #include "_reg_ReadWriteMatrix.h"
diff --git a/reg-apps/reg_gpuinfo.cpp b/reg-apps/reg_gpuinfo.cpp
index 22008d4e..d4858ead 100644
--- a/reg-apps/reg_gpuinfo.cpp
+++ b/reg-apps/reg_gpuinfo.cpp
@@ -1,25 +1,25 @@
 #include "_reg_maths.h"
 #include "Platform.h"
 
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
   #include "../reg-lib/cuda/_reg_cudainfo.h"
 #endif
-#ifdef _USE_OPENCL
+#ifdef USE_OPENCL
   #include "../reg-lib/cl/_reg_openclinfo.h"
 #endif
 
 /* *************************************************************** */
 int main()
 {
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
    showCUDAInfo();
 #else
-#ifndef _USE_OPENCL
+#ifndef USE_OPENCL
    NR_WARN("NiftyReg has not been compiled with CUDA or OpenCL");
    NR_WARN("No GPU device information to display");
 #endif
 #endif
-#ifdef _USE_OPENCL
+#ifdef USE_OPENCL
    showCLInfo();
 #endif
 
diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt
index 82a541ca..639785ea 100644
--- a/reg-io/CMakeLists.txt
+++ b/reg-io/CMakeLists.txt
@@ -17,7 +17,7 @@ set(LIBRARIES reg_nifti reg_png)
 
 # Build the NRRD file format library if required
 if(USE_NRRD)
-  add_definitions(-D_USE_NRRD)
+  add_definitions(-DUSE_NRRD)
   subdirs(nrrd)
   set(LIBRARIES ${LIBRARIES} reg_nrrd)
 endif(USE_NRRD)
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index 4902881d..b5413b21 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -42,7 +42,7 @@ int reg_io_checkFileFormat(const std::string& filename) {
         return NR_NII_FORMAT;
     else if (filename.find(".png") != std::string::npos)
         return NR_PNG_FORMAT;
-#ifdef _USE_NRRD
+#ifdef USE_NRRD
     else if (filename.find(".nrrd") != std::string::npos)
         return NR_NRRD_FORMAT;
     else if (filename.find(".nhdr") != std::string::npos)
@@ -72,7 +72,7 @@ nifti_image* reg_io_ReadImageFile(const char *filename) {
         image = reg_io_readPNGfile(filename, true);
         reg_hack_filename(image, filename);
         break;
-#ifdef _USE_NRRD
+#ifdef USE_NRRD
     case NR_NRRD_FORMAT:
         Nrrd *nrrdImage = reg_io_readNRRDfile(filename);
         image = reg_io_nrdd2nifti(nrrdImage);
@@ -103,7 +103,7 @@ nifti_image* reg_io_ReadImageHeader(const char *filename) {
         image = reg_io_readPNGfile(filename, false);
         reg_hack_filename(image, filename);
         break;
-#ifdef _USE_NRRD
+#ifdef USE_NRRD
     case NR_NRRD_FORMAT:
         Nrrd *nrrdImage = reg_io_readNRRDfile(filename);
         image = reg_io_nrdd2nifti(nrrdImage);
@@ -154,7 +154,7 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) {
     case NR_PNG_FORMAT:
         reg_io_writePNGfile(image, filename);
         break;
-#ifdef _USE_NRRD
+#ifdef USE_NRRD
     case NR_NRRD_FORMAT:
         Nrrd *nrrdImage = reg_io_nifti2nrrd(image);
         reg_io_writeNRRDfile(nrrdImage, filename);
diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h
index a012f6c0..c1356f02 100644
--- a/reg-io/_reg_ReadWriteImage.h
+++ b/reg-io/_reg_ReadWriteImage.h
@@ -18,7 +18,7 @@
 #include "_reg_tools.h"
 
 #include "reg_png.h"
-#ifdef _USE_NRRD
+#ifdef USE_NRRD
 #include "reg_nrrd.h"
 #endif
 /** @defgroup NIFTYREG_FILEFORMAT_TYPE
@@ -27,7 +27,7 @@
  */
 #define NR_NII_FORMAT 0
 #define NR_PNG_FORMAT 1
-#ifdef _USE_NRRD
+#ifdef USE_NRRD
 #define NR_NRRD_FORMAT 2
 #endif
 /* @} */
diff --git a/reg-io/niftilib/nifti1_io.c b/reg-io/niftilib/nifti1_io.c
index afd444c9..d8bee4da 100644
--- a/reg-io/niftilib/nifti1_io.c
+++ b/reg-io/niftilib/nifti1_io.c
@@ -1,4 +1,4 @@
-#define _NIFTI1_IO_C_
+#define NIFTI1_IO_C
 
 #include "niftilib/nifti1_io.h"   /* typedefs, prototypes, macros, etc. */
 
@@ -192,7 +192,7 @@ static char const * const gni_history[] =
   "\n",
   "1.3  09 Feb 2005 [rickr]\n"
   "   - nifti1.h: added doxygen comments for extension structs\n"
-  "   - nifti1_io.h: put most #defines in #ifdef _NIFTI1_IO_C_ block\n"
+  "   - nifti1_io.h: put most #defines in #ifdef NIFTI1_IO_C block\n"
   "   - added a doxygen-style description to every exported function\n"
   "   - added doxygen-style comments within some functions\n"
   "   - re-exported many znzFile functions that I had made static\n"
@@ -7188,7 +7188,7 @@ static int make_pivot_list(nifti_image * nim, const int dims[], int pivots[],
    dim_index = nim->dim[0];
    while( dim_index > 0 ){
       prods[len] = 1;
-      while( dim_index > 0 && 
+      while( dim_index > 0 &&
              (nim->dim[dim_index] == 1 || dims[dim_index] == -1) ){
          prods[len] *= nim->dim[dim_index];
          dim_index--;
diff --git a/reg-io/niftilib/nifti1_io.h b/reg-io/niftilib/nifti1_io.h
index 14ed0d3a..0e95531c 100644
--- a/reg-io/niftilib/nifti1_io.h
+++ b/reg-io/niftilib/nifti1_io.h
@@ -517,9 +517,9 @@ int    valid_nifti_extensions(const nifti_image *nim);
 #endif
 
 /*------------------------------------------------------------------------*/
-/*-- the rest of these apply only to nifti1_io.c, check for _NIFTI1_IO_C_ */
+/*-- the rest of these apply only to nifti1_io.c, check for NIFTI1_IO_C */
 /*                                                    Feb 9, 2005 [rickr] */
-#ifdef _NIFTI1_IO_C_
+#ifdef NIFTI1_IO_C
 
 typedef struct {
     int debug;               /*!< debug level for status reports  */
@@ -574,7 +574,7 @@ typedef struct {
 
 #define LNI_MAX_NIA_EXT_LEN 100000  /* consider a longer extension invalid */
 
-#endif  /* _NIFTI1_IO_C_ section */
+#endif  /* NIFTI1_IO_C section */
 /*------------------------------------------------------------------------*/
 
 /*=================*/
diff --git a/reg-io/niftilib/nifti2_io.c b/reg-io/niftilib/nifti2_io.c
index da972895..a87fa3fd 100644
--- a/reg-io/niftilib/nifti2_io.c
+++ b/reg-io/niftilib/nifti2_io.c
@@ -1,4 +1,4 @@
-#define _NIFTI2_IO_C_
+#define NIFTI2_IO_C
 
 #include "niftilib/nifti2_io.h"   /* typedefs, prototypes, macros, etc. */
 
@@ -192,7 +192,7 @@ static char const * const gni1_history[] =
   "\n",
   "1.3  09 Feb 2005 [rickr]\n"
   "   - nifti1.h: added doxygen comments for extension structs\n"
-  "   - nifti1_io.h: put most #defines in #ifdef _NIFTI1_IO_C_ block\n"
+  "   - nifti1_io.h: put most #defines in #ifdef NIFTI1_IO_C block\n"
   "   - added a doxygen-style description to every exported function\n"
   "   - added doxygen-style comments within some functions\n"
   "   - re-exported many znzFile functions that I had made static\n"
diff --git a/reg-io/niftilib/nifti2_io.h b/reg-io/niftilib/nifti2_io.h
index ff215d19..946e6d4e 100644
--- a/reg-io/niftilib/nifti2_io.h
+++ b/reg-io/niftilib/nifti2_io.h
@@ -756,9 +756,9 @@ int    nifti_valid_header_size(int ni_ver, int whine);
 #endif
 
 /*------------------------------------------------------------------------*/
-/*-- the rest of these apply only to nifti2_io.c, check for _NIFTI2_IO_C_ */
+/*-- the rest of these apply only to nifti2_io.c, check for NIFTI2_IO_C */
 
-#ifdef _NIFTI2_IO_C_
+#ifdef NIFTI2_IO_C
 
 typedef struct {
     int debug;               /*!< debug level for status reports  */
@@ -817,7 +817,7 @@ typedef struct {
 #undef NIFTI_IS_16_BIT_INT
 #define NIFTI_IS_16_BIT_INT(x) ((x) <= 32767 && (x) >= -32768)
 
-#endif  /* _NIFTI2_IO_C_ section */
+#endif  /* NIFTI2_IO_C section */
 /*------------------------------------------------------------------------*/
 
 /*=================*/
diff --git a/reg-io/nrrd/NrrdIO/mangle.pl b/reg-io/nrrd/NrrdIO/mangle.pl
index 37c44fa9..f71c3299 100644
--- a/reg-io/nrrd/NrrdIO/mangle.pl
+++ b/reg-io/nrrd/NrrdIO/mangle.pl
@@ -2,23 +2,23 @@
 #  NrrdIO: stand-alone code for basic nrrd functionality
 #  Copyright (C) 2005  Gordon Kindlmann
 #  Copyright (C) 2004, 2003, 2002, 2001, 2000, 1999, 1998  University of Utah
-# 
+#
 #  This software is provided 'as-is', without any express or implied
 #  warranty.  In no event will the authors be held liable for any
 #  damages arising from the use of this software.
-# 
+#
 #  Permission is granted to anyone to use this software for any
 #  purpose, including commercial applications, and to alter it and
 #  redistribute it freely, subject to the following restrictions:
-# 
+#
 #  1. The origin of this software must not be misrepresented; you must
 #     not claim that you wrote the original software. If you use this
 #     software in a product, an acknowledgment in the product
 #     documentation would be appreciated but is not required.
-# 
+#
 #  2. Altered source versions must be plainly marked as such, and must
 #     not be misrepresented as being the original software.
-# 
+#
 #  3. This notice may not be removed or altered from any source distribution.
 #
 #
@@ -41,8 +41,7 @@
     $mac = 0;
 }
 
-print "#ifndef __${prefix}_NrrdIO_mangle_h\n";
-print "#define __${prefix}_NrrdIO_mangle_h\n";
+print "#pragma once\n";
 print "\n";
 print "/*\n";
 print "\n";
@@ -89,4 +88,3 @@
     }
 }
 close(NM);
-print "#endif  /* __${prefix}_NrrdIO_mangle_h */ \n";
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 271273f4..19826418 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -1,6 +1,6 @@
 #include "Platform.h"
 #include "CpuKernelFactory.h"
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
 #include "CudaContext.hpp"
 #include "CudaF3dContent.h"
 #include "CudaComputeFactory.h"
@@ -9,7 +9,7 @@
 #include "CudaMeasureFactory.h"
 #include "_reg_optimiser_gpu.h"
 #endif
-#ifdef _USE_OPENCL
+#ifdef USE_OPENCL
 #include "ClContextSingleton.h"
 #include "ClComputeFactory.h"
 #include "ClContentCreatorFactory.h"
@@ -26,7 +26,7 @@ Platform::Platform(const PlatformType& platformTypeIn) {
         kernelFactory = new CpuKernelFactory();
         measureFactory = new MeasureFactory();
     }
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
     else if (platformType == PlatformType::Cuda) {
         platformName = "CUDA";
         SetGpuIdx(999);
@@ -36,7 +36,7 @@ Platform::Platform(const PlatformType& platformTypeIn) {
         measureFactory = new CudaMeasureFactory();
     }
 #endif
-#ifdef _USE_OPENCL
+#ifdef USE_OPENCL
     else if (platformType == PlatformType::OpenCl) {
         platformName = "OpenCL";
         SetGpuIdx(999);
@@ -71,7 +71,7 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) {
     if (platformType == PlatformType::Cpu) {
         gpuIdx = 999;
     }
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
     else if (platformType == PlatformType::Cuda) {
         CudaContext& cudaContext = CudaContext::GetInstance();
         if (gpuIdxIn != 999) {
@@ -80,7 +80,7 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) {
         }
     }
 #endif
-#ifdef _USE_OPENCL
+#ifdef USE_OPENCL
     else if (platformType == PlatformType::OpenCl) {
         ClContextSingleton& clContext = ClContextSingleton::GetInstance();
         if (gpuIdxIn != 999) {
@@ -138,7 +138,7 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
             transformationGradientDataBw = (Type*)conBw->GetTransformationGradient()->data;
         }
     }
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
     else if (platformType == PlatformType::Cuda) {
         optimiser = dynamic_cast<reg_optimiser<Type>*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu());
         controlPointGridData = (Type*)dynamic_cast<CudaF3dContent&>(con).GetControlPointGridCuda();
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 5c7ed55f..42a0a823 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -10,10 +10,10 @@
 enum class PlatformType { Cpu, Cuda, OpenCl };
 constexpr PlatformType PlatformTypes[] = {
     PlatformType::Cpu,
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
     PlatformType::Cuda,
 #endif
-#ifdef _USE_OPENCL
+#ifdef USE_OPENCL
     PlatformType::OpenCl
 #endif
 };
@@ -43,13 +43,13 @@ class Platform {
                                          F3dContent *conBw = nullptr) const;
 
     static constexpr bool IsCudaEnabled() {
-#ifdef _USE_CUDA
+#ifdef USE_CUDA
         return true;
 #endif
         return false;
     }
     static constexpr bool IsOpenClEnabled() {
-#ifdef _USE_OPENCL
+#ifdef USE_OPENCL
         return true;
 #endif
         return false;
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 15185c8a..6f95de7a 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -14,7 +14,7 @@
 #include "_reg_maths_eigen.h"
 
 #ifdef BUILD_TESTS
-#undef _USE_SSE
+#undef USE_SSE
 #endif
 
 /* *************************************************************** */
@@ -559,7 +559,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                                             int *mask,
                                             bool composition,
                                             bool bspline) {
-#if _USE_SSE
+#if USE_SSE
     union {
         __m128 m;
         float f[4];
@@ -596,13 +596,13 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
         DataType f[16] __attribute__((aligned(16)));
     } xyBasis;
 #endif // _WIN32
-#else // _USE_SSE
+#else // USE_SSE
     DataType xBasis[4];
     DataType yBasis[4];
     DataType xyBasis[16];
     DataType xControlPointCoordinates[16];
     DataType yControlPointCoordinates[16];
-#endif // _USE_SSE
+#endif // USE_SSE
 
     DataType *controlPointPtrX = static_cast<DataType*>(splineControlPoint->data);
     DataType *controlPointPtrY = &controlPointPtrX[NiftiImage::calcVoxelNumber(splineControlPoint, 2)];
@@ -659,7 +659,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                         yVoxel >= 0 && yVoxel <= deformationField->ny - 1) {
                         // The control point positions are extracted
                         if (oldXpre != xPre || oldYpre != yPre) {
-#ifdef _USE_SSE
+#ifdef USE_SSE
                             get_GridValues<DataType>(xPre,
                                                      yPre,
                                                      splineControlPoint,
@@ -669,7 +669,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                                                      yControlPointCoordinates.f,
                                                      false,  // no approximation
                                                      false); // not a displacement field
-#else // _USE_SSE
+#else // USE_SSE
                             get_GridValues<DataType>(xPre,
                                                      yPre,
                                                      splineControlPoint,
@@ -679,11 +679,11 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                                                      yControlPointCoordinates,
                                                      false,  // no approximation
                                                      false); // not a displacement field
-#endif // _USE_SSE
+#endif // USE_SSE
                             oldXpre = xPre;
                             oldYpre = yPre;
                         }
-#if _USE_SSE
+#if USE_SSE
                         coord = 0;
                         for (b = 0; b < 4; b++)
                             for (a = 0; a < 4; a++)
@@ -722,14 +722,14 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
         }
     } else { // starting deformation field is blank - !composition
 #ifdef _OPENMP
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #pragma  omp parallel for default(none) \
    shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \
    controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \
    private(x, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, \
    val, xBasis, yBasis, tempCurrent, xyBasis, tempX, tempY, \
    xControlPointCoordinates, yControlPointCoordinates)
-#else // _USE_SSE
+#else // USE_SSE
 #pragma  omp parallel for default(none) \
    shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \
    controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \
@@ -753,7 +753,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                 if (basis < 0) basis = 0; // rounding error
                 if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
                 else get_SplineBasisValues<DataType>(basis, xBasis);
-#if _USE_SSE
+#if USE_SSE
                 val.f[0] = static_cast<float>(xBasis[0]);
                 val.f[1] = static_cast<float>(xBasis[1]);
                 val.f[2] = static_cast<float>(xBasis[2]);
@@ -773,7 +773,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                 }
 #endif
                 if (oldXpre != xPre || oldYpre != yPre) {
-#ifdef _USE_SSE
+#ifdef USE_SSE
                     get_GridValues<DataType>(xPre,
                                              yPre,
                                              splineControlPoint,
@@ -783,7 +783,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                                              yControlPointCoordinates.f,
                                              false,  // no approximation
                                              false); // not a deformation field
-#else // _USE_SSE
+#else // USE_SSE
                     get_GridValues<DataType>(xPre,
                                              yPre,
                                              splineControlPoint,
@@ -793,7 +793,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                                              yControlPointCoordinates,
                                              false,  // no approximation
                                              false); // not a deformation field
-#endif // _USE_SSE
+#endif // USE_SSE
                     oldXpre = xPre;
                     oldYpre = yPre;
                 }
@@ -802,7 +802,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                 yReal = 0;
 
                 if (mask[index] > -1) {
-#if _USE_SSE
+#if USE_SSE
                     tempX = _mm_set_ps1(0);
                     tempY = _mm_set_ps1(0);
                     //addition and multiplication of the 64 basis value and CP displacement for each axis
@@ -837,7 +837,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                             bool composition,
                                             bool bspline,
                                             bool forceNoLut = false) {
-#if _USE_SSE
+#if USE_SSE
     union {
         __m128 m;
         float f[4];
@@ -876,14 +876,14 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
         DataType f[16] __attribute__((aligned(16)));
     } zControlPointCoordinates;
 #endif // _WIN32
-#else // _USE_SSE
+#else // USE_SSE
     DataType temp[4];
     DataType zBasis[4];
     DataType xControlPointCoordinates[64];
     DataType yControlPointCoordinates[64];
     DataType zControlPointCoordinates[64];
     int coord;
-#endif // _USE_SSE
+#endif // USE_SSE
 
     const size_t splineControlPointVoxelNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
     DataType *controlPointPtrX = static_cast<DataType*>(splineControlPoint->data);
@@ -906,7 +906,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
         if (splineControlPoint->sform_code > 0)
             referenceMatrix_real_to_voxel = splineControlPoint->sto_ijk;
         else referenceMatrix_real_to_voxel = splineControlPoint->qto_ijk;
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #ifdef _WIN32
         __declspec(align(16)) DataType xBasis[4];
         __declspec(align(16)) DataType yBasis[4];
@@ -914,14 +914,14 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
         DataType xBasis[4] __attribute__((aligned(16)));
         DataType yBasis[4] __attribute__((aligned(16)));
 #endif
-#else // _USE_SSE
+#else // USE_SSE
         DataType xBasis[4], yBasis[4];
-#endif // _USE_SSE
+#endif // USE_SSE
 
         DataType voxel[3];
 
 #ifdef _OPENMP
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #pragma omp parallel for default(none) \
    private(x, y, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \
    index, voxel, basis, xBasis, yBasis, zBasis, xControlPointCoordinates, \
@@ -939,7 +939,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
    shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, referenceMatrix_real_to_voxel, \
    bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ, \
    splineControlPoint, mask)
-#endif // _USE_SSE
+#endif // USE_SSE
 #endif // _OPENMP
         for (z = 0; z < deformationField->nz; z++) {
             index = z * deformationField->nx * deformationField->ny;
@@ -990,7 +990,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 
                         // The control point positions are extracted
                         if (xPre != oldPreX || yPre != oldPreY || zPre != oldPreZ) {
-#ifdef _USE_SSE
+#ifdef USE_SSE
                             get_GridValues<DataType>(xPre,
                                                      yPre,
                                                      zPre,
@@ -1003,7 +1003,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                                      zControlPointCoordinates.f,
                                                      false,  // no approximation
                                                      false); // not a deformation field
-#else // _USE_SSE
+#else // USE_SSE
                             get_GridValues<DataType>(xPre,
                                                      yPre,
                                                      zPre,
@@ -1016,13 +1016,13 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                                      zControlPointCoordinates,
                                                      false,  // no approximation
                                                      false); // not a deformation field
-#endif // _USE_SSE
+#endif // USE_SSE
                             oldPreX = xPre;
                             oldPreY = yPre;
                             oldPreZ = zPre;
                         }
 
-#if _USE_SSE
+#if USE_SSE
                         tempX = _mm_set_ps1(0);
                         tempY = _mm_set_ps1(0);
                         tempZ = _mm_set_ps1(0);
@@ -1083,7 +1083,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
         gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy;
         gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz;
 
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #ifdef _WIN32
         union u1 {
             __m128 m[4];
@@ -1103,9 +1103,9 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
             DataType f[64] __attribute__((aligned(16)));
         } xyzBasis;
 #endif // _WIN32
-#else // _USE_SSE
+#else // USE_SSE
         DataType yzBasis[16], xyzBasis[64];
-#endif // _USE_SSE
+#endif // USE_SSE
 
         // Assess if lookup table can be used
         if (gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && forceNoLut == false) {
@@ -1114,15 +1114,15 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
             // Compute and store all required coefficients
             int coeff_index;
 #ifdef _OPENMP
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #pragma omp parallel for default(none) \
     private(x, y, a, coeff_index, basis, zBasis, temp, val, tempCurrent, yzBasis) \
     shared(coefficients, bspline)
-#else //  _USE_SSE
+#else //  USE_SSE
 #pragma omp parallel for default(none) \
     private(x, y, a, coeff_index, basis, zBasis, temp, yzBasis, coord) \
     shared(coefficients, bspline)
-#endif // _USE_SSE
+#endif // USE_SSE
 #endif // _OPENMP
             for (z = 0; z < 5; ++z) {
                 coeff_index = z * 5 * 5 * 64;
@@ -1133,7 +1133,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                     basis = static_cast<DataType>(y) / 5.f;
                     if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
                     else get_SplineBasisValues<DataType>(basis, temp);
-#if _USE_SSE
+#if USE_SSE
                     val.f[0] = static_cast<float>(temp[0]);
                     val.f[1] = static_cast<float>(temp[1]);
                     val.f[2] = static_cast<float>(temp[2]);
@@ -1157,7 +1157,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                         basis = static_cast<DataType>(x) / 5.f;
                         if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
                         else get_SplineBasisValues<DataType>(basis, temp);
-#if _USE_SSE
+#if USE_SSE
                         val.f[0] = static_cast<float>(temp[0]);
                         val.f[1] = static_cast<float>(temp[1]);
                         val.f[2] = static_cast<float>(temp[2]);
@@ -1184,11 +1184,11 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
             } // z
 
             // Loop over block of 5x5x5 voxels
-#if _USE_SSE
+#if USE_SSE
             int coord;
 #endif // USE_SSE
 #ifdef _OPENMP
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #pragma omp parallel for default(none) \
    private(x, y, z, a, b, c, xPre, yPre, real, \
    index, coeff_index, coord, tempX, tempY, tempZ, val,\
@@ -1196,7 +1196,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
    shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \
    gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ, \
    coefficients)
-#else //  _USE_SSE
+#else //  USE_SSE
 #pragma omp parallel for default(none) \
    private(x, y, z, a, b, c, xPre, yPre, real, \
    index, coeff_index, coord, basis, \
@@ -1204,12 +1204,12 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
    shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \
    gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ, \
    coefficients)
-#endif // _USE_SSE
+#endif // USE_SSE
 #endif // _OPENMP
             for (zPre = 0; zPre < splineControlPoint->nz - 3; zPre++) {
                 for (yPre = 0; yPre < splineControlPoint->ny - 3; yPre++) {
                     for (xPre = 0; xPre < splineControlPoint->nx - 3; xPre++) {
-#if _USE_SSE
+#if USE_SSE
                         get_GridValues<DataType>(xPre,
                                                  yPre,
                                                  zPre,
@@ -1222,7 +1222,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                                  zControlPointCoordinates.f,
                                                  false,  // no approximation
                                                  false); // not a deformation field
-#else // _USE_SSE
+#else // USE_SSE
                         get_GridValues<DataType>(xPre,
                                                  yPre,
                                                  zPre,
@@ -1235,7 +1235,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                                  zControlPointCoordinates,
                                                  false,  // no approximation
                                                  false); // not a deformation field
-#endif // _USE_SSE
+#endif // USE_SSE
                         coeff_index = 0;
                         for (c = 0; c < 5; ++c) {
                             z = zPre * 5 + c;
@@ -1247,7 +1247,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                         for (a = 0; a < 5; ++a) {
                                             x = xPre * 5 + a;
                                             if (x<deformationField->nx && mask[index]>-1) {
-#if _USE_SSE
+#if USE_SSE
                                                 tempX = _mm_set_ps1(0);
                                                 tempY = _mm_set_ps1(0);
                                                 tempZ = _mm_set_ps1(0);
@@ -1276,7 +1276,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                                 val.m = tempZ;
                                                 real[2] = val.f[0] + val.f[1] + val.f[2] + val.f[3];
 #endif
-#else // _USE_SSE
+#else // USE_SSE
                                                 real[0] = real[1] = real[2] = 0;
                                                 for (coord = 0; coord < 64; ++coord) {
                                                     basis = coefficients[coeff_index++];
@@ -1284,7 +1284,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                                     real[1] += yControlPointCoordinates[coord] * basis;
                                                     real[2] += zControlPointCoordinates[coord] * basis;
                                                 }
-#endif // _USE_SSE
+#endif // USE_SSE
                                                 fieldPtrX[index] = real[0];
                                                 fieldPtrY[index] = real[1];
                                                 fieldPtrZ[index] = real[2];
@@ -1304,7 +1304,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
             free(coefficients);
         } else { // if spacings!=5 voxels
 #ifdef _OPENMP
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #pragma omp parallel for default(none) \
     private(x, y, a, xPre, yPre, zPre, real, \
     index, basis, xyzBasis, yzBasis, zBasis, temp, xControlPointCoordinates, \
@@ -1313,14 +1313,14 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
     temp_basis_sse, basis_sse, val, tempCurrent) \
     shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \
     gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ)
-#else //  _USE_SSE
+#else //  USE_SSE
 #pragma omp parallel for default(none) \
     private(x, y, a, xPre, yPre, zPre, real, \
     index, basis, xyzBasis, yzBasis, zBasis, temp, xControlPointCoordinates, \
     yControlPointCoordinates, zControlPointCoordinates, oldBasis, coord) \
     shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \
     gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ)
-#endif // _USE_SSE
+#endif // USE_SSE
 #endif // _OPENMP
             for (z = 0; z < deformationField->nz; z++) {
                 index = z * deformationField->nx * deformationField->ny;
@@ -1338,7 +1338,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                     if (basis < 0) basis = 0; //rounding error
                     if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
                     else get_SplineBasisValues<DataType>(basis, temp);
-#if _USE_SSE
+#if USE_SSE
                     val.f[0] = static_cast<float>(temp[0]);
                     val.f[1] = static_cast<float>(temp[1]);
                     val.f[2] = static_cast<float>(temp[2]);
@@ -1363,7 +1363,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                         if (basis < 0) basis = 0; //rounding error
                         if (bspline) get_BSplineBasisValues<DataType>(basis, temp);
                         else get_SplineBasisValues<DataType>(basis, temp);
-#if _USE_SSE
+#if USE_SSE
                         val.f[0] = static_cast<float>(temp[0]);
                         val.f[1] = static_cast<float>(temp[1]);
                         val.f[2] = static_cast<float>(temp[2]);
@@ -1383,7 +1383,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                         }
 #endif
                         if (basis <= oldBasis || x == 0) {
-#ifdef _USE_SSE
+#ifdef USE_SSE
                             get_GridValues<DataType>(xPre,
                                                      yPre,
                                                      zPre,
@@ -1396,7 +1396,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                                      zControlPointCoordinates.f,
                                                      false,  // no approximation
                                                      false); // not a deformation field
-#else // _USE_SSE
+#else // USE_SSE
                             get_GridValues<DataType>(xPre,
                                                      yPre,
                                                      zPre,
@@ -1409,7 +1409,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                                                      zControlPointCoordinates,
                                                      false,  // no approximation
                                                      false); // not a deformation field
-#endif // _USE_SSE
+#endif // USE_SSE
                         }
                         oldBasis = basis;
 
@@ -1418,7 +1418,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                         real[2] = 0;
 
                         if (mask[index] > -1) {
-#if _USE_SSE
+#if USE_SSE
                             tempX = _mm_set_ps1(0);
                             tempY = _mm_set_ps1(0);
                             tempZ = _mm_set_ps1(0);
@@ -1463,7 +1463,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
     if (splineControlPoint->datatype != deformationField->datatype)
         NR_FATAL_ERROR("The spline control point image and the deformation field image are expected to be of the same type");
 
-#if _USE_SSE
+#if USE_SSE
     if (splineControlPoint->datatype != NIFTI_TYPE_FLOAT32)
         NR_FATAL_ERROR("SSE computation has only been implemented for single precision");
 #endif
@@ -3056,12 +3056,12 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
                                   bool bspline) {
     // REMINDER Grid2(x)=Grid1(Grid2(x))
 
-#if _USE_SSE
+#if USE_SSE
     union {
         __m128 m;
         float f[4];
     } val;
-#endif // _USE_SSE
+#endif // USE_SSE
 
     DataType *outCPPPtrX = static_cast<DataType*>(grid2->data);
     DataType *outCPPPtrY = &outCPPPtrX[NiftiImage::calcVoxelNumber(grid2, 2)];
@@ -3074,18 +3074,18 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
 #ifdef _WIN32
     __declspec(align(16)) DataType xBasis[4];
     __declspec(align(16)) DataType yBasis[4];
-#if _USE_SSE
+#if USE_SSE
     __declspec(align(16)) DataType xyBasis[16];
-#endif  //_USE_SSE
+#endif  //USE_SSE
 
     __declspec(align(16)) DataType xControlPointCoordinates[16];
     __declspec(align(16)) DataType yControlPointCoordinates[16];
 #else // _WIN32
     DataType xBasis[4] __attribute__((aligned(16)));
     DataType yBasis[4] __attribute__((aligned(16)));
-#if _USE_SSE
+#if USE_SSE
     DataType xyBasis[16] __attribute__((aligned(16)));
-#endif  //_USE_SSE
+#endif  //USE_SSE
 
     DataType xControlPointCoordinates[16] __attribute__((aligned(16)));
     DataType yControlPointCoordinates[16] __attribute__((aligned(16)));
@@ -3153,7 +3153,7 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
                                      displacement1); // displacement field?
             xReal = 0;
             yReal = 0;
-#if _USE_SSE
+#if USE_SSE
             coord = 0;
             for (unsigned b = 0; b < 4; b++) {
                 for (unsigned a = 0; a < 4; a++) {
@@ -3206,7 +3206,7 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
                                   bool displacement2,
                                   bool bspline) {
     // REMINDER Grid2(x)=Grid1(Grid2(x))
-#if _USE_SSE
+#if USE_SSE
     union {
         __m128 m;
         float f[4];
@@ -3272,7 +3272,7 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
     else matrix_voxel_to_real2 = &grid2->qto_xyz;
 
 #ifdef _OPENMP
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #pragma omp parallel for default(none) \
    shared(grid1, grid2, displacement1, displacement2, matrix_voxel_to_real2, matrix_real_to_voxel1, \
    outCPPPtrX, outCPPPtrY, outCPPPtrZ, controlPointPtrX, controlPointPtrY, controlPointPtrZ, bspline) \
@@ -3380,7 +3380,7 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
                 xReal = 0;
                 yReal = 0;
                 zReal = 0;
-#if _USE_SSE
+#if USE_SSE
                 val.f[0] = static_cast<float>(xBasis[0]);
                 val.f[1] = static_cast<float>(xBasis[1]);
                 val.f[2] = static_cast<float>(xBasis[2]);
@@ -3450,7 +3450,7 @@ int reg_spline_cppComposition(nifti_image *grid1,
     if (grid1->datatype != grid2->datatype)
         NR_FATAL_ERROR("Both input images are expected to have the same data type");
 
-#if _USE_SSE
+#if USE_SSE
     if (grid1->datatype != NIFTI_TYPE_FLOAT32)
         NR_FATAL_ERROR("SSE computation has only been implemented for single precision");
 #endif
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 26678dde..75c0b6ee 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -12,7 +12,7 @@
 
 #include "_reg_localTrans_jac.h"
 
-#define _USE_SQUARE_LOG_JAC
+#define USE_SQUARE_LOG_JAC
 
 /* *************************************************************** */
 /* *************************************************************** */
@@ -643,7 +643,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
       // Allocate variables that are used in both scenarii
       int pre[3], oldPre[3], incr0;
       DataType basis, xBasis[4], xFirst[4], yBasis[4], yFirst[4], zBasis[4], zFirst[4];
-#if _USE_SSE
+#if USE_SSE
       union
       {
          __m128 m;
@@ -805,7 +805,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   basis = gridCoord[2] - pre[2];
                   get_BSplineBasisValues<DataType>(basis, zBasis, zFirst);
                   // Compute the 64 basis values and the corresponding derivatives
-#if _USE_SSE
+#if USE_SSE
                   val.f[0]=yBasis[0];
                   val.f[1]=yBasis[1];
                   val.f[2]=yBasis[2];
@@ -862,7 +862,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   // Fetch the required coefficients
                   if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1] || oldPre[2]!=pre[2])
                   {
-#ifdef _USE_SSE
+#ifdef USE_SSE
                      get_GridValues<DataType>(pre[0]-1,
                            pre[1]-1,
                            pre[2]-1,
@@ -876,7 +876,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                            false, // no approx
                            false // not disp
                            );
-#else // _USE_SSE
+#else // USE_SSE
                      get_GridValues<DataType>(pre[0]-1,
                            pre[1]-1,
                            pre[2]-1,
@@ -890,13 +890,13 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                            false, // no approx
                            false // not disp
                            );
-#endif // _USE_SSE
+#endif // USE_SSE
                      oldPre[0]=pre[0];
                      oldPre[1]=pre[1];
                      oldPre[2]=pre[2];
                   }
                   // Compute the Jacobian matrix
-#if _USE_SSE
+#if USE_SSE
                   tempX_x =  _mm_set_ps1(0);
                   tempX_y =  _mm_set_ps1(0);
                   tempX_z =  _mm_set_ps1(0);
@@ -973,7 +973,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
       {
          // The grid is assumed to be aligned with the reference image
 #ifdef _OPENMP
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #pragma omp parallel for default(none) \
    shared(referenceImage, gridVoxelSpacing, splineControlPoint, \
    coeffPtrX, coeffPtrY, coeffPtrZ,reorientation, JacobianMatrices, \
@@ -1015,7 +1015,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                if(basis<0) basis=0; //rounding error
                get_BSplineBasisValues<DataType>(basis, yBasis, yFirst);
 
-#if _USE_SSE
+#if USE_SSE
                val.f[0]=yBasis[0];
                val.f[1]=yBasis[1];
                val.f[2]=yBasis[2];
@@ -1055,7 +1055,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   if(basis<0) basis=0; //rounding error
                   get_BSplineBasisValues<DataType>(basis, xBasis, xFirst);
 
-#if _USE_SSE
+#if USE_SSE
                   val.f[0]=xBasis[0];
                   val.f[1]=xBasis[1];
                   val.f[2]=xBasis[2];
@@ -1091,7 +1091,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
 
                   if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1] || oldPre[2]!=pre[2])
                   {
-#ifdef _USE_SSE
+#ifdef USE_SSE
                      get_GridValues<DataType>(pre[0],
                            pre[1],
                            pre[2],
@@ -1105,7 +1105,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                            false, // no approx
                            false // not disp
                            );
-#else // _USE_SSE
+#else // USE_SSE
                      get_GridValues<DataType>(pre[0],
                            pre[1],
                            pre[2],
@@ -1119,12 +1119,12 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                            false, // no approx
                            false // not disp
                            );
-#endif // _USE_SSE
+#endif // USE_SSE
                      oldPre[0]=pre[0];
                      oldPre[1]=pre[1];
                      oldPre[2]=pre[2];
                   }
-#if _USE_SSE
+#if USE_SSE
                   tempX_x =  _mm_set_ps1(0);
                   tempX_y =  _mm_set_ps1(0);
                   tempX_z =  _mm_set_ps1(0);
@@ -1278,7 +1278,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
          for(size_t i=0; i<detNumber; ++i)
          {
             double logDet = log(jacDetPtr[i]);
-#ifdef _USE_SQUARE_LOG_JAC
+#ifdef USE_SQUARE_LOG_JAC
             penaltySum += logDet * logDet;
 #else
             penaltySum += fasb(logDet);
@@ -1292,7 +1292,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint,
          for(size_t i=0; i<detNumber; ++i)
          {
             double logDet = log(jacDetPtr[i]);
-#ifdef _USE_SQUARE_LOG_JAC
+#ifdef USE_SQUARE_LOG_JAC
             penaltySum += logDet * logDet;
 #else
             penaltySum += fasb(logDet);
@@ -1410,7 +1410,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
                         if(detJac>0)
                         {
                            jacobianMatrix = jacobianMatrices[jacIndex];
-#ifdef _USE_SQUARE_LOG_JAC
+#ifdef USE_SQUARE_LOG_JAC
                            detJac = 2.0*log(detJac) / detJac;
 #else
                            detJac = (log(detJac)>0?1.0:-1.0) / detJac;
@@ -1513,7 +1513,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
                               basisValues[1] = xBasis * yFirst ;
 
                               jacobianMatrix = jacobianMatrices[jacIndex];
-#ifdef _USE_SQUARE_LOG_JAC
+#ifdef USE_SQUARE_LOG_JAC
                               detJac= 2.0*log(detJac) / detJac;
 #else
                               detJac = (log(detJac)>0?1.0:-1.0) / detJac;
@@ -1661,7 +1661,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                                  if(detJac>0)
                                  {
                                     jacobianMatrix = jacobianMatrices[jacIndex];
-#ifdef _USE_SQUARE_LOG_JAC
+#ifdef USE_SQUARE_LOG_JAC
                                     detJac = 2.0*log(detJac) / detJac;
 #else
                                     detJac = (log(detJac)>0?1.0:-1.0) / detJac;
@@ -1787,7 +1787,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                                        basisValues[2] = xBasis * yBasis * zFirst ;
 
                                        jacobianMatrix = jacobianMatrices[jacIndex];
-#ifdef _USE_SQUARE_LOG_JAC
+#ifdef USE_SQUARE_LOG_JAC
                                        detJac= 2.0*log(detJac) / detJac;
 #else
                                        detJac = (log(detJac)>0?1.0:-1.0) / detJac;
@@ -1931,7 +1931,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
    for(i=0; i< jacobianNumber; i++)
    {
       logDet = log(jacobianDeterminant[i]);
-#ifdef _USE_SQUARE_LOG_JAC
+#ifdef USE_SQUARE_LOG_JAC
       penaltyTerm += logDet*logDet;
 #else
       penaltyTerm +=  fabs(log(logDet));
@@ -2180,7 +2180,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
    for(i=0; i< jacobianNumber; i++)
    {
       logDet = log(jacobianDeterminant[i]);
-#ifdef _USE_SQUARE_LOG_JAC
+#ifdef USE_SQUARE_LOG_JAC
       penaltyTerm += logDet*logDet;
 #else
       penaltyTerm +=  fabs(log(logDet));
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index 93151883..c77e18fd 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -20,7 +20,7 @@
 #include <omp.h>
 #endif
 
-#if _USE_SSE
+#if USE_SSE
 #include <emmintrin.h>
 #include <xmmintrin.h>
 #ifdef __SSE3__
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 93a0a76c..91a85e3a 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -954,7 +954,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                         double bufferIntensityCur = 0;
                         double bufferDensityCur = 0;
 
-#ifdef _USE_SSE
+#ifdef USE_SSE
                         union {
                             __m128 m;
                             float f[4];
@@ -963,7 +963,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
 #endif
 
 #ifdef _OPENMP
-#ifdef _USE_SSE
+#ifdef USE_SSE
 #pragma omp parallel for default(none) \
    shared(imageDims, intensityPtr, densityPtr, radius, kernel, lineOffset, n, planeNumber, kernelSum) \
    private(realIndex, currentIntensityPtr, currentDensityPtr, lineIndex, bufferIntensity, \
@@ -1015,7 +1015,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                                     if (shiftPst > imageDims[n]) shiftPst = imageDims[n];
                                     // Set the current values to zero
                                     // Increment the current value by performing the weighted sum
-#ifdef _USE_SSE
+#ifdef USE_SSE
                                     intensity_sum_sse.m = _mm_set_ps1(0);
                                     density_sum_sse.m = _mm_set_ps1(0);
                                     k = shiftPre;
diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp
index 445d3959..afe18f83 100644
--- a/reg-test/reg_test_be.cpp
+++ b/reg-test/reg_test_be.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 
diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp
index 6bd7662e..49550c77 100644
--- a/reg-test/reg_test_composeField.cpp
+++ b/reg-test/reg_test_composeField.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index bb2d4e63..644eb49b 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index c49a1a24..b213f3fc 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 8689954a..1b243132 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index 3de5aae3..b3d05830 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index 6e45e6d1..0355aa84 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -1,6 +1,6 @@
 // OpenCL and CUDA are not supported for this test yet
-#undef _USE_OPENCL
-#undef _USE_CUDA
+#undef USE_OPENCL
+#undef USE_CUDA
 
 #include "reg_test_common.h"
 #include "_reg_lncc.h"
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index 1b61ac39..21847f10 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test yet
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 #include "_reg_tools.h"
diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp
index f19ac9bd..95283b0f 100644
--- a/reg-test/reg_test_nmi_gradient.cpp
+++ b/reg-test/reg_test_nmi_gradient.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test yet
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index d56cd356..53c6f40e 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 
diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp
index d5c0a8de..62955c0b 100644
--- a/reg-test/reg_test_regr_getDeformationField.cpp
+++ b/reg-test/reg_test_regr_getDeformationField.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index 47f56f90..3339cbbc 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -1,5 +1,5 @@
 // OpenCL is not supported for this test
-#undef _USE_OPENCL
+#undef USE_OPENCL
 
 #include "reg_test_common.h"
 

From b1670e402b9ae536a38d1b5736cd2ab235dc444e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 30 Oct 2023 13:44:42 +0000
Subject: [PATCH 231/314] Refactorisations

---
 niftyreg_build_version.txt                   |  2 +-
 reg-apps/reg_benchmark.cpp                   | 18 ++--
 reg-lib/Compute.cpp                          | 20 ++---
 reg-lib/Compute.h                            |  2 +-
 reg-lib/cpu/_reg_localTrans.cpp              | 27 +++---
 reg-lib/cpu/_reg_localTrans.h                | 10 +--
 reg-lib/cpu/_reg_nmi.cpp                     | 90 ++++++++++----------
 reg-lib/cpu/_reg_optimiser.cpp               |  9 +-
 reg-lib/cuda/BlockSize.hpp                   | 10 +--
 reg-lib/cuda/CudaCompute.cu                  | 20 ++---
 reg-lib/cuda/CudaCompute.h                   |  2 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu |  2 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                 |  1 -
 reg-lib/cuda/_reg_optimiser_gpu.cu           |  2 +
 reg-lib/cuda/_reg_optimiser_kernels.cu       |  5 +-
 reg-lib/cuda/_reg_tools_gpu.cu               | 19 ++---
 reg-lib/cuda/_reg_tools_gpu.h                | 12 +--
 reg-test/reg_test_be.cpp                     | 42 +++++----
 reg-test/reg_test_normaliseGradient.cpp      |  9 +-
 19 files changed, 148 insertions(+), 154 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index aef2e272..0fecf653 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-349
+350
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index aab0086c..8f0adff4 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -595,7 +595,7 @@ int main(int argc, char **argv)
       for(int i=0; i<maxIt; ++i)
       {
          reg_smoothImageForCubicSpline<float>(voxelNMIGradientImage,smoothingRadius);
-         reg_voxelCentric2NodeCentric(nodeNMIGradientImage,voxelNMIGradientImage,1.0f);
+         reg_voxelCentricToNodeCentric(nodeNMIGradientImage,voxelNMIGradientImage,1.0f);
       }
       time(&end);
       cpuTime=(end-start);
@@ -609,14 +609,14 @@ int main(int argc, char **argv)
          time(&start);
          for(int i=0; i<maxIt; ++i)
          {
-            reg_smoothImageForCubicSpline_gpu(  resultImage,
-                                                &voxelNMIGradientArray_d,
-                                                smoothingRadius);
-            reg_voxelCentric2NodeCentric_gpu(   targetImage,
-                                                controlPointImage,
-                                                &voxelNMIGradientArray_d,
-                                                &nodeNMIGradientArray_d,
-                                                1.0f);
+            reg_smoothImageForCubicSpline_gpu(resultImage,
+                                              &voxelNMIGradientArray_d,
+                                              smoothingRadius);
+            reg_voxelCentricToNodeCentric_gpu(targetImage,
+                                              controlPointImage,
+                                              &voxelNMIGradientArray_d,
+                                              &nodeNMIGradientArray_d,
+                                              1.0f);
          }
          time(&end);
          gpuTime=(end-start);
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 64e73969..a0be36d6 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -91,11 +91,11 @@ void Compute::GetDeformationField(bool composition, bool bspline) {
 void Compute::UpdateControlPointPosition(float *currentDof,
                                          const float *bestDof,
                                          const float *gradient,
-                                         const float& scale,
-                                         const bool& optimiseX,
-                                         const bool& optimiseY,
-                                         const bool& optimiseZ) {
-    const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).GetControlPointGrid();
+                                         const float scale,
+                                         const bool optimiseX,
+                                         const bool optimiseY,
+                                         const bool optimiseZ) {
+    const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
     if (optimiseX && optimiseY && optimiseZ) {
         // Update the values for all axis displacement
         for (size_t i = 0; i < controlPointGrid->nvox; ++i)
@@ -268,11 +268,11 @@ void Compute::ConvolveImage(nifti_image *image) {
 void Compute::VoxelCentricToNodeCentric(float weight) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
     mat44 *reorientation = Content::GetIJKMatrix(*con.GetFloating());
-    reg_voxelCentric2NodeCentric(con.GetTransformationGradient(),
-                                 con.GetVoxelBasedMeasureGradient(),
-                                 weight,
-                                 false, // no update
-                                 reorientation);
+    reg_voxelCentricToNodeCentric(con.GetTransformationGradient(),
+                                  con.GetVoxelBasedMeasureGradient(),
+                                  weight,
+                                  false, // no update
+                                  reorientation);
 }
 /* *************************************************************** */
 void Compute::ConvolveVoxelBasedMeasureGradient(float weight) {
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index a4137f5b..ecf11f0f 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -19,7 +19,7 @@ class Compute {
     virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating);
     virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight);
     virtual void GetDeformationField(bool composition, bool bspline);
-    virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float& scale, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ);
+    virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float scale, const bool optimiseX, const bool optimiseY, const bool optimiseZ);
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint);
     virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ);
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 6f95de7a..269e4e98 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -1538,11 +1538,11 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint,
 }
 /* *************************************************************** */
 template<class DataType>
-void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
-                                       nifti_image *voxelImage,
-                                       float weight,
-                                       bool update,
-                                       const mat44 *voxelToMillimetre) {
+void reg_voxelCentricToNodeCentric(nifti_image *nodeImage,
+                                   nifti_image *voxelImage,
+                                   float weight,
+                                   bool update,
+                                   const mat44 *voxelToMillimetre) {
     const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3);
     DataType *nodePtrX = static_cast<DataType*>(nodeImage->data);
@@ -1603,8 +1603,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
         weight *= ratio[i];
     }
     // For each node, the corresponding voxel is computed
-    float nodeCoord[3];
-    float voxelCoord[3];
+    float nodeCoord[3], voxelCoord[3];
     for (int z = 0; z < nodeImage->nz; z++) {
         nodeCoord[2] = static_cast<float>(z);
         for (int y = 0; y < nodeImage->ny; y++) {
@@ -1685,20 +1684,20 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
     } // loop over z
 }
 /* *************************************************************** */
-void reg_voxelCentric2NodeCentric(nifti_image * nodeImage,
-                                  nifti_image * voxelImage,
-                                  float weight,
-                                  bool update,
-                                  const mat44 * voxelToMillimetre) {
+void reg_voxelCentricToNodeCentric(nifti_image *nodeImage,
+                                   nifti_image *voxelImage,
+                                   float weight,
+                                   bool update,
+                                   const mat44 *voxelToMillimetre) {
     if (nodeImage->datatype != voxelImage->datatype)
         NR_FATAL_ERROR("Both input images are expected to have the same data type");
 
     switch (nodeImage->datatype) {
     case NIFTI_TYPE_FLOAT32:
-        reg_voxelCentric2NodeCentric<float>(nodeImage, voxelImage, weight, update, voxelToMillimetre);
+        reg_voxelCentricToNodeCentric<float>(nodeImage, voxelImage, weight, update, voxelToMillimetre);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_voxelCentric2NodeCentric<double>(nodeImage, voxelImage, weight, update, voxelToMillimetre);
+        reg_voxelCentricToNodeCentric<double>(nodeImage, voxelImage, weight, update, voxelToMillimetre);
         break;
     default:
         NR_FATAL_ERROR("Data type not supported");
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index ad6f930d..5263d9c4 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -87,11 +87,11 @@ void reg_spline_getDeformationField(nifti_image *controlPointGridImage,
  * @param update The values in node image will be incremented if
  * update is set to true; a blank node image is considered otherwise
  */
-void reg_voxelCentric2NodeCentric(nifti_image *nodeImage,
-                                  nifti_image *voxelImage,
-                                  float weight,
-                                  bool update,
-                                  const mat44 *voxelToMillimetre = nullptr);
+void reg_voxelCentricToNodeCentric(nifti_image *nodeImage,
+                                   nifti_image *voxelImage,
+                                   float weight,
+                                   bool update,
+                                   const mat44 *voxelToMillimetre = nullptr);
 /* *************************************************************** */
 /** @brief Refine a grid of control points
  * @param referenceImage Image that defined the space of the reference
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index bd3fda06..e6fc735f 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -367,18 +367,18 @@ void reg_getNMIValue(const nifti_image *referenceImage,
 template void reg_getNMIValue<float>(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool);
 template void reg_getNMIValue<double>(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool);
 /* *************************************************************** */
-double GetSimilarityMeasureValue(const nifti_image *referenceImage,
-                                 const nifti_image *warpedImage,
-                                 const double *timePointWeight,
-                                 const unsigned short *referenceBinNumber,
-                                 const unsigned short *floatingBinNumber,
-                                 const unsigned short *totalBinNumber,
-                                 double **jointHistogramLog,
-                                 double **jointHistogramPro,
-                                 double **entropyValues,
-                                 const int *referenceMask,
-                                 const int referenceTimePoint,
-                                 const bool approximation) {
+static double GetSimilarityMeasureValue(const nifti_image *referenceImage,
+                                        const nifti_image *warpedImage,
+                                        const double *timePointWeight,
+                                        const unsigned short *referenceBinNumber,
+                                        const unsigned short *floatingBinNumber,
+                                        const unsigned short *totalBinNumber,
+                                        double **jointHistogramLog,
+                                        double **jointHistogramPro,
+                                        double **entropyValues,
+                                        const int *referenceMask,
+                                        const int referenceTimePoint,
+                                        const bool approximation) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         reg_getNMIValue<RefImgDataType>(referenceImage,
@@ -433,17 +433,17 @@ double reg_nmi::GetSimilarityMeasureValueBw() {
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
-                                    const nifti_image *warpedImage,
-                                    const unsigned short *referenceBinNumber,
-                                    const unsigned short *floatingBinNumber,
-                                    const double *const *jointHistogramLog,
-                                    const double *const *entropyValues,
-                                    const nifti_image *warpedGradient,
-                                    nifti_image *measureGradientImage,
-                                    const int *referenceMask,
-                                    const int currentTimepoint,
-                                    const double timepointWeight) {
+static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
+                                           const nifti_image *warpedImage,
+                                           const unsigned short *referenceBinNumber,
+                                           const unsigned short *floatingBinNumber,
+                                           const double *const *jointHistogramLog,
+                                           const double *const *entropyValues,
+                                           const nifti_image *warpedGradient,
+                                           nifti_image *measureGradientImage,
+                                           const int *referenceMask,
+                                           const int currentTimepoint,
+                                           const double timepointWeight) {
 #ifdef WIN32
     long i;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 2);
@@ -519,17 +519,17 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
-                                    const nifti_image *warpedImage,
-                                    const unsigned short *referenceBinNumber,
-                                    const unsigned short *floatingBinNumber,
-                                    const double *const *jointHistogramLog,
-                                    const double *const *entropyValues,
-                                    const nifti_image *warpedGradient,
-                                    nifti_image *measureGradientImage,
-                                    const int *referenceMask,
-                                    const int currentTimepoint,
-                                    const double timepointWeight) {
+static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
+                                           const nifti_image *warpedImage,
+                                           const unsigned short *referenceBinNumber,
+                                           const unsigned short *floatingBinNumber,
+                                           const double *const *jointHistogramLog,
+                                           const double *const *entropyValues,
+                                           const nifti_image *warpedGradient,
+                                           nifti_image *measureGradientImage,
+                                           const int *referenceMask,
+                                           const int currentTimepoint,
+                                           const double timepointWeight) {
 #ifdef WIN32
     long i;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -612,17 +612,17 @@ void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
     } // loop over all voxel
 }
 /* *************************************************************** */
-void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
-                                            const nifti_image *warpedImage,
-                                            const unsigned short *referenceBinNumber,
-                                            const unsigned short *floatingBinNumber,
-                                            const double *const *jointHistogramLog,
-                                            const double *const *entropyValues,
-                                            const nifti_image *warpedGradient,
-                                            nifti_image *voxelBasedGradient,
-                                            const int *referenceMask,
-                                            const int currentTimepoint,
-                                            const double timepointWeight) {
+static void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
+                                                   const nifti_image *warpedImage,
+                                                   const unsigned short *referenceBinNumber,
+                                                   const unsigned short *floatingBinNumber,
+                                                   const double *const *jointHistogramLog,
+                                                   const double *const *entropyValues,
+                                                   const nifti_image *warpedGradient,
+                                                   nifti_image *voxelBasedGradient,
+                                                   const int *referenceMask,
+                                                   const int currentTimepoint,
+                                                   const double timepointWeight) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         auto GetVoxelBasedNmiGradient = referenceImage->nz > 1 ? reg_getVoxelBasedNmiGradient3d<RefImgDataType> : reg_getVoxelBasedNmiGradient2d<RefImgDataType>;
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp
index c25ef7e4..5eb9f661 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/cpu/_reg_optimiser.cpp
@@ -258,17 +258,15 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
 #pragma omp parallel for default(none) \
     shared(num,array1Ptr,array2Ptr,gradientPtr)
 #endif
-        for (i = 0; i < num; i++) {
+        for (i = 0; i < num; i++)
             array2Ptr[i] = array1Ptr[i] = -gradientPtr[i];
-        }
         if (this->isSymmetric) {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw)
 #endif
-            for (i = 0; i < numBw; i++) {
+            for (i = 0; i < numBw; i++)
                 array2PtrBw[i] = array1PtrBw[i] = -gradientPtrBw[i];
-            }
         }
         this->firstCall = false;
     } else {
@@ -277,8 +275,7 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(num,array1Ptr,array2Ptr,gradientPtr) \
-    reduction(+:gg) \
-    reduction(+:dgg)
+    reduction(+:gg, dgg)
 #endif
         for (i = 0; i < num; i++) {
             gg += array2Ptr[i] * array1Ptr[i];
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index ed4d0c6d..ee1f0cef 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -66,7 +66,7 @@ struct BlockSize {
     unsigned GetSsdValue;
     unsigned GetSsdGradient;
     /* _reg_tools_gpu */
-    unsigned reg_voxelCentric2NodeCentric;
+    unsigned reg_voxelCentricToNodeCentric;
     unsigned reg_convertNMIGradientFromVoxelToRealSpace;
     unsigned reg_ApplyConvolutionWindowAlongX;
     unsigned reg_ApplyConvolutionWindowAlongY;
@@ -132,7 +132,7 @@ struct BlockSize100: public BlockSize {
         GetSsdValue = 320; // 12 reg - 24 smem - 08 cmem
         GetSsdGradient = 320; // 12 reg - 24 smem - 08 cmem
         /* _reg_tools_gpu */
-        reg_voxelCentric2NodeCentric = 320; // 11 reg - 24 smem - 16 cmem
+        reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem
         reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
         reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
         reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
@@ -166,8 +166,8 @@ struct BlockSize300: public BlockSize {
         /* _reg_globalTransformation_gpu */
         reg_affine_getDeformationField = 1024; // 23 reg
         /* _reg_localTransformation_gpu */
-        reg_spline_getDeformationField2D = 768; // 34 reg
-        reg_spline_getDeformationField3D = 768; // 34 reg
+        reg_spline_getDeformationField2D = 1024; // 34 reg
+        reg_spline_getDeformationField3D = 1024; // 34 reg
         reg_spline_getApproxSecondDerivatives2D = 1024; // 25 reg
         reg_spline_getApproxSecondDerivatives3D = 768; // 34 reg
         reg_spline_getApproxBendingEnergy2D = 1024; // 23 reg
@@ -200,7 +200,7 @@ struct BlockSize300: public BlockSize {
         GetSsdValue = 768; // 34 reg
         GetSsdGradient = 768; // 34 reg
         /* _reg_tools_gpu */
-        reg_voxelCentric2NodeCentric = 1024; // 23 reg
+        reg_voxelCentricToNodeCentric = 1024; // 23 reg
         reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg
         reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg
         reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index a5877a43..8871f2dc 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -103,10 +103,10 @@ void CudaCompute::GetDeformationField(bool composition, bool bspline) {
 void CudaCompute::UpdateControlPointPosition(float *currentDof,
                                              const float *bestDof,
                                              const float *gradient,
-                                             const float& scale,
-                                             const bool& optimiseX,
-                                             const bool& optimiseY,
-                                             const bool& optimiseZ) {
+                                             const float scale,
+                                             const bool optimiseX,
+                                             const bool optimiseY,
+                                             const bool optimiseZ) {
     reg_updateControlPointPosition_gpu(NiftiImage::calcVoxelNumber(dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid(), 3),
                                        reinterpret_cast<float4*>(currentDof),
                                        reinterpret_cast<const float4*>(bestDof),
@@ -201,12 +201,12 @@ void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) {
 void CudaCompute::VoxelCentricToNodeCentric(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating());
-    reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetTransformationGradient(),
-                                     con.F3dContent::GetVoxelBasedMeasureGradient(),
-                                     con.GetTransformationGradientCuda(),
-                                     con.GetVoxelBasedMeasureGradientCuda(),
-                                     weight,
-                                     reorientation);
+    reg_voxelCentricToNodeCentric_gpu(con.F3dContent::GetTransformationGradient(),
+                                      con.F3dContent::GetVoxelBasedMeasureGradient(),
+                                      con.GetTransformationGradientCuda(),
+                                      con.GetVoxelBasedMeasureGradientCuda(),
+                                      weight,
+                                      reorientation);
 }
 /* *************************************************************** */
 void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 4a8bef91..842be37a 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -18,7 +18,7 @@ class CudaCompute: public Compute {
     virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) override;
     virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) override;
     virtual void GetDeformationField(bool composition, bool bspline) override;
-    virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float& scale, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) override;
+    virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float scale, const bool optimiseX, const bool optimiseY, const bool optimiseZ) override;
     virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override;
     virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override;
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index b7c03485..0041e9a0 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -181,7 +181,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI
     }
 
     // Compute the gradient
-    bendingEnergyWeight *= 1.f / (float)controlPointNumber;
+    bendingEnergyWeight /= (float)controlPointNumber;
     auto secondDerivativesTexture = Cuda::CreateTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear,
                                                               secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
     if (controlPointImage->nz > 1) {
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 873102df..722144a4 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -147,7 +147,6 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
                                                       cudaChannelFormatKindFloat, 1);
     auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
                                                  cudaChannelFormatKindSigned, 1);
-    NR_CUDA_SAFE_CALL(cudaMemset(voxelBasedGradientCuda, 0, voxelNumber * sizeof(float4)));
 
     if (referenceImage->nz > 1) {
         const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW3D;
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 903ac197..474ff131 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -144,11 +144,13 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox,
 /* *************************************************************** */
 void reg_conjugateGradient_gpu::UpdateGradientValues() {
     if (this->firstCall) {
+        NR_DEBUG("Conjugate gradient initialisation");
         reg_initialiseConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber());
         if (this->isSymmetric)
             reg_initialiseConjugateGradient_gpu(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
         this->firstCall = false;
     } else {
+        NR_DEBUG("Conjugate gradient update");
         reg_getConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(),
                                      this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
     }
diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu
index 2ebb18f5..a97a2455 100755
--- a/reg-lib/cuda/_reg_optimiser_kernels.cu
+++ b/reg-lib/cuda/_reg_optimiser_kernels.cu
@@ -62,15 +62,14 @@ __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageC
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         float4 value = controlPointImageCuda[tid];
-        const float4 bestValue = tex1Dfetch<float4>(bestControlPointTexture, tid);
-        const float4 gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
+        const float4& bestValue = tex1Dfetch<float4>(bestControlPointTexture, tid);
+        const float4& gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
         if (optimiseX)
             value.x = bestValue.x + scale * gradValue.x;
         if (optimiseY)
             value.y = bestValue.y + scale * gradValue.y;
         if (optimiseZ)
             value.z = bestValue.z + scale * gradValue.z;
-        value.w = 0;
         controlPointImageCuda[tid] = value;
     }
 }
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 7e39c3ec..10b4ad0c 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -15,18 +15,17 @@
 #include "_reg_tools_kernels.cu"
 
 /* *************************************************************** */
-void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
-                                      const nifti_image *voxelImage,
-                                      float4 *nodeImageCuda,
-                                      float4 *voxelImageCuda,
-                                      float weight,
-                                      const mat44 *voxelToMillimetre) {
+void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage,
+                                       const nifti_image *voxelImage,
+                                       float4 *nodeImageCuda,
+                                       float4 *voxelImageCuda,
+                                       float weight,
+                                       const mat44 *voxelToMillimetre) {
     const bool is3d = nodeImage->nz > 1;
     const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3);
     const int3 nodeImageDims = make_int3(nodeImage->nx, nodeImage->ny, nodeImage->nz);
     const int3 voxelImageDims = make_int3(voxelImage->nx, voxelImage->ny, voxelImage->nz);
-
     auto voxelImageTexture = Cuda::CreateTextureObject(voxelImageCuda, cudaResourceTypeLinear,
                                                        voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
 
@@ -43,9 +42,7 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
         transformation = reg_mat44_mul(&temp, &transformation);
     }
     // Millimetre to voxel in the reference image
-    if (voxelImage->sform_code > 0)
-        transformation = reg_mat44_mul(&voxelImage->sto_ijk, &transformation);
-    else transformation = reg_mat44_mul(&voxelImage->qto_ijk, &transformation);
+    transformation = reg_mat44_mul(voxelImage->sform_code > 0 ? &voxelImage->sto_ijk : &voxelImage->qto_ijk, &transformation);
 
     // The information has to be reoriented
     // Voxel to millimetre contains the orientation of the image that is used
@@ -68,7 +65,7 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
         weight *= ratio[i];
     }
 
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_voxelCentricToNodeCentric;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 4444e7e8..41916575 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -16,12 +16,12 @@
 #include "_reg_tools.h"
 
 /* *************************************************************** */
-void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage,
-                                      const nifti_image *voxelImage,
-                                      float4 *nodeImageCuda,
-                                      float4 *voxelImageCuda,
-                                      float weight,
-                                      const mat44 *voxelToMillimetre);
+void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage,
+                                       const nifti_image *voxelImage,
+                                       float4 *nodeImageCuda,
+                                       float4 *voxelImageCuda,
+                                       float weight,
+                                       const mat44 *voxelToMillimetre = nullptr);
 /* *************************************************************** */
 void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ,
                                                     const nifti_image *controlPointImage,
diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp
index afe18f83..f6889700 100644
--- a/reg-test/reg_test_be.cpp
+++ b/reg-test/reg_test_be.cpp
@@ -27,13 +27,10 @@ class BendingEnergyTest {
         std::mt19937 gen(0);
         std::uniform_real_distribution<float> distr(-1, 1);
 
-        // Create a 2D reference image
-        vector<NiftiImage::dim_t> dim{ 4, 4 };
-        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
-
-        // Create a 3D reference image
-        dim.push_back(4);
-        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+        // Create 2D and 3D reference images
+        constexpr NiftiImage::dim_t dimSize = 4;
+        NiftiImage reference2d({ dimSize, dimSize }, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d({ dimSize, dimSize, dimSize }, NIFTI_TYPE_FLOAT32);
 
         // Create 2D and 3D control point grids
         NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
@@ -44,20 +41,20 @@ class BendingEnergyTest {
         testData.emplace_back(TestData(
             "BE identity 2D",
             reference2d,
-            NiftiImage(controlPointGrid2d),
+            controlPointGrid2d,
             0.f
         ));
         testData.emplace_back(TestData(
             "BE identity 3D",
             reference3d,
-            NiftiImage(controlPointGrid3d),
+            controlPointGrid3d,
             0.f
         ));
         // Add random values to the control point grid coefficients
         // No += or + operator for RNifti::NiftiImageData:Element
         // so reverting to old school for now
         float *cpp2dPtr = static_cast<float*>(controlPointGrid2d->data);
-        float *cpp3dPtr = static_cast<float *>(controlPointGrid3d->data);
+        float *cpp3dPtr = static_cast<float*>(controlPointGrid3d->data);
         for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i)
             cpp2dPtr[i] += distr(gen);
         for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i)
@@ -66,13 +63,13 @@ class BendingEnergyTest {
         testData.emplace_back(TestData(
             "BE random 2D",
             reference2d,
-            NiftiImage(controlPointGrid2d),
+            controlPointGrid2d,
             this->GetBe2d(controlPointGrid2d)
         ));
         testData.emplace_back(TestData(
             "BE random 3D",
             reference3d,
-            NiftiImage(controlPointGrid3d),
+            controlPointGrid3d,
             this->GetBe3d(controlPointGrid3d)
         ));
 
@@ -90,13 +87,13 @@ class BendingEnergyTest {
         testData.emplace_back(TestData(
             "BE scaling 2D",
             reference2d,
-            NiftiImage(controlPointGrid2d),
+            controlPointGrid2d,
             0.f
         ));
         testData.emplace_back(TestData(
             "BE scaling 3D",
             reference3d,
-            NiftiImage(controlPointGrid3d),
+            controlPointGrid3d,
             0.f
         ));
 
@@ -123,7 +120,7 @@ class BendingEnergyTest {
         // The BSpine basis values are known since the control points all have a relative position equal to 0
         float basis[3], first[3], second[3];
         basis[0] = 1.f / 6.f; basis[1] = 4.f / 6.f; basis[2] = 1.f / 6.f;
-        first[0] = -.5f; first[1] = 0.f; first[2] = .5f;
+        first[0] = -0.5f; first[1] = 0.f; first[2] = 0.5f;
         second[0] = 1.f; second[1] = -2.f; second[2] = 1.f;
 
         // the first and last control points along each axis are
@@ -148,11 +145,10 @@ class BendingEnergyTest {
                         XY_y += y_val * first[i] * first[j];
                     }
                 }
-                be += XX_x * XX_x + YY_x * YY_x + XX_y * XX_y + YY_y * YY_y + \
-                    2. * XY_x * XY_x + 2. * XY_y * XY_y;
+                be += XX_x * XX_x + YY_x * YY_x + XX_y * XX_y + YY_y * YY_y + 2.0 * XY_x * XY_x + 2.0 * XY_y * XY_y;
             }
         }
-        return (float)(be / (double)cpp.nVoxels());
+        return float(be / (double)cpp.nVoxels());
     }
 
     float GetBe3d(const NiftiImage& cpp) {
@@ -162,7 +158,7 @@ class BendingEnergyTest {
         // The BSpine basis values are known since the control points all have a relative position equal to 0
         float basis[3], first[3], second[3];
         basis[0] = 1.f / 6.f; basis[1] = 4.f / 6.f; basis[2] = 1.f / 6.f;
-        first[0] = -.5f; first[1] = 0.f; first[2] = .5f;
+        first[0] = -0.5f; first[1] = 0.f; first[2] = 0.5f;
         second[0] = 1.f; second[1] = -2.f; second[2] = 1.f;
 
         const auto cppPtr = cpp.data();
@@ -207,13 +203,13 @@ class BendingEnergyTest {
                     be += XX_x * XX_x + YY_x * YY_x + ZZ_x * ZZ_x + \
                         XX_y * XX_y + YY_y * YY_y + ZZ_y * ZZ_y + \
                         XX_z * XX_z + YY_z * YY_z + ZZ_z * ZZ_z + \
-                        2. * XY_x * XY_x + 2. * YZ_x * YZ_x + 2. * XZ_x * XZ_x + \
-                        2. * XY_y * XY_y + 2. * YZ_y * YZ_y + 2. * XZ_y * XZ_y + \
-                        2. * XY_z * XY_z + 2. * YZ_z * YZ_z + 2. * XZ_z * XZ_z;
+                        2.0 * XY_x * XY_x + 2.0 * YZ_x * YZ_x + 2.0 * XZ_x * XZ_x + \
+                        2.0 * XY_y * XY_y + 2.0 * YZ_y * YZ_y + 2.0 * XZ_y * XZ_y + \
+                        2.0 * XY_z * XY_z + 2.0 * YZ_z * YZ_z + 2.0 * XZ_z * XZ_z;
                 }
             }
         }
-        return (float)(be / (double)cpp.nVoxels());
+        return float(be / (double)cpp.nVoxels());
     }
 };
 
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index 53c6f40e..4b4a8d38 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -183,6 +183,9 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
         SECTION(sectionName) {
             NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
 
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
             // Set the transformation gradient image to host the computation
             NiftiImage transGrad = content->GetTransformationGradient();
             transGrad.copyData(testGrad);
@@ -208,8 +211,10 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
             for (size_t i = 0; i < testGrad.nVoxels(); ++i) {
                 const float transGradVal = transGradPtr[i];
                 const float testGradVal = testGradPtr[i];
-                NR_COUT << i << " " << transGradVal << " " << testGradVal << std::endl;
-                REQUIRE(fabs(transGradVal - testGradVal) < EPS);
+                const float diff = abs(transGradVal - testGradVal);
+                if (diff > EPS)
+                    NR_COUT << i << " " << transGradVal << " " << testGradVal << std::endl;
+                REQUIRE(diff < EPS);
             }
             // Ensure the termination of content before CudaContext
             content.reset();

From 44e6b1b2ece72442577df7b29301cf6f794c50c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 30 Oct 2023 14:00:30 +0000
Subject: [PATCH 232/314] Fix a bug in CudaCompute::VoxelCentricToNodeCentric()
 #92

---
 niftyreg_build_version.txt                    |  2 +-
 reg-lib/cuda/_reg_common_cuda_kernels.cu      | 20 ++--
 reg-lib/cuda/_reg_tools_gpu.cu                | 20 ++--
 reg-lib/cuda/_reg_tools_kernels.cu            | 97 +++++++++---------
 .../reg_test_voxelCentricToNodeCentric.cpp    | 98 ++++++++++---------
 5 files changed, 125 insertions(+), 112 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 0fecf653..1caed7b7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-350
+351
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index 87e1f975..43783b4d 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -8,16 +8,20 @@
 #pragma once
 
 /* *************************************************************** */
-__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool is3d) {
-    out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]);
-    out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]);
-    out[2] = is3d ? weight * (mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2]) : 0;
+template<bool is3d>
+__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const double weight, float (&out)[3]) {
+    out[0] = weight * (mat.m[0][0] * in[0] + mat.m[1][0] * in[1] + mat.m[2][0] * in[2]);
+    out[1] = weight * (mat.m[0][1] * in[0] + mat.m[1][1] * in[1] + mat.m[2][1] * in[2]);
+    if constexpr (is3d)
+        out[2] = weight * (mat.m[0][2] * in[0] + mat.m[1][2] * in[1] + mat.m[2][2] * in[2]);
 }
 /* *************************************************************** */
-__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool is3d) {
-    out[0] = mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2] + mat.m[0][3];
-    out[1] = mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2] + mat.m[1][3];
-    out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0;
+template<bool is3d>
+__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3]) {
+    out[0] = double(mat.m[0][0]) * double(in[0]) + double(mat.m[0][1]) * double(in[1]) + double(mat.m[0][2]) * double(in[2]) + double(mat.m[0][3]);
+    out[1] = double(mat.m[1][0]) * double(in[0]) + double(mat.m[1][1]) * double(in[1]) + double(mat.m[1][2]) * double(in[2]) + double(mat.m[1][3]);
+    if constexpr (is3d)
+        out[2] = double(mat.m[2][0]) * double(in[0]) + double(mat.m[2][1]) * double(in[1]) + double(mat.m[2][2]) * double(in[2]) + double(mat.m[2][3]);
 }
 /* *************************************************************** */
 __device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33& a, const mat33& b) {
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 10b4ad0c..4db039cd 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -47,12 +47,15 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage,
     // The information has to be reoriented
     // Voxel to millimetre contains the orientation of the image that is used
     // to compute the spatial gradient (floating image)
-    mat33 reorientation = reg_mat44_to_mat33(voxelToMillimetre);
-    if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) {
-        mat33 temp = reg_mat44_to_mat33(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
-        temp = nifti_mat33_inverse(temp);
-        reorientation = nifti_mat33_mul(temp, reorientation);
-    }
+    mat33 reorientation;
+    if (voxelToMillimetre) {
+        reorientation = reg_mat44_to_mat33(voxelToMillimetre);
+        if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) {
+            mat33 temp = reg_mat44_to_mat33(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
+            temp = nifti_mat33_inverse(temp);
+            reorientation = nifti_mat33_mul(temp, reorientation);
+        }
+    } else reg_mat33_eye(&reorientation);
     // The information has to be weighted
     float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz };
     for (int i = 0; i < (is3d ? 3 : 2); ++i) {
@@ -69,8 +72,9 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage,
     const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    reg_voxelCentric2NodeCentric_kernel<<<gridDims, blockDims>>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims,
-                                                                 voxelImageDims, is3d, weight, transformation, reorientation);
+    auto voxelCentricToNodeCentricKernel = is3d ? reg_voxelCentricToNodeCentric_kernel<true> : reg_voxelCentricToNodeCentric_kernel<false>;
+    voxelCentricToNodeCentricKernel<<<gridDims, blockDims>>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims,
+                                                             voxelImageDims, weight, transformation, reorientation);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index a571970b..8dba6af3 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -11,64 +11,61 @@
 #include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
-__global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda,
-                                                    cudaTextureObject_t voxelImageTexture,
-                                                    const unsigned nodeNumber,
-                                                    const int3 nodeImageDims,
-                                                    const int3 voxelImageDims,
-                                                    const bool is3d,
-                                                    const float weight,
-                                                    const mat44 transformation,
-                                                    const mat33 reorientation) {
+template<bool is3d>
+__global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda,
+                                                     cudaTextureObject_t voxelImageTexture,
+                                                     const unsigned nodeNumber,
+                                                     const int3 nodeImageDims,
+                                                     const int3 voxelImageDims,
+                                                     const float weight,
+                                                     const mat44 transformation,
+                                                     const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < nodeNumber) {
-        float nodeCoord[3], voxelCoord[3], reorientedValue[3];
-        // Calculate the node coordinates
-        int quot, rem;
-        reg_div_cuda(tid, nodeImageDims.x * nodeImageDims.y, quot, rem);
-        nodeCoord[2] = quot;
-        reg_div_cuda(rem, nodeImageDims.x, quot, rem);
-        nodeCoord[1] = quot; nodeCoord[0] = rem;
-        // Transform into voxel coordinates
-        reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord, is3d);
+    if (tid >= nodeNumber) return;
+    // Calculate the node coordinates
+    auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(tid, nodeImageDims);
+    // Transform into voxel coordinates
+    float voxelCoord[3], nodeCoord[3] = { static_cast<float>(x), static_cast<float>(y), static_cast<float>(z) };
+    reg_mat44_mul_cuda<is3d>(transformation, nodeCoord, voxelCoord);
 
-        // Linear interpolation
-        float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{};
-        const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) };
-        basisX[1] = voxelCoord[0] - static_cast<float>(pre[0]);
-        basisX[0] = 1.f - basisX[1];
-        basisY[1] = voxelCoord[1] - static_cast<float>(pre[1]);
-        basisY[0] = 1.f - basisY[1];
-        if (is3d) {
-            basisZ[1] = voxelCoord[2] - static_cast<float>(pre[2]);
-            basisZ[0] = 1.f - basisZ[1];
-        }
-        for (short c = 0; c < 2; ++c) {
-            const int indexZ = pre[2] + c;
-            if (-1 < indexZ && indexZ < voxelImageDims.z) {
-                for (short b = 0; b < 2; ++b) {
-                    const int indexY = pre[1] + b;
-                    if (-1 < indexY && indexY < voxelImageDims.y) {
-                        for (short a = 0; a < 2; ++a) {
-                            const int indexX = pre[0] + a;
-                            if (-1 < indexX && indexX < voxelImageDims.x) {
-                                const int index = (indexZ * voxelImageDims.y + indexY) * voxelImageDims.x + indexX;
-                                const float linearWeight = basisX[a] * basisY[b] * (is3d ? basisZ[c] : 1);
-                                const float4 voxelValue = tex1Dfetch<float4>(voxelImageTexture, index);
-                                interpolatedValue[0] += linearWeight * voxelValue.x;
-                                interpolatedValue[1] += linearWeight * voxelValue.y;
-                                if (is3d)
-                                    interpolatedValue[2] += linearWeight * voxelValue.z;
-                            }
+    // Linear interpolation
+    float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{};
+    const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) };
+    basisX[1] = voxelCoord[0] - static_cast<float>(pre[0]);
+    basisX[0] = 1.f - basisX[1];
+    basisY[1] = voxelCoord[1] - static_cast<float>(pre[1]);
+    basisY[0] = 1.f - basisY[1];
+    if constexpr (is3d) {
+        basisZ[1] = voxelCoord[2] - static_cast<float>(pre[2]);
+        basisZ[0] = 1.f - basisZ[1];
+    }
+    for (char c = 0; c < 2; c++) {
+        const int indexZ = pre[2] + c;
+        if (-1 < indexZ && indexZ < voxelImageDims.z) {
+            for (char b = 0; b < 2; b++) {
+                const int indexY = pre[1] + b;
+                if (-1 < indexY && indexY < voxelImageDims.y) {
+                    for (char a = 0; a < 2; a++) {
+                        const int indexX = pre[0] + a;
+                        if (-1 < indexX && indexX < voxelImageDims.x) {
+                            const int index = (indexZ * voxelImageDims.y + indexY) * voxelImageDims.x + indexX;
+                            float linearWeight = basisX[a] * basisY[b];
+                            if constexpr (is3d) linearWeight *= basisZ[c];
+                            const float4& voxelValue = tex1Dfetch<float4>(voxelImageTexture, index);
+                            interpolatedValue[0] += linearWeight * voxelValue.x;
+                            interpolatedValue[1] += linearWeight * voxelValue.y;
+                            if constexpr (is3d)
+                                interpolatedValue[2] += linearWeight * voxelValue.z;
                         }
                     }
                 }
             }
         }
-
-        reg_mat33_mul_cuda(reorientation, interpolatedValue, weight, reorientedValue, is3d);
-        nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 };
     }
+
+    float reorientedValue[3];
+    reg_mat33_mul_cuda<is3d>(reorientation, interpolatedValue, weight, reorientedValue);
+    nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 };
 }
 /* *************************************************************** */
 __global__ void reg_convertNMIGradientFromVoxelToRealSpace_kernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) {
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index 3339cbbc..551fe96d 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -13,7 +13,7 @@
 class VoxelCentricToNodeCentricTest {
 protected:
     using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
-    using TestCase = std::tuple<unique_ptr<Platform>, unique_ptr<F3dContent>, TestData, std::array<mat44, 4>, float>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
 
     inline static vector<TestCase> testCases;
 
@@ -85,11 +85,44 @@ class VoxelCentricToNodeCentricTest {
                 unique_ptr<Platform> platform{ new Platform(platformType) };
                 unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(platform->CreateContentCreator(ContentType::F3d)) };
                 // Make a copy of the test data
-                auto td = testData;
-                auto&& [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = td;
-                // Add content
+                auto [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = testData;
+                // Create the content
                 unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
-                testCases.push_back({ std::move(platform), std::move(content), std::move(td), matrices, distr(gen) });
+
+                // Set the matrices required for computation
+                nifti_image *floating = content->Content::GetFloating();
+                if (floating->sform_code > 0)
+                    floating->sto_ijk = matrices[0];
+                else floating->qto_ijk = matrices[0];
+                NiftiImage transGrad = content->F3dContent::GetTransformationGradient();
+                static int sfc = 0;
+                transGrad->sform_code = sfc++ % 2;
+                if (transGrad->sform_code > 0)
+                    transGrad->sto_xyz = matrices[1];
+                else transGrad->qto_xyz = matrices[1];
+                const mat44 invMatrix = nifti_mat44_inverse(matrices[2]);
+                nifti_add_extension(transGrad, reinterpret_cast<const char*>(&invMatrix), sizeof(invMatrix), NIFTI_ECODE_IGNORE);
+
+                // Set the voxel-based measure gradient to host the computation
+                NiftiImage voxelGrad = content->F3dContent::GetVoxelBasedMeasureGradient();
+                if (voxelGrad->sform_code > 0)
+                    voxelGrad->sto_ijk = matrices[3];
+                else voxelGrad->qto_ijk = matrices[3];
+                voxelGrad.copyData(voxelBasedMeasureGradient);
+                content->UpdateVoxelBasedMeasureGradient();
+
+                // Compute the expected node-based NMI gradient
+                const float weight = distr(gen);
+                NiftiImage expTransGrad(transGrad, NiftiImage::Copy::ImageInfoAndAllocData);
+                VoxelCentricToNodeCentric<float>(floating, expTransGrad, voxelGrad, weight);
+                transGrad.disown(); voxelGrad.disown();
+
+                // Extract the node-based NMI gradient from the voxel-based NMI gradient
+                unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                compute->VoxelCentricToNodeCentric(weight);
+                transGrad = NiftiImage(content->GetTransformationGradient(), NiftiImage::Copy::Image);
+
+                testCases.push_back({ testName + " "s + platform->GetName() + " Weight="s + std::to_string(weight), std::move(transGrad), std::move(expTransGrad) });
             }
         }
     }
@@ -214,58 +247,33 @@ class VoxelCentricToNodeCentricTest {
     }
 };
 
-TEST_CASE_METHOD(VoxelCentricToNodeCentricTest, "Voxel centric to node centric", "[VoxelCentricToNodeCentric]") {
+TEST_CASE_METHOD(VoxelCentricToNodeCentricTest, "Voxel Centric to Node Centric", "[unit]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
-        auto&& [platform, content, testData, matrices, weight] = testCase;
-        auto&& [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = testData;
-        const std::string sectionName = testName + " " + platform->GetName() + " weight=" + std::to_string(weight);
+        auto&& [sectionName, transGrad, expTransGrad] = testCase;
 
         SECTION(sectionName) {
             NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
-            // Set the matrices required for computation
-            nifti_image *floating = content->Content::GetFloating();
-            if (floating->sform_code > 0)
-                floating->sto_ijk = matrices[0];
-            else floating->qto_ijk = matrices[0];
-            NiftiImage transGrad = content->F3dContent::GetTransformationGradient();
-            static int sfc = 0;
-            transGrad->sform_code = sfc++ % 2;
-            if (transGrad->sform_code > 0)
-                transGrad->sto_xyz = matrices[1];
-            else transGrad->qto_xyz = matrices[1];
-            const mat44 invMatrix = nifti_mat44_inverse(matrices[2]);
-            nifti_add_extension(transGrad, reinterpret_cast<const char*>(&invMatrix), sizeof(invMatrix), NIFTI_ECODE_IGNORE);
-
-            // Set the voxel-based measure gradient to host the computation
-            NiftiImage voxelGrad = content->F3dContent::GetVoxelBasedMeasureGradient();
-            if (voxelGrad->sform_code > 0)
-                voxelGrad->sto_ijk = matrices[3];
-            else voxelGrad->qto_ijk = matrices[3];
-            voxelGrad.copyData(voxelBasedMeasureGradient);
-            content->UpdateVoxelBasedMeasureGradient();
 
-            // Extract the node-based NMI gradient from the voxel-based NMI gradient
-            unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
-            compute->VoxelCentricToNodeCentric(weight);
-            NiftiImage transGradExp(transGrad, NiftiImage::Copy::ImageInfoAndAllocData);
-            VoxelCentricToNodeCentric<float>(floating, transGradExp, voxelGrad, weight);
-            transGrad.disown(); voxelGrad.disown();
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
 
             // Check the results
-            transGrad = content->GetTransformationGradient();
             const auto transGradPtr = transGrad.data();
-            const auto transGradExpPtr = transGradExp.data();
-            transGrad.disown();
-            for (size_t i = 0; i < transGradExp.nVoxels(); ++i) {
+            const auto expTransGradPtr = expTransGrad.data();
+            for (size_t i = 0; i < expTransGrad.nVoxels(); ++i) {
                 const float transGradVal = transGradPtr[i];
-                const float transGradExpVal = transGradExpPtr[i];
-                NR_COUT << i << " " << transGradVal << " " << transGradExpVal << std::endl;
-                REQUIRE(fabs(transGradVal - transGradExpVal) < EPS);
+                const float expTransGradVal = expTransGradPtr[i];
+                const float diff = abs(transGradVal - expTransGradVal);
+                if (diff > 0) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | diff=" << diff;
+                    NR_COUT << " | Result=" << transGradVal;
+                    NR_COUT << " | Expected=" << expTransGradVal << std::endl;
+                }
+                REQUIRE(diff < EPS);
             }
-            // Ensure the termination of content before CudaContext
-            content.reset();
         }
     }
 }

From 2f65fc99ee3fc660944d1d4ac784b3627a280379 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 30 Oct 2023 15:35:19 +0000
Subject: [PATCH 233/314] Make CudaCompute::ResampleImage() on a par with CPU
 #92

---
 niftyreg_build_version.txt              |   2 +-
 reg-lib/cuda/BlockSize.hpp              |   4 +-
 reg-lib/cuda/_reg_resampling_kernels.cu | 356 +++++++++++-------------
 3 files changed, 173 insertions(+), 189 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 1caed7b7..ec9163d7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-351
+352
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index ee1f0cef..aeaf3631 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -209,8 +209,8 @@ struct BlockSize300: public BlockSize {
         /* _reg_resampling_gpu */
         reg_resampleImage2D = 1024; // 23 reg
         reg_resampleImage3D = 1024; // 24 reg
-        reg_getImageGradient2D = 768; // 34 reg
-        reg_getImageGradient3D = 768; // 34 reg
+        reg_getImageGradient2D = 1024; // 34 reg
+        reg_getImageGradient3D = 1024; // 34 reg
         NR_FUNC_CALLED();
     }
 };
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 1c14369c..8a04ce12 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -28,45 +28,44 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray,
                                            const unsigned activeVoxelNumber,
                                            const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        // Get the real world deformation in the floating space
-        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-        float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
-
-        // Get the voxel-based deformation in the floating space
-        float2 voxelDeformation;
-        voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
-                              floatingMatrix.m[0][1] * realDeformation.y +
-                              floatingMatrix.m[0][3]);
-        voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
-                              floatingMatrix.m[1][1] * realDeformation.y +
-                              floatingMatrix.m[1][3]);
-
-        // Compute the linear interpolation
-        const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) };
-        const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
-        float xBasis[2], yBasis[2];
-        InterpLinearKernel(relative.x, xBasis);
-        InterpLinearKernel(relative.y, yBasis);
-
-        float intensity = 0;
-        for (short b = 0; b < 2; b++) {
-            const int y = previous.y + b;
-            float xTempNewValue = 0;
-            for (short a = 0; a < 2; a++) {
-                const int x = previous.x + a;
-                if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) {
-                    xTempNewValue += tex3D<float>(floatingTexture, x, y, 0) * xBasis[a];
-                } else {
-                    // Padding value
-                    xTempNewValue += paddingValue * xBasis[a];
-                }
+    if (tid >= activeVoxelNumber) return;
+    // Get the real world deformation in the floating space
+    const int tid2 = tex1Dfetch<int>(maskTexture, tid);
+    float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
+
+    // Get the voxel-based deformation in the floating space
+    double2 voxelDeformation;
+    voxelDeformation.x = (double(floatingMatrix.m[0][0]) * double(realDeformation.x) +
+                          double(floatingMatrix.m[0][1]) * double(realDeformation.y) +
+                          double(floatingMatrix.m[0][3]));
+    voxelDeformation.y = (double(floatingMatrix.m[1][0]) * double(realDeformation.x) +
+                          double(floatingMatrix.m[1][1]) * double(realDeformation.y) +
+                          double(floatingMatrix.m[1][3]));
+
+    // Compute the linear interpolation
+    const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) };
+    const double2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
+    double xBasis[2], yBasis[2];
+    InterpLinearKernel(relative.x, xBasis);
+    InterpLinearKernel(relative.y, yBasis);
+
+    double intensity = 0;
+    for (char b = 0; b < 2; b++) {
+        const int y = previous.y + b;
+        double xTempNewValue = 0;
+        for (char a = 0; a < 2; a++) {
+            const int x = previous.x + a;
+            if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) {
+                xTempNewValue += tex3D<float>(floatingTexture, x, y, 0) * xBasis[a];
+            } else {
+                // Padding value
+                xTempNewValue += paddingValue * xBasis[a];
             }
-            intensity += xTempNewValue * yBasis[b];
         }
-
-        resultArray[tid2] = intensity;
+        intensity += xTempNewValue * yBasis[b];
     }
+
+    resultArray[tid2] = intensity;
 }
 /* *************************************************************** */
 __global__ void reg_resampleImage3D_kernel(float *resultArray,
@@ -78,58 +77,57 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray,
                                            const unsigned activeVoxelNumber,
                                            const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-
-        // Get the real world deformation in the floating space
-        float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
-
-        // Get the voxel-based deformation in the floating space
-        float3 voxelDeformation;
-        voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
-                              floatingMatrix.m[0][1] * realDeformation.y +
-                              floatingMatrix.m[0][2] * realDeformation.z +
-                              floatingMatrix.m[0][3]);
-        voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
-                              floatingMatrix.m[1][1] * realDeformation.y +
-                              floatingMatrix.m[1][2] * realDeformation.z +
-                              floatingMatrix.m[1][3]);
-        voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x +
-                              floatingMatrix.m[2][1] * realDeformation.y +
-                              floatingMatrix.m[2][2] * realDeformation.z +
-                              floatingMatrix.m[2][3]);
-
-        // Compute the linear interpolation
-        const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) };
-        const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z };
-        float xBasis[2], yBasis[2], zBasis[2];
-        InterpLinearKernel(relative.x, xBasis);
-        InterpLinearKernel(relative.y, yBasis);
-        InterpLinearKernel(relative.z, zBasis);
-
-        float intensity = 0;
-        for (short c = 0; c < 2; c++) {
-            const int z = previous.z + c;
-            float yTempNewValue = 0;
-            for (short b = 0; b < 2; b++) {
-                const int y = previous.y + b;
-                float xTempNewValue = 0;
-                for (short a = 0; a < 2; a++) {
-                    const int x = previous.x + a;
-                    if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) {
-                        xTempNewValue += tex3D<float>(floatingTexture, x, y, z) * xBasis[a];
-                    } else {
-                        // Padding value
-                        xTempNewValue += paddingValue * xBasis[a];
-                    }
+    if (tid >= activeVoxelNumber) return;
+    const int tid2 = tex1Dfetch<int>(maskTexture, tid);
+
+    // Get the real world deformation in the floating space
+    float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
+
+    // Get the voxel-based deformation in the floating space
+    float3 voxelDeformation;
+    voxelDeformation.x = (double(floatingMatrix.m[0][0]) * double(realDeformation.x) +
+                          double(floatingMatrix.m[0][1]) * double(realDeformation.y) +
+                          double(floatingMatrix.m[0][2]) * double(realDeformation.z) +
+                          double(floatingMatrix.m[0][3]));
+    voxelDeformation.y = (double(floatingMatrix.m[1][0]) * double(realDeformation.x) +
+                          double(floatingMatrix.m[1][1]) * double(realDeformation.y) +
+                          double(floatingMatrix.m[1][2]) * double(realDeformation.z) +
+                          double(floatingMatrix.m[1][3]));
+    voxelDeformation.z = (double(floatingMatrix.m[2][0]) * double(realDeformation.x) +
+                          double(floatingMatrix.m[2][1]) * double(realDeformation.y) +
+                          double(floatingMatrix.m[2][2]) * double(realDeformation.z) +
+                          double(floatingMatrix.m[2][3]));
+
+    // Compute the linear interpolation
+    const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) };
+    const double3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z };
+    double xBasis[2], yBasis[2], zBasis[2];
+    InterpLinearKernel(relative.x, xBasis);
+    InterpLinearKernel(relative.y, yBasis);
+    InterpLinearKernel(relative.z, zBasis);
+
+    double intensity = 0;
+    for (char c = 0; c < 2; c++) {
+        const int z = previous.z + c;
+        double yTempNewValue = 0;
+        for (char b = 0; b < 2; b++) {
+            const int y = previous.y + b;
+            double xTempNewValue = 0;
+            for (char a = 0; a < 2; a++) {
+                const int x = previous.x + a;
+                if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) {
+                    xTempNewValue += tex3D<float>(floatingTexture, x, y, z) * xBasis[a];
+                } else {
+                    // Padding value
+                    xTempNewValue += paddingValue * xBasis[a];
                 }
-                yTempNewValue += xTempNewValue * yBasis[b];
             }
-            intensity += yTempNewValue * zBasis[c];
+            yTempNewValue += xTempNewValue * yBasis[b];
         }
-
-        resultArray[tid2] = intensity;
+        intensity += yTempNewValue * zBasis[c];
     }
+
+    resultArray[tid2] = intensity;
 }
 /* *************************************************************** */
 __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
@@ -140,52 +138,46 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
                                               const unsigned activeVoxelNumber,
                                               const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        // Get the real world deformation in the floating space
-        float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
-
-        // Get the voxel-based deformation in the floating space
-        float2 voxelDeformation;
-        voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
-                              floatingMatrix.m[0][1] * realDeformation.y +
-                              floatingMatrix.m[0][3]);
-        voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
-                              floatingMatrix.m[1][1] * realDeformation.y +
-                              floatingMatrix.m[1][3]);
-
-        // Compute the gradient
-        const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) };
-        float xBasis[2], yBasis[2];
-        const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
-        InterpLinearKernel(relative.x, xBasis);
-        InterpLinearKernel(relative.y, yBasis);
-        constexpr float deriv[] = { -1.0f, 1.0f };
-
-        float4 gradientValue{};
-        for (short b = 0; b < 2; b++) {
-            float2 tempValueX{};
-            const int y = previous.y + b;
-            for (short a = 0; a < 2; a++) {
-                const int x = previous.x + a;
-                float intensity = paddingValue;
-
-                if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y)
-                    intensity = tex3D<float>(floatingTexture, x, y, 0);
-
-                tempValueX.x += intensity * deriv[a];
-                tempValueX.y += intensity * xBasis[a];
-            }
-            gradientValue.x += tempValueX.x * yBasis[b];
-            gradientValue.y += tempValueX.y * deriv[b];
+    if (tid >= activeVoxelNumber) return;
+    // Get the real world deformation in the floating space
+    float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
+
+    // Get the voxel-based deformation in the floating space
+    float2 voxelDeformation;
+    voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
+                          floatingMatrix.m[0][1] * realDeformation.y +
+                          floatingMatrix.m[0][3]);
+    voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
+                          floatingMatrix.m[1][1] * realDeformation.y +
+                          floatingMatrix.m[1][3]);
+
+    // Compute the gradient
+    const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) };
+    float xBasis[2], yBasis[2];
+    const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
+    InterpLinearKernel(relative.x, xBasis);
+    InterpLinearKernel(relative.y, yBasis);
+    constexpr float deriv[] = { -1.0f, 1.0f };
+
+    float4 gradientValue{};
+    for (char b = 0; b < 2; b++) {
+        float2 tempValueX{};
+        const int y = previous.y + b;
+        for (char a = 0; a < 2; a++) {
+            const int x = previous.x + a;
+            float intensity = paddingValue;
+
+            if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y)
+                intensity = tex3D<float>(floatingTexture, x, y, 0);
+
+            tempValueX.x += intensity * deriv[a];
+            tempValueX.y += intensity * xBasis[a];
         }
-
-        if (gradientValue.x != gradientValue.x)
-            gradientValue.x = 0;
-        if (gradientValue.y != gradientValue.y)
-            gradientValue.y = 0;
-
-        gradientArray[tid] = gradientValue;
+        gradientValue.x += tempValueX.x * yBasis[b];
+        gradientValue.y += tempValueX.y * deriv[b];
     }
+
+    gradientArray[tid] = gradientValue;
 }
 /* *************************************************************** */
 __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
@@ -196,68 +188,60 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
                                               const unsigned activeVoxelNumber,
                                               const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        // Get the real world deformation in the floating space
-        float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
-
-        // Get the voxel-based deformation in the floating space
-        float3 voxelDeformation;
-        voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
-                              floatingMatrix.m[0][1] * realDeformation.y +
-                              floatingMatrix.m[0][2] * realDeformation.z +
-                              floatingMatrix.m[0][3]);
-        voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
-                              floatingMatrix.m[1][1] * realDeformation.y +
-                              floatingMatrix.m[1][2] * realDeformation.z +
-                              floatingMatrix.m[1][3]);
-        voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x +
-                              floatingMatrix.m[2][1] * realDeformation.y +
-                              floatingMatrix.m[2][2] * realDeformation.z +
-                              floatingMatrix.m[2][3]);
-
-        // Compute the gradient
-        const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) };
-        float xBasis[2], yBasis[2], zBasis[2];
-        const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z };
-        InterpLinearKernel(relative.x, xBasis);
-        InterpLinearKernel(relative.y, yBasis);
-        InterpLinearKernel(relative.z, zBasis);
-        constexpr float deriv[] = { -1.0f, 1.0f };
-
-        float4 gradientValue{};
-        for (short c = 0; c < 2; c++) {
-            const int z = previous.z + c;
-            float3 tempValueY{};
-            for (short b = 0; b < 2; b++) {
-                float2 tempValueX{};
-                const int y = previous.y + b;
-                for (short a = 0; a < 2; a++) {
-                    const int x = previous.x + a;
-                    float intensity = paddingValue;
+    if (tid >= activeVoxelNumber) return;
+    // Get the real world deformation in the floating space
+    float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
+
+    // Get the voxel-based deformation in the floating space
+    float3 voxelDeformation;
+    voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
+                          floatingMatrix.m[0][1] * realDeformation.y +
+                          floatingMatrix.m[0][2] * realDeformation.z +
+                          floatingMatrix.m[0][3]);
+    voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
+                          floatingMatrix.m[1][1] * realDeformation.y +
+                          floatingMatrix.m[1][2] * realDeformation.z +
+                          floatingMatrix.m[1][3]);
+    voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x +
+                          floatingMatrix.m[2][1] * realDeformation.y +
+                          floatingMatrix.m[2][2] * realDeformation.z +
+                          floatingMatrix.m[2][3]);
+
+    // Compute the gradient
+    const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) };
+    float xBasis[2], yBasis[2], zBasis[2];
+    const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z };
+    InterpLinearKernel(relative.x, xBasis);
+    InterpLinearKernel(relative.y, yBasis);
+    InterpLinearKernel(relative.z, zBasis);
+    constexpr float deriv[] = { -1.0f, 1.0f };
+
+    float4 gradientValue{};
+    for (char c = 0; c < 2; c++) {
+        const int z = previous.z + c;
+        float3 tempValueY{};
+        for (char b = 0; b < 2; b++) {
+            float2 tempValueX{};
+            const int y = previous.y + b;
+            for (char a = 0; a < 2; a++) {
+                const int x = previous.x + a;
+                float intensity = paddingValue;
 
-                    if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z)
-                        intensity = tex3D<float>(floatingTexture, x, y, z);
+                if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z)
+                    intensity = tex3D<float>(floatingTexture, x, y, z);
 
-                    tempValueX.x += intensity * deriv[a];
-                    tempValueX.y += intensity * xBasis[a];
-                }
-                tempValueY.x += tempValueX.x * yBasis[b];
-                tempValueY.y += tempValueX.y * deriv[b];
-                tempValueY.z += tempValueX.y * yBasis[b];
+                tempValueX.x += intensity * deriv[a];
+                tempValueX.y += intensity * xBasis[a];
             }
-            gradientValue.x += tempValueY.x * zBasis[c];
-            gradientValue.y += tempValueY.y * zBasis[c];
-            gradientValue.z += tempValueY.z * deriv[c];
+            tempValueY.x += tempValueX.x * yBasis[b];
+            tempValueY.y += tempValueX.y * deriv[b];
+            tempValueY.z += tempValueX.y * yBasis[b];
         }
-
-        if (gradientValue.x != gradientValue.x)
-            gradientValue.x = 0;
-        if (gradientValue.y != gradientValue.y)
-            gradientValue.y = 0;
-        if (gradientValue.z != gradientValue.z)
-            gradientValue.z = 0;
-
-        gradientArray[tid] = gradientValue;
+        gradientValue.x += tempValueY.x * zBasis[c];
+        gradientValue.y += tempValueY.y * zBasis[c];
+        gradientValue.z += tempValueY.z * deriv[c];
     }
+
+    gradientArray[tid] = gradientValue;
 }
 /* *************************************************************** */

From 97bce9ecabefed32580fe3f475f1df24b4590325 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 30 Oct 2023 17:56:23 +0000
Subject: [PATCH 234/314] Make CudaCompute::GetDeformationField() on a par with
 CPU #92

---
 niftyreg_build_version.txt                    |  2 +-
 .../cuda/_reg_localTransformation_kernels.cu  | 90 +++++++------------
 2 files changed, 32 insertions(+), 60 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index ec9163d7..6fa50e78 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-352
+353
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 69e44967..ba459d22 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -13,14 +13,14 @@
 #include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
-__device__ void GetBasisBSplineValues(const double basis, float *values) {
-    const double ff = Square(basis);
-    const double fff = Cube(basis);
-    const double mf = 1.0 - basis;
-    values[0] = static_cast<float>(Cube(mf) / 6.0);
-    values[1] = static_cast<float>((3.0 * fff - 6.0 * ff + 4.0) / 6.0);
-    values[2] = static_cast<float>((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0);
-    values[3] = static_cast<float>(fff / 6.0);
+__device__ void GetBasisBSplineValues(const float basis, float *values) {
+    const float ff = Square(basis);
+    const float fff = ff * basis;
+    const float mf = 1.f - basis;
+    values[0] = Cube(mf) / 6.f;
+    values[1] = (3.f * fff - 6.f * ff + 4.f) / 6.f;
+    values[2] = (-3.f * fff + 3.f * ff + 3.f * basis + 1.f) / 6.f;
+    values[3] = fff / 6.f;
 }
 /* *************************************************************** */
 __device__ void GetFirstBSplineValues(const float basis, float *values, float *first) {
@@ -319,8 +319,6 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
                                                  const bool bspline) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= activeVoxelNumber) return;
-    const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-    const auto&& [x, y, z] = reg_indexToDims_cuda<true>(tid2, referenceImageDim);
     int3 nodePre;
     float3 basis;
 
@@ -349,6 +347,8 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
         nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) };
     } else { // starting deformation field is blank - !composition
+        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
+        const auto&& [x, y, z] = reg_indexToDims_cuda<true>(tid2, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -377,39 +377,20 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
     else GetBasisSplineValues(basis.x, xBasis);
 
     float4 displacement{};
-    for (int c = 0; c < 4; c++) {
-        float3 tempDisplacement{};
+    for (char c = 0; c < 4; c++) {
         int indexYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y) * controlPointImageDim.x;
-        for (int b = 0; b < 4; b++) {
+        const float basisZ = zBasis[sharedMemIndex + c];
+        for (char b = 0; b < 4; b++, indexYZ += controlPointImageDim.x) {
             int indexXYZ = indexYZ + nodePre.x;
-            const float4& nodeCoefficientA = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
-            const float4& nodeCoefficientB = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
-            const float4& nodeCoefficientC = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
-            const float4& nodeCoefficientD = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
-
-            const float& basis = yBasis[sharedMemIndex + b];
-            tempDisplacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
-                                           nodeCoefficientB.x * xBasis[1] +
-                                           nodeCoefficientC.x * xBasis[2] +
-                                           nodeCoefficientD.x * xBasis[3]);
-
-            tempDisplacement.y += basis * (nodeCoefficientA.y * xBasis[0] +
-                                           nodeCoefficientB.y * xBasis[1] +
-                                           nodeCoefficientC.y * xBasis[2] +
-                                           nodeCoefficientD.y * xBasis[3]);
-
-            tempDisplacement.z += basis * (nodeCoefficientA.z * xBasis[0] +
-                                           nodeCoefficientB.z * xBasis[1] +
-                                           nodeCoefficientC.z * xBasis[2] +
-                                           nodeCoefficientD.z * xBasis[3]);
-
-            indexYZ += controlPointImageDim.x;
+            const float basisY = yBasis[sharedMemIndex + b];
+            for (char a = 0; a < 4; a++, indexXYZ++) {
+                const float4& nodeCoeff = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
+                const float xyzBasis = xBasis[a] * basisY * basisZ;
+                displacement.x += xyzBasis * nodeCoeff.x;
+                displacement.y += xyzBasis * nodeCoeff.y;
+                displacement.z += xyzBasis * nodeCoeff.z;
+            }
         }
-
-        const float& basis = zBasis[sharedMemIndex + c];
-        displacement.x += basis * tempDisplacement.x;
-        displacement.y += basis * tempDisplacement.y;
-        displacement.z += basis * tempDisplacement.z;
     }
     deformationField[tid] = displacement;
 }
@@ -426,8 +407,6 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
                                                  const bool bspline) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= activeVoxelNumber) return;
-    const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-    const auto&& [x, y, z] = reg_indexToDims_cuda<false>(tid2, referenceImageDim);
     int2 nodePre;
     float2 basis;
 
@@ -449,6 +428,8 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
         nodePre = { Floor(xVoxel), Floor(yVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) };
     } else { // starting deformation field is blank - !composition
+        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
+        const auto&& [x, y, z] = reg_indexToDims_cuda<false>(tid2, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -469,24 +450,15 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
     else GetBasisSplineValues(basis.x, xBasis);
 
     float4 displacement{};
-    for (int b = 0; b < 4; b++) {
+    for (char b = 0; b < 4; b++) {
         int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x;
-
-        const float4& nodeCoefficientA = tex1Dfetch<float4>(controlPointTexture, index++);
-        const float4& nodeCoefficientB = tex1Dfetch<float4>(controlPointTexture, index++);
-        const float4& nodeCoefficientC = tex1Dfetch<float4>(controlPointTexture, index++);
-        const float4& nodeCoefficientD = tex1Dfetch<float4>(controlPointTexture, index);
-
-        const float& basis = yBasis[sharedMemIndex + b];
-        displacement.x += basis * (nodeCoefficientA.x * xBasis[0] +
-                                   nodeCoefficientB.x * xBasis[1] +
-                                   nodeCoefficientC.x * xBasis[2] +
-                                   nodeCoefficientD.x * xBasis[3]);
-
-        displacement.y += basis * (nodeCoefficientA.y * xBasis[0] +
-                                   nodeCoefficientB.y * xBasis[1] +
-                                   nodeCoefficientC.y * xBasis[2] +
-                                   nodeCoefficientD.y * xBasis[3]);
+        const float basis = yBasis[sharedMemIndex + b];
+        for (char a = 0; a < 4; a++, index++) {
+            const float4& nodeCoeff = tex1Dfetch<float4>(controlPointTexture, index);
+            const float xyBasis = xBasis[a] * basis;
+            displacement.x += xyBasis * nodeCoeff.x;
+            displacement.y += xyBasis * nodeCoeff.y;
+        }
     }
     deformationField[tid] = displacement;
 }

From 3db10faf1d1751642daccceb8806452290c593f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 30 Oct 2023 18:00:52 +0000
Subject: [PATCH 235/314] Temporarily disable GetDeformationFieldTest

---
 niftyreg_build_version.txt | 2 +-
 reg-test/CMakeLists.txt    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6fa50e78..bc23f8ef 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-353
+354
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index a2e304f6..941ed995 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -113,7 +113,7 @@ set(EXEC_LIST reg_test_affineDeformationField)
 set(EXEC_LIST reg_test_be ${EXEC_LIST})
 set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST})
 set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST})
-set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST})
+# set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST})
 set(EXEC_LIST reg_test_composeField ${EXEC_LIST})
 set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_interpolation ${EXEC_LIST})

From 5eb3163b4f59715f9c16980f1ab98eff1041e852 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 31 Oct 2023 13:09:46 +0000
Subject: [PATCH 236/314] Make CudaCompute::NormaliseGradient() on a par with
 CPU #92

---
 niftyreg_build_version.txt                    |  2 +-
 reg-lib/cuda/CudaCompute.cu                   |  8 +-
 reg-lib/cuda/CudaNormaliseGradient.cu         | 80 +++++++++-------
 reg-lib/cuda/CudaNormaliseGradient.hpp        | 18 ++--
 reg-test/reg_test_normaliseGradient.cpp       | 93 ++++++++++---------
 .../reg_test_regr_getDeformationField.cpp     |  6 +-
 6 files changed, 115 insertions(+), 92 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index bc23f8ef..8941db59 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-354
+355
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 8871f2dc..cae2fd12 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -120,7 +120,7 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
     // TODO Fix reg_getImageGradient_gpu to accept activeTimepoint
     CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
-    reg_getImageGradient_gpu(con.DefContent::GetFloating(),
+    reg_getImageGradient_gpu(con.Content::GetFloating(),
                              con.GetFloatingCuda(),
                              con.GetDeformationFieldCuda(),
                              con.GetWarpedGradientCuda(),
@@ -139,8 +139,10 @@ double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimi
 void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
     if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return;
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3);
-    Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, static_cast<float>(maxGradLength), optimiseX, optimiseY, optimiseZ);
+    nifti_image *transGrad = con.F3dContent::GetTransformationGradient();
+    const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transGrad, 3);
+    if (transGrad->nz <= 1) optimiseZ = false;
+    Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, maxGradLength, optimiseX, optimiseY, optimiseZ);
 }
 /* *************************************************************** */
 void CudaCompute::SmoothGradient(float sigma) {
diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
index 61d5e626..62b2aa64 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -11,17 +11,17 @@ __global__ static void GetMaximalLengthKernel(float *dists,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         float4 gradValue = tex1Dfetch<float4>(imageTexture, tid);
-        dists[tid] = sqrtf((optimiseX ? gradValue.x * gradValue.x : 0) +
-                           (optimiseY ? gradValue.y * gradValue.y : 0) +
-                           (optimiseZ ? gradValue.z * gradValue.z : 0));
+        dists[tid] = sqrtf((optimiseX ? Square(gradValue.x) : 0) +
+                           (optimiseY ? Square(gradValue.y) : 0) +
+                           (optimiseZ ? Square(gradValue.z) : 0));
     }
 }
 /* *************************************************************** */
 float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
-                                       const size_t& nVoxels,
-                                       const bool& optimiseX,
-                                       const bool& optimiseY,
-                                       const bool& optimiseZ) {
+                                       const size_t nVoxels,
+                                       const bool optimiseX,
+                                       const bool optimiseY,
+                                       const bool optimiseZ) {
     // Create a texture object for the imageCuda
     auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
                                                   nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
@@ -42,33 +42,49 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
     return maxDistance;
 }
 /* *************************************************************** */
-__global__ static void NormaliseGradientKernel(float4 *imageCuda,
-                                               const unsigned nVoxels,
-                                               const float maxGradLenInv,
-                                               const bool optimiseX,
-                                               const bool optimiseY,
-                                               const bool optimiseZ) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < nVoxels) {
-        float4 grad = imageCuda[tid];
-        imageCuda[tid] = make_float4(optimiseX ? grad.x * maxGradLenInv : 0,
-                                     optimiseY ? grad.y * maxGradLenInv : 0,
-                                     optimiseZ ? grad.z * maxGradLenInv : 0,
-                                     grad.w);
-    }
+template<bool optimiseX, bool optimiseY, bool optimiseZ>
+void NormaliseGradient(float4 *imageCuda, const size_t nVoxels, const double maxGradLengthInv) {
+    auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
+                                                     nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto imageTexture = *imageTexturePtr;
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), nVoxels, [=]__device__(const unsigned index) {
+        const float4& val = tex1Dfetch<float4>(imageTexture, index);
+        imageCuda[index] = make_float4(optimiseX ? val.x * maxGradLengthInv : 0,
+                                       optimiseY ? val.y * maxGradLengthInv : 0,
+                                       optimiseZ ? val.z * maxGradLengthInv : 0,
+                                       val.w);
+    });
+}
+/* *************************************************************** */
+template<bool optimiseX, bool optimiseY>
+static inline void NormaliseGradient(float4 *imageCuda,
+                                     const size_t nVoxels,
+                                     const double maxGradLengthInv,
+                                     const bool optimiseZ) {
+    auto normaliseGradient = NormaliseGradient<optimiseX, optimiseY, true>;
+    if (!optimiseZ) normaliseGradient = NormaliseGradient<optimiseX, optimiseY, false>;
+    normaliseGradient(imageCuda, nVoxels, maxGradLengthInv);
+}
+/* *************************************************************** */
+template<bool optimiseX>
+static inline void NormaliseGradient(float4 *imageCuda,
+                                     const size_t nVoxels,
+                                     const double maxGradLengthInv,
+                                     const bool optimiseY,
+                                     const bool optimiseZ) {
+    auto normaliseGradient = NormaliseGradient<optimiseX, true>;
+    if (!optimiseY) normaliseGradient = NormaliseGradient<optimiseX, false>;
+    normaliseGradient(imageCuda, nVoxels, maxGradLengthInv, optimiseZ);
 }
 /* *************************************************************** */
 void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda,
-                                       const size_t& nVoxels,
-                                       const float& maxGradLength,
-                                       const bool& optimiseX,
-                                       const bool& optimiseY,
-                                       const bool& optimiseZ) {
-    const unsigned threads = CudaContext::GetBlockSize()->Arithmetic;
-    const unsigned blocks = static_cast<unsigned>(Ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
-    const dim3 blockDims(threads, 1, 1);
-    const dim3 gridDims(blocks, blocks, 1);
-    NormaliseGradientKernel<<<gridDims, blockDims>>>(imageCuda, static_cast<unsigned>(nVoxels), 1 / maxGradLength, optimiseX, optimiseY, optimiseZ);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+                                       const size_t nVoxels,
+                                       const double maxGradLength,
+                                       const bool optimiseX,
+                                       const bool optimiseY,
+                                       const bool optimiseZ) {
+    auto normaliseGradient = ::NormaliseGradient<true>;
+    if (!optimiseX) normaliseGradient = ::NormaliseGradient<false>;
+    normaliseGradient(imageCuda, nVoxels, 1.0 / maxGradLength, optimiseY, optimiseZ);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaNormaliseGradient.hpp b/reg-lib/cuda/CudaNormaliseGradient.hpp
index 5d619d2f..bbcae390 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.hpp
+++ b/reg-lib/cuda/CudaNormaliseGradient.hpp
@@ -14,10 +14,10 @@ namespace NiftyReg::Cuda {
  * @return The maximal value of the gradient image
 */
 float GetMaximalLength(const float4 *imageCuda,
-                       const size_t& nVoxels,
-                       const bool& optimiseX,
-                       const bool& optimiseY,
-                       const bool& optimiseZ);
+                       const size_t nVoxels,
+                       const bool optimiseX,
+                       const bool optimiseY,
+                       const bool optimiseZ);
 /* *************************************************************** */
 /**
  * @brief Normalise the gradient image
@@ -29,10 +29,10 @@ float GetMaximalLength(const float4 *imageCuda,
  * @param optimiseZ Flag to indicate if the z component of the gradient is optimised
 */
 void NormaliseGradient(float4 *imageCuda,
-                       const size_t& nVoxels,
-                       const float& maxGradLength,
-                       const bool& optimiseX,
-                       const bool& optimiseY,
-                       const bool& optimiseZ);
+                       const size_t nVoxels,
+                       const double maxGradLength,
+                       const bool optimiseX,
+                       const bool optimiseY,
+                       const bool optimiseZ);
 /* *************************************************************** */
 } // namespace NiftyReg::Cuda
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index 4b4a8d38..64f49fae 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -15,7 +15,7 @@
 class NormaliseGradientTest {
 protected:
     using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
-    using TestCase = std::tuple<shared_ptr<Platform>, unique_ptr<F3dContent>, TestData, bool, bool, bool>;
+    using TestCase = std::tuple<std::string, double, double, NiftiImage, NiftiImage>;
 
     inline static vector<TestCase> testCases;
 
@@ -26,7 +26,7 @@ class NormaliseGradientTest {
 
         // Create a random number generator
         std::mt19937 gen(0);
-        std::uniform_real_distribution<float> distr(0, 1);
+        std::uniform_real_distribution<float> distr(0, 100);
 
         // Create a reference 2D image
         vector<NiftiImage::dim_t> dimFlo{ 4, 4 };
@@ -92,11 +92,31 @@ class NormaliseGradientTest {
                     for (int optimiseY = 0; optimiseY < 2; optimiseY++) {
                         for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) {
                             // Make a copy of the test data
-                            auto td = testData;
-                            auto&& [testName, reference, controlPointGrid, testGrad] = td;
-                            // Add content
+                            auto [testName, reference, controlPointGrid, expTransGrad] = testData;
+                            testName += " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ");
+                            // Create the content
                             unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
-                            testCases.push_back({ platform, std::move(content), std::move(td), optimiseX, optimiseY, optimiseZ });
+
+                            // Set the transformation gradient image to host the computation
+                            NiftiImage transGrad = content->GetTransformationGradient();
+                            transGrad.copyData(expTransGrad);
+                            transGrad.disown();
+                            content->UpdateTransformationGradient();
+
+                            // Calculate the maximal length
+                            unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                            const double maxLength = compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ);
+                            const double expMaxLength = GetMaximalLength<float>(expTransGrad, optimiseX, optimiseY, optimiseZ);
+
+                            // Normalise the gradient
+                            compute->NormaliseGradient(expMaxLength, optimiseX, optimiseY, optimiseZ);
+                            NormaliseGradient<float>(expTransGrad, expMaxLength, optimiseX, optimiseY, optimiseZ);
+
+                            // Get the results
+                            transGrad = NiftiImage(content->GetTransformationGradient(), NiftiImage::Copy::Image);
+
+                            // Save for testing
+                            testCases.push_back({ testName, maxLength, expMaxLength, std::move(transGrad), std::move(expTransGrad) });
                         }
                     }
                 }
@@ -105,7 +125,7 @@ class NormaliseGradientTest {
     }
 
     template<typename T>
-    T GetMaximalLength(const nifti_image* transformationGradient, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
+    T GetMaximalLength(const nifti_image* transformationGradient, const bool optimiseX, const bool optimiseY, const bool optimiseZ) {
         if (!optimiseX && !optimiseY && !optimiseZ) return 0;
         const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
         const T *ptrX = static_cast<T*>(transformationGradient->data);
@@ -139,7 +159,7 @@ class NormaliseGradientTest {
     }
 
     template<typename T>
-    void NormaliseGradient(nifti_image* transformationGradient, const T& maxGradLength, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) {
+    void NormaliseGradient(nifti_image *transformationGradient, const double maxGradLength, const bool optimiseX, const bool optimiseY, const bool optimiseZ) {
         if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return;
         const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3);
         T *ptrX = static_cast<T*>(transformationGradient->data);
@@ -147,26 +167,26 @@ class NormaliseGradientTest {
         T *ptrZ = &ptrY[nVoxelsPerVolume];
         if (transformationGradient->nz > 1) {
             for (size_t i = 0; i < nVoxelsPerVolume; ++i) {
-                T valX = 0, valY = 0, valZ = 0;
+                double valX = 0, valY = 0, valZ = 0;
                 if (optimiseX)
                     valX = ptrX[i];
                 if (optimiseY)
                     valY = ptrY[i];
                 if (optimiseZ)
                     valZ = ptrZ[i];
-                ptrX[i] = valX / maxGradLength;
-                ptrY[i] = valY / maxGradLength;
-                ptrZ[i] = valZ / maxGradLength;
+                ptrX[i] = static_cast<T>(valX / maxGradLength);
+                ptrY[i] = static_cast<T>(valY / maxGradLength);
+                ptrZ[i] = static_cast<T>(valZ / maxGradLength);
             }
         } else {
             for (size_t i = 0; i < nVoxelsPerVolume; ++i) {
-                T valX = 0, valY = 0;
+                double valX = 0, valY = 0;
                 if (optimiseX)
                     valX = ptrX[i];
                 if (optimiseY)
                     valY = ptrY[i];
-                ptrX[i] = valX / maxGradLength;
-                ptrY[i] = valY / maxGradLength;
+                ptrX[i] = static_cast<T>(valX / maxGradLength);
+                ptrY[i] = static_cast<T>(valY / maxGradLength);
             }
         }
     }
@@ -176,9 +196,7 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
-        auto&& [platform, content, testData, optimiseX, optimiseY, optimiseZ] = testCase;
-        auto&& [testName, reference, controlPointGrid, testGrad] = testData;
-        const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ");
+        auto&& [sectionName, maxLength, expMaxLength, transGrad, expTransGrad] = testCase;
 
         SECTION(sectionName) {
             NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
@@ -186,38 +204,25 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien
             // Increase the precision for the output
             NR_COUT << std::fixed << std::setprecision(10);
 
-            // Set the transformation gradient image to host the computation
-            NiftiImage transGrad = content->GetTransformationGradient();
-            transGrad.copyData(testGrad);
-            transGrad.disown();
-            content->UpdateTransformationGradient();
-
-            // Calculate the maximal length
-            unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
-            const auto maxLength = static_cast<float>(compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ));
-            const auto testLength = GetMaximalLength<float>(testGrad, optimiseX, optimiseY, optimiseZ);
             // Check the results
-            REQUIRE(fabs(maxLength - testLength) < EPS);
-
-            // Normalise the gradient
-            compute->NormaliseGradient(maxLength, optimiseX, optimiseY, optimiseZ);
-            NormaliseGradient<float>(testGrad, testLength, optimiseX, optimiseY, optimiseZ);
+            NR_COUT << "Maximal Length=" << maxLength << " | Expected=" << expMaxLength << std::endl;
+            REQUIRE(fabs(maxLength - expMaxLength) == 0);
 
             // Check the results
-            transGrad = content->GetTransformationGradient();
             const auto transGradPtr = transGrad.data();
-            const auto testGradPtr = testGrad.data();
-            transGrad.disown();
-            for (size_t i = 0; i < testGrad.nVoxels(); ++i) {
+            const auto expTransGradPtr = expTransGrad.data();
+            for (size_t i = 0; i < expTransGrad.nVoxels(); ++i) {
                 const float transGradVal = transGradPtr[i];
-                const float testGradVal = testGradPtr[i];
-                const float diff = abs(transGradVal - testGradVal);
-                if (diff > EPS)
-                    NR_COUT << i << " " << transGradVal << " " << testGradVal << std::endl;
-                REQUIRE(diff < EPS);
+                const float expTransGradVal = expTransGradPtr[i];
+                const float diff = abs(transGradVal - expTransGradVal);
+                if (diff > 0) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | diff=" << diff;
+                    NR_COUT << " | Result=" << transGradVal;
+                    NR_COUT << " | Expected=" << expTransGradVal << std::endl;
+                }
+                REQUIRE(diff == 0);
             }
-            // Ensure the termination of content before CudaContext
-            content.reset();
         }
     }
 }
diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp
index 62955c0b..f33bbe4b 100644
--- a/reg-test/reg_test_regr_getDeformationField.cpp
+++ b/reg-test/reg_test_regr_getDeformationField.cpp
@@ -72,7 +72,7 @@ class GetDeformationFieldTest {
                         testName += " "s + platform->GetName() + " Composition="s + std::to_string(composition) + " Bspline="s + std::to_string(bspline);
                         unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
                         unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
-                        NiftiImage expDefField(content->GetDeformationField(), NiftiImage::Copy::Image);
+                        NiftiImage expDefField(content->Content::GetDeformationField(), NiftiImage::Copy::Image);
                         // Compute the deformation field
                         compute->GetDeformationField(composition, bspline);
                         NiftiImage defField(content->GetDeformationField(), NiftiImage::Copy::Image);
@@ -556,10 +556,10 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Regression Deformation Field from B-s
 
             // Check the results
             const auto defFieldPtr = defField.data();
-            const auto defFieldExpPtr = expDefField.data();
+            const auto expDefFieldPtr = expDefField.data();
             for (auto i = 0; i < expDefField.nVoxels(); i++) {
                 const float defFieldVal = defFieldPtr[i];
-                const float expDefFieldVal = defFieldExpPtr[i];
+                const float expDefFieldVal = expDefFieldPtr[i];
                 const float diff = abs(defFieldVal - expDefFieldVal);
                 if (diff > 0) {
                     NR_COUT << "[i]=" << i;

From 1c315f158e0b41be81f005513b114ee689dcd501 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 31 Oct 2023 16:34:52 +0000
Subject: [PATCH 237/314] Optimise CudaCompute::GetMaximalLength() #92

---
 niftyreg_build_version.txt            |  2 +-
 reg-lib/cuda/CudaCompute.cu           |  4 +-
 reg-lib/cuda/CudaNormaliseGradient.cu | 65 ++++++++++++++-------------
 3 files changed, 38 insertions(+), 33 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 8941db59..53d5a5ad 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-355
+356
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index cae2fd12..f255b635 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -132,7 +132,9 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac
 double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
     if (!optimiseX && !optimiseY && !optimiseZ) return 0;
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3);
+    nifti_image *transGrad = con.F3dContent::GetTransformationGradient();
+    const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transGrad, 3);
+    if (transGrad->nz <= 1) optimiseZ = false;
     return Cuda::GetMaximalLength(con.GetTransformationGradientCuda(), voxelsPerVolume, optimiseX, optimiseY, optimiseZ);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
index 62b2aa64..c61ecb13 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -2,19 +2,37 @@
 #include "_reg_tools_gpu.h"
 
 /* *************************************************************** */
-__global__ static void GetMaximalLengthKernel(float *dists,
-                                              cudaTextureObject_t imageTexture,
-                                              const unsigned nVoxels,
-                                              const bool optimiseX,
-                                              const bool optimiseY,
-                                              const bool optimiseZ) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < nVoxels) {
-        float4 gradValue = tex1Dfetch<float4>(imageTexture, tid);
-        dists[tid] = sqrtf((optimiseX ? Square(gradValue.x) : 0) +
-                           (optimiseY ? Square(gradValue.y) : 0) +
-                           (optimiseZ ? Square(gradValue.z) : 0));
-    }
+template<bool optimiseX, bool optimiseY, bool optimiseZ>
+float GetMaximalLength(const float4 *imageCuda, const size_t nVoxels) {
+    auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
+                                                     nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto imageTexture = *imageTexturePtr;
+    thrust::counting_iterator<unsigned> index(0);
+    return thrust::transform_reduce(thrust::device, index, index + nVoxels, [=]__device__(const unsigned index) {
+        const float4& val = tex1Dfetch<float4>(imageTexture, index);
+        return sqrtf((optimiseX ? Square(val.x) : 0) +
+                     (optimiseY ? Square(val.y) : 0) +
+                     (optimiseZ ? Square(val.z) : 0));
+    }, 0.f, thrust::maximum<float>());
+}
+/* *************************************************************** */
+template<bool optimiseX, bool optimiseY>
+static inline float GetMaximalLength(const float4 *imageCuda,
+                                     const size_t nVoxels,
+                                     const bool optimiseZ) {
+    auto getMaximalLength = GetMaximalLength<optimiseX, optimiseY, true>;
+    if (!optimiseZ) getMaximalLength = GetMaximalLength<optimiseX, optimiseY, false>;
+    return getMaximalLength(imageCuda, nVoxels);
+}
+/* *************************************************************** */
+template<bool optimiseX>
+static inline float GetMaximalLength(const float4 *imageCuda,
+                                     const size_t nVoxels,
+                                     const bool optimiseY,
+                                     const bool optimiseZ) {
+    auto getMaximalLength = GetMaximalLength<optimiseX, true>;
+    if (!optimiseY) getMaximalLength = GetMaximalLength<optimiseX, false>;
+    return getMaximalLength(imageCuda, nVoxels, optimiseZ);
 }
 /* *************************************************************** */
 float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
@@ -22,24 +40,9 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
                                        const bool optimiseX,
                                        const bool optimiseY,
                                        const bool optimiseZ) {
-    // Create a texture object for the imageCuda
-    auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
-                                                  nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
-
-    float *dists = nullptr;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float)));
-
-    const unsigned threads = CudaContext::GetBlockSize()->GetMaximalLength;
-    const unsigned blocks = static_cast<unsigned>(Ceil(sqrtf(static_cast<float>(nVoxels) / static_cast<float>(threads))));
-    dim3 blockDims(threads, 1, 1);
-    dim3 gridDims(blocks, blocks, 1);
-    GetMaximalLengthKernel<<<gridDims, blockDims>>>(dists, *imageTexture, static_cast<unsigned>(nVoxels), optimiseX, optimiseY, optimiseZ);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-
-    const float maxDistance = reg_maxReduction_gpu(dists, nVoxels);
-    NR_CUDA_SAFE_CALL(cudaFree(dists));
-
-    return maxDistance;
+    auto getMaximalLength = ::GetMaximalLength<true>;
+    if (!optimiseX) getMaximalLength = ::GetMaximalLength<false>;
+    return getMaximalLength(imageCuda, nVoxels, optimiseY, optimiseZ);
 }
 /* *************************************************************** */
 template<bool optimiseX, bool optimiseY, bool optimiseZ>

From cd064cbddc22665dbf1ec7e135c6c28ec395cc93 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 1 Nov 2023 16:33:49 +0000
Subject: [PATCH 238/314] Update tests

---
 niftyreg_build_version.txt                    |  2 +-
 reg-test/reg_test_affineDeformationField.cpp  |  7 ++--
 reg-test/reg_test_be.cpp                      | 12 ++++---
 reg-test/reg_test_blockMatching.cpp           | 10 ++++--
 reg-test/reg_test_composeField.cpp            |  4 +--
 reg-test/reg_test_conjugateGradient.cpp       | 23 +++++++++----
 reg-test/reg_test_getDeformationField.cpp     |  8 +++--
 reg-test/reg_test_imageGradient.cpp           | 16 ++++++---
 reg-test/reg_test_interpolation.cpp           | 16 ++++++---
 reg-test/reg_test_lncc.cpp                    | 34 +++++++++++--------
 reg-test/reg_test_nmi.cpp                     |  8 +++--
 reg-test/reg_test_nmi_gradient.cpp            |  3 ++
 reg-test/reg_test_normaliseGradient.cpp       |  2 +-
 ...g_test_regr_approxLinearEnergyGradient.cpp |  8 ++---
 reg-test/reg_test_regr_blockMatching.cpp      | 16 +++++----
 .../reg_test_regr_getDeformationField.cpp     |  2 +-
 reg-test/reg_test_regr_lts.cpp                |  9 +++--
 reg-test/reg_test_regr_measure.cpp            | 28 ++++-----------
 18 files changed, 126 insertions(+), 82 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 53d5a5ad..4adf9844 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-356
+357
diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp
index 1d54e6b0..dd39cf4e 100644
--- a/reg-test/reg_test_affineDeformationField.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -13,7 +13,7 @@
 typedef std::tuple<std::string, nifti_image*, mat44*, float*, float*, float*> TestData;
 typedef std::tuple<unique_ptr<AladinContent>, unique_ptr<Platform>> ContentDesc;
 
-TEST_CASE("Affine deformation field", "[AffineDefField]") {
+TEST_CASE("Affine Deformation Field", "[unit]") {
     // Create a reference 2D image
     int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 };
     nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
@@ -157,7 +157,10 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") {
         // Loop over all possibles contents for each test
         for (auto&& contentDesc : contentDescs) {
             auto&& [content, platform] = contentDesc;
-            SECTION(testName + " " + platform->GetName()) {
+            const std::string sectionName = testName + " " + platform->GetName();
+            SECTION(sectionName) {
+                NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
+
                 // Do the calculation
                 unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) };
                 affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp
index f6889700..cdc57493 100644
--- a/reg-test/reg_test_be.cpp
+++ b/reg-test/reg_test_be.cpp
@@ -221,10 +221,14 @@ TEST_CASE_METHOD(BendingEnergyTest, "Bending Energy", "[unit]") {
 
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
-            // if (fabs(result - expected) > EPS){
-            NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl;
-            // }
-            REQUIRE(fabs(result - expected) < EPS);
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            const auto diff = abs(result - expected);
+            if (diff > 0)
+                NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl;
+            REQUIRE(diff < EPS);
         }
     }
 }
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index a314e376..aa66259a 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -154,7 +154,7 @@ class BMTest {
     }
 };
 
-TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") {
+TEST_CASE_METHOD(BMTest, "Block Matching", "[unit]") {
     // Loop over all generated test cases
     for (auto&& testCase : this->testCases) {
         // Retrieve test information
@@ -163,16 +163,20 @@ TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") {
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
 
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
             // Loop over the block and ensure all values are identical
             for (int b = 0; b < blockMatchingParams->activeBlockNumber; ++b) {
                 for (int d = 0; d < (int)blockMatchingParams->dim; ++d) {
                     const int i = b * (int)blockMatchingParams->dim + d;
                     const auto diffPos = blockMatchingParams->warpedPosition[i] - blockMatchingParams->referencePosition[i];
-                    if (fabs(diffPos - OFFSET) > EPS) {
+                    const auto diff = abs(diffPos - OFFSET);
+                    if (diff > 0) {
                         NR_COUT << "[" << b << "/" << blockMatchingParams->activeBlockNumber << ":" << d << "] ";
                         NR_COUT << diffPos << std::endl;
                     }
-                    REQUIRE(fabs(diffPos - OFFSET) < EPS);
+                    REQUIRE(diff < EPS);
                 }
             }
         }
diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp
index 49550c77..affaa42d 100644
--- a/reg-test/reg_test_composeField.cpp
+++ b/reg-test/reg_test_composeField.cpp
@@ -148,7 +148,7 @@ class ComposeDeformationFieldTest {
     }
 };
 
-TEST_CASE_METHOD(ComposeDeformationFieldTest, "Compose deformation field", "[unit]") {
+TEST_CASE_METHOD(ComposeDeformationFieldTest, "Compose Deformation Field", "[unit]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
@@ -167,7 +167,7 @@ TEST_CASE_METHOD(ComposeDeformationFieldTest, "Compose deformation field", "[uni
                 const float resVal = resPtr[i];
                 const float expVal = expPtr[i];
                 const float diff = abs(resVal - expVal);
-                if (diff > EPS) {
+                if (diff > 0) {
                     std::cout << "[i]=" << i;
                     std::cout << " | diff=" << diff;
                     std::cout << " | Result=" << resVal;
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 644eb49b..57555e12 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -228,7 +228,7 @@ class ConjugateGradientTest: public InterfaceOptimiser {
     virtual void UpdateBestObjFunctionValue() {}
 };
 
-TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradient]") {
+TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
@@ -239,6 +239,9 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
         SECTION(sectionName) {
             NR_COUT << "\n**************** UpdateControlPointPosition " << sectionName << " ****************" << std::endl;
 
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
             // Set the control point grid
             NiftiImage img = content->GetControlPointGrid();
             // Use bestControlPointGrid to store bestDof during initialisation of the optimiser
@@ -273,8 +276,10 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
             for (size_t i = 0; i < controlPointGridExpected.nVoxels(); ++i) {
                 const float cppVal = cppPtr[i];
                 const float cppExpVal = cppExpPtr[i];
-                NR_COUT << i << " " << cppVal << " " << cppExpVal << std::endl;
-                REQUIRE(fabs(cppVal - cppExpVal) < EPS);
+                const auto diff = abs(cppVal - cppExpVal);
+                if (diff > 0)
+                    NR_COUT << i << " " << cppVal << " " << cppExpVal << std::endl;
+                REQUIRE(diff == 0);
             }
 
             // Update the gradient values
@@ -335,13 +340,17 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien
                     for (size_t i = 0; i < transGrad.nVoxels(); ++i) {
                         const float gradVal = gradPtr[i];
                         const float gradExpVal = gradExpPtr[i];
-                        NR_COUT << i << " " << gradVal << " " << gradExpVal << std::endl;
-                        REQUIRE(fabs(gradVal - gradExpVal) < EPS);
+                        const auto diff = abs(gradVal - gradExpVal);
+                        if (diff > EPS)
+                            NR_COUT << i << " " << gradVal << " " << gradExpVal << std::endl;
+                        REQUIRE(diff < EPS);
                         if (isSymmetric) {
                             const float gradBwVal = gradBwPtr[i];
                             const float gradExpBwVal = gradExpBwPtr[i];
-                            NR_COUT << i << " " << gradBwVal << " " << gradExpBwVal << " backwards" << std::endl;
-                            REQUIRE(fabs(gradBwVal - gradExpBwVal) < EPS);
+                            const auto diff = abs(gradBwVal - gradExpBwVal);
+                            if (diff > EPS)
+                                NR_COUT << i << " " << gradBwVal << " " << gradExpBwVal << " backwards" << std::endl;
+                            REQUIRE(diff < EPS);
                         }
                     }
                 }
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index b213f3fc..a0645743 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -201,7 +201,7 @@ class GetDeformationFieldTest {
     }
 };
 
-TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation field from b-spline grid", "[unit]") {
+TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation Field from B-spline Grid", "[unit]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
@@ -209,13 +209,17 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation field from b-spline grid"
 
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
             const auto resPtr = result.data();
             const auto expPtr = expected.data();
             for (auto i = 0; i < expected.nVoxels(); i++) {
                 const float resVal = resPtr[i];
                 const float expVal = expPtr[i];
                 const float diff = abs(resVal - expVal);
-                if (diff > EPS) {
+                if (diff > 0) {
                     NR_COUT << "[i]=" << i;
                     NR_COUT << " | diff=" << diff;
                     NR_COUT << " | Result=" << resVal;
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 1b243132..25cbd12a 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -15,7 +15,7 @@
 typedef std::tuple<std::string, NiftiImage, NiftiImage, int, float*> TestData;
 typedef std::tuple<unique_ptr<DefContent>, unique_ptr<Platform>> ContentDesc;
 
-TEST_CASE("Image gradient", "[ImageGradient]") {
+TEST_CASE("Image Gradient", "[unit]") {
     // Create a reference 2D image
     vector<NiftiImage::dim_t> dimFlo{ 4, 4 };
     NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
@@ -171,7 +171,13 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
         // Loop over all possibles contents for each test
         for (auto&& contentDesc : contentDescs) {
             auto&& [content, platform] = contentDesc;
-            SECTION(testName + " " + platform->GetName()) {
+            const std::string sectionName = testName + " " + platform->GetName();
+            SECTION(sectionName) {
+                NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
+
+                // Increase the precision for the output
+                NR_COUT << std::fixed << std::setprecision(10);
+
                 // Set the warped gradient image to host the computation
                 NiftiImage warpedGradient(content->GetWarpedGradient());
                 warpedGradient.setDim(NiftiDim::NDim, defField->ndim);
@@ -196,8 +202,10 @@ TEST_CASE("Image gradient", "[ImageGradient]") {
                 warpedGradient.disown();
                 for (size_t i = 0; i < nVoxels; ++i) {
                     const float warpedGradVal = warpedGradPtr[i];
-                    NR_COUT << i << " " << warpedGradVal << " " << testResult[i] << std::endl;
-                    REQUIRE(fabs(warpedGradVal - testResult[i]) < EPS);
+                    const auto diff = abs(warpedGradVal - testResult[i]);
+                    if (diff > 0)
+                        NR_COUT << i << " " << warpedGradVal << " " << testResult[i] << std::endl;
+                    REQUIRE(diff < EPS);
                 }
             }
         }
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index b3d05830..e2699492 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -16,7 +16,7 @@
 typedef std::tuple<std::string, NiftiImage, NiftiImage, int, float*> TestData;
 typedef std::tuple<unique_ptr<Content>, shared_ptr<Platform>> ContentDesc;
 
-TEST_CASE("Interpolation", "[Interpolation]") {
+TEST_CASE("Interpolation", "[unit]") {
     // Create a reference 2D image
     vector<NiftiImage::dim_t> dimFlo{ 4, 4 };
     NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32);
@@ -193,7 +193,13 @@ TEST_CASE("Interpolation", "[Interpolation]") {
             auto&& [content, platform] = contentDesc;
             const bool isAladinContent = dynamic_cast<AladinContent*>(content.get());
             auto contentName = isAladinContent ? "Aladin" : "Base";
-            SECTION(testName + " " + platform->GetName() + " - " + contentName) {
+            const std::string sectionName = testName + " " + platform->GetName() + " - " + contentName;
+            SECTION(sectionName) {
+                NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
+
+                // Increase the precision for the output
+                NR_COUT << std::fixed << std::setprecision(10);
+
                 // Create and set a warped image to host the computation
                 NiftiImage warped(defField, NiftiImage::Copy::ImageInfo);
                 warped.setDim(NiftiDim::NDim, defField->nu);
@@ -223,8 +229,10 @@ TEST_CASE("Interpolation", "[Interpolation]") {
                 warped.disown();
                 for (size_t i = 0; i < nVoxels; ++i) {
                     const float warpedValue = warpedPtr[i];
-                    NR_COUT << i << " " << warpedValue << " " << testResult[i] << std::endl;
-                    REQUIRE(fabs(warpedValue - testResult[i]) < EPS);
+                    const float diff = abs(warpedValue - testResult[i]);
+                    if (diff > 0)
+                        NR_COUT << i << " " << warpedValue << " " << testResult[i] << std::endl;
+                    REQUIRE(diff < EPS);
                 }
             }
         }
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index 0355aa84..aa916ec5 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -138,10 +138,9 @@ class LnccTest {
         for (auto&& data : testData) {
             for (auto&& platformType : PlatformTypes) {
                 // Create the platform
-                shared_ptr<Platform> platform{ new Platform(platformType) };
+                unique_ptr<Platform> platform{ new Platform(platformType) };
                 // Make a copy of the test data
-                auto td = data;
-                auto&& [testName, reference, floating, sigma, result] = td;
+                auto [testName, reference, floating, sigma, expLncc] = data;
                 // Create the content creator
                 unique_ptr<DefContentCreator> contentCreator{
                     dynamic_cast<DefContentCreator*>(platform->CreateContentCreator(ContentType::Def))
@@ -159,8 +158,9 @@ class LnccTest {
                 measure_lncc->SetKernelStandardDeviation(0, sigma);
                 measure_lncc->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0
                 measure->Initialise(*measure_lncc, *content);
-
-                testCases.push_back({ std::move(content), std::move(measure_lncc), platform, std::move(td) });
+                const double lncc = measure_lncc->GetSimilarityMeasureValue();
+                // Save for testing
+                testCases.push_back({ testName, lncc, expLncc });
             }
         }
     }
@@ -174,7 +174,7 @@ class LnccTest {
 
     using LocalStats = std::tuple<double, double>;
     using TestData = std::tuple<std::string, NiftiImage, NiftiImage, float, double>;
-    using TestCase = std::tuple<unique_ptr<Content>, unique_ptr<reg_lncc>, shared_ptr<Platform>, TestData>;
+    using TestCase = std::tuple<std::string, double, double>;
     inline static vector<TestCase> testCases;
 
     double GetLNCCNoConv(int kernelStd, const NiftiImage& ref, const NiftiImage& flo) {
@@ -192,7 +192,7 @@ class LnccTest {
         return lncc / voxelNumber;
     }
 
-    Kernel InitialiseKernel(const NiftiImage& ref, const float& kernelStdVoxel) {
+    Kernel InitialiseKernel(const NiftiImage& ref, const float kernelStdVoxel) {
         Kernel kernel;
         kernel.radius[0] = static_cast<int>(3.f * kernelStdVoxel);
         kernel.radius[1] = static_cast<int>(3.f * kernelStdVoxel);
@@ -222,7 +222,7 @@ class LnccTest {
         return kernel;
     }
 
-    LocalStats GetLocalMeans(const int& x, const int& y, const int& z, const Kernel& kernel,
+    LocalStats GetLocalMeans(const int x, const int y, const int z, const Kernel& kernel,
                              const NiftiImage& ref, const NiftiImage& flo) {
         double meanRef = 0, meanFlo = 0, kernelSum = 0;
         const float *kernelPtr = kernel.ptr.get();
@@ -252,7 +252,7 @@ class LnccTest {
         return LocalStats(meanRef / kernelSum, meanFlo / kernelSum);
     }
 
-    double GetLocalCC(const int& x, const int& y, const int& z, const Kernel& kernel,
+    double GetLocalCC(const int x, const int y, const int z, const Kernel& kernel,
                       const NiftiImage& ref, const NiftiImage& flo, const LocalStats& means) {
         const float *kernelPtr = kernel.ptr.get();
         const auto refPtr = ref.data();
@@ -291,18 +291,22 @@ class LnccTest {
     }
 };
 
-TEST_CASE_METHOD(LnccTest, "LNCC", "[GetSimilarityMeasureValue]") {
+TEST_CASE_METHOD(LnccTest, "LNCC", "[unit][GetSimilarityMeasureValue]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
-        auto&& [content, measure, platform, testData] = testCase;
-        auto&& [testName, reference, floating, sigma, value] = testData;
+        auto&& [testName, lncc, expLncc] = testCase;
 
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
-            const double lncc = measure->GetSimilarityMeasureValue();
-            NR_COUT << lncc << " " << value << std::endl;
-            REQUIRE(fabs(lncc - value) < EPS);
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            const double diff = abs(lncc - expLncc);
+            if (diff > 0)
+                NR_COUT << lncc << " " << expLncc << std::endl;
+            REQUIRE(diff < EPS);
         }
     }
 }
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index 21847f10..6030f69d 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -88,7 +88,7 @@ class NmiTest {
                 unique_ptr<reg_nmi> measure_nmi{ dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)) };
                 measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0
                 measure->Initialise(*measure_nmi, *content);
-                double nmi = measure_nmi->GetSimilarityMeasureValue();
+                const double nmi = measure_nmi->GetSimilarityMeasureValue();
 
                 testCases.push_back({ testName + " " + platform->GetName(), nmi, expected });
             }
@@ -158,8 +158,12 @@ TEST_CASE_METHOD(NmiTest, "NMI", "[unit]") {
 
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
             const auto diff = abs(result - expected);
-            if (diff > EPS)
+            if (diff > 0)
                 NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl;
             REQUIRE(diff < EPS);
         }
diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp
index 95283b0f..ec8f5326 100644
--- a/reg-test/reg_test_nmi_gradient.cpp
+++ b/reg-test/reg_test_nmi_gradient.cpp
@@ -143,6 +143,9 @@ TEST_CASE_METHOD(NMIGradientTest, "NMI Gradient", "[unit]") {
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
 
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
             float *resPtr = static_cast<float*>(result->data);
             float *expPtr = static_cast<float*>(expected->data);
             float resMean = reg_tools_getMeanValue(result);
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index 64f49fae..cba026ce 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -192,7 +192,7 @@ class NormaliseGradientTest {
     }
 };
 
-TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradient]") {
+TEST_CASE_METHOD(NormaliseGradientTest, "Normalise Gradient", "[unit]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
index 8d982112..1cf5b166 100644
--- a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
+++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
@@ -22,10 +22,10 @@ class ApproxLinearEnergyGradientTest {
         // Create a random number generator
         std::random_device rd;
         std::mt19937 gen(rd());
-        std::uniform_real_distribution<float> distr(0, 1);
+        std::uniform_real_distribution<float> distr(0, 10);
 
         // Create 2D reference, floating and control point grid images
-        constexpr NiftiImage::dim_t size = 16;
+        constexpr NiftiImage::dim_t size = 4;
         vector<NiftiImage::dim_t> dim{ size, size };
         NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
@@ -138,7 +138,7 @@ TEST_CASE_METHOD(ApproxLinearEnergyGradientTest, "Regression Approximate Linear
 
             // Check the approximate linear energy
             NR_COUT << "Approx Linear Energy: " << approxLinearEnergyCpu << " " << approxLinearEnergyCuda << std::endl;
-            REQUIRE(fabs(approxLinearEnergyCpu - approxLinearEnergyCuda) < EPS);
+            REQUIRE(abs(approxLinearEnergyCpu - approxLinearEnergyCuda) < EPS);
 
             // Check the transformation gradients
             const auto transGradCpuPtr = transGradCpu.data();
@@ -146,7 +146,7 @@ TEST_CASE_METHOD(ApproxLinearEnergyGradientTest, "Regression Approximate Linear
             for (size_t i = 0; i < transGradCpu.nVoxels(); ++i) {
                 const float cpuVal = transGradCpuPtr[i];
                 const float cudaVal = transGradCudaPtr[i];
-                const double diff = fabs(cpuVal - cudaVal);
+                const auto diff = abs(cpuVal - cudaVal);
                 if (diff > EPS)
                     NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl;
                 REQUIRE(diff < EPS);
diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp
index 5bb9e8e0..8676f005 100644
--- a/reg-test/reg_test_regr_blockMatching.cpp
+++ b/reg-test/reg_test_regr_blockMatching.cpp
@@ -118,7 +118,7 @@ class BMTest {
     }
 };
 
-TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") {
+TEST_CASE_METHOD(BMTest, "Regression Block Matching", "[regression]") {
     // Loop over all generated test cases
     for (auto&& testCase : this->testCases) {
         // Retrieve test information
@@ -127,28 +127,32 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") {
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
 
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
             // Ensure both approaches retrieve the same number of voxels
             REQUIRE(blockMatchingParamsCpu->activeBlockNumber == blockMatchingParamsCuda->activeBlockNumber);
 
             // Loop over the block and ensure all values are identical
             for (int b = 0; b < blockMatchingParamsCpu->activeBlockNumber; ++b) {
                 for (int d = 0; d < (int)blockMatchingParamsCpu->dim; ++d) {
-
                     const int i = b * (int)blockMatchingParamsCpu->dim + d;
                     const auto refPosCpu = blockMatchingParamsCpu->referencePosition[i];
                     const auto refPosCuda = blockMatchingParamsCuda->referencePosition[i];
-                    if (fabs(refPosCpu - refPosCuda) > EPS) {
+                    auto diff = abs(refPosCpu - refPosCuda);
+                    if (diff > 0) {
                         NR_COUT << "Ref[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:";
                         NR_COUT << refPosCpu << " | CUDA:" << refPosCuda << std::endl;
                     }
-                    REQUIRE(fabs(refPosCpu - refPosCuda) < EPS);
+                    REQUIRE(diff == 0);
                     const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[i];
                     const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[i];
-                    if (fabs(warPosCpu - warPosCuda) > EPS) {
+                    diff = abs(warPosCpu - warPosCuda);
+                    if (diff > 0) {
                         NR_COUT << "War[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:";
                         NR_COUT << warPosCpu << " | CUDA:" << warPosCuda << std::endl;
                     }
-                    REQUIRE(fabs(warPosCpu - warPosCuda) < EPS);
+                    REQUIRE(diff == 0);
                 }
             }
         }
diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp
index f33bbe4b..525bee81 100644
--- a/reg-test/reg_test_regr_getDeformationField.cpp
+++ b/reg-test/reg_test_regr_getDeformationField.cpp
@@ -567,7 +567,7 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Regression Deformation Field from B-s
                     NR_COUT << " | Result=" << defFieldVal;
                     NR_COUT << " | Expected=" << expDefFieldVal << std::endl;
                 }
-                REQUIRE(diff < EPS);
+                REQUIRE(diff == 0);
             }
         }
     }
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
index 16547d70..0cc60f7f 100644
--- a/reg-test/reg_test_regr_lts.cpp
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -144,13 +144,18 @@ TEST_CASE_METHOD(LtsTest, "Regression LTS", "[regression]") {
         SECTION(testName) {
             NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
 
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
             // Loop over the matrix values and ensure they are identical
             for (int i = 0; i < 4; ++i) {
                 for (int j = 0; j < 4; ++j) {
                     const auto mCpu = matCpu->m[i][j];
                     const auto mCuda = matCuda->m[i][j];
-                    NR_COUT << i << " " << j << " " << mCpu << " " << mCuda << std::endl;
-                    REQUIRE(fabs(mCpu - mCuda) < EPS);
+                    const auto diff = abs(mCpu - mCuda);
+                    if (diff > 0)
+                        NR_COUT << i << " " << j << " " << mCpu << " " << mCuda << std::endl;
+                    REQUIRE(diff == 0);
                 }
             }
         }
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 89b5627e..07207b2a 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -154,19 +154,11 @@ class MeasureTest {
             const double simMeasureCpu = measureCpu->GetSimilarityMeasureValue();
 
             // Compute the similarity measure value for CUDA
-            NiftiImage warpedCuda(contentCuda->F3dContent::GetWarped());
-            warpedCuda.copyData(contentCpu->GetWarped());
-            warpedCuda.disown();
-            contentCuda->UpdateWarped();
-            // computeCuda->GetDeformationField(false, true);
-            // computeCuda->ResampleImage(1, std::numeric_limits<float>::quiet_NaN());
+            computeCuda->GetDeformationField(false, true);
+            computeCuda->ResampleImage(1, std::numeric_limits<float>::quiet_NaN());
             if (isSymmetric) {
-                NiftiImage warpedCudaBw(contentCudaBw->F3dContent::GetWarped());
-                warpedCudaBw.copyData(contentCpuBw->GetWarped());
-                warpedCudaBw.disown();
-                contentCudaBw->UpdateWarped();
-                // computeCudaBw->GetDeformationField(false, true);
-                // computeCudaBw->ResampleImage(1, std::numeric_limits<float>::quiet_NaN());
+                computeCudaBw->GetDeformationField(false, true);
+                computeCudaBw->ResampleImage(1, std::numeric_limits<float>::quiet_NaN());
             }
             const double simMeasureCuda = measureCuda->GetSimilarityMeasureValue();
 
@@ -182,18 +174,10 @@ class MeasureTest {
 
             // Compute the similarity measure gradient for CUDA
             contentCuda->ZeroVoxelBasedMeasureGradient();
-            // computeCuda->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
-            NiftiImage warpedGradCuda(contentCuda->F3dContent::GetWarpedGradient());
-            warpedGradCuda.copyData(contentCpu->GetWarpedGradient());
-            warpedGradCuda.disown();
-            contentCuda->UpdateWarpedGradient();
+            computeCuda->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
             if (isSymmetric) {
                 contentCudaBw->ZeroVoxelBasedMeasureGradient();
-                // computeCudaBw->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
-                NiftiImage warpedGradCudaBw(contentCudaBw->F3dContent::GetWarpedGradient());
-                warpedGradCudaBw.copyData(contentCpuBw->GetWarpedGradient());
-                warpedGradCudaBw.disown();
-                contentCudaBw->UpdateWarpedGradient();
+                computeCudaBw->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
             }
             measureCuda->GetVoxelBasedSimilarityMeasureGradient(timepoint);
 

From cc92a523a2f7218f46a87f94092cc4be42229f12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 6 Nov 2023 16:47:56 +0000
Subject: [PATCH 239/314] Make CudaCompute::ApproxBendingEnergyGradient() on a
 par with CPU #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/cuda/BlockSize.hpp                    |  79 ----
 reg-lib/cuda/CudaCompute.cu                   |  16 +-
 reg-lib/cuda/FloatOps.hpp                     |  12 +
 reg-lib/cuda/_reg_globalTransformation_gpu.cu |   7 +-
 reg-lib/cuda/_reg_globalTransformation_gpu.h  |   3 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  | 362 ++++++++++-----
 reg-lib/cuda/_reg_localTransformation_gpu.h   |  14 +-
 .../cuda/_reg_localTransformation_kernels.cu  | 427 +-----------------
 9 files changed, 292 insertions(+), 630 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 4adf9844..b4eed3b8 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-357
+358
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index aeaf3631..65f8a15d 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -14,32 +14,11 @@
 namespace NiftyReg {
 /* *************************************************************** */
 struct BlockSize {
-    /* _reg_blockMatching_gpu */
-    unsigned target_block;
-    unsigned result_block;
-    /* _reg_mutualinformation_gpu */
-    unsigned reg_smoothJointHistogramX;
-    unsigned reg_smoothJointHistogramY;
-    unsigned reg_smoothJointHistogramZ;
-    unsigned reg_smoothJointHistogramW;
-    unsigned reg_marginaliseTargetX;
-    unsigned reg_marginaliseTargetXY;
-    unsigned reg_marginaliseResultX;
-    unsigned reg_marginaliseResultXY;
     unsigned reg_getVoxelBasedNMIGradientUsingPW2D;
     unsigned reg_getVoxelBasedNMIGradientUsingPW3D;
-    unsigned reg_getVoxelBasedNMIGradientUsingPW2x2;
-    /* _reg_globalTransformation_gpu */
     unsigned reg_affine_getDeformationField;
-    /* _reg_localTransformation_gpu */
     unsigned reg_spline_getDeformationField2D;
     unsigned reg_spline_getDeformationField3D;
-    unsigned reg_spline_getApproxSecondDerivatives2D;
-    unsigned reg_spline_getApproxSecondDerivatives3D;
-    unsigned reg_spline_getApproxBendingEnergy2D;
-    unsigned reg_spline_getApproxBendingEnergy3D;
-    unsigned reg_spline_getApproxBendingEnergyGradient2D;
-    unsigned reg_spline_getApproxBendingEnergyGradient3D;
     unsigned reg_spline_getApproxJacobianValues2D;
     unsigned reg_spline_getApproxJacobianValues3D;
     unsigned reg_spline_approxLinearEnergyGradient;
@@ -52,27 +31,21 @@ struct BlockSize {
     unsigned reg_spline_computeJacGradient3D;
     unsigned reg_spline_approxCorrectFolding3D;
     unsigned reg_spline_correctFolding3D;
-    unsigned reg_getDeformationFromDisplacement;
     unsigned reg_defField_compose2D;
     unsigned reg_defField_compose3D;
     unsigned reg_defField_getJacobianMatrix;
-    /* _reg_optimiser_gpu */
     unsigned reg_initialiseConjugateGradient;
     unsigned reg_getConjugateGradient1;
     unsigned reg_getConjugateGradient2;
-    unsigned GetMaximalLength;
     unsigned reg_updateControlPointPosition;
-    /* _reg_ssd_gpu */
     unsigned GetSsdValue;
     unsigned GetSsdGradient;
-    /* _reg_tools_gpu */
     unsigned reg_voxelCentricToNodeCentric;
     unsigned reg_convertNMIGradientFromVoxelToRealSpace;
     unsigned reg_ApplyConvolutionWindowAlongX;
     unsigned reg_ApplyConvolutionWindowAlongY;
     unsigned reg_ApplyConvolutionWindowAlongZ;
     unsigned Arithmetic;
-    /* _reg_resampling_gpu */
     unsigned reg_resampleImage2D;
     unsigned reg_resampleImage3D;
     unsigned reg_getImageGradient2D;
@@ -81,31 +54,11 @@ struct BlockSize {
 /* *************************************************************** */
 struct BlockSize100: public BlockSize {
     BlockSize100() {
-        target_block = 512; // 15 reg - 32 smem - 24 cmem
-        result_block = 384; // 21 reg - 11048 smem - 24 cmem
-        /* _reg_mutualinformation_gpu */
-        reg_smoothJointHistogramX = 384; // 07 reg - 24 smem - 20 cmem
-        reg_smoothJointHistogramY = 320; // 11 reg - 24 smem - 20 cmem
-        reg_smoothJointHistogramZ = 320; // 11 reg - 24 smem - 20 cmem
-        reg_smoothJointHistogramW = 384; // 08 reg - 24 smem - 20 cmem
-        reg_marginaliseTargetX = 384; // 06 reg - 24 smem
-        reg_marginaliseTargetXY = 384; // 07 reg - 24 smem
-        reg_marginaliseResultX = 384; // 06 reg - 24 smem
-        reg_marginaliseResultXY = 384; // 07 reg - 24 smem
         reg_getVoxelBasedNMIGradientUsingPW2D = 384; // 21 reg - 24 smem - 32 cmem
         reg_getVoxelBasedNMIGradientUsingPW3D = 320; // 25 reg - 24 smem - 32 cmem
-        reg_getVoxelBasedNMIGradientUsingPW2x2 = 192; // 42 reg - 24 smem - 36 cmem
-        /* _reg_globalTransformation_gpu */
         reg_affine_getDeformationField = 512; // 16 reg - 24 smem
-        /* _reg_localTransformation_gpu */
         reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem
         reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem
-        reg_spline_getApproxSecondDerivatives2D = 512; // 15 reg - 132 smem - 32 cmem
-        reg_spline_getApproxSecondDerivatives3D = 192; // 38 reg - 672 smem - 104 cmem
-        reg_spline_getApproxBendingEnergy2D = 384; // 07 reg - 24 smem
-        reg_spline_getApproxBendingEnergy3D = 320; // 12 reg - 24 smem
-        reg_spline_getApproxBendingEnergyGradient2D = 512; // 15 reg - 132 smem - 36 cmem
-        reg_spline_getApproxBendingEnergyGradient3D = 256; // 27 reg - 672 smem - 108 cmem
         reg_spline_getApproxJacobianValues2D = 384; // 17 reg - 104 smem - 36 cmem
         reg_spline_getApproxJacobianValues3D = 256; // 27 reg - 356 smem - 108 cmem
         reg_spline_approxLinearEnergyGradient = 384; // 40 reg
@@ -118,27 +71,21 @@ struct BlockSize100: public BlockSize {
         reg_spline_computeJacGradient3D = 256; // 32 reg - 24 smem - 64 cmem
         reg_spline_approxCorrectFolding3D = 256; // 32 reg - 24 smem - 24 cmem
         reg_spline_correctFolding3D = 256; // 31 reg - 24 smem - 32 cmem
-        reg_getDeformationFromDisplacement = 384; // 09 reg - 24 smem
         reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem
         reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem
         reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
-        /* _reg_optimiser_gpu */
         reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem
         reg_getConjugateGradient1 = 320; // 12 reg - 24 smem
         reg_getConjugateGradient2 = 384; // 10 reg - 40 smem
-        GetMaximalLength = 384; // 04 reg - 24 smem
         reg_updateControlPointPosition = 384; // 08 reg - 24 smem
-        /* _reg_ssd_gpu */
         GetSsdValue = 320; // 12 reg - 24 smem - 08 cmem
         GetSsdGradient = 320; // 12 reg - 24 smem - 08 cmem
-        /* _reg_tools_gpu */
         reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem
         reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
         reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
         reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
         reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem
         Arithmetic = 384; // 5 reg - 24 smem
-        /* _reg_resampling_gpu */
         reg_resampleImage2D = 320; // 10 reg - 24 smem - 12 cmem
         reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem
         reg_getImageGradient2D = 512; // 16 reg - 24 smem - 20 cmem - 24 lmem
@@ -149,31 +96,11 @@ struct BlockSize100: public BlockSize {
 /* *************************************************************** */
 struct BlockSize300: public BlockSize {
     BlockSize300() {
-        target_block = 640; // 45 reg
-        result_block = 640; // 47 reg - ????? smem
-        /* _reg_mutualinformation_gpu */
-        reg_smoothJointHistogramX = 768; // 34 reg
-        reg_smoothJointHistogramY = 768; // 34 reg
-        reg_smoothJointHistogramZ = 768; // 34 reg
-        reg_smoothJointHistogramW = 768; // 34 reg
-        reg_marginaliseTargetX = 1024; // 24 reg
-        reg_marginaliseTargetXY = 1024; // 24 reg
-        reg_marginaliseResultX = 1024; // 24 reg
-        reg_marginaliseResultXY = 1024; // 24 reg
         reg_getVoxelBasedNMIGradientUsingPW2D = 768; // 38 reg
         reg_getVoxelBasedNMIGradientUsingPW3D = 640; // 45 reg
-        reg_getVoxelBasedNMIGradientUsingPW2x2 = 576; // 55 reg
-        /* _reg_globalTransformation_gpu */
         reg_affine_getDeformationField = 1024; // 23 reg
-        /* _reg_localTransformation_gpu */
         reg_spline_getDeformationField2D = 1024; // 34 reg
         reg_spline_getDeformationField3D = 1024; // 34 reg
-        reg_spline_getApproxSecondDerivatives2D = 1024; // 25 reg
-        reg_spline_getApproxSecondDerivatives3D = 768; // 34 reg
-        reg_spline_getApproxBendingEnergy2D = 1024; // 23 reg
-        reg_spline_getApproxBendingEnergy3D = 1024; // 23 reg
-        reg_spline_getApproxBendingEnergyGradient2D = 1024; // 28 reg
-        reg_spline_getApproxBendingEnergyGradient3D = 768; // 33 reg
         reg_spline_getApproxJacobianValues2D = 768; // 34 reg
         reg_spline_getApproxJacobianValues3D = 640; // 46 reg
         reg_spline_approxLinearEnergyGradient = 768; // 40 reg
@@ -186,27 +113,21 @@ struct BlockSize300: public BlockSize {
         reg_spline_computeJacGradient3D = 768; // 37 reg
         reg_spline_approxCorrectFolding3D = 768; // 34 reg
         reg_spline_correctFolding3D = 768; // 34 reg
-        reg_getDeformationFromDisplacement = 1024; // 18 reg
         reg_defField_compose2D = 1024; // 23 reg
         reg_defField_compose3D = 1024; // 24 reg
         reg_defField_getJacobianMatrix = 768; // 34 reg
-        /* _reg_optimiser_gpu */
         reg_initialiseConjugateGradient = 1024; // 20 reg
         reg_getConjugateGradient1 = 1024; // 22 reg
         reg_getConjugateGradient2 = 1024; // 25 reg
-        GetMaximalLength = 1024; // 20 reg
         reg_updateControlPointPosition = 1024; // 22 reg
-        /* _reg_ssd_gpu */
         GetSsdValue = 768; // 34 reg
         GetSsdGradient = 768; // 34 reg
-        /* _reg_tools_gpu */
         reg_voxelCentricToNodeCentric = 1024; // 23 reg
         reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg
         reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg
         reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg
         reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg
         Arithmetic = 1024; //
-        /* _reg_resampling_gpu */
         reg_resampleImage2D = 1024; // 23 reg
         reg_resampleImage3D = 1024; // 24 reg
         reg_getImageGradient2D = 1024; // 34 reg
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index f255b635..f569f1bc 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -47,15 +47,21 @@ double CudaCompute::CorrectFolding(bool approx) {
 /* *************************************************************** */
 double CudaCompute::ApproxBendingEnergy() {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    return reg_spline_approxBendingEnergy_gpu(con.F3dContent::GetControlPointGrid(), con.GetControlPointGridCuda());
+    const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    auto approxBendingEnergy = controlPointGrid->nz > 1 ? reg_spline_approxBendingEnergy_gpu<true> :
+                                                          reg_spline_approxBendingEnergy_gpu<false>;
+    return approxBendingEnergy(controlPointGrid, con.GetControlPointGridCuda());
 }
 /* *************************************************************** */
 void CudaCompute::ApproxBendingEnergyGradient(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    reg_spline_approxBendingEnergyGradient_gpu(con.F3dContent::GetControlPointGrid(),
-                                               con.GetControlPointGridCuda(),
-                                               con.GetTransformationGradientCuda(),
-                                               weight);
+    nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    auto approxBendingEnergyGradient = controlPointGrid->nz > 1 ? reg_spline_approxBendingEnergyGradient_gpu<true> :
+                                                                  reg_spline_approxBendingEnergyGradient_gpu<false>;
+    approxBendingEnergyGradient(controlPointGrid,
+                                con.GetControlPointGridCuda(),
+                                con.GetTransformationGradientCuda(),
+                                weight);
 }
 /* *************************************************************** */
 double CudaCompute::ApproxLinearEnergy() {
diff --git a/reg-lib/cuda/FloatOps.hpp b/reg-lib/cuda/FloatOps.hpp
index 23f8b8de..2ddc43a3 100644
--- a/reg-lib/cuda/FloatOps.hpp
+++ b/reg-lib/cuda/FloatOps.hpp
@@ -156,3 +156,15 @@ __device__ __inline__ double2 operator+(const double2& a, const double2& b) {
     return { a.x + b.x, a.y + b.y };
 }
 /* *************************************************************** */
+__device__ __inline__ float2 make_float2(const float4& a) {
+    return { a.x, a.y };
+}
+/* *************************************************************** */
+__device__ __inline__ float3 make_float3(const float4& a) {
+    return { a.x, a.y, a.z };
+}
+/* *************************************************************** */
+__device__ __inline__ float4 make_float4(const float3& a) {
+    return { a.x, a.y, a.z, 0.f };
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
index d42ff980..34b668bd 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
@@ -16,7 +16,12 @@
 /* *************************************************************** */
 void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix,
                                         const nifti_image *targetImage,
-                                        float4 *deformationFieldCuda) {
+                                        float4 *deformationFieldCuda,
+                                        const bool composition) {
+    // TODO Implement composition
+    if (composition)
+        NR_FATAL_ERROR("Composition is not implemented on the GPU");
+
     const int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz);
     const size_t voxelNumber = targetImage->nvox;
 
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h
index 5d33b155..66430f8a 100755
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h
@@ -16,4 +16,5 @@
 
 void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix,
                                         const nifti_image *targetImage,
-                                        float4 *deformationFieldCuda);
+                                        float4 *deformationFieldCuda,
+                                        const bool composition = false);
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 0041e9a0..9328aff8 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -80,131 +80,211 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
     }
 }
 /* *************************************************************** */
-float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) {
-    auto blockSize = CudaContext::GetBlockSize();
-    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    const size_t controlPointGridSize = controlPointNumber * sizeof(float4);
-    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                         controlPointGridSize, cudaChannelFormatKindFloat, 4);
-
-    // First compute all the second derivatives
-    float4 *secondDerivativeValuesCuda;
-    size_t secondDerivativeValuesSize;
-    if (controlPointImage->nz > 1) {
-        secondDerivativeValuesSize = 6 * controlPointGridSize;
-        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
-        const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getApproxSecondDerivatives3D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
-                                                                         controlPointImageDim, (unsigned)controlPointNumber);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    } else {
-        secondDerivativeValuesSize = 3 * controlPointGridSize;
-        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
-        const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getApproxSecondDerivatives2D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
-                                                                         controlPointImageDim, (unsigned)controlPointNumber);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    }
-
-    // Compute the bending energy from the second derivatives
-    float *penaltyTermCuda;
-    NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTermCuda, controlPointNumber * sizeof(float)));
-    auto secondDerivativesTexture = Cuda::CreateTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear,
-                                                              secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
-    if (controlPointImage->nz > 1) {
-        const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy3D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getApproxBendingEnergy3D_kernel<<<gridDims, blockDims>>>(penaltyTermCuda, *secondDerivativesTexture,
-                                                                            (unsigned)controlPointNumber);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+template<bool is3d>
+struct Basis2nd {
+    float xx[27], yy[27], zz[27], xy[27], yz[27], xz[27];
+};
+template<>
+struct Basis2nd<false> {
+    float xx[9], yy[9], xy[9];
+};
+template<bool is3d>
+struct SecondDerivative {
+    using Type = float3;
+    using TextureType = float4; // Due to float3 is not allowed for textures
+    Type xx, yy, zz, xy, yz, xz;
+};
+template<>
+struct SecondDerivative<false> {
+    using Type = float2;
+    using TextureType = float2;
+    Type xx, yy, xy;
+};
+/* *************************************************************** */
+template<bool is3d, bool isGradient>
+__device__ SecondDerivative<is3d> GetApproxSecondDerivative(const unsigned index,
+                                                            cudaTextureObject_t controlPointTexture,
+                                                            const int3& controlPointImageDim,
+                                                            const Basis2nd<is3d>& basis) {
+    auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(index, controlPointImageDim);
+    if (!isGradient && (x < 1 || x >= controlPointImageDim.x - 1 ||
+                        y < 1 || y >= controlPointImageDim.y - 1 ||
+                        (is3d && (z < 1 || z >= controlPointImageDim.z - 1)))) return {};
+
+    SecondDerivative<is3d> secondDerivative{};
+    if constexpr (is3d) {
+        for (int c = z - 1, basInd = 0; c < z + 2; c++) {
+            if (isGradient && (c < 0 || c >= controlPointImageDim.z)) { basInd += 9; continue; }
+            const int indexZ = c * controlPointImageDim.y;
+            for (int b = y - 1; b < y + 2; b++) {
+                if (isGradient && (b < 0 || b >= controlPointImageDim.y)) { basInd += 3; continue; }
+                int indexXYZ = (indexZ + b) * controlPointImageDim.x + x - 1;
+                for (int a = x - 1; a < x + 2; a++, basInd++, indexXYZ++) {
+                    if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue;
+                    const float3& controlPointValue = make_float3(tex1Dfetch<float4>(controlPointTexture, indexXYZ));
+                    secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue;
+                    secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue;
+                    secondDerivative.zz = secondDerivative.zz + basis.zz[basInd] * controlPointValue;
+                    secondDerivative.xy = secondDerivative.xy + basis.xy[basInd] * controlPointValue;
+                    secondDerivative.yz = secondDerivative.yz + basis.yz[basInd] * controlPointValue;
+                    secondDerivative.xz = secondDerivative.xz + basis.xz[basInd] * controlPointValue;
+                }
+            }
+        }
     } else {
-        const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy2D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getApproxBendingEnergy2D_kernel<<<gridDims, blockDims>>>(penaltyTermCuda, *secondDerivativesTexture,
-                                                                            (unsigned)controlPointNumber);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+        for (int b = y - 1, basInd = 0; b < y + 2; b++) {
+            if (isGradient && (b < 0 || b >= controlPointImageDim.y)) { basInd += 3; continue; }
+            int indexXY = b * controlPointImageDim.x + x - 1;
+            for (int a = x - 1; a < x + 2; a++, basInd++, indexXY++) {
+                if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue;
+                const float2& controlPointValue = make_float2(tex1Dfetch<float4>(controlPointTexture, indexXY));
+                secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue;
+                secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue;
+                secondDerivative.xy = secondDerivative.xy + basis.xy[basInd] * controlPointValue;
+            }
+        }
     }
-    NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValuesCuda));
+    return secondDerivative;
+}
+/* *************************************************************** */
+template<bool is3d>
+double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) {
+    const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
+    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                            controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto controlPointTexture = *controlPointTexturePtr;
 
-    // Compute the mean bending energy value
-    double penaltyValue = reg_sumReduction_gpu(penaltyTermCuda, controlPointNumber);
-    NR_CUDA_SAFE_CALL(cudaFree(penaltyTermCuda));
+    // Get the constant basis values
+    Basis2nd<is3d> basis;
+    if constexpr (is3d)
+        set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.zz, basis.xy, basis.yz, basis.xz);
+    else
+        set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.xy);
 
-    return (float)(penaltyValue / (double)controlPointImage->nvox);
+    thrust::counting_iterator<unsigned> index(0);
+    return thrust::transform_reduce(thrust::device, index, index + controlPointNumber, [=]__device__(const unsigned index) {
+        const auto& secondDerivative = GetApproxSecondDerivative<is3d, false>(index, controlPointTexture, controlPointImageDim, basis);
+        if constexpr (is3d)
+            return (Square(secondDerivative.xx.x) + Square(secondDerivative.xx.y) + Square(secondDerivative.xx.z) +
+                    Square(secondDerivative.yy.x) + Square(secondDerivative.yy.y) + Square(secondDerivative.yy.z) +
+                    Square(secondDerivative.zz.x) + Square(secondDerivative.zz.y) + Square(secondDerivative.zz.z) +
+                    2.f * (Square(secondDerivative.xy.x) + Square(secondDerivative.xy.y) + Square(secondDerivative.xy.z) +
+                           Square(secondDerivative.yz.x) + Square(secondDerivative.yz.y) + Square(secondDerivative.yz.z) +
+                           Square(secondDerivative.xz.x) + Square(secondDerivative.xz.y) + Square(secondDerivative.xz.z)));
+        else
+            return (Square(secondDerivative.xx.x) + Square(secondDerivative.xx.y) + Square(secondDerivative.yy.x) +
+                    Square(secondDerivative.yy.y) + 2.f * (Square(secondDerivative.xy.x) + Square(secondDerivative.xy.y)));
+    }, 0.0, thrust::plus<double>()) / static_cast<double>(controlPointImage->nvox);
 }
+template double reg_spline_approxBendingEnergy_gpu<false>(const nifti_image*, const float4*);
+template double reg_spline_approxBendingEnergy_gpu<true>(const nifti_image*, const float4*);
 /* *************************************************************** */
-void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointImage,
-                                                const float4 *controlPointImageCuda,
+template<bool is3d>
+void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
+                                                float4 *controlPointImageCuda,
                                                 float4 *transGradientCuda,
                                                 float bendingEnergyWeight) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    const size_t controlPointGridSize = controlPointNumber * sizeof(float4);
-    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                         controlPointGridSize, cudaChannelFormatKindFloat, 4);
+    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
+                                                            controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto controlPointTexture = *controlPointTexturePtr;
+
+    // Get the constant basis values
+    Basis2nd<is3d> basis;
+    if constexpr (is3d)
+        set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.zz, basis.xy, basis.yz, basis.xz);
+    else
+        set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.xy);
+
+    reg_getDisplacementFromDeformation_gpu(controlPointImage, controlPointImageCuda);
 
     // First compute all the second derivatives
-    float4 *secondDerivativeValuesCuda;
-    size_t secondDerivativeValuesSize;
-    if (controlPointImage->nz > 1) {
-        secondDerivativeValuesSize = 6 * controlPointGridSize * sizeof(float4);
-        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
-        const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getApproxSecondDerivatives3D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
-                                                                         controlPointImageDim, (unsigned)controlPointNumber);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    } else {
-        secondDerivativeValuesSize = 3 * controlPointGridSize * sizeof(float4);
-        NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize));
-        const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getApproxSecondDerivatives2D<<<gridDims, blockDims>>>(secondDerivativeValuesCuda, *controlPointTexture,
-                                                                         controlPointImageDim, (unsigned)controlPointNumber);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    }
+    thrust::device_vector<typename SecondDerivative<is3d>::TextureType> secondDerivativesCudaVec((is3d ? 6 : 3) * controlPointNumber);
+    auto secondDerivativesCuda = secondDerivativesCudaVec.data().get();
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), controlPointNumber,
+                       [controlPointTexture, controlPointImageDim, basis, secondDerivativesCuda]__device__(const unsigned index) {
+        const auto& secondDerivative = GetApproxSecondDerivative<is3d, true>(index, controlPointTexture, controlPointImageDim, basis);
+        if constexpr (is3d) {
+            int derInd = 6 * index;
+            secondDerivativesCuda[derInd++] = make_float4(secondDerivative.xx);
+            secondDerivativesCuda[derInd++] = make_float4(secondDerivative.yy);
+            secondDerivativesCuda[derInd++] = make_float4(secondDerivative.zz);
+            secondDerivativesCuda[derInd++] = make_float4(2.f * secondDerivative.xy);
+            secondDerivativesCuda[derInd++] = make_float4(2.f * secondDerivative.yz);
+            secondDerivativesCuda[derInd] = make_float4(2.f * secondDerivative.xz);
+        } else {
+            int derInd = 3 * index;
+            secondDerivativesCuda[derInd++] = secondDerivative.xx;
+            secondDerivativesCuda[derInd++] = secondDerivative.yy;
+            secondDerivativesCuda[derInd] = 2.f * secondDerivative.xy;
+        }
+    });
+
+    auto secondDerivativesTexturePtr = Cuda::CreateTextureObject(secondDerivativesCuda, cudaResourceTypeLinear,
+                                                                 secondDerivativesCudaVec.size() * sizeof(typename SecondDerivative<is3d>::TextureType),
+                                                                 cudaChannelFormatKindFloat, sizeof(typename SecondDerivative<is3d>::TextureType) / sizeof(float));
+    auto secondDerivativesTexture = *secondDerivativesTexturePtr;
 
     // Compute the gradient
-    bendingEnergyWeight /= (float)controlPointNumber;
-    auto secondDerivativesTexture = Cuda::CreateTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear,
-                                                              secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4);
-    if (controlPointImage->nz > 1) {
-        const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient3D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getApproxBendingEnergyGradient3D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *secondDerivativesTexture,
-                                                                                    controlPointImageDim, (unsigned)controlPointNumber,
-                                                                                    bendingEnergyWeight);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    } else {
-        const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient2D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getApproxBendingEnergyGradient2D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *secondDerivativesTexture,
-                                                                                    controlPointImageDim, (unsigned)controlPointNumber,
-                                                                                    bendingEnergyWeight);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    }
-    NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValuesCuda));
+    const float approxRatio = bendingEnergyWeight / (float)controlPointNumber;
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), controlPointNumber,
+                       [controlPointImageDim, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const unsigned index) {
+        auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(index, controlPointImageDim);
+        typename SecondDerivative<is3d>::Type gradientValue{};
+        if constexpr (is3d) {
+            for (int c = z - 1, basInd = 0; c < z + 2; c++) {
+                if (c < 0 || c >= controlPointImageDim.z) { basInd += 9; continue; }
+                const int indexZ = c * controlPointImageDim.y;
+                for (int b = y - 1; b < y + 2; b++) {
+                    if (b < 0 || b >= controlPointImageDim.y) { basInd += 3; continue; }
+                    int indexXYZ = ((indexZ + b) * controlPointImageDim.x + x - 1) * 6;
+                    for (int a = x - 1; a < x + 2; a++, basInd++) {
+                        if (a < 0 || a >= controlPointImageDim.x) { indexXYZ += 6; continue; }
+                        const float3& secondDerivativeXX = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd];
+                        const float3& secondDerivativeYY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        gradientValue = gradientValue + secondDerivativeYY * basis.yy[basInd];
+                        const float3& secondDerivativeZZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        gradientValue = gradientValue + secondDerivativeZZ * basis.zz[basInd];
+                        const float3& secondDerivativeXY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        gradientValue = gradientValue + secondDerivativeXY * basis.xy[basInd];
+                        const float3& secondDerivativeYZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        gradientValue = gradientValue + secondDerivativeYZ * basis.yz[basInd];
+                        const float3& secondDerivativeXZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        gradientValue = gradientValue + secondDerivativeXZ * basis.xz[basInd];
+                    }
+                }
+            }
+        } else {
+            for (int b = y - 1, basInd = 0; b < y + 2; b++) {
+                if (b < 0 || b >= controlPointImageDim.y) { basInd += 3; continue; }
+                int indexXY = (b * controlPointImageDim.x + x - 1) * 3;
+                for (int a = x - 1; a < x + 2; a++, basInd++) {
+                    if (a < 0 || a >= controlPointImageDim.x) { indexXY += 3; continue; }
+                    const float2& secondDerivativeXX = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
+                    gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd];
+                    const float2& secondDerivativeYY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
+                    gradientValue = gradientValue + secondDerivativeYY * basis.yy[basInd];
+                    const float2& secondDerivativeXY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
+                    gradientValue = gradientValue + secondDerivativeXY * basis.xy[basInd];
+                }
+            }
+        }
+        float4 nodeGradVal = transGradientCuda[index];
+        nodeGradVal.x += approxRatio * gradientValue.x;
+        nodeGradVal.y += approxRatio * gradientValue.y;
+        if constexpr (is3d)
+            nodeGradVal.z += approxRatio * gradientValue.z;
+        transGradientCuda[index] = nodeGradVal;
+    });
+
+    reg_getDeformationFromDisplacement_gpu(controlPointImage, controlPointImageCuda);
 }
+template void reg_spline_approxBendingEnergyGradient_gpu<false>(nifti_image*, float4*, float4*, float);
+template void reg_spline_approxBendingEnergyGradient_gpu<true>(nifti_image*, float4*, float4*, float);
 /* *************************************************************** */
 void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage,
                                             const float4 *controlPointImageCuda,
@@ -501,26 +581,61 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
     return std::numeric_limits<double>::quiet_NaN();
 }
 /* *************************************************************** */
-void reg_getDeformationFromDisplacement_gpu(const nifti_image *image, float4 *imageCuda, const bool reverse = false) {
+template<bool is3d, bool reverse = false>
+void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda) {
     // Bind the qform or sform
     const mat44& affineMatrix = image->sform_code > 0 ? image->sto_xyz : image->qto_xyz;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const int3 imageDim{ image->nx, image->ny, image->nz };
 
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_getDeformationFromDisplacement;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-    const dim3 gridDims(grids, grids, 1);
-    const dim3 blockDims(blocks, 1, 1);
-    reg_getDeformationFromDisplacement3D_kernel<<<gridDims, blockDims>>>(imageCuda, imageDim, (unsigned)voxelNumber, affineMatrix, reverse);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), voxelNumber, [=]__device__(const unsigned index) {
+        auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(index, imageDim);
+
+        const float4 initialPosition = {
+            float(x) * affineMatrix.m[0][0] + float(y) * affineMatrix.m[0][1] + (is3d ? float(z) * affineMatrix.m[0][2] : 0.f) + affineMatrix.m[0][3],
+            float(x) * affineMatrix.m[1][0] + float(y) * affineMatrix.m[1][1] + (is3d ? float(z) * affineMatrix.m[1][2] : 0.f) + affineMatrix.m[1][3],
+            is3d ? float(x) * affineMatrix.m[2][0] + float(y) * affineMatrix.m[2][1] + float(z) * affineMatrix.m[2][2] + affineMatrix.m[2][3] : 0.f,
+            0.f
+        };
+
+        // If reverse, gets displacement from deformation
+        imageCuda[index] = reverse ? imageCuda[index] - initialPosition : imageCuda[index] + initialPosition;
+    });
+
+    image->intent_code = NIFTI_INTENT_VECTOR;
+    memset(image->intent_name, 0, 16);
+    strcpy(image->intent_name, "NREG_TRANS");
+    if constexpr (reverse) {
+        if (image->intent_p1 == DEF_FIELD)
+            image->intent_p1 = DISP_FIELD;
+        else if (image->intent_p1 == DEF_VEL_FIELD)
+            image->intent_p1 = DISP_VEL_FIELD;
+    } else {
+        if (image->intent_p1 == DISP_FIELD)
+            image->intent_p1 = DEF_FIELD;
+        else if (image->intent_p1 == DISP_VEL_FIELD)
+            image->intent_p1 = DEF_VEL_FIELD;
+    }
+}
+/* *************************************************************** */
+void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda) {
+    if (image->nu == 2)
+        reg_getDeformationFromDisplacement_gpu<false>(image, imageCuda);
+    else if (image->nu == 3)
+        reg_getDeformationFromDisplacement_gpu<true>(image, imageCuda);
+    else NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields");
 }
 /* *************************************************************** */
-void reg_getDisplacementFromDeformation_gpu(const nifti_image *image, float4 *imageCuda) {
-    reg_getDeformationFromDisplacement_gpu(image, imageCuda, true);
+void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageCuda) {
+    if (image->nu == 2)
+        reg_getDeformationFromDisplacement_gpu<false, true>(image, imageCuda);
+    else if (image->nu == 3)
+        reg_getDeformationFromDisplacement_gpu<true, true>(image, imageCuda);
+    else NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields");
 }
 /* *************************************************************** */
 void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
-                                                 const nifti_image *flowField,
+                                                 nifti_image *flowField,
                                                  float4 *velocityFieldGridCuda,
                                                  float4 *flowFieldCuda,
                                                  const int *maskCuda,
@@ -530,6 +645,7 @@ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
         NR_FATAL_ERROR("The provided grid is not a velocity field");
 
     // Initialise the flow field with an identity transformation
+    flowField->intent_p1 = DISP_VEL_FIELD;
     reg_getDeformationFromDisplacement_gpu(flowField, flowFieldCuda);
 
     // fake the number of extension here to avoid the second half of the affine
@@ -538,6 +654,7 @@ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
         velocityFieldGrid->num_ext = 1;
 
     // Copy over the number of required squaring steps
+    flowField->intent_p2 = velocityFieldGrid->intent_p2;
     // The initial flow field is generated using cubic B-Spline interpolation/approximation
     reg_spline_getDeformationField_gpu(velocityFieldGrid,
                                        flowField,
@@ -658,10 +775,9 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField,
     deformationField->intent_p1 = DEF_FIELD;
     deformationField->intent_p2 = 0;
     // If required an affine component is composed
-    // TODO Composition is needed
     if (flowField->num_ext > 1)
         reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[1].edata),
-                                           deformationField, deformationFieldCuda);
+                                           deformationField, deformationFieldCuda, true);
 }
 /* *************************************************************** */
 void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
@@ -741,7 +857,7 @@ double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid,
     const mat33 reorientation = reg_mat44_to_mat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk);
 
     // Store the basis values since they are constant as the value is approximated at the control point positions only
-    Basis basis;
+    Basis1st<is3d> basis;
     if constexpr (is3d)
         set_first_order_basis_values(basis.x, basis.y, basis.z);
     else
@@ -780,7 +896,7 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr
     const mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
     // Store the basis values since they are constant as the value is approximated at the control point positions only
-    Basis basis;
+    Basis1st<is3d> basis;
     if constexpr (is3d)
         set_first_order_basis_values(basis.x, basis.y, basis.z);
     else
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
index d3432ca1..9588cc8e 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.h
@@ -14,6 +14,10 @@
 
 #include "_reg_tools_gpu.h"
 
+/* *************************************************************** */
+void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda);
+/* *************************************************************** */
+void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageCuda);
 /* *************************************************************** */
 void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
                                         const nifti_image *referenceImage,
@@ -24,11 +28,13 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
                                         const bool composition,
                                         const bool bspline);
 /* *************************************************************** */
-float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage,
-                                         const float4 *controlPointImageCuda);
+template<bool is3d>
+double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage,
+                                          const float4 *controlPointImageCuda);
 /* *************************************************************** */
-void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointImage,
-                                                const float4 *controlPointImageCuda,
+template<bool is3d>
+void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
+                                                float4 *controlPointImageCuda,
                                                 float4 *transGradientCuda,
                                                 float bendingEnergyWeight);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index ba459d22..43708ec5 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -39,14 +39,6 @@ __device__ void GetBasisSplineValues(const float basis, float *values) {
     values[3] = (basis - 1.f) * ff / 2.f;
 }
 /* *************************************************************** */
-__device__ void GetBasisSplineValuesX(const float basis, float4 *values) {
-    const float ff = Square(basis);
-    values->x = (basis * ((2.f - basis) * basis - 1.f)) / 2.f;
-    values->y = (ff * (3.f * basis - 5.f) + 2.f) / 2.f;
-    values->z = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f;
-    values->w = (basis - 1.f) * ff / 2.f;
-}
-/* *************************************************************** */
 __device__ void GetBSplineBasisValue(const float basis, const int index, float *value, float *first) {
     switch (index) {
     case 0:
@@ -118,139 +110,6 @@ __device__ void GetFirstDerivativeBasisValues3D(const int index, float *xBasis,
     }
 }
 /* *************************************************************** */
-__device__ void GetSecondDerivativeBasisValues2D(const int index, float *xxBasis, float *yyBasis, float *xyBasis) {
-    switch (index) {
-    case 0: xxBasis[0] = 0.166667f; yyBasis[0] = 0.166667f; xyBasis[0] = 0.25f; break;
-    case 1: xxBasis[1] = -0.333333f; yyBasis[1] = 0.666667f; xyBasis[1] = -0.f; break;
-    case 2: xxBasis[2] = 0.166667f; yyBasis[2] = 0.166667f; xyBasis[2] = -0.25f; break;
-    case 3: xxBasis[3] = 0.666667f; yyBasis[3] = -0.333333f; xyBasis[3] = -0.f; break;
-    case 4: xxBasis[4] = -1.33333f; yyBasis[4] = -1.33333f; xyBasis[4] = 0.f; break;
-    case 5: xxBasis[5] = 0.666667f; yyBasis[5] = -0.333333f; xyBasis[5] = 0.f; break;
-    case 6: xxBasis[6] = 0.166667f; yyBasis[6] = 0.166667f; xyBasis[6] = -0.25f; break;
-    case 7: xxBasis[7] = -0.333333f; yyBasis[7] = 0.666667f; xyBasis[7] = 0.f; break;
-    case 8: xxBasis[8] = 0.166667f; yyBasis[8] = 0.166667f; xyBasis[8] = 0.25f; break;
-    }
-}
-/* *************************************************************** */
-__device__ void GetSecondDerivativeBasisValues3D(const int index,
-                                                 float *xxBasis,
-                                                 float *yyBasis,
-                                                 float *zzBasis,
-                                                 float *xyBasis,
-                                                 float *yzBasis,
-                                                 float *xzBasis) {
-    switch (index) {
-    case 0:
-        xxBasis[0] = 0.027778f; yyBasis[0] = 0.027778f; zzBasis[0] = 0.027778f;
-        xyBasis[0] = 0.041667f; yzBasis[0] = 0.041667f; xzBasis[0] = 0.041667f;
-        break;
-    case 1:
-        xxBasis[1] = -0.055556f; yyBasis[1] = 0.111111f; zzBasis[1] = 0.111111f;
-        xyBasis[1] = -0.000000f; yzBasis[1] = 0.166667f; xzBasis[1] = -0.000000f;
-        break;
-    case 2:
-        xxBasis[2] = 0.027778f; yyBasis[2] = 0.027778f; zzBasis[2] = 0.027778f;
-        xyBasis[2] = -0.041667f; yzBasis[2] = 0.041667f; xzBasis[2] = -0.041667f;
-        break;
-    case 3:
-        xxBasis[3] = 0.111111f; yyBasis[3] = -0.055556f; zzBasis[3] = 0.111111f;
-        xyBasis[3] = -0.000000f; yzBasis[3] = -0.000000f; xzBasis[3] = 0.166667f;
-        break;
-    case 4:
-        xxBasis[4] = -0.222222f; yyBasis[4] = -0.222222f; zzBasis[4] = 0.444444f;
-        xyBasis[4] = 0.000000f; yzBasis[4] = -0.000000f; xzBasis[4] = -0.000000f;
-        break;
-    case 5:
-        xxBasis[5] = 0.111111f; yyBasis[5] = -0.055556f; zzBasis[5] = 0.111111f;
-        xyBasis[5] = 0.000000f; yzBasis[5] = -0.000000f; xzBasis[5] = -0.166667f;
-        break;
-    case 6:
-        xxBasis[6] = 0.027778f; yyBasis[6] = 0.027778f; zzBasis[6] = 0.027778f;
-        xyBasis[6] = -0.041667f; yzBasis[6] = -0.041667f; xzBasis[6] = 0.041667f;
-        break;
-    case 7:
-        xxBasis[7] = -0.055556f; yyBasis[7] = 0.111111f; zzBasis[7] = 0.111111f;
-        xyBasis[7] = 0.000000f; yzBasis[7] = -0.166667f; xzBasis[7] = -0.000000f;
-        break;
-    case 8:
-        xxBasis[8] = 0.027778f; yyBasis[8] = 0.027778f; zzBasis[8] = 0.027778f;
-        xyBasis[8] = 0.041667f; yzBasis[8] = -0.041667f; xzBasis[8] = -0.041667f;
-        break;
-    case 9:
-        xxBasis[9] = 0.111111f; yyBasis[9] = 0.111111f; zzBasis[9] = -0.055556f;
-        xyBasis[9] = 0.166667f; yzBasis[9] = -0.000000f; xzBasis[9] = -0.000000f;
-        break;
-    case 10:
-        xxBasis[10] = -0.222222f; yyBasis[10] = 0.444444f; zzBasis[10] = -0.222222f;
-        xyBasis[10] = -0.000000f; yzBasis[10] = -0.000000f; xzBasis[10] = 0.000000f;
-        break;
-    case 11:
-        xxBasis[11] = 0.111111f; yyBasis[11] = 0.111111f; zzBasis[11] = -0.055556f;
-        xyBasis[11] = -0.166667f; yzBasis[11] = -0.000000f; xzBasis[11] = 0.000000f;
-        break;
-    case 12:
-        xxBasis[12] = 0.444444f; yyBasis[12] = -0.222222f; zzBasis[12] = -0.222222f;
-        xyBasis[12] = -0.000000f; yzBasis[12] = 0.000000f; xzBasis[12] = -0.000000f;
-        break;
-    case 13:
-        xxBasis[13] = -0.888889f; yyBasis[13] = -0.888889f; zzBasis[13] = -0.888889f;
-        xyBasis[13] = 0.000000f; yzBasis[13] = 0.000000f; xzBasis[13] = 0.000000f;
-        break;
-    case 14:
-        xxBasis[14] = 0.444444f; yyBasis[14] = -0.222222f; zzBasis[14] = -0.222222f;
-        xyBasis[14] = 0.000000f; yzBasis[14] = 0.000000f; xzBasis[14] = 0.000000f;
-        break;
-    case 15:
-        xxBasis[15] = 0.111111f; yyBasis[15] = 0.111111f; zzBasis[15] = -0.055556f;
-        xyBasis[15] = -0.166667f; yzBasis[15] = 0.000000f; xzBasis[15] = -0.000000f;
-        break;
-    case 16:
-        xxBasis[16] = -0.222222f; yyBasis[16] = 0.444444f; zzBasis[16] = -0.222222f;
-        xyBasis[16] = 0.000000f; yzBasis[16] = 0.000000f; xzBasis[16] = 0.000000f;
-        break;
-    case 17:
-        xxBasis[17] = 0.111111f; yyBasis[17] = 0.111111f; zzBasis[17] = -0.055556f;
-        xyBasis[17] = 0.166667f; yzBasis[17] = 0.000000f; xzBasis[17] = 0.000000f;
-        break;
-    case 18:
-        xxBasis[18] = 0.027778f; yyBasis[18] = 0.027778f; zzBasis[18] = 0.027778f;
-        xyBasis[18] = 0.041667f; yzBasis[18] = -0.041667f; xzBasis[18] = -0.041667f;
-        break;
-    case 19:
-        xxBasis[19] = -0.055556f; yyBasis[19] = 0.111111f; zzBasis[19] = 0.111111f;
-        xyBasis[19] = -0.000000f; yzBasis[19] = -0.166667f; xzBasis[19] = 0.000000f;
-        break;
-    case 20:
-        xxBasis[20] = 0.027778f; yyBasis[20] = 0.027778f; zzBasis[20] = 0.027778f;
-        xyBasis[20] = -0.041667f; yzBasis[20] = -0.041667f; xzBasis[20] = 0.041667f;
-        break;
-    case 21:
-        xxBasis[21] = 0.111111f; yyBasis[21] = -0.055556f; zzBasis[21] = 0.111111f;
-        xyBasis[21] = -0.000000f; yzBasis[21] = 0.000000f; xzBasis[21] = -0.166667f;
-        break;
-    case 22:
-        xxBasis[22] = -0.222222f; yyBasis[22] = -0.222222f; zzBasis[22] = 0.444444f;
-        xyBasis[22] = 0.000000f; yzBasis[22] = 0.000000f; xzBasis[22] = 0.000000f;
-        break;
-    case 23:
-        xxBasis[23] = 0.111111f; yyBasis[23] = -0.055556f; zzBasis[23] = 0.111111f;
-        xyBasis[23] = 0.000000f; yzBasis[23] = 0.000000f; xzBasis[23] = 0.166667f;
-        break;
-    case 24:
-        xxBasis[24] = 0.027778f; yyBasis[24] = 0.027778f; zzBasis[24] = 0.027778f;
-        xyBasis[24] = -0.041667f; yzBasis[24] = 0.041667f; xzBasis[24] = -0.041667f;
-        break;
-    case 25:
-        xxBasis[25] = -0.055556f; yyBasis[25] = 0.111111f; zzBasis[25] = 0.111111f;
-        xyBasis[25] = 0.000000f; yzBasis[25] = 0.166667f; xzBasis[25] = 0.000000f;
-        break;
-    case 26:
-        xxBasis[26] = 0.027778f; yyBasis[26] = 0.027778f; zzBasis[26] = 0.027778f;
-        xyBasis[26] = 0.041667f; yzBasis[26] = 0.041667f; xzBasis[26] = 0.041667f;
-        break;
-    }
-}
-/* *************************************************************** */
 __device__ float4 GetSlidedValues(int x, int y,
                                   cudaTextureObject_t deformationFieldTexture,
                                   const int3& referenceImageDim,
@@ -463,250 +322,6 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
     deformationField[tid] = displacement;
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivativeValues,
-                                                        cudaTextureObject_t controlPointTexture,
-                                                        const int3 controlPointImageDim,
-                                                        const unsigned controlPointNumber) {
-    __shared__ float xxbasis[9];
-    __shared__ float yybasis[9];
-    __shared__ float xybasis[9];
-
-    if (threadIdx.x < 9)
-        GetSecondDerivativeBasisValues2D(threadIdx.x, xxbasis, yybasis, xybasis);
-    __syncthreads();
-
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < controlPointNumber) {
-        int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
-
-        float4 xx{}, yy{}, xy{};
-        unsigned tempIndex;
-        if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1) {
-            tempIndex = 0;
-            for (int b = y - 1; b < y + 2; ++b) {
-                for (int a = x - 1; a < x + 2; ++a) {
-                    const int indexXY = b * controlPointImageDim.x + a;
-                    const float4 controlPointValues = tex1Dfetch<float4>(controlPointTexture, indexXY);
-                    xx.x += xxbasis[tempIndex] * controlPointValues.x;
-                    xx.y += xxbasis[tempIndex] * controlPointValues.y;
-                    yy.x += yybasis[tempIndex] * controlPointValues.x;
-                    yy.y += yybasis[tempIndex] * controlPointValues.y;
-                    xy.x += xybasis[tempIndex] * controlPointValues.x;
-                    xy.y += xybasis[tempIndex] * controlPointValues.y;
-                    tempIndex++;
-                }
-            }
-        }
-
-        tempIndex = 3 * tid;
-        secondDerivativeValues[tempIndex++] = xx;
-        secondDerivativeValues[tempIndex++] = yy;
-        secondDerivativeValues[tempIndex] = xy;
-    }
-}
-/* *************************************************************** */
-__global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivativeValues,
-                                                        cudaTextureObject_t controlPointTexture,
-                                                        const int3 controlPointImageDim,
-                                                        const unsigned controlPointNumber) {
-    __shared__ float xxbasis[27];
-    __shared__ float yybasis[27];
-    __shared__ float zzbasis[27];
-    __shared__ float xybasis[27];
-    __shared__ float yzbasis[27];
-    __shared__ float xzbasis[27];
-
-    if (threadIdx.x < 27)
-        GetSecondDerivativeBasisValues3D(threadIdx.x, xxbasis, yybasis, zzbasis, xybasis, yzbasis, xzbasis);
-    __syncthreads();
-
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < controlPointNumber) {
-        int tempIndex = tid;
-        int quot, rem;
-        reg_div_cuda(tempIndex, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
-        const int z = quot;
-        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
-
-        float4 xx{}, yy{}, zz{}, xy{}, yz{}, xz{};
-        if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1 && 0 < z && z < controlPointImageDim.z - 1) {
-            tempIndex = 0;
-            for (int c = z - 1; c < z + 2; ++c) {
-                for (int b = y - 1; b < y + 2; ++b) {
-                    for (int a = x - 1; a < x + 2; ++a) {
-                        const int indexXYZ = (c * controlPointImageDim.y + b) * controlPointImageDim.x + a;
-                        const float4 controlPointValues = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
-                        xx = xx + xxbasis[tempIndex] * controlPointValues;
-                        yy = yy + yybasis[tempIndex] * controlPointValues;
-                        zz = zz + zzbasis[tempIndex] * controlPointValues;
-                        xy = xy + xybasis[tempIndex] * controlPointValues;
-                        yz = yz + yzbasis[tempIndex] * controlPointValues;
-                        xz = xz + xzbasis[tempIndex] * controlPointValues;
-                        tempIndex++;
-                    }
-                }
-            }
-        }
-
-        tempIndex = 6 * tid;
-        secondDerivativeValues[tempIndex++] = xx;
-        secondDerivativeValues[tempIndex++] = yy;
-        secondDerivativeValues[tempIndex++] = zz;
-        secondDerivativeValues[tempIndex++] = xy;
-        secondDerivativeValues[tempIndex++] = yz;
-        secondDerivativeValues[tempIndex] = xz;
-    }
-}
-/* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergy2D_kernel(float *penaltyTerm,
-                                                           cudaTextureObject_t secondDerivativesTexture,
-                                                           const unsigned controlPointNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < controlPointNumber) {
-        unsigned index = tid * 3;
-        float4 xx = tex1Dfetch<float4>(secondDerivativesTexture, index++);  xx = xx * xx;
-        float4 yy = tex1Dfetch<float4>(secondDerivativesTexture, index++);  yy = yy * yy;
-        float4 xy = tex1Dfetch<float4>(secondDerivativesTexture, index++);  xy = xy * xy;
-        penaltyTerm[tid] = xx.x + xx.y + yy.x + yy.y + 2.f * (xy.x + xy.y);
-    }
-}
-/* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergy3D_kernel(float *penaltyTerm,
-                                                           cudaTextureObject_t secondDerivativesTexture,
-                                                           const unsigned controlPointNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < controlPointNumber) {
-        unsigned index = tid * 6;
-        float4 xx = tex1Dfetch<float4>(secondDerivativesTexture, index++);  xx = xx * xx;
-        float4 yy = tex1Dfetch<float4>(secondDerivativesTexture, index++);  yy = yy * yy;
-        float4 zz = tex1Dfetch<float4>(secondDerivativesTexture, index++);  zz = zz * zz;
-        float4 xy = tex1Dfetch<float4>(secondDerivativesTexture, index++);  xy = xy * xy;
-        float4 yz = tex1Dfetch<float4>(secondDerivativesTexture, index++);  yz = yz * yz;
-        float4 xz = tex1Dfetch<float4>(secondDerivativesTexture, index);    xz = xz * xz;
-        penaltyTerm[tid] = xx.x + xx.y + xx.z + yy.x + yy.y + yy.z + zz.x + zz.y + zz.z +
-            2.f * (xy.x + xy.y + xy.z + yz.x + yz.y + yz.z + xz.x + xz.y + xz.z);
-    }
-}
-/* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeGradient,
-                                                                   cudaTextureObject_t secondDerivativesTexture,
-                                                                   const int3 controlPointImageDim,
-                                                                   const unsigned controlPointNumber,
-                                                                   const float weight) {
-    __shared__ float xxbasis[9];
-    __shared__ float yybasis[9];
-    __shared__ float xybasis[9];
-
-    if (threadIdx.x < 9)
-        GetSecondDerivativeBasisValues2D(threadIdx.x, xxbasis, yybasis, xybasis);
-    __syncthreads();
-
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < controlPointNumber) {
-        int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
-
-        float2 gradientValue{};
-        float4 secondDerivativeValues;
-        int coord = 0;
-        for (int b = y - 1; b < y + 2; ++b) {
-            for (int a = x - 1; a < x + 2; ++a) {
-                if (-1 < a && a < controlPointImageDim.x && -1 < b && b < controlPointImageDim.y) {
-                    int indexXY = 3 * (b * controlPointImageDim.x + a);
-                    secondDerivativeValues = tex1Dfetch<float4>(secondDerivativesTexture, indexXY++); // XX
-                    gradientValue.x += secondDerivativeValues.x * xxbasis[coord];
-                    gradientValue.y += secondDerivativeValues.y * xxbasis[coord];
-                    secondDerivativeValues = tex1Dfetch<float4>(secondDerivativesTexture, indexXY++); // YY
-                    gradientValue.x += secondDerivativeValues.x * yybasis[coord];
-                    gradientValue.y += secondDerivativeValues.y * yybasis[coord];
-                    secondDerivativeValues = 2.f * tex1Dfetch<float4>(secondDerivativesTexture, indexXY); // XY
-                    gradientValue.x += secondDerivativeValues.x * xybasis[coord];
-                    gradientValue.y += secondDerivativeValues.y * xybasis[coord];
-                }
-                coord++;
-            }
-        }
-
-        nodeGradient[tid].x += weight * gradientValue.x;
-        nodeGradient[tid].y += weight * gradientValue.y;
-    }
-}
-/* *************************************************************** */
-__global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeGradient,
-                                                                   cudaTextureObject_t secondDerivativesTexture,
-                                                                   const int3 controlPointImageDim,
-                                                                   const unsigned controlPointNumber,
-                                                                   const float weight) {
-    __shared__ float xxbasis[27];
-    __shared__ float yybasis[27];
-    __shared__ float zzbasis[27];
-    __shared__ float xybasis[27];
-    __shared__ float yzbasis[27];
-    __shared__ float xzbasis[27];
-
-    if (threadIdx.x < 27)
-        GetSecondDerivativeBasisValues3D(threadIdx.x, xxbasis, yybasis, zzbasis, xybasis, yzbasis, xzbasis);
-    __syncthreads();
-
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < controlPointNumber) {
-        int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
-        const int z = quot;
-        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
-
-        float3 gradientValue{};
-        float4 secondDerivativeValues;
-        int coord = 0;
-        for (int c = z - 1; c < z + 2; ++c) {
-            for (int b = y - 1; b < y + 2; ++b) {
-                for (int a = x - 1; a < x + 2; ++a) {
-                    if (-1 < a && a < controlPointImageDim.x && -1 < b && b < controlPointImageDim.y && -1 < c && c < controlPointImageDim.z) {
-                        unsigned indexXYZ = 6 * ((c * controlPointImageDim.y + b) * controlPointImageDim.x + a);
-                        secondDerivativeValues = tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++); // XX
-                        gradientValue.x += secondDerivativeValues.x * xxbasis[coord];
-                        gradientValue.y += secondDerivativeValues.y * xxbasis[coord];
-                        gradientValue.z += secondDerivativeValues.z * xxbasis[coord];
-                        secondDerivativeValues = tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++); // YY
-                        gradientValue.x += secondDerivativeValues.x * yybasis[coord];
-                        gradientValue.y += secondDerivativeValues.y * yybasis[coord];
-                        gradientValue.z += secondDerivativeValues.z * yybasis[coord];
-                        secondDerivativeValues = tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++); // ZZ
-                        gradientValue.x += secondDerivativeValues.x * zzbasis[coord];
-                        gradientValue.y += secondDerivativeValues.y * zzbasis[coord];
-                        gradientValue.z += secondDerivativeValues.z * zzbasis[coord];
-                        secondDerivativeValues = 2.f * tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++); // XY
-                        gradientValue.x += secondDerivativeValues.x * xybasis[coord];
-                        gradientValue.y += secondDerivativeValues.y * xybasis[coord];
-                        gradientValue.z += secondDerivativeValues.z * xybasis[coord];
-                        secondDerivativeValues = 2.f * tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++); // YZ
-                        gradientValue.x += secondDerivativeValues.x * yzbasis[coord];
-                        gradientValue.y += secondDerivativeValues.y * yzbasis[coord];
-                        gradientValue.z += secondDerivativeValues.z * yzbasis[coord];
-                        secondDerivativeValues = 2.f * tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ); // XZ
-                        gradientValue.x += secondDerivativeValues.x * xzbasis[coord];
-                        gradientValue.y += secondDerivativeValues.y * xzbasis[coord];
-                        gradientValue.z += secondDerivativeValues.z * xzbasis[coord];
-                    }
-                    coord++;
-                }
-            }
-        }
-        gradientValue = weight * gradientValue;
-
-        float4 metricGradientValue = nodeGradient[tid];
-        metricGradientValue.x += gradientValue.x;
-        metricGradientValue.y += gradientValue.y;
-        metricGradientValue.z += gradientValue.z;
-        nodeGradient[tid] = metricGradientValue;
-    }
-}
-/* *************************************************************** */
 __global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatrices,
                                                             float *jacobianDet,
                                                             cudaTextureObject_t controlPointTexture,
@@ -1464,31 +1079,6 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid,
     }
 }
 /* *************************************************************** */
-__global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *image,
-                                                            const int3 imageDim,
-                                                            const unsigned voxelNumber,
-                                                            const mat44 affineMatrix,
-                                                            const bool reverse = false) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < voxelNumber) {
-        int quot, rem;
-        reg_div_cuda(tid, imageDim.x * imageDim.y, quot, rem);
-        const int z = quot;
-        reg_div_cuda(rem, imageDim.x, quot, rem);
-        const int y = quot, x = rem;
-
-        const float4 initialPosition = {
-            x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2] + affineMatrix.m[0][3],
-            x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2] + affineMatrix.m[1][3],
-            x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2] + affineMatrix.m[2][3],
-            0.f
-        };
-
-        // If reverse, gets displacement from deformation
-        image[tid] = image[tid] + (reverse ? -1 : 1) * initialPosition;
-    }
-}
-/* *************************************************************** */
 __global__ void reg_defField_compose2D_kernel(float4 *deformationField,
                                               cudaTextureObject_t deformationFieldTexture,
                                               const int3 referenceImageDim,
@@ -1643,15 +1233,20 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices,
     }
 }
 /* *************************************************************** */
-struct Basis {
+template<bool is3d>
+struct Basis1st {
     float x[27], y[27], z[27];
 };
+template<>
+struct Basis1st<false> {
+    float x[9], y[9];
+};
 /* *************************************************************** */
 template<bool is3d>
 __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
                                                  cudaTextureObject_t controlPointGridTexture,
                                                  const int3& cppDims,
-                                                 const Basis& basis,
+                                                 const Basis1st<is3d>& basis,
                                                  const mat33& reorientation) {
     const auto&& [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
     if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 ||
@@ -1665,7 +1260,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
                 const int yInd = (zInd + y + b) * cppDims.x;
                 for (int a = -1; a < 2; a++, basInd++) {
                     const int index = yInd + x + a;
-                    const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
+                    const float4& splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
 
                     matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
                     matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
@@ -1687,7 +1282,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
             const int yInd = (y + b) * cppDims.x;
             for (int a = -1; a < 2; a++, basInd++) {
                 const int index = yInd + x + a;
-                const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
+                const float4& splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
 
                 matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
                 matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
@@ -1712,7 +1307,7 @@ template<bool is3d>
 __global__ void reg_spline_createDisplacementMatrices_kernel(mat33 *dispMatrices,
                                                              cudaTextureObject_t controlPointGridTexture,
                                                              const int3 cppDims,
-                                                             const Basis basis,
+                                                             const Basis1st<is3d> basis,
                                                              const mat33 reorientation,
                                                              const unsigned voxelNumber) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
@@ -1725,7 +1320,7 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie
                                                              cudaTextureObject_t dispMatricesTexture,
                                                              const int3 cppDims,
                                                              const float approxRatio,
-                                                             const Basis basis,
+                                                             const Basis1st<is3d> basis,
                                                              const mat33 invReorientation,
                                                              const unsigned voxelNumber) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;

From f021929edf45dabdf867944d16608a440b296dab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 6 Nov 2023 18:39:07 +0000
Subject: [PATCH 240/314] Add regression tests for approximate bending energy
 and approximate bending energy gradient #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-test/CMakeLists.txt                       |   1 +
 ..._test_regr_approxBendingEnergyGradient.cpp | 154 ++++++++++++++++++
 ...g_test_regr_approxLinearEnergyGradient.cpp |   2 +-
 4 files changed, 157 insertions(+), 2 deletions(-)
 create mode 100644 reg-test/reg_test_regr_approxBendingEnergyGradient.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index b4eed3b8..cf7ff50f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-358
+359
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 941ed995..b08293d5 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -124,6 +124,7 @@ set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST})
 set(EXEC_LIST reg_test_regr_getDeformationField ${EXEC_LIST})
 set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST})
 if(USE_CUDA)
+  set(EXEC_LIST reg_test_regr_approxBendingEnergyGradient ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_approxLinearEnergyGradient ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_kernelConvolution ${EXEC_LIST})
diff --git a/reg-test/reg_test_regr_approxBendingEnergyGradient.cpp b/reg-test/reg_test_regr_approxBendingEnergyGradient.cpp
new file mode 100644
index 00000000..a2a01bdf
--- /dev/null
+++ b/reg-test/reg_test_regr_approxBendingEnergyGradient.cpp
@@ -0,0 +1,154 @@
+#include "reg_test_common.h"
+#include "CudaF3dContent.h"
+
+/**
+ *  Approximate bending energy and approximate bending energy gradient regression tests
+ *  to ensure the CPU and CUDA versions yield the same output
+**/
+
+class ApproxBendingEnergyGradientTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage&, NiftiImage&, NiftiImage&, float>;
+    using TestCase = std::tuple<std::string, double, double, NiftiImage, NiftiImage>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    ApproxBendingEnergyGradientTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::mt19937 gen(0);
+        std::uniform_real_distribution<float> distr(0, 10);
+
+        // Create 2D reference, floating and control point grid images
+        constexpr NiftiImage::dim_t size = 4;
+        vector<NiftiImage::dim_t> dim{ size, size };
+        NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage controlPointGrid = CreateControlPointGrid(reference2d);
+        NiftiImage controlPointGrid2d[3]{ controlPointGrid, controlPointGrid, controlPointGrid };
+
+        // Create 3D reference, floating and control point grid images
+        dim.push_back(size);
+        NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
+        NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
+        controlPointGrid = CreateControlPointGrid(reference3d);
+        NiftiImage controlPointGrid3d[3]{ controlPointGrid, controlPointGrid, controlPointGrid };
+
+        // Fill control point grids with random values
+        for (int i = 0; i < 3; i++) {
+            auto controlPointGridPtr = controlPointGrid2d[i].data();
+            for (size_t j = 0; j < controlPointGrid2d[i].nVoxels(); j++)
+                controlPointGridPtr[j] = distr(gen);
+            controlPointGridPtr = controlPointGrid3d[i].data();
+            for (size_t j = 0; j < controlPointGrid3d[i].nVoxels(); j++)
+                controlPointGridPtr[j] = distr(gen);
+        }
+
+        // Create the data container for the regression test
+        vector<TestData> testData;
+        for (int i = 0; i < 3; i++) {
+            const float weight = distr(gen);
+            testData.emplace_back(TestData(
+                "2D weight: "s + std::to_string(weight),
+                reference2d,
+                floating2d,
+                controlPointGrid2d[i],
+                weight
+            ));
+            testData.emplace_back(TestData(
+                "3D weight: "s + std::to_string(weight),
+                reference3d,
+                floating3d,
+                controlPointGrid3d[i],
+                weight
+            ));
+        }
+
+        // Create the platforms
+        Platform platformCpu(PlatformType::Cpu);
+        Platform platformCuda(PlatformType::Cuda);
+
+        for (auto&& testData : testData) {
+            // Get the test data
+            auto&& [testName, reference, floating, controlPointGrid, weight] = testData;
+
+            // Create images
+            NiftiImage referenceCpu(reference), referenceCuda(reference);
+            NiftiImage floatingCpu(floating), floatingCuda(floating);
+            NiftiImage controlPointGridCpu(controlPointGrid), controlPointGridCuda(controlPointGrid);
+
+            // Create the contents
+            unique_ptr<F3dContent> contentCpu{ new F3dContent(
+                referenceCpu,
+                floatingCpu,
+                controlPointGridCpu,
+                nullptr,
+                nullptr,
+                nullptr,
+                sizeof(float)
+            ) };
+            unique_ptr<F3dContent> contentCuda{ new CudaF3dContent(
+                referenceCuda,
+                floatingCuda,
+                controlPointGridCuda,
+                nullptr,
+                nullptr,
+                nullptr,
+                sizeof(float)
+            ) };
+
+            // Create the computes
+            unique_ptr<Compute> computeCpu{ platformCpu.CreateCompute(*contentCpu) };
+            unique_ptr<Compute> computeCuda{ platformCuda.CreateCompute(*contentCuda) };
+
+            // Compute the approximate bending energy for CPU and CUDA
+            const double approxBendingEnergyCpu = computeCpu->ApproxBendingEnergy();
+            const double approxBendingEnergyCuda = computeCuda->ApproxBendingEnergy();
+
+            // Compute the approximate bending energy gradient for CPU and CUDA
+            computeCpu->ApproxBendingEnergyGradient(weight);
+            computeCuda->ApproxBendingEnergyGradient(weight);
+
+            // Get the transformation gradients
+            NiftiImage transGradCpu(contentCpu->GetTransformationGradient(), NiftiImage::Copy::Image);
+            NiftiImage transGradCuda(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image);
+
+            // Save for testing
+            testCases.push_back({ testName, approxBendingEnergyCpu, approxBendingEnergyCuda, std::move(transGradCpu), std::move(transGradCuda) });
+        }
+    }
+};
+
+TEST_CASE_METHOD(ApproxBendingEnergyGradientTest, "Regression Approximate Bending Energy Gradient", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [testName, approxBendingEnergyCpu, approxBendingEnergyCuda, transGradCpu, transGradCuda] = testCase;
+
+        SECTION(testName) {
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            // Check the approximate bending energy values
+            NR_COUT << "Approx Bending Energy: " << approxBendingEnergyCpu << " " << approxBendingEnergyCuda << std::endl;
+            REQUIRE(abs(approxBendingEnergyCpu - approxBendingEnergyCuda) < EPS);
+
+            // Check the transformation gradients
+            const auto transGradCpuPtr = transGradCpu.data();
+            const auto transGradCudaPtr = transGradCuda.data();
+            for (size_t i = 0; i < transGradCpu.nVoxels(); ++i) {
+                const float cpuVal = transGradCpuPtr[i];
+                const float cudaVal = transGradCudaPtr[i];
+                const auto diff = abs(cpuVal - cudaVal);
+                if (diff > 0)
+                    NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl;
+                REQUIRE(diff < EPS);
+            }
+        }
+    }
+}
diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
index 1cf5b166..530d404b 100644
--- a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
+++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
@@ -136,7 +136,7 @@ TEST_CASE_METHOD(ApproxLinearEnergyGradientTest, "Regression Approximate Linear
             // Increase the precision for the output
             NR_COUT << std::fixed << std::setprecision(10);
 
-            // Check the approximate linear energy
+            // Check the approximate linear energy values
             NR_COUT << "Approx Linear Energy: " << approxLinearEnergyCpu << " " << approxLinearEnergyCuda << std::endl;
             REQUIRE(abs(approxLinearEnergyCpu - approxLinearEnergyCuda) < EPS);
 

From f4c3c159bff17c0b2e8ad553ef4ef292623a70a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 13 Nov 2023 17:41:18 +0000
Subject: [PATCH 241/314] Refactorisations

---
 niftyreg_build_version.txt         |   2 +-
 reg-apps/reg_benchmark.cpp         |  54 ++++-----
 reg-apps/reg_measure.cpp           |   8 +-
 reg-lib/Debug.hpp                  |   2 +-
 reg-lib/_reg_base.cpp              |  64 +++++------
 reg-lib/_reg_base.h                |   6 +-
 reg-lib/_reg_f3d.cpp               |   8 +-
 reg-lib/_reg_f3d.h                 |   2 +-
 reg-lib/_reg_f3d2.cpp              |   4 +-
 reg-lib/_reg_f3d2.h                |   2 +-
 reg-lib/_reg_polyAffine.cpp        |   4 +-
 reg-lib/_reg_polyAffine.h          |   2 +-
 reg-lib/cpu/_reg_dti.cpp           |   8 +-
 reg-lib/cpu/_reg_dti.h             |   4 +-
 reg-lib/cpu/_reg_kld.cpp           |  48 ++++----
 reg-lib/cpu/_reg_kld.h             |   4 +-
 reg-lib/cpu/_reg_lncc.cpp          |  70 ++++++------
 reg-lib/cpu/_reg_lncc.h            |   4 +-
 reg-lib/cpu/_reg_measure.h         |  31 +++---
 reg-lib/cpu/_reg_mind.cpp          |  63 +++++------
 reg-lib/cpu/_reg_mind.h            |  10 +-
 reg-lib/cpu/_reg_nmi.cpp           | 170 ++++++++++++++---------------
 reg-lib/cpu/_reg_nmi.h             |  21 ++--
 reg-lib/cpu/_reg_polyAffine.cpp    |   4 +-
 reg-lib/cpu/_reg_polyAffine.h      |   2 +-
 reg-lib/cpu/_reg_ssd.cpp           |  65 ++++++-----
 reg-lib/cpu/_reg_ssd.h             |  13 ++-
 reg-lib/cuda/BlockSize.hpp         |  18 +--
 reg-lib/cuda/_reg_measure_gpu.h    |  12 +-
 reg-lib/cuda/_reg_nmi_gpu.cu       |  47 ++++----
 reg-lib/cuda/_reg_nmi_gpu.h        |   8 +-
 reg-lib/cuda/_reg_nmi_kernels.cu   |   6 +-
 reg-lib/cuda/_reg_ssd_gpu.cu       |  14 +--
 reg-lib/cuda/_reg_ssd_gpu.h        |   4 +-
 reg-lib/cuda/_reg_tools_gpu.cu     |   6 +-
 reg-lib/cuda/_reg_tools_gpu.h      |   2 +-
 reg-lib/cuda/_reg_tools_kernels.cu |   2 +-
 reg-test/reg_test_lncc.cpp         |   2 +-
 reg-test/reg_test_nmi.cpp          |   8 +-
 reg-test/reg_test_nmi_gradient.cpp |  10 +-
 reg-test/reg_test_regr_measure.cpp |   6 +-
 41 files changed, 413 insertions(+), 407 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index cf7ff50f..2921a158 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-359
+360
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index 8f0adff4..52661f88 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -170,14 +170,14 @@ int main(int argc, char **argv)
    resultGradientImage->datatype = NIFTI_TYPE_FLOAT32;
    resultGradientImage->nbyper = sizeof(float);
    resultGradientImage->data = calloc(resultGradientImage->nvox, resultGradientImage->nbyper);
-   nifti_image *voxelNMIGradientImage = nifti_copy_nim_info(deformationFieldImage);
-   voxelNMIGradientImage->datatype = NIFTI_TYPE_FLOAT32;
-   voxelNMIGradientImage->nbyper = sizeof(float);
-   voxelNMIGradientImage->data = calloc(voxelNMIGradientImage->nvox, voxelNMIGradientImage->nbyper);
-   nifti_image *nodeNMIGradientImage = nifti_copy_nim_info(controlPointImage);
-   nodeNMIGradientImage->datatype = NIFTI_TYPE_FLOAT32;
-   nodeNMIGradientImage->nbyper = sizeof(float);
-   nodeNMIGradientImage->data = calloc(nodeNMIGradientImage->nvox, nodeNMIGradientImage->nbyper);
+   nifti_image *voxelNmiGradientImage = nifti_copy_nim_info(deformationFieldImage);
+   voxelNmiGradientImage->datatype = NIFTI_TYPE_FLOAT32;
+   voxelNmiGradientImage->nbyper = sizeof(float);
+   voxelNmiGradientImage->data = calloc(voxelNmiGradientImage->nvox, voxelNmiGradientImage->nbyper);
+   nifti_image *nodeNmiGradientImage = nifti_copy_nim_info(controlPointImage);
+   nodeNmiGradientImage->datatype = NIFTI_TYPE_FLOAT32;
+   nodeNmiGradientImage->nbyper = sizeof(float);
+   nodeNmiGradientImage->data = calloc(nodeNmiGradientImage->nvox, nodeNmiGradientImage->nbyper);
 
 #ifdef USE_CUDA
    float *targetImageArray_d;
@@ -502,9 +502,9 @@ int main(int argc, char **argv)
 
    // VOXEL-BASED NMI GRADIENT COMPUTATION
 #ifdef USE_CUDA
-   float4 *voxelNMIGradientArray_d;
+   float4 *voxelNmiGradientArray_d;
    if(runGPU)
-      Cuda::Allocate(&voxelNMIGradientArray_d, resultImage->dim);
+      Cuda::Allocate(&voxelNmiGradientArray_d, resultImage->dim);
 #endif
    {
       maxIt=100000 / dimension;
@@ -512,7 +512,7 @@ int main(int argc, char **argv)
       time(&start);
       for(int i=0; i<maxIt; ++i)
       {
-         reg_getVoxelBasedNMIGradientUsingPW<double>(targetImage,
+         reg_getVoxelBasedNmiGradientUsingPw<double>(targetImage,
                resultImage,
                2,
                resultGradientImage,
@@ -520,7 +520,7 @@ int main(int argc, char **argv)
                &binning,
                logJointHistogram,
                entropies,
-               voxelNMIGradientImage,
+               voxelNmiGradientImage,
                maskImage);
       }
       time(&end);
@@ -544,13 +544,13 @@ int main(int argc, char **argv)
          time(&start);
          for(int i=0; i<maxIt; ++i)
          {
-            reg_getVoxelBasedNMIGradientUsingPW_gpu(targetImage,
+            reg_getVoxelBasedNmiGradientUsingPw_gpu(targetImage,
                                                     resultImage,
                                                     &targetImageArray_d,
                                                     &resultImageArray_d,
                                                     &resultGradientArray_d,
                                                     &logJointHistogram_d,
-                                                    &voxelNMIGradientArray_d,
+                                                    &voxelNmiGradientArray_d,
                                                     &targetMask_d,
                                                     targetImage->nvox,
                                                     entropies,
@@ -580,9 +580,9 @@ int main(int argc, char **argv)
 
    // NODE-BASED NMI GRADIENT COMPUTATION
 #ifdef USE_CUDA
-   float4 *nodeNMIGradientArray_d;
+   float4 *nodeNmiGradientArray_d;
    if(runGPU)
-      Cuda::Allocate(&nodeNMIGradientArray_d, controlPointImage->dim);
+      Cuda::Allocate(&nodeNmiGradientArray_d, controlPointImage->dim);
 #endif
    {
       maxIt=10000 / dimension;
@@ -594,8 +594,8 @@ int main(int argc, char **argv)
       time(&start);
       for(int i=0; i<maxIt; ++i)
       {
-         reg_smoothImageForCubicSpline<float>(voxelNMIGradientImage,smoothingRadius);
-         reg_voxelCentricToNodeCentric(nodeNMIGradientImage,voxelNMIGradientImage,1.0f);
+         reg_smoothImageForCubicSpline<float>(voxelNmiGradientImage,smoothingRadius);
+         reg_voxelCentricToNodeCentric(nodeNmiGradientImage,voxelNmiGradientImage,1.0f);
       }
       time(&end);
       cpuTime=(end-start);
@@ -610,12 +610,12 @@ int main(int argc, char **argv)
          for(int i=0; i<maxIt; ++i)
          {
             reg_smoothImageForCubicSpline_gpu(resultImage,
-                                              &voxelNMIGradientArray_d,
+                                              &voxelNmiGradientArray_d,
                                               smoothingRadius);
             reg_voxelCentricToNodeCentric_gpu(targetImage,
                                               controlPointImage,
-                                              &voxelNMIGradientArray_d,
-                                              &nodeNMIGradientArray_d,
+                                              &voxelNmiGradientArray_d,
+                                              &nodeNmiGradientArray_d,
                                               1.0f);
          }
          time(&end);
@@ -634,8 +634,8 @@ int main(int argc, char **argv)
 #ifdef USE_CUDA
    if(runGPU)
    {
-      Cuda::Free(voxelNMIGradientArray_d);
-      Cuda::Free(nodeNMIGradientArray_d);
+      Cuda::Free(voxelNmiGradientArray_d);
+      Cuda::Free(nodeNmiGradientArray_d);
    }
 #endif
 
@@ -685,7 +685,7 @@ int main(int argc, char **argv)
       {
          reg_bspline_bendingEnergyGradient<float>(   controlPointImage,
                targetImage,
-               nodeNMIGradientImage,
+               nodeNmiGradientImage,
                0.01f);
       }
       time(&end);
@@ -703,7 +703,7 @@ int main(int argc, char **argv)
             reg_bspline_ApproxBendingEnergyGradient_gpu(targetImage,
                   controlPointImage,
                   &controlPointImageArray_d,
-                  &nodeNMIGradientArray_d,
+                  &nodeNmiGradientArray_d,
                   0.01f);
          }
          time(&end);
@@ -874,8 +874,8 @@ int main(int argc, char **argv)
    nifti_image_free(controlPointImage);
    nifti_image_free(deformationFieldImage);
    nifti_image_free(resultGradientImage);
-   nifti_image_free(voxelNMIGradientImage);
-   nifti_image_free(nodeNMIGradientImage);
+   nifti_image_free(voxelNmiGradientImage);
+   nifti_image_free(nodeNmiGradientImage);
    free(maskImage);
    free(probaJointHistogram);
    free(logJointHistogram);
diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp
index 07f6a60f..611e4170 100755
--- a/reg-apps/reg_measure.cpp
+++ b/reg-apps/reg_measure.cpp
@@ -326,7 +326,7 @@ int main(int argc, char **argv)
    if(flag->returnLNCCFlag){
       reg_lncc *lncc_object=new reg_lncc();
       for(int i=0;i<(refImage->nt<warpedFloImage->nt?refImage->nt:warpedFloImage->nt);++i)
-         lncc_object->SetTimepointWeight(i,1.0);
+         lncc_object->SetTimePointWeight(i,1.0);
       lncc_object->InitialiseMeasure(refImage,
                                     warpedFloImage,
                                     refMask.get(),
@@ -343,7 +343,7 @@ int main(int argc, char **argv)
    if(flag->returnNMIFlag){
       reg_nmi *nmi_object=new reg_nmi();
       for(int i=0;i<(refImage->nt<warpedFloImage->nt?refImage->nt:warpedFloImage->nt);++i)
-        nmi_object->SetTimepointWeight(i, 1.0);
+        nmi_object->SetTimePointWeight(i, 1.0);
       nmi_object->InitialiseMeasure(refImage,
                                     warpedFloImage,
                                     refMask.get(),
@@ -360,7 +360,7 @@ int main(int argc, char **argv)
    if(flag->returnSSDFlag){
       reg_ssd *ssd_object=new reg_ssd();
       for(int i=0;i<(refImage->nt<warpedFloImage->nt?refImage->nt:warpedFloImage->nt);++i)
-        ssd_object->SetTimepointWeight(i, 1.0);
+        ssd_object->SetTimePointWeight(i, 1.0);
       ssd_object->InitialiseMeasure(refImage,
                                     warpedFloImage,
                                     refMask.get(),
@@ -378,7 +378,7 @@ int main(int argc, char **argv)
    if(flag->returnMINDFlag){
       reg_mind *mind_object=new reg_mind();
       for(int i=0;i<(refImage->nt<warpedFloImage->nt?refImage->nt:warpedFloImage->nt);++i)
-        mind_object->SetTimepointWeight(i, 1.0);
+        mind_object->SetTimePointWeight(i, 1.0);
       mind_object->InitialiseMeasure(refImage,
                                     warpedFloImage,
                                     refMask.get(),
diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp
index 95d1292a..cbd29581 100644
--- a/reg-lib/Debug.hpp
+++ b/reg-lib/Debug.hpp
@@ -16,7 +16,7 @@
 /* *************************************************************** */
 namespace NiftyReg::Internal {
 /* *************************************************************** */
-inline void FatalError(const std::string& fileName, const int& line, const std::string& funcName, const std::string& msg) {
+inline void FatalError(const std::string& fileName, const int line, const std::string& funcName, const std::string& msg) {
     const std::string errMsg = "[NiftyReg ERROR] File: " + fileName + ":" + std::to_string(line) + "\n" +
                                "[NiftyReg ERROR] Function: " + funcName + "\n" +
                                "[NiftyReg ERROR] " + msg + "\n";
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 903f3731..566bc2f9 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -14,7 +14,7 @@
 
 /* *************************************************************** */
 template<class T>
-reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
+reg_base<T>::reg_base(int refTimePoints, int floTimePoints) {
     SetPlatformType(PlatformType::Cpu);
 
     maxIterationNumber = 150;
@@ -28,19 +28,19 @@ reg_base<T>::reg_base(int refTimePoint, int floTimePoint) {
     similarityWeight = 0; // automatically set depending of the penalty term weights
 
     executableName = (char*)"NiftyReg BASE";
-    referenceTimePoint = refTimePoint;
-    floatingTimePoint = floTimePoint;
+    referenceTimePoints = refTimePoints;
+    floatingTimePoints = floTimePoints;
     referenceSmoothingSigma = 0;
     floatingSmoothingSigma = 0;
 
-    referenceThresholdLow.reset(new T[referenceTimePoint]);
-    std::fill(referenceThresholdLow.get(), referenceThresholdLow.get() + referenceTimePoint, std::numeric_limits<T>::lowest());
-    referenceThresholdUp.reset(new T[referenceTimePoint]);
-    std::fill(referenceThresholdUp.get(), referenceThresholdUp.get() + referenceTimePoint, std::numeric_limits<T>::max());
-    floatingThresholdLow.reset(new T[floatingTimePoint]);
-    std::fill(floatingThresholdLow.get(), floatingThresholdLow.get() + floatingTimePoint, std::numeric_limits<T>::lowest());
-    floatingThresholdUp.reset(new T[floatingTimePoint]);
-    std::fill(floatingThresholdUp.get(), floatingThresholdUp.get() + floatingTimePoint, std::numeric_limits<T>::max());
+    referenceThresholdLow.reset(new T[referenceTimePoints]);
+    std::fill(referenceThresholdLow.get(), referenceThresholdLow.get() + referenceTimePoints, std::numeric_limits<T>::lowest());
+    referenceThresholdUp.reset(new T[referenceTimePoints]);
+    std::fill(referenceThresholdUp.get(), referenceThresholdUp.get() + referenceTimePoints, std::numeric_limits<T>::max());
+    floatingThresholdLow.reset(new T[floatingTimePoints]);
+    std::fill(floatingThresholdLow.get(), floatingThresholdLow.get() + floatingTimePoints, std::numeric_limits<T>::lowest());
+    floatingThresholdUp.reset(new T[floatingTimePoints]);
+    std::fill(floatingThresholdUp.get(), floatingThresholdUp.get() + floatingTimePoints, std::numeric_limits<T>::max());
 
     robustRange = false;
     warpedPaddingValue = std::numeric_limits<T>::quiet_NaN();
@@ -262,7 +262,7 @@ void reg_base<T>::CheckParameters() {
         !measure_kld && !measure_mind && !measure_mindssc) {
         measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
         for (int i = 0; i < inputReference->nt; ++i)
-            measure_nmi->SetTimepointWeight(i, 1.0);
+            measure_nmi->SetTimePointWeight(i, 1.0);
     }
 
     // Check that images have same number of channels (timepoints)
@@ -280,7 +280,7 @@ void reg_base<T>::CheckParameters() {
         double simWeightSum, totWeightSum = 0.;
         double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr;
         if (measure_nmi) {
-            nmiWeights = measure_nmi->GetTimepointsWeights();
+            nmiWeights = measure_nmi->GetTimePointWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (nmiWeights[n] < 0)
@@ -293,7 +293,7 @@ void reg_base<T>::CheckParameters() {
                 NR_WARN_WFCT("The NMI similarity measure has a weight of 0 for all channels so will be ignored");
         }
         if (measure_ssd) {
-            ssdWeights = measure_ssd->GetTimepointsWeights();
+            ssdWeights = measure_ssd->GetTimePointWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (ssdWeights[n] < 0)
@@ -306,7 +306,7 @@ void reg_base<T>::CheckParameters() {
                 NR_WARN_WFCT("The SSD similarity measure has a weight of 0 for all channels so will be ignored");
         }
         if (measure_kld) {
-            kldWeights = measure_kld->GetTimepointsWeights();
+            kldWeights = measure_kld->GetTimePointWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (kldWeights[n] < 0)
@@ -319,7 +319,7 @@ void reg_base<T>::CheckParameters() {
                 NR_WARN_WFCT("The KLD similarity measure has a weight of 0 for all channels so will be ignored");
         }
         if (measure_lncc) {
-            lnccWeights = measure_lncc->GetTimepointsWeights();
+            lnccWeights = measure_lncc->GetTimePointWeights();
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (lnccWeights[n] < 0)
@@ -335,13 +335,13 @@ void reg_base<T>::CheckParameters() {
             if (chanWeightSum[n] == 0)
                 NR_WARN_WFCT("Channel " << n << " has a weight of 0 for all similarity measures so will be ignored");
             if (measure_nmi)
-                measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum);
+                measure_nmi->SetTimePointWeight(n, nmiWeights[n] / totWeightSum);
             if (measure_ssd)
-                measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum);
+                measure_ssd->SetTimePointWeight(n, ssdWeights[n] / totWeightSum);
             if (measure_kld)
-                measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum);
+                measure_kld->SetTimePointWeight(n, kldWeights[n] / totWeightSum);
             if (measure_lncc)
-                measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum);
+                measure_lncc->SetTimePointWeight(n, lnccWeights[n] / totWeightSum);
         }
     }
 
@@ -560,7 +560,7 @@ template<class T>
 void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
     if (!measure_nmi)
         measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
-    measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    measure_nmi->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
     measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint);
@@ -571,7 +571,7 @@ template<class T>
 void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
     if (!measure_nmi)
         measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
-    measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    measure_nmi->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
     measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint);
@@ -582,7 +582,7 @@ template<class T>
 void reg_base<T>::UseSSD(int timepoint, bool normalise) {
     if (!measure_ssd)
         measure_ssd.reset(dynamic_cast<reg_ssd*>(measure->Create(MeasureType::Ssd)));
-    measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    measure_ssd->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     measure_ssd->SetNormaliseTimepoint(timepoint, normalise);
     NR_FUNC_CALLED();
 }
@@ -591,7 +591,7 @@ template<class T>
 void reg_base<T>::UseMIND(int timepoint, int offset) {
     if (!measure_mind)
         measure_mind.reset(dynamic_cast<reg_mind*>(measure->Create(MeasureType::Mind)));
-    measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
+    measure_mind->SetTimePointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mind->SetDescriptorOffset(offset);
     NR_FUNC_CALLED();
 }
@@ -600,7 +600,7 @@ template<class T>
 void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
     if (!measure_mindssc)
         measure_mindssc.reset(dynamic_cast<reg_mindssc*>(measure->Create(MeasureType::MindSsc)));
-    measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
+    measure_mindssc->SetTimePointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
     measure_mindssc->SetDescriptorOffset(offset);
     NR_FUNC_CALLED();
 }
@@ -609,7 +609,7 @@ template<class T>
 void reg_base<T>::UseKLDivergence(int timepoint) {
     if (!measure_kld)
         measure_kld.reset(dynamic_cast<reg_kld*>(measure->Create(MeasureType::Kld)));
-    measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    measure_kld->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
@@ -618,7 +618,7 @@ void reg_base<T>::UseLNCC(int timepoint, float stddev) {
     if (!measure_lncc)
         measure_lncc.reset(dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc)));
     measure_lncc->SetKernelStandardDeviation(timepoint, stddev);
-    measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0
+    measure_lncc->SetTimePointWeight(timepoint, 1.0); // weight initially set to default value of 1.0
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
@@ -638,7 +638,7 @@ void reg_base<T>::UseDTI(bool *timepoint) {
         measure_dti.reset(dynamic_cast<reg_dti*>(measure->Create(MeasureType::Dti)));
     for (int i = 0; i < inputReference->nt; ++i) {
         if (timepoint[i])
-            measure_dti->SetTimepointWeight(i, 1.0);  // weight set to 1.0 to indicate timepoint is active
+            measure_dti->SetTimePointWeight(i, 1.0);  // weight set to 1.0 to indicate timepoint is active
     }
     NR_FUNC_CALLED();
 }
@@ -647,28 +647,28 @@ template<class T>
 void reg_base<T>::SetNMIWeight(int timepoint, double weight) {
     if (!measure_nmi)
         NR_FATAL_ERROR("The NMI object has to be created before the timepoint weights can be set");
-    measure_nmi->SetTimepointWeight(timepoint, weight);
+    measure_nmi->SetTimePointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetLNCCWeight(int timepoint, double weight) {
     if (!measure_lncc)
         NR_FATAL_ERROR("The LNCC object has to be created before the timepoint weights can be set");
-    measure_lncc->SetTimepointWeight(timepoint, weight);
+    measure_lncc->SetTimePointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetSSDWeight(int timepoint, double weight) {
     if (!measure_ssd)
         NR_FATAL_ERROR("The SSD object has to be created before the timepoint weights can be set");
-    measure_ssd->SetTimepointWeight(timepoint, weight);
+    measure_ssd->SetTimePointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::SetKLDWeight(int timepoint, double weight) {
     if (!measure_kld)
         NR_FATAL_ERROR("The KLD object has to be created before the timepoint weights can be set");
-    measure_kld->SetTimepointWeight(timepoint, weight);
+    measure_kld->SetTimePointWeight(timepoint, weight);
 }
 /* *************************************************************** */
 template<class T>
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 01155ebe..007f26ec 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -63,8 +63,8 @@ class reg_base: public InterfaceOptimiser {
     NiftiImage localWeightSimInput;
 
     char *executableName;
-    int referenceTimePoint;
-    int floatingTimePoint;
+    int referenceTimePoints;
+    int floatingTimePoints;
     NiftiImage inputReference;
     NiftiImage inputFloating;
     NiftiImage maskImage;
@@ -133,7 +133,7 @@ class reg_base: public InterfaceOptimiser {
     virtual void CorrectTransformation() = 0;
 
 public:
-    reg_base(int refTimePoint, int floTimePoint);
+    reg_base(int refTimePoints, int floTimePoints);
 
     virtual void Run();
     virtual vector<NiftiImage> GetWarpedImage() = 0;
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 6eedbba3..0fece668 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -15,8 +15,8 @@
 
 /* *************************************************************** */
 template<class T>
-reg_f3d<T>::reg_f3d(int refTimePoint, int floTimePoint):
-    reg_base<T>::reg_base(refTimePoint, floTimePoint) {
+reg_f3d<T>::reg_f3d(int refTimePoints, int floTimePoints):
+    reg_base<T>::reg_base(refTimePoints, floTimePoints) {
 
     this->executableName = (char*)"NiftyReg F3D";
     bendingEnergyWeight = 0.001;
@@ -207,7 +207,7 @@ void reg_f3d<T>::Initialise() {
         NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputReference->nt - 1 << ": [" <<
                    this->referenceThresholdLow[i] << " " << this->referenceThresholdUp[i] << "]");
         if (this->measure_nmi) {
-            if (this->measure_nmi->GetTimepointsWeights()[i] > 0) {
+            if (this->measure_nmi->GetTimePointWeights()[i] > 0) {
                 NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputReference->nt - 1 << ": " <<
                            this->measure_nmi->GetReferenceBinNumber()[i] - 4);
             }
@@ -225,7 +225,7 @@ void reg_f3d<T>::Initialise() {
         NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": [" <<
                    this->floatingThresholdLow[i] << " " << this->floatingThresholdUp[i] << "]");
         if (this->measure_nmi) {
-            if (this->measure_nmi->GetTimepointsWeights()[i] > 0) {
+            if (this->measure_nmi->GetTimePointWeights()[i] > 0) {
                 NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": " <<
                            this->measure_nmi->GetFloatingBinNumber()[i] - 4);
             }
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index 882020b4..a7a793ca 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -63,7 +63,7 @@ class reg_f3d: public reg_base<T> {
     virtual void GetLandmarkDistanceGradient();
 
 public:
-    reg_f3d(int refTimePoint, int floTimePoint);
+    reg_f3d(int refTimePoints, int floTimePoints);
 
     virtual NiftiImage GetControlPointPositionImage();
     virtual vector<NiftiImage> GetWarpedImage() override;
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 9df66103..4337dd7f 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -15,8 +15,8 @@
 
 /* *************************************************************** */
 template <class T>
-reg_f3d2<T>::reg_f3d2(int refTimePoint, int floTimePoint):
-    reg_f3d<T>::reg_f3d(refTimePoint, floTimePoint) {
+reg_f3d2<T>::reg_f3d2(int refTimePoints, int floTimePoints):
+    reg_f3d<T>::reg_f3d(refTimePoints, floTimePoints) {
     this->executableName = (char*)"NiftyReg F3D2";
     inverseConsistencyWeight = 0;
     bchUpdate = false;
diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h
index a231ec46..c11c857e 100644
--- a/reg-lib/_reg_f3d2.h
+++ b/reg-lib/_reg_f3d2.h
@@ -66,7 +66,7 @@ class reg_f3d2: public reg_f3d<T> {
     virtual void ExponentiateGradient();
 
 public:
-    reg_f3d2(int refTimePoint, int floTimePoint);
+    reg_f3d2(int refTimePoints, int floTimePoints);
 
     virtual NiftiImage GetBackwardControlPointPositionImage() override;
     virtual vector<NiftiImage> GetWarpedImage() override;
diff --git a/reg-lib/_reg_polyAffine.cpp b/reg-lib/_reg_polyAffine.cpp
index 27569d2c..73ed7b97 100644
--- a/reg-lib/_reg_polyAffine.cpp
+++ b/reg-lib/_reg_polyAffine.cpp
@@ -15,8 +15,8 @@
 /* *************************************************************** */
 /* *************************************************************** */
 template <class T>
-reg_polyAffine<T>::reg_polyAffine(int refTimePoint,int floTimePoint)
-   : reg_base<T>::reg_base(refTimePoint,floTimePoint)
+reg_polyAffine<T>::reg_polyAffine(int refTimePoints,int floTimePoints)
+   : reg_base<T>::reg_base(refTimePoints,floTimePoints)
 {
    this->executableName=(char *)"NiftyReg PolyAffine";
    NR_FUNC_CALLED();
diff --git a/reg-lib/_reg_polyAffine.h b/reg-lib/_reg_polyAffine.h
index dbbc831a..28a7f5ff 100644
--- a/reg-lib/_reg_polyAffine.h
+++ b/reg-lib/_reg_polyAffine.h
@@ -34,7 +34,7 @@ class reg_polyAffine : public reg_base<T>
    void DeallocateTransformationGradient();
 
 public:
-   reg_polyAffine(int refTimePoint,int floTimePoint);
+   reg_polyAffine(int refTimePoints,int floTimePoints);
    ~reg_polyAffine();
 };
 
diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp
index 7e563abe..c702c241 100755
--- a/reg-lib/cpu/_reg_dti.cpp
+++ b/reg-lib/cpu/_reg_dti.cpp
@@ -48,9 +48,9 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg,
 
     int j = 0;
     for (int i = 0; i < refImg->nt; ++i) {
-        // JM - note, the specific value of timePointWeight is not used for DTI images
+        // JM - note, the specific value of timePointWeights is not used for DTI images
         // any value > 0 indicates the 'time point' is active
-        if (this->timePointWeight[i] > 0) {
+        if (this->timePointWeights[i] > 0) {
             this->dtIndicies[j++] = i;
             NR_DEBUG("Active time point: " << i);
         }
@@ -258,7 +258,7 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
     }, NiftiImage::getFloatingDataType(referenceImage));
 }
 /* *************************************************************** */
-void reg_dti::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+void reg_dti::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
                                              this->warpedImage,
                                              this->warpedGradient,
@@ -267,7 +267,7 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
                                              this->dtIndicies);
 }
 /* *************************************************************** */
-void reg_dti::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+void reg_dti::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
                                              this->warpedImageBw,
                                              this->warpedGradientBw,
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 1f96c167..83fd60fa 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -42,9 +42,9 @@ class reg_dti: public reg_measure {
     /// @brief Returns the dti value backwards
     virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel-based gradient for DTI images forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override;
     /// @brief Compute the voxel-based gradient for DTI images backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override;
 
 protected:
     // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index cf3f5deb..68de1aa8 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -46,8 +46,8 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg,
         NR_FATAL_ERROR("This number of time point should be the same for both input images");
 
     // Input images are expected to be bounded between 0 and 1 as they are meant to be probabilities
-    for (int t = 0; t < this->referenceImage->nt; ++t) {
-        if (this->timePointWeight[t] > 0) {
+    for (int t = 0; t < this->referenceTimePoints; ++t) {
+        if (this->timePointWeights[t] > 0) {
             const float minRef = reg_tools_getMinValue(this->referenceImage, t);
             const float maxRef = reg_tools_getMaxValue(this->referenceImage, t);
             const float minFlo = reg_tools_getMinValue(this->floatingImage, t);
@@ -57,15 +57,15 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg,
         }
     }
 
-    for (int i = 0; i < this->referenceImage->nt; ++i)
-        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]);
+    for (int i = 0; i < this->referenceTimePoints; ++i)
+        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 /** @brief Computes and returns the KLD between two input image
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
- * @param timePointWeight Array that contains the weight of each time point
+ * @param timePointWeights Array that contains the weight of each time point
  * @param jacobianDetImg Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
  * image is used to modulate the KLD. The argument is ignored if the
@@ -77,7 +77,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg,
 template <class DataType>
 double reg_getKLDivergence(const nifti_image *referenceImage,
                            const nifti_image *warpedImage,
-                           const double *timePointWeight,
+                           const double *timePointWeights,
                            const nifti_image *jacobianDetImg,
                            const int *mask) {
 #ifdef _WIN32
@@ -94,7 +94,7 @@ double reg_getKLDivergence(const nifti_image *referenceImage,
     double measure = 0, measureTp = 0, num = 0;
 
     for (int time = 0; time < referenceImage->nt; ++time) {
-        if (timePointWeight[time] > 0) {
+        if (timePointWeights[time] > 0) {
             const DataType *currentRefPtr = &refPtr[time * voxelNumber];
             const DataType *currentWarPtr = &warPtr[time * voxelNumber];
 #ifdef _OPENMP
@@ -114,7 +114,7 @@ double reg_getKLDivergence(const nifti_image *referenceImage,
                     }
                 }
             }
-            measure += measureTp * timePointWeight[time] / num;
+            measure += measureTp * timePointWeights[time] / num;
         }
     }
     return measure;
@@ -122,14 +122,14 @@ double reg_getKLDivergence(const nifti_image *referenceImage,
 /* *************************************************************** */
 double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  const nifti_image *warpedImage,
-                                 const double *timePointWeight,
+                                 const double *timePointWeights,
                                  const nifti_image *jacobianDetImg,
                                  const int *mask) {
     return std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         return reg_getKLDivergence<RefImgDataType>(referenceImage,
                                                    warpedImage,
-                                                   timePointWeight,
+                                                   timePointWeights,
                                                    jacobianDetImg,
                                                    mask);
     }, NiftiImage::getFloatingDataType(referenceImage));
@@ -138,7 +138,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
 double reg_kld::GetSimilarityMeasureValueFw() {
     return ::GetSimilarityMeasureValue(this->referenceImage,
                                        this->warpedImage,
-                                       this->timePointWeight,
+                                       this->timePointWeights,
                                        nullptr, // TODO this->forwardJacDetImagePointer,
                                        this->referenceMask);
 }
@@ -146,7 +146,7 @@ double reg_kld::GetSimilarityMeasureValueFw() {
 double reg_kld::GetSimilarityMeasureValueBw() {
     return ::GetSimilarityMeasureValue(this->floatingImage,
                                        this->warpedImageBw,
-                                       this->timePointWeight,
+                                       this->timePointWeights,
                                        nullptr, // TODO this->backwardJacDetImagePointer,
                                        this->floatingMask);
 }
@@ -163,7 +163,7 @@ double reg_kld::GetSimilarityMeasureValueBw() {
  * pointer is set to nullptr
  * @param mask Array that contains a mask to specify which voxel
  * should be considered
- * @param currentTimepoint Specified which time point volumes have to be considered
+ * @param currentTimePoint Specified which time point volumes have to be considered
  * @param timepointWeight Weight of the current time point
  */
 template <class DataType>
@@ -173,7 +173,7 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
                                            nifti_image *measureGradient,
                                            const nifti_image *jacobianDetImg,
                                            const int *mask,
-                                           const int currentTimepoint,
+                                           const int currentTimePoint,
                                            const double timepointWeight) {
 #ifdef _WIN32
     long voxel;
@@ -184,8 +184,8 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
 #endif
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
     const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
-    const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
+    const DataType *currentRefPtr = &refImagePtr[currentTimePoint * voxelNumber];
+    const DataType *currentWarPtr = &warImagePtr[currentTimePoint * voxelNumber];
     const DataType *jacPtr = jacobianDetImg ? static_cast<DataType*>(jacobianDetImg->data) : nullptr;
 
     // Create pointers to the spatial gradient of the current warped volume
@@ -262,7 +262,7 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
                                             nifti_image *voxelBasedGradient,
                                             nifti_image *jacobianDetImg,
                                             int *mask,
-                                            int currentTimepoint,
+                                            int currentTimePoint,
                                             double timepointWeight) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
@@ -272,30 +272,30 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
                                                               voxelBasedGradient,
                                                               jacobianDetImg,
                                                               mask,
-                                                              currentTimepoint,
+                                                              currentTimePoint,
                                                               timepointWeight);
     }, NiftiImage::getFloatingDataType(referenceImage));
 }
 /* *************************************************************** */
-void reg_kld::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+void reg_kld::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
                                              this->warpedImage,
                                              this->warpedGradient,
                                              this->voxelBasedGradient,
                                              nullptr, // TODO this->forwardJacDetImagePointer,
                                              this->referenceMask,
-                                             currentTimepoint,
-                                             this->timePointWeight[currentTimepoint]);
+                                             currentTimePoint,
+                                             this->timePointWeights[currentTimePoint]);
 }
 /* *************************************************************** */
-void reg_kld::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+void reg_kld::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
                                              this->warpedImageBw,
                                              this->warpedGradientBw,
                                              this->voxelBasedGradientBw,
                                              nullptr, // TODO this->backwardJacDetImagePointer,
                                              this->floatingMask,
-                                             currentTimepoint,
-                                             this->timePointWeight[currentTimepoint]);
+                                             currentTimePoint,
+                                             this->timePointWeights[currentTimePoint]);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h
index 1f4b30de..e484b328 100755
--- a/reg-lib/cpu/_reg_kld.h
+++ b/reg-lib/cpu/_reg_kld.h
@@ -39,8 +39,8 @@ class reg_kld: public reg_measure {
     /// @brief Returns the kld value backwards
     virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel-based kld gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override;
     /// @brief Compute the voxel-based kld gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override;
 };
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 76145602..9b823da1 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -100,8 +100,8 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg,
                                    warpedGradBw,
                                    voxelBasedGradBw);
 
-    for (int i = 0; i < this->referenceImage->nt; ++i) {
-        if (this->timePointWeight[i] > 0) {
+    for (int i = 0; i < this->referenceTimePoints; ++i) {
+        if (this->timePointWeights[i] > 0) {
             reg_intensityRescale(this->referenceImage, i, 0.f, 1.f);
             reg_intensityRescale(this->floatingImage, i, 0.f, 1.f);
         }
@@ -186,8 +186,8 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg,
         this->backwardMask = (int*)malloc(voxelNumber * sizeof(int));
     }
 
-    for (int i = 0; i < this->referenceImage->nt; ++i)
-        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]);
+    for (int i = 0; i < this->referenceTimePoints; ++i)
+        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
@@ -202,7 +202,7 @@ void UpdateLocalStatImages(const nifti_image *refImage,
                            int *combinedMask,
                            const float *kernelStandardDeviation,
                            const ConvKernelType kernelType,
-                           const int currentTimepoint) {
+                           const int currentTimePoint) {
     // Generate the combined mask to ignore all NaN values
 #ifdef _WIN32
     long voxel;
@@ -218,8 +218,8 @@ void UpdateLocalStatImages(const nifti_image *refImage,
     const DataType *origRefPtr = static_cast<DataType*>(refImage->data);
     DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
     DataType *sdevImgPtr = static_cast<DataType*>(sdevImage->data);
-    memcpy(meanImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper);
-    memcpy(sdevImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper);
+    memcpy(meanImgPtr, &origRefPtr[currentTimePoint * voxelNumber], voxelNumber * refImage->nbyper);
+    memcpy(sdevImgPtr, &origRefPtr[currentTimePoint * voxelNumber], voxelNumber * refImage->nbyper);
 
     reg_tools_multiplyImageToImage(sdevImage, sdevImage, sdevImage);
     reg_tools_kernelConvolution(meanImage, kernelStandardDeviation, kernelType, combinedMask);
@@ -228,8 +228,8 @@ void UpdateLocalStatImages(const nifti_image *refImage,
     const DataType *origWarPtr = static_cast<DataType*>(warImage->data);
     DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
     DataType *warSdevPtr = static_cast<DataType*>(warpedSdevImage->data);
-    memcpy(warMeanPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper);
-    memcpy(warSdevPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper);
+    memcpy(warMeanPtr, &origWarPtr[currentTimePoint * voxelNumber], voxelNumber * warImage->nbyper);
+    memcpy(warSdevPtr, &origWarPtr[currentTimePoint * voxelNumber], voxelNumber * warImage->nbyper);
 
     reg_tools_multiplyImageToImage(warpedSdevImage, warpedSdevImage, warpedSdevImage);
     reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
@@ -259,7 +259,7 @@ double reg_getLnccValue(const nifti_image *referenceImage,
                         const float *kernelStandardDeviation,
                         nifti_image *correlationImage,
                         const ConvKernelType kernelType,
-                        const int currentTimepoint) {
+                        const int currentTimePoint) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -269,10 +269,10 @@ double reg_getLnccValue(const nifti_image *referenceImage,
 #endif
     // Compute the local correlation
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
+    const DataType *currentRefPtr = &refImagePtr[currentTimePoint * voxelNumber];
 
     const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
+    const DataType *currentWarPtr = &warImagePtr[currentTimePoint * voxelNumber];
 
     const DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
     const DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
@@ -319,11 +319,11 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  const float *kernelStandardDeviation,
                                  nifti_image *correlationImage,
                                  const ConvKernelType kernelType,
-                                 const int referenceTimePoint,
-                                 const double *timePointWeight) {
+                                 const int referenceTimePoints,
+                                 const double *timePointWeights) {
     double lncc = 0;
-    for (int currentTimepoint = 0; currentTimepoint < referenceTimePoint; ++currentTimepoint) {
-        if (timePointWeight[currentTimepoint] > 0) {
+    for (int currentTimePoint = 0; currentTimePoint < referenceTimePoints; ++currentTimePoint) {
+        if (timePointWeights[currentTimePoint] > 0) {
             const double tp = std::visit([&](auto&& refImgDataType) {
                 using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
                 // Compute the mean and variance of the reference and warped floating
@@ -337,7 +337,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                                       forwardMask,
                                                       kernelStandardDeviation,
                                                       kernelType,
-                                                      currentTimepoint);
+                                                      currentTimePoint);
                 // Compute the LNCC value
                 return reg_getLnccValue<RefImgDataType>(referenceImage,
                                                         meanImage,
@@ -349,9 +349,9 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                                         kernelStandardDeviation,
                                                         correlationImage,
                                                         kernelType,
-                                                        currentTimepoint);
+                                                        currentTimePoint);
             }, NiftiImage::getFloatingDataType(referenceImage));
-            lncc += tp * timePointWeight[currentTimepoint];
+            lncc += tp * timePointWeights[currentTimePoint];
         }
     }
     return lncc;
@@ -369,8 +369,8 @@ double reg_lncc::GetSimilarityMeasureValueFw() {
                                        this->kernelStandardDeviation,
                                        this->correlationImage,
                                        this->kernelType,
-                                       this->referenceTimePoint,
-                                       this->timePointWeight);
+                                       this->referenceTimePoints,
+                                       this->timePointWeights);
 }
 /* *************************************************************** */
 double reg_lncc::GetSimilarityMeasureValueBw() {
@@ -385,8 +385,8 @@ double reg_lncc::GetSimilarityMeasureValueBw() {
                                        this->kernelStandardDeviation,
                                        this->correlationImageBw,
                                        this->kernelType,
-                                       this->referenceTimePoint,
-                                       this->timePointWeight);
+                                       this->referenceTimePoints,
+                                       this->timePointWeights);
 }
 /* *************************************************************** */
 template <class DataType>
@@ -402,7 +402,7 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage,
                                    const nifti_image *warpedGradient,
                                    nifti_image *measureGradient,
                                    const ConvKernelType kernelType,
-                                   const int currentTimepoint,
+                                   const int currentTimePoint,
                                    const double timepointWeight) {
 #ifdef _WIN32
     long voxel;
@@ -413,10 +413,10 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage,
 #endif
     // Compute the local correlation
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
+    const DataType *currentRefPtr = &refImagePtr[currentTimePoint * voxelNumber];
 
     const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
+    const DataType *currentWarPtr = &warImagePtr[currentTimePoint * voxelNumber];
 
     const DataType *meanImgPtr = static_cast<DataType*>(meanImage->data);
     DataType *warMeanPtr = static_cast<DataType*>(warpedMeanImage->data);
@@ -530,7 +530,7 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                             const nifti_image *warpedGradient,
                                             nifti_image *measureGradient,
                                             const ConvKernelType kernelType,
-                                            const int currentTimepoint,
+                                            const int currentTimePoint,
                                             const double timepointWeight) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
@@ -545,7 +545,7 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                               forwardMask,
                                               kernelStandardDeviation,
                                               kernelType,
-                                              currentTimepoint);
+                                              currentTimePoint);
         // Compute the LNCC gradient
         reg_getVoxelBasedLnccGradient<RefImgDataType>(referenceImage,
                                                       meanImage,
@@ -559,12 +559,12 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                                       warpedGradient,
                                                       measureGradient,
                                                       kernelType,
-                                                      currentTimepoint,
+                                                      currentTimePoint,
                                                       timepointWeight);
     }, NiftiImage::getFloatingDataType(referenceImage));
 }
 /* *************************************************************** */
-void reg_lncc::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+void reg_lncc::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
                                              this->meanImage,
                                              this->sdevImage,
@@ -578,11 +578,11 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
                                              this->warpedGradient,
                                              this->voxelBasedGradient,
                                              this->kernelType,
-                                             currentTimepoint,
-                                             this->timePointWeight[currentTimepoint]);
+                                             currentTimePoint,
+                                             this->timePointWeights[currentTimePoint]);
 }
 /* *************************************************************** */
-void reg_lncc::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+void reg_lncc::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
                                              this->meanImageBw,
                                              this->sdevImageBw,
@@ -596,7 +596,7 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
                                              this->warpedGradientBw,
                                              this->voxelBasedGradientBw,
                                              this->kernelType,
-                                             currentTimepoint,
-                                             this->timePointWeight[currentTimepoint]);
+                                             currentTimePoint,
+                                             this->timePointWeights[currentTimePoint]);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h
index fea5e464..bb3140b9 100644
--- a/reg-lib/cpu/_reg_lncc.h
+++ b/reg-lib/cpu/_reg_lncc.h
@@ -39,9 +39,9 @@ class reg_lncc: public reg_measure {
     /// @brief Returns the lncc value backwards
     virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel-based lncc gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override;
     /// @brief Compute the voxel-based lncc gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override;
     /// @brief Set the kernel standard deviation
     virtual void SetKernelStandardDeviation(int t, float stddev) {
         this->kernelStandardDeviation[t] = stddev;
diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h
index 68277bdb..7017548d 100755
--- a/reg-lib/cpu/_reg_measure.h
+++ b/reg-lib/cpu/_reg_measure.h
@@ -8,7 +8,6 @@
 #pragma once
 
 #include "_reg_tools.h"
-#include <time.h>
 
 /// @brief Class common to all measure of similarity classes
 class reg_measure {
@@ -34,7 +33,7 @@ class reg_measure {
                                    nifti_image *voxelBasedGradBw = nullptr) {
         this->isSymmetric = false;
         this->referenceImage = refImg;
-        this->referenceTimePoint = this->referenceImage->nt;
+        this->referenceTimePoints = this->referenceImage->nt;
         this->floatingImage = floImg;
         this->referenceMask = refMask;
         this->warpedImage = warpedImg;
@@ -81,15 +80,15 @@ class reg_measure {
     }
 
     /// @brief Compute the forward voxel-based measure of similarity gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) = 0;
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) = 0;
     /// @brief Compute the backward voxel-based measure of similarity gradient
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) = 0;
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) = 0;
     /// @brief Compute the voxel-based measure of similarity gradient
-    void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) {  // Do not override
+    void GetVoxelBasedSimilarityMeasureGradient(int currentTimePoint) {  // Do not override
         // Check if the specified time point exists and is active
-        if (currentTimepoint < 0 || currentTimepoint >= this->referenceImage->nt)
-            NR_FATAL_ERROR("The specified active timepoint is not defined in the ref/war images");
-        if (this->timePointWeight[currentTimepoint] == 0)
+        if (currentTimePoint < 0 || currentTimePoint >= this->referenceTimePoints)
+            NR_FATAL_ERROR("The specified active time point is not defined in the ref/war images");
+        if (this->timePointWeights[currentTimePoint] == 0)
             return;
         // Check if all required input images are of the same data type
         int dtype = this->referenceImage->datatype;
@@ -100,7 +99,7 @@ class reg_measure {
             this->voxelBasedGradient->datatype != dtype)
             NR_FATAL_ERROR("Input images are expected to be of the same type");
         // Compute the gradient
-        GetVoxelBasedSimilarityMeasureGradientFw(currentTimepoint);
+        GetVoxelBasedSimilarityMeasureGradientFw(currentTimePoint);
         if (this->isSymmetric) {
             dtype = this->floatingImage->datatype;
             if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64)
@@ -109,16 +108,16 @@ class reg_measure {
                 this->warpedGradientBw->datatype != dtype ||
                 this->voxelBasedGradientBw->datatype != dtype)
                 NR_FATAL_ERROR("Input images are expected to be of the same type");
-            GetVoxelBasedSimilarityMeasureGradientBw(currentTimepoint);
+            GetVoxelBasedSimilarityMeasureGradientBw(currentTimePoint);
         }
         NR_FUNC_CALLED();
     }
     virtual void GetDiscretisedValue(nifti_image*, float*, int, int) {}
-    virtual void SetTimepointWeight(int timepoint, double weight) {
-        this->timePointWeight[timepoint] = weight;
+    virtual void SetTimePointWeight(int timePoint, double weight) {
+        this->timePointWeights[timePoint] = weight;
     }
-    virtual double* GetTimepointsWeights() {
-        return this->timePointWeight;
+    virtual double* GetTimePointWeights() {
+        return this->timePointWeights;
     }
     virtual nifti_image* GetReferenceImage() {
         return this->referenceImage;
@@ -142,6 +141,6 @@ class reg_measure {
     nifti_image *warpedGradientBw;
     nifti_image *voxelBasedGradientBw;
 
-    double timePointWeight[255] = {0};
-    int referenceTimePoint;
+    double timePointWeights[255]{};
+    int referenceTimePoints;
 };
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 375bc917..30e15cff 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -58,7 +58,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
                                 nifti_image *mindImage,
                                 const int *mask,
                                 const int& descriptorOffset,
-                                const int& currentTimepoint) {
+                                const int& currentTimePoint) {
 #ifdef WIN32
     long voxelIndex;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
@@ -75,7 +75,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
     currentInputImage->nt = currentInputImage->dim[4] = 1;
     currentInputImage->nvox = voxelNumber;
     DataType *inputImagePtr = static_cast<DataType*>(inputImage->data);
-    currentInputImage->data = &inputImagePtr[currentTimepoint * voxelNumber];
+    currentInputImage->data = &inputImagePtr[currentTimePoint * voxelNumber];
 
     // Allocate an image to store the mean image
     nifti_image *meanImage = nifti_dup(*currentInputImage, false);
@@ -148,12 +148,12 @@ void GetMindImageDescriptor(const nifti_image *inputImage,
                             nifti_image *mindImage,
                             const int *mask,
                             const int& descriptorOffset,
-                            const int& currentTimepoint) {
+                            const int& currentTimePoint) {
     if (inputImage->datatype != mindImage->datatype)
         NR_FATAL_ERROR("The input image and the MIND image must have the same datatype");
     std::visit([&](auto&& imgType) {
         using ImgType = std::decay_t<decltype(imgType)>;
-        GetMindImageDescriptorCore<ImgType>(inputImage, mindImage, mask, descriptorOffset, currentTimepoint);
+        GetMindImageDescriptorCore<ImgType>(inputImage, mindImage, mask, descriptorOffset, currentTimePoint);
     }, NiftiImage::getFloatingDataType(inputImage));
     NR_FUNC_CALLED();
 }
@@ -163,7 +163,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
                                    nifti_image *mindSscImage,
                                    const int *mask,
                                    const int& descriptorOffset,
-                                   const int& currentTimepoint) {
+                                   const int& currentTimePoint) {
 #ifdef WIN32
     long voxelIndex;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
@@ -180,7 +180,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
     currentInputImage->nt = currentInputImage->dim[4] = 1;
     currentInputImage->nvox = voxelNumber;
     DataType *inputImagePtr = static_cast<DataType*>(inputImage->data);
-    currentInputImage->data = &inputImagePtr[currentTimepoint * voxelNumber];
+    currentInputImage->data = &inputImagePtr[currentTimePoint * voxelNumber];
 
     // Allocate an image to store the mean image
     nifti_image *meanImg = nifti_dup(*currentInputImage, false);
@@ -272,12 +272,12 @@ void GetMindSscImageDescriptor(const nifti_image *inputImage,
                                nifti_image *mindSscImage,
                                const int *mask,
                                const int& descriptorOffset,
-                               const int& currentTimepoint) {
+                               const int& currentTimePoint) {
     if (inputImage->datatype != mindSscImage->datatype)
         NR_FATAL_ERROR("The input image and the MINDSSC image must have the same datatype!");
     std::visit([&](auto&& imgType) {
         using ImgType = std::decay_t<decltype(imgType)>;
-        GetMindSscImageDescriptorCore<ImgType>(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint);
+        GetMindSscImageDescriptorCore<ImgType>(inputImage, mindSscImage, mask, descriptorOffset, currentTimePoint);
     }, NiftiImage::getFloatingDataType(inputImage));
     NR_FUNC_CALLED();
 }
@@ -378,13 +378,13 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg,
     }
 
     for (int i = 0; i < referenceImageDescriptor->nt; ++i) {
-        this->timePointWeightDescriptor[i] = 1.0;
+        this->timePointWeightsDescriptor[i] = 1.0;
     }
 
 #ifndef NDEBUG
     std::string msg = "Active time point:";
     for (int i = 0; i < this->referenceImageDescriptor->nt; ++i)
-        if (this->timePointWeightDescriptor[i] > 0)
+        if (this->timePointWeightsDescriptor[i] > 0)
             msg += " " + std::to_string(i);
     NR_DEBUG(msg);
     NR_FUNC_CALLED();
@@ -396,11 +396,11 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage,
                                  const int *referenceMask,
                                  nifti_image *warpedImage,
                                  nifti_image *warpedFloatingImageDescriptor,
-                                 const double *timePointWeight,
-                                 double *timePointWeightDescriptor,
+                                 const double *timePointWeights,
+                                 double *timePointWeightsDescriptor,
                                  nifti_image *jacobianDetImage,
                                  const int descriptorOffset,
-                                 const int referenceTimePoint,
+                                 const int referenceTimePoints,
                                  const int mindType) {
     if (referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT32 &&
         referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT64)
@@ -411,20 +411,21 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage,
     unique_ptr<int[]> combinedMask(new int[voxelNumber]);
     auto GetMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor;
 
-    for (int currentTimepoint = 0; currentTimepoint < referenceTimePoint; ++currentTimepoint) {
-        if (timePointWeight[currentTimepoint] > 0) {
+    for (int currentTimePoint = 0; currentTimePoint < referenceTimePoints; ++currentTimePoint) {
+        if (timePointWeights[currentTimePoint] > 0) {
             memcpy(combinedMask.get(), referenceMask, voxelNumber * sizeof(int));
             reg_tools_removeNanFromMask(referenceImage, combinedMask.get());
             reg_tools_removeNanFromMask(warpedImage, combinedMask.get());
 
-            GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.get(), descriptorOffset, currentTimepoint);
-            GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.get(), descriptorOffset, currentTimepoint);
+            GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.get(), descriptorOffset, currentTimePoint);
+            GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.get(), descriptorOffset, currentTimePoint);
 
             std::visit([&](auto&& refImgDataType) {
                 using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
                 mind += reg_getSsdValue<RefImgDataType>(referenceImageDescriptor,
                                                         warpedFloatingImageDescriptor,
-                                                        timePointWeightDescriptor,
+                                                        timePointWeightsDescriptor,
+                                                        referenceTimePoints,
                                                         jacobianDetImage,
                                                         combinedMask.get(),
                                                         nullptr);
@@ -440,11 +441,11 @@ double reg_mind::GetSimilarityMeasureValueFw() {
                                        this->referenceMask,
                                        this->warpedImage,
                                        this->warpedFloatingImageDescriptor,
-                                       this->timePointWeight,
-                                       this->timePointWeightDescriptor,
+                                       this->timePointWeights,
+                                       this->timePointWeightsDescriptor,
                                        nullptr, // TODO this->forwardJacDetImagePointer,
                                        this->descriptorOffset,
-                                       this->referenceTimePoint,
+                                       this->referenceTimePoints,
                                        this->mindType);
 }
 /* *************************************************************** */
@@ -454,11 +455,11 @@ double reg_mind::GetSimilarityMeasureValueBw() {
                                        this->floatingMask,
                                        this->warpedImageBw,
                                        this->warpedReferenceImageDescriptor,
-                                       this->timePointWeight,
-                                       this->timePointWeightDescriptor,
+                                       this->timePointWeights,
+                                       this->timePointWeightsDescriptor,
                                        nullptr, // TODO this->backwardJacDetImagePointer,
                                        this->descriptorOffset,
-                                       this->referenceTimePoint,
+                                       this->referenceTimePoints,
                                        this->mindType);
 }
 /* *************************************************************** */
@@ -472,7 +473,7 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
                                             const int mindType,
                                             const int descriptorOffset,
                                             const int descriptorNumber,
-                                            const int currentTimepoint) {
+                                            const int currentTimePoint) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     vector<int> combinedMask(referenceMask, referenceMask + voxelNumber);
     reg_tools_removeNanFromMask(referenceImage, combinedMask.data());
@@ -480,9 +481,9 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
 
     auto GetMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor;
     // Compute the reference image descriptors
-    GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.data(), descriptorOffset, currentTimepoint);
+    GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.data(), descriptorOffset, currentTimePoint);
     // Compute the warped floating image descriptors
-    GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.data(), descriptorOffset, currentTimepoint);
+    GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.data(), descriptorOffset, currentTimePoint);
 
     for (int descIndex = 0; descIndex < descriptorNumber; ++descIndex) {
         // Compute the warped image descriptors gradient
@@ -508,7 +509,7 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
     }
 }
 /* *************************************************************** */
-void reg_mind::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+void reg_mind::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
                                              this->referenceImageDescriptor,
                                              this->referenceMask,
@@ -519,10 +520,10 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
                                              this->mindType,
                                              this->descriptorOffset,
                                              this->descriptorNumber,
-                                             currentTimepoint);
+                                             currentTimePoint);
 }
 /* *************************************************************** */
-void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
                                              this->floatingImageDescriptor,
                                              this->floatingMask,
@@ -533,7 +534,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
                                              this->mindType,
                                              this->descriptorOffset,
                                              this->descriptorNumber,
-                                             currentTimepoint);
+                                             currentTimePoint);
 }
 /* *************************************************************** */
 reg_mindssc::reg_mindssc(): reg_mind() {
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index 92e08eeb..b32dee3e 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -45,9 +45,9 @@ class reg_mind: public reg_ssd {
     /// @brief Returns the backward mind-based measure of similarity value
     virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel-based mind gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override;
     /// @brief Compute the voxel-based mind gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override;
     virtual void SetDescriptorOffset(int val) { this->descriptorOffset = val; }
     virtual int GetDescriptorOffset() { return this->descriptorOffset; }
 
@@ -56,7 +56,7 @@ class reg_mind: public reg_ssd {
     nifti_image *floatingImageDescriptor;
     nifti_image *warpedReferenceImageDescriptor;
     nifti_image *warpedFloatingImageDescriptor;
-    double timePointWeightDescriptor[255]{};
+    double timePointWeightsDescriptor[255]{};
     int descriptorOffset;
     int mindType;
     int descriptorNumber;
@@ -75,11 +75,11 @@ void GetMindImageDescriptor(const nifti_image *inputImage,
                             nifti_image *mindImage,
                             const int *mask,
                             const int& descriptorOffset,
-                            const int& currentTimepoint);
+                            const int& currentTimePoint);
 /* *************************************************************** */
 void GetMindSscImageDescriptor(const nifti_image *inputImage,
                                nifti_image *mindSscImage,
                                const int *mask,
                                const int& descriptorOffset,
-                               const int& currentTimepoint);
+                               const int& currentTimePoint);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index e6fc735f..f8d0d548 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -20,7 +20,7 @@ reg_nmi::reg_nmi(): reg_measure() {
     this->jointHistogramProBw = nullptr;
     this->jointHistogramLogBw = nullptr;
     this->entropyValuesBw = nullptr;
-    this->approximatePW = true;
+    this->approximatePw = true;
     for (int i = 0; i < 255; ++i) {
         this->referenceBinNumber[i] = 68;
         this->floatingBinNumber[i] = 68;
@@ -34,7 +34,7 @@ reg_nmi::~reg_nmi() {
 }
 /* *************************************************************** */
 void reg_nmi::DeallocateHistogram() {
-    int timepoint = this->referenceTimePoint;
+    int timepoint = this->referenceTimePoints;
     // Free the joint histograms and the entropy arrays
     if (this->jointHistogramPro != nullptr) {
         for (int i = 0; i < timepoint; ++i) {
@@ -122,8 +122,8 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
     // Deallocate all allocated arrays
     this->DeallocateHistogram();
     // Reference and floating are resampled between 2 and bin-3
-    for (int i = 0; i < this->referenceTimePoint; ++i) {
-        if (this->timePointWeight[i] > 0) {
+    for (int i = 0; i < this->referenceTimePoints; ++i) {
+        if (this->timePointWeights[i] > 0) {
             reg_intensityRescale(this->referenceImage,
                                  i,
                                  2.f,
@@ -143,8 +143,8 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
         this->jointHistogramLogBw = (double**)calloc(255, sizeof(double*));
         this->entropyValuesBw = (double**)calloc(255, sizeof(double*));
     }
-    for (int i = 0; i < this->referenceTimePoint; ++i) {
-        if (this->timePointWeight[i] > 0) {
+    for (int i = 0; i < this->referenceTimePoints; ++i) {
+        if (this->timePointWeights[i] > 0) {
             // Compute the total number of bin
             this->totalBinNumber[i] = this->referenceBinNumber[i] * this->floatingBinNumber[i] +
                 this->referenceBinNumber[i] + this->floatingBinNumber[i];
@@ -159,8 +159,8 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
         }
     }
 
-    for (int i = 0; i < this->referenceImage->nt; ++i)
-        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]);
+    for (int i = 0; i < this->referenceTimePoints; ++i)
+        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
@@ -196,9 +196,10 @@ static PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) {
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_getNMIValue(const nifti_image *referenceImage,
+void reg_getNmiValue(const nifti_image *referenceImage,
                      const nifti_image *warpedImage,
-                     const double *timePointWeight,
+                     const double *timePointWeights,
+                     const int referenceTimePoints,
                      const unsigned short *referenceBinNumber,
                      const unsigned short *floatingBinNumber,
                      const unsigned short *totalBinNumber,
@@ -213,8 +214,8 @@ void reg_getNMIValue(const nifti_image *referenceImage,
     // Useful variable
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     // Iterate over all active time points
-    for (int t = 0; t < referenceImage->nt; ++t) {
-        if (timePointWeight[t] > 0) {
+    for (int t = 0; t < referenceTimePoints; ++t) {
+        if (timePointWeights[t] > 0) {
             NR_DEBUG("Computing NMI for time point " << t);
             // Define some pointers to the current histograms
             double *jointHistoProPtr = jointHistogramPro[t];
@@ -312,17 +313,14 @@ void reg_getNMIValue(const nifti_image *referenceImage,
                     sum += jointHistoProPtr[index];
                     index += referenceBinNumber[t];
                 }
-                jointHistoProPtr[referenceBinNumber[t] *
-                    floatingBinNumber[t] + r] = sum;
+                jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r] = sum;
             }
             // Marginalise over the warped floating axis
             for (int f = 0; f < floatingBinNumber[t]; ++f) {
                 double sum = 0;
                 int index = referenceBinNumber[t] * f;
-                for (int r = 0; r < referenceBinNumber[t]; ++r) {
+                for (int r = 0; r < referenceBinNumber[t]; ++r, ++index)
                     sum += jointHistoProPtr[index];
-                    ++index;
-                }
                 jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f] = sum;
             }
             // Set the log values to zero
@@ -330,9 +328,9 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             // Compute the entropy of the reference image
             double referenceEntropy = 0;
             for (int r = 0; r < referenceBinNumber[t]; ++r) {
-                double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r];
+                const double& valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r];
                 if (valPro > 0) {
-                    double valLog = log(valPro);
+                    const double& valLog = log(valPro);
                     referenceEntropy -= valPro * valLog;
                     jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + r] = valLog;
                 }
@@ -341,10 +339,9 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             // Compute the entropy of the warped floating image
             double warpedEntropy = 0;
             for (int f = 0; f < floatingBinNumber[t]; ++f) {
-                double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] +
-                    referenceBinNumber[t] + f];
+                const double& valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f];
                 if (valPro > 0) {
-                    double valLog = log(valPro);
+                    const double& valLog = log(valPro);
                     warpedEntropy -= valPro * valLog;
                     jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f] = valLog;
                 }
@@ -353,9 +350,9 @@ void reg_getNMIValue(const nifti_image *referenceImage,
             // Compute the joint entropy
             double jointEntropy = 0;
             for (int i = 0; i < referenceBinNumber[t] * floatingBinNumber[t]; ++i) {
-                double valPro = jointHistoProPtr[i];
+                const double& valPro = jointHistoProPtr[i];
                 if (valPro > 0) {
-                    double valLog = log(valPro);
+                    const double& valLog = log(valPro);
                     jointEntropy -= valPro * valLog;
                     jointHistoLogPtr[i] = valLog;
                 }
@@ -364,12 +361,13 @@ void reg_getNMIValue(const nifti_image *referenceImage,
         } // if active time point
     } // iterate over all time point in the reference image
 }
-template void reg_getNMIValue<float>(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool);
-template void reg_getNMIValue<double>(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool);
+template void reg_getNmiValue<float>(const nifti_image*, const nifti_image*, const double*, const int, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool);
+template void reg_getNmiValue<double>(const nifti_image*, const nifti_image*, const double*, const int, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool);
 /* *************************************************************** */
 static double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                         const nifti_image *warpedImage,
-                                        const double *timePointWeight,
+                                        const double *timePointWeights,
+                                        const int referenceTimePoints,
                                         const unsigned short *referenceBinNumber,
                                         const unsigned short *floatingBinNumber,
                                         const unsigned short *totalBinNumber,
@@ -377,13 +375,13 @@ static double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                         double **jointHistogramPro,
                                         double **entropyValues,
                                         const int *referenceMask,
-                                        const int referenceTimePoint,
                                         const bool approximation) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
-        reg_getNMIValue<RefImgDataType>(referenceImage,
+        reg_getNmiValue<RefImgDataType>(referenceImage,
                                         warpedImage,
-                                        timePointWeight,
+                                        timePointWeights,
+                                        referenceTimePoints,
                                         referenceBinNumber,
                                         floatingBinNumber,
                                         totalBinNumber,
@@ -395,9 +393,9 @@ static double GetSimilarityMeasureValue(const nifti_image *referenceImage,
     }, NiftiImage::getFloatingDataType(referenceImage));
 
     double nmi = 0;
-    for (int t = 0; t < referenceTimePoint; ++t) {
-        if (timePointWeight[t] > 0)
-            nmi += timePointWeight[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2];
+    for (int t = 0; t < referenceTimePoints; ++t) {
+        if (timePointWeights[t] > 0)
+            nmi += timePointWeights[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2];
     }
     return nmi;
 }
@@ -405,7 +403,8 @@ static double GetSimilarityMeasureValue(const nifti_image *referenceImage,
 double reg_nmi::GetSimilarityMeasureValueFw() {
     return ::GetSimilarityMeasureValue(this->referenceImage,
                                        this->warpedImage,
-                                       this->timePointWeight,
+                                       this->timePointWeights,
+                                       this->referenceTimePoints,
                                        this->referenceBinNumber,
                                        this->floatingBinNumber,
                                        this->totalBinNumber,
@@ -413,14 +412,14 @@ double reg_nmi::GetSimilarityMeasureValueFw() {
                                        this->jointHistogramPro,
                                        this->entropyValues,
                                        this->referenceMask,
-                                       this->referenceTimePoint,
-                                       this->approximatePW);
+                                       this->approximatePw);
 }
 /* *************************************************************** */
 double reg_nmi::GetSimilarityMeasureValueBw() {
     return ::GetSimilarityMeasureValue(this->floatingImage,
                                        this->warpedImageBw,
-                                       this->timePointWeight,
+                                       this->timePointWeights,
+                                       this->referenceTimePoints,
                                        this->floatingBinNumber,
                                        this->referenceBinNumber,
                                        this->totalBinNumber,
@@ -428,8 +427,7 @@ double reg_nmi::GetSimilarityMeasureValueBw() {
                                        this->jointHistogramProBw,
                                        this->entropyValuesBw,
                                        this->floatingMask,
-                                       this->referenceTimePoint,
-                                       this->approximatePW);
+                                       this->approximatePw);
 }
 /* *************************************************************** */
 template <class DataType>
@@ -442,7 +440,7 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                                            const nifti_image *warpedGradient,
                                            nifti_image *measureGradientImage,
                                            const int *referenceMask,
-                                           const int currentTimepoint,
+                                           const int currentTimePoint,
                                            const double timepointWeight) {
 #ifdef WIN32
     long i;
@@ -453,9 +451,9 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
 #endif
     // Pointers to the image data
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    const DataType *refPtr = &refImagePtr[currentTimepoint * voxelNumber];
+    const DataType *refPtr = &refImagePtr[currentTimePoint * voxelNumber];
     const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    const DataType *warPtr = &warImagePtr[currentTimepoint * voxelNumber];
+    const DataType *warPtr = &warImagePtr[currentTimePoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
     const DataType *warGradPtrX = static_cast<DataType*>(warpedGradient->data);
@@ -466,18 +464,18 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
     DataType *measureGradPtrY = &measureGradPtrX[voxelNumber];
 
     // Create pointers to the current joint histogram
-    const double *logHistoPtr = jointHistogramLog[currentTimepoint];
-    const double *entropyPtr = entropyValues[currentTimepoint];
+    const double *logHistoPtr = jointHistogramLog[currentTimePoint];
+    const double *entropyPtr = entropyValues[currentTimePoint];
     const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
-    const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint];
-    const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint];
+    const size_t referenceOffset = referenceBinNumber[currentTimePoint] * floatingBinNumber[currentTimePoint];
+    const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimePoint];
 
     // Iterate over all voxel
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \
     logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY, \
-    warGradPtrX,warGradPtrY,entropyPtr,nmi,currentTimepoint,timepointWeight)
+    warGradPtrX,warGradPtrY,entropyPtr,nmi,currentTimePoint,timepointWeight)
 #endif // _OPENMP
     for (i = 0; i < voxelNumber; ++i) {
         // Check if the voxel belongs to the image mask
@@ -487,23 +485,23 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                 DataType gradX = warGradPtrX[i], gradY = warGradPtrY[i];
                 double jointDeriv[2]{}, refDeriv[2]{}, warDeriv[2]{};
                 for (int r = int(refValue - 1.f); r < int(refValue + 3.f); ++r) {
-                    if (-1 < r && r < referenceBinNumber[currentTimepoint]) {
+                    if (-1 < r && r < referenceBinNumber[currentTimePoint]) {
                         for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) {
-                            if (-1 < w && w < floatingBinNumber[currentTimepoint]) {
-                                const double commun = GetBasisSplineValue<double>(refValue - r) *
+                            if (-1 < w && w < floatingBinNumber[currentTimePoint]) {
+                                const double common = GetBasisSplineValue<double>(refValue - r) *
                                     GetBasisSplineDerivativeValue<double>(warValue - w);
-                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
+                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]];
                                 const double& refLog = logHistoPtr[r + referenceOffset];
                                 const double& warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
-                                    jointDeriv[0] += commun * gradX * jointLog;
-                                    refDeriv[0] += commun * gradX * refLog;
-                                    warDeriv[0] += commun * gradX * warLog;
+                                    jointDeriv[0] += common * gradX * jointLog;
+                                    refDeriv[0] += common * gradX * refLog;
+                                    warDeriv[0] += common * gradX * warLog;
                                 }
                                 if (gradY == gradY) {
-                                    jointDeriv[1] += commun * gradY * jointLog;
-                                    refDeriv[1] += commun * gradY * refLog;
-                                    warDeriv[1] += commun * gradY * warLog;
+                                    jointDeriv[1] += common * gradY * jointLog;
+                                    refDeriv[1] += common * gradY * refLog;
+                                    warDeriv[1] += common * gradY * warLog;
                                 }
                             }
                         }
@@ -528,7 +526,7 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
                                            const nifti_image *warpedGradient,
                                            nifti_image *measureGradientImage,
                                            const int *referenceMask,
-                                           const int currentTimepoint,
+                                           const int currentTimePoint,
                                            const double timepointWeight) {
 #ifdef WIN32
     long i;
@@ -539,9 +537,9 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
 #endif
     // Pointers to the image data
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    const DataType *refPtr = &refImagePtr[currentTimepoint * voxelNumber];
+    const DataType *refPtr = &refImagePtr[currentTimePoint * voxelNumber];
     const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    const DataType *warPtr = &warImagePtr[currentTimepoint * voxelNumber];
+    const DataType *warPtr = &warImagePtr[currentTimePoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
     const DataType *warGradPtrX = static_cast<DataType*>(warpedGradient->data);
@@ -554,17 +552,17 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
     DataType *measureGradPtrZ = &measureGradPtrY[voxelNumber];
 
     // Create pointers to the current joint histogram
-    const double *logHistoPtr = jointHistogramLog[currentTimepoint];
-    const double *entropyPtr = entropyValues[currentTimepoint];
+    const double *logHistoPtr = jointHistogramLog[currentTimePoint];
+    const double *entropyPtr = entropyValues[currentTimePoint];
     const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2];
-    const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint];
-    const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint];
+    const size_t referenceOffset = referenceBinNumber[currentTimePoint] * floatingBinNumber[currentTimePoint];
+    const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimePoint];
     // Iterate over all voxel
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \
     logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \
-    warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimepoint,timepointWeight)
+    warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimePoint,timepointWeight)
 #endif // _OPENMP
     for (i = 0; i < voxelNumber; ++i) {
         // Check if the voxel belongs to the image mask
@@ -574,28 +572,28 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
                 DataType gradX = warGradPtrX[i], gradY = warGradPtrY[i], gradZ = warGradPtrZ[i];
                 double jointDeriv[3]{}, refDeriv[3]{}, warDeriv[3]{};
                 for (int r = int(refValue - 1.f); r < int(refValue + 3.f); ++r) {
-                    if (-1 < r && r < referenceBinNumber[currentTimepoint]) {
+                    if (-1 < r && r < referenceBinNumber[currentTimePoint]) {
                         for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) {
-                            if (-1 < w && w < floatingBinNumber[currentTimepoint]) {
-                                const double commun = GetBasisSplineValue<double>(refValue - r) *
+                            if (-1 < w && w < floatingBinNumber[currentTimePoint]) {
+                                const double common = GetBasisSplineValue<double>(refValue - r) *
                                     GetBasisSplineDerivativeValue<double>(warValue - w);
-                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]];
+                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]];
                                 const double& refLog = logHistoPtr[r + referenceOffset];
                                 const double& warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
-                                    refDeriv[0] += commun * gradX * refLog;
-                                    warDeriv[0] += commun * gradX * warLog;
-                                    jointDeriv[0] += commun * gradX * jointLog;
+                                    refDeriv[0] += common * gradX * refLog;
+                                    warDeriv[0] += common * gradX * warLog;
+                                    jointDeriv[0] += common * gradX * jointLog;
                                 }
                                 if (gradY == gradY) {
-                                    refDeriv[1] += commun * gradY * refLog;
-                                    warDeriv[1] += commun * gradY * warLog;
-                                    jointDeriv[1] += commun * gradY * jointLog;
+                                    refDeriv[1] += common * gradY * refLog;
+                                    warDeriv[1] += common * gradY * warLog;
+                                    jointDeriv[1] += common * gradY * jointLog;
                                 }
                                 if (gradZ == gradZ) {
-                                    refDeriv[2] += commun * gradZ * refLog;
-                                    warDeriv[2] += commun * gradZ * warLog;
-                                    jointDeriv[2] += commun * gradZ * jointLog;
+                                    refDeriv[2] += common * gradZ * refLog;
+                                    warDeriv[2] += common * gradZ * warLog;
+                                    jointDeriv[2] += common * gradZ * jointLog;
                                 }
                             }
                         }
@@ -621,7 +619,7 @@ static void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceI
                                                    const nifti_image *warpedGradient,
                                                    nifti_image *voxelBasedGradient,
                                                    const int *referenceMask,
-                                                   const int currentTimepoint,
+                                                   const int currentTimePoint,
                                                    const double timepointWeight) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
@@ -635,12 +633,12 @@ static void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceI
                                  warpedGradient,
                                  voxelBasedGradient,
                                  referenceMask,
-                                 currentTimepoint,
+                                 currentTimePoint,
                                  timepointWeight);
     }, NiftiImage::getFloatingDataType(referenceImage));
 }
 /* *************************************************************** */
-void reg_nmi::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+void reg_nmi::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) {
     // Call compute similarity measure to calculate joint histogram
     this->GetSimilarityMeasureValue();
 
@@ -653,11 +651,11 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
                                              this->warpedGradient,
                                              this->voxelBasedGradient,
                                              this->referenceMask,
-                                             currentTimepoint,
-                                             this->timePointWeight[currentTimepoint]);
+                                             currentTimePoint,
+                                             this->timePointWeights[currentTimePoint]);
 }
 /* *************************************************************** */
-void reg_nmi::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+void reg_nmi::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
                                              this->warpedImageBw,
                                              this->floatingBinNumber,
@@ -667,7 +665,7 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
                                              this->warpedGradientBw,
                                              this->voxelBasedGradientBw,
                                              this->floatingMask,
-                                             currentTimepoint,
-                                             this->timePointWeight[currentTimepoint]);
+                                             currentTimePoint,
+                                             this->timePointWeights[currentTimePoint]);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 063bf8f8..91f37bdb 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -43,9 +43,9 @@ class reg_nmi: public reg_measure {
     /// @brief Returns the nmi value backwards
     virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel-based nmi gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override;
     /// @brief Compute the voxel-based nmi gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override;
 
     virtual void SetRefAndFloatBinNumbers(unsigned short refBinNumber,
                                           unsigned short floBinNumber,
@@ -65,15 +65,15 @@ class reg_nmi: public reg_measure {
     virtual unsigned short* GetFloatingBinNumber() {
         return this->floatingBinNumber;
     }
-    virtual void ApproximatePW() {
-        this->approximatePW = true;
+    virtual void ApproximatePw() {
+        this->approximatePw = true;
     }
-    virtual void DoNotApproximatePW() {
-        this->approximatePW = false;
+    virtual void DoNotApproximatePw() {
+        this->approximatePw = false;
     }
 
 protected:
-    bool approximatePW;
+    bool approximatePw;
     unsigned short referenceBinNumber[255];
     unsigned short floatingBinNumber[255];
     unsigned short totalBinNumber[255];
@@ -88,9 +88,10 @@ class reg_nmi: public reg_measure {
 };
 /* *************************************************************** */
 template <class DataType>
-void reg_getNMIValue(const nifti_image *referenceImage,
+void reg_getNmiValue(const nifti_image *referenceImage,
                      const nifti_image *warpedImage,
                      const double *timePointWeight,
+                     const int referenceTimePoints,
                      const unsigned short *referenceBinNumber,
                      const unsigned short *floatingBinNumber,
                      const unsigned short *totalBinNumber,
@@ -231,9 +232,9 @@ class reg_multichannel_nmi: public reg_measure {
     virtual double GetSimilarityMeasureValueBw() override { return 0; }
 
     /// @brief Compute the voxel-based nmi gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override {}
     /// @brief Compute the voxel-based nmi gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {}
 
 protected:
     unsigned short referenceBinNumber[255];
diff --git a/reg-lib/cpu/_reg_polyAffine.cpp b/reg-lib/cpu/_reg_polyAffine.cpp
index ddd2a8aa..231a6797 100644
--- a/reg-lib/cpu/_reg_polyAffine.cpp
+++ b/reg-lib/cpu/_reg_polyAffine.cpp
@@ -15,8 +15,8 @@
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
 template <class T>
-reg_polyAffine<T>::reg_polyAffine(int refTimePoint,int floTimePoint)
-   : reg_base<T>::reg_base(refTimePoint,floTimePoint)
+reg_polyAffine<T>::reg_polyAffine(int refTimePoints,int floTimePoints)
+   : reg_base<T>::reg_base(refTimePoints,floTimePoints)
 {
    this->executableName=(char *)"NiftyReg PolyAffine";
    NR_FUNC_CALLED();
diff --git a/reg-lib/cpu/_reg_polyAffine.h b/reg-lib/cpu/_reg_polyAffine.h
index dbbc831a..28a7f5ff 100644
--- a/reg-lib/cpu/_reg_polyAffine.h
+++ b/reg-lib/cpu/_reg_polyAffine.h
@@ -34,7 +34,7 @@ class reg_polyAffine : public reg_base<T>
    void DeallocateTransformationGradient();
 
 public:
-   reg_polyAffine(int refTimePoint,int floTimePoint);
+   reg_polyAffine(int refTimePoints,int floTimePoints);
    ~reg_polyAffine();
 };
 
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index aecab542..1f41f389 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -48,8 +48,8 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg,
     if (this->referenceImage->nt != this->floatingImage->nt)
         NR_FATAL_ERROR("This number of time point should be the same for both input images");
     // Input images are normalised between 0 and 1
-    for (int i = 0; i < this->referenceImage->nt; ++i) {
-        if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) {
+    for (int i = 0; i < this->referenceTimePoints; ++i) {
+        if (this->timePointWeights[i] > 0 && normaliseTimePoint[i]) {
             //sets max value over both images to be 1 and min value over both images to be 0
             //scales values such that identical values in the images are still identical after scaling
             float maxF = reg_tools_getMaxValue(this->floatingImage, i);
@@ -73,10 +73,10 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg,
     NR_WARN("SAD is used instead of SSD");
 #endif
 #ifndef NDEBUG
-    for (int i = 0; i < this->referenceImage->nt; ++i)
-        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]);
+    for (int i = 0; i < this->referenceTimePoints; ++i)
+        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]);
     std::string msg = "Normalize time point:";
-    for (int i = 0; i < this->referenceImage->nt; ++i)
+    for (int i = 0; i < this->referenceTimePoints; ++i)
         if (this->normaliseTimePoint[i])
             msg += " " + std::to_string(i);
     NR_DEBUG(msg);
@@ -91,7 +91,8 @@ void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) {
 template<class DataType>
 double reg_getSsdValue(const nifti_image *referenceImage,
                        const nifti_image *warpedImage,
-                       const double *timePointWeight,
+                       const double *timePointWeights,
+                       const int referenceTimePoints,
                        const nifti_image *jacobianDetImage,
                        const int *mask,
                        const nifti_image *localWeightSim) {
@@ -113,8 +114,8 @@ double reg_getSsdValue(const nifti_image *referenceImage,
     double ssdGlobal = 0;
 
     // Loop over the different time points
-    for (int time = 0; time < referenceImage->nt; ++time) {
-        if (timePointWeight[time] > 0) {
+    for (int time = 0; time < referenceTimePoints; ++time) {
+        if (timePointWeights[time] > 0) {
             // Create pointers to the current time point of the reference and warped images
             const DataType *currentRefPtr = &referencePtr[time * voxelNumber];
             const DataType *currentWarPtr = &warpedPtr[time * voxelNumber];
@@ -145,18 +146,19 @@ double reg_getSsdValue(const nifti_image *referenceImage,
                 }
             }
 
-            ssdLocal *= timePointWeight[time];
+            ssdLocal *= timePointWeights[time];
             ssdGlobal -= ssdLocal / n;
         }
     }
     return ssdGlobal;
 }
-template double reg_getSsdValue<float>(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, const nifti_image*);
-template double reg_getSsdValue<double>(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, const nifti_image*);
+template double reg_getSsdValue<float>(const nifti_image*, const nifti_image*, const double*, const int, const nifti_image*, const int*, const nifti_image*);
+template double reg_getSsdValue<double>(const nifti_image*, const nifti_image*, const double*, const int, const nifti_image*, const int*, const nifti_image*);
 /* *************************************************************** */
 double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  const nifti_image *warpedImage,
-                                 const double *timePointWeight,
+                                 const double *timePointWeights,
+                                 const int referenceTimePoints,
                                  const nifti_image *jacobianDetImage,
                                  const int *mask,
                                  const nifti_image *localWeightSim) {
@@ -164,7 +166,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         return reg_getSsdValue<RefImgDataType>(referenceImage,
                                                warpedImage,
-                                               timePointWeight,
+                                               timePointWeights,
+                                               referenceTimePoints,
                                                jacobianDetImage,
                                                mask,
                                                localWeightSim);
@@ -174,7 +177,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
 double reg_ssd::GetSimilarityMeasureValueFw() {
     return ::GetSimilarityMeasureValue(this->referenceImage,
                                        this->warpedImage,
-                                       this->timePointWeight,
+                                       this->timePointWeights,
+                                       this->referenceTimePoints,
                                        nullptr, // TODO this->forwardJacDetImagePointer,
                                        this->referenceMask,
                                        this->localWeightSim);
@@ -183,7 +187,8 @@ double reg_ssd::GetSimilarityMeasureValueFw() {
 double reg_ssd::GetSimilarityMeasureValueBw() {
     return ::GetSimilarityMeasureValue(this->floatingImage,
                                        this->warpedImageBw,
-                                       this->timePointWeight,
+                                       this->timePointWeights,
+                                       this->referenceTimePoints,
                                        nullptr, // TODO this->backwardJacDetImagePointer,
                                        this->floatingMask,
                                        nullptr);
@@ -196,8 +201,8 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
                                   nifti_image *measureGradientImage,
                                   const nifti_image *jacobianDetImage,
                                   const int *mask,
-                                  const int currentTimepoint,
-                                  const double timepointWeight,
+                                  const int currentTimePoint,
+                                  const double timePointWeight,
                                   const nifti_image *localWeightSim) {
     // Create pointers to the reference and warped images
 #ifdef _WIN32
@@ -209,9 +214,9 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
 #endif
     // Pointers to the image data
     const DataType *refImagePtr = static_cast<DataType*>(referenceImage->data);
-    const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber];
+    const DataType *currentRefPtr = &refImagePtr[currentTimePoint * voxelNumber];
     const DataType *warImagePtr = static_cast<DataType*>(warpedImage->data);
-    const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber];
+    const DataType *currentWarPtr = &warImagePtr[currentTimePoint * voxelNumber];
 
     // Pointers to the spatial gradient of the warped image
     const DataType *spatialGradPtrX = static_cast<DataType*>(warpedGradient->data);
@@ -236,7 +241,7 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
                 activeVoxelNumber++;
         }
     }
-    const double adjustedWeight = timepointWeight / activeVoxelNumber;
+    const double adjustedWeight = timePointWeight / activeVoxelNumber;
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -284,8 +289,8 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                             nifti_image *voxelBasedGradient,
                                             const nifti_image *jacobianDetImage,
                                             const int *mask,
-                                            const int currentTimepoint,
-                                            const double timepointWeight,
+                                            const int currentTimePoint,
+                                            const double timePointWeight,
                                             const nifti_image *localWeightSim) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
@@ -295,33 +300,33 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                                      voxelBasedGradient,
                                                      jacobianDetImage,
                                                      mask,
-                                                     currentTimepoint,
-                                                     timepointWeight,
+                                                     currentTimePoint,
+                                                     timePointWeight,
                                                      localWeightSim);
     }, NiftiImage::getFloatingDataType(referenceImage));
 }
 /* *************************************************************** */
-void reg_ssd::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+void reg_ssd::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage,
                                              this->warpedImage,
                                              this->warpedGradient,
                                              this->voxelBasedGradient,
                                              nullptr, // TODO this->forwardJacDetImagePointer,
                                              this->referenceMask,
-                                             currentTimepoint,
-                                             this->timePointWeight[currentTimepoint],
+                                             currentTimePoint,
+                                             this->timePointWeights[currentTimePoint],
                                              this->localWeightSim);
 }
 /* *************************************************************** */
-void reg_ssd::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+void reg_ssd::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
     ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage,
                                              this->warpedImageBw,
                                              this->warpedGradientBw,
                                              this->voxelBasedGradientBw,
                                              nullptr, // TODO this->backwardJacDetImagePointer,
                                              this->floatingMask,
-                                             currentTimepoint,
-                                             this->timePointWeight[currentTimepoint],
+                                             currentTimePoint,
+                                             this->timePointWeights[currentTimePoint],
                                              nullptr);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index f840e1c6..fe359865 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -44,9 +44,9 @@ class reg_ssd: public reg_measure {
     /// @brief Returns the ssd value backwards
     virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel-based ssd gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override;
     /// @brief Compute the voxel-based ssd gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override;
     /// @brief Here
     virtual void GetDiscretisedValue(nifti_image *controlPointGridImage,
                                      float *discretisedValue,
@@ -60,7 +60,7 @@ class reg_ssd: public reg_measure {
 /** @brief Computes and returns the SSD between two input images
  * @param referenceImage First input image to use to compute the metric
  * @param warpedImage Second input image to use to compute the metric
- * @param timePointWeight Array that contains the weight of each time point
+ * @param timePointWeights Array that contains the weight of each time point
  * @param jacobianDetImage Image that contains the Jacobian
  * determinant of a transformation at every voxel position. This
  * image is used to modulate the SSD. The argument is ignored if the
@@ -73,7 +73,8 @@ class reg_ssd: public reg_measure {
 template <class DataType>
 double reg_getSsdValue(const nifti_image *referenceImage,
                        const nifti_image *warpedImage,
-                       const double *timePointWeight,
+                       const double *timePointWeights,
+                       const int referenceTimePoints,
                        const nifti_image *jacobianDetImage,
                        const int *mask,
                        const nifti_image *localWeightSim);
@@ -90,7 +91,7 @@ double reg_getSsdValue(const nifti_image *referenceImage,
  * pointer is set to nullptr
  * @param mask Array that contains a mask to specify which voxel
  * should be considered
- * @param currentTimepoint Specifies which time point volumes have to be considered
+ * @param currentTimePoint Specifies which time point volumes have to be considered
  * @param timepointWeight Weight of the specified time point
  * @param localWeightSim Image that contains the local weight similarity
  */
@@ -101,7 +102,7 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
                                   nifti_image *measureGradientImage,
                                   const nifti_image *jacobianDetImage,
                                   const int *mask,
-                                  const int currentTimepoint,
+                                  const int currentTimePoint,
                                   const double timepointWeight,
                                   const nifti_image *localWeightSim);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 65f8a15d..06beca8a 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -14,8 +14,8 @@
 namespace NiftyReg {
 /* *************************************************************** */
 struct BlockSize {
-    unsigned reg_getVoxelBasedNMIGradientUsingPW2D;
-    unsigned reg_getVoxelBasedNMIGradientUsingPW3D;
+    unsigned reg_getVoxelBasedNmiGradientUsingPw2D;
+    unsigned reg_getVoxelBasedNmiGradientUsingPw3D;
     unsigned reg_affine_getDeformationField;
     unsigned reg_spline_getDeformationField2D;
     unsigned reg_spline_getDeformationField3D;
@@ -41,7 +41,7 @@ struct BlockSize {
     unsigned GetSsdValue;
     unsigned GetSsdGradient;
     unsigned reg_voxelCentricToNodeCentric;
-    unsigned reg_convertNMIGradientFromVoxelToRealSpace;
+    unsigned reg_convertNmiGradientFromVoxelToRealSpace;
     unsigned reg_ApplyConvolutionWindowAlongX;
     unsigned reg_ApplyConvolutionWindowAlongY;
     unsigned reg_ApplyConvolutionWindowAlongZ;
@@ -54,8 +54,8 @@ struct BlockSize {
 /* *************************************************************** */
 struct BlockSize100: public BlockSize {
     BlockSize100() {
-        reg_getVoxelBasedNMIGradientUsingPW2D = 384; // 21 reg - 24 smem - 32 cmem
-        reg_getVoxelBasedNMIGradientUsingPW3D = 320; // 25 reg - 24 smem - 32 cmem
+        reg_getVoxelBasedNmiGradientUsingPw2D = 384; // 21 reg - 24 smem - 32 cmem
+        reg_getVoxelBasedNmiGradientUsingPw3D = 320; // 25 reg - 24 smem - 32 cmem
         reg_affine_getDeformationField = 512; // 16 reg - 24 smem
         reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem
         reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem
@@ -81,7 +81,7 @@ struct BlockSize100: public BlockSize {
         GetSsdValue = 320; // 12 reg - 24 smem - 08 cmem
         GetSsdGradient = 320; // 12 reg - 24 smem - 08 cmem
         reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem
-        reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
+        reg_convertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
         reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
         reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
         reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem
@@ -96,8 +96,8 @@ struct BlockSize100: public BlockSize {
 /* *************************************************************** */
 struct BlockSize300: public BlockSize {
     BlockSize300() {
-        reg_getVoxelBasedNMIGradientUsingPW2D = 768; // 38 reg
-        reg_getVoxelBasedNMIGradientUsingPW3D = 640; // 45 reg
+        reg_getVoxelBasedNmiGradientUsingPw2D = 768; // 38 reg
+        reg_getVoxelBasedNmiGradientUsingPw3D = 640; // 45 reg
         reg_affine_getDeformationField = 1024; // 23 reg
         reg_spline_getDeformationField2D = 1024; // 34 reg
         reg_spline_getDeformationField3D = 1024; // 34 reg
@@ -123,7 +123,7 @@ struct BlockSize300: public BlockSize {
         GetSsdValue = 768; // 34 reg
         GetSsdGradient = 768; // 34 reg
         reg_voxelCentricToNodeCentric = 1024; // 23 reg
-        reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg
+        reg_convertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg
         reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg
         reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg
         reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 7055465e..e2c4e836 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -127,9 +127,9 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
     /// @brief Returns the lncc value backwards
     virtual double GetSimilarityMeasureValueBw() override { return 0; }
     /// @brief Compute the voxel-based lncc gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override {}
     /// @brief Compute the voxel-based lncc gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {}
 };
 /* *************************************************************** */
 class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
@@ -169,9 +169,9 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
     /// @brief Returns the kld value backwards
     virtual double GetSimilarityMeasureValueBw() override { return 0; }
     /// @brief Compute the voxel-based kld gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override {}
     /// @brief Compute the voxel-based kld gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {}
 };
 /* *************************************************************** */
 class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
@@ -211,8 +211,8 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
     /// @brief Returns the dti value backwards
     virtual double GetSimilarityMeasureValueBw() override { return 0; }
     /// @brief Compute the voxel-based dti gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override {}
     /// @brief Compute the voxel-based dti gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {}
 };
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 722144a4..170c128e 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -42,7 +42,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
                                        localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda,
                                        warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
     // Check if the input images have multiple timepoints
-    if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1)
+    if (this->referenceTimePoints > 1 || this->floatingImage->nt > 1)
         NR_FATAL_ERROR("Multiple timepoints are not yet supported");
     // The reference and floating images have to be updated on the device
     Cuda::TransferNiftiToDevice<float>(this->referenceImageCuda, this->referenceImage);
@@ -53,7 +53,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
 double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  nifti_image *warpedImage,
                                  const float *warpedImageCuda,
-                                 const double *timePointWeight,
+                                 const double *timePointWeights,
                                  const unsigned short *referenceBinNumber,
                                  const unsigned short *floatingBinNumber,
                                  const unsigned short *totalBinNumber,
@@ -61,14 +61,15 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                  double **jointHistogramPro,
                                  double **entropyValues,
                                  const int *referenceMask,
-                                 const int referenceTimePoint,
+                                 const int referenceTimePoints,
                                  const bool approximation) {
     // TODO: Implement the NMI computation for CUDA
     // The NMI computation is performed on the host for now
     Cuda::TransferFromDeviceToNifti<float>(warpedImage, warpedImageCuda);
-    reg_getNMIValue<float>(referenceImage,
+    reg_getNmiValue<float>(referenceImage,
                            warpedImage,
-                           timePointWeight,
+                           timePointWeights,
+                           referenceTimePoints,
                            referenceBinNumber,
                            floatingBinNumber,
                            totalBinNumber,
@@ -79,9 +80,9 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                            approximation);
 
     double nmi = 0;
-    for (int t = 0; t < referenceTimePoint; ++t) {
-        if (timePointWeight[t] > 0)
-            nmi += timePointWeight[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2];
+    for (int t = 0; t < referenceTimePoints; ++t) {
+        if (timePointWeights[t] > 0)
+            nmi += timePointWeights[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2];
     }
     return nmi;
 }
@@ -90,7 +91,7 @@ double reg_nmi_gpu::GetSimilarityMeasureValueFw() {
     return ::GetSimilarityMeasureValue(this->referenceImage,
                                        this->warpedImage,
                                        this->warpedImageCuda,
-                                       this->timePointWeight,
+                                       this->timePointWeights,
                                        this->referenceBinNumber,
                                        this->floatingBinNumber,
                                        this->totalBinNumber,
@@ -98,15 +99,15 @@ double reg_nmi_gpu::GetSimilarityMeasureValueFw() {
                                        this->jointHistogramPro,
                                        this->entropyValues,
                                        this->referenceMask,
-                                       this->referenceTimePoint,
-                                       this->approximatePW);
+                                       this->referenceTimePoints,
+                                       this->approximatePw);
 }
 /* *************************************************************** */
 double reg_nmi_gpu::GetSimilarityMeasureValueBw() {
     return ::GetSimilarityMeasureValue(this->floatingImage,
                                        this->warpedImageBw,
                                        this->warpedImageBwCuda,
-                                       this->timePointWeight,
+                                       this->timePointWeights,
                                        this->floatingBinNumber,
                                        this->referenceBinNumber,
                                        this->totalBinNumber,
@@ -114,12 +115,12 @@ double reg_nmi_gpu::GetSimilarityMeasureValueBw() {
                                        this->jointHistogramProBw,
                                        this->entropyValuesBw,
                                        this->floatingMask,
-                                       this->referenceTimePoint,
-                                       this->approximatePW);
+                                       this->referenceTimePoints,
+                                       this->approximatePw);
 }
 /* *************************************************************** */
 /// Called when we only have one target and one source image
-void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
+void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
                                       const cudaArray *referenceImageCuda,
                                       const float *warpedImageCuda,
                                       const float4 *warpedGradientCuda,
@@ -149,21 +150,21 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
                                                  cudaChannelFormatKindSigned, 1);
 
     if (referenceImage->nz > 1) {
-        const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW3D;
+        const unsigned blocks = blockSize->reg_getVoxelBasedNmiGradientUsingPw3D;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_getVoxelBasedNMIGradientUsingPW3D_kernel<<<gridDims, blockDims>>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture,
+        reg_getVoxelBasedNmiGradientUsingPw3D_kernel<<<gridDims, blockDims>>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture,
                                                                               *warpedGradientTexture, *histogramTexture, *maskTexture,
                                                                               imageSize, refBinning, floBinning, normalisedJE, nmi,
                                                                               (unsigned)activeVoxelNumber);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW2D;
+        const unsigned blocks = blockSize->reg_getVoxelBasedNmiGradientUsingPw2D;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_getVoxelBasedNMIGradientUsingPW2D_kernel<<<gridDims, blockDims>>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture,
+        reg_getVoxelBasedNmiGradientUsingPw2D_kernel<<<gridDims, blockDims>>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture,
                                                                               *warpedGradientTexture, *histogramTexture, *maskTexture,
                                                                               imageSize, refBinning, floBinning, normalisedJE, nmi,
                                                                               (unsigned)activeVoxelNumber);
@@ -171,7 +172,7 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage,
     }
 }
 /* *************************************************************** */
-void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) {
     // Call compute similarity measure to calculate joint histogram
     this->GetSimilarityMeasureValue();
 
@@ -179,7 +180,7 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint)
     thrust::device_vector<float> jointHistogramLogCuda(this->jointHistogramLog[0], this->jointHistogramLog[0] + this->totalBinNumber[0]);
 
     // The gradient of the NMI is computed on the GPU
-    reg_getVoxelBasedNMIGradient_gpu(this->referenceImage,
+    reg_getVoxelBasedNmiGradient_gpu(this->referenceImage,
                                      this->referenceImageCuda,
                                      this->warpedImageCuda,
                                      this->warpedGradientCuda,
@@ -192,12 +193,12 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint)
                                      this->floatingBinNumber[0]);
 }
 /* *************************************************************** */
-void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
     // The latest joint histogram is transferred onto the GPU
     thrust::device_vector<float> jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]);
 
     // The gradient of the NMI is computed on the GPU
-    reg_getVoxelBasedNMIGradient_gpu(this->floatingImage,
+    reg_getVoxelBasedNmiGradient_gpu(this->floatingImage,
                                      this->floatingImageCuda,
                                      this->warpedImageBwCuda,
                                      this->warpedGradientBwCuda,
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index be6479ec..51bc12a8 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -53,9 +53,9 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
     /// @brief Returns the nmi value backwards
     virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel-based nmi gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override;
     /// @brief Compute the voxel-based nmi gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override;
 };
 /* *************************************************************** */
 /// @brief NMI measure of similarity class
@@ -93,8 +93,8 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_
     /// @brief Returns the nmi value backwards
     virtual double GetSimilarityMeasureValueBw() override { return 0; }
     /// @brief Compute the voxel-based nmi gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override {}
     /// @brief Compute the voxel-based nmi gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {}
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {}
 };
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_kernels.cu b/reg-lib/cuda/_reg_nmi_kernels.cu
index 9218537c..0da6c415 100755
--- a/reg-lib/cuda/_reg_nmi_kernels.cu
+++ b/reg-lib/cuda/_reg_nmi_kernels.cu
@@ -44,7 +44,7 @@ __device__ float GetBasisSplineDerivativeValue(const float& ori) {
     return value;
 }
 /* *************************************************************** */
-__global__ void reg_getVoxelBasedNMIGradientUsingPW2D_kernel(float4 *voxelBasedGradient,
+__global__ void reg_getVoxelBasedNmiGradientUsingPw2D_kernel(float4 *voxelBasedGradient,
                                                              cudaTextureObject_t referenceImageTexture,
                                                              cudaTextureObject_t warpedImageTexture,
                                                              cudaTextureObject_t warpedGradientTexture,
@@ -121,7 +121,7 @@ __global__ void reg_getVoxelBasedNMIGradientUsingPW2D_kernel(float4 *voxelBasedG
     }
 }
 /* *************************************************************** */
-__global__ void reg_getVoxelBasedNMIGradientUsingPW3D_kernel(float4 *voxelBasedGradient,
+__global__ void reg_getVoxelBasedNmiGradientUsingPw3D_kernel(float4 *voxelBasedGradient,
                                                              cudaTextureObject_t referenceImageTexture,
                                                              cudaTextureObject_t warpedImageTexture,
                                                              cudaTextureObject_t warpedGradientTexture,
@@ -210,7 +210,7 @@ __global__ void reg_getVoxelBasedNMIGradientUsingPW3D_kernel(float4 *voxelBasedG
 }
 /* *************************************************************** */
 // Multichannel NMI gradient. Hardcoded for 2x2 NMI channels.
-/* __global__ void reg_getVoxelBasedNMIGradientUsingPW2x2_kernel(float4 *voxelBasedGradient) {
+/* __global__ void reg_getVoxelBasedNmiGradientUsingPw2x2_kernel(float4 *voxelBasedGradient) {
     const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < c_ActiveVoxelNumber) {
         const int targetIndex = tex1Dfetch(maskTexture, tid);
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 77dd8318..6c2e6c69 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -42,10 +42,10 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
                                        warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
     // Check that the input images have only one time point
     if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1)
-        NR_FATAL_ERROR("Multiple timepoints are not yet supported");
+        NR_FATAL_ERROR("Multiple time points are not yet supported");
     // Check if the reference and floating images need to be updated
-    for (int i = 0; i < this->referenceImage->nt; ++i)
-        if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) {
+    for (int i = 0; i < this->referenceTimePoints; ++i)
+        if (this->timePointWeights[i] > 0 && normaliseTimePoint[i]) {
             Cuda::TransferNiftiToDevice<float>(this->referenceImageCuda, this->referenceImage);
             Cuda::TransferNiftiToDevice<float>(this->floatingImageCuda, this->floatingImage);
             break;
@@ -160,7 +160,7 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) {
+void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) {
     reg_getVoxelBasedSsdGradient_gpu(this->referenceImage,
                                      this->referenceImageCuda,
                                      this->warpedImageCuda,
@@ -169,10 +169,10 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint)
                                      this->voxelBasedGradientCuda,
                                      this->referenceMaskCuda,
                                      this->activeVoxelNumber,
-                                     static_cast<float>(this->timePointWeight[currentTimepoint]));
+                                     static_cast<float>(this->timePointWeights[currentTimePoint]));
 }
 /* *************************************************************** */
-void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) {
+void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
     reg_getVoxelBasedSsdGradient_gpu(this->floatingImage,
                                      this->floatingImageCuda,
                                      this->warpedImageBwCuda,
@@ -181,6 +181,6 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint)
                                      this->voxelBasedGradientBwCuda,
                                      this->floatingMaskCuda,
                                      this->activeVoxelNumber,
-                                     static_cast<float>(this->timePointWeight[currentTimepoint]));
+                                     static_cast<float>(this->timePointWeights[currentTimePoint]));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 9dfd2960..03f184a4 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -54,8 +54,8 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
     /// @brief Returns the ssd value backwards
     virtual double GetSimilarityMeasureValueBw() override;
     /// @brief Compute the voxel-based ssd gradient forwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override;
     /// @brief Compute the voxel-based ssd gradient backwards
-    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override;
+    virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override;
 };
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 4db039cd..aa8f8c38 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -78,15 +78,15 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage,
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ,
+void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ,
                                                     const nifti_image *controlPointImage,
                                                     float4 *nmiGradientCuda) {
     const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_convertNMIGradientFromVoxelToRealSpace;
+    const unsigned blocks = CudaContext::GetBlockSize()->reg_convertNmiGradientFromVoxelToRealSpace;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    reg_convertNMIGradientFromVoxelToRealSpace_kernel<<<gridDims, blockDims>>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber);
+    reg_convertNmiGradientFromVoxelToRealSpace_kernel<<<gridDims, blockDims>>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 41916575..6d60ea4d 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -23,7 +23,7 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage,
                                        float weight,
                                        const mat44 *voxelToMillimetre = nullptr);
 /* *************************************************************** */
-void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ,
+void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ,
                                                     const nifti_image *controlPointImage,
                                                     float4 *nmiGradientCuda);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index 8dba6af3..8782ded1 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -68,7 +68,7 @@ __global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda,
     nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 };
 }
 /* *************************************************************** */
-__global__ void reg_convertNMIGradientFromVoxelToRealSpace_kernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) {
+__global__ void reg_convertNmiGradientFromVoxelToRealSpace_kernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nodeNumber) {
         const float4 voxelGradient = gradient[tid];
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index aa916ec5..e98dd2e2 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -156,7 +156,7 @@ class LnccTest {
                 // Use LNCC as a measure
                 unique_ptr<reg_lncc> measure_lncc{ dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc)) };
                 measure_lncc->SetKernelStandardDeviation(0, sigma);
-                measure_lncc->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0
+                measure_lncc->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0
                 measure->Initialise(*measure_lncc, *content);
                 const double lncc = measure_lncc->GetSimilarityMeasureValue();
                 // Save for testing
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index 6030f69d..12941952 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -60,13 +60,13 @@ class NmiTest {
             "NMI 2D",
             reference2d,
             floating2d,
-            GetNMIPW(reference2d, floating2d)
+            GetNmiPw(reference2d, floating2d)
         ));
         testData.emplace_back(TestData(
             "NMI 3D",
             reference3d,
             floating3d,
-            GetNMIPW(reference3d, floating3d)
+            GetNmiPw(reference3d, floating3d)
         ));
         for (auto&& data : testData) {
             for (auto&& platformType : PlatformTypes) {
@@ -86,7 +86,7 @@ class NmiTest {
                 unique_ptr<Measure> measure{ platform->CreateMeasure() };
                 // Use NMI as a measure
                 unique_ptr<reg_nmi> measure_nmi{ dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)) };
-                measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0
+                measure_nmi->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0
                 measure->Initialise(*measure_nmi, *content);
                 const double nmi = measure_nmi->GetSimilarityMeasureValue();
 
@@ -100,7 +100,7 @@ class NmiTest {
     using TestCase = std::tuple<std::string, double, double>;
     inline static vector<TestCase> testCases;
 
-    double GetNMIPW(const NiftiImage& ref, const NiftiImage& flo) {
+    double GetNmiPw(const NiftiImage& ref, const NiftiImage& flo) {
         // Allocate a joint histogram and fill it with zeros
         double jh[68][68];
         for (unsigned i = 0; i < 68; ++i)
diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp
index ec8f5326..fdb769ba 100644
--- a/reg-test/reg_test_nmi_gradient.cpp
+++ b/reg-test/reg_test_nmi_gradient.cpp
@@ -9,9 +9,9 @@
     The analytical formulation is compared against an approximation
 */
 
-class NMIGradientTest {
+class NmiGradientTest {
 public:
-    NMIGradientTest() {
+    NmiGradientTest() {
         if (!testCases.empty())
             return;
 
@@ -94,8 +94,8 @@ class NMIGradientTest {
                 unique_ptr<Measure> measure{ platform->CreateMeasure() };
                 // Use NMI as a measure
                 unique_ptr<reg_nmi> measure_nmi{ dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)) };
-                measure_nmi->DoNotApproximatePW();
-                measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0
+                measure_nmi->DoNotApproximatePw();
+                measure_nmi->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0
                 measure_nmi->SetRefAndFloatBinNumbers(binNumber, binNumber, 0);
                 measure->Initialise(*measure_nmi, *content);
                 // Compute the NMI gradient
@@ -134,7 +134,7 @@ class NMIGradientTest {
     inline static vector<TestCase> testCases;
 };
 
-TEST_CASE_METHOD(NMIGradientTest, "NMI Gradient", "[unit]") {
+TEST_CASE_METHOD(NmiGradientTest, "NMI Gradient", "[unit]") {
     // Loop over all generated test cases
     for (auto&& testCase : testCases) {
         // Retrieve test information
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 07207b2a..81c150e8 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -138,8 +138,8 @@ class MeasureTest {
 
             // Initialise the measures
             for (int i = 0; i < referenceCpu->nt; ++i) {
-                measureCpu->SetTimepointWeight(i, 1.0);
-                measureCuda->SetTimepointWeight(i, 1.0);
+                measureCpu->SetTimePointWeight(i, 1.0);
+                measureCuda->SetTimePointWeight(i, 1.0);
             }
             measureCreatorCpu->Initialise(*measureCpu, *contentCpu, contentCpuBw.get());
             measureCreatorCuda->Initialise(*measureCuda, *contentCuda, contentCudaBw.get());
@@ -213,7 +213,7 @@ TEST_CASE_METHOD(MeasureTest, "Regression Measure", "[regression]") {
             for (size_t i = 0; i < voxelBasedGradCpu.nVoxels(); ++i) {
                 const float cpuVal = voxelBasedGradCpuPtr[i];
                 const float cudaVal = voxelBasedGradCudaPtr[i];
-                const double diff = fabs(cpuVal - cudaVal);
+                const float diff = fabs(cpuVal - cudaVal);
                 if (diff > EPS)
                     NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl;
                 REQUIRE(diff < EPS);

From 52204d77b2423cfd9d077df101d899963b873786 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 14 Nov 2023 15:27:41 +0000
Subject: [PATCH 242/314] Implement reg_getNmiValue for CUDA #92

---
 niftyreg_build_version.txt   |   2 +-
 reg-lib/cpu/_reg_nmi.cpp     |  37 +----
 reg-lib/cpu/_reg_nmi.h       |  45 ++++--
 reg-lib/cuda/_reg_nmi_gpu.cu | 259 +++++++++++++++++++++++++++++------
 reg-lib/cuda/_reg_nmi_gpu.h  |   6 +
 5 files changed, 258 insertions(+), 91 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 2921a158..35329ed8 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-360
+361
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index f8d0d548..9e3801c1 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -164,37 +164,6 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-template<class PrecisionType>
-static PrecisionType GetBasisSplineValue(PrecisionType x) {
-    x = fabs(x);
-    PrecisionType value = 0;
-    if (x < 2.f) {
-        if (x < 1.f)
-            value = 2.f / 3.f + (0.5f * x - 1.f) * x * x;
-        else {
-            x -= 2.f;
-            value = -x * x * x / 6.f;
-        }
-    }
-    return value;
-}
-/* *************************************************************** */
-template<class PrecisionType>
-static PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) {
-    PrecisionType x = fabs(ori);
-    PrecisionType value = 0;
-    if (x < 2.f) {
-        if (x < 1.f)
-            value = (1.5f * x - 2.f) * ori;
-        else {
-            x -= 2.f;
-            value = -0.5f * x * x;
-            if (ori < 0) value = -value;
-        }
-    }
-    return value;
-}
-/* *************************************************************** */
 template <class DataType>
 void reg_getNmiValue(const nifti_image *referenceImage,
                      const nifti_image *warpedImage,
@@ -261,9 +230,7 @@ void reg_getNmiValue(const nifti_image *referenceImage,
                     }
                 }
                 // Convolve the histogram with a cubic B-spline kernel
-                double kernel[3];
-                kernel[0] = kernel[2] = GetBasisSplineValue(-1.0);
-                kernel[1] = GetBasisSplineValue(0.0);
+                constexpr double kernel[3]{ GetBasisSplineValue(-1.0), GetBasisSplineValue(0.0), GetBasisSplineValue(-1.0) };
                 // Histogram is first smooth along the reference axis
                 memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double));
                 for (int f = 0; f < floatingBinNumber[t]; ++f) {
@@ -361,8 +328,6 @@ void reg_getNmiValue(const nifti_image *referenceImage,
         } // if active time point
     } // iterate over all time point in the reference image
 }
-template void reg_getNmiValue<float>(const nifti_image*, const nifti_image*, const double*, const int, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool);
-template void reg_getNmiValue<double>(const nifti_image*, const nifti_image*, const double*, const int, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool);
 /* *************************************************************** */
 static double GetSimilarityMeasureValue(const nifti_image *referenceImage,
                                         const nifti_image *warpedImage,
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 91f37bdb..1c01ba91 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -87,20 +87,6 @@ class reg_nmi: public reg_measure {
     void DeallocateHistogram();
 };
 /* *************************************************************** */
-template <class DataType>
-void reg_getNmiValue(const nifti_image *referenceImage,
-                     const nifti_image *warpedImage,
-                     const double *timePointWeight,
-                     const int referenceTimePoints,
-                     const unsigned short *referenceBinNumber,
-                     const unsigned short *floatingBinNumber,
-                     const unsigned short *totalBinNumber,
-                     double **jointHistogramLog,
-                     double **jointHistogramPro,
-                     double **entropyValues,
-                     const int *referenceMask,
-                     const bool approximation);
-/* *************************************************************** */
 // Simple class to dynamically manage an array of pointers
 // Needed for multi channel NMI
 template<class DataTYPE>
@@ -283,3 +269,34 @@ void reg_getVoxelBasedMultiChannelNmiGradient3D(nifti_image *referenceImages,
                                                 int *mask,
                                                 bool approx);
 /* *************************************************************** */
+template<class PrecisionType>
+DEVICE constexpr PrecisionType GetBasisSplineValue(PrecisionType x) {
+    x = x < 0 ? -x : x;
+    PrecisionType value = 0;
+    if (x < 2.f) {
+        if (x < 1.f)
+            value = 2.f / 3.f + (0.5f * x - 1.f) * x * x;
+        else {
+            x -= 2.f;
+            value = -x * x * x / 6.f;
+        }
+    }
+    return value;
+}
+/* *************************************************************** */
+template<class PrecisionType>
+DEVICE constexpr PrecisionType GetBasisSplineDerivativeValue(const PrecisionType origX) {
+    PrecisionType x = origX < 0 ? -origX : origX;
+    PrecisionType value = 0;
+    if (x < 2.f) {
+        if (x < 1.f)
+            value = (1.5f * x - 2.f) * origX;
+        else {
+            x -= 2.f;
+            value = -0.5f * x * x;
+            if (origX < 0) value = -value;
+        }
+    }
+    return value;
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 170c128e..f48fff8f 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -34,53 +34,228 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
                                     nifti_image *warpedImgBw, float *warpedImgBwCuda,
                                     nifti_image *warpedGradBw, float4 *warpedGradBwCuda,
                                     nifti_image *voxelBasedGradBw, float4 *voxelBasedGradBwCuda) {
-    this->DeallocateHistogram();
     reg_nmi::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad,
                                localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw);
     reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum,
                                        warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda,
                                        localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda,
                                        warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
-    // Check if the input images have multiple timepoints
+    // Check if the input images have multiple time points
     if (this->referenceTimePoints > 1 || this->floatingImage->nt > 1)
-        NR_FATAL_ERROR("Multiple timepoints are not yet supported");
+        NR_FATAL_ERROR("Multiple time points are not yet supported");
     // The reference and floating images have to be updated on the device
     Cuda::TransferNiftiToDevice<float>(this->referenceImageCuda, this->referenceImage);
     Cuda::TransferNiftiToDevice<float>(this->floatingImageCuda, this->floatingImage);
+    // Create the joint histograms
+    this->jointHistogramLogCudaVecs.resize(this->referenceTimePoints);
+    this->jointHistogramProCudaVecs.resize(this->referenceTimePoints);
+    if (this->isSymmetric) {
+        this->jointHistogramLogBwCudaVecs.resize(this->referenceTimePoints);
+        this->jointHistogramProBwCudaVecs.resize(this->referenceTimePoints);
+    }
+    for (int i = 0; i < this->referenceTimePoints; ++i) {
+        if (this->timePointWeights[i] > 0) {
+            this->jointHistogramLogCudaVecs[i].resize(this->totalBinNumber[i]);
+            this->jointHistogramProCudaVecs[i].resize(this->totalBinNumber[i]);
+            if (this->isSymmetric) {
+                this->jointHistogramLogBwCudaVecs[i].resize(this->totalBinNumber[i]);
+                this->jointHistogramProBwCudaVecs[i].resize(this->totalBinNumber[i]);
+            }
+        }
+    }
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-double GetSimilarityMeasureValue(const nifti_image *referenceImage,
-                                 nifti_image *warpedImage,
-                                 const float *warpedImageCuda,
-                                 const double *timePointWeights,
-                                 const unsigned short *referenceBinNumber,
-                                 const unsigned short *floatingBinNumber,
-                                 const unsigned short *totalBinNumber,
-                                 double **jointHistogramLog,
-                                 double **jointHistogramPro,
-                                 double **entropyValues,
-                                 const int *referenceMask,
-                                 const int referenceTimePoints,
-                                 const bool approximation) {
-    // TODO: Implement the NMI computation for CUDA
-    // The NMI computation is performed on the host for now
-    Cuda::TransferFromDeviceToNifti<float>(warpedImage, warpedImageCuda);
-    reg_getNmiValue<float>(referenceImage,
-                           warpedImage,
-                           timePointWeights,
-                           referenceTimePoints,
-                           referenceBinNumber,
-                           floatingBinNumber,
-                           totalBinNumber,
-                           jointHistogramLog,
-                           jointHistogramPro,
-                           entropyValues,
-                           referenceMask,
-                           approximation);
+void reg_getNmiValue_gpu(const nifti_image *referenceImage,
+                         const cudaArray *referenceImageCuda,
+                         const float *warpedImageCuda,
+                         const double *timePointWeights,
+                         const int referenceTimePoints,
+                         const unsigned short *referenceBinNumber,
+                         const unsigned short *floatingBinNumber,
+                         const unsigned short *totalBinNumber,
+                         vector<thrust::device_vector<double>>& jointHistogramLogCudaVecs,
+                         vector<thrust::device_vector<double>>& jointHistogramProCudaVecs,
+                         double **entropyValues,
+                         const int *maskCuda,
+                         const size_t activeVoxelNumber,
+                         const bool approximation) {
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
+    const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray);
+    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+                                                    cudaChannelFormatKindSigned, 1);
+    auto referenceImageTexture = *referenceImageTexturePtr;
+    auto maskTexture = *maskTexturePtr;
+
+    // Iterate over all active time points
+    for (int t = 0; t < referenceTimePoints; t++) {
+        if (timePointWeights[t] <= 0) continue;
+        NR_DEBUG("Computing NMI for time point " << t);
+        const auto& curTotalBinNumber = totalBinNumber[t];
+        const auto& curRefBinNumber = referenceBinNumber[t];
+        const auto& curFloBinNumber = floatingBinNumber[t];
+        // Define the current histograms
+        thrust::fill(thrust::device, jointHistogramLogCudaVecs[t].begin(), jointHistogramLogCudaVecs[t].end(), 0.0);
+        thrust::fill(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0);
+        double *jointHistogramLogCuda = jointHistogramLogCudaVecs[t].data().get();
+        double *jointHistogramProCuda = jointHistogramProCudaVecs[t].data().get();
+        // Define warped image texture
+        auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + t * voxelNumber, cudaResourceTypeLinear,
+                                                               voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1);
+        auto warpedImageTexture = *warpedImageTexturePtr;
+        // Fill the joint histograms
+        if (approximation == false) {
+            // No approximation is used for the Parzen windowing
+            thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
+                const int& voxel = tex1Dfetch<int>(maskTexture, index);
+                const float& warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
+                if (warValue != warValue) return;
+                auto&& [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDims);
+                const float& refValue = tex3D<float>(referenceImageTexture, x, y, z);
+                if (refValue != refValue) return;
+                for (int r = int(refValue - 1); r < int(refValue + 3); r++) {
+                    if (0 <= r && r < curRefBinNumber) {
+                        const double& refBasis = GetBasisSplineValue<double>(refValue - r);
+                        for (int w = int(warValue - 1); w < int(warValue + 3); w++) {
+                            if (0 <= w && w < curFloBinNumber) {
+                                const double& warBasis = GetBasisSplineValue<double>(warValue - w);
+                                atomicAdd(&jointHistogramProCuda[r + w * curRefBinNumber], refBasis * warBasis);
+                            }
+                        }
+                    }
+                }
+            });
+        } else {
+            // An approximation is used for the Parzen windowing. First intensities are binarised then
+            // the histogram is convolved with a spine kernel function.
+            thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
+                const int& voxel = tex1Dfetch<int>(maskTexture, index);
+                const float& warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
+                if (warValue != warValue) return;
+                auto&& [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDims);
+                const float& refValue = tex3D<float>(referenceImageTexture, x, y, z);
+                if (refValue != refValue) return;
+                if (0 <= refValue && refValue < curRefBinNumber && 0 <= warValue && warValue < curFloBinNumber)
+                    atomicAdd(&jointHistogramProCuda[int(refValue) + int(warValue) * curRefBinNumber], 1.0);
+            });
+            // Convolve the histogram with a cubic B-spline kernel
+            // Histogram is first smooth along the reference axis
+            thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned short>(0), curFloBinNumber, [=]__device__(const unsigned short f) {
+                constexpr double kernel[3]{ GetBasisSplineValue(-1.0), GetBasisSplineValue(0.0), GetBasisSplineValue(-1.0) };
+                for (unsigned short r = 0; r < curRefBinNumber; r++) {
+                    double value = 0;
+                    short index = r - 1;
+                    double *histoPtr = &jointHistogramProCuda[index + curRefBinNumber * f];
+
+                    for (char it = 0; it < 3; it++, index++, histoPtr++)
+                        if (-1 < index && index < curRefBinNumber)
+                            value += *histoPtr * kernel[it];
+                    jointHistogramLogCuda[r + curRefBinNumber * f] = value;
+                }
+            });
+            // Histogram is then smooth along the warped floating axis
+            thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned short>(0), curRefBinNumber, [=]__device__(const unsigned short r) {
+                constexpr double kernel[3]{ GetBasisSplineValue(-1.0), GetBasisSplineValue(0.0), GetBasisSplineValue(-1.0) };
+                for (unsigned short f = 0; f < curFloBinNumber; f++) {
+                    double value = 0;
+                    short index = f - 1;
+                    double *histoPtr = &jointHistogramLogCuda[r + curRefBinNumber * index];
+
+                    for (char it = 0; it < 3; it++, index++, histoPtr += curRefBinNumber)
+                        if (-1 < index && index < curFloBinNumber)
+                            value += *histoPtr * kernel[it];
+                    jointHistogramProCuda[r + curRefBinNumber * f] = value;
+                }
+            });
+        }
+        // Normalise the histogram
+        const double& activeVoxel = thrust::reduce(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0, thrust::plus<double>());
+        entropyValues[t][3] = activeVoxel;
+        thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), curTotalBinNumber, [=]__device__(const unsigned index) {
+            jointHistogramProCuda[index] /= activeVoxel;
+        });
+        // Marginalise over the reference axis
+        thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned short>(0), curRefBinNumber, [=]__device__(const unsigned short r) {
+            double sum = 0;
+            unsigned short index = r;
+            for (unsigned short f = 0; f < curFloBinNumber; f++, index += curRefBinNumber)
+                sum += jointHistogramProCuda[index];
+            jointHistogramProCuda[curRefBinNumber * curFloBinNumber + r] = sum;
+        });
+        // Marginalise over the warped floating axis
+        thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned short>(0), curFloBinNumber, [=]__device__(const unsigned short f) {
+            double sum = 0;
+            unsigned short index = curRefBinNumber * f;
+            for (unsigned short r = 0; r < curRefBinNumber; r++, index++)
+                sum += jointHistogramProCuda[index];
+            jointHistogramProCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f] = sum;
+        });
+        // Compute the entropy of the reference image
+        thrust::counting_iterator<unsigned short> it(0);
+        entropyValues[t][0] = thrust::transform_reduce(thrust::device, it, it + curRefBinNumber, [=]__device__(const unsigned short r) {
+            const double& valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + r];
+            if (valPro > 0) {
+                const double& valLog = log(valPro);
+                jointHistogramLogCuda[curRefBinNumber * curFloBinNumber + r] = valLog;
+                return -valPro * valLog;
+            } else return 0.0;
+        }, 0.0, thrust::plus<double>());
+        // Compute the entropy of the warped floating image
+        it = thrust::counting_iterator<unsigned short>(0);
+        entropyValues[t][1] = thrust::transform_reduce(thrust::device, it, it + curFloBinNumber, [=]__device__(const unsigned short f) {
+            const double& valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f];
+            if (valPro > 0) {
+                const double& valLog = log(valPro);
+                jointHistogramLogCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f] = valLog;
+                return -valPro * valLog;
+            } else return 0.0;
+        }, 0.0, thrust::plus<double>());
+        // Compute the joint entropy
+        it = thrust::counting_iterator<unsigned short>(0);
+        entropyValues[t][2] = thrust::transform_reduce(thrust::device, it, it + curRefBinNumber * curFloBinNumber, [=]__device__(const unsigned short index) {
+            const double& valPro = jointHistogramProCuda[index];
+            if (valPro > 0) {
+                const double& valLog = log(valPro);
+                jointHistogramLogCuda[index] = valLog;
+                return -valPro * valLog;
+            } else return 0.0;
+        }, 0.0, thrust::plus<double>());
+    } // iterate over all time point in the reference image
+}
+/* *************************************************************** */
+static double GetSimilarityMeasureValue(const nifti_image *referenceImage,
+                                        const cudaArray *referenceImageCuda,
+                                        const nifti_image *warpedImage,
+                                        const float *warpedImageCuda,
+                                        const double *timePointWeights,
+                                        const int referenceTimePoints,
+                                        const unsigned short *referenceBinNumber,
+                                        const unsigned short *floatingBinNumber,
+                                        const unsigned short *totalBinNumber,
+                                        vector<thrust::device_vector<double>>& jointHistogramLogCudaVecs,
+                                        vector<thrust::device_vector<double>>& jointHistogramProCudaVecs,
+                                        double **entropyValues,
+                                        const int *referenceMaskCuda,
+                                        const size_t activeVoxelNumber,
+                                        const bool approximation) {
+    reg_getNmiValue_gpu(referenceImage,
+                        referenceImageCuda,
+                        warpedImageCuda,
+                        timePointWeights,
+                        referenceTimePoints,
+                        referenceBinNumber,
+                        floatingBinNumber,
+                        totalBinNumber,
+                        jointHistogramLogCudaVecs,
+                        jointHistogramProCudaVecs,
+                        entropyValues,
+                        referenceMaskCuda,
+                        activeVoxelNumber,
+                        approximation);
 
     double nmi = 0;
-    for (int t = 0; t < referenceTimePoints; ++t) {
+    for (int t = 0; t < referenceTimePoints; t++) {
         if (timePointWeights[t] > 0)
             nmi += timePointWeights[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2];
     }
@@ -89,33 +264,37 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage,
 /* *************************************************************** */
 double reg_nmi_gpu::GetSimilarityMeasureValueFw() {
     return ::GetSimilarityMeasureValue(this->referenceImage,
+                                       this->referenceImageCuda,
                                        this->warpedImage,
                                        this->warpedImageCuda,
                                        this->timePointWeights,
+                                       this->referenceTimePoints,
                                        this->referenceBinNumber,
                                        this->floatingBinNumber,
                                        this->totalBinNumber,
-                                       this->jointHistogramLog,
-                                       this->jointHistogramPro,
+                                       this->jointHistogramLogCudaVecs,
+                                       this->jointHistogramProCudaVecs,
                                        this->entropyValues,
-                                       this->referenceMask,
-                                       this->referenceTimePoints,
+                                       this->referenceMaskCuda,
+                                       this->activeVoxelNumber,
                                        this->approximatePw);
 }
 /* *************************************************************** */
 double reg_nmi_gpu::GetSimilarityMeasureValueBw() {
     return ::GetSimilarityMeasureValue(this->floatingImage,
+                                       this->floatingImageCuda,
                                        this->warpedImageBw,
                                        this->warpedImageBwCuda,
                                        this->timePointWeights,
+                                       this->referenceTimePoints,
                                        this->floatingBinNumber,
                                        this->referenceBinNumber,
                                        this->totalBinNumber,
-                                       this->jointHistogramLogBw,
-                                       this->jointHistogramProBw,
+                                       this->jointHistogramLogBwCudaVecs,
+                                       this->jointHistogramProBwCudaVecs,
                                        this->entropyValuesBw,
-                                       this->floatingMask,
-                                       this->referenceTimePoints,
+                                       this->floatingMaskCuda,
+                                       this->activeVoxelNumber,
                                        this->approximatePw);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index 51bc12a8..c3f33d4c 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -56,6 +56,12 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
     virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override;
     /// @brief Compute the voxel-based nmi gradient backwards
     virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override;
+
+protected:
+    vector<thrust::device_vector<double>> jointHistogramLogCudaVecs;
+    vector<thrust::device_vector<double>> jointHistogramProCudaVecs;
+    vector<thrust::device_vector<double>> jointHistogramLogBwCudaVecs;
+    vector<thrust::device_vector<double>> jointHistogramProBwCudaVecs;
 };
 /* *************************************************************** */
 /// @brief NMI measure of similarity class

From bc4c672772b44e22ba32a82d00ca521881229dd6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 15 Nov 2023 15:25:22 +0000
Subject: [PATCH 243/314] Make reg_getVoxelBasedNmiGradient_gpu() on a par with
 CPU #92

- Optimise reg_getVoxelBasedNmiGradient_gpu()
- Get the function ready for multi-timepoint support
---
 niftyreg_build_version.txt       |   2 +-
 reg-lib/cuda/BlockSize.hpp       |   6 -
 reg-lib/cuda/_reg_nmi_gpu.cu     | 184 +++++++----
 reg-lib/cuda/_reg_nmi_kernels.cu | 519 -------------------------------
 4 files changed, 116 insertions(+), 595 deletions(-)
 delete mode 100755 reg-lib/cuda/_reg_nmi_kernels.cu

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 35329ed8..e5db9a27 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-361
+362
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 06beca8a..a86430ec 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -14,8 +14,6 @@
 namespace NiftyReg {
 /* *************************************************************** */
 struct BlockSize {
-    unsigned reg_getVoxelBasedNmiGradientUsingPw2D;
-    unsigned reg_getVoxelBasedNmiGradientUsingPw3D;
     unsigned reg_affine_getDeformationField;
     unsigned reg_spline_getDeformationField2D;
     unsigned reg_spline_getDeformationField3D;
@@ -54,8 +52,6 @@ struct BlockSize {
 /* *************************************************************** */
 struct BlockSize100: public BlockSize {
     BlockSize100() {
-        reg_getVoxelBasedNmiGradientUsingPw2D = 384; // 21 reg - 24 smem - 32 cmem
-        reg_getVoxelBasedNmiGradientUsingPw3D = 320; // 25 reg - 24 smem - 32 cmem
         reg_affine_getDeformationField = 512; // 16 reg - 24 smem
         reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem
         reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem
@@ -96,8 +92,6 @@ struct BlockSize100: public BlockSize {
 /* *************************************************************** */
 struct BlockSize300: public BlockSize {
     BlockSize300() {
-        reg_getVoxelBasedNmiGradientUsingPw2D = 768; // 38 reg
-        reg_getVoxelBasedNmiGradientUsingPw3D = 640; // 45 reg
         reg_affine_getDeformationField = 1024; // 23 reg
         reg_spline_getDeformationField2D = 1024; // 34 reg
         reg_spline_getDeformationField3D = 1024; // 34 reg
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index f48fff8f..d0c3056d 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -11,7 +11,7 @@
  */
 
 #include "_reg_nmi_gpu.h"
-#include "_reg_nmi_kernels.cu"
+#include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
 reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() {
@@ -298,95 +298,141 @@ double reg_nmi_gpu::GetSimilarityMeasureValueBw() {
                                        this->approximatePw);
 }
 /* *************************************************************** */
+template<bool is3d> struct Derivative { using Type = double3; };
+template<> struct Derivative<false> { using Type = double2; };
+/* *************************************************************** */
 /// Called when we only have one target and one source image
+template<bool is3d>
 void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
                                       const cudaArray *referenceImageCuda,
                                       const float *warpedImageCuda,
                                       const float4 *warpedGradientCuda,
-                                      const float *logJointHistogramCuda,
+                                      const double *jointHistogramLogCuda,
                                       float4 *voxelBasedGradientCuda,
                                       const int *maskCuda,
                                       const size_t activeVoxelNumber,
                                       const double *entropies,
-                                      const int refBinning,
-                                      const int floBinning) {
-    auto blockSize = CudaContext::GetBlockSize();
+                                      const int refBinNumber,
+                                      const int floBinNumber,
+                                      const int totalBinNumber,
+                                      const double timePointWeight,
+                                      const int currentTimePoint) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-    const int binNumber = refBinning * floBinning + refBinning + floBinning;
-    const float normalisedJE = (float)(entropies[2] * entropies[3]);
-    const float nmi = (float)((entropies[0] + entropies[1]) / entropies[2]);
+    const double normalisedJE = entropies[2] * entropies[3];
+    const double nmi = (entropies[0] + entropies[1]) / entropies[2];
+    const int referenceOffset = refBinNumber * floBinNumber;
+    const int floatingOffset = referenceOffset + refBinNumber;
 
-    auto referenceImageTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
-                                                           cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
-    auto warpedImageTexture = Cuda::CreateTextureObject(warpedImageCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
-                                                        cudaChannelFormatKindFloat, 1);
-    auto warpedGradientTexture = Cuda::CreateTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
-                                                           cudaChannelFormatKindFloat, 4);
-    auto histogramTexture = Cuda::CreateTextureObject(logJointHistogramCuda, cudaResourceTypeLinear, binNumber * sizeof(float),
-                                                      cudaChannelFormatKindFloat, 1);
-    auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
-                                                 cudaChannelFormatKindSigned, 1);
+    auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
+                                                              cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
+    auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + currentTimePoint * voxelNumber, cudaResourceTypeLinear,
+                                                           voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1);
+    auto warpedGradientTexturePtr = Cuda::CreateTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
+                                                              cudaChannelFormatKindFloat, 4);
+    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
+                                                    cudaChannelFormatKindSigned, 1);
+    auto referenceImageTexture = *referenceImageTexturePtr;
+    auto warpedImageTexture = *warpedImageTexturePtr;
+    auto warpedGradientTexture = *warpedGradientTexturePtr;
+    auto maskTexture = *maskTexturePtr;
 
-    if (referenceImage->nz > 1) {
-        const unsigned blocks = blockSize->reg_getVoxelBasedNmiGradientUsingPw3D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_getVoxelBasedNmiGradientUsingPw3D_kernel<<<gridDims, blockDims>>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture,
-                                                                              *warpedGradientTexture, *histogramTexture, *maskTexture,
-                                                                              imageSize, refBinning, floBinning, normalisedJE, nmi,
-                                                                              (unsigned)activeVoxelNumber);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    } else {
-        const unsigned blocks = blockSize->reg_getVoxelBasedNmiGradientUsingPw2D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_getVoxelBasedNmiGradientUsingPw2D_kernel<<<gridDims, blockDims>>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture,
-                                                                              *warpedGradientTexture, *histogramTexture, *maskTexture,
-                                                                              imageSize, refBinning, floBinning, normalisedJE, nmi,
-                                                                              (unsigned)activeVoxelNumber);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    }
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
+        const int targetIndex = tex1Dfetch<int>(maskTexture, index);
+        const float warpedImageValue = tex1Dfetch<float>(warpedImageTexture, targetIndex);
+        if (warpedImageValue != warpedImageValue) return;
+        const auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(targetIndex, imageSize);
+        const float referenceImageValue = tex3D<float>(referenceImageTexture,
+                                                       (float(x) + 0.5f) / float(imageSize.x),
+                                                       (float(y) + 0.5f) / float(imageSize.y),
+                                                       is3d ? (float(z) + 0.5f) / float(imageSize.z) : 0.5f);
+        if (referenceImageValue != referenceImageValue) return;
+        const float4& warpedGradValue = tex1Dfetch<float4>(warpedGradientTexture, index);
+        float4 gradValue = voxelBasedGradientCuda[targetIndex];
+
+        // No computation is performed if any of the point is part of the background
+        // The two is added because the image is resample between 2 and bin+2
+        // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65
+        typename Derivative<is3d>::Type jointDeriv{}, refDeriv{}, warDeriv{};
+        for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) {
+            if (-1 < r && r < refBinNumber) {
+                for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) {
+                    if (-1 < w && w < floBinNumber) {
+                        const double commonValue = (GetBasisSplineValue<double>(referenceImageValue - r) *
+                                                    GetBasisSplineDerivativeValue<double>(warpedImageValue - w));
+                        const double jointLog = jointHistogramLogCuda[r + w * refBinNumber];
+                        const double refLog = jointHistogramLogCuda[r + referenceOffset];
+                        const double warLog = jointHistogramLogCuda[w + floatingOffset];
+                        if (warpedGradValue.x == warpedGradValue.x) {
+                            const double commonMultGrad = commonValue * warpedGradValue.x;
+                            jointDeriv.x += commonMultGrad * jointLog;
+                            refDeriv.x += commonMultGrad * refLog;
+                            warDeriv.x += commonMultGrad * warLog;
+                        }
+                        if (warpedGradValue.y == warpedGradValue.y) {
+                            const double commonMultGrad = commonValue * warpedGradValue.y;
+                            jointDeriv.y += commonMultGrad * jointLog;
+                            refDeriv.y += commonMultGrad * refLog;
+                            warDeriv.y += commonMultGrad * warLog;
+                        }
+                        if constexpr (is3d) {
+                            if (warpedGradValue.z == warpedGradValue.z) {
+                                const double commonMultGrad = commonValue * warpedGradValue.z;
+                                jointDeriv.z += commonMultGrad * jointLog;
+                                refDeriv.z += commonMultGrad * refLog;
+                                warDeriv.z += commonMultGrad * warLog;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
+        gradValue.x += static_cast<float>(timePointWeight * (refDeriv.x + warDeriv.x - nmi * jointDeriv.x) / normalisedJE);
+        gradValue.y += static_cast<float>(timePointWeight * (refDeriv.y + warDeriv.y - nmi * jointDeriv.y) / normalisedJE);
+        if constexpr (is3d)
+            gradValue.z += static_cast<float>(timePointWeight * (refDeriv.z + warDeriv.z - nmi * jointDeriv.z) / normalisedJE);
+        voxelBasedGradientCuda[targetIndex] = gradValue;
+    });
 }
 /* *************************************************************** */
 void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) {
     // Call compute similarity measure to calculate joint histogram
     this->GetSimilarityMeasureValue();
 
-    // The latest joint histogram is transferred onto the GPU
-    thrust::device_vector<float> jointHistogramLogCuda(this->jointHistogramLog[0], this->jointHistogramLog[0] + this->totalBinNumber[0]);
-
-    // The gradient of the NMI is computed on the GPU
-    reg_getVoxelBasedNmiGradient_gpu(this->referenceImage,
-                                     this->referenceImageCuda,
-                                     this->warpedImageCuda,
-                                     this->warpedGradientCuda,
-                                     jointHistogramLogCuda.data().get(),
-                                     this->voxelBasedGradientCuda,
-                                     this->referenceMaskCuda,
-                                     this->activeVoxelNumber,
-                                     this->entropyValues[0],
-                                     this->referenceBinNumber[0],
-                                     this->floatingBinNumber[0]);
+    auto getVoxelBasedNmiGradient = this->referenceImage->nz > 1 ? reg_getVoxelBasedNmiGradient_gpu<true> : reg_getVoxelBasedNmiGradient_gpu<false>;
+    getVoxelBasedNmiGradient(this->referenceImage,
+                             this->referenceImageCuda,
+                             this->warpedImageCuda,
+                             this->warpedGradientCuda,
+                             this->jointHistogramLogCudaVecs[currentTimePoint].data().get(),
+                             this->voxelBasedGradientCuda,
+                             this->referenceMaskCuda,
+                             this->activeVoxelNumber,
+                             this->entropyValues[currentTimePoint],
+                             this->referenceBinNumber[currentTimePoint],
+                             this->floatingBinNumber[currentTimePoint],
+                             this->totalBinNumber[currentTimePoint],
+                             this->timePointWeights[currentTimePoint],
+                             currentTimePoint);
 }
 /* *************************************************************** */
 void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
-    // The latest joint histogram is transferred onto the GPU
-    thrust::device_vector<float> jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]);
-
-    // The gradient of the NMI is computed on the GPU
-    reg_getVoxelBasedNmiGradient_gpu(this->floatingImage,
-                                     this->floatingImageCuda,
-                                     this->warpedImageBwCuda,
-                                     this->warpedGradientBwCuda,
-                                     jointHistogramLogCudaBw.data().get(),
-                                     this->voxelBasedGradientBwCuda,
-                                     this->floatingMaskCuda,
-                                     this->activeVoxelNumber,
-                                     this->entropyValuesBw[0],
-                                     this->floatingBinNumber[0],
-                                     this->referenceBinNumber[0]);
+    auto getVoxelBasedNmiGradient = this->floatingImage->nz > 1 ? reg_getVoxelBasedNmiGradient_gpu<true> : reg_getVoxelBasedNmiGradient_gpu<false>;
+    getVoxelBasedNmiGradient(this->floatingImage,
+                             this->floatingImageCuda,
+                             this->warpedImageBwCuda,
+                             this->warpedGradientBwCuda,
+                             this->jointHistogramLogBwCudaVecs[currentTimePoint].data().get(),
+                             this->voxelBasedGradientBwCuda,
+                             this->floatingMaskCuda,
+                             this->activeVoxelNumber,
+                             this->entropyValuesBw[currentTimePoint],
+                             this->floatingBinNumber[currentTimePoint],
+                             this->referenceBinNumber[currentTimePoint],
+                             this->totalBinNumber[currentTimePoint],
+                             this->timePointWeights[currentTimePoint],
+                             currentTimePoint);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_kernels.cu b/reg-lib/cuda/_reg_nmi_kernels.cu
deleted file mode 100755
index 0da6c415..00000000
--- a/reg-lib/cuda/_reg_nmi_kernels.cu
+++ /dev/null
@@ -1,519 +0,0 @@
-/*
- *  _reg_mutualinformation_kernels.cu
- *
- *
- *  Created by Marc Modat on 24/03/2009.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#include "_reg_common_cuda_kernels.cu"
-
-#define COEFF_L 0.16666666f
-#define COEFF_C 0.66666666f
-#define COEFF_B 0.83333333f
-
-/* *************************************************************** */
-__device__ float GetBasisSplineValue(float x) {
-    x = fabsf(x);
-    float value = 0.0f;
-    if (x < 2.0f)
-        if (x < 1.0f)
-            value = 2.0f / 3.0f + (0.5f * x - 1.0f) * x * x;
-        else {
-            x -= 2.0f;
-            value = -x * x * x / 6.0f;
-        }
-    return value;
-}
-/* *************************************************************** */
-__device__ float GetBasisSplineDerivativeValue(const float& ori) {
-    float x = fabsf(ori);
-    float value = 0.0f;
-    if (x < 2.0f)
-        if (x < 1.0f)
-            value = (1.5f * x - 2.0f) * ori;
-        else {
-            x -= 2.0f;
-            value = -0.5f * x * x;
-            if (ori < 0.0f) value = -value;
-        }
-    return value;
-}
-/* *************************************************************** */
-__global__ void reg_getVoxelBasedNmiGradientUsingPw2D_kernel(float4 *voxelBasedGradient,
-                                                             cudaTextureObject_t referenceImageTexture,
-                                                             cudaTextureObject_t warpedImageTexture,
-                                                             cudaTextureObject_t warpedGradientTexture,
-                                                             cudaTextureObject_t histogramTexture,
-                                                             cudaTextureObject_t maskTexture,
-                                                             const int3 imageSize,
-                                                             const int refBinning,
-                                                             const int floBinning,
-                                                             const float normalisedJE,
-                                                             const float nmi,
-                                                             const unsigned activeVoxelNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        const int targetIndex = tex1Dfetch<int>(maskTexture, tid);
-        int quot, rem;
-        reg_div_cuda(targetIndex, imageSize.x, quot, rem);
-        const int y = quot, x = rem;
-
-        const float referenceImageValue = tex3D<float>(referenceImageTexture,
-                                                       ((float)x + 0.5f) / (float)imageSize.x,
-                                                       ((float)y + 0.5f) / (float)imageSize.y,
-                                                       0.5f);
-        const float warpedImageValue = tex1Dfetch<float>(warpedImageTexture, targetIndex);
-        const float4 warpedImageGradient = tex1Dfetch<float4>(warpedGradientTexture, tid);
-
-        float4 gradValue{};
-
-        // No computation is performed if any of the point is part of the background
-        // The two is added because the image is resample between 2 and bin +2
-        // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65
-        if (0.f < referenceImageValue && referenceImageValue < refBinning &&
-            0.f < warpedImageValue && warpedImageValue < floBinning &&
-            referenceImageValue == referenceImageValue && warpedImageValue == warpedImageValue) {
-            const float2 resDeriv = make_float2(warpedImageGradient.x, warpedImageGradient.y);
-            if (resDeriv.x == resDeriv.x && resDeriv.y == resDeriv.y) {
-                float jointEntropyDerivative_X = 0.0f;
-                float warpedEntropyDerivative_X = 0.0f;
-                float referenceEntropyDerivative_X = 0.0f;
-                float jointEntropyDerivative_Y = 0.0f;
-                float warpedEntropyDerivative_Y = 0.0f;
-                float referenceEntropyDerivative_Y = 0.0f;
-                for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) {
-                    if (-1 < r && r < refBinning) {
-                        for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) {
-                            if (-1 < w && w < floBinning) {
-                                const float commonValue = (GetBasisSplineValue(referenceImageValue - (float)r) *
-                                                           GetBasisSplineDerivativeValue(warpedImageValue - (float)w));
-
-                                const float jointLog = tex1Dfetch<float>(histogramTexture, w * floBinning + r);
-                                const float targetLog = tex1Dfetch<float>(histogramTexture, refBinning * floBinning + r);
-                                const float resultLog = tex1Dfetch<float>(histogramTexture, refBinning * floBinning + refBinning + w);
-
-                                float temp = commonValue * resDeriv.x;
-                                jointEntropyDerivative_X += temp * jointLog;
-                                referenceEntropyDerivative_X += temp * targetLog;
-                                warpedEntropyDerivative_X += temp * resultLog;
-
-                                temp = commonValue * resDeriv.y;
-                                jointEntropyDerivative_Y += temp * jointLog;
-                                referenceEntropyDerivative_Y += temp * targetLog;
-                                warpedEntropyDerivative_Y += temp * resultLog;
-                            } // O<t<bin
-                        } // t
-                    } // 0<r<bin
-                } // r
-
-                // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
-                gradValue.x = (referenceEntropyDerivative_X + warpedEntropyDerivative_X - nmi * jointEntropyDerivative_X) / normalisedJE;
-                gradValue.y = (referenceEntropyDerivative_Y + warpedEntropyDerivative_Y - nmi * jointEntropyDerivative_Y) / normalisedJE;
-
-            }
-        }
-        voxelBasedGradient[targetIndex] = gradValue;
-    }
-}
-/* *************************************************************** */
-__global__ void reg_getVoxelBasedNmiGradientUsingPw3D_kernel(float4 *voxelBasedGradient,
-                                                             cudaTextureObject_t referenceImageTexture,
-                                                             cudaTextureObject_t warpedImageTexture,
-                                                             cudaTextureObject_t warpedGradientTexture,
-                                                             cudaTextureObject_t histogramTexture,
-                                                             cudaTextureObject_t maskTexture,
-                                                             const int3 imageSize,
-                                                             const int refBinning,
-                                                             const int floBinning,
-                                                             const float normalisedJE,
-                                                             const float nmi,
-                                                             const unsigned activeVoxelNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        const int targetIndex = tex1Dfetch<int>(maskTexture, tid);
-        int quot, rem;
-        reg_div_cuda(targetIndex, imageSize.x * imageSize.y, quot, rem);
-        const int z = quot;
-        reg_div_cuda(rem, imageSize.x, quot, rem);
-        const int y = quot, x = rem;
-
-        const float referenceImageValue = tex3D<float>(referenceImageTexture,
-                                                       ((float)x + 0.5f) / (float)imageSize.x,
-                                                       ((float)y + 0.5f) / (float)imageSize.y,
-                                                       ((float)z + 0.5f) / (float)imageSize.z);
-        const float warpedImageValue = tex1Dfetch<float>(warpedImageTexture, targetIndex);
-        const float4 warpedImageGradient = tex1Dfetch<float4>(warpedGradientTexture, tid);
-
-        float4 gradValue{};
-
-        // No computation is performed if any of the point is part of the background
-        // The two is added because the image is resample between 2 and bin +2
-        // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65
-        if (0.f < referenceImageValue && referenceImageValue < refBinning &&
-            0.f < warpedImageValue && warpedImageValue < floBinning &&
-            referenceImageValue == referenceImageValue && warpedImageValue == warpedImageValue) {
-            const float3 resDeriv = make_float3(warpedImageGradient.x, warpedImageGradient.y, warpedImageGradient.z);
-            if (resDeriv.x == resDeriv.x && resDeriv.y == resDeriv.y && resDeriv.z == resDeriv.z) {
-                float jointEntropyDerivative_X = 0.0f;
-                float warpedEntropyDerivative_X = 0.0f;
-                float referenceEntropyDerivative_X = 0.0f;
-                float jointEntropyDerivative_Y = 0.0f;
-                float warpedEntropyDerivative_Y = 0.0f;
-                float referenceEntropyDerivative_Y = 0.0f;
-                float jointEntropyDerivative_Z = 0.0f;
-                float warpedEntropyDerivative_Z = 0.0f;
-                float referenceEntropyDerivative_Z = 0.0f;
-                for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) {
-                    if (-1 < r && r < refBinning) {
-                        for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) {
-                            if (-1 < w && w < floBinning) {
-                                const float commonValue = (GetBasisSplineValue(referenceImageValue - (float)r) *
-                                                           GetBasisSplineDerivativeValue(warpedImageValue - (float)w));
-
-                                const float jointLog = tex1Dfetch<float>(histogramTexture, w * floBinning + r);
-                                const float targetLog = tex1Dfetch<float>(histogramTexture, refBinning * floBinning + r);
-                                const float resultLog = tex1Dfetch<float>(histogramTexture, refBinning * floBinning + refBinning + w);
-
-                                float temp = commonValue * resDeriv.x;
-                                jointEntropyDerivative_X += temp * jointLog;
-                                referenceEntropyDerivative_X += temp * targetLog;
-                                warpedEntropyDerivative_X += temp * resultLog;
-
-                                temp = commonValue * resDeriv.y;
-                                jointEntropyDerivative_Y += temp * jointLog;
-                                referenceEntropyDerivative_Y += temp * targetLog;
-                                warpedEntropyDerivative_Y += temp * resultLog;
-
-                                temp = commonValue * resDeriv.z;
-                                jointEntropyDerivative_Z += temp * jointLog;
-                                referenceEntropyDerivative_Z += temp * targetLog;
-                                warpedEntropyDerivative_Z += temp * resultLog;
-                            } // O<t<bin
-                        } // t
-                    } // 0<r<bin
-                } // r
-
-                // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
-                gradValue.x = (referenceEntropyDerivative_X + warpedEntropyDerivative_X - nmi * jointEntropyDerivative_X) / normalisedJE;
-                gradValue.y = (referenceEntropyDerivative_Y + warpedEntropyDerivative_Y - nmi * jointEntropyDerivative_Y) / normalisedJE;
-                gradValue.z = (referenceEntropyDerivative_Z + warpedEntropyDerivative_Z - nmi * jointEntropyDerivative_Z) / normalisedJE;
-
-            }
-        }
-        voxelBasedGradient[targetIndex] = gradValue;
-    }
-}
-/* *************************************************************** */
-// Multichannel NMI gradient. Hardcoded for 2x2 NMI channels.
-/* __global__ void reg_getVoxelBasedNmiGradientUsingPw2x2_kernel(float4 *voxelBasedGradient) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_ActiveVoxelNumber) {
-        const int targetIndex = tex1Dfetch(maskTexture, tid);
-        int tempIndex = targetIndex;
-        const int z = tempIndex / (c_ImageSize.x * c_ImageSize.y);
-        tempIndex -= z * c_ImageSize.x * c_ImageSize.y;
-        const int y = tempIndex / c_ImageSize.x;
-        const int x = tempIndex - y * c_ImageSize.x;
-
-        float4 voxelValues = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-        voxelValues.x = tex3D(firstreferenceImageTexture,
-                              ((float)x + 0.5f) / (float)c_ImageSize.x,
-                              ((float)y + 0.5f) / (float)c_ImageSize.y,
-                              ((float)z + 0.5f) / (float)c_ImageSize.z);
-        voxelValues.y = tex3D(secondreferenceImageTexture,
-                              ((float)x + 0.5f) / (float)c_ImageSize.x,
-                              ((float)y + 0.5f) / (float)c_ImageSize.y,
-                              ((float)z + 0.5f) / (float)c_ImageSize.z);
-        voxelValues.z = tex1Dfetch(firstwarpedImageTexture, targetIndex);
-        voxelValues.w = tex1Dfetch(secondwarpedImageTexture, targetIndex);
-
-        float4 firstwarpedImageGradient = tex1Dfetch(firstwarpedImageGradientTexture, tid);
-        float4 secondwarpedImageGradient = tex1Dfetch(secondwarpedImageGradientTexture, tid);
-        float4 gradValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-        // Could remove some tests (which are not really needed) to reduce register
-        // count. They should be put in again at some point for completeness and generality.
-        if (voxelValues.x == voxelValues.x &&
-            voxelValues.y == voxelValues.y &&
-            voxelValues.z == voxelValues.z &&
-            voxelValues.w == voxelValues.w &&
-            voxelValues.x >= 0.0f &&
-            voxelValues.y >= 0.0f &&
-            voxelValues.z >= 0.0f &&
-            voxelValues.w >= 0.0f &&
-            voxelValues.x < c_firstTargetBin &&
-            voxelValues.y < c_secondTargetBin &&
-            voxelValues.z < c_firstResultBin &&
-            voxelValues.w < c_secondResultBin) {
-            voxelValues.x = (float)((int)voxelValues.x);
-            voxelValues.y = (float)((int)voxelValues.y);
-            voxelValues.z = (float)((int)voxelValues.z);
-            voxelValues.w = (float)((int)voxelValues.w);
-
-            if (firstwarpedImageGradient.x == firstwarpedImageGradient.x &&
-                firstwarpedImageGradient.y == firstwarpedImageGradient.y &&
-                firstwarpedImageGradient.z == firstwarpedImageGradient.z &&
-                secondwarpedImageGradient.x == secondwarpedImageGradient.x &&
-                secondwarpedImageGradient.y == secondwarpedImageGradient.y &&
-                secondwarpedImageGradient.z == secondwarpedImageGradient.z) {
-                float jointEntropyDerivative_X = 0.0f;
-                float warpedEntropyDerivative_X = 0.0f;
-                float referenceEntropyDerivative_X = 0.0f;
-
-                float jointEntropyDerivative_Y = 0.0f;
-                float warpedEntropyDerivative_Y = 0.0f;
-                float referenceEntropyDerivative_Y = 0.0f;
-
-                float jointEntropyDerivative_Z = 0.0f;
-                float warpedEntropyDerivative_Z = 0.0f;
-                float referenceEntropyDerivative_Z = 0.0f;
-
-                float jointLog, targetLog, resultLog, temp;
-                float4 relative_pos = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-
-                float s_x, s_y, s_z, s_w;
-                float common_target_value = 0.0f;
-                int target_flat_index, result_flat_index, total_target_entries, num_probabilities;
-                for (int i = -1; i < 2; ++i) {
-                    relative_pos.x = (int)(voxelValues.x + i);
-
-                    if (-1 < relative_pos.x && relative_pos.x < c_firstTargetBin) {
-                        for (int j = -1; j < 2; ++j) {
-                            relative_pos.y = (int)(voxelValues.y + j);
-
-                            if (-1 < relative_pos.y && relative_pos.y < c_secondTargetBin) {
-                                s_x = GetBasisSplineValue(relative_pos.x - voxelValues.x);
-                                s_y = GetBasisSplineValue(relative_pos.y - voxelValues.y);
-                                common_target_value = s_x * s_y;
-
-                                for (int k = -1; k < 2; ++k) {
-                                    relative_pos.z = (int)(voxelValues.z + k);
-                                    if (-1 < relative_pos.z && relative_pos.z < c_firstResultBin) {
-                                        s_x = GetBasisSplineDerivativeValue(relative_pos.z - voxelValues.z);
-                                        s_w = GetBasisSplineValue(relative_pos.z - voxelValues.z);
-                                        for (int l = -1; l < 2; ++l) {
-                                            relative_pos.w = (int)(voxelValues.w + l);
-                                            if (-1 < relative_pos.w && relative_pos.w < c_secondResultBin) {
-                                                target_flat_index = relative_pos.x + relative_pos.y * c_firstTargetBin;
-                                                result_flat_index = relative_pos.z + relative_pos.w * c_firstResultBin;
-                                                total_target_entries = c_firstTargetBin * c_secondTargetBin;
-                                                num_probabilities = total_target_entries * c_firstResultBin * c_secondResultBin;
-
-                                                jointLog = tex1Dfetch(histogramTexture, target_flat_index + (result_flat_index * total_target_entries));
-                                                targetLog = tex1Dfetch(histogramTexture, num_probabilities + target_flat_index);
-                                                resultLog = tex1Dfetch(histogramTexture, num_probabilities + total_target_entries + result_flat_index);
-
-                                                // Contribution from floating images. These arithmetic operations use
-                                                // a lot of registers. Need to look into whether this can be reduced somehow.
-                                                s_y = GetBasisSplineValue(relative_pos.w - voxelValues.w);
-                                                s_z = GetBasisSplineDerivativeValue(relative_pos.w - voxelValues.w);
-                                                temp = (s_x * firstwarpedImageGradient.x * s_y) +
-                                                    (s_z * secondwarpedImageGradient.x * s_w);
-                                                temp *= common_target_value;
-
-                                                jointEntropyDerivative_X -= temp * jointLog;
-                                                referenceEntropyDerivative_X -= temp * targetLog;
-                                                warpedEntropyDerivative_X -= temp * resultLog;
-
-                                                temp = (s_x * firstwarpedImageGradient.y * s_y) +
-                                                    (s_z * secondwarpedImageGradient.y * s_w);
-                                                temp *= common_target_value;
-                                                jointEntropyDerivative_Y -= temp * jointLog;
-                                                referenceEntropyDerivative_Y -= temp * targetLog;
-                                                warpedEntropyDerivative_Y -= temp * resultLog;
-
-                                                temp = (s_x * firstwarpedImageGradient.z * s_y) +
-                                                    (s_z * secondwarpedImageGradient.z * s_w);
-                                                temp *= common_target_value;
-                                                jointEntropyDerivative_Z -= temp * jointLog;
-                                                referenceEntropyDerivative_Z -= temp * targetLog;
-                                                warpedEntropyDerivative_Z -= temp * resultLog;
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-
-                gradValue.x = (referenceEntropyDerivative_X + warpedEntropyDerivative_X - c_NMI * jointEntropyDerivative_X) / c_NormalisedJE;
-                gradValue.y = (referenceEntropyDerivative_Y + warpedEntropyDerivative_Y - c_NMI * jointEntropyDerivative_Y) / c_NormalisedJE;
-                gradValue.z = (referenceEntropyDerivative_Z + warpedEntropyDerivative_Z - c_NMI * jointEntropyDerivative_Z) / c_NormalisedJE;
-            }
-        }
-        voxelBasedGradient[targetIndex] = gradValue;
-    }
-} */
-/* *************************************************************** */
-/* __global__ void reg_smoothJointHistogramX_kernel(float *tempHistogram) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_secondTargetBin * c_firstResultBin * c_secondResultBin) {
-        // The starting index is computed
-        unsigned startingPoint = tid * c_firstTargetBin;
-        unsigned finishPoint = startingPoint + c_firstTargetBin;
-
-        // The first point is computed
-        tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
-                                        tex1Dfetch(histogramTexture, startingPoint + 1) * COEFF_L) / COEFF_B;
-        // The middle points are computed
-        for (unsigned i = startingPoint + 1; i < finishPoint - 1; ++i) {
-            tempHistogram[i] = tex1Dfetch(histogramTexture, i - 1) * COEFF_L +
-                tex1Dfetch(histogramTexture, i) * COEFF_C +
-                tex1Dfetch(histogramTexture, i + 1) * COEFF_L;
-        }
-        // The last point is computed
-        tempHistogram[finishPoint - 1] = (tex1Dfetch(histogramTexture, finishPoint - 2) * COEFF_L +
-                                          tex1Dfetch(histogramTexture, finishPoint - 1) * COEFF_C) / COEFF_B;
-    }
-} */
-/* *************************************************************** */
-/* __global__ void reg_smoothJointHistogramY_kernel(float *tempHistogram) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_firstTargetBin * c_firstResultBin * c_secondResultBin) {
-        // The starting index is computed
-        unsigned startingPoint = tid + c_firstTargetBin * (c_secondTargetBin - 1) * (c_firstResultBin * (int)(tid / (c_firstTargetBin * c_firstResultBin)) +
-                                                                                     (int)(tid / c_firstTargetBin - c_firstResultBin * (int)(tid / (c_firstTargetBin * c_firstResultBin))));
-        unsigned increment = c_firstTargetBin;
-        unsigned finishPoint = startingPoint + increment * c_secondTargetBin;
-
-        // The first point is computed
-        tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
-                                        tex1Dfetch(histogramTexture, startingPoint + increment) * COEFF_L) / COEFF_B;
-        // The middle points are computed
-        for (unsigned i = startingPoint + increment; i < finishPoint - increment; i += increment) {
-            tempHistogram[i] = tex1Dfetch(histogramTexture, i - increment) * COEFF_L +
-                tex1Dfetch(histogramTexture, i) * COEFF_C +
-                tex1Dfetch(histogramTexture, i + increment) * COEFF_L;
-        }
-        // The last point is computed
-        tempHistogram[finishPoint - increment] = (tex1Dfetch(histogramTexture, finishPoint - 2 * increment) * COEFF_L +
-                                                  tex1Dfetch(histogramTexture, finishPoint - increment) * COEFF_C) / COEFF_B;
-    }
-} */
-/* *************************************************************** */
-/* __global__ void reg_smoothJointHistogramZ_kernel(float *tempHistogram) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_firstTargetBin * c_secondTargetBin * c_secondResultBin) {
-        // The starting index is computed
-        unsigned startingPoint = tid + c_firstTargetBin * c_secondTargetBin * (c_firstResultBin - 1) * (int)(tid / (c_firstTargetBin * c_secondTargetBin));
-        unsigned increment = c_firstTargetBin * c_secondTargetBin;
-        unsigned finishPoint = startingPoint + increment * c_firstResultBin;
-
-        // The first point is computed
-        tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
-                                        tex1Dfetch(histogramTexture, startingPoint + increment) * COEFF_L) / COEFF_B;
-        // The middle points are computed
-        for (unsigned i = startingPoint + increment; i < finishPoint - increment; i += increment) {
-            tempHistogram[i] = tex1Dfetch(histogramTexture, i - increment) * COEFF_L +
-                tex1Dfetch(histogramTexture, i) * COEFF_C +
-                tex1Dfetch(histogramTexture, i + increment) * COEFF_L;
-        }
-        // The last point is computed
-        tempHistogram[finishPoint - increment] = (tex1Dfetch(histogramTexture, finishPoint - 2 * increment) * COEFF_L +
-                                                  tex1Dfetch(histogramTexture, finishPoint - increment) * COEFF_C) / COEFF_B;
-    }
-} */
-/* *************************************************************** */
-/* __global__ void reg_smoothJointHistogramW_kernel(float *tempHistogram) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_firstTargetBin * c_secondTargetBin * c_firstResultBin) {
-        // The starting index is computed
-        unsigned startingPoint = tid;
-        unsigned increment = c_firstTargetBin * c_secondTargetBin * c_firstResultBin;
-        unsigned finishPoint = increment * c_secondResultBin;
-
-        // The first point is computed
-        tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C +
-                                        tex1Dfetch(histogramTexture, startingPoint + increment) * COEFF_L) / COEFF_B;
-        // The middle points are computed
-        for (unsigned i = startingPoint + increment; i < finishPoint - increment; i += increment) {
-            tempHistogram[i] = tex1Dfetch(histogramTexture, i - increment) * COEFF_L +
-                tex1Dfetch(histogramTexture, i) * COEFF_C +
-                tex1Dfetch(histogramTexture, i + increment) * COEFF_L;
-        }
-        // The last point is computed
-        tempHistogram[finishPoint - increment] = (tex1Dfetch(histogramTexture, finishPoint - 2 * increment) * COEFF_L +
-                                                  tex1Dfetch(histogramTexture, finishPoint - increment) * COEFF_C) / COEFF_B;
-    }
-} */
-/* *************************************************************** */
-// Kernels for marginalisation along the different axes
-/* __global__ void reg_marginaliseTargetX_kernel(float *babyHisto) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_secondTargetBin * c_firstResultBin * c_secondResultBin) {
-        unsigned startingPoint = tid * c_firstTargetBin;
-        unsigned finishPoint = startingPoint + c_firstTargetBin;
-
-        float sum = tex1Dfetch(histogramTexture, startingPoint);
-        float c = 0.f, Y, t;
-        for (unsigned i = startingPoint + 1; i < finishPoint; ++i) {
-            Y = tex1Dfetch(histogramTexture, i) - c;
-            t = sum + Y;
-            c = (t - sum) - Y;
-            sum = t;
-        }
-        babyHisto[tid] = sum;
-    }
-} */
-/* *************************************************************** */
-/* __global__ void reg_marginaliseTargetXY_kernel(float *babyHisto) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_firstResultBin * c_secondResultBin) {
-        unsigned startingPoint = tid * c_secondTargetBin;
-        unsigned finishPoint = startingPoint + c_secondTargetBin;
-
-        float sum = tex1Dfetch(histogramTexture, startingPoint);
-        float c = 0.f, Y, t;
-        for (unsigned i = startingPoint + 1; i < finishPoint; ++i) {
-            Y = tex1Dfetch(histogramTexture, i) - c;
-            t = sum + Y;
-            c = (t - sum) - Y;
-            sum = t;
-        }
-        babyHisto[tid] = sum;
-    }
-} */
-/* *************************************************************** */
-/* __global__ void reg_marginaliseResultX_kernel(float *babyHisto) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_firstTargetBin * c_secondTargetBin * c_firstResultBin) {
-        unsigned startingPoint = tid;
-        float sum = tex1Dfetch(histogramTexture, startingPoint);
-        // increment by a the cube
-        unsigned increment = c_firstTargetBin * c_secondTargetBin * c_firstResultBin;
-        float c = 0.f, Y, t;
-
-        for (unsigned i = 1; i < c_secondResultBin; ++i) {
-            Y = tex1Dfetch(histogramTexture, startingPoint + i * increment) - c;
-            t = sum + Y;
-            c = (t - sum) - Y;
-            sum = t;
-        }
-        babyHisto[tid] = sum;
-    }
-} */
-/* *************************************************************** */
-/* __global__ void reg_marginaliseResultXY_kernel(float *babyHisto) {
-    const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < c_firstTargetBin * c_secondTargetBin) {
-        unsigned startingPoint = tid;
-        float sum = tex1Dfetch(histogramTexture, startingPoint);
-        // increment by the plane.
-        unsigned increment = c_firstTargetBin * c_secondTargetBin;
-        float c = 0.f, Y, t;
-        for (unsigned i = 1; i < c_firstResultBin; ++i) {
-            Y = tex1Dfetch(histogramTexture, startingPoint + i * increment) - c;
-            t = sum + Y;
-            c = (t - sum) - Y;
-            sum = t;
-        }
-        babyHisto[tid] = sum;
-    }
-} */
-/* *************************************************************** */

From 86db4340cb3fdee93f6538abfa0eb7bda30c4ee1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 15 Nov 2023 15:27:09 +0000
Subject: [PATCH 244/314] Add multi-timepoint support for MeasureTest #92

---
 niftyreg_build_version.txt         |  2 +-
 reg-test/reg_test_regr_measure.cpp | 41 ++++++++++++++++--------------
 2 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e5db9a27..8c0a1869 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-362
+363
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 81c150e8..2c26a8d1 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -28,14 +28,15 @@ class MeasureTest {
 
         // Create 2D reference, floating, control point grid and local weight similarity images
         constexpr NiftiImage::dim_t size = 16;
-        vector<NiftiImage::dim_t> dim{ size, size };
+        constexpr NiftiImage::dim_t timePoints = 1;
+        vector<NiftiImage::dim_t> dim{ size, size, 1, timePoints };
         NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage controlPointGrid2d(CreateControlPointGrid(reference2d));
         NiftiImage localWeightSim2d(dim, NIFTI_TYPE_FLOAT32);
 
         // Create 3D reference, floating, control point grid and local weight similarity images
-        dim.push_back(size);
+        dim[2] = size;
         NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage controlPointGrid3d(CreateControlPointGrid(reference3d));
@@ -63,7 +64,7 @@ class MeasureTest {
 
         // Create the data container for the regression test
         const std::string measureNames[]{ "NMI"s, "SSD"s, "DTI"s, "LNCC"s, "KLD"s, "MIND"s, "MINDSSC"s };
-        const MeasureType testMeasures[]{ MeasureType::Nmi, MeasureType::Ssd };
+        constexpr MeasureType testMeasures[]{ MeasureType::Nmi, MeasureType::Ssd };
         vector<TestData> testData;
         for (auto&& measure : testMeasures) {
             for (int sym = 0; sym < 2; ++sym) {
@@ -137,9 +138,9 @@ class MeasureTest {
             unique_ptr<reg_measure> measureCuda{ measureCreatorCuda->Create(measureType) };
 
             // Initialise the measures
-            for (int i = 0; i < referenceCpu->nt; ++i) {
-                measureCpu->SetTimePointWeight(i, 1.0);
-                measureCuda->SetTimePointWeight(i, 1.0);
+            for (int t = 0; t < referenceCpu->nt; t++) {
+                measureCpu->SetTimePointWeight(t, 1.0);
+                measureCuda->SetTimePointWeight(t, 1.0);
             }
             measureCreatorCpu->Initialise(*measureCpu, *contentCpu, contentCpuBw.get());
             measureCreatorCuda->Initialise(*measureCuda, *contentCuda, contentCudaBw.get());
@@ -162,24 +163,26 @@ class MeasureTest {
             }
             const double simMeasureCuda = measureCuda->GetSimilarityMeasureValue();
 
-            // Compute the similarity measure gradient for CPU
-            constexpr int timepoint = 0;
+            // Compute the similarity measure gradients
             contentCpu->ZeroVoxelBasedMeasureGradient();
-            computeCpu->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
-            if (isSymmetric) {
-                contentCpuBw->ZeroVoxelBasedMeasureGradient();
-                computeCpuBw->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
-            }
-            measureCpu->GetVoxelBasedSimilarityMeasureGradient(timepoint);
-
-            // Compute the similarity measure gradient for CUDA
             contentCuda->ZeroVoxelBasedMeasureGradient();
-            computeCuda->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
             if (isSymmetric) {
+                contentCpuBw->ZeroVoxelBasedMeasureGradient();
                 contentCudaBw->ZeroVoxelBasedMeasureGradient();
-                computeCudaBw->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), timepoint);
             }
-            measureCuda->GetVoxelBasedSimilarityMeasureGradient(timepoint);
+            for (int t = 0; t < referenceCpu->nt; t++) {
+                // Compute the similarity measure gradient for CPU
+                computeCpu->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), t);
+                if (isSymmetric)
+                    computeCpuBw->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), t);
+                measureCpu->GetVoxelBasedSimilarityMeasureGradient(t);
+
+                // Compute the similarity measure gradient for CUDA
+                computeCuda->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), t);
+                if (isSymmetric)
+                    computeCudaBw->GetImageGradient(1, std::numeric_limits<float>::quiet_NaN(), t);
+                measureCuda->GetVoxelBasedSimilarityMeasureGradient(t);
+            }
 
             // Get the voxel-based similarity measure gradients
             NiftiImage voxelBasedGradCpu(contentCpu->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);

From 55775715561937ddcb15e611d6722ee65c33222a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 16 Nov 2023 19:44:23 +0000
Subject: [PATCH 245/314] Refactorisations

---
 .gitignore                                    |  1 +
 niftyreg_build_version.txt                    |  2 +-
 reg-io/RNifti/NiftiImage.h                    |  2 +-
 reg-lib/AladinContent.h                       |  2 +-
 reg-lib/ContentCreatorFactory.h               |  2 +-
 reg-lib/Measure.cpp                           |  2 +-
 reg-lib/Measure.h                             |  2 +-
 reg-lib/Platform.cpp                          |  4 +--
 reg-lib/Platform.h                            |  4 +--
 reg-lib/_reg_aladin.h                         |  2 +-
 reg-lib/_reg_base.h                           |  4 +--
 reg-lib/cl/ClContentCreatorFactory.h          |  2 +-
 reg-lib/cpu/_reg_globalTrans.h                |  4 +--
 reg-lib/cpu/_reg_kld.cpp                      |  6 ++--
 reg-lib/cpu/_reg_lncc.cpp                     | 10 +++---
 reg-lib/cpu/_reg_localTrans_regul.cpp         | 10 +++---
 reg-lib/cpu/_reg_mind.cpp                     | 26 +++++++--------
 reg-lib/cpu/_reg_mind.h                       |  8 ++---
 reg-lib/cpu/_reg_nmi.cpp                      | 32 +++++++++----------
 reg-lib/cpu/_reg_nmi.h                        |  6 ++--
 reg-lib/cpu/_reg_ssd.cpp                      |  2 +-
 reg-lib/cpu/_reg_tools.cpp                    |  4 +--
 reg-lib/cpu/_reg_tools.h                      |  2 +-
 reg-lib/cuda/CudaCommon.hpp                   |  4 +--
 reg-lib/cuda/CudaCompute.cu                   |  2 +-
 reg-lib/cuda/CudaContentCreatorFactory.h      |  2 +-
 reg-lib/cuda/CudaKernelConvolution.cu         | 10 +++---
 reg-lib/cuda/CudaMeasure.cpp                  |  2 +-
 reg-lib/cuda/CudaMeasure.h                    |  2 +-
 reg-lib/cuda/CudaNormaliseGradient.cu         |  4 +--
 reg-lib/cuda/_reg_common_cuda_kernels.cu      |  2 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu  | 22 ++++++-------
 .../cuda/_reg_localTransformation_kernels.cu  | 30 ++++++++---------
 reg-lib/cuda/_reg_nmi_gpu.cu                  | 28 ++++++++--------
 reg-lib/cuda/_reg_optimiser_kernels.cu        |  4 +--
 reg-lib/cuda/_reg_resampling_kernels.cu       |  2 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |  4 +--
 reg-lib/cuda/_reg_ssd_kernels.cu              |  7 +++-
 reg-lib/cuda/_reg_tools_gpu.cu                | 18 +++++------
 reg-lib/cuda/_reg_tools_gpu.h                 | 16 +++++-----
 reg-lib/cuda/_reg_tools_kernels.cu            |  2 +-
 reg-test/reg_test_conjugateGradient.cpp       | 10 +++---
 reg-test/reg_test_lncc.cpp                    |  4 +--
 43 files changed, 160 insertions(+), 154 deletions(-)

diff --git a/.gitignore b/.gitignore
index de49771d..9accdc5d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@
 *.app
 
 # IDE
+.devcontainer
 .idea
 .vscode
 .vs
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 8c0a1869..9c6f0c3e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-363
+364
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 68dfcceb..b03f5837 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -2021,7 +2021,7 @@ class NiftiImage
      * @param dimCount Number of dimensions to consider
      * @return The number of voxels in the image
      */
-    static size_t calcVoxelNumber(const nifti_image *image, const int& dimCount) {
+    static size_t calcVoxelNumber(const nifti_image *image, const int dimCount) {
         if (image == nullptr)
             return 0;
         size_t voxelNumber = 1;
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index bd71257a..9757f5fe 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -36,6 +36,6 @@ class AladinContent: public Content {
 protected:
 #endif
     // Functions for testing
-    virtual void SetCaptureRange(const int& captureRangeIn) { blockMatchingParams->voxelCaptureRange = captureRangeIn; }
+    virtual void SetCaptureRange(const int captureRangeIn) { blockMatchingParams->voxelCaptureRange = captureRangeIn; }
     virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; }
 };
diff --git a/reg-lib/ContentCreatorFactory.h b/reg-lib/ContentCreatorFactory.h
index ca1001f9..4d9ddddc 100644
--- a/reg-lib/ContentCreatorFactory.h
+++ b/reg-lib/ContentCreatorFactory.h
@@ -10,7 +10,7 @@ enum class ContentType { Base, Aladin, Def, F3d, F3d2 };
 
 class ContentCreatorFactory {
 public:
-    virtual ContentCreator* Produce(const ContentType& conType) {
+    virtual ContentCreator* Produce(const ContentType conType) {
         switch (conType) {
         case ContentType::Base:
             return new ContentCreator();
diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp
index e61a7ce1..bd586b8b 100644
--- a/reg-lib/Measure.cpp
+++ b/reg-lib/Measure.cpp
@@ -7,7 +7,7 @@
 #include "_reg_mind.h"
 
 /* *************************************************************** */
-reg_measure* Measure::Create(const MeasureType& measureType) {
+reg_measure* Measure::Create(const MeasureType measureType) {
     switch (measureType) {
     case MeasureType::Nmi:
         return new reg_nmi();
diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h
index f8527631..c20989d7 100644
--- a/reg-lib/Measure.h
+++ b/reg-lib/Measure.h
@@ -7,6 +7,6 @@ enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, MindSsc };
 
 class Measure {
 public:
-    virtual reg_measure* Create(const MeasureType& measureType);
+    virtual reg_measure* Create(const MeasureType measureType);
     virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr);
 };
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 19826418..8e609ffe 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -17,7 +17,7 @@
 #endif
 
 /* *************************************************************** */
-Platform::Platform(const PlatformType& platformTypeIn) {
+Platform::Platform(const PlatformType platformTypeIn) {
     platformType = platformTypeIn;
     if (platformType == PlatformType::Cpu) {
         platformName = "CPU";
@@ -102,7 +102,7 @@ Compute* Platform::CreateCompute(Content& con) const {
     return computeFactory->Produce(con);
 }
 /* *************************************************************** */
-ContentCreator* Platform::CreateContentCreator(const ContentType& conType) const {
+ContentCreator* Platform::CreateContentCreator(const ContentType conType) const {
     return contentCreatorFactory->Produce(conType);
 }
 /* *************************************************************** */
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 42a0a823..b049732a 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -20,7 +20,7 @@ constexpr PlatformType PlatformTypes[] = {
 
 class Platform {
 public:
-    Platform(const PlatformType& platformTypeIn);
+    Platform(const PlatformType platformTypeIn);
     ~Platform();
 
     std::string GetName() const;
@@ -29,7 +29,7 @@ class Platform {
     void SetGpuIdx(unsigned gpuIdxIn);
 
     Compute* CreateCompute(Content& con) const;
-    ContentCreator* CreateContentCreator(const ContentType& conType = ContentType::Base) const;
+    ContentCreator* CreateContentCreator(const ContentType conType = ContentType::Base) const;
     Kernel* CreateKernel(const std::string& name, Content *con) const;
     Measure* CreateMeasure() const;
     template<typename Type>
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 8f47979b..59c99fa2 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -169,7 +169,7 @@ class reg_aladin {
     }
     NiftiImage GetFinalWarpedImage();
 
-    void SetPlatformType(const PlatformType& platformTypeIn) {
+    void SetPlatformType(const PlatformType platformTypeIn) {
         this->platformType = platformTypeIn;
     }
     void SetGpuIdx(unsigned gpuIdxIn) {
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 007f26ec..c589afe7 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -141,11 +141,11 @@ class reg_base: public InterfaceOptimiser {
     virtual bool GetSymmetricStatus() { return false; }
 
     // Platform
-    virtual void SetPlatformType(const PlatformType& platformType) {
+    virtual void SetPlatformType(const PlatformType platformType) {
         platform.reset(new Platform(platformType));
         measure.reset(platform->CreateMeasure());
     }
-    virtual void SetGpuIdx(const unsigned& gpuIdx) { platform->SetGpuIdx(gpuIdx); }
+    virtual void SetGpuIdx(const unsigned gpuIdx) { platform->SetGpuIdx(gpuIdx); }
 
     // Optimisation-related functions
     virtual void SetMaximalIterationNumber(unsigned);
diff --git a/reg-lib/cl/ClContentCreatorFactory.h b/reg-lib/cl/ClContentCreatorFactory.h
index cc6f8620..ecba7ae0 100644
--- a/reg-lib/cl/ClContentCreatorFactory.h
+++ b/reg-lib/cl/ClContentCreatorFactory.h
@@ -5,7 +5,7 @@
 
 class ClContentCreatorFactory: public ContentCreatorFactory {
 public:
-    virtual ContentCreator* Produce(const ContentType& conType) override {
+    virtual ContentCreator* Produce(const ContentType conType) override {
         switch (conType) {
         case ContentType::Aladin:
             return new ClAladinContentCreator();
diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h
index 591ec0ca..4b1917a8 100755
--- a/reg-lib/cpu/_reg_globalTrans.h
+++ b/reg-lib/cpu/_reg_globalTrans.h
@@ -37,7 +37,7 @@ struct _reg_sorted_point3D
         warped[2] = r[2];
     }
 
-    bool operator <(const _reg_sorted_point3D &sp) const
+    bool operator <(const _reg_sorted_point3D& sp) const
     {
         return (sp.distance < distance);
     }
@@ -61,7 +61,7 @@ struct _reg_sorted_point2D
         warped[0] = r[0];
         warped[1] = r[1];
     }
-    bool operator <(const _reg_sorted_point2D &sp) const
+    bool operator <(const _reg_sorted_point2D& sp) const
     {
         return (sp.distance < distance);
     }
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index 68de1aa8..eefab0bc 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -232,13 +232,13 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
                     tempValue *= jacPtr[voxel];
 
                 // Ensure that gradient of the warpedImage image along x-axis is not NaN
-                const double& tempGradX = currentGradPtrX[voxel];
+                const double tempGradX = currentGradPtrX[voxel];
                 if (tempGradX == tempGradX)
                     // Update the gradient along the x-axis
                     measureGradPtrX[voxel] -= static_cast<DataType>(tempValue * tempGradX);
 
                 // Ensure that gradient of the warpedImage image along y-axis is not NaN
-                const double& tempGradY = currentGradPtrY[voxel];
+                const double tempGradY = currentGradPtrY[voxel];
                 if (tempGradY == tempGradY)
                     // Update the gradient along the y-axis
                     measureGradPtrY[voxel] -= static_cast<DataType>(tempValue * tempGradY);
@@ -246,7 +246,7 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
                 // Check if the current images are 3D
                 if (referenceImage->nz > 1) {
                     // Ensure that gradient of the warpedImage image along z-axis is not NaN
-                    const double& tempGradZ = currentGradPtrZ[voxel];
+                    const double tempGradZ = currentGradPtrZ[voxel];
                     if (tempGradZ == tempGradZ)
                         // Update the gradient along the z-axis
                         measureGradPtrZ[voxel] -= static_cast<DataType>(tempValue * tempGradZ);
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 9b823da1..6ce58b3f 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -441,10 +441,10 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage,
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
         // Check if the current voxel belongs to the mask
         if (combinedMask[voxel] > -1) {
-            const double& refMeanValue = meanImgPtr[voxel];
-            const double& warMeanValue = warMeanPtr[voxel];
-            const double& refSdevValue = sdevImgPtr[voxel];
-            const double& warSdevValue = warSdevPtr[voxel];
+            const double refMeanValue = meanImgPtr[voxel];
+            const double warMeanValue = warMeanPtr[voxel];
+            const double refSdevValue = sdevImgPtr[voxel];
+            const double warSdevValue = warSdevPtr[voxel];
             const double correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue);
             double temp1 = 1.0 / (refSdevValue * warSdevValue);
             double temp2 = correlaValue / (refSdevValue * warSdevValue * warSdevValue * warSdevValue);
@@ -511,7 +511,7 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage,
     shared(voxelNumber, measureGradPtrX)
 #endif
     for (voxel = 0; voxel < voxelNumber; ++voxel) {
-        const DataType& val = measureGradPtrX[voxel];
+        const DataType val = measureGradPtrX[voxel];
         if (val != val || isinf(val))
             measureGradPtrX[voxel] = 0;
     }
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 46a3928c..44feb651 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -1152,8 +1152,8 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi
             for (int b = -1; b < 2; b++) {
                 for (int a = -1; a < 2; a++) {
                     const int index = (y + b) * splineControlPoint->nx + x + a;
-                    const DataType& splineCoeffX = splinePtrX[index];
-                    const DataType& splineCoeffY = splinePtrY[index];
+                    const DataType splineCoeffX = splinePtrX[index];
+                    const DataType splineCoeffY = splinePtrY[index];
 
                     matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
                     matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
@@ -1221,9 +1221,9 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi
                     for (int b = -1; b < 2; b++) {
                         for (int a = -1; a < 2; a++) {
                             const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
-                            const DataType& splineCoeffX = splinePtrX[index];
-                            const DataType& splineCoeffY = splinePtrY[index];
-                            const DataType& splineCoeffZ = splinePtrZ[index];
+                            const DataType splineCoeffX = splinePtrX[index];
+                            const DataType splineCoeffY = splinePtrY[index];
+                            const DataType splineCoeffZ = splinePtrZ[index];
 
                             matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
                             matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 30e15cff..ff5ae86d 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -17,9 +17,9 @@ template <class DataType>
 void ShiftImage(const nifti_image *inputImage,
                 nifti_image *shiftedImage,
                 const int *mask,
-                const int& tx,
-                const int& ty,
-                const int& tz) {
+                const int tx,
+                const int ty,
+                const int tz) {
     const DataType* inputData = static_cast<DataType*>(inputImage->data);
     DataType* shiftImageData = static_cast<DataType*>(shiftedImage->data);
 #ifdef _OPENMP
@@ -57,8 +57,8 @@ template <class DataType>
 void GetMindImageDescriptorCore(const nifti_image *inputImage,
                                 nifti_image *mindImage,
                                 const int *mask,
-                                const int& descriptorOffset,
-                                const int& currentTimePoint) {
+                                const int descriptorOffset,
+                                const int currentTimePoint) {
 #ifdef WIN32
     long voxelIndex;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
@@ -131,7 +131,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
 
             mindIndex = voxelIndex;
             for (int t = 0; t < samplingNbr; t++) {
-                const DataType& descValue = mindImgDataPtr[mindIndex];
+                const DataType descValue = mindImgDataPtr[mindIndex];
                 mindImgDataPtr[mindIndex] = descValue / maxDesc;
                 mindIndex += voxelNumber;
             }
@@ -147,8 +147,8 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
 void GetMindImageDescriptor(const nifti_image *inputImage,
                             nifti_image *mindImage,
                             const int *mask,
-                            const int& descriptorOffset,
-                            const int& currentTimePoint) {
+                            const int descriptorOffset,
+                            const int currentTimePoint) {
     if (inputImage->datatype != mindImage->datatype)
         NR_FATAL_ERROR("The input image and the MIND image must have the same datatype");
     std::visit([&](auto&& imgType) {
@@ -162,8 +162,8 @@ template <class DataType>
 void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
                                    nifti_image *mindSscImage,
                                    const int *mask,
-                                   const int& descriptorOffset,
-                                   const int& currentTimePoint) {
+                                   const int descriptorOffset,
+                                   const int currentTimePoint) {
 #ifdef WIN32
     long voxelIndex;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3);
@@ -253,7 +253,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
 
             mindIndex = voxelIndex;
             for (int t = 0; t < lengthDescriptor; t++) {
-                const DataType& descValue = mindSscImgDataPtr[mindIndex];
+                const DataType descValue = mindSscImgDataPtr[mindIndex];
                 mindSscImgDataPtr[mindIndex] = descValue / maxDesc;
                 mindIndex += voxelNumber;
             }
@@ -271,8 +271,8 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
 void GetMindSscImageDescriptor(const nifti_image *inputImage,
                                nifti_image *mindSscImage,
                                const int *mask,
-                               const int& descriptorOffset,
-                               const int& currentTimePoint) {
+                               const int descriptorOffset,
+                               const int currentTimePoint) {
     if (inputImage->datatype != mindSscImage->datatype)
         NR_FATAL_ERROR("The input image and the MINDSSC image must have the same datatype!");
     std::visit([&](auto&& imgType) {
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index b32dee3e..35c21203 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -74,12 +74,12 @@ class reg_mindssc: public reg_mind {
 void GetMindImageDescriptor(const nifti_image *inputImage,
                             nifti_image *mindImage,
                             const int *mask,
-                            const int& descriptorOffset,
-                            const int& currentTimePoint);
+                            const int descriptorOffset,
+                            const int currentTimePoint);
 /* *************************************************************** */
 void GetMindSscImageDescriptor(const nifti_image *inputImage,
                                nifti_image *mindSscImage,
                                const int *mask,
-                               const int& descriptorOffset,
-                               const int& currentTimePoint);
+                               const int descriptorOffset,
+                               const int currentTimePoint);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 9e3801c1..9918c5e7 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -198,8 +198,8 @@ void reg_getNmiValue(const nifti_image *referenceImage,
                 // No approximation is used for the Parzen windowing
                 for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
                     if (referenceMask[voxel] > -1) {
-                        const DataType& refValue = refPtr[voxel];
-                        const DataType& warValue = warPtr[voxel];
+                        const DataType refValue = refPtr[voxel];
+                        const DataType warValue = warPtr[voxel];
                         if (refValue == refValue && warValue == warValue) {
                             for (int r = int(refValue - 1); r < int(refValue + 3); ++r) {
                                 if (0 <= r && r < referenceBinNumber[t]) {
@@ -220,8 +220,8 @@ void reg_getNmiValue(const nifti_image *referenceImage,
                 // the histogram is convolved with a spine kernel function.
                 for (size_t voxel = 0; voxel < voxelNumber; ++voxel) {
                     if (referenceMask[voxel] > -1) {
-                        const DataType& refValue = refPtr[voxel];
-                        const DataType& warValue = warPtr[voxel];
+                        const DataType refValue = refPtr[voxel];
+                        const DataType warValue = warPtr[voxel];
                         if (refValue == refValue && warValue == warValue &&
                             0 <= refValue && refValue < referenceBinNumber[t] &&
                             0 <= warValue && warValue < floatingBinNumber[t]) {
@@ -295,9 +295,9 @@ void reg_getNmiValue(const nifti_image *referenceImage,
             // Compute the entropy of the reference image
             double referenceEntropy = 0;
             for (int r = 0; r < referenceBinNumber[t]; ++r) {
-                const double& valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r];
+                const double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r];
                 if (valPro > 0) {
-                    const double& valLog = log(valPro);
+                    const double valLog = log(valPro);
                     referenceEntropy -= valPro * valLog;
                     jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + r] = valLog;
                 }
@@ -306,9 +306,9 @@ void reg_getNmiValue(const nifti_image *referenceImage,
             // Compute the entropy of the warped floating image
             double warpedEntropy = 0;
             for (int f = 0; f < floatingBinNumber[t]; ++f) {
-                const double& valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f];
+                const double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f];
                 if (valPro > 0) {
-                    const double& valLog = log(valPro);
+                    const double valLog = log(valPro);
                     warpedEntropy -= valPro * valLog;
                     jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f] = valLog;
                 }
@@ -317,9 +317,9 @@ void reg_getNmiValue(const nifti_image *referenceImage,
             // Compute the joint entropy
             double jointEntropy = 0;
             for (int i = 0; i < referenceBinNumber[t] * floatingBinNumber[t]; ++i) {
-                const double& valPro = jointHistoProPtr[i];
+                const double valPro = jointHistoProPtr[i];
                 if (valPro > 0) {
-                    const double& valLog = log(valPro);
+                    const double valLog = log(valPro);
                     jointEntropy -= valPro * valLog;
                     jointHistoLogPtr[i] = valLog;
                 }
@@ -455,9 +455,9 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                             if (-1 < w && w < floatingBinNumber[currentTimePoint]) {
                                 const double common = GetBasisSplineValue<double>(refValue - r) *
                                     GetBasisSplineDerivativeValue<double>(warValue - w);
-                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]];
-                                const double& refLog = logHistoPtr[r + referenceOffset];
-                                const double& warLog = logHistoPtr[w + floatingOffset];
+                                const double jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]];
+                                const double refLog = logHistoPtr[r + referenceOffset];
+                                const double warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
                                     jointDeriv[0] += common * gradX * jointLog;
                                     refDeriv[0] += common * gradX * refLog;
@@ -542,9 +542,9 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
                             if (-1 < w && w < floatingBinNumber[currentTimePoint]) {
                                 const double common = GetBasisSplineValue<double>(refValue - r) *
                                     GetBasisSplineDerivativeValue<double>(warValue - w);
-                                const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]];
-                                const double& refLog = logHistoPtr[r + referenceOffset];
-                                const double& warLog = logHistoPtr[w + floatingOffset];
+                                const double jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]];
+                                const double refLog = logHistoPtr[r + referenceOffset];
+                                const double warLog = logHistoPtr[w + floatingOffset];
                                 if (gradX == gradX) {
                                     refDeriv[0] += common * gradX * refLog;
                                     warDeriv[0] += common * gradX * warLog;
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 1c01ba91..16fbda9f 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -108,8 +108,8 @@ class SafeArray {
     }
 
 private:
-    void operator=(const SafeArray &) {};
-    SafeArray(const SafeArray &) {};
+    void operator=(const SafeArray&) {};
+    SafeArray(const SafeArray&) {};
 
     DataTYPE *data;
 };
@@ -141,7 +141,7 @@ class Multi_Loop {
     }
 
     /// Gets the index or iterator for the specified loop.
-    const T &operator [](int index) const {
+    const T& operator [](int index) const {
         return (current[index]);
     }
 
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 1f41f389..2a130c4d 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -139,7 +139,7 @@ double reg_getSsdValue(const nifti_image *referenceImage,
                         const double diff = std::pow(refValue - warValue, 2.0);
 #endif
                         // Jacobian determinant modulation of the ssd if required
-                        const DataType& val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1);
+                        const DataType val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1);
                         ssdLocal += diff * val;
                         n += val;
                     }
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 91a85e3a..f363d8ee 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -446,7 +446,7 @@ template void reg_tools_changeDatatype<double>(nifti_image*, int);
 struct Operation {
     enum class Type { Add, Subtract, Multiply, Divide } type;
     Operation(Type type) : type(type) {}
-    double operator()(const double& lhs, const double& rhs) const {
+    double operator()(const double lhs, const double rhs) const {
         switch (type) {
         case Type::Add:
             return lhs + rhs;
@@ -2564,7 +2564,7 @@ nifti_image* nifti_dup(const nifti_image& image, const bool copyData) {
     return newImage;
 }
 /* *************************************************************** */
-void PrintCmdLine(const int& argc, const char * const *argv, const bool verbose) {
+void PrintCmdLine(const int argc, const char *const *argv, const bool verbose) {
 #ifdef NDEBUG
     if (!verbose) return;
 #endif
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index c014e6d1..5064d800 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -426,5 +426,5 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x
 nifti_image* nifti_dup(const nifti_image& image, const bool copyData = true);
 /* *************************************************************** */
 /// @brief Prints the command line
-void PrintCmdLine(const int& argc, const char * const *argv, const bool verbose);
+void PrintCmdLine(const int argc, const char *const *argv, const bool verbose);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp
index ad6ff06d..9b32dd4d 100644
--- a/reg-lib/cuda/CudaCommon.hpp
+++ b/reg-lib/cuda/CudaCommon.hpp
@@ -37,7 +37,7 @@ namespace NiftyReg::Cuda {
 /* *************************************************************** */
 namespace Internal {
 /* *************************************************************** */
-inline void SafeCall(const std::string& file, const int& line, const std::string& funcName) {
+inline void SafeCall(const std::string& file, const int line, const std::string& funcName) {
 #if CUDART_VERSION >= 3200
 	const cudaError_t err = cudaPeekAtLastError();
 #else
@@ -47,7 +47,7 @@ inline void SafeCall(const std::string& file, const int& line, const std::string
         NiftyReg::Internal::FatalError(file, line, funcName, "CUDA error: "s + cudaGetErrorString(err));
 }
 /* *************************************************************** */
-inline void CheckKernel(const std::string& file, const int& line, const std::string& funcName, const dim3& grid, const dim3& block) {
+inline void CheckKernel(const std::string& file, const int line, const std::string& funcName, const dim3& grid, const dim3& block) {
 #if CUDART_VERSION >= 3200
 	cudaDeviceSynchronize();
 	const cudaError_t err = cudaPeekAtLastError();
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index f569f1bc..02c83dc8 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -261,7 +261,7 @@ void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) {
 /* *************************************************************** */
 void CudaCompute::DefFieldCompose(const nifti_image *defField) {
     CudaContent& con = dynamic_cast<CudaContent&>(this->con);
-    const size_t& voxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
     thrust::device_vector<float4> defFieldCuda(voxelNumber);
     Cuda::TransferNiftiToDevice(defFieldCuda.data().get(), defField);
     reg_defField_compose_gpu(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda());
diff --git a/reg-lib/cuda/CudaContentCreatorFactory.h b/reg-lib/cuda/CudaContentCreatorFactory.h
index a42360a3..72e42885 100644
--- a/reg-lib/cuda/CudaContentCreatorFactory.h
+++ b/reg-lib/cuda/CudaContentCreatorFactory.h
@@ -9,7 +9,7 @@
 
 class CudaContentCreatorFactory: public ContentCreatorFactory {
 public:
-    virtual ContentCreator* Produce(const ContentType& conType) override {
+    virtual ContentCreator* Produce(const ContentType conType) override {
         switch (conType) {
         case ContentType::Base:
             return new CudaContentCreator();
diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu
index a9b9ece2..ff2037ff 100644
--- a/reg-lib/cuda/CudaKernelConvolution.cu
+++ b/reg-lib/cuda/CudaKernelConvolution.cu
@@ -50,7 +50,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
         if (!activeTimePoints[t]) continue;
 
         thrust::for_each_n(thrust::device, thrust::make_counting_iterator<size_t>(0), voxelNumber, [=]__device__(const size_t index) {
-            const float& intensityVal = tex1Dfetch<float>(imageTexture, index * 4 + t);
+            const float intensityVal = tex1Dfetch<float>(imageTexture, index * 4 + t);
             float& densityVal = densityCudaPtr[index];
             bool& nanImageVal = nanImageCudaPtr[index];
             densityVal = intensityVal == intensityVal ? 1.f : 0;
@@ -185,7 +185,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
                         // Increment the current value by performing the weighted sum
                         double intensitySum = 0, densitySum = 0;
                         for (int k = shiftPre; k < shiftPst; k++, kernelIndex++) {
-                            const float& kernelValue = tex1Dfetch<float>(kernelTexture, kernelIndex);
+                            const float kernelValue = tex1Dfetch<float>(kernelTexture, kernelIndex);
                             intensitySum += kernelValue * bufferIntensityPtr[k];
                             densitySum += kernelValue * bufferDensityPtr[k];
                         }
@@ -228,12 +228,12 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
 
         // Normalise per time point
         thrust::for_each_n(thrust::device, thrust::make_counting_iterator<size_t>(0), voxelNumber, [=]__device__(const size_t index) {
-            const bool& nanImageVal = tex1Dfetch<char>(nanImageTexture, index);
+            const bool nanImageVal = tex1Dfetch<char>(nanImageTexture, index);
             if (nanImageVal) {
                 reinterpret_cast<float*>(&imageCuda[index])[t] = std::numeric_limits<float>::quiet_NaN();
             } else {
-                const float& intensityVal = tex1Dfetch<float>(imageTexture, index * 4 + t);
-                const float& densityVal = tex1Dfetch<float>(densityTexture, index);
+                const float intensityVal = tex1Dfetch<float>(imageTexture, index * 4 + t);
+                const float densityVal = tex1Dfetch<float>(densityTexture, index);
                 reinterpret_cast<float*>(&imageCuda[index])[t] = intensityVal / densityVal;
             }
         });
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp
index 4cdfbdc8..793aa61a 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasure.cpp
@@ -4,7 +4,7 @@
 #include "_reg_ssd_gpu.h"
 
 /* *************************************************************** */
-reg_measure* CudaMeasure::Create(const MeasureType& measureType) {
+reg_measure* CudaMeasure::Create(const MeasureType measureType) {
     switch (measureType) {
     case MeasureType::Nmi:
         return new reg_nmi_gpu();
diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h
index 928f4fc4..76f73900 100644
--- a/reg-lib/cuda/CudaMeasure.h
+++ b/reg-lib/cuda/CudaMeasure.h
@@ -4,6 +4,6 @@
 
 class CudaMeasure: public Measure {
 public:
-    virtual reg_measure* Create(const MeasureType& measureType) override;
+    virtual reg_measure* Create(const MeasureType measureType) override;
     virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr) override;
 };
diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
index c61ecb13..85a250a5 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -9,7 +9,7 @@ float GetMaximalLength(const float4 *imageCuda, const size_t nVoxels) {
     auto imageTexture = *imageTexturePtr;
     thrust::counting_iterator<unsigned> index(0);
     return thrust::transform_reduce(thrust::device, index, index + nVoxels, [=]__device__(const unsigned index) {
-        const float4& val = tex1Dfetch<float4>(imageTexture, index);
+        const float4 val = tex1Dfetch<float4>(imageTexture, index);
         return sqrtf((optimiseX ? Square(val.x) : 0) +
                      (optimiseY ? Square(val.y) : 0) +
                      (optimiseZ ? Square(val.z) : 0));
@@ -51,7 +51,7 @@ void NormaliseGradient(float4 *imageCuda, const size_t nVoxels, const double max
                                                      nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
     auto imageTexture = *imageTexturePtr;
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), nVoxels, [=]__device__(const unsigned index) {
-        const float4& val = tex1Dfetch<float4>(imageTexture, index);
+        const float4 val = tex1Dfetch<float4>(imageTexture, index);
         imageCuda[index] = make_float4(optimiseX ? val.x * maxGradLengthInv : 0,
                                        optimiseY ? val.y * maxGradLengthInv : 0,
                                        optimiseZ ? val.z * maxGradLengthInv : 0,
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index 43783b4d..ee0e4bcf 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -152,7 +152,7 @@ __device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dim
     else rem = index;
     const int z = quot;
     reg_div_cuda(rem, dims.x, quot, rem);
-    const int& y = quot, &x = rem;
+    const int y = quot, x = rem;
     return { x, y, z };
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 9328aff8..569136b1 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -121,7 +121,7 @@ __device__ SecondDerivative<is3d> GetApproxSecondDerivative(const unsigned index
                 int indexXYZ = (indexZ + b) * controlPointImageDim.x + x - 1;
                 for (int a = x - 1; a < x + 2; a++, basInd++, indexXYZ++) {
                     if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue;
-                    const float3& controlPointValue = make_float3(tex1Dfetch<float4>(controlPointTexture, indexXYZ));
+                    const float3 controlPointValue = make_float3(tex1Dfetch<float4>(controlPointTexture, indexXYZ));
                     secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue;
                     secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue;
                     secondDerivative.zz = secondDerivative.zz + basis.zz[basInd] * controlPointValue;
@@ -137,7 +137,7 @@ __device__ SecondDerivative<is3d> GetApproxSecondDerivative(const unsigned index
             int indexXY = b * controlPointImageDim.x + x - 1;
             for (int a = x - 1; a < x + 2; a++, basInd++, indexXY++) {
                 if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue;
-                const float2& controlPointValue = make_float2(tex1Dfetch<float4>(controlPointTexture, indexXY));
+                const float2 controlPointValue = make_float2(tex1Dfetch<float4>(controlPointTexture, indexXY));
                 secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue;
                 secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue;
                 secondDerivative.xy = secondDerivative.xy + basis.xy[basInd] * controlPointValue;
@@ -243,17 +243,17 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
                     int indexXYZ = ((indexZ + b) * controlPointImageDim.x + x - 1) * 6;
                     for (int a = x - 1; a < x + 2; a++, basInd++) {
                         if (a < 0 || a >= controlPointImageDim.x) { indexXYZ += 6; continue; }
-                        const float3& secondDerivativeXX = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeXX = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd];
-                        const float3& secondDerivativeYY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeYY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeYY * basis.yy[basInd];
-                        const float3& secondDerivativeZZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeZZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeZZ * basis.zz[basInd];
-                        const float3& secondDerivativeXY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeXY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeXY * basis.xy[basInd];
-                        const float3& secondDerivativeYZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeYZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeYZ * basis.yz[basInd];
-                        const float3& secondDerivativeXZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
+                        const float3 secondDerivativeXZ = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeXZ * basis.xz[basInd];
                     }
                 }
@@ -264,11 +264,11 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
                 int indexXY = (b * controlPointImageDim.x + x - 1) * 3;
                 for (int a = x - 1; a < x + 2; a++, basInd++) {
                     if (a < 0 || a >= controlPointImageDim.x) { indexXY += 3; continue; }
-                    const float2& secondDerivativeXX = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
+                    const float2 secondDerivativeXX = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
                     gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd];
-                    const float2& secondDerivativeYY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
+                    const float2 secondDerivativeYY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
                     gradientValue = gradientValue + secondDerivativeYY * basis.yy[basInd];
-                    const float2& secondDerivativeXY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
+                    const float2 secondDerivativeXY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
                     gradientValue = gradientValue + secondDerivativeXY * basis.xy[basInd];
                 }
             }
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu
index 43708ec5..342864aa 100755
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu
@@ -128,9 +128,9 @@ __device__ float4 GetSlidedValues(int x, int y,
 
     x -= newX;
     y -= newY;
-    const float4& slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1],
-                                             x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1],
-                                             0.f, 0.f);
+    const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1],
+                                            x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1],
+                                            0.f, 0.f);
     return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, newY * referenceImageDim.x + newX);
 }
 /* *************************************************************** */
@@ -159,10 +159,10 @@ __device__ float4 GetSlidedValues(int x, int y, int z,
     x -= newX;
     y -= newY;
     z -= newZ;
-    const float4& slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2],
-                                             x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2],
-                                             x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2],
-                                             0.f);
+    const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2],
+                                            x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2],
+                                            x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2],
+                                            0.f);
     return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX);
 }
 /* *************************************************************** */
@@ -207,7 +207,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) };
     } else { // starting deformation field is blank - !composition
         const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-        const auto&& [x, y, z] = reg_indexToDims_cuda<true>(tid2, referenceImageDim);
+        const auto [x, y, z] = reg_indexToDims_cuda<true>(tid2, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -243,7 +243,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
             int indexXYZ = indexYZ + nodePre.x;
             const float basisY = yBasis[sharedMemIndex + b];
             for (char a = 0; a < 4; a++, indexXYZ++) {
-                const float4& nodeCoeff = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
+                const float4 nodeCoeff = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
                 const float xyzBasis = xBasis[a] * basisY * basisZ;
                 displacement.x += xyzBasis * nodeCoeff.x;
                 displacement.y += xyzBasis * nodeCoeff.y;
@@ -288,7 +288,7 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) };
     } else { // starting deformation field is blank - !composition
         const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-        const auto&& [x, y, z] = reg_indexToDims_cuda<false>(tid2, referenceImageDim);
+        const auto [x, y, z] = reg_indexToDims_cuda<false>(tid2, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -313,7 +313,7 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
         int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x;
         const float basis = yBasis[sharedMemIndex + b];
         for (char a = 0; a < 4; a++, index++) {
-            const float4& nodeCoeff = tex1Dfetch<float4>(controlPointTexture, index);
+            const float4 nodeCoeff = tex1Dfetch<float4>(controlPointTexture, index);
             const float xyBasis = xBasis[a] * basis;
             displacement.x += xyBasis * nodeCoeff.x;
             displacement.y += xyBasis * nodeCoeff.y;
@@ -1248,7 +1248,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
                                                  const int3& cppDims,
                                                  const Basis1st<is3d>& basis,
                                                  const mat33& reorientation) {
-    const auto&& [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
+    const auto [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
     if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 ||
         (is3d && (z < 1 || z >= cppDims.z - 1))) return {};
 
@@ -1260,7 +1260,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
                 const int yInd = (zInd + y + b) * cppDims.x;
                 for (int a = -1; a < 2; a++, basInd++) {
                     const int index = yInd + x + a;
-                    const float4& splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
+                    const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
 
                     matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
                     matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
@@ -1282,7 +1282,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
             const int yInd = (y + b) * cppDims.x;
             for (int a = -1; a < 2; a++, basInd++) {
                 const int index = yInd + x + a;
-                const float4& splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
+                const float4 splineCoeff = tex1Dfetch<float4>(controlPointGridTexture, index);
 
                 matrix.m[0][0] += basis.x[basInd] * splineCoeff.x;
                 matrix.m[1][0] += basis.y[basInd] * splineCoeff.x;
@@ -1325,7 +1325,7 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie
                                                              const unsigned voxelNumber) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (index >= voxelNumber) return;
-    const auto&& [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
+    const auto [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
     auto gradVal = transGradient[index];
 
     if constexpr (is3d) {
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index d0c3056d..45a6616d 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -53,7 +53,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
         this->jointHistogramLogBwCudaVecs.resize(this->referenceTimePoints);
         this->jointHistogramProBwCudaVecs.resize(this->referenceTimePoints);
     }
-    for (int i = 0; i < this->referenceTimePoints; ++i) {
+    for (int i = 0; i < this->referenceTimePoints; i++) {
         if (this->timePointWeights[i] > 0) {
             this->jointHistogramLogCudaVecs[i].resize(this->totalBinNumber[i]);
             this->jointHistogramProCudaVecs[i].resize(this->totalBinNumber[i]);
@@ -92,9 +92,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
     for (int t = 0; t < referenceTimePoints; t++) {
         if (timePointWeights[t] <= 0) continue;
         NR_DEBUG("Computing NMI for time point " << t);
-        const auto& curTotalBinNumber = totalBinNumber[t];
-        const auto& curRefBinNumber = referenceBinNumber[t];
-        const auto& curFloBinNumber = floatingBinNumber[t];
+        const auto curTotalBinNumber = totalBinNumber[t];
+        const auto curRefBinNumber = referenceBinNumber[t];
+        const auto curFloBinNumber = floatingBinNumber[t];
         // Define the current histograms
         thrust::fill(thrust::device, jointHistogramLogCudaVecs[t].begin(), jointHistogramLogCudaVecs[t].end(), 0.0);
         thrust::fill(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0);
@@ -116,10 +116,10 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
                 if (refValue != refValue) return;
                 for (int r = int(refValue - 1); r < int(refValue + 3); r++) {
                     if (0 <= r && r < curRefBinNumber) {
-                        const double& refBasis = GetBasisSplineValue<double>(refValue - r);
-                        for (int w = int(warValue - 1); w < int(warValue + 3); w++) {
+                        const double refBasis = GetBasisSplineValue<double>(refValue - r);
+                        for (int w = int(warValue) - 1; w < int(warValue) + 3; w++) {
                             if (0 <= w && w < curFloBinNumber) {
-                                const double& warBasis = GetBasisSplineValue<double>(warValue - w);
+                                const double warBasis = GetBasisSplineValue<double>(warValue - w);
                                 atomicAdd(&jointHistogramProCuda[r + w * curRefBinNumber], refBasis * warBasis);
                             }
                         }
@@ -170,7 +170,7 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
             });
         }
         // Normalise the histogram
-        const double& activeVoxel = thrust::reduce(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0, thrust::plus<double>());
+        const double activeVoxel = thrust::reduce(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0, thrust::plus<double>());
         entropyValues[t][3] = activeVoxel;
         thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), curTotalBinNumber, [=]__device__(const unsigned index) {
             jointHistogramProCuda[index] /= activeVoxel;
@@ -194,9 +194,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
         // Compute the entropy of the reference image
         thrust::counting_iterator<unsigned short> it(0);
         entropyValues[t][0] = thrust::transform_reduce(thrust::device, it, it + curRefBinNumber, [=]__device__(const unsigned short r) {
-            const double& valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + r];
+            const double valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + r];
             if (valPro > 0) {
-                const double& valLog = log(valPro);
+                const double valLog = log(valPro);
                 jointHistogramLogCuda[curRefBinNumber * curFloBinNumber + r] = valLog;
                 return -valPro * valLog;
             } else return 0.0;
@@ -204,9 +204,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
         // Compute the entropy of the warped floating image
         it = thrust::counting_iterator<unsigned short>(0);
         entropyValues[t][1] = thrust::transform_reduce(thrust::device, it, it + curFloBinNumber, [=]__device__(const unsigned short f) {
-            const double& valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f];
+            const double valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f];
             if (valPro > 0) {
-                const double& valLog = log(valPro);
+                const double valLog = log(valPro);
                 jointHistogramLogCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f] = valLog;
                 return -valPro * valLog;
             } else return 0.0;
@@ -214,9 +214,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
         // Compute the joint entropy
         it = thrust::counting_iterator<unsigned short>(0);
         entropyValues[t][2] = thrust::transform_reduce(thrust::device, it, it + curRefBinNumber * curFloBinNumber, [=]__device__(const unsigned short index) {
-            const double& valPro = jointHistogramProCuda[index];
+            const double valPro = jointHistogramProCuda[index];
             if (valPro > 0) {
-                const double& valLog = log(valPro);
+                const double valLog = log(valPro);
                 jointHistogramLogCuda[index] = valLog;
                 return -valPro * valLog;
             } else return 0.0;
diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu
index a97a2455..45b9f2a0 100755
--- a/reg-lib/cuda/_reg_optimiser_kernels.cu
+++ b/reg-lib/cuda/_reg_optimiser_kernels.cu
@@ -62,8 +62,8 @@ __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageC
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         float4 value = controlPointImageCuda[tid];
-        const float4& bestValue = tex1Dfetch<float4>(bestControlPointTexture, tid);
-        const float4& gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
+        const float4 bestValue = tex1Dfetch<float4>(bestControlPointTexture, tid);
+        const float4 gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
         if (optimiseX)
             value.x = bestValue.x + scale * gradValue.x;
         if (optimiseY)
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 8a04ce12..0782a984 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -31,7 +31,7 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray,
     if (tid >= activeVoxelNumber) return;
     // Get the real world deformation in the floating space
     const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-    float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
+    const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
     // Get the voxel-based deformation in the floating space
     double2 voxelDeformation;
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 6c2e6c69..bf414396 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -117,8 +117,8 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
                                       const float *localWeightSimCuda,
                                       float4 *ssdGradientCuda,
                                       const int *maskCuda,
-                                      const size_t& activeVoxelNumber,
-                                      const float& timepointWeight) {
+                                      const size_t activeVoxelNumber,
+                                      const float timepointWeight) {
     // Copy the constant memory variables
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu
index 794c3a23..3b0255e7 100755
--- a/reg-lib/cuda/_reg_ssd_kernels.cu
+++ b/reg-lib/cuda/_reg_ssd_kernels.cu
@@ -73,7 +73,12 @@ __global__ void GetSsdGradientKernel(float4 *ssdGradient,
 
         const float val = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
         const float common = -2.f * (refValue - warValue) * adjustedWeight * val;
-        ssdGradient[index] = ssdGradient[index] + make_float4(common * spaGradientValue.x, common * spaGradientValue.y, common * spaGradientValue.z, 0.f);
+
+        float4 ssdGradientValue = ssdGradient[index];
+        ssdGradientValue.x += common * spaGradientValue.x;
+        ssdGradientValue.y += common * spaGradientValue.y;
+        ssdGradientValue.z += common * spaGradientValue.z;
+        ssdGradient[index] = ssdGradientValue;
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index aa8f8c38..2a4bb2bb 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -92,7 +92,7 @@ void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ
 /* *************************************************************** */
 void reg_gaussianSmoothing_gpu(const nifti_image *image,
                                float4 *imageCuda,
-                               const float& sigma,
+                               const float sigma,
                                const bool smoothXYZ[8]) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
@@ -254,7 +254,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
     }
 }
 /* *************************************************************** */
-void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
+void reg_multiplyValue_gpu(const size_t count, float4 *arrayCuda, const float value) {
     const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
@@ -263,7 +263,7 @@ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float&
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) {
+void reg_addValue_gpu(const size_t count, float4 *arrayCuda, const float value) {
     const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
@@ -272,7 +272,7 @@ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
+void reg_multiplyArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda) {
     const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
@@ -281,7 +281,7 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) {
+void reg_addArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda) {
     const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
     const dim3 gridDims = dim3(grids, grids, 1);
@@ -290,17 +290,17 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-float reg_sumReduction_gpu(float *arrayCuda, const size_t& size) {
+float reg_sumReduction_gpu(float *arrayCuda, const size_t size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus<float>());
 }
 /* *************************************************************** */
-float reg_maxReduction_gpu(float *arrayCuda, const size_t& size) {
+float reg_maxReduction_gpu(float *arrayCuda, const size_t size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::maximum<float>());
 }
 /* *************************************************************** */
-float reg_minReduction_gpu(float *arrayCuda, const size_t& size) {
+float reg_minReduction_gpu(float *arrayCuda, const size_t size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum<float>());
 }
@@ -328,7 +328,7 @@ void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4
 }
 /* *************************************************************** */
 template<bool isMin>
-DEVICE static inline float MinMax(const float& lhs, const float& rhs) {
+DEVICE static inline float MinMax(const float lhs, const float rhs) {
     if constexpr (isMin) return lhs < rhs ? lhs : rhs;
     else return lhs > rhs ? lhs : rhs;
 }
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
index 6d60ea4d..7cbb1e8a 100755
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ b/reg-lib/cuda/_reg_tools_gpu.h
@@ -29,26 +29,26 @@ void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ
 /* *************************************************************** */
 void reg_gaussianSmoothing_gpu(const nifti_image *image,
                                float4 *imageCuda,
-                               const float& sigma,
+                               const float sigma,
                                const bool axisToSmooth[8]);
 /* *************************************************************** */
 void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
                                        float4 *imageCuda,
                                        const float *smoothingRadius);
 /* *************************************************************** */
-void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value);
+void reg_multiplyValue_gpu(const size_t count, float4 *arrayCuda, const float value);
 /* *************************************************************** */
-void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value);
+void reg_addValue_gpu(const size_t count, float4 *arrayCuda, const float value);
 /* *************************************************************** */
-void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda);
+void reg_multiplyArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda);
 /* *************************************************************** */
-void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda);
+void reg_addArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda);
 /* *************************************************************** */
-float reg_sumReduction_gpu(float *arrayCuda, const size_t& size);
+float reg_sumReduction_gpu(float *arrayCuda, const size_t size);
 /* *************************************************************** */
-float reg_maxReduction_gpu(float *arrayCuda, const size_t& size);
+float reg_maxReduction_gpu(float *arrayCuda, const size_t size);
 /* *************************************************************** */
-float reg_minReduction_gpu(float *arrayCuda, const size_t& size);
+float reg_minReduction_gpu(float *arrayCuda, const size_t size);
 /* *************************************************************** */
 void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index 8782ded1..2dcf468a 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -51,7 +51,7 @@ __global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda,
                             const int index = (indexZ * voxelImageDims.y + indexY) * voxelImageDims.x + indexX;
                             float linearWeight = basisX[a] * basisY[b];
                             if constexpr (is3d) linearWeight *= basisZ[c];
-                            const float4& voxelValue = tex1Dfetch<float4>(voxelImageTexture, index);
+                            const float4 voxelValue = tex1Dfetch<float4>(voxelImageTexture, index);
                             interpolatedValue[0] += linearWeight * voxelValue.x;
                             interpolatedValue[1] += linearWeight * voxelValue.y;
                             if constexpr (is3d)
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 57555e12..0a97bd01 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -131,10 +131,10 @@ class ConjugateGradientTest: public InterfaceOptimiser {
     void UpdateControlPointPosition(NiftiImage& currentDof,
                                     const NiftiImage& bestDof,
                                     const NiftiImage& gradient,
-                                    const float& scale,
-                                    const bool& optimiseX,
-                                    const bool& optimiseY,
-                                    const bool& optimiseZ) {
+                                    const float scale,
+                                    const bool optimiseX,
+                                    const bool optimiseY,
+                                    const bool optimiseZ) {
         // Update the values for the x-axis displacement
         if (optimiseX) {
             auto currentDofPtr = currentDof.data(0);
@@ -161,7 +161,7 @@ class ConjugateGradientTest: public InterfaceOptimiser {
         }
     }
 
-    void UpdateGradientValues(NiftiImage& gradient, const bool& firstCall, const bool& isSymmetric, NiftiImage *gradientBw) {
+    void UpdateGradientValues(NiftiImage& gradient, const bool firstCall, const bool isSymmetric, NiftiImage *gradientBw) {
         // Create array1 and array2
         static NiftiImage array1, array1Bw;
         static NiftiImage array2, array2Bw;
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index e98dd2e2..528a1642 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -237,7 +237,7 @@ class LnccTest {
                         for (int i = -kernel.radius[0]; i <= kernel.radius[0]; i++) {
                             int xx = x + i;
                             if (0 <= xx && xx < ref->nx) {
-                                const double& kernelValue = *kernelPtr;
+                                const double kernelValue = *kernelPtr;
                                 const int index = (zz * ref->ny + yy) * ref->nx + xx;
                                 meanRef += kernelValue * static_cast<double>(refPtr[index]);
                                 meanFlo += kernelValue * static_cast<double>(floPtr[index]);
@@ -257,7 +257,7 @@ class LnccTest {
         const float *kernelPtr = kernel.ptr.get();
         const auto refPtr = ref.data();
         const auto floPtr = flo.data();
-        const auto& [meanRef, meanFlo] = means;
+        const auto [meanRef, meanFlo] = means;
         double varRef = 0, varFlo = 0, wdiff = 0, kernelSum = 0;
         for (int k = -kernel.radius[2]; k <= kernel.radius[2]; k++) {
             int zz = z + k;

From f953b5f9e540e978d3072fb2b06f2f72da63f06f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 17 Nov 2023 09:53:15 +0000
Subject: [PATCH 246/314] Convert reference and floating images to float arrays
 from cudaArrays #92

 - Eliminate unnecessary Cuda::* functions
 - Refactor Cuda::CreateTextureObject()
---
 niftyreg_build_version.txt                   |   2 +-
 reg-apps/reg_benchmark.cpp                   |  20 +-
 reg-lib/cuda/CudaCommon.cu                   | 325 ++++---------------
 reg-lib/cuda/CudaCommon.hpp                  |  62 ++--
 reg-lib/cuda/CudaContent.cpp                 |   8 +-
 reg-lib/cuda/CudaContent.h                   |  16 +-
 reg-lib/cuda/CudaKernelConvolution.cu        |  14 +-
 reg-lib/cuda/CudaNormaliseGradient.cu        |   6 +-
 reg-lib/cuda/_reg_localTransformation_gpu.cu |  60 ++--
 reg-lib/cuda/_reg_measure_gpu.h              |  20 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                 |  94 +++---
 reg-lib/cuda/_reg_nmi_gpu.h                  |   8 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu           |  29 +-
 reg-lib/cuda/_reg_resampling_gpu.cu          |  19 +-
 reg-lib/cuda/_reg_resampling_gpu.h           |   4 +-
 reg-lib/cuda/_reg_resampling_kernels.cu      |  43 ++-
 reg-lib/cuda/_reg_ssd_gpu.cu                 |  62 ++--
 reg-lib/cuda/_reg_ssd_gpu.h                  |   4 +-
 reg-lib/cuda/_reg_ssd_kernels.cu             |  14 +-
 reg-lib/cuda/_reg_tools_gpu.cu               |  15 +-
 reg-lib/cuda/blockMatchingKernel.cu          |   9 +-
 21 files changed, 281 insertions(+), 553 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9c6f0c3e..47531021 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-364
+365
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index 52661f88..6a8ebfbe 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -181,18 +181,18 @@ int main(int argc, char **argv)
 
 #ifdef USE_CUDA
    float *targetImageArray_d;
-   cudaArray *sourceImageArray_d;
+   float *sourceImageArray_d;
    int *targetMask_d;
    float4 *deformationFieldImageArray_d;
    if(runGPU)
    {
-      Cuda::Allocate<float>(&targetImageArray_d, targetImage->nvox);
-      Cuda::TransferNiftiToDevice<float>(targetImageArray_d, targetImage);
+      Cuda::Allocate(&targetImageArray_d, targetImage->nvox);
+      Cuda::TransferNiftiToDevice(targetImageArray_d, targetImage);
       Cuda::Allocate<float>(&sourceImageArray_d, sourceImage->nvox);
-      Cuda::TransferNiftiToDevice<float>(sourceImageArray_d,sourceImage);
-      CUDA_SAFE_CALL(cudaMalloc((void **)&targetMask_d, targetImage->nvox*sizeof(int)));
+      Cuda::TransferNiftiToDevice(sourceImageArray_d,sourceImage);
+      CUDA_SAFE_CALL(cudaMalloc((void**)&targetMask_d, targetImage->nvox*sizeof(int)));
       CUDA_SAFE_CALL(cudaMemcpy(targetMask_d, maskImage, targetImage->nvox*sizeof(int), cudaMemcpyHostToDevice));
-      CUDA_SAFE_CALL(cudaMalloc((void **)&deformationFieldImageArray_d, targetImage->nvox*sizeof(float4)));
+      CUDA_SAFE_CALL(cudaMalloc((void**)&deformationFieldImageArray_d, targetImage->nvox*sizeof(float4)));
    }
 #endif
 
@@ -277,8 +277,8 @@ int main(int argc, char **argv)
    float4 *controlPointImageArray_d;
    if(runGPU)
    {
-      Cuda::Allocate<float4>(&controlPointImageArray_d, controlPointImage->dim);
-      Cuda::TransferNiftiToDevice<float4>(controlPointImageArray_d,controlPointImage);
+      Cuda::Allocate(&controlPointImageArray_d, controlPointImage->dim);
+      Cuda::TransferNiftiToDevice(controlPointImageArray_d, controlPointImage);
    }
 #endif
    {
@@ -330,8 +330,8 @@ int main(int argc, char **argv)
    float4 *velocityFieldImageArray_d;
    if(runGPU)
    {
-      Cuda::Allocate<float4>(&velocityFieldImageArray_d, velocityFieldImage->dim);
-      Cuda::TransferNiftiToDevice<float4>(velocityFieldImageArray_d,velocityFieldImage);
+      Cuda::Allocate(&velocityFieldImageArray_d, velocityFieldImage->dim);
+      Cuda::TransferNiftiToDevice(velocityFieldImageArray_d, velocityFieldImage);
    }
 #endif
    {
diff --git a/reg-lib/cuda/CudaCommon.cu b/reg-lib/cuda/CudaCommon.cu
index 27804dcb..1f56f95e 100644
--- a/reg-lib/cuda/CudaCommon.cu
+++ b/reg-lib/cuda/CudaCommon.cu
@@ -14,37 +14,16 @@
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
 /* *************************************************************** */
-template <class DataType>
-void Allocate(cudaArray **arrayCuda, const int *dim) {
-    const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3]));
-    const cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DataType>();
-    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(arrayCuda, &texDesc, volumeSize));
-}
-template void Allocate<float>(cudaArray**, const int*);
-template void Allocate<double>(cudaArray**, const int*);
-template void Allocate<float4>(cudaArray**, const int*); // for deformation field
-/* *************************************************************** */
-template <class DataType>
-void Allocate(cudaArray **array1Cuda, cudaArray **array2Cuda, const int *dim) {
-    const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3]));
-    const cudaChannelFormatDesc texDesc = cudaCreateChannelDesc<DataType>();
-    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array1Cuda, &texDesc, volumeSize));
-    NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array2Cuda, &texDesc, volumeSize));
-}
-template void Allocate<float>(cudaArray**, cudaArray**, const int*);
-template void Allocate<double>(cudaArray**, cudaArray**, const int*);
-template void Allocate<float4>(cudaArray**, cudaArray**, const int*); // for deformation field
-/* *************************************************************** */
-template <class DataType>
-void Allocate(DataType **arrayCuda, const size_t& nVoxels) {
+template<class DataType>
+void Allocate(DataType **arrayCuda, const size_t nVoxels) {
     NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, nVoxels * sizeof(DataType)));
 }
-template void Allocate<int>(int**, const size_t&);
-template void Allocate<float>(float**, const size_t&);
-template void Allocate<double>(double**, const size_t&);
-template void Allocate<float4>(float4**, const size_t&); // for deformation field
+template void Allocate<int>(int**, const size_t);
+template void Allocate<float>(float**, const size_t);
+template void Allocate<double>(double**, const size_t);
+template void Allocate<float4>(float4**, const size_t);
 /* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void Allocate(DataType **arrayCuda, const int *dim) {
     const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, memSize));
@@ -52,9 +31,9 @@ void Allocate(DataType **arrayCuda, const int *dim) {
 template void Allocate<int>(int**, const int*);
 template void Allocate<float>(float**, const int*);
 template void Allocate<double>(double**, const int*);
-template void Allocate<float4>(float4**, const int*); // for deformation field
+template void Allocate<float4>(float4**, const int*);
 /* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void Allocate(DataType **array1Cuda, DataType **array2Cuda, const int *dim) {
     const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType);
     NR_CUDA_SAFE_CALL(cudaMalloc(array1Cuda, memSize));
@@ -62,167 +41,16 @@ void Allocate(DataType **array1Cuda, DataType **array2Cuda, const int *dim) {
 }
 template void Allocate<float>(float**, float**, const int*);
 template void Allocate<double>(double**, double**, const int*);
-template void Allocate<float4>(float4**, float4**, const int*); // for deformation field
-/* *************************************************************** */
-template <class DataType, class NiftiType>
-void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) {
-    if (sizeof(DataType) != sizeof(NiftiType))
-        NR_FATAL_ERROR("The host and device arrays are of different types");
-    cudaMemcpy3DParms copyParams{};
-    copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-    copyParams.srcPtr = make_cudaPitchedPtr(img->data,
-                                            copyParams.extent.width * sizeof(DataType),
-                                            copyParams.extent.width,
-                                            copyParams.extent.height);
-    copyParams.dstArray = arrayCuda;
-    copyParams.kind = cudaMemcpyHostToDevice;
-    NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-}
+template void Allocate<float4>(float4**, float4**, const int*);
 /* *************************************************************** */
-template <class DataType>
-void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) {
-    if (sizeof(DataType) == sizeof(float4)) {
-        if (img->datatype != NIFTI_TYPE_FLOAT32)
-            NR_FATAL_ERROR("The specified image is not a single precision image");
-        const float *niftiImgValues = static_cast<float*>(img->data);
-        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
-        const auto timePointCount = img->dim[4] * img->dim[5];
-        unique_ptr<float4[]> array(new float4[voxelNumber]());
-        for (size_t i = 0; i < voxelNumber; i++)
-            array[i].x = *niftiImgValues++;
-        if (timePointCount >= 2) {
-            for (size_t i = 0; i < voxelNumber; i++)
-                array[i].y = *niftiImgValues++;
-        }
-        if (timePointCount >= 3) {
-            for (size_t i = 0; i < voxelNumber; i++)
-                array[i].z = *niftiImgValues++;
-        }
-        if (timePointCount >= 4) {
-            for (size_t i = 0; i < voxelNumber; i++)
-                array[i].w = *niftiImgValues++;
-        }
-        cudaMemcpy3DParms copyParams{};
-        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-        copyParams.srcPtr = make_cudaPitchedPtr(array.get(),
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = arrayCuda;
-        copyParams.kind = cudaMemcpyHostToDevice;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-    } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
-        switch (img->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            TransferNiftiToDevice<DataType, float>(arrayCuda, img);
-            break;
-        default:
-            NR_FATAL_ERROR("The image data type is not supported");
-        }
-    }
-}
-template void TransferNiftiToDevice<int>(cudaArray*, const nifti_image*);
-template void TransferNiftiToDevice<float>(cudaArray*, const nifti_image*);
-template void TransferNiftiToDevice<double>(cudaArray*, const nifti_image*);
-template void TransferNiftiToDevice<float4>(cudaArray*, const nifti_image*); // for deformation field
-/* *************************************************************** */
-template <class DataType, class NiftiType>
-void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
-    if (sizeof(DataType) != sizeof(NiftiType))
-        NR_FATAL_ERROR("The host and device arrays are of different types");
-    NiftiType *array1 = static_cast<NiftiType*>(img->data);
-    NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)];
-    cudaMemcpy3DParms copyParams{};
-    copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-    copyParams.kind = cudaMemcpyHostToDevice;
-    // First timepoint
-    copyParams.srcPtr = make_cudaPitchedPtr(array1,
-                                            copyParams.extent.width * sizeof(DataType),
-                                            copyParams.extent.width,
-                                            copyParams.extent.height);
-    copyParams.dstArray = array1Cuda;
-    NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-    // Second timepoint
-    copyParams.srcPtr = make_cudaPitchedPtr(array2,
-                                            copyParams.extent.width * sizeof(DataType),
-                                            copyParams.extent.width,
-                                            copyParams.extent.height);
-    copyParams.dstArray = array2Cuda;
-    NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-}
-/* *************************************************************** */
-template <class DataType>
-void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) {
-    if (sizeof(DataType) == sizeof(float4)) {
-        if (img->datatype != NIFTI_TYPE_FLOAT32)
-            NR_FATAL_ERROR("The specified image is not a single precision image");
-        const float *niftiImgValues = static_cast<float*>(img->data);
-        const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
-        const auto timePointCount = img->dim[4] * img->dim[5];
-        unique_ptr<float4[]> array1(new float4[voxelNumber]());
-        unique_ptr<float4[]> array2(new float4[voxelNumber]());
-        for (size_t i = 0; i < voxelNumber; i++)
-            array1[i].x = *niftiImgValues++;
-        for (size_t i = 0; i < voxelNumber; i++)
-            array2[i].x = *niftiImgValues++;
-        if (timePointCount >= 2) {
-            for (size_t i = 0; i < voxelNumber; i++)
-                array1[i].y = *niftiImgValues++;
-            for (size_t i = 0; i < voxelNumber; i++)
-                array2[i].y = *niftiImgValues++;
-        }
-        if (timePointCount >= 3) {
-            for (size_t i = 0; i < voxelNumber; i++)
-                array1[i].z = *niftiImgValues++;
-            for (size_t i = 0; i < voxelNumber; i++)
-                array2[i].z = *niftiImgValues++;
-        }
-        if (timePointCount >= 4) {
-            for (size_t i = 0; i < voxelNumber; i++)
-                array1[i].w = *niftiImgValues++;
-            for (size_t i = 0; i < voxelNumber; i++)
-                array2[i].w = *niftiImgValues++;
-        }
-
-        cudaMemcpy3DParms copyParams{};
-        copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-        copyParams.kind = cudaMemcpyHostToDevice;
-        // First timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr(array1.get(),
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = array1Cuda;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-        // Second timepoint
-        copyParams.srcPtr = make_cudaPitchedPtr(array2.get(),
-                                                copyParams.extent.width * sizeof(DataType),
-                                                copyParams.extent.width,
-                                                copyParams.extent.height);
-        copyParams.dstArray = array2Cuda;
-        NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-    } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
-        switch (img->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            TransferNiftiToDevice<DataType, float>(array1Cuda, array2Cuda, img);
-            break;
-        default:
-            NR_FATAL_ERROR("The image data type is not supported");
-        }
-    }
-}
-template void TransferNiftiToDevice<float>(cudaArray*, cudaArray*, const nifti_image*);
-template void TransferNiftiToDevice<double>(cudaArray*, cudaArray*, const nifti_image*);
-template void TransferNiftiToDevice<float4>(cudaArray*, cudaArray*, const nifti_image*); // for deformation field
-/* *************************************************************** */
-template <class DataType, class NiftiType>
+template<class DataType, class NiftiType>
 void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType))
         NR_FATAL_ERROR("The host and device arrays are of different types");
     NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(NiftiType), cudaMemcpyHostToDevice));
 }
 /* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
         if (img->datatype != NIFTI_TYPE_FLOAT32)
@@ -246,7 +74,7 @@ void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) {
                 array[i].w = *niftiImgValues++;
         }
         NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
-    } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
+    } else {
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
             TransferNiftiToDevice<DataType, float>(arrayCuda, img);
@@ -261,7 +89,7 @@ template void TransferNiftiToDevice<float>(float*, const nifti_image*);
 template void TransferNiftiToDevice<double>(double*, const nifti_image*);
 template void TransferNiftiToDevice<float4>(float4*, const nifti_image*);
 /* *************************************************************** */
-template <class DataType, class NiftiType>
+template<class DataType, class NiftiType>
 void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) != sizeof(NiftiType))
         NR_FATAL_ERROR("The host and device arrays are of different types");
@@ -273,7 +101,7 @@ void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nif
     NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice));
 }
 /* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) {
     if (sizeof(DataType) == sizeof(float4)) {
         if (img->datatype != NIFTI_TYPE_FLOAT32)
@@ -307,7 +135,7 @@ void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nif
         }
         NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
         NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice));
-    } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement
+    } else {
         switch (img->datatype) {
         case NIFTI_TYPE_FLOAT32:
             TransferNiftiToDevice<DataType, float>(array1Cuda, array2Cuda, img);
@@ -319,38 +147,24 @@ void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nif
 }
 template void TransferNiftiToDevice<float>(float*, float*, const nifti_image*);
 template void TransferNiftiToDevice<double>(double*, double*, const nifti_image*);
-template void TransferNiftiToDevice<float4>(float4*, float4*, const nifti_image*); // for deformation field
+template void TransferNiftiToDevice<float4>(float4*, float4*, const nifti_image*);
 /* *************************************************************** */
-template <class DataType>
-void TransferNiftiToDevice(DataType *arrayCuda, const DataType *img, const size_t& nvox) {
+template<class DataType>
+void TransferNiftiToDevice(DataType *arrayCuda, const DataType *img, const size_t nvox) {
     NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice));
 }
-template void TransferNiftiToDevice<int>(int*, const int*, const size_t&);
-template void TransferNiftiToDevice<float>(float*, const float*, const size_t&);
-template void TransferNiftiToDevice<double>(double*, const double*, const size_t&);
+template void TransferNiftiToDevice<int>(int*, const int*, const size_t);
+template void TransferNiftiToDevice<float>(float*, const float*, const size_t);
+template void TransferNiftiToDevice<double>(double*, const double*, const size_t);
 /* *************************************************************** */
-void TransferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) {
-    if (img->datatype != NIFTI_TYPE_FLOAT32)
-        NR_FATAL_ERROR("The image data type is not supported");
-    cudaMemcpy3DParms copyParams{};
-    copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3]));
-    copyParams.srcArray = const_cast<cudaArray*>(arrayCuda);
-    copyParams.dstPtr = make_cudaPitchedPtr(img->data,
-                                            copyParams.extent.width * sizeof(float),
-                                            copyParams.extent.width,
-                                            copyParams.extent.height);
-    copyParams.kind = cudaMemcpyDeviceToHost;
-    NR_CUDA_SAFE_CALL(cudaMemcpy3D(&copyParams));
-}
-/* *************************************************************** */
-template <class DataType, class NiftiType>
+template<class DataType, class NiftiType>
 void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) {
     if (sizeof(DataType) != sizeof(NiftiType))
         NR_FATAL_ERROR("The host and device arrays are of different types");
     NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost));
 }
 /* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) {
     if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
@@ -387,9 +201,9 @@ void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) {
 }
 template void TransferFromDeviceToNifti<float>(nifti_image*, const float*);
 template void TransferFromDeviceToNifti<double>(nifti_image*, const double*);
-template void TransferFromDeviceToNifti<float4>(nifti_image*, const float4*); // for deformation field
+template void TransferFromDeviceToNifti<float4>(nifti_image*, const float4*);
 /* *************************************************************** */
-template <class DataType, class NiftiType>
+template<class DataType, class NiftiType>
 void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
     if (sizeof(DataType) != sizeof(NiftiType))
         NR_FATAL_ERROR("The host and device arrays are of different types");
@@ -400,7 +214,7 @@ void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, con
     NR_CUDA_SAFE_CALL(cudaMemcpy(array2, array2Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost));
 }
 /* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) {
     if (sizeof(DataType) == sizeof(float4)) {
         // A nifti 5D volume is expected
@@ -447,29 +261,24 @@ void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, con
 }
 template void TransferFromDeviceToNifti<float>(nifti_image*, const float*, const float*);
 template void TransferFromDeviceToNifti<double>(nifti_image*, const double*, const double*);
-template void TransferFromDeviceToNifti<float4>(nifti_image*, const float4*, const float4*); // for deformation field
+template void TransferFromDeviceToNifti<float4>(nifti_image*, const float4*, const float4*);
 /* *************************************************************** */
-template <class DataType>
-void TransferFromDeviceToHost(DataType *array, const DataType *arrayCuda, const size_t& nElements) {
+template<class DataType>
+void TransferFromDeviceToHost(DataType *array, const DataType *arrayCuda, const size_t nElements) {
     NR_CUDA_SAFE_CALL(cudaMemcpy(array, arrayCuda, nElements * sizeof(DataType), cudaMemcpyDeviceToHost));
 }
-template void TransferFromDeviceToHost<float>(float*, const float*, const size_t&);
-template void TransferFromDeviceToHost<double>(double*, const double*, const size_t&);
+template void TransferFromDeviceToHost<float>(float*, const float*, const size_t);
+template void TransferFromDeviceToHost<double>(double*, const double*, const size_t);
 /* *************************************************************** */
-template <class DataType>
-void TransferFromHostToDevice(DataType *arrayCuda, const DataType *array, const size_t& nElements) {
+template<class DataType>
+void TransferFromHostToDevice(DataType *arrayCuda, const DataType *array, const size_t nElements) {
     NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array, nElements * sizeof(DataType), cudaMemcpyHostToDevice));
 }
-template void TransferFromHostToDevice<int>(int*, const int*, const size_t&);
-template void TransferFromHostToDevice<float>(float*, const float*, const size_t&);
-template void TransferFromHostToDevice<double>(double*, const double*, const size_t&);
+template void TransferFromHostToDevice<int>(int*, const int*, const size_t);
+template void TransferFromHostToDevice<float>(float*, const float*, const size_t);
+template void TransferFromHostToDevice<double>(double*, const double*, const size_t);
 /* *************************************************************** */
-void Free(cudaArray *arrayCuda) {
-    if (arrayCuda != nullptr)
-        NR_CUDA_SAFE_CALL(cudaFreeArray(arrayCuda));
-}
-/* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void Free(DataType *arrayCuda) {
     if (arrayCuda != nullptr)
         NR_CUDA_SAFE_CALL(cudaFree(arrayCuda));
@@ -479,56 +288,52 @@ template void Free<float>(float*);
 template void Free<double>(double*);
 template void Free<float4>(float4*);
 /* *************************************************************** */
-void DestroyTextureObject(cudaTextureObject_t *texObj) {
+template<>
+void Free(cudaTextureObject_t *texObj) {
     NR_CUDA_SAFE_CALL(cudaDestroyTextureObject(*texObj));
     delete texObj;
 }
 /* *************************************************************** */
-UniqueTextureObjectPtr CreateTextureObject(const void *devPtr,
-                                           const cudaResourceType& resType,
-                                           const size_t& size,
-                                           const cudaChannelFormatKind& channelFormat,
-                                           const unsigned& channelCount,
-                                           const cudaTextureFilterMode& filterMode,
-                                           const bool& normalizedCoordinates) {
+template<class DataType>
+UniqueTextureObjectPtr CreateTextureObject(const DataType *devPtr,
+                                           const size_t count,
+                                           const cudaChannelFormatKind channelFormat,
+                                           const unsigned channelCount) {
     // Specify texture
     cudaResourceDesc resDesc{};
-    resDesc.resType = resType;
-    switch (resType) {
-    case cudaResourceTypeLinear:
-        resDesc.res.linear.devPtr = const_cast<void*>(devPtr);
-        resDesc.res.linear.desc.f = channelFormat;
-        resDesc.res.linear.desc.x = 32;
-        if (channelCount > 1)
-            resDesc.res.linear.desc.y = 32;
-        if (channelCount > 2)
-            resDesc.res.linear.desc.z = 32;
-        if (channelCount > 3)
-            resDesc.res.linear.desc.w = 32;
-        resDesc.res.linear.sizeInBytes = size;
-        break;
-    case cudaResourceTypeArray:
-        resDesc.res.array.array = static_cast<cudaArray*>(const_cast<void*>(devPtr));
-        break;
-    default:
-        NR_FATAL_ERROR("Unsupported resource type");
-    }
+    resDesc.resType = cudaResourceTypeLinear;
+    resDesc.res.linear.devPtr = const_cast<DataType*>(devPtr);
+    resDesc.res.linear.desc.f = channelFormat;
+    resDesc.res.linear.desc.x = 32;
+    if (channelCount > 1)
+        resDesc.res.linear.desc.y = 32;
+    if (channelCount > 2)
+        resDesc.res.linear.desc.z = 32;
+    if (channelCount > 3)
+        resDesc.res.linear.desc.w = 32;
+    resDesc.res.linear.sizeInBytes = count * sizeof(DataType);
 
     // Specify texture object parameters
     cudaTextureDesc texDesc{};
     texDesc.addressMode[0] = cudaAddressModeWrap;
     texDesc.addressMode[1] = cudaAddressModeWrap;
     texDesc.addressMode[2] = cudaAddressModeWrap;
-    texDesc.filterMode = filterMode;
+    texDesc.filterMode = cudaFilterModePoint;
     texDesc.readMode = cudaReadModeElementType;
-    texDesc.normalizedCoords = normalizedCoordinates;
+    texDesc.normalizedCoords = false;
 
     // Create texture object
-    UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), DestroyTextureObject);
+    UniqueTextureObjectPtr texObj(new cudaTextureObject_t());
     NR_CUDA_SAFE_CALL(cudaCreateTextureObject(texObj.get(), &resDesc, &texDesc, nullptr));
 
     return texObj;
 }
+template UniqueTextureObjectPtr CreateTextureObject<bool>(const bool*, const size_t, const cudaChannelFormatKind, const unsigned);
+template UniqueTextureObjectPtr CreateTextureObject<int>(const int*, const size_t, const cudaChannelFormatKind, const unsigned);
+template UniqueTextureObjectPtr CreateTextureObject<float>(const float*, const size_t, const cudaChannelFormatKind, const unsigned);
+template UniqueTextureObjectPtr CreateTextureObject<float2>(const float2*, const size_t, const cudaChannelFormatKind, const unsigned);
+template UniqueTextureObjectPtr CreateTextureObject<float4>(const float4*, const size_t, const cudaChannelFormatKind, const unsigned);
+template UniqueTextureObjectPtr CreateTextureObject<mat33>(const mat33*, const size_t, const cudaChannelFormatKind, const unsigned);
 /* *************************************************************** */
 } // namespace NiftyReg::Cuda
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp
index 9b32dd4d..b5872e56 100644
--- a/reg-lib/cuda/CudaCommon.hpp
+++ b/reg-lib/cuda/CudaCommon.hpp
@@ -69,53 +69,37 @@ inline void CheckKernel(const std::string& file, const int line, const std::stri
 #define NR_CUDA_SAFE_CALL(call)             { call; NiftyReg::Cuda::Internal::SafeCall(__FILE__, __LINE__, NR_FUNCTION); }
 #define NR_CUDA_CHECK_KERNEL(grid, block)   NiftyReg::Cuda::Internal::CheckKernel(__FILE__, __LINE__, NR_FUNCTION, grid, block)
 /* *************************************************************** */
-template <class DataType>
-void Allocate(cudaArray**, const int*);
+template<class DataType>
+void Allocate(DataType**, const size_t);
 /* *************************************************************** */
-template <class DataType>
-void Allocate(cudaArray**, cudaArray**, const int*);
-/* *************************************************************** */
-template <class DataType>
-void Allocate(DataType**, const size_t&);
-/* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void Allocate(DataType**, const int*);
 /* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void Allocate(DataType**, DataType**, const int*);
 /* *************************************************************** */
-template <class DataType>
-void TransferNiftiToDevice(cudaArray*, const nifti_image*);
-/* *************************************************************** */
-template <class DataType>
-void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_image*);
-/* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void TransferNiftiToDevice(DataType*, const nifti_image*);
 /* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void TransferNiftiToDevice(DataType*, DataType*, const nifti_image*);
 /* *************************************************************** */
-template <class DataType>
-void TransferNiftiToDevice(DataType*, const DataType*, const size_t&);
+template<class DataType>
+void TransferNiftiToDevice(DataType*, const DataType*, const size_t);
 /* *************************************************************** */
-void TransferFromDeviceToNifti(nifti_image*, const cudaArray*);
-/* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void TransferFromDeviceToNifti(nifti_image*, const DataType*);
 /* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void TransferFromDeviceToNifti(nifti_image*, const DataType*, const DataType*);
 /* *************************************************************** */
-template <class DataType>
-void TransferFromDeviceToHost(DataType*, const DataType*, const size_t&);
-/* *************************************************************** */
-template <class DataType>
-void TransferFromHostToDevice(DataType*, const DataType*, const size_t&);
+template<class DataType>
+void TransferFromDeviceToHost(DataType*, const DataType*, const size_t);
 /* *************************************************************** */
-void Free(cudaArray*);
+template<class DataType>
+void TransferFromHostToDevice(DataType*, const DataType*, const size_t);
 /* *************************************************************** */
-template <class DataType>
+template<class DataType>
 void Free(DataType*);
 /* *************************************************************** */
 namespace Internal {
@@ -123,18 +107,16 @@ template <class T>
 struct UniquePtrDeleter { void operator()(T *ptr) const { Free(ptr); } };
 }
 /* *************************************************************** */
-template <class T>
+template<class T>
 using UniquePtr = unique_ptr<T, Internal::UniquePtrDeleter<T>>;
 /* *************************************************************** */
-using UniqueTextureObjectPtr = unique_ptr<cudaTextureObject_t, void(*)(cudaTextureObject_t*)>;
+using UniqueTextureObjectPtr = UniquePtr<cudaTextureObject_t>;
 /* *************************************************************** */
-UniqueTextureObjectPtr CreateTextureObject(const void *devPtr,
-                                           const cudaResourceType& resType,
-                                           const size_t& size = 0,
-                                           const cudaChannelFormatKind& channelFormat = cudaChannelFormatKindNone,
-                                           const unsigned& channelCount = 1,
-                                           const cudaTextureFilterMode& filterMode = cudaFilterModePoint,
-                                           const bool& normalizedCoordinates = false);
+template<class DataType>
+UniqueTextureObjectPtr CreateTextureObject(const DataType *devPtr,
+                                           const size_t count,
+                                           const cudaChannelFormatKind channelFormat,
+                                           const unsigned channelCount);
 /* *************************************************************** */
 } // namespace NiftyReg::Cuda
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 37df05ab..f26f8c69 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -25,17 +25,17 @@ CudaContent::~CudaContent() {
 void CudaContent::AllocateReference() {
     if (reference->nbyper != NIFTI_TYPE_FLOAT32)
         reg_tools_changeDatatype<float>(reference);
-    Cuda::Allocate<float>(&referenceCuda, reference->dim);
+    Cuda::Allocate(&referenceCuda, reference->nvox);
     referenceCudaManaged.reset(referenceCuda);
-    Cuda::TransferNiftiToDevice<float>(referenceCuda, reference);
+    Cuda::TransferNiftiToDevice(referenceCuda, reference);
 }
 /* *************************************************************** */
 void CudaContent::AllocateFloating() {
     if (floating->nbyper != NIFTI_TYPE_FLOAT32)
         reg_tools_changeDatatype<float>(floating);
-    Cuda::Allocate<float>(&floatingCuda, floating->dim);
+    Cuda::Allocate(&floatingCuda, floating->nvox);
     floatingCudaManaged.reset(floatingCuda);
-    Cuda::TransferNiftiToDevice<float>(floatingCuda, floating);
+    Cuda::TransferNiftiToDevice(floatingCuda, floating);
 }
 /* *************************************************************** */
 void CudaContent::AllocateDeformationField() {
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index f308ec1b..bf3230c4 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -18,8 +18,8 @@ class CudaContent: public virtual Content {
     // Getters
     virtual nifti_image* GetDeformationField() override;
     virtual nifti_image* GetWarped() override;
-    virtual cudaArray* GetReferenceCuda() { return referenceCuda; }
-    virtual cudaArray* GetFloatingCuda() { return floatingCuda; }
+    virtual float* GetReferenceCuda() { return referenceCuda; }
+    virtual float* GetFloatingCuda() { return floatingCuda; }
     virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; }
     virtual int* GetReferenceMaskCuda() { return referenceMaskCuda; }
     virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; }
@@ -30,10 +30,10 @@ class CudaContent: public virtual Content {
     virtual void UpdateWarped() override;
 
 protected:
-    cudaArray *referenceCuda = nullptr;
-    Cuda::UniquePtr<cudaArray> referenceCudaManaged;
-    cudaArray *floatingCuda = nullptr;
-    Cuda::UniquePtr<cudaArray> floatingCudaManaged;
+    float *referenceCuda = nullptr;
+    Cuda::UniquePtr<float> referenceCudaManaged;
+    float *floatingCuda = nullptr;
+    Cuda::UniquePtr<float> floatingCudaManaged;
     float4 *deformationFieldCuda = nullptr;
     int *referenceMaskCuda = nullptr;
     float *transformationMatrixCuda = nullptr;
@@ -49,8 +49,8 @@ class CudaContent: public virtual Content {
     template<class DataType> DataType CastImageData(float intensity, int datatype);
     template<class DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
     void DownloadImage(nifti_image *image, float *memoryObject, int datatype);
-    void SetReferenceCuda(cudaArray *referenceCudaIn) { referenceCudaManaged = nullptr; referenceCuda = referenceCudaIn; }
-    void SetFloatingCuda(cudaArray *floatingCudaIn) { floatingCudaManaged = nullptr; floatingCuda = floatingCudaIn; }
+    void SetReferenceCuda(float *referenceCudaIn) { referenceCudaManaged = nullptr; referenceCuda = referenceCudaIn; }
+    void SetFloatingCuda(float *floatingCudaIn) { floatingCudaManaged = nullptr; floatingCuda = floatingCudaIn; }
 
     // Friend classes
     friend class CudaF3d2ContentCreator;
diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu
index ff2037ff..67a081ed 100644
--- a/reg-lib/cuda/CudaKernelConvolution.cu
+++ b/reg-lib/cuda/CudaKernelConvolution.cu
@@ -36,12 +36,9 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
     float *bufferDensityCudaPtr = bufferDensityCuda.data().get();
 
     // Create texture objects
-    auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
-                                                     voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 1);
-    auto densityTexturePtr = Cuda::CreateTextureObject(densityCudaPtr, cudaResourceTypeLinear,
-                                                       voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1);
-    auto nanImageTexturePtr = Cuda::CreateTextureObject(nanImageCudaPtr, cudaResourceTypeLinear,
-                                                        voxelNumber * sizeof(bool), cudaChannelFormatKindUnsigned, 1);
+    auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto densityTexturePtr = Cuda::CreateTextureObject(densityCudaPtr, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto nanImageTexturePtr = Cuda::CreateTextureObject(nanImageCudaPtr, voxelNumber, cudaChannelFormatKindUnsigned, 1);
     auto imageTexture = *imageTexturePtr;
     auto densityTexture = *densityTexturePtr;
     auto nanImageTexture = *nanImageTexturePtr;
@@ -138,12 +135,11 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image,
             const int imageDim = reinterpret_cast<const int*>(&imageDims)[n];
             // Create the kernel texture
             thrust::device_vector<float> kernelCuda;
-            Cuda::UniqueTextureObjectPtr kernelTexturePtr(nullptr, nullptr);
+            Cuda::UniqueTextureObjectPtr kernelTexturePtr;
             cudaTextureObject_t kernelTexture = 0;
             if (kernelSum > 0) {
                 kernelCuda = kernel;
-                kernelTexturePtr = std::move(Cuda::CreateTextureObject(kernelCuda.data().get(), cudaResourceTypeLinear,
-                                                                       kernel.size() * sizeof(float), cudaChannelFormatKindFloat, 1));
+                kernelTexturePtr = Cuda::CreateTextureObject(kernelCuda.data().get(), kernel.size(), cudaChannelFormatKindFloat, 1);
                 kernelTexture = *kernelTexturePtr;
             }
 
diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
index 85a250a5..8d948c2e 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -4,8 +4,7 @@
 /* *************************************************************** */
 template<bool optimiseX, bool optimiseY, bool optimiseZ>
 float GetMaximalLength(const float4 *imageCuda, const size_t nVoxels) {
-    auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
-                                                     nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
     auto imageTexture = *imageTexturePtr;
     thrust::counting_iterator<unsigned> index(0);
     return thrust::transform_reduce(thrust::device, index, index + nVoxels, [=]__device__(const unsigned index) {
@@ -47,8 +46,7 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda,
 /* *************************************************************** */
 template<bool optimiseX, bool optimiseY, bool optimiseZ>
 void NormaliseGradient(float4 *imageCuda, const size_t nVoxels, const double maxGradLengthInv) {
-    auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
-                                                     nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
     auto imageTexture = *imageTexturePtr;
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), nVoxels, [=]__device__(const unsigned index) {
         const float4 val = tex1Dfetch<float4>(imageTexture, index);
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu
index 569136b1..ac5be2b0 100755
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu
@@ -31,10 +31,8 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
                                                         controlPointImage->dy / referenceImage->dy,
                                                         controlPointImage->dz / referenceImage->dz);
 
-    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                         controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear,
-                                                 activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1);
+    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
+    auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
 
     // Get the reference matrix if composition is required
     thrust::device_vector<mat44> realToVoxel;
@@ -151,8 +149,7 @@ template<bool is3d>
 double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) {
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                            controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
     auto controlPointTexture = *controlPointTexturePtr;
 
     // Get the constant basis values
@@ -188,8 +185,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
     auto blockSize = CudaContext::GetBlockSize();
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                            controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
     auto controlPointTexture = *controlPointTexturePtr;
 
     // Get the constant basis values
@@ -223,9 +219,8 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
         }
     });
 
-    auto secondDerivativesTexturePtr = Cuda::CreateTextureObject(secondDerivativesCuda, cudaResourceTypeLinear,
-                                                                 secondDerivativesCudaVec.size() * sizeof(typename SecondDerivative<is3d>::TextureType),
-                                                                 cudaChannelFormatKindFloat, sizeof(typename SecondDerivative<is3d>::TextureType) / sizeof(float));
+    auto secondDerivativesTexturePtr = Cuda::CreateTextureObject(secondDerivativesCuda, secondDerivativesCudaVec.size(), cudaChannelFormatKindFloat,
+                                                                 sizeof(typename SecondDerivative<is3d>::TextureType) / sizeof(float));
     auto secondDerivativesTexture = *secondDerivativesTexturePtr;
 
     // Compute the gradient
@@ -293,8 +288,7 @@ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage
     auto blockSize = CudaContext::GetBlockSize();
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
-    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                         controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
 
     // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
     const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz);
@@ -330,8 +324,7 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage,
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
-    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear,
-                                                         controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
 
     // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
     const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz);
@@ -434,10 +427,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
     const float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx),
                                       referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy),
                                       referenceImage->dz * jacobianWeight / ((float)jacNumber * controlPointImage->dz));
-    auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacNumber * sizeof(float),
-                                                                cudaChannelFormatKindFloat, 1);
-    auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear,
-                                                             (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float),
+    auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, jacNumber, cudaChannelFormatKindFloat, 1);
+    auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber,
                                                              cudaChannelFormatKindFloat, 1);
     if (approx) {
         if (controlPointImage->nz > 1) {
@@ -498,22 +489,20 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
 
     // The Jacobian matrices and determinants are computed
     float *jacobianMatricesCuda, *jacobianDetCuda;
-    size_t jacobianDetSize, jacobianMatricesSize;
-    size_t jacNumber; double jacSum;
+    size_t jacobianDetSize, jacNumber;
+    double jacSum;
     if (approx) {
         jacNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
         jacSum = (controlPointImage->nx - 2) * (controlPointImage->ny - 2) * (controlPointImage->nz - 2);
         jacobianDetSize = jacNumber * sizeof(float);
-        jacobianMatricesSize = 9 * jacobianDetSize;
-        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, jacobianMatricesSize));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, 9 * jacobianDetSize));
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize));
         reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     } else {
         jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
         jacSum = static_cast<double>(jacNumber);
         jacobianDetSize = jacNumber * sizeof(float);
-        jacobianMatricesSize = 9 * jacobianDetSize;
-        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, jacobianMatricesSize));
+        NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, 9 * jacobianDetSize));
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize));
         reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     }
@@ -548,10 +537,8 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
-    auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacobianDetSize,
-                                                                cudaChannelFormatKindFloat, 1);
-    auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, jacobianMatricesSize,
-                                                             cudaChannelFormatKindFloat, 1);
+    auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, jacNumber, cudaChannelFormatKindFloat, 1);
+    auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, 9 * jacNumber, cudaChannelFormatKindFloat, 1);
     if (approx) {
         const unsigned blocks = blockSize->reg_spline_approxCorrectFolding3D;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
@@ -676,8 +663,7 @@ void reg_defField_compose_gpu(const nifti_image *deformationField,
     const int3 referenceImageDim{ deformationField->nx, deformationField->ny, deformationField->nz };
     const mat44& affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk;
     const mat44& affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz;
-    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
-                                                             voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
 
     if (deformationField->nz > 1) {
         const unsigned blocks = blockSize->reg_defField_compose3D;
@@ -835,8 +821,7 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
     const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
     const mat33 reorientation = reg_mat44_to_mat33(deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz);
-    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
-                                                             voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
 
     const unsigned blocks = CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
@@ -864,8 +849,7 @@ double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid,
         set_first_order_basis_values(basis.x, basis.y);
 
     // Create the control point texture
-    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointGridCuda, cudaResourceTypeLinear,
-                                                            voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointGridCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
     auto controlPointTexture = *controlPointTexturePtr;
 
     constexpr int matSize = is3d ? 3 : 2;
@@ -912,10 +896,8 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr
     thrust::device_vector<mat33> dispMatricesCuda(voxelNumber);
 
     // Create the textures
-    auto controlPointTexture = Cuda::CreateTextureObject(controlPointGridCuda, cudaResourceTypeLinear,
-                                                         voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), cudaResourceTypeLinear,
-                                                         voxelNumber * sizeof(mat33), cudaChannelFormatKindFloat, 1);
+    auto controlPointTexture = Cuda::CreateTextureObject(controlPointGridCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+    auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), voxelNumber, cudaChannelFormatKindFloat, 1);
 
     // Create the displacement matrices
     reg_spline_createDisplacementMatrices_kernel<is3d><<<gridDims, blockDims>>>(dispMatricesCuda.data().get(), *controlPointTexture,
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index e2c4e836..8d753747 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -22,9 +22,9 @@ class reg_measure_gpu {
     virtual ~reg_measure_gpu() {}
 
     virtual void InitialiseMeasure(nifti_image *refImg,
-                                   cudaArray *refImgCuda,
+                                   float *refImgCuda,
                                    nifti_image *floImg,
-                                   cudaArray *floImgCuda,
+                                   float *floImgCuda,
                                    int *refMask,
                                    int *refMaskCuda,
                                    size_t activeVoxNum,
@@ -75,8 +75,8 @@ class reg_measure_gpu {
     }
 
 protected:
-    cudaArray *referenceImageCuda;
-    cudaArray *floatingImageCuda;
+    float *referenceImageCuda;
+    float *floatingImageCuda;
     int *referenceMaskCuda;
     size_t activeVoxelNumber;
     float *warpedImageCuda;
@@ -100,9 +100,9 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
     virtual ~reg_lncc_gpu() {}
 
     virtual void InitialiseMeasure(nifti_image *refImg,
-                                   cudaArray *refImgCuda,
+                                   float *refImgCuda,
                                    nifti_image *floImg,
-                                   cudaArray *floImgCuda,
+                                   float *floImgCuda,
                                    int *refMask,
                                    int *refMaskCuda,
                                    size_t activeVoxNum,
@@ -142,9 +142,9 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu {
     virtual ~reg_kld_gpu() {}
 
     virtual void InitialiseMeasure(nifti_image *refImg,
-                                   cudaArray *refImgCuda,
+                                   float *refImgCuda,
                                    nifti_image *floImg,
-                                   cudaArray *floImgCuda,
+                                   float *floImgCuda,
                                    int *refMask,
                                    int *refMaskCuda,
                                    size_t activeVoxNum,
@@ -184,9 +184,9 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu {
     virtual ~reg_dti_gpu() {}
 
     virtual void InitialiseMeasure(nifti_image *refImg,
-                                   cudaArray *refImgCuda,
+                                   float *refImgCuda,
                                    nifti_image *floImg,
-                                   cudaArray *floImgCuda,
+                                   float *floImgCuda,
                                    int *refMask,
                                    int *refMaskCuda,
                                    size_t activeVoxNum,
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 45a6616d..1758eda5 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -22,8 +22,8 @@ reg_nmi_gpu::~reg_nmi_gpu() {
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
-                                    nifti_image *floImg, cudaArray *floImgCuda,
+void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, float *refImgCuda,
+                                    nifti_image *floImg, float *floImgCuda,
                                     int *refMask, int *refMaskCuda,
                                     size_t activeVoxNum,
                                     nifti_image *warpedImg, float *warpedImgCuda,
@@ -44,8 +44,8 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
     if (this->referenceTimePoints > 1 || this->floatingImage->nt > 1)
         NR_FATAL_ERROR("Multiple time points are not yet supported");
     // The reference and floating images have to be updated on the device
-    Cuda::TransferNiftiToDevice<float>(this->referenceImageCuda, this->referenceImage);
-    Cuda::TransferNiftiToDevice<float>(this->floatingImageCuda, this->floatingImage);
+    Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage);
+    Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage);
     // Create the joint histograms
     this->jointHistogramLogCudaVecs.resize(this->referenceTimePoints);
     this->jointHistogramProCudaVecs.resize(this->referenceTimePoints);
@@ -67,7 +67,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
 }
 /* *************************************************************** */
 void reg_getNmiValue_gpu(const nifti_image *referenceImage,
-                         const cudaArray *referenceImageCuda,
+                         const float *referenceImageCuda,
                          const float *warpedImageCuda,
                          const double *timePointWeights,
                          const int referenceTimePoints,
@@ -82,10 +82,7 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
                          const bool approximation) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-    auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray);
-    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
-                                                    cudaChannelFormatKindSigned, 1);
-    auto referenceImageTexture = *referenceImageTexturePtr;
+    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
     auto maskTexture = *maskTexturePtr;
 
     // Iterate over all active time points
@@ -100,21 +97,21 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
         thrust::fill(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0);
         double *jointHistogramLogCuda = jointHistogramLogCudaVecs[t].data().get();
         double *jointHistogramProCuda = jointHistogramProCudaVecs[t].data().get();
-        // Define warped image texture
-        auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + t * voxelNumber, cudaResourceTypeLinear,
-                                                               voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1);
+        // Define the current textures
+        auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+        auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+        auto referenceImageTexture = *referenceImageTexturePtr;
         auto warpedImageTexture = *warpedImageTexturePtr;
         // Fill the joint histograms
         if (approximation == false) {
             // No approximation is used for the Parzen windowing
             thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
-                const int& voxel = tex1Dfetch<int>(maskTexture, index);
-                const float& warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
-                if (warValue != warValue) return;
-                auto&& [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDims);
-                const float& refValue = tex3D<float>(referenceImageTexture, x, y, z);
+                const int voxel = tex1Dfetch<int>(maskTexture, index);
+                const float refValue = tex1Dfetch<float>(referenceImageTexture, voxel);
                 if (refValue != refValue) return;
-                for (int r = int(refValue - 1); r < int(refValue + 3); r++) {
+                const float warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
+                if (warValue != warValue) return;
+                for (int r = int(refValue) - 1; r < int(refValue) + 3; r++) {
                     if (0 <= r && r < curRefBinNumber) {
                         const double refBasis = GetBasisSplineValue<double>(refValue - r);
                         for (int w = int(warValue) - 1; w < int(warValue) + 3; w++) {
@@ -130,12 +127,11 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
             // An approximation is used for the Parzen windowing. First intensities are binarised then
             // the histogram is convolved with a spine kernel function.
             thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
-                const int& voxel = tex1Dfetch<int>(maskTexture, index);
-                const float& warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
-                if (warValue != warValue) return;
-                auto&& [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDims);
-                const float& refValue = tex3D<float>(referenceImageTexture, x, y, z);
+                const int voxel = tex1Dfetch<int>(maskTexture, index);
+                const float refValue = tex1Dfetch<float>(referenceImageTexture, voxel);
                 if (refValue != refValue) return;
+                const float warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
+                if (warValue != warValue) return;
                 if (0 <= refValue && refValue < curRefBinNumber && 0 <= warValue && warValue < curFloBinNumber)
                     atomicAdd(&jointHistogramProCuda[int(refValue) + int(warValue) * curRefBinNumber], 1.0);
             });
@@ -225,7 +221,7 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
 }
 /* *************************************************************** */
 static double GetSimilarityMeasureValue(const nifti_image *referenceImage,
-                                        const cudaArray *referenceImageCuda,
+                                        const float *referenceImageCuda,
                                         const nifti_image *warpedImage,
                                         const float *warpedImageCuda,
                                         const double *timePointWeights,
@@ -304,7 +300,7 @@ template<> struct Derivative<false> { using Type = double2; };
 /// Called when we only have one target and one source image
 template<bool is3d>
 void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
-                                      const cudaArray *referenceImageCuda,
+                                      const float *referenceImageCuda,
                                       const float *warpedImageCuda,
                                       const float4 *warpedGradientCuda,
                                       const double *jointHistogramLogCuda,
@@ -324,14 +320,10 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
     const int referenceOffset = refBinNumber * floBinNumber;
     const int floatingOffset = referenceOffset + refBinNumber;
 
-    auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0,
-                                                              cudaChannelFormatKindNone, 1, cudaFilterModePoint, true);
-    auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + currentTimePoint * voxelNumber, cudaResourceTypeLinear,
-                                                           voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1);
-    auto warpedGradientTexturePtr = Cuda::CreateTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
-                                                              cudaChannelFormatKindFloat, 4);
-    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
-                                                    cudaChannelFormatKindSigned, 1);
+    auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto warpedGradientTexturePtr = Cuda::CreateTextureObject(warpedGradientCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
     auto referenceImageTexture = *referenceImageTexturePtr;
     auto warpedImageTexture = *warpedImageTexturePtr;
     auto warpedGradientTexture = *warpedGradientTexturePtr;
@@ -339,45 +331,40 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
 
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
         const int targetIndex = tex1Dfetch<int>(maskTexture, index);
-        const float warpedImageValue = tex1Dfetch<float>(warpedImageTexture, targetIndex);
-        if (warpedImageValue != warpedImageValue) return;
-        const auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(targetIndex, imageSize);
-        const float referenceImageValue = tex3D<float>(referenceImageTexture,
-                                                       (float(x) + 0.5f) / float(imageSize.x),
-                                                       (float(y) + 0.5f) / float(imageSize.y),
-                                                       is3d ? (float(z) + 0.5f) / float(imageSize.z) : 0.5f);
-        if (referenceImageValue != referenceImageValue) return;
-        const float4& warpedGradValue = tex1Dfetch<float4>(warpedGradientTexture, index);
-        float4 gradValue = voxelBasedGradientCuda[targetIndex];
+        const float refValue = tex1Dfetch<float>(referenceImageTexture, targetIndex);
+        if (refValue != refValue) return;
+        const float warValue = tex1Dfetch<float>(warpedImageTexture, targetIndex);
+        if (warValue != warValue) return;
+        const float4 warGradValue = tex1Dfetch<float4>(warpedGradientTexture, index);
 
         // No computation is performed if any of the point is part of the background
         // The two is added because the image is resample between 2 and bin+2
         // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65
         typename Derivative<is3d>::Type jointDeriv{}, refDeriv{}, warDeriv{};
-        for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) {
+        for (int r = int(refValue) - 1; r < int(refValue) + 3; r++) {
             if (-1 < r && r < refBinNumber) {
-                for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) {
+                for (int w = int(warValue) - 1; w < int(warValue) + 3; w++) {
                     if (-1 < w && w < floBinNumber) {
-                        const double commonValue = (GetBasisSplineValue<double>(referenceImageValue - r) *
-                                                    GetBasisSplineDerivativeValue<double>(warpedImageValue - w));
+                        const double commonValue = (GetBasisSplineValue<double>(refValue - r) *
+                                                    GetBasisSplineDerivativeValue<double>(warValue - w));
                         const double jointLog = jointHistogramLogCuda[r + w * refBinNumber];
                         const double refLog = jointHistogramLogCuda[r + referenceOffset];
                         const double warLog = jointHistogramLogCuda[w + floatingOffset];
-                        if (warpedGradValue.x == warpedGradValue.x) {
-                            const double commonMultGrad = commonValue * warpedGradValue.x;
+                        if (warGradValue.x == warGradValue.x) {
+                            const double commonMultGrad = commonValue * warGradValue.x;
                             jointDeriv.x += commonMultGrad * jointLog;
                             refDeriv.x += commonMultGrad * refLog;
                             warDeriv.x += commonMultGrad * warLog;
                         }
-                        if (warpedGradValue.y == warpedGradValue.y) {
-                            const double commonMultGrad = commonValue * warpedGradValue.y;
+                        if (warGradValue.y == warGradValue.y) {
+                            const double commonMultGrad = commonValue * warGradValue.y;
                             jointDeriv.y += commonMultGrad * jointLog;
                             refDeriv.y += commonMultGrad * refLog;
                             warDeriv.y += commonMultGrad * warLog;
                         }
                         if constexpr (is3d) {
-                            if (warpedGradValue.z == warpedGradValue.z) {
-                                const double commonMultGrad = commonValue * warpedGradValue.z;
+                            if (warGradValue.z == warGradValue.z) {
+                                const double commonMultGrad = commonValue * warGradValue.z;
                                 jointDeriv.z += commonMultGrad * jointLog;
                                 refDeriv.z += commonMultGrad * refLog;
                                 warDeriv.z += commonMultGrad * warLog;
@@ -389,6 +376,7 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
         }
 
         // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
+        float4 gradValue = voxelBasedGradientCuda[targetIndex];
         gradValue.x += static_cast<float>(timePointWeight * (refDeriv.x + warDeriv.x - nmi * jointDeriv.x) / normalisedJE);
         gradValue.y += static_cast<float>(timePointWeight * (refDeriv.y + warDeriv.y - nmi * jointDeriv.y) / normalisedJE);
         if constexpr (is3d)
diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h
index c3f33d4c..3af164a9 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.h
+++ b/reg-lib/cuda/_reg_nmi_gpu.h
@@ -26,9 +26,9 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
 
     /// @brief Initialise the reg_nmi_gpu object
     virtual void InitialiseMeasure(nifti_image *refImg,
-                                   cudaArray *refImgCuda,
+                                   float *refImgCuda,
                                    nifti_image *floImg,
-                                   cudaArray *floImgCuda,
+                                   float *floImgCuda,
                                    int *refMask,
                                    int *refMaskCuda,
                                    size_t activeVoxNum,
@@ -68,9 +68,9 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu {
 class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu {
 public:
     void InitialiseMeasure(nifti_image *refImg,
-                           cudaArray *refImgCuda,
+                           float *refImgCuda,
                            nifti_image *floImg,
-                           cudaArray *floImgCuda,
+                           float *floImgCuda,
                            int *refMask,
                            int *refMaskCuda,
                            size_t activeVoxNum,
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 474ff131..28b187b6 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -172,8 +172,7 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
                                          float4 *conjugateGCuda,
                                          float4 *conjugateHCuda,
                                          const size_t nVoxels) {
-    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear,
-                                                          nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
 
     const unsigned blocks = CudaContext::GetBlockSize()->reg_initialiseConjugateGradient;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
@@ -200,20 +199,14 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
                                   float4 *conjugateGBwCuda,
                                   float4 *conjugateHBwCuda,
                                   const size_t nVoxelsBw) {
-    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear,
-                                                          nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, cudaResourceTypeLinear,
-                                                       nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    auto conjugateHTexture = Cuda::CreateTextureObject(conjugateHCuda, cudaResourceTypeLinear,
-                                                       nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    Cuda::UniqueTextureObjectPtr gradientImageBwTexture(nullptr, nullptr), conjugateGBwTexture(nullptr, nullptr), conjugateHBwTexture(nullptr, nullptr);
+    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
+    auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, nVoxels, cudaChannelFormatKindFloat, 4);
+    auto conjugateHTexture = Cuda::CreateTextureObject(conjugateHCuda, nVoxels, cudaChannelFormatKindFloat, 4);
+    Cuda::UniqueTextureObjectPtr gradientImageBwTexture, conjugateGBwTexture, conjugateHBwTexture;
     if (isSymmetric) {
-        gradientImageBwTexture = std::move(Cuda::CreateTextureObject(gradientImageBwCuda, cudaResourceTypeLinear,
-                                                                     nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
-        conjugateGBwTexture = std::move(Cuda::CreateTextureObject(conjugateGBwCuda, cudaResourceTypeLinear,
-                                                                  nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
-        conjugateHBwTexture = std::move(Cuda::CreateTextureObject(conjugateHBwCuda, cudaResourceTypeLinear,
-                                                                  nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4));
+        gradientImageBwTexture = Cuda::CreateTextureObject(gradientImageBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4);
+        conjugateGBwTexture = Cuda::CreateTextureObject(conjugateGBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4);
+        conjugateHBwTexture = Cuda::CreateTextureObject(conjugateHBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4);
     }
 
     // gam = sum((grad+g)*grad)/sum(HxG);
@@ -267,10 +260,8 @@ void reg_updateControlPointPosition_gpu(const size_t nVoxels,
                                         const bool optimiseX,
                                         const bool optimiseY,
                                         const bool optimiseZ) {
-    auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, cudaResourceTypeLinear,
-                                                             nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
-    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear,
-                                                          nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, nVoxels, cudaChannelFormatKindFloat, 4);
+    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
 
     const unsigned blocks = (unsigned)CudaContext::GetBlockSize()->reg_updateControlPointPosition;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu
index 6eb684ff..fe3eb39b 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/_reg_resampling_gpu.cu
@@ -16,7 +16,7 @@
 /* *************************************************************** */
 void reg_resampleImage_gpu(const nifti_image *floatingImage,
                            float *warpedImageCuda,
-                           const cudaArray *floatingImageCuda,
+                           const float *floatingImageCuda,
                            const float4 *deformationFieldCuda,
                            const int *maskCuda,
                            const size_t activeVoxelNumber,
@@ -26,16 +26,15 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage,
         NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");
 
     auto blockSize = CudaContext::GetBlockSize();
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
 
     // Create the texture object for the floating image
-    auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, cudaResourceTypeArray);
+    auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
     // Create the texture object for the deformation field
-    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
-                                                             activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
     // Create the texture object for the mask
-    auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
-                                                 cudaChannelFormatKindSigned, 1);
+    auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
 
     // Bind the real to voxel matrix to the texture
     const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
@@ -60,7 +59,7 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage,
 }
 /* *************************************************************** */
 void reg_getImageGradient_gpu(const nifti_image *floatingImage,
-                              const cudaArray *floatingImageCuda,
+                              const float *floatingImageCuda,
                               const float4 *deformationFieldCuda,
                               float4 *warpedGradientCuda,
                               const size_t activeVoxelNumber,
@@ -70,14 +69,14 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage,
         NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");
 
     auto blockSize = CudaContext::GetBlockSize();
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
     if (paddingValue != paddingValue) paddingValue = 0;
 
     // Create the texture object for the floating image
-    auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, cudaResourceTypeArray);
+    auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
     // Create the texture object for the deformation field
-    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear,
-                                                             activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
 
     // Bind the real to voxel matrix to the texture
     const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h
index 6afd287a..5fc18144 100755
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/_reg_resampling_gpu.h
@@ -17,7 +17,7 @@
 /* *************************************************************** */
 void reg_resampleImage_gpu(const nifti_image *floatingImage,
                            float *warpedImageCuda,
-                           const cudaArray *floatingImageCuda,
+                           const float *floatingImageCuda,
                            const float4 *deformationFieldCuda,
                            const int *maskCuda,
                            const size_t activeVoxelNumber,
@@ -25,7 +25,7 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage,
                            const float paddingValue);
 /* *************************************************************** */
 void reg_getImageGradient_gpu(const nifti_image *floatingImage,
-                              const cudaArray *floatingImageCuda,
+                              const float *floatingImageCuda,
                               const float4 *deformationFieldCuda,
                               float4 *warpedGradientCuda,
                               const size_t activeVoxelNumber,
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu
index 0782a984..c2711fdf 100755
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/_reg_resampling_kernels.cu
@@ -50,13 +50,15 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray,
     InterpLinearKernel(relative.y, yBasis);
 
     double intensity = 0;
-    for (char b = 0; b < 2; b++) {
+    int indexY = previous.y * floatingDim.x + previous.x;
+    for (char b = 0; b < 2; b++, indexY += floatingDim.x) {
         const int y = previous.y + b;
+        int index = indexY;
         double xTempNewValue = 0;
-        for (char a = 0; a < 2; a++) {
+        for (char a = 0; a < 2; a++, index++) {
             const int x = previous.x + a;
             if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) {
-                xTempNewValue += tex3D<float>(floatingTexture, x, y, 0) * xBasis[a];
+                xTempNewValue += tex1Dfetch<float>(floatingTexture, index) * xBasis[a];
             } else {
                 // Padding value
                 xTempNewValue += paddingValue * xBasis[a];
@@ -78,13 +80,12 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray,
                                            const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= activeVoxelNumber) return;
-    const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-
     // Get the real world deformation in the floating space
-    float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
+    const int tid2 = tex1Dfetch<int>(maskTexture, tid);
+    const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
 
     // Get the voxel-based deformation in the floating space
-    float3 voxelDeformation;
+    double3 voxelDeformation;
     voxelDeformation.x = (double(floatingMatrix.m[0][0]) * double(realDeformation.x) +
                           double(floatingMatrix.m[0][1]) * double(realDeformation.y) +
                           double(floatingMatrix.m[0][2]) * double(realDeformation.z) +
@@ -109,14 +110,16 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray,
     double intensity = 0;
     for (char c = 0; c < 2; c++) {
         const int z = previous.z + c;
+        int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x;
         double yTempNewValue = 0;
-        for (char b = 0; b < 2; b++) {
+        for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) {
             const int y = previous.y + b;
+            int index = indexYZ + previous.x;
             double xTempNewValue = 0;
-            for (char a = 0; a < 2; a++) {
+            for (char a = 0; a < 2; a++, index++) {
                 const int x = previous.x + a;
                 if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) {
-                    xTempNewValue += tex3D<float>(floatingTexture, x, y, z) * xBasis[a];
+                    xTempNewValue += tex1Dfetch<float>(floatingTexture, index) * xBasis[a];
                 } else {
                     // Padding value
                     xTempNewValue += paddingValue * xBasis[a];
@@ -160,15 +163,17 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
     constexpr float deriv[] = { -1.0f, 1.0f };
 
     float4 gradientValue{};
-    for (char b = 0; b < 2; b++) {
-        float2 tempValueX{};
+    int indexY = previous.y * floatingDim.x + previous.x;
+    for (char b = 0; b < 2; b++, indexY += floatingDim.x) {
         const int y = previous.y + b;
-        for (char a = 0; a < 2; a++) {
+        int index = indexY;
+        float2 tempValueX{};
+        for (char a = 0; a < 2; a++, index++) {
             const int x = previous.x + a;
             float intensity = paddingValue;
 
             if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y)
-                intensity = tex3D<float>(floatingTexture, x, y, 0);
+                intensity = tex1Dfetch<float>(floatingTexture, index);
 
             tempValueX.x += intensity * deriv[a];
             tempValueX.y += intensity * xBasis[a];
@@ -219,16 +224,18 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
     float4 gradientValue{};
     for (char c = 0; c < 2; c++) {
         const int z = previous.z + c;
+        int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x;
         float3 tempValueY{};
-        for (char b = 0; b < 2; b++) {
-            float2 tempValueX{};
+        for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) {
             const int y = previous.y + b;
-            for (char a = 0; a < 2; a++) {
+            int index = indexYZ + previous.x;
+            float2 tempValueX{};
+            for (char a = 0; a < 2; a++, index++) {
                 const int x = previous.x + a;
                 float intensity = paddingValue;
 
                 if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z)
-                    intensity = tex3D<float>(floatingTexture, x, y, z);
+                    intensity = tex1Dfetch<float>(floatingTexture, index);
 
                 tempValueX.x += intensity * deriv[a];
                 tempValueX.y += intensity * xBasis[a];
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index bf414396..7b7d94d4 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -22,8 +22,8 @@ reg_ssd_gpu::~reg_ssd_gpu() {
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
-                                    nifti_image *floImg, cudaArray *floImgCuda,
+void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, float *refImgCuda,
+                                    nifti_image *floImg, float *floImgCuda,
                                     int *refMask, int *refMaskCuda,
                                     size_t activeVoxNum,
                                     nifti_image *warpedImg, float *warpedImgCuda,
@@ -46,32 +46,29 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda,
     // Check if the reference and floating images need to be updated
     for (int i = 0; i < this->referenceTimePoints; ++i)
         if (this->timePointWeights[i] > 0 && normaliseTimePoint[i]) {
-            Cuda::TransferNiftiToDevice<float>(this->referenceImageCuda, this->referenceImage);
-            Cuda::TransferNiftiToDevice<float>(this->floatingImageCuda, this->floatingImage);
+            Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage);
+            Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage);
             break;
         }
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 double reg_getSsdValue_gpu(const nifti_image *referenceImage,
-                           const cudaArray *referenceImageCuda,
+                           const float *referenceImageCuda,
                            const float *warpedCuda,
                            const float *localWeightSimCuda,
                            const int *maskCuda,
-                           const size_t& activeVoxelNumber) {
+                           const size_t activeVoxelNumber) {
     // Copy the constant memory variables
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
-    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray);
-    auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
-                                                   cudaChannelFormatKindFloat, 1);
-    auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
-                                                 cudaChannelFormatKindSigned, 1);
-    Cuda::UniqueTextureObjectPtr localWeightSimTexture(nullptr, nullptr);
+    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
+    Cuda::UniqueTextureObjectPtr localWeightSimTexture;
     if (localWeightSimCuda)
-        localWeightSimTexture = std::move(Cuda::CreateTextureObject(localWeightSimCuda, cudaResourceTypeLinear,
-                                                                    voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1));
+        localWeightSimTexture = Cuda::CreateTextureObject(localWeightSimCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
 
     // Create an array on the device to store the absolute difference values
     thrust::device_vector<float> ssdSum(1), ssdCount(1);
@@ -111,7 +108,7 @@ double reg_ssd_gpu::GetSimilarityMeasureValueBw() {
 }
 /* *************************************************************** */
 void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
-                                      const cudaArray *referenceImageCuda,
+                                      const float *referenceImageCuda,
                                       const float *warpedCuda,
                                       const float4 *spatialGradCuda,
                                       const float *localWeightSimCuda,
@@ -123,29 +120,22 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
-    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray);
-    auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float),
-                                                   cudaChannelFormatKindFloat, 1);
-    auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int),
-                                                 cudaChannelFormatKindSigned, 1);
-    auto spatialGradTexture = Cuda::CreateTextureObject(spatialGradCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4),
-                                                        cudaChannelFormatKindFloat, 4);
-    Cuda::UniqueTextureObjectPtr localWeightSimTexture(nullptr, nullptr);
+    auto referenceTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto warpedTexturePtr = Cuda::CreateTextureObject(warpedCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
+    auto spatialGradTexturePtr = Cuda::CreateTextureObject(spatialGradCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+    Cuda::UniqueTextureObjectPtr localWeightSimTexturePtr;
     if (localWeightSimCuda)
-        localWeightSimTexture = std::move(Cuda::CreateTextureObject(localWeightSimCuda, cudaResourceTypeLinear,
-                                                                    voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1));
+        localWeightSimTexturePtr = Cuda::CreateTextureObject(localWeightSimCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
 
     // Find number of valid voxels and correct weight
-    const cudaTextureObject_t referenceTextureObject = *referenceTexture;
-    const cudaTextureObject_t warpedTextureObject = *warpedTexture;
-    const size_t validVoxelNumber = thrust::count_if(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int& index) {
-        const float warValue = tex1Dfetch<float>(warpedTextureObject, index);
-        if (warValue != warValue) return false;
-
-        const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim);
-        const float refValue = tex3D<float>(referenceTextureObject, x, y, z);
+    const auto referenceTexture = *referenceTexturePtr;
+    const auto warpedTexture = *warpedTexturePtr;
+    const size_t validVoxelNumber = thrust::count_if(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int index) {
+        const float refValue = tex1Dfetch<float>(referenceTexture, index);
         if (refValue != refValue) return false;
-
+        const float warValue = tex1Dfetch<float>(warpedTexture, index);
+        if (warValue != warValue) return false;
         return true;
     });
     const float adjustedWeight = timepointWeight / static_cast<float>(validVoxelNumber);
@@ -154,8 +144,8 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
     const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    Cuda::GetSsdGradientKernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture,
-                                                        *spatialGradTexture, localWeightSimCuda ? *localWeightSimTexture : 0,
+    Cuda::GetSsdGradientKernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexturePtr, *warpedTexturePtr, *maskTexturePtr,
+                                                        *spatialGradTexturePtr, localWeightSimCuda ? *localWeightSimTexturePtr : 0,
                                                         referenceImageDim, adjustedWeight, (unsigned)activeVoxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 03f184a4..23bd6fd5 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -27,9 +27,9 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu {
 
     /// @brief Initialise the reg_ssd object
     virtual void InitialiseMeasure(nifti_image *refImg,
-                                   cudaArray *refImgCuda,
+                                   float *refImgCuda,
                                    nifti_image *floImg,
-                                   cudaArray *floImgCuda,
+                                   float *floImgCuda,
                                    int *refMask,
                                    int *refMaskCuda,
                                    size_t activeVoxNum,
diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu
index 3b0255e7..99a61530 100755
--- a/reg-lib/cuda/_reg_ssd_kernels.cu
+++ b/reg-lib/cuda/_reg_ssd_kernels.cu
@@ -31,13 +31,12 @@ __global__ void GetSsdValueKernel(float *ssdSum,
     if (tid < activeVoxelNumber) {
         const int index = tex1Dfetch<int>(maskTexture, tid);
 
+        const float refValue = tex1Dfetch<float>(referenceTexture, index);
+        if (refValue != refValue) return;
+
         const float warValue = tex1Dfetch<float>(warpedTexture, index);
         if (warValue != warValue) return;
 
-        const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim);
-        const float refValue = tex3D<float>(referenceTexture, x, y, z);
-        if (refValue != refValue) return;
-
         const float val = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
         const float diff = refValue - warValue;
         atomicAdd(ssdSum, diff * diff * val);
@@ -58,6 +57,9 @@ __global__ void GetSsdGradientKernel(float4 *ssdGradient,
     if (tid < activeVoxelNumber) {
         const int index = tex1Dfetch<int>(maskTexture, tid);
 
+        const float refValue = tex1Dfetch<float>(referenceTexture, index);
+        if (refValue != refValue) return;
+
         const float warValue = tex1Dfetch<float>(warpedTexture, index);
         if (warValue != warValue) return;
 
@@ -67,10 +69,6 @@ __global__ void GetSsdGradientKernel(float4 *ssdGradient,
             spaGradientValue.z != spaGradientValue.z)
             return;
 
-        const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim);
-        const float refValue = tex3D<float>(referenceTexture, x, y, z);
-        if (refValue != refValue) return;
-
         const float val = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
         const float common = -2.f * (refValue - warValue) * adjustedWeight * val;
 
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu
index 2a4bb2bb..f1b9c401 100755
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/_reg_tools_gpu.cu
@@ -26,8 +26,7 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage,
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3);
     const int3 nodeImageDims = make_int3(nodeImage->nx, nodeImage->ny, nodeImage->nz);
     const int3 voxelImageDims = make_int3(voxelImage->nx, voxelImage->ny, voxelImage->nz);
-    auto voxelImageTexture = Cuda::CreateTextureObject(voxelImageCuda, cudaResourceTypeLinear,
-                                                       voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
+    auto voxelImageTexture = Cuda::CreateTextureObject(voxelImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
 
     // The transformation between the image and the grid
     mat44 transformation;
@@ -133,10 +132,8 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image,
                 float4 *smoothedImage;
                 NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4)));
 
-                auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
-                                                              voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
-                auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, cudaResourceTypeLinear,
-                                                               kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1);
+                auto imageTexture = Cuda::CreateTextureObject(imageCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+                auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, kernelSize, cudaChannelFormatKindFloat, 1);
 
                 unsigned blocks, grids;
                 dim3 blockDims, gridDims;
@@ -208,10 +205,8 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
             NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice));
             NR_CUDA_SAFE_CALL(cudaFreeHost(kernel));
 
-            auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear,
-                                                          voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4);
-            auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, cudaResourceTypeLinear,
-                                                           kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1);
+            auto imageTexture = Cuda::CreateTextureObject(imageCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+            auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, kernelSize, cudaChannelFormatKindFloat, 1);
 
             float4 *smoothedImage;
             NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4)));
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index d638755d..035e29c3 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -345,12 +345,9 @@ void block_matching_method_gpu(const nifti_image *referenceImage,
     const uint3 blockSize = make_uint3(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]);
     const unsigned numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2];
 
-    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float),
-                                                      cudaChannelFormatKindFloat, 1);
-    auto warpedTexture = Cuda::CreateTextureObject(warpedImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float),
-                                                   cudaChannelFormatKindFloat, 1);
-    auto totalBlockTexture = Cuda::CreateTextureObject(totalBlockCuda, cudaResourceTypeLinear, numBlocks * sizeof(int),
-                                                       cudaChannelFormatKindSigned, 1);
+    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, referenceImage->nvox, cudaChannelFormatKindFloat, 1);
+    auto warpedTexture = Cuda::CreateTextureObject(warpedImageCuda, referenceImage->nvox, cudaChannelFormatKindFloat, 1);
+    auto totalBlockTexture = Cuda::CreateTextureObject(totalBlockCuda, numBlocks, cudaChannelFormatKindSigned, 1);
 
     unsigned definedBlock = 0, *definedBlockCuda;
     NR_CUDA_SAFE_CALL(cudaMalloc(&definedBlockCuda, sizeof(unsigned)));

From 1e8b36e027e08d28ad498923a779c87e6aa61678 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 20 Nov 2023 16:21:16 +0000
Subject: [PATCH 247/314] Refactorisations

---
 niftyreg_build_version.txt                    |  2 +-
 reg-apps/reg_benchmark.cpp                    |  2 +-
 reg-apps/reg_f3d.cpp                          | 48 +++++------
 reg-apps/reg_ppcnr.cpp                        | 14 ++--
 reg-apps/reg_resample.cpp                     | 10 +--
 reg-apps/reg_resample.h.in                    |  2 +-
 reg-lib/Compute.cpp                           |  4 +-
 reg-lib/Compute.h                             |  2 +-
 reg-lib/ResampleImageKernel.h                 |  2 +-
 reg-lib/_reg_base.cpp                         | 82 +++++++++----------
 reg-lib/_reg_f3d.cpp                          |  8 +-
 reg-lib/_reg_f3d2.cpp                         |  6 +-
 reg-lib/cl/ClResampleImageKernel.cpp          |  4 +-
 reg-lib/cl/ClResampleImageKernel.h            |  2 +-
 reg-lib/cpu/CpuResampleImageKernel.cpp        |  4 +-
 reg-lib/cpu/CpuResampleImageKernel.h          |  2 +-
 reg-lib/cpu/_reg_kld.cpp                      | 12 +--
 reg-lib/cpu/_reg_lncc.cpp                     | 10 +--
 reg-lib/cpu/_reg_mind.cpp                     |  4 +-
 reg-lib/cpu/_reg_nmi.cpp                      | 38 ++++-----
 reg-lib/cpu/_reg_nmi.h                        |  6 +-
 reg-lib/cpu/_reg_resampling.cpp               | 78 +++++++++---------
 reg-lib/cpu/_reg_resampling.h                 |  8 +-
 reg-lib/cpu/_reg_ssd.cpp                      |  8 +-
 reg-lib/cpu/_reg_ssd.h                        |  6 +-
 reg-lib/cpu/_reg_tools.cpp                    | 64 +++++++--------
 reg-lib/cpu/_reg_tools.h                      | 10 +--
 reg-lib/cuda/CMakeLists.txt                   | 22 ++---
 reg-lib/cuda/CudaCompute.cu                   |  6 +-
 reg-lib/cuda/CudaCompute.h                    |  2 +-
 reg-lib/cuda/CudaNormaliseGradient.hpp        |  2 +
 reg-lib/cuda/CudaResampleImageKernel.cpp      |  4 +-
 reg-lib/cuda/CudaResampleImageKernel.h        |  2 +-
 ...eg_resampling_gpu.cu => CudaResampling.cu} |  6 +-
 ...eg_resampling_gpu.h => CudaResampling.hpp} |  2 +-
 ...ng_kernels.cu => CudaResamplingKernels.cu} |  2 +-
 reg-lib/cuda/_reg_ssd_gpu.cu                  |  4 +-
 reg-lib/cuda/resampleKernel.cu                |  4 +-
 reg-lib/cuda/resampleKernel.h                 |  2 +-
 39 files changed, 249 insertions(+), 247 deletions(-)
 rename reg-lib/cuda/{_reg_resampling_gpu.cu => CudaResampling.cu} (98%)
 mode change 100755 => 100644
 rename reg-lib/cuda/{_reg_resampling_gpu.h => CudaResampling.hpp} (98%)
 mode change 100755 => 100644
 rename reg-lib/cuda/{_reg_resampling_kernels.cu => CudaResamplingKernels.cu} (99%)
 mode change 100755 => 100644

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 47531021..4203007d 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-365
+366
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index 6a8ebfbe..c579d61f 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -21,7 +21,7 @@
 
 #ifdef USE_CUDA
 #include "_reg_cudaCommon.h"
-#include "_reg_resampling_gpu.h"
+#include "CudaResampling.hpp"
 #include "_reg_affineTransformation_gpu.h"
 #include "_reg_bspline_gpu.h"
 #include "_reg_mutualinformation_gpu.h"
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index ad804dcd..92f944d2 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -60,14 +60,14 @@ void Usage(char *exec) {
     NR_INFO("\t-rmask <filename>\t\tFilename of a mask image in the reference space");
     NR_INFO("\t-smooR <float>\t\t\tSmooth the reference image using the specified sigma (mm) [0]");
     NR_INFO("\t-smooF <float>\t\t\tSmooth the floating image using the specified sigma (mm) [0]");
-    NR_INFO("\t--rLwTh <float>\t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*");
-    NR_INFO("\t--rUpTh <float>\t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*");
-    NR_INFO("\t--fLwTh <float>\t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*");
-    NR_INFO("\t--fUpTh <float>\t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*");
-    NR_INFO("\t-rLwTh <timepoint> <float>\tLower threshold to apply to the reference image intensities [none]*");
-    NR_INFO("\t-rUpTh <timepoint> <float>\tUpper threshold to apply to the reference image intensities [none]*");
-    NR_INFO("\t-fLwTh <timepoint> <float>\tLower threshold to apply to the floating image intensities [none]*");
-    NR_INFO("\t-fUpTh <timepoint> <float>\tUpper threshold to apply to the floating image intensities [none]*");
+    NR_INFO("\t--rLwTh <float>\t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every time point.*");
+    NR_INFO("\t--rUpTh <float>\t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every time point.*");
+    NR_INFO("\t--fLwTh <float>\t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every time point.*");
+    NR_INFO("\t--fUpTh <float>\t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every time point.*");
+    NR_INFO("\t-rLwTh <tp> <float>\tLower threshold to apply to the reference image intensities [none]*");
+    NR_INFO("\t-rUpTh <tp> <float>\tUpper threshold to apply to the reference image intensities [none]*");
+    NR_INFO("\t-fLwTh <tp> <float>\tLower threshold to apply to the floating image intensities [none]*");
+    NR_INFO("\t-fUpTh <tp> <float>\tUpper threshold to apply to the floating image intensities [none]*");
     NR_INFO("\t* The scl_slope and scl_inter from the nifti header are taken into account for the thresholds");
     NR_INFO("");
     NR_INFO("*** Spline options (All defined at full resolution):");
@@ -89,29 +89,29 @@ void Usage(char *exec) {
     NR_INFO("*** Measure of similarity options:");
     NR_INFO("*** NMI with 64 bins is used except if specified otherwise");
     NR_INFO("\t--nmi\t\t\tNMI. Used NMI even when one or several other measures are specified");
-    NR_INFO("\t--rbn <int>\t\tNMI. Number of bin to use for the reference image histogram. Identical value for every timepoint");
-    NR_INFO("\t--fbn <int>\t\tNMI. Number of bin to use for the floating image histogram. Identical value for every timepoint");
+    NR_INFO("\t--rbn <int>\t\tNMI. Number of bin to use for the reference image histogram. Identical value for every time point");
+    NR_INFO("\t--fbn <int>\t\tNMI. Number of bin to use for the floating image histogram. Identical value for every time point");
     NR_INFO("\t-rbn <tp> <int>\t\tNMI. Number of bin to use for the reference image histogram for the specified time point");
     NR_INFO("\t-fbn <tp> <int>\t\tNMI. Number of bin to use for the floating image histogram for the specified time point");
-    NR_INFO("\t--lncc <float>\t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every timepoint");
-    NR_INFO("\t-lncc <tp> <float>\tLNCC. Standard deviation of the Gaussian kernel for the specified timepoint");
+    NR_INFO("\t--lncc <float>\t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every time point");
+    NR_INFO("\t-lncc <tp> <float>\tLNCC. Standard deviation of the Gaussian kernel for the specified time point");
     NR_INFO("\t--ssd \t\t\tSSD. Used for all time points - images are normalized between 0 and 1 before computing the measure");
-    NR_INFO("\t-ssd <tp> \t\tSSD. Used for the specified timepoint - images are normalized between 0 and 1 before computing the measure");
+    NR_INFO("\t-ssd <tp> \t\tSSD. Used for the specified time point - images are normalized between 0 and 1 before computing the measure");
     NR_INFO("\t--ssdn \t\t\tSSD. Used for all time points - images are NOT normalized between 0 and 1 before computing the measure");
-    NR_INFO("\t-ssdn <tp> \t\tSSD. Used for the specified timepoint - images are NOT normalized between 0 and 1 before computing the measure");
+    NR_INFO("\t-ssdn <tp> \t\tSSD. Used for the specified time point - images are NOT normalized between 0 and 1 before computing the measure");
     NR_INFO("\t--mind <offset>\t\tMIND and the offset to use to compute the descriptor");
     NR_INFO("\t--mindssc <offset>\tMIND-SCC and the offset to use to compute the descriptor");
     NR_INFO("\t--kld\t\t\tKLD. Used for all time points");
-    NR_INFO("\t-kld <tp>\t\tKLD. Used for the specified timepoint");
+    NR_INFO("\t-kld <tp>\t\tKLD. Used for the specified time point");
     NR_INFO("\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities");
     NR_INFO("\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile");
-    NR_INFO("*** Options for setting the weights for each timepoint for each similarity");
+    NR_INFO("*** Options for setting the weights for each time point for each similarity");
     NR_INFO("*** Note, the options above should be used first and will set a default weight of 1");
     NR_INFO("*** The options below should be used afterwards to set the desired weight if different to 1");
-    NR_INFO("\t-nmiw <tp> <float>\tNMI Weight. Weight to use for the NMI similarity measure for the specified timepoint");
-    NR_INFO("\t-lnccw <tp> <float>\tLNCC Weight. Weight to use for the LNCC similarity measure for the specified timepoint");
-    NR_INFO("\t-ssdw <tp> <float>\tSSD Weight. Weight to use for the SSD similarity measure for the specified timepoint");
-    NR_INFO("\t-kldw <tp> <float>\tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint");
+    NR_INFO("\t-nmiw <tp> <float>\tNMI Weight. Weight to use for the NMI similarity measure for the specified time point");
+    NR_INFO("\t-lnccw <tp> <float>\tLNCC Weight. Weight to use for the LNCC similarity measure for the specified time point");
+    NR_INFO("\t-ssdw <tp> <float>\tSSD Weight. Weight to use for the SSD similarity measure for the specified time point");
+    NR_INFO("\t-kldw <tp> <float>\tKLD Weight. Weight to use for the KLD similarity measure for the specified time point");
     NR_INFO("\t-wSim <filename>\tWeight to apply to the measure of similarity at each voxel position");
 
     // NR_INFO("\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)");
@@ -472,17 +472,17 @@ int main(int argc, char **argv) {
         } else if (strcmp(argv[i], "--smoothGrad") == 0) {
             reg->SetGradientSmoothingSigma((PrecisionType)atof(argv[++i]));
         } else if (strcmp(argv[i], "-ssd") == 0) {
-            int timepoint = atoi(argv[++i]);
+            int timePoint = atoi(argv[++i]);
             bool normalise = 1;
-            reg->UseSSD(timepoint, normalise);
+            reg->UseSSD(timePoint, normalise);
         } else if (strcmp(argv[i], "--ssd") == 0) {
             bool normalise = 1;
             for (int t = 0; t < floatingImage->nt; ++t)
                 reg->UseSSD(t, normalise);
         } else if (strcmp(argv[i], "-ssdn") == 0) {
-            int timepoint = atoi(argv[++i]);
+            int timePoint = atoi(argv[++i]);
             bool normalise = 0;
-            reg->UseSSD(timepoint, normalise);
+            reg->UseSSD(timePoint, normalise);
         } else if (strcmp(argv[i], "--ssdn") == 0) {
             bool normalise = 0;
             for (int t = 0; t < floatingImage->nt; ++t)
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index 760a4d45..1724475a 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -1,7 +1,7 @@
 /**
  * @file reg_ppcnr.cpp
  * @author Andrew Melbourne
- * @brief Executable for 4D non-rigid and affine registration (Registration to a single timepoint, timeseries mean, local mean or Progressive Principal Component Registration)
+ * @brief Executable for 4D non-rigid and affine registration (Registration to a single time point, timeseries mean, local mean or Progressive Principal Component Registration)
  * @date 17/07/2013
  *
  *  Copyright (c) 2009-2018, University College London
@@ -110,7 +110,7 @@ void Usage(char *exec)
    NR_INFO("\n*** Alternative Registration Options:");
    NR_INFO("\t-mean \t\t\tIterative registration to the mean image only (no PPCR)."); // registration to the mean is quite inefficient as it uses the ppcr 4D->4D model.
    NR_INFO("\t-locality <int>\t\tIterative registration to the local mean image (pm <int> images - no PPCR).");
-   NR_INFO("\t-tp       <int>\t\tIterative registration to single timepoint (no PPCR).");
+   NR_INFO("\t-tp       <int>\t\tIterative registration to single time point (no PPCR).");
    NR_INFO("\t-noinit \t\tTurn off cpp initialisation from previous iteration.");
    //NR_INFO("\t-flirt \t\t\tfor PPCNR using Flirt affine registration (not tested)");
    NR_INFO("\n*** reg_f3d/reg_aladin options are carried through (use reg_f3d -h or reg_aladin -h to see these options).");
@@ -438,12 +438,12 @@ int main(int argc, char **argv)
    else if(flag->meanonly && flag->locality)
       NR_COUT << "Iterative registration to local mean only (pm" << param->locality << ") (Algorithm will ignore PCA results)----------------" << std::endl;
    else if(flag->tp)
-      NR_COUT << "Iterative registration to single timepoint only (" << param->tp << ") (Algorithm will ignore PCA results)----------------" << std::endl;
+      NR_COUT << "Iterative registration to single time point only (" << param->tp << ") (Algorithm will ignore PCA results)----------------" << std::endl;
    else
       NR_COUT << "PPCNR Parameters\n----------------" << std::endl;
    NR_COUT << "Source image name: " << param->sourceImageName << std::endl;
    if(flag->pmask) NR_COUT << "PCA Mask image name: " << param->pcaMaskName << std::endl;
-   NR_COUT << "Number of timepoints: " << image->nt << std::endl;
+   NR_COUT << "Number of time points: " << image->nt << std::endl;
    NR_COUT << "Number of principal components: " << param->prinComp << std::endl;
    NR_COUT << "Registration max iterations: " << param->maxIteration << std::endl;
 
@@ -718,7 +718,7 @@ int main(int argc, char **argv)
                z[i+image->nt*j]=1.0/sqrtf(image->nt*prinCompNumber); // is this right?! - if using NMI it's rather moot so I'm not too bothered at the moment...
       }
       if(flag->locality) NR_COUT << "Iterative registration to local mean only (pm " << param->locality << " images).\n";
-      if(flag->tp) NR_COUT << "Registration to single timepoint (" << param->tp << ").\n";
+      if(flag->tp) NR_COUT << "Registration to single time point (" << param->tp << ").\n";
 
       // 4. rebuild images
       nifti_image *imagep=nifti_dup(*image, false); // Need to make a new image that has the same info as the original.
@@ -742,7 +742,7 @@ int main(int argc, char **argv)
             }
          }
       }
-      else if(flag->tp)  // single timepoint
+      else if(flag->tp)  // single time point
       {
          PrecisionType *intensityPtr1 = static_cast<PrecisionType *>(image->data);
          PrecisionType *intensityPtr2 = static_cast<PrecisionType *>(imagep->data);
@@ -969,7 +969,7 @@ int main(int argc, char **argv)
    if(flag->locality)
       NR_COUT << "Registration to " << param->locality << "-local mean with " << param->prinComp << " iterations performed in " << minutes << " min " << seconds << " sec\n";
    if(flag->tp)
-      NR_COUT << "Single timepoint registration to image " << param->tp << " performed in " << minutes << " min " << seconds << " sec\n";
+      NR_COUT << "Single time point registration to image " << param->tp << " performed in " << minutes << " min " << seconds << " sec\n";
    if(flag->meanonly & !flag->locality)
       NR_COUT << "Registration to mean image with " << param->prinComp << " iterations performed in " << minutes << " min " << seconds << " sec\n";
    if(!flag->locality & !flag->meanonly & !flag->tp)
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index c45a0cb8..e2fe543d 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -65,7 +65,7 @@ void Usage(char *exec)
    NR_INFO("\t-blank <filename>\n\t\tFilename of the resampled blank grid [none]");
    NR_INFO("\t-inter <int>\n\t\tInterpolation order (0, 1, 3, 4)[3] (0=NN, 1=LIN; 3=CUB, 4=SINC)");
    NR_INFO("\t-pad <int>\n\t\tInterpolation padding value [0]");
-   NR_INFO("\t-tensor\n\t\tThe last six timepoints of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ [off]");
+   NR_INFO("\t-tensor\n\t\tThe last six time points of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ [off]");
    NR_INFO("\t-psf\n\t\tPerform the resampling in two steps to resample an image to a lower resolution [off]");
    NR_INFO("\t-psf_alg <0/1>\n\t\tMinimise the matrix metric (0) or the determinant (1) when estimating the PSF [0]");
    NR_INFO("\t-voff\n\t\tTurns verbose off [on]");
@@ -459,16 +459,16 @@ int main(int argc, char **argv)
          mat33 *jacobian = (mat33 *)malloc(NiftiImage::calcVoxelNumber(deformationFieldImage, 3) * sizeof(mat33));
          reg_defField_getJacobianMatrix(deformationFieldImage, jacobian);
          // resample the DTI image
-         bool timepoints[7];
-         for(int i=0; i<7; ++i) timepoints[i]=true;
-         if(floatingImage->dim[4]==7) timepoints[0]=false;
+         bool timePoints[7];
+         for(int i=0; i<7; ++i) timePoints[i]=true;
+         if(floatingImage->dim[4]==7) timePoints[0]=false;
          reg_resampleImage(floatingImage,
                            warpedImage,
                            deformationFieldImage,
                            nullptr,
                            param->interpolation,
                            std::numeric_limits<float>::quiet_NaN(),
-                           timepoints,
+                           timePoints,
                            jacobian
                            );
       }
diff --git a/reg-apps/reg_resample.h.in b/reg-apps/reg_resample.h.in
index 7f2f741d..0579282b 100644
--- a/reg-apps/reg_resample.h.in
+++ b/reg-apps/reg_resample.h.in
@@ -127,7 +127,7 @@ char xml_resample[] =
    "    <boolean>\n"
    "      <name>tensorImage</name>\n"
    "      <longflag>tensor</longflag>\n"
-   "      <description>The last six timepoints of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ</description>\n"
+   "      <description>The last six time points of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ</description>\n"
    "      <label>Tensor Image</label>\n"
    "      <default>false</default>\n"
    "    </boolean>\n"
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index a0be36d6..0a7232b6 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -126,7 +126,7 @@ void Compute::UpdateControlPointPosition(float *currentDof,
     }
 }
 /* *************************************************************** */
-void Compute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
+void Compute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) {
     DefContent& con = dynamic_cast<DefContent&>(this->con);
     reg_getImageGradient(con.GetFloating(),
                          con.GetWarpedGradient(),
@@ -134,7 +134,7 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active
                          con.GetReferenceMask(),
                          interpolation,
                          paddingValue,
-                         activeTimepoint);
+                         activeTimePoint);
 }
 /* *************************************************************** */
 double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index ecf11f0f..f3ccd5eb 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -20,7 +20,7 @@ class Compute {
     virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight);
     virtual void GetDeformationField(bool composition, bool bspline);
     virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float scale, const bool optimiseX, const bool optimiseY, const bool optimiseZ);
-    virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint);
+    virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimePoint);
     virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void SmoothGradient(float sigma);
diff --git a/reg-lib/ResampleImageKernel.h b/reg-lib/ResampleImageKernel.h
index d4c32991..15b91ef1 100644
--- a/reg-lib/ResampleImageKernel.h
+++ b/reg-lib/ResampleImageKernel.h
@@ -10,5 +10,5 @@ class ResampleImageKernel: public Kernel {
     }
     ResampleImageKernel() : Kernel() {}
     virtual ~ResampleImageKernel() {}
-    virtual void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr) = 0;
+    virtual void Calculate(int interp, float paddingValue, bool *dtiTimePoint = nullptr, mat33 *jacMat = nullptr) = 0;
 };
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 566bc2f9..564276f6 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -265,7 +265,7 @@ void reg_base<T>::CheckParameters() {
             measure_nmi->SetTimePointWeight(i, 1.0);
     }
 
-    // Check that images have same number of channels (timepoints)
+    // Check that images have same number of channels (time points)
     // that each channel has at least one similarity measure assigned
     // and that each similarity measure is used for at least one channel
     // Normalise channel and similarity weights so total = 1
@@ -275,7 +275,7 @@ void reg_base<T>::CheckParameters() {
     // Tests are ignored if using MIND or MINDSSC as they are not implemented for multi-channel or weighting
     if (!measure_mind && !measure_mindssc) {
         if (inputFloating->nt != inputReference->nt)
-            NR_FATAL_ERROR("The reference and floating images have different numbers of channels (timepoints)");
+            NR_FATAL_ERROR("The reference and floating images have different numbers of channels (time points)");
         unique_ptr<double[]> chanWeightSum(new double[inputReference->nt]());
         double simWeightSum, totWeightSum = 0.;
         double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr;
@@ -284,7 +284,7 @@ void reg_base<T>::CheckParameters() {
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (nmiWeights[n] < 0)
-                    NR_FATAL_ERROR("The NMI weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive");
+                    NR_FATAL_ERROR("The NMI weight for time point " + std::to_string(n) + " has a negative value - weights must be positive");
                 chanWeightSum[n] += nmiWeights[n];
                 simWeightSum += nmiWeights[n];
                 totWeightSum += nmiWeights[n];
@@ -297,7 +297,7 @@ void reg_base<T>::CheckParameters() {
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (ssdWeights[n] < 0)
-                    NR_FATAL_ERROR("The SSD weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive");
+                    NR_FATAL_ERROR("The SSD weight for time point " + std::to_string(n) + " has a negative value - weights must be positive");
                 chanWeightSum[n] += ssdWeights[n];
                 simWeightSum += ssdWeights[n];
                 totWeightSum += ssdWeights[n];
@@ -310,7 +310,7 @@ void reg_base<T>::CheckParameters() {
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (kldWeights[n] < 0)
-                    NR_FATAL_ERROR("The KLD weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive");
+                    NR_FATAL_ERROR("The KLD weight for time point " + std::to_string(n) + " has a negative value - weights must be positive");
                 chanWeightSum[n] += kldWeights[n];
                 simWeightSum += kldWeights[n];
                 totWeightSum += kldWeights[n];
@@ -323,7 +323,7 @@ void reg_base<T>::CheckParameters() {
             simWeightSum = 0;
             for (int n = 0; n < inputReference->nt; n++) {
                 if (lnccWeights[n] < 0)
-                    NR_FATAL_ERROR("The LNCC weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive");
+                    NR_FATAL_ERROR("The LNCC weight for time point " + std::to_string(n) + " has a negative value - weights must be positive");
                 chanWeightSum[n] += lnccWeights[n];
                 simWeightSum += lnccWeights[n];
                 totWeightSum += lnccWeights[n];
@@ -503,7 +503,7 @@ void reg_base<T>::GetVoxelBasedGradient() {
     //                             currentMask,
     //                             interpolation,
     //                             warpedPaddingValue,
-    //                             measure_dti->GetActiveTimepoints(),
+    //                             measure_dti->GetActiveTimePoints(),
     //		 					   forwardJacobianMatrix,
     //							   warped);
     //    }
@@ -557,68 +557,68 @@ void reg_base<T>::GetVoxelBasedGradient() {
 //}
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) {
+void reg_base<T>::UseNMISetReferenceBinNumber(int timePoint, int refBinNumber) {
     if (!measure_nmi)
         measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
-    measure_nmi->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    measure_nmi->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
-    measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint);
+    measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timePoint);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) {
+void reg_base<T>::UseNMISetFloatingBinNumber(int timePoint, int floBinNumber) {
     if (!measure_nmi)
         measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
-    measure_nmi->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    measure_nmi->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
-    measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint);
+    measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timePoint);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseSSD(int timepoint, bool normalise) {
+void reg_base<T>::UseSSD(int timePoint, bool normalise) {
     if (!measure_ssd)
         measure_ssd.reset(dynamic_cast<reg_ssd*>(measure->Create(MeasureType::Ssd)));
-    measure_ssd->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
-    measure_ssd->SetNormaliseTimepoint(timepoint, normalise);
+    measure_ssd->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0
+    measure_ssd->SetNormaliseTimePoint(timePoint, normalise);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseMIND(int timepoint, int offset) {
+void reg_base<T>::UseMIND(int timePoint, int offset) {
     if (!measure_mind)
         measure_mind.reset(dynamic_cast<reg_mind*>(measure->Create(MeasureType::Mind)));
-    measure_mind->SetTimePointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
+    measure_mind->SetTimePointWeight(timePoint, 1.0);//weight set to 1.0 to indicate time point is active
     measure_mind->SetDescriptorOffset(offset);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseMINDSSC(int timepoint, int offset) {
+void reg_base<T>::UseMINDSSC(int timePoint, int offset) {
     if (!measure_mindssc)
         measure_mindssc.reset(dynamic_cast<reg_mindssc*>(measure->Create(MeasureType::MindSsc)));
-    measure_mindssc->SetTimePointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active
+    measure_mindssc->SetTimePointWeight(timePoint, 1.0);//weight set to 1.0 to indicate time point is active
     measure_mindssc->SetDescriptorOffset(offset);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseKLDivergence(int timepoint) {
+void reg_base<T>::UseKLDivergence(int timePoint) {
     if (!measure_kld)
         measure_kld.reset(dynamic_cast<reg_kld*>(measure->Create(MeasureType::Kld)));
-    measure_kld->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0
+    measure_kld->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseLNCC(int timepoint, float stddev) {
+void reg_base<T>::UseLNCC(int timePoint, float stddev) {
     if (!measure_lncc)
         measure_lncc.reset(dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc)));
-    measure_lncc->SetKernelStandardDeviation(timepoint, stddev);
-    measure_lncc->SetTimePointWeight(timepoint, 1.0); // weight initially set to default value of 1.0
+    measure_lncc->SetKernelStandardDeviation(timePoint, stddev);
+    measure_lncc->SetTimePointWeight(timePoint, 1.0); // weight initially set to default value of 1.0
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
@@ -631,44 +631,44 @@ void reg_base<T>::SetLNCCKernelType(ConvKernelType type) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::UseDTI(bool *timepoint) {
+void reg_base<T>::UseDTI(bool *timePoint) {
     NR_FATAL_ERROR("The use of DTI has been deactivated as it requires some refactoring");
 
     if (!measure_dti)
         measure_dti.reset(dynamic_cast<reg_dti*>(measure->Create(MeasureType::Dti)));
     for (int i = 0; i < inputReference->nt; ++i) {
-        if (timepoint[i])
-            measure_dti->SetTimePointWeight(i, 1.0);  // weight set to 1.0 to indicate timepoint is active
+        if (timePoint[i])
+            measure_dti->SetTimePointWeight(i, 1.0);  // weight set to 1.0 to indicate time point is active
     }
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetNMIWeight(int timepoint, double weight) {
+void reg_base<T>::SetNMIWeight(int timePoint, double weight) {
     if (!measure_nmi)
-        NR_FATAL_ERROR("The NMI object has to be created before the timepoint weights can be set");
-    measure_nmi->SetTimePointWeight(timepoint, weight);
+        NR_FATAL_ERROR("The NMI object has to be created before the time point weights can be set");
+    measure_nmi->SetTimePointWeight(timePoint, weight);
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetLNCCWeight(int timepoint, double weight) {
+void reg_base<T>::SetLNCCWeight(int timePoint, double weight) {
     if (!measure_lncc)
-        NR_FATAL_ERROR("The LNCC object has to be created before the timepoint weights can be set");
-    measure_lncc->SetTimePointWeight(timepoint, weight);
+        NR_FATAL_ERROR("The LNCC object has to be created before the time point weights can be set");
+    measure_lncc->SetTimePointWeight(timePoint, weight);
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetSSDWeight(int timepoint, double weight) {
+void reg_base<T>::SetSSDWeight(int timePoint, double weight) {
     if (!measure_ssd)
-        NR_FATAL_ERROR("The SSD object has to be created before the timepoint weights can be set");
-    measure_ssd->SetTimePointWeight(timepoint, weight);
+        NR_FATAL_ERROR("The SSD object has to be created before the time point weights can be set");
+    measure_ssd->SetTimePointWeight(timePoint, weight);
 }
 /* *************************************************************** */
 template<class T>
-void reg_base<T>::SetKLDWeight(int timepoint, double weight) {
+void reg_base<T>::SetKLDWeight(int timePoint, double weight) {
     if (!measure_kld)
-        NR_FATAL_ERROR("The KLD object has to be created before the timepoint weights can be set");
-    measure_kld->SetTimePointWeight(timepoint, weight);
+        NR_FATAL_ERROR("The KLD object has to be created before the time point weights can be set");
+    measure_kld->SetTimePointWeight(timePoint, weight);
 }
 /* *************************************************************** */
 template<class T>
@@ -694,7 +694,7 @@ void reg_base<T>::WarpFloatingImage(int inter) {
                           currentMask,
                           inter,
                           warpedPaddingValue,
-                          measure_dti->GetActiveTimepoints(),
+                          measure_dti->GetActiveTimePoints(),
                           forwardJacobianMatrix);*/
     }
     NR_FUNC_CALLED();
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 0fece668..afef536b 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -204,11 +204,11 @@ void reg_f3d<T>::Initialise() {
     NR_VERBOSE("\t* image spacing: " << this->inputReference->dx << " x " << this->inputReference->dy << " x " <<
                this->inputReference->dz << " mm");
     for (int i = 0; i < this->inputReference->nt; i++) {
-        NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputReference->nt - 1 << ": [" <<
+        NR_VERBOSE("\t* intensity threshold for time point " << i << "/" << this->inputReference->nt - 1 << ": [" <<
                    this->referenceThresholdLow[i] << " " << this->referenceThresholdUp[i] << "]");
         if (this->measure_nmi) {
             if (this->measure_nmi->GetTimePointWeights()[i] > 0) {
-                NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputReference->nt - 1 << ": " <<
+                NR_VERBOSE("\t* binning size for time point " << i << "/" << this->inputReference->nt - 1 << ": " <<
                            this->measure_nmi->GetReferenceBinNumber()[i] - 4);
             }
         }
@@ -222,11 +222,11 @@ void reg_f3d<T>::Initialise() {
     NR_VERBOSE("\t* image spacing: " << this->inputFloating->dx << " x " << this->inputFloating->dy << " x " <<
                this->inputFloating->dz << " mm");
     for (int i = 0; i < this->inputFloating->nt; i++) {
-        NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": [" <<
+        NR_VERBOSE("\t* intensity threshold for time point " << i << "/" << this->inputFloating->nt - 1 << ": [" <<
                    this->floatingThresholdLow[i] << " " << this->floatingThresholdUp[i] << "]");
         if (this->measure_nmi) {
             if (this->measure_nmi->GetTimePointWeights()[i] > 0) {
-                NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": " <<
+                NR_VERBOSE("\t* binning size for time point " << i << "/" << this->inputFloating->nt - 1 << ": " <<
                            this->measure_nmi->GetFloatingBinNumber()[i] - 4);
             }
         }
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index 4337dd7f..c994a471 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -174,7 +174,7 @@ void reg_f3d2<T>::WarpFloatingImage(int inter) {
                           floatingMask, // mask
                           inter, // interpolation type
                           this->warpedPaddingValue, // padding value
-                          this->measure_dti->GetActiveTimepoints(),
+                          this->measure_dti->GetActiveTimePoints(),
                           backwardJacobianMatrix);*/
     }
     NR_FUNC_CALLED();
@@ -255,7 +255,7 @@ void reg_f3d2<T>::GetVoxelBasedGradient() {
     //                             this->currentMask,
     //                             this->interpolation,
     //                             this->warpedPaddingValue,
-    //                             this->measure_dti->GetActiveTimepoints(),
+    //                             this->measure_dti->GetActiveTimePoints(),
     //                             this->forwardJacobianMatrix,
     //                             this->warped);
 
@@ -265,7 +265,7 @@ void reg_f3d2<T>::GetVoxelBasedGradient() {
     //                             floatingMask,
     //                             this->interpolation,
     //                             this->warpedPaddingValue,
-    //                             this->measure_dti->GetActiveTimepoints(),
+    //                             this->measure_dti->GetActiveTimePoints(),
     //                             backwardJacobianMatrix,
     //                             backwardWarped);
     //   if(this->measure_dti)
diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp
index 4867af20..59e76be1 100644
--- a/reg-lib/cl/ClResampleImageKernel.cpp
+++ b/reg-lib/cl/ClResampleImageKernel.cpp
@@ -55,11 +55,11 @@ ClResampleImageKernel::ClResampleImageKernel(Content *conIn) : ResampleImageKern
 /* *************************************************************** */
 void ClResampleImageKernel::Calculate(int interp,
                                       float paddingValue,
-                                      bool *dti_timepoint,
+                                      bool *dtiTimePoint,
                                       mat33 *jacMat) {
     cl_int errNum;
     // Define the DTI indices if required
-    if (dti_timepoint != nullptr || jacMat != nullptr)
+    if (dtiTimePoint != nullptr || jacMat != nullptr)
         NR_FATAL_ERROR("The DTI resampling has not yet been implemented with the OpenCL platform");
 
     if (this->floatingImage->nz > 1) {
diff --git a/reg-lib/cl/ClResampleImageKernel.h b/reg-lib/cl/ClResampleImageKernel.h
index c6db7d23..06f7afde 100644
--- a/reg-lib/cl/ClResampleImageKernel.h
+++ b/reg-lib/cl/ClResampleImageKernel.h
@@ -7,7 +7,7 @@ class ClResampleImageKernel: public ResampleImageKernel {
 public:
     ClResampleImageKernel(Content *conIn);
     ~ClResampleImageKernel();
-    void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr);
+    void Calculate(int interp, float paddingValue, bool *dtiTimePoint = nullptr, mat33 *jacMat = nullptr);
 
 private:
     nifti_image *floatingImage;
diff --git a/reg-lib/cpu/CpuResampleImageKernel.cpp b/reg-lib/cpu/CpuResampleImageKernel.cpp
index a5791b13..1544e9d5 100644
--- a/reg-lib/cpu/CpuResampleImageKernel.cpp
+++ b/reg-lib/cpu/CpuResampleImageKernel.cpp
@@ -12,7 +12,7 @@ CpuResampleImageKernel::CpuResampleImageKernel(Content *conIn) : ResampleImageKe
 /* *************************************************************** */
 void CpuResampleImageKernel::Calculate(int interp,
                                        float paddingValue,
-                                       bool *dti_timepoint,
+                                       bool *dtiTimePoint,
                                        mat33 * jacMat) {
     reg_resampleImage(floatingImage,
                       warpedImage,
@@ -20,7 +20,7 @@ void CpuResampleImageKernel::Calculate(int interp,
                       mask,
                       interp,
                       paddingValue,
-                      dti_timepoint,
+                      dtiTimePoint,
                       jacMat);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/CpuResampleImageKernel.h b/reg-lib/cpu/CpuResampleImageKernel.h
index 81982fba..cea843e8 100644
--- a/reg-lib/cpu/CpuResampleImageKernel.h
+++ b/reg-lib/cpu/CpuResampleImageKernel.h
@@ -6,7 +6,7 @@
 class CpuResampleImageKernel: public ResampleImageKernel {
 public:
     CpuResampleImageKernel(Content *con);
-    void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr);
+    void Calculate(int interp, float paddingValue, bool *dtiTimePoint = nullptr, mat33 *jacMat = nullptr);
 
 private:
     nifti_image *floatingImage;
diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp
index eefab0bc..d7a99965 100755
--- a/reg-lib/cpu/_reg_kld.cpp
+++ b/reg-lib/cpu/_reg_kld.cpp
@@ -58,7 +58,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg,
     }
 
     for (int i = 0; i < this->referenceTimePoints; ++i)
-        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]);
+        NR_DEBUG("Weight for time point " << i << ": " << this->timePointWeights[i]);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
@@ -164,7 +164,7 @@ double reg_kld::GetSimilarityMeasureValueBw() {
  * @param mask Array that contains a mask to specify which voxel
  * should be considered
  * @param currentTimePoint Specified which time point volumes have to be considered
- * @param timepointWeight Weight of the current time point
+ * @param timePointWeight Weight of the current time point
  */
 template <class DataType>
 void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
@@ -174,7 +174,7 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
                                            const nifti_image *jacobianDetImg,
                                            const int *mask,
                                            const int currentTimePoint,
-                                           const double timepointWeight) {
+                                           const double timePointWeight) {
 #ifdef _WIN32
     long voxel;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -206,7 +206,7 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage,
                 activeVoxelNumber++;
         }
     }
-    const double adjustedWeight = timepointWeight / activeVoxelNumber;
+    const double adjustedWeight = timePointWeight / activeVoxelNumber;
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -263,7 +263,7 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
                                             nifti_image *jacobianDetImg,
                                             int *mask,
                                             int currentTimePoint,
-                                            double timepointWeight) {
+                                            double timePointWeight) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         reg_getKLDivergenceVoxelBasedGradient<RefImgDataType>(referenceImage,
@@ -273,7 +273,7 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
                                                               jacobianDetImg,
                                                               mask,
                                                               currentTimePoint,
-                                                              timepointWeight);
+                                                              timePointWeight);
     }, NiftiImage::getFloatingDataType(referenceImage));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp
index 6ce58b3f..cecc2c8d 100644
--- a/reg-lib/cpu/_reg_lncc.cpp
+++ b/reg-lib/cpu/_reg_lncc.cpp
@@ -187,7 +187,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg,
     }
 
     for (int i = 0; i < this->referenceTimePoints; ++i)
-        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]);
+        NR_DEBUG("Weight for time point " << i << ": " << this->timePointWeights[i]);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
@@ -403,7 +403,7 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage,
                                    nifti_image *measureGradient,
                                    const ConvKernelType kernelType,
                                    const int currentTimePoint,
-                                   const double timepointWeight) {
+                                   const double timePointWeight) {
 #ifdef _WIN32
     long voxel;
     long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -468,7 +468,7 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage,
     }
 
     //adjust weight for number of voxels
-    const double adjustedWeight = timepointWeight / activeVoxelNumber;
+    const double adjustedWeight = timePointWeight / activeVoxelNumber;
 
     // Smooth the newly computed values
     reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask);
@@ -531,7 +531,7 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                             nifti_image *measureGradient,
                                             const ConvKernelType kernelType,
                                             const int currentTimePoint,
-                                            const double timepointWeight) {
+                                            const double timePointWeight) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         // Compute the mean and variance of the reference and warped floating
@@ -560,7 +560,7 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage,
                                                       measureGradient,
                                                       kernelType,
                                                       currentTimePoint,
-                                                      timepointWeight);
+                                                      timePointWeight);
     }, NiftiImage::getFloatingDataType(referenceImage));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index ff5ae86d..ea4f1739 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -69,7 +69,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage,
     // Create a pointer to the descriptor image
     DataType* mindImgDataPtr = static_cast<DataType*>(mindImage->data);
 
-    // Allocate an image to store the current timepoint reference image
+    // Allocate an image to store the current time point reference image
     nifti_image *currentInputImage = nifti_copy_nim_info(inputImage);
     currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2;
     currentInputImage->nt = currentInputImage->dim[4] = 1;
@@ -174,7 +174,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage,
     // Create a pointer to the descriptor image
     DataType* mindSscImgDataPtr = static_cast<DataType*>(mindSscImage->data);
 
-    // Allocate an image to store the current timepoint reference image
+    // Allocate an image to store the current time point reference image
     nifti_image *currentInputImage = nifti_copy_nim_info(inputImage);
     currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2;
     currentInputImage->nt = currentInputImage->dim[4] = 1;
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 9918c5e7..97b1138b 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -34,10 +34,10 @@ reg_nmi::~reg_nmi() {
 }
 /* *************************************************************** */
 void reg_nmi::DeallocateHistogram() {
-    int timepoint = this->referenceTimePoints;
+    int timePoint = this->referenceTimePoints;
     // Free the joint histograms and the entropy arrays
     if (this->jointHistogramPro != nullptr) {
-        for (int i = 0; i < timepoint; ++i) {
+        for (int i = 0; i < timePoint; ++i) {
             if (this->jointHistogramPro[i] != nullptr)
                 free(this->jointHistogramPro[i]);
             this->jointHistogramPro[i] = nullptr;
@@ -46,7 +46,7 @@ void reg_nmi::DeallocateHistogram() {
     }
     this->jointHistogramPro = nullptr;
     if (this->jointHistogramProBw != nullptr) {
-        for (int i = 0; i < timepoint; ++i) {
+        for (int i = 0; i < timePoint; ++i) {
             if (this->jointHistogramProBw[i] != nullptr)
                 free(this->jointHistogramProBw[i]);
             this->jointHistogramProBw[i] = nullptr;
@@ -56,7 +56,7 @@ void reg_nmi::DeallocateHistogram() {
     this->jointHistogramProBw = nullptr;
 
     if (this->jointHistogramLog != nullptr) {
-        for (int i = 0; i < timepoint; ++i) {
+        for (int i = 0; i < timePoint; ++i) {
             if (this->jointHistogramLog[i] != nullptr)
                 free(this->jointHistogramLog[i]);
             this->jointHistogramLog[i] = nullptr;
@@ -65,7 +65,7 @@ void reg_nmi::DeallocateHistogram() {
     }
     this->jointHistogramLog = nullptr;
     if (this->jointHistogramLogBw != nullptr) {
-        for (int i = 0; i < timepoint; ++i) {
+        for (int i = 0; i < timePoint; ++i) {
             if (this->jointHistogramLogBw[i] != nullptr)
                 free(this->jointHistogramLogBw[i]);
             this->jointHistogramLogBw[i] = nullptr;
@@ -75,7 +75,7 @@ void reg_nmi::DeallocateHistogram() {
     this->jointHistogramLogBw = nullptr;
 
     if (this->entropyValues != nullptr) {
-        for (int i = 0; i < timepoint; ++i) {
+        for (int i = 0; i < timePoint; ++i) {
             if (this->entropyValues[i] != nullptr)
                 free(this->entropyValues[i]);
             this->entropyValues[i] = nullptr;
@@ -84,7 +84,7 @@ void reg_nmi::DeallocateHistogram() {
     }
     this->entropyValues = nullptr;
     if (this->entropyValuesBw != nullptr) {
-        for (int i = 0; i < timepoint; ++i) {
+        for (int i = 0; i < timePoint; ++i) {
             if (this->entropyValuesBw[i] != nullptr)
                 free(this->entropyValuesBw[i]);
             this->entropyValuesBw[i] = nullptr;
@@ -160,7 +160,7 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg,
     }
 
     for (int i = 0; i < this->referenceTimePoints; ++i)
-        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]);
+        NR_DEBUG("Weight for time point " << i << ": " << this->timePointWeights[i]);
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
@@ -406,7 +406,7 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                                            nifti_image *measureGradientImage,
                                            const int *referenceMask,
                                            const int currentTimePoint,
-                                           const double timepointWeight) {
+                                           const double timePointWeight) {
 #ifdef WIN32
     long i;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 2);
@@ -440,7 +440,7 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
 #pragma omp parallel for default(none) \
     shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \
     logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY, \
-    warGradPtrX,warGradPtrY,entropyPtr,nmi,currentTimePoint,timepointWeight)
+    warGradPtrX,warGradPtrY,entropyPtr,nmi,currentTimePoint,timePointWeight)
 #endif // _OPENMP
     for (i = 0; i < voxelNumber; ++i) {
         // Check if the voxel belongs to the image mask
@@ -472,9 +472,9 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage,
                         }
                     }
                 }
-                measureGradPtrX[i] += static_cast<DataType>(timepointWeight * (refDeriv[0] + warDeriv[0] -
+                measureGradPtrX[i] += static_cast<DataType>(timePointWeight * (refDeriv[0] + warDeriv[0] -
                                                                                nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrY[i] += static_cast<DataType>(timepointWeight * (refDeriv[1] + warDeriv[1] -
+                measureGradPtrY[i] += static_cast<DataType>(timePointWeight * (refDeriv[1] + warDeriv[1] -
                                                                                nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
@@ -492,7 +492,7 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
                                            nifti_image *measureGradientImage,
                                            const int *referenceMask,
                                            const int currentTimePoint,
-                                           const double timepointWeight) {
+                                           const double timePointWeight) {
 #ifdef WIN32
     long i;
     const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -527,7 +527,7 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
 #pragma omp parallel for default(none) \
     shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \
     logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \
-    warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimePoint,timepointWeight)
+    warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimePoint,timePointWeight)
 #endif // _OPENMP
     for (i = 0; i < voxelNumber; ++i) {
         // Check if the voxel belongs to the image mask
@@ -564,11 +564,11 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage,
                         }
                     }
                 }
-                measureGradPtrX[i] += static_cast<DataType>(timepointWeight * (refDeriv[0] + warDeriv[0] -
+                measureGradPtrX[i] += static_cast<DataType>(timePointWeight * (refDeriv[0] + warDeriv[0] -
                                                                                nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrY[i] += static_cast<DataType>(timepointWeight * (refDeriv[1] + warDeriv[1] -
+                measureGradPtrY[i] += static_cast<DataType>(timePointWeight * (refDeriv[1] + warDeriv[1] -
                                                                                nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3]));
-                measureGradPtrZ[i] += static_cast<DataType>(timepointWeight * (refDeriv[2] + warDeriv[2] -
+                measureGradPtrZ[i] += static_cast<DataType>(timePointWeight * (refDeriv[2] + warDeriv[2] -
                                                                                nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3]));
             }// Check that the values are defined
         } // mask
@@ -585,7 +585,7 @@ static void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceI
                                                    nifti_image *voxelBasedGradient,
                                                    const int *referenceMask,
                                                    const int currentTimePoint,
-                                                   const double timepointWeight) {
+                                                   const double timePointWeight) {
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         auto GetVoxelBasedNmiGradient = referenceImage->nz > 1 ? reg_getVoxelBasedNmiGradient3d<RefImgDataType> : reg_getVoxelBasedNmiGradient2d<RefImgDataType>;
@@ -599,7 +599,7 @@ static void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceI
                                  voxelBasedGradient,
                                  referenceMask,
                                  currentTimePoint,
-                                 timepointWeight);
+                                 timePointWeight);
     }, NiftiImage::getFloatingDataType(referenceImage));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 16fbda9f..7daea41a 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -49,9 +49,9 @@ class reg_nmi: public reg_measure {
 
     virtual void SetRefAndFloatBinNumbers(unsigned short refBinNumber,
                                           unsigned short floBinNumber,
-                                          int timepoint) {
-        this->referenceBinNumber[timepoint] = refBinNumber;
-        this->floatingBinNumber[timepoint] = floBinNumber;
+                                          int timePoint) {
+        this->referenceBinNumber[timePoint] = refBinNumber;
+        this->floatingBinNumber[timePoint] = floBinNumber;
     }
     virtual void SetReferenceBinNumber(int b, int t) {
         this->referenceBinNumber[t] = b;
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 4b316d95..61d9743b 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -368,7 +368,7 @@ void ResampleImage3D(const nifti_image *floatingImage,
     }
 
     // Iteration over the different volume along the 4th axis
-    for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) {
+    for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) {
         NR_DEBUG("3D resampling of volume number " << t);
 
         FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber];
@@ -547,7 +547,7 @@ void ResampleImage2D(const nifti_image *floatingImage,
     }
 
     // Iteration over the different volume along the 4th axis
-    for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) {
+    for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) {
         NR_DEBUG("2D resampling of volume number " << t);
 
         FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber];
@@ -694,7 +694,7 @@ void reg_resampleImage(nifti_image *floatingImage,
                        const int *mask,
                        const int interpolation,
                        const float paddingValue,
-                       const bool *dtiTimepoint,
+                       const bool *dtiTimePoint,
                        const mat33 *jacMat) {
     if (floatingImage->datatype != warpedImage->datatype)
         NR_FATAL_ERROR("The floating and warped image should have the same data type");
@@ -706,12 +706,12 @@ void reg_resampleImage(nifti_image *floatingImage,
     // Define the DTI indices if required
     int dtIndicies[6];
     for (int i = 0; i < 6; ++i) dtIndicies[i] = -1;
-    if (dtiTimepoint != nullptr) {
+    if (dtiTimePoint != nullptr) {
         if (jacMat == nullptr)
             NR_FATAL_ERROR("DTI resampling: No Jacobian matrix array has been provided");
         int j = 0;
         for (int i = 0; i < floatingImage->nt; ++i) {
-            if (dtiTimepoint[i])
+            if (dtiTimePoint[i])
                 dtIndicies[j++] = i;
         }
         if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3))
@@ -1924,9 +1924,9 @@ void TrilinearImageGradient(const nifti_image *floatingImage,
                             nifti_image *warpedGradient,
                             const int *mask,
                             const float paddingValue,
-                            const int activeTimepoint) {
-    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
-        NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
+                            const int activeTimePoint) {
+    if (activeTimePoint < 0 || activeTimePoint >= floatingImage->nt)
+        NR_FATAL_ERROR("The specified active time point is not defined in the floating image");
 #ifdef _WIN32
     long index;
     const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3);
@@ -1937,7 +1937,7 @@ void TrilinearImageGradient(const nifti_image *floatingImage,
     const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
 #endif
     const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
-    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber];
+    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimePoint * floatingVoxelNumber];
 
     const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
     const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
@@ -1952,7 +1952,7 @@ void TrilinearImageGradient(const nifti_image *floatingImage,
         floatingIJKMatrix = &floatingImage->sto_ijk;
     else floatingIJKMatrix = &floatingImage->qto_ijk;
 
-    NR_DEBUG("3D linear gradient computation of volume number " << activeTimepoint);
+    NR_DEBUG("3D linear gradient computation of volume number " << activeTimePoint);
 
     int previous[3], a, b, c, X, Y, Z;
     FieldType position[3], xBasis[2], yBasis[2], zBasis[2];
@@ -2093,9 +2093,9 @@ void BilinearImageGradient(const nifti_image *floatingImage,
                            nifti_image *warpedGradient,
                            const int *mask,
                            const float paddingValue,
-                           const int activeTimepoint) {
-    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
-        NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
+                           const int activeTimePoint) {
+    if (activeTimePoint < 0 || activeTimePoint >= floatingImage->nt)
+        NR_FATAL_ERROR("The specified active time point is not defined in the floating image");
 #ifdef _WIN32
     long index;
     const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2);
@@ -2106,7 +2106,7 @@ void BilinearImageGradient(const nifti_image *floatingImage,
     const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 2);
 #endif
     const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
-    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber];
+    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimePoint * floatingVoxelNumber];
 
     const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
     const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
@@ -2119,7 +2119,7 @@ void BilinearImageGradient(const nifti_image *floatingImage,
         floatingIJKMatrix = &floatingImage->sto_ijk;
     else floatingIJKMatrix = &floatingImage->qto_ijk;
 
-    NR_DEBUG("2D linear gradient computation of volume number " << activeTimepoint);
+    NR_DEBUG("2D linear gradient computation of volume number " << activeTimePoint);
 
     FieldType position[3], xBasis[2], yBasis[2], relative, world[2], grad[2];
     FieldType deriv[2];
@@ -2203,9 +2203,9 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage,
                                 nifti_image *warpedGradient,
                                 const int *mask,
                                 const float paddingValue,
-                                const int activeTimepoint) {
-    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
-        NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
+                                const int activeTimePoint) {
+    if (activeTimePoint < 0 || activeTimePoint >= floatingImage->nt)
+        NR_FATAL_ERROR("The specified active time point is not defined in the floating image");
 #ifdef _WIN32
     long index;
     const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3);
@@ -2216,7 +2216,7 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage,
     const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
 #endif
     const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
-    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber];
+    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimePoint * floatingVoxelNumber];
 
     const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
     const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
@@ -2231,7 +2231,7 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage,
         floatingIJKMatrix = &floatingImage->sto_ijk;
     else floatingIJKMatrix = &floatingImage->qto_ijk;
 
-    NR_DEBUG("3D cubic spline gradient computation of volume number " << activeTimepoint);
+    NR_DEBUG("3D cubic spline gradient computation of volume number " << activeTimePoint);
 
     int previous[3], c, Z, b, Y, a;
 
@@ -2344,9 +2344,9 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage,
                                 nifti_image *warpedGradient,
                                 const int *mask,
                                 const float paddingValue,
-                                const int activeTimepoint) {
-    if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt)
-        NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image");
+                                const int activeTimePoint) {
+    if (activeTimePoint < 0 || activeTimePoint >= floatingImage->nt)
+        NR_FATAL_ERROR("The specified active time point is not defined in the floating image");
 #ifdef _WIN32
     long index;
     const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2);
@@ -2357,7 +2357,7 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage,
     const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 2);
 #endif
     const FloatingType *floatingIntensityPtr = static_cast<FloatingType*>(floatingImage->data);
-    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber];
+    const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimePoint * floatingVoxelNumber];
 
     const FieldType *deformationFieldPtrX = static_cast<FieldType*>(deformationField->data);
     const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber];
@@ -2370,7 +2370,7 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage,
         floatingIJKMatrix = &floatingImage->sto_ijk;
     else floatingIJKMatrix = &floatingImage->qto_ijk;
 
-    NR_DEBUG("2D cubic spline gradient computation of volume number " << activeTimepoint);
+    NR_DEBUG("2D cubic spline gradient computation of volume number " << activeTimePoint);
 
     int previous[2], b, Y, a;
     double xBasis[4], yBasis[4], xDeriv[4], yDeriv[4], relative;
@@ -2455,7 +2455,7 @@ void reg_getImageGradient(nifti_image *floatingImage,
                           const int *mask,
                           const int interpolation,
                           const float paddingValue,
-                          const int activeTimepoint,
+                          const int activeTimePoint,
                           const int *dtIndicies,
                           const mat33 *jacMat,
                           const nifti_image *warpedImage = nullptr) {
@@ -2471,14 +2471,14 @@ void reg_getImageGradient(nifti_image *floatingImage,
                                                                               warpedGradient,
                                                                               mask,
                                                                               paddingValue,
-                                                                              activeTimepoint);
+                                                                              activeTimePoint);
         } else {
             CubicSplineImageGradient2D<FloatingType, GradientType, FieldType>(floatingImage,
                                                                               deformationField,
                                                                               warpedGradient,
                                                                               mask,
                                                                               paddingValue,
-                                                                              activeTimepoint);
+                                                                              activeTimePoint);
         }
     } else { // trilinear interpolation [ by default ]
         if (deformationField->nu > 2) {
@@ -2487,14 +2487,14 @@ void reg_getImageGradient(nifti_image *floatingImage,
                                                                           warpedGradient,
                                                                           mask,
                                                                           paddingValue,
-                                                                          activeTimepoint);
+                                                                          activeTimePoint);
         } else {
             BilinearImageGradient<FloatingType, GradientType, FieldType>(floatingImage,
                                                                          deformationField,
                                                                          warpedGradient,
                                                                          mask,
                                                                          paddingValue,
-                                                                         activeTimepoint);
+                                                                         activeTimePoint);
         }
     }
     // The temporary logged floating array is deleted
@@ -2513,8 +2513,8 @@ void reg_getImageGradient(nifti_image *floatingImage,
                           const int *mask,
                           const int interpolation,
                           const float paddingValue,
-                          const int activeTimepoint,
-                          const bool *dtiTimepoint,
+                          const int activeTimePoint,
+                          const bool *dtiTimePoint,
                           const mat33 *jacMat,
                           const nifti_image *warpedImage) {
     if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && deformationField->datatype != NIFTI_TYPE_FLOAT64)
@@ -2533,12 +2533,12 @@ void reg_getImageGradient(nifti_image *floatingImage,
     // Define the DTI indices if required
     int dtIndicies[6];
     for (int i = 0; i < 6; ++i) dtIndicies[i] = -1;
-    if (dtiTimepoint != nullptr) {
+    if (dtiTimePoint != nullptr) {
         if (jacMat == nullptr)
             NR_FATAL_ERROR("DTI resampling: No Jacobian matrix array has been provided");
         int j = 0;
         for (int i = 0; i < floatingImage->nt; ++i) {
-            if (dtiTimepoint[i])
+            if (dtiTimePoint[i])
                 dtIndicies[j++] = i;
         }
         if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3))
@@ -2555,7 +2555,7 @@ void reg_getImageGradient(nifti_image *floatingImage,
                                                                                    mask,
                                                                                    interpolation,
                                                                                    paddingValue,
-                                                                                   activeTimepoint,
+                                                                                   activeTimePoint,
                                                                                    dtIndicies,
                                                                                    jacMat,
                                                                                    warpedImage);
@@ -2570,14 +2570,14 @@ void reg_getImageGradient_symDiff(const nifti_image *img,
                                   nifti_image *gradImg,
                                   const int *mask,
                                   const float paddingValue,
-                                  const int timepoint) {
+                                  const int timePoint) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
 
     int dimImg = img->nz > 1 ? 3 : 2;
     int x, y, z;
 
     const DataType *imgPtr = static_cast<DataType*>(img->data);
-    const DataType *currentImgPtr = &imgPtr[timepoint * voxelNumber];
+    const DataType *currentImgPtr = &imgPtr[timePoint * voxelNumber];
 
     DataType *gradPtrX = static_cast<DataType*>(gradImg->data);
     DataType *gradPtrY = &gradPtrX[voxelNumber];
@@ -2631,7 +2631,7 @@ void reg_getImageGradient_symDiff(const nifti_image *img,
                                   nifti_image *gradImg,
                                   const int *mask,
                                   const float paddingValue,
-                                  const int timepoint) {
+                                  const int timePoint) {
     if (img->datatype != gradImg->datatype)
         NR_FATAL_ERROR("Input images are expected to be of the same type");
     if (img->datatype != NIFTI_TYPE_FLOAT32 && img->datatype != NIFTI_TYPE_FLOAT64)
@@ -2639,7 +2639,7 @@ void reg_getImageGradient_symDiff(const nifti_image *img,
 
     std::visit([&](auto&& imgDataType) {
         using ImgDataType = std::decay_t<decltype(imgDataType)>;
-        reg_getImageGradient_symDiff<ImgDataType>(img, gradImg, mask, paddingValue, timepoint);
+        reg_getImageGradient_symDiff<ImgDataType>(img, gradImg, mask, paddingValue, timePoint);
     }, NiftiImage::getFloatingDataType(img));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h
index 04b59979..3fdab5b3 100755
--- a/reg-lib/cpu/_reg_resampling.h
+++ b/reg-lib/cpu/_reg_resampling.h
@@ -39,7 +39,7 @@ void reg_resampleImage(nifti_image *floatingImage,
                        const int *mask,
                        const int interpolation,
                        const float paddingValue,
-                       const bool *dtiTimepoint = nullptr,
+                       const bool *dtiTimePoint = nullptr,
                        const mat33 *jacMat = nullptr);
 /* *************************************************************** */
 void reg_resampleImage_PSF(const nifti_image *floatingImage,
@@ -63,8 +63,8 @@ void reg_getImageGradient(nifti_image *floatingImage,
                           const int *mask,
                           const int interpolation,
                           const float paddingValue,
-                          const int activeTimepoint,
-                          const bool *dtiTimepoint = nullptr,
+                          const int activeTimePoint,
+                          const bool *dtiTimePoint = nullptr,
                           const mat33 *jacMat = nullptr,
                           const nifti_image *warpedImage = nullptr);
 /* *************************************************************** */
@@ -72,7 +72,7 @@ void reg_getImageGradient_symDiff(const nifti_image *img,
                                   nifti_image *gradImg,
                                   const int *mask,
                                   const float paddingValue,
-                                  const int timepoint);
+                                  const int timePoint);
 /* *************************************************************** */
 nifti_image* reg_makeIsotropic(nifti_image*, int);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 2a130c4d..b20f9581 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -74,7 +74,7 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg,
 #endif
 #ifndef NDEBUG
     for (int i = 0; i < this->referenceTimePoints; ++i)
-        NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]);
+        NR_DEBUG("Weight for time point " << i << ": " << this->timePointWeights[i]);
     std::string msg = "Normalize time point:";
     for (int i = 0; i < this->referenceTimePoints; ++i)
         if (this->normaliseTimePoint[i])
@@ -84,8 +84,8 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg,
 #endif
 }
 /* *************************************************************** */
-void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) {
-    this->normaliseTimePoint[timepoint] = normalise;
+void reg_ssd::SetNormaliseTimePoint(int timePoint, bool normalise) {
+    this->normaliseTimePoint[timePoint] = normalise;
 }
 /* *************************************************************** */
 template<class DataType>
@@ -136,7 +136,7 @@ double reg_getSsdValue(const nifti_image *referenceImage,
 #ifdef MRF_USE_SAD
                         const double diff = fabs(refValue - warValue);
 #else
-                        const double diff = std::pow(refValue - warValue, 2.0);
+                        const double diff = Square(refValue - warValue);
 #endif
                         // Jacobian determinant modulation of the ssd if required
                         const DataType val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1);
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index fe359865..b05eded2 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -38,7 +38,7 @@ class reg_ssd: public reg_measure {
                                    nifti_image *warpedGradBw = nullptr,
                                    nifti_image *voxelBasedGradBw = nullptr) override;
     /// @brief Define if the specified time point should be normalised
-    void SetNormaliseTimepoint(int timepoint, bool normalise);
+    void SetNormaliseTimePoint(int timePoint, bool normalise);
     /// @brief Returns the ssd value forwards
     virtual double GetSimilarityMeasureValueFw() override;
     /// @brief Returns the ssd value backwards
@@ -92,7 +92,7 @@ double reg_getSsdValue(const nifti_image *referenceImage,
  * @param mask Array that contains a mask to specify which voxel
  * should be considered
  * @param currentTimePoint Specifies which time point volumes have to be considered
- * @param timepointWeight Weight of the specified time point
+ * @param timePointWeight Weight of the specified time point
  * @param localWeightSim Image that contains the local weight similarity
  */
 template <class DataType>
@@ -103,6 +103,6 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage,
                                   const nifti_image *jacobianDetImage,
                                   const int *mask,
                                   const int currentTimePoint,
-                                  const double timepointWeight,
+                                  const double timePointWeight,
                                   const nifti_image *localWeightSim);
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index f363d8ee..0c95c8e5 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -174,33 +174,33 @@ void reg_intensityRescale_core(nifti_image *image,
 }
 /* *************************************************************** */
 void reg_intensityRescale(nifti_image *image,
-                          int timepoint,
+                          int timePoint,
                           float newMin,
                           float newMax) {
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        reg_intensityRescale_core<unsigned char>(image, timepoint, newMin, newMax);
+        reg_intensityRescale_core<unsigned char>(image, timePoint, newMin, newMax);
         break;
     case NIFTI_TYPE_INT8:
-        reg_intensityRescale_core<char>(image, timepoint, newMin, newMax);
+        reg_intensityRescale_core<char>(image, timePoint, newMin, newMax);
         break;
     case NIFTI_TYPE_UINT16:
-        reg_intensityRescale_core<unsigned short>(image, timepoint, newMin, newMax);
+        reg_intensityRescale_core<unsigned short>(image, timePoint, newMin, newMax);
         break;
     case NIFTI_TYPE_INT16:
-        reg_intensityRescale_core<short>(image, timepoint, newMin, newMax);
+        reg_intensityRescale_core<short>(image, timePoint, newMin, newMax);
         break;
     case NIFTI_TYPE_UINT32:
-        reg_intensityRescale_core<unsigned>(image, timepoint, newMin, newMax);
+        reg_intensityRescale_core<unsigned>(image, timePoint, newMin, newMax);
         break;
     case NIFTI_TYPE_INT32:
-        reg_intensityRescale_core<int>(image, timepoint, newMin, newMax);
+        reg_intensityRescale_core<int>(image, timePoint, newMin, newMax);
         break;
     case NIFTI_TYPE_FLOAT32:
-        reg_intensityRescale_core<float>(image, timepoint, newMin, newMax);
+        reg_intensityRescale_core<float>(image, timePoint, newMin, newMax);
         break;
     case NIFTI_TYPE_FLOAT64:
-        reg_intensityRescale_core<double>(image, timepoint, newMin, newMax);
+        reg_intensityRescale_core<double>(image, timePoint, newMin, newMax);
         break;
     default:
         NR_FATAL_ERROR("The image data type is not supported");
@@ -1097,7 +1097,7 @@ void reg_tools_kernelConvolution(nifti_image *image,
                     } // radius > 0
                 } // active axis
             } // axes
-            // Normalise per timepoint
+            // Normalise per time point
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
    shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr)
@@ -1251,7 +1251,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image,
                     }
                 }
             }
-            // Normalise per timepoint
+            // Normalise per time point
             for (index = 0; index < voxelNumber; ++index) {
                 if (nanImagePtr[index] == 0)
                     intensityPtr[index] = std::numeric_limits<DataType>::quiet_NaN();
@@ -1872,8 +1872,8 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask) {
 }
 /* *************************************************************** */
 template <class DataType>
-DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool isMin = true) {
-    if (timepoint < -1 || timepoint >= image->nt)
+DataType reg_tools_getMinMaxValue(const nifti_image *image, int timePoint, bool isMin = true) {
+    if (timePoint < -1 || timePoint >= image->nt)
         NR_FATAL_ERROR("The required time point does not exist");
 
     const DataType *imgPtr = static_cast<DataType*>(image->data);
@@ -1887,7 +1887,7 @@ DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool
     else minMax = std::max<DataType>;
 
     for (int time = 0; time < image->nt; ++time) {
-        if (time == timepoint || timepoint == -1) {
+        if (time == timePoint || timePoint == -1) {
             for (int u = 0; u < image->nu; ++u) {
                 const DataType *currentVolumePtr = &imgPtr[(u * image->nt + time) * voxelNumber];
                 for (size_t i = 0; i < voxelNumber; ++i) {
@@ -1900,50 +1900,50 @@ DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool
     return retValue;
 }
 /* *************************************************************** */
-float reg_tools_getMinValue(const nifti_image *image, int timepoint) {
+float reg_tools_getMinValue(const nifti_image *image, int timePoint) {
     // Check the image data type
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_tools_getMinMaxValue<unsigned char>(image, timepoint);
+        return reg_tools_getMinMaxValue<unsigned char>(image, timePoint);
     case NIFTI_TYPE_INT8:
-        return reg_tools_getMinMaxValue<char>(image, timepoint);
+        return reg_tools_getMinMaxValue<char>(image, timePoint);
     case NIFTI_TYPE_UINT16:
-        return reg_tools_getMinMaxValue<unsigned short>(image, timepoint);
+        return reg_tools_getMinMaxValue<unsigned short>(image, timePoint);
     case NIFTI_TYPE_INT16:
-        return reg_tools_getMinMaxValue<short>(image, timepoint);
+        return reg_tools_getMinMaxValue<short>(image, timePoint);
     case NIFTI_TYPE_UINT32:
-        return (float)reg_tools_getMinMaxValue<unsigned>(image, timepoint);
+        return (float)reg_tools_getMinMaxValue<unsigned>(image, timePoint);
     case NIFTI_TYPE_INT32:
-        return (float)reg_tools_getMinMaxValue<int>(image, timepoint);
+        return (float)reg_tools_getMinMaxValue<int>(image, timePoint);
     case NIFTI_TYPE_FLOAT32:
-        return reg_tools_getMinMaxValue<float>(image, timepoint);
+        return reg_tools_getMinMaxValue<float>(image, timePoint);
     case NIFTI_TYPE_FLOAT64:
-        return (float)reg_tools_getMinMaxValue<double>(image, timepoint);
+        return (float)reg_tools_getMinMaxValue<double>(image, timePoint);
     default:
         NR_FATAL_ERROR("The image data type is not supported");
         return 0;
     }
 }
 /* *************************************************************** */
-float reg_tools_getMaxValue(const nifti_image *image, int timepoint) {
+float reg_tools_getMaxValue(const nifti_image *image, int timePoint) {
     // Check the image data type
     switch (image->datatype) {
     case NIFTI_TYPE_UINT8:
-        return reg_tools_getMinMaxValue<unsigned char>(image, timepoint, false);
+        return reg_tools_getMinMaxValue<unsigned char>(image, timePoint, false);
     case NIFTI_TYPE_INT8:
-        return reg_tools_getMinMaxValue<char>(image, timepoint, false);
+        return reg_tools_getMinMaxValue<char>(image, timePoint, false);
     case NIFTI_TYPE_UINT16:
-        return reg_tools_getMinMaxValue<unsigned short>(image, timepoint, false);
+        return reg_tools_getMinMaxValue<unsigned short>(image, timePoint, false);
     case NIFTI_TYPE_INT16:
-        return reg_tools_getMinMaxValue<short>(image, timepoint, false);
+        return reg_tools_getMinMaxValue<short>(image, timePoint, false);
     case NIFTI_TYPE_UINT32:
-        return (float)reg_tools_getMinMaxValue<unsigned>(image, timepoint, false);
+        return (float)reg_tools_getMinMaxValue<unsigned>(image, timePoint, false);
     case NIFTI_TYPE_INT32:
-        return (float)reg_tools_getMinMaxValue<int>(image, timepoint, false);
+        return (float)reg_tools_getMinMaxValue<int>(image, timePoint, false);
     case NIFTI_TYPE_FLOAT32:
-        return reg_tools_getMinMaxValue<float>(image, timepoint, false);
+        return reg_tools_getMinMaxValue<float>(image, timePoint, false);
     case NIFTI_TYPE_FLOAT64:
-        return (float)reg_tools_getMinMaxValue<double>(image, timepoint, false);
+        return (float)reg_tools_getMinMaxValue<double>(image, timePoint, false);
     default:
         NR_FATAL_ERROR("The image data type is not supported");
         return 0;
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 5064d800..81c9e633 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -59,7 +59,7 @@ bool reg_isAnImageFileName(const char *name);
  * @param upThr Intensity to use as higher threshold
  */
 void reg_intensityRescale(nifti_image *image,
-                          int timepoint,
+                          int timePoint,
                           float newMin,
                           float newMax);
 /* *************************************************************** */
@@ -274,17 +274,17 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask);
 /* *************************************************************** */
 /** @brief Get the minimal value of an image
  * @param img Input image
- * @param timepoint active time point. All time points are used if set to -1
+ * @param timePoint active time point. All time points are used if set to -1
  * @return min value
  */
-float reg_tools_getMinValue(const nifti_image *img, int timepoint);
+float reg_tools_getMinValue(const nifti_image *img, int timePoint);
 /* *************************************************************** */
 /** @brief Get the maximal value of an image
  * @param img Input image
- * @param timepoint active time point. All time points are used if set to -1
+ * @param timePoint active time point. All time points are used if set to -1
  * @return max value
  */
-float reg_tools_getMaxValue(const nifti_image *img, int timepoint);
+float reg_tools_getMaxValue(const nifti_image *img, int timePoint);
 /* *************************************************************** */
 /** @brief Get the mean value of an image
  * @param img Input image
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 0ddb1e93..ca24678a 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -50,33 +50,33 @@ endif(NOT COMPILE_RESULT_VAR)
 #-----------------------------------------------------------------------------
 set(NAME _reg_cuda_kernels)
 cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
+    ../AladinContent.cpp
+    affineDeformationKernel.cu
+    blockMatchingKernel.cu
+    CudaAffineDeformationFieldKernel.cpp
     CudaAladinContent.cpp
+    CudaBlockMatchingKernel.cpp
     CudaCommon.cu
     CudaCompute.cu
     CudaContent.cpp
     CudaContext.cpp
+    CudaConvolutionKernel.cpp
     CudaDefContent.cpp
     CudaF3dContent.cpp
     CudaKernelConvolution.cu
     CudaKernelFactory.cpp
+    CudaLtsKernel.cpp
     CudaMeasure.cpp
-    affineDeformationKernel.cu
-    blockMatchingKernel.cu
-    resampleKernel.cu
-    CudaAffineDeformationFieldKernel.cpp
-    CudaBlockMatchingKernel.cpp
-    CudaConvolutionKernel.cpp
     CudaNormaliseGradient.cu
-    CudaLtsKernel.cpp
     CudaResampleImageKernel.cpp
-    ../AladinContent.cpp
-    _reg_resampling_gpu.cu
-    _reg_tools_gpu.cu
+    CudaResampling.cu
+    resampleKernel.cu
     _reg_globalTransformation_gpu.cu
     _reg_localTransformation_gpu.cu
     _reg_nmi_gpu.cu
-    _reg_ssd_gpu.cu
     _reg_optimiser_gpu.cu
+    _reg_ssd_gpu.cu
+    _reg_tools_gpu.cu
 )
 target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
 install(TARGETS ${NAME}
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 02c83dc8..17bb8905 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -2,7 +2,7 @@
 #include "CudaF3dContent.h"
 #include "CudaKernelConvolution.hpp"
 #include "CudaNormaliseGradient.hpp"
-#include "_reg_resampling_gpu.h"
+#include "CudaResampling.hpp"
 #include "_reg_localTransformation_gpu.h"
 #include "_reg_optimiser_gpu.h"
 
@@ -123,8 +123,8 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
                                        optimiseZ);
 }
 /* *************************************************************** */
-void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) {
-    // TODO Fix reg_getImageGradient_gpu to accept activeTimepoint
+void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) {
+    // TODO Fix reg_getImageGradient_gpu to accept activeTimePoint
     CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
     reg_getImageGradient_gpu(con.Content::GetFloating(),
                              con.GetFloatingCuda(),
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 842be37a..3aa8bec5 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -19,7 +19,7 @@ class CudaCompute: public Compute {
     virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) override;
     virtual void GetDeformationField(bool composition, bool bspline) override;
     virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float scale, const bool optimiseX, const bool optimiseY, const bool optimiseZ) override;
-    virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override;
+    virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) override;
     virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void SmoothGradient(float sigma) override;
diff --git a/reg-lib/cuda/CudaNormaliseGradient.hpp b/reg-lib/cuda/CudaNormaliseGradient.hpp
index bbcae390..c389d149 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.hpp
+++ b/reg-lib/cuda/CudaNormaliseGradient.hpp
@@ -2,6 +2,7 @@
 
 #include "CudaCommon.hpp"
 
+/* *************************************************************** */
 namespace NiftyReg::Cuda {
 /* *************************************************************** */
 /**
@@ -36,3 +37,4 @@ void NormaliseGradient(float4 *imageCuda,
                        const bool optimiseZ);
 /* *************************************************************** */
 } // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaResampleImageKernel.cpp b/reg-lib/cuda/CudaResampleImageKernel.cpp
index e17b22da..5ab6dcf4 100644
--- a/reg-lib/cuda/CudaResampleImageKernel.cpp
+++ b/reg-lib/cuda/CudaResampleImageKernel.cpp
@@ -24,13 +24,13 @@ CudaResampleImageKernel::CudaResampleImageKernel(Content *conIn) : ResampleImage
 /* *************************************************************** */
 void CudaResampleImageKernel::Calculate(int interp,
                                         float paddingValue,
-                                        bool *dti_timepoint,
+                                        bool *dtiTimePoint,
                                         mat33 * jacMat) {
     launchResample(floatingImage,
                    warpedImage,
                    interp,
                    paddingValue,
-                   dti_timepoint,
+                   dtiTimePoint,
                    jacMat,
                    &floatingImageArray_d,
                    &warpedImageArray_d,
diff --git a/reg-lib/cuda/CudaResampleImageKernel.h b/reg-lib/cuda/CudaResampleImageKernel.h
index 216ae432..a4eec6b9 100644
--- a/reg-lib/cuda/CudaResampleImageKernel.h
+++ b/reg-lib/cuda/CudaResampleImageKernel.h
@@ -11,7 +11,7 @@ class CudaResampleImageKernel: public ResampleImageKernel {
     CudaResampleImageKernel(Content *conIn);
     void Calculate(int interp,
                    float paddingValue,
-                   bool *dti_timepoint = nullptr,
+                   bool *dtiTimePoint = nullptr,
                    mat33 *jacMat = nullptr);
 
 private:
diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/CudaResampling.cu
old mode 100755
new mode 100644
similarity index 98%
rename from reg-lib/cuda/_reg_resampling_gpu.cu
rename to reg-lib/cuda/CudaResampling.cu
index fe3eb39b..b33f078a
--- a/reg-lib/cuda/_reg_resampling_gpu.cu
+++ b/reg-lib/cuda/CudaResampling.cu
@@ -1,5 +1,5 @@
 /*
- *  _reg_resampling_gpu.cu
+ *  CudaResampling.cu
  *
  *
  *  Created by Marc Modat on 24/03/2009.
@@ -10,8 +10,8 @@
  *
  */
 
-#include "_reg_resampling_gpu.h"
-#include "_reg_resampling_kernels.cu"
+#include "CudaResampling.hpp"
+#include "CudaResamplingKernels.cu"
 
 /* *************************************************************** */
 void reg_resampleImage_gpu(const nifti_image *floatingImage,
diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/CudaResampling.hpp
old mode 100755
new mode 100644
similarity index 98%
rename from reg-lib/cuda/_reg_resampling_gpu.h
rename to reg-lib/cuda/CudaResampling.hpp
index 5fc18144..8b3d3069
--- a/reg-lib/cuda/_reg_resampling_gpu.h
+++ b/reg-lib/cuda/CudaResampling.hpp
@@ -1,5 +1,5 @@
 /*
- *  _reg_resampling_gpu.h
+ *  CudaResampling.hpp
  *
  *
  *  Created by Marc Modat on 24/03/2009.
diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/CudaResamplingKernels.cu
old mode 100755
new mode 100644
similarity index 99%
rename from reg-lib/cuda/_reg_resampling_kernels.cu
rename to reg-lib/cuda/CudaResamplingKernels.cu
index c2711fdf..cc7263b1
--- a/reg-lib/cuda/_reg_resampling_kernels.cu
+++ b/reg-lib/cuda/CudaResamplingKernels.cu
@@ -1,5 +1,5 @@
 /*
- *  _reg_resampling_kernels.cu
+ *  CudaResamplingKernels.cu
  *
  *
  *  Created by Marc Modat on 24/03/2009.
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 7b7d94d4..11ccd80e 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -115,7 +115,7 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
                                       float4 *ssdGradientCuda,
                                       const int *maskCuda,
                                       const size_t activeVoxelNumber,
-                                      const float timepointWeight) {
+                                      const float timePointWeight) {
     // Copy the constant memory variables
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
@@ -138,7 +138,7 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
         if (warValue != warValue) return false;
         return true;
     });
-    const float adjustedWeight = timepointWeight / static_cast<float>(validVoxelNumber);
+    const float adjustedWeight = timePointWeight / static_cast<float>(validVoxelNumber);
 
     const unsigned blocks = CudaContext::GetBlockSize()->GetSsdGradient;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index 85656322..cfbe514f 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -377,7 +377,7 @@ void launchResample(nifti_image *floatingImage,
 						  nifti_image *warpedImage,
 						  int interp,
 						  float paddingValue,
-						  bool *dti_timepoint,
+						  bool *dtiTimePoint,
 						  mat33 *jacMat,
 						  float **floatingImage_d,
 						  float **warpedImage_d,
@@ -385,7 +385,7 @@ void launchResample(nifti_image *floatingImage,
 						  int **mask_d,
 						  float **sourceIJKMatrix_d) {
 	// Define the DTI indices if required
-	if (dti_timepoint != nullptr || jacMat != nullptr)
+	if (dtiTimePoint != nullptr || jacMat != nullptr)
 		NR_FATAL_ERROR("The DTI resampling has not yet been implemented with the CUDA platform");
 
 	const size_t targetVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3);
diff --git a/reg-lib/cuda/resampleKernel.h b/reg-lib/cuda/resampleKernel.h
index 758a38ed..eea28625 100644
--- a/reg-lib/cuda/resampleKernel.h
+++ b/reg-lib/cuda/resampleKernel.h
@@ -3,7 +3,7 @@
 #include "RNifti.h"
 
 void launchConvolution(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoint, bool *axis);
-void launchResample(nifti_image *floatingImage, nifti_image *warpedImage,  int interp, float paddingValue, bool *dti_timepoint, mat33 * jacMat, float** floatingImage_d, float** warpedImage_d, float** deformationFieldImage_d, int** mask_d, float** floMat_d);
+void launchResample(nifti_image *floatingImage, nifti_image *warpedImage, int interp, float paddingValue, bool *dtiTimePoint, mat33 *jacMat, float** floatingImage_d, float** warpedImage_d, float** deformationFieldImage_d, int** mask_d, float** floMat_d);
 void launchOptimizer();//TODO
 
 double sortAndReduce(float* lengths_d, float* target_d, float* result_d, float* newResult_d, const unsigned numBlocks, const unsigned numToKeep, const unsigned m);

From 13697c353336406d3a6db0bb1fd596a8348c4d47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 20 Nov 2023 16:38:48 +0000
Subject: [PATCH 248/314] Implement multi-timepoint support for CUDA #92

---
 niftyreg_build_version.txt            |   2 +-
 reg-lib/Content.cpp                   |   2 +-
 reg-lib/cuda/BlockSize.hpp            |   6 --
 reg-lib/cuda/CudaCompute.cu           |  35 +++----
 reg-lib/cuda/CudaContent.cpp          |  15 ++-
 reg-lib/cuda/CudaResampling.cu        |  99 +++++++++----------
 reg-lib/cuda/CudaResampling.hpp       |  37 +++++---
 reg-lib/cuda/CudaResamplingKernels.cu |  66 +++++++------
 reg-lib/cuda/_reg_nmi_gpu.cu          |  31 +++---
 reg-lib/cuda/_reg_ssd_gpu.cu          | 131 ++++++++++++++++----------
 reg-lib/cuda/_reg_ssd_kernels.cu      |  84 -----------------
 reg-test/reg_test_regr_measure.cpp    |   6 +-
 12 files changed, 229 insertions(+), 285 deletions(-)
 delete mode 100755 reg-lib/cuda/_reg_ssd_kernels.cu

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 4203007d..526204c8 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-366
+367
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index ca340144..b64a48b8 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -15,7 +15,7 @@ Content::Content(nifti_image *referenceIn,
         NR_FATAL_ERROR("referenceIn or floatingIn can't be nullptr");
     AllocateWarped();
     AllocateDeformationField(bytesIn);
-    activeVoxelNumber = reference->nvox;
+    activeVoxelNumber = NiftiImage::calcVoxelNumber(reference, 3);
     if (!referenceMask) {
         referenceMaskManaged.reset(new int[activeVoxelNumber]());
         referenceMask = referenceMaskManaged.get();
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index a86430ec..5483ae59 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -36,8 +36,6 @@ struct BlockSize {
     unsigned reg_getConjugateGradient1;
     unsigned reg_getConjugateGradient2;
     unsigned reg_updateControlPointPosition;
-    unsigned GetSsdValue;
-    unsigned GetSsdGradient;
     unsigned reg_voxelCentricToNodeCentric;
     unsigned reg_convertNmiGradientFromVoxelToRealSpace;
     unsigned reg_ApplyConvolutionWindowAlongX;
@@ -74,8 +72,6 @@ struct BlockSize100: public BlockSize {
         reg_getConjugateGradient1 = 320; // 12 reg - 24 smem
         reg_getConjugateGradient2 = 384; // 10 reg - 40 smem
         reg_updateControlPointPosition = 384; // 08 reg - 24 smem
-        GetSsdValue = 320; // 12 reg - 24 smem - 08 cmem
-        GetSsdGradient = 320; // 12 reg - 24 smem - 08 cmem
         reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem
         reg_convertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
         reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
@@ -114,8 +110,6 @@ struct BlockSize300: public BlockSize {
         reg_getConjugateGradient1 = 1024; // 22 reg
         reg_getConjugateGradient2 = 1024; // 25 reg
         reg_updateControlPointPosition = 1024; // 22 reg
-        GetSsdValue = 768; // 34 reg
-        GetSsdGradient = 768; // 34 reg
         reg_voxelCentricToNodeCentric = 1024; // 23 reg
         reg_convertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg
         reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 17bb8905..08766f26 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -9,14 +9,17 @@
 /* *************************************************************** */
 void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
     CudaContent& con = dynamic_cast<CudaContent&>(this->con);
-    reg_resampleImage_gpu(con.Content::GetFloating(),
-                          con.GetWarpedCuda(),
-                          con.GetFloatingCuda(),
-                          con.GetDeformationFieldCuda(),
-                          con.GetReferenceMaskCuda(),
-                          con.GetActiveVoxelNumber(),
-                          interpolation,
-                          paddingValue);
+    const nifti_image *floating = con.Content::GetFloating();
+    auto resampleImage = floating->nz > 1 ? Cuda::ResampleImage<true> : Cuda::ResampleImage<false>;
+    resampleImage(floating,
+                  con.GetFloatingCuda(),
+                  con.Content::GetWarped(),
+                  con.GetWarpedCuda(),
+                  con.GetDeformationFieldCuda(),
+                  con.GetReferenceMaskCuda(),
+                  con.GetActiveVoxelNumber(),
+                  interpolation,
+                  paddingValue);
 }
 /* *************************************************************** */
 double CudaCompute::GetJacobianPenaltyTerm(bool approx) {
@@ -124,15 +127,15 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
 }
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) {
-    // TODO Fix reg_getImageGradient_gpu to accept activeTimePoint
     CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
-    reg_getImageGradient_gpu(con.Content::GetFloating(),
-                             con.GetFloatingCuda(),
-                             con.GetDeformationFieldCuda(),
-                             con.GetWarpedGradientCuda(),
-                             con.GetActiveVoxelNumber(),
-                             interpolation,
-                             paddingValue);
+    Cuda::GetImageGradient(con.Content::GetFloating(),
+                           con.GetFloatingCuda(),
+                           con.GetDeformationFieldCuda(),
+                           con.GetWarpedGradientCuda(),
+                           con.GetActiveVoxelNumber(),
+                           interpolation,
+                           paddingValue,
+                           activeTimePoint);
 }
 /* *************************************************************** */
 double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index f26f8c69..c25cff9d 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -91,22 +91,21 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) {
         referenceMaskCuda = nullptr;
     }
 
+    activeVoxelNumber = 0;
     if (!referenceMask) return;
 
-    decltype(referenceMask) targetMask;
-    NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask, reference->nvox * sizeof(*targetMask)));
-    int *targetMaskPtr = targetMask;
-    activeVoxelNumber = 0;
-    for (size_t i = 0; i < reference->nvox; i++) {
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(reference, 3);
+    thrust::host_vector<int> mask(voxelNumber);
+    int *maskPtr = mask.data();
+    for (size_t i = 0; i < voxelNumber; i++) {
         if (referenceMask[i] != -1) {
-            *targetMaskPtr++ = i;
+            *maskPtr++ = static_cast<int>(i);
             activeVoxelNumber++;
         }
     }
 
     Cuda::Allocate(&referenceMaskCuda, activeVoxelNumber);
-    NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, targetMask, activeVoxelNumber * sizeof(*targetMask), cudaMemcpyHostToDevice));
-    NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask));
+    thrust::copy_n(mask.begin(), activeVoxelNumber, thrust::device_ptr<int>(referenceMaskCuda));
 }
 /* *************************************************************** */
 void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu
index b33f078a..f72f6bee 100644
--- a/reg-lib/cuda/CudaResampling.cu
+++ b/reg-lib/cuda/CudaResampling.cu
@@ -14,57 +14,62 @@
 #include "CudaResamplingKernels.cu"
 
 /* *************************************************************** */
-void reg_resampleImage_gpu(const nifti_image *floatingImage,
-                           float *warpedImageCuda,
-                           const float *floatingImageCuda,
-                           const float4 *deformationFieldCuda,
-                           const int *maskCuda,
-                           const size_t activeVoxelNumber,
-                           const int interpolation,
-                           const float paddingValue) {
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+template<bool is3d>
+void ResampleImage(const nifti_image *floatingImage,
+                   const float *floatingImageCuda,
+                   const nifti_image *warpedImage,
+                   float *warpedImageCuda,
+                   const float4 *deformationFieldCuda,
+                   const int *maskCuda,
+                   const size_t activeVoxelNumber,
+                   const int interpolation,
+                   const float paddingValue) {
     if (interpolation != 1)
         NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");
 
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
-
-    // Create the texture object for the floating image
-    auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
-    // Create the texture object for the deformation field
     auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
-    // Create the texture object for the mask
     auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
-
     // Bind the real to voxel matrix to the texture
-    const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
+    const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
 
-    if (floatingImage->nz > 1) {
-        const unsigned blocks = blockSize->reg_resampleImage3D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_resampleImage3D_kernel<<<gridDims, blockDims>>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture,
-                                                            floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    } else {
-        const unsigned blocks = blockSize->reg_resampleImage2D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        reg_resampleImage2D_kernel<<<gridDims, blockDims>>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture,
-                                                            floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) {
+        NR_DEBUG((is3d ? "3" : "2") << "D resampling of volume number " << t);
+        auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+        if constexpr (is3d) {
+            const unsigned blocks = blockSize->reg_resampleImage3D;
+            const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+            const dim3 gridDims(grids, grids, 1);
+            const dim3 blockDims(blocks, 1, 1);
+            ResampleImage3D<<<gridDims, blockDims>>>(warpedImageCuda + t * voxelNumber, *floatingTexture, *deformationFieldTexture, *maskTexture,
+                                                     floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
+            NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+        } else {
+            const unsigned blocks = blockSize->reg_resampleImage2D;
+            const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
+            const dim3 gridDims(grids, grids, 1);
+            const dim3 blockDims(blocks, 1, 1);
+            ResampleImage2D<<<gridDims, blockDims>>>(warpedImageCuda + t * voxelNumber, *floatingTexture, *deformationFieldTexture, *maskTexture,
+                                                     floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
+            NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+        }
     }
 }
+template void ResampleImage<false>(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float);
+template void ResampleImage<true>(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float);
 /* *************************************************************** */
-void reg_getImageGradient_gpu(const nifti_image *floatingImage,
-                              const float *floatingImageCuda,
-                              const float4 *deformationFieldCuda,
-                              float4 *warpedGradientCuda,
-                              const size_t activeVoxelNumber,
-                              const int interpolation,
-                              float paddingValue) {
+void GetImageGradient(const nifti_image *floatingImage,
+                      const float *floatingImageCuda,
+                      const float4 *deformationFieldCuda,
+                      float4 *warpedGradientCuda,
+                      const size_t activeVoxelNumber,
+                      const int interpolation,
+                      float paddingValue,
+                      const int activeTimePoint) {
     if (interpolation != 1)
         NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");
 
@@ -72,31 +77,29 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage,
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
     if (paddingValue != paddingValue) paddingValue = 0;
-
-    // Create the texture object for the floating image
-    auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
-    // Create the texture object for the deformation field
+    auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
     auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
-
     // Bind the real to voxel matrix to the texture
-    const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
+    const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
 
     if (floatingImage->nz > 1) {
         const unsigned blocks = blockSize->reg_getImageGradient3D;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_getImageGradient3D_kernel<<<gridDims, blockDims>>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture,
-                                                               floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
+        GetImageGradient3D<<<gridDims, blockDims>>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture,
+                                                    floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->reg_getImageGradient2D;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_getImageGradient2D_kernel<<<gridDims, blockDims>>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture,
-                                                               floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
+        GetImageGradient2D<<<gridDims, blockDims>>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture,
+                                                    floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
 }
 /* *************************************************************** */
+} // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaResampling.hpp b/reg-lib/cuda/CudaResampling.hpp
index 8b3d3069..6d54dad6 100644
--- a/reg-lib/cuda/CudaResampling.hpp
+++ b/reg-lib/cuda/CudaResampling.hpp
@@ -15,20 +15,27 @@
 #include "CudaCommon.hpp"
 
 /* *************************************************************** */
-void reg_resampleImage_gpu(const nifti_image *floatingImage,
-                           float *warpedImageCuda,
-                           const float *floatingImageCuda,
-                           const float4 *deformationFieldCuda,
-                           const int *maskCuda,
-                           const size_t activeVoxelNumber,
-                           const int interpolation,
-                           const float paddingValue);
+namespace NiftyReg::Cuda {
 /* *************************************************************** */
-void reg_getImageGradient_gpu(const nifti_image *floatingImage,
-                              const float *floatingImageCuda,
-                              const float4 *deformationFieldCuda,
-                              float4 *warpedGradientCuda,
-                              const size_t activeVoxelNumber,
-                              const int interpolation,
-                              float paddingValue);
+template<bool is3d>
+void ResampleImage(const nifti_image *floatingImage,
+                   const float *floatingImageCuda,
+                   const nifti_image *warpedImage,
+                   float *warpedImageCuda,
+                   const float4 *deformationFieldCuda,
+                   const int *maskCuda,
+                   const size_t activeVoxelNumber,
+                   const int interpolation,
+                   const float paddingValue);
+/* *************************************************************** */
+void GetImageGradient(const nifti_image *floatingImage,
+                      const float *floatingImageCuda,
+                      const float4 *deformationFieldCuda,
+                      float4 *warpedGradientCuda,
+                      const size_t activeVoxelNumber,
+                      const int interpolation,
+                      float paddingValue,
+                      const int activeTimePoint);
+/* *************************************************************** */
+} // namespace NiftyReg::Cuda
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaResamplingKernels.cu b/reg-lib/cuda/CudaResamplingKernels.cu
index cc7263b1..868d03f5 100644
--- a/reg-lib/cuda/CudaResamplingKernels.cu
+++ b/reg-lib/cuda/CudaResamplingKernels.cu
@@ -10,23 +10,25 @@
  *
  */
 
+/* *************************************************************** */
+namespace NiftyReg::Cuda {
 /* *************************************************************** */
 template<typename T>
-__inline__ __device__ void InterpLinearKernel(T relative, T (&basis)[2]) {
+__inline__ __device__ constexpr void InterpLinearKernel(T relative, T (&basis)[2]) {
     if (relative < 0)
         relative = 0;  // reg_rounding error
     basis[1] = relative;
     basis[0] = 1.f - relative;
 }
 /* *************************************************************** */
-__global__ void reg_resampleImage2D_kernel(float *resultArray,
-                                           cudaTextureObject_t floatingTexture,
-                                           cudaTextureObject_t deformationFieldTexture,
-                                           cudaTextureObject_t maskTexture,
-                                           const mat44 floatingMatrix,
-                                           const int3 floatingDim,
-                                           const unsigned activeVoxelNumber,
-                                           const float paddingValue) {
+__global__ void ResampleImage2D(float *resultArray,
+                                cudaTextureObject_t floatingTexture,
+                                cudaTextureObject_t deformationFieldTexture,
+                                cudaTextureObject_t maskTexture,
+                                const mat44 floatingMatrix,
+                                const int3 floatingDim,
+                                const unsigned activeVoxelNumber,
+                                const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= activeVoxelNumber) return;
     // Get the real world deformation in the floating space
@@ -70,14 +72,14 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray,
     resultArray[tid2] = intensity;
 }
 /* *************************************************************** */
-__global__ void reg_resampleImage3D_kernel(float *resultArray,
-                                           cudaTextureObject_t floatingTexture,
-                                           cudaTextureObject_t deformationFieldTexture,
-                                           cudaTextureObject_t maskTexture,
-                                           const mat44 floatingMatrix,
-                                           const int3 floatingDim,
-                                           const unsigned activeVoxelNumber,
-                                           const float paddingValue) {
+__global__ void ResampleImage3D(float *resultArray,
+                                cudaTextureObject_t floatingTexture,
+                                cudaTextureObject_t deformationFieldTexture,
+                                cudaTextureObject_t maskTexture,
+                                const mat44 floatingMatrix,
+                                const int3 floatingDim,
+                                const unsigned activeVoxelNumber,
+                                const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= activeVoxelNumber) return;
     // Get the real world deformation in the floating space
@@ -133,13 +135,13 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray,
     resultArray[tid2] = intensity;
 }
 /* *************************************************************** */
-__global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
-                                              cudaTextureObject_t floatingTexture,
-                                              cudaTextureObject_t deformationFieldTexture,
-                                              const mat44 floatingMatrix,
-                                              const int3 floatingDim,
-                                              const unsigned activeVoxelNumber,
-                                              const float paddingValue) {
+__global__ void GetImageGradient2D(float4 *gradientArray,
+                                   cudaTextureObject_t floatingTexture,
+                                   cudaTextureObject_t deformationFieldTexture,
+                                   const mat44 floatingMatrix,
+                                   const int3 floatingDim,
+                                   const unsigned activeVoxelNumber,
+                                   const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= activeVoxelNumber) return;
     // Get the real world deformation in the floating space
@@ -185,13 +187,13 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray,
     gradientArray[tid] = gradientValue;
 }
 /* *************************************************************** */
-__global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
-                                              cudaTextureObject_t floatingTexture,
-                                              cudaTextureObject_t deformationFieldTexture,
-                                              const mat44 floatingMatrix,
-                                              const int3 floatingDim,
-                                              const unsigned activeVoxelNumber,
-                                              const float paddingValue) {
+__global__ void GetImageGradient3D(float4 *gradientArray,
+                                   cudaTextureObject_t floatingTexture,
+                                   cudaTextureObject_t deformationFieldTexture,
+                                   const mat44 floatingMatrix,
+                                   const int3 floatingDim,
+                                   const unsigned activeVoxelNumber,
+                                   const float paddingValue) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= activeVoxelNumber) return;
     // Get the real world deformation in the floating space
@@ -252,3 +254,5 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray,
     gradientArray[tid] = gradientValue;
 }
 /* *************************************************************** */
+} // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 1758eda5..8d482b89 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -40,9 +40,6 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, float *refImgCuda,
                                        warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda,
                                        localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda,
                                        warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
-    // Check if the input images have multiple time points
-    if (this->referenceTimePoints > 1 || this->floatingImage->nt > 1)
-        NR_FATAL_ERROR("Multiple time points are not yet supported");
     // The reference and floating images have to be updated on the device
     Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage);
     Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage);
@@ -82,8 +79,6 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
                          const bool approximation) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
-    auto maskTexture = *maskTexturePtr;
 
     // Iterate over all active time points
     for (int t = 0; t < referenceTimePoints; t++) {
@@ -105,11 +100,10 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
         // Fill the joint histograms
         if (approximation == false) {
             // No approximation is used for the Parzen windowing
-            thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
-                const int voxel = tex1Dfetch<int>(maskTexture, index);
-                const float refValue = tex1Dfetch<float>(referenceImageTexture, voxel);
+            thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
+                const float refValue = tex1Dfetch<float>(referenceImageTexture, index);
                 if (refValue != refValue) return;
-                const float warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
+                const float warValue = tex1Dfetch<float>(warpedImageTexture, index);
                 if (warValue != warValue) return;
                 for (int r = int(refValue) - 1; r < int(refValue) + 3; r++) {
                     if (0 <= r && r < curRefBinNumber) {
@@ -126,11 +120,10 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage,
         } else {
             // An approximation is used for the Parzen windowing. First intensities are binarised then
             // the histogram is convolved with a spine kernel function.
-            thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
-                const int voxel = tex1Dfetch<int>(maskTexture, index);
-                const float refValue = tex1Dfetch<float>(referenceImageTexture, voxel);
+            thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
+                const float refValue = tex1Dfetch<float>(referenceImageTexture, index);
                 if (refValue != refValue) return;
-                const float warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
+                const float warValue = tex1Dfetch<float>(warpedImageTexture, index);
                 if (warValue != warValue) return;
                 if (0 <= refValue && refValue < curRefBinNumber && 0 <= warValue && warValue < curFloBinNumber)
                     atomicAdd(&jointHistogramProCuda[int(refValue) + int(warValue) * curRefBinNumber], 1.0);
@@ -323,17 +316,15 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
     auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
     auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
     auto warpedGradientTexturePtr = Cuda::CreateTextureObject(warpedGradientCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
-    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
     auto referenceImageTexture = *referenceImageTexturePtr;
     auto warpedImageTexture = *warpedImageTexturePtr;
     auto warpedGradientTexture = *warpedGradientTexturePtr;
-    auto maskTexture = *maskTexturePtr;
 
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
-        const int targetIndex = tex1Dfetch<int>(maskTexture, index);
-        const float refValue = tex1Dfetch<float>(referenceImageTexture, targetIndex);
+        const int voxel = maskCuda[index];
+        const float refValue = tex1Dfetch<float>(referenceImageTexture, voxel);
         if (refValue != refValue) return;
-        const float warValue = tex1Dfetch<float>(warpedImageTexture, targetIndex);
+        const float warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
         if (warValue != warValue) return;
         const float4 warGradValue = tex1Dfetch<float4>(warpedGradientTexture, index);
 
@@ -376,12 +367,12 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
         }
 
         // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
-        float4 gradValue = voxelBasedGradientCuda[targetIndex];
+        float4 gradValue = voxelBasedGradientCuda[voxel];
         gradValue.x += static_cast<float>(timePointWeight * (refDeriv.x + warDeriv.x - nmi * jointDeriv.x) / normalisedJE);
         gradValue.y += static_cast<float>(timePointWeight * (refDeriv.y + warDeriv.y - nmi * jointDeriv.y) / normalisedJE);
         if constexpr (is3d)
             gradValue.z += static_cast<float>(timePointWeight * (refDeriv.z + warDeriv.z - nmi * jointDeriv.z) / normalisedJE);
-        voxelBasedGradientCuda[targetIndex] = gradValue;
+        voxelBasedGradientCuda[voxel] = gradValue;
     });
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 11ccd80e..2a0a775f 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -11,7 +11,6 @@
  */
 
 #include "_reg_ssd_gpu.h"
-#include "_reg_ssd_kernels.cu"
 
 /* *************************************************************** */
 reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() {
@@ -40,9 +39,6 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, float *refImgCuda,
                                        warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda,
                                        localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda,
                                        warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda);
-    // Check that the input images have only one time point
-    if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1)
-        NR_FATAL_ERROR("Multiple time points are not yet supported");
     // Check if the reference and floating images need to be updated
     for (int i = 0; i < this->referenceTimePoints; ++i)
         if (this->timePointWeights[i] > 0 && normaliseTimePoint[i]) {
@@ -58,33 +54,39 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage,
                            const float *warpedCuda,
                            const float *localWeightSimCuda,
                            const int *maskCuda,
-                           const size_t activeVoxelNumber) {
-    // Copy the constant memory variables
+                           const size_t activeVoxelNumber,
+                           const double *timePointWeights,
+                           const int referenceTimePoints) {
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
-    auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
-    auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
-    auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
-    Cuda::UniqueTextureObjectPtr localWeightSimTexture;
-    if (localWeightSimCuda)
-        localWeightSimTexture = Cuda::CreateTextureObject(localWeightSimCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
-
-    // Create an array on the device to store the absolute difference values
-    thrust::device_vector<float> ssdSum(1), ssdCount(1);
-
-    // Compute the absolute values
-    const unsigned blocks = CudaContext::GetBlockSize()->GetSsdValue;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-    const dim3 gridDims(grids, grids, 1);
-    const dim3 blockDims(blocks, 1, 1);
-    Cuda::GetSsdValueKernel<<<gridDims, blockDims>>>(ssdSum.data().get(), ssdCount.data().get(), *referenceTexture,
-                                                     *warpedTexture, localWeightSimCuda ? *localWeightSimTexture : 0,
-                                                     *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-
-    // Calculate the SSD
-    const float ssd = ssdSum[0] / ssdCount[0];
+    Cuda::UniqueTextureObjectPtr localWeightSimTexturePtr; cudaTextureObject_t localWeightSimTexture = 0;
+    if (localWeightSimCuda) {
+        localWeightSimTexturePtr = Cuda::CreateTextureObject(localWeightSimCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
+        localWeightSimTexture = *localWeightSimTexturePtr;
+    }
+
+    double ssd = 0.0;
+    for (int t = 0; t < referenceTimePoints; t++) {
+        auto referenceTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+        auto warpedTexturePtr = Cuda::CreateTextureObject(warpedCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+        auto referenceTexture = *referenceTexturePtr;
+        auto warpedTexture = *warpedTexturePtr;
+
+        const auto ssdAndCount = thrust::transform_reduce(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int index) -> double2 {
+            const double refValue = tex1Dfetch<float>(referenceTexture, index);
+            if (refValue != refValue) return {};
+
+            const double warValue = tex1Dfetch<float>(warpedTexture, index);
+            if (warValue != warValue) return {};
+
+            const double weight = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
+            const double diff = refValue - warValue;
+            return { Square(diff) * weight, weight };  // ssd and count
+        }, make_double2(0.0, 0.0), thrust::plus<double2>());
+
+        ssd += (ssdAndCount.x * timePointWeights[t]) / ssdAndCount.y;
+    }
 
     return -ssd;
 }
@@ -95,7 +97,9 @@ double reg_ssd_gpu::GetSimilarityMeasureValueFw() {
                                this->warpedImageCuda,
                                this->localWeightSimCuda,
                                this->referenceMaskCuda,
-                               this->activeVoxelNumber);
+                               this->activeVoxelNumber,
+                               this->timePointWeights,
+                               this->referenceTimePoints);
 }
 /* *************************************************************** */
 double reg_ssd_gpu::GetSimilarityMeasureValueBw() {
@@ -104,7 +108,9 @@ double reg_ssd_gpu::GetSimilarityMeasureValueBw() {
                                this->warpedImageBwCuda,
                                nullptr,
                                this->floatingMaskCuda,
-                               this->activeVoxelNumber);
+                               this->activeVoxelNumber,
+                               this->timePointWeights,
+                               this->referenceTimePoints);
 }
 /* *************************************************************** */
 void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
@@ -115,39 +121,58 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
                                       float4 *ssdGradientCuda,
                                       const int *maskCuda,
                                       const size_t activeVoxelNumber,
-                                      const float timePointWeight) {
-    // Copy the constant memory variables
+                                      const double timePointWeight,
+                                      const int currentTimePoint) {
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
-    auto referenceTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
-    auto warpedTexturePtr = Cuda::CreateTextureObject(warpedCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
-    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
+    auto referenceTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto warpedTexturePtr = Cuda::CreateTextureObject(warpedCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
     auto spatialGradTexturePtr = Cuda::CreateTextureObject(spatialGradCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
-    Cuda::UniqueTextureObjectPtr localWeightSimTexturePtr;
-    if (localWeightSimCuda)
+    auto referenceTexture = *referenceTexturePtr;
+    auto warpedTexture = *warpedTexturePtr;
+    auto spatialGradTexture = *spatialGradTexturePtr;
+    Cuda::UniqueTextureObjectPtr localWeightSimTexturePtr; cudaTextureObject_t localWeightSimTexture = 0;
+    if (localWeightSimCuda) {
         localWeightSimTexturePtr = Cuda::CreateTextureObject(localWeightSimCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
+        localWeightSimTexture = *localWeightSimTexturePtr;
+    }
 
     // Find number of valid voxels and correct weight
-    const auto referenceTexture = *referenceTexturePtr;
-    const auto warpedTexture = *warpedTexturePtr;
-    const size_t validVoxelNumber = thrust::count_if(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int index) {
+    const auto validVoxelNumber = thrust::count_if(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int index) {
         const float refValue = tex1Dfetch<float>(referenceTexture, index);
         if (refValue != refValue) return false;
         const float warValue = tex1Dfetch<float>(warpedTexture, index);
         if (warValue != warValue) return false;
         return true;
     });
-    const float adjustedWeight = timePointWeight / static_cast<float>(validVoxelNumber);
-
-    const unsigned blocks = CudaContext::GetBlockSize()->GetSsdGradient;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-    const dim3 gridDims(grids, grids, 1);
-    const dim3 blockDims(blocks, 1, 1);
-    Cuda::GetSsdGradientKernel<<<gridDims, blockDims>>>(ssdGradientCuda, *referenceTexturePtr, *warpedTexturePtr, *maskTexturePtr,
-                                                        *spatialGradTexturePtr, localWeightSimCuda ? *localWeightSimTexturePtr : 0,
-                                                        referenceImageDim, adjustedWeight, (unsigned)activeVoxelNumber);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    const double adjustedWeight = timePointWeight / validVoxelNumber;
+
+    // Calculate the SSD gradient
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) {
+        const int voxel = maskCuda[index];
+
+        const double refValue = tex1Dfetch<float>(referenceTexture, voxel);
+        if (refValue != refValue) return;
+
+        const double warValue = tex1Dfetch<float>(warpedTexture, voxel);
+        if (warValue != warValue) return;
+
+        const float4 spaGradientValue = tex1Dfetch<float4>(spatialGradTexture, index);
+        if (spaGradientValue.x != spaGradientValue.x ||
+            spaGradientValue.y != spaGradientValue.y ||
+            spaGradientValue.z != spaGradientValue.z)
+            return;
+
+        const double weight = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, voxel) : 1.f;
+        const double common = -2.0 * (refValue - warValue) * adjustedWeight * weight;
+
+        float4 ssdGradientValue = ssdGradientCuda[voxel];
+        ssdGradientValue.x += common * spaGradientValue.x;
+        ssdGradientValue.y += common * spaGradientValue.y;
+        ssdGradientValue.z += common * spaGradientValue.z;
+        ssdGradientCuda[voxel] = ssdGradientValue;
+    });
 }
 /* *************************************************************** */
 void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) {
@@ -159,7 +184,8 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint)
                                      this->voxelBasedGradientCuda,
                                      this->referenceMaskCuda,
                                      this->activeVoxelNumber,
-                                     static_cast<float>(this->timePointWeights[currentTimePoint]));
+                                     this->timePointWeights[currentTimePoint],
+                                     currentTimePoint);
 }
 /* *************************************************************** */
 void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
@@ -171,6 +197,7 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint)
                                      this->voxelBasedGradientBwCuda,
                                      this->floatingMaskCuda,
                                      this->activeVoxelNumber,
-                                     static_cast<float>(this->timePointWeights[currentTimePoint]));
+                                     this->timePointWeights[currentTimePoint],
+                                     currentTimePoint);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu
deleted file mode 100755
index 99a61530..00000000
--- a/reg-lib/cuda/_reg_ssd_kernels.cu
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * @file _reg_ssd_kernels.cu
- * @author Marc Modat
- * @date 14/11/2012
- *
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- * See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "_reg_ssd_gpu.h"
-#include "_reg_ssd_kernels.cu"
-#include "_reg_common_cuda_kernels.cu"
-
-/* *************************************************************** */
-namespace NiftyReg::Cuda {
-/* *************************************************************** */
-__global__ void GetSsdValueKernel(float *ssdSum,
-                                  float *ssdCount,
-                                  cudaTextureObject_t referenceTexture,
-                                  cudaTextureObject_t warpedTexture,
-                                  cudaTextureObject_t localWeightSimTexture,
-                                  cudaTextureObject_t maskTexture,
-                                  const int3 referenceImageDim,
-                                  const unsigned activeVoxelNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        const int index = tex1Dfetch<int>(maskTexture, tid);
-
-        const float refValue = tex1Dfetch<float>(referenceTexture, index);
-        if (refValue != refValue) return;
-
-        const float warValue = tex1Dfetch<float>(warpedTexture, index);
-        if (warValue != warValue) return;
-
-        const float val = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
-        const float diff = refValue - warValue;
-        atomicAdd(ssdSum, diff * diff * val);
-        atomicAdd(ssdCount, val);
-    }
-}
-/* *************************************************************** */
-__global__ void GetSsdGradientKernel(float4 *ssdGradient,
-                                     cudaTextureObject_t referenceTexture,
-                                     cudaTextureObject_t warpedTexture,
-                                     cudaTextureObject_t maskTexture,
-                                     cudaTextureObject_t spatialGradTexture,
-                                     cudaTextureObject_t localWeightSimTexture,
-                                     const int3 referenceImageDim,
-                                     const float adjustedWeight,
-                                     const unsigned activeVoxelNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < activeVoxelNumber) {
-        const int index = tex1Dfetch<int>(maskTexture, tid);
-
-        const float refValue = tex1Dfetch<float>(referenceTexture, index);
-        if (refValue != refValue) return;
-
-        const float warValue = tex1Dfetch<float>(warpedTexture, index);
-        if (warValue != warValue) return;
-
-        const float4 spaGradientValue = tex1Dfetch<float4>(spatialGradTexture, tid);
-        if (spaGradientValue.x != spaGradientValue.x ||
-            spaGradientValue.y != spaGradientValue.y ||
-            spaGradientValue.z != spaGradientValue.z)
-            return;
-
-        const float val = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
-        const float common = -2.f * (refValue - warValue) * adjustedWeight * val;
-
-        float4 ssdGradientValue = ssdGradient[index];
-        ssdGradientValue.x += common * spaGradientValue.x;
-        ssdGradientValue.y += common * spaGradientValue.y;
-        ssdGradientValue.z += common * spaGradientValue.z;
-        ssdGradient[index] = ssdGradientValue;
-    }
-}
-/* *************************************************************** */
-} // namespace NiftyReg::Cuda
-/* *************************************************************** */
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 2c26a8d1..6bcdf88e 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -28,7 +28,7 @@ class MeasureTest {
 
         // Create 2D reference, floating, control point grid and local weight similarity images
         constexpr NiftiImage::dim_t size = 16;
-        constexpr NiftiImage::dim_t timePoints = 1;
+        constexpr NiftiImage::dim_t timePoints = 2;
         vector<NiftiImage::dim_t> dim{ size, size, 1, timePoints };
         NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32);
         NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32);
@@ -139,8 +139,8 @@ class MeasureTest {
 
             // Initialise the measures
             for (int t = 0; t < referenceCpu->nt; t++) {
-                measureCpu->SetTimePointWeight(t, 1.0);
-                measureCuda->SetTimePointWeight(t, 1.0);
+                measureCpu->SetTimePointWeight(t, 1.5);
+                measureCuda->SetTimePointWeight(t, 1.5);
             }
             measureCreatorCpu->Initialise(*measureCpu, *contentCpu, contentCpuBw.get());
             measureCreatorCuda->Initialise(*measureCuda, *contentCuda, contentCudaBw.get());

From b2a32ffc0f9742a9196c1ea1fcb4550a01ad7af2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 23 Nov 2023 13:37:03 +0000
Subject: [PATCH 249/314] Implement reg_optimiser_gpu::Perturbation() #92

---
 niftyreg_build_version.txt         |  2 +-
 reg-lib/cuda/_reg_optimiser_gpu.cu | 38 ++++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 526204c8..cb35cf9f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-367
+368
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu
index 28b187b6..27f2ada8 100755
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/_reg_optimiser_gpu.cu
@@ -1,6 +1,7 @@
 #include "_reg_optimiser_gpu.h"
 #include "_reg_optimiser_kernels.cu"
 #include "_reg_common_cuda_kernels.cu"
+#include <curand_kernel.h>
 
 /* *************************************************************** */
 reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser<float>::reg_optimiser() {
@@ -85,7 +86,40 @@ void reg_optimiser_gpu::StoreCurrentDof() {
 }
 /* *************************************************************** */
 void reg_optimiser_gpu::Perturbation(float length) {
-    // TODO: Implement reg_optimiser_gpu::Perturbation()
+    // Reset the number of iteration
+    this->currentIterationNumber = 0;
+
+    auto perturbate = []__device__(float4 *currentDofCuda, cudaTextureObject_t bestDofTexture, const float length, const size_t index) {
+        curandState_t state;
+        curand_init(clock64(), index, 0, &state);
+        const float4 bestDofVal = tex1Dfetch<float4>(bestDofTexture, index);
+        float4 curDofVal = currentDofCuda[index];
+        curDofVal.x = bestDofVal.x + length * curand_uniform(&state);
+        curDofVal.y = bestDofVal.y + length * curand_uniform(&state);
+        curDofVal.z = bestDofVal.z + length * curand_uniform(&state);
+        curDofVal.w = bestDofVal.w + length * curand_uniform(&state);
+        currentDofCuda[index] = curDofVal;
+    };
+
+    // Create some perturbation for degree of freedom
+    const size_t voxNumber = this->GetVoxNumber();
+    auto currentDofCuda = this->currentDofCuda;
+    auto bestDofTexturePtr = Cuda::CreateTextureObject(this->bestDofCuda, voxNumber, cudaChannelFormatKindFloat, 4);
+    auto bestDofTexture = *bestDofTexturePtr;
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator<size_t>(0), voxNumber, [=]__device__(const size_t index) {
+        perturbate(currentDofCuda, bestDofTexture, length, index);
+    });
+    if (this->isSymmetric) {
+        const size_t voxNumberBw = this->GetVoxNumberBw();
+        auto currentDofBwCuda = this->currentDofBwCuda;
+        auto bestDofBwTexturePtr = Cuda::CreateTextureObject(this->bestDofBwCuda, voxNumberBw, cudaChannelFormatKindFloat, 4);
+        auto bestDofBwTexture = *bestDofBwTexturePtr;
+        thrust::for_each_n(thrust::device, thrust::make_counting_iterator<size_t>(0), voxNumberBw, [=]__device__(const size_t index) {
+            perturbate(currentDofBwCuda, bestDofBwTexture, length, index);
+        });
+    }
+    this->StoreCurrentDof();
+    this->currentObjFunctionValue = this->bestObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
 }
 /* *************************************************************** */
 reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_optimiser_gpu() {
@@ -160,7 +194,7 @@ void reg_conjugateGradient_gpu::Optimise(float maxLength,
                                          float smallLength,
                                          float& startLength) {
     this->UpdateGradientValues();
-    reg_optimiser::Optimise(maxLength, smallLength, startLength);
+    reg_optimiser_gpu::Optimise(maxLength, smallLength, startLength);
 }
 /* *************************************************************** */
 void reg_conjugateGradient_gpu::Perturbation(float length) {

From 8182839c8358a51bd4507eda901c08ad2a608da3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 23 Nov 2023 14:31:06 +0000
Subject: [PATCH 250/314] Refactor Optimiser #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/CMakeLists.txt                        |   1 +
 reg-lib/Compute.h                             |   2 +-
 .../{cpu/_reg_optimiser.cpp => Optimiser.cpp} | 140 ++++++++--------
 .../{cpu/_reg_optimiser.h => Optimiser.hpp}   |  34 ++--
 reg-lib/Platform.cpp                          |  28 ++--
 reg-lib/Platform.h                            |  18 +-
 reg-lib/_reg_base.h                           |   4 +-
 reg-lib/cpu/_reg_discrete_init.h              |   2 +-
 reg-lib/cuda/CMakeLists.txt                   |   2 +-
 reg-lib/cuda/CudaCompute.cu                   |  18 +-
 ..._reg_optimiser_gpu.cu => CudaOptimiser.cu} | 154 +++++++++---------
 ..._reg_optimiser_gpu.h => CudaOptimiser.hpp} |  66 ++++----
 ...ser_kernels.cu => CudaOptimiserKernels.cu} |  46 +++---
 reg-test/reg_test_conjugateGradient.cpp       |   2 +-
 15 files changed, 269 insertions(+), 250 deletions(-)
 rename reg-lib/{cpu/_reg_optimiser.cpp => Optimiser.cpp} (80%)
 rename reg-lib/{cpu/_reg_optimiser.h => Optimiser.hpp} (92%)
 rename reg-lib/cuda/{_reg_optimiser_gpu.cu => CudaOptimiser.cu} (67%)
 mode change 100755 => 100644
 rename reg-lib/cuda/{_reg_optimiser_gpu.h => CudaOptimiser.hpp} (65%)
 mode change 100755 => 100644
 rename reg-lib/cuda/{_reg_optimiser_kernels.cu => CudaOptimiserKernels.cu} (66%)
 mode change 100755 => 100644

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index cb35cf9f..446dfcc5 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-368
+369
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index 658fe990..c417e42e 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -110,6 +110,7 @@ add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE}
   Content.cpp
   DefContent.cpp
   F3dContent.cpp
+  Optimiser.cpp
   Platform.cpp
   Measure.cpp
 )
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index f3ccd5eb..6ad1061b 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "Content.h"
-#include "_reg_optimiser.h"
+#include "Optimiser.hpp"
 
 class Compute {
 public:
diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/Optimiser.cpp
similarity index 80%
rename from reg-lib/cpu/_reg_optimiser.cpp
rename to reg-lib/Optimiser.cpp
index 5eb9f661..cf696b95 100644
--- a/reg-lib/cpu/_reg_optimiser.cpp
+++ b/reg-lib/Optimiser.cpp
@@ -1,13 +1,15 @@
-/** @file _reg_optimiser.cpp
+/** @file Optimiser.cpp
  * @author Marc Modat
  * @date 20/07/2012
  */
 
-#include "_reg_optimiser.h"
+#include "Optimiser.hpp"
 
+/* *************************************************************** */
+namespace NiftyReg {
 /* *************************************************************** */
 template <class T>
-reg_optimiser<T>::reg_optimiser() {
+Optimiser<T>::Optimiser() {
     this->dofNumber = 0;
     this->dofNumberBw = 0;
     this->ndim = 3;
@@ -30,7 +32,7 @@ reg_optimiser<T>::reg_optimiser() {
 }
 /* *************************************************************** */
 template <class T>
-reg_optimiser<T>::~reg_optimiser() {
+Optimiser<T>::~Optimiser() {
     if (this->bestDof) {
         free(this->bestDof);
         this->bestDof = nullptr;
@@ -43,19 +45,19 @@ reg_optimiser<T>::~reg_optimiser() {
 }
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::Initialise(size_t nvox,
-                                  int ndim,
-                                  bool optX,
-                                  bool optY,
-                                  bool optZ,
-                                  size_t maxIt,
-                                  size_t startIt,
-                                  InterfaceOptimiser *intOpt,
-                                  T *cppData,
-                                  T *gradData,
-                                  size_t nvoxBw,
-                                  T *cppDataBw,
-                                  T *gradDataBw) {
+void Optimiser<T>::Initialise(size_t nvox,
+                              int ndim,
+                              bool optX,
+                              bool optY,
+                              bool optZ,
+                              size_t maxIt,
+                              size_t startIt,
+                              InterfaceOptimiser *intOpt,
+                              T *cppData,
+                              T *gradData,
+                              size_t nvoxBw,
+                              T *cppDataBw,
+                              T *gradDataBw) {
     this->dofNumber = nvox;
     this->ndim = ndim;
     this->optimiseX = optX;
@@ -87,7 +89,7 @@ void reg_optimiser<T>::Initialise(size_t nvox,
 }
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::RestoreBestDof() {
+void Optimiser<T>::RestoreBestDof() {
     // Restore forward transformation
     memcpy(this->currentDof, this->bestDof, this->dofNumber * sizeof(T));
     // Restore backward transformation if required
@@ -96,7 +98,7 @@ void reg_optimiser<T>::RestoreBestDof() {
 }
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::StoreCurrentDof() {
+void Optimiser<T>::StoreCurrentDof() {
     // Save forward transformation
     memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T));
     // Save backward transformation if required
@@ -105,7 +107,7 @@ void reg_optimiser<T>::StoreCurrentDof() {
 }
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::Perturbation(float length) {
+void Optimiser<T>::Perturbation(float length) {
     // Initialise the randomiser
     srand((unsigned)time(nullptr));
     // Reset the number of iteration
@@ -124,7 +126,7 @@ void reg_optimiser<T>::Perturbation(float length) {
 }
 /* *************************************************************** */
 template <class T>
-void reg_optimiser<T>::Optimise(T maxLength, T smallLength, T& startLength) {
+void Optimiser<T>::Optimise(T maxLength, T smallLength, T& startLength) {
     size_t lineIteration = 0;
     float addedLength = 0;
     float currentLength = static_cast<float>(startLength);
@@ -170,8 +172,11 @@ void reg_optimiser<T>::Optimise(T maxLength, T smallLength, T& startLength) {
     this->RestoreBestDof();
 }
 /* *************************************************************** */
+template class Optimiser<float>;
+template class Optimiser<double>;
+/* *************************************************************** */
 template <class T>
-reg_conjugateGradient<T>::reg_conjugateGradient(): reg_optimiser<T>::reg_optimiser() {
+ConjugateGradient<T>::ConjugateGradient(): Optimiser<T>::Optimiser() {
     this->array1 = nullptr;
     this->array1Bw = nullptr;
     this->array2 = nullptr;
@@ -180,7 +185,7 @@ reg_conjugateGradient<T>::reg_conjugateGradient(): reg_optimiser<T>::reg_optimis
 }
 /* *************************************************************** */
 template <class T>
-reg_conjugateGradient<T>::~reg_conjugateGradient() {
+ConjugateGradient<T>::~ConjugateGradient() {
     if (this->array1) {
         free(this->array1);
         this->array1 = nullptr;
@@ -201,20 +206,20 @@ reg_conjugateGradient<T>::~reg_conjugateGradient() {
 }
 /* *************************************************************** */
 template <class T>
-void reg_conjugateGradient<T>::Initialise(size_t nvox,
-                                          int ndim,
-                                          bool optX,
-                                          bool optY,
-                                          bool optZ,
-                                          size_t maxIt,
-                                          size_t startIt,
-                                          InterfaceOptimiser *intOpt,
-                                          T *cppData,
-                                          T *gradData,
-                                          size_t nvoxBw,
-                                          T *cppDataBw,
-                                          T *gradDataBw) {
-    reg_optimiser<T>::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
+void ConjugateGradient<T>::Initialise(size_t nvox,
+                                      int ndim,
+                                      bool optX,
+                                      bool optY,
+                                      bool optZ,
+                                      size_t maxIt,
+                                      size_t startIt,
+                                      InterfaceOptimiser *intOpt,
+                                      T *cppData,
+                                      T *gradData,
+                                      size_t nvoxBw,
+                                      T *cppDataBw,
+                                      T *gradDataBw) {
+    Optimiser<T>::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
     this->firstCall = true;
     if (this->array1) free(this->array1);
     if (this->array2) free(this->array2);
@@ -232,7 +237,7 @@ void reg_conjugateGradient<T>::Initialise(size_t nvox,
 }
 /* *************************************************************** */
 template <class T>
-void reg_conjugateGradient<T>::UpdateGradientValues() {
+void ConjugateGradient<T>::UpdateGradientValues() {
 #ifdef WIN32
     long i;
     long num = (long)this->dofNumber;
@@ -321,21 +326,22 @@ void reg_conjugateGradient<T>::UpdateGradientValues() {
 }
 /* *************************************************************** */
 template <class T>
-void reg_conjugateGradient<T>::Optimise(T maxLength,
-                                        T smallLength,
-                                        T &startLength) {
+void ConjugateGradient<T>::Optimise(T maxLength, T smallLength, T& startLength) {
     this->UpdateGradientValues();
-    reg_optimiser<T>::Optimise(maxLength, smallLength, startLength);
+    Optimiser<T>::Optimise(maxLength, smallLength, startLength);
 }
 /* *************************************************************** */
 template <class T>
-void reg_conjugateGradient<T>::Perturbation(float length) {
-    reg_optimiser<T>::Perturbation(length);
+void ConjugateGradient<T>::Perturbation(float length) {
+    Optimiser<T>::Perturbation(length);
     this->firstCall = true;
 }
 /* *************************************************************** */
+template class ConjugateGradient<float>;
+template class ConjugateGradient<double>;
+/* *************************************************************** */
 template <class T>
-reg_lbfgs<T>::reg_lbfgs(): reg_optimiser<T>::reg_optimiser() {
+Lbfgs<T>::Lbfgs(): Optimiser<T>::Optimiser() {
     this->stepToKeep = 5;
     this->oldDof = nullptr;
     this->oldGrad = nullptr;
@@ -344,7 +350,7 @@ reg_lbfgs<T>::reg_lbfgs(): reg_optimiser<T>::reg_optimiser() {
 }
 /* *************************************************************** */
 template <class T>
-reg_lbfgs<T>::~reg_lbfgs() {
+Lbfgs<T>::~Lbfgs() {
     if (this->oldDof) {
         free(this->oldDof);
         this->oldDof = nullptr;
@@ -374,20 +380,20 @@ reg_lbfgs<T>::~reg_lbfgs() {
 }
 /* *************************************************************** */
 template <class T>
-void reg_lbfgs<T>::Initialise(size_t nvox,
-                              int ndim,
-                              bool optX,
-                              bool optY,
-                              bool optZ,
-                              size_t maxIt,
-                              size_t startIt,
-                              InterfaceOptimiser *intOpt,
-                              T *cppData,
-                              T *gradData,
-                              size_t nvoxBw,
-                              T *cppDataBw,
-                              T *gradDataBw) {
-    reg_optimiser<T>::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
+void Lbfgs<T>::Initialise(size_t nvox,
+                          int ndim,
+                          bool optX,
+                          bool optY,
+                          bool optZ,
+                          size_t maxIt,
+                          size_t startIt,
+                          InterfaceOptimiser *intOpt,
+                          T *cppData,
+                          T *gradData,
+                          size_t nvoxBw,
+                          T *cppDataBw,
+                          T *gradDataBw) {
+    Optimiser<T>::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
     this->stepToKeep = 5;
     this->diffDof = (T**)malloc(this->stepToKeep * sizeof(T*));
     this->diffGrad = (T**)malloc(this->stepToKeep * sizeof(T*));
@@ -404,17 +410,15 @@ void reg_lbfgs<T>::Initialise(size_t nvox,
 }
 /* *************************************************************** */
 template <class T>
-void reg_lbfgs<T>::UpdateGradientValues() {
-
+void Lbfgs<T>::UpdateGradientValues() {
+    NR_FATAL_ERROR("Not implemented");
 }
 /* *************************************************************** */
 template <class T>
-void reg_lbfgs<T>::Optimise(T maxLength,
-                            T smallLength,
-                            T &startLength) {
+void Lbfgs<T>::Optimise(T maxLength, T smallLength, T& startLength) {
     this->UpdateGradientValues();
-    reg_optimiser<T>::Optimise(maxLength,
-                               smallLength,
-                               startLength);
+    Optimiser<T>::Optimise(maxLength, smallLength, startLength);
 }
 /* *************************************************************** */
+} // namespace NiftyReg
+/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/Optimiser.hpp
similarity index 92%
rename from reg-lib/cpu/_reg_optimiser.h
rename to reg-lib/Optimiser.hpp
index 6ada7867..3f672b54 100644
--- a/reg-lib/cpu/_reg_optimiser.h
+++ b/reg-lib/Optimiser.hpp
@@ -1,15 +1,14 @@
-/** @file _reg_optimiser.h
+/** @file Optimiser.hpp
  * @author Marc Modat
  * @date 20/07/2012
  */
 
 #pragma once
 
-#include "_reg_maths.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
+#include "_reg_tools.h"
 
+/* *************************************************************** */
+namespace NiftyReg {
 /* *************************************************************** */
 /** @brief Interface between the registration class and the optimiser
  */
@@ -23,11 +22,11 @@ class InterfaceOptimiser {
     virtual void UpdateBestObjFunctionValue() = 0;
 };
 /* *************************************************************** */
-/** @class reg_optimiser
+/** @class Optimiser
  * @brief Standard gradient ascent optimisation
  */
 template <class T>
-class reg_optimiser {
+class Optimiser {
 protected:
     bool isSymmetric;
     size_t dofNumber;
@@ -55,8 +54,8 @@ class reg_optimiser {
     virtual void UpdateGradientValues() {}
 
 public:
-    reg_optimiser();
-    virtual ~reg_optimiser();
+    Optimiser();
+    virtual ~Optimiser();
     virtual void StoreCurrentDof();
     virtual void RestoreBestDof();
     virtual size_t GetDofNumber() {
@@ -141,11 +140,11 @@ class reg_optimiser {
     virtual void Perturbation(float length);
 };
 /* *************************************************************** */
-/** @class reg_conjugateGradient
+/** @class ConjugateGradient
  * @brief Conjugate gradient ascent optimisation
  */
 template <class T>
-class reg_conjugateGradient: public reg_optimiser<T> {
+class ConjugateGradient: public Optimiser<T> {
 protected:
     T *array1;
     T *array1Bw;
@@ -159,8 +158,8 @@ class reg_conjugateGradient: public reg_optimiser<T> {
     virtual void UpdateGradientValues() override;
 
 public:
-    reg_conjugateGradient();
-    virtual ~reg_conjugateGradient();
+    ConjugateGradient();
+    virtual ~ConjugateGradient();
     virtual void Initialise(size_t nvox,
                             int ndim,
                             bool optX,
@@ -184,7 +183,7 @@ class reg_conjugateGradient: public reg_optimiser<T> {
  * @brief
  */
 template <class T>
-class reg_lbfgs: public reg_optimiser<T> {
+class Lbfgs: public Optimiser<T> {
 protected:
     size_t stepToKeep;
     T *oldDof;
@@ -198,8 +197,8 @@ class reg_lbfgs: public reg_optimiser<T> {
     virtual void UpdateGradientValues() override;
 
 public:
-    reg_lbfgs();
-    virtual ~reg_lbfgs();
+    Lbfgs();
+    virtual ~Lbfgs();
     virtual void Initialise(size_t nvox,
                             int ndim,
                             bool optX,
@@ -218,4 +217,5 @@ class reg_lbfgs: public reg_optimiser<T> {
                           T& startLength) override;
 };
 /* *************************************************************** */
-#include "_reg_optimiser.cpp"
+} // namespace NiftyReg
+/* *************************************************************** */
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 8e609ffe..77035b04 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -7,7 +7,7 @@
 #include "CudaContentCreatorFactory.h"
 #include "CudaKernelFactory.h"
 #include "CudaMeasureFactory.h"
-#include "_reg_optimiser_gpu.h"
+#include "CudaOptimiser.hpp"
 #endif
 #ifdef USE_OPENCL
 #include "ClContextSingleton.h"
@@ -115,22 +115,22 @@ Measure* Platform::CreateMeasure() const {
 }
 /* *************************************************************** */
 template<typename Type>
-reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
-                                               InterfaceOptimiser& opt,
-                                               size_t maxIterationNumber,
-                                               bool useConjGradient,
-                                               bool optimiseX,
-                                               bool optimiseY,
-                                               bool optimiseZ,
-                                               F3dContent *conBw) const {
-    reg_optimiser<Type> *optimiser;
+Optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
+                                           InterfaceOptimiser& opt,
+                                           size_t maxIterationNumber,
+                                           bool useConjGradient,
+                                           bool optimiseX,
+                                           bool optimiseY,
+                                           bool optimiseZ,
+                                           F3dContent *conBw) const {
+    Optimiser<Type> *optimiser;
     nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
     nifti_image *controlPointGridBw = conBw ? conBw->F3dContent::GetControlPointGrid() : nullptr;
     Type *controlPointGridData, *transformationGradientData;
     Type *controlPointGridDataBw = nullptr, *transformationGradientDataBw = nullptr;
 
     if (platformType == PlatformType::Cpu) {
-        optimiser = useConjGradient ? new reg_conjugateGradient<Type>() : new reg_optimiser<Type>();
+        optimiser = useConjGradient ? new ConjugateGradient<Type>() : new Optimiser<Type>();
         controlPointGridData = (Type*)controlPointGrid->data;
         transformationGradientData = (Type*)con.GetTransformationGradient()->data;
         if (conBw) {
@@ -140,7 +140,7 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
     }
 #ifdef USE_CUDA
     else if (platformType == PlatformType::Cuda) {
-        optimiser = dynamic_cast<reg_optimiser<Type>*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu());
+        optimiser = dynamic_cast<Optimiser<Type>*>(useConjGradient ? new CudaConjugateGradient() : new CudaOptimiser());
         controlPointGridData = (Type*)dynamic_cast<CudaF3dContent&>(con).GetControlPointGridCuda();
         transformationGradientData = (Type*)dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda();
         if (conBw) {
@@ -166,6 +166,6 @@ reg_optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
 
     return optimiser;
 }
-template reg_optimiser<float>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const;
-template reg_optimiser<double>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const;
+template Optimiser<float>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const;
+template Optimiser<double>* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const;
 /* *************************************************************** */
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index b049732a..71d2b3b7 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -5,7 +5,7 @@
 #include "ContentCreatorFactory.h"
 #include "KernelFactory.h"
 #include "MeasureFactory.h"
-#include "_reg_optimiser.h"
+#include "Optimiser.hpp"
 
 enum class PlatformType { Cpu, Cuda, OpenCl };
 constexpr PlatformType PlatformTypes[] = {
@@ -33,14 +33,14 @@ class Platform {
     Kernel* CreateKernel(const std::string& name, Content *con) const;
     Measure* CreateMeasure() const;
     template<typename Type>
-    reg_optimiser<Type>* CreateOptimiser(F3dContent& con,
-                                         InterfaceOptimiser& opt,
-                                         size_t maxIterationNumber,
-                                         bool useConjGradient,
-                                         bool optimiseX,
-                                         bool optimiseY,
-                                         bool optimiseZ,
-                                         F3dContent *conBw = nullptr) const;
+    Optimiser<Type>* CreateOptimiser(F3dContent& con,
+                                     InterfaceOptimiser& opt,
+                                     size_t maxIterationNumber,
+                                     bool useConjGradient,
+                                     bool optimiseX,
+                                     bool optimiseY,
+                                     bool optimiseZ,
+                                     F3dContent *conBw = nullptr) const;
 
     static constexpr bool IsCudaEnabled() {
 #ifdef USE_CUDA
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index c589afe7..4973fc99 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -25,7 +25,7 @@
 #include "_reg_lncc.h"
 #include "_reg_tools.h"
 #include "_reg_ReadWriteImage.h"
-#include "_reg_optimiser.h"
+#include "Optimiser.hpp"
 #include "Platform.h"
 
 /// @brief Base registration class
@@ -45,7 +45,7 @@ class reg_base: public InterfaceOptimiser {
     unique_ptr<Measure> measure;
 
     // Optimiser-related variables
-    unique_ptr<reg_optimiser<T>> optimiser;
+    unique_ptr<Optimiser<T>> optimiser;
     size_t maxIterationNumber;
     size_t perturbationNumber;
     bool optimiseX;
diff --git a/reg-lib/cpu/_reg_discrete_init.h b/reg-lib/cpu/_reg_discrete_init.h
index d8e1e948..d4ae28cf 100644
--- a/reg-lib/cpu/_reg_discrete_init.h
+++ b/reg-lib/cpu/_reg_discrete_init.h
@@ -15,7 +15,7 @@
 #pragma once
 
 #include "_reg_measure.h"
-#include "_reg_optimiser.h"
+#include "Optimiser.hpp"
 #include "_reg_localTrans_regul.h"
 #include "_reg_localTrans.h"
 #include "_reg_ReadWriteImage.h"
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index ca24678a..99030c7b 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -68,13 +68,13 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaLtsKernel.cpp
     CudaMeasure.cpp
     CudaNormaliseGradient.cu
+    CudaOptimiser.cu
     CudaResampleImageKernel.cpp
     CudaResampling.cu
     resampleKernel.cu
     _reg_globalTransformation_gpu.cu
     _reg_localTransformation_gpu.cu
     _reg_nmi_gpu.cu
-    _reg_optimiser_gpu.cu
     _reg_ssd_gpu.cu
     _reg_tools_gpu.cu
 )
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 08766f26..1b8f140d 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -3,8 +3,8 @@
 #include "CudaKernelConvolution.hpp"
 #include "CudaNormaliseGradient.hpp"
 #include "CudaResampling.hpp"
+#include "CudaOptimiser.hpp"
 #include "_reg_localTransformation_gpu.h"
-#include "_reg_optimiser_gpu.h"
 
 /* *************************************************************** */
 void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
@@ -116,14 +116,14 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
                                              const bool optimiseX,
                                              const bool optimiseY,
                                              const bool optimiseZ) {
-    reg_updateControlPointPosition_gpu(NiftiImage::calcVoxelNumber(dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid(), 3),
-                                       reinterpret_cast<float4*>(currentDof),
-                                       reinterpret_cast<const float4*>(bestDof),
-                                       reinterpret_cast<const float4*>(gradient),
-                                       scale,
-                                       optimiseX,
-                                       optimiseY,
-                                       optimiseZ);
+    Cuda::UpdateControlPointPosition(NiftiImage::calcVoxelNumber(dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid(), 3),
+                                     reinterpret_cast<float4*>(currentDof),
+                                     reinterpret_cast<const float4*>(bestDof),
+                                     reinterpret_cast<const float4*>(gradient),
+                                     scale,
+                                     optimiseX,
+                                     optimiseY,
+                                     optimiseZ);
 }
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) {
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/CudaOptimiser.cu
old mode 100755
new mode 100644
similarity index 67%
rename from reg-lib/cuda/_reg_optimiser_gpu.cu
rename to reg-lib/cuda/CudaOptimiser.cu
index 27f2ada8..1a094805
--- a/reg-lib/cuda/_reg_optimiser_gpu.cu
+++ b/reg-lib/cuda/CudaOptimiser.cu
@@ -1,10 +1,12 @@
-#include "_reg_optimiser_gpu.h"
-#include "_reg_optimiser_kernels.cu"
+#include "CudaOptimiser.hpp"
+#include "CudaOptimiserKernels.cu"
 #include "_reg_common_cuda_kernels.cu"
 #include <curand_kernel.h>
 
 /* *************************************************************** */
-reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser<float>::reg_optimiser() {
+namespace NiftyReg {
+/* *************************************************************** */
+CudaOptimiser::CudaOptimiser(): Optimiser<float>::Optimiser() {
     this->currentDofCuda = nullptr;
     this->currentDofBwCuda = nullptr;
     this->bestDofCuda = nullptr;
@@ -14,7 +16,7 @@ reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser<float>::reg_optimiser() {
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-reg_optimiser_gpu::~reg_optimiser_gpu() {
+CudaOptimiser::~CudaOptimiser() {
     if (this->bestDofCuda) {
         Cuda::Free(this->bestDofCuda);
         this->bestDofCuda = nullptr;
@@ -26,19 +28,19 @@ reg_optimiser_gpu::~reg_optimiser_gpu() {
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-void reg_optimiser_gpu::Initialise(size_t nvox,
-                                   int ndim,
-                                   bool optX,
-                                   bool optY,
-                                   bool optZ,
-                                   size_t maxIt,
-                                   size_t startIt,
-                                   InterfaceOptimiser *intOpt,
-                                   float *cppData,
-                                   float *gradData,
-                                   size_t nvoxBw,
-                                   float *cppDataBw,
-                                   float *gradDataBw) {
+void CudaOptimiser::Initialise(size_t nvox,
+                               int ndim,
+                               bool optX,
+                               bool optY,
+                               bool optZ,
+                               size_t maxIt,
+                               size_t startIt,
+                               InterfaceOptimiser *intOpt,
+                               float *cppData,
+                               float *gradData,
+                               size_t nvoxBw,
+                               float *cppDataBw,
+                               float *gradDataBw) {
     this->dofNumber = nvox;
     this->ndim = ndim;
     this->optimiseX = optX;
@@ -69,7 +71,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox,
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-void reg_optimiser_gpu::RestoreBestDof() {
+void CudaOptimiser::RestoreBestDof() {
     // Restore forward transformation
     NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofCuda, this->bestDofCuda, this->GetVoxNumber() * sizeof(float4), cudaMemcpyDeviceToDevice));
     // Restore backward transformation if required
@@ -77,7 +79,7 @@ void reg_optimiser_gpu::RestoreBestDof() {
         NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofBwCuda, this->bestDofBwCuda, this->GetVoxNumberBw() * sizeof(float4), cudaMemcpyDeviceToDevice));
 }
 /* *************************************************************** */
-void reg_optimiser_gpu::StoreCurrentDof() {
+void CudaOptimiser::StoreCurrentDof() {
     // Store forward transformation
     NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofCuda, this->currentDofCuda, this->GetVoxNumber() * sizeof(float4), cudaMemcpyDeviceToDevice));
     // Store backward transformation if required
@@ -85,7 +87,7 @@ void reg_optimiser_gpu::StoreCurrentDof() {
         NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofBwCuda, this->currentDofBwCuda, this->GetVoxNumberBw() * sizeof(float4), cudaMemcpyDeviceToDevice));
 }
 /* *************************************************************** */
-void reg_optimiser_gpu::Perturbation(float length) {
+void CudaOptimiser::Perturbation(float length) {
     // Reset the number of iteration
     this->currentIterationNumber = 0;
 
@@ -122,7 +124,7 @@ void reg_optimiser_gpu::Perturbation(float length) {
     this->currentObjFunctionValue = this->bestObjFunctionValue = this->intOpt->GetObjectiveFunctionValue();
 }
 /* *************************************************************** */
-reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_optimiser_gpu() {
+CudaConjugateGradient::CudaConjugateGradient(): CudaOptimiser::CudaOptimiser() {
     this->array1 = nullptr;
     this->array1Bw = nullptr;
     this->array2 = nullptr;
@@ -130,7 +132,7 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_o
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() {
+CudaConjugateGradient::~CudaConjugateGradient() {
     if (this->array1) {
         Cuda::Free(this->array1);
         this->array1 = nullptr;
@@ -150,20 +152,20 @@ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() {
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-void reg_conjugateGradient_gpu::Initialise(size_t nvox,
-                                           int ndim,
-                                           bool optX,
-                                           bool optY,
-                                           bool optZ,
-                                           size_t maxIt,
-                                           size_t startIt,
-                                           InterfaceOptimiser *intOpt,
-                                           float *cppData,
-                                           float *gradData,
-                                           size_t nvoxBw,
-                                           float *cppDataBw,
-                                           float *gradDataBw) {
-    reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
+void CudaConjugateGradient::Initialise(size_t nvox,
+                                       int ndim,
+                                       bool optX,
+                                       bool optY,
+                                       bool optZ,
+                                       size_t maxIt,
+                                       size_t startIt,
+                                       InterfaceOptimiser *intOpt,
+                                       float *cppData,
+                                       float *gradData,
+                                       size_t nvoxBw,
+                                       float *cppDataBw,
+                                       float *gradDataBw) {
+    CudaOptimiser::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
     this->firstCall = true;
     Cuda::Free(this->array1); Cuda::Free(this->array2);
     Cuda::Allocate<float4>(&this->array1, this->GetVoxNumber());
@@ -176,36 +178,36 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox,
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-void reg_conjugateGradient_gpu::UpdateGradientValues() {
+void CudaConjugateGradient::UpdateGradientValues() {
     if (this->firstCall) {
         NR_DEBUG("Conjugate gradient initialisation");
-        reg_initialiseConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber());
+        InitialiseConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber());
         if (this->isSymmetric)
-            reg_initialiseConjugateGradient_gpu(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
+            InitialiseConjugateGradient(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
         this->firstCall = false;
     } else {
         NR_DEBUG("Conjugate gradient update");
-        reg_getConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(),
-                                     this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
+        GetConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(),
+                             this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
     }
 }
 /* *************************************************************** */
-void reg_conjugateGradient_gpu::Optimise(float maxLength,
+void CudaConjugateGradient::Optimise(float maxLength,
                                          float smallLength,
                                          float& startLength) {
     this->UpdateGradientValues();
-    reg_optimiser_gpu::Optimise(maxLength, smallLength, startLength);
+    CudaOptimiser::Optimise(maxLength, smallLength, startLength);
 }
 /* *************************************************************** */
-void reg_conjugateGradient_gpu::Perturbation(float length) {
-    reg_optimiser_gpu::Perturbation(length);
+void CudaConjugateGradient::Perturbation(float length) {
+    CudaOptimiser::Perturbation(length);
     this->firstCall = true;
 }
 /* *************************************************************** */
-void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
-                                         float4 *conjugateGCuda,
-                                         float4 *conjugateHCuda,
-                                         const size_t nVoxels) {
+void CudaConjugateGradient::InitialiseConjugateGradient(float4 *gradientImageCuda,
+                                                        float4 *conjugateGCuda,
+                                                        float4 *conjugateHCuda,
+                                                        const size_t nVoxels) {
     auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
 
     const unsigned blocks = CudaContext::GetBlockSize()->reg_initialiseConjugateGradient;
@@ -213,7 +215,7 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
 
-    reg_initialiseConjugateGradient_kernel<<<gridDims, blockDims>>>(conjugateGCuda, *gradientImageTexture, (unsigned)nVoxels);
+    Cuda::InitialiseConjugateGradientKernel<<<gridDims, blockDims>>>(conjugateGCuda, *gradientImageTexture, (unsigned)nVoxels);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateHCuda, conjugateGCuda, nVoxels * sizeof(float4), cudaMemcpyDeviceToDevice));
 }
@@ -224,15 +226,15 @@ struct Float2Sum {
     }
 };
 /* *************************************************************** */
-void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
-                                  float4 *conjugateGCuda,
-                                  float4 *conjugateHCuda,
-                                  const size_t nVoxels,
-                                  const bool isSymmetric,
-                                  float4 *gradientImageBwCuda,
-                                  float4 *conjugateGBwCuda,
-                                  float4 *conjugateHBwCuda,
-                                  const size_t nVoxelsBw) {
+void CudaConjugateGradient::GetConjugateGradient(float4 *gradientImageCuda,
+                                                 float4 *conjugateGCuda,
+                                                 float4 *conjugateHCuda,
+                                                 const size_t nVoxels,
+                                                 const bool isSymmetric,
+                                                 float4 *gradientImageBwCuda,
+                                                 float4 *conjugateGBwCuda,
+                                                 float4 *conjugateHBwCuda,
+                                                 const size_t nVoxelsBw) {
     auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
     auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, nVoxels, cudaChannelFormatKindFloat, 4);
     auto conjugateHTexture = Cuda::CreateTextureObject(conjugateHCuda, nVoxels, cudaChannelFormatKindFloat, 4);
@@ -250,8 +252,8 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
     dim3 gridDims(grids, grids, 1);
 
     thrust::device_vector<float2> sumsCuda(nVoxels + nVoxels % 2);  // Make it even for thrust::inner_product
-    reg_getConjugateGradient1_kernel<<<gridDims, blockDims>>>(sumsCuda.data().get(), *gradientImageTexture,
-                                                              *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels);
+    Cuda::GetConjugateGradientKernel1<<<gridDims, blockDims>>>(sumsCuda.data().get(), *gradientImageTexture,
+                                                         *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     const size_t sumsSizeHalf = sumsCuda.size() / 2;
     const double2 gg = thrust::inner_product(sumsCuda.begin(), sumsCuda.begin() + sumsSizeHalf, sumsCuda.begin() + sumsSizeHalf,
@@ -262,8 +264,8 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
         gridDims = dim3(blocks, 1, 1);
         blockDims = dim3(grids, grids, 1);
         thrust::device_vector<float2> sumsBwCuda(nVoxelsBw + nVoxelsBw % 2);  // Make it even for thrust::inner_product
-        reg_getConjugateGradient1_kernel<<<gridDims, blockDims>>>(sumsBwCuda.data().get(), *gradientImageBwTexture,
-                                                                  *conjugateGBwTexture, *conjugateHBwTexture, (unsigned)nVoxelsBw);
+        Cuda::GetConjugateGradientKernel1<<<gridDims, blockDims>>>(sumsBwCuda.data().get(), *gradientImageBwTexture,
+                                                             *conjugateGBwTexture, *conjugateHBwTexture, (unsigned)nVoxelsBw);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         const size_t sumsBwSizeHalf = sumsBwCuda.size() / 2;
         const double2 ggBw = thrust::inner_product(sumsBwCuda.begin(), sumsBwCuda.begin() + sumsBwSizeHalf, sumsBwCuda.begin() + sumsBwSizeHalf,
@@ -275,25 +277,25 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
     grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
     gridDims = dim3(blocks, 1, 1);
     blockDims = dim3(grids, grids, 1);
-    reg_getConjugateGradient2_kernel<<<blockDims, gridDims>>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam);
+    Cuda::GetConjugateGradientKernel2<<<blockDims, gridDims>>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     if (isSymmetric) {
         grids = (unsigned)Ceil(sqrtf((float)nVoxelsBw / (float)blocks));
         gridDims = dim3(blocks, 1, 1);
         blockDims = dim3(grids, grids, 1);
-        reg_getConjugateGradient2_kernel<<<blockDims, gridDims>>>(gradientImageBwCuda, conjugateGBwCuda, conjugateHBwCuda, (unsigned)nVoxelsBw, gam);
+        Cuda::GetConjugateGradientKernel2<<<blockDims, gridDims>>>(gradientImageBwCuda, conjugateGBwCuda, conjugateHBwCuda, (unsigned)nVoxelsBw, gam);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
 }
 /* *************************************************************** */
-void reg_updateControlPointPosition_gpu(const size_t nVoxels,
-                                        float4 *controlPointImageCuda,
-                                        const float4 *bestControlPointCuda,
-                                        const float4 *gradientImageCuda,
-                                        const float scale,
-                                        const bool optimiseX,
-                                        const bool optimiseY,
-                                        const bool optimiseZ) {
+void Cuda::UpdateControlPointPosition(const size_t nVoxels,
+                                      float4 *controlPointImageCuda,
+                                      const float4 *bestControlPointCuda,
+                                      const float4 *gradientImageCuda,
+                                      const float scale,
+                                      const bool optimiseX,
+                                      const bool optimiseY,
+                                      const bool optimiseZ) {
     auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, nVoxels, cudaChannelFormatKindFloat, 4);
     auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
 
@@ -301,8 +303,10 @@ void reg_updateControlPointPosition_gpu(const size_t nVoxels,
     const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
     const dim3 blockDims(blocks, 1, 1);
     const dim3 gridDims(grids, grids, 1);
-    reg_updateControlPointPosition_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture,
-                                                                   (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ);
+    UpdateControlPointPositionKernel<<<gridDims, blockDims>>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture,
+                                                              (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
+} // namespace NiftyReg
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/CudaOptimiser.hpp
old mode 100755
new mode 100644
similarity index 65%
rename from reg-lib/cuda/_reg_optimiser_gpu.h
rename to reg-lib/cuda/CudaOptimiser.hpp
index 1950b463..fa9fec4d
--- a/reg-lib/cuda/_reg_optimiser_gpu.h
+++ b/reg-lib/cuda/CudaOptimiser.hpp
@@ -1,22 +1,24 @@
 #pragma once
 
 #include "CudaCommon.hpp"
-#include "_reg_optimiser.h"
+#include "Optimiser.hpp"
 #include "_reg_tools_gpu.h"
 
 /* *************************************************************** */
-/** @class reg_optimiser_gpu
+namespace NiftyReg {
+/* *************************************************************** */
+/** @class CudaOptimiser
  * @brief Standard gradient ascent optimisation for GPU
  */
-class reg_optimiser_gpu: public reg_optimiser<float> {
+class CudaOptimiser: public Optimiser<float> {
 protected:
     float4 *currentDofCuda, *currentDofBwCuda;
     float4 *bestDofCuda, *bestDofBwCuda;
     float4 *gradientCuda, *gradientBwCuda;
 
 public:
-    reg_optimiser_gpu();
-    virtual ~reg_optimiser_gpu();
+    CudaOptimiser();
+    virtual ~CudaOptimiser();
     virtual void StoreCurrentDof() override;
     virtual void RestoreBestDof() override;
 
@@ -56,23 +58,36 @@ class reg_optimiser_gpu: public reg_optimiser<float> {
     virtual void Perturbation(float length) override;
 };
 /* *************************************************************** */
-/** @class reg_conjugateGradient_gpu
+/** @class CudaConjugateGradient
  * @brief Conjugate gradient ascent optimisation for GPU
  */
-class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
+class CudaConjugateGradient: public CudaOptimiser {
 protected:
     float4 *array1, *array1Bw;
     float4 *array2, *array2Bw;
     bool firstCall;
 
+    void InitialiseConjugateGradient(float4 *gradientImageCuda,
+                                     float4 *conjugateGCuda,
+                                     float4 *conjugateHCuda,
+                                     const size_t nVoxels);
+    void GetConjugateGradient(float4 *gradientImageCuda,
+                              float4 *conjugateGCuda,
+                              float4 *conjugateHCuda,
+                              const size_t nVoxels,
+                              const bool isSymmetric,
+                              float4 *gradientImageBwCuda,
+                              float4 *conjugateGBwCuda,
+                              float4 *conjugateHBwCuda,
+                              const size_t nVoxelsBw);
 #ifdef NR_TESTING
 public:
 #endif
     virtual void UpdateGradientValues() override;
 
 public:
-    reg_conjugateGradient_gpu();
-    virtual ~reg_conjugateGradient_gpu();
+    CudaConjugateGradient();
+    virtual ~CudaConjugateGradient();
 
     virtual void Initialise(size_t nvox,
                             int ndim,
@@ -93,27 +108,18 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu {
     virtual void Perturbation(float length) override;
 };
 /* *************************************************************** */
-void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda,
-                                         float4 *conjugateGCuda,
-                                         float4 *conjugateHCuda,
-                                         const size_t nVoxels);
+namespace Cuda {
+/* *************************************************************** */
+void UpdateControlPointPosition(const size_t nVoxels,
+                                float4 *controlPointImageCuda,
+                                const float4 *bestControlPointCuda,
+                                const float4 *gradientImageCuda,
+                                const float scale,
+                                const bool optimiseX,
+                                const bool optimiseY,
+                                const bool optimiseZ);
 /* *************************************************************** */
-void reg_getConjugateGradient_gpu(float4 *gradientImageCuda,
-                                  float4 *conjugateGCuda,
-                                  float4 *conjugateHCuda,
-                                  const size_t nVoxels,
-                                  const bool isSymmetric,
-                                  float4 *gradientImageBwCuda,
-                                  float4 *conjugateGBwCuda,
-                                  float4 *conjugateHBwCuda,
-                                  const size_t nVoxelsBw);
+} // namespace Cuda
 /* *************************************************************** */
-void reg_updateControlPointPosition_gpu(const size_t nVoxels,
-                                        float4 *controlPointImageCuda,
-                                        const float4 *bestControlPointCuda,
-                                        const float4 *gradientImageCuda,
-                                        const float scale,
-                                        const bool optimiseX,
-                                        const bool optimiseY,
-                                        const bool optimiseZ);
+} // namespace NiftyReg
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/CudaOptimiserKernels.cu
old mode 100755
new mode 100644
similarity index 66%
rename from reg-lib/cuda/_reg_optimiser_kernels.cu
rename to reg-lib/cuda/CudaOptimiserKernels.cu
index 45b9f2a0..22a56c00
--- a/reg-lib/cuda/_reg_optimiser_kernels.cu
+++ b/reg-lib/cuda/CudaOptimiserKernels.cu
@@ -1,7 +1,9 @@
 /* *************************************************************** */
-__global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateGCuda,
-                                                       cudaTextureObject_t gradientImageTexture,
-                                                       const unsigned nVoxels) {
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+__global__ void InitialiseConjugateGradientKernel(float4 *conjugateGCuda,
+                                                  cudaTextureObject_t gradientImageTexture,
+                                                  const unsigned nVoxels) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         const float4 gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
@@ -9,11 +11,11 @@ __global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateGCuda,
     }
 }
 /* *************************************************************** */
-__global__ void reg_getConjugateGradient1_kernel(float2 *sums,
-                                                 cudaTextureObject_t gradientImageTexture,
-                                                 cudaTextureObject_t conjugateGTexture,
-                                                 cudaTextureObject_t conjugateHTexture,
-                                                 const unsigned nVoxels) {
+__global__ void GetConjugateGradientKernel1(float2 *sums,
+                                            cudaTextureObject_t gradientImageTexture,
+                                            cudaTextureObject_t conjugateGTexture,
+                                            cudaTextureObject_t conjugateHTexture,
+                                            const unsigned nVoxels) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         const float4 valueH = tex1Dfetch<float4>(conjugateHTexture, tid);
@@ -27,11 +29,11 @@ __global__ void reg_getConjugateGradient1_kernel(float2 *sums,
     }
 }
 /* *************************************************************** */
-__global__ void reg_getConjugateGradient2_kernel(float4 *gradientImageCuda,
-                                                 float4 *conjugateGCuda,
-                                                 float4 *conjugateHCuda,
-                                                 const unsigned nVoxels,
-                                                 const float scale) {
+__global__ void GetConjugateGradientKernel2(float4 *gradientImageCuda,
+                                            float4 *conjugateGCuda,
+                                            float4 *conjugateHCuda,
+                                            const unsigned nVoxels,
+                                            const float scale) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         // G = - grad
@@ -51,14 +53,14 @@ __global__ void reg_getConjugateGradient2_kernel(float4 *gradientImageCuda,
     }
 }
 /* *************************************************************** */
-__global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageCuda,
-                                                      cudaTextureObject_t bestControlPointTexture,
-                                                      cudaTextureObject_t gradientImageTexture,
-                                                      const unsigned nVoxels,
-                                                      const float scale,
-                                                      const bool optimiseX,
-                                                      const bool optimiseY,
-                                                      const bool optimiseZ) {
+__global__ void UpdateControlPointPositionKernel(float4 *controlPointImageCuda,
+                                                 cudaTextureObject_t bestControlPointTexture,
+                                                 cudaTextureObject_t gradientImageTexture,
+                                                 const unsigned nVoxels,
+                                                 const float scale,
+                                                 const bool optimiseX,
+                                                 const bool optimiseY,
+                                                 const bool optimiseZ) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nVoxels) {
         float4 value = controlPointImageCuda[tid];
@@ -74,3 +76,5 @@ __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageC
     }
 }
 /* *************************************************************** */
+} // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 0a97bd01..411af7c5 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -263,7 +263,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") {
             NiftiImage controlPointGridExpected = bestControlPointGrid;
 
             // Update the control point position
-            unique_ptr<reg_optimiser<float>> optimiser{ platform->template CreateOptimiser<float>(*content, *this, 0, true, optimiseX, optimiseY, optimiseZ) };
+            unique_ptr<Optimiser<float>> optimiser{ platform->template CreateOptimiser<float>(*content, *this, 0, true, optimiseX, optimiseY, optimiseZ) };
             unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
             compute->UpdateControlPointPosition(optimiser->GetCurrentDof(), optimiser->GetBestDof(), optimiser->GetGradient(), scale, optimiseX, optimiseY, optimiseZ);
             UpdateControlPointPosition(controlPointGridExpected, bestControlPointGrid, transGrad, scale, optimiseX, optimiseY, optimiseZ);

From 592d01d3d24172f8bed63f56ae21286e14d628ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 24 Nov 2023 15:17:38 +0000
Subject: [PATCH 251/314] Optimise Optimiser #92

---
 niftyreg_build_version.txt           |   2 +-
 reg-lib/cuda/BlockSize.hpp           |  12 --
 reg-lib/cuda/CudaCompute.cu          |  63 +++++++--
 reg-lib/cuda/CudaOptimiser.cu        | 192 ++++++++++++---------------
 reg-lib/cuda/CudaOptimiser.hpp       |  26 ----
 reg-lib/cuda/CudaOptimiserKernels.cu |  80 -----------
 6 files changed, 144 insertions(+), 231 deletions(-)
 delete mode 100644 reg-lib/cuda/CudaOptimiserKernels.cu

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 446dfcc5..5b0cffbc 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-369
+370
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 5483ae59..fe411adb 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -32,10 +32,6 @@ struct BlockSize {
     unsigned reg_defField_compose2D;
     unsigned reg_defField_compose3D;
     unsigned reg_defField_getJacobianMatrix;
-    unsigned reg_initialiseConjugateGradient;
-    unsigned reg_getConjugateGradient1;
-    unsigned reg_getConjugateGradient2;
-    unsigned reg_updateControlPointPosition;
     unsigned reg_voxelCentricToNodeCentric;
     unsigned reg_convertNmiGradientFromVoxelToRealSpace;
     unsigned reg_ApplyConvolutionWindowAlongX;
@@ -68,10 +64,6 @@ struct BlockSize100: public BlockSize {
         reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem
         reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem
         reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
-        reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem
-        reg_getConjugateGradient1 = 320; // 12 reg - 24 smem
-        reg_getConjugateGradient2 = 384; // 10 reg - 40 smem
-        reg_updateControlPointPosition = 384; // 08 reg - 24 smem
         reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem
         reg_convertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
         reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
@@ -106,10 +98,6 @@ struct BlockSize300: public BlockSize {
         reg_defField_compose2D = 1024; // 23 reg
         reg_defField_compose3D = 1024; // 24 reg
         reg_defField_getJacobianMatrix = 768; // 34 reg
-        reg_initialiseConjugateGradient = 1024; // 20 reg
-        reg_getConjugateGradient1 = 1024; // 22 reg
-        reg_getConjugateGradient2 = 1024; // 25 reg
-        reg_updateControlPointPosition = 1024; // 22 reg
         reg_voxelCentricToNodeCentric = 1024; // 23 reg
         reg_convertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg
         reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 1b8f140d..9dfae7b0 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -109,6 +109,51 @@ void CudaCompute::GetDeformationField(bool composition, bool bspline) {
                                        bspline);
 }
 /* *************************************************************** */
+template<bool optimiseX, bool optimiseY, bool optimiseZ>
+inline void UpdateControlPointPosition(float4 *currentDofCuda,
+                                       cudaTextureObject_t bestDofTexture,
+                                       cudaTextureObject_t gradientTexture,
+                                       const size_t nVoxels,
+                                       const float scale) {
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const int index) {
+        float4 dofValue = currentDofCuda[index]; scale; // To capture scale
+        const float4 bestValue = tex1Dfetch<float4>(bestDofTexture, index);
+        const float4 gradValue = tex1Dfetch<float4>(gradientTexture, index);
+        if constexpr (optimiseX)
+            dofValue.x = bestValue.x + scale * gradValue.x;
+        if constexpr (optimiseY)
+            dofValue.y = bestValue.y + scale * gradValue.y;
+        if constexpr (optimiseZ)
+            dofValue.z = bestValue.z + scale * gradValue.z;
+        currentDofCuda[index] = dofValue;
+    });
+}
+/* *************************************************************** */
+template<bool optimiseX, bool optimiseY>
+static inline void UpdateControlPointPosition(float4 *currentDofCuda,
+                                              cudaTextureObject_t bestDofTexture,
+                                              cudaTextureObject_t gradientTexture,
+                                              const size_t nVoxels,
+                                              const float scale,
+                                              const bool optimiseZ) {
+    auto updateControlPointPosition = UpdateControlPointPosition<optimiseX, optimiseY, true>;
+    if (!optimiseZ) updateControlPointPosition = UpdateControlPointPosition<optimiseX, optimiseY, false>;
+    updateControlPointPosition(currentDofCuda, bestDofTexture, gradientTexture, nVoxels, scale);
+}
+/* *************************************************************** */
+template<bool optimiseX>
+static inline void UpdateControlPointPosition(float4 *currentDofCuda,
+                                              cudaTextureObject_t bestDofTexture,
+                                              cudaTextureObject_t gradientTexture,
+                                              const size_t nVoxels,
+                                              const float scale,
+                                              const bool optimiseY,
+                                              const bool optimiseZ) {
+    auto updateControlPointPosition = UpdateControlPointPosition<optimiseX, true>;
+    if (!optimiseY) updateControlPointPosition = UpdateControlPointPosition<optimiseX, false>;
+    updateControlPointPosition(currentDofCuda, bestDofTexture, gradientTexture, nVoxels, scale, optimiseZ);
+}
+/* *************************************************************** */
 void CudaCompute::UpdateControlPointPosition(float *currentDof,
                                              const float *bestDof,
                                              const float *gradient,
@@ -116,14 +161,16 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
                                              const bool optimiseX,
                                              const bool optimiseY,
                                              const bool optimiseZ) {
-    Cuda::UpdateControlPointPosition(NiftiImage::calcVoxelNumber(dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid(), 3),
-                                     reinterpret_cast<float4*>(currentDof),
-                                     reinterpret_cast<const float4*>(bestDof),
-                                     reinterpret_cast<const float4*>(gradient),
-                                     scale,
-                                     optimiseX,
-                                     optimiseY,
-                                     optimiseZ);
+    const nifti_image *controlPointGrid = dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid();
+    const bool is3d = controlPointGrid->nz > 1;
+    const size_t nVoxels = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
+    auto bestDofTexturePtr = Cuda::CreateTextureObject(reinterpret_cast<const float4*>(bestDof), nVoxels, cudaChannelFormatKindFloat, 4);
+    auto gradientTexturePtr = Cuda::CreateTextureObject(reinterpret_cast<const float4*>(gradient), nVoxels, cudaChannelFormatKindFloat, 4);
+
+    auto updateControlPointPosition = ::UpdateControlPointPosition<true>;
+    if (!optimiseX) updateControlPointPosition = ::UpdateControlPointPosition<false>;
+    updateControlPointPosition(reinterpret_cast<float4*>(currentDof), *bestDofTexturePtr, *gradientTexturePtr,
+                               nVoxels, scale, optimiseY, is3d ? optimiseZ : false);
 }
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) {
diff --git a/reg-lib/cuda/CudaOptimiser.cu b/reg-lib/cuda/CudaOptimiser.cu
index 1a094805..587b4f7d 100644
--- a/reg-lib/cuda/CudaOptimiser.cu
+++ b/reg-lib/cuda/CudaOptimiser.cu
@@ -1,5 +1,4 @@
 #include "CudaOptimiser.hpp"
-#include "CudaOptimiserKernels.cu"
 #include "_reg_common_cuda_kernels.cu"
 #include <curand_kernel.h>
 
@@ -178,20 +177,6 @@ void CudaConjugateGradient::Initialise(size_t nvox,
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-void CudaConjugateGradient::UpdateGradientValues() {
-    if (this->firstCall) {
-        NR_DEBUG("Conjugate gradient initialisation");
-        InitialiseConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber());
-        if (this->isSymmetric)
-            InitialiseConjugateGradient(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
-        this->firstCall = false;
-    } else {
-        NR_DEBUG("Conjugate gradient update");
-        GetConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(),
-                             this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
-    }
-}
-/* *************************************************************** */
 void CudaConjugateGradient::Optimise(float maxLength,
                                          float smallLength,
                                          float& startLength) {
@@ -204,108 +189,107 @@ void CudaConjugateGradient::Perturbation(float length) {
     this->firstCall = true;
 }
 /* *************************************************************** */
-void CudaConjugateGradient::InitialiseConjugateGradient(float4 *gradientImageCuda,
-                                                        float4 *conjugateGCuda,
-                                                        float4 *conjugateHCuda,
-                                                        const size_t nVoxels) {
-    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
-
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_initialiseConjugateGradient;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
-    const dim3 gridDims(grids, grids, 1);
-    const dim3 blockDims(blocks, 1, 1);
-
-    Cuda::InitialiseConjugateGradientKernel<<<gridDims, blockDims>>>(conjugateGCuda, *gradientImageTexture, (unsigned)nVoxels);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateHCuda, conjugateGCuda, nVoxels * sizeof(float4), cudaMemcpyDeviceToDevice));
+void InitialiseConjugateGradient(float4 *gradientCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, const size_t nVoxels) {
+    auto gradientTexturePtr = Cuda::CreateTextureObject(gradientCuda, nVoxels, cudaChannelFormatKindFloat, 4);
+    auto gradientTexture = *gradientTexturePtr;
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const int index) {
+        const float4 gradValue = tex1Dfetch<float4>(gradientTexture, index);
+        conjugateGCuda[index] = conjugateHCuda[index] = make_float4(-gradValue.x, -gradValue.y, -gradValue.z, 0);
+    });
 }
 /* *************************************************************** */
-struct Float2Sum {
-    __host__ __device__ double2 operator()(const float2& a, const float2& b) const {
-        return make_double2((double)a.x + (double)b.x, (double)a.y + (double)b.y);
-    }
-};
-/* *************************************************************** */
-void CudaConjugateGradient::GetConjugateGradient(float4 *gradientImageCuda,
-                                                 float4 *conjugateGCuda,
-                                                 float4 *conjugateHCuda,
-                                                 const size_t nVoxels,
-                                                 const bool isSymmetric,
-                                                 float4 *gradientImageBwCuda,
-                                                 float4 *conjugateGBwCuda,
-                                                 float4 *conjugateHBwCuda,
-                                                 const size_t nVoxelsBw) {
-    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
-    auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, nVoxels, cudaChannelFormatKindFloat, 4);
-    auto conjugateHTexture = Cuda::CreateTextureObject(conjugateHCuda, nVoxels, cudaChannelFormatKindFloat, 4);
-    Cuda::UniqueTextureObjectPtr gradientImageBwTexture, conjugateGBwTexture, conjugateHBwTexture;
+void GetConjugateGradient(float4 *gradientCuda,
+                          float4 *conjugateGCuda,
+                          float4 *conjugateHCuda,
+                          const size_t nVoxels,
+                          const bool isSymmetric,
+                          float4 *gradientBwCuda,
+                          float4 *conjugateGBwCuda,
+                          float4 *conjugateHBwCuda,
+                          const size_t nVoxelsBw) {
+    auto gradientTexturePtr = Cuda::CreateTextureObject(gradientCuda, nVoxels, cudaChannelFormatKindFloat, 4);
+    auto conjugateGTexturePtr = Cuda::CreateTextureObject(conjugateGCuda, nVoxels, cudaChannelFormatKindFloat, 4);
+    auto conjugateHTexturePtr = Cuda::CreateTextureObject(conjugateHCuda, nVoxels, cudaChannelFormatKindFloat, 4);
+    auto gradientTexture = *gradientTexturePtr;
+    auto conjugateGTexture = *conjugateGTexturePtr;
+    auto conjugateHTexture = *conjugateHTexturePtr;
+    Cuda::UniqueTextureObjectPtr gradientBwTexturePtr, conjugateGBwTexturePtr, conjugateHBwTexturePtr;
+    cudaTextureObject_t gradientBwTexture = 0, conjugateGBwTexture = 0, conjugateHBwTexture = 0;
     if (isSymmetric) {
-        gradientImageBwTexture = Cuda::CreateTextureObject(gradientImageBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4);
-        conjugateGBwTexture = Cuda::CreateTextureObject(conjugateGBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4);
-        conjugateHBwTexture = Cuda::CreateTextureObject(conjugateHBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4);
+        gradientBwTexturePtr = Cuda::CreateTextureObject(gradientBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4);
+        conjugateGBwTexturePtr = Cuda::CreateTextureObject(conjugateGBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4);
+        conjugateHBwTexturePtr = Cuda::CreateTextureObject(conjugateHBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4);
+        gradientBwTexture = *gradientBwTexturePtr;
+        conjugateGBwTexture = *conjugateGBwTexturePtr;
+        conjugateHBwTexture = *conjugateHBwTexturePtr;
     }
 
     // gam = sum((grad+g)*grad)/sum(HxG);
-    unsigned blocks = CudaContext::GetBlockSize()->reg_getConjugateGradient1;
-    unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
-    dim3 blockDims(blocks, 1, 1);
-    dim3 gridDims(grids, grids, 1);
+    auto calcGam = []__device__(cudaTextureObject_t gradientTexture, cudaTextureObject_t conjugateGTexture,
+                                cudaTextureObject_t conjugateHTexture, const int index) {
+        const float4 hValue = tex1Dfetch<float4>(conjugateHTexture, index);
+        const float4 gValue = tex1Dfetch<float4>(conjugateGTexture, index);
+        const float gg = gValue.x * hValue.x + gValue.y * hValue.y + gValue.z * hValue.z;
+
+        const float4 grad = tex1Dfetch<float4>(gradientTexture, index);
+        const float dgg = (grad.x + gValue.x) * grad.x + (grad.y + gValue.y) * grad.y + (grad.z + gValue.z) * grad.z;
+
+        return make_double2(dgg, gg);
+    };
 
-    thrust::device_vector<float2> sumsCuda(nVoxels + nVoxels % 2);  // Make it even for thrust::inner_product
-    Cuda::GetConjugateGradientKernel1<<<gridDims, blockDims>>>(sumsCuda.data().get(), *gradientImageTexture,
-                                                         *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    const size_t sumsSizeHalf = sumsCuda.size() / 2;
-    const double2 gg = thrust::inner_product(sumsCuda.begin(), sumsCuda.begin() + sumsSizeHalf, sumsCuda.begin() + sumsSizeHalf,
-                                             make_double2(0, 0), thrust::plus<double2>(), Float2Sum());
-    float gam = static_cast<float>(gg.x / gg.y);
+    double gam;
+    thrust::counting_iterator<int> it(0);
+    const double2 gg = thrust::transform_reduce(thrust::device, it, it + nVoxels, [=]__device__(const int index) {
+        return calcGam(gradientTexture, conjugateGTexture, conjugateHTexture, index);
+    }, make_double2(0, 0), thrust::plus<double2>());
     if (isSymmetric) {
-        grids = (unsigned)Ceil(sqrtf((float)nVoxelsBw / (float)blocks));
-        gridDims = dim3(blocks, 1, 1);
-        blockDims = dim3(grids, grids, 1);
-        thrust::device_vector<float2> sumsBwCuda(nVoxelsBw + nVoxelsBw % 2);  // Make it even for thrust::inner_product
-        Cuda::GetConjugateGradientKernel1<<<gridDims, blockDims>>>(sumsBwCuda.data().get(), *gradientImageBwTexture,
-                                                             *conjugateGBwTexture, *conjugateHBwTexture, (unsigned)nVoxelsBw);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-        const size_t sumsBwSizeHalf = sumsBwCuda.size() / 2;
-        const double2 ggBw = thrust::inner_product(sumsBwCuda.begin(), sumsBwCuda.begin() + sumsBwSizeHalf, sumsBwCuda.begin() + sumsBwSizeHalf,
-                                                   make_double2(0, 0), thrust::plus<double2>(), Float2Sum());
-        gam = static_cast<float>((gg.x + ggBw.x) / (gg.y + ggBw.y));
-    }
+        it = thrust::counting_iterator<int>(0);
+        const double2 ggBw = thrust::transform_reduce(thrust::device, it, it + nVoxelsBw, [=]__device__(const int index) {
+            return calcGam(gradientBwTexture, conjugateGBwTexture, conjugateHBwTexture, index);
+        }, make_double2(0, 0), thrust::plus<double2>());
+        gam = (gg.x + ggBw.x) / (gg.y + ggBw.y);
+    } else gam = gg.x / gg.y;
+
+    // Conjugate gradient
+    auto conjugate = [gam]__device__(float4 *gradientCuda, float4 *conjugateGCuda, float4 *conjugateHCuda,
+                                     cudaTextureObject_t gradientTexture, cudaTextureObject_t conjugateHTexture, const int index) {
+        // G = -grad
+        float4 gradGValue = tex1Dfetch<float4>(gradientTexture, index);
+        gradGValue = make_float4(-gradGValue.x, -gradGValue.y, -gradGValue.z, 0);
+        conjugateGCuda[index] = gradGValue;
+
+        // H = G + gam * H
+        float4 gradHValue = tex1Dfetch<float4>(conjugateHTexture, index);
+        gradHValue = make_float4(gradGValue.x + gam * gradHValue.x,
+                                 gradGValue.y + gam * gradHValue.y,
+                                 gradGValue.z + gam * gradHValue.z, 0);
+        conjugateHCuda[index] = gradHValue;
+
+        gradientCuda[index] = make_float4(-gradHValue.x, -gradHValue.y, -gradHValue.z, 0);
+    };
 
-    blocks = (unsigned)CudaContext::GetBlockSize()->reg_getConjugateGradient2;
-    grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
-    gridDims = dim3(blocks, 1, 1);
-    blockDims = dim3(grids, grids, 1);
-    Cuda::GetConjugateGradientKernel2<<<blockDims, gridDims>>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const int index) {
+        conjugate(gradientCuda, conjugateGCuda, conjugateHCuda, gradientTexture, conjugateHTexture, index);
+    });
     if (isSymmetric) {
-        grids = (unsigned)Ceil(sqrtf((float)nVoxelsBw / (float)blocks));
-        gridDims = dim3(blocks, 1, 1);
-        blockDims = dim3(grids, grids, 1);
-        Cuda::GetConjugateGradientKernel2<<<blockDims, gridDims>>>(gradientImageBwCuda, conjugateGBwCuda, conjugateHBwCuda, (unsigned)nVoxelsBw, gam);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+        thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxelsBw, [=]__device__(const int index) {
+            conjugate(gradientBwCuda, conjugateGBwCuda, conjugateHBwCuda, gradientBwTexture, conjugateHBwTexture, index);
+        });
     }
 }
 /* *************************************************************** */
-void Cuda::UpdateControlPointPosition(const size_t nVoxels,
-                                      float4 *controlPointImageCuda,
-                                      const float4 *bestControlPointCuda,
-                                      const float4 *gradientImageCuda,
-                                      const float scale,
-                                      const bool optimiseX,
-                                      const bool optimiseY,
-                                      const bool optimiseZ) {
-    auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, nVoxels, cudaChannelFormatKindFloat, 4);
-    auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4);
-
-    const unsigned blocks = (unsigned)CudaContext::GetBlockSize()->reg_updateControlPointPosition;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks));
-    const dim3 blockDims(blocks, 1, 1);
-    const dim3 gridDims(grids, grids, 1);
-    UpdateControlPointPositionKernel<<<gridDims, blockDims>>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture,
-                                                              (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+void CudaConjugateGradient::UpdateGradientValues() {
+    if (this->firstCall) {
+        NR_DEBUG("Conjugate gradient initialisation");
+        InitialiseConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber());
+        if (this->isSymmetric)
+            InitialiseConjugateGradient(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
+        this->firstCall = false;
+    } else {
+        NR_DEBUG("Conjugate gradient update");
+        GetConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(),
+                             this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw());
+    }
 }
 /* *************************************************************** */
 } // namespace NiftyReg
diff --git a/reg-lib/cuda/CudaOptimiser.hpp b/reg-lib/cuda/CudaOptimiser.hpp
index fa9fec4d..56a1aceb 100644
--- a/reg-lib/cuda/CudaOptimiser.hpp
+++ b/reg-lib/cuda/CudaOptimiser.hpp
@@ -67,19 +67,6 @@ class CudaConjugateGradient: public CudaOptimiser {
     float4 *array2, *array2Bw;
     bool firstCall;
 
-    void InitialiseConjugateGradient(float4 *gradientImageCuda,
-                                     float4 *conjugateGCuda,
-                                     float4 *conjugateHCuda,
-                                     const size_t nVoxels);
-    void GetConjugateGradient(float4 *gradientImageCuda,
-                              float4 *conjugateGCuda,
-                              float4 *conjugateHCuda,
-                              const size_t nVoxels,
-                              const bool isSymmetric,
-                              float4 *gradientImageBwCuda,
-                              float4 *conjugateGBwCuda,
-                              float4 *conjugateHBwCuda,
-                              const size_t nVoxelsBw);
 #ifdef NR_TESTING
 public:
 #endif
@@ -108,18 +95,5 @@ class CudaConjugateGradient: public CudaOptimiser {
     virtual void Perturbation(float length) override;
 };
 /* *************************************************************** */
-namespace Cuda {
-/* *************************************************************** */
-void UpdateControlPointPosition(const size_t nVoxels,
-                                float4 *controlPointImageCuda,
-                                const float4 *bestControlPointCuda,
-                                const float4 *gradientImageCuda,
-                                const float scale,
-                                const bool optimiseX,
-                                const bool optimiseY,
-                                const bool optimiseZ);
-/* *************************************************************** */
-} // namespace Cuda
-/* *************************************************************** */
 } // namespace NiftyReg
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaOptimiserKernels.cu b/reg-lib/cuda/CudaOptimiserKernels.cu
deleted file mode 100644
index 22a56c00..00000000
--- a/reg-lib/cuda/CudaOptimiserKernels.cu
+++ /dev/null
@@ -1,80 +0,0 @@
-/* *************************************************************** */
-namespace NiftyReg::Cuda {
-/* *************************************************************** */
-__global__ void InitialiseConjugateGradientKernel(float4 *conjugateGCuda,
-                                                  cudaTextureObject_t gradientImageTexture,
-                                                  const unsigned nVoxels) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < nVoxels) {
-        const float4 gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
-        conjugateGCuda[tid] = make_float4(-gradValue.x, -gradValue.y, -gradValue.z, 0);
-    }
-}
-/* *************************************************************** */
-__global__ void GetConjugateGradientKernel1(float2 *sums,
-                                            cudaTextureObject_t gradientImageTexture,
-                                            cudaTextureObject_t conjugateGTexture,
-                                            cudaTextureObject_t conjugateHTexture,
-                                            const unsigned nVoxels) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < nVoxels) {
-        const float4 valueH = tex1Dfetch<float4>(conjugateHTexture, tid);
-        const float4 valueG = tex1Dfetch<float4>(conjugateGTexture, tid);
-        const float gg = valueG.x * valueH.x + valueG.y * valueH.y + valueG.z * valueH.z;
-
-        const float4 grad = tex1Dfetch<float4>(gradientImageTexture, tid);
-        const float dgg = (grad.x + valueG.x) * grad.x + (grad.y + valueG.y) * grad.y + (grad.z + valueG.z) * grad.z;
-
-        sums[tid] = make_float2(dgg, gg);
-    }
-}
-/* *************************************************************** */
-__global__ void GetConjugateGradientKernel2(float4 *gradientImageCuda,
-                                            float4 *conjugateGCuda,
-                                            float4 *conjugateHCuda,
-                                            const unsigned nVoxels,
-                                            const float scale) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < nVoxels) {
-        // G = - grad
-        float4 gradGValue = gradientImageCuda[tid];
-        gradGValue = make_float4(-gradGValue.x, -gradGValue.y, -gradGValue.z, 0);
-        conjugateGCuda[tid] = gradGValue;
-
-        // H = G + gam * H
-        float4 gradHValue = conjugateHCuda[tid];
-        gradHValue = make_float4(gradGValue.x + scale * gradHValue.x,
-                                 gradGValue.y + scale * gradHValue.y,
-                                 gradGValue.z + scale * gradHValue.z,
-                                 0);
-        conjugateHCuda[tid] = gradHValue;
-
-        gradientImageCuda[tid] = make_float4(-gradHValue.x, -gradHValue.y, -gradHValue.z, 0);
-    }
-}
-/* *************************************************************** */
-__global__ void UpdateControlPointPositionKernel(float4 *controlPointImageCuda,
-                                                 cudaTextureObject_t bestControlPointTexture,
-                                                 cudaTextureObject_t gradientImageTexture,
-                                                 const unsigned nVoxels,
-                                                 const float scale,
-                                                 const bool optimiseX,
-                                                 const bool optimiseY,
-                                                 const bool optimiseZ) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < nVoxels) {
-        float4 value = controlPointImageCuda[tid];
-        const float4 bestValue = tex1Dfetch<float4>(bestControlPointTexture, tid);
-        const float4 gradValue = tex1Dfetch<float4>(gradientImageTexture, tid);
-        if (optimiseX)
-            value.x = bestValue.x + scale * gradValue.x;
-        if (optimiseY)
-            value.y = bestValue.y + scale * gradValue.y;
-        if (optimiseZ)
-            value.z = bestValue.z + scale * gradValue.z;
-        controlPointImageCuda[tid] = value;
-    }
-}
-/* *************************************************************** */
-} // namespace NiftyReg::Cuda
-/* *************************************************************** */

From 25aba87c70549a9ca273d6f87493df88ffbe9798 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 24 Nov 2023 15:18:34 +0000
Subject: [PATCH 252/314] Refactorisations

---
 niftyreg_build_version.txt      |  2 +-
 reg-lib/cpu/_reg_localTrans.cpp |  3 ++-
 reg-lib/cuda/CudaContext.cpp    | 44 ++++++++++++++++-----------------
 reg-lib/cuda/_reg_ssd_gpu.cu    |  2 +-
 4 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 5b0cffbc..67bf40fe 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-370
+371
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 269e4e98..90967d07 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -13,7 +13,8 @@
 #include "_reg_localTrans.h"
 #include "_reg_maths_eigen.h"
 
-#ifdef BUILD_TESTS
+// Due to SSE usage creates incorrect test results
+#if defined(BUILD_TESTS) && !defined(NDEBUG)
 #undef USE_SSE
 #endif
 
diff --git a/reg-lib/cuda/CudaContext.cpp b/reg-lib/cuda/CudaContext.cpp
index 9be42aca..ebfd5173 100644
--- a/reg-lib/cuda/CudaContext.cpp
+++ b/reg-lib/cuda/CudaContext.cpp
@@ -51,22 +51,22 @@ void CudaContext::PickCard(unsigned deviceId = 999) {
         return;
     }
 
-    // following code is from cutGetMaxGflopsDeviceId()
-    int max_gflops_device = 0;
-    int max_gflops = 0;
-    unsigned current_device = 0;
-    while (current_device < numDevices) {
-        cudaGetDeviceProperties(&deviceProp, current_device);
+    // The following code is from cutGetMaxGflopsDeviceId()
+    int maxGflopsDevice = 0;
+    int maxGflops = 0;
+    unsigned currentDevice = 0;
+    while (currentDevice < numDevices) {
+        cudaGetDeviceProperties(&deviceProp, currentDevice);
         int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate;
-        if (gflops > max_gflops) {
-            max_gflops = gflops;
-            max_gflops_device = current_device;
+        if (gflops > maxGflops) {
+            maxGflops = gflops;
+            maxGflopsDevice = currentDevice;
         }
-        ++current_device;
+        ++currentDevice;
     }
-    NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device));
-    NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device));
-    NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device));
+    NR_CUDA_SAFE_CALL(cudaSetDevice(maxGflopsDevice));
+    NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, maxGflopsDevice));
+    NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, maxGflopsDevice));
 
     if (deviceProp.major < 1) {
         NR_FATAL_ERROR("The specified graphics card does not exist");
@@ -77,15 +77,15 @@ void CudaContext::PickCard(unsigned deviceId = 999) {
         if (deviceProp.totalGlobalMem != total)
             NR_FATAL_ERROR("The CUDA card "s + deviceProp.name + " does not seem to be available\n"s +
                            "Expected total memory: "s + std::to_string(deviceProp.totalGlobalMem / (1024 * 1024)) +
-                           " MB - Recovered total memory: "s + std::to_string(total / (1024 * 1024)) + " MB");
-        NR_DEBUG("The following device is used: "s + deviceProp.name);
-        NR_DEBUG("It has "s + std::to_string(free / (1024 * 1024)) + " MB free out of "s + std::to_string(total / (1024 * 1024)) + " MB");
-        NR_DEBUG("The CUDA compute capability is "s + std::to_string(deviceProp.major) + "."s + std::to_string(deviceProp.minor));
-        NR_DEBUG("The shared memory size in bytes: "s + std::to_string(deviceProp.sharedMemPerBlock));
-        NR_DEBUG("The CUDA version is "s + std::to_string(CUDART_VERSION));
-        NR_DEBUG("The card clock rate is "s + std::to_string(deviceProp.clockRate / 1000) + " MHz");
-        NR_DEBUG("The card has "s + std::to_string(deviceProp.multiProcessorCount) + " multiprocessors");
-        cudaIdx = max_gflops_device;
+                           " MB - Recovered total memory: "s + std::to_string(total / (1024 * 1024)) + " MB"s);
+        NR_DEBUG("The following device is used: " << deviceProp.name);
+        NR_DEBUG("It has " << free / (1024 * 1024) << " MB free out of " << total / (1024 * 1024) << " MB");
+        NR_DEBUG("The CUDA compute capability is " << deviceProp.major << "." << deviceProp.minor);
+        NR_DEBUG("The shared memory size in bytes: " << deviceProp.sharedMemPerBlock);
+        NR_DEBUG("The CUDA version is " << CUDART_VERSION);
+        NR_DEBUG("The card clock rate is " << deviceProp.clockRate / 1000 << " MHz");
+        NR_DEBUG("The card has " << deviceProp.multiProcessorCount << " multiprocessors");
+        cudaIdx = maxGflopsDevice;
         cudaGetDeviceProperties(&deviceProp, cudaIdx);
         if (deviceProp.major > 1) {
             isCardDoubleCapable = true;
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 2a0a775f..77a2f739 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -83,7 +83,7 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage,
             const double weight = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
             const double diff = refValue - warValue;
             return { Square(diff) * weight, weight };  // ssd and count
-        }, make_double2(0.0, 0.0), thrust::plus<double2>());
+        }, make_double2(0, 0), thrust::plus<double2>());
 
         ssd += (ssdAndCount.x * timePointWeights[t]) / ssdAndCount.y;
     }

From b9c9beca65c9c7f6862e1c1ca50c70eaafd1fbfa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 27 Nov 2023 14:58:05 +0000
Subject: [PATCH 253/314] Use float gam instead of double in CudaOptimiser

Even though tests show otherwise, using float gets better results in real world scenarios.
---
 niftyreg_build_version.txt    | 2 +-
 reg-lib/cuda/CudaOptimiser.cu | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 67bf40fe..ba300673 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-371
+372
diff --git a/reg-lib/cuda/CudaOptimiser.cu b/reg-lib/cuda/CudaOptimiser.cu
index 587b4f7d..fb662d23 100644
--- a/reg-lib/cuda/CudaOptimiser.cu
+++ b/reg-lib/cuda/CudaOptimiser.cu
@@ -237,7 +237,7 @@ void GetConjugateGradient(float4 *gradientCuda,
         return make_double2(dgg, gg);
     };
 
-    double gam;
+    float gam;
     thrust::counting_iterator<int> it(0);
     const double2 gg = thrust::transform_reduce(thrust::device, it, it + nVoxels, [=]__device__(const int index) {
         return calcGam(gradientTexture, conjugateGTexture, conjugateHTexture, index);
@@ -247,8 +247,8 @@ void GetConjugateGradient(float4 *gradientCuda,
         const double2 ggBw = thrust::transform_reduce(thrust::device, it, it + nVoxelsBw, [=]__device__(const int index) {
             return calcGam(gradientBwTexture, conjugateGBwTexture, conjugateHBwTexture, index);
         }, make_double2(0, 0), thrust::plus<double2>());
-        gam = (gg.x + ggBw.x) / (gg.y + ggBw.y);
-    } else gam = gg.x / gg.y;
+        gam = static_cast<float>((gg.x + ggBw.x) / (gg.y + ggBw.y));
+    } else gam = static_cast<float>(gg.x / gg.y);
 
     // Conjugate gradient
     auto conjugate = [gam]__device__(float4 *gradientCuda, float4 *conjugateGCuda, float4 *conjugateHCuda,

From 708106f0592549203e05083f03a205b767bdce7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 28 Nov 2023 09:29:18 +0000
Subject: [PATCH 254/314] Optimise CudaResampling #92

---
 niftyreg_build_version.txt            |   2 +-
 reg-lib/cuda/BlockSize.hpp            |  12 --
 reg-lib/cuda/CudaCompute.cu           |  18 +-
 reg-lib/cuda/CudaResampling.cu        | 215 ++++++++++++++++-----
 reg-lib/cuda/CudaResampling.hpp       |   1 +
 reg-lib/cuda/CudaResamplingKernels.cu | 258 --------------------------
 6 files changed, 185 insertions(+), 321 deletions(-)
 delete mode 100644 reg-lib/cuda/CudaResamplingKernels.cu

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index ba300673..a5c3fde3 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-372
+373
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index fe411adb..50a0cfbc 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -38,10 +38,6 @@ struct BlockSize {
     unsigned reg_ApplyConvolutionWindowAlongY;
     unsigned reg_ApplyConvolutionWindowAlongZ;
     unsigned Arithmetic;
-    unsigned reg_resampleImage2D;
-    unsigned reg_resampleImage3D;
-    unsigned reg_getImageGradient2D;
-    unsigned reg_getImageGradient3D;
 };
 /* *************************************************************** */
 struct BlockSize100: public BlockSize {
@@ -70,10 +66,6 @@ struct BlockSize100: public BlockSize {
         reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
         reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem
         Arithmetic = 384; // 5 reg - 24 smem
-        reg_resampleImage2D = 320; // 10 reg - 24 smem - 12 cmem
-        reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem
-        reg_getImageGradient2D = 512; // 16 reg - 24 smem - 20 cmem - 24 lmem
-        reg_getImageGradient3D = 320; // 24 reg - 24 smem - 16 cmem - 32 lmem
         NR_FUNC_CALLED();
     }
 };
@@ -104,10 +96,6 @@ struct BlockSize300: public BlockSize {
         reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg
         reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg
         Arithmetic = 1024; //
-        reg_resampleImage2D = 1024; // 23 reg
-        reg_resampleImage3D = 1024; // 24 reg
-        reg_getImageGradient2D = 1024; // 34 reg
-        reg_getImageGradient3D = 1024; // 34 reg
         NR_FUNC_CALLED();
     }
 };
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 9dfae7b0..629ed5e0 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -175,14 +175,16 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) {
     CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
-    Cuda::GetImageGradient(con.Content::GetFloating(),
-                           con.GetFloatingCuda(),
-                           con.GetDeformationFieldCuda(),
-                           con.GetWarpedGradientCuda(),
-                           con.GetActiveVoxelNumber(),
-                           interpolation,
-                           paddingValue,
-                           activeTimePoint);
+    const nifti_image *floating = con.Content::GetFloating();
+    auto getImageGradient = floating->nz > 1 ? Cuda::GetImageGradient<true> : Cuda::GetImageGradient<false>;
+    getImageGradient(floating,
+                     con.GetFloatingCuda(),
+                     con.GetDeformationFieldCuda(),
+                     con.GetWarpedGradientCuda(),
+                     con.GetActiveVoxelNumber(),
+                     interpolation,
+                     paddingValue,
+                     activeTimePoint);
 }
 /* *************************************************************** */
 double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu
index f72f6bee..ee2deab5 100644
--- a/reg-lib/cuda/CudaResampling.cu
+++ b/reg-lib/cuda/CudaResampling.cu
@@ -11,11 +11,54 @@
  */
 
 #include "CudaResampling.hpp"
-#include "CudaResamplingKernels.cu"
 
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
 /* *************************************************************** */
+template<typename T>
+__inline__ __device__ void InterpLinearKernel(T relative, T (&basis)[2]) {
+    basis[1] = relative;
+    basis[0] = 1.f - relative;
+}
+/* *************************************************************** */
+template<typename T, bool is3d>
+__inline__ __device__ void TransformInterpolate(const mat44 matrix, const float4 realDeformation, int3& previous,
+                                                T (&xBasis)[2], T (&yBasis)[2], T (&zBasis)[2]) {
+    // Get the voxel-based deformation
+    T voxelDeformation[is3d ? 3 : 2];
+    if constexpr (is3d) {
+        voxelDeformation[0] = (static_cast<T>(matrix.m[0][0]) * static_cast<T>(realDeformation.x) +
+                               static_cast<T>(matrix.m[0][1]) * static_cast<T>(realDeformation.y) +
+                               static_cast<T>(matrix.m[0][2]) * static_cast<T>(realDeformation.z) +
+                               static_cast<T>(matrix.m[0][3]));
+        voxelDeformation[1] = (static_cast<T>(matrix.m[1][0]) * static_cast<T>(realDeformation.x) +
+                               static_cast<T>(matrix.m[1][1]) * static_cast<T>(realDeformation.y) +
+                               static_cast<T>(matrix.m[1][2]) * static_cast<T>(realDeformation.z) +
+                               static_cast<T>(matrix.m[1][3]));
+        voxelDeformation[2] = (static_cast<T>(matrix.m[2][0]) * static_cast<T>(realDeformation.x) +
+                               static_cast<T>(matrix.m[2][1]) * static_cast<T>(realDeformation.y) +
+                               static_cast<T>(matrix.m[2][2]) * static_cast<T>(realDeformation.z) +
+                               static_cast<T>(matrix.m[2][3]));
+    } else {
+        voxelDeformation[0] = (static_cast<T>(matrix.m[0][0]) * static_cast<T>(realDeformation.x) +
+                               static_cast<T>(matrix.m[0][1]) * static_cast<T>(realDeformation.y) +
+                               static_cast<T>(matrix.m[0][3]));
+        voxelDeformation[1] = (static_cast<T>(matrix.m[1][0]) * static_cast<T>(realDeformation.x) +
+                               static_cast<T>(matrix.m[1][1]) * static_cast<T>(realDeformation.y) +
+                               static_cast<T>(matrix.m[1][3]));
+    }
+
+    // Compute the linear interpolation
+    previous.x = Floor(voxelDeformation[0]);
+    previous.y = Floor(voxelDeformation[1]);
+    InterpLinearKernel(voxelDeformation[0] - static_cast<T>(previous.x), xBasis);
+    InterpLinearKernel(voxelDeformation[1] - static_cast<T>(previous.y), yBasis);
+    if constexpr (is3d) {
+        previous.z = Floor(voxelDeformation[2]);
+        InterpLinearKernel(voxelDeformation[2] - static_cast<T>(previous.z), zBasis);
+    }
+}
+/* *************************************************************** */
 template<bool is3d>
 void ResampleImage(const nifti_image *floatingImage,
                    const float *floatingImageCuda,
@@ -29,39 +72,82 @@ void ResampleImage(const nifti_image *floatingImage,
     if (interpolation != 1)
         NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");
 
-    auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
-    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
-    auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
+    auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
+    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
+    auto deformationFieldTexture = *deformationFieldTexturePtr;
+    auto maskTexture = *maskTexturePtr;
     // Bind the real to voxel matrix to the texture
     const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
 
     for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) {
         NR_DEBUG((is3d ? "3" : "2") << "D resampling of volume number " << t);
-        auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
-        if constexpr (is3d) {
-            const unsigned blocks = blockSize->reg_resampleImage3D;
-            const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-            const dim3 gridDims(grids, grids, 1);
-            const dim3 blockDims(blocks, 1, 1);
-            ResampleImage3D<<<gridDims, blockDims>>>(warpedImageCuda + t * voxelNumber, *floatingTexture, *deformationFieldTexture, *maskTexture,
-                                                     floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
-            NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-        } else {
-            const unsigned blocks = blockSize->reg_resampleImage2D;
-            const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-            const dim3 gridDims(grids, grids, 1);
-            const dim3 blockDims(blocks, 1, 1);
-            ResampleImage2D<<<gridDims, blockDims>>>(warpedImageCuda + t * voxelNumber, *floatingTexture, *deformationFieldTexture, *maskTexture,
-                                                     floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
-            NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-        }
+        auto curWarpedCuda = warpedImageCuda + t * voxelNumber;
+        auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+        auto floatingTexture = *floatingTexturePtr;
+        thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [
+            curWarpedCuda, floatingTexture, deformationFieldTexture, maskTexture, floatingMatrix, floatingDim, paddingValue
+        ]__device__(const int index) {
+            // Get the real world deformation in the floating space
+            const int voxel = tex1Dfetch<int>(maskTexture, index);
+            const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, index);
+
+            // Get the voxel-based deformation in the floating space and compute the linear interpolation
+            int3 previous;
+            double xBasis[2], yBasis[2], zBasis[2];
+            TransformInterpolate<double, is3d>(floatingMatrix, realDeformation, previous, xBasis, yBasis, zBasis);
+
+            double intensity = 0;
+            if constexpr (is3d) {
+                for (char c = 0; c < 2; c++) {
+                    const int z = previous.z + c;
+                    int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x;
+                    double tempY = 0;
+                    for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) {
+                        const int y = previous.y + b;
+                        int index = indexYZ + previous.x;
+                        double tempX = 0;
+                        for (char a = 0; a < 2; a++, index++) {
+                            const int x = previous.x + a;
+                            if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) {
+                                tempX += tex1Dfetch<float>(floatingTexture, index) * xBasis[a];
+                            } else {
+                                // Padding value
+                                tempX += paddingValue * xBasis[a];
+                            }
+                        }
+                        tempY += tempX * yBasis[b];
+                    }
+                    intensity += tempY * zBasis[c];
+                }
+            } else {
+                int indexY = previous.y * floatingDim.x + previous.x;
+                for (char b = 0; b < 2; b++, indexY += floatingDim.x) {
+                    const int y = previous.y + b;
+                    int index = indexY;
+                    double tempX = 0;
+                    for (char a = 0; a < 2; a++, index++) {
+                        const int x = previous.x + a;
+                        if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) {
+                            tempX += tex1Dfetch<float>(floatingTexture, index) * xBasis[a];
+                        } else {
+                            // Padding value
+                            tempX += paddingValue * xBasis[a];
+                        }
+                    }
+                    intensity += tempX * yBasis[b];
+                }
+            }
+
+            curWarpedCuda[voxel] = intensity;
+        });
     }
 }
 template void ResampleImage<false>(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float);
 template void ResampleImage<true>(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float);
 /* *************************************************************** */
+template<bool is3d>
 void GetImageGradient(const nifti_image *floatingImage,
                       const float *floatingImageCuda,
                       const float4 *deformationFieldCuda,
@@ -73,33 +159,78 @@ void GetImageGradient(const nifti_image *floatingImage,
     if (interpolation != 1)
         NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");
 
-    auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
     if (paddingValue != paddingValue) paddingValue = 0;
-    auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
-    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
+    auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
+    auto floatingTexture = *floatingTexturePtr;
+    auto deformationFieldTexture = *deformationFieldTexturePtr;
     // Bind the real to voxel matrix to the texture
     const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
 
-    if (floatingImage->nz > 1) {
-        const unsigned blocks = blockSize->reg_getImageGradient3D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        GetImageGradient3D<<<gridDims, blockDims>>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture,
-                                                    floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    } else {
-        const unsigned blocks = blockSize->reg_getImageGradient2D;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        GetImageGradient2D<<<gridDims, blockDims>>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture,
-                                                    floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    }
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [
+        warpedGradientCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDim, paddingValue
+    ]__device__(const int index) {
+            // Get the real world deformation in the floating space
+            float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, index);
+
+            // Get the voxel-based deformation in the floating space and compute the linear interpolation
+            int3 previous;
+            float xBasis[2], yBasis[2], zBasis[2];
+            TransformInterpolate<float, is3d>(floatingMatrix, realDeformation, previous, xBasis, yBasis, zBasis);
+            constexpr float deriv[] = { -1.0f, 1.0f };
+
+            float4 gradientValue{};
+            if constexpr (is3d) {
+                for (char c = 0; c < 2; c++) {
+                    const int z = previous.z + c;
+                    int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x;
+                    float3 tempY{};
+                    for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) {
+                        const int y = previous.y + b;
+                        int index = indexYZ + previous.x;
+                        float2 tempX{};
+                        for (char a = 0; a < 2; a++, index++) {
+                            const int x = previous.x + a;
+                            const float intensity = -1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z ?
+                                tex1Dfetch<float>(floatingTexture, index) : paddingValue;
+
+                            tempX.x += intensity * deriv[a];
+                            tempX.y += intensity * xBasis[a];
+                        }
+                        tempY.x += tempX.x * yBasis[b];
+                        tempY.y += tempX.y * deriv[b];
+                        tempY.z += tempX.y * yBasis[b];
+                    }
+                    gradientValue.x += tempY.x * zBasis[c];
+                    gradientValue.y += tempY.y * zBasis[c];
+                    gradientValue.z += tempY.z * deriv[c];
+                }
+            } else {
+                int indexY = previous.y * floatingDim.x + previous.x;
+                for (char b = 0; b < 2; b++, indexY += floatingDim.x) {
+                    const int y = previous.y + b;
+                    int index = indexY;
+                    float2 tempX{};
+                    for (char a = 0; a < 2; a++, index++) {
+                        const int x = previous.x + a;
+                        const float intensity = -1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y ?
+                            tex1Dfetch<float>(floatingTexture, index) : paddingValue;
+
+                        tempX.x += intensity * deriv[a];
+                        tempX.y += intensity * xBasis[a];
+                    }
+                    gradientValue.x += tempX.x * yBasis[b];
+                    gradientValue.y += tempX.y * deriv[b];
+                }
+            }
+
+            warpedGradientCuda[index] = gradientValue;
+    });
 }
+template void GetImageGradient<false>(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int);
+template void GetImageGradient<true>(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int);
 /* *************************************************************** */
 } // namespace NiftyReg::Cuda
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaResampling.hpp b/reg-lib/cuda/CudaResampling.hpp
index 6d54dad6..1366ccc7 100644
--- a/reg-lib/cuda/CudaResampling.hpp
+++ b/reg-lib/cuda/CudaResampling.hpp
@@ -28,6 +28,7 @@ void ResampleImage(const nifti_image *floatingImage,
                    const int interpolation,
                    const float paddingValue);
 /* *************************************************************** */
+template<bool is3d>
 void GetImageGradient(const nifti_image *floatingImage,
                       const float *floatingImageCuda,
                       const float4 *deformationFieldCuda,
diff --git a/reg-lib/cuda/CudaResamplingKernels.cu b/reg-lib/cuda/CudaResamplingKernels.cu
deleted file mode 100644
index 868d03f5..00000000
--- a/reg-lib/cuda/CudaResamplingKernels.cu
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- *  CudaResamplingKernels.cu
- *
- *
- *  Created by Marc Modat on 24/03/2009.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-/* *************************************************************** */
-namespace NiftyReg::Cuda {
-/* *************************************************************** */
-template<typename T>
-__inline__ __device__ constexpr void InterpLinearKernel(T relative, T (&basis)[2]) {
-    if (relative < 0)
-        relative = 0;  // reg_rounding error
-    basis[1] = relative;
-    basis[0] = 1.f - relative;
-}
-/* *************************************************************** */
-__global__ void ResampleImage2D(float *resultArray,
-                                cudaTextureObject_t floatingTexture,
-                                cudaTextureObject_t deformationFieldTexture,
-                                cudaTextureObject_t maskTexture,
-                                const mat44 floatingMatrix,
-                                const int3 floatingDim,
-                                const unsigned activeVoxelNumber,
-                                const float paddingValue) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid >= activeVoxelNumber) return;
-    // Get the real world deformation in the floating space
-    const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-    const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
-
-    // Get the voxel-based deformation in the floating space
-    double2 voxelDeformation;
-    voxelDeformation.x = (double(floatingMatrix.m[0][0]) * double(realDeformation.x) +
-                          double(floatingMatrix.m[0][1]) * double(realDeformation.y) +
-                          double(floatingMatrix.m[0][3]));
-    voxelDeformation.y = (double(floatingMatrix.m[1][0]) * double(realDeformation.x) +
-                          double(floatingMatrix.m[1][1]) * double(realDeformation.y) +
-                          double(floatingMatrix.m[1][3]));
-
-    // Compute the linear interpolation
-    const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) };
-    const double2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
-    double xBasis[2], yBasis[2];
-    InterpLinearKernel(relative.x, xBasis);
-    InterpLinearKernel(relative.y, yBasis);
-
-    double intensity = 0;
-    int indexY = previous.y * floatingDim.x + previous.x;
-    for (char b = 0; b < 2; b++, indexY += floatingDim.x) {
-        const int y = previous.y + b;
-        int index = indexY;
-        double xTempNewValue = 0;
-        for (char a = 0; a < 2; a++, index++) {
-            const int x = previous.x + a;
-            if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) {
-                xTempNewValue += tex1Dfetch<float>(floatingTexture, index) * xBasis[a];
-            } else {
-                // Padding value
-                xTempNewValue += paddingValue * xBasis[a];
-            }
-        }
-        intensity += xTempNewValue * yBasis[b];
-    }
-
-    resultArray[tid2] = intensity;
-}
-/* *************************************************************** */
-__global__ void ResampleImage3D(float *resultArray,
-                                cudaTextureObject_t floatingTexture,
-                                cudaTextureObject_t deformationFieldTexture,
-                                cudaTextureObject_t maskTexture,
-                                const mat44 floatingMatrix,
-                                const int3 floatingDim,
-                                const unsigned activeVoxelNumber,
-                                const float paddingValue) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid >= activeVoxelNumber) return;
-    // Get the real world deformation in the floating space
-    const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-    const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
-
-    // Get the voxel-based deformation in the floating space
-    double3 voxelDeformation;
-    voxelDeformation.x = (double(floatingMatrix.m[0][0]) * double(realDeformation.x) +
-                          double(floatingMatrix.m[0][1]) * double(realDeformation.y) +
-                          double(floatingMatrix.m[0][2]) * double(realDeformation.z) +
-                          double(floatingMatrix.m[0][3]));
-    voxelDeformation.y = (double(floatingMatrix.m[1][0]) * double(realDeformation.x) +
-                          double(floatingMatrix.m[1][1]) * double(realDeformation.y) +
-                          double(floatingMatrix.m[1][2]) * double(realDeformation.z) +
-                          double(floatingMatrix.m[1][3]));
-    voxelDeformation.z = (double(floatingMatrix.m[2][0]) * double(realDeformation.x) +
-                          double(floatingMatrix.m[2][1]) * double(realDeformation.y) +
-                          double(floatingMatrix.m[2][2]) * double(realDeformation.z) +
-                          double(floatingMatrix.m[2][3]));
-
-    // Compute the linear interpolation
-    const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) };
-    const double3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z };
-    double xBasis[2], yBasis[2], zBasis[2];
-    InterpLinearKernel(relative.x, xBasis);
-    InterpLinearKernel(relative.y, yBasis);
-    InterpLinearKernel(relative.z, zBasis);
-
-    double intensity = 0;
-    for (char c = 0; c < 2; c++) {
-        const int z = previous.z + c;
-        int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x;
-        double yTempNewValue = 0;
-        for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) {
-            const int y = previous.y + b;
-            int index = indexYZ + previous.x;
-            double xTempNewValue = 0;
-            for (char a = 0; a < 2; a++, index++) {
-                const int x = previous.x + a;
-                if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) {
-                    xTempNewValue += tex1Dfetch<float>(floatingTexture, index) * xBasis[a];
-                } else {
-                    // Padding value
-                    xTempNewValue += paddingValue * xBasis[a];
-                }
-            }
-            yTempNewValue += xTempNewValue * yBasis[b];
-        }
-        intensity += yTempNewValue * zBasis[c];
-    }
-
-    resultArray[tid2] = intensity;
-}
-/* *************************************************************** */
-__global__ void GetImageGradient2D(float4 *gradientArray,
-                                   cudaTextureObject_t floatingTexture,
-                                   cudaTextureObject_t deformationFieldTexture,
-                                   const mat44 floatingMatrix,
-                                   const int3 floatingDim,
-                                   const unsigned activeVoxelNumber,
-                                   const float paddingValue) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid >= activeVoxelNumber) return;
-    // Get the real world deformation in the floating space
-    float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
-
-    // Get the voxel-based deformation in the floating space
-    float2 voxelDeformation;
-    voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
-                          floatingMatrix.m[0][1] * realDeformation.y +
-                          floatingMatrix.m[0][3]);
-    voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
-                          floatingMatrix.m[1][1] * realDeformation.y +
-                          floatingMatrix.m[1][3]);
-
-    // Compute the gradient
-    const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) };
-    float xBasis[2], yBasis[2];
-    const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y };
-    InterpLinearKernel(relative.x, xBasis);
-    InterpLinearKernel(relative.y, yBasis);
-    constexpr float deriv[] = { -1.0f, 1.0f };
-
-    float4 gradientValue{};
-    int indexY = previous.y * floatingDim.x + previous.x;
-    for (char b = 0; b < 2; b++, indexY += floatingDim.x) {
-        const int y = previous.y + b;
-        int index = indexY;
-        float2 tempValueX{};
-        for (char a = 0; a < 2; a++, index++) {
-            const int x = previous.x + a;
-            float intensity = paddingValue;
-
-            if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y)
-                intensity = tex1Dfetch<float>(floatingTexture, index);
-
-            tempValueX.x += intensity * deriv[a];
-            tempValueX.y += intensity * xBasis[a];
-        }
-        gradientValue.x += tempValueX.x * yBasis[b];
-        gradientValue.y += tempValueX.y * deriv[b];
-    }
-
-    gradientArray[tid] = gradientValue;
-}
-/* *************************************************************** */
-__global__ void GetImageGradient3D(float4 *gradientArray,
-                                   cudaTextureObject_t floatingTexture,
-                                   cudaTextureObject_t deformationFieldTexture,
-                                   const mat44 floatingMatrix,
-                                   const int3 floatingDim,
-                                   const unsigned activeVoxelNumber,
-                                   const float paddingValue) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid >= activeVoxelNumber) return;
-    // Get the real world deformation in the floating space
-    float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, tid);
-
-    // Get the voxel-based deformation in the floating space
-    float3 voxelDeformation;
-    voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x +
-                          floatingMatrix.m[0][1] * realDeformation.y +
-                          floatingMatrix.m[0][2] * realDeformation.z +
-                          floatingMatrix.m[0][3]);
-    voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x +
-                          floatingMatrix.m[1][1] * realDeformation.y +
-                          floatingMatrix.m[1][2] * realDeformation.z +
-                          floatingMatrix.m[1][3]);
-    voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x +
-                          floatingMatrix.m[2][1] * realDeformation.y +
-                          floatingMatrix.m[2][2] * realDeformation.z +
-                          floatingMatrix.m[2][3]);
-
-    // Compute the gradient
-    const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) };
-    float xBasis[2], yBasis[2], zBasis[2];
-    const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z };
-    InterpLinearKernel(relative.x, xBasis);
-    InterpLinearKernel(relative.y, yBasis);
-    InterpLinearKernel(relative.z, zBasis);
-    constexpr float deriv[] = { -1.0f, 1.0f };
-
-    float4 gradientValue{};
-    for (char c = 0; c < 2; c++) {
-        const int z = previous.z + c;
-        int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x;
-        float3 tempValueY{};
-        for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) {
-            const int y = previous.y + b;
-            int index = indexYZ + previous.x;
-            float2 tempValueX{};
-            for (char a = 0; a < 2; a++, index++) {
-                const int x = previous.x + a;
-                float intensity = paddingValue;
-
-                if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z)
-                    intensity = tex1Dfetch<float>(floatingTexture, index);
-
-                tempValueX.x += intensity * deriv[a];
-                tempValueX.y += intensity * xBasis[a];
-            }
-            tempValueY.x += tempValueX.x * yBasis[b];
-            tempValueY.y += tempValueX.y * deriv[b];
-            tempValueY.z += tempValueX.y * yBasis[b];
-        }
-        gradientValue.x += tempValueY.x * zBasis[c];
-        gradientValue.y += tempValueY.y * zBasis[c];
-        gradientValue.z += tempValueY.z * deriv[c];
-    }
-
-    gradientArray[tid] = gradientValue;
-}
-/* *************************************************************** */
-} // namespace NiftyReg::Cuda
-/* *************************************************************** */

From b46beb17fdc0782ecab8c79542c22d0fd1ab5c76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 29 Nov 2023 16:59:27 +0000
Subject: [PATCH 255/314] Combine GitHub Actions workflows for testing into a
 single file

---
 .github/workflows/macos.yml                | 55 ---------------------
 .github/workflows/{linux.yml => tests.yml} | 25 ++++++----
 .github/workflows/windows.yml              | 56 ----------------------
 niftyreg_build_version.txt                 |  2 +-
 4 files changed, 16 insertions(+), 122 deletions(-)
 delete mode 100644 .github/workflows/macos.yml
 rename .github/workflows/{linux.yml => tests.yml} (63%)
 delete mode 100644 .github/workflows/windows.yml

diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml
deleted file mode 100644
index 80b3ad99..00000000
--- a/.github/workflows/macos.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-name: macos
-on: [push, pull_request]
-jobs:
-  build:
-    runs-on: ${{ matrix.os }}
-    strategy: 
-      max-parallel: 1
-      matrix:
-        os: [macos-11]
-        include:
-          - os: macos-11
-            c-compiler: "gcc"
-            cxx-compiler: "g++"
-            use_cuda: "OFF"
-            use_opencl: "OFF"
-            use_openmp: "OFF"
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Get CMake
-        uses: lukka/get-cmake@latest
-
-      - name: Install Catch2
-        run:  |
-          git clone https://github.com/catchorg/Catch2.git
-          cd Catch2
-          cmake -Bbuild -H. -DBUILD_TESTING=OFF
-          sudo cmake --build build/ --target install --config Release
-        shell: bash
-
-      - name: Configure NiftyReg
-        run: |
-          mkdir build
-          cd build
-          cmake -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \
-                -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \
-                -DCMAKE_BUILD_TYPE=Release \
-                -DBUILD_ALL_DEP=ON \
-                -DUSE_CUDA=${{ matrix.use_cuda }} \
-                -DUSE_OPENCL=${{ matrix.use_opencl }} \
-                -DUSE_SSE=ON \
-                -DUSE_OPENMP=${{ matrix.use_openmp }} \
-                -DBUILD_TESTING=ON \
-                ..
-        shell: bash
-
-      - name: Build NiftyReg
-        run: cmake --build build --config Release
-        shell: bash
-
-      - name: Run tests
-        run: ctest -V
-        working-directory: build
-        shell: bash
\ No newline at end of file
diff --git a/.github/workflows/linux.yml b/.github/workflows/tests.yml
similarity index 63%
rename from .github/workflows/linux.yml
rename to .github/workflows/tests.yml
index 6b9c1f84..117a9e0c 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/tests.yml
@@ -1,16 +1,20 @@
-name: linux
+name: Tests
 on: [push, pull_request]
 jobs:
   build:
     runs-on: ${{ matrix.os }}
-    strategy: 
-      max-parallel: 1
+    strategy:
       matrix:
-        os: [ubuntu-20.04]
+        os: [ubuntu-latest, macos-latest, windows-latest]
         include:
-          - os: ubuntu-20.04
+          - sudo: "sudo"        # For ubuntu and macos
             c-compiler: "gcc"
             cxx-compiler: "g++"
+          - os: windows-latest  # For windows only
+            sudo: ""
+            c-compiler: "cl.exe"
+            cxx-compiler: "cl.exe"
+          - build_type: "Debug" # For all platforms
             use_cuda: "OFF"
             use_opencl: "OFF"
             use_openmp: "ON"
@@ -18,15 +22,16 @@ jobs:
     steps:
       - uses: actions/checkout@v3
 
-      - name: Get CMake
-        uses: lukka/get-cmake@latest
+      - name: Add MSBuild to PATH
+        uses: microsoft/setup-msbuild@v1.3
+        if: matrix.os == 'windows-latest'
 
       - name: Install Catch2
         run:  |
           git clone https://github.com/catchorg/Catch2.git
           cd Catch2
           cmake -Bbuild -H. -DBUILD_TESTING=OFF
-          sudo cmake --build build/ --target install --config Release
+          ${{ matrix.sudo }} cmake --build build/ --target install --config ${{ matrix.build_type }}
         shell: bash
 
       - name: Configure NiftyReg
@@ -35,7 +40,7 @@ jobs:
           cd build
           cmake -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \
                 -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \
-                -DCMAKE_BUILD_TYPE=Release \
+                -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
                 -DBUILD_ALL_DEP=ON \
                 -DUSE_CUDA=${{ matrix.use_cuda }} \
                 -DUSE_OPENCL=${{ matrix.use_opencl }} \
@@ -46,7 +51,7 @@ jobs:
         shell: bash
 
       - name: Build NiftyReg
-        run: cmake --build build --config Release
+        run: cmake --build build --config ${{ matrix.build_type }}
         shell: bash
 
       - name: Run tests
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
deleted file mode 100644
index d2079515..00000000
--- a/.github/workflows/windows.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-name: windows
-on: [push, pull_request]
-jobs:
-  build:
-    runs-on: ${{ matrix.os }}
-    strategy: 
-      max-parallel: 1
-      matrix:
-        os: [windows-2022]
-        include:
-          - os: windows-2022
-            c-compiler: "cl.exe"
-            cxx-compiler: "cl.exe"
-            use_cuda: "OFF"
-            use_opencl: "OFF"
-            use_openmp: "ON"
-            vcvars64: "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Auxiliary/Build/vcvars64.bat"
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Get CMake
-        uses: lukka/get-cmake@latest
-
-      - name: Install Catch2
-        run:  |
-          call "${{ matrix.vcvars64 }}"
-          git clone https://github.com/catchorg/Catch2.git
-          cd Catch2
-          cmake -Bbuild -H. -DBUILD_TESTING=OFF
-          cmake --build build/ --target install --config Release
-        shell: cmd
-
-
-      - name: Configure NiftyReg
-        run: |
-          mkdir build
-          cd build
-          call "${{ matrix.vcvars64 }}"
-          cmake -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} -DCMAKE_BUILD_TYPE=Release -DBUILD_ALL_DEP=ON -DUSE_CUDA=${{ matrix.use_cuda }} -DUSE_OPENCL=${{ matrix.use_opencl }} -DUSE_SSE=ON  -DUSE_OPENMP=${{ matrix.use_openmp }}  -DBUILD_TESTING=ON ..
-        shell: cmd
-
-
-      - name: Build NiftyReg
-        run: |
-          call "${{ matrix.vcvars64 }}"
-          cmake --build build --config Release
-        shell: cmd
-
-
-      - name: Run tests
-        run: |
-          call "${{ matrix.vcvars64 }}"
-          ctest -V
-        working-directory: build
-        shell: cmd
-
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index a5c3fde3..38a45c3e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-373
+374

From 07d5ce75a7df353778b518500e4aebdc2a29ab9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 29 Nov 2023 16:59:47 +0000
Subject: [PATCH 256/314] Update GitHub Actions workflow for coverage

---
 .github/workflows/coverage.yml | 4 ++--
 niftyreg_build_version.txt     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index f90f1da2..46c18082 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -2,7 +2,7 @@ name: Coverage
 on: [push, pull_request]
 jobs:
   Coverage:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
     steps:
       - name: Clone NiftyReg
         uses: actions/checkout@v3
@@ -27,7 +27,7 @@ jobs:
                 -DBUILD_ALL_DEP=ON \
                 -DUSE_CUDA=OFF \
                 -DUSE_OPENCL=OFF \
-                -DUSE_SSE=ON \
+                -DUSE_SSE=OFF \
                 -DUSE_OPENMP=OFF \
                 -DBUILD_TESTING=ON \
                 -DWITH_COVERAGE=ON \
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 38a45c3e..d9061d95 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-374
+375

From f0ebbb1f72c7e592a2247d92bd796bdad03f6748 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 29 Nov 2023 17:05:35 +0000
Subject: [PATCH 257/314] Refactor CudaLocalTransformation

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/cuda/BlockSize.hpp                    | 104 ++---
 reg-lib/cuda/CMakeLists.txt                   |   2 +-
 reg-lib/cuda/CudaCompute.cu                   |  74 ++--
 ...tion_gpu.cu => CudaLocalTransformation.cu} | 401 +++++++++---------
 reg-lib/cuda/CudaLocalTransformation.hpp      |  85 ++++
 ...s.cu => CudaLocalTransformationKernels.cu} | 280 ++++++------
 reg-lib/cuda/_reg_localTransformation_gpu.h   |  81 ----
 .../reg_test_regr_getDeformationField.cpp     |   8 +-
 9 files changed, 524 insertions(+), 513 deletions(-)
 rename reg-lib/cuda/{_reg_localTransformation_gpu.cu => CudaLocalTransformation.cu} (71%)
 mode change 100755 => 100644
 create mode 100644 reg-lib/cuda/CudaLocalTransformation.hpp
 rename reg-lib/cuda/{_reg_localTransformation_kernels.cu => CudaLocalTransformationKernels.cu} (85%)
 mode change 100755 => 100644
 delete mode 100755 reg-lib/cuda/_reg_localTransformation_gpu.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index d9061d95..100000a6 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-375
+376
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 50a0cfbc..e6146b2f 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -15,23 +15,23 @@ namespace NiftyReg {
 /* *************************************************************** */
 struct BlockSize {
     unsigned reg_affine_getDeformationField;
-    unsigned reg_spline_getDeformationField2D;
-    unsigned reg_spline_getDeformationField3D;
-    unsigned reg_spline_getApproxJacobianValues2D;
-    unsigned reg_spline_getApproxJacobianValues3D;
-    unsigned reg_spline_approxLinearEnergyGradient;
-    unsigned reg_spline_getJacobianValues2D;
-    unsigned reg_spline_getJacobianValues3D;
-    unsigned reg_spline_logSquaredValues;
-    unsigned reg_spline_computeApproxJacGradient2D;
-    unsigned reg_spline_computeApproxJacGradient3D;
-    unsigned reg_spline_computeJacGradient2D;
-    unsigned reg_spline_computeJacGradient3D;
-    unsigned reg_spline_approxCorrectFolding3D;
-    unsigned reg_spline_correctFolding3D;
-    unsigned reg_defField_compose2D;
-    unsigned reg_defField_compose3D;
-    unsigned reg_defField_getJacobianMatrix;
+    unsigned GetDeformationField2d;
+    unsigned GetDeformationField3d;
+    unsigned GetApproxJacobianValues2d;
+    unsigned GetApproxJacobianValues3d;
+    unsigned ApproxLinearEnergyGradient;
+    unsigned GetJacobianValues2d;
+    unsigned GetJacobianValues3d;
+    unsigned LogSquaredValues;
+    unsigned ComputeApproxJacGradient2d;
+    unsigned ComputeApproxJacGradient3d;
+    unsigned ComputeJacGradient2d;
+    unsigned ComputeJacGradient3d;
+    unsigned ApproxCorrectFolding3d;
+    unsigned CorrectFolding3d;
+    unsigned DefFieldCompose2d;
+    unsigned DefFieldCompose3d;
+    unsigned GetJacobianMatrix;
     unsigned reg_voxelCentricToNodeCentric;
     unsigned reg_convertNmiGradientFromVoxelToRealSpace;
     unsigned reg_ApplyConvolutionWindowAlongX;
@@ -43,23 +43,23 @@ struct BlockSize {
 struct BlockSize100: public BlockSize {
     BlockSize100() {
         reg_affine_getDeformationField = 512; // 16 reg - 24 smem
-        reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem
-        reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem
-        reg_spline_getApproxJacobianValues2D = 384; // 17 reg - 104 smem - 36 cmem
-        reg_spline_getApproxJacobianValues3D = 256; // 27 reg - 356 smem - 108 cmem
-        reg_spline_approxLinearEnergyGradient = 384; // 40 reg
-        reg_spline_getJacobianValues2D = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem
-        reg_spline_getJacobianValues3D = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem
-        reg_spline_logSquaredValues = 384; // 07 reg - 24 smem - 36 cmem
-        reg_spline_computeApproxJacGradient2D = 320; // 23 reg - 96 smem - 72 cmem
-        reg_spline_computeApproxJacGradient3D = 256; // 32 reg - 384 smem - 144 cmem
-        reg_spline_computeJacGradient2D = 384; // 21 reg - 24 smem - 64 cmem
-        reg_spline_computeJacGradient3D = 256; // 32 reg - 24 smem - 64 cmem
-        reg_spline_approxCorrectFolding3D = 256; // 32 reg - 24 smem - 24 cmem
-        reg_spline_correctFolding3D = 256; // 31 reg - 24 smem - 32 cmem
-        reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem
-        reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem
-        reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
+        GetDeformationField2d = 384; // 20 reg - 6168 smem - 28 cmem
+        GetDeformationField3d = 192; // 37 reg - 6168 smem - 28 cmem
+        GetApproxJacobianValues2d = 384; // 17 reg - 104 smem - 36 cmem
+        GetApproxJacobianValues3d = 256; // 27 reg - 356 smem - 108 cmem
+        ApproxLinearEnergyGradient = 384; // 40 reg
+        GetJacobianValues2d = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem
+        GetJacobianValues3d = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem
+        LogSquaredValues = 384; // 07 reg - 24 smem - 36 cmem
+        ComputeApproxJacGradient2d = 320; // 23 reg - 96 smem - 72 cmem
+        ComputeApproxJacGradient3d = 256; // 32 reg - 384 smem - 144 cmem
+        ComputeJacGradient2d = 384; // 21 reg - 24 smem - 64 cmem
+        ComputeJacGradient3d = 256; // 32 reg - 24 smem - 64 cmem
+        ApproxCorrectFolding3d = 256; // 32 reg - 24 smem - 24 cmem
+        CorrectFolding3d = 256; // 31 reg - 24 smem - 32 cmem
+        DefFieldCompose2d = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem
+        DefFieldCompose3d = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem
+        GetJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
         reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem
         reg_convertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
         reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
@@ -73,29 +73,29 @@ struct BlockSize100: public BlockSize {
 struct BlockSize300: public BlockSize {
     BlockSize300() {
         reg_affine_getDeformationField = 1024; // 23 reg
-        reg_spline_getDeformationField2D = 1024; // 34 reg
-        reg_spline_getDeformationField3D = 1024; // 34 reg
-        reg_spline_getApproxJacobianValues2D = 768; // 34 reg
-        reg_spline_getApproxJacobianValues3D = 640; // 46 reg
-        reg_spline_approxLinearEnergyGradient = 768; // 40 reg
-        reg_spline_getJacobianValues2D = 768; // 34 reg
-        reg_spline_getJacobianValues3D = 768; // 34 reg
-        reg_spline_logSquaredValues = 1024; // 23 reg
-        reg_spline_computeApproxJacGradient2D = 768; // 34 reg
-        reg_spline_computeApproxJacGradient3D = 768; // 38 reg
-        reg_spline_computeJacGradient2D = 768; // 34 reg
-        reg_spline_computeJacGradient3D = 768; // 37 reg
-        reg_spline_approxCorrectFolding3D = 768; // 34 reg
-        reg_spline_correctFolding3D = 768; // 34 reg
-        reg_defField_compose2D = 1024; // 23 reg
-        reg_defField_compose3D = 1024; // 24 reg
-        reg_defField_getJacobianMatrix = 768; // 34 reg
+        GetDeformationField2d = 1024; // 34 reg
+        GetDeformationField3d = 1024; // 34 reg
+        GetApproxJacobianValues2d = 768; // 34 reg
+        GetApproxJacobianValues3d = 640; // 46 reg
+        ApproxLinearEnergyGradient = 768; // 40 reg
+        GetJacobianValues2d = 768; // 34 reg
+        GetJacobianValues3d = 768; // 34 reg
+        LogSquaredValues = 1024; // 23 reg
+        ComputeApproxJacGradient2d = 768; // 34 reg
+        ComputeApproxJacGradient3d = 768; // 38 reg
+        ComputeJacGradient2d = 768; // 34 reg
+        ComputeJacGradient3d = 768; // 37 reg
+        ApproxCorrectFolding3d = 768; // 34 reg
+        CorrectFolding3d = 768; // 34 reg
+        DefFieldCompose2d = 1024; // 23 reg
+        DefFieldCompose3d = 1024; // 24 reg
+        GetJacobianMatrix = 768; // 34 reg
         reg_voxelCentricToNodeCentric = 1024; // 23 reg
         reg_convertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg
         reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg
         reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg
         reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg
-        Arithmetic = 1024; //
+        Arithmetic = 1024;
         NR_FUNC_CALLED();
     }
 };
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 99030c7b..f24f1cad 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -65,6 +65,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaF3dContent.cpp
     CudaKernelConvolution.cu
     CudaKernelFactory.cpp
+    CudaLocalTransformation.cu
     CudaLtsKernel.cpp
     CudaMeasure.cpp
     CudaNormaliseGradient.cu
@@ -73,7 +74,6 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaResampling.cu
     resampleKernel.cu
     _reg_globalTransformation_gpu.cu
-    _reg_localTransformation_gpu.cu
     _reg_nmi_gpu.cu
     _reg_ssd_gpu.cu
     _reg_tools_gpu.cu
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 629ed5e0..f13d93e2 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -1,10 +1,10 @@
 #include "CudaCompute.h"
 #include "CudaF3dContent.h"
 #include "CudaKernelConvolution.hpp"
+#include "CudaLocalTransformation.hpp"
 #include "CudaNormaliseGradient.hpp"
 #include "CudaResampling.hpp"
 #include "CudaOptimiser.hpp"
-#include "_reg_localTransformation_gpu.h"
 
 /* *************************************************************** */
 void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
@@ -24,43 +24,43 @@ void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
 /* *************************************************************** */
 double CudaCompute::GetJacobianPenaltyTerm(bool approx) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    return reg_spline_getJacobianPenaltyTerm_gpu(con.F3dContent::GetReference(),
-                                                 con.F3dContent::GetControlPointGrid(),
-                                                 con.GetControlPointGridCuda(),
-                                                 approx);
+    return Cuda::GetJacobianPenaltyTerm(con.F3dContent::GetReference(),
+                                        con.F3dContent::GetControlPointGrid(),
+                                        con.GetControlPointGridCuda(),
+                                        approx);
 }
 /* *************************************************************** */
 void CudaCompute::JacobianPenaltyTermGradient(float weight, bool approx) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    reg_spline_getJacobianPenaltyTermGradient_gpu(con.F3dContent::GetReference(),
-                                                  con.F3dContent::GetControlPointGrid(),
-                                                  con.GetControlPointGridCuda(),
-                                                  con.GetTransformationGradientCuda(),
-                                                  weight,
-                                                  approx);
+    Cuda::GetJacobianPenaltyTermGradient(con.F3dContent::GetReference(),
+                                         con.F3dContent::GetControlPointGrid(),
+                                         con.GetControlPointGridCuda(),
+                                         con.GetTransformationGradientCuda(),
+                                         weight,
+                                         approx);
 }
 /* *************************************************************** */
 double CudaCompute::CorrectFolding(bool approx) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    return reg_spline_correctFolding_gpu(con.F3dContent::GetReference(),
-                                         con.F3dContent::GetControlPointGrid(),
-                                         con.GetControlPointGridCuda(),
-                                         approx);
+    return Cuda::CorrectFolding(con.F3dContent::GetReference(),
+                                con.F3dContent::GetControlPointGrid(),
+                                con.GetControlPointGridCuda(),
+                                approx);
 }
 /* *************************************************************** */
 double CudaCompute::ApproxBendingEnergy() {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
-    auto approxBendingEnergy = controlPointGrid->nz > 1 ? reg_spline_approxBendingEnergy_gpu<true> :
-                                                          reg_spline_approxBendingEnergy_gpu<false>;
+    auto approxBendingEnergy = controlPointGrid->nz > 1 ? Cuda::ApproxBendingEnergy<true> :
+                                                          Cuda::ApproxBendingEnergy<false>;
     return approxBendingEnergy(controlPointGrid, con.GetControlPointGridCuda());
 }
 /* *************************************************************** */
 void CudaCompute::ApproxBendingEnergyGradient(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
-    auto approxBendingEnergyGradient = controlPointGrid->nz > 1 ? reg_spline_approxBendingEnergyGradient_gpu<true> :
-                                                                  reg_spline_approxBendingEnergyGradient_gpu<false>;
+    auto approxBendingEnergyGradient = controlPointGrid->nz > 1 ? Cuda::ApproxBendingEnergyGradient<true> :
+                                                                  Cuda::ApproxBendingEnergyGradient<false>;
     approxBendingEnergyGradient(controlPointGrid,
                                 con.GetControlPointGridCuda(),
                                 con.GetTransformationGradientCuda(),
@@ -70,16 +70,16 @@ void CudaCompute::ApproxBendingEnergyGradient(float weight) {
 double CudaCompute::ApproxLinearEnergy() {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
-    auto approxLinearEnergy = controlPointGrid->nz > 1 ? reg_spline_approxLinearEnergy_gpu<true> :
-                                                         reg_spline_approxLinearEnergy_gpu<false>;
+    auto approxLinearEnergy = controlPointGrid->nz > 1 ? Cuda::ApproxLinearEnergy<true> :
+                                                         Cuda::ApproxLinearEnergy<false>;
     return approxLinearEnergy(controlPointGrid, con.GetControlPointGridCuda());
 }
 /* *************************************************************** */
 void CudaCompute::ApproxLinearEnergyGradient(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
-    auto approxLinearEnergyGradient = controlPointGrid->nz > 1 ? reg_spline_approxLinearEnergyGradient_gpu<true> :
-                                                                 reg_spline_approxLinearEnergyGradient_gpu<false>;
+    auto approxLinearEnergyGradient = controlPointGrid->nz > 1 ? Cuda::ApproxLinearEnergyGradient<true> :
+                                                                 Cuda::ApproxLinearEnergyGradient<false>;
     approxLinearEnergyGradient(controlPointGrid, con.GetControlPointGridCuda(), con.GetTransformationGradientCuda(), weight);
 }
 /* *************************************************************** */
@@ -99,14 +99,14 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar
 /* *************************************************************** */
 void CudaCompute::GetDeformationField(bool composition, bool bspline) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    reg_spline_getDeformationField_gpu(con.F3dContent::GetControlPointGrid(),
-                                       con.F3dContent::GetReference(),
-                                       con.GetControlPointGridCuda(),
-                                       con.GetDeformationFieldCuda(),
-                                       con.GetReferenceMaskCuda(),
-                                       con.GetActiveVoxelNumber(),
-                                       composition,
-                                       bspline);
+    Cuda::GetDeformationField(con.F3dContent::GetControlPointGrid(),
+                              con.F3dContent::GetReference(),
+                              con.GetControlPointGridCuda(),
+                              con.GetDeformationFieldCuda(),
+                              con.GetReferenceMaskCuda(),
+                              con.GetActiveVoxelNumber(),
+                              composition,
+                              bspline);
 }
 /* *************************************************************** */
 template<bool optimiseX, bool optimiseY, bool optimiseZ>
@@ -220,11 +220,11 @@ void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) {
 /* *************************************************************** */
 void CudaCompute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    reg_spline_getDefFieldFromVelocityGrid_gpu(con.F3dContent::GetControlPointGrid(),
-                                               con.F3dContent::GetDeformationField(),
-                                               con.GetControlPointGridCuda(),
-                                               con.GetDeformationFieldCuda(),
-                                               updateStepNumber);
+    Cuda::GetDefFieldFromVelocityGrid(con.F3dContent::GetControlPointGrid(),
+                                      con.F3dContent::GetDeformationField(),
+                                      con.GetControlPointGridCuda(),
+                                      con.GetDeformationFieldCuda(),
+                                      updateStepNumber);
 }
 /* *************************************************************** */
 void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) {
@@ -316,6 +316,6 @@ void CudaCompute::DefFieldCompose(const nifti_image *defField) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
     thrust::device_vector<float4> defFieldCuda(voxelNumber);
     Cuda::TransferNiftiToDevice(defFieldCuda.data().get(), defField);
-    reg_defField_compose_gpu(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda());
+    Cuda::DefFieldCompose(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda());
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/CudaLocalTransformation.cu
old mode 100755
new mode 100644
similarity index 71%
rename from reg-lib/cuda/_reg_localTransformation_gpu.cu
rename to reg-lib/cuda/CudaLocalTransformation.cu
index ac5be2b0..89fe20cf
--- a/reg-lib/cuda/_reg_localTransformation_gpu.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -1,5 +1,5 @@
 /*
- *  _reg_spline_gpu.cu
+ *  CudaLocalTransformation.cu
  *
  *
  *  Created by Marc Modat on 24/03/2009.
@@ -10,20 +10,22 @@
  *
  */
 
-#include "_reg_localTransformation_gpu.h"
-#include "_reg_localTransformation_kernels.cu"
+#include "CudaLocalTransformation.hpp"
+#include "CudaLocalTransformationKernels.cu"
 #include "_reg_globalTransformation_gpu.h"
 #include "_reg_splineBasis.h"
 
 /* *************************************************************** */
-void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
-                                        const nifti_image *referenceImage,
-                                        const float4 *controlPointImageCuda,
-                                        float4 *deformationFieldCuda,
-                                        const int *maskCuda,
-                                        const size_t activeVoxelNumber,
-                                        const bool composition,
-                                        const bool bspline) {
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+void GetDeformationField(const nifti_image *controlPointImage,
+                         const nifti_image *referenceImage,
+                         const float4 *controlPointImageCuda,
+                         float4 *deformationFieldCuda,
+                         const int *maskCuda,
+                         const size_t activeVoxelNumber,
+                         const bool composition,
+                         const bool bspline) {
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
@@ -42,38 +44,38 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
     }
 
     if (referenceImage->nz > 1) {
-        const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField3D;
+        const unsigned blocks = CudaContext::GetBlockSize()->GetDeformationField3d;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         // 8 floats of shared memory are allocated per thread
-        reg_spline_getDeformationField3D<<<gridDims, blockDims, blocks * 8 * sizeof(float)>>>(deformationFieldCuda,
-                                                                                              *controlPointTexture,
-                                                                                              *maskTexture,
-                                                                                              realToVoxel.data().get(),
-                                                                                              referenceImageDim,
-                                                                                              controlPointImageDim,
-                                                                                              controlPointVoxelSpacing,
-                                                                                              (unsigned)activeVoxelNumber,
-                                                                                              composition,
-                                                                                              bspline);
+        GetDeformationField3d<<<gridDims, blockDims, blocks * 8 * sizeof(float)>>>(deformationFieldCuda,
+                                                                                   *controlPointTexture,
+                                                                                   *maskTexture,
+                                                                                   realToVoxel.data().get(),
+                                                                                   referenceImageDim,
+                                                                                   controlPointImageDim,
+                                                                                   controlPointVoxelSpacing,
+                                                                                   (unsigned)activeVoxelNumber,
+                                                                                   composition,
+                                                                                   bspline);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField2D;
+        const unsigned blocks = CudaContext::GetBlockSize()->GetDeformationField2d;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         // 4 floats of shared memory are allocated per thread
-        reg_spline_getDeformationField2D<<<gridDims, blockDims, blocks * 4 * sizeof(float)>>>(deformationFieldCuda,
-                                                                                              *controlPointTexture,
-                                                                                              *maskTexture,
-                                                                                              realToVoxel.data().get(),
-                                                                                              referenceImageDim,
-                                                                                              controlPointImageDim,
-                                                                                              controlPointVoxelSpacing,
-                                                                                              (unsigned)activeVoxelNumber,
-                                                                                              composition,
-                                                                                              bspline);
+        GetDeformationField2d<<<gridDims, blockDims, blocks * 4 * sizeof(float)>>>(deformationFieldCuda,
+                                                                                   *controlPointTexture,
+                                                                                   *maskTexture,
+                                                                                   realToVoxel.data().get(),
+                                                                                   referenceImageDim,
+                                                                                   controlPointImageDim,
+                                                                                   controlPointVoxelSpacing,
+                                                                                   (unsigned)activeVoxelNumber,
+                                                                                   composition,
+                                                                                   bspline);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
 }
@@ -146,7 +148,7 @@ __device__ SecondDerivative<is3d> GetApproxSecondDerivative(const unsigned index
 }
 /* *************************************************************** */
 template<bool is3d>
-double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) {
+double ApproxBendingEnergy(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) {
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
@@ -174,15 +176,14 @@ double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage,
                     Square(secondDerivative.yy.y) + 2.f * (Square(secondDerivative.xy.x) + Square(secondDerivative.xy.y)));
     }, 0.0, thrust::plus<double>()) / static_cast<double>(controlPointImage->nvox);
 }
-template double reg_spline_approxBendingEnergy_gpu<false>(const nifti_image*, const float4*);
-template double reg_spline_approxBendingEnergy_gpu<true>(const nifti_image*, const float4*);
+template double ApproxBendingEnergy<false>(const nifti_image*, const float4*);
+template double ApproxBendingEnergy<true>(const nifti_image*, const float4*);
 /* *************************************************************** */
 template<bool is3d>
-void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
-                                                float4 *controlPointImageCuda,
-                                                float4 *transGradientCuda,
-                                                float bendingEnergyWeight) {
-    auto blockSize = CudaContext::GetBlockSize();
+void ApproxBendingEnergyGradient(nifti_image *controlPointImage,
+                                 float4 *controlPointImageCuda,
+                                 float4 *transGradientCuda,
+                                 float bendingEnergyWeight) {
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
@@ -195,7 +196,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
     else
         set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.xy);
 
-    reg_getDisplacementFromDeformation_gpu(controlPointImage, controlPointImageCuda);
+    GetDisplacementFromDeformation(controlPointImage, controlPointImageCuda);
 
     // First compute all the second derivatives
     thrust::device_vector<typename SecondDerivative<is3d>::TextureType> secondDerivativesCudaVec((is3d ? 6 : 3) * controlPointNumber);
@@ -276,15 +277,15 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
         transGradientCuda[index] = nodeGradVal;
     });
 
-    reg_getDeformationFromDisplacement_gpu(controlPointImage, controlPointImageCuda);
+    GetDeformationFromDisplacement(controlPointImage, controlPointImageCuda);
 }
-template void reg_spline_approxBendingEnergyGradient_gpu<false>(nifti_image*, float4*, float4*, float);
-template void reg_spline_approxBendingEnergyGradient_gpu<true>(nifti_image*, float4*, float4*, float);
+template void ApproxBendingEnergyGradient<false>(nifti_image*, float4*, float4*, float);
+template void ApproxBendingEnergyGradient<true>(nifti_image*, float4*, float4*, float);
 /* *************************************************************** */
-void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage,
-                                            const float4 *controlPointImageCuda,
-                                            float *jacobianMatricesCuda,
-                                            float *jacobianDetCuda) {
+void ComputeApproxJacobianValues(const nifti_image *controlPointImage,
+                                 const float4 *controlPointImageCuda,
+                                 float *jacobianMatricesCuda,
+                                 float *jacobianDetCuda) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
@@ -295,29 +296,29 @@ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage
 
     // The Jacobian matrix is computed for every control point
     if (controlPointImage->nz > 1) {
-        const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues3D;
+        const unsigned blocks = blockSize->GetApproxJacobianValues3d;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getApproxJacobianValues3D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
-                                                                             controlPointImageDim, (unsigned)controlPointNumber, reorientation);
+        GetApproxJacobianValues3d<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
+                                                           controlPointImageDim, (unsigned)controlPointNumber, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues2D;
+        const unsigned blocks = blockSize->GetApproxJacobianValues2d;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getApproxJacobianValues2D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
-                                                                             controlPointImageDim, (unsigned)controlPointNumber, reorientation);
+        GetApproxJacobianValues2d<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
+                                                           controlPointImageDim, (unsigned)controlPointNumber, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
 }
 /* *************************************************************** */
-void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage,
-                                      const nifti_image *referenceImage,
-                                      const float4 *controlPointImageCuda,
-                                      float *jacobianMatricesCuda,
-                                      float *jacobianDetCuda) {
+void ComputeJacobianValues(const nifti_image *controlPointImage,
+                           const nifti_image *referenceImage,
+                           const float4 *controlPointImageCuda,
+                           float *jacobianMatricesCuda,
+                           float *jacobianDetCuda) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
@@ -331,32 +332,32 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage,
 
     // The Jacobian matrix is computed for every voxel
     if (controlPointImage->nz > 1) {
-        const unsigned blocks = blockSize->reg_spline_getJacobianValues3D;
+        const unsigned blocks = blockSize->GetJacobianValues3d;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         // 8 floats of shared memory are allocated per thread
         const unsigned sharedMemSize = blocks * 8 * sizeof(float);
-        reg_spline_getJacobianValues3D_kernel<<<gridDims, blockDims, sharedMemSize>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
-                                                                                      controlPointImageDim, controlPointSpacing, referenceImageDim,
-                                                                                      (unsigned)voxelNumber, reorientation);
+        GetJacobianValues3d<<<gridDims, blockDims, sharedMemSize>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
+                                                                    controlPointImageDim, controlPointSpacing, referenceImageDim,
+                                                                    (unsigned)voxelNumber, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned blocks = blockSize->reg_spline_getJacobianValues2D;
+        const unsigned blocks = blockSize->GetJacobianValues2d;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_spline_getJacobianValues2D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
-                                                                       controlPointImageDim, controlPointSpacing, referenceImageDim,
-                                                                       (unsigned)voxelNumber, reorientation);
+        GetJacobianValues2d<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
+                                                     controlPointImageDim, controlPointSpacing, referenceImageDim,
+                                                     (unsigned)voxelNumber, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
 }
 /* *************************************************************** */
-double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage,
-                                             const nifti_image *controlPointImage,
-                                             const float4 *controlPointImageCuda,
-                                             const bool approx) {
+double GetJacobianPenaltyTerm(const nifti_image *referenceImage,
+                              const nifti_image *controlPointImage,
+                              const float4 *controlPointImageCuda,
+                              const bool approx) {
     // The Jacobian matrices and determinants are computed
     float *jacobianMatricesCuda, *jacobianDetCuda;
     size_t jacNumber; double jacSum;
@@ -368,23 +369,23 @@ double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage,
         // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float)));
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float)));
-        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
+        ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     } else {
         jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
         jacSum = static_cast<double>(jacNumber);
         // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float)));
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float)));
-        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
+        ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     }
     NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda));
 
     // The Jacobian determinant are squared and logged (might not be english but will do)
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_logSquaredValues;
+    const unsigned blocks = CudaContext::GetBlockSize()->LogSquaredValues;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)jacNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    reg_spline_logSquaredValues_kernel<<<gridDims, blockDims>>>(jacobianDetCuda, (unsigned)jacNumber);
+    LogSquaredValues<<<gridDims, blockDims>>>(jacobianDetCuda, (unsigned)jacNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 
     // Perform the reduction
@@ -393,12 +394,12 @@ double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage,
     return penaltyTermValue / jacSum;
 }
 /* *************************************************************** */
-void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceImage,
-                                                   const nifti_image *controlPointImage,
-                                                   const float4 *controlPointImageCuda,
-                                                   float4 *transGradientCuda,
-                                                   const float jacobianWeight,
-                                                   const bool approx) {
+void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
+                                    const nifti_image *controlPointImage,
+                                    const float4 *controlPointImageCuda,
+                                    float4 *transGradientCuda,
+                                    const float jacobianWeight,
+                                    const bool approx) {
     auto blockSize = CudaContext::GetBlockSize();
 
     // The Jacobian matrices and determinants are computed
@@ -409,13 +410,13 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
         // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float)));
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float)));
-        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
+        ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     } else {
         jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
         // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float)));
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float)));
-        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
+        ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     }
 
     // Need to disorient the Jacobian matrix using the header information - voxel to real conversion
@@ -432,22 +433,22 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
                                                              cudaChannelFormatKindFloat, 1);
     if (approx) {
         if (controlPointImage->nz > 1) {
-            const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient3D;
+            const unsigned blocks = blockSize->ComputeApproxJacGradient3d;
             const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
-            reg_spline_computeApproxJacGradient3D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
-                                                                                  *jacobianMatricesTexture, controlPointImageDim,
-                                                                                  (unsigned)controlPointNumber, reorientation, weight);
+            ComputeApproxJacGradient3d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
+                                                                *jacobianMatricesTexture, controlPointImageDim,
+                                                                (unsigned)controlPointNumber, reorientation, weight);
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         } else {
-            const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient2D;
+            const unsigned blocks = blockSize->ComputeApproxJacGradient2d;
             const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
-            reg_spline_computeApproxJacGradient2D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
-                                                                                  *jacobianMatricesTexture, controlPointImageDim,
-                                                                                  (unsigned)controlPointNumber, reorientation, weight);
+            ComputeApproxJacGradient2d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
+                                                                *jacobianMatricesTexture, controlPointImageDim,
+                                                                (unsigned)controlPointNumber, reorientation, weight);
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         }
     } else {
@@ -456,24 +457,24 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
                                                             controlPointImage->dy / referenceImage->dy,
                                                             controlPointImage->dz / referenceImage->dz);
         if (controlPointImage->nz > 1) {
-            const unsigned blocks = blockSize->reg_spline_computeJacGradient3D;
+            const unsigned blocks = blockSize->ComputeJacGradient3d;
             const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
-            reg_spline_computeJacGradient3D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
-                                                                            *jacobianMatricesTexture, controlPointImageDim,
-                                                                            controlPointVoxelSpacing, (unsigned)controlPointNumber,
-                                                                            referenceImageDim, reorientation, weight);
+            ComputeJacGradient3d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
+                                                          *jacobianMatricesTexture, controlPointImageDim,
+                                                          controlPointVoxelSpacing, (unsigned)controlPointNumber,
+                                                          referenceImageDim, reorientation, weight);
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         } else {
-            const unsigned blocks = blockSize->reg_spline_computeJacGradient2D;
+            const unsigned blocks = blockSize->ComputeJacGradient2d;
             const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
-            reg_spline_computeJacGradient2D_kernel<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
-                                                                            *jacobianMatricesTexture, controlPointImageDim,
-                                                                            controlPointVoxelSpacing, (unsigned)controlPointNumber,
-                                                                            referenceImageDim, reorientation, weight);
+            ComputeJacGradient2d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
+                                                          *jacobianMatricesTexture, controlPointImageDim,
+                                                          controlPointVoxelSpacing, (unsigned)controlPointNumber,
+                                                          referenceImageDim, reorientation, weight);
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         }
     }
@@ -481,10 +482,10 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI
     NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda));
 }
 /* *************************************************************** */
-double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
-                                     const nifti_image *controlPointImage,
-                                     float4 *controlPointImageCuda,
-                                     const bool approx) {
+double CorrectFolding(const nifti_image *referenceImage,
+                      const nifti_image *controlPointImage,
+                      float4 *controlPointImageCuda,
+                      const bool approx) {
     auto blockSize = CudaContext::GetBlockSize();
 
     // The Jacobian matrices and determinants are computed
@@ -497,25 +498,25 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
         jacobianDetSize = jacNumber * sizeof(float);
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, 9 * jacobianDetSize));
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize));
-        reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
+        ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     } else {
         jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
         jacSum = static_cast<double>(jacNumber);
         jacobianDetSize = jacNumber * sizeof(float);
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, 9 * jacobianDetSize));
         NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize));
-        reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
+        ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda);
     }
 
     // Check if the Jacobian determinant average
     float *jacobianDet2Cuda;
     NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2Cuda, jacobianDetSize));
     NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2Cuda, jacobianDetCuda, jacobianDetSize, cudaMemcpyDeviceToDevice));
-    const unsigned blocks = blockSize->reg_spline_logSquaredValues;
+    const unsigned blocks = blockSize->LogSquaredValues;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)jacNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    reg_spline_logSquaredValues_kernel<<<gridDims, blockDims>>>(jacobianDet2Cuda, (unsigned)jacNumber);
+    LogSquaredValues<<<gridDims, blockDims>>>(jacobianDet2Cuda, (unsigned)jacNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     float *jacobianDet;
     NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet, jacobianDetSize));
@@ -540,27 +541,27 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
     auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, jacNumber, cudaChannelFormatKindFloat, 1);
     auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, 9 * jacNumber, cudaChannelFormatKindFloat, 1);
     if (approx) {
-        const unsigned blocks = blockSize->reg_spline_approxCorrectFolding3D;
+        const unsigned blocks = blockSize->ApproxCorrectFolding3d;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_spline_approxCorrectFolding3D_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
-                                                                          *jacobianMatricesTexture, controlPointImageDim,
-                                                                          controlPointSpacing, (unsigned)controlPointNumber, reorientation);
+        ApproxCorrectFolding3d<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
+                                                        *jacobianMatricesTexture, controlPointImageDim,
+                                                        controlPointSpacing, (unsigned)controlPointNumber, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
         const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
                                                             controlPointImage->dy / referenceImage->dy,
                                                             controlPointImage->dz / referenceImage->dz);
-        const unsigned blocks = blockSize->reg_spline_correctFolding3D;
+        const unsigned blocks = blockSize->CorrectFolding3d;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_spline_correctFolding3D_kernel<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
-                                                                    *jacobianMatricesTexture, controlPointImageDim, controlPointSpacing,
-                                                                    controlPointVoxelSpacing, (unsigned)controlPointNumber,
-                                                                    referenceImageDim, reorientation);
+        CorrectFolding3d<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
+                                                  *jacobianMatricesTexture, controlPointImageDim, controlPointSpacing,
+                                                  controlPointVoxelSpacing, (unsigned)controlPointNumber,
+                                                  referenceImageDim, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
     NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda));
@@ -569,7 +570,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
 }
 /* *************************************************************** */
 template<bool is3d, bool reverse = false>
-void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda) {
+void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda) {
     // Bind the qform or sform
     const mat44& affineMatrix = image->sform_code > 0 ? image->sto_xyz : image->qto_xyz;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
@@ -578,7 +579,7 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCud
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), voxelNumber, [=]__device__(const unsigned index) {
         auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(index, imageDim);
 
-        const float4 initialPosition = {
+        const float4 initialPosition{
             float(x) * affineMatrix.m[0][0] + float(y) * affineMatrix.m[0][1] + (is3d ? float(z) * affineMatrix.m[0][2] : 0.f) + affineMatrix.m[0][3],
             float(x) * affineMatrix.m[1][0] + float(y) * affineMatrix.m[1][1] + (is3d ? float(z) * affineMatrix.m[1][2] : 0.f) + affineMatrix.m[1][3],
             is3d ? float(x) * affineMatrix.m[2][0] + float(y) * affineMatrix.m[2][1] + float(z) * affineMatrix.m[2][2] + affineMatrix.m[2][3] : 0.f,
@@ -605,35 +606,35 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCud
     }
 }
 /* *************************************************************** */
-void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda) {
+void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda) {
     if (image->nu == 2)
-        reg_getDeformationFromDisplacement_gpu<false>(image, imageCuda);
+        GetDeformationFromDisplacement<false>(image, imageCuda);
     else if (image->nu == 3)
-        reg_getDeformationFromDisplacement_gpu<true>(image, imageCuda);
+        GetDeformationFromDisplacement<true>(image, imageCuda);
     else NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields");
 }
 /* *************************************************************** */
-void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageCuda) {
+void GetDisplacementFromDeformation(nifti_image *image, float4 *imageCuda) {
     if (image->nu == 2)
-        reg_getDeformationFromDisplacement_gpu<false, true>(image, imageCuda);
+        GetDeformationFromDisplacement<false, true>(image, imageCuda);
     else if (image->nu == 3)
-        reg_getDeformationFromDisplacement_gpu<true, true>(image, imageCuda);
+        GetDeformationFromDisplacement<true, true>(image, imageCuda);
     else NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields");
 }
 /* *************************************************************** */
-void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
-                                                 nifti_image *flowField,
-                                                 float4 *velocityFieldGridCuda,
-                                                 float4 *flowFieldCuda,
-                                                 const int *maskCuda,
-                                                 const size_t activeVoxelNumber) {
+void GetFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
+                                  nifti_image *flowField,
+                                  float4 *velocityFieldGridCuda,
+                                  float4 *flowFieldCuda,
+                                  const int *maskCuda,
+                                  const size_t activeVoxelNumber) {
     // Check first if the velocity field is actually a velocity field
     if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID)
         NR_FATAL_ERROR("The provided grid is not a velocity field");
 
     // Initialise the flow field with an identity transformation
     flowField->intent_p1 = DISP_VEL_FIELD;
-    reg_getDeformationFromDisplacement_gpu(flowField, flowFieldCuda);
+    GetDeformationFromDisplacement(flowField, flowFieldCuda);
 
     // fake the number of extension here to avoid the second half of the affine
     const auto oldNumExt = velocityFieldGrid->num_ext;
@@ -643,21 +644,21 @@ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
     // Copy over the number of required squaring steps
     flowField->intent_p2 = velocityFieldGrid->intent_p2;
     // The initial flow field is generated using cubic B-Spline interpolation/approximation
-    reg_spline_getDeformationField_gpu(velocityFieldGrid,
-                                       flowField,
-                                       velocityFieldGridCuda,
-                                       flowFieldCuda,
-                                       maskCuda,
-                                       activeVoxelNumber,
-                                       true,  // composition
-                                       true); // bspline
+    GetDeformationField(velocityFieldGrid,
+                        flowField,
+                        velocityFieldGridCuda,
+                        flowFieldCuda,
+                        maskCuda,
+                        activeVoxelNumber,
+                        true,  // composition
+                        true); // bspline
 
     velocityFieldGrid->num_ext = oldNumExt;
 }
 /* *************************************************************** */
-void reg_defField_compose_gpu(const nifti_image *deformationField,
-                              const float4 *deformationFieldCuda,
-                              float4 *deformationFieldCudaOut) {
+void DefFieldCompose(const nifti_image *deformationField,
+                     const float4 *deformationFieldCuda,
+                     float4 *deformationFieldCudaOut) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
     const int3 referenceImageDim{ deformationField->nx, deformationField->ny, deformationField->nz };
@@ -666,30 +667,30 @@ void reg_defField_compose_gpu(const nifti_image *deformationField,
     auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
 
     if (deformationField->nz > 1) {
-        const unsigned blocks = blockSize->reg_defField_compose3D;
+        const unsigned blocks = blockSize->DefFieldCompose3d;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_defField_compose3D_kernel<<<gridDims, blockDims>>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim,
-                                                               (unsigned)voxelNumber, affineMatrixB, affineMatrixC);
+        DefFieldCompose3d<<<gridDims, blockDims>>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim,
+                                                   (unsigned)voxelNumber, affineMatrixB, affineMatrixC);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const unsigned blocks = blockSize->reg_defField_compose2D;
+        const unsigned blocks = blockSize->DefFieldCompose2d;
         const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
-        reg_defField_compose2D_kernel<<<gridDims, blockDims>>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim,
-                                                               (unsigned)voxelNumber, affineMatrixB, affineMatrixC);
+        DefFieldCompose2d<<<gridDims, blockDims>>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim,
+                                                   (unsigned)voxelNumber, affineMatrixB, affineMatrixC);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
 }
 /* *************************************************************** */
-void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField,
-                                                       nifti_image *deformationField,
-                                                       float4 *flowFieldCuda,
-                                                       float4 *deformationFieldCuda,
-                                                       const int *maskCuda,
-                                                       const bool updateStepNumber) {
+void GetDeformationFieldFromFlowField(nifti_image *flowField,
+                                      nifti_image *deformationField,
+                                      float4 *flowFieldCuda,
+                                      float4 *deformationFieldCuda,
+                                      const int *maskCuda,
+                                      const bool updateStepNumber) {
     // Check first if the velocity field is actually a velocity field
     if (flowField->intent_p1 != DEF_VEL_FIELD)
         NR_FATAL_ERROR("The provided field is not a velocity field");
@@ -708,7 +709,7 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField,
                                                affineOnly, affineOnlyCuda.data().get());
             reg_subtractImages_gpu(flowField, flowFieldCuda, affineOnlyCuda.data().get());
         }
-    } else reg_getDisplacementFromDeformation_gpu(flowField, flowFieldCuda);
+    } else GetDisplacementFromDeformation(flowField, flowFieldCuda);
 
     // Compute the number of scaling value to ensure unfolded transformation
     int squaringNumber = 1;
@@ -740,7 +741,7 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField,
     reg_multiplyValue_gpu(voxelNumber, flowFieldCuda, flowField->intent_p2 < 0 ? -scalingValue : scalingValue);
 
     // Conversion from displacement to deformation
-    reg_getDeformationFromDisplacement_gpu(flowField, flowFieldCuda);
+    GetDeformationFromDisplacement(flowField, flowFieldCuda);
 
     // The computed scaled deformation field is copied over
     thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda);
@@ -748,14 +749,14 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField,
     // The deformation field is squared
     for (int i = 0; i < squaringNumber; ++i) {
         // The deformation field is applied to itself
-        reg_defField_compose_gpu(deformationField, deformationFieldCuda, flowFieldCuda);
+        DefFieldCompose(deformationField, deformationFieldCuda, flowFieldCuda);
         // The computed scaled deformation field is copied over
         thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda);
         NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
     }
     // The affine component of the transformation is restored
     if (affineOnly) {
-        reg_getDisplacementFromDeformation_gpu(deformationField, deformationFieldCuda);
+        GetDisplacementFromDeformation(deformationField, deformationFieldCuda);
         reg_addImages_gpu(deformationField, deformationFieldCuda, affineOnlyCuda.data().get());
     }
     deformationField->intent_p1 = DEF_FIELD;
@@ -766,11 +767,11 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField,
                                            deformationField, deformationFieldCuda, true);
 }
 /* *************************************************************** */
-void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
-                                                nifti_image *deformationField,
-                                                float4 *velocityFieldGridCuda,
-                                                float4 *deformationFieldCuda,
-                                                const bool updateStepNumber) {
+void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
+                                 nifti_image *deformationField,
+                                 float4 *velocityFieldGridCuda,
+                                 float4 *deformationFieldCuda,
+                                 const bool updateStepNumber) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
 
     // Create a mask array where no voxel is excluded
@@ -783,14 +784,14 @@ void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
     // Check if the velocity field is actually a velocity field
     if (velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) {
         // Use the spline approximation to generate the deformation field
-        reg_spline_getDeformationField_gpu(velocityFieldGrid,
-                                           deformationField,
-                                           velocityFieldGridCuda,
-                                           deformationFieldCuda,
-                                           maskCuda.data().get(),
-                                           voxelNumber,
-                                           false, // composition
-                                           true); // bspline
+        GetDeformationField(velocityFieldGrid,
+                            deformationField,
+                            velocityFieldGridCuda,
+                            deformationFieldCuda,
+                            maskCuda.data().get(),
+                            voxelNumber,
+                            false, // composition
+                            true); // bspline
     } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) {
         // Create an image to store the flow field
         NiftiImage flowField(deformationField, NiftiImage::Copy::ImageInfo);
@@ -805,36 +806,36 @@ void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
         thrust::device_vector<float4> flowFieldCuda(flowField.nVoxelsPerVolume());
 
         // Generate the velocity field
-        reg_spline_getFlowFieldFromVelocityGrid_gpu(velocityFieldGrid, flowField, velocityFieldGridCuda,
-                                                    flowFieldCuda.data().get(), maskCuda.data().get(), voxelNumber);
+        GetFlowFieldFromVelocityGrid(velocityFieldGrid, flowField, velocityFieldGridCuda,
+                                     flowFieldCuda.data().get(), maskCuda.data().get(), voxelNumber);
         // Exponentiate the flow field
-        reg_defField_getDeformationFieldFromFlowField_gpu(flowField, deformationField, flowFieldCuda.data().get(),
-                                                          deformationFieldCuda, maskCuda.data().get(), updateStepNumber);
+        GetDeformationFieldFromFlowField(flowField, deformationField, flowFieldCuda.data().get(),
+                                         deformationFieldCuda, maskCuda.data().get(), updateStepNumber);
         // Update the number of step required. No action otherwise
         velocityFieldGrid->intent_p2 = flowField->intent_p2;
     } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation");
 }
 /* *************************************************************** */
-void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
-                                        const float4 *deformationFieldCuda,
-                                        float *jacobianMatricesCuda) {
+void GetJacobianMatrix(const nifti_image *deformationField,
+                       const float4 *deformationFieldCuda,
+                       float *jacobianMatricesCuda) {
     const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
     const mat33 reorientation = reg_mat44_to_mat33(deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz);
     auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
 
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix;
+    const unsigned blocks = CudaContext::GetBlockSize()->GetJacobianMatrix;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    reg_defField_getJacobianMatrix3D_kernel<<<gridDims, blockDims>>>(jacobianMatricesCuda, *deformationFieldTexture, referenceImageDim,
-                                                                     (unsigned)voxelNumber, reorientation);
+    GetJacobianMatrix3d<<<gridDims, blockDims>>>(jacobianMatricesCuda, *deformationFieldTexture, referenceImageDim,
+                                                 (unsigned)voxelNumber, reorientation);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
 template<bool is3d>
-double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid,
-                                         const float4 *controlPointGridCuda) {
+double ApproxLinearEnergy(const nifti_image *controlPointGrid,
+                          const float4 *controlPointGridCuda) {
     const int3 cppDims = make_int3(controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
 
@@ -863,14 +864,14 @@ double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid,
         return currentValue;
     }, 0.0, thrust::plus<double>()) / static_cast<double>(controlPointGrid->nvox);
 }
-template double reg_spline_approxLinearEnergy_gpu<false>(const nifti_image*, const float4*);
-template double reg_spline_approxLinearEnergy_gpu<true>(const nifti_image*, const float4*);
+template double ApproxLinearEnergy<false>(const nifti_image*, const float4*);
+template double ApproxLinearEnergy<true>(const nifti_image*, const float4*);
 /* *************************************************************** */
 template<bool is3d>
-void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid,
-                                               const float4 *controlPointGridCuda,
-                                               float4 *transGradCuda,
-                                               const float weight) {
+void ApproxLinearEnergyGradient(const nifti_image *controlPointGrid,
+                                const float4 *controlPointGridCuda,
+                                float4 *transGradCuda,
+                                const float weight) {
     const int3 cppDims = make_int3(controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
     const float approxRatio = weight / static_cast<float>(voxelNumber);
@@ -887,7 +888,7 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr
         set_first_order_basis_values(basis.x, basis.y);
 
     // Kernel dims
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_approxLinearEnergyGradient;
+    const unsigned blocks = CudaContext::GetBlockSize()->ApproxLinearEnergyGradient;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
@@ -900,15 +901,17 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr
     auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), voxelNumber, cudaChannelFormatKindFloat, 1);
 
     // Create the displacement matrices
-    reg_spline_createDisplacementMatrices_kernel<is3d><<<gridDims, blockDims>>>(dispMatricesCuda.data().get(), *controlPointTexture,
-                                                                                cppDims, basis, reorientation, (unsigned)voxelNumber);
+    CreateDisplacementMatrices<is3d><<<gridDims, blockDims>>>(dispMatricesCuda.data().get(), *controlPointTexture,
+                                                              cppDims, basis, reorientation, (unsigned)voxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 
     // Compute the gradient
-    reg_spline_approxLinearEnergyGradient_kernel<is3d><<<gridDims, blockDims>>>(transGradCuda, *dispMatricesTexture, cppDims,
-                                                                                approxRatio, basis, invReorientation, (unsigned)voxelNumber);
+    ApproxLinearEnergyGradientKernel<is3d><<<gridDims, blockDims>>>(transGradCuda, *dispMatricesTexture, cppDims,
+                                                                    approxRatio, basis, invReorientation, (unsigned)voxelNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
-template void reg_spline_approxLinearEnergyGradient_gpu<false>(const nifti_image*, const float4*, float4*, const float);
-template void reg_spline_approxLinearEnergyGradient_gpu<true>(const nifti_image*, const float4*, float4*, const float);
+template void ApproxLinearEnergyGradient<false>(const nifti_image*, const float4*, float4*, const float);
+template void ApproxLinearEnergyGradient<true>(const nifti_image*, const float4*, float4*, const float);
+/* *************************************************************** */
+} // namespace NiftyReg::Cuda
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp
new file mode 100644
index 00000000..fd59d4e2
--- /dev/null
+++ b/reg-lib/cuda/CudaLocalTransformation.hpp
@@ -0,0 +1,85 @@
+/*
+ *  CudaLocalTransformation.hpp
+ *
+ *
+ *  Created by Marc Modat on 24/03/2009.
+ *  Copyright (c) 2009-2018, University College London
+ *  Copyright (c) 2018, NiftyReg Developers.
+ *  All rights reserved.
+ *  See the LICENSE.txt file in the nifty_reg root folder
+ *
+ */
+
+#pragma once
+
+#include "_reg_tools_gpu.h"
+
+/* *************************************************************** */
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda);
+/* *************************************************************** */
+void GetDisplacementFromDeformation(nifti_image *image, float4 *imageCuda);
+/* *************************************************************** */
+void GetDeformationField(const nifti_image *controlPointImage,
+                         const nifti_image *referenceImage,
+                         const float4 *controlPointImageCuda,
+                         float4 *deformationFieldCuda,
+                         const int *maskCuda,
+                         const size_t activeVoxelNumber,
+                         const bool composition,
+                         const bool bspline);
+/* *************************************************************** */
+template<bool is3d>
+double ApproxBendingEnergy(const nifti_image *controlPointImage,
+                           const float4 *controlPointImageCuda);
+/* *************************************************************** */
+template<bool is3d>
+void ApproxBendingEnergyGradient(nifti_image *controlPointImage,
+                                 float4 *controlPointImageCuda,
+                                 float4 *transGradientCuda,
+                                 float bendingEnergyWeight);
+/* *************************************************************** */
+double GetJacobianPenaltyTerm(const nifti_image *referenceImage,
+                              const nifti_image *controlPointImage,
+                              const float4 *controlPointImageCuda,
+                              const bool approx);
+/* *************************************************************** */
+void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
+                                    const nifti_image *controlPointImage,
+                                    const float4 *controlPointImageCuda,
+                                    float4 *transGradientCuda,
+                                    const float jacobianWeight,
+                                    const bool approx);
+/* *************************************************************** */
+double CorrectFolding(const nifti_image *referenceImage,
+                      const nifti_image *controlPointImage,
+                      float4 *controlPointImageCuda,
+                      const bool approx);
+/* *************************************************************** */
+void DefFieldCompose(const nifti_image *deformationField,
+                     const float4 *deformationFieldCuda,
+                     float4 *deformationFieldOutCuda);
+/* *************************************************************** */
+void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
+                                 nifti_image *deformationField,
+                                 float4 *velocityFieldGridCuda,
+                                 float4 *deformationFieldCuda,
+                                 const bool updateStepNumber);
+/* *************************************************************** */
+void GetJacobianMatrix(const nifti_image *deformationField,
+                       const float4 *deformationFieldCuda,
+                       float *jacobianMatricesCuda);
+/* *************************************************************** */
+template<bool is3d>
+double ApproxLinearEnergy(const nifti_image *controlPointGrid,
+                          const float4 *controlPointGridCuda);
+/* *************************************************************** */
+template<bool is3d>
+void ApproxLinearEnergyGradient(const nifti_image *controlPointGrid,
+                                const float4 *controlPointGridCuda,
+                                float4 *transGradCuda,
+                                const float weight);
+/* *************************************************************** */
+} // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu
old mode 100755
new mode 100644
similarity index 85%
rename from reg-lib/cuda/_reg_localTransformation_kernels.cu
rename to reg-lib/cuda/CudaLocalTransformationKernels.cu
index 342864aa..bdc483cb
--- a/reg-lib/cuda/_reg_localTransformation_kernels.cu
+++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu
@@ -1,5 +1,5 @@
 /*
- *  _reg_localTransformation_kernels.cu
+ *  CudaLocalTransformationKernels.cu
  *
  *
  *  Created by Marc Modat on 24/03/2009.
@@ -12,6 +12,8 @@
 
 #include "_reg_common_cuda_kernels.cu"
 
+/* *************************************************************** */
+namespace NiftyReg::Cuda {
 /* *************************************************************** */
 __device__ void GetBasisBSplineValues(const float basis, float *values) {
     const float ff = Square(basis);
@@ -166,16 +168,16 @@ __device__ float4 GetSlidedValues(int x, int y, int z,
     return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX);
 }
 /* *************************************************************** */
-__global__ void reg_spline_getDeformationField3D(float4 *deformationField,
-                                                 cudaTextureObject_t controlPointTexture,
-                                                 cudaTextureObject_t maskTexture,
-                                                 const mat44 *realToVoxel,
-                                                 const int3 referenceImageDim,
-                                                 const int3 controlPointImageDim,
-                                                 const float3 controlPointVoxelSpacing,
-                                                 const unsigned activeVoxelNumber,
-                                                 const bool composition,
-                                                 const bool bspline) {
+__global__ void GetDeformationField3d(float4 *deformationField,
+                                      cudaTextureObject_t controlPointTexture,
+                                      cudaTextureObject_t maskTexture,
+                                      const mat44 *realToVoxel,
+                                      const int3 referenceImageDim,
+                                      const int3 controlPointImageDim,
+                                      const float3 controlPointVoxelSpacing,
+                                      const unsigned activeVoxelNumber,
+                                      const bool composition,
+                                      const bool bspline) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= activeVoxelNumber) return;
     int3 nodePre;
@@ -254,16 +256,16 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField,
     deformationField[tid] = displacement;
 }
 /* *************************************************************** */
-__global__ void reg_spline_getDeformationField2D(float4 *deformationField,
-                                                 cudaTextureObject_t controlPointTexture,
-                                                 cudaTextureObject_t maskTexture,
-                                                 const mat44 *realToVoxel,
-                                                 const int3 referenceImageDim,
-                                                 const int3 controlPointImageDim,
-                                                 const float3 controlPointVoxelSpacing,
-                                                 const unsigned activeVoxelNumber,
-                                                 const bool composition,
-                                                 const bool bspline) {
+__global__ void GetDeformationField2d(float4 *deformationField,
+                                      cudaTextureObject_t controlPointTexture,
+                                      cudaTextureObject_t maskTexture,
+                                      const mat44 *realToVoxel,
+                                      const int3 referenceImageDim,
+                                      const int3 controlPointImageDim,
+                                      const float3 controlPointVoxelSpacing,
+                                      const unsigned activeVoxelNumber,
+                                      const bool composition,
+                                      const bool bspline) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= activeVoxelNumber) return;
     int2 nodePre;
@@ -322,12 +324,12 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField,
     deformationField[tid] = displacement;
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatrices,
-                                                            float *jacobianDet,
-                                                            cudaTextureObject_t controlPointTexture,
-                                                            const int3 controlPointImageDim,
-                                                            const unsigned controlPointNumber,
-                                                            const mat33 reorientation) {
+__global__ void GetApproxJacobianValues2d(float *jacobianMatrices,
+                                          float *jacobianDet,
+                                          cudaTextureObject_t controlPointTexture,
+                                          const int3 controlPointImageDim,
+                                          const unsigned controlPointNumber,
+                                          const mat33 reorientation) {
     __shared__ float xbasis[9];
     __shared__ float ybasis[9];
 
@@ -383,12 +385,12 @@ __global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatri
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatrices,
-                                                            float *jacobianDet,
-                                                            cudaTextureObject_t controlPointTexture,
-                                                            const int3 controlPointImageDim,
-                                                            const unsigned controlPointNumber,
-                                                            const mat33 reorientation) {
+__global__ void GetApproxJacobianValues3d(float *jacobianMatrices,
+                                          float *jacobianDet,
+                                          cudaTextureObject_t controlPointTexture,
+                                          const int3 controlPointImageDim,
+                                          const unsigned controlPointNumber,
+                                          const mat33 reorientation) {
     __shared__ float xbasis[27];
     __shared__ float ybasis[27];
     __shared__ float zbasis[27];
@@ -474,14 +476,14 @@ __global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatri
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices,
-                                                      float *jacobianDet,
-                                                      cudaTextureObject_t controlPointTexture,
-                                                      const int3 controlPointImageDim,
-                                                      const float3 controlPointSpacing,
-                                                      const int3 referenceImageDim,
-                                                      const unsigned voxelNumber,
-                                                      const mat33 reorientation) {
+__global__ void GetJacobianValues2d(float *jacobianMatrices,
+                                    float *jacobianDet,
+                                    cudaTextureObject_t controlPointTexture,
+                                    const int3 controlPointImageDim,
+                                    const float3 controlPointSpacing,
+                                    const int3 referenceImageDim,
+                                    const unsigned voxelNumber,
+                                    const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         int quot, rem;
@@ -543,14 +545,14 @@ __global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices,
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices,
-                                                      float *jacobianDet,
-                                                      cudaTextureObject_t controlPointTexture,
-                                                      const int3 controlPointImageDim,
-                                                      const float3 controlPointSpacing,
-                                                      const int3 referenceImageDim,
-                                                      const unsigned voxelNumber,
-                                                      const mat33 reorientation) {
+__global__ void GetJacobianValues3d(float *jacobianMatrices,
+                                    float *jacobianDet,
+                                    cudaTextureObject_t controlPointTexture,
+                                    const int3 controlPointImageDim,
+                                    const float3 controlPointSpacing,
+                                    const int3 referenceImageDim,
+                                    const unsigned voxelNumber,
+                                    const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         int quot, rem;
@@ -647,7 +649,7 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices,
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_logSquaredValues_kernel(float *det, const unsigned voxelNumber) {
+__global__ void LogSquaredValues(float *det, const unsigned voxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         const float val = logf(det[tid]);
@@ -655,7 +657,7 @@ __global__ void reg_spline_logSquaredValues_kernel(float *det, const unsigned vo
     }
 }
 /* *************************************************************** */
-__device__ void GetJacobianGradientValues2D(float *jacobianMatrix,
+__device__ void GetJacobianGradientValues2d(float *jacobianMatrix,
                                             float detJac,
                                             float basisX,
                                             float basisY,
@@ -664,7 +666,7 @@ __device__ void GetJacobianGradientValues2D(float *jacobianMatrix,
     jacobianConstraint->y += detJac * (basisY * jacobianMatrix[0] - basisX * jacobianMatrix[1]);
 }
 /* *************************************************************** */
-__device__ void GetJacobianGradientValues3D(float *jacobianMatrix,
+__device__ void GetJacobianGradientValues3d(float *jacobianMatrix,
                                             float detJac,
                                             float basisX,
                                             float basisY,
@@ -686,13 +688,13 @@ __device__ void GetJacobianGradientValues3D(float *jacobianMatrix,
         basisZ * (jacobianMatrix[0] * jacobianMatrix[4] - jacobianMatrix[1] * jacobianMatrix[3]));
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient,
-                                                             cudaTextureObject_t jacobianDeterminantTexture,
-                                                             cudaTextureObject_t jacobianMatricesTexture,
-                                                             const int3 controlPointImageDim,
-                                                             const unsigned controlPointNumber,
-                                                             const mat33 reorientation,
-                                                             const float3 weight) {
+__global__ void ComputeApproxJacGradient2d(float4 *gradient,
+                                           cudaTextureObject_t jacobianDeterminantTexture,
+                                           cudaTextureObject_t jacobianMatricesTexture,
+                                           const int3 controlPointImageDim,
+                                           const unsigned controlPointNumber,
+                                           const mat33 reorientation,
+                                           const float3 weight) {
     __shared__ float xbasis[9];
     __shared__ float ybasis[9];
 
@@ -721,7 +723,7 @@ __global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient,
                             jacobianMatrix[1] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 4 + 1);
                             jacobianMatrix[2] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 4 + 2);
                             jacobianMatrix[3] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 4 + 3);
-                            GetJacobianGradientValues2D(jacobianMatrix, detJac, xbasis[index], ybasis[index], &jacobianGradient);
+                            GetJacobianGradientValues2d(jacobianMatrix, detJac, xbasis[index], ybasis[index], &jacobianGradient);
                         }
                     }
                     jacIndex++;
@@ -737,13 +739,13 @@ __global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient,
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient,
-                                                             cudaTextureObject_t jacobianDeterminantTexture,
-                                                             cudaTextureObject_t jacobianMatricesTexture,
-                                                             const int3 controlPointImageDim,
-                                                             const unsigned controlPointNumber,
-                                                             const mat33 reorientation,
-                                                             const float3 weight) {
+__global__ void ComputeApproxJacGradient3d(float4 *gradient,
+                                           cudaTextureObject_t jacobianDeterminantTexture,
+                                           cudaTextureObject_t jacobianMatricesTexture,
+                                           const int3 controlPointImageDim,
+                                           const unsigned controlPointNumber,
+                                           const mat33 reorientation,
+                                           const float3 weight) {
     __shared__ float xbasis[27];
     __shared__ float ybasis[27];
     __shared__ float zbasis[27];
@@ -782,7 +784,7 @@ __global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient,
                                     jacobianMatrix[6] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 6);
                                     jacobianMatrix[7] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 7);
                                     jacobianMatrix[8] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex * 9 + 8);
-                                    GetJacobianGradientValues3D(jacobianMatrix, detJac, xbasis[index], ybasis[index], zbasis[index], &jacobianGradient);
+                                    GetJacobianGradientValues3d(jacobianMatrix, detJac, xbasis[index], ybasis[index], zbasis[index], &jacobianGradient);
                                 }
                             }
                             jacIndex++;
@@ -801,15 +803,15 @@ __global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient,
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient,
-                                                       cudaTextureObject_t jacobianDeterminantTexture,
-                                                       cudaTextureObject_t jacobianMatricesTexture,
-                                                       const int3 controlPointImageDim,
-                                                       const float3 controlPointVoxelSpacing,
-                                                       const unsigned controlPointNumber,
-                                                       const int3 referenceImageDim,
-                                                       const mat33 reorientation,
-                                                       const float3 weight) {
+__global__ void ComputeJacGradient2d(float4 *gradient,
+                                     cudaTextureObject_t jacobianDeterminantTexture,
+                                     cudaTextureObject_t jacobianMatricesTexture,
+                                     const int3 controlPointImageDim,
+                                     const float3 controlPointVoxelSpacing,
+                                     const unsigned controlPointNumber,
+                                     const int3 referenceImageDim,
+                                     const mat33 reorientation,
+                                     const float3 weight) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
@@ -843,7 +845,7 @@ __global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient,
                             jacobianMatrix[2] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex++);
                             jacobianMatrix[3] = tex1Dfetch<float>(jacobianMatricesTexture, jacIndex);
                             const float2 basisValues = { xFirst * yBasis, xBasis * yFirst };
-                            GetJacobianGradientValues2D(jacobianMatrix, detJac, basisValues.x, basisValues.y, &jacobianGradient);
+                            GetJacobianGradientValues2d(jacobianMatrix, detJac, basisValues.x, basisValues.y, &jacobianGradient);
                         }
                     }
                 }
@@ -856,15 +858,15 @@ __global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient,
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient,
-                                                       cudaTextureObject_t jacobianDeterminantTexture,
-                                                       cudaTextureObject_t jacobianMatricesTexture,
-                                                       const int3 controlPointImageDim,
-                                                       const float3 controlPointVoxelSpacing,
-                                                       const unsigned controlPointNumber,
-                                                       const int3 referenceImageDim,
-                                                       const mat33 reorientation,
-                                                       const float3 weight) {
+__global__ void ComputeJacGradient3d(float4 *gradient,
+                                     cudaTextureObject_t jacobianDeterminantTexture,
+                                     cudaTextureObject_t jacobianMatricesTexture,
+                                     const int3 controlPointImageDim,
+                                     const float3 controlPointVoxelSpacing,
+                                     const unsigned controlPointNumber,
+                                     const int3 referenceImageDim,
+                                     const mat33 reorientation,
+                                     const float3 weight) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
@@ -917,7 +919,7 @@ __global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient,
                                         xBasis * yFirst * zBasis,
                                         xBasis * yBasis * zFirst
                                     };
-                                    GetJacobianGradientValues3D(jacobianMatrix, detJac, basisValues.x, basisValues.y, basisValues.z, &jacobianGradient);
+                                    GetJacobianGradientValues3d(jacobianMatrix, detJac, basisValues.x, basisValues.y, basisValues.z, &jacobianGradient);
                                 }
                             }
                         }
@@ -933,13 +935,13 @@ __global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient,
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGrid,
-                                                         cudaTextureObject_t jacobianDeterminantTexture,
-                                                         cudaTextureObject_t jacobianMatricesTexture,
-                                                         const int3 controlPointImageDim,
-                                                         const float3 controlPointSpacing,
-                                                         const unsigned controlPointNumber,
-                                                         const mat33 reorientation) {
+__global__ void ApproxCorrectFolding3d(float4 *controlPointGrid,
+                                       cudaTextureObject_t jacobianDeterminantTexture,
+                                       cudaTextureObject_t jacobianMatricesTexture,
+                                       const int3 controlPointImageDim,
+                                       const float3 controlPointSpacing,
+                                       const unsigned controlPointNumber,
+                                       const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
@@ -980,7 +982,7 @@ __global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGri
                                         xBasis * yFirst * zBasis,
                                         xBasis * yBasis * zFirst
                                     };
-                                    GetJacobianGradientValues3D(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection);
+                                    GetJacobianGradientValues3d(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection);
                                 }
                             }
                         }
@@ -1002,15 +1004,15 @@ __global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGri
     }
 }
 /* *************************************************************** */
-__global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid,
-                                                   cudaTextureObject_t jacobianDeterminantTexture,
-                                                   cudaTextureObject_t jacobianMatricesTexture,
-                                                   const int3 controlPointImageDim,
-                                                   const float3 controlPointSpacing,
-                                                   const float3 controlPointVoxelSpacing,
-                                                   const unsigned controlPointNumber,
-                                                   const int3 referenceImageDim,
-                                                   const mat33 reorientation) {
+__global__ void CorrectFolding3d(float4 *controlPointGrid,
+                                 cudaTextureObject_t jacobianDeterminantTexture,
+                                 cudaTextureObject_t jacobianMatricesTexture,
+                                 const int3 controlPointImageDim,
+                                 const float3 controlPointSpacing,
+                                 const float3 controlPointVoxelSpacing,
+                                 const unsigned controlPointNumber,
+                                 const int3 referenceImageDim,
+                                 const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
@@ -1057,7 +1059,7 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid,
                                         xBasis * yFirst * zBasis,
                                         xBasis * yBasis * zFirst
                                     };
-                                    GetJacobianGradientValues3D(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection);
+                                    GetJacobianGradientValues3d(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection);
                                 }
                             }
                         }
@@ -1079,19 +1081,19 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid,
     }
 }
 /* *************************************************************** */
-__global__ void reg_defField_compose2D_kernel(float4 *deformationField,
-                                              cudaTextureObject_t deformationFieldTexture,
-                                              const int3 referenceImageDim,
-                                              const unsigned voxelNumber,
-                                              const mat44 affineMatrixB,
-                                              const mat44 affineMatrixC) {
+__global__ void DefFieldCompose2d(float4 *deformationField,
+                                  cudaTextureObject_t deformationFieldTexture,
+                                  const int3 referenceImageDim,
+                                  const unsigned voxelNumber,
+                                  const mat44 affineMatrixB,
+                                  const mat44 affineMatrixC) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         // Extract the original voxel position
         float4 position = deformationField[tid];
 
         // Conversion from real position to voxel coordinate
-        float4 voxelPosition = {
+        const float4 voxelPosition{
             position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + affineMatrixB.m[0][3],
             position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + affineMatrixB.m[1][3],
             0.f,
@@ -1123,19 +1125,19 @@ __global__ void reg_defField_compose2D_kernel(float4 *deformationField,
     }
 }
 /* *************************************************************** */
-__global__ void reg_defField_compose3D_kernel(float4 *deformationField,
-                                              cudaTextureObject_t deformationFieldTexture,
-                                              const int3 referenceImageDim,
-                                              const unsigned voxelNumber,
-                                              const mat44 affineMatrixB,
-                                              const mat44 affineMatrixC) {
+__global__ void DefFieldCompose3d(float4 *deformationField,
+                                  cudaTextureObject_t deformationFieldTexture,
+                                  const int3 referenceImageDim,
+                                  const unsigned voxelNumber,
+                                  const mat44 affineMatrixB,
+                                  const mat44 affineMatrixC) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         // Extract the original voxel position
         float4 position = deformationField[tid];
 
         // Conversion from real position to voxel coordinate
-        const float4 voxelPosition = {
+        const float4 voxelPosition{
             position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + position.z * affineMatrixB.m[0][2] + affineMatrixB.m[0][3],
             position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + position.z * affineMatrixB.m[1][2] + affineMatrixB.m[1][3],
             position.x * affineMatrixB.m[2][0] + position.y * affineMatrixB.m[2][1] + position.z * affineMatrixB.m[2][2] + affineMatrixB.m[2][3],
@@ -1171,11 +1173,11 @@ __global__ void reg_defField_compose3D_kernel(float4 *deformationField,
     }
 }
 /* *************************************************************** */
-__global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices,
-                                                        cudaTextureObject_t deformationFieldTexture,
-                                                        const int3 referenceImageDim,
-                                                        const unsigned voxelNumber,
-                                                        const mat33 reorientation) {
+__global__ void GetJacobianMatrix3d(float *jacobianMatrices,
+                                    cudaTextureObject_t deformationFieldTexture,
+                                    const int3 referenceImageDim,
+                                    const unsigned voxelNumber,
+                                    const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         int quot, rem;
@@ -1304,25 +1306,25 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
 }
 /* *************************************************************** */
 template<bool is3d>
-__global__ void reg_spline_createDisplacementMatrices_kernel(mat33 *dispMatrices,
-                                                             cudaTextureObject_t controlPointGridTexture,
-                                                             const int3 cppDims,
-                                                             const Basis1st<is3d> basis,
-                                                             const mat33 reorientation,
-                                                             const unsigned voxelNumber) {
+__global__ void CreateDisplacementMatrices(mat33 *dispMatrices,
+                                           cudaTextureObject_t controlPointGridTexture,
+                                           const int3 cppDims,
+                                           const Basis1st<is3d> basis,
+                                           const mat33 reorientation,
+                                           const unsigned voxelNumber) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (index < voxelNumber)
         dispMatrices[index] = CreateDisplacementMatrix<is3d>(index, controlPointGridTexture, cppDims, basis, reorientation);
 }
 /* *************************************************************** */
 template<bool is3d>
-__global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradient,
-                                                             cudaTextureObject_t dispMatricesTexture,
-                                                             const int3 cppDims,
-                                                             const float approxRatio,
-                                                             const Basis1st<is3d> basis,
-                                                             const mat33 invReorientation,
-                                                             const unsigned voxelNumber) {
+__global__ void ApproxLinearEnergyGradientKernel(float4 *transGradient,
+                                                 cudaTextureObject_t dispMatricesTexture,
+                                                 const int3 cppDims,
+                                                 const float approxRatio,
+                                                 const Basis1st<is3d> basis,
+                                                 const mat33 invReorientation,
+                                                 const unsigned voxelNumber) {
     const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (index >= voxelNumber) return;
     const auto [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
@@ -1375,3 +1377,5 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie
     transGradient[index] = gradVal;
 }
 /* *************************************************************** */
+} // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h
deleted file mode 100755
index 9588cc8e..00000000
--- a/reg-lib/cuda/_reg_localTransformation_gpu.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- *  _reg_spline_gpu.h
- *
- *
- *  Created by Marc Modat on 24/03/2009.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "_reg_tools_gpu.h"
-
-/* *************************************************************** */
-void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda);
-/* *************************************************************** */
-void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageCuda);
-/* *************************************************************** */
-void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage,
-                                        const nifti_image *referenceImage,
-                                        const float4 *controlPointImageCuda,
-                                        float4 *deformationFieldCuda,
-                                        const int *maskCuda,
-                                        const size_t activeVoxelNumber,
-                                        const bool composition,
-                                        const bool bspline);
-/* *************************************************************** */
-template<bool is3d>
-double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage,
-                                          const float4 *controlPointImageCuda);
-/* *************************************************************** */
-template<bool is3d>
-void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage,
-                                                float4 *controlPointImageCuda,
-                                                float4 *transGradientCuda,
-                                                float bendingEnergyWeight);
-/* *************************************************************** */
-double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage,
-                                             const nifti_image *controlPointImage,
-                                             const float4 *controlPointImageCuda,
-                                             const bool approx);
-/* *************************************************************** */
-void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceImage,
-                                                   const nifti_image *controlPointImage,
-                                                   const float4 *controlPointImageCuda,
-                                                   float4 *transGradientCuda,
-                                                   const float jacobianWeight,
-                                                   const bool approx);
-/* *************************************************************** */
-double reg_spline_correctFolding_gpu(const nifti_image *referenceImage,
-                                     const nifti_image *controlPointImage,
-                                     float4 *controlPointImageCuda,
-                                     const bool approx);
-/* *************************************************************** */
-void reg_defField_compose_gpu(const nifti_image *deformationField,
-                              const float4 *deformationFieldCuda,
-                              float4 *deformationFieldOutCuda);
-/* *************************************************************** */
-void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid,
-                                                nifti_image *deformationField,
-                                                float4 *velocityFieldGridCuda,
-                                                float4 *deformationFieldCuda,
-                                                const bool updateStepNumber);
-/* *************************************************************** */
-void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField,
-                                        const float4 *deformationFieldCuda,
-                                        float *jacobianMatricesCuda);
-/* *************************************************************** */
-template<bool is3d>
-double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid,
-                                         const float4 *controlPointGridCuda);
-/* *************************************************************** */
-template<bool is3d>
-void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid,
-                                               const float4 *controlPointGridCuda,
-                                               float4 *transGradCuda,
-                                               const float weight);
-/* *************************************************************** */
diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp
index 525bee81..a4e8cc11 100644
--- a/reg-test/reg_test_regr_getDeformationField.cpp
+++ b/reg-test/reg_test_regr_getDeformationField.cpp
@@ -272,13 +272,13 @@ class GetDeformationFieldTest {
     template<class DataType>
     void GetDeformationField(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) {
         if (controlPointGrid->nz > 1)
-            GetDeformationField3D<DataType>(controlPointGrid, defField, mask, composition, bspline);
+            GetDeformationField3d<DataType>(controlPointGrid, defField, mask, composition, bspline);
         else
-            GetDeformationField2D<DataType>(controlPointGrid, defField, mask, composition, bspline);
+            GetDeformationField2d<DataType>(controlPointGrid, defField, mask, composition, bspline);
     }
 
     template<class DataType>
-    void GetDeformationField2D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) {
+    void GetDeformationField2d(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) {
         auto defFieldPtr = defField.data();
         auto defFieldPtrX = defFieldPtr.begin();
         auto defFieldPtrY = defFieldPtrX + defField.nVoxelsPerSlice();
@@ -386,7 +386,7 @@ class GetDeformationFieldTest {
     }
 
     template<class DataType>
-    void GetDeformationField3D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) {
+    void GetDeformationField3d(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) {
         DataType xBasis[4], yBasis[4], zBasis[4];
         DataType xControlPointCoordinates[64];
         DataType yControlPointCoordinates[64];

From ce26c691b0f0af412e1bfe92a5255ec9fbc5fd29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 30 Nov 2023 15:54:06 +0000
Subject: [PATCH 258/314] Optimise Cuda::ApproxLinearEnergyGradient() #92

---
 niftyreg_build_version.txt                    |  2 +-
 reg-lib/cuda/CudaLocalTransformation.cu       | 80 +++++++++++++++----
 .../cuda/CudaLocalTransformationKernels.cu    | 76 +-----------------
 3 files changed, 66 insertions(+), 92 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 100000a6..66a899ac 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-376
+377
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index 89fe20cf..c97f45a9 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -854,8 +854,8 @@ double ApproxLinearEnergy(const nifti_image *controlPointGrid,
     auto controlPointTexture = *controlPointTexturePtr;
 
     constexpr int matSize = is3d ? 3 : 2;
-    thrust::counting_iterator<unsigned> index(0);
-    return thrust::transform_reduce(thrust::device, index, index + voxelNumber, [=]__device__(const unsigned index) {
+    thrust::counting_iterator index(0);
+    return thrust::transform_reduce(thrust::device, index, index + voxelNumber, [=]__device__(const int index) {
         const mat33 matrix = CreateDisplacementMatrix<is3d>(index, controlPointTexture, cppDims, basis, reorientation);
         double currentValue = 0;
         for (int b = 0; b < matSize; b++)
@@ -887,28 +887,74 @@ void ApproxLinearEnergyGradient(const nifti_image *controlPointGrid,
     else
         set_first_order_basis_values(basis.x, basis.y);
 
-    // Kernel dims
-    const unsigned blocks = CudaContext::GetBlockSize()->ApproxLinearEnergyGradient;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-    const dim3 gridDims(grids, grids, 1);
-    const dim3 blockDims(blocks, 1, 1);
-
     // Create the variable to store the displacement matrices
-    thrust::device_vector<mat33> dispMatricesCuda(voxelNumber);
+    thrust::device_vector<mat33> dispMatricesCudaVec(voxelNumber);
+    auto dispMatricesCuda = dispMatricesCudaVec.data().get();
 
     // Create the textures
-    auto controlPointTexture = Cuda::CreateTextureObject(controlPointGridCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
-    auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointGridCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+    auto dispMatricesTexturePtr = Cuda::CreateTextureObject(dispMatricesCuda, voxelNumber, cudaChannelFormatKindFloat, 1);
+    auto controlPointTexture = *controlPointTexturePtr;
+    auto dispMatricesTexture = *dispMatricesTexturePtr;
 
     // Create the displacement matrices
-    CreateDisplacementMatrices<is3d><<<gridDims, blockDims>>>(dispMatricesCuda.data().get(), *controlPointTexture,
-                                                              cppDims, basis, reorientation, (unsigned)voxelNumber);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const int index) {
+        dispMatricesCuda[index] = CreateDisplacementMatrix<is3d>(index, controlPointTexture, cppDims, basis, reorientation);
+    });
 
     // Compute the gradient
-    ApproxLinearEnergyGradientKernel<is3d><<<gridDims, blockDims>>>(transGradCuda, *dispMatricesTexture, cppDims,
-                                                                    approxRatio, basis, invReorientation, (unsigned)voxelNumber);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [
+        transGradCuda, dispMatricesTexture, cppDims, approxRatio, basis, invReorientation
+    ]__device__(const int index) {
+        const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, cppDims);
+        auto gradVal = transGradCuda[index];
+
+        if constexpr (is3d) {
+            for (int c = -1, basInd = 0; c < 2; c++) {
+                const int zInd = (z + c) * cppDims.y;
+                for (int b = -1; b < 2; b++) {
+                    const int yInd = (zInd + y + b) * cppDims.x;
+                    for (int a = -1; a < 2; a++, basInd++) {
+                        const int matInd = (yInd + x + a) * 9;   // Multiply with the item count of mat33
+                        const float dispMatrix[3]{ tex1Dfetch<float>(dispMatricesTexture, matInd),       // m[0][0]
+                                                   tex1Dfetch<float>(dispMatricesTexture, matInd + 4),   // m[1][1]
+                                                   tex1Dfetch<float>(dispMatricesTexture, matInd + 8) }; // m[2][2]
+                        const float gradValues[3]{ -2.f * dispMatrix[0] * basis.x[basInd],
+                                                   -2.f * dispMatrix[1] * basis.y[basInd],
+                                                   -2.f * dispMatrix[2] * basis.z[basInd] };
+
+                        gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                                    invReorientation.m[0][1] * gradValues[1] +
+                                                    invReorientation.m[0][2] * gradValues[2]);
+                        gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                                    invReorientation.m[1][1] * gradValues[1] +
+                                                    invReorientation.m[1][2] * gradValues[2]);
+                        gradVal.z += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
+                                                    invReorientation.m[2][1] * gradValues[1] +
+                                                    invReorientation.m[2][2] * gradValues[2]);
+                    }
+                }
+            }
+        } else {
+            for (int b = -1, basInd = 0; b < 2; b++) {
+                const int yInd = (y + b) * cppDims.x;
+                for (int a = -1; a < 2; a++, basInd++) {
+                    const int matInd = (yInd + x + a) * 9;   // Multiply with the item count of mat33
+                    const float dispMatrix[2]{ tex1Dfetch<float>(dispMatricesTexture, matInd),       // m[0][0]
+                                               tex1Dfetch<float>(dispMatricesTexture, matInd + 4) }; // m[1][1]
+                    const float gradValues[2]{ -2.f * dispMatrix[0] * basis.x[basInd],
+                                               -2.f * dispMatrix[1] * basis.y[basInd] };
+
+                    gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
+                                                invReorientation.m[0][1] * gradValues[1]);
+                    gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
+                                                invReorientation.m[1][1] * gradValues[1]);
+                }
+            }
+        }
+
+        transGradCuda[index] = gradVal;
+    });
 }
 template void ApproxLinearEnergyGradient<false>(const nifti_image*, const float4*, float4*, const float);
 template void ApproxLinearEnergyGradient<true>(const nifti_image*, const float4*, float4*, const float);
diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu
index bdc483cb..6bb0e04f 100644
--- a/reg-lib/cuda/CudaLocalTransformationKernels.cu
+++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu
@@ -1245,12 +1245,12 @@ struct Basis1st<false> {
 };
 /* *************************************************************** */
 template<bool is3d>
-__device__ static mat33 CreateDisplacementMatrix(const unsigned index,
+__device__ static mat33 CreateDisplacementMatrix(const int index,
                                                  cudaTextureObject_t controlPointGridTexture,
                                                  const int3& cppDims,
                                                  const Basis1st<is3d>& basis,
                                                  const mat33& reorientation) {
-    const auto [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
+    const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, cppDims);
     if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 ||
         (is3d && (z < 1 || z >= cppDims.z - 1))) return {};
 
@@ -1305,77 +1305,5 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index,
     return matrix;
 }
 /* *************************************************************** */
-template<bool is3d>
-__global__ void CreateDisplacementMatrices(mat33 *dispMatrices,
-                                           cudaTextureObject_t controlPointGridTexture,
-                                           const int3 cppDims,
-                                           const Basis1st<is3d> basis,
-                                           const mat33 reorientation,
-                                           const unsigned voxelNumber) {
-    const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (index < voxelNumber)
-        dispMatrices[index] = CreateDisplacementMatrix<is3d>(index, controlPointGridTexture, cppDims, basis, reorientation);
-}
-/* *************************************************************** */
-template<bool is3d>
-__global__ void ApproxLinearEnergyGradientKernel(float4 *transGradient,
-                                                 cudaTextureObject_t dispMatricesTexture,
-                                                 const int3 cppDims,
-                                                 const float approxRatio,
-                                                 const Basis1st<is3d> basis,
-                                                 const mat33 invReorientation,
-                                                 const unsigned voxelNumber) {
-    const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (index >= voxelNumber) return;
-    const auto [x, y, z] = reg_indexToDims_cuda<is3d>((int)index, cppDims);
-    auto gradVal = transGradient[index];
-
-    if constexpr (is3d) {
-        for (int c = -1, basInd = 0; c < 2; c++) {
-            const int zInd = (z + c) * cppDims.y;
-            for (int b = -1; b < 2; b++) {
-                const int yInd = (zInd + y + b) * cppDims.x;
-                for (int a = -1; a < 2; a++, basInd++) {
-                    const int matInd = (yInd + x + a) * 9;   // Multiply with the item count of mat33
-                    const float dispMatrix[3]{ tex1Dfetch<float>(dispMatricesTexture, matInd),       // m[0][0]
-                                               tex1Dfetch<float>(dispMatricesTexture, matInd + 4),   // m[1][1]
-                                               tex1Dfetch<float>(dispMatricesTexture, matInd + 8) }; // m[2][2]
-                    const float gradValues[3]{ -2.f * dispMatrix[0] * basis.x[basInd],
-                                               -2.f * dispMatrix[1] * basis.y[basInd],
-                                               -2.f * dispMatrix[2] * basis.z[basInd] };
-
-                    gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
-                                                invReorientation.m[0][1] * gradValues[1] +
-                                                invReorientation.m[0][2] * gradValues[2]);
-                    gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
-                                                invReorientation.m[1][1] * gradValues[1] +
-                                                invReorientation.m[1][2] * gradValues[2]);
-                    gradVal.z += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
-                                                invReorientation.m[2][1] * gradValues[1] +
-                                                invReorientation.m[2][2] * gradValues[2]);
-                }
-            }
-        }
-    } else {
-        for (int b = -1, basInd = 0; b < 2; b++) {
-            const int yInd = (y + b) * cppDims.x;
-            for (int a = -1; a < 2; a++, basInd++) {
-                const int matInd = (yInd + x + a) * 9;   // Multiply with the item count of mat33
-                const float dispMatrix[2]{ tex1Dfetch<float>(dispMatricesTexture, matInd),       // m[0][0]
-                                           tex1Dfetch<float>(dispMatricesTexture, matInd + 4) }; // m[1][1]
-                const float gradValues[2]{ -2.f * dispMatrix[0] * basis.x[basInd],
-                                           -2.f * dispMatrix[1] * basis.y[basInd] };
-
-                gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
-                                            invReorientation.m[0][1] * gradValues[1]);
-                gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
-                                            invReorientation.m[1][1] * gradValues[1]);
-            }
-        }
-    }
-
-    transGradient[index] = gradVal;
-}
-/* *************************************************************** */
 } // namespace NiftyReg::Cuda
 /* *************************************************************** */

From 120386a91dc2f3da07e61a822ab88f02b9a505eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 1 Dec 2023 15:41:53 +0000
Subject: [PATCH 259/314] Refactorisations

---
 niftyreg_build_version.txt               |  2 +-
 reg-lib/cuda/CudaLocalTransformation.cu  | 30 ++++-----
 reg-lib/cuda/_reg_common_cuda_kernels.cu | 85 +++++++++++-------------
 reg-lib/cuda/_reg_tools_kernels.cu       |  2 +-
 4 files changed, 57 insertions(+), 62 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 66a899ac..61ab674d 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-377
+378
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index c97f45a9..b759455a 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -102,11 +102,11 @@ struct SecondDerivative<false> {
 };
 /* *************************************************************** */
 template<bool is3d, bool isGradient>
-__device__ SecondDerivative<is3d> GetApproxSecondDerivative(const unsigned index,
+__device__ SecondDerivative<is3d> GetApproxSecondDerivative(const int index,
                                                             cudaTextureObject_t controlPointTexture,
-                                                            const int3& controlPointImageDim,
-                                                            const Basis2nd<is3d>& basis) {
-    auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(index, controlPointImageDim);
+                                                            const int3 controlPointImageDim,
+                                                            const Basis2nd<is3d> basis) {
+    const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, controlPointImageDim);
     if (!isGradient && (x < 1 || x >= controlPointImageDim.x - 1 ||
                         y < 1 || y >= controlPointImageDim.y - 1 ||
                         (is3d && (z < 1 || z >= controlPointImageDim.z - 1)))) return {};
@@ -161,9 +161,9 @@ double ApproxBendingEnergy(const nifti_image *controlPointImage, const float4 *c
     else
         set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.xy);
 
-    thrust::counting_iterator<unsigned> index(0);
-    return thrust::transform_reduce(thrust::device, index, index + controlPointNumber, [=]__device__(const unsigned index) {
-        const auto& secondDerivative = GetApproxSecondDerivative<is3d, false>(index, controlPointTexture, controlPointImageDim, basis);
+    thrust::counting_iterator index(0);
+    return thrust::transform_reduce(thrust::device, index, index + controlPointNumber, [=]__device__(const int index) {
+        const auto secondDerivative = GetApproxSecondDerivative<is3d, false>(index, controlPointTexture, controlPointImageDim, basis);
         if constexpr (is3d)
             return (Square(secondDerivative.xx.x) + Square(secondDerivative.xx.y) + Square(secondDerivative.xx.z) +
                     Square(secondDerivative.yy.x) + Square(secondDerivative.yy.y) + Square(secondDerivative.yy.z) +
@@ -201,9 +201,9 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage,
     // First compute all the second derivatives
     thrust::device_vector<typename SecondDerivative<is3d>::TextureType> secondDerivativesCudaVec((is3d ? 6 : 3) * controlPointNumber);
     auto secondDerivativesCuda = secondDerivativesCudaVec.data().get();
-    thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), controlPointNumber,
-                       [controlPointTexture, controlPointImageDim, basis, secondDerivativesCuda]__device__(const unsigned index) {
-        const auto& secondDerivative = GetApproxSecondDerivative<is3d, true>(index, controlPointTexture, controlPointImageDim, basis);
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber,
+                       [controlPointTexture, controlPointImageDim, basis, secondDerivativesCuda]__device__(const int index) {
+        const auto secondDerivative = GetApproxSecondDerivative<is3d, true>(index, controlPointTexture, controlPointImageDim, basis);
         if constexpr (is3d) {
             int derInd = 6 * index;
             secondDerivativesCuda[derInd++] = make_float4(secondDerivative.xx);
@@ -226,9 +226,9 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage,
 
     // Compute the gradient
     const float approxRatio = bendingEnergyWeight / (float)controlPointNumber;
-    thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), controlPointNumber,
-                       [controlPointImageDim, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const unsigned index) {
-        auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(index, controlPointImageDim);
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber,
+                       [controlPointImageDim, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const int index) {
+        const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, controlPointImageDim);
         typename SecondDerivative<is3d>::Type gradientValue{};
         if constexpr (is3d) {
             for (int c = z - 1, basInd = 0; c < z + 2; c++) {
@@ -576,8 +576,8 @@ void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const int3 imageDim{ image->nx, image->ny, image->nz };
 
-    thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), voxelNumber, [=]__device__(const unsigned index) {
-        auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(index, imageDim);
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const int index) {
+        const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, imageDim);
 
         const float4 initialPosition{
             float(x) * affineMatrix.m[0][0] + float(y) * affineMatrix.m[0][1] + (is3d ? float(z) * affineMatrix.m[0][2] : 0.f) + affineMatrix.m[0][3],
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
index ee0e4bcf..4206931d 100644
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu
@@ -9,7 +9,7 @@
 
 /* *************************************************************** */
 template<bool is3d>
-__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const double weight, float (&out)[3]) {
+__device__ __inline__ void reg_mat33_mul_cuda(const mat33 mat, const float (&in)[3], const double weight, float (&out)[3]) {
     out[0] = weight * (mat.m[0][0] * in[0] + mat.m[1][0] * in[1] + mat.m[2][0] * in[2]);
     out[1] = weight * (mat.m[0][1] * in[0] + mat.m[1][1] * in[1] + mat.m[2][1] * in[2]);
     if constexpr (is3d)
@@ -17,14 +17,14 @@ __device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in
 }
 /* *************************************************************** */
 template<bool is3d>
-__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3]) {
+__device__ __inline__ void reg_mat44_mul_cuda(const mat44 mat, const float (&in)[3], float (&out)[3]) {
     out[0] = double(mat.m[0][0]) * double(in[0]) + double(mat.m[0][1]) * double(in[1]) + double(mat.m[0][2]) * double(in[2]) + double(mat.m[0][3]);
     out[1] = double(mat.m[1][0]) * double(in[0]) + double(mat.m[1][1]) * double(in[1]) + double(mat.m[1][2]) * double(in[2]) + double(mat.m[1][3]);
     if constexpr (is3d)
         out[2] = double(mat.m[2][0]) * double(in[0]) + double(mat.m[2][1]) * double(in[1]) + double(mat.m[2][2]) * double(in[2]) + double(mat.m[2][3]);
 }
 /* *************************************************************** */
-__device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33& a, const mat33& b) {
+__device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33 a, const mat33 b) {
     mat33 c;
     for (int i = 0; i < 3; i++)
         for (int j = 0; j < 3; j++)
@@ -32,83 +32,78 @@ __device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33& a, const mat33& b) {
     return c;
 }
 /* *************************************************************** */
-__device__ __inline__ mat33 reg_mat33_inverse_cuda(const mat33& r) {
-    double r11, r12, r13, r21, r22, r23, r31, r32, r33, deti;
-    mat33 q;
+__device__ __inline__ mat33 reg_mat33_inverse_cuda(const mat33 r) {
     /*  INPUT MATRIX:  */
-    r11 = r.m[0][0]; r12 = r.m[0][1]; r13 = r.m[0][2];  /* [ r11 r12 r13 ] */
-    r21 = r.m[1][0]; r22 = r.m[1][1]; r23 = r.m[1][2];  /* [ r21 r22 r23 ] */
-    r31 = r.m[2][0]; r32 = r.m[2][1]; r33 = r.m[2][2];  /* [ r31 r32 r33 ] */
+    const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2];  /* [ r11 r12 r13 ] */
+    const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2];  /* [ r21 r22 r23 ] */
+    const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2];  /* [ r31 r32 r33 ] */
 
-    deti = r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33
-        + r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13;
+    double deti = (r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 +
+                   r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13);
 
     if (deti != 0.0) deti = 1.0 / deti;
 
-    q.m[0][0] = (float)(deti * (r22 * r33 - r32 * r23));
-    q.m[0][1] = (float)(deti * (-r12 * r33 + r32 * r13));
-    q.m[0][2] = (float)(deti * (r12 * r23 - r22 * r13));
+    mat33 q;
+    q.m[0][0] = float(deti * (r22 * r33 - r32 * r23));
+    q.m[0][1] = float(deti * (-r12 * r33 + r32 * r13));
+    q.m[0][2] = float(deti * (r12 * r23 - r22 * r13));
 
-    q.m[1][0] = (float)(deti * (-r21 * r33 + r31 * r23));
-    q.m[1][1] = (float)(deti * (r11 * r33 - r31 * r13));
-    q.m[1][2] = (float)(deti * (-r11 * r23 + r21 * r13));
+    q.m[1][0] = float(deti * (-r21 * r33 + r31 * r23));
+    q.m[1][1] = float(deti * (r11 * r33 - r31 * r13));
+    q.m[1][2] = float(deti * (-r11 * r23 + r21 * r13));
 
-    q.m[2][0] = (float)(deti * (r21 * r32 - r31 * r22));
-    q.m[2][1] = (float)(deti * (-r11 * r32 + r31 * r12));
-    q.m[2][2] = (float)(deti * (r11 * r22 - r21 * r12));
+    q.m[2][0] = float(deti * (r21 * r32 - r31 * r22));
+    q.m[2][1] = float(deti * (-r11 * r32 + r31 * r12));
+    q.m[2][2] = float(deti * (r11 * r22 - r21 * r12));
 
     return q;
 }
 /* *************************************************************** */
-__device__ __inline__ float reg_mat33_determ_cuda(const mat33& r) {
-    double r11, r12, r13, r21, r22, r23, r31, r32, r33;
+__device__ __inline__ float reg_mat33_determ_cuda(const mat33 r) {
     /*  INPUT MATRIX:  */
-    r11 = r.m[0][0]; r12 = r.m[0][1]; r13 = r.m[0][2];  /* [ r11 r12 r13 ] */
-    r21 = r.m[1][0]; r22 = r.m[1][1]; r23 = r.m[1][2];  /* [ r21 r22 r23 ] */
-    r31 = r.m[2][0]; r32 = r.m[2][1]; r33 = r.m[2][2];  /* [ r31 r32 r33 ] */
+    const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2];  /* [ r11 r12 r13 ] */
+    const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2];  /* [ r21 r22 r23 ] */
+    const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2];  /* [ r31 r32 r33 ] */
 
     return float(r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 +
                  r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13);
 }
 /* *************************************************************** */
-__device__ __inline__ float reg_mat33_rownorm_cuda(const mat33& a) {
+__device__ __inline__ float reg_mat33_rownorm_cuda(const mat33 a) {
     float r1 = fabs(a.m[0][0]) + fabs(a.m[0][1]) + fabs(a.m[0][2]);
-    float r2 = fabs(a.m[1][0]) + fabs(a.m[1][1]) + fabs(a.m[1][2]);
-    float r3 = fabs(a.m[2][0]) + fabs(a.m[2][1]) + fabs(a.m[2][2]);
+    const float r2 = fabs(a.m[1][0]) + fabs(a.m[1][1]) + fabs(a.m[1][2]);
+    const float r3 = fabs(a.m[2][0]) + fabs(a.m[2][1]) + fabs(a.m[2][2]);
     if (r1 < r2) r1 = r2;
     if (r1 < r3) r1 = r3;
     return r1;
 }
 /* *************************************************************** */
-__device__ __inline__ float reg_mat33_colnorm_cuda(const mat33& A) {
-    float r1 = fabs(A.m[0][0]) + fabs(A.m[1][0]) + fabs(A.m[2][0]);
-    float r2 = fabs(A.m[0][1]) + fabs(A.m[1][1]) + fabs(A.m[2][1]);
-    float r3 = fabs(A.m[0][2]) + fabs(A.m[1][2]) + fabs(A.m[2][2]);
+__device__ __inline__ float reg_mat33_colnorm_cuda(const mat33 a) {
+    float r1 = fabs(a.m[0][0]) + fabs(a.m[1][0]) + fabs(a.m[2][0]);
+    const float r2 = fabs(a.m[0][1]) + fabs(a.m[1][1]) + fabs(a.m[2][1]);
+    const float r3 = fabs(a.m[0][2]) + fabs(a.m[1][2]) + fabs(a.m[2][2]);
     if (r1 < r2) r1 = r2;
     if (r1 < r3) r1 = r3;
     return r1;
 }
 /* *************************************************************** */
-__device__ __inline__ mat33 reg_mat33_polar_cuda(const mat33& a) {
-    mat33 x, y, z;
-    float alp, bet, gam, gmi, dif = 1.0f;
-    int k = 0;
-
-    x = a;
-
+__device__ __inline__ mat33 reg_mat33_polar_cuda(mat33 x) {
     // Force matrix to be nonsingular
-    gam = reg_mat33_determ_cuda(x);
+    float gam = reg_mat33_determ_cuda(x);
     while (gam == 0.0) {        // Perturb matrix
         gam = 0.00001f * (0.001f + reg_mat33_rownorm_cuda(x));
         x.m[0][0] += gam; x.m[1][1] += gam; x.m[2][2] += gam;
         gam = reg_mat33_determ_cuda(x);
     }
 
+    mat33 z;
+    float gmi, dif = 1.0f;
+    int k = 0;
     while (1) {
-        y = reg_mat33_inverse_cuda(x);
+        const mat33 y = reg_mat33_inverse_cuda(x);
         if (dif > 0.3) {     // Far from convergence
-            alp = sqrt(reg_mat33_rownorm_cuda(x) * reg_mat33_colnorm_cuda(x));
-            bet = sqrt(reg_mat33_rownorm_cuda(y) * reg_mat33_colnorm_cuda(y));
+            const float alp = sqrt(reg_mat33_rownorm_cuda(x) * reg_mat33_colnorm_cuda(x));
+            const float bet = sqrt(reg_mat33_rownorm_cuda(y) * reg_mat33_colnorm_cuda(y));
             gam = sqrt(bet / alp);
             gmi = 1.f / gam;
         } else {
@@ -145,7 +140,7 @@ __device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quo
 }
 /* *************************************************************** */
 template<bool is3d>
-__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dims) {
+__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3 dims) {
     int quot = 0, rem;
     if constexpr (is3d)
         reg_div_cuda(index, dims.x * dims.y, quot, rem);
@@ -156,7 +151,7 @@ __device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dim
     return { x, y, z };
 }
 /* *************************************************************** */
-__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dims) {
+__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3 dims) {
     return dims.z > 1 ? reg_indexToDims_cuda<true>(index, dims) : reg_indexToDims_cuda<false>(index, dims);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu
index 2dcf468a..b39d117a 100755
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/_reg_tools_kernels.cu
@@ -23,7 +23,7 @@ __global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= nodeNumber) return;
     // Calculate the node coordinates
-    auto&& [x, y, z] = reg_indexToDims_cuda<is3d>(tid, nodeImageDims);
+    const auto [x, y, z] = reg_indexToDims_cuda<is3d>(tid, nodeImageDims);
     // Transform into voxel coordinates
     float voxelCoord[3], nodeCoord[3] = { static_cast<float>(x), static_cast<float>(y), static_cast<float>(z) };
     reg_mat44_mul_cuda<is3d>(transformation, nodeCoord, voxelCoord);

From 65117400b225357aa90ae8b1ce5328ee31e251c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 1 Dec 2023 23:44:36 +0000
Subject: [PATCH 260/314] Optimise Cuda::GetDeformationField() #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/cuda/BlockSize.hpp                    |   6 -
 reg-lib/cuda/CudaCompute.cu                   |  21 ++--
 reg-lib/cuda/CudaLocalTransformation.cu       |  88 ++++++--------
 reg-lib/cuda/CudaLocalTransformation.hpp      |   5 +-
 .../cuda/CudaLocalTransformationKernels.cu    | 108 +++++++-----------
 6 files changed, 90 insertions(+), 140 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 61ab674d..3b2f92ea 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-378
+379
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index e6146b2f..45164b1f 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -15,8 +15,6 @@ namespace NiftyReg {
 /* *************************************************************** */
 struct BlockSize {
     unsigned reg_affine_getDeformationField;
-    unsigned GetDeformationField2d;
-    unsigned GetDeformationField3d;
     unsigned GetApproxJacobianValues2d;
     unsigned GetApproxJacobianValues3d;
     unsigned ApproxLinearEnergyGradient;
@@ -43,8 +41,6 @@ struct BlockSize {
 struct BlockSize100: public BlockSize {
     BlockSize100() {
         reg_affine_getDeformationField = 512; // 16 reg - 24 smem
-        GetDeformationField2d = 384; // 20 reg - 6168 smem - 28 cmem
-        GetDeformationField3d = 192; // 37 reg - 6168 smem - 28 cmem
         GetApproxJacobianValues2d = 384; // 17 reg - 104 smem - 36 cmem
         GetApproxJacobianValues3d = 256; // 27 reg - 356 smem - 108 cmem
         ApproxLinearEnergyGradient = 384; // 40 reg
@@ -73,8 +69,6 @@ struct BlockSize100: public BlockSize {
 struct BlockSize300: public BlockSize {
     BlockSize300() {
         reg_affine_getDeformationField = 1024; // 23 reg
-        GetDeformationField2d = 1024; // 34 reg
-        GetDeformationField3d = 1024; // 34 reg
         GetApproxJacobianValues2d = 768; // 34 reg
         GetApproxJacobianValues3d = 640; // 46 reg
         ApproxLinearEnergyGradient = 768; // 40 reg
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index f13d93e2..4d57c327 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -98,15 +98,20 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar
 }
 /* *************************************************************** */
 void CudaCompute::GetDeformationField(bool composition, bool bspline) {
+    decltype(Cuda::GetDeformationField<true, true>) *getDeformationField;
+    if (composition)
+        getDeformationField = bspline ? Cuda::GetDeformationField<true, true> :
+                                        Cuda::GetDeformationField<true, false>;
+    else
+        getDeformationField = bspline ? Cuda::GetDeformationField<false, true> :
+                                        Cuda::GetDeformationField<false, false>;
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    Cuda::GetDeformationField(con.F3dContent::GetControlPointGrid(),
-                              con.F3dContent::GetReference(),
-                              con.GetControlPointGridCuda(),
-                              con.GetDeformationFieldCuda(),
-                              con.GetReferenceMaskCuda(),
-                              con.GetActiveVoxelNumber(),
-                              composition,
-                              bspline);
+    getDeformationField(con.F3dContent::GetControlPointGrid(),
+                        con.F3dContent::GetReference(),
+                        con.GetControlPointGridCuda(),
+                        con.GetDeformationFieldCuda(),
+                        con.GetReferenceMaskCuda(),
+                        con.GetActiveVoxelNumber());
 }
 /* *************************************************************** */
 template<bool optimiseX, bool optimiseY, bool optimiseZ>
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index b759455a..232a8410 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -18,14 +18,13 @@
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
 /* *************************************************************** */
+template<bool composition, bool bspline>
 void GetDeformationField(const nifti_image *controlPointImage,
                          const nifti_image *referenceImage,
                          const float4 *controlPointImageCuda,
                          float4 *deformationFieldCuda,
                          const int *maskCuda,
-                         const size_t activeVoxelNumber,
-                         const bool composition,
-                         const bool bspline) {
+                         const size_t activeVoxelNumber) {
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
@@ -33,52 +32,33 @@ void GetDeformationField(const nifti_image *controlPointImage,
                                                         controlPointImage->dy / referenceImage->dy,
                                                         controlPointImage->dz / referenceImage->dz);
 
-    auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
-    auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
+    auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
+    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
+    auto controlPointTexture = *controlPointTexturePtr;
+    auto maskTexture = *maskTexturePtr;
 
     // Get the reference matrix if composition is required
-    thrust::device_vector<mat44> realToVoxel;
-    if (composition) {
+    thrust::device_vector<mat44> realToVoxelCudaVec;
+    if constexpr (composition) {
         const mat44 *matPtr = controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk;
-        realToVoxel = thrust::device_vector<mat44>(matPtr, matPtr + 1);
+        realToVoxelCudaVec = thrust::device_vector<mat44>(matPtr, matPtr + 1);
     }
+    const auto realToVoxelCuda = composition ? realToVoxelCudaVec.data().get() : nullptr;
 
     if (referenceImage->nz > 1) {
-        const unsigned blocks = CudaContext::GetBlockSize()->GetDeformationField3d;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        // 8 floats of shared memory are allocated per thread
-        GetDeformationField3d<<<gridDims, blockDims, blocks * 8 * sizeof(float)>>>(deformationFieldCuda,
-                                                                                   *controlPointTexture,
-                                                                                   *maskTexture,
-                                                                                   realToVoxel.data().get(),
-                                                                                   referenceImageDim,
-                                                                                   controlPointImageDim,
-                                                                                   controlPointVoxelSpacing,
-                                                                                   (unsigned)activeVoxelNumber,
-                                                                                   composition,
-                                                                                   bspline);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+        thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) {
+            GetDeformationField3d<composition, bspline>(deformationFieldCuda, controlPointTexture, maskTexture, realToVoxelCuda,
+                                                        referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index);
+        });
     } else {
-        const unsigned blocks = CudaContext::GetBlockSize()->GetDeformationField2d;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        // 4 floats of shared memory are allocated per thread
-        GetDeformationField2d<<<gridDims, blockDims, blocks * 4 * sizeof(float)>>>(deformationFieldCuda,
-                                                                                   *controlPointTexture,
-                                                                                   *maskTexture,
-                                                                                   realToVoxel.data().get(),
-                                                                                   referenceImageDim,
-                                                                                   controlPointImageDim,
-                                                                                   controlPointVoxelSpacing,
-                                                                                   (unsigned)activeVoxelNumber,
-                                                                                   composition,
-                                                                                   bspline);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+        thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) {
+            GetDeformationField2d<composition, bspline>(deformationFieldCuda, controlPointTexture, maskTexture, realToVoxelCuda,
+                                                        referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index);
+        });
     }
 }
+template void GetDeformationField<false, false>(const nifti_image*, const nifti_image*, const float4*, float4*, const int*, const size_t);
+template void GetDeformationField<true, false>(const nifti_image*, const nifti_image*, const float4*, float4*, const int*, const size_t);
 /* *************************************************************** */
 template<bool is3d>
 struct Basis2nd {
@@ -644,14 +624,12 @@ void GetFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
     // Copy over the number of required squaring steps
     flowField->intent_p2 = velocityFieldGrid->intent_p2;
     // The initial flow field is generated using cubic B-Spline interpolation/approximation
-    GetDeformationField(velocityFieldGrid,
-                        flowField,
-                        velocityFieldGridCuda,
-                        flowFieldCuda,
-                        maskCuda,
-                        activeVoxelNumber,
-                        true,  // composition
-                        true); // bspline
+    GetDeformationField<true, true>(velocityFieldGrid,
+                                    flowField,
+                                    velocityFieldGridCuda,
+                                    flowFieldCuda,
+                                    maskCuda,
+                                    activeVoxelNumber);
 
     velocityFieldGrid->num_ext = oldNumExt;
 }
@@ -784,14 +762,12 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
     // Check if the velocity field is actually a velocity field
     if (velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) {
         // Use the spline approximation to generate the deformation field
-        GetDeformationField(velocityFieldGrid,
-                            deformationField,
-                            velocityFieldGridCuda,
-                            deformationFieldCuda,
-                            maskCuda.data().get(),
-                            voxelNumber,
-                            false, // composition
-                            true); // bspline
+        GetDeformationField<false, true>(velocityFieldGrid,
+                                         deformationField,
+                                         velocityFieldGridCuda,
+                                         deformationFieldCuda,
+                                         maskCuda.data().get(),
+                                         voxelNumber);
     } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) {
         // Create an image to store the flow field
         NiftiImage flowField(deformationField, NiftiImage::Copy::ImageInfo);
diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp
index fd59d4e2..9530929b 100644
--- a/reg-lib/cuda/CudaLocalTransformation.hpp
+++ b/reg-lib/cuda/CudaLocalTransformation.hpp
@@ -21,14 +21,13 @@ void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda);
 /* *************************************************************** */
 void GetDisplacementFromDeformation(nifti_image *image, float4 *imageCuda);
 /* *************************************************************** */
+template<bool composition, bool bspline>
 void GetDeformationField(const nifti_image *controlPointImage,
                          const nifti_image *referenceImage,
                          const float4 *controlPointImageCuda,
                          float4 *deformationFieldCuda,
                          const int *maskCuda,
-                         const size_t activeVoxelNumber,
-                         const bool composition,
-                         const bool bspline);
+                         const size_t activeVoxelNumber);
 /* *************************************************************** */
 template<bool is3d>
 double ApproxBendingEnergy(const nifti_image *controlPointImage,
diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu
index 6bb0e04f..ef900936 100644
--- a/reg-lib/cuda/CudaLocalTransformationKernels.cu
+++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu
@@ -15,7 +15,8 @@
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
 /* *************************************************************** */
-__device__ void GetBasisBSplineValues(const float basis, float *values) {
+template<bool bspline>
+__device__ __inline__ void GetBasisSplineValues(const float basis, float *values) {
     const float ff = Square(basis);
     const float fff = ff * basis;
     const float mf = 1.f - basis;
@@ -25,15 +26,8 @@ __device__ void GetBasisBSplineValues(const float basis, float *values) {
     values[3] = fff / 6.f;
 }
 /* *************************************************************** */
-__device__ void GetFirstBSplineValues(const float basis, float *values, float *first) {
-    GetBasisBSplineValues(basis, values);
-    first[3] = Square(basis) / 2.f;
-    first[0] = basis - 0.5f - first[3];
-    first[2] = 1.f + first[0] - 2.f * first[3];
-    first[1] = -first[0] - first[2] - first[3];
-}
-/* *************************************************************** */
-__device__ void GetBasisSplineValues(const float basis, float *values) {
+template<>
+__device__ __inline__ void GetBasisSplineValues<false>(const float basis, float *values) {
     const float ff = Square(basis);
     values[0] = (basis * ((2.f - basis) * basis - 1.f)) / 2.f;
     values[1] = (ff * (3.f * basis - 5.f) + 2.f) / 2.f;
@@ -41,6 +35,14 @@ __device__ void GetBasisSplineValues(const float basis, float *values) {
     values[3] = (basis - 1.f) * ff / 2.f;
 }
 /* *************************************************************** */
+__device__ __inline__ void GetFirstBSplineValues(const float basis, float *values, float *first) {
+    GetBasisSplineValues<true>(basis, values);
+    first[3] = Square(basis) / 2.f;
+    first[0] = basis - 0.5f - first[3];
+    first[2] = 1.f + first[0] - 2.f * first[3];
+    first[1] = -first[0] - first[2] - first[3];
+}
+/* *************************************************************** */
 __device__ void GetBSplineBasisValue(const float basis, const int index, float *value, float *first) {
     switch (index) {
     case 0:
@@ -168,24 +170,21 @@ __device__ float4 GetSlidedValues(int x, int y, int z,
     return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX);
 }
 /* *************************************************************** */
-__global__ void GetDeformationField3d(float4 *deformationField,
+template<bool composition, bool bspline>
+__device__ void GetDeformationField3d(float4 *deformationField,
                                       cudaTextureObject_t controlPointTexture,
                                       cudaTextureObject_t maskTexture,
                                       const mat44 *realToVoxel,
                                       const int3 referenceImageDim,
                                       const int3 controlPointImageDim,
                                       const float3 controlPointVoxelSpacing,
-                                      const unsigned activeVoxelNumber,
-                                      const bool composition,
-                                      const bool bspline) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid >= activeVoxelNumber) return;
+                                      const int index) {
     int3 nodePre;
     float3 basis;
 
-    if (composition) { // Composition of deformation fields
+    if constexpr (composition) { // Composition of deformation fields
         // The previous position at the current pixel position is read
-        const float4 node = deformationField[tid];
+        const float4 node = deformationField[index];
 
         // From real to pixel position in the CPP
         const float xVoxel = (realToVoxel->m[0][0] * node.x +
@@ -208,8 +207,8 @@ __global__ void GetDeformationField3d(float4 *deformationField,
         nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) };
     } else { // starting deformation field is blank - !composition
-        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-        const auto [x, y, z] = reg_indexToDims_cuda<true>(tid2, referenceImageDim);
+        const int voxel = tex1Dfetch<int>(maskTexture, index);
+        const auto [x, y, z] = reg_indexToDims_cuda<true>(voxel, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -217,33 +216,20 @@ __global__ void GetDeformationField3d(float4 *deformationField,
         nodePre = { int(xVoxel), int(yVoxel), int(zVoxel) };
         basis = { xVoxel - float(nodePre.x), yVoxel - float(nodePre.y), zVoxel - float(nodePre.z) };
     }
-    // Z basis values
-    extern __shared__ float yBasis[];   // Shared memory
-    const unsigned sharedMemIndex = 4 * threadIdx.x;
-    // Compute the shared memory offset which corresponds to four times the number of threads per block
-    float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z];
-    if (basis.z < 0) basis.z = 0; // rounding error
-    if (bspline) GetBasisBSplineValues(basis.z, &zBasis[sharedMemIndex]);
-    else GetBasisSplineValues(basis.z, &zBasis[sharedMemIndex]);
-
-    // Y basis values
-    if (basis.y < 0) basis.y = 0; // rounding error
-    if (bspline) GetBasisBSplineValues(basis.y, &yBasis[sharedMemIndex]);
-    else GetBasisSplineValues(basis.y, &yBasis[sharedMemIndex]);
-
-    // X basis values
-    float xBasis[4];
-    if (basis.x < 0) basis.x = 0; // rounding error
-    if (bspline) GetBasisBSplineValues(basis.x, xBasis);
-    else GetBasisSplineValues(basis.x, xBasis);
+
+    // Basis values
+    float xBasis[4], yBasis[4], zBasis[4];
+    GetBasisSplineValues<bspline>(basis.x, xBasis);
+    GetBasisSplineValues<bspline>(basis.y, yBasis);
+    GetBasisSplineValues<bspline>(basis.z, zBasis);
 
     float4 displacement{};
     for (char c = 0; c < 4; c++) {
         int indexYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y) * controlPointImageDim.x;
-        const float basisZ = zBasis[sharedMemIndex + c];
+        const float basisZ = zBasis[c];
         for (char b = 0; b < 4; b++, indexYZ += controlPointImageDim.x) {
             int indexXYZ = indexYZ + nodePre.x;
-            const float basisY = yBasis[sharedMemIndex + b];
+            const float basisY = yBasis[b];
             for (char a = 0; a < 4; a++, indexXYZ++) {
                 const float4 nodeCoeff = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
                 const float xyzBasis = xBasis[a] * basisY * basisZ;
@@ -253,27 +239,24 @@ __global__ void GetDeformationField3d(float4 *deformationField,
             }
         }
     }
-    deformationField[tid] = displacement;
+    deformationField[index] = displacement;
 }
 /* *************************************************************** */
-__global__ void GetDeformationField2d(float4 *deformationField,
+template<bool composition, bool bspline>
+__device__ void GetDeformationField2d(float4 *deformationField,
                                       cudaTextureObject_t controlPointTexture,
                                       cudaTextureObject_t maskTexture,
                                       const mat44 *realToVoxel,
                                       const int3 referenceImageDim,
                                       const int3 controlPointImageDim,
                                       const float3 controlPointVoxelSpacing,
-                                      const unsigned activeVoxelNumber,
-                                      const bool composition,
-                                      const bool bspline) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid >= activeVoxelNumber) return;
+                                      const int index) {
     int2 nodePre;
     float2 basis;
 
-    if (composition) { // Composition of deformation fields
+    if constexpr (composition) { // Composition of deformation fields
         // The previous position at the current pixel position is read
-        const float4 node = deformationField[tid];
+        const float4 node = deformationField[index];
 
         // From real to pixel position in the CPP
         const float xVoxel = (realToVoxel->m[0][0] * node.x +
@@ -289,31 +272,24 @@ __global__ void GetDeformationField2d(float4 *deformationField,
         nodePre = { Floor(xVoxel), Floor(yVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) };
     } else { // starting deformation field is blank - !composition
-        const int tid2 = tex1Dfetch<int>(maskTexture, tid);
-        const auto [x, y, z] = reg_indexToDims_cuda<false>(tid2, referenceImageDim);
+        const int voxel = tex1Dfetch<int>(maskTexture, index);
+        const auto [x, y, z] = reg_indexToDims_cuda<false>(voxel, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
         nodePre = { int(xVoxel), int(yVoxel) };
         basis = { xVoxel - float(nodePre.x), yVoxel - float(nodePre.y) };
     }
-    // Y basis values
-    extern __shared__ float yBasis[]; // Shared memory
-    const unsigned sharedMemIndex = 4 * threadIdx.x;
-    if (basis.y < 0) basis.y = 0; // rounding error
-    if (bspline) GetBasisBSplineValues(basis.y, &yBasis[sharedMemIndex]);
-    else GetBasisSplineValues(basis.y, &yBasis[sharedMemIndex]);
-
-    // X basis values
-    float xBasis[4];
-    if (basis.x < 0) basis.x = 0; // rounding error
-    if (bspline) GetBasisBSplineValues(basis.x, xBasis);
-    else GetBasisSplineValues(basis.x, xBasis);
+
+    // Basis values
+    float xBasis[4], yBasis[4];
+    GetBasisSplineValues<bspline>(basis.x, xBasis);
+    GetBasisSplineValues<bspline>(basis.y, yBasis);
 
     float4 displacement{};
     for (char b = 0; b < 4; b++) {
         int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x;
-        const float basis = yBasis[sharedMemIndex + b];
+        const float basis = yBasis[b];
         for (char a = 0; a < 4; a++, index++) {
             const float4 nodeCoeff = tex1Dfetch<float4>(controlPointTexture, index);
             const float xyBasis = xBasis[a] * basis;
@@ -321,7 +297,7 @@ __global__ void GetDeformationField2d(float4 *deformationField,
             displacement.y += xyBasis * nodeCoeff.y;
         }
     }
-    deformationField[tid] = displacement;
+    deformationField[index] = displacement;
 }
 /* *************************************************************** */
 __global__ void GetApproxJacobianValues2d(float *jacobianMatrices,

From 29647ad28126eba40aa9fc711e05eedfc66785d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 8 Jan 2024 16:05:24 +0000
Subject: [PATCH 261/314] Refactor CudaTools #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_benchmark.cpp                    |  18 +-
 reg-lib/cuda/BlockSize.hpp                    |  33 ++--
 reg-lib/cuda/CMakeLists.txt                   |   2 +-
 reg-lib/cuda/CudaCompute.cu                   |  12 +-
 reg-lib/cuda/CudaKernelConvolution.hpp        |   2 +-
 reg-lib/cuda/CudaLocalTransformation.cu       |  12 +-
 reg-lib/cuda/CudaLocalTransformation.hpp      |   2 +-
 reg-lib/cuda/CudaNormaliseGradient.cu         |   2 +-
 reg-lib/cuda/CudaOptimiser.hpp                |   2 +-
 .../cuda/{_reg_tools_gpu.cu => CudaTools.cu}  | 164 ++++++++----------
 reg-lib/cuda/CudaTools.hpp                    |  64 +++++++
 ...g_tools_kernels.cu => CudaToolsKernels.cu} | 116 ++++---------
 reg-lib/cuda/_reg_ssd_gpu.h                   |   2 +-
 reg-lib/cuda/_reg_tools_gpu.h                 |  64 -------
 reg-lib/cuda/affineDeformationKernel.cu       |   2 +-
 16 files changed, 211 insertions(+), 288 deletions(-)
 rename reg-lib/cuda/{_reg_tools_gpu.cu => CudaTools.cu} (68%)
 mode change 100755 => 100644
 create mode 100644 reg-lib/cuda/CudaTools.hpp
 rename reg-lib/cuda/{_reg_tools_kernels.cu => CudaToolsKernels.cu} (60%)
 mode change 100755 => 100644
 delete mode 100755 reg-lib/cuda/_reg_tools_gpu.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 3b2f92ea..c2f53117 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-379
+380
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index c579d61f..47ad511a 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -25,7 +25,7 @@
 #include "_reg_affineTransformation_gpu.h"
 #include "_reg_bspline_gpu.h"
 #include "_reg_mutualinformation_gpu.h"
-#include "_reg_tools_gpu.h"
+#include "CudaTools.hpp"
 #include "_reg_blockMatching_gpu.h"
 #endif
 
@@ -609,14 +609,14 @@ int main(int argc, char **argv)
          time(&start);
          for(int i=0; i<maxIt; ++i)
          {
-            reg_smoothImageForCubicSpline_gpu(resultImage,
-                                              &voxelNmiGradientArray_d,
-                                              smoothingRadius);
-            reg_voxelCentricToNodeCentric_gpu(targetImage,
-                                              controlPointImage,
-                                              &voxelNmiGradientArray_d,
-                                              &nodeNmiGradientArray_d,
-                                              1.0f);
+            Cuda::SmoothImageForCubicSpline(resultImage,
+                                            &voxelNmiGradientArray_d,
+                                            smoothingRadius);
+            Cuda::VoxelCentricToNodeCentric(targetImage,
+                                            controlPointImage,
+                                            &voxelNmiGradientArray_d,
+                                            &nodeNmiGradientArray_d,
+                                            1.0f);
          }
          time(&end);
          gpuTime=(end-start);
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 45164b1f..c72420e8 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -30,12 +30,11 @@ struct BlockSize {
     unsigned DefFieldCompose2d;
     unsigned DefFieldCompose3d;
     unsigned GetJacobianMatrix;
-    unsigned reg_voxelCentricToNodeCentric;
-    unsigned reg_convertNmiGradientFromVoxelToRealSpace;
-    unsigned reg_ApplyConvolutionWindowAlongX;
-    unsigned reg_ApplyConvolutionWindowAlongY;
-    unsigned reg_ApplyConvolutionWindowAlongZ;
-    unsigned Arithmetic;
+    unsigned VoxelCentricToNodeCentric;
+    unsigned ConvertNmiGradientFromVoxelToRealSpace;
+    unsigned ApplyConvolutionWindowAlongX;
+    unsigned ApplyConvolutionWindowAlongY;
+    unsigned ApplyConvolutionWindowAlongZ;
 };
 /* *************************************************************** */
 struct BlockSize100: public BlockSize {
@@ -56,12 +55,11 @@ struct BlockSize100: public BlockSize {
         DefFieldCompose2d = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem
         DefFieldCompose3d = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem
         GetJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
-        reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem
-        reg_convertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
-        reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
-        reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
-        reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem
-        Arithmetic = 384; // 5 reg - 24 smem
+        VoxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem
+        ConvertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
+        ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
+        ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
+        ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem
         NR_FUNC_CALLED();
     }
 };
@@ -84,12 +82,11 @@ struct BlockSize300: public BlockSize {
         DefFieldCompose2d = 1024; // 23 reg
         DefFieldCompose3d = 1024; // 24 reg
         GetJacobianMatrix = 768; // 34 reg
-        reg_voxelCentricToNodeCentric = 1024; // 23 reg
-        reg_convertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg
-        reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg
-        reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg
-        reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg
-        Arithmetic = 1024;
+        VoxelCentricToNodeCentric = 1024; // 23 reg
+        ConvertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg
+        ApplyConvolutionWindowAlongX = 1024; // 25 reg
+        ApplyConvolutionWindowAlongY = 1024; // 25 reg
+        ApplyConvolutionWindowAlongZ = 1024; // 25 reg
         NR_FUNC_CALLED();
     }
 };
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index f24f1cad..750c230b 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -72,11 +72,11 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaOptimiser.cu
     CudaResampleImageKernel.cpp
     CudaResampling.cu
+    CudaTools.cu
     resampleKernel.cu
     _reg_globalTransformation_gpu.cu
     _reg_nmi_gpu.cu
     _reg_ssd_gpu.cu
-    _reg_tools_gpu.cu
 )
 target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
 install(TARGETS ${NAME}
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 4d57c327..5d663a4f 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -268,12 +268,12 @@ void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) {
 void CudaCompute::VoxelCentricToNodeCentric(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating());
-    reg_voxelCentricToNodeCentric_gpu(con.F3dContent::GetTransformationGradient(),
-                                      con.F3dContent::GetVoxelBasedMeasureGradient(),
-                                      con.GetTransformationGradientCuda(),
-                                      con.GetVoxelBasedMeasureGradientCuda(),
-                                      weight,
-                                      reorientation);
+    Cuda::VoxelCentricToNodeCentric(con.F3dContent::GetTransformationGradient(),
+                                    con.F3dContent::GetVoxelBasedMeasureGradient(),
+                                    con.GetTransformationGradientCuda(),
+                                    con.GetVoxelBasedMeasureGradientCuda(),
+                                    weight,
+                                    reorientation);
 }
 /* *************************************************************** */
 void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
diff --git a/reg-lib/cuda/CudaKernelConvolution.hpp b/reg-lib/cuda/CudaKernelConvolution.hpp
index a4b703b0..8d1a07f1 100644
--- a/reg-lib/cuda/CudaKernelConvolution.hpp
+++ b/reg-lib/cuda/CudaKernelConvolution.hpp
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "_reg_tools_gpu.h"
+#include "CudaTools.hpp"
 
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index 232a8410..3c1ff918 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -369,7 +369,7 @@ double GetJacobianPenaltyTerm(const nifti_image *referenceImage,
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 
     // Perform the reduction
-    const double penaltyTermValue = reg_sumReduction_gpu(jacobianDetCuda, jacNumber);
+    const double penaltyTermValue = SumReduction(jacobianDetCuda, jacNumber);
     NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda));
     return penaltyTermValue / jacSum;
 }
@@ -685,7 +685,7 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
             affineOnlyCuda.resize(voxelNumber);
             reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
                                                affineOnly, affineOnlyCuda.data().get());
-            reg_subtractImages_gpu(flowField, flowFieldCuda, affineOnlyCuda.data().get());
+            SubtractImages(flowField, flowFieldCuda, affineOnlyCuda.data().get());
         }
     } else GetDisplacementFromDeformation(flowField, flowFieldCuda);
 
@@ -693,8 +693,8 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
     int squaringNumber = 1;
     if (updateStepNumber || flowField->intent_p2 == 0) {
         // Check the largest value
-        float extrema = fabsf(reg_getMinValue_gpu(flowField, flowFieldCuda, -1));
-        const float temp = reg_getMaxValue_gpu(flowField, flowFieldCuda, -1);
+        float extrema = fabsf(GetMinValue(flowField, flowFieldCuda, -1));
+        const float temp = GetMaxValue(flowField, flowFieldCuda, -1);
         extrema = std::max(extrema, temp);
         // Check the values for scaling purpose
         float maxLength;
@@ -716,7 +716,7 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
     // The displacement field is scaled
     const float scalingValue = 1.f / pow(2.f, static_cast<float>(std::abs(squaringNumber)));
     // Backward/forward deformation field is scaled down
-    reg_multiplyValue_gpu(voxelNumber, flowFieldCuda, flowField->intent_p2 < 0 ? -scalingValue : scalingValue);
+    MultiplyValue(voxelNumber, flowFieldCuda, flowField->intent_p2 < 0 ? -scalingValue : scalingValue);
 
     // Conversion from displacement to deformation
     GetDeformationFromDisplacement(flowField, flowFieldCuda);
@@ -735,7 +735,7 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
     // The affine component of the transformation is restored
     if (affineOnly) {
         GetDisplacementFromDeformation(deformationField, deformationFieldCuda);
-        reg_addImages_gpu(deformationField, deformationFieldCuda, affineOnlyCuda.data().get());
+        AddImages(deformationField, deformationFieldCuda, affineOnlyCuda.data().get());
     }
     deformationField->intent_p1 = DEF_FIELD;
     deformationField->intent_p2 = 0;
diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp
index 9530929b..90a13749 100644
--- a/reg-lib/cuda/CudaLocalTransformation.hpp
+++ b/reg-lib/cuda/CudaLocalTransformation.hpp
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "_reg_tools_gpu.h"
+#include "CudaTools.hpp"
 
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu
index 8d948c2e..6bef4bc3 100644
--- a/reg-lib/cuda/CudaNormaliseGradient.cu
+++ b/reg-lib/cuda/CudaNormaliseGradient.cu
@@ -1,5 +1,5 @@
 #include "CudaNormaliseGradient.hpp"
-#include "_reg_tools_gpu.h"
+#include "CudaTools.hpp"
 
 /* *************************************************************** */
 template<bool optimiseX, bool optimiseY, bool optimiseZ>
diff --git a/reg-lib/cuda/CudaOptimiser.hpp b/reg-lib/cuda/CudaOptimiser.hpp
index 56a1aceb..92d55cf6 100644
--- a/reg-lib/cuda/CudaOptimiser.hpp
+++ b/reg-lib/cuda/CudaOptimiser.hpp
@@ -1,8 +1,8 @@
 #pragma once
 
 #include "CudaCommon.hpp"
+#include "CudaTools.hpp"
 #include "Optimiser.hpp"
-#include "_reg_tools_gpu.h"
 
 /* *************************************************************** */
 namespace NiftyReg {
diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/CudaTools.cu
old mode 100755
new mode 100644
similarity index 68%
rename from reg-lib/cuda/_reg_tools_gpu.cu
rename to reg-lib/cuda/CudaTools.cu
index f1b9c401..a8ee68ad
--- a/reg-lib/cuda/_reg_tools_gpu.cu
+++ b/reg-lib/cuda/CudaTools.cu
@@ -1,5 +1,5 @@
 /*
- *  _reg_tools_gpu.cu
+ *  CudaTools.cu
  *
  *
  *  Created by Marc Modat and Pankaj Daga on 24/03/2009.
@@ -11,16 +11,18 @@
  */
 
 #include "CudaCommon.hpp"
-#include "_reg_tools_gpu.h"
-#include "_reg_tools_kernels.cu"
+#include "CudaTools.hpp"
+#include "CudaToolsKernels.cu"
 
 /* *************************************************************** */
-void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage,
-                                       const nifti_image *voxelImage,
-                                       float4 *nodeImageCuda,
-                                       float4 *voxelImageCuda,
-                                       float weight,
-                                       const mat44 *voxelToMillimetre) {
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+void VoxelCentricToNodeCentric(const nifti_image *nodeImage,
+                               const nifti_image *voxelImage,
+                               float4 *nodeImageCuda,
+                               float4 *voxelImageCuda,
+                               float weight,
+                               const mat44 *voxelToMillimetre) {
     const bool is3d = nodeImage->nz > 1;
     const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3);
@@ -67,32 +69,32 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage,
         weight *= ratio[i];
     }
 
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_voxelCentricToNodeCentric;
+    const unsigned blocks = CudaContext::GetBlockSize()->VoxelCentricToNodeCentric;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    auto voxelCentricToNodeCentricKernel = is3d ? reg_voxelCentricToNodeCentric_kernel<true> : reg_voxelCentricToNodeCentric_kernel<false>;
+    auto voxelCentricToNodeCentricKernel = is3d ? VoxelCentricToNodeCentricKernel<true> : VoxelCentricToNodeCentricKernel<false>;
     voxelCentricToNodeCentricKernel<<<gridDims, blockDims>>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims,
                                                              voxelImageDims, weight, transformation, reorientation);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ,
-                                                    const nifti_image *controlPointImage,
-                                                    float4 *nmiGradientCuda) {
+void ConvertNmiGradientFromVoxelToRealSpace(const mat44 *sourceMatrixXYZ,
+                                            const nifti_image *controlPointImage,
+                                            float4 *nmiGradientCuda) {
     const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_convertNmiGradientFromVoxelToRealSpace;
+    const unsigned blocks = CudaContext::GetBlockSize()->ConvertNmiGradientFromVoxelToRealSpace;
     const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
-    reg_convertNmiGradientFromVoxelToRealSpace_kernel<<<gridDims, blockDims>>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber);
+    ConvertNmiGradientFromVoxelToRealSpaceKernel<<<gridDims, blockDims>>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber);
     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
 }
 /* *************************************************************** */
-void reg_gaussianSmoothing_gpu(const nifti_image *image,
-                               float4 *imageCuda,
-                               const float sigma,
-                               const bool smoothXYZ[8]) {
+void GaussianSmoothing(const nifti_image *image,
+                       float4 *imageCuda,
+                       const float sigma,
+                       const bool smoothXYZ[8]) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
@@ -139,29 +141,29 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image,
                 dim3 blockDims, gridDims;
                 switch (n) {
                 case 1:
-                    blocks = blockSize->reg_ApplyConvolutionWindowAlongX;
+                    blocks = blockSize->ApplyConvolutionWindowAlongX;
                     grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                     gridDims = dim3(grids, grids, 1);
                     blockDims = dim3(blocks, 1, 1);
-                    reg_applyConvolutionWindowAlongX_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                    ApplyConvolutionWindowAlongXKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
                                                                                      kernelSize, imageDim, (unsigned)voxelNumber);
                     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                     break;
                 case 2:
-                    blocks = blockSize->reg_ApplyConvolutionWindowAlongY;
+                    blocks = blockSize->ApplyConvolutionWindowAlongY;
                     grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                     gridDims = dim3(grids, grids, 1);
                     blockDims = dim3(blocks, 1, 1);
-                    reg_applyConvolutionWindowAlongY_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                    ApplyConvolutionWindowAlongYKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
                                                                                      kernelSize, imageDim, (unsigned)voxelNumber);
                     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                     break;
                 case 3:
-                    blocks = blockSize->reg_ApplyConvolutionWindowAlongZ;
+                    blocks = blockSize->ApplyConvolutionWindowAlongZ;
                     grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                     gridDims = dim3(grids, grids, 1);
                     blockDims = dim3(blocks, 1, 1);
-                    reg_applyConvolutionWindowAlongZ_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                    ApplyConvolutionWindowAlongZKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
                                                                                      kernelSize, imageDim, (unsigned)voxelNumber);
                     NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                     break;
@@ -174,9 +176,9 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image,
     }
 }
 /* *************************************************************** */
-void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
-                                       float4 *imageCuda,
-                                       const float *spacingVoxel) {
+void SmoothImageForCubicSpline(const nifti_image *image,
+                               float4 *imageCuda,
+                               const float *spacingVoxel) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
     const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
@@ -215,29 +217,29 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
             dim3 blockDims, gridDims;
             switch (n) {
             case 0:
-                blocks = blockSize->reg_ApplyConvolutionWindowAlongX;
+                blocks = blockSize->ApplyConvolutionWindowAlongX;
                 grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                 gridDims = dim3(grids, grids, 1);
                 blockDims = dim3(blocks, 1, 1);
-                reg_applyConvolutionWindowAlongX_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                ApplyConvolutionWindowAlongXKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
                                                                                  kernelSize, imageDim, (unsigned)voxelNumber);
                 NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                 break;
             case 1:
-                blocks = blockSize->reg_ApplyConvolutionWindowAlongY;
+                blocks = blockSize->ApplyConvolutionWindowAlongY;
                 grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                 gridDims = dim3(grids, grids, 1);
                 blockDims = dim3(blocks, 1, 1);
-                reg_applyConvolutionWindowAlongY_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                ApplyConvolutionWindowAlongYKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
                                                                                  kernelSize, imageDim, (unsigned)voxelNumber);
                 NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                 break;
             case 2:
-                blocks = blockSize->reg_ApplyConvolutionWindowAlongZ;
+                blocks = blockSize->ApplyConvolutionWindowAlongZ;
                 grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
                 gridDims = dim3(grids, grids, 1);
                 blockDims = dim3(blocks, 1, 1);
-                reg_applyConvolutionWindowAlongZ_kernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
+                ApplyConvolutionWindowAlongZKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
                                                                                  kernelSize, imageDim, (unsigned)voxelNumber);
                 NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
                 break;
@@ -249,77 +251,53 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
     }
 }
 /* *************************************************************** */
-void reg_multiplyValue_gpu(const size_t count, float4 *arrayCuda, const float value) {
-    const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
-    const dim3 gridDims = dim3(grids, grids, 1);
-    const dim3 blockDims = dim3(blocks, 1, 1);
-    reg_multiplyValue_kernel_float4<<<gridDims, blockDims>>>(arrayCuda, value, (unsigned)count);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-}
-/* *************************************************************** */
-void reg_addValue_gpu(const size_t count, float4 *arrayCuda, const float value) {
-    const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
-    const dim3 gridDims = dim3(grids, grids, 1);
-    const dim3 blockDims = dim3(blocks, 1, 1);
-    reg_addValue_kernel_float4<<<gridDims, blockDims>>>(arrayCuda, value, (unsigned)count);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-}
-/* *************************************************************** */
-void reg_multiplyArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda) {
-    const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
-    const dim3 gridDims = dim3(grids, grids, 1);
-    const dim3 blockDims = dim3(blocks, 1, 1);
-    reg_multiplyArrays_kernel_float4<<<gridDims, blockDims>>>(array1Cuda, array2Cuda, (unsigned)count);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+void AddValue(const size_t count, float4 *arrayCuda, const float addition) {
+    thrust::for_each_n(thrust::device, arrayCuda, count, [=]__device__(float4& val) {
+        val = val + addition;
+    });
 }
 /* *************************************************************** */
-void reg_addArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda) {
-    const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks));
-    const dim3 gridDims = dim3(grids, grids, 1);
-    const dim3 blockDims = dim3(blocks, 1, 1);
-    reg_addArrays_kernel_float4<<<gridDims, blockDims>>>(array1Cuda, array2Cuda, (unsigned)count);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+void MultiplyValue(const size_t count, float4 *arrayCuda, const float multiplier) {
+    thrust::for_each_n(thrust::device, arrayCuda, count, [=]__device__(float4& val) {
+        val = val * multiplier;
+    });
 }
 /* *************************************************************** */
-float reg_sumReduction_gpu(float *arrayCuda, const size_t size) {
+float SumReduction(float *arrayCuda, const size_t size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus<float>());
 }
 /* *************************************************************** */
-float reg_maxReduction_gpu(float *arrayCuda, const size_t size) {
+float MaxReduction(float *arrayCuda, const size_t size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::maximum<float>());
 }
 /* *************************************************************** */
-float reg_minReduction_gpu(float *arrayCuda, const size_t size) {
+float MinReduction(float *arrayCuda, const size_t size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum<float>());
 }
 /* *************************************************************** */
 template<typename Operation>
-void reg_operationOnImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda, Operation operation) {
+void OperationOnImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda, Operation operation) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
     thrust::transform(thrust::device, img1Cuda, img1Cuda + voxelNumber, img2Cuda, img1Cuda, operation);
 }
 /* *************************************************************** */
-void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
-    reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::plus<float4>());
+void AddImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
+    OperationOnImages(img, img1Cuda, img2Cuda, thrust::plus<float4>());
 }
 /* *************************************************************** */
-void reg_subtractImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
-    reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::minus<float4>());
+void SubtractImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
+    OperationOnImages(img, img1Cuda, img2Cuda, thrust::minus<float4>());
 }
 /* *************************************************************** */
-void reg_multiplyImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
-    reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::multiplies<float4>());
+void MultiplyImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
+    OperationOnImages(img, img1Cuda, img2Cuda, thrust::multiplies<float4>());
 }
 /* *************************************************************** */
-void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
-    reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::divides<float4>());
+void DivideImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
+    OperationOnImages(img, img1Cuda, img2Cuda, thrust::divides<float4>());
 }
 /* *************************************************************** */
 template<bool isMin>
@@ -329,7 +307,7 @@ DEVICE static inline float MinMax(const float lhs, const float rhs) {
 }
 /* *************************************************************** */
 template<bool isMin, bool isSingleTimePoint, int timePoints>
-inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda) {
+inline float GetMinMaxValue(const nifti_image *img, const float4 *imgCuda) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
     constexpr float initVal = isMin ? std::numeric_limits<float>::max() : std::numeric_limits<float>::lowest();
 
@@ -356,38 +334,40 @@ inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCud
 }
 /* *************************************************************** */
 template<bool isMin, bool isSingleTimePoint>
-inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoints) {
-    auto getMinMaxValue = reg_getMinMaxValue_gpu<isMin, isSingleTimePoint, 1>;
+static inline float GetMinMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoints) {
+    auto getMinMaxValue = GetMinMaxValue<isMin, isSingleTimePoint, 1>;
     switch (timePoints) {
     case 2:
-        getMinMaxValue = reg_getMinMaxValue_gpu<isMin, isSingleTimePoint, 2>;
+        getMinMaxValue = GetMinMaxValue<isMin, isSingleTimePoint, 2>;
         break;
     case 3:
-        getMinMaxValue = reg_getMinMaxValue_gpu<isMin, isSingleTimePoint, 3>;
+        getMinMaxValue = GetMinMaxValue<isMin, isSingleTimePoint, 3>;
         break;
     case 4:
-        getMinMaxValue = reg_getMinMaxValue_gpu<isMin, isSingleTimePoint, 4>;
+        getMinMaxValue = GetMinMaxValue<isMin, isSingleTimePoint, 4>;
         break;
     }
     return getMinMaxValue(img, imgCuda);
 }
 /* *************************************************************** */
 template<bool isMin>
-inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
+static inline float GetMinMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
     if (timePoint < -1 || timePoint >= img->nt)
         NR_FATAL_ERROR("The required time point does not exist");
     const bool isSingleTimePoint = timePoint > -1;
     const int timePoints = std::clamp(isSingleTimePoint ? timePoint + 1 : img->nt * img->nu, 1, 4);
-    auto getMinMaxValue = reg_getMinMaxValue_gpu<isMin, false>;
-    if (isSingleTimePoint) getMinMaxValue = reg_getMinMaxValue_gpu<isMin, true>;
+    auto getMinMaxValue = GetMinMaxValue<isMin, false>;
+    if (isSingleTimePoint) getMinMaxValue = GetMinMaxValue<isMin, true>;
     return getMinMaxValue(img, imgCuda, timePoints);
 }
 /* *************************************************************** */
-float reg_getMinValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
-    return reg_getMinMaxValue_gpu<true>(img, imgCuda, timePoint);
+float GetMinValue(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
+    return GetMinMaxValue<true>(img, imgCuda, timePoint);
 }
 /* *************************************************************** */
-float reg_getMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
-    return reg_getMinMaxValue_gpu<false>(img, imgCuda, timePoint);
+float GetMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoint) {
+    return GetMinMaxValue<false>(img, imgCuda, timePoint);
 }
 /* *************************************************************** */
+} // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaTools.hpp b/reg-lib/cuda/CudaTools.hpp
new file mode 100644
index 00000000..14e68a24
--- /dev/null
+++ b/reg-lib/cuda/CudaTools.hpp
@@ -0,0 +1,64 @@
+/*
+ * @file CudaTools.hpp
+ * @author Marc Modat
+ * @date 24/03/2009
+ *
+ *  Copyright (c) 2009-2018, University College London
+ *  Copyright (c) 2018, NiftyReg Developers.
+ *  All rights reserved.
+ * See the LICENSE.txt file in the nifty_reg root folder
+ *
+ */
+
+#pragma once
+
+#include "CudaCommon.hpp"
+#include "_reg_tools.h"
+
+/* *************************************************************** */
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+void VoxelCentricToNodeCentric(const nifti_image *nodeImage,
+                               const nifti_image *voxelImage,
+                               float4 *nodeImageCuda,
+                               float4 *voxelImageCuda,
+                               float weight,
+                               const mat44 *voxelToMillimetre = nullptr);
+/* *************************************************************** */
+void ConvertNmiGradientFromVoxelToRealSpace(const mat44 *sourceMatrixXYZ,
+                                            const nifti_image *controlPointImage,
+                                            float4 *nmiGradientCuda);
+/* *************************************************************** */
+void GaussianSmoothing(const nifti_image *image,
+                       float4 *imageCuda,
+                       const float sigma,
+                       const bool axisToSmooth[8]);
+/* *************************************************************** */
+void SmoothImageForCubicSpline(const nifti_image *image,
+                               float4 *imageCuda,
+                               const float *smoothingRadius);
+/* *************************************************************** */
+void AddValue(const size_t count, float4 *arrayCuda, const float value);
+/* *************************************************************** */
+void MultiplyValue(const size_t count, float4 *arrayCuda, const float value);
+/* *************************************************************** */
+float SumReduction(float *arrayCuda, const size_t size);
+/* *************************************************************** */
+float MaxReduction(float *arrayCuda, const size_t size);
+/* *************************************************************** */
+float MinReduction(float *arrayCuda, const size_t size);
+/* *************************************************************** */
+void AddImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
+/* *************************************************************** */
+void SubtractImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
+/* *************************************************************** */
+void MultiplyImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
+/* *************************************************************** */
+void DivideImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
+/* *************************************************************** */
+float GetMinValue(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1);
+/* *************************************************************** */
+float GetMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1);
+/* *************************************************************** */
+} // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/CudaToolsKernels.cu
old mode 100755
new mode 100644
similarity index 60%
rename from reg-lib/cuda/_reg_tools_kernels.cu
rename to reg-lib/cuda/CudaToolsKernels.cu
index b39d117a..54a415ba
--- a/reg-lib/cuda/_reg_tools_kernels.cu
+++ b/reg-lib/cuda/CudaToolsKernels.cu
@@ -1,5 +1,5 @@
 /*
- *  _reg_tools_kernels.cu
+ *  CudaToolsKernels.cu
  *
  *  Created by Marc Modat and Pankaj Daga on 24/03/2009.
  *  Copyright (c) 2009-2018, University College London
@@ -10,16 +10,18 @@
 
 #include "_reg_common_cuda_kernels.cu"
 
+/* *************************************************************** */
+namespace NiftyReg::Cuda {
 /* *************************************************************** */
 template<bool is3d>
-__global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda,
-                                                     cudaTextureObject_t voxelImageTexture,
-                                                     const unsigned nodeNumber,
-                                                     const int3 nodeImageDims,
-                                                     const int3 voxelImageDims,
-                                                     const float weight,
-                                                     const mat44 transformation,
-                                                     const mat33 reorientation) {
+__global__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda,
+                                                cudaTextureObject_t voxelImageTexture,
+                                                const unsigned nodeNumber,
+                                                const int3 nodeImageDims,
+                                                const int3 voxelImageDims,
+                                                const float weight,
+                                                const mat44 transformation,
+                                                const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid >= nodeNumber) return;
     // Calculate the node coordinates
@@ -68,7 +70,7 @@ __global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda,
     nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 };
 }
 /* *************************************************************** */
-__global__ void reg_convertNmiGradientFromVoxelToRealSpace_kernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) {
+__global__ void ConvertNmiGradientFromVoxelToRealSpaceKernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nodeNumber) {
         const float4 voxelGradient = gradient[tid];
@@ -80,12 +82,12 @@ __global__ void reg_convertNmiGradientFromVoxelToRealSpace_kernel(float4 *gradie
     }
 }
 /* *************************************************************** */
-__global__ void reg_applyConvolutionWindowAlongX_kernel(float4 *smoothedImage,
-                                                        cudaTextureObject_t imageTexture,
-                                                        cudaTextureObject_t kernelTexture,
-                                                        const int kernelSize,
-                                                        const int3 imageSize,
-                                                        const unsigned voxelNumber) {
+__global__ void ApplyConvolutionWindowAlongXKernel(float4 *smoothedImage,
+                                                   cudaTextureObject_t imageTexture,
+                                                   cudaTextureObject_t kernelTexture,
+                                                   const int kernelSize,
+                                                   const int3 imageSize,
+                                                   const unsigned voxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         int quot, rem;
@@ -125,12 +127,12 @@ __global__ void reg_applyConvolutionWindowAlongX_kernel(float4 *smoothedImage,
     }
 }
 /* *************************************************************** */
-__global__ void reg_applyConvolutionWindowAlongY_kernel(float4 *smoothedImage,
-                                                        cudaTextureObject_t imageTexture,
-                                                        cudaTextureObject_t kernelTexture,
-                                                        const int kernelSize,
-                                                        const int3 imageSize,
-                                                        const unsigned voxelNumber) {
+__global__ void ApplyConvolutionWindowAlongYKernel(float4 *smoothedImage,
+                                                   cudaTextureObject_t imageTexture,
+                                                   cudaTextureObject_t kernelTexture,
+                                                   const int kernelSize,
+                                                   const int3 imageSize,
+                                                   const unsigned voxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         int quot, rem;
@@ -169,12 +171,12 @@ __global__ void reg_applyConvolutionWindowAlongY_kernel(float4 *smoothedImage,
     }
 }
 /* *************************************************************** */
-__global__ void reg_applyConvolutionWindowAlongZ_kernel(float4 *smoothedImage,
-                                                        cudaTextureObject_t imageTexture,
-                                                        cudaTextureObject_t kernelTexture,
-                                                        const int kernelSize,
-                                                        const int3 imageSize,
-                                                        const unsigned voxelNumber) {
+__global__ void ApplyConvolutionWindowAlongZKernel(float4 *smoothedImage,
+                                                   cudaTextureObject_t imageTexture,
+                                                   cudaTextureObject_t kernelTexture,
+                                                   const int kernelSize,
+                                                   const int3 imageSize,
+                                                   const unsigned voxelNumber) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         int z = (int)tid / (imageSize.x * imageSize.y);
@@ -211,61 +213,5 @@ __global__ void reg_applyConvolutionWindowAlongZ_kernel(float4 *smoothedImage,
     }
 }
 /* *************************************************************** */
-__global__ void reg_multiplyValue_kernel_float(float *array, const float value, const unsigned count) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < count)
-        array[tid] *= value;
-}
-/* *************************************************************** */
-__global__ void reg_multiplyValue_kernel_float4(float4 *array, const float value, const unsigned count) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < count) {
-        const float4 temp = array[tid];
-        array[tid] = make_float4(temp.x * value, temp.y * value, temp.z * value, temp.w * value);
-    }
-}
-/* *************************************************************** */
-__global__ void reg_addValue_kernel_float(float *array, const float value, const unsigned count) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < count)
-        array[tid] += value;
-}
-/* *************************************************************** */
-__global__ void reg_addValue_kernel_float4(float4 *array, const float value, const unsigned count) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < count) {
-        const float4 temp = array[tid];
-        array[tid] = make_float4(temp.x + value, temp.y + value, temp.z + value, temp.w + value);
-    }
-}
-/* *************************************************************** */
-__global__ void reg_multiplyArrays_kernel_float(float *array1, float *array2, const unsigned count) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < count)
-        array1[tid] *= array2[tid];
-}
-/* *************************************************************** */
-__global__ void reg_multiplyArrays_kernel_float4(float4 *array1, float4 *array2, const unsigned count) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < count) {
-        const float4 a = array1[tid];
-        const float4 b = array2[tid];
-        array1[tid] = make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-    }
-}
-/* *************************************************************** */
-__global__ void reg_addArrays_kernel_float(float *array1, float *array2, const unsigned count) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < count)
-        array1[tid] += array2[tid];
-}
-/* *************************************************************** */
-__global__ void reg_addArrays_kernel_float4(float4 *array1, float4 *array2, const unsigned count) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < count) {
-        const float4 a = array1[tid];
-        const float4 b = array2[tid];
-        array1[tid] = make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-    }
-}
+} // namespace NiftyReg::Cuda
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h
index 23bd6fd5..a9b07e56 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.h
+++ b/reg-lib/cuda/_reg_ssd_gpu.h
@@ -12,7 +12,7 @@
 
 #pragma once
 
-#include "_reg_tools_gpu.h"
+#include "CudaTools.hpp"
 #include "_reg_measure_gpu.h"
 #include "_reg_ssd.h"
 
diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h
deleted file mode 100755
index 7cbb1e8a..00000000
--- a/reg-lib/cuda/_reg_tools_gpu.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * @file _reg_tools_gpu.h
- * @author Marc Modat
- * @date 24/03/2009
- *
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- * See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "CudaCommon.hpp"
-#include "_reg_tools.h"
-
-/* *************************************************************** */
-void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage,
-                                       const nifti_image *voxelImage,
-                                       float4 *nodeImageCuda,
-                                       float4 *voxelImageCuda,
-                                       float weight,
-                                       const mat44 *voxelToMillimetre = nullptr);
-/* *************************************************************** */
-void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ,
-                                                    const nifti_image *controlPointImage,
-                                                    float4 *nmiGradientCuda);
-/* *************************************************************** */
-void reg_gaussianSmoothing_gpu(const nifti_image *image,
-                               float4 *imageCuda,
-                               const float sigma,
-                               const bool axisToSmooth[8]);
-/* *************************************************************** */
-void reg_smoothImageForCubicSpline_gpu(const nifti_image *image,
-                                       float4 *imageCuda,
-                                       const float *smoothingRadius);
-/* *************************************************************** */
-void reg_multiplyValue_gpu(const size_t count, float4 *arrayCuda, const float value);
-/* *************************************************************** */
-void reg_addValue_gpu(const size_t count, float4 *arrayCuda, const float value);
-/* *************************************************************** */
-void reg_multiplyArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda);
-/* *************************************************************** */
-void reg_addArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda);
-/* *************************************************************** */
-float reg_sumReduction_gpu(float *arrayCuda, const size_t size);
-/* *************************************************************** */
-float reg_maxReduction_gpu(float *arrayCuda, const size_t size);
-/* *************************************************************** */
-float reg_minReduction_gpu(float *arrayCuda, const size_t size);
-/* *************************************************************** */
-void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
-/* *************************************************************** */
-void reg_subtractImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
-/* *************************************************************** */
-void reg_multiplyImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
-/* *************************************************************** */
-void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
-/* *************************************************************** */
-float reg_getMinValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1);
-/* *************************************************************** */
-float reg_getMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1);
-/* *************************************************************** */
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index 9c3a5937..a9ec43a1 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -1,4 +1,4 @@
-#include"_reg_tools_gpu.h"
+#include "CudaTools.hpp"
 
 /* *************************************************************** */
 __device__ __inline__ double getPosition(float* matrix, double* voxel, const unsigned idx)

From a73014e514c17996323e6d464ceac3fdaa5f61af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 9 Jan 2024 12:29:02 +0000
Subject: [PATCH 262/314] Optimise Cuda::VoxelCentricToNodeCentric() #92

---
 niftyreg_build_version.txt       |  2 +-
 reg-lib/cuda/BlockSize.hpp       |  6 ------
 reg-lib/cuda/CudaCompute.cu      | 15 +++++++++------
 reg-lib/cuda/CudaTools.cu        | 18 ++++++++----------
 reg-lib/cuda/CudaTools.hpp       |  1 +
 reg-lib/cuda/CudaToolsKernels.cu | 12 +++++-------
 6 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c2f53117..fae51388 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-380
+381
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index c72420e8..a0d2ea14 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -17,7 +17,6 @@ struct BlockSize {
     unsigned reg_affine_getDeformationField;
     unsigned GetApproxJacobianValues2d;
     unsigned GetApproxJacobianValues3d;
-    unsigned ApproxLinearEnergyGradient;
     unsigned GetJacobianValues2d;
     unsigned GetJacobianValues3d;
     unsigned LogSquaredValues;
@@ -30,7 +29,6 @@ struct BlockSize {
     unsigned DefFieldCompose2d;
     unsigned DefFieldCompose3d;
     unsigned GetJacobianMatrix;
-    unsigned VoxelCentricToNodeCentric;
     unsigned ConvertNmiGradientFromVoxelToRealSpace;
     unsigned ApplyConvolutionWindowAlongX;
     unsigned ApplyConvolutionWindowAlongY;
@@ -42,7 +40,6 @@ struct BlockSize100: public BlockSize {
         reg_affine_getDeformationField = 512; // 16 reg - 24 smem
         GetApproxJacobianValues2d = 384; // 17 reg - 104 smem - 36 cmem
         GetApproxJacobianValues3d = 256; // 27 reg - 356 smem - 108 cmem
-        ApproxLinearEnergyGradient = 384; // 40 reg
         GetJacobianValues2d = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem
         GetJacobianValues3d = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem
         LogSquaredValues = 384; // 07 reg - 24 smem - 36 cmem
@@ -55,7 +52,6 @@ struct BlockSize100: public BlockSize {
         DefFieldCompose2d = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem
         DefFieldCompose3d = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem
         GetJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
-        VoxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem
         ConvertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
         ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
         ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
@@ -69,7 +65,6 @@ struct BlockSize300: public BlockSize {
         reg_affine_getDeformationField = 1024; // 23 reg
         GetApproxJacobianValues2d = 768; // 34 reg
         GetApproxJacobianValues3d = 640; // 46 reg
-        ApproxLinearEnergyGradient = 768; // 40 reg
         GetJacobianValues2d = 768; // 34 reg
         GetJacobianValues3d = 768; // 34 reg
         LogSquaredValues = 1024; // 23 reg
@@ -82,7 +77,6 @@ struct BlockSize300: public BlockSize {
         DefFieldCompose2d = 1024; // 23 reg
         DefFieldCompose3d = 1024; // 24 reg
         GetJacobianMatrix = 768; // 34 reg
-        VoxelCentricToNodeCentric = 1024; // 23 reg
         ConvertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg
         ApplyConvolutionWindowAlongX = 1024; // 25 reg
         ApplyConvolutionWindowAlongY = 1024; // 25 reg
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 5d663a4f..7b49be10 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -268,12 +268,15 @@ void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) {
 void CudaCompute::VoxelCentricToNodeCentric(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating());
-    Cuda::VoxelCentricToNodeCentric(con.F3dContent::GetTransformationGradient(),
-                                    con.F3dContent::GetVoxelBasedMeasureGradient(),
-                                    con.GetTransformationGradientCuda(),
-                                    con.GetVoxelBasedMeasureGradientCuda(),
-                                    weight,
-                                    reorientation);
+    const nifti_image *transGrad = con.F3dContent::GetTransformationGradient();
+    auto voxelCentricToNodeCentric = transGrad->nz > 1 ? Cuda::VoxelCentricToNodeCentric<true> :
+                                                         Cuda::VoxelCentricToNodeCentric<false>;
+    voxelCentricToNodeCentric(transGrad,
+                              con.F3dContent::GetVoxelBasedMeasureGradient(),
+                              con.GetTransformationGradientCuda(),
+                              con.GetVoxelBasedMeasureGradientCuda(),
+                              weight,
+                              reorientation);
 }
 /* *************************************************************** */
 void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
diff --git a/reg-lib/cuda/CudaTools.cu b/reg-lib/cuda/CudaTools.cu
index a8ee68ad..c84cf344 100644
--- a/reg-lib/cuda/CudaTools.cu
+++ b/reg-lib/cuda/CudaTools.cu
@@ -17,18 +17,19 @@
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
 /* *************************************************************** */
+template<bool is3d>
 void VoxelCentricToNodeCentric(const nifti_image *nodeImage,
                                const nifti_image *voxelImage,
                                float4 *nodeImageCuda,
                                float4 *voxelImageCuda,
                                float weight,
                                const mat44 *voxelToMillimetre) {
-    const bool is3d = nodeImage->nz > 1;
     const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3);
     const int3 nodeImageDims = make_int3(nodeImage->nx, nodeImage->ny, nodeImage->nz);
     const int3 voxelImageDims = make_int3(voxelImage->nx, voxelImage->ny, voxelImage->nz);
-    auto voxelImageTexture = Cuda::CreateTextureObject(voxelImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+    auto voxelImageTexturePtr = Cuda::CreateTextureObject(voxelImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+    auto voxelImageTexture = *voxelImageTexturePtr;
 
     // The transformation between the image and the grid
     mat44 transformation;
@@ -69,15 +70,12 @@ void VoxelCentricToNodeCentric(const nifti_image *nodeImage,
         weight *= ratio[i];
     }
 
-    const unsigned blocks = CudaContext::GetBlockSize()->VoxelCentricToNodeCentric;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks));
-    const dim3 gridDims(grids, grids, 1);
-    const dim3 blockDims(blocks, 1, 1);
-    auto voxelCentricToNodeCentricKernel = is3d ? VoxelCentricToNodeCentricKernel<true> : VoxelCentricToNodeCentricKernel<false>;
-    voxelCentricToNodeCentricKernel<<<gridDims, blockDims>>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims,
-                                                             voxelImageDims, weight, transformation, reorientation);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nodeNumber, [=]__device__(const int index) {
+        VoxelCentricToNodeCentricKernel<is3d>(nodeImageCuda, voxelImageTexture, nodeImageDims, voxelImageDims, weight, transformation, reorientation, index);
+    });
 }
+template void VoxelCentricToNodeCentric<false>(const nifti_image*, const nifti_image*, float4*, float4*, float, const mat44*);
+template void VoxelCentricToNodeCentric<true>(const nifti_image*, const nifti_image*, float4*, float4*, float, const mat44*);
 /* *************************************************************** */
 void ConvertNmiGradientFromVoxelToRealSpace(const mat44 *sourceMatrixXYZ,
                                             const nifti_image *controlPointImage,
diff --git a/reg-lib/cuda/CudaTools.hpp b/reg-lib/cuda/CudaTools.hpp
index 14e68a24..8dfcbf6d 100644
--- a/reg-lib/cuda/CudaTools.hpp
+++ b/reg-lib/cuda/CudaTools.hpp
@@ -18,6 +18,7 @@
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
 /* *************************************************************** */
+template<bool is3d>
 void VoxelCentricToNodeCentric(const nifti_image *nodeImage,
                                const nifti_image *voxelImage,
                                float4 *nodeImageCuda,
diff --git a/reg-lib/cuda/CudaToolsKernels.cu b/reg-lib/cuda/CudaToolsKernels.cu
index 54a415ba..fc38446e 100644
--- a/reg-lib/cuda/CudaToolsKernels.cu
+++ b/reg-lib/cuda/CudaToolsKernels.cu
@@ -14,18 +14,16 @@
 namespace NiftyReg::Cuda {
 /* *************************************************************** */
 template<bool is3d>
-__global__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda,
+__device__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda,
                                                 cudaTextureObject_t voxelImageTexture,
-                                                const unsigned nodeNumber,
                                                 const int3 nodeImageDims,
                                                 const int3 voxelImageDims,
                                                 const float weight,
                                                 const mat44 transformation,
-                                                const mat33 reorientation) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid >= nodeNumber) return;
+                                                const mat33 reorientation,
+                                                const int index) {
     // Calculate the node coordinates
-    const auto [x, y, z] = reg_indexToDims_cuda<is3d>(tid, nodeImageDims);
+    const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, nodeImageDims);
     // Transform into voxel coordinates
     float voxelCoord[3], nodeCoord[3] = { static_cast<float>(x), static_cast<float>(y), static_cast<float>(z) };
     reg_mat44_mul_cuda<is3d>(transformation, nodeCoord, voxelCoord);
@@ -67,7 +65,7 @@ __global__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda,
 
     float reorientedValue[3];
     reg_mat33_mul_cuda<is3d>(reorientation, interpolatedValue, weight, reorientedValue);
-    nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 };
+    nodeImageCuda[index] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 };
 }
 /* *************************************************************** */
 __global__ void ConvertNmiGradientFromVoxelToRealSpaceKernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) {

From 92ec3cef24d394735b28536bc84f90fbfc6220c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 10 Jan 2024 12:55:06 +0000
Subject: [PATCH 263/314] Optimise Cuda::DefFieldCompose() #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/cuda/BlockSize.hpp                    |   6 -
 reg-lib/cuda/CudaCompute.cu                   |   3 +-
 reg-lib/cuda/CudaLocalTransformation.cu       |  32 ++---
 reg-lib/cuda/CudaLocalTransformation.hpp      |   1 +
 .../cuda/CudaLocalTransformationKernels.cu    | 110 ++++++++----------
 6 files changed, 61 insertions(+), 93 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index fae51388..77851f13 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-381
+382
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index a0d2ea14..6338cf87 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -26,8 +26,6 @@ struct BlockSize {
     unsigned ComputeJacGradient3d;
     unsigned ApproxCorrectFolding3d;
     unsigned CorrectFolding3d;
-    unsigned DefFieldCompose2d;
-    unsigned DefFieldCompose3d;
     unsigned GetJacobianMatrix;
     unsigned ConvertNmiGradientFromVoxelToRealSpace;
     unsigned ApplyConvolutionWindowAlongX;
@@ -49,8 +47,6 @@ struct BlockSize100: public BlockSize {
         ComputeJacGradient3d = 256; // 32 reg - 24 smem - 64 cmem
         ApproxCorrectFolding3d = 256; // 32 reg - 24 smem - 24 cmem
         CorrectFolding3d = 256; // 31 reg - 24 smem - 32 cmem
-        DefFieldCompose2d = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem
-        DefFieldCompose3d = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem
         GetJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
         ConvertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
         ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
@@ -74,8 +70,6 @@ struct BlockSize300: public BlockSize {
         ComputeJacGradient3d = 768; // 37 reg
         ApproxCorrectFolding3d = 768; // 34 reg
         CorrectFolding3d = 768; // 34 reg
-        DefFieldCompose2d = 1024; // 23 reg
-        DefFieldCompose3d = 1024; // 24 reg
         GetJacobianMatrix = 768; // 34 reg
         ConvertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg
         ApplyConvolutionWindowAlongX = 1024; // 25 reg
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 7b49be10..c81a0e97 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -324,6 +324,7 @@ void CudaCompute::DefFieldCompose(const nifti_image *defField) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
     thrust::device_vector<float4> defFieldCuda(voxelNumber);
     Cuda::TransferNiftiToDevice(defFieldCuda.data().get(), defField);
-    Cuda::DefFieldCompose(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda());
+    auto defFieldCompose = defField->nz > 1 ? Cuda::DefFieldCompose<true> : Cuda::DefFieldCompose<false>;
+    defFieldCompose(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda());
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index 3c1ff918..20d2c471 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -634,33 +634,20 @@ void GetFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
     velocityFieldGrid->num_ext = oldNumExt;
 }
 /* *************************************************************** */
+template<bool is3d>
 void DefFieldCompose(const nifti_image *deformationField,
                      const float4 *deformationFieldCuda,
-                     float4 *deformationFieldCudaOut) {
-    auto blockSize = CudaContext::GetBlockSize();
+                     float4 *deformationFieldOutCuda) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
-    const int3 referenceImageDim{ deformationField->nx, deformationField->ny, deformationField->nz };
+    const int3 referenceImageDims{ deformationField->nx, deformationField->ny, deformationField->nz };
     const mat44& affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk;
     const mat44& affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz;
-    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexture = *deformationFieldTexturePtr;
 
-    if (deformationField->nz > 1) {
-        const unsigned blocks = blockSize->DefFieldCompose3d;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        DefFieldCompose3d<<<gridDims, blockDims>>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim,
-                                                   (unsigned)voxelNumber, affineMatrixB, affineMatrixC);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    } else {
-        const unsigned blocks = blockSize->DefFieldCompose2d;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-        const dim3 gridDims(grids, grids, 1);
-        const dim3 blockDims(blocks, 1, 1);
-        DefFieldCompose2d<<<gridDims, blockDims>>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim,
-                                                   (unsigned)voxelNumber, affineMatrixB, affineMatrixC);
-        NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-    }
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const int index) {
+        DefFieldComposeKernel<is3d>(deformationFieldOutCuda, deformationFieldTexture, referenceImageDims, affineMatrixB, affineMatrixC, index);
+    });
 }
 /* *************************************************************** */
 void GetDeformationFieldFromFlowField(nifti_image *flowField,
@@ -725,9 +712,10 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
     thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda);
 
     // The deformation field is squared
+    auto defFieldCompose = deformationField->nz > 1 ? DefFieldCompose<true> : DefFieldCompose<false>;
     for (int i = 0; i < squaringNumber; ++i) {
         // The deformation field is applied to itself
-        DefFieldCompose(deformationField, deformationFieldCuda, flowFieldCuda);
+        defFieldCompose(deformationField, deformationFieldCuda, flowFieldCuda);
         // The computed scaled deformation field is copied over
         thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda);
         NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp
index 90a13749..8e718822 100644
--- a/reg-lib/cuda/CudaLocalTransformation.hpp
+++ b/reg-lib/cuda/CudaLocalTransformation.hpp
@@ -56,6 +56,7 @@ double CorrectFolding(const nifti_image *referenceImage,
                       float4 *controlPointImageCuda,
                       const bool approx);
 /* *************************************************************** */
+template<bool is3d>
 void DefFieldCompose(const nifti_image *deformationField,
                      const float4 *deformationFieldCuda,
                      float4 *deformationFieldOutCuda);
diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu
index ef900936..af983f9b 100644
--- a/reg-lib/cuda/CudaLocalTransformationKernels.cu
+++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu
@@ -1057,67 +1057,22 @@ __global__ void CorrectFolding3d(float4 *controlPointGrid,
     }
 }
 /* *************************************************************** */
-__global__ void DefFieldCompose2d(float4 *deformationField,
-                                  cudaTextureObject_t deformationFieldTexture,
-                                  const int3 referenceImageDim,
-                                  const unsigned voxelNumber,
-                                  const mat44 affineMatrixB,
-                                  const mat44 affineMatrixC) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < voxelNumber) {
-        // Extract the original voxel position
-        float4 position = deformationField[tid];
-
-        // Conversion from real position to voxel coordinate
-        const float4 voxelPosition{
-            position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + affineMatrixB.m[0][3],
-            position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + affineMatrixB.m[1][3],
-            0.f,
-            0.f
-        };
-
-        // Linear interpolation
-        const int2 ante = { Floor(voxelPosition.x), Floor(voxelPosition.y) };
-        float relX[2], relY[2];
-        relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1];
-        relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1];
-
-        position = make_float4(0.f, 0.f, 0.f, 0.f);
-        for (short b = 0; b < 2; ++b) {
-            for (short a = 0; a < 2; ++a) {
-                float4 deformation;
-                if (-1 < ante.x + a && ante.x + a < referenceImageDim.x &&
-                    -1 < ante.y + b && ante.y + b < referenceImageDim.y) {
-                    const int index = (ante.y + b) * referenceImageDim.x + ante.x + a;
-                    deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
-                } else {
-                    deformation = GetSlidedValues(ante.x + a, ante.y + b, deformationFieldTexture, referenceImageDim, affineMatrixC);
-                }
-                const float basis = relX[a] * relY[b];
-                position = position + basis * deformation;
-            }
-        }
-        deformationField[tid] = position;
-    }
-}
-/* *************************************************************** */
-__global__ void DefFieldCompose3d(float4 *deformationField,
-                                  cudaTextureObject_t deformationFieldTexture,
-                                  const int3 referenceImageDim,
-                                  const unsigned voxelNumber,
-                                  const mat44 affineMatrixB,
-                                  const mat44 affineMatrixC) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < voxelNumber) {
-        // Extract the original voxel position
-        float4 position = deformationField[tid];
+template<bool is3d>
+__device__ void DefFieldComposeKernel(float4 *deformationField,
+                                      cudaTextureObject_t deformationFieldTexture,
+                                      const int3 referenceImageDims,
+                                      const mat44 affineMatrixB,
+                                      const mat44 affineMatrixC,
+                                      const int index) {
+    // Extract the original voxel position
+    float4 position = deformationField[index];
 
+    if constexpr (is3d) {
         // Conversion from real position to voxel coordinate
-        const float4 voxelPosition{
+        const float3 voxelPosition{
             position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + position.z * affineMatrixB.m[0][2] + affineMatrixB.m[0][3],
             position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + position.z * affineMatrixB.m[1][2] + affineMatrixB.m[1][3],
-            position.x * affineMatrixB.m[2][0] + position.y * affineMatrixB.m[2][1] + position.z * affineMatrixB.m[2][2] + affineMatrixB.m[2][3],
-            0.f
+            position.x * affineMatrixB.m[2][0] + position.y * affineMatrixB.m[2][1] + position.z * affineMatrixB.m[2][2] + affineMatrixB.m[2][3]
         };
 
         // Linear interpolation
@@ -1132,21 +1087,50 @@ __global__ void DefFieldCompose3d(float4 *deformationField,
             for (short b = 0; b < 2; ++b) {
                 for (short a = 0; a < 2; ++a) {
                     float4 deformation;
-                    if (-1 < ante.x + a && ante.x + a < referenceImageDim.x &&
-                        -1 < ante.y + b && ante.y + b < referenceImageDim.y &&
-                        -1 < ante.z + c && ante.z + c < referenceImageDim.z) {
-                        const int index = ((ante.z + c) * referenceImageDim.y + ante.y + b) * referenceImageDim.x + ante.x + a;
+                    if (-1 < ante.x + a && ante.x + a < referenceImageDims.x &&
+                        -1 < ante.y + b && ante.y + b < referenceImageDims.y &&
+                        -1 < ante.z + c && ante.z + c < referenceImageDims.z) {
+                        const int index = ((ante.z + c) * referenceImageDims.y + ante.y + b) * referenceImageDims.x + ante.x + a;
                         deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
                     } else {
-                        deformation = GetSlidedValues(ante.x + a, ante.y + b, ante.z + c, deformationFieldTexture, referenceImageDim, affineMatrixC);
+                        deformation = GetSlidedValues(ante.x + a, ante.y + b, ante.z + c, deformationFieldTexture, referenceImageDims, affineMatrixC);
                     }
                     const float basis = relX[a] * relY[b] * relZ[c];
                     position = position + basis * deformation;
                 }
             }
         }
-        deformationField[tid] = position;
+    } else {
+        // Conversion from real position to voxel coordinate
+        const float2 voxelPosition{
+            position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + affineMatrixB.m[0][3],
+            position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + affineMatrixB.m[1][3]
+        };
+
+        // Linear interpolation
+        const int2 ante = { Floor(voxelPosition.x), Floor(voxelPosition.y) };
+        float relX[2], relY[2];
+        relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1];
+        relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1];
+
+        position = make_float4(0.f, 0.f, 0.f, 0.f);
+        for (short b = 0; b < 2; ++b) {
+            for (short a = 0; a < 2; ++a) {
+                float4 deformation;
+                if (-1 < ante.x + a && ante.x + a < referenceImageDims.x &&
+                    -1 < ante.y + b && ante.y + b < referenceImageDims.y) {
+                    const int index = (ante.y + b) * referenceImageDims.x + ante.x + a;
+                    deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
+                } else {
+                    deformation = GetSlidedValues(ante.x + a, ante.y + b, deformationFieldTexture, referenceImageDims, affineMatrixC);
+                }
+                const float basis = relX[a] * relY[b];
+                position = position + basis * deformation;
+            }
+        }
     }
+
+    deformationField[index] = position;
 }
 /* *************************************************************** */
 __global__ void GetJacobianMatrix3d(float *jacobianMatrices,

From 4c60059bd446a95a1fffebf1f71cf6896feafd02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 15 Jan 2024 12:19:09 +0000
Subject: [PATCH 264/314] Implement CudaCompute::SymmetriseVelocityFields() #92

---
 niftyreg_build_version.txt  |  2 +-
 reg-lib/cuda/CudaCompute.cu | 37 +++++++++++++++++++++++++++++++------
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 77851f13..f1386578 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-382
+383
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index c81a0e97..43fb41d2 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -311,12 +311,37 @@ void CudaCompute::BchUpdate(float scale, int bchUpdateValue) {
 }
 /* *************************************************************** */
 void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
-    Compute::SymmetriseVelocityFields(conBwIn);
-    // Transfer the data back to the CUDA device
-    dynamic_cast<CudaF3dContent&>(con).UpdateControlPointGrid();
-    dynamic_cast<CudaF3dContent&>(conBwIn).UpdateControlPointGrid();
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    CudaF3dContent& conBw = dynamic_cast<CudaF3dContent&>(conBwIn);
+
+    nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    nifti_image *controlPointGridBw = conBw.F3dContent::GetControlPointGrid();
+    float4 *controlPointGridCuda = con.GetControlPointGridCuda();
+    float4 *controlPointGridBwCuda = conBw.GetControlPointGridCuda();
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
+
+    // In order to ensure symmetry, the forward and backward velocity fields
+    // are averaged in both image spaces: reference and floating
+
+    // Both parametrisations are converted into displacement
+    Cuda::GetDisplacementFromDeformation(controlPointGrid, controlPointGridCuda);
+    Cuda::GetDisplacementFromDeformation(controlPointGridBw, controlPointGridBwCuda);
+
+    // Backup the backward displacement field
+    thrust::device_ptr<float4> controlPointGridBwCudaPtr(controlPointGridBwCuda);
+    thrust::device_vector<float4> controlPointGridBwOrgCudaVec(controlPointGridBwCudaPtr, controlPointGridBwCudaPtr + voxelNumber);
+
+    // Both parametrisations are subtracted (sum and negation)
+    Cuda::SubtractImages(controlPointGridBw, controlPointGridBwCuda, controlPointGridCuda);
+    Cuda::SubtractImages(controlPointGrid, controlPointGridCuda, controlPointGridBwOrgCudaVec.data().get());
+
+    // Divide by 2
+    Cuda::MultiplyValue(voxelNumber, controlPointGridCuda, 0.5f);
+    Cuda::MultiplyValue(voxelNumber, controlPointGridBwCuda, 0.5f);
+
+    // Convert the velocity field from displacement to deformation
+    Cuda::GetDeformationFromDisplacement(controlPointGrid, controlPointGridCuda);
+    Cuda::GetDeformationFromDisplacement(controlPointGridBw, controlPointGridBwCuda);
 }
 /* *************************************************************** */
 void CudaCompute::DefFieldCompose(const nifti_image *defField) {

From 540f10b13c4785f9d9169b9c345ee35f0a5989d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 15 Jan 2024 17:34:19 +0000
Subject: [PATCH 265/314] Add symmetrise velocity fields regression test #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-test/CMakeLists.txt                       |   1 +
 ...reg_test_regr_symmetriseVelocityFields.cpp | 158 ++++++++++++++++++
 3 files changed, 160 insertions(+), 1 deletion(-)
 create mode 100644 reg-test/reg_test_regr_symmetriseVelocityFields.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f1386578..e45b99e9 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-383
+384
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index b08293d5..1e0304ab 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -130,6 +130,7 @@ if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_kernelConvolution ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST})
+  set(EXEC_LIST reg_test_regr_symmetriseVelocityFields ${EXEC_LIST})
 endif(USE_CUDA)
 
 foreach(EXEC ${EXEC_LIST})
diff --git a/reg-test/reg_test_regr_symmetriseVelocityFields.cpp b/reg-test/reg_test_regr_symmetriseVelocityFields.cpp
new file mode 100644
index 00000000..d7149814
--- /dev/null
+++ b/reg-test/reg_test_regr_symmetriseVelocityFields.cpp
@@ -0,0 +1,158 @@
+#include "reg_test_common.h"
+#include "CudaF3dContent.h"
+
+/**
+ *  Symmetrise velocity fields regression test to ensure the CPU and CUDA versions yield the same output
+**/
+
+class SymmetriseVelocityFieldsTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, NiftiImage>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    SymmetriseVelocityFieldsTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::mt19937 gen(0);
+        std::uniform_real_distribution<float> distr(-1, 1);
+
+        // Create 2D and 3D reference images
+        constexpr NiftiImage::dim_t dimSize = 4;
+        NiftiImage reference2d({ dimSize, dimSize }, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d({ dimSize, dimSize, dimSize }, NIFTI_TYPE_FLOAT32);
+
+        // Create 2D and 3D control point grids
+        NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
+        NiftiImage controlPointGridBw2d = CreateControlPointGrid(reference2d);
+        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+        NiftiImage controlPointGridBw3d = CreateControlPointGrid(reference3d);
+
+        // Add random values to the control point grid coefficients
+        // No += or + operator for RNifti::NiftiImageData:Element
+        // so reverting to old school for now
+        float *cpp2dPtr = static_cast<float*>(controlPointGrid2d->data);
+        float *cpp2dBwPtr = static_cast<float*>(controlPointGridBw2d->data);
+        float *cpp3dPtr = static_cast<float*>(controlPointGrid3d->data);
+        float *cpp3dBwPtr = static_cast<float*>(controlPointGridBw3d->data);
+        for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i) {
+            cpp2dPtr[i] += distr(gen);
+            cpp2dBwPtr[i] += distr(gen);
+        }
+        for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i) {
+            cpp3dPtr[i] += distr(gen);
+            cpp3dBwPtr[i] += distr(gen);
+        }
+
+        // Create the affine matrices and fill them with random values
+        std::array<mat44, 2> matrices{};
+        for (int i = 0; i < matrices.size(); ++i)
+            for (int j = 0; j < 4; ++j)
+                for (int k = 0; k < 4; ++k)
+                    matrices[i].m[j][k] = j == k ? distr(gen) : 0;
+
+        // Add the test data
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "2D",
+            std::move(reference2d),
+            std::move(controlPointGrid2d),
+            std::move(controlPointGridBw2d)
+        ));
+        testData.emplace_back(TestData(
+            "3D",
+            std::move(reference3d),
+            std::move(controlPointGrid3d),
+            std::move(controlPointGridBw3d)
+        ));
+
+        // Create the platforms
+        Platform platformCpu(PlatformType::Cpu);
+        Platform platformCuda(PlatformType::Cuda);
+
+        for (auto&& testData : testData) {
+            // Make a copy of the test data
+            auto [testName, reference, controlPointGrid, controlPointGridBw] = testData;
+
+            // Set the affine matrices
+            controlPointGrid->sform_code = 0;
+            controlPointGrid->qto_xyz = matrices[0];
+            controlPointGridBw->sform_code = 1;
+            controlPointGridBw->sto_xyz = matrices[1];
+
+            // Create images
+            NiftiImage referenceCpu(reference), referenceCuda(reference);
+            NiftiImage cppCpu(controlPointGrid), cppCuda(controlPointGrid);
+            NiftiImage cppBwCpu(controlPointGrid), cppBwCuda(controlPointGrid);
+
+            // Create the content
+            unique_ptr<F3dContent> contentCpu{ new F3dContent(referenceCpu, referenceCpu, cppCpu) };
+            unique_ptr<F3dContent> contentBwCpu{ new F3dContent(referenceCpu, referenceCpu, cppBwCpu) };
+            unique_ptr<F3dContent> contentCuda{ new CudaF3dContent(referenceCuda, referenceCuda, cppCuda) };
+            unique_ptr<F3dContent> contentBwCuda{ new CudaF3dContent(referenceCuda, referenceCuda, cppBwCuda) };
+
+            // Create the computes
+            unique_ptr<Compute> computeCpu{ platformCpu.CreateCompute(*contentCpu) };
+            unique_ptr<Compute> computeCuda{ platformCuda.CreateCompute(*contentCuda) };
+
+            // Symmetrise the velocity fields
+            computeCpu->SymmetriseVelocityFields(*contentBwCpu);
+            computeCuda->SymmetriseVelocityFields(*contentBwCuda);
+
+            // Get the results of CUDA since CPU results are already inplace
+            contentCuda->GetControlPointGrid();
+            contentBwCuda->GetControlPointGrid();
+
+            // Save for testing
+            testCases.push_back({ testName, std::move(cppCpu), std::move(cppBwCpu), std::move(cppCuda), std::move(cppBwCuda) });
+        }
+    }
+};
+
+TEST_CASE_METHOD(SymmetriseVelocityFieldsTest, "Regression Symmetrise Velocity Fields", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [sectionName, cppCpu, cppBwCpu, cppCuda, cppBwCuda] = testCase;
+
+        SECTION(sectionName) {
+            NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            // Check the results
+            const auto cppCpuPtr = cppCpu.data();
+            const auto cppBwCpuPtr = cppBwCpu.data();
+            const auto cppCudaPtr = cppCuda.data();
+            const auto cppBwCudaPtr = cppBwCuda.data();
+            for (size_t i = 0; i < cppCpu.nVoxels(); i++) {
+                const float cppCpuVal = cppCpuPtr[i];
+                const float cppCudaVal = cppCudaPtr[i];
+                const float diff = abs(cppCpuVal - cppCudaVal);
+                if (diff > 0) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | diff=" << diff;
+                    NR_COUT << " | CPU=" << cppCpuVal;
+                    NR_COUT << " | CUDA=" << cppCudaVal << std::endl;
+                }
+                REQUIRE(diff == 0);
+                // Check the results of the backwards
+                const float cppBwCpuVal = cppBwCpuPtr[i];
+                const float cppBwCudaVal = cppBwCudaPtr[i];
+                const float diffBw = abs(cppBwCpuVal - cppBwCudaVal);
+                if (diffBw > 0) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | diffBw=" << diffBw;
+                    NR_COUT << " | CPU=" << cppBwCpuVal;
+                    NR_COUT << " | CUDA=" << cppBwCudaVal << std::endl;
+                }
+                REQUIRE(diffBw == 0);
+            }
+        }
+    }
+}

From b34de37eecec313f96f6d506c700f2fc6c2fc5eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 16 Jan 2024 12:24:41 +0000
Subject: [PATCH 266/314] Implement CudaCompute::UpdateVelocityField() #92

---
 niftyreg_build_version.txt  |  2 +-
 reg-lib/cuda/CudaCompute.cu | 25 +++++++++++++++++++-----
 reg-lib/cuda/CudaCompute.h  |  1 +
 reg-lib/cuda/CudaTools.cu   | 39 +++++++++++++++++++++++++++++++++++++
 reg-lib/cuda/CudaTools.hpp  |  8 ++++++++
 5 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e45b99e9..df90c3c7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-384
+385
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 43fb41d2..f90f4b11 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -294,12 +294,27 @@ void CudaCompute::ExponentiateGradient(Content& conBwIn) {
     dynamic_cast<CudaDefContent&>(con).UpdateVoxelBasedMeasureGradient();
 }
 /* *************************************************************** */
+Cuda::UniquePtr<float4> CudaCompute::ScaleGradient(const float4 *transGradCuda, const size_t voxelNumber, const float scale) {
+    float4 *scaledGradient;
+    Cuda::Allocate(&scaledGradient, voxelNumber);
+    Cuda::MultiplyValue(voxelNumber, transGradCuda, scaledGradient, scale);
+    return Cuda::UniquePtr<float4>(scaledGradient);
+}
+/* *************************************************************** */
 void CudaCompute::UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
-    Compute::UpdateVelocityField(scale, optimiseX, optimiseY, optimiseZ);
-    // Transfer the data back to the CUDA device
-    dynamic_cast<CudaF3dContent&>(con).UpdateControlPointGrid();
+    if (!optimiseX && !optimiseY && !optimiseZ) return;
+
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
+    auto scaledGradientCudaPtr = ScaleGradient(con.GetTransformationGradientCuda(), voxelNumber, scale);
+
+    // Reset the gradient along the axes if appropriate
+    if (controlPointGrid->nu < 3) optimiseZ = true;
+    Cuda::SetGradientToZero(scaledGradientCudaPtr.get(), voxelNumber, !optimiseX, !optimiseY, !optimiseZ);
+
+    // Update the velocity field
+    Cuda::AddImages(controlPointGrid, con.GetControlPointGridCuda(), scaledGradientCudaPtr.get());
 }
 /* *************************************************************** */
 void CudaCompute::BchUpdate(float scale, int bchUpdateValue) {
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 3aa8bec5..51a38e29 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -39,4 +39,5 @@ class CudaCompute: public Compute {
 
 private:
     void ConvolveImage(const nifti_image*, float4*);
+    Cuda::UniquePtr<float4> ScaleGradient(const float4*, const size_t, const float);
 };
diff --git a/reg-lib/cuda/CudaTools.cu b/reg-lib/cuda/CudaTools.cu
index c84cf344..4a48d26b 100644
--- a/reg-lib/cuda/CudaTools.cu
+++ b/reg-lib/cuda/CudaTools.cu
@@ -261,6 +261,15 @@ void MultiplyValue(const size_t count, float4 *arrayCuda, const float multiplier
     });
 }
 /* *************************************************************** */
+void MultiplyValue(const size_t count, const float4 *arrayCuda, float4 *arrayOutCuda, const float multiplier) {
+    auto arrayTexturePtr = Cuda::CreateTextureObject(arrayCuda, count, cudaChannelFormatKindFloat, 4);
+    auto arrayTexture = *arrayTexturePtr;
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), count, [=]__device__(const int index) {
+        float4 val = tex1Dfetch<float4>(arrayTexture, index);
+        arrayOutCuda[index] = val * multiplier;
+    });
+}
+/* *************************************************************** */
 float SumReduction(float *arrayCuda, const size_t size) {
     thrust::device_ptr<float> dptr(arrayCuda);
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus<float>());
@@ -367,5 +376,35 @@ float GetMaxValue(const nifti_image *img, const float4 *imgCuda, const int timeP
     return GetMinMaxValue<false>(img, imgCuda, timePoint);
 }
 /* *************************************************************** */
+template<bool xAxis, bool yAxis, bool zAxis>
+void SetGradientToZero(float4 *gradCuda, const size_t voxelNumber) {
+    auto gradTexturePtr = Cuda::CreateTextureObject(gradCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+    auto gradTexture = *gradTexturePtr;
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [gradCuda, gradTexture]__device__(const int index) {
+        if constexpr (xAxis && yAxis && zAxis) {
+            gradCuda[index] = make_float4(0.f, 0.f, 0.f, 0.f);
+        } else {
+            float4 val = tex1Dfetch<float4>(gradTexture, index);
+            if constexpr (xAxis) val.x = 0;
+            if constexpr (yAxis) val.y = 0;
+            if constexpr (zAxis) val.z = 0;
+            gradCuda[index] = val;
+        }
+    });
+}
+/* *************************************************************** */
+void SetGradientToZero(float4 *gradCuda, const size_t voxelNumber, const bool xAxis, const bool yAxis, const bool zAxis) {
+    if (!xAxis && !yAxis && !zAxis) return;
+    decltype(SetGradientToZero<true, true, true>) *setGradientToZero;
+    if (xAxis && yAxis && zAxis) setGradientToZero = SetGradientToZero<true, true, true>;
+    else if (xAxis && yAxis) setGradientToZero = SetGradientToZero<true, true, false>;
+    else if (xAxis && zAxis) setGradientToZero = SetGradientToZero<true, false, true>;
+    else if (yAxis && zAxis) setGradientToZero = SetGradientToZero<false, true, true>;
+    else if (xAxis) setGradientToZero = SetGradientToZero<true, false, false>;
+    else if (yAxis) setGradientToZero = SetGradientToZero<false, true, false>;
+    else if (zAxis) setGradientToZero = SetGradientToZero<false, false, true>;
+    setGradientToZero(gradCuda, voxelNumber);
+}
+/* *************************************************************** */
 } // namespace NiftyReg::Cuda
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaTools.hpp b/reg-lib/cuda/CudaTools.hpp
index 8dfcbf6d..010e3017 100644
--- a/reg-lib/cuda/CudaTools.hpp
+++ b/reg-lib/cuda/CudaTools.hpp
@@ -43,6 +43,8 @@ void AddValue(const size_t count, float4 *arrayCuda, const float value);
 /* *************************************************************** */
 void MultiplyValue(const size_t count, float4 *arrayCuda, const float value);
 /* *************************************************************** */
+void MultiplyValue(const size_t count, const float4 *arrayCuda, float4 *arrayOutCuda, const float value);
+/* *************************************************************** */
 float SumReduction(float *arrayCuda, const size_t size);
 /* *************************************************************** */
 float MaxReduction(float *arrayCuda, const size_t size);
@@ -61,5 +63,11 @@ float GetMinValue(const nifti_image *img, const float4 *imgCuda, const int timeP
 /* *************************************************************** */
 float GetMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1);
 /* *************************************************************** */
+void SetGradientToZero(float4 *gradCuda,
+                       const size_t voxelNumber,
+                       const bool xAxis,
+                       const bool yAxis,
+                       const bool zAxis);
+/* *************************************************************** */
 } // namespace NiftyReg::Cuda
 /* *************************************************************** */

From 0ddb72b61d25cd81b5362edd9401046bbb271e0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 16 Jan 2024 13:32:53 +0000
Subject: [PATCH 267/314] Add update velocity field regression test #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-test/CMakeLists.txt                       |   1 +
 .../reg_test_regr_updateVelocityField.cpp     | 140 ++++++++++++++++++
 3 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 reg-test/reg_test_regr_updateVelocityField.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index df90c3c7..aeccadf7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-385
+386
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 1e0304ab..b04cdedc 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -131,6 +131,7 @@ if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_symmetriseVelocityFields ${EXEC_LIST})
+  set(EXEC_LIST reg_test_regr_updateVelocityField ${EXEC_LIST})
 endif(USE_CUDA)
 
 foreach(EXEC ${EXEC_LIST})
diff --git a/reg-test/reg_test_regr_updateVelocityField.cpp b/reg-test/reg_test_regr_updateVelocityField.cpp
new file mode 100644
index 00000000..48a1aefb
--- /dev/null
+++ b/reg-test/reg_test_regr_updateVelocityField.cpp
@@ -0,0 +1,140 @@
+#include "reg_test_common.h"
+#include "CudaF3dContent.h"
+
+/**
+ *  Update velocity field regression test to ensure the CPU and CUDA versions yield the same output
+**/
+
+class UpdateVelocityFieldTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, float>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    UpdateVelocityFieldTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::mt19937 gen(0);
+        std::uniform_real_distribution<float> distr(-1, 1);
+
+        // Create 2D and 3D reference images
+        constexpr NiftiImage::dim_t dimSize = 4;
+        NiftiImage reference2d({ dimSize, dimSize }, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d({ dimSize, dimSize, dimSize }, NIFTI_TYPE_FLOAT32);
+
+        // Create 2D and 3D control point grids
+        NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
+        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+
+        // Create transformation gradient images and fill them with random values
+        NiftiImage transGrad2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData);
+        NiftiImage transGrad3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData);
+        auto transGrad2dPtr = transGrad2d.data();
+        auto transGrad3dPtr = transGrad3d.data();
+        for (size_t i = 0; i < transGrad2d.nVoxels(); i++)
+            transGrad2dPtr[i] = distr(gen);
+        for (size_t i = 0; i < transGrad3d.nVoxels(); i++)
+            transGrad3dPtr[i] = distr(gen);
+
+        // Add the test data
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "2D",
+            std::move(reference2d),
+            std::move(controlPointGrid2d),
+            std::move(transGrad2d),
+            distr(gen)  // scale
+        ));
+        testData.emplace_back(TestData(
+            "3D",
+            std::move(reference3d),
+            std::move(controlPointGrid3d),
+            std::move(transGrad3d),
+            distr(gen)  // scale
+        ));
+
+        // Create the platforms
+        Platform platformCpu(PlatformType::Cpu);
+        Platform platformCuda(PlatformType::Cuda);
+
+        for (auto&& testData : testData) {
+            for (int optimiseX = 0; optimiseX < 2; optimiseX++) {
+                for (int optimiseY = 0; optimiseY < 2; optimiseY++) {
+                    for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) {
+                        // Get the test data
+                        auto&& [testName, reference, controlPointGrid, transGrad, scale] = testData;
+                        testName += " scale=" + std::to_string(scale) + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ");
+
+                        // Create images
+                        NiftiImage referenceCpu(reference), referenceCuda(reference);
+                        NiftiImage cppCpu(controlPointGrid), cppCuda(controlPointGrid);
+
+                        // Create the content
+                        unique_ptr<F3dContent> contentCpu{ new F3dContent(referenceCpu, referenceCpu, cppCpu) };
+                        unique_ptr<F3dContent> contentCuda{ new CudaF3dContent(referenceCuda, referenceCuda, cppCuda) };
+
+                        // Set the transformation gradient image to host the computation
+                        NiftiImage transGradCpu = contentCpu->GetTransformationGradient();
+                        transGradCpu.copyData(transGrad);
+                        transGradCpu.disown();
+                        contentCpu->UpdateTransformationGradient();
+                        NiftiImage transGradCuda = contentCuda->GetTransformationGradient();
+                        transGradCuda.copyData(transGrad);
+                        transGradCuda.disown();
+                        contentCuda->UpdateTransformationGradient();
+
+                        // Create the computes
+                        unique_ptr<Compute> computeCpu{ platformCpu.CreateCompute(*contentCpu) };
+                        unique_ptr<Compute> computeCuda{ platformCuda.CreateCompute(*contentCuda) };
+
+                        // Update the velocity field
+                        computeCpu->UpdateVelocityField(scale, optimiseX, optimiseY, optimiseZ);
+                        computeCuda->UpdateVelocityField(scale, optimiseX, optimiseY, optimiseZ);
+
+                        // Get the results
+                        transGradCpu = NiftiImage(contentCpu->GetTransformationGradient(), NiftiImage::Copy::Image);
+                        transGradCuda = NiftiImage(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image);
+
+                        // Save for testing
+                        testCases.push_back({ testName, std::move(transGradCpu), std::move(transGradCuda) });
+                    }
+                }
+            }
+        }
+    }
+};
+
+TEST_CASE_METHOD(UpdateVelocityFieldTest, "Regression Update Velocity Field", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [sectionName, transGradCpu, transGradCuda] = testCase;
+
+        SECTION(sectionName) {
+            NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            // Check the results
+            const auto transGradCpuPtr = transGradCpu.data();
+            const auto transGradCudaPtr = transGradCuda.data();
+            for (size_t i = 0; i < transGradCpu.nVoxels(); i++) {
+                const float transGradCpuVal = transGradCpuPtr[i];
+                const float transGradCudaVal = transGradCudaPtr[i];
+                const float diff = abs(transGradCpuVal - transGradCudaVal);
+                if (diff > 0) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | diff=" << diff;
+                    NR_COUT << " | CPU=" << transGradCpuVal;
+                    NR_COUT << " | CUDA=" << transGradCudaVal << std::endl;
+                }
+                REQUIRE(diff == 0);
+            }
+        }
+    }
+}

From cbdea7c4e5c33e0f2a4a8979622a1563da808305 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 16 Jan 2024 14:11:54 +0000
Subject: [PATCH 268/314] Print the version info

---
 niftyreg_build_version.txt | 2 +-
 reg-lib/cpu/_reg_tools.cpp | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index aeccadf7..32890dbd 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-386
+387
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 0c95c8e5..1b63bcdb 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -2565,10 +2565,12 @@ nifti_image* nifti_dup(const nifti_image& image, const bool copyData) {
 }
 /* *************************************************************** */
 void PrintCmdLine(const int argc, const char *const *argv, const bool verbose) {
+    // Print the version
+    NR_INFO(argv[0] << " v" << NR_VERSION);
+    NR_INFO("");
 #ifdef NDEBUG
     if (!verbose) return;
 #endif
-    NR_INFO("");
     NR_INFO("Command line:");
     std::string text("\t");
     for (int i = 0; i < argc; i++)

From d2bfbe193e4b21ebc9ed38766638cd50c379116c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 22 Jan 2024 15:51:25 +0000
Subject: [PATCH 269/314] Implement Cuda::ResampleGradient() #92

---
 niftyreg_build_version.txt      |   2 +-
 reg-lib/cuda/CudaResampling.cu  | 210 +++++++++++++++++++++++++++++++-
 reg-lib/cuda/CudaResampling.hpp |  12 ++
 3 files changed, 221 insertions(+), 3 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 32890dbd..2c60641d 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-387
+388
diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu
index ee2deab5..6cde737d 100644
--- a/reg-lib/cuda/CudaResampling.cu
+++ b/reg-lib/cuda/CudaResampling.cu
@@ -11,6 +11,7 @@
  */
 
 #include "CudaResampling.hpp"
+#include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
@@ -78,7 +79,7 @@ void ResampleImage(const nifti_image *floatingImage,
     auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
     auto deformationFieldTexture = *deformationFieldTexturePtr;
     auto maskTexture = *maskTexturePtr;
-    // Bind the real to voxel matrix to the texture
+    // Get the real to voxel matrix
     const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
 
     for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) {
@@ -166,7 +167,7 @@ void GetImageGradient(const nifti_image *floatingImage,
     auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
     auto floatingTexture = *floatingTexturePtr;
     auto deformationFieldTexture = *deformationFieldTexturePtr;
-    // Bind the real to voxel matrix to the texture
+    // Get the real to voxel matrix
     const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
 
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [
@@ -232,5 +233,210 @@ void GetImageGradient(const nifti_image *floatingImage,
 template void GetImageGradient<false>(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int);
 template void GetImageGradient<true>(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int);
 /* *************************************************************** */
+template<bool is3d>
+static float3 GetRealImageSpacing(const nifti_image *image) {
+    float3 spacing{};
+    float indexVoxel1[3]{}, indexVoxel2[3], realVoxel1[3], realVoxel2[3];
+    reg_mat44_mul(&image->sto_xyz, indexVoxel1, realVoxel1);
+
+    indexVoxel2[1] = indexVoxel2[2] = 0; indexVoxel2[0] = 1;
+    reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2);
+    spacing.x = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
+
+    indexVoxel2[0] = indexVoxel2[2] = 0; indexVoxel2[1] = 1;
+    reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2);
+    spacing.y = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
+
+    if constexpr (is3d) {
+        indexVoxel2[0] = indexVoxel2[1] = 0; indexVoxel2[2] = 1;
+        reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2);
+        spacing.z = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
+    }
+
+    return spacing;
+}
+/* *************************************************************** */
+template<bool is3d> struct Gradient { using Type = float3; };
+template<> struct Gradient<false> { using Type = float2; };
+/* *************************************************************** */
+template<bool is3d>
+void ResampleGradient(const nifti_image *floatingImage,
+                      const float4 *floatingImageCuda,
+                      const nifti_image *warpedImage,
+                      float4 *warpedImageCuda,
+                      const nifti_image *deformationField,
+                      const float4 *deformationFieldCuda,
+                      const int *maskCuda,
+                      const size_t activeVoxelNumber,
+                      const int interpolation,
+                      const float paddingValue) {
+    if (interpolation != 1)
+        NR_FATAL_ERROR("Only linear interpolation is supported");
+
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
+    const int3 floatingDims = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
+    const int3 defFieldDims = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
+    auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
+    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
+    auto floatingTexture = *floatingTexturePtr;
+    auto deformationFieldTexture = *deformationFieldTexturePtr;
+    auto maskTexture = *maskTexturePtr;
+
+    // Get the real to voxel matrix
+    const mat44& floatingMatrix = floatingImage->sform_code != 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
+
+    // The spacing is computed if the sform is defined
+    const float3 realSpacing = warpedImage->sform_code > 0 ? GetRealImageSpacing<is3d>(warpedImage) :
+                                                             make_float3(warpedImage->dx, warpedImage->dy, warpedImage->dz);
+
+    // Reorientation matrix is assessed in order to remove the rigid component
+    const mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
+
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [
+        warpedImageCuda, floatingTexture, deformationFieldTexture, maskTexture, floatingMatrix, floatingDims, defFieldDims, realSpacing, reorient, paddingValue
+    ]__device__(const int index) {
+        // Get the real world deformation in the floating space
+        const int voxel = tex1Dfetch<int>(maskTexture, index);
+        const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, index);
+
+        // Get the voxel-based deformation in the floating space and compute the linear interpolation
+        int3 previous;
+        float xBasis[2], yBasis[2], zBasis[2];
+        TransformInterpolate<float, is3d>(floatingMatrix, realDeformation, previous, xBasis, yBasis, zBasis);
+
+        typename Gradient<is3d>::Type gradientValue{};
+        if constexpr (is3d) {
+            for (char c = 0; c < 2; c++) {
+                const int z = previous.z + c;
+                if (-1 < z && z < floatingDims.z) {
+                    for (char b = 0; b < 2; b++) {
+                        const int y = previous.y + b;
+                        if (-1 < y && y < floatingDims.y) {
+                            for (char a = 0; a < 2; a++) {
+                                const int x = previous.x + a;
+                                const float weight = xBasis[a] * yBasis[b] * zBasis[c];
+                                if (-1 < x && x < floatingDims.x) {
+                                    const int floIndex = (z * floatingDims.y + y) * floatingDims.x + x;
+                                    const float3 intensity = make_float3(tex1Dfetch<float4>(floatingTexture, floIndex));
+                                    gradientValue = gradientValue + intensity * weight;
+                                } else gradientValue = gradientValue + paddingValue * weight;
+                            }
+                        } else gradientValue = gradientValue + paddingValue * yBasis[b] * zBasis[c];
+                    }
+                } else gradientValue = gradientValue + paddingValue * zBasis[c];
+            }
+        } else {
+            for (char b = 0; b < 2; b++) {
+                const int y = previous.y + b;
+                if (-1 < y && y < floatingDims.y) {
+                    for (char a = 0; a < 2; a++) {
+                        const int x = previous.x + a;
+                        const float weight = xBasis[a] * yBasis[b];
+                        if (-1 < x && x < floatingDims.x) {
+                            const int floIndex = y * floatingDims.x + x;
+                            const float2 intensity = make_float2(tex1Dfetch<float4>(floatingTexture, floIndex));
+                            gradientValue = gradientValue + intensity * weight;
+                        } else gradientValue = gradientValue + paddingValue * weight;
+                    }
+                } else gradientValue = gradientValue + paddingValue * yBasis[b];
+            }
+        }
+
+        // Compute the Jacobian matrix
+        constexpr float basis[] = { 1.f, 0.f };
+        constexpr float deriv[] = { -1.f, 1.f };
+        auto [x, y, z] = reg_indexToDims_cuda<is3d>(voxel, defFieldDims);
+        mat33 jacMat{};
+        for (char c = 0; c < (is3d ? 2 : 1); c++) {
+            if constexpr (is3d) {
+                previous.z = z + c;
+                zBasis[0] = basis[c];
+                zBasis[1] = deriv[c];
+                // Boundary conditions along z - slidding
+                if (z == defFieldDims.z - 1) {
+                    if (c == 1)
+                        previous.z -= 2;
+                    zBasis[0] = fabs(zBasis[0] - 1);
+                    zBasis[1] *= -1;
+                }
+            }
+            for (char b = 0; b < 2; b++) {
+                previous.y = y + b;
+                yBasis[0] = basis[b];
+                yBasis[1] = deriv[b];
+                // Boundary conditions along y - slidding
+                if (y == defFieldDims.y - 1) {
+                    if (b == 1)
+                        previous.y -= 2;
+                    yBasis[0] = fabs(yBasis[0] - 1);
+                    yBasis[1] *= -1;
+                }
+                for (char a = 0; a < 2; a++) {
+                    previous.x = x + a;
+                    xBasis[0] = basis[a];
+                    xBasis[1] = deriv[a];
+                    // Boundary conditions along x - slidding
+                    if (x == defFieldDims.x - 1) {
+                        if (a == 1)
+                            previous.x -= 2;
+                        xBasis[0] = fabs(xBasis[0] - 1);
+                        xBasis[1] *= -1;
+                    }
+
+                    // Compute the basis function values
+                    const float3 weight = make_float3(xBasis[1] * yBasis[0] * (is3d ? zBasis[0] : 1),
+                                                      xBasis[0] * yBasis[1] * (is3d ? zBasis[0] : 1),
+                                                      is3d ? xBasis[0] * yBasis[0] * zBasis[1] : 0);
+
+                    // Get the deformation field values
+                    const int defIndex = ((is3d ? previous.z * defFieldDims.y : 0) + previous.y) * defFieldDims.x + previous.x;
+                    const float4 defFieldValue = tex1Dfetch<float4>(deformationFieldTexture, defIndex);
+
+                    // Symmetric difference to compute the derivatives
+                    jacMat.m[0][0] += weight.x * defFieldValue.x;
+                    jacMat.m[0][1] += weight.y * defFieldValue.x;
+                    jacMat.m[1][0] += weight.x * defFieldValue.y;
+                    jacMat.m[1][1] += weight.y * defFieldValue.y;
+                    if constexpr (is3d) {
+                        jacMat.m[0][2] += weight.z * defFieldValue.x;
+                        jacMat.m[1][2] += weight.z * defFieldValue.y;
+                        jacMat.m[2][0] += weight.x * defFieldValue.z;
+                        jacMat.m[2][1] += weight.y * defFieldValue.z;
+                        jacMat.m[2][2] += weight.z * defFieldValue.z;
+                    }
+                }
+            }
+        }
+        // reorient and scale the Jacobian matrix
+        jacMat = reg_mat33_mul_cuda(reorient, jacMat);
+        jacMat.m[0][0] /= realSpacing.x;
+        jacMat.m[0][1] /= realSpacing.y;
+        jacMat.m[1][0] /= realSpacing.x;
+        jacMat.m[1][1] /= realSpacing.y;
+        if constexpr (is3d) {
+            jacMat.m[0][2] /= realSpacing.z;
+            jacMat.m[1][2] /= realSpacing.z;
+            jacMat.m[2][0] /= realSpacing.x;
+            jacMat.m[2][1] /= realSpacing.y;
+            jacMat.m[2][2] /= realSpacing.z;
+        }
+
+        // Modulate the gradient scalar values
+        float4 warpedValue{};
+        if constexpr (is3d) {
+            warpedValue.x = jacMat.m[0][0] * gradientValue.x + jacMat.m[0][1] * gradientValue.y + jacMat.m[0][2] * gradientValue.z;
+            warpedValue.y = jacMat.m[1][0] * gradientValue.x + jacMat.m[1][1] * gradientValue.y + jacMat.m[1][2] * gradientValue.z;
+            warpedValue.z = jacMat.m[2][0] * gradientValue.x + jacMat.m[2][1] * gradientValue.y + jacMat.m[2][2] * gradientValue.z;
+        } else {
+            warpedValue.x = jacMat.m[0][0] * gradientValue.x + jacMat.m[0][1] * gradientValue.y;
+            warpedValue.y = jacMat.m[1][0] * gradientValue.x + jacMat.m[1][1] * gradientValue.y;
+        }
+        warpedImageCuda[voxel] = warpedValue;
+    });
+}
+template void ResampleGradient<false>(const nifti_image*, const float4*, const nifti_image*, float4*, const nifti_image*, const float4*, const int*, const size_t, const int, const float);
+template void ResampleGradient<true>(const nifti_image*, const float4*, const nifti_image*, float4*, const nifti_image*, const float4*, const int*, const size_t, const int, const float);
+/* *************************************************************** */
 } // namespace NiftyReg::Cuda
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaResampling.hpp b/reg-lib/cuda/CudaResampling.hpp
index 1366ccc7..7f6bbac8 100644
--- a/reg-lib/cuda/CudaResampling.hpp
+++ b/reg-lib/cuda/CudaResampling.hpp
@@ -38,5 +38,17 @@ void GetImageGradient(const nifti_image *floatingImage,
                       float paddingValue,
                       const int activeTimePoint);
 /* *************************************************************** */
+template<bool is3d>
+void ResampleGradient(const nifti_image *floatingImage,
+                      const float4 *floatingImageCuda,
+                      const nifti_image *warpedImage,
+                      float4 *warpedImageCuda,
+                      const nifti_image *deformationField,
+                      const float4 *deformationFieldCuda,
+                      const int *maskCuda,
+                      const size_t activeVoxelNumber,
+                      const int interpolation,
+                      const float paddingValue);
+/* *************************************************************** */
 } // namespace NiftyReg::Cuda
 /* *************************************************************** */

From be7e6b7410050335cbad56dbeb2e665a8a35e635 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 22 Jan 2024 15:58:15 +0000
Subject: [PATCH 270/314] Add resample gradient regression test #92

---
 niftyreg_build_version.txt                  |   2 +-
 reg-lib/Compute.cpp                         |   8 +
 reg-lib/Compute.h                           |   1 +
 reg-lib/cpu/_reg_resampling.cpp             |   2 +-
 reg-lib/cuda/CudaCompute.cu                 |  17 +++
 reg-lib/cuda/CudaCompute.h                  |   1 +
 reg-test/CMakeLists.txt                     |   1 +
 reg-test/reg_test_regr_resampleGradient.cpp | 161 ++++++++++++++++++++
 8 files changed, 191 insertions(+), 2 deletions(-)
 create mode 100644 reg-test/reg_test_regr_resampleGradient.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 2c60641d..6bb2f4ee 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-388
+389
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 0a7232b6..0f2729d1 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -413,3 +413,11 @@ void Compute::DefFieldCompose(const nifti_image *defField) {
     reg_defField_compose(defField, con.GetDeformationField(), nullptr);
 }
 /* *************************************************************** */
+NiftiImage Compute::ResampleGradient(int interpolation, float padding) {
+    DefContent& con = dynamic_cast<DefContent&>(this->con);
+    nifti_image *voxelBasedMeasureGradient = con.GetVoxelBasedMeasureGradient();
+    NiftiImage warpedImage = NiftiImage(voxelBasedMeasureGradient, NiftiImage::Copy::ImageInfoAndAllocData);
+    reg_resampleGradient(voxelBasedMeasureGradient, warpedImage, con.GetDeformationField(), interpolation, padding);
+    return warpedImage;
+}
+/* *************************************************************** */
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index 6ad1061b..d39f8b45 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -39,6 +39,7 @@ class Compute {
 public:
 #endif
     virtual void DefFieldCompose(const nifti_image *defField);
+    virtual NiftiImage ResampleGradient(int interpolation, float padding);
     virtual void VoxelCentricToNodeCentric(float weight);
 
 private:
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 61d9743b..483d5911 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -1691,7 +1691,7 @@ void reg_trilinearResampleGradient(const nifti_image *floatingImage,
     if (floatingImage->sform_code != 0)
         floating_mm_to_voxel = &floatingImage->sto_ijk;
 
-    // The spacing is computed in case the sform if defined
+    // The spacing is computed if the sform is defined
     float realSpacing[3];
     if (warpedImage->sform_code > 0) {
         reg_getRealImageSpacing(warpedImage, realSpacing);
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index f90f4b11..dd0b3518 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -368,3 +368,20 @@ void CudaCompute::DefFieldCompose(const nifti_image *defField) {
     defFieldCompose(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda());
 }
 /* *************************************************************** */
+NiftiImage CudaCompute::ResampleGradient(int interpolation, float padding) {
+    CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
+    const nifti_image *voxelBasedMeasureGradient = con.DefContent::GetVoxelBasedMeasureGradient();
+    auto resampleGradient = voxelBasedMeasureGradient->nz > 1 ? Cuda::ResampleGradient<true> : Cuda::ResampleGradient<false>;
+    resampleGradient(voxelBasedMeasureGradient,
+                     con.GetVoxelBasedMeasureGradientCuda(),
+                     voxelBasedMeasureGradient,
+                     con.GetWarpedGradientCuda(),
+                     con.Content::GetDeformationField(),
+                     con.GetDeformationFieldCuda(),
+                     con.GetReferenceMaskCuda(),
+                     con.GetActiveVoxelNumber(),
+                     interpolation,
+                     padding);
+    return NiftiImage(con.GetWarpedGradient(), NiftiImage::Copy::Image);
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 51a38e29..124d6b86 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -35,6 +35,7 @@ class CudaCompute: public Compute {
 protected:
 #endif
     virtual void DefFieldCompose(const nifti_image *defField) override;
+    virtual NiftiImage ResampleGradient(int interpolation, float padding) override;
     virtual void VoxelCentricToNodeCentric(float weight) override;
 
 private:
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index b04cdedc..26e6d058 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -130,6 +130,7 @@ if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_kernelConvolution ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST})
+  set(EXEC_LIST reg_test_regr_resampleGradient ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_symmetriseVelocityFields ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_updateVelocityField ${EXEC_LIST})
 endif(USE_CUDA)
diff --git a/reg-test/reg_test_regr_resampleGradient.cpp b/reg-test/reg_test_regr_resampleGradient.cpp
new file mode 100644
index 00000000..062c442c
--- /dev/null
+++ b/reg-test/reg_test_regr_resampleGradient.cpp
@@ -0,0 +1,161 @@
+#include "reg_test_common.h"
+#include "CudaDefContent.h"
+
+/**
+ *  Resample gradient regression test to ensure the CPU and CUDA versions yield the same output
+**/
+
+class ResampleGradientTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    ResampleGradientTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::mt19937 gen(0);
+        std::uniform_real_distribution<float> distr(-1, 1);
+
+        // Create reference images
+        constexpr NiftiImage::dim_t dimSize = 4;
+        NiftiImage reference2d({ dimSize, dimSize }, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d({ dimSize, dimSize, dimSize }, NIFTI_TYPE_FLOAT32);
+
+        // Create deformation fields and fill them with random values
+        NiftiImage deformationField2d = CreateDeformationField(reference2d);
+        NiftiImage deformationField3d = CreateDeformationField(reference3d);
+        auto deformationField2dPtr = deformationField2d.data();
+        auto deformationField3dPtr = deformationField3d.data();
+        for (size_t i = 0; i < deformationField2d.nVoxels(); i++)
+            deformationField2dPtr[i] = distr(gen);
+        for (size_t i = 0; i < deformationField3d.nVoxels(); i++)
+            deformationField3dPtr[i] = distr(gen);
+
+        // Create transformation gradient images and fill them with random values
+        NiftiImage voxelBasedGrad2d(deformationField2d, NiftiImage::Copy::ImageInfoAndAllocData);
+        NiftiImage voxelBasedGrad3d(deformationField3d, NiftiImage::Copy::ImageInfoAndAllocData);
+        auto voxelBasedGrad2dPtr = voxelBasedGrad2d.data();
+        auto voxelBasedGrad3dPtr = voxelBasedGrad3d.data();
+        for (size_t i = 0; i < voxelBasedGrad2d.nVoxels(); i++)
+            voxelBasedGrad2dPtr[i] = distr(gen);
+        for (size_t i = 0; i < voxelBasedGrad3d.nVoxels(); i++)
+            voxelBasedGrad3dPtr[i] = distr(gen);
+
+        // Fill the matrices with random values
+        voxelBasedGrad2d->sform_code = 0;
+        voxelBasedGrad3d->sform_code = 1;
+        for (int j = 0; j < 4; j++) {
+            for (int k = 0; k < 4; k++) {
+                voxelBasedGrad2d->qto_ijk.m[j][k] = j == k ? distr(gen) : 0;
+                voxelBasedGrad3d->sto_ijk.m[j][k] = j == k ? distr(gen) : 0;
+                deformationField2d->sto_xyz.m[j][k] = j == k ? distr(gen) : 0;
+                deformationField3d->sto_xyz.m[j][k] = j == k ? distr(gen) : 0;
+            }
+        }
+        voxelBasedGrad2d->qto_xyz = nifti_mat44_inverse(voxelBasedGrad2d->qto_ijk);
+        voxelBasedGrad3d->sto_xyz = nifti_mat44_inverse(voxelBasedGrad3d->sto_ijk);
+
+        // Add the test data
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "2D",
+            std::move(reference2d),
+            std::move(deformationField2d),
+            std::move(voxelBasedGrad2d)
+        ));
+        testData.emplace_back(TestData(
+            "3D",
+            std::move(reference3d),
+            std::move(deformationField3d),
+            std::move(voxelBasedGrad3d)
+        ));
+
+        // Create the platforms
+        Platform platformCpu(PlatformType::Cpu);
+        Platform platformCuda(PlatformType::Cuda);
+
+        for (auto&& testData : testData) {
+            // Get the test data
+            auto&& [testName, reference, defField, voxelBasedGrad] = testData;
+
+            // Create images
+            NiftiImage referenceCpu(reference), referenceCuda(reference);
+            NiftiImage defFieldCpu(defField), defFieldCuda(defField);
+
+            // Create the contents
+            unique_ptr<DefContent> contentCpu{ new DefContent(referenceCpu, referenceCpu) };
+            unique_ptr<DefContent> contentCuda{ new CudaDefContent(referenceCuda, referenceCuda) };
+
+            // Set the deformation fields
+            contentCpu->SetDeformationField(defFieldCpu.disown());
+            contentCuda->SetDeformationField(defFieldCuda.disown());
+
+            // Set the voxel-based measure gradient images
+            NiftiImage voxelGrad = contentCpu->GetVoxelBasedMeasureGradient();
+            voxelGrad->sform_code = voxelBasedGrad->sform_code;
+            voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk;
+            voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz;
+            voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
+            voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
+            voxelGrad.copyData(voxelBasedGrad);
+            voxelGrad.disown();
+            contentCpu->UpdateVoxelBasedMeasureGradient();
+            voxelGrad = contentCuda->DefContent::GetVoxelBasedMeasureGradient();
+            voxelGrad->sform_code = voxelBasedGrad->sform_code;
+            voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk;
+            voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz;
+            voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
+            voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
+            voxelGrad.copyData(voxelBasedGrad);
+            voxelGrad.disown();
+            contentCuda->UpdateVoxelBasedMeasureGradient();
+
+            // Create the computes
+            unique_ptr<Compute> computeCpu{ platformCpu.CreateCompute(*contentCpu) };
+            unique_ptr<Compute> computeCuda{ platformCuda.CreateCompute(*contentCuda) };
+
+            // Resample gradient
+            NiftiImage warpedCpu = computeCpu->ResampleGradient(1, -2.f);
+            NiftiImage warpedCuda = computeCuda->ResampleGradient(1, -2.f);
+
+            // Save for testing
+            testCases.push_back({ testName, std::move(warpedCpu), std::move(warpedCuda) });
+        }
+    }
+};
+
+TEST_CASE_METHOD(ResampleGradientTest, "Regression Resample Gradient", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [sectionName, warpedCpu, warpedCuda] = testCase;
+
+        SECTION(sectionName) {
+            NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            // Check the results
+            const auto warpedCpuPtr = warpedCpu.data();
+            const auto warpedCudaPtr = warpedCuda.data();
+            for (size_t i = 0; i < warpedCpu.nVoxels(); i++) {
+                const float warpedCpuVal = warpedCpuPtr[i];
+                const float warpedCudaVal = warpedCudaPtr[i];
+                const float diff = abs(warpedCpuVal - warpedCudaVal);
+                if (diff > 0) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | diff=" << diff;
+                    NR_COUT << " | CPU=" << warpedCpuVal;
+                    NR_COUT << " | CUDA=" << warpedCudaVal << std::endl;
+                }
+                REQUIRE(diff == 0);
+            }
+        }
+    }
+}

From 1675ba7526d2c093467c148abaf669ffcfe043c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 23 Jan 2024 12:40:38 +0000
Subject: [PATCH 271/314] Refactorisations

---
 niftyreg_build_version.txt              |  2 +-
 reg-lib/cuda/CudaLocalTransformation.cu | 29 ++++++++++++-------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6bb2f4ee..6f8a8c5e 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-389
+390
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index 20d2c471..f265db16 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -654,7 +654,6 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
                                       nifti_image *deformationField,
                                       float4 *flowFieldCuda,
                                       float4 *deformationFieldCuda,
-                                      const int *maskCuda,
                                       const bool updateStepNumber) {
     // Check first if the velocity field is actually a velocity field
     if (flowField->intent_p1 != DEF_VEL_FIELD)
@@ -664,15 +663,15 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
 
     // Remove the affine component from the flow field
     NiftiImage affineOnly;
-    thrust::device_vector<float4> affineOnlyCuda;
+    thrust::device_vector<float4> affineOnlyCudaVec;
     if (flowField->num_ext > 0) {
         if (flowField->ext_list[0].edata != nullptr) {
             // Create a field that contains the affine component only
             affineOnly = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo);
-            affineOnlyCuda.resize(voxelNumber);
+            affineOnlyCudaVec.resize(voxelNumber);
             reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
-                                               affineOnly, affineOnlyCuda.data().get());
-            SubtractImages(flowField, flowFieldCuda, affineOnlyCuda.data().get());
+                                               affineOnly, affineOnlyCudaVec.data().get());
+            SubtractImages(flowField, flowFieldCuda, affineOnlyCudaVec.data().get());
         }
     } else GetDisplacementFromDeformation(flowField, flowFieldCuda);
 
@@ -717,13 +716,13 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
         // The deformation field is applied to itself
         defFieldCompose(deformationField, deformationFieldCuda, flowFieldCuda);
         // The computed scaled deformation field is copied over
-        thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda);
+        thrust::copy_n(thrust::device, flowFieldCuda, voxelNumber, deformationFieldCuda);
         NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
     }
     // The affine component of the transformation is restored
-    if (affineOnly) {
+    if (!affineOnlyCudaVec.empty()) {
         GetDisplacementFromDeformation(deformationField, deformationFieldCuda);
-        AddImages(deformationField, deformationFieldCuda, affineOnlyCuda.data().get());
+        AddImages(deformationField, deformationFieldCuda, affineOnlyCudaVec.data().get());
     }
     deformationField->intent_p1 = DEF_FIELD;
     deformationField->intent_p2 = 0;
@@ -741,8 +740,8 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
 
     // Create a mask array where no voxel is excluded
-    thrust::device_vector<int> maskCuda(voxelNumber);
-    thrust::sequence(maskCuda.begin(), maskCuda.end());
+    thrust::device_vector<int> maskCudaVec(voxelNumber);
+    thrust::sequence(maskCudaVec.begin(), maskCudaVec.end());
 
     // Clean any extension in the deformation field as it is unexpected
     nifti_free_extensions(deformationField);
@@ -754,7 +753,7 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                          deformationField,
                                          velocityFieldGridCuda,
                                          deformationFieldCuda,
-                                         maskCuda.data().get(),
+                                         maskCudaVec.data().get(),
                                          voxelNumber);
     } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) {
         // Create an image to store the flow field
@@ -767,14 +766,14 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
             nifti_copy_extensions(flowField, velocityFieldGrid);
 
         // Allocate CUDA memory for the flow field
-        thrust::device_vector<float4> flowFieldCuda(flowField.nVoxelsPerVolume());
+        thrust::device_vector<float4> flowFieldCudaVec(voxelNumber);
 
         // Generate the velocity field
         GetFlowFieldFromVelocityGrid(velocityFieldGrid, flowField, velocityFieldGridCuda,
-                                     flowFieldCuda.data().get(), maskCuda.data().get(), voxelNumber);
+                                     flowFieldCudaVec.data().get(), maskCudaVec.data().get(), voxelNumber);
         // Exponentiate the flow field
-        GetDeformationFieldFromFlowField(flowField, deformationField, flowFieldCuda.data().get(),
-                                         deformationFieldCuda, maskCuda.data().get(), updateStepNumber);
+        GetDeformationFieldFromFlowField(flowField, deformationField, flowFieldCudaVec.data().get(),
+                                         deformationFieldCuda, updateStepNumber);
         // Update the number of step required. No action otherwise
         velocityFieldGrid->intent_p2 = flowField->intent_p2;
     } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation");

From c19f0bd3996527ce37e82e692f9564adcd99a414 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 23 Jan 2024 16:41:56 +0000
Subject: [PATCH 272/314] Add Cuda::GetIntermediateDefFieldFromVelGrid() #92

---
 niftyreg_build_version.txt               |  2 +-
 reg-lib/cuda/CudaLocalTransformation.cu  | 82 ++++++++++++++++++++++++
 reg-lib/cuda/CudaLocalTransformation.hpp |  5 ++
 3 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 6f8a8c5e..b570ddbf 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-390
+391
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index f265db16..b22736b9 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -779,6 +779,88 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
     } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation");
 }
 /* *************************************************************** */
+void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
+                                        float4 *velocityFieldGridCuda,
+                                        vector<NiftiImage>& deformationFields,
+                                        vector<thrust::device_vector<float4>>& deformationFieldCudaVecs) {
+    if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID)
+        NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation");
+
+    // Create a mask array where no voxel is excluded
+    const size_t voxelNumber = deformationFields[0].nVoxelsPerVolume();
+    thrust::device_vector<int> maskCudaVec(voxelNumber);
+    thrust::sequence(maskCudaVec.begin(), maskCudaVec.end());
+
+    // Create an image to store the flow field
+    NiftiImage flowField(deformationFields[0], NiftiImage::Copy::ImageInfo);
+    flowField.setIntentName("NREG_TRANS"s);
+    flowField->intent_code = NIFTI_INTENT_VECTOR;
+    flowField->intent_p1 = DEF_VEL_FIELD;
+    flowField->intent_p2 = velocityFieldGrid->intent_p2;
+    if (velocityFieldGrid->num_ext > 0)
+        nifti_copy_extensions(flowField, velocityFieldGrid);
+
+    // Allocate CUDA memory for the flow field
+    thrust::device_vector<float4> flowFieldCudaVec(voxelNumber);
+    auto flowFieldCuda = flowFieldCudaVec.data().get();
+
+    // Generate the velocity field
+    GetFlowFieldFromVelocityGrid(velocityFieldGrid, flowField, velocityFieldGridCuda,
+                                 flowFieldCuda, maskCudaVec.data().get(), voxelNumber);
+
+    // Remove the affine component from the flow field
+    NiftiImage affineOnly;
+    thrust::device_vector<float4> affineOnlyCudaVec;
+    if (flowField->num_ext > 0) {
+        if (flowField->ext_list[0].edata != nullptr) {
+            // Create a field that contains the affine component only
+            affineOnly = NiftiImage(deformationFields[0], NiftiImage::Copy::ImageInfo);
+            affineOnlyCudaVec.resize(voxelNumber);
+            reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
+                                               affineOnly, affineOnlyCudaVec.data().get());
+            SubtractImages(flowField, flowFieldCuda, affineOnlyCudaVec.data().get());
+        }
+    } else GetDisplacementFromDeformation(flowField, flowFieldCuda);
+
+    // Get the number of scaling value
+    int squaringNumber = std::abs(static_cast<int>(velocityFieldGrid->intent_p2));
+
+    // The displacement field is scaled
+    const float scalingValue = 1.f / pow(2.f, static_cast<float>(squaringNumber));
+    // Backward/forward deformation field is scaled down
+    MultiplyValue(voxelNumber, flowFieldCuda, deformationFieldCudaVecs[0].data().get(),
+                  flowField->intent_p2 < 0 ? -scalingValue : scalingValue);
+
+    // Conversion from displacement to deformation
+    GetDeformationFromDisplacement(deformationFields[0], deformationFieldCudaVecs[0].data().get());
+
+    // The deformation field is squared
+    auto defFieldCompose = deformationFields[0]->nz > 1 ? DefFieldCompose<true> : DefFieldCompose<false>;
+    for (int i = 0; i < squaringNumber; i++) {
+        // The computed scaled deformation field is copied over
+        thrust::copy_n(thrust::device, deformationFieldCudaVecs[i].data().get(), voxelNumber, deformationFieldCudaVecs[i + 1].data().get());
+        // The deformation field is applied to itself
+        defFieldCompose(deformationFields[i], deformationFieldCudaVecs[i].data().get(), deformationFieldCudaVecs[i + 1].data().get());
+        NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
+    }
+
+    // The affine component of the transformation is restored
+    if (!affineOnlyCudaVec.empty()) {
+        for (int i = 0; i <= squaringNumber; i++) {
+            GetDisplacementFromDeformation(deformationFields[i], deformationFieldCudaVecs[i].data().get());
+            AddImages(deformationFields[i], deformationFieldCudaVecs[i].data().get(), affineOnlyCudaVec.data().get());
+            deformationFields[i]->intent_p1 = DEF_FIELD;
+            deformationFields[i]->intent_p2 = 0;
+        }
+    }
+    // If required an affine component is composed
+    if (velocityFieldGrid->num_ext > 1) {
+        for (int i = 0; i <= squaringNumber; i++)
+            reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(velocityFieldGrid->ext_list[1].edata),
+                                               deformationFields[i], deformationFieldCudaVecs[i].data().get(), true);
+    }
+}
+/* *************************************************************** */
 void GetJacobianMatrix(const nifti_image *deformationField,
                        const float4 *deformationFieldCuda,
                        float *jacobianMatricesCuda) {
diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp
index 8e718822..6be6b2d3 100644
--- a/reg-lib/cuda/CudaLocalTransformation.hpp
+++ b/reg-lib/cuda/CudaLocalTransformation.hpp
@@ -67,6 +67,11 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                  float4 *deformationFieldCuda,
                                  const bool updateStepNumber);
 /* *************************************************************** */
+void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
+                                        float4 *velocityFieldGridCuda,
+                                        vector<NiftiImage>& deformationFields,
+                                        vector<thrust::device_vector<float4>>& deformationFieldCudaVecs);
+/* *************************************************************** */
 void GetJacobianMatrix(const nifti_image *deformationField,
                        const float4 *deformationFieldCuda,
                        float *jacobianMatricesCuda);

From 1a8f4e11de3f190743cc6937dd6de78303fb9fa7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 24 Jan 2024 14:36:49 +0000
Subject: [PATCH 273/314] Implement CudaCompute::ExponentiateGradient() #92

---
 niftyreg_build_version.txt  |  2 +-
 reg-lib/cuda/CudaCompute.cu | 55 +++++++++++++++++++++++++++++++++----
 2 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index b570ddbf..bd03e260 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-391
+392
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index dd0b3518..b8541e8c 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -5,6 +5,7 @@
 #include "CudaNormaliseGradient.hpp"
 #include "CudaResampling.hpp"
 #include "CudaOptimiser.hpp"
+#include "_reg_globalTransformation_gpu.h"
 
 /* *************************************************************** */
 void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
@@ -287,11 +288,55 @@ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
 }
 /* *************************************************************** */
 void CudaCompute::ExponentiateGradient(Content& conBwIn) {
-    // TODO Implement this for CUDA
-    // Use CPU temporarily
-    Compute::ExponentiateGradient(conBwIn);
-    // Transfer the data back to the CUDA device
-    dynamic_cast<CudaDefContent&>(con).UpdateVoxelBasedMeasureGradient();
+    CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
+    CudaF3dContent& conBw = dynamic_cast<CudaF3dContent&>(conBwIn);
+    nifti_image *deformationField = con.Content::GetDeformationField();
+    nifti_image *voxelBasedMeasureGradient = con.DefContent::GetVoxelBasedMeasureGradient();
+    float4 *voxelBasedMeasureGradientCuda = con.GetVoxelBasedMeasureGradientCuda();
+    nifti_image *controlPointGridBw = conBw.F3dContent::GetControlPointGrid();
+    float4 *controlPointGridBwCuda = conBw.GetControlPointGridCuda();
+    mat44 *affineTransformationBw = conBw.Content::GetTransformationMatrix();
+    const int compNum = std::abs(static_cast<int>(controlPointGridBw->intent_p2)); // The number of composition
+
+    /* Allocate a temporary gradient image to store the backward gradient */
+    const size_t voxelGradNumber = NiftiImage::calcVoxelNumber(voxelBasedMeasureGradient, 3);
+    NiftiImage warped(voxelBasedMeasureGradient, NiftiImage::Copy::ImageInfo);
+    thrust::device_vector<float4> warpedCudaVec(voxelGradNumber);
+
+    // Create all deformation field images needed for resampling
+    const size_t defFieldNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
+    vector<NiftiImage> defFields(compNum + 1, NiftiImage(deformationField, NiftiImage::Copy::ImageInfo));
+    vector<thrust::device_vector<float4>> defFieldCudaVecs(compNum + 1, thrust::device_vector<float4>(defFieldNumber));
+
+    // Generate all intermediate deformation fields
+    Cuda::GetIntermediateDefFieldFromVelGrid(controlPointGridBw, controlPointGridBwCuda, defFields, defFieldCudaVecs);
+
+    // Remove the affine component
+    NiftiImage affineDisp;
+    thrust::device_vector<float4> affineDispCudaVec;
+    if (affineTransformationBw) {
+        affineDisp = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo);
+        affineDispCudaVec.resize(defFieldNumber);
+        reg_affine_getDeformationField_gpu(affineTransformationBw, affineDisp, affineDispCudaVec.data().get());
+        Cuda::GetDisplacementFromDeformation(affineDisp, affineDispCudaVec.data().get());
+    }
+
+    auto resampleGradient = voxelBasedMeasureGradient->nz > 1 ? Cuda::ResampleGradient<true> : Cuda::ResampleGradient<false>;
+    for (int i = 0; i < compNum; i++) {
+        if (affineTransformationBw)
+            Cuda::SubtractImages(defFields[i], defFieldCudaVecs[i].data().get(), affineDispCudaVec.data().get());
+        resampleGradient(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda,    // Floating
+                         warped, warpedCudaVec.data().get(),  // Output
+                         defFields[i], defFieldCudaVecs[i].data().get(),
+                         con.GetReferenceMaskCuda(),
+                         con.GetActiveVoxelNumber(),
+                         1,   // Interpolation type - linear
+                         0);  // Padding value
+        Cuda::AddImages(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda, warpedCudaVec.data().get());
+    }
+
+    // Normalise the forward gradient
+    Cuda::MultiplyValue(voxelGradNumber, voxelBasedMeasureGradientCuda, 1.f / powf(2.f, static_cast<float>(compNum)));
 }
 /* *************************************************************** */
 Cuda::UniquePtr<float4> CudaCompute::ScaleGradient(const float4 *transGradCuda, const size_t voxelNumber, const float scale) {

From 69c1fe65fd2a70e1f372dff7cb64befca711370d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 24 Jan 2024 14:38:02 +0000
Subject: [PATCH 274/314] Add exponentiate gradient regression test #92

---
 niftyreg_build_version.txt                    |   2 +-
 reg-test/CMakeLists.txt                       |   1 +
 .../reg_test_regr_exponentiateGradient.cpp    | 188 ++++++++++++++++++
 reg-test/reg_test_regr_resampleGradient.cpp   |   2 +-
 4 files changed, 191 insertions(+), 2 deletions(-)
 create mode 100644 reg-test/reg_test_regr_exponentiateGradient.cpp

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index bd03e260..25685cf6 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-392
+393
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 26e6d058..4bf3d667 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -127,6 +127,7 @@ if(USE_CUDA)
   set(EXEC_LIST reg_test_regr_approxBendingEnergyGradient ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_approxLinearEnergyGradient ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST})
+  set(EXEC_LIST reg_test_regr_exponentiateGradient ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_kernelConvolution ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST})
   set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST})
diff --git a/reg-test/reg_test_regr_exponentiateGradient.cpp b/reg-test/reg_test_regr_exponentiateGradient.cpp
new file mode 100644
index 00000000..81f50055
--- /dev/null
+++ b/reg-test/reg_test_regr_exponentiateGradient.cpp
@@ -0,0 +1,188 @@
+#include "reg_test_common.h"
+#include "CudaF3dContent.h"
+
+/**
+ *  Exponentiate gradient regression test to ensure the CPU and CUDA versions yield the same output
+**/
+
+class ExponentiateGradientTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage, NiftiImage, NiftiImage, NiftiImage, NiftiImage>;
+    using TestCase = std::tuple<std::string, NiftiImage, NiftiImage>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    ExponentiateGradientTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create a random number generator
+        std::mt19937 gen(0);
+        std::uniform_real_distribution<float> distr(-1, 1);
+
+        // Create reference images
+        constexpr NiftiImage::dim_t dimSize = 4;
+        NiftiImage reference2d({ dimSize, dimSize }, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d({ dimSize, dimSize, dimSize }, NIFTI_TYPE_FLOAT32);
+
+        // Create deformation fields
+        NiftiImage deformationField2d = CreateDeformationField(reference2d);
+        NiftiImage deformationField3d = CreateDeformationField(reference3d);
+
+        // Create control point grids and fill them with random values
+        NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d);
+        NiftiImage controlPointGridBw2d = CreateControlPointGrid(reference2d);
+        NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d);
+        NiftiImage controlPointGridBw3d = CreateControlPointGrid(reference3d);
+        controlPointGridBw2d->intent_p1 = SPLINE_VEL_GRID;
+        controlPointGridBw3d->intent_p1 = SPLINE_VEL_GRID;
+        auto cpp2dPtr = controlPointGrid2d.data();
+        auto cppBw2dPtr = controlPointGridBw2d.data();
+        auto cpp3dPtr = controlPointGrid3d.data();
+        auto cppBw3dPtr = controlPointGridBw3d.data();
+        for (auto i = 0; i < controlPointGrid2d.nVoxels(); i++) {
+            cpp2dPtr[i] = distr(gen);
+            cppBw2dPtr[i] = distr(gen);
+        }
+        for (auto i = 0; i < controlPointGrid3d.nVoxels(); i++) {
+            cpp3dPtr[i] = distr(gen);
+            cppBw3dPtr[i] = distr(gen);
+        }
+
+        // Create voxel-based measure gradients and fill them with random values
+        NiftiImage voxelBasedGrad2d(deformationField2d, NiftiImage::Copy::ImageInfoAndAllocData);
+        NiftiImage voxelBasedGrad3d(deformationField3d, NiftiImage::Copy::ImageInfoAndAllocData);
+        auto voxelBasedGrad2dPtr = voxelBasedGrad2d.data();
+        auto voxelBasedGrad3dPtr = voxelBasedGrad3d.data();
+        for (auto i = 0; i < voxelBasedGrad2d.nVoxels(); i++)
+            voxelBasedGrad2dPtr[i] = distr(gen);
+        for (auto i = 0; i < voxelBasedGrad3d.nVoxels(); i++)
+            voxelBasedGrad3dPtr[i] = distr(gen);
+
+        // Fill the matrices with random values
+        voxelBasedGrad2d->sform_code = 0;
+        voxelBasedGrad3d->sform_code = 1;
+        for (int j = 0; j < 4; j++) {
+            for (int k = 0; k < 4; k++) {
+                voxelBasedGrad2d->qto_ijk.m[j][k] = j == k ? distr(gen) : 0;
+                voxelBasedGrad3d->sto_ijk.m[j][k] = j == k ? distr(gen) : 0;
+                deformationField2d->sto_xyz.m[j][k] = j == k ? distr(gen) : 0;
+                deformationField3d->sto_xyz.m[j][k] = j == k ? distr(gen) : 0;
+            }
+        }
+        voxelBasedGrad2d->qto_xyz = nifti_mat44_inverse(voxelBasedGrad2d->qto_ijk);
+        voxelBasedGrad3d->sto_xyz = nifti_mat44_inverse(voxelBasedGrad3d->sto_ijk);
+
+        // Add the test data
+        vector<TestData> testData;
+        testData.emplace_back(TestData(
+            "2D",
+            std::move(reference2d),
+            std::move(deformationField2d),
+            std::move(controlPointGrid2d),
+            std::move(controlPointGridBw2d),
+            std::move(voxelBasedGrad2d)
+        ));
+        testData.emplace_back(TestData(
+            "3D",
+            std::move(reference3d),
+            std::move(deformationField3d),
+            std::move(controlPointGrid3d),
+            std::move(controlPointGridBw3d),
+            std::move(voxelBasedGrad3d)
+        ));
+
+        // Create the platforms
+        Platform platformCpu(PlatformType::Cpu);
+        Platform platformCuda(PlatformType::Cuda);
+
+        for (auto&& testData : testData) {
+            // Get the test data
+            auto&& [testName, reference, defField, controlPointGrid, controlPointGridBw, voxelBasedGrad] = testData;
+
+            // Create images
+            NiftiImage referenceCpu(reference), referenceCuda(reference);
+            NiftiImage referenceBwCpu(reference), referenceBwCuda(reference);
+            NiftiImage defFieldCpu(defField), defFieldCuda(defField);
+            NiftiImage cppCpu(controlPointGrid), cppCuda(controlPointGrid);
+            NiftiImage cppBwCpu(controlPointGridBw), cppBwCuda(controlPointGridBw);
+
+            // Create the contents
+            unique_ptr<F3dContent> contentCpu{ new F3dContent(referenceCpu, referenceCpu, cppCpu) };
+            unique_ptr<F3dContent> contentCuda{ new CudaF3dContent(referenceCuda, referenceCuda, cppCuda) };
+            unique_ptr<F3dContent> contentBwCpu{ new F3dContent(referenceBwCpu, referenceBwCpu, cppBwCpu) };
+            unique_ptr<F3dContent> contentBwCuda{ new CudaF3dContent(referenceBwCuda, referenceBwCuda, cppBwCuda) };
+
+            // Set the deformation fields
+            contentCpu->SetDeformationField(defFieldCpu.disown());
+            contentCuda->SetDeformationField(defFieldCuda.disown());
+
+            // Set the voxel-based measure gradient images
+            NiftiImage voxelGrad = contentCpu->GetVoxelBasedMeasureGradient();
+            voxelGrad->sform_code = voxelBasedGrad->sform_code;
+            voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk;
+            voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz;
+            voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
+            voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
+            voxelGrad.copyData(voxelBasedGrad);
+            voxelGrad.disown();
+            contentCpu->UpdateVoxelBasedMeasureGradient();
+            voxelGrad = contentCuda->DefContent::GetVoxelBasedMeasureGradient();
+            voxelGrad->sform_code = voxelBasedGrad->sform_code;
+            voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk;
+            voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz;
+            voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
+            voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
+            voxelGrad.copyData(voxelBasedGrad);
+            voxelGrad.disown();
+            contentCuda->UpdateVoxelBasedMeasureGradient();
+
+            // Create the computes
+            unique_ptr<Compute> computeCpu{ platformCpu.CreateCompute(*contentCpu) };
+            unique_ptr<Compute> computeCuda{ platformCuda.CreateCompute(*contentCuda) };
+
+            // Resample gradient
+            computeCpu->ExponentiateGradient(*contentBwCpu);
+            computeCuda->ExponentiateGradient(*contentBwCuda);
+
+            // Get the results
+            NiftiImage voxelGradCpu(contentCpu->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
+            NiftiImage voxelGradCuda(contentCuda->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
+
+            // Save for testing
+            testCases.push_back({ testName, std::move(voxelGradCpu), std::move(voxelGradCuda) });
+        }
+    }
+};
+
+TEST_CASE_METHOD(ExponentiateGradientTest, "Regression Exponentiate Gradient", "[regression]") {
+    // Loop over all generated test cases
+    for (auto&& testCase : testCases) {
+        // Retrieve test information
+        auto&& [sectionName, voxelGradCpu, voxelGradCuda] = testCase;
+
+        SECTION(sectionName) {
+            NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            // Check the results
+            const auto voxelGradCpuPtr = voxelGradCpu.data();
+            const auto voxelGradCudaPtr = voxelGradCuda.data();
+            for (size_t i = 0; i < voxelGradCpu.nVoxels(); i++) {
+                const float voxelGradCpuVal = voxelGradCpuPtr[i];
+                const float voxelGradCudaVal = voxelGradCudaPtr[i];
+                const float diff = abs(voxelGradCpuVal - voxelGradCudaVal);
+                if (diff > 0) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | diff=" << diff;
+                    NR_COUT << " | CPU=" << voxelGradCpuVal;
+                    NR_COUT << " | CUDA=" << voxelGradCudaVal << std::endl;
+                }
+                REQUIRE(diff == 0);
+            }
+        }
+    }
+}
diff --git a/reg-test/reg_test_regr_resampleGradient.cpp b/reg-test/reg_test_regr_resampleGradient.cpp
index 062c442c..638cb190 100644
--- a/reg-test/reg_test_regr_resampleGradient.cpp
+++ b/reg-test/reg_test_regr_resampleGradient.cpp
@@ -36,7 +36,7 @@ class ResampleGradientTest {
         for (size_t i = 0; i < deformationField3d.nVoxels(); i++)
             deformationField3dPtr[i] = distr(gen);
 
-        // Create transformation gradient images and fill them with random values
+        // Create voxel-based measure gradients and fill them with random values
         NiftiImage voxelBasedGrad2d(deformationField2d, NiftiImage::Copy::ImageInfoAndAllocData);
         NiftiImage voxelBasedGrad3d(deformationField3d, NiftiImage::Copy::ImageInfoAndAllocData);
         auto voxelBasedGrad2dPtr = voxelBasedGrad2d.data();

From 45698ba062669b25768ab70d94e56f4b0dd27862 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 25 Jan 2024 15:49:16 +0000
Subject: [PATCH 275/314] Refactor Cuda::GetAffineDeformationField() #92

---
 niftyreg_build_version.txt                    |  2 +-
 reg-apps/reg_benchmark.cpp                    |  6 +-
 reg-lib/cuda/BlockSize.hpp                    |  3 -
 reg-lib/cuda/CMakeLists.txt                   |  2 +-
 reg-lib/cuda/CudaCompute.cu                   |  4 +-
 reg-lib/cuda/CudaGlobalTransformation.cu      | 64 +++++++++++++++++++
 reg-lib/cuda/CudaGlobalTransformation.hpp     | 26 ++++++++
 reg-lib/cuda/CudaLocalTransformation.cu       | 18 +++---
 reg-lib/cuda/_reg_globalTransformation_gpu.cu | 42 ------------
 reg-lib/cuda/_reg_globalTransformation_gpu.h  | 20 ------
 .../cuda/_reg_globalTransformation_kernels.cu | 39 -----------
 11 files changed, 106 insertions(+), 120 deletions(-)
 create mode 100644 reg-lib/cuda/CudaGlobalTransformation.cu
 create mode 100644 reg-lib/cuda/CudaGlobalTransformation.hpp
 delete mode 100755 reg-lib/cuda/_reg_globalTransformation_gpu.cu
 delete mode 100755 reg-lib/cuda/_reg_globalTransformation_gpu.h
 delete mode 100755 reg-lib/cuda/_reg_globalTransformation_kernels.cu

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 25685cf6..0ca45a09 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-393
+394
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index 47ad511a..828b050e 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -255,9 +255,9 @@ int main(int argc, char **argv)
          time(&start);
          for(int i=0; i<maxIt; ++i)
          {
-            reg_affine_getDeformationField_gpu(affineTransformation,
-                                               targetImage,
-                                               &deformationFieldImageArray_d);
+            Cuda::GetAffineDeformationField(affineTransformation,
+                                            targetImage,
+                                            &deformationFieldImageArray_d);
          }
          time(&end);
          gpuTime=(end-start);
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 6338cf87..5f70e968 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -14,7 +14,6 @@
 namespace NiftyReg {
 /* *************************************************************** */
 struct BlockSize {
-    unsigned reg_affine_getDeformationField;
     unsigned GetApproxJacobianValues2d;
     unsigned GetApproxJacobianValues3d;
     unsigned GetJacobianValues2d;
@@ -35,7 +34,6 @@ struct BlockSize {
 /* *************************************************************** */
 struct BlockSize100: public BlockSize {
     BlockSize100() {
-        reg_affine_getDeformationField = 512; // 16 reg - 24 smem
         GetApproxJacobianValues2d = 384; // 17 reg - 104 smem - 36 cmem
         GetApproxJacobianValues3d = 256; // 27 reg - 356 smem - 108 cmem
         GetJacobianValues2d = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem
@@ -58,7 +56,6 @@ struct BlockSize100: public BlockSize {
 /* *************************************************************** */
 struct BlockSize300: public BlockSize {
     BlockSize300() {
-        reg_affine_getDeformationField = 1024; // 23 reg
         GetApproxJacobianValues2d = 768; // 34 reg
         GetApproxJacobianValues3d = 640; // 46 reg
         GetJacobianValues2d = 768; // 34 reg
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 750c230b..0fc5d63c 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -63,6 +63,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaConvolutionKernel.cpp
     CudaDefContent.cpp
     CudaF3dContent.cpp
+    CudaGlobalTransformation.cu
     CudaKernelConvolution.cu
     CudaKernelFactory.cpp
     CudaLocalTransformation.cu
@@ -74,7 +75,6 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaResampling.cu
     CudaTools.cu
     resampleKernel.cu
-    _reg_globalTransformation_gpu.cu
     _reg_nmi_gpu.cu
     _reg_ssd_gpu.cu
 )
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index b8541e8c..29033bb9 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -1,11 +1,11 @@
 #include "CudaCompute.h"
 #include "CudaF3dContent.h"
+#include "CudaGlobalTransformation.hpp"
 #include "CudaKernelConvolution.hpp"
 #include "CudaLocalTransformation.hpp"
 #include "CudaNormaliseGradient.hpp"
 #include "CudaResampling.hpp"
 #include "CudaOptimiser.hpp"
-#include "_reg_globalTransformation_gpu.h"
 
 /* *************************************************************** */
 void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
@@ -317,7 +317,7 @@ void CudaCompute::ExponentiateGradient(Content& conBwIn) {
     if (affineTransformationBw) {
         affineDisp = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo);
         affineDispCudaVec.resize(defFieldNumber);
-        reg_affine_getDeformationField_gpu(affineTransformationBw, affineDisp, affineDispCudaVec.data().get());
+        Cuda::GetAffineDeformationField(affineTransformationBw, affineDisp, affineDispCudaVec.data().get());
         Cuda::GetDisplacementFromDeformation(affineDisp, affineDispCudaVec.data().get());
     }
 
diff --git a/reg-lib/cuda/CudaGlobalTransformation.cu b/reg-lib/cuda/CudaGlobalTransformation.cu
new file mode 100644
index 00000000..a5c0b82f
--- /dev/null
+++ b/reg-lib/cuda/CudaGlobalTransformation.cu
@@ -0,0 +1,64 @@
+/*
+ *  CudaGlobalTransformation.cu
+ *
+ *
+ *  Created by Marc Modat on 25/03/2009.
+ *  Copyright (c) 2009-2018, University College London
+ *  Copyright (c) 2018, NiftyReg Developers.
+ *  All rights reserved.
+ *  See the LICENSE.txt file in the nifty_reg root folder
+ *
+ */
+
+#include "CudaGlobalTransformation.hpp"
+#include "_reg_common_cuda_kernels.cu"
+
+/* *************************************************************** */
+template<bool is3d, bool compose>
+void GetAffineDeformationField(const mat44 *affineMatrix,
+                               const nifti_image *deformationField,
+                               float4 *deformationFieldCuda) {
+    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, is3d ? 3 : 2);
+    const int3 imageDims = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
+    const mat44 *targetMatrix = deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz;
+    const mat44 transMatrix = compose ? *affineMatrix : reg_mat44_mul(affineMatrix, targetMatrix);
+    Cuda::UniqueTextureObjectPtr deformationFieldTexturePtr; cudaTextureObject_t deformationFieldTexture = 0;
+    if constexpr (compose) {
+        deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
+        deformationFieldTexture = *deformationFieldTexturePtr;
+    }
+
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [
+        deformationFieldCuda, deformationFieldTexture, transMatrix, imageDims
+    ]__device__(const int index) {
+        float voxel[3];
+        if constexpr (compose) {
+            float4 defVal = tex1Dfetch<float4>(deformationFieldTexture, index);
+            voxel[0] = defVal.x; voxel[1] = defVal.y; voxel[2] = defVal.z;
+        } else {
+            auto dims = reg_indexToDims_cuda<is3d>(index, imageDims);
+            voxel[0] = static_cast<float>(dims.x);
+            voxel[1] = static_cast<float>(dims.y);
+            voxel[2] = static_cast<float>(dims.z);
+        }
+
+        // The transformation is applied
+        float position[3];
+        reg_mat44_mul_cuda<is3d>(transMatrix, voxel, position);
+
+        // The deformation field (real coordinates) is stored
+        deformationFieldCuda[index] = make_float4(position[0], position[1], position[2], 0);
+    });
+}
+/* *************************************************************** */
+template<bool compose>
+void Cuda::GetAffineDeformationField(const mat44 *affineMatrix,
+                                     const nifti_image *deformationField,
+                                     float4 *deformationFieldCuda) {
+    auto getAffineDeformationField = deformationField->nz > 1 ? ::GetAffineDeformationField<true, compose> :
+                                                                ::GetAffineDeformationField<false, compose>;
+    getAffineDeformationField(affineMatrix, deformationField, deformationFieldCuda);
+}
+template void Cuda::GetAffineDeformationField<false>(const mat44*, const nifti_image*, float4*);
+template void Cuda::GetAffineDeformationField<true>(const mat44*, const nifti_image*, float4*);
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaGlobalTransformation.hpp b/reg-lib/cuda/CudaGlobalTransformation.hpp
new file mode 100644
index 00000000..b5c483a7
--- /dev/null
+++ b/reg-lib/cuda/CudaGlobalTransformation.hpp
@@ -0,0 +1,26 @@
+/*
+ *  CudaGlobalTransformation.hpp
+ *
+ *
+ *  Created by Marc Modat on 25/03/2009.
+ *  Copyright (c) 2009-2018, University College London
+ *  Copyright (c) 2018, NiftyReg Developers.
+ *  All rights reserved.
+ *  See the LICENSE.txt file in the nifty_reg root folder
+ *
+ */
+
+#pragma once
+
+#include "CudaCommon.hpp"
+
+/* *************************************************************** */
+namespace NiftyReg::Cuda {
+/* *************************************************************** */
+template<bool compose=false>
+void GetAffineDeformationField(const mat44 *affineMatrix,
+                               const nifti_image *targetImage,
+                               float4 *deformationFieldCuda);
+/* *************************************************************** */
+} // namespace NiftyReg::Cuda
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index b22736b9..71dd9c0e 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -12,7 +12,7 @@
 
 #include "CudaLocalTransformation.hpp"
 #include "CudaLocalTransformationKernels.cu"
-#include "_reg_globalTransformation_gpu.h"
+#include "CudaGlobalTransformation.hpp"
 #include "_reg_splineBasis.h"
 
 /* *************************************************************** */
@@ -669,8 +669,8 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
             // Create a field that contains the affine component only
             affineOnly = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo);
             affineOnlyCudaVec.resize(voxelNumber);
-            reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
-                                               affineOnly, affineOnlyCudaVec.data().get());
+            Cuda::GetAffineDeformationField(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
+                                            affineOnly, affineOnlyCudaVec.data().get());
             SubtractImages(flowField, flowFieldCuda, affineOnlyCudaVec.data().get());
         }
     } else GetDisplacementFromDeformation(flowField, flowFieldCuda);
@@ -728,8 +728,8 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
     deformationField->intent_p2 = 0;
     // If required an affine component is composed
     if (flowField->num_ext > 1)
-        reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[1].edata),
-                                           deformationField, deformationFieldCuda, true);
+        Cuda::GetAffineDeformationField<true>(reinterpret_cast<mat44*>(flowField->ext_list[1].edata),
+                                              deformationField, deformationFieldCuda);
 }
 /* *************************************************************** */
 void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
@@ -816,8 +816,8 @@ void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
             // Create a field that contains the affine component only
             affineOnly = NiftiImage(deformationFields[0], NiftiImage::Copy::ImageInfo);
             affineOnlyCudaVec.resize(voxelNumber);
-            reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
-                                               affineOnly, affineOnlyCudaVec.data().get());
+            Cuda::GetAffineDeformationField(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
+                                            affineOnly, affineOnlyCudaVec.data().get());
             SubtractImages(flowField, flowFieldCuda, affineOnlyCudaVec.data().get());
         }
     } else GetDisplacementFromDeformation(flowField, flowFieldCuda);
@@ -856,8 +856,8 @@ void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
     // If required an affine component is composed
     if (velocityFieldGrid->num_ext > 1) {
         for (int i = 0; i <= squaringNumber; i++)
-            reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(velocityFieldGrid->ext_list[1].edata),
-                                               deformationFields[i], deformationFieldCudaVecs[i].data().get(), true);
+            Cuda::GetAffineDeformationField<true>(reinterpret_cast<mat44*>(velocityFieldGrid->ext_list[1].edata),
+                                                  deformationFields[i], deformationFieldCudaVecs[i].data().get());
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu
deleted file mode 100755
index 34b668bd..00000000
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- *  _reg_globalTransformation_gpu.cu
- *
- *
- *  Created by Marc Modat on 25/03/2009.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#include "_reg_globalTransformation_gpu.h"
-#include "_reg_globalTransformation_kernels.cu"
-
-/* *************************************************************** */
-void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix,
-                                        const nifti_image *targetImage,
-                                        float4 *deformationFieldCuda,
-                                        const bool composition) {
-    // TODO Implement composition
-    if (composition)
-        NR_FATAL_ERROR("Composition is not implemented on the GPU");
-
-    const int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz);
-    const size_t voxelNumber = targetImage->nvox;
-
-    // If the target sform is defined, it is used. The qform is used otherwise
-    const mat44 *targetMatrix = targetImage->sform_code > 0 ? &targetImage->sto_xyz : &targetImage->qto_xyz;
-
-    // Affine * TargetMat * voxelIndex is performed
-    // Affine * TargetMat is constant
-    const mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix);
-
-    const unsigned blocks = CudaContext::GetBlockSize()->reg_affine_getDeformationField;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)targetImage->nvox / (float)blocks));
-    const dim3 gridDims(grids, grids, 1);
-    const dim3 blockDims(blocks, 1, 1);
-    reg_affine_getDeformationField_kernel<<<gridDims, blockDims>>>(deformationFieldCuda, transformationMatrix, imageSize, (unsigned)voxelNumber);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-}
-/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h
deleted file mode 100755
index 66430f8a..00000000
--- a/reg-lib/cuda/_reg_globalTransformation_gpu.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- *  _reg_globalTransformation_gpu.h
- *
- *
- *  Created by Marc Modat on 25/03/2009.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "CudaCommon.hpp"
-
-void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix,
-                                        const nifti_image *targetImage,
-                                        float4 *deformationFieldCuda,
-                                        const bool composition = false);
diff --git a/reg-lib/cuda/_reg_globalTransformation_kernels.cu b/reg-lib/cuda/_reg_globalTransformation_kernels.cu
deleted file mode 100755
index e74b7119..00000000
--- a/reg-lib/cuda/_reg_globalTransformation_kernels.cu
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- *  _reg_affineTransformation.h
- *
- *
- *  Created by Marc Modat on 25/03/2009.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#include "_reg_common_cuda_kernels.cu"
-
-/* *************************************************************** */
-__global__ void reg_affine_getDeformationField_kernel(float4 *deformationField,
-                                                      const mat44 affineMatrix,
-                                                      const int3 imageSize,
-                                                      const unsigned voxelNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < voxelNumber) {
-        int quot, rem;
-        reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem);
-        const int z = quot;
-        reg_div_cuda(rem, imageSize.x, quot, rem);
-        const int y = quot, x = rem;
-
-        // The transformation is applied
-        const float4 position = {
-            affineMatrix.m[0][0] * x + affineMatrix.m[0][1] * y + affineMatrix.m[0][2] * z + affineMatrix.m[0][3],
-            affineMatrix.m[1][0] * x + affineMatrix.m[1][1] * y + affineMatrix.m[1][2] * z + affineMatrix.m[1][3],
-            affineMatrix.m[2][0] * x + affineMatrix.m[2][1] * y + affineMatrix.m[2][2] * z + affineMatrix.m[2][3],
-            0.f
-        };
-        // The deformation field (real coordinates) is stored
-        deformationField[tid] = position;
-    }
-}
-/* *************************************************************** */

From db09c2f159a9b4a6cd15db454eba67e8063847b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 26 Jan 2024 12:14:42 +0000
Subject: [PATCH 276/314] Refactor affine deformation field unit test #92

---
 niftyreg_build_version.txt                   |   2 +-
 reg-test/reg_test_affineDeformationField.cpp | 375 ++++++++++---------
 2 files changed, 190 insertions(+), 187 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 0ca45a09..e537bfeb 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-394
+395
diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp
index dd39cf4e..f38ce164 100644
--- a/reg-test/reg_test_affineDeformationField.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -1,186 +1,189 @@
-#include "reg_test_common.h"
-
-/*
-    This test file contains the following unit tests:
-    test function: creation of a deformation field from an affine matrix
-    In 2D and 3D
-    identity
-    translation
-    affine
-*/
-
-
-typedef std::tuple<std::string, nifti_image*, mat44*, float*, float*, float*> TestData;
-typedef std::tuple<unique_ptr<AladinContent>, unique_ptr<Platform>> ContentDesc;
-
-TEST_CASE("Affine Deformation Field", "[unit]") {
-    // Create a reference 2D image
-    int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 };
-    nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference2d);
-
-    // Create a reference 3D image
-    dim[0] = 3;
-    dim[3] = 2;
-    nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true);
-    reg_checkAndCorrectDimension(reference3d);
-
-    // Generate the different test cases
-    vector<TestData> testCases;
-
-    // Identity use case - 2D
-    mat44 identity;
-    reg_mat44_eye(&identity);
-    // Test order [0,0] [1,0] [0,1] [1,1]
-    float identityResult2x[4] = { 0, 1, 0, 1 };
-    float identityResult2y[4] = { 0, 0, 1, 1 };
-    testCases.emplace_back(TestData(
-        "identity 2D",
-        reference2d,
-        &identity,
-        identityResult2x,
-        identityResult2y,
-        nullptr
-    ));
-
-    // Identity use case - 3D
-    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
-    float identityResult3x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 };
-    float identityResult3y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 };
-    float identityResult3z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 };
-    testCases.emplace_back(TestData(
-        "identity 3D",
-        reference3d,
-        &identity,
-        identityResult3x,
-        identityResult3y,
-        identityResult3z
-    ));
-
-    // Translation - 2D
-    mat44 translation;
-    reg_mat44_eye(&translation);
-    translation.m[0][3] = -0.5;
-    translation.m[1][3] = 1.5;
-    translation.m[2][3] = 0.75;
-    // Test order [0,0] [1,0] [0,1] [1,1]
-    float translationResult2x[4] = { -0.5, .5, -0.5, .5 };
-    float translationResult2y[4] = { 1.5, 1.5, 2.5, 2.5 };
-    testCases.emplace_back(TestData(
-        "translation 2D",
-        reference2d,
-        &translation,
-        translationResult2x,
-        translationResult2y,
-        nullptr
-    ));
-
-    // Translation - 3D
-    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
-    float translationResult3x[8] = { -0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5 };
-    float translationResult3y[8] = { 1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5 };
-    float translationResult3z[8] = { .75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75 };
-    testCases.emplace_back(TestData(
-        "translation 3D",
-        reference3d,
-        &translation,
-        translationResult3x,
-        translationResult3y,
-        translationResult3z
-    ));
-
-    // Full affine - 2D
-    // Test order [0,0] [1,0] [0,1] [1,1]
-    mat44 affine;
-    reg_mat44_eye(&affine);
-    affine.m[0][3] = -0.5;
-    affine.m[1][3] = 1.5;
-    affine.m[2][3] = 0.75;
-    for (int i = 0; i < 4; ++i) {
-        for (int j = 0; j < 4; ++j) {
-            affine.m[i][j] += ((static_cast<float>(rand()) / RAND_MAX) - 0.5f) / 10.f;
-        }
-    }
-    float affineResult2x[4];
-    float affineResult2y[4];
-    for (int i = 0; i < 4; ++i) {
-        auto x = identityResult2x[i];
-        auto y = identityResult2y[i];
-        affineResult2x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y;
-        affineResult2y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y;
-
-    }
-    testCases.emplace_back(TestData(
-        "full affine 2D",
-        reference2d,
-        &affine,
-        affineResult2x,
-        affineResult2y,
-        nullptr
-    ));
-
-    // Full affine - 3D
-    // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
-    float affineResult3x[8];
-    float affineResult3y[8];
-    float affineResult3z[8];
-    for (int i = 0; i < 8; ++i) {
-        auto x = identityResult3x[i];
-        auto y = identityResult3y[i];
-        auto z = identityResult3z[i];
-        affineResult3x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z;
-        affineResult3y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z;
-        affineResult3z[i] = affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z;
-    }
-    testCases.emplace_back(TestData(
-        "affine 3D",
-        reference3d,
-        &affine,
-        affineResult3x,
-        affineResult3y,
-        affineResult3z
-    ));
-
-    // Loop over all generated test cases
-    for (auto&& testCase : testCases) {
-        // Retrieve test information
-        auto&& [testName, reference, testMat, testResX, testResY, testResZ] = testCase;
-
-        // Accumulate all required contents with a vector
-        vector<ContentDesc> contentDescs;
-        for (auto&& platformType : PlatformTypes) {
-            unique_ptr<Platform> platform{ new Platform(platformType) };
-            unique_ptr<AladinContentCreator> contentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
-            unique_ptr<AladinContent> content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) };
-            contentDescs.push_back({ std::move(content), std::move(platform) });
-        }
-        // Loop over all possibles contents for each test
-        for (auto&& contentDesc : contentDescs) {
-            auto&& [content, platform] = contentDesc;
-            const std::string sectionName = testName + " " + platform->GetName();
-            SECTION(sectionName) {
-                NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl;
-
-                // Do the calculation
-                unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) };
-                affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
-
-                // Check all values
-                nifti_image *defField = content->GetDeformationField();
-                auto defFieldPtrX = static_cast<float*>(defField->data);
-                const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
-                auto defFieldPtrY = &defFieldPtrX[voxelNumber];
-                auto defFieldPtrZ = &defFieldPtrY[voxelNumber];
-                for (size_t i = 0; i < voxelNumber; ++i) {
-                    REQUIRE(fabs(defFieldPtrX[i] - testResX[i]) < EPS);
-                    REQUIRE(fabs(defFieldPtrY[i] - testResY[i]) < EPS);
-                    if (testResZ)
-                        REQUIRE(fabs(defFieldPtrZ[i] - testResZ[i]) < EPS);
-                }
-            }
-        }
-    }
-    // Clean up
-    nifti_image_free(reference2d);
-    nifti_image_free(reference3d);
-}
+#include "reg_test_common.h"
+
+/*
+    This test file contains the following unit tests:
+    test function: creation of a deformation field from an affine matrix
+    In 2D and 3D
+    Identity
+    Translation
+    Affine
+*/
+
+struct float3 {
+    float x, y, z;
+
+    std::string to_string() const {
+        return "(" + std::to_string(x) + ", " + std::to_string(y) + ", " + std::to_string(z) + ")";
+    }
+};
+
+class AffineDeformationFieldTest {
+protected:
+    using TestData = std::tuple<std::string, NiftiImage&, mat44, vector<float3>>;
+    using TestCase = std::tuple<std::string, NiftiImage, vector<float3>>;
+
+    inline static vector<TestCase> testCases;
+
+public:
+    AffineDeformationFieldTest() {
+        if (!testCases.empty())
+            return;
+
+        // Create reference images
+        constexpr NiftiImage::dim_t size = 2;
+        NiftiImage reference2d({ size, size }, NIFTI_TYPE_FLOAT32);
+        NiftiImage reference3d({ size, size, size }, NIFTI_TYPE_FLOAT32);
+
+        // Data container for the test data
+        vector<TestData> testData;
+
+        // Identity use case - 2D
+        mat44 identity;
+        reg_mat44_eye(&identity);
+        // Test order [0,0] [1,0] [0,1] [1,1]
+        vector<float3> identityResult2d{ { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 1, 1, 0 } };
+        testData.emplace_back(TestData(
+            "2D Identity",
+            reference2d,
+            identity,
+            identityResult2d
+        ));
+
+        // Identity use case - 3D
+        // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
+        vector<float3> identityResult3d{ { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 1, 1, 0 }, { 0, 0, 1 }, { 1, 0, 1 }, { 0, 1, 1 }, { 1, 1, 1 } };
+        testData.emplace_back(TestData(
+            "3D Identity",
+            reference3d,
+            identity,
+            identityResult3d
+        ));
+
+        // Translation - 2D
+        mat44 translation;
+        reg_mat44_eye(&translation);
+        translation.m[0][3] = -0.5;
+        translation.m[1][3] = 1.5;
+        translation.m[2][3] = 0.75;
+        // Test order [0,0] [1,0] [0,1] [1,1]
+        vector<float3> translationResult2d{ { -0.5f, 1.5f, 0 }, { 0.5f, 1.5f, 0 }, { -0.5f, 2.5f, 0 }, { 0.5f, 2.5f, 0 } };
+        testData.emplace_back(TestData(
+            "2D Translation",
+            reference2d,
+            translation,
+            std::move(translationResult2d)
+        ));
+
+        // Translation - 3D
+        // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
+        vector<float3> translationResult3d{ { -0.5f, 1.5f, 0.75f }, { 0.5f, 1.5f, 0.75f },
+                                            { -0.5f, 2.5f, 0.75f }, { 0.5f, 2.5f, 0.75f },
+                                            { -0.5f, 1.5f, 1.75f }, { 0.5f, 1.5f, 1.75f },
+                                            { -0.5f, 2.5f, 1.75f }, { 0.5f, 2.5f, 1.75f } };
+        testData.emplace_back(TestData(
+            "3D Translation",
+            reference3d,
+            translation,
+            std::move(translationResult3d)
+        ));
+
+        // Full affine - 2D
+        // Test order [0,0] [1,0] [0,1] [1,1]
+        mat44 affine;
+        reg_mat44_eye(&affine);
+        affine.m[0][3] = -0.5;
+        affine.m[1][3] = 1.5;
+        affine.m[2][3] = 0.75;
+        for (int i = 0; i < 4; ++i)
+            for (int j = 0; j < 4; ++j)
+                affine.m[i][j] += ((static_cast<float>(rand()) / RAND_MAX) - 0.5f) / 10.f;
+        vector<float3> affineResult2d(4);
+        for (int i = 0; i < 4; ++i) {
+            double x = identityResult2d[i].x;
+            double y = identityResult2d[i].y;
+            affineResult2d[i].x = static_cast<float>(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y);
+            affineResult2d[i].y = static_cast<float>(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y);
+
+        }
+        testData.emplace_back(TestData(
+            "2D Affine",
+            reference2d,
+            affine,
+            std::move(affineResult2d)
+        ));
+
+        // Full affine - 3D
+        // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
+        vector<float3> affineResult3d(8);
+        for (int i = 0; i < 8; ++i) {
+            double x = identityResult3d[i].x;
+            double y = identityResult3d[i].y;
+            double z = identityResult3d[i].z;
+            affineResult3d[i].x = static_cast<float>(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z);
+            affineResult3d[i].y = static_cast<float>(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z);
+            affineResult3d[i].z = static_cast<float>(affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z);
+        }
+        testData.emplace_back(TestData(
+            "3D Affine",
+            reference3d,
+            affine,
+            std::move(affineResult3d)
+        ));
+
+        for (auto&& testData : testData) {
+            for (auto&& platformType : PlatformTypes) {
+                // Make a copy of the test data
+                auto [testName, reference, transMat, expRes] = testData;
+
+                // Create the platform
+                unique_ptr<Platform> platform{ new Platform(platformType) };
+                testName += " "s + platform->GetName();
+
+                // Create the content for Aladin
+                unique_ptr<AladinContentCreator> aladinContentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
+                unique_ptr<AladinContent> aladinContent{ aladinContentCreator->Create(reference, reference, nullptr, &transMat, sizeof(float)) };
+
+                // Do the calculation for Aladin
+                unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), aladinContent.get()) };
+                affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
+
+                // Get the result
+                NiftiImage defField(aladinContent->GetDeformationField(), NiftiImage::Copy::Image);
+
+                // Save for testing
+                testCases.push_back({ testName + " - Aladin", std::move(defField), std::move(expRes) });
+            }
+        }
+    }
+};
+
+TEST_CASE_METHOD(AffineDeformationFieldTest, "Affine Deformation Field", "[unit]") {
+    // Loop over all possibles contents for each test
+    for (auto&& testCase : testCases) {
+        auto&& [testName, defField, expected] = testCase;
+        SECTION(testName) {
+            NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl;
+
+            // Increase the precision for the output
+            NR_COUT << std::fixed << std::setprecision(10);
+
+            // Check all values
+            const bool is3d = defField->nz > 1;
+            const size_t voxelNumber = defField.nVoxelsPerVolume();
+            const auto defFieldPtrX = defField.data(0);
+            const auto defFieldPtrY = defField.data(1);
+            const auto defFieldPtrZ = defField.data(2);
+            for (auto i = 0; i < voxelNumber; i++) {
+                float3 result{ static_cast<float>(defFieldPtrX[i]), static_cast<float>(defFieldPtrY[i]), is3d ? defFieldPtrZ[i] : 0.f };
+                float3 diff{ abs(result.x - expected[i].x), abs(result.y - expected[i].y), abs(result.z - expected[i].z) };
+                if (diff.x > 0 || diff.y > 0 || diff.z > 0) {
+                    NR_COUT << "[i]=" << i;
+                    NR_COUT << " | diff=" << diff.to_string();
+                    NR_COUT << " | Result=" << result.to_string();
+                    NR_COUT << " | Expected=" << expected[i].to_string() << std::endl;
+                }
+                REQUIRE((diff.x == 0 && diff.y == 0 && diff.z == 0));
+            }
+        }
+    }
+}

From 387139adbf77e20fb6e6d5733992c168d4b7a16a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 26 Jan 2024 15:58:50 +0000
Subject: [PATCH 277/314] Add composition to affine deformation field unit test
 #92

---
 niftyreg_build_version.txt                   |  2 +-
 reg-test/reg_test_affineDeformationField.cpp | 92 +++++++++++++-------
 2 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e537bfeb..4391a334 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-395
+396
diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp
index f38ce164..d4ade149 100644
--- a/reg-test/reg_test_affineDeformationField.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -19,7 +19,7 @@ struct float3 {
 
 class AffineDeformationFieldTest {
 protected:
-    using TestData = std::tuple<std::string, NiftiImage&, mat44, vector<float3>>;
+    using TestData = std::tuple<std::string, NiftiImage&, NiftiImage, mat44, vector<float3>>;
     using TestCase = std::tuple<std::string, NiftiImage, vector<float3>>;
 
     inline static vector<TestCase> testCases;
@@ -45,6 +45,7 @@ class AffineDeformationFieldTest {
         testData.emplace_back(TestData(
             "2D Identity",
             reference2d,
+            NiftiImage(),
             identity,
             identityResult2d
         ));
@@ -55,6 +56,7 @@ class AffineDeformationFieldTest {
         testData.emplace_back(TestData(
             "3D Identity",
             reference3d,
+            NiftiImage(),
             identity,
             identityResult3d
         ));
@@ -70,6 +72,7 @@ class AffineDeformationFieldTest {
         testData.emplace_back(TestData(
             "2D Translation",
             reference2d,
+            NiftiImage(),
             translation,
             std::move(translationResult2d)
         ));
@@ -83,10 +86,26 @@ class AffineDeformationFieldTest {
         testData.emplace_back(TestData(
             "3D Translation",
             reference3d,
+            NiftiImage(),
             translation,
             std::move(translationResult3d)
         ));
 
+        // Create deformation fields and fill them with random values
+        NiftiImage defField2d = CreateDeformationField(reference2d);
+        NiftiImage defField3d = CreateDeformationField(reference3d);
+        auto defField2dPtr = defField2d.data();
+        auto defField2dPtrX = defField2d.data(0);
+        auto defField2dPtrY = defField2d.data(1);
+        auto defField3dPtr = defField3d.data();
+        auto defField3dPtrX = defField3d.data(0);
+        auto defField3dPtrY = defField3d.data(1);
+        auto defField3dPtrZ = defField3d.data(2);
+        for (auto i = 0; i < defField2d.nVoxels(); i++)
+            defField2dPtr[i] = static_cast<float>(rand()) / RAND_MAX;
+        for (auto i = 0; i < defField3d.nVoxels(); i++)
+            defField3dPtr[i] = static_cast<float>(rand()) / RAND_MAX;
+
         // Full affine - 2D
         // Test order [0,0] [1,0] [0,1] [1,1]
         mat44 affine;
@@ -94,46 +113,51 @@ class AffineDeformationFieldTest {
         affine.m[0][3] = -0.5;
         affine.m[1][3] = 1.5;
         affine.m[2][3] = 0.75;
-        for (int i = 0; i < 4; ++i)
-            for (int j = 0; j < 4; ++j)
+        for (int i = 0; i < 4; i++)
+            for (int j = 0; j < 4; j++)
                 affine.m[i][j] += ((static_cast<float>(rand()) / RAND_MAX) - 0.5f) / 10.f;
         vector<float3> affineResult2d(4);
-        for (int i = 0; i < 4; ++i) {
-            double x = identityResult2d[i].x;
-            double y = identityResult2d[i].y;
-            affineResult2d[i].x = static_cast<float>(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y);
-            affineResult2d[i].y = static_cast<float>(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y);
-
+        for (char compose = 0; compose < 2; compose++) {
+            for (int i = 0; i < 4; i++) {
+                double x = compose ? defField2dPtrX[i] : identityResult2d[i].x;
+                double y = compose ? defField2dPtrY[i] : identityResult2d[i].y;
+                affineResult2d[i].x = static_cast<float>(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y);
+                affineResult2d[i].y = static_cast<float>(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y);
+            }
+            testData.emplace_back(TestData(
+                "2D Affine"s + (compose ? " with Composition" : ""),
+                reference2d,
+                compose ? std::move(defField2d) : NiftiImage(),
+                affine,
+                affineResult2d
+            ));
         }
-        testData.emplace_back(TestData(
-            "2D Affine",
-            reference2d,
-            affine,
-            std::move(affineResult2d)
-        ));
 
         // Full affine - 3D
         // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1]
         vector<float3> affineResult3d(8);
-        for (int i = 0; i < 8; ++i) {
-            double x = identityResult3d[i].x;
-            double y = identityResult3d[i].y;
-            double z = identityResult3d[i].z;
-            affineResult3d[i].x = static_cast<float>(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z);
-            affineResult3d[i].y = static_cast<float>(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z);
-            affineResult3d[i].z = static_cast<float>(affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z);
+        for (char compose = 0; compose < 2; compose++) {
+            for (int i = 0; i < 8; i++) {
+                double x = compose ? defField3dPtrX[i] : identityResult3d[i].x;
+                double y = compose ? defField3dPtrY[i] : identityResult3d[i].y;
+                double z = compose ? defField3dPtrZ[i] : identityResult3d[i].z;
+                affineResult3d[i].x = static_cast<float>(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z);
+                affineResult3d[i].y = static_cast<float>(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z);
+                affineResult3d[i].z = static_cast<float>(affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z);
+            }
+            testData.emplace_back(TestData(
+                "3D Affine"s + (compose ? " with Composition" : ""),
+                reference3d,
+                compose ? std::move(defField3d) : NiftiImage(),
+                affine,
+                affineResult3d
+            ));
         }
-        testData.emplace_back(TestData(
-            "3D Affine",
-            reference3d,
-            affine,
-            std::move(affineResult3d)
-        ));
 
         for (auto&& testData : testData) {
             for (auto&& platformType : PlatformTypes) {
                 // Make a copy of the test data
-                auto [testName, reference, transMat, expRes] = testData;
+                auto [testName, reference, defField, transMat, expRes] = testData;
 
                 // Create the platform
                 unique_ptr<Platform> platform{ new Platform(platformType) };
@@ -143,15 +167,19 @@ class AffineDeformationFieldTest {
                 unique_ptr<AladinContentCreator> aladinContentCreator{ dynamic_cast<AladinContentCreator*>(platform->CreateContentCreator(ContentType::Aladin)) };
                 unique_ptr<AladinContent> aladinContent{ aladinContentCreator->Create(reference, reference, nullptr, &transMat, sizeof(float)) };
 
+                // Set the deformation field if composition is required
+                if (defField)
+                    aladinContent->SetDeformationField(NiftiImage(defField).disown());
+
                 // Do the calculation for Aladin
                 unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), aladinContent.get()) };
-                affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate();
+                affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate(defField);
 
                 // Get the result
-                NiftiImage defField(aladinContent->GetDeformationField(), NiftiImage::Copy::Image);
+                NiftiImage resDefField(aladinContent->GetDeformationField(), NiftiImage::Copy::Image);
 
                 // Save for testing
-                testCases.push_back({ testName + " - Aladin", std::move(defField), std::move(expRes) });
+                testCases.push_back({ testName + " - Aladin", std::move(resDefField), std::move(expRes) });
             }
         }
     }

From f7d5fc52dbd90c205bb6c42374b8f821f08cd735 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 29 Jan 2024 12:58:57 +0000
Subject: [PATCH 278/314] Add testing for compute to affine deformation field
 unit test #92

---
 niftyreg_build_version.txt                   |  2 +-
 reg-lib/Compute.cpp                          |  7 ++++++
 reg-lib/Compute.h                            |  1 +
 reg-lib/cuda/CudaCompute.cu                  |  9 +++++++
 reg-lib/cuda/CudaCompute.h                   |  1 +
 reg-lib/cuda/CudaGlobalTransformation.hpp    |  2 +-
 reg-test/reg_test_affineDeformationField.cpp | 25 +++++++++++++++++++-
 reg-test/reg_test_getDeformationField.cpp    |  4 ----
 8 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 4391a334..8b84f570 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-396
+397
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 0f2729d1..5c7882a9 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -421,3 +421,10 @@ NiftiImage Compute::ResampleGradient(int interpolation, float padding) {
     return warpedImage;
 }
 /* *************************************************************** */
+void Compute::GetAffineDeformationField(bool compose) {
+    reg_affine_getDeformationField(con.GetTransformationMatrix(),
+                                   con.GetDeformationField(),
+                                   compose,
+                                   con.GetReferenceMask());
+}
+/* *************************************************************** */
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index d39f8b45..fdf3e673 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -31,6 +31,7 @@ class Compute {
     virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ);
     virtual void BchUpdate(float scale, int bchUpdateValue);
     virtual void SymmetriseVelocityFields(Content& conBw);
+    virtual void GetAffineDeformationField(bool compose);
 
 protected:
     Content& con;
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 29033bb9..92eee7a1 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -430,3 +430,12 @@ NiftiImage CudaCompute::ResampleGradient(int interpolation, float padding) {
     return NiftiImage(con.GetWarpedGradient(), NiftiImage::Copy::Image);
 }
 /* *************************************************************** */
+void CudaCompute::GetAffineDeformationField(bool compose) {
+    CudaContent& con = dynamic_cast<CudaContent&>(this->con);
+    auto getAffineDeformationField = compose ? Cuda::GetAffineDeformationField<true> :
+                                               Cuda::GetAffineDeformationField<false>;
+    getAffineDeformationField(con.Content::GetTransformationMatrix(),
+                              con.Content::GetDeformationField(),
+                              con.GetDeformationFieldCuda());
+}
+/* *************************************************************** */
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 124d6b86..0982623d 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -30,6 +30,7 @@ class CudaCompute: public Compute {
     virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override;
     virtual void BchUpdate(float scale, int bchUpdateValue) override;
     virtual void SymmetriseVelocityFields(Content& conBw) override;
+    virtual void GetAffineDeformationField(bool compose) override;
 
 #ifndef NR_TESTING
 protected:
diff --git a/reg-lib/cuda/CudaGlobalTransformation.hpp b/reg-lib/cuda/CudaGlobalTransformation.hpp
index b5c483a7..0ecbc447 100644
--- a/reg-lib/cuda/CudaGlobalTransformation.hpp
+++ b/reg-lib/cuda/CudaGlobalTransformation.hpp
@@ -19,7 +19,7 @@ namespace NiftyReg::Cuda {
 /* *************************************************************** */
 template<bool compose=false>
 void GetAffineDeformationField(const mat44 *affineMatrix,
-                               const nifti_image *targetImage,
+                               const nifti_image *deformationField,
                                float4 *deformationFieldCuda);
 /* *************************************************************** */
 } // namespace NiftyReg::Cuda
diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp
index d4ade149..858b541b 100644
--- a/reg-test/reg_test_affineDeformationField.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -179,7 +179,30 @@ class AffineDeformationFieldTest {
                 NiftiImage resDefField(aladinContent->GetDeformationField(), NiftiImage::Copy::Image);
 
                 // Save for testing
-                testCases.push_back({ testName + " - Aladin", std::move(resDefField), std::move(expRes) });
+                testCases.push_back({ testName + " - Aladin", std::move(resDefField), expRes });
+
+                // Do the calculation also for Compute using Content
+                // Skip OpenCL as it is not supported
+                if (platform->GetPlatformType() == PlatformType::OpenCl)
+                    continue;
+
+                // Create the content
+                unique_ptr<ContentCreator> contentCreator{ platform->CreateContentCreator() };
+                unique_ptr<Content> content{ contentCreator->Create(reference, reference, nullptr, &transMat, sizeof(float)) };
+
+                // Set the deformation field if composition is required
+                if (defField)
+                    content->SetDeformationField(NiftiImage(defField).disown());
+
+                // Do the calculation
+                unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
+                compute->GetAffineDeformationField(defField);
+
+                // Get the result
+                resDefField = NiftiImage(content->GetDeformationField(), NiftiImage::Copy::Image);
+
+                // Save for testing
+                testCases.push_back({ testName, std::move(resDefField), std::move(expRes) });
             }
         }
     }
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index a0645743..8c6e0c67 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -24,10 +24,6 @@ class GetDeformationFieldTest {
         if (!testCases.empty())
             return;
 
-        // Create a random number generator
-        std::mt19937 gen(0);
-        std::uniform_real_distribution<float> distr(0, 1);
-
         // Create reference images
         constexpr NiftiImage::dim_t size = 5;
         NiftiImage reference2d({ size, size }, NIFTI_TYPE_FLOAT32);

From 0a4ba267cf15dbca85d34295495d0d993222013a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 30 Jan 2024 15:32:50 +0000
Subject: [PATCH 279/314] Use real index numbers returned from maskCuda in
 deformationFieldCuda and warpedGradientCuda #92

---
 niftyreg_build_version.txt                    |  2 +-
 reg-lib/cuda/CudaCompute.cu                   |  3 +-
 reg-lib/cuda/CudaLocalTransformation.cu       | 10 ++--
 .../cuda/CudaLocalTransformationKernels.cu    |  8 +--
 reg-lib/cuda/CudaResampling.cu                | 56 +++++++++----------
 reg-lib/cuda/CudaResampling.hpp               |  3 +-
 reg-lib/cuda/_reg_nmi_gpu.cu                  | 11 ++--
 reg-lib/cuda/_reg_ssd_gpu.cu                  | 14 ++---
 8 files changed, 49 insertions(+), 58 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 8b84f570..7ea3cf60 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-397
+398
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 92eee7a1..569581b1 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -16,6 +16,7 @@ void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
                   con.GetFloatingCuda(),
                   con.Content::GetWarped(),
                   con.GetWarpedCuda(),
+                  con.Content::GetDeformationField(),
                   con.GetDeformationFieldCuda(),
                   con.GetReferenceMaskCuda(),
                   con.GetActiveVoxelNumber(),
@@ -186,8 +187,8 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac
     getImageGradient(floating,
                      con.GetFloatingCuda(),
                      con.GetDeformationFieldCuda(),
+                     con.DefContent::GetWarpedGradient(),
                      con.GetWarpedGradientCuda(),
-                     con.GetActiveVoxelNumber(),
                      interpolation,
                      paddingValue,
                      activeTimePoint);
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index 71dd9c0e..06972269 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -33,9 +33,7 @@ void GetDeformationField(const nifti_image *controlPointImage,
                                                         controlPointImage->dz / referenceImage->dz);
 
     auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
-    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
     auto controlPointTexture = *controlPointTexturePtr;
-    auto maskTexture = *maskTexturePtr;
 
     // Get the reference matrix if composition is required
     thrust::device_vector<mat44> realToVoxelCudaVec;
@@ -46,13 +44,13 @@ void GetDeformationField(const nifti_image *controlPointImage,
     const auto realToVoxelCuda = composition ? realToVoxelCudaVec.data().get() : nullptr;
 
     if (referenceImage->nz > 1) {
-        thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) {
-            GetDeformationField3d<composition, bspline>(deformationFieldCuda, controlPointTexture, maskTexture, realToVoxelCuda,
+        thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
+            GetDeformationField3d<composition, bspline>(deformationFieldCuda, controlPointTexture, realToVoxelCuda,
                                                         referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index);
         });
     } else {
-        thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) {
-            GetDeformationField2d<composition, bspline>(deformationFieldCuda, controlPointTexture, maskTexture, realToVoxelCuda,
+        thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
+            GetDeformationField2d<composition, bspline>(deformationFieldCuda, controlPointTexture, realToVoxelCuda,
                                                         referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index);
         });
     }
diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu
index af983f9b..536f7719 100644
--- a/reg-lib/cuda/CudaLocalTransformationKernels.cu
+++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu
@@ -173,7 +173,6 @@ __device__ float4 GetSlidedValues(int x, int y, int z,
 template<bool composition, bool bspline>
 __device__ void GetDeformationField3d(float4 *deformationField,
                                       cudaTextureObject_t controlPointTexture,
-                                      cudaTextureObject_t maskTexture,
                                       const mat44 *realToVoxel,
                                       const int3 referenceImageDim,
                                       const int3 controlPointImageDim,
@@ -207,8 +206,7 @@ __device__ void GetDeformationField3d(float4 *deformationField,
         nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) };
     } else { // starting deformation field is blank - !composition
-        const int voxel = tex1Dfetch<int>(maskTexture, index);
-        const auto [x, y, z] = reg_indexToDims_cuda<true>(voxel, referenceImageDim);
+        const auto [x, y, z] = reg_indexToDims_cuda<true>(index, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -245,7 +243,6 @@ __device__ void GetDeformationField3d(float4 *deformationField,
 template<bool composition, bool bspline>
 __device__ void GetDeformationField2d(float4 *deformationField,
                                       cudaTextureObject_t controlPointTexture,
-                                      cudaTextureObject_t maskTexture,
                                       const mat44 *realToVoxel,
                                       const int3 referenceImageDim,
                                       const int3 controlPointImageDim,
@@ -272,8 +269,7 @@ __device__ void GetDeformationField2d(float4 *deformationField,
         nodePre = { Floor(xVoxel), Floor(yVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) };
     } else { // starting deformation field is blank - !composition
-        const int voxel = tex1Dfetch<int>(maskTexture, index);
-        const auto [x, y, z] = reg_indexToDims_cuda<false>(voxel, referenceImageDim);
+        const auto [x, y, z] = reg_indexToDims_cuda<false>(index, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu
index 6cde737d..5c21bee8 100644
--- a/reg-lib/cuda/CudaResampling.cu
+++ b/reg-lib/cuda/CudaResampling.cu
@@ -65,6 +65,7 @@ void ResampleImage(const nifti_image *floatingImage,
                    const float *floatingImageCuda,
                    const nifti_image *warpedImage,
                    float *warpedImageCuda,
+                   const nifti_image *deformationField,
                    const float4 *deformationFieldCuda,
                    const int *maskCuda,
                    const size_t activeVoxelNumber,
@@ -73,25 +74,23 @@ void ResampleImage(const nifti_image *floatingImage,
     if (interpolation != 1)
         NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");
 
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
+    const size_t floVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
+    const size_t defVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
-    auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
-    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
+    auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, defVoxelNumber, cudaChannelFormatKindFloat, 4);
     auto deformationFieldTexture = *deformationFieldTexturePtr;
-    auto maskTexture = *maskTexturePtr;
     // Get the real to voxel matrix
     const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
 
     for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) {
         NR_DEBUG((is3d ? "3" : "2") << "D resampling of volume number " << t);
-        auto curWarpedCuda = warpedImageCuda + t * voxelNumber;
-        auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
+        auto curWarpedCuda = warpedImageCuda + t * floVoxelNumber;
+        auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + t * floVoxelNumber, floVoxelNumber, cudaChannelFormatKindFloat, 1);
         auto floatingTexture = *floatingTexturePtr;
-        thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [
-            curWarpedCuda, floatingTexture, deformationFieldTexture, maskTexture, floatingMatrix, floatingDim, paddingValue
+        thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [
+            curWarpedCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDim, paddingValue
         ]__device__(const int index) {
             // Get the real world deformation in the floating space
-            const int voxel = tex1Dfetch<int>(maskTexture, index);
             const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, index);
 
             // Get the voxel-based deformation in the floating space and compute the linear interpolation
@@ -141,36 +140,37 @@ void ResampleImage(const nifti_image *floatingImage,
                 }
             }
 
-            curWarpedCuda[voxel] = intensity;
+            curWarpedCuda[index] = intensity;
         });
     }
 }
-template void ResampleImage<false>(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float);
-template void ResampleImage<true>(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float);
+template void ResampleImage<false>(const nifti_image*, const float*, const nifti_image*, float*, const nifti_image*, const float4*, const int*, const size_t, const int, const float);
+template void ResampleImage<true>(const nifti_image*, const float*, const nifti_image*, float*, const nifti_image*, const float4*, const int*, const size_t, const int, const float);
 /* *************************************************************** */
 template<bool is3d>
 void GetImageGradient(const nifti_image *floatingImage,
                       const float *floatingImageCuda,
                       const float4 *deformationFieldCuda,
+                      const nifti_image *warpedGradient,
                       float4 *warpedGradientCuda,
-                      const size_t activeVoxelNumber,
                       const int interpolation,
                       float paddingValue,
                       const int activeTimePoint) {
     if (interpolation != 1)
         NR_FATAL_ERROR("Only linear interpolation is supported on the GPU");
 
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
+    const size_t refVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 3);
+    const size_t floVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
     const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
     if (paddingValue != paddingValue) paddingValue = 0;
-    auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);
-    auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
+    auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * floVoxelNumber, floVoxelNumber, cudaChannelFormatKindFloat, 1);
+    auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, refVoxelNumber, cudaChannelFormatKindFloat, 4);
     auto floatingTexture = *floatingTexturePtr;
     auto deformationFieldTexture = *deformationFieldTexturePtr;
     // Get the real to voxel matrix
     const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
 
-    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), refVoxelNumber, [
         warpedGradientCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDim, paddingValue
     ]__device__(const int index) {
             // Get the real world deformation in the floating space
@@ -230,8 +230,8 @@ void GetImageGradient(const nifti_image *floatingImage,
             warpedGradientCuda[index] = gradientValue;
     });
 }
-template void GetImageGradient<false>(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int);
-template void GetImageGradient<true>(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int);
+template void GetImageGradient<false>(const nifti_image*, const float*, const float4*, const nifti_image*, float4*, const int, float, const int);
+template void GetImageGradient<true>(const nifti_image*, const float*, const float4*, const nifti_image*, float4*, const int, float, const int);
 /* *************************************************************** */
 template<bool is3d>
 static float3 GetRealImageSpacing(const nifti_image *image) {
@@ -273,15 +273,14 @@ void ResampleGradient(const nifti_image *floatingImage,
     if (interpolation != 1)
         NR_FATAL_ERROR("Only linear interpolation is supported");
 
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
+    const size_t floVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3);
+    const size_t defVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
     const int3 floatingDims = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz);
     const int3 defFieldDims = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
-    auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
-    auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4);
-    auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1);
+    auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda, floVoxelNumber, cudaChannelFormatKindFloat, 4);
+    auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, defVoxelNumber, cudaChannelFormatKindFloat, 4);
     auto floatingTexture = *floatingTexturePtr;
     auto deformationFieldTexture = *deformationFieldTexturePtr;
-    auto maskTexture = *maskTexturePtr;
 
     // Get the real to voxel matrix
     const mat44& floatingMatrix = floatingImage->sform_code != 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk;
@@ -293,11 +292,10 @@ void ResampleGradient(const nifti_image *floatingImage,
     // Reorientation matrix is assessed in order to remove the rigid component
     const mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
 
-    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [
-        warpedImageCuda, floatingTexture, deformationFieldTexture, maskTexture, floatingMatrix, floatingDims, defFieldDims, realSpacing, reorient, paddingValue
+    thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [
+        warpedImageCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDims, defFieldDims, realSpacing, reorient, paddingValue
     ]__device__(const int index) {
         // Get the real world deformation in the floating space
-        const int voxel = tex1Dfetch<int>(maskTexture, index);
         const float4 realDeformation = tex1Dfetch<float4>(deformationFieldTexture, index);
 
         // Get the voxel-based deformation in the floating space and compute the linear interpolation
@@ -346,7 +344,7 @@ void ResampleGradient(const nifti_image *floatingImage,
         // Compute the Jacobian matrix
         constexpr float basis[] = { 1.f, 0.f };
         constexpr float deriv[] = { -1.f, 1.f };
-        auto [x, y, z] = reg_indexToDims_cuda<is3d>(voxel, defFieldDims);
+        auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, defFieldDims);
         mat33 jacMat{};
         for (char c = 0; c < (is3d ? 2 : 1); c++) {
             if constexpr (is3d) {
@@ -432,7 +430,7 @@ void ResampleGradient(const nifti_image *floatingImage,
             warpedValue.x = jacMat.m[0][0] * gradientValue.x + jacMat.m[0][1] * gradientValue.y;
             warpedValue.y = jacMat.m[1][0] * gradientValue.x + jacMat.m[1][1] * gradientValue.y;
         }
-        warpedImageCuda[voxel] = warpedValue;
+        warpedImageCuda[index] = warpedValue;
     });
 }
 template void ResampleGradient<false>(const nifti_image*, const float4*, const nifti_image*, float4*, const nifti_image*, const float4*, const int*, const size_t, const int, const float);
diff --git a/reg-lib/cuda/CudaResampling.hpp b/reg-lib/cuda/CudaResampling.hpp
index 7f6bbac8..fbbcc95a 100644
--- a/reg-lib/cuda/CudaResampling.hpp
+++ b/reg-lib/cuda/CudaResampling.hpp
@@ -22,6 +22,7 @@ void ResampleImage(const nifti_image *floatingImage,
                    const float *floatingImageCuda,
                    const nifti_image *warpedImage,
                    float *warpedImageCuda,
+                   const nifti_image *deformationField,
                    const float4 *deformationFieldCuda,
                    const int *maskCuda,
                    const size_t activeVoxelNumber,
@@ -32,8 +33,8 @@ template<bool is3d>
 void GetImageGradient(const nifti_image *floatingImage,
                       const float *floatingImageCuda,
                       const float4 *deformationFieldCuda,
+                      const nifti_image *warpedGradient,
                       float4 *warpedGradientCuda,
-                      const size_t activeVoxelNumber,
                       const int interpolation,
                       float paddingValue,
                       const int activeTimePoint);
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index 8d482b89..b117e568 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -320,11 +320,10 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
     auto warpedImageTexture = *warpedImageTexturePtr;
     auto warpedGradientTexture = *warpedGradientTexturePtr;
 
-    thrust::for_each_n(thrust::device, thrust::make_counting_iterator<unsigned>(0), activeVoxelNumber, [=]__device__(const unsigned index) {
-        const int voxel = maskCuda[index];
-        const float refValue = tex1Dfetch<float>(referenceImageTexture, voxel);
+    thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
+        const float refValue = tex1Dfetch<float>(referenceImageTexture, index);
         if (refValue != refValue) return;
-        const float warValue = tex1Dfetch<float>(warpedImageTexture, voxel);
+        const float warValue = tex1Dfetch<float>(warpedImageTexture, index);
         if (warValue != warValue) return;
         const float4 warGradValue = tex1Dfetch<float4>(warpedGradientTexture, index);
 
@@ -367,12 +366,12 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage,
         }
 
         // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way
-        float4 gradValue = voxelBasedGradientCuda[voxel];
+        float4 gradValue = voxelBasedGradientCuda[index];
         gradValue.x += static_cast<float>(timePointWeight * (refDeriv.x + warDeriv.x - nmi * jointDeriv.x) / normalisedJE);
         gradValue.y += static_cast<float>(timePointWeight * (refDeriv.y + warDeriv.y - nmi * jointDeriv.y) / normalisedJE);
         if constexpr (is3d)
             gradValue.z += static_cast<float>(timePointWeight * (refDeriv.z + warDeriv.z - nmi * jointDeriv.z) / normalisedJE);
-        voxelBasedGradientCuda[voxel] = gradValue;
+        voxelBasedGradientCuda[index] = gradValue;
     });
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 77a2f739..073906b7 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -149,13 +149,11 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
     const double adjustedWeight = timePointWeight / validVoxelNumber;
 
     // Calculate the SSD gradient
-    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) {
-        const int voxel = maskCuda[index];
-
-        const double refValue = tex1Dfetch<float>(referenceTexture, voxel);
+    thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
+        const double refValue = tex1Dfetch<float>(referenceTexture, index);
         if (refValue != refValue) return;
 
-        const double warValue = tex1Dfetch<float>(warpedTexture, voxel);
+        const double warValue = tex1Dfetch<float>(warpedTexture, index);
         if (warValue != warValue) return;
 
         const float4 spaGradientValue = tex1Dfetch<float4>(spatialGradTexture, index);
@@ -164,14 +162,14 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
             spaGradientValue.z != spaGradientValue.z)
             return;
 
-        const double weight = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, voxel) : 1.f;
+        const double weight = localWeightSimTexture ? tex1Dfetch<float>(localWeightSimTexture, index) : 1.f;
         const double common = -2.0 * (refValue - warValue) * adjustedWeight * weight;
 
-        float4 ssdGradientValue = ssdGradientCuda[voxel];
+        float4 ssdGradientValue = ssdGradientCuda[index];
         ssdGradientValue.x += common * spaGradientValue.x;
         ssdGradientValue.y += common * spaGradientValue.y;
         ssdGradientValue.z += common * spaGradientValue.z;
-        ssdGradientCuda[voxel] = ssdGradientValue;
+        ssdGradientCuda[index] = ssdGradientValue;
     });
 }
 /* *************************************************************** */

From 587eac1072cd681fc62c2cc6909f36108f0cc326 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 31 Jan 2024 16:00:00 +0000
Subject: [PATCH 280/314] Use Codecov for coverage

---
 .github/workflows/coverage.yml | 8 +++++---
 README.md                      | 6 ++----
 niftyreg_build_version.txt     | 2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index 46c18082..419d0e27 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -44,7 +44,9 @@ jobs:
         run: make coverage
         working-directory: build
 
-      - name: Upload coverage to Coveralls
-        uses: coverallsapp/github-action@v1
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v3
         with:
-          path-to-lcov: build/coverage.info
\ No newline at end of file
+          directory: build
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
\ No newline at end of file
diff --git a/README.md b/README.md
index 8e1e3689..ad24879e 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,8 @@
 # NiftyReg
 
 [![License](https://img.shields.io/github/license/KCL-BMEIS/NiftyReg)](https://github.com/KCL-BMEIS/niftyreg/blob/master/LICENSE.txt)
-[![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/linux.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/linux.yml?query=branch%3Amaster)
-[![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/macos.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/macos.yml?query=branch%3Amaster)
-[![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/windows.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/windows.yml?query=branch%3Amaster)
-[![Coverage Status](https://coveralls.io/repos/github/KCL-BMEIS/niftyreg/badge.svg?branch=master)](https://coveralls.io/github/KCL-BMEIS/niftyreg?branch=master)
+[![Tests](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml/badge.svg)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml)
+[![Coverage Status](https://codecov.io/gh/KCL-BMEIS/niftyreg/graph/badge.svg?token=lgLtkSC7kX)](https://codecov.io/gh/KCL-BMEIS/niftyreg)
 
 
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 7ea3cf60..45843d29 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-398
+399

From c3c9fdaf04fdb0c77bc64a539a502e6d72368d89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 5 Feb 2024 15:32:14 +0000
Subject: [PATCH 281/314] Update minimum CMake version and use the new CUDA
 method

---
 CMakeLists.txt                                | 21 ++++------
 niftyreg_build_version.txt                    |  2 +-
 reg-apps/CMakeLists.txt                       | 25 ++++-------
 reg-lib/cuda/CMakeLists.txt                   | 41 ++++++++++---------
 .../{_reg_cudainfo.cpp => _reg_cudainfo.cu}   |  0
 .../{checkCudaCard.cpp => checkCudaCard.cu}   |  8 ++--
 6 files changed, 41 insertions(+), 56 deletions(-)
 rename reg-lib/cuda/{_reg_cudainfo.cpp => _reg_cudainfo.cu} (100%)
 rename reg-lib/cuda/{checkCudaCard.cpp => checkCudaCard.cu} (78%)
 mode change 100755 => 100644

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a5aa1fc3..73aab8b1 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,9 +1,4 @@
-cmake_minimum_required(VERSION 3.2.2)
-if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
-  mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY)
-else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
-  mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY)
-endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$")
+cmake_minimum_required(VERSION 3.18)
 #-----------------------------------------------------------------------------
 project(NiftyReg)
 #-----------------------------------------------------------------------------
@@ -157,19 +152,19 @@ if(USE_OPENCL)
 endif(USE_OPENCL)
 #-----------------------------------------------------------------------------
 if(USE_CUDA)
-  # Check if the CUDA drivers are available
-  find_package(CUDA REQUIRED)
-  mark_as_advanced(CUDA_SDK_ROOT_DIR)
+  # Check if the CUDA Toolkit is available
+  enable_language(CUDA)
+  find_package(CUDAToolkit)
   option(CUDA_FAST_MATH "To use the fast math flag" OFF)
   mark_as_advanced(CUDA_FAST_MATH)
-  if(NOT CUDA_FOUND)
+  if(NOT CMAKE_CUDA_COMPILER)
     set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
     message(SEND_ERROR "CUDA not found. The USE_CUDA flag is turned OFF")
-  else(NOT CUDA_FOUND)
+  else(NOT CMAKE_CUDA_COMPILER)
     include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda)
-    include_directories(${CUDA_INCLUDE_DIRS})
+    include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
     add_definitions(-DUSE_CUDA)
-  endif(NOT CUDA_FOUND)
+  endif(NOT CMAKE_CUDA_COMPILER)
 endif(USE_CUDA)
 #-----------------------------------------------------------------------------
 if(USE_SSE)
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 45843d29..d411bb7c 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-399
+400
diff --git a/reg-apps/CMakeLists.txt b/reg-apps/CMakeLists.txt
index c9a9e955..85b033ed 100755
--- a/reg-apps/CMakeLists.txt
+++ b/reg-apps/CMakeLists.txt
@@ -1,5 +1,4 @@
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
-
 #-----------------------------------------------------------------------------
 add_executable(reg_average reg_average.cpp)
 target_link_libraries(reg_average _reg_resampling _reg_globalTrans _reg_localTrans _reg_maths _reg_tools _reg_ReadWriteImage)
@@ -24,19 +23,11 @@ add_executable(reg_jacobian reg_jacobian.cpp)
 target_link_libraries(reg_jacobian _reg_resampling _reg_localTrans _reg_tools _reg_globalTrans _reg_ReadWriteImage)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/reg_jacobian.h.in ${CMAKE_CURRENT_BINARY_DIR}/reg_jacobian.h @ONLY)
 #-----------------------------------------------------------------------------
-if(USE_CUDA)
-  cuda_add_executable(reg_f3d reg_f3d.cpp)
-else(USE_CUDA)
-  add_executable(reg_f3d reg_f3d.cpp)
-endif(USE_CUDA)
+add_executable(reg_f3d reg_f3d.cpp)
 target_link_libraries(reg_f3d _reg_f3d)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/reg_f3d.h.in ${CMAKE_CURRENT_BINARY_DIR}/reg_f3d.h @ONLY)
 #-----------------------------------------------------------------------------
-if(USE_CUDA)
-  cuda_add_executable(reg_aladin reg_aladin.cpp)
-else(USE_CUDA)
-  add_executable(reg_aladin reg_aladin.cpp)
-endif(USE_CUDA)
+add_executable(reg_aladin reg_aladin.cpp)
 target_link_libraries(reg_aladin _reg_aladin)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/reg_aladin.h.in ${CMAKE_CURRENT_BINARY_DIR}/reg_aladin.h @ONLY)
 #-----------------------------------------------------------------------------
@@ -65,11 +56,11 @@ if(USE_CUDA OR USE_OPENCL)
 endif(USE_CUDA OR USE_OPENCL)
 #-----------------------------------------------------------------------------
 foreach(MODULE_NAME ${MODULE_LIST})
-    install(TARGETS ${MODULE_NAME}
-        RUNTIME DESTINATION bin COMPONENT Runtime
-        LIBRARY DESTINATION lib COMPONENT Runtime
-        ARCHIVE DESTINATION lib COMPONENT Runtime
-    )
+  install(TARGETS ${MODULE_NAME}
+    RUNTIME DESTINATION bin COMPONENT Runtime
+    LIBRARY DESTINATION lib COMPONENT Runtime
+    ARCHIVE DESTINATION lib COMPONENT Runtime
+  )
 endforeach(MODULE_NAME)
 #-----------------------------------------------------------------------------
 install(PROGRAMS groupwise_niftyreg_params.sh DESTINATION bin COMPONENT Runtime)
@@ -77,4 +68,4 @@ install(PROGRAMS groupwise_niftyreg_run.sh DESTINATION bin COMPONENT Runtime)
 #-----------------------------------------------------------------------------
 #add_executable(reg_ppcnr reg_ppcnr.cpp)
 #target_link_libraries(reg_ppcnr _reg_ReadWriteImage)
-#-----------------------------------------------------------------------------
+#-----------------------------------------------------------------------------
\ No newline at end of file
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 0fc5d63c..c0aa3c0f 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -1,10 +1,10 @@
 #-----------------------------------------------------------------------------
 # Compile an executable to check if there is at least one suitable graphical card
-try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cpp
-  CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CUDA_INCLUDE_DIRS} -DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY}
-  COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
-  RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR
-  )
+try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cu
+    CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
+    COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
+    RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR
+)
 # Check if the executable could not compile
 if(NOT COMPILE_RESULT_VAR)
     message(WARNING "The code to check the presence of a CUDA-enabled card failed.")
@@ -20,36 +20,37 @@ elseif(RUN_RESULT_VAR)
     set(USE_CUDA OFF CACHE BOOL "To enable CUDA for a GPU implementation of the code" FORCE)
     return()
 else(NOT COMPILE_RESULT_VAR)
-    message(STATUS "Found CUDA (v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}) and a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})")
+    message(STATUS "Found a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})")
+    string(REPLACE "." "" CAPABILITY_CODE ${RUN_OUTPUT_VAR})
     # Set C++ standard version for CUDA, and enable extended lambdas and relaxed constexpr support
-    set(CUDA_NVCC_FLAGS "-std=c++17 --extended-lambda --expt-relaxed-constexpr")
-    #check cuda version and adjust compile flags
-    if("${RUN_OUTPUT_VAR}" LESS "30")
+    set(CMAKE_CUDA_STANDARD 17)
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda --expt-relaxed-constexpr")
+    # Check CUDA version and adjust compile flags
+    if("${CAPABILITY_CODE}" LESS "30")
         set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
         message(SEND_ERROR "CUDA card with capability 1.x or 2.x are not supported. The USE_CUDA flag is turned OFF")
         return()
-    endif("${RUN_OUTPUT_VAR}" LESS "30")
-    string(REPLACE "." "" CAPABILITY_CODE ${RUN_OUTPUT_VAR})
-    set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_${CAPABILITY_CODE},code=sm_${CAPABILITY_CODE}")
+    endif("${CAPABILITY_CODE}" LESS "30")
+    set(CMAKE_CUDA_ARCHITECTURES "${CAPABILITY_CODE}-real")
     # If desired, add PIC flags
     if(CMAKE_POSITION_INDEPENDENT_CODE AND DEFINED CMAKE_C_COMPILE_OPTIONS_PIC)
         # Add (undocumented) CMake flag that should tell the host compiler to generate position independent code
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}")
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}")
     endif()
     # Adjust for debug and release versions
     if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-v -g -G")
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-v -g -G")
     else(CMAKE_BUILD_TYPE STREQUAL "Debug")
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-O3")
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-O3")
     endif(CMAKE_BUILD_TYPE STREQUAL "Debug")
     if(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -use_fast_math")
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -use_fast_math")
         message(STATUS "CUDA fast math enabled")
     endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
 endif(NOT COMPILE_RESULT_VAR)
 #-----------------------------------------------------------------------------
 set(NAME _reg_cuda_kernels)
-cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
+add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     ../AladinContent.cpp
     affineDeformationKernel.cu
     blockMatchingKernel.cu
@@ -78,7 +79,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     _reg_nmi_gpu.cu
     _reg_ssd_gpu.cu
 )
-target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
+target_link_libraries(${NAME} CUDA::cuda_driver)
 install(TARGETS ${NAME}
     RUNTIME DESTINATION lib
     LIBRARY DESTINATION lib
@@ -87,8 +88,8 @@ install(TARGETS ${NAME}
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
 set(NAME _reg_cudainfo)
-cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp)
-target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY})
+add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cu)
+target_link_libraries(${NAME} CUDA::cuda_driver)
 install(TARGETS ${NAME}
     RUNTIME DESTINATION lib
     LIBRARY DESTINATION lib
diff --git a/reg-lib/cuda/_reg_cudainfo.cpp b/reg-lib/cuda/_reg_cudainfo.cu
similarity index 100%
rename from reg-lib/cuda/_reg_cudainfo.cpp
rename to reg-lib/cuda/_reg_cudainfo.cu
diff --git a/reg-lib/cuda/checkCudaCard.cpp b/reg-lib/cuda/checkCudaCard.cu
old mode 100755
new mode 100644
similarity index 78%
rename from reg-lib/cuda/checkCudaCard.cpp
rename to reg-lib/cuda/checkCudaCard.cu
index b278076e..65ae90d6
--- a/reg-lib/cuda/checkCudaCard.cpp
+++ b/reg-lib/cuda/checkCudaCard.cu
@@ -7,20 +7,18 @@ int main() {
     int deviceCount = 0, output = 0;
     const cudaError_t cudaResultCode = cudaGetDeviceCount(&deviceCount);
 
-    // Error when running cudaGetDeviceCount
     if (cudaResultCode != cudaSuccess) {
         std::cerr << cudaGetErrorString(cudaResultCode) << " (CUDA Error Code=" << cudaResultCode << ")" << std::endl;
         return EXIT_FAILURE;
     }
 
-    // Error when running cudaGetDeviceCount
     if (deviceCount == 0) {
         std::cerr << "No device detected" << std::endl;
         return EXIT_FAILURE;
     }
 
-    // Detect device capability and picks the best
-    for (unsigned i = 0; i < deviceCount; ++i) {
+    // Detect device capability and pick the best
+    for (int i = 0; i < deviceCount; i++) {
         cudaSetDevice(i);
         cudaDeviceProp deviceProp;
         cudaGetDeviceProperties(&deviceProp, i);
@@ -28,7 +26,7 @@ int main() {
     }
 
     // Output for device capability
-    std::cout << output;
+    std::cout << output / 10 << "." << output % 10;
 
     return EXIT_SUCCESS;
 }

From 2c3432c31bb5c792e9e3419954ce8a206b22fc77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 6 Feb 2024 13:07:51 +0000
Subject: [PATCH 282/314] Add a GitHub Action to build executables on a release
 #92

---
 .github/workflows/release.yml | 114 ++++++++++++++++++++++++++++++++++
 CMakeLists.txt                |   1 +
 niftyreg_build_version.txt    |   2 +-
 reg-lib/cuda/CMakeLists.txt   |  97 +++++++++++++++--------------
 4 files changed, 168 insertions(+), 46 deletions(-)
 create mode 100644 .github/workflows/release.yml

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 00000000..9b001d7d
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,114 @@
+name: Release
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-20.04, macos-latest, windows-2019]
+        platform: [cpu, cuda]
+        include:
+          - platform: cpu
+            platform-name: ""
+            use-cuda: "OFF"
+            use-opencl: "OFF"
+          - platform: cuda
+            platform-name: "-CUDA"
+            use-cuda: "ON"
+            use-opencl: "ON"
+          - os: ubuntu-20.04    # For Ubuntu only
+            os-name: "Ubuntu"
+          - os: macos-latest    # For macOS only
+            os-name: "macOS"
+            use-opencl: "ON"
+          - sudo: "sudo"        # For Ubuntu and macOS
+            c-compiler: "gcc"
+            cxx-compiler: "g++"
+          - os: windows-2019    # For Windows only
+            os-name: "Windows"
+            sudo: ""
+            c-compiler: "cl.exe"
+            cxx-compiler: "cl.exe"
+          - build_type: "Release" # For all platforms
+        exclude:
+          - os: macos-latest
+            platform: cuda
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Add MSBuild to PATH
+        uses: microsoft/setup-msbuild@v1.3
+        if: matrix.os-name == 'Windows'
+
+      - name: Install CUDA Toolkit
+        uses: Jimver/cuda-toolkit@v0.2.14
+        id: cuda-toolkit
+        if: matrix.platform == 'cuda'
+        with:
+          cuda: '11.8.0'
+          method: network
+          use-github-cache: false
+          use-local-cache: false
+
+      - name: Configure NiftyReg
+        shell: bash
+        run: |
+          mkdir build
+          cd build
+          cmake -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \
+                -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \
+                -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+                -DBUILD_ALL_DEP=ON \
+                -DCHECK_GPU=OFF \
+                -DUSE_CUDA=${{ matrix.use-cuda }} \
+                -DUSE_OPENCL=${{ matrix.use-opencl }} \
+                -DUSE_SSE=ON \
+                -DUSE_OPENMP=ON \
+                -DBUILD_TESTING=OFF \
+                ..
+
+      - name: Build NiftyReg
+        shell: bash
+        run: cmake --build build --config ${{ matrix.build_type }}
+
+      - name: Prepare the variables
+        id: vars
+        shell: bash
+        run: echo "output-folder=NiftyReg-${{ matrix.os-name }}${{ matrix.platform-name }}-${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
+
+      - name: Prepare the package
+        if: matrix.os-name == 'Windows'
+        shell: powershell
+        working-directory: build/reg-apps/Release
+        run: |
+          New-Item -ItemType Directory -Force -Path ${{ steps.vars.outputs.output-folder }}
+          Move-Item -Path *.exe -Destination ${{ steps.vars.outputs.output-folder }}
+          Compress-Archive -Path ${{ steps.vars.outputs.output-folder }} -DestinationPath ../../NiftyReg.zip
+
+      - name: Prepare the package
+        if: matrix.os-name == 'Ubuntu'
+        working-directory: build/reg-apps
+        run: |
+          mkdir -p ${{ steps.vars.outputs.output-folder }}
+          find . -maxdepth 1 -type f -executable -exec mv {} ${{ steps.vars.outputs.output-folder }} \;
+          zip -r ../NiftyReg.zip ${{ steps.vars.outputs.output-folder }}
+
+      - name: Prepare the package
+        if: matrix.os-name == 'macOS'
+        working-directory: build/reg-apps
+        run: |
+          mkdir -p ${{ steps.vars.outputs.output-folder }}
+          find . -maxdepth 1 -type f -perm +111 -exec mv {} ${{ steps.vars.outputs.output-folder }} \;
+          zip -r ../NiftyReg.zip ${{ steps.vars.outputs.output-folder }}
+
+      - name: Upload the package
+        uses: svenstaro/upload-release-action@v2
+        with:
+          repo_token: ${{ github.token }}
+          file: build/NiftyReg.zip
+          asset_name: ${{ steps.vars.outputs.output-folder }}.zip
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 73aab8b1..7a59b40a 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -67,6 +67,7 @@ option(USE_CUDA "To use the CUDA platform" OFF)
 option(USE_OPENCL "To use the OpenCL platform" OFF)
 option(USE_OPENMP "To use openMP for multi-CPU processing" ON)
 option(USE_SSE "To enable SSE computation in some case" ON)
+option(CHECK_GPU "To check if a GPU is available" ON)
 #-----------------------------------------------------------------------------
 option(USE_NRRD "To use the NRRD file format" OFF)
 mark_as_advanced(USE_NRRD)
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index d411bb7c..066cbfe9 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-400
+401
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index c0aa3c0f..9685b0b6 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -1,53 +1,60 @@
-#-----------------------------------------------------------------------------
-# Compile an executable to check if there is at least one suitable graphical card
-try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cu
-    CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
-    COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
-    RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR
-)
-# Check if the executable could not compile
-if(NOT COMPILE_RESULT_VAR)
-    message(WARNING "The code to check the presence of a CUDA-enabled card failed.")
-    message("The USE_CUDA flag has been turned OFF.")
-    set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
-    return()
-    # Check if the executable return failure
-elseif(RUN_RESULT_VAR)
-    message(WARNING "No CUDA-enabled card has been detected")
-    message("Result code: ${RUN_RESULT_VAR}")
-    message("Error message: ${RUN_OUTPUT_VAR}")
-    message("The USE_CUDA flag has been turned OFF.")
-    set(USE_CUDA OFF CACHE BOOL "To enable CUDA for a GPU implementation of the code" FORCE)
-    return()
-else(NOT COMPILE_RESULT_VAR)
+if(CHECK_GPU)
+    # Compile an executable to check if there is at least one suitable CUDA card
+    try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cu
+        CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
+        COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
+        RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR
+    )
+    # Check if the executable won't compile
+    if(NOT COMPILE_RESULT_VAR)
+        message(WARNING "The code to check the presence of a CUDA-enabled card failed.")
+        message("The USE_CUDA flag has been turned OFF.")
+        set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
+        return()
+    # Check if the executable returns failure
+    elseif(RUN_RESULT_VAR)
+        message(WARNING "No CUDA-enabled card has been detected")
+        message("Result code: ${RUN_RESULT_VAR}")
+        message("Error message: ${RUN_OUTPUT_VAR}")
+        message("The USE_CUDA flag has been turned OFF.")
+        set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
+        return()
+    endif(NOT COMPILE_RESULT_VAR)
     message(STATUS "Found a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})")
     string(REPLACE "." "" CAPABILITY_CODE ${RUN_OUTPUT_VAR})
-    # Set C++ standard version for CUDA, and enable extended lambdas and relaxed constexpr support
-    set(CMAKE_CUDA_STANDARD 17)
-    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda --expt-relaxed-constexpr")
     # Check CUDA version and adjust compile flags
-    if("${CAPABILITY_CODE}" LESS "30")
+    if("${CAPABILITY_CODE}" LESS "60")
         set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
-        message(SEND_ERROR "CUDA card with capability 1.x or 2.x are not supported. The USE_CUDA flag is turned OFF")
+        message(SEND_ERROR "CUDA cards with capability less than 6.0 are not supported. The USE_CUDA flag is turned OFF")
         return()
-    endif("${CAPABILITY_CODE}" LESS "30")
+    endif("${CAPABILITY_CODE}" LESS "60")
     set(CMAKE_CUDA_ARCHITECTURES "${CAPABILITY_CODE}-real")
-    # If desired, add PIC flags
-    if(CMAKE_POSITION_INDEPENDENT_CODE AND DEFINED CMAKE_C_COMPILE_OPTIONS_PIC)
-        # Add (undocumented) CMake flag that should tell the host compiler to generate position independent code
-        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}")
-    endif()
-    # Adjust for debug and release versions
-    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
-        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-v -g -G")
-    else(CMAKE_BUILD_TYPE STREQUAL "Debug")
-        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-O3")
-    endif(CMAKE_BUILD_TYPE STREQUAL "Debug")
-    if(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
-        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -use_fast_math")
-        message(STATUS "CUDA fast math enabled")
-    endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
-endif(NOT COMPILE_RESULT_VAR)
+else(CHECK_GPU)
+    # If no GPU check is performed, assume a minimum capability of 6.0
+    # Generate compiled code for all architectures supported by CUDA 11.8
+    # Also, generate PTX code for future architectures
+    # Therefore, the code should run on any GPU with a capability of 6.0 or higher
+    set(CMAKE_CUDA_ARCHITECTURES "60-real;61-real;70-real;75-real;80-real;86-real;89")
+endif(CHECK_GPU)
+#-----------------------------------------------------------------------------
+# Set C++ standard version for CUDA, and enable extended lambdas and relaxed constexpr support
+set(CMAKE_CUDA_STANDARD 17)
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda --expt-relaxed-constexpr")
+# If desired, add PIC flags
+if(CMAKE_POSITION_INDEPENDENT_CODE AND DEFINED CMAKE_C_COMPILE_OPTIONS_PIC)
+    # Add (undocumented) CMake flag that should tell the host compiler to generate position independent code
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}")
+endif()
+# Adjust for debug and release versions
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-v -g -G")
+else(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-O3")
+endif(CMAKE_BUILD_TYPE STREQUAL "Debug")
+if(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -use_fast_math")
+    message(STATUS "CUDA fast math enabled")
+endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF")
 #-----------------------------------------------------------------------------
 set(NAME _reg_cuda_kernels)
 add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
@@ -97,4 +104,4 @@ install(TARGETS ${NAME}
 )
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}")
 #-----------------------------------------------------------------------------
-set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE)
+set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE)
\ No newline at end of file

From 2931554d634306b540d563e3c1494c81a7208044 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 8 Feb 2024 16:52:26 +0000
Subject: [PATCH 283/314] Use self-hosted runners to enable CUDA for coverage

---
 .github/workflows/coverage.yml | 21 ++++++++++-----------
 .github/workflows/tests.yml    | 23 ++++++++++-------------
 niftyreg_build_version.txt     |  2 +-
 3 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index 419d0e27..e54b253e 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -2,10 +2,12 @@ name: Coverage
 on: [push, pull_request]
 jobs:
   Coverage:
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, gpu]
     steps:
-      - name: Clone NiftyReg
-        uses: actions/checkout@v3
+      - uses: actions/checkout@v3
+
+      - name: Install dependencies
+        run: sudo apt-get update && sudo apt-get install -y cmake git lcov
 
       - name: Install Catch2
         run:  |
@@ -14,18 +16,15 @@ jobs:
           cmake -Bbuild -H. -DBUILD_TESTING=OFF
           sudo cmake --build build/ --target install --config Debug
 
-      - name: Install lcov
-        run: sudo apt-get install lcov
-
       - name: Configure NiftyReg
         run: |
           mkdir build
           cd build
-          cmake -DCMAKE_CXX_COMPILER=g++ \
-                -DCMAKE_C_COMPILER=gcc \
+          cmake -DCMAKE_C_COMPILER=gcc \
+                -DCMAKE_CXX_COMPILER=g++ \
                 -DCMAKE_BUILD_TYPE=Debug \
                 -DBUILD_ALL_DEP=ON \
-                -DUSE_CUDA=OFF \
+                -DUSE_CUDA=ON \
                 -DUSE_OPENCL=OFF \
                 -DUSE_SSE=OFF \
                 -DUSE_OPENMP=OFF \
@@ -37,12 +36,12 @@ jobs:
         run: cmake --build build --config Debug
 
       - name: Run tests
-        run: ctest -V
         working-directory: build
+        run: ctest -V
 
       - name: Coverage
-        run: make coverage
         working-directory: build
+        run: make coverage
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 117a9e0c..5f1f5660 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -15,9 +15,6 @@ jobs:
             c-compiler: "cl.exe"
             cxx-compiler: "cl.exe"
           - build_type: "Debug" # For all platforms
-            use_cuda: "OFF"
-            use_opencl: "OFF"
-            use_openmp: "ON"
 
     steps:
       - uses: actions/checkout@v3
@@ -27,34 +24,34 @@ jobs:
         if: matrix.os == 'windows-latest'
 
       - name: Install Catch2
+        shell: bash
         run:  |
           git clone https://github.com/catchorg/Catch2.git
           cd Catch2
           cmake -Bbuild -H. -DBUILD_TESTING=OFF
           ${{ matrix.sudo }} cmake --build build/ --target install --config ${{ matrix.build_type }}
-        shell: bash
 
       - name: Configure NiftyReg
+        shell: bash
         run: |
           mkdir build
           cd build
-          cmake -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \
-                -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \
+          cmake -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \
+                -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \
                 -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
                 -DBUILD_ALL_DEP=ON \
-                -DUSE_CUDA=${{ matrix.use_cuda }} \
-                -DUSE_OPENCL=${{ matrix.use_opencl }} \
+                -DUSE_CUDA=OFF \
+                -DUSE_OPENCL=OFF \
                 -DUSE_SSE=ON \
-                -DUSE_OPENMP=${{ matrix.use_openmp }} \
+                -DUSE_OPENMP=ON \
                 -DBUILD_TESTING=ON \
                 ..
-        shell: bash
 
       - name: Build NiftyReg
-        run: cmake --build build --config ${{ matrix.build_type }}
         shell: bash
+        run: cmake --build build --config ${{ matrix.build_type }}
 
       - name: Run tests
-        run: ctest -V
+        shell: bash
         working-directory: build
-        shell: bash
\ No newline at end of file
+        run: ctest -V
\ No newline at end of file
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 066cbfe9..52f22458 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-401
+402

From 18cc32b3305da8b031e19605c954c21a4c8df519 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 13 Feb 2024 15:37:37 +0000
Subject: [PATCH 284/314] Add a GitHub Action for static code analysis

---
 .github/code_analysis.py       | 596 +++++++++++++++++++++++++++++++++
 .github/workflows/analysis.yml |  61 ++++
 README.md                      |   3 +-
 niftyreg_build_version.txt     |   2 +-
 4 files changed, 660 insertions(+), 2 deletions(-)
 create mode 100644 .github/code_analysis.py
 create mode 100644 .github/workflows/analysis.yml

diff --git a/.github/code_analysis.py b/.github/code_analysis.py
new file mode 100644
index 00000000..1d3f0e2d
--- /dev/null
+++ b/.github/code_analysis.py
@@ -0,0 +1,596 @@
+import os
+import re
+import subprocess
+import argparse
+import sys
+from github import Github
+
+# Input variables from Github action
+GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
+PR_NUM = os.getenv("PR_NUMBER", "-1")
+WORK_DIR = f'{os.getenv("GITHUB_WORKSPACE")}'
+REPO_NAME = os.getenv("REPO")
+TARGET_REPO_NAME = os.getenv("REPO", "")
+SHA = os.getenv("GITHUB_SHA")
+COMMENT_TITLE = os.getenv("COMMENT_TITLE", "Static Analysis")
+ONLY_PR_CHANGES = os.getenv("REPORT_PR_CHANGES_ONLY", "False").lower()
+VERBOSE = os.getenv("VERBOSE", "False").lower() == "true"
+FILES_WITH_ISSUES = {}
+
+# Max characters per comment - 65536
+# Make some room for HTML tags and error message
+MAX_CHAR_COUNT_REACHED = "!Maximum character count per GitHub comment has been reached! Not all warnings/errors has been parsed!"
+COMMENT_MAX_SIZE = 65000
+CURRENT_COMMENT_LENGTH = 0
+
+
+def debug_print(message):
+    if VERBOSE:
+        lines = message.split("\n")
+        for line in lines:
+            print(f"\033[96m {line}")
+
+
+def parse_diff_output(changed_files):
+    """
+    Parses the diff output to extract filenames and corresponding line numbers of changes.
+
+    The function identifies changed lines in files and excludes certain directories
+    based on the file extension. It then extracts the line numbers of the changes
+    (additions) and associates them with their respective files.
+
+    Parameters:
+    - changed_files (str): The diff output string.
+
+    Returns:
+    - dict: A dictionary where keys are filenames and values are lists of line numbers
+            that have changes.
+
+    Usage Example:
+    ```python
+    diff_output = "<output from `git diff` command>"
+    changed_file_data = parse_diff_output(diff_output)
+    for file, lines in changed_file_data.items():
+        print(f"File: {file}, Changed Lines: {lines}")
+    ```
+
+    Note:
+    - The function only considers additions in the diff, lines starting with "+".
+    - Filenames in the return dictionary include their paths relative to the repo root.
+    """
+
+    # Regex to capture filename and the line numbers of the changes
+    file_pattern = re.compile(r"^\+\+\+ b/(.*?)$", re.MULTILINE)
+    line_pattern = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", re.MULTILINE)
+
+    supported_extensions = (".h", ".hpp", ".hcc", ".c", ".cc", ".cpp", ".cxx", ".cu", ".cl")
+
+    files = {}
+    for match in file_pattern.finditer(changed_files):
+        file_name = match.group(1)
+
+        # Filtering for language specific files and excluding certain directories
+        if file_name.endswith(supported_extensions):
+            # Find the lines that changed for this file
+            lines_start_at = match.end()
+            next_file_match = file_pattern.search(changed_files, pos=match.span(0)[1])
+
+            # Slice out the part of the diff that pertains to this file
+            file_diff = changed_files[lines_start_at : next_file_match.span(0)[0] if next_file_match else None]
+
+            # Extract line numbers of the changes
+            changed_lines = []
+            for line_match in line_pattern.finditer(file_diff):
+                start_line = int(line_match.group(1))
+
+                # The start and end positions for this chunk of diff
+                chunk_start = line_match.end()
+                next_chunk = line_pattern.search(file_diff, pos=line_match.span(0)[1])
+                chunk_diff = file_diff[chunk_start : next_chunk.span(0)[0] if next_chunk else None]
+
+                lines = chunk_diff.splitlines()
+                line_counter = 0
+                for line in lines:
+                    if line.startswith("+"):
+                        changed_lines.append(start_line + line_counter)
+                        line_counter += 1
+
+            if changed_lines:
+                files[file_name] = changed_lines
+
+    return files
+
+
+def get_changed_files(common_ancestor, feature_branch):
+    """Get a dictionary of files and their changed lines between the common ancestor and feature_branch."""
+    cmd = ["git", "diff", "-U0", "--ignore-all-space", common_ancestor, feature_branch]
+    result = subprocess.check_output(cmd).decode("utf-8")
+
+    return parse_diff_output(result)
+
+
+def is_part_of_pr_changes(file_path, issue_file_line, files_changed_in_pr):
+    """
+    Check if a given file and line number corresponds to a change in the files included in a pull request.
+
+    Args:
+        file_path (str): The path to the file in question.
+        issue_file_line (int): The line number within the file to check.
+        files_changed_in_pr (dict): A dictionary of files changed in a pull request, where the keys are file paths
+                                    and the values are tuples of the form (status, lines_changed_for_file), where
+                                    status is a string indicating the change status ("added", "modified", or "removed"),
+                                    and lines_changed_for_file is a list of tuples, where each tuple represents a range
+                                    of lines changed in the file (e.g. [(10, 15), (20, 25)] indicates that lines 10-15
+                                    and 20-25 were changed in the file).
+
+    Returns:
+        bool: True if the file and line number correspond to a change in the pull request, False otherwise.
+    """
+
+    if ONLY_PR_CHANGES == "false":
+        return True
+
+    debug_print(f"Looking for issue found in file={file_path} at line={issue_file_line}...")
+    for file, lines_changed_for_file in files_changed_in_pr.items():
+        debug_print(f'Changed file by this PR "{file}" with changed lines "{lines_changed_for_file}"')
+        if file == file_path:
+            for line in lines_changed_for_file:
+                if line == issue_file_line:
+                    debug_print(f"Issue line {issue_file_line} is a part of PR!")
+                    return True
+
+    return False
+
+
+def get_lines_changed_from_patch(patch):
+    """
+    Parses a unified diff patch and returns the range of lines that were changed.
+
+    Parameters:
+        patch (str): The unified diff patch to parse.
+
+    Returns:
+        list: A list of tuples containing the beginning and ending line numbers for each
+        section of the file that was changed by the patch.
+    """
+
+    lines_changed = []
+    lines = patch.split("\n")
+
+    for line in lines:
+        # Example line @@ -43,6 +48,8 @@
+        # ------------ ^
+        if line.startswith("@@"):
+            # Example line @@ -43,6 +48,8 @@
+            # ----------------------^
+            idx_beg = line.index("+")
+
+            # Example line @@ -43,6 +48,8 @@
+            #                       ^--^
+            try:
+                idx_end = line[idx_beg:].index(",")
+                line_begin = int(line[idx_beg + 1 : idx_beg + idx_end])
+
+                idx_beg = idx_beg + idx_end
+                idx_end = line[idx_beg + 1 :].index("@@")
+
+                num_lines = int(line[idx_beg + 1 : idx_beg + idx_end])
+            except ValueError:
+                # Special case for single line files
+                # such as @@ -0,0 +1 @@
+                idx_end = line[idx_beg:].index(" ")
+                line_begin = int(line[idx_beg + 1 : idx_beg + idx_end])
+                num_lines = 0
+
+            lines_changed.append((line_begin, line_begin + num_lines))
+
+    return lines_changed
+
+
+def check_for_char_limit(incoming_line):
+    return (CURRENT_COMMENT_LENGTH + len(incoming_line)) <= COMMENT_MAX_SIZE
+
+
+def is_excluded_dir(line):
+    """
+    Determines if a given line is from a directory that should be excluded from processing.
+
+    Args:
+        line (str): The line to check.
+
+    Returns:
+        bool: True if the line is from a directory that should be excluded, False otherwise.
+    """
+
+    # In future this could be multiple different directories
+    exclude_dir = os.getenv("EXCLUDE_DIR")
+    if not exclude_dir:
+        return False
+
+    excluded_dir = f"{WORK_DIR}/{exclude_dir}"
+    debug_print(f"{line} and {excluded_dir} with result {line.startswith(excluded_dir)}")
+
+    return line.startswith(excluded_dir)
+
+
+def get_file_line_end(file_in, file_line_start_in):
+    """
+    Returns the ending line number for a given file, starting from a specified line number.
+
+    Args:
+        file_in (str): The name of the file to read.
+        file_line_start_in (int): The starting line number.
+
+    Returns:
+        int: The ending line number, which is either `file_line_start + 5`
+        or the total number of lines in the file, whichever is smaller.
+    """
+
+    with open(f"{WORK_DIR}/{file_in}", encoding="utf-8") as file:
+        num_lines = sum(1 for line in file)
+
+    return min(file_line_start_in + 5, num_lines)
+
+
+def generate_description(is_note, was_note, file_line_start, issue_description, output_string):
+    """Generate description for an issue
+
+    is_note -- is the current issue a Note: or not
+    was_note -- was the previous issue a Note: or not
+    file_line_start -- line to which the issue corresponds
+    issue_description -- the description from cppcheck
+    output_string -- entire description (can be altered if the current/previous issue is/was Note:)
+    """
+    global CURRENT_COMMENT_LENGTH
+
+    if not is_note:
+        description = f"\n```diff\n!Line: {file_line_start} - {issue_description}\n``` \n"
+    else:
+        if not was_note:
+            # Previous line consists of ```diff <content> ```, so remove the closing ```
+            # and append the <content> with Note: ...`
+
+            # 12 here means "``` \n<br>\n"`
+            num_chars_to_remove = 12
+        else:
+            # Previous line is Note: so it ends with "``` \n"
+            num_chars_to_remove = 6
+
+        output_string = output_string[:-num_chars_to_remove]
+        CURRENT_COMMENT_LENGTH -= num_chars_to_remove
+        description = f"\n!Line: {file_line_start} - {issue_description}``` \n"
+
+    return output_string, description
+
+
+def create_or_edit_comment(comment_body):
+    """
+    Creates or edits a comment on a pull request with the given comment body.
+
+    Args:
+    - comment_body: A string containing the full comment body to be created or edited.
+
+    Returns:
+    - None.
+    """
+
+    github = Github(GITHUB_TOKEN)
+    repo = github.get_repo(TARGET_REPO_NAME)
+    pull_request = repo.get_pull(int(PR_NUM))
+
+    comments = pull_request.get_issue_comments()
+    found_id = -1
+    comment_to_edit = None
+    for comment in comments:
+        if (comment.user.login == "github-actions[bot]") and (COMMENT_TITLE in comment.body):
+            found_id = comment.id
+            comment_to_edit = comment
+            break
+
+    if found_id != -1 and comment_to_edit:
+        comment_to_edit.edit(body=comment_body)
+    else:
+        pull_request.create_issue_comment(body=comment_body)
+
+
+def generate_output(is_note, file_path, file_line_start, file_line_end, description):
+    """
+    Generate a formatted output string based on the details of a code issue.
+
+    This function takes information about a code issue and constructs a string that
+    includes details such as the location of the issue in the codebase, the affected code
+    lines, and a description of the issue. If the issue is a note, only the description
+    is returned. If the issue occurs in a different repository than the target, it
+    also fetches the lines where the issue was detected.
+
+    Parameters:
+    - is_note (bool): Whether the issue is just a note or a code issue.
+    - file_path (str): Path to the file where the issue was detected.
+    - file_line_start (int): The line number in the file where the issue starts.
+    - file_line_end (int): The line number in the file where the issue ends.
+    - description (str): Description of the issue.
+
+    Returns:
+    - str: Formatted string with details of the issue.
+
+    Note:
+    - This function relies on several global variables like TARGET_REPO_NAME, REPO_NAME,
+      FILES_WITH_ISSUES, and SHA which should be set before calling this function.
+    """
+
+    if not is_note:
+        if TARGET_REPO_NAME != REPO_NAME:
+            if file_path not in FILES_WITH_ISSUES:
+                try:
+                    with open(f"{file_path}", encoding="utf-8") as file:
+                        lines = file.readlines()
+                        FILES_WITH_ISSUES[file_path] = lines
+                except FileNotFoundError:
+                    print(f"Error: The file '{file_path}' was not found.")
+
+            modified_content = FILES_WITH_ISSUES[file_path][file_line_start - 1 : file_line_end - 1]
+
+            debug_print(f"generate_output for following file: \nfile_path={file_path} \nmodified_content={modified_content}\n")
+
+            modified_content[0] = modified_content[0][:-1] + " <---- HERE\n"
+            file_content = "".join(modified_content)
+
+            file_url = f"https://github.com/{REPO_NAME}/blob/{SHA}/{file_path}#L{file_line_start}"
+            new_line = (
+                "\n\n------"
+                f"\n\n <b><i>Issue found in file</b></i> [{REPO_NAME}/{file_path}]({file_url})\n"
+                f"{file_content}"
+                f"\n``` \n"
+                f"{description} <br>\n"
+            )
+
+        else:
+            new_line = (
+                f"\n\nhttps://github.com/{REPO_NAME}/blob/{SHA}/{file_path}"
+                f"#L{file_line_start}-L{file_line_end} {description} <br>\n"
+            )
+    else:
+        new_line = description
+
+    return new_line
+
+
+def extract_info(line, prefix):
+    """
+    Extracts information from a given line containing file path, line number, and issue description.
+
+    Args:
+    - line (str): The input string containing file path, line number, and issue description.
+    - prefix (str): The prefix to remove from the start of the file path in the line.
+    - was_note (bool): Indicates if the previous issue was a note.
+    - output_string (str): The string containing previous output information.
+
+    Returns:
+    - tuple: A tuple containing:
+        - file_path (str): The path to the file.
+        - is_note (bool): A flag indicating if the issue is a note.
+        - description (str): Description of the issue.
+        - file_line_start (int): The starting line number of the issue.
+        - file_line_end (int): The ending line number of the issue.
+    """
+
+    # Clean up line
+    line = line.replace(prefix, "").lstrip("/")
+
+    # Get the line starting position /path/to/file:line and trim it
+    file_path_end_idx = line.index(":")
+    file_path = line[:file_path_end_idx]
+
+    # Extract the lines information
+    line = line[file_path_end_idx + 1 :]
+
+    # Get line (start, end)
+    file_line_start = int(line[: line.index(":")])
+    file_line_end = get_file_line_end(file_path, file_line_start)
+
+    # Get content of the issue
+    issue_description = line[line.index(" ") + 1 :]
+    is_note = issue_description.startswith("note:")
+
+    return (file_path, is_note, file_line_start, file_line_end, issue_description)
+
+
+def create_common_input_vars_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-o",
+        "--output_to_console",
+        help="Whether to output the result to console",
+        required=True,
+    )
+    parser.add_argument(
+        "-fk",
+        "--fork_repository",
+        help="Whether the actual code is in 'pr_tree' directory",
+        required=True,
+    )
+    parser.add_argument(
+        "--common",
+        default="",
+        help="common ancestor between two branches (default: %(default)s)",
+    )
+    parser.add_argument("--head", default="", help="Head branch (default: %(default)s)")
+
+    return parser
+
+
+def append_issue(is_note, per_issue_string, new_line, list_of_issues):
+    if not is_note:
+        if len(per_issue_string) > 0 and (per_issue_string not in list_of_issues):
+            list_of_issues.append(per_issue_string)
+        per_issue_string = new_line
+    else:
+        per_issue_string += new_line
+
+    return per_issue_string
+
+
+def create_comment_for_output(tool_output, prefix, files_changed_in_pr, output_to_console):
+    """
+    Generates a comment for a GitHub pull request based on the tool output.
+
+    Parameters:
+        tool_output (str): The tool output to parse.
+        prefix (str): The prefix to look for in order to identify issues.
+        files_changed_in_pr (dict): A dictionary containing the files that were
+            changed in the pull request and the lines that were modified.
+        output_to_console (bool): Whether or not to output the results to the console.
+
+    Returns:
+        tuple: A tuple containing the generated comment and the number of issues found.
+    """
+    list_of_issues = []
+    per_issue_string = ""
+    was_note = False
+
+    for line in tool_output:
+        if line.startswith(prefix) and not is_excluded_dir(line):
+            (
+                file_path,
+                is_note,
+                file_line_start,
+                file_line_end,
+                issue_description,
+            ) = extract_info(line, prefix)
+
+            # In case where we only output to console, skip the next part
+            if output_to_console:
+                per_issue_string = append_issue(is_note, per_issue_string, line, list_of_issues)
+                continue
+
+            if is_part_of_pr_changes(file_path, file_line_start, files_changed_in_pr):
+                per_issue_string, description = generate_description(
+                    is_note,
+                    was_note,
+                    file_line_start,
+                    issue_description,
+                    per_issue_string,
+                )
+                was_note = is_note
+                new_line = generate_output(is_note, file_path, file_line_start, file_line_end, description)
+
+                global CURRENT_COMMENT_LENGTH
+                if check_for_char_limit(new_line):
+                    per_issue_string = append_issue(is_note, per_issue_string, new_line, list_of_issues)
+                    CURRENT_COMMENT_LENGTH += len(new_line)
+
+                else:
+                    CURRENT_COMMENT_LENGTH = COMMENT_MAX_SIZE
+
+                    return "\n".join(list_of_issues), len(list_of_issues)
+
+    # Append any unprocessed issues
+    if len(per_issue_string) > 0 and (per_issue_string not in list_of_issues):
+        list_of_issues.append(per_issue_string)
+
+    output_string = "\n".join(list_of_issues)
+
+    debug_print(f"\nFinal output_string = \n{output_string}\n")
+
+    return output_string, len(list_of_issues)
+
+
+def read_files_and_parse_results():
+    """Reads the output files generated by cppcheck and creates comments
+    for the pull request, based on the issues found. The comments can be output to console
+    and/or added to the pull request. Returns a tuple with the comments generated for
+    cppcheck, and boolean values indicating whether issues were found by each tool,
+    whether output was generated to the console, and whether the actual code
+    is in the 'pr_tree' directory.
+
+    Returns:
+        A tuple with the following values:
+        - cppcheck_comment (str): The comment generated for cppcheck, if any issues were found.
+        - cppcheck_issues_found (bool): Whether issues were found by cppcheck.
+        - output_to_console (bool): Whether output was generated to the console.
+    """
+
+    # Get cppcheck files
+    parser = create_common_input_vars_parser()
+    parser.add_argument("-cc", "--cppcheck", help="Output file name for cppcheck", required=True)
+
+    if parser.parse_args().fork_repository == "true":
+        # Make sure to use Head repository
+        global REPO_NAME
+        REPO_NAME = os.getenv("PR_REPO")
+
+    cppcheck_file_name = parser.parse_args().cppcheck
+    output_to_console = parser.parse_args().output_to_console == "true"
+
+    cppcheck_content = ""
+    with open(cppcheck_file_name, "r", encoding="utf-8") as file:
+        cppcheck_content = file.readlines()
+
+    common_ancestor = parser.parse_args().common
+    feature_branch = parser.parse_args().head
+
+    line_prefix = f"{WORK_DIR}"
+
+    debug_print(f"cppcheck result: \n {cppcheck_content} \n" f"line_prefix: {line_prefix} \n")
+
+    files_changed_in_pr = {}
+    if not output_to_console and (ONLY_PR_CHANGES == "true"):
+        files_changed_in_pr = get_changed_files(common_ancestor, feature_branch)
+
+    cppcheck_comment, cppcheck_issues_found = create_comment_for_output(
+        cppcheck_content, line_prefix, files_changed_in_pr, output_to_console
+    )
+
+    if output_to_console and cppcheck_issues_found:
+        print("##[error] Issues found!\n")
+        error_color = "\u001b[31m"
+
+        if cppcheck_issues_found:
+            print(f"{error_color}cppcheck results: {cppcheck_comment}")
+
+    return cppcheck_comment, cppcheck_issues_found, output_to_console
+
+
+def prepare_comment_body(cppcheck_comment, cppcheck_issues_found):
+    """
+    Generates a comment body based on the results of the cppcheck analysis.
+
+    Args:
+        cppcheck_comment (str): The comment body generated for the cppcheck analysis.
+        cppcheck_issues_found (int): The number of issues found by cppcheck analysis.
+
+    Returns:
+        str: The final comment body that will be posted as a comment on the pull request.
+    """
+
+    if cppcheck_issues_found == 0:
+        full_comment_body = (
+            '## <p align="center"><b> :white_check_mark:' f"{COMMENT_TITLE} - no issues found! :white_check_mark: </b></p>"
+        )
+    else:
+        full_comment_body = f'## <p align="center"><b> :zap: {COMMENT_TITLE} :zap: </b></p> \n\n'
+
+        if len(cppcheck_comment) > 0:
+            full_comment_body += (
+                f"<details> <summary> <b> :red_circle: cppcheck found "
+                f"{cppcheck_issues_found} {'issues' if cppcheck_issues_found > 1 else 'issue'}!"
+                " Click here to see details. </b> </summary> <br>"
+                f"{cppcheck_comment} </details>"
+            )
+
+    if CURRENT_COMMENT_LENGTH == COMMENT_MAX_SIZE:
+        full_comment_body += f"\n```diff\n{MAX_CHAR_COUNT_REACHED}\n```"
+
+    debug_print(f"Repo={REPO_NAME} pr_num={PR_NUM} comment_title={COMMENT_TITLE}")
+
+    return full_comment_body
+
+
+if __name__ == "__main__":
+    cppcheck_comment_in, cppcheck_issues_found_in, output_to_console_in = read_files_and_parse_results()
+
+    if not output_to_console_in:
+        comment_body_in = prepare_comment_body(cppcheck_comment_in, cppcheck_issues_found_in)
+        create_or_edit_comment(comment_body_in)
+
+    sys.exit(cppcheck_issues_found_in)
diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml
new file mode 100644
index 00000000..2cce5a89
--- /dev/null
+++ b/.github/workflows/analysis.yml
@@ -0,0 +1,61 @@
+name: Code Analysis
+on: [push, pull_request]
+jobs:
+  Code-Analysis:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Install Cppcheck
+        run: |
+          sudo apt-get update && sudo apt-get install libpcre3-dev
+          git clone https://github.com/danmar/cppcheck.git
+          cd cppcheck
+          git checkout 2.13.x
+          # Disable color output of cppcheck
+          sed -i 's/ *bool *gDisableColors *= *false;/bool gDisableColors = true;/' lib/color.cpp
+          sudo make -j4 MATCHCOMPILER=yes FILESDIR=/usr/share/cppcheck HAVE_RULES=yes CXXFLAGS="-O2 -DNDEBUG -Wall -Wno-sign-compare -Wno-unused-function" install
+
+      - name: Install Python dependencies
+        run: pip3 install --upgrade setuptools urllib3 chardet pyOpenSSL pygithub
+
+      - name: Install CUDA Toolkit
+        uses: Jimver/cuda-toolkit@v0.2.14
+        with:
+          method: network
+          use-github-cache: false
+          use-local-cache: false
+
+      - name: Configure NiftyReg
+        run: |
+          mkdir build
+          cd build
+          cmake -DCMAKE_C_COMPILER=gcc \
+                -DCMAKE_CXX_COMPILER=g++ \
+                -DCMAKE_BUILD_TYPE=Debug \
+                -DBUILD_ALL_DEP=ON \
+                -DCHECK_GPU=OFF \
+                -DUSE_CUDA=ON \
+                -DUSE_OPENCL=ON \
+                -DUSE_SSE=ON \
+                -DUSE_OPENMP=ON \
+                -DBUILD_TESTING=OFF \
+                -DWITH_COVERAGE=OFF \
+                -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+                ..
+
+      - name: Code Analysis
+        env:
+            COMMENT_TITLE: Code Analysis Results
+            GITHUB_TOKEN: ${{ github.token }}
+            PR_NUMBER: ${{ github.event.pull_request.number }}
+            REPO: ${{ github.repository }}
+            REPORT_PR_CHANGES_ONLY: false
+        run: |
+            analysis_file="analysis.txt"
+            cppcheck_params="--enable=warning --check-level=exhaustive --suppress=internalError --suppress=internalAstError"
+            cppcheck -j4 $cppcheck_params --project=$(pwd)/build/compile_commands.json --output-file=$analysis_file
+            # Since cppcheck does not support OpenCL and CUDA, we need to check these files separately
+            find $(pwd)/reg-lib/cl/. -name "*.cl" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done
+            find $(pwd)/reg-lib/cuda/. -name "*.cu" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done
+            python3 .github/code_analysis.py -cc $analysis_file -o ${{ github.event_name == 'push' }} -fk false
\ No newline at end of file
diff --git a/README.md b/README.md
index ad24879e..f330ea49 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,9 @@
 # NiftyReg
 
 [![License](https://img.shields.io/github/license/KCL-BMEIS/NiftyReg)](https://github.com/KCL-BMEIS/niftyreg/blob/master/LICENSE.txt)
-[![Tests](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml/badge.svg)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml)
 [![Coverage Status](https://codecov.io/gh/KCL-BMEIS/niftyreg/graph/badge.svg?token=lgLtkSC7kX)](https://codecov.io/gh/KCL-BMEIS/niftyreg)
+[![Static Code Analysis](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/analysis.yml/badge.svg)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/analysis.yml)
+[![Tests](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml/badge.svg)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml)
 
 
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 52f22458..e1a29c1f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-402
+403

From 640ec07051aa353f8d000fb563a37b42c51233c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 14 Feb 2024 15:19:53 +0000
Subject: [PATCH 285/314] Fix GPU ID parsing error in reg_f3d

---
 niftyreg_build_version.txt | 2 +-
 reg-apps/reg_f3d.cpp       | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e1a29c1f..f1b1cb3a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-403
+404
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index 92f944d2..e0b3fe48 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -301,7 +301,8 @@ int main(int argc, char **argv) {
         if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 ||
             strcmp(argv[i], "--ref") == 0 || strcmp(argv[i], "-flo") == 0 ||
             strcmp(argv[i], "-source") == 0 || strcmp(argv[i], "--flo") == 0 ||
-            strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) {
+            strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0 ||
+            strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) {
             // argument has already been parsed
             ++i;
         } else if (strcmp(argv[i], "-voff") == 0) {

From f5a5990cd3253c0f3c433ff17710bc19e6b75301 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 15 Feb 2024 17:06:56 +0000
Subject: [PATCH 286/314] Upgrade libpng to 1.6.42

---
 CMakeLists.txt                                |    4 +-
 niftyreg_build_version.txt                    |    2 +-
 reg-io/png/CMakeLists.txt                     |   62 +-
 reg-io/png/lpng/LICENSE                       |  134 +
 reg-io/png/lpng/png.c                         | 4562 ++++++++++++++++
 reg-io/png/lpng/png.h                         | 3251 ++++++++++++
 reg-io/png/lpng/pngconf.h                     |  623 +++
 reg-io/png/{lpng1510 => lpng}/pngdebug.h      |  308 +-
 reg-io/png/{lpng1510 => lpng}/pngerror.c      |  482 +-
 reg-io/png/lpng/pngget.c                      | 1267 +++++
 reg-io/png/{lpng1510 => lpng}/pnginfo.h       |  534 +-
 .../{lpng1510 => lpng}/pnglibconf.h.prebuilt  |  198 +-
 reg-io/png/lpng/pngmem.c                      |  284 +
 reg-io/png/{lpng1510 => lpng}/pngpread.c      |  574 +-
 reg-io/png/lpng/pngpriv.h                     | 2221 ++++++++
 reg-io/png/lpng/pngread.c                     | 4224 +++++++++++++++
 reg-io/png/{lpng1510 => lpng}/pngrio.c        |   86 +-
 reg-io/png/{lpng1510 => lpng}/pngrtran.c      | 3284 ++++++------
 reg-io/png/lpng/pngrutil.c                    | 4680 +++++++++++++++++
 reg-io/png/lpng/pngset.c                      | 1803 +++++++
 reg-io/png/{lpng1510 => lpng}/pngstruct.h     |  835 +--
 reg-io/png/lpng/pngtest.c                     | 2158 ++++++++
 reg-io/png/{lpng1510 => lpng}/pngtrans.c      |  257 +-
 reg-io/png/{lpng1510 => lpng}/pngwio.c        |  124 +-
 reg-io/png/lpng/pngwrite.c                    | 2418 +++++++++
 reg-io/png/{lpng1510 => lpng}/pngwtran.c      |  278 +-
 reg-io/png/lpng/pngwutil.c                    | 2781 ++++++++++
 reg-io/png/lpng1510/LICENSE                   |  111 -
 reg-io/png/lpng1510/png.c                     | 2874 ----------
 reg-io/png/lpng1510/png.h                     | 2664 ----------
 reg-io/png/lpng1510/pngconf.h                 |  594 ---
 reg-io/png/lpng1510/pngget.c                  | 1124 ----
 reg-io/png/lpng1510/pngmem.c                  |  667 ---
 reg-io/png/lpng1510/pngpriv.h                 | 1674 ------
 reg-io/png/lpng1510/pngread.c                 | 1305 -----
 reg-io/png/lpng1510/pngrutil.c                | 4159 ---------------
 reg-io/png/lpng1510/pngset.c                  | 1309 -----
 reg-io/png/lpng1510/pngtest.c                 | 1820 -------
 reg-io/png/lpng1510/pngwrite.c                | 1667 ------
 reg-io/png/lpng1510/pngwutil.c                | 3179 -----------
 40 files changed, 33974 insertions(+), 26607 deletions(-)
 create mode 100644 reg-io/png/lpng/LICENSE
 create mode 100644 reg-io/png/lpng/png.c
 create mode 100644 reg-io/png/lpng/png.h
 create mode 100644 reg-io/png/lpng/pngconf.h
 rename reg-io/png/{lpng1510 => lpng}/pngdebug.h (82%)
 rename reg-io/png/{lpng1510 => lpng}/pngerror.c (54%)
 create mode 100644 reg-io/png/lpng/pngget.c
 rename reg-io/png/{lpng1510 => lpng}/pnginfo.h (63%)
 rename reg-io/png/{lpng1510 => lpng}/pnglibconf.h.prebuilt (69%)
 create mode 100644 reg-io/png/lpng/pngmem.c
 rename reg-io/png/{lpng1510 => lpng}/pngpread.c (64%)
 create mode 100644 reg-io/png/lpng/pngpriv.h
 create mode 100644 reg-io/png/lpng/pngread.c
 rename reg-io/png/{lpng1510 => lpng}/pngrio.c (60%)
 rename reg-io/png/{lpng1510 => lpng}/pngrtran.c (67%)
 create mode 100644 reg-io/png/lpng/pngrutil.c
 create mode 100644 reg-io/png/lpng/pngset.c
 rename reg-io/png/{lpng1510 => lpng}/pngstruct.h (52%)
 create mode 100644 reg-io/png/lpng/pngtest.c
 rename reg-io/png/{lpng1510 => lpng}/pngtrans.c (73%)
 rename reg-io/png/{lpng1510 => lpng}/pngwio.c (61%)
 create mode 100644 reg-io/png/lpng/pngwrite.c
 rename reg-io/png/{lpng1510 => lpng}/pngwtran.c (72%)
 create mode 100644 reg-io/png/lpng/pngwutil.c
 delete mode 100644 reg-io/png/lpng1510/LICENSE
 delete mode 100644 reg-io/png/lpng1510/png.c
 delete mode 100644 reg-io/png/lpng1510/png.h
 delete mode 100644 reg-io/png/lpng1510/pngconf.h
 delete mode 100644 reg-io/png/lpng1510/pngget.c
 delete mode 100644 reg-io/png/lpng1510/pngmem.c
 delete mode 100644 reg-io/png/lpng1510/pngpriv.h
 delete mode 100644 reg-io/png/lpng1510/pngread.c
 delete mode 100644 reg-io/png/lpng1510/pngrutil.c
 delete mode 100644 reg-io/png/lpng1510/pngset.c
 delete mode 100644 reg-io/png/lpng1510/pngtest.c
 delete mode 100644 reg-io/png/lpng1510/pngwrite.c
 delete mode 100644 reg-io/png/lpng1510/pngwutil.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a59b40a..3601fb55 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -117,7 +117,7 @@ if(NOT BUILD_ALL_DEP)
     # If the png library and header can not be found, it is build from the sources
     if(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
         message(STATUS "libpng not found - the png library will be built")
-        set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510)
+        set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng)
         set(PNG_LIBRARY png)
         set(BUILD_INTERNAL_PNG true)
     else(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
@@ -125,7 +125,7 @@ if(NOT BUILD_ALL_DEP)
         set(BUILD_INTERNAL_PNG false)
     endif(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
 else(NOT BUILD_ALL_DEP)
-    set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510)
+    set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng)
     set(PNG_LIBRARY png)
 endif(NOT BUILD_ALL_DEP)
 include_directories(${CMAKE_SOURCE_DIR}/reg-io/png)
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index f1b1cb3a..ec8785ec 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-404
+405
diff --git a/reg-io/png/CMakeLists.txt b/reg-io/png/CMakeLists.txt
index afbf0dc5..56f0424f 100644
--- a/reg-io/png/CMakeLists.txt
+++ b/reg-io/png/CMakeLists.txt
@@ -1,11 +1,6 @@
 if(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP)
     # If the png library is not present on the machine, it is build from the sources
-    # Most of the following lines are extracted from the libpng1510 CMakeLists.txt
-    set(PNGLIB_MAJOR 1)
-    set(PNGLIB_MINOR 5)
-    set(PNGLIB_RELEASE 10)
-    set(PNGLIB_NAME libpng${PNGLIB_MAJOR}${PNGLIB_MINOR})
-    set(PNGLIB_VERSION ${PNGLIB_MAJOR}.${PNGLIB_MINOR}.${PNGLIB_RELEASE})
+    # Most of the following lines are extracted from the libpng CMakeLists.txt
     # Check if the m library is present
     if(NOT WIN32)
       find_library(M_LIBRARY
@@ -13,51 +8,50 @@ if(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP)
         PATHS /usr/lib /usr/local/lib
       )
       if(NOT M_LIBRARY)
-        message(STATUS
-          "math library 'libm' not found - floating point support disabled")
+        message(STATUS "math library 'libm' not found - floating point support disabled")
       endif(NOT M_LIBRARY)
     else(NOT WIN32)
       # the m library is not needed on windows
       set(M_LIBRARY "")
     endif(NOT WIN32)
     # generate the config file for libpng and set the path to use it
-    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lpng1510/pnglibconf.h.prebuilt
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lpng/pnglibconf.h.prebuilt
                    ${CMAKE_BINARY_DIR}/pnglibconf.h)
     include_directories(${CMAKE_BINARY_DIR})
     # Set the libpng sources
-    set(png_HDRS
-      lpng1510/png.h
-      lpng1510/pngconf.h
-      lpng1510/pngdebug.h
-      lpng1510/pnginfo.h
-      lpng1510/pngpriv.h
-      lpng1510/pngstruct.h
+    set(png_hdrs
+      lpng/png.h
+      lpng/pngconf.h
+      lpng/pngdebug.h
+      lpng/pnginfo.h
+      lpng/pngpriv.h
+      lpng/pngstruct.h
     )
-    set(png_SRCS
-      lpng1510/png.c
-      lpng1510/pngerror.c
-      lpng1510/pngget.c
-      lpng1510/pngmem.c
-      lpng1510/pngpread.c
-      lpng1510/pngread.c
-      lpng1510/pngrio.c
-      lpng1510/pngrtran.c
-      lpng1510/pngrutil.c
-      lpng1510/pngset.c
-      lpng1510/pngtrans.c
-      lpng1510/pngwio.c
-      lpng1510/pngwrite.c
-      lpng1510/pngwtran.c
-      lpng1510/pngwutil.c
+    set(png_srcs
+      lpng/png.c
+      lpng/pngerror.c
+      lpng/pngget.c
+      lpng/pngmem.c
+      lpng/pngpread.c
+      lpng/pngread.c
+      lpng/pngrio.c
+      lpng/pngrtran.c
+      lpng/pngrutil.c
+      lpng/pngset.c
+      lpng/pngtrans.c
+      lpng/pngwio.c
+      lpng/pngwrite.c
+      lpng/pngwtran.c
+      lpng/pngwutil.c
     )
     # Build the library
-    add_library(png STATIC ${png_SRCS})
+    add_library(png STATIC ${png_srcs})
     target_link_libraries(png z)
     install(TARGETS png
         LIBRARY DESTINATION lib COMPONENT Development
         ARCHIVE DESTINATION lib COMPONENT Development
     )
-    install(FILES ${png_HDRS} ${CMAKE_BINARY_DIR}/pnglibconf.h DESTINATION include COMPONENT Development)
+    install(FILES ${png_hdrs} ${CMAKE_BINARY_DIR}/pnglibconf.h DESTINATION include COMPONENT Development)
 endif(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP)
 
 add_library(reg_png reg_png.cpp readpng.cpp)
diff --git a/reg-io/png/lpng/LICENSE b/reg-io/png/lpng/LICENSE
new file mode 100644
index 00000000..1cd26554
--- /dev/null
+++ b/reg-io/png/lpng/LICENSE
@@ -0,0 +1,134 @@
+COPYRIGHT NOTICE, DISCLAIMER, and LICENSE
+=========================================
+
+PNG Reference Library License version 2
+---------------------------------------
+
+ * Copyright (c) 1995-2024 The PNG Reference Library Authors.
+ * Copyright (c) 2018-2024 Cosmin Truta.
+ * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson.
+ * Copyright (c) 1996-1997 Andreas Dilger.
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+
+The software is supplied "as is", without warranty of any kind,
+express or implied, including, without limitation, the warranties
+of merchantability, fitness for a particular purpose, title, and
+non-infringement.  In no event shall the Copyright owners, or
+anyone distributing the software, be liable for any damages or
+other liability, whether in contract, tort or otherwise, arising
+from, out of, or in connection with the software, or the use or
+other dealings in the software, even if advised of the possibility
+of such damage.
+
+Permission is hereby granted to use, copy, modify, and distribute
+this software, or portions hereof, for any purpose, without fee,
+subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you
+    must not claim that you wrote the original software.  If you
+    use this software in a product, an acknowledgment in the product
+    documentation would be appreciated, but is not required.
+
+ 2. Altered source versions must be plainly marked as such, and must
+    not be misrepresented as being the original software.
+
+ 3. This Copyright notice may not be removed or altered from any
+    source or altered source distribution.
+
+
+PNG Reference Library License version 1 (for libpng 0.5 through 1.6.35)
+-----------------------------------------------------------------------
+
+libpng versions 1.0.7, July 1, 2000, through 1.6.35, July 15, 2018 are
+Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson, are
+derived from libpng-1.0.6, and are distributed according to the same
+disclaimer and license as libpng-1.0.6 with the following individuals
+added to the list of Contributing Authors:
+
+    Simon-Pierre Cadieux
+    Eric S. Raymond
+    Mans Rullgard
+    Cosmin Truta
+    Gilles Vollant
+    James Yu
+    Mandar Sahastrabuddhe
+    Google Inc.
+    Vadim Barkov
+
+and with the following additions to the disclaimer:
+
+    There is no warranty against interference with your enjoyment of
+    the library or against infringement.  There is no warranty that our
+    efforts or the library will fulfill any of your particular purposes
+    or needs.  This library is provided with all faults, and the entire
+    risk of satisfactory quality, performance, accuracy, and effort is
+    with the user.
+
+Some files in the "contrib" directory and some configure-generated
+files that are distributed with libpng have other copyright owners, and
+are released under other open source licenses.
+
+libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are
+Copyright (c) 1998-2000 Glenn Randers-Pehrson, are derived from
+libpng-0.96, and are distributed according to the same disclaimer and
+license as libpng-0.96, with the following individuals added to the
+list of Contributing Authors:
+
+    Tom Lane
+    Glenn Randers-Pehrson
+    Willem van Schaik
+
+libpng versions 0.89, June 1996, through 0.96, May 1997, are
+Copyright (c) 1996-1997 Andreas Dilger, are derived from libpng-0.88,
+and are distributed according to the same disclaimer and license as
+libpng-0.88, with the following individuals added to the list of
+Contributing Authors:
+
+    John Bowler
+    Kevin Bracey
+    Sam Bushell
+    Magnus Holmgren
+    Greg Roelofs
+    Tom Tanner
+
+Some files in the "scripts" directory have other copyright owners,
+but are released under this license.
+
+libpng versions 0.5, May 1995, through 0.88, January 1996, are
+Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+
+For the purposes of this copyright and license, "Contributing Authors"
+is defined as the following set of individuals:
+
+    Andreas Dilger
+    Dave Martindale
+    Guy Eric Schalnat
+    Paul Schmidt
+    Tim Wegner
+
+The PNG Reference Library is supplied "AS IS".  The Contributing
+Authors and Group 42, Inc. disclaim all warranties, expressed or
+implied, including, without limitation, the warranties of
+merchantability and of fitness for any purpose.  The Contributing
+Authors and Group 42, Inc. assume no liability for direct, indirect,
+incidental, special, exemplary, or consequential damages, which may
+result from the use of the PNG Reference Library, even if advised of
+the possibility of such damage.
+
+Permission is hereby granted to use, copy, modify, and distribute this
+source code, or portions hereof, for any purpose, without fee, subject
+to the following restrictions:
+
+ 1. The origin of this source code must not be misrepresented.
+
+ 2. Altered versions must be plainly marked as such and must not
+    be misrepresented as being the original source.
+
+ 3. This Copyright notice may not be removed or altered from any
+    source or altered source distribution.
+
+The Contributing Authors and Group 42, Inc. specifically permit,
+without fee, and encourage the use of this source code as a component
+to supporting the PNG file format in commercial products.  If you use
+this source code in a product, acknowledgment is not required but would
+be appreciated.
diff --git a/reg-io/png/lpng/png.c b/reg-io/png/lpng/png.c
new file mode 100644
index 00000000..fcd030a4
--- /dev/null
+++ b/reg-io/png/lpng/png.c
@@ -0,0 +1,4562 @@
+
+/* png.c - location for general purpose libpng functions
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "pngpriv.h"
+
+/* Generate a compiler error if there is an old png.h in the search path. */
+typedef png_libpng_version_1_6_42 Your_png_h_is_not_version_1_6_42;
+
+/* Tells libpng that we have already handled the first "num_bytes" bytes
+ * of the PNG file signature.  If the PNG data is embedded into another
+ * stream we can set num_bytes = 8 so that libpng will not attempt to read
+ * or write any of the magic bytes before it starts on the IHDR.
+ */
+
+#ifdef PNG_READ_SUPPORTED
+void PNGAPI
+png_set_sig_bytes(png_structrp png_ptr, int num_bytes)
+{
+   unsigned int nb = (unsigned int)num_bytes;
+
+   png_debug(1, "in png_set_sig_bytes");
+
+   if (png_ptr == NULL)
+      return;
+
+   if (num_bytes < 0)
+      nb = 0;
+
+   if (nb > 8)
+      png_error(png_ptr, "Too many bytes for PNG signature");
+
+   png_ptr->sig_bytes = (png_byte)nb;
+}
+
+/* Checks whether the supplied bytes match the PNG signature.  We allow
+ * checking less than the full 8-byte signature so that those apps that
+ * already read the first few bytes of a file to determine the file type
+ * can simply check the remaining bytes for extra assurance.  Returns
+ * an integer less than, equal to, or greater than zero if sig is found,
+ * respectively, to be less than, to match, or be greater than the correct
+ * PNG signature (this is the same behavior as strcmp, memcmp, etc).
+ */
+int PNGAPI
+png_sig_cmp(png_const_bytep sig, size_t start, size_t num_to_check)
+{
+   static const png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+
+   if (num_to_check > 8)
+      num_to_check = 8;
+
+   else if (num_to_check < 1)
+      return -1;
+
+   if (start > 7)
+      return -1;
+
+   if (start + num_to_check > 8)
+      num_to_check = 8 - start;
+
+   return memcmp(&sig[start], &png_signature[start], num_to_check);
+}
+
+#endif /* READ */
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+/* Function to allocate memory for zlib */
+PNG_FUNCTION(voidpf /* PRIVATE */,
+png_zalloc,(voidpf png_ptr, uInt items, uInt size),PNG_ALLOCATED)
+{
+   png_alloc_size_t num_bytes = size;
+
+   if (png_ptr == NULL)
+      return NULL;
+
+   if (items >= (~(png_alloc_size_t)0)/size)
+   {
+      png_warning (png_voidcast(png_structrp, png_ptr),
+          "Potential overflow in png_zalloc()");
+      return NULL;
+   }
+
+   num_bytes *= items;
+   return png_malloc_warn(png_voidcast(png_structrp, png_ptr), num_bytes);
+}
+
+/* Function to free memory for zlib */
+void /* PRIVATE */
+png_zfree(voidpf png_ptr, voidpf ptr)
+{
+   png_free(png_voidcast(png_const_structrp,png_ptr), ptr);
+}
+
+/* Reset the CRC variable to 32 bits of 1's.  Care must be taken
+ * in case CRC is > 32 bits to leave the top bits 0.
+ */
+void /* PRIVATE */
+png_reset_crc(png_structrp png_ptr)
+{
+   /* The cast is safe because the crc is a 32-bit value. */
+   png_ptr->crc = (png_uint_32)crc32(0, Z_NULL, 0);
+}
+
+/* Calculate the CRC over a section of data.  We can only pass as
+ * much data to this routine as the largest single buffer size.  We
+ * also check that this data will actually be used before going to the
+ * trouble of calculating it.
+ */
+void /* PRIVATE */
+png_calculate_crc(png_structrp png_ptr, png_const_bytep ptr, size_t length)
+{
+   int need_crc = 1;
+
+   if (PNG_CHUNK_ANCILLARY(png_ptr->chunk_name) != 0)
+   {
+      if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) ==
+          (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN))
+         need_crc = 0;
+   }
+
+   else /* critical */
+   {
+      if ((png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE) != 0)
+         need_crc = 0;
+   }
+
+   /* 'uLong' is defined in zlib.h as unsigned long; this means that on some
+    * systems it is a 64-bit value.  crc32, however, returns 32 bits so the
+    * following cast is safe.  'uInt' may be no more than 16 bits, so it is
+    * necessary to perform a loop here.
+    */
+   if (need_crc != 0 && length > 0)
+   {
+      uLong crc = png_ptr->crc; /* Should never issue a warning */
+
+      do
+      {
+         uInt safe_length = (uInt)length;
+#ifndef __COVERITY__
+         if (safe_length == 0)
+            safe_length = (uInt)-1; /* evil, but safe */
+#endif
+
+         crc = crc32(crc, ptr, safe_length);
+
+         /* The following should never issue compiler warnings; if they do the
+          * target system has characteristics that will probably violate other
+          * assumptions within the libpng code.
+          */
+         ptr += safe_length;
+         length -= safe_length;
+      }
+      while (length > 0);
+
+      /* And the following is always safe because the crc is only 32 bits. */
+      png_ptr->crc = (png_uint_32)crc;
+   }
+}
+
+/* Check a user supplied version number, called from both read and write
+ * functions that create a png_struct.
+ */
+int
+png_user_version_check(png_structrp png_ptr, png_const_charp user_png_ver)
+{
+   /* Libpng versions 1.0.0 and later are binary compatible if the version
+    * string matches through the second '.'; we must recompile any
+    * applications that use any older library version.
+    */
+
+   if (user_png_ver != NULL)
+   {
+      int i = -1;
+      int found_dots = 0;
+
+      do
+      {
+         i++;
+         if (user_png_ver[i] != PNG_LIBPNG_VER_STRING[i])
+            png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
+         if (user_png_ver[i] == '.')
+            found_dots++;
+      } while (found_dots < 2 && user_png_ver[i] != 0 &&
+            PNG_LIBPNG_VER_STRING[i] != 0);
+   }
+
+   else
+      png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
+
+   if ((png_ptr->flags & PNG_FLAG_LIBRARY_MISMATCH) != 0)
+   {
+#ifdef PNG_WARNINGS_SUPPORTED
+      size_t pos = 0;
+      char m[128];
+
+      pos = png_safecat(m, (sizeof m), pos,
+          "Application built with libpng-");
+      pos = png_safecat(m, (sizeof m), pos, user_png_ver);
+      pos = png_safecat(m, (sizeof m), pos, " but running with ");
+      pos = png_safecat(m, (sizeof m), pos, PNG_LIBPNG_VER_STRING);
+      PNG_UNUSED(pos)
+
+      png_warning(png_ptr, m);
+#endif
+
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+      png_ptr->flags = 0;
+#endif
+
+      return 0;
+   }
+
+   /* Success return. */
+   return 1;
+}
+
+/* Generic function to create a png_struct for either read or write - this
+ * contains the common initialization.
+ */
+PNG_FUNCTION(png_structp /* PRIVATE */,
+png_create_png_struct,(png_const_charp user_png_ver, png_voidp error_ptr,
+    png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+    png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED)
+{
+   png_struct create_struct;
+#  ifdef PNG_SETJMP_SUPPORTED
+      jmp_buf create_jmp_buf;
+#  endif
+
+   /* This temporary stack-allocated structure is used to provide a place to
+    * build enough context to allow the user provided memory allocator (if any)
+    * to be called.
+    */
+   memset(&create_struct, 0, (sizeof create_struct));
+
+   /* Added at libpng-1.2.6 */
+#  ifdef PNG_USER_LIMITS_SUPPORTED
+      create_struct.user_width_max = PNG_USER_WIDTH_MAX;
+      create_struct.user_height_max = PNG_USER_HEIGHT_MAX;
+
+#     ifdef PNG_USER_CHUNK_CACHE_MAX
+      /* Added at libpng-1.2.43 and 1.4.0 */
+      create_struct.user_chunk_cache_max = PNG_USER_CHUNK_CACHE_MAX;
+#     endif
+
+#     ifdef PNG_USER_CHUNK_MALLOC_MAX
+      /* Added at libpng-1.2.43 and 1.4.1, required only for read but exists
+       * in png_struct regardless.
+       */
+      create_struct.user_chunk_malloc_max = PNG_USER_CHUNK_MALLOC_MAX;
+#     endif
+#  endif
+
+   /* The following two API calls simply set fields in png_struct, so it is safe
+    * to do them now even though error handling is not yet set up.
+    */
+#  ifdef PNG_USER_MEM_SUPPORTED
+      png_set_mem_fn(&create_struct, mem_ptr, malloc_fn, free_fn);
+#  else
+      PNG_UNUSED(mem_ptr)
+      PNG_UNUSED(malloc_fn)
+      PNG_UNUSED(free_fn)
+#  endif
+
+   /* (*error_fn) can return control to the caller after the error_ptr is set,
+    * this will result in a memory leak unless the error_fn does something
+    * extremely sophisticated.  The design lacks merit but is implicit in the
+    * API.
+    */
+   png_set_error_fn(&create_struct, error_ptr, error_fn, warn_fn);
+
+#  ifdef PNG_SETJMP_SUPPORTED
+      if (!setjmp(create_jmp_buf))
+#  endif
+      {
+#  ifdef PNG_SETJMP_SUPPORTED
+         /* Temporarily fake out the longjmp information until we have
+          * successfully completed this function.  This only works if we have
+          * setjmp() support compiled in, but it is safe - this stuff should
+          * never happen.
+          */
+         create_struct.jmp_buf_ptr = &create_jmp_buf;
+         create_struct.jmp_buf_size = 0; /*stack allocation*/
+         create_struct.longjmp_fn = longjmp;
+#  endif
+         /* Call the general version checker (shared with read and write code):
+          */
+         if (png_user_version_check(&create_struct, user_png_ver) != 0)
+         {
+            png_structrp png_ptr = png_voidcast(png_structrp,
+                png_malloc_warn(&create_struct, (sizeof *png_ptr)));
+
+            if (png_ptr != NULL)
+            {
+               /* png_ptr->zstream holds a back-pointer to the png_struct, so
+                * this can only be done now:
+                */
+               create_struct.zstream.zalloc = png_zalloc;
+               create_struct.zstream.zfree = png_zfree;
+               create_struct.zstream.opaque = png_ptr;
+
+#              ifdef PNG_SETJMP_SUPPORTED
+               /* Eliminate the local error handling: */
+               create_struct.jmp_buf_ptr = NULL;
+               create_struct.jmp_buf_size = 0;
+               create_struct.longjmp_fn = 0;
+#              endif
+
+               *png_ptr = create_struct;
+
+               /* This is the successful return point */
+               return png_ptr;
+            }
+         }
+      }
+
+   /* A longjmp because of a bug in the application storage allocator or a
+    * simple failure to allocate the png_struct.
+    */
+   return NULL;
+}
+
+/* Allocate the memory for an info_struct for the application. */
+PNG_FUNCTION(png_infop,PNGAPI
+png_create_info_struct,(png_const_structrp png_ptr),PNG_ALLOCATED)
+{
+   png_inforp info_ptr;
+
+   png_debug(1, "in png_create_info_struct");
+
+   if (png_ptr == NULL)
+      return NULL;
+
+   /* Use the internal API that does not (or at least should not) error out, so
+    * that this call always returns ok.  The application typically sets up the
+    * error handling *after* creating the info_struct because this is the way it
+    * has always been done in 'example.c'.
+    */
+   info_ptr = png_voidcast(png_inforp, png_malloc_base(png_ptr,
+       (sizeof *info_ptr)));
+
+   if (info_ptr != NULL)
+      memset(info_ptr, 0, (sizeof *info_ptr));
+
+   return info_ptr;
+}
+
+/* This function frees the memory associated with a single info struct.
+ * Normally, one would use either png_destroy_read_struct() or
+ * png_destroy_write_struct() to free an info struct, but this may be
+ * useful for some applications.  From libpng 1.6.0 this function is also used
+ * internally to implement the png_info release part of the 'struct' destroy
+ * APIs.  This ensures that all possible approaches free the same data (all of
+ * it).
+ */
+void PNGAPI
+png_destroy_info_struct(png_const_structrp png_ptr, png_infopp info_ptr_ptr)
+{
+   png_inforp info_ptr = NULL;
+
+   png_debug(1, "in png_destroy_info_struct");
+
+   if (png_ptr == NULL)
+      return;
+
+   if (info_ptr_ptr != NULL)
+      info_ptr = *info_ptr_ptr;
+
+   if (info_ptr != NULL)
+   {
+      /* Do this first in case of an error below; if the app implements its own
+       * memory management this can lead to png_free calling png_error, which
+       * will abort this routine and return control to the app error handler.
+       * An infinite loop may result if it then tries to free the same info
+       * ptr.
+       */
+      *info_ptr_ptr = NULL;
+
+      png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
+      memset(info_ptr, 0, (sizeof *info_ptr));
+      png_free(png_ptr, info_ptr);
+   }
+}
+
+/* Initialize the info structure.  This is now an internal function (0.89)
+ * and applications using it are urged to use png_create_info_struct()
+ * instead.  Use deprecated in 1.6.0, internal use removed (used internally it
+ * is just a memset).
+ *
+ * NOTE: it is almost inconceivable that this API is used because it bypasses
+ * the user-memory mechanism and the user error handling/warning mechanisms in
+ * those cases where it does anything other than a memset.
+ */
+PNG_FUNCTION(void,PNGAPI
+png_info_init_3,(png_infopp ptr_ptr, size_t png_info_struct_size),
+    PNG_DEPRECATED)
+{
+   png_inforp info_ptr = *ptr_ptr;
+
+   png_debug(1, "in png_info_init_3");
+
+   if (info_ptr == NULL)
+      return;
+
+   if ((sizeof (png_info)) > png_info_struct_size)
+   {
+      *ptr_ptr = NULL;
+      /* The following line is why this API should not be used: */
+      free(info_ptr);
+      info_ptr = png_voidcast(png_inforp, png_malloc_base(NULL,
+          (sizeof *info_ptr)));
+      if (info_ptr == NULL)
+         return;
+      *ptr_ptr = info_ptr;
+   }
+
+   /* Set everything to 0 */
+   memset(info_ptr, 0, (sizeof *info_ptr));
+}
+
+void PNGAPI
+png_data_freer(png_const_structrp png_ptr, png_inforp info_ptr,
+    int freer, png_uint_32 mask)
+{
+   png_debug(1, "in png_data_freer");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   if (freer == PNG_DESTROY_WILL_FREE_DATA)
+      info_ptr->free_me |= mask;
+
+   else if (freer == PNG_USER_WILL_FREE_DATA)
+      info_ptr->free_me &= ~mask;
+
+   else
+      png_error(png_ptr, "Unknown freer parameter in png_data_freer");
+}
+
+void PNGAPI
+png_free_data(png_const_structrp png_ptr, png_inforp info_ptr, png_uint_32 mask,
+    int num)
+{
+   png_debug(1, "in png_free_data");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+#ifdef PNG_TEXT_SUPPORTED
+   /* Free text item num or (if num == -1) all text items */
+   if (info_ptr->text != NULL &&
+       ((mask & PNG_FREE_TEXT) & info_ptr->free_me) != 0)
+   {
+      if (num != -1)
+      {
+         png_free(png_ptr, info_ptr->text[num].key);
+         info_ptr->text[num].key = NULL;
+      }
+
+      else
+      {
+         int i;
+
+         for (i = 0; i < info_ptr->num_text; i++)
+            png_free(png_ptr, info_ptr->text[i].key);
+
+         png_free(png_ptr, info_ptr->text);
+         info_ptr->text = NULL;
+         info_ptr->num_text = 0;
+         info_ptr->max_text = 0;
+      }
+   }
+#endif
+
+#ifdef PNG_tRNS_SUPPORTED
+   /* Free any tRNS entry */
+   if (((mask & PNG_FREE_TRNS) & info_ptr->free_me) != 0)
+   {
+      info_ptr->valid &= ~PNG_INFO_tRNS;
+      png_free(png_ptr, info_ptr->trans_alpha);
+      info_ptr->trans_alpha = NULL;
+      info_ptr->num_trans = 0;
+   }
+#endif
+
+#ifdef PNG_sCAL_SUPPORTED
+   /* Free any sCAL entry */
+   if (((mask & PNG_FREE_SCAL) & info_ptr->free_me) != 0)
+   {
+      png_free(png_ptr, info_ptr->scal_s_width);
+      png_free(png_ptr, info_ptr->scal_s_height);
+      info_ptr->scal_s_width = NULL;
+      info_ptr->scal_s_height = NULL;
+      info_ptr->valid &= ~PNG_INFO_sCAL;
+   }
+#endif
+
+#ifdef PNG_pCAL_SUPPORTED
+   /* Free any pCAL entry */
+   if (((mask & PNG_FREE_PCAL) & info_ptr->free_me) != 0)
+   {
+      png_free(png_ptr, info_ptr->pcal_purpose);
+      png_free(png_ptr, info_ptr->pcal_units);
+      info_ptr->pcal_purpose = NULL;
+      info_ptr->pcal_units = NULL;
+
+      if (info_ptr->pcal_params != NULL)
+         {
+            int i;
+
+            for (i = 0; i < info_ptr->pcal_nparams; i++)
+               png_free(png_ptr, info_ptr->pcal_params[i]);
+
+            png_free(png_ptr, info_ptr->pcal_params);
+            info_ptr->pcal_params = NULL;
+         }
+      info_ptr->valid &= ~PNG_INFO_pCAL;
+   }
+#endif
+
+#ifdef PNG_iCCP_SUPPORTED
+   /* Free any profile entry */
+   if (((mask & PNG_FREE_ICCP) & info_ptr->free_me) != 0)
+   {
+      png_free(png_ptr, info_ptr->iccp_name);
+      png_free(png_ptr, info_ptr->iccp_profile);
+      info_ptr->iccp_name = NULL;
+      info_ptr->iccp_profile = NULL;
+      info_ptr->valid &= ~PNG_INFO_iCCP;
+   }
+#endif
+
+#ifdef PNG_sPLT_SUPPORTED
+   /* Free a given sPLT entry, or (if num == -1) all sPLT entries */
+   if (info_ptr->splt_palettes != NULL &&
+       ((mask & PNG_FREE_SPLT) & info_ptr->free_me) != 0)
+   {
+      if (num != -1)
+      {
+         png_free(png_ptr, info_ptr->splt_palettes[num].name);
+         png_free(png_ptr, info_ptr->splt_palettes[num].entries);
+         info_ptr->splt_palettes[num].name = NULL;
+         info_ptr->splt_palettes[num].entries = NULL;
+      }
+
+      else
+      {
+         int i;
+
+         for (i = 0; i < info_ptr->splt_palettes_num; i++)
+         {
+            png_free(png_ptr, info_ptr->splt_palettes[i].name);
+            png_free(png_ptr, info_ptr->splt_palettes[i].entries);
+         }
+
+         png_free(png_ptr, info_ptr->splt_palettes);
+         info_ptr->splt_palettes = NULL;
+         info_ptr->splt_palettes_num = 0;
+         info_ptr->valid &= ~PNG_INFO_sPLT;
+      }
+   }
+#endif
+
+#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
+   if (info_ptr->unknown_chunks != NULL &&
+       ((mask & PNG_FREE_UNKN) & info_ptr->free_me) != 0)
+   {
+      if (num != -1)
+      {
+          png_free(png_ptr, info_ptr->unknown_chunks[num].data);
+          info_ptr->unknown_chunks[num].data = NULL;
+      }
+
+      else
+      {
+         int i;
+
+         for (i = 0; i < info_ptr->unknown_chunks_num; i++)
+            png_free(png_ptr, info_ptr->unknown_chunks[i].data);
+
+         png_free(png_ptr, info_ptr->unknown_chunks);
+         info_ptr->unknown_chunks = NULL;
+         info_ptr->unknown_chunks_num = 0;
+      }
+   }
+#endif
+
+#ifdef PNG_eXIf_SUPPORTED
+   /* Free any eXIf entry */
+   if (((mask & PNG_FREE_EXIF) & info_ptr->free_me) != 0)
+   {
+# ifdef PNG_READ_eXIf_SUPPORTED
+      if (info_ptr->eXIf_buf)
+      {
+         png_free(png_ptr, info_ptr->eXIf_buf);
+         info_ptr->eXIf_buf = NULL;
+      }
+# endif
+      if (info_ptr->exif)
+      {
+         png_free(png_ptr, info_ptr->exif);
+         info_ptr->exif = NULL;
+      }
+      info_ptr->valid &= ~PNG_INFO_eXIf;
+   }
+#endif
+
+#ifdef PNG_hIST_SUPPORTED
+   /* Free any hIST entry */
+   if (((mask & PNG_FREE_HIST) & info_ptr->free_me) != 0)
+   {
+      png_free(png_ptr, info_ptr->hist);
+      info_ptr->hist = NULL;
+      info_ptr->valid &= ~PNG_INFO_hIST;
+   }
+#endif
+
+   /* Free any PLTE entry that was internally allocated */
+   if (((mask & PNG_FREE_PLTE) & info_ptr->free_me) != 0)
+   {
+      png_free(png_ptr, info_ptr->palette);
+      info_ptr->palette = NULL;
+      info_ptr->valid &= ~PNG_INFO_PLTE;
+      info_ptr->num_palette = 0;
+   }
+
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+   /* Free any image bits attached to the info structure */
+   if (((mask & PNG_FREE_ROWS) & info_ptr->free_me) != 0)
+   {
+      if (info_ptr->row_pointers != NULL)
+      {
+         png_uint_32 row;
+         for (row = 0; row < info_ptr->height; row++)
+            png_free(png_ptr, info_ptr->row_pointers[row]);
+
+         png_free(png_ptr, info_ptr->row_pointers);
+         info_ptr->row_pointers = NULL;
+      }
+      info_ptr->valid &= ~PNG_INFO_IDAT;
+   }
+#endif
+
+   if (num != -1)
+      mask &= ~PNG_FREE_MUL;
+
+   info_ptr->free_me &= ~mask;
+}
+#endif /* READ || WRITE */
+
+/* This function returns a pointer to the io_ptr associated with the user
+ * functions.  The application should free any memory associated with this
+ * pointer before png_write_destroy() or png_read_destroy() are called.
+ */
+png_voidp PNGAPI
+png_get_io_ptr(png_const_structrp png_ptr)
+{
+   if (png_ptr == NULL)
+      return NULL;
+
+   return png_ptr->io_ptr;
+}
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+#  ifdef PNG_STDIO_SUPPORTED
+/* Initialize the default input/output functions for the PNG file.  If you
+ * use your own read or write routines, you can call either png_set_read_fn()
+ * or png_set_write_fn() instead of png_init_io().  If you have defined
+ * PNG_NO_STDIO or otherwise disabled PNG_STDIO_SUPPORTED, you must use a
+ * function of your own because "FILE *" isn't necessarily available.
+ */
+void PNGAPI
+png_init_io(png_structrp png_ptr, png_FILE_p fp)
+{
+   png_debug(1, "in png_init_io");
+
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->io_ptr = (png_voidp)fp;
+}
+#  endif
+
+#  ifdef PNG_SAVE_INT_32_SUPPORTED
+/* PNG signed integers are saved in 32-bit 2's complement format.  ANSI C-90
+ * defines a cast of a signed integer to an unsigned integer either to preserve
+ * the value, if it is positive, or to calculate:
+ *
+ *     (UNSIGNED_MAX+1) + integer
+ *
+ * Where UNSIGNED_MAX is the appropriate maximum unsigned value, so when the
+ * negative integral value is added the result will be an unsigned value
+ * corresponding to the 2's complement representation.
+ */
+void PNGAPI
+png_save_int_32(png_bytep buf, png_int_32 i)
+{
+   png_save_uint_32(buf, (png_uint_32)i);
+}
+#  endif
+
+#  ifdef PNG_TIME_RFC1123_SUPPORTED
+/* Convert the supplied time into an RFC 1123 string suitable for use in
+ * a "Creation Time" or other text-based time string.
+ */
+int PNGAPI
+png_convert_to_rfc1123_buffer(char out[29], png_const_timep ptime)
+{
+   static const char short_months[12][4] =
+        {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
+
+   if (out == NULL)
+      return 0;
+
+   if (ptime->year > 9999 /* RFC1123 limitation */ ||
+       ptime->month == 0    ||  ptime->month > 12  ||
+       ptime->day   == 0    ||  ptime->day   > 31  ||
+       ptime->hour  > 23    ||  ptime->minute > 59 ||
+       ptime->second > 60)
+      return 0;
+
+   {
+      size_t pos = 0;
+      char number_buf[5] = {0, 0, 0, 0, 0}; /* enough for a four-digit year */
+
+#     define APPEND_STRING(string) pos = png_safecat(out, 29, pos, (string))
+#     define APPEND_NUMBER(format, value)\
+         APPEND_STRING(PNG_FORMAT_NUMBER(number_buf, format, (value)))
+#     define APPEND(ch) if (pos < 28) out[pos++] = (ch)
+
+      APPEND_NUMBER(PNG_NUMBER_FORMAT_u, (unsigned)ptime->day);
+      APPEND(' ');
+      APPEND_STRING(short_months[(ptime->month - 1)]);
+      APPEND(' ');
+      APPEND_NUMBER(PNG_NUMBER_FORMAT_u, ptime->year);
+      APPEND(' ');
+      APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->hour);
+      APPEND(':');
+      APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->minute);
+      APPEND(':');
+      APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->second);
+      APPEND_STRING(" +0000"); /* This reliably terminates the buffer */
+      PNG_UNUSED (pos)
+
+#     undef APPEND
+#     undef APPEND_NUMBER
+#     undef APPEND_STRING
+   }
+
+   return 1;
+}
+
+#    if PNG_LIBPNG_VER < 10700
+/* To do: remove the following from libpng-1.7 */
+/* Original API that uses a private buffer in png_struct.
+ * Deprecated because it causes png_struct to carry a spurious temporary
+ * buffer (png_struct::time_buffer), better to have the caller pass this in.
+ */
+png_const_charp PNGAPI
+png_convert_to_rfc1123(png_structrp png_ptr, png_const_timep ptime)
+{
+   if (png_ptr != NULL)
+   {
+      /* The only failure above if png_ptr != NULL is from an invalid ptime */
+      if (png_convert_to_rfc1123_buffer(png_ptr->time_buffer, ptime) == 0)
+         png_warning(png_ptr, "Ignoring invalid time value");
+
+      else
+         return png_ptr->time_buffer;
+   }
+
+   return NULL;
+}
+#    endif /* LIBPNG_VER < 10700 */
+#  endif /* TIME_RFC1123 */
+
+#endif /* READ || WRITE */
+
+png_const_charp PNGAPI
+png_get_copyright(png_const_structrp png_ptr)
+{
+   PNG_UNUSED(png_ptr)  /* Silence compiler warning about unused png_ptr */
+#ifdef PNG_STRING_COPYRIGHT
+   return PNG_STRING_COPYRIGHT
+#else
+   return PNG_STRING_NEWLINE \
+      "libpng version 1.6.42" PNG_STRING_NEWLINE \
+      "Copyright (c) 2018-2024 Cosmin Truta" PNG_STRING_NEWLINE \
+      "Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson" \
+      PNG_STRING_NEWLINE \
+      "Copyright (c) 1996-1997 Andreas Dilger" PNG_STRING_NEWLINE \
+      "Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc." \
+      PNG_STRING_NEWLINE;
+#endif
+}
+
+/* The following return the library version as a short string in the
+ * format 1.0.0 through 99.99.99zz.  To get the version of *.h files
+ * used with your application, print out PNG_LIBPNG_VER_STRING, which
+ * is defined in png.h.
+ * Note: now there is no difference between png_get_libpng_ver() and
+ * png_get_header_ver().  Due to the version_nn_nn_nn typedef guard,
+ * it is guaranteed that png.c uses the correct version of png.h.
+ */
+png_const_charp PNGAPI
+png_get_libpng_ver(png_const_structrp png_ptr)
+{
+   /* Version of *.c files used when building libpng */
+   return png_get_header_ver(png_ptr);
+}
+
+png_const_charp PNGAPI
+png_get_header_ver(png_const_structrp png_ptr)
+{
+   /* Version of *.h files used when building libpng */
+   PNG_UNUSED(png_ptr)  /* Silence compiler warning about unused png_ptr */
+   return PNG_LIBPNG_VER_STRING;
+}
+
+png_const_charp PNGAPI
+png_get_header_version(png_const_structrp png_ptr)
+{
+   /* Returns longer string containing both version and date */
+   PNG_UNUSED(png_ptr)  /* Silence compiler warning about unused png_ptr */
+#ifdef __STDC__
+   return PNG_HEADER_VERSION_STRING
+#  ifndef PNG_READ_SUPPORTED
+      " (NO READ SUPPORT)"
+#  endif
+      PNG_STRING_NEWLINE;
+#else
+   return PNG_HEADER_VERSION_STRING;
+#endif
+}
+
+#ifdef PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED
+/* NOTE: this routine is not used internally! */
+/* Build a grayscale palette.  Palette is assumed to be 1 << bit_depth
+ * large of png_color.  This lets grayscale images be treated as
+ * paletted.  Most useful for gamma correction and simplification
+ * of code.  This API is not used internally.
+ */
+void PNGAPI
+png_build_grayscale_palette(int bit_depth, png_colorp palette)
+{
+   int num_palette;
+   int color_inc;
+   int i;
+   int v;
+
+   png_debug(1, "in png_do_build_grayscale_palette");
+
+   if (palette == NULL)
+      return;
+
+   switch (bit_depth)
+   {
+      case 1:
+         num_palette = 2;
+         color_inc = 0xff;
+         break;
+
+      case 2:
+         num_palette = 4;
+         color_inc = 0x55;
+         break;
+
+      case 4:
+         num_palette = 16;
+         color_inc = 0x11;
+         break;
+
+      case 8:
+         num_palette = 256;
+         color_inc = 1;
+         break;
+
+      default:
+         num_palette = 0;
+         color_inc = 0;
+         break;
+   }
+
+   for (i = 0, v = 0; i < num_palette; i++, v += color_inc)
+   {
+      palette[i].red = (png_byte)(v & 0xff);
+      palette[i].green = (png_byte)(v & 0xff);
+      palette[i].blue = (png_byte)(v & 0xff);
+   }
+}
+#endif
+
+#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
+int PNGAPI
+png_handle_as_unknown(png_const_structrp png_ptr, png_const_bytep chunk_name)
+{
+   /* Check chunk_name and return "keep" value if it's on the list, else 0 */
+   png_const_bytep p, p_end;
+
+   if (png_ptr == NULL || chunk_name == NULL || png_ptr->num_chunk_list == 0)
+      return PNG_HANDLE_CHUNK_AS_DEFAULT;
+
+   p_end = png_ptr->chunk_list;
+   p = p_end + png_ptr->num_chunk_list*5; /* beyond end */
+
+   /* The code is the fifth byte after each four byte string.  Historically this
+    * code was always searched from the end of the list, this is no longer
+    * necessary because the 'set' routine handles duplicate entries correctly.
+    */
+   do /* num_chunk_list > 0, so at least one */
+   {
+      p -= 5;
+
+      if (memcmp(chunk_name, p, 4) == 0)
+         return p[4];
+   }
+   while (p > p_end);
+
+   /* This means that known chunks should be processed and unknown chunks should
+    * be handled according to the value of png_ptr->unknown_default; this can be
+    * confusing because, as a result, there are two levels of defaulting for
+    * unknown chunks.
+    */
+   return PNG_HANDLE_CHUNK_AS_DEFAULT;
+}
+
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) ||\
+   defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED)
+int /* PRIVATE */
+png_chunk_unknown_handling(png_const_structrp png_ptr, png_uint_32 chunk_name)
+{
+   png_byte chunk_string[5];
+
+   PNG_CSTRING_FROM_CHUNK(chunk_string, chunk_name);
+   return png_handle_as_unknown(png_ptr, chunk_string);
+}
+#endif /* READ_UNKNOWN_CHUNKS || HANDLE_AS_UNKNOWN */
+#endif /* SET_UNKNOWN_CHUNKS */
+
+#ifdef PNG_READ_SUPPORTED
+/* This function, added to libpng-1.0.6g, is untested. */
+int PNGAPI
+png_reset_zstream(png_structrp png_ptr)
+{
+   if (png_ptr == NULL)
+      return Z_STREAM_ERROR;
+
+   /* WARNING: this resets the window bits to the maximum! */
+   return inflateReset(&png_ptr->zstream);
+}
+#endif /* READ */
+
+/* This function was added to libpng-1.0.7 */
+png_uint_32 PNGAPI
+png_access_version_number(void)
+{
+   /* Version of *.c files used when building libpng */
+   return (png_uint_32)PNG_LIBPNG_VER;
+}
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+/* Ensure that png_ptr->zstream.msg holds some appropriate error message string.
+ * If it doesn't 'ret' is used to set it to something appropriate, even in cases
+ * like Z_OK or Z_STREAM_END where the error code is apparently a success code.
+ */
+void /* PRIVATE */
+png_zstream_error(png_structrp png_ptr, int ret)
+{
+   /* Translate 'ret' into an appropriate error string, priority is given to the
+    * one in zstream if set.  This always returns a string, even in cases like
+    * Z_OK or Z_STREAM_END where the error code is a success code.
+    */
+   if (png_ptr->zstream.msg == NULL) switch (ret)
+   {
+      default:
+      case Z_OK:
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected zlib return code");
+         break;
+
+      case Z_STREAM_END:
+         /* Normal exit */
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected end of LZ stream");
+         break;
+
+      case Z_NEED_DICT:
+         /* This means the deflate stream did not have a dictionary; this
+          * indicates a bogus PNG.
+          */
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("missing LZ dictionary");
+         break;
+
+      case Z_ERRNO:
+         /* gz APIs only: should not happen */
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("zlib IO error");
+         break;
+
+      case Z_STREAM_ERROR:
+         /* internal libpng error */
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("bad parameters to zlib");
+         break;
+
+      case Z_DATA_ERROR:
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("damaged LZ stream");
+         break;
+
+      case Z_MEM_ERROR:
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("insufficient memory");
+         break;
+
+      case Z_BUF_ERROR:
+         /* End of input or output; not a problem if the caller is doing
+          * incremental read or write.
+          */
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("truncated");
+         break;
+
+      case Z_VERSION_ERROR:
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("unsupported zlib version");
+         break;
+
+      case PNG_UNEXPECTED_ZLIB_RETURN:
+         /* Compile errors here mean that zlib now uses the value co-opted in
+          * pngpriv.h for PNG_UNEXPECTED_ZLIB_RETURN; update the switch above
+          * and change pngpriv.h.  Note that this message is "... return",
+          * whereas the default/Z_OK one is "... return code".
+          */
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected zlib return");
+         break;
+   }
+}
+
+/* png_convert_size: a PNGAPI but no longer in png.h, so deleted
+ * at libpng 1.5.5!
+ */
+
+/* Added at libpng version 1.2.34 and 1.4.0 (moved from pngset.c) */
+#ifdef PNG_GAMMA_SUPPORTED /* always set if COLORSPACE */
+static int
+png_colorspace_check_gamma(png_const_structrp png_ptr,
+    png_colorspacerp colorspace, png_fixed_point gAMA, int from)
+   /* This is called to check a new gamma value against an existing one.  The
+    * routine returns false if the new gamma value should not be written.
+    *
+    * 'from' says where the new gamma value comes from:
+    *
+    *    0: the new gamma value is the libpng estimate for an ICC profile
+    *    1: the new gamma value comes from a gAMA chunk
+    *    2: the new gamma value comes from an sRGB chunk
+    */
+{
+   png_fixed_point gtest;
+
+   if ((colorspace->flags & PNG_COLORSPACE_HAVE_GAMMA) != 0 &&
+       (png_muldiv(&gtest, colorspace->gamma, PNG_FP_1, gAMA) == 0  ||
+      png_gamma_significant(gtest) != 0))
+   {
+      /* Either this is an sRGB image, in which case the calculated gamma
+       * approximation should match, or this is an image with a profile and the
+       * value libpng calculates for the gamma of the profile does not match the
+       * value recorded in the file.  The former, sRGB, case is an error, the
+       * latter is just a warning.
+       */
+      if ((colorspace->flags & PNG_COLORSPACE_FROM_sRGB) != 0 || from == 2)
+      {
+         png_chunk_report(png_ptr, "gamma value does not match sRGB",
+             PNG_CHUNK_ERROR);
+         /* Do not overwrite an sRGB value */
+         return from == 2;
+      }
+
+      else /* sRGB tag not involved */
+      {
+         png_chunk_report(png_ptr, "gamma value does not match libpng estimate",
+             PNG_CHUNK_WARNING);
+         return from == 1;
+      }
+   }
+
+   return 1;
+}
+
+void /* PRIVATE */
+png_colorspace_set_gamma(png_const_structrp png_ptr,
+    png_colorspacerp colorspace, png_fixed_point gAMA)
+{
+   /* Changed in libpng-1.5.4 to limit the values to ensure overflow can't
+    * occur.  Since the fixed point representation is asymmetrical it is
+    * possible for 1/gamma to overflow the limit of 21474 and this means the
+    * gamma value must be at least 5/100000 and hence at most 20000.0.  For
+    * safety the limits here are a little narrower.  The values are 0.00016 to
+    * 6250.0, which are truly ridiculous gamma values (and will produce
+    * displays that are all black or all white.)
+    *
+    * In 1.6.0 this test replaces the ones in pngrutil.c, in the gAMA chunk
+    * handling code, which only required the value to be >0.
+    */
+   png_const_charp errmsg;
+
+   if (gAMA < 16 || gAMA > 625000000)
+      errmsg = "gamma value out of range";
+
+#  ifdef PNG_READ_gAMA_SUPPORTED
+   /* Allow the application to set the gamma value more than once */
+   else if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 &&
+      (colorspace->flags & PNG_COLORSPACE_FROM_gAMA) != 0)
+      errmsg = "duplicate";
+#  endif
+
+   /* Do nothing if the colorspace is already invalid */
+   else if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0)
+      return;
+
+   else
+   {
+      if (png_colorspace_check_gamma(png_ptr, colorspace, gAMA,
+          1/*from gAMA*/) != 0)
+      {
+         /* Store this gamma value. */
+         colorspace->gamma = gAMA;
+         colorspace->flags |=
+            (PNG_COLORSPACE_HAVE_GAMMA | PNG_COLORSPACE_FROM_gAMA);
+      }
+
+      /* At present if the check_gamma test fails the gamma of the colorspace is
+       * not updated however the colorspace is not invalidated.  This
+       * corresponds to the case where the existing gamma comes from an sRGB
+       * chunk or profile.  An error message has already been output.
+       */
+      return;
+   }
+
+   /* Error exit - errmsg has been set. */
+   colorspace->flags |= PNG_COLORSPACE_INVALID;
+   png_chunk_report(png_ptr, errmsg, PNG_CHUNK_WRITE_ERROR);
+}
+
+void /* PRIVATE */
+png_colorspace_sync_info(png_const_structrp png_ptr, png_inforp info_ptr)
+{
+   if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0)
+   {
+      /* Everything is invalid */
+      info_ptr->valid &= ~(PNG_INFO_gAMA|PNG_INFO_cHRM|PNG_INFO_sRGB|
+         PNG_INFO_iCCP);
+
+#     ifdef PNG_COLORSPACE_SUPPORTED
+      /* Clean up the iCCP profile now if it won't be used. */
+      png_free_data(png_ptr, info_ptr, PNG_FREE_ICCP, -1/*not used*/);
+#     else
+      PNG_UNUSED(png_ptr)
+#     endif
+   }
+
+   else
+   {
+#     ifdef PNG_COLORSPACE_SUPPORTED
+      /* Leave the INFO_iCCP flag set if the pngset.c code has already set
+       * it; this allows a PNG to contain a profile which matches sRGB and
+       * yet still have that profile retrievable by the application.
+       */
+      if ((info_ptr->colorspace.flags & PNG_COLORSPACE_MATCHES_sRGB) != 0)
+         info_ptr->valid |= PNG_INFO_sRGB;
+
+      else
+         info_ptr->valid &= ~PNG_INFO_sRGB;
+
+      if ((info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
+         info_ptr->valid |= PNG_INFO_cHRM;
+
+      else
+         info_ptr->valid &= ~PNG_INFO_cHRM;
+#     endif
+
+      if ((info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) != 0)
+         info_ptr->valid |= PNG_INFO_gAMA;
+
+      else
+         info_ptr->valid &= ~PNG_INFO_gAMA;
+   }
+}
+
+#ifdef PNG_READ_SUPPORTED
+void /* PRIVATE */
+png_colorspace_sync(png_const_structrp png_ptr, png_inforp info_ptr)
+{
+   if (info_ptr == NULL) /* reduce code size; check here not in the caller */
+      return;
+
+   info_ptr->colorspace = png_ptr->colorspace;
+   png_colorspace_sync_info(png_ptr, info_ptr);
+}
+#endif
+#endif /* GAMMA */
+
+#ifdef PNG_COLORSPACE_SUPPORTED
+/* Added at libpng-1.5.5 to support read and write of true CIEXYZ values for
+ * cHRM, as opposed to using chromaticities.  These internal APIs return
+ * non-zero on a parameter error.  The X, Y and Z values are required to be
+ * positive and less than 1.0.
+ */
+static int
+png_xy_from_XYZ(png_xy *xy, const png_XYZ *XYZ)
+{
+   png_int_32 d, dwhite, whiteX, whiteY;
+
+   d = XYZ->red_X + XYZ->red_Y + XYZ->red_Z;
+   if (png_muldiv(&xy->redx, XYZ->red_X, PNG_FP_1, d) == 0)
+      return 1;
+   if (png_muldiv(&xy->redy, XYZ->red_Y, PNG_FP_1, d) == 0)
+      return 1;
+   dwhite = d;
+   whiteX = XYZ->red_X;
+   whiteY = XYZ->red_Y;
+
+   d = XYZ->green_X + XYZ->green_Y + XYZ->green_Z;
+   if (png_muldiv(&xy->greenx, XYZ->green_X, PNG_FP_1, d) == 0)
+      return 1;
+   if (png_muldiv(&xy->greeny, XYZ->green_Y, PNG_FP_1, d) == 0)
+      return 1;
+   dwhite += d;
+   whiteX += XYZ->green_X;
+   whiteY += XYZ->green_Y;
+
+   d = XYZ->blue_X + XYZ->blue_Y + XYZ->blue_Z;
+   if (png_muldiv(&xy->bluex, XYZ->blue_X, PNG_FP_1, d) == 0)
+      return 1;
+   if (png_muldiv(&xy->bluey, XYZ->blue_Y, PNG_FP_1, d) == 0)
+      return 1;
+   dwhite += d;
+   whiteX += XYZ->blue_X;
+   whiteY += XYZ->blue_Y;
+
+   /* The reference white is simply the sum of the end-point (X,Y,Z) vectors,
+    * thus:
+    */
+   if (png_muldiv(&xy->whitex, whiteX, PNG_FP_1, dwhite) == 0)
+      return 1;
+   if (png_muldiv(&xy->whitey, whiteY, PNG_FP_1, dwhite) == 0)
+      return 1;
+
+   return 0;
+}
+
+static int
+png_XYZ_from_xy(png_XYZ *XYZ, const png_xy *xy)
+{
+   png_fixed_point red_inverse, green_inverse, blue_scale;
+   png_fixed_point left, right, denominator;
+
+   /* Check xy and, implicitly, z.  Note that wide gamut color spaces typically
+    * have end points with 0 tristimulus values (these are impossible end
+    * points, but they are used to cover the possible colors).  We check
+    * xy->whitey against 5, not 0, to avoid a possible integer overflow.
+    */
+   if (xy->redx   < 0 || xy->redx > PNG_FP_1) return 1;
+   if (xy->redy   < 0 || xy->redy > PNG_FP_1-xy->redx) return 1;
+   if (xy->greenx < 0 || xy->greenx > PNG_FP_1) return 1;
+   if (xy->greeny < 0 || xy->greeny > PNG_FP_1-xy->greenx) return 1;
+   if (xy->bluex  < 0 || xy->bluex > PNG_FP_1) return 1;
+   if (xy->bluey  < 0 || xy->bluey > PNG_FP_1-xy->bluex) return 1;
+   if (xy->whitex < 0 || xy->whitex > PNG_FP_1) return 1;
+   if (xy->whitey < 5 || xy->whitey > PNG_FP_1-xy->whitex) return 1;
+
+   /* The reverse calculation is more difficult because the original tristimulus
+    * value had 9 independent values (red,green,blue)x(X,Y,Z) however only 8
+    * derived values were recorded in the cHRM chunk;
+    * (red,green,blue,white)x(x,y).  This loses one degree of freedom and
+    * therefore an arbitrary ninth value has to be introduced to undo the
+    * original transformations.
+    *
+    * Think of the original end-points as points in (X,Y,Z) space.  The
+    * chromaticity values (c) have the property:
+    *
+    *           C
+    *   c = ---------
+    *       X + Y + Z
+    *
+    * For each c (x,y,z) from the corresponding original C (X,Y,Z).  Thus the
+    * three chromaticity values (x,y,z) for each end-point obey the
+    * relationship:
+    *
+    *   x + y + z = 1
+    *
+    * This describes the plane in (X,Y,Z) space that intersects each axis at the
+    * value 1.0; call this the chromaticity plane.  Thus the chromaticity
+    * calculation has scaled each end-point so that it is on the x+y+z=1 plane
+    * and chromaticity is the intersection of the vector from the origin to the
+    * (X,Y,Z) value with the chromaticity plane.
+    *
+    * To fully invert the chromaticity calculation we would need the three
+    * end-point scale factors, (red-scale, green-scale, blue-scale), but these
+    * were not recorded.  Instead we calculated the reference white (X,Y,Z) and
+    * recorded the chromaticity of this.  The reference white (X,Y,Z) would have
+    * given all three of the scale factors since:
+    *
+    *    color-C = color-c * color-scale
+    *    white-C = red-C + green-C + blue-C
+    *            = red-c*red-scale + green-c*green-scale + blue-c*blue-scale
+    *
+    * But cHRM records only white-x and white-y, so we have lost the white scale
+    * factor:
+    *
+    *    white-C = white-c*white-scale
+    *
+    * To handle this the inverse transformation makes an arbitrary assumption
+    * about white-scale:
+    *
+    *    Assume: white-Y = 1.0
+    *    Hence:  white-scale = 1/white-y
+    *    Or:     red-Y + green-Y + blue-Y = 1.0
+    *
+    * Notice the last statement of the assumption gives an equation in three of
+    * the nine values we want to calculate.  8 more equations come from the
+    * above routine as summarised at the top above (the chromaticity
+    * calculation):
+    *
+    *    Given: color-x = color-X / (color-X + color-Y + color-Z)
+    *    Hence: (color-x - 1)*color-X + color.x*color-Y + color.x*color-Z = 0
+    *
+    * This is 9 simultaneous equations in the 9 variables "color-C" and can be
+    * solved by Cramer's rule.  Cramer's rule requires calculating 10 9x9 matrix
+    * determinants, however this is not as bad as it seems because only 28 of
+    * the total of 90 terms in the various matrices are non-zero.  Nevertheless
+    * Cramer's rule is notoriously numerically unstable because the determinant
+    * calculation involves the difference of large, but similar, numbers.  It is
+    * difficult to be sure that the calculation is stable for real world values
+    * and it is certain that it becomes unstable where the end points are close
+    * together.
+    *
+    * So this code uses the perhaps slightly less optimal but more
+    * understandable and totally obvious approach of calculating color-scale.
+    *
+    * This algorithm depends on the precision in white-scale and that is
+    * (1/white-y), so we can immediately see that as white-y approaches 0 the
+    * accuracy inherent in the cHRM chunk drops off substantially.
+    *
+    * libpng arithmetic: a simple inversion of the above equations
+    * ------------------------------------------------------------
+    *
+    *    white_scale = 1/white-y
+    *    white-X = white-x * white-scale
+    *    white-Y = 1.0
+    *    white-Z = (1 - white-x - white-y) * white_scale
+    *
+    *    white-C = red-C + green-C + blue-C
+    *            = red-c*red-scale + green-c*green-scale + blue-c*blue-scale
+    *
+    * This gives us three equations in (red-scale,green-scale,blue-scale) where
+    * all the coefficients are now known:
+    *
+    *    red-x*red-scale + green-x*green-scale + blue-x*blue-scale
+    *       = white-x/white-y
+    *    red-y*red-scale + green-y*green-scale + blue-y*blue-scale = 1
+    *    red-z*red-scale + green-z*green-scale + blue-z*blue-scale
+    *       = (1 - white-x - white-y)/white-y
+    *
+    * In the last equation color-z is (1 - color-x - color-y) so we can add all
+    * three equations together to get an alternative third:
+    *
+    *    red-scale + green-scale + blue-scale = 1/white-y = white-scale
+    *
+    * So now we have a Cramer's rule solution where the determinants are just
+    * 3x3 - far more tractible.  Unfortunately 3x3 determinants still involve
+    * multiplication of three coefficients so we can't guarantee to avoid
+    * overflow in the libpng fixed point representation.  Using Cramer's rule in
+    * floating point is probably a good choice here, but it's not an option for
+    * fixed point.  Instead proceed to simplify the first two equations by
+    * eliminating what is likely to be the largest value, blue-scale:
+    *
+    *    blue-scale = white-scale - red-scale - green-scale
+    *
+    * Hence:
+    *
+    *    (red-x - blue-x)*red-scale + (green-x - blue-x)*green-scale =
+    *                (white-x - blue-x)*white-scale
+    *
+    *    (red-y - blue-y)*red-scale + (green-y - blue-y)*green-scale =
+    *                1 - blue-y*white-scale
+    *
+    * And now we can trivially solve for (red-scale,green-scale):
+    *
+    *    green-scale =
+    *                (white-x - blue-x)*white-scale - (red-x - blue-x)*red-scale
+    *                -----------------------------------------------------------
+    *                                  green-x - blue-x
+    *
+    *    red-scale =
+    *                1 - blue-y*white-scale - (green-y - blue-y) * green-scale
+    *                ---------------------------------------------------------
+    *                                  red-y - blue-y
+    *
+    * Hence:
+    *
+    *    red-scale =
+    *          ( (green-x - blue-x) * (white-y - blue-y) -
+    *            (green-y - blue-y) * (white-x - blue-x) ) / white-y
+    * -------------------------------------------------------------------------
+    *  (green-x - blue-x)*(red-y - blue-y)-(green-y - blue-y)*(red-x - blue-x)
+    *
+    *    green-scale =
+    *          ( (red-y - blue-y) * (white-x - blue-x) -
+    *            (red-x - blue-x) * (white-y - blue-y) ) / white-y
+    * -------------------------------------------------------------------------
+    *  (green-x - blue-x)*(red-y - blue-y)-(green-y - blue-y)*(red-x - blue-x)
+    *
+    * Accuracy:
+    * The input values have 5 decimal digits of accuracy.  The values are all in
+    * the range 0 < value < 1, so simple products are in the same range but may
+    * need up to 10 decimal digits to preserve the original precision and avoid
+    * underflow.  Because we are using a 32-bit signed representation we cannot
+    * match this; the best is a little over 9 decimal digits, less than 10.
+    *
+    * The approach used here is to preserve the maximum precision within the
+    * signed representation.  Because the red-scale calculation above uses the
+    * difference between two products of values that must be in the range -1..+1
+    * it is sufficient to divide the product by 7; ceil(100,000/32767*2).  The
+    * factor is irrelevant in the calculation because it is applied to both
+    * numerator and denominator.
+    *
+    * Note that the values of the differences of the products of the
+    * chromaticities in the above equations tend to be small, for example for
+    * the sRGB chromaticities they are:
+    *
+    * red numerator:    -0.04751
+    * green numerator:  -0.08788
+    * denominator:      -0.2241 (without white-y multiplication)
+    *
+    *  The resultant Y coefficients from the chromaticities of some widely used
+    *  color space definitions are (to 15 decimal places):
+    *
+    *  sRGB
+    *    0.212639005871510 0.715168678767756 0.072192315360734
+    *  Kodak ProPhoto
+    *    0.288071128229293 0.711843217810102 0.000085653960605
+    *  Adobe RGB
+    *    0.297344975250536 0.627363566255466 0.075291458493998
+    *  Adobe Wide Gamut RGB
+    *    0.258728243040113 0.724682314948566 0.016589442011321
+    */
+   /* By the argument, above overflow should be impossible here. The return
+    * value of 2 indicates an internal error to the caller.
+    */
+   if (png_muldiv(&left, xy->greenx-xy->bluex, xy->redy - xy->bluey, 7) == 0)
+      return 2;
+   if (png_muldiv(&right, xy->greeny-xy->bluey, xy->redx - xy->bluex, 7) == 0)
+      return 2;
+   denominator = left - right;
+
+   /* Now find the red numerator. */
+   if (png_muldiv(&left, xy->greenx-xy->bluex, xy->whitey-xy->bluey, 7) == 0)
+      return 2;
+   if (png_muldiv(&right, xy->greeny-xy->bluey, xy->whitex-xy->bluex, 7) == 0)
+      return 2;
+
+   /* Overflow is possible here and it indicates an extreme set of PNG cHRM
+    * chunk values.  This calculation actually returns the reciprocal of the
+    * scale value because this allows us to delay the multiplication of white-y
+    * into the denominator, which tends to produce a small number.
+    */
+   if (png_muldiv(&red_inverse, xy->whitey, denominator, left-right) == 0 ||
+       red_inverse <= xy->whitey /* r+g+b scales = white scale */)
+      return 1;
+
+   /* Similarly for green_inverse: */
+   if (png_muldiv(&left, xy->redy-xy->bluey, xy->whitex-xy->bluex, 7) == 0)
+      return 2;
+   if (png_muldiv(&right, xy->redx-xy->bluex, xy->whitey-xy->bluey, 7) == 0)
+      return 2;
+   if (png_muldiv(&green_inverse, xy->whitey, denominator, left-right) == 0 ||
+       green_inverse <= xy->whitey)
+      return 1;
+
+   /* And the blue scale, the checks above guarantee this can't overflow but it
+    * can still produce 0 for extreme cHRM values.
+    */
+   blue_scale = png_reciprocal(xy->whitey) - png_reciprocal(red_inverse) -
+       png_reciprocal(green_inverse);
+   if (blue_scale <= 0)
+      return 1;
+
+
+   /* And fill in the png_XYZ: */
+   if (png_muldiv(&XYZ->red_X, xy->redx, PNG_FP_1, red_inverse) == 0)
+      return 1;
+   if (png_muldiv(&XYZ->red_Y, xy->redy, PNG_FP_1, red_inverse) == 0)
+      return 1;
+   if (png_muldiv(&XYZ->red_Z, PNG_FP_1 - xy->redx - xy->redy, PNG_FP_1,
+       red_inverse) == 0)
+      return 1;
+
+   if (png_muldiv(&XYZ->green_X, xy->greenx, PNG_FP_1, green_inverse) == 0)
+      return 1;
+   if (png_muldiv(&XYZ->green_Y, xy->greeny, PNG_FP_1, green_inverse) == 0)
+      return 1;
+   if (png_muldiv(&XYZ->green_Z, PNG_FP_1 - xy->greenx - xy->greeny, PNG_FP_1,
+       green_inverse) == 0)
+      return 1;
+
+   if (png_muldiv(&XYZ->blue_X, xy->bluex, blue_scale, PNG_FP_1) == 0)
+      return 1;
+   if (png_muldiv(&XYZ->blue_Y, xy->bluey, blue_scale, PNG_FP_1) == 0)
+      return 1;
+   if (png_muldiv(&XYZ->blue_Z, PNG_FP_1 - xy->bluex - xy->bluey, blue_scale,
+       PNG_FP_1) == 0)
+      return 1;
+
+   return 0; /*success*/
+}
+
+static int
+png_XYZ_normalize(png_XYZ *XYZ)
+{
+   png_int_32 Y;
+
+   if (XYZ->red_Y < 0 || XYZ->green_Y < 0 || XYZ->blue_Y < 0 ||
+      XYZ->red_X < 0 || XYZ->green_X < 0 || XYZ->blue_X < 0 ||
+      XYZ->red_Z < 0 || XYZ->green_Z < 0 || XYZ->blue_Z < 0)
+      return 1;
+
+   /* Normalize by scaling so the sum of the end-point Y values is PNG_FP_1.
+    * IMPLEMENTATION NOTE: ANSI requires signed overflow not to occur, therefore
+    * relying on addition of two positive values producing a negative one is not
+    * safe.
+    */
+   Y = XYZ->red_Y;
+   if (0x7fffffff - Y < XYZ->green_X)
+      return 1;
+   Y += XYZ->green_Y;
+   if (0x7fffffff - Y < XYZ->blue_X)
+      return 1;
+   Y += XYZ->blue_Y;
+
+   if (Y != PNG_FP_1)
+   {
+      if (png_muldiv(&XYZ->red_X, XYZ->red_X, PNG_FP_1, Y) == 0)
+         return 1;
+      if (png_muldiv(&XYZ->red_Y, XYZ->red_Y, PNG_FP_1, Y) == 0)
+         return 1;
+      if (png_muldiv(&XYZ->red_Z, XYZ->red_Z, PNG_FP_1, Y) == 0)
+         return 1;
+
+      if (png_muldiv(&XYZ->green_X, XYZ->green_X, PNG_FP_1, Y) == 0)
+         return 1;
+      if (png_muldiv(&XYZ->green_Y, XYZ->green_Y, PNG_FP_1, Y) == 0)
+         return 1;
+      if (png_muldiv(&XYZ->green_Z, XYZ->green_Z, PNG_FP_1, Y) == 0)
+         return 1;
+
+      if (png_muldiv(&XYZ->blue_X, XYZ->blue_X, PNG_FP_1, Y) == 0)
+         return 1;
+      if (png_muldiv(&XYZ->blue_Y, XYZ->blue_Y, PNG_FP_1, Y) == 0)
+         return 1;
+      if (png_muldiv(&XYZ->blue_Z, XYZ->blue_Z, PNG_FP_1, Y) == 0)
+         return 1;
+   }
+
+   return 0;
+}
+
+static int
+png_colorspace_endpoints_match(const png_xy *xy1, const png_xy *xy2, int delta)
+{
+   /* Allow an error of +/-0.01 (absolute value) on each chromaticity */
+   if (PNG_OUT_OF_RANGE(xy1->whitex, xy2->whitex,delta) ||
+       PNG_OUT_OF_RANGE(xy1->whitey, xy2->whitey,delta) ||
+       PNG_OUT_OF_RANGE(xy1->redx,   xy2->redx,  delta) ||
+       PNG_OUT_OF_RANGE(xy1->redy,   xy2->redy,  delta) ||
+       PNG_OUT_OF_RANGE(xy1->greenx, xy2->greenx,delta) ||
+       PNG_OUT_OF_RANGE(xy1->greeny, xy2->greeny,delta) ||
+       PNG_OUT_OF_RANGE(xy1->bluex,  xy2->bluex, delta) ||
+       PNG_OUT_OF_RANGE(xy1->bluey,  xy2->bluey, delta))
+      return 0;
+   return 1;
+}
+
+/* Added in libpng-1.6.0, a different check for the validity of a set of cHRM
+ * chunk chromaticities.  Earlier checks used to simply look for the overflow
+ * condition (where the determinant of the matrix to solve for XYZ ends up zero
+ * because the chromaticity values are not all distinct.)  Despite this it is
+ * theoretically possible to produce chromaticities that are apparently valid
+ * but that rapidly degrade to invalid, potentially crashing, sets because of
+ * arithmetic inaccuracies when calculations are performed on them.  The new
+ * check is to round-trip xy -> XYZ -> xy and then check that the result is
+ * within a small percentage of the original.
+ */
+static int
+png_colorspace_check_xy(png_XYZ *XYZ, const png_xy *xy)
+{
+   int result;
+   png_xy xy_test;
+
+   /* As a side-effect this routine also returns the XYZ endpoints. */
+   result = png_XYZ_from_xy(XYZ, xy);
+   if (result != 0)
+      return result;
+
+   result = png_xy_from_XYZ(&xy_test, XYZ);
+   if (result != 0)
+      return result;
+
+   if (png_colorspace_endpoints_match(xy, &xy_test,
+       5/*actually, the math is pretty accurate*/) != 0)
+      return 0;
+
+   /* Too much slip */
+   return 1;
+}
+
+/* This is the check going the other way.  The XYZ is modified to normalize it
+ * (another side-effect) and the xy chromaticities are returned.
+ */
+static int
+png_colorspace_check_XYZ(png_xy *xy, png_XYZ *XYZ)
+{
+   int result;
+   png_XYZ XYZtemp;
+
+   result = png_XYZ_normalize(XYZ);
+   if (result != 0)
+      return result;
+
+   result = png_xy_from_XYZ(xy, XYZ);
+   if (result != 0)
+      return result;
+
+   XYZtemp = *XYZ;
+   return png_colorspace_check_xy(&XYZtemp, xy);
+}
+
+/* Used to check for an endpoint match against sRGB */
+static const png_xy sRGB_xy = /* From ITU-R BT.709-3 */
+{
+   /* color      x       y */
+   /* red   */ 64000, 33000,
+   /* green */ 30000, 60000,
+   /* blue  */ 15000,  6000,
+   /* white */ 31270, 32900
+};
+
+static int
+png_colorspace_set_xy_and_XYZ(png_const_structrp png_ptr,
+    png_colorspacerp colorspace, const png_xy *xy, const png_XYZ *XYZ,
+    int preferred)
+{
+   if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0)
+      return 0;
+
+   /* The consistency check is performed on the chromaticities; this factors out
+    * variations because of the normalization (or not) of the end point Y
+    * values.
+    */
+   if (preferred < 2 &&
+       (colorspace->flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
+   {
+      /* The end points must be reasonably close to any we already have.  The
+       * following allows an error of up to +/-.001
+       */
+      if (png_colorspace_endpoints_match(xy, &colorspace->end_points_xy,
+          100) == 0)
+      {
+         colorspace->flags |= PNG_COLORSPACE_INVALID;
+         png_benign_error(png_ptr, "inconsistent chromaticities");
+         return 0; /* failed */
+      }
+
+      /* Only overwrite with preferred values */
+      if (preferred == 0)
+         return 1; /* ok, but no change */
+   }
+
+   colorspace->end_points_xy = *xy;
+   colorspace->end_points_XYZ = *XYZ;
+   colorspace->flags |= PNG_COLORSPACE_HAVE_ENDPOINTS;
+
+   /* The end points are normally quoted to two decimal digits, so allow +/-0.01
+    * on this test.
+    */
+   if (png_colorspace_endpoints_match(xy, &sRGB_xy, 1000) != 0)
+      colorspace->flags |= PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB;
+
+   else
+      colorspace->flags &= PNG_COLORSPACE_CANCEL(
+         PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB);
+
+   return 2; /* ok and changed */
+}
+
+int /* PRIVATE */
+png_colorspace_set_chromaticities(png_const_structrp png_ptr,
+    png_colorspacerp colorspace, const png_xy *xy, int preferred)
+{
+   /* We must check the end points to ensure they are reasonable - in the past
+    * color management systems have crashed as a result of getting bogus
+    * colorant values, while this isn't the fault of libpng it is the
+    * responsibility of libpng because PNG carries the bomb and libpng is in a
+    * position to protect against it.
+    */
+   png_XYZ XYZ;
+
+   switch (png_colorspace_check_xy(&XYZ, xy))
+   {
+      case 0: /* success */
+         return png_colorspace_set_xy_and_XYZ(png_ptr, colorspace, xy, &XYZ,
+             preferred);
+
+      case 1:
+         /* We can't invert the chromaticities so we can't produce value XYZ
+          * values.  Likely as not a color management system will fail too.
+          */
+         colorspace->flags |= PNG_COLORSPACE_INVALID;
+         png_benign_error(png_ptr, "invalid chromaticities");
+         break;
+
+      default:
+         /* libpng is broken; this should be a warning but if it happens we
+          * want error reports so for the moment it is an error.
+          */
+         colorspace->flags |= PNG_COLORSPACE_INVALID;
+         png_error(png_ptr, "internal error checking chromaticities");
+   }
+
+   return 0; /* failed */
+}
+
+int /* PRIVATE */
+png_colorspace_set_endpoints(png_const_structrp png_ptr,
+    png_colorspacerp colorspace, const png_XYZ *XYZ_in, int preferred)
+{
+   png_XYZ XYZ = *XYZ_in;
+   png_xy xy;
+
+   switch (png_colorspace_check_XYZ(&xy, &XYZ))
+   {
+      case 0:
+         return png_colorspace_set_xy_and_XYZ(png_ptr, colorspace, &xy, &XYZ,
+             preferred);
+
+      case 1:
+         /* End points are invalid. */
+         colorspace->flags |= PNG_COLORSPACE_INVALID;
+         png_benign_error(png_ptr, "invalid end points");
+         break;
+
+      default:
+         colorspace->flags |= PNG_COLORSPACE_INVALID;
+         png_error(png_ptr, "internal error checking chromaticities");
+   }
+
+   return 0; /* failed */
+}
+
+#if defined(PNG_sRGB_SUPPORTED) || defined(PNG_iCCP_SUPPORTED)
+/* Error message generation */
+static char
+png_icc_tag_char(png_uint_32 byte)
+{
+   byte &= 0xff;
+   if (byte >= 32 && byte <= 126)
+      return (char)byte;
+   else
+      return '?';
+}
+
+static void
+png_icc_tag_name(char *name, png_uint_32 tag)
+{
+   name[0] = '\'';
+   name[1] = png_icc_tag_char(tag >> 24);
+   name[2] = png_icc_tag_char(tag >> 16);
+   name[3] = png_icc_tag_char(tag >>  8);
+   name[4] = png_icc_tag_char(tag      );
+   name[5] = '\'';
+}
+
+static int
+is_ICC_signature_char(png_alloc_size_t it)
+{
+   return it == 32 || (it >= 48 && it <= 57) || (it >= 65 && it <= 90) ||
+      (it >= 97 && it <= 122);
+}
+
+static int
+is_ICC_signature(png_alloc_size_t it)
+{
+   return is_ICC_signature_char(it >> 24) /* checks all the top bits */ &&
+      is_ICC_signature_char((it >> 16) & 0xff) &&
+      is_ICC_signature_char((it >> 8) & 0xff) &&
+      is_ICC_signature_char(it & 0xff);
+}
+
+static int
+png_icc_profile_error(png_const_structrp png_ptr, png_colorspacerp colorspace,
+    png_const_charp name, png_alloc_size_t value, png_const_charp reason)
+{
+   size_t pos;
+   char message[196]; /* see below for calculation */
+
+   if (colorspace != NULL)
+      colorspace->flags |= PNG_COLORSPACE_INVALID;
+
+   pos = png_safecat(message, (sizeof message), 0, "profile '"); /* 9 chars */
+   pos = png_safecat(message, pos+79, pos, name); /* Truncate to 79 chars */
+   pos = png_safecat(message, (sizeof message), pos, "': "); /* +2 = 90 */
+   if (is_ICC_signature(value) != 0)
+   {
+      /* So 'value' is at most 4 bytes and the following cast is safe */
+      png_icc_tag_name(message+pos, (png_uint_32)value);
+      pos += 6; /* total +8; less than the else clause */
+      message[pos++] = ':';
+      message[pos++] = ' ';
+   }
+#  ifdef PNG_WARNINGS_SUPPORTED
+   else
+      {
+         char number[PNG_NUMBER_BUFFER_SIZE]; /* +24 = 114 */
+
+         pos = png_safecat(message, (sizeof message), pos,
+             png_format_number(number, number+(sizeof number),
+             PNG_NUMBER_FORMAT_x, value));
+         pos = png_safecat(message, (sizeof message), pos, "h: "); /* +2 = 116 */
+      }
+#  endif
+   /* The 'reason' is an arbitrary message, allow +79 maximum 195 */
+   pos = png_safecat(message, (sizeof message), pos, reason);
+   PNG_UNUSED(pos)
+
+   /* This is recoverable, but make it unconditionally an app_error on write to
+    * avoid writing invalid ICC profiles into PNG files (i.e., we handle them
+    * on read, with a warning, but on write unless the app turns off
+    * application errors the PNG won't be written.)
+    */
+   png_chunk_report(png_ptr, message,
+       (colorspace != NULL) ? PNG_CHUNK_ERROR : PNG_CHUNK_WRITE_ERROR);
+
+   return 0;
+}
+#endif /* sRGB || iCCP */
+
+#ifdef PNG_sRGB_SUPPORTED
+int /* PRIVATE */
+png_colorspace_set_sRGB(png_const_structrp png_ptr, png_colorspacerp colorspace,
+    int intent)
+{
+   /* sRGB sets known gamma, end points and (from the chunk) intent. */
+   /* IMPORTANT: these are not necessarily the values found in an ICC profile
+    * because ICC profiles store values adapted to a D50 environment; it is
+    * expected that the ICC profile mediaWhitePointTag will be D50; see the
+    * checks and code elsewhere to understand this better.
+    *
+    * These XYZ values, which are accurate to 5dp, produce rgb to gray
+    * coefficients of (6968,23435,2366), which are reduced (because they add up
+    * to 32769 not 32768) to (6968,23434,2366).  These are the values that
+    * libpng has traditionally used (and are the best values given the 15bit
+    * algorithm used by the rgb to gray code.)
+    */
+   static const png_XYZ sRGB_XYZ = /* D65 XYZ (*not* the D50 adapted values!) */
+   {
+      /* color      X      Y      Z */
+      /* red   */ 41239, 21264,  1933,
+      /* green */ 35758, 71517, 11919,
+      /* blue  */ 18048,  7219, 95053
+   };
+
+   /* Do nothing if the colorspace is already invalidated. */
+   if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0)
+      return 0;
+
+   /* Check the intent, then check for existing settings.  It is valid for the
+    * PNG file to have cHRM or gAMA chunks along with sRGB, but the values must
+    * be consistent with the correct values.  If, however, this function is
+    * called below because an iCCP chunk matches sRGB then it is quite
+    * conceivable that an older app recorded incorrect gAMA and cHRM because of
+    * an incorrect calculation based on the values in the profile - this does
+    * *not* invalidate the profile (though it still produces an error, which can
+    * be ignored.)
+    */
+   if (intent < 0 || intent >= PNG_sRGB_INTENT_LAST)
+      return png_icc_profile_error(png_ptr, colorspace, "sRGB",
+          (png_alloc_size_t)intent, "invalid sRGB rendering intent");
+
+   if ((colorspace->flags & PNG_COLORSPACE_HAVE_INTENT) != 0 &&
+       colorspace->rendering_intent != intent)
+      return png_icc_profile_error(png_ptr, colorspace, "sRGB",
+         (png_alloc_size_t)intent, "inconsistent rendering intents");
+
+   if ((colorspace->flags & PNG_COLORSPACE_FROM_sRGB) != 0)
+   {
+      png_benign_error(png_ptr, "duplicate sRGB information ignored");
+      return 0;
+   }
+
+   /* If the standard sRGB cHRM chunk does not match the one from the PNG file
+    * warn but overwrite the value with the correct one.
+    */
+   if ((colorspace->flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0 &&
+       !png_colorspace_endpoints_match(&sRGB_xy, &colorspace->end_points_xy,
+       100))
+      png_chunk_report(png_ptr, "cHRM chunk does not match sRGB",
+         PNG_CHUNK_ERROR);
+
+   /* This check is just done for the error reporting - the routine always
+    * returns true when the 'from' argument corresponds to sRGB (2).
+    */
+   (void)png_colorspace_check_gamma(png_ptr, colorspace, PNG_GAMMA_sRGB_INVERSE,
+       2/*from sRGB*/);
+
+   /* intent: bugs in GCC force 'int' to be used as the parameter type. */
+   colorspace->rendering_intent = (png_uint_16)intent;
+   colorspace->flags |= PNG_COLORSPACE_HAVE_INTENT;
+
+   /* endpoints */
+   colorspace->end_points_xy = sRGB_xy;
+   colorspace->end_points_XYZ = sRGB_XYZ;
+   colorspace->flags |=
+      (PNG_COLORSPACE_HAVE_ENDPOINTS|PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB);
+
+   /* gamma */
+   colorspace->gamma = PNG_GAMMA_sRGB_INVERSE;
+   colorspace->flags |= PNG_COLORSPACE_HAVE_GAMMA;
+
+   /* Finally record that we have an sRGB profile */
+   colorspace->flags |=
+      (PNG_COLORSPACE_MATCHES_sRGB|PNG_COLORSPACE_FROM_sRGB);
+
+   return 1; /* set */
+}
+#endif /* sRGB */
+
+#ifdef PNG_iCCP_SUPPORTED
+/* Encoded value of D50 as an ICC XYZNumber.  From the ICC 2010 spec the value
+ * is XYZ(0.9642,1.0,0.8249), which scales to:
+ *
+ *    (63189.8112, 65536, 54060.6464)
+ */
+static const png_byte D50_nCIEXYZ[12] =
+   { 0x00, 0x00, 0xf6, 0xd6, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d };
+
+static int /* bool */
+icc_check_length(png_const_structrp png_ptr, png_colorspacerp colorspace,
+    png_const_charp name, png_uint_32 profile_length)
+{
+   if (profile_length < 132)
+      return png_icc_profile_error(png_ptr, colorspace, name, profile_length,
+          "too short");
+   return 1;
+}
+
+#ifdef PNG_READ_iCCP_SUPPORTED
+int /* PRIVATE */
+png_icc_check_length(png_const_structrp png_ptr, png_colorspacerp colorspace,
+    png_const_charp name, png_uint_32 profile_length)
+{
+   if (!icc_check_length(png_ptr, colorspace, name, profile_length))
+      return 0;
+
+   /* This needs to be here because the 'normal' check is in
+    * png_decompress_chunk, yet this happens after the attempt to
+    * png_malloc_base the required data.  We only need this on read; on write
+    * the caller supplies the profile buffer so libpng doesn't allocate it.  See
+    * the call to icc_check_length below (the write case).
+    */
+#  ifdef PNG_SET_USER_LIMITS_SUPPORTED
+      else if (png_ptr->user_chunk_malloc_max > 0 &&
+               png_ptr->user_chunk_malloc_max < profile_length)
+         return png_icc_profile_error(png_ptr, colorspace, name, profile_length,
+             "exceeds application limits");
+#  elif PNG_USER_CHUNK_MALLOC_MAX > 0
+      else if (PNG_USER_CHUNK_MALLOC_MAX < profile_length)
+         return png_icc_profile_error(png_ptr, colorspace, name, profile_length,
+             "exceeds libpng limits");
+#  else /* !SET_USER_LIMITS */
+      /* This will get compiled out on all 32-bit and better systems. */
+      else if (PNG_SIZE_MAX < profile_length)
+         return png_icc_profile_error(png_ptr, colorspace, name, profile_length,
+             "exceeds system limits");
+#  endif /* !SET_USER_LIMITS */
+
+   return 1;
+}
+#endif /* READ_iCCP */
+
+int /* PRIVATE */
+png_icc_check_header(png_const_structrp png_ptr, png_colorspacerp colorspace,
+    png_const_charp name, png_uint_32 profile_length,
+    png_const_bytep profile/* first 132 bytes only */, int color_type)
+{
+   png_uint_32 temp;
+
+   /* Length check; this cannot be ignored in this code because profile_length
+    * is used later to check the tag table, so even if the profile seems over
+    * long profile_length from the caller must be correct.  The caller can fix
+    * this up on read or write by just passing in the profile header length.
+    */
+   temp = png_get_uint_32(profile);
+   if (temp != profile_length)
+      return png_icc_profile_error(png_ptr, colorspace, name, temp,
+          "length does not match profile");
+
+   temp = (png_uint_32) (*(profile+8));
+   if (temp > 3 && (profile_length & 3))
+      return png_icc_profile_error(png_ptr, colorspace, name, profile_length,
+          "invalid length");
+
+   temp = png_get_uint_32(profile+128); /* tag count: 12 bytes/tag */
+   if (temp > 357913930 || /* (2^32-4-132)/12: maximum possible tag count */
+      profile_length < 132+12*temp) /* truncated tag table */
+      return png_icc_profile_error(png_ptr, colorspace, name, temp,
+          "tag count too large");
+
+   /* The 'intent' must be valid or we can't store it, ICC limits the intent to
+    * 16 bits.
+    */
+   temp = png_get_uint_32(profile+64);
+   if (temp >= 0xffff) /* The ICC limit */
+      return png_icc_profile_error(png_ptr, colorspace, name, temp,
+          "invalid rendering intent");
+
+   /* This is just a warning because the profile may be valid in future
+    * versions.
+    */
+   if (temp >= PNG_sRGB_INTENT_LAST)
+      (void)png_icc_profile_error(png_ptr, NULL, name, temp,
+          "intent outside defined range");
+
+   /* At this point the tag table can't be checked because it hasn't necessarily
+    * been loaded; however, various header fields can be checked.  These checks
+    * are for values permitted by the PNG spec in an ICC profile; the PNG spec
+    * restricts the profiles that can be passed in an iCCP chunk (they must be
+    * appropriate to processing PNG data!)
+    */
+
+   /* Data checks (could be skipped).  These checks must be independent of the
+    * version number; however, the version number doesn't accommodate changes in
+    * the header fields (just the known tags and the interpretation of the
+    * data.)
+    */
+   temp = png_get_uint_32(profile+36); /* signature 'ascp' */
+   if (temp != 0x61637370)
+      return png_icc_profile_error(png_ptr, colorspace, name, temp,
+          "invalid signature");
+
+   /* Currently the PCS illuminant/adopted white point (the computational
+    * white point) are required to be D50,
+    * however the profile contains a record of the illuminant so perhaps ICC
+    * expects to be able to change this in the future (despite the rationale in
+    * the introduction for using a fixed PCS adopted white.)  Consequently the
+    * following is just a warning.
+    */
+   if (memcmp(profile+68, D50_nCIEXYZ, 12) != 0)
+      (void)png_icc_profile_error(png_ptr, NULL, name, 0/*no tag value*/,
+          "PCS illuminant is not D50");
+
+   /* The PNG spec requires this:
+    * "If the iCCP chunk is present, the image samples conform to the colour
+    * space represented by the embedded ICC profile as defined by the
+    * International Color Consortium [ICC]. The colour space of the ICC profile
+    * shall be an RGB colour space for colour images (PNG colour types 2, 3, and
+    * 6), or a greyscale colour space for greyscale images (PNG colour types 0
+    * and 4)."
+    *
+    * This checking code ensures the embedded profile (on either read or write)
+    * conforms to the specification requirements.  Notice that an ICC 'gray'
+    * color-space profile contains the information to transform the monochrome
+    * data to XYZ or L*a*b (according to which PCS the profile uses) and this
+    * should be used in preference to the standard libpng K channel replication
+    * into R, G and B channels.
+    *
+    * Previously it was suggested that an RGB profile on grayscale data could be
+    * handled.  However it it is clear that using an RGB profile in this context
+    * must be an error - there is no specification of what it means.  Thus it is
+    * almost certainly more correct to ignore the profile.
+    */
+   temp = png_get_uint_32(profile+16); /* data colour space field */
+   switch (temp)
+   {
+      case 0x52474220: /* 'RGB ' */
+         if ((color_type & PNG_COLOR_MASK_COLOR) == 0)
+            return png_icc_profile_error(png_ptr, colorspace, name, temp,
+                "RGB color space not permitted on grayscale PNG");
+         break;
+
+      case 0x47524159: /* 'GRAY' */
+         if ((color_type & PNG_COLOR_MASK_COLOR) != 0)
+            return png_icc_profile_error(png_ptr, colorspace, name, temp,
+                "Gray color space not permitted on RGB PNG");
+         break;
+
+      default:
+         return png_icc_profile_error(png_ptr, colorspace, name, temp,
+             "invalid ICC profile color space");
+   }
+
+   /* It is up to the application to check that the profile class matches the
+    * application requirements; the spec provides no guidance, but it's pretty
+    * weird if the profile is not scanner ('scnr'), monitor ('mntr'), printer
+    * ('prtr') or 'spac' (for generic color spaces).  Issue a warning in these
+    * cases.  Issue an error for device link or abstract profiles - these don't
+    * contain the records necessary to transform the color-space to anything
+    * other than the target device (and not even that for an abstract profile).
+    * Profiles of these classes may not be embedded in images.
+    */
+   temp = png_get_uint_32(profile+12); /* profile/device class */
+   switch (temp)
+   {
+      case 0x73636e72: /* 'scnr' */
+      case 0x6d6e7472: /* 'mntr' */
+      case 0x70727472: /* 'prtr' */
+      case 0x73706163: /* 'spac' */
+         /* All supported */
+         break;
+
+      case 0x61627374: /* 'abst' */
+         /* May not be embedded in an image */
+         return png_icc_profile_error(png_ptr, colorspace, name, temp,
+             "invalid embedded Abstract ICC profile");
+
+      case 0x6c696e6b: /* 'link' */
+         /* DeviceLink profiles cannot be interpreted in a non-device specific
+          * fashion, if an app uses the AToB0Tag in the profile the results are
+          * undefined unless the result is sent to the intended device,
+          * therefore a DeviceLink profile should not be found embedded in a
+          * PNG.
+          */
+         return png_icc_profile_error(png_ptr, colorspace, name, temp,
+             "unexpected DeviceLink ICC profile class");
+
+      case 0x6e6d636c: /* 'nmcl' */
+         /* A NamedColor profile is also device specific, however it doesn't
+          * contain an AToB0 tag that is open to misinterpretation.  Almost
+          * certainly it will fail the tests below.
+          */
+         (void)png_icc_profile_error(png_ptr, NULL, name, temp,
+             "unexpected NamedColor ICC profile class");
+         break;
+
+      default:
+         /* To allow for future enhancements to the profile accept unrecognized
+          * profile classes with a warning, these then hit the test below on the
+          * tag content to ensure they are backward compatible with one of the
+          * understood profiles.
+          */
+         (void)png_icc_profile_error(png_ptr, NULL, name, temp,
+             "unrecognized ICC profile class");
+         break;
+   }
+
+   /* For any profile other than a device link one the PCS must be encoded
+    * either in XYZ or Lab.
+    */
+   temp = png_get_uint_32(profile+20);
+   switch (temp)
+   {
+      case 0x58595a20: /* 'XYZ ' */
+      case 0x4c616220: /* 'Lab ' */
+         break;
+
+      default:
+         return png_icc_profile_error(png_ptr, colorspace, name, temp,
+             "unexpected ICC PCS encoding");
+   }
+
+   return 1;
+}
+
+int /* PRIVATE */
+png_icc_check_tag_table(png_const_structrp png_ptr, png_colorspacerp colorspace,
+    png_const_charp name, png_uint_32 profile_length,
+    png_const_bytep profile /* header plus whole tag table */)
+{
+   png_uint_32 tag_count = png_get_uint_32(profile+128);
+   png_uint_32 itag;
+   png_const_bytep tag = profile+132; /* The first tag */
+
+   /* First scan all the tags in the table and add bits to the icc_info value
+    * (temporarily in 'tags').
+    */
+   for (itag=0; itag < tag_count; ++itag, tag += 12)
+   {
+      png_uint_32 tag_id = png_get_uint_32(tag+0);
+      png_uint_32 tag_start = png_get_uint_32(tag+4); /* must be aligned */
+      png_uint_32 tag_length = png_get_uint_32(tag+8);/* not padded */
+
+      /* The ICC specification does not exclude zero length tags, therefore the
+       * start might actually be anywhere if there is no data, but this would be
+       * a clear abuse of the intent of the standard so the start is checked for
+       * being in range.  All defined tag types have an 8 byte header - a 4 byte
+       * type signature then 0.
+       */
+
+      /* This is a hard error; potentially it can cause read outside the
+       * profile.
+       */
+      if (tag_start > profile_length || tag_length > profile_length - tag_start)
+         return png_icc_profile_error(png_ptr, colorspace, name, tag_id,
+             "ICC profile tag outside profile");
+
+      if ((tag_start & 3) != 0)
+      {
+         /* CNHP730S.icc shipped with Microsoft Windows 64 violates this; it is
+          * only a warning here because libpng does not care about the
+          * alignment.
+          */
+         (void)png_icc_profile_error(png_ptr, NULL, name, tag_id,
+             "ICC profile tag start not a multiple of 4");
+      }
+   }
+
+   return 1; /* success, maybe with warnings */
+}
+
+#ifdef PNG_sRGB_SUPPORTED
+#if PNG_sRGB_PROFILE_CHECKS >= 0
+/* Information about the known ICC sRGB profiles */
+static const struct
+{
+   png_uint_32 adler, crc, length;
+   png_uint_32 md5[4];
+   png_byte    have_md5;
+   png_byte    is_broken;
+   png_uint_16 intent;
+
+#  define PNG_MD5(a,b,c,d) { a, b, c, d }, (a!=0)||(b!=0)||(c!=0)||(d!=0)
+#  define PNG_ICC_CHECKSUM(adler, crc, md5, intent, broke, date, length, fname)\
+      { adler, crc, length, md5, broke, intent },
+
+} png_sRGB_checks[] =
+{
+   /* This data comes from contrib/tools/checksum-icc run on downloads of
+    * all four ICC sRGB profiles from www.color.org.
+    */
+   /* adler32, crc32, MD5[4], intent, date, length, file-name */
+   PNG_ICC_CHECKSUM(0x0a3fd9f6, 0x3b8772b9,
+       PNG_MD5(0x29f83dde, 0xaff255ae, 0x7842fae4, 0xca83390d), 0, 0,
+       "2009/03/27 21:36:31", 3048, "sRGB_IEC61966-2-1_black_scaled.icc")
+
+   /* ICC sRGB v2 perceptual no black-compensation: */
+   PNG_ICC_CHECKSUM(0x4909e5e1, 0x427ebb21,
+       PNG_MD5(0xc95bd637, 0xe95d8a3b, 0x0df38f99, 0xc1320389), 1, 0,
+       "2009/03/27 21:37:45", 3052, "sRGB_IEC61966-2-1_no_black_scaling.icc")
+
+   PNG_ICC_CHECKSUM(0xfd2144a1, 0x306fd8ae,
+       PNG_MD5(0xfc663378, 0x37e2886b, 0xfd72e983, 0x8228f1b8), 0, 0,
+       "2009/08/10 17:28:01", 60988, "sRGB_v4_ICC_preference_displayclass.icc")
+
+   /* ICC sRGB v4 perceptual */
+   PNG_ICC_CHECKSUM(0x209c35d2, 0xbbef7812,
+       PNG_MD5(0x34562abf, 0x994ccd06, 0x6d2c5721, 0xd0d68c5d), 0, 0,
+       "2007/07/25 00:05:37", 60960, "sRGB_v4_ICC_preference.icc")
+
+   /* The following profiles have no known MD5 checksum. If there is a match
+    * on the (empty) MD5 the other fields are used to attempt a match and
+    * a warning is produced.  The first two of these profiles have a 'cprt' tag
+    * which suggests that they were also made by Hewlett Packard.
+    */
+   PNG_ICC_CHECKSUM(0xa054d762, 0x5d5129ce,
+       PNG_MD5(0x00000000, 0x00000000, 0x00000000, 0x00000000), 1, 0,
+       "2004/07/21 18:57:42", 3024, "sRGB_IEC61966-2-1_noBPC.icc")
+
+   /* This is a 'mntr' (display) profile with a mediaWhitePointTag that does not
+    * match the D50 PCS illuminant in the header (it is in fact the D65 values,
+    * so the white point is recorded as the un-adapted value.)  The profiles
+    * below only differ in one byte - the intent - and are basically the same as
+    * the previous profile except for the mediaWhitePointTag error and a missing
+    * chromaticAdaptationTag.
+    */
+   PNG_ICC_CHECKSUM(0xf784f3fb, 0x182ea552,
+       PNG_MD5(0x00000000, 0x00000000, 0x00000000, 0x00000000), 0, 1/*broken*/,
+       "1998/02/09 06:49:00", 3144, "HP-Microsoft sRGB v2 perceptual")
+
+   PNG_ICC_CHECKSUM(0x0398f3fc, 0xf29e526d,
+       PNG_MD5(0x00000000, 0x00000000, 0x00000000, 0x00000000), 1, 1/*broken*/,
+       "1998/02/09 06:49:00", 3144, "HP-Microsoft sRGB v2 media-relative")
+};
+
+static int
+png_compare_ICC_profile_with_sRGB(png_const_structrp png_ptr,
+    png_const_bytep profile, uLong adler)
+{
+   /* The quick check is to verify just the MD5 signature and trust the
+    * rest of the data.  Because the profile has already been verified for
+    * correctness this is safe.  png_colorspace_set_sRGB will check the 'intent'
+    * field too, so if the profile has been edited with an intent not defined
+    * by sRGB (but maybe defined by a later ICC specification) the read of
+    * the profile will fail at that point.
+    */
+
+   png_uint_32 length = 0;
+   png_uint_32 intent = 0x10000; /* invalid */
+#if PNG_sRGB_PROFILE_CHECKS > 1
+   uLong crc = 0; /* the value for 0 length data */
+#endif
+   unsigned int i;
+
+#ifdef PNG_SET_OPTION_SUPPORTED
+   /* First see if PNG_SKIP_sRGB_CHECK_PROFILE has been set to "on" */
+   if (((png_ptr->options >> PNG_SKIP_sRGB_CHECK_PROFILE) & 3) ==
+               PNG_OPTION_ON)
+      return 0;
+#endif
+
+   for (i=0; i < (sizeof png_sRGB_checks) / (sizeof png_sRGB_checks[0]); ++i)
+   {
+      if (png_get_uint_32(profile+84) == png_sRGB_checks[i].md5[0] &&
+         png_get_uint_32(profile+88) == png_sRGB_checks[i].md5[1] &&
+         png_get_uint_32(profile+92) == png_sRGB_checks[i].md5[2] &&
+         png_get_uint_32(profile+96) == png_sRGB_checks[i].md5[3])
+      {
+         /* This may be one of the old HP profiles without an MD5, in that
+          * case we can only use the length and Adler32 (note that these
+          * are not used by default if there is an MD5!)
+          */
+#        if PNG_sRGB_PROFILE_CHECKS == 0
+            if (png_sRGB_checks[i].have_md5 != 0)
+               return 1+png_sRGB_checks[i].is_broken;
+#        endif
+
+         /* Profile is unsigned or more checks have been configured in. */
+         if (length == 0)
+         {
+            length = png_get_uint_32(profile);
+            intent = png_get_uint_32(profile+64);
+         }
+
+         /* Length *and* intent must match */
+         if (length == (png_uint_32) png_sRGB_checks[i].length &&
+            intent == (png_uint_32) png_sRGB_checks[i].intent)
+         {
+            /* Now calculate the adler32 if not done already. */
+            if (adler == 0)
+            {
+               adler = adler32(0, NULL, 0);
+               adler = adler32(adler, profile, length);
+            }
+
+            if (adler == png_sRGB_checks[i].adler)
+            {
+               /* These basic checks suggest that the data has not been
+                * modified, but if the check level is more than 1 perform
+                * our own crc32 checksum on the data.
+                */
+#              if PNG_sRGB_PROFILE_CHECKS > 1
+                  if (crc == 0)
+                  {
+                     crc = crc32(0, NULL, 0);
+                     crc = crc32(crc, profile, length);
+                  }
+
+                  /* So this check must pass for the 'return' below to happen.
+                   */
+                  if (crc == png_sRGB_checks[i].crc)
+#              endif
+               {
+                  if (png_sRGB_checks[i].is_broken != 0)
+                  {
+                     /* These profiles are known to have bad data that may cause
+                      * problems if they are used, therefore attempt to
+                      * discourage their use, skip the 'have_md5' warning below,
+                      * which is made irrelevant by this error.
+                      */
+                     png_chunk_report(png_ptr, "known incorrect sRGB profile",
+                         PNG_CHUNK_ERROR);
+                  }
+
+                  /* Warn that this being done; this isn't even an error since
+                   * the profile is perfectly valid, but it would be nice if
+                   * people used the up-to-date ones.
+                   */
+                  else if (png_sRGB_checks[i].have_md5 == 0)
+                  {
+                     png_chunk_report(png_ptr,
+                         "out-of-date sRGB profile with no signature",
+                         PNG_CHUNK_WARNING);
+                  }
+
+                  return 1+png_sRGB_checks[i].is_broken;
+               }
+            }
+
+# if PNG_sRGB_PROFILE_CHECKS > 0
+         /* The signature matched, but the profile had been changed in some
+          * way.  This probably indicates a data error or uninformed hacking.
+          * Fall through to "no match".
+          */
+         png_chunk_report(png_ptr,
+             "Not recognizing known sRGB profile that has been edited",
+             PNG_CHUNK_WARNING);
+         break;
+# endif
+         }
+      }
+   }
+
+   return 0; /* no match */
+}
+
+void /* PRIVATE */
+png_icc_set_sRGB(png_const_structrp png_ptr,
+    png_colorspacerp colorspace, png_const_bytep profile, uLong adler)
+{
+   /* Is this profile one of the known ICC sRGB profiles?  If it is, just set
+    * the sRGB information.
+    */
+   if (png_compare_ICC_profile_with_sRGB(png_ptr, profile, adler) != 0)
+      (void)png_colorspace_set_sRGB(png_ptr, colorspace,
+         (int)/*already checked*/png_get_uint_32(profile+64));
+}
+#endif /* PNG_sRGB_PROFILE_CHECKS >= 0 */
+#endif /* sRGB */
+
+int /* PRIVATE */
+png_colorspace_set_ICC(png_const_structrp png_ptr, png_colorspacerp colorspace,
+    png_const_charp name, png_uint_32 profile_length, png_const_bytep profile,
+    int color_type)
+{
+   if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0)
+      return 0;
+
+   if (icc_check_length(png_ptr, colorspace, name, profile_length) != 0 &&
+       png_icc_check_header(png_ptr, colorspace, name, profile_length, profile,
+           color_type) != 0 &&
+       png_icc_check_tag_table(png_ptr, colorspace, name, profile_length,
+           profile) != 0)
+   {
+#     if defined(PNG_sRGB_SUPPORTED) && PNG_sRGB_PROFILE_CHECKS >= 0
+         /* If no sRGB support, don't try storing sRGB information */
+         png_icc_set_sRGB(png_ptr, colorspace, profile, 0);
+#     endif
+      return 1;
+   }
+
+   /* Failure case */
+   return 0;
+}
+#endif /* iCCP */
+
+#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
+void /* PRIVATE */
+png_colorspace_set_rgb_coefficients(png_structrp png_ptr)
+{
+   /* Set the rgb_to_gray coefficients from the colorspace. */
+   if (png_ptr->rgb_to_gray_coefficients_set == 0 &&
+      (png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
+   {
+      /* png_set_background has not been called, get the coefficients from the Y
+       * values of the colorspace colorants.
+       */
+      png_fixed_point r = png_ptr->colorspace.end_points_XYZ.red_Y;
+      png_fixed_point g = png_ptr->colorspace.end_points_XYZ.green_Y;
+      png_fixed_point b = png_ptr->colorspace.end_points_XYZ.blue_Y;
+      png_fixed_point total = r+g+b;
+
+      if (total > 0 &&
+         r >= 0 && png_muldiv(&r, r, 32768, total) && r >= 0 && r <= 32768 &&
+         g >= 0 && png_muldiv(&g, g, 32768, total) && g >= 0 && g <= 32768 &&
+         b >= 0 && png_muldiv(&b, b, 32768, total) && b >= 0 && b <= 32768 &&
+         r+g+b <= 32769)
+      {
+         /* We allow 0 coefficients here.  r+g+b may be 32769 if two or
+          * all of the coefficients were rounded up.  Handle this by
+          * reducing the *largest* coefficient by 1; this matches the
+          * approach used for the default coefficients in pngrtran.c
+          */
+         int add = 0;
+
+         if (r+g+b > 32768)
+            add = -1;
+         else if (r+g+b < 32768)
+            add = 1;
+
+         if (add != 0)
+         {
+            if (g >= r && g >= b)
+               g += add;
+            else if (r >= g && r >= b)
+               r += add;
+            else
+               b += add;
+         }
+
+         /* Check for an internal error. */
+         if (r+g+b != 32768)
+            png_error(png_ptr,
+                "internal error handling cHRM coefficients");
+
+         else
+         {
+            png_ptr->rgb_to_gray_red_coeff   = (png_uint_16)r;
+            png_ptr->rgb_to_gray_green_coeff = (png_uint_16)g;
+         }
+      }
+
+      /* This is a png_error at present even though it could be ignored -
+       * it should never happen, but it is important that if it does, the
+       * bug is fixed.
+       */
+      else
+         png_error(png_ptr, "internal error handling cHRM->XYZ");
+   }
+}
+#endif /* READ_RGB_TO_GRAY */
+
+#endif /* COLORSPACE */
+
+#ifdef __GNUC__
+/* This exists solely to work round a warning from GNU C. */
+static int /* PRIVATE */
+png_gt(size_t a, size_t b)
+{
+   return a > b;
+}
+#else
+#   define png_gt(a,b) ((a) > (b))
+#endif
+
+void /* PRIVATE */
+png_check_IHDR(png_const_structrp png_ptr,
+    png_uint_32 width, png_uint_32 height, int bit_depth,
+    int color_type, int interlace_type, int compression_type,
+    int filter_type)
+{
+   int error = 0;
+
+   /* Check for width and height valid values */
+   if (width == 0)
+   {
+      png_warning(png_ptr, "Image width is zero in IHDR");
+      error = 1;
+   }
+
+   if (width > PNG_UINT_31_MAX)
+   {
+      png_warning(png_ptr, "Invalid image width in IHDR");
+      error = 1;
+   }
+
+   if (png_gt(((width + 7) & (~7U)),
+       ((PNG_SIZE_MAX
+           - 48        /* big_row_buf hack */
+           - 1)        /* filter byte */
+           / 8)        /* 8-byte RGBA pixels */
+           - 1))       /* extra max_pixel_depth pad */
+   {
+      /* The size of the row must be within the limits of this architecture.
+       * Because the read code can perform arbitrary transformations the
+       * maximum size is checked here.  Because the code in png_read_start_row
+       * adds extra space "for safety's sake" in several places a conservative
+       * limit is used here.
+       *
+       * NOTE: it would be far better to check the size that is actually used,
+       * but the effect in the real world is minor and the changes are more
+       * extensive, therefore much more dangerous and much more difficult to
+       * write in a way that avoids compiler warnings.
+       */
+      png_warning(png_ptr, "Image width is too large for this architecture");
+      error = 1;
+   }
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   if (width > png_ptr->user_width_max)
+#else
+   if (width > PNG_USER_WIDTH_MAX)
+#endif
+   {
+      png_warning(png_ptr, "Image width exceeds user limit in IHDR");
+      error = 1;
+   }
+
+   if (height == 0)
+   {
+      png_warning(png_ptr, "Image height is zero in IHDR");
+      error = 1;
+   }
+
+   if (height > PNG_UINT_31_MAX)
+   {
+      png_warning(png_ptr, "Invalid image height in IHDR");
+      error = 1;
+   }
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   if (height > png_ptr->user_height_max)
+#else
+   if (height > PNG_USER_HEIGHT_MAX)
+#endif
+   {
+      png_warning(png_ptr, "Image height exceeds user limit in IHDR");
+      error = 1;
+   }
+
+   /* Check other values */
+   if (bit_depth != 1 && bit_depth != 2 && bit_depth != 4 &&
+       bit_depth != 8 && bit_depth != 16)
+   {
+      png_warning(png_ptr, "Invalid bit depth in IHDR");
+      error = 1;
+   }
+
+   if (color_type < 0 || color_type == 1 ||
+       color_type == 5 || color_type > 6)
+   {
+      png_warning(png_ptr, "Invalid color type in IHDR");
+      error = 1;
+   }
+
+   if (((color_type == PNG_COLOR_TYPE_PALETTE) && bit_depth > 8) ||
+       ((color_type == PNG_COLOR_TYPE_RGB ||
+         color_type == PNG_COLOR_TYPE_GRAY_ALPHA ||
+         color_type == PNG_COLOR_TYPE_RGB_ALPHA) && bit_depth < 8))
+   {
+      png_warning(png_ptr, "Invalid color type/bit depth combination in IHDR");
+      error = 1;
+   }
+
+   if (interlace_type >= PNG_INTERLACE_LAST)
+   {
+      png_warning(png_ptr, "Unknown interlace method in IHDR");
+      error = 1;
+   }
+
+   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
+   {
+      png_warning(png_ptr, "Unknown compression method in IHDR");
+      error = 1;
+   }
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+   /* Accept filter_method 64 (intrapixel differencing) only if
+    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
+    * 2. Libpng did not read a PNG signature (this filter_method is only
+    *    used in PNG datastreams that are embedded in MNG datastreams) and
+    * 3. The application called png_permit_mng_features with a mask that
+    *    included PNG_FLAG_MNG_FILTER_64 and
+    * 4. The filter_method is 64 and
+    * 5. The color_type is RGB or RGBA
+    */
+   if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) != 0 &&
+       png_ptr->mng_features_permitted != 0)
+      png_warning(png_ptr, "MNG features are not allowed in a PNG datastream");
+
+   if (filter_type != PNG_FILTER_TYPE_BASE)
+   {
+      if (!((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
+          (filter_type == PNG_INTRAPIXEL_DIFFERENCING) &&
+          ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) == 0) &&
+          (color_type == PNG_COLOR_TYPE_RGB ||
+          color_type == PNG_COLOR_TYPE_RGB_ALPHA)))
+      {
+         png_warning(png_ptr, "Unknown filter method in IHDR");
+         error = 1;
+      }
+
+      if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) != 0)
+      {
+         png_warning(png_ptr, "Invalid filter method in IHDR");
+         error = 1;
+      }
+   }
+
+#else
+   if (filter_type != PNG_FILTER_TYPE_BASE)
+   {
+      png_warning(png_ptr, "Unknown filter method in IHDR");
+      error = 1;
+   }
+#endif
+
+   if (error == 1)
+      png_error(png_ptr, "Invalid IHDR data");
+}
+
+#if defined(PNG_sCAL_SUPPORTED) || defined(PNG_pCAL_SUPPORTED)
+/* ASCII to fp functions */
+/* Check an ASCII formatted floating point value, see the more detailed
+ * comments in pngpriv.h
+ */
+/* The following is used internally to preserve the sticky flags */
+#define png_fp_add(state, flags) ((state) |= (flags))
+#define png_fp_set(state, value) ((state) = (value) | ((state) & PNG_FP_STICKY))
+
+int /* PRIVATE */
+png_check_fp_number(png_const_charp string, size_t size, int *statep,
+    size_t *whereami)
+{
+   int state = *statep;
+   size_t i = *whereami;
+
+   while (i < size)
+   {
+      int type;
+      /* First find the type of the next character */
+      switch (string[i])
+      {
+      case 43:  type = PNG_FP_SAW_SIGN;                   break;
+      case 45:  type = PNG_FP_SAW_SIGN + PNG_FP_NEGATIVE; break;
+      case 46:  type = PNG_FP_SAW_DOT;                    break;
+      case 48:  type = PNG_FP_SAW_DIGIT;                  break;
+      case 49: case 50: case 51: case 52:
+      case 53: case 54: case 55: case 56:
+      case 57:  type = PNG_FP_SAW_DIGIT + PNG_FP_NONZERO; break;
+      case 69:
+      case 101: type = PNG_FP_SAW_E;                      break;
+      default:  goto PNG_FP_End;
+      }
+
+      /* Now deal with this type according to the current
+       * state, the type is arranged to not overlap the
+       * bits of the PNG_FP_STATE.
+       */
+      switch ((state & PNG_FP_STATE) + (type & PNG_FP_SAW_ANY))
+      {
+      case PNG_FP_INTEGER + PNG_FP_SAW_SIGN:
+         if ((state & PNG_FP_SAW_ANY) != 0)
+            goto PNG_FP_End; /* not a part of the number */
+
+         png_fp_add(state, type);
+         break;
+
+      case PNG_FP_INTEGER + PNG_FP_SAW_DOT:
+         /* Ok as trailer, ok as lead of fraction. */
+         if ((state & PNG_FP_SAW_DOT) != 0) /* two dots */
+            goto PNG_FP_End;
+
+         else if ((state & PNG_FP_SAW_DIGIT) != 0) /* trailing dot? */
+            png_fp_add(state, type);
+
+         else
+            png_fp_set(state, PNG_FP_FRACTION | type);
+
+         break;
+
+      case PNG_FP_INTEGER + PNG_FP_SAW_DIGIT:
+         if ((state & PNG_FP_SAW_DOT) != 0) /* delayed fraction */
+            png_fp_set(state, PNG_FP_FRACTION | PNG_FP_SAW_DOT);
+
+         png_fp_add(state, type | PNG_FP_WAS_VALID);
+
+         break;
+
+      case PNG_FP_INTEGER + PNG_FP_SAW_E:
+         if ((state & PNG_FP_SAW_DIGIT) == 0)
+            goto PNG_FP_End;
+
+         png_fp_set(state, PNG_FP_EXPONENT);
+
+         break;
+
+   /* case PNG_FP_FRACTION + PNG_FP_SAW_SIGN:
+         goto PNG_FP_End; ** no sign in fraction */
+
+   /* case PNG_FP_FRACTION + PNG_FP_SAW_DOT:
+         goto PNG_FP_End; ** Because SAW_DOT is always set */
+
+      case PNG_FP_FRACTION + PNG_FP_SAW_DIGIT:
+         png_fp_add(state, type | PNG_FP_WAS_VALID);
+         break;
+
+      case PNG_FP_FRACTION + PNG_FP_SAW_E:
+         /* This is correct because the trailing '.' on an
+          * integer is handled above - so we can only get here
+          * with the sequence ".E" (with no preceding digits).
+          */
+         if ((state & PNG_FP_SAW_DIGIT) == 0)
+            goto PNG_FP_End;
+
+         png_fp_set(state, PNG_FP_EXPONENT);
+
+         break;
+
+      case PNG_FP_EXPONENT + PNG_FP_SAW_SIGN:
+         if ((state & PNG_FP_SAW_ANY) != 0)
+            goto PNG_FP_End; /* not a part of the number */
+
+         png_fp_add(state, PNG_FP_SAW_SIGN);
+
+         break;
+
+   /* case PNG_FP_EXPONENT + PNG_FP_SAW_DOT:
+         goto PNG_FP_End; */
+
+      case PNG_FP_EXPONENT + PNG_FP_SAW_DIGIT:
+         png_fp_add(state, PNG_FP_SAW_DIGIT | PNG_FP_WAS_VALID);
+
+         break;
+
+   /* case PNG_FP_EXPONEXT + PNG_FP_SAW_E:
+         goto PNG_FP_End; */
+
+      default: goto PNG_FP_End; /* I.e. break 2 */
+      }
+
+      /* The character seems ok, continue. */
+      ++i;
+   }
+
+PNG_FP_End:
+   /* Here at the end, update the state and return the correct
+    * return code.
+    */
+   *statep = state;
+   *whereami = i;
+
+   return (state & PNG_FP_SAW_DIGIT) != 0;
+}
+
+
+/* The same but for a complete string. */
+int
+png_check_fp_string(png_const_charp string, size_t size)
+{
+   int        state=0;
+   size_t char_index=0;
+
+   if (png_check_fp_number(string, size, &state, &char_index) != 0 &&
+      (char_index == size || string[char_index] == 0))
+      return state /* must be non-zero - see above */;
+
+   return 0; /* i.e. fail */
+}
+#endif /* pCAL || sCAL */
+
+#ifdef PNG_sCAL_SUPPORTED
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Utility used below - a simple accurate power of ten from an integral
+ * exponent.
+ */
+static double
+png_pow10(int power)
+{
+   int recip = 0;
+   double d = 1;
+
+   /* Handle negative exponent with a reciprocal at the end because
+    * 10 is exact whereas .1 is inexact in base 2
+    */
+   if (power < 0)
+   {
+      if (power < DBL_MIN_10_EXP) return 0;
+      recip = 1; power = -power;
+   }
+
+   if (power > 0)
+   {
+      /* Decompose power bitwise. */
+      double mult = 10;
+      do
+      {
+         if (power & 1) d *= mult;
+         mult *= mult;
+         power >>= 1;
+      }
+      while (power > 0);
+
+      if (recip != 0) d = 1/d;
+   }
+   /* else power is 0 and d is 1 */
+
+   return d;
+}
+
+/* Function to format a floating point value in ASCII with a given
+ * precision.
+ */
+void /* PRIVATE */
+png_ascii_from_fp(png_const_structrp png_ptr, png_charp ascii, size_t size,
+    double fp, unsigned int precision)
+{
+   /* We use standard functions from math.h, but not printf because
+    * that would require stdio.  The caller must supply a buffer of
+    * sufficient size or we will png_error.  The tests on size and
+    * the space in ascii[] consumed are indicated below.
+    */
+   if (precision < 1)
+      precision = DBL_DIG;
+
+   /* Enforce the limit of the implementation precision too. */
+   if (precision > DBL_DIG+1)
+      precision = DBL_DIG+1;
+
+   /* Basic sanity checks */
+   if (size >= precision+5) /* See the requirements below. */
+   {
+      if (fp < 0)
+      {
+         fp = -fp;
+         *ascii++ = 45; /* '-'  PLUS 1 TOTAL 1 */
+         --size;
+      }
+
+      if (fp >= DBL_MIN && fp <= DBL_MAX)
+      {
+         int exp_b10;   /* A base 10 exponent */
+         double base;   /* 10^exp_b10 */
+
+         /* First extract a base 10 exponent of the number,
+          * the calculation below rounds down when converting
+          * from base 2 to base 10 (multiply by log10(2) -
+          * 0.3010, but 77/256 is 0.3008, so exp_b10 needs to
+          * be increased.  Note that the arithmetic shift
+          * performs a floor() unlike C arithmetic - using a
+          * C multiply would break the following for negative
+          * exponents.
+          */
+         (void)frexp(fp, &exp_b10); /* exponent to base 2 */
+
+         exp_b10 = (exp_b10 * 77) >> 8; /* <= exponent to base 10 */
+
+         /* Avoid underflow here. */
+         base = png_pow10(exp_b10); /* May underflow */
+
+         while (base < DBL_MIN || base < fp)
+         {
+            /* And this may overflow. */
+            double test = png_pow10(exp_b10+1);
+
+            if (test <= DBL_MAX)
+            {
+               ++exp_b10; base = test;
+            }
+
+            else
+               break;
+         }
+
+         /* Normalize fp and correct exp_b10, after this fp is in the
+          * range [.1,1) and exp_b10 is both the exponent and the digit
+          * *before* which the decimal point should be inserted
+          * (starting with 0 for the first digit).  Note that this
+          * works even if 10^exp_b10 is out of range because of the
+          * test on DBL_MAX above.
+          */
+         fp /= base;
+         while (fp >= 1)
+         {
+            fp /= 10; ++exp_b10;
+         }
+
+         /* Because of the code above fp may, at this point, be
+          * less than .1, this is ok because the code below can
+          * handle the leading zeros this generates, so no attempt
+          * is made to correct that here.
+          */
+
+         {
+            unsigned int czero, clead, cdigits;
+            char exponent[10];
+
+            /* Allow up to two leading zeros - this will not lengthen
+             * the number compared to using E-n.
+             */
+            if (exp_b10 < 0 && exp_b10 > -3) /* PLUS 3 TOTAL 4 */
+            {
+               czero = 0U-exp_b10; /* PLUS 2 digits: TOTAL 3 */
+               exp_b10 = 0;      /* Dot added below before first output. */
+            }
+            else
+               czero = 0;    /* No zeros to add */
+
+            /* Generate the digit list, stripping trailing zeros and
+             * inserting a '.' before a digit if the exponent is 0.
+             */
+            clead = czero; /* Count of leading zeros */
+            cdigits = 0;   /* Count of digits in list. */
+
+            do
+            {
+               double d;
+
+               fp *= 10;
+               /* Use modf here, not floor and subtract, so that
+                * the separation is done in one step.  At the end
+                * of the loop don't break the number into parts so
+                * that the final digit is rounded.
+                */
+               if (cdigits+czero+1 < precision+clead)
+                  fp = modf(fp, &d);
+
+               else
+               {
+                  d = floor(fp + .5);
+
+                  if (d > 9)
+                  {
+                     /* Rounding up to 10, handle that here. */
+                     if (czero > 0)
+                     {
+                        --czero; d = 1;
+                        if (cdigits == 0) --clead;
+                     }
+                     else
+                     {
+                        while (cdigits > 0 && d > 9)
+                        {
+                           int ch = *--ascii;
+
+                           if (exp_b10 != (-1))
+                              ++exp_b10;
+
+                           else if (ch == 46)
+                           {
+                              ch = *--ascii; ++size;
+                              /* Advance exp_b10 to '1', so that the
+                               * decimal point happens after the
+                               * previous digit.
+                               */
+                              exp_b10 = 1;
+                           }
+
+                           --cdigits;
+                           d = ch - 47;  /* I.e. 1+(ch-48) */
+                        }
+
+                        /* Did we reach the beginning? If so adjust the
+                         * exponent but take into account the leading
+                         * decimal point.
+                         */
+                        if (d > 9)  /* cdigits == 0 */
+                        {
+                           if (exp_b10 == (-1))
+                           {
+                              /* Leading decimal point (plus zeros?), if
+                               * we lose the decimal point here it must
+                               * be reentered below.
+                               */
+                              int ch = *--ascii;
+
+                              if (ch == 46)
+                              {
+                                 ++size; exp_b10 = 1;
+                              }
+
+                              /* Else lost a leading zero, so 'exp_b10' is
+                               * still ok at (-1)
+                               */
+                           }
+                           else
+                              ++exp_b10;
+
+                           /* In all cases we output a '1' */
+                           d = 1;
+                        }
+                     }
+                  }
+                  fp = 0; /* Guarantees termination below. */
+               }
+
+               if (d == 0)
+               {
+                  ++czero;
+                  if (cdigits == 0) ++clead;
+               }
+               else
+               {
+                  /* Included embedded zeros in the digit count. */
+                  cdigits += czero - clead;
+                  clead = 0;
+
+                  while (czero > 0)
+                  {
+                     /* exp_b10 == (-1) means we just output the decimal
+                      * place - after the DP don't adjust 'exp_b10' any
+                      * more!
+                      */
+                     if (exp_b10 != (-1))
+                     {
+                        if (exp_b10 == 0)
+                        {
+                           *ascii++ = 46; --size;
+                        }
+                        /* PLUS 1: TOTAL 4 */
+                        --exp_b10;
+                     }
+                     *ascii++ = 48; --czero;
+                  }
+
+                  if (exp_b10 != (-1))
+                  {
+                     if (exp_b10 == 0)
+                     {
+                        *ascii++ = 46; --size; /* counted above */
+                     }
+
+                     --exp_b10;
+                  }
+                  *ascii++ = (char)(48 + (int)d); ++cdigits;
+               }
+            }
+            while (cdigits+czero < precision+clead && fp > DBL_MIN);
+
+            /* The total output count (max) is now 4+precision */
+
+            /* Check for an exponent, if we don't need one we are
+             * done and just need to terminate the string.  At this
+             * point, exp_b10==(-1) is effectively a flag: it got
+             * to '-1' because of the decrement, after outputting
+             * the decimal point above. (The exponent required is
+             * *not* -1.)
+             */
+            if (exp_b10 >= (-1) && exp_b10 <= 2)
+            {
+               /* The following only happens if we didn't output the
+                * leading zeros above for negative exponent, so this
+                * doesn't add to the digit requirement.  Note that the
+                * two zeros here can only be output if the two leading
+                * zeros were *not* output, so this doesn't increase
+                * the output count.
+                */
+               while (exp_b10-- > 0) *ascii++ = 48;
+
+               *ascii = 0;
+
+               /* Total buffer requirement (including the '\0') is
+                * 5+precision - see check at the start.
+                */
+               return;
+            }
+
+            /* Here if an exponent is required, adjust size for
+             * the digits we output but did not count.  The total
+             * digit output here so far is at most 1+precision - no
+             * decimal point and no leading or trailing zeros have
+             * been output.
+             */
+            size -= cdigits;
+
+            *ascii++ = 69; --size;    /* 'E': PLUS 1 TOTAL 2+precision */
+
+            /* The following use of an unsigned temporary avoids ambiguities in
+             * the signed arithmetic on exp_b10 and permits GCC at least to do
+             * better optimization.
+             */
+            {
+               unsigned int uexp_b10;
+
+               if (exp_b10 < 0)
+               {
+                  *ascii++ = 45; --size; /* '-': PLUS 1 TOTAL 3+precision */
+                  uexp_b10 = 0U-exp_b10;
+               }
+
+               else
+                  uexp_b10 = 0U+exp_b10;
+
+               cdigits = 0;
+
+               while (uexp_b10 > 0)
+               {
+                  exponent[cdigits++] = (char)(48 + uexp_b10 % 10);
+                  uexp_b10 /= 10;
+               }
+            }
+
+            /* Need another size check here for the exponent digits, so
+             * this need not be considered above.
+             */
+            if (size > cdigits)
+            {
+               while (cdigits > 0) *ascii++ = exponent[--cdigits];
+
+               *ascii = 0;
+
+               return;
+            }
+         }
+      }
+      else if (!(fp >= DBL_MIN))
+      {
+         *ascii++ = 48; /* '0' */
+         *ascii = 0;
+         return;
+      }
+      else
+      {
+         *ascii++ = 105; /* 'i' */
+         *ascii++ = 110; /* 'n' */
+         *ascii++ = 102; /* 'f' */
+         *ascii = 0;
+         return;
+      }
+   }
+
+   /* Here on buffer too small. */
+   png_error(png_ptr, "ASCII conversion buffer too small");
+}
+#  endif /* FLOATING_POINT */
+
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+/* Function to format a fixed point value in ASCII.
+ */
+void /* PRIVATE */
+png_ascii_from_fixed(png_const_structrp png_ptr, png_charp ascii,
+    size_t size, png_fixed_point fp)
+{
+   /* Require space for 10 decimal digits, a decimal point, a minus sign and a
+    * trailing \0, 13 characters:
+    */
+   if (size > 12)
+   {
+      png_uint_32 num;
+
+      /* Avoid overflow here on the minimum integer. */
+      if (fp < 0)
+      {
+         *ascii++ = 45; num = (png_uint_32)(-fp);
+      }
+      else
+         num = (png_uint_32)fp;
+
+      if (num <= 0x80000000) /* else overflowed */
+      {
+         unsigned int ndigits = 0, first = 16 /* flag value */;
+         char digits[10] = {0};
+
+         while (num)
+         {
+            /* Split the low digit off num: */
+            unsigned int tmp = num/10;
+            num -= tmp*10;
+            digits[ndigits++] = (char)(48 + num);
+            /* Record the first non-zero digit, note that this is a number
+             * starting at 1, it's not actually the array index.
+             */
+            if (first == 16 && num > 0)
+               first = ndigits;
+            num = tmp;
+         }
+
+         if (ndigits > 0)
+         {
+            while (ndigits > 5) *ascii++ = digits[--ndigits];
+            /* The remaining digits are fractional digits, ndigits is '5' or
+             * smaller at this point.  It is certainly not zero.  Check for a
+             * non-zero fractional digit:
+             */
+            if (first <= 5)
+            {
+               unsigned int i;
+               *ascii++ = 46; /* decimal point */
+               /* ndigits may be <5 for small numbers, output leading zeros
+                * then ndigits digits to first:
+                */
+               i = 5;
+               while (ndigits < i)
+               {
+                  *ascii++ = 48; --i;
+               }
+               while (ndigits >= first) *ascii++ = digits[--ndigits];
+               /* Don't output the trailing zeros! */
+            }
+         }
+         else
+            *ascii++ = 48;
+
+         /* And null terminate the string: */
+         *ascii = 0;
+         return;
+      }
+   }
+
+   /* Here on buffer too small. */
+   png_error(png_ptr, "ASCII conversion buffer too small");
+}
+#   endif /* FIXED_POINT */
+#endif /* SCAL */
+
+#if defined(PNG_FLOATING_POINT_SUPPORTED) && \
+   !defined(PNG_FIXED_POINT_MACRO_SUPPORTED) && \
+   (defined(PNG_gAMA_SUPPORTED) || defined(PNG_cHRM_SUPPORTED) || \
+   defined(PNG_sCAL_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) || \
+   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)) || \
+   (defined(PNG_sCAL_SUPPORTED) && \
+   defined(PNG_FLOATING_ARITHMETIC_SUPPORTED))
+png_fixed_point
+png_fixed(png_const_structrp png_ptr, double fp, png_const_charp text)
+{
+   double r = floor(100000 * fp + .5);
+
+   if (r > 2147483647. || r < -2147483648.)
+      png_fixed_error(png_ptr, text);
+
+#  ifndef PNG_ERROR_TEXT_SUPPORTED
+   PNG_UNUSED(text)
+#  endif
+
+   return (png_fixed_point)r;
+}
+#endif
+
+#if defined(PNG_GAMMA_SUPPORTED) || defined(PNG_COLORSPACE_SUPPORTED) ||\
+    defined(PNG_INCH_CONVERSIONS_SUPPORTED) || defined(PNG_READ_pHYs_SUPPORTED)
+/* muldiv functions */
+/* This API takes signed arguments and rounds the result to the nearest
+ * integer (or, for a fixed point number - the standard argument - to
+ * the nearest .00001).  Overflow and divide by zero are signalled in
+ * the result, a boolean - true on success, false on overflow.
+ */
+int
+png_muldiv(png_fixed_point_p res, png_fixed_point a, png_int_32 times,
+    png_int_32 divisor)
+{
+   /* Return a * times / divisor, rounded. */
+   if (divisor != 0)
+   {
+      if (a == 0 || times == 0)
+      {
+         *res = 0;
+         return 1;
+      }
+      else
+      {
+#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
+         double r = a;
+         r *= times;
+         r /= divisor;
+         r = floor(r+.5);
+
+         /* A png_fixed_point is a 32-bit integer. */
+         if (r <= 2147483647. && r >= -2147483648.)
+         {
+            *res = (png_fixed_point)r;
+            return 1;
+         }
+#else
+         int negative = 0;
+         png_uint_32 A, T, D;
+         png_uint_32 s16, s32, s00;
+
+         if (a < 0)
+            negative = 1, A = -a;
+         else
+            A = a;
+
+         if (times < 0)
+            negative = !negative, T = -times;
+         else
+            T = times;
+
+         if (divisor < 0)
+            negative = !negative, D = -divisor;
+         else
+            D = divisor;
+
+         /* Following can't overflow because the arguments only
+          * have 31 bits each, however the result may be 32 bits.
+          */
+         s16 = (A >> 16) * (T & 0xffff) +
+                           (A & 0xffff) * (T >> 16);
+         /* Can't overflow because the a*times bit is only 30
+          * bits at most.
+          */
+         s32 = (A >> 16) * (T >> 16) + (s16 >> 16);
+         s00 = (A & 0xffff) * (T & 0xffff);
+
+         s16 = (s16 & 0xffff) << 16;
+         s00 += s16;
+
+         if (s00 < s16)
+            ++s32; /* carry */
+
+         if (s32 < D) /* else overflow */
+         {
+            /* s32.s00 is now the 64-bit product, do a standard
+             * division, we know that s32 < D, so the maximum
+             * required shift is 31.
+             */
+            int bitshift = 32;
+            png_fixed_point result = 0; /* NOTE: signed */
+
+            while (--bitshift >= 0)
+            {
+               png_uint_32 d32, d00;
+
+               if (bitshift > 0)
+                  d32 = D >> (32-bitshift), d00 = D << bitshift;
+
+               else
+                  d32 = 0, d00 = D;
+
+               if (s32 > d32)
+               {
+                  if (s00 < d00) --s32; /* carry */
+                  s32 -= d32, s00 -= d00, result += 1<<bitshift;
+               }
+
+               else
+                  if (s32 == d32 && s00 >= d00)
+                     s32 = 0, s00 -= d00, result += 1<<bitshift;
+            }
+
+            /* Handle the rounding. */
+            if (s00 >= (D >> 1))
+               ++result;
+
+            if (negative != 0)
+               result = -result;
+
+            /* Check for overflow. */
+            if ((negative != 0 && result <= 0) ||
+                (negative == 0 && result >= 0))
+            {
+               *res = result;
+               return 1;
+            }
+         }
+#endif
+      }
+   }
+
+   return 0;
+}
+#endif /* READ_GAMMA || INCH_CONVERSIONS */
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_INCH_CONVERSIONS_SUPPORTED)
+/* The following is for when the caller doesn't much care about the
+ * result.
+ */
+png_fixed_point
+png_muldiv_warn(png_const_structrp png_ptr, png_fixed_point a, png_int_32 times,
+    png_int_32 divisor)
+{
+   png_fixed_point result;
+
+   if (png_muldiv(&result, a, times, divisor) != 0)
+      return result;
+
+   png_warning(png_ptr, "fixed point overflow ignored");
+   return 0;
+}
+#endif
+
+#ifdef PNG_GAMMA_SUPPORTED /* more fixed point functions for gamma */
+/* Calculate a reciprocal, return 0 on div-by-zero or overflow. */
+png_fixed_point
+png_reciprocal(png_fixed_point a)
+{
+#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
+   double r = floor(1E10/a+.5);
+
+   if (r <= 2147483647. && r >= -2147483648.)
+      return (png_fixed_point)r;
+#else
+   png_fixed_point res;
+
+   if (png_muldiv(&res, 100000, 100000, a) != 0)
+      return res;
+#endif
+
+   return 0; /* error/overflow */
+}
+
+/* This is the shared test on whether a gamma value is 'significant' - whether
+ * it is worth doing gamma correction.
+ */
+int /* PRIVATE */
+png_gamma_significant(png_fixed_point gamma_val)
+{
+   return gamma_val < PNG_FP_1 - PNG_GAMMA_THRESHOLD_FIXED ||
+       gamma_val > PNG_FP_1 + PNG_GAMMA_THRESHOLD_FIXED;
+}
+#endif
+
+#ifdef PNG_READ_GAMMA_SUPPORTED
+#ifdef PNG_16BIT_SUPPORTED
+/* A local convenience routine. */
+static png_fixed_point
+png_product2(png_fixed_point a, png_fixed_point b)
+{
+   /* The required result is 1/a * 1/b; the following preserves accuracy. */
+#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
+   double r = a * 1E-5;
+   r *= b;
+   r = floor(r+.5);
+
+   if (r <= 2147483647. && r >= -2147483648.)
+      return (png_fixed_point)r;
+#else
+   png_fixed_point res;
+
+   if (png_muldiv(&res, a, b, 100000) != 0)
+      return res;
+#endif
+
+   return 0; /* overflow */
+}
+#endif /* 16BIT */
+
+/* The inverse of the above. */
+png_fixed_point
+png_reciprocal2(png_fixed_point a, png_fixed_point b)
+{
+   /* The required result is 1/a * 1/b; the following preserves accuracy. */
+#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
+   if (a != 0 && b != 0)
+   {
+      double r = 1E15/a;
+      r /= b;
+      r = floor(r+.5);
+
+      if (r <= 2147483647. && r >= -2147483648.)
+         return (png_fixed_point)r;
+   }
+#else
+   /* This may overflow because the range of png_fixed_point isn't symmetric,
+    * but this API is only used for the product of file and screen gamma so it
+    * doesn't matter that the smallest number it can produce is 1/21474, not
+    * 1/100000
+    */
+   png_fixed_point res = png_product2(a, b);
+
+   if (res != 0)
+      return png_reciprocal(res);
+#endif
+
+   return 0; /* overflow */
+}
+#endif /* READ_GAMMA */
+
+#ifdef PNG_READ_GAMMA_SUPPORTED /* gamma table code */
+#ifndef PNG_FLOATING_ARITHMETIC_SUPPORTED
+/* Fixed point gamma.
+ *
+ * The code to calculate the tables used below can be found in the shell script
+ * contrib/tools/intgamma.sh
+ *
+ * To calculate gamma this code implements fast log() and exp() calls using only
+ * fixed point arithmetic.  This code has sufficient precision for either 8-bit
+ * or 16-bit sample values.
+ *
+ * The tables used here were calculated using simple 'bc' programs, but C double
+ * precision floating point arithmetic would work fine.
+ *
+ * 8-bit log table
+ *   This is a table of -log(value/255)/log(2) for 'value' in the range 128 to
+ *   255, so it's the base 2 logarithm of a normalized 8-bit floating point
+ *   mantissa.  The numbers are 32-bit fractions.
+ */
+static const png_uint_32
+png_8bit_l2[128] =
+{
+   4270715492U, 4222494797U, 4174646467U, 4127164793U, 4080044201U, 4033279239U,
+   3986864580U, 3940795015U, 3895065449U, 3849670902U, 3804606499U, 3759867474U,
+   3715449162U, 3671346997U, 3627556511U, 3584073329U, 3540893168U, 3498011834U,
+   3455425220U, 3413129301U, 3371120137U, 3329393864U, 3287946700U, 3246774933U,
+   3205874930U, 3165243125U, 3124876025U, 3084770202U, 3044922296U, 3005329011U,
+   2965987113U, 2926893432U, 2888044853U, 2849438323U, 2811070844U, 2772939474U,
+   2735041326U, 2697373562U, 2659933400U, 2622718104U, 2585724991U, 2548951424U,
+   2512394810U, 2476052606U, 2439922311U, 2404001468U, 2368287663U, 2332778523U,
+   2297471715U, 2262364947U, 2227455964U, 2192742551U, 2158222529U, 2123893754U,
+   2089754119U, 2055801552U, 2022034013U, 1988449497U, 1955046031U, 1921821672U,
+   1888774511U, 1855902668U, 1823204291U, 1790677560U, 1758320682U, 1726131893U,
+   1694109454U, 1662251657U, 1630556815U, 1599023271U, 1567649391U, 1536433567U,
+   1505374214U, 1474469770U, 1443718700U, 1413119487U, 1382670639U, 1352370686U,
+   1322218179U, 1292211689U, 1262349810U, 1232631153U, 1203054352U, 1173618059U,
+   1144320946U, 1115161701U, 1086139034U, 1057251672U, 1028498358U, 999877854U,
+   971388940U, 943030410U, 914801076U, 886699767U, 858725327U, 830876614U,
+   803152505U, 775551890U, 748073672U, 720716771U, 693480120U, 666362667U,
+   639363374U, 612481215U, 585715177U, 559064263U, 532527486U, 506103872U,
+   479792461U, 453592303U, 427502463U, 401522014U, 375650043U, 349885648U,
+   324227938U, 298676034U, 273229066U, 247886176U, 222646516U, 197509248U,
+   172473545U, 147538590U, 122703574U, 97967701U, 73330182U, 48790236U,
+   24347096U, 0U
+
+#if 0
+   /* The following are the values for 16-bit tables - these work fine for the
+    * 8-bit conversions but produce very slightly larger errors in the 16-bit
+    * log (about 1.2 as opposed to 0.7 absolute error in the final value).  To
+    * use these all the shifts below must be adjusted appropriately.
+    */
+   65166, 64430, 63700, 62976, 62257, 61543, 60835, 60132, 59434, 58741, 58054,
+   57371, 56693, 56020, 55352, 54689, 54030, 53375, 52726, 52080, 51439, 50803,
+   50170, 49542, 48918, 48298, 47682, 47070, 46462, 45858, 45257, 44661, 44068,
+   43479, 42894, 42312, 41733, 41159, 40587, 40020, 39455, 38894, 38336, 37782,
+   37230, 36682, 36137, 35595, 35057, 34521, 33988, 33459, 32932, 32408, 31887,
+   31369, 30854, 30341, 29832, 29325, 28820, 28319, 27820, 27324, 26830, 26339,
+   25850, 25364, 24880, 24399, 23920, 23444, 22970, 22499, 22029, 21562, 21098,
+   20636, 20175, 19718, 19262, 18808, 18357, 17908, 17461, 17016, 16573, 16132,
+   15694, 15257, 14822, 14390, 13959, 13530, 13103, 12678, 12255, 11834, 11415,
+   10997, 10582, 10168, 9756, 9346, 8937, 8531, 8126, 7723, 7321, 6921, 6523,
+   6127, 5732, 5339, 4947, 4557, 4169, 3782, 3397, 3014, 2632, 2251, 1872, 1495,
+   1119, 744, 372
+#endif
+};
+
+static png_int_32
+png_log8bit(unsigned int x)
+{
+   unsigned int lg2 = 0;
+   /* Each time 'x' is multiplied by 2, 1 must be subtracted off the final log,
+    * because the log is actually negate that means adding 1.  The final
+    * returned value thus has the range 0 (for 255 input) to 7.994 (for 1
+    * input), return -1 for the overflow (log 0) case, - so the result is
+    * always at most 19 bits.
+    */
+   if ((x &= 0xff) == 0)
+      return -1;
+
+   if ((x & 0xf0) == 0)
+      lg2  = 4, x <<= 4;
+
+   if ((x & 0xc0) == 0)
+      lg2 += 2, x <<= 2;
+
+   if ((x & 0x80) == 0)
+      lg2 += 1, x <<= 1;
+
+   /* result is at most 19 bits, so this cast is safe: */
+   return (png_int_32)((lg2 << 16) + ((png_8bit_l2[x-128]+32768)>>16));
+}
+
+/* The above gives exact (to 16 binary places) log2 values for 8-bit images,
+ * for 16-bit images we use the most significant 8 bits of the 16-bit value to
+ * get an approximation then multiply the approximation by a correction factor
+ * determined by the remaining up to 8 bits.  This requires an additional step
+ * in the 16-bit case.
+ *
+ * We want log2(value/65535), we have log2(v'/255), where:
+ *
+ *    value = v' * 256 + v''
+ *          = v' * f
+ *
+ * So f is value/v', which is equal to (256+v''/v') since v' is in the range 128
+ * to 255 and v'' is in the range 0 to 255 f will be in the range 256 to less
+ * than 258.  The final factor also needs to correct for the fact that our 8-bit
+ * value is scaled by 255, whereas the 16-bit values must be scaled by 65535.
+ *
+ * This gives a final formula using a calculated value 'x' which is value/v' and
+ * scaling by 65536 to match the above table:
+ *
+ *   log2(x/257) * 65536
+ *
+ * Since these numbers are so close to '1' we can use simple linear
+ * interpolation between the two end values 256/257 (result -368.61) and 258/257
+ * (result 367.179).  The values used below are scaled by a further 64 to give
+ * 16-bit precision in the interpolation:
+ *
+ * Start (256): -23591
+ * Zero  (257):      0
+ * End   (258):  23499
+ */
+#ifdef PNG_16BIT_SUPPORTED
+static png_int_32
+png_log16bit(png_uint_32 x)
+{
+   unsigned int lg2 = 0;
+
+   /* As above, but now the input has 16 bits. */
+   if ((x &= 0xffff) == 0)
+      return -1;
+
+   if ((x & 0xff00) == 0)
+      lg2  = 8, x <<= 8;
+
+   if ((x & 0xf000) == 0)
+      lg2 += 4, x <<= 4;
+
+   if ((x & 0xc000) == 0)
+      lg2 += 2, x <<= 2;
+
+   if ((x & 0x8000) == 0)
+      lg2 += 1, x <<= 1;
+
+   /* Calculate the base logarithm from the top 8 bits as a 28-bit fractional
+    * value.
+    */
+   lg2 <<= 28;
+   lg2 += (png_8bit_l2[(x>>8)-128]+8) >> 4;
+
+   /* Now we need to interpolate the factor, this requires a division by the top
+    * 8 bits.  Do this with maximum precision.
+    */
+   x = ((x << 16) + (x >> 9)) / (x >> 8);
+
+   /* Since we divided by the top 8 bits of 'x' there will be a '1' at 1<<24,
+    * the value at 1<<16 (ignoring this) will be 0 or 1; this gives us exactly
+    * 16 bits to interpolate to get the low bits of the result.  Round the
+    * answer.  Note that the end point values are scaled by 64 to retain overall
+    * precision and that 'lg2' is current scaled by an extra 12 bits, so adjust
+    * the overall scaling by 6-12.  Round at every step.
+    */
+   x -= 1U << 24;
+
+   if (x <= 65536U) /* <= '257' */
+      lg2 += ((23591U * (65536U-x)) + (1U << (16+6-12-1))) >> (16+6-12);
+
+   else
+      lg2 -= ((23499U * (x-65536U)) + (1U << (16+6-12-1))) >> (16+6-12);
+
+   /* Safe, because the result can't have more than 20 bits: */
+   return (png_int_32)((lg2 + 2048) >> 12);
+}
+#endif /* 16BIT */
+
+/* The 'exp()' case must invert the above, taking a 20-bit fixed point
+ * logarithmic value and returning a 16 or 8-bit number as appropriate.  In
+ * each case only the low 16 bits are relevant - the fraction - since the
+ * integer bits (the top 4) simply determine a shift.
+ *
+ * The worst case is the 16-bit distinction between 65535 and 65534. This
+ * requires perhaps spurious accuracy in the decoding of the logarithm to
+ * distinguish log2(65535/65534.5) - 10^-5 or 17 bits.  There is little chance
+ * of getting this accuracy in practice.
+ *
+ * To deal with this the following exp() function works out the exponent of the
+ * fractional part of the logarithm by using an accurate 32-bit value from the
+ * top four fractional bits then multiplying in the remaining bits.
+ */
+static const png_uint_32
+png_32bit_exp[16] =
+{
+   /* NOTE: the first entry is deliberately set to the maximum 32-bit value. */
+   4294967295U, 4112874773U, 3938502376U, 3771522796U, 3611622603U, 3458501653U,
+   3311872529U, 3171459999U, 3037000500U, 2908241642U, 2784941738U, 2666869345U,
+   2553802834U, 2445529972U, 2341847524U, 2242560872U
+};
+
+/* Adjustment table; provided to explain the numbers in the code below. */
+#if 0
+for (i=11;i>=0;--i){ print i, " ", (1 - e(-(2^i)/65536*l(2))) * 2^(32-i), "\n"}
+   11 44937.64284865548751208448
+   10 45180.98734845585101160448
+    9 45303.31936980687359311872
+    8 45364.65110595323018870784
+    7 45395.35850361789624614912
+    6 45410.72259715102037508096
+    5 45418.40724413220722311168
+    4 45422.25021786898173001728
+    3 45424.17186732298419044352
+    2 45425.13273269940811464704
+    1 45425.61317555035558641664
+    0 45425.85339951654943850496
+#endif
+
+static png_uint_32
+png_exp(png_fixed_point x)
+{
+   if (x > 0 && x <= 0xfffff) /* Else overflow or zero (underflow) */
+   {
+      /* Obtain a 4-bit approximation */
+      png_uint_32 e = png_32bit_exp[(x >> 12) & 0x0f];
+
+      /* Incorporate the low 12 bits - these decrease the returned value by
+       * multiplying by a number less than 1 if the bit is set.  The multiplier
+       * is determined by the above table and the shift. Notice that the values
+       * converge on 45426 and this is used to allow linear interpolation of the
+       * low bits.
+       */
+      if (x & 0x800)
+         e -= (((e >> 16) * 44938U) +  16U) >> 5;
+
+      if (x & 0x400)
+         e -= (((e >> 16) * 45181U) +  32U) >> 6;
+
+      if (x & 0x200)
+         e -= (((e >> 16) * 45303U) +  64U) >> 7;
+
+      if (x & 0x100)
+         e -= (((e >> 16) * 45365U) + 128U) >> 8;
+
+      if (x & 0x080)
+         e -= (((e >> 16) * 45395U) + 256U) >> 9;
+
+      if (x & 0x040)
+         e -= (((e >> 16) * 45410U) + 512U) >> 10;
+
+      /* And handle the low 6 bits in a single block. */
+      e -= (((e >> 16) * 355U * (x & 0x3fU)) + 256U) >> 9;
+
+      /* Handle the upper bits of x. */
+      e >>= x >> 16;
+      return e;
+   }
+
+   /* Check for overflow */
+   if (x <= 0)
+      return png_32bit_exp[0];
+
+   /* Else underflow */
+   return 0;
+}
+
+static png_byte
+png_exp8bit(png_fixed_point lg2)
+{
+   /* Get a 32-bit value: */
+   png_uint_32 x = png_exp(lg2);
+
+   /* Convert the 32-bit value to 0..255 by multiplying by 256-1. Note that the
+    * second, rounding, step can't overflow because of the first, subtraction,
+    * step.
+    */
+   x -= x >> 8;
+   return (png_byte)(((x + 0x7fffffU) >> 24) & 0xff);
+}
+
+#ifdef PNG_16BIT_SUPPORTED
+static png_uint_16
+png_exp16bit(png_fixed_point lg2)
+{
+   /* Get a 32-bit value: */
+   png_uint_32 x = png_exp(lg2);
+
+   /* Convert the 32-bit value to 0..65535 by multiplying by 65536-1: */
+   x -= x >> 16;
+   return (png_uint_16)((x + 32767U) >> 16);
+}
+#endif /* 16BIT */
+#endif /* FLOATING_ARITHMETIC */
+
+png_byte
+png_gamma_8bit_correct(unsigned int value, png_fixed_point gamma_val)
+{
+   if (value > 0 && value < 255)
+   {
+#     ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
+         /* 'value' is unsigned, ANSI-C90 requires the compiler to correctly
+          * convert this to a floating point value.  This includes values that
+          * would overflow if 'value' were to be converted to 'int'.
+          *
+          * Apparently GCC, however, does an intermediate conversion to (int)
+          * on some (ARM) but not all (x86) platforms, possibly because of
+          * hardware FP limitations.  (E.g. if the hardware conversion always
+          * assumes the integer register contains a signed value.)  This results
+          * in ANSI-C undefined behavior for large values.
+          *
+          * Other implementations on the same machine might actually be ANSI-C90
+          * conformant and therefore compile spurious extra code for the large
+          * values.
+          *
+          * We can be reasonably sure that an unsigned to float conversion
+          * won't be faster than an int to float one.  Therefore this code
+          * assumes responsibility for the undefined behavior, which it knows
+          * can't happen because of the check above.
+          *
+          * Note the argument to this routine is an (unsigned int) because, on
+          * 16-bit platforms, it is assigned a value which might be out of
+          * range for an (int); that would result in undefined behavior in the
+          * caller if the *argument* ('value') were to be declared (int).
+          */
+         double r = floor(255*pow((int)/*SAFE*/value/255.,gamma_val*.00001)+.5);
+         return (png_byte)r;
+#     else
+         png_int_32 lg2 = png_log8bit(value);
+         png_fixed_point res;
+
+         if (png_muldiv(&res, gamma_val, lg2, PNG_FP_1) != 0)
+            return png_exp8bit(res);
+
+         /* Overflow. */
+         value = 0;
+#     endif
+   }
+
+   return (png_byte)(value & 0xff);
+}
+
+#ifdef PNG_16BIT_SUPPORTED
+png_uint_16
+png_gamma_16bit_correct(unsigned int value, png_fixed_point gamma_val)
+{
+   if (value > 0 && value < 65535)
+   {
+# ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
+      /* The same (unsigned int)->(double) constraints apply here as above,
+       * however in this case the (unsigned int) to (int) conversion can
+       * overflow on an ANSI-C90 compliant system so the cast needs to ensure
+       * that this is not possible.
+       */
+      double r = floor(65535*pow((png_int_32)value/65535.,
+          gamma_val*.00001)+.5);
+      return (png_uint_16)r;
+# else
+      png_int_32 lg2 = png_log16bit(value);
+      png_fixed_point res;
+
+      if (png_muldiv(&res, gamma_val, lg2, PNG_FP_1) != 0)
+         return png_exp16bit(res);
+
+      /* Overflow. */
+      value = 0;
+# endif
+   }
+
+   return (png_uint_16)value;
+}
+#endif /* 16BIT */
+
+/* This does the right thing based on the bit_depth field of the
+ * png_struct, interpreting values as 8-bit or 16-bit.  While the result
+ * is nominally a 16-bit value if bit depth is 8 then the result is
+ * 8-bit (as are the arguments.)
+ */
+png_uint_16 /* PRIVATE */
+png_gamma_correct(png_structrp png_ptr, unsigned int value,
+    png_fixed_point gamma_val)
+{
+   if (png_ptr->bit_depth == 8)
+      return png_gamma_8bit_correct(value, gamma_val);
+
+#ifdef PNG_16BIT_SUPPORTED
+   else
+      return png_gamma_16bit_correct(value, gamma_val);
+#else
+      /* should not reach this */
+      return 0;
+#endif /* 16BIT */
+}
+
+#ifdef PNG_16BIT_SUPPORTED
+/* Internal function to build a single 16-bit table - the table consists of
+ * 'num' 256 entry subtables, where 'num' is determined by 'shift' - the amount
+ * to shift the input values right (or 16-number_of_signifiant_bits).
+ *
+ * The caller is responsible for ensuring that the table gets cleaned up on
+ * png_error (i.e. if one of the mallocs below fails) - i.e. the *table argument
+ * should be somewhere that will be cleaned.
+ */
+static void
+png_build_16bit_table(png_structrp png_ptr, png_uint_16pp *ptable,
+    unsigned int shift, png_fixed_point gamma_val)
+{
+   /* Various values derived from 'shift': */
+   unsigned int num = 1U << (8U - shift);
+#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
+   /* CSE the division and work round wacky GCC warnings (see the comments
+    * in png_gamma_8bit_correct for where these come from.)
+    */
+   double fmax = 1.0 / (((png_int_32)1 << (16U - shift)) - 1);
+#endif
+   unsigned int max = (1U << (16U - shift)) - 1U;
+   unsigned int max_by_2 = 1U << (15U - shift);
+   unsigned int i;
+
+   png_uint_16pp table = *ptable =
+       (png_uint_16pp)png_calloc(png_ptr, num * (sizeof (png_uint_16p)));
+
+   for (i = 0; i < num; i++)
+   {
+      png_uint_16p sub_table = table[i] =
+          (png_uint_16p)png_malloc(png_ptr, 256 * (sizeof (png_uint_16)));
+
+      /* The 'threshold' test is repeated here because it can arise for one of
+       * the 16-bit tables even if the others don't hit it.
+       */
+      if (png_gamma_significant(gamma_val) != 0)
+      {
+         /* The old code would overflow at the end and this would cause the
+          * 'pow' function to return a result >1, resulting in an
+          * arithmetic error.  This code follows the spec exactly; ig is
+          * the recovered input sample, it always has 8-16 bits.
+          *
+          * We want input * 65535/max, rounded, the arithmetic fits in 32
+          * bits (unsigned) so long as max <= 32767.
+          */
+         unsigned int j;
+         for (j = 0; j < 256; j++)
+         {
+            png_uint_32 ig = (j << (8-shift)) + i;
+#           ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
+               /* Inline the 'max' scaling operation: */
+               /* See png_gamma_8bit_correct for why the cast to (int) is
+                * required here.
+                */
+               double d = floor(65535.*pow(ig*fmax, gamma_val*.00001)+.5);
+               sub_table[j] = (png_uint_16)d;
+#           else
+               if (shift != 0)
+                  ig = (ig * 65535U + max_by_2)/max;
+
+               sub_table[j] = png_gamma_16bit_correct(ig, gamma_val);
+#           endif
+         }
+      }
+      else
+      {
+         /* We must still build a table, but do it the fast way. */
+         unsigned int j;
+
+         for (j = 0; j < 256; j++)
+         {
+            png_uint_32 ig = (j << (8-shift)) + i;
+
+            if (shift != 0)
+               ig = (ig * 65535U + max_by_2)/max;
+
+            sub_table[j] = (png_uint_16)ig;
+         }
+      }
+   }
+}
+
+/* NOTE: this function expects the *inverse* of the overall gamma transformation
+ * required.
+ */
+static void
+png_build_16to8_table(png_structrp png_ptr, png_uint_16pp *ptable,
+    unsigned int shift, png_fixed_point gamma_val)
+{
+   unsigned int num = 1U << (8U - shift);
+   unsigned int max = (1U << (16U - shift))-1U;
+   unsigned int i;
+   png_uint_32 last;
+
+   png_uint_16pp table = *ptable =
+       (png_uint_16pp)png_calloc(png_ptr, num * (sizeof (png_uint_16p)));
+
+   /* 'num' is the number of tables and also the number of low bits of low
+    * bits of the input 16-bit value used to select a table.  Each table is
+    * itself indexed by the high 8 bits of the value.
+    */
+   for (i = 0; i < num; i++)
+      table[i] = (png_uint_16p)png_malloc(png_ptr,
+          256 * (sizeof (png_uint_16)));
+
+   /* 'gamma_val' is set to the reciprocal of the value calculated above, so
+    * pow(out,g) is an *input* value.  'last' is the last input value set.
+    *
+    * In the loop 'i' is used to find output values.  Since the output is
+    * 8-bit there are only 256 possible values.  The tables are set up to
+    * select the closest possible output value for each input by finding
+    * the input value at the boundary between each pair of output values
+    * and filling the table up to that boundary with the lower output
+    * value.
+    *
+    * The boundary values are 0.5,1.5..253.5,254.5.  Since these are 9-bit
+    * values the code below uses a 16-bit value in i; the values start at
+    * 128.5 (for 0.5) and step by 257, for a total of 254 values (the last
+    * entries are filled with 255).  Start i at 128 and fill all 'last'
+    * table entries <= 'max'
+    */
+   last = 0;
+   for (i = 0; i < 255; ++i) /* 8-bit output value */
+   {
+      /* Find the corresponding maximum input value */
+      png_uint_16 out = (png_uint_16)(i * 257U); /* 16-bit output value */
+
+      /* Find the boundary value in 16 bits: */
+      png_uint_32 bound = png_gamma_16bit_correct(out+128U, gamma_val);
+
+      /* Adjust (round) to (16-shift) bits: */
+      bound = (bound * max + 32768U)/65535U + 1U;
+
+      while (last < bound)
+      {
+         table[last & (0xffU >> shift)][last >> (8U - shift)] = out;
+         last++;
+      }
+   }
+
+   /* And fill in the final entries. */
+   while (last < (num << 8))
+   {
+      table[last & (0xff >> shift)][last >> (8U - shift)] = 65535U;
+      last++;
+   }
+}
+#endif /* 16BIT */
+
+/* Build a single 8-bit table: same as the 16-bit case but much simpler (and
+ * typically much faster).  Note that libpng currently does no sBIT processing
+ * (apparently contrary to the spec) so a 256-entry table is always generated.
+ */
+static void
+png_build_8bit_table(png_structrp png_ptr, png_bytepp ptable,
+    png_fixed_point gamma_val)
+{
+   unsigned int i;
+   png_bytep table = *ptable = (png_bytep)png_malloc(png_ptr, 256);
+
+   if (png_gamma_significant(gamma_val) != 0)
+      for (i=0; i<256; i++)
+         table[i] = png_gamma_8bit_correct(i, gamma_val);
+
+   else
+      for (i=0; i<256; ++i)
+         table[i] = (png_byte)(i & 0xff);
+}
+
+/* Used from png_read_destroy and below to release the memory used by the gamma
+ * tables.
+ */
+void /* PRIVATE */
+png_destroy_gamma_table(png_structrp png_ptr)
+{
+   png_free(png_ptr, png_ptr->gamma_table);
+   png_ptr->gamma_table = NULL;
+
+#ifdef PNG_16BIT_SUPPORTED
+   if (png_ptr->gamma_16_table != NULL)
+   {
+      int i;
+      int istop = (1 << (8 - png_ptr->gamma_shift));
+      for (i = 0; i < istop; i++)
+      {
+         png_free(png_ptr, png_ptr->gamma_16_table[i]);
+      }
+   png_free(png_ptr, png_ptr->gamma_16_table);
+   png_ptr->gamma_16_table = NULL;
+   }
+#endif /* 16BIT */
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
+   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
+   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+   png_free(png_ptr, png_ptr->gamma_from_1);
+   png_ptr->gamma_from_1 = NULL;
+   png_free(png_ptr, png_ptr->gamma_to_1);
+   png_ptr->gamma_to_1 = NULL;
+
+#ifdef PNG_16BIT_SUPPORTED
+   if (png_ptr->gamma_16_from_1 != NULL)
+   {
+      int i;
+      int istop = (1 << (8 - png_ptr->gamma_shift));
+      for (i = 0; i < istop; i++)
+      {
+         png_free(png_ptr, png_ptr->gamma_16_from_1[i]);
+      }
+   png_free(png_ptr, png_ptr->gamma_16_from_1);
+   png_ptr->gamma_16_from_1 = NULL;
+   }
+   if (png_ptr->gamma_16_to_1 != NULL)
+   {
+      int i;
+      int istop = (1 << (8 - png_ptr->gamma_shift));
+      for (i = 0; i < istop; i++)
+      {
+         png_free(png_ptr, png_ptr->gamma_16_to_1[i]);
+      }
+   png_free(png_ptr, png_ptr->gamma_16_to_1);
+   png_ptr->gamma_16_to_1 = NULL;
+   }
+#endif /* 16BIT */
+#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
+}
+
+/* We build the 8- or 16-bit gamma tables here.  Note that for 16-bit
+ * tables, we don't make a full table if we are reducing to 8-bit in
+ * the future.  Note also how the gamma_16 tables are segmented so that
+ * we don't need to allocate > 64K chunks for a full 16-bit table.
+ */
+void /* PRIVATE */
+png_build_gamma_table(png_structrp png_ptr, int bit_depth)
+{
+   png_debug(1, "in png_build_gamma_table");
+
+   /* Remove any existing table; this copes with multiple calls to
+    * png_read_update_info. The warning is because building the gamma tables
+    * multiple times is a performance hit - it's harmless but the ability to
+    * call png_read_update_info() multiple times is new in 1.5.6 so it seems
+    * sensible to warn if the app introduces such a hit.
+    */
+   if (png_ptr->gamma_table != NULL || png_ptr->gamma_16_table != NULL)
+   {
+      png_warning(png_ptr, "gamma table being rebuilt");
+      png_destroy_gamma_table(png_ptr);
+   }
+
+   if (bit_depth <= 8)
+   {
+      png_build_8bit_table(png_ptr, &png_ptr->gamma_table,
+          png_ptr->screen_gamma > 0 ?
+          png_reciprocal2(png_ptr->colorspace.gamma,
+          png_ptr->screen_gamma) : PNG_FP_1);
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
+   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
+   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+      if ((png_ptr->transformations & (PNG_COMPOSE | PNG_RGB_TO_GRAY)) != 0)
+      {
+         png_build_8bit_table(png_ptr, &png_ptr->gamma_to_1,
+             png_reciprocal(png_ptr->colorspace.gamma));
+
+         png_build_8bit_table(png_ptr, &png_ptr->gamma_from_1,
+             png_ptr->screen_gamma > 0 ?
+             png_reciprocal(png_ptr->screen_gamma) :
+             png_ptr->colorspace.gamma/* Probably doing rgb_to_gray */);
+      }
+#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
+   }
+#ifdef PNG_16BIT_SUPPORTED
+   else
+   {
+      png_byte shift, sig_bit;
+
+      if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
+      {
+         sig_bit = png_ptr->sig_bit.red;
+
+         if (png_ptr->sig_bit.green > sig_bit)
+            sig_bit = png_ptr->sig_bit.green;
+
+         if (png_ptr->sig_bit.blue > sig_bit)
+            sig_bit = png_ptr->sig_bit.blue;
+      }
+      else
+         sig_bit = png_ptr->sig_bit.gray;
+
+      /* 16-bit gamma code uses this equation:
+       *
+       *   ov = table[(iv & 0xff) >> gamma_shift][iv >> 8]
+       *
+       * Where 'iv' is the input color value and 'ov' is the output value -
+       * pow(iv, gamma).
+       *
+       * Thus the gamma table consists of up to 256 256-entry tables.  The table
+       * is selected by the (8-gamma_shift) most significant of the low 8 bits
+       * of the color value then indexed by the upper 8 bits:
+       *
+       *   table[low bits][high 8 bits]
+       *
+       * So the table 'n' corresponds to all those 'iv' of:
+       *
+       *   <all high 8-bit values><n << gamma_shift>..<(n+1 << gamma_shift)-1>
+       *
+       */
+      if (sig_bit > 0 && sig_bit < 16U)
+         /* shift == insignificant bits */
+         shift = (png_byte)((16U - sig_bit) & 0xff);
+
+      else
+         shift = 0; /* keep all 16 bits */
+
+      if ((png_ptr->transformations & (PNG_16_TO_8 | PNG_SCALE_16_TO_8)) != 0)
+      {
+         /* PNG_MAX_GAMMA_8 is the number of bits to keep - effectively
+          * the significant bits in the *input* when the output will
+          * eventually be 8 bits.  By default it is 11.
+          */
+         if (shift < (16U - PNG_MAX_GAMMA_8))
+            shift = (16U - PNG_MAX_GAMMA_8);
+      }
+
+      if (shift > 8U)
+         shift = 8U; /* Guarantees at least one table! */
+
+      png_ptr->gamma_shift = shift;
+
+      /* NOTE: prior to 1.5.4 this test used to include PNG_BACKGROUND (now
+       * PNG_COMPOSE).  This effectively smashed the background calculation for
+       * 16-bit output because the 8-bit table assumes the result will be
+       * reduced to 8 bits.
+       */
+      if ((png_ptr->transformations & (PNG_16_TO_8 | PNG_SCALE_16_TO_8)) != 0)
+          png_build_16to8_table(png_ptr, &png_ptr->gamma_16_table, shift,
+          png_ptr->screen_gamma > 0 ? png_product2(png_ptr->colorspace.gamma,
+          png_ptr->screen_gamma) : PNG_FP_1);
+
+      else
+          png_build_16bit_table(png_ptr, &png_ptr->gamma_16_table, shift,
+          png_ptr->screen_gamma > 0 ? png_reciprocal2(png_ptr->colorspace.gamma,
+          png_ptr->screen_gamma) : PNG_FP_1);
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
+   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
+   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+      if ((png_ptr->transformations & (PNG_COMPOSE | PNG_RGB_TO_GRAY)) != 0)
+      {
+         png_build_16bit_table(png_ptr, &png_ptr->gamma_16_to_1, shift,
+             png_reciprocal(png_ptr->colorspace.gamma));
+
+         /* Notice that the '16 from 1' table should be full precision, however
+          * the lookup on this table still uses gamma_shift, so it can't be.
+          * TODO: fix this.
+          */
+         png_build_16bit_table(png_ptr, &png_ptr->gamma_16_from_1, shift,
+             png_ptr->screen_gamma > 0 ? png_reciprocal(png_ptr->screen_gamma) :
+             png_ptr->colorspace.gamma/* Probably doing rgb_to_gray */);
+      }
+#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
+   }
+#endif /* 16BIT */
+}
+#endif /* READ_GAMMA */
+
+/* HARDWARE OR SOFTWARE OPTION SUPPORT */
+#ifdef PNG_SET_OPTION_SUPPORTED
+int PNGAPI
+png_set_option(png_structrp png_ptr, int option, int onoff)
+{
+   if (png_ptr != NULL && option >= 0 && option < PNG_OPTION_NEXT &&
+      (option & 1) == 0)
+   {
+      png_uint_32 mask = 3U << option;
+      png_uint_32 setting = (2U + (onoff != 0)) << option;
+      png_uint_32 current = png_ptr->options;
+
+      png_ptr->options = (png_uint_32)((current & ~mask) | setting);
+
+      return (int)(current & mask) >> option;
+   }
+
+   return PNG_OPTION_INVALID;
+}
+#endif
+
+/* sRGB support */
+#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\
+   defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
+/* sRGB conversion tables; these are machine generated with the code in
+ * contrib/tools/makesRGB.c.  The actual sRGB transfer curve defined in the
+ * specification (see the article at https://en.wikipedia.org/wiki/SRGB)
+ * is used, not the gamma=1/2.2 approximation use elsewhere in libpng.
+ * The sRGB to linear table is exact (to the nearest 16-bit linear fraction).
+ * The inverse (linear to sRGB) table has accuracies as follows:
+ *
+ * For all possible (255*65535+1) input values:
+ *
+ *    error: -0.515566 - 0.625971, 79441 (0.475369%) of readings inexact
+ *
+ * For the input values corresponding to the 65536 16-bit values:
+ *
+ *    error: -0.513727 - 0.607759, 308 (0.469978%) of readings inexact
+ *
+ * In all cases the inexact readings are only off by one.
+ */
+
+#ifdef PNG_SIMPLIFIED_READ_SUPPORTED
+/* The convert-to-sRGB table is only currently required for read. */
+const png_uint_16 png_sRGB_table[256] =
+{
+   0,20,40,60,80,99,119,139,
+   159,179,199,219,241,264,288,313,
+   340,367,396,427,458,491,526,562,
+   599,637,677,718,761,805,851,898,
+   947,997,1048,1101,1156,1212,1270,1330,
+   1391,1453,1517,1583,1651,1720,1790,1863,
+   1937,2013,2090,2170,2250,2333,2418,2504,
+   2592,2681,2773,2866,2961,3058,3157,3258,
+   3360,3464,3570,3678,3788,3900,4014,4129,
+   4247,4366,4488,4611,4736,4864,4993,5124,
+   5257,5392,5530,5669,5810,5953,6099,6246,
+   6395,6547,6700,6856,7014,7174,7335,7500,
+   7666,7834,8004,8177,8352,8528,8708,8889,
+   9072,9258,9445,9635,9828,10022,10219,10417,
+   10619,10822,11028,11235,11446,11658,11873,12090,
+   12309,12530,12754,12980,13209,13440,13673,13909,
+   14146,14387,14629,14874,15122,15371,15623,15878,
+   16135,16394,16656,16920,17187,17456,17727,18001,
+   18277,18556,18837,19121,19407,19696,19987,20281,
+   20577,20876,21177,21481,21787,22096,22407,22721,
+   23038,23357,23678,24002,24329,24658,24990,25325,
+   25662,26001,26344,26688,27036,27386,27739,28094,
+   28452,28813,29176,29542,29911,30282,30656,31033,
+   31412,31794,32179,32567,32957,33350,33745,34143,
+   34544,34948,35355,35764,36176,36591,37008,37429,
+   37852,38278,38706,39138,39572,40009,40449,40891,
+   41337,41785,42236,42690,43147,43606,44069,44534,
+   45002,45473,45947,46423,46903,47385,47871,48359,
+   48850,49344,49841,50341,50844,51349,51858,52369,
+   52884,53401,53921,54445,54971,55500,56032,56567,
+   57105,57646,58190,58737,59287,59840,60396,60955,
+   61517,62082,62650,63221,63795,64372,64952,65535
+};
+#endif /* SIMPLIFIED_READ */
+
+/* The base/delta tables are required for both read and write (but currently
+ * only the simplified versions.)
+ */
+const png_uint_16 png_sRGB_base[512] =
+{
+   128,1782,3383,4644,5675,6564,7357,8074,
+   8732,9346,9921,10463,10977,11466,11935,12384,
+   12816,13233,13634,14024,14402,14769,15125,15473,
+   15812,16142,16466,16781,17090,17393,17690,17981,
+   18266,18546,18822,19093,19359,19621,19879,20133,
+   20383,20630,20873,21113,21349,21583,21813,22041,
+   22265,22487,22707,22923,23138,23350,23559,23767,
+   23972,24175,24376,24575,24772,24967,25160,25352,
+   25542,25730,25916,26101,26284,26465,26645,26823,
+   27000,27176,27350,27523,27695,27865,28034,28201,
+   28368,28533,28697,28860,29021,29182,29341,29500,
+   29657,29813,29969,30123,30276,30429,30580,30730,
+   30880,31028,31176,31323,31469,31614,31758,31902,
+   32045,32186,32327,32468,32607,32746,32884,33021,
+   33158,33294,33429,33564,33697,33831,33963,34095,
+   34226,34357,34486,34616,34744,34873,35000,35127,
+   35253,35379,35504,35629,35753,35876,35999,36122,
+   36244,36365,36486,36606,36726,36845,36964,37083,
+   37201,37318,37435,37551,37668,37783,37898,38013,
+   38127,38241,38354,38467,38580,38692,38803,38915,
+   39026,39136,39246,39356,39465,39574,39682,39790,
+   39898,40005,40112,40219,40325,40431,40537,40642,
+   40747,40851,40955,41059,41163,41266,41369,41471,
+   41573,41675,41777,41878,41979,42079,42179,42279,
+   42379,42478,42577,42676,42775,42873,42971,43068,
+   43165,43262,43359,43456,43552,43648,43743,43839,
+   43934,44028,44123,44217,44311,44405,44499,44592,
+   44685,44778,44870,44962,45054,45146,45238,45329,
+   45420,45511,45601,45692,45782,45872,45961,46051,
+   46140,46229,46318,46406,46494,46583,46670,46758,
+   46846,46933,47020,47107,47193,47280,47366,47452,
+   47538,47623,47709,47794,47879,47964,48048,48133,
+   48217,48301,48385,48468,48552,48635,48718,48801,
+   48884,48966,49048,49131,49213,49294,49376,49458,
+   49539,49620,49701,49782,49862,49943,50023,50103,
+   50183,50263,50342,50422,50501,50580,50659,50738,
+   50816,50895,50973,51051,51129,51207,51285,51362,
+   51439,51517,51594,51671,51747,51824,51900,51977,
+   52053,52129,52205,52280,52356,52432,52507,52582,
+   52657,52732,52807,52881,52956,53030,53104,53178,
+   53252,53326,53400,53473,53546,53620,53693,53766,
+   53839,53911,53984,54056,54129,54201,54273,54345,
+   54417,54489,54560,54632,54703,54774,54845,54916,
+   54987,55058,55129,55199,55269,55340,55410,55480,
+   55550,55620,55689,55759,55828,55898,55967,56036,
+   56105,56174,56243,56311,56380,56448,56517,56585,
+   56653,56721,56789,56857,56924,56992,57059,57127,
+   57194,57261,57328,57395,57462,57529,57595,57662,
+   57728,57795,57861,57927,57993,58059,58125,58191,
+   58256,58322,58387,58453,58518,58583,58648,58713,
+   58778,58843,58908,58972,59037,59101,59165,59230,
+   59294,59358,59422,59486,59549,59613,59677,59740,
+   59804,59867,59930,59993,60056,60119,60182,60245,
+   60308,60370,60433,60495,60558,60620,60682,60744,
+   60806,60868,60930,60992,61054,61115,61177,61238,
+   61300,61361,61422,61483,61544,61605,61666,61727,
+   61788,61848,61909,61969,62030,62090,62150,62211,
+   62271,62331,62391,62450,62510,62570,62630,62689,
+   62749,62808,62867,62927,62986,63045,63104,63163,
+   63222,63281,63340,63398,63457,63515,63574,63632,
+   63691,63749,63807,63865,63923,63981,64039,64097,
+   64155,64212,64270,64328,64385,64443,64500,64557,
+   64614,64672,64729,64786,64843,64900,64956,65013,
+   65070,65126,65183,65239,65296,65352,65409,65465
+};
+
+const png_byte png_sRGB_delta[512] =
+{
+   207,201,158,129,113,100,90,82,77,72,68,64,61,59,56,54,
+   52,50,49,47,46,45,43,42,41,40,39,39,38,37,36,36,
+   35,34,34,33,33,32,32,31,31,30,30,30,29,29,28,28,
+   28,27,27,27,27,26,26,26,25,25,25,25,24,24,24,24,
+   23,23,23,23,23,22,22,22,22,22,22,21,21,21,21,21,
+   21,20,20,20,20,20,20,20,20,19,19,19,19,19,19,19,
+   19,18,18,18,18,18,18,18,18,18,18,17,17,17,17,17,
+   17,17,17,17,17,17,16,16,16,16,16,16,16,16,16,16,
+   16,16,16,16,15,15,15,15,15,15,15,15,15,15,15,15,
+   15,15,15,15,14,14,14,14,14,14,14,14,14,14,14,14,
+   14,14,14,14,14,14,14,13,13,13,13,13,13,13,13,13,
+   13,13,13,13,13,13,13,13,13,13,13,13,13,13,12,12,
+   12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,
+   12,12,12,12,12,12,12,12,12,12,12,12,11,11,11,11,
+   11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
+   11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
+   11,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
+   10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
+   10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
+   10,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,7,7,7,7,7,7,7,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+#endif /* SIMPLIFIED READ/WRITE sRGB support */
+
+/* SIMPLIFIED READ/WRITE SUPPORT */
+#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\
+   defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
+static int
+png_image_free_function(png_voidp argument)
+{
+   png_imagep image = png_voidcast(png_imagep, argument);
+   png_controlp cp = image->opaque;
+   png_control c;
+
+   /* Double check that we have a png_ptr - it should be impossible to get here
+    * without one.
+    */
+   if (cp->png_ptr == NULL)
+      return 0;
+
+   /* First free any data held in the control structure. */
+#  ifdef PNG_STDIO_SUPPORTED
+      if (cp->owned_file != 0)
+      {
+         FILE *fp = png_voidcast(FILE*, cp->png_ptr->io_ptr);
+         cp->owned_file = 0;
+
+         /* Ignore errors here. */
+         if (fp != NULL)
+         {
+            cp->png_ptr->io_ptr = NULL;
+            (void)fclose(fp);
+         }
+      }
+#  endif
+
+   /* Copy the control structure so that the original, allocated, version can be
+    * safely freed.  Notice that a png_error here stops the remainder of the
+    * cleanup, but this is probably fine because that would indicate bad memory
+    * problems anyway.
+    */
+   c = *cp;
+   image->opaque = &c;
+   png_free(c.png_ptr, cp);
+
+   /* Then the structures, calling the correct API. */
+   if (c.for_write != 0)
+   {
+#     ifdef PNG_SIMPLIFIED_WRITE_SUPPORTED
+         png_destroy_write_struct(&c.png_ptr, &c.info_ptr);
+#     else
+         png_error(c.png_ptr, "simplified write not supported");
+#     endif
+   }
+   else
+   {
+#     ifdef PNG_SIMPLIFIED_READ_SUPPORTED
+         png_destroy_read_struct(&c.png_ptr, &c.info_ptr, NULL);
+#     else
+         png_error(c.png_ptr, "simplified read not supported");
+#     endif
+   }
+
+   /* Success. */
+   return 1;
+}
+
+void PNGAPI
+png_image_free(png_imagep image)
+{
+   /* Safely call the real function, but only if doing so is safe at this point
+    * (if not inside an error handling context).  Otherwise assume
+    * png_safe_execute will call this API after the return.
+    */
+   if (image != NULL && image->opaque != NULL &&
+      image->opaque->error_buf == NULL)
+   {
+      png_image_free_function(image);
+      image->opaque = NULL;
+   }
+}
+
+int /* PRIVATE */
+png_image_error(png_imagep image, png_const_charp error_message)
+{
+   /* Utility to log an error. */
+   png_safecat(image->message, (sizeof image->message), 0, error_message);
+   image->warning_or_error |= PNG_IMAGE_ERROR;
+   png_image_free(image);
+   return 0;
+}
+
+#endif /* SIMPLIFIED READ/WRITE */
+#endif /* READ || WRITE */
diff --git a/reg-io/png/lpng/png.h b/reg-io/png/lpng/png.h
new file mode 100644
index 00000000..f64d4467
--- /dev/null
+++ b/reg-io/png/lpng/png.h
@@ -0,0 +1,3251 @@
+
+/* png.h - header file for PNG reference library
+ *
+ * libpng version 1.6.42
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license. (See LICENSE, below.)
+ *
+ * Authors and maintainers:
+ *   libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat
+ *   libpng versions 0.89, June 1996, through 0.96, May 1997: Andreas Dilger
+ *   libpng versions 0.97, January 1998, through 1.6.35, July 2018:
+ *     Glenn Randers-Pehrson
+ *   libpng versions 1.6.36, December 2018, through 1.6.42, January 2024:
+ *     Cosmin Truta
+ *   See also "Contributing Authors", below.
+ */
+
+/*
+ * COPYRIGHT NOTICE, DISCLAIMER, and LICENSE
+ * =========================================
+ *
+ * PNG Reference Library License version 2
+ * ---------------------------------------
+ *
+ *  * Copyright (c) 1995-2024 The PNG Reference Library Authors.
+ *  * Copyright (c) 2018-2024 Cosmin Truta.
+ *  * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson.
+ *  * Copyright (c) 1996-1997 Andreas Dilger.
+ *  * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * The software is supplied "as is", without warranty of any kind,
+ * express or implied, including, without limitation, the warranties
+ * of merchantability, fitness for a particular purpose, title, and
+ * non-infringement.  In no event shall the Copyright owners, or
+ * anyone distributing the software, be liable for any damages or
+ * other liability, whether in contract, tort or otherwise, arising
+ * from, out of, or in connection with the software, or the use or
+ * other dealings in the software, even if advised of the possibility
+ * of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute
+ * this software, or portions hereof, for any purpose, without fee,
+ * subject to the following restrictions:
+ *
+ *  1. The origin of this software must not be misrepresented; you
+ *     must not claim that you wrote the original software.  If you
+ *     use this software in a product, an acknowledgment in the product
+ *     documentation would be appreciated, but is not required.
+ *
+ *  2. Altered source versions must be plainly marked as such, and must
+ *     not be misrepresented as being the original software.
+ *
+ *  3. This Copyright notice may not be removed or altered from any
+ *     source or altered source distribution.
+ *
+ *
+ * PNG Reference Library License version 1 (for libpng 0.5 through 1.6.35)
+ * -----------------------------------------------------------------------
+ *
+ * libpng versions 1.0.7, July 1, 2000, through 1.6.35, July 15, 2018 are
+ * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson, are
+ * derived from libpng-1.0.6, and are distributed according to the same
+ * disclaimer and license as libpng-1.0.6 with the following individuals
+ * added to the list of Contributing Authors:
+ *
+ *     Simon-Pierre Cadieux
+ *     Eric S. Raymond
+ *     Mans Rullgard
+ *     Cosmin Truta
+ *     Gilles Vollant
+ *     James Yu
+ *     Mandar Sahastrabuddhe
+ *     Google Inc.
+ *     Vadim Barkov
+ *
+ * and with the following additions to the disclaimer:
+ *
+ *     There is no warranty against interference with your enjoyment of
+ *     the library or against infringement.  There is no warranty that our
+ *     efforts or the library will fulfill any of your particular purposes
+ *     or needs.  This library is provided with all faults, and the entire
+ *     risk of satisfactory quality, performance, accuracy, and effort is
+ *     with the user.
+ *
+ * Some files in the "contrib" directory and some configure-generated
+ * files that are distributed with libpng have other copyright owners, and
+ * are released under other open source licenses.
+ *
+ * libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are
+ * Copyright (c) 1998-2000 Glenn Randers-Pehrson, are derived from
+ * libpng-0.96, and are distributed according to the same disclaimer and
+ * license as libpng-0.96, with the following individuals added to the
+ * list of Contributing Authors:
+ *
+ *     Tom Lane
+ *     Glenn Randers-Pehrson
+ *     Willem van Schaik
+ *
+ * libpng versions 0.89, June 1996, through 0.96, May 1997, are
+ * Copyright (c) 1996-1997 Andreas Dilger, are derived from libpng-0.88,
+ * and are distributed according to the same disclaimer and license as
+ * libpng-0.88, with the following individuals added to the list of
+ * Contributing Authors:
+ *
+ *     John Bowler
+ *     Kevin Bracey
+ *     Sam Bushell
+ *     Magnus Holmgren
+ *     Greg Roelofs
+ *     Tom Tanner
+ *
+ * Some files in the "scripts" directory have other copyright owners,
+ * but are released under this license.
+ *
+ * libpng versions 0.5, May 1995, through 0.88, January 1996, are
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * For the purposes of this copyright and license, "Contributing Authors"
+ * is defined as the following set of individuals:
+ *
+ *     Andreas Dilger
+ *     Dave Martindale
+ *     Guy Eric Schalnat
+ *     Paul Schmidt
+ *     Tim Wegner
+ *
+ * The PNG Reference Library is supplied "AS IS".  The Contributing
+ * Authors and Group 42, Inc. disclaim all warranties, expressed or
+ * implied, including, without limitation, the warranties of
+ * merchantability and of fitness for any purpose.  The Contributing
+ * Authors and Group 42, Inc. assume no liability for direct, indirect,
+ * incidental, special, exemplary, or consequential damages, which may
+ * result from the use of the PNG Reference Library, even if advised of
+ * the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * source code, or portions hereof, for any purpose, without fee, subject
+ * to the following restrictions:
+ *
+ *  1. The origin of this source code must not be misrepresented.
+ *
+ *  2. Altered versions must be plainly marked as such and must not
+ *     be misrepresented as being the original source.
+ *
+ *  3. This Copyright notice may not be removed or altered from any
+ *     source or altered source distribution.
+ *
+ * The Contributing Authors and Group 42, Inc. specifically permit,
+ * without fee, and encourage the use of this source code as a component
+ * to supporting the PNG file format in commercial products.  If you use
+ * this source code in a product, acknowledgment is not required but would
+ * be appreciated.
+ *
+ * END OF COPYRIGHT NOTICE, DISCLAIMER, and LICENSE.
+ *
+ * TRADEMARK
+ * =========
+ *
+ * The name "libpng" has not been registered by the Copyright owners
+ * as a trademark in any jurisdiction.  However, because libpng has
+ * been distributed and maintained world-wide, continually since 1995,
+ * the Copyright owners claim "common-law trademark protection" in any
+ * jurisdiction where common-law trademark is recognized.
+ */
+
+/*
+ * A "png_get_copyright" function is available, for convenient use in "about"
+ * boxes and the like:
+ *
+ *    printf("%s", png_get_copyright(NULL));
+ *
+ * Also, the PNG logo (in PNG format, of course) is supplied in the
+ * files "pngbar.png" and "pngbar.jpg (88x31) and "pngnow.png" (98x31).
+ */
+
+/*
+ * The contributing authors would like to thank all those who helped
+ * with testing, bug fixes, and patience.  This wouldn't have been
+ * possible without all of you.
+ *
+ * Thanks to Frank J. T. Wojcik for helping with the documentation.
+ */
+
+/* Note about libpng version numbers:
+ *
+ *    Due to various miscommunications, unforeseen code incompatibilities
+ *    and occasional factors outside the authors' control, version numbering
+ *    on the library has not always been consistent and straightforward.
+ *    The following table summarizes matters since version 0.89c, which was
+ *    the first widely used release:
+ *
+ *    source                 png.h  png.h  shared-lib
+ *    version                string   int  version
+ *    -------                ------ -----  ----------
+ *    0.89c "1.0 beta 3"     0.89      89  1.0.89
+ *    0.90  "1.0 beta 4"     0.90      90  0.90  [should have been 2.0.90]
+ *    0.95  "1.0 beta 5"     0.95      95  0.95  [should have been 2.0.95]
+ *    0.96  "1.0 beta 6"     0.96      96  0.96  [should have been 2.0.96]
+ *    0.97b "1.00.97 beta 7" 1.00.97   97  1.0.1 [should have been 2.0.97]
+ *    0.97c                  0.97      97  2.0.97
+ *    0.98                   0.98      98  2.0.98
+ *    0.99                   0.99      98  2.0.99
+ *    0.99a-m                0.99      99  2.0.99
+ *    1.00                   1.00     100  2.1.0 [100 should be 10000]
+ *    1.0.0      (from here on, the   100  2.1.0 [100 should be 10000]
+ *    1.0.1       png.h string is   10001  2.1.0
+ *    1.0.1a-e    identical to the  10002  from here on, the shared library
+ *    1.0.2       source version)   10002  is 2.V where V is the source code
+ *    1.0.2a-b                      10003  version, except as noted.
+ *    1.0.3                         10003
+ *    1.0.3a-d                      10004
+ *    1.0.4                         10004
+ *    1.0.4a-f                      10005
+ *    1.0.5 (+ 2 patches)           10005
+ *    1.0.5a-d                      10006
+ *    1.0.5e-r                      10100 (not source compatible)
+ *    1.0.5s-v                      10006 (not binary compatible)
+ *    1.0.6 (+ 3 patches)           10006 (still binary incompatible)
+ *    1.0.6d-f                      10007 (still binary incompatible)
+ *    1.0.6g                        10007
+ *    1.0.6h                        10007  10.6h (testing xy.z so-numbering)
+ *    1.0.6i                        10007  10.6i
+ *    1.0.6j                        10007  2.1.0.6j (incompatible with 1.0.0)
+ *    1.0.7beta11-14        DLLNUM  10007  2.1.0.7beta11-14 (binary compatible)
+ *    1.0.7beta15-18           1    10007  2.1.0.7beta15-18 (binary compatible)
+ *    1.0.7rc1-2               1    10007  2.1.0.7rc1-2 (binary compatible)
+ *    1.0.7                    1    10007  (still compatible)
+ *    ...
+ *    1.0.69                  10    10069  10.so.0.69[.0]
+ *    ...
+ *    1.2.59                  13    10259  12.so.0.59[.0]
+ *    ...
+ *    1.4.20                  14    10420  14.so.0.20[.0]
+ *    ...
+ *    1.5.30                  15    10530  15.so.15.30[.0]
+ *    ...
+ *    1.6.42                  16    10641  16.so.16.41[.0]
+ *
+ *    Henceforth the source version will match the shared-library major and
+ *    minor numbers; the shared-library major version number will be used for
+ *    changes in backward compatibility, as it is intended.
+ *    The PNG_LIBPNG_VER macro, which is not used within libpng but is
+ *    available for applications, is an unsigned integer of the form XYYZZ
+ *    corresponding to the source version X.Y.Z (leading zeros in Y and Z).
+ *    Beta versions were given the previous public release number plus a
+ *    letter, until version 1.0.6j; from then on they were given the upcoming
+ *    public release number plus "betaNN" or "rcNN".
+ *
+ *    Binary incompatibility exists only when applications make direct access
+ *    to the info_ptr or png_ptr members through png.h, and the compiled
+ *    application is loaded with a different version of the library.
+ *
+ *    DLLNUM will change each time there are forward or backward changes
+ *    in binary compatibility (e.g., when a new feature is added).
+ *
+ * See libpng.txt or libpng.3 for more information.  The PNG specification
+ * is available as a W3C Recommendation and as an ISO/IEC Standard; see
+ * <https://www.w3.org/TR/2003/REC-PNG-20031110/>
+ */
+
+#ifndef PNG_H
+#define PNG_H
+
+/* This is not the place to learn how to use libpng. The file libpng-manual.txt
+ * describes how to use libpng, and the file example.c summarizes it
+ * with some code on which to build.  This file is useful for looking
+ * at the actual function definitions and structure components.  If that
+ * file has been stripped from your copy of libpng, you can find it at
+ * <http://www.libpng.org/pub/png/libpng-manual.txt>
+ *
+ * If you just need to read a PNG file and don't want to read the documentation
+ * skip to the end of this file and read the section entitled 'simplified API'.
+ */
+
+/* Version information for png.h - this should match the version in png.c */
+#define PNG_LIBPNG_VER_STRING "1.6.42"
+#define PNG_HEADER_VERSION_STRING " libpng version " PNG_LIBPNG_VER_STRING "\n"
+
+#define PNG_LIBPNG_VER_SONUM   16
+#define PNG_LIBPNG_VER_DLLNUM  16
+
+/* These should match the first 3 components of PNG_LIBPNG_VER_STRING: */
+#define PNG_LIBPNG_VER_MAJOR   1
+#define PNG_LIBPNG_VER_MINOR   6
+#define PNG_LIBPNG_VER_RELEASE 41
+
+/* This should be zero for a public release, or non-zero for a
+ * development version.  [Deprecated]
+ */
+#define PNG_LIBPNG_VER_BUILD  0
+
+/* Release Status */
+#define PNG_LIBPNG_BUILD_ALPHA    1
+#define PNG_LIBPNG_BUILD_BETA     2
+#define PNG_LIBPNG_BUILD_RC       3
+#define PNG_LIBPNG_BUILD_STABLE   4
+#define PNG_LIBPNG_BUILD_RELEASE_STATUS_MASK 7
+
+/* Release-Specific Flags */
+#define PNG_LIBPNG_BUILD_PATCH    8 /* Can be OR'ed with
+                                       PNG_LIBPNG_BUILD_STABLE only */
+#define PNG_LIBPNG_BUILD_PRIVATE 16 /* Cannot be OR'ed with
+                                       PNG_LIBPNG_BUILD_SPECIAL */
+#define PNG_LIBPNG_BUILD_SPECIAL 32 /* Cannot be OR'ed with
+                                       PNG_LIBPNG_BUILD_PRIVATE */
+
+#define PNG_LIBPNG_BUILD_BASE_TYPE PNG_LIBPNG_BUILD_STABLE
+
+/* Careful here.  At one time, Guy wanted to use 082, but that
+ * would be octal.  We must not include leading zeros.
+ * Versions 0.7 through 1.0.0 were in the range 0 to 100 here
+ * (only version 1.0.0 was mis-numbered 100 instead of 10000).
+ * From version 1.0.1 it is:
+ * XXYYZZ, where XX=major, YY=minor, ZZ=release
+ */
+#define PNG_LIBPNG_VER 10641 /* 1.6.42 */
+
+/* Library configuration: these options cannot be changed after
+ * the library has been built.
+ */
+#ifndef PNGLCONF_H
+/* If pnglibconf.h is missing, you can
+ * copy scripts/pnglibconf.h.prebuilt to pnglibconf.h
+ */
+#   include "pnglibconf.h"
+#endif
+
+#ifndef PNG_VERSION_INFO_ONLY
+/* Machine specific configuration. */
+#  include "pngconf.h"
+#endif
+
+/*
+ * Added at libpng-1.2.8
+ *
+ * Ref MSDN: Private as priority over Special
+ * VS_FF_PRIVATEBUILD File *was not* built using standard release
+ * procedures. If this value is given, the StringFileInfo block must
+ * contain a PrivateBuild string.
+ *
+ * VS_FF_SPECIALBUILD File *was* built by the original company using
+ * standard release procedures but is a variation of the standard
+ * file of the same version number. If this value is given, the
+ * StringFileInfo block must contain a SpecialBuild string.
+ */
+
+#ifdef PNG_USER_PRIVATEBUILD /* From pnglibconf.h */
+#  define PNG_LIBPNG_BUILD_TYPE \
+       (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_PRIVATE)
+#else
+#  ifdef PNG_LIBPNG_SPECIALBUILD
+#    define PNG_LIBPNG_BUILD_TYPE \
+         (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_SPECIAL)
+#  else
+#    define PNG_LIBPNG_BUILD_TYPE (PNG_LIBPNG_BUILD_BASE_TYPE)
+#  endif
+#endif
+
+#ifndef PNG_VERSION_INFO_ONLY
+
+/* Inhibit C++ name-mangling for libpng functions but not for system calls. */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Version information for C files, stored in png.c.  This had better match
+ * the version above.
+ */
+#define png_libpng_ver png_get_header_ver(NULL)
+
+/* This file is arranged in several sections:
+ *
+ * 1. [omitted]
+ * 2. Any configuration options that can be specified by for the application
+ *    code when it is built.  (Build time configuration is in pnglibconf.h)
+ * 3. Type definitions (base types are defined in pngconf.h), structure
+ *    definitions.
+ * 4. Exported library functions.
+ * 5. Simplified API.
+ * 6. Implementation options.
+ *
+ * The library source code has additional files (principally pngpriv.h) that
+ * allow configuration of the library.
+ */
+
+/* Section 1: [omitted] */
+
+/* Section 2: run time configuration
+ * See pnglibconf.h for build time configuration
+ *
+ * Run time configuration allows the application to choose between
+ * implementations of certain arithmetic APIs.  The default is set
+ * at build time and recorded in pnglibconf.h, but it is safe to
+ * override these (and only these) settings.  Note that this won't
+ * change what the library does, only application code, and the
+ * settings can (and probably should) be made on a per-file basis
+ * by setting the #defines before including png.h
+ *
+ * Use macros to read integers from PNG data or use the exported
+ * functions?
+ *   PNG_USE_READ_MACROS: use the macros (see below)  Note that
+ *     the macros evaluate their argument multiple times.
+ *   PNG_NO_USE_READ_MACROS: call the relevant library function.
+ *
+ * Use the alternative algorithm for compositing alpha samples that
+ * does not use division?
+ *   PNG_READ_COMPOSITE_NODIV_SUPPORTED: use the 'no division'
+ *      algorithm.
+ *   PNG_NO_READ_COMPOSITE_NODIV: use the 'division' algorithm.
+ *
+ * How to handle benign errors if PNG_ALLOW_BENIGN_ERRORS is
+ * false?
+ *   PNG_ALLOW_BENIGN_ERRORS: map calls to the benign error
+ *      APIs to png_warning.
+ * Otherwise the calls are mapped to png_error.
+ */
+
+/* Section 3: type definitions, including structures and compile time
+ * constants.
+ * See pngconf.h for base types that vary by machine/system
+ */
+
+/* This triggers a compiler error in png.c, if png.c and png.h
+ * do not agree upon the version number.
+ */
+typedef char* png_libpng_version_1_6_42;
+
+/* Basic control structions.  Read libpng-manual.txt or libpng.3 for more info.
+ *
+ * png_struct is the cache of information used while reading or writing a single
+ * PNG file.  One of these is always required, although the simplified API
+ * (below) hides the creation and destruction of it.
+ */
+typedef struct png_struct_def png_struct;
+typedef const png_struct * png_const_structp;
+typedef png_struct * png_structp;
+typedef png_struct * * png_structpp;
+
+/* png_info contains information read from or to be written to a PNG file.  One
+ * or more of these must exist while reading or creating a PNG file.  The
+ * information is not used by libpng during read but is used to control what
+ * gets written when a PNG file is created.  "png_get_" function calls read
+ * information during read and "png_set_" functions calls write information
+ * when creating a PNG.
+ * been moved into a separate header file that is not accessible to
+ * applications.  Read libpng-manual.txt or libpng.3 for more info.
+ */
+typedef struct png_info_def png_info;
+typedef png_info * png_infop;
+typedef const png_info * png_const_infop;
+typedef png_info * * png_infopp;
+
+/* Types with names ending 'p' are pointer types.  The corresponding types with
+ * names ending 'rp' are identical pointer types except that the pointer is
+ * marked 'restrict', which means that it is the only pointer to the object
+ * passed to the function.  Applications should not use the 'restrict' types;
+ * it is always valid to pass 'p' to a pointer with a function argument of the
+ * corresponding 'rp' type.  Different compilers have different rules with
+ * regard to type matching in the presence of 'restrict'.  For backward
+ * compatibility libpng callbacks never have 'restrict' in their parameters and,
+ * consequentially, writing portable application code is extremely difficult if
+ * an attempt is made to use 'restrict'.
+ */
+typedef png_struct * PNG_RESTRICT png_structrp;
+typedef const png_struct * PNG_RESTRICT png_const_structrp;
+typedef png_info * PNG_RESTRICT png_inforp;
+typedef const png_info * PNG_RESTRICT png_const_inforp;
+
+/* Three color definitions.  The order of the red, green, and blue, (and the
+ * exact size) is not important, although the size of the fields need to
+ * be png_byte or png_uint_16 (as defined below).
+ */
+typedef struct png_color_struct
+{
+   png_byte red;
+   png_byte green;
+   png_byte blue;
+} png_color;
+typedef png_color * png_colorp;
+typedef const png_color * png_const_colorp;
+typedef png_color * * png_colorpp;
+
+typedef struct png_color_16_struct
+{
+   png_byte index;    /* used for palette files */
+   png_uint_16 red;   /* for use in red green blue files */
+   png_uint_16 green;
+   png_uint_16 blue;
+   png_uint_16 gray;  /* for use in grayscale files */
+} png_color_16;
+typedef png_color_16 * png_color_16p;
+typedef const png_color_16 * png_const_color_16p;
+typedef png_color_16 * * png_color_16pp;
+
+typedef struct png_color_8_struct
+{
+   png_byte red;   /* for use in red green blue files */
+   png_byte green;
+   png_byte blue;
+   png_byte gray;  /* for use in grayscale files */
+   png_byte alpha; /* for alpha channel files */
+} png_color_8;
+typedef png_color_8 * png_color_8p;
+typedef const png_color_8 * png_const_color_8p;
+typedef png_color_8 * * png_color_8pp;
+
+/*
+ * The following two structures are used for the in-core representation
+ * of sPLT chunks.
+ */
+typedef struct png_sPLT_entry_struct
+{
+   png_uint_16 red;
+   png_uint_16 green;
+   png_uint_16 blue;
+   png_uint_16 alpha;
+   png_uint_16 frequency;
+} png_sPLT_entry;
+typedef png_sPLT_entry * png_sPLT_entryp;
+typedef const png_sPLT_entry * png_const_sPLT_entryp;
+typedef png_sPLT_entry * * png_sPLT_entrypp;
+
+/*  When the depth of the sPLT palette is 8 bits, the color and alpha samples
+ *  occupy the LSB of their respective members, and the MSB of each member
+ *  is zero-filled.  The frequency member always occupies the full 16 bits.
+ */
+
+typedef struct png_sPLT_struct
+{
+   png_charp name;           /* palette name */
+   png_byte depth;           /* depth of palette samples */
+   png_sPLT_entryp entries;  /* palette entries */
+   png_int_32 nentries;      /* number of palette entries */
+} png_sPLT_t;
+typedef png_sPLT_t * png_sPLT_tp;
+typedef const png_sPLT_t * png_const_sPLT_tp;
+typedef png_sPLT_t * * png_sPLT_tpp;
+
+#ifdef PNG_TEXT_SUPPORTED
+/* png_text holds the contents of a text/ztxt/itxt chunk in a PNG file,
+ * and whether that contents is compressed or not.  The "key" field
+ * points to a regular zero-terminated C string.  The "text" fields can be a
+ * regular C string, an empty string, or a NULL pointer.
+ * However, the structure returned by png_get_text() will always contain
+ * the "text" field as a regular zero-terminated C string (possibly
+ * empty), never a NULL pointer, so it can be safely used in printf() and
+ * other string-handling functions.  Note that the "itxt_length", "lang", and
+ * "lang_key" members of the structure only exist when the library is built
+ * with iTXt chunk support.  Prior to libpng-1.4.0 the library was built by
+ * default without iTXt support. Also note that when iTXt *is* supported,
+ * the "lang" and "lang_key" fields contain NULL pointers when the
+ * "compression" field contains * PNG_TEXT_COMPRESSION_NONE or
+ * PNG_TEXT_COMPRESSION_zTXt. Note that the "compression value" is not the
+ * same as what appears in the PNG tEXt/zTXt/iTXt chunk's "compression flag"
+ * which is always 0 or 1, or its "compression method" which is always 0.
+ */
+typedef struct png_text_struct
+{
+   int  compression;       /* compression value:
+                             -1: tEXt, none
+                              0: zTXt, deflate
+                              1: iTXt, none
+                              2: iTXt, deflate  */
+   png_charp key;          /* keyword, 1-79 character description of "text" */
+   png_charp text;         /* comment, may be an empty string (ie "")
+                              or a NULL pointer */
+   size_t text_length;     /* length of the text string */
+   size_t itxt_length;     /* length of the itxt string */
+   png_charp lang;         /* language code, 0-79 characters
+                              or a NULL pointer */
+   png_charp lang_key;     /* keyword translated UTF-8 string, 0 or more
+                              chars or a NULL pointer */
+} png_text;
+typedef png_text * png_textp;
+typedef const png_text * png_const_textp;
+typedef png_text * * png_textpp;
+#endif
+
+/* Supported compression types for text in PNG files (tEXt, and zTXt).
+ * The values of the PNG_TEXT_COMPRESSION_ defines should NOT be changed. */
+#define PNG_TEXT_COMPRESSION_NONE_WR -3
+#define PNG_TEXT_COMPRESSION_zTXt_WR -2
+#define PNG_TEXT_COMPRESSION_NONE    -1
+#define PNG_TEXT_COMPRESSION_zTXt     0
+#define PNG_ITXT_COMPRESSION_NONE     1
+#define PNG_ITXT_COMPRESSION_zTXt     2
+#define PNG_TEXT_COMPRESSION_LAST     3  /* Not a valid value */
+
+/* png_time is a way to hold the time in an machine independent way.
+ * Two conversions are provided, both from time_t and struct tm.  There
+ * is no portable way to convert to either of these structures, as far
+ * as I know.  If you know of a portable way, send it to me.  As a side
+ * note - PNG has always been Year 2000 compliant!
+ */
+typedef struct png_time_struct
+{
+   png_uint_16 year; /* full year, as in, 1995 */
+   png_byte month;   /* month of year, 1 - 12 */
+   png_byte day;     /* day of month, 1 - 31 */
+   png_byte hour;    /* hour of day, 0 - 23 */
+   png_byte minute;  /* minute of hour, 0 - 59 */
+   png_byte second;  /* second of minute, 0 - 60 (for leap seconds) */
+} png_time;
+typedef png_time * png_timep;
+typedef const png_time * png_const_timep;
+typedef png_time * * png_timepp;
+
+#if defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED) ||\
+   defined(PNG_USER_CHUNKS_SUPPORTED)
+/* png_unknown_chunk is a structure to hold queued chunks for which there is
+ * no specific support.  The idea is that we can use this to queue
+ * up private chunks for output even though the library doesn't actually
+ * know about their semantics.
+ *
+ * The data in the structure is set by libpng on read and used on write.
+ */
+typedef struct png_unknown_chunk_t
+{
+   png_byte name[5]; /* Textual chunk name with '\0' terminator */
+   png_byte *data;   /* Data, should not be modified on read! */
+   size_t size;
+
+   /* On write 'location' must be set using the flag values listed below.
+    * Notice that on read it is set by libpng however the values stored have
+    * more bits set than are listed below.  Always treat the value as a
+    * bitmask.  On write set only one bit - setting multiple bits may cause the
+    * chunk to be written in multiple places.
+    */
+   png_byte location; /* mode of operation at read time */
+}
+png_unknown_chunk;
+
+typedef png_unknown_chunk * png_unknown_chunkp;
+typedef const png_unknown_chunk * png_const_unknown_chunkp;
+typedef png_unknown_chunk * * png_unknown_chunkpp;
+#endif
+
+/* Flag values for the unknown chunk location byte. */
+#define PNG_HAVE_IHDR  0x01
+#define PNG_HAVE_PLTE  0x02
+#define PNG_AFTER_IDAT 0x08
+
+/* Maximum positive integer used in PNG is (2^31)-1 */
+#define PNG_UINT_31_MAX ((png_uint_32)0x7fffffffL)
+#define PNG_UINT_32_MAX ((png_uint_32)(-1))
+#define PNG_SIZE_MAX ((size_t)(-1))
+
+/* These are constants for fixed point values encoded in the
+ * PNG specification manner (x100000)
+ */
+#define PNG_FP_1    100000
+#define PNG_FP_HALF  50000
+#define PNG_FP_MAX  ((png_fixed_point)0x7fffffffL)
+#define PNG_FP_MIN  (-PNG_FP_MAX)
+
+/* These describe the color_type field in png_info. */
+/* color type masks */
+#define PNG_COLOR_MASK_PALETTE    1
+#define PNG_COLOR_MASK_COLOR      2
+#define PNG_COLOR_MASK_ALPHA      4
+
+/* color types.  Note that not all combinations are legal */
+#define PNG_COLOR_TYPE_GRAY 0
+#define PNG_COLOR_TYPE_PALETTE  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_PALETTE)
+#define PNG_COLOR_TYPE_RGB        (PNG_COLOR_MASK_COLOR)
+#define PNG_COLOR_TYPE_RGB_ALPHA  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_ALPHA)
+#define PNG_COLOR_TYPE_GRAY_ALPHA (PNG_COLOR_MASK_ALPHA)
+/* aliases */
+#define PNG_COLOR_TYPE_RGBA  PNG_COLOR_TYPE_RGB_ALPHA
+#define PNG_COLOR_TYPE_GA  PNG_COLOR_TYPE_GRAY_ALPHA
+
+/* This is for compression type. PNG 1.0-1.2 only define the single type. */
+#define PNG_COMPRESSION_TYPE_BASE 0 /* Deflate method 8, 32K window */
+#define PNG_COMPRESSION_TYPE_DEFAULT PNG_COMPRESSION_TYPE_BASE
+
+/* This is for filter type. PNG 1.0-1.2 only define the single type. */
+#define PNG_FILTER_TYPE_BASE      0 /* Single row per-byte filtering */
+#define PNG_INTRAPIXEL_DIFFERENCING 64 /* Used only in MNG datastreams */
+#define PNG_FILTER_TYPE_DEFAULT   PNG_FILTER_TYPE_BASE
+
+/* These are for the interlacing type.  These values should NOT be changed. */
+#define PNG_INTERLACE_NONE        0 /* Non-interlaced image */
+#define PNG_INTERLACE_ADAM7       1 /* Adam7 interlacing */
+#define PNG_INTERLACE_LAST        2 /* Not a valid value */
+
+/* These are for the oFFs chunk.  These values should NOT be changed. */
+#define PNG_OFFSET_PIXEL          0 /* Offset in pixels */
+#define PNG_OFFSET_MICROMETER     1 /* Offset in micrometers (1/10^6 meter) */
+#define PNG_OFFSET_LAST           2 /* Not a valid value */
+
+/* These are for the pCAL chunk.  These values should NOT be changed. */
+#define PNG_EQUATION_LINEAR       0 /* Linear transformation */
+#define PNG_EQUATION_BASE_E       1 /* Exponential base e transform */
+#define PNG_EQUATION_ARBITRARY    2 /* Arbitrary base exponential transform */
+#define PNG_EQUATION_HYPERBOLIC   3 /* Hyperbolic sine transformation */
+#define PNG_EQUATION_LAST         4 /* Not a valid value */
+
+/* These are for the sCAL chunk.  These values should NOT be changed. */
+#define PNG_SCALE_UNKNOWN         0 /* unknown unit (image scale) */
+#define PNG_SCALE_METER           1 /* meters per pixel */
+#define PNG_SCALE_RADIAN          2 /* radians per pixel */
+#define PNG_SCALE_LAST            3 /* Not a valid value */
+
+/* These are for the pHYs chunk.  These values should NOT be changed. */
+#define PNG_RESOLUTION_UNKNOWN    0 /* pixels/unknown unit (aspect ratio) */
+#define PNG_RESOLUTION_METER      1 /* pixels/meter */
+#define PNG_RESOLUTION_LAST       2 /* Not a valid value */
+
+/* These are for the sRGB chunk.  These values should NOT be changed. */
+#define PNG_sRGB_INTENT_PERCEPTUAL 0
+#define PNG_sRGB_INTENT_RELATIVE   1
+#define PNG_sRGB_INTENT_SATURATION 2
+#define PNG_sRGB_INTENT_ABSOLUTE   3
+#define PNG_sRGB_INTENT_LAST       4 /* Not a valid value */
+
+/* This is for text chunks */
+#define PNG_KEYWORD_MAX_LENGTH     79
+
+/* Maximum number of entries in PLTE/sPLT/tRNS arrays */
+#define PNG_MAX_PALETTE_LENGTH    256
+
+/* These determine if an ancillary chunk's data has been successfully read
+ * from the PNG header, or if the application has filled in the corresponding
+ * data in the info_struct to be written into the output file.  The values
+ * of the PNG_INFO_<chunk> defines should NOT be changed.
+ */
+#define PNG_INFO_gAMA 0x0001U
+#define PNG_INFO_sBIT 0x0002U
+#define PNG_INFO_cHRM 0x0004U
+#define PNG_INFO_PLTE 0x0008U
+#define PNG_INFO_tRNS 0x0010U
+#define PNG_INFO_bKGD 0x0020U
+#define PNG_INFO_hIST 0x0040U
+#define PNG_INFO_pHYs 0x0080U
+#define PNG_INFO_oFFs 0x0100U
+#define PNG_INFO_tIME 0x0200U
+#define PNG_INFO_pCAL 0x0400U
+#define PNG_INFO_sRGB 0x0800U  /* GR-P, 0.96a */
+#define PNG_INFO_iCCP 0x1000U  /* ESR, 1.0.6 */
+#define PNG_INFO_sPLT 0x2000U  /* ESR, 1.0.6 */
+#define PNG_INFO_sCAL 0x4000U  /* ESR, 1.0.6 */
+#define PNG_INFO_IDAT 0x8000U  /* ESR, 1.0.6 */
+#define PNG_INFO_eXIf 0x10000U /* GR-P, 1.6.31 */
+
+/* This is used for the transformation routines, as some of them
+ * change these values for the row.  It also should enable using
+ * the routines for other purposes.
+ */
+typedef struct png_row_info_struct
+{
+   png_uint_32 width;    /* width of row */
+   size_t rowbytes;      /* number of bytes in row */
+   png_byte color_type;  /* color type of row */
+   png_byte bit_depth;   /* bit depth of row */
+   png_byte channels;    /* number of channels (1, 2, 3, or 4) */
+   png_byte pixel_depth; /* bits per pixel (depth * channels) */
+} png_row_info;
+
+typedef png_row_info * png_row_infop;
+typedef png_row_info * * png_row_infopp;
+
+/* These are the function types for the I/O functions and for the functions
+ * that allow the user to override the default I/O functions with his or her
+ * own.  The png_error_ptr type should match that of user-supplied warning
+ * and error functions, while the png_rw_ptr type should match that of the
+ * user read/write data functions.  Note that the 'write' function must not
+ * modify the buffer it is passed. The 'read' function, on the other hand, is
+ * expected to return the read data in the buffer.
+ */
+typedef PNG_CALLBACK(void, *png_error_ptr, (png_structp, png_const_charp));
+typedef PNG_CALLBACK(void, *png_rw_ptr, (png_structp, png_bytep, size_t));
+typedef PNG_CALLBACK(void, *png_flush_ptr, (png_structp));
+typedef PNG_CALLBACK(void, *png_read_status_ptr, (png_structp, png_uint_32,
+    int));
+typedef PNG_CALLBACK(void, *png_write_status_ptr, (png_structp, png_uint_32,
+    int));
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+typedef PNG_CALLBACK(void, *png_progressive_info_ptr, (png_structp, png_infop));
+typedef PNG_CALLBACK(void, *png_progressive_end_ptr, (png_structp, png_infop));
+
+/* The following callback receives png_uint_32 row_number, int pass for the
+ * png_bytep data of the row.  When transforming an interlaced image the
+ * row number is the row number within the sub-image of the interlace pass, so
+ * the value will increase to the height of the sub-image (not the full image)
+ * then reset to 0 for the next pass.
+ *
+ * Use PNG_ROW_FROM_PASS_ROW(row, pass) and PNG_COL_FROM_PASS_COL(col, pass) to
+ * find the output pixel (x,y) given an interlaced sub-image pixel
+ * (row,col,pass).  (See below for these macros.)
+ */
+typedef PNG_CALLBACK(void, *png_progressive_row_ptr, (png_structp, png_bytep,
+    png_uint_32, int));
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+typedef PNG_CALLBACK(void, *png_user_transform_ptr, (png_structp, png_row_infop,
+    png_bytep));
+#endif
+
+#ifdef PNG_USER_CHUNKS_SUPPORTED
+typedef PNG_CALLBACK(int, *png_user_chunk_ptr, (png_structp,
+    png_unknown_chunkp));
+#endif
+#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
+/* not used anywhere */
+/* typedef PNG_CALLBACK(void, *png_unknown_chunk_ptr, (png_structp)); */
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+/* This must match the function definition in <setjmp.h>, and the application
+ * must include this before png.h to obtain the definition of jmp_buf.  The
+ * function is required to be PNG_NORETURN, but this is not checked.  If the
+ * function does return the application will crash via an abort() or similar
+ * system level call.
+ *
+ * If you get a warning here while building the library you may need to make
+ * changes to ensure that pnglibconf.h records the calling convention used by
+ * your compiler.  This may be very difficult - try using a different compiler
+ * to build the library!
+ */
+PNG_FUNCTION(void, (PNGCAPI *png_longjmp_ptr), PNGARG((jmp_buf, int)), typedef);
+#endif
+
+/* Transform masks for the high-level interface */
+#define PNG_TRANSFORM_IDENTITY       0x0000    /* read and write */
+#define PNG_TRANSFORM_STRIP_16       0x0001    /* read only */
+#define PNG_TRANSFORM_STRIP_ALPHA    0x0002    /* read only */
+#define PNG_TRANSFORM_PACKING        0x0004    /* read and write */
+#define PNG_TRANSFORM_PACKSWAP       0x0008    /* read and write */
+#define PNG_TRANSFORM_EXPAND         0x0010    /* read only */
+#define PNG_TRANSFORM_INVERT_MONO    0x0020    /* read and write */
+#define PNG_TRANSFORM_SHIFT          0x0040    /* read and write */
+#define PNG_TRANSFORM_BGR            0x0080    /* read and write */
+#define PNG_TRANSFORM_SWAP_ALPHA     0x0100    /* read and write */
+#define PNG_TRANSFORM_SWAP_ENDIAN    0x0200    /* read and write */
+#define PNG_TRANSFORM_INVERT_ALPHA   0x0400    /* read and write */
+#define PNG_TRANSFORM_STRIP_FILLER   0x0800    /* write only */
+/* Added to libpng-1.2.34 */
+#define PNG_TRANSFORM_STRIP_FILLER_BEFORE PNG_TRANSFORM_STRIP_FILLER
+#define PNG_TRANSFORM_STRIP_FILLER_AFTER 0x1000 /* write only */
+/* Added to libpng-1.4.0 */
+#define PNG_TRANSFORM_GRAY_TO_RGB   0x2000      /* read only */
+/* Added to libpng-1.5.4 */
+#define PNG_TRANSFORM_EXPAND_16     0x4000      /* read only */
+#if ~0U > 0xffffU /* or else this might break on a 16-bit machine */
+#define PNG_TRANSFORM_SCALE_16      0x8000      /* read only */
+#endif
+
+/* Flags for MNG supported features */
+#define PNG_FLAG_MNG_EMPTY_PLTE     0x01
+#define PNG_FLAG_MNG_FILTER_64      0x04
+#define PNG_ALL_MNG_FEATURES        0x05
+
+/* NOTE: prior to 1.5 these functions had no 'API' style declaration,
+ * this allowed the zlib default functions to be used on Windows
+ * platforms.  In 1.5 the zlib default malloc (which just calls malloc and
+ * ignores the first argument) should be completely compatible with the
+ * following.
+ */
+typedef PNG_CALLBACK(png_voidp, *png_malloc_ptr, (png_structp,
+    png_alloc_size_t));
+typedef PNG_CALLBACK(void, *png_free_ptr, (png_structp, png_voidp));
+
+/* Section 4: exported functions
+ * Here are the function definitions most commonly used.  This is not
+ * the place to find out how to use libpng.  See libpng-manual.txt for the
+ * full explanation, see example.c for the summary.  This just provides
+ * a simple one line description of the use of each function.
+ *
+ * The PNG_EXPORT() and PNG_EXPORTA() macros used below are defined in
+ * pngconf.h and in the *.dfn files in the scripts directory.
+ *
+ *   PNG_EXPORT(ordinal, type, name, (args));
+ *
+ *       ordinal:    ordinal that is used while building
+ *                   *.def files. The ordinal value is only
+ *                   relevant when preprocessing png.h with
+ *                   the *.dfn files for building symbol table
+ *                   entries, and are removed by pngconf.h.
+ *       type:       return type of the function
+ *       name:       function name
+ *       args:       function arguments, with types
+ *
+ * When we wish to append attributes to a function prototype we use
+ * the PNG_EXPORTA() macro instead.
+ *
+ *   PNG_EXPORTA(ordinal, type, name, (args), attributes);
+ *
+ *       ordinal, type, name, and args: same as in PNG_EXPORT().
+ *       attributes: function attributes
+ */
+
+/* Returns the version number of the library */
+PNG_EXPORT(1, png_uint_32, png_access_version_number, (void));
+
+/* Tell lib we have already handled the first <num_bytes> magic bytes.
+ * Handling more than 8 bytes from the beginning of the file is an error.
+ */
+PNG_EXPORT(2, void, png_set_sig_bytes, (png_structrp png_ptr, int num_bytes));
+
+/* Check sig[start] through sig[start + num_to_check - 1] to see if it's a
+ * PNG file.  Returns zero if the supplied bytes match the 8-byte PNG
+ * signature, and non-zero otherwise.  Having num_to_check == 0 or
+ * start > 7 will always fail (i.e. return non-zero).
+ */
+PNG_EXPORT(3, int, png_sig_cmp, (png_const_bytep sig, size_t start,
+    size_t num_to_check));
+
+/* Simple signature checking function.  This is the same as calling
+ * png_check_sig(sig, n) := (png_sig_cmp(sig, 0, n) == 0).
+ */
+#define png_check_sig(sig, n) (png_sig_cmp((sig), 0, (n)) == 0) /* DEPRECATED */
+
+/* Allocate and initialize png_ptr struct for reading, and any other memory. */
+PNG_EXPORTA(4, png_structp, png_create_read_struct,
+    (png_const_charp user_png_ver, png_voidp error_ptr,
+    png_error_ptr error_fn, png_error_ptr warn_fn),
+    PNG_ALLOCATED);
+
+/* Allocate and initialize png_ptr struct for writing, and any other memory */
+PNG_EXPORTA(5, png_structp, png_create_write_struct,
+    (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
+    png_error_ptr warn_fn),
+    PNG_ALLOCATED);
+
+PNG_EXPORT(6, size_t, png_get_compression_buffer_size,
+    (png_const_structrp png_ptr));
+
+PNG_EXPORT(7, void, png_set_compression_buffer_size, (png_structrp png_ptr,
+    size_t size));
+
+/* Moved from pngconf.h in 1.4.0 and modified to ensure setjmp/longjmp
+ * match up.
+ */
+#ifdef PNG_SETJMP_SUPPORTED
+/* This function returns the jmp_buf built in to *png_ptr.  It must be
+ * supplied with an appropriate 'longjmp' function to use on that jmp_buf
+ * unless the default error function is overridden in which case NULL is
+ * acceptable.  The size of the jmp_buf is checked against the actual size
+ * allocated by the library - the call will return NULL on a mismatch
+ * indicating an ABI mismatch.
+ */
+PNG_EXPORT(8, jmp_buf*, png_set_longjmp_fn, (png_structrp png_ptr,
+    png_longjmp_ptr longjmp_fn, size_t jmp_buf_size));
+#  define png_jmpbuf(png_ptr) \
+      (*png_set_longjmp_fn((png_ptr), longjmp, (sizeof (jmp_buf))))
+#else
+#  define png_jmpbuf(png_ptr) \
+      (LIBPNG_WAS_COMPILED_WITH__PNG_NO_SETJMP)
+#endif
+/* This function should be used by libpng applications in place of
+ * longjmp(png_ptr->jmpbuf, val).  If longjmp_fn() has been set, it
+ * will use it; otherwise it will call PNG_ABORT().  This function was
+ * added in libpng-1.5.0.
+ */
+PNG_EXPORTA(9, void, png_longjmp, (png_const_structrp png_ptr, int val),
+    PNG_NORETURN);
+
+#ifdef PNG_READ_SUPPORTED
+/* Reset the compression stream */
+PNG_EXPORTA(10, int, png_reset_zstream, (png_structrp png_ptr), PNG_DEPRECATED);
+#endif
+
+/* New functions added in libpng-1.0.2 (not enabled by default until 1.2.0) */
+#ifdef PNG_USER_MEM_SUPPORTED
+PNG_EXPORTA(11, png_structp, png_create_read_struct_2,
+    (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
+    png_error_ptr warn_fn,
+    png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn),
+    PNG_ALLOCATED);
+PNG_EXPORTA(12, png_structp, png_create_write_struct_2,
+    (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
+    png_error_ptr warn_fn,
+    png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn),
+    PNG_ALLOCATED);
+#endif
+
+/* Write the PNG file signature. */
+PNG_EXPORT(13, void, png_write_sig, (png_structrp png_ptr));
+
+/* Write a PNG chunk - size, type, (optional) data, CRC. */
+PNG_EXPORT(14, void, png_write_chunk, (png_structrp png_ptr, png_const_bytep
+    chunk_name, png_const_bytep data, size_t length));
+
+/* Write the start of a PNG chunk - length and chunk name. */
+PNG_EXPORT(15, void, png_write_chunk_start, (png_structrp png_ptr,
+    png_const_bytep chunk_name, png_uint_32 length));
+
+/* Write the data of a PNG chunk started with png_write_chunk_start(). */
+PNG_EXPORT(16, void, png_write_chunk_data, (png_structrp png_ptr,
+    png_const_bytep data, size_t length));
+
+/* Finish a chunk started with png_write_chunk_start() (includes CRC). */
+PNG_EXPORT(17, void, png_write_chunk_end, (png_structrp png_ptr));
+
+/* Allocate and initialize the info structure */
+PNG_EXPORTA(18, png_infop, png_create_info_struct, (png_const_structrp png_ptr),
+    PNG_ALLOCATED);
+
+/* DEPRECATED: this function allowed init structures to be created using the
+ * default allocation method (typically malloc).  Use is deprecated in 1.6.0 and
+ * the API will be removed in the future.
+ */
+PNG_EXPORTA(19, void, png_info_init_3, (png_infopp info_ptr,
+    size_t png_info_struct_size), PNG_DEPRECATED);
+
+/* Writes all the PNG information before the image. */
+PNG_EXPORT(20, void, png_write_info_before_PLTE,
+    (png_structrp png_ptr, png_const_inforp info_ptr));
+PNG_EXPORT(21, void, png_write_info,
+    (png_structrp png_ptr, png_const_inforp info_ptr));
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read the information before the actual image data. */
+PNG_EXPORT(22, void, png_read_info,
+    (png_structrp png_ptr, png_inforp info_ptr));
+#endif
+
+#ifdef PNG_TIME_RFC1123_SUPPORTED
+   /* Convert to a US string format: there is no localization support in this
+    * routine.  The original implementation used a 29 character buffer in
+    * png_struct, this will be removed in future versions.
+    */
+#if PNG_LIBPNG_VER < 10700
+/* To do: remove this from libpng17 (and from libpng17/png.c and pngstruct.h) */
+PNG_EXPORTA(23, png_const_charp, png_convert_to_rfc1123, (png_structrp png_ptr,
+    png_const_timep ptime),PNG_DEPRECATED);
+#endif
+PNG_EXPORT(241, int, png_convert_to_rfc1123_buffer, (char out[29],
+    png_const_timep ptime));
+#endif
+
+#ifdef PNG_CONVERT_tIME_SUPPORTED
+/* Convert from a struct tm to png_time */
+PNG_EXPORT(24, void, png_convert_from_struct_tm, (png_timep ptime,
+    const struct tm * ttime));
+
+/* Convert from time_t to png_time.  Uses gmtime() */
+PNG_EXPORT(25, void, png_convert_from_time_t, (png_timep ptime, time_t ttime));
+#endif /* CONVERT_tIME */
+
+#ifdef PNG_READ_EXPAND_SUPPORTED
+/* Expand data to 24-bit RGB, or 8-bit grayscale, with alpha if available. */
+PNG_EXPORT(26, void, png_set_expand, (png_structrp png_ptr));
+PNG_EXPORT(27, void, png_set_expand_gray_1_2_4_to_8, (png_structrp png_ptr));
+PNG_EXPORT(28, void, png_set_palette_to_rgb, (png_structrp png_ptr));
+PNG_EXPORT(29, void, png_set_tRNS_to_alpha, (png_structrp png_ptr));
+#endif
+
+#ifdef PNG_READ_EXPAND_16_SUPPORTED
+/* Expand to 16-bit channels, forces conversion of palette to RGB and expansion
+ * of a tRNS chunk if present.
+ */
+PNG_EXPORT(221, void, png_set_expand_16, (png_structrp png_ptr));
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+/* Use blue, green, red order for pixels. */
+PNG_EXPORT(30, void, png_set_bgr, (png_structrp png_ptr));
+#endif
+
+#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
+/* Expand the grayscale to 24-bit RGB if necessary. */
+PNG_EXPORT(31, void, png_set_gray_to_rgb, (png_structrp png_ptr));
+#endif
+
+#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
+/* Reduce RGB to grayscale. */
+#define PNG_ERROR_ACTION_NONE  1
+#define PNG_ERROR_ACTION_WARN  2
+#define PNG_ERROR_ACTION_ERROR 3
+#define PNG_RGB_TO_GRAY_DEFAULT (-1)/*for red/green coefficients*/
+
+PNG_FP_EXPORT(32, void, png_set_rgb_to_gray, (png_structrp png_ptr,
+    int error_action, double red, double green))
+PNG_FIXED_EXPORT(33, void, png_set_rgb_to_gray_fixed, (png_structrp png_ptr,
+    int error_action, png_fixed_point red, png_fixed_point green))
+
+PNG_EXPORT(34, png_byte, png_get_rgb_to_gray_status, (png_const_structrp
+    png_ptr));
+#endif
+
+#ifdef PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED
+PNG_EXPORT(35, void, png_build_grayscale_palette, (int bit_depth,
+    png_colorp palette));
+#endif
+
+#ifdef PNG_READ_ALPHA_MODE_SUPPORTED
+/* How the alpha channel is interpreted - this affects how the color channels
+ * of a PNG file are returned to the calling application when an alpha channel,
+ * or a tRNS chunk in a palette file, is present.
+ *
+ * This has no effect on the way pixels are written into a PNG output
+ * datastream. The color samples in a PNG datastream are never premultiplied
+ * with the alpha samples.
+ *
+ * The default is to return data according to the PNG specification: the alpha
+ * channel is a linear measure of the contribution of the pixel to the
+ * corresponding composited pixel, and the color channels are unassociated
+ * (not premultiplied).  The gamma encoded color channels must be scaled
+ * according to the contribution and to do this it is necessary to undo
+ * the encoding, scale the color values, perform the composition and re-encode
+ * the values.  This is the 'PNG' mode.
+ *
+ * The alternative is to 'associate' the alpha with the color information by
+ * storing color channel values that have been scaled by the alpha.
+ * image.  These are the 'STANDARD', 'ASSOCIATED' or 'PREMULTIPLIED' modes
+ * (the latter being the two common names for associated alpha color channels).
+ *
+ * For the 'OPTIMIZED' mode, a pixel is treated as opaque only if the alpha
+ * value is equal to the maximum value.
+ *
+ * The final choice is to gamma encode the alpha channel as well.  This is
+ * broken because, in practice, no implementation that uses this choice
+ * correctly undoes the encoding before handling alpha composition.  Use this
+ * choice only if other serious errors in the software or hardware you use
+ * mandate it; the typical serious error is for dark halos to appear around
+ * opaque areas of the composited PNG image because of arithmetic overflow.
+ *
+ * The API function png_set_alpha_mode specifies which of these choices to use
+ * with an enumerated 'mode' value and the gamma of the required output:
+ */
+#define PNG_ALPHA_PNG           0 /* according to the PNG standard */
+#define PNG_ALPHA_STANDARD      1 /* according to Porter/Duff */
+#define PNG_ALPHA_ASSOCIATED    1 /* as above; this is the normal practice */
+#define PNG_ALPHA_PREMULTIPLIED 1 /* as above */
+#define PNG_ALPHA_OPTIMIZED     2 /* 'PNG' for opaque pixels, else 'STANDARD' */
+#define PNG_ALPHA_BROKEN        3 /* the alpha channel is gamma encoded */
+
+PNG_FP_EXPORT(227, void, png_set_alpha_mode, (png_structrp png_ptr, int mode,
+    double output_gamma))
+PNG_FIXED_EXPORT(228, void, png_set_alpha_mode_fixed, (png_structrp png_ptr,
+    int mode, png_fixed_point output_gamma))
+#endif
+
+#if defined(PNG_GAMMA_SUPPORTED) || defined(PNG_READ_ALPHA_MODE_SUPPORTED)
+/* The output_gamma value is a screen gamma in libpng terminology: it expresses
+ * how to decode the output values, not how they are encoded.
+ */
+#define PNG_DEFAULT_sRGB -1       /* sRGB gamma and color space */
+#define PNG_GAMMA_MAC_18 -2       /* Old Mac '1.8' gamma and color space */
+#define PNG_GAMMA_sRGB   220000   /* Television standards--matches sRGB gamma */
+#define PNG_GAMMA_LINEAR PNG_FP_1 /* Linear */
+#endif
+
+/* The following are examples of calls to png_set_alpha_mode to achieve the
+ * required overall gamma correction and, where necessary, alpha
+ * premultiplication.
+ *
+ * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_DEFAULT_sRGB);
+ *    This is the default libpng handling of the alpha channel - it is not
+ *    pre-multiplied into the color components.  In addition the call states
+ *    that the output is for a sRGB system and causes all PNG files without gAMA
+ *    chunks to be assumed to be encoded using sRGB.
+ *
+ * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_GAMMA_MAC);
+ *    In this case the output is assumed to be something like an sRGB conformant
+ *    display preceded by a power-law lookup table of power 1.45.  This is how
+ *    early Mac systems behaved.
+ *
+ * png_set_alpha_mode(pp, PNG_ALPHA_STANDARD, PNG_GAMMA_LINEAR);
+ *    This is the classic Jim Blinn approach and will work in academic
+ *    environments where everything is done by the book.  It has the shortcoming
+ *    of assuming that input PNG data with no gamma information is linear - this
+ *    is unlikely to be correct unless the PNG files where generated locally.
+ *    Most of the time the output precision will be so low as to show
+ *    significant banding in dark areas of the image.
+ *
+ * png_set_expand_16(pp);
+ * png_set_alpha_mode(pp, PNG_ALPHA_STANDARD, PNG_DEFAULT_sRGB);
+ *    This is a somewhat more realistic Jim Blinn inspired approach.  PNG files
+ *    are assumed to have the sRGB encoding if not marked with a gamma value and
+ *    the output is always 16 bits per component.  This permits accurate scaling
+ *    and processing of the data.  If you know that your input PNG files were
+ *    generated locally you might need to replace PNG_DEFAULT_sRGB with the
+ *    correct value for your system.
+ *
+ * png_set_alpha_mode(pp, PNG_ALPHA_OPTIMIZED, PNG_DEFAULT_sRGB);
+ *    If you just need to composite the PNG image onto an existing background
+ *    and if you control the code that does this you can use the optimization
+ *    setting.  In this case you just copy completely opaque pixels to the
+ *    output.  For pixels that are not completely transparent (you just skip
+ *    those) you do the composition math using png_composite or png_composite_16
+ *    below then encode the resultant 8-bit or 16-bit values to match the output
+ *    encoding.
+ *
+ * Other cases
+ *    If neither the PNG nor the standard linear encoding work for you because
+ *    of the software or hardware you use then you have a big problem.  The PNG
+ *    case will probably result in halos around the image.  The linear encoding
+ *    will probably result in a washed out, too bright, image (it's actually too
+ *    contrasty.)  Try the ALPHA_OPTIMIZED mode above - this will probably
+ *    substantially reduce the halos.  Alternatively try:
+ *
+ * png_set_alpha_mode(pp, PNG_ALPHA_BROKEN, PNG_DEFAULT_sRGB);
+ *    This option will also reduce the halos, but there will be slight dark
+ *    halos round the opaque parts of the image where the background is light.
+ *    In the OPTIMIZED mode the halos will be light halos where the background
+ *    is dark.  Take your pick - the halos are unavoidable unless you can get
+ *    your hardware/software fixed!  (The OPTIMIZED approach is slightly
+ *    faster.)
+ *
+ * When the default gamma of PNG files doesn't match the output gamma.
+ *    If you have PNG files with no gamma information png_set_alpha_mode allows
+ *    you to provide a default gamma, but it also sets the output gamma to the
+ *    matching value.  If you know your PNG files have a gamma that doesn't
+ *    match the output you can take advantage of the fact that
+ *    png_set_alpha_mode always sets the output gamma but only sets the PNG
+ *    default if it is not already set:
+ *
+ * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_DEFAULT_sRGB);
+ * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_GAMMA_MAC);
+ *    The first call sets both the default and the output gamma values, the
+ *    second call overrides the output gamma without changing the default.  This
+ *    is easier than achieving the same effect with png_set_gamma.  You must use
+ *    PNG_ALPHA_PNG for the first call - internal checking in png_set_alpha will
+ *    fire if more than one call to png_set_alpha_mode and png_set_background is
+ *    made in the same read operation, however multiple calls with PNG_ALPHA_PNG
+ *    are ignored.
+ */
+
+#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
+PNG_EXPORT(36, void, png_set_strip_alpha, (png_structrp png_ptr));
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+PNG_EXPORT(37, void, png_set_swap_alpha, (png_structrp png_ptr));
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+PNG_EXPORT(38, void, png_set_invert_alpha, (png_structrp png_ptr));
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
+/* Add a filler byte to 8-bit or 16-bit Gray or 24-bit or 48-bit RGB images. */
+PNG_EXPORT(39, void, png_set_filler, (png_structrp png_ptr, png_uint_32 filler,
+    int flags));
+/* The values of the PNG_FILLER_ defines should NOT be changed */
+#  define PNG_FILLER_BEFORE 0
+#  define PNG_FILLER_AFTER 1
+/* Add an alpha byte to 8-bit or 16-bit Gray or 24-bit or 48-bit RGB images. */
+PNG_EXPORT(40, void, png_set_add_alpha, (png_structrp png_ptr,
+    png_uint_32 filler, int flags));
+#endif /* READ_FILLER || WRITE_FILLER */
+
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+/* Swap bytes in 16-bit depth files. */
+PNG_EXPORT(41, void, png_set_swap, (png_structrp png_ptr));
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED)
+/* Use 1 byte per pixel in 1, 2, or 4-bit depth files. */
+PNG_EXPORT(42, void, png_set_packing, (png_structrp png_ptr));
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \
+    defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+/* Swap packing order of pixels in bytes. */
+PNG_EXPORT(43, void, png_set_packswap, (png_structrp png_ptr));
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
+/* Converts files to legal bit depths. */
+PNG_EXPORT(44, void, png_set_shift, (png_structrp png_ptr, png_const_color_8p
+    true_bits));
+#endif
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
+    defined(PNG_WRITE_INTERLACING_SUPPORTED)
+/* Have the code handle the interlacing.  Returns the number of passes.
+ * MUST be called before png_read_update_info or png_start_read_image,
+ * otherwise it will not have the desired effect.  Note that it is still
+ * necessary to call png_read_row or png_read_rows png_get_image_height
+ * times for each pass.
+*/
+PNG_EXPORT(45, int, png_set_interlace_handling, (png_structrp png_ptr));
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
+/* Invert monochrome files */
+PNG_EXPORT(46, void, png_set_invert_mono, (png_structrp png_ptr));
+#endif
+
+#ifdef PNG_READ_BACKGROUND_SUPPORTED
+/* Handle alpha and tRNS by replacing with a background color.  Prior to
+ * libpng-1.5.4 this API must not be called before the PNG file header has been
+ * read.  Doing so will result in unexpected behavior and possible warnings or
+ * errors if the PNG file contains a bKGD chunk.
+ */
+PNG_FP_EXPORT(47, void, png_set_background, (png_structrp png_ptr,
+    png_const_color_16p background_color, int background_gamma_code,
+    int need_expand, double background_gamma))
+PNG_FIXED_EXPORT(215, void, png_set_background_fixed, (png_structrp png_ptr,
+    png_const_color_16p background_color, int background_gamma_code,
+    int need_expand, png_fixed_point background_gamma))
+#endif
+#ifdef PNG_READ_BACKGROUND_SUPPORTED
+#  define PNG_BACKGROUND_GAMMA_UNKNOWN 0
+#  define PNG_BACKGROUND_GAMMA_SCREEN  1
+#  define PNG_BACKGROUND_GAMMA_FILE    2
+#  define PNG_BACKGROUND_GAMMA_UNIQUE  3
+#endif
+
+#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
+/* Scale a 16-bit depth file down to 8-bit, accurately. */
+PNG_EXPORT(229, void, png_set_scale_16, (png_structrp png_ptr));
+#endif
+
+#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
+#define PNG_READ_16_TO_8_SUPPORTED /* Name prior to 1.5.4 */
+/* Strip the second byte of information from a 16-bit depth file. */
+PNG_EXPORT(48, void, png_set_strip_16, (png_structrp png_ptr));
+#endif
+
+#ifdef PNG_READ_QUANTIZE_SUPPORTED
+/* Turn on quantizing, and reduce the palette to the number of colors
+ * available.
+ */
+PNG_EXPORT(49, void, png_set_quantize, (png_structrp png_ptr,
+    png_colorp palette, int num_palette, int maximum_colors,
+    png_const_uint_16p histogram, int full_quantize));
+#endif
+
+#ifdef PNG_READ_GAMMA_SUPPORTED
+/* The threshold on gamma processing is configurable but hard-wired into the
+ * library.  The following is the floating point variant.
+ */
+#define PNG_GAMMA_THRESHOLD (PNG_GAMMA_THRESHOLD_FIXED*.00001)
+
+/* Handle gamma correction. Screen_gamma=(display_exponent).
+ * NOTE: this API simply sets the screen and file gamma values. It will
+ * therefore override the value for gamma in a PNG file if it is called after
+ * the file header has been read - use with care  - call before reading the PNG
+ * file for best results!
+ *
+ * These routines accept the same gamma values as png_set_alpha_mode (described
+ * above).  The PNG_GAMMA_ defines and PNG_DEFAULT_sRGB can be passed to either
+ * API (floating point or fixed.)  Notice, however, that the 'file_gamma' value
+ * is the inverse of a 'screen gamma' value.
+ */
+PNG_FP_EXPORT(50, void, png_set_gamma, (png_structrp png_ptr,
+    double screen_gamma, double override_file_gamma))
+PNG_FIXED_EXPORT(208, void, png_set_gamma_fixed, (png_structrp png_ptr,
+    png_fixed_point screen_gamma, png_fixed_point override_file_gamma))
+#endif
+
+#ifdef PNG_WRITE_FLUSH_SUPPORTED
+/* Set how many lines between output flushes - 0 for no flushing */
+PNG_EXPORT(51, void, png_set_flush, (png_structrp png_ptr, int nrows));
+/* Flush the current PNG output buffer */
+PNG_EXPORT(52, void, png_write_flush, (png_structrp png_ptr));
+#endif
+
+/* Optional update palette with requested transformations */
+PNG_EXPORT(53, void, png_start_read_image, (png_structrp png_ptr));
+
+/* Optional call to update the users info structure */
+PNG_EXPORT(54, void, png_read_update_info, (png_structrp png_ptr,
+    png_inforp info_ptr));
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read one or more rows of image data. */
+PNG_EXPORT(55, void, png_read_rows, (png_structrp png_ptr, png_bytepp row,
+    png_bytepp display_row, png_uint_32 num_rows));
+#endif
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read a row of data. */
+PNG_EXPORT(56, void, png_read_row, (png_structrp png_ptr, png_bytep row,
+    png_bytep display_row));
+#endif
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read the whole image into memory at once. */
+PNG_EXPORT(57, void, png_read_image, (png_structrp png_ptr, png_bytepp image));
+#endif
+
+/* Write a row of image data */
+PNG_EXPORT(58, void, png_write_row, (png_structrp png_ptr,
+    png_const_bytep row));
+
+/* Write a few rows of image data: (*row) is not written; however, the type
+ * is declared as writeable to maintain compatibility with previous versions
+ * of libpng and to allow the 'display_row' array from read_rows to be passed
+ * unchanged to write_rows.
+ */
+PNG_EXPORT(59, void, png_write_rows, (png_structrp png_ptr, png_bytepp row,
+    png_uint_32 num_rows));
+
+/* Write the image data */
+PNG_EXPORT(60, void, png_write_image, (png_structrp png_ptr, png_bytepp image));
+
+/* Write the end of the PNG file. */
+PNG_EXPORT(61, void, png_write_end, (png_structrp png_ptr,
+    png_inforp info_ptr));
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read the end of the PNG file. */
+PNG_EXPORT(62, void, png_read_end, (png_structrp png_ptr, png_inforp info_ptr));
+#endif
+
+/* Free any memory associated with the png_info_struct */
+PNG_EXPORT(63, void, png_destroy_info_struct, (png_const_structrp png_ptr,
+    png_infopp info_ptr_ptr));
+
+/* Free any memory associated with the png_struct and the png_info_structs */
+PNG_EXPORT(64, void, png_destroy_read_struct, (png_structpp png_ptr_ptr,
+    png_infopp info_ptr_ptr, png_infopp end_info_ptr_ptr));
+
+/* Free any memory associated with the png_struct and the png_info_structs */
+PNG_EXPORT(65, void, png_destroy_write_struct, (png_structpp png_ptr_ptr,
+    png_infopp info_ptr_ptr));
+
+/* Set the libpng method of handling chunk CRC errors */
+PNG_EXPORT(66, void, png_set_crc_action, (png_structrp png_ptr, int crit_action,
+    int ancil_action));
+
+/* Values for png_set_crc_action() say how to handle CRC errors in
+ * ancillary and critical chunks, and whether to use the data contained
+ * therein.  Note that it is impossible to "discard" data in a critical
+ * chunk.  For versions prior to 0.90, the action was always error/quit,
+ * whereas in version 0.90 and later, the action for CRC errors in ancillary
+ * chunks is warn/discard.  These values should NOT be changed.
+ *
+ *      value                       action:critical     action:ancillary
+ */
+#define PNG_CRC_DEFAULT       0  /* error/quit          warn/discard data */
+#define PNG_CRC_ERROR_QUIT    1  /* error/quit          error/quit        */
+#define PNG_CRC_WARN_DISCARD  2  /* (INVALID)           warn/discard data */
+#define PNG_CRC_WARN_USE      3  /* warn/use data       warn/use data     */
+#define PNG_CRC_QUIET_USE     4  /* quiet/use data      quiet/use data    */
+#define PNG_CRC_NO_CHANGE     5  /* use current value   use current value */
+
+#ifdef PNG_WRITE_SUPPORTED
+/* These functions give the user control over the scan-line filtering in
+ * libpng and the compression methods used by zlib.  These functions are
+ * mainly useful for testing, as the defaults should work with most users.
+ * Those users who are tight on memory or want faster performance at the
+ * expense of compression can modify them.  See the compression library
+ * header file (zlib.h) for an explanation of the compression functions.
+ */
+
+/* Set the filtering method(s) used by libpng.  Currently, the only valid
+ * value for "method" is 0.
+ */
+PNG_EXPORT(67, void, png_set_filter, (png_structrp png_ptr, int method,
+    int filters));
+#endif /* WRITE */
+
+/* Flags for png_set_filter() to say which filters to use.  The flags
+ * are chosen so that they don't conflict with real filter types
+ * below, in case they are supplied instead of the #defined constants.
+ * These values should NOT be changed.
+ */
+#define PNG_NO_FILTERS     0x00
+#define PNG_FILTER_NONE    0x08
+#define PNG_FILTER_SUB     0x10
+#define PNG_FILTER_UP      0x20
+#define PNG_FILTER_AVG     0x40
+#define PNG_FILTER_PAETH   0x80
+#define PNG_FAST_FILTERS (PNG_FILTER_NONE | PNG_FILTER_SUB | PNG_FILTER_UP)
+#define PNG_ALL_FILTERS (PNG_FAST_FILTERS | PNG_FILTER_AVG | PNG_FILTER_PAETH)
+
+/* Filter values (not flags) - used in pngwrite.c, pngwutil.c for now.
+ * These defines should NOT be changed.
+ */
+#define PNG_FILTER_VALUE_NONE  0
+#define PNG_FILTER_VALUE_SUB   1
+#define PNG_FILTER_VALUE_UP    2
+#define PNG_FILTER_VALUE_AVG   3
+#define PNG_FILTER_VALUE_PAETH 4
+#define PNG_FILTER_VALUE_LAST  5
+
+#ifdef PNG_WRITE_SUPPORTED
+#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* DEPRECATED */
+PNG_FP_EXPORT(68, void, png_set_filter_heuristics, (png_structrp png_ptr,
+    int heuristic_method, int num_weights, png_const_doublep filter_weights,
+    png_const_doublep filter_costs))
+PNG_FIXED_EXPORT(209, void, png_set_filter_heuristics_fixed,
+    (png_structrp png_ptr, int heuristic_method, int num_weights,
+    png_const_fixed_point_p filter_weights,
+    png_const_fixed_point_p filter_costs))
+#endif /* WRITE_WEIGHTED_FILTER */
+
+/* The following are no longer used and will be removed from libpng-1.7: */
+#define PNG_FILTER_HEURISTIC_DEFAULT    0  /* Currently "UNWEIGHTED" */
+#define PNG_FILTER_HEURISTIC_UNWEIGHTED 1  /* Used by libpng < 0.95 */
+#define PNG_FILTER_HEURISTIC_WEIGHTED   2  /* Experimental feature */
+#define PNG_FILTER_HEURISTIC_LAST       3  /* Not a valid value */
+
+/* Set the library compression level.  Currently, valid values range from
+ * 0 - 9, corresponding directly to the zlib compression levels 0 - 9
+ * (0 - no compression, 9 - "maximal" compression).  Note that tests have
+ * shown that zlib compression levels 3-6 usually perform as well as level 9
+ * for PNG images, and do considerably fewer calculations.  In the future,
+ * these values may not correspond directly to the zlib compression levels.
+ */
+#ifdef PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED
+PNG_EXPORT(69, void, png_set_compression_level, (png_structrp png_ptr,
+    int level));
+
+PNG_EXPORT(70, void, png_set_compression_mem_level, (png_structrp png_ptr,
+    int mem_level));
+
+PNG_EXPORT(71, void, png_set_compression_strategy, (png_structrp png_ptr,
+    int strategy));
+
+/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
+ * smaller value of window_bits if it can do so safely.
+ */
+PNG_EXPORT(72, void, png_set_compression_window_bits, (png_structrp png_ptr,
+    int window_bits));
+
+PNG_EXPORT(73, void, png_set_compression_method, (png_structrp png_ptr,
+    int method));
+#endif /* WRITE_CUSTOMIZE_COMPRESSION */
+
+#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
+/* Also set zlib parameters for compressing non-IDAT chunks */
+PNG_EXPORT(222, void, png_set_text_compression_level, (png_structrp png_ptr,
+    int level));
+
+PNG_EXPORT(223, void, png_set_text_compression_mem_level, (png_structrp png_ptr,
+    int mem_level));
+
+PNG_EXPORT(224, void, png_set_text_compression_strategy, (png_structrp png_ptr,
+    int strategy));
+
+/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
+ * smaller value of window_bits if it can do so safely.
+ */
+PNG_EXPORT(225, void, png_set_text_compression_window_bits,
+    (png_structrp png_ptr, int window_bits));
+
+PNG_EXPORT(226, void, png_set_text_compression_method, (png_structrp png_ptr,
+    int method));
+#endif /* WRITE_CUSTOMIZE_ZTXT_COMPRESSION */
+#endif /* WRITE */
+
+/* These next functions are called for input/output, memory, and error
+ * handling.  They are in the file pngrio.c, pngwio.c, and pngerror.c,
+ * and call standard C I/O routines such as fread(), fwrite(), and
+ * fprintf().  These functions can be made to use other I/O routines
+ * at run time for those applications that need to handle I/O in a
+ * different manner by calling png_set_???_fn().  See libpng-manual.txt for
+ * more information.
+ */
+
+#ifdef PNG_STDIO_SUPPORTED
+/* Initialize the input/output for the PNG file to the default functions. */
+PNG_EXPORT(74, void, png_init_io, (png_structrp png_ptr, png_FILE_p fp));
+#endif
+
+/* Replace the (error and abort), and warning functions with user
+ * supplied functions.  If no messages are to be printed you must still
+ * write and use replacement functions. The replacement error_fn should
+ * still do a longjmp to the last setjmp location if you are using this
+ * method of error handling.  If error_fn or warning_fn is NULL, the
+ * default function will be used.
+ */
+
+PNG_EXPORT(75, void, png_set_error_fn, (png_structrp png_ptr,
+    png_voidp error_ptr, png_error_ptr error_fn, png_error_ptr warning_fn));
+
+/* Return the user pointer associated with the error functions */
+PNG_EXPORT(76, png_voidp, png_get_error_ptr, (png_const_structrp png_ptr));
+
+/* Replace the default data output functions with a user supplied one(s).
+ * If buffered output is not used, then output_flush_fn can be set to NULL.
+ * If PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile time
+ * output_flush_fn will be ignored (and thus can be NULL).
+ * It is probably a mistake to use NULL for output_flush_fn if
+ * write_data_fn is not also NULL unless you have built libpng with
+ * PNG_WRITE_FLUSH_SUPPORTED undefined, because in this case libpng's
+ * default flush function, which uses the standard *FILE structure, will
+ * be used.
+ */
+PNG_EXPORT(77, void, png_set_write_fn, (png_structrp png_ptr, png_voidp io_ptr,
+    png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn));
+
+/* Replace the default data input function with a user supplied one. */
+PNG_EXPORT(78, void, png_set_read_fn, (png_structrp png_ptr, png_voidp io_ptr,
+    png_rw_ptr read_data_fn));
+
+/* Return the user pointer associated with the I/O functions */
+PNG_EXPORT(79, png_voidp, png_get_io_ptr, (png_const_structrp png_ptr));
+
+PNG_EXPORT(80, void, png_set_read_status_fn, (png_structrp png_ptr,
+    png_read_status_ptr read_row_fn));
+
+PNG_EXPORT(81, void, png_set_write_status_fn, (png_structrp png_ptr,
+    png_write_status_ptr write_row_fn));
+
+#ifdef PNG_USER_MEM_SUPPORTED
+/* Replace the default memory allocation functions with user supplied one(s). */
+PNG_EXPORT(82, void, png_set_mem_fn, (png_structrp png_ptr, png_voidp mem_ptr,
+    png_malloc_ptr malloc_fn, png_free_ptr free_fn));
+/* Return the user pointer associated with the memory functions */
+PNG_EXPORT(83, png_voidp, png_get_mem_ptr, (png_const_structrp png_ptr));
+#endif
+
+#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
+PNG_EXPORT(84, void, png_set_read_user_transform_fn, (png_structrp png_ptr,
+    png_user_transform_ptr read_user_transform_fn));
+#endif
+
+#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
+PNG_EXPORT(85, void, png_set_write_user_transform_fn, (png_structrp png_ptr,
+    png_user_transform_ptr write_user_transform_fn));
+#endif
+
+#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
+PNG_EXPORT(86, void, png_set_user_transform_info, (png_structrp png_ptr,
+    png_voidp user_transform_ptr, int user_transform_depth,
+    int user_transform_channels));
+/* Return the user pointer associated with the user transform functions */
+PNG_EXPORT(87, png_voidp, png_get_user_transform_ptr,
+    (png_const_structrp png_ptr));
+#endif
+
+#ifdef PNG_USER_TRANSFORM_INFO_SUPPORTED
+/* Return information about the row currently being processed.  Note that these
+ * APIs do not fail but will return unexpected results if called outside a user
+ * transform callback.  Also note that when transforming an interlaced image the
+ * row number is the row number within the sub-image of the interlace pass, so
+ * the value will increase to the height of the sub-image (not the full image)
+ * then reset to 0 for the next pass.
+ *
+ * Use PNG_ROW_FROM_PASS_ROW(row, pass) and PNG_COL_FROM_PASS_COL(col, pass) to
+ * find the output pixel (x,y) given an interlaced sub-image pixel
+ * (row,col,pass).  (See below for these macros.)
+ */
+PNG_EXPORT(217, png_uint_32, png_get_current_row_number, (png_const_structrp));
+PNG_EXPORT(218, png_byte, png_get_current_pass_number, (png_const_structrp));
+#endif
+
+#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
+/* This callback is called only for *unknown* chunks.  If
+ * PNG_HANDLE_AS_UNKNOWN_SUPPORTED is set then it is possible to set known
+ * chunks to be treated as unknown, however in this case the callback must do
+ * any processing required by the chunk (e.g. by calling the appropriate
+ * png_set_ APIs.)
+ *
+ * There is no write support - on write, by default, all the chunks in the
+ * 'unknown' list are written in the specified position.
+ *
+ * The integer return from the callback function is interpreted thus:
+ *
+ * negative: An error occurred; png_chunk_error will be called.
+ *     zero: The chunk was not handled, the chunk will be saved. A critical
+ *           chunk will cause an error at this point unless it is to be saved.
+ * positive: The chunk was handled, libpng will ignore/discard it.
+ *
+ * See "INTERACTION WITH USER CHUNK CALLBACKS" below for important notes about
+ * how this behavior will change in libpng 1.7
+ */
+PNG_EXPORT(88, void, png_set_read_user_chunk_fn, (png_structrp png_ptr,
+    png_voidp user_chunk_ptr, png_user_chunk_ptr read_user_chunk_fn));
+#endif
+
+#ifdef PNG_USER_CHUNKS_SUPPORTED
+PNG_EXPORT(89, png_voidp, png_get_user_chunk_ptr, (png_const_structrp png_ptr));
+#endif
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+/* Sets the function callbacks for the push reader, and a pointer to a
+ * user-defined structure available to the callback functions.
+ */
+PNG_EXPORT(90, void, png_set_progressive_read_fn, (png_structrp png_ptr,
+    png_voidp progressive_ptr, png_progressive_info_ptr info_fn,
+    png_progressive_row_ptr row_fn, png_progressive_end_ptr end_fn));
+
+/* Returns the user pointer associated with the push read functions */
+PNG_EXPORT(91, png_voidp, png_get_progressive_ptr,
+    (png_const_structrp png_ptr));
+
+/* Function to be called when data becomes available */
+PNG_EXPORT(92, void, png_process_data, (png_structrp png_ptr,
+    png_inforp info_ptr, png_bytep buffer, size_t buffer_size));
+
+/* A function which may be called *only* within png_process_data to stop the
+ * processing of any more data.  The function returns the number of bytes
+ * remaining, excluding any that libpng has cached internally.  A subsequent
+ * call to png_process_data must supply these bytes again.  If the argument
+ * 'save' is set to true the routine will first save all the pending data and
+ * will always return 0.
+ */
+PNG_EXPORT(219, size_t, png_process_data_pause, (png_structrp, int save));
+
+/* A function which may be called *only* outside (after) a call to
+ * png_process_data.  It returns the number of bytes of data to skip in the
+ * input.  Normally it will return 0, but if it returns a non-zero value the
+ * application must skip than number of bytes of input data and pass the
+ * following data to the next call to png_process_data.
+ */
+PNG_EXPORT(220, png_uint_32, png_process_data_skip, (png_structrp));
+
+/* Function that combines rows.  'new_row' is a flag that should come from
+ * the callback and be non-NULL if anything needs to be done; the library
+ * stores its own version of the new data internally and ignores the passed
+ * in value.
+ */
+PNG_EXPORT(93, void, png_progressive_combine_row, (png_const_structrp png_ptr,
+    png_bytep old_row, png_const_bytep new_row));
+#endif /* PROGRESSIVE_READ */
+
+PNG_EXPORTA(94, png_voidp, png_malloc, (png_const_structrp png_ptr,
+    png_alloc_size_t size), PNG_ALLOCATED);
+/* Added at libpng version 1.4.0 */
+PNG_EXPORTA(95, png_voidp, png_calloc, (png_const_structrp png_ptr,
+    png_alloc_size_t size), PNG_ALLOCATED);
+
+/* Added at libpng version 1.2.4 */
+PNG_EXPORTA(96, png_voidp, png_malloc_warn, (png_const_structrp png_ptr,
+    png_alloc_size_t size), PNG_ALLOCATED);
+
+/* Frees a pointer allocated by png_malloc() */
+PNG_EXPORT(97, void, png_free, (png_const_structrp png_ptr, png_voidp ptr));
+
+/* Free data that was allocated internally */
+PNG_EXPORT(98, void, png_free_data, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 free_me, int num));
+
+/* Reassign the responsibility for freeing existing data, whether allocated
+ * by libpng or by the application; this works on the png_info structure passed
+ * in, without changing the state for other png_info structures.
+ */
+PNG_EXPORT(99, void, png_data_freer, (png_const_structrp png_ptr,
+    png_inforp info_ptr, int freer, png_uint_32 mask));
+
+/* Assignments for png_data_freer */
+#define PNG_DESTROY_WILL_FREE_DATA 1
+#define PNG_SET_WILL_FREE_DATA 1
+#define PNG_USER_WILL_FREE_DATA 2
+/* Flags for png_ptr->free_me and info_ptr->free_me */
+#define PNG_FREE_HIST 0x0008U
+#define PNG_FREE_ICCP 0x0010U
+#define PNG_FREE_SPLT 0x0020U
+#define PNG_FREE_ROWS 0x0040U
+#define PNG_FREE_PCAL 0x0080U
+#define PNG_FREE_SCAL 0x0100U
+#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
+#  define PNG_FREE_UNKN 0x0200U
+#endif
+/*      PNG_FREE_LIST 0x0400U   removed in 1.6.0 because it is ignored */
+#define PNG_FREE_PLTE 0x1000U
+#define PNG_FREE_TRNS 0x2000U
+#define PNG_FREE_TEXT 0x4000U
+#define PNG_FREE_EXIF 0x8000U /* Added at libpng-1.6.31 */
+#define PNG_FREE_ALL  0xffffU
+#define PNG_FREE_MUL  0x4220U /* PNG_FREE_SPLT|PNG_FREE_TEXT|PNG_FREE_UNKN */
+
+#ifdef PNG_USER_MEM_SUPPORTED
+PNG_EXPORTA(100, png_voidp, png_malloc_default, (png_const_structrp png_ptr,
+    png_alloc_size_t size), PNG_ALLOCATED PNG_DEPRECATED);
+PNG_EXPORTA(101, void, png_free_default, (png_const_structrp png_ptr,
+    png_voidp ptr), PNG_DEPRECATED);
+#endif
+
+#ifdef PNG_ERROR_TEXT_SUPPORTED
+/* Fatal error in PNG image of libpng - can't continue */
+PNG_EXPORTA(102, void, png_error, (png_const_structrp png_ptr,
+    png_const_charp error_message), PNG_NORETURN);
+
+/* The same, but the chunk name is prepended to the error string. */
+PNG_EXPORTA(103, void, png_chunk_error, (png_const_structrp png_ptr,
+    png_const_charp error_message), PNG_NORETURN);
+
+#else
+/* Fatal error in PNG image of libpng - can't continue */
+PNG_EXPORTA(104, void, png_err, (png_const_structrp png_ptr), PNG_NORETURN);
+#  define png_error(s1,s2) png_err(s1)
+#  define png_chunk_error(s1,s2) png_err(s1)
+#endif
+
+#ifdef PNG_WARNINGS_SUPPORTED
+/* Non-fatal error in libpng.  Can continue, but may have a problem. */
+PNG_EXPORT(105, void, png_warning, (png_const_structrp png_ptr,
+    png_const_charp warning_message));
+
+/* Non-fatal error in libpng, chunk name is prepended to message. */
+PNG_EXPORT(106, void, png_chunk_warning, (png_const_structrp png_ptr,
+    png_const_charp warning_message));
+#else
+#  define png_warning(s1,s2) ((void)(s1))
+#  define png_chunk_warning(s1,s2) ((void)(s1))
+#endif
+
+#ifdef PNG_BENIGN_ERRORS_SUPPORTED
+/* Benign error in libpng.  Can continue, but may have a problem.
+ * User can choose whether to handle as a fatal error or as a warning. */
+PNG_EXPORT(107, void, png_benign_error, (png_const_structrp png_ptr,
+    png_const_charp warning_message));
+
+#ifdef PNG_READ_SUPPORTED
+/* Same, chunk name is prepended to message (only during read) */
+PNG_EXPORT(108, void, png_chunk_benign_error, (png_const_structrp png_ptr,
+    png_const_charp warning_message));
+#endif
+
+PNG_EXPORT(109, void, png_set_benign_errors,
+    (png_structrp png_ptr, int allowed));
+#else
+#  ifdef PNG_ALLOW_BENIGN_ERRORS
+#    define png_benign_error png_warning
+#    define png_chunk_benign_error png_chunk_warning
+#  else
+#    define png_benign_error png_error
+#    define png_chunk_benign_error png_chunk_error
+#  endif
+#endif
+
+/* The png_set_<chunk> functions are for storing values in the png_info_struct.
+ * Similarly, the png_get_<chunk> calls are used to read values from the
+ * png_info_struct, either storing the parameters in the passed variables, or
+ * setting pointers into the png_info_struct where the data is stored.  The
+ * png_get_<chunk> functions return a non-zero value if the data was available
+ * in info_ptr, or return zero and do not change any of the parameters if the
+ * data was not available.
+ *
+ * These functions should be used instead of directly accessing png_info
+ * to avoid problems with future changes in the size and internal layout of
+ * png_info_struct.
+ */
+/* Returns "flag" if chunk data is valid in info_ptr. */
+PNG_EXPORT(110, png_uint_32, png_get_valid, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr, png_uint_32 flag));
+
+/* Returns number of bytes needed to hold a transformed row. */
+PNG_EXPORT(111, size_t, png_get_rowbytes, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+/* Returns row_pointers, which is an array of pointers to scanlines that was
+ * returned from png_read_png().
+ */
+PNG_EXPORT(112, png_bytepp, png_get_rows, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+
+/* Set row_pointers, which is an array of pointers to scanlines for use
+ * by png_write_png().
+ */
+PNG_EXPORT(113, void, png_set_rows, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_bytepp row_pointers));
+#endif
+
+/* Returns number of color channels in image. */
+PNG_EXPORT(114, png_byte, png_get_channels, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+
+#ifdef PNG_EASY_ACCESS_SUPPORTED
+/* Returns image width in pixels. */
+PNG_EXPORT(115, png_uint_32, png_get_image_width, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+
+/* Returns image height in pixels. */
+PNG_EXPORT(116, png_uint_32, png_get_image_height, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+
+/* Returns image bit_depth. */
+PNG_EXPORT(117, png_byte, png_get_bit_depth, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+
+/* Returns image color_type. */
+PNG_EXPORT(118, png_byte, png_get_color_type, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+
+/* Returns image filter_type. */
+PNG_EXPORT(119, png_byte, png_get_filter_type, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+
+/* Returns image interlace_type. */
+PNG_EXPORT(120, png_byte, png_get_interlace_type, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+
+/* Returns image compression_type. */
+PNG_EXPORT(121, png_byte, png_get_compression_type, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+
+/* Returns image resolution in pixels per meter, from pHYs chunk data. */
+PNG_EXPORT(122, png_uint_32, png_get_pixels_per_meter,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr));
+PNG_EXPORT(123, png_uint_32, png_get_x_pixels_per_meter,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr));
+PNG_EXPORT(124, png_uint_32, png_get_y_pixels_per_meter,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr));
+
+/* Returns pixel aspect ratio, computed from pHYs chunk data.  */
+PNG_FP_EXPORT(125, float, png_get_pixel_aspect_ratio,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr))
+PNG_FIXED_EXPORT(210, png_fixed_point, png_get_pixel_aspect_ratio_fixed,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr))
+
+/* Returns image x, y offset in pixels or microns, from oFFs chunk data. */
+PNG_EXPORT(126, png_int_32, png_get_x_offset_pixels,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr));
+PNG_EXPORT(127, png_int_32, png_get_y_offset_pixels,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr));
+PNG_EXPORT(128, png_int_32, png_get_x_offset_microns,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr));
+PNG_EXPORT(129, png_int_32, png_get_y_offset_microns,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr));
+
+#endif /* EASY_ACCESS */
+
+#ifdef PNG_READ_SUPPORTED
+/* Returns pointer to signature string read from PNG header */
+PNG_EXPORT(130, png_const_bytep, png_get_signature, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr));
+#endif
+
+#ifdef PNG_bKGD_SUPPORTED
+PNG_EXPORT(131, png_uint_32, png_get_bKGD, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_color_16p *background));
+#endif
+
+#ifdef PNG_bKGD_SUPPORTED
+PNG_EXPORT(132, void, png_set_bKGD, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_color_16p background));
+#endif
+
+#ifdef PNG_cHRM_SUPPORTED
+PNG_FP_EXPORT(133, png_uint_32, png_get_cHRM, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr, double *white_x, double *white_y, double *red_x,
+    double *red_y, double *green_x, double *green_y, double *blue_x,
+    double *blue_y))
+PNG_FP_EXPORT(230, png_uint_32, png_get_cHRM_XYZ, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr, double *red_X, double *red_Y, double *red_Z,
+    double *green_X, double *green_Y, double *green_Z, double *blue_X,
+    double *blue_Y, double *blue_Z))
+PNG_FIXED_EXPORT(134, png_uint_32, png_get_cHRM_fixed,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_fixed_point *int_white_x, png_fixed_point *int_white_y,
+    png_fixed_point *int_red_x, png_fixed_point *int_red_y,
+    png_fixed_point *int_green_x, png_fixed_point *int_green_y,
+    png_fixed_point *int_blue_x, png_fixed_point *int_blue_y))
+PNG_FIXED_EXPORT(231, png_uint_32, png_get_cHRM_XYZ_fixed,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_fixed_point *int_red_X, png_fixed_point *int_red_Y,
+    png_fixed_point *int_red_Z, png_fixed_point *int_green_X,
+    png_fixed_point *int_green_Y, png_fixed_point *int_green_Z,
+    png_fixed_point *int_blue_X, png_fixed_point *int_blue_Y,
+    png_fixed_point *int_blue_Z))
+#endif
+
+#ifdef PNG_cHRM_SUPPORTED
+PNG_FP_EXPORT(135, void, png_set_cHRM, (png_const_structrp png_ptr,
+    png_inforp info_ptr,
+    double white_x, double white_y, double red_x, double red_y, double green_x,
+    double green_y, double blue_x, double blue_y))
+PNG_FP_EXPORT(232, void, png_set_cHRM_XYZ, (png_const_structrp png_ptr,
+    png_inforp info_ptr, double red_X, double red_Y, double red_Z,
+    double green_X, double green_Y, double green_Z, double blue_X,
+    double blue_Y, double blue_Z))
+PNG_FIXED_EXPORT(136, void, png_set_cHRM_fixed, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_fixed_point int_white_x,
+    png_fixed_point int_white_y, png_fixed_point int_red_x,
+    png_fixed_point int_red_y, png_fixed_point int_green_x,
+    png_fixed_point int_green_y, png_fixed_point int_blue_x,
+    png_fixed_point int_blue_y))
+PNG_FIXED_EXPORT(233, void, png_set_cHRM_XYZ_fixed, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_fixed_point int_red_X, png_fixed_point int_red_Y,
+    png_fixed_point int_red_Z, png_fixed_point int_green_X,
+    png_fixed_point int_green_Y, png_fixed_point int_green_Z,
+    png_fixed_point int_blue_X, png_fixed_point int_blue_Y,
+    png_fixed_point int_blue_Z))
+#endif
+
+#ifdef PNG_eXIf_SUPPORTED
+PNG_EXPORT(246, png_uint_32, png_get_eXIf, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_bytep *exif));
+PNG_EXPORT(247, void, png_set_eXIf, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_bytep exif));
+
+PNG_EXPORT(248, png_uint_32, png_get_eXIf_1, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr, png_uint_32 *num_exif, png_bytep *exif));
+PNG_EXPORT(249, void, png_set_eXIf_1, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 num_exif, png_bytep exif));
+#endif
+
+#ifdef PNG_gAMA_SUPPORTED
+PNG_FP_EXPORT(137, png_uint_32, png_get_gAMA, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr, double *file_gamma))
+PNG_FIXED_EXPORT(138, png_uint_32, png_get_gAMA_fixed,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_fixed_point *int_file_gamma))
+#endif
+
+#ifdef PNG_gAMA_SUPPORTED
+PNG_FP_EXPORT(139, void, png_set_gAMA, (png_const_structrp png_ptr,
+    png_inforp info_ptr, double file_gamma))
+PNG_FIXED_EXPORT(140, void, png_set_gAMA_fixed, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_fixed_point int_file_gamma))
+#endif
+
+#ifdef PNG_hIST_SUPPORTED
+PNG_EXPORT(141, png_uint_32, png_get_hIST, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_uint_16p *hist));
+PNG_EXPORT(142, void, png_set_hIST, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_uint_16p hist));
+#endif
+
+PNG_EXPORT(143, png_uint_32, png_get_IHDR, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr, png_uint_32 *width, png_uint_32 *height,
+    int *bit_depth, int *color_type, int *interlace_method,
+    int *compression_method, int *filter_method));
+
+PNG_EXPORT(144, void, png_set_IHDR, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 width, png_uint_32 height, int bit_depth,
+    int color_type, int interlace_method, int compression_method,
+    int filter_method));
+
+#ifdef PNG_oFFs_SUPPORTED
+PNG_EXPORT(145, png_uint_32, png_get_oFFs, (png_const_structrp png_ptr,
+   png_const_inforp info_ptr, png_int_32 *offset_x, png_int_32 *offset_y,
+   int *unit_type));
+#endif
+
+#ifdef PNG_oFFs_SUPPORTED
+PNG_EXPORT(146, void, png_set_oFFs, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_int_32 offset_x, png_int_32 offset_y,
+    int unit_type));
+#endif
+
+#ifdef PNG_pCAL_SUPPORTED
+PNG_EXPORT(147, png_uint_32, png_get_pCAL, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_charp *purpose, png_int_32 *X0,
+    png_int_32 *X1, int *type, int *nparams, png_charp *units,
+    png_charpp *params));
+#endif
+
+#ifdef PNG_pCAL_SUPPORTED
+PNG_EXPORT(148, void, png_set_pCAL, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_charp purpose, png_int_32 X0, png_int_32 X1,
+    int type, int nparams, png_const_charp units, png_charpp params));
+#endif
+
+#ifdef PNG_pHYs_SUPPORTED
+PNG_EXPORT(149, png_uint_32, png_get_pHYs, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr, png_uint_32 *res_x, png_uint_32 *res_y,
+    int *unit_type));
+#endif
+
+#ifdef PNG_pHYs_SUPPORTED
+PNG_EXPORT(150, void, png_set_pHYs, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 res_x, png_uint_32 res_y, int unit_type));
+#endif
+
+PNG_EXPORT(151, png_uint_32, png_get_PLTE, (png_const_structrp png_ptr,
+   png_inforp info_ptr, png_colorp *palette, int *num_palette));
+
+PNG_EXPORT(152, void, png_set_PLTE, (png_structrp png_ptr,
+    png_inforp info_ptr, png_const_colorp palette, int num_palette));
+
+#ifdef PNG_sBIT_SUPPORTED
+PNG_EXPORT(153, png_uint_32, png_get_sBIT, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_color_8p *sig_bit));
+#endif
+
+#ifdef PNG_sBIT_SUPPORTED
+PNG_EXPORT(154, void, png_set_sBIT, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_color_8p sig_bit));
+#endif
+
+#ifdef PNG_sRGB_SUPPORTED
+PNG_EXPORT(155, png_uint_32, png_get_sRGB, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr, int *file_srgb_intent));
+#endif
+
+#ifdef PNG_sRGB_SUPPORTED
+PNG_EXPORT(156, void, png_set_sRGB, (png_const_structrp png_ptr,
+    png_inforp info_ptr, int srgb_intent));
+PNG_EXPORT(157, void, png_set_sRGB_gAMA_and_cHRM, (png_const_structrp png_ptr,
+    png_inforp info_ptr, int srgb_intent));
+#endif
+
+#ifdef PNG_iCCP_SUPPORTED
+PNG_EXPORT(158, png_uint_32, png_get_iCCP, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_charpp name, int *compression_type,
+    png_bytepp profile, png_uint_32 *proflen));
+#endif
+
+#ifdef PNG_iCCP_SUPPORTED
+PNG_EXPORT(159, void, png_set_iCCP, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_charp name, int compression_type,
+    png_const_bytep profile, png_uint_32 proflen));
+#endif
+
+#ifdef PNG_sPLT_SUPPORTED
+PNG_EXPORT(160, int, png_get_sPLT, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_sPLT_tpp entries));
+#endif
+
+#ifdef PNG_sPLT_SUPPORTED
+PNG_EXPORT(161, void, png_set_sPLT, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_sPLT_tp entries, int nentries));
+#endif
+
+#ifdef PNG_TEXT_SUPPORTED
+/* png_get_text also returns the number of text chunks in *num_text */
+PNG_EXPORT(162, int, png_get_text, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_textp *text_ptr, int *num_text));
+#endif
+
+/* Note while png_set_text() will accept a structure whose text,
+ * language, and  translated keywords are NULL pointers, the structure
+ * returned by png_get_text will always contain regular
+ * zero-terminated C strings.  They might be empty strings but
+ * they will never be NULL pointers.
+ */
+
+#ifdef PNG_TEXT_SUPPORTED
+PNG_EXPORT(163, void, png_set_text, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_textp text_ptr, int num_text));
+#endif
+
+#ifdef PNG_tIME_SUPPORTED
+PNG_EXPORT(164, png_uint_32, png_get_tIME, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_timep *mod_time));
+#endif
+
+#ifdef PNG_tIME_SUPPORTED
+PNG_EXPORT(165, void, png_set_tIME, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_timep mod_time));
+#endif
+
+#ifdef PNG_tRNS_SUPPORTED
+PNG_EXPORT(166, png_uint_32, png_get_tRNS, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_bytep *trans_alpha, int *num_trans,
+    png_color_16p *trans_color));
+#endif
+
+#ifdef PNG_tRNS_SUPPORTED
+PNG_EXPORT(167, void, png_set_tRNS, (png_structrp png_ptr,
+    png_inforp info_ptr, png_const_bytep trans_alpha, int num_trans,
+    png_const_color_16p trans_color));
+#endif
+
+#ifdef PNG_sCAL_SUPPORTED
+PNG_FP_EXPORT(168, png_uint_32, png_get_sCAL, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr, int *unit, double *width, double *height))
+#if defined(PNG_FLOATING_ARITHMETIC_SUPPORTED) || \
+   defined(PNG_FLOATING_POINT_SUPPORTED)
+/* NOTE: this API is currently implemented using floating point arithmetic,
+ * consequently it can only be used on systems with floating point support.
+ * In any case the range of values supported by png_fixed_point is small and it
+ * is highly recommended that png_get_sCAL_s be used instead.
+ */
+PNG_FIXED_EXPORT(214, png_uint_32, png_get_sCAL_fixed,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr, int *unit,
+    png_fixed_point *width, png_fixed_point *height))
+#endif
+PNG_EXPORT(169, png_uint_32, png_get_sCAL_s,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr, int *unit,
+    png_charpp swidth, png_charpp sheight));
+
+PNG_FP_EXPORT(170, void, png_set_sCAL, (png_const_structrp png_ptr,
+    png_inforp info_ptr, int unit, double width, double height))
+PNG_FIXED_EXPORT(213, void, png_set_sCAL_fixed, (png_const_structrp png_ptr,
+   png_inforp info_ptr, int unit, png_fixed_point width,
+   png_fixed_point height))
+PNG_EXPORT(171, void, png_set_sCAL_s, (png_const_structrp png_ptr,
+    png_inforp info_ptr, int unit,
+    png_const_charp swidth, png_const_charp sheight));
+#endif /* sCAL */
+
+#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
+/* Provide the default handling for all unknown chunks or, optionally, for
+ * specific unknown chunks.
+ *
+ * NOTE: prior to 1.6.0 the handling specified for particular chunks on read was
+ * ignored and the default was used, the per-chunk setting only had an effect on
+ * write.  If you wish to have chunk-specific handling on read in code that must
+ * work on earlier versions you must use a user chunk callback to specify the
+ * desired handling (keep or discard.)
+ *
+ * The 'keep' parameter is a PNG_HANDLE_CHUNK_ value as listed below.  The
+ * parameter is interpreted as follows:
+ *
+ * READ:
+ *    PNG_HANDLE_CHUNK_AS_DEFAULT:
+ *       Known chunks: do normal libpng processing, do not keep the chunk (but
+ *          see the comments below about PNG_HANDLE_AS_UNKNOWN_SUPPORTED)
+ *       Unknown chunks: for a specific chunk use the global default, when used
+ *          as the default discard the chunk data.
+ *    PNG_HANDLE_CHUNK_NEVER:
+ *       Discard the chunk data.
+ *    PNG_HANDLE_CHUNK_IF_SAFE:
+ *       Keep the chunk data if the chunk is not critical else raise a chunk
+ *       error.
+ *    PNG_HANDLE_CHUNK_ALWAYS:
+ *       Keep the chunk data.
+ *
+ * If the chunk data is saved it can be retrieved using png_get_unknown_chunks,
+ * below.  Notice that specifying "AS_DEFAULT" as a global default is equivalent
+ * to specifying "NEVER", however when "AS_DEFAULT" is used for specific chunks
+ * it simply resets the behavior to the libpng default.
+ *
+ * INTERACTION WITH USER CHUNK CALLBACKS:
+ * The per-chunk handling is always used when there is a png_user_chunk_ptr
+ * callback and the callback returns 0; the chunk is then always stored *unless*
+ * it is critical and the per-chunk setting is other than ALWAYS.  Notice that
+ * the global default is *not* used in this case.  (In effect the per-chunk
+ * value is incremented to at least IF_SAFE.)
+ *
+ * IMPORTANT NOTE: this behavior will change in libpng 1.7 - the global and
+ * per-chunk defaults will be honored.  If you want to preserve the current
+ * behavior when your callback returns 0 you must set PNG_HANDLE_CHUNK_IF_SAFE
+ * as the default - if you don't do this libpng 1.6 will issue a warning.
+ *
+ * If you want unhandled unknown chunks to be discarded in libpng 1.6 and
+ * earlier simply return '1' (handled).
+ *
+ * PNG_HANDLE_AS_UNKNOWN_SUPPORTED:
+ *    If this is *not* set known chunks will always be handled by libpng and
+ *    will never be stored in the unknown chunk list.  Known chunks listed to
+ *    png_set_keep_unknown_chunks will have no effect.  If it is set then known
+ *    chunks listed with a keep other than AS_DEFAULT will *never* be processed
+ *    by libpng, in addition critical chunks must either be processed by the
+ *    callback or saved.
+ *
+ *    The IHDR and IEND chunks must not be listed.  Because this turns off the
+ *    default handling for chunks that would otherwise be recognized the
+ *    behavior of libpng transformations may well become incorrect!
+ *
+ * WRITE:
+ *    When writing chunks the options only apply to the chunks specified by
+ *    png_set_unknown_chunks (below), libpng will *always* write known chunks
+ *    required by png_set_ calls and will always write the core critical chunks
+ *    (as required for PLTE).
+ *
+ *    Each chunk in the png_set_unknown_chunks list is looked up in the
+ *    png_set_keep_unknown_chunks list to find the keep setting, this is then
+ *    interpreted as follows:
+ *
+ *    PNG_HANDLE_CHUNK_AS_DEFAULT:
+ *       Write safe-to-copy chunks and write other chunks if the global
+ *       default is set to _ALWAYS, otherwise don't write this chunk.
+ *    PNG_HANDLE_CHUNK_NEVER:
+ *       Do not write the chunk.
+ *    PNG_HANDLE_CHUNK_IF_SAFE:
+ *       Write the chunk if it is safe-to-copy, otherwise do not write it.
+ *    PNG_HANDLE_CHUNK_ALWAYS:
+ *       Write the chunk.
+ *
+ * Note that the default behavior is effectively the opposite of the read case -
+ * in read unknown chunks are not stored by default, in write they are written
+ * by default.  Also the behavior of PNG_HANDLE_CHUNK_IF_SAFE is very different
+ * - on write the safe-to-copy bit is checked, on read the critical bit is
+ * checked and on read if the chunk is critical an error will be raised.
+ *
+ * num_chunks:
+ * ===========
+ *    If num_chunks is positive, then the "keep" parameter specifies the manner
+ *    for handling only those chunks appearing in the chunk_list array,
+ *    otherwise the chunk list array is ignored.
+ *
+ *    If num_chunks is 0 the "keep" parameter specifies the default behavior for
+ *    unknown chunks, as described above.
+ *
+ *    If num_chunks is negative, then the "keep" parameter specifies the manner
+ *    for handling all unknown chunks plus all chunks recognized by libpng
+ *    except for the IHDR, PLTE, tRNS, IDAT, and IEND chunks (which continue to
+ *    be processed by libpng.
+ */
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+PNG_EXPORT(172, void, png_set_keep_unknown_chunks, (png_structrp png_ptr,
+    int keep, png_const_bytep chunk_list, int num_chunks));
+#endif /* HANDLE_AS_UNKNOWN */
+
+/* The "keep" PNG_HANDLE_CHUNK_ parameter for the specified chunk is returned;
+ * the result is therefore true (non-zero) if special handling is required,
+ * false for the default handling.
+ */
+PNG_EXPORT(173, int, png_handle_as_unknown, (png_const_structrp png_ptr,
+    png_const_bytep chunk_name));
+#endif /* SET_UNKNOWN_CHUNKS */
+
+#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
+PNG_EXPORT(174, void, png_set_unknown_chunks, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_unknown_chunkp unknowns,
+    int num_unknowns));
+   /* NOTE: prior to 1.6.0 this routine set the 'location' field of the added
+    * unknowns to the location currently stored in the png_struct.  This is
+    * invariably the wrong value on write.  To fix this call the following API
+    * for each chunk in the list with the correct location.  If you know your
+    * code won't be compiled on earlier versions you can rely on
+    * png_set_unknown_chunks(write-ptr, png_get_unknown_chunks(read-ptr)) doing
+    * the correct thing.
+    */
+
+PNG_EXPORT(175, void, png_set_unknown_chunk_location,
+    (png_const_structrp png_ptr, png_inforp info_ptr, int chunk, int location));
+
+PNG_EXPORT(176, int, png_get_unknown_chunks, (png_const_structrp png_ptr,
+    png_inforp info_ptr, png_unknown_chunkpp entries));
+#endif
+
+/* Png_free_data() will turn off the "valid" flag for anything it frees.
+ * If you need to turn it off for a chunk that your application has freed,
+ * you can use png_set_invalid(png_ptr, info_ptr, PNG_INFO_CHNK);
+ */
+PNG_EXPORT(177, void, png_set_invalid, (png_const_structrp png_ptr,
+    png_inforp info_ptr, int mask));
+
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+/* The "params" pointer is currently not used and is for future expansion. */
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+PNG_EXPORT(178, void, png_read_png, (png_structrp png_ptr, png_inforp info_ptr,
+    int transforms, png_voidp params));
+#endif
+#ifdef PNG_WRITE_SUPPORTED
+PNG_EXPORT(179, void, png_write_png, (png_structrp png_ptr, png_inforp info_ptr,
+    int transforms, png_voidp params));
+#endif
+#endif
+
+PNG_EXPORT(180, png_const_charp, png_get_copyright,
+    (png_const_structrp png_ptr));
+PNG_EXPORT(181, png_const_charp, png_get_header_ver,
+    (png_const_structrp png_ptr));
+PNG_EXPORT(182, png_const_charp, png_get_header_version,
+    (png_const_structrp png_ptr));
+PNG_EXPORT(183, png_const_charp, png_get_libpng_ver,
+    (png_const_structrp png_ptr));
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+PNG_EXPORT(184, png_uint_32, png_permit_mng_features, (png_structrp png_ptr,
+    png_uint_32 mng_features_permitted));
+#endif
+
+/* For use in png_set_keep_unknown, added to version 1.2.6 */
+#define PNG_HANDLE_CHUNK_AS_DEFAULT   0
+#define PNG_HANDLE_CHUNK_NEVER        1
+#define PNG_HANDLE_CHUNK_IF_SAFE      2
+#define PNG_HANDLE_CHUNK_ALWAYS       3
+#define PNG_HANDLE_CHUNK_LAST         4
+
+/* Strip the prepended error numbers ("#nnn ") from error and warning
+ * messages before passing them to the error or warning handler.
+ */
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+PNG_EXPORT(185, void, png_set_strip_error_numbers, (png_structrp png_ptr,
+    png_uint_32 strip_mode));
+#endif
+
+/* Added in libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+PNG_EXPORT(186, void, png_set_user_limits, (png_structrp png_ptr,
+    png_uint_32 user_width_max, png_uint_32 user_height_max));
+PNG_EXPORT(187, png_uint_32, png_get_user_width_max,
+    (png_const_structrp png_ptr));
+PNG_EXPORT(188, png_uint_32, png_get_user_height_max,
+    (png_const_structrp png_ptr));
+/* Added in libpng-1.4.0 */
+PNG_EXPORT(189, void, png_set_chunk_cache_max, (png_structrp png_ptr,
+    png_uint_32 user_chunk_cache_max));
+PNG_EXPORT(190, png_uint_32, png_get_chunk_cache_max,
+    (png_const_structrp png_ptr));
+/* Added in libpng-1.4.1 */
+PNG_EXPORT(191, void, png_set_chunk_malloc_max, (png_structrp png_ptr,
+    png_alloc_size_t user_chunk_cache_max));
+PNG_EXPORT(192, png_alloc_size_t, png_get_chunk_malloc_max,
+    (png_const_structrp png_ptr));
+#endif
+
+#if defined(PNG_INCH_CONVERSIONS_SUPPORTED)
+PNG_EXPORT(193, png_uint_32, png_get_pixels_per_inch,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr));
+
+PNG_EXPORT(194, png_uint_32, png_get_x_pixels_per_inch,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr));
+
+PNG_EXPORT(195, png_uint_32, png_get_y_pixels_per_inch,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr));
+
+PNG_FP_EXPORT(196, float, png_get_x_offset_inches,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr))
+#ifdef PNG_FIXED_POINT_SUPPORTED /* otherwise not implemented. */
+PNG_FIXED_EXPORT(211, png_fixed_point, png_get_x_offset_inches_fixed,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr))
+#endif
+
+PNG_FP_EXPORT(197, float, png_get_y_offset_inches, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr))
+#ifdef PNG_FIXED_POINT_SUPPORTED /* otherwise not implemented. */
+PNG_FIXED_EXPORT(212, png_fixed_point, png_get_y_offset_inches_fixed,
+    (png_const_structrp png_ptr, png_const_inforp info_ptr))
+#endif
+
+#  ifdef PNG_pHYs_SUPPORTED
+PNG_EXPORT(198, png_uint_32, png_get_pHYs_dpi, (png_const_structrp png_ptr,
+    png_const_inforp info_ptr, png_uint_32 *res_x, png_uint_32 *res_y,
+    int *unit_type));
+#  endif /* pHYs */
+#endif  /* INCH_CONVERSIONS */
+
+/* Added in libpng-1.4.0 */
+#ifdef PNG_IO_STATE_SUPPORTED
+PNG_EXPORT(199, png_uint_32, png_get_io_state, (png_const_structrp png_ptr));
+
+/* Removed from libpng 1.6; use png_get_io_chunk_type. */
+PNG_REMOVED(200, png_const_bytep, png_get_io_chunk_name, (png_structrp png_ptr),
+    PNG_DEPRECATED)
+
+PNG_EXPORT(216, png_uint_32, png_get_io_chunk_type,
+    (png_const_structrp png_ptr));
+
+/* The flags returned by png_get_io_state() are the following: */
+#  define PNG_IO_NONE        0x0000   /* no I/O at this moment */
+#  define PNG_IO_READING     0x0001   /* currently reading */
+#  define PNG_IO_WRITING     0x0002   /* currently writing */
+#  define PNG_IO_SIGNATURE   0x0010   /* currently at the file signature */
+#  define PNG_IO_CHUNK_HDR   0x0020   /* currently at the chunk header */
+#  define PNG_IO_CHUNK_DATA  0x0040   /* currently at the chunk data */
+#  define PNG_IO_CHUNK_CRC   0x0080   /* currently at the chunk crc */
+#  define PNG_IO_MASK_OP     0x000f   /* current operation: reading/writing */
+#  define PNG_IO_MASK_LOC    0x00f0   /* current location: sig/hdr/data/crc */
+#endif /* IO_STATE */
+
+/* Interlace support.  The following macros are always defined so that if
+ * libpng interlace handling is turned off the macros may be used to handle
+ * interlaced images within the application.
+ */
+#define PNG_INTERLACE_ADAM7_PASSES 7
+
+/* Two macros to return the first row and first column of the original,
+ * full, image which appears in a given pass.  'pass' is in the range 0
+ * to 6 and the result is in the range 0 to 7.
+ */
+#define PNG_PASS_START_ROW(pass) (((1&~(pass))<<(3-((pass)>>1)))&7)
+#define PNG_PASS_START_COL(pass) (((1& (pass))<<(3-(((pass)+1)>>1)))&7)
+
+/* A macro to return the offset between pixels in the output row for a pair of
+ * pixels in the input - effectively the inverse of the 'COL_SHIFT' macro that
+ * follows.  Note that ROW_OFFSET is the offset from one row to the next whereas
+ * COL_OFFSET is from one column to the next, within a row.
+ */
+#define PNG_PASS_ROW_OFFSET(pass) ((pass)>2?(8>>(((pass)-1)>>1)):8)
+#define PNG_PASS_COL_OFFSET(pass) (1<<((7-(pass))>>1))
+
+/* Two macros to help evaluate the number of rows or columns in each
+ * pass.  This is expressed as a shift - effectively log2 of the number or
+ * rows or columns in each 8x8 tile of the original image.
+ */
+#define PNG_PASS_ROW_SHIFT(pass) ((pass)>2?(8-(pass))>>1:3)
+#define PNG_PASS_COL_SHIFT(pass) ((pass)>1?(7-(pass))>>1:3)
+
+/* Hence two macros to determine the number of rows or columns in a given
+ * pass of an image given its height or width.  In fact these macros may
+ * return non-zero even though the sub-image is empty, because the other
+ * dimension may be empty for a small image.
+ */
+#define PNG_PASS_ROWS(height, pass) (((height)+(((1<<PNG_PASS_ROW_SHIFT(pass))\
+   -1)-PNG_PASS_START_ROW(pass)))>>PNG_PASS_ROW_SHIFT(pass))
+#define PNG_PASS_COLS(width, pass) (((width)+(((1<<PNG_PASS_COL_SHIFT(pass))\
+   -1)-PNG_PASS_START_COL(pass)))>>PNG_PASS_COL_SHIFT(pass))
+
+/* For the reader row callbacks (both progressive and sequential) it is
+ * necessary to find the row in the output image given a row in an interlaced
+ * image, so two more macros:
+ */
+#define PNG_ROW_FROM_PASS_ROW(y_in, pass) \
+   (((y_in)<<PNG_PASS_ROW_SHIFT(pass))+PNG_PASS_START_ROW(pass))
+#define PNG_COL_FROM_PASS_COL(x_in, pass) \
+   (((x_in)<<PNG_PASS_COL_SHIFT(pass))+PNG_PASS_START_COL(pass))
+
+/* Two macros which return a boolean (0 or 1) saying whether the given row
+ * or column is in a particular pass.  These use a common utility macro that
+ * returns a mask for a given pass - the offset 'off' selects the row or
+ * column version.  The mask has the appropriate bit set for each column in
+ * the tile.
+ */
+#define PNG_PASS_MASK(pass,off) ( \
+   ((0x110145AF>>(((7-(off))-(pass))<<2)) & 0xF) | \
+   ((0x01145AF0>>(((7-(off))-(pass))<<2)) & 0xF0))
+
+#define PNG_ROW_IN_INTERLACE_PASS(y, pass) \
+   ((PNG_PASS_MASK(pass,0) >> ((y)&7)) & 1)
+#define PNG_COL_IN_INTERLACE_PASS(x, pass) \
+   ((PNG_PASS_MASK(pass,1) >> ((x)&7)) & 1)
+
+#ifdef PNG_READ_COMPOSITE_NODIV_SUPPORTED
+/* With these routines we avoid an integer divide, which will be slower on
+ * most machines.  However, it does take more operations than the corresponding
+ * divide method, so it may be slower on a few RISC systems.  There are two
+ * shifts (by 8 or 16 bits) and an addition, versus a single integer divide.
+ *
+ * Note that the rounding factors are NOT supposed to be the same!  128 and
+ * 32768 are correct for the NODIV code; 127 and 32767 are correct for the
+ * standard method.
+ *
+ * [Optimized code by Greg Roelofs and Mark Adler...blame us for bugs. :-) ]
+ */
+
+ /* fg and bg should be in `gamma 1.0' space; alpha is the opacity */
+
+#  define png_composite(composite, fg, alpha, bg)        \
+   {                                                     \
+      png_uint_16 temp = (png_uint_16)((png_uint_16)(fg) \
+          * (png_uint_16)(alpha)                         \
+          + (png_uint_16)(bg)*(png_uint_16)(255          \
+          - (png_uint_16)(alpha)) + 128);                \
+      (composite) = (png_byte)(((temp + (temp >> 8)) >> 8) & 0xff); \
+   }
+
+#  define png_composite_16(composite, fg, alpha, bg)     \
+   {                                                     \
+      png_uint_32 temp = (png_uint_32)((png_uint_32)(fg) \
+          * (png_uint_32)(alpha)                         \
+          + (png_uint_32)(bg)*(65535                     \
+          - (png_uint_32)(alpha)) + 32768);              \
+      (composite) = (png_uint_16)(0xffff & ((temp + (temp >> 16)) >> 16)); \
+   }
+
+#else  /* Standard method using integer division */
+
+#  define png_composite(composite, fg, alpha, bg)                      \
+   (composite) =                                                       \
+       (png_byte)(0xff & (((png_uint_16)(fg) * (png_uint_16)(alpha) +  \
+       (png_uint_16)(bg) * (png_uint_16)(255 - (png_uint_16)(alpha)) + \
+       127) / 255))
+
+#  define png_composite_16(composite, fg, alpha, bg)                       \
+   (composite) =                                                           \
+       (png_uint_16)(0xffff & (((png_uint_32)(fg) * (png_uint_32)(alpha) + \
+       (png_uint_32)(bg)*(png_uint_32)(65535 - (png_uint_32)(alpha)) +     \
+       32767) / 65535))
+#endif /* READ_COMPOSITE_NODIV */
+
+#ifdef PNG_READ_INT_FUNCTIONS_SUPPORTED
+PNG_EXPORT(201, png_uint_32, png_get_uint_32, (png_const_bytep buf));
+PNG_EXPORT(202, png_uint_16, png_get_uint_16, (png_const_bytep buf));
+PNG_EXPORT(203, png_int_32, png_get_int_32, (png_const_bytep buf));
+#endif
+
+PNG_EXPORT(204, png_uint_32, png_get_uint_31, (png_const_structrp png_ptr,
+    png_const_bytep buf));
+/* No png_get_int_16 -- may be added if there's a real need for it. */
+
+/* Place a 32-bit number into a buffer in PNG byte order (big-endian). */
+#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED
+PNG_EXPORT(205, void, png_save_uint_32, (png_bytep buf, png_uint_32 i));
+#endif
+#ifdef PNG_SAVE_INT_32_SUPPORTED
+PNG_EXPORT(206, void, png_save_int_32, (png_bytep buf, png_int_32 i));
+#endif
+
+/* Place a 16-bit number into a buffer in PNG byte order.
+ * The parameter is declared unsigned int, not png_uint_16,
+ * just to avoid potential problems on pre-ANSI C compilers.
+ */
+#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED
+PNG_EXPORT(207, void, png_save_uint_16, (png_bytep buf, unsigned int i));
+/* No png_save_int_16 -- may be added if there's a real need for it. */
+#endif
+
+#ifdef PNG_USE_READ_MACROS
+/* Inline macros to do direct reads of bytes from the input buffer.
+ * The png_get_int_32() routine assumes we are using two's complement
+ * format for negative values, which is almost certainly true.
+ */
+#  define PNG_get_uint_32(buf) \
+   (((png_uint_32)(*(buf)) << 24) + \
+    ((png_uint_32)(*((buf) + 1)) << 16) + \
+    ((png_uint_32)(*((buf) + 2)) << 8) + \
+    ((png_uint_32)(*((buf) + 3))))
+
+   /* From libpng-1.4.0 until 1.4.4, the png_get_uint_16 macro (but not the
+    * function) incorrectly returned a value of type png_uint_32.
+    */
+#  define PNG_get_uint_16(buf) \
+   ((png_uint_16) \
+    (((unsigned int)(*(buf)) << 8) + \
+    ((unsigned int)(*((buf) + 1)))))
+
+#  define PNG_get_int_32(buf) \
+   ((png_int_32)((*(buf) & 0x80) \
+    ? -((png_int_32)(((png_get_uint_32(buf)^0xffffffffU)+1U)&0x7fffffffU)) \
+    : (png_int_32)png_get_uint_32(buf)))
+
+/* If PNG_PREFIX is defined the same thing as below happens in pnglibconf.h,
+ * but defining a macro name prefixed with PNG_PREFIX.
+ */
+#  ifndef PNG_PREFIX
+#    define png_get_uint_32(buf) PNG_get_uint_32(buf)
+#    define png_get_uint_16(buf) PNG_get_uint_16(buf)
+#    define png_get_int_32(buf)  PNG_get_int_32(buf)
+#  endif
+#else
+#  ifdef PNG_PREFIX
+   /* No macros; revert to the (redefined) function */
+#    define PNG_get_uint_32 (png_get_uint_32)
+#    define PNG_get_uint_16 (png_get_uint_16)
+#    define PNG_get_int_32  (png_get_int_32)
+#  endif
+#endif
+
+#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
+PNG_EXPORT(242, void, png_set_check_for_invalid_index,
+    (png_structrp png_ptr, int allowed));
+#  ifdef PNG_GET_PALETTE_MAX_SUPPORTED
+PNG_EXPORT(243, int, png_get_palette_max, (png_const_structp png_ptr,
+    png_const_infop info_ptr));
+#  endif
+#endif /* CHECK_FOR_INVALID_INDEX */
+
+/*******************************************************************************
+ * Section 5: SIMPLIFIED API
+ *******************************************************************************
+ *
+ * Please read the documentation in libpng-manual.txt (TODO: write said
+ * documentation) if you don't understand what follows.
+ *
+ * The simplified API hides the details of both libpng and the PNG file format
+ * itself.  It allows PNG files to be read into a very limited number of
+ * in-memory bitmap formats or to be written from the same formats.  If these
+ * formats do not accommodate your needs then you can, and should, use the more
+ * sophisticated APIs above - these support a wide variety of in-memory formats
+ * and a wide variety of sophisticated transformations to those formats as well
+ * as a wide variety of APIs to manipulate ancillary information.
+ *
+ * To read a PNG file using the simplified API:
+ *
+ * 1) Declare a 'png_image' structure (see below) on the stack, set the
+ *    version field to PNG_IMAGE_VERSION and the 'opaque' pointer to NULL
+ *    (this is REQUIRED, your program may crash if you don't do it.)
+ * 2) Call the appropriate png_image_begin_read... function.
+ * 3) Set the png_image 'format' member to the required sample format.
+ * 4) Allocate a buffer for the image and, if required, the color-map.
+ * 5) Call png_image_finish_read to read the image and, if required, the
+ *    color-map into your buffers.
+ *
+ * There are no restrictions on the format of the PNG input itself; all valid
+ * color types, bit depths, and interlace methods are acceptable, and the
+ * input image is transformed as necessary to the requested in-memory format
+ * during the png_image_finish_read() step.  The only caveat is that if you
+ * request a color-mapped image from a PNG that is full-color or makes
+ * complex use of an alpha channel the transformation is extremely lossy and the
+ * result may look terrible.
+ *
+ * To write a PNG file using the simplified API:
+ *
+ * 1) Declare a 'png_image' structure on the stack and memset() it to all zero.
+ * 2) Initialize the members of the structure that describe the image, setting
+ *    the 'format' member to the format of the image samples.
+ * 3) Call the appropriate png_image_write... function with a pointer to the
+ *    image and, if necessary, the color-map to write the PNG data.
+ *
+ * png_image is a structure that describes the in-memory format of an image
+ * when it is being read or defines the in-memory format of an image that you
+ * need to write:
+ */
+#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) || \
+    defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
+
+#define PNG_IMAGE_VERSION 1
+
+typedef struct png_control *png_controlp;
+typedef struct
+{
+   png_controlp opaque;    /* Initialize to NULL, free with png_image_free */
+   png_uint_32  version;   /* Set to PNG_IMAGE_VERSION */
+   png_uint_32  width;     /* Image width in pixels (columns) */
+   png_uint_32  height;    /* Image height in pixels (rows) */
+   png_uint_32  format;    /* Image format as defined below */
+   png_uint_32  flags;     /* A bit mask containing informational flags */
+   png_uint_32  colormap_entries;
+                           /* Number of entries in the color-map */
+
+   /* In the event of an error or warning the following field will be set to a
+    * non-zero value and the 'message' field will contain a '\0' terminated
+    * string with the libpng error or warning message.  If both warnings and
+    * an error were encountered, only the error is recorded.  If there
+    * are multiple warnings, only the first one is recorded.
+    *
+    * The upper 30 bits of this value are reserved, the low two bits contain
+    * a value as follows:
+    */
+#  define PNG_IMAGE_WARNING 1
+#  define PNG_IMAGE_ERROR 2
+   /*
+    * The result is a two-bit code such that a value more than 1 indicates
+    * a failure in the API just called:
+    *
+    *    0 - no warning or error
+    *    1 - warning
+    *    2 - error
+    *    3 - error preceded by warning
+    */
+#  define PNG_IMAGE_FAILED(png_cntrl) ((((png_cntrl).warning_or_error)&0x03)>1)
+
+   png_uint_32  warning_or_error;
+
+   char         message[64];
+} png_image, *png_imagep;
+
+/* The samples of the image have one to four channels whose components have
+ * original values in the range 0 to 1.0:
+ *
+ * 1: A single gray or luminance channel (G).
+ * 2: A gray/luminance channel and an alpha channel (GA).
+ * 3: Three red, green, blue color channels (RGB).
+ * 4: Three color channels and an alpha channel (RGBA).
+ *
+ * The components are encoded in one of two ways:
+ *
+ * a) As a small integer, value 0..255, contained in a single byte.  For the
+ * alpha channel the original value is simply value/255.  For the color or
+ * luminance channels the value is encoded according to the sRGB specification
+ * and matches the 8-bit format expected by typical display devices.
+ *
+ * The color/gray channels are not scaled (pre-multiplied) by the alpha
+ * channel and are suitable for passing to color management software.
+ *
+ * b) As a value in the range 0..65535, contained in a 2-byte integer.  All
+ * channels can be converted to the original value by dividing by 65535; all
+ * channels are linear.  Color channels use the RGB encoding (RGB end-points) of
+ * the sRGB specification.  This encoding is identified by the
+ * PNG_FORMAT_FLAG_LINEAR flag below.
+ *
+ * When the simplified API needs to convert between sRGB and linear colorspaces,
+ * the actual sRGB transfer curve defined in the sRGB specification (see the
+ * article at <https://en.wikipedia.org/wiki/SRGB>) is used, not the gamma=1/2.2
+ * approximation used elsewhere in libpng.
+ *
+ * When an alpha channel is present it is expected to denote pixel coverage
+ * of the color or luminance channels and is returned as an associated alpha
+ * channel: the color/gray channels are scaled (pre-multiplied) by the alpha
+ * value.
+ *
+ * The samples are either contained directly in the image data, between 1 and 8
+ * bytes per pixel according to the encoding, or are held in a color-map indexed
+ * by bytes in the image data.  In the case of a color-map the color-map entries
+ * are individual samples, encoded as above, and the image data has one byte per
+ * pixel to select the relevant sample from the color-map.
+ */
+
+/* PNG_FORMAT_*
+ *
+ * #defines to be used in png_image::format.  Each #define identifies a
+ * particular layout of sample data and, if present, alpha values.  There are
+ * separate defines for each of the two component encodings.
+ *
+ * A format is built up using single bit flag values.  All combinations are
+ * valid.  Formats can be built up from the flag values or you can use one of
+ * the predefined values below.  When testing formats always use the FORMAT_FLAG
+ * macros to test for individual features - future versions of the library may
+ * add new flags.
+ *
+ * When reading or writing color-mapped images the format should be set to the
+ * format of the entries in the color-map then png_image_{read,write}_colormap
+ * called to read or write the color-map and set the format correctly for the
+ * image data.  Do not set the PNG_FORMAT_FLAG_COLORMAP bit directly!
+ *
+ * NOTE: libpng can be built with particular features disabled. If you see
+ * compiler errors because the definition of one of the following flags has been
+ * compiled out it is because libpng does not have the required support.  It is
+ * possible, however, for the libpng configuration to enable the format on just
+ * read or just write; in that case you may see an error at run time.  You can
+ * guard against this by checking for the definition of the appropriate
+ * "_SUPPORTED" macro, one of:
+ *
+ *    PNG_SIMPLIFIED_{READ,WRITE}_{BGR,AFIRST}_SUPPORTED
+ */
+#define PNG_FORMAT_FLAG_ALPHA    0x01U /* format with an alpha channel */
+#define PNG_FORMAT_FLAG_COLOR    0x02U /* color format: otherwise grayscale */
+#define PNG_FORMAT_FLAG_LINEAR   0x04U /* 2-byte channels else 1-byte */
+#define PNG_FORMAT_FLAG_COLORMAP 0x08U /* image data is color-mapped */
+
+#ifdef PNG_FORMAT_BGR_SUPPORTED
+#  define PNG_FORMAT_FLAG_BGR    0x10U /* BGR colors, else order is RGB */
+#endif
+
+#ifdef PNG_FORMAT_AFIRST_SUPPORTED
+#  define PNG_FORMAT_FLAG_AFIRST 0x20U /* alpha channel comes first */
+#endif
+
+#define PNG_FORMAT_FLAG_ASSOCIATED_ALPHA 0x40U /* alpha channel is associated */
+
+/* Commonly used formats have predefined macros.
+ *
+ * First the single byte (sRGB) formats:
+ */
+#define PNG_FORMAT_GRAY 0
+#define PNG_FORMAT_GA   PNG_FORMAT_FLAG_ALPHA
+#define PNG_FORMAT_AG   (PNG_FORMAT_GA|PNG_FORMAT_FLAG_AFIRST)
+#define PNG_FORMAT_RGB  PNG_FORMAT_FLAG_COLOR
+#define PNG_FORMAT_BGR  (PNG_FORMAT_FLAG_COLOR|PNG_FORMAT_FLAG_BGR)
+#define PNG_FORMAT_RGBA (PNG_FORMAT_RGB|PNG_FORMAT_FLAG_ALPHA)
+#define PNG_FORMAT_ARGB (PNG_FORMAT_RGBA|PNG_FORMAT_FLAG_AFIRST)
+#define PNG_FORMAT_BGRA (PNG_FORMAT_BGR|PNG_FORMAT_FLAG_ALPHA)
+#define PNG_FORMAT_ABGR (PNG_FORMAT_BGRA|PNG_FORMAT_FLAG_AFIRST)
+
+/* Then the linear 2-byte formats.  When naming these "Y" is used to
+ * indicate a luminance (gray) channel.
+ */
+#define PNG_FORMAT_LINEAR_Y PNG_FORMAT_FLAG_LINEAR
+#define PNG_FORMAT_LINEAR_Y_ALPHA (PNG_FORMAT_FLAG_LINEAR|PNG_FORMAT_FLAG_ALPHA)
+#define PNG_FORMAT_LINEAR_RGB (PNG_FORMAT_FLAG_LINEAR|PNG_FORMAT_FLAG_COLOR)
+#define PNG_FORMAT_LINEAR_RGB_ALPHA \
+   (PNG_FORMAT_FLAG_LINEAR|PNG_FORMAT_FLAG_COLOR|PNG_FORMAT_FLAG_ALPHA)
+
+/* With color-mapped formats the image data is one byte for each pixel, the byte
+ * is an index into the color-map which is formatted as above.  To obtain a
+ * color-mapped format it is sufficient just to add the PNG_FOMAT_FLAG_COLORMAP
+ * to one of the above definitions, or you can use one of the definitions below.
+ */
+#define PNG_FORMAT_RGB_COLORMAP  (PNG_FORMAT_RGB|PNG_FORMAT_FLAG_COLORMAP)
+#define PNG_FORMAT_BGR_COLORMAP  (PNG_FORMAT_BGR|PNG_FORMAT_FLAG_COLORMAP)
+#define PNG_FORMAT_RGBA_COLORMAP (PNG_FORMAT_RGBA|PNG_FORMAT_FLAG_COLORMAP)
+#define PNG_FORMAT_ARGB_COLORMAP (PNG_FORMAT_ARGB|PNG_FORMAT_FLAG_COLORMAP)
+#define PNG_FORMAT_BGRA_COLORMAP (PNG_FORMAT_BGRA|PNG_FORMAT_FLAG_COLORMAP)
+#define PNG_FORMAT_ABGR_COLORMAP (PNG_FORMAT_ABGR|PNG_FORMAT_FLAG_COLORMAP)
+
+/* PNG_IMAGE macros
+ *
+ * These are convenience macros to derive information from a png_image
+ * structure.  The PNG_IMAGE_SAMPLE_ macros return values appropriate to the
+ * actual image sample values - either the entries in the color-map or the
+ * pixels in the image.  The PNG_IMAGE_PIXEL_ macros return corresponding values
+ * for the pixels and will always return 1 for color-mapped formats.  The
+ * remaining macros return information about the rows in the image and the
+ * complete image.
+ *
+ * NOTE: All the macros that take a png_image::format parameter are compile time
+ * constants if the format parameter is, itself, a constant.  Therefore these
+ * macros can be used in array declarations and case labels where required.
+ * Similarly the macros are also pre-processor constants (sizeof is not used) so
+ * they can be used in #if tests.
+ *
+ * First the information about the samples.
+ */
+#define PNG_IMAGE_SAMPLE_CHANNELS(fmt)\
+   (((fmt)&(PNG_FORMAT_FLAG_COLOR|PNG_FORMAT_FLAG_ALPHA))+1)
+   /* Return the total number of channels in a given format: 1..4 */
+
+#define PNG_IMAGE_SAMPLE_COMPONENT_SIZE(fmt)\
+   ((((fmt) & PNG_FORMAT_FLAG_LINEAR) >> 2)+1)
+   /* Return the size in bytes of a single component of a pixel or color-map
+    * entry (as appropriate) in the image: 1 or 2.
+    */
+
+#define PNG_IMAGE_SAMPLE_SIZE(fmt)\
+   (PNG_IMAGE_SAMPLE_CHANNELS(fmt) * PNG_IMAGE_SAMPLE_COMPONENT_SIZE(fmt))
+   /* This is the size of the sample data for one sample.  If the image is
+    * color-mapped it is the size of one color-map entry (and image pixels are
+    * one byte in size), otherwise it is the size of one image pixel.
+    */
+
+#define PNG_IMAGE_MAXIMUM_COLORMAP_COMPONENTS(fmt)\
+   (PNG_IMAGE_SAMPLE_CHANNELS(fmt) * 256)
+   /* The maximum size of the color-map required by the format expressed in a
+    * count of components.  This can be used to compile-time allocate a
+    * color-map:
+    *
+    * png_uint_16 colormap[PNG_IMAGE_MAXIMUM_COLORMAP_COMPONENTS(linear_fmt)];
+    *
+    * png_byte colormap[PNG_IMAGE_MAXIMUM_COLORMAP_COMPONENTS(sRGB_fmt)];
+    *
+    * Alternatively use the PNG_IMAGE_COLORMAP_SIZE macro below to use the
+    * information from one of the png_image_begin_read_ APIs and dynamically
+    * allocate the required memory.
+    */
+
+/* Corresponding information about the pixels */
+#define PNG_IMAGE_PIXEL_(test,fmt)\
+   (((fmt)&PNG_FORMAT_FLAG_COLORMAP)?1:test(fmt))
+
+#define PNG_IMAGE_PIXEL_CHANNELS(fmt)\
+   PNG_IMAGE_PIXEL_(PNG_IMAGE_SAMPLE_CHANNELS,fmt)
+   /* The number of separate channels (components) in a pixel; 1 for a
+    * color-mapped image.
+    */
+
+#define PNG_IMAGE_PIXEL_COMPONENT_SIZE(fmt)\
+   PNG_IMAGE_PIXEL_(PNG_IMAGE_SAMPLE_COMPONENT_SIZE,fmt)
+   /* The size, in bytes, of each component in a pixel; 1 for a color-mapped
+    * image.
+    */
+
+#define PNG_IMAGE_PIXEL_SIZE(fmt) PNG_IMAGE_PIXEL_(PNG_IMAGE_SAMPLE_SIZE,fmt)
+   /* The size, in bytes, of a complete pixel; 1 for a color-mapped image. */
+
+/* Information about the whole row, or whole image */
+#define PNG_IMAGE_ROW_STRIDE(image)\
+   (PNG_IMAGE_PIXEL_CHANNELS((image).format) * (image).width)
+   /* Return the total number of components in a single row of the image; this
+    * is the minimum 'row stride', the minimum count of components between each
+    * row.  For a color-mapped image this is the minimum number of bytes in a
+    * row.
+    *
+    * WARNING: this macro overflows for some images with more than one component
+    * and very large image widths.  libpng will refuse to process an image where
+    * this macro would overflow.
+    */
+
+#define PNG_IMAGE_BUFFER_SIZE(image, row_stride)\
+   (PNG_IMAGE_PIXEL_COMPONENT_SIZE((image).format)*(image).height*(row_stride))
+   /* Return the size, in bytes, of an image buffer given a png_image and a row
+    * stride - the number of components to leave space for in each row.
+    *
+    * WARNING: this macro overflows a 32-bit integer for some large PNG images,
+    * libpng will refuse to process an image where such an overflow would occur.
+    */
+
+#define PNG_IMAGE_SIZE(image)\
+   PNG_IMAGE_BUFFER_SIZE(image, PNG_IMAGE_ROW_STRIDE(image))
+   /* Return the size, in bytes, of the image in memory given just a png_image;
+    * the row stride is the minimum stride required for the image.
+    */
+
+#define PNG_IMAGE_COLORMAP_SIZE(image)\
+   (PNG_IMAGE_SAMPLE_SIZE((image).format) * (image).colormap_entries)
+   /* Return the size, in bytes, of the color-map of this image.  If the image
+    * format is not a color-map format this will return a size sufficient for
+    * 256 entries in the given format; check PNG_FORMAT_FLAG_COLORMAP if
+    * you don't want to allocate a color-map in this case.
+    */
+
+/* PNG_IMAGE_FLAG_*
+ *
+ * Flags containing additional information about the image are held in the
+ * 'flags' field of png_image.
+ */
+#define PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB 0x01
+   /* This indicates that the RGB values of the in-memory bitmap do not
+    * correspond to the red, green and blue end-points defined by sRGB.
+    */
+
+#define PNG_IMAGE_FLAG_FAST 0x02
+   /* On write emphasise speed over compression; the resultant PNG file will be
+    * larger but will be produced significantly faster, particular for large
+    * images.  Do not use this option for images which will be distributed, only
+    * used it when producing intermediate files that will be read back in
+    * repeatedly.  For a typical 24-bit image the option will double the read
+    * speed at the cost of increasing the image size by 25%, however for many
+    * more compressible images the PNG file can be 10 times larger with only a
+    * slight speed gain.
+    */
+
+#define PNG_IMAGE_FLAG_16BIT_sRGB 0x04
+   /* On read if the image is a 16-bit per component image and there is no gAMA
+    * or sRGB chunk assume that the components are sRGB encoded.  Notice that
+    * images output by the simplified API always have gamma information; setting
+    * this flag only affects the interpretation of 16-bit images from an
+    * external source.  It is recommended that the application expose this flag
+    * to the user; the user can normally easily recognize the difference between
+    * linear and sRGB encoding.  This flag has no effect on write - the data
+    * passed to the write APIs must have the correct encoding (as defined
+    * above.)
+    *
+    * If the flag is not set (the default) input 16-bit per component data is
+    * assumed to be linear.
+    *
+    * NOTE: the flag can only be set after the png_image_begin_read_ call,
+    * because that call initializes the 'flags' field.
+    */
+
+#ifdef PNG_SIMPLIFIED_READ_SUPPORTED
+/* READ APIs
+ * ---------
+ *
+ * The png_image passed to the read APIs must have been initialized by setting
+ * the png_controlp field 'opaque' to NULL (or, safer, memset the whole thing.)
+ */
+#ifdef PNG_STDIO_SUPPORTED
+PNG_EXPORT(234, int, png_image_begin_read_from_file, (png_imagep image,
+   const char *file_name));
+   /* The named file is opened for read and the image header is filled in
+    * from the PNG header in the file.
+    */
+
+PNG_EXPORT(235, int, png_image_begin_read_from_stdio, (png_imagep image,
+   FILE* file));
+   /* The PNG header is read from the stdio FILE object. */
+#endif /* STDIO */
+
+PNG_EXPORT(236, int, png_image_begin_read_from_memory, (png_imagep image,
+   png_const_voidp memory, size_t size));
+   /* The PNG header is read from the given memory buffer. */
+
+PNG_EXPORT(237, int, png_image_finish_read, (png_imagep image,
+   png_const_colorp background, void *buffer, png_int_32 row_stride,
+   void *colormap));
+   /* Finish reading the image into the supplied buffer and clean up the
+    * png_image structure.
+    *
+    * row_stride is the step, in byte or 2-byte units as appropriate,
+    * between adjacent rows.  A positive stride indicates that the top-most row
+    * is first in the buffer - the normal top-down arrangement.  A negative
+    * stride indicates that the bottom-most row is first in the buffer.
+    *
+    * background need only be supplied if an alpha channel must be removed from
+    * a png_byte format and the removal is to be done by compositing on a solid
+    * color; otherwise it may be NULL and any composition will be done directly
+    * onto the buffer.  The value is an sRGB color to use for the background,
+    * for grayscale output the green channel is used.
+    *
+    * background must be supplied when an alpha channel must be removed from a
+    * single byte color-mapped output format, in other words if:
+    *
+    * 1) The original format from png_image_begin_read_from_* had
+    *    PNG_FORMAT_FLAG_ALPHA set.
+    * 2) The format set by the application does not.
+    * 3) The format set by the application has PNG_FORMAT_FLAG_COLORMAP set and
+    *    PNG_FORMAT_FLAG_LINEAR *not* set.
+    *
+    * For linear output removing the alpha channel is always done by compositing
+    * on black and background is ignored.
+    *
+    * colormap must be supplied when PNG_FORMAT_FLAG_COLORMAP is set.  It must
+    * be at least the size (in bytes) returned by PNG_IMAGE_COLORMAP_SIZE.
+    * image->colormap_entries will be updated to the actual number of entries
+    * written to the colormap; this may be less than the original value.
+    */
+
+PNG_EXPORT(238, void, png_image_free, (png_imagep image));
+   /* Free any data allocated by libpng in image->opaque, setting the pointer to
+    * NULL.  May be called at any time after the structure is initialized.
+    */
+#endif /* SIMPLIFIED_READ */
+
+#ifdef PNG_SIMPLIFIED_WRITE_SUPPORTED
+/* WRITE APIS
+ * ----------
+ * For write you must initialize a png_image structure to describe the image to
+ * be written.  To do this use memset to set the whole structure to 0 then
+ * initialize fields describing your image.
+ *
+ * version: must be set to PNG_IMAGE_VERSION
+ * opaque: must be initialized to NULL
+ * width: image width in pixels
+ * height: image height in rows
+ * format: the format of the data (image and color-map) you wish to write
+ * flags: set to 0 unless one of the defined flags applies; set
+ *    PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB for color format images where the RGB
+ *    values do not correspond to the colors in sRGB.
+ * colormap_entries: set to the number of entries in the color-map (0 to 256)
+ */
+#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED
+PNG_EXPORT(239, int, png_image_write_to_file, (png_imagep image,
+   const char *file, int convert_to_8bit, const void *buffer,
+   png_int_32 row_stride, const void *colormap));
+   /* Write the image to the named file. */
+
+PNG_EXPORT(240, int, png_image_write_to_stdio, (png_imagep image, FILE *file,
+   int convert_to_8_bit, const void *buffer, png_int_32 row_stride,
+   const void *colormap));
+   /* Write the image to the given (FILE*). */
+#endif /* SIMPLIFIED_WRITE_STDIO */
+
+/* With all write APIs if image is in one of the linear formats with 16-bit
+ * data then setting convert_to_8_bit will cause the output to be an 8-bit PNG
+ * gamma encoded according to the sRGB specification, otherwise a 16-bit linear
+ * encoded PNG file is written.
+ *
+ * With color-mapped data formats the colormap parameter point to a color-map
+ * with at least image->colormap_entries encoded in the specified format.  If
+ * the format is linear the written PNG color-map will be converted to sRGB
+ * regardless of the convert_to_8_bit flag.
+ *
+ * With all APIs row_stride is handled as in the read APIs - it is the spacing
+ * from one row to the next in component sized units (1 or 2 bytes) and if
+ * negative indicates a bottom-up row layout in the buffer.  If row_stride is
+ * zero, libpng will calculate it for you from the image width and number of
+ * channels.
+ *
+ * Note that the write API does not support interlacing, sub-8-bit pixels or
+ * most ancillary chunks.  If you need to write text chunks (e.g. for copyright
+ * notices) you need to use one of the other APIs.
+ */
+
+PNG_EXPORT(245, int, png_image_write_to_memory, (png_imagep image, void *memory,
+   png_alloc_size_t * PNG_RESTRICT memory_bytes, int convert_to_8_bit,
+   const void *buffer, png_int_32 row_stride, const void *colormap));
+   /* Write the image to the given memory buffer.  The function both writes the
+    * whole PNG data stream to *memory and updates *memory_bytes with the count
+    * of bytes written.
+    *
+    * 'memory' may be NULL.  In this case *memory_bytes is not read however on
+    * success the number of bytes which would have been written will still be
+    * stored in *memory_bytes.  On failure *memory_bytes will contain 0.
+    *
+    * If 'memory' is not NULL it must point to memory[*memory_bytes] of
+    * writeable memory.
+    *
+    * If the function returns success memory[*memory_bytes] (if 'memory' is not
+    * NULL) contains the written PNG data.  *memory_bytes will always be less
+    * than or equal to the original value.
+    *
+    * If the function returns false and *memory_bytes was not changed an error
+    * occurred during write.  If *memory_bytes was changed, or is not 0 if
+    * 'memory' was NULL, the write would have succeeded but for the memory
+    * buffer being too small.  *memory_bytes contains the required number of
+    * bytes and will be bigger that the original value.
+    */
+
+#define png_image_write_get_memory_size(image, size, convert_to_8_bit, buffer,\
+   row_stride, colormap)\
+   png_image_write_to_memory(&(image), 0, &(size), convert_to_8_bit, buffer,\
+         row_stride, colormap)
+   /* Return the amount of memory in 'size' required to compress this image.
+    * The png_image structure 'image' must be filled in as in the above
+    * function and must not be changed before the actual write call, the buffer
+    * and all other parameters must also be identical to that in the final
+    * write call.  The 'size' variable need not be initialized.
+    *
+    * NOTE: the macro returns true/false, if false is returned 'size' will be
+    * set to zero and the write failed and probably will fail if tried again.
+    */
+
+/* You can pre-allocate the buffer by making sure it is of sufficient size
+ * regardless of the amount of compression achieved.  The buffer size will
+ * always be bigger than the original image and it will never be filled.  The
+ * following macros are provided to assist in allocating the buffer.
+ */
+#define PNG_IMAGE_DATA_SIZE(image) (PNG_IMAGE_SIZE(image)+(image).height)
+   /* The number of uncompressed bytes in the PNG byte encoding of the image;
+    * uncompressing the PNG IDAT data will give this number of bytes.
+    *
+    * NOTE: while PNG_IMAGE_SIZE cannot overflow for an image in memory this
+    * macro can because of the extra bytes used in the PNG byte encoding.  You
+    * need to avoid this macro if your image size approaches 2^30 in width or
+    * height.  The same goes for the remainder of these macros; they all produce
+    * bigger numbers than the actual in-memory image size.
+    */
+#ifndef PNG_ZLIB_MAX_SIZE
+#  define PNG_ZLIB_MAX_SIZE(b) ((b)+(((b)+7U)>>3)+(((b)+63U)>>6)+11U)
+   /* An upper bound on the number of compressed bytes given 'b' uncompressed
+    * bytes.  This is based on deflateBounds() in zlib; different
+    * implementations of zlib compression may conceivably produce more data so
+    * if your zlib implementation is not zlib itself redefine this macro
+    * appropriately.
+    */
+#endif
+
+#define PNG_IMAGE_COMPRESSED_SIZE_MAX(image)\
+   PNG_ZLIB_MAX_SIZE((png_alloc_size_t)PNG_IMAGE_DATA_SIZE(image))
+   /* An upper bound on the size of the data in the PNG IDAT chunks. */
+
+#define PNG_IMAGE_PNG_SIZE_MAX_(image, image_size)\
+   ((8U/*sig*/+25U/*IHDR*/+16U/*gAMA*/+44U/*cHRM*/+12U/*IEND*/+\
+    (((image).format&PNG_FORMAT_FLAG_COLORMAP)?/*colormap: PLTE, tRNS*/\
+    12U+3U*(image).colormap_entries/*PLTE data*/+\
+    (((image).format&PNG_FORMAT_FLAG_ALPHA)?\
+    12U/*tRNS*/+(image).colormap_entries:0U):0U)+\
+    12U)+(12U*((image_size)/PNG_ZBUF_SIZE))/*IDAT*/+(image_size))
+   /* A helper for the following macro; if your compiler cannot handle the
+    * following macro use this one with the result of
+    * PNG_IMAGE_COMPRESSED_SIZE_MAX(image) as the second argument (most
+    * compilers should handle this just fine.)
+    */
+
+#define PNG_IMAGE_PNG_SIZE_MAX(image)\
+   PNG_IMAGE_PNG_SIZE_MAX_(image, PNG_IMAGE_COMPRESSED_SIZE_MAX(image))
+   /* An upper bound on the total length of the PNG data stream for 'image'.
+    * The result is of type png_alloc_size_t, on 32-bit systems this may
+    * overflow even though PNG_IMAGE_DATA_SIZE does not overflow; the write will
+    * run out of buffer space but return a corrected size which should work.
+    */
+#endif /* SIMPLIFIED_WRITE */
+/*******************************************************************************
+ *  END OF SIMPLIFIED API
+ ******************************************************************************/
+#endif /* SIMPLIFIED_{READ|WRITE} */
+
+/*******************************************************************************
+ * Section 6: IMPLEMENTATION OPTIONS
+ *******************************************************************************
+ *
+ * Support for arbitrary implementation-specific optimizations.  The API allows
+ * particular options to be turned on or off.  'Option' is the number of the
+ * option and 'onoff' is 0 (off) or non-0 (on).  The value returned is given
+ * by the PNG_OPTION_ defines below.
+ *
+ * HARDWARE: normally hardware capabilities, such as the Intel SSE instructions,
+ *           are detected at run time, however sometimes it may be impossible
+ *           to do this in user mode, in which case it is necessary to discover
+ *           the capabilities in an OS specific way.  Such capabilities are
+ *           listed here when libpng has support for them and must be turned
+ *           ON by the application if present.
+ *
+ * SOFTWARE: sometimes software optimizations actually result in performance
+ *           decrease on some architectures or systems, or with some sets of
+ *           PNG images.  'Software' options allow such optimizations to be
+ *           selected at run time.
+ */
+#ifdef PNG_SET_OPTION_SUPPORTED
+#ifdef PNG_ARM_NEON_API_SUPPORTED
+#  define PNG_ARM_NEON   0 /* HARDWARE: ARM Neon SIMD instructions supported */
+#endif
+#define PNG_MAXIMUM_INFLATE_WINDOW 2 /* SOFTWARE: force maximum window */
+#define PNG_SKIP_sRGB_CHECK_PROFILE 4 /* SOFTWARE: Check ICC profile for sRGB */
+#ifdef PNG_MIPS_MSA_API_SUPPORTED
+#  define PNG_MIPS_MSA   6 /* HARDWARE: MIPS Msa SIMD instructions supported */
+#endif
+#ifdef PNG_DISABLE_ADLER32_CHECK_SUPPORTED
+#  define PNG_IGNORE_ADLER32 8 /* SOFTWARE: disable Adler32 check on IDAT */
+#endif
+#ifdef PNG_POWERPC_VSX_API_SUPPORTED
+#  define PNG_POWERPC_VSX   10 /* HARDWARE: PowerPC VSX SIMD instructions
+                                * supported */
+#endif
+#ifdef PNG_MIPS_MMI_API_SUPPORTED
+#  define PNG_MIPS_MMI   12 /* HARDWARE: MIPS MMI SIMD instructions supported */
+#endif
+
+#define PNG_OPTION_NEXT  14 /* Next option - numbers must be even */
+
+/* Return values: NOTE: there are four values and 'off' is *not* zero */
+#define PNG_OPTION_UNSET   0 /* Unset - defaults to off */
+#define PNG_OPTION_INVALID 1 /* Option number out of range */
+#define PNG_OPTION_OFF     2
+#define PNG_OPTION_ON      3
+
+PNG_EXPORT(244, int, png_set_option, (png_structrp png_ptr, int option,
+   int onoff));
+#endif /* SET_OPTION */
+
+/*******************************************************************************
+ *  END OF HARDWARE AND SOFTWARE OPTIONS
+ ******************************************************************************/
+
+/* Maintainer: Put new public prototypes here ^, in libpng.3, in project
+ * defs, and in scripts/symbols.def.
+ */
+
+/* The last ordinal number (this is the *last* one already used; the next
+ * one to use is one more than this.)
+ */
+#ifdef PNG_EXPORT_LAST_ORDINAL
+  PNG_EXPORT_LAST_ORDINAL(249);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PNG_VERSION_INFO_ONLY */
+/* Do not put anything past this line */
+#endif /* PNG_H */
diff --git a/reg-io/png/lpng/pngconf.h b/reg-io/png/lpng/pngconf.h
new file mode 100644
index 00000000..4dba8921
--- /dev/null
+++ b/reg-io/png/lpng/pngconf.h
@@ -0,0 +1,623 @@
+
+/* pngconf.h - machine-configurable file for libpng
+ *
+ * libpng version 1.6.42
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2016,2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ *
+ * Any machine specific code is near the front of this file, so if you
+ * are configuring libpng for a machine, you may want to read the section
+ * starting here down to where it starts to typedef png_color, png_text,
+ * and png_info.
+ */
+
+#ifndef PNGCONF_H
+#define PNGCONF_H
+
+#ifndef PNG_BUILDING_SYMBOL_TABLE /* else includes may cause problems */
+
+/* From libpng 1.6.0 libpng requires an ANSI X3.159-1989 ("ISOC90") compliant C
+ * compiler for correct compilation.  The following header files are required by
+ * the standard.  If your compiler doesn't provide these header files, or they
+ * do not match the standard, you will need to provide/improve them.
+ */
+#include <limits.h>
+#include <stddef.h>
+
+/* Library header files.  These header files are all defined by ISOC90; libpng
+ * expects conformant implementations, however, an ISOC90 conformant system need
+ * not provide these header files if the functionality cannot be implemented.
+ * In this case it will be necessary to disable the relevant parts of libpng in
+ * the build of pnglibconf.h.
+ *
+ * Prior to 1.6.0 string.h was included here; the API changes in 1.6.0 to not
+ * include this unnecessary header file.
+ */
+
+#ifdef PNG_STDIO_SUPPORTED
+   /* Required for the definition of FILE: */
+#  include <stdio.h>
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* Required for the definition of jmp_buf and the declaration of longjmp: */
+#  include <setjmp.h>
+#endif
+
+#ifdef PNG_CONVERT_tIME_SUPPORTED
+   /* Required for struct tm: */
+#  include <time.h>
+#endif
+
+#endif /* PNG_BUILDING_SYMBOL_TABLE */
+
+/* Prior to 1.6.0, it was possible to turn off 'const' in declarations,
+ * using PNG_NO_CONST.  This is no longer supported.
+ */
+#define PNG_CONST const /* backward compatibility only */
+
+/* This controls optimization of the reading of 16-bit and 32-bit
+ * values from PNG files.  It can be set on a per-app-file basis: it
+ * just changes whether a macro is used when the function is called.
+ * The library builder sets the default; if read functions are not
+ * built into the library the macro implementation is forced on.
+ */
+#ifndef PNG_READ_INT_FUNCTIONS_SUPPORTED
+#  define PNG_USE_READ_MACROS
+#endif
+#if !defined(PNG_NO_USE_READ_MACROS) && !defined(PNG_USE_READ_MACROS)
+#  if PNG_DEFAULT_READ_MACROS
+#    define PNG_USE_READ_MACROS
+#  endif
+#endif
+
+/* COMPILER SPECIFIC OPTIONS.
+ *
+ * These options are provided so that a variety of difficult compilers
+ * can be used.  Some are fixed at build time (e.g. PNG_API_RULE
+ * below) but still have compiler specific implementations, others
+ * may be changed on a per-file basis when compiling against libpng.
+ */
+
+/* The PNGARG macro was used in versions of libpng prior to 1.6.0 to protect
+ * against legacy (pre ISOC90) compilers that did not understand function
+ * prototypes.  It is not required for modern C compilers.
+ */
+#ifndef PNGARG
+#  define PNGARG(arglist) arglist
+#endif
+
+/* Function calling conventions.
+ * =============================
+ * Normally it is not necessary to specify to the compiler how to call
+ * a function - it just does it - however on x86 systems derived from
+ * Microsoft and Borland C compilers ('IBM PC', 'DOS', 'Windows' systems
+ * and some others) there are multiple ways to call a function and the
+ * default can be changed on the compiler command line.  For this reason
+ * libpng specifies the calling convention of every exported function and
+ * every function called via a user supplied function pointer.  This is
+ * done in this file by defining the following macros:
+ *
+ * PNGAPI    Calling convention for exported functions.
+ * PNGCBAPI  Calling convention for user provided (callback) functions.
+ * PNGCAPI   Calling convention used by the ANSI-C library (required
+ *           for longjmp callbacks and sometimes used internally to
+ *           specify the calling convention for zlib).
+ *
+ * These macros should never be overridden.  If it is necessary to
+ * change calling convention in a private build this can be done
+ * by setting PNG_API_RULE (which defaults to 0) to one of the values
+ * below to select the correct 'API' variants.
+ *
+ * PNG_API_RULE=0 Use PNGCAPI - the 'C' calling convention - throughout.
+ *                This is correct in every known environment.
+ * PNG_API_RULE=1 Use the operating system convention for PNGAPI and
+ *                the 'C' calling convention (from PNGCAPI) for
+ *                callbacks (PNGCBAPI).  This is no longer required
+ *                in any known environment - if it has to be used
+ *                please post an explanation of the problem to the
+ *                libpng mailing list.
+ *
+ * These cases only differ if the operating system does not use the C
+ * calling convention, at present this just means the above cases
+ * (x86 DOS/Windows systems) and, even then, this does not apply to
+ * Cygwin running on those systems.
+ *
+ * Note that the value must be defined in pnglibconf.h so that what
+ * the application uses to call the library matches the conventions
+ * set when building the library.
+ */
+
+/* Symbol export
+ * =============
+ * When building a shared library it is almost always necessary to tell
+ * the compiler which symbols to export.  The png.h macro 'PNG_EXPORT'
+ * is used to mark the symbols.  On some systems these symbols can be
+ * extracted at link time and need no special processing by the compiler,
+ * on other systems the symbols are flagged by the compiler and just
+ * the declaration requires a special tag applied (unfortunately) in a
+ * compiler dependent way.  Some systems can do either.
+ *
+ * A small number of older systems also require a symbol from a DLL to
+ * be flagged to the program that calls it.  This is a problem because
+ * we do not know in the header file included by application code that
+ * the symbol will come from a shared library, as opposed to a statically
+ * linked one.  For this reason the application must tell us by setting
+ * the magic flag PNG_USE_DLL to turn on the special processing before
+ * it includes png.h.
+ *
+ * Four additional macros are used to make this happen:
+ *
+ * PNG_IMPEXP The magic (if any) to cause a symbol to be exported from
+ *            the build or imported if PNG_USE_DLL is set - compiler
+ *            and system specific.
+ *
+ * PNG_EXPORT_TYPE(type) A macro that pre or appends PNG_IMPEXP to
+ *                       'type', compiler specific.
+ *
+ * PNG_DLL_EXPORT Set to the magic to use during a libpng build to
+ *                make a symbol exported from the DLL.  Not used in the
+ *                public header files; see pngpriv.h for how it is used
+ *                in the libpng build.
+ *
+ * PNG_DLL_IMPORT Set to the magic to force the libpng symbols to come
+ *                from a DLL - used to define PNG_IMPEXP when
+ *                PNG_USE_DLL is set.
+ */
+
+/* System specific discovery.
+ * ==========================
+ * This code is used at build time to find PNG_IMPEXP, the API settings
+ * and PNG_EXPORT_TYPE(), it may also set a macro to indicate the DLL
+ * import processing is possible.  On Windows systems it also sets
+ * compiler-specific macros to the values required to change the calling
+ * conventions of the various functions.
+ */
+#if defined(_WIN32) || defined(__WIN32__) || defined(__NT__) || \
+    defined(__CYGWIN__)
+  /* Windows system (DOS doesn't support DLLs).  Includes builds under Cygwin or
+   * MinGW on any architecture currently supported by Windows.  Also includes
+   * Watcom builds but these need special treatment because they are not
+   * compatible with GCC or Visual C because of different calling conventions.
+   */
+#  if PNG_API_RULE == 2
+   /* If this line results in an error, either because __watcall is not
+    * understood or because of a redefine just below you cannot use *this*
+    * build of the library with the compiler you are using.  *This* build was
+    * build using Watcom and applications must also be built using Watcom!
+    */
+#    define PNGCAPI __watcall
+#  endif
+
+#  if defined(__GNUC__) || (defined(_MSC_VER) && (_MSC_VER >= 800))
+#    define PNGCAPI __cdecl
+#    if PNG_API_RULE == 1
+   /* If this line results in an error __stdcall is not understood and
+    * PNG_API_RULE should not have been set to '1'.
+    */
+#      define PNGAPI __stdcall
+#    endif
+#  else
+   /* An older compiler, or one not detected (erroneously) above,
+    * if necessary override on the command line to get the correct
+    * variants for the compiler.
+    */
+#    ifndef PNGCAPI
+#      define PNGCAPI _cdecl
+#    endif
+#    if PNG_API_RULE == 1 && !defined(PNGAPI)
+#      define PNGAPI _stdcall
+#    endif
+#  endif /* compiler/api */
+
+  /* NOTE: PNGCBAPI always defaults to PNGCAPI. */
+
+#  if defined(PNGAPI) && !defined(PNG_USER_PRIVATEBUILD)
+#     error "PNG_USER_PRIVATEBUILD must be defined if PNGAPI is changed"
+#  endif
+
+#  if (defined(_MSC_VER) && _MSC_VER < 800) ||\
+      (defined(__BORLANDC__) && __BORLANDC__ < 0x500)
+   /* older Borland and MSC
+    * compilers used '__export' and required this to be after
+    * the type.
+    */
+#    ifndef PNG_EXPORT_TYPE
+#      define PNG_EXPORT_TYPE(type) type PNG_IMPEXP
+#    endif
+#    define PNG_DLL_EXPORT __export
+#  else /* newer compiler */
+#    define PNG_DLL_EXPORT __declspec(dllexport)
+#    ifndef PNG_DLL_IMPORT
+#      define PNG_DLL_IMPORT __declspec(dllimport)
+#    endif
+#  endif /* compiler */
+
+#else /* !Windows */
+#  if (defined(__IBMC__) || defined(__IBMCPP__)) && defined(__OS2__)
+#    define PNGAPI _System
+#  else /* !Windows/x86 && !OS/2 */
+   /* Use the defaults, or define PNG*API on the command line (but
+    * this will have to be done for every compile!)
+    */
+#  endif /* other system, !OS/2 */
+#endif /* !Windows/x86 */
+
+/* Now do all the defaulting . */
+#ifndef PNGCAPI
+#  define PNGCAPI
+#endif
+#ifndef PNGCBAPI
+#  define PNGCBAPI PNGCAPI
+#endif
+#ifndef PNGAPI
+#  define PNGAPI PNGCAPI
+#endif
+
+/* PNG_IMPEXP may be set on the compilation system command line or (if not set)
+ * then in an internal header file when building the library, otherwise (when
+ * using the library) it is set here.
+ */
+#ifndef PNG_IMPEXP
+#  if defined(PNG_USE_DLL) && defined(PNG_DLL_IMPORT)
+   /* This forces use of a DLL, disallowing static linking */
+#    define PNG_IMPEXP PNG_DLL_IMPORT
+#  endif
+
+#  ifndef PNG_IMPEXP
+#    define PNG_IMPEXP
+#  endif
+#endif
+
+/* In 1.5.2 the definition of PNG_FUNCTION has been changed to always treat
+ * 'attributes' as a storage class - the attributes go at the start of the
+ * function definition, and attributes are always appended regardless of the
+ * compiler.  This considerably simplifies these macros but may cause problems
+ * if any compilers both need function attributes and fail to handle them as
+ * a storage class (this is unlikely.)
+ */
+#ifndef PNG_FUNCTION
+#  define PNG_FUNCTION(type, name, args, attributes) attributes type name args
+#endif
+
+#ifndef PNG_EXPORT_TYPE
+#  define PNG_EXPORT_TYPE(type) PNG_IMPEXP type
+#endif
+
+   /* The ordinal value is only relevant when preprocessing png.h for symbol
+    * table entries, so we discard it here.  See the .dfn files in the
+    * scripts directory.
+    */
+
+#ifndef PNG_EXPORTA
+#  define PNG_EXPORTA(ordinal, type, name, args, attributes) \
+      PNG_FUNCTION(PNG_EXPORT_TYPE(type), (PNGAPI name), PNGARG(args), \
+      PNG_LINKAGE_API attributes)
+#endif
+
+/* ANSI-C (C90) does not permit a macro to be invoked with an empty argument,
+ * so make something non-empty to satisfy the requirement:
+ */
+#define PNG_EMPTY /*empty list*/
+
+#define PNG_EXPORT(ordinal, type, name, args) \
+   PNG_EXPORTA(ordinal, type, name, args, PNG_EMPTY)
+
+/* Use PNG_REMOVED to comment out a removed interface. */
+#ifndef PNG_REMOVED
+#  define PNG_REMOVED(ordinal, type, name, args, attributes)
+#endif
+
+#ifndef PNG_CALLBACK
+#  define PNG_CALLBACK(type, name, args) type (PNGCBAPI name) PNGARG(args)
+#endif
+
+/* Support for compiler specific function attributes.  These are used
+ * so that where compiler support is available incorrect use of API
+ * functions in png.h will generate compiler warnings.
+ *
+ * Added at libpng-1.2.41.
+ */
+
+#ifndef PNG_NO_PEDANTIC_WARNINGS
+#  ifndef PNG_PEDANTIC_WARNINGS_SUPPORTED
+#    define PNG_PEDANTIC_WARNINGS_SUPPORTED
+#  endif
+#endif
+
+#ifdef PNG_PEDANTIC_WARNINGS_SUPPORTED
+  /* Support for compiler specific function attributes.  These are used
+   * so that where compiler support is available, incorrect use of API
+   * functions in png.h will generate compiler warnings.  Added at libpng
+   * version 1.2.41.  Disabling these removes the warnings but may also produce
+   * less efficient code.
+   */
+#  if defined(__clang__) && defined(__has_attribute)
+   /* Clang defines both __clang__ and __GNUC__. Check __clang__ first. */
+#    if !defined(PNG_USE_RESULT) && __has_attribute(__warn_unused_result__)
+#      define PNG_USE_RESULT __attribute__((__warn_unused_result__))
+#    endif
+#    if !defined(PNG_NORETURN) && __has_attribute(__noreturn__)
+#      define PNG_NORETURN __attribute__((__noreturn__))
+#    endif
+#    if !defined(PNG_ALLOCATED) && __has_attribute(__malloc__)
+#      define PNG_ALLOCATED __attribute__((__malloc__))
+#    endif
+#    if !defined(PNG_DEPRECATED) && __has_attribute(__deprecated__)
+#      define PNG_DEPRECATED __attribute__((__deprecated__))
+#    endif
+#    if !defined(PNG_PRIVATE)
+#      ifdef __has_extension
+#        if __has_extension(attribute_unavailable_with_message)
+#          define PNG_PRIVATE __attribute__((__unavailable__(\
+             "This function is not exported by libpng.")))
+#        endif
+#      endif
+#    endif
+#    ifndef PNG_RESTRICT
+#      define PNG_RESTRICT __restrict
+#    endif
+
+#  elif defined(__GNUC__)
+#    ifndef PNG_USE_RESULT
+#      define PNG_USE_RESULT __attribute__((__warn_unused_result__))
+#    endif
+#    ifndef PNG_NORETURN
+#      define PNG_NORETURN   __attribute__((__noreturn__))
+#    endif
+#    if __GNUC__ >= 3
+#      ifndef PNG_ALLOCATED
+#        define PNG_ALLOCATED  __attribute__((__malloc__))
+#      endif
+#      ifndef PNG_DEPRECATED
+#        define PNG_DEPRECATED __attribute__((__deprecated__))
+#      endif
+#      ifndef PNG_PRIVATE
+#        if 0 /* Doesn't work so we use deprecated instead*/
+#          define PNG_PRIVATE \
+            __attribute__((warning("This function is not exported by libpng.")))
+#        else
+#          define PNG_PRIVATE \
+            __attribute__((__deprecated__))
+#        endif
+#      endif
+#      if ((__GNUC__ > 3) || !defined(__GNUC_MINOR__) || (__GNUC_MINOR__ >= 1))
+#        ifndef PNG_RESTRICT
+#          define PNG_RESTRICT __restrict
+#        endif
+#      endif /* __GNUC__.__GNUC_MINOR__ > 3.0 */
+#    endif /* __GNUC__ >= 3 */
+
+#  elif defined(_MSC_VER)  && (_MSC_VER >= 1300)
+#    ifndef PNG_USE_RESULT
+#      define PNG_USE_RESULT /* not supported */
+#    endif
+#    ifndef PNG_NORETURN
+#      define PNG_NORETURN   __declspec(noreturn)
+#    endif
+#    ifndef PNG_ALLOCATED
+#      if (_MSC_VER >= 1400)
+#        define PNG_ALLOCATED __declspec(restrict)
+#      endif
+#    endif
+#    ifndef PNG_DEPRECATED
+#      define PNG_DEPRECATED __declspec(deprecated)
+#    endif
+#    ifndef PNG_PRIVATE
+#      define PNG_PRIVATE __declspec(deprecated)
+#    endif
+#    ifndef PNG_RESTRICT
+#      if (_MSC_VER >= 1400)
+#        define PNG_RESTRICT __restrict
+#      endif
+#    endif
+
+#  elif defined(__WATCOMC__)
+#    ifndef PNG_RESTRICT
+#      define PNG_RESTRICT __restrict
+#    endif
+#  endif
+#endif /* PNG_PEDANTIC_WARNINGS */
+
+#ifndef PNG_DEPRECATED
+#  define PNG_DEPRECATED  /* Use of this function is deprecated */
+#endif
+#ifndef PNG_USE_RESULT
+#  define PNG_USE_RESULT  /* The result of this function must be checked */
+#endif
+#ifndef PNG_NORETURN
+#  define PNG_NORETURN    /* This function does not return */
+#endif
+#ifndef PNG_ALLOCATED
+#  define PNG_ALLOCATED   /* The result of the function is new memory */
+#endif
+#ifndef PNG_PRIVATE
+#  define PNG_PRIVATE     /* This is a private libpng function */
+#endif
+#ifndef PNG_RESTRICT
+#  define PNG_RESTRICT    /* The C99 "restrict" feature */
+#endif
+
+#ifndef PNG_FP_EXPORT     /* A floating point API. */
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+#     define PNG_FP_EXPORT(ordinal, type, name, args)\
+         PNG_EXPORT(ordinal, type, name, args);
+#  else                   /* No floating point APIs */
+#     define PNG_FP_EXPORT(ordinal, type, name, args)
+#  endif
+#endif
+#ifndef PNG_FIXED_EXPORT  /* A fixed point API. */
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+#     define PNG_FIXED_EXPORT(ordinal, type, name, args)\
+         PNG_EXPORT(ordinal, type, name, args);
+#  else                   /* No fixed point APIs */
+#     define PNG_FIXED_EXPORT(ordinal, type, name, args)
+#  endif
+#endif
+
+#ifndef PNG_BUILDING_SYMBOL_TABLE
+/* Some typedefs to get us started.  These should be safe on most of the common
+ * platforms.
+ *
+ * png_uint_32 and png_int_32 may, currently, be larger than required to hold a
+ * 32-bit value however this is not normally advisable.
+ *
+ * png_uint_16 and png_int_16 should always be two bytes in size - this is
+ * verified at library build time.
+ *
+ * png_byte must always be one byte in size.
+ *
+ * The checks below use constants from limits.h, as defined by the ISOC90
+ * standard.
+ */
+#if CHAR_BIT == 8 && UCHAR_MAX == 255
+   typedef unsigned char png_byte;
+#else
+#  error "libpng requires 8-bit bytes"
+#endif
+
+#if INT_MIN == -32768 && INT_MAX == 32767
+   typedef int png_int_16;
+#elif SHRT_MIN == -32768 && SHRT_MAX == 32767
+   typedef short png_int_16;
+#else
+#  error "libpng requires a signed 16-bit type"
+#endif
+
+#if UINT_MAX == 65535
+   typedef unsigned int png_uint_16;
+#elif USHRT_MAX == 65535
+   typedef unsigned short png_uint_16;
+#else
+#  error "libpng requires an unsigned 16-bit type"
+#endif
+
+#if INT_MIN < -2147483646 && INT_MAX > 2147483646
+   typedef int png_int_32;
+#elif LONG_MIN < -2147483646 && LONG_MAX > 2147483646
+   typedef long int png_int_32;
+#else
+#  error "libpng requires a signed 32-bit (or more) type"
+#endif
+
+#if UINT_MAX > 4294967294U
+   typedef unsigned int png_uint_32;
+#elif ULONG_MAX > 4294967294U
+   typedef unsigned long int png_uint_32;
+#else
+#  error "libpng requires an unsigned 32-bit (or more) type"
+#endif
+
+/* Prior to 1.6.0, it was possible to disable the use of size_t and ptrdiff_t.
+ * From 1.6.0 onwards, an ISO C90 compiler, as well as a standard-compliant
+ * behavior of sizeof and ptrdiff_t are required.
+ * The legacy typedefs are provided here for backwards compatibility.
+ */
+typedef size_t png_size_t;
+typedef ptrdiff_t png_ptrdiff_t;
+
+/* libpng needs to know the maximum value of 'size_t' and this controls the
+ * definition of png_alloc_size_t, below.  This maximum value of size_t limits
+ * but does not control the maximum allocations the library makes - there is
+ * direct application control of this through png_set_user_limits().
+ */
+#ifndef PNG_SMALL_SIZE_T
+   /* Compiler specific tests for systems where size_t is known to be less than
+    * 32 bits (some of these systems may no longer work because of the lack of
+    * 'far' support; see above.)
+    */
+#  if (defined(__TURBOC__) && !defined(__FLAT__)) ||\
+   (defined(_MSC_VER) && defined(MAXSEG_64K))
+#     define PNG_SMALL_SIZE_T
+#  endif
+#endif
+
+/* png_alloc_size_t is guaranteed to be no smaller than size_t, and no smaller
+ * than png_uint_32.  Casts from size_t or png_uint_32 to png_alloc_size_t are
+ * not necessary; in fact, it is recommended not to use them at all, so that
+ * the compiler can complain when something turns out to be problematic.
+ *
+ * Casts in the other direction (from png_alloc_size_t to size_t or
+ * png_uint_32) should be explicitly applied; however, we do not expect to
+ * encounter practical situations that require such conversions.
+ *
+ * PNG_SMALL_SIZE_T must be defined if the maximum value of size_t is less than
+ * 4294967295 - i.e. less than the maximum value of png_uint_32.
+ */
+#ifdef PNG_SMALL_SIZE_T
+   typedef png_uint_32 png_alloc_size_t;
+#else
+   typedef size_t png_alloc_size_t;
+#endif
+
+/* Prior to 1.6.0 libpng offered limited support for Microsoft C compiler
+ * implementations of Intel CPU specific support of user-mode segmented address
+ * spaces, where 16-bit pointers address more than 65536 bytes of memory using
+ * separate 'segment' registers.  The implementation requires two different
+ * types of pointer (only one of which includes the segment value.)
+ *
+ * If required this support is available in version 1.2 of libpng and may be
+ * available in versions through 1.5, although the correctness of the code has
+ * not been verified recently.
+ */
+
+/* Typedef for floating-point numbers that are converted to fixed-point with a
+ * multiple of 100,000, e.g., gamma
+ */
+typedef png_int_32 png_fixed_point;
+
+/* Add typedefs for pointers */
+typedef void                  * png_voidp;
+typedef const void            * png_const_voidp;
+typedef png_byte              * png_bytep;
+typedef const png_byte        * png_const_bytep;
+typedef png_uint_32           * png_uint_32p;
+typedef const png_uint_32     * png_const_uint_32p;
+typedef png_int_32            * png_int_32p;
+typedef const png_int_32      * png_const_int_32p;
+typedef png_uint_16           * png_uint_16p;
+typedef const png_uint_16     * png_const_uint_16p;
+typedef png_int_16            * png_int_16p;
+typedef const png_int_16      * png_const_int_16p;
+typedef char                  * png_charp;
+typedef const char            * png_const_charp;
+typedef png_fixed_point       * png_fixed_point_p;
+typedef const png_fixed_point * png_const_fixed_point_p;
+typedef size_t                * png_size_tp;
+typedef const size_t          * png_const_size_tp;
+
+#ifdef PNG_STDIO_SUPPORTED
+typedef FILE            * png_FILE_p;
+#endif
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+typedef double       * png_doublep;
+typedef const double * png_const_doublep;
+#endif
+
+/* Pointers to pointers; i.e. arrays */
+typedef png_byte        * * png_bytepp;
+typedef png_uint_32     * * png_uint_32pp;
+typedef png_int_32      * * png_int_32pp;
+typedef png_uint_16     * * png_uint_16pp;
+typedef png_int_16      * * png_int_16pp;
+typedef const char      * * png_const_charpp;
+typedef char            * * png_charpp;
+typedef png_fixed_point * * png_fixed_point_pp;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+typedef double          * * png_doublepp;
+#endif
+
+/* Pointers to pointers to pointers; i.e., pointer to array */
+typedef char            * * * png_charppp;
+
+#endif /* PNG_BUILDING_SYMBOL_TABLE */
+
+#endif /* PNGCONF_H */
diff --git a/reg-io/png/lpng1510/pngdebug.h b/reg-io/png/lpng/pngdebug.h
similarity index 82%
rename from reg-io/png/lpng1510/pngdebug.h
rename to reg-io/png/lpng/pngdebug.h
index 3b3fa85a..5530c0c9 100644
--- a/reg-io/png/lpng1510/pngdebug.h
+++ b/reg-io/png/lpng/pngdebug.h
@@ -1,155 +1,153 @@
-
-/* pngdebug.h - Debugging macros for libpng, also used in pngtest.c
- *
- * Copyright (c) 1998-2011 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * Last changed in libpng 1.5.0 [January 6, 2011]
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-/* Define PNG_DEBUG at compile time for debugging information.  Higher
- * numbers for PNG_DEBUG mean more debugging information.  This has
- * only been added since version 0.95 so it is not implemented throughout
- * libpng yet, but more support will be added as needed.
- *
- * png_debug[1-2]?(level, message ,arg{0-2})
- *   Expands to a statement (either a simple expression or a compound
- *   do..while(0) statement) that outputs a message with parameter
- *   substitution if PNG_DEBUG is defined to 2 or more.  If PNG_DEBUG
- *   is undefined, 0 or 1 every png_debug expands to a simple expression
- *   (actually ((void)0)).
- *
- *   level: level of detail of message, starting at 0.  A level 'n'
- *          message is preceded by 'n' tab characters (not implemented
- *          on Microsoft compilers unless PNG_DEBUG_FILE is also
- *          defined, to allow debug DLL compilation with no standard IO).
- *   message: a printf(3) style text string.  A trailing '\n' is added
- *            to the message.
- *   arg: 0 to 2 arguments for printf(3) style substitution in message.
- */
-#pragma once
-/* These settings control the formatting of messages in png.c and pngerror.c */
-/* Moved to pngdebug.h at 1.5.0 */
-#  ifndef PNG_LITERAL_SHARP
-#    define PNG_LITERAL_SHARP 0x23
-#  endif
-#  ifndef PNG_LITERAL_LEFT_SQUARE_BRACKET
-#    define PNG_LITERAL_LEFT_SQUARE_BRACKET 0x5b
-#  endif
-#  ifndef PNG_LITERAL_RIGHT_SQUARE_BRACKET
-#    define PNG_LITERAL_RIGHT_SQUARE_BRACKET 0x5d
-#  endif
-#  ifndef PNG_STRING_NEWLINE
-#    define PNG_STRING_NEWLINE "\n"
-#  endif
-
-#ifdef PNG_DEBUG
-#  if (PNG_DEBUG > 0)
-#    if !defined(PNG_DEBUG_FILE) && defined(_MSC_VER)
-#      include <crtdbg.h>
-#      if (PNG_DEBUG > 1)
-#        ifndef _DEBUG
-#          define _DEBUG
-#        endif
-#        ifndef png_debug
-#          define png_debug(l,m)  _RPT0(_CRT_WARN,m PNG_STRING_NEWLINE)
-#        endif
-#        ifndef png_debug1
-#          define png_debug1(l,m,p1)  _RPT1(_CRT_WARN,m PNG_STRING_NEWLINE,p1)
-#        endif
-#        ifndef png_debug2
-#          define png_debug2(l,m,p1,p2) \
-             _RPT2(_CRT_WARN,m PNG_STRING_NEWLINE,p1,p2)
-#        endif
-#      endif
-#    else /* PNG_DEBUG_FILE || !_MSC_VER */
-#      ifndef PNG_STDIO_SUPPORTED
-#        include <stdio.h> /* not included yet */
-#      endif
-#      ifndef PNG_DEBUG_FILE
-#        define PNG_DEBUG_FILE stderr
-#      endif /* PNG_DEBUG_FILE */
-
-#      if (PNG_DEBUG > 1)
-/* Note: ["%s"m PNG_STRING_NEWLINE] probably does not work on
- * non-ISO compilers
- */
-#        ifdef __STDC__
-#          ifndef png_debug
-#            define png_debug(l,m) \
-       do { \
-       int num_tabs=l; \
-       fprintf(PNG_DEBUG_FILE,"%s"m PNG_STRING_NEWLINE,(num_tabs==1 ? "\t" : \
-         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":"")))); \
-       } while (0)
-#          endif
-#          ifndef png_debug1
-#            define png_debug1(l,m,p1) \
-       do { \
-       int num_tabs=l; \
-       fprintf(PNG_DEBUG_FILE,"%s"m PNG_STRING_NEWLINE,(num_tabs==1 ? "\t" : \
-         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))),p1); \
-       } while (0)
-#          endif
-#          ifndef png_debug2
-#            define png_debug2(l,m,p1,p2) \
-       do { \
-       int num_tabs=l; \
-       fprintf(PNG_DEBUG_FILE,"%s"m PNG_STRING_NEWLINE,(num_tabs==1 ? "\t" : \
-         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))),p1,p2); \
-       } while (0)
-#          endif
-#        else /* __STDC __ */
-#          ifndef png_debug
-#            define png_debug(l,m) \
-       do { \
-       int num_tabs=l; \
-       char format[256]; \
-       snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \
-         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \
-         m,PNG_STRING_NEWLINE); \
-       fprintf(PNG_DEBUG_FILE,format); \
-       } while (0)
-#          endif
-#          ifndef png_debug1
-#            define png_debug1(l,m,p1) \
-       do { \
-       int num_tabs=l; \
-       char format[256]; \
-       snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \
-         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \
-         m,PNG_STRING_NEWLINE); \
-       fprintf(PNG_DEBUG_FILE,format,p1); \
-       } while (0)
-#          endif
-#          ifndef png_debug2
-#            define png_debug2(l,m,p1,p2) \
-       do { \
-       int num_tabs=l; \
-       char format[256]; \
-       snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \
-         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \
-         m,PNG_STRING_NEWLINE); \
-       fprintf(PNG_DEBUG_FILE,format,p1,p2); \
-       } while (0)
-#          endif
-#        endif /* __STDC __ */
-#      endif /* (PNG_DEBUG > 1) */
-
-#    endif /* _MSC_VER */
-#  endif /* (PNG_DEBUG > 0) */
-#endif /* PNG_DEBUG */
-#ifndef png_debug
-#  define png_debug(l, m) ((void)0)
-#endif
-#ifndef png_debug1
-#  define png_debug1(l, m, p1) ((void)0)
-#endif
-#ifndef png_debug2
-#  define png_debug2(l, m, p1, p2) ((void)0)
-#endif
+
+/* pngdebug.h - Debugging macros for libpng, also used in pngtest.c
+ *
+ * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2013 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+/* Define PNG_DEBUG at compile time for debugging information.  Higher
+ * numbers for PNG_DEBUG mean more debugging information.  This has
+ * only been added since version 0.95 so it is not implemented throughout
+ * libpng yet, but more support will be added as needed.
+ *
+ * png_debug[1-2]?(level, message ,arg{0-2})
+ *   Expands to a statement (either a simple expression or a compound
+ *   do..while(0) statement) that outputs a message with parameter
+ *   substitution if PNG_DEBUG is defined to 2 or more.  If PNG_DEBUG
+ *   is undefined, 0 or 1 every png_debug expands to a simple expression
+ *   (actually ((void)0)).
+ *
+ *   level: level of detail of message, starting at 0.  A level 'n'
+ *          message is preceded by 'n' 3-space indentations (not implemented
+ *          on Microsoft compilers unless PNG_DEBUG_FILE is also
+ *          defined, to allow debug DLL compilation with no standard IO).
+ *   message: a printf(3) style text string.  A trailing '\n' is added
+ *            to the message.
+ *   arg: 0 to 2 arguments for printf(3) style substitution in message.
+ */
+#ifndef PNGDEBUG_H
+#define PNGDEBUG_H
+/* These settings control the formatting of messages in png.c and pngerror.c */
+/* Moved to pngdebug.h at 1.5.0 */
+#  ifndef PNG_LITERAL_SHARP
+#    define PNG_LITERAL_SHARP 0x23
+#  endif
+#  ifndef PNG_LITERAL_LEFT_SQUARE_BRACKET
+#    define PNG_LITERAL_LEFT_SQUARE_BRACKET 0x5b
+#  endif
+#  ifndef PNG_LITERAL_RIGHT_SQUARE_BRACKET
+#    define PNG_LITERAL_RIGHT_SQUARE_BRACKET 0x5d
+#  endif
+#  ifndef PNG_STRING_NEWLINE
+#    define PNG_STRING_NEWLINE "\n"
+#  endif
+
+#ifdef PNG_DEBUG
+#  if (PNG_DEBUG > 0)
+#    if !defined(PNG_DEBUG_FILE) && defined(_MSC_VER)
+#      include <crtdbg.h>
+#      if (PNG_DEBUG > 1)
+#        ifndef _DEBUG
+#          define _DEBUG
+#        endif
+#        ifndef png_debug
+#          define png_debug(l,m)  _RPT0(_CRT_WARN,m PNG_STRING_NEWLINE)
+#        endif
+#        ifndef png_debug1
+#          define png_debug1(l,m,p1)  _RPT1(_CRT_WARN,m PNG_STRING_NEWLINE,p1)
+#        endif
+#        ifndef png_debug2
+#          define png_debug2(l,m,p1,p2) \
+             _RPT2(_CRT_WARN,m PNG_STRING_NEWLINE,p1,p2)
+#        endif
+#      endif
+#    else /* PNG_DEBUG_FILE || !_MSC_VER */
+#      ifndef PNG_STDIO_SUPPORTED
+#        include <stdio.h> /* not included yet */
+#      endif
+#      ifndef PNG_DEBUG_FILE
+#        define PNG_DEBUG_FILE stderr
+#      endif /* PNG_DEBUG_FILE */
+
+#      if (PNG_DEBUG > 1)
+#        ifdef __STDC__
+#          ifndef png_debug
+#            define png_debug(l,m) \
+       do { \
+       int num_tabs=l; \
+       fprintf(PNG_DEBUG_FILE,"%s" m PNG_STRING_NEWLINE,(num_tabs==1 ? "   " : \
+         (num_tabs==2 ? "      " : (num_tabs>2 ? "         " : "")))); \
+       } while (0)
+#          endif
+#          ifndef png_debug1
+#            define png_debug1(l,m,p1) \
+       do { \
+       int num_tabs=l; \
+       fprintf(PNG_DEBUG_FILE,"%s" m PNG_STRING_NEWLINE,(num_tabs==1 ? "   " : \
+         (num_tabs==2 ? "      " : (num_tabs>2 ? "         " : ""))),p1); \
+       } while (0)
+#          endif
+#          ifndef png_debug2
+#            define png_debug2(l,m,p1,p2) \
+       do { \
+       int num_tabs=l; \
+       fprintf(PNG_DEBUG_FILE,"%s" m PNG_STRING_NEWLINE,(num_tabs==1 ? "   " : \
+         (num_tabs==2 ? "      " : (num_tabs>2 ? "         " : ""))),p1,p2);\
+       } while (0)
+#          endif
+#        else /* __STDC __ */
+#          ifndef png_debug
+#            define png_debug(l,m) \
+       do { \
+       int num_tabs=l; \
+       char format[256]; \
+       snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \
+         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \
+         m,PNG_STRING_NEWLINE); \
+       fprintf(PNG_DEBUG_FILE,format); \
+       } while (0)
+#          endif
+#          ifndef png_debug1
+#            define png_debug1(l,m,p1) \
+       do { \
+       int num_tabs=l; \
+       char format[256]; \
+       snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \
+         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \
+         m,PNG_STRING_NEWLINE); \
+       fprintf(PNG_DEBUG_FILE,format,p1); \
+       } while (0)
+#          endif
+#          ifndef png_debug2
+#            define png_debug2(l,m,p1,p2) \
+       do { \
+       int num_tabs=l; \
+       char format[256]; \
+       snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \
+         (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \
+         m,PNG_STRING_NEWLINE); \
+       fprintf(PNG_DEBUG_FILE,format,p1,p2); \
+       } while (0)
+#          endif
+#        endif /* __STDC __ */
+#      endif /* (PNG_DEBUG > 1) */
+
+#    endif /* _MSC_VER */
+#  endif /* (PNG_DEBUG > 0) */
+#endif /* PNG_DEBUG */
+#ifndef png_debug
+#  define png_debug(l, m) ((void)0)
+#endif
+#ifndef png_debug1
+#  define png_debug1(l, m, p1) ((void)0)
+#endif
+#ifndef png_debug2
+#  define png_debug2(l, m, p1, p2) ((void)0)
+#endif
+#endif /* PNGDEBUG_H */
diff --git a/reg-io/png/lpng1510/pngerror.c b/reg-io/png/lpng/pngerror.c
similarity index 54%
rename from reg-io/png/lpng1510/pngerror.c
rename to reg-io/png/lpng/pngerror.c
index 9df97f58..db4869fe 100644
--- a/reg-io/png/lpng1510/pngerror.c
+++ b/reg-io/png/lpng/pngerror.c
@@ -1,10 +1,10 @@
 
 /* pngerror.c - stub functions for i/o and memory allocation
  *
- * Last changed in libpng 1.5.8 [February 1, 2011]
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2017 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
  *
  * This code is released under the libpng license.
  * For conditions of distribution and use, see the disclaimer
@@ -20,14 +20,14 @@
 
 #if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
 
-static PNG_FUNCTION(void, png_default_error,PNGARG((png_structp png_ptr,
+static PNG_FUNCTION(void, png_default_error,PNGARG((png_const_structrp png_ptr,
     png_const_charp error_message)),PNG_NORETURN);
 
 #ifdef PNG_WARNINGS_SUPPORTED
 static void /* PRIVATE */
-png_default_warning PNGARG((png_structp png_ptr,
-   png_const_charp warning_message));
-#endif /* PNG_WARNINGS_SUPPORTED */
+png_default_warning PNGARG((png_const_structrp png_ptr,
+    png_const_charp warning_message));
+#endif /* WARNINGS */
 
 /* This function is called whenever there is a fatal error.  This function
  * should not be changed.  If there is a need to handle errors differently,
@@ -36,14 +36,15 @@ png_default_warning PNGARG((png_structp png_ptr,
  */
 #ifdef PNG_ERROR_TEXT_SUPPORTED
 PNG_FUNCTION(void,PNGAPI
-png_error,(png_structp png_ptr, png_const_charp error_message),PNG_NORETURN)
+png_error,(png_const_structrp png_ptr, png_const_charp error_message),
+    PNG_NORETURN)
 {
 #ifdef PNG_ERROR_NUMBERS_SUPPORTED
    char msg[16];
    if (png_ptr != NULL)
    {
-      if (png_ptr->flags&
-         (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT))
+      if ((png_ptr->flags &
+         (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT)) != 0)
       {
          if (*error_message == PNG_LITERAL_SHARP)
          {
@@ -53,7 +54,7 @@ png_error,(png_structp png_ptr, png_const_charp error_message),PNG_NORETURN)
                if (error_message[offset] == ' ')
                   break;
 
-            if (png_ptr->flags&PNG_FLAG_STRIP_ERROR_TEXT)
+            if ((png_ptr->flags & PNG_FLAG_STRIP_ERROR_TEXT) != 0)
             {
                int i;
                for (i = 0; i < offset - 1; i++)
@@ -64,22 +65,23 @@ png_error,(png_structp png_ptr, png_const_charp error_message),PNG_NORETURN)
 
             else
                error_message += offset;
-      }
+         }
 
-      else
-      {
-         if (png_ptr->flags&PNG_FLAG_STRIP_ERROR_TEXT)
+         else
          {
-            msg[0] = '0';
-            msg[1] = '\0';
-            error_message = msg;
+            if ((png_ptr->flags & PNG_FLAG_STRIP_ERROR_TEXT) != 0)
+            {
+               msg[0] = '0';
+               msg[1] = '\0';
+               error_message = msg;
+            }
          }
-       }
-     }
+      }
    }
 #endif
    if (png_ptr != NULL && png_ptr->error_fn != NULL)
-      (*(png_ptr->error_fn))(png_ptr, error_message);
+      (*(png_ptr->error_fn))(png_constcast(png_structrp,png_ptr),
+          error_message);
 
    /* If the custom handler doesn't exist, or if it returns,
       use the default handler, which will not return. */
@@ -87,7 +89,7 @@ png_error,(png_structp png_ptr, png_const_charp error_message),PNG_NORETURN)
 }
 #else
 PNG_FUNCTION(void,PNGAPI
-png_err,(png_structp png_ptr),PNG_NORETURN)
+png_err,(png_const_structrp png_ptr),PNG_NORETURN)
 {
    /* Prior to 1.5.2 the error_fn received a NULL pointer, expressed
     * erroneously as '\0', instead of the empty string "".  This was
@@ -95,20 +97,20 @@ png_err,(png_structp png_ptr),PNG_NORETURN)
     * will crash in this case.
     */
    if (png_ptr != NULL && png_ptr->error_fn != NULL)
-      (*(png_ptr->error_fn))(png_ptr, "");
+      (*(png_ptr->error_fn))(png_constcast(png_structrp,png_ptr), "");
 
    /* If the custom handler doesn't exist, or if it returns,
       use the default handler, which will not return. */
    png_default_error(png_ptr, "");
 }
-#endif /* PNG_ERROR_TEXT_SUPPORTED */
+#endif /* ERROR_TEXT */
 
 /* Utility to safely appends strings to a buffer.  This never errors out so
  * error checking is not required in the caller.
  */
 size_t
 png_safecat(png_charp buffer, size_t bufsize, size_t pos,
-   png_const_charp string)
+    png_const_charp string)
 {
    if (buffer != NULL && pos < bufsize)
    {
@@ -129,7 +131,7 @@ png_safecat(png_charp buffer, size_t bufsize, size_t pos,
  */
 png_charp
 png_format_number(png_const_charp start, png_charp end, int format,
-   png_alloc_size_t number)
+    png_alloc_size_t number)
 {
    int count = 0;    /* number of digits output */
    int mincount = 1; /* minimum number required */
@@ -150,7 +152,7 @@ png_format_number(png_const_charp start, png_charp end, int format,
          case PNG_NUMBER_FORMAT_fixed:
             /* Needs five digits (the fraction) */
             mincount = 5;
-            if (output || number % 10 != 0)
+            if (output != 0 || number % 10 != 0)
             {
                *--end = digits[number % 10];
                output = 1;
@@ -161,7 +163,7 @@ png_format_number(png_const_charp start, png_charp end, int format,
          case PNG_NUMBER_FORMAT_02u:
             /* Expects at least 2 digits. */
             mincount = 2;
-            /* fall through */
+            /* FALLTHROUGH */
 
          case PNG_NUMBER_FORMAT_u:
             *--end = digits[number % 10];
@@ -171,7 +173,7 @@ png_format_number(png_const_charp start, png_charp end, int format,
          case PNG_NUMBER_FORMAT_02x:
             /* This format expects at least two digits */
             mincount = 2;
-            /* fall through */
+            /* FALLTHROUGH */
 
          case PNG_NUMBER_FORMAT_x:
             *--end = digits[number & 0xf];
@@ -187,13 +189,13 @@ png_format_number(png_const_charp start, png_charp end, int format,
       ++count;
 
       /* Float a fixed number here: */
-      if (format == PNG_NUMBER_FORMAT_fixed) if (count == 5) if (end > start)
+      if ((format == PNG_NUMBER_FORMAT_fixed) && (count == 5) && (end > start))
       {
          /* End of the fraction, but maybe nothing was output?  In that case
           * drop the decimal point.  If the number is a true zero handle that
           * here.
           */
-         if (output)
+         if (output != 0)
             *--end = '.';
          else if (number == 0) /* and !output */
             *--end = '0';
@@ -211,14 +213,14 @@ png_format_number(png_const_charp start, png_charp end, int format,
  * png_set_error_fn() to replace the warning function at run-time.
  */
 void PNGAPI
-png_warning(png_structp png_ptr, png_const_charp warning_message)
+png_warning(png_const_structrp png_ptr, png_const_charp warning_message)
 {
    int offset = 0;
    if (png_ptr != NULL)
    {
 #ifdef PNG_ERROR_NUMBERS_SUPPORTED
-   if (png_ptr->flags&
-       (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT))
+   if ((png_ptr->flags &
+       (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT)) != 0)
 #endif
       {
          if (*warning_message == PNG_LITERAL_SHARP)
@@ -230,7 +232,8 @@ png_warning(png_structp png_ptr, png_const_charp warning_message)
       }
    }
    if (png_ptr != NULL && png_ptr->warning_fn != NULL)
-      (*(png_ptr->warning_fn))(png_ptr, warning_message + offset);
+      (*(png_ptr->warning_fn))(png_constcast(png_structrp,png_ptr),
+          warning_message + offset);
    else
       png_default_warning(png_ptr, warning_message + offset);
 }
@@ -242,7 +245,7 @@ png_warning(png_structp png_ptr, png_const_charp warning_message)
  */
 void
 png_warning_parameter(png_warning_parameters p, int number,
-   png_const_charp string)
+    png_const_charp string)
 {
    if (number > 0 && number <= PNG_WARNING_PARAMETER_COUNT)
       (void)png_safecat(p[number-1], (sizeof p[number-1]), 0, string);
@@ -250,19 +253,19 @@ png_warning_parameter(png_warning_parameters p, int number,
 
 void
 png_warning_parameter_unsigned(png_warning_parameters p, int number, int format,
-   png_alloc_size_t value)
+    png_alloc_size_t value)
 {
-   char buffer[PNG_NUMBER_BUFFER_SIZE];
+   char buffer[PNG_NUMBER_BUFFER_SIZE] = {0};
    png_warning_parameter(p, number, PNG_FORMAT_NUMBER(buffer, format, value));
 }
 
 void
 png_warning_parameter_signed(png_warning_parameters p, int number, int format,
-   png_int_32 value)
+    png_int_32 value)
 {
    png_alloc_size_t u;
    png_charp str;
-   char buffer[PNG_NUMBER_BUFFER_SIZE];
+   char buffer[PNG_NUMBER_BUFFER_SIZE] = {0};
 
    /* Avoid overflow by doing the negate in a png_alloc_size_t: */
    u = (png_alloc_size_t)value;
@@ -278,8 +281,8 @@ png_warning_parameter_signed(png_warning_parameters p, int number, int format,
 }
 
 void
-png_formatted_warning(png_structp png_ptr, png_warning_parameters p,
-   png_const_charp message)
+png_formatted_warning(png_const_structrp png_ptr, png_warning_parameters p,
+    png_const_charp message)
 {
    /* The internal buffer is just 192 bytes - enough for all our messages,
     * overflow doesn't happen because this code checks!  If someone figures
@@ -346,41 +349,89 @@ png_formatted_warning(png_structp png_ptr, png_warning_parameters p,
    /* i is always less than (sizeof msg), so: */
    msg[i] = '\0';
 
-   /* And this is the formatted message, it may be larger than
-    * PNG_MAX_ERROR_TEXT, but that is only used for 'chunk' errors and these are
-    * not (currently) formatted.
+   /* And this is the formatted message. It may be larger than
+    * PNG_MAX_ERROR_TEXT, but that is only used for 'chunk' errors and these
+    * are not (currently) formatted.
     */
    png_warning(png_ptr, msg);
 }
-#endif /* PNG_WARNINGS_SUPPORTED */
+#endif /* WARNINGS */
 
 #ifdef PNG_BENIGN_ERRORS_SUPPORTED
 void PNGAPI
-png_benign_error(png_structp png_ptr, png_const_charp error_message)
+png_benign_error(png_const_structrp png_ptr, png_const_charp error_message)
 {
-  if (png_ptr->flags & PNG_FLAG_BENIGN_ERRORS_WARN)
-     png_warning(png_ptr, error_message);
-  else
-     png_error(png_ptr, error_message);
+   if ((png_ptr->flags & PNG_FLAG_BENIGN_ERRORS_WARN) != 0)
+   {
+#     ifdef PNG_READ_SUPPORTED
+         if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 &&
+            png_ptr->chunk_name != 0)
+            png_chunk_warning(png_ptr, error_message);
+         else
+#     endif
+      png_warning(png_ptr, error_message);
+   }
+
+   else
+   {
+#     ifdef PNG_READ_SUPPORTED
+         if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 &&
+            png_ptr->chunk_name != 0)
+            png_chunk_error(png_ptr, error_message);
+         else
+#     endif
+      png_error(png_ptr, error_message);
+   }
+
+#  ifndef PNG_ERROR_TEXT_SUPPORTED
+      PNG_UNUSED(error_message)
+#  endif
 }
-#endif
 
+void /* PRIVATE */
+png_app_warning(png_const_structrp png_ptr, png_const_charp error_message)
+{
+   if ((png_ptr->flags & PNG_FLAG_APP_WARNINGS_WARN) != 0)
+      png_warning(png_ptr, error_message);
+   else
+      png_error(png_ptr, error_message);
+
+#  ifndef PNG_ERROR_TEXT_SUPPORTED
+      PNG_UNUSED(error_message)
+#  endif
+}
+
+void /* PRIVATE */
+png_app_error(png_const_structrp png_ptr, png_const_charp error_message)
+{
+   if ((png_ptr->flags & PNG_FLAG_APP_ERRORS_WARN) != 0)
+      png_warning(png_ptr, error_message);
+   else
+      png_error(png_ptr, error_message);
+
+#  ifndef PNG_ERROR_TEXT_SUPPORTED
+      PNG_UNUSED(error_message)
+#  endif
+}
+#endif /* BENIGN_ERRORS */
+
+#define PNG_MAX_ERROR_TEXT 196 /* Currently limited by profile_error in png.c */
+#if defined(PNG_WARNINGS_SUPPORTED) || \
+   (defined(PNG_READ_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED))
 /* These utilities are used internally to build an error message that relates
  * to the current chunk.  The chunk name comes from png_ptr->chunk_name,
- * this is used to prefix the message.  The message is limited in length
- * to 63 bytes, the name characters are output as hex digits wrapped in []
+ * which is used to prefix the message.  The message is limited in length
+ * to 63 bytes. The name characters are output as hex digits wrapped in []
  * if the character is invalid.
  */
 #define isnonalpha(c) ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97))
-static PNG_CONST char png_digit[16] = {
+static const char png_digit[16] = {
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'A', 'B', 'C', 'D', 'E', 'F'
 };
 
-#define PNG_MAX_ERROR_TEXT 64
-#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_ERROR_TEXT_SUPPORTED)
 static void /* PRIVATE */
-png_format_buffer(png_structp png_ptr, png_charp buffer, png_const_charp
+png_format_buffer(png_const_structrp png_ptr, png_charp buffer, png_const_charp
     error_message)
 {
    png_uint_32 chunk_name = png_ptr->chunk_name;
@@ -391,7 +442,7 @@ png_format_buffer(png_structp png_ptr, png_charp buffer, png_const_charp
       int c = (int)(chunk_name >> ishift) & 0xff;
 
       ishift -= 8;
-      if (isnonalpha(c))
+      if (isnonalpha(c) != 0)
       {
          buffer[iout++] = PNG_LITERAL_LEFT_SQUARE_BRACKET;
          buffer[iout++] = png_digit[(c & 0xf0) >> 4];
@@ -422,12 +473,12 @@ png_format_buffer(png_structp png_ptr, png_charp buffer, png_const_charp
       buffer[iout] = '\0';
    }
 }
-#endif /* PNG_WARNINGS_SUPPORTED || PNG_ERROR_TEXT_SUPPORTED */
+#endif /* WARNINGS || ERROR_TEXT */
 
 #if defined(PNG_READ_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED)
 PNG_FUNCTION(void,PNGAPI
-png_chunk_error,(png_structp png_ptr, png_const_charp error_message),
-   PNG_NORETURN)
+png_chunk_error,(png_const_structrp png_ptr, png_const_charp error_message),
+    PNG_NORETURN)
 {
    char msg[18+PNG_MAX_ERROR_TEXT];
    if (png_ptr == NULL)
@@ -439,11 +490,11 @@ png_chunk_error,(png_structp png_ptr, png_const_charp error_message),
       png_error(png_ptr, msg);
    }
 }
-#endif /* PNG_READ_SUPPORTED && PNG_ERROR_TEXT_SUPPORTED */
+#endif /* READ && ERROR_TEXT */
 
 #ifdef PNG_WARNINGS_SUPPORTED
 void PNGAPI
-png_chunk_warning(png_structp png_ptr, png_const_charp warning_message)
+png_chunk_warning(png_const_structrp png_ptr, png_const_charp warning_message)
 {
    char msg[18+PNG_MAX_ERROR_TEXT];
    if (png_ptr == NULL)
@@ -455,38 +506,83 @@ png_chunk_warning(png_structp png_ptr, png_const_charp warning_message)
       png_warning(png_ptr, msg);
    }
 }
-#endif /* PNG_WARNINGS_SUPPORTED */
+#endif /* WARNINGS */
 
 #ifdef PNG_READ_SUPPORTED
 #ifdef PNG_BENIGN_ERRORS_SUPPORTED
 void PNGAPI
-png_chunk_benign_error(png_structp png_ptr, png_const_charp error_message)
+png_chunk_benign_error(png_const_structrp png_ptr, png_const_charp
+    error_message)
 {
-   if (png_ptr->flags & PNG_FLAG_BENIGN_ERRORS_WARN)
+   if ((png_ptr->flags & PNG_FLAG_BENIGN_ERRORS_WARN) != 0)
       png_chunk_warning(png_ptr, error_message);
 
    else
       png_chunk_error(png_ptr, error_message);
+
+#  ifndef PNG_ERROR_TEXT_SUPPORTED
+      PNG_UNUSED(error_message)
+#  endif
 }
 #endif
-#endif /* PNG_READ_SUPPORTED */
+#endif /* READ */
+
+void /* PRIVATE */
+png_chunk_report(png_const_structrp png_ptr, png_const_charp message, int error)
+{
+#  ifndef PNG_WARNINGS_SUPPORTED
+      PNG_UNUSED(message)
+#  endif
+
+   /* This is always supported, but for just read or just write it
+    * unconditionally does the right thing.
+    */
+#  if defined(PNG_READ_SUPPORTED) && defined(PNG_WRITE_SUPPORTED)
+      if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0)
+#  endif
+
+#  ifdef PNG_READ_SUPPORTED
+      {
+         if (error < PNG_CHUNK_ERROR)
+            png_chunk_warning(png_ptr, message);
+
+         else
+            png_chunk_benign_error(png_ptr, message);
+      }
+#  endif
+
+#  if defined(PNG_READ_SUPPORTED) && defined(PNG_WRITE_SUPPORTED)
+      else if ((png_ptr->mode & PNG_IS_READ_STRUCT) == 0)
+#  endif
+
+#  ifdef PNG_WRITE_SUPPORTED
+      {
+         if (error < PNG_CHUNK_WRITE_ERROR)
+            png_app_warning(png_ptr, message);
+
+         else
+            png_app_error(png_ptr, message);
+      }
+#  endif
+}
 
 #ifdef PNG_ERROR_TEXT_SUPPORTED
 #ifdef PNG_FLOATING_POINT_SUPPORTED
 PNG_FUNCTION(void,
-png_fixed_error,(png_structp png_ptr, png_const_charp name),PNG_NORETURN)
+png_fixed_error,(png_const_structrp png_ptr, png_const_charp name),PNG_NORETURN)
 {
 #  define fixed_message "fixed point overflow in "
 #  define fixed_message_ln ((sizeof fixed_message)-1)
-   int  iin;
+   unsigned int  iin;
    char msg[fixed_message_ln+PNG_MAX_ERROR_TEXT];
-   png_memcpy(msg, fixed_message, fixed_message_ln);
+   memcpy(msg, fixed_message, fixed_message_ln);
    iin = 0;
-   if (name != NULL) while (iin < (PNG_MAX_ERROR_TEXT-1) && name[iin] != 0)
-   {
-      msg[fixed_message_ln + iin] = name[iin];
-      ++iin;
-   }
+   if (name != NULL)
+      while (iin < (PNG_MAX_ERROR_TEXT-1) && name[iin] != 0)
+      {
+         msg[fixed_message_ln + iin] = name[iin];
+         ++iin;
+      }
    msg[fixed_message_ln + iin] = 0;
    png_error(png_ptr, msg);
 }
@@ -498,14 +594,111 @@ png_fixed_error,(png_structp png_ptr, png_const_charp name),PNG_NORETURN)
  * otherwise it is necessary for png_default_error to be overridden.
  */
 jmp_buf* PNGAPI
-png_set_longjmp_fn(png_structp png_ptr, png_longjmp_ptr longjmp_fn,
+png_set_longjmp_fn(png_structrp png_ptr, png_longjmp_ptr longjmp_fn,
     size_t jmp_buf_size)
 {
-   if (png_ptr == NULL || jmp_buf_size != png_sizeof(jmp_buf))
+   /* From libpng 1.6.0 the app gets one chance to set a 'jmpbuf_size' value
+    * and it must not change after that.  Libpng doesn't care how big the
+    * buffer is, just that it doesn't change.
+    *
+    * If the buffer size is no *larger* than the size of jmp_buf when libpng is
+    * compiled a built in jmp_buf is returned; this preserves the pre-1.6.0
+    * semantics that this call will not fail.  If the size is larger, however,
+    * the buffer is allocated and this may fail, causing the function to return
+    * NULL.
+    */
+   if (png_ptr == NULL)
       return NULL;
 
+   if (png_ptr->jmp_buf_ptr == NULL)
+   {
+      png_ptr->jmp_buf_size = 0; /* not allocated */
+
+      if (jmp_buf_size <= (sizeof png_ptr->jmp_buf_local))
+         png_ptr->jmp_buf_ptr = &png_ptr->jmp_buf_local;
+
+      else
+      {
+         png_ptr->jmp_buf_ptr = png_voidcast(jmp_buf *,
+             png_malloc_warn(png_ptr, jmp_buf_size));
+
+         if (png_ptr->jmp_buf_ptr == NULL)
+            return NULL; /* new NULL return on OOM */
+
+         png_ptr->jmp_buf_size = jmp_buf_size;
+      }
+   }
+
+   else /* Already allocated: check the size */
+   {
+      size_t size = png_ptr->jmp_buf_size;
+
+      if (size == 0)
+      {
+         size = (sizeof png_ptr->jmp_buf_local);
+         if (png_ptr->jmp_buf_ptr != &png_ptr->jmp_buf_local)
+         {
+            /* This is an internal error in libpng: somehow we have been left
+             * with a stack allocated jmp_buf when the application regained
+             * control.  It's always possible to fix this up, but for the moment
+             * this is a png_error because that makes it easy to detect.
+             */
+            png_error(png_ptr, "Libpng jmp_buf still allocated");
+            /* png_ptr->jmp_buf_ptr = &png_ptr->jmp_buf_local; */
+         }
+      }
+
+      if (size != jmp_buf_size)
+      {
+         png_warning(png_ptr, "Application jmp_buf size changed");
+         return NULL; /* caller will probably crash: no choice here */
+      }
+   }
+
+   /* Finally fill in the function, now we have a satisfactory buffer. It is
+    * valid to change the function on every call.
+    */
    png_ptr->longjmp_fn = longjmp_fn;
-   return &png_ptr->longjmp_buffer;
+   return png_ptr->jmp_buf_ptr;
+}
+
+void /* PRIVATE */
+png_free_jmpbuf(png_structrp png_ptr)
+{
+   if (png_ptr != NULL)
+   {
+      jmp_buf *jb = png_ptr->jmp_buf_ptr;
+
+      /* A size of 0 is used to indicate a local, stack, allocation of the
+       * pointer; used here and in png.c
+       */
+      if (jb != NULL && png_ptr->jmp_buf_size > 0)
+      {
+
+         /* This stuff is so that a failure to free the error control structure
+          * does not leave libpng in a state with no valid error handling: the
+          * free always succeeds, if there is an error it gets ignored.
+          */
+         if (jb != &png_ptr->jmp_buf_local)
+         {
+            /* Make an internal, libpng, jmp_buf to return here */
+            jmp_buf free_jmp_buf;
+
+            if (!setjmp(free_jmp_buf))
+            {
+               png_ptr->jmp_buf_ptr = &free_jmp_buf; /* come back here */
+               png_ptr->jmp_buf_size = 0; /* stack allocation */
+               png_ptr->longjmp_fn = longjmp;
+               png_free(png_ptr, jb); /* Return to setjmp on error */
+            }
+         }
+      }
+
+      /* *Always* cancel everything out: */
+      png_ptr->jmp_buf_size = 0;
+      png_ptr->jmp_buf_ptr = NULL;
+      png_ptr->longjmp_fn = 0;
+   }
 }
 #endif
 
@@ -515,8 +708,8 @@ png_set_longjmp_fn(png_structp png_ptr, png_longjmp_ptr longjmp_fn,
  * error function pointer in png_set_error_fn().
  */
 static PNG_FUNCTION(void /* PRIVATE */,
-png_default_error,(png_structp png_ptr, png_const_charp error_message),
-   PNG_NORETURN)
+png_default_error,(png_const_structrp png_ptr, png_const_charp error_message),
+    PNG_NORETURN)
 {
 #ifdef PNG_CONSOLE_IO_SUPPORTED
 #ifdef PNG_ERROR_NUMBERS_SUPPORTED
@@ -562,24 +755,23 @@ png_default_error,(png_structp png_ptr, png_const_charp error_message),
 }
 
 PNG_FUNCTION(void,PNGAPI
-png_longjmp,(png_structp png_ptr, int val),PNG_NORETURN)
+png_longjmp,(png_const_structrp png_ptr, int val),PNG_NORETURN)
 {
 #ifdef PNG_SETJMP_SUPPORTED
-   if (png_ptr && png_ptr->longjmp_fn)
-   {
-#  ifdef USE_FAR_KEYWORD
-      {
-         jmp_buf tmp_jmpbuf;
-         png_memcpy(tmp_jmpbuf, png_ptr->longjmp_buffer, png_sizeof(jmp_buf));
-         png_ptr->longjmp_fn(tmp_jmpbuf, val);
-      }
-
-#  else
-   png_ptr->longjmp_fn(png_ptr->longjmp_buffer, val);
-#  endif
-   }
+   if (png_ptr != NULL && png_ptr->longjmp_fn != NULL &&
+       png_ptr->jmp_buf_ptr != NULL)
+      png_ptr->longjmp_fn(*png_ptr->jmp_buf_ptr, val);
+#else
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(val)
 #endif
-   /* Here if not setjmp support or if png_ptr is null. */
+
+   /* If control reaches this point, png_longjmp() must not return. The only
+    * choice is to terminate the whole process (or maybe the thread); to do
+    * this the ANSI-C abort() function is used unless a different method is
+    * implemented by overriding the default configuration setting for
+    * PNG_ABORT().
+    */
    PNG_ABORT();
 }
 
@@ -590,7 +782,7 @@ png_longjmp,(png_structp png_ptr, int val),PNG_NORETURN)
  * not used, but it is passed in case it may be useful.
  */
 static void /* PRIVATE */
-png_default_warning(png_structp png_ptr, png_const_charp warning_message)
+png_default_warning(png_const_structrp png_ptr, png_const_charp warning_message)
 {
 #ifdef PNG_CONSOLE_IO_SUPPORTED
 #  ifdef PNG_ERROR_NUMBERS_SUPPORTED
@@ -632,15 +824,15 @@ png_default_warning(png_structp png_ptr, png_const_charp warning_message)
 #endif
    PNG_UNUSED(png_ptr) /* Make compiler happy */
 }
-#endif /* PNG_WARNINGS_SUPPORTED */
+#endif /* WARNINGS */
 
 /* This function is called when the application wants to use another method
  * of handling errors and warnings.  Note that the error function MUST NOT
  * return to the calling routine or serious problems will occur.  The return
- * method used in the default routine calls longjmp(png_ptr->longjmp_buffer, 1)
+ * method used in the default routine calls longjmp(png_ptr->jmp_buf_ptr, 1)
  */
 void PNGAPI
-png_set_error_fn(png_structp png_ptr, png_voidp error_ptr,
+png_set_error_fn(png_structrp png_ptr, png_voidp error_ptr,
     png_error_ptr error_fn, png_error_ptr warning_fn)
 {
    if (png_ptr == NULL)
@@ -661,18 +853,18 @@ png_set_error_fn(png_structp png_ptr, png_voidp error_ptr,
  * pointer before png_write_destroy and png_read_destroy are called.
  */
 png_voidp PNGAPI
-png_get_error_ptr(png_const_structp png_ptr)
+png_get_error_ptr(png_const_structrp png_ptr)
 {
    if (png_ptr == NULL)
       return NULL;
 
-   return ((png_voidp)png_ptr->error_ptr);
+   return (png_voidp)png_ptr->error_ptr;
 }
 
 
 #ifdef PNG_ERROR_NUMBERS_SUPPORTED
 void PNGAPI
-png_set_strip_error_numbers(png_structp png_ptr, png_uint_32 strip_mode)
+png_set_strip_error_numbers(png_structrp png_ptr, png_uint_32 strip_mode)
 {
    if (png_ptr != NULL)
    {
@@ -682,4 +874,84 @@ png_set_strip_error_numbers(png_structp png_ptr, png_uint_32 strip_mode)
    }
 }
 #endif
-#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
+
+#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\
+   defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
+   /* Currently the above both depend on SETJMP_SUPPORTED, however it would be
+    * possible to implement without setjmp support just so long as there is some
+    * way to handle the error return here:
+    */
+PNG_FUNCTION(void /* PRIVATE */, (PNGCBAPI
+png_safe_error),(png_structp png_nonconst_ptr, png_const_charp error_message),
+    PNG_NORETURN)
+{
+   png_const_structrp png_ptr = png_nonconst_ptr;
+   png_imagep image = png_voidcast(png_imagep, png_ptr->error_ptr);
+
+   /* An error is always logged here, overwriting anything (typically a warning)
+    * that is already there:
+    */
+   if (image != NULL)
+   {
+      png_safecat(image->message, (sizeof image->message), 0, error_message);
+      image->warning_or_error |= PNG_IMAGE_ERROR;
+
+      /* Retrieve the jmp_buf from within the png_control, making this work for
+       * C++ compilation too is pretty tricky: C++ wants a pointer to the first
+       * element of a jmp_buf, but C doesn't tell us the type of that.
+       */
+      if (image->opaque != NULL && image->opaque->error_buf != NULL)
+         longjmp(png_control_jmp_buf(image->opaque), 1);
+
+      /* Missing longjmp buffer, the following is to help debugging: */
+      {
+         size_t pos = png_safecat(image->message, (sizeof image->message), 0,
+             "bad longjmp: ");
+         png_safecat(image->message, (sizeof image->message), pos,
+             error_message);
+      }
+   }
+
+   /* Here on an internal programming error. */
+   abort();
+}
+
+#ifdef PNG_WARNINGS_SUPPORTED
+void /* PRIVATE */ PNGCBAPI
+png_safe_warning(png_structp png_nonconst_ptr, png_const_charp warning_message)
+{
+   png_const_structrp png_ptr = png_nonconst_ptr;
+   png_imagep image = png_voidcast(png_imagep, png_ptr->error_ptr);
+
+   /* A warning is only logged if there is no prior warning or error. */
+   if (image->warning_or_error == 0)
+   {
+      png_safecat(image->message, (sizeof image->message), 0, warning_message);
+      image->warning_or_error |= PNG_IMAGE_WARNING;
+   }
+}
+#endif
+
+int /* PRIVATE */
+png_safe_execute(png_imagep image, int (*function)(png_voidp), png_voidp arg)
+{
+   png_voidp saved_error_buf = image->opaque->error_buf;
+   jmp_buf safe_jmpbuf;
+   int result;
+
+   /* Safely execute function(arg), with png_error returning back here. */
+   if (setjmp(safe_jmpbuf) == 0)
+   {
+      image->opaque->error_buf = safe_jmpbuf;
+      result = function(arg);
+      image->opaque->error_buf = saved_error_buf;
+      return result;
+   }
+
+   /* On png_error, return via longjmp, pop the jmpbuf, and free the image. */
+   image->opaque->error_buf = saved_error_buf;
+   png_image_free(image);
+   return 0;
+}
+#endif /* SIMPLIFIED READ || SIMPLIFIED_WRITE */
+#endif /* READ || WRITE */
diff --git a/reg-io/png/lpng/pngget.c b/reg-io/png/lpng/pngget.c
new file mode 100644
index 00000000..7d2f0c04
--- /dev/null
+++ b/reg-io/png/lpng/pngget.c
@@ -0,0 +1,1267 @@
+
+/* pngget.c - retrieval of values from info struct
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ *
+ */
+
+#include "pngpriv.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+
+png_uint_32 PNGAPI
+png_get_valid(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_uint_32 flag)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+   {
+#ifdef PNG_READ_tRNS_SUPPORTED
+      /* png_handle_PLTE() may have canceled a valid tRNS chunk but left the
+       * 'valid' flag for the detection of duplicate chunks. Do not report a
+       * valid tRNS chunk in this case.
+       */
+      if (flag == PNG_INFO_tRNS && png_ptr->num_trans == 0)
+         return 0;
+#endif
+
+      return info_ptr->valid & flag;
+   }
+
+   return 0;
+}
+
+size_t PNGAPI
+png_get_rowbytes(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->rowbytes;
+
+   return 0;
+}
+
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+png_bytepp PNGAPI
+png_get_rows(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->row_pointers;
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_EASY_ACCESS_SUPPORTED
+/* Easy access to info, added in libpng-0.99 */
+png_uint_32 PNGAPI
+png_get_image_width(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->width;
+
+   return 0;
+}
+
+png_uint_32 PNGAPI
+png_get_image_height(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->height;
+
+   return 0;
+}
+
+png_byte PNGAPI
+png_get_bit_depth(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->bit_depth;
+
+   return 0;
+}
+
+png_byte PNGAPI
+png_get_color_type(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->color_type;
+
+   return 0;
+}
+
+png_byte PNGAPI
+png_get_filter_type(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->filter_type;
+
+   return 0;
+}
+
+png_byte PNGAPI
+png_get_interlace_type(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->interlace_type;
+
+   return 0;
+}
+
+png_byte PNGAPI
+png_get_compression_type(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->compression_type;
+
+   return 0;
+}
+
+png_uint_32 PNGAPI
+png_get_x_pixels_per_meter(png_const_structrp png_ptr, png_const_inforp
+   info_ptr)
+{
+#ifdef PNG_pHYs_SUPPORTED
+   png_debug(1, "in png_get_x_pixels_per_meter");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER)
+         return info_ptr->x_pixels_per_unit;
+   }
+#else
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(info_ptr)
+#endif
+
+   return 0;
+}
+
+png_uint_32 PNGAPI
+png_get_y_pixels_per_meter(png_const_structrp png_ptr, png_const_inforp
+    info_ptr)
+{
+#ifdef PNG_pHYs_SUPPORTED
+   png_debug(1, "in png_get_y_pixels_per_meter");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER)
+         return info_ptr->y_pixels_per_unit;
+   }
+#else
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(info_ptr)
+#endif
+
+   return 0;
+}
+
+png_uint_32 PNGAPI
+png_get_pixels_per_meter(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+#ifdef PNG_pHYs_SUPPORTED
+   png_debug(1, "in png_get_pixels_per_meter");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER &&
+          info_ptr->x_pixels_per_unit == info_ptr->y_pixels_per_unit)
+         return info_ptr->x_pixels_per_unit;
+   }
+#else
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(info_ptr)
+#endif
+
+   return 0;
+}
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+float PNGAPI
+png_get_pixel_aspect_ratio(png_const_structrp png_ptr, png_const_inforp
+   info_ptr)
+{
+#ifdef PNG_READ_pHYs_SUPPORTED
+   png_debug(1, "in png_get_pixel_aspect_ratio");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      if (info_ptr->x_pixels_per_unit != 0)
+         return (float)info_ptr->y_pixels_per_unit
+              / (float)info_ptr->x_pixels_per_unit;
+   }
+#else
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(info_ptr)
+#endif
+
+   return (float)0.0;
+}
+#endif
+
+#ifdef PNG_FIXED_POINT_SUPPORTED
+png_fixed_point PNGAPI
+png_get_pixel_aspect_ratio_fixed(png_const_structrp png_ptr,
+    png_const_inforp info_ptr)
+{
+#ifdef PNG_READ_pHYs_SUPPORTED
+   png_debug(1, "in png_get_pixel_aspect_ratio_fixed");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0 &&
+       info_ptr->x_pixels_per_unit > 0 && info_ptr->y_pixels_per_unit > 0 &&
+       info_ptr->x_pixels_per_unit <= PNG_UINT_31_MAX &&
+       info_ptr->y_pixels_per_unit <= PNG_UINT_31_MAX)
+   {
+      png_fixed_point res;
+
+      /* The following casts work because a PNG 4 byte integer only has a valid
+       * range of 0..2^31-1; otherwise the cast might overflow.
+       */
+      if (png_muldiv(&res, (png_int_32)info_ptr->y_pixels_per_unit, PNG_FP_1,
+          (png_int_32)info_ptr->x_pixels_per_unit) != 0)
+         return res;
+   }
+#else
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(info_ptr)
+#endif
+
+   return 0;
+}
+#endif
+
+png_int_32 PNGAPI
+png_get_x_offset_microns(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+#ifdef PNG_oFFs_SUPPORTED
+   png_debug(1, "in png_get_x_offset_microns");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER)
+         return info_ptr->x_offset;
+   }
+#else
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(info_ptr)
+#endif
+
+   return 0;
+}
+
+png_int_32 PNGAPI
+png_get_y_offset_microns(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+#ifdef PNG_oFFs_SUPPORTED
+   png_debug(1, "in png_get_y_offset_microns");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER)
+         return info_ptr->y_offset;
+   }
+#else
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(info_ptr)
+#endif
+
+   return 0;
+}
+
+png_int_32 PNGAPI
+png_get_x_offset_pixels(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+#ifdef PNG_oFFs_SUPPORTED
+   png_debug(1, "in png_get_x_offset_pixels");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL)
+         return info_ptr->x_offset;
+   }
+#else
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(info_ptr)
+#endif
+
+   return 0;
+}
+
+png_int_32 PNGAPI
+png_get_y_offset_pixels(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+#ifdef PNG_oFFs_SUPPORTED
+   png_debug(1, "in png_get_y_offset_pixels");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL)
+         return info_ptr->y_offset;
+   }
+#else
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(info_ptr)
+#endif
+
+   return 0;
+}
+
+#ifdef PNG_INCH_CONVERSIONS_SUPPORTED
+static png_uint_32
+ppi_from_ppm(png_uint_32 ppm)
+{
+#if 0
+   /* The conversion is *(2.54/100), in binary (32 digits):
+    * .00000110100000001001110101001001
+    */
+   png_uint_32 t1001, t1101;
+   ppm >>= 1;                  /* .1 */
+   t1001 = ppm + (ppm >> 3);   /* .1001 */
+   t1101 = t1001 + (ppm >> 1); /* .1101 */
+   ppm >>= 20;                 /* .000000000000000000001 */
+   t1101 += t1101 >> 15;       /* .1101000000000001101 */
+   t1001 >>= 11;               /* .000000000001001 */
+   t1001 += t1001 >> 12;       /* .000000000001001000000001001 */
+   ppm += t1001;               /* .000000000001001000001001001 */
+   ppm += t1101;               /* .110100000001001110101001001 */
+   return (ppm + 16) >> 5;/* .00000110100000001001110101001001 */
+#else
+   /* The argument is a PNG unsigned integer, so it is not permitted
+    * to be bigger than 2^31.
+    */
+   png_fixed_point result;
+   if (ppm <= PNG_UINT_31_MAX && png_muldiv(&result, (png_int_32)ppm, 127,
+       5000) != 0)
+      return (png_uint_32)result;
+
+   /* Overflow. */
+   return 0;
+#endif
+}
+
+png_uint_32 PNGAPI
+png_get_pixels_per_inch(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   return ppi_from_ppm(png_get_pixels_per_meter(png_ptr, info_ptr));
+}
+
+png_uint_32 PNGAPI
+png_get_x_pixels_per_inch(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   return ppi_from_ppm(png_get_x_pixels_per_meter(png_ptr, info_ptr));
+}
+
+png_uint_32 PNGAPI
+png_get_y_pixels_per_inch(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   return ppi_from_ppm(png_get_y_pixels_per_meter(png_ptr, info_ptr));
+}
+
+#ifdef PNG_FIXED_POINT_SUPPORTED
+static png_fixed_point
+png_fixed_inches_from_microns(png_const_structrp png_ptr, png_int_32 microns)
+{
+   /* Convert from meters * 1,000,000 to inches * 100,000, meters to
+    * inches is simply *(100/2.54), so we want *(10/2.54) == 500/127.
+    * Notice that this can overflow - a warning is output and 0 is
+    * returned.
+    */
+   return png_muldiv_warn(png_ptr, microns, 500, 127);
+}
+
+png_fixed_point PNGAPI
+png_get_x_offset_inches_fixed(png_const_structrp png_ptr,
+    png_const_inforp info_ptr)
+{
+   return png_fixed_inches_from_microns(png_ptr,
+       png_get_x_offset_microns(png_ptr, info_ptr));
+}
+#endif
+
+#ifdef PNG_FIXED_POINT_SUPPORTED
+png_fixed_point PNGAPI
+png_get_y_offset_inches_fixed(png_const_structrp png_ptr,
+    png_const_inforp info_ptr)
+{
+   return png_fixed_inches_from_microns(png_ptr,
+       png_get_y_offset_microns(png_ptr, info_ptr));
+}
+#endif
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+float PNGAPI
+png_get_x_offset_inches(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   /* To avoid the overflow do the conversion directly in floating
+    * point.
+    */
+   return (float)(png_get_x_offset_microns(png_ptr, info_ptr) * .00003937);
+}
+#endif
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+float PNGAPI
+png_get_y_offset_inches(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   /* To avoid the overflow do the conversion directly in floating
+    * point.
+    */
+   return (float)(png_get_y_offset_microns(png_ptr, info_ptr) * .00003937);
+}
+#endif
+
+#ifdef PNG_pHYs_SUPPORTED
+png_uint_32 PNGAPI
+png_get_pHYs_dpi(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)
+{
+   png_uint_32 retval = 0;
+
+   png_debug1(1, "in %s retrieval function", "pHYs");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      if (res_x != NULL)
+      {
+         *res_x = info_ptr->x_pixels_per_unit;
+         retval |= PNG_INFO_pHYs;
+      }
+
+      if (res_y != NULL)
+      {
+         *res_y = info_ptr->y_pixels_per_unit;
+         retval |= PNG_INFO_pHYs;
+      }
+
+      if (unit_type != NULL)
+      {
+         *unit_type = (int)info_ptr->phys_unit_type;
+         retval |= PNG_INFO_pHYs;
+
+         if (*unit_type == 1)
+         {
+            if (res_x != NULL) *res_x = (png_uint_32)(*res_x * .0254 + .50);
+            if (res_y != NULL) *res_y = (png_uint_32)(*res_y * .0254 + .50);
+         }
+      }
+   }
+
+   return retval;
+}
+#endif /* pHYs */
+#endif /* INCH_CONVERSIONS */
+
+/* png_get_channels really belongs in here, too, but it's been around longer */
+
+#endif /* EASY_ACCESS */
+
+
+png_byte PNGAPI
+png_get_channels(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->channels;
+
+   return 0;
+}
+
+#ifdef PNG_READ_SUPPORTED
+png_const_bytep PNGAPI
+png_get_signature(png_const_structrp png_ptr, png_const_inforp info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return info_ptr->signature;
+
+   return NULL;
+}
+#endif
+
+#ifdef PNG_bKGD_SUPPORTED
+png_uint_32 PNGAPI
+png_get_bKGD(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_color_16p *background)
+{
+   png_debug1(1, "in %s retrieval function", "bKGD");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_bKGD) != 0 &&
+       background != NULL)
+   {
+      *background = &(info_ptr->background);
+      return PNG_INFO_bKGD;
+   }
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_cHRM_SUPPORTED
+/* The XYZ APIs were added in 1.5.5 to take advantage of the code added at the
+ * same time to correct the rgb grayscale coefficient defaults obtained from the
+ * cHRM chunk in 1.5.4
+ */
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+png_uint_32 PNGAPI
+png_get_cHRM(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    double *white_x, double *white_y, double *red_x, double *red_y,
+    double *green_x, double *green_y, double *blue_x, double *blue_y)
+{
+   png_debug1(1, "in %s retrieval function", "cHRM");
+
+   /* Quiet API change: this code used to only return the end points if a cHRM
+    * chunk was present, but the end points can also come from iCCP or sRGB
+    * chunks, so in 1.6.0 the png_get_ APIs return the end points regardless and
+    * the png_set_ APIs merely check that set end points are mutually
+    * consistent.
+    */
+   if (png_ptr != NULL && info_ptr != NULL &&
+      (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
+   {
+      if (white_x != NULL)
+         *white_x = png_float(png_ptr,
+             info_ptr->colorspace.end_points_xy.whitex, "cHRM white X");
+      if (white_y != NULL)
+         *white_y = png_float(png_ptr,
+             info_ptr->colorspace.end_points_xy.whitey, "cHRM white Y");
+      if (red_x != NULL)
+         *red_x = png_float(png_ptr, info_ptr->colorspace.end_points_xy.redx,
+             "cHRM red X");
+      if (red_y != NULL)
+         *red_y = png_float(png_ptr, info_ptr->colorspace.end_points_xy.redy,
+             "cHRM red Y");
+      if (green_x != NULL)
+         *green_x = png_float(png_ptr,
+             info_ptr->colorspace.end_points_xy.greenx, "cHRM green X");
+      if (green_y != NULL)
+         *green_y = png_float(png_ptr,
+             info_ptr->colorspace.end_points_xy.greeny, "cHRM green Y");
+      if (blue_x != NULL)
+         *blue_x = png_float(png_ptr, info_ptr->colorspace.end_points_xy.bluex,
+             "cHRM blue X");
+      if (blue_y != NULL)
+         *blue_y = png_float(png_ptr, info_ptr->colorspace.end_points_xy.bluey,
+             "cHRM blue Y");
+      return PNG_INFO_cHRM;
+   }
+
+   return 0;
+}
+
+png_uint_32 PNGAPI
+png_get_cHRM_XYZ(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    double *red_X, double *red_Y, double *red_Z, double *green_X,
+    double *green_Y, double *green_Z, double *blue_X, double *blue_Y,
+    double *blue_Z)
+{
+   png_debug1(1, "in %s retrieval function", "cHRM_XYZ(float)");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
+   {
+      if (red_X != NULL)
+         *red_X = png_float(png_ptr, info_ptr->colorspace.end_points_XYZ.red_X,
+             "cHRM red X");
+      if (red_Y != NULL)
+         *red_Y = png_float(png_ptr, info_ptr->colorspace.end_points_XYZ.red_Y,
+             "cHRM red Y");
+      if (red_Z != NULL)
+         *red_Z = png_float(png_ptr, info_ptr->colorspace.end_points_XYZ.red_Z,
+             "cHRM red Z");
+      if (green_X != NULL)
+         *green_X = png_float(png_ptr,
+             info_ptr->colorspace.end_points_XYZ.green_X, "cHRM green X");
+      if (green_Y != NULL)
+         *green_Y = png_float(png_ptr,
+             info_ptr->colorspace.end_points_XYZ.green_Y, "cHRM green Y");
+      if (green_Z != NULL)
+         *green_Z = png_float(png_ptr,
+             info_ptr->colorspace.end_points_XYZ.green_Z, "cHRM green Z");
+      if (blue_X != NULL)
+         *blue_X = png_float(png_ptr,
+             info_ptr->colorspace.end_points_XYZ.blue_X, "cHRM blue X");
+      if (blue_Y != NULL)
+         *blue_Y = png_float(png_ptr,
+             info_ptr->colorspace.end_points_XYZ.blue_Y, "cHRM blue Y");
+      if (blue_Z != NULL)
+         *blue_Z = png_float(png_ptr,
+             info_ptr->colorspace.end_points_XYZ.blue_Z, "cHRM blue Z");
+      return PNG_INFO_cHRM;
+   }
+
+   return 0;
+}
+#  endif
+
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+png_uint_32 PNGAPI
+png_get_cHRM_XYZ_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_fixed_point *int_red_X, png_fixed_point *int_red_Y,
+    png_fixed_point *int_red_Z, png_fixed_point *int_green_X,
+    png_fixed_point *int_green_Y, png_fixed_point *int_green_Z,
+    png_fixed_point *int_blue_X, png_fixed_point *int_blue_Y,
+    png_fixed_point *int_blue_Z)
+{
+   png_debug1(1, "in %s retrieval function", "cHRM_XYZ");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+      (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
+   {
+      if (int_red_X != NULL)
+         *int_red_X = info_ptr->colorspace.end_points_XYZ.red_X;
+      if (int_red_Y != NULL)
+         *int_red_Y = info_ptr->colorspace.end_points_XYZ.red_Y;
+      if (int_red_Z != NULL)
+         *int_red_Z = info_ptr->colorspace.end_points_XYZ.red_Z;
+      if (int_green_X != NULL)
+         *int_green_X = info_ptr->colorspace.end_points_XYZ.green_X;
+      if (int_green_Y != NULL)
+         *int_green_Y = info_ptr->colorspace.end_points_XYZ.green_Y;
+      if (int_green_Z != NULL)
+         *int_green_Z = info_ptr->colorspace.end_points_XYZ.green_Z;
+      if (int_blue_X != NULL)
+         *int_blue_X = info_ptr->colorspace.end_points_XYZ.blue_X;
+      if (int_blue_Y != NULL)
+         *int_blue_Y = info_ptr->colorspace.end_points_XYZ.blue_Y;
+      if (int_blue_Z != NULL)
+         *int_blue_Z = info_ptr->colorspace.end_points_XYZ.blue_Z;
+      return PNG_INFO_cHRM;
+   }
+
+   return 0;
+}
+
+png_uint_32 PNGAPI
+png_get_cHRM_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_fixed_point *white_x, png_fixed_point *white_y, png_fixed_point *red_x,
+    png_fixed_point *red_y, png_fixed_point *green_x, png_fixed_point *green_y,
+    png_fixed_point *blue_x, png_fixed_point *blue_y)
+{
+   png_debug1(1, "in %s retrieval function", "cHRM");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+      (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0)
+   {
+      if (white_x != NULL)
+         *white_x = info_ptr->colorspace.end_points_xy.whitex;
+      if (white_y != NULL)
+         *white_y = info_ptr->colorspace.end_points_xy.whitey;
+      if (red_x != NULL)
+         *red_x = info_ptr->colorspace.end_points_xy.redx;
+      if (red_y != NULL)
+         *red_y = info_ptr->colorspace.end_points_xy.redy;
+      if (green_x != NULL)
+         *green_x = info_ptr->colorspace.end_points_xy.greenx;
+      if (green_y != NULL)
+         *green_y = info_ptr->colorspace.end_points_xy.greeny;
+      if (blue_x != NULL)
+         *blue_x = info_ptr->colorspace.end_points_xy.bluex;
+      if (blue_y != NULL)
+         *blue_y = info_ptr->colorspace.end_points_xy.bluey;
+      return PNG_INFO_cHRM;
+   }
+
+   return 0;
+}
+#  endif
+#endif
+
+#ifdef PNG_gAMA_SUPPORTED
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+png_uint_32 PNGAPI
+png_get_gAMA_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_fixed_point *file_gamma)
+{
+   png_debug1(1, "in %s retrieval function", "gAMA");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) != 0 &&
+       file_gamma != NULL)
+   {
+      *file_gamma = info_ptr->colorspace.gamma;
+      return PNG_INFO_gAMA;
+   }
+
+   return 0;
+}
+#  endif
+
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+png_uint_32 PNGAPI
+png_get_gAMA(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    double *file_gamma)
+{
+   png_debug1(1, "in %s retrieval function", "gAMA(float)");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+      (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) != 0 &&
+      file_gamma != NULL)
+   {
+      *file_gamma = png_float(png_ptr, info_ptr->colorspace.gamma,
+          "png_get_gAMA");
+      return PNG_INFO_gAMA;
+   }
+
+   return 0;
+}
+#  endif
+#endif
+
+#ifdef PNG_sRGB_SUPPORTED
+png_uint_32 PNGAPI
+png_get_sRGB(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    int *file_srgb_intent)
+{
+   png_debug1(1, "in %s retrieval function", "sRGB");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+      (info_ptr->valid & PNG_INFO_sRGB) != 0 && file_srgb_intent != NULL)
+   {
+      *file_srgb_intent = info_ptr->colorspace.rendering_intent;
+      return PNG_INFO_sRGB;
+   }
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_iCCP_SUPPORTED
+png_uint_32 PNGAPI
+png_get_iCCP(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_charpp name, int *compression_type,
+    png_bytepp profile, png_uint_32 *proflen)
+{
+   png_debug1(1, "in %s retrieval function", "iCCP");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_iCCP) != 0 &&
+       name != NULL && profile != NULL && proflen != NULL)
+   {
+      *name = info_ptr->iccp_name;
+      *profile = info_ptr->iccp_profile;
+      *proflen = png_get_uint_32(info_ptr->iccp_profile);
+      /* This is somewhat irrelevant since the profile data returned has
+       * actually been uncompressed.
+       */
+      if (compression_type != NULL)
+         *compression_type = PNG_COMPRESSION_TYPE_BASE;
+      return PNG_INFO_iCCP;
+   }
+
+   return 0;
+
+}
+#endif
+
+#ifdef PNG_sPLT_SUPPORTED
+int PNGAPI
+png_get_sPLT(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_sPLT_tpp spalettes)
+{
+   png_debug1(1, "in %s retrieval function", "sPLT");
+
+   if (png_ptr != NULL && info_ptr != NULL && spalettes != NULL)
+   {
+      *spalettes = info_ptr->splt_palettes;
+      return info_ptr->splt_palettes_num;
+   }
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_eXIf_SUPPORTED
+png_uint_32 PNGAPI
+png_get_eXIf(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_bytep *exif)
+{
+  png_warning(png_ptr, "png_get_eXIf does not work; use png_get_eXIf_1");
+  PNG_UNUSED(info_ptr)
+  PNG_UNUSED(exif)
+  return 0;
+}
+
+png_uint_32 PNGAPI
+png_get_eXIf_1(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_uint_32 *num_exif, png_bytep *exif)
+{
+   png_debug1(1, "in %s retrieval function", "eXIf");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_eXIf) != 0 && exif != NULL)
+   {
+      *num_exif = info_ptr->num_exif;
+      *exif = info_ptr->exif;
+      return PNG_INFO_eXIf;
+   }
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_hIST_SUPPORTED
+png_uint_32 PNGAPI
+png_get_hIST(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_uint_16p *hist)
+{
+   png_debug1(1, "in %s retrieval function", "hIST");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_hIST) != 0 && hist != NULL)
+   {
+      *hist = info_ptr->hist;
+      return PNG_INFO_hIST;
+   }
+
+   return 0;
+}
+#endif
+
+png_uint_32 PNGAPI
+png_get_IHDR(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_uint_32 *width, png_uint_32 *height, int *bit_depth,
+    int *color_type, int *interlace_type, int *compression_type,
+    int *filter_type)
+{
+   png_debug1(1, "in %s retrieval function", "IHDR");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return 0;
+
+   if (width != NULL)
+       *width = info_ptr->width;
+
+   if (height != NULL)
+       *height = info_ptr->height;
+
+   if (bit_depth != NULL)
+       *bit_depth = info_ptr->bit_depth;
+
+   if (color_type != NULL)
+       *color_type = info_ptr->color_type;
+
+   if (compression_type != NULL)
+      *compression_type = info_ptr->compression_type;
+
+   if (filter_type != NULL)
+      *filter_type = info_ptr->filter_type;
+
+   if (interlace_type != NULL)
+      *interlace_type = info_ptr->interlace_type;
+
+   /* This is redundant if we can be sure that the info_ptr values were all
+    * assigned in png_set_IHDR().  We do the check anyhow in case an
+    * application has ignored our advice not to mess with the members
+    * of info_ptr directly.
+    */
+   png_check_IHDR(png_ptr, info_ptr->width, info_ptr->height,
+       info_ptr->bit_depth, info_ptr->color_type, info_ptr->interlace_type,
+       info_ptr->compression_type, info_ptr->filter_type);
+
+   return 1;
+}
+
+#ifdef PNG_oFFs_SUPPORTED
+png_uint_32 PNGAPI
+png_get_oFFs(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_int_32 *offset_x, png_int_32 *offset_y, int *unit_type)
+{
+   png_debug1(1, "in %s retrieval function", "oFFs");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_oFFs) != 0 &&
+       offset_x != NULL && offset_y != NULL && unit_type != NULL)
+   {
+      *offset_x = info_ptr->x_offset;
+      *offset_y = info_ptr->y_offset;
+      *unit_type = (int)info_ptr->offset_unit_type;
+      return PNG_INFO_oFFs;
+   }
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_pCAL_SUPPORTED
+png_uint_32 PNGAPI
+png_get_pCAL(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_charp *purpose, png_int_32 *X0, png_int_32 *X1, int *type, int *nparams,
+    png_charp *units, png_charpp *params)
+{
+   png_debug1(1, "in %s retrieval function", "pCAL");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pCAL) != 0 &&
+       purpose != NULL && X0 != NULL && X1 != NULL && type != NULL &&
+       nparams != NULL && units != NULL && params != NULL)
+   {
+      *purpose = info_ptr->pcal_purpose;
+      *X0 = info_ptr->pcal_X0;
+      *X1 = info_ptr->pcal_X1;
+      *type = (int)info_ptr->pcal_type;
+      *nparams = (int)info_ptr->pcal_nparams;
+      *units = info_ptr->pcal_units;
+      *params = info_ptr->pcal_params;
+      return PNG_INFO_pCAL;
+   }
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_sCAL_SUPPORTED
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+#    if defined(PNG_FLOATING_ARITHMETIC_SUPPORTED) || \
+         defined(PNG_FLOATING_POINT_SUPPORTED)
+png_uint_32 PNGAPI
+png_get_sCAL_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    int *unit, png_fixed_point *width, png_fixed_point *height)
+{
+   png_debug1(1, "in %s retrieval function", "sCAL");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_sCAL) != 0)
+   {
+      *unit = info_ptr->scal_unit;
+      /*TODO: make this work without FP support; the API is currently eliminated
+       * if neither floating point APIs nor internal floating point arithmetic
+       * are enabled.
+       */
+      *width = png_fixed(png_ptr, atof(info_ptr->scal_s_width), "sCAL width");
+      *height = png_fixed(png_ptr, atof(info_ptr->scal_s_height),
+          "sCAL height");
+      return PNG_INFO_sCAL;
+   }
+
+   return 0;
+}
+#    endif /* FLOATING_ARITHMETIC */
+#  endif /* FIXED_POINT */
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+png_uint_32 PNGAPI
+png_get_sCAL(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    int *unit, double *width, double *height)
+{
+   png_debug1(1, "in %s retrieval function", "sCAL(float)");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_sCAL) != 0)
+   {
+      *unit = info_ptr->scal_unit;
+      *width = atof(info_ptr->scal_s_width);
+      *height = atof(info_ptr->scal_s_height);
+      return PNG_INFO_sCAL;
+   }
+
+   return 0;
+}
+#  endif /* FLOATING POINT */
+png_uint_32 PNGAPI
+png_get_sCAL_s(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    int *unit, png_charpp width, png_charpp height)
+{
+   png_debug1(1, "in %s retrieval function", "sCAL(str)");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_sCAL) != 0)
+   {
+      *unit = info_ptr->scal_unit;
+      *width = info_ptr->scal_s_width;
+      *height = info_ptr->scal_s_height;
+      return PNG_INFO_sCAL;
+   }
+
+   return 0;
+}
+#endif /* sCAL */
+
+#ifdef PNG_pHYs_SUPPORTED
+png_uint_32 PNGAPI
+png_get_pHYs(png_const_structrp png_ptr, png_const_inforp info_ptr,
+    png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)
+{
+   png_uint_32 retval = 0;
+
+   png_debug1(1, "in %s retrieval function", "pHYs");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      if (res_x != NULL)
+      {
+         *res_x = info_ptr->x_pixels_per_unit;
+         retval |= PNG_INFO_pHYs;
+      }
+
+      if (res_y != NULL)
+      {
+         *res_y = info_ptr->y_pixels_per_unit;
+         retval |= PNG_INFO_pHYs;
+      }
+
+      if (unit_type != NULL)
+      {
+         *unit_type = (int)info_ptr->phys_unit_type;
+         retval |= PNG_INFO_pHYs;
+      }
+   }
+
+   return retval;
+}
+#endif /* pHYs */
+
+png_uint_32 PNGAPI
+png_get_PLTE(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_colorp *palette, int *num_palette)
+{
+   png_debug1(1, "in %s retrieval function", "PLTE");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_PLTE) != 0 && palette != NULL)
+   {
+      *palette = info_ptr->palette;
+      *num_palette = info_ptr->num_palette;
+      png_debug1(3, "num_palette = %d", *num_palette);
+      return PNG_INFO_PLTE;
+   }
+
+   return 0;
+}
+
+#ifdef PNG_sBIT_SUPPORTED
+png_uint_32 PNGAPI
+png_get_sBIT(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_color_8p *sig_bit)
+{
+   png_debug1(1, "in %s retrieval function", "sBIT");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_sBIT) != 0 && sig_bit != NULL)
+   {
+      *sig_bit = &(info_ptr->sig_bit);
+      return PNG_INFO_sBIT;
+   }
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_TEXT_SUPPORTED
+int PNGAPI
+png_get_text(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_textp *text_ptr, int *num_text)
+{
+   if (png_ptr != NULL && info_ptr != NULL && info_ptr->num_text > 0)
+   {
+      png_debug1(1, "in text retrieval function, chunk typeid = 0x%lx",
+         (unsigned long)png_ptr->chunk_name);
+
+      if (text_ptr != NULL)
+         *text_ptr = info_ptr->text;
+
+      if (num_text != NULL)
+         *num_text = info_ptr->num_text;
+
+      return info_ptr->num_text;
+   }
+
+   if (num_text != NULL)
+      *num_text = 0;
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_tIME_SUPPORTED
+png_uint_32 PNGAPI
+png_get_tIME(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_timep *mod_time)
+{
+   png_debug1(1, "in %s retrieval function", "tIME");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_tIME) != 0 && mod_time != NULL)
+   {
+      *mod_time = &(info_ptr->mod_time);
+      return PNG_INFO_tIME;
+   }
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_tRNS_SUPPORTED
+png_uint_32 PNGAPI
+png_get_tRNS(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_bytep *trans_alpha, int *num_trans, png_color_16p *trans_color)
+{
+   png_uint_32 retval = 0;
+
+   png_debug1(1, "in %s retrieval function", "tRNS");
+
+   if (png_ptr != NULL && info_ptr != NULL &&
+       (info_ptr->valid & PNG_INFO_tRNS) != 0)
+   {
+      if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         if (trans_alpha != NULL)
+         {
+            *trans_alpha = info_ptr->trans_alpha;
+            retval |= PNG_INFO_tRNS;
+         }
+
+         if (trans_color != NULL)
+            *trans_color = &(info_ptr->trans_color);
+      }
+
+      else /* if (info_ptr->color_type != PNG_COLOR_TYPE_PALETTE) */
+      {
+         if (trans_color != NULL)
+         {
+            *trans_color = &(info_ptr->trans_color);
+            retval |= PNG_INFO_tRNS;
+         }
+
+         if (trans_alpha != NULL)
+            *trans_alpha = NULL;
+      }
+
+      if (num_trans != NULL)
+      {
+         *num_trans = info_ptr->num_trans;
+         retval |= PNG_INFO_tRNS;
+      }
+   }
+
+   return retval;
+}
+#endif
+
+#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
+int PNGAPI
+png_get_unknown_chunks(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_unknown_chunkpp unknowns)
+{
+   if (png_ptr != NULL && info_ptr != NULL && unknowns != NULL)
+   {
+      *unknowns = info_ptr->unknown_chunks;
+      return info_ptr->unknown_chunks_num;
+   }
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
+png_byte PNGAPI
+png_get_rgb_to_gray_status(png_const_structrp png_ptr)
+{
+   return (png_byte)(png_ptr ? png_ptr->rgb_to_gray_status : 0);
+}
+#endif
+
+#ifdef PNG_USER_CHUNKS_SUPPORTED
+png_voidp PNGAPI
+png_get_user_chunk_ptr(png_const_structrp png_ptr)
+{
+   return (png_ptr ? png_ptr->user_chunk_ptr : NULL);
+}
+#endif
+
+size_t PNGAPI
+png_get_compression_buffer_size(png_const_structrp png_ptr)
+{
+   if (png_ptr == NULL)
+      return 0;
+
+#ifdef PNG_WRITE_SUPPORTED
+   if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0)
+#endif
+   {
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+      return png_ptr->IDAT_read_size;
+#else
+      return PNG_IDAT_READ_SIZE;
+#endif
+   }
+
+#ifdef PNG_WRITE_SUPPORTED
+   else
+      return png_ptr->zbuffer_size;
+#endif
+}
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+/* These functions were added to libpng 1.2.6 and were enabled
+ * by default in libpng-1.4.0 */
+png_uint_32 PNGAPI
+png_get_user_width_max(png_const_structrp png_ptr)
+{
+   return (png_ptr ? png_ptr->user_width_max : 0);
+}
+
+png_uint_32 PNGAPI
+png_get_user_height_max(png_const_structrp png_ptr)
+{
+   return (png_ptr ? png_ptr->user_height_max : 0);
+}
+
+/* This function was added to libpng 1.4.0 */
+png_uint_32 PNGAPI
+png_get_chunk_cache_max(png_const_structrp png_ptr)
+{
+   return (png_ptr ? png_ptr->user_chunk_cache_max : 0);
+}
+
+/* This function was added to libpng 1.4.1 */
+png_alloc_size_t PNGAPI
+png_get_chunk_malloc_max(png_const_structrp png_ptr)
+{
+   return (png_ptr ? png_ptr->user_chunk_malloc_max : 0);
+}
+#endif /* SET_USER_LIMITS */
+
+/* These functions were added to libpng 1.4.0 */
+#ifdef PNG_IO_STATE_SUPPORTED
+png_uint_32 PNGAPI
+png_get_io_state(png_const_structrp png_ptr)
+{
+   return png_ptr->io_state;
+}
+
+png_uint_32 PNGAPI
+png_get_io_chunk_type(png_const_structrp png_ptr)
+{
+   return png_ptr->chunk_name;
+}
+#endif /* IO_STATE */
+
+#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
+#  ifdef PNG_GET_PALETTE_MAX_SUPPORTED
+int PNGAPI
+png_get_palette_max(png_const_structp png_ptr, png_const_infop info_ptr)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      return png_ptr->num_palette_max;
+
+   return -1;
+}
+#  endif
+#endif
+
+#endif /* READ || WRITE */
diff --git a/reg-io/png/lpng1510/pnginfo.h b/reg-io/png/lpng/pnginfo.h
similarity index 63%
rename from reg-io/png/lpng1510/pnginfo.h
rename to reg-io/png/lpng/pnginfo.h
index 926b66c8..dbbc35bc 100644
--- a/reg-io/png/lpng1510/pnginfo.h
+++ b/reg-io/png/lpng/pnginfo.h
@@ -1,267 +1,267 @@
-
-/* pnginfo.h - header file for PNG reference library
- *
- * Copyright (c) 1998-2011 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * Last changed in libpng 1.5.0 [January 6, 2011]
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-/* png_info is a structure that holds the information in a PNG file so
-* that the application can find out the characteristics of the image.
-* If you are reading the file, this structure will tell you what is
-* in the PNG file.  If you are writing the file, fill in the information
-* you want to put into the PNG file, using png_set_*() functions, then
-* call png_write_info().
-*
-* The names chosen should be very close to the PNG specification, so
-* consult that document for information about the meaning of each field.
-*
-* With libpng < 0.95, it was only possible to directly set and read the
-* the values in the png_info_struct, which meant that the contents and
-* order of the values had to remain fixed.  With libpng 0.95 and later,
-* however, there are now functions that abstract the contents of
-* png_info_struct from the application, so this makes it easier to use
-* libpng with dynamic libraries, and even makes it possible to use
-* libraries that don't have all of the libpng ancillary chunk-handing
-* functionality.  In libpng-1.5.0 this was moved into a separate private
-* file that is not visible to applications.
-*
-* The following members may have allocated storage attached that should be
-* cleaned up before the structure is discarded: palette, trans, text,
-* pcal_purpose, pcal_units, pcal_params, hist, iccp_name, iccp_profile,
-* splt_palettes, scal_unit, row_pointers, and unknowns.   By default, these
-* are automatically freed when the info structure is deallocated, if they were
-* allocated internally by libpng.  This behavior can be changed by means
-* of the png_data_freer() function.
-*
-* More allocation details: all the chunk-reading functions that
-* change these members go through the corresponding png_set_*
-* functions.  A function to clear these members is available: see
-* png_free_data().  The png_set_* functions do not depend on being
-* able to point info structure members to any of the storage they are
-* passed (they make their own copies), EXCEPT that the png_set_text
-* functions use the same storage passed to them in the text_ptr or
-* itxt_ptr structure argument, and the png_set_rows and png_set_unknowns
-* functions do not make their own copies.
-*/
-#pragma once
-
-struct png_info_def
-{
-   /* the following are necessary for every PNG file */
-   png_uint_32 width;  /* width of image in pixels (from IHDR) */
-   png_uint_32 height; /* height of image in pixels (from IHDR) */
-   png_uint_32 valid;  /* valid chunk data (see PNG_INFO_ below) */
-   png_size_t rowbytes; /* bytes needed to hold an untransformed row */
-   png_colorp palette;      /* array of color values (valid & PNG_INFO_PLTE) */
-   png_uint_16 num_palette; /* number of color entries in "palette" (PLTE) */
-   png_uint_16 num_trans;   /* number of transparent palette color (tRNS) */
-   png_byte bit_depth;      /* 1, 2, 4, 8, or 16 bits/channel (from IHDR) */
-   png_byte color_type;     /* see PNG_COLOR_TYPE_ below (from IHDR) */
-   /* The following three should have been named *_method not *_type */
-   png_byte compression_type; /* must be PNG_COMPRESSION_TYPE_BASE (IHDR) */
-   png_byte filter_type;    /* must be PNG_FILTER_TYPE_BASE (from IHDR) */
-   png_byte interlace_type; /* One of PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */
-
-   /* The following is informational only on read, and not used on writes. */
-   png_byte channels;       /* number of data channels per pixel (1, 2, 3, 4) */
-   png_byte pixel_depth;    /* number of bits per pixel */
-   png_byte spare_byte;     /* to align the data, and for future use */
-   png_byte signature[8];   /* magic bytes read by libpng from start of file */
-
-   /* The rest of the data is optional.  If you are reading, check the
-    * valid field to see if the information in these are valid.  If you
-    * are writing, set the valid field to those chunks you want written,
-    * and initialize the appropriate fields below.
-    */
-
-#if defined(PNG_gAMA_SUPPORTED)
-   /* The gAMA chunk describes the gamma characteristics of the system
-    * on which the image was created, normally in the range [1.0, 2.5].
-    * Data is valid if (valid & PNG_INFO_gAMA) is non-zero.
-    */
-   png_fixed_point gamma;
-#endif
-
-#ifdef PNG_sRGB_SUPPORTED
-   /* GR-P, 0.96a */
-   /* Data valid if (valid & PNG_INFO_sRGB) non-zero. */
-   png_byte srgb_intent; /* sRGB rendering intent [0, 1, 2, or 3] */
-#endif
-
-#ifdef PNG_TEXT_SUPPORTED
-   /* The tEXt, and zTXt chunks contain human-readable textual data in
-    * uncompressed, compressed, and optionally compressed forms, respectively.
-    * The data in "text" is an array of pointers to uncompressed,
-    * null-terminated C strings. Each chunk has a keyword that describes the
-    * textual data contained in that chunk.  Keywords are not required to be
-    * unique, and the text string may be empty.  Any number of text chunks may
-    * be in an image.
-    */
-   int num_text; /* number of comments read or comments to write */
-   int max_text; /* current size of text array */
-   png_textp text; /* array of comments read or comments to write */
-#endif /* PNG_TEXT_SUPPORTED */
-
-#ifdef PNG_tIME_SUPPORTED
-   /* The tIME chunk holds the last time the displayed image data was
-    * modified.  See the png_time struct for the contents of this struct.
-    */
-   png_time mod_time;
-#endif
-
-#ifdef PNG_sBIT_SUPPORTED
-   /* The sBIT chunk specifies the number of significant high-order bits
-    * in the pixel data.  Values are in the range [1, bit_depth], and are
-    * only specified for the channels in the pixel data.  The contents of
-    * the low-order bits is not specified.  Data is valid if
-    * (valid & PNG_INFO_sBIT) is non-zero.
-    */
-   png_color_8 sig_bit; /* significant bits in color channels */
-#endif
-
-#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_EXPAND_SUPPORTED) || \
-defined(PNG_READ_BACKGROUND_SUPPORTED)
-   /* The tRNS chunk supplies transparency data for paletted images and
-    * other image types that don't need a full alpha channel.  There are
-    * "num_trans" transparency values for a paletted image, stored in the
-    * same order as the palette colors, starting from index 0.  Values
-    * for the data are in the range [0, 255], ranging from fully transparent
-    * to fully opaque, respectively.  For non-paletted images, there is a
-    * single color specified that should be treated as fully transparent.
-    * Data is valid if (valid & PNG_INFO_tRNS) is non-zero.
-    */
-   png_bytep trans_alpha;    /* alpha values for paletted image */
-   png_color_16 trans_color; /* transparent color for non-palette image */
-#endif
-
-#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
-   /* The bKGD chunk gives the suggested image background color if the
-    * display program does not have its own background color and the image
-    * is needs to composited onto a background before display.  The colors
-    * in "background" are normally in the same color space/depth as the
-    * pixel data.  Data is valid if (valid & PNG_INFO_bKGD) is non-zero.
-    */
-   png_color_16 background;
-#endif
-
-#ifdef PNG_oFFs_SUPPORTED
-   /* The oFFs chunk gives the offset in "offset_unit_type" units rightwards
-    * and downwards from the top-left corner of the display, page, or other
-    * application-specific co-ordinate space.  See the PNG_OFFSET_ defines
-    * below for the unit types.  Valid if (valid & PNG_INFO_oFFs) non-zero.
-    */
-   png_int_32 x_offset; /* x offset on page */
-   png_int_32 y_offset; /* y offset on page */
-   png_byte offset_unit_type; /* offset units type */
-#endif
-
-#ifdef PNG_pHYs_SUPPORTED
-   /* The pHYs chunk gives the physical pixel density of the image for
-    * display or printing in "phys_unit_type" units (see PNG_RESOLUTION_
-    * defines below).  Data is valid if (valid & PNG_INFO_pHYs) is non-zero.
-    */
-   png_uint_32 x_pixels_per_unit; /* horizontal pixel density */
-   png_uint_32 y_pixels_per_unit; /* vertical pixel density */
-   png_byte phys_unit_type; /* resolution type (see PNG_RESOLUTION_ below) */
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-   /* The hIST chunk contains the relative frequency or importance of the
-    * various palette entries, so that a viewer can intelligently select a
-    * reduced-color palette, if required.  Data is an array of "num_palette"
-    * values in the range [0,65535]. Data valid if (valid & PNG_INFO_hIST)
-    * is non-zero.
-    */
-   png_uint_16p hist;
-#endif
-
-#ifdef PNG_cHRM_SUPPORTED
-   /* The cHRM chunk describes the CIE color characteristics of the monitor
-    * on which the PNG was created.  This data allows the viewer to do gamut
-    * mapping of the input image to ensure that the viewer sees the same
-    * colors in the image as the creator.  Values are in the range
-    * [0.0, 0.8].  Data valid if (valid & PNG_INFO_cHRM) non-zero.
-    */
-   png_fixed_point x_white;
-   png_fixed_point y_white;
-   png_fixed_point x_red;
-   png_fixed_point y_red;
-   png_fixed_point x_green;
-   png_fixed_point y_green;
-   png_fixed_point x_blue;
-   png_fixed_point y_blue;
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-   /* The pCAL chunk describes a transformation between the stored pixel
-    * values and original physical data values used to create the image.
-    * The integer range [0, 2^bit_depth - 1] maps to the floating-point
-    * range given by [pcal_X0, pcal_X1], and are further transformed by a
-    * (possibly non-linear) transformation function given by "pcal_type"
-    * and "pcal_params" into "pcal_units".  Please see the PNG_EQUATION_
-    * defines below, and the PNG-Group's PNG extensions document for a
-    * complete description of the transformations and how they should be
-    * implemented, and for a description of the ASCII parameter strings.
-    * Data values are valid if (valid & PNG_INFO_pCAL) non-zero.
-    */
-   png_charp pcal_purpose;  /* pCAL chunk description string */
-   png_int_32 pcal_X0;      /* minimum value */
-   png_int_32 pcal_X1;      /* maximum value */
-   png_charp pcal_units;    /* Latin-1 string giving physical units */
-   png_charpp pcal_params;  /* ASCII strings containing parameter values */
-   png_byte pcal_type;      /* equation type (see PNG_EQUATION_ below) */
-   png_byte pcal_nparams;   /* number of parameters given in pcal_params */
-#endif
-
-   /* New members added in libpng-1.0.6 */
-   png_uint_32 free_me;     /* flags items libpng is responsible for freeing */
-
-#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED) || \
- defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED)
-   /* Storage for unknown chunks that the library doesn't recognize. */
-   png_unknown_chunkp unknown_chunks;
-   int unknown_chunks_num;
-#endif
-
-#ifdef PNG_iCCP_SUPPORTED
-   /* iCCP chunk data. */
-   png_charp iccp_name;     /* profile name */
-   png_bytep iccp_profile;  /* International Color Consortium profile data */
-   png_uint_32 iccp_proflen;  /* ICC profile data length */
-   png_byte iccp_compression; /* Always zero */
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-   /* Data on sPLT chunks (there may be more than one). */
-   png_sPLT_tp splt_palettes;
-   png_uint_32 splt_palettes_num;
-#endif
-
-#ifdef PNG_sCAL_SUPPORTED
-   /* The sCAL chunk describes the actual physical dimensions of the
-    * subject matter of the graphic.  The chunk contains a unit specification
-    * a byte value, and two ASCII strings representing floating-point
-    * values.  The values are width and height corresponsing to one pixel
-    * in the image.  Data values are valid if (valid & PNG_INFO_sCAL) is
-    * non-zero.
-    */
-   png_byte scal_unit;         /* unit of physical scale */
-   png_charp scal_s_width;     /* string containing height */
-   png_charp scal_s_height;    /* string containing width */
-#endif
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-   /* Memory has been allocated if (valid & PNG_ALLOCATED_INFO_ROWS)
-      non-zero */
-   /* Data valid if (valid & PNG_INFO_IDAT) non-zero */
-   png_bytepp row_pointers;        /* the image bits */
-#endif
-
-};
+
+/* pnginfo.h - header file for PNG reference library
+ *
+ * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2013,2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+ /* png_info is a structure that holds the information in a PNG file so
+ * that the application can find out the characteristics of the image.
+ * If you are reading the file, this structure will tell you what is
+ * in the PNG file.  If you are writing the file, fill in the information
+ * you want to put into the PNG file, using png_set_*() functions, then
+ * call png_write_info().
+ *
+ * The names chosen should be very close to the PNG specification, so
+ * consult that document for information about the meaning of each field.
+ *
+ * With libpng < 0.95, it was only possible to directly set and read the
+ * the values in the png_info_struct, which meant that the contents and
+ * order of the values had to remain fixed.  With libpng 0.95 and later,
+ * however, there are now functions that abstract the contents of
+ * png_info_struct from the application, so this makes it easier to use
+ * libpng with dynamic libraries, and even makes it possible to use
+ * libraries that don't have all of the libpng ancillary chunk-handing
+ * functionality.  In libpng-1.5.0 this was moved into a separate private
+ * file that is not visible to applications.
+ *
+ * The following members may have allocated storage attached that should be
+ * cleaned up before the structure is discarded: palette, trans, text,
+ * pcal_purpose, pcal_units, pcal_params, hist, iccp_name, iccp_profile,
+ * splt_palettes, scal_unit, row_pointers, and unknowns.   By default, these
+ * are automatically freed when the info structure is deallocated, if they were
+ * allocated internally by libpng.  This behavior can be changed by means
+ * of the png_data_freer() function.
+ *
+ * More allocation details: all the chunk-reading functions that
+ * change these members go through the corresponding png_set_*
+ * functions.  A function to clear these members is available: see
+ * png_free_data().  The png_set_* functions do not depend on being
+ * able to point info structure members to any of the storage they are
+ * passed (they make their own copies), EXCEPT that the png_set_text
+ * functions use the same storage passed to them in the text_ptr or
+ * itxt_ptr structure argument, and the png_set_rows and png_set_unknowns
+ * functions do not make their own copies.
+ */
+#ifndef PNGINFO_H
+#define PNGINFO_H
+
+struct png_info_def
+{
+   /* The following are necessary for every PNG file */
+   png_uint_32 width;       /* width of image in pixels (from IHDR) */
+   png_uint_32 height;      /* height of image in pixels (from IHDR) */
+   png_uint_32 valid;       /* valid chunk data (see PNG_INFO_ below) */
+   size_t rowbytes;         /* bytes needed to hold an untransformed row */
+   png_colorp palette;      /* array of color values (valid & PNG_INFO_PLTE) */
+   png_uint_16 num_palette; /* number of color entries in "palette" (PLTE) */
+   png_uint_16 num_trans;   /* number of transparent palette color (tRNS) */
+   png_byte bit_depth;      /* 1, 2, 4, 8, or 16 bits/channel (from IHDR) */
+   png_byte color_type;     /* see PNG_COLOR_TYPE_ below (from IHDR) */
+   /* The following three should have been named *_method not *_type */
+   png_byte compression_type; /* must be PNG_COMPRESSION_TYPE_BASE (IHDR) */
+   png_byte filter_type;    /* must be PNG_FILTER_TYPE_BASE (from IHDR) */
+   png_byte interlace_type; /* One of PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */
+
+   /* The following are set by png_set_IHDR, called from the application on
+    * write, but the are never actually used by the write code.
+    */
+   png_byte channels;       /* number of data channels per pixel (1, 2, 3, 4) */
+   png_byte pixel_depth;    /* number of bits per pixel */
+   png_byte spare_byte;     /* to align the data, and for future use */
+
+#ifdef PNG_READ_SUPPORTED
+   /* This is never set during write */
+   png_byte signature[8];   /* magic bytes read by libpng from start of file */
+#endif
+
+   /* The rest of the data is optional.  If you are reading, check the
+    * valid field to see if the information in these are valid.  If you
+    * are writing, set the valid field to those chunks you want written,
+    * and initialize the appropriate fields below.
+    */
+
+#if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED)
+   /* png_colorspace only contains 'flags' if neither GAMMA or COLORSPACE are
+    * defined.  When COLORSPACE is switched on all the colorspace-defining
+    * chunks should be enabled, when GAMMA is switched on all the gamma-defining
+    * chunks should be enabled.  If this is not done it becomes possible to read
+    * inconsistent PNG files and assign a probably incorrect interpretation to
+    * the information.  (In other words, by carefully choosing which chunks to
+    * recognize the system configuration can select an interpretation for PNG
+    * files containing ambiguous data and this will result in inconsistent
+    * behavior between different libpng builds!)
+    */
+   png_colorspace colorspace;
+#endif
+
+#ifdef PNG_iCCP_SUPPORTED
+   /* iCCP chunk data. */
+   png_charp iccp_name;     /* profile name */
+   png_bytep iccp_profile;  /* International Color Consortium profile data */
+   png_uint_32 iccp_proflen;  /* ICC profile data length */
+#endif
+
+#ifdef PNG_TEXT_SUPPORTED
+   /* The tEXt, and zTXt chunks contain human-readable textual data in
+    * uncompressed, compressed, and optionally compressed forms, respectively.
+    * The data in "text" is an array of pointers to uncompressed,
+    * null-terminated C strings. Each chunk has a keyword that describes the
+    * textual data contained in that chunk.  Keywords are not required to be
+    * unique, and the text string may be empty.  Any number of text chunks may
+    * be in an image.
+    */
+   int num_text; /* number of comments read or comments to write */
+   int max_text; /* current size of text array */
+   png_textp text; /* array of comments read or comments to write */
+#endif /* TEXT */
+
+#ifdef PNG_tIME_SUPPORTED
+   /* The tIME chunk holds the last time the displayed image data was
+    * modified.  See the png_time struct for the contents of this struct.
+    */
+   png_time mod_time;
+#endif
+
+#ifdef PNG_sBIT_SUPPORTED
+   /* The sBIT chunk specifies the number of significant high-order bits
+    * in the pixel data.  Values are in the range [1, bit_depth], and are
+    * only specified for the channels in the pixel data.  The contents of
+    * the low-order bits is not specified.  Data is valid if
+    * (valid & PNG_INFO_sBIT) is non-zero.
+    */
+   png_color_8 sig_bit; /* significant bits in color channels */
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_EXPAND_SUPPORTED) || \
+defined(PNG_READ_BACKGROUND_SUPPORTED)
+   /* The tRNS chunk supplies transparency data for paletted images and
+    * other image types that don't need a full alpha channel.  There are
+    * "num_trans" transparency values for a paletted image, stored in the
+    * same order as the palette colors, starting from index 0.  Values
+    * for the data are in the range [0, 255], ranging from fully transparent
+    * to fully opaque, respectively.  For non-paletted images, there is a
+    * single color specified that should be treated as fully transparent.
+    * Data is valid if (valid & PNG_INFO_tRNS) is non-zero.
+    */
+   png_bytep trans_alpha;    /* alpha values for paletted image */
+   png_color_16 trans_color; /* transparent color for non-palette image */
+#endif
+
+#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   /* The bKGD chunk gives the suggested image background color if the
+    * display program does not have its own background color and the image
+    * is needs to composited onto a background before display.  The colors
+    * in "background" are normally in the same color space/depth as the
+    * pixel data.  Data is valid if (valid & PNG_INFO_bKGD) is non-zero.
+    */
+   png_color_16 background;
+#endif
+
+#ifdef PNG_oFFs_SUPPORTED
+   /* The oFFs chunk gives the offset in "offset_unit_type" units rightwards
+    * and downwards from the top-left corner of the display, page, or other
+    * application-specific co-ordinate space.  See the PNG_OFFSET_ defines
+    * below for the unit types.  Valid if (valid & PNG_INFO_oFFs) non-zero.
+    */
+   png_int_32 x_offset; /* x offset on page */
+   png_int_32 y_offset; /* y offset on page */
+   png_byte offset_unit_type; /* offset units type */
+#endif
+
+#ifdef PNG_pHYs_SUPPORTED
+   /* The pHYs chunk gives the physical pixel density of the image for
+    * display or printing in "phys_unit_type" units (see PNG_RESOLUTION_
+    * defines below).  Data is valid if (valid & PNG_INFO_pHYs) is non-zero.
+    */
+   png_uint_32 x_pixels_per_unit; /* horizontal pixel density */
+   png_uint_32 y_pixels_per_unit; /* vertical pixel density */
+   png_byte phys_unit_type; /* resolution type (see PNG_RESOLUTION_ below) */
+#endif
+
+#ifdef PNG_eXIf_SUPPORTED
+   int num_exif;  /* Added at libpng-1.6.31 */
+   png_bytep exif;
+# ifdef PNG_READ_eXIf_SUPPORTED
+   png_bytep eXIf_buf;  /* Added at libpng-1.6.32 */
+# endif
+#endif
+
+#ifdef PNG_hIST_SUPPORTED
+   /* The hIST chunk contains the relative frequency or importance of the
+    * various palette entries, so that a viewer can intelligently select a
+    * reduced-color palette, if required.  Data is an array of "num_palette"
+    * values in the range [0,65535]. Data valid if (valid & PNG_INFO_hIST)
+    * is non-zero.
+    */
+   png_uint_16p hist;
+#endif
+
+#ifdef PNG_pCAL_SUPPORTED
+   /* The pCAL chunk describes a transformation between the stored pixel
+    * values and original physical data values used to create the image.
+    * The integer range [0, 2^bit_depth - 1] maps to the floating-point
+    * range given by [pcal_X0, pcal_X1], and are further transformed by a
+    * (possibly non-linear) transformation function given by "pcal_type"
+    * and "pcal_params" into "pcal_units".  Please see the PNG_EQUATION_
+    * defines below, and the PNG-Group's PNG extensions document for a
+    * complete description of the transformations and how they should be
+    * implemented, and for a description of the ASCII parameter strings.
+    * Data values are valid if (valid & PNG_INFO_pCAL) non-zero.
+    */
+   png_charp pcal_purpose;  /* pCAL chunk description string */
+   png_int_32 pcal_X0;      /* minimum value */
+   png_int_32 pcal_X1;      /* maximum value */
+   png_charp pcal_units;    /* Latin-1 string giving physical units */
+   png_charpp pcal_params;  /* ASCII strings containing parameter values */
+   png_byte pcal_type;      /* equation type (see PNG_EQUATION_ below) */
+   png_byte pcal_nparams;   /* number of parameters given in pcal_params */
+#endif
+
+/* New members added in libpng-1.0.6 */
+   png_uint_32 free_me;     /* flags items libpng is responsible for freeing */
+
+#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
+   /* Storage for unknown chunks that the library doesn't recognize. */
+   png_unknown_chunkp unknown_chunks;
+
+   /* The type of this field is limited by the type of
+    * png_struct::user_chunk_cache_max, else overflow can occur.
+    */
+   int                unknown_chunks_num;
+#endif
+
+#ifdef PNG_sPLT_SUPPORTED
+   /* Data on sPLT chunks (there may be more than one). */
+   png_sPLT_tp splt_palettes;
+   int         splt_palettes_num; /* Match type returned by png_get API */
+#endif
+
+#ifdef PNG_sCAL_SUPPORTED
+   /* The sCAL chunk describes the actual physical dimensions of the
+    * subject matter of the graphic.  The chunk contains a unit specification
+    * a byte value, and two ASCII strings representing floating-point
+    * values.  The values are width and height corresponding to one pixel
+    * in the image.  Data values are valid if (valid & PNG_INFO_sCAL) is
+    * non-zero.
+    */
+   png_byte scal_unit;         /* unit of physical scale */
+   png_charp scal_s_width;     /* string containing height */
+   png_charp scal_s_height;    /* string containing width */
+#endif
+
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+   /* Memory has been allocated if (valid & PNG_ALLOCATED_INFO_ROWS)
+      non-zero */
+   /* Data valid if (valid & PNG_INFO_IDAT) non-zero */
+   png_bytepp row_pointers;        /* the image bits */
+#endif
+
+};
+#endif /* PNGINFO_H */
diff --git a/reg-io/png/lpng1510/pnglibconf.h.prebuilt b/reg-io/png/lpng/pnglibconf.h.prebuilt
similarity index 69%
rename from reg-io/png/lpng1510/pnglibconf.h.prebuilt
rename to reg-io/png/lpng/pnglibconf.h.prebuilt
index 5fba410d..4247719f 100644
--- a/reg-io/png/lpng1510/pnglibconf.h.prebuilt
+++ b/reg-io/png/lpng/pnglibconf.h.prebuilt
@@ -1,136 +1,127 @@
-
-/* libpng STANDARD API DEFINITION */
-
 /* pnglibconf.h - library build configuration */
 
-/* Libpng 1.5.10 - March 29, 2012 */
+/* libpng version 1.6.42 */
 
-/* Copyright (c) 1998-2012 Glenn Randers-Pehrson */
+/* Copyright (c) 2018-2024 Cosmin Truta */
+/* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson */
 
 /* This code is released under the libpng license. */
 /* For conditions of distribution and use, see the disclaimer */
 /* and license in png.h */
 
 /* pnglibconf.h */
+/* Machine generated file: DO NOT EDIT */
 /* Derived from: scripts/pnglibconf.dfa */
-/* If you edit this file by hand you must obey the rules expressed in */
-/* pnglibconf.dfa with respect to the dependencies between the following */
-/* symbols.  It is much better to generate a new file using */
-/* scripts/libpngconf.mak */
-
-#pragma once
-/* settings */
-#define PNG_API_RULE 0
-#define PNG_CALLOC_SUPPORTED
-#define PNG_COST_SHIFT 3
-#define PNG_DEFAULT_READ_MACROS 1
-#define PNG_GAMMA_THRESHOLD_FIXED 5000
-#define PNG_MAX_GAMMA_8 11
-#define PNG_QUANTIZE_BLUE_BITS 5
-#define PNG_QUANTIZE_GREEN_BITS 5
-#define PNG_QUANTIZE_RED_BITS 5
-#define PNG_sCAL_PRECISION 5
-#define PNG_WEIGHT_SHIFT 8
-#define PNG_ZBUF_SIZE 8192
-/* end of settings */
+#ifndef PNGLCONF_H
+#define PNGLCONF_H
 /* options */
 #define PNG_16BIT_SUPPORTED
-#define PNG_ALIGN_MEMORY_SUPPORTED
+#define PNG_ALIGNED_MEMORY_SUPPORTED
+/*#undef PNG_ARM_NEON_API_SUPPORTED*/
+/*#undef PNG_ARM_NEON_CHECK_SUPPORTED*/
 #define PNG_BENIGN_ERRORS_SUPPORTED
-#define PNG_bKGD_SUPPORTED
+#define PNG_BENIGN_READ_ERRORS_SUPPORTED
+/*#undef PNG_BENIGN_WRITE_ERRORS_SUPPORTED*/
 #define PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED
-#define PNG_CHECK_cHRM_SUPPORTED
 #define PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
-#define PNG_cHRM_SUPPORTED
+#define PNG_COLORSPACE_SUPPORTED
 #define PNG_CONSOLE_IO_SUPPORTED
 #define PNG_CONVERT_tIME_SUPPORTED
+/*#undef PNG_DISABLE_ADLER32_CHECK_SUPPORTED*/
 #define PNG_EASY_ACCESS_SUPPORTED
 /*#undef PNG_ERROR_NUMBERS_SUPPORTED*/
 #define PNG_ERROR_TEXT_SUPPORTED
 #define PNG_FIXED_POINT_SUPPORTED
 #define PNG_FLOATING_ARITHMETIC_SUPPORTED
 #define PNG_FLOATING_POINT_SUPPORTED
-#define PNG_gAMA_SUPPORTED
+#define PNG_FORMAT_AFIRST_SUPPORTED
+#define PNG_FORMAT_BGR_SUPPORTED
+#define PNG_GAMMA_SUPPORTED
+#define PNG_GET_PALETTE_MAX_SUPPORTED
 #define PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-#define PNG_hIST_SUPPORTED
-#define PNG_iCCP_SUPPORTED
 #define PNG_INCH_CONVERSIONS_SUPPORTED
 #define PNG_INFO_IMAGE_SUPPORTED
 #define PNG_IO_STATE_SUPPORTED
-#define PNG_iTXt_SUPPORTED
+/*#undef PNG_MIPS_MMI_API_SUPPORTED*/
+/*#undef PNG_MIPS_MMI_CHECK_SUPPORTED*/
+/*#undef PNG_MIPS_MSA_API_SUPPORTED*/
+/*#undef PNG_MIPS_MSA_CHECK_SUPPORTED*/
 #define PNG_MNG_FEATURES_SUPPORTED
-#define PNG_oFFs_SUPPORTED
-#define PNG_pCAL_SUPPORTED
-#define PNG_pHYs_SUPPORTED
 #define PNG_POINTER_INDEXING_SUPPORTED
+/*#undef PNG_POWERPC_VSX_API_SUPPORTED*/
+/*#undef PNG_POWERPC_VSX_CHECK_SUPPORTED*/
 #define PNG_PROGRESSIVE_READ_SUPPORTED
 #define PNG_READ_16BIT_SUPPORTED
 #define PNG_READ_ALPHA_MODE_SUPPORTED
 #define PNG_READ_ANCILLARY_CHUNKS_SUPPORTED
 #define PNG_READ_BACKGROUND_SUPPORTED
 #define PNG_READ_BGR_SUPPORTED
-#define PNG_READ_bKGD_SUPPORTED
 #define PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED
-#define PNG_READ_cHRM_SUPPORTED
 #define PNG_READ_COMPOSITE_NODIV_SUPPORTED
 #define PNG_READ_COMPRESSED_TEXT_SUPPORTED
 #define PNG_READ_EXPAND_16_SUPPORTED
 #define PNG_READ_EXPAND_SUPPORTED
 #define PNG_READ_FILLER_SUPPORTED
-#define PNG_READ_gAMA_SUPPORTED
 #define PNG_READ_GAMMA_SUPPORTED
+#define PNG_READ_GET_PALETTE_MAX_SUPPORTED
 #define PNG_READ_GRAY_TO_RGB_SUPPORTED
-#define PNG_READ_hIST_SUPPORTED
-#define PNG_READ_iCCP_SUPPORTED
 #define PNG_READ_INTERLACING_SUPPORTED
 #define PNG_READ_INT_FUNCTIONS_SUPPORTED
 #define PNG_READ_INVERT_ALPHA_SUPPORTED
 #define PNG_READ_INVERT_SUPPORTED
-#define PNG_READ_iTXt_SUPPORTED
-#define PNG_READ_oFFs_SUPPORTED
 #define PNG_READ_OPT_PLTE_SUPPORTED
-#define PNG_READ_PACK_SUPPORTED
 #define PNG_READ_PACKSWAP_SUPPORTED
-#define PNG_READ_pCAL_SUPPORTED
-#define PNG_READ_pHYs_SUPPORTED
+#define PNG_READ_PACK_SUPPORTED
 #define PNG_READ_QUANTIZE_SUPPORTED
 #define PNG_READ_RGB_TO_GRAY_SUPPORTED
-#define PNG_READ_sBIT_SUPPORTED
 #define PNG_READ_SCALE_16_TO_8_SUPPORTED
-#define PNG_READ_sCAL_SUPPORTED
 #define PNG_READ_SHIFT_SUPPORTED
-#define PNG_READ_sPLT_SUPPORTED
-#define PNG_READ_sRGB_SUPPORTED
 #define PNG_READ_STRIP_16_TO_8_SUPPORTED
 #define PNG_READ_STRIP_ALPHA_SUPPORTED
 #define PNG_READ_SUPPORTED
 #define PNG_READ_SWAP_ALPHA_SUPPORTED
 #define PNG_READ_SWAP_SUPPORTED
-#define PNG_READ_tEXt_SUPPORTED
 #define PNG_READ_TEXT_SUPPORTED
-#define PNG_READ_tIME_SUPPORTED
 #define PNG_READ_TRANSFORMS_SUPPORTED
-#define PNG_READ_tRNS_SUPPORTED
 #define PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
 #define PNG_READ_USER_CHUNKS_SUPPORTED
 #define PNG_READ_USER_TRANSFORM_SUPPORTED
+#define PNG_READ_bKGD_SUPPORTED
+#define PNG_READ_cHRM_SUPPORTED
+#define PNG_READ_eXIf_SUPPORTED
+#define PNG_READ_gAMA_SUPPORTED
+#define PNG_READ_hIST_SUPPORTED
+#define PNG_READ_iCCP_SUPPORTED
+#define PNG_READ_iTXt_SUPPORTED
+#define PNG_READ_oFFs_SUPPORTED
+#define PNG_READ_pCAL_SUPPORTED
+#define PNG_READ_pHYs_SUPPORTED
+#define PNG_READ_sBIT_SUPPORTED
+#define PNG_READ_sCAL_SUPPORTED
+#define PNG_READ_sPLT_SUPPORTED
+#define PNG_READ_sRGB_SUPPORTED
+#define PNG_READ_tEXt_SUPPORTED
+#define PNG_READ_tIME_SUPPORTED
+#define PNG_READ_tRNS_SUPPORTED
 #define PNG_READ_zTXt_SUPPORTED
 #define PNG_SAVE_INT_32_SUPPORTED
-#define PNG_sBIT_SUPPORTED
-#define PNG_sCAL_SUPPORTED
+#define PNG_SAVE_UNKNOWN_CHUNKS_SUPPORTED
 #define PNG_SEQUENTIAL_READ_SUPPORTED
-#define PNG_SET_CHUNK_CACHE_LIMIT_SUPPORTED
-#define PNG_SET_CHUNK_MALLOC_LIMIT_SUPPORTED
 #define PNG_SETJMP_SUPPORTED
+#define PNG_SET_OPTION_SUPPORTED
+#define PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
 #define PNG_SET_USER_LIMITS_SUPPORTED
-#define PNG_sPLT_SUPPORTED
-#define PNG_sRGB_SUPPORTED
+#define PNG_SIMPLIFIED_READ_AFIRST_SUPPORTED
+#define PNG_SIMPLIFIED_READ_BGR_SUPPORTED
+#define PNG_SIMPLIFIED_READ_SUPPORTED
+#define PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED
+#define PNG_SIMPLIFIED_WRITE_BGR_SUPPORTED
+#define PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED
+#define PNG_SIMPLIFIED_WRITE_SUPPORTED
 #define PNG_STDIO_SUPPORTED
-#define PNG_tEXt_SUPPORTED
+#define PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
 #define PNG_TEXT_SUPPORTED
 #define PNG_TIME_RFC1123_SUPPORTED
-#define PNG_tIME_SUPPORTED
-#define PNG_tRNS_SUPPORTED
 #define PNG_UNKNOWN_CHUNKS_SUPPORTED
 #define PNG_USER_CHUNKS_SUPPORTED
 #define PNG_USER_LIMITS_SUPPORTED
@@ -141,44 +132,93 @@
 #define PNG_WRITE_16BIT_SUPPORTED
 #define PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED
 #define PNG_WRITE_BGR_SUPPORTED
-#define PNG_WRITE_bKGD_SUPPORTED
 #define PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED
-#define PNG_WRITE_cHRM_SUPPORTED
 #define PNG_WRITE_COMPRESSED_TEXT_SUPPORTED
+#define PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED
 #define PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
 #define PNG_WRITE_FILLER_SUPPORTED
 #define PNG_WRITE_FILTER_SUPPORTED
 #define PNG_WRITE_FLUSH_SUPPORTED
-#define PNG_WRITE_gAMA_SUPPORTED
-#define PNG_WRITE_hIST_SUPPORTED
-#define PNG_WRITE_iCCP_SUPPORTED
+#define PNG_WRITE_GET_PALETTE_MAX_SUPPORTED
 #define PNG_WRITE_INTERLACING_SUPPORTED
 #define PNG_WRITE_INT_FUNCTIONS_SUPPORTED
 #define PNG_WRITE_INVERT_ALPHA_SUPPORTED
 #define PNG_WRITE_INVERT_SUPPORTED
-#define PNG_WRITE_iTXt_SUPPORTED
-#define PNG_WRITE_oFFs_SUPPORTED
 #define PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
-#define PNG_WRITE_PACK_SUPPORTED
 #define PNG_WRITE_PACKSWAP_SUPPORTED
-#define PNG_WRITE_pCAL_SUPPORTED
-#define PNG_WRITE_pHYs_SUPPORTED
-#define PNG_WRITE_sBIT_SUPPORTED
-#define PNG_WRITE_sCAL_SUPPORTED
+#define PNG_WRITE_PACK_SUPPORTED
 #define PNG_WRITE_SHIFT_SUPPORTED
-#define PNG_WRITE_sPLT_SUPPORTED
-#define PNG_WRITE_sRGB_SUPPORTED
 #define PNG_WRITE_SUPPORTED
 #define PNG_WRITE_SWAP_ALPHA_SUPPORTED
 #define PNG_WRITE_SWAP_SUPPORTED
-#define PNG_WRITE_tEXt_SUPPORTED
 #define PNG_WRITE_TEXT_SUPPORTED
-#define PNG_WRITE_tIME_SUPPORTED
 #define PNG_WRITE_TRANSFORMS_SUPPORTED
-#define PNG_WRITE_tRNS_SUPPORTED
 #define PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
 #define PNG_WRITE_USER_TRANSFORM_SUPPORTED
 #define PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
+#define PNG_WRITE_bKGD_SUPPORTED
+#define PNG_WRITE_cHRM_SUPPORTED
+#define PNG_WRITE_eXIf_SUPPORTED
+#define PNG_WRITE_gAMA_SUPPORTED
+#define PNG_WRITE_hIST_SUPPORTED
+#define PNG_WRITE_iCCP_SUPPORTED
+#define PNG_WRITE_iTXt_SUPPORTED
+#define PNG_WRITE_oFFs_SUPPORTED
+#define PNG_WRITE_pCAL_SUPPORTED
+#define PNG_WRITE_pHYs_SUPPORTED
+#define PNG_WRITE_sBIT_SUPPORTED
+#define PNG_WRITE_sCAL_SUPPORTED
+#define PNG_WRITE_sPLT_SUPPORTED
+#define PNG_WRITE_sRGB_SUPPORTED
+#define PNG_WRITE_tEXt_SUPPORTED
+#define PNG_WRITE_tIME_SUPPORTED
+#define PNG_WRITE_tRNS_SUPPORTED
 #define PNG_WRITE_zTXt_SUPPORTED
+#define PNG_bKGD_SUPPORTED
+#define PNG_cHRM_SUPPORTED
+#define PNG_eXIf_SUPPORTED
+#define PNG_gAMA_SUPPORTED
+#define PNG_hIST_SUPPORTED
+#define PNG_iCCP_SUPPORTED
+#define PNG_iTXt_SUPPORTED
+#define PNG_oFFs_SUPPORTED
+#define PNG_pCAL_SUPPORTED
+#define PNG_pHYs_SUPPORTED
+#define PNG_sBIT_SUPPORTED
+#define PNG_sCAL_SUPPORTED
+#define PNG_sPLT_SUPPORTED
+#define PNG_sRGB_SUPPORTED
+#define PNG_tEXt_SUPPORTED
+#define PNG_tIME_SUPPORTED
+#define PNG_tRNS_SUPPORTED
 #define PNG_zTXt_SUPPORTED
 /* end of options */
+/* settings */
+#define PNG_API_RULE 0
+#define PNG_DEFAULT_READ_MACROS 1
+#define PNG_GAMMA_THRESHOLD_FIXED 5000
+#define PNG_IDAT_READ_SIZE PNG_ZBUF_SIZE
+#define PNG_INFLATE_BUF_SIZE 1024
+#define PNG_LINKAGE_API extern
+#define PNG_LINKAGE_CALLBACK extern
+#define PNG_LINKAGE_DATA extern
+#define PNG_LINKAGE_FUNCTION extern
+#define PNG_MAX_GAMMA_8 11
+#define PNG_QUANTIZE_BLUE_BITS 5
+#define PNG_QUANTIZE_GREEN_BITS 5
+#define PNG_QUANTIZE_RED_BITS 5
+#define PNG_TEXT_Z_DEFAULT_COMPRESSION (-1)
+#define PNG_TEXT_Z_DEFAULT_STRATEGY 0
+#define PNG_USER_CHUNK_CACHE_MAX 1000
+#define PNG_USER_CHUNK_MALLOC_MAX 8000000
+#define PNG_USER_HEIGHT_MAX 1000000
+#define PNG_USER_WIDTH_MAX 1000000
+#define PNG_ZBUF_SIZE 8192
+#define PNG_ZLIB_VERNUM 0 /* unknown */
+#define PNG_Z_DEFAULT_COMPRESSION (-1)
+#define PNG_Z_DEFAULT_NOFILTER_STRATEGY 0
+#define PNG_Z_DEFAULT_STRATEGY 1
+#define PNG_sCAL_PRECISION 5
+#define PNG_sRGB_PROFILE_CHECKS 2
+/* end of settings */
+#endif /* PNGLCONF_H */
diff --git a/reg-io/png/lpng/pngmem.c b/reg-io/png/lpng/pngmem.c
new file mode 100644
index 00000000..5780e764
--- /dev/null
+++ b/reg-io/png/lpng/pngmem.c
@@ -0,0 +1,284 @@
+
+/* pngmem.c - stub functions for memory allocation
+ *
+ * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2014,2016 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ *
+ * This file provides a location for all memory allocation.  Users who
+ * need special memory handling are expected to supply replacement
+ * functions for png_malloc() and png_free(), and to use
+ * png_create_read_struct_2() and png_create_write_struct_2() to
+ * identify the replacement functions.
+ */
+
+#include "pngpriv.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+/* Free a png_struct */
+void /* PRIVATE */
+png_destroy_png_struct(png_structrp png_ptr)
+{
+   if (png_ptr != NULL)
+   {
+      /* png_free might call png_error and may certainly call
+       * png_get_mem_ptr, so fake a temporary png_struct to support this.
+       */
+      png_struct dummy_struct = *png_ptr;
+      memset(png_ptr, 0, (sizeof *png_ptr));
+      png_free(&dummy_struct, png_ptr);
+
+#     ifdef PNG_SETJMP_SUPPORTED
+         /* We may have a jmp_buf left to deallocate. */
+         png_free_jmpbuf(&dummy_struct);
+#     endif
+   }
+}
+
+/* Allocate memory.  For reasonable files, size should never exceed
+ * 64K.  However, zlib may allocate more than 64K if you don't tell
+ * it not to.  See zconf.h and png.h for more information.  zlib does
+ * need to allocate exactly 64K, so whatever you call here must
+ * have the ability to do that.
+ */
+PNG_FUNCTION(png_voidp,PNGAPI
+png_calloc,(png_const_structrp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
+{
+   png_voidp ret;
+
+   ret = png_malloc(png_ptr, size);
+
+   if (ret != NULL)
+      memset(ret, 0, size);
+
+   return ret;
+}
+
+/* png_malloc_base, an internal function added at libpng 1.6.0, does the work of
+ * allocating memory, taking into account limits and PNG_USER_MEM_SUPPORTED.
+ * Checking and error handling must happen outside this routine; it returns NULL
+ * if the allocation cannot be done (for any reason.)
+ */
+PNG_FUNCTION(png_voidp /* PRIVATE */,
+png_malloc_base,(png_const_structrp png_ptr, png_alloc_size_t size),
+    PNG_ALLOCATED)
+{
+   /* Moved to png_malloc_base from png_malloc_default in 1.6.0; the DOS
+    * allocators have also been removed in 1.6.0, so any 16-bit system now has
+    * to implement a user memory handler.  This checks to be sure it isn't
+    * called with big numbers.
+    */
+#ifndef PNG_USER_MEM_SUPPORTED
+   PNG_UNUSED(png_ptr)
+#endif
+
+   /* Some compilers complain that this is always true.  However, it
+    * can be false when integer overflow happens.
+    */
+   if (size > 0 && size <= PNG_SIZE_MAX
+#     ifdef PNG_MAX_MALLOC_64K
+         && size <= 65536U
+#     endif
+      )
+   {
+#ifdef PNG_USER_MEM_SUPPORTED
+      if (png_ptr != NULL && png_ptr->malloc_fn != NULL)
+         return png_ptr->malloc_fn(png_constcast(png_structrp,png_ptr), size);
+
+      else
+#endif
+         return malloc((size_t)size); /* checked for truncation above */
+   }
+
+   else
+      return NULL;
+}
+
+#if defined(PNG_TEXT_SUPPORTED) || defined(PNG_sPLT_SUPPORTED) ||\
+   defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED)
+/* This is really here only to work round a spurious warning in GCC 4.6 and 4.7
+ * that arises because of the checks in png_realloc_array that are repeated in
+ * png_malloc_array.
+ */
+static png_voidp
+png_malloc_array_checked(png_const_structrp png_ptr, int nelements,
+    size_t element_size)
+{
+   png_alloc_size_t req = (png_alloc_size_t)nelements; /* known to be > 0 */
+
+   if (req <= PNG_SIZE_MAX/element_size)
+      return png_malloc_base(png_ptr, req * element_size);
+
+   /* The failure case when the request is too large */
+   return NULL;
+}
+
+PNG_FUNCTION(png_voidp /* PRIVATE */,
+png_malloc_array,(png_const_structrp png_ptr, int nelements,
+    size_t element_size),PNG_ALLOCATED)
+{
+   if (nelements <= 0 || element_size == 0)
+      png_error(png_ptr, "internal error: array alloc");
+
+   return png_malloc_array_checked(png_ptr, nelements, element_size);
+}
+
+PNG_FUNCTION(png_voidp /* PRIVATE */,
+png_realloc_array,(png_const_structrp png_ptr, png_const_voidp old_array,
+    int old_elements, int add_elements, size_t element_size),PNG_ALLOCATED)
+{
+   /* These are internal errors: */
+   if (add_elements <= 0 || element_size == 0 || old_elements < 0 ||
+      (old_array == NULL && old_elements > 0))
+      png_error(png_ptr, "internal error: array realloc");
+
+   /* Check for overflow on the elements count (so the caller does not have to
+    * check.)
+    */
+   if (add_elements <= INT_MAX - old_elements)
+   {
+      png_voidp new_array = png_malloc_array_checked(png_ptr,
+          old_elements+add_elements, element_size);
+
+      if (new_array != NULL)
+      {
+         /* Because png_malloc_array worked the size calculations below cannot
+          * overflow.
+          */
+         if (old_elements > 0)
+            memcpy(new_array, old_array, element_size*(unsigned)old_elements);
+
+         memset((char*)new_array + element_size*(unsigned)old_elements, 0,
+             element_size*(unsigned)add_elements);
+
+         return new_array;
+      }
+   }
+
+   return NULL; /* error */
+}
+#endif /* TEXT || sPLT || STORE_UNKNOWN_CHUNKS */
+
+/* Various functions that have different error handling are derived from this.
+ * png_malloc always exists, but if PNG_USER_MEM_SUPPORTED is defined a separate
+ * function png_malloc_default is also provided.
+ */
+PNG_FUNCTION(png_voidp,PNGAPI
+png_malloc,(png_const_structrp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
+{
+   png_voidp ret;
+
+   if (png_ptr == NULL)
+      return NULL;
+
+   ret = png_malloc_base(png_ptr, size);
+
+   if (ret == NULL)
+       png_error(png_ptr, "Out of memory"); /* 'm' means png_malloc */
+
+   return ret;
+}
+
+#ifdef PNG_USER_MEM_SUPPORTED
+PNG_FUNCTION(png_voidp,PNGAPI
+png_malloc_default,(png_const_structrp png_ptr, png_alloc_size_t size),
+    PNG_ALLOCATED PNG_DEPRECATED)
+{
+   png_voidp ret;
+
+   if (png_ptr == NULL)
+      return NULL;
+
+   /* Passing 'NULL' here bypasses the application provided memory handler. */
+   ret = png_malloc_base(NULL/*use malloc*/, size);
+
+   if (ret == NULL)
+      png_error(png_ptr, "Out of Memory"); /* 'M' means png_malloc_default */
+
+   return ret;
+}
+#endif /* USER_MEM */
+
+/* This function was added at libpng version 1.2.3.  The png_malloc_warn()
+ * function will issue a png_warning and return NULL instead of issuing a
+ * png_error, if it fails to allocate the requested memory.
+ */
+PNG_FUNCTION(png_voidp,PNGAPI
+png_malloc_warn,(png_const_structrp png_ptr, png_alloc_size_t size),
+    PNG_ALLOCATED)
+{
+   if (png_ptr != NULL)
+   {
+      png_voidp ret = png_malloc_base(png_ptr, size);
+
+      if (ret != NULL)
+         return ret;
+
+      png_warning(png_ptr, "Out of memory");
+   }
+
+   return NULL;
+}
+
+/* Free a pointer allocated by png_malloc().  If ptr is NULL, return
+ * without taking any action.
+ */
+void PNGAPI
+png_free(png_const_structrp png_ptr, png_voidp ptr)
+{
+   if (png_ptr == NULL || ptr == NULL)
+      return;
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   if (png_ptr->free_fn != NULL)
+      png_ptr->free_fn(png_constcast(png_structrp,png_ptr), ptr);
+
+   else
+      png_free_default(png_ptr, ptr);
+}
+
+PNG_FUNCTION(void,PNGAPI
+png_free_default,(png_const_structrp png_ptr, png_voidp ptr),PNG_DEPRECATED)
+{
+   if (png_ptr == NULL || ptr == NULL)
+      return;
+#endif /* USER_MEM */
+
+   free(ptr);
+}
+
+#ifdef PNG_USER_MEM_SUPPORTED
+/* This function is called when the application wants to use another method
+ * of allocating and freeing memory.
+ */
+void PNGAPI
+png_set_mem_fn(png_structrp png_ptr, png_voidp mem_ptr, png_malloc_ptr
+  malloc_fn, png_free_ptr free_fn)
+{
+   if (png_ptr != NULL)
+   {
+      png_ptr->mem_ptr = mem_ptr;
+      png_ptr->malloc_fn = malloc_fn;
+      png_ptr->free_fn = free_fn;
+   }
+}
+
+/* This function returns a pointer to the mem_ptr associated with the user
+ * functions.  The application should free any memory associated with this
+ * pointer before png_write_destroy and png_read_destroy are called.
+ */
+png_voidp PNGAPI
+png_get_mem_ptr(png_const_structrp png_ptr)
+{
+   if (png_ptr == NULL)
+      return NULL;
+
+   return png_ptr->mem_ptr;
+}
+#endif /* USER_MEM */
+#endif /* READ || WRITE */
diff --git a/reg-io/png/lpng1510/pngpread.c b/reg-io/png/lpng/pngpread.c
similarity index 64%
rename from reg-io/png/lpng1510/pngpread.c
rename to reg-io/png/lpng/pngpread.c
index c5fe0a2e..70965527 100644
--- a/reg-io/png/lpng1510/pngpread.c
+++ b/reg-io/png/lpng/pngpread.c
@@ -1,10 +1,10 @@
 
 /* pngpread.c - read a png file in push mode
  *
- * Last changed in libpng 1.5.9 [February 18, 2012]
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
  *
  * This code is released under the libpng license.
  * For conditions of distribution and use, see the disclaimer
@@ -19,16 +19,22 @@
 #define PNG_READ_SIG_MODE   0
 #define PNG_READ_CHUNK_MODE 1
 #define PNG_READ_IDAT_MODE  2
-#define PNG_SKIP_MODE       3
 #define PNG_READ_tEXt_MODE  4
 #define PNG_READ_zTXt_MODE  5
 #define PNG_READ_DONE_MODE  6
 #define PNG_READ_iTXt_MODE  7
 #define PNG_ERROR_MODE      8
 
+#define PNG_PUSH_SAVE_BUFFER_IF_FULL \
+if (png_ptr->push_length + 4 > png_ptr->buffer_size) \
+   { png_push_save_buffer(png_ptr); return; }
+#define PNG_PUSH_SAVE_BUFFER_IF_LT(N) \
+if (png_ptr->buffer_size < N) \
+   { png_push_save_buffer(png_ptr); return; }
+
 void PNGAPI
-png_process_data(png_structp png_ptr, png_infop info_ptr,
-    png_bytep buffer, png_size_t buffer_size)
+png_process_data(png_structrp png_ptr, png_inforp info_ptr,
+    png_bytep buffer, size_t buffer_size)
 {
    if (png_ptr == NULL || info_ptr == NULL)
       return;
@@ -41,20 +47,20 @@ png_process_data(png_structp png_ptr, png_infop info_ptr,
    }
 }
 
-png_size_t PNGAPI
-png_process_data_pause(png_structp png_ptr, int save)
+size_t PNGAPI
+png_process_data_pause(png_structrp png_ptr, int save)
 {
    if (png_ptr != NULL)
    {
-      /* It's easiest for the caller if we do the save, then the caller doesn't
+      /* It's easiest for the caller if we do the save; then the caller doesn't
        * have to supply the same data again:
        */
-      if (save)
+      if (save != 0)
          png_push_save_buffer(png_ptr);
       else
       {
          /* This includes any pending saved bytes: */
-         png_size_t remaining = png_ptr->buffer_size;
+         size_t remaining = png_ptr->buffer_size;
          png_ptr->buffer_size = 0;
 
          /* So subtract the saved buffer size, unless all the data
@@ -69,41 +75,23 @@ png_process_data_pause(png_structp png_ptr, int save)
 }
 
 png_uint_32 PNGAPI
-png_process_data_skip(png_structp png_ptr)
+png_process_data_skip(png_structrp png_ptr)
 {
-   png_uint_32 remaining = 0;
-
-   if (png_ptr != NULL && png_ptr->process_mode == PNG_SKIP_MODE &&
-      png_ptr->skip_length > 0)
-   {
-      /* At the end of png_process_data the buffer size must be 0 (see the loop
-       * above) so we can detect a broken call here:
-       */
-      if (png_ptr->buffer_size != 0)
-         png_error(png_ptr,
-            "png_process_data_skip called inside png_process_data");
-
-      /* If is impossible for there to be a saved buffer at this point -
-       * otherwise we could not be in SKIP mode.  This will also happen if
-       * png_process_skip is called inside png_process_data (but only very
-       * rarely.)
-       */
-      if (png_ptr->save_buffer_size != 0)
-         png_error(png_ptr, "png_process_data_skip called with saved data");
-
-      remaining = png_ptr->skip_length;
-      png_ptr->skip_length = 0;
-      png_ptr->process_mode = PNG_READ_CHUNK_MODE;
-   }
-
-   return remaining;
+/* TODO: Deprecate and remove this API.
+ * Somewhere the implementation of this seems to have been lost,
+ * or abandoned.  It was only to support some internal back-door access
+ * to png_struct) in libpng-1.4.x.
+ */
+   png_app_warning(png_ptr,
+"png_process_data_skip is not implemented in any current version of libpng");
+   return 0;
 }
 
 /* What we do with the incoming data depends on what we were previously
  * doing before we ran out of data...
  */
 void /* PRIVATE */
-png_process_some_data(png_structp png_ptr, png_infop info_ptr)
+png_process_some_data(png_structrp png_ptr, png_inforp info_ptr)
 {
    if (png_ptr == NULL)
       return;
@@ -128,12 +116,6 @@ png_process_some_data(png_structp png_ptr, png_infop info_ptr)
          break;
       }
 
-      case PNG_SKIP_MODE:
-      {
-         png_push_crc_finish(png_ptr);
-         break;
-      }
-
       default:
       {
          png_ptr->buffer_size = 0;
@@ -149,10 +131,10 @@ png_process_some_data(png_structp png_ptr, png_infop info_ptr)
  * routine.
  */
 void /* PRIVATE */
-png_push_read_sig(png_structp png_ptr, png_infop info_ptr)
+png_push_read_sig(png_structrp png_ptr, png_inforp info_ptr)
 {
-   png_size_t num_checked = png_ptr->sig_bytes,
-       num_to_check = 8 - num_checked;
+   size_t num_checked = png_ptr->sig_bytes; /* SAFE, does not exceed 8 */
+   size_t num_to_check = 8 - num_checked;
 
    if (png_ptr->buffer_size < num_to_check)
    {
@@ -163,16 +145,15 @@ png_push_read_sig(png_structp png_ptr, png_infop info_ptr)
        num_to_check);
    png_ptr->sig_bytes = (png_byte)(png_ptr->sig_bytes + num_to_check);
 
-   if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check))
+   if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check) != 0)
    {
       if (num_checked < 4 &&
-          png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4))
+          png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4) != 0)
          png_error(png_ptr, "Not a PNG file");
 
       else
          png_error(png_ptr, "PNG file corrupted by ASCII conversion");
    }
-
    else
    {
       if (png_ptr->sig_bytes >= 8)
@@ -183,33 +164,32 @@ png_push_read_sig(png_structp png_ptr, png_infop info_ptr)
 }
 
 void /* PRIVATE */
-png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
+png_push_read_chunk(png_structrp png_ptr, png_inforp info_ptr)
 {
    png_uint_32 chunk_name;
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+   int keep; /* unknown handling method */
+#endif
 
-   /* First we make sure we have enough data for the 4 byte chunk name
-    * and the 4 byte chunk length before proceeding with decoding the
+   /* First we make sure we have enough data for the 4-byte chunk name
+    * and the 4-byte chunk length before proceeding with decoding the
     * chunk data.  To fully decode each of these chunks, we also make
-    * sure we have enough data in the buffer for the 4 byte CRC at the
+    * sure we have enough data in the buffer for the 4-byte CRC at the
     * end of every chunk (except IDAT, which is handled separately).
     */
-   if (!(png_ptr->mode & PNG_HAVE_CHUNK_HEADER))
+   if ((png_ptr->mode & PNG_HAVE_CHUNK_HEADER) == 0)
    {
       png_byte chunk_length[4];
       png_byte chunk_tag[4];
 
-      if (png_ptr->buffer_size < 8)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_LT(8)
       png_push_fill_buffer(png_ptr, chunk_length, 4);
       png_ptr->push_length = png_get_uint_31(png_ptr, chunk_length);
       png_reset_crc(png_ptr);
       png_crc_read(png_ptr, chunk_tag, 4);
       png_ptr->chunk_name = PNG_CHUNK_FROM_STRING(chunk_tag);
       png_check_chunk_name(png_ptr, png_ptr->chunk_name);
+      png_check_chunk_length(png_ptr, png_ptr->push_length);
       png_ptr->mode |= PNG_HAVE_CHUNK_HEADER;
    }
 
@@ -217,14 +197,31 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 
    if (chunk_name == png_IDAT)
    {
-      /* This is here above the if/else case statement below because if the
-       * unknown handling marks 'IDAT' as unknown then the IDAT handling case is
-       * completely skipped.
-       *
-       * TODO: there must be a better way of doing this.
-       */
-      if (png_ptr->mode & PNG_AFTER_IDAT)
+      if ((png_ptr->mode & PNG_AFTER_IDAT) != 0)
          png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
+
+      /* If we reach an IDAT chunk, this means we have read all of the
+       * header chunks, and we can start reading the image (or if this
+       * is called after the image has been read - we have an error).
+       */
+      if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+         png_error(png_ptr, "Missing IHDR before IDAT");
+
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+          (png_ptr->mode & PNG_HAVE_PLTE) == 0)
+         png_error(png_ptr, "Missing PLTE before IDAT");
+
+      png_ptr->process_mode = PNG_READ_IDAT_MODE;
+
+      if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+         if ((png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT) == 0)
+            if (png_ptr->push_length == 0)
+               return;
+
+      png_ptr->mode |= PNG_HAVE_IDAT;
+
+      if ((png_ptr->mode & PNG_AFTER_IDAT) != 0)
+         png_benign_error(png_ptr, "Too many IDATs found");
    }
 
    if (chunk_name == png_IHDR)
@@ -232,23 +229,13 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
       if (png_ptr->push_length != 13)
          png_error(png_ptr, "Invalid IHDR length");
 
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_IHDR(png_ptr, info_ptr, png_ptr->push_length);
    }
 
    else if (chunk_name == png_IEND)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_IEND(png_ptr, info_ptr, png_ptr->push_length);
 
       png_ptr->process_mode = PNG_READ_DONE_MODE;
@@ -256,70 +243,25 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
    }
 
 #ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   else if (png_chunk_unknown_handling(png_ptr, chunk_name))
+   else if ((keep = png_chunk_unknown_handling(png_ptr, chunk_name)) != 0)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
-      if (chunk_name == png_IDAT)
-         png_ptr->mode |= PNG_HAVE_IDAT;
-
-      png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length);
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
+      png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length, keep);
 
       if (chunk_name == png_PLTE)
          png_ptr->mode |= PNG_HAVE_PLTE;
-
-      else if (chunk_name == png_IDAT)
-      {
-         if (!(png_ptr->mode & PNG_HAVE_IHDR))
-            png_error(png_ptr, "Missing IHDR before IDAT");
-
-         else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-             !(png_ptr->mode & PNG_HAVE_PLTE))
-            png_error(png_ptr, "Missing PLTE before IDAT");
-      }
    }
-
 #endif
+
    else if (chunk_name == png_PLTE)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_PLTE(png_ptr, info_ptr, png_ptr->push_length);
    }
 
    else if (chunk_name == png_IDAT)
    {
-      /* If we reach an IDAT chunk, this means we have read all of the
-       * header chunks, and we can start reading the image (or if this
-       * is called after the image has been read - we have an error).
-       */
-
-      if (!(png_ptr->mode & PNG_HAVE_IHDR))
-         png_error(png_ptr, "Missing IHDR before IDAT");
-
-      else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-          !(png_ptr->mode & PNG_HAVE_PLTE))
-         png_error(png_ptr, "Missing PLTE before IDAT");
-
-      if (png_ptr->mode & PNG_HAVE_IDAT)
-      {
-         if (!(png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT))
-            if (png_ptr->push_length == 0)
-               return;
-
-         if (png_ptr->mode & PNG_AFTER_IDAT)
-            png_benign_error(png_ptr, "Too many IDATs found");
-      }
-
       png_ptr->idat_size = png_ptr->push_length;
-      png_ptr->mode |= PNG_HAVE_IDAT;
       png_ptr->process_mode = PNG_READ_IDAT_MODE;
       png_push_have_info(png_ptr, info_ptr);
       png_ptr->zstream.avail_out =
@@ -332,12 +274,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_gAMA_SUPPORTED
    else if (png_ptr->chunk_name == png_gAMA)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_gAMA(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -345,12 +282,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_sBIT_SUPPORTED
    else if (png_ptr->chunk_name == png_sBIT)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_sBIT(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -358,12 +290,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_cHRM_SUPPORTED
    else if (png_ptr->chunk_name == png_cHRM)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_cHRM(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -371,12 +298,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_sRGB_SUPPORTED
    else if (chunk_name == png_sRGB)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_sRGB(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -384,12 +306,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_iCCP_SUPPORTED
    else if (png_ptr->chunk_name == png_iCCP)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_iCCP(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -397,12 +314,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_sPLT_SUPPORTED
    else if (chunk_name == png_sPLT)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_sPLT(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -410,12 +322,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_tRNS_SUPPORTED
    else if (chunk_name == png_tRNS)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_tRNS(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -423,12 +330,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_bKGD_SUPPORTED
    else if (chunk_name == png_bKGD)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_bKGD(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -436,12 +338,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_hIST_SUPPORTED
    else if (chunk_name == png_hIST)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_hIST(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -449,12 +346,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_pHYs_SUPPORTED
    else if (chunk_name == png_pHYs)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_pHYs(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -462,12 +354,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_oFFs_SUPPORTED
    else if (chunk_name == png_oFFs)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_oFFs(png_ptr, info_ptr, png_ptr->push_length);
    }
 #endif
@@ -475,12 +362,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_pCAL_SUPPORTED
    else if (chunk_name == png_pCAL)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_pCAL(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -488,12 +370,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_sCAL_SUPPORTED
    else if (chunk_name == png_sCAL)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_sCAL(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -501,12 +378,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_tIME_SUPPORTED
    else if (chunk_name == png_tIME)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_tIME(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -514,12 +386,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_tEXt_SUPPORTED
    else if (chunk_name == png_tEXt)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_tEXt(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -527,12 +394,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_zTXt_SUPPORTED
    else if (chunk_name == png_zTXt)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_zTXt(png_ptr, info_ptr, png_ptr->push_length);
    }
 
@@ -540,101 +402,23 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
 #ifdef PNG_READ_iTXt_SUPPORTED
    else if (chunk_name == png_iTXt)
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
       png_handle_iTXt(png_ptr, info_ptr, png_ptr->push_length);
    }
-
 #endif
+
    else
    {
-      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-      png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length);
+      PNG_PUSH_SAVE_BUFFER_IF_FULL
+      png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length,
+          PNG_HANDLE_CHUNK_AS_DEFAULT);
    }
 
    png_ptr->mode &= ~PNG_HAVE_CHUNK_HEADER;
 }
 
-void /* PRIVATE */
-png_push_crc_skip(png_structp png_ptr, png_uint_32 skip)
-{
-   png_ptr->process_mode = PNG_SKIP_MODE;
-   png_ptr->skip_length = skip;
-}
-
-void /* PRIVATE */
-png_push_crc_finish(png_structp png_ptr)
-{
-   if (png_ptr->skip_length && png_ptr->save_buffer_size)
-   {
-      png_size_t save_size = png_ptr->save_buffer_size;
-      png_uint_32 skip_length = png_ptr->skip_length;
-
-      /* We want the smaller of 'skip_length' and 'save_buffer_size', but
-       * they are of different types and we don't know which variable has the
-       * fewest bits.  Carefully select the smaller and cast it to the type of
-       * the larger - this cannot overflow.  Do not cast in the following test
-       * - it will break on either 16 or 64 bit platforms.
-       */
-      if (skip_length < save_size)
-         save_size = (png_size_t)skip_length;
-
-      else
-         skip_length = (png_uint_32)save_size;
-
-      png_calculate_crc(png_ptr, png_ptr->save_buffer_ptr, save_size);
-
-      png_ptr->skip_length -= skip_length;
-      png_ptr->buffer_size -= save_size;
-      png_ptr->save_buffer_size -= save_size;
-      png_ptr->save_buffer_ptr += save_size;
-   }
-
-   if (png_ptr->skip_length && png_ptr->current_buffer_size)
-   {
-      png_size_t save_size = png_ptr->current_buffer_size;
-      png_uint_32 skip_length = png_ptr->skip_length;
-
-      /* We want the smaller of 'skip_length' and 'current_buffer_size', here,
-       * the same problem exists as above and the same solution.
-       */
-      if (skip_length < save_size)
-         save_size = (png_size_t)skip_length;
-
-      else
-         skip_length = (png_uint_32)save_size;
-
-      png_calculate_crc(png_ptr, png_ptr->current_buffer_ptr, save_size);
-
-      png_ptr->skip_length -= skip_length;
-      png_ptr->buffer_size -= save_size;
-      png_ptr->current_buffer_size -= save_size;
-      png_ptr->current_buffer_ptr += save_size;
-   }
-
-   if (!png_ptr->skip_length)
-   {
-      if (png_ptr->buffer_size < 4)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
-      png_crc_finish(png_ptr, 0);
-      png_ptr->process_mode = PNG_READ_CHUNK_MODE;
-   }
-}
-
 void PNGCBAPI
-png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length)
+png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, size_t length)
 {
    png_bytep ptr;
 
@@ -642,10 +426,9 @@ png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length)
       return;
 
    ptr = buffer;
-
-   if (png_ptr->save_buffer_size)
+   if (png_ptr->save_buffer_size != 0)
    {
-      png_size_t save_size;
+      size_t save_size;
 
       if (length < png_ptr->save_buffer_size)
          save_size = length;
@@ -653,17 +436,16 @@ png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length)
       else
          save_size = png_ptr->save_buffer_size;
 
-      png_memcpy(ptr, png_ptr->save_buffer_ptr, save_size);
+      memcpy(ptr, png_ptr->save_buffer_ptr, save_size);
       length -= save_size;
       ptr += save_size;
       png_ptr->buffer_size -= save_size;
       png_ptr->save_buffer_size -= save_size;
       png_ptr->save_buffer_ptr += save_size;
    }
-
-   if (length && png_ptr->current_buffer_size)
+   if (length != 0 && png_ptr->current_buffer_size != 0)
    {
-      png_size_t save_size;
+      size_t save_size;
 
       if (length < png_ptr->current_buffer_size)
          save_size = length;
@@ -671,7 +453,7 @@ png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length)
       else
          save_size = png_ptr->current_buffer_size;
 
-      png_memcpy(ptr, png_ptr->current_buffer_ptr, save_size);
+      memcpy(ptr, png_ptr->current_buffer_ptr, save_size);
       png_ptr->buffer_size -= save_size;
       png_ptr->current_buffer_size -= save_size;
       png_ptr->current_buffer_ptr += save_size;
@@ -679,18 +461,17 @@ png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length)
 }
 
 void /* PRIVATE */
-png_push_save_buffer(png_structp png_ptr)
+png_push_save_buffer(png_structrp png_ptr)
 {
-   if (png_ptr->save_buffer_size)
+   if (png_ptr->save_buffer_size != 0)
    {
       if (png_ptr->save_buffer_ptr != png_ptr->save_buffer)
       {
-         png_size_t i, istop;
+         size_t i, istop;
          png_bytep sp;
          png_bytep dp;
 
          istop = png_ptr->save_buffer_size;
-
          for (i = 0, sp = png_ptr->save_buffer_ptr, dp = png_ptr->save_buffer;
              i < istop; i++, sp++, dp++)
          {
@@ -698,11 +479,10 @@ png_push_save_buffer(png_structp png_ptr)
          }
       }
    }
-
    if (png_ptr->save_buffer_size + png_ptr->current_buffer_size >
        png_ptr->save_buffer_max)
    {
-      png_size_t new_max;
+      size_t new_max;
       png_bytep old_buffer;
 
       if (png_ptr->save_buffer_size > PNG_SIZE_MAX -
@@ -713,7 +493,8 @@ png_push_save_buffer(png_structp png_ptr)
 
       new_max = png_ptr->save_buffer_size + png_ptr->current_buffer_size + 256;
       old_buffer = png_ptr->save_buffer;
-      png_ptr->save_buffer = (png_bytep)png_malloc_warn(png_ptr, new_max);
+      png_ptr->save_buffer = (png_bytep)png_malloc_warn(png_ptr,
+          (size_t)new_max);
 
       if (png_ptr->save_buffer == NULL)
       {
@@ -721,26 +502,27 @@ png_push_save_buffer(png_structp png_ptr)
          png_error(png_ptr, "Insufficient memory for save_buffer");
       }
 
-      png_memcpy(png_ptr->save_buffer, old_buffer, png_ptr->save_buffer_size);
+      if (old_buffer)
+         memcpy(png_ptr->save_buffer, old_buffer, png_ptr->save_buffer_size);
+      else if (png_ptr->save_buffer_size)
+         png_error(png_ptr, "save_buffer error");
       png_free(png_ptr, old_buffer);
       png_ptr->save_buffer_max = new_max;
    }
-
    if (png_ptr->current_buffer_size)
    {
-      png_memcpy(png_ptr->save_buffer + png_ptr->save_buffer_size,
+      memcpy(png_ptr->save_buffer + png_ptr->save_buffer_size,
          png_ptr->current_buffer_ptr, png_ptr->current_buffer_size);
       png_ptr->save_buffer_size += png_ptr->current_buffer_size;
       png_ptr->current_buffer_size = 0;
    }
-
    png_ptr->save_buffer_ptr = png_ptr->save_buffer;
    png_ptr->buffer_size = 0;
 }
 
 void /* PRIVATE */
-png_push_restore_buffer(png_structp png_ptr, png_bytep buffer,
-   png_size_t buffer_length)
+png_push_restore_buffer(png_structrp png_ptr, png_bytep buffer,
+    size_t buffer_length)
 {
    png_ptr->current_buffer = buffer;
    png_ptr->current_buffer_size = buffer_length;
@@ -749,20 +531,15 @@ png_push_restore_buffer(png_structp png_ptr, png_bytep buffer,
 }
 
 void /* PRIVATE */
-png_push_read_IDAT(png_structp png_ptr)
+png_push_read_IDAT(png_structrp png_ptr)
 {
-   if (!(png_ptr->mode & PNG_HAVE_CHUNK_HEADER))
+   if ((png_ptr->mode & PNG_HAVE_CHUNK_HEADER) == 0)
    {
       png_byte chunk_length[4];
       png_byte chunk_tag[4];
 
       /* TODO: this code can be commoned up with the same code in push_read */
-      if (png_ptr->buffer_size < 8)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_LT(8)
       png_push_fill_buffer(png_ptr, chunk_length, 4);
       png_ptr->push_length = png_get_uint_31(png_ptr, chunk_length);
       png_reset_crc(png_ptr);
@@ -774,7 +551,7 @@ png_push_read_IDAT(png_structp png_ptr)
       {
          png_ptr->process_mode = PNG_READ_CHUNK_MODE;
 
-         if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
+         if ((png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0)
             png_error(png_ptr, "Not enough compressed data");
 
          return;
@@ -783,19 +560,19 @@ png_push_read_IDAT(png_structp png_ptr)
       png_ptr->idat_size = png_ptr->push_length;
    }
 
-   if (png_ptr->idat_size && png_ptr->save_buffer_size)
+   if (png_ptr->idat_size != 0 && png_ptr->save_buffer_size != 0)
    {
-      png_size_t save_size = png_ptr->save_buffer_size;
+      size_t save_size = png_ptr->save_buffer_size;
       png_uint_32 idat_size = png_ptr->idat_size;
 
       /* We want the smaller of 'idat_size' and 'current_buffer_size', but they
        * are of different types and we don't know which variable has the fewest
        * bits.  Carefully select the smaller and cast it to the type of the
        * larger - this cannot overflow.  Do not cast in the following test - it
-       * will break on either 16 or 64 bit platforms.
+       * will break on either 16-bit or 64-bit platforms.
        */
       if (idat_size < save_size)
-         save_size = (png_size_t)idat_size;
+         save_size = (size_t)idat_size;
 
       else
          idat_size = (png_uint_32)save_size;
@@ -810,9 +587,9 @@ png_push_read_IDAT(png_structp png_ptr)
       png_ptr->save_buffer_ptr += save_size;
    }
 
-   if (png_ptr->idat_size && png_ptr->current_buffer_size)
+   if (png_ptr->idat_size != 0 && png_ptr->current_buffer_size != 0)
    {
-      png_size_t save_size = png_ptr->current_buffer_size;
+      size_t save_size = png_ptr->current_buffer_size;
       png_uint_32 idat_size = png_ptr->idat_size;
 
       /* We want the smaller of 'idat_size' and 'current_buffer_size', but they
@@ -821,7 +598,7 @@ png_push_read_IDAT(png_structp png_ptr)
        * larger - this cannot overflow.
        */
       if (idat_size < save_size)
-         save_size = (png_size_t)idat_size;
+         save_size = (size_t)idat_size;
 
       else
          idat_size = (png_uint_32)save_size;
@@ -836,23 +613,19 @@ png_push_read_IDAT(png_structp png_ptr)
       png_ptr->current_buffer_ptr += save_size;
    }
 
-   if (!png_ptr->idat_size)
+   if (png_ptr->idat_size == 0)
    {
-      if (png_ptr->buffer_size < 4)
-      {
-         png_push_save_buffer(png_ptr);
-         return;
-      }
-
+      PNG_PUSH_SAVE_BUFFER_IF_LT(4)
       png_crc_finish(png_ptr, 0);
       png_ptr->mode &= ~PNG_HAVE_CHUNK_HEADER;
       png_ptr->mode |= PNG_AFTER_IDAT;
+      png_ptr->zowner = 0;
    }
 }
 
 void /* PRIVATE */
-png_process_IDAT_data(png_structp png_ptr, png_bytep buffer,
-   png_size_t buffer_length)
+png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer,
+    size_t buffer_length)
 {
    /* The caller checks for a non-zero buffer length. */
    if (!(buffer_length > 0) || buffer == NULL)
@@ -863,13 +636,14 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer,
     * handle the uncompressed results.
     */
    png_ptr->zstream.next_in = buffer;
+   /* TODO: WARNING: TRUNCATION ERROR: DANGER WILL ROBINSON: */
    png_ptr->zstream.avail_in = (uInt)buffer_length;
 
    /* Keep going until the decompressed data is all processed
     * or the stream marked as finished.
     */
    while (png_ptr->zstream.avail_in > 0 &&
-          !(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
+      (png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0)
    {
       int ret;
 
@@ -880,9 +654,9 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer,
        */
       if (!(png_ptr->zstream.avail_out > 0))
       {
-         png_ptr->zstream.avail_out =
-             (uInt) PNG_ROWBYTES(png_ptr->pixel_depth,
-             png_ptr->iwidth) + 1;
+         /* TODO: WARNING: TRUNCATION ERROR: DANGER WILL ROBINSON: */
+         png_ptr->zstream.avail_out = (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth,
+             png_ptr->iwidth) + 1);
 
          png_ptr->zstream.next_out = png_ptr->row_buf;
       }
@@ -894,13 +668,14 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer,
        * change the current behavior (see comments in inflate.c
        * for why this doesn't happen at present with zlib 1.2.5).
        */
-      ret = inflate(&png_ptr->zstream, Z_SYNC_FLUSH);
+      ret = PNG_INFLATE(png_ptr, Z_SYNC_FLUSH);
 
       /* Check for any failure before proceeding. */
       if (ret != Z_OK && ret != Z_STREAM_END)
       {
          /* Terminate the decompression. */
-         png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+         png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED;
+         png_ptr->zowner = 0;
 
          /* This may be a truncated stream (missing or
           * damaged end code).  Treat that as a warning.
@@ -910,7 +685,12 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer,
             png_warning(png_ptr, "Truncated compressed data in IDAT");
 
          else
-            png_error(png_ptr, "Decompression error in IDAT");
+         {
+            if (ret == Z_DATA_ERROR)
+               png_benign_error(png_ptr, "IDAT: ADLER32 checksum mismatch");
+            else
+               png_error(png_ptr, "Decompression error in IDAT");
+         }
 
          /* Skip the check on unprocessed input */
          return;
@@ -928,7 +708,8 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer,
          {
             /* Extra data. */
             png_warning(png_ptr, "Extra compressed data in IDAT");
-            png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+            png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED;
+            png_ptr->zowner = 0;
 
             /* Do no more processing; skip the unprocessed
              * input check below.
@@ -943,7 +724,7 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer,
 
       /* And check for the end of the stream. */
       if (ret == Z_STREAM_END)
-         png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+         png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED;
    }
 
    /* All the data should have been processed, if anything
@@ -955,7 +736,7 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer,
 }
 
 void /* PRIVATE */
-png_push_process_row(png_structp png_ptr)
+png_push_process_row(png_structrp png_ptr)
 {
    /* 1.5.6: row_info moved out of png_struct to a local here. */
    png_row_info row_info;
@@ -981,10 +762,10 @@ png_push_process_row(png_structp png_ptr)
     * it may not be in the future, so this was changed just to copy the
     * interlaced row count:
     */
-   png_memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1);
+   memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1);
 
 #ifdef PNG_READ_TRANSFORMS_SUPPORTED
-   if (png_ptr->transformations)
+   if (png_ptr->transformations != 0)
       png_do_read_transformations(png_ptr, &row_info);
 #endif
 
@@ -1001,15 +782,16 @@ png_push_process_row(png_structp png_ptr)
 
 
 #ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* Blow up interlaced rows to full size */
-   if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE))
+   /* Expand interlaced rows to full size */
+   if (png_ptr->interlaced != 0 &&
+       (png_ptr->transformations & PNG_INTERLACE) != 0)
    {
       if (png_ptr->pass < 6)
          png_do_read_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass,
-            png_ptr->transformations);
+             png_ptr->transformations);
 
-    switch (png_ptr->pass)
-    {
+      switch (png_ptr->pass)
+      {
          case 0:
          {
             int i;
@@ -1184,26 +966,26 @@ png_push_process_row(png_structp png_ptr)
 }
 
 void /* PRIVATE */
-png_read_push_finish_row(png_structp png_ptr)
+png_read_push_finish_row(png_structrp png_ptr)
 {
 #ifdef PNG_READ_INTERLACING_SUPPORTED
    /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
 
    /* Start of interlace block */
-   static PNG_CONST png_byte FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+   static const png_byte png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
 
    /* Offset to next interlace block */
-   static PNG_CONST png_byte FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+   static const png_byte png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
 
    /* Start of interlace block in the y direction */
-   static PNG_CONST png_byte FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
+   static const png_byte png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
 
    /* Offset to next interlace block in the y direction */
-   static PNG_CONST png_byte FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
+   static const png_byte png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
 
    /* Height of interlace block.  This is not currently used - if you need
     * it, uncomment it here and in png.h
-   static PNG_CONST png_byte FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
+   static const png_byte png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
    */
 #endif
 
@@ -1212,10 +994,10 @@ png_read_push_finish_row(png_structp png_ptr)
       return;
 
 #ifdef PNG_READ_INTERLACING_SUPPORTED
-   if (png_ptr->interlaced)
+   if (png_ptr->interlaced != 0)
    {
       png_ptr->row_number = 0;
-      png_memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
+      memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
 
       do
       {
@@ -1236,7 +1018,7 @@ png_read_push_finish_row(png_structp png_ptr)
              png_pass_start[png_ptr->pass]) /
              png_pass_inc[png_ptr->pass];
 
-         if (png_ptr->transformations & PNG_INTERLACE)
+         if ((png_ptr->transformations & PNG_INTERLACE) != 0)
             break;
 
          png_ptr->num_rows = (png_ptr->height +
@@ -1246,34 +1028,34 @@ png_read_push_finish_row(png_structp png_ptr)
 
       } while (png_ptr->iwidth == 0 || png_ptr->num_rows == 0);
    }
-#endif /* PNG_READ_INTERLACING_SUPPORTED */
+#endif /* READ_INTERLACING */
 }
 
 void /* PRIVATE */
-png_push_have_info(png_structp png_ptr, png_infop info_ptr)
+png_push_have_info(png_structrp png_ptr, png_inforp info_ptr)
 {
    if (png_ptr->info_fn != NULL)
       (*(png_ptr->info_fn))(png_ptr, info_ptr);
 }
 
 void /* PRIVATE */
-png_push_have_end(png_structp png_ptr, png_infop info_ptr)
+png_push_have_end(png_structrp png_ptr, png_inforp info_ptr)
 {
    if (png_ptr->end_fn != NULL)
       (*(png_ptr->end_fn))(png_ptr, info_ptr);
 }
 
 void /* PRIVATE */
-png_push_have_row(png_structp png_ptr, png_bytep row)
+png_push_have_row(png_structrp png_ptr, png_bytep row)
 {
    if (png_ptr->row_fn != NULL)
       (*(png_ptr->row_fn))(png_ptr, row, png_ptr->row_number,
-         (int)png_ptr->pass);
+          (int)png_ptr->pass);
 }
 
 #ifdef PNG_READ_INTERLACING_SUPPORTED
 void PNGAPI
-png_progressive_combine_row (png_structp png_ptr, png_bytep old_row,
+png_progressive_combine_row(png_const_structrp png_ptr, png_bytep old_row,
     png_const_bytep new_row)
 {
    if (png_ptr == NULL)
@@ -1284,12 +1066,12 @@ png_progressive_combine_row (png_structp png_ptr, png_bytep old_row,
     * it must be png_ptr->row_buf+1
     */
    if (new_row != NULL)
-      png_combine_row(png_ptr, old_row, 1/*display*/);
+      png_combine_row(png_ptr, old_row, 1/*blocky display*/);
 }
-#endif /* PNG_READ_INTERLACING_SUPPORTED */
+#endif /* READ_INTERLACING */
 
 void PNGAPI
-png_set_progressive_read_fn(png_structp png_ptr, png_voidp progressive_ptr,
+png_set_progressive_read_fn(png_structrp png_ptr, png_voidp progressive_ptr,
     png_progressive_info_ptr info_fn, png_progressive_row_ptr row_fn,
     png_progressive_end_ptr end_fn)
 {
@@ -1304,11 +1086,11 @@ png_set_progressive_read_fn(png_structp png_ptr, png_voidp progressive_ptr,
 }
 
 png_voidp PNGAPI
-png_get_progressive_ptr(png_const_structp png_ptr)
+png_get_progressive_ptr(png_const_structrp png_ptr)
 {
    if (png_ptr == NULL)
-      return (NULL);
+      return NULL;
 
    return png_ptr->io_ptr;
 }
-#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+#endif /* PROGRESSIVE_READ */
diff --git a/reg-io/png/lpng/pngpriv.h b/reg-io/png/lpng/pngpriv.h
new file mode 100644
index 00000000..0a160ac4
--- /dev/null
+++ b/reg-io/png/lpng/pngpriv.h
@@ -0,0 +1,2221 @@
+
+/* pngpriv.h - private declarations for use inside libpng
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+/* The symbols declared in this file (including the functions declared
+ * as extern) are PRIVATE.  They are not part of the libpng public
+ * interface, and are not recommended for use by regular applications.
+ * Some of them may become public in the future; others may stay private,
+ * change in an incompatible way, or even disappear.
+ * Although the libpng users are not forbidden to include this header,
+ * they should be well aware of the issues that may arise from doing so.
+ */
+
+#ifndef PNGPRIV_H
+#define PNGPRIV_H
+
+/* Feature Test Macros.  The following are defined here to ensure that correctly
+ * implemented libraries reveal the APIs libpng needs to build and hide those
+ * that are not needed and potentially damaging to the compilation.
+ *
+ * Feature Test Macros must be defined before any system header is included (see
+ * POSIX 1003.1 2.8.2 "POSIX Symbols."
+ *
+ * These macros only have an effect if the operating system supports either
+ * POSIX 1003.1 or C99, or both.  On other operating systems (particularly
+ * Windows/Visual Studio) there is no effect; the OS specific tests below are
+ * still required (as of 2011-05-02.)
+ */
+#ifndef _POSIX_SOURCE
+#  define _POSIX_SOURCE 1 /* Just the POSIX 1003.1 and C89 APIs */
+#endif
+
+#ifndef PNG_VERSION_INFO_ONLY
+/* Standard library headers not required by png.h: */
+#  include <stdlib.h>
+#  include <string.h>
+#endif
+
+#define PNGLIB_BUILD /*libpng is being built, not used*/
+
+/* If HAVE_CONFIG_H is defined during the build then the build system must
+ * provide an appropriate "config.h" file on the include path.  The header file
+ * must provide definitions as required below (search for "HAVE_CONFIG_H");
+ * see configure.ac for more details of the requirements.  The macro
+ * "PNG_NO_CONFIG_H" is provided for maintainers to test for dependencies on
+ * 'configure'; define this macro to prevent the configure build including the
+ * configure generated config.h.  Libpng is expected to compile without *any*
+ * special build system support on a reasonably ANSI-C compliant system.
+ */
+#if defined(HAVE_CONFIG_H) && !defined(PNG_NO_CONFIG_H)
+#  include <config.h>
+
+   /* Pick up the definition of 'restrict' from config.h if it was read: */
+#  define PNG_RESTRICT restrict
+#endif
+
+/* To support symbol prefixing it is necessary to know *before* including png.h
+ * whether the fixed point (and maybe other) APIs are exported, because if they
+ * are not internal definitions may be required.  This is handled below just
+ * before png.h is included, but load the configuration now if it is available.
+ */
+#ifndef PNGLCONF_H
+#  include "pnglibconf.h"
+#endif
+
+/* Local renames may change non-exported API functions from png.h */
+#if defined(PNG_PREFIX) && !defined(PNGPREFIX_H)
+#  include "pngprefix.h"
+#endif
+
+#ifdef PNG_USER_CONFIG
+#  include "pngusr.h"
+   /* These should have been defined in pngusr.h */
+#  ifndef PNG_USER_PRIVATEBUILD
+#    define PNG_USER_PRIVATEBUILD "Custom libpng build"
+#  endif
+#  ifndef PNG_USER_DLLFNAME_POSTFIX
+#    define PNG_USER_DLLFNAME_POSTFIX "Cb"
+#  endif
+#endif
+
+/* Compile time options.
+ * =====================
+ * In a multi-arch build the compiler may compile the code several times for the
+ * same object module, producing different binaries for different architectures.
+ * When this happens configure-time setting of the target host options cannot be
+ * done and this interferes with the handling of the ARM NEON optimizations, and
+ * possibly other similar optimizations.  Put additional tests here; in general
+ * this is needed when the same option can be changed at both compile time and
+ * run time depending on the target OS (i.e. iOS vs Android.)
+ *
+ * NOTE: symbol prefixing does not pass $(CFLAGS) to the preprocessor, because
+ * this is not possible with certain compilers (Oracle SUN OS CC), as a result
+ * it is necessary to ensure that all extern functions that *might* be used
+ * regardless of $(CFLAGS) get declared in this file.  The test on __ARM_NEON__
+ * below is one example of this behavior because it is controlled by the
+ * presence or not of -mfpu=neon on the GCC command line, it is possible to do
+ * this in $(CC), e.g. "CC=gcc -mfpu=neon", but people who build libpng rarely
+ * do this.
+ */
+#ifndef PNG_ARM_NEON_OPT
+   /* ARM NEON optimizations are being controlled by the compiler settings,
+    * typically the target FPU.  If the FPU has been set to NEON (-mfpu=neon
+    * with GCC) then the compiler will define __ARM_NEON__ and we can rely
+    * unconditionally on NEON instructions not crashing, otherwise we must
+    * disable use of NEON instructions.
+    *
+    * NOTE: at present these optimizations depend on 'ALIGNED_MEMORY', so they
+    * can only be turned on automatically if that is supported too.  If
+    * PNG_ARM_NEON_OPT is set in CPPFLAGS (to >0) then arm/arm_init.c will fail
+    * to compile with an appropriate #error if ALIGNED_MEMORY has been turned
+    * off.
+    *
+    * Note that gcc-4.9 defines __ARM_NEON instead of the deprecated
+    * __ARM_NEON__, so we check both variants.
+    *
+    * To disable ARM_NEON optimizations entirely, and skip compiling the
+    * associated assembler code, pass --enable-arm-neon=no to configure
+    * or put -DPNG_ARM_NEON_OPT=0 in CPPFLAGS.
+    */
+#  if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && \
+   defined(PNG_ALIGNED_MEMORY_SUPPORTED)
+#     define PNG_ARM_NEON_OPT 2
+#  else
+#     define PNG_ARM_NEON_OPT 0
+#  endif
+#endif
+
+#if PNG_ARM_NEON_OPT > 0
+   /* NEON optimizations are to be at least considered by libpng, so enable the
+    * callbacks to do this.
+    */
+#  define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_neon
+
+   /* By default the 'intrinsics' code in arm/filter_neon_intrinsics.c is used
+    * if possible - if __ARM_NEON__ is set and the compiler version is not known
+    * to be broken.  This is controlled by PNG_ARM_NEON_IMPLEMENTATION which can
+    * be:
+    *
+    *    1  The intrinsics code (the default with __ARM_NEON__)
+    *    2  The hand coded assembler (the default without __ARM_NEON__)
+    *
+    * It is possible to set PNG_ARM_NEON_IMPLEMENTATION in CPPFLAGS, however
+    * this is *NOT* supported and may cease to work even after a minor revision
+    * to libpng.  It *is* valid to do this for testing purposes, e.g. speed
+    * testing or a new compiler, but the results should be communicated to the
+    * libpng implementation list for incorporation in the next minor release.
+    */
+#  ifndef PNG_ARM_NEON_IMPLEMENTATION
+#     if defined(__ARM_NEON__) || defined(__ARM_NEON)
+#        if defined(__clang__)
+            /* At present it is unknown by the libpng developers which versions
+             * of clang support the intrinsics, however some or perhaps all
+             * versions do not work with the assembler so this may be
+             * irrelevant, so just use the default (do nothing here.)
+             */
+#        elif defined(__GNUC__)
+            /* GCC 4.5.4 NEON support is known to be broken.  4.6.3 is known to
+             * work, so if this *is* GCC, or G++, look for a version >4.5
+             */
+#           if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)
+#              define PNG_ARM_NEON_IMPLEMENTATION 2
+#           endif /* no GNUC support */
+#        endif /* __GNUC__ */
+#     else /* !defined __ARM_NEON__ */
+         /* The 'intrinsics' code simply won't compile without this -mfpu=neon:
+          */
+#        if !defined(__aarch64__) && !defined(_M_ARM64)
+            /* The assembler code currently does not work on ARM64 */
+#          define PNG_ARM_NEON_IMPLEMENTATION 2
+#        endif /* __aarch64__ */
+#     endif /* __ARM_NEON__ */
+#  endif /* !PNG_ARM_NEON_IMPLEMENTATION */
+
+#  ifndef PNG_ARM_NEON_IMPLEMENTATION
+      /* Use the intrinsics code by default. */
+#     define PNG_ARM_NEON_IMPLEMENTATION 1
+#  endif
+#else /* PNG_ARM_NEON_OPT == 0 */
+#     define PNG_ARM_NEON_IMPLEMENTATION 0
+#endif /* PNG_ARM_NEON_OPT > 0 */
+
+#ifndef PNG_MIPS_MSA_OPT
+#  if defined(__mips_msa) && (__mips_isa_rev >= 5) && \
+   defined(PNG_ALIGNED_MEMORY_SUPPORTED)
+#     define PNG_MIPS_MSA_OPT 2
+#  else
+#     define PNG_MIPS_MSA_OPT 0
+#  endif
+#endif
+
+#ifndef PNG_MIPS_MMI_OPT
+#  ifdef PNG_MIPS_MMI
+#    if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64) && \
+     defined(PNG_ALIGNED_MEMORY_SUPPORTED)
+#       define PNG_MIPS_MMI_OPT 1
+#    else
+#       define PNG_MIPS_MMI_OPT 0
+#    endif
+#  else
+#    define PNG_MIPS_MMI_OPT 0
+#  endif
+#endif
+
+#ifndef PNG_POWERPC_VSX_OPT
+#  if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__)
+#     define PNG_POWERPC_VSX_OPT 2
+#  else
+#     define PNG_POWERPC_VSX_OPT 0
+#  endif
+#endif
+
+#ifndef PNG_LOONGARCH_LSX_OPT
+#  if defined(__loongarch_sx)
+#     define PNG_LOONGARCH_LSX_OPT 1
+#  else
+#     define PNG_LOONGARCH_LSX_OPT 0
+#  endif
+#endif
+
+#ifndef PNG_INTEL_SSE_OPT
+#   ifdef PNG_INTEL_SSE
+      /* Only check for SSE if the build configuration has been modified to
+       * enable SSE optimizations.  This means that these optimizations will
+       * be off by default.  See contrib/intel for more details.
+       */
+#      if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
+       defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
+       (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+#         define PNG_INTEL_SSE_OPT 1
+#      else
+#         define PNG_INTEL_SSE_OPT 0
+#      endif
+#   else
+#      define PNG_INTEL_SSE_OPT 0
+#   endif
+#endif
+
+#if PNG_INTEL_SSE_OPT > 0
+#   ifndef PNG_INTEL_SSE_IMPLEMENTATION
+#      if defined(__SSE4_1__) || defined(__AVX__)
+          /* We are not actually using AVX, but checking for AVX is the best
+             way we can detect SSE4.1 and SSSE3 on MSVC.
+          */
+#         define PNG_INTEL_SSE_IMPLEMENTATION 3
+#      elif defined(__SSSE3__)
+#         define PNG_INTEL_SSE_IMPLEMENTATION 2
+#      elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
+       (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+#         define PNG_INTEL_SSE_IMPLEMENTATION 1
+#      else
+#         define PNG_INTEL_SSE_IMPLEMENTATION 0
+#      endif
+#   endif
+
+#   if PNG_INTEL_SSE_IMPLEMENTATION > 0
+#      define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
+#   endif
+#else
+#   define PNG_INTEL_SSE_IMPLEMENTATION 0
+#endif
+
+#if PNG_MIPS_MSA_OPT > 0
+#  ifndef PNG_MIPS_MSA_IMPLEMENTATION
+#     if defined(__mips_msa)
+#        if defined(__clang__)
+#        elif defined(__GNUC__)
+#           if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 7)
+#              define PNG_MIPS_MSA_IMPLEMENTATION 2
+#           endif /* no GNUC support */
+#        endif /* __GNUC__ */
+#     else /* !defined __mips_msa */
+#        define PNG_MIPS_MSA_IMPLEMENTATION 2
+#     endif /* __mips_msa */
+#  endif /* !PNG_MIPS_MSA_IMPLEMENTATION */
+
+#  ifndef PNG_MIPS_MSA_IMPLEMENTATION
+#     define PNG_MIPS_MSA_IMPLEMENTATION 1
+#     define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips
+#  endif
+#else
+#  define PNG_MIPS_MSA_IMPLEMENTATION 0
+#endif /* PNG_MIPS_MSA_OPT > 0 */
+
+#if PNG_MIPS_MMI_OPT > 0
+#  ifndef PNG_MIPS_MMI_IMPLEMENTATION
+#     if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64)
+#        define PNG_MIPS_MMI_IMPLEMENTATION 2
+#     else /* !defined __mips_loongson_mmi  || _MIPS_SIM != _ABI64 */
+#        define PNG_MIPS_MMI_IMPLEMENTATION 0
+#     endif /* __mips_loongson_mmi  && _MIPS_SIM == _ABI64 */
+#  endif /* !PNG_MIPS_MMI_IMPLEMENTATION */
+
+#   if PNG_MIPS_MMI_IMPLEMENTATION > 0
+#      define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips
+#   endif
+#else
+#   define PNG_MIPS_MMI_IMPLEMENTATION 0
+#endif /* PNG_MIPS_MMI_OPT > 0 */
+
+#if PNG_POWERPC_VSX_OPT > 0
+#  define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vsx
+#  define PNG_POWERPC_VSX_IMPLEMENTATION 1
+#else
+#  define PNG_POWERPC_VSX_IMPLEMENTATION 0
+#endif
+
+#if PNG_LOONGARCH_LSX_OPT > 0
+#   define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_lsx
+#   define PNG_LOONGARCH_LSX_IMPLEMENTATION 1
+#else
+#   define PNG_LOONGARCH_LSX_IMPLEMENTATION 0
+#endif
+
+/* Is this a build of a DLL where compilation of the object modules requires
+ * different preprocessor settings to those required for a simple library?  If
+ * so PNG_BUILD_DLL must be set.
+ *
+ * If libpng is used inside a DLL but that DLL does not export the libpng APIs
+ * PNG_BUILD_DLL must not be set.  To avoid the code below kicking in build a
+ * static library of libpng then link the DLL against that.
+ */
+#ifndef PNG_BUILD_DLL
+#  ifdef DLL_EXPORT
+      /* This is set by libtool when files are compiled for a DLL; libtool
+       * always compiles twice, even on systems where it isn't necessary.  Set
+       * PNG_BUILD_DLL in case it is necessary:
+       */
+#     define PNG_BUILD_DLL
+#  else
+#     ifdef _WINDLL
+         /* This is set by the Microsoft Visual Studio IDE in projects that
+          * build a DLL.  It can't easily be removed from those projects (it
+          * isn't visible in the Visual Studio UI) so it is a fairly reliable
+          * indication that PNG_IMPEXP needs to be set to the DLL export
+          * attributes.
+          */
+#        define PNG_BUILD_DLL
+#     else
+#        ifdef __DLL__
+            /* This is set by the Borland C system when compiling for a DLL
+             * (as above.)
+             */
+#           define PNG_BUILD_DLL
+#        else
+            /* Add additional compiler cases here. */
+#        endif
+#     endif
+#  endif
+#endif /* Setting PNG_BUILD_DLL if required */
+
+/* See pngconf.h for more details: the builder of the library may set this on
+ * the command line to the right thing for the specific compilation system or it
+ * may be automagically set above (at present we know of no system where it does
+ * need to be set on the command line.)
+ *
+ * PNG_IMPEXP must be set here when building the library to prevent pngconf.h
+ * setting it to the "import" setting for a DLL build.
+ */
+#ifndef PNG_IMPEXP
+#  ifdef PNG_BUILD_DLL
+#     define PNG_IMPEXP PNG_DLL_EXPORT
+#  else
+      /* Not building a DLL, or the DLL doesn't require specific export
+       * definitions.
+       */
+#     define PNG_IMPEXP
+#  endif
+#endif
+
+/* No warnings for private or deprecated functions in the build: */
+#ifndef PNG_DEPRECATED
+#  define PNG_DEPRECATED
+#endif
+#ifndef PNG_PRIVATE
+#  define PNG_PRIVATE
+#endif
+
+/* Symbol preprocessing support.
+ *
+ * To enable listing global, but internal, symbols the following macros should
+ * always be used to declare an extern data or function object in this file.
+ */
+#ifndef PNG_INTERNAL_DATA
+#  define PNG_INTERNAL_DATA(type, name, array) PNG_LINKAGE_DATA type name array
+#endif
+
+#ifndef PNG_INTERNAL_FUNCTION
+#  define PNG_INTERNAL_FUNCTION(type, name, args, attributes)\
+      PNG_LINKAGE_FUNCTION PNG_FUNCTION(type, name, args, PNG_EMPTY attributes)
+#endif
+
+#ifndef PNG_INTERNAL_CALLBACK
+#  define PNG_INTERNAL_CALLBACK(type, name, args, attributes)\
+      PNG_LINKAGE_CALLBACK PNG_FUNCTION(type, (PNGCBAPI name), args,\
+         PNG_EMPTY attributes)
+#endif
+
+/* If floating or fixed point APIs are disabled they may still be compiled
+ * internally.  To handle this make sure they are declared as the appropriate
+ * internal extern function (otherwise the symbol prefixing stuff won't work and
+ * the functions will be used without definitions.)
+ *
+ * NOTE: although all the API functions are declared here they are not all
+ * actually built!  Because the declarations are still made it is necessary to
+ * fake out types that they depend on.
+ */
+#ifndef PNG_FP_EXPORT
+#  ifndef PNG_FLOATING_POINT_SUPPORTED
+#     define PNG_FP_EXPORT(ordinal, type, name, args)\
+         PNG_INTERNAL_FUNCTION(type, name, args, PNG_EMPTY);
+#     ifndef PNG_VERSION_INFO_ONLY
+         typedef struct png_incomplete png_double;
+         typedef png_double*           png_doublep;
+         typedef const png_double*     png_const_doublep;
+         typedef png_double**          png_doublepp;
+#     endif
+#  endif
+#endif
+#ifndef PNG_FIXED_EXPORT
+#  ifndef PNG_FIXED_POINT_SUPPORTED
+#     define PNG_FIXED_EXPORT(ordinal, type, name, args)\
+         PNG_INTERNAL_FUNCTION(type, name, args, PNG_EMPTY);
+#  endif
+#endif
+
+#include "png.h"
+
+/* pngconf.h does not set PNG_DLL_EXPORT unless it is required, so: */
+#ifndef PNG_DLL_EXPORT
+#  define PNG_DLL_EXPORT
+#endif
+
+/* This is a global switch to set the compilation for an installed system
+ * (a release build).  It can be set for testing debug builds to ensure that
+ * they will compile when the build type is switched to RC or STABLE, the
+ * default is just to use PNG_LIBPNG_BUILD_BASE_TYPE.  Set this in CPPFLAGS
+ * with either:
+ *
+ *   -DPNG_RELEASE_BUILD Turns on the release compile path
+ *   -DPNG_RELEASE_BUILD=0 Turns it off
+ * or in your pngusr.h with
+ *   #define PNG_RELEASE_BUILD=1 Turns on the release compile path
+ *   #define PNG_RELEASE_BUILD=0 Turns it off
+ */
+#ifndef PNG_RELEASE_BUILD
+#  define PNG_RELEASE_BUILD (PNG_LIBPNG_BUILD_BASE_TYPE >= PNG_LIBPNG_BUILD_RC)
+#endif
+
+/* SECURITY and SAFETY:
+ *
+ * libpng is built with support for internal limits on image dimensions and
+ * memory usage.  These are documented in scripts/pnglibconf.dfa of the
+ * source and recorded in the machine generated header file pnglibconf.h.
+ */
+
+/* If you are running on a machine where you cannot allocate more
+ * than 64K of memory at once, uncomment this.  While libpng will not
+ * normally need that much memory in a chunk (unless you load up a very
+ * large file), zlib needs to know how big of a chunk it can use, and
+ * libpng thus makes sure to check any memory allocation to verify it
+ * will fit into memory.
+ *
+ * zlib provides 'MAXSEG_64K' which, if defined, indicates the
+ * same limit and pngconf.h (already included) sets the limit
+ * if certain operating systems are detected.
+ */
+#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K)
+#  define PNG_MAX_MALLOC_64K
+#endif
+
+#ifndef PNG_UNUSED
+/* Unused formal parameter warnings are silenced using the following macro
+ * which is expected to have no bad effects on performance (optimizing
+ * compilers will probably remove it entirely).  Note that if you replace
+ * it with something other than whitespace, you must include the terminating
+ * semicolon.
+ */
+#  define PNG_UNUSED(param) (void)param;
+#endif
+
+/* Just a little check that someone hasn't tried to define something
+ * contradictory.
+ */
+#if (PNG_ZBUF_SIZE > 65536L) && defined(PNG_MAX_MALLOC_64K)
+#  undef PNG_ZBUF_SIZE
+#  define PNG_ZBUF_SIZE 65536L
+#endif
+
+/* If warnings or errors are turned off the code is disabled or redirected here.
+ * From 1.5.4 functions have been added to allow very limited formatting of
+ * error and warning messages - this code will also be disabled here.
+ */
+#ifdef PNG_WARNINGS_SUPPORTED
+#  define PNG_WARNING_PARAMETERS(p) png_warning_parameters p;
+#else
+#  define png_warning_parameter(p,number,string) ((void)0)
+#  define png_warning_parameter_unsigned(p,number,format,value) ((void)0)
+#  define png_warning_parameter_signed(p,number,format,value) ((void)0)
+#  define png_formatted_warning(pp,p,message) ((void)(pp))
+#  define PNG_WARNING_PARAMETERS(p)
+#endif
+#ifndef PNG_ERROR_TEXT_SUPPORTED
+#  define png_fixed_error(s1,s2) png_err(s1)
+#endif
+
+/* Some fixed point APIs are still required even if not exported because
+ * they get used by the corresponding floating point APIs.  This magic
+ * deals with this:
+ */
+#ifdef PNG_FIXED_POINT_SUPPORTED
+#  define PNGFAPI PNGAPI
+#else
+#  define PNGFAPI /* PRIVATE */
+#endif
+
+#ifndef PNG_VERSION_INFO_ONLY
+/* Other defines specific to compilers can go here.  Try to keep
+ * them inside an appropriate ifdef/endif pair for portability.
+ */
+
+/* C allows up-casts from (void*) to any pointer and (const void*) to any
+ * pointer to a const object.  C++ regards this as a type error and requires an
+ * explicit, static, cast and provides the static_cast<> rune to ensure that
+ * const is not cast away.
+ */
+#ifdef __cplusplus
+#  define png_voidcast(type, value) static_cast<type>(value)
+#  define png_constcast(type, value) const_cast<type>(value)
+#  define png_aligncast(type, value) \
+   static_cast<type>(static_cast<void*>(value))
+#  define png_aligncastconst(type, value) \
+   static_cast<type>(static_cast<const void*>(value))
+#else
+#  define png_voidcast(type, value) (value)
+#  define png_constcast(type, value) ((type)(void*)(const void*)(value))
+#  define png_aligncast(type, value) ((void*)(value))
+#  define png_aligncastconst(type, value) ((const void*)(value))
+#endif /* __cplusplus */
+
+#if defined(PNG_FLOATING_POINT_SUPPORTED) ||\
+    defined(PNG_FLOATING_ARITHMETIC_SUPPORTED)
+   /* png.c requires the following ANSI-C constants if the conversion of
+    * floating point to ASCII is implemented therein:
+    *
+    *  DBL_DIG  Maximum number of decimal digits (can be set to any constant)
+    *  DBL_MIN  Smallest normalized fp number (can be set to an arbitrary value)
+    *  DBL_MAX  Maximum floating point number (can be set to an arbitrary value)
+    */
+#  include <float.h>
+
+#  include <math.h>
+
+#  if defined(_AMIGA) && defined(__SASC) && defined(_M68881)
+   /* Amiga SAS/C: We must include builtin FPU functions when compiling using
+    * MATH=68881
+    */
+#    include <m68881.h>
+#  endif
+#endif
+
+/* This provides the non-ANSI (far) memory allocation routines. */
+#if defined(__TURBOC__) && defined(__MSDOS__)
+#  include <mem.h>
+#  include <alloc.h>
+#endif
+
+#if defined(_WIN32) || defined(__WIN32__) || defined(__NT__)
+#  include <windows.h>
+#endif
+#endif /* PNG_VERSION_INFO_ONLY */
+
+/* Moved here around 1.5.0beta36 from pngconf.h */
+/* Users may want to use these so they are not private.  Any library
+ * functions that are passed far data must be model-independent.
+ */
+
+/* Platform-independent functions */
+#ifndef PNG_ABORT
+#  define PNG_ABORT() abort()
+#endif
+
+/* These macros may need to be architecture dependent. */
+#define PNG_ALIGN_NONE      0 /* do not use data alignment */
+#define PNG_ALIGN_ALWAYS    1 /* assume unaligned accesses are OK */
+#ifdef offsetof
+#  define PNG_ALIGN_OFFSET  2 /* use offsetof to determine alignment */
+#else
+#  define PNG_ALIGN_OFFSET -1 /* prevent the use of this */
+#endif
+#define PNG_ALIGN_SIZE      3 /* use sizeof to determine alignment */
+
+#ifndef PNG_ALIGN_TYPE
+   /* Default to using aligned access optimizations and requiring alignment to a
+    * multiple of the data type size.  Override in a compiler specific fashion
+    * if necessary by inserting tests here:
+    */
+#  define PNG_ALIGN_TYPE PNG_ALIGN_SIZE
+#endif
+
+#if PNG_ALIGN_TYPE == PNG_ALIGN_SIZE
+   /* This is used because in some compiler implementations non-aligned
+    * structure members are supported, so the offsetof approach below fails.
+    * Set PNG_ALIGN_SIZE=0 for compiler combinations where unaligned access
+    * is good for performance.  Do not do this unless you have tested the
+    * result and understand it.
+    */
+#  define png_alignof(type) (sizeof(type))
+#else
+#  if PNG_ALIGN_TYPE == PNG_ALIGN_OFFSET
+#    define png_alignof(type) offsetof(struct{char c; type t;}, t)
+#  else
+#    if PNG_ALIGN_TYPE == PNG_ALIGN_ALWAYS
+#      define png_alignof(type) 1
+#    endif
+     /* Else leave png_alignof undefined to prevent use thereof */
+#  endif
+#endif
+
+/* This implicitly assumes alignment is always a multiple of 2. */
+#ifdef png_alignof
+#  define png_isaligned(ptr, type) \
+   (((type)(size_t)((const void*)(ptr)) & (type)(png_alignof(type)-1)) == 0)
+#else
+#  define png_isaligned(ptr, type) 0
+#endif
+
+/* End of memory model/platform independent support */
+/* End of 1.5.0beta36 move from pngconf.h */
+
+/* CONSTANTS and UTILITY MACROS
+ * These are used internally by libpng and not exposed in the API
+ */
+
+/* Various modes of operation.  Note that after an init, mode is set to
+ * zero automatically when the structure is created.  Three of these
+ * are defined in png.h because they need to be visible to applications
+ * that call png_set_unknown_chunk().
+ */
+/* #define PNG_HAVE_IHDR            0x01U (defined in png.h) */
+/* #define PNG_HAVE_PLTE            0x02U (defined in png.h) */
+#define PNG_HAVE_IDAT               0x04U
+/* #define PNG_AFTER_IDAT           0x08U (defined in png.h) */
+#define PNG_HAVE_IEND               0x10U
+                   /*               0x20U (unused) */
+                   /*               0x40U (unused) */
+                   /*               0x80U (unused) */
+#define PNG_HAVE_CHUNK_HEADER      0x100U
+#define PNG_WROTE_tIME             0x200U
+#define PNG_WROTE_INFO_BEFORE_PLTE 0x400U
+#define PNG_BACKGROUND_IS_GRAY     0x800U
+#define PNG_HAVE_PNG_SIGNATURE    0x1000U
+#define PNG_HAVE_CHUNK_AFTER_IDAT 0x2000U /* Have another chunk after IDAT */
+#define PNG_WROTE_eXIf            0x4000U
+#define PNG_IS_READ_STRUCT        0x8000U /* Else is a write struct */
+
+/* Flags for the transformations the PNG library does on the image data */
+#define PNG_BGR                 0x0001U
+#define PNG_INTERLACE           0x0002U
+#define PNG_PACK                0x0004U
+#define PNG_SHIFT               0x0008U
+#define PNG_SWAP_BYTES          0x0010U
+#define PNG_INVERT_MONO         0x0020U
+#define PNG_QUANTIZE            0x0040U
+#define PNG_COMPOSE             0x0080U    /* Was PNG_BACKGROUND */
+#define PNG_BACKGROUND_EXPAND   0x0100U
+#define PNG_EXPAND_16           0x0200U    /* Added to libpng 1.5.2 */
+#define PNG_16_TO_8             0x0400U    /* Becomes 'chop' in 1.5.4 */
+#define PNG_RGBA                0x0800U
+#define PNG_EXPAND              0x1000U
+#define PNG_GAMMA               0x2000U
+#define PNG_GRAY_TO_RGB         0x4000U
+#define PNG_FILLER              0x8000U
+#define PNG_PACKSWAP           0x10000U
+#define PNG_SWAP_ALPHA         0x20000U
+#define PNG_STRIP_ALPHA        0x40000U
+#define PNG_INVERT_ALPHA       0x80000U
+#define PNG_USER_TRANSFORM    0x100000U
+#define PNG_RGB_TO_GRAY_ERR   0x200000U
+#define PNG_RGB_TO_GRAY_WARN  0x400000U
+#define PNG_RGB_TO_GRAY       0x600000U /* two bits, RGB_TO_GRAY_ERR|WARN */
+#define PNG_ENCODE_ALPHA      0x800000U /* Added to libpng-1.5.4 */
+#define PNG_ADD_ALPHA        0x1000000U /* Added to libpng-1.2.7 */
+#define PNG_EXPAND_tRNS      0x2000000U /* Added to libpng-1.2.9 */
+#define PNG_SCALE_16_TO_8    0x4000000U /* Added to libpng-1.5.4 */
+                       /*    0x8000000U unused */
+                       /*   0x10000000U unused */
+                       /*   0x20000000U unused */
+                       /*   0x40000000U unused */
+/* Flags for png_create_struct */
+#define PNG_STRUCT_PNG   0x0001U
+#define PNG_STRUCT_INFO  0x0002U
+
+/* Flags for the png_ptr->flags rather than declaring a byte for each one */
+#define PNG_FLAG_ZLIB_CUSTOM_STRATEGY     0x0001U
+#define PNG_FLAG_ZSTREAM_INITIALIZED      0x0002U /* Added to libpng-1.6.0 */
+                                  /*      0x0004U    unused */
+#define PNG_FLAG_ZSTREAM_ENDED            0x0008U /* Added to libpng-1.6.0 */
+                                  /*      0x0010U    unused */
+                                  /*      0x0020U    unused */
+#define PNG_FLAG_ROW_INIT                 0x0040U
+#define PNG_FLAG_FILLER_AFTER             0x0080U
+#define PNG_FLAG_CRC_ANCILLARY_USE        0x0100U
+#define PNG_FLAG_CRC_ANCILLARY_NOWARN     0x0200U
+#define PNG_FLAG_CRC_CRITICAL_USE         0x0400U
+#define PNG_FLAG_CRC_CRITICAL_IGNORE      0x0800U
+#define PNG_FLAG_ASSUME_sRGB              0x1000U /* Added to libpng-1.5.4 */
+#define PNG_FLAG_OPTIMIZE_ALPHA           0x2000U /* Added to libpng-1.5.4 */
+#define PNG_FLAG_DETECT_UNINITIALIZED     0x4000U /* Added to libpng-1.5.4 */
+/* #define PNG_FLAG_KEEP_UNKNOWN_CHUNKS      0x8000U */
+/* #define PNG_FLAG_KEEP_UNSAFE_CHUNKS      0x10000U */
+#define PNG_FLAG_LIBRARY_MISMATCH        0x20000U
+#define PNG_FLAG_STRIP_ERROR_NUMBERS     0x40000U
+#define PNG_FLAG_STRIP_ERROR_TEXT        0x80000U
+#define PNG_FLAG_BENIGN_ERRORS_WARN     0x100000U /* Added to libpng-1.4.0 */
+#define PNG_FLAG_APP_WARNINGS_WARN      0x200000U /* Added to libpng-1.6.0 */
+#define PNG_FLAG_APP_ERRORS_WARN        0x400000U /* Added to libpng-1.6.0 */
+                                  /*    0x800000U    unused */
+                                  /*   0x1000000U    unused */
+                                  /*   0x2000000U    unused */
+                                  /*   0x4000000U    unused */
+                                  /*   0x8000000U    unused */
+                                  /*  0x10000000U    unused */
+                                  /*  0x20000000U    unused */
+                                  /*  0x40000000U    unused */
+
+#define PNG_FLAG_CRC_ANCILLARY_MASK (PNG_FLAG_CRC_ANCILLARY_USE | \
+                                     PNG_FLAG_CRC_ANCILLARY_NOWARN)
+
+#define PNG_FLAG_CRC_CRITICAL_MASK  (PNG_FLAG_CRC_CRITICAL_USE | \
+                                     PNG_FLAG_CRC_CRITICAL_IGNORE)
+
+#define PNG_FLAG_CRC_MASK           (PNG_FLAG_CRC_ANCILLARY_MASK | \
+                                     PNG_FLAG_CRC_CRITICAL_MASK)
+
+/* Save typing and make code easier to understand */
+
+#define PNG_COLOR_DIST(c1, c2) (abs((int)((c1).red) - (int)((c2).red)) + \
+   abs((int)((c1).green) - (int)((c2).green)) + \
+   abs((int)((c1).blue) - (int)((c2).blue)))
+
+/* Added to libpng-1.6.0: scale a 16-bit value in the range 0..65535 to 0..255
+ * by dividing by 257 *with rounding*.  This macro is exact for the given range.
+ * See the discourse in pngrtran.c png_do_scale_16_to_8.  The values in the
+ * macro were established by experiment (modifying the added value).  The macro
+ * has a second variant that takes a value already scaled by 255 and divides by
+ * 65535 - this has a maximum error of .502.  Over the range 0..65535*65535 it
+ * only gives off-by-one errors and only for 0.5% (1 in 200) of the values.
+ */
+#define PNG_DIV65535(v24) (((v24) + 32895) >> 16)
+#define PNG_DIV257(v16) PNG_DIV65535((png_uint_32)(v16) * 255)
+
+/* Added to libpng-1.2.6 JB */
+#define PNG_ROWBYTES(pixel_bits, width) \
+    ((pixel_bits) >= 8 ? \
+    ((size_t)(width) * (((size_t)(pixel_bits)) >> 3)) : \
+    (( ((size_t)(width) * ((size_t)(pixel_bits))) + 7) >> 3) )
+
+/* This returns the number of trailing bits in the last byte of a row, 0 if the
+ * last byte is completely full of pixels.  It is, in principle, (pixel_bits x
+ * width) % 8, but that would overflow for large 'width'.  The second macro is
+ * the same except that it returns the number of unused bits in the last byte;
+ * (8-TRAILBITS), but 0 when TRAILBITS is 0.
+ *
+ * NOTE: these macros are intended to be self-evidently correct and never
+ * overflow on the assumption that pixel_bits is in the range 0..255.  The
+ * arguments are evaluated only once and they can be signed (e.g. as a result of
+ * the integral promotions).  The result of the expression always has type
+ * (png_uint_32), however the compiler always knows it is in the range 0..7.
+ */
+#define PNG_TRAILBITS(pixel_bits, width) \
+    (((pixel_bits) * ((width) % (png_uint_32)8)) % 8)
+
+#define PNG_PADBITS(pixel_bits, width) \
+    ((8 - PNG_TRAILBITS(pixel_bits, width)) % 8)
+
+/* PNG_OUT_OF_RANGE returns true if value is outside the range
+ * ideal-delta..ideal+delta.  Each argument is evaluated twice.
+ * "ideal" and "delta" should be constants, normally simple
+ * integers, "value" a variable. Added to libpng-1.2.6 JB
+ */
+#define PNG_OUT_OF_RANGE(value, ideal, delta) \
+   ( (value) < (ideal)-(delta) || (value) > (ideal)+(delta) )
+
+/* Conversions between fixed and floating point, only defined if
+ * required (to make sure the code doesn't accidentally use float
+ * when it is supposedly disabled.)
+ */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* The floating point conversion can't overflow, though it can and
+ * does lose accuracy relative to the original fixed point value.
+ * In practice this doesn't matter because png_fixed_point only
+ * stores numbers with very low precision.  The png_ptr and s
+ * arguments are unused by default but are there in case error
+ * checking becomes a requirement.
+ */
+#define png_float(png_ptr, fixed, s) (.00001 * (fixed))
+
+/* The fixed point conversion performs range checking and evaluates
+ * its argument multiple times, so must be used with care.  The
+ * range checking uses the PNG specification values for a signed
+ * 32-bit fixed point value except that the values are deliberately
+ * rounded-to-zero to an integral value - 21474 (21474.83 is roughly
+ * (2^31-1) * 100000). 's' is a string that describes the value being
+ * converted.
+ *
+ * NOTE: this macro will raise a png_error if the range check fails,
+ * therefore it is normally only appropriate to use this on values
+ * that come from API calls or other sources where an out of range
+ * error indicates a programming error, not a data error!
+ *
+ * NOTE: by default this is off - the macro is not used - because the
+ * function call saves a lot of code.
+ */
+#ifdef PNG_FIXED_POINT_MACRO_SUPPORTED
+#define png_fixed(png_ptr, fp, s) ((fp) <= 21474 && (fp) >= -21474 ?\
+    ((png_fixed_point)(100000 * (fp))) : (png_fixed_error(png_ptr, s),0))
+#endif
+/* else the corresponding function is defined below, inside the scope of the
+ * cplusplus test.
+ */
+#endif
+
+/* Constants for known chunk types.  If you need to add a chunk, define the name
+ * here.  For historical reasons these constants have the form png_<name>; i.e.
+ * the prefix is lower case.  Please use decimal values as the parameters to
+ * match the ISO PNG specification and to avoid relying on the C locale
+ * interpretation of character values.
+ *
+ * Prior to 1.5.6 these constants were strings, as of 1.5.6 png_uint_32 values
+ * are computed and a new macro (PNG_STRING_FROM_CHUNK) added to allow a string
+ * to be generated if required.
+ *
+ * PNG_32b correctly produces a value shifted by up to 24 bits, even on
+ * architectures where (int) is only 16 bits.
+ */
+#define PNG_32b(b,s) ((png_uint_32)(b) << (s))
+#define PNG_U32(b1,b2,b3,b4) \
+   (PNG_32b(b1,24) | PNG_32b(b2,16) | PNG_32b(b3,8) | PNG_32b(b4,0))
+
+/* Constants for known chunk types.
+ *
+ * MAINTAINERS: If you need to add a chunk, define the name here.
+ * For historical reasons these constants have the form png_<name>; i.e.
+ * the prefix is lower case.  Please use decimal values as the parameters to
+ * match the ISO PNG specification and to avoid relying on the C locale
+ * interpretation of character values.  Please keep the list sorted.
+ *
+ * Notice that PNG_U32 is used to define a 32-bit value for the 4 byte chunk
+ * type.  In fact the specification does not express chunk types this way,
+ * however using a 32-bit value means that the chunk type can be read from the
+ * stream using exactly the same code as used for a 32-bit unsigned value and
+ * can be examined far more efficiently (using one arithmetic compare).
+ *
+ * Prior to 1.5.6 the chunk type constants were expressed as C strings.  The
+ * libpng API still uses strings for 'unknown' chunks and a macro,
+ * PNG_STRING_FROM_CHUNK, allows a string to be generated if required.  Notice
+ * that for portable code numeric values must still be used; the string "IHDR"
+ * is not portable and neither is PNG_U32('I', 'H', 'D', 'R').
+ *
+ * In 1.7.0 the definitions will be made public in png.h to avoid having to
+ * duplicate the same definitions in application code.
+ */
+#define png_IDAT PNG_U32( 73,  68,  65,  84)
+#define png_IEND PNG_U32( 73,  69,  78,  68)
+#define png_IHDR PNG_U32( 73,  72,  68,  82)
+#define png_PLTE PNG_U32( 80,  76,  84,  69)
+#define png_bKGD PNG_U32( 98,  75,  71,  68)
+#define png_cHRM PNG_U32( 99,  72,  82,  77)
+#define png_eXIf PNG_U32(101,  88,  73, 102) /* registered July 2017 */
+#define png_fRAc PNG_U32(102,  82,  65,  99) /* registered, not defined */
+#define png_gAMA PNG_U32(103,  65,  77,  65)
+#define png_gIFg PNG_U32(103,  73,  70, 103)
+#define png_gIFt PNG_U32(103,  73,  70, 116) /* deprecated */
+#define png_gIFx PNG_U32(103,  73,  70, 120)
+#define png_hIST PNG_U32(104,  73,  83,  84)
+#define png_iCCP PNG_U32(105,  67,  67,  80)
+#define png_iTXt PNG_U32(105,  84,  88, 116)
+#define png_oFFs PNG_U32(111,  70,  70, 115)
+#define png_pCAL PNG_U32(112,  67,  65,  76)
+#define png_pHYs PNG_U32(112,  72,  89, 115)
+#define png_sBIT PNG_U32(115,  66,  73,  84)
+#define png_sCAL PNG_U32(115,  67,  65,  76)
+#define png_sPLT PNG_U32(115,  80,  76,  84)
+#define png_sRGB PNG_U32(115,  82,  71,  66)
+#define png_sTER PNG_U32(115,  84,  69,  82)
+#define png_tEXt PNG_U32(116,  69,  88, 116)
+#define png_tIME PNG_U32(116,  73,  77,  69)
+#define png_tRNS PNG_U32(116,  82,  78,  83)
+#define png_zTXt PNG_U32(122,  84,  88, 116)
+
+/* The following will work on (signed char*) strings, whereas the get_uint_32
+ * macro will fail on top-bit-set values because of the sign extension.
+ */
+#define PNG_CHUNK_FROM_STRING(s)\
+   PNG_U32(0xff & (s)[0], 0xff & (s)[1], 0xff & (s)[2], 0xff & (s)[3])
+
+/* This uses (char), not (png_byte) to avoid warnings on systems where (char) is
+ * signed and the argument is a (char[])  This macro will fail miserably on
+ * systems where (char) is more than 8 bits.
+ */
+#define PNG_STRING_FROM_CHUNK(s,c)\
+   (void)(((char*)(s))[0]=(char)(((c)>>24) & 0xff), \
+   ((char*)(s))[1]=(char)(((c)>>16) & 0xff),\
+   ((char*)(s))[2]=(char)(((c)>>8) & 0xff), \
+   ((char*)(s))[3]=(char)((c & 0xff)))
+
+/* Do the same but terminate with a null character. */
+#define PNG_CSTRING_FROM_CHUNK(s,c)\
+   (void)(PNG_STRING_FROM_CHUNK(s,c), ((char*)(s))[4] = 0)
+
+/* Test on flag values as defined in the spec (section 5.4): */
+#define PNG_CHUNK_ANCILLARY(c)   (1 & ((c) >> 29))
+#define PNG_CHUNK_CRITICAL(c)     (!PNG_CHUNK_ANCILLARY(c))
+#define PNG_CHUNK_PRIVATE(c)      (1 & ((c) >> 21))
+#define PNG_CHUNK_RESERVED(c)     (1 & ((c) >> 13))
+#define PNG_CHUNK_SAFE_TO_COPY(c) (1 & ((c) >>  5))
+
+/* Gamma values (new at libpng-1.5.4): */
+#define PNG_GAMMA_MAC_OLD 151724  /* Assume '1.8' is really 2.2/1.45! */
+#define PNG_GAMMA_MAC_INVERSE 65909
+#define PNG_GAMMA_sRGB_INVERSE 45455
+
+/* Almost everything below is C specific; the #defines above can be used in
+ * non-C code (so long as it is C-preprocessed) the rest of this stuff cannot.
+ */
+#ifndef PNG_VERSION_INFO_ONLY
+
+#include "pngstruct.h"
+#include "pnginfo.h"
+
+/* Validate the include paths - the include path used to generate pnglibconf.h
+ * must match that used in the build, or we must be using pnglibconf.h.prebuilt:
+ */
+#if PNG_ZLIB_VERNUM != 0 && PNG_ZLIB_VERNUM != ZLIB_VERNUM
+#  error ZLIB_VERNUM != PNG_ZLIB_VERNUM \
+      "-I (include path) error: see the notes in pngpriv.h"
+   /* This means that when pnglibconf.h was built the copy of zlib.h that it
+    * used is not the same as the one being used here.  Because the build of
+    * libpng makes decisions to use inflateInit2 and inflateReset2 based on the
+    * zlib version number and because this affects handling of certain broken
+    * PNG files the -I directives must match.
+    *
+    * The most likely explanation is that you passed a -I in CFLAGS. This will
+    * not work; all the preprocessor directives and in particular all the -I
+    * directives must be in CPPFLAGS.
+    */
+#endif
+
+/* This is used for 16-bit gamma tables -- only the top level pointers are
+ * const; this could be changed:
+ */
+typedef const png_uint_16p * png_const_uint_16pp;
+
+/* Added to libpng-1.5.7: sRGB conversion tables */
+#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\
+   defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
+#ifdef PNG_SIMPLIFIED_READ_SUPPORTED
+PNG_INTERNAL_DATA(const png_uint_16, png_sRGB_table, [256]);
+   /* Convert from an sRGB encoded value 0..255 to a 16-bit linear value,
+    * 0..65535.  This table gives the closest 16-bit answers (no errors).
+    */
+#endif
+
+PNG_INTERNAL_DATA(const png_uint_16, png_sRGB_base, [512]);
+PNG_INTERNAL_DATA(const png_byte, png_sRGB_delta, [512]);
+
+#define PNG_sRGB_FROM_LINEAR(linear) \
+  ((png_byte)(0xff & ((png_sRGB_base[(linear)>>15] \
+   + ((((linear) & 0x7fff)*png_sRGB_delta[(linear)>>15])>>12)) >> 8)))
+   /* Given a value 'linear' in the range 0..255*65535 calculate the 8-bit sRGB
+    * encoded value with maximum error 0.646365.  Note that the input is not a
+    * 16-bit value; it has been multiplied by 255! */
+#endif /* SIMPLIFIED_READ/WRITE */
+
+
+/* Inhibit C++ name-mangling for libpng functions but not for system calls. */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Internal functions; these are not exported from a DLL however because they
+ * are used within several of the C source files they have to be C extern.
+ *
+ * All of these functions must be declared with PNG_INTERNAL_FUNCTION.
+ */
+
+/* Zlib support */
+#define PNG_UNEXPECTED_ZLIB_RETURN (-7)
+PNG_INTERNAL_FUNCTION(void, png_zstream_error,(png_structrp png_ptr, int ret),
+   PNG_EMPTY);
+   /* Used by the zlib handling functions to ensure that z_stream::msg is always
+    * set before they return.
+    */
+
+#ifdef PNG_WRITE_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_free_buffer_list,(png_structrp png_ptr,
+   png_compression_bufferp *list),PNG_EMPTY);
+   /* Free the buffer list used by the compressed write code. */
+#endif
+
+#if defined(PNG_FLOATING_POINT_SUPPORTED) && \
+   !defined(PNG_FIXED_POINT_MACRO_SUPPORTED) && \
+   (defined(PNG_gAMA_SUPPORTED) || defined(PNG_cHRM_SUPPORTED) || \
+   defined(PNG_sCAL_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) || \
+   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)) || \
+   (defined(PNG_sCAL_SUPPORTED) && \
+   defined(PNG_FLOATING_ARITHMETIC_SUPPORTED))
+PNG_INTERNAL_FUNCTION(png_fixed_point,png_fixed,(png_const_structrp png_ptr,
+   double fp, png_const_charp text),PNG_EMPTY);
+#endif
+
+/* Check the user version string for compatibility, returns false if the version
+ * numbers aren't compatible.
+ */
+PNG_INTERNAL_FUNCTION(int,png_user_version_check,(png_structrp png_ptr,
+   png_const_charp user_png_ver),PNG_EMPTY);
+
+/* Internal base allocator - no messages, NULL on failure to allocate.  This
+ * does, however, call the application provided allocator and that could call
+ * png_error (although that would be a bug in the application implementation.)
+ */
+PNG_INTERNAL_FUNCTION(png_voidp,png_malloc_base,(png_const_structrp png_ptr,
+   png_alloc_size_t size),PNG_ALLOCATED);
+
+#if defined(PNG_TEXT_SUPPORTED) || defined(PNG_sPLT_SUPPORTED) ||\
+   defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED)
+/* Internal array allocator, outputs no error or warning messages on failure,
+ * just returns NULL.
+ */
+PNG_INTERNAL_FUNCTION(png_voidp,png_malloc_array,(png_const_structrp png_ptr,
+   int nelements, size_t element_size),PNG_ALLOCATED);
+
+/* The same but an existing array is extended by add_elements.  This function
+ * also memsets the new elements to 0 and copies the old elements.  The old
+ * array is not freed or altered.
+ */
+PNG_INTERNAL_FUNCTION(png_voidp,png_realloc_array,(png_const_structrp png_ptr,
+   png_const_voidp array, int old_elements, int add_elements,
+   size_t element_size),PNG_ALLOCATED);
+#endif /* text, sPLT or unknown chunks */
+
+/* Magic to create a struct when there is no struct to call the user supplied
+ * memory allocators.  Because error handling has not been set up the memory
+ * handlers can't safely call png_error, but this is an obscure and undocumented
+ * restriction so libpng has to assume that the 'free' handler, at least, might
+ * call png_error.
+ */
+PNG_INTERNAL_FUNCTION(png_structp,png_create_png_struct,
+   (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
+    png_error_ptr warn_fn, png_voidp mem_ptr, png_malloc_ptr malloc_fn,
+    png_free_ptr free_fn),PNG_ALLOCATED);
+
+/* Free memory from internal libpng struct */
+PNG_INTERNAL_FUNCTION(void,png_destroy_png_struct,(png_structrp png_ptr),
+   PNG_EMPTY);
+
+/* Free an allocated jmp_buf (always succeeds) */
+PNG_INTERNAL_FUNCTION(void,png_free_jmpbuf,(png_structrp png_ptr),PNG_EMPTY);
+
+/* Function to allocate memory for zlib.  PNGAPI is disallowed. */
+PNG_INTERNAL_FUNCTION(voidpf,png_zalloc,(voidpf png_ptr, uInt items, uInt size),
+   PNG_ALLOCATED);
+
+/* Function to free memory for zlib.  PNGAPI is disallowed. */
+PNG_INTERNAL_FUNCTION(void,png_zfree,(voidpf png_ptr, voidpf ptr),PNG_EMPTY);
+
+/* Next four functions are used internally as callbacks.  PNGCBAPI is required
+ * but not PNG_EXPORT.  PNGAPI added at libpng version 1.2.3, changed to
+ * PNGCBAPI at 1.5.0
+ */
+
+PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_default_read_data,(png_structp png_ptr,
+    png_bytep data, size_t length),PNG_EMPTY);
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_push_fill_buffer,(png_structp png_ptr,
+    png_bytep buffer, size_t length),PNG_EMPTY);
+#endif
+
+PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_default_write_data,(png_structp png_ptr,
+    png_bytep data, size_t length),PNG_EMPTY);
+
+#ifdef PNG_WRITE_FLUSH_SUPPORTED
+#  ifdef PNG_STDIO_SUPPORTED
+PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_default_flush,(png_structp png_ptr),
+   PNG_EMPTY);
+#  endif
+#endif
+
+/* Reset the CRC variable */
+PNG_INTERNAL_FUNCTION(void,png_reset_crc,(png_structrp png_ptr),PNG_EMPTY);
+
+/* Write the "data" buffer to whatever output you are using */
+PNG_INTERNAL_FUNCTION(void,png_write_data,(png_structrp png_ptr,
+    png_const_bytep data, size_t length),PNG_EMPTY);
+
+/* Read and check the PNG file signature */
+PNG_INTERNAL_FUNCTION(void,png_read_sig,(png_structrp png_ptr,
+   png_inforp info_ptr),PNG_EMPTY);
+
+/* Read the chunk header (length + type name) */
+PNG_INTERNAL_FUNCTION(png_uint_32,png_read_chunk_header,(png_structrp png_ptr),
+   PNG_EMPTY);
+
+/* Read data from whatever input you are using into the "data" buffer */
+PNG_INTERNAL_FUNCTION(void,png_read_data,(png_structrp png_ptr, png_bytep data,
+    size_t length),PNG_EMPTY);
+
+/* Read bytes into buf, and update png_ptr->crc */
+PNG_INTERNAL_FUNCTION(void,png_crc_read,(png_structrp png_ptr, png_bytep buf,
+    png_uint_32 length),PNG_EMPTY);
+
+/* Read "skip" bytes, read the file crc, and (optionally) verify png_ptr->crc */
+PNG_INTERNAL_FUNCTION(int,png_crc_finish,(png_structrp png_ptr,
+   png_uint_32 skip),PNG_EMPTY);
+
+/* Read the CRC from the file and compare it to the libpng calculated CRC */
+PNG_INTERNAL_FUNCTION(int,png_crc_error,(png_structrp png_ptr),PNG_EMPTY);
+
+/* Calculate the CRC over a section of data.  Note that we are only
+ * passing a maximum of 64K on systems that have this as a memory limit,
+ * since this is the maximum buffer size we can specify.
+ */
+PNG_INTERNAL_FUNCTION(void,png_calculate_crc,(png_structrp png_ptr,
+   png_const_bytep ptr, size_t length),PNG_EMPTY);
+
+#ifdef PNG_WRITE_FLUSH_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_flush,(png_structrp png_ptr),PNG_EMPTY);
+#endif
+
+/* Write various chunks */
+
+/* Write the IHDR chunk, and update the png_struct with the necessary
+ * information.
+ */
+PNG_INTERNAL_FUNCTION(void,png_write_IHDR,(png_structrp png_ptr,
+   png_uint_32 width, png_uint_32 height, int bit_depth, int color_type,
+   int compression_method, int filter_method, int interlace_method),PNG_EMPTY);
+
+PNG_INTERNAL_FUNCTION(void,png_write_PLTE,(png_structrp png_ptr,
+   png_const_colorp palette, png_uint_32 num_pal),PNG_EMPTY);
+
+PNG_INTERNAL_FUNCTION(void,png_compress_IDAT,(png_structrp png_ptr,
+   png_const_bytep row_data, png_alloc_size_t row_data_length, int flush),
+   PNG_EMPTY);
+
+PNG_INTERNAL_FUNCTION(void,png_write_IEND,(png_structrp png_ptr),PNG_EMPTY);
+
+#ifdef PNG_WRITE_gAMA_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_gAMA_fixed,(png_structrp png_ptr,
+    png_fixed_point file_gamma),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_sBIT_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_sBIT,(png_structrp png_ptr,
+    png_const_color_8p sbit, int color_type),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_cHRM_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_cHRM_fixed,(png_structrp png_ptr,
+    const png_xy *xy), PNG_EMPTY);
+   /* The xy value must have been previously validated */
+#endif
+
+#ifdef PNG_WRITE_sRGB_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_sRGB,(png_structrp png_ptr,
+    int intent),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_eXIf_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_eXIf,(png_structrp png_ptr,
+    png_bytep exif, int num_exif),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_iCCP_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_iCCP,(png_structrp png_ptr,
+   png_const_charp name, png_const_bytep profile), PNG_EMPTY);
+   /* The profile must have been previously validated for correctness, the
+    * length comes from the first four bytes.  Only the base, deflate,
+    * compression is supported.
+    */
+#endif
+
+#ifdef PNG_WRITE_sPLT_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_sPLT,(png_structrp png_ptr,
+    png_const_sPLT_tp palette),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_tRNS_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_tRNS,(png_structrp png_ptr,
+    png_const_bytep trans, png_const_color_16p values, int number,
+    int color_type),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_bKGD_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_bKGD,(png_structrp png_ptr,
+    png_const_color_16p values, int color_type),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_hIST_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_hIST,(png_structrp png_ptr,
+    png_const_uint_16p hist, int num_hist),PNG_EMPTY);
+#endif
+
+/* Chunks that have keywords */
+#ifdef PNG_WRITE_tEXt_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_tEXt,(png_structrp png_ptr,
+   png_const_charp key, png_const_charp text, size_t text_len),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_zTXt_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_zTXt,(png_structrp png_ptr, png_const_charp
+    key, png_const_charp text, int compression),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_iTXt_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_iTXt,(png_structrp png_ptr,
+    int compression, png_const_charp key, png_const_charp lang,
+    png_const_charp lang_key, png_const_charp text),PNG_EMPTY);
+#endif
+
+#ifdef PNG_TEXT_SUPPORTED  /* Added at version 1.0.14 and 1.2.4 */
+PNG_INTERNAL_FUNCTION(int,png_set_text_2,(png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_textp text_ptr, int num_text),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_oFFs_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_oFFs,(png_structrp png_ptr,
+    png_int_32 x_offset, png_int_32 y_offset, int unit_type),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_pCAL_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_pCAL,(png_structrp png_ptr,
+    png_charp purpose, png_int_32 X0, png_int_32 X1, int type, int nparams,
+    png_const_charp units, png_charpp params),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_pHYs_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_pHYs,(png_structrp png_ptr,
+    png_uint_32 x_pixels_per_unit, png_uint_32 y_pixels_per_unit,
+    int unit_type),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_tIME_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_tIME,(png_structrp png_ptr,
+    png_const_timep mod_time),PNG_EMPTY);
+#endif
+
+#ifdef PNG_WRITE_sCAL_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_write_sCAL_s,(png_structrp png_ptr,
+    int unit, png_const_charp width, png_const_charp height),PNG_EMPTY);
+#endif
+
+/* Called when finished processing a row of data */
+PNG_INTERNAL_FUNCTION(void,png_write_finish_row,(png_structrp png_ptr),
+    PNG_EMPTY);
+
+/* Internal use only.   Called before first row of data */
+PNG_INTERNAL_FUNCTION(void,png_write_start_row,(png_structrp png_ptr),
+    PNG_EMPTY);
+
+/* Combine a row of data, dealing with alpha, etc. if requested.  'row' is an
+ * array of png_ptr->width pixels.  If the image is not interlaced or this
+ * is the final pass this just does a memcpy, otherwise the "display" flag
+ * is used to determine whether to copy pixels that are not in the current pass.
+ *
+ * Because 'png_do_read_interlace' (below) replicates pixels this allows this
+ * function to achieve the documented 'blocky' appearance during interlaced read
+ * if display is 1 and the 'sparkle' appearance, where existing pixels in 'row'
+ * are not changed if they are not in the current pass, when display is 0.
+ *
+ * 'display' must be 0 or 1, otherwise the memcpy will be done regardless.
+ *
+ * The API always reads from the png_struct row buffer and always assumes that
+ * it is full width (png_do_read_interlace has already been called.)
+ *
+ * This function is only ever used to write to row buffers provided by the
+ * caller of the relevant libpng API and the row must have already been
+ * transformed by the read transformations.
+ *
+ * The PNG_USE_COMPILE_TIME_MASKS option causes generation of pre-computed
+ * bitmasks for use within the code, otherwise runtime generated masks are used.
+ * The default is compile time masks.
+ */
+#ifndef PNG_USE_COMPILE_TIME_MASKS
+#  define PNG_USE_COMPILE_TIME_MASKS 1
+#endif
+PNG_INTERNAL_FUNCTION(void,png_combine_row,(png_const_structrp png_ptr,
+    png_bytep row, int display),PNG_EMPTY);
+
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+/* Expand an interlaced row: the 'row_info' describes the pass data that has
+ * been read in and must correspond to the pixels in 'row', the pixels are
+ * expanded (moved apart) in 'row' to match the final layout, when doing this
+ * the pixels are *replicated* to the intervening space.  This is essential for
+ * the correct operation of png_combine_row, above.
+ */
+PNG_INTERNAL_FUNCTION(void,png_do_read_interlace,(png_row_infop row_info,
+    png_bytep row, int pass, png_uint_32 transformations),PNG_EMPTY);
+#endif
+
+/* GRR TO DO (2.0 or whenever):  simplify other internal calling interfaces */
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+/* Grab pixels out of a row for an interlaced pass */
+PNG_INTERNAL_FUNCTION(void,png_do_write_interlace,(png_row_infop row_info,
+    png_bytep row, int pass),PNG_EMPTY);
+#endif
+
+/* Unfilter a row: check the filter value before calling this, there is no point
+ * calling it for PNG_FILTER_VALUE_NONE.
+ */
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row,(png_structrp pp, png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row, int filter),PNG_EMPTY);
+
+#if PNG_ARM_NEON_OPT > 0
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_neon,(png_row_infop row_info,
+    png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_neon,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_neon,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_neon,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_neon,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+#endif
+
+#if PNG_MIPS_MSA_IMPLEMENTATION == 1
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_msa,(png_row_infop row_info,
+    png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_msa,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_msa,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_msa,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_msa,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_msa,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+#endif
+
+#if PNG_MIPS_MMI_IMPLEMENTATION > 0
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_mmi,(png_row_infop row_info,
+    png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_mmi,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_mmi,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_mmi,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_mmi,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_mmi,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_mmi,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+#endif
+
+#if PNG_POWERPC_VSX_OPT > 0
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vsx,(png_row_infop row_info,
+    png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+#endif
+
+#if PNG_INTEL_SSE_IMPLEMENTATION > 0
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+#endif
+
+#if PNG_LOONGARCH_LSX_IMPLEMENTATION == 1
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_lsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_lsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_lsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_lsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_lsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_lsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_lsx,(png_row_infop
+    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+#endif
+
+/* Choose the best filter to use and filter the row data */
+PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr,
+    png_row_infop row_info),PNG_EMPTY);
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_read_IDAT_data,(png_structrp png_ptr,
+   png_bytep output, png_alloc_size_t avail_out),PNG_EMPTY);
+   /* Read 'avail_out' bytes of data from the IDAT stream.  If the output buffer
+    * is NULL the function checks, instead, for the end of the stream.  In this
+    * case a benign error will be issued if the stream end is not found or if
+    * extra data has to be consumed.
+    */
+PNG_INTERNAL_FUNCTION(void,png_read_finish_IDAT,(png_structrp png_ptr),
+   PNG_EMPTY);
+   /* This cleans up when the IDAT LZ stream does not end when the last image
+    * byte is read; there is still some pending input.
+    */
+
+PNG_INTERNAL_FUNCTION(void,png_read_finish_row,(png_structrp png_ptr),
+   PNG_EMPTY);
+   /* Finish a row while reading, dealing with interlacing passes, etc. */
+#endif /* SEQUENTIAL_READ */
+
+/* Initialize the row buffers, etc. */
+PNG_INTERNAL_FUNCTION(void,png_read_start_row,(png_structrp png_ptr),PNG_EMPTY);
+
+#if ZLIB_VERNUM >= 0x1240
+PNG_INTERNAL_FUNCTION(int,png_zlib_inflate,(png_structrp png_ptr, int flush),
+      PNG_EMPTY);
+#  define PNG_INFLATE(pp, flush) png_zlib_inflate(pp, flush)
+#else /* Zlib < 1.2.4 */
+#  define PNG_INFLATE(pp, flush) inflate(&(pp)->zstream, flush)
+#endif /* Zlib < 1.2.4 */
+
+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
+/* Optional call to update the users info structure */
+PNG_INTERNAL_FUNCTION(void,png_read_transform_info,(png_structrp png_ptr,
+    png_inforp info_ptr),PNG_EMPTY);
+#endif
+
+/* Shared transform functions, defined in pngtran.c */
+#if defined(PNG_WRITE_FILLER_SUPPORTED) || \
+    defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+PNG_INTERNAL_FUNCTION(void,png_do_strip_channel,(png_row_infop row_info,
+    png_bytep row, int at_start),PNG_EMPTY);
+#endif
+
+#ifdef PNG_16BIT_SUPPORTED
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+PNG_INTERNAL_FUNCTION(void,png_do_swap,(png_row_infop row_info,
+    png_bytep row),PNG_EMPTY);
+#endif
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \
+    defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+PNG_INTERNAL_FUNCTION(void,png_do_packswap,(png_row_infop row_info,
+    png_bytep row),PNG_EMPTY);
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
+PNG_INTERNAL_FUNCTION(void,png_do_invert,(png_row_infop row_info,
+    png_bytep row),PNG_EMPTY);
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+PNG_INTERNAL_FUNCTION(void,png_do_bgr,(png_row_infop row_info,
+    png_bytep row),PNG_EMPTY);
+#endif
+
+/* The following decodes the appropriate chunks, and does error correction,
+ * then calls the appropriate callback for the chunk if it is valid.
+ */
+
+/* Decode the IHDR chunk */
+PNG_INTERNAL_FUNCTION(void,png_handle_IHDR,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_handle_PLTE,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_handle_IEND,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+
+#ifdef PNG_READ_bKGD_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_bKGD,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_cHRM_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_cHRM,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_eXIf_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_eXIf,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_gAMA_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_gAMA,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_hIST_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_hIST,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_iCCP_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_iCCP,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif /* READ_iCCP */
+
+#ifdef PNG_READ_iTXt_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_iTXt,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_oFFs_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_oFFs,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_pCAL_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_pCAL,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_pHYs_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_pHYs,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_sBIT_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_sBIT,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_sCAL_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_sCAL,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_sPLT_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_sPLT,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif /* READ_sPLT */
+
+#ifdef PNG_READ_sRGB_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_sRGB,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_tEXt_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_tEXt,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_tIME_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_tIME,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_tRNS_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_tRNS,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_zTXt_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_handle_zTXt,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+#endif
+
+PNG_INTERNAL_FUNCTION(void,png_check_chunk_name,(png_const_structrp png_ptr,
+    png_uint_32 chunk_name),PNG_EMPTY);
+
+PNG_INTERNAL_FUNCTION(void,png_check_chunk_length,(png_const_structrp png_ptr,
+    png_uint_32 chunk_length),PNG_EMPTY);
+
+PNG_INTERNAL_FUNCTION(void,png_handle_unknown,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length, int keep),PNG_EMPTY);
+   /* This is the function that gets called for unknown chunks.  The 'keep'
+    * argument is either non-zero for a known chunk that has been set to be
+    * handled as unknown or zero for an unknown chunk.  By default the function
+    * just skips the chunk or errors out if it is critical.
+    */
+
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) ||\
+    defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED)
+PNG_INTERNAL_FUNCTION(int,png_chunk_unknown_handling,
+    (png_const_structrp png_ptr, png_uint_32 chunk_name),PNG_EMPTY);
+   /* Exactly as the API png_handle_as_unknown() except that the argument is a
+    * 32-bit chunk name, not a string.
+    */
+#endif /* READ_UNKNOWN_CHUNKS || HANDLE_AS_UNKNOWN */
+
+/* Handle the transformations for reading and writing */
+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_do_read_transformations,(png_structrp png_ptr,
+   png_row_infop row_info),PNG_EMPTY);
+#endif
+#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_do_write_transformations,(png_structrp png_ptr,
+   png_row_infop row_info),PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_init_read_transformations,(png_structrp png_ptr),
+    PNG_EMPTY);
+#endif
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_push_read_chunk,(png_structrp png_ptr,
+    png_inforp info_ptr),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_read_sig,(png_structrp png_ptr,
+    png_inforp info_ptr),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_check_crc,(png_structrp png_ptr),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_save_buffer,(png_structrp png_ptr),
+    PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_restore_buffer,(png_structrp png_ptr,
+    png_bytep buffer, size_t buffer_length),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_read_IDAT,(png_structrp png_ptr),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_process_IDAT_data,(png_structrp png_ptr,
+    png_bytep buffer, size_t buffer_length),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_process_row,(png_structrp png_ptr),
+    PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_handle_unknown,(png_structrp png_ptr,
+   png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_have_info,(png_structrp png_ptr,
+   png_inforp info_ptr),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_have_end,(png_structrp png_ptr,
+   png_inforp info_ptr),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_have_row,(png_structrp png_ptr,
+    png_bytep row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_read_end,(png_structrp png_ptr,
+    png_inforp info_ptr),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_process_some_data,(png_structrp png_ptr,
+    png_inforp info_ptr),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_push_finish_row,(png_structrp png_ptr),
+    PNG_EMPTY);
+#  ifdef PNG_READ_tEXt_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_push_handle_tEXt,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_read_tEXt,(png_structrp png_ptr,
+    png_inforp info_ptr),PNG_EMPTY);
+#  endif
+#  ifdef PNG_READ_zTXt_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_push_handle_zTXt,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_read_zTXt,(png_structrp png_ptr,
+    png_inforp info_ptr),PNG_EMPTY);
+#  endif
+#  ifdef PNG_READ_iTXt_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_push_handle_iTXt,(png_structrp png_ptr,
+    png_inforp info_ptr, png_uint_32 length),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_push_read_iTXt,(png_structrp png_ptr,
+    png_inforp info_ptr),PNG_EMPTY);
+#  endif
+
+#endif /* PROGRESSIVE_READ */
+
+/* Added at libpng version 1.6.0 */
+#ifdef PNG_GAMMA_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_colorspace_set_gamma,(png_const_structrp png_ptr,
+    png_colorspacerp colorspace, png_fixed_point gAMA), PNG_EMPTY);
+   /* Set the colorspace gamma with a value provided by the application or by
+    * the gAMA chunk on read.  The value will override anything set by an ICC
+    * profile.
+    */
+
+PNG_INTERNAL_FUNCTION(void,png_colorspace_sync_info,(png_const_structrp png_ptr,
+    png_inforp info_ptr), PNG_EMPTY);
+   /* Synchronize the info 'valid' flags with the colorspace */
+
+PNG_INTERNAL_FUNCTION(void,png_colorspace_sync,(png_const_structrp png_ptr,
+    png_inforp info_ptr), PNG_EMPTY);
+   /* Copy the png_struct colorspace to the info_struct and call the above to
+    * synchronize the flags.  Checks for NULL info_ptr and does nothing.
+    */
+#endif
+
+/* Added at libpng version 1.4.0 */
+#ifdef PNG_COLORSPACE_SUPPORTED
+/* These internal functions are for maintaining the colorspace structure within
+ * a png_info or png_struct (or, indeed, both).
+ */
+PNG_INTERNAL_FUNCTION(int,png_colorspace_set_chromaticities,
+   (png_const_structrp png_ptr, png_colorspacerp colorspace, const png_xy *xy,
+    int preferred), PNG_EMPTY);
+
+PNG_INTERNAL_FUNCTION(int,png_colorspace_set_endpoints,
+   (png_const_structrp png_ptr, png_colorspacerp colorspace, const png_XYZ *XYZ,
+    int preferred), PNG_EMPTY);
+
+#ifdef PNG_sRGB_SUPPORTED
+PNG_INTERNAL_FUNCTION(int,png_colorspace_set_sRGB,(png_const_structrp png_ptr,
+   png_colorspacerp colorspace, int intent), PNG_EMPTY);
+   /* This does set the colorspace gAMA and cHRM values too, but doesn't set the
+    * flags to write them, if it returns false there was a problem and an error
+    * message has already been output (but the colorspace may still need to be
+    * synced to record the invalid flag).
+    */
+#endif /* sRGB */
+
+#ifdef PNG_iCCP_SUPPORTED
+PNG_INTERNAL_FUNCTION(int,png_colorspace_set_ICC,(png_const_structrp png_ptr,
+   png_colorspacerp colorspace, png_const_charp name,
+   png_uint_32 profile_length, png_const_bytep profile, int color_type),
+   PNG_EMPTY);
+   /* The 'name' is used for information only */
+
+/* Routines for checking parts of an ICC profile. */
+#ifdef PNG_READ_iCCP_SUPPORTED
+PNG_INTERNAL_FUNCTION(int,png_icc_check_length,(png_const_structrp png_ptr,
+   png_colorspacerp colorspace, png_const_charp name,
+   png_uint_32 profile_length), PNG_EMPTY);
+#endif /* READ_iCCP */
+PNG_INTERNAL_FUNCTION(int,png_icc_check_header,(png_const_structrp png_ptr,
+   png_colorspacerp colorspace, png_const_charp name,
+   png_uint_32 profile_length,
+   png_const_bytep profile /* first 132 bytes only */, int color_type),
+   PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(int,png_icc_check_tag_table,(png_const_structrp png_ptr,
+   png_colorspacerp colorspace, png_const_charp name,
+   png_uint_32 profile_length,
+   png_const_bytep profile /* header plus whole tag table */), PNG_EMPTY);
+#ifdef PNG_sRGB_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_icc_set_sRGB,(
+   png_const_structrp png_ptr, png_colorspacerp colorspace,
+   png_const_bytep profile, uLong adler), PNG_EMPTY);
+   /* 'adler' is the Adler32 checksum of the uncompressed profile data. It may
+    * be zero to indicate that it is not available.  It is used, if provided,
+    * as a fast check on the profile when checking to see if it is sRGB.
+    */
+#endif
+#endif /* iCCP */
+
+#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_colorspace_set_rgb_coefficients,
+   (png_structrp png_ptr), PNG_EMPTY);
+   /* Set the rgb_to_gray coefficients from the colorspace Y values */
+#endif /* READ_RGB_TO_GRAY */
+#endif /* COLORSPACE */
+
+/* Added at libpng version 1.4.0 */
+PNG_INTERNAL_FUNCTION(void,png_check_IHDR,(png_const_structrp png_ptr,
+    png_uint_32 width, png_uint_32 height, int bit_depth,
+    int color_type, int interlace_type, int compression_type,
+    int filter_type),PNG_EMPTY);
+
+/* Added at libpng version 1.5.10 */
+#if defined(PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED) || \
+    defined(PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED)
+PNG_INTERNAL_FUNCTION(void,png_do_check_palette_indexes,
+   (png_structrp png_ptr, png_row_infop row_info),PNG_EMPTY);
+#endif
+
+#if defined(PNG_FLOATING_POINT_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED)
+PNG_INTERNAL_FUNCTION(void,png_fixed_error,(png_const_structrp png_ptr,
+   png_const_charp name),PNG_NORETURN);
+#endif
+
+/* Puts 'string' into 'buffer' at buffer[pos], taking care never to overwrite
+ * the end.  Always leaves the buffer nul terminated.  Never errors out (and
+ * there is no error code.)
+ */
+PNG_INTERNAL_FUNCTION(size_t,png_safecat,(png_charp buffer, size_t bufsize,
+   size_t pos, png_const_charp string),PNG_EMPTY);
+
+/* Various internal functions to handle formatted warning messages, currently
+ * only implemented for warnings.
+ */
+#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_TIME_RFC1123_SUPPORTED)
+/* Utility to dump an unsigned value into a buffer, given a start pointer and
+ * and end pointer (which should point just *beyond* the end of the buffer!)
+ * Returns the pointer to the start of the formatted string.  This utility only
+ * does unsigned values.
+ */
+PNG_INTERNAL_FUNCTION(png_charp,png_format_number,(png_const_charp start,
+   png_charp end, int format, png_alloc_size_t number),PNG_EMPTY);
+
+/* Convenience macro that takes an array: */
+#define PNG_FORMAT_NUMBER(buffer,format,number) \
+   png_format_number(buffer, buffer + (sizeof buffer), format, number)
+
+/* Suggested size for a number buffer (enough for 64 bits and a sign!) */
+#define PNG_NUMBER_BUFFER_SIZE 24
+
+/* These are the integer formats currently supported, the name is formed from
+ * the standard printf(3) format string.
+ */
+#define PNG_NUMBER_FORMAT_u     1 /* chose unsigned API! */
+#define PNG_NUMBER_FORMAT_02u   2
+#define PNG_NUMBER_FORMAT_d     1 /* chose signed API! */
+#define PNG_NUMBER_FORMAT_02d   2
+#define PNG_NUMBER_FORMAT_x     3
+#define PNG_NUMBER_FORMAT_02x   4
+#define PNG_NUMBER_FORMAT_fixed 5 /* choose the signed API */
+#endif
+
+#ifdef PNG_WARNINGS_SUPPORTED
+/* New defines and members adding in libpng-1.5.4 */
+#  define PNG_WARNING_PARAMETER_SIZE 32
+#  define PNG_WARNING_PARAMETER_COUNT 8 /* Maximum 9; see pngerror.c */
+
+/* An l-value of this type has to be passed to the APIs below to cache the
+ * values of the parameters to a formatted warning message.
+ */
+typedef char png_warning_parameters[PNG_WARNING_PARAMETER_COUNT][
+   PNG_WARNING_PARAMETER_SIZE];
+
+PNG_INTERNAL_FUNCTION(void,png_warning_parameter,(png_warning_parameters p,
+   int number, png_const_charp string),PNG_EMPTY);
+   /* Parameters are limited in size to PNG_WARNING_PARAMETER_SIZE characters,
+    * including the trailing '\0'.
+    */
+PNG_INTERNAL_FUNCTION(void,png_warning_parameter_unsigned,
+   (png_warning_parameters p, int number, int format, png_alloc_size_t value),
+   PNG_EMPTY);
+   /* Use png_alloc_size_t because it is an unsigned type as big as any we
+    * need to output.  Use the following for a signed value.
+    */
+PNG_INTERNAL_FUNCTION(void,png_warning_parameter_signed,
+   (png_warning_parameters p, int number, int format, png_int_32 value),
+   PNG_EMPTY);
+
+PNG_INTERNAL_FUNCTION(void,png_formatted_warning,(png_const_structrp png_ptr,
+   png_warning_parameters p, png_const_charp message),PNG_EMPTY);
+   /* 'message' follows the X/Open approach of using @1, @2 to insert
+    * parameters previously supplied using the above functions.  Errors in
+    * specifying the parameters will simply result in garbage substitutions.
+    */
+#endif
+
+#ifdef PNG_BENIGN_ERRORS_SUPPORTED
+/* Application errors (new in 1.6); use these functions (declared below) for
+ * errors in the parameters or order of API function calls on read.  The
+ * 'warning' should be used for an error that can be handled completely; the
+ * 'error' for one which can be handled safely but which may lose application
+ * information or settings.
+ *
+ * By default these both result in a png_error call prior to release, while in a
+ * released version the 'warning' is just a warning.  However if the application
+ * explicitly disables benign errors (explicitly permitting the code to lose
+ * information) they both turn into warnings.
+ *
+ * If benign errors aren't supported they end up as the corresponding base call
+ * (png_warning or png_error.)
+ */
+PNG_INTERNAL_FUNCTION(void,png_app_warning,(png_const_structrp png_ptr,
+   png_const_charp message),PNG_EMPTY);
+   /* The application provided invalid parameters to an API function or called
+    * an API function at the wrong time, libpng can completely recover.
+    */
+
+PNG_INTERNAL_FUNCTION(void,png_app_error,(png_const_structrp png_ptr,
+   png_const_charp message),PNG_EMPTY);
+   /* As above but libpng will ignore the call, or attempt some other partial
+    * recovery from the error.
+    */
+#else
+#  define png_app_warning(pp,s) png_warning(pp,s)
+#  define png_app_error(pp,s) png_error(pp,s)
+#endif
+
+PNG_INTERNAL_FUNCTION(void,png_chunk_report,(png_const_structrp png_ptr,
+   png_const_charp message, int error),PNG_EMPTY);
+   /* Report a recoverable issue in chunk data.  On read this is used to report
+    * a problem found while reading a particular chunk and the
+    * png_chunk_benign_error or png_chunk_warning function is used as
+    * appropriate.  On write this is used to report an error that comes from
+    * data set via an application call to a png_set_ API and png_app_error or
+    * png_app_warning is used as appropriate.
+    *
+    * The 'error' parameter must have one of the following values:
+    */
+#define PNG_CHUNK_WARNING     0 /* never an error */
+#define PNG_CHUNK_WRITE_ERROR 1 /* an error only on write */
+#define PNG_CHUNK_ERROR       2 /* always an error */
+
+/* ASCII to FP interfaces, currently only implemented if sCAL
+ * support is required.
+ */
+#if defined(PNG_sCAL_SUPPORTED)
+/* MAX_DIGITS is actually the maximum number of characters in an sCAL
+ * width or height, derived from the precision (number of significant
+ * digits - a build time settable option) and assumptions about the
+ * maximum ridiculous exponent.
+ */
+#define PNG_sCAL_MAX_DIGITS (PNG_sCAL_PRECISION+1/*.*/+1/*E*/+10/*exponent*/)
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_ascii_from_fp,(png_const_structrp png_ptr,
+   png_charp ascii, size_t size, double fp, unsigned int precision),
+   PNG_EMPTY);
+#endif /* FLOATING_POINT */
+
+#ifdef PNG_FIXED_POINT_SUPPORTED
+PNG_INTERNAL_FUNCTION(void,png_ascii_from_fixed,(png_const_structrp png_ptr,
+   png_charp ascii, size_t size, png_fixed_point fp),PNG_EMPTY);
+#endif /* FIXED_POINT */
+#endif /* sCAL */
+
+#if defined(PNG_sCAL_SUPPORTED) || defined(PNG_pCAL_SUPPORTED)
+/* An internal API to validate the format of a floating point number.
+ * The result is the index of the next character.  If the number is
+ * not valid it will be the index of a character in the supposed number.
+ *
+ * The format of a number is defined in the PNG extensions specification
+ * and this API is strictly conformant to that spec, not anyone elses!
+ *
+ * The format as a regular expression is:
+ *
+ * [+-]?[0-9]+.?([Ee][+-]?[0-9]+)?
+ *
+ * or:
+ *
+ * [+-]?.[0-9]+(.[0-9]+)?([Ee][+-]?[0-9]+)?
+ *
+ * The complexity is that either integer or fraction must be present and the
+ * fraction is permitted to have no digits only if the integer is present.
+ *
+ * NOTE: The dangling E problem.
+ *   There is a PNG valid floating point number in the following:
+ *
+ *       PNG floating point numbers are not greedy.
+ *
+ *   Working this out requires *TWO* character lookahead (because of the
+ *   sign), the parser does not do this - it will fail at the 'r' - this
+ *   doesn't matter for PNG sCAL chunk values, but it requires more care
+ *   if the value were ever to be embedded in something more complex.  Use
+ *   ANSI-C strtod if you need the lookahead.
+ */
+/* State table for the parser. */
+#define PNG_FP_INTEGER    0  /* before or in integer */
+#define PNG_FP_FRACTION   1  /* before or in fraction */
+#define PNG_FP_EXPONENT   2  /* before or in exponent */
+#define PNG_FP_STATE      3  /* mask for the above */
+#define PNG_FP_SAW_SIGN   4  /* Saw +/- in current state */
+#define PNG_FP_SAW_DIGIT  8  /* Saw a digit in current state */
+#define PNG_FP_SAW_DOT   16  /* Saw a dot in current state */
+#define PNG_FP_SAW_E     32  /* Saw an E (or e) in current state */
+#define PNG_FP_SAW_ANY   60  /* Saw any of the above 4 */
+
+/* These three values don't affect the parser.  They are set but not used.
+ */
+#define PNG_FP_WAS_VALID 64  /* Preceding substring is a valid fp number */
+#define PNG_FP_NEGATIVE 128  /* A negative number, including "-0" */
+#define PNG_FP_NONZERO  256  /* A non-zero value */
+#define PNG_FP_STICKY   448  /* The above three flags */
+
+/* This is available for the caller to store in 'state' if required.  Do not
+ * call the parser after setting it (the parser sometimes clears it.)
+ */
+#define PNG_FP_INVALID  512  /* Available for callers as a distinct value */
+
+/* Result codes for the parser (boolean - true means ok, false means
+ * not ok yet.)
+ */
+#define PNG_FP_MAYBE      0  /* The number may be valid in the future */
+#define PNG_FP_OK         1  /* The number is valid */
+
+/* Tests on the sticky non-zero and negative flags.  To pass these checks
+ * the state must also indicate that the whole number is valid - this is
+ * achieved by testing PNG_FP_SAW_DIGIT (see the implementation for why this
+ * is equivalent to PNG_FP_OK above.)
+ */
+#define PNG_FP_NZ_MASK (PNG_FP_SAW_DIGIT | PNG_FP_NEGATIVE | PNG_FP_NONZERO)
+   /* NZ_MASK: the string is valid and a non-zero negative value */
+#define PNG_FP_Z_MASK (PNG_FP_SAW_DIGIT | PNG_FP_NONZERO)
+   /* Z MASK: the string is valid and a non-zero value. */
+   /* PNG_FP_SAW_DIGIT: the string is valid. */
+#define PNG_FP_IS_ZERO(state) (((state) & PNG_FP_Z_MASK) == PNG_FP_SAW_DIGIT)
+#define PNG_FP_IS_POSITIVE(state) (((state) & PNG_FP_NZ_MASK) == PNG_FP_Z_MASK)
+#define PNG_FP_IS_NEGATIVE(state) (((state) & PNG_FP_NZ_MASK) == PNG_FP_NZ_MASK)
+
+/* The actual parser.  This can be called repeatedly. It updates
+ * the index into the string and the state variable (which must
+ * be initialized to 0).  It returns a result code, as above.  There
+ * is no point calling the parser any more if it fails to advance to
+ * the end of the string - it is stuck on an invalid character (or
+ * terminated by '\0').
+ *
+ * Note that the pointer will consume an E or even an E+ and then leave
+ * a 'maybe' state even though a preceding integer.fraction is valid.
+ * The PNG_FP_WAS_VALID flag indicates that a preceding substring was
+ * a valid number.  It's possible to recover from this by calling
+ * the parser again (from the start, with state 0) but with a string
+ * that omits the last character (i.e. set the size to the index of
+ * the problem character.)  This has not been tested within libpng.
+ */
+PNG_INTERNAL_FUNCTION(int,png_check_fp_number,(png_const_charp string,
+   size_t size, int *statep, size_t *whereami),PNG_EMPTY);
+
+/* This is the same but it checks a complete string and returns true
+ * only if it just contains a floating point number.  As of 1.5.4 this
+ * function also returns the state at the end of parsing the number if
+ * it was valid (otherwise it returns 0.)  This can be used for testing
+ * for negative or zero values using the sticky flag.
+ */
+PNG_INTERNAL_FUNCTION(int,png_check_fp_string,(png_const_charp string,
+   size_t size),PNG_EMPTY);
+#endif /* pCAL || sCAL */
+
+#if defined(PNG_GAMMA_SUPPORTED) ||\
+    defined(PNG_INCH_CONVERSIONS_SUPPORTED) || defined(PNG_READ_pHYs_SUPPORTED)
+/* Added at libpng version 1.5.0 */
+/* This is a utility to provide a*times/div (rounded) and indicate
+ * if there is an overflow.  The result is a boolean - false (0)
+ * for overflow, true (1) if no overflow, in which case *res
+ * holds the result.
+ */
+PNG_INTERNAL_FUNCTION(int,png_muldiv,(png_fixed_point_p res, png_fixed_point a,
+   png_int_32 multiplied_by, png_int_32 divided_by),PNG_EMPTY);
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_INCH_CONVERSIONS_SUPPORTED)
+/* Same deal, but issue a warning on overflow and return 0. */
+PNG_INTERNAL_FUNCTION(png_fixed_point,png_muldiv_warn,
+   (png_const_structrp png_ptr, png_fixed_point a, png_int_32 multiplied_by,
+   png_int_32 divided_by),PNG_EMPTY);
+#endif
+
+#ifdef PNG_GAMMA_SUPPORTED
+/* Calculate a reciprocal - used for gamma values.  This returns
+ * 0 if the argument is 0 in order to maintain an undefined value;
+ * there are no warnings.
+ */
+PNG_INTERNAL_FUNCTION(png_fixed_point,png_reciprocal,(png_fixed_point a),
+   PNG_EMPTY);
+
+#ifdef PNG_READ_GAMMA_SUPPORTED
+/* The same but gives a reciprocal of the product of two fixed point
+ * values.  Accuracy is suitable for gamma calculations but this is
+ * not exact - use png_muldiv for that.  Only required at present on read.
+ */
+PNG_INTERNAL_FUNCTION(png_fixed_point,png_reciprocal2,(png_fixed_point a,
+   png_fixed_point b),PNG_EMPTY);
+#endif
+
+/* Return true if the gamma value is significantly different from 1.0 */
+PNG_INTERNAL_FUNCTION(int,png_gamma_significant,(png_fixed_point gamma_value),
+   PNG_EMPTY);
+#endif
+
+#ifdef PNG_READ_GAMMA_SUPPORTED
+/* Internal fixed point gamma correction.  These APIs are called as
+ * required to convert single values - they don't need to be fast,
+ * they are not used when processing image pixel values.
+ *
+ * While the input is an 'unsigned' value it must actually be the
+ * correct bit value - 0..255 or 0..65535 as required.
+ */
+PNG_INTERNAL_FUNCTION(png_uint_16,png_gamma_correct,(png_structrp png_ptr,
+   unsigned int value, png_fixed_point gamma_value),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(png_uint_16,png_gamma_16bit_correct,(unsigned int value,
+   png_fixed_point gamma_value),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(png_byte,png_gamma_8bit_correct,(unsigned int value,
+   png_fixed_point gamma_value),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_destroy_gamma_table,(png_structrp png_ptr),
+   PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_build_gamma_table,(png_structrp png_ptr,
+   int bit_depth),PNG_EMPTY);
+#endif
+
+/* SIMPLIFIED READ/WRITE SUPPORT */
+#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\
+   defined(PNG_SIMPLIFIED_WRITE_SUPPORTED)
+/* The internal structure that png_image::opaque points to. */
+typedef struct png_control
+{
+   png_structp png_ptr;
+   png_infop   info_ptr;
+   png_voidp   error_buf;           /* Always a jmp_buf at present. */
+
+   png_const_bytep memory;          /* Memory buffer. */
+   size_t          size;            /* Size of the memory buffer. */
+
+   unsigned int for_write       :1; /* Otherwise it is a read structure */
+   unsigned int owned_file      :1; /* We own the file in io_ptr */
+} png_control;
+
+/* Return the pointer to the jmp_buf from a png_control: necessary because C
+ * does not reveal the type of the elements of jmp_buf.
+ */
+#ifdef __cplusplus
+#  define png_control_jmp_buf(pc) (((jmp_buf*)((pc)->error_buf))[0])
+#else
+#  define png_control_jmp_buf(pc) ((pc)->error_buf)
+#endif
+
+/* Utility to safely execute a piece of libpng code catching and logging any
+ * errors that might occur.  Returns true on success, false on failure (either
+ * of the function or as a result of a png_error.)
+ */
+PNG_INTERNAL_CALLBACK(void,png_safe_error,(png_structp png_ptr,
+   png_const_charp error_message),PNG_NORETURN);
+
+#ifdef PNG_WARNINGS_SUPPORTED
+PNG_INTERNAL_CALLBACK(void,png_safe_warning,(png_structp png_ptr,
+   png_const_charp warning_message),PNG_EMPTY);
+#else
+#  define png_safe_warning 0/*dummy argument*/
+#endif
+
+PNG_INTERNAL_FUNCTION(int,png_safe_execute,(png_imagep image,
+   int (*function)(png_voidp), png_voidp arg),PNG_EMPTY);
+
+/* Utility to log an error; this also cleans up the png_image; the function
+ * always returns 0 (false).
+ */
+PNG_INTERNAL_FUNCTION(int,png_image_error,(png_imagep image,
+   png_const_charp error_message),PNG_EMPTY);
+
+#ifndef PNG_SIMPLIFIED_READ_SUPPORTED
+/* png_image_free is used by the write code but not exported */
+PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY);
+#endif /* !SIMPLIFIED_READ */
+
+#endif /* SIMPLIFIED READ/WRITE */
+
+/* These are initialization functions for hardware specific PNG filter
+ * optimizations; list these here then select the appropriate one at compile
+ * time using the macro PNG_FILTER_OPTIMIZATIONS.  If the macro is not defined
+ * the generic code is used.
+ */
+#ifdef PNG_FILTER_OPTIMIZATIONS
+PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr,
+   unsigned int bpp), PNG_EMPTY);
+   /* Just declare the optimization that will be used */
+#else
+   /* List *all* the possible optimizations here - this branch is required if
+    * the builder of libpng passes the definition of PNG_FILTER_OPTIMIZATIONS in
+    * CFLAGS in place of CPPFLAGS *and* uses symbol prefixing.
+    */
+#  if PNG_ARM_NEON_OPT > 0
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon,
+   (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
+#endif
+
+#if PNG_MIPS_MSA_IMPLEMENTATION == 1
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips,
+   (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
+#endif
+
+#  if PNG_MIPS_MMI_IMPLEMENTATION > 0
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips,
+   (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
+#  endif
+
+#  if PNG_INTEL_SSE_IMPLEMENTATION > 0
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2,
+   (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
+#  endif
+#endif
+
+#if PNG_LOONGARCH_LSX_OPT > 0
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_lsx,
+    (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
+#endif
+
+PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
+   png_const_charp key, png_bytep new_key), PNG_EMPTY);
+
+#if PNG_ARM_NEON_IMPLEMENTATION == 1
+PNG_INTERNAL_FUNCTION(void,
+                      png_riffle_palette_neon,
+                      (png_structrp),
+                      PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(int,
+                      png_do_expand_palette_rgba8_neon,
+                      (png_structrp,
+                       png_row_infop,
+                       png_const_bytep,
+                       const png_bytepp,
+                       const png_bytepp),
+                      PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(int,
+                      png_do_expand_palette_rgb8_neon,
+                      (png_structrp,
+                       png_row_infop,
+                       png_const_bytep,
+                       const png_bytepp,
+                       const png_bytepp),
+                      PNG_EMPTY);
+#endif
+
+/* Maintainer: Put new private prototypes here ^ */
+
+#include "pngdebug.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PNG_VERSION_INFO_ONLY */
+#endif /* PNGPRIV_H */
diff --git a/reg-io/png/lpng/pngread.c b/reg-io/png/lpng/pngread.c
new file mode 100644
index 00000000..0d54f34d
--- /dev/null
+++ b/reg-io/png/lpng/pngread.c
@@ -0,0 +1,4224 @@
+
+/* pngread.c - read a PNG file
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ *
+ * This file contains routines that an application calls directly to
+ * read a PNG file or stream.
+ */
+
+#include "pngpriv.h"
+#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) && defined(PNG_STDIO_SUPPORTED)
+#  include <errno.h>
+#endif
+
+#ifdef PNG_READ_SUPPORTED
+
+/* Create a PNG structure for reading, and allocate any memory needed. */
+PNG_FUNCTION(png_structp,PNGAPI
+png_create_read_struct,(png_const_charp user_png_ver, png_voidp error_ptr,
+    png_error_ptr error_fn, png_error_ptr warn_fn),PNG_ALLOCATED)
+{
+#ifndef PNG_USER_MEM_SUPPORTED
+   png_structp png_ptr = png_create_png_struct(user_png_ver, error_ptr,
+        error_fn, warn_fn, NULL, NULL, NULL);
+#else
+   return png_create_read_struct_2(user_png_ver, error_ptr, error_fn,
+        warn_fn, NULL, NULL, NULL);
+}
+
+/* Alternate create PNG structure for reading, and allocate any memory
+ * needed.
+ */
+PNG_FUNCTION(png_structp,PNGAPI
+png_create_read_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr,
+    png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+    png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED)
+{
+   png_structp png_ptr = png_create_png_struct(user_png_ver, error_ptr,
+       error_fn, warn_fn, mem_ptr, malloc_fn, free_fn);
+#endif /* USER_MEM */
+
+   if (png_ptr != NULL)
+   {
+      png_ptr->mode = PNG_IS_READ_STRUCT;
+
+      /* Added in libpng-1.6.0; this can be used to detect a read structure if
+       * required (it will be zero in a write structure.)
+       */
+#     ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+         png_ptr->IDAT_read_size = PNG_IDAT_READ_SIZE;
+#     endif
+
+#     ifdef PNG_BENIGN_READ_ERRORS_SUPPORTED
+         png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN;
+
+         /* In stable builds only warn if an application error can be completely
+          * handled.
+          */
+#        if PNG_RELEASE_BUILD
+            png_ptr->flags |= PNG_FLAG_APP_WARNINGS_WARN;
+#        endif
+#     endif
+
+      /* TODO: delay this, it can be done in png_init_io (if the app doesn't
+       * do it itself) avoiding setting the default function if it is not
+       * required.
+       */
+      png_set_read_fn(png_ptr, NULL, NULL);
+   }
+
+   return png_ptr;
+}
+
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read the information before the actual image data.  This has been
+ * changed in v0.90 to allow reading a file that already has the magic
+ * bytes read from the stream.  You can tell libpng how many bytes have
+ * been read from the beginning of the stream (up to the maximum of 8)
+ * via png_set_sig_bytes(), and we will only check the remaining bytes
+ * here.  The application can then have access to the signature bytes we
+ * read if it is determined that this isn't a valid PNG file.
+ */
+void PNGAPI
+png_read_info(png_structrp png_ptr, png_inforp info_ptr)
+{
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+   int keep;
+#endif
+
+   png_debug(1, "in png_read_info");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   /* Read and check the PNG file signature. */
+   png_read_sig(png_ptr, info_ptr);
+
+   for (;;)
+   {
+      png_uint_32 length = png_read_chunk_header(png_ptr);
+      png_uint_32 chunk_name = png_ptr->chunk_name;
+
+      /* IDAT logic needs to happen here to simplify getting the two flags
+       * right.
+       */
+      if (chunk_name == png_IDAT)
+      {
+         if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+            png_chunk_error(png_ptr, "Missing IHDR before IDAT");
+
+         else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+             (png_ptr->mode & PNG_HAVE_PLTE) == 0)
+            png_chunk_error(png_ptr, "Missing PLTE before IDAT");
+
+         else if ((png_ptr->mode & PNG_AFTER_IDAT) != 0)
+            png_chunk_benign_error(png_ptr, "Too many IDATs found");
+
+         png_ptr->mode |= PNG_HAVE_IDAT;
+      }
+
+      else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+      {
+         png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
+         png_ptr->mode |= PNG_AFTER_IDAT;
+      }
+
+      /* This should be a binary subdivision search or a hash for
+       * matching the chunk name rather than a linear search.
+       */
+      if (chunk_name == png_IHDR)
+         png_handle_IHDR(png_ptr, info_ptr, length);
+
+      else if (chunk_name == png_IEND)
+         png_handle_IEND(png_ptr, info_ptr, length);
+
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+      else if ((keep = png_chunk_unknown_handling(png_ptr, chunk_name)) != 0)
+      {
+         png_handle_unknown(png_ptr, info_ptr, length, keep);
+
+         if (chunk_name == png_PLTE)
+            png_ptr->mode |= PNG_HAVE_PLTE;
+
+         else if (chunk_name == png_IDAT)
+         {
+            png_ptr->idat_size = 0; /* It has been consumed */
+            break;
+         }
+      }
+#endif
+      else if (chunk_name == png_PLTE)
+         png_handle_PLTE(png_ptr, info_ptr, length);
+
+      else if (chunk_name == png_IDAT)
+      {
+         png_ptr->idat_size = length;
+         break;
+      }
+
+#ifdef PNG_READ_bKGD_SUPPORTED
+      else if (chunk_name == png_bKGD)
+         png_handle_bKGD(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_cHRM_SUPPORTED
+      else if (chunk_name == png_cHRM)
+         png_handle_cHRM(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_eXIf_SUPPORTED
+      else if (chunk_name == png_eXIf)
+         png_handle_eXIf(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_gAMA_SUPPORTED
+      else if (chunk_name == png_gAMA)
+         png_handle_gAMA(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_hIST_SUPPORTED
+      else if (chunk_name == png_hIST)
+         png_handle_hIST(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_oFFs_SUPPORTED
+      else if (chunk_name == png_oFFs)
+         png_handle_oFFs(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_pCAL_SUPPORTED
+      else if (chunk_name == png_pCAL)
+         png_handle_pCAL(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_sCAL_SUPPORTED
+      else if (chunk_name == png_sCAL)
+         png_handle_sCAL(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_pHYs_SUPPORTED
+      else if (chunk_name == png_pHYs)
+         png_handle_pHYs(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_sBIT_SUPPORTED
+      else if (chunk_name == png_sBIT)
+         png_handle_sBIT(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_sRGB_SUPPORTED
+      else if (chunk_name == png_sRGB)
+         png_handle_sRGB(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_iCCP_SUPPORTED
+      else if (chunk_name == png_iCCP)
+         png_handle_iCCP(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_sPLT_SUPPORTED
+      else if (chunk_name == png_sPLT)
+         png_handle_sPLT(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_tEXt_SUPPORTED
+      else if (chunk_name == png_tEXt)
+         png_handle_tEXt(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_tIME_SUPPORTED
+      else if (chunk_name == png_tIME)
+         png_handle_tIME(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_tRNS_SUPPORTED
+      else if (chunk_name == png_tRNS)
+         png_handle_tRNS(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_zTXt_SUPPORTED
+      else if (chunk_name == png_zTXt)
+         png_handle_zTXt(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_iTXt_SUPPORTED
+      else if (chunk_name == png_iTXt)
+         png_handle_iTXt(png_ptr, info_ptr, length);
+#endif
+
+      else
+         png_handle_unknown(png_ptr, info_ptr, length,
+             PNG_HANDLE_CHUNK_AS_DEFAULT);
+   }
+}
+#endif /* SEQUENTIAL_READ */
+
+/* Optional call to update the users info_ptr structure */
+void PNGAPI
+png_read_update_info(png_structrp png_ptr, png_inforp info_ptr)
+{
+   png_debug(1, "in png_read_update_info");
+
+   if (png_ptr != NULL)
+   {
+      if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
+      {
+         png_read_start_row(png_ptr);
+
+#        ifdef PNG_READ_TRANSFORMS_SUPPORTED
+            png_read_transform_info(png_ptr, info_ptr);
+#        else
+            PNG_UNUSED(info_ptr)
+#        endif
+      }
+
+      /* New in 1.6.0 this avoids the bug of doing the initializations twice */
+      else
+         png_app_error(png_ptr,
+             "png_read_update_info/png_start_read_image: duplicate call");
+   }
+}
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Initialize palette, background, etc, after transformations
+ * are set, but before any reading takes place.  This allows
+ * the user to obtain a gamma-corrected palette, for example.
+ * If the user doesn't call this, we will do it ourselves.
+ */
+void PNGAPI
+png_start_read_image(png_structrp png_ptr)
+{
+   png_debug(1, "in png_start_read_image");
+
+   if (png_ptr != NULL)
+   {
+      if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
+         png_read_start_row(png_ptr);
+
+      /* New in 1.6.0 this avoids the bug of doing the initializations twice */
+      else
+         png_app_error(png_ptr,
+             "png_start_read_image/png_read_update_info: duplicate call");
+   }
+}
+#endif /* SEQUENTIAL_READ */
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+/* Undoes intrapixel differencing,
+ * NOTE: this is apparently only supported in the 'sequential' reader.
+ */
+static void
+png_do_read_intrapixel(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_read_intrapixel");
+
+   if (
+       (row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
+   {
+      int bytes_per_pixel;
+      png_uint_32 row_width = row_info->width;
+
+      if (row_info->bit_depth == 8)
+      {
+         png_bytep rp;
+         png_uint_32 i;
+
+         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+            bytes_per_pixel = 3;
+
+         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            bytes_per_pixel = 4;
+
+         else
+            return;
+
+         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
+         {
+            *(rp) = (png_byte)((256 + *rp + *(rp + 1)) & 0xff);
+            *(rp+2) = (png_byte)((256 + *(rp + 2) + *(rp + 1)) & 0xff);
+         }
+      }
+      else if (row_info->bit_depth == 16)
+      {
+         png_bytep rp;
+         png_uint_32 i;
+
+         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+            bytes_per_pixel = 6;
+
+         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            bytes_per_pixel = 8;
+
+         else
+            return;
+
+         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
+         {
+            png_uint_32 s0   = (png_uint_32)(*(rp    ) << 8) | *(rp + 1);
+            png_uint_32 s1   = (png_uint_32)(*(rp + 2) << 8) | *(rp + 3);
+            png_uint_32 s2   = (png_uint_32)(*(rp + 4) << 8) | *(rp + 5);
+            png_uint_32 red  = (s0 + s1 + 65536) & 0xffff;
+            png_uint_32 blue = (s2 + s1 + 65536) & 0xffff;
+            *(rp    ) = (png_byte)((red >> 8) & 0xff);
+            *(rp + 1) = (png_byte)(red & 0xff);
+            *(rp + 4) = (png_byte)((blue >> 8) & 0xff);
+            *(rp + 5) = (png_byte)(blue & 0xff);
+         }
+      }
+   }
+}
+#endif /* MNG_FEATURES */
+
+void PNGAPI
+png_read_row(png_structrp png_ptr, png_bytep row, png_bytep dsp_row)
+{
+   png_row_info row_info;
+
+   if (png_ptr == NULL)
+      return;
+
+   png_debug2(1, "in png_read_row (row %lu, pass %d)",
+       (unsigned long)png_ptr->row_number, png_ptr->pass);
+
+   /* png_read_start_row sets the information (in particular iwidth) for this
+    * interlace pass.
+    */
+   if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
+      png_read_start_row(png_ptr);
+
+   /* 1.5.6: row_info moved out of png_struct to a local here. */
+   row_info.width = png_ptr->iwidth; /* NOTE: width of current interlaced row */
+   row_info.color_type = png_ptr->color_type;
+   row_info.bit_depth = png_ptr->bit_depth;
+   row_info.channels = png_ptr->channels;
+   row_info.pixel_depth = png_ptr->pixel_depth;
+   row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width);
+
+#ifdef PNG_WARNINGS_SUPPORTED
+   if (png_ptr->row_number == 0 && png_ptr->pass == 0)
+   {
+   /* Check for transforms that have been set but were defined out */
+#if defined(PNG_WRITE_INVERT_SUPPORTED) && !defined(PNG_READ_INVERT_SUPPORTED)
+   if ((png_ptr->transformations & PNG_INVERT_MONO) != 0)
+      png_warning(png_ptr, "PNG_READ_INVERT_SUPPORTED is not defined");
+#endif
+
+#if defined(PNG_WRITE_FILLER_SUPPORTED) && !defined(PNG_READ_FILLER_SUPPORTED)
+   if ((png_ptr->transformations & PNG_FILLER) != 0)
+      png_warning(png_ptr, "PNG_READ_FILLER_SUPPORTED is not defined");
+#endif
+
+#if defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \
+    !defined(PNG_READ_PACKSWAP_SUPPORTED)
+   if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
+      png_warning(png_ptr, "PNG_READ_PACKSWAP_SUPPORTED is not defined");
+#endif
+
+#if defined(PNG_WRITE_PACK_SUPPORTED) && !defined(PNG_READ_PACK_SUPPORTED)
+   if ((png_ptr->transformations & PNG_PACK) != 0)
+      png_warning(png_ptr, "PNG_READ_PACK_SUPPORTED is not defined");
+#endif
+
+#if defined(PNG_WRITE_SHIFT_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED)
+   if ((png_ptr->transformations & PNG_SHIFT) != 0)
+      png_warning(png_ptr, "PNG_READ_SHIFT_SUPPORTED is not defined");
+#endif
+
+#if defined(PNG_WRITE_BGR_SUPPORTED) && !defined(PNG_READ_BGR_SUPPORTED)
+   if ((png_ptr->transformations & PNG_BGR) != 0)
+      png_warning(png_ptr, "PNG_READ_BGR_SUPPORTED is not defined");
+#endif
+
+#if defined(PNG_WRITE_SWAP_SUPPORTED) && !defined(PNG_READ_SWAP_SUPPORTED)
+   if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0)
+      png_warning(png_ptr, "PNG_READ_SWAP_SUPPORTED is not defined");
+#endif
+   }
+#endif /* WARNINGS */
+
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+   /* If interlaced and we do not need a new row, combine row and return.
+    * Notice that the pixels we have from previous rows have been transformed
+    * already; we can only combine like with like (transformed or
+    * untransformed) and, because of the libpng API for interlaced images, this
+    * means we must transform before de-interlacing.
+    */
+   if (png_ptr->interlaced != 0 &&
+       (png_ptr->transformations & PNG_INTERLACE) != 0)
+   {
+      switch (png_ptr->pass)
+      {
+         case 0:
+            if (png_ptr->row_number & 0x07)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 1:
+            if ((png_ptr->row_number & 0x07) || png_ptr->width < 5)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
+
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 2:
+            if ((png_ptr->row_number & 0x07) != 4)
+            {
+               if (dsp_row != NULL && (png_ptr->row_number & 4))
+                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
+
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 3:
+            if ((png_ptr->row_number & 3) || png_ptr->width < 3)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
+
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 4:
+            if ((png_ptr->row_number & 3) != 2)
+            {
+               if (dsp_row != NULL && (png_ptr->row_number & 2))
+                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
+
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 5:
+            if ((png_ptr->row_number & 1) || png_ptr->width < 2)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
+
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         default:
+         case 6:
+            if ((png_ptr->row_number & 1) == 0)
+            {
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+      }
+   }
+#endif
+
+   if ((png_ptr->mode & PNG_HAVE_IDAT) == 0)
+      png_error(png_ptr, "Invalid attempt to read row data");
+
+   /* Fill the row with IDAT data: */
+   png_ptr->row_buf[0]=255; /* to force error if no data was found */
+   png_read_IDAT_data(png_ptr, png_ptr->row_buf, row_info.rowbytes + 1);
+
+   if (png_ptr->row_buf[0] > PNG_FILTER_VALUE_NONE)
+   {
+      if (png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST)
+         png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1,
+             png_ptr->prev_row + 1, png_ptr->row_buf[0]);
+      else
+         png_error(png_ptr, "bad adaptive filter value");
+   }
+
+   /* libpng 1.5.6: the following line was copying png_ptr->rowbytes before
+    * 1.5.6, while the buffer really is this big in current versions of libpng
+    * it may not be in the future, so this was changed just to copy the
+    * interlaced count:
+    */
+   memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1);
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
+       (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
+   {
+      /* Intrapixel differencing */
+      png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1);
+   }
+#endif
+
+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
+   if (png_ptr->transformations || png_ptr->num_palette_max >= 0)
+      png_do_read_transformations(png_ptr, &row_info);
+#endif
+
+   /* The transformed pixel depth should match the depth now in row_info. */
+   if (png_ptr->transformed_pixel_depth == 0)
+   {
+      png_ptr->transformed_pixel_depth = row_info.pixel_depth;
+      if (row_info.pixel_depth > png_ptr->maximum_pixel_depth)
+         png_error(png_ptr, "sequential row overflow");
+   }
+
+   else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth)
+      png_error(png_ptr, "internal sequential row size calculation error");
+
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+   /* Expand interlaced rows to full size */
+   if (png_ptr->interlaced != 0 &&
+      (png_ptr->transformations & PNG_INTERLACE) != 0)
+   {
+      if (png_ptr->pass < 6)
+         png_do_read_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass,
+             png_ptr->transformations);
+
+      if (dsp_row != NULL)
+         png_combine_row(png_ptr, dsp_row, 1/*display*/);
+
+      if (row != NULL)
+         png_combine_row(png_ptr, row, 0/*row*/);
+   }
+
+   else
+#endif
+   {
+      if (row != NULL)
+         png_combine_row(png_ptr, row, -1/*ignored*/);
+
+      if (dsp_row != NULL)
+         png_combine_row(png_ptr, dsp_row, -1/*ignored*/);
+   }
+   png_read_finish_row(png_ptr);
+
+   if (png_ptr->read_row_fn != NULL)
+      (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
+
+}
+#endif /* SEQUENTIAL_READ */
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read one or more rows of image data.  If the image is interlaced,
+ * and png_set_interlace_handling() has been called, the rows need to
+ * contain the contents of the rows from the previous pass.  If the
+ * image has alpha or transparency, and png_handle_alpha()[*] has been
+ * called, the rows contents must be initialized to the contents of the
+ * screen.
+ *
+ * "row" holds the actual image, and pixels are placed in it
+ * as they arrive.  If the image is displayed after each pass, it will
+ * appear to "sparkle" in.  "display_row" can be used to display a
+ * "chunky" progressive image, with finer detail added as it becomes
+ * available.  If you do not want this "chunky" display, you may pass
+ * NULL for display_row.  If you do not want the sparkle display, and
+ * you have not called png_handle_alpha(), you may pass NULL for rows.
+ * If you have called png_handle_alpha(), and the image has either an
+ * alpha channel or a transparency chunk, you must provide a buffer for
+ * rows.  In this case, you do not have to provide a display_row buffer
+ * also, but you may.  If the image is not interlaced, or if you have
+ * not called png_set_interlace_handling(), the display_row buffer will
+ * be ignored, so pass NULL to it.
+ *
+ * [*] png_handle_alpha() does not exist yet, as of this version of libpng
+ */
+
+void PNGAPI
+png_read_rows(png_structrp png_ptr, png_bytepp row,
+    png_bytepp display_row, png_uint_32 num_rows)
+{
+   png_uint_32 i;
+   png_bytepp rp;
+   png_bytepp dp;
+
+   png_debug(1, "in png_read_rows");
+
+   if (png_ptr == NULL)
+      return;
+
+   rp = row;
+   dp = display_row;
+   if (rp != NULL && dp != NULL)
+      for (i = 0; i < num_rows; i++)
+      {
+         png_bytep rptr = *rp++;
+         png_bytep dptr = *dp++;
+
+         png_read_row(png_ptr, rptr, dptr);
+      }
+
+   else if (rp != NULL)
+      for (i = 0; i < num_rows; i++)
+      {
+         png_bytep rptr = *rp;
+         png_read_row(png_ptr, rptr, NULL);
+         rp++;
+      }
+
+   else if (dp != NULL)
+      for (i = 0; i < num_rows; i++)
+      {
+         png_bytep dptr = *dp;
+         png_read_row(png_ptr, NULL, dptr);
+         dp++;
+      }
+}
+#endif /* SEQUENTIAL_READ */
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read the entire image.  If the image has an alpha channel or a tRNS
+ * chunk, and you have called png_handle_alpha()[*], you will need to
+ * initialize the image to the current image that PNG will be overlaying.
+ * We set the num_rows again here, in case it was incorrectly set in
+ * png_read_start_row() by a call to png_read_update_info() or
+ * png_start_read_image() if png_set_interlace_handling() wasn't called
+ * prior to either of these functions like it should have been.  You can
+ * only call this function once.  If you desire to have an image for
+ * each pass of a interlaced image, use png_read_rows() instead.
+ *
+ * [*] png_handle_alpha() does not exist yet, as of this version of libpng
+ */
+void PNGAPI
+png_read_image(png_structrp png_ptr, png_bytepp image)
+{
+   png_uint_32 i, image_height;
+   int pass, j;
+   png_bytepp rp;
+
+   png_debug(1, "in png_read_image");
+
+   if (png_ptr == NULL)
+      return;
+
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+   if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
+   {
+      pass = png_set_interlace_handling(png_ptr);
+      /* And make sure transforms are initialized. */
+      png_start_read_image(png_ptr);
+   }
+   else
+   {
+      if (png_ptr->interlaced != 0 &&
+          (png_ptr->transformations & PNG_INTERLACE) == 0)
+      {
+         /* Caller called png_start_read_image or png_read_update_info without
+          * first turning on the PNG_INTERLACE transform.  We can fix this here,
+          * but the caller should do it!
+          */
+         png_warning(png_ptr, "Interlace handling should be turned on when "
+             "using png_read_image");
+         /* Make sure this is set correctly */
+         png_ptr->num_rows = png_ptr->height;
+      }
+
+      /* Obtain the pass number, which also turns on the PNG_INTERLACE flag in
+       * the above error case.
+       */
+      pass = png_set_interlace_handling(png_ptr);
+   }
+#else
+   if (png_ptr->interlaced)
+      png_error(png_ptr,
+          "Cannot read interlaced image -- interlace handler disabled");
+
+   pass = 1;
+#endif
+
+   image_height=png_ptr->height;
+
+   for (j = 0; j < pass; j++)
+   {
+      rp = image;
+      for (i = 0; i < image_height; i++)
+      {
+         png_read_row(png_ptr, *rp, NULL);
+         rp++;
+      }
+   }
+}
+#endif /* SEQUENTIAL_READ */
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read the end of the PNG file.  Will not read past the end of the
+ * file, will verify the end is accurate, and will read any comments
+ * or time information at the end of the file, if info is not NULL.
+ */
+void PNGAPI
+png_read_end(png_structrp png_ptr, png_inforp info_ptr)
+{
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+   int keep;
+#endif
+
+   png_debug(1, "in png_read_end");
+
+   if (png_ptr == NULL)
+      return;
+
+   /* If png_read_end is called in the middle of reading the rows there may
+    * still be pending IDAT data and an owned zstream.  Deal with this here.
+    */
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+   if (png_chunk_unknown_handling(png_ptr, png_IDAT) == 0)
+#endif
+      png_read_finish_IDAT(png_ptr);
+
+#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED
+   /* Report invalid palette index; added at libng-1.5.10 */
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+       png_ptr->num_palette_max >= png_ptr->num_palette)
+      png_benign_error(png_ptr, "Read palette index exceeding num_palette");
+#endif
+
+   do
+   {
+      png_uint_32 length = png_read_chunk_header(png_ptr);
+      png_uint_32 chunk_name = png_ptr->chunk_name;
+
+      if (chunk_name != png_IDAT)
+         png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
+
+      if (chunk_name == png_IEND)
+         png_handle_IEND(png_ptr, info_ptr, length);
+
+      else if (chunk_name == png_IHDR)
+         png_handle_IHDR(png_ptr, info_ptr, length);
+
+      else if (info_ptr == NULL)
+         png_crc_finish(png_ptr, length);
+
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+      else if ((keep = png_chunk_unknown_handling(png_ptr, chunk_name)) != 0)
+      {
+         if (chunk_name == png_IDAT)
+         {
+            if ((length > 0 && !(png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED))
+                || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT) != 0)
+               png_benign_error(png_ptr, ".Too many IDATs found");
+         }
+         png_handle_unknown(png_ptr, info_ptr, length, keep);
+         if (chunk_name == png_PLTE)
+            png_ptr->mode |= PNG_HAVE_PLTE;
+      }
+#endif
+
+      else if (chunk_name == png_IDAT)
+      {
+         /* Zero length IDATs are legal after the last IDAT has been
+          * read, but not after other chunks have been read.  1.6 does not
+          * always read all the deflate data; specifically it cannot be relied
+          * upon to read the Adler32 at the end.  If it doesn't ignore IDAT
+          * chunks which are longer than zero as well:
+          */
+         if ((length > 0 && !(png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED))
+             || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT) != 0)
+            png_benign_error(png_ptr, "..Too many IDATs found");
+
+         png_crc_finish(png_ptr, length);
+      }
+      else if (chunk_name == png_PLTE)
+         png_handle_PLTE(png_ptr, info_ptr, length);
+
+#ifdef PNG_READ_bKGD_SUPPORTED
+      else if (chunk_name == png_bKGD)
+         png_handle_bKGD(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_cHRM_SUPPORTED
+      else if (chunk_name == png_cHRM)
+         png_handle_cHRM(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_eXIf_SUPPORTED
+      else if (chunk_name == png_eXIf)
+         png_handle_eXIf(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_gAMA_SUPPORTED
+      else if (chunk_name == png_gAMA)
+         png_handle_gAMA(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_hIST_SUPPORTED
+      else if (chunk_name == png_hIST)
+         png_handle_hIST(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_oFFs_SUPPORTED
+      else if (chunk_name == png_oFFs)
+         png_handle_oFFs(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_pCAL_SUPPORTED
+      else if (chunk_name == png_pCAL)
+         png_handle_pCAL(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_sCAL_SUPPORTED
+      else if (chunk_name == png_sCAL)
+         png_handle_sCAL(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_pHYs_SUPPORTED
+      else if (chunk_name == png_pHYs)
+         png_handle_pHYs(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_sBIT_SUPPORTED
+      else if (chunk_name == png_sBIT)
+         png_handle_sBIT(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_sRGB_SUPPORTED
+      else if (chunk_name == png_sRGB)
+         png_handle_sRGB(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_iCCP_SUPPORTED
+      else if (chunk_name == png_iCCP)
+         png_handle_iCCP(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_sPLT_SUPPORTED
+      else if (chunk_name == png_sPLT)
+         png_handle_sPLT(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_tEXt_SUPPORTED
+      else if (chunk_name == png_tEXt)
+         png_handle_tEXt(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_tIME_SUPPORTED
+      else if (chunk_name == png_tIME)
+         png_handle_tIME(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_tRNS_SUPPORTED
+      else if (chunk_name == png_tRNS)
+         png_handle_tRNS(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_zTXt_SUPPORTED
+      else if (chunk_name == png_zTXt)
+         png_handle_zTXt(png_ptr, info_ptr, length);
+#endif
+
+#ifdef PNG_READ_iTXt_SUPPORTED
+      else if (chunk_name == png_iTXt)
+         png_handle_iTXt(png_ptr, info_ptr, length);
+#endif
+
+      else
+         png_handle_unknown(png_ptr, info_ptr, length,
+             PNG_HANDLE_CHUNK_AS_DEFAULT);
+   } while ((png_ptr->mode & PNG_HAVE_IEND) == 0);
+}
+#endif /* SEQUENTIAL_READ */
+
+/* Free all memory used in the read struct */
+static void
+png_read_destroy(png_structrp png_ptr)
+{
+   png_debug(1, "in png_read_destroy");
+
+#ifdef PNG_READ_GAMMA_SUPPORTED
+   png_destroy_gamma_table(png_ptr);
+#endif
+
+   png_free(png_ptr, png_ptr->big_row_buf);
+   png_ptr->big_row_buf = NULL;
+   png_free(png_ptr, png_ptr->big_prev_row);
+   png_ptr->big_prev_row = NULL;
+   png_free(png_ptr, png_ptr->read_buffer);
+   png_ptr->read_buffer = NULL;
+
+#ifdef PNG_READ_QUANTIZE_SUPPORTED
+   png_free(png_ptr, png_ptr->palette_lookup);
+   png_ptr->palette_lookup = NULL;
+   png_free(png_ptr, png_ptr->quantize_index);
+   png_ptr->quantize_index = NULL;
+#endif
+
+   if ((png_ptr->free_me & PNG_FREE_PLTE) != 0)
+   {
+      png_zfree(png_ptr, png_ptr->palette);
+      png_ptr->palette = NULL;
+   }
+   png_ptr->free_me &= ~PNG_FREE_PLTE;
+
+#if defined(PNG_tRNS_SUPPORTED) || \
+    defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if ((png_ptr->free_me & PNG_FREE_TRNS) != 0)
+   {
+      png_free(png_ptr, png_ptr->trans_alpha);
+      png_ptr->trans_alpha = NULL;
+   }
+   png_ptr->free_me &= ~PNG_FREE_TRNS;
+#endif
+
+   inflateEnd(&png_ptr->zstream);
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+   png_free(png_ptr, png_ptr->save_buffer);
+   png_ptr->save_buffer = NULL;
+#endif
+
+#if defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED) && \
+   defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+   png_free(png_ptr, png_ptr->unknown_chunk.data);
+   png_ptr->unknown_chunk.data = NULL;
+#endif
+
+#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
+   png_free(png_ptr, png_ptr->chunk_list);
+   png_ptr->chunk_list = NULL;
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED) && \
+    defined(PNG_ARM_NEON_IMPLEMENTATION)
+   png_free(png_ptr, png_ptr->riffled_palette);
+   png_ptr->riffled_palette = NULL;
+#endif
+
+   /* NOTE: the 'setjmp' buffer may still be allocated and the memory and error
+    * callbacks are still set at this point.  They are required to complete the
+    * destruction of the png_struct itself.
+    */
+}
+
+/* Free all memory used by the read */
+void PNGAPI
+png_destroy_read_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr,
+    png_infopp end_info_ptr_ptr)
+{
+   png_structrp png_ptr = NULL;
+
+   png_debug(1, "in png_destroy_read_struct");
+
+   if (png_ptr_ptr != NULL)
+      png_ptr = *png_ptr_ptr;
+
+   if (png_ptr == NULL)
+      return;
+
+   /* libpng 1.6.0: use the API to destroy info structs to ensure consistent
+    * behavior.  Prior to 1.6.0 libpng did extra 'info' destruction in this API.
+    * The extra was, apparently, unnecessary yet this hides memory leak bugs.
+    */
+   png_destroy_info_struct(png_ptr, end_info_ptr_ptr);
+   png_destroy_info_struct(png_ptr, info_ptr_ptr);
+
+   *png_ptr_ptr = NULL;
+   png_read_destroy(png_ptr);
+   png_destroy_png_struct(png_ptr);
+}
+
+void PNGAPI
+png_set_read_status_fn(png_structrp png_ptr, png_read_status_ptr read_row_fn)
+{
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->read_row_fn = read_row_fn;
+}
+
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+void PNGAPI
+png_read_png(png_structrp png_ptr, png_inforp info_ptr,
+    int transforms, voidp params)
+{
+   png_debug(1, "in png_read_png");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   /* png_read_info() gives us all of the information from the
+    * PNG file before the first IDAT (image data chunk).
+    */
+   png_read_info(png_ptr, info_ptr);
+   if (info_ptr->height > PNG_UINT_32_MAX/(sizeof (png_bytep)))
+      png_error(png_ptr, "Image is too high to process with png_read_png()");
+
+   /* -------------- image transformations start here ------------------- */
+   /* libpng 1.6.10: add code to cause a png_app_error if a selected TRANSFORM
+    * is not implemented.  This will only happen in de-configured (non-default)
+    * libpng builds.  The results can be unexpected - png_read_png may return
+    * short or mal-formed rows because the transform is skipped.
+    */
+
+   /* Tell libpng to strip 16-bit/color files down to 8 bits per color.
+    */
+   if ((transforms & PNG_TRANSFORM_SCALE_16) != 0)
+      /* Added at libpng-1.5.4. "strip_16" produces the same result that it
+       * did in earlier versions, while "scale_16" is now more accurate.
+       */
+#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
+      png_set_scale_16(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_SCALE_16 not supported");
+#endif
+
+   /* If both SCALE and STRIP are required pngrtran will effectively cancel the
+    * latter by doing SCALE first.  This is ok and allows apps not to check for
+    * which is supported to get the right answer.
+    */
+   if ((transforms & PNG_TRANSFORM_STRIP_16) != 0)
+#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
+      png_set_strip_16(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_STRIP_16 not supported");
+#endif
+
+   /* Strip alpha bytes from the input data without combining with
+    * the background (not recommended).
+    */
+   if ((transforms & PNG_TRANSFORM_STRIP_ALPHA) != 0)
+#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
+      png_set_strip_alpha(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_STRIP_ALPHA not supported");
+#endif
+
+   /* Extract multiple pixels with bit depths of 1, 2, or 4 from a single
+    * byte into separate bytes (useful for paletted and grayscale images).
+    */
+   if ((transforms & PNG_TRANSFORM_PACKING) != 0)
+#ifdef PNG_READ_PACK_SUPPORTED
+      png_set_packing(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_PACKING not supported");
+#endif
+
+   /* Change the order of packed pixels to least significant bit first
+    * (not useful if you are using png_set_packing).
+    */
+   if ((transforms & PNG_TRANSFORM_PACKSWAP) != 0)
+#ifdef PNG_READ_PACKSWAP_SUPPORTED
+      png_set_packswap(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_PACKSWAP not supported");
+#endif
+
+   /* Expand paletted colors into true RGB triplets
+    * Expand grayscale images to full 8 bits from 1, 2, or 4 bits/pixel
+    * Expand paletted or RGB images with transparency to full alpha
+    * channels so the data will be available as RGBA quartets.
+    */
+   if ((transforms & PNG_TRANSFORM_EXPAND) != 0)
+#ifdef PNG_READ_EXPAND_SUPPORTED
+      png_set_expand(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_EXPAND not supported");
+#endif
+
+   /* We don't handle background color or gamma transformation or quantizing.
+    */
+
+   /* Invert monochrome files to have 0 as white and 1 as black
+    */
+   if ((transforms & PNG_TRANSFORM_INVERT_MONO) != 0)
+#ifdef PNG_READ_INVERT_SUPPORTED
+      png_set_invert_mono(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_MONO not supported");
+#endif
+
+   /* If you want to shift the pixel values from the range [0,255] or
+    * [0,65535] to the original [0,7] or [0,31], or whatever range the
+    * colors were originally in:
+    */
+   if ((transforms & PNG_TRANSFORM_SHIFT) != 0)
+#ifdef PNG_READ_SHIFT_SUPPORTED
+      if ((info_ptr->valid & PNG_INFO_sBIT) != 0)
+         png_set_shift(png_ptr, &info_ptr->sig_bit);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_SHIFT not supported");
+#endif
+
+   /* Flip the RGB pixels to BGR (or RGBA to BGRA) */
+   if ((transforms & PNG_TRANSFORM_BGR) != 0)
+#ifdef PNG_READ_BGR_SUPPORTED
+      png_set_bgr(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_BGR not supported");
+#endif
+
+   /* Swap the RGBA or GA data to ARGB or AG (or BGRA to ABGR) */
+   if ((transforms & PNG_TRANSFORM_SWAP_ALPHA) != 0)
+#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED
+      png_set_swap_alpha(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ALPHA not supported");
+#endif
+
+   /* Swap bytes of 16-bit files to least significant byte first */
+   if ((transforms & PNG_TRANSFORM_SWAP_ENDIAN) != 0)
+#ifdef PNG_READ_SWAP_SUPPORTED
+      png_set_swap(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ENDIAN not supported");
+#endif
+
+/* Added at libpng-1.2.41 */
+   /* Invert the alpha channel from opacity to transparency */
+   if ((transforms & PNG_TRANSFORM_INVERT_ALPHA) != 0)
+#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
+      png_set_invert_alpha(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_ALPHA not supported");
+#endif
+
+/* Added at libpng-1.2.41 */
+   /* Expand grayscale image to RGB */
+   if ((transforms & PNG_TRANSFORM_GRAY_TO_RGB) != 0)
+#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
+      png_set_gray_to_rgb(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_GRAY_TO_RGB not supported");
+#endif
+
+/* Added at libpng-1.5.4 */
+   if ((transforms & PNG_TRANSFORM_EXPAND_16) != 0)
+#ifdef PNG_READ_EXPAND_16_SUPPORTED
+      png_set_expand_16(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_EXPAND_16 not supported");
+#endif
+
+   /* We don't handle adding filler bytes */
+
+   /* We use png_read_image and rely on that for interlace handling, but we also
+    * call png_read_update_info therefore must turn on interlace handling now:
+    */
+   (void)png_set_interlace_handling(png_ptr);
+
+   /* Optional call to gamma correct and add the background to the palette
+    * and update info structure.  REQUIRED if you are expecting libpng to
+    * update the palette for you (i.e., you selected such a transform above).
+    */
+   png_read_update_info(png_ptr, info_ptr);
+
+   /* -------------- image transformations end here ------------------- */
+
+   png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0);
+   if (info_ptr->row_pointers == NULL)
+   {
+      png_uint_32 iptr;
+
+      info_ptr->row_pointers = png_voidcast(png_bytepp, png_malloc(png_ptr,
+          info_ptr->height * (sizeof (png_bytep))));
+
+      for (iptr=0; iptr<info_ptr->height; iptr++)
+         info_ptr->row_pointers[iptr] = NULL;
+
+      info_ptr->free_me |= PNG_FREE_ROWS;
+
+      for (iptr = 0; iptr < info_ptr->height; iptr++)
+         info_ptr->row_pointers[iptr] = png_voidcast(png_bytep,
+             png_malloc(png_ptr, info_ptr->rowbytes));
+   }
+
+   png_read_image(png_ptr, info_ptr->row_pointers);
+   info_ptr->valid |= PNG_INFO_IDAT;
+
+   /* Read rest of file, and get additional chunks in info_ptr - REQUIRED */
+   png_read_end(png_ptr, info_ptr);
+
+   PNG_UNUSED(params)
+}
+#endif /* INFO_IMAGE */
+#endif /* SEQUENTIAL_READ */
+
+#ifdef PNG_SIMPLIFIED_READ_SUPPORTED
+/* SIMPLIFIED READ
+ *
+ * This code currently relies on the sequential reader, though it could easily
+ * be made to work with the progressive one.
+ */
+/* Arguments to png_image_finish_read: */
+
+/* Encoding of PNG data (used by the color-map code) */
+#  define P_NOTSET  0 /* File encoding not yet known */
+#  define P_sRGB    1 /* 8-bit encoded to sRGB gamma */
+#  define P_LINEAR  2 /* 16-bit linear: not encoded, NOT pre-multiplied! */
+#  define P_FILE    3 /* 8-bit encoded to file gamma, not sRGB or linear */
+#  define P_LINEAR8 4 /* 8-bit linear: only from a file value */
+
+/* Color-map processing: after libpng has run on the PNG image further
+ * processing may be needed to convert the data to color-map indices.
+ */
+#define PNG_CMAP_NONE      0
+#define PNG_CMAP_GA        1 /* Process GA data to a color-map with alpha */
+#define PNG_CMAP_TRANS     2 /* Process GA data to a background index */
+#define PNG_CMAP_RGB       3 /* Process RGB data */
+#define PNG_CMAP_RGB_ALPHA 4 /* Process RGBA data */
+
+/* The following document where the background is for each processing case. */
+#define PNG_CMAP_NONE_BACKGROUND      256
+#define PNG_CMAP_GA_BACKGROUND        231
+#define PNG_CMAP_TRANS_BACKGROUND     254
+#define PNG_CMAP_RGB_BACKGROUND       256
+#define PNG_CMAP_RGB_ALPHA_BACKGROUND 216
+
+typedef struct
+{
+   /* Arguments: */
+   png_imagep image;
+   png_voidp  buffer;
+   png_int_32 row_stride;
+   png_voidp  colormap;
+   png_const_colorp background;
+   /* Local variables: */
+   png_voidp       local_row;
+   png_voidp       first_row;
+   ptrdiff_t       row_bytes;           /* step between rows */
+   int             file_encoding;       /* E_ values above */
+   png_fixed_point gamma_to_linear;     /* For P_FILE, reciprocal of gamma */
+   int             colormap_processing; /* PNG_CMAP_ values above */
+} png_image_read_control;
+
+/* Do all the *safe* initialization - 'safe' means that png_error won't be
+ * called, so setting up the jmp_buf is not required.  This means that anything
+ * called from here must *not* call png_malloc - it has to call png_malloc_warn
+ * instead so that control is returned safely back to this routine.
+ */
+static int
+png_image_read_init(png_imagep image)
+{
+   if (image->opaque == NULL)
+   {
+      png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, image,
+          png_safe_error, png_safe_warning);
+
+      /* And set the rest of the structure to NULL to ensure that the various
+       * fields are consistent.
+       */
+      memset(image, 0, (sizeof *image));
+      image->version = PNG_IMAGE_VERSION;
+
+      if (png_ptr != NULL)
+      {
+         png_infop info_ptr = png_create_info_struct(png_ptr);
+
+         if (info_ptr != NULL)
+         {
+            png_controlp control = png_voidcast(png_controlp,
+                png_malloc_warn(png_ptr, (sizeof *control)));
+
+            if (control != NULL)
+            {
+               memset(control, 0, (sizeof *control));
+
+               control->png_ptr = png_ptr;
+               control->info_ptr = info_ptr;
+               control->for_write = 0;
+
+               image->opaque = control;
+               return 1;
+            }
+
+            /* Error clean up */
+            png_destroy_info_struct(png_ptr, &info_ptr);
+         }
+
+         png_destroy_read_struct(&png_ptr, NULL, NULL);
+      }
+
+      return png_image_error(image, "png_image_read: out of memory");
+   }
+
+   return png_image_error(image, "png_image_read: opaque pointer not NULL");
+}
+
+/* Utility to find the base format of a PNG file from a png_struct. */
+static png_uint_32
+png_image_format(png_structrp png_ptr)
+{
+   png_uint_32 format = 0;
+
+   if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
+      format |= PNG_FORMAT_FLAG_COLOR;
+
+   if ((png_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)
+      format |= PNG_FORMAT_FLAG_ALPHA;
+
+   /* Use png_ptr here, not info_ptr, because by examination png_handle_tRNS
+    * sets the png_struct fields; that's all we are interested in here.  The
+    * precise interaction with an app call to png_set_tRNS and PNG file reading
+    * is unclear.
+    */
+   else if (png_ptr->num_trans > 0)
+      format |= PNG_FORMAT_FLAG_ALPHA;
+
+   if (png_ptr->bit_depth == 16)
+      format |= PNG_FORMAT_FLAG_LINEAR;
+
+   if ((png_ptr->color_type & PNG_COLOR_MASK_PALETTE) != 0)
+      format |= PNG_FORMAT_FLAG_COLORMAP;
+
+   return format;
+}
+
+/* Is the given gamma significantly different from sRGB?  The test is the same
+ * one used in pngrtran.c when deciding whether to do gamma correction.  The
+ * arithmetic optimizes the division by using the fact that the inverse of the
+ * file sRGB gamma is 2.2
+ */
+static int
+png_gamma_not_sRGB(png_fixed_point g)
+{
+   if (g < PNG_FP_1)
+   {
+      /* An uninitialized gamma is assumed to be sRGB for the simplified API. */
+      if (g == 0)
+         return 0;
+
+      return png_gamma_significant((g * 11 + 2)/5 /* i.e. *2.2, rounded */);
+   }
+
+   return 1;
+}
+
+/* Do the main body of a 'png_image_begin_read' function; read the PNG file
+ * header and fill in all the information.  This is executed in a safe context,
+ * unlike the init routine above.
+ */
+static int
+png_image_read_header(png_voidp argument)
+{
+   png_imagep image = png_voidcast(png_imagep, argument);
+   png_structrp png_ptr = image->opaque->png_ptr;
+   png_inforp info_ptr = image->opaque->info_ptr;
+
+#ifdef PNG_BENIGN_ERRORS_SUPPORTED
+   png_set_benign_errors(png_ptr, 1/*warn*/);
+#endif
+   png_read_info(png_ptr, info_ptr);
+
+   /* Do this the fast way; just read directly out of png_struct. */
+   image->width = png_ptr->width;
+   image->height = png_ptr->height;
+
+   {
+      png_uint_32 format = png_image_format(png_ptr);
+
+      image->format = format;
+
+#ifdef PNG_COLORSPACE_SUPPORTED
+      /* Does the colorspace match sRGB?  If there is no color endpoint
+       * (colorant) information assume yes, otherwise require the
+       * 'ENDPOINTS_MATCHP_sRGB' colorspace flag to have been set.  If the
+       * colorspace has been determined to be invalid ignore it.
+       */
+      if ((format & PNG_FORMAT_FLAG_COLOR) != 0 && ((png_ptr->colorspace.flags
+         & (PNG_COLORSPACE_HAVE_ENDPOINTS|PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB|
+            PNG_COLORSPACE_INVALID)) == PNG_COLORSPACE_HAVE_ENDPOINTS))
+         image->flags |= PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB;
+#endif
+   }
+
+   /* We need the maximum number of entries regardless of the format the
+    * application sets here.
+    */
+   {
+      png_uint_32 cmap_entries;
+
+      switch (png_ptr->color_type)
+      {
+         case PNG_COLOR_TYPE_GRAY:
+            cmap_entries = 1U << png_ptr->bit_depth;
+            break;
+
+         case PNG_COLOR_TYPE_PALETTE:
+            cmap_entries = (png_uint_32)png_ptr->num_palette;
+            break;
+
+         default:
+            cmap_entries = 256;
+            break;
+      }
+
+      if (cmap_entries > 256)
+         cmap_entries = 256;
+
+      image->colormap_entries = cmap_entries;
+   }
+
+   return 1;
+}
+
+#ifdef PNG_STDIO_SUPPORTED
+int PNGAPI
+png_image_begin_read_from_stdio(png_imagep image, FILE* file)
+{
+   if (image != NULL && image->version == PNG_IMAGE_VERSION)
+   {
+      if (file != NULL)
+      {
+         if (png_image_read_init(image) != 0)
+         {
+            /* This is slightly evil, but png_init_io doesn't do anything other
+             * than this and we haven't changed the standard IO functions so
+             * this saves a 'safe' function.
+             */
+            image->opaque->png_ptr->io_ptr = file;
+            return png_safe_execute(image, png_image_read_header, image);
+         }
+      }
+
+      else
+         return png_image_error(image,
+             "png_image_begin_read_from_stdio: invalid argument");
+   }
+
+   else if (image != NULL)
+      return png_image_error(image,
+          "png_image_begin_read_from_stdio: incorrect PNG_IMAGE_VERSION");
+
+   return 0;
+}
+
+int PNGAPI
+png_image_begin_read_from_file(png_imagep image, const char *file_name)
+{
+   if (image != NULL && image->version == PNG_IMAGE_VERSION)
+   {
+      if (file_name != NULL)
+      {
+         FILE *fp = fopen(file_name, "rb");
+
+         if (fp != NULL)
+         {
+            if (png_image_read_init(image) != 0)
+            {
+               image->opaque->png_ptr->io_ptr = fp;
+               image->opaque->owned_file = 1;
+               return png_safe_execute(image, png_image_read_header, image);
+            }
+
+            /* Clean up: just the opened file. */
+            (void)fclose(fp);
+         }
+
+         else
+            return png_image_error(image, strerror(errno));
+      }
+
+      else
+         return png_image_error(image,
+             "png_image_begin_read_from_file: invalid argument");
+   }
+
+   else if (image != NULL)
+      return png_image_error(image,
+          "png_image_begin_read_from_file: incorrect PNG_IMAGE_VERSION");
+
+   return 0;
+}
+#endif /* STDIO */
+
+static void PNGCBAPI
+png_image_memory_read(png_structp png_ptr, png_bytep out, size_t need)
+{
+   if (png_ptr != NULL)
+   {
+      png_imagep image = png_voidcast(png_imagep, png_ptr->io_ptr);
+      if (image != NULL)
+      {
+         png_controlp cp = image->opaque;
+         if (cp != NULL)
+         {
+            png_const_bytep memory = cp->memory;
+            size_t size = cp->size;
+
+            if (memory != NULL && size >= need)
+            {
+               memcpy(out, memory, need);
+               cp->memory = memory + need;
+               cp->size = size - need;
+               return;
+            }
+
+            png_error(png_ptr, "read beyond end of data");
+         }
+      }
+
+      png_error(png_ptr, "invalid memory read");
+   }
+}
+
+int PNGAPI png_image_begin_read_from_memory(png_imagep image,
+    png_const_voidp memory, size_t size)
+{
+   if (image != NULL && image->version == PNG_IMAGE_VERSION)
+   {
+      if (memory != NULL && size > 0)
+      {
+         if (png_image_read_init(image) != 0)
+         {
+            /* Now set the IO functions to read from the memory buffer and
+             * store it into io_ptr.  Again do this in-place to avoid calling a
+             * libpng function that requires error handling.
+             */
+            image->opaque->memory = png_voidcast(png_const_bytep, memory);
+            image->opaque->size = size;
+            image->opaque->png_ptr->io_ptr = image;
+            image->opaque->png_ptr->read_data_fn = png_image_memory_read;
+
+            return png_safe_execute(image, png_image_read_header, image);
+         }
+      }
+
+      else
+         return png_image_error(image,
+             "png_image_begin_read_from_memory: invalid argument");
+   }
+
+   else if (image != NULL)
+      return png_image_error(image,
+          "png_image_begin_read_from_memory: incorrect PNG_IMAGE_VERSION");
+
+   return 0;
+}
+
+/* Utility function to skip chunks that are not used by the simplified image
+ * read functions and an appropriate macro to call it.
+ */
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+static void
+png_image_skip_unused_chunks(png_structrp png_ptr)
+{
+   /* Prepare the reader to ignore all recognized chunks whose data will not
+    * be used, i.e., all chunks recognized by libpng except for those
+    * involved in basic image reading:
+    *
+    *    IHDR, PLTE, IDAT, IEND
+    *
+    * Or image data handling:
+    *
+    *    tRNS, bKGD, gAMA, cHRM, sRGB, [iCCP] and sBIT.
+    *
+    * This provides a small performance improvement and eliminates any
+    * potential vulnerability to security problems in the unused chunks.
+    *
+    * At present the iCCP chunk data isn't used, so iCCP chunk can be ignored
+    * too.  This allows the simplified API to be compiled without iCCP support,
+    * however if the support is there the chunk is still checked to detect
+    * errors (which are unfortunately quite common.)
+    */
+   {
+         static const png_byte chunks_to_process[] = {
+            98,  75,  71,  68, '\0',  /* bKGD */
+            99,  72,  82,  77, '\0',  /* cHRM */
+           103,  65,  77,  65, '\0',  /* gAMA */
+#        ifdef PNG_READ_iCCP_SUPPORTED
+           105,  67,  67,  80, '\0',  /* iCCP */
+#        endif
+           115,  66,  73,  84, '\0',  /* sBIT */
+           115,  82,  71,  66, '\0',  /* sRGB */
+           };
+
+       /* Ignore unknown chunks and all other chunks except for the
+        * IHDR, PLTE, tRNS, IDAT, and IEND chunks.
+        */
+       png_set_keep_unknown_chunks(png_ptr, PNG_HANDLE_CHUNK_NEVER,
+           NULL, -1);
+
+       /* But do not ignore image data handling chunks */
+       png_set_keep_unknown_chunks(png_ptr, PNG_HANDLE_CHUNK_AS_DEFAULT,
+           chunks_to_process, (int)/*SAFE*/(sizeof chunks_to_process)/5);
+   }
+}
+
+#  define PNG_SKIP_CHUNKS(p) png_image_skip_unused_chunks(p)
+#else
+#  define PNG_SKIP_CHUNKS(p) ((void)0)
+#endif /* HANDLE_AS_UNKNOWN */
+
+/* The following macro gives the exact rounded answer for all values in the
+ * range 0..255 (it actually divides by 51.2, but the rounding still generates
+ * the correct numbers 0..5
+ */
+#define PNG_DIV51(v8) (((v8) * 5 + 130) >> 8)
+
+/* Utility functions to make particular color-maps */
+static void
+set_file_encoding(png_image_read_control *display)
+{
+   png_fixed_point g = display->image->opaque->png_ptr->colorspace.gamma;
+   if (png_gamma_significant(g) != 0)
+   {
+      if (png_gamma_not_sRGB(g) != 0)
+      {
+         display->file_encoding = P_FILE;
+         display->gamma_to_linear = png_reciprocal(g);
+      }
+
+      else
+         display->file_encoding = P_sRGB;
+   }
+
+   else
+      display->file_encoding = P_LINEAR8;
+}
+
+static unsigned int
+decode_gamma(png_image_read_control *display, png_uint_32 value, int encoding)
+{
+   if (encoding == P_FILE) /* double check */
+      encoding = display->file_encoding;
+
+   if (encoding == P_NOTSET) /* must be the file encoding */
+   {
+      set_file_encoding(display);
+      encoding = display->file_encoding;
+   }
+
+   switch (encoding)
+   {
+      case P_FILE:
+         value = png_gamma_16bit_correct(value*257, display->gamma_to_linear);
+         break;
+
+      case P_sRGB:
+         value = png_sRGB_table[value];
+         break;
+
+      case P_LINEAR:
+         break;
+
+      case P_LINEAR8:
+         value *= 257;
+         break;
+
+#ifdef __GNUC__
+      default:
+         png_error(display->image->opaque->png_ptr,
+             "unexpected encoding (internal error)");
+#endif
+   }
+
+   return value;
+}
+
+static png_uint_32
+png_colormap_compose(png_image_read_control *display,
+    png_uint_32 foreground, int foreground_encoding, png_uint_32 alpha,
+    png_uint_32 background, int encoding)
+{
+   /* The file value is composed on the background, the background has the given
+    * encoding and so does the result, the file is encoded with P_FILE and the
+    * file and alpha are 8-bit values.  The (output) encoding will always be
+    * P_LINEAR or P_sRGB.
+    */
+   png_uint_32 f = decode_gamma(display, foreground, foreground_encoding);
+   png_uint_32 b = decode_gamma(display, background, encoding);
+
+   /* The alpha is always an 8-bit value (it comes from the palette), the value
+    * scaled by 255 is what PNG_sRGB_FROM_LINEAR requires.
+    */
+   f = f * alpha + b * (255-alpha);
+
+   if (encoding == P_LINEAR)
+   {
+      /* Scale to 65535; divide by 255, approximately (in fact this is extremely
+       * accurate, it divides by 255.00000005937181414556, with no overflow.)
+       */
+      f *= 257; /* Now scaled by 65535 */
+      f += f >> 16;
+      f = (f+32768) >> 16;
+   }
+
+   else /* P_sRGB */
+      f = PNG_sRGB_FROM_LINEAR(f);
+
+   return f;
+}
+
+/* NOTE: P_LINEAR values to this routine must be 16-bit, but P_FILE values must
+ * be 8-bit.
+ */
+static void
+png_create_colormap_entry(png_image_read_control *display,
+    png_uint_32 ip, png_uint_32 red, png_uint_32 green, png_uint_32 blue,
+    png_uint_32 alpha, int encoding)
+{
+   png_imagep image = display->image;
+   int output_encoding = (image->format & PNG_FORMAT_FLAG_LINEAR) != 0 ?
+       P_LINEAR : P_sRGB;
+   int convert_to_Y = (image->format & PNG_FORMAT_FLAG_COLOR) == 0 &&
+       (red != green || green != blue);
+
+   if (ip > 255)
+      png_error(image->opaque->png_ptr, "color-map index out of range");
+
+   /* Update the cache with whether the file gamma is significantly different
+    * from sRGB.
+    */
+   if (encoding == P_FILE)
+   {
+      if (display->file_encoding == P_NOTSET)
+         set_file_encoding(display);
+
+      /* Note that the cached value may be P_FILE too, but if it is then the
+       * gamma_to_linear member has been set.
+       */
+      encoding = display->file_encoding;
+   }
+
+   if (encoding == P_FILE)
+   {
+      png_fixed_point g = display->gamma_to_linear;
+
+      red = png_gamma_16bit_correct(red*257, g);
+      green = png_gamma_16bit_correct(green*257, g);
+      blue = png_gamma_16bit_correct(blue*257, g);
+
+      if (convert_to_Y != 0 || output_encoding == P_LINEAR)
+      {
+         alpha *= 257;
+         encoding = P_LINEAR;
+      }
+
+      else
+      {
+         red = PNG_sRGB_FROM_LINEAR(red * 255);
+         green = PNG_sRGB_FROM_LINEAR(green * 255);
+         blue = PNG_sRGB_FROM_LINEAR(blue * 255);
+         encoding = P_sRGB;
+      }
+   }
+
+   else if (encoding == P_LINEAR8)
+   {
+      /* This encoding occurs quite frequently in test cases because PngSuite
+       * includes a gAMA 1.0 chunk with most images.
+       */
+      red *= 257;
+      green *= 257;
+      blue *= 257;
+      alpha *= 257;
+      encoding = P_LINEAR;
+   }
+
+   else if (encoding == P_sRGB &&
+       (convert_to_Y  != 0 || output_encoding == P_LINEAR))
+   {
+      /* The values are 8-bit sRGB values, but must be converted to 16-bit
+       * linear.
+       */
+      red = png_sRGB_table[red];
+      green = png_sRGB_table[green];
+      blue = png_sRGB_table[blue];
+      alpha *= 257;
+      encoding = P_LINEAR;
+   }
+
+   /* This is set if the color isn't gray but the output is. */
+   if (encoding == P_LINEAR)
+   {
+      if (convert_to_Y != 0)
+      {
+         /* NOTE: these values are copied from png_do_rgb_to_gray */
+         png_uint_32 y = (png_uint_32)6968 * red  + (png_uint_32)23434 * green +
+            (png_uint_32)2366 * blue;
+
+         if (output_encoding == P_LINEAR)
+            y = (y + 16384) >> 15;
+
+         else
+         {
+            /* y is scaled by 32768, we need it scaled by 255: */
+            y = (y + 128) >> 8;
+            y *= 255;
+            y = PNG_sRGB_FROM_LINEAR((y + 64) >> 7);
+            alpha = PNG_DIV257(alpha);
+            encoding = P_sRGB;
+         }
+
+         blue = red = green = y;
+      }
+
+      else if (output_encoding == P_sRGB)
+      {
+         red = PNG_sRGB_FROM_LINEAR(red * 255);
+         green = PNG_sRGB_FROM_LINEAR(green * 255);
+         blue = PNG_sRGB_FROM_LINEAR(blue * 255);
+         alpha = PNG_DIV257(alpha);
+         encoding = P_sRGB;
+      }
+   }
+
+   if (encoding != output_encoding)
+      png_error(image->opaque->png_ptr, "bad encoding (internal error)");
+
+   /* Store the value. */
+   {
+#     ifdef PNG_FORMAT_AFIRST_SUPPORTED
+         int afirst = (image->format & PNG_FORMAT_FLAG_AFIRST) != 0 &&
+            (image->format & PNG_FORMAT_FLAG_ALPHA) != 0;
+#     else
+#        define afirst 0
+#     endif
+#     ifdef PNG_FORMAT_BGR_SUPPORTED
+         int bgr = (image->format & PNG_FORMAT_FLAG_BGR) != 0 ? 2 : 0;
+#     else
+#        define bgr 0
+#     endif
+
+      if (output_encoding == P_LINEAR)
+      {
+         png_uint_16p entry = png_voidcast(png_uint_16p, display->colormap);
+
+         entry += ip * PNG_IMAGE_SAMPLE_CHANNELS(image->format);
+
+         /* The linear 16-bit values must be pre-multiplied by the alpha channel
+          * value, if less than 65535 (this is, effectively, composite on black
+          * if the alpha channel is removed.)
+          */
+         switch (PNG_IMAGE_SAMPLE_CHANNELS(image->format))
+         {
+            case 4:
+               entry[afirst ? 0 : 3] = (png_uint_16)alpha;
+               /* FALLTHROUGH */
+
+            case 3:
+               if (alpha < 65535)
+               {
+                  if (alpha > 0)
+                  {
+                     blue = (blue * alpha + 32767U)/65535U;
+                     green = (green * alpha + 32767U)/65535U;
+                     red = (red * alpha + 32767U)/65535U;
+                  }
+
+                  else
+                     red = green = blue = 0;
+               }
+               entry[afirst + (2 ^ bgr)] = (png_uint_16)blue;
+               entry[afirst + 1] = (png_uint_16)green;
+               entry[afirst + bgr] = (png_uint_16)red;
+               break;
+
+            case 2:
+               entry[1 ^ afirst] = (png_uint_16)alpha;
+               /* FALLTHROUGH */
+
+            case 1:
+               if (alpha < 65535)
+               {
+                  if (alpha > 0)
+                     green = (green * alpha + 32767U)/65535U;
+
+                  else
+                     green = 0;
+               }
+               entry[afirst] = (png_uint_16)green;
+               break;
+
+            default:
+               break;
+         }
+      }
+
+      else /* output encoding is P_sRGB */
+      {
+         png_bytep entry = png_voidcast(png_bytep, display->colormap);
+
+         entry += ip * PNG_IMAGE_SAMPLE_CHANNELS(image->format);
+
+         switch (PNG_IMAGE_SAMPLE_CHANNELS(image->format))
+         {
+            case 4:
+               entry[afirst ? 0 : 3] = (png_byte)alpha;
+               /* FALLTHROUGH */
+            case 3:
+               entry[afirst + (2 ^ bgr)] = (png_byte)blue;
+               entry[afirst + 1] = (png_byte)green;
+               entry[afirst + bgr] = (png_byte)red;
+               break;
+
+            case 2:
+               entry[1 ^ afirst] = (png_byte)alpha;
+               /* FALLTHROUGH */
+            case 1:
+               entry[afirst] = (png_byte)green;
+               break;
+
+            default:
+               break;
+         }
+      }
+
+#     ifdef afirst
+#        undef afirst
+#     endif
+#     ifdef bgr
+#        undef bgr
+#     endif
+   }
+}
+
+static int
+make_gray_file_colormap(png_image_read_control *display)
+{
+   unsigned int i;
+
+   for (i=0; i<256; ++i)
+      png_create_colormap_entry(display, i, i, i, i, 255, P_FILE);
+
+   return (int)i;
+}
+
+static int
+make_gray_colormap(png_image_read_control *display)
+{
+   unsigned int i;
+
+   for (i=0; i<256; ++i)
+      png_create_colormap_entry(display, i, i, i, i, 255, P_sRGB);
+
+   return (int)i;
+}
+#define PNG_GRAY_COLORMAP_ENTRIES 256
+
+static int
+make_ga_colormap(png_image_read_control *display)
+{
+   unsigned int i, a;
+
+   /* Alpha is retained, the output will be a color-map with entries
+    * selected by six levels of alpha.  One transparent entry, 6 gray
+    * levels for all the intermediate alpha values, leaving 230 entries
+    * for the opaque grays.  The color-map entries are the six values
+    * [0..5]*51, the GA processing uses PNG_DIV51(value) to find the
+    * relevant entry.
+    *
+    * if (alpha > 229) // opaque
+    * {
+    *    // The 231 entries are selected to make the math below work:
+    *    base = 0;
+    *    entry = (231 * gray + 128) >> 8;
+    * }
+    * else if (alpha < 26) // transparent
+    * {
+    *    base = 231;
+    *    entry = 0;
+    * }
+    * else // partially opaque
+    * {
+    *    base = 226 + 6 * PNG_DIV51(alpha);
+    *    entry = PNG_DIV51(gray);
+    * }
+    */
+   i = 0;
+   while (i < 231)
+   {
+      unsigned int gray = (i * 256 + 115) / 231;
+      png_create_colormap_entry(display, i++, gray, gray, gray, 255, P_sRGB);
+   }
+
+   /* 255 is used here for the component values for consistency with the code
+    * that undoes premultiplication in pngwrite.c.
+    */
+   png_create_colormap_entry(display, i++, 255, 255, 255, 0, P_sRGB);
+
+   for (a=1; a<5; ++a)
+   {
+      unsigned int g;
+
+      for (g=0; g<6; ++g)
+         png_create_colormap_entry(display, i++, g*51, g*51, g*51, a*51,
+             P_sRGB);
+   }
+
+   return (int)i;
+}
+
+#define PNG_GA_COLORMAP_ENTRIES 256
+
+static int
+make_rgb_colormap(png_image_read_control *display)
+{
+   unsigned int i, r;
+
+   /* Build a 6x6x6 opaque RGB cube */
+   for (i=r=0; r<6; ++r)
+   {
+      unsigned int g;
+
+      for (g=0; g<6; ++g)
+      {
+         unsigned int b;
+
+         for (b=0; b<6; ++b)
+            png_create_colormap_entry(display, i++, r*51, g*51, b*51, 255,
+                P_sRGB);
+      }
+   }
+
+   return (int)i;
+}
+
+#define PNG_RGB_COLORMAP_ENTRIES 216
+
+/* Return a palette index to the above palette given three 8-bit sRGB values. */
+#define PNG_RGB_INDEX(r,g,b) \
+   ((png_byte)(6 * (6 * PNG_DIV51(r) + PNG_DIV51(g)) + PNG_DIV51(b)))
+
+static int
+png_image_read_colormap(png_voidp argument)
+{
+   png_image_read_control *display =
+      png_voidcast(png_image_read_control*, argument);
+   png_imagep image = display->image;
+
+   png_structrp png_ptr = image->opaque->png_ptr;
+   png_uint_32 output_format = image->format;
+   int output_encoding = (output_format & PNG_FORMAT_FLAG_LINEAR) != 0 ?
+      P_LINEAR : P_sRGB;
+
+   unsigned int cmap_entries;
+   unsigned int output_processing;        /* Output processing option */
+   unsigned int data_encoding = P_NOTSET; /* Encoding libpng must produce */
+
+   /* Background information; the background color and the index of this color
+    * in the color-map if it exists (else 256).
+    */
+   unsigned int background_index = 256;
+   png_uint_32 back_r, back_g, back_b;
+
+   /* Flags to accumulate things that need to be done to the input. */
+   int expand_tRNS = 0;
+
+   /* Exclude the NYI feature of compositing onto a color-mapped buffer; it is
+    * very difficult to do, the results look awful, and it is difficult to see
+    * what possible use it is because the application can't control the
+    * color-map.
+    */
+   if (((png_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0 ||
+         png_ptr->num_trans > 0) /* alpha in input */ &&
+      ((output_format & PNG_FORMAT_FLAG_ALPHA) == 0) /* no alpha in output */)
+   {
+      if (output_encoding == P_LINEAR) /* compose on black */
+         back_b = back_g = back_r = 0;
+
+      else if (display->background == NULL /* no way to remove it */)
+         png_error(png_ptr,
+             "background color must be supplied to remove alpha/transparency");
+
+      /* Get a copy of the background color (this avoids repeating the checks
+       * below.)  The encoding is 8-bit sRGB or 16-bit linear, depending on the
+       * output format.
+       */
+      else
+      {
+         back_g = display->background->green;
+         if ((output_format & PNG_FORMAT_FLAG_COLOR) != 0)
+         {
+            back_r = display->background->red;
+            back_b = display->background->blue;
+         }
+         else
+            back_b = back_r = back_g;
+      }
+   }
+
+   else if (output_encoding == P_LINEAR)
+      back_b = back_r = back_g = 65535;
+
+   else
+      back_b = back_r = back_g = 255;
+
+   /* Default the input file gamma if required - this is necessary because
+    * libpng assumes that if no gamma information is present the data is in the
+    * output format, but the simplified API deduces the gamma from the input
+    * format.
+    */
+   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) == 0)
+   {
+      /* Do this directly, not using the png_colorspace functions, to ensure
+       * that it happens even if the colorspace is invalid (though probably if
+       * it is the setting will be ignored)  Note that the same thing can be
+       * achieved at the application interface with png_set_gAMA.
+       */
+      if (png_ptr->bit_depth == 16 &&
+         (image->flags & PNG_IMAGE_FLAG_16BIT_sRGB) == 0)
+         png_ptr->colorspace.gamma = PNG_GAMMA_LINEAR;
+
+      else
+         png_ptr->colorspace.gamma = PNG_GAMMA_sRGB_INVERSE;
+
+      png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA;
+   }
+
+   /* Decide what to do based on the PNG color type of the input data.  The
+    * utility function png_create_colormap_entry deals with most aspects of the
+    * output transformations; this code works out how to produce bytes of
+    * color-map entries from the original format.
+    */
+   switch (png_ptr->color_type)
+   {
+      case PNG_COLOR_TYPE_GRAY:
+         if (png_ptr->bit_depth <= 8)
+         {
+            /* There at most 256 colors in the output, regardless of
+             * transparency.
+             */
+            unsigned int step, i, val, trans = 256/*ignore*/, back_alpha = 0;
+
+            cmap_entries = 1U << png_ptr->bit_depth;
+            if (cmap_entries > image->colormap_entries)
+               png_error(png_ptr, "gray[8] color-map: too few entries");
+
+            step = 255 / (cmap_entries - 1);
+            output_processing = PNG_CMAP_NONE;
+
+            /* If there is a tRNS chunk then this either selects a transparent
+             * value or, if the output has no alpha, the background color.
+             */
+            if (png_ptr->num_trans > 0)
+            {
+               trans = png_ptr->trans_color.gray;
+
+               if ((output_format & PNG_FORMAT_FLAG_ALPHA) == 0)
+                  back_alpha = output_encoding == P_LINEAR ? 65535 : 255;
+            }
+
+            /* png_create_colormap_entry just takes an RGBA and writes the
+             * corresponding color-map entry using the format from 'image',
+             * including the required conversion to sRGB or linear as
+             * appropriate.  The input values are always either sRGB (if the
+             * gamma correction flag is 0) or 0..255 scaled file encoded values
+             * (if the function must gamma correct them).
+             */
+            for (i=val=0; i<cmap_entries; ++i, val += step)
+            {
+               /* 'i' is a file value.  While this will result in duplicated
+                * entries for 8-bit non-sRGB encoded files it is necessary to
+                * have non-gamma corrected values to do tRNS handling.
+                */
+               if (i != trans)
+                  png_create_colormap_entry(display, i, val, val, val, 255,
+                      P_FILE/*8-bit with file gamma*/);
+
+               /* Else this entry is transparent.  The colors don't matter if
+                * there is an alpha channel (back_alpha == 0), but it does no
+                * harm to pass them in; the values are not set above so this
+                * passes in white.
+                *
+                * NOTE: this preserves the full precision of the application
+                * supplied background color when it is used.
+                */
+               else
+                  png_create_colormap_entry(display, i, back_r, back_g, back_b,
+                      back_alpha, output_encoding);
+            }
+
+            /* We need libpng to preserve the original encoding. */
+            data_encoding = P_FILE;
+
+            /* The rows from libpng, while technically gray values, are now also
+             * color-map indices; however, they may need to be expanded to 1
+             * byte per pixel.  This is what png_set_packing does (i.e., it
+             * unpacks the bit values into bytes.)
+             */
+            if (png_ptr->bit_depth < 8)
+               png_set_packing(png_ptr);
+         }
+
+         else /* bit depth is 16 */
+         {
+            /* The 16-bit input values can be converted directly to 8-bit gamma
+             * encoded values; however, if a tRNS chunk is present 257 color-map
+             * entries are required.  This means that the extra entry requires
+             * special processing; add an alpha channel, sacrifice gray level
+             * 254 and convert transparent (alpha==0) entries to that.
+             *
+             * Use libpng to chop the data to 8 bits.  Convert it to sRGB at the
+             * same time to minimize quality loss.  If a tRNS chunk is present
+             * this means libpng must handle it too; otherwise it is impossible
+             * to do the exact match on the 16-bit value.
+             *
+             * If the output has no alpha channel *and* the background color is
+             * gray then it is possible to let libpng handle the substitution by
+             * ensuring that the corresponding gray level matches the background
+             * color exactly.
+             */
+            data_encoding = P_sRGB;
+
+            if (PNG_GRAY_COLORMAP_ENTRIES > image->colormap_entries)
+               png_error(png_ptr, "gray[16] color-map: too few entries");
+
+            cmap_entries = (unsigned int)make_gray_colormap(display);
+
+            if (png_ptr->num_trans > 0)
+            {
+               unsigned int back_alpha;
+
+               if ((output_format & PNG_FORMAT_FLAG_ALPHA) != 0)
+                  back_alpha = 0;
+
+               else
+               {
+                  if (back_r == back_g && back_g == back_b)
+                  {
+                     /* Background is gray; no special processing will be
+                      * required.
+                      */
+                     png_color_16 c;
+                     png_uint_32 gray = back_g;
+
+                     if (output_encoding == P_LINEAR)
+                     {
+                        gray = PNG_sRGB_FROM_LINEAR(gray * 255);
+
+                        /* And make sure the corresponding palette entry
+                         * matches.
+                         */
+                        png_create_colormap_entry(display, gray, back_g, back_g,
+                            back_g, 65535, P_LINEAR);
+                     }
+
+                     /* The background passed to libpng, however, must be the
+                      * sRGB value.
+                      */
+                     c.index = 0; /*unused*/
+                     c.gray = c.red = c.green = c.blue = (png_uint_16)gray;
+
+                     /* NOTE: does this work without expanding tRNS to alpha?
+                      * It should be the color->gray case below apparently
+                      * doesn't.
+                      */
+                     png_set_background_fixed(png_ptr, &c,
+                         PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/,
+                         0/*gamma: not used*/);
+
+                     output_processing = PNG_CMAP_NONE;
+                     break;
+                  }
+#ifdef __COVERITY__
+                 /* Coverity claims that output_encoding cannot be 2 (P_LINEAR)
+                  * here.
+                  */
+                  back_alpha = 255;
+#else
+                  back_alpha = output_encoding == P_LINEAR ? 65535 : 255;
+#endif
+               }
+
+               /* output_processing means that the libpng-processed row will be
+                * 8-bit GA and it has to be processing to single byte color-map
+                * values.  Entry 254 is replaced by either a completely
+                * transparent entry or by the background color at full
+                * precision (and the background color is not a simple gray
+                * level in this case.)
+                */
+               expand_tRNS = 1;
+               output_processing = PNG_CMAP_TRANS;
+               background_index = 254;
+
+               /* And set (overwrite) color-map entry 254 to the actual
+                * background color at full precision.
+                */
+               png_create_colormap_entry(display, 254, back_r, back_g, back_b,
+                   back_alpha, output_encoding);
+            }
+
+            else
+               output_processing = PNG_CMAP_NONE;
+         }
+         break;
+
+      case PNG_COLOR_TYPE_GRAY_ALPHA:
+         /* 8-bit or 16-bit PNG with two channels - gray and alpha.  A minimum
+          * of 65536 combinations.  If, however, the alpha channel is to be
+          * removed there are only 256 possibilities if the background is gray.
+          * (Otherwise there is a subset of the 65536 possibilities defined by
+          * the triangle between black, white and the background color.)
+          *
+          * Reduce 16-bit files to 8-bit and sRGB encode the result.  No need to
+          * worry about tRNS matching - tRNS is ignored if there is an alpha
+          * channel.
+          */
+         data_encoding = P_sRGB;
+
+         if ((output_format & PNG_FORMAT_FLAG_ALPHA) != 0)
+         {
+            if (PNG_GA_COLORMAP_ENTRIES > image->colormap_entries)
+               png_error(png_ptr, "gray+alpha color-map: too few entries");
+
+            cmap_entries = (unsigned int)make_ga_colormap(display);
+
+            background_index = PNG_CMAP_GA_BACKGROUND;
+            output_processing = PNG_CMAP_GA;
+         }
+
+         else /* alpha is removed */
+         {
+            /* Alpha must be removed as the PNG data is processed when the
+             * background is a color because the G and A channels are
+             * independent and the vector addition (non-parallel vectors) is a
+             * 2-D problem.
+             *
+             * This can be reduced to the same algorithm as above by making a
+             * colormap containing gray levels (for the opaque grays), a
+             * background entry (for a transparent pixel) and a set of four six
+             * level color values, one set for each intermediate alpha value.
+             * See the comments in make_ga_colormap for how this works in the
+             * per-pixel processing.
+             *
+             * If the background is gray, however, we only need a 256 entry gray
+             * level color map.  It is sufficient to make the entry generated
+             * for the background color be exactly the color specified.
+             */
+            if ((output_format & PNG_FORMAT_FLAG_COLOR) == 0 ||
+               (back_r == back_g && back_g == back_b))
+            {
+               /* Background is gray; no special processing will be required. */
+               png_color_16 c;
+               png_uint_32 gray = back_g;
+
+               if (PNG_GRAY_COLORMAP_ENTRIES > image->colormap_entries)
+                  png_error(png_ptr, "gray-alpha color-map: too few entries");
+
+               cmap_entries = (unsigned int)make_gray_colormap(display);
+
+               if (output_encoding == P_LINEAR)
+               {
+                  gray = PNG_sRGB_FROM_LINEAR(gray * 255);
+
+                  /* And make sure the corresponding palette entry matches. */
+                  png_create_colormap_entry(display, gray, back_g, back_g,
+                      back_g, 65535, P_LINEAR);
+               }
+
+               /* The background passed to libpng, however, must be the sRGB
+                * value.
+                */
+               c.index = 0; /*unused*/
+               c.gray = c.red = c.green = c.blue = (png_uint_16)gray;
+
+               png_set_background_fixed(png_ptr, &c,
+                   PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/,
+                   0/*gamma: not used*/);
+
+               output_processing = PNG_CMAP_NONE;
+            }
+
+            else
+            {
+               png_uint_32 i, a;
+
+               /* This is the same as png_make_ga_colormap, above, except that
+                * the entries are all opaque.
+                */
+               if (PNG_GA_COLORMAP_ENTRIES > image->colormap_entries)
+                  png_error(png_ptr, "ga-alpha color-map: too few entries");
+
+               i = 0;
+               while (i < 231)
+               {
+                  png_uint_32 gray = (i * 256 + 115) / 231;
+                  png_create_colormap_entry(display, i++, gray, gray, gray,
+                      255, P_sRGB);
+               }
+
+               /* NOTE: this preserves the full precision of the application
+                * background color.
+                */
+               background_index = i;
+               png_create_colormap_entry(display, i++, back_r, back_g, back_b,
+#ifdef __COVERITY__
+                   /* Coverity claims that output_encoding
+                    * cannot be 2 (P_LINEAR) here.
+                    */ 255U,
+#else
+                    output_encoding == P_LINEAR ? 65535U : 255U,
+#endif
+                    output_encoding);
+
+               /* For non-opaque input composite on the sRGB background - this
+                * requires inverting the encoding for each component.  The input
+                * is still converted to the sRGB encoding because this is a
+                * reasonable approximate to the logarithmic curve of human
+                * visual sensitivity, at least over the narrow range which PNG
+                * represents.  Consequently 'G' is always sRGB encoded, while
+                * 'A' is linear.  We need the linear background colors.
+                */
+               if (output_encoding == P_sRGB) /* else already linear */
+               {
+                  /* This may produce a value not exactly matching the
+                   * background, but that's ok because these numbers are only
+                   * used when alpha != 0
+                   */
+                  back_r = png_sRGB_table[back_r];
+                  back_g = png_sRGB_table[back_g];
+                  back_b = png_sRGB_table[back_b];
+               }
+
+               for (a=1; a<5; ++a)
+               {
+                  unsigned int g;
+
+                  /* PNG_sRGB_FROM_LINEAR expects a 16-bit linear value scaled
+                   * by an 8-bit alpha value (0..255).
+                   */
+                  png_uint_32 alpha = 51 * a;
+                  png_uint_32 back_rx = (255-alpha) * back_r;
+                  png_uint_32 back_gx = (255-alpha) * back_g;
+                  png_uint_32 back_bx = (255-alpha) * back_b;
+
+                  for (g=0; g<6; ++g)
+                  {
+                     png_uint_32 gray = png_sRGB_table[g*51] * alpha;
+
+                     png_create_colormap_entry(display, i++,
+                         PNG_sRGB_FROM_LINEAR(gray + back_rx),
+                         PNG_sRGB_FROM_LINEAR(gray + back_gx),
+                         PNG_sRGB_FROM_LINEAR(gray + back_bx), 255, P_sRGB);
+                  }
+               }
+
+               cmap_entries = i;
+               output_processing = PNG_CMAP_GA;
+            }
+         }
+         break;
+
+      case PNG_COLOR_TYPE_RGB:
+      case PNG_COLOR_TYPE_RGB_ALPHA:
+         /* Exclude the case where the output is gray; we can always handle this
+          * with the cases above.
+          */
+         if ((output_format & PNG_FORMAT_FLAG_COLOR) == 0)
+         {
+            /* The color-map will be grayscale, so we may as well convert the
+             * input RGB values to a simple grayscale and use the grayscale
+             * code above.
+             *
+             * NOTE: calling this apparently damages the recognition of the
+             * transparent color in background color handling; call
+             * png_set_tRNS_to_alpha before png_set_background_fixed.
+             */
+            png_set_rgb_to_gray_fixed(png_ptr, PNG_ERROR_ACTION_NONE, -1,
+                -1);
+            data_encoding = P_sRGB;
+
+            /* The output will now be one or two 8-bit gray or gray+alpha
+             * channels.  The more complex case arises when the input has alpha.
+             */
+            if ((png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
+               png_ptr->num_trans > 0) &&
+               (output_format & PNG_FORMAT_FLAG_ALPHA) != 0)
+            {
+               /* Both input and output have an alpha channel, so no background
+                * processing is required; just map the GA bytes to the right
+                * color-map entry.
+                */
+               expand_tRNS = 1;
+
+               if (PNG_GA_COLORMAP_ENTRIES > image->colormap_entries)
+                  png_error(png_ptr, "rgb[ga] color-map: too few entries");
+
+               cmap_entries = (unsigned int)make_ga_colormap(display);
+               background_index = PNG_CMAP_GA_BACKGROUND;
+               output_processing = PNG_CMAP_GA;
+            }
+
+            else
+            {
+               /* Either the input or the output has no alpha channel, so there
+                * will be no non-opaque pixels in the color-map; it will just be
+                * grayscale.
+                */
+               if (PNG_GRAY_COLORMAP_ENTRIES > image->colormap_entries)
+                  png_error(png_ptr, "rgb[gray] color-map: too few entries");
+
+               /* Ideally this code would use libpng to do the gamma correction,
+                * but if an input alpha channel is to be removed we will hit the
+                * libpng bug in gamma+compose+rgb-to-gray (the double gamma
+                * correction bug).  Fix this by dropping the gamma correction in
+                * this case and doing it in the palette; this will result in
+                * duplicate palette entries, but that's better than the
+                * alternative of double gamma correction.
+                */
+               if ((png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
+                  png_ptr->num_trans > 0) &&
+                  png_gamma_not_sRGB(png_ptr->colorspace.gamma) != 0)
+               {
+                  cmap_entries = (unsigned int)make_gray_file_colormap(display);
+                  data_encoding = P_FILE;
+               }
+
+               else
+                  cmap_entries = (unsigned int)make_gray_colormap(display);
+
+               /* But if the input has alpha or transparency it must be removed
+                */
+               if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
+                  png_ptr->num_trans > 0)
+               {
+                  png_color_16 c;
+                  png_uint_32 gray = back_g;
+
+                  /* We need to ensure that the application background exists in
+                   * the colormap and that completely transparent pixels map to
+                   * it.  Achieve this simply by ensuring that the entry
+                   * selected for the background really is the background color.
+                   */
+                  if (data_encoding == P_FILE) /* from the fixup above */
+                  {
+                     /* The app supplied a gray which is in output_encoding, we
+                      * need to convert it to a value of the input (P_FILE)
+                      * encoding then set this palette entry to the required
+                      * output encoding.
+                      */
+                     if (output_encoding == P_sRGB)
+                        gray = png_sRGB_table[gray]; /* now P_LINEAR */
+
+                     gray = PNG_DIV257(png_gamma_16bit_correct(gray,
+                         png_ptr->colorspace.gamma)); /* now P_FILE */
+
+                     /* And make sure the corresponding palette entry contains
+                      * exactly the required sRGB value.
+                      */
+                     png_create_colormap_entry(display, gray, back_g, back_g,
+                         back_g, 0/*unused*/, output_encoding);
+                  }
+
+                  else if (output_encoding == P_LINEAR)
+                  {
+                     gray = PNG_sRGB_FROM_LINEAR(gray * 255);
+
+                     /* And make sure the corresponding palette entry matches.
+                      */
+                     png_create_colormap_entry(display, gray, back_g, back_g,
+                        back_g, 0/*unused*/, P_LINEAR);
+                  }
+
+                  /* The background passed to libpng, however, must be the
+                   * output (normally sRGB) value.
+                   */
+                  c.index = 0; /*unused*/
+                  c.gray = c.red = c.green = c.blue = (png_uint_16)gray;
+
+                  /* NOTE: the following is apparently a bug in libpng. Without
+                   * it the transparent color recognition in
+                   * png_set_background_fixed seems to go wrong.
+                   */
+                  expand_tRNS = 1;
+                  png_set_background_fixed(png_ptr, &c,
+                      PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/,
+                      0/*gamma: not used*/);
+               }
+
+               output_processing = PNG_CMAP_NONE;
+            }
+         }
+
+         else /* output is color */
+         {
+            /* We could use png_quantize here so long as there is no transparent
+             * color or alpha; png_quantize ignores alpha.  Easier overall just
+             * to do it once and using PNG_DIV51 on the 6x6x6 reduced RGB cube.
+             * Consequently we always want libpng to produce sRGB data.
+             */
+            data_encoding = P_sRGB;
+
+            /* Is there any transparency or alpha? */
+            if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
+               png_ptr->num_trans > 0)
+            {
+               /* Is there alpha in the output too?  If so all four channels are
+                * processed into a special RGB cube with alpha support.
+                */
+               if ((output_format & PNG_FORMAT_FLAG_ALPHA) != 0)
+               {
+                  png_uint_32 r;
+
+                  if (PNG_RGB_COLORMAP_ENTRIES+1+27 > image->colormap_entries)
+                     png_error(png_ptr, "rgb+alpha color-map: too few entries");
+
+                  cmap_entries = (unsigned int)make_rgb_colormap(display);
+
+                  /* Add a transparent entry. */
+                  png_create_colormap_entry(display, cmap_entries, 255, 255,
+                      255, 0, P_sRGB);
+
+                  /* This is stored as the background index for the processing
+                   * algorithm.
+                   */
+                  background_index = cmap_entries++;
+
+                  /* Add 27 r,g,b entries each with alpha 0.5. */
+                  for (r=0; r<256; r = (r << 1) | 0x7f)
+                  {
+                     png_uint_32 g;
+
+                     for (g=0; g<256; g = (g << 1) | 0x7f)
+                     {
+                        png_uint_32 b;
+
+                        /* This generates components with the values 0, 127 and
+                         * 255
+                         */
+                        for (b=0; b<256; b = (b << 1) | 0x7f)
+                           png_create_colormap_entry(display, cmap_entries++,
+                               r, g, b, 128, P_sRGB);
+                     }
+                  }
+
+                  expand_tRNS = 1;
+                  output_processing = PNG_CMAP_RGB_ALPHA;
+               }
+
+               else
+               {
+                  /* Alpha/transparency must be removed.  The background must
+                   * exist in the color map (achieved by setting adding it after
+                   * the 666 color-map).  If the standard processing code will
+                   * pick up this entry automatically that's all that is
+                   * required; libpng can be called to do the background
+                   * processing.
+                   */
+                  unsigned int sample_size =
+                     PNG_IMAGE_SAMPLE_SIZE(output_format);
+                  png_uint_32 r, g, b; /* sRGB background */
+
+                  if (PNG_RGB_COLORMAP_ENTRIES+1+27 > image->colormap_entries)
+                     png_error(png_ptr, "rgb-alpha color-map: too few entries");
+
+                  cmap_entries = (unsigned int)make_rgb_colormap(display);
+
+                  png_create_colormap_entry(display, cmap_entries, back_r,
+                      back_g, back_b, 0/*unused*/, output_encoding);
+
+                  if (output_encoding == P_LINEAR)
+                  {
+                     r = PNG_sRGB_FROM_LINEAR(back_r * 255);
+                     g = PNG_sRGB_FROM_LINEAR(back_g * 255);
+                     b = PNG_sRGB_FROM_LINEAR(back_b * 255);
+                  }
+
+                  else
+                  {
+                     r = back_r;
+                     g = back_g;
+                     b = back_g;
+                  }
+
+                  /* Compare the newly-created color-map entry with the one the
+                   * PNG_CMAP_RGB algorithm will use.  If the two entries don't
+                   * match, add the new one and set this as the background
+                   * index.
+                   */
+                  if (memcmp((png_const_bytep)display->colormap +
+                      sample_size * cmap_entries,
+                      (png_const_bytep)display->colormap +
+                          sample_size * PNG_RGB_INDEX(r,g,b),
+                     sample_size) != 0)
+                  {
+                     /* The background color must be added. */
+                     background_index = cmap_entries++;
+
+                     /* Add 27 r,g,b entries each with created by composing with
+                      * the background at alpha 0.5.
+                      */
+                     for (r=0; r<256; r = (r << 1) | 0x7f)
+                     {
+                        for (g=0; g<256; g = (g << 1) | 0x7f)
+                        {
+                           /* This generates components with the values 0, 127
+                            * and 255
+                            */
+                           for (b=0; b<256; b = (b << 1) | 0x7f)
+                              png_create_colormap_entry(display, cmap_entries++,
+                                  png_colormap_compose(display, r, P_sRGB, 128,
+                                      back_r, output_encoding),
+                                  png_colormap_compose(display, g, P_sRGB, 128,
+                                      back_g, output_encoding),
+                                  png_colormap_compose(display, b, P_sRGB, 128,
+                                      back_b, output_encoding),
+                                  0/*unused*/, output_encoding);
+                        }
+                     }
+
+                     expand_tRNS = 1;
+                     output_processing = PNG_CMAP_RGB_ALPHA;
+                  }
+
+                  else /* background color is in the standard color-map */
+                  {
+                     png_color_16 c;
+
+                     c.index = 0; /*unused*/
+                     c.red = (png_uint_16)back_r;
+                     c.gray = c.green = (png_uint_16)back_g;
+                     c.blue = (png_uint_16)back_b;
+
+                     png_set_background_fixed(png_ptr, &c,
+                         PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/,
+                         0/*gamma: not used*/);
+
+                     output_processing = PNG_CMAP_RGB;
+                  }
+               }
+            }
+
+            else /* no alpha or transparency in the input */
+            {
+               /* Alpha in the output is irrelevant, simply map the opaque input
+                * pixels to the 6x6x6 color-map.
+                */
+               if (PNG_RGB_COLORMAP_ENTRIES > image->colormap_entries)
+                  png_error(png_ptr, "rgb color-map: too few entries");
+
+               cmap_entries = (unsigned int)make_rgb_colormap(display);
+               output_processing = PNG_CMAP_RGB;
+            }
+         }
+         break;
+
+      case PNG_COLOR_TYPE_PALETTE:
+         /* It's already got a color-map.  It may be necessary to eliminate the
+          * tRNS entries though.
+          */
+         {
+            unsigned int num_trans = png_ptr->num_trans;
+            png_const_bytep trans = num_trans > 0 ? png_ptr->trans_alpha : NULL;
+            png_const_colorp colormap = png_ptr->palette;
+            int do_background = trans != NULL &&
+               (output_format & PNG_FORMAT_FLAG_ALPHA) == 0;
+            unsigned int i;
+
+            /* Just in case: */
+            if (trans == NULL)
+               num_trans = 0;
+
+            output_processing = PNG_CMAP_NONE;
+            data_encoding = P_FILE; /* Don't change from color-map indices */
+            cmap_entries = (unsigned int)png_ptr->num_palette;
+            if (cmap_entries > 256)
+               cmap_entries = 256;
+
+            if (cmap_entries > (unsigned int)image->colormap_entries)
+               png_error(png_ptr, "palette color-map: too few entries");
+
+            for (i=0; i < cmap_entries; ++i)
+            {
+               if (do_background != 0 && i < num_trans && trans[i] < 255)
+               {
+                  if (trans[i] == 0)
+                     png_create_colormap_entry(display, i, back_r, back_g,
+                         back_b, 0, output_encoding);
+
+                  else
+                  {
+                     /* Must compose the PNG file color in the color-map entry
+                      * on the sRGB color in 'back'.
+                      */
+                     png_create_colormap_entry(display, i,
+                         png_colormap_compose(display, colormap[i].red,
+                             P_FILE, trans[i], back_r, output_encoding),
+                         png_colormap_compose(display, colormap[i].green,
+                             P_FILE, trans[i], back_g, output_encoding),
+                         png_colormap_compose(display, colormap[i].blue,
+                             P_FILE, trans[i], back_b, output_encoding),
+                         output_encoding == P_LINEAR ? trans[i] * 257U :
+                             trans[i],
+                         output_encoding);
+                  }
+               }
+
+               else
+                  png_create_colormap_entry(display, i, colormap[i].red,
+                      colormap[i].green, colormap[i].blue,
+                      i < num_trans ? trans[i] : 255U, P_FILE/*8-bit*/);
+            }
+
+            /* The PNG data may have indices packed in fewer than 8 bits, it
+             * must be expanded if so.
+             */
+            if (png_ptr->bit_depth < 8)
+               png_set_packing(png_ptr);
+         }
+         break;
+
+      default:
+         png_error(png_ptr, "invalid PNG color type");
+         /*NOT REACHED*/
+   }
+
+   /* Now deal with the output processing */
+   if (expand_tRNS != 0 && png_ptr->num_trans > 0 &&
+       (png_ptr->color_type & PNG_COLOR_MASK_ALPHA) == 0)
+      png_set_tRNS_to_alpha(png_ptr);
+
+   switch (data_encoding)
+   {
+      case P_sRGB:
+         /* Change to 8-bit sRGB */
+         png_set_alpha_mode_fixed(png_ptr, PNG_ALPHA_PNG, PNG_GAMMA_sRGB);
+         /* FALLTHROUGH */
+
+      case P_FILE:
+         if (png_ptr->bit_depth > 8)
+            png_set_scale_16(png_ptr);
+         break;
+
+#ifdef __GNUC__
+      default:
+         png_error(png_ptr, "bad data option (internal error)");
+#endif
+   }
+
+   if (cmap_entries > 256 || cmap_entries > image->colormap_entries)
+      png_error(png_ptr, "color map overflow (BAD internal error)");
+
+   image->colormap_entries = cmap_entries;
+
+   /* Double check using the recorded background index */
+   switch (output_processing)
+   {
+      case PNG_CMAP_NONE:
+         if (background_index != PNG_CMAP_NONE_BACKGROUND)
+            goto bad_background;
+         break;
+
+      case PNG_CMAP_GA:
+         if (background_index != PNG_CMAP_GA_BACKGROUND)
+            goto bad_background;
+         break;
+
+      case PNG_CMAP_TRANS:
+         if (background_index >= cmap_entries ||
+            background_index != PNG_CMAP_TRANS_BACKGROUND)
+            goto bad_background;
+         break;
+
+      case PNG_CMAP_RGB:
+         if (background_index != PNG_CMAP_RGB_BACKGROUND)
+            goto bad_background;
+         break;
+
+      case PNG_CMAP_RGB_ALPHA:
+         if (background_index != PNG_CMAP_RGB_ALPHA_BACKGROUND)
+            goto bad_background;
+         break;
+
+      default:
+         png_error(png_ptr, "bad processing option (internal error)");
+
+      bad_background:
+         png_error(png_ptr, "bad background index (internal error)");
+   }
+
+   display->colormap_processing = (int)output_processing;
+
+   return 1/*ok*/;
+}
+
+/* The final part of the color-map read called from png_image_finish_read. */
+static int
+png_image_read_and_map(png_voidp argument)
+{
+   png_image_read_control *display = png_voidcast(png_image_read_control*,
+       argument);
+   png_imagep image = display->image;
+   png_structrp png_ptr = image->opaque->png_ptr;
+   int passes;
+
+   /* Called when the libpng data must be transformed into the color-mapped
+    * form.  There is a local row buffer in display->local and this routine must
+    * do the interlace handling.
+    */
+   switch (png_ptr->interlaced)
+   {
+      case PNG_INTERLACE_NONE:
+         passes = 1;
+         break;
+
+      case PNG_INTERLACE_ADAM7:
+         passes = PNG_INTERLACE_ADAM7_PASSES;
+         break;
+
+      default:
+         png_error(png_ptr, "unknown interlace type");
+   }
+
+   {
+      png_uint_32  height = image->height;
+      png_uint_32  width = image->width;
+      int          proc = display->colormap_processing;
+      png_bytep    first_row = png_voidcast(png_bytep, display->first_row);
+      ptrdiff_t    step_row = display->row_bytes;
+      int pass;
+
+      for (pass = 0; pass < passes; ++pass)
+      {
+         unsigned int     startx, stepx, stepy;
+         png_uint_32      y;
+
+         if (png_ptr->interlaced == PNG_INTERLACE_ADAM7)
+         {
+            /* The row may be empty for a short image: */
+            if (PNG_PASS_COLS(width, pass) == 0)
+               continue;
+
+            startx = PNG_PASS_START_COL(pass);
+            stepx = PNG_PASS_COL_OFFSET(pass);
+            y = PNG_PASS_START_ROW(pass);
+            stepy = PNG_PASS_ROW_OFFSET(pass);
+         }
+
+         else
+         {
+            y = 0;
+            startx = 0;
+            stepx = stepy = 1;
+         }
+
+         for (; y<height; y += stepy)
+         {
+            png_bytep inrow = png_voidcast(png_bytep, display->local_row);
+            png_bytep outrow = first_row + y * step_row;
+            png_const_bytep end_row = outrow + width;
+
+            /* Read read the libpng data into the temporary buffer. */
+            png_read_row(png_ptr, inrow, NULL);
+
+            /* Now process the row according to the processing option, note
+             * that the caller verifies that the format of the libpng output
+             * data is as required.
+             */
+            outrow += startx;
+            switch (proc)
+            {
+               case PNG_CMAP_GA:
+                  for (; outrow < end_row; outrow += stepx)
+                  {
+                     /* The data is always in the PNG order */
+                     unsigned int gray = *inrow++;
+                     unsigned int alpha = *inrow++;
+                     unsigned int entry;
+
+                     /* NOTE: this code is copied as a comment in
+                      * make_ga_colormap above.  Please update the
+                      * comment if you change this code!
+                      */
+                     if (alpha > 229) /* opaque */
+                     {
+                        entry = (231 * gray + 128) >> 8;
+                     }
+                     else if (alpha < 26) /* transparent */
+                     {
+                        entry = 231;
+                     }
+                     else /* partially opaque */
+                     {
+                        entry = 226 + 6 * PNG_DIV51(alpha) + PNG_DIV51(gray);
+                     }
+
+                     *outrow = (png_byte)entry;
+                  }
+                  break;
+
+               case PNG_CMAP_TRANS:
+                  for (; outrow < end_row; outrow += stepx)
+                  {
+                     png_byte gray = *inrow++;
+                     png_byte alpha = *inrow++;
+
+                     if (alpha == 0)
+                        *outrow = PNG_CMAP_TRANS_BACKGROUND;
+
+                     else if (gray != PNG_CMAP_TRANS_BACKGROUND)
+                        *outrow = gray;
+
+                     else
+                        *outrow = (png_byte)(PNG_CMAP_TRANS_BACKGROUND+1);
+                  }
+                  break;
+
+               case PNG_CMAP_RGB:
+                  for (; outrow < end_row; outrow += stepx)
+                  {
+                     *outrow = PNG_RGB_INDEX(inrow[0], inrow[1], inrow[2]);
+                     inrow += 3;
+                  }
+                  break;
+
+               case PNG_CMAP_RGB_ALPHA:
+                  for (; outrow < end_row; outrow += stepx)
+                  {
+                     unsigned int alpha = inrow[3];
+
+                     /* Because the alpha entries only hold alpha==0.5 values
+                      * split the processing at alpha==0.25 (64) and 0.75
+                      * (196).
+                      */
+
+                     if (alpha >= 196)
+                        *outrow = PNG_RGB_INDEX(inrow[0], inrow[1],
+                            inrow[2]);
+
+                     else if (alpha < 64)
+                        *outrow = PNG_CMAP_RGB_ALPHA_BACKGROUND;
+
+                     else
+                     {
+                        /* Likewise there are three entries for each of r, g
+                         * and b.  We could select the entry by popcount on
+                         * the top two bits on those architectures that
+                         * support it, this is what the code below does,
+                         * crudely.
+                         */
+                        unsigned int back_i = PNG_CMAP_RGB_ALPHA_BACKGROUND+1;
+
+                        /* Here are how the values map:
+                         *
+                         * 0x00 .. 0x3f -> 0
+                         * 0x40 .. 0xbf -> 1
+                         * 0xc0 .. 0xff -> 2
+                         *
+                         * So, as above with the explicit alpha checks, the
+                         * breakpoints are at 64 and 196.
+                         */
+                        if (inrow[0] & 0x80) back_i += 9; /* red */
+                        if (inrow[0] & 0x40) back_i += 9;
+                        if (inrow[0] & 0x80) back_i += 3; /* green */
+                        if (inrow[0] & 0x40) back_i += 3;
+                        if (inrow[0] & 0x80) back_i += 1; /* blue */
+                        if (inrow[0] & 0x40) back_i += 1;
+
+                        *outrow = (png_byte)back_i;
+                     }
+
+                     inrow += 4;
+                  }
+                  break;
+
+               default:
+                  break;
+            }
+         }
+      }
+   }
+
+   return 1;
+}
+
+static int
+png_image_read_colormapped(png_voidp argument)
+{
+   png_image_read_control *display = png_voidcast(png_image_read_control*,
+       argument);
+   png_imagep image = display->image;
+   png_controlp control = image->opaque;
+   png_structrp png_ptr = control->png_ptr;
+   png_inforp info_ptr = control->info_ptr;
+
+   int passes = 0; /* As a flag */
+
+   PNG_SKIP_CHUNKS(png_ptr);
+
+   /* Update the 'info' structure and make sure the result is as required; first
+    * make sure to turn on the interlace handling if it will be required
+    * (because it can't be turned on *after* the call to png_read_update_info!)
+    */
+   if (display->colormap_processing == PNG_CMAP_NONE)
+      passes = png_set_interlace_handling(png_ptr);
+
+   png_read_update_info(png_ptr, info_ptr);
+
+   /* The expected output can be deduced from the colormap_processing option. */
+   switch (display->colormap_processing)
+   {
+      case PNG_CMAP_NONE:
+         /* Output must be one channel and one byte per pixel, the output
+          * encoding can be anything.
+          */
+         if ((info_ptr->color_type == PNG_COLOR_TYPE_PALETTE ||
+            info_ptr->color_type == PNG_COLOR_TYPE_GRAY) &&
+            info_ptr->bit_depth == 8)
+            break;
+
+         goto bad_output;
+
+      case PNG_CMAP_TRANS:
+      case PNG_CMAP_GA:
+         /* Output must be two channels and the 'G' one must be sRGB, the latter
+          * can be checked with an exact number because it should have been set
+          * to this number above!
+          */
+         if (info_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA &&
+            info_ptr->bit_depth == 8 &&
+            png_ptr->screen_gamma == PNG_GAMMA_sRGB &&
+            image->colormap_entries == 256)
+            break;
+
+         goto bad_output;
+
+      case PNG_CMAP_RGB:
+         /* Output must be 8-bit sRGB encoded RGB */
+         if (info_ptr->color_type == PNG_COLOR_TYPE_RGB &&
+            info_ptr->bit_depth == 8 &&
+            png_ptr->screen_gamma == PNG_GAMMA_sRGB &&
+            image->colormap_entries == 216)
+            break;
+
+         goto bad_output;
+
+      case PNG_CMAP_RGB_ALPHA:
+         /* Output must be 8-bit sRGB encoded RGBA */
+         if (info_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA &&
+            info_ptr->bit_depth == 8 &&
+            png_ptr->screen_gamma == PNG_GAMMA_sRGB &&
+            image->colormap_entries == 244 /* 216 + 1 + 27 */)
+            break;
+
+         goto bad_output;
+
+      default:
+      bad_output:
+         png_error(png_ptr, "bad color-map processing (internal error)");
+   }
+
+   /* Now read the rows.  Do this here if it is possible to read directly into
+    * the output buffer, otherwise allocate a local row buffer of the maximum
+    * size libpng requires and call the relevant processing routine safely.
+    */
+   {
+      png_voidp first_row = display->buffer;
+      ptrdiff_t row_bytes = display->row_stride;
+
+      /* The following expression is designed to work correctly whether it gives
+       * a signed or an unsigned result.
+       */
+      if (row_bytes < 0)
+      {
+         char *ptr = png_voidcast(char*, first_row);
+         ptr += (image->height-1) * (-row_bytes);
+         first_row = png_voidcast(png_voidp, ptr);
+      }
+
+      display->first_row = first_row;
+      display->row_bytes = row_bytes;
+   }
+
+   if (passes == 0)
+   {
+      int result;
+      png_voidp row = png_malloc(png_ptr, png_get_rowbytes(png_ptr, info_ptr));
+
+      display->local_row = row;
+      result = png_safe_execute(image, png_image_read_and_map, display);
+      display->local_row = NULL;
+      png_free(png_ptr, row);
+
+      return result;
+   }
+
+   else
+   {
+      png_alloc_size_t row_bytes = (png_alloc_size_t)display->row_bytes;
+
+      while (--passes >= 0)
+      {
+         png_uint_32      y = image->height;
+         png_bytep        row = png_voidcast(png_bytep, display->first_row);
+
+         for (; y > 0; --y)
+         {
+            png_read_row(png_ptr, row, NULL);
+            row += row_bytes;
+         }
+      }
+
+      return 1;
+   }
+}
+
+/* Just the row reading part of png_image_read. */
+static int
+png_image_read_composite(png_voidp argument)
+{
+   png_image_read_control *display = png_voidcast(png_image_read_control*,
+       argument);
+   png_imagep image = display->image;
+   png_structrp png_ptr = image->opaque->png_ptr;
+   int passes;
+
+   switch (png_ptr->interlaced)
+   {
+      case PNG_INTERLACE_NONE:
+         passes = 1;
+         break;
+
+      case PNG_INTERLACE_ADAM7:
+         passes = PNG_INTERLACE_ADAM7_PASSES;
+         break;
+
+      default:
+         png_error(png_ptr, "unknown interlace type");
+   }
+
+   {
+      png_uint_32  height = image->height;
+      png_uint_32  width = image->width;
+      ptrdiff_t    step_row = display->row_bytes;
+      unsigned int channels =
+          (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ? 3 : 1;
+      int pass;
+
+      for (pass = 0; pass < passes; ++pass)
+      {
+         unsigned int     startx, stepx, stepy;
+         png_uint_32      y;
+
+         if (png_ptr->interlaced == PNG_INTERLACE_ADAM7)
+         {
+            /* The row may be empty for a short image: */
+            if (PNG_PASS_COLS(width, pass) == 0)
+               continue;
+
+            startx = PNG_PASS_START_COL(pass) * channels;
+            stepx = PNG_PASS_COL_OFFSET(pass) * channels;
+            y = PNG_PASS_START_ROW(pass);
+            stepy = PNG_PASS_ROW_OFFSET(pass);
+         }
+
+         else
+         {
+            y = 0;
+            startx = 0;
+            stepx = channels;
+            stepy = 1;
+         }
+
+         for (; y<height; y += stepy)
+         {
+            png_bytep inrow = png_voidcast(png_bytep, display->local_row);
+            png_bytep outrow;
+            png_const_bytep end_row;
+
+            /* Read the row, which is packed: */
+            png_read_row(png_ptr, inrow, NULL);
+
+            outrow = png_voidcast(png_bytep, display->first_row);
+            outrow += y * step_row;
+            end_row = outrow + width * channels;
+
+            /* Now do the composition on each pixel in this row. */
+            outrow += startx;
+            for (; outrow < end_row; outrow += stepx)
+            {
+               png_byte alpha = inrow[channels];
+
+               if (alpha > 0) /* else no change to the output */
+               {
+                  unsigned int c;
+
+                  for (c=0; c<channels; ++c)
+                  {
+                     png_uint_32 component = inrow[c];
+
+                     if (alpha < 255) /* else just use component */
+                     {
+                        /* This is PNG_OPTIMIZED_ALPHA, the component value
+                         * is a linear 8-bit value.  Combine this with the
+                         * current outrow[c] value which is sRGB encoded.
+                         * Arithmetic here is 16-bits to preserve the output
+                         * values correctly.
+                         */
+                        component *= 257*255; /* =65535 */
+                        component += (255-alpha)*png_sRGB_table[outrow[c]];
+
+                        /* So 'component' is scaled by 255*65535 and is
+                         * therefore appropriate for the sRGB to linear
+                         * conversion table.
+                         */
+                        component = PNG_sRGB_FROM_LINEAR(component);
+                     }
+
+                     outrow[c] = (png_byte)component;
+                  }
+               }
+
+               inrow += channels+1; /* components and alpha channel */
+            }
+         }
+      }
+   }
+
+   return 1;
+}
+
+/* The do_local_background case; called when all the following transforms are to
+ * be done:
+ *
+ * PNG_RGB_TO_GRAY
+ * PNG_COMPOSITE
+ * PNG_GAMMA
+ *
+ * This is a work-around for the fact that both the PNG_RGB_TO_GRAY and
+ * PNG_COMPOSITE code performs gamma correction, so we get double gamma
+ * correction.  The fix-up is to prevent the PNG_COMPOSITE operation from
+ * happening inside libpng, so this routine sees an 8 or 16-bit gray+alpha
+ * row and handles the removal or pre-multiplication of the alpha channel.
+ */
+static int
+png_image_read_background(png_voidp argument)
+{
+   png_image_read_control *display = png_voidcast(png_image_read_control*,
+       argument);
+   png_imagep image = display->image;
+   png_structrp png_ptr = image->opaque->png_ptr;
+   png_inforp info_ptr = image->opaque->info_ptr;
+   png_uint_32 height = image->height;
+   png_uint_32 width = image->width;
+   int pass, passes;
+
+   /* Double check the convoluted logic below.  We expect to get here with
+    * libpng doing rgb to gray and gamma correction but background processing
+    * left to the png_image_read_background function.  The rows libpng produce
+    * might be 8 or 16-bit but should always have two channels; gray plus alpha.
+    */
+   if ((png_ptr->transformations & PNG_RGB_TO_GRAY) == 0)
+      png_error(png_ptr, "lost rgb to gray");
+
+   if ((png_ptr->transformations & PNG_COMPOSE) != 0)
+      png_error(png_ptr, "unexpected compose");
+
+   if (png_get_channels(png_ptr, info_ptr) != 2)
+      png_error(png_ptr, "lost/gained channels");
+
+   /* Expect the 8-bit case to always remove the alpha channel */
+   if ((image->format & PNG_FORMAT_FLAG_LINEAR) == 0 &&
+      (image->format & PNG_FORMAT_FLAG_ALPHA) != 0)
+      png_error(png_ptr, "unexpected 8-bit transformation");
+
+   switch (png_ptr->interlaced)
+   {
+      case PNG_INTERLACE_NONE:
+         passes = 1;
+         break;
+
+      case PNG_INTERLACE_ADAM7:
+         passes = PNG_INTERLACE_ADAM7_PASSES;
+         break;
+
+      default:
+         png_error(png_ptr, "unknown interlace type");
+   }
+
+   /* Use direct access to info_ptr here because otherwise the simplified API
+    * would require PNG_EASY_ACCESS_SUPPORTED (just for this.)  Note this is
+    * checking the value after libpng expansions, not the original value in the
+    * PNG.
+    */
+   switch (info_ptr->bit_depth)
+   {
+      case 8:
+         /* 8-bit sRGB gray values with an alpha channel; the alpha channel is
+          * to be removed by composing on a background: either the row if
+          * display->background is NULL or display->background->green if not.
+          * Unlike the code above ALPHA_OPTIMIZED has *not* been done.
+          */
+         {
+            png_bytep first_row = png_voidcast(png_bytep, display->first_row);
+            ptrdiff_t step_row = display->row_bytes;
+
+            for (pass = 0; pass < passes; ++pass)
+            {
+               unsigned int     startx, stepx, stepy;
+               png_uint_32      y;
+
+               if (png_ptr->interlaced == PNG_INTERLACE_ADAM7)
+               {
+                  /* The row may be empty for a short image: */
+                  if (PNG_PASS_COLS(width, pass) == 0)
+                     continue;
+
+                  startx = PNG_PASS_START_COL(pass);
+                  stepx = PNG_PASS_COL_OFFSET(pass);
+                  y = PNG_PASS_START_ROW(pass);
+                  stepy = PNG_PASS_ROW_OFFSET(pass);
+               }
+
+               else
+               {
+                  y = 0;
+                  startx = 0;
+                  stepx = stepy = 1;
+               }
+
+               if (display->background == NULL)
+               {
+                  for (; y<height; y += stepy)
+                  {
+                     png_bytep inrow = png_voidcast(png_bytep,
+                         display->local_row);
+                     png_bytep outrow = first_row + y * step_row;
+                     png_const_bytep end_row = outrow + width;
+
+                     /* Read the row, which is packed: */
+                     png_read_row(png_ptr, inrow, NULL);
+
+                     /* Now do the composition on each pixel in this row. */
+                     outrow += startx;
+                     for (; outrow < end_row; outrow += stepx)
+                     {
+                        png_byte alpha = inrow[1];
+
+                        if (alpha > 0) /* else no change to the output */
+                        {
+                           png_uint_32 component = inrow[0];
+
+                           if (alpha < 255) /* else just use component */
+                           {
+                              /* Since PNG_OPTIMIZED_ALPHA was not set it is
+                               * necessary to invert the sRGB transfer
+                               * function and multiply the alpha out.
+                               */
+                              component = png_sRGB_table[component] * alpha;
+                              component += png_sRGB_table[outrow[0]] *
+                                 (255-alpha);
+                              component = PNG_sRGB_FROM_LINEAR(component);
+                           }
+
+                           outrow[0] = (png_byte)component;
+                        }
+
+                        inrow += 2; /* gray and alpha channel */
+                     }
+                  }
+               }
+
+               else /* constant background value */
+               {
+                  png_byte background8 = display->background->green;
+                  png_uint_16 background = png_sRGB_table[background8];
+
+                  for (; y<height; y += stepy)
+                  {
+                     png_bytep inrow = png_voidcast(png_bytep,
+                         display->local_row);
+                     png_bytep outrow = first_row + y * step_row;
+                     png_const_bytep end_row = outrow + width;
+
+                     /* Read the row, which is packed: */
+                     png_read_row(png_ptr, inrow, NULL);
+
+                     /* Now do the composition on each pixel in this row. */
+                     outrow += startx;
+                     for (; outrow < end_row; outrow += stepx)
+                     {
+                        png_byte alpha = inrow[1];
+
+                        if (alpha > 0) /* else use background */
+                        {
+                           png_uint_32 component = inrow[0];
+
+                           if (alpha < 255) /* else just use component */
+                           {
+                              component = png_sRGB_table[component] * alpha;
+                              component += background * (255-alpha);
+                              component = PNG_sRGB_FROM_LINEAR(component);
+                           }
+
+                           outrow[0] = (png_byte)component;
+                        }
+
+                        else
+                           outrow[0] = background8;
+
+                        inrow += 2; /* gray and alpha channel */
+                     }
+                  }
+               }
+            }
+         }
+         break;
+
+      case 16:
+         /* 16-bit linear with pre-multiplied alpha; the pre-multiplication must
+          * still be done and, maybe, the alpha channel removed.  This code also
+          * handles the alpha-first option.
+          */
+         {
+            png_uint_16p first_row = png_voidcast(png_uint_16p,
+                display->first_row);
+            /* The division by two is safe because the caller passed in a
+             * stride which was multiplied by 2 (below) to get row_bytes.
+             */
+            ptrdiff_t    step_row = display->row_bytes / 2;
+            unsigned int preserve_alpha = (image->format &
+                PNG_FORMAT_FLAG_ALPHA) != 0;
+            unsigned int outchannels = 1U+preserve_alpha;
+            int swap_alpha = 0;
+
+#           ifdef PNG_SIMPLIFIED_READ_AFIRST_SUPPORTED
+               if (preserve_alpha != 0 &&
+                   (image->format & PNG_FORMAT_FLAG_AFIRST) != 0)
+                  swap_alpha = 1;
+#           endif
+
+            for (pass = 0; pass < passes; ++pass)
+            {
+               unsigned int     startx, stepx, stepy;
+               png_uint_32      y;
+
+               /* The 'x' start and step are adjusted to output components here.
+                */
+               if (png_ptr->interlaced == PNG_INTERLACE_ADAM7)
+               {
+                  /* The row may be empty for a short image: */
+                  if (PNG_PASS_COLS(width, pass) == 0)
+                     continue;
+
+                  startx = PNG_PASS_START_COL(pass) * outchannels;
+                  stepx = PNG_PASS_COL_OFFSET(pass) * outchannels;
+                  y = PNG_PASS_START_ROW(pass);
+                  stepy = PNG_PASS_ROW_OFFSET(pass);
+               }
+
+               else
+               {
+                  y = 0;
+                  startx = 0;
+                  stepx = outchannels;
+                  stepy = 1;
+               }
+
+               for (; y<height; y += stepy)
+               {
+                  png_const_uint_16p inrow;
+                  png_uint_16p outrow = first_row + y*step_row;
+                  png_uint_16p end_row = outrow + width * outchannels;
+
+                  /* Read the row, which is packed: */
+                  png_read_row(png_ptr, png_voidcast(png_bytep,
+                      display->local_row), NULL);
+                  inrow = png_voidcast(png_const_uint_16p, display->local_row);
+
+                  /* Now do the pre-multiplication on each pixel in this row.
+                   */
+                  outrow += startx;
+                  for (; outrow < end_row; outrow += stepx)
+                  {
+                     png_uint_32 component = inrow[0];
+                     png_uint_16 alpha = inrow[1];
+
+                     if (alpha > 0) /* else 0 */
+                     {
+                        if (alpha < 65535) /* else just use component */
+                        {
+                           component *= alpha;
+                           component += 32767;
+                           component /= 65535;
+                        }
+                     }
+
+                     else
+                        component = 0;
+
+                     outrow[swap_alpha] = (png_uint_16)component;
+                     if (preserve_alpha != 0)
+                        outrow[1 ^ swap_alpha] = alpha;
+
+                     inrow += 2; /* components and alpha channel */
+                  }
+               }
+            }
+         }
+         break;
+
+#ifdef __GNUC__
+      default:
+         png_error(png_ptr, "unexpected bit depth");
+#endif
+   }
+
+   return 1;
+}
+
+/* The guts of png_image_finish_read as a png_safe_execute callback. */
+static int
+png_image_read_direct(png_voidp argument)
+{
+   png_image_read_control *display = png_voidcast(png_image_read_control*,
+       argument);
+   png_imagep image = display->image;
+   png_structrp png_ptr = image->opaque->png_ptr;
+   png_inforp info_ptr = image->opaque->info_ptr;
+
+   png_uint_32 format = image->format;
+   int linear = (format & PNG_FORMAT_FLAG_LINEAR) != 0;
+   int do_local_compose = 0;
+   int do_local_background = 0; /* to avoid double gamma correction bug */
+   int passes = 0;
+
+   /* Add transforms to ensure the correct output format is produced then check
+    * that the required implementation support is there.  Always expand; always
+    * need 8 bits minimum, no palette and expanded tRNS.
+    */
+   png_set_expand(png_ptr);
+
+   /* Now check the format to see if it was modified. */
+   {
+      png_uint_32 base_format = png_image_format(png_ptr) &
+         ~PNG_FORMAT_FLAG_COLORMAP /* removed by png_set_expand */;
+      png_uint_32 change = format ^ base_format;
+      png_fixed_point output_gamma;
+      int mode; /* alpha mode */
+
+      /* Do this first so that we have a record if rgb to gray is happening. */
+      if ((change & PNG_FORMAT_FLAG_COLOR) != 0)
+      {
+         /* gray<->color transformation required. */
+         if ((format & PNG_FORMAT_FLAG_COLOR) != 0)
+            png_set_gray_to_rgb(png_ptr);
+
+         else
+         {
+            /* libpng can't do both rgb to gray and
+             * background/pre-multiplication if there is also significant gamma
+             * correction, because both operations require linear colors and
+             * the code only supports one transform doing the gamma correction.
+             * Handle this by doing the pre-multiplication or background
+             * operation in this code, if necessary.
+             *
+             * TODO: fix this by rewriting pngrtran.c (!)
+             *
+             * For the moment (given that fixing this in pngrtran.c is an
+             * enormous change) 'do_local_background' is used to indicate that
+             * the problem exists.
+             */
+            if ((base_format & PNG_FORMAT_FLAG_ALPHA) != 0)
+               do_local_background = 1/*maybe*/;
+
+            png_set_rgb_to_gray_fixed(png_ptr, PNG_ERROR_ACTION_NONE,
+                PNG_RGB_TO_GRAY_DEFAULT, PNG_RGB_TO_GRAY_DEFAULT);
+         }
+
+         change &= ~PNG_FORMAT_FLAG_COLOR;
+      }
+
+      /* Set the gamma appropriately, linear for 16-bit input, sRGB otherwise.
+       */
+      {
+         png_fixed_point input_gamma_default;
+
+         if ((base_format & PNG_FORMAT_FLAG_LINEAR) != 0 &&
+             (image->flags & PNG_IMAGE_FLAG_16BIT_sRGB) == 0)
+            input_gamma_default = PNG_GAMMA_LINEAR;
+         else
+            input_gamma_default = PNG_DEFAULT_sRGB;
+
+         /* Call png_set_alpha_mode to set the default for the input gamma; the
+          * output gamma is set by a second call below.
+          */
+         png_set_alpha_mode_fixed(png_ptr, PNG_ALPHA_PNG, input_gamma_default);
+      }
+
+      if (linear != 0)
+      {
+         /* If there *is* an alpha channel in the input it must be multiplied
+          * out; use PNG_ALPHA_STANDARD, otherwise just use PNG_ALPHA_PNG.
+          */
+         if ((base_format & PNG_FORMAT_FLAG_ALPHA) != 0)
+            mode = PNG_ALPHA_STANDARD; /* associated alpha */
+
+         else
+            mode = PNG_ALPHA_PNG;
+
+         output_gamma = PNG_GAMMA_LINEAR;
+      }
+
+      else
+      {
+         mode = PNG_ALPHA_PNG;
+         output_gamma = PNG_DEFAULT_sRGB;
+      }
+
+      if ((change & PNG_FORMAT_FLAG_ASSOCIATED_ALPHA) != 0)
+      {
+         mode = PNG_ALPHA_OPTIMIZED;
+         change &= ~PNG_FORMAT_FLAG_ASSOCIATED_ALPHA;
+      }
+
+      /* If 'do_local_background' is set check for the presence of gamma
+       * correction; this is part of the work-round for the libpng bug
+       * described above.
+       *
+       * TODO: fix libpng and remove this.
+       */
+      if (do_local_background != 0)
+      {
+         png_fixed_point gtest;
+
+         /* This is 'png_gamma_threshold' from pngrtran.c; the test used for
+          * gamma correction, the screen gamma hasn't been set on png_struct
+          * yet; it's set below.  png_struct::gamma, however, is set to the
+          * final value.
+          */
+         if (png_muldiv(&gtest, output_gamma, png_ptr->colorspace.gamma,
+             PNG_FP_1) != 0 && png_gamma_significant(gtest) == 0)
+            do_local_background = 0;
+
+         else if (mode == PNG_ALPHA_STANDARD)
+         {
+            do_local_background = 2/*required*/;
+            mode = PNG_ALPHA_PNG; /* prevent libpng doing it */
+         }
+
+         /* else leave as 1 for the checks below */
+      }
+
+      /* If the bit-depth changes then handle that here. */
+      if ((change & PNG_FORMAT_FLAG_LINEAR) != 0)
+      {
+         if (linear != 0 /*16-bit output*/)
+            png_set_expand_16(png_ptr);
+
+         else /* 8-bit output */
+            png_set_scale_16(png_ptr);
+
+         change &= ~PNG_FORMAT_FLAG_LINEAR;
+      }
+
+      /* Now the background/alpha channel changes. */
+      if ((change & PNG_FORMAT_FLAG_ALPHA) != 0)
+      {
+         /* Removing an alpha channel requires composition for the 8-bit
+          * formats; for the 16-bit it is already done, above, by the
+          * pre-multiplication and the channel just needs to be stripped.
+          */
+         if ((base_format & PNG_FORMAT_FLAG_ALPHA) != 0)
+         {
+            /* If RGB->gray is happening the alpha channel must be left and the
+             * operation completed locally.
+             *
+             * TODO: fix libpng and remove this.
+             */
+            if (do_local_background != 0)
+               do_local_background = 2/*required*/;
+
+            /* 16-bit output: just remove the channel */
+            else if (linear != 0) /* compose on black (well, pre-multiply) */
+               png_set_strip_alpha(png_ptr);
+
+            /* 8-bit output: do an appropriate compose */
+            else if (display->background != NULL)
+            {
+               png_color_16 c;
+
+               c.index = 0; /*unused*/
+               c.red = display->background->red;
+               c.green = display->background->green;
+               c.blue = display->background->blue;
+               c.gray = display->background->green;
+
+               /* This is always an 8-bit sRGB value, using the 'green' channel
+                * for gray is much better than calculating the luminance here;
+                * we can get off-by-one errors in that calculation relative to
+                * the app expectations and that will show up in transparent
+                * pixels.
+                */
+               png_set_background_fixed(png_ptr, &c,
+                   PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/,
+                   0/*gamma: not used*/);
+            }
+
+            else /* compose on row: implemented below. */
+            {
+               do_local_compose = 1;
+               /* This leaves the alpha channel in the output, so it has to be
+                * removed by the code below.  Set the encoding to the 'OPTIMIZE'
+                * one so the code only has to hack on the pixels that require
+                * composition.
+                */
+               mode = PNG_ALPHA_OPTIMIZED;
+            }
+         }
+
+         else /* output needs an alpha channel */
+         {
+            /* This is tricky because it happens before the swap operation has
+             * been accomplished; however, the swap does *not* swap the added
+             * alpha channel (weird API), so it must be added in the correct
+             * place.
+             */
+            png_uint_32 filler; /* opaque filler */
+            int where;
+
+            if (linear != 0)
+               filler = 65535;
+
+            else
+               filler = 255;
+
+#ifdef PNG_FORMAT_AFIRST_SUPPORTED
+            if ((format & PNG_FORMAT_FLAG_AFIRST) != 0)
+            {
+               where = PNG_FILLER_BEFORE;
+               change &= ~PNG_FORMAT_FLAG_AFIRST;
+            }
+
+            else
+#endif
+            where = PNG_FILLER_AFTER;
+
+            png_set_add_alpha(png_ptr, filler, where);
+         }
+
+         /* This stops the (irrelevant) call to swap_alpha below. */
+         change &= ~PNG_FORMAT_FLAG_ALPHA;
+      }
+
+      /* Now set the alpha mode correctly; this is always done, even if there is
+       * no alpha channel in either the input or the output because it correctly
+       * sets the output gamma.
+       */
+      png_set_alpha_mode_fixed(png_ptr, mode, output_gamma);
+
+#     ifdef PNG_FORMAT_BGR_SUPPORTED
+         if ((change & PNG_FORMAT_FLAG_BGR) != 0)
+         {
+            /* Check only the output format; PNG is never BGR; don't do this if
+             * the output is gray, but fix up the 'format' value in that case.
+             */
+            if ((format & PNG_FORMAT_FLAG_COLOR) != 0)
+               png_set_bgr(png_ptr);
+
+            else
+               format &= ~PNG_FORMAT_FLAG_BGR;
+
+            change &= ~PNG_FORMAT_FLAG_BGR;
+         }
+#     endif
+
+#     ifdef PNG_FORMAT_AFIRST_SUPPORTED
+         if ((change & PNG_FORMAT_FLAG_AFIRST) != 0)
+         {
+            /* Only relevant if there is an alpha channel - it's particularly
+             * important to handle this correctly because do_local_compose may
+             * be set above and then libpng will keep the alpha channel for this
+             * code to remove.
+             */
+            if ((format & PNG_FORMAT_FLAG_ALPHA) != 0)
+            {
+               /* Disable this if doing a local background,
+                * TODO: remove this when local background is no longer required.
+                */
+               if (do_local_background != 2)
+                  png_set_swap_alpha(png_ptr);
+            }
+
+            else
+               format &= ~PNG_FORMAT_FLAG_AFIRST;
+
+            change &= ~PNG_FORMAT_FLAG_AFIRST;
+         }
+#     endif
+
+      /* If the *output* is 16-bit then we need to check for a byte-swap on this
+       * architecture.
+       */
+      if (linear != 0)
+      {
+         png_uint_16 le = 0x0001;
+
+         if ((*(png_const_bytep) & le) != 0)
+            png_set_swap(png_ptr);
+      }
+
+      /* If change is not now 0 some transformation is missing - error out. */
+      if (change != 0)
+         png_error(png_ptr, "png_read_image: unsupported transformation");
+   }
+
+   PNG_SKIP_CHUNKS(png_ptr);
+
+   /* Update the 'info' structure and make sure the result is as required; first
+    * make sure to turn on the interlace handling if it will be required
+    * (because it can't be turned on *after* the call to png_read_update_info!)
+    *
+    * TODO: remove the do_local_background fixup below.
+    */
+   if (do_local_compose == 0 && do_local_background != 2)
+      passes = png_set_interlace_handling(png_ptr);
+
+   png_read_update_info(png_ptr, info_ptr);
+
+   {
+      png_uint_32 info_format = 0;
+
+      if ((info_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
+         info_format |= PNG_FORMAT_FLAG_COLOR;
+
+      if ((info_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)
+      {
+         /* do_local_compose removes this channel below. */
+         if (do_local_compose == 0)
+         {
+            /* do_local_background does the same if required. */
+            if (do_local_background != 2 ||
+               (format & PNG_FORMAT_FLAG_ALPHA) != 0)
+               info_format |= PNG_FORMAT_FLAG_ALPHA;
+         }
+      }
+
+      else if (do_local_compose != 0) /* internal error */
+         png_error(png_ptr, "png_image_read: alpha channel lost");
+
+      if ((format & PNG_FORMAT_FLAG_ASSOCIATED_ALPHA) != 0) {
+         info_format |= PNG_FORMAT_FLAG_ASSOCIATED_ALPHA;
+      }
+
+      if (info_ptr->bit_depth == 16)
+         info_format |= PNG_FORMAT_FLAG_LINEAR;
+
+#ifdef PNG_FORMAT_BGR_SUPPORTED
+      if ((png_ptr->transformations & PNG_BGR) != 0)
+         info_format |= PNG_FORMAT_FLAG_BGR;
+#endif
+
+#ifdef PNG_FORMAT_AFIRST_SUPPORTED
+         if (do_local_background == 2)
+         {
+            if ((format & PNG_FORMAT_FLAG_AFIRST) != 0)
+               info_format |= PNG_FORMAT_FLAG_AFIRST;
+         }
+
+         if ((png_ptr->transformations & PNG_SWAP_ALPHA) != 0 ||
+            ((png_ptr->transformations & PNG_ADD_ALPHA) != 0 &&
+            (png_ptr->flags & PNG_FLAG_FILLER_AFTER) == 0))
+         {
+            if (do_local_background == 2)
+               png_error(png_ptr, "unexpected alpha swap transformation");
+
+            info_format |= PNG_FORMAT_FLAG_AFIRST;
+         }
+#     endif
+
+      /* This is actually an internal error. */
+      if (info_format != format)
+         png_error(png_ptr, "png_read_image: invalid transformations");
+   }
+
+   /* Now read the rows.  If do_local_compose is set then it is necessary to use
+    * a local row buffer.  The output will be GA, RGBA or BGRA and must be
+    * converted to G, RGB or BGR as appropriate.  The 'local_row' member of the
+    * display acts as a flag.
+    */
+   {
+      png_voidp first_row = display->buffer;
+      ptrdiff_t row_bytes = display->row_stride;
+
+      if (linear != 0)
+         row_bytes *= 2;
+
+      /* The following expression is designed to work correctly whether it gives
+       * a signed or an unsigned result.
+       */
+      if (row_bytes < 0)
+      {
+         char *ptr = png_voidcast(char*, first_row);
+         ptr += (image->height-1) * (-row_bytes);
+         first_row = png_voidcast(png_voidp, ptr);
+      }
+
+      display->first_row = first_row;
+      display->row_bytes = row_bytes;
+   }
+
+   if (do_local_compose != 0)
+   {
+      int result;
+      png_voidp row = png_malloc(png_ptr, png_get_rowbytes(png_ptr, info_ptr));
+
+      display->local_row = row;
+      result = png_safe_execute(image, png_image_read_composite, display);
+      display->local_row = NULL;
+      png_free(png_ptr, row);
+
+      return result;
+   }
+
+   else if (do_local_background == 2)
+   {
+      int result;
+      png_voidp row = png_malloc(png_ptr, png_get_rowbytes(png_ptr, info_ptr));
+
+      display->local_row = row;
+      result = png_safe_execute(image, png_image_read_background, display);
+      display->local_row = NULL;
+      png_free(png_ptr, row);
+
+      return result;
+   }
+
+   else
+   {
+      png_alloc_size_t row_bytes = (png_alloc_size_t)display->row_bytes;
+
+      while (--passes >= 0)
+      {
+         png_uint_32      y = image->height;
+         png_bytep        row = png_voidcast(png_bytep, display->first_row);
+
+         for (; y > 0; --y)
+         {
+            png_read_row(png_ptr, row, NULL);
+            row += row_bytes;
+         }
+      }
+
+      return 1;
+   }
+}
+
+int PNGAPI
+png_image_finish_read(png_imagep image, png_const_colorp background,
+    void *buffer, png_int_32 row_stride, void *colormap)
+{
+   if (image != NULL && image->version == PNG_IMAGE_VERSION)
+   {
+      /* Check for row_stride overflow.  This check is not performed on the
+       * original PNG format because it may not occur in the output PNG format
+       * and libpng deals with the issues of reading the original.
+       */
+      unsigned int channels = PNG_IMAGE_PIXEL_CHANNELS(image->format);
+
+      /* The following checks just the 'row_stride' calculation to ensure it
+       * fits in a signed 32-bit value.  Because channels/components can be
+       * either 1 or 2 bytes in size the length of a row can still overflow 32
+       * bits; this is just to verify that the 'row_stride' argument can be
+       * represented.
+       */
+      if (image->width <= 0x7fffffffU/channels) /* no overflow */
+      {
+         png_uint_32 check;
+         png_uint_32 png_row_stride = image->width * channels;
+
+         if (row_stride == 0)
+            row_stride = (png_int_32)/*SAFE*/png_row_stride;
+
+         if (row_stride < 0)
+            check = (png_uint_32)(-row_stride);
+
+         else
+            check = (png_uint_32)row_stride;
+
+         /* This verifies 'check', the absolute value of the actual stride
+          * passed in and detects overflow in the application calculation (i.e.
+          * if the app did actually pass in a non-zero 'row_stride'.
+          */
+         if (image->opaque != NULL && buffer != NULL && check >= png_row_stride)
+         {
+            /* Now check for overflow of the image buffer calculation; this
+             * limits the whole image size to 32 bits for API compatibility with
+             * the current, 32-bit, PNG_IMAGE_BUFFER_SIZE macro.
+             *
+             * The PNG_IMAGE_BUFFER_SIZE macro is:
+             *
+             *    (PNG_IMAGE_PIXEL_COMPONENT_SIZE(fmt)*height*(row_stride))
+             *
+             * And the component size is always 1 or 2, so make sure that the
+             * number of *bytes* that the application is saying are available
+             * does actually fit into a 32-bit number.
+             *
+             * NOTE: this will be changed in 1.7 because PNG_IMAGE_BUFFER_SIZE
+             * will be changed to use png_alloc_size_t; bigger images can be
+             * accommodated on 64-bit systems.
+             */
+            if (image->height <=
+                0xffffffffU/PNG_IMAGE_PIXEL_COMPONENT_SIZE(image->format)/check)
+            {
+               if ((image->format & PNG_FORMAT_FLAG_COLORMAP) == 0 ||
+                  (image->colormap_entries > 0 && colormap != NULL))
+               {
+                  int result;
+                  png_image_read_control display;
+
+                  memset(&display, 0, (sizeof display));
+                  display.image = image;
+                  display.buffer = buffer;
+                  display.row_stride = row_stride;
+                  display.colormap = colormap;
+                  display.background = background;
+                  display.local_row = NULL;
+
+                  /* Choose the correct 'end' routine; for the color-map case
+                   * all the setup has already been done.
+                   */
+                  if ((image->format & PNG_FORMAT_FLAG_COLORMAP) != 0)
+                     result =
+                         png_safe_execute(image,
+                             png_image_read_colormap, &display) &&
+                             png_safe_execute(image,
+                             png_image_read_colormapped, &display);
+
+                  else
+                     result =
+                        png_safe_execute(image,
+                            png_image_read_direct, &display);
+
+                  png_image_free(image);
+                  return result;
+               }
+
+               else
+                  return png_image_error(image,
+                      "png_image_finish_read[color-map]: no color-map");
+            }
+
+            else
+               return png_image_error(image,
+                   "png_image_finish_read: image too large");
+         }
+
+         else
+            return png_image_error(image,
+                "png_image_finish_read: invalid argument");
+      }
+
+      else
+         return png_image_error(image,
+             "png_image_finish_read: row_stride too large");
+   }
+
+   else if (image != NULL)
+      return png_image_error(image,
+          "png_image_finish_read: damaged PNG_IMAGE_VERSION");
+
+   return 0;
+}
+
+#endif /* SIMPLIFIED_READ */
+#endif /* READ */
diff --git a/reg-io/png/lpng1510/pngrio.c b/reg-io/png/lpng/pngrio.c
similarity index 60%
rename from reg-io/png/lpng1510/pngrio.c
rename to reg-io/png/lpng/pngrio.c
index d0d9d8a7..7d30c7a5 100644
--- a/reg-io/png/lpng1510/pngrio.c
+++ b/reg-io/png/lpng/pngrio.c
@@ -1,10 +1,10 @@
 
 /* pngrio.c - functions for data input
  *
- * Last changed in libpng 1.5.0 [January 6, 2011]
- * Copyright (c) 1998-2011 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2016,2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
  *
  * This code is released under the libpng license.
  * For conditions of distribution and use, see the disclaimer
@@ -26,10 +26,10 @@
  * reads from a file pointer.  Note that this routine sometimes gets called
  * with very small lengths, so you should implement some kind of simple
  * buffering if you are using unbuffered reads.  This should never be asked
- * to read more then 64K on a 16 bit machine.
+ * to read more than 64K on a 16-bit machine.
  */
 void /* PRIVATE */
-png_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
+png_read_data(png_structrp png_ptr, png_bytep data, size_t length)
 {
    png_debug1(4, "reading %d bytes", (int)length);
 
@@ -46,80 +46,22 @@ png_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
  * read_data function and use it at run time with png_set_read_fn(), rather
  * than changing the library.
  */
-#  ifndef USE_FAR_KEYWORD
 void PNGCBAPI
-png_default_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
+png_default_read_data(png_structp png_ptr, png_bytep data, size_t length)
 {
-   png_size_t check;
+   size_t check;
 
    if (png_ptr == NULL)
       return;
 
-   /* fread() returns 0 on error, so it is OK to store this in a png_size_t
+   /* fread() returns 0 on error, so it is OK to store this in a size_t
     * instead of an int, which is what fread() actually returns.
     */
-   check = fread(data, 1, length, (png_FILE_p)png_ptr->io_ptr);
+   check = fread(data, 1, length, png_voidcast(png_FILE_p, png_ptr->io_ptr));
 
    if (check != length)
       png_error(png_ptr, "Read Error");
 }
-#  else
-/* This is the model-independent version. Since the standard I/O library
-   can't handle far buffers in the medium and small models, we have to copy
-   the data.
-*/
-
-#define NEAR_BUF_SIZE 1024
-#define MIN(a,b) (a <= b ? a : b)
-
-static void PNGCBAPI
-png_default_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
-{
-   png_size_t check;
-   png_byte *n_data;
-   png_FILE_p io_ptr;
-
-   if (png_ptr == NULL)
-      return;
-
-   /* Check if data really is near. If so, use usual code. */
-   n_data = (png_byte *)CVT_PTR_NOCHECK(data);
-   io_ptr = (png_FILE_p)CVT_PTR(png_ptr->io_ptr);
-
-   if ((png_bytep)n_data == data)
-   {
-      check = fread(n_data, 1, length, io_ptr);
-   }
-
-   else
-   {
-      png_byte buf[NEAR_BUF_SIZE];
-      png_size_t read, remaining, err;
-      check = 0;
-      remaining = length;
-
-      do
-      {
-         read = MIN(NEAR_BUF_SIZE, remaining);
-         err = fread(buf, 1, read, io_ptr);
-         png_memcpy(data, buf, read); /* copy far buffer to near buffer */
-
-         if (err != read)
-            break;
-
-         else
-            check += err;
-
-         data += read;
-         remaining -= read;
-      }
-      while (remaining != 0);
-   }
-
-   if ((png_uint_32)check != (png_uint_32)length)
-      png_error(png_ptr, "read Error");
-}
-#  endif
 #endif
 
 /* This function allows the application to supply a new input function
@@ -142,8 +84,8 @@ png_default_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
  *                be used.
  */
 void PNGAPI
-png_set_read_fn(png_structp png_ptr, png_voidp io_ptr,
-   png_rw_ptr read_data_fn)
+png_set_read_fn(png_structrp png_ptr, png_voidp io_ptr,
+    png_rw_ptr read_data_fn)
 {
    if (png_ptr == NULL)
       return;
@@ -160,6 +102,7 @@ png_set_read_fn(png_structp png_ptr, png_voidp io_ptr,
    png_ptr->read_data_fn = read_data_fn;
 #endif
 
+#ifdef PNG_WRITE_SUPPORTED
    /* It is an error to write to a read device */
    if (png_ptr->write_data_fn != NULL)
    {
@@ -168,9 +111,10 @@ png_set_read_fn(png_structp png_ptr, png_voidp io_ptr,
           "Can't set both read_data_fn and write_data_fn in the"
           " same structure");
    }
+#endif
 
 #ifdef PNG_WRITE_FLUSH_SUPPORTED
    png_ptr->output_flush_fn = NULL;
 #endif
 }
-#endif /* PNG_READ_SUPPORTED */
+#endif /* READ */
diff --git a/reg-io/png/lpng1510/pngrtran.c b/reg-io/png/lpng/pngrtran.c
similarity index 67%
rename from reg-io/png/lpng1510/pngrtran.c
rename to reg-io/png/lpng/pngrtran.c
index 6ec9089a..87f48aad 100644
--- a/reg-io/png/lpng1510/pngrtran.c
+++ b/reg-io/png/lpng/pngrtran.c
@@ -1,10 +1,10 @@
 
 /* pngrtran.c - transforms the data in a row for PNG readers
  *
- * Last changed in libpng 1.5.10 [March 8, 2012]
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
  *
  * This code is released under the libpng license.
  * For conditions of distribution and use, see the disclaimer
@@ -18,11 +18,22 @@
 
 #include "pngpriv.h"
 
+#ifdef PNG_ARM_NEON_IMPLEMENTATION
+#  if PNG_ARM_NEON_IMPLEMENTATION == 1
+#    define PNG_ARM_NEON_INTRINSICS_AVAILABLE
+#    if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
+#      include <arm64_neon.h>
+#    else
+#      include <arm_neon.h>
+#    endif
+#  endif
+#endif
+
 #ifdef PNG_READ_SUPPORTED
 
 /* Set the action on getting a CRC error for an ancillary or critical chunk. */
 void PNGAPI
-png_set_crc_action(png_structp png_ptr, int crit_action, int ancil_action)
+png_set_crc_action(png_structrp png_ptr, int crit_action, int ancil_action)
 {
    png_debug(1, "in png_set_crc_action");
 
@@ -48,7 +59,8 @@ png_set_crc_action(png_structp png_ptr, int crit_action, int ancil_action)
 
       case PNG_CRC_WARN_DISCARD:    /* Not a valid action for critical data */
          png_warning(png_ptr,
-            "Can't discard critical data on CRC error");
+             "Can't discard critical data on CRC error");
+         /* FALLTHROUGH */
       case PNG_CRC_ERROR_QUIT:                                /* Error/quit */
 
       case PNG_CRC_DEFAULT:
@@ -88,16 +100,47 @@ png_set_crc_action(png_structp png_ptr, int crit_action, int ancil_action)
    }
 }
 
+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
+/* Is it OK to set a transformation now?  Only if png_start_read_image or
+ * png_read_update_info have not been called.  It is not necessary for the IHDR
+ * to have been read in all cases; the need_IHDR parameter allows for this
+ * check too.
+ */
+static int
+png_rtran_ok(png_structrp png_ptr, int need_IHDR)
+{
+   if (png_ptr != NULL)
+   {
+      if ((png_ptr->flags & PNG_FLAG_ROW_INIT) != 0)
+         png_app_error(png_ptr,
+             "invalid after png_start_read_image or png_read_update_info");
+
+      else if (need_IHDR && (png_ptr->mode & PNG_HAVE_IHDR) == 0)
+         png_app_error(png_ptr, "invalid before the PNG header has been read");
+
+      else
+      {
+         /* Turn on failure to initialize correctly for all transforms. */
+         png_ptr->flags |= PNG_FLAG_DETECT_UNINITIALIZED;
+
+         return 1; /* Ok */
+      }
+   }
+
+   return 0; /* no png_error possible! */
+}
+#endif
+
 #ifdef PNG_READ_BACKGROUND_SUPPORTED
 /* Handle alpha and tRNS via a background color */
 void PNGFAPI
-png_set_background_fixed(png_structp png_ptr,
+png_set_background_fixed(png_structrp png_ptr,
     png_const_color_16p background_color, int background_gamma_code,
     int need_expand, png_fixed_point background_gamma)
 {
    png_debug(1, "in png_set_background_fixed");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0 || background_color == NULL)
       return;
 
    if (background_gamma_code == PNG_BACKGROUND_GAMMA_UNKNOWN)
@@ -110,11 +153,10 @@ png_set_background_fixed(png_structp png_ptr,
    png_ptr->transformations &= ~PNG_ENCODE_ALPHA;
    png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
 
-   png_memcpy(&(png_ptr->background), background_color,
-      png_sizeof(png_color_16));
+   png_ptr->background = *background_color;
    png_ptr->background_gamma = background_gamma;
    png_ptr->background_gamma_type = (png_byte)(background_gamma_code);
-   if (need_expand)
+   if (need_expand != 0)
       png_ptr->transformations |= PNG_BACKGROUND_EXPAND;
    else
       png_ptr->transformations &= ~PNG_BACKGROUND_EXPAND;
@@ -122,14 +164,14 @@ png_set_background_fixed(png_structp png_ptr,
 
 #  ifdef PNG_FLOATING_POINT_SUPPORTED
 void PNGAPI
-png_set_background(png_structp png_ptr,
+png_set_background(png_structrp png_ptr,
     png_const_color_16p background_color, int background_gamma_code,
     int need_expand, double background_gamma)
 {
    png_set_background_fixed(png_ptr, background_color, background_gamma_code,
       need_expand, png_fixed(png_ptr, background_gamma, "png_set_background"));
 }
-#  endif  /* FLOATING_POINT */
+#  endif /* FLOATING_POINT */
 #endif /* READ_BACKGROUND */
 
 /* Scale 16-bit depth files to 8-bit depth.  If both of these are set then the
@@ -138,11 +180,11 @@ png_set_background(png_structp png_ptr,
  */
 #ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
 void PNGAPI
-png_set_scale_16(png_structp png_ptr)
+png_set_scale_16(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_scale_16");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0)
       return;
 
    png_ptr->transformations |= PNG_SCALE_16_TO_8;
@@ -152,11 +194,11 @@ png_set_scale_16(png_structp png_ptr)
 #ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
 /* Chop 16-bit depth files to 8-bit depth */
 void PNGAPI
-png_set_strip_16(png_structp png_ptr)
+png_set_strip_16(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_strip_16");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0)
       return;
 
    png_ptr->transformations |= PNG_16_TO_8;
@@ -165,11 +207,11 @@ png_set_strip_16(png_structp png_ptr)
 
 #ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
 void PNGAPI
-png_set_strip_alpha(png_structp png_ptr)
+png_set_strip_alpha(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_strip_alpha");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0)
       return;
 
    png_ptr->transformations |= PNG_STRIP_ALPHA;
@@ -178,8 +220,8 @@ png_set_strip_alpha(png_structp png_ptr)
 
 #if defined(PNG_READ_ALPHA_MODE_SUPPORTED) || defined(PNG_READ_GAMMA_SUPPORTED)
 static png_fixed_point
-translate_gamma_flags(png_structp png_ptr, png_fixed_point output_gamma,
-   int is_screen)
+translate_gamma_flags(png_structrp png_ptr, png_fixed_point output_gamma,
+    int is_screen)
 {
    /* Check for flag values.  The main reason for having the old Mac value as a
     * flag is that it is pretty near impossible to work out what the correct
@@ -194,8 +236,10 @@ translate_gamma_flags(png_structp png_ptr, png_fixed_point output_gamma,
        */
 #     ifdef PNG_READ_sRGB_SUPPORTED
          png_ptr->flags |= PNG_FLAG_ASSUME_sRGB;
+#     else
+         PNG_UNUSED(png_ptr)
 #     endif
-      if (is_screen)
+      if (is_screen != 0)
          output_gamma = PNG_GAMMA_sRGB;
       else
          output_gamma = PNG_GAMMA_sRGB_INVERSE;
@@ -204,7 +248,7 @@ translate_gamma_flags(png_structp png_ptr, png_fixed_point output_gamma,
    else if (output_gamma == PNG_GAMMA_MAC_18 ||
       output_gamma == PNG_FP_1 / PNG_GAMMA_MAC_18)
    {
-      if (is_screen)
+      if (is_screen != 0)
          output_gamma = PNG_GAMMA_MAC_OLD;
       else
          output_gamma = PNG_GAMMA_MAC_INVERSE;
@@ -215,7 +259,7 @@ translate_gamma_flags(png_structp png_ptr, png_fixed_point output_gamma,
 
 #  ifdef PNG_FLOATING_POINT_SUPPORTED
 static png_fixed_point
-convert_gamma_value(png_structp png_ptr, double output_gamma)
+convert_gamma_value(png_structrp png_ptr, double output_gamma)
 {
    /* The following silently ignores cases where fixed point (times 100,000)
     * gamma values are passed to the floating point API.  This is safe and it
@@ -240,15 +284,15 @@ convert_gamma_value(png_structp png_ptr, double output_gamma)
 
 #ifdef PNG_READ_ALPHA_MODE_SUPPORTED
 void PNGFAPI
-png_set_alpha_mode_fixed(png_structp png_ptr, int mode,
-   png_fixed_point output_gamma)
+png_set_alpha_mode_fixed(png_structrp png_ptr, int mode,
+    png_fixed_point output_gamma)
 {
    int compose = 0;
    png_fixed_point file_gamma;
 
-   png_debug(1, "in png_set_alpha_mode");
+   png_debug(1, "in png_set_alpha_mode_fixed");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0)
       return;
 
    output_gamma = translate_gamma_flags(png_ptr, output_gamma, 1/*screen*/);
@@ -257,9 +301,12 @@ png_set_alpha_mode_fixed(png_structp png_ptr, int mode,
     * is expected to be 1 or greater, but this range test allows for some
     * viewing correction values.  The intent is to weed out users of this API
     * who use the inverse of the gamma value accidentally!  Since some of these
-    * values are reasonable this may have to be changed.
+    * values are reasonable this may have to be changed:
+    *
+    * 1.6.x: changed from 0.07..3 to 0.01..100 (to accommodate the optimal 16-bit
+    * gamma of 36, and its reciprocal.)
     */
-   if (output_gamma < 70000 || output_gamma > 300000)
+   if (output_gamma < 1000 || output_gamma > 10000000)
       png_error(png_ptr, "output gamma out of expected range");
 
    /* The default file gamma is the inverse of the output gamma; the output
@@ -320,8 +367,11 @@ png_set_alpha_mode_fixed(png_structp png_ptr, int mode,
     * the side effect that the gamma in a second call to png_set_alpha_mode will
     * be ignored.)
     */
-   if (png_ptr->gamma == 0)
-      png_ptr->gamma = file_gamma;
+   if (png_ptr->colorspace.gamma == 0)
+   {
+      png_ptr->colorspace.gamma = file_gamma;
+      png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA;
+   }
 
    /* But always set the output gamma: */
    png_ptr->screen_gamma = output_gamma;
@@ -329,31 +379,28 @@ png_set_alpha_mode_fixed(png_structp png_ptr, int mode,
    /* Finally, if pre-multiplying, set the background fields to achieve the
     * desired result.
     */
-   if (compose)
+   if (compose != 0)
    {
       /* And obtain alpha pre-multiplication by composing on black: */
-      png_memset(&png_ptr->background, 0, sizeof png_ptr->background);
-      png_ptr->background_gamma = png_ptr->gamma; /* just in case */
+      memset(&png_ptr->background, 0, (sizeof png_ptr->background));
+      png_ptr->background_gamma = png_ptr->colorspace.gamma; /* just in case */
       png_ptr->background_gamma_type = PNG_BACKGROUND_GAMMA_FILE;
       png_ptr->transformations &= ~PNG_BACKGROUND_EXPAND;
 
-      if (png_ptr->transformations & PNG_COMPOSE)
+      if ((png_ptr->transformations & PNG_COMPOSE) != 0)
          png_error(png_ptr,
-            "conflicting calls to set alpha mode and background");
+             "conflicting calls to set alpha mode and background");
 
       png_ptr->transformations |= PNG_COMPOSE;
    }
-
-   /* New API, make sure apps call the correct initializers: */
-   png_ptr->flags |= PNG_FLAG_DETECT_UNINITIALIZED;
 }
 
 #  ifdef PNG_FLOATING_POINT_SUPPORTED
 void PNGAPI
-png_set_alpha_mode(png_structp png_ptr, int mode, double output_gamma)
+png_set_alpha_mode(png_structrp png_ptr, int mode, double output_gamma)
 {
    png_set_alpha_mode_fixed(png_ptr, mode, convert_gamma_value(png_ptr,
-      output_gamma));
+       output_gamma));
 }
 #  endif
 #endif
@@ -362,7 +409,7 @@ png_set_alpha_mode(png_structp png_ptr, int mode, double output_gamma)
 /* Dither file to 8-bit.  Supply a palette, the current number
  * of elements in the palette, the maximum number of elements
  * allowed, and a histogram if possible.  If the current number
- * of colors is greater then the maximum number, the palette will be
+ * of colors is greater than the maximum number, the palette will be
  * modified to fit in the maximum number.  "full_quantize" indicates
  * whether we need a quantizing cube set up for RGB images, or if we
  * simply are reducing the number of colors in a paletted image.
@@ -370,31 +417,31 @@ png_set_alpha_mode(png_structp png_ptr, int mode, double output_gamma)
 
 typedef struct png_dsort_struct
 {
-   struct png_dsort_struct FAR * next;
+   struct png_dsort_struct * next;
    png_byte left;
    png_byte right;
 } png_dsort;
-typedef png_dsort FAR *       png_dsortp;
-typedef png_dsort FAR * FAR * png_dsortpp;
+typedef png_dsort *   png_dsortp;
+typedef png_dsort * * png_dsortpp;
 
 void PNGAPI
-png_set_quantize(png_structp png_ptr, png_colorp palette,
+png_set_quantize(png_structrp png_ptr, png_colorp palette,
     int num_palette, int maximum_colors, png_const_uint_16p histogram,
     int full_quantize)
 {
    png_debug(1, "in png_set_quantize");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0)
       return;
 
    png_ptr->transformations |= PNG_QUANTIZE;
 
-   if (!full_quantize)
+   if (full_quantize == 0)
    {
       int i;
 
       png_ptr->quantize_index = (png_bytep)png_malloc(png_ptr,
-          (png_uint_32)(num_palette * png_sizeof(png_byte)));
+          (png_alloc_size_t)((png_uint_32)num_palette * (sizeof (png_byte))));
       for (i = 0; i < num_palette; i++)
          png_ptr->quantize_index[i] = (png_byte)i;
    }
@@ -411,7 +458,7 @@ png_set_quantize(png_structp png_ptr, png_colorp palette,
 
          /* Initialize an array to sort colors */
          png_ptr->quantize_sort = (png_bytep)png_malloc(png_ptr,
-             (png_uint_32)(num_palette * png_sizeof(png_byte)));
+             (png_alloc_size_t)((png_uint_32)num_palette * (sizeof (png_byte))));
 
          /* Initialize the quantize_sort array */
          for (i = 0; i < num_palette; i++)
@@ -444,12 +491,12 @@ png_set_quantize(png_structp png_ptr, png_colorp palette,
                }
             }
 
-            if (done)
+            if (done != 0)
                break;
          }
 
          /* Swap the palette around, and set up a table, if necessary */
-         if (full_quantize)
+         if (full_quantize != 0)
          {
             int j = num_palette;
 
@@ -545,9 +592,11 @@ png_set_quantize(png_structp png_ptr, png_colorp palette,
 
          /* Initialize palette index arrays */
          png_ptr->index_to_palette = (png_bytep)png_malloc(png_ptr,
-             (png_uint_32)(num_palette * png_sizeof(png_byte)));
+             (png_alloc_size_t)((png_uint_32)num_palette *
+             (sizeof (png_byte))));
          png_ptr->palette_to_index = (png_bytep)png_malloc(png_ptr,
-             (png_uint_32)(num_palette * png_sizeof(png_byte)));
+             (png_alloc_size_t)((png_uint_32)num_palette *
+             (sizeof (png_byte))));
 
          /* Initialize the sort array */
          for (i = 0; i < num_palette; i++)
@@ -556,8 +605,8 @@ png_set_quantize(png_structp png_ptr, png_colorp palette,
             png_ptr->palette_to_index[i] = (png_byte)i;
          }
 
-         hash = (png_dsortpp)png_calloc(png_ptr, (png_uint_32)(769 *
-             png_sizeof(png_dsortp)));
+         hash = (png_dsortpp)png_calloc(png_ptr, (png_alloc_size_t)(769 *
+             (sizeof (png_dsortp))));
 
          num_new_palette = num_palette;
 
@@ -587,7 +636,7 @@ png_set_quantize(png_structp png_ptr, png_colorp palette,
                   {
 
                      t = (png_dsortp)png_malloc_warn(png_ptr,
-                         (png_uint_32)(png_sizeof(png_dsort)));
+                         (png_alloc_size_t)(sizeof (png_dsort)));
 
                      if (t == NULL)
                          break;
@@ -632,7 +681,7 @@ png_set_quantize(png_structp png_ptr, png_colorp palette,
                         num_new_palette--;
                         palette[png_ptr->index_to_palette[j]]
                             = palette[num_new_palette];
-                        if (!full_quantize)
+                        if (full_quantize == 0)
                         {
                            int k;
 
@@ -700,7 +749,7 @@ png_set_quantize(png_structp png_ptr, png_colorp palette,
    }
    png_ptr->num_palette = (png_uint_16)num_palette;
 
-   if (full_quantize)
+   if (full_quantize != 0)
    {
       int i;
       png_bytep distance;
@@ -709,15 +758,15 @@ png_set_quantize(png_structp png_ptr, png_colorp palette,
       int num_red = (1 << PNG_QUANTIZE_RED_BITS);
       int num_green = (1 << PNG_QUANTIZE_GREEN_BITS);
       int num_blue = (1 << PNG_QUANTIZE_BLUE_BITS);
-      png_size_t num_entries = ((png_size_t)1 << total_bits);
+      size_t num_entries = ((size_t)1 << total_bits);
 
       png_ptr->palette_lookup = (png_bytep)png_calloc(png_ptr,
-          (png_uint_32)(num_entries * png_sizeof(png_byte)));
+          (png_alloc_size_t)(num_entries * (sizeof (png_byte))));
 
-      distance = (png_bytep)png_malloc(png_ptr, (png_uint_32)(num_entries *
-          png_sizeof(png_byte)));
+      distance = (png_bytep)png_malloc(png_ptr, (png_alloc_size_t)(num_entries *
+          (sizeof (png_byte))));
 
-      png_memset(distance, 0xff, num_entries * png_sizeof(png_byte));
+      memset(distance, 0xff, num_entries * (sizeof (png_byte)));
 
       for (i = 0; i < num_palette; i++)
       {
@@ -762,23 +811,22 @@ png_set_quantize(png_structp png_ptr, png_colorp palette,
       png_free(png_ptr, distance);
    }
 }
-#endif /* PNG_READ_QUANTIZE_SUPPORTED */
+#endif /* READ_QUANTIZE */
 
 #ifdef PNG_READ_GAMMA_SUPPORTED
 void PNGFAPI
-png_set_gamma_fixed(png_structp png_ptr, png_fixed_point scrn_gamma,
-   png_fixed_point file_gamma)
+png_set_gamma_fixed(png_structrp png_ptr, png_fixed_point scrn_gamma,
+    png_fixed_point file_gamma)
 {
    png_debug(1, "in png_set_gamma_fixed");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0)
       return;
 
    /* New in libpng-1.5.4 - reserve particular negative values as flags. */
    scrn_gamma = translate_gamma_flags(png_ptr, scrn_gamma, 1/*screen*/);
    file_gamma = translate_gamma_flags(png_ptr, file_gamma, 0/*file*/);
 
-#if PNG_LIBPNG_VER >= 10600
    /* Checking the gamma values for being >0 was added in 1.5.4 along with the
     * premultiplied alpha support; this actually hides an undocumented feature
     * of the previous implementation which allowed gamma processing to be
@@ -787,31 +835,32 @@ png_set_gamma_fixed(png_structp png_ptr, png_fixed_point scrn_gamma,
     * accept '0' for the gamma value it takes, because it isn't always used.
     *
     * Since this is an API change (albeit a very minor one that removes an
-    * undocumented API feature) it will not be made until libpng-1.6.0.
+    * undocumented API feature) the following checks were only enabled in
+    * libpng-1.6.0.
     */
    if (file_gamma <= 0)
       png_error(png_ptr, "invalid file gamma in png_set_gamma");
 
    if (scrn_gamma <= 0)
       png_error(png_ptr, "invalid screen gamma in png_set_gamma");
-#endif
 
    /* Set the gamma values unconditionally - this overrides the value in the PNG
     * file if a gAMA chunk was present.  png_set_alpha_mode provides a
     * different, easier, way to default the file gamma.
     */
-   png_ptr->gamma = file_gamma;
+   png_ptr->colorspace.gamma = file_gamma;
+   png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA;
    png_ptr->screen_gamma = scrn_gamma;
 }
 
 #  ifdef PNG_FLOATING_POINT_SUPPORTED
 void PNGAPI
-png_set_gamma(png_structp png_ptr, double scrn_gamma, double file_gamma)
+png_set_gamma(png_structrp png_ptr, double scrn_gamma, double file_gamma)
 {
    png_set_gamma_fixed(png_ptr, convert_gamma_value(png_ptr, scrn_gamma),
-      convert_gamma_value(png_ptr, file_gamma));
+       convert_gamma_value(png_ptr, file_gamma));
 }
-#  endif /* FLOATING_POINT_SUPPORTED */
+#  endif /* FLOATING_POINT */
 #endif /* READ_GAMMA */
 
 #ifdef PNG_READ_EXPAND_SUPPORTED
@@ -820,15 +869,14 @@ png_set_gamma(png_structp png_ptr, double scrn_gamma, double file_gamma)
  * to alpha channels.
  */
 void PNGAPI
-png_set_expand(png_structp png_ptr)
+png_set_expand(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_expand");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0)
       return;
 
    png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
-   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
 }
 
 /* GRR 19990627:  the following three functions currently are identical
@@ -851,90 +899,85 @@ png_set_expand(png_structp png_ptr)
 
 /* Expand paletted images to RGB. */
 void PNGAPI
-png_set_palette_to_rgb(png_structp png_ptr)
+png_set_palette_to_rgb(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_palette_to_rgb");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0)
       return;
 
    png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
-   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
 }
 
 /* Expand grayscale images of less than 8-bit depth to 8 bits. */
 void PNGAPI
-png_set_expand_gray_1_2_4_to_8(png_structp png_ptr)
+png_set_expand_gray_1_2_4_to_8(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_expand_gray_1_2_4_to_8");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0)
       return;
 
    png_ptr->transformations |= PNG_EXPAND;
-   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
 }
 
-
-
 /* Expand tRNS chunks to alpha channels. */
 void PNGAPI
-png_set_tRNS_to_alpha(png_structp png_ptr)
+png_set_tRNS_to_alpha(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_tRNS_to_alpha");
 
+   if (png_rtran_ok(png_ptr, 0) == 0)
+      return;
+
    png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
-   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
 }
-#endif /* defined(PNG_READ_EXPAND_SUPPORTED) */
+#endif /* READ_EXPAND */
 
 #ifdef PNG_READ_EXPAND_16_SUPPORTED
 /* Expand to 16-bit channels, expand the tRNS chunk too (because otherwise
  * it may not work correctly.)
  */
 void PNGAPI
-png_set_expand_16(png_structp png_ptr)
+png_set_expand_16(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_expand_16");
 
-   if (png_ptr == NULL)
+   if (png_rtran_ok(png_ptr, 0) == 0)
       return;
 
    png_ptr->transformations |= (PNG_EXPAND_16 | PNG_EXPAND | PNG_EXPAND_tRNS);
-   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
-
-   /* New API, make sure apps call the correct initializers: */
-   png_ptr->flags |= PNG_FLAG_DETECT_UNINITIALIZED;
 }
 #endif
 
 #ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
 void PNGAPI
-png_set_gray_to_rgb(png_structp png_ptr)
+png_set_gray_to_rgb(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_gray_to_rgb");
 
-   if (png_ptr != NULL)
-   {
-      /* Because rgb must be 8 bits or more: */
-      png_set_expand_gray_1_2_4_to_8(png_ptr);
-      png_ptr->transformations |= PNG_GRAY_TO_RGB;
-      png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
-   }
+   if (png_rtran_ok(png_ptr, 0) == 0)
+      return;
+
+   /* Because rgb must be 8 bits or more: */
+   png_set_expand_gray_1_2_4_to_8(png_ptr);
+   png_ptr->transformations |= PNG_GRAY_TO_RGB;
 }
 #endif
 
 #ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
 void PNGFAPI
-png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action,
+png_set_rgb_to_gray_fixed(png_structrp png_ptr, int error_action,
     png_fixed_point red, png_fixed_point green)
 {
-   png_debug(1, "in png_set_rgb_to_gray");
+   png_debug(1, "in png_set_rgb_to_gray_fixed");
 
-   if (png_ptr == NULL)
+   /* Need the IHDR here because of the check on color_type below. */
+   /* TODO: fix this */
+   if (png_rtran_ok(png_ptr, 1) == 0)
       return;
 
-   switch(error_action)
+   switch (error_action)
    {
       case PNG_ERROR_ACTION_NONE:
          png_ptr->transformations |= PNG_RGB_TO_GRAY;
@@ -950,17 +993,20 @@ png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action,
 
       default:
          png_error(png_ptr, "invalid error action to rgb_to_gray");
-         break;
    }
+
    if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
 #ifdef PNG_READ_EXPAND_SUPPORTED
       png_ptr->transformations |= PNG_EXPAND;
 #else
    {
-      png_warning(png_ptr,
-        "Cannot do RGB_TO_GRAY without EXPAND_SUPPORTED");
+      /* Make this an error in 1.6 because otherwise the application may assume
+       * that it just worked and get a memory overwrite.
+       */
+      png_error(png_ptr,
+          "Cannot do RGB_TO_GRAY without EXPAND_SUPPORTED");
 
-      png_ptr->transformations &= ~PNG_RGB_TO_GRAY;
+      /* png_ptr->transformations &= ~PNG_RGB_TO_GRAY; */
    }
 #endif
    {
@@ -969,7 +1015,7 @@ png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action,
          png_uint_16 red_int, green_int;
 
          /* NOTE: this calculation does not round, but this behavior is retained
-          * for consistency, the inaccuracy is very small.  The code here always
+          * for consistency; the inaccuracy is very small.  The code here always
           * overwrites the coefficients, regardless of whether they have been
           * defaulted or set already.
           */
@@ -984,8 +1030,8 @@ png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action,
       else
       {
          if (red >= 0 && green >= 0)
-            png_warning(png_ptr,
-               "ignoring out of range rgb_to_gray coefficients");
+            png_app_warning(png_ptr,
+                "ignoring out of range rgb_to_gray coefficients");
 
          /* Use the defaults, from the cHRM chunk if set, else the historical
           * values which are close to the sRGB/HDTV/ITU-Rec 709 values.  See
@@ -994,7 +1040,7 @@ png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action,
           * something has already provided a default.
           */
          if (png_ptr->rgb_to_gray_red_coeff == 0 &&
-            png_ptr->rgb_to_gray_green_coeff == 0)
+             png_ptr->rgb_to_gray_green_coeff == 0)
          {
             png_ptr->rgb_to_gray_red_coeff   = 6968;
             png_ptr->rgb_to_gray_green_coeff = 23434;
@@ -1010,31 +1056,25 @@ png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action,
  */
 
 void PNGAPI
-png_set_rgb_to_gray(png_structp png_ptr, int error_action, double red,
-   double green)
+png_set_rgb_to_gray(png_structrp png_ptr, int error_action, double red,
+    double green)
 {
-   if (png_ptr == NULL)
-      return;
-
    png_set_rgb_to_gray_fixed(png_ptr, error_action,
-      png_fixed(png_ptr, red, "rgb to gray red coefficient"),
+       png_fixed(png_ptr, red, "rgb to gray red coefficient"),
       png_fixed(png_ptr, green, "rgb to gray green coefficient"));
 }
 #endif /* FLOATING POINT */
 
-#endif
+#endif /* RGB_TO_GRAY */
 
 #if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
     defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
 void PNGAPI
-png_set_read_user_transform_fn(png_structp png_ptr, png_user_transform_ptr
+png_set_read_user_transform_fn(png_structrp png_ptr, png_user_transform_ptr
     read_user_transform_fn)
 {
    png_debug(1, "in png_set_read_user_transform_fn");
 
-   if (png_ptr == NULL)
-      return;
-
 #ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
    png_ptr->transformations |= PNG_USER_TRANSFORM;
    png_ptr->read_user_transform_fn = read_user_transform_fn;
@@ -1068,13 +1108,13 @@ png_gamma_threshold(png_fixed_point screen_gamma, png_fixed_point file_gamma)
  * the palette.
  */
 
-/*For the moment 'png_init_palette_transformations' and
+/* For the moment 'png_init_palette_transformations' and
  * 'png_init_rgb_transformations' only do some flag canceling optimizations.
  * The intent is that these two routines should have palette or rgb operations
  * extracted from 'png_init_read_transformations'.
  */
 static void /* PRIVATE */
-png_init_palette_transformations(png_structp png_ptr)
+png_init_palette_transformations(png_structrp png_ptr)
 {
    /* Called to handle the (input) palette case.  In png_do_read_transformations
     * the first step is to expand the palette if requested, so this code must
@@ -1093,25 +1133,31 @@ png_init_palette_transformations(png_structp png_ptr)
 
       /* Ignore if all the entries are opaque (unlikely!) */
       for (i=0; i<png_ptr->num_trans; ++i)
+      {
          if (png_ptr->trans_alpha[i] == 255)
             continue;
          else if (png_ptr->trans_alpha[i] == 0)
             input_has_transparency = 1;
          else
+         {
+            input_has_transparency = 1;
             input_has_alpha = 1;
+            break;
+         }
+      }
    }
 
    /* If no alpha we can optimize. */
-   if (!input_has_alpha)
+   if (input_has_alpha == 0)
    {
       /* Any alpha means background and associative alpha processing is
-       * required, however if the alpha is 0 or 1 throughout OPTIIMIZE_ALPHA
+       * required, however if the alpha is 0 or 1 throughout OPTIMIZE_ALPHA
        * and ENCODE_ALPHA are irrelevant.
        */
       png_ptr->transformations &= ~PNG_ENCODE_ALPHA;
       png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
 
-      if (!input_has_transparency)
+      if (input_has_transparency == 0)
          png_ptr->transformations &= ~(PNG_COMPOSE | PNG_BACKGROUND_EXPAND);
    }
 
@@ -1124,8 +1170,8 @@ png_init_palette_transformations(png_structp png_ptr)
    /* The following code cannot be entered in the alpha pre-multiplication case
     * because PNG_BACKGROUND_EXPAND is cancelled below.
     */
-   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) &&
-       (png_ptr->transformations & PNG_EXPAND))
+   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) != 0 &&
+       (png_ptr->transformations & PNG_EXPAND) != 0)
    {
       {
          png_ptr->background.red   =
@@ -1136,28 +1182,28 @@ png_init_palette_transformations(png_structp png_ptr)
              png_ptr->palette[png_ptr->background.index].blue;
 
 #ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
-        if (png_ptr->transformations & PNG_INVERT_ALPHA)
-        {
-           if (!(png_ptr->transformations & PNG_EXPAND_tRNS))
-           {
-              /* Invert the alpha channel (in tRNS) unless the pixels are
-               * going to be expanded, in which case leave it for later
-               */
-              int i, istop = png_ptr->num_trans;
-
-              for (i=0; i<istop; i++)
-                 png_ptr->trans_alpha[i] = (png_byte)(255 -
-                    png_ptr->trans_alpha[i]);
-           }
-        }
-#endif /* PNG_READ_INVERT_ALPHA_SUPPORTED */
+         if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0)
+         {
+            if ((png_ptr->transformations & PNG_EXPAND_tRNS) == 0)
+            {
+               /* Invert the alpha channel (in tRNS) unless the pixels are
+                * going to be expanded, in which case leave it for later
+                */
+               int i, istop = png_ptr->num_trans;
+
+               for (i = 0; i < istop; i++)
+                  png_ptr->trans_alpha[i] =
+                      (png_byte)(255 - png_ptr->trans_alpha[i]);
+            }
+         }
+#endif /* READ_INVERT_ALPHA */
       }
    } /* background expand and (therefore) no alpha association. */
-#endif /* PNG_READ_EXPAND_SUPPORTED && PNG_READ_BACKGROUND_SUPPORTED */
+#endif /* READ_EXPAND && READ_BACKGROUND */
 }
 
 static void /* PRIVATE */
-png_init_rgb_transformations(png_structp png_ptr)
+png_init_rgb_transformations(png_structrp png_ptr)
 {
    /* Added to libpng-1.5.4: check the color type to determine whether there
     * is any alpha or transparency in the image and simply cancel the
@@ -1167,10 +1213,10 @@ png_init_rgb_transformations(png_structp png_ptr)
    int input_has_transparency = png_ptr->num_trans > 0;
 
    /* If no alpha we can optimize. */
-   if (!input_has_alpha)
+   if (input_has_alpha == 0)
    {
       /* Any alpha means background and associative alpha processing is
-       * required, however if the alpha is 0 or 1 throughout OPTIIMIZE_ALPHA
+       * required, however if the alpha is 0 or 1 throughout OPTIMIZE_ALPHA
        * and ENCODE_ALPHA are irrelevant.
        */
 #     ifdef PNG_READ_ALPHA_MODE_SUPPORTED
@@ -1178,7 +1224,7 @@ png_init_rgb_transformations(png_structp png_ptr)
          png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
 #     endif
 
-      if (!input_has_transparency)
+      if (input_has_transparency == 0)
          png_ptr->transformations &= ~(PNG_COMPOSE | PNG_BACKGROUND_EXPAND);
    }
 
@@ -1191,9 +1237,9 @@ png_init_rgb_transformations(png_structp png_ptr)
    /* The following code cannot be entered in the alpha pre-multiplication case
     * because PNG_BACKGROUND_EXPAND is cancelled below.
     */
-   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) &&
-       (png_ptr->transformations & PNG_EXPAND) &&
-       !(png_ptr->color_type & PNG_COLOR_MASK_COLOR))
+   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) != 0 &&
+       (png_ptr->transformations & PNG_EXPAND) != 0 &&
+       (png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0)
        /* i.e., GRAY or GRAY_ALPHA */
    {
       {
@@ -1221,7 +1267,7 @@ png_init_rgb_transformations(png_structp png_ptr)
             default:
 
             case 8:
-               /* Already 8 bits, fall through */
+               /* FALLTHROUGH */ /*  (Already 8 bits) */
 
             case 16:
                /* Already a full 16 bits */
@@ -1231,18 +1277,18 @@ png_init_rgb_transformations(png_structp png_ptr)
          png_ptr->background.red = png_ptr->background.green =
             png_ptr->background.blue = (png_uint_16)gray;
 
-         if (!(png_ptr->transformations & PNG_EXPAND_tRNS))
+         if ((png_ptr->transformations & PNG_EXPAND_tRNS) == 0)
          {
             png_ptr->trans_color.red = png_ptr->trans_color.green =
                png_ptr->trans_color.blue = (png_uint_16)trans_gray;
          }
       }
    } /* background expand and (therefore) no alpha association. */
-#endif /* PNG_READ_EXPAND_SUPPORTED && PNG_READ_BACKGROUND_SUPPORTED */
+#endif /* READ_EXPAND && READ_BACKGROUND */
 }
 
 void /* PRIVATE */
-png_init_read_transformations(png_structp png_ptr)
+png_init_read_transformations(png_structrp png_ptr)
 {
    png_debug(1, "in png_init_read_transformations");
 
@@ -1267,26 +1313,26 @@ png_init_read_transformations(png_structp png_ptr)
        */
       int gamma_correction = 0;
 
-      if (png_ptr->gamma != 0) /* has been set */
+      if (png_ptr->colorspace.gamma != 0) /* has been set */
       {
          if (png_ptr->screen_gamma != 0) /* screen set too */
-            gamma_correction = png_gamma_threshold(png_ptr->gamma,
-               png_ptr->screen_gamma);
+            gamma_correction = png_gamma_threshold(png_ptr->colorspace.gamma,
+                png_ptr->screen_gamma);
 
          else
             /* Assume the output matches the input; a long time default behavior
              * of libpng, although the standard has nothing to say about this.
              */
-            png_ptr->screen_gamma = png_reciprocal(png_ptr->gamma);
+            png_ptr->screen_gamma = png_reciprocal(png_ptr->colorspace.gamma);
       }
 
       else if (png_ptr->screen_gamma != 0)
          /* The converse - assume the file matches the screen, note that this
-          * perhaps undesireable default can (from 1.5.4) be changed by calling
+          * perhaps undesirable default can (from 1.5.4) be changed by calling
           * png_set_alpha_mode (even if the alpha handling mode isn't required
           * or isn't changed from the default.)
           */
-         png_ptr->gamma = png_reciprocal(png_ptr->screen_gamma);
+         png_ptr->colorspace.gamma = png_reciprocal(png_ptr->screen_gamma);
 
       else /* neither are set */
          /* Just in case the following prevents any processing - file and screen
@@ -1294,7 +1340,10 @@ png_init_read_transformations(png_structp png_ptr)
           * third gamma value other than png_set_background with 'UNIQUE', and,
           * prior to 1.5.4
           */
-         png_ptr->screen_gamma = png_ptr->gamma = PNG_FP_1;
+         png_ptr->screen_gamma = png_ptr->colorspace.gamma = PNG_FP_1;
+
+      /* We have a gamma value now. */
+      png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA;
 
       /* Now turn the gamma transformation on or off as appropriate.  Notice
        * that PNG_GAMMA just refers to the file->screen correction.  Alpha
@@ -1304,7 +1353,7 @@ png_init_read_transformations(png_structp png_ptr)
        * the code immediately below if the transform can be handled outside the
        * row loop.
        */
-      if (gamma_correction)
+      if (gamma_correction != 0)
          png_ptr->transformations |= PNG_GAMMA;
 
       else
@@ -1313,7 +1362,7 @@ png_init_read_transformations(png_structp png_ptr)
 #endif
 
    /* Certain transformations have the effect of preventing other
-    * transformations that happen afterward in png_do_read_transformations,
+    * transformations that happen afterward in png_do_read_transformations;
     * resolve the interdependencies here.  From the code of
     * png_do_read_transformations the order is:
     *
@@ -1331,19 +1380,19 @@ png_init_read_transformations(png_structp png_ptr)
     * 12) PNG_EXPAND_16
     * 13) PNG_GRAY_TO_RGB iff PNG_BACKGROUND_IS_GRAY
     * 14) PNG_INVERT_MONO
-    * 15) PNG_SHIFT
-    * 16) PNG_PACK
-    * 17) PNG_BGR
-    * 18) PNG_PACKSWAP
-    * 19) PNG_FILLER (includes PNG_ADD_ALPHA)
-    * 20) PNG_INVERT_ALPHA
+    * 15) PNG_INVERT_ALPHA
+    * 16) PNG_SHIFT
+    * 17) PNG_PACK
+    * 18) PNG_BGR
+    * 19) PNG_PACKSWAP
+    * 20) PNG_FILLER (includes PNG_ADD_ALPHA)
     * 21) PNG_SWAP_ALPHA
     * 22) PNG_SWAP_BYTES
     * 23) PNG_USER_TRANSFORM [must be last]
     */
 #ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_STRIP_ALPHA) &&
-      !(png_ptr->transformations & PNG_COMPOSE))
+   if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0 &&
+       (png_ptr->transformations & PNG_COMPOSE) == 0)
    {
       /* Stripping the alpha channel happens immediately after the 'expand'
        * transformations, before all other transformation, so it cancels out
@@ -1369,16 +1418,23 @@ png_init_read_transformations(png_structp png_ptr)
    /* If the screen gamma is about 1.0 then the OPTIMIZE_ALPHA and ENCODE_ALPHA
     * settings will have no effect.
     */
-   if (!png_gamma_significant(png_ptr->screen_gamma))
+   if (png_gamma_significant(png_ptr->screen_gamma) == 0)
    {
       png_ptr->transformations &= ~PNG_ENCODE_ALPHA;
       png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA;
    }
 #endif
 
-#if defined(PNG_READ_EXPAND_SUPPORTED) && \
-   defined(PNG_READ_BACKGROUND_SUPPORTED) && \
-   defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
+   /* Make sure the coefficients for the rgb to gray conversion are set
+    * appropriately.
+    */
+   if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0)
+      png_colorspace_set_rgb_coefficients(png_ptr);
+#endif
+
+#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
+#if defined(PNG_READ_EXPAND_SUPPORTED) && defined(PNG_READ_BACKGROUND_SUPPORTED)
    /* Detect gray background and attempt to enable optimization for
     * gray --> RGB case.
     *
@@ -1394,23 +1450,23 @@ png_init_read_transformations(png_structp png_ptr)
     * png_set_background, along with the bit depth, then the code has a record
     * of exactly what color space the background is currently in.
     */
-   if (png_ptr->transformations & PNG_BACKGROUND_EXPAND)
+   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) != 0)
    {
       /* PNG_BACKGROUND_EXPAND: the background is in the file color space, so if
        * the file was grayscale the background value is gray.
        */
-      if (!(png_ptr->color_type & PNG_COLOR_MASK_COLOR))
+      if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0)
          png_ptr->mode |= PNG_BACKGROUND_IS_GRAY;
    }
 
-   else if (png_ptr->transformations & PNG_COMPOSE)
+   else if ((png_ptr->transformations & PNG_COMPOSE) != 0)
    {
       /* PNG_COMPOSE: png_set_background was called with need_expand false,
        * so the color is in the color space of the output or png_set_alpha_mode
        * was called and the color is black.  Ignore RGB_TO_GRAY because that
        * happens before GRAY_TO_RGB.
        */
-      if (png_ptr->transformations & PNG_GRAY_TO_RGB)
+      if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0)
       {
          if (png_ptr->background.red == png_ptr->background.green &&
              png_ptr->background.red == png_ptr->background.blue)
@@ -1420,7 +1476,8 @@ png_init_read_transformations(png_structp png_ptr)
          }
       }
    }
-#endif /* PNG_READ_GRAY_TO_RGB_SUPPORTED (etc) */
+#endif /* READ_EXPAND && READ_BACKGROUND */
+#endif /* READ_GRAY_TO_RGB */
 
    /* For indexed PNG data (PNG_COLOR_TYPE_PALETTE) many of the transformations
     * can be performed directly on the palette, and some (such as rgb to gray)
@@ -1441,10 +1498,10 @@ png_init_read_transformations(png_structp png_ptr)
 
 #if defined(PNG_READ_BACKGROUND_SUPPORTED) && \
    defined(PNG_READ_EXPAND_16_SUPPORTED)
-   if ((png_ptr->transformations & PNG_EXPAND_16) &&
-      (png_ptr->transformations & PNG_COMPOSE) &&
-      !(png_ptr->transformations & PNG_BACKGROUND_EXPAND) &&
-      png_ptr->bit_depth != 16)
+   if ((png_ptr->transformations & PNG_EXPAND_16) != 0 &&
+       (png_ptr->transformations & PNG_COMPOSE) != 0 &&
+       (png_ptr->transformations & PNG_BACKGROUND_EXPAND) == 0 &&
+       png_ptr->bit_depth != 16)
    {
       /* TODO: fix this.  Because the expand_16 operation is after the compose
        * handling the background color must be 8, not 16, bits deep, but the
@@ -1456,22 +1513,22 @@ png_init_read_transformations(png_structp png_ptr)
        * NOTE: this discards the low 16 bits of the user supplied background
        * color, but until expand_16 works properly there is no choice!
        */
-#     define CHOP(x) (x)=((png_uint_16)(((png_uint_32)(x)*255+32895) >> 16))
+#     define CHOP(x) (x)=((png_uint_16)PNG_DIV257(x))
       CHOP(png_ptr->background.red);
       CHOP(png_ptr->background.green);
       CHOP(png_ptr->background.blue);
       CHOP(png_ptr->background.gray);
 #     undef CHOP
    }
-#endif /* PNG_READ_BACKGROUND_SUPPORTED && PNG_READ_EXPAND_16_SUPPORTED */
+#endif /* READ_BACKGROUND && READ_EXPAND_16 */
 
 #if defined(PNG_READ_BACKGROUND_SUPPORTED) && \
    (defined(PNG_READ_SCALE_16_TO_8_SUPPORTED) || \
    defined(PNG_READ_STRIP_16_TO_8_SUPPORTED))
-   if ((png_ptr->transformations & (PNG_16_TO_8|PNG_SCALE_16_TO_8)) &&
-      (png_ptr->transformations & PNG_COMPOSE) &&
-      !(png_ptr->transformations & PNG_BACKGROUND_EXPAND) &&
-      png_ptr->bit_depth == 16)
+   if ((png_ptr->transformations & (PNG_16_TO_8|PNG_SCALE_16_TO_8)) != 0 &&
+       (png_ptr->transformations & PNG_COMPOSE) != 0 &&
+       (png_ptr->transformations & PNG_BACKGROUND_EXPAND) == 0 &&
+       png_ptr->bit_depth == 16)
    {
       /* On the other hand, if a 16-bit file is to be reduced to 8-bits per
        * component this will also happen after PNG_COMPOSE and so the background
@@ -1514,25 +1571,24 @@ png_init_read_transformations(png_structp png_ptr)
     * file gamma - if it is not 1.0 both RGB_TO_GRAY and COMPOSE need the
     * tables.
     */
-   if ((png_ptr->transformations & PNG_GAMMA)
-      || ((png_ptr->transformations & PNG_RGB_TO_GRAY)
-         && (png_gamma_significant(png_ptr->gamma) ||
-            png_gamma_significant(png_ptr->screen_gamma)))
-      || ((png_ptr->transformations & PNG_COMPOSE)
-         && (png_gamma_significant(png_ptr->gamma)
-            || png_gamma_significant(png_ptr->screen_gamma)
+   if ((png_ptr->transformations & PNG_GAMMA) != 0 ||
+       ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0 &&
+        (png_gamma_significant(png_ptr->colorspace.gamma) != 0 ||
+         png_gamma_significant(png_ptr->screen_gamma) != 0)) ||
+        ((png_ptr->transformations & PNG_COMPOSE) != 0 &&
+         (png_gamma_significant(png_ptr->colorspace.gamma) != 0 ||
+          png_gamma_significant(png_ptr->screen_gamma) != 0
 #  ifdef PNG_READ_BACKGROUND_SUPPORTED
-            || (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_UNIQUE
-               && png_gamma_significant(png_ptr->background_gamma))
+         || (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_UNIQUE &&
+           png_gamma_significant(png_ptr->background_gamma) != 0)
 #  endif
-      )) || ((png_ptr->transformations & PNG_ENCODE_ALPHA)
-         && png_gamma_significant(png_ptr->screen_gamma))
-      )
+        )) || ((png_ptr->transformations & PNG_ENCODE_ALPHA) != 0 &&
+       png_gamma_significant(png_ptr->screen_gamma) != 0))
    {
       png_build_gamma_table(png_ptr, png_ptr->bit_depth);
 
 #ifdef PNG_READ_BACKGROUND_SUPPORTED
-      if (png_ptr->transformations & PNG_COMPOSE)
+      if ((png_ptr->transformations & PNG_COMPOSE) != 0)
       {
          /* Issue a warning about this combination: because RGB_TO_GRAY is
           * optimized to do the gamma transform if present yet do_background has
@@ -1540,11 +1596,11 @@ png_init_read_transformations(png_structp png_ptr)
           * double-gamma-correction happens.  This is true in all versions of
           * libpng to date.
           */
-         if (png_ptr->transformations & PNG_RGB_TO_GRAY)
+         if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0)
             png_warning(png_ptr,
-               "libpng does not support gamma+background+rgb_to_gray");
+                "libpng does not support gamma+background+rgb_to_gray");
 
-         if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+         if ((png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) != 0)
          {
             /* We don't get to here unless there is a tRNS chunk with non-opaque
              * entries - see the checking code at the start of this function.
@@ -1576,15 +1632,15 @@ png_init_read_transformations(png_structp png_ptr)
                      break;
 
                   case PNG_BACKGROUND_GAMMA_FILE:
-                     g = png_reciprocal(png_ptr->gamma);
-                     gs = png_reciprocal2(png_ptr->gamma,
-                        png_ptr->screen_gamma);
+                     g = png_reciprocal(png_ptr->colorspace.gamma);
+                     gs = png_reciprocal2(png_ptr->colorspace.gamma,
+                         png_ptr->screen_gamma);
                      break;
 
                   case PNG_BACKGROUND_GAMMA_UNIQUE:
                      g = png_reciprocal(png_ptr->background_gamma);
                      gs = png_reciprocal2(png_ptr->background_gamma,
-                        png_ptr->screen_gamma);
+                         png_ptr->screen_gamma);
                      break;
                   default:
                      g = PNG_FP_1;    /* back_1 */
@@ -1592,7 +1648,7 @@ png_init_read_transformations(png_structp png_ptr)
                      break;
                }
 
-               if (png_gamma_significant(gs))
+               if (png_gamma_significant(gs) != 0)
                {
                   back.red = png_gamma_8bit_correct(png_ptr->background.red,
                       gs);
@@ -1609,14 +1665,14 @@ png_init_read_transformations(png_structp png_ptr)
                   back.blue  = (png_byte)png_ptr->background.blue;
                }
 
-               if (png_gamma_significant(g))
+               if (png_gamma_significant(g) != 0)
                {
                   back_1.red = png_gamma_8bit_correct(png_ptr->background.red,
-                     g);
+                      g);
                   back_1.green = png_gamma_8bit_correct(
-                     png_ptr->background.green, g);
+                      png_ptr->background.green, g);
                   back_1.blue = png_gamma_8bit_correct(png_ptr->background.blue,
-                     g);
+                      g);
                }
 
                else
@@ -1685,8 +1741,9 @@ png_init_read_transformations(png_structp png_ptr)
                   break;
 
                case PNG_BACKGROUND_GAMMA_FILE:
-                  g = png_reciprocal(png_ptr->gamma);
-                  gs = png_reciprocal2(png_ptr->gamma, png_ptr->screen_gamma);
+                  g = png_reciprocal(png_ptr->colorspace.gamma);
+                  gs = png_reciprocal2(png_ptr->colorspace.gamma,
+                      png_ptr->screen_gamma);
                   break;
 
                case PNG_BACKGROUND_GAMMA_UNIQUE:
@@ -1702,11 +1759,11 @@ png_init_read_transformations(png_structp png_ptr)
             g_sig = png_gamma_significant(g);
             gs_sig = png_gamma_significant(gs);
 
-            if (g_sig)
+            if (g_sig != 0)
                png_ptr->background_1.gray = png_gamma_correct(png_ptr,
                    png_ptr->background.gray, g);
 
-            if (gs_sig)
+            if (gs_sig != 0)
                png_ptr->background.gray = png_gamma_correct(png_ptr,
                    png_ptr->background.gray, gs);
 
@@ -1715,7 +1772,7 @@ png_init_read_transformations(png_structp png_ptr)
                 (png_ptr->background.red != png_ptr->background.gray))
             {
                /* RGB or RGBA with color background */
-               if (g_sig)
+               if (g_sig != 0)
                {
                   png_ptr->background_1.red = png_gamma_correct(png_ptr,
                       png_ptr->background.red, g);
@@ -1727,7 +1784,7 @@ png_init_read_transformations(png_structp png_ptr)
                       png_ptr->background.blue, g);
                }
 
-               if (gs_sig)
+               if (gs_sig != 0)
                {
                   png_ptr->background.red = png_gamma_correct(png_ptr,
                       png_ptr->background.red, gs);
@@ -1757,7 +1814,7 @@ png_init_read_transformations(png_structp png_ptr)
 
       else
       /* Transformation does not include PNG_BACKGROUND */
-#endif /* PNG_READ_BACKGROUND_SUPPORTED */
+#endif /* READ_BACKGROUND */
       if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE
 #ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
          /* RGB_TO_GRAY needs to have non-gamma-corrected values! */
@@ -1770,8 +1827,8 @@ png_init_read_transformations(png_structp png_ptr)
          int num_palette = png_ptr->num_palette;
          int i;
 
-         /*NOTE: there are other transformations that should probably be in here
-          * too.
+         /* NOTE: there are other transformations that should probably be in
+          * here too.
           */
          for (i = 0; i < num_palette; i++)
          {
@@ -1787,11 +1844,11 @@ png_init_read_transformations(png_structp png_ptr)
 #ifdef PNG_READ_BACKGROUND_SUPPORTED
    else
 #endif
-#endif /* PNG_READ_GAMMA_SUPPORTED */
+#endif /* READ_GAMMA */
 
 #ifdef PNG_READ_BACKGROUND_SUPPORTED
    /* No GAMMA transformation (see the hanging else 4 lines above) */
-   if ((png_ptr->transformations & PNG_COMPOSE) &&
+   if ((png_ptr->transformations & PNG_COMPOSE) != 0 &&
        (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE))
    {
       int i;
@@ -1826,11 +1883,11 @@ png_init_read_transformations(png_structp png_ptr)
 
       png_ptr->transformations &= ~PNG_COMPOSE;
    }
-#endif /* PNG_READ_BACKGROUND_SUPPORTED */
+#endif /* READ_BACKGROUND */
 
 #ifdef PNG_READ_SHIFT_SUPPORTED
-   if ((png_ptr->transformations & PNG_SHIFT) &&
-      !(png_ptr->transformations & PNG_EXPAND) &&
+   if ((png_ptr->transformations & PNG_SHIFT) != 0 &&
+       (png_ptr->transformations & PNG_EXPAND) == 0 &&
        (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE))
    {
       int i;
@@ -1839,37 +1896,40 @@ png_init_read_transformations(png_structp png_ptr)
 
       png_ptr->transformations &= ~PNG_SHIFT;
 
-      /* significant bits can be in the range 1 to 7 for a meaninful result, if
+      /* significant bits can be in the range 1 to 7 for a meaningful result, if
        * the number of significant bits is 0 then no shift is done (this is an
        * error condition which is silently ignored.)
        */
-      if (shift > 0 && shift < 8) for (i=0; i<istop; ++i)
-      {
-         int component = png_ptr->palette[i].red;
+      if (shift > 0 && shift < 8)
+         for (i=0; i<istop; ++i)
+         {
+            int component = png_ptr->palette[i].red;
 
-         component >>= shift;
-         png_ptr->palette[i].red = (png_byte)component;
-      }
+            component >>= shift;
+            png_ptr->palette[i].red = (png_byte)component;
+         }
 
       shift = 8 - png_ptr->sig_bit.green;
-      if (shift > 0 && shift < 8) for (i=0; i<istop; ++i)
-      {
-         int component = png_ptr->palette[i].green;
+      if (shift > 0 && shift < 8)
+         for (i=0; i<istop; ++i)
+         {
+            int component = png_ptr->palette[i].green;
 
-         component >>= shift;
-         png_ptr->palette[i].green = (png_byte)component;
-      }
+            component >>= shift;
+            png_ptr->palette[i].green = (png_byte)component;
+         }
 
       shift = 8 - png_ptr->sig_bit.blue;
-      if (shift > 0 && shift < 8) for (i=0; i<istop; ++i)
-      {
-         int component = png_ptr->palette[i].blue;
+      if (shift > 0 && shift < 8)
+         for (i=0; i<istop; ++i)
+         {
+            int component = png_ptr->palette[i].blue;
 
-         component >>= shift;
-         png_ptr->palette[i].blue = (png_byte)component;
-      }
+            component >>= shift;
+            png_ptr->palette[i].blue = (png_byte)component;
+         }
    }
-#endif  /* PNG_READ_SHIFT_SUPPORTED */
+#endif /* READ_SHIFT */
 }
 
 /* Modify the info structure to reflect the transformations.  The
@@ -1877,12 +1937,12 @@ png_init_read_transformations(png_structp png_ptr)
  * assuming the transformations result in valid PNG data.
  */
 void /* PRIVATE */
-png_read_transform_info(png_structp png_ptr, png_infop info_ptr)
+png_read_transform_info(png_structrp png_ptr, png_inforp info_ptr)
 {
    png_debug(1, "in png_read_transform_info");
 
 #ifdef PNG_READ_EXPAND_SUPPORTED
-   if (png_ptr->transformations & PNG_EXPAND)
+   if ((png_ptr->transformations & PNG_EXPAND) != 0)
    {
       if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
       {
@@ -1898,12 +1958,15 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr)
 
          info_ptr->bit_depth = 8;
          info_ptr->num_trans = 0;
+
+         if (png_ptr->palette == NULL)
+            png_error (png_ptr, "Palette is NULL in indexed image");
       }
       else
       {
-         if (png_ptr->num_trans)
+         if (png_ptr->num_trans != 0)
          {
-            if (png_ptr->transformations & PNG_EXPAND_tRNS)
+            if ((png_ptr->transformations & PNG_EXPAND_tRNS) != 0)
                info_ptr->color_type |= PNG_COLOR_MASK_ALPHA;
          }
          if (info_ptr->bit_depth < 8)
@@ -1919,7 +1982,7 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr)
    /* The following is almost certainly wrong unless the background value is in
     * the screen space!
     */
-   if (png_ptr->transformations & PNG_COMPOSE)
+   if ((png_ptr->transformations & PNG_COMPOSE) != 0)
       info_ptr->background = png_ptr->background;
 #endif
 
@@ -1928,25 +1991,29 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr)
     * however it seems that the code in png_init_read_transformations, which has
     * been called before this from png_read_update_info->png_read_start_row
     * sometimes does the gamma transform and cancels the flag.
+    *
+    * TODO: this looks wrong; the info_ptr should end up with a gamma equal to
+    * the screen_gamma value.  The following probably results in weirdness if
+    * the info_ptr is used by the app after the rows have been read.
     */
-   info_ptr->gamma = png_ptr->gamma;
+   info_ptr->colorspace.gamma = png_ptr->colorspace.gamma;
 #endif
 
    if (info_ptr->bit_depth == 16)
    {
 #  ifdef PNG_READ_16BIT_SUPPORTED
 #     ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-         if (png_ptr->transformations & PNG_SCALE_16_TO_8)
+         if ((png_ptr->transformations & PNG_SCALE_16_TO_8) != 0)
             info_ptr->bit_depth = 8;
 #     endif
 
 #     ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-         if (png_ptr->transformations & PNG_16_TO_8)
+         if ((png_ptr->transformations & PNG_16_TO_8) != 0)
             info_ptr->bit_depth = 8;
 #     endif
 
 #  else
-      /* No 16 bit support: force chopping 16-bit input down to 8, in this case
+      /* No 16-bit support: force chopping 16-bit input down to 8, in this case
        * the app program can chose if both APIs are available by setting the
        * correct scaling to use.
        */
@@ -1967,27 +2034,27 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr)
             CONFIGURATION ERROR: you must enable at least one 16 to 8 method
 #        endif
 #    endif
-#endif /* !READ_16BIT_SUPPORTED */
+#endif /* !READ_16BIT */
    }
 
 #ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   if (png_ptr->transformations & PNG_GRAY_TO_RGB)
+   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0)
       info_ptr->color_type = (png_byte)(info_ptr->color_type |
          PNG_COLOR_MASK_COLOR);
 #endif
 
 #ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-   if (png_ptr->transformations & PNG_RGB_TO_GRAY)
+   if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0)
       info_ptr->color_type = (png_byte)(info_ptr->color_type &
          ~PNG_COLOR_MASK_COLOR);
 #endif
 
 #ifdef PNG_READ_QUANTIZE_SUPPORTED
-   if (png_ptr->transformations & PNG_QUANTIZE)
+   if ((png_ptr->transformations & PNG_QUANTIZE) != 0)
    {
       if (((info_ptr->color_type == PNG_COLOR_TYPE_RGB) ||
           (info_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)) &&
-          png_ptr->palette_lookup && info_ptr->bit_depth == 8)
+          png_ptr->palette_lookup != 0 && info_ptr->bit_depth == 8)
       {
          info_ptr->color_type = PNG_COLOR_TYPE_PALETTE;
       }
@@ -1995,29 +2062,31 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr)
 #endif
 
 #ifdef PNG_READ_EXPAND_16_SUPPORTED
-   if (png_ptr->transformations & PNG_EXPAND_16 && info_ptr->bit_depth == 8 &&
-      info_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+   if ((png_ptr->transformations & PNG_EXPAND_16) != 0 &&
+       info_ptr->bit_depth == 8 &&
+       info_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
    {
       info_ptr->bit_depth = 16;
    }
 #endif
 
 #ifdef PNG_READ_PACK_SUPPORTED
-   if ((png_ptr->transformations & PNG_PACK) && (info_ptr->bit_depth < 8))
+   if ((png_ptr->transformations & PNG_PACK) != 0 &&
+       (info_ptr->bit_depth < 8))
       info_ptr->bit_depth = 8;
 #endif
 
    if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
       info_ptr->channels = 1;
 
-   else if (info_ptr->color_type & PNG_COLOR_MASK_COLOR)
+   else if ((info_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
       info_ptr->channels = 3;
 
    else
       info_ptr->channels = 1;
 
 #ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-   if (png_ptr->transformations & PNG_STRIP_ALPHA)
+   if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0)
    {
       info_ptr->color_type = (png_byte)(info_ptr->color_type &
          ~PNG_COLOR_MASK_ALPHA);
@@ -2025,30 +2094,30 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr)
    }
 #endif
 
-   if (info_ptr->color_type & PNG_COLOR_MASK_ALPHA)
+   if ((info_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)
       info_ptr->channels++;
 
 #ifdef PNG_READ_FILLER_SUPPORTED
    /* STRIP_ALPHA and FILLER allowed:  MASK_ALPHA bit stripped above */
-   if ((png_ptr->transformations & PNG_FILLER) &&
-       ((info_ptr->color_type == PNG_COLOR_TYPE_RGB) ||
-       (info_ptr->color_type == PNG_COLOR_TYPE_GRAY)))
+   if ((png_ptr->transformations & PNG_FILLER) != 0 &&
+       (info_ptr->color_type == PNG_COLOR_TYPE_RGB ||
+       info_ptr->color_type == PNG_COLOR_TYPE_GRAY))
    {
       info_ptr->channels++;
       /* If adding a true alpha channel not just filler */
-      if (png_ptr->transformations & PNG_ADD_ALPHA)
+      if ((png_ptr->transformations & PNG_ADD_ALPHA) != 0)
          info_ptr->color_type |= PNG_COLOR_MASK_ALPHA;
    }
 #endif
 
 #if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED) && \
 defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
-   if (png_ptr->transformations & PNG_USER_TRANSFORM)
+   if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0)
    {
-      if (info_ptr->bit_depth < png_ptr->user_transform_depth)
+      if (png_ptr->user_transform_depth != 0)
          info_ptr->bit_depth = png_ptr->user_transform_depth;
 
-      if (info_ptr->channels < png_ptr->user_transform_channels)
+      if (png_ptr->user_transform_channels != 0)
          info_ptr->channels = png_ptr->user_transform_channels;
    }
 #endif
@@ -2067,441 +2136,146 @@ defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
    png_ptr->info_rowbytes = info_ptr->rowbytes;
 
 #ifndef PNG_READ_EXPAND_SUPPORTED
-   if (png_ptr)
+   if (png_ptr != NULL)
       return;
 #endif
 }
 
-/* Transform the row.  The order of transformations is significant,
- * and is very touchy.  If you add a transformation, take care to
- * decide how it fits in with the other transformations here.
+#ifdef PNG_READ_PACK_SUPPORTED
+/* Unpack pixels of 1, 2, or 4 bits per pixel into 1 byte per pixel,
+ * without changing the actual values.  Thus, if you had a row with
+ * a bit depth of 1, you would end up with bytes that only contained
+ * the numbers 0 or 1.  If you would rather they contain 0 and 255, use
+ * png_do_shift() after this.
  */
-void /* PRIVATE */
-png_do_read_transformations(png_structp png_ptr, png_row_infop row_info)
+static void
+png_do_unpack(png_row_infop row_info, png_bytep row)
 {
-   png_debug(1, "in png_do_read_transformations");
-
-   if (png_ptr->row_buf == NULL)
-   {
-      /* Prior to 1.5.4 this output row/pass where the NULL pointer is, but this
-       * error is incredibly rare and incredibly easy to debug without this
-       * information.
-       */
-      png_error(png_ptr, "NULL row buffer");
-   }
+   png_debug(1, "in png_do_unpack");
 
-   /* The following is debugging; prior to 1.5.4 the code was never compiled in;
-    * in 1.5.4 PNG_FLAG_DETECT_UNINITIALIZED was added and the macro
-    * PNG_WARN_UNINITIALIZED_ROW removed.  In 1.5 the new flag is set only for
-    * selected new APIs to ensure that there is no API change.
-    */
-   if ((png_ptr->flags & PNG_FLAG_DETECT_UNINITIALIZED) != 0 &&
-      !(png_ptr->flags & PNG_FLAG_ROW_INIT))
+   if (row_info->bit_depth < 8)
    {
-      /* Application has failed to call either png_read_start_image() or
-       * png_read_update_info() after setting transforms that expand pixels.
-       * This check added to libpng-1.2.19 (but not enabled until 1.5.4).
-       */
-      png_error(png_ptr, "Uninitialized row");
-   }
+      png_uint_32 i;
+      png_uint_32 row_width=row_info->width;
 
-#ifdef PNG_READ_EXPAND_SUPPORTED
-   if (png_ptr->transformations & PNG_EXPAND)
-   {
-      if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
+      switch (row_info->bit_depth)
       {
-         png_do_expand_palette(row_info, png_ptr->row_buf + 1,
-             png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
-      }
+         case 1:
+         {
+            png_bytep sp = row + (size_t)((row_width - 1) >> 3);
+            png_bytep dp = row + (size_t)row_width - 1;
+            png_uint_32 shift = 7U - ((row_width + 7U) & 0x07);
+            for (i = 0; i < row_width; i++)
+            {
+               *dp = (png_byte)((*sp >> shift) & 0x01);
 
-      else
-      {
-         if (png_ptr->num_trans &&
-             (png_ptr->transformations & PNG_EXPAND_tRNS))
-            png_do_expand(row_info, png_ptr->row_buf + 1,
-                &(png_ptr->trans_color));
+               if (shift == 7)
+               {
+                  shift = 0;
+                  sp--;
+               }
 
-         else
-            png_do_expand(row_info, png_ptr->row_buf + 1,
-                NULL);
+               else
+                  shift++;
+
+               dp--;
+            }
+            break;
+         }
+
+         case 2:
+         {
+
+            png_bytep sp = row + (size_t)((row_width - 1) >> 2);
+            png_bytep dp = row + (size_t)row_width - 1;
+            png_uint_32 shift = ((3U - ((row_width + 3U) & 0x03)) << 1);
+            for (i = 0; i < row_width; i++)
+            {
+               *dp = (png_byte)((*sp >> shift) & 0x03);
+
+               if (shift == 6)
+               {
+                  shift = 0;
+                  sp--;
+               }
+
+               else
+                  shift += 2;
+
+               dp--;
+            }
+            break;
+         }
+
+         case 4:
+         {
+            png_bytep sp = row + (size_t)((row_width - 1) >> 1);
+            png_bytep dp = row + (size_t)row_width - 1;
+            png_uint_32 shift = ((1U - ((row_width + 1U) & 0x01)) << 2);
+            for (i = 0; i < row_width; i++)
+            {
+               *dp = (png_byte)((*sp >> shift) & 0x0f);
+
+               if (shift == 4)
+               {
+                  shift = 0;
+                  sp--;
+               }
+
+               else
+                  shift = 4;
+
+               dp--;
+            }
+            break;
+         }
+
+         default:
+            break;
       }
+      row_info->bit_depth = 8;
+      row_info->pixel_depth = (png_byte)(8 * row_info->channels);
+      row_info->rowbytes = row_width * row_info->channels;
    }
+}
 #endif
 
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_STRIP_ALPHA) &&
-      !(png_ptr->transformations & PNG_COMPOSE) &&
-      (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
-      row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA))
-      png_do_strip_channel(row_info, png_ptr->row_buf + 1,
-         0 /* at_start == false, because SWAP_ALPHA happens later */);
-#endif
+#ifdef PNG_READ_SHIFT_SUPPORTED
+/* Reverse the effects of png_do_shift.  This routine merely shifts the
+ * pixels back to their significant bits values.  Thus, if you have
+ * a row of bit depth 8, but only 5 are significant, this will shift
+ * the values back to 0 through 31.
+ */
+static void
+png_do_unshift(png_row_infop row_info, png_bytep row,
+    png_const_color_8p sig_bits)
+{
+   int color_type;
 
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-   if (png_ptr->transformations & PNG_RGB_TO_GRAY)
+   png_debug(1, "in png_do_unshift");
+
+   /* The palette case has already been handled in the _init routine. */
+   color_type = row_info->color_type;
+
+   if (color_type != PNG_COLOR_TYPE_PALETTE)
    {
-      int rgb_error =
-          png_do_rgb_to_gray(png_ptr, row_info,
-              png_ptr->row_buf + 1);
+      int shift[4];
+      int channels = 0;
+      int bit_depth = row_info->bit_depth;
 
-      if (rgb_error)
+      if ((color_type & PNG_COLOR_MASK_COLOR) != 0)
       {
-         png_ptr->rgb_to_gray_status=1;
-         if ((png_ptr->transformations & PNG_RGB_TO_GRAY) ==
-             PNG_RGB_TO_GRAY_WARN)
-            png_warning(png_ptr, "png_do_rgb_to_gray found nongray pixel");
+         shift[channels++] = bit_depth - sig_bits->red;
+         shift[channels++] = bit_depth - sig_bits->green;
+         shift[channels++] = bit_depth - sig_bits->blue;
+      }
 
-         if ((png_ptr->transformations & PNG_RGB_TO_GRAY) ==
-             PNG_RGB_TO_GRAY_ERR)
-            png_error(png_ptr, "png_do_rgb_to_gray found nongray pixel");
+      else
+      {
+         shift[channels++] = bit_depth - sig_bits->gray;
       }
-   }
-#endif
 
-/* From Andreas Dilger e-mail to png-implement, 26 March 1998:
- *
- *   In most cases, the "simple transparency" should be done prior to doing
- *   gray-to-RGB, or you will have to test 3x as many bytes to check if a
- *   pixel is transparent.  You would also need to make sure that the
- *   transparency information is upgraded to RGB.
- *
- *   To summarize, the current flow is:
- *   - Gray + simple transparency -> compare 1 or 2 gray bytes and composite
- *                                   with background "in place" if transparent,
- *                                   convert to RGB if necessary
- *   - Gray + alpha -> composite with gray background and remove alpha bytes,
- *                                   convert to RGB if necessary
- *
- *   To support RGB backgrounds for gray images we need:
- *   - Gray + simple transparency -> convert to RGB + simple transparency,
- *                                   compare 3 or 6 bytes and composite with
- *                                   background "in place" if transparent
- *                                   (3x compare/pixel compared to doing
- *                                   composite with gray bkgrnd)
- *   - Gray + alpha -> convert to RGB + alpha, composite with background and
- *                                   remove alpha bytes (3x float
- *                                   operations/pixel compared with composite
- *                                   on gray background)
- *
- *  Greg's change will do this.  The reason it wasn't done before is for
- *  performance, as this increases the per-pixel operations.  If we would check
- *  in advance if the background was gray or RGB, and position the gray-to-RGB
- *  transform appropriately, then it would save a lot of work/time.
- */
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   /* If gray -> RGB, do so now only if background is non-gray; else do later
-    * for performance reasons
-    */
-   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) &&
-       !(png_ptr->mode & PNG_BACKGROUND_IS_GRAY))
-      png_do_gray_to_rgb(row_info, png_ptr->row_buf + 1);
-#endif
-
-#if (defined PNG_READ_BACKGROUND_SUPPORTED) ||\
-   (defined PNG_READ_ALPHA_MODE_SUPPORTED)
-   if (png_ptr->transformations & PNG_COMPOSE)
-      png_do_compose(row_info, png_ptr->row_buf + 1, png_ptr);
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   if ((png_ptr->transformations & PNG_GAMMA) &&
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-      /* Because RGB_TO_GRAY does the gamma transform. */
-      !(png_ptr->transformations & PNG_RGB_TO_GRAY) &&
-#endif
-#if (defined PNG_READ_BACKGROUND_SUPPORTED) ||\
-   (defined PNG_READ_ALPHA_MODE_SUPPORTED)
-      /* Because PNG_COMPOSE does the gamma transform if there is something to
-       * do (if there is an alpha channel or transparency.)
-       */
-       !((png_ptr->transformations & PNG_COMPOSE) &&
-       ((png_ptr->num_trans != 0) ||
-       (png_ptr->color_type & PNG_COLOR_MASK_ALPHA))) &&
-#endif
-      /* Because png_init_read_transformations transforms the palette, unless
-       * RGB_TO_GRAY will do the transform.
-       */
-       (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE))
-      png_do_gamma(row_info, png_ptr->row_buf + 1, png_ptr);
-#endif
-
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-   if ((png_ptr->transformations & PNG_STRIP_ALPHA) &&
-      (png_ptr->transformations & PNG_COMPOSE) &&
-      (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
-      row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA))
-      png_do_strip_channel(row_info, png_ptr->row_buf + 1,
-         0 /* at_start == false, because SWAP_ALPHA happens later */);
-#endif
-
-#ifdef PNG_READ_ALPHA_MODE_SUPPORTED
-   if ((png_ptr->transformations & PNG_ENCODE_ALPHA) &&
-      (row_info->color_type & PNG_COLOR_MASK_ALPHA))
-      png_do_encode_alpha(row_info, png_ptr->row_buf + 1, png_ptr);
-#endif
-
-#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-   if (png_ptr->transformations & PNG_SCALE_16_TO_8)
-      png_do_scale_16_to_8(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-   /* There is no harm in doing both of these because only one has any effect,
-    * by putting the 'scale' option first if the app asks for scale (either by
-    * calling the API or in a TRANSFORM flag) this is what happens.
-    */
-   if (png_ptr->transformations & PNG_16_TO_8)
-      png_do_chop(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-   if (png_ptr->transformations & PNG_QUANTIZE)
-   {
-      png_do_quantize(row_info, png_ptr->row_buf + 1,
-          png_ptr->palette_lookup, png_ptr->quantize_index);
-
-      if (row_info->rowbytes == 0)
-         png_error(png_ptr, "png_do_quantize returned rowbytes=0");
-   }
-#endif /* PNG_READ_QUANTIZE_SUPPORTED */
-
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-   /* Do the expansion now, after all the arithmetic has been done.  Notice
-    * that previous transformations can handle the PNG_EXPAND_16 flag if this
-    * is efficient (particularly true in the case of gamma correction, where
-    * better accuracy results faster!)
-    */
-   if (png_ptr->transformations & PNG_EXPAND_16)
-      png_do_expand_16(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   /*NOTE: moved here in 1.5.4 (from much later in this list.) */
-   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) &&
-       (png_ptr->mode & PNG_BACKGROUND_IS_GRAY))
-      png_do_gray_to_rgb(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_INVERT_SUPPORTED
-   if (png_ptr->transformations & PNG_INVERT_MONO)
-      png_do_invert(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_SHIFT_SUPPORTED
-   if (png_ptr->transformations & PNG_SHIFT)
-      png_do_unshift(row_info, png_ptr->row_buf + 1,
-          &(png_ptr->shift));
-#endif
-
-#ifdef PNG_READ_PACK_SUPPORTED
-   if (png_ptr->transformations & PNG_PACK)
-      png_do_unpack(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   /* Added at libpng-1.5.10 */
-   if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
-      png_do_check_palette_indexes(png_ptr, row_info);
-#endif
-
-#ifdef PNG_READ_BGR_SUPPORTED
-   if (png_ptr->transformations & PNG_BGR)
-      png_do_bgr(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_PACKSWAP_SUPPORTED
-   if (png_ptr->transformations & PNG_PACKSWAP)
-      png_do_packswap(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_FILLER_SUPPORTED
-   if (png_ptr->transformations & PNG_FILLER)
-      png_do_read_filler(row_info, png_ptr->row_buf + 1,
-          (png_uint_32)png_ptr->filler, png_ptr->flags);
-#endif
-
-#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
-   if (png_ptr->transformations & PNG_INVERT_ALPHA)
-      png_do_read_invert_alpha(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED
-   if (png_ptr->transformations & PNG_SWAP_ALPHA)
-      png_do_read_swap_alpha(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_READ_16BIT_SUPPORTED
-#ifdef PNG_READ_SWAP_SUPPORTED
-   if (png_ptr->transformations & PNG_SWAP_BYTES)
-      png_do_swap(row_info, png_ptr->row_buf + 1);
-#endif
-#endif
-
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-   if (png_ptr->transformations & PNG_USER_TRANSFORM)
-    {
-      if (png_ptr->read_user_transform_fn != NULL)
-         (*(png_ptr->read_user_transform_fn)) /* User read transform function */
-             (png_ptr,     /* png_ptr */
-             row_info,     /* row_info: */
-                /*  png_uint_32 width;       width of row */
-                /*  png_size_t rowbytes;     number of bytes in row */
-                /*  png_byte color_type;     color type of pixels */
-                /*  png_byte bit_depth;      bit depth of samples */
-                /*  png_byte channels;       number of channels (1-4) */
-                /*  png_byte pixel_depth;    bits per pixel (depth*channels) */
-             png_ptr->row_buf + 1);    /* start of pixel data for row */
-#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
-      if (png_ptr->user_transform_depth)
-         row_info->bit_depth = png_ptr->user_transform_depth;
-
-      if (png_ptr->user_transform_channels)
-         row_info->channels = png_ptr->user_transform_channels;
-#endif
-      row_info->pixel_depth = (png_byte)(row_info->bit_depth *
-          row_info->channels);
-
-      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_info->width);
-   }
-#endif
-}
-
-#ifdef PNG_READ_PACK_SUPPORTED
-/* Unpack pixels of 1, 2, or 4 bits per pixel into 1 byte per pixel,
- * without changing the actual values.  Thus, if you had a row with
- * a bit depth of 1, you would end up with bytes that only contained
- * the numbers 0 or 1.  If you would rather they contain 0 and 255, use
- * png_do_shift() after this.
- */
-void /* PRIVATE */
-png_do_unpack(png_row_infop row_info, png_bytep row)
-{
-   png_debug(1, "in png_do_unpack");
-
-   if (row_info->bit_depth < 8)
-   {
-      png_uint_32 i;
-      png_uint_32 row_width=row_info->width;
-
-      switch (row_info->bit_depth)
-      {
-         case 1:
-         {
-            png_bytep sp = row + (png_size_t)((row_width - 1) >> 3);
-            png_bytep dp = row + (png_size_t)row_width - 1;
-            png_uint_32 shift = 7 - (int)((row_width + 7) & 0x07);
-            for (i = 0; i < row_width; i++)
-            {
-               *dp = (png_byte)((*sp >> shift) & 0x01);
-
-               if (shift == 7)
-               {
-                  shift = 0;
-                  sp--;
-               }
-
-               else
-                  shift++;
-
-               dp--;
-            }
-            break;
-         }
-
-         case 2:
-         {
-
-            png_bytep sp = row + (png_size_t)((row_width - 1) >> 2);
-            png_bytep dp = row + (png_size_t)row_width - 1;
-            png_uint_32 shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);
-            for (i = 0; i < row_width; i++)
-            {
-               *dp = (png_byte)((*sp >> shift) & 0x03);
-
-               if (shift == 6)
-               {
-                  shift = 0;
-                  sp--;
-               }
-
-               else
-                  shift += 2;
-
-               dp--;
-            }
-            break;
-         }
-
-         case 4:
-         {
-            png_bytep sp = row + (png_size_t)((row_width - 1) >> 1);
-            png_bytep dp = row + (png_size_t)row_width - 1;
-            png_uint_32 shift = (int)((1 - ((row_width + 1) & 0x01)) << 2);
-            for (i = 0; i < row_width; i++)
-            {
-               *dp = (png_byte)((*sp >> shift) & 0x0f);
-
-               if (shift == 4)
-               {
-                  shift = 0;
-                  sp--;
-               }
-
-               else
-                  shift = 4;
-
-               dp--;
-            }
-            break;
-         }
-
-         default:
-            break;
-      }
-      row_info->bit_depth = 8;
-      row_info->pixel_depth = (png_byte)(8 * row_info->channels);
-      row_info->rowbytes = row_width * row_info->channels;
-   }
-}
-#endif
-
-#ifdef PNG_READ_SHIFT_SUPPORTED
-/* Reverse the effects of png_do_shift.  This routine merely shifts the
- * pixels back to their significant bits values.  Thus, if you have
- * a row of bit depth 8, but only 5 are significant, this will shift
- * the values back to 0 through 31.
- */
-void /* PRIVATE */
-png_do_unshift(png_row_infop row_info, png_bytep row,
-    png_const_color_8p sig_bits)
-{
-   int color_type;
-
-   png_debug(1, "in png_do_unshift");
-
-   /* The palette case has already been handled in the _init routine. */
-   color_type = row_info->color_type;
-
-   if (color_type != PNG_COLOR_TYPE_PALETTE)
-   {
-      int shift[4];
-      int channels = 0;
-      int bit_depth = row_info->bit_depth;
-
-      if (color_type & PNG_COLOR_MASK_COLOR)
-      {
-         shift[channels++] = bit_depth - sig_bits->red;
-         shift[channels++] = bit_depth - sig_bits->green;
-         shift[channels++] = bit_depth - sig_bits->blue;
-      }
-
-      else
-      {
-         shift[channels++] = bit_depth - sig_bits->gray;
-      }
-
-      if (color_type & PNG_COLOR_MASK_ALPHA)
+      if ((color_type & PNG_COLOR_MASK_ALPHA) != 0)
       {
          shift[channels++] = bit_depth - sig_bits->alpha;
       }
@@ -2521,7 +2295,7 @@ png_do_unshift(png_row_infop row_info, png_bytep row,
                have_shift = 1;
          }
 
-         if (!have_shift)
+         if (have_shift == 0)
             return;
       }
 
@@ -2599,7 +2373,7 @@ png_do_unshift(png_row_infop row_info, png_bytep row,
                if (++channel >= channels)
                   channel = 0;
                *bp++ = (png_byte)(value >> 8);
-               *bp++ = (png_byte)(value & 0xff);
+               *bp++ = (png_byte)value;
             }
             break;
          }
@@ -2611,7 +2385,7 @@ png_do_unshift(png_row_infop row_info, png_bytep row,
 
 #ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
 /* Scale rows of bit depth 16 down to 8 accurately */
-void /* PRIVATE */
+static void
 png_do_scale_16_to_8(png_row_infop row_info, png_bytep row)
 {
    png_debug(1, "in png_do_scale_16_to_8");
@@ -2624,8 +2398,8 @@ png_do_scale_16_to_8(png_row_infop row_info, png_bytep row)
 
       while (sp < ep)
       {
-         /* The input is an array of 16 bit components, these must be scaled to
-          * 8 bits each.  For a 16 bit value V the required value (from the PNG
+         /* The input is an array of 16-bit components, these must be scaled to
+          * 8 bits each.  For a 16-bit value V the required value (from the PNG
           * specification) is:
           *
           *    (V * 255) / 65535
@@ -2646,7 +2420,7 @@ png_do_scale_16_to_8(png_row_infop row_info, png_bytep row)
           *
           * The approximate differs from the exact answer only when (vlo-vhi) is
           * 128; it then gives a correction of +1 when the exact correction is
-          * 0.  This gives 128 errors.  The exact answer (correct for all 16 bit
+          * 0.  This gives 128 errors.  The exact answer (correct for all 16-bit
           * input values) is:
           *
           *    error = (vlo-vhi+128)*65535 >> 24;
@@ -2669,7 +2443,7 @@ png_do_scale_16_to_8(png_row_infop row_info, png_bytep row)
 #endif
 
 #ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-void /* PRIVATE */
+static void
 /* Simply discard the low byte.  This was the default behavior prior
  * to libpng-1.5.4.
  */
@@ -2697,104 +2471,103 @@ png_do_chop(png_row_infop row_info, png_bytep row)
 #endif
 
 #ifdef PNG_READ_SWAP_ALPHA_SUPPORTED
-void /* PRIVATE */
+static void
 png_do_read_swap_alpha(png_row_infop row_info, png_bytep row)
 {
+   png_uint_32 row_width = row_info->width;
+
    png_debug(1, "in png_do_read_swap_alpha");
 
+   if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
    {
-      png_uint_32 row_width = row_info->width;
-      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      /* This converts from RGBA to ARGB */
+      if (row_info->bit_depth == 8)
       {
-         /* This converts from RGBA to ARGB */
-         if (row_info->bit_depth == 8)
-         {
-            png_bytep sp = row + row_info->rowbytes;
-            png_bytep dp = sp;
-            png_byte save;
-            png_uint_32 i;
+         png_bytep sp = row + row_info->rowbytes;
+         png_bytep dp = sp;
+         png_byte save;
+         png_uint_32 i;
 
-            for (i = 0; i < row_width; i++)
-            {
-               save = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = save;
-            }
+         for (i = 0; i < row_width; i++)
+         {
+            save = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = save;
          }
+      }
 
 #ifdef PNG_READ_16BIT_SUPPORTED
-         /* This converts from RRGGBBAA to AARRGGBB */
-         else
-         {
-            png_bytep sp = row + row_info->rowbytes;
-            png_bytep dp = sp;
-            png_byte save[2];
-            png_uint_32 i;
+      /* This converts from RRGGBBAA to AARRGGBB */
+      else
+      {
+         png_bytep sp = row + row_info->rowbytes;
+         png_bytep dp = sp;
+         png_byte save[2];
+         png_uint_32 i;
 
-            for (i = 0; i < row_width; i++)
-            {
-               save[0] = *(--sp);
-               save[1] = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = save[0];
-               *(--dp) = save[1];
-            }
+         for (i = 0; i < row_width; i++)
+         {
+            save[0] = *(--sp);
+            save[1] = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = save[0];
+            *(--dp) = save[1];
          }
-#endif
       }
+#endif
+   }
 
-      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+   else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+   {
+      /* This converts from GA to AG */
+      if (row_info->bit_depth == 8)
       {
-         /* This converts from GA to AG */
-         if (row_info->bit_depth == 8)
-         {
-            png_bytep sp = row + row_info->rowbytes;
-            png_bytep dp = sp;
-            png_byte save;
-            png_uint_32 i;
+         png_bytep sp = row + row_info->rowbytes;
+         png_bytep dp = sp;
+         png_byte save;
+         png_uint_32 i;
 
-            for (i = 0; i < row_width; i++)
-            {
-               save = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = save;
-            }
+         for (i = 0; i < row_width; i++)
+         {
+            save = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = save;
          }
+      }
 
 #ifdef PNG_READ_16BIT_SUPPORTED
-         /* This converts from GGAA to AAGG */
-         else
-         {
-            png_bytep sp = row + row_info->rowbytes;
-            png_bytep dp = sp;
-            png_byte save[2];
-            png_uint_32 i;
+      /* This converts from GGAA to AAGG */
+      else
+      {
+         png_bytep sp = row + row_info->rowbytes;
+         png_bytep dp = sp;
+         png_byte save[2];
+         png_uint_32 i;
 
-            for (i = 0; i < row_width; i++)
-            {
-               save[0] = *(--sp);
-               save[1] = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = *(--sp);
-               *(--dp) = save[0];
-               *(--dp) = save[1];
-            }
+         for (i = 0; i < row_width; i++)
+         {
+            save[0] = *(--sp);
+            save[1] = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = *(--sp);
+            *(--dp) = save[0];
+            *(--dp) = save[1];
          }
-#endif
       }
+#endif
    }
 }
 #endif
 
 #ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
-void /* PRIVATE */
+static void
 png_do_read_invert_alpha(png_row_infop row_info, png_bytep row)
 {
    png_uint_32 row_width;
@@ -2896,7 +2669,7 @@ png_do_read_invert_alpha(png_row_infop row_info, png_bytep row)
 
 #ifdef PNG_READ_FILLER_SUPPORTED
 /* Add filler channel if we have RGB color */
-void /* PRIVATE */
+static void
 png_do_read_filler(png_row_infop row_info, png_bytep row,
     png_uint_32 filler, png_uint_32 flags)
 {
@@ -2904,9 +2677,9 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
    png_uint_32 row_width = row_info->width;
 
 #ifdef PNG_READ_16BIT_SUPPORTED
-   png_byte hi_filler = (png_byte)((filler>>8) & 0xff);
+   png_byte hi_filler = (png_byte)(filler>>8);
 #endif
-   png_byte lo_filler = (png_byte)(filler & 0xff);
+   png_byte lo_filler = (png_byte)filler;
 
    png_debug(1, "in png_do_read_filler");
 
@@ -2915,11 +2688,11 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
    {
       if (row_info->bit_depth == 8)
       {
-         if (flags & PNG_FLAG_FILLER_AFTER)
+         if ((flags & PNG_FLAG_FILLER_AFTER) != 0)
          {
             /* This changes the data from G to GX */
-            png_bytep sp = row + (png_size_t)row_width;
-            png_bytep dp =  sp + (png_size_t)row_width;
+            png_bytep sp = row + (size_t)row_width;
+            png_bytep dp =  sp + (size_t)row_width;
             for (i = 1; i < row_width; i++)
             {
                *(--dp) = lo_filler;
@@ -2934,8 +2707,8 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
          else
          {
             /* This changes the data from G to XG */
-            png_bytep sp = row + (png_size_t)row_width;
-            png_bytep dp = sp  + (png_size_t)row_width;
+            png_bytep sp = row + (size_t)row_width;
+            png_bytep dp = sp  + (size_t)row_width;
             for (i = 0; i < row_width; i++)
             {
                *(--dp) = *(--sp);
@@ -2950,20 +2723,20 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
 #ifdef PNG_READ_16BIT_SUPPORTED
       else if (row_info->bit_depth == 16)
       {
-         if (flags & PNG_FLAG_FILLER_AFTER)
+         if ((flags & PNG_FLAG_FILLER_AFTER) != 0)
          {
             /* This changes the data from GG to GGXX */
-            png_bytep sp = row + (png_size_t)row_width * 2;
-            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            png_bytep sp = row + (size_t)row_width * 2;
+            png_bytep dp = sp  + (size_t)row_width * 2;
             for (i = 1; i < row_width; i++)
             {
-               *(--dp) = hi_filler;
                *(--dp) = lo_filler;
+               *(--dp) = hi_filler;
                *(--dp) = *(--sp);
                *(--dp) = *(--sp);
             }
-            *(--dp) = hi_filler;
             *(--dp) = lo_filler;
+            *(--dp) = hi_filler;
             row_info->channels = 2;
             row_info->pixel_depth = 32;
             row_info->rowbytes = row_width * 4;
@@ -2972,14 +2745,14 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
          else
          {
             /* This changes the data from GG to XXGG */
-            png_bytep sp = row + (png_size_t)row_width * 2;
-            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            png_bytep sp = row + (size_t)row_width * 2;
+            png_bytep dp = sp  + (size_t)row_width * 2;
             for (i = 0; i < row_width; i++)
             {
                *(--dp) = *(--sp);
                *(--dp) = *(--sp);
-               *(--dp) = hi_filler;
                *(--dp) = lo_filler;
+               *(--dp) = hi_filler;
             }
             row_info->channels = 2;
             row_info->pixel_depth = 32;
@@ -2992,11 +2765,11 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
    {
       if (row_info->bit_depth == 8)
       {
-         if (flags & PNG_FLAG_FILLER_AFTER)
+         if ((flags & PNG_FLAG_FILLER_AFTER) != 0)
          {
             /* This changes the data from RGB to RGBX */
-            png_bytep sp = row + (png_size_t)row_width * 3;
-            png_bytep dp = sp  + (png_size_t)row_width;
+            png_bytep sp = row + (size_t)row_width * 3;
+            png_bytep dp = sp  + (size_t)row_width;
             for (i = 1; i < row_width; i++)
             {
                *(--dp) = lo_filler;
@@ -3013,8 +2786,8 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
          else
          {
             /* This changes the data from RGB to XRGB */
-            png_bytep sp = row + (png_size_t)row_width * 3;
-            png_bytep dp = sp + (png_size_t)row_width;
+            png_bytep sp = row + (size_t)row_width * 3;
+            png_bytep dp = sp + (size_t)row_width;
             for (i = 0; i < row_width; i++)
             {
                *(--dp) = *(--sp);
@@ -3031,15 +2804,15 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
 #ifdef PNG_READ_16BIT_SUPPORTED
       else if (row_info->bit_depth == 16)
       {
-         if (flags & PNG_FLAG_FILLER_AFTER)
+         if ((flags & PNG_FLAG_FILLER_AFTER) != 0)
          {
             /* This changes the data from RRGGBB to RRGGBBXX */
-            png_bytep sp = row + (png_size_t)row_width * 6;
-            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            png_bytep sp = row + (size_t)row_width * 6;
+            png_bytep dp = sp  + (size_t)row_width * 2;
             for (i = 1; i < row_width; i++)
             {
-               *(--dp) = hi_filler;
                *(--dp) = lo_filler;
+               *(--dp) = hi_filler;
                *(--dp) = *(--sp);
                *(--dp) = *(--sp);
                *(--dp) = *(--sp);
@@ -3047,8 +2820,8 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
                *(--dp) = *(--sp);
                *(--dp) = *(--sp);
             }
-            *(--dp) = hi_filler;
             *(--dp) = lo_filler;
+            *(--dp) = hi_filler;
             row_info->channels = 4;
             row_info->pixel_depth = 64;
             row_info->rowbytes = row_width * 8;
@@ -3057,8 +2830,8 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
          else
          {
             /* This changes the data from RRGGBB to XXRRGGBB */
-            png_bytep sp = row + (png_size_t)row_width * 6;
-            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            png_bytep sp = row + (size_t)row_width * 6;
+            png_bytep dp = sp  + (size_t)row_width * 2;
             for (i = 0; i < row_width; i++)
             {
                *(--dp) = *(--sp);
@@ -3067,8 +2840,8 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
                *(--dp) = *(--sp);
                *(--dp) = *(--sp);
                *(--dp) = *(--sp);
-               *(--dp) = hi_filler;
                *(--dp) = lo_filler;
+               *(--dp) = hi_filler;
             }
 
             row_info->channels = 4;
@@ -3083,7 +2856,7 @@ png_do_read_filler(png_row_infop row_info, png_bytep row,
 
 #ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
 /* Expand grayscale files to RGB, with or without alpha */
-void /* PRIVATE */
+static void
 png_do_gray_to_rgb(png_row_infop row_info, png_bytep row)
 {
    png_uint_32 i;
@@ -3092,15 +2865,15 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row)
    png_debug(1, "in png_do_gray_to_rgb");
 
    if (row_info->bit_depth >= 8 &&
-       !(row_info->color_type & PNG_COLOR_MASK_COLOR))
+       (row_info->color_type & PNG_COLOR_MASK_COLOR) == 0)
    {
       if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
       {
          if (row_info->bit_depth == 8)
          {
             /* This changes G to RGB */
-            png_bytep sp = row + (png_size_t)row_width - 1;
-            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            png_bytep sp = row + (size_t)row_width - 1;
+            png_bytep dp = sp  + (size_t)row_width * 2;
             for (i = 0; i < row_width; i++)
             {
                *(dp--) = *sp;
@@ -3112,8 +2885,8 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row)
          else
          {
             /* This changes GG to RRGGBB */
-            png_bytep sp = row + (png_size_t)row_width * 2 - 1;
-            png_bytep dp = sp  + (png_size_t)row_width * 4;
+            png_bytep sp = row + (size_t)row_width * 2 - 1;
+            png_bytep dp = sp  + (size_t)row_width * 4;
             for (i = 0; i < row_width; i++)
             {
                *(dp--) = *sp;
@@ -3131,8 +2904,8 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row)
          if (row_info->bit_depth == 8)
          {
             /* This changes GA to RGBA */
-            png_bytep sp = row + (png_size_t)row_width * 2 - 1;
-            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            png_bytep sp = row + (size_t)row_width * 2 - 1;
+            png_bytep dp = sp  + (size_t)row_width * 2;
             for (i = 0; i < row_width; i++)
             {
                *(dp--) = *(sp--);
@@ -3145,8 +2918,8 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row)
          else
          {
             /* This changes GGAA to RRGGBBAA */
-            png_bytep sp = row + (png_size_t)row_width * 4 - 1;
-            png_bytep dp = sp  + (png_size_t)row_width * 4;
+            png_bytep sp = row + (size_t)row_width * 4 - 1;
+            png_bytep dp = sp  + (size_t)row_width * 4;
             for (i = 0; i < row_width; i++)
             {
                *(dp--) = *(sp--);
@@ -3174,7 +2947,7 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row)
  * using the equation given in Poynton's ColorFAQ of 1998-01-04 at
  * <http://www.inforamp.net/~poynton/>  (THIS LINK IS DEAD June 2008 but
  * versions dated 1998 through November 2002 have been archived at
- * http://web.archive.org/web/20000816232553/http://www.inforamp.net/
+ * https://web.archive.org/web/20000816232553/www.inforamp.net/
  * ~poynton/notes/colour_and_gamma/ColorFAQ.txt )
  * Charles Poynton poynton at poynton.com
  *
@@ -3217,32 +2990,30 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row)
  *  values this results in an implicit assumption that the original PNG RGB
  *  values were linear.
  *
- *  Other integer coefficents can be used via png_set_rgb_to_gray().  Because
+ *  Other integer coefficients can be used via png_set_rgb_to_gray().  Because
  *  the API takes just red and green coefficients the blue coefficient is
  *  calculated to make the sum 32768.  This will result in different rounding
  *  to that used above.
  */
-int /* PRIVATE */
-png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
-
+static int
+png_do_rgb_to_gray(png_structrp png_ptr, png_row_infop row_info, png_bytep row)
 {
    int rgb_error = 0;
 
    png_debug(1, "in png_do_rgb_to_gray");
 
-   if (!(row_info->color_type & PNG_COLOR_MASK_PALETTE) &&
-       (row_info->color_type & PNG_COLOR_MASK_COLOR))
+   if ((row_info->color_type & PNG_COLOR_MASK_PALETTE) == 0 &&
+       (row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
    {
-      PNG_CONST png_uint_32 rc = png_ptr->rgb_to_gray_red_coeff;
-      PNG_CONST png_uint_32 gc = png_ptr->rgb_to_gray_green_coeff;
-      PNG_CONST png_uint_32 bc = 32768 - rc - gc;
-      PNG_CONST png_uint_32 row_width = row_info->width;
-      PNG_CONST int have_alpha =
-         (row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0;
+      png_uint_32 rc = png_ptr->rgb_to_gray_red_coeff;
+      png_uint_32 gc = png_ptr->rgb_to_gray_green_coeff;
+      png_uint_32 bc = 32768 - rc - gc;
+      png_uint_32 row_width = row_info->width;
+      int have_alpha = (row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0;
 
       if (row_info->bit_depth == 8)
       {
-#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+#ifdef PNG_READ_GAMMA_SUPPORTED
          /* Notice that gamma to/from 1 are not necessarily inverses (if
           * there is an overall gamma correction).  Prior to 1.5.5 this code
           * checked the linearized values for equality; this doesn't match
@@ -3282,7 +3053,7 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
                   *(dp++) = red;
                }
 
-               if (have_alpha)
+               if (have_alpha != 0)
                   *(dp++) = *(sp++);
             }
          }
@@ -3302,7 +3073,7 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
                if (red != green || red != blue)
                {
                   rgb_error |= 1;
-                  /*NOTE: this is the historical approach which simply
+                  /* NOTE: this is the historical approach which simply
                    * truncates the results.
                    */
                   *(dp++) = (png_byte)((rc*red + gc*green + bc*blue)>>15);
@@ -3311,7 +3082,7 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
                else
                   *(dp++) = red;
 
-               if (have_alpha)
+               if (have_alpha != 0)
                   *(dp++) = *(sp++);
             }
          }
@@ -3319,7 +3090,7 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
 
       else /* RGB bit_depth == 16 */
       {
-#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+#ifdef PNG_READ_GAMMA_SUPPORTED
          if (png_ptr->gamma_16_to_1 != NULL && png_ptr->gamma_16_from_1 != NULL)
          {
             png_bytep sp = row;
@@ -3329,16 +3100,17 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
             for (i = 0; i < row_width; i++)
             {
                png_uint_16 red, green, blue, w;
+               png_byte hi,lo;
 
-               red   = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2;
-               green = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2;
-               blue  = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2;
+               hi=*(sp)++; lo=*(sp)++; red   = (png_uint_16)((hi << 8) | (lo));
+               hi=*(sp)++; lo=*(sp)++; green = (png_uint_16)((hi << 8) | (lo));
+               hi=*(sp)++; lo=*(sp)++; blue  = (png_uint_16)((hi << 8) | (lo));
 
                if (red == green && red == blue)
                {
                   if (png_ptr->gamma_16_table != NULL)
-                     w = png_ptr->gamma_16_table[(red&0xff)
-                         >> png_ptr->gamma_shift][red>>8];
+                     w = png_ptr->gamma_16_table[(red & 0xff)
+                         >> png_ptr->gamma_shift][red >> 8];
 
                   else
                      w = red;
@@ -3346,16 +3118,16 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
 
                else
                {
-                  png_uint_16 red_1   = png_ptr->gamma_16_to_1[(red&0xff)
+                  png_uint_16 red_1   = png_ptr->gamma_16_to_1[(red & 0xff)
                       >> png_ptr->gamma_shift][red>>8];
                   png_uint_16 green_1 =
-                      png_ptr->gamma_16_to_1[(green&0xff) >>
+                      png_ptr->gamma_16_to_1[(green & 0xff) >>
                       png_ptr->gamma_shift][green>>8];
-                  png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue&0xff)
+                  png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue & 0xff)
                       >> png_ptr->gamma_shift][blue>>8];
                   png_uint_16 gray16  = (png_uint_16)((rc*red_1 + gc*green_1
                       + bc*blue_1 + 16384)>>15);
-                  w = png_ptr->gamma_16_from_1[(gray16&0xff) >>
+                  w = png_ptr->gamma_16_from_1[(gray16 & 0xff) >>
                       png_ptr->gamma_shift][gray16 >> 8];
                   rgb_error |= 1;
                }
@@ -3363,7 +3135,7 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
                *(dp++) = (png_byte)((w>>8) & 0xff);
                *(dp++) = (png_byte)(w & 0xff);
 
-               if (have_alpha)
+               if (have_alpha != 0)
                {
                   *(dp++) = *(sp++);
                   *(dp++) = *(sp++);
@@ -3380,24 +3152,25 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
             for (i = 0; i < row_width; i++)
             {
                png_uint_16 red, green, blue, gray16;
+               png_byte hi,lo;
 
-               red   = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2;
-               green = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2;
-               blue  = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2;
+               hi=*(sp)++; lo=*(sp)++; red   = (png_uint_16)((hi << 8) | (lo));
+               hi=*(sp)++; lo=*(sp)++; green = (png_uint_16)((hi << 8) | (lo));
+               hi=*(sp)++; lo=*(sp)++; blue  = (png_uint_16)((hi << 8) | (lo));
 
                if (red != green || red != blue)
                   rgb_error |= 1;
 
-               /* From 1.5.5 in the 16 bit case do the accurate conversion even
+               /* From 1.5.5 in the 16-bit case do the accurate conversion even
                 * in the 'fast' case - this is because this is where the code
-                * ends up when handling linear 16 bit data.
+                * ends up when handling linear 16-bit data.
                 */
                gray16  = (png_uint_16)((rc*red + gc*green + bc*blue + 16384) >>
                   15);
-               *(dp++) = (png_byte)((gray16>>8) & 0xff);
+               *(dp++) = (png_byte)((gray16 >> 8) & 0xff);
                *(dp++) = (png_byte)(gray16 & 0xff);
 
-               if (have_alpha)
+               if (have_alpha != 0)
                {
                   *(dp++) = *(sp++);
                   *(dp++) = *(sp++);
@@ -3416,74 +3189,15 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
    return rgb_error;
 }
 #endif
-#endif /* PNG_READ_TRANSFORMS_SUPPORTED */
-
-#ifdef PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED
-/* Build a grayscale palette.  Palette is assumed to be 1 << bit_depth
- * large of png_color.  This lets grayscale images be treated as
- * paletted.  Most useful for gamma correction and simplification
- * of code.  This API is not used internally.
- */
-void PNGAPI
-png_build_grayscale_palette(int bit_depth, png_colorp palette)
-{
-   int num_palette;
-   int color_inc;
-   int i;
-   int v;
 
-   png_debug(1, "in png_do_build_grayscale_palette");
-
-   if (palette == NULL)
-      return;
-
-   switch (bit_depth)
-   {
-      case 1:
-         num_palette = 2;
-         color_inc = 0xff;
-         break;
-
-      case 2:
-         num_palette = 4;
-         color_inc = 0x55;
-         break;
-
-      case 4:
-         num_palette = 16;
-         color_inc = 0x11;
-         break;
-
-      case 8:
-         num_palette = 256;
-         color_inc = 1;
-         break;
-
-      default:
-         num_palette = 0;
-         color_inc = 0;
-         break;
-   }
-
-   for (i = 0, v = 0; i < num_palette; i++, v += color_inc)
-   {
-      palette[i].red = (png_byte)v;
-      palette[i].green = (png_byte)v;
-      palette[i].blue = (png_byte)v;
-   }
-}
-#endif
-
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-#if (defined PNG_READ_BACKGROUND_SUPPORTED) ||\
-   (defined PNG_READ_ALPHA_MODE_SUPPORTED)
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
+   defined(PNG_READ_ALPHA_MODE_SUPPORTED)
 /* Replace any alpha or transparency with the supplied background color.
  * "background" is already in the screen gamma, while "background_1" is
  * at a gamma of 1.0.  Paletted files have already been taken care of.
  */
-void /* PRIVATE */
-png_do_compose(png_row_infop row_info, png_bytep row, png_structp png_ptr)
+static void
+png_do_compose(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
 {
 #ifdef PNG_READ_GAMMA_SUPPORTED
    png_const_bytep gamma_table = png_ptr->gamma_table;
@@ -3493,699 +3207,731 @@ png_do_compose(png_row_infop row_info, png_bytep row, png_structp png_ptr)
    png_const_uint_16pp gamma_16_from_1 = png_ptr->gamma_16_from_1;
    png_const_uint_16pp gamma_16_to_1 = png_ptr->gamma_16_to_1;
    int gamma_shift = png_ptr->gamma_shift;
+   int optimize = (png_ptr->flags & PNG_FLAG_OPTIMIZE_ALPHA) != 0;
 #endif
 
    png_bytep sp;
    png_uint_32 i;
    png_uint_32 row_width = row_info->width;
-   int optimize = (png_ptr->flags & PNG_FLAG_OPTIMIZE_ALPHA) != 0;
    int shift;
 
    png_debug(1, "in png_do_compose");
 
+   switch (row_info->color_type)
    {
-      switch (row_info->color_type)
+      case PNG_COLOR_TYPE_GRAY:
       {
-         case PNG_COLOR_TYPE_GRAY:
+         switch (row_info->bit_depth)
          {
-            switch (row_info->bit_depth)
+            case 1:
             {
-               case 1:
-               {
-                  sp = row;
-                  shift = 7;
-                  for (i = 0; i < row_width; i++)
-                  {
-                     if ((png_uint_16)((*sp >> shift) & 0x01)
-                        == png_ptr->trans_color.gray)
-                     {
-                        *sp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
-                        *sp |= (png_byte)(png_ptr->background.gray << shift);
-                     }
-
-                     if (!shift)
-                     {
-                        shift = 7;
-                        sp++;
-                     }
-
-                     else
-                        shift--;
-                  }
-                  break;
-               }
-
-               case 2:
+               sp = row;
+               shift = 7;
+               for (i = 0; i < row_width; i++)
                {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-                  if (gamma_table != NULL)
+                  if ((png_uint_16)((*sp >> shift) & 0x01)
+                     == png_ptr->trans_color.gray)
                   {
-                     sp = row;
-                     shift = 6;
-                     for (i = 0; i < row_width; i++)
-                     {
-                        if ((png_uint_16)((*sp >> shift) & 0x03)
-                            == png_ptr->trans_color.gray)
-                        {
-                           *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
-                           *sp |= (png_byte)(png_ptr->background.gray << shift);
-                        }
-
-                        else
-                        {
-                           png_byte p = (png_byte)((*sp >> shift) & 0x03);
-                           png_byte g = (png_byte)((gamma_table [p | (p << 2) |
-                               (p << 4) | (p << 6)] >> 6) & 0x03);
-                           *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
-                           *sp |= (png_byte)(g << shift);
-                        }
-
-                        if (!shift)
-                        {
-                           shift = 6;
-                           sp++;
-                        }
-
-                        else
-                           shift -= 2;
-                     }
+                     unsigned int tmp = *sp & (0x7f7f >> (7 - shift));
+                     tmp |=
+                         (unsigned int)(png_ptr->background.gray << shift);
+                     *sp = (png_byte)(tmp & 0xff);
                   }
 
-                  else
-#endif
-                  {
-                     sp = row;
-                     shift = 6;
-                     for (i = 0; i < row_width; i++)
-                     {
-                        if ((png_uint_16)((*sp >> shift) & 0x03)
-                            == png_ptr->trans_color.gray)
-                        {
-                           *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
-                           *sp |= (png_byte)(png_ptr->background.gray << shift);
-                        }
-
-                        if (!shift)
-                        {
-                           shift = 6;
-                           sp++;
-                        }
-
-                        else
-                           shift -= 2;
-                     }
+                  if (shift == 0)
+                  {
+                     shift = 7;
+                     sp++;
                   }
-                  break;
+
+                  else
+                     shift--;
                }
+               break;
+            }
 
-               case 4:
-               {
+            case 2:
+            {
 #ifdef PNG_READ_GAMMA_SUPPORTED
-                  if (gamma_table != NULL)
+               if (gamma_table != NULL)
+               {
+                  sp = row;
+                  shift = 6;
+                  for (i = 0; i < row_width; i++)
                   {
-                     sp = row;
-                     shift = 4;
-                     for (i = 0; i < row_width; i++)
+                     if ((png_uint_16)((*sp >> shift) & 0x03)
+                         == png_ptr->trans_color.gray)
                      {
-                        if ((png_uint_16)((*sp >> shift) & 0x0f)
-                            == png_ptr->trans_color.gray)
-                        {
-                           *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
-                           *sp |= (png_byte)(png_ptr->background.gray << shift);
-                        }
-
-                        else
-                        {
-                           png_byte p = (png_byte)((*sp >> shift) & 0x0f);
-                           png_byte g = (png_byte)((gamma_table[p |
-                               (p << 4)] >> 4) & 0x0f);
-                           *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
-                           *sp |= (png_byte)(g << shift);
-                        }
-
-                        if (!shift)
-                        {
-                           shift = 4;
-                           sp++;
-                        }
-
-                        else
-                           shift -= 4;
+                        unsigned int tmp = *sp & (0x3f3f >> (6 - shift));
+                        tmp |=
+                           (unsigned int)png_ptr->background.gray << shift;
+                        *sp = (png_byte)(tmp & 0xff);
                      }
-                  }
 
-                  else
-#endif
-                  {
-                     sp = row;
-                     shift = 4;
-                     for (i = 0; i < row_width; i++)
+                     else
                      {
-                        if ((png_uint_16)((*sp >> shift) & 0x0f)
-                            == png_ptr->trans_color.gray)
-                        {
-                           *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
-                           *sp |= (png_byte)(png_ptr->background.gray << shift);
-                        }
-
-                        if (!shift)
-                        {
-                           shift = 4;
-                           sp++;
-                        }
+                        unsigned int p = (*sp >> shift) & 0x03;
+                        unsigned int g = (gamma_table [p | (p << 2) |
+                            (p << 4) | (p << 6)] >> 6) & 0x03;
+                        unsigned int tmp = *sp & (0x3f3f >> (6 - shift));
+                        tmp |= (unsigned int)(g << shift);
+                        *sp = (png_byte)(tmp & 0xff);
+                     }
 
-                        else
-                           shift -= 4;
+                     if (shift == 0)
+                     {
+                        shift = 6;
+                        sp++;
                      }
+
+                     else
+                        shift -= 2;
                   }
-                  break;
                }
 
-               case 8:
+               else
+#endif
                {
-#ifdef PNG_READ_GAMMA_SUPPORTED
-                  if (gamma_table != NULL)
+                  sp = row;
+                  shift = 6;
+                  for (i = 0; i < row_width; i++)
                   {
-                     sp = row;
-                     for (i = 0; i < row_width; i++, sp++)
+                     if ((png_uint_16)((*sp >> shift) & 0x03)
+                         == png_ptr->trans_color.gray)
                      {
-                        if (*sp == png_ptr->trans_color.gray)
-                           *sp = (png_byte)png_ptr->background.gray;
-
-                        else
-                           *sp = gamma_table[*sp];
+                        unsigned int tmp = *sp & (0x3f3f >> (6 - shift));
+                        tmp |=
+                            (unsigned int)png_ptr->background.gray << shift;
+                        *sp = (png_byte)(tmp & 0xff);
                      }
-                  }
-                  else
-#endif
-                  {
-                     sp = row;
-                     for (i = 0; i < row_width; i++, sp++)
+
+                     if (shift == 0)
                      {
-                        if (*sp == png_ptr->trans_color.gray)
-                           *sp = (png_byte)png_ptr->background.gray;
+                        shift = 6;
+                        sp++;
                      }
+
+                     else
+                        shift -= 2;
                   }
-                  break;
                }
+               break;
+            }
 
-               case 16:
-               {
+            case 4:
+            {
 #ifdef PNG_READ_GAMMA_SUPPORTED
-                  if (gamma_16 != NULL)
+               if (gamma_table != NULL)
+               {
+                  sp = row;
+                  shift = 4;
+                  for (i = 0; i < row_width; i++)
                   {
-                     sp = row;
-                     for (i = 0; i < row_width; i++, sp += 2)
+                     if ((png_uint_16)((*sp >> shift) & 0x0f)
+                         == png_ptr->trans_color.gray)
                      {
-                        png_uint_16 v;
-
-                        v = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                        unsigned int tmp = *sp & (0x0f0f >> (4 - shift));
+                        tmp |=
+                           (unsigned int)(png_ptr->background.gray << shift);
+                        *sp = (png_byte)(tmp & 0xff);
+                     }
 
-                        if (v == png_ptr->trans_color.gray)
-                        {
-                           /* Background is already in screen gamma */
-                           *sp = (png_byte)((png_ptr->background.gray >> 8) & 0xff);
-                           *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff);
-                        }
+                     else
+                     {
+                        unsigned int p = (*sp >> shift) & 0x0f;
+                        unsigned int g = (gamma_table[p | (p << 4)] >> 4) &
+                           0x0f;
+                        unsigned int tmp = *sp & (0x0f0f >> (4 - shift));
+                        tmp |= (unsigned int)(g << shift);
+                        *sp = (png_byte)(tmp & 0xff);
+                     }
 
-                        else
-                        {
-                           v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
-                           *sp = (png_byte)((v >> 8) & 0xff);
-                           *(sp + 1) = (png_byte)(v & 0xff);
-                        }
+                     if (shift == 0)
+                     {
+                        shift = 4;
+                        sp++;
                      }
+
+                     else
+                        shift -= 4;
                   }
-                  else
+               }
+
+               else
 #endif
+               {
+                  sp = row;
+                  shift = 4;
+                  for (i = 0; i < row_width; i++)
                   {
-                     sp = row;
-                     for (i = 0; i < row_width; i++, sp += 2)
+                     if ((png_uint_16)((*sp >> shift) & 0x0f)
+                         == png_ptr->trans_color.gray)
                      {
-                        png_uint_16 v;
-
-                        v = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                        unsigned int tmp = *sp & (0x0f0f >> (4 - shift));
+                        tmp |=
+                           (unsigned int)(png_ptr->background.gray << shift);
+                        *sp = (png_byte)(tmp & 0xff);
+                     }
 
-                        if (v == png_ptr->trans_color.gray)
-                        {
-                           *sp = (png_byte)((png_ptr->background.gray >> 8) & 0xff);
-                           *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff);
-                        }
+                     if (shift == 0)
+                     {
+                        shift = 4;
+                        sp++;
                      }
+
+                     else
+                        shift -= 4;
                   }
-                  break;
                }
-
-               default:
-                  break;
+               break;
             }
-            break;
-         }
 
-         case PNG_COLOR_TYPE_RGB:
-         {
-            if (row_info->bit_depth == 8)
+            case 8:
             {
 #ifdef PNG_READ_GAMMA_SUPPORTED
                if (gamma_table != NULL)
                {
                   sp = row;
-                  for (i = 0; i < row_width; i++, sp += 3)
+                  for (i = 0; i < row_width; i++, sp++)
                   {
-                     if (*sp == png_ptr->trans_color.red &&
-                         *(sp + 1) == png_ptr->trans_color.green &&
-                         *(sp + 2) == png_ptr->trans_color.blue)
-                     {
-                        *sp = (png_byte)png_ptr->background.red;
-                        *(sp + 1) = (png_byte)png_ptr->background.green;
-                        *(sp + 2) = (png_byte)png_ptr->background.blue;
-                     }
+                     if (*sp == png_ptr->trans_color.gray)
+                        *sp = (png_byte)png_ptr->background.gray;
 
                      else
-                     {
                         *sp = gamma_table[*sp];
-                        *(sp + 1) = gamma_table[*(sp + 1)];
-                        *(sp + 2) = gamma_table[*(sp + 2)];
-                     }
                   }
                }
                else
 #endif
                {
                   sp = row;
-                  for (i = 0; i < row_width; i++, sp += 3)
+                  for (i = 0; i < row_width; i++, sp++)
                   {
-                     if (*sp == png_ptr->trans_color.red &&
-                         *(sp + 1) == png_ptr->trans_color.green &&
-                         *(sp + 2) == png_ptr->trans_color.blue)
-                     {
-                        *sp = (png_byte)png_ptr->background.red;
-                        *(sp + 1) = (png_byte)png_ptr->background.green;
-                        *(sp + 2) = (png_byte)png_ptr->background.blue;
-                     }
+                     if (*sp == png_ptr->trans_color.gray)
+                        *sp = (png_byte)png_ptr->background.gray;
                   }
                }
+               break;
             }
-            else /* if (row_info->bit_depth == 16) */
+
+            case 16:
             {
 #ifdef PNG_READ_GAMMA_SUPPORTED
                if (gamma_16 != NULL)
                {
                   sp = row;
-                  for (i = 0; i < row_width; i++, sp += 6)
+                  for (i = 0; i < row_width; i++, sp += 2)
                   {
-                     png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                     png_uint_16 v;
 
-                     png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
-                         + *(sp + 3));
-
-                     png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
-                         + *(sp + 5));
+                     v = (png_uint_16)(((*sp) << 8) + *(sp + 1));
 
-                     if (r == png_ptr->trans_color.red &&
-                         g == png_ptr->trans_color.green &&
-                         b == png_ptr->trans_color.blue)
+                     if (v == png_ptr->trans_color.gray)
                      {
                         /* Background is already in screen gamma */
-                        *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
-                        *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) & 0xff);
-                        *(sp + 3) = (png_byte)(png_ptr->background.green & 0xff);
-                        *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) & 0xff);
-                        *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
+                        *sp = (png_byte)((png_ptr->background.gray >> 8)
+                             & 0xff);
+                        *(sp + 1) = (png_byte)(png_ptr->background.gray
+                             & 0xff);
                      }
 
                      else
                      {
-                        png_uint_16 v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                        v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
                         *sp = (png_byte)((v >> 8) & 0xff);
                         *(sp + 1) = (png_byte)(v & 0xff);
-
-                        v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)];
-                        *(sp + 2) = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 3) = (png_byte)(v & 0xff);
-
-                        v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)];
-                        *(sp + 4) = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 5) = (png_byte)(v & 0xff);
                      }
                   }
                }
-
                else
 #endif
                {
                   sp = row;
-                  for (i = 0; i < row_width; i++, sp += 6)
+                  for (i = 0; i < row_width; i++, sp += 2)
                   {
-                     png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
-
-                     png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
-                         + *(sp + 3));
+                     png_uint_16 v;
 
-                     png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
-                         + *(sp + 5));
+                     v = (png_uint_16)(((*sp) << 8) + *(sp + 1));
 
-                     if (r == png_ptr->trans_color.red &&
-                         g == png_ptr->trans_color.green &&
-                         b == png_ptr->trans_color.blue)
+                     if (v == png_ptr->trans_color.gray)
                      {
-                        *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
-                        *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) & 0xff);
-                        *(sp + 3) = (png_byte)(png_ptr->background.green & 0xff);
-                        *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) & 0xff);
-                        *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
+                        *sp = (png_byte)((png_ptr->background.gray >> 8)
+                             & 0xff);
+                        *(sp + 1) = (png_byte)(png_ptr->background.gray
+                             & 0xff);
                      }
                   }
                }
+               break;
             }
-            break;
+
+            default:
+               break;
          }
+         break;
+      }
 
-         case PNG_COLOR_TYPE_GRAY_ALPHA:
+      case PNG_COLOR_TYPE_RGB:
+      {
+         if (row_info->bit_depth == 8)
          {
-            if (row_info->bit_depth == 8)
-            {
 #ifdef PNG_READ_GAMMA_SUPPORTED
-               if (gamma_to_1 != NULL && gamma_from_1 != NULL &&
-                   gamma_table != NULL)
+            if (gamma_table != NULL)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 3)
                {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp += 2)
+                  if (*sp == png_ptr->trans_color.red &&
+                      *(sp + 1) == png_ptr->trans_color.green &&
+                      *(sp + 2) == png_ptr->trans_color.blue)
                   {
-                     png_uint_16 a = *(sp + 1);
+                     *sp = (png_byte)png_ptr->background.red;
+                     *(sp + 1) = (png_byte)png_ptr->background.green;
+                     *(sp + 2) = (png_byte)png_ptr->background.blue;
+                  }
 
-                     if (a == 0xff)
-                        *sp = gamma_table[*sp];
+                  else
+                  {
+                     *sp = gamma_table[*sp];
+                     *(sp + 1) = gamma_table[*(sp + 1)];
+                     *(sp + 2) = gamma_table[*(sp + 2)];
+                  }
+               }
+            }
+            else
+#endif
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 3)
+               {
+                  if (*sp == png_ptr->trans_color.red &&
+                      *(sp + 1) == png_ptr->trans_color.green &&
+                      *(sp + 2) == png_ptr->trans_color.blue)
+                  {
+                     *sp = (png_byte)png_ptr->background.red;
+                     *(sp + 1) = (png_byte)png_ptr->background.green;
+                     *(sp + 2) = (png_byte)png_ptr->background.blue;
+                  }
+               }
+            }
+         }
+         else /* if (row_info->bit_depth == 16) */
+         {
+#ifdef PNG_READ_GAMMA_SUPPORTED
+            if (gamma_16 != NULL)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 6)
+               {
+                  png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
 
-                     else if (a == 0)
-                     {
-                        /* Background is already in screen gamma */
-                        *sp = (png_byte)png_ptr->background.gray;
-                     }
+                  png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
+                      + *(sp + 3));
 
-                     else
-                     {
-                        png_byte v, w;
+                  png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
+                      + *(sp + 5));
 
-                        v = gamma_to_1[*sp];
-                        png_composite(w, v, a, png_ptr->background_1.gray);
-                        if (!optimize)
-                           w = gamma_from_1[w];
-                        *sp = w;
-                     }
+                  if (r == png_ptr->trans_color.red &&
+                      g == png_ptr->trans_color.green &&
+                      b == png_ptr->trans_color.blue)
+                  {
+                     /* Background is already in screen gamma */
+                     *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
+                     *(sp + 2) = (png_byte)((png_ptr->background.green >> 8)
+                             & 0xff);
+                     *(sp + 3) = (png_byte)(png_ptr->background.green
+                             & 0xff);
+                     *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8)
+                             & 0xff);
+                     *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
+                  }
+
+                  else
+                  {
+                     png_uint_16 v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                     *sp = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(v & 0xff);
+
+                     v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)];
+                     *(sp + 2) = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 3) = (png_byte)(v & 0xff);
+
+                     v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)];
+                     *(sp + 4) = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 5) = (png_byte)(v & 0xff);
                   }
                }
-               else
+            }
+
+            else
 #endif
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 6)
                {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp += 2)
+                  png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+
+                  png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
+                      + *(sp + 3));
+
+                  png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
+                      + *(sp + 5));
+
+                  if (r == png_ptr->trans_color.red &&
+                      g == png_ptr->trans_color.green &&
+                      b == png_ptr->trans_color.blue)
                   {
-                     png_byte a = *(sp + 1);
+                     *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
+                     *(sp + 2) = (png_byte)((png_ptr->background.green >> 8)
+                             & 0xff);
+                     *(sp + 3) = (png_byte)(png_ptr->background.green
+                             & 0xff);
+                     *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8)
+                             & 0xff);
+                     *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
+                  }
+               }
+            }
+         }
+         break;
+      }
 
-                     if (a == 0)
-                        *sp = (png_byte)png_ptr->background.gray;
+      case PNG_COLOR_TYPE_GRAY_ALPHA:
+      {
+         if (row_info->bit_depth == 8)
+         {
+#ifdef PNG_READ_GAMMA_SUPPORTED
+            if (gamma_to_1 != NULL && gamma_from_1 != NULL &&
+                gamma_table != NULL)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 2)
+               {
+                  png_uint_16 a = *(sp + 1);
+
+                  if (a == 0xff)
+                     *sp = gamma_table[*sp];
+
+                  else if (a == 0)
+                  {
+                     /* Background is already in screen gamma */
+                     *sp = (png_byte)png_ptr->background.gray;
+                  }
+
+                  else
+                  {
+                     png_byte v, w;
 
-                     else if (a < 0xff)
-                        png_composite(*sp, *sp, a, png_ptr->background_1.gray);
+                     v = gamma_to_1[*sp];
+                     png_composite(w, v, a, png_ptr->background_1.gray);
+                     if (optimize == 0)
+                        w = gamma_from_1[w];
+                     *sp = w;
                   }
                }
             }
-            else /* if (png_ptr->bit_depth == 16) */
+            else
+#endif
             {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 2)
+               {
+                  png_byte a = *(sp + 1);
+
+                  if (a == 0)
+                     *sp = (png_byte)png_ptr->background.gray;
+
+                  else if (a < 0xff)
+                     png_composite(*sp, *sp, a, png_ptr->background.gray);
+               }
+            }
+         }
+         else /* if (png_ptr->bit_depth == 16) */
+         {
 #ifdef PNG_READ_GAMMA_SUPPORTED
-               if (gamma_16 != NULL && gamma_16_from_1 != NULL &&
-                   gamma_16_to_1 != NULL)
+            if (gamma_16 != NULL && gamma_16_from_1 != NULL &&
+                gamma_16_to_1 != NULL)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 4)
                {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp += 4)
-                  {
-                     png_uint_16 a = (png_uint_16)(((*(sp + 2)) << 8)
-                         + *(sp + 3));
+                  png_uint_16 a = (png_uint_16)(((*(sp + 2)) << 8)
+                      + *(sp + 3));
 
-                     if (a == (png_uint_16)0xffff)
-                     {
-                        png_uint_16 v;
+                  if (a == (png_uint_16)0xffff)
+                  {
+                     png_uint_16 v;
 
-                        v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
-                        *sp = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(v & 0xff);
-                     }
+                     v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                     *sp = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(v & 0xff);
+                  }
 
-                     else if (a == 0)
-                     {
-                        /* Background is already in screen gamma */
-                        *sp = (png_byte)((png_ptr->background.gray >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff);
-                     }
+                  else if (a == 0)
+                  {
+                     /* Background is already in screen gamma */
+                     *sp = (png_byte)((png_ptr->background.gray >> 8)
+                             & 0xff);
+                     *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff);
+                  }
 
-                     else
-                     {
-                        png_uint_16 g, v, w;
+                  else
+                  {
+                     png_uint_16 g, v, w;
 
-                        g = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp];
-                        png_composite_16(v, g, a, png_ptr->background_1.gray);
-                        if (optimize)
-                           w = v;
-                        else
-                           w = gamma_16_from_1[(v&0xff) >> gamma_shift][v >> 8];
-                        *sp = (png_byte)((w >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(w & 0xff);
-                     }
+                     g = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp];
+                     png_composite_16(v, g, a, png_ptr->background_1.gray);
+                     if (optimize != 0)
+                        w = v;
+                     else
+                        w = gamma_16_from_1[(v & 0xff) >>
+                            gamma_shift][v >> 8];
+                     *sp = (png_byte)((w >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(w & 0xff);
                   }
                }
-               else
+            }
+            else
 #endif
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 4)
                {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp += 4)
-                  {
-                     png_uint_16 a = (png_uint_16)(((*(sp + 2)) << 8)
-                         + *(sp + 3));
+                  png_uint_16 a = (png_uint_16)(((*(sp + 2)) << 8)
+                      + *(sp + 3));
 
-                     if (a == 0)
-                     {
-                        *sp = (png_byte)((png_ptr->background.gray >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff);
-                     }
+                  if (a == 0)
+                  {
+                     *sp = (png_byte)((png_ptr->background.gray >> 8)
+                             & 0xff);
+                     *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff);
+                  }
 
-                     else if (a < 0xffff)
-                     {
-                        png_uint_16 g, v;
+                  else if (a < 0xffff)
+                  {
+                     png_uint_16 g, v;
 
-                        g = (png_uint_16)(((*sp) << 8) + *(sp + 1));
-                        png_composite_16(v, g, a, png_ptr->background_1.gray);
-                        *sp = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(v & 0xff);
-                     }
+                     g = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                     png_composite_16(v, g, a, png_ptr->background.gray);
+                     *sp = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(v & 0xff);
                   }
                }
             }
-            break;
          }
+         break;
+      }
 
-         case PNG_COLOR_TYPE_RGB_ALPHA:
+      case PNG_COLOR_TYPE_RGB_ALPHA:
+      {
+         if (row_info->bit_depth == 8)
          {
-            if (row_info->bit_depth == 8)
-            {
 #ifdef PNG_READ_GAMMA_SUPPORTED
-               if (gamma_to_1 != NULL && gamma_from_1 != NULL &&
-                   gamma_table != NULL)
+            if (gamma_to_1 != NULL && gamma_from_1 != NULL &&
+                gamma_table != NULL)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 4)
                {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp += 4)
+                  png_byte a = *(sp + 3);
+
+                  if (a == 0xff)
                   {
-                     png_byte a = *(sp + 3);
+                     *sp = gamma_table[*sp];
+                     *(sp + 1) = gamma_table[*(sp + 1)];
+                     *(sp + 2) = gamma_table[*(sp + 2)];
+                  }
 
-                     if (a == 0xff)
-                     {
-                        *sp = gamma_table[*sp];
-                        *(sp + 1) = gamma_table[*(sp + 1)];
-                        *(sp + 2) = gamma_table[*(sp + 2)];
-                     }
+                  else if (a == 0)
+                  {
+                     /* Background is already in screen gamma */
+                     *sp = (png_byte)png_ptr->background.red;
+                     *(sp + 1) = (png_byte)png_ptr->background.green;
+                     *(sp + 2) = (png_byte)png_ptr->background.blue;
+                  }
 
-                     else if (a == 0)
-                     {
-                        /* Background is already in screen gamma */
-                        *sp = (png_byte)png_ptr->background.red;
-                        *(sp + 1) = (png_byte)png_ptr->background.green;
-                        *(sp + 2) = (png_byte)png_ptr->background.blue;
-                     }
+                  else
+                  {
+                     png_byte v, w;
 
-                     else
-                     {
-                        png_byte v, w;
-
-                        v = gamma_to_1[*sp];
-                        png_composite(w, v, a, png_ptr->background_1.red);
-                        if (!optimize) w = gamma_from_1[w];
-                        *sp = w;
-
-                        v = gamma_to_1[*(sp + 1)];
-                        png_composite(w, v, a, png_ptr->background_1.green);
-                        if (!optimize) w = gamma_from_1[w];
-                        *(sp + 1) = w;
-
-                        v = gamma_to_1[*(sp + 2)];
-                        png_composite(w, v, a, png_ptr->background_1.blue);
-                        if (!optimize) w = gamma_from_1[w];
-                        *(sp + 2) = w;
-                     }
+                     v = gamma_to_1[*sp];
+                     png_composite(w, v, a, png_ptr->background_1.red);
+                     if (optimize == 0) w = gamma_from_1[w];
+                     *sp = w;
+
+                     v = gamma_to_1[*(sp + 1)];
+                     png_composite(w, v, a, png_ptr->background_1.green);
+                     if (optimize == 0) w = gamma_from_1[w];
+                     *(sp + 1) = w;
+
+                     v = gamma_to_1[*(sp + 2)];
+                     png_composite(w, v, a, png_ptr->background_1.blue);
+                     if (optimize == 0) w = gamma_from_1[w];
+                     *(sp + 2) = w;
                   }
                }
-               else
+            }
+            else
 #endif
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 4)
                {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp += 4)
-                  {
-                     png_byte a = *(sp + 3);
+                  png_byte a = *(sp + 3);
 
-                     if (a == 0)
-                     {
-                        *sp = (png_byte)png_ptr->background.red;
-                        *(sp + 1) = (png_byte)png_ptr->background.green;
-                        *(sp + 2) = (png_byte)png_ptr->background.blue;
-                     }
+                  if (a == 0)
+                  {
+                     *sp = (png_byte)png_ptr->background.red;
+                     *(sp + 1) = (png_byte)png_ptr->background.green;
+                     *(sp + 2) = (png_byte)png_ptr->background.blue;
+                  }
 
-                     else if (a < 0xff)
-                     {
-                        png_composite(*sp, *sp, a, png_ptr->background.red);
+                  else if (a < 0xff)
+                  {
+                     png_composite(*sp, *sp, a, png_ptr->background.red);
 
-                        png_composite(*(sp + 1), *(sp + 1), a,
-                            png_ptr->background.green);
+                     png_composite(*(sp + 1), *(sp + 1), a,
+                         png_ptr->background.green);
 
-                        png_composite(*(sp + 2), *(sp + 2), a,
-                            png_ptr->background.blue);
-                     }
+                     png_composite(*(sp + 2), *(sp + 2), a,
+                         png_ptr->background.blue);
                   }
                }
             }
-            else /* if (row_info->bit_depth == 16) */
-            {
+         }
+         else /* if (row_info->bit_depth == 16) */
+         {
 #ifdef PNG_READ_GAMMA_SUPPORTED
-               if (gamma_16 != NULL && gamma_16_from_1 != NULL &&
-                   gamma_16_to_1 != NULL)
+            if (gamma_16 != NULL && gamma_16_from_1 != NULL &&
+                gamma_16_to_1 != NULL)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 8)
                {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp += 8)
-                  {
-                     png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6))
-                         << 8) + (png_uint_16)(*(sp + 7)));
+                  png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6))
+                      << 8) + (png_uint_16)(*(sp + 7)));
 
-                     if (a == (png_uint_16)0xffff)
-                     {
-                        png_uint_16 v;
+                  if (a == (png_uint_16)0xffff)
+                  {
+                     png_uint_16 v;
 
-                        v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
-                        *sp = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(v & 0xff);
+                     v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                     *sp = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(v & 0xff);
 
-                        v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)];
-                        *(sp + 2) = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 3) = (png_byte)(v & 0xff);
+                     v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)];
+                     *(sp + 2) = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 3) = (png_byte)(v & 0xff);
 
-                        v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)];
-                        *(sp + 4) = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 5) = (png_byte)(v & 0xff);
-                     }
+                     v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)];
+                     *(sp + 4) = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 5) = (png_byte)(v & 0xff);
+                  }
 
-                     else if (a == 0)
-                     {
-                        /* Background is already in screen gamma */
-                        *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
-                        *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) & 0xff);
-                        *(sp + 3) = (png_byte)(png_ptr->background.green & 0xff);
-                        *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) & 0xff);
-                        *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
-                     }
+                  else if (a == 0)
+                  {
+                     /* Background is already in screen gamma */
+                     *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
+                     *(sp + 2) = (png_byte)((png_ptr->background.green >> 8)
+                             & 0xff);
+                     *(sp + 3) = (png_byte)(png_ptr->background.green
+                             & 0xff);
+                     *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8)
+                             & 0xff);
+                     *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
+                  }
 
-                     else
-                     {
-                        png_uint_16 v, w;
-
-                        v = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp];
-                        png_composite_16(w, v, a, png_ptr->background_1.red);
-                        if (!optimize)
-                           w = gamma_16_from_1[((w&0xff) >> gamma_shift)][w >> 8];
-                        *sp = (png_byte)((w >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(w & 0xff);
-
-                        v = gamma_16_to_1[*(sp + 3) >> gamma_shift][*(sp + 2)];
-                        png_composite_16(w, v, a, png_ptr->background_1.green);
-                        if (!optimize)
-                           w = gamma_16_from_1[((w&0xff) >> gamma_shift)][w >> 8];
-
-                        *(sp + 2) = (png_byte)((w >> 8) & 0xff);
-                        *(sp + 3) = (png_byte)(w & 0xff);
-
-                        v = gamma_16_to_1[*(sp + 5) >> gamma_shift][*(sp + 4)];
-                        png_composite_16(w, v, a, png_ptr->background_1.blue);
-                        if (!optimize)
-                           w = gamma_16_from_1[((w&0xff) >> gamma_shift)][w >> 8];
-
-                        *(sp + 4) = (png_byte)((w >> 8) & 0xff);
-                        *(sp + 5) = (png_byte)(w & 0xff);
-                     }
+                  else
+                  {
+                     png_uint_16 v, w;
+
+                     v = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp];
+                     png_composite_16(w, v, a, png_ptr->background_1.red);
+                     if (optimize == 0)
+                        w = gamma_16_from_1[((w & 0xff) >> gamma_shift)][w >>
+                             8];
+                     *sp = (png_byte)((w >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(w & 0xff);
+
+                     v = gamma_16_to_1[*(sp + 3) >> gamma_shift][*(sp + 2)];
+                     png_composite_16(w, v, a, png_ptr->background_1.green);
+                     if (optimize == 0)
+                        w = gamma_16_from_1[((w & 0xff) >> gamma_shift)][w >>
+                             8];
+
+                     *(sp + 2) = (png_byte)((w >> 8) & 0xff);
+                     *(sp + 3) = (png_byte)(w & 0xff);
+
+                     v = gamma_16_to_1[*(sp + 5) >> gamma_shift][*(sp + 4)];
+                     png_composite_16(w, v, a, png_ptr->background_1.blue);
+                     if (optimize == 0)
+                        w = gamma_16_from_1[((w & 0xff) >> gamma_shift)][w >>
+                             8];
+
+                     *(sp + 4) = (png_byte)((w >> 8) & 0xff);
+                     *(sp + 5) = (png_byte)(w & 0xff);
                   }
                }
+            }
 
-               else
+            else
 #endif
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++, sp += 8)
                {
-                  sp = row;
-                  for (i = 0; i < row_width; i++, sp += 8)
-                  {
-                     png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6))
-                         << 8) + (png_uint_16)(*(sp + 7)));
+                  png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6))
+                      << 8) + (png_uint_16)(*(sp + 7)));
 
-                     if (a == 0)
-                     {
-                        *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
-                        *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) & 0xff);
-                        *(sp + 3) = (png_byte)(png_ptr->background.green & 0xff);
-                        *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) & 0xff);
-                        *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
-                     }
+                  if (a == 0)
+                  {
+                     *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff);
+                     *(sp + 2) = (png_byte)((png_ptr->background.green >> 8)
+                             & 0xff);
+                     *(sp + 3) = (png_byte)(png_ptr->background.green
+                             & 0xff);
+                     *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8)
+                             & 0xff);
+                     *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff);
+                  }
 
-                     else if (a < 0xffff)
-                     {
-                        png_uint_16 v;
+                  else if (a < 0xffff)
+                  {
+                     png_uint_16 v;
 
-                        png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
-                        png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
-                            + *(sp + 3));
-                        png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
-                            + *(sp + 5));
+                     png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                     png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
+                         + *(sp + 3));
+                     png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
+                         + *(sp + 5));
 
-                        png_composite_16(v, r, a, png_ptr->background.red);
-                        *sp = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 1) = (png_byte)(v & 0xff);
+                     png_composite_16(v, r, a, png_ptr->background.red);
+                     *sp = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 1) = (png_byte)(v & 0xff);
 
-                        png_composite_16(v, g, a, png_ptr->background.green);
-                        *(sp + 2) = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 3) = (png_byte)(v & 0xff);
+                     png_composite_16(v, g, a, png_ptr->background.green);
+                     *(sp + 2) = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 3) = (png_byte)(v & 0xff);
 
-                        png_composite_16(v, b, a, png_ptr->background.blue);
-                        *(sp + 4) = (png_byte)((v >> 8) & 0xff);
-                        *(sp + 5) = (png_byte)(v & 0xff);
-                     }
+                     png_composite_16(v, b, a, png_ptr->background.blue);
+                     *(sp + 4) = (png_byte)((v >> 8) & 0xff);
+                     *(sp + 5) = (png_byte)(v & 0xff);
                   }
                }
             }
-            break;
          }
-
-         default:
-            break;
+         break;
       }
+
+      default:
+         break;
    }
 }
-#endif /* PNG_READ_BACKGROUND_SUPPORTED || PNG_READ_ALPHA_MODE_SUPPORTED */
+#endif /* READ_BACKGROUND || READ_ALPHA_MODE */
 
 #ifdef PNG_READ_GAMMA_SUPPORTED
 /* Gamma correct the image, avoiding the alpha channel.  Make sure
@@ -4194,8 +3940,8 @@ png_do_compose(png_row_infop row_info, png_bytep row, png_structp png_ptr)
  * is 16, use gamma_16_table and gamma_shift.  Build these with
  * build_gamma_table().
  */
-void /* PRIVATE */
-png_do_gamma(png_row_infop row_info, png_bytep row, png_structp png_ptr)
+static void
+png_do_gamma(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
 {
    png_const_bytep gamma_table = png_ptr->gamma_table;
    png_const_uint_16pp gamma_16_table = png_ptr->gamma_16_table;
@@ -4395,23 +4141,22 @@ png_do_gamma(png_row_infop row_info, png_bytep row, png_structp png_ptr)
  * linear.)  Called only with color types that have an alpha channel.  Needs the
  * from_1 tables.
  */
-void /* PRIVATE */
-png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structp png_ptr)
+static void
+png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
 {
    png_uint_32 row_width = row_info->width;
 
    png_debug(1, "in png_do_encode_alpha");
 
-   if (row_info->color_type & PNG_COLOR_MASK_ALPHA)
+   if ((row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0)
    {
       if (row_info->bit_depth == 8)
       {
-         PNG_CONST png_bytep table = png_ptr->gamma_from_1;
+         png_bytep table = png_ptr->gamma_from_1;
 
          if (table != NULL)
          {
-            PNG_CONST int step =
-               (row_info->color_type & PNG_COLOR_MASK_COLOR) ? 4 : 2;
+            int step = (row_info->color_type & PNG_COLOR_MASK_COLOR) ? 4 : 2;
 
             /* The alpha channel is the last component: */
             row += step - 1;
@@ -4425,13 +4170,12 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structp png_ptr)
 
       else if (row_info->bit_depth == 16)
       {
-         PNG_CONST png_uint_16pp table = png_ptr->gamma_16_from_1;
-         PNG_CONST int gamma_shift = png_ptr->gamma_shift;
+         png_uint_16pp table = png_ptr->gamma_16_from_1;
+         int gamma_shift = png_ptr->gamma_shift;
 
          if (table != NULL)
          {
-            PNG_CONST int step =
-               (row_info->color_type & PNG_COLOR_MASK_COLOR) ? 8 : 4;
+            int step = (row_info->color_type & PNG_COLOR_MASK_COLOR) ? 8 : 4;
 
             /* The alpha channel is the last component: */
             row += step - 2;
@@ -4461,9 +4205,10 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structp png_ptr)
 /* Expands a palette row to an RGB or RGBA row depending
  * upon whether you supply trans and num_trans.
  */
-void /* PRIVATE */
-png_do_expand_palette(png_row_infop row_info, png_bytep row,
-   png_const_colorp palette, png_const_bytep trans_alpha, int num_trans)
+static void
+png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
+    png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
+    int num_trans)
 {
    int shift, value;
    png_bytep sp, dp;
@@ -4480,8 +4225,8 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
          {
             case 1:
             {
-               sp = row + (png_size_t)((row_width - 1) >> 3);
-               dp = row + (png_size_t)row_width - 1;
+               sp = row + (size_t)((row_width - 1) >> 3);
+               dp = row + (size_t)row_width - 1;
                shift = 7 - (int)((row_width + 7) & 0x07);
                for (i = 0; i < row_width; i++)
                {
@@ -4507,8 +4252,8 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
 
             case 2:
             {
-               sp = row + (png_size_t)((row_width - 1) >> 2);
-               dp = row + (png_size_t)row_width - 1;
+               sp = row + (size_t)((row_width - 1) >> 2);
+               dp = row + (size_t)row_width - 1;
                shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);
                for (i = 0; i < row_width; i++)
                {
@@ -4530,8 +4275,8 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
 
             case 4:
             {
-               sp = row + (png_size_t)((row_width - 1) >> 1);
-               dp = row + (png_size_t)row_width - 1;
+               sp = row + (size_t)((row_width - 1) >> 1);
+               dp = row + (size_t)row_width - 1;
                shift = (int)((row_width & 0x01) << 2);
                for (i = 0; i < row_width; i++)
                {
@@ -4564,17 +4309,30 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
          {
             if (num_trans > 0)
             {
-               sp = row + (png_size_t)row_width - 1;
-               dp = row + (png_size_t)(row_width << 2) - 1;
+               sp = row + (size_t)row_width - 1;
+               dp = row + ((size_t)row_width << 2) - 1;
 
-               for (i = 0; i < row_width; i++)
+               i = 0;
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+               if (png_ptr->riffled_palette != NULL)
+               {
+                  /* The RGBA optimization works with png_ptr->bit_depth == 8
+                   * but sometimes row_info->bit_depth has been changed to 8.
+                   * In these cases, the palette hasn't been riffled.
+                   */
+                  i = png_do_expand_palette_rgba8_neon(png_ptr, row_info, row,
+                      &sp, &dp);
+               }
+#else
+               PNG_UNUSED(png_ptr)
+#endif
+
+               for (; i < row_width; i++)
                {
                   if ((int)(*sp) >= num_trans)
                      *dp-- = 0xff;
-
                   else
                      *dp-- = trans_alpha[*sp];
-
                   *dp-- = palette[*sp].blue;
                   *dp-- = palette[*sp].green;
                   *dp-- = palette[*sp].red;
@@ -4589,10 +4347,17 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
 
             else
             {
-               sp = row + (png_size_t)row_width - 1;
-               dp = row + (png_size_t)(row_width * 3) - 1;
+               sp = row + (size_t)row_width - 1;
+               dp = row + (size_t)(row_width * 3) - 1;
+               i = 0;
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+               i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row,
+                   &sp, &dp);
+#else
+               PNG_UNUSED(png_ptr)
+#endif
 
-               for (i = 0; i < row_width; i++)
+               for (; i < row_width; i++)
                {
                   *dp-- = palette[*sp].blue;
                   *dp-- = palette[*sp].green;
@@ -4614,7 +4379,7 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
 /* If the bit depth < 8, it is expanded to 8.  Also, if the already
  * expanded transparency value is supplied, an alpha channel is built.
  */
-void /* PRIVATE */
+static void
 png_do_expand(png_row_infop row_info, png_bytep row,
     png_const_color_16p trans_color)
 {
@@ -4625,193 +4390,130 @@ png_do_expand(png_row_infop row_info, png_bytep row,
 
    png_debug(1, "in png_do_expand");
 
+   if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
    {
-      if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
-      {
-         png_uint_16 gray = (png_uint_16)(trans_color ? trans_color->gray : 0);
+      unsigned int gray = trans_color != NULL ? trans_color->gray : 0;
 
-         if (row_info->bit_depth < 8)
+      if (row_info->bit_depth < 8)
+      {
+         switch (row_info->bit_depth)
          {
-            switch (row_info->bit_depth)
+            case 1:
             {
-               case 1:
+               gray = (gray & 0x01) * 0xff;
+               sp = row + (size_t)((row_width - 1) >> 3);
+               dp = row + (size_t)row_width - 1;
+               shift = 7 - (int)((row_width + 7) & 0x07);
+               for (i = 0; i < row_width; i++)
                {
-                  gray = (png_uint_16)((gray & 0x01) * 0xff);
-                  sp = row + (png_size_t)((row_width - 1) >> 3);
-                  dp = row + (png_size_t)row_width - 1;
-                  shift = 7 - (int)((row_width + 7) & 0x07);
-                  for (i = 0; i < row_width; i++)
-                  {
-                     if ((*sp >> shift) & 0x01)
-                        *dp = 0xff;
-
-                     else
-                        *dp = 0;
-
-                     if (shift == 7)
-                     {
-                        shift = 0;
-                        sp--;
-                     }
-
-                     else
-                        shift++;
+                  if ((*sp >> shift) & 0x01)
+                     *dp = 0xff;
 
-                     dp--;
-                  }
-                  break;
-               }
+                  else
+                     *dp = 0;
 
-               case 2:
-               {
-                  gray = (png_uint_16)((gray & 0x03) * 0x55);
-                  sp = row + (png_size_t)((row_width - 1) >> 2);
-                  dp = row + (png_size_t)row_width - 1;
-                  shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);
-                  for (i = 0; i < row_width; i++)
+                  if (shift == 7)
                   {
-                     value = (*sp >> shift) & 0x03;
-                     *dp = (png_byte)(value | (value << 2) | (value << 4) |
-                        (value << 6));
-                     if (shift == 6)
-                     {
-                        shift = 0;
-                        sp--;
-                     }
-
-                     else
-                        shift += 2;
-
-                     dp--;
+                     shift = 0;
+                     sp--;
                   }
-                  break;
-               }
-
-               case 4:
-               {
-                  gray = (png_uint_16)((gray & 0x0f) * 0x11);
-                  sp = row + (png_size_t)((row_width - 1) >> 1);
-                  dp = row + (png_size_t)row_width - 1;
-                  shift = (int)((1 - ((row_width + 1) & 0x01)) << 2);
-                  for (i = 0; i < row_width; i++)
-                  {
-                     value = (*sp >> shift) & 0x0f;
-                     *dp = (png_byte)(value | (value << 4));
-                     if (shift == 4)
-                     {
-                        shift = 0;
-                        sp--;
-                     }
 
-                     else
-                        shift = 4;
+                  else
+                     shift++;
 
-                     dp--;
-                  }
-                  break;
+                  dp--;
                }
-
-               default:
-                  break;
+               break;
             }
 
-            row_info->bit_depth = 8;
-            row_info->pixel_depth = 8;
-            row_info->rowbytes = row_width;
-         }
-
-         if (trans_color != NULL)
-         {
-            if (row_info->bit_depth == 8)
+            case 2:
             {
-               gray = gray & 0xff;
-               sp = row + (png_size_t)row_width - 1;
-               dp = row + (png_size_t)(row_width << 1) - 1;
-
+               gray = (gray & 0x03) * 0x55;
+               sp = row + (size_t)((row_width - 1) >> 2);
+               dp = row + (size_t)row_width - 1;
+               shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);
                for (i = 0; i < row_width; i++)
                {
-                  if (*sp == gray)
-                     *dp-- = 0;
+                  value = (*sp >> shift) & 0x03;
+                  *dp = (png_byte)(value | (value << 2) | (value << 4) |
+                     (value << 6));
+                  if (shift == 6)
+                  {
+                     shift = 0;
+                     sp--;
+                  }
 
                   else
-                     *dp-- = 0xff;
+                     shift += 2;
 
-                  *dp-- = *sp--;
+                  dp--;
                }
+               break;
             }
 
-            else if (row_info->bit_depth == 16)
+            case 4:
             {
-               png_byte gray_high = (png_byte)((gray >> 8) & 0xff);
-               png_byte gray_low = (png_byte)(gray & 0xff);
-               sp = row + row_info->rowbytes - 1;
-               dp = row + (row_info->rowbytes << 1) - 1;
+               gray = (gray & 0x0f) * 0x11;
+               sp = row + (size_t)((row_width - 1) >> 1);
+               dp = row + (size_t)row_width - 1;
+               shift = (int)((1 - ((row_width + 1) & 0x01)) << 2);
                for (i = 0; i < row_width; i++)
                {
-                  if (*(sp - 1) == gray_high && *(sp) == gray_low)
+                  value = (*sp >> shift) & 0x0f;
+                  *dp = (png_byte)(value | (value << 4));
+                  if (shift == 4)
                   {
-                     *dp-- = 0;
-                     *dp-- = 0;
+                     shift = 0;
+                     sp--;
                   }
 
                   else
-                  {
-                     *dp-- = 0xff;
-                     *dp-- = 0xff;
-                  }
+                     shift = 4;
 
-                  *dp-- = *sp--;
-                  *dp-- = *sp--;
+                  dp--;
                }
+               break;
             }
 
-            row_info->color_type = PNG_COLOR_TYPE_GRAY_ALPHA;
-            row_info->channels = 2;
-            row_info->pixel_depth = (png_byte)(row_info->bit_depth << 1);
-            row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
-               row_width);
+            default:
+               break;
          }
+
+         row_info->bit_depth = 8;
+         row_info->pixel_depth = 8;
+         row_info->rowbytes = row_width;
       }
-      else if (row_info->color_type == PNG_COLOR_TYPE_RGB && trans_color)
+
+      if (trans_color != NULL)
       {
          if (row_info->bit_depth == 8)
          {
-            png_byte red = (png_byte)(trans_color->red & 0xff);
-            png_byte green = (png_byte)(trans_color->green & 0xff);
-            png_byte blue = (png_byte)(trans_color->blue & 0xff);
-            sp = row + (png_size_t)row_info->rowbytes - 1;
-            dp = row + (png_size_t)(row_width << 2) - 1;
+            gray = gray & 0xff;
+            sp = row + (size_t)row_width - 1;
+            dp = row + ((size_t)row_width << 1) - 1;
+
             for (i = 0; i < row_width; i++)
             {
-               if (*(sp - 2) == red && *(sp - 1) == green && *(sp) == blue)
+               if ((*sp & 0xffU) == gray)
                   *dp-- = 0;
 
                else
                   *dp-- = 0xff;
 
                *dp-- = *sp--;
-               *dp-- = *sp--;
-               *dp-- = *sp--;
             }
          }
+
          else if (row_info->bit_depth == 16)
          {
-            png_byte red_high = (png_byte)((trans_color->red >> 8) & 0xff);
-            png_byte green_high = (png_byte)((trans_color->green >> 8) & 0xff);
-            png_byte blue_high = (png_byte)((trans_color->blue >> 8) & 0xff);
-            png_byte red_low = (png_byte)(trans_color->red & 0xff);
-            png_byte green_low = (png_byte)(trans_color->green & 0xff);
-            png_byte blue_low = (png_byte)(trans_color->blue & 0xff);
+            unsigned int gray_high = (gray >> 8) & 0xff;
+            unsigned int gray_low = gray & 0xff;
             sp = row + row_info->rowbytes - 1;
-            dp = row + (png_size_t)(row_width << 3) - 1;
+            dp = row + (row_info->rowbytes << 1) - 1;
             for (i = 0; i < row_width; i++)
             {
-               if (*(sp - 5) == red_high &&
-                   *(sp - 4) == red_low &&
-                   *(sp - 3) == green_high &&
-                   *(sp - 2) == green_low &&
-                   *(sp - 1) == blue_high &&
-                   *(sp    ) == blue_low)
+               if ((*(sp - 1) & 0xffU) == gray_high &&
+                   (*(sp) & 0xffU) == gray_low)
                {
                   *dp-- = 0;
                   *dp-- = 0;
@@ -4825,17 +4527,80 @@ png_do_expand(png_row_infop row_info, png_bytep row,
 
                *dp-- = *sp--;
                *dp-- = *sp--;
-               *dp-- = *sp--;
-               *dp-- = *sp--;
-               *dp-- = *sp--;
-               *dp-- = *sp--;
             }
          }
-         row_info->color_type = PNG_COLOR_TYPE_RGB_ALPHA;
-         row_info->channels = 4;
-         row_info->pixel_depth = (png_byte)(row_info->bit_depth << 2);
-         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_width);
+
+         row_info->color_type = PNG_COLOR_TYPE_GRAY_ALPHA;
+         row_info->channels = 2;
+         row_info->pixel_depth = (png_byte)(row_info->bit_depth << 1);
+         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
+             row_width);
+      }
+   }
+   else if (row_info->color_type == PNG_COLOR_TYPE_RGB &&
+       trans_color != NULL)
+   {
+      if (row_info->bit_depth == 8)
+      {
+         png_byte red = (png_byte)(trans_color->red & 0xff);
+         png_byte green = (png_byte)(trans_color->green & 0xff);
+         png_byte blue = (png_byte)(trans_color->blue & 0xff);
+         sp = row + (size_t)row_info->rowbytes - 1;
+         dp = row + ((size_t)row_width << 2) - 1;
+         for (i = 0; i < row_width; i++)
+         {
+            if (*(sp - 2) == red && *(sp - 1) == green && *(sp) == blue)
+               *dp-- = 0;
+
+            else
+               *dp-- = 0xff;
+
+            *dp-- = *sp--;
+            *dp-- = *sp--;
+            *dp-- = *sp--;
+         }
+      }
+      else if (row_info->bit_depth == 16)
+      {
+         png_byte red_high = (png_byte)((trans_color->red >> 8) & 0xff);
+         png_byte green_high = (png_byte)((trans_color->green >> 8) & 0xff);
+         png_byte blue_high = (png_byte)((trans_color->blue >> 8) & 0xff);
+         png_byte red_low = (png_byte)(trans_color->red & 0xff);
+         png_byte green_low = (png_byte)(trans_color->green & 0xff);
+         png_byte blue_low = (png_byte)(trans_color->blue & 0xff);
+         sp = row + row_info->rowbytes - 1;
+         dp = row + ((size_t)row_width << 3) - 1;
+         for (i = 0; i < row_width; i++)
+         {
+            if (*(sp - 5) == red_high &&
+                *(sp - 4) == red_low &&
+                *(sp - 3) == green_high &&
+                *(sp - 2) == green_low &&
+                *(sp - 1) == blue_high &&
+                *(sp    ) == blue_low)
+            {
+               *dp-- = 0;
+               *dp-- = 0;
+            }
+
+            else
+            {
+               *dp-- = 0xff;
+               *dp-- = 0xff;
+            }
+
+            *dp-- = *sp--;
+            *dp-- = *sp--;
+            *dp-- = *sp--;
+            *dp-- = *sp--;
+            *dp-- = *sp--;
+            *dp-- = *sp--;
+         }
       }
+      row_info->color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+      row_info->channels = 4;
+      row_info->pixel_depth = (png_byte)(row_info->bit_depth << 2);
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_width);
    }
 }
 #endif
@@ -4844,7 +4609,7 @@ png_do_expand(png_row_infop row_info, png_bytep row,
 /* If the bit depth is 8 and the color type is not a palette type expand the
  * whole row to 16 bits.  Has no effect otherwise.
  */
-void /* PRIVATE */
+static void
 png_do_expand_16(png_row_infop row_info, png_bytep row)
 {
    if (row_info->bit_depth == 8 &&
@@ -4862,7 +4627,9 @@ png_do_expand_16(png_row_infop row_info, png_bytep row)
       png_byte *sp = row + row_info->rowbytes; /* source, last byte + 1 */
       png_byte *dp = sp + row_info->rowbytes;  /* destination, end + 1 */
       while (dp > sp)
-         dp[-2] = dp[-1] = *--sp, dp -= 2;
+      {
+         dp[-2] = dp[-1] = *--sp; dp -= 2;
+      }
 
       row_info->rowbytes *= 2;
       row_info->bit_depth = 16;
@@ -4872,7 +4639,7 @@ png_do_expand_16(png_row_infop row_info, png_bytep row)
 #endif
 
 #ifdef PNG_READ_QUANTIZE_SUPPORTED
-void /* PRIVATE */
+static void
 png_do_quantize(png_row_infop row_info, png_bytep row,
     png_const_bytep palette_lookup, png_const_bytep quantize_lookup)
 {
@@ -4963,70 +4730,315 @@ png_do_quantize(png_row_infop row_info, png_bytep row,
       }
    }
 }
-#endif /* PNG_READ_QUANTIZE_SUPPORTED */
-#endif /* PNG_READ_TRANSFORMS_SUPPORTED */
+#endif /* READ_QUANTIZE */
 
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-/* Undoes intrapixel differencing  */
+/* Transform the row.  The order of transformations is significant,
+ * and is very touchy.  If you add a transformation, take care to
+ * decide how it fits in with the other transformations here.
+ */
 void /* PRIVATE */
-png_do_read_intrapixel(png_row_infop row_info, png_bytep row)
+png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
 {
-   png_debug(1, "in png_do_read_intrapixel");
+   png_debug(1, "in png_do_read_transformations");
 
-   if (
-       (row_info->color_type & PNG_COLOR_MASK_COLOR))
+   if (png_ptr->row_buf == NULL)
    {
-      int bytes_per_pixel;
-      png_uint_32 row_width = row_info->width;
-
-      if (row_info->bit_depth == 8)
-      {
-         png_bytep rp;
-         png_uint_32 i;
-
-         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-            bytes_per_pixel = 3;
-
-         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-            bytes_per_pixel = 4;
+      /* Prior to 1.5.4 this output row/pass where the NULL pointer is, but this
+       * error is incredibly rare and incredibly easy to debug without this
+       * information.
+       */
+      png_error(png_ptr, "NULL row buffer");
+   }
 
-         else
-            return;
+   /* The following is debugging; prior to 1.5.4 the code was never compiled in;
+    * in 1.5.4 PNG_FLAG_DETECT_UNINITIALIZED was added and the macro
+    * PNG_WARN_UNINITIALIZED_ROW removed.  In 1.6 the new flag is set only for
+    * all transformations, however in practice the ROW_INIT always gets done on
+    * demand, if necessary.
+    */
+   if ((png_ptr->flags & PNG_FLAG_DETECT_UNINITIALIZED) != 0 &&
+       (png_ptr->flags & PNG_FLAG_ROW_INIT) == 0)
+   {
+      /* Application has failed to call either png_read_start_image() or
+       * png_read_update_info() after setting transforms that expand pixels.
+       * This check added to libpng-1.2.19 (but not enabled until 1.5.4).
+       */
+      png_error(png_ptr, "Uninitialized row");
+   }
 
-         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
+#ifdef PNG_READ_EXPAND_SUPPORTED
+   if ((png_ptr->transformations & PNG_EXPAND) != 0)
+   {
+      if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+         if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8))
          {
-            *(rp) = (png_byte)((256 + *rp + *(rp + 1)) & 0xff);
-            *(rp+2) = (png_byte)((256 + *(rp + 2) + *(rp + 1)) & 0xff);
+            if (png_ptr->riffled_palette == NULL)
+            {
+               /* Initialize the accelerated palette expansion. */
+               png_ptr->riffled_palette =
+                   (png_bytep)png_malloc(png_ptr, 256 * 4);
+               png_riffle_palette_neon(png_ptr);
+            }
          }
+#endif
+         png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
+             png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
       }
-      else if (row_info->bit_depth == 16)
+
+      else
       {
-         png_bytep rp;
-         png_uint_32 i;
+         if (png_ptr->num_trans != 0 &&
+             (png_ptr->transformations & PNG_EXPAND_tRNS) != 0)
+            png_do_expand(row_info, png_ptr->row_buf + 1,
+                &(png_ptr->trans_color));
+
+         else
+            png_do_expand(row_info, png_ptr->row_buf + 1, NULL);
+      }
+   }
+#endif
 
-         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-            bytes_per_pixel = 6;
+#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
+   if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0 &&
+       (png_ptr->transformations & PNG_COMPOSE) == 0 &&
+       (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
+       row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA))
+      png_do_strip_channel(row_info, png_ptr->row_buf + 1,
+          0 /* at_start == false, because SWAP_ALPHA happens later */);
+#endif
 
-         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-            bytes_per_pixel = 8;
+#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
+   if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0)
+   {
+      int rgb_error =
+          png_do_rgb_to_gray(png_ptr, row_info,
+              png_ptr->row_buf + 1);
 
-         else
-            return;
+      if (rgb_error != 0)
+      {
+         png_ptr->rgb_to_gray_status=1;
+         if ((png_ptr->transformations & PNG_RGB_TO_GRAY) ==
+             PNG_RGB_TO_GRAY_WARN)
+            png_warning(png_ptr, "png_do_rgb_to_gray found nongray pixel");
 
-         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
-         {
-            png_uint_32 s0   = (*(rp    ) << 8) | *(rp + 1);
-            png_uint_32 s1   = (*(rp + 2) << 8) | *(rp + 3);
-            png_uint_32 s2   = (*(rp + 4) << 8) | *(rp + 5);
-            png_uint_32 red  = (s0 + s1 + 65536) & 0xffff;
-            png_uint_32 blue = (s2 + s1 + 65536) & 0xffff;
-            *(rp    ) = (png_byte)((red >> 8) & 0xff);
-            *(rp + 1) = (png_byte)(red & 0xff);
-            *(rp + 4) = (png_byte)((blue >> 8) & 0xff);
-            *(rp + 5) = (png_byte)(blue & 0xff);
-         }
+         if ((png_ptr->transformations & PNG_RGB_TO_GRAY) ==
+             PNG_RGB_TO_GRAY_ERR)
+            png_error(png_ptr, "png_do_rgb_to_gray found nongray pixel");
       }
    }
+#endif
+
+/* From Andreas Dilger e-mail to png-implement, 26 March 1998:
+ *
+ *   In most cases, the "simple transparency" should be done prior to doing
+ *   gray-to-RGB, or you will have to test 3x as many bytes to check if a
+ *   pixel is transparent.  You would also need to make sure that the
+ *   transparency information is upgraded to RGB.
+ *
+ *   To summarize, the current flow is:
+ *   - Gray + simple transparency -> compare 1 or 2 gray bytes and composite
+ *                                   with background "in place" if transparent,
+ *                                   convert to RGB if necessary
+ *   - Gray + alpha -> composite with gray background and remove alpha bytes,
+ *                                   convert to RGB if necessary
+ *
+ *   To support RGB backgrounds for gray images we need:
+ *   - Gray + simple transparency -> convert to RGB + simple transparency,
+ *                                   compare 3 or 6 bytes and composite with
+ *                                   background "in place" if transparent
+ *                                   (3x compare/pixel compared to doing
+ *                                   composite with gray bkgrnd)
+ *   - Gray + alpha -> convert to RGB + alpha, composite with background and
+ *                                   remove alpha bytes (3x float
+ *                                   operations/pixel compared with composite
+ *                                   on gray background)
+ *
+ *  Greg's change will do this.  The reason it wasn't done before is for
+ *  performance, as this increases the per-pixel operations.  If we would check
+ *  in advance if the background was gray or RGB, and position the gray-to-RGB
+ *  transform appropriately, then it would save a lot of work/time.
+ */
+
+#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
+   /* If gray -> RGB, do so now only if background is non-gray; else do later
+    * for performance reasons
+    */
+   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0 &&
+       (png_ptr->mode & PNG_BACKGROUND_IS_GRAY) == 0)
+      png_do_gray_to_rgb(row_info, png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
+   defined(PNG_READ_ALPHA_MODE_SUPPORTED)
+   if ((png_ptr->transformations & PNG_COMPOSE) != 0)
+      png_do_compose(row_info, png_ptr->row_buf + 1, png_ptr);
+#endif
+
+#ifdef PNG_READ_GAMMA_SUPPORTED
+   if ((png_ptr->transformations & PNG_GAMMA) != 0 &&
+#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
+      /* Because RGB_TO_GRAY does the gamma transform. */
+      (png_ptr->transformations & PNG_RGB_TO_GRAY) == 0 &&
+#endif
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
+   defined(PNG_READ_ALPHA_MODE_SUPPORTED)
+      /* Because PNG_COMPOSE does the gamma transform if there is something to
+       * do (if there is an alpha channel or transparency.)
+       */
+       !((png_ptr->transformations & PNG_COMPOSE) != 0 &&
+       ((png_ptr->num_trans != 0) ||
+       (png_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)) &&
+#endif
+      /* Because png_init_read_transformations transforms the palette, unless
+       * RGB_TO_GRAY will do the transform.
+       */
+       (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE))
+      png_do_gamma(row_info, png_ptr->row_buf + 1, png_ptr);
+#endif
+
+#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
+   if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0 &&
+       (png_ptr->transformations & PNG_COMPOSE) != 0 &&
+       (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA ||
+       row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA))
+      png_do_strip_channel(row_info, png_ptr->row_buf + 1,
+          0 /* at_start == false, because SWAP_ALPHA happens later */);
+#endif
+
+#ifdef PNG_READ_ALPHA_MODE_SUPPORTED
+   if ((png_ptr->transformations & PNG_ENCODE_ALPHA) != 0 &&
+       (row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0)
+      png_do_encode_alpha(row_info, png_ptr->row_buf + 1, png_ptr);
+#endif
+
+#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
+   if ((png_ptr->transformations & PNG_SCALE_16_TO_8) != 0)
+      png_do_scale_16_to_8(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
+   /* There is no harm in doing both of these because only one has any effect,
+    * by putting the 'scale' option first if the app asks for scale (either by
+    * calling the API or in a TRANSFORM flag) this is what happens.
+    */
+   if ((png_ptr->transformations & PNG_16_TO_8) != 0)
+      png_do_chop(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_READ_QUANTIZE_SUPPORTED
+   if ((png_ptr->transformations & PNG_QUANTIZE) != 0)
+   {
+      png_do_quantize(row_info, png_ptr->row_buf + 1,
+          png_ptr->palette_lookup, png_ptr->quantize_index);
+
+      if (row_info->rowbytes == 0)
+         png_error(png_ptr, "png_do_quantize returned rowbytes=0");
+   }
+#endif /* READ_QUANTIZE */
+
+#ifdef PNG_READ_EXPAND_16_SUPPORTED
+   /* Do the expansion now, after all the arithmetic has been done.  Notice
+    * that previous transformations can handle the PNG_EXPAND_16 flag if this
+    * is efficient (particularly true in the case of gamma correction, where
+    * better accuracy results faster!)
+    */
+   if ((png_ptr->transformations & PNG_EXPAND_16) != 0)
+      png_do_expand_16(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
+   /* NOTE: moved here in 1.5.4 (from much later in this list.) */
+   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0 &&
+       (png_ptr->mode & PNG_BACKGROUND_IS_GRAY) != 0)
+      png_do_gray_to_rgb(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_READ_INVERT_SUPPORTED
+   if ((png_ptr->transformations & PNG_INVERT_MONO) != 0)
+      png_do_invert(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
+   if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0)
+      png_do_read_invert_alpha(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_READ_SHIFT_SUPPORTED
+   if ((png_ptr->transformations & PNG_SHIFT) != 0)
+      png_do_unshift(row_info, png_ptr->row_buf + 1,
+          &(png_ptr->shift));
+#endif
+
+#ifdef PNG_READ_PACK_SUPPORTED
+   if ((png_ptr->transformations & PNG_PACK) != 0)
+      png_do_unpack(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED
+   /* Added at libpng-1.5.10 */
+   if (row_info->color_type == PNG_COLOR_TYPE_PALETTE &&
+       png_ptr->num_palette_max >= 0)
+      png_do_check_palette_indexes(png_ptr, row_info);
+#endif
+
+#ifdef PNG_READ_BGR_SUPPORTED
+   if ((png_ptr->transformations & PNG_BGR) != 0)
+      png_do_bgr(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_READ_PACKSWAP_SUPPORTED
+   if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
+      png_do_packswap(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_READ_FILLER_SUPPORTED
+   if ((png_ptr->transformations & PNG_FILLER) != 0)
+      png_do_read_filler(row_info, png_ptr->row_buf + 1,
+          (png_uint_32)png_ptr->filler, png_ptr->flags);
+#endif
+
+#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED
+   if ((png_ptr->transformations & PNG_SWAP_ALPHA) != 0)
+      png_do_read_swap_alpha(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_READ_16BIT_SUPPORTED
+#ifdef PNG_READ_SWAP_SUPPORTED
+   if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0)
+      png_do_swap(row_info, png_ptr->row_buf + 1);
+#endif
+#endif
+
+#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
+   if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0)
+   {
+      if (png_ptr->read_user_transform_fn != NULL)
+         (*(png_ptr->read_user_transform_fn)) /* User read transform function */
+             (png_ptr,     /* png_ptr */
+             row_info,     /* row_info: */
+                /*  png_uint_32 width;       width of row */
+                /*  size_t rowbytes;         number of bytes in row */
+                /*  png_byte color_type;     color type of pixels */
+                /*  png_byte bit_depth;      bit depth of samples */
+                /*  png_byte channels;       number of channels (1-4) */
+                /*  png_byte pixel_depth;    bits per pixel (depth*channels) */
+             png_ptr->row_buf + 1);    /* start of pixel data for row */
+#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
+      if (png_ptr->user_transform_depth != 0)
+         row_info->bit_depth = png_ptr->user_transform_depth;
+
+      if (png_ptr->user_transform_channels != 0)
+         row_info->channels = png_ptr->user_transform_channels;
+#endif
+      row_info->pixel_depth = (png_byte)(row_info->bit_depth *
+          row_info->channels);
+
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_info->width);
+   }
+#endif
 }
-#endif /* PNG_MNG_FEATURES_SUPPORTED */
-#endif /* PNG_READ_SUPPORTED */
+
+#endif /* READ_TRANSFORMS */
+#endif /* READ */
diff --git a/reg-io/png/lpng/pngrutil.c b/reg-io/png/lpng/pngrutil.c
new file mode 100644
index 00000000..236e982f
--- /dev/null
+++ b/reg-io/png/lpng/pngrutil.c
@@ -0,0 +1,4680 @@
+
+/* pngrutil.c - utilities to read a PNG file
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ *
+ * This file contains routines that are only called from within
+ * libpng itself during the course of reading an image.
+ */
+
+#include "pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+png_uint_32 PNGAPI
+png_get_uint_31(png_const_structrp png_ptr, png_const_bytep buf)
+{
+   png_uint_32 uval = png_get_uint_32(buf);
+
+   if (uval > PNG_UINT_31_MAX)
+      png_error(png_ptr, "PNG unsigned integer out of range");
+
+   return uval;
+}
+
+#if defined(PNG_READ_gAMA_SUPPORTED) || defined(PNG_READ_cHRM_SUPPORTED)
+/* The following is a variation on the above for use with the fixed
+ * point values used for gAMA and cHRM.  Instead of png_error it
+ * issues a warning and returns (-1) - an invalid value because both
+ * gAMA and cHRM use *unsigned* integers for fixed point values.
+ */
+#define PNG_FIXED_ERROR (-1)
+
+static png_fixed_point /* PRIVATE */
+png_get_fixed_point(png_structrp png_ptr, png_const_bytep buf)
+{
+   png_uint_32 uval = png_get_uint_32(buf);
+
+   if (uval <= PNG_UINT_31_MAX)
+      return (png_fixed_point)uval; /* known to be in range */
+
+   /* The caller can turn off the warning by passing NULL. */
+   if (png_ptr != NULL)
+      png_warning(png_ptr, "PNG fixed point integer out of range");
+
+   return PNG_FIXED_ERROR;
+}
+#endif
+
+#ifdef PNG_READ_INT_FUNCTIONS_SUPPORTED
+/* NOTE: the read macros will obscure these definitions, so that if
+ * PNG_USE_READ_MACROS is set the library will not use them internally,
+ * but the APIs will still be available externally.
+ *
+ * The parentheses around "PNGAPI function_name" in the following three
+ * functions are necessary because they allow the macros to co-exist with
+ * these (unused but exported) functions.
+ */
+
+/* Grab an unsigned 32-bit integer from a buffer in big-endian format. */
+png_uint_32 (PNGAPI
+png_get_uint_32)(png_const_bytep buf)
+{
+   png_uint_32 uval =
+       ((png_uint_32)(*(buf    )) << 24) +
+       ((png_uint_32)(*(buf + 1)) << 16) +
+       ((png_uint_32)(*(buf + 2)) <<  8) +
+       ((png_uint_32)(*(buf + 3))      ) ;
+
+   return uval;
+}
+
+/* Grab a signed 32-bit integer from a buffer in big-endian format.  The
+ * data is stored in the PNG file in two's complement format and there
+ * is no guarantee that a 'png_int_32' is exactly 32 bits, therefore
+ * the following code does a two's complement to native conversion.
+ */
+png_int_32 (PNGAPI
+png_get_int_32)(png_const_bytep buf)
+{
+   png_uint_32 uval = png_get_uint_32(buf);
+   if ((uval & 0x80000000) == 0) /* non-negative */
+      return (png_int_32)uval;
+
+   uval = (uval ^ 0xffffffff) + 1;  /* 2's complement: -x = ~x+1 */
+   if ((uval & 0x80000000) == 0) /* no overflow */
+      return -(png_int_32)uval;
+   /* The following has to be safe; this function only gets called on PNG data
+    * and if we get here that data is invalid.  0 is the most safe value and
+    * if not then an attacker would surely just generate a PNG with 0 instead.
+    */
+   return 0;
+}
+
+/* Grab an unsigned 16-bit integer from a buffer in big-endian format. */
+png_uint_16 (PNGAPI
+png_get_uint_16)(png_const_bytep buf)
+{
+   /* ANSI-C requires an int value to accommodate at least 16 bits so this
+    * works and allows the compiler not to worry about possible narrowing
+    * on 32-bit systems.  (Pre-ANSI systems did not make integers smaller
+    * than 16 bits either.)
+    */
+   unsigned int val =
+       ((unsigned int)(*buf) << 8) +
+       ((unsigned int)(*(buf + 1)));
+
+   return (png_uint_16)val;
+}
+
+#endif /* READ_INT_FUNCTIONS */
+
+/* Read and check the PNG file signature */
+void /* PRIVATE */
+png_read_sig(png_structrp png_ptr, png_inforp info_ptr)
+{
+   size_t num_checked, num_to_check;
+
+   /* Exit if the user application does not expect a signature. */
+   if (png_ptr->sig_bytes >= 8)
+      return;
+
+   num_checked = png_ptr->sig_bytes;
+   num_to_check = 8 - num_checked;
+
+#ifdef PNG_IO_STATE_SUPPORTED
+   png_ptr->io_state = PNG_IO_READING | PNG_IO_SIGNATURE;
+#endif
+
+   /* The signature must be serialized in a single I/O call. */
+   png_read_data(png_ptr, &(info_ptr->signature[num_checked]), num_to_check);
+   png_ptr->sig_bytes = 8;
+
+   if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check) != 0)
+   {
+      if (num_checked < 4 &&
+          png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4) != 0)
+         png_error(png_ptr, "Not a PNG file");
+      else
+         png_error(png_ptr, "PNG file corrupted by ASCII conversion");
+   }
+   if (num_checked < 3)
+      png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE;
+}
+
+/* Read the chunk header (length + type name).
+ * Put the type name into png_ptr->chunk_name, and return the length.
+ */
+png_uint_32 /* PRIVATE */
+png_read_chunk_header(png_structrp png_ptr)
+{
+   png_byte buf[8];
+   png_uint_32 length;
+
+#ifdef PNG_IO_STATE_SUPPORTED
+   png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_HDR;
+#endif
+
+   /* Read the length and the chunk name.
+    * This must be performed in a single I/O call.
+    */
+   png_read_data(png_ptr, buf, 8);
+   length = png_get_uint_31(png_ptr, buf);
+
+   /* Put the chunk name into png_ptr->chunk_name. */
+   png_ptr->chunk_name = PNG_CHUNK_FROM_STRING(buf+4);
+
+   png_debug2(0, "Reading chunk typeid = 0x%lx, length = %lu",
+       (unsigned long)png_ptr->chunk_name, (unsigned long)length);
+
+   /* Reset the crc and run it over the chunk name. */
+   png_reset_crc(png_ptr);
+   png_calculate_crc(png_ptr, buf + 4, 4);
+
+   /* Check to see if chunk name is valid. */
+   png_check_chunk_name(png_ptr, png_ptr->chunk_name);
+
+   /* Check for too-large chunk length */
+   png_check_chunk_length(png_ptr, length);
+
+#ifdef PNG_IO_STATE_SUPPORTED
+   png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_DATA;
+#endif
+
+   return length;
+}
+
+/* Read data, and (optionally) run it through the CRC. */
+void /* PRIVATE */
+png_crc_read(png_structrp png_ptr, png_bytep buf, png_uint_32 length)
+{
+   if (png_ptr == NULL)
+      return;
+
+   png_read_data(png_ptr, buf, length);
+   png_calculate_crc(png_ptr, buf, length);
+}
+
+/* Optionally skip data and then check the CRC.  Depending on whether we
+ * are reading an ancillary or critical chunk, and how the program has set
+ * things up, we may calculate the CRC on the data and print a message.
+ * Returns '1' if there was a CRC error, '0' otherwise.
+ */
+int /* PRIVATE */
+png_crc_finish(png_structrp png_ptr, png_uint_32 skip)
+{
+   /* The size of the local buffer for inflate is a good guess as to a
+    * reasonable size to use for buffering reads from the application.
+    */
+   while (skip > 0)
+   {
+      png_uint_32 len;
+      png_byte tmpbuf[PNG_INFLATE_BUF_SIZE];
+
+      len = (sizeof tmpbuf);
+      if (len > skip)
+         len = skip;
+      skip -= len;
+
+      png_crc_read(png_ptr, tmpbuf, len);
+   }
+
+   if (png_crc_error(png_ptr) != 0)
+   {
+      if (PNG_CHUNK_ANCILLARY(png_ptr->chunk_name) != 0 ?
+          (png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) == 0 :
+          (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_USE) != 0)
+      {
+         png_chunk_warning(png_ptr, "CRC error");
+      }
+
+      else
+         png_chunk_error(png_ptr, "CRC error");
+
+      return 1;
+   }
+
+   return 0;
+}
+
+/* Compare the CRC stored in the PNG file with that calculated by libpng from
+ * the data it has read thus far.
+ */
+int /* PRIVATE */
+png_crc_error(png_structrp png_ptr)
+{
+   png_byte crc_bytes[4];
+   png_uint_32 crc;
+   int need_crc = 1;
+
+   if (PNG_CHUNK_ANCILLARY(png_ptr->chunk_name) != 0)
+   {
+      if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) ==
+          (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN))
+         need_crc = 0;
+   }
+
+   else /* critical */
+   {
+      if ((png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE) != 0)
+         need_crc = 0;
+   }
+
+#ifdef PNG_IO_STATE_SUPPORTED
+   png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_CRC;
+#endif
+
+   /* The chunk CRC must be serialized in a single I/O call. */
+   png_read_data(png_ptr, crc_bytes, 4);
+
+   if (need_crc != 0)
+   {
+      crc = png_get_uint_32(crc_bytes);
+      return crc != png_ptr->crc;
+   }
+
+   else
+      return 0;
+}
+
+#if defined(PNG_READ_iCCP_SUPPORTED) || defined(PNG_READ_iTXt_SUPPORTED) ||\
+    defined(PNG_READ_pCAL_SUPPORTED) || defined(PNG_READ_sCAL_SUPPORTED) ||\
+    defined(PNG_READ_sPLT_SUPPORTED) || defined(PNG_READ_tEXt_SUPPORTED) ||\
+    defined(PNG_READ_zTXt_SUPPORTED) || defined(PNG_SEQUENTIAL_READ_SUPPORTED)
+/* Manage the read buffer; this simply reallocates the buffer if it is not small
+ * enough (or if it is not allocated).  The routine returns a pointer to the
+ * buffer; if an error occurs and 'warn' is set the routine returns NULL, else
+ * it will call png_error (via png_malloc) on failure.  (warn == 2 means
+ * 'silent').
+ */
+static png_bytep
+png_read_buffer(png_structrp png_ptr, png_alloc_size_t new_size, int warn)
+{
+   png_bytep buffer = png_ptr->read_buffer;
+
+   if (buffer != NULL && new_size > png_ptr->read_buffer_size)
+   {
+      png_ptr->read_buffer = NULL;
+      png_ptr->read_buffer_size = 0;
+      png_free(png_ptr, buffer);
+      buffer = NULL;
+   }
+
+   if (buffer == NULL)
+   {
+      buffer = png_voidcast(png_bytep, png_malloc_base(png_ptr, new_size));
+
+      if (buffer != NULL)
+      {
+         memset(buffer, 0, new_size); /* just in case */
+         png_ptr->read_buffer = buffer;
+         png_ptr->read_buffer_size = new_size;
+      }
+
+      else if (warn < 2) /* else silent */
+      {
+         if (warn != 0)
+             png_chunk_warning(png_ptr, "insufficient memory to read chunk");
+
+         else
+             png_chunk_error(png_ptr, "insufficient memory to read chunk");
+      }
+   }
+
+   return buffer;
+}
+#endif /* READ_iCCP|iTXt|pCAL|sCAL|sPLT|tEXt|zTXt|SEQUENTIAL_READ */
+
+/* png_inflate_claim: claim the zstream for some nefarious purpose that involves
+ * decompression.  Returns Z_OK on success, else a zlib error code.  It checks
+ * the owner but, in final release builds, just issues a warning if some other
+ * chunk apparently owns the stream.  Prior to release it does a png_error.
+ */
+static int
+png_inflate_claim(png_structrp png_ptr, png_uint_32 owner)
+{
+   if (png_ptr->zowner != 0)
+   {
+      char msg[64];
+
+      PNG_STRING_FROM_CHUNK(msg, png_ptr->zowner);
+      /* So the message that results is "<chunk> using zstream"; this is an
+       * internal error, but is very useful for debugging.  i18n requirements
+       * are minimal.
+       */
+      (void)png_safecat(msg, (sizeof msg), 4, " using zstream");
+#if PNG_RELEASE_BUILD
+      png_chunk_warning(png_ptr, msg);
+      png_ptr->zowner = 0;
+#else
+      png_chunk_error(png_ptr, msg);
+#endif
+   }
+
+   /* Implementation note: unlike 'png_deflate_claim' this internal function
+    * does not take the size of the data as an argument.  Some efficiency could
+    * be gained by using this when it is known *if* the zlib stream itself does
+    * not record the number; however, this is an illusion: the original writer
+    * of the PNG may have selected a lower window size, and we really must
+    * follow that because, for systems with with limited capabilities, we
+    * would otherwise reject the application's attempts to use a smaller window
+    * size (zlib doesn't have an interface to say "this or lower"!).
+    *
+    * inflateReset2 was added to zlib 1.2.4; before this the window could not be
+    * reset, therefore it is necessary to always allocate the maximum window
+    * size with earlier zlibs just in case later compressed chunks need it.
+    */
+   {
+      int ret; /* zlib return code */
+#if ZLIB_VERNUM >= 0x1240
+      int window_bits = 0;
+
+# if defined(PNG_SET_OPTION_SUPPORTED) && defined(PNG_MAXIMUM_INFLATE_WINDOW)
+      if (((png_ptr->options >> PNG_MAXIMUM_INFLATE_WINDOW) & 3) ==
+          PNG_OPTION_ON)
+      {
+         window_bits = 15;
+         png_ptr->zstream_start = 0; /* fixed window size */
+      }
+
+      else
+      {
+         png_ptr->zstream_start = 1;
+      }
+# endif
+
+#endif /* ZLIB_VERNUM >= 0x1240 */
+
+      /* Set this for safety, just in case the previous owner left pointers to
+       * memory allocations.
+       */
+      png_ptr->zstream.next_in = NULL;
+      png_ptr->zstream.avail_in = 0;
+      png_ptr->zstream.next_out = NULL;
+      png_ptr->zstream.avail_out = 0;
+
+      if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0)
+      {
+#if ZLIB_VERNUM >= 0x1240
+         ret = inflateReset2(&png_ptr->zstream, window_bits);
+#else
+         ret = inflateReset(&png_ptr->zstream);
+#endif
+      }
+
+      else
+      {
+#if ZLIB_VERNUM >= 0x1240
+         ret = inflateInit2(&png_ptr->zstream, window_bits);
+#else
+         ret = inflateInit(&png_ptr->zstream);
+#endif
+
+         if (ret == Z_OK)
+            png_ptr->flags |= PNG_FLAG_ZSTREAM_INITIALIZED;
+      }
+
+#ifdef PNG_DISABLE_ADLER32_CHECK_SUPPORTED
+      if (((png_ptr->options >> PNG_IGNORE_ADLER32) & 3) == PNG_OPTION_ON)
+         /* Turn off validation of the ADLER32 checksum in IDAT chunks */
+         ret = inflateValidate(&png_ptr->zstream, 0);
+#endif
+
+      if (ret == Z_OK)
+         png_ptr->zowner = owner;
+
+      else
+         png_zstream_error(png_ptr, ret);
+
+      return ret;
+   }
+
+#ifdef window_bits
+# undef window_bits
+#endif
+}
+
+#if ZLIB_VERNUM >= 0x1240
+/* Handle the start of the inflate stream if we called inflateInit2(strm,0);
+ * in this case some zlib versions skip validation of the CINFO field and, in
+ * certain circumstances, libpng may end up displaying an invalid image, in
+ * contrast to implementations that call zlib in the normal way (e.g. libpng
+ * 1.5).
+ */
+int /* PRIVATE */
+png_zlib_inflate(png_structrp png_ptr, int flush)
+{
+   if (png_ptr->zstream_start && png_ptr->zstream.avail_in > 0)
+   {
+      if ((*png_ptr->zstream.next_in >> 4) > 7)
+      {
+         png_ptr->zstream.msg = "invalid window size (libpng)";
+         return Z_DATA_ERROR;
+      }
+
+      png_ptr->zstream_start = 0;
+   }
+
+   return inflate(&png_ptr->zstream, flush);
+}
+#endif /* Zlib >= 1.2.4 */
+
+#ifdef PNG_READ_COMPRESSED_TEXT_SUPPORTED
+#if defined(PNG_READ_zTXt_SUPPORTED) || defined (PNG_READ_iTXt_SUPPORTED)
+/* png_inflate now returns zlib error codes including Z_OK and Z_STREAM_END to
+ * allow the caller to do multiple calls if required.  If the 'finish' flag is
+ * set Z_FINISH will be passed to the final inflate() call and Z_STREAM_END must
+ * be returned or there has been a problem, otherwise Z_SYNC_FLUSH is used and
+ * Z_OK or Z_STREAM_END will be returned on success.
+ *
+ * The input and output sizes are updated to the actual amounts of data consumed
+ * or written, not the amount available (as in a z_stream).  The data pointers
+ * are not changed, so the next input is (data+input_size) and the next
+ * available output is (output+output_size).
+ */
+static int
+png_inflate(png_structrp png_ptr, png_uint_32 owner, int finish,
+    /* INPUT: */ png_const_bytep input, png_uint_32p input_size_ptr,
+    /* OUTPUT: */ png_bytep output, png_alloc_size_t *output_size_ptr)
+{
+   if (png_ptr->zowner == owner) /* Else not claimed */
+   {
+      int ret;
+      png_alloc_size_t avail_out = *output_size_ptr;
+      png_uint_32 avail_in = *input_size_ptr;
+
+      /* zlib can't necessarily handle more than 65535 bytes at once (i.e. it
+       * can't even necessarily handle 65536 bytes) because the type uInt is
+       * "16 bits or more".  Consequently it is necessary to chunk the input to
+       * zlib.  This code uses ZLIB_IO_MAX, from pngpriv.h, as the maximum (the
+       * maximum value that can be stored in a uInt.)  It is possible to set
+       * ZLIB_IO_MAX to a lower value in pngpriv.h and this may sometimes have
+       * a performance advantage, because it reduces the amount of data accessed
+       * at each step and that may give the OS more time to page it in.
+       */
+      png_ptr->zstream.next_in = PNGZ_INPUT_CAST(input);
+      /* avail_in and avail_out are set below from 'size' */
+      png_ptr->zstream.avail_in = 0;
+      png_ptr->zstream.avail_out = 0;
+
+      /* Read directly into the output if it is available (this is set to
+       * a local buffer below if output is NULL).
+       */
+      if (output != NULL)
+         png_ptr->zstream.next_out = output;
+
+      do
+      {
+         uInt avail;
+         Byte local_buffer[PNG_INFLATE_BUF_SIZE];
+
+         /* zlib INPUT BUFFER */
+         /* The setting of 'avail_in' used to be outside the loop; by setting it
+          * inside it is possible to chunk the input to zlib and simply rely on
+          * zlib to advance the 'next_in' pointer.  This allows arbitrary
+          * amounts of data to be passed through zlib at the unavoidable cost of
+          * requiring a window save (memcpy of up to 32768 output bytes)
+          * every ZLIB_IO_MAX input bytes.
+          */
+         avail_in += png_ptr->zstream.avail_in; /* not consumed last time */
+
+         avail = ZLIB_IO_MAX;
+
+         if (avail_in < avail)
+            avail = (uInt)avail_in; /* safe: < than ZLIB_IO_MAX */
+
+         avail_in -= avail;
+         png_ptr->zstream.avail_in = avail;
+
+         /* zlib OUTPUT BUFFER */
+         avail_out += png_ptr->zstream.avail_out; /* not written last time */
+
+         avail = ZLIB_IO_MAX; /* maximum zlib can process */
+
+         if (output == NULL)
+         {
+            /* Reset the output buffer each time round if output is NULL and
+             * make available the full buffer, up to 'remaining_space'
+             */
+            png_ptr->zstream.next_out = local_buffer;
+            if ((sizeof local_buffer) < avail)
+               avail = (sizeof local_buffer);
+         }
+
+         if (avail_out < avail)
+            avail = (uInt)avail_out; /* safe: < ZLIB_IO_MAX */
+
+         png_ptr->zstream.avail_out = avail;
+         avail_out -= avail;
+
+         /* zlib inflate call */
+         /* In fact 'avail_out' may be 0 at this point, that happens at the end
+          * of the read when the final LZ end code was not passed at the end of
+          * the previous chunk of input data.  Tell zlib if we have reached the
+          * end of the output buffer.
+          */
+         ret = PNG_INFLATE(png_ptr, avail_out > 0 ? Z_NO_FLUSH :
+             (finish ? Z_FINISH : Z_SYNC_FLUSH));
+      } while (ret == Z_OK);
+
+      /* For safety kill the local buffer pointer now */
+      if (output == NULL)
+         png_ptr->zstream.next_out = NULL;
+
+      /* Claw back the 'size' and 'remaining_space' byte counts. */
+      avail_in += png_ptr->zstream.avail_in;
+      avail_out += png_ptr->zstream.avail_out;
+
+      /* Update the input and output sizes; the updated values are the amount
+       * consumed or written, effectively the inverse of what zlib uses.
+       */
+      if (avail_out > 0)
+         *output_size_ptr -= avail_out;
+
+      if (avail_in > 0)
+         *input_size_ptr -= avail_in;
+
+      /* Ensure png_ptr->zstream.msg is set (even in the success case!) */
+      png_zstream_error(png_ptr, ret);
+      return ret;
+   }
+
+   else
+   {
+      /* This is a bad internal error.  The recovery assigns to the zstream msg
+       * pointer, which is not owned by the caller, but this is safe; it's only
+       * used on errors!
+       */
+      png_ptr->zstream.msg = PNGZ_MSG_CAST("zstream unclaimed");
+      return Z_STREAM_ERROR;
+   }
+}
+
+/*
+ * Decompress trailing data in a chunk.  The assumption is that read_buffer
+ * points at an allocated area holding the contents of a chunk with a
+ * trailing compressed part.  What we get back is an allocated area
+ * holding the original prefix part and an uncompressed version of the
+ * trailing part (the malloc area passed in is freed).
+ */
+static int
+png_decompress_chunk(png_structrp png_ptr,
+    png_uint_32 chunklength, png_uint_32 prefix_size,
+    png_alloc_size_t *newlength /* must be initialized to the maximum! */,
+    int terminate /*add a '\0' to the end of the uncompressed data*/)
+{
+   /* TODO: implement different limits for different types of chunk.
+    *
+    * The caller supplies *newlength set to the maximum length of the
+    * uncompressed data, but this routine allocates space for the prefix and
+    * maybe a '\0' terminator too.  We have to assume that 'prefix_size' is
+    * limited only by the maximum chunk size.
+    */
+   png_alloc_size_t limit = PNG_SIZE_MAX;
+
+# ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   if (png_ptr->user_chunk_malloc_max > 0 &&
+       png_ptr->user_chunk_malloc_max < limit)
+      limit = png_ptr->user_chunk_malloc_max;
+# elif PNG_USER_CHUNK_MALLOC_MAX > 0
+   if (PNG_USER_CHUNK_MALLOC_MAX < limit)
+      limit = PNG_USER_CHUNK_MALLOC_MAX;
+# endif
+
+   if (limit >= prefix_size + (terminate != 0))
+   {
+      int ret;
+
+      limit -= prefix_size + (terminate != 0);
+
+      if (limit < *newlength)
+         *newlength = limit;
+
+      /* Now try to claim the stream. */
+      ret = png_inflate_claim(png_ptr, png_ptr->chunk_name);
+
+      if (ret == Z_OK)
+      {
+         png_uint_32 lzsize = chunklength - prefix_size;
+
+         ret = png_inflate(png_ptr, png_ptr->chunk_name, 1/*finish*/,
+             /* input: */ png_ptr->read_buffer + prefix_size, &lzsize,
+             /* output: */ NULL, newlength);
+
+         if (ret == Z_STREAM_END)
+         {
+            /* Use 'inflateReset' here, not 'inflateReset2' because this
+             * preserves the previously decided window size (otherwise it would
+             * be necessary to store the previous window size.)  In practice
+             * this doesn't matter anyway, because png_inflate will call inflate
+             * with Z_FINISH in almost all cases, so the window will not be
+             * maintained.
+             */
+            if (inflateReset(&png_ptr->zstream) == Z_OK)
+            {
+               /* Because of the limit checks above we know that the new,
+                * expanded, size will fit in a size_t (let alone an
+                * png_alloc_size_t).  Use png_malloc_base here to avoid an
+                * extra OOM message.
+                */
+               png_alloc_size_t new_size = *newlength;
+               png_alloc_size_t buffer_size = prefix_size + new_size +
+                   (terminate != 0);
+               png_bytep text = png_voidcast(png_bytep, png_malloc_base(png_ptr,
+                   buffer_size));
+
+               if (text != NULL)
+               {
+                  memset(text, 0, buffer_size);
+
+                  ret = png_inflate(png_ptr, png_ptr->chunk_name, 1/*finish*/,
+                      png_ptr->read_buffer + prefix_size, &lzsize,
+                      text + prefix_size, newlength);
+
+                  if (ret == Z_STREAM_END)
+                  {
+                     if (new_size == *newlength)
+                     {
+                        if (terminate != 0)
+                           text[prefix_size + *newlength] = 0;
+
+                        if (prefix_size > 0)
+                           memcpy(text, png_ptr->read_buffer, prefix_size);
+
+                        {
+                           png_bytep old_ptr = png_ptr->read_buffer;
+
+                           png_ptr->read_buffer = text;
+                           png_ptr->read_buffer_size = buffer_size;
+                           text = old_ptr; /* freed below */
+                        }
+                     }
+
+                     else
+                     {
+                        /* The size changed on the second read, there can be no
+                         * guarantee that anything is correct at this point.
+                         * The 'msg' pointer has been set to "unexpected end of
+                         * LZ stream", which is fine, but return an error code
+                         * that the caller won't accept.
+                         */
+                        ret = PNG_UNEXPECTED_ZLIB_RETURN;
+                     }
+                  }
+
+                  else if (ret == Z_OK)
+                     ret = PNG_UNEXPECTED_ZLIB_RETURN; /* for safety */
+
+                  /* Free the text pointer (this is the old read_buffer on
+                   * success)
+                   */
+                  png_free(png_ptr, text);
+
+                  /* This really is very benign, but it's still an error because
+                   * the extra space may otherwise be used as a Trojan Horse.
+                   */
+                  if (ret == Z_STREAM_END &&
+                      chunklength - prefix_size != lzsize)
+                     png_chunk_benign_error(png_ptr, "extra compressed data");
+               }
+
+               else
+               {
+                  /* Out of memory allocating the buffer */
+                  ret = Z_MEM_ERROR;
+                  png_zstream_error(png_ptr, Z_MEM_ERROR);
+               }
+            }
+
+            else
+            {
+               /* inflateReset failed, store the error message */
+               png_zstream_error(png_ptr, ret);
+               ret = PNG_UNEXPECTED_ZLIB_RETURN;
+            }
+         }
+
+         else if (ret == Z_OK)
+            ret = PNG_UNEXPECTED_ZLIB_RETURN;
+
+         /* Release the claimed stream */
+         png_ptr->zowner = 0;
+      }
+
+      else /* the claim failed */ if (ret == Z_STREAM_END) /* impossible! */
+         ret = PNG_UNEXPECTED_ZLIB_RETURN;
+
+      return ret;
+   }
+
+   else
+   {
+      /* Application/configuration limits exceeded */
+      png_zstream_error(png_ptr, Z_MEM_ERROR);
+      return Z_MEM_ERROR;
+   }
+}
+#endif /* READ_zTXt || READ_iTXt */
+#endif /* READ_COMPRESSED_TEXT */
+
+#ifdef PNG_READ_iCCP_SUPPORTED
+/* Perform a partial read and decompress, producing 'avail_out' bytes and
+ * reading from the current chunk as required.
+ */
+static int
+png_inflate_read(png_structrp png_ptr, png_bytep read_buffer, uInt read_size,
+    png_uint_32p chunk_bytes, png_bytep next_out, png_alloc_size_t *out_size,
+    int finish)
+{
+   if (png_ptr->zowner == png_ptr->chunk_name)
+   {
+      int ret;
+
+      /* next_in and avail_in must have been initialized by the caller. */
+      png_ptr->zstream.next_out = next_out;
+      png_ptr->zstream.avail_out = 0; /* set in the loop */
+
+      do
+      {
+         if (png_ptr->zstream.avail_in == 0)
+         {
+            if (read_size > *chunk_bytes)
+               read_size = (uInt)*chunk_bytes;
+            *chunk_bytes -= read_size;
+
+            if (read_size > 0)
+               png_crc_read(png_ptr, read_buffer, read_size);
+
+            png_ptr->zstream.next_in = read_buffer;
+            png_ptr->zstream.avail_in = read_size;
+         }
+
+         if (png_ptr->zstream.avail_out == 0)
+         {
+            uInt avail = ZLIB_IO_MAX;
+            if (avail > *out_size)
+               avail = (uInt)*out_size;
+            *out_size -= avail;
+
+            png_ptr->zstream.avail_out = avail;
+         }
+
+         /* Use Z_SYNC_FLUSH when there is no more chunk data to ensure that all
+          * the available output is produced; this allows reading of truncated
+          * streams.
+          */
+         ret = PNG_INFLATE(png_ptr, *chunk_bytes > 0 ?
+             Z_NO_FLUSH : (finish ? Z_FINISH : Z_SYNC_FLUSH));
+      }
+      while (ret == Z_OK && (*out_size > 0 || png_ptr->zstream.avail_out > 0));
+
+      *out_size += png_ptr->zstream.avail_out;
+      png_ptr->zstream.avail_out = 0; /* Should not be required, but is safe */
+
+      /* Ensure the error message pointer is always set: */
+      png_zstream_error(png_ptr, ret);
+      return ret;
+   }
+
+   else
+   {
+      png_ptr->zstream.msg = PNGZ_MSG_CAST("zstream unclaimed");
+      return Z_STREAM_ERROR;
+   }
+}
+#endif /* READ_iCCP */
+
+/* Read and check the IDHR chunk */
+
+void /* PRIVATE */
+png_handle_IHDR(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_byte buf[13];
+   png_uint_32 width, height;
+   int bit_depth, color_type, compression_type, filter_type;
+   int interlace_type;
+
+   png_debug(1, "in png_handle_IHDR");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) != 0)
+      png_chunk_error(png_ptr, "out of place");
+
+   /* Check the length */
+   if (length != 13)
+      png_chunk_error(png_ptr, "invalid");
+
+   png_ptr->mode |= PNG_HAVE_IHDR;
+
+   png_crc_read(png_ptr, buf, 13);
+   png_crc_finish(png_ptr, 0);
+
+   width = png_get_uint_31(png_ptr, buf);
+   height = png_get_uint_31(png_ptr, buf + 4);
+   bit_depth = buf[8];
+   color_type = buf[9];
+   compression_type = buf[10];
+   filter_type = buf[11];
+   interlace_type = buf[12];
+
+   /* Set internal variables */
+   png_ptr->width = width;
+   png_ptr->height = height;
+   png_ptr->bit_depth = (png_byte)bit_depth;
+   png_ptr->interlaced = (png_byte)interlace_type;
+   png_ptr->color_type = (png_byte)color_type;
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+   png_ptr->filter_type = (png_byte)filter_type;
+#endif
+   png_ptr->compression_type = (png_byte)compression_type;
+
+   /* Find number of channels */
+   switch (png_ptr->color_type)
+   {
+      default: /* invalid, png_set_IHDR calls png_error */
+      case PNG_COLOR_TYPE_GRAY:
+      case PNG_COLOR_TYPE_PALETTE:
+         png_ptr->channels = 1;
+         break;
+
+      case PNG_COLOR_TYPE_RGB:
+         png_ptr->channels = 3;
+         break;
+
+      case PNG_COLOR_TYPE_GRAY_ALPHA:
+         png_ptr->channels = 2;
+         break;
+
+      case PNG_COLOR_TYPE_RGB_ALPHA:
+         png_ptr->channels = 4;
+         break;
+   }
+
+   /* Set up other useful info */
+   png_ptr->pixel_depth = (png_byte)(png_ptr->bit_depth * png_ptr->channels);
+   png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, png_ptr->width);
+   png_debug1(3, "bit_depth = %d", png_ptr->bit_depth);
+   png_debug1(3, "channels = %d", png_ptr->channels);
+   png_debug1(3, "rowbytes = %lu", (unsigned long)png_ptr->rowbytes);
+   png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth,
+       color_type, interlace_type, compression_type, filter_type);
+}
+
+/* Read and check the palette */
+void /* PRIVATE */
+png_handle_PLTE(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_color palette[PNG_MAX_PALETTE_LENGTH];
+   int max_palette_length, num, i;
+#ifdef PNG_POINTER_INDEXING_SUPPORTED
+   png_colorp pal_ptr;
+#endif
+
+   png_debug(1, "in png_handle_PLTE");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   /* Moved to before the 'after IDAT' check below because otherwise duplicate
+    * PLTE chunks are potentially ignored (the spec says there shall not be more
+    * than one PLTE, the error is not treated as benign, so this check trumps
+    * the requirement that PLTE appears before IDAT.)
+    */
+   else if ((png_ptr->mode & PNG_HAVE_PLTE) != 0)
+      png_chunk_error(png_ptr, "duplicate");
+
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      /* This is benign because the non-benign error happened before, when an
+       * IDAT was encountered in a color-mapped image with no PLTE.
+       */
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   png_ptr->mode |= PNG_HAVE_PLTE;
+
+   if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "ignored in grayscale PNG");
+      return;
+   }
+
+#ifndef PNG_READ_OPT_PLTE_SUPPORTED
+   if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+   {
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+#endif
+
+   if (length > 3*PNG_MAX_PALETTE_LENGTH || length % 3)
+   {
+      png_crc_finish(png_ptr, length);
+
+      if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+         png_chunk_benign_error(png_ptr, "invalid");
+
+      else
+         png_chunk_error(png_ptr, "invalid");
+
+      return;
+   }
+
+   /* The cast is safe because 'length' is less than 3*PNG_MAX_PALETTE_LENGTH */
+   num = (int)length / 3;
+
+   /* If the palette has 256 or fewer entries but is too large for the bit
+    * depth, we don't issue an error, to preserve the behavior of previous
+    * libpng versions. We silently truncate the unused extra palette entries
+    * here.
+    */
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      max_palette_length = (1 << png_ptr->bit_depth);
+   else
+      max_palette_length = PNG_MAX_PALETTE_LENGTH;
+
+   if (num > max_palette_length)
+      num = max_palette_length;
+
+#ifdef PNG_POINTER_INDEXING_SUPPORTED
+   for (i = 0, pal_ptr = palette; i < num; i++, pal_ptr++)
+   {
+      png_byte buf[3];
+
+      png_crc_read(png_ptr, buf, 3);
+      pal_ptr->red = buf[0];
+      pal_ptr->green = buf[1];
+      pal_ptr->blue = buf[2];
+   }
+#else
+   for (i = 0; i < num; i++)
+   {
+      png_byte buf[3];
+
+      png_crc_read(png_ptr, buf, 3);
+      /* Don't depend upon png_color being any order */
+      palette[i].red = buf[0];
+      palette[i].green = buf[1];
+      palette[i].blue = buf[2];
+   }
+#endif
+
+   /* If we actually need the PLTE chunk (ie for a paletted image), we do
+    * whatever the normal CRC configuration tells us.  However, if we
+    * have an RGB image, the PLTE can be considered ancillary, so
+    * we will act as though it is.
+    */
+#ifndef PNG_READ_OPT_PLTE_SUPPORTED
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+#endif
+   {
+      png_crc_finish(png_ptr, (png_uint_32) (length - (unsigned int)num * 3));
+   }
+
+#ifndef PNG_READ_OPT_PLTE_SUPPORTED
+   else if (png_crc_error(png_ptr) != 0)  /* Only if we have a CRC error */
+   {
+      /* If we don't want to use the data from an ancillary chunk,
+       * we have two options: an error abort, or a warning and we
+       * ignore the data in this chunk (which should be OK, since
+       * it's considered ancillary for a RGB or RGBA image).
+       *
+       * IMPLEMENTATION NOTE: this is only here because png_crc_finish uses the
+       * chunk type to determine whether to check the ancillary or the critical
+       * flags.
+       */
+      if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_USE) == 0)
+      {
+         if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) != 0)
+            return;
+
+         else
+            png_chunk_error(png_ptr, "CRC error");
+      }
+
+      /* Otherwise, we (optionally) emit a warning and use the chunk. */
+      else if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) == 0)
+         png_chunk_warning(png_ptr, "CRC error");
+   }
+#endif
+
+   /* TODO: png_set_PLTE has the side effect of setting png_ptr->palette to its
+    * own copy of the palette.  This has the side effect that when png_start_row
+    * is called (this happens after any call to png_read_update_info) the
+    * info_ptr palette gets changed.  This is extremely unexpected and
+    * confusing.
+    *
+    * Fix this by not sharing the palette in this way.
+    */
+   png_set_PLTE(png_ptr, info_ptr, palette, num);
+
+   /* The three chunks, bKGD, hIST and tRNS *must* appear after PLTE and before
+    * IDAT.  Prior to 1.6.0 this was not checked; instead the code merely
+    * checked the apparent validity of a tRNS chunk inserted before PLTE on a
+    * palette PNG.  1.6.0 attempts to rigorously follow the standard and
+    * therefore does a benign error if the erroneous condition is detected *and*
+    * cancels the tRNS if the benign error returns.  The alternative is to
+    * amend the standard since it would be rather hypocritical of the standards
+    * maintainers to ignore it.
+    */
+#ifdef PNG_READ_tRNS_SUPPORTED
+   if (png_ptr->num_trans > 0 ||
+       (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS) != 0))
+   {
+      /* Cancel this because otherwise it would be used if the transforms
+       * require it.  Don't cancel the 'valid' flag because this would prevent
+       * detection of duplicate chunks.
+       */
+      png_ptr->num_trans = 0;
+
+      if (info_ptr != NULL)
+         info_ptr->num_trans = 0;
+
+      png_chunk_benign_error(png_ptr, "tRNS must be after");
+   }
+#endif
+
+#ifdef PNG_READ_hIST_SUPPORTED
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST) != 0)
+      png_chunk_benign_error(png_ptr, "hIST must be after");
+#endif
+
+#ifdef PNG_READ_bKGD_SUPPORTED
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD) != 0)
+      png_chunk_benign_error(png_ptr, "bKGD must be after");
+#endif
+}
+
+void /* PRIVATE */
+png_handle_IEND(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_debug(1, "in png_handle_IEND");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0 ||
+       (png_ptr->mode & PNG_HAVE_IDAT) == 0)
+      png_chunk_error(png_ptr, "out of place");
+
+   png_ptr->mode |= (PNG_AFTER_IDAT | PNG_HAVE_IEND);
+
+   png_crc_finish(png_ptr, length);
+
+   if (length != 0)
+      png_chunk_benign_error(png_ptr, "invalid");
+
+   PNG_UNUSED(info_ptr)
+}
+
+#ifdef PNG_READ_gAMA_SUPPORTED
+void /* PRIVATE */
+png_handle_gAMA(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_fixed_point igamma;
+   png_byte buf[4];
+
+   png_debug(1, "in png_handle_gAMA");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   if (length != 4)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "invalid");
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, 4);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   igamma = png_get_fixed_point(NULL, buf);
+
+   png_colorspace_set_gamma(png_ptr, &png_ptr->colorspace, igamma);
+   png_colorspace_sync(png_ptr, info_ptr);
+}
+#endif
+
+#ifdef PNG_READ_sBIT_SUPPORTED
+void /* PRIVATE */
+png_handle_sBIT(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   unsigned int truelen, i;
+   png_byte sample_depth;
+   png_byte buf[4];
+
+   png_debug(1, "in png_handle_sBIT");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sBIT) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      truelen = 3;
+      sample_depth = 8;
+   }
+
+   else
+   {
+      truelen = png_ptr->channels;
+      sample_depth = png_ptr->bit_depth;
+   }
+
+   if (length != truelen || length > 4)
+   {
+      png_chunk_benign_error(png_ptr, "invalid");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   buf[0] = buf[1] = buf[2] = buf[3] = sample_depth;
+   png_crc_read(png_ptr, buf, truelen);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   for (i=0; i<truelen; ++i)
+   {
+      if (buf[i] == 0 || buf[i] > sample_depth)
+      {
+         png_chunk_benign_error(png_ptr, "invalid");
+         return;
+      }
+   }
+
+   if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
+   {
+      png_ptr->sig_bit.red = buf[0];
+      png_ptr->sig_bit.green = buf[1];
+      png_ptr->sig_bit.blue = buf[2];
+      png_ptr->sig_bit.alpha = buf[3];
+   }
+
+   else
+   {
+      png_ptr->sig_bit.gray = buf[0];
+      png_ptr->sig_bit.red = buf[0];
+      png_ptr->sig_bit.green = buf[0];
+      png_ptr->sig_bit.blue = buf[0];
+      png_ptr->sig_bit.alpha = buf[1];
+   }
+
+   png_set_sBIT(png_ptr, info_ptr, &(png_ptr->sig_bit));
+}
+#endif
+
+#ifdef PNG_READ_cHRM_SUPPORTED
+void /* PRIVATE */
+png_handle_cHRM(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_byte buf[32];
+   png_xy xy;
+
+   png_debug(1, "in png_handle_cHRM");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   if (length != 32)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "invalid");
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, 32);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   xy.whitex = png_get_fixed_point(NULL, buf);
+   xy.whitey = png_get_fixed_point(NULL, buf + 4);
+   xy.redx   = png_get_fixed_point(NULL, buf + 8);
+   xy.redy   = png_get_fixed_point(NULL, buf + 12);
+   xy.greenx = png_get_fixed_point(NULL, buf + 16);
+   xy.greeny = png_get_fixed_point(NULL, buf + 20);
+   xy.bluex  = png_get_fixed_point(NULL, buf + 24);
+   xy.bluey  = png_get_fixed_point(NULL, buf + 28);
+
+   if (xy.whitex == PNG_FIXED_ERROR ||
+       xy.whitey == PNG_FIXED_ERROR ||
+       xy.redx   == PNG_FIXED_ERROR ||
+       xy.redy   == PNG_FIXED_ERROR ||
+       xy.greenx == PNG_FIXED_ERROR ||
+       xy.greeny == PNG_FIXED_ERROR ||
+       xy.bluex  == PNG_FIXED_ERROR ||
+       xy.bluey  == PNG_FIXED_ERROR)
+   {
+      png_chunk_benign_error(png_ptr, "invalid values");
+      return;
+   }
+
+   /* If a colorspace error has already been output skip this chunk */
+   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0)
+      return;
+
+   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_FROM_cHRM) != 0)
+   {
+      png_ptr->colorspace.flags |= PNG_COLORSPACE_INVALID;
+      png_colorspace_sync(png_ptr, info_ptr);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   png_ptr->colorspace.flags |= PNG_COLORSPACE_FROM_cHRM;
+   (void)png_colorspace_set_chromaticities(png_ptr, &png_ptr->colorspace, &xy,
+       1/*prefer cHRM values*/);
+   png_colorspace_sync(png_ptr, info_ptr);
+}
+#endif
+
+#ifdef PNG_READ_sRGB_SUPPORTED
+void /* PRIVATE */
+png_handle_sRGB(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_byte intent;
+
+   png_debug(1, "in png_handle_sRGB");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   if (length != 1)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "invalid");
+      return;
+   }
+
+   png_crc_read(png_ptr, &intent, 1);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   /* If a colorspace error has already been output skip this chunk */
+   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0)
+      return;
+
+   /* Only one sRGB or iCCP chunk is allowed, use the HAVE_INTENT flag to detect
+    * this.
+    */
+   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_INTENT) != 0)
+   {
+      png_ptr->colorspace.flags |= PNG_COLORSPACE_INVALID;
+      png_colorspace_sync(png_ptr, info_ptr);
+      png_chunk_benign_error(png_ptr, "too many profiles");
+      return;
+   }
+
+   (void)png_colorspace_set_sRGB(png_ptr, &png_ptr->colorspace, intent);
+   png_colorspace_sync(png_ptr, info_ptr);
+}
+#endif /* READ_sRGB */
+
+#ifdef PNG_READ_iCCP_SUPPORTED
+void /* PRIVATE */
+png_handle_iCCP(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+/* Note: this does not properly handle profiles that are > 64K under DOS */
+{
+   png_const_charp errmsg = NULL; /* error message output, or no error */
+   int finished = 0; /* crc checked */
+
+   png_debug(1, "in png_handle_iCCP");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   /* Consistent with all the above colorspace handling an obviously *invalid*
+    * chunk is just ignored, so does not invalidate the color space.  An
+    * alternative is to set the 'invalid' flags at the start of this routine
+    * and only clear them in they were not set before and all the tests pass.
+    */
+
+   /* The keyword must be at least one character and there is a
+    * terminator (0) byte and the compression method byte, and the
+    * 'zlib' datastream is at least 11 bytes.
+    */
+   if (length < 14)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "too short");
+      return;
+   }
+
+   /* If a colorspace error has already been output skip this chunk */
+   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   /* Only one sRGB or iCCP chunk is allowed, use the HAVE_INTENT flag to detect
+    * this.
+    */
+   if ((png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_INTENT) == 0)
+   {
+      uInt read_length, keyword_length;
+      char keyword[81];
+
+      /* Find the keyword; the keyword plus separator and compression method
+       * bytes can be at most 81 characters long.
+       */
+      read_length = 81; /* maximum */
+      if (read_length > length)
+         read_length = (uInt)length;
+
+      png_crc_read(png_ptr, (png_bytep)keyword, read_length);
+      length -= read_length;
+
+      /* The minimum 'zlib' stream is assumed to be just the 2 byte header,
+       * 5 bytes minimum 'deflate' stream, and the 4 byte checksum.
+       */
+      if (length < 11)
+      {
+         png_crc_finish(png_ptr, length);
+         png_chunk_benign_error(png_ptr, "too short");
+         return;
+      }
+
+      keyword_length = 0;
+      while (keyword_length < 80 && keyword_length < read_length &&
+         keyword[keyword_length] != 0)
+         ++keyword_length;
+
+      /* TODO: make the keyword checking common */
+      if (keyword_length >= 1 && keyword_length <= 79)
+      {
+         /* We only understand '0' compression - deflate - so if we get a
+          * different value we can't safely decode the chunk.
+          */
+         if (keyword_length+1 < read_length &&
+            keyword[keyword_length+1] == PNG_COMPRESSION_TYPE_BASE)
+         {
+            read_length -= keyword_length+2;
+
+            if (png_inflate_claim(png_ptr, png_iCCP) == Z_OK)
+            {
+               Byte profile_header[132]={0};
+               Byte local_buffer[PNG_INFLATE_BUF_SIZE];
+               png_alloc_size_t size = (sizeof profile_header);
+
+               png_ptr->zstream.next_in = (Bytef*)keyword + (keyword_length+2);
+               png_ptr->zstream.avail_in = read_length;
+               (void)png_inflate_read(png_ptr, local_buffer,
+                   (sizeof local_buffer), &length, profile_header, &size,
+                   0/*finish: don't, because the output is too small*/);
+
+               if (size == 0)
+               {
+                  /* We have the ICC profile header; do the basic header checks.
+                   */
+                  png_uint_32 profile_length = png_get_uint_32(profile_header);
+
+                  if (png_icc_check_length(png_ptr, &png_ptr->colorspace,
+                      keyword, profile_length) != 0)
+                  {
+                     /* The length is apparently ok, so we can check the 132
+                      * byte header.
+                      */
+                     if (png_icc_check_header(png_ptr, &png_ptr->colorspace,
+                         keyword, profile_length, profile_header,
+                         png_ptr->color_type) != 0)
+                     {
+                        /* Now read the tag table; a variable size buffer is
+                         * needed at this point, allocate one for the whole
+                         * profile.  The header check has already validated
+                         * that none of this stuff will overflow.
+                         */
+                        png_uint_32 tag_count =
+                           png_get_uint_32(profile_header + 128);
+                        png_bytep profile = png_read_buffer(png_ptr,
+                            profile_length, 2/*silent*/);
+
+                        if (profile != NULL)
+                        {
+                           memcpy(profile, profile_header,
+                               (sizeof profile_header));
+
+                           size = 12 * tag_count;
+
+                           (void)png_inflate_read(png_ptr, local_buffer,
+                               (sizeof local_buffer), &length,
+                               profile + (sizeof profile_header), &size, 0);
+
+                           /* Still expect a buffer error because we expect
+                            * there to be some tag data!
+                            */
+                           if (size == 0)
+                           {
+                              if (png_icc_check_tag_table(png_ptr,
+                                  &png_ptr->colorspace, keyword, profile_length,
+                                  profile) != 0)
+                              {
+                                 /* The profile has been validated for basic
+                                  * security issues, so read the whole thing in.
+                                  */
+                                 size = profile_length - (sizeof profile_header)
+                                     - 12 * tag_count;
+
+                                 (void)png_inflate_read(png_ptr, local_buffer,
+                                     (sizeof local_buffer), &length,
+                                     profile + (sizeof profile_header) +
+                                     12 * tag_count, &size, 1/*finish*/);
+
+                                 if (length > 0 && !(png_ptr->flags &
+                                     PNG_FLAG_BENIGN_ERRORS_WARN))
+                                    errmsg = "extra compressed data";
+
+                                 /* But otherwise allow extra data: */
+                                 else if (size == 0)
+                                 {
+                                    if (length > 0)
+                                    {
+                                       /* This can be handled completely, so
+                                        * keep going.
+                                        */
+                                       png_chunk_warning(png_ptr,
+                                           "extra compressed data");
+                                    }
+
+                                    png_crc_finish(png_ptr, length);
+                                    finished = 1;
+
+# if defined(PNG_sRGB_SUPPORTED) && PNG_sRGB_PROFILE_CHECKS >= 0
+                                    /* Check for a match against sRGB */
+                                    png_icc_set_sRGB(png_ptr,
+                                        &png_ptr->colorspace, profile,
+                                        png_ptr->zstream.adler);
+# endif
+
+                                    /* Steal the profile for info_ptr. */
+                                    if (info_ptr != NULL)
+                                    {
+                                       png_free_data(png_ptr, info_ptr,
+                                           PNG_FREE_ICCP, 0);
+
+                                       info_ptr->iccp_name = png_voidcast(char*,
+                                           png_malloc_base(png_ptr,
+                                           keyword_length+1));
+                                       if (info_ptr->iccp_name != NULL)
+                                       {
+                                          memcpy(info_ptr->iccp_name, keyword,
+                                              keyword_length+1);
+                                          info_ptr->iccp_proflen =
+                                              profile_length;
+                                          info_ptr->iccp_profile = profile;
+                                          png_ptr->read_buffer = NULL; /*steal*/
+                                          info_ptr->free_me |= PNG_FREE_ICCP;
+                                          info_ptr->valid |= PNG_INFO_iCCP;
+                                       }
+
+                                       else
+                                       {
+                                          png_ptr->colorspace.flags |=
+                                             PNG_COLORSPACE_INVALID;
+                                          errmsg = "out of memory";
+                                       }
+                                    }
+
+                                    /* else the profile remains in the read
+                                     * buffer which gets reused for subsequent
+                                     * chunks.
+                                     */
+
+                                    if (info_ptr != NULL)
+                                       png_colorspace_sync(png_ptr, info_ptr);
+
+                                    if (errmsg == NULL)
+                                    {
+                                       png_ptr->zowner = 0;
+                                       return;
+                                    }
+                                 }
+                                 if (errmsg == NULL)
+                                    errmsg = png_ptr->zstream.msg;
+                              }
+                              /* else png_icc_check_tag_table output an error */
+                           }
+                           else /* profile truncated */
+                              errmsg = png_ptr->zstream.msg;
+                        }
+
+                        else
+                           errmsg = "out of memory";
+                     }
+
+                     /* else png_icc_check_header output an error */
+                  }
+
+                  /* else png_icc_check_length output an error */
+               }
+
+               else /* profile truncated */
+                  errmsg = png_ptr->zstream.msg;
+
+               /* Release the stream */
+               png_ptr->zowner = 0;
+            }
+
+            else /* png_inflate_claim failed */
+               errmsg = png_ptr->zstream.msg;
+         }
+
+         else
+            errmsg = "bad compression method"; /* or missing */
+      }
+
+      else
+         errmsg = "bad keyword";
+   }
+
+   else
+      errmsg = "too many profiles";
+
+   /* Failure: the reason is in 'errmsg' */
+   if (finished == 0)
+      png_crc_finish(png_ptr, length);
+
+   png_ptr->colorspace.flags |= PNG_COLORSPACE_INVALID;
+   png_colorspace_sync(png_ptr, info_ptr);
+   if (errmsg != NULL) /* else already output */
+      png_chunk_benign_error(png_ptr, errmsg);
+}
+#endif /* READ_iCCP */
+
+#ifdef PNG_READ_sPLT_SUPPORTED
+void /* PRIVATE */
+png_handle_sPLT(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+/* Note: this does not properly handle chunks that are > 64K under DOS */
+{
+   png_bytep entry_start, buffer;
+   png_sPLT_t new_palette;
+   png_sPLT_entryp pp;
+   png_uint_32 data_length;
+   int entry_size, i;
+   png_uint_32 skip = 0;
+   png_uint_32 dl;
+   size_t max_dl;
+
+   png_debug(1, "in png_handle_sPLT");
+
+#ifdef PNG_USER_LIMITS_SUPPORTED
+   if (png_ptr->user_chunk_cache_max != 0)
+   {
+      if (png_ptr->user_chunk_cache_max == 1)
+      {
+         png_crc_finish(png_ptr, length);
+         return;
+      }
+
+      if (--png_ptr->user_chunk_cache_max == 1)
+      {
+         png_warning(png_ptr, "No space in chunk cache for sPLT");
+         png_crc_finish(png_ptr, length);
+         return;
+      }
+   }
+#endif
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (length > 65535U)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "too large to fit in memory");
+      return;
+   }
+#endif
+
+   buffer = png_read_buffer(png_ptr, length+1, 2/*silent*/);
+   if (buffer == NULL)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of memory");
+      return;
+   }
+
+
+   /* WARNING: this may break if size_t is less than 32 bits; it is assumed
+    * that the PNG_MAX_MALLOC_64K test is enabled in this case, but this is a
+    * potential breakage point if the types in pngconf.h aren't exactly right.
+    */
+   png_crc_read(png_ptr, buffer, length);
+
+   if (png_crc_finish(png_ptr, skip) != 0)
+      return;
+
+   buffer[length] = 0;
+
+   for (entry_start = buffer; *entry_start; entry_start++)
+      /* Empty loop to find end of name */ ;
+
+   ++entry_start;
+
+   /* A sample depth should follow the separator, and we should be on it  */
+   if (length < 2U || entry_start > buffer + (length - 2U))
+   {
+      png_warning(png_ptr, "malformed sPLT chunk");
+      return;
+   }
+
+   new_palette.depth = *entry_start++;
+   entry_size = (new_palette.depth == 8 ? 6 : 10);
+   /* This must fit in a png_uint_32 because it is derived from the original
+    * chunk data length.
+    */
+   data_length = length - (png_uint_32)(entry_start - buffer);
+
+   /* Integrity-check the data length */
+   if ((data_length % (unsigned int)entry_size) != 0)
+   {
+      png_warning(png_ptr, "sPLT chunk has bad length");
+      return;
+   }
+
+   dl = (png_uint_32)(data_length / (unsigned int)entry_size);
+   max_dl = PNG_SIZE_MAX / (sizeof (png_sPLT_entry));
+
+   if (dl > max_dl)
+   {
+      png_warning(png_ptr, "sPLT chunk too long");
+      return;
+   }
+
+   new_palette.nentries = (png_int_32)(data_length / (unsigned int)entry_size);
+
+   new_palette.entries = (png_sPLT_entryp)png_malloc_warn(png_ptr,
+       (png_alloc_size_t) new_palette.nentries * (sizeof (png_sPLT_entry)));
+
+   if (new_palette.entries == NULL)
+   {
+      png_warning(png_ptr, "sPLT chunk requires too much memory");
+      return;
+   }
+
+#ifdef PNG_POINTER_INDEXING_SUPPORTED
+   for (i = 0; i < new_palette.nentries; i++)
+   {
+      pp = new_palette.entries + i;
+
+      if (new_palette.depth == 8)
+      {
+         pp->red = *entry_start++;
+         pp->green = *entry_start++;
+         pp->blue = *entry_start++;
+         pp->alpha = *entry_start++;
+      }
+
+      else
+      {
+         pp->red   = png_get_uint_16(entry_start); entry_start += 2;
+         pp->green = png_get_uint_16(entry_start); entry_start += 2;
+         pp->blue  = png_get_uint_16(entry_start); entry_start += 2;
+         pp->alpha = png_get_uint_16(entry_start); entry_start += 2;
+      }
+
+      pp->frequency = png_get_uint_16(entry_start); entry_start += 2;
+   }
+#else
+   pp = new_palette.entries;
+
+   for (i = 0; i < new_palette.nentries; i++)
+   {
+
+      if (new_palette.depth == 8)
+      {
+         pp[i].red   = *entry_start++;
+         pp[i].green = *entry_start++;
+         pp[i].blue  = *entry_start++;
+         pp[i].alpha = *entry_start++;
+      }
+
+      else
+      {
+         pp[i].red   = png_get_uint_16(entry_start); entry_start += 2;
+         pp[i].green = png_get_uint_16(entry_start); entry_start += 2;
+         pp[i].blue  = png_get_uint_16(entry_start); entry_start += 2;
+         pp[i].alpha = png_get_uint_16(entry_start); entry_start += 2;
+      }
+
+      pp[i].frequency = png_get_uint_16(entry_start); entry_start += 2;
+   }
+#endif
+
+   /* Discard all chunk data except the name and stash that */
+   new_palette.name = (png_charp)buffer;
+
+   png_set_sPLT(png_ptr, info_ptr, &new_palette, 1);
+
+   png_free(png_ptr, new_palette.entries);
+}
+#endif /* READ_sPLT */
+
+#ifdef PNG_READ_tRNS_SUPPORTED
+void /* PRIVATE */
+png_handle_tRNS(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_byte readbuf[PNG_MAX_PALETTE_LENGTH];
+
+   png_debug(1, "in png_handle_tRNS");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
+   {
+      png_byte buf[2];
+
+      if (length != 2)
+      {
+         png_crc_finish(png_ptr, length);
+         png_chunk_benign_error(png_ptr, "invalid");
+         return;
+      }
+
+      png_crc_read(png_ptr, buf, 2);
+      png_ptr->num_trans = 1;
+      png_ptr->trans_color.gray = png_get_uint_16(buf);
+   }
+
+   else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
+   {
+      png_byte buf[6];
+
+      if (length != 6)
+      {
+         png_crc_finish(png_ptr, length);
+         png_chunk_benign_error(png_ptr, "invalid");
+         return;
+      }
+
+      png_crc_read(png_ptr, buf, length);
+      png_ptr->num_trans = 1;
+      png_ptr->trans_color.red = png_get_uint_16(buf);
+      png_ptr->trans_color.green = png_get_uint_16(buf + 2);
+      png_ptr->trans_color.blue = png_get_uint_16(buf + 4);
+   }
+
+   else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      if ((png_ptr->mode & PNG_HAVE_PLTE) == 0)
+      {
+         /* TODO: is this actually an error in the ISO spec? */
+         png_crc_finish(png_ptr, length);
+         png_chunk_benign_error(png_ptr, "out of place");
+         return;
+      }
+
+      if (length > (unsigned int) png_ptr->num_palette ||
+         length > (unsigned int) PNG_MAX_PALETTE_LENGTH ||
+         length == 0)
+      {
+         png_crc_finish(png_ptr, length);
+         png_chunk_benign_error(png_ptr, "invalid");
+         return;
+      }
+
+      png_crc_read(png_ptr, readbuf, length);
+      png_ptr->num_trans = (png_uint_16)length;
+   }
+
+   else
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "invalid with alpha channel");
+      return;
+   }
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+   {
+      png_ptr->num_trans = 0;
+      return;
+   }
+
+   /* TODO: this is a horrible side effect in the palette case because the
+    * png_struct ends up with a pointer to the tRNS buffer owned by the
+    * png_info.  Fix this.
+    */
+   png_set_tRNS(png_ptr, info_ptr, readbuf, png_ptr->num_trans,
+       &(png_ptr->trans_color));
+}
+#endif
+
+#ifdef PNG_READ_bKGD_SUPPORTED
+void /* PRIVATE */
+png_handle_bKGD(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   unsigned int truelen;
+   png_byte buf[6];
+   png_color_16 background;
+
+   png_debug(1, "in png_handle_bKGD");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0 ||
+       (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+       (png_ptr->mode & PNG_HAVE_PLTE) == 0))
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      truelen = 1;
+
+   else if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
+      truelen = 6;
+
+   else
+      truelen = 2;
+
+   if (length != truelen)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "invalid");
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, truelen);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   /* We convert the index value into RGB components so that we can allow
+    * arbitrary RGB values for background when we have transparency, and
+    * so it is easy to determine the RGB values of the background color
+    * from the info_ptr struct.
+    */
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      background.index = buf[0];
+
+      if (info_ptr != NULL && info_ptr->num_palette != 0)
+      {
+         if (buf[0] >= info_ptr->num_palette)
+         {
+            png_chunk_benign_error(png_ptr, "invalid index");
+            return;
+         }
+
+         background.red = (png_uint_16)png_ptr->palette[buf[0]].red;
+         background.green = (png_uint_16)png_ptr->palette[buf[0]].green;
+         background.blue = (png_uint_16)png_ptr->palette[buf[0]].blue;
+      }
+
+      else
+         background.red = background.green = background.blue = 0;
+
+      background.gray = 0;
+   }
+
+   else if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0) /* GRAY */
+   {
+      if (png_ptr->bit_depth <= 8)
+      {
+         if (buf[0] != 0 || buf[1] >= (unsigned int)(1 << png_ptr->bit_depth))
+         {
+            png_chunk_benign_error(png_ptr, "invalid gray level");
+            return;
+         }
+      }
+
+      background.index = 0;
+      background.red =
+      background.green =
+      background.blue =
+      background.gray = png_get_uint_16(buf);
+   }
+
+   else
+   {
+      if (png_ptr->bit_depth <= 8)
+      {
+         if (buf[0] != 0 || buf[2] != 0 || buf[4] != 0)
+         {
+            png_chunk_benign_error(png_ptr, "invalid color");
+            return;
+         }
+      }
+
+      background.index = 0;
+      background.red = png_get_uint_16(buf);
+      background.green = png_get_uint_16(buf + 2);
+      background.blue = png_get_uint_16(buf + 4);
+      background.gray = 0;
+   }
+
+   png_set_bKGD(png_ptr, info_ptr, &background);
+}
+#endif
+
+#ifdef PNG_READ_eXIf_SUPPORTED
+void /* PRIVATE */
+png_handle_eXIf(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   unsigned int i;
+
+   png_debug(1, "in png_handle_eXIf");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   if (length < 2)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "too short");
+      return;
+   }
+
+   else if (info_ptr == NULL || (info_ptr->valid & PNG_INFO_eXIf) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   info_ptr->free_me |= PNG_FREE_EXIF;
+
+   info_ptr->eXIf_buf = png_voidcast(png_bytep,
+             png_malloc_warn(png_ptr, length));
+
+   if (info_ptr->eXIf_buf == NULL)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of memory");
+      return;
+   }
+
+   for (i = 0; i < length; i++)
+   {
+      png_byte buf[1];
+      png_crc_read(png_ptr, buf, 1);
+      info_ptr->eXIf_buf[i] = buf[0];
+      if (i == 1)
+      {
+         if ((buf[0] != 'M' && buf[0] != 'I') ||
+             (info_ptr->eXIf_buf[0] != buf[0]))
+         {
+            png_crc_finish(png_ptr, length - 2);
+            png_chunk_benign_error(png_ptr, "incorrect byte-order specifier");
+            png_free(png_ptr, info_ptr->eXIf_buf);
+            info_ptr->eXIf_buf = NULL;
+            return;
+         }
+      }
+   }
+
+   if (png_crc_finish(png_ptr, 0) == 0)
+      png_set_eXIf_1(png_ptr, info_ptr, length, info_ptr->eXIf_buf);
+
+   png_free(png_ptr, info_ptr->eXIf_buf);
+   info_ptr->eXIf_buf = NULL;
+}
+#endif
+
+#ifdef PNG_READ_hIST_SUPPORTED
+void /* PRIVATE */
+png_handle_hIST(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   unsigned int num, i;
+   png_uint_16 readbuf[PNG_MAX_PALETTE_LENGTH];
+
+   png_debug(1, "in png_handle_hIST");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0 ||
+       (png_ptr->mode & PNG_HAVE_PLTE) == 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   num = length / 2 ;
+
+   if (length != num * 2 ||
+       num != (unsigned int)png_ptr->num_palette ||
+       num > (unsigned int)PNG_MAX_PALETTE_LENGTH)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "invalid");
+      return;
+   }
+
+   for (i = 0; i < num; i++)
+   {
+      png_byte buf[2];
+
+      png_crc_read(png_ptr, buf, 2);
+      readbuf[i] = png_get_uint_16(buf);
+   }
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   png_set_hIST(png_ptr, info_ptr, readbuf);
+}
+#endif
+
+#ifdef PNG_READ_pHYs_SUPPORTED
+void /* PRIVATE */
+png_handle_pHYs(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_byte buf[9];
+   png_uint_32 res_x, res_y;
+   int unit_type;
+
+   png_debug(1, "in png_handle_pHYs");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   if (length != 9)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "invalid");
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, 9);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   res_x = png_get_uint_32(buf);
+   res_y = png_get_uint_32(buf + 4);
+   unit_type = buf[8];
+   png_set_pHYs(png_ptr, info_ptr, res_x, res_y, unit_type);
+}
+#endif
+
+#ifdef PNG_READ_oFFs_SUPPORTED
+void /* PRIVATE */
+png_handle_oFFs(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_byte buf[9];
+   png_int_32 offset_x, offset_y;
+   int unit_type;
+
+   png_debug(1, "in png_handle_oFFs");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   if (length != 9)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "invalid");
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, 9);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   offset_x = png_get_int_32(buf);
+   offset_y = png_get_int_32(buf + 4);
+   unit_type = buf[8];
+   png_set_oFFs(png_ptr, info_ptr, offset_x, offset_y, unit_type);
+}
+#endif
+
+#ifdef PNG_READ_pCAL_SUPPORTED
+/* Read the pCAL chunk (described in the PNG Extensions document) */
+void /* PRIVATE */
+png_handle_pCAL(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_int_32 X0, X1;
+   png_byte type, nparams;
+   png_bytep buffer, buf, units, endptr;
+   png_charpp params;
+   int i;
+
+   png_debug(1, "in png_handle_pCAL");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pCAL) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   png_debug1(2, "Allocating and reading pCAL chunk data (%u bytes)",
+       length + 1);
+
+   buffer = png_read_buffer(png_ptr, length+1, 2/*silent*/);
+
+   if (buffer == NULL)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of memory");
+      return;
+   }
+
+   png_crc_read(png_ptr, buffer, length);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   buffer[length] = 0; /* Null terminate the last string */
+
+   png_debug(3, "Finding end of pCAL purpose string");
+   for (buf = buffer; *buf; buf++)
+      /* Empty loop */ ;
+
+   endptr = buffer + length;
+
+   /* We need to have at least 12 bytes after the purpose string
+    * in order to get the parameter information.
+    */
+   if (endptr - buf <= 12)
+   {
+      png_chunk_benign_error(png_ptr, "invalid");
+      return;
+   }
+
+   png_debug(3, "Reading pCAL X0, X1, type, nparams, and units");
+   X0 = png_get_int_32((png_bytep)buf+1);
+   X1 = png_get_int_32((png_bytep)buf+5);
+   type = buf[9];
+   nparams = buf[10];
+   units = buf + 11;
+
+   png_debug(3, "Checking pCAL equation type and number of parameters");
+   /* Check that we have the right number of parameters for known
+    * equation types.
+    */
+   if ((type == PNG_EQUATION_LINEAR && nparams != 2) ||
+       (type == PNG_EQUATION_BASE_E && nparams != 3) ||
+       (type == PNG_EQUATION_ARBITRARY && nparams != 3) ||
+       (type == PNG_EQUATION_HYPERBOLIC && nparams != 4))
+   {
+      png_chunk_benign_error(png_ptr, "invalid parameter count");
+      return;
+   }
+
+   else if (type >= PNG_EQUATION_LAST)
+   {
+      png_chunk_benign_error(png_ptr, "unrecognized equation type");
+   }
+
+   for (buf = units; *buf; buf++)
+      /* Empty loop to move past the units string. */ ;
+
+   png_debug(3, "Allocating pCAL parameters array");
+
+   params = png_voidcast(png_charpp, png_malloc_warn(png_ptr,
+       nparams * (sizeof (png_charp))));
+
+   if (params == NULL)
+   {
+      png_chunk_benign_error(png_ptr, "out of memory");
+      return;
+   }
+
+   /* Get pointers to the start of each parameter string. */
+   for (i = 0; i < nparams; i++)
+   {
+      buf++; /* Skip the null string terminator from previous parameter. */
+
+      png_debug1(3, "Reading pCAL parameter %d", i);
+
+      for (params[i] = (png_charp)buf; buf <= endptr && *buf != 0; buf++)
+         /* Empty loop to move past each parameter string */ ;
+
+      /* Make sure we haven't run out of data yet */
+      if (buf > endptr)
+      {
+         png_free(png_ptr, params);
+         png_chunk_benign_error(png_ptr, "invalid data");
+         return;
+      }
+   }
+
+   png_set_pCAL(png_ptr, info_ptr, (png_charp)buffer, X0, X1, type, nparams,
+       (png_charp)units, params);
+
+   png_free(png_ptr, params);
+}
+#endif
+
+#ifdef PNG_READ_sCAL_SUPPORTED
+/* Read the sCAL chunk */
+void /* PRIVATE */
+png_handle_sCAL(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_bytep buffer;
+   size_t i;
+   int state;
+
+   png_debug(1, "in png_handle_sCAL");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of place");
+      return;
+   }
+
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sCAL) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   /* Need unit type, width, \0, height: minimum 4 bytes */
+   else if (length < 4)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "invalid");
+      return;
+   }
+
+   png_debug1(2, "Allocating and reading sCAL chunk data (%u bytes)",
+       length + 1);
+
+   buffer = png_read_buffer(png_ptr, length+1, 2/*silent*/);
+
+   if (buffer == NULL)
+   {
+      png_chunk_benign_error(png_ptr, "out of memory");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, buffer, length);
+   buffer[length] = 0; /* Null terminate the last string */
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   /* Validate the unit. */
+   if (buffer[0] != 1 && buffer[0] != 2)
+   {
+      png_chunk_benign_error(png_ptr, "invalid unit");
+      return;
+   }
+
+   /* Validate the ASCII numbers, need two ASCII numbers separated by
+    * a '\0' and they need to fit exactly in the chunk data.
+    */
+   i = 1;
+   state = 0;
+
+   if (png_check_fp_number((png_const_charp)buffer, length, &state, &i) == 0 ||
+       i >= length || buffer[i++] != 0)
+      png_chunk_benign_error(png_ptr, "bad width format");
+
+   else if (PNG_FP_IS_POSITIVE(state) == 0)
+      png_chunk_benign_error(png_ptr, "non-positive width");
+
+   else
+   {
+      size_t heighti = i;
+
+      state = 0;
+      if (png_check_fp_number((png_const_charp)buffer, length,
+          &state, &i) == 0 || i != length)
+         png_chunk_benign_error(png_ptr, "bad height format");
+
+      else if (PNG_FP_IS_POSITIVE(state) == 0)
+         png_chunk_benign_error(png_ptr, "non-positive height");
+
+      else
+         /* This is the (only) success case. */
+         png_set_sCAL_s(png_ptr, info_ptr, buffer[0],
+             (png_charp)buffer+1, (png_charp)buffer+heighti);
+   }
+}
+#endif
+
+#ifdef PNG_READ_tIME_SUPPORTED
+void /* PRIVATE */
+png_handle_tIME(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_byte buf[7];
+   png_time mod_time;
+
+   png_debug(1, "in png_handle_tIME");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tIME) != 0)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "duplicate");
+      return;
+   }
+
+   if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+   if (length != 7)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "invalid");
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, 7);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   mod_time.second = buf[6];
+   mod_time.minute = buf[5];
+   mod_time.hour = buf[4];
+   mod_time.day = buf[3];
+   mod_time.month = buf[2];
+   mod_time.year = png_get_uint_16(buf);
+
+   png_set_tIME(png_ptr, info_ptr, &mod_time);
+}
+#endif
+
+#ifdef PNG_READ_tEXt_SUPPORTED
+/* Note: this does not properly handle chunks that are > 64K under DOS */
+void /* PRIVATE */
+png_handle_tEXt(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_text  text_info;
+   png_bytep buffer;
+   png_charp key;
+   png_charp text;
+   png_uint_32 skip = 0;
+
+   png_debug(1, "in png_handle_tEXt");
+
+#ifdef PNG_USER_LIMITS_SUPPORTED
+   if (png_ptr->user_chunk_cache_max != 0)
+   {
+      if (png_ptr->user_chunk_cache_max == 1)
+      {
+         png_crc_finish(png_ptr, length);
+         return;
+      }
+
+      if (--png_ptr->user_chunk_cache_max == 1)
+      {
+         png_crc_finish(png_ptr, length);
+         png_chunk_benign_error(png_ptr, "no space in chunk cache");
+         return;
+      }
+   }
+#endif
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (length > 65535U)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "too large to fit in memory");
+      return;
+   }
+#endif
+
+   buffer = png_read_buffer(png_ptr, length+1, 1/*warn*/);
+
+   if (buffer == NULL)
+   {
+      png_chunk_benign_error(png_ptr, "out of memory");
+      return;
+   }
+
+   png_crc_read(png_ptr, buffer, length);
+
+   if (png_crc_finish(png_ptr, skip) != 0)
+      return;
+
+   key = (png_charp)buffer;
+   key[length] = 0;
+
+   for (text = key; *text; text++)
+      /* Empty loop to find end of key */ ;
+
+   if (text != key + length)
+      text++;
+
+   text_info.compression = PNG_TEXT_COMPRESSION_NONE;
+   text_info.key = key;
+   text_info.lang = NULL;
+   text_info.lang_key = NULL;
+   text_info.itxt_length = 0;
+   text_info.text = text;
+   text_info.text_length = strlen(text);
+
+   if (png_set_text_2(png_ptr, info_ptr, &text_info, 1) != 0)
+      png_warning(png_ptr, "Insufficient memory to process text chunk");
+}
+#endif
+
+#ifdef PNG_READ_zTXt_SUPPORTED
+/* Note: this does not correctly handle chunks that are > 64K under DOS */
+void /* PRIVATE */
+png_handle_zTXt(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_const_charp errmsg = NULL;
+   png_bytep       buffer;
+   png_uint_32     keyword_length;
+
+   png_debug(1, "in png_handle_zTXt");
+
+#ifdef PNG_USER_LIMITS_SUPPORTED
+   if (png_ptr->user_chunk_cache_max != 0)
+   {
+      if (png_ptr->user_chunk_cache_max == 1)
+      {
+         png_crc_finish(png_ptr, length);
+         return;
+      }
+
+      if (--png_ptr->user_chunk_cache_max == 1)
+      {
+         png_crc_finish(png_ptr, length);
+         png_chunk_benign_error(png_ptr, "no space in chunk cache");
+         return;
+      }
+   }
+#endif
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+   /* Note, "length" is sufficient here; we won't be adding
+    * a null terminator later.
+    */
+   buffer = png_read_buffer(png_ptr, length, 2/*silent*/);
+
+   if (buffer == NULL)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of memory");
+      return;
+   }
+
+   png_crc_read(png_ptr, buffer, length);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   /* TODO: also check that the keyword contents match the spec! */
+   for (keyword_length = 0;
+      keyword_length < length && buffer[keyword_length] != 0;
+      ++keyword_length)
+      /* Empty loop to find end of name */ ;
+
+   if (keyword_length > 79 || keyword_length < 1)
+      errmsg = "bad keyword";
+
+   /* zTXt must have some LZ data after the keyword, although it may expand to
+    * zero bytes; we need a '\0' at the end of the keyword, the compression type
+    * then the LZ data:
+    */
+   else if (keyword_length + 3 > length)
+      errmsg = "truncated";
+
+   else if (buffer[keyword_length+1] != PNG_COMPRESSION_TYPE_BASE)
+      errmsg = "unknown compression type";
+
+   else
+   {
+      png_alloc_size_t uncompressed_length = PNG_SIZE_MAX;
+
+      /* TODO: at present png_decompress_chunk imposes a single application
+       * level memory limit, this should be split to different values for iCCP
+       * and text chunks.
+       */
+      if (png_decompress_chunk(png_ptr, length, keyword_length+2,
+          &uncompressed_length, 1/*terminate*/) == Z_STREAM_END)
+      {
+         png_text text;
+
+         if (png_ptr->read_buffer == NULL)
+           errmsg="Read failure in png_handle_zTXt";
+         else
+         {
+            /* It worked; png_ptr->read_buffer now looks like a tEXt chunk
+             * except for the extra compression type byte and the fact that
+             * it isn't necessarily '\0' terminated.
+             */
+            buffer = png_ptr->read_buffer;
+            buffer[uncompressed_length+(keyword_length+2)] = 0;
+
+            text.compression = PNG_TEXT_COMPRESSION_zTXt;
+            text.key = (png_charp)buffer;
+            text.text = (png_charp)(buffer + keyword_length+2);
+            text.text_length = uncompressed_length;
+            text.itxt_length = 0;
+            text.lang = NULL;
+            text.lang_key = NULL;
+
+            if (png_set_text_2(png_ptr, info_ptr, &text, 1) != 0)
+               errmsg = "insufficient memory";
+         }
+      }
+
+      else
+         errmsg = png_ptr->zstream.msg;
+   }
+
+   if (errmsg != NULL)
+      png_chunk_benign_error(png_ptr, errmsg);
+}
+#endif
+
+#ifdef PNG_READ_iTXt_SUPPORTED
+/* Note: this does not correctly handle chunks that are > 64K under DOS */
+void /* PRIVATE */
+png_handle_iTXt(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length)
+{
+   png_const_charp errmsg = NULL;
+   png_bytep buffer;
+   png_uint_32 prefix_length;
+
+   png_debug(1, "in png_handle_iTXt");
+
+#ifdef PNG_USER_LIMITS_SUPPORTED
+   if (png_ptr->user_chunk_cache_max != 0)
+   {
+      if (png_ptr->user_chunk_cache_max == 1)
+      {
+         png_crc_finish(png_ptr, length);
+         return;
+      }
+
+      if (--png_ptr->user_chunk_cache_max == 1)
+      {
+         png_crc_finish(png_ptr, length);
+         png_chunk_benign_error(png_ptr, "no space in chunk cache");
+         return;
+      }
+   }
+#endif
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0)
+      png_chunk_error(png_ptr, "missing IHDR");
+
+   if ((png_ptr->mode & PNG_HAVE_IDAT) != 0)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+   buffer = png_read_buffer(png_ptr, length+1, 1/*warn*/);
+
+   if (buffer == NULL)
+   {
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "out of memory");
+      return;
+   }
+
+   png_crc_read(png_ptr, buffer, length);
+
+   if (png_crc_finish(png_ptr, 0) != 0)
+      return;
+
+   /* First the keyword. */
+   for (prefix_length=0;
+      prefix_length < length && buffer[prefix_length] != 0;
+      ++prefix_length)
+      /* Empty loop */ ;
+
+   /* Perform a basic check on the keyword length here. */
+   if (prefix_length > 79 || prefix_length < 1)
+      errmsg = "bad keyword";
+
+   /* Expect keyword, compression flag, compression type, language, translated
+    * keyword (both may be empty but are 0 terminated) then the text, which may
+    * be empty.
+    */
+   else if (prefix_length + 5 > length)
+      errmsg = "truncated";
+
+   else if (buffer[prefix_length+1] == 0 ||
+      (buffer[prefix_length+1] == 1 &&
+      buffer[prefix_length+2] == PNG_COMPRESSION_TYPE_BASE))
+   {
+      int compressed = buffer[prefix_length+1] != 0;
+      png_uint_32 language_offset, translated_keyword_offset;
+      png_alloc_size_t uncompressed_length = 0;
+
+      /* Now the language tag */
+      prefix_length += 3;
+      language_offset = prefix_length;
+
+      for (; prefix_length < length && buffer[prefix_length] != 0;
+         ++prefix_length)
+         /* Empty loop */ ;
+
+      /* WARNING: the length may be invalid here, this is checked below. */
+      translated_keyword_offset = ++prefix_length;
+
+      for (; prefix_length < length && buffer[prefix_length] != 0;
+         ++prefix_length)
+         /* Empty loop */ ;
+
+      /* prefix_length should now be at the trailing '\0' of the translated
+       * keyword, but it may already be over the end.  None of this arithmetic
+       * can overflow because chunks are at most 2^31 bytes long, but on 16-bit
+       * systems the available allocation may overflow.
+       */
+      ++prefix_length;
+
+      if (compressed == 0 && prefix_length <= length)
+         uncompressed_length = length - prefix_length;
+
+      else if (compressed != 0 && prefix_length < length)
+      {
+         uncompressed_length = PNG_SIZE_MAX;
+
+         /* TODO: at present png_decompress_chunk imposes a single application
+          * level memory limit, this should be split to different values for
+          * iCCP and text chunks.
+          */
+         if (png_decompress_chunk(png_ptr, length, prefix_length,
+             &uncompressed_length, 1/*terminate*/) == Z_STREAM_END)
+            buffer = png_ptr->read_buffer;
+
+         else
+            errmsg = png_ptr->zstream.msg;
+      }
+
+      else
+         errmsg = "truncated";
+
+      if (errmsg == NULL)
+      {
+         png_text text;
+
+         buffer[uncompressed_length+prefix_length] = 0;
+
+         if (compressed == 0)
+            text.compression = PNG_ITXT_COMPRESSION_NONE;
+
+         else
+            text.compression = PNG_ITXT_COMPRESSION_zTXt;
+
+         text.key = (png_charp)buffer;
+         text.lang = (png_charp)buffer + language_offset;
+         text.lang_key = (png_charp)buffer + translated_keyword_offset;
+         text.text = (png_charp)buffer + prefix_length;
+         text.text_length = 0;
+         text.itxt_length = uncompressed_length;
+
+         if (png_set_text_2(png_ptr, info_ptr, &text, 1) != 0)
+            errmsg = "insufficient memory";
+      }
+   }
+
+   else
+      errmsg = "bad compression info";
+
+   if (errmsg != NULL)
+      png_chunk_benign_error(png_ptr, errmsg);
+}
+#endif
+
+#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
+/* Utility function for png_handle_unknown; set up png_ptr::unknown_chunk */
+static int
+png_cache_unknown_chunk(png_structrp png_ptr, png_uint_32 length)
+{
+   png_alloc_size_t limit = PNG_SIZE_MAX;
+
+   if (png_ptr->unknown_chunk.data != NULL)
+   {
+      png_free(png_ptr, png_ptr->unknown_chunk.data);
+      png_ptr->unknown_chunk.data = NULL;
+   }
+
+#  ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   if (png_ptr->user_chunk_malloc_max > 0 &&
+       png_ptr->user_chunk_malloc_max < limit)
+      limit = png_ptr->user_chunk_malloc_max;
+
+#  elif PNG_USER_CHUNK_MALLOC_MAX > 0
+   if (PNG_USER_CHUNK_MALLOC_MAX < limit)
+      limit = PNG_USER_CHUNK_MALLOC_MAX;
+#  endif
+
+   if (length <= limit)
+   {
+      PNG_CSTRING_FROM_CHUNK(png_ptr->unknown_chunk.name, png_ptr->chunk_name);
+      /* The following is safe because of the PNG_SIZE_MAX init above */
+      png_ptr->unknown_chunk.size = (size_t)length/*SAFE*/;
+      /* 'mode' is a flag array, only the bottom four bits matter here */
+      png_ptr->unknown_chunk.location = (png_byte)png_ptr->mode/*SAFE*/;
+
+      if (length == 0)
+         png_ptr->unknown_chunk.data = NULL;
+
+      else
+      {
+         /* Do a 'warn' here - it is handled below. */
+         png_ptr->unknown_chunk.data = png_voidcast(png_bytep,
+             png_malloc_warn(png_ptr, length));
+      }
+   }
+
+   if (png_ptr->unknown_chunk.data == NULL && length > 0)
+   {
+      /* This is benign because we clean up correctly */
+      png_crc_finish(png_ptr, length);
+      png_chunk_benign_error(png_ptr, "unknown chunk exceeds memory limits");
+      return 0;
+   }
+
+   else
+   {
+      if (length > 0)
+         png_crc_read(png_ptr, png_ptr->unknown_chunk.data, length);
+      png_crc_finish(png_ptr, 0);
+      return 1;
+   }
+}
+#endif /* READ_UNKNOWN_CHUNKS */
+
+/* Handle an unknown, or known but disabled, chunk */
+void /* PRIVATE */
+png_handle_unknown(png_structrp png_ptr, png_inforp info_ptr,
+    png_uint_32 length, int keep)
+{
+   int handled = 0; /* the chunk was handled */
+
+   png_debug(1, "in png_handle_unknown");
+
+#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
+   /* NOTE: this code is based on the code in libpng-1.4.12 except for fixing
+    * the bug which meant that setting a non-default behavior for a specific
+    * chunk would be ignored (the default was always used unless a user
+    * callback was installed).
+    *
+    * 'keep' is the value from the png_chunk_unknown_handling, the setting for
+    * this specific chunk_name, if PNG_HANDLE_AS_UNKNOWN_SUPPORTED, if not it
+    * will always be PNG_HANDLE_CHUNK_AS_DEFAULT and it needs to be set here.
+    * This is just an optimization to avoid multiple calls to the lookup
+    * function.
+    */
+#  ifndef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#     ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
+   keep = png_chunk_unknown_handling(png_ptr, png_ptr->chunk_name);
+#     endif
+#  endif
+
+   /* One of the following methods will read the chunk or skip it (at least one
+    * of these is always defined because this is the only way to switch on
+    * PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+    */
+#  ifdef PNG_READ_USER_CHUNKS_SUPPORTED
+   /* The user callback takes precedence over the chunk keep value, but the
+    * keep value is still required to validate a save of a critical chunk.
+    */
+   if (png_ptr->read_user_chunk_fn != NULL)
+   {
+      if (png_cache_unknown_chunk(png_ptr, length) != 0)
+      {
+         /* Callback to user unknown chunk handler */
+         int ret = (*(png_ptr->read_user_chunk_fn))(png_ptr,
+             &png_ptr->unknown_chunk);
+
+         /* ret is:
+          * negative: An error occurred; png_chunk_error will be called.
+          *     zero: The chunk was not handled, the chunk will be discarded
+          *           unless png_set_keep_unknown_chunks has been used to set
+          *           a 'keep' behavior for this particular chunk, in which
+          *           case that will be used.  A critical chunk will cause an
+          *           error at this point unless it is to be saved.
+          * positive: The chunk was handled, libpng will ignore/discard it.
+          */
+         if (ret < 0)
+            png_chunk_error(png_ptr, "error in user chunk");
+
+         else if (ret == 0)
+         {
+            /* If the keep value is 'default' or 'never' override it, but
+             * still error out on critical chunks unless the keep value is
+             * 'always'  While this is weird it is the behavior in 1.4.12.
+             * A possible improvement would be to obey the value set for the
+             * chunk, but this would be an API change that would probably
+             * damage some applications.
+             *
+             * The png_app_warning below catches the case that matters, where
+             * the application has not set specific save or ignore for this
+             * chunk or global save or ignore.
+             */
+            if (keep < PNG_HANDLE_CHUNK_IF_SAFE)
+            {
+#              ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
+               if (png_ptr->unknown_default < PNG_HANDLE_CHUNK_IF_SAFE)
+               {
+                  png_chunk_warning(png_ptr, "Saving unknown chunk:");
+                  png_app_warning(png_ptr,
+                      "forcing save of an unhandled chunk;"
+                      " please call png_set_keep_unknown_chunks");
+                      /* with keep = PNG_HANDLE_CHUNK_IF_SAFE */
+               }
+#              endif
+               keep = PNG_HANDLE_CHUNK_IF_SAFE;
+            }
+         }
+
+         else /* chunk was handled */
+         {
+            handled = 1;
+            /* Critical chunks can be safely discarded at this point. */
+            keep = PNG_HANDLE_CHUNK_NEVER;
+         }
+      }
+
+      else
+         keep = PNG_HANDLE_CHUNK_NEVER; /* insufficient memory */
+   }
+
+   else
+   /* Use the SAVE_UNKNOWN_CHUNKS code or skip the chunk */
+#  endif /* READ_USER_CHUNKS */
+
+#  ifdef PNG_SAVE_UNKNOWN_CHUNKS_SUPPORTED
+   {
+      /* keep is currently just the per-chunk setting, if there was no
+       * setting change it to the global default now (not that this may
+       * still be AS_DEFAULT) then obtain the cache of the chunk if required,
+       * if not simply skip the chunk.
+       */
+      if (keep == PNG_HANDLE_CHUNK_AS_DEFAULT)
+         keep = png_ptr->unknown_default;
+
+      if (keep == PNG_HANDLE_CHUNK_ALWAYS ||
+         (keep == PNG_HANDLE_CHUNK_IF_SAFE &&
+          PNG_CHUNK_ANCILLARY(png_ptr->chunk_name)))
+      {
+         if (png_cache_unknown_chunk(png_ptr, length) == 0)
+            keep = PNG_HANDLE_CHUNK_NEVER;
+      }
+
+      else
+         png_crc_finish(png_ptr, length);
+   }
+#  else
+#     ifndef PNG_READ_USER_CHUNKS_SUPPORTED
+#        error no method to support READ_UNKNOWN_CHUNKS
+#     endif
+
+   {
+      /* If here there is no read callback pointer set and no support is
+       * compiled in to just save the unknown chunks, so simply skip this
+       * chunk.  If 'keep' is something other than AS_DEFAULT or NEVER then
+       * the app has erroneously asked for unknown chunk saving when there
+       * is no support.
+       */
+      if (keep > PNG_HANDLE_CHUNK_NEVER)
+         png_app_error(png_ptr, "no unknown chunk support available");
+
+      png_crc_finish(png_ptr, length);
+   }
+#  endif
+
+#  ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
+   /* Now store the chunk in the chunk list if appropriate, and if the limits
+    * permit it.
+    */
+   if (keep == PNG_HANDLE_CHUNK_ALWAYS ||
+      (keep == PNG_HANDLE_CHUNK_IF_SAFE &&
+       PNG_CHUNK_ANCILLARY(png_ptr->chunk_name)))
+   {
+#     ifdef PNG_USER_LIMITS_SUPPORTED
+      switch (png_ptr->user_chunk_cache_max)
+      {
+         case 2:
+            png_ptr->user_chunk_cache_max = 1;
+            png_chunk_benign_error(png_ptr, "no space in chunk cache");
+            /* FALLTHROUGH */
+         case 1:
+            /* NOTE: prior to 1.6.0 this case resulted in an unknown critical
+             * chunk being skipped, now there will be a hard error below.
+             */
+            break;
+
+         default: /* not at limit */
+            --(png_ptr->user_chunk_cache_max);
+            /* FALLTHROUGH */
+         case 0: /* no limit */
+#  endif /* USER_LIMITS */
+            /* Here when the limit isn't reached or when limits are compiled
+             * out; store the chunk.
+             */
+            png_set_unknown_chunks(png_ptr, info_ptr,
+                &png_ptr->unknown_chunk, 1);
+            handled = 1;
+#  ifdef PNG_USER_LIMITS_SUPPORTED
+            break;
+      }
+#  endif
+   }
+#  else /* no store support: the chunk must be handled by the user callback */
+   PNG_UNUSED(info_ptr)
+#  endif
+
+   /* Regardless of the error handling below the cached data (if any) can be
+    * freed now.  Notice that the data is not freed if there is a png_error, but
+    * it will be freed by destroy_read_struct.
+    */
+   if (png_ptr->unknown_chunk.data != NULL)
+      png_free(png_ptr, png_ptr->unknown_chunk.data);
+   png_ptr->unknown_chunk.data = NULL;
+
+#else /* !PNG_READ_UNKNOWN_CHUNKS_SUPPORTED */
+   /* There is no support to read an unknown chunk, so just skip it. */
+   png_crc_finish(png_ptr, length);
+   PNG_UNUSED(info_ptr)
+   PNG_UNUSED(keep)
+#endif /* !READ_UNKNOWN_CHUNKS */
+
+   /* Check for unhandled critical chunks */
+   if (handled == 0 && PNG_CHUNK_CRITICAL(png_ptr->chunk_name))
+      png_chunk_error(png_ptr, "unhandled critical chunk");
+}
+
+/* This function is called to verify that a chunk name is valid.
+ * This function can't have the "critical chunk check" incorporated
+ * into it, since in the future we will need to be able to call user
+ * functions to handle unknown critical chunks after we check that
+ * the chunk name itself is valid.
+ */
+
+/* Bit hacking: the test for an invalid byte in the 4 byte chunk name is:
+ *
+ * ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97))
+ */
+
+void /* PRIVATE */
+png_check_chunk_name(png_const_structrp png_ptr, png_uint_32 chunk_name)
+{
+   int i;
+   png_uint_32 cn=chunk_name;
+
+   png_debug(1, "in png_check_chunk_name");
+
+   for (i=1; i<=4; ++i)
+   {
+      int c = cn & 0xff;
+
+      if (c < 65 || c > 122 || (c > 90 && c < 97))
+         png_chunk_error(png_ptr, "invalid chunk type");
+
+      cn >>= 8;
+   }
+}
+
+void /* PRIVATE */
+png_check_chunk_length(png_const_structrp png_ptr, png_uint_32 length)
+{
+   png_alloc_size_t limit = PNG_UINT_31_MAX;
+
+# ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   if (png_ptr->user_chunk_malloc_max > 0 &&
+       png_ptr->user_chunk_malloc_max < limit)
+      limit = png_ptr->user_chunk_malloc_max;
+# elif PNG_USER_CHUNK_MALLOC_MAX > 0
+   if (PNG_USER_CHUNK_MALLOC_MAX < limit)
+      limit = PNG_USER_CHUNK_MALLOC_MAX;
+# endif
+   if (png_ptr->chunk_name == png_IDAT)
+   {
+      png_alloc_size_t idat_limit = PNG_UINT_31_MAX;
+      size_t row_factor =
+         (size_t)png_ptr->width
+         * (size_t)png_ptr->channels
+         * (png_ptr->bit_depth > 8? 2: 1)
+         + 1
+         + (png_ptr->interlaced? 6: 0);
+      if (png_ptr->height > PNG_UINT_32_MAX/row_factor)
+         idat_limit = PNG_UINT_31_MAX;
+      else
+         idat_limit = png_ptr->height * row_factor;
+      row_factor = row_factor > 32566? 32566 : row_factor;
+      idat_limit += 6 + 5*(idat_limit/row_factor+1); /* zlib+deflate overhead */
+      idat_limit=idat_limit < PNG_UINT_31_MAX? idat_limit : PNG_UINT_31_MAX;
+      limit = limit < idat_limit? idat_limit : limit;
+   }
+
+   if (length > limit)
+   {
+      png_debug2(0," length = %lu, limit = %lu",
+         (unsigned long)length,(unsigned long)limit);
+      png_benign_error(png_ptr, "chunk data is too large");
+   }
+}
+
+/* Combines the row recently read in with the existing pixels in the row.  This
+ * routine takes care of alpha and transparency if requested.  This routine also
+ * handles the two methods of progressive display of interlaced images,
+ * depending on the 'display' value; if 'display' is true then the whole row
+ * (dp) is filled from the start by replicating the available pixels.  If
+ * 'display' is false only those pixels present in the pass are filled in.
+ */
+void /* PRIVATE */
+png_combine_row(png_const_structrp png_ptr, png_bytep dp, int display)
+{
+   unsigned int pixel_depth = png_ptr->transformed_pixel_depth;
+   png_const_bytep sp = png_ptr->row_buf + 1;
+   png_alloc_size_t row_width = png_ptr->width;
+   unsigned int pass = png_ptr->pass;
+   png_bytep end_ptr = 0;
+   png_byte end_byte = 0;
+   unsigned int end_mask;
+
+   png_debug(1, "in png_combine_row");
+
+   /* Added in 1.5.6: it should not be possible to enter this routine until at
+    * least one row has been read from the PNG data and transformed.
+    */
+   if (pixel_depth == 0)
+      png_error(png_ptr, "internal row logic error");
+
+   /* Added in 1.5.4: the pixel depth should match the information returned by
+    * any call to png_read_update_info at this point.  Do not continue if we got
+    * this wrong.
+    */
+   if (png_ptr->info_rowbytes != 0 && png_ptr->info_rowbytes !=
+          PNG_ROWBYTES(pixel_depth, row_width))
+      png_error(png_ptr, "internal row size calculation error");
+
+   /* Don't expect this to ever happen: */
+   if (row_width == 0)
+      png_error(png_ptr, "internal row width error");
+
+   /* Preserve the last byte in cases where only part of it will be overwritten,
+    * the multiply below may overflow, we don't care because ANSI-C guarantees
+    * we get the low bits.
+    */
+   end_mask = (pixel_depth * row_width) & 7;
+   if (end_mask != 0)
+   {
+      /* end_ptr == NULL is a flag to say do nothing */
+      end_ptr = dp + PNG_ROWBYTES(pixel_depth, row_width) - 1;
+      end_byte = *end_ptr;
+#     ifdef PNG_READ_PACKSWAP_SUPPORTED
+      if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
+         /* little-endian byte */
+         end_mask = (unsigned int)(0xff << end_mask);
+
+      else /* big-endian byte */
+#     endif
+      end_mask = 0xff >> end_mask;
+      /* end_mask is now the bits to *keep* from the destination row */
+   }
+
+   /* For non-interlaced images this reduces to a memcpy(). A memcpy()
+    * will also happen if interlacing isn't supported or if the application
+    * does not call png_set_interlace_handling().  In the latter cases the
+    * caller just gets a sequence of the unexpanded rows from each interlace
+    * pass.
+    */
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+   if (png_ptr->interlaced != 0 &&
+       (png_ptr->transformations & PNG_INTERLACE) != 0 &&
+       pass < 6 && (display == 0 ||
+       /* The following copies everything for 'display' on passes 0, 2 and 4. */
+       (display == 1 && (pass & 1) != 0)))
+   {
+      /* Narrow images may have no bits in a pass; the caller should handle
+       * this, but this test is cheap:
+       */
+      if (row_width <= PNG_PASS_START_COL(pass))
+         return;
+
+      if (pixel_depth < 8)
+      {
+         /* For pixel depths up to 4 bpp the 8-pixel mask can be expanded to fit
+          * into 32 bits, then a single loop over the bytes using the four byte
+          * values in the 32-bit mask can be used.  For the 'display' option the
+          * expanded mask may also not require any masking within a byte.  To
+          * make this work the PACKSWAP option must be taken into account - it
+          * simply requires the pixels to be reversed in each byte.
+          *
+          * The 'regular' case requires a mask for each of the first 6 passes,
+          * the 'display' case does a copy for the even passes in the range
+          * 0..6.  This has already been handled in the test above.
+          *
+          * The masks are arranged as four bytes with the first byte to use in
+          * the lowest bits (little-endian) regardless of the order (PACKSWAP or
+          * not) of the pixels in each byte.
+          *
+          * NOTE: the whole of this logic depends on the caller of this function
+          * only calling it on rows appropriate to the pass.  This function only
+          * understands the 'x' logic; the 'y' logic is handled by the caller.
+          *
+          * The following defines allow generation of compile time constant bit
+          * masks for each pixel depth and each possibility of swapped or not
+          * swapped bytes.  Pass 'p' is in the range 0..6; 'x', a pixel index,
+          * is in the range 0..7; and the result is 1 if the pixel is to be
+          * copied in the pass, 0 if not.  'S' is for the sparkle method, 'B'
+          * for the block method.
+          *
+          * With some compilers a compile time expression of the general form:
+          *
+          *    (shift >= 32) ? (a >> (shift-32)) : (b >> shift)
+          *
+          * Produces warnings with values of 'shift' in the range 33 to 63
+          * because the right hand side of the ?: expression is evaluated by
+          * the compiler even though it isn't used.  Microsoft Visual C (various
+          * versions) and the Intel C compiler are known to do this.  To avoid
+          * this the following macros are used in 1.5.6.  This is a temporary
+          * solution to avoid destabilizing the code during the release process.
+          */
+#        if PNG_USE_COMPILE_TIME_MASKS
+#           define PNG_LSR(x,s) ((x)>>((s) & 0x1f))
+#           define PNG_LSL(x,s) ((x)<<((s) & 0x1f))
+#        else
+#           define PNG_LSR(x,s) ((x)>>(s))
+#           define PNG_LSL(x,s) ((x)<<(s))
+#        endif
+#        define S_COPY(p,x) (((p)<4 ? PNG_LSR(0x80088822,(3-(p))*8+(7-(x))) :\
+           PNG_LSR(0xaa55ff00,(7-(p))*8+(7-(x)))) & 1)
+#        define B_COPY(p,x) (((p)<4 ? PNG_LSR(0xff0fff33,(3-(p))*8+(7-(x))) :\
+           PNG_LSR(0xff55ff00,(7-(p))*8+(7-(x)))) & 1)
+
+         /* Return a mask for pass 'p' pixel 'x' at depth 'd'.  The mask is
+          * little endian - the first pixel is at bit 0 - however the extra
+          * parameter 's' can be set to cause the mask position to be swapped
+          * within each byte, to match the PNG format.  This is done by XOR of
+          * the shift with 7, 6 or 4 for bit depths 1, 2 and 4.
+          */
+#        define PIXEL_MASK(p,x,d,s) \
+            (PNG_LSL(((PNG_LSL(1U,(d)))-1),(((x)*(d))^((s)?8-(d):0))))
+
+         /* Hence generate the appropriate 'block' or 'sparkle' pixel copy mask.
+          */
+#        define S_MASKx(p,x,d,s) (S_COPY(p,x)?PIXEL_MASK(p,x,d,s):0)
+#        define B_MASKx(p,x,d,s) (B_COPY(p,x)?PIXEL_MASK(p,x,d,s):0)
+
+         /* Combine 8 of these to get the full mask.  For the 1-bpp and 2-bpp
+          * cases the result needs replicating, for the 4-bpp case the above
+          * generates a full 32 bits.
+          */
+#        define MASK_EXPAND(m,d) ((m)*((d)==1?0x01010101:((d)==2?0x00010001:1)))
+
+#        define S_MASK(p,d,s) MASK_EXPAND(S_MASKx(p,0,d,s) + S_MASKx(p,1,d,s) +\
+            S_MASKx(p,2,d,s) + S_MASKx(p,3,d,s) + S_MASKx(p,4,d,s) +\
+            S_MASKx(p,5,d,s) + S_MASKx(p,6,d,s) + S_MASKx(p,7,d,s), d)
+
+#        define B_MASK(p,d,s) MASK_EXPAND(B_MASKx(p,0,d,s) + B_MASKx(p,1,d,s) +\
+            B_MASKx(p,2,d,s) + B_MASKx(p,3,d,s) + B_MASKx(p,4,d,s) +\
+            B_MASKx(p,5,d,s) + B_MASKx(p,6,d,s) + B_MASKx(p,7,d,s), d)
+
+#if PNG_USE_COMPILE_TIME_MASKS
+         /* Utility macros to construct all the masks for a depth/swap
+          * combination.  The 's' parameter says whether the format is PNG
+          * (big endian bytes) or not.  Only the three odd-numbered passes are
+          * required for the display/block algorithm.
+          */
+#        define S_MASKS(d,s) { S_MASK(0,d,s), S_MASK(1,d,s), S_MASK(2,d,s),\
+            S_MASK(3,d,s), S_MASK(4,d,s), S_MASK(5,d,s) }
+
+#        define B_MASKS(d,s) { B_MASK(1,d,s), B_MASK(3,d,s), B_MASK(5,d,s) }
+
+#        define DEPTH_INDEX(d) ((d)==1?0:((d)==2?1:2))
+
+         /* Hence the pre-compiled masks indexed by PACKSWAP (or not), depth and
+          * then pass:
+          */
+         static const png_uint_32 row_mask[2/*PACKSWAP*/][3/*depth*/][6] =
+         {
+            /* Little-endian byte masks for PACKSWAP */
+            { S_MASKS(1,0), S_MASKS(2,0), S_MASKS(4,0) },
+            /* Normal (big-endian byte) masks - PNG format */
+            { S_MASKS(1,1), S_MASKS(2,1), S_MASKS(4,1) }
+         };
+
+         /* display_mask has only three entries for the odd passes, so index by
+          * pass>>1.
+          */
+         static const png_uint_32 display_mask[2][3][3] =
+         {
+            /* Little-endian byte masks for PACKSWAP */
+            { B_MASKS(1,0), B_MASKS(2,0), B_MASKS(4,0) },
+            /* Normal (big-endian byte) masks - PNG format */
+            { B_MASKS(1,1), B_MASKS(2,1), B_MASKS(4,1) }
+         };
+
+#        define MASK(pass,depth,display,png)\
+            ((display)?display_mask[png][DEPTH_INDEX(depth)][pass>>1]:\
+               row_mask[png][DEPTH_INDEX(depth)][pass])
+
+#else /* !PNG_USE_COMPILE_TIME_MASKS */
+         /* This is the runtime alternative: it seems unlikely that this will
+          * ever be either smaller or faster than the compile time approach.
+          */
+#        define MASK(pass,depth,display,png)\
+            ((display)?B_MASK(pass,depth,png):S_MASK(pass,depth,png))
+#endif /* !USE_COMPILE_TIME_MASKS */
+
+         /* Use the appropriate mask to copy the required bits.  In some cases
+          * the byte mask will be 0 or 0xff; optimize these cases.  row_width is
+          * the number of pixels, but the code copies bytes, so it is necessary
+          * to special case the end.
+          */
+         png_uint_32 pixels_per_byte = 8 / pixel_depth;
+         png_uint_32 mask;
+
+#        ifdef PNG_READ_PACKSWAP_SUPPORTED
+         if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
+            mask = MASK(pass, pixel_depth, display, 0);
+
+         else
+#        endif
+         mask = MASK(pass, pixel_depth, display, 1);
+
+         for (;;)
+         {
+            png_uint_32 m;
+
+            /* It doesn't matter in the following if png_uint_32 has more than
+             * 32 bits because the high bits always match those in m<<24; it is,
+             * however, essential to use OR here, not +, because of this.
+             */
+            m = mask;
+            mask = (m >> 8) | (m << 24); /* rotate right to good compilers */
+            m &= 0xff;
+
+            if (m != 0) /* something to copy */
+            {
+               if (m != 0xff)
+                  *dp = (png_byte)((*dp & ~m) | (*sp & m));
+               else
+                  *dp = *sp;
+            }
+
+            /* NOTE: this may overwrite the last byte with garbage if the image
+             * is not an exact number of bytes wide; libpng has always done
+             * this.
+             */
+            if (row_width <= pixels_per_byte)
+               break; /* May need to restore part of the last byte */
+
+            row_width -= pixels_per_byte;
+            ++dp;
+            ++sp;
+         }
+      }
+
+      else /* pixel_depth >= 8 */
+      {
+         unsigned int bytes_to_copy, bytes_to_jump;
+
+         /* Validate the depth - it must be a multiple of 8 */
+         if (pixel_depth & 7)
+            png_error(png_ptr, "invalid user transform pixel depth");
+
+         pixel_depth >>= 3; /* now in bytes */
+         row_width *= pixel_depth;
+
+         /* Regardless of pass number the Adam 7 interlace always results in a
+          * fixed number of pixels to copy then to skip.  There may be a
+          * different number of pixels to skip at the start though.
+          */
+         {
+            unsigned int offset = PNG_PASS_START_COL(pass) * pixel_depth;
+
+            row_width -= offset;
+            dp += offset;
+            sp += offset;
+         }
+
+         /* Work out the bytes to copy. */
+         if (display != 0)
+         {
+            /* When doing the 'block' algorithm the pixel in the pass gets
+             * replicated to adjacent pixels.  This is why the even (0,2,4,6)
+             * passes are skipped above - the entire expanded row is copied.
+             */
+            bytes_to_copy = (1<<((6-pass)>>1)) * pixel_depth;
+
+            /* But don't allow this number to exceed the actual row width. */
+            if (bytes_to_copy > row_width)
+               bytes_to_copy = (unsigned int)/*SAFE*/row_width;
+         }
+
+         else /* normal row; Adam7 only ever gives us one pixel to copy. */
+            bytes_to_copy = pixel_depth;
+
+         /* In Adam7 there is a constant offset between where the pixels go. */
+         bytes_to_jump = PNG_PASS_COL_OFFSET(pass) * pixel_depth;
+
+         /* And simply copy these bytes.  Some optimization is possible here,
+          * depending on the value of 'bytes_to_copy'.  Special case the low
+          * byte counts, which we know to be frequent.
+          *
+          * Notice that these cases all 'return' rather than 'break' - this
+          * avoids an unnecessary test on whether to restore the last byte
+          * below.
+          */
+         switch (bytes_to_copy)
+         {
+            case 1:
+               for (;;)
+               {
+                  *dp = *sp;
+
+                  if (row_width <= bytes_to_jump)
+                     return;
+
+                  dp += bytes_to_jump;
+                  sp += bytes_to_jump;
+                  row_width -= bytes_to_jump;
+               }
+
+            case 2:
+               /* There is a possibility of a partial copy at the end here; this
+                * slows the code down somewhat.
+                */
+               do
+               {
+                  dp[0] = sp[0]; dp[1] = sp[1];
+
+                  if (row_width <= bytes_to_jump)
+                     return;
+
+                  sp += bytes_to_jump;
+                  dp += bytes_to_jump;
+                  row_width -= bytes_to_jump;
+               }
+               while (row_width > 1);
+
+               /* And there can only be one byte left at this point: */
+               *dp = *sp;
+               return;
+
+            case 3:
+               /* This can only be the RGB case, so each copy is exactly one
+                * pixel and it is not necessary to check for a partial copy.
+                */
+               for (;;)
+               {
+                  dp[0] = sp[0]; dp[1] = sp[1]; dp[2] = sp[2];
+
+                  if (row_width <= bytes_to_jump)
+                     return;
+
+                  sp += bytes_to_jump;
+                  dp += bytes_to_jump;
+                  row_width -= bytes_to_jump;
+               }
+
+            default:
+#if PNG_ALIGN_TYPE != PNG_ALIGN_NONE
+               /* Check for double byte alignment and, if possible, use a
+                * 16-bit copy.  Don't attempt this for narrow images - ones that
+                * are less than an interlace panel wide.  Don't attempt it for
+                * wide bytes_to_copy either - use the memcpy there.
+                */
+               if (bytes_to_copy < 16 /*else use memcpy*/ &&
+                   png_isaligned(dp, png_uint_16) &&
+                   png_isaligned(sp, png_uint_16) &&
+                   bytes_to_copy % (sizeof (png_uint_16)) == 0 &&
+                   bytes_to_jump % (sizeof (png_uint_16)) == 0)
+               {
+                  /* Everything is aligned for png_uint_16 copies, but try for
+                   * png_uint_32 first.
+                   */
+                  if (png_isaligned(dp, png_uint_32) &&
+                      png_isaligned(sp, png_uint_32) &&
+                      bytes_to_copy % (sizeof (png_uint_32)) == 0 &&
+                      bytes_to_jump % (sizeof (png_uint_32)) == 0)
+                  {
+                     png_uint_32p dp32 = png_aligncast(png_uint_32p,dp);
+                     png_const_uint_32p sp32 = png_aligncastconst(
+                         png_const_uint_32p, sp);
+                     size_t skip = (bytes_to_jump-bytes_to_copy) /
+                         (sizeof (png_uint_32));
+
+                     do
+                     {
+                        size_t c = bytes_to_copy;
+                        do
+                        {
+                           *dp32++ = *sp32++;
+                           c -= (sizeof (png_uint_32));
+                        }
+                        while (c > 0);
+
+                        if (row_width <= bytes_to_jump)
+                           return;
+
+                        dp32 += skip;
+                        sp32 += skip;
+                        row_width -= bytes_to_jump;
+                     }
+                     while (bytes_to_copy <= row_width);
+
+                     /* Get to here when the row_width truncates the final copy.
+                      * There will be 1-3 bytes left to copy, so don't try the
+                      * 16-bit loop below.
+                      */
+                     dp = (png_bytep)dp32;
+                     sp = (png_const_bytep)sp32;
+                     do
+                        *dp++ = *sp++;
+                     while (--row_width > 0);
+                     return;
+                  }
+
+                  /* Else do it in 16-bit quantities, but only if the size is
+                   * not too large.
+                   */
+                  else
+                  {
+                     png_uint_16p dp16 = png_aligncast(png_uint_16p, dp);
+                     png_const_uint_16p sp16 = png_aligncastconst(
+                        png_const_uint_16p, sp);
+                     size_t skip = (bytes_to_jump-bytes_to_copy) /
+                        (sizeof (png_uint_16));
+
+                     do
+                     {
+                        size_t c = bytes_to_copy;
+                        do
+                        {
+                           *dp16++ = *sp16++;
+                           c -= (sizeof (png_uint_16));
+                        }
+                        while (c > 0);
+
+                        if (row_width <= bytes_to_jump)
+                           return;
+
+                        dp16 += skip;
+                        sp16 += skip;
+                        row_width -= bytes_to_jump;
+                     }
+                     while (bytes_to_copy <= row_width);
+
+                     /* End of row - 1 byte left, bytes_to_copy > row_width: */
+                     dp = (png_bytep)dp16;
+                     sp = (png_const_bytep)sp16;
+                     do
+                        *dp++ = *sp++;
+                     while (--row_width > 0);
+                     return;
+                  }
+               }
+#endif /* ALIGN_TYPE code */
+
+               /* The true default - use a memcpy: */
+               for (;;)
+               {
+                  memcpy(dp, sp, bytes_to_copy);
+
+                  if (row_width <= bytes_to_jump)
+                     return;
+
+                  sp += bytes_to_jump;
+                  dp += bytes_to_jump;
+                  row_width -= bytes_to_jump;
+                  if (bytes_to_copy > row_width)
+                     bytes_to_copy = (unsigned int)/*SAFE*/row_width;
+               }
+         }
+
+         /* NOT REACHED*/
+      } /* pixel_depth >= 8 */
+
+      /* Here if pixel_depth < 8 to check 'end_ptr' below. */
+   }
+   else
+#endif /* READ_INTERLACING */
+
+   /* If here then the switch above wasn't used so just memcpy the whole row
+    * from the temporary row buffer (notice that this overwrites the end of the
+    * destination row if it is a partial byte.)
+    */
+   memcpy(dp, sp, PNG_ROWBYTES(pixel_depth, row_width));
+
+   /* Restore the overwritten bits from the last byte if necessary. */
+   if (end_ptr != NULL)
+      *end_ptr = (png_byte)((end_byte & end_mask) | (*end_ptr & ~end_mask));
+}
+
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+void /* PRIVATE */
+png_do_read_interlace(png_row_infop row_info, png_bytep row, int pass,
+    png_uint_32 transformations /* Because these may affect the byte layout */)
+{
+   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+   /* Offset to next interlace block */
+   static const unsigned int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   png_debug(1, "in png_do_read_interlace");
+   if (row != NULL && row_info != NULL)
+   {
+      png_uint_32 final_width;
+
+      final_width = row_info->width * png_pass_inc[pass];
+
+      switch (row_info->pixel_depth)
+      {
+         case 1:
+         {
+            png_bytep sp = row + (size_t)((row_info->width - 1) >> 3);
+            png_bytep dp = row + (size_t)((final_width - 1) >> 3);
+            unsigned int sshift, dshift;
+            unsigned int s_start, s_end;
+            int s_inc;
+            int jstop = (int)png_pass_inc[pass];
+            png_byte v;
+            png_uint_32 i;
+            int j;
+
+#ifdef PNG_READ_PACKSWAP_SUPPORTED
+            if ((transformations & PNG_PACKSWAP) != 0)
+            {
+                sshift = ((row_info->width + 7) & 0x07);
+                dshift = ((final_width + 7) & 0x07);
+                s_start = 7;
+                s_end = 0;
+                s_inc = -1;
+            }
+
+            else
+#endif
+            {
+                sshift = 7 - ((row_info->width + 7) & 0x07);
+                dshift = 7 - ((final_width + 7) & 0x07);
+                s_start = 0;
+                s_end = 7;
+                s_inc = 1;
+            }
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               v = (png_byte)((*sp >> sshift) & 0x01);
+               for (j = 0; j < jstop; j++)
+               {
+                  unsigned int tmp = *dp & (0x7f7f >> (7 - dshift));
+                  tmp |= (unsigned int)(v << dshift);
+                  *dp = (png_byte)(tmp & 0xff);
+
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+
+                  else
+                     dshift = (unsigned int)((int)dshift + s_inc);
+               }
+
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+
+               else
+                  sshift = (unsigned int)((int)sshift + s_inc);
+            }
+            break;
+         }
+
+         case 2:
+         {
+            png_bytep sp = row + (png_uint_32)((row_info->width - 1) >> 2);
+            png_bytep dp = row + (png_uint_32)((final_width - 1) >> 2);
+            unsigned int sshift, dshift;
+            unsigned int s_start, s_end;
+            int s_inc;
+            int jstop = (int)png_pass_inc[pass];
+            png_uint_32 i;
+
+#ifdef PNG_READ_PACKSWAP_SUPPORTED
+            if ((transformations & PNG_PACKSWAP) != 0)
+            {
+               sshift = (((row_info->width + 3) & 0x03) << 1);
+               dshift = (((final_width + 3) & 0x03) << 1);
+               s_start = 6;
+               s_end = 0;
+               s_inc = -2;
+            }
+
+            else
+#endif
+            {
+               sshift = ((3 - ((row_info->width + 3) & 0x03)) << 1);
+               dshift = ((3 - ((final_width + 3) & 0x03)) << 1);
+               s_start = 0;
+               s_end = 6;
+               s_inc = 2;
+            }
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               png_byte v;
+               int j;
+
+               v = (png_byte)((*sp >> sshift) & 0x03);
+               for (j = 0; j < jstop; j++)
+               {
+                  unsigned int tmp = *dp & (0x3f3f >> (6 - dshift));
+                  tmp |= (unsigned int)(v << dshift);
+                  *dp = (png_byte)(tmp & 0xff);
+
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+
+                  else
+                     dshift = (unsigned int)((int)dshift + s_inc);
+               }
+
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+
+               else
+                  sshift = (unsigned int)((int)sshift + s_inc);
+            }
+            break;
+         }
+
+         case 4:
+         {
+            png_bytep sp = row + (size_t)((row_info->width - 1) >> 1);
+            png_bytep dp = row + (size_t)((final_width - 1) >> 1);
+            unsigned int sshift, dshift;
+            unsigned int s_start, s_end;
+            int s_inc;
+            png_uint_32 i;
+            int jstop = (int)png_pass_inc[pass];
+
+#ifdef PNG_READ_PACKSWAP_SUPPORTED
+            if ((transformations & PNG_PACKSWAP) != 0)
+            {
+               sshift = (((row_info->width + 1) & 0x01) << 2);
+               dshift = (((final_width + 1) & 0x01) << 2);
+               s_start = 4;
+               s_end = 0;
+               s_inc = -4;
+            }
+
+            else
+#endif
+            {
+               sshift = ((1 - ((row_info->width + 1) & 0x01)) << 2);
+               dshift = ((1 - ((final_width + 1) & 0x01)) << 2);
+               s_start = 0;
+               s_end = 4;
+               s_inc = 4;
+            }
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               png_byte v = (png_byte)((*sp >> sshift) & 0x0f);
+               int j;
+
+               for (j = 0; j < jstop; j++)
+               {
+                  unsigned int tmp = *dp & (0xf0f >> (4 - dshift));
+                  tmp |= (unsigned int)(v << dshift);
+                  *dp = (png_byte)(tmp & 0xff);
+
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+
+                  else
+                     dshift = (unsigned int)((int)dshift + s_inc);
+               }
+
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+
+               else
+                  sshift = (unsigned int)((int)sshift + s_inc);
+            }
+            break;
+         }
+
+         default:
+         {
+            size_t pixel_bytes = (row_info->pixel_depth >> 3);
+
+            png_bytep sp = row + (size_t)(row_info->width - 1)
+                * pixel_bytes;
+
+            png_bytep dp = row + (size_t)(final_width - 1) * pixel_bytes;
+
+            int jstop = (int)png_pass_inc[pass];
+            png_uint_32 i;
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               png_byte v[8]; /* SAFE; pixel_depth does not exceed 64 */
+               int j;
+
+               memcpy(v, sp, pixel_bytes);
+
+               for (j = 0; j < jstop; j++)
+               {
+                  memcpy(dp, v, pixel_bytes);
+                  dp -= pixel_bytes;
+               }
+
+               sp -= pixel_bytes;
+            }
+            break;
+         }
+      }
+
+      row_info->width = final_width;
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, final_width);
+   }
+#ifndef PNG_READ_PACKSWAP_SUPPORTED
+   PNG_UNUSED(transformations)  /* Silence compiler warning */
+#endif
+}
+#endif /* READ_INTERLACING */
+
+static void
+png_read_filter_row_sub(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev_row)
+{
+   size_t i;
+   size_t istop = row_info->rowbytes;
+   unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
+   png_bytep rp = row + bpp;
+
+   PNG_UNUSED(prev_row)
+
+   for (i = bpp; i < istop; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+      rp++;
+   }
+}
+
+static void
+png_read_filter_row_up(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev_row)
+{
+   size_t i;
+   size_t istop = row_info->rowbytes;
+   png_bytep rp = row;
+   png_const_bytep pp = prev_row;
+
+   for (i = 0; i < istop; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+      rp++;
+   }
+}
+
+static void
+png_read_filter_row_avg(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev_row)
+{
+   size_t i;
+   png_bytep rp = row;
+   png_const_bytep pp = prev_row;
+   unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
+   size_t istop = row_info->rowbytes - bpp;
+
+   for (i = 0; i < bpp; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         ((int)(*pp++) / 2 )) & 0xff);
+
+      rp++;
+   }
+
+   for (i = 0; i < istop; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+      rp++;
+   }
+}
+
+static void
+png_read_filter_row_paeth_1byte_pixel(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev_row)
+{
+   png_bytep rp_end = row + row_info->rowbytes;
+   int a, c;
+
+   /* First pixel/byte */
+   c = *prev_row++;
+   a = *row + c;
+   *row++ = (png_byte)a;
+
+   /* Remainder */
+   while (row < rp_end)
+   {
+      int b, pa, pb, pc, p;
+
+      a &= 0xff; /* From previous iteration or start */
+      b = *prev_row++;
+
+      p = b - c;
+      pc = a - c;
+
+#ifdef PNG_USE_ABS
+      pa = abs(p);
+      pb = abs(pc);
+      pc = abs(p + pc);
+#else
+      pa = p < 0 ? -p : p;
+      pb = pc < 0 ? -pc : pc;
+      pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+
+      /* Find the best predictor, the least of pa, pb, pc favoring the earlier
+       * ones in the case of a tie.
+       */
+      if (pb < pa)
+      {
+         pa = pb; a = b;
+      }
+      if (pc < pa) a = c;
+
+      /* Calculate the current pixel in a, and move the previous row pixel to c
+       * for the next time round the loop
+       */
+      c = b;
+      a += *row;
+      *row++ = (png_byte)a;
+   }
+}
+
+static void
+png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row,
+    png_const_bytep prev_row)
+{
+   unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
+   png_bytep rp_end = row + bpp;
+
+   /* Process the first pixel in the row completely (this is the same as 'up'
+    * because there is only one candidate predictor for the first row).
+    */
+   while (row < rp_end)
+   {
+      int a = *row + *prev_row++;
+      *row++ = (png_byte)a;
+   }
+
+   /* Remainder */
+   rp_end = rp_end + (row_info->rowbytes - bpp);
+
+   while (row < rp_end)
+   {
+      int a, b, c, pa, pb, pc, p;
+
+      c = *(prev_row - bpp);
+      a = *(row - bpp);
+      b = *prev_row++;
+
+      p = b - c;
+      pc = a - c;
+
+#ifdef PNG_USE_ABS
+      pa = abs(p);
+      pb = abs(pc);
+      pc = abs(p + pc);
+#else
+      pa = p < 0 ? -p : p;
+      pb = pc < 0 ? -pc : pc;
+      pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+
+      if (pb < pa)
+      {
+         pa = pb; a = b;
+      }
+      if (pc < pa) a = c;
+
+      a += *row;
+      *row++ = (png_byte)a;
+   }
+}
+
+static void
+png_init_filter_functions(png_structrp pp)
+   /* This function is called once for every PNG image (except for PNG images
+    * that only use PNG_FILTER_VALUE_NONE for all rows) to set the
+    * implementations required to reverse the filtering of PNG rows.  Reversing
+    * the filter is the first transformation performed on the row data.  It is
+    * performed in place, therefore an implementation can be selected based on
+    * the image pixel format.  If the implementation depends on image width then
+    * take care to ensure that it works correctly if the image is interlaced -
+    * interlacing causes the actual row width to vary.
+    */
+{
+   unsigned int bpp = (pp->pixel_depth + 7) >> 3;
+
+   pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub;
+   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up;
+   pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg;
+   if (bpp == 1)
+      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+         png_read_filter_row_paeth_1byte_pixel;
+   else
+      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+         png_read_filter_row_paeth_multibyte_pixel;
+
+#ifdef PNG_FILTER_OPTIMIZATIONS
+   /* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to
+    * call to install hardware optimizations for the above functions; simply
+    * replace whatever elements of the pp->read_filter[] array with a hardware
+    * specific (or, for that matter, generic) optimization.
+    *
+    * To see an example of this examine what configure.ac does when
+    * --enable-arm-neon is specified on the command line.
+    */
+   PNG_FILTER_OPTIMIZATIONS(pp, bpp);
+#endif
+}
+
+void /* PRIVATE */
+png_read_filter_row(png_structrp pp, png_row_infop row_info, png_bytep row,
+    png_const_bytep prev_row, int filter)
+{
+   /* OPTIMIZATION: DO NOT MODIFY THIS FUNCTION, instead #define
+    * PNG_FILTER_OPTIMIZATIONS to a function that overrides the generic
+    * implementations.  See png_init_filter_functions above.
+    */
+   if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST)
+   {
+      if (pp->read_filter[0] == NULL)
+         png_init_filter_functions(pp);
+
+      pp->read_filter[filter-1](row_info, row, prev_row);
+   }
+}
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+void /* PRIVATE */
+png_read_IDAT_data(png_structrp png_ptr, png_bytep output,
+    png_alloc_size_t avail_out)
+{
+   /* Loop reading IDATs and decompressing the result into output[avail_out] */
+   png_ptr->zstream.next_out = output;
+   png_ptr->zstream.avail_out = 0; /* safety: set below */
+
+   if (output == NULL)
+      avail_out = 0;
+
+   do
+   {
+      int ret;
+      png_byte tmpbuf[PNG_INFLATE_BUF_SIZE];
+
+      if (png_ptr->zstream.avail_in == 0)
+      {
+         uInt avail_in;
+         png_bytep buffer;
+
+         while (png_ptr->idat_size == 0)
+         {
+            png_crc_finish(png_ptr, 0);
+
+            png_ptr->idat_size = png_read_chunk_header(png_ptr);
+            /* This is an error even in the 'check' case because the code just
+             * consumed a non-IDAT header.
+             */
+            if (png_ptr->chunk_name != png_IDAT)
+               png_error(png_ptr, "Not enough image data");
+         }
+
+         avail_in = png_ptr->IDAT_read_size;
+
+         if (avail_in > png_ptr->idat_size)
+            avail_in = (uInt)png_ptr->idat_size;
+
+         /* A PNG with a gradually increasing IDAT size will defeat this attempt
+          * to minimize memory usage by causing lots of re-allocs, but
+          * realistically doing IDAT_read_size re-allocs is not likely to be a
+          * big problem.
+          */
+         buffer = png_read_buffer(png_ptr, avail_in, 0/*error*/);
+
+         png_crc_read(png_ptr, buffer, avail_in);
+         png_ptr->idat_size -= avail_in;
+
+         png_ptr->zstream.next_in = buffer;
+         png_ptr->zstream.avail_in = avail_in;
+      }
+
+      /* And set up the output side. */
+      if (output != NULL) /* standard read */
+      {
+         uInt out = ZLIB_IO_MAX;
+
+         if (out > avail_out)
+            out = (uInt)avail_out;
+
+         avail_out -= out;
+         png_ptr->zstream.avail_out = out;
+      }
+
+      else /* after last row, checking for end */
+      {
+         png_ptr->zstream.next_out = tmpbuf;
+         png_ptr->zstream.avail_out = (sizeof tmpbuf);
+      }
+
+      /* Use NO_FLUSH; this gives zlib the maximum opportunity to optimize the
+       * process.  If the LZ stream is truncated the sequential reader will
+       * terminally damage the stream, above, by reading the chunk header of the
+       * following chunk (it then exits with png_error).
+       *
+       * TODO: deal more elegantly with truncated IDAT lists.
+       */
+      ret = PNG_INFLATE(png_ptr, Z_NO_FLUSH);
+
+      /* Take the unconsumed output back. */
+      if (output != NULL)
+         avail_out += png_ptr->zstream.avail_out;
+
+      else /* avail_out counts the extra bytes */
+         avail_out += (sizeof tmpbuf) - png_ptr->zstream.avail_out;
+
+      png_ptr->zstream.avail_out = 0;
+
+      if (ret == Z_STREAM_END)
+      {
+         /* Do this for safety; we won't read any more into this row. */
+         png_ptr->zstream.next_out = NULL;
+
+         png_ptr->mode |= PNG_AFTER_IDAT;
+         png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED;
+
+         if (png_ptr->zstream.avail_in > 0 || png_ptr->idat_size > 0)
+            png_chunk_benign_error(png_ptr, "Extra compressed data");
+         break;
+      }
+
+      if (ret != Z_OK)
+      {
+         png_zstream_error(png_ptr, ret);
+
+         if (output != NULL)
+            png_chunk_error(png_ptr, png_ptr->zstream.msg);
+
+         else /* checking */
+         {
+            png_chunk_benign_error(png_ptr, png_ptr->zstream.msg);
+            return;
+         }
+      }
+   } while (avail_out > 0);
+
+   if (avail_out > 0)
+   {
+      /* The stream ended before the image; this is the same as too few IDATs so
+       * should be handled the same way.
+       */
+      if (output != NULL)
+         png_error(png_ptr, "Not enough image data");
+
+      else /* the deflate stream contained extra data */
+         png_chunk_benign_error(png_ptr, "Too much image data");
+   }
+}
+
+void /* PRIVATE */
+png_read_finish_IDAT(png_structrp png_ptr)
+{
+   /* We don't need any more data and the stream should have ended, however the
+    * LZ end code may actually not have been processed.  In this case we must
+    * read it otherwise stray unread IDAT data or, more likely, an IDAT chunk
+    * may still remain to be consumed.
+    */
+   if ((png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0)
+   {
+      /* The NULL causes png_read_IDAT_data to swallow any remaining bytes in
+       * the compressed stream, but the stream may be damaged too, so even after
+       * this call we may need to terminate the zstream ownership.
+       */
+      png_read_IDAT_data(png_ptr, NULL, 0);
+      png_ptr->zstream.next_out = NULL; /* safety */
+
+      /* Now clear everything out for safety; the following may not have been
+       * done.
+       */
+      if ((png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0)
+      {
+         png_ptr->mode |= PNG_AFTER_IDAT;
+         png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED;
+      }
+   }
+
+   /* If the zstream has not been released do it now *and* terminate the reading
+    * of the final IDAT chunk.
+    */
+   if (png_ptr->zowner == png_IDAT)
+   {
+      /* Always do this; the pointers otherwise point into the read buffer. */
+      png_ptr->zstream.next_in = NULL;
+      png_ptr->zstream.avail_in = 0;
+
+      /* Now we no longer own the zstream. */
+      png_ptr->zowner = 0;
+
+      /* The slightly weird semantics of the sequential IDAT reading is that we
+       * are always in or at the end of an IDAT chunk, so we always need to do a
+       * crc_finish here.  If idat_size is non-zero we also need to read the
+       * spurious bytes at the end of the chunk now.
+       */
+      (void)png_crc_finish(png_ptr, png_ptr->idat_size);
+   }
+}
+
+void /* PRIVATE */
+png_read_finish_row(png_structrp png_ptr)
+{
+   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* Start of interlace block */
+   static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* Offset to next interlace block */
+   static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* Start of interlace block in the y direction */
+   static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* Offset to next interlace block in the y direction */
+   static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+
+   png_debug(1, "in png_read_finish_row");
+   png_ptr->row_number++;
+   if (png_ptr->row_number < png_ptr->num_rows)
+      return;
+
+   if (png_ptr->interlaced != 0)
+   {
+      png_ptr->row_number = 0;
+
+      /* TO DO: don't do this if prev_row isn't needed (requires
+       * read-ahead of the next row's filter byte.
+       */
+      memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
+
+      do
+      {
+         png_ptr->pass++;
+
+         if (png_ptr->pass >= 7)
+            break;
+
+         png_ptr->iwidth = (png_ptr->width +
+            png_pass_inc[png_ptr->pass] - 1 -
+            png_pass_start[png_ptr->pass]) /
+            png_pass_inc[png_ptr->pass];
+
+         if ((png_ptr->transformations & PNG_INTERLACE) == 0)
+         {
+            png_ptr->num_rows = (png_ptr->height +
+                png_pass_yinc[png_ptr->pass] - 1 -
+                png_pass_ystart[png_ptr->pass]) /
+                png_pass_yinc[png_ptr->pass];
+         }
+
+         else  /* if (png_ptr->transformations & PNG_INTERLACE) */
+            break; /* libpng deinterlacing sees every row */
+
+      } while (png_ptr->num_rows == 0 || png_ptr->iwidth == 0);
+
+      if (png_ptr->pass < 7)
+         return;
+   }
+
+   /* Here after at the end of the last row of the last pass. */
+   png_read_finish_IDAT(png_ptr);
+}
+#endif /* SEQUENTIAL_READ */
+
+void /* PRIVATE */
+png_read_start_row(png_structrp png_ptr)
+{
+   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* Start of interlace block */
+   static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* Offset to next interlace block */
+   static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* Start of interlace block in the y direction */
+   static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* Offset to next interlace block in the y direction */
+   static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+
+   unsigned int max_pixel_depth;
+   size_t row_bytes;
+
+   png_debug(1, "in png_read_start_row");
+
+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
+   png_init_read_transformations(png_ptr);
+#endif
+   if (png_ptr->interlaced != 0)
+   {
+      if ((png_ptr->transformations & PNG_INTERLACE) == 0)
+         png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 -
+             png_pass_ystart[0]) / png_pass_yinc[0];
+
+      else
+         png_ptr->num_rows = png_ptr->height;
+
+      png_ptr->iwidth = (png_ptr->width +
+          png_pass_inc[png_ptr->pass] - 1 -
+          png_pass_start[png_ptr->pass]) /
+          png_pass_inc[png_ptr->pass];
+   }
+
+   else
+   {
+      png_ptr->num_rows = png_ptr->height;
+      png_ptr->iwidth = png_ptr->width;
+   }
+
+   max_pixel_depth = (unsigned int)png_ptr->pixel_depth;
+
+   /* WARNING: * png_read_transform_info (pngrtran.c) performs a simpler set of
+    * calculations to calculate the final pixel depth, then
+    * png_do_read_transforms actually does the transforms.  This means that the
+    * code which effectively calculates this value is actually repeated in three
+    * separate places.  They must all match.  Innocent changes to the order of
+    * transformations can and will break libpng in a way that causes memory
+    * overwrites.
+    *
+    * TODO: fix this.
+    */
+#ifdef PNG_READ_PACK_SUPPORTED
+   if ((png_ptr->transformations & PNG_PACK) != 0 && png_ptr->bit_depth < 8)
+      max_pixel_depth = 8;
+#endif
+
+#ifdef PNG_READ_EXPAND_SUPPORTED
+   if ((png_ptr->transformations & PNG_EXPAND) != 0)
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         if (png_ptr->num_trans != 0)
+            max_pixel_depth = 32;
+
+         else
+            max_pixel_depth = 24;
+      }
+
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
+      {
+         if (max_pixel_depth < 8)
+            max_pixel_depth = 8;
+
+         if (png_ptr->num_trans != 0)
+            max_pixel_depth *= 2;
+      }
+
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
+      {
+         if (png_ptr->num_trans != 0)
+         {
+            max_pixel_depth *= 4;
+            max_pixel_depth /= 3;
+         }
+      }
+   }
+#endif
+
+#ifdef PNG_READ_EXPAND_16_SUPPORTED
+   if ((png_ptr->transformations & PNG_EXPAND_16) != 0)
+   {
+#  ifdef PNG_READ_EXPAND_SUPPORTED
+      /* In fact it is an error if it isn't supported, but checking is
+       * the safe way.
+       */
+      if ((png_ptr->transformations & PNG_EXPAND) != 0)
+      {
+         if (png_ptr->bit_depth < 16)
+            max_pixel_depth *= 2;
+      }
+      else
+#  endif
+      png_ptr->transformations &= ~PNG_EXPAND_16;
+   }
+#endif
+
+#ifdef PNG_READ_FILLER_SUPPORTED
+   if ((png_ptr->transformations & (PNG_FILLER)) != 0)
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
+      {
+         if (max_pixel_depth <= 8)
+            max_pixel_depth = 16;
+
+         else
+            max_pixel_depth = 32;
+      }
+
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB ||
+         png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         if (max_pixel_depth <= 32)
+            max_pixel_depth = 32;
+
+         else
+            max_pixel_depth = 64;
+      }
+   }
+#endif
+
+#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
+   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0)
+   {
+      if (
+#ifdef PNG_READ_EXPAND_SUPPORTED
+          (png_ptr->num_trans != 0 &&
+          (png_ptr->transformations & PNG_EXPAND) != 0) ||
+#endif
+#ifdef PNG_READ_FILLER_SUPPORTED
+          (png_ptr->transformations & (PNG_FILLER)) != 0 ||
+#endif
+          png_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         if (max_pixel_depth <= 16)
+            max_pixel_depth = 32;
+
+         else
+            max_pixel_depth = 64;
+      }
+
+      else
+      {
+         if (max_pixel_depth <= 8)
+         {
+            if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+               max_pixel_depth = 32;
+
+            else
+               max_pixel_depth = 24;
+         }
+
+         else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            max_pixel_depth = 64;
+
+         else
+            max_pixel_depth = 48;
+      }
+   }
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) && \
+defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+   if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0)
+   {
+      unsigned int user_pixel_depth = png_ptr->user_transform_depth *
+         png_ptr->user_transform_channels;
+
+      if (user_pixel_depth > max_pixel_depth)
+         max_pixel_depth = user_pixel_depth;
+   }
+#endif
+
+   /* This value is stored in png_struct and double checked in the row read
+    * code.
+    */
+   png_ptr->maximum_pixel_depth = (png_byte)max_pixel_depth;
+   png_ptr->transformed_pixel_depth = 0; /* calculated on demand */
+
+   /* Align the width on the next larger 8 pixels.  Mainly used
+    * for interlacing
+    */
+   row_bytes = ((png_ptr->width + 7) & ~((png_uint_32)7));
+   /* Calculate the maximum bytes needed, adding a byte and a pixel
+    * for safety's sake
+    */
+   row_bytes = PNG_ROWBYTES(max_pixel_depth, row_bytes) +
+       1 + ((max_pixel_depth + 7) >> 3U);
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (row_bytes > (png_uint_32)65536L)
+      png_error(png_ptr, "This image requires a row greater than 64KB");
+#endif
+
+   if (row_bytes + 48 > png_ptr->old_big_row_buf_size)
+   {
+      png_free(png_ptr, png_ptr->big_row_buf);
+      png_free(png_ptr, png_ptr->big_prev_row);
+
+      if (png_ptr->interlaced != 0)
+         png_ptr->big_row_buf = (png_bytep)png_calloc(png_ptr,
+             row_bytes + 48);
+
+      else
+         png_ptr->big_row_buf = (png_bytep)png_malloc(png_ptr, row_bytes + 48);
+
+      png_ptr->big_prev_row = (png_bytep)png_malloc(png_ptr, row_bytes + 48);
+
+#ifdef PNG_ALIGNED_MEMORY_SUPPORTED
+      /* Use 16-byte aligned memory for row_buf with at least 16 bytes
+       * of padding before and after row_buf; treat prev_row similarly.
+       * NOTE: the alignment is to the start of the pixels, one beyond the start
+       * of the buffer, because of the filter byte.  Prior to libpng 1.5.6 this
+       * was incorrect; the filter byte was aligned, which had the exact
+       * opposite effect of that intended.
+       */
+      {
+         png_bytep temp = png_ptr->big_row_buf + 32;
+         size_t extra = (size_t)temp & 0x0f;
+         png_ptr->row_buf = temp - extra - 1/*filter byte*/;
+
+         temp = png_ptr->big_prev_row + 32;
+         extra = (size_t)temp & 0x0f;
+         png_ptr->prev_row = temp - extra - 1/*filter byte*/;
+      }
+#else
+      /* Use 31 bytes of padding before and 17 bytes after row_buf. */
+      png_ptr->row_buf = png_ptr->big_row_buf + 31;
+      png_ptr->prev_row = png_ptr->big_prev_row + 31;
+#endif
+      png_ptr->old_big_row_buf_size = row_bytes + 48;
+   }
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (png_ptr->rowbytes > 65535)
+      png_error(png_ptr, "This image requires a row greater than 64KB");
+
+#endif
+   if (png_ptr->rowbytes > (PNG_SIZE_MAX - 1))
+      png_error(png_ptr, "Row has too many bytes to allocate in memory");
+
+   memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
+
+   png_debug1(3, "width = %u,", png_ptr->width);
+   png_debug1(3, "height = %u,", png_ptr->height);
+   png_debug1(3, "iwidth = %u,", png_ptr->iwidth);
+   png_debug1(3, "num_rows = %u,", png_ptr->num_rows);
+   png_debug1(3, "rowbytes = %lu,", (unsigned long)png_ptr->rowbytes);
+   png_debug1(3, "irowbytes = %lu",
+       (unsigned long)PNG_ROWBYTES(png_ptr->pixel_depth, png_ptr->iwidth) + 1);
+
+   /* The sequential reader needs a buffer for IDAT, but the progressive reader
+    * does not, so free the read buffer now regardless; the sequential reader
+    * reallocates it on demand.
+    */
+   if (png_ptr->read_buffer != NULL)
+   {
+      png_bytep buffer = png_ptr->read_buffer;
+
+      png_ptr->read_buffer_size = 0;
+      png_ptr->read_buffer = NULL;
+      png_free(png_ptr, buffer);
+   }
+
+   /* Finally claim the zstream for the inflate of the IDAT data, use the bits
+    * value from the stream (note that this will result in a fatal error if the
+    * IDAT stream has a bogus deflate header window_bits value, but this should
+    * not be happening any longer!)
+    */
+   if (png_inflate_claim(png_ptr, png_IDAT) != Z_OK)
+      png_error(png_ptr, png_ptr->zstream.msg);
+
+   png_ptr->flags |= PNG_FLAG_ROW_INIT;
+}
+#endif /* READ */
diff --git a/reg-io/png/lpng/pngset.c b/reg-io/png/lpng/pngset.c
new file mode 100644
index 00000000..372b9f50
--- /dev/null
+++ b/reg-io/png/lpng/pngset.c
@@ -0,0 +1,1803 @@
+
+/* pngset.c - storage of image information into info struct
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ *
+ * The functions here are used during reads to store data from the file
+ * into the info struct, and during writes to store application data
+ * into the info struct for writing into the file.  This abstracts the
+ * info struct and allows us to change the structure in the future.
+ */
+
+#include "pngpriv.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+
+#ifdef PNG_bKGD_SUPPORTED
+void PNGAPI
+png_set_bKGD(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_const_color_16p background)
+{
+   png_debug1(1, "in %s storage function", "bKGD");
+
+   if (png_ptr == NULL || info_ptr == NULL || background == NULL)
+      return;
+
+   info_ptr->background = *background;
+   info_ptr->valid |= PNG_INFO_bKGD;
+}
+#endif
+
+#ifdef PNG_cHRM_SUPPORTED
+void PNGFAPI
+png_set_cHRM_fixed(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_fixed_point white_x, png_fixed_point white_y, png_fixed_point red_x,
+    png_fixed_point red_y, png_fixed_point green_x, png_fixed_point green_y,
+    png_fixed_point blue_x, png_fixed_point blue_y)
+{
+   png_xy xy;
+
+   png_debug1(1, "in %s storage function", "cHRM fixed");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   xy.redx = red_x;
+   xy.redy = red_y;
+   xy.greenx = green_x;
+   xy.greeny = green_y;
+   xy.bluex = blue_x;
+   xy.bluey = blue_y;
+   xy.whitex = white_x;
+   xy.whitey = white_y;
+
+   if (png_colorspace_set_chromaticities(png_ptr, &info_ptr->colorspace, &xy,
+       2/* override with app values*/) != 0)
+      info_ptr->colorspace.flags |= PNG_COLORSPACE_FROM_cHRM;
+
+   png_colorspace_sync_info(png_ptr, info_ptr);
+}
+
+void PNGFAPI
+png_set_cHRM_XYZ_fixed(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_fixed_point int_red_X, png_fixed_point int_red_Y,
+    png_fixed_point int_red_Z, png_fixed_point int_green_X,
+    png_fixed_point int_green_Y, png_fixed_point int_green_Z,
+    png_fixed_point int_blue_X, png_fixed_point int_blue_Y,
+    png_fixed_point int_blue_Z)
+{
+   png_XYZ XYZ;
+
+   png_debug1(1, "in %s storage function", "cHRM XYZ fixed");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   XYZ.red_X = int_red_X;
+   XYZ.red_Y = int_red_Y;
+   XYZ.red_Z = int_red_Z;
+   XYZ.green_X = int_green_X;
+   XYZ.green_Y = int_green_Y;
+   XYZ.green_Z = int_green_Z;
+   XYZ.blue_X = int_blue_X;
+   XYZ.blue_Y = int_blue_Y;
+   XYZ.blue_Z = int_blue_Z;
+
+   if (png_colorspace_set_endpoints(png_ptr, &info_ptr->colorspace,
+       &XYZ, 2) != 0)
+      info_ptr->colorspace.flags |= PNG_COLORSPACE_FROM_cHRM;
+
+   png_colorspace_sync_info(png_ptr, info_ptr);
+}
+
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+void PNGAPI
+png_set_cHRM(png_const_structrp png_ptr, png_inforp info_ptr,
+    double white_x, double white_y, double red_x, double red_y,
+    double green_x, double green_y, double blue_x, double blue_y)
+{
+   png_set_cHRM_fixed(png_ptr, info_ptr,
+       png_fixed(png_ptr, white_x, "cHRM White X"),
+       png_fixed(png_ptr, white_y, "cHRM White Y"),
+       png_fixed(png_ptr, red_x, "cHRM Red X"),
+       png_fixed(png_ptr, red_y, "cHRM Red Y"),
+       png_fixed(png_ptr, green_x, "cHRM Green X"),
+       png_fixed(png_ptr, green_y, "cHRM Green Y"),
+       png_fixed(png_ptr, blue_x, "cHRM Blue X"),
+       png_fixed(png_ptr, blue_y, "cHRM Blue Y"));
+}
+
+void PNGAPI
+png_set_cHRM_XYZ(png_const_structrp png_ptr, png_inforp info_ptr, double red_X,
+    double red_Y, double red_Z, double green_X, double green_Y, double green_Z,
+    double blue_X, double blue_Y, double blue_Z)
+{
+   png_set_cHRM_XYZ_fixed(png_ptr, info_ptr,
+       png_fixed(png_ptr, red_X, "cHRM Red X"),
+       png_fixed(png_ptr, red_Y, "cHRM Red Y"),
+       png_fixed(png_ptr, red_Z, "cHRM Red Z"),
+       png_fixed(png_ptr, green_X, "cHRM Green X"),
+       png_fixed(png_ptr, green_Y, "cHRM Green Y"),
+       png_fixed(png_ptr, green_Z, "cHRM Green Z"),
+       png_fixed(png_ptr, blue_X, "cHRM Blue X"),
+       png_fixed(png_ptr, blue_Y, "cHRM Blue Y"),
+       png_fixed(png_ptr, blue_Z, "cHRM Blue Z"));
+}
+#  endif /* FLOATING_POINT */
+
+#endif /* cHRM */
+
+#ifdef PNG_eXIf_SUPPORTED
+void PNGAPI
+png_set_eXIf(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_bytep exif)
+{
+  png_warning(png_ptr, "png_set_eXIf does not work; use png_set_eXIf_1");
+  PNG_UNUSED(info_ptr)
+  PNG_UNUSED(exif)
+}
+
+void PNGAPI
+png_set_eXIf_1(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_uint_32 num_exif, png_bytep exif)
+{
+   png_bytep new_exif;
+
+   png_debug1(1, "in %s storage function", "eXIf");
+
+   if (png_ptr == NULL || info_ptr == NULL ||
+       (png_ptr->mode & PNG_WROTE_eXIf) != 0)
+      return;
+
+   new_exif = png_voidcast(png_bytep, png_malloc_warn(png_ptr, num_exif));
+
+   if (new_exif == NULL)
+   {
+      png_warning(png_ptr, "Insufficient memory for eXIf chunk data");
+      return;
+   }
+
+   memcpy(new_exif, exif, (size_t)num_exif);
+
+   png_free_data(png_ptr, info_ptr, PNG_FREE_EXIF, 0);
+
+   info_ptr->num_exif = num_exif;
+   info_ptr->exif = new_exif;
+   info_ptr->free_me |= PNG_FREE_EXIF;
+   info_ptr->valid |= PNG_INFO_eXIf;
+}
+#endif /* eXIf */
+
+#ifdef PNG_gAMA_SUPPORTED
+void PNGFAPI
+png_set_gAMA_fixed(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_fixed_point file_gamma)
+{
+   png_debug1(1, "in %s storage function", "gAMA");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   png_colorspace_set_gamma(png_ptr, &info_ptr->colorspace, file_gamma);
+   png_colorspace_sync_info(png_ptr, info_ptr);
+}
+
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+void PNGAPI
+png_set_gAMA(png_const_structrp png_ptr, png_inforp info_ptr, double file_gamma)
+{
+   png_set_gAMA_fixed(png_ptr, info_ptr, png_fixed(png_ptr, file_gamma,
+       "png_set_gAMA"));
+}
+#  endif
+#endif
+
+#ifdef PNG_hIST_SUPPORTED
+void PNGAPI
+png_set_hIST(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_const_uint_16p hist)
+{
+   int i;
+
+   png_debug1(1, "in %s storage function", "hIST");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   if (info_ptr->num_palette == 0 || info_ptr->num_palette
+       > PNG_MAX_PALETTE_LENGTH)
+   {
+      png_warning(png_ptr,
+          "Invalid palette size, hIST allocation skipped");
+
+      return;
+   }
+
+   png_free_data(png_ptr, info_ptr, PNG_FREE_HIST, 0);
+
+   /* Changed from info->num_palette to PNG_MAX_PALETTE_LENGTH in
+    * version 1.2.1
+    */
+   info_ptr->hist = png_voidcast(png_uint_16p, png_malloc_warn(png_ptr,
+       PNG_MAX_PALETTE_LENGTH * (sizeof (png_uint_16))));
+
+   if (info_ptr->hist == NULL)
+   {
+      png_warning(png_ptr, "Insufficient memory for hIST chunk data");
+      return;
+   }
+
+   for (i = 0; i < info_ptr->num_palette; i++)
+      info_ptr->hist[i] = hist[i];
+
+   info_ptr->free_me |= PNG_FREE_HIST;
+   info_ptr->valid |= PNG_INFO_hIST;
+}
+#endif
+
+void PNGAPI
+png_set_IHDR(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_uint_32 width, png_uint_32 height, int bit_depth,
+    int color_type, int interlace_type, int compression_type,
+    int filter_type)
+{
+   png_debug1(1, "in %s storage function", "IHDR");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   info_ptr->width = width;
+   info_ptr->height = height;
+   info_ptr->bit_depth = (png_byte)bit_depth;
+   info_ptr->color_type = (png_byte)color_type;
+   info_ptr->compression_type = (png_byte)compression_type;
+   info_ptr->filter_type = (png_byte)filter_type;
+   info_ptr->interlace_type = (png_byte)interlace_type;
+
+   png_check_IHDR (png_ptr, info_ptr->width, info_ptr->height,
+       info_ptr->bit_depth, info_ptr->color_type, info_ptr->interlace_type,
+       info_ptr->compression_type, info_ptr->filter_type);
+
+   if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      info_ptr->channels = 1;
+
+   else if ((info_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0)
+      info_ptr->channels = 3;
+
+   else
+      info_ptr->channels = 1;
+
+   if ((info_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)
+      info_ptr->channels++;
+
+   info_ptr->pixel_depth = (png_byte)(info_ptr->channels * info_ptr->bit_depth);
+
+   info_ptr->rowbytes = PNG_ROWBYTES(info_ptr->pixel_depth, width);
+}
+
+#ifdef PNG_oFFs_SUPPORTED
+void PNGAPI
+png_set_oFFs(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_int_32 offset_x, png_int_32 offset_y, int unit_type)
+{
+   png_debug1(1, "in %s storage function", "oFFs");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   info_ptr->x_offset = offset_x;
+   info_ptr->y_offset = offset_y;
+   info_ptr->offset_unit_type = (png_byte)unit_type;
+   info_ptr->valid |= PNG_INFO_oFFs;
+}
+#endif
+
+#ifdef PNG_pCAL_SUPPORTED
+void PNGAPI
+png_set_pCAL(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_const_charp purpose, png_int_32 X0, png_int_32 X1, int type,
+    int nparams, png_const_charp units, png_charpp params)
+{
+   size_t length;
+   int i;
+
+   png_debug1(1, "in %s storage function", "pCAL");
+
+   if (png_ptr == NULL || info_ptr == NULL || purpose == NULL || units == NULL
+       || (nparams > 0 && params == NULL))
+      return;
+
+   length = strlen(purpose) + 1;
+   png_debug1(3, "allocating purpose for info (%lu bytes)",
+       (unsigned long)length);
+
+   /* TODO: validate format of calibration name and unit name */
+
+   /* Check that the type matches the specification. */
+   if (type < 0 || type > 3)
+   {
+      png_chunk_report(png_ptr, "Invalid pCAL equation type",
+            PNG_CHUNK_WRITE_ERROR);
+      return;
+   }
+
+   if (nparams < 0 || nparams > 255)
+   {
+      png_chunk_report(png_ptr, "Invalid pCAL parameter count",
+            PNG_CHUNK_WRITE_ERROR);
+      return;
+   }
+
+   /* Validate params[nparams] */
+   for (i=0; i<nparams; ++i)
+   {
+      if (params[i] == NULL ||
+          !png_check_fp_string(params[i], strlen(params[i])))
+      {
+         png_chunk_report(png_ptr, "Invalid format for pCAL parameter",
+               PNG_CHUNK_WRITE_ERROR);
+         return;
+      }
+   }
+
+   info_ptr->pcal_purpose = png_voidcast(png_charp,
+       png_malloc_warn(png_ptr, length));
+
+   if (info_ptr->pcal_purpose == NULL)
+   {
+      png_chunk_report(png_ptr, "Insufficient memory for pCAL purpose",
+            PNG_CHUNK_WRITE_ERROR);
+      return;
+   }
+
+   memcpy(info_ptr->pcal_purpose, purpose, length);
+
+   info_ptr->free_me |= PNG_FREE_PCAL;
+
+   png_debug(3, "storing X0, X1, type, and nparams in info");
+   info_ptr->pcal_X0 = X0;
+   info_ptr->pcal_X1 = X1;
+   info_ptr->pcal_type = (png_byte)type;
+   info_ptr->pcal_nparams = (png_byte)nparams;
+
+   length = strlen(units) + 1;
+   png_debug1(3, "allocating units for info (%lu bytes)",
+       (unsigned long)length);
+
+   info_ptr->pcal_units = png_voidcast(png_charp,
+       png_malloc_warn(png_ptr, length));
+
+   if (info_ptr->pcal_units == NULL)
+   {
+      png_warning(png_ptr, "Insufficient memory for pCAL units");
+      return;
+   }
+
+   memcpy(info_ptr->pcal_units, units, length);
+
+   info_ptr->pcal_params = png_voidcast(png_charpp, png_malloc_warn(png_ptr,
+       (size_t)(((unsigned int)nparams + 1) * (sizeof (png_charp)))));
+
+   if (info_ptr->pcal_params == NULL)
+   {
+      png_warning(png_ptr, "Insufficient memory for pCAL params");
+      return;
+   }
+
+   memset(info_ptr->pcal_params, 0, ((unsigned int)nparams + 1) *
+       (sizeof (png_charp)));
+
+   for (i = 0; i < nparams; i++)
+   {
+      length = strlen(params[i]) + 1;
+      png_debug2(3, "allocating parameter %d for info (%lu bytes)", i,
+          (unsigned long)length);
+
+      info_ptr->pcal_params[i] = (png_charp)png_malloc_warn(png_ptr, length);
+
+      if (info_ptr->pcal_params[i] == NULL)
+      {
+         png_warning(png_ptr, "Insufficient memory for pCAL parameter");
+         return;
+      }
+
+      memcpy(info_ptr->pcal_params[i], params[i], length);
+   }
+
+   info_ptr->valid |= PNG_INFO_pCAL;
+}
+#endif
+
+#ifdef PNG_sCAL_SUPPORTED
+void PNGAPI
+png_set_sCAL_s(png_const_structrp png_ptr, png_inforp info_ptr,
+    int unit, png_const_charp swidth, png_const_charp sheight)
+{
+   size_t lengthw = 0, lengthh = 0;
+
+   png_debug1(1, "in %s storage function", "sCAL");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   /* Double check the unit (should never get here with an invalid
+    * unit unless this is an API call.)
+    */
+   if (unit != 1 && unit != 2)
+      png_error(png_ptr, "Invalid sCAL unit");
+
+   if (swidth == NULL || (lengthw = strlen(swidth)) == 0 ||
+       swidth[0] == 45 /* '-' */ || !png_check_fp_string(swidth, lengthw))
+      png_error(png_ptr, "Invalid sCAL width");
+
+   if (sheight == NULL || (lengthh = strlen(sheight)) == 0 ||
+       sheight[0] == 45 /* '-' */ || !png_check_fp_string(sheight, lengthh))
+      png_error(png_ptr, "Invalid sCAL height");
+
+   info_ptr->scal_unit = (png_byte)unit;
+
+   ++lengthw;
+
+   png_debug1(3, "allocating unit for info (%u bytes)", (unsigned int)lengthw);
+
+   info_ptr->scal_s_width = png_voidcast(png_charp,
+       png_malloc_warn(png_ptr, lengthw));
+
+   if (info_ptr->scal_s_width == NULL)
+   {
+      png_warning(png_ptr, "Memory allocation failed while processing sCAL");
+
+      return;
+   }
+
+   memcpy(info_ptr->scal_s_width, swidth, lengthw);
+
+   ++lengthh;
+
+   png_debug1(3, "allocating unit for info (%u bytes)", (unsigned int)lengthh);
+
+   info_ptr->scal_s_height = png_voidcast(png_charp,
+       png_malloc_warn(png_ptr, lengthh));
+
+   if (info_ptr->scal_s_height == NULL)
+   {
+      png_free(png_ptr, info_ptr->scal_s_width);
+      info_ptr->scal_s_width = NULL;
+
+      png_warning(png_ptr, "Memory allocation failed while processing sCAL");
+      return;
+   }
+
+   memcpy(info_ptr->scal_s_height, sheight, lengthh);
+
+   info_ptr->free_me |= PNG_FREE_SCAL;
+   info_ptr->valid |= PNG_INFO_sCAL;
+}
+
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+void PNGAPI
+png_set_sCAL(png_const_structrp png_ptr, png_inforp info_ptr, int unit,
+    double width, double height)
+{
+   png_debug1(1, "in %s storage function", "sCAL");
+
+   /* Check the arguments. */
+   if (width <= 0)
+      png_warning(png_ptr, "Invalid sCAL width ignored");
+
+   else if (height <= 0)
+      png_warning(png_ptr, "Invalid sCAL height ignored");
+
+   else
+   {
+      /* Convert 'width' and 'height' to ASCII. */
+      char swidth[PNG_sCAL_MAX_DIGITS+1];
+      char sheight[PNG_sCAL_MAX_DIGITS+1];
+
+      png_ascii_from_fp(png_ptr, swidth, (sizeof swidth), width,
+          PNG_sCAL_PRECISION);
+      png_ascii_from_fp(png_ptr, sheight, (sizeof sheight), height,
+          PNG_sCAL_PRECISION);
+
+      png_set_sCAL_s(png_ptr, info_ptr, unit, swidth, sheight);
+   }
+}
+#  endif
+
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+void PNGAPI
+png_set_sCAL_fixed(png_const_structrp png_ptr, png_inforp info_ptr, int unit,
+    png_fixed_point width, png_fixed_point height)
+{
+   png_debug1(1, "in %s storage function", "sCAL");
+
+   /* Check the arguments. */
+   if (width <= 0)
+      png_warning(png_ptr, "Invalid sCAL width ignored");
+
+   else if (height <= 0)
+      png_warning(png_ptr, "Invalid sCAL height ignored");
+
+   else
+   {
+      /* Convert 'width' and 'height' to ASCII. */
+      char swidth[PNG_sCAL_MAX_DIGITS+1];
+      char sheight[PNG_sCAL_MAX_DIGITS+1];
+
+      png_ascii_from_fixed(png_ptr, swidth, (sizeof swidth), width);
+      png_ascii_from_fixed(png_ptr, sheight, (sizeof sheight), height);
+
+      png_set_sCAL_s(png_ptr, info_ptr, unit, swidth, sheight);
+   }
+}
+#  endif
+#endif
+
+#ifdef PNG_pHYs_SUPPORTED
+void PNGAPI
+png_set_pHYs(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_uint_32 res_x, png_uint_32 res_y, int unit_type)
+{
+   png_debug1(1, "in %s storage function", "pHYs");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   info_ptr->x_pixels_per_unit = res_x;
+   info_ptr->y_pixels_per_unit = res_y;
+   info_ptr->phys_unit_type = (png_byte)unit_type;
+   info_ptr->valid |= PNG_INFO_pHYs;
+}
+#endif
+
+void PNGAPI
+png_set_PLTE(png_structrp png_ptr, png_inforp info_ptr,
+    png_const_colorp palette, int num_palette)
+{
+
+   png_uint_32 max_palette_length;
+
+   png_debug1(1, "in %s storage function", "PLTE");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   max_palette_length = (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ?
+      (1 << info_ptr->bit_depth) : PNG_MAX_PALETTE_LENGTH;
+
+   if (num_palette < 0 || num_palette > (int) max_palette_length)
+   {
+      if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+         png_error(png_ptr, "Invalid palette length");
+
+      else
+      {
+         png_warning(png_ptr, "Invalid palette length");
+
+         return;
+      }
+   }
+
+   if ((num_palette > 0 && palette == NULL) ||
+      (num_palette == 0
+#        ifdef PNG_MNG_FEATURES_SUPPORTED
+            && (png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) == 0
+#        endif
+      ))
+   {
+      png_error(png_ptr, "Invalid palette");
+   }
+
+   /* It may not actually be necessary to set png_ptr->palette here;
+    * we do it for backward compatibility with the way the png_handle_tRNS
+    * function used to do the allocation.
+    *
+    * 1.6.0: the above statement appears to be incorrect; something has to set
+    * the palette inside png_struct on read.
+    */
+   png_free_data(png_ptr, info_ptr, PNG_FREE_PLTE, 0);
+
+   /* Changed in libpng-1.2.1 to allocate PNG_MAX_PALETTE_LENGTH instead
+    * of num_palette entries, in case of an invalid PNG file or incorrect
+    * call to png_set_PLTE() with too-large sample values.
+    */
+   png_ptr->palette = png_voidcast(png_colorp, png_calloc(png_ptr,
+       PNG_MAX_PALETTE_LENGTH * (sizeof (png_color))));
+
+   if (num_palette > 0)
+      memcpy(png_ptr->palette, palette, (unsigned int)num_palette *
+          (sizeof (png_color)));
+
+   info_ptr->palette = png_ptr->palette;
+   info_ptr->num_palette = png_ptr->num_palette = (png_uint_16)num_palette;
+   info_ptr->free_me |= PNG_FREE_PLTE;
+   info_ptr->valid |= PNG_INFO_PLTE;
+}
+
+#ifdef PNG_sBIT_SUPPORTED
+void PNGAPI
+png_set_sBIT(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_const_color_8p sig_bit)
+{
+   png_debug1(1, "in %s storage function", "sBIT");
+
+   if (png_ptr == NULL || info_ptr == NULL || sig_bit == NULL)
+      return;
+
+   info_ptr->sig_bit = *sig_bit;
+   info_ptr->valid |= PNG_INFO_sBIT;
+}
+#endif
+
+#ifdef PNG_sRGB_SUPPORTED
+void PNGAPI
+png_set_sRGB(png_const_structrp png_ptr, png_inforp info_ptr, int srgb_intent)
+{
+   png_debug1(1, "in %s storage function", "sRGB");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   (void)png_colorspace_set_sRGB(png_ptr, &info_ptr->colorspace, srgb_intent);
+   png_colorspace_sync_info(png_ptr, info_ptr);
+}
+
+void PNGAPI
+png_set_sRGB_gAMA_and_cHRM(png_const_structrp png_ptr, png_inforp info_ptr,
+    int srgb_intent)
+{
+   png_debug1(1, "in %s storage function", "sRGB_gAMA_and_cHRM");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   if (png_colorspace_set_sRGB(png_ptr, &info_ptr->colorspace,
+       srgb_intent) != 0)
+   {
+      /* This causes the gAMA and cHRM to be written too */
+      info_ptr->colorspace.flags |=
+         PNG_COLORSPACE_FROM_gAMA|PNG_COLORSPACE_FROM_cHRM;
+   }
+
+   png_colorspace_sync_info(png_ptr, info_ptr);
+}
+#endif /* sRGB */
+
+
+#ifdef PNG_iCCP_SUPPORTED
+void PNGAPI
+png_set_iCCP(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_const_charp name, int compression_type,
+    png_const_bytep profile, png_uint_32 proflen)
+{
+   png_charp new_iccp_name;
+   png_bytep new_iccp_profile;
+   size_t length;
+
+   png_debug1(1, "in %s storage function", "iCCP");
+
+   if (png_ptr == NULL || info_ptr == NULL || name == NULL || profile == NULL)
+      return;
+
+   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
+      png_app_error(png_ptr, "Invalid iCCP compression method");
+
+   /* Set the colorspace first because this validates the profile; do not
+    * override previously set app cHRM or gAMA here (because likely as not the
+    * application knows better than libpng what the correct values are.)  Pass
+    * the info_ptr color_type field to png_colorspace_set_ICC because in the
+    * write case it has not yet been stored in png_ptr.
+    */
+   {
+      int result = png_colorspace_set_ICC(png_ptr, &info_ptr->colorspace, name,
+          proflen, profile, info_ptr->color_type);
+
+      png_colorspace_sync_info(png_ptr, info_ptr);
+
+      /* Don't do any of the copying if the profile was bad, or inconsistent. */
+      if (result == 0)
+         return;
+
+      /* But do write the gAMA and cHRM chunks from the profile. */
+      info_ptr->colorspace.flags |=
+         PNG_COLORSPACE_FROM_gAMA|PNG_COLORSPACE_FROM_cHRM;
+   }
+
+   length = strlen(name)+1;
+   new_iccp_name = png_voidcast(png_charp, png_malloc_warn(png_ptr, length));
+
+   if (new_iccp_name == NULL)
+   {
+      png_benign_error(png_ptr, "Insufficient memory to process iCCP chunk");
+
+      return;
+   }
+
+   memcpy(new_iccp_name, name, length);
+   new_iccp_profile = png_voidcast(png_bytep,
+       png_malloc_warn(png_ptr, proflen));
+
+   if (new_iccp_profile == NULL)
+   {
+      png_free(png_ptr, new_iccp_name);
+      png_benign_error(png_ptr,
+          "Insufficient memory to process iCCP profile");
+
+      return;
+   }
+
+   memcpy(new_iccp_profile, profile, proflen);
+
+   png_free_data(png_ptr, info_ptr, PNG_FREE_ICCP, 0);
+
+   info_ptr->iccp_proflen = proflen;
+   info_ptr->iccp_name = new_iccp_name;
+   info_ptr->iccp_profile = new_iccp_profile;
+   info_ptr->free_me |= PNG_FREE_ICCP;
+   info_ptr->valid |= PNG_INFO_iCCP;
+}
+#endif
+
+#ifdef PNG_TEXT_SUPPORTED
+void PNGAPI
+png_set_text(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_const_textp text_ptr, int num_text)
+{
+   int ret;
+   ret = png_set_text_2(png_ptr, info_ptr, text_ptr, num_text);
+
+   if (ret != 0)
+      png_error(png_ptr, "Insufficient memory to store text");
+}
+
+int /* PRIVATE */
+png_set_text_2(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_const_textp text_ptr, int num_text)
+{
+   int i;
+
+   png_debug1(1, "in text storage function, chunk typeid = 0x%lx",
+      png_ptr == NULL ? 0xabadca11UL : (unsigned long)png_ptr->chunk_name);
+
+   if (png_ptr == NULL || info_ptr == NULL || num_text <= 0 || text_ptr == NULL)
+      return 0;
+
+   /* Make sure we have enough space in the "text" array in info_struct
+    * to hold all of the incoming text_ptr objects.  This compare can't overflow
+    * because max_text >= num_text (anyway, subtract of two positive integers
+    * can't overflow in any case.)
+    */
+   if (num_text > info_ptr->max_text - info_ptr->num_text)
+   {
+      int old_num_text = info_ptr->num_text;
+      int max_text;
+      png_textp new_text = NULL;
+
+      /* Calculate an appropriate max_text, checking for overflow. */
+      max_text = old_num_text;
+      if (num_text <= INT_MAX - max_text)
+      {
+         max_text += num_text;
+
+         /* Round up to a multiple of 8 */
+         if (max_text < INT_MAX-8)
+            max_text = (max_text + 8) & ~0x7;
+
+         else
+            max_text = INT_MAX;
+
+         /* Now allocate a new array and copy the old members in; this does all
+          * the overflow checks.
+          */
+         new_text = png_voidcast(png_textp,png_realloc_array(png_ptr,
+             info_ptr->text, old_num_text, max_text-old_num_text,
+             sizeof *new_text));
+      }
+
+      if (new_text == NULL)
+      {
+         png_chunk_report(png_ptr, "too many text chunks",
+             PNG_CHUNK_WRITE_ERROR);
+
+         return 1;
+      }
+
+      png_free(png_ptr, info_ptr->text);
+
+      info_ptr->text = new_text;
+      info_ptr->free_me |= PNG_FREE_TEXT;
+      info_ptr->max_text = max_text;
+      /* num_text is adjusted below as the entries are copied in */
+
+      png_debug1(3, "allocated %d entries for info_ptr->text", max_text);
+   }
+
+   for (i = 0; i < num_text; i++)
+   {
+      size_t text_length, key_len;
+      size_t lang_len, lang_key_len;
+      png_textp textp = &(info_ptr->text[info_ptr->num_text]);
+
+      if (text_ptr[i].key == NULL)
+          continue;
+
+      if (text_ptr[i].compression < PNG_TEXT_COMPRESSION_NONE ||
+          text_ptr[i].compression >= PNG_TEXT_COMPRESSION_LAST)
+      {
+         png_chunk_report(png_ptr, "text compression mode is out of range",
+             PNG_CHUNK_WRITE_ERROR);
+         continue;
+      }
+
+      key_len = strlen(text_ptr[i].key);
+
+      if (text_ptr[i].compression <= 0)
+      {
+         lang_len = 0;
+         lang_key_len = 0;
+      }
+
+      else
+#  ifdef PNG_iTXt_SUPPORTED
+      {
+         /* Set iTXt data */
+
+         if (text_ptr[i].lang != NULL)
+            lang_len = strlen(text_ptr[i].lang);
+
+         else
+            lang_len = 0;
+
+         if (text_ptr[i].lang_key != NULL)
+            lang_key_len = strlen(text_ptr[i].lang_key);
+
+         else
+            lang_key_len = 0;
+      }
+#  else /* iTXt */
+      {
+         png_chunk_report(png_ptr, "iTXt chunk not supported",
+             PNG_CHUNK_WRITE_ERROR);
+         continue;
+      }
+#  endif
+
+      if (text_ptr[i].text == NULL || text_ptr[i].text[0] == '\0')
+      {
+         text_length = 0;
+#  ifdef PNG_iTXt_SUPPORTED
+         if (text_ptr[i].compression > 0)
+            textp->compression = PNG_ITXT_COMPRESSION_NONE;
+
+         else
+#  endif
+            textp->compression = PNG_TEXT_COMPRESSION_NONE;
+      }
+
+      else
+      {
+         text_length = strlen(text_ptr[i].text);
+         textp->compression = text_ptr[i].compression;
+      }
+
+      textp->key = png_voidcast(png_charp,png_malloc_base(png_ptr,
+          key_len + text_length + lang_len + lang_key_len + 4));
+
+      if (textp->key == NULL)
+      {
+         png_chunk_report(png_ptr, "text chunk: out of memory",
+             PNG_CHUNK_WRITE_ERROR);
+
+         return 1;
+      }
+
+      png_debug2(2, "Allocated %lu bytes at %p in png_set_text",
+          (unsigned long)(png_uint_32)
+          (key_len + lang_len + lang_key_len + text_length + 4),
+          textp->key);
+
+      memcpy(textp->key, text_ptr[i].key, key_len);
+      *(textp->key + key_len) = '\0';
+
+      if (text_ptr[i].compression > 0)
+      {
+         textp->lang = textp->key + key_len + 1;
+         memcpy(textp->lang, text_ptr[i].lang, lang_len);
+         *(textp->lang + lang_len) = '\0';
+         textp->lang_key = textp->lang + lang_len + 1;
+         memcpy(textp->lang_key, text_ptr[i].lang_key, lang_key_len);
+         *(textp->lang_key + lang_key_len) = '\0';
+         textp->text = textp->lang_key + lang_key_len + 1;
+      }
+
+      else
+      {
+         textp->lang=NULL;
+         textp->lang_key=NULL;
+         textp->text = textp->key + key_len + 1;
+      }
+
+      if (text_length != 0)
+         memcpy(textp->text, text_ptr[i].text, text_length);
+
+      *(textp->text + text_length) = '\0';
+
+#  ifdef PNG_iTXt_SUPPORTED
+      if (textp->compression > 0)
+      {
+         textp->text_length = 0;
+         textp->itxt_length = text_length;
+      }
+
+      else
+#  endif
+      {
+         textp->text_length = text_length;
+         textp->itxt_length = 0;
+      }
+
+      info_ptr->num_text++;
+      png_debug1(3, "transferred text chunk %d", info_ptr->num_text);
+   }
+
+   return 0;
+}
+#endif
+
+#ifdef PNG_tIME_SUPPORTED
+void PNGAPI
+png_set_tIME(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_const_timep mod_time)
+{
+   png_debug1(1, "in %s storage function", "tIME");
+
+   if (png_ptr == NULL || info_ptr == NULL || mod_time == NULL ||
+       (png_ptr->mode & PNG_WROTE_tIME) != 0)
+      return;
+
+   if (mod_time->month == 0   || mod_time->month > 12  ||
+       mod_time->day   == 0   || mod_time->day   > 31  ||
+       mod_time->hour  > 23   || mod_time->minute > 59 ||
+       mod_time->second > 60)
+   {
+      png_warning(png_ptr, "Ignoring invalid time value");
+
+      return;
+   }
+
+   info_ptr->mod_time = *mod_time;
+   info_ptr->valid |= PNG_INFO_tIME;
+}
+#endif
+
+#ifdef PNG_tRNS_SUPPORTED
+void PNGAPI
+png_set_tRNS(png_structrp png_ptr, png_inforp info_ptr,
+    png_const_bytep trans_alpha, int num_trans, png_const_color_16p trans_color)
+{
+   png_debug1(1, "in %s storage function", "tRNS");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+
+      return;
+
+   if (trans_alpha != NULL)
+   {
+       /* It may not actually be necessary to set png_ptr->trans_alpha here;
+        * we do it for backward compatibility with the way the png_handle_tRNS
+        * function used to do the allocation.
+        *
+        * 1.6.0: The above statement is incorrect; png_handle_tRNS effectively
+        * relies on png_set_tRNS storing the information in png_struct
+        * (otherwise it won't be there for the code in pngrtran.c).
+        */
+
+       png_free_data(png_ptr, info_ptr, PNG_FREE_TRNS, 0);
+
+       if (num_trans > 0 && num_trans <= PNG_MAX_PALETTE_LENGTH)
+       {
+         /* Changed from num_trans to PNG_MAX_PALETTE_LENGTH in version 1.2.1 */
+          info_ptr->trans_alpha = png_voidcast(png_bytep,
+              png_malloc(png_ptr, PNG_MAX_PALETTE_LENGTH));
+          memcpy(info_ptr->trans_alpha, trans_alpha, (size_t)num_trans);
+
+          info_ptr->free_me |= PNG_FREE_TRNS;
+          info_ptr->valid |= PNG_INFO_tRNS;
+       }
+       png_ptr->trans_alpha = info_ptr->trans_alpha;
+   }
+
+   if (trans_color != NULL)
+   {
+#ifdef PNG_WARNINGS_SUPPORTED
+      if (info_ptr->bit_depth < 16)
+      {
+         int sample_max = (1 << info_ptr->bit_depth) - 1;
+
+         if ((info_ptr->color_type == PNG_COLOR_TYPE_GRAY &&
+             trans_color->gray > sample_max) ||
+             (info_ptr->color_type == PNG_COLOR_TYPE_RGB &&
+             (trans_color->red > sample_max ||
+             trans_color->green > sample_max ||
+             trans_color->blue > sample_max)))
+            png_warning(png_ptr,
+                "tRNS chunk has out-of-range samples for bit_depth");
+      }
+#endif
+
+      info_ptr->trans_color = *trans_color;
+
+      if (num_trans == 0)
+         num_trans = 1;
+   }
+
+   info_ptr->num_trans = (png_uint_16)num_trans;
+
+   if (num_trans != 0)
+   {
+      info_ptr->free_me |= PNG_FREE_TRNS;
+      info_ptr->valid |= PNG_INFO_tRNS;
+   }
+}
+#endif
+
+#ifdef PNG_sPLT_SUPPORTED
+void PNGAPI
+png_set_sPLT(png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_sPLT_tp entries, int nentries)
+/*
+ *  entries        - array of png_sPLT_t structures
+ *                   to be added to the list of palettes
+ *                   in the info structure.
+ *
+ *  nentries       - number of palette structures to be
+ *                   added.
+ */
+{
+   png_sPLT_tp np;
+
+   png_debug1(1, "in %s storage function", "sPLT");
+
+   if (png_ptr == NULL || info_ptr == NULL || nentries <= 0 || entries == NULL)
+      return;
+
+   /* Use the internal realloc function, which checks for all the possible
+    * overflows.  Notice that the parameters are (int) and (size_t)
+    */
+   np = png_voidcast(png_sPLT_tp,png_realloc_array(png_ptr,
+       info_ptr->splt_palettes, info_ptr->splt_palettes_num, nentries,
+       sizeof *np));
+
+   if (np == NULL)
+   {
+      /* Out of memory or too many chunks */
+      png_chunk_report(png_ptr, "too many sPLT chunks", PNG_CHUNK_WRITE_ERROR);
+      return;
+   }
+
+   png_free(png_ptr, info_ptr->splt_palettes);
+
+   info_ptr->splt_palettes = np;
+   info_ptr->free_me |= PNG_FREE_SPLT;
+
+   np += info_ptr->splt_palettes_num;
+
+   do
+   {
+      size_t length;
+
+      /* Skip invalid input entries */
+      if (entries->name == NULL || entries->entries == NULL)
+      {
+         /* png_handle_sPLT doesn't do this, so this is an app error */
+         png_app_error(png_ptr, "png_set_sPLT: invalid sPLT");
+         /* Just skip the invalid entry */
+         continue;
+      }
+
+      np->depth = entries->depth;
+
+      /* In the event of out-of-memory just return - there's no point keeping
+       * on trying to add sPLT chunks.
+       */
+      length = strlen(entries->name) + 1;
+      np->name = png_voidcast(png_charp, png_malloc_base(png_ptr, length));
+
+      if (np->name == NULL)
+         break;
+
+      memcpy(np->name, entries->name, length);
+
+      /* IMPORTANT: we have memory now that won't get freed if something else
+       * goes wrong; this code must free it.  png_malloc_array produces no
+       * warnings; use a png_chunk_report (below) if there is an error.
+       */
+      np->entries = png_voidcast(png_sPLT_entryp, png_malloc_array(png_ptr,
+          entries->nentries, sizeof (png_sPLT_entry)));
+
+      if (np->entries == NULL)
+      {
+         png_free(png_ptr, np->name);
+         np->name = NULL;
+         break;
+      }
+
+      np->nentries = entries->nentries;
+      /* This multiply can't overflow because png_malloc_array has already
+       * checked it when doing the allocation.
+       */
+      memcpy(np->entries, entries->entries,
+          (unsigned int)entries->nentries * sizeof (png_sPLT_entry));
+
+      /* Note that 'continue' skips the advance of the out pointer and out
+       * count, so an invalid entry is not added.
+       */
+      info_ptr->valid |= PNG_INFO_sPLT;
+      ++(info_ptr->splt_palettes_num);
+      ++np;
+      ++entries;
+   }
+   while (--nentries);
+
+   if (nentries > 0)
+      png_chunk_report(png_ptr, "sPLT out of memory", PNG_CHUNK_WRITE_ERROR);
+}
+#endif /* sPLT */
+
+#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
+static png_byte
+check_location(png_const_structrp png_ptr, int location)
+{
+   location &= (PNG_HAVE_IHDR|PNG_HAVE_PLTE|PNG_AFTER_IDAT);
+
+   /* New in 1.6.0; copy the location and check it.  This is an API
+    * change; previously the app had to use the
+    * png_set_unknown_chunk_location API below for each chunk.
+    */
+   if (location == 0 && (png_ptr->mode & PNG_IS_READ_STRUCT) == 0)
+   {
+      /* Write struct, so unknown chunks come from the app */
+      png_app_warning(png_ptr,
+          "png_set_unknown_chunks now expects a valid location");
+      /* Use the old behavior */
+      location = (png_byte)(png_ptr->mode &
+          (PNG_HAVE_IHDR|PNG_HAVE_PLTE|PNG_AFTER_IDAT));
+   }
+
+   /* This need not be an internal error - if the app calls
+    * png_set_unknown_chunks on a read pointer it must get the location right.
+    */
+   if (location == 0)
+      png_error(png_ptr, "invalid location in png_set_unknown_chunks");
+
+   /* Now reduce the location to the top-most set bit by removing each least
+    * significant bit in turn.
+    */
+   while (location != (location & -location))
+      location &= ~(location & -location);
+
+   /* The cast is safe because 'location' is a bit mask and only the low four
+    * bits are significant.
+    */
+   return (png_byte)location;
+}
+
+void PNGAPI
+png_set_unknown_chunks(png_const_structrp png_ptr,
+    png_inforp info_ptr, png_const_unknown_chunkp unknowns, int num_unknowns)
+{
+   png_unknown_chunkp np;
+
+   if (png_ptr == NULL || info_ptr == NULL || num_unknowns <= 0 ||
+       unknowns == NULL)
+      return;
+
+   /* Check for the failure cases where support has been disabled at compile
+    * time.  This code is hardly ever compiled - it's here because
+    * STORE_UNKNOWN_CHUNKS is set by both read and write code (compiling in this
+    * code) but may be meaningless if the read or write handling of unknown
+    * chunks is not compiled in.
+    */
+#  if !defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) && \
+      defined(PNG_READ_SUPPORTED)
+      if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0)
+      {
+         png_app_error(png_ptr, "no unknown chunk support on read");
+
+         return;
+      }
+#  endif
+#  if !defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED) && \
+      defined(PNG_WRITE_SUPPORTED)
+      if ((png_ptr->mode & PNG_IS_READ_STRUCT) == 0)
+      {
+         png_app_error(png_ptr, "no unknown chunk support on write");
+
+         return;
+      }
+#  endif
+
+   /* Prior to 1.6.0 this code used png_malloc_warn; however, this meant that
+    * unknown critical chunks could be lost with just a warning resulting in
+    * undefined behavior.  Now png_chunk_report is used to provide behavior
+    * appropriate to read or write.
+    */
+   np = png_voidcast(png_unknown_chunkp, png_realloc_array(png_ptr,
+       info_ptr->unknown_chunks, info_ptr->unknown_chunks_num, num_unknowns,
+       sizeof *np));
+
+   if (np == NULL)
+   {
+      png_chunk_report(png_ptr, "too many unknown chunks",
+          PNG_CHUNK_WRITE_ERROR);
+      return;
+   }
+
+   png_free(png_ptr, info_ptr->unknown_chunks);
+
+   info_ptr->unknown_chunks = np; /* safe because it is initialized */
+   info_ptr->free_me |= PNG_FREE_UNKN;
+
+   np += info_ptr->unknown_chunks_num;
+
+   /* Increment unknown_chunks_num each time round the loop to protect the
+    * just-allocated chunk data.
+    */
+   for (; num_unknowns > 0; --num_unknowns, ++unknowns)
+   {
+      memcpy(np->name, unknowns->name, (sizeof np->name));
+      np->name[(sizeof np->name)-1] = '\0';
+      np->location = check_location(png_ptr, unknowns->location);
+
+      if (unknowns->size == 0)
+      {
+         np->data = NULL;
+         np->size = 0;
+      }
+
+      else
+      {
+         np->data = png_voidcast(png_bytep,
+             png_malloc_base(png_ptr, unknowns->size));
+
+         if (np->data == NULL)
+         {
+            png_chunk_report(png_ptr, "unknown chunk: out of memory",
+                PNG_CHUNK_WRITE_ERROR);
+            /* But just skip storing the unknown chunk */
+            continue;
+         }
+
+         memcpy(np->data, unknowns->data, unknowns->size);
+         np->size = unknowns->size;
+      }
+
+      /* These increments are skipped on out-of-memory for the data - the
+       * unknown chunk entry gets overwritten if the png_chunk_report returns.
+       * This is correct in the read case (the chunk is just dropped.)
+       */
+      ++np;
+      ++(info_ptr->unknown_chunks_num);
+   }
+}
+
+void PNGAPI
+png_set_unknown_chunk_location(png_const_structrp png_ptr, png_inforp info_ptr,
+    int chunk, int location)
+{
+   /* This API is pretty pointless in 1.6.0 because the location can be set
+    * before the call to png_set_unknown_chunks.
+    *
+    * TODO: add a png_app_warning in 1.7
+    */
+   if (png_ptr != NULL && info_ptr != NULL && chunk >= 0 &&
+      chunk < info_ptr->unknown_chunks_num)
+   {
+      if ((location & (PNG_HAVE_IHDR|PNG_HAVE_PLTE|PNG_AFTER_IDAT)) == 0)
+      {
+         png_app_error(png_ptr, "invalid unknown chunk location");
+         /* Fake out the pre 1.6.0 behavior: */
+         if (((unsigned int)location & PNG_HAVE_IDAT) != 0) /* undocumented! */
+            location = PNG_AFTER_IDAT;
+
+         else
+            location = PNG_HAVE_IHDR; /* also undocumented */
+      }
+
+      info_ptr->unknown_chunks[chunk].location =
+         check_location(png_ptr, location);
+   }
+}
+#endif /* STORE_UNKNOWN_CHUNKS */
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+png_uint_32 PNGAPI
+png_permit_mng_features(png_structrp png_ptr, png_uint_32 mng_features)
+{
+   png_debug(1, "in png_permit_mng_features");
+
+   if (png_ptr == NULL)
+      return 0;
+
+   png_ptr->mng_features_permitted = mng_features & PNG_ALL_MNG_FEATURES;
+
+   return png_ptr->mng_features_permitted;
+}
+#endif
+
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+static unsigned int
+add_one_chunk(png_bytep list, unsigned int count, png_const_bytep add, int keep)
+{
+   unsigned int i;
+
+   /* Utility function: update the 'keep' state of a chunk if it is already in
+    * the list, otherwise add it to the list.
+    */
+   for (i=0; i<count; ++i, list += 5)
+   {
+      if (memcmp(list, add, 4) == 0)
+      {
+         list[4] = (png_byte)keep;
+
+         return count;
+      }
+   }
+
+   if (keep != PNG_HANDLE_CHUNK_AS_DEFAULT)
+   {
+      ++count;
+      memcpy(list, add, 4);
+      list[4] = (png_byte)keep;
+   }
+
+   return count;
+}
+
+void PNGAPI
+png_set_keep_unknown_chunks(png_structrp png_ptr, int keep,
+    png_const_bytep chunk_list, int num_chunks_in)
+{
+   png_bytep new_list;
+   unsigned int num_chunks, old_num_chunks;
+
+   if (png_ptr == NULL)
+      return;
+
+   if (keep < 0 || keep >= PNG_HANDLE_CHUNK_LAST)
+   {
+      png_app_error(png_ptr, "png_set_keep_unknown_chunks: invalid keep");
+
+      return;
+   }
+
+   if (num_chunks_in <= 0)
+   {
+      png_ptr->unknown_default = keep;
+
+      /* '0' means just set the flags, so stop here */
+      if (num_chunks_in == 0)
+        return;
+   }
+
+   if (num_chunks_in < 0)
+   {
+      /* Ignore all unknown chunks and all chunks recognized by
+       * libpng except for IHDR, PLTE, tRNS, IDAT, and IEND
+       */
+      static const png_byte chunks_to_ignore[] = {
+         98,  75,  71,  68, '\0',  /* bKGD */
+         99,  72,  82,  77, '\0',  /* cHRM */
+        101,  88,  73, 102, '\0',  /* eXIf */
+        103,  65,  77,  65, '\0',  /* gAMA */
+        104,  73,  83,  84, '\0',  /* hIST */
+        105,  67,  67,  80, '\0',  /* iCCP */
+        105,  84,  88, 116, '\0',  /* iTXt */
+        111,  70,  70, 115, '\0',  /* oFFs */
+        112,  67,  65,  76, '\0',  /* pCAL */
+        112,  72,  89, 115, '\0',  /* pHYs */
+        115,  66,  73,  84, '\0',  /* sBIT */
+        115,  67,  65,  76, '\0',  /* sCAL */
+        115,  80,  76,  84, '\0',  /* sPLT */
+        115,  84,  69,  82, '\0',  /* sTER */
+        115,  82,  71,  66, '\0',  /* sRGB */
+        116,  69,  88, 116, '\0',  /* tEXt */
+        116,  73,  77,  69, '\0',  /* tIME */
+        122,  84,  88, 116, '\0'   /* zTXt */
+      };
+
+      chunk_list = chunks_to_ignore;
+      num_chunks = (unsigned int)/*SAFE*/(sizeof chunks_to_ignore)/5U;
+   }
+
+   else /* num_chunks_in > 0 */
+   {
+      if (chunk_list == NULL)
+      {
+         /* Prior to 1.6.0 this was silently ignored, now it is an app_error
+          * which can be switched off.
+          */
+         png_app_error(png_ptr, "png_set_keep_unknown_chunks: no chunk list");
+
+         return;
+      }
+
+      num_chunks = (unsigned int)num_chunks_in;
+   }
+
+   old_num_chunks = png_ptr->num_chunk_list;
+   if (png_ptr->chunk_list == NULL)
+      old_num_chunks = 0;
+
+   /* Since num_chunks is always restricted to UINT_MAX/5 this can't overflow.
+    */
+   if (num_chunks + old_num_chunks > UINT_MAX/5)
+   {
+      png_app_error(png_ptr, "png_set_keep_unknown_chunks: too many chunks");
+
+      return;
+   }
+
+   /* If these chunks are being reset to the default then no more memory is
+    * required because add_one_chunk above doesn't extend the list if the 'keep'
+    * parameter is the default.
+    */
+   if (keep != 0)
+   {
+      new_list = png_voidcast(png_bytep, png_malloc(png_ptr,
+          5 * (num_chunks + old_num_chunks)));
+
+      if (old_num_chunks > 0)
+         memcpy(new_list, png_ptr->chunk_list, 5*old_num_chunks);
+   }
+
+   else if (old_num_chunks > 0)
+      new_list = png_ptr->chunk_list;
+
+   else
+      new_list = NULL;
+
+   /* Add the new chunks together with each one's handling code.  If the chunk
+    * already exists the code is updated, otherwise the chunk is added to the
+    * end.  (In libpng 1.6.0 order no longer matters because this code enforces
+    * the earlier convention that the last setting is the one that is used.)
+    */
+   if (new_list != NULL)
+   {
+      png_const_bytep inlist;
+      png_bytep outlist;
+      unsigned int i;
+
+      for (i=0; i<num_chunks; ++i)
+      {
+         old_num_chunks = add_one_chunk(new_list, old_num_chunks,
+             chunk_list+5*i, keep);
+      }
+
+      /* Now remove any spurious 'default' entries. */
+      num_chunks = 0;
+      for (i=0, inlist=outlist=new_list; i<old_num_chunks; ++i, inlist += 5)
+      {
+         if (inlist[4])
+         {
+            if (outlist != inlist)
+               memcpy(outlist, inlist, 5);
+            outlist += 5;
+            ++num_chunks;
+         }
+      }
+
+      /* This means the application has removed all the specialized handling. */
+      if (num_chunks == 0)
+      {
+         if (png_ptr->chunk_list != new_list)
+            png_free(png_ptr, new_list);
+
+         new_list = NULL;
+      }
+   }
+
+   else
+      num_chunks = 0;
+
+   png_ptr->num_chunk_list = num_chunks;
+
+   if (png_ptr->chunk_list != new_list)
+   {
+      if (png_ptr->chunk_list != NULL)
+         png_free(png_ptr, png_ptr->chunk_list);
+
+      png_ptr->chunk_list = new_list;
+   }
+}
+#endif
+
+#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
+void PNGAPI
+png_set_read_user_chunk_fn(png_structrp png_ptr, png_voidp user_chunk_ptr,
+    png_user_chunk_ptr read_user_chunk_fn)
+{
+   png_debug(1, "in png_set_read_user_chunk_fn");
+
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->read_user_chunk_fn = read_user_chunk_fn;
+   png_ptr->user_chunk_ptr = user_chunk_ptr;
+}
+#endif
+
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+void PNGAPI
+png_set_rows(png_const_structrp png_ptr, png_inforp info_ptr,
+    png_bytepp row_pointers)
+{
+   png_debug(1, "in png_set_rows");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   if (info_ptr->row_pointers != NULL &&
+       (info_ptr->row_pointers != row_pointers))
+      png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0);
+
+   info_ptr->row_pointers = row_pointers;
+
+   if (row_pointers != NULL)
+      info_ptr->valid |= PNG_INFO_IDAT;
+}
+#endif
+
+void PNGAPI
+png_set_compression_buffer_size(png_structrp png_ptr, size_t size)
+{
+   png_debug(1, "in png_set_compression_buffer_size");
+
+   if (png_ptr == NULL)
+      return;
+
+   if (size == 0 || size > PNG_UINT_31_MAX)
+      png_error(png_ptr, "invalid compression buffer size");
+
+#  ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+   if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0)
+   {
+      png_ptr->IDAT_read_size = (png_uint_32)size; /* checked above */
+      return;
+   }
+#  endif
+
+#  ifdef PNG_WRITE_SUPPORTED
+   if ((png_ptr->mode & PNG_IS_READ_STRUCT) == 0)
+   {
+      if (png_ptr->zowner != 0)
+      {
+         png_warning(png_ptr,
+             "Compression buffer size cannot be changed because it is in use");
+
+         return;
+      }
+
+#ifndef __COVERITY__
+      /* Some compilers complain that this is always false.  However, it
+       * can be true when integer overflow happens.
+       */
+      if (size > ZLIB_IO_MAX)
+      {
+         png_warning(png_ptr,
+             "Compression buffer size limited to system maximum");
+         size = ZLIB_IO_MAX; /* must fit */
+      }
+#endif
+
+      if (size < 6)
+      {
+         /* Deflate will potentially go into an infinite loop on a SYNC_FLUSH
+          * if this is permitted.
+          */
+         png_warning(png_ptr,
+             "Compression buffer size cannot be reduced below 6");
+
+         return;
+      }
+
+      if (png_ptr->zbuffer_size != size)
+      {
+         png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list);
+         png_ptr->zbuffer_size = (uInt)size;
+      }
+   }
+#  endif
+}
+
+void PNGAPI
+png_set_invalid(png_const_structrp png_ptr, png_inforp info_ptr, int mask)
+{
+   if (png_ptr != NULL && info_ptr != NULL)
+      info_ptr->valid &= (unsigned int)(~mask);
+}
+
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+/* This function was added to libpng 1.2.6 */
+void PNGAPI
+png_set_user_limits(png_structrp png_ptr, png_uint_32 user_width_max,
+    png_uint_32 user_height_max)
+{
+   png_debug(1, "in png_set_user_limits");
+
+   /* Images with dimensions larger than these limits will be
+    * rejected by png_set_IHDR().  To accept any PNG datastream
+    * regardless of dimensions, set both limits to 0x7fffffff.
+    */
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->user_width_max = user_width_max;
+   png_ptr->user_height_max = user_height_max;
+}
+
+/* This function was added to libpng 1.4.0 */
+void PNGAPI
+png_set_chunk_cache_max(png_structrp png_ptr, png_uint_32 user_chunk_cache_max)
+{
+   png_debug(1, "in png_set_chunk_cache_max");
+
+   if (png_ptr != NULL)
+      png_ptr->user_chunk_cache_max = user_chunk_cache_max;
+}
+
+/* This function was added to libpng 1.4.1 */
+void PNGAPI
+png_set_chunk_malloc_max(png_structrp png_ptr,
+    png_alloc_size_t user_chunk_malloc_max)
+{
+   png_debug(1, "in png_set_chunk_malloc_max");
+
+   if (png_ptr != NULL)
+      png_ptr->user_chunk_malloc_max = user_chunk_malloc_max;
+}
+#endif /* ?SET_USER_LIMITS */
+
+
+#ifdef PNG_BENIGN_ERRORS_SUPPORTED
+void PNGAPI
+png_set_benign_errors(png_structrp png_ptr, int allowed)
+{
+   png_debug(1, "in png_set_benign_errors");
+
+   /* If allowed is 1, png_benign_error() is treated as a warning.
+    *
+    * If allowed is 0, png_benign_error() is treated as an error (which
+    * is the default behavior if png_set_benign_errors() is not called).
+    */
+
+   if (allowed != 0)
+      png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN |
+         PNG_FLAG_APP_WARNINGS_WARN | PNG_FLAG_APP_ERRORS_WARN;
+
+   else
+      png_ptr->flags &= ~(PNG_FLAG_BENIGN_ERRORS_WARN |
+         PNG_FLAG_APP_WARNINGS_WARN | PNG_FLAG_APP_ERRORS_WARN);
+}
+#endif /* BENIGN_ERRORS */
+
+#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
+   /* Whether to report invalid palette index; added at libng-1.5.10.
+    * It is possible for an indexed (color-type==3) PNG file to contain
+    * pixels with invalid (out-of-range) indexes if the PLTE chunk has
+    * fewer entries than the image's bit-depth would allow. We recover
+    * from this gracefully by filling any incomplete palette with zeros
+    * (opaque black).  By default, when this occurs libpng will issue
+    * a benign error.  This API can be used to override that behavior.
+    */
+void PNGAPI
+png_set_check_for_invalid_index(png_structrp png_ptr, int allowed)
+{
+   png_debug(1, "in png_set_check_for_invalid_index");
+
+   if (allowed > 0)
+      png_ptr->num_palette_max = 0;
+
+   else
+      png_ptr->num_palette_max = -1;
+}
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED) || defined(PNG_pCAL_SUPPORTED) || \
+    defined(PNG_iCCP_SUPPORTED) || defined(PNG_sPLT_SUPPORTED)
+/* Check that the tEXt or zTXt keyword is valid per PNG 1.0 specification,
+ * and if invalid, correct the keyword rather than discarding the entire
+ * chunk.  The PNG 1.0 specification requires keywords 1-79 characters in
+ * length, forbids leading or trailing whitespace, multiple internal spaces,
+ * and the non-break space (0x80) from ISO 8859-1.  Returns keyword length.
+ *
+ * The 'new_key' buffer must be 80 characters in size (for the keyword plus a
+ * trailing '\0').  If this routine returns 0 then there was no keyword, or a
+ * valid one could not be generated, and the caller must png_error.
+ */
+png_uint_32 /* PRIVATE */
+png_check_keyword(png_structrp png_ptr, png_const_charp key, png_bytep new_key)
+{
+#ifdef PNG_WARNINGS_SUPPORTED
+   png_const_charp orig_key = key;
+#endif
+   png_uint_32 key_len = 0;
+   int bad_character = 0;
+   int space = 1;
+
+   png_debug(1, "in png_check_keyword");
+
+   if (key == NULL)
+   {
+      *new_key = 0;
+      return 0;
+   }
+
+   while (*key && key_len < 79)
+   {
+      png_byte ch = (png_byte)*key++;
+
+      if ((ch > 32 && ch <= 126) || (ch >= 161 /*&& ch <= 255*/))
+      {
+         *new_key++ = ch; ++key_len; space = 0;
+      }
+
+      else if (space == 0)
+      {
+         /* A space or an invalid character when one wasn't seen immediately
+          * before; output just a space.
+          */
+         *new_key++ = 32; ++key_len; space = 1;
+
+         /* If the character was not a space then it is invalid. */
+         if (ch != 32)
+            bad_character = ch;
+      }
+
+      else if (bad_character == 0)
+         bad_character = ch; /* just skip it, record the first error */
+   }
+
+   if (key_len > 0 && space != 0) /* trailing space */
+   {
+      --key_len; --new_key;
+      if (bad_character == 0)
+         bad_character = 32;
+   }
+
+   /* Terminate the keyword */
+   *new_key = 0;
+
+   if (key_len == 0)
+      return 0;
+
+#ifdef PNG_WARNINGS_SUPPORTED
+   /* Try to only output one warning per keyword: */
+   if (*key != 0) /* keyword too long */
+      png_warning(png_ptr, "keyword truncated");
+
+   else if (bad_character != 0)
+   {
+      PNG_WARNING_PARAMETERS(p)
+
+      png_warning_parameter(p, 1, orig_key);
+      png_warning_parameter_signed(p, 2, PNG_NUMBER_FORMAT_02x, bad_character);
+
+      png_formatted_warning(png_ptr, p, "keyword \"@1\": bad character '0x@2'");
+   }
+#else /* !WARNINGS */
+   PNG_UNUSED(png_ptr)
+#endif /* !WARNINGS */
+
+   return key_len;
+}
+#endif /* TEXT || pCAL || iCCP || sPLT */
+#endif /* READ || WRITE */
diff --git a/reg-io/png/lpng1510/pngstruct.h b/reg-io/png/lpng/pngstruct.h
similarity index 52%
rename from reg-io/png/lpng1510/pngstruct.h
rename to reg-io/png/lpng/pngstruct.h
index edc335f3..09ea883d 100644
--- a/reg-io/png/lpng1510/pngstruct.h
+++ b/reg-io/png/lpng/pngstruct.h
@@ -1,356 +1,479 @@
-
-/* pngstruct.h - header file for PNG reference library
- *
- * Copyright (c) 1998-2011 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * Last changed in libpng 1.5.9 [March 29, 2012]
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-/* The structure that holds the information to read and write PNG files.
- * The only people who need to care about what is inside of this are the
- * people who will be modifying the library for their own special needs.
- * It should NOT be accessed directly by an application.
- */
-
-#pragma once
-
-/* zlib.h defines the structure z_stream, an instance of which is included
- * in this structure and is required for decompressing the LZ compressed
- * data in PNG files.
- */
-#include "zlib.h"
-
-struct png_struct_def
-{
-#ifdef PNG_SETJMP_SUPPORTED
-   jmp_buf longjmp_buffer;    /* used in png_error */
-   png_longjmp_ptr longjmp_fn;/* setjmp non-local goto function. */
-#endif
-   png_error_ptr error_fn;    /* function for printing errors and aborting */
-#ifdef PNG_WARNINGS_SUPPORTED
-   png_error_ptr warning_fn;  /* function for printing warnings */
-#endif
-   png_voidp error_ptr;       /* user supplied struct for error functions */
-   png_rw_ptr write_data_fn;  /* function for writing output data */
-   png_rw_ptr read_data_fn;   /* function for reading input data */
-   png_voidp io_ptr;          /* ptr to application struct for I/O functions */
-
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-   png_user_transform_ptr read_user_transform_fn; /* user read transform */
-#endif
-
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-   png_user_transform_ptr write_user_transform_fn; /* user write transform */
-#endif
-
-   /* These were added in libpng-1.0.2 */
-#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
-#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
-    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
-   png_voidp user_transform_ptr; /* user supplied struct for user transform */
-   png_byte user_transform_depth;    /* bit depth of user transformed pixels */
-   png_byte user_transform_channels; /* channels in user transformed pixels */
-#endif
-#endif
-
-   png_uint_32 mode;          /* tells us where we are in the PNG file */
-   png_uint_32 flags;         /* flags indicating various things to libpng */
-   png_uint_32 transformations; /* which transformations to perform */
-
-   z_stream zstream;          /* pointer to decompression structure (below) */
-   png_bytep zbuf;            /* buffer for zlib */
-   uInt zbuf_size;            /* size of zbuf (typically 65536) */
-#ifdef PNG_WRITE_SUPPORTED
-
-   /* Added in 1.5.4: state to keep track of whether the zstream has been
-    * initialized and if so whether it is for IDAT or some other chunk.
-    */
-#define PNG_ZLIB_UNINITIALIZED 0
-#define PNG_ZLIB_FOR_IDAT      1
-#define PNG_ZLIB_FOR_TEXT      2 /* anything other than IDAT */
-#define PNG_ZLIB_USE_MASK      3 /* bottom two bits */
-#define PNG_ZLIB_IN_USE        4 /* a flag value */
-
-   png_uint_32 zlib_state;       /* State of zlib initialization */
-   /* End of material added at libpng 1.5.4 */
-
-   int zlib_level;            /* holds zlib compression level */
-   int zlib_method;           /* holds zlib compression method */
-   int zlib_window_bits;      /* holds zlib compression window bits */
-   int zlib_mem_level;        /* holds zlib compression memory level */
-   int zlib_strategy;         /* holds zlib compression strategy */
-#endif
-   /* Added at libpng 1.5.4 */
-#if defined(PNG_WRITE_COMPRESSED_TEXT_SUPPORTED) || \
-    defined(PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED)
-   int zlib_text_level;            /* holds zlib compression level */
-   int zlib_text_method;           /* holds zlib compression method */
-   int zlib_text_window_bits;      /* holds zlib compression window bits */
-   int zlib_text_mem_level;        /* holds zlib compression memory level */
-   int zlib_text_strategy;         /* holds zlib compression strategy */
-#endif
-   /* End of material added at libpng 1.5.4 */
-
-   png_uint_32 width;         /* width of image in pixels */
-   png_uint_32 height;        /* height of image in pixels */
-   png_uint_32 num_rows;      /* number of rows in current pass */
-   png_uint_32 usr_width;     /* width of row at start of write */
-   png_size_t rowbytes;       /* size of row in bytes */
-   png_uint_32 iwidth;        /* width of current interlaced row in pixels */
-   png_uint_32 row_number;    /* current row in interlace pass */
-   png_uint_32 chunk_name;    /* PNG_CHUNK() id of current chunk */
-   png_bytep prev_row;        /* buffer to save previous (unfiltered) row.
-                               * This is a pointer into big_prev_row
-                               */
-   png_bytep row_buf;         /* buffer to save current (unfiltered) row.
-                               * This is a pointer into big_row_buf
-                               */
-   png_bytep sub_row;         /* buffer to save "sub" row when filtering */
-   png_bytep up_row;          /* buffer to save "up" row when filtering */
-   png_bytep avg_row;         /* buffer to save "avg" row when filtering */
-   png_bytep paeth_row;       /* buffer to save "Paeth" row when filtering */
-   png_size_t info_rowbytes;  /* Added in 1.5.4: cache of updated row bytes */
-
-   png_uint_32 idat_size;     /* current IDAT size for read */
-   png_uint_32 crc;           /* current chunk CRC value */
-   png_colorp palette;        /* palette from the input file */
-   png_uint_16 num_palette;   /* number of color entries in palette */
-
-   /* Added at libpng-1.5.10 */
-#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   int num_palette_max;       /* maximum palette index found in IDAT */
-#endif
-
-   png_uint_16 num_trans;     /* number of transparency values */
-   png_byte compression;      /* file compression type (always 0) */
-   png_byte filter;           /* file filter type (always 0) */
-   png_byte interlaced;       /* PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */
-   png_byte pass;             /* current interlace pass (0 - 6) */
-   png_byte do_filter;        /* row filter flags (see PNG_FILTER_ below ) */
-   png_byte color_type;       /* color type of file */
-   png_byte bit_depth;        /* bit depth of file */
-   png_byte usr_bit_depth;    /* bit depth of users row: write only */
-   png_byte pixel_depth;      /* number of bits per pixel */
-   png_byte channels;         /* number of channels in file */
-   png_byte usr_channels;     /* channels at start of write: write only */
-   png_byte sig_bytes;        /* magic bytes read/written from start of file */
-   png_byte maximum_pixel_depth;
-   /* pixel depth used for the row buffers */
-   png_byte transformed_pixel_depth;
-   /* pixel depth after read/write transforms */
-   png_byte io_chunk_string[5];
-   /* string name of chunk */
-
-#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
-   png_uint_16 filler;           /* filler bytes for pixel expansion */
-#endif
-
-#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED)
-   png_byte background_gamma_type;
-   png_fixed_point background_gamma;
-   png_color_16 background;   /* background color in screen gamma space */
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   png_color_16 background_1; /* background normalized to gamma 1.0 */
-#endif
-#endif /* PNG_bKGD_SUPPORTED */
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-   png_flush_ptr output_flush_fn; /* Function for flushing output */
-   png_uint_32 flush_dist;    /* how many rows apart to flush, 0 - no flush */
-   png_uint_32 flush_rows;    /* number of rows written since last flush */
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   int gamma_shift;      /* number of "insignificant" bits in 16-bit gamma */
-   png_fixed_point gamma;        /* file gamma value */
-   png_fixed_point screen_gamma; /* screen gamma value (display_exponent) */
-
-   png_bytep gamma_table;     /* gamma table for 8-bit depth files */
-   png_uint_16pp gamma_16_table; /* gamma table for 16-bit depth files */
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
-   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
-   png_bytep gamma_from_1;    /* converts from 1.0 to screen */
-   png_bytep gamma_to_1;      /* converts from file to 1.0 */
-   png_uint_16pp gamma_16_from_1; /* converts from 1.0 to screen */
-   png_uint_16pp gamma_16_to_1; /* converts from file to 1.0 */
-#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
-#endif
-
-#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_sBIT_SUPPORTED)
-   png_color_8 sig_bit;       /* significant bits in each available channel */
-#endif
-
-#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
-   png_color_8 shift;         /* shift for significant bit tranformation */
-#endif
-
-#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) \
- || defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
-   png_bytep trans_alpha;           /* alpha values for paletted files */
-   png_color_16 trans_color;  /* transparent color for non-paletted files */
-#endif
-
-   png_read_status_ptr read_row_fn;   /* called after each row is decoded */
-   png_write_status_ptr write_row_fn; /* called after each row is encoded */
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-   png_progressive_info_ptr info_fn; /* called after header data fully read */
-   png_progressive_row_ptr row_fn;   /* called after a prog. row is decoded */
-   png_progressive_end_ptr end_fn;   /* called after image is complete */
-   png_bytep save_buffer_ptr;        /* current location in save_buffer */
-   png_bytep save_buffer;            /* buffer for previously read data */
-   png_bytep current_buffer_ptr;     /* current location in current_buffer */
-   png_bytep current_buffer;         /* buffer for recently used data */
-   png_uint_32 push_length;          /* size of current input chunk */
-   png_uint_32 skip_length;          /* bytes to skip in input data */
-   png_size_t save_buffer_size;      /* amount of data now in save_buffer */
-   png_size_t save_buffer_max;       /* total size of save_buffer */
-   png_size_t buffer_size;           /* total amount of available input data */
-   png_size_t current_buffer_size;   /* amount of data now in current_buffer */
-   int process_mode;                 /* what push library is currently doing */
-   int cur_palette;                  /* current push library palette index */
-
-#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
-
-#if defined(__TURBOC__) && !defined(_Windows) && !defined(__FLAT__)
-   /* For the Borland special 64K segment handler */
-   png_bytepp offset_table_ptr;
-   png_bytep offset_table;
-   png_uint_16 offset_table_number;
-   png_uint_16 offset_table_count;
-   png_uint_16 offset_table_count_free;
-#endif
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-   png_bytep palette_lookup; /* lookup table for quantizing */
-   png_bytep quantize_index; /* index translation for palette files */
-#endif
-
-#if defined(PNG_READ_QUANTIZE_SUPPORTED) || defined(PNG_hIST_SUPPORTED)
-   png_uint_16p hist;                /* histogram */
-#endif
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-   png_byte heuristic_method;        /* heuristic for row filter selection */
-   png_byte num_prev_filters;        /* number of weights for previous rows */
-   png_bytep prev_filters;           /* filter type(s) of previous row(s) */
-   png_uint_16p filter_weights;      /* weight(s) for previous line(s) */
-   png_uint_16p inv_filter_weights;  /* 1/weight(s) for previous line(s) */
-   png_uint_16p filter_costs;        /* relative filter calculation cost */
-   png_uint_16p inv_filter_costs;    /* 1/relative filter calculation cost */
-#endif
-
-#ifdef PNG_TIME_RFC1123_SUPPORTED
-   char time_buffer[29]; /* String to hold RFC 1123 time text */
-#endif
-
-   /* New members added in libpng-1.0.6 */
-
-   png_uint_32 free_me;    /* flags items libpng is responsible for freeing */
-
-#ifdef PNG_USER_CHUNKS_SUPPORTED
-   png_voidp user_chunk_ptr;
-   png_user_chunk_ptr read_user_chunk_fn; /* user read chunk handler */
-#endif
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   int num_chunk_list;
-   png_bytep chunk_list;
-#endif
-
-#ifdef PNG_READ_sRGB_SUPPORTED
-   /* Added in 1.5.5 to record an sRGB chunk in the png. */
-   png_byte is_sRGB;
-#endif
-
-   /* New members added in libpng-1.0.3 */
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-   png_byte rgb_to_gray_status;
-   /* Added in libpng 1.5.5 to record setting of coefficients: */
-   png_byte rgb_to_gray_coefficients_set;
-   /* These were changed from png_byte in libpng-1.0.6 */
-   png_uint_16 rgb_to_gray_red_coeff;
-   png_uint_16 rgb_to_gray_green_coeff;
-   /* deleted in 1.5.5: rgb_to_gray_blue_coeff; */
-#endif
-
-   /* New member added in libpng-1.0.4 (renamed in 1.0.9) */
-#if defined(PNG_MNG_FEATURES_SUPPORTED)
-   /* Changed from png_byte to png_uint_32 at version 1.2.0 */
-   png_uint_32 mng_features_permitted;
-#endif
-
-   /* New member added in libpng-1.0.9, ifdef'ed out in 1.0.12, enabled in 1.2.0 */
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   png_byte filter_type;
-#endif
-
-   /* New members added in libpng-1.2.0 */
-
-   /* New members added in libpng-1.0.2 but first enabled by default in 1.2.0 */
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_voidp mem_ptr;             /* user supplied struct for mem functions */
-   png_malloc_ptr malloc_fn;      /* function for allocating memory */
-   png_free_ptr free_fn;          /* function for freeing memory */
-#endif
-
-   /* New member added in libpng-1.0.13 and 1.2.0 */
-   png_bytep big_row_buf;         /* buffer to save current (unfiltered) row */
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-   /* The following three members were added at version 1.0.14 and 1.2.4 */
-   png_bytep quantize_sort;          /* working sort array */
-   png_bytep index_to_palette;       /* where the original index currently is
-                                        in the palette */
-   png_bytep palette_to_index;       /* which original index points to this
-                                         palette color */
-#endif
-
-   /* New members added in libpng-1.0.16 and 1.2.6 */
-   png_byte compression_type;
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   png_uint_32 user_width_max;
-   png_uint_32 user_height_max;
-
-   /* Added in libpng-1.4.0: Total number of sPLT, text, and unknown
-    * chunks that can be stored (0 means unlimited).
-    */
-   png_uint_32 user_chunk_cache_max;
-
-   /* Total memory that a zTXt, sPLT, iTXt, iCCP, or unknown chunk
-    * can occupy when decompressed.  0 means unlimited.
-    */
-   png_alloc_size_t user_chunk_malloc_max;
-#endif
-
-   /* New member added in libpng-1.0.25 and 1.2.17 */
-#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
-   /* Storage for unknown chunk that the library doesn't recognize. */
-   png_unknown_chunk unknown_chunk;
-#endif
-
-   /* New member added in libpng-1.2.26 */
-   png_size_t old_big_row_buf_size;
-
-   /* New member added in libpng-1.2.30 */
-   png_charp chunkdata;  /* buffer for reading chunk data */
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   /* New member added in libpng-1.4.0 */
-   png_uint_32 io_state;
-#endif
-
-   /* New member added in libpng-1.5.6 */
-   png_bytep big_prev_row;
-
-   void (*read_filter[PNG_FILTER_VALUE_LAST-1])(png_row_infop row_info,
-         png_bytep row, png_const_bytep prev_row);
-};
+
+/* pngstruct.h - header file for PNG reference library
+ *
+ * Copyright (c) 2018-2022 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+/* The structure that holds the information to read and write PNG files.
+ * The only people who need to care about what is inside of this are the
+ * people who will be modifying the library for their own special needs.
+ * It should NOT be accessed directly by an application.
+ */
+
+#ifndef PNGSTRUCT_H
+#define PNGSTRUCT_H
+/* zlib.h defines the structure z_stream, an instance of which is included
+ * in this structure and is required for decompressing the LZ compressed
+ * data in PNG files.
+ */
+#ifndef ZLIB_CONST
+   /* We must ensure that zlib uses 'const' in declarations. */
+#  define ZLIB_CONST
+#endif
+#include "zlib.h"
+#ifdef const
+   /* zlib.h sometimes #defines const to nothing, undo this. */
+#  undef const
+#endif
+
+/* zlib.h has mediocre z_const use before 1.2.6, this stuff is for compatibility
+ * with older builds.
+ */
+#if ZLIB_VERNUM < 0x1260
+#  define PNGZ_MSG_CAST(s) png_constcast(char*,s)
+#  define PNGZ_INPUT_CAST(b) png_constcast(png_bytep,b)
+#else
+#  define PNGZ_MSG_CAST(s) (s)
+#  define PNGZ_INPUT_CAST(b) (b)
+#endif
+
+/* zlib.h declares a magic type 'uInt' that limits the amount of data that zlib
+ * can handle at once.  This type need be no larger than 16 bits (so maximum of
+ * 65535), this define allows us to discover how big it is, but limited by the
+ * maximum for size_t.  The value can be overridden in a library build
+ * (pngusr.h, or set it in CPPFLAGS) and it works to set it to a considerably
+ * lower value (e.g. 255 works).  A lower value may help memory usage (slightly)
+ * and may even improve performance on some systems (and degrade it on others.)
+ */
+#ifndef ZLIB_IO_MAX
+#  define ZLIB_IO_MAX ((uInt)-1)
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+/* The type of a compression buffer list used by the write code. */
+typedef struct png_compression_buffer
+{
+   struct png_compression_buffer *next;
+   png_byte                       output[1]; /* actually zbuf_size */
+} png_compression_buffer, *png_compression_bufferp;
+
+#define PNG_COMPRESSION_BUFFER_SIZE(pp)\
+   (offsetof(png_compression_buffer, output) + (pp)->zbuffer_size)
+#endif
+
+/* Colorspace support; structures used in png_struct, png_info and in internal
+ * functions to hold and communicate information about the color space.
+ *
+ * PNG_COLORSPACE_SUPPORTED is only required if the application will perform
+ * colorspace corrections, otherwise all the colorspace information can be
+ * skipped and the size of libpng can be reduced (significantly) by compiling
+ * out the colorspace support.
+ */
+#ifdef PNG_COLORSPACE_SUPPORTED
+/* The chromaticities of the red, green and blue colorants and the chromaticity
+ * of the corresponding white point (i.e. of rgb(1.0,1.0,1.0)).
+ */
+typedef struct png_xy
+{
+   png_fixed_point redx, redy;
+   png_fixed_point greenx, greeny;
+   png_fixed_point bluex, bluey;
+   png_fixed_point whitex, whitey;
+} png_xy;
+
+/* The same data as above but encoded as CIE XYZ values.  When this data comes
+ * from chromaticities the sum of the Y values is assumed to be 1.0
+ */
+typedef struct png_XYZ
+{
+   png_fixed_point red_X, red_Y, red_Z;
+   png_fixed_point green_X, green_Y, green_Z;
+   png_fixed_point blue_X, blue_Y, blue_Z;
+} png_XYZ;
+#endif /* COLORSPACE */
+
+#if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED)
+/* A colorspace is all the above plus, potentially, profile information;
+ * however at present libpng does not use the profile internally so it is only
+ * stored in the png_info struct (if iCCP is supported.)  The rendering intent
+ * is retained here and is checked.
+ *
+ * The file gamma encoding information is also stored here and gamma correction
+ * is done by libpng, whereas color correction must currently be done by the
+ * application.
+ */
+typedef struct png_colorspace
+{
+#ifdef PNG_GAMMA_SUPPORTED
+   png_fixed_point gamma;        /* File gamma */
+#endif
+
+#ifdef PNG_COLORSPACE_SUPPORTED
+   png_xy      end_points_xy;    /* End points as chromaticities */
+   png_XYZ     end_points_XYZ;   /* End points as CIE XYZ colorant values */
+   png_uint_16 rendering_intent; /* Rendering intent of a profile */
+#endif
+
+   /* Flags are always defined to simplify the code. */
+   png_uint_16 flags;            /* As defined below */
+} png_colorspace, * PNG_RESTRICT png_colorspacerp;
+
+typedef const png_colorspace * PNG_RESTRICT png_const_colorspacerp;
+
+/* General flags for the 'flags' field */
+#define PNG_COLORSPACE_HAVE_GAMMA           0x0001
+#define PNG_COLORSPACE_HAVE_ENDPOINTS       0x0002
+#define PNG_COLORSPACE_HAVE_INTENT          0x0004
+#define PNG_COLORSPACE_FROM_gAMA            0x0008
+#define PNG_COLORSPACE_FROM_cHRM            0x0010
+#define PNG_COLORSPACE_FROM_sRGB            0x0020
+#define PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB 0x0040
+#define PNG_COLORSPACE_MATCHES_sRGB         0x0080 /* exact match on profile */
+#define PNG_COLORSPACE_INVALID              0x8000
+#define PNG_COLORSPACE_CANCEL(flags)        (0xffff ^ (flags))
+#endif /* COLORSPACE || GAMMA */
+
+struct png_struct_def
+{
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf jmp_buf_local;     /* New name in 1.6.0 for jmp_buf in png_struct */
+   png_longjmp_ptr longjmp_fn;/* setjmp non-local goto function. */
+   jmp_buf *jmp_buf_ptr;      /* passed to longjmp_fn */
+   size_t jmp_buf_size;       /* size of the above, if allocated */
+#endif
+   png_error_ptr error_fn;    /* function for printing errors and aborting */
+#ifdef PNG_WARNINGS_SUPPORTED
+   png_error_ptr warning_fn;  /* function for printing warnings */
+#endif
+   png_voidp error_ptr;       /* user supplied struct for error functions */
+   png_rw_ptr write_data_fn;  /* function for writing output data */
+   png_rw_ptr read_data_fn;   /* function for reading input data */
+   png_voidp io_ptr;          /* ptr to application struct for I/O functions */
+
+#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
+   png_user_transform_ptr read_user_transform_fn; /* user read transform */
+#endif
+
+#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
+   png_user_transform_ptr write_user_transform_fn; /* user write transform */
+#endif
+
+/* These were added in libpng-1.0.2 */
+#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+   png_voidp user_transform_ptr; /* user supplied struct for user transform */
+   png_byte user_transform_depth;    /* bit depth of user transformed pixels */
+   png_byte user_transform_channels; /* channels in user transformed pixels */
+#endif
+#endif
+
+   png_uint_32 mode;          /* tells us where we are in the PNG file */
+   png_uint_32 flags;         /* flags indicating various things to libpng */
+   png_uint_32 transformations; /* which transformations to perform */
+
+   png_uint_32 zowner;        /* ID (chunk type) of zstream owner, 0 if none */
+   z_stream    zstream;       /* decompression structure */
+
+#ifdef PNG_WRITE_SUPPORTED
+   png_compression_bufferp zbuffer_list; /* Created on demand during write */
+   uInt                    zbuffer_size; /* size of the actual buffer */
+
+   int zlib_level;            /* holds zlib compression level */
+   int zlib_method;           /* holds zlib compression method */
+   int zlib_window_bits;      /* holds zlib compression window bits */
+   int zlib_mem_level;        /* holds zlib compression memory level */
+   int zlib_strategy;         /* holds zlib compression strategy */
+#endif
+/* Added at libpng 1.5.4 */
+#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
+   int zlib_text_level;            /* holds zlib compression level */
+   int zlib_text_method;           /* holds zlib compression method */
+   int zlib_text_window_bits;      /* holds zlib compression window bits */
+   int zlib_text_mem_level;        /* holds zlib compression memory level */
+   int zlib_text_strategy;         /* holds zlib compression strategy */
+#endif
+/* End of material added at libpng 1.5.4 */
+/* Added at libpng 1.6.0 */
+#ifdef PNG_WRITE_SUPPORTED
+   int zlib_set_level;        /* Actual values set into the zstream on write */
+   int zlib_set_method;
+   int zlib_set_window_bits;
+   int zlib_set_mem_level;
+   int zlib_set_strategy;
+#endif
+
+   png_uint_32 width;         /* width of image in pixels */
+   png_uint_32 height;        /* height of image in pixels */
+   png_uint_32 num_rows;      /* number of rows in current pass */
+   png_uint_32 usr_width;     /* width of row at start of write */
+   size_t rowbytes;           /* size of row in bytes */
+   png_uint_32 iwidth;        /* width of current interlaced row in pixels */
+   png_uint_32 row_number;    /* current row in interlace pass */
+   png_uint_32 chunk_name;    /* PNG_CHUNK() id of current chunk */
+   png_bytep prev_row;        /* buffer to save previous (unfiltered) row.
+                               * While reading this is a pointer into
+                               * big_prev_row; while writing it is separately
+                               * allocated if needed.
+                               */
+   png_bytep row_buf;         /* buffer to save current (unfiltered) row.
+                               * While reading, this is a pointer into
+                               * big_row_buf; while writing it is separately
+                               * allocated.
+                               */
+#ifdef PNG_WRITE_FILTER_SUPPORTED
+   png_bytep try_row;    /* buffer to save trial row when filtering */
+   png_bytep tst_row;    /* buffer to save best trial row when filtering */
+#endif
+   size_t info_rowbytes;      /* Added in 1.5.4: cache of updated row bytes */
+
+   png_uint_32 idat_size;     /* current IDAT size for read */
+   png_uint_32 crc;           /* current chunk CRC value */
+   png_colorp palette;        /* palette from the input file */
+   png_uint_16 num_palette;   /* number of color entries in palette */
+
+/* Added at libpng-1.5.10 */
+#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED
+   int num_palette_max;       /* maximum palette index found in IDAT */
+#endif
+
+   png_uint_16 num_trans;     /* number of transparency values */
+   png_byte compression;      /* file compression type (always 0) */
+   png_byte filter;           /* file filter type (always 0) */
+   png_byte interlaced;       /* PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */
+   png_byte pass;             /* current interlace pass (0 - 6) */
+   png_byte do_filter;        /* row filter flags (see PNG_FILTER_ in png.h ) */
+   png_byte color_type;       /* color type of file */
+   png_byte bit_depth;        /* bit depth of file */
+   png_byte usr_bit_depth;    /* bit depth of users row: write only */
+   png_byte pixel_depth;      /* number of bits per pixel */
+   png_byte channels;         /* number of channels in file */
+#ifdef PNG_WRITE_SUPPORTED
+   png_byte usr_channels;     /* channels at start of write: write only */
+#endif
+   png_byte sig_bytes;        /* magic bytes read/written from start of file */
+   png_byte maximum_pixel_depth;
+                              /* pixel depth used for the row buffers */
+   png_byte transformed_pixel_depth;
+                              /* pixel depth after read/write transforms */
+#if ZLIB_VERNUM >= 0x1240
+   png_byte zstream_start;    /* at start of an input zlib stream */
+#endif /* Zlib >= 1.2.4 */
+#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
+   png_uint_16 filler;           /* filler bytes for pixel expansion */
+#endif
+
+#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
+   defined(PNG_READ_ALPHA_MODE_SUPPORTED)
+   png_byte background_gamma_type;
+   png_fixed_point background_gamma;
+   png_color_16 background;   /* background color in screen gamma space */
+#ifdef PNG_READ_GAMMA_SUPPORTED
+   png_color_16 background_1; /* background normalized to gamma 1.0 */
+#endif
+#endif /* bKGD */
+
+#ifdef PNG_WRITE_FLUSH_SUPPORTED
+   png_flush_ptr output_flush_fn; /* Function for flushing output */
+   png_uint_32 flush_dist;    /* how many rows apart to flush, 0 - no flush */
+   png_uint_32 flush_rows;    /* number of rows written since last flush */
+#endif
+
+#ifdef PNG_READ_GAMMA_SUPPORTED
+   int gamma_shift;      /* number of "insignificant" bits in 16-bit gamma */
+   png_fixed_point screen_gamma; /* screen gamma value (display_exponent) */
+
+   png_bytep gamma_table;     /* gamma table for 8-bit depth files */
+   png_uint_16pp gamma_16_table; /* gamma table for 16-bit depth files */
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
+   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
+   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+   png_bytep gamma_from_1;    /* converts from 1.0 to screen */
+   png_bytep gamma_to_1;      /* converts from file to 1.0 */
+   png_uint_16pp gamma_16_from_1; /* converts from 1.0 to screen */
+   png_uint_16pp gamma_16_to_1; /* converts from file to 1.0 */
+#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_sBIT_SUPPORTED)
+   png_color_8 sig_bit;       /* significant bits in each available channel */
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
+   png_color_8 shift;         /* shift for significant bit transformation */
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) \
+ || defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_bytep trans_alpha;           /* alpha values for paletted files */
+   png_color_16 trans_color;  /* transparent color for non-paletted files */
+#endif
+
+   png_read_status_ptr read_row_fn;   /* called after each row is decoded */
+   png_write_status_ptr write_row_fn; /* called after each row is encoded */
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+   png_progressive_info_ptr info_fn; /* called after header data fully read */
+   png_progressive_row_ptr row_fn;   /* called after a prog. row is decoded */
+   png_progressive_end_ptr end_fn;   /* called after image is complete */
+   png_bytep save_buffer_ptr;        /* current location in save_buffer */
+   png_bytep save_buffer;            /* buffer for previously read data */
+   png_bytep current_buffer_ptr;     /* current location in current_buffer */
+   png_bytep current_buffer;         /* buffer for recently used data */
+   png_uint_32 push_length;          /* size of current input chunk */
+   png_uint_32 skip_length;          /* bytes to skip in input data */
+   size_t save_buffer_size;          /* amount of data now in save_buffer */
+   size_t save_buffer_max;           /* total size of save_buffer */
+   size_t buffer_size;               /* total amount of available input data */
+   size_t current_buffer_size;       /* amount of data now in current_buffer */
+   int process_mode;                 /* what push library is currently doing */
+   int cur_palette;                  /* current push library palette index */
+#endif /* PROGRESSIVE_READ */
+
+#ifdef PNG_READ_QUANTIZE_SUPPORTED
+   png_bytep palette_lookup; /* lookup table for quantizing */
+   png_bytep quantize_index; /* index translation for palette files */
+#endif
+
+/* Options */
+#ifdef PNG_SET_OPTION_SUPPORTED
+   png_uint_32 options;           /* On/off state (up to 16 options) */
+#endif
+
+#if PNG_LIBPNG_VER < 10700
+/* To do: remove this from libpng-1.7 */
+#ifdef PNG_TIME_RFC1123_SUPPORTED
+   char time_buffer[29]; /* String to hold RFC 1123 time text */
+#endif
+#endif
+
+/* New members added in libpng-1.0.6 */
+
+   png_uint_32 free_me;    /* flags items libpng is responsible for freeing */
+
+#ifdef PNG_USER_CHUNKS_SUPPORTED
+   png_voidp user_chunk_ptr;
+#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
+   png_user_chunk_ptr read_user_chunk_fn; /* user read chunk handler */
+#endif
+#endif
+
+#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
+   int          unknown_default; /* As PNG_HANDLE_* */
+   unsigned int num_chunk_list;  /* Number of entries in the list */
+   png_bytep    chunk_list;      /* List of png_byte[5]; the textual chunk name
+                                  * followed by a PNG_HANDLE_* byte */
+#endif
+
+/* New members added in libpng-1.0.3 */
+#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
+   png_byte rgb_to_gray_status;
+   /* Added in libpng 1.5.5 to record setting of coefficients: */
+   png_byte rgb_to_gray_coefficients_set;
+   /* These were changed from png_byte in libpng-1.0.6 */
+   png_uint_16 rgb_to_gray_red_coeff;
+   png_uint_16 rgb_to_gray_green_coeff;
+   /* deleted in 1.5.5: rgb_to_gray_blue_coeff; */
+#endif
+
+/* New member added in libpng-1.6.36 */
+#if defined(PNG_READ_EXPAND_SUPPORTED) && \
+    defined(PNG_ARM_NEON_IMPLEMENTATION)
+   png_bytep riffled_palette; /* buffer for accelerated palette expansion */
+#endif
+
+/* New member added in libpng-1.0.4 (renamed in 1.0.9) */
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+/* Changed from png_byte to png_uint_32 at version 1.2.0 */
+   png_uint_32 mng_features_permitted;
+#endif
+
+/* New member added in libpng-1.0.9, ifdef'ed out in 1.0.12, enabled in 1.2.0 */
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+   png_byte filter_type;
+#endif
+
+/* New members added in libpng-1.2.0 */
+
+/* New members added in libpng-1.0.2 but first enabled by default in 1.2.0 */
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_voidp mem_ptr;             /* user supplied struct for mem functions */
+   png_malloc_ptr malloc_fn;      /* function for allocating memory */
+   png_free_ptr free_fn;          /* function for freeing memory */
+#endif
+
+/* New member added in libpng-1.0.13 and 1.2.0 */
+   png_bytep big_row_buf;         /* buffer to save current (unfiltered) row */
+
+#ifdef PNG_READ_QUANTIZE_SUPPORTED
+/* The following three members were added at version 1.0.14 and 1.2.4 */
+   png_bytep quantize_sort;          /* working sort array */
+   png_bytep index_to_palette;       /* where the original index currently is
+                                        in the palette */
+   png_bytep palette_to_index;       /* which original index points to this
+                                         palette color */
+#endif
+
+/* New members added in libpng-1.0.16 and 1.2.6 */
+   png_byte compression_type;
+
+#ifdef PNG_USER_LIMITS_SUPPORTED
+   png_uint_32 user_width_max;
+   png_uint_32 user_height_max;
+
+   /* Added in libpng-1.4.0: Total number of sPLT, text, and unknown
+    * chunks that can be stored (0 means unlimited).
+    */
+   png_uint_32 user_chunk_cache_max;
+
+   /* Total memory that a zTXt, sPLT, iTXt, iCCP, or unknown chunk
+    * can occupy when decompressed.  0 means unlimited.
+    */
+   png_alloc_size_t user_chunk_malloc_max;
+#endif
+
+/* New member added in libpng-1.0.25 and 1.2.17 */
+#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
+   /* Temporary storage for unknown chunk that the library doesn't recognize,
+    * used while reading the chunk.
+    */
+   png_unknown_chunk unknown_chunk;
+#endif
+
+/* New member added in libpng-1.2.26 */
+   size_t old_big_row_buf_size;
+
+#ifdef PNG_READ_SUPPORTED
+/* New member added in libpng-1.2.30 */
+  png_bytep        read_buffer;      /* buffer for reading chunk data */
+  png_alloc_size_t read_buffer_size; /* current size of the buffer */
+#endif
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+  uInt             IDAT_read_size;   /* limit on read buffer size for IDAT */
+#endif
+
+#ifdef PNG_IO_STATE_SUPPORTED
+/* New member added in libpng-1.4.0 */
+   png_uint_32 io_state;
+#endif
+
+/* New member added in libpng-1.5.6 */
+   png_bytep big_prev_row;
+
+/* New member added in libpng-1.5.7 */
+   void (*read_filter[PNG_FILTER_VALUE_LAST-1])(png_row_infop row_info,
+      png_bytep row, png_const_bytep prev_row);
+
+#ifdef PNG_READ_SUPPORTED
+#if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED)
+   png_colorspace   colorspace;
+#endif
+#endif
+};
+#endif /* PNGSTRUCT_H */
diff --git a/reg-io/png/lpng/pngtest.c b/reg-io/png/lpng/pngtest.c
new file mode 100644
index 00000000..cc3e6e9a
--- /dev/null
+++ b/reg-io/png/lpng/pngtest.c
@@ -0,0 +1,2158 @@
+
+/* pngtest.c - a simple test program to test libpng
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ *
+ * This program reads in a PNG image, writes it out again, and then
+ * compares the two files.  If the files are identical, this shows that
+ * the basic chunk handling, filtering, and (de)compression code is working
+ * properly.  It does not currently test all of the transforms, although
+ * it probably should.
+ *
+ * The program will report "FAIL" in certain legitimate cases:
+ * 1) when the compression level or filter selection method is changed.
+ * 2) when the maximum IDAT size (PNG_ZBUF_SIZE in pngconf.h) is not 8192.
+ * 3) unknown unsafe-to-copy ancillary chunks or unknown critical chunks
+ *    exist in the input file.
+ * 4) others not listed here...
+ * In these cases, it is best to check with another tool such as "pngcheck"
+ * to see what the differences between the two files are.
+ *
+ * If a filename is given on the command-line, then this file is used
+ * for the input, rather than the default "pngtest.png".  This allows
+ * testing a wide variety of files easily.  You can also test a number
+ * of files at once by typing "pngtest -m file1.png file2.png ..."
+ */
+
+#define _POSIX_SOURCE 1
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Defined so I can write to a file on gui/windowing platforms */
+/*  #define STDERR stderr  */
+#define STDERR stdout   /* For DOS */
+
+#include "png.h"
+
+/* Known chunks that exist in pngtest.png must be supported or pngtest will fail
+ * simply as a result of re-ordering them.  This may be fixed in 1.7
+ *
+ * pngtest allocates a single row buffer for each row and overwrites it,
+ * therefore if the write side doesn't support the writing of interlaced images
+ * nothing can be done for an interlaced image (and the code below will fail
+ * horribly trying to write extra data after writing garbage).
+ */
+#if defined PNG_READ_SUPPORTED && /* else nothing can be done */\
+   defined PNG_READ_bKGD_SUPPORTED &&\
+   defined PNG_READ_cHRM_SUPPORTED &&\
+   defined PNG_READ_gAMA_SUPPORTED &&\
+   defined PNG_READ_oFFs_SUPPORTED &&\
+   defined PNG_READ_pCAL_SUPPORTED &&\
+   defined PNG_READ_pHYs_SUPPORTED &&\
+   defined PNG_READ_sBIT_SUPPORTED &&\
+   defined PNG_READ_sCAL_SUPPORTED &&\
+   defined PNG_READ_sRGB_SUPPORTED &&\
+   defined PNG_READ_sPLT_SUPPORTED &&\
+   defined PNG_READ_tEXt_SUPPORTED &&\
+   defined PNG_READ_tIME_SUPPORTED &&\
+   defined PNG_READ_zTXt_SUPPORTED &&\
+   (defined PNG_WRITE_INTERLACING_SUPPORTED || PNG_LIBPNG_VER >= 10700)
+
+#ifdef PNG_ZLIB_HEADER
+#  include PNG_ZLIB_HEADER /* defined by pnglibconf.h from 1.7 */
+#else
+#  include "zlib.h"
+#endif
+
+/* Copied from pngpriv.h but only used in error messages below. */
+#ifndef PNG_ZBUF_SIZE
+#  define PNG_ZBUF_SIZE 8192
+#endif
+#define FCLOSE(file) fclose(file)
+
+#ifndef PNG_STDIO_SUPPORTED
+typedef FILE                * png_FILE_p;
+#endif
+
+/* Makes pngtest verbose so we can find problems. */
+#ifndef PNG_DEBUG
+#  define PNG_DEBUG 0
+#endif
+
+#if PNG_DEBUG > 1
+#  define pngtest_debug(m)        ((void)fprintf(stderr, m "\n"))
+#  define pngtest_debug1(m,p1)    ((void)fprintf(stderr, m "\n", p1))
+#  define pngtest_debug2(m,p1,p2) ((void)fprintf(stderr, m "\n", p1, p2))
+#else
+#  define pngtest_debug(m)        ((void)0)
+#  define pngtest_debug1(m,p1)    ((void)0)
+#  define pngtest_debug2(m,p1,p2) ((void)0)
+#endif
+
+#if !PNG_DEBUG
+#  define SINGLE_ROWBUF_ALLOC  /* Makes buffer overruns easier to nail */
+#endif
+
+#ifndef PNG_UNUSED
+#  define PNG_UNUSED(param) (void)param;
+#endif
+
+/* Turn on CPU timing
+#define PNGTEST_TIMING
+*/
+
+#ifndef PNG_FLOATING_POINT_SUPPORTED
+#undef PNGTEST_TIMING
+#endif
+
+#ifdef PNGTEST_TIMING
+static float t_start, t_stop, t_decode, t_encode, t_misc;
+#include <time.h>
+#endif
+
+#ifdef PNG_TIME_RFC1123_SUPPORTED
+#define PNG_tIME_STRING_LENGTH 29
+static int tIME_chunk_present = 0;
+static char tIME_string[PNG_tIME_STRING_LENGTH] = "tIME chunk is not present";
+
+#if PNG_LIBPNG_VER < 10619
+#define png_convert_to_rfc1123_buffer(ts, t) tIME_to_str(read_ptr, ts, t)
+
+static int
+tIME_to_str(png_structp png_ptr, png_charp ts, png_const_timep t)
+{
+   png_const_charp str = png_convert_to_rfc1123(png_ptr, t);
+
+   if (str == NULL)
+       return 0;
+
+   strcpy(ts, str);
+   return 1;
+}
+#endif /* older libpng */
+#endif
+
+static int verbose = 0;
+static int strict = 0;
+static int relaxed = 0;
+static int xfail = 0;
+static int unsupported_chunks = 0; /* chunk unsupported by libpng in input */
+static int error_count = 0; /* count calls to png_error */
+static int warning_count = 0; /* count calls to png_warning */
+
+/* Define png_jmpbuf() in case we are using a pre-1.0.6 version of libpng */
+#ifndef png_jmpbuf
+#  define png_jmpbuf(png_ptr) png_ptr->jmpbuf
+#endif
+
+/* Defines for unknown chunk handling if required. */
+#ifndef PNG_HANDLE_CHUNK_ALWAYS
+#  define PNG_HANDLE_CHUNK_ALWAYS       3
+#endif
+#ifndef PNG_HANDLE_CHUNK_IF_SAFE
+#  define PNG_HANDLE_CHUNK_IF_SAFE      2
+#endif
+
+/* Utility to save typing/errors, the argument must be a name */
+#define MEMZERO(var) ((void)memset(&var, 0, sizeof var))
+
+/* Example of using row callbacks to make a simple progress meter */
+static int status_pass = 1;
+static int status_dots_requested = 0;
+static int status_dots = 1;
+
+static void PNGCBAPI
+read_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass)
+{
+   if (png_ptr == NULL || row_number > PNG_UINT_31_MAX)
+      return;
+
+   if (status_pass != pass)
+   {
+      fprintf(stdout, "\n Pass %d: ", pass);
+      status_pass = pass;
+      status_dots = 31;
+   }
+
+   status_dots--;
+
+   if (status_dots == 0)
+   {
+      fprintf(stdout, "\n         ");
+      status_dots=30;
+   }
+
+   fprintf(stdout, "r");
+}
+
+#ifdef PNG_WRITE_SUPPORTED
+static void PNGCBAPI
+write_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass)
+{
+   if (png_ptr == NULL || row_number > PNG_UINT_31_MAX || pass > 7)
+      return;
+
+   fprintf(stdout, "w");
+}
+#endif
+
+
+#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
+/* Example of using a user transform callback (doesn't do anything at present).
+ */
+static void PNGCBAPI
+read_user_callback(png_structp png_ptr, png_row_infop row_info, png_bytep data)
+{
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(row_info)
+   PNG_UNUSED(data)
+}
+#endif
+
+#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
+/* Example of using user transform callback (we don't transform anything,
+ * but merely count the zero samples)
+ */
+
+static png_uint_32 zero_samples;
+
+static void PNGCBAPI
+count_zero_samples(png_structp png_ptr, png_row_infop row_info, png_bytep data)
+{
+   png_bytep dp = data;
+   if (png_ptr == NULL)
+      return;
+
+   /* Contents of row_info:
+    *  png_uint_32 width      width of row
+    *  png_uint_32 rowbytes   number of bytes in row
+    *  png_byte color_type    color type of pixels
+    *  png_byte bit_depth     bit depth of samples
+    *  png_byte channels      number of channels (1-4)
+    *  png_byte pixel_depth   bits per pixel (depth*channels)
+    */
+
+   /* Counts the number of zero samples (or zero pixels if color_type is 3 */
+
+   if (row_info->color_type == 0 || row_info->color_type == 3)
+   {
+      int pos = 0;
+      png_uint_32 n, nstop;
+
+      for (n = 0, nstop=row_info->width; n<nstop; n++)
+      {
+         if (row_info->bit_depth == 1)
+         {
+            if (((*dp << pos++ ) & 0x80) == 0)
+               zero_samples++;
+
+            if (pos == 8)
+            {
+               pos = 0;
+               dp++;
+            }
+         }
+
+         if (row_info->bit_depth == 2)
+         {
+            if (((*dp << (pos+=2)) & 0xc0) == 0)
+               zero_samples++;
+
+            if (pos == 8)
+            {
+               pos = 0;
+               dp++;
+            }
+         }
+
+         if (row_info->bit_depth == 4)
+         {
+            if (((*dp << (pos+=4)) & 0xf0) == 0)
+               zero_samples++;
+
+            if (pos == 8)
+            {
+               pos = 0;
+               dp++;
+            }
+         }
+
+         if (row_info->bit_depth == 8)
+            if (*dp++ == 0)
+               zero_samples++;
+
+         if (row_info->bit_depth == 16)
+         {
+            if ((*dp | *(dp+1)) == 0)
+               zero_samples++;
+            dp+=2;
+         }
+      }
+   }
+   else /* Other color types */
+   {
+      png_uint_32 n, nstop;
+      int channel;
+      int color_channels = row_info->channels;
+      if (row_info->color_type > 3)
+         color_channels--;
+
+      for (n = 0, nstop=row_info->width; n<nstop; n++)
+      {
+         for (channel = 0; channel < color_channels; channel++)
+         {
+            if (row_info->bit_depth == 8)
+               if (*dp++ == 0)
+                  zero_samples++;
+
+            if (row_info->bit_depth == 16)
+            {
+               if ((*dp | *(dp+1)) == 0)
+                  zero_samples++;
+
+               dp+=2;
+            }
+         }
+         if (row_info->color_type > 3)
+         {
+            dp++;
+            if (row_info->bit_depth == 16)
+               dp++;
+         }
+      }
+   }
+}
+#endif /* WRITE_USER_TRANSFORM */
+
+#ifndef PNG_STDIO_SUPPORTED
+/* START of code to validate stdio-free compilation */
+/* These copies of the default read/write functions come from pngrio.c and
+ * pngwio.c.  They allow "don't include stdio" testing of the library.
+ * This is the function that does the actual reading of data.  If you are
+ * not reading from a standard C stream, you should create a replacement
+ * read_data function and use it at run time with png_set_read_fn(), rather
+ * than changing the library.
+ */
+
+#ifdef PNG_IO_STATE_SUPPORTED
+void
+pngtest_check_io_state(png_structp png_ptr, size_t data_length,
+    png_uint_32 io_op);
+void
+pngtest_check_io_state(png_structp png_ptr, size_t data_length,
+    png_uint_32 io_op)
+{
+   png_uint_32 io_state = png_get_io_state(png_ptr);
+   int err = 0;
+
+   /* Check if the current operation (reading / writing) is as expected. */
+   if ((io_state & PNG_IO_MASK_OP) != io_op)
+      png_error(png_ptr, "Incorrect operation in I/O state");
+
+   /* Check if the buffer size specific to the current location
+    * (file signature / header / data / crc) is as expected.
+    */
+   switch (io_state & PNG_IO_MASK_LOC)
+   {
+   case PNG_IO_SIGNATURE:
+      if (data_length > 8)
+         err = 1;
+      break;
+   case PNG_IO_CHUNK_HDR:
+      if (data_length != 8)
+         err = 1;
+      break;
+   case PNG_IO_CHUNK_DATA:
+      break;  /* no restrictions here */
+   case PNG_IO_CHUNK_CRC:
+      if (data_length != 4)
+         err = 1;
+      break;
+   default:
+      err = 1;  /* uninitialized */
+   }
+   if (err != 0)
+      png_error(png_ptr, "Bad I/O state or buffer size");
+}
+#endif
+
+static void PNGCBAPI
+pngtest_read_data(png_structp png_ptr, png_bytep data, size_t length)
+{
+   size_t check = 0;
+   png_voidp io_ptr;
+
+   /* fread() returns 0 on error, so it is OK to store this in a size_t
+    * instead of an int, which is what fread() actually returns.
+    */
+   io_ptr = png_get_io_ptr(png_ptr);
+   if (io_ptr != NULL)
+   {
+      check = fread(data, 1, length, (png_FILE_p)io_ptr);
+   }
+
+   if (check != length)
+   {
+      png_error(png_ptr, "Read Error");
+   }
+
+#ifdef PNG_IO_STATE_SUPPORTED
+   pngtest_check_io_state(png_ptr, length, PNG_IO_READING);
+#endif
+}
+
+#ifdef PNG_WRITE_FLUSH_SUPPORTED
+static void PNGCBAPI
+pngtest_flush(png_structp png_ptr)
+{
+   /* Do nothing; fflush() is said to be just a waste of energy. */
+   PNG_UNUSED(png_ptr)   /* Stifle compiler warning */
+}
+#endif
+
+/* This is the function that does the actual writing of data.  If you are
+ * not writing to a standard C stream, you should create a replacement
+ * write_data function and use it at run time with png_set_write_fn(), rather
+ * than changing the library.
+ */
+static void PNGCBAPI
+pngtest_write_data(png_structp png_ptr, png_bytep data, size_t length)
+{
+   size_t check;
+
+   check = fwrite(data, 1, length, (png_FILE_p)png_get_io_ptr(png_ptr));
+
+   if (check != length)
+   {
+      png_error(png_ptr, "Write Error");
+   }
+
+#ifdef PNG_IO_STATE_SUPPORTED
+   pngtest_check_io_state(png_ptr, length, PNG_IO_WRITING);
+#endif
+}
+#endif /* !STDIO */
+
+/* This function is called when there is a warning, but the library thinks
+ * it can continue anyway.  Replacement functions don't have to do anything
+ * here if you don't want to.  In the default configuration, png_ptr is
+ * not used, but it is passed in case it may be useful.
+ */
+typedef struct
+{
+   const char *file_name;
+}  pngtest_error_parameters;
+
+static void PNGCBAPI
+pngtest_warning(png_structp png_ptr, png_const_charp message)
+{
+   const char *name = "UNKNOWN (ERROR!)";
+   pngtest_error_parameters *test =
+      (pngtest_error_parameters*)png_get_error_ptr(png_ptr);
+
+   ++warning_count;
+
+   if (test != NULL && test->file_name != NULL)
+      name = test->file_name;
+
+   fprintf(STDERR, "\n%s: libpng warning: %s\n", name, message);
+}
+
+/* This is the default error handling function.  Note that replacements for
+ * this function MUST NOT RETURN, or the program will likely crash.  This
+ * function is used by default, or if the program supplies NULL for the
+ * error function pointer in png_set_error_fn().
+ */
+static void PNGCBAPI
+pngtest_error(png_structp png_ptr, png_const_charp message)
+{
+   ++error_count;
+
+   pngtest_warning(png_ptr, message);
+   /* We can return because png_error calls the default handler, which is
+    * actually OK in this case.
+    */
+}
+
+/* END of code to validate stdio-free compilation */
+
+/* START of code to validate memory allocation and deallocation */
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+
+/* Allocate memory.  For reasonable files, size should never exceed
+ * 64K.  However, zlib may allocate more than 64K if you don't tell
+ * it not to.  See zconf.h and png.h for more information.  zlib does
+ * need to allocate exactly 64K, so whatever you call here must
+ * have the ability to do that.
+ *
+ * This piece of code can be compiled to validate max 64K allocations
+ * by setting MAXSEG_64K in zlib zconf.h *or* PNG_MAX_MALLOC_64K.
+ */
+typedef struct memory_information
+{
+   png_alloc_size_t          size;
+   png_voidp                 pointer;
+   struct memory_information *next;
+} memory_information;
+typedef memory_information *memory_infop;
+
+static memory_infop pinformation = NULL;
+static int current_allocation = 0;
+static int maximum_allocation = 0;
+static int total_allocation = 0;
+static int num_allocations = 0;
+
+png_voidp PNGCBAPI png_debug_malloc PNGARG((png_structp png_ptr,
+    png_alloc_size_t size));
+void PNGCBAPI png_debug_free PNGARG((png_structp png_ptr, png_voidp ptr));
+
+png_voidp
+PNGCBAPI png_debug_malloc(png_structp png_ptr, png_alloc_size_t size)
+{
+
+   /* png_malloc has already tested for NULL; png_create_struct calls
+    * png_debug_malloc directly, with png_ptr == NULL which is OK
+    */
+
+   if (size == 0)
+      return NULL;
+
+   /* This calls the library allocator twice, once to get the requested
+      buffer and once to get a new free list entry. */
+   {
+      /* Disable malloc_fn and free_fn */
+      memory_infop pinfo;
+      png_set_mem_fn(png_ptr, NULL, NULL, NULL);
+      pinfo = (memory_infop)png_malloc(png_ptr,
+          (sizeof *pinfo));
+      pinfo->size = size;
+      current_allocation += size;
+      total_allocation += size;
+      num_allocations ++;
+
+      if (current_allocation > maximum_allocation)
+         maximum_allocation = current_allocation;
+
+      pinfo->pointer = png_malloc(png_ptr, size);
+      /* Restore malloc_fn and free_fn */
+
+      png_set_mem_fn(png_ptr,
+          NULL, png_debug_malloc, png_debug_free);
+
+      if (size != 0 && pinfo->pointer == NULL)
+      {
+         current_allocation -= size;
+         total_allocation -= size;
+         png_error(png_ptr,
+           "out of memory in pngtest->png_debug_malloc");
+      }
+
+      pinfo->next = pinformation;
+      pinformation = pinfo;
+      /* Make sure the caller isn't assuming zeroed memory. */
+      memset(pinfo->pointer, 0xdd, pinfo->size);
+
+      if (verbose != 0)
+         printf("png_malloc %lu bytes at %p\n", (unsigned long)size,
+             pinfo->pointer);
+
+      return (png_voidp)pinfo->pointer;
+   }
+}
+
+/* Free a pointer.  It is removed from the list at the same time. */
+void PNGCBAPI
+png_debug_free(png_structp png_ptr, png_voidp ptr)
+{
+   if (png_ptr == NULL)
+      fprintf(STDERR, "NULL pointer to png_debug_free.\n");
+
+   if (ptr == 0)
+   {
+#if 0 /* This happens all the time. */
+      fprintf(STDERR, "WARNING: freeing NULL pointer\n");
+#endif
+      return;
+   }
+
+   /* Unlink the element from the list. */
+   if (pinformation != NULL)
+   {
+      memory_infop *ppinfo = &pinformation;
+
+      for (;;)
+      {
+         memory_infop pinfo = *ppinfo;
+
+         if (pinfo->pointer == ptr)
+         {
+            *ppinfo = pinfo->next;
+            current_allocation -= pinfo->size;
+            if (current_allocation < 0)
+               fprintf(STDERR, "Duplicate free of memory\n");
+            /* We must free the list element too, but first kill
+               the memory that is to be freed. */
+            memset(ptr, 0x55, pinfo->size);
+            free(pinfo);
+            pinfo = NULL;
+            break;
+         }
+
+         if (pinfo->next == NULL)
+         {
+            fprintf(STDERR, "Pointer %p not found\n", ptr);
+            break;
+         }
+
+         ppinfo = &pinfo->next;
+      }
+   }
+
+   /* Finally free the data. */
+   if (verbose != 0)
+      printf("Freeing %p\n", ptr);
+
+   if (ptr != NULL)
+      free(ptr);
+   ptr = NULL;
+}
+#endif /* USER_MEM && DEBUG */
+/* END of code to test memory allocation/deallocation */
+
+
+#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
+/* Demonstration of user chunk support of the sTER and vpAg chunks */
+
+/* (sTER is a public chunk not yet known by libpng.  vpAg is a private
+chunk used in ImageMagick to store "virtual page" size).  */
+
+static struct user_chunk_data
+{
+   png_const_infop info_ptr;
+   png_uint_32     vpAg_width, vpAg_height;
+   png_byte        vpAg_units;
+   png_byte        sTER_mode;
+   int             location[2];
+}
+user_chunk_data;
+
+/* Used for location and order; zero means nothing. */
+#define have_sTER   0x01
+#define have_vpAg   0x02
+#define before_PLTE 0x10
+#define before_IDAT 0x20
+#define after_IDAT  0x40
+
+static void
+init_callback_info(png_const_infop info_ptr)
+{
+   MEMZERO(user_chunk_data);
+   user_chunk_data.info_ptr = info_ptr;
+}
+
+static int
+set_location(png_structp png_ptr, struct user_chunk_data *data, int what)
+{
+   int location;
+
+   if ((data->location[0] & what) != 0 || (data->location[1] & what) != 0)
+      return 0; /* already have one of these */
+
+   /* Find where we are (the code below zeroes info_ptr to indicate that the
+    * chunks before the first IDAT have been read.)
+    */
+   if (data->info_ptr == NULL) /* after IDAT */
+      location = what | after_IDAT;
+
+   else if (png_get_valid(png_ptr, data->info_ptr, PNG_INFO_PLTE) != 0)
+      location = what | before_IDAT;
+
+   else
+      location = what | before_PLTE;
+
+   if (data->location[0] == 0)
+      data->location[0] = location;
+
+   else
+      data->location[1] = location;
+
+   return 1; /* handled */
+}
+
+static int PNGCBAPI
+read_user_chunk_callback(png_struct *png_ptr, png_unknown_chunkp chunk)
+{
+   struct user_chunk_data *my_user_chunk_data =
+      (struct user_chunk_data*)png_get_user_chunk_ptr(png_ptr);
+
+   if (my_user_chunk_data == NULL)
+      png_error(png_ptr, "lost user chunk pointer");
+
+   /* Return one of the following:
+    *    return -n;  chunk had an error
+    *    return 0;   did not recognize
+    *    return n;   success
+    *
+    * The unknown chunk structure contains the chunk data:
+    * png_byte name[5];
+    * png_byte *data;
+    * size_t size;
+    *
+    * Note that libpng has already taken care of the CRC handling.
+    */
+
+   if (chunk->name[0] == 115 && chunk->name[1] ==  84 &&     /* s  T */
+       chunk->name[2] ==  69 && chunk->name[3] ==  82)       /* E  R */
+      {
+         /* Found sTER chunk */
+         if (chunk->size != 1)
+            return -1; /* Error return */
+
+         if (chunk->data[0] != 0 && chunk->data[0] != 1)
+            return -1;  /* Invalid mode */
+
+         if (set_location(png_ptr, my_user_chunk_data, have_sTER) != 0)
+         {
+            my_user_chunk_data->sTER_mode=chunk->data[0];
+            return 1;
+         }
+
+         else
+            return 0; /* duplicate sTER - give it to libpng */
+      }
+
+   if (chunk->name[0] != 118 || chunk->name[1] != 112 ||    /* v  p */
+       chunk->name[2] !=  65 || chunk->name[3] != 103)      /* A  g */
+      return 0; /* Did not recognize */
+
+   /* Found ImageMagick vpAg chunk */
+
+   if (chunk->size != 9)
+      return -1; /* Error return */
+
+   if (set_location(png_ptr, my_user_chunk_data, have_vpAg) == 0)
+      return 0;  /* duplicate vpAg */
+
+   my_user_chunk_data->vpAg_width = png_get_uint_31(png_ptr, chunk->data);
+   my_user_chunk_data->vpAg_height = png_get_uint_31(png_ptr, chunk->data + 4);
+   my_user_chunk_data->vpAg_units = chunk->data[8];
+
+   return 1;
+}
+
+#ifdef PNG_WRITE_SUPPORTED
+static void
+write_sTER_chunk(png_structp write_ptr)
+{
+   png_byte sTER[5] = {115,  84,  69,  82, '\0'};
+
+   if (verbose != 0)
+      fprintf(STDERR, "\n stereo mode = %d\n", user_chunk_data.sTER_mode);
+
+   png_write_chunk(write_ptr, sTER, &user_chunk_data.sTER_mode, 1);
+}
+
+static void
+write_vpAg_chunk(png_structp write_ptr)
+{
+   png_byte vpAg[5] = {118, 112,  65, 103, '\0'};
+
+   png_byte vpag_chunk_data[9];
+
+   if (verbose != 0)
+      fprintf(STDERR, " vpAg = %lu x %lu, units = %d\n",
+          (unsigned long)user_chunk_data.vpAg_width,
+          (unsigned long)user_chunk_data.vpAg_height,
+          user_chunk_data.vpAg_units);
+
+   png_save_uint_32(vpag_chunk_data, user_chunk_data.vpAg_width);
+   png_save_uint_32(vpag_chunk_data + 4, user_chunk_data.vpAg_height);
+   vpag_chunk_data[8] = user_chunk_data.vpAg_units;
+   png_write_chunk(write_ptr, vpAg, vpag_chunk_data, 9);
+}
+
+static void
+write_chunks(png_structp write_ptr, int location)
+{
+   int i;
+
+   /* Notice that this preserves the original chunk order, however chunks
+    * intercepted by the callback will be written *after* chunks passed to
+    * libpng.  This will actually reverse a pair of sTER chunks or a pair of
+    * vpAg chunks, resulting in an error later.  This is not worth worrying
+    * about - the chunks should not be duplicated!
+    */
+   for (i=0; i<2; ++i)
+   {
+      if (user_chunk_data.location[i] == (location | have_sTER))
+         write_sTER_chunk(write_ptr);
+
+      else if (user_chunk_data.location[i] == (location | have_vpAg))
+         write_vpAg_chunk(write_ptr);
+   }
+}
+#endif /* WRITE */
+#else /* !READ_USER_CHUNKS */
+#  define write_chunks(pp,loc) ((void)0)
+#endif
+/* END of code to demonstrate user chunk support */
+
+/* START of code to check that libpng has the required text support; this only
+ * checks for the write support because if read support is missing the chunk
+ * will simply not be reported back to pngtest.
+ */
+#ifdef PNG_TEXT_SUPPORTED
+static void
+pngtest_check_text_support(png_structp png_ptr, png_textp text_ptr,
+    int num_text)
+{
+   while (num_text > 0)
+   {
+      switch (text_ptr[--num_text].compression)
+      {
+         case PNG_TEXT_COMPRESSION_NONE:
+            break;
+
+         case PNG_TEXT_COMPRESSION_zTXt:
+#           ifndef PNG_WRITE_zTXt_SUPPORTED
+               ++unsupported_chunks;
+               /* In libpng 1.7 this now does an app-error, so stop it: */
+               text_ptr[num_text].compression = PNG_TEXT_COMPRESSION_NONE;
+#           endif
+            break;
+
+         case PNG_ITXT_COMPRESSION_NONE:
+         case PNG_ITXT_COMPRESSION_zTXt:
+#           ifndef PNG_WRITE_iTXt_SUPPORTED
+               ++unsupported_chunks;
+               text_ptr[num_text].compression = PNG_TEXT_COMPRESSION_NONE;
+#           endif
+            break;
+
+         default:
+            /* This is an error */
+            png_error(png_ptr, "invalid text chunk compression field");
+            break;
+      }
+   }
+}
+#endif
+/* END of code to check that libpng has the required text support */
+
+/* Test one file */
+static int
+test_one_file(const char *inname, const char *outname)
+{
+   static png_FILE_p fpin;
+   static png_FILE_p fpout;  /* "static" prevents setjmp corruption */
+   pngtest_error_parameters error_parameters;
+   png_structp read_ptr;
+   png_infop read_info_ptr, end_info_ptr;
+#ifdef PNG_WRITE_SUPPORTED
+   png_structp write_ptr;
+   png_infop write_info_ptr;
+   png_infop write_end_info_ptr;
+#ifdef PNG_WRITE_FILTER_SUPPORTED
+   int interlace_preserved = 1;
+#endif /* WRITE_FILTER */
+#else /* !WRITE */
+   png_structp write_ptr = NULL;
+   png_infop write_info_ptr = NULL;
+   png_infop write_end_info_ptr = NULL;
+#endif /* !WRITE */
+   png_bytep row_buf;
+   png_uint_32 y;
+   png_uint_32 width, height;
+   volatile int num_passes;
+   int pass;
+   int bit_depth, color_type;
+
+   row_buf = NULL;
+   error_parameters.file_name = inname;
+
+   if ((fpin = fopen(inname, "rb")) == NULL)
+   {
+      fprintf(STDERR, "Could not find input file %s\n", inname);
+      return 1;
+   }
+
+   if ((fpout = fopen(outname, "wb")) == NULL)
+   {
+      fprintf(STDERR, "Could not open output file %s\n", outname);
+      FCLOSE(fpin);
+      return 1;
+   }
+
+   pngtest_debug("Allocating read and write structures");
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+   read_ptr =
+       png_create_read_struct_2(PNG_LIBPNG_VER_STRING, NULL,
+       NULL, NULL, NULL, png_debug_malloc, png_debug_free);
+#else
+   read_ptr =
+       png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+#endif
+   png_set_error_fn(read_ptr, &error_parameters, pngtest_error,
+       pngtest_warning);
+
+#ifdef PNG_WRITE_SUPPORTED
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+   write_ptr =
+       png_create_write_struct_2(PNG_LIBPNG_VER_STRING, NULL,
+       NULL, NULL, NULL, png_debug_malloc, png_debug_free);
+#else
+   write_ptr =
+       png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+#endif
+   png_set_error_fn(write_ptr, &error_parameters, pngtest_error,
+       pngtest_warning);
+#endif
+   pngtest_debug("Allocating read_info, write_info and end_info structures");
+   read_info_ptr = png_create_info_struct(read_ptr);
+   end_info_ptr = png_create_info_struct(read_ptr);
+#ifdef PNG_WRITE_SUPPORTED
+   write_info_ptr = png_create_info_struct(write_ptr);
+   write_end_info_ptr = png_create_info_struct(write_ptr);
+#endif
+
+#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
+   init_callback_info(read_info_ptr);
+   png_set_read_user_chunk_fn(read_ptr, &user_chunk_data,
+       read_user_chunk_callback);
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   pngtest_debug("Setting jmpbuf for read struct");
+   if (setjmp(png_jmpbuf(read_ptr)))
+   {
+      fprintf(STDERR, "%s -> %s: libpng read error\n", inname, outname);
+      png_free(read_ptr, row_buf);
+      row_buf = NULL;
+      if (verbose != 0)
+        fprintf(STDERR, "   destroy read structs\n");
+      png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr);
+#ifdef PNG_WRITE_SUPPORTED
+      if (verbose != 0)
+        fprintf(STDERR, "   destroy write structs\n");
+      png_destroy_info_struct(write_ptr, &write_end_info_ptr);
+      png_destroy_write_struct(&write_ptr, &write_info_ptr);
+#endif
+      FCLOSE(fpin);
+      FCLOSE(fpout);
+      return 1;
+   }
+
+#ifdef PNG_WRITE_SUPPORTED
+   pngtest_debug("Setting jmpbuf for write struct");
+
+   if (setjmp(png_jmpbuf(write_ptr)))
+   {
+      fprintf(STDERR, "%s -> %s: libpng write error\n", inname, outname);
+      png_free(read_ptr, row_buf);
+      row_buf = NULL;
+      if (verbose != 0)
+        fprintf(STDERR, "   destroying read structs\n");
+      png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr);
+      if (verbose != 0)
+        fprintf(STDERR, "   destroying write structs\n");
+      png_destroy_info_struct(write_ptr, &write_end_info_ptr);
+      png_destroy_write_struct(&write_ptr, &write_info_ptr);
+      FCLOSE(fpin);
+      FCLOSE(fpout);
+      return 1;
+   }
+#endif
+#endif
+
+#ifdef PNG_BENIGN_ERRORS_SUPPORTED
+   if (strict != 0)
+   {
+      /* Treat png_benign_error() as errors on read */
+      png_set_benign_errors(read_ptr, 0);
+
+# ifdef PNG_WRITE_SUPPORTED
+      /* Treat them as errors on write */
+      png_set_benign_errors(write_ptr, 0);
+# endif
+
+      /* if strict is not set, then app warnings and errors are treated as
+       * warnings in release builds, but not in unstable builds; this can be
+       * changed with '--relaxed'.
+       */
+   }
+
+   else if (relaxed != 0)
+   {
+      /* Allow application (pngtest) errors and warnings to pass */
+      png_set_benign_errors(read_ptr, 1);
+
+      /* Turn off CRC checking while reading */
+      png_set_crc_action(read_ptr, PNG_CRC_QUIET_USE, PNG_CRC_QUIET_USE);
+
+#ifdef PNG_IGNORE_ADLER32
+      /* Turn off ADLER32 checking while reading */
+      png_set_option(read_ptr, PNG_IGNORE_ADLER32, PNG_OPTION_ON);
+#endif
+
+# ifdef PNG_WRITE_SUPPORTED
+      png_set_benign_errors(write_ptr, 1);
+# endif
+
+   }
+#endif /* BENIGN_ERRORS */
+
+   pngtest_debug("Initializing input and output streams");
+#ifdef PNG_STDIO_SUPPORTED
+   png_init_io(read_ptr, fpin);
+#  ifdef PNG_WRITE_SUPPORTED
+   png_init_io(write_ptr, fpout);
+#  endif
+#else
+   png_set_read_fn(read_ptr, (png_voidp)fpin, pngtest_read_data);
+#  ifdef PNG_WRITE_SUPPORTED
+   png_set_write_fn(write_ptr, (png_voidp)fpout,  pngtest_write_data,
+#    ifdef PNG_WRITE_FLUSH_SUPPORTED
+       pngtest_flush);
+#    else
+       NULL);
+#    endif
+#  endif
+#endif
+
+   if (status_dots_requested == 1)
+   {
+#ifdef PNG_WRITE_SUPPORTED
+      png_set_write_status_fn(write_ptr, write_row_callback);
+#endif
+      png_set_read_status_fn(read_ptr, read_row_callback);
+   }
+
+   else
+   {
+#ifdef PNG_WRITE_SUPPORTED
+      png_set_write_status_fn(write_ptr, NULL);
+#endif
+      png_set_read_status_fn(read_ptr, NULL);
+   }
+
+#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
+   png_set_read_user_transform_fn(read_ptr, read_user_callback);
+#endif
+#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
+   zero_samples = 0;
+   png_set_write_user_transform_fn(write_ptr, count_zero_samples);
+#endif
+
+#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
+   /* Preserve all the unknown chunks, if possible.  If this is disabled then,
+    * even if the png_{get,set}_unknown_chunks stuff is enabled, we can't use
+    * libpng to *save* the unknown chunks on read (because we can't switch the
+    * save option on!)
+    *
+    * Notice that if SET_UNKNOWN_CHUNKS is *not* supported read will discard all
+    * unknown chunks and write will write them all.
+    */
+#ifdef PNG_SAVE_UNKNOWN_CHUNKS_SUPPORTED
+   png_set_keep_unknown_chunks(read_ptr, PNG_HANDLE_CHUNK_ALWAYS,
+       NULL, 0);
+#endif
+#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+   png_set_keep_unknown_chunks(write_ptr, PNG_HANDLE_CHUNK_ALWAYS,
+       NULL, 0);
+#endif
+#endif
+
+   pngtest_debug("Reading info struct");
+   png_read_info(read_ptr, read_info_ptr);
+
+#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
+   /* This is a bit of a hack; there is no obvious way in the callback function
+    * to determine that the chunks before the first IDAT have been read, so
+    * remove the info_ptr (which is only used to determine position relative to
+    * PLTE) here to indicate that we are after the IDAT.
+    */
+   user_chunk_data.info_ptr = NULL;
+#endif
+
+   pngtest_debug("Transferring info struct");
+   {
+      int interlace_type, compression_type, filter_type;
+
+      if (png_get_IHDR(read_ptr, read_info_ptr, &width, &height, &bit_depth,
+          &color_type, &interlace_type, &compression_type, &filter_type) != 0)
+      {
+         png_set_IHDR(write_ptr, write_info_ptr, width, height, bit_depth,
+             color_type, interlace_type, compression_type, filter_type);
+         /* num_passes may not be available below if interlace support is not
+          * provided by libpng for both read and write.
+          */
+         switch (interlace_type)
+         {
+            case PNG_INTERLACE_NONE:
+               num_passes = 1;
+               break;
+
+            case PNG_INTERLACE_ADAM7:
+               num_passes = 7;
+               break;
+
+            default:
+               png_error(read_ptr, "invalid interlace type");
+               /*NOT REACHED*/
+         }
+      }
+
+      else
+         png_error(read_ptr, "png_get_IHDR failed");
+   }
+#ifdef PNG_FIXED_POINT_SUPPORTED
+#ifdef PNG_cHRM_SUPPORTED
+   {
+      png_fixed_point white_x, white_y, red_x, red_y, green_x, green_y, blue_x,
+          blue_y;
+
+      if (png_get_cHRM_fixed(read_ptr, read_info_ptr, &white_x, &white_y,
+          &red_x, &red_y, &green_x, &green_y, &blue_x, &blue_y) != 0)
+      {
+         png_set_cHRM_fixed(write_ptr, write_info_ptr, white_x, white_y, red_x,
+             red_y, green_x, green_y, blue_x, blue_y);
+      }
+   }
+#endif
+#ifdef PNG_gAMA_SUPPORTED
+   {
+      png_fixed_point gamma;
+
+      if (png_get_gAMA_fixed(read_ptr, read_info_ptr, &gamma) != 0)
+         png_set_gAMA_fixed(write_ptr, write_info_ptr, gamma);
+   }
+#endif
+#else /* Use floating point versions */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+#ifdef PNG_cHRM_SUPPORTED
+   {
+      double white_x, white_y, red_x, red_y, green_x, green_y, blue_x,
+          blue_y;
+
+      if (png_get_cHRM(read_ptr, read_info_ptr, &white_x, &white_y, &red_x,
+          &red_y, &green_x, &green_y, &blue_x, &blue_y) != 0)
+      {
+         png_set_cHRM(write_ptr, write_info_ptr, white_x, white_y, red_x,
+             red_y, green_x, green_y, blue_x, blue_y);
+      }
+   }
+#endif
+#ifdef PNG_gAMA_SUPPORTED
+   {
+      double gamma;
+
+      if (png_get_gAMA(read_ptr, read_info_ptr, &gamma) != 0)
+         png_set_gAMA(write_ptr, write_info_ptr, gamma);
+   }
+#endif
+#endif /* Floating point */
+#endif /* Fixed point */
+#ifdef PNG_iCCP_SUPPORTED
+   {
+      png_charp name;
+      png_bytep profile;
+      png_uint_32 proflen;
+      int compression_type;
+
+      if (png_get_iCCP(read_ptr, read_info_ptr, &name, &compression_type,
+          &profile, &proflen) != 0)
+      {
+         png_set_iCCP(write_ptr, write_info_ptr, name, compression_type,
+             profile, proflen);
+      }
+   }
+#endif
+#ifdef PNG_sRGB_SUPPORTED
+   {
+      int intent;
+
+      if (png_get_sRGB(read_ptr, read_info_ptr, &intent) != 0)
+         png_set_sRGB(write_ptr, write_info_ptr, intent);
+   }
+#endif
+   {
+      png_colorp palette;
+      int num_palette;
+
+      if (png_get_PLTE(read_ptr, read_info_ptr, &palette, &num_palette) != 0)
+         png_set_PLTE(write_ptr, write_info_ptr, palette, num_palette);
+   }
+#ifdef PNG_bKGD_SUPPORTED
+   {
+      png_color_16p background;
+
+      if (png_get_bKGD(read_ptr, read_info_ptr, &background) != 0)
+      {
+         png_set_bKGD(write_ptr, write_info_ptr, background);
+      }
+   }
+#endif
+#ifdef PNG_READ_eXIf_SUPPORTED
+   {
+      png_bytep exif=NULL;
+      png_uint_32 exif_length;
+
+      if (png_get_eXIf_1(read_ptr, read_info_ptr, &exif_length, &exif) != 0)
+      {
+         if (exif_length > 1)
+            fprintf(STDERR," eXIf type %c%c, %lu bytes\n",exif[0],exif[1],
+               (unsigned long)exif_length);
+# ifdef PNG_WRITE_eXIf_SUPPORTED
+         png_set_eXIf_1(write_ptr, write_info_ptr, exif_length, exif);
+# endif
+      }
+   }
+#endif
+#ifdef PNG_hIST_SUPPORTED
+   {
+      png_uint_16p hist;
+
+      if (png_get_hIST(read_ptr, read_info_ptr, &hist) != 0)
+         png_set_hIST(write_ptr, write_info_ptr, hist);
+   }
+#endif
+#ifdef PNG_oFFs_SUPPORTED
+   {
+      png_int_32 offset_x, offset_y;
+      int unit_type;
+
+      if (png_get_oFFs(read_ptr, read_info_ptr, &offset_x, &offset_y,
+          &unit_type) != 0)
+      {
+         png_set_oFFs(write_ptr, write_info_ptr, offset_x, offset_y, unit_type);
+      }
+   }
+#endif
+#ifdef PNG_pCAL_SUPPORTED
+   {
+      png_charp purpose, units;
+      png_charpp params;
+      png_int_32 X0, X1;
+      int type, nparams;
+
+      if (png_get_pCAL(read_ptr, read_info_ptr, &purpose, &X0, &X1, &type,
+          &nparams, &units, &params) != 0)
+      {
+         png_set_pCAL(write_ptr, write_info_ptr, purpose, X0, X1, type,
+             nparams, units, params);
+      }
+   }
+#endif
+#ifdef PNG_pHYs_SUPPORTED
+   {
+      png_uint_32 res_x, res_y;
+      int unit_type;
+
+      if (png_get_pHYs(read_ptr, read_info_ptr, &res_x, &res_y,
+          &unit_type) != 0)
+         png_set_pHYs(write_ptr, write_info_ptr, res_x, res_y, unit_type);
+   }
+#endif
+#ifdef PNG_sBIT_SUPPORTED
+   {
+      png_color_8p sig_bit;
+
+      if (png_get_sBIT(read_ptr, read_info_ptr, &sig_bit) != 0)
+         png_set_sBIT(write_ptr, write_info_ptr, sig_bit);
+   }
+#endif
+#ifdef PNG_sCAL_SUPPORTED
+#if defined(PNG_FLOATING_POINT_SUPPORTED) && \
+   defined(PNG_FLOATING_ARITHMETIC_SUPPORTED)
+   {
+      int unit;
+      double scal_width, scal_height;
+
+      if (png_get_sCAL(read_ptr, read_info_ptr, &unit, &scal_width,
+          &scal_height) != 0)
+      {
+         png_set_sCAL(write_ptr, write_info_ptr, unit, scal_width, scal_height);
+      }
+   }
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   {
+      int unit;
+      png_charp scal_width, scal_height;
+
+      if (png_get_sCAL_s(read_ptr, read_info_ptr, &unit, &scal_width,
+           &scal_height) != 0)
+      {
+         png_set_sCAL_s(write_ptr, write_info_ptr, unit, scal_width,
+             scal_height);
+      }
+   }
+#endif
+#endif
+#endif
+
+#ifdef PNG_sPLT_SUPPORTED
+   {
+       png_sPLT_tp entries;
+
+       int num_entries = (int) png_get_sPLT(read_ptr, read_info_ptr, &entries);
+       if (num_entries)
+       {
+           png_set_sPLT(write_ptr, write_info_ptr, entries, num_entries);
+       }
+   }
+#endif
+
+#ifdef PNG_TEXT_SUPPORTED
+   {
+      png_textp text_ptr;
+      int num_text;
+
+      if (png_get_text(read_ptr, read_info_ptr, &text_ptr, &num_text) > 0)
+      {
+         pngtest_debug1("Handling %d iTXt/tEXt/zTXt chunks", num_text);
+
+         pngtest_check_text_support(read_ptr, text_ptr, num_text);
+
+         if (verbose != 0)
+         {
+            int i;
+
+            fprintf(STDERR,"\n");
+            for (i=0; i<num_text; i++)
+            {
+               fprintf(STDERR,"   Text compression[%d]=%d\n",
+                   i, text_ptr[i].compression);
+            }
+         }
+
+         png_set_text(write_ptr, write_info_ptr, text_ptr, num_text);
+      }
+   }
+#endif
+#ifdef PNG_tIME_SUPPORTED
+   {
+      png_timep mod_time;
+
+      if (png_get_tIME(read_ptr, read_info_ptr, &mod_time) != 0)
+      {
+         png_set_tIME(write_ptr, write_info_ptr, mod_time);
+#ifdef PNG_TIME_RFC1123_SUPPORTED
+         if (png_convert_to_rfc1123_buffer(tIME_string, mod_time) != 0)
+            tIME_string[(sizeof tIME_string) - 1] = '\0';
+
+         else
+         {
+            strncpy(tIME_string, "*** invalid time ***", (sizeof tIME_string));
+            tIME_string[(sizeof tIME_string) - 1] = '\0';
+         }
+
+         tIME_chunk_present++;
+#endif /* TIME_RFC1123 */
+      }
+   }
+#endif
+#ifdef PNG_tRNS_SUPPORTED
+   {
+      png_bytep trans_alpha;
+      int num_trans;
+      png_color_16p trans_color;
+
+      if (png_get_tRNS(read_ptr, read_info_ptr, &trans_alpha, &num_trans,
+          &trans_color) != 0)
+      {
+         int sample_max = (1 << bit_depth);
+         /* libpng doesn't reject a tRNS chunk with out-of-range samples */
+         if (!((color_type == PNG_COLOR_TYPE_GRAY &&
+             (int)trans_color->gray > sample_max) ||
+             (color_type == PNG_COLOR_TYPE_RGB &&
+             ((int)trans_color->red > sample_max ||
+             (int)trans_color->green > sample_max ||
+             (int)trans_color->blue > sample_max))))
+            png_set_tRNS(write_ptr, write_info_ptr, trans_alpha, num_trans,
+               trans_color);
+      }
+   }
+#endif
+#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+   {
+      png_unknown_chunkp unknowns;
+      int num_unknowns = png_get_unknown_chunks(read_ptr, read_info_ptr,
+          &unknowns);
+
+      if (num_unknowns != 0)
+      {
+         png_set_unknown_chunks(write_ptr, write_info_ptr, unknowns,
+             num_unknowns);
+#if PNG_LIBPNG_VER < 10600
+         /* Copy the locations from the read_info_ptr.  The automatically
+          * generated locations in write_end_info_ptr are wrong prior to 1.6.0
+          * because they are reset from the write pointer (removed in 1.6.0).
+          */
+         {
+            int i;
+            for (i = 0; i < num_unknowns; i++)
+              png_set_unknown_chunk_location(write_ptr, write_info_ptr, i,
+                  unknowns[i].location);
+         }
+#endif
+      }
+   }
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+   pngtest_debug("Writing info struct");
+
+   /* Write the info in two steps so that if we write the 'unknown' chunks here
+    * they go to the correct place.
+    */
+   png_write_info_before_PLTE(write_ptr, write_info_ptr);
+
+   write_chunks(write_ptr, before_PLTE); /* before PLTE */
+
+   png_write_info(write_ptr, write_info_ptr);
+
+   write_chunks(write_ptr, before_IDAT); /* after PLTE */
+
+   png_write_info(write_ptr, write_end_info_ptr);
+
+   write_chunks(write_ptr, after_IDAT); /* after IDAT */
+
+#ifdef PNG_COMPRESSION_COMPAT
+   /* Test the 'compatibility' setting here, if it is available. */
+   png_set_compression(write_ptr, PNG_COMPRESSION_COMPAT);
+#endif
+#endif
+
+#ifdef SINGLE_ROWBUF_ALLOC
+   pngtest_debug("Allocating row buffer...");
+   row_buf = (png_bytep)png_malloc(read_ptr,
+       png_get_rowbytes(read_ptr, read_info_ptr));
+
+   pngtest_debug1("\t%p", row_buf);
+#endif /* SINGLE_ROWBUF_ALLOC */
+   pngtest_debug("Writing row data");
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED) &&\
+   defined(PNG_WRITE_INTERLACING_SUPPORTED)
+   /* Both must be defined for libpng to be able to handle the interlace,
+    * otherwise it gets handled below by simply reading and writing the passes
+    * directly.
+    */
+   if (png_set_interlace_handling(read_ptr) != num_passes)
+      png_error(write_ptr,
+          "png_set_interlace_handling(read): wrong pass count ");
+   if (png_set_interlace_handling(write_ptr) != num_passes)
+      png_error(write_ptr,
+          "png_set_interlace_handling(write): wrong pass count ");
+#else /* png_set_interlace_handling not called on either read or write */
+#  define calc_pass_height
+#endif /* not using libpng interlace handling */
+
+#ifdef PNGTEST_TIMING
+   t_stop = (float)clock();
+   t_misc += (t_stop - t_start);
+   t_start = t_stop;
+#endif
+   for (pass = 0; pass < num_passes; pass++)
+   {
+#     ifdef calc_pass_height
+         png_uint_32 pass_height;
+
+         if (num_passes == 7) /* interlaced */
+         {
+            if (PNG_PASS_COLS(width, pass) > 0)
+               pass_height = PNG_PASS_ROWS(height, pass);
+
+            else
+               pass_height = 0;
+         }
+
+         else /* not interlaced */
+            pass_height = height;
+#     else
+#        define pass_height height
+#     endif
+
+      pngtest_debug1("Writing row data for pass %d", pass);
+      for (y = 0; y < pass_height; y++)
+      {
+#ifndef SINGLE_ROWBUF_ALLOC
+         pngtest_debug2("Allocating row buffer (pass %d, y = %u)...", pass, y);
+
+         row_buf = (png_bytep)png_malloc(read_ptr,
+             png_get_rowbytes(read_ptr, read_info_ptr));
+
+         pngtest_debug2("\t%p (%lu bytes)", row_buf,
+             (unsigned long)png_get_rowbytes(read_ptr, read_info_ptr));
+
+#endif /* !SINGLE_ROWBUF_ALLOC */
+         png_read_rows(read_ptr, (png_bytepp)&row_buf, NULL, 1);
+
+#ifdef PNG_WRITE_SUPPORTED
+#ifdef PNGTEST_TIMING
+         t_stop = (float)clock();
+         t_decode += (t_stop - t_start);
+         t_start = t_stop;
+#endif
+         png_write_rows(write_ptr, (png_bytepp)&row_buf, 1);
+#ifdef PNGTEST_TIMING
+         t_stop = (float)clock();
+         t_encode += (t_stop - t_start);
+         t_start = t_stop;
+#endif
+#endif /* WRITE */
+
+#ifndef SINGLE_ROWBUF_ALLOC
+         pngtest_debug2("Freeing row buffer (pass %d, y = %u)", pass, y);
+         png_free(read_ptr, row_buf);
+         row_buf = NULL;
+#endif /* !SINGLE_ROWBUF_ALLOC */
+      }
+   }
+
+#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED
+#  ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
+      png_free_data(read_ptr, read_info_ptr, PNG_FREE_UNKN, -1);
+#  endif
+#  ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+      png_free_data(write_ptr, write_info_ptr, PNG_FREE_UNKN, -1);
+#  endif
+#endif
+
+   pngtest_debug("Reading and writing end_info data");
+
+   png_read_end(read_ptr, end_info_ptr);
+#ifdef PNG_TEXT_SUPPORTED
+   {
+      png_textp text_ptr;
+      int num_text;
+
+      if (png_get_text(read_ptr, end_info_ptr, &text_ptr, &num_text) > 0)
+      {
+         pngtest_debug1("Handling %d iTXt/tEXt/zTXt chunks", num_text);
+
+         pngtest_check_text_support(read_ptr, text_ptr, num_text);
+
+         if (verbose != 0)
+         {
+            int i;
+
+            fprintf(STDERR,"\n");
+            for (i=0; i<num_text; i++)
+            {
+               fprintf(STDERR,"   Text compression[%d]=%d\n",
+                   i, text_ptr[i].compression);
+            }
+         }
+
+         png_set_text(write_ptr, write_end_info_ptr, text_ptr, num_text);
+      }
+   }
+#endif
+#ifdef PNG_READ_eXIf_SUPPORTED
+   {
+      png_bytep exif=NULL;
+      png_uint_32 exif_length;
+
+      if (png_get_eXIf_1(read_ptr, end_info_ptr, &exif_length, &exif) != 0)
+      {
+         if (exif_length > 1)
+            fprintf(STDERR," eXIf type %c%c, %lu bytes\n",exif[0],exif[1],
+               (unsigned long)exif_length);
+# ifdef PNG_WRITE_eXIf_SUPPORTED
+         png_set_eXIf_1(write_ptr, write_end_info_ptr, exif_length, exif);
+# endif
+      }
+   }
+#endif
+#ifdef PNG_tIME_SUPPORTED
+   {
+      png_timep mod_time;
+
+      if (png_get_tIME(read_ptr, end_info_ptr, &mod_time) != 0)
+      {
+         png_set_tIME(write_ptr, write_end_info_ptr, mod_time);
+#ifdef PNG_TIME_RFC1123_SUPPORTED
+         if (png_convert_to_rfc1123_buffer(tIME_string, mod_time) != 0)
+            tIME_string[(sizeof tIME_string) - 1] = '\0';
+
+         else
+         {
+            strncpy(tIME_string, "*** invalid time ***", sizeof tIME_string);
+            tIME_string[(sizeof tIME_string)-1] = '\0';
+         }
+
+         tIME_chunk_present++;
+#endif /* TIME_RFC1123 */
+      }
+   }
+#endif
+#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+   {
+      png_unknown_chunkp unknowns;
+      int num_unknowns = png_get_unknown_chunks(read_ptr, end_info_ptr,
+          &unknowns);
+
+      if (num_unknowns != 0)
+      {
+         png_set_unknown_chunks(write_ptr, write_end_info_ptr, unknowns,
+             num_unknowns);
+#if PNG_LIBPNG_VER < 10600
+         /* Copy the locations from the read_info_ptr.  The automatically
+          * generated locations in write_end_info_ptr are wrong prior to 1.6.0
+          * because they are reset from the write pointer (removed in 1.6.0).
+          */
+         {
+            int i;
+            for (i = 0; i < num_unknowns; i++)
+              png_set_unknown_chunk_location(write_ptr, write_end_info_ptr, i,
+                  unknowns[i].location);
+         }
+#endif
+      }
+   }
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
+   /* Normally one would use Z_DEFAULT_STRATEGY for text compression.
+    * This is here just to make pngtest replicate the results from libpng
+    * versions prior to 1.5.4, and to test this new API.
+    */
+   png_set_text_compression_strategy(write_ptr, Z_FILTERED);
+#endif
+
+   /* When the unknown vpAg/sTER chunks are written by pngtest the only way to
+    * do it is to write them *before* calling png_write_end.  When unknown
+    * chunks are written by libpng, however, they are written just before IEND.
+    * There seems to be no way round this, however vpAg/sTER are not expected
+    * after IDAT.
+    */
+   write_chunks(write_ptr, after_IDAT);
+
+   png_write_end(write_ptr, write_end_info_ptr);
+#endif
+
+#ifdef PNG_EASY_ACCESS_SUPPORTED
+   if (verbose != 0)
+   {
+      png_uint_32 iwidth, iheight;
+      iwidth = png_get_image_width(write_ptr, write_info_ptr);
+      iheight = png_get_image_height(write_ptr, write_info_ptr);
+      fprintf(STDERR, "\n Image width = %lu, height = %lu\n",
+          (unsigned long)iwidth, (unsigned long)iheight);
+   }
+#endif
+
+   pngtest_debug("Destroying data structs");
+#ifdef SINGLE_ROWBUF_ALLOC
+   pngtest_debug("Destroying row_buf for read_ptr");
+   png_free(read_ptr, row_buf);
+   row_buf = NULL;
+#endif /* SINGLE_ROWBUF_ALLOC */
+   pngtest_debug("Destroying read_ptr, read_info_ptr, end_info_ptr");
+   png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr);
+#ifdef PNG_WRITE_SUPPORTED
+   pngtest_debug("Destroying write_end_info_ptr");
+   png_destroy_info_struct(write_ptr, &write_end_info_ptr);
+   pngtest_debug("Destroying write_ptr, write_info_ptr");
+   png_destroy_write_struct(&write_ptr, &write_info_ptr);
+#endif
+   pngtest_debug("Destruction complete.");
+
+   FCLOSE(fpin);
+   FCLOSE(fpout);
+
+   /* Summarize any warnings or errors and in 'strict' mode fail the test.
+    * Unsupported chunks can result in warnings, in that case ignore the strict
+    * setting, otherwise fail the test on warnings as well as errors.
+    */
+   if (error_count > 0)
+   {
+      /* We don't really expect to get here because of the setjmp handling
+       * above, but this is safe.
+       */
+      fprintf(STDERR, "\n  %s: %d libpng errors found (%d warnings)",
+          inname, error_count, warning_count);
+
+      if (strict != 0)
+         return 1;
+   }
+
+#  ifdef PNG_WRITE_SUPPORTED
+      /* If there is no write support nothing was written! */
+      else if (unsupported_chunks > 0)
+      {
+         fprintf(STDERR, "\n  %s: unsupported chunks (%d)%s",
+             inname, unsupported_chunks, strict ? ": IGNORED --strict!" : "");
+      }
+#  endif
+
+   else if (warning_count > 0)
+   {
+      fprintf(STDERR, "\n  %s: %d libpng warnings found",
+          inname, warning_count);
+
+      if (strict != 0)
+         return 1;
+   }
+
+   pngtest_debug("Opening files for comparison");
+   if ((fpin = fopen(inname, "rb")) == NULL)
+   {
+      fprintf(STDERR, "Could not find file %s\n", inname);
+      return 1;
+   }
+
+   if ((fpout = fopen(outname, "rb")) == NULL)
+   {
+      fprintf(STDERR, "Could not find file %s\n", outname);
+      FCLOSE(fpin);
+      return 1;
+   }
+
+#if defined (PNG_WRITE_SUPPORTED) /* else nothing was written */ &&\
+    defined (PNG_WRITE_FILTER_SUPPORTED)
+   if (interlace_preserved != 0) /* else the files will be changed */
+   {
+      for (;;)
+      {
+         static int wrote_question = 0;
+         size_t num_in, num_out;
+         char inbuf[256], outbuf[256];
+
+         num_in = fread(inbuf, 1, sizeof inbuf, fpin);
+         num_out = fread(outbuf, 1, sizeof outbuf, fpout);
+
+         if (num_in != num_out)
+         {
+            fprintf(STDERR, "\nFiles %s and %s are of a different size\n",
+                inname, outname);
+
+            if (wrote_question == 0 && unsupported_chunks == 0)
+            {
+               fprintf(STDERR,
+                   "   Was %s written with the same maximum IDAT"
+                   " chunk size (%d bytes),",
+                   inname, PNG_ZBUF_SIZE);
+               fprintf(STDERR,
+                   "\n   filtering heuristic (libpng default), compression");
+               fprintf(STDERR,
+                   " level (zlib default),\n   and zlib version (%s)?\n\n",
+                   ZLIB_VERSION);
+               wrote_question = 1;
+            }
+
+            FCLOSE(fpin);
+            FCLOSE(fpout);
+
+            if (strict != 0 && unsupported_chunks == 0)
+              return 1;
+
+            else
+              return 0;
+         }
+
+         if (num_in == 0)
+            break;
+
+         if (memcmp(inbuf, outbuf, num_in))
+         {
+            fprintf(STDERR, "\nFiles %s and %s are different\n", inname,
+                outname);
+
+            if (wrote_question == 0 && unsupported_chunks == 0)
+            {
+               fprintf(STDERR,
+                   "   Was %s written with the same maximum"
+                   " IDAT chunk size (%d bytes),",
+                    inname, PNG_ZBUF_SIZE);
+               fprintf(STDERR,
+                   "\n   filtering heuristic (libpng default), compression");
+               fprintf(STDERR,
+                   " level (zlib default),\n   and zlib version (%s)?\n\n",
+                 ZLIB_VERSION);
+               wrote_question = 1;
+            }
+
+            FCLOSE(fpin);
+            FCLOSE(fpout);
+
+            /* NOTE: the unsupported_chunks escape is permitted here because
+             * unsupported text chunk compression will result in the compression
+             * mode being changed (to NONE) yet, in the test case, the result
+             * can be exactly the same size!
+             */
+            if (strict != 0 && unsupported_chunks == 0)
+              return 1;
+
+            else
+              return 0;
+         }
+      }
+   }
+#endif /* WRITE && WRITE_FILTER */
+
+   FCLOSE(fpin);
+   FCLOSE(fpout);
+
+   return 0;
+}
+
+/* Input and output filenames */
+#ifdef RISCOS
+static const char *inname = "pngtest/png";
+static const char *outname = "pngout/png";
+#else
+static const char *inname = "pngtest.png";
+static const char *outname = "pngout.png";
+#endif
+
+int
+main(int argc, char *argv[])
+{
+   int multiple = 0;
+   int ierror = 0;
+
+   png_structp dummy_ptr;
+
+   fprintf(STDERR, "\n Testing libpng version %s\n", PNG_LIBPNG_VER_STRING);
+   fprintf(STDERR, "   with zlib   version %s\n", ZLIB_VERSION);
+   fprintf(STDERR, "%s", png_get_copyright(NULL));
+   /* Show the version of libpng used in building the library */
+   fprintf(STDERR, " library (%lu):%s",
+       (unsigned long)png_access_version_number(),
+       png_get_header_version(NULL));
+
+   /* Show the version of libpng used in building the application */
+   fprintf(STDERR, " pngtest (%lu):%s", (unsigned long)PNG_LIBPNG_VER,
+       PNG_HEADER_VERSION_STRING);
+
+   /* Do some consistency checking on the memory allocation settings, I'm
+    * not sure this matters, but it is nice to know, the first of these
+    * tests should be impossible because of the way the macros are set
+    * in pngconf.h
+    */
+#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K)
+      fprintf(STDERR, " NOTE: Zlib compiled for max 64k, libpng not\n");
+#endif
+   /* I think the following can happen. */
+#if !defined(MAXSEG_64K) && defined(PNG_MAX_MALLOC_64K)
+      fprintf(STDERR, " NOTE: libpng compiled for max 64k, zlib not\n");
+#endif
+
+   if (strcmp(png_libpng_ver, PNG_LIBPNG_VER_STRING))
+   {
+      fprintf(STDERR,
+          "Warning: versions are different between png.h and png.c\n");
+      fprintf(STDERR, "  png.h version: %s\n", PNG_LIBPNG_VER_STRING);
+      fprintf(STDERR, "  png.c version: %s\n\n", png_libpng_ver);
+      ++ierror;
+   }
+
+   if (argc > 1)
+   {
+      if (strcmp(argv[1], "-m") == 0)
+      {
+         multiple = 1;
+         status_dots_requested = 0;
+      }
+
+      else if (strcmp(argv[1], "-mv") == 0 ||
+               strcmp(argv[1], "-vm") == 0 )
+      {
+         multiple = 1;
+         verbose = 1;
+         status_dots_requested = 1;
+      }
+
+      else if (strcmp(argv[1], "-v") == 0)
+      {
+         verbose = 1;
+         status_dots_requested = 1;
+         inname = argv[2];
+      }
+
+      else if (strcmp(argv[1], "--strict") == 0)
+      {
+         status_dots_requested = 0;
+         verbose = 1;
+         inname = argv[2];
+         strict++;
+         relaxed = 0;
+         multiple=1;
+      }
+
+      else if (strcmp(argv[1], "--relaxed") == 0)
+      {
+         status_dots_requested = 0;
+         verbose = 1;
+         inname = argv[2];
+         strict = 0;
+         relaxed++;
+         multiple=1;
+      }
+      else if (strcmp(argv[1], "--xfail") == 0)
+      {
+         status_dots_requested = 0;
+         verbose = 1;
+         inname = argv[2];
+         strict = 0;
+         xfail++;
+         relaxed++;
+         multiple=1;
+      }
+
+      else
+      {
+         inname = argv[1];
+         status_dots_requested = 0;
+      }
+   }
+
+   if (multiple == 0 && argc == 3 + verbose)
+      outname = argv[2 + verbose];
+
+   if ((multiple == 0 && argc > 3 + verbose) ||
+       (multiple != 0 && argc < 2))
+   {
+      fprintf(STDERR,
+          "usage: %s [infile.png] [outfile.png]\n\t%s -m {infile.png}\n",
+          argv[0], argv[0]);
+      fprintf(STDERR,
+          "  reads/writes one PNG file (without -m) or multiple files (-m)\n");
+      fprintf(STDERR,
+          "  with -m %s is used as a temporary file\n", outname);
+      exit(1);
+   }
+
+   if (multiple != 0)
+   {
+      int i;
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+      int allocation_now = current_allocation;
+#endif
+      for (i=2; i<argc; ++i)
+      {
+         int kerror;
+         fprintf(STDERR, "\n Testing %s:", argv[i]);
+#if PNG_DEBUG > 0
+         fprintf(STDERR, "\n");
+#endif
+         kerror = test_one_file(argv[i], outname);
+         if (kerror == 0)
+         {
+#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
+            fprintf(STDERR, "\n PASS (%lu zero samples)\n",
+                (unsigned long)zero_samples);
+#else
+            fprintf(STDERR, " PASS\n");
+#endif
+#ifdef PNG_TIME_RFC1123_SUPPORTED
+            if (tIME_chunk_present != 0)
+               fprintf(STDERR, " tIME = %s\n", tIME_string);
+
+            tIME_chunk_present = 0;
+#endif /* TIME_RFC1123 */
+         }
+
+         else
+         {
+            if (xfail)
+              fprintf(STDERR, " XFAIL\n");
+            else
+            {
+              fprintf(STDERR, " FAIL\n");
+              ierror += kerror;
+            }
+         }
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+         if (allocation_now != current_allocation)
+            fprintf(STDERR, "MEMORY ERROR: %d bytes lost\n",
+                current_allocation - allocation_now);
+
+         if (current_allocation != 0)
+         {
+            memory_infop pinfo = pinformation;
+
+            fprintf(STDERR, "MEMORY ERROR: %d bytes still allocated\n",
+                current_allocation);
+
+            while (pinfo != NULL)
+            {
+               fprintf(STDERR, " %lu bytes at %p\n",
+                   (unsigned long)pinfo->size,
+                   pinfo->pointer);
+               pinfo = pinfo->next;
+            }
+         }
+#endif
+      }
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+         fprintf(STDERR, " Current memory allocation: %10d bytes\n",
+             current_allocation);
+         fprintf(STDERR, " Maximum memory allocation: %10d bytes\n",
+             maximum_allocation);
+         fprintf(STDERR, " Total   memory allocation: %10d bytes\n",
+             total_allocation);
+         fprintf(STDERR, "     Number of allocations: %10d\n",
+             num_allocations);
+#endif
+   }
+
+   else
+   {
+      int i;
+      for (i = 0; i<3; ++i)
+      {
+         int kerror;
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+         int allocation_now = current_allocation;
+#endif
+         if (i == 1)
+            status_dots_requested = 1;
+
+         else if (verbose == 0)
+            status_dots_requested = 0;
+
+         if (i == 0 || verbose == 1 || ierror != 0)
+         {
+            fprintf(STDERR, "\n Testing %s:", inname);
+#if PNG_DEBUG > 0
+            fprintf(STDERR, "\n");
+#endif
+         }
+
+         kerror = test_one_file(inname, outname);
+
+         if (kerror == 0)
+         {
+            if (verbose == 1 || i == 2)
+            {
+#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
+                fprintf(STDERR, "\n PASS (%lu zero samples)\n",
+                    (unsigned long)zero_samples);
+#else
+                fprintf(STDERR, " PASS\n");
+#endif
+#ifdef PNG_TIME_RFC1123_SUPPORTED
+             if (tIME_chunk_present != 0)
+                fprintf(STDERR, " tIME = %s\n", tIME_string);
+#endif /* TIME_RFC1123 */
+            }
+         }
+
+         else
+         {
+            if (verbose == 0 && i != 2)
+            {
+               fprintf(STDERR, "\n Testing %s:", inname);
+#if PNG_DEBUG > 0
+               fprintf(STDERR, "\n");
+#endif
+            }
+
+            if (xfail)
+              fprintf(STDERR, " XFAIL\n");
+            else
+            {
+              fprintf(STDERR, " FAIL\n");
+              ierror += kerror;
+            }
+         }
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+         if (allocation_now != current_allocation)
+             fprintf(STDERR, "MEMORY ERROR: %d bytes lost\n",
+                 current_allocation - allocation_now);
+
+         if (current_allocation != 0)
+         {
+             memory_infop pinfo = pinformation;
+
+             fprintf(STDERR, "MEMORY ERROR: %d bytes still allocated\n",
+                 current_allocation);
+
+             while (pinfo != NULL)
+             {
+                fprintf(STDERR, " %lu bytes at %p\n",
+                    (unsigned long)pinfo->size, pinfo->pointer);
+                pinfo = pinfo->next;
+             }
+          }
+#endif
+       }
+#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
+       fprintf(STDERR, " Current memory allocation: %10d bytes\n",
+           current_allocation);
+       fprintf(STDERR, " Maximum memory allocation: %10d bytes\n",
+           maximum_allocation);
+       fprintf(STDERR, " Total   memory allocation: %10d bytes\n",
+           total_allocation);
+       fprintf(STDERR, "     Number of allocations: %10d\n",
+           num_allocations);
+#endif
+   }
+
+#ifdef PNGTEST_TIMING
+   t_stop = (float)clock();
+   t_misc += (t_stop - t_start);
+   t_start = t_stop;
+   fprintf(STDERR, " CPU time used = %.3f seconds",
+       (t_misc+t_decode+t_encode)/(float)CLOCKS_PER_SEC);
+   fprintf(STDERR, " (decoding %.3f,\n",
+       t_decode/(float)CLOCKS_PER_SEC);
+   fprintf(STDERR, "        encoding %.3f ,",
+       t_encode/(float)CLOCKS_PER_SEC);
+   fprintf(STDERR, " other %.3f seconds)\n\n",
+       t_misc/(float)CLOCKS_PER_SEC);
+#endif
+
+   if (ierror == 0)
+      fprintf(STDERR, " libpng passes test\n");
+
+   else
+      fprintf(STDERR, " libpng FAILS test\n");
+
+   dummy_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+   fprintf(STDERR, " Default limits:\n");
+   fprintf(STDERR, "  width_max  = %lu\n",
+       (unsigned long) png_get_user_width_max(dummy_ptr));
+   fprintf(STDERR, "  height_max = %lu\n",
+       (unsigned long) png_get_user_height_max(dummy_ptr));
+   if (png_get_chunk_cache_max(dummy_ptr) == 0)
+      fprintf(STDERR, "  cache_max  = unlimited\n");
+   else
+      fprintf(STDERR, "  cache_max  = %lu\n",
+          (unsigned long) png_get_chunk_cache_max(dummy_ptr));
+   if (png_get_chunk_malloc_max(dummy_ptr) == 0)
+      fprintf(STDERR, "  malloc_max = unlimited\n");
+   else
+      fprintf(STDERR, "  malloc_max = %lu\n",
+          (unsigned long) png_get_chunk_malloc_max(dummy_ptr));
+   png_destroy_read_struct(&dummy_ptr, NULL, NULL);
+
+   return (int)(ierror != 0);
+}
+#else
+int
+main(void)
+{
+   fprintf(STDERR,
+       " test ignored because libpng was not built with read support\n");
+   /* And skip this test */
+   return PNG_LIBPNG_VER < 10600 ? 0 : 77;
+}
+#endif
+
+/* Generate a compiler error if there is an old png.h in the search path. */
+typedef png_libpng_version_1_6_42 Your_png_h_is_not_version_1_6_42;
diff --git a/reg-io/png/lpng1510/pngtrans.c b/reg-io/png/lpng/pngtrans.c
similarity index 73%
rename from reg-io/png/lpng1510/pngtrans.c
rename to reg-io/png/lpng/pngtrans.c
index 9748b184..72642a75 100644
--- a/reg-io/png/lpng1510/pngtrans.c
+++ b/reg-io/png/lpng/pngtrans.c
@@ -1,10 +1,10 @@
 
 /* pngtrans.c - transforms the data in a row (used by both readers and writers)
  *
- * Last changed in libpng 1.5.10 [March 8, 2012]
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
  *
  * This code is released under the libpng license.
  * For conditions of distribution and use, see the disclaimer
@@ -18,7 +18,7 @@
 #if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
 /* Turn on BGR-to-RGB mapping */
 void PNGAPI
-png_set_bgr(png_structp png_ptr)
+png_set_bgr(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_bgr");
 
@@ -30,9 +30,9 @@ png_set_bgr(png_structp png_ptr)
 #endif
 
 #if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
-/* Turn on 16 bit byte swapping */
+/* Turn on 16-bit byte swapping */
 void PNGAPI
-png_set_swap(png_structp png_ptr)
+png_set_swap(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_swap");
 
@@ -47,7 +47,7 @@ png_set_swap(png_structp png_ptr)
 #if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED)
 /* Turn on pixel packing */
 void PNGAPI
-png_set_packing(png_structp png_ptr)
+png_set_packing(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_packing");
 
@@ -57,7 +57,9 @@ png_set_packing(png_structp png_ptr)
    if (png_ptr->bit_depth < 8)
    {
       png_ptr->transformations |= PNG_PACK;
-      png_ptr->usr_bit_depth = 8;
+#     ifdef PNG_WRITE_SUPPORTED
+         png_ptr->usr_bit_depth = 8;
+#     endif
    }
 }
 #endif
@@ -65,7 +67,7 @@ png_set_packing(png_structp png_ptr)
 #if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED)
 /* Turn on packed pixel swapping */
 void PNGAPI
-png_set_packswap(png_structp png_ptr)
+png_set_packswap(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_packswap");
 
@@ -79,7 +81,7 @@ png_set_packswap(png_structp png_ptr)
 
 #if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
 void PNGAPI
-png_set_shift(png_structp png_ptr, png_const_color_8p true_bits)
+png_set_shift(png_structrp png_ptr, png_const_color_8p true_bits)
 {
    png_debug(1, "in png_set_shift");
 
@@ -94,17 +96,17 @@ png_set_shift(png_structp png_ptr, png_const_color_8p true_bits)
 #if defined(PNG_READ_INTERLACING_SUPPORTED) || \
     defined(PNG_WRITE_INTERLACING_SUPPORTED)
 int PNGAPI
-png_set_interlace_handling(png_structp png_ptr)
+png_set_interlace_handling(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_interlace handling");
 
-   if (png_ptr && png_ptr->interlaced)
+   if (png_ptr != 0 && png_ptr->interlaced != 0)
    {
       png_ptr->transformations |= PNG_INTERLACE;
-      return (7);
+      return 7;
    }
 
-   return (1);
+   return 1;
 }
 #endif
 
@@ -115,44 +117,92 @@ png_set_interlace_handling(png_structp png_ptr)
  * that don't like bytes as parameters.
  */
 void PNGAPI
-png_set_filler(png_structp png_ptr, png_uint_32 filler, int filler_loc)
+png_set_filler(png_structrp png_ptr, png_uint_32 filler, int filler_loc)
 {
    png_debug(1, "in png_set_filler");
 
    if (png_ptr == NULL)
       return;
 
+   /* In libpng 1.6 it is possible to determine whether this is a read or write
+    * operation and therefore to do more checking here for a valid call.
+    */
+   if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0)
+   {
+#     ifdef PNG_READ_FILLER_SUPPORTED
+         /* On read png_set_filler is always valid, regardless of the base PNG
+          * format, because other transformations can give a format where the
+          * filler code can execute (basically an 8 or 16-bit component RGB or G
+          * format.)
+          *
+          * NOTE: usr_channels is not used by the read code!  (This has led to
+          * confusion in the past.)  The filler is only used in the read code.
+          */
+         png_ptr->filler = (png_uint_16)filler;
+#     else
+         png_app_error(png_ptr, "png_set_filler not supported on read");
+         PNG_UNUSED(filler) /* not used in the write case */
+         return;
+#     endif
+   }
+
+   else /* write */
+   {
+#     ifdef PNG_WRITE_FILLER_SUPPORTED
+         /* On write the usr_channels parameter must be set correctly at the
+          * start to record the number of channels in the app-supplied data.
+          */
+         switch (png_ptr->color_type)
+         {
+            case PNG_COLOR_TYPE_RGB:
+               png_ptr->usr_channels = 4;
+               break;
+
+            case PNG_COLOR_TYPE_GRAY:
+               if (png_ptr->bit_depth >= 8)
+               {
+                  png_ptr->usr_channels = 2;
+                  break;
+               }
+
+               else
+               {
+                  /* There simply isn't any code in libpng to strip out bits
+                   * from bytes when the components are less than a byte in
+                   * size!
+                   */
+                  png_app_error(png_ptr,
+                      "png_set_filler is invalid for"
+                      " low bit depth gray output");
+                  return;
+               }
+
+            default:
+               png_app_error(png_ptr,
+                   "png_set_filler: inappropriate color type");
+               return;
+         }
+#     else
+         png_app_error(png_ptr, "png_set_filler not supported on write");
+         return;
+#     endif
+   }
+
+   /* Here on success - libpng supports the operation, set the transformation
+    * and the flag to say where the filler channel is.
+    */
    png_ptr->transformations |= PNG_FILLER;
-   png_ptr->filler = (png_uint_16)filler;
 
    if (filler_loc == PNG_FILLER_AFTER)
       png_ptr->flags |= PNG_FLAG_FILLER_AFTER;
 
    else
       png_ptr->flags &= ~PNG_FLAG_FILLER_AFTER;
-
-   /* This should probably go in the "do_read_filler" routine.
-    * I attempted to do that in libpng-1.0.1a but that caused problems
-    * so I restored it in libpng-1.0.2a
-   */
-
-   if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
-   {
-      png_ptr->usr_channels = 4;
-   }
-
-   /* Also I added this in libpng-1.0.2a (what happens when we expand
-    * a less-than-8-bit grayscale to GA?) */
-
-   if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY && png_ptr->bit_depth >= 8)
-   {
-      png_ptr->usr_channels = 2;
-   }
 }
 
 /* Added to libpng-1.2.7 */
 void PNGAPI
-png_set_add_alpha(png_structp png_ptr, png_uint_32 filler, int filler_loc)
+png_set_add_alpha(png_structrp png_ptr, png_uint_32 filler, int filler_loc)
 {
    png_debug(1, "in png_set_add_alpha");
 
@@ -160,7 +210,9 @@ png_set_add_alpha(png_structp png_ptr, png_uint_32 filler, int filler_loc)
       return;
 
    png_set_filler(png_ptr, filler, filler_loc);
-   png_ptr->transformations |= PNG_ADD_ALPHA;
+   /* The above may fail to do anything. */
+   if ((png_ptr->transformations & PNG_FILLER) != 0)
+      png_ptr->transformations |= PNG_ADD_ALPHA;
 }
 
 #endif
@@ -168,7 +220,7 @@ png_set_add_alpha(png_structp png_ptr, png_uint_32 filler, int filler_loc)
 #if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \
     defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
 void PNGAPI
-png_set_swap_alpha(png_structp png_ptr)
+png_set_swap_alpha(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_swap_alpha");
 
@@ -182,7 +234,7 @@ png_set_swap_alpha(png_structp png_ptr)
 #if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \
     defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
 void PNGAPI
-png_set_invert_alpha(png_structp png_ptr)
+png_set_invert_alpha(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_invert_alpha");
 
@@ -195,7 +247,7 @@ png_set_invert_alpha(png_structp png_ptr)
 
 #if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
 void PNGAPI
-png_set_invert_mono(png_structp png_ptr)
+png_set_invert_mono(png_structrp png_ptr)
 {
    png_debug(1, "in png_set_invert_mono");
 
@@ -217,8 +269,8 @@ png_do_invert(png_row_infop row_info, png_bytep row)
    if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
    {
       png_bytep rp = row;
-      png_size_t i;
-      png_size_t istop = row_info->rowbytes;
+      size_t i;
+      size_t istop = row_info->rowbytes;
 
       for (i = 0; i < istop; i++)
       {
@@ -231,8 +283,8 @@ png_do_invert(png_row_infop row_info, png_bytep row)
       row_info->bit_depth == 8)
    {
       png_bytep rp = row;
-      png_size_t i;
-      png_size_t istop = row_info->rowbytes;
+      size_t i;
+      size_t istop = row_info->rowbytes;
 
       for (i = 0; i < istop; i += 2)
       {
@@ -246,8 +298,8 @@ png_do_invert(png_row_infop row_info, png_bytep row)
       row_info->bit_depth == 16)
    {
       png_bytep rp = row;
-      png_size_t i;
-      png_size_t istop = row_info->rowbytes;
+      size_t i;
+      size_t istop = row_info->rowbytes;
 
       for (i = 0; i < istop; i += 4)
       {
@@ -262,7 +314,7 @@ png_do_invert(png_row_infop row_info, png_bytep row)
 
 #ifdef PNG_16BIT_SUPPORTED
 #if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
-/* Swaps byte order on 16 bit depth images */
+/* Swaps byte order on 16-bit depth images */
 void /* PRIVATE */
 png_do_swap(png_row_infop row_info, png_bytep row)
 {
@@ -276,9 +328,16 @@ png_do_swap(png_row_infop row_info, png_bytep row)
 
       for (i = 0; i < istop; i++, rp += 2)
       {
+#ifdef PNG_BUILTIN_BSWAP16_SUPPORTED
+         /* Feature added to libpng-1.6.11 for testing purposes, not
+          * enabled by default.
+          */
+         *(png_uint_16*)rp = __builtin_bswap16(*(png_uint_16*)rp);
+#else
          png_byte t = *rp;
          *rp = *(rp + 1);
          *(rp + 1) = t;
+#endif
       }
    }
 }
@@ -286,7 +345,7 @@ png_do_swap(png_row_infop row_info, png_bytep row)
 #endif
 
 #if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED)
-static PNG_CONST png_byte onebppswaptable[256] = {
+static const png_byte onebppswaptable[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
    0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
@@ -321,7 +380,7 @@ static PNG_CONST png_byte onebppswaptable[256] = {
    0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
 };
 
-static PNG_CONST png_byte twobppswaptable[256] = {
+static const png_byte twobppswaptable[256] = {
    0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0,
    0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0,
    0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4,
@@ -356,7 +415,7 @@ static PNG_CONST png_byte twobppswaptable[256] = {
    0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF
 };
 
-static PNG_CONST png_byte fourbppswaptable[256] = {
+static const png_byte fourbppswaptable[256] = {
    0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
    0x80, 0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0xE0, 0xF0,
    0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71,
@@ -420,7 +479,7 @@ png_do_packswap(png_row_infop row_info, png_bytep row)
          *rp = table[*rp];
    }
 }
-#endif /* PNG_READ_PACKSWAP_SUPPORTED or PNG_WRITE_PACKSWAP_SUPPORTED */
+#endif /* PACKSWAP || WRITE_PACKSWAP */
 
 #if defined(PNG_WRITE_FILLER_SUPPORTED) || \
     defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
@@ -439,6 +498,8 @@ png_do_strip_channel(png_row_infop row_info, png_bytep row, int at_start)
    png_bytep dp = row; /* destination pointer */
    png_bytep ep = row + row_info->rowbytes; /* One beyond end of row */
 
+   png_debug(1, "in png_do_strip_channel");
+
    /* At the start sp will point to the first byte to copy and dp to where
     * it is copied to.  ep always points just beyond the end of the row, so
     * the loop simply copies (channels-1) channels until sp reaches ep.
@@ -452,27 +513,35 @@ png_do_strip_channel(png_row_infop row_info, png_bytep row, int at_start)
    {
       if (row_info->bit_depth == 8)
       {
-         if (at_start) /* Skip initial filler */
+         if (at_start != 0) /* Skip initial filler */
             ++sp;
          else          /* Skip initial channel and, for sp, the filler */
-            sp += 2, ++dp;
+         {
+            sp += 2; ++dp;
+         }
 
          /* For a 1 pixel wide image there is nothing to do */
          while (sp < ep)
-            *dp++ = *sp, sp += 2;
+         {
+            *dp++ = *sp; sp += 2;
+         }
 
          row_info->pixel_depth = 8;
       }
 
       else if (row_info->bit_depth == 16)
       {
-         if (at_start) /* Skip initial filler */
+         if (at_start != 0) /* Skip initial filler */
             sp += 2;
          else          /* Skip initial channel and, for sp, the filler */
-            sp += 4, dp += 2;
+         {
+            sp += 4; dp += 2;
+         }
 
          while (sp < ep)
-            *dp++ = *sp++, *dp++ = *sp, sp += 3;
+         {
+            *dp++ = *sp++; *dp++ = *sp; sp += 3;
+         }
 
          row_info->pixel_depth = 16;
       }
@@ -492,31 +561,37 @@ png_do_strip_channel(png_row_infop row_info, png_bytep row, int at_start)
    {
       if (row_info->bit_depth == 8)
       {
-         if (at_start) /* Skip initial filler */
+         if (at_start != 0) /* Skip initial filler */
             ++sp;
          else          /* Skip initial channels and, for sp, the filler */
-            sp += 4, dp += 3;
+         {
+            sp += 4; dp += 3;
+         }
 
          /* Note that the loop adds 3 to dp and 4 to sp each time. */
          while (sp < ep)
-            *dp++ = *sp++, *dp++ = *sp++, *dp++ = *sp, sp += 2;
+         {
+            *dp++ = *sp++; *dp++ = *sp++; *dp++ = *sp; sp += 2;
+         }
 
          row_info->pixel_depth = 24;
       }
 
       else if (row_info->bit_depth == 16)
       {
-         if (at_start) /* Skip initial filler */
+         if (at_start != 0) /* Skip initial filler */
             sp += 2;
          else          /* Skip initial channels and, for sp, the filler */
-            sp += 8, dp += 6;
+         {
+            sp += 8; dp += 6;
+         }
 
          while (sp < ep)
          {
             /* Copy 6 bytes, skip 2 */
-            *dp++ = *sp++, *dp++ = *sp++;
-            *dp++ = *sp++, *dp++ = *sp++;
-            *dp++ = *sp++, *dp++ = *sp, sp += 3;
+            *dp++ = *sp++; *dp++ = *sp++;
+            *dp++ = *sp++; *dp++ = *sp++;
+            *dp++ = *sp++; *dp++ = *sp; sp += 3;
          }
 
          row_info->pixel_depth = 48;
@@ -536,7 +611,7 @@ png_do_strip_channel(png_row_infop row_info, png_bytep row, int at_start)
       return; /* The filler channel has gone already */
 
    /* Fix the rowbytes value. */
-   row_info->rowbytes = dp-row;
+   row_info->rowbytes = (size_t)(dp-row);
 }
 #endif
 
@@ -547,7 +622,7 @@ png_do_bgr(png_row_infop row_info, png_bytep row)
 {
    png_debug(1, "in png_do_bgr");
 
-   if ((row_info->color_type & PNG_COLOR_MASK_COLOR))
+   if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
    {
       png_uint_32 row_width = row_info->width;
       if (row_info->bit_depth == 8)
@@ -617,16 +692,18 @@ png_do_bgr(png_row_infop row_info, png_bytep row)
 #endif
    }
 }
-#endif /* PNG_READ_BGR_SUPPORTED or PNG_WRITE_BGR_SUPPORTED */
+#endif /* READ_BGR || WRITE_BGR */
 
 #if defined(PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED) || \
     defined(PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED)
 /* Added at libpng-1.5.10 */
 void /* PRIVATE */
-png_do_check_palette_indexes(png_structp png_ptr, png_row_infop row_info)
+png_do_check_palette_indexes(png_structrp png_ptr, png_row_infop row_info)
 {
+   png_debug(1, "in png_do_check_palette_indexes");
+
    if (png_ptr->num_palette < (1 << row_info->bit_depth) &&
-      png_ptr->num_palette_max >= 0)
+      png_ptr->num_palette > 0) /* num_palette can be 0 in MNG files */
    {
       /* Calculations moved outside switch in an attempt to stop different
        * compiler warnings.  'padding' is in *bits* within the last byte, it is
@@ -634,7 +711,7 @@ png_do_check_palette_indexes(png_structp png_ptr, png_row_infop row_info)
        * and this calculation is used because it avoids warnings that other
        * forms produced on either GCC or MSVC.
        */
-      int padding = (-row_info->pixel_depth * row_info->width) & 7;
+      int padding = PNG_PADBITS(row_info->pixel_depth, row_info->width);
       png_bytep rp = png_ptr->row_buf + row_info->rowbytes;
 
       switch (row_info->bit_depth)
@@ -646,7 +723,7 @@ png_do_check_palette_indexes(png_structp png_ptr, png_row_infop row_info)
              */
             for (; rp > png_ptr->row_buf; rp--)
             {
-              if (*rp >> padding != 0)
+              if ((*rp >> padding) != 0)
                  png_ptr->num_palette_max = 1;
               padding = 0;
             }
@@ -708,7 +785,7 @@ png_do_check_palette_indexes(png_structp png_ptr, png_row_infop row_info)
          {
             for (; rp > png_ptr->row_buf; rp--)
             {
-               if (*rp >= png_ptr->num_palette_max)
+               if (*rp > png_ptr->num_palette_max)
                   png_ptr->num_palette_max = (int) *rp;
             }
 
@@ -720,19 +797,30 @@ png_do_check_palette_indexes(png_structp png_ptr, png_row_infop row_info)
       }
    }
 }
-#endif /* PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED */
+#endif /* CHECK_FOR_INVALID_INDEX */
 
 #if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
     defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
 #ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
 void PNGAPI
-png_set_user_transform_info(png_structp png_ptr, png_voidp
+png_set_user_transform_info(png_structrp png_ptr, png_voidp
    user_transform_ptr, int user_transform_depth, int user_transform_channels)
 {
    png_debug(1, "in png_set_user_transform_info");
 
    if (png_ptr == NULL)
       return;
+
+#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
+   if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 &&
+      (png_ptr->flags & PNG_FLAG_ROW_INIT) != 0)
+   {
+      png_app_error(png_ptr,
+          "info change after png_start_read_image or png_read_update_info");
+      return;
+   }
+#endif
+
    png_ptr->user_transform_ptr = user_transform_ptr;
    png_ptr->user_transform_depth = (png_byte)user_transform_depth;
    png_ptr->user_transform_channels = (png_byte)user_transform_channels;
@@ -746,20 +834,20 @@ png_set_user_transform_info(png_structp png_ptr, png_voidp
  */
 #ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
 png_voidp PNGAPI
-png_get_user_transform_ptr(png_const_structp png_ptr)
+png_get_user_transform_ptr(png_const_structrp png_ptr)
 {
    if (png_ptr == NULL)
-      return (NULL);
+      return NULL;
 
-   return ((png_voidp)png_ptr->user_transform_ptr);
+   return png_ptr->user_transform_ptr;
 }
 #endif
 
 #ifdef PNG_USER_TRANSFORM_INFO_SUPPORTED
 png_uint_32 PNGAPI
-png_get_current_row_number(png_const_structp png_ptr)
+png_get_current_row_number(png_const_structrp png_ptr)
 {
-   /* See the comments in png.h - this is the sub-image row when reading and
+   /* See the comments in png.h - this is the sub-image row when reading an
     * interlaced image.
     */
    if (png_ptr != NULL)
@@ -769,13 +857,12 @@ png_get_current_row_number(png_const_structp png_ptr)
 }
 
 png_byte PNGAPI
-png_get_current_pass_number(png_const_structp png_ptr)
+png_get_current_pass_number(png_const_structrp png_ptr)
 {
    if (png_ptr != NULL)
       return png_ptr->pass;
    return 8; /* invalid */
 }
-#endif /* PNG_USER_TRANSFORM_INFO_SUPPORTED */
-#endif /* PNG_READ_USER_TRANSFORM_SUPPORTED ||
-          PNG_WRITE_USER_TRANSFORM_SUPPORTED */
-#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
+#endif /* USER_TRANSFORM_INFO */
+#endif /* READ_USER_TRANSFORM || WRITE_USER_TRANSFORM */
+#endif /* READ || WRITE */
diff --git a/reg-io/png/lpng1510/pngwio.c b/reg-io/png/lpng/pngwio.c
similarity index 61%
rename from reg-io/png/lpng1510/pngwio.c
rename to reg-io/png/lpng/pngwio.c
index 8eacf9f6..b6adfd53 100644
--- a/reg-io/png/lpng1510/pngwio.c
+++ b/reg-io/png/lpng/pngwio.c
@@ -1,10 +1,10 @@
 
 /* pngwio.c - functions for data output
  *
- * Last changed in libpng 1.5.0 [January 6, 2011]
- * Copyright (c) 1998-2011 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2014,2016,2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
  *
  * This code is released under the libpng license.
  * For conditions of distribution and use, see the disclaimer
@@ -26,15 +26,16 @@
  * writes to a file pointer.  Note that this routine sometimes gets called
  * with very small lengths, so you should implement some kind of simple
  * buffering if you are using unbuffered writes.  This should never be asked
- * to write more than 64K on a 16 bit machine.
+ * to write more than 64K on a 16-bit machine.
  */
 
 void /* PRIVATE */
-png_write_data(png_structp png_ptr, png_const_bytep data, png_size_t length)
+png_write_data(png_structrp png_ptr, png_const_bytep data, size_t length)
 {
    /* NOTE: write_data_fn must not change the buffer! */
    if (png_ptr->write_data_fn != NULL )
-      (*(png_ptr->write_data_fn))(png_ptr, (png_bytep)data, length);
+      (*(png_ptr->write_data_fn))(png_ptr, png_constcast(png_bytep,data),
+          length);
 
    else
       png_error(png_ptr, "Call to NULL write function");
@@ -46,11 +47,10 @@ png_write_data(png_structp png_ptr, png_const_bytep data, png_size_t length)
  * write_data function and use it at run time with png_set_write_fn(), rather
  * than changing the library.
  */
-#ifndef USE_FAR_KEYWORD
 void PNGCBAPI
-png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
+png_default_write_data(png_structp png_ptr, png_bytep data, size_t length)
 {
-   png_size_t check;
+   size_t check;
 
    if (png_ptr == NULL)
       return;
@@ -60,64 +60,6 @@ png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
    if (check != length)
       png_error(png_ptr, "Write Error");
 }
-#else
-/* This is the model-independent version. Since the standard I/O library
- * can't handle far buffers in the medium and small models, we have to copy
- * the data.
- */
-
-#define NEAR_BUF_SIZE 1024
-#define MIN(a,b) (a <= b ? a : b)
-
-void PNGCBAPI
-png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
-{
-   png_uint_32 check;
-   png_byte *near_data;  /* Needs to be "png_byte *" instead of "png_bytep" */
-   png_FILE_p io_ptr;
-
-   if (png_ptr == NULL)
-      return;
-
-   /* Check if data really is near. If so, use usual code. */
-   near_data = (png_byte *)CVT_PTR_NOCHECK(data);
-   io_ptr = (png_FILE_p)CVT_PTR(png_ptr->io_ptr);
-
-   if ((png_bytep)near_data == data)
-   {
-      check = fwrite(near_data, 1, length, io_ptr);
-   }
-
-   else
-   {
-      png_byte buf[NEAR_BUF_SIZE];
-      png_size_t written, remaining, err;
-      check = 0;
-      remaining = length;
-
-      do
-      {
-         written = MIN(NEAR_BUF_SIZE, remaining);
-         png_memcpy(buf, data, written); /* Copy far buffer to near buffer */
-         err = fwrite(buf, 1, written, io_ptr);
-
-         if (err != written)
-            break;
-
-         else
-            check += err;
-
-         data += written;
-         remaining -= written;
-      }
-      while (remaining != 0);
-   }
-
-   if (check != length)
-      png_error(png_ptr, "Write Error");
-}
-
-#endif
 #endif
 
 /* This function is called to output any data pending writing (normally
@@ -126,7 +68,7 @@ png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
  */
 #ifdef PNG_WRITE_FLUSH_SUPPORTED
 void /* PRIVATE */
-png_flush(png_structp png_ptr)
+png_flush(png_structrp png_ptr)
 {
    if (png_ptr->output_flush_fn != NULL)
       (*(png_ptr->output_flush_fn))(png_ptr);
@@ -141,7 +83,7 @@ png_default_flush(png_structp png_ptr)
    if (png_ptr == NULL)
       return;
 
-   io_ptr = (png_FILE_p)CVT_PTR((png_ptr->io_ptr));
+   io_ptr = png_voidcast(png_FILE_p, (png_ptr->io_ptr));
    fflush(io_ptr);
 }
 #  endif
@@ -177,7 +119,7 @@ png_default_flush(png_structp png_ptr)
  *                 *FILE structure.
  */
 void PNGAPI
-png_set_write_fn(png_structp png_ptr, png_voidp io_ptr,
+png_set_write_fn(png_structrp png_ptr, png_voidp io_ptr,
     png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn)
 {
    if (png_ptr == NULL)
@@ -207,8 +149,11 @@ png_set_write_fn(png_structp png_ptr, png_voidp io_ptr,
 #  else
    png_ptr->output_flush_fn = output_flush_fn;
 #  endif
-#endif /* PNG_WRITE_FLUSH_SUPPORTED */
+#else
+   PNG_UNUSED(output_flush_fn)
+#endif /* WRITE_FLUSH */
 
+#ifdef PNG_READ_SUPPORTED
    /* It is an error to read while writing a png file */
    if (png_ptr->read_data_fn != NULL)
    {
@@ -218,37 +163,6 @@ png_set_write_fn(png_structp png_ptr, png_voidp io_ptr,
           "Can't set both read_data_fn and write_data_fn in the"
           " same structure");
    }
-}
-
-#ifdef USE_FAR_KEYWORD
-#  ifdef _MSC_VER
-void *png_far_to_near(png_structp png_ptr, png_voidp ptr, int check)
-{
-   void *near_ptr;
-   void FAR *far_ptr;
-   FP_OFF(near_ptr) = FP_OFF(ptr);
-   far_ptr = (void FAR *)near_ptr;
-
-   if (check != 0)
-      if (FP_SEG(ptr) != FP_SEG(far_ptr))
-         png_error(png_ptr, "segment lost in conversion");
-
-   return(near_ptr);
-}
-#  else
-void *png_far_to_near(png_structp png_ptr, png_voidp ptr, int check)
-{
-   void *near_ptr;
-   void FAR *far_ptr;
-   near_ptr = (void FAR *)ptr;
-   far_ptr = (void FAR *)near_ptr;
-
-   if (check != 0)
-      if (far_ptr != ptr)
-         png_error(png_ptr, "segment lost in conversion");
-
-   return(near_ptr);
-}
-#  endif
 #endif
-#endif /* PNG_WRITE_SUPPORTED */
+}
+#endif /* WRITE */
diff --git a/reg-io/png/lpng/pngwrite.c b/reg-io/png/lpng/pngwrite.c
new file mode 100644
index 00000000..780c7901
--- /dev/null
+++ b/reg-io/png/lpng/pngwrite.c
@@ -0,0 +1,2418 @@
+
+/* pngwrite.c - general routines to write a PNG file
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "pngpriv.h"
+#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED
+#  include <errno.h>
+#endif /* SIMPLIFIED_WRITE_STDIO */
+
+#ifdef PNG_WRITE_SUPPORTED
+
+#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+/* Write out all the unknown chunks for the current given location */
+static void
+write_unknown_chunks(png_structrp png_ptr, png_const_inforp info_ptr,
+    unsigned int where)
+{
+   if (info_ptr->unknown_chunks_num != 0)
+   {
+      png_const_unknown_chunkp up;
+
+      png_debug(5, "writing extra chunks");
+
+      for (up = info_ptr->unknown_chunks;
+           up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
+           ++up)
+         if ((up->location & where) != 0)
+      {
+         /* If per-chunk unknown chunk handling is enabled use it, otherwise
+          * just write the chunks the application has set.
+          */
+#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
+         int keep = png_handle_as_unknown(png_ptr, up->name);
+
+         /* NOTE: this code is radically different from the read side in the
+          * matter of handling an ancillary unknown chunk.  In the read side
+          * the default behavior is to discard it, in the code below the default
+          * behavior is to write it.  Critical chunks are, however, only
+          * written if explicitly listed or if the default is set to write all
+          * unknown chunks.
+          *
+          * The default handling is also slightly weird - it is not possible to
+          * stop the writing of all unsafe-to-copy chunks!
+          *
+          * TODO: REVIEW: this would seem to be a bug.
+          */
+         if (keep != PNG_HANDLE_CHUNK_NEVER &&
+             ((up->name[3] & 0x20) /* safe-to-copy overrides everything */ ||
+              keep == PNG_HANDLE_CHUNK_ALWAYS ||
+              (keep == PNG_HANDLE_CHUNK_AS_DEFAULT &&
+               png_ptr->unknown_default == PNG_HANDLE_CHUNK_ALWAYS)))
+#endif
+         {
+            /* TODO: review, what is wrong with a zero length unknown chunk? */
+            if (up->size == 0)
+               png_warning(png_ptr, "Writing zero-length unknown chunk");
+
+            png_write_chunk(png_ptr, up->name, up->data, up->size);
+         }
+      }
+   }
+}
+#endif /* WRITE_UNKNOWN_CHUNKS */
+
+/* Writes all the PNG information.  This is the suggested way to use the
+ * library.  If you have a new chunk to add, make a function to write it,
+ * and put it in the correct location here.  If you want the chunk written
+ * after the image data, put it in png_write_end().  I strongly encourage
+ * you to supply a PNG_INFO_<chunk> flag, and check info_ptr->valid before
+ * writing the chunk, as that will keep the code from breaking if you want
+ * to just write a plain PNG file.  If you have long comments, I suggest
+ * writing them in png_write_end(), and compressing them.
+ */
+void PNGAPI
+png_write_info_before_PLTE(png_structrp png_ptr, png_const_inforp info_ptr)
+{
+   png_debug(1, "in png_write_info_before_PLTE");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   if ((png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE) == 0)
+   {
+      /* Write PNG signature */
+      png_write_sig(png_ptr);
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+      if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) != 0 && \
+          png_ptr->mng_features_permitted != 0)
+      {
+         png_warning(png_ptr,
+             "MNG features are not allowed in a PNG datastream");
+         png_ptr->mng_features_permitted = 0;
+      }
+#endif
+
+      /* Write IHDR information. */
+      png_write_IHDR(png_ptr, info_ptr->width, info_ptr->height,
+          info_ptr->bit_depth, info_ptr->color_type, info_ptr->compression_type,
+          info_ptr->filter_type,
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+          info_ptr->interlace_type
+#else
+          0
+#endif
+         );
+
+      /* The rest of these check to see if the valid field has the appropriate
+       * flag set, and if it does, writes the chunk.
+       *
+       * 1.6.0: COLORSPACE support controls the writing of these chunks too, and
+       * the chunks will be written if the WRITE routine is there and
+       * information * is available in the COLORSPACE. (See
+       * png_colorspace_sync_info in png.c for where the valid flags get set.)
+       *
+       * Under certain circumstances the colorspace can be invalidated without
+       * syncing the info_struct 'valid' flags; this happens if libpng detects
+       * an error and calls png_error while the color space is being set, yet
+       * the application continues writing the PNG.  So check the 'invalid'
+       * flag here too.
+       */
+#ifdef PNG_GAMMA_SUPPORTED
+#  ifdef PNG_WRITE_gAMA_SUPPORTED
+      if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 &&
+          (info_ptr->colorspace.flags & PNG_COLORSPACE_FROM_gAMA) != 0 &&
+          (info_ptr->valid & PNG_INFO_gAMA) != 0)
+         png_write_gAMA_fixed(png_ptr, info_ptr->colorspace.gamma);
+#  endif
+#endif
+
+#ifdef PNG_COLORSPACE_SUPPORTED
+      /* Write only one of sRGB or an ICC profile.  If a profile was supplied
+       * and it matches one of the known sRGB ones issue a warning.
+       */
+#  ifdef PNG_WRITE_iCCP_SUPPORTED
+         if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 &&
+             (info_ptr->valid & PNG_INFO_iCCP) != 0)
+         {
+#    ifdef PNG_WRITE_sRGB_SUPPORTED
+               if ((info_ptr->valid & PNG_INFO_sRGB) != 0)
+                  png_app_warning(png_ptr,
+                      "profile matches sRGB but writing iCCP instead");
+#     endif
+
+            png_write_iCCP(png_ptr, info_ptr->iccp_name,
+                info_ptr->iccp_profile);
+         }
+#     ifdef PNG_WRITE_sRGB_SUPPORTED
+         else
+#     endif
+#  endif
+
+#  ifdef PNG_WRITE_sRGB_SUPPORTED
+         if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 &&
+             (info_ptr->valid & PNG_INFO_sRGB) != 0)
+            png_write_sRGB(png_ptr, info_ptr->colorspace.rendering_intent);
+#  endif /* WRITE_sRGB */
+#endif /* COLORSPACE */
+
+#ifdef PNG_WRITE_sBIT_SUPPORTED
+         if ((info_ptr->valid & PNG_INFO_sBIT) != 0)
+            png_write_sBIT(png_ptr, &(info_ptr->sig_bit), info_ptr->color_type);
+#endif
+
+#ifdef PNG_COLORSPACE_SUPPORTED
+#  ifdef PNG_WRITE_cHRM_SUPPORTED
+         if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 &&
+             (info_ptr->colorspace.flags & PNG_COLORSPACE_FROM_cHRM) != 0 &&
+             (info_ptr->valid & PNG_INFO_cHRM) != 0)
+            png_write_cHRM_fixed(png_ptr, &info_ptr->colorspace.end_points_xy);
+#  endif
+#endif
+
+#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+         write_unknown_chunks(png_ptr, info_ptr, PNG_HAVE_IHDR);
+#endif
+
+      png_ptr->mode |= PNG_WROTE_INFO_BEFORE_PLTE;
+   }
+}
+
+void PNGAPI
+png_write_info(png_structrp png_ptr, png_const_inforp info_ptr)
+{
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
+   int i;
+#endif
+
+   png_debug(1, "in png_write_info");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   png_write_info_before_PLTE(png_ptr, info_ptr);
+
+   if ((info_ptr->valid & PNG_INFO_PLTE) != 0)
+      png_write_PLTE(png_ptr, info_ptr->palette,
+          (png_uint_32)info_ptr->num_palette);
+
+   else if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      png_error(png_ptr, "Valid palette required for paletted images");
+
+#ifdef PNG_WRITE_tRNS_SUPPORTED
+   if ((info_ptr->valid & PNG_INFO_tRNS) !=0)
+   {
+#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
+      /* Invert the alpha channel (in tRNS) */
+      if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0 &&
+          info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         int j, jend;
+
+         jend = info_ptr->num_trans;
+         if (jend > PNG_MAX_PALETTE_LENGTH)
+            jend = PNG_MAX_PALETTE_LENGTH;
+
+         for (j = 0; j<jend; ++j)
+            info_ptr->trans_alpha[j] =
+               (png_byte)(255 - info_ptr->trans_alpha[j]);
+      }
+#endif
+      png_write_tRNS(png_ptr, info_ptr->trans_alpha, &(info_ptr->trans_color),
+          info_ptr->num_trans, info_ptr->color_type);
+   }
+#endif
+#ifdef PNG_WRITE_bKGD_SUPPORTED
+   if ((info_ptr->valid & PNG_INFO_bKGD) != 0)
+      png_write_bKGD(png_ptr, &(info_ptr->background), info_ptr->color_type);
+#endif
+
+#ifdef PNG_WRITE_eXIf_SUPPORTED
+   if ((info_ptr->valid & PNG_INFO_eXIf) != 0)
+   {
+      png_write_eXIf(png_ptr, info_ptr->exif, info_ptr->num_exif);
+      png_ptr->mode |= PNG_WROTE_eXIf;
+   }
+#endif
+
+#ifdef PNG_WRITE_hIST_SUPPORTED
+   if ((info_ptr->valid & PNG_INFO_hIST) != 0)
+      png_write_hIST(png_ptr, info_ptr->hist, info_ptr->num_palette);
+#endif
+
+#ifdef PNG_WRITE_oFFs_SUPPORTED
+   if ((info_ptr->valid & PNG_INFO_oFFs) != 0)
+      png_write_oFFs(png_ptr, info_ptr->x_offset, info_ptr->y_offset,
+          info_ptr->offset_unit_type);
+#endif
+
+#ifdef PNG_WRITE_pCAL_SUPPORTED
+   if ((info_ptr->valid & PNG_INFO_pCAL) != 0)
+      png_write_pCAL(png_ptr, info_ptr->pcal_purpose, info_ptr->pcal_X0,
+          info_ptr->pcal_X1, info_ptr->pcal_type, info_ptr->pcal_nparams,
+          info_ptr->pcal_units, info_ptr->pcal_params);
+#endif
+
+#ifdef PNG_WRITE_sCAL_SUPPORTED
+   if ((info_ptr->valid & PNG_INFO_sCAL) != 0)
+      png_write_sCAL_s(png_ptr, (int)info_ptr->scal_unit,
+          info_ptr->scal_s_width, info_ptr->scal_s_height);
+#endif /* sCAL */
+
+#ifdef PNG_WRITE_pHYs_SUPPORTED
+   if ((info_ptr->valid & PNG_INFO_pHYs) != 0)
+      png_write_pHYs(png_ptr, info_ptr->x_pixels_per_unit,
+          info_ptr->y_pixels_per_unit, info_ptr->phys_unit_type);
+#endif /* pHYs */
+
+#ifdef PNG_WRITE_tIME_SUPPORTED
+   if ((info_ptr->valid & PNG_INFO_tIME) != 0)
+   {
+      png_write_tIME(png_ptr, &(info_ptr->mod_time));
+      png_ptr->mode |= PNG_WROTE_tIME;
+   }
+#endif /* tIME */
+
+#ifdef PNG_WRITE_sPLT_SUPPORTED
+   if ((info_ptr->valid & PNG_INFO_sPLT) != 0)
+      for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
+         png_write_sPLT(png_ptr, info_ptr->splt_palettes + i);
+#endif /* sPLT */
+
+#ifdef PNG_WRITE_TEXT_SUPPORTED
+   /* Check to see if we need to write text chunks */
+   for (i = 0; i < info_ptr->num_text; i++)
+   {
+      png_debug2(2, "Writing header text chunk %d, type %d", i,
+          info_ptr->text[i].compression);
+      /* An internationalized chunk? */
+      if (info_ptr->text[i].compression > 0)
+      {
+#ifdef PNG_WRITE_iTXt_SUPPORTED
+         /* Write international chunk */
+         png_write_iTXt(png_ptr,
+             info_ptr->text[i].compression,
+             info_ptr->text[i].key,
+             info_ptr->text[i].lang,
+             info_ptr->text[i].lang_key,
+             info_ptr->text[i].text);
+         /* Mark this chunk as written */
+         if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
+            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+         else
+            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
+#else
+         png_warning(png_ptr, "Unable to write international text");
+#endif
+      }
+
+      /* If we want a compressed text chunk */
+      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_zTXt)
+      {
+#ifdef PNG_WRITE_zTXt_SUPPORTED
+         /* Write compressed chunk */
+         png_write_zTXt(png_ptr, info_ptr->text[i].key,
+             info_ptr->text[i].text, info_ptr->text[i].compression);
+         /* Mark this chunk as written */
+         info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
+#else
+         png_warning(png_ptr, "Unable to write compressed text");
+#endif
+      }
+
+      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
+      {
+#ifdef PNG_WRITE_tEXt_SUPPORTED
+         /* Write uncompressed chunk */
+         png_write_tEXt(png_ptr, info_ptr->text[i].key,
+             info_ptr->text[i].text,
+             0);
+         /* Mark this chunk as written */
+         info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+#else
+         /* Can't get here */
+         png_warning(png_ptr, "Unable to write uncompressed text");
+#endif
+      }
+   }
+#endif /* tEXt */
+
+#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+   write_unknown_chunks(png_ptr, info_ptr, PNG_HAVE_PLTE);
+#endif
+}
+
+/* Writes the end of the PNG file.  If you don't want to write comments or
+ * time information, you can pass NULL for info.  If you already wrote these
+ * in png_write_info(), do not write them again here.  If you have long
+ * comments, I suggest writing them here, and compressing them.
+ */
+void PNGAPI
+png_write_end(png_structrp png_ptr, png_inforp info_ptr)
+{
+   png_debug(1, "in png_write_end");
+
+   if (png_ptr == NULL)
+      return;
+
+   if ((png_ptr->mode & PNG_HAVE_IDAT) == 0)
+      png_error(png_ptr, "No IDATs written into file");
+
+#ifdef PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+       png_ptr->num_palette_max >= png_ptr->num_palette)
+      png_benign_error(png_ptr, "Wrote palette index exceeding num_palette");
+#endif
+
+   /* See if user wants us to write information chunks */
+   if (info_ptr != NULL)
+   {
+#ifdef PNG_WRITE_TEXT_SUPPORTED
+      int i; /* local index variable */
+#endif
+#ifdef PNG_WRITE_tIME_SUPPORTED
+      /* Check to see if user has supplied a time chunk */
+      if ((info_ptr->valid & PNG_INFO_tIME) != 0 &&
+          (png_ptr->mode & PNG_WROTE_tIME) == 0)
+         png_write_tIME(png_ptr, &(info_ptr->mod_time));
+
+#endif
+#ifdef PNG_WRITE_TEXT_SUPPORTED
+      /* Loop through comment chunks */
+      for (i = 0; i < info_ptr->num_text; i++)
+      {
+         png_debug2(2, "Writing trailer text chunk %d, type %d", i,
+             info_ptr->text[i].compression);
+         /* An internationalized chunk? */
+         if (info_ptr->text[i].compression > 0)
+         {
+#ifdef PNG_WRITE_iTXt_SUPPORTED
+            /* Write international chunk */
+            png_write_iTXt(png_ptr,
+                info_ptr->text[i].compression,
+                info_ptr->text[i].key,
+                info_ptr->text[i].lang,
+                info_ptr->text[i].lang_key,
+                info_ptr->text[i].text);
+            /* Mark this chunk as written */
+            if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
+               info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+            else
+               info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
+#else
+            png_warning(png_ptr, "Unable to write international text");
+#endif
+         }
+
+         else if (info_ptr->text[i].compression >= PNG_TEXT_COMPRESSION_zTXt)
+         {
+#ifdef PNG_WRITE_zTXt_SUPPORTED
+            /* Write compressed chunk */
+            png_write_zTXt(png_ptr, info_ptr->text[i].key,
+                info_ptr->text[i].text, info_ptr->text[i].compression);
+            /* Mark this chunk as written */
+            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
+#else
+            png_warning(png_ptr, "Unable to write compressed text");
+#endif
+         }
+
+         else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
+         {
+#ifdef PNG_WRITE_tEXt_SUPPORTED
+            /* Write uncompressed chunk */
+            png_write_tEXt(png_ptr, info_ptr->text[i].key,
+                info_ptr->text[i].text, 0);
+            /* Mark this chunk as written */
+            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+#else
+            png_warning(png_ptr, "Unable to write uncompressed text");
+#endif
+         }
+      }
+#endif
+
+#ifdef PNG_WRITE_eXIf_SUPPORTED
+      if ((info_ptr->valid & PNG_INFO_eXIf) != 0 &&
+          (png_ptr->mode & PNG_WROTE_eXIf) == 0)
+         png_write_eXIf(png_ptr, info_ptr->exif, info_ptr->num_exif);
+#endif
+
+#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+      write_unknown_chunks(png_ptr, info_ptr, PNG_AFTER_IDAT);
+#endif
+   }
+
+   png_ptr->mode |= PNG_AFTER_IDAT;
+
+   /* Write end of PNG file */
+   png_write_IEND(png_ptr);
+
+   /* This flush, added in libpng-1.0.8, removed from libpng-1.0.9beta03,
+    * and restored again in libpng-1.2.30, may cause some applications that
+    * do not set png_ptr->output_flush_fn to crash.  If your application
+    * experiences a problem, please try building libpng with
+    * PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED defined, and report the event to
+    * png-mng-implement at lists.sf.net .
+    */
+#ifdef PNG_WRITE_FLUSH_SUPPORTED
+#  ifdef PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED
+   png_flush(png_ptr);
+#  endif
+#endif
+}
+
+#ifdef PNG_CONVERT_tIME_SUPPORTED
+void PNGAPI
+png_convert_from_struct_tm(png_timep ptime, const struct tm * ttime)
+{
+   png_debug(1, "in png_convert_from_struct_tm");
+
+   ptime->year = (png_uint_16)(1900 + ttime->tm_year);
+   ptime->month = (png_byte)(ttime->tm_mon + 1);
+   ptime->day = (png_byte)ttime->tm_mday;
+   ptime->hour = (png_byte)ttime->tm_hour;
+   ptime->minute = (png_byte)ttime->tm_min;
+   ptime->second = (png_byte)ttime->tm_sec;
+}
+
+void PNGAPI
+png_convert_from_time_t(png_timep ptime, time_t ttime)
+{
+   struct tm *tbuf;
+
+   png_debug(1, "in png_convert_from_time_t");
+
+   tbuf = gmtime(&ttime);
+   if (tbuf == NULL)
+   {
+      /* TODO: add a safe function which takes a png_ptr argument and raises
+       * a png_error if the ttime argument is invalid and the call to gmtime
+       * fails as a consequence.
+       */
+      memset(ptime, 0, sizeof(*ptime));
+      return;
+   }
+
+   png_convert_from_struct_tm(ptime, tbuf);
+}
+#endif
+
+/* Initialize png_ptr structure, and allocate any memory needed */
+PNG_FUNCTION(png_structp,PNGAPI
+png_create_write_struct,(png_const_charp user_png_ver, png_voidp error_ptr,
+    png_error_ptr error_fn, png_error_ptr warn_fn),PNG_ALLOCATED)
+{
+#ifndef PNG_USER_MEM_SUPPORTED
+   png_structrp png_ptr = png_create_png_struct(user_png_ver, error_ptr,
+       error_fn, warn_fn, NULL, NULL, NULL);
+#else
+   return png_create_write_struct_2(user_png_ver, error_ptr, error_fn,
+       warn_fn, NULL, NULL, NULL);
+}
+
+/* Alternate initialize png_ptr structure, and allocate any memory needed */
+PNG_FUNCTION(png_structp,PNGAPI
+png_create_write_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr,
+    png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+    png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED)
+{
+   png_structrp png_ptr = png_create_png_struct(user_png_ver, error_ptr,
+       error_fn, warn_fn, mem_ptr, malloc_fn, free_fn);
+#endif /* USER_MEM */
+   if (png_ptr != NULL)
+   {
+      /* Set the zlib control values to defaults; they can be overridden by the
+       * application after the struct has been created.
+       */
+      png_ptr->zbuffer_size = PNG_ZBUF_SIZE;
+
+      /* The 'zlib_strategy' setting is irrelevant because png_default_claim in
+       * pngwutil.c defaults it according to whether or not filters will be
+       * used, and ignores this setting.
+       */
+      png_ptr->zlib_strategy = PNG_Z_DEFAULT_STRATEGY;
+      png_ptr->zlib_level = PNG_Z_DEFAULT_COMPRESSION;
+      png_ptr->zlib_mem_level = 8;
+      png_ptr->zlib_window_bits = 15;
+      png_ptr->zlib_method = 8;
+
+#ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED
+      png_ptr->zlib_text_strategy = PNG_TEXT_Z_DEFAULT_STRATEGY;
+      png_ptr->zlib_text_level = PNG_TEXT_Z_DEFAULT_COMPRESSION;
+      png_ptr->zlib_text_mem_level = 8;
+      png_ptr->zlib_text_window_bits = 15;
+      png_ptr->zlib_text_method = 8;
+#endif /* WRITE_COMPRESSED_TEXT */
+
+      /* This is a highly dubious configuration option; by default it is off,
+       * but it may be appropriate for private builds that are testing
+       * extensions not conformant to the current specification, or of
+       * applications that must not fail to write at all costs!
+       */
+#ifdef PNG_BENIGN_WRITE_ERRORS_SUPPORTED
+      /* In stable builds only warn if an application error can be completely
+       * handled.
+       */
+      png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN;
+#endif
+
+      /* App warnings are warnings in release (or release candidate) builds but
+       * are errors during development.
+       */
+#if PNG_RELEASE_BUILD
+      png_ptr->flags |= PNG_FLAG_APP_WARNINGS_WARN;
+#endif
+
+      /* TODO: delay this, it can be done in png_init_io() (if the app doesn't
+       * do it itself) avoiding setting the default function if it is not
+       * required.
+       */
+      png_set_write_fn(png_ptr, NULL, NULL, NULL);
+   }
+
+   return png_ptr;
+}
+
+
+/* Write a few rows of image data.  If the image is interlaced,
+ * either you will have to write the 7 sub images, or, if you
+ * have called png_set_interlace_handling(), you will have to
+ * "write" the image seven times.
+ */
+void PNGAPI
+png_write_rows(png_structrp png_ptr, png_bytepp row,
+    png_uint_32 num_rows)
+{
+   png_uint_32 i; /* row counter */
+   png_bytepp rp; /* row pointer */
+
+   png_debug(1, "in png_write_rows");
+
+   if (png_ptr == NULL)
+      return;
+
+   /* Loop through the rows */
+   for (i = 0, rp = row; i < num_rows; i++, rp++)
+   {
+      png_write_row(png_ptr, *rp);
+   }
+}
+
+/* Write the image.  You only need to call this function once, even
+ * if you are writing an interlaced image.
+ */
+void PNGAPI
+png_write_image(png_structrp png_ptr, png_bytepp image)
+{
+   png_uint_32 i; /* row index */
+   int pass, num_pass; /* pass variables */
+   png_bytepp rp; /* points to current row */
+
+   if (png_ptr == NULL)
+      return;
+
+   png_debug(1, "in png_write_image");
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* Initialize interlace handling.  If image is not interlaced,
+    * this will set pass to 1
+    */
+   num_pass = png_set_interlace_handling(png_ptr);
+#else
+   num_pass = 1;
+#endif
+   /* Loop through passes */
+   for (pass = 0; pass < num_pass; pass++)
+   {
+      /* Loop through image */
+      for (i = 0, rp = image; i < png_ptr->height; i++, rp++)
+      {
+         png_write_row(png_ptr, *rp);
+      }
+   }
+}
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+/* Performs intrapixel differencing  */
+static void
+png_do_write_intrapixel(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_write_intrapixel");
+
+   if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
+   {
+      int bytes_per_pixel;
+      png_uint_32 row_width = row_info->width;
+      if (row_info->bit_depth == 8)
+      {
+         png_bytep rp;
+         png_uint_32 i;
+
+         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+            bytes_per_pixel = 3;
+
+         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            bytes_per_pixel = 4;
+
+         else
+            return;
+
+         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
+         {
+            *(rp)     = (png_byte)(*rp       - *(rp + 1));
+            *(rp + 2) = (png_byte)(*(rp + 2) - *(rp + 1));
+         }
+      }
+
+#ifdef PNG_WRITE_16BIT_SUPPORTED
+      else if (row_info->bit_depth == 16)
+      {
+         png_bytep rp;
+         png_uint_32 i;
+
+         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+            bytes_per_pixel = 6;
+
+         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            bytes_per_pixel = 8;
+
+         else
+            return;
+
+         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
+         {
+            png_uint_32 s0   = (png_uint_32)(*(rp    ) << 8) | *(rp + 1);
+            png_uint_32 s1   = (png_uint_32)(*(rp + 2) << 8) | *(rp + 3);
+            png_uint_32 s2   = (png_uint_32)(*(rp + 4) << 8) | *(rp + 5);
+            png_uint_32 red  = (png_uint_32)((s0 - s1) & 0xffffL);
+            png_uint_32 blue = (png_uint_32)((s2 - s1) & 0xffffL);
+            *(rp    ) = (png_byte)(red >> 8);
+            *(rp + 1) = (png_byte)red;
+            *(rp + 4) = (png_byte)(blue >> 8);
+            *(rp + 5) = (png_byte)blue;
+         }
+      }
+#endif /* WRITE_16BIT */
+   }
+}
+#endif /* MNG_FEATURES */
+
+/* Called by user to write a row of image data */
+void PNGAPI
+png_write_row(png_structrp png_ptr, png_const_bytep row)
+{
+   /* 1.5.6: moved from png_struct to be a local structure: */
+   png_row_info row_info;
+
+   png_debug2(1, "in png_write_row (row %u, pass %d)",
+       png_ptr->row_number, png_ptr->pass);
+
+   if (png_ptr == NULL)
+      return;
+
+   /* Initialize transformations and other stuff if first time */
+   if (png_ptr->row_number == 0 && png_ptr->pass == 0)
+   {
+      /* Make sure we wrote the header info */
+      if ((png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE) == 0)
+         png_error(png_ptr,
+             "png_write_info was never called before png_write_row");
+
+      /* Check for transforms that have been set but were defined out */
+#if !defined(PNG_WRITE_INVERT_SUPPORTED) && defined(PNG_READ_INVERT_SUPPORTED)
+      if ((png_ptr->transformations & PNG_INVERT_MONO) != 0)
+         png_warning(png_ptr, "PNG_WRITE_INVERT_SUPPORTED is not defined");
+#endif
+
+#if !defined(PNG_WRITE_FILLER_SUPPORTED) && defined(PNG_READ_FILLER_SUPPORTED)
+      if ((png_ptr->transformations & PNG_FILLER) != 0)
+         png_warning(png_ptr, "PNG_WRITE_FILLER_SUPPORTED is not defined");
+#endif
+#if !defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \
+    defined(PNG_READ_PACKSWAP_SUPPORTED)
+      if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
+         png_warning(png_ptr,
+             "PNG_WRITE_PACKSWAP_SUPPORTED is not defined");
+#endif
+
+#if !defined(PNG_WRITE_PACK_SUPPORTED) && defined(PNG_READ_PACK_SUPPORTED)
+      if ((png_ptr->transformations & PNG_PACK) != 0)
+         png_warning(png_ptr, "PNG_WRITE_PACK_SUPPORTED is not defined");
+#endif
+
+#if !defined(PNG_WRITE_SHIFT_SUPPORTED) && defined(PNG_READ_SHIFT_SUPPORTED)
+      if ((png_ptr->transformations & PNG_SHIFT) != 0)
+         png_warning(png_ptr, "PNG_WRITE_SHIFT_SUPPORTED is not defined");
+#endif
+
+#if !defined(PNG_WRITE_BGR_SUPPORTED) && defined(PNG_READ_BGR_SUPPORTED)
+      if ((png_ptr->transformations & PNG_BGR) != 0)
+         png_warning(png_ptr, "PNG_WRITE_BGR_SUPPORTED is not defined");
+#endif
+
+#if !defined(PNG_WRITE_SWAP_SUPPORTED) && defined(PNG_READ_SWAP_SUPPORTED)
+      if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0)
+         png_warning(png_ptr, "PNG_WRITE_SWAP_SUPPORTED is not defined");
+#endif
+
+      png_write_start_row(png_ptr);
+   }
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* If interlaced and not interested in row, return */
+   if (png_ptr->interlaced != 0 &&
+       (png_ptr->transformations & PNG_INTERLACE) != 0)
+   {
+      switch (png_ptr->pass)
+      {
+         case 0:
+            if ((png_ptr->row_number & 0x07) != 0)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 1:
+            if ((png_ptr->row_number & 0x07) != 0 || png_ptr->width < 5)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 2:
+            if ((png_ptr->row_number & 0x07) != 4)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 3:
+            if ((png_ptr->row_number & 0x03) != 0 || png_ptr->width < 3)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 4:
+            if ((png_ptr->row_number & 0x03) != 2)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 5:
+            if ((png_ptr->row_number & 0x01) != 0 || png_ptr->width < 2)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         case 6:
+            if ((png_ptr->row_number & 0x01) == 0)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+
+         default: /* error: ignore it */
+            break;
+      }
+   }
+#endif
+
+   /* Set up row info for transformations */
+   row_info.color_type = png_ptr->color_type;
+   row_info.width = png_ptr->usr_width;
+   row_info.channels = png_ptr->usr_channels;
+   row_info.bit_depth = png_ptr->usr_bit_depth;
+   row_info.pixel_depth = (png_byte)(row_info.bit_depth * row_info.channels);
+   row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width);
+
+   png_debug1(3, "row_info->color_type = %d", row_info.color_type);
+   png_debug1(3, "row_info->width = %u", row_info.width);
+   png_debug1(3, "row_info->channels = %d", row_info.channels);
+   png_debug1(3, "row_info->bit_depth = %d", row_info.bit_depth);
+   png_debug1(3, "row_info->pixel_depth = %d", row_info.pixel_depth);
+   png_debug1(3, "row_info->rowbytes = %lu", (unsigned long)row_info.rowbytes);
+
+   /* Copy user's row into buffer, leaving room for filter byte. */
+   memcpy(png_ptr->row_buf + 1, row, row_info.rowbytes);
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* Handle interlacing */
+   if (png_ptr->interlaced && png_ptr->pass < 6 &&
+       (png_ptr->transformations & PNG_INTERLACE) != 0)
+   {
+      png_do_write_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass);
+      /* This should always get caught above, but still ... */
+      if (row_info.width == 0)
+      {
+         png_write_finish_row(png_ptr);
+         return;
+      }
+   }
+#endif
+
+#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
+   /* Handle other transformations */
+   if (png_ptr->transformations != 0)
+      png_do_write_transformations(png_ptr, &row_info);
+#endif
+
+   /* At this point the row_info pixel depth must match the 'transformed' depth,
+    * which is also the output depth.
+    */
+   if (row_info.pixel_depth != png_ptr->pixel_depth ||
+       row_info.pixel_depth != png_ptr->transformed_pixel_depth)
+      png_error(png_ptr, "internal write transform logic error");
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+   /* Write filter_method 64 (intrapixel differencing) only if
+    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
+    * 2. Libpng did not write a PNG signature (this filter_method is only
+    *    used in PNG datastreams that are embedded in MNG datastreams) and
+    * 3. The application called png_permit_mng_features with a mask that
+    *    included PNG_FLAG_MNG_FILTER_64 and
+    * 4. The filter_method is 64 and
+    * 5. The color_type is RGB or RGBA
+    */
+   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
+       (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
+   {
+      /* Intrapixel differencing */
+      png_do_write_intrapixel(&row_info, png_ptr->row_buf + 1);
+   }
+#endif
+
+/* Added at libpng-1.5.10 */
+#ifdef PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED
+   /* Check for out-of-range palette index */
+   if (row_info.color_type == PNG_COLOR_TYPE_PALETTE &&
+       png_ptr->num_palette_max >= 0)
+      png_do_check_palette_indexes(png_ptr, &row_info);
+#endif
+
+   /* Find a filter if necessary, filter the row and write it out. */
+   png_write_find_filter(png_ptr, &row_info);
+
+   if (png_ptr->write_row_fn != NULL)
+      (*(png_ptr->write_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
+}
+
+#ifdef PNG_WRITE_FLUSH_SUPPORTED
+/* Set the automatic flush interval or 0 to turn flushing off */
+void PNGAPI
+png_set_flush(png_structrp png_ptr, int nrows)
+{
+   png_debug(1, "in png_set_flush");
+
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->flush_dist = (nrows < 0 ? 0 : (png_uint_32)nrows);
+}
+
+/* Flush the current output buffers now */
+void PNGAPI
+png_write_flush(png_structrp png_ptr)
+{
+   png_debug(1, "in png_write_flush");
+
+   if (png_ptr == NULL)
+      return;
+
+   /* We have already written out all of the data */
+   if (png_ptr->row_number >= png_ptr->num_rows)
+      return;
+
+   png_compress_IDAT(png_ptr, NULL, 0, Z_SYNC_FLUSH);
+   png_ptr->flush_rows = 0;
+   png_flush(png_ptr);
+}
+#endif /* WRITE_FLUSH */
+
+/* Free any memory used in png_ptr struct without freeing the struct itself. */
+static void
+png_write_destroy(png_structrp png_ptr)
+{
+   png_debug(1, "in png_write_destroy");
+
+   /* Free any memory zlib uses */
+   if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0)
+      deflateEnd(&png_ptr->zstream);
+
+   /* Free our memory.  png_free checks NULL for us. */
+   png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list);
+   png_free(png_ptr, png_ptr->row_buf);
+   png_ptr->row_buf = NULL;
+#ifdef PNG_WRITE_FILTER_SUPPORTED
+   png_free(png_ptr, png_ptr->prev_row);
+   png_free(png_ptr, png_ptr->try_row);
+   png_free(png_ptr, png_ptr->tst_row);
+   png_ptr->prev_row = NULL;
+   png_ptr->try_row = NULL;
+   png_ptr->tst_row = NULL;
+#endif
+
+#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED
+   png_free(png_ptr, png_ptr->chunk_list);
+   png_ptr->chunk_list = NULL;
+#endif
+
+   /* The error handling and memory handling information is left intact at this
+    * point: the jmp_buf may still have to be freed.  See png_destroy_png_struct
+    * for how this happens.
+    */
+}
+
+/* Free all memory used by the write.
+ * In libpng 1.6.0 this API changed quietly to no longer accept a NULL value for
+ * *png_ptr_ptr.  Prior to 1.6.0 it would accept such a value and it would free
+ * the passed in info_structs but it would quietly fail to free any of the data
+ * inside them.  In 1.6.0 it quietly does nothing (it has to be quiet because it
+ * has no png_ptr.)
+ */
+void PNGAPI
+png_destroy_write_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr)
+{
+   png_debug(1, "in png_destroy_write_struct");
+
+   if (png_ptr_ptr != NULL)
+   {
+      png_structrp png_ptr = *png_ptr_ptr;
+
+      if (png_ptr != NULL) /* added in libpng 1.6.0 */
+      {
+         png_destroy_info_struct(png_ptr, info_ptr_ptr);
+
+         *png_ptr_ptr = NULL;
+         png_write_destroy(png_ptr);
+         png_destroy_png_struct(png_ptr);
+      }
+   }
+}
+
+/* Allow the application to select one or more row filters to use. */
+void PNGAPI
+png_set_filter(png_structrp png_ptr, int method, int filters)
+{
+   png_debug(1, "in png_set_filter");
+
+   if (png_ptr == NULL)
+      return;
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
+       (method == PNG_INTRAPIXEL_DIFFERENCING))
+      method = PNG_FILTER_TYPE_BASE;
+
+#endif
+   if (method == PNG_FILTER_TYPE_BASE)
+   {
+      switch (filters & (PNG_ALL_FILTERS | 0x07))
+      {
+#ifdef PNG_WRITE_FILTER_SUPPORTED
+         case 5:
+         case 6:
+         case 7: png_app_error(png_ptr, "Unknown row filter for method 0");
+#endif /* WRITE_FILTER */
+            /* FALLTHROUGH */
+         case PNG_FILTER_VALUE_NONE:
+            png_ptr->do_filter = PNG_FILTER_NONE; break;
+
+#ifdef PNG_WRITE_FILTER_SUPPORTED
+         case PNG_FILTER_VALUE_SUB:
+            png_ptr->do_filter = PNG_FILTER_SUB; break;
+
+         case PNG_FILTER_VALUE_UP:
+            png_ptr->do_filter = PNG_FILTER_UP; break;
+
+         case PNG_FILTER_VALUE_AVG:
+            png_ptr->do_filter = PNG_FILTER_AVG; break;
+
+         case PNG_FILTER_VALUE_PAETH:
+            png_ptr->do_filter = PNG_FILTER_PAETH; break;
+
+         default:
+            png_ptr->do_filter = (png_byte)filters; break;
+#else
+         default:
+            png_app_error(png_ptr, "Unknown row filter for method 0");
+#endif /* WRITE_FILTER */
+      }
+
+#ifdef PNG_WRITE_FILTER_SUPPORTED
+      /* If we have allocated the row_buf, this means we have already started
+       * with the image and we should have allocated all of the filter buffers
+       * that have been selected.  If prev_row isn't already allocated, then
+       * it is too late to start using the filters that need it, since we
+       * will be missing the data in the previous row.  If an application
+       * wants to start and stop using particular filters during compression,
+       * it should start out with all of the filters, and then remove them
+       * or add them back after the start of compression.
+       *
+       * NOTE: this is a nasty constraint on the code, because it means that the
+       * prev_row buffer must be maintained even if there are currently no
+       * 'prev_row' requiring filters active.
+       */
+      if (png_ptr->row_buf != NULL)
+      {
+         int num_filters;
+         png_alloc_size_t buf_size;
+
+         /* Repeat the checks in png_write_start_row; 1 pixel high or wide
+          * images cannot benefit from certain filters.  If this isn't done here
+          * the check below will fire on 1 pixel high images.
+          */
+         if (png_ptr->height == 1)
+            filters &= ~(PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH);
+
+         if (png_ptr->width == 1)
+            filters &= ~(PNG_FILTER_SUB|PNG_FILTER_AVG|PNG_FILTER_PAETH);
+
+         if ((filters & (PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH)) != 0
+            && png_ptr->prev_row == NULL)
+         {
+            /* This is the error case, however it is benign - the previous row
+             * is not available so the filter can't be used.  Just warn here.
+             */
+            png_app_warning(png_ptr,
+                "png_set_filter: UP/AVG/PAETH cannot be added after start");
+            filters &= ~(PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH);
+         }
+
+         num_filters = 0;
+
+         if (filters & PNG_FILTER_SUB)
+            num_filters++;
+
+         if (filters & PNG_FILTER_UP)
+            num_filters++;
+
+         if (filters & PNG_FILTER_AVG)
+            num_filters++;
+
+         if (filters & PNG_FILTER_PAETH)
+            num_filters++;
+
+         /* Allocate needed row buffers if they have not already been
+          * allocated.
+          */
+         buf_size = PNG_ROWBYTES(png_ptr->usr_channels * png_ptr->usr_bit_depth,
+             png_ptr->width) + 1;
+
+         if (png_ptr->try_row == NULL)
+            png_ptr->try_row = png_voidcast(png_bytep,
+                png_malloc(png_ptr, buf_size));
+
+         if (num_filters > 1)
+         {
+            if (png_ptr->tst_row == NULL)
+               png_ptr->tst_row = png_voidcast(png_bytep,
+                   png_malloc(png_ptr, buf_size));
+         }
+      }
+      png_ptr->do_filter = (png_byte)filters;
+#endif
+   }
+   else
+      png_error(png_ptr, "Unknown custom filter method");
+}
+
+#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* DEPRECATED */
+/* Provide floating and fixed point APIs */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+void PNGAPI
+png_set_filter_heuristics(png_structrp png_ptr, int heuristic_method,
+    int num_weights, png_const_doublep filter_weights,
+    png_const_doublep filter_costs)
+{
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(heuristic_method)
+   PNG_UNUSED(num_weights)
+   PNG_UNUSED(filter_weights)
+   PNG_UNUSED(filter_costs)
+}
+#endif /* FLOATING_POINT */
+
+#ifdef PNG_FIXED_POINT_SUPPORTED
+void PNGAPI
+png_set_filter_heuristics_fixed(png_structrp png_ptr, int heuristic_method,
+    int num_weights, png_const_fixed_point_p filter_weights,
+    png_const_fixed_point_p filter_costs)
+{
+   PNG_UNUSED(png_ptr)
+   PNG_UNUSED(heuristic_method)
+   PNG_UNUSED(num_weights)
+   PNG_UNUSED(filter_weights)
+   PNG_UNUSED(filter_costs)
+}
+#endif /* FIXED_POINT */
+#endif /* WRITE_WEIGHTED_FILTER */
+
+#ifdef PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED
+void PNGAPI
+png_set_compression_level(png_structrp png_ptr, int level)
+{
+   png_debug(1, "in png_set_compression_level");
+
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->zlib_level = level;
+}
+
+void PNGAPI
+png_set_compression_mem_level(png_structrp png_ptr, int mem_level)
+{
+   png_debug(1, "in png_set_compression_mem_level");
+
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->zlib_mem_level = mem_level;
+}
+
+void PNGAPI
+png_set_compression_strategy(png_structrp png_ptr, int strategy)
+{
+   png_debug(1, "in png_set_compression_strategy");
+
+   if (png_ptr == NULL)
+      return;
+
+   /* The flag setting here prevents the libpng dynamic selection of strategy.
+    */
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_STRATEGY;
+   png_ptr->zlib_strategy = strategy;
+}
+
+/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
+ * smaller value of window_bits if it can do so safely.
+ */
+void PNGAPI
+png_set_compression_window_bits(png_structrp png_ptr, int window_bits)
+{
+   png_debug(1, "in png_set_compression_window_bits");
+
+   if (png_ptr == NULL)
+      return;
+
+   /* Prior to 1.6.0 this would warn but then set the window_bits value. This
+    * meant that negative window bits values could be selected that would cause
+    * libpng to write a non-standard PNG file with raw deflate or gzip
+    * compressed IDAT or ancillary chunks.  Such files can be read and there is
+    * no warning on read, so this seems like a very bad idea.
+    */
+   if (window_bits > 15)
+   {
+      png_warning(png_ptr, "Only compression windows <= 32k supported by PNG");
+      window_bits = 15;
+   }
+
+   else if (window_bits < 8)
+   {
+      png_warning(png_ptr, "Only compression windows >= 256 supported by PNG");
+      window_bits = 8;
+   }
+
+   png_ptr->zlib_window_bits = window_bits;
+}
+
+void PNGAPI
+png_set_compression_method(png_structrp png_ptr, int method)
+{
+   png_debug(1, "in png_set_compression_method");
+
+   if (png_ptr == NULL)
+      return;
+
+   /* This would produce an invalid PNG file if it worked, but it doesn't and
+    * deflate will fault it, so it is harmless to just warn here.
+    */
+   if (method != 8)
+      png_warning(png_ptr, "Only compression method 8 is supported by PNG");
+
+   png_ptr->zlib_method = method;
+}
+#endif /* WRITE_CUSTOMIZE_COMPRESSION */
+
+/* The following were added to libpng-1.5.4 */
+#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
+void PNGAPI
+png_set_text_compression_level(png_structrp png_ptr, int level)
+{
+   png_debug(1, "in png_set_text_compression_level");
+
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->zlib_text_level = level;
+}
+
+void PNGAPI
+png_set_text_compression_mem_level(png_structrp png_ptr, int mem_level)
+{
+   png_debug(1, "in png_set_text_compression_mem_level");
+
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->zlib_text_mem_level = mem_level;
+}
+
+void PNGAPI
+png_set_text_compression_strategy(png_structrp png_ptr, int strategy)
+{
+   png_debug(1, "in png_set_text_compression_strategy");
+
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->zlib_text_strategy = strategy;
+}
+
+/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
+ * smaller value of window_bits if it can do so safely.
+ */
+void PNGAPI
+png_set_text_compression_window_bits(png_structrp png_ptr, int window_bits)
+{
+   png_debug(1, "in png_set_text_compression_window_bits");
+
+   if (png_ptr == NULL)
+      return;
+
+   if (window_bits > 15)
+   {
+      png_warning(png_ptr, "Only compression windows <= 32k supported by PNG");
+      window_bits = 15;
+   }
+
+   else if (window_bits < 8)
+   {
+      png_warning(png_ptr, "Only compression windows >= 256 supported by PNG");
+      window_bits = 8;
+   }
+
+   png_ptr->zlib_text_window_bits = window_bits;
+}
+
+void PNGAPI
+png_set_text_compression_method(png_structrp png_ptr, int method)
+{
+   png_debug(1, "in png_set_text_compression_method");
+
+   if (png_ptr == NULL)
+      return;
+
+   if (method != 8)
+      png_warning(png_ptr, "Only compression method 8 is supported by PNG");
+
+   png_ptr->zlib_text_method = method;
+}
+#endif /* WRITE_CUSTOMIZE_ZTXT_COMPRESSION */
+/* end of API added to libpng-1.5.4 */
+
+void PNGAPI
+png_set_write_status_fn(png_structrp png_ptr, png_write_status_ptr write_row_fn)
+{
+   png_debug(1, "in png_set_write_status_fn");
+
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->write_row_fn = write_row_fn;
+}
+
+#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
+void PNGAPI
+png_set_write_user_transform_fn(png_structrp png_ptr, png_user_transform_ptr
+    write_user_transform_fn)
+{
+   png_debug(1, "in png_set_write_user_transform_fn");
+
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->transformations |= PNG_USER_TRANSFORM;
+   png_ptr->write_user_transform_fn = write_user_transform_fn;
+}
+#endif
+
+
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+void PNGAPI
+png_write_png(png_structrp png_ptr, png_inforp info_ptr,
+    int transforms, voidp params)
+{
+   png_debug(1, "in png_write_png");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   if ((info_ptr->valid & PNG_INFO_IDAT) == 0)
+   {
+      png_app_error(png_ptr, "no rows for png_write_image to write");
+      return;
+   }
+
+   /* Write the file header information. */
+   png_write_info(png_ptr, info_ptr);
+
+   /* ------ these transformations don't touch the info structure ------- */
+
+   /* Invert monochrome pixels */
+   if ((transforms & PNG_TRANSFORM_INVERT_MONO) != 0)
+#ifdef PNG_WRITE_INVERT_SUPPORTED
+      png_set_invert_mono(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_MONO not supported");
+#endif
+
+   /* Shift the pixels up to a legal bit depth and fill in
+    * as appropriate to correctly scale the image.
+    */
+   if ((transforms & PNG_TRANSFORM_SHIFT) != 0)
+#ifdef PNG_WRITE_SHIFT_SUPPORTED
+      if ((info_ptr->valid & PNG_INFO_sBIT) != 0)
+         png_set_shift(png_ptr, &info_ptr->sig_bit);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_SHIFT not supported");
+#endif
+
+   /* Pack pixels into bytes */
+   if ((transforms & PNG_TRANSFORM_PACKING) != 0)
+#ifdef PNG_WRITE_PACK_SUPPORTED
+      png_set_packing(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_PACKING not supported");
+#endif
+
+   /* Swap location of alpha bytes from ARGB to RGBA */
+   if ((transforms & PNG_TRANSFORM_SWAP_ALPHA) != 0)
+#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED
+      png_set_swap_alpha(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ALPHA not supported");
+#endif
+
+   /* Remove a filler (X) from XRGB/RGBX/AG/GA into to convert it into
+    * RGB, note that the code expects the input color type to be G or RGB; no
+    * alpha channel.
+    */
+   if ((transforms & (PNG_TRANSFORM_STRIP_FILLER_AFTER|
+       PNG_TRANSFORM_STRIP_FILLER_BEFORE)) != 0)
+   {
+#ifdef PNG_WRITE_FILLER_SUPPORTED
+      if ((transforms & PNG_TRANSFORM_STRIP_FILLER_AFTER) != 0)
+      {
+         if ((transforms & PNG_TRANSFORM_STRIP_FILLER_BEFORE) != 0)
+            png_app_error(png_ptr,
+                "PNG_TRANSFORM_STRIP_FILLER: BEFORE+AFTER not supported");
+
+         /* Continue if ignored - this is the pre-1.6.10 behavior */
+         png_set_filler(png_ptr, 0, PNG_FILLER_AFTER);
+      }
+
+      else if ((transforms & PNG_TRANSFORM_STRIP_FILLER_BEFORE) != 0)
+         png_set_filler(png_ptr, 0, PNG_FILLER_BEFORE);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_STRIP_FILLER not supported");
+#endif
+   }
+
+   /* Flip BGR pixels to RGB */
+   if ((transforms & PNG_TRANSFORM_BGR) != 0)
+#ifdef PNG_WRITE_BGR_SUPPORTED
+      png_set_bgr(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_BGR not supported");
+#endif
+
+   /* Swap bytes of 16-bit files to most significant byte first */
+   if ((transforms & PNG_TRANSFORM_SWAP_ENDIAN) != 0)
+#ifdef PNG_WRITE_SWAP_SUPPORTED
+      png_set_swap(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ENDIAN not supported");
+#endif
+
+   /* Swap bits of 1-bit, 2-bit, 4-bit packed pixel formats */
+   if ((transforms & PNG_TRANSFORM_PACKSWAP) != 0)
+#ifdef PNG_WRITE_PACKSWAP_SUPPORTED
+      png_set_packswap(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_PACKSWAP not supported");
+#endif
+
+   /* Invert the alpha channel from opacity to transparency */
+   if ((transforms & PNG_TRANSFORM_INVERT_ALPHA) != 0)
+#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
+      png_set_invert_alpha(png_ptr);
+#else
+      png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_ALPHA not supported");
+#endif
+
+   /* ----------------------- end of transformations ------------------- */
+
+   /* Write the bits */
+   png_write_image(png_ptr, info_ptr->row_pointers);
+
+   /* It is REQUIRED to call this to finish writing the rest of the file */
+   png_write_end(png_ptr, info_ptr);
+
+   PNG_UNUSED(params)
+}
+#endif
+
+
+#ifdef PNG_SIMPLIFIED_WRITE_SUPPORTED
+/* Initialize the write structure - general purpose utility. */
+static int
+png_image_write_init(png_imagep image)
+{
+   png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, image,
+       png_safe_error, png_safe_warning);
+
+   if (png_ptr != NULL)
+   {
+      png_infop info_ptr = png_create_info_struct(png_ptr);
+
+      if (info_ptr != NULL)
+      {
+         png_controlp control = png_voidcast(png_controlp,
+             png_malloc_warn(png_ptr, (sizeof *control)));
+
+         if (control != NULL)
+         {
+            memset(control, 0, (sizeof *control));
+
+            control->png_ptr = png_ptr;
+            control->info_ptr = info_ptr;
+            control->for_write = 1;
+
+            image->opaque = control;
+            return 1;
+         }
+
+         /* Error clean up */
+         png_destroy_info_struct(png_ptr, &info_ptr);
+      }
+
+      png_destroy_write_struct(&png_ptr, NULL);
+   }
+
+   return png_image_error(image, "png_image_write_: out of memory");
+}
+
+/* Arguments to png_image_write_main: */
+typedef struct
+{
+   /* Arguments: */
+   png_imagep      image;
+   png_const_voidp buffer;
+   png_int_32      row_stride;
+   png_const_voidp colormap;
+   int             convert_to_8bit;
+   /* Local variables: */
+   png_const_voidp first_row;
+   ptrdiff_t       row_bytes;
+   png_voidp       local_row;
+   /* Byte count for memory writing */
+   png_bytep        memory;
+   png_alloc_size_t memory_bytes; /* not used for STDIO */
+   png_alloc_size_t output_bytes; /* running total */
+} png_image_write_control;
+
+/* Write png_uint_16 input to a 16-bit PNG; the png_ptr has already been set to
+ * do any necessary byte swapping.  The component order is defined by the
+ * png_image format value.
+ */
+static int
+png_write_image_16bit(png_voidp argument)
+{
+   png_image_write_control *display = png_voidcast(png_image_write_control*,
+       argument);
+   png_imagep image = display->image;
+   png_structrp png_ptr = image->opaque->png_ptr;
+
+   png_const_uint_16p input_row = png_voidcast(png_const_uint_16p,
+       display->first_row);
+   png_uint_16p output_row = png_voidcast(png_uint_16p, display->local_row);
+   png_uint_16p row_end;
+   unsigned int channels = (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ?
+       3 : 1;
+   int aindex = 0;
+   png_uint_32 y = image->height;
+
+   if ((image->format & PNG_FORMAT_FLAG_ALPHA) != 0)
+   {
+#   ifdef PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED
+      if ((image->format & PNG_FORMAT_FLAG_AFIRST) != 0)
+      {
+         aindex = -1;
+         ++input_row; /* To point to the first component */
+         ++output_row;
+      }
+         else
+            aindex = (int)channels;
+#     else
+         aindex = (int)channels;
+#     endif
+   }
+
+   else
+      png_error(png_ptr, "png_write_image: internal call error");
+
+   /* Work out the output row end and count over this, note that the increment
+    * above to 'row' means that row_end can actually be beyond the end of the
+    * row; this is correct.
+    */
+   row_end = output_row + image->width * (channels+1);
+
+   for (; y > 0; --y)
+   {
+      png_const_uint_16p in_ptr = input_row;
+      png_uint_16p out_ptr = output_row;
+
+      while (out_ptr < row_end)
+      {
+         png_uint_16 alpha = in_ptr[aindex];
+         png_uint_32 reciprocal = 0;
+         int c;
+
+         out_ptr[aindex] = alpha;
+
+         /* Calculate a reciprocal.  The correct calculation is simply
+          * component/alpha*65535 << 15. (I.e. 15 bits of precision); this
+          * allows correct rounding by adding .5 before the shift.  'reciprocal'
+          * is only initialized when required.
+          */
+         if (alpha > 0 && alpha < 65535)
+            reciprocal = ((0xffff<<15)+(alpha>>1))/alpha;
+
+         c = (int)channels;
+         do /* always at least one channel */
+         {
+            png_uint_16 component = *in_ptr++;
+
+            /* The following gives 65535 for an alpha of 0, which is fine,
+             * otherwise if 0/0 is represented as some other value there is more
+             * likely to be a discontinuity which will probably damage
+             * compression when moving from a fully transparent area to a
+             * nearly transparent one.  (The assumption here is that opaque
+             * areas tend not to be 0 intensity.)
+             */
+            if (component >= alpha)
+               component = 65535;
+
+            /* component<alpha, so component/alpha is less than one and
+             * component*reciprocal is less than 2^31.
+             */
+            else if (component > 0 && alpha < 65535)
+            {
+               png_uint_32 calc = component * reciprocal;
+               calc += 16384; /* round to nearest */
+               component = (png_uint_16)(calc >> 15);
+            }
+
+            *out_ptr++ = component;
+         }
+         while (--c > 0);
+
+         /* Skip to next component (skip the intervening alpha channel) */
+         ++in_ptr;
+         ++out_ptr;
+      }
+
+      png_write_row(png_ptr, png_voidcast(png_const_bytep, display->local_row));
+      input_row += (png_uint_16)display->row_bytes/(sizeof (png_uint_16));
+   }
+
+   return 1;
+}
+
+/* Given 16-bit input (1 to 4 channels) write 8-bit output.  If an alpha channel
+ * is present it must be removed from the components, the components are then
+ * written in sRGB encoding.  No components are added or removed.
+ *
+ * Calculate an alpha reciprocal to reverse pre-multiplication.  As above the
+ * calculation can be done to 15 bits of accuracy; however, the output needs to
+ * be scaled in the range 0..255*65535, so include that scaling here.
+ */
+#   define UNP_RECIPROCAL(alpha) ((((0xffff*0xff)<<7)+((alpha)>>1))/(alpha))
+
+static png_byte
+png_unpremultiply(png_uint_32 component, png_uint_32 alpha,
+    png_uint_32 reciprocal/*from the above macro*/)
+{
+   /* The following gives 1.0 for an alpha of 0, which is fine, otherwise if 0/0
+    * is represented as some other value there is more likely to be a
+    * discontinuity which will probably damage compression when moving from a
+    * fully transparent area to a nearly transparent one.  (The assumption here
+    * is that opaque areas tend not to be 0 intensity.)
+    *
+    * There is a rounding problem here; if alpha is less than 128 it will end up
+    * as 0 when scaled to 8 bits.  To avoid introducing spurious colors into the
+    * output change for this too.
+    */
+   if (component >= alpha || alpha < 128)
+      return 255;
+
+   /* component<alpha, so component/alpha is less than one and
+    * component*reciprocal is less than 2^31.
+    */
+   else if (component > 0)
+   {
+      /* The test is that alpha/257 (rounded) is less than 255, the first value
+       * that becomes 255 is 65407.
+       * NOTE: this must agree with the PNG_DIV257 macro (which must, therefore,
+       * be exact!)  [Could also test reciprocal != 0]
+       */
+      if (alpha < 65407)
+      {
+         component *= reciprocal;
+         component += 64; /* round to nearest */
+         component >>= 7;
+      }
+
+      else
+         component *= 255;
+
+      /* Convert the component to sRGB. */
+      return (png_byte)PNG_sRGB_FROM_LINEAR(component);
+   }
+
+   else
+      return 0;
+}
+
+static int
+png_write_image_8bit(png_voidp argument)
+{
+   png_image_write_control *display = png_voidcast(png_image_write_control*,
+       argument);
+   png_imagep image = display->image;
+   png_structrp png_ptr = image->opaque->png_ptr;
+
+   png_const_uint_16p input_row = png_voidcast(png_const_uint_16p,
+       display->first_row);
+   png_bytep output_row = png_voidcast(png_bytep, display->local_row);
+   png_uint_32 y = image->height;
+   unsigned int channels = (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ?
+       3 : 1;
+
+   if ((image->format & PNG_FORMAT_FLAG_ALPHA) != 0)
+   {
+      png_bytep row_end;
+      int aindex;
+
+#   ifdef PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED
+      if ((image->format & PNG_FORMAT_FLAG_AFIRST) != 0)
+      {
+         aindex = -1;
+         ++input_row; /* To point to the first component */
+         ++output_row;
+      }
+
+      else
+#   endif
+      aindex = (int)channels;
+
+      /* Use row_end in place of a loop counter: */
+      row_end = output_row + image->width * (channels+1);
+
+      for (; y > 0; --y)
+      {
+         png_const_uint_16p in_ptr = input_row;
+         png_bytep out_ptr = output_row;
+
+         while (out_ptr < row_end)
+         {
+            png_uint_16 alpha = in_ptr[aindex];
+            png_byte alphabyte = (png_byte)PNG_DIV257(alpha);
+            png_uint_32 reciprocal = 0;
+            int c;
+
+            /* Scale and write the alpha channel. */
+            out_ptr[aindex] = alphabyte;
+
+            if (alphabyte > 0 && alphabyte < 255)
+               reciprocal = UNP_RECIPROCAL(alpha);
+
+            c = (int)channels;
+            do /* always at least one channel */
+               *out_ptr++ = png_unpremultiply(*in_ptr++, alpha, reciprocal);
+            while (--c > 0);
+
+            /* Skip to next component (skip the intervening alpha channel) */
+            ++in_ptr;
+            ++out_ptr;
+         } /* while out_ptr < row_end */
+
+         png_write_row(png_ptr, png_voidcast(png_const_bytep,
+             display->local_row));
+         input_row += (png_uint_16)display->row_bytes/(sizeof (png_uint_16));
+      } /* while y */
+   }
+
+   else
+   {
+      /* No alpha channel, so the row_end really is the end of the row and it
+       * is sufficient to loop over the components one by one.
+       */
+      png_bytep row_end = output_row + image->width * channels;
+
+      for (; y > 0; --y)
+      {
+         png_const_uint_16p in_ptr = input_row;
+         png_bytep out_ptr = output_row;
+
+         while (out_ptr < row_end)
+         {
+            png_uint_32 component = *in_ptr++;
+
+            component *= 255;
+            *out_ptr++ = (png_byte)PNG_sRGB_FROM_LINEAR(component);
+         }
+
+         png_write_row(png_ptr, output_row);
+         input_row += (png_uint_16)display->row_bytes/(sizeof (png_uint_16));
+      }
+   }
+
+   return 1;
+}
+
+static void
+png_image_set_PLTE(png_image_write_control *display)
+{
+   png_imagep image = display->image;
+   const void *cmap = display->colormap;
+   int entries = image->colormap_entries > 256 ? 256 :
+       (int)image->colormap_entries;
+
+   /* NOTE: the caller must check for cmap != NULL and entries != 0 */
+   png_uint_32 format = image->format;
+   unsigned int channels = PNG_IMAGE_SAMPLE_CHANNELS(format);
+
+#   if defined(PNG_FORMAT_BGR_SUPPORTED) &&\
+      defined(PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED)
+      int afirst = (format & PNG_FORMAT_FLAG_AFIRST) != 0 &&
+          (format & PNG_FORMAT_FLAG_ALPHA) != 0;
+#   else
+#     define afirst 0
+#   endif
+
+#   ifdef PNG_FORMAT_BGR_SUPPORTED
+      int bgr = (format & PNG_FORMAT_FLAG_BGR) != 0 ? 2 : 0;
+#   else
+#     define bgr 0
+#   endif
+
+   int i, num_trans;
+   png_color palette[256];
+   png_byte tRNS[256];
+
+   memset(tRNS, 255, (sizeof tRNS));
+   memset(palette, 0, (sizeof palette));
+
+   for (i=num_trans=0; i<entries; ++i)
+   {
+      /* This gets automatically converted to sRGB with reversal of the
+       * pre-multiplication if the color-map has an alpha channel.
+       */
+      if ((format & PNG_FORMAT_FLAG_LINEAR) != 0)
+      {
+         png_const_uint_16p entry = png_voidcast(png_const_uint_16p, cmap);
+
+         entry += (unsigned int)i * channels;
+
+         if ((channels & 1) != 0) /* no alpha */
+         {
+            if (channels >= 3) /* RGB */
+            {
+               palette[i].blue = (png_byte)PNG_sRGB_FROM_LINEAR(255 *
+                   entry[(2 ^ bgr)]);
+               palette[i].green = (png_byte)PNG_sRGB_FROM_LINEAR(255 *
+                   entry[1]);
+               palette[i].red = (png_byte)PNG_sRGB_FROM_LINEAR(255 *
+                   entry[bgr]);
+            }
+
+            else /* Gray */
+               palette[i].blue = palette[i].red = palette[i].green =
+                  (png_byte)PNG_sRGB_FROM_LINEAR(255 * *entry);
+         }
+
+         else /* alpha */
+         {
+            png_uint_16 alpha = entry[afirst ? 0 : channels-1];
+            png_byte alphabyte = (png_byte)PNG_DIV257(alpha);
+            png_uint_32 reciprocal = 0;
+
+            /* Calculate a reciprocal, as in the png_write_image_8bit code above
+             * this is designed to produce a value scaled to 255*65535 when
+             * divided by 128 (i.e. asr 7).
+             */
+            if (alphabyte > 0 && alphabyte < 255)
+               reciprocal = (((0xffff*0xff)<<7)+(alpha>>1))/alpha;
+
+            tRNS[i] = alphabyte;
+            if (alphabyte < 255)
+               num_trans = i+1;
+
+            if (channels >= 3) /* RGB */
+            {
+               palette[i].blue = png_unpremultiply(entry[afirst + (2 ^ bgr)],
+                   alpha, reciprocal);
+               palette[i].green = png_unpremultiply(entry[afirst + 1], alpha,
+                   reciprocal);
+               palette[i].red = png_unpremultiply(entry[afirst + bgr], alpha,
+                   reciprocal);
+            }
+
+            else /* gray */
+               palette[i].blue = palette[i].red = palette[i].green =
+                   png_unpremultiply(entry[afirst], alpha, reciprocal);
+         }
+      }
+
+      else /* Color-map has sRGB values */
+      {
+         png_const_bytep entry = png_voidcast(png_const_bytep, cmap);
+
+         entry += (unsigned int)i * channels;
+
+         switch (channels)
+         {
+            case 4:
+               tRNS[i] = entry[afirst ? 0 : 3];
+               if (tRNS[i] < 255)
+                  num_trans = i+1;
+               /* FALLTHROUGH */
+            case 3:
+               palette[i].blue = entry[afirst + (2 ^ bgr)];
+               palette[i].green = entry[afirst + 1];
+               palette[i].red = entry[afirst + bgr];
+               break;
+
+            case 2:
+               tRNS[i] = entry[1 ^ afirst];
+               if (tRNS[i] < 255)
+                  num_trans = i+1;
+               /* FALLTHROUGH */
+            case 1:
+               palette[i].blue = palette[i].red = palette[i].green =
+                  entry[afirst];
+               break;
+
+            default:
+               break;
+         }
+      }
+   }
+
+#   ifdef afirst
+#     undef afirst
+#   endif
+#   ifdef bgr
+#     undef bgr
+#   endif
+
+   png_set_PLTE(image->opaque->png_ptr, image->opaque->info_ptr, palette,
+       entries);
+
+   if (num_trans > 0)
+      png_set_tRNS(image->opaque->png_ptr, image->opaque->info_ptr, tRNS,
+          num_trans, NULL);
+
+   image->colormap_entries = (png_uint_32)entries;
+}
+
+static int
+png_image_write_main(png_voidp argument)
+{
+   png_image_write_control *display = png_voidcast(png_image_write_control*,
+       argument);
+   png_imagep image = display->image;
+   png_structrp png_ptr = image->opaque->png_ptr;
+   png_inforp info_ptr = image->opaque->info_ptr;
+   png_uint_32 format = image->format;
+
+   /* The following four ints are actually booleans */
+   int colormap = (format & PNG_FORMAT_FLAG_COLORMAP);
+   int linear = !colormap && (format & PNG_FORMAT_FLAG_LINEAR); /* input */
+   int alpha = !colormap && (format & PNG_FORMAT_FLAG_ALPHA);
+   int write_16bit = linear && (display->convert_to_8bit == 0);
+
+#   ifdef PNG_BENIGN_ERRORS_SUPPORTED
+      /* Make sure we error out on any bad situation */
+      png_set_benign_errors(png_ptr, 0/*error*/);
+#   endif
+
+   /* Default the 'row_stride' parameter if required, also check the row stride
+    * and total image size to ensure that they are within the system limits.
+    */
+   {
+      unsigned int channels = PNG_IMAGE_PIXEL_CHANNELS(image->format);
+
+      if (image->width <= 0x7fffffffU/channels) /* no overflow */
+      {
+         png_uint_32 check;
+         png_uint_32 png_row_stride = image->width * channels;
+
+         if (display->row_stride == 0)
+            display->row_stride = (png_int_32)/*SAFE*/png_row_stride;
+
+         if (display->row_stride < 0)
+            check = (png_uint_32)(-display->row_stride);
+
+         else
+            check = (png_uint_32)display->row_stride;
+
+         if (check >= png_row_stride)
+         {
+            /* Now check for overflow of the image buffer calculation; this
+             * limits the whole image size to 32 bits for API compatibility with
+             * the current, 32-bit, PNG_IMAGE_BUFFER_SIZE macro.
+             */
+            if (image->height > 0xffffffffU/png_row_stride)
+               png_error(image->opaque->png_ptr, "memory image too large");
+         }
+
+         else
+            png_error(image->opaque->png_ptr, "supplied row stride too small");
+      }
+
+      else
+         png_error(image->opaque->png_ptr, "image row stride too large");
+   }
+
+   /* Set the required transforms then write the rows in the correct order. */
+   if ((format & PNG_FORMAT_FLAG_COLORMAP) != 0)
+   {
+      if (display->colormap != NULL && image->colormap_entries > 0)
+      {
+         png_uint_32 entries = image->colormap_entries;
+
+         png_set_IHDR(png_ptr, info_ptr, image->width, image->height,
+             entries > 16 ? 8 : (entries > 4 ? 4 : (entries > 2 ? 2 : 1)),
+             PNG_COLOR_TYPE_PALETTE, PNG_INTERLACE_NONE,
+             PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
+
+         png_image_set_PLTE(display);
+      }
+
+      else
+         png_error(image->opaque->png_ptr,
+             "no color-map for color-mapped image");
+   }
+
+   else
+      png_set_IHDR(png_ptr, info_ptr, image->width, image->height,
+          write_16bit ? 16 : 8,
+          ((format & PNG_FORMAT_FLAG_COLOR) ? PNG_COLOR_MASK_COLOR : 0) +
+          ((format & PNG_FORMAT_FLAG_ALPHA) ? PNG_COLOR_MASK_ALPHA : 0),
+          PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
+
+   /* Counter-intuitively the data transformations must be called *after*
+    * png_write_info, not before as in the read code, but the 'set' functions
+    * must still be called before.  Just set the color space information, never
+    * write an interlaced image.
+    */
+
+   if (write_16bit != 0)
+   {
+      /* The gamma here is 1.0 (linear) and the cHRM chunk matches sRGB. */
+      png_set_gAMA_fixed(png_ptr, info_ptr, PNG_GAMMA_LINEAR);
+
+      if ((image->flags & PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB) == 0)
+         png_set_cHRM_fixed(png_ptr, info_ptr,
+             /* color      x       y */
+             /* white */ 31270, 32900,
+             /* red   */ 64000, 33000,
+             /* green */ 30000, 60000,
+             /* blue  */ 15000,  6000
+         );
+   }
+
+   else if ((image->flags & PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB) == 0)
+      png_set_sRGB(png_ptr, info_ptr, PNG_sRGB_INTENT_PERCEPTUAL);
+
+   /* Else writing an 8-bit file and the *colors* aren't sRGB, but the 8-bit
+    * space must still be gamma encoded.
+    */
+   else
+      png_set_gAMA_fixed(png_ptr, info_ptr, PNG_GAMMA_sRGB_INVERSE);
+
+   /* Write the file header. */
+   png_write_info(png_ptr, info_ptr);
+
+   /* Now set up the data transformations (*after* the header is written),
+    * remove the handled transformations from the 'format' flags for checking.
+    *
+    * First check for a little endian system if writing 16-bit files.
+    */
+   if (write_16bit != 0)
+   {
+      png_uint_16 le = 0x0001;
+
+      if ((*(png_const_bytep) & le) != 0)
+         png_set_swap(png_ptr);
+   }
+
+#   ifdef PNG_SIMPLIFIED_WRITE_BGR_SUPPORTED
+      if ((format & PNG_FORMAT_FLAG_BGR) != 0)
+      {
+         if (colormap == 0 && (format & PNG_FORMAT_FLAG_COLOR) != 0)
+            png_set_bgr(png_ptr);
+         format &= ~PNG_FORMAT_FLAG_BGR;
+      }
+#   endif
+
+#   ifdef PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED
+      if ((format & PNG_FORMAT_FLAG_AFIRST) != 0)
+      {
+         if (colormap == 0 && (format & PNG_FORMAT_FLAG_ALPHA) != 0)
+            png_set_swap_alpha(png_ptr);
+         format &= ~PNG_FORMAT_FLAG_AFIRST;
+      }
+#   endif
+
+   /* If there are 16 or fewer color-map entries we wrote a lower bit depth
+    * above, but the application data is still byte packed.
+    */
+   if (colormap != 0 && image->colormap_entries <= 16)
+      png_set_packing(png_ptr);
+
+   /* That should have handled all (both) the transforms. */
+   if ((format & ~(png_uint_32)(PNG_FORMAT_FLAG_COLOR | PNG_FORMAT_FLAG_LINEAR |
+         PNG_FORMAT_FLAG_ALPHA | PNG_FORMAT_FLAG_COLORMAP)) != 0)
+      png_error(png_ptr, "png_write_image: unsupported transformation");
+
+   {
+      png_const_bytep row = png_voidcast(png_const_bytep, display->buffer);
+      ptrdiff_t row_bytes = display->row_stride;
+
+      if (linear != 0)
+         row_bytes *= (sizeof (png_uint_16));
+
+      if (row_bytes < 0)
+         row += (image->height-1) * (-row_bytes);
+
+      display->first_row = row;
+      display->row_bytes = row_bytes;
+   }
+
+   /* Apply 'fast' options if the flag is set. */
+   if ((image->flags & PNG_IMAGE_FLAG_FAST) != 0)
+   {
+      png_set_filter(png_ptr, PNG_FILTER_TYPE_BASE, PNG_NO_FILTERS);
+      /* NOTE: determined by experiment using pngstest, this reflects some
+       * balance between the time to write the image once and the time to read
+       * it about 50 times.  The speed-up in pngstest was about 10-20% of the
+       * total (user) time on a heavily loaded system.
+       */
+#   ifdef PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED
+      png_set_compression_level(png_ptr, 3);
+#   endif
+   }
+
+   /* Check for the cases that currently require a pre-transform on the row
+    * before it is written.  This only applies when the input is 16-bit and
+    * either there is an alpha channel or it is converted to 8-bit.
+    */
+   if ((linear != 0 && alpha != 0 ) ||
+       (colormap == 0 && display->convert_to_8bit != 0))
+   {
+      png_bytep row = png_voidcast(png_bytep, png_malloc(png_ptr,
+          png_get_rowbytes(png_ptr, info_ptr)));
+      int result;
+
+      display->local_row = row;
+      if (write_16bit != 0)
+         result = png_safe_execute(image, png_write_image_16bit, display);
+      else
+         result = png_safe_execute(image, png_write_image_8bit, display);
+      display->local_row = NULL;
+
+      png_free(png_ptr, row);
+
+      /* Skip the 'write_end' on error: */
+      if (result == 0)
+         return 0;
+   }
+
+   /* Otherwise this is the case where the input is in a format currently
+    * supported by the rest of the libpng write code; call it directly.
+    */
+   else
+   {
+      png_const_bytep row = png_voidcast(png_const_bytep, display->first_row);
+      ptrdiff_t row_bytes = display->row_bytes;
+      png_uint_32 y = image->height;
+
+      for (; y > 0; --y)
+      {
+         png_write_row(png_ptr, row);
+         row += row_bytes;
+      }
+   }
+
+   png_write_end(png_ptr, info_ptr);
+   return 1;
+}
+
+
+static void (PNGCBAPI
+image_memory_write)(png_structp png_ptr, png_bytep/*const*/ data, size_t size)
+{
+   png_image_write_control *display = png_voidcast(png_image_write_control*,
+       png_ptr->io_ptr/*backdoor: png_get_io_ptr(png_ptr)*/);
+   png_alloc_size_t ob = display->output_bytes;
+
+   /* Check for overflow; this should never happen: */
+   if (size <= ((png_alloc_size_t)-1) - ob)
+   {
+      /* I don't think libpng ever does this, but just in case: */
+      if (size > 0)
+      {
+         if (display->memory_bytes >= ob+size) /* writing */
+            memcpy(display->memory+ob, data, size);
+
+         /* Always update the size: */
+         display->output_bytes = ob+size;
+      }
+   }
+
+   else
+      png_error(png_ptr, "png_image_write_to_memory: PNG too big");
+}
+
+static void (PNGCBAPI
+image_memory_flush)(png_structp png_ptr)
+{
+   PNG_UNUSED(png_ptr)
+}
+
+static int
+png_image_write_memory(png_voidp argument)
+{
+   png_image_write_control *display = png_voidcast(png_image_write_control*,
+       argument);
+
+   /* The rest of the memory-specific init and write_main in an error protected
+    * environment.  This case needs to use callbacks for the write operations
+    * since libpng has no built in support for writing to memory.
+    */
+   png_set_write_fn(display->image->opaque->png_ptr, display/*io_ptr*/,
+       image_memory_write, image_memory_flush);
+
+   return png_image_write_main(display);
+}
+
+int PNGAPI
+png_image_write_to_memory(png_imagep image, void *memory,
+    png_alloc_size_t * PNG_RESTRICT memory_bytes, int convert_to_8bit,
+    const void *buffer, png_int_32 row_stride, const void *colormap)
+{
+   /* Write the image to the given buffer, or count the bytes if it is NULL */
+   if (image != NULL && image->version == PNG_IMAGE_VERSION)
+   {
+      if (memory_bytes != NULL && buffer != NULL)
+      {
+         /* This is to give the caller an easier error detection in the NULL
+          * case and guard against uninitialized variable problems:
+          */
+         if (memory == NULL)
+            *memory_bytes = 0;
+
+         if (png_image_write_init(image) != 0)
+         {
+            png_image_write_control display;
+            int result;
+
+            memset(&display, 0, (sizeof display));
+            display.image = image;
+            display.buffer = buffer;
+            display.row_stride = row_stride;
+            display.colormap = colormap;
+            display.convert_to_8bit = convert_to_8bit;
+            display.memory = png_voidcast(png_bytep, memory);
+            display.memory_bytes = *memory_bytes;
+            display.output_bytes = 0;
+
+            result = png_safe_execute(image, png_image_write_memory, &display);
+            png_image_free(image);
+
+            /* write_memory returns true even if we ran out of buffer. */
+            if (result)
+            {
+               /* On out-of-buffer this function returns '0' but still updates
+                * memory_bytes:
+                */
+               if (memory != NULL && display.output_bytes > *memory_bytes)
+                  result = 0;
+
+               *memory_bytes = display.output_bytes;
+            }
+
+            return result;
+         }
+
+         else
+            return 0;
+      }
+
+      else
+         return png_image_error(image,
+             "png_image_write_to_memory: invalid argument");
+   }
+
+   else if (image != NULL)
+      return png_image_error(image,
+          "png_image_write_to_memory: incorrect PNG_IMAGE_VERSION");
+
+   else
+      return 0;
+}
+
+#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED
+int PNGAPI
+png_image_write_to_stdio(png_imagep image, FILE *file, int convert_to_8bit,
+    const void *buffer, png_int_32 row_stride, const void *colormap)
+{
+   /* Write the image to the given (FILE*). */
+   if (image != NULL && image->version == PNG_IMAGE_VERSION)
+   {
+      if (file != NULL && buffer != NULL)
+      {
+         if (png_image_write_init(image) != 0)
+         {
+            png_image_write_control display;
+            int result;
+
+            /* This is slightly evil, but png_init_io doesn't do anything other
+             * than this and we haven't changed the standard IO functions so
+             * this saves a 'safe' function.
+             */
+            image->opaque->png_ptr->io_ptr = file;
+
+            memset(&display, 0, (sizeof display));
+            display.image = image;
+            display.buffer = buffer;
+            display.row_stride = row_stride;
+            display.colormap = colormap;
+            display.convert_to_8bit = convert_to_8bit;
+
+            result = png_safe_execute(image, png_image_write_main, &display);
+            png_image_free(image);
+            return result;
+         }
+
+         else
+            return 0;
+      }
+
+      else
+         return png_image_error(image,
+             "png_image_write_to_stdio: invalid argument");
+   }
+
+   else if (image != NULL)
+      return png_image_error(image,
+          "png_image_write_to_stdio: incorrect PNG_IMAGE_VERSION");
+
+   else
+      return 0;
+}
+
+int PNGAPI
+png_image_write_to_file(png_imagep image, const char *file_name,
+    int convert_to_8bit, const void *buffer, png_int_32 row_stride,
+    const void *colormap)
+{
+   /* Write the image to the named file. */
+   if (image != NULL && image->version == PNG_IMAGE_VERSION)
+   {
+      if (file_name != NULL && buffer != NULL)
+      {
+         FILE *fp = fopen(file_name, "wb");
+
+         if (fp != NULL)
+         {
+            if (png_image_write_to_stdio(image, fp, convert_to_8bit, buffer,
+                row_stride, colormap) != 0)
+            {
+               int error; /* from fflush/fclose */
+
+               /* Make sure the file is flushed correctly. */
+               if (fflush(fp) == 0 && ferror(fp) == 0)
+               {
+                  if (fclose(fp) == 0)
+                     return 1;
+
+                  error = errno; /* from fclose */
+               }
+
+               else
+               {
+                  error = errno; /* from fflush or ferror */
+                  (void)fclose(fp);
+               }
+
+               (void)remove(file_name);
+               /* The image has already been cleaned up; this is just used to
+                * set the error (because the original write succeeded).
+                */
+               return png_image_error(image, strerror(error));
+            }
+
+            else
+            {
+               /* Clean up: just the opened file. */
+               (void)fclose(fp);
+               (void)remove(file_name);
+               return 0;
+            }
+         }
+
+         else
+            return png_image_error(image, strerror(errno));
+      }
+
+      else
+         return png_image_error(image,
+             "png_image_write_to_file: invalid argument");
+   }
+
+   else if (image != NULL)
+      return png_image_error(image,
+          "png_image_write_to_file: incorrect PNG_IMAGE_VERSION");
+
+   else
+      return 0;
+}
+#endif /* SIMPLIFIED_WRITE_STDIO */
+#endif /* SIMPLIFIED_WRITE */
+#endif /* WRITE */
diff --git a/reg-io/png/lpng1510/pngwtran.c b/reg-io/png/lpng/pngwtran.c
similarity index 72%
rename from reg-io/png/lpng1510/pngwtran.c
rename to reg-io/png/lpng/pngwtran.c
index b598149a..473c3b87 100644
--- a/reg-io/png/lpng1510/pngwtran.c
+++ b/reg-io/png/lpng/pngwtran.c
@@ -1,10 +1,10 @@
 
 /* pngwtran.c - transforms the data in a row for PNG writers
  *
- * Last changed in libpng 1.5.6 [November 3, 2011]
- * Copyright (c) 1998-2011 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2016,2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
  *
  * This code is released under the libpng license.
  * For conditions of distribution and use, see the disclaimer
@@ -14,90 +14,14 @@
 #include "pngpriv.h"
 
 #ifdef PNG_WRITE_SUPPORTED
-
 #ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
-/* Transform the data according to the user's wishes.  The order of
- * transformations is significant.
- */
-void /* PRIVATE */
-png_do_write_transformations(png_structp png_ptr, png_row_infop row_info)
-{
-   png_debug(1, "in png_do_write_transformations");
-
-   if (png_ptr == NULL)
-      return;
-
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-   if (png_ptr->transformations & PNG_USER_TRANSFORM)
-      if (png_ptr->write_user_transform_fn != NULL)
-         (*(png_ptr->write_user_transform_fn)) /* User write transform
-                                                 function */
-             (png_ptr,  /* png_ptr */
-             row_info,  /* row_info: */
-                /*  png_uint_32 width;       width of row */
-                /*  png_size_t rowbytes;     number of bytes in row */
-                /*  png_byte color_type;     color type of pixels */
-                /*  png_byte bit_depth;      bit depth of samples */
-                /*  png_byte channels;       number of channels (1-4) */
-                /*  png_byte pixel_depth;    bits per pixel (depth*channels) */
-             png_ptr->row_buf + 1);      /* start of pixel data for row */
-#endif
-
-#ifdef PNG_WRITE_FILLER_SUPPORTED
-   if (png_ptr->transformations & PNG_FILLER)
-      png_do_strip_channel(row_info, png_ptr->row_buf + 1,
-         !(png_ptr->flags & PNG_FLAG_FILLER_AFTER));
-#endif
-
-#ifdef PNG_WRITE_PACKSWAP_SUPPORTED
-   if (png_ptr->transformations & PNG_PACKSWAP)
-      png_do_packswap(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_WRITE_PACK_SUPPORTED
-   if (png_ptr->transformations & PNG_PACK)
-      png_do_pack(row_info, png_ptr->row_buf + 1,
-          (png_uint_32)png_ptr->bit_depth);
-#endif
-
-#ifdef PNG_WRITE_SWAP_SUPPORTED
-   if (png_ptr->transformations & PNG_SWAP_BYTES)
-      png_do_swap(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_WRITE_SHIFT_SUPPORTED
-   if (png_ptr->transformations & PNG_SHIFT)
-      png_do_shift(row_info, png_ptr->row_buf + 1,
-          &(png_ptr->shift));
-#endif
-
-#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED
-   if (png_ptr->transformations & PNG_SWAP_ALPHA)
-      png_do_write_swap_alpha(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
-   if (png_ptr->transformations & PNG_INVERT_ALPHA)
-      png_do_write_invert_alpha(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_WRITE_BGR_SUPPORTED
-   if (png_ptr->transformations & PNG_BGR)
-      png_do_bgr(row_info, png_ptr->row_buf + 1);
-#endif
-
-#ifdef PNG_WRITE_INVERT_SUPPORTED
-   if (png_ptr->transformations & PNG_INVERT_MONO)
-      png_do_invert(row_info, png_ptr->row_buf + 1);
-#endif
-}
 
 #ifdef PNG_WRITE_PACK_SUPPORTED
 /* Pack pixels into bytes.  Pass the true bit depth in bit_depth.  The
  * row_info bit depth should be 8 (one pixel per byte).  The channels
  * should be 1 (this only happens on grayscale and paletted images).
  */
-void /* PRIVATE */
+static void
 png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth)
 {
    png_debug(1, "in png_do_pack");
@@ -147,7 +71,8 @@ png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth)
          case 2:
          {
             png_bytep sp, dp;
-            int shift, v;
+            unsigned int shift;
+            int v;
             png_uint_32 i;
             png_uint_32 row_width = row_info->width;
 
@@ -186,7 +111,8 @@ png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth)
          case 4:
          {
             png_bytep sp, dp;
-            int shift, v;
+            unsigned int shift;
+            int v;
             png_uint_32 i;
             png_uint_32 row_width = row_info->width;
 
@@ -242,7 +168,7 @@ png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth)
  * would pass 3 as bit_depth, and this routine would translate the
  * data to 0 to 15.
  */
-void /* PRIVATE */
+static void
 png_do_shift(png_row_infop row_info, png_bytep row,
     png_const_color_8p bit_depth)
 {
@@ -251,9 +177,9 @@ png_do_shift(png_row_infop row_info, png_bytep row,
    if (row_info->color_type != PNG_COLOR_TYPE_PALETTE)
    {
       int shift_start[4], shift_dec[4];
-      int channels = 0;
+      unsigned int channels = 0;
 
-      if (row_info->color_type & PNG_COLOR_MASK_COLOR)
+      if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0)
       {
          shift_start[channels] = row_info->bit_depth - bit_depth->red;
          shift_dec[channels] = bit_depth->red;
@@ -275,7 +201,7 @@ png_do_shift(png_row_infop row_info, png_bytep row,
          channels++;
       }
 
-      if (row_info->color_type & PNG_COLOR_MASK_ALPHA)
+      if ((row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0)
       {
          shift_start[channels] = row_info->bit_depth - bit_depth->alpha;
          shift_dec[channels] = bit_depth->alpha;
@@ -286,9 +212,9 @@ png_do_shift(png_row_infop row_info, png_bytep row,
       if (row_info->bit_depth < 8)
       {
          png_bytep bp = row;
-         png_size_t i;
-         png_byte mask;
-         png_size_t row_bytes = row_info->rowbytes;
+         size_t i;
+         unsigned int mask;
+         size_t row_bytes = row_info->rowbytes;
 
          if (bit_depth->gray == 1 && row_info->bit_depth == 2)
             mask = 0x55;
@@ -301,20 +227,22 @@ png_do_shift(png_row_infop row_info, png_bytep row,
 
          for (i = 0; i < row_bytes; i++, bp++)
          {
-            png_uint_16 v;
             int j;
+            unsigned int v, out;
 
             v = *bp;
-            *bp = 0;
+            out = 0;
 
             for (j = shift_start[0]; j > -shift_dec[0]; j -= shift_dec[0])
             {
                if (j > 0)
-                  *bp |= (png_byte)((v << j) & 0xff);
+                  out |= v << j;
 
                else
-                  *bp |= (png_byte)((v >> (-j)) & mask);
+                  out |= (v >> (-j)) & mask;
             }
+
+            *bp = (png_byte)(out & 0xff);
          }
       }
 
@@ -326,22 +254,23 @@ png_do_shift(png_row_infop row_info, png_bytep row,
 
          for (i = 0; i < istop; i++, bp++)
          {
-
-            png_uint_16 v;
+            unsigned int c = i%channels;
             int j;
-            int c = (int)(i%channels);
+            unsigned int v, out;
 
             v = *bp;
-            *bp = 0;
+            out = 0;
 
             for (j = shift_start[c]; j > -shift_dec[c]; j -= shift_dec[c])
             {
                if (j > 0)
-                  *bp |= (png_byte)((v << j) & 0xff);
+                  out |= v << j;
 
                else
-                  *bp |= (png_byte)((v >> (-j)) & 0xff);
+                  out |= v >> (-j);
             }
+
+            *bp = (png_byte)(out & 0xff);
          }
       }
 
@@ -353,22 +282,22 @@ png_do_shift(png_row_infop row_info, png_bytep row,
 
          for (bp = row, i = 0; i < istop; i++)
          {
-            int c = (int)(i%channels);
-            png_uint_16 value, v;
+            unsigned int c = i%channels;
             int j;
+            unsigned int value, v;
 
-            v = (png_uint_16)(((png_uint_16)(*bp) << 8) + *(bp + 1));
+            v = png_get_uint_16(bp);
             value = 0;
 
             for (j = shift_start[c]; j > -shift_dec[c]; j -= shift_dec[c])
             {
                if (j > 0)
-                  value |= (png_uint_16)((v << j) & (png_uint_16)0xffff);
+                  value |= v << j;
 
                else
-                  value |= (png_uint_16)((v >> (-j)) & (png_uint_16)0xffff);
+                  value |= v >> (-j);
             }
-            *bp++ = (png_byte)(value >> 8);
+            *bp++ = (png_byte)((value >> 8) & 0xff);
             *bp++ = (png_byte)(value & 0xff);
          }
       }
@@ -377,7 +306,7 @@ png_do_shift(png_row_infop row_info, png_bytep row,
 #endif
 
 #ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED
-void /* PRIVATE */
+static void
 png_do_write_swap_alpha(png_row_infop row_info, png_bytep row)
 {
    png_debug(1, "in png_do_write_swap_alpha");
@@ -425,7 +354,7 @@ png_do_write_swap_alpha(png_row_infop row_info, png_bytep row)
                *(dp++) = save[1];
             }
          }
-#endif /* PNG_WRITE_16BIT_SUPPORTED */
+#endif /* WRITE_16BIT */
       }
 
       else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
@@ -464,14 +393,14 @@ png_do_write_swap_alpha(png_row_infop row_info, png_bytep row)
                *(dp++) = save[1];
             }
          }
-#endif /* PNG_WRITE_16BIT_SUPPORTED */
+#endif /* WRITE_16BIT */
       }
    }
 }
 #endif
 
 #ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
-void /* PRIVATE */
+static void
 png_do_write_invert_alpha(png_row_infop row_info, png_bytep row)
 {
    png_debug(1, "in png_do_write_invert_alpha");
@@ -494,7 +423,7 @@ png_do_write_invert_alpha(png_row_infop row_info, png_bytep row)
                *(dp++) = *(sp++);
                */
                sp+=3; dp = sp;
-               *(dp++) = (png_byte)(255 - *(sp++));
+               *dp = (png_byte)(255 - *(sp++));
             }
          }
 
@@ -518,10 +447,10 @@ png_do_write_invert_alpha(png_row_infop row_info, png_bytep row)
                */
                sp+=6; dp = sp;
                *(dp++) = (png_byte)(255 - *(sp++));
-               *(dp++) = (png_byte)(255 - *(sp++));
+               *dp     = (png_byte)(255 - *(sp++));
             }
          }
-#endif /* PNG_WRITE_16BIT_SUPPORTED */
+#endif /* WRITE_16BIT */
       }
 
       else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
@@ -556,78 +485,91 @@ png_do_write_invert_alpha(png_row_infop row_info, png_bytep row)
                */
                sp+=2; dp = sp;
                *(dp++) = (png_byte)(255 - *(sp++));
-               *(dp++) = (png_byte)(255 - *(sp++));
+               *dp     = (png_byte)(255 - *(sp++));
             }
          }
-#endif /* PNG_WRITE_16BIT_SUPPORTED */
+#endif /* WRITE_16BIT */
       }
    }
 }
 #endif
-#endif /* PNG_WRITE_TRANSFORMS_SUPPORTED */
 
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-/* Undoes intrapixel differencing  */
+/* Transform the data according to the user's wishes.  The order of
+ * transformations is significant.
+ */
 void /* PRIVATE */
-png_do_write_intrapixel(png_row_infop row_info, png_bytep row)
+png_do_write_transformations(png_structrp png_ptr, png_row_infop row_info)
 {
-   png_debug(1, "in png_do_write_intrapixel");
+   png_debug(1, "in png_do_write_transformations");
 
-   if ((row_info->color_type & PNG_COLOR_MASK_COLOR))
-   {
-      int bytes_per_pixel;
-      png_uint_32 row_width = row_info->width;
-      if (row_info->bit_depth == 8)
-      {
-         png_bytep rp;
-         png_uint_32 i;
+   if (png_ptr == NULL)
+      return;
 
-         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-            bytes_per_pixel = 3;
+#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
+   if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0)
+      if (png_ptr->write_user_transform_fn != NULL)
+         (*(png_ptr->write_user_transform_fn)) /* User write transform
+                                                 function */
+             (png_ptr,  /* png_ptr */
+             row_info,  /* row_info: */
+                /*  png_uint_32 width;       width of row */
+                /*  size_t rowbytes;         number of bytes in row */
+                /*  png_byte color_type;     color type of pixels */
+                /*  png_byte bit_depth;      bit depth of samples */
+                /*  png_byte channels;       number of channels (1-4) */
+                /*  png_byte pixel_depth;    bits per pixel (depth*channels) */
+             png_ptr->row_buf + 1);      /* start of pixel data for row */
+#endif
 
-         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-            bytes_per_pixel = 4;
+#ifdef PNG_WRITE_FILLER_SUPPORTED
+   if ((png_ptr->transformations & PNG_FILLER) != 0)
+      png_do_strip_channel(row_info, png_ptr->row_buf + 1,
+          !(png_ptr->flags & PNG_FLAG_FILLER_AFTER));
+#endif
 
-         else
-            return;
+#ifdef PNG_WRITE_PACKSWAP_SUPPORTED
+   if ((png_ptr->transformations & PNG_PACKSWAP) != 0)
+      png_do_packswap(row_info, png_ptr->row_buf + 1);
+#endif
 
-         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
-         {
-            *(rp)     = (png_byte)((*rp       - *(rp + 1)) & 0xff);
-            *(rp + 2) = (png_byte)((*(rp + 2) - *(rp + 1)) & 0xff);
-         }
-      }
+#ifdef PNG_WRITE_PACK_SUPPORTED
+   if ((png_ptr->transformations & PNG_PACK) != 0)
+      png_do_pack(row_info, png_ptr->row_buf + 1,
+          (png_uint_32)png_ptr->bit_depth);
+#endif
 
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-      else if (row_info->bit_depth == 16)
-      {
-         png_bytep rp;
-         png_uint_32 i;
+#ifdef PNG_WRITE_SWAP_SUPPORTED
+#  ifdef PNG_16BIT_SUPPORTED
+   if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0)
+      png_do_swap(row_info, png_ptr->row_buf + 1);
+#  endif
+#endif
 
-         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
-            bytes_per_pixel = 6;
+#ifdef PNG_WRITE_SHIFT_SUPPORTED
+   if ((png_ptr->transformations & PNG_SHIFT) != 0)
+      png_do_shift(row_info, png_ptr->row_buf + 1,
+           &(png_ptr->shift));
+#endif
 
-         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-            bytes_per_pixel = 8;
+#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED
+   if ((png_ptr->transformations & PNG_SWAP_ALPHA) != 0)
+      png_do_write_swap_alpha(row_info, png_ptr->row_buf + 1);
+#endif
 
-         else
-            return;
+#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
+   if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0)
+      png_do_write_invert_alpha(row_info, png_ptr->row_buf + 1);
+#endif
 
-         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
-         {
-            png_uint_32 s0   = (*(rp    ) << 8) | *(rp + 1);
-            png_uint_32 s1   = (*(rp + 2) << 8) | *(rp + 3);
-            png_uint_32 s2   = (*(rp + 4) << 8) | *(rp + 5);
-            png_uint_32 red  = (png_uint_32)((s0 - s1) & 0xffffL);
-            png_uint_32 blue = (png_uint_32)((s2 - s1) & 0xffffL);
-            *(rp    ) = (png_byte)((red >> 8) & 0xff);
-            *(rp + 1) = (png_byte)(red & 0xff);
-            *(rp + 4) = (png_byte)((blue >> 8) & 0xff);
-            *(rp + 5) = (png_byte)(blue & 0xff);
-         }
-      }
-#endif /* PNG_WRITE_16BIT_SUPPORTED */
-   }
+#ifdef PNG_WRITE_BGR_SUPPORTED
+   if ((png_ptr->transformations & PNG_BGR) != 0)
+      png_do_bgr(row_info, png_ptr->row_buf + 1);
+#endif
+
+#ifdef PNG_WRITE_INVERT_SUPPORTED
+   if ((png_ptr->transformations & PNG_INVERT_MONO) != 0)
+      png_do_invert(row_info, png_ptr->row_buf + 1);
+#endif
 }
-#endif /* PNG_MNG_FEATURES_SUPPORTED */
-#endif /* PNG_WRITE_SUPPORTED */
+#endif /* WRITE_TRANSFORMS */
+#endif /* WRITE */
diff --git a/reg-io/png/lpng/pngwutil.c b/reg-io/png/lpng/pngwutil.c
new file mode 100644
index 00000000..ac36eabb
--- /dev/null
+++ b/reg-io/png/lpng/pngwutil.c
@@ -0,0 +1,2781 @@
+
+/* pngwutil.c - utilities to write a PNG file
+ *
+ * Copyright (c) 2018-2024 Cosmin Truta
+ * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson
+ * Copyright (c) 1996-1997 Andreas Dilger
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "pngpriv.h"
+
+#ifdef PNG_WRITE_SUPPORTED
+
+#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED
+/* Place a 32-bit number into a buffer in PNG byte order.  We work
+ * with unsigned numbers for convenience, although one supported
+ * ancillary chunk uses signed (two's complement) numbers.
+ */
+void PNGAPI
+png_save_uint_32(png_bytep buf, png_uint_32 i)
+{
+   buf[0] = (png_byte)((i >> 24) & 0xffU);
+   buf[1] = (png_byte)((i >> 16) & 0xffU);
+   buf[2] = (png_byte)((i >>  8) & 0xffU);
+   buf[3] = (png_byte)( i        & 0xffU);
+}
+
+/* Place a 16-bit number into a buffer in PNG byte order.
+ * The parameter is declared unsigned int, not png_uint_16,
+ * just to avoid potential problems on pre-ANSI C compilers.
+ */
+void PNGAPI
+png_save_uint_16(png_bytep buf, unsigned int i)
+{
+   buf[0] = (png_byte)((i >> 8) & 0xffU);
+   buf[1] = (png_byte)( i       & 0xffU);
+}
+#endif
+
+/* Simple function to write the signature.  If we have already written
+ * the magic bytes of the signature, or more likely, the PNG stream is
+ * being embedded into another stream and doesn't need its own signature,
+ * we should call png_set_sig_bytes() to tell libpng how many of the
+ * bytes have already been written.
+ */
+void PNGAPI
+png_write_sig(png_structrp png_ptr)
+{
+   png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+
+#ifdef PNG_IO_STATE_SUPPORTED
+   /* Inform the I/O callback that the signature is being written */
+   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_SIGNATURE;
+#endif
+
+   /* Write the rest of the 8 byte signature */
+   png_write_data(png_ptr, &png_signature[png_ptr->sig_bytes],
+       (size_t)(8 - png_ptr->sig_bytes));
+
+   if (png_ptr->sig_bytes < 3)
+      png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE;
+}
+
+/* Write the start of a PNG chunk.  The type is the chunk type.
+ * The total_length is the sum of the lengths of all the data you will be
+ * passing in png_write_chunk_data().
+ */
+static void
+png_write_chunk_header(png_structrp png_ptr, png_uint_32 chunk_name,
+    png_uint_32 length)
+{
+   png_byte buf[8];
+
+#if defined(PNG_DEBUG) && (PNG_DEBUG > 0)
+   PNG_CSTRING_FROM_CHUNK(buf, chunk_name);
+   png_debug2(0, "Writing %s chunk, length = %lu", buf, (unsigned long)length);
+#endif
+
+   if (png_ptr == NULL)
+      return;
+
+#ifdef PNG_IO_STATE_SUPPORTED
+   /* Inform the I/O callback that the chunk header is being written.
+    * PNG_IO_CHUNK_HDR requires a single I/O call.
+    */
+   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_HDR;
+#endif
+
+   /* Write the length and the chunk name */
+   png_save_uint_32(buf, length);
+   png_save_uint_32(buf + 4, chunk_name);
+   png_write_data(png_ptr, buf, 8);
+
+   /* Put the chunk name into png_ptr->chunk_name */
+   png_ptr->chunk_name = chunk_name;
+
+   /* Reset the crc and run it over the chunk name */
+   png_reset_crc(png_ptr);
+
+   png_calculate_crc(png_ptr, buf + 4, 4);
+
+#ifdef PNG_IO_STATE_SUPPORTED
+   /* Inform the I/O callback that chunk data will (possibly) be written.
+    * PNG_IO_CHUNK_DATA does NOT require a specific number of I/O calls.
+    */
+   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_DATA;
+#endif
+}
+
+void PNGAPI
+png_write_chunk_start(png_structrp png_ptr, png_const_bytep chunk_string,
+    png_uint_32 length)
+{
+   png_write_chunk_header(png_ptr, PNG_CHUNK_FROM_STRING(chunk_string), length);
+}
+
+/* Write the data of a PNG chunk started with png_write_chunk_header().
+ * Note that multiple calls to this function are allowed, and that the
+ * sum of the lengths from these calls *must* add up to the total_length
+ * given to png_write_chunk_header().
+ */
+void PNGAPI
+png_write_chunk_data(png_structrp png_ptr, png_const_bytep data, size_t length)
+{
+   /* Write the data, and run the CRC over it */
+   if (png_ptr == NULL)
+      return;
+
+   if (data != NULL && length > 0)
+   {
+      png_write_data(png_ptr, data, length);
+
+      /* Update the CRC after writing the data,
+       * in case the user I/O routine alters it.
+       */
+      png_calculate_crc(png_ptr, data, length);
+   }
+}
+
+/* Finish a chunk started with png_write_chunk_header(). */
+void PNGAPI
+png_write_chunk_end(png_structrp png_ptr)
+{
+   png_byte buf[4];
+
+   if (png_ptr == NULL) return;
+
+#ifdef PNG_IO_STATE_SUPPORTED
+   /* Inform the I/O callback that the chunk CRC is being written.
+    * PNG_IO_CHUNK_CRC requires a single I/O function call.
+    */
+   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_CRC;
+#endif
+
+   /* Write the crc in a single operation */
+   png_save_uint_32(buf, png_ptr->crc);
+
+   png_write_data(png_ptr, buf, 4);
+}
+
+/* Write a PNG chunk all at once.  The type is an array of ASCII characters
+ * representing the chunk name.  The array must be at least 4 bytes in
+ * length, and does not need to be null terminated.  To be safe, pass the
+ * pre-defined chunk names here, and if you need a new one, define it
+ * where the others are defined.  The length is the length of the data.
+ * All the data must be present.  If that is not possible, use the
+ * png_write_chunk_start(), png_write_chunk_data(), and png_write_chunk_end()
+ * functions instead.
+ */
+static void
+png_write_complete_chunk(png_structrp png_ptr, png_uint_32 chunk_name,
+    png_const_bytep data, size_t length)
+{
+   if (png_ptr == NULL)
+      return;
+
+   /* On 64-bit architectures 'length' may not fit in a png_uint_32. */
+   if (length > PNG_UINT_31_MAX)
+      png_error(png_ptr, "length exceeds PNG maximum");
+
+   png_write_chunk_header(png_ptr, chunk_name, (png_uint_32)length);
+   png_write_chunk_data(png_ptr, data, length);
+   png_write_chunk_end(png_ptr);
+}
+
+/* This is the API that calls the internal function above. */
+void PNGAPI
+png_write_chunk(png_structrp png_ptr, png_const_bytep chunk_string,
+    png_const_bytep data, size_t length)
+{
+   png_write_complete_chunk(png_ptr, PNG_CHUNK_FROM_STRING(chunk_string), data,
+       length);
+}
+
+/* This is used below to find the size of an image to pass to png_deflate_claim,
+ * so it only needs to be accurate if the size is less than 16384 bytes (the
+ * point at which a lower LZ window size can be used.)
+ */
+static png_alloc_size_t
+png_image_size(png_structrp png_ptr)
+{
+   /* Only return sizes up to the maximum of a png_uint_32; do this by limiting
+    * the width and height used to 15 bits.
+    */
+   png_uint_32 h = png_ptr->height;
+
+   if (png_ptr->rowbytes < 32768 && h < 32768)
+   {
+      if (png_ptr->interlaced != 0)
+      {
+         /* Interlacing makes the image larger because of the replication of
+          * both the filter byte and the padding to a byte boundary.
+          */
+         png_uint_32 w = png_ptr->width;
+         unsigned int pd = png_ptr->pixel_depth;
+         png_alloc_size_t cb_base;
+         int pass;
+
+         for (cb_base=0, pass=0; pass<=6; ++pass)
+         {
+            png_uint_32 pw = PNG_PASS_COLS(w, pass);
+
+            if (pw > 0)
+               cb_base += (PNG_ROWBYTES(pd, pw)+1) * PNG_PASS_ROWS(h, pass);
+         }
+
+         return cb_base;
+      }
+
+      else
+         return (png_ptr->rowbytes+1) * h;
+   }
+
+   else
+      return 0xffffffffU;
+}
+
+#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
+   /* This is the code to hack the first two bytes of the deflate stream (the
+    * deflate header) to correct the windowBits value to match the actual data
+    * size.  Note that the second argument is the *uncompressed* size but the
+    * first argument is the *compressed* data (and it must be deflate
+    * compressed.)
+    */
+static void
+optimize_cmf(png_bytep data, png_alloc_size_t data_size)
+{
+   /* Optimize the CMF field in the zlib stream.  The resultant zlib stream is
+    * still compliant to the stream specification.
+    */
+   if (data_size <= 16384) /* else windowBits must be 15 */
+   {
+      unsigned int z_cmf = data[0];  /* zlib compression method and flags */
+
+      if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70)
+      {
+         unsigned int z_cinfo;
+         unsigned int half_z_window_size;
+
+         z_cinfo = z_cmf >> 4;
+         half_z_window_size = 1U << (z_cinfo + 7);
+
+         if (data_size <= half_z_window_size) /* else no change */
+         {
+            unsigned int tmp;
+
+            do
+            {
+               half_z_window_size >>= 1;
+               --z_cinfo;
+            }
+            while (z_cinfo > 0 && data_size <= half_z_window_size);
+
+            z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4);
+
+            data[0] = (png_byte)z_cmf;
+            tmp = data[1] & 0xe0;
+            tmp += 0x1f - ((z_cmf << 8) + tmp) % 0x1f;
+            data[1] = (png_byte)tmp;
+         }
+      }
+   }
+}
+#endif /* WRITE_OPTIMIZE_CMF */
+
+/* Initialize the compressor for the appropriate type of compression. */
+static int
+png_deflate_claim(png_structrp png_ptr, png_uint_32 owner,
+    png_alloc_size_t data_size)
+{
+   if (png_ptr->zowner != 0)
+   {
+#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_ERROR_TEXT_SUPPORTED)
+      char msg[64];
+
+      PNG_STRING_FROM_CHUNK(msg, owner);
+      msg[4] = ':';
+      msg[5] = ' ';
+      PNG_STRING_FROM_CHUNK(msg+6, png_ptr->zowner);
+      /* So the message that results is "<chunk> using zstream"; this is an
+       * internal error, but is very useful for debugging.  i18n requirements
+       * are minimal.
+       */
+      (void)png_safecat(msg, (sizeof msg), 10, " using zstream");
+#endif
+#if PNG_RELEASE_BUILD
+         png_warning(png_ptr, msg);
+
+         /* Attempt sane error recovery */
+         if (png_ptr->zowner == png_IDAT) /* don't steal from IDAT */
+         {
+            png_ptr->zstream.msg = PNGZ_MSG_CAST("in use by IDAT");
+            return Z_STREAM_ERROR;
+         }
+
+         png_ptr->zowner = 0;
+#else
+         png_error(png_ptr, msg);
+#endif
+   }
+
+   {
+      int level = png_ptr->zlib_level;
+      int method = png_ptr->zlib_method;
+      int windowBits = png_ptr->zlib_window_bits;
+      int memLevel = png_ptr->zlib_mem_level;
+      int strategy; /* set below */
+      int ret; /* zlib return code */
+
+      if (owner == png_IDAT)
+      {
+         if ((png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_STRATEGY) != 0)
+            strategy = png_ptr->zlib_strategy;
+
+         else if (png_ptr->do_filter != PNG_FILTER_NONE)
+            strategy = PNG_Z_DEFAULT_STRATEGY;
+
+         else
+            strategy = PNG_Z_DEFAULT_NOFILTER_STRATEGY;
+      }
+
+      else
+      {
+#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
+            level = png_ptr->zlib_text_level;
+            method = png_ptr->zlib_text_method;
+            windowBits = png_ptr->zlib_text_window_bits;
+            memLevel = png_ptr->zlib_text_mem_level;
+            strategy = png_ptr->zlib_text_strategy;
+#else
+            /* If customization is not supported the values all come from the
+             * IDAT values except for the strategy, which is fixed to the
+             * default.  (This is the pre-1.6.0 behavior too, although it was
+             * implemented in a very different way.)
+             */
+            strategy = Z_DEFAULT_STRATEGY;
+#endif
+      }
+
+      /* Adjust 'windowBits' down if larger than 'data_size'; to stop this
+       * happening just pass 32768 as the data_size parameter.  Notice that zlib
+       * requires an extra 262 bytes in the window in addition to the data to be
+       * able to see the whole of the data, so if data_size+262 takes us to the
+       * next windowBits size we need to fix up the value later.  (Because even
+       * though deflate needs the extra window, inflate does not!)
+       */
+      if (data_size <= 16384)
+      {
+         /* IMPLEMENTATION NOTE: this 'half_window_size' stuff is only here to
+          * work round a Microsoft Visual C misbehavior which, contrary to C-90,
+          * widens the result of the following shift to 64-bits if (and,
+          * apparently, only if) it is used in a test.
+          */
+         unsigned int half_window_size = 1U << (windowBits-1);
+
+         while (data_size + 262 <= half_window_size)
+         {
+            half_window_size >>= 1;
+            --windowBits;
+         }
+      }
+
+      /* Check against the previous initialized values, if any. */
+      if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0 &&
+         (png_ptr->zlib_set_level != level ||
+         png_ptr->zlib_set_method != method ||
+         png_ptr->zlib_set_window_bits != windowBits ||
+         png_ptr->zlib_set_mem_level != memLevel ||
+         png_ptr->zlib_set_strategy != strategy))
+      {
+         if (deflateEnd(&png_ptr->zstream) != Z_OK)
+            png_warning(png_ptr, "deflateEnd failed (ignored)");
+
+         png_ptr->flags &= ~PNG_FLAG_ZSTREAM_INITIALIZED;
+      }
+
+      /* For safety clear out the input and output pointers (currently zlib
+       * doesn't use them on Init, but it might in the future).
+       */
+      png_ptr->zstream.next_in = NULL;
+      png_ptr->zstream.avail_in = 0;
+      png_ptr->zstream.next_out = NULL;
+      png_ptr->zstream.avail_out = 0;
+
+      /* Now initialize if required, setting the new parameters, otherwise just
+       * do a simple reset to the previous parameters.
+       */
+      if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0)
+         ret = deflateReset(&png_ptr->zstream);
+
+      else
+      {
+         ret = deflateInit2(&png_ptr->zstream, level, method, windowBits,
+             memLevel, strategy);
+
+         if (ret == Z_OK)
+            png_ptr->flags |= PNG_FLAG_ZSTREAM_INITIALIZED;
+      }
+
+      /* The return code is from either deflateReset or deflateInit2; they have
+       * pretty much the same set of error codes.
+       */
+      if (ret == Z_OK)
+         png_ptr->zowner = owner;
+
+      else
+         png_zstream_error(png_ptr, ret);
+
+      return ret;
+   }
+}
+
+/* Clean up (or trim) a linked list of compression buffers. */
+void /* PRIVATE */
+png_free_buffer_list(png_structrp png_ptr, png_compression_bufferp *listp)
+{
+   png_compression_bufferp list = *listp;
+
+   if (list != NULL)
+   {
+      *listp = NULL;
+
+      do
+      {
+         png_compression_bufferp next = list->next;
+
+         png_free(png_ptr, list);
+         list = next;
+      }
+      while (list != NULL);
+   }
+}
+
+#ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED
+/* This pair of functions encapsulates the operation of (a) compressing a
+ * text string, and (b) issuing it later as a series of chunk data writes.
+ * The compression_state structure is shared context for these functions
+ * set up by the caller to allow access to the relevant local variables.
+ *
+ * compression_buffer (new in 1.6.0) is just a linked list of zbuffer_size
+ * temporary buffers.  From 1.6.0 it is retained in png_struct so that it will
+ * be correctly freed in the event of a write error (previous implementations
+ * just leaked memory.)
+ */
+typedef struct
+{
+   png_const_bytep      input;        /* The uncompressed input data */
+   png_alloc_size_t     input_len;    /* Its length */
+   png_uint_32          output_len;   /* Final compressed length */
+   png_byte             output[1024]; /* First block of output */
+} compression_state;
+
+static void
+png_text_compress_init(compression_state *comp, png_const_bytep input,
+    png_alloc_size_t input_len)
+{
+   comp->input = input;
+   comp->input_len = input_len;
+   comp->output_len = 0;
+}
+
+/* Compress the data in the compression state input */
+static int
+png_text_compress(png_structrp png_ptr, png_uint_32 chunk_name,
+    compression_state *comp, png_uint_32 prefix_len)
+{
+   int ret;
+
+   /* To find the length of the output it is necessary to first compress the
+    * input. The result is buffered rather than using the two-pass algorithm
+    * that is used on the inflate side; deflate is assumed to be slower and a
+    * PNG writer is assumed to have more memory available than a PNG reader.
+    *
+    * IMPLEMENTATION NOTE: the zlib API deflateBound() can be used to find an
+    * upper limit on the output size, but it is always bigger than the input
+    * size so it is likely to be more efficient to use this linked-list
+    * approach.
+    */
+   ret = png_deflate_claim(png_ptr, chunk_name, comp->input_len);
+
+   if (ret != Z_OK)
+      return ret;
+
+   /* Set up the compression buffers, we need a loop here to avoid overflowing a
+    * uInt.  Use ZLIB_IO_MAX to limit the input.  The output is always limited
+    * by the output buffer size, so there is no need to check that.  Since this
+    * is ANSI-C we know that an 'int', hence a uInt, is always at least 16 bits
+    * in size.
+    */
+   {
+      png_compression_bufferp *end = &png_ptr->zbuffer_list;
+      png_alloc_size_t input_len = comp->input_len; /* may be zero! */
+      png_uint_32 output_len;
+
+      /* zlib updates these for us: */
+      png_ptr->zstream.next_in = PNGZ_INPUT_CAST(comp->input);
+      png_ptr->zstream.avail_in = 0; /* Set below */
+      png_ptr->zstream.next_out = comp->output;
+      png_ptr->zstream.avail_out = (sizeof comp->output);
+
+      output_len = png_ptr->zstream.avail_out;
+
+      do
+      {
+         uInt avail_in = ZLIB_IO_MAX;
+
+         if (avail_in > input_len)
+            avail_in = (uInt)input_len;
+
+         input_len -= avail_in;
+
+         png_ptr->zstream.avail_in = avail_in;
+
+         if (png_ptr->zstream.avail_out == 0)
+         {
+            png_compression_buffer *next;
+
+            /* Chunk data is limited to 2^31 bytes in length, so the prefix
+             * length must be counted here.
+             */
+            if (output_len + prefix_len > PNG_UINT_31_MAX)
+            {
+               ret = Z_MEM_ERROR;
+               break;
+            }
+
+            /* Need a new (malloc'ed) buffer, but there may be one present
+             * already.
+             */
+            next = *end;
+            if (next == NULL)
+            {
+               next = png_voidcast(png_compression_bufferp, png_malloc_base
+                  (png_ptr, PNG_COMPRESSION_BUFFER_SIZE(png_ptr)));
+
+               if (next == NULL)
+               {
+                  ret = Z_MEM_ERROR;
+                  break;
+               }
+
+               /* Link in this buffer (so that it will be freed later) */
+               next->next = NULL;
+               *end = next;
+            }
+
+            png_ptr->zstream.next_out = next->output;
+            png_ptr->zstream.avail_out = png_ptr->zbuffer_size;
+            output_len += png_ptr->zstream.avail_out;
+
+            /* Move 'end' to the next buffer pointer. */
+            end = &next->next;
+         }
+
+         /* Compress the data */
+         ret = deflate(&png_ptr->zstream,
+             input_len > 0 ? Z_NO_FLUSH : Z_FINISH);
+
+         /* Claw back input data that was not consumed (because avail_in is
+          * reset above every time round the loop).
+          */
+         input_len += png_ptr->zstream.avail_in;
+         png_ptr->zstream.avail_in = 0; /* safety */
+      }
+      while (ret == Z_OK);
+
+      /* There may be some space left in the last output buffer. This needs to
+       * be subtracted from output_len.
+       */
+      output_len -= png_ptr->zstream.avail_out;
+      png_ptr->zstream.avail_out = 0; /* safety */
+      comp->output_len = output_len;
+
+      /* Now double check the output length, put in a custom message if it is
+       * too long.  Otherwise ensure the z_stream::msg pointer is set to
+       * something.
+       */
+      if (output_len + prefix_len >= PNG_UINT_31_MAX)
+      {
+         png_ptr->zstream.msg = PNGZ_MSG_CAST("compressed data too long");
+         ret = Z_MEM_ERROR;
+      }
+
+      else
+         png_zstream_error(png_ptr, ret);
+
+      /* Reset zlib for another zTXt/iTXt or image data */
+      png_ptr->zowner = 0;
+
+      /* The only success case is Z_STREAM_END, input_len must be 0; if not this
+       * is an internal error.
+       */
+      if (ret == Z_STREAM_END && input_len == 0)
+      {
+#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
+         /* Fix up the deflate header, if required */
+         optimize_cmf(comp->output, comp->input_len);
+#endif
+         /* But Z_OK is returned, not Z_STREAM_END; this allows the claim
+          * function above to return Z_STREAM_END on an error (though it never
+          * does in the current versions of zlib.)
+          */
+         return Z_OK;
+      }
+
+      else
+         return ret;
+   }
+}
+
+/* Ship the compressed text out via chunk writes */
+static void
+png_write_compressed_data_out(png_structrp png_ptr, compression_state *comp)
+{
+   png_uint_32 output_len = comp->output_len;
+   png_const_bytep output = comp->output;
+   png_uint_32 avail = (sizeof comp->output);
+   png_compression_buffer *next = png_ptr->zbuffer_list;
+
+   for (;;)
+   {
+      if (avail > output_len)
+         avail = output_len;
+
+      png_write_chunk_data(png_ptr, output, avail);
+
+      output_len -= avail;
+
+      if (output_len == 0 || next == NULL)
+         break;
+
+      avail = png_ptr->zbuffer_size;
+      output = next->output;
+      next = next->next;
+   }
+
+   /* This is an internal error; 'next' must have been NULL! */
+   if (output_len > 0)
+      png_error(png_ptr, "error writing ancillary chunked compressed data");
+}
+#endif /* WRITE_COMPRESSED_TEXT */
+
+/* Write the IHDR chunk, and update the png_struct with the necessary
+ * information.  Note that the rest of this code depends upon this
+ * information being correct.
+ */
+void /* PRIVATE */
+png_write_IHDR(png_structrp png_ptr, png_uint_32 width, png_uint_32 height,
+    int bit_depth, int color_type, int compression_type, int filter_type,
+    int interlace_type)
+{
+   png_byte buf[13]; /* Buffer to store the IHDR info */
+   int is_invalid_depth;
+
+   png_debug(1, "in png_write_IHDR");
+
+   /* Check that we have valid input data from the application info */
+   switch (color_type)
+   {
+      case PNG_COLOR_TYPE_GRAY:
+         switch (bit_depth)
+         {
+            case 1:
+            case 2:
+            case 4:
+            case 8:
+#ifdef PNG_WRITE_16BIT_SUPPORTED
+            case 16:
+#endif
+               png_ptr->channels = 1; break;
+
+            default:
+               png_error(png_ptr,
+                   "Invalid bit depth for grayscale image");
+         }
+         break;
+
+      case PNG_COLOR_TYPE_RGB:
+         is_invalid_depth = (bit_depth != 8);
+#ifdef PNG_WRITE_16BIT_SUPPORTED
+         is_invalid_depth = (is_invalid_depth && bit_depth != 16);
+#endif
+         if (is_invalid_depth)
+            png_error(png_ptr, "Invalid bit depth for RGB image");
+
+         png_ptr->channels = 3;
+         break;
+
+      case PNG_COLOR_TYPE_PALETTE:
+         switch (bit_depth)
+         {
+            case 1:
+            case 2:
+            case 4:
+            case 8:
+               png_ptr->channels = 1;
+               break;
+
+            default:
+               png_error(png_ptr, "Invalid bit depth for paletted image");
+         }
+         break;
+
+      case PNG_COLOR_TYPE_GRAY_ALPHA:
+         is_invalid_depth = (bit_depth != 8);
+#ifdef PNG_WRITE_16BIT_SUPPORTED
+         is_invalid_depth = (is_invalid_depth && bit_depth != 16);
+#endif
+         if (is_invalid_depth)
+            png_error(png_ptr, "Invalid bit depth for grayscale+alpha image");
+
+         png_ptr->channels = 2;
+         break;
+
+      case PNG_COLOR_TYPE_RGB_ALPHA:
+         is_invalid_depth = (bit_depth != 8);
+#ifdef PNG_WRITE_16BIT_SUPPORTED
+         is_invalid_depth = (is_invalid_depth && bit_depth != 16);
+#endif
+         if (is_invalid_depth)
+            png_error(png_ptr, "Invalid bit depth for RGBA image");
+
+         png_ptr->channels = 4;
+         break;
+
+      default:
+         png_error(png_ptr, "Invalid image color type specified");
+   }
+
+   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
+   {
+      png_warning(png_ptr, "Invalid compression type specified");
+      compression_type = PNG_COMPRESSION_TYPE_BASE;
+   }
+
+   /* Write filter_method 64 (intrapixel differencing) only if
+    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
+    * 2. Libpng did not write a PNG signature (this filter_method is only
+    *    used in PNG datastreams that are embedded in MNG datastreams) and
+    * 3. The application called png_permit_mng_features with a mask that
+    *    included PNG_FLAG_MNG_FILTER_64 and
+    * 4. The filter_method is 64 and
+    * 5. The color_type is RGB or RGBA
+    */
+   if (
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+       !((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 &&
+       ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) == 0) &&
+       (color_type == PNG_COLOR_TYPE_RGB ||
+        color_type == PNG_COLOR_TYPE_RGB_ALPHA) &&
+       (filter_type == PNG_INTRAPIXEL_DIFFERENCING)) &&
+#endif
+       filter_type != PNG_FILTER_TYPE_BASE)
+   {
+      png_warning(png_ptr, "Invalid filter type specified");
+      filter_type = PNG_FILTER_TYPE_BASE;
+   }
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   if (interlace_type != PNG_INTERLACE_NONE &&
+       interlace_type != PNG_INTERLACE_ADAM7)
+   {
+      png_warning(png_ptr, "Invalid interlace type specified");
+      interlace_type = PNG_INTERLACE_ADAM7;
+   }
+#else
+   interlace_type=PNG_INTERLACE_NONE;
+#endif
+
+   /* Save the relevant information */
+   png_ptr->bit_depth = (png_byte)bit_depth;
+   png_ptr->color_type = (png_byte)color_type;
+   png_ptr->interlaced = (png_byte)interlace_type;
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+   png_ptr->filter_type = (png_byte)filter_type;
+#endif
+   png_ptr->compression_type = (png_byte)compression_type;
+   png_ptr->width = width;
+   png_ptr->height = height;
+
+   png_ptr->pixel_depth = (png_byte)(bit_depth * png_ptr->channels);
+   png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, width);
+   /* Set the usr info, so any transformations can modify it */
+   png_ptr->usr_width = png_ptr->width;
+   png_ptr->usr_bit_depth = png_ptr->bit_depth;
+   png_ptr->usr_channels = png_ptr->channels;
+
+   /* Pack the header information into the buffer */
+   png_save_uint_32(buf, width);
+   png_save_uint_32(buf + 4, height);
+   buf[8] = (png_byte)bit_depth;
+   buf[9] = (png_byte)color_type;
+   buf[10] = (png_byte)compression_type;
+   buf[11] = (png_byte)filter_type;
+   buf[12] = (png_byte)interlace_type;
+
+   /* Write the chunk */
+   png_write_complete_chunk(png_ptr, png_IHDR, buf, 13);
+
+   if ((png_ptr->do_filter) == PNG_NO_FILTERS)
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE ||
+          png_ptr->bit_depth < 8)
+         png_ptr->do_filter = PNG_FILTER_NONE;
+
+      else
+         png_ptr->do_filter = PNG_ALL_FILTERS;
+   }
+
+   png_ptr->mode = PNG_HAVE_IHDR; /* not READY_FOR_ZTXT */
+}
+
+/* Write the palette.  We are careful not to trust png_color to be in the
+ * correct order for PNG, so people can redefine it to any convenient
+ * structure.
+ */
+void /* PRIVATE */
+png_write_PLTE(png_structrp png_ptr, png_const_colorp palette,
+    png_uint_32 num_pal)
+{
+   png_uint_32 max_palette_length, i;
+   png_const_colorp pal_ptr;
+   png_byte buf[3];
+
+   png_debug(1, "in png_write_PLTE");
+
+   max_palette_length = (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ?
+      (1 << png_ptr->bit_depth) : PNG_MAX_PALETTE_LENGTH;
+
+   if ((
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+       (png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) == 0 &&
+#endif
+       num_pal == 0) || num_pal > max_palette_length)
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         png_error(png_ptr, "Invalid number of colors in palette");
+      }
+
+      else
+      {
+         png_warning(png_ptr, "Invalid number of colors in palette");
+         return;
+      }
+   }
+
+   if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0)
+   {
+      png_warning(png_ptr,
+          "Ignoring request to write a PLTE chunk in grayscale PNG");
+
+      return;
+   }
+
+   png_ptr->num_palette = (png_uint_16)num_pal;
+   png_debug1(3, "num_palette = %d", png_ptr->num_palette);
+
+   png_write_chunk_header(png_ptr, png_PLTE, (png_uint_32)(num_pal * 3));
+#ifdef PNG_POINTER_INDEXING_SUPPORTED
+
+   for (i = 0, pal_ptr = palette; i < num_pal; i++, pal_ptr++)
+   {
+      buf[0] = pal_ptr->red;
+      buf[1] = pal_ptr->green;
+      buf[2] = pal_ptr->blue;
+      png_write_chunk_data(png_ptr, buf, 3);
+   }
+
+#else
+   /* This is a little slower but some buggy compilers need to do this
+    * instead
+    */
+   pal_ptr=palette;
+
+   for (i = 0; i < num_pal; i++)
+   {
+      buf[0] = pal_ptr[i].red;
+      buf[1] = pal_ptr[i].green;
+      buf[2] = pal_ptr[i].blue;
+      png_write_chunk_data(png_ptr, buf, 3);
+   }
+
+#endif
+   png_write_chunk_end(png_ptr);
+   png_ptr->mode |= PNG_HAVE_PLTE;
+}
+
+/* This is similar to png_text_compress, above, except that it does not require
+ * all of the data at once and, instead of buffering the compressed result,
+ * writes it as IDAT chunks.  Unlike png_text_compress it *can* png_error out
+ * because it calls the write interface.  As a result it does its own error
+ * reporting and does not return an error code.  In the event of error it will
+ * just call png_error.  The input data length may exceed 32-bits.  The 'flush'
+ * parameter is exactly the same as that to deflate, with the following
+ * meanings:
+ *
+ * Z_NO_FLUSH: normal incremental output of compressed data
+ * Z_SYNC_FLUSH: do a SYNC_FLUSH, used by png_write_flush
+ * Z_FINISH: this is the end of the input, do a Z_FINISH and clean up
+ *
+ * The routine manages the acquire and release of the png_ptr->zstream by
+ * checking and (at the end) clearing png_ptr->zowner; it does some sanity
+ * checks on the 'mode' flags while doing this.
+ */
+void /* PRIVATE */
+png_compress_IDAT(png_structrp png_ptr, png_const_bytep input,
+    png_alloc_size_t input_len, int flush)
+{
+   if (png_ptr->zowner != png_IDAT)
+   {
+      /* First time.   Ensure we have a temporary buffer for compression and
+       * trim the buffer list if it has more than one entry to free memory.
+       * If 'WRITE_COMPRESSED_TEXT' is not set the list will never have been
+       * created at this point, but the check here is quick and safe.
+       */
+      if (png_ptr->zbuffer_list == NULL)
+      {
+         png_ptr->zbuffer_list = png_voidcast(png_compression_bufferp,
+             png_malloc(png_ptr, PNG_COMPRESSION_BUFFER_SIZE(png_ptr)));
+         png_ptr->zbuffer_list->next = NULL;
+      }
+
+      else
+         png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list->next);
+
+      /* It is a terminal error if we can't claim the zstream. */
+      if (png_deflate_claim(png_ptr, png_IDAT, png_image_size(png_ptr)) != Z_OK)
+         png_error(png_ptr, png_ptr->zstream.msg);
+
+      /* The output state is maintained in png_ptr->zstream, so it must be
+       * initialized here after the claim.
+       */
+      png_ptr->zstream.next_out = png_ptr->zbuffer_list->output;
+      png_ptr->zstream.avail_out = png_ptr->zbuffer_size;
+   }
+
+   /* Now loop reading and writing until all the input is consumed or an error
+    * terminates the operation.  The _out values are maintained across calls to
+    * this function, but the input must be reset each time.
+    */
+   png_ptr->zstream.next_in = PNGZ_INPUT_CAST(input);
+   png_ptr->zstream.avail_in = 0; /* set below */
+   for (;;)
+   {
+      int ret;
+
+      /* INPUT: from the row data */
+      uInt avail = ZLIB_IO_MAX;
+
+      if (avail > input_len)
+         avail = (uInt)input_len; /* safe because of the check */
+
+      png_ptr->zstream.avail_in = avail;
+      input_len -= avail;
+
+      ret = deflate(&png_ptr->zstream, input_len > 0 ? Z_NO_FLUSH : flush);
+
+      /* Include as-yet unconsumed input */
+      input_len += png_ptr->zstream.avail_in;
+      png_ptr->zstream.avail_in = 0;
+
+      /* OUTPUT: write complete IDAT chunks when avail_out drops to zero. Note
+       * that these two zstream fields are preserved across the calls, therefore
+       * there is no need to set these up on entry to the loop.
+       */
+      if (png_ptr->zstream.avail_out == 0)
+      {
+         png_bytep data = png_ptr->zbuffer_list->output;
+         uInt size = png_ptr->zbuffer_size;
+
+         /* Write an IDAT containing the data then reset the buffer.  The
+          * first IDAT may need deflate header optimization.
+          */
+#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
+            if ((png_ptr->mode & PNG_HAVE_IDAT) == 0 &&
+                png_ptr->compression_type == PNG_COMPRESSION_TYPE_BASE)
+               optimize_cmf(data, png_image_size(png_ptr));
+#endif
+
+         if (size > 0)
+            png_write_complete_chunk(png_ptr, png_IDAT, data, size);
+         png_ptr->mode |= PNG_HAVE_IDAT;
+
+         png_ptr->zstream.next_out = data;
+         png_ptr->zstream.avail_out = size;
+
+         /* For SYNC_FLUSH or FINISH it is essential to keep calling zlib with
+          * the same flush parameter until it has finished output, for NO_FLUSH
+          * it doesn't matter.
+          */
+         if (ret == Z_OK && flush != Z_NO_FLUSH)
+            continue;
+      }
+
+      /* The order of these checks doesn't matter much; it just affects which
+       * possible error might be detected if multiple things go wrong at once.
+       */
+      if (ret == Z_OK) /* most likely return code! */
+      {
+         /* If all the input has been consumed then just return.  If Z_FINISH
+          * was used as the flush parameter something has gone wrong if we get
+          * here.
+          */
+         if (input_len == 0)
+         {
+            if (flush == Z_FINISH)
+               png_error(png_ptr, "Z_OK on Z_FINISH with output space");
+
+            return;
+         }
+      }
+
+      else if (ret == Z_STREAM_END && flush == Z_FINISH)
+      {
+         /* This is the end of the IDAT data; any pending output must be
+          * flushed.  For small PNG files we may still be at the beginning.
+          */
+         png_bytep data = png_ptr->zbuffer_list->output;
+         uInt size = png_ptr->zbuffer_size - png_ptr->zstream.avail_out;
+
+#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
+         if ((png_ptr->mode & PNG_HAVE_IDAT) == 0 &&
+             png_ptr->compression_type == PNG_COMPRESSION_TYPE_BASE)
+            optimize_cmf(data, png_image_size(png_ptr));
+#endif
+
+         if (size > 0)
+            png_write_complete_chunk(png_ptr, png_IDAT, data, size);
+         png_ptr->zstream.avail_out = 0;
+         png_ptr->zstream.next_out = NULL;
+         png_ptr->mode |= PNG_HAVE_IDAT | PNG_AFTER_IDAT;
+
+         png_ptr->zowner = 0; /* Release the stream */
+         return;
+      }
+
+      else
+      {
+         /* This is an error condition. */
+         png_zstream_error(png_ptr, ret);
+         png_error(png_ptr, png_ptr->zstream.msg);
+      }
+   }
+}
+
+/* Write an IEND chunk */
+void /* PRIVATE */
+png_write_IEND(png_structrp png_ptr)
+{
+   png_debug(1, "in png_write_IEND");
+
+   png_write_complete_chunk(png_ptr, png_IEND, NULL, 0);
+   png_ptr->mode |= PNG_HAVE_IEND;
+}
+
+#ifdef PNG_WRITE_gAMA_SUPPORTED
+/* Write a gAMA chunk */
+void /* PRIVATE */
+png_write_gAMA_fixed(png_structrp png_ptr, png_fixed_point file_gamma)
+{
+   png_byte buf[4];
+
+   png_debug(1, "in png_write_gAMA");
+
+   /* file_gamma is saved in 1/100,000ths */
+   png_save_uint_32(buf, (png_uint_32)file_gamma);
+   png_write_complete_chunk(png_ptr, png_gAMA, buf, 4);
+}
+#endif
+
+#ifdef PNG_WRITE_sRGB_SUPPORTED
+/* Write a sRGB chunk */
+void /* PRIVATE */
+png_write_sRGB(png_structrp png_ptr, int srgb_intent)
+{
+   png_byte buf[1];
+
+   png_debug(1, "in png_write_sRGB");
+
+   if (srgb_intent >= PNG_sRGB_INTENT_LAST)
+      png_warning(png_ptr,
+          "Invalid sRGB rendering intent specified");
+
+   buf[0]=(png_byte)srgb_intent;
+   png_write_complete_chunk(png_ptr, png_sRGB, buf, 1);
+}
+#endif
+
+#ifdef PNG_WRITE_iCCP_SUPPORTED
+/* Write an iCCP chunk */
+void /* PRIVATE */
+png_write_iCCP(png_structrp png_ptr, png_const_charp name,
+    png_const_bytep profile)
+{
+   png_uint_32 name_len;
+   png_uint_32 profile_len;
+   png_byte new_name[81]; /* 1 byte for the compression byte */
+   compression_state comp;
+   png_uint_32 temp;
+
+   png_debug(1, "in png_write_iCCP");
+
+   /* These are all internal problems: the profile should have been checked
+    * before when it was stored.
+    */
+   if (profile == NULL)
+      png_error(png_ptr, "No profile for iCCP chunk"); /* internal error */
+
+   profile_len = png_get_uint_32(profile);
+
+   if (profile_len < 132)
+      png_error(png_ptr, "ICC profile too short");
+
+   temp = (png_uint_32) (*(profile+8));
+   if (temp > 3 && (profile_len & 0x03))
+      png_error(png_ptr, "ICC profile length invalid (not a multiple of 4)");
+
+   {
+      png_uint_32 embedded_profile_len = png_get_uint_32(profile);
+
+      if (profile_len != embedded_profile_len)
+         png_error(png_ptr, "Profile length does not match profile");
+   }
+
+   name_len = png_check_keyword(png_ptr, name, new_name);
+
+   if (name_len == 0)
+      png_error(png_ptr, "iCCP: invalid keyword");
+
+   new_name[++name_len] = PNG_COMPRESSION_TYPE_BASE;
+
+   /* Make sure we include the NULL after the name and the compression type */
+   ++name_len;
+
+   png_text_compress_init(&comp, profile, profile_len);
+
+   /* Allow for keyword terminator and compression byte */
+   if (png_text_compress(png_ptr, png_iCCP, &comp, name_len) != Z_OK)
+      png_error(png_ptr, png_ptr->zstream.msg);
+
+   png_write_chunk_header(png_ptr, png_iCCP, name_len + comp.output_len);
+
+   png_write_chunk_data(png_ptr, new_name, name_len);
+
+   png_write_compressed_data_out(png_ptr, &comp);
+
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#ifdef PNG_WRITE_sPLT_SUPPORTED
+/* Write a sPLT chunk */
+void /* PRIVATE */
+png_write_sPLT(png_structrp png_ptr, png_const_sPLT_tp spalette)
+{
+   png_uint_32 name_len;
+   png_byte new_name[80];
+   png_byte entrybuf[10];
+   size_t entry_size = (spalette->depth == 8 ? 6 : 10);
+   size_t palette_size = entry_size * (size_t)spalette->nentries;
+   png_sPLT_entryp ep;
+#ifndef PNG_POINTER_INDEXING_SUPPORTED
+   int i;
+#endif
+
+   png_debug(1, "in png_write_sPLT");
+
+   name_len = png_check_keyword(png_ptr, spalette->name, new_name);
+
+   if (name_len == 0)
+      png_error(png_ptr, "sPLT: invalid keyword");
+
+   /* Make sure we include the NULL after the name */
+   png_write_chunk_header(png_ptr, png_sPLT,
+       (png_uint_32)(name_len + 2 + palette_size));
+
+   png_write_chunk_data(png_ptr, (png_bytep)new_name, (size_t)(name_len + 1));
+
+   png_write_chunk_data(png_ptr, &spalette->depth, 1);
+
+   /* Loop through each palette entry, writing appropriately */
+#ifdef PNG_POINTER_INDEXING_SUPPORTED
+   for (ep = spalette->entries; ep<spalette->entries + spalette->nentries; ep++)
+   {
+      if (spalette->depth == 8)
+      {
+         entrybuf[0] = (png_byte)ep->red;
+         entrybuf[1] = (png_byte)ep->green;
+         entrybuf[2] = (png_byte)ep->blue;
+         entrybuf[3] = (png_byte)ep->alpha;
+         png_save_uint_16(entrybuf + 4, ep->frequency);
+      }
+
+      else
+      {
+         png_save_uint_16(entrybuf + 0, ep->red);
+         png_save_uint_16(entrybuf + 2, ep->green);
+         png_save_uint_16(entrybuf + 4, ep->blue);
+         png_save_uint_16(entrybuf + 6, ep->alpha);
+         png_save_uint_16(entrybuf + 8, ep->frequency);
+      }
+
+      png_write_chunk_data(png_ptr, entrybuf, entry_size);
+   }
+#else
+   ep=spalette->entries;
+   for (i = 0; i>spalette->nentries; i++)
+   {
+      if (spalette->depth == 8)
+      {
+         entrybuf[0] = (png_byte)ep[i].red;
+         entrybuf[1] = (png_byte)ep[i].green;
+         entrybuf[2] = (png_byte)ep[i].blue;
+         entrybuf[3] = (png_byte)ep[i].alpha;
+         png_save_uint_16(entrybuf + 4, ep[i].frequency);
+      }
+
+      else
+      {
+         png_save_uint_16(entrybuf + 0, ep[i].red);
+         png_save_uint_16(entrybuf + 2, ep[i].green);
+         png_save_uint_16(entrybuf + 4, ep[i].blue);
+         png_save_uint_16(entrybuf + 6, ep[i].alpha);
+         png_save_uint_16(entrybuf + 8, ep[i].frequency);
+      }
+
+      png_write_chunk_data(png_ptr, entrybuf, entry_size);
+   }
+#endif
+
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#ifdef PNG_WRITE_sBIT_SUPPORTED
+/* Write the sBIT chunk */
+void /* PRIVATE */
+png_write_sBIT(png_structrp png_ptr, png_const_color_8p sbit, int color_type)
+{
+   png_byte buf[4];
+   size_t size;
+
+   png_debug(1, "in png_write_sBIT");
+
+   /* Make sure we don't depend upon the order of PNG_COLOR_8 */
+   if ((color_type & PNG_COLOR_MASK_COLOR) != 0)
+   {
+      png_byte maxbits;
+
+      maxbits = (png_byte)(color_type==PNG_COLOR_TYPE_PALETTE ? 8 :
+          png_ptr->usr_bit_depth);
+
+      if (sbit->red == 0 || sbit->red > maxbits ||
+          sbit->green == 0 || sbit->green > maxbits ||
+          sbit->blue == 0 || sbit->blue > maxbits)
+      {
+         png_warning(png_ptr, "Invalid sBIT depth specified");
+         return;
+      }
+
+      buf[0] = sbit->red;
+      buf[1] = sbit->green;
+      buf[2] = sbit->blue;
+      size = 3;
+   }
+
+   else
+   {
+      if (sbit->gray == 0 || sbit->gray > png_ptr->usr_bit_depth)
+      {
+         png_warning(png_ptr, "Invalid sBIT depth specified");
+         return;
+      }
+
+      buf[0] = sbit->gray;
+      size = 1;
+   }
+
+   if ((color_type & PNG_COLOR_MASK_ALPHA) != 0)
+   {
+      if (sbit->alpha == 0 || sbit->alpha > png_ptr->usr_bit_depth)
+      {
+         png_warning(png_ptr, "Invalid sBIT depth specified");
+         return;
+      }
+
+      buf[size++] = sbit->alpha;
+   }
+
+   png_write_complete_chunk(png_ptr, png_sBIT, buf, size);
+}
+#endif
+
+#ifdef PNG_WRITE_cHRM_SUPPORTED
+/* Write the cHRM chunk */
+void /* PRIVATE */
+png_write_cHRM_fixed(png_structrp png_ptr, const png_xy *xy)
+{
+   png_byte buf[32];
+
+   png_debug(1, "in png_write_cHRM");
+
+   /* Each value is saved in 1/100,000ths */
+   png_save_int_32(buf,      xy->whitex);
+   png_save_int_32(buf +  4, xy->whitey);
+
+   png_save_int_32(buf +  8, xy->redx);
+   png_save_int_32(buf + 12, xy->redy);
+
+   png_save_int_32(buf + 16, xy->greenx);
+   png_save_int_32(buf + 20, xy->greeny);
+
+   png_save_int_32(buf + 24, xy->bluex);
+   png_save_int_32(buf + 28, xy->bluey);
+
+   png_write_complete_chunk(png_ptr, png_cHRM, buf, 32);
+}
+#endif
+
+#ifdef PNG_WRITE_tRNS_SUPPORTED
+/* Write the tRNS chunk */
+void /* PRIVATE */
+png_write_tRNS(png_structrp png_ptr, png_const_bytep trans_alpha,
+    png_const_color_16p tran, int num_trans, int color_type)
+{
+   png_byte buf[6];
+
+   png_debug(1, "in png_write_tRNS");
+
+   if (color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      if (num_trans <= 0 || num_trans > (int)png_ptr->num_palette)
+      {
+         png_app_warning(png_ptr,
+             "Invalid number of transparent colors specified");
+         return;
+      }
+
+      /* Write the chunk out as it is */
+      png_write_complete_chunk(png_ptr, png_tRNS, trans_alpha,
+          (size_t)num_trans);
+   }
+
+   else if (color_type == PNG_COLOR_TYPE_GRAY)
+   {
+      /* One 16-bit value */
+      if (tran->gray >= (1 << png_ptr->bit_depth))
+      {
+         png_app_warning(png_ptr,
+             "Ignoring attempt to write tRNS chunk out-of-range for bit_depth");
+
+         return;
+      }
+
+      png_save_uint_16(buf, tran->gray);
+      png_write_complete_chunk(png_ptr, png_tRNS, buf, 2);
+   }
+
+   else if (color_type == PNG_COLOR_TYPE_RGB)
+   {
+      /* Three 16-bit values */
+      png_save_uint_16(buf, tran->red);
+      png_save_uint_16(buf + 2, tran->green);
+      png_save_uint_16(buf + 4, tran->blue);
+#ifdef PNG_WRITE_16BIT_SUPPORTED
+      if (png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4]) != 0)
+#else
+      if ((buf[0] | buf[2] | buf[4]) != 0)
+#endif
+      {
+         png_app_warning(png_ptr,
+             "Ignoring attempt to write 16-bit tRNS chunk when bit_depth is 8");
+         return;
+      }
+
+      png_write_complete_chunk(png_ptr, png_tRNS, buf, 6);
+   }
+
+   else
+   {
+      png_app_warning(png_ptr, "Can't write tRNS with an alpha channel");
+   }
+}
+#endif
+
+#ifdef PNG_WRITE_bKGD_SUPPORTED
+/* Write the background chunk */
+void /* PRIVATE */
+png_write_bKGD(png_structrp png_ptr, png_const_color_16p back, int color_type)
+{
+   png_byte buf[6];
+
+   png_debug(1, "in png_write_bKGD");
+
+   if (color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      if (
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+          (png_ptr->num_palette != 0 ||
+          (png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) == 0) &&
+#endif
+         back->index >= png_ptr->num_palette)
+      {
+         png_warning(png_ptr, "Invalid background palette index");
+         return;
+      }
+
+      buf[0] = back->index;
+      png_write_complete_chunk(png_ptr, png_bKGD, buf, 1);
+   }
+
+   else if ((color_type & PNG_COLOR_MASK_COLOR) != 0)
+   {
+      png_save_uint_16(buf, back->red);
+      png_save_uint_16(buf + 2, back->green);
+      png_save_uint_16(buf + 4, back->blue);
+#ifdef PNG_WRITE_16BIT_SUPPORTED
+      if (png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4]) != 0)
+#else
+      if ((buf[0] | buf[2] | buf[4]) != 0)
+#endif
+      {
+         png_warning(png_ptr,
+             "Ignoring attempt to write 16-bit bKGD chunk "
+             "when bit_depth is 8");
+
+         return;
+      }
+
+      png_write_complete_chunk(png_ptr, png_bKGD, buf, 6);
+   }
+
+   else
+   {
+      if (back->gray >= (1 << png_ptr->bit_depth))
+      {
+         png_warning(png_ptr,
+             "Ignoring attempt to write bKGD chunk out-of-range for bit_depth");
+
+         return;
+      }
+
+      png_save_uint_16(buf, back->gray);
+      png_write_complete_chunk(png_ptr, png_bKGD, buf, 2);
+   }
+}
+#endif
+
+#ifdef PNG_WRITE_eXIf_SUPPORTED
+/* Write the Exif data */
+void /* PRIVATE */
+png_write_eXIf(png_structrp png_ptr, png_bytep exif, int num_exif)
+{
+   int i;
+   png_byte buf[1];
+
+   png_debug(1, "in png_write_eXIf");
+
+   png_write_chunk_header(png_ptr, png_eXIf, (png_uint_32)(num_exif));
+
+   for (i = 0; i < num_exif; i++)
+   {
+      buf[0] = exif[i];
+      png_write_chunk_data(png_ptr, buf, 1);
+   }
+
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#ifdef PNG_WRITE_hIST_SUPPORTED
+/* Write the histogram */
+void /* PRIVATE */
+png_write_hIST(png_structrp png_ptr, png_const_uint_16p hist, int num_hist)
+{
+   int i;
+   png_byte buf[3];
+
+   png_debug(1, "in png_write_hIST");
+
+   if (num_hist > (int)png_ptr->num_palette)
+   {
+      png_debug2(3, "num_hist = %d, num_palette = %d", num_hist,
+          png_ptr->num_palette);
+
+      png_warning(png_ptr, "Invalid number of histogram entries specified");
+      return;
+   }
+
+   png_write_chunk_header(png_ptr, png_hIST, (png_uint_32)(num_hist * 2));
+
+   for (i = 0; i < num_hist; i++)
+   {
+      png_save_uint_16(buf, hist[i]);
+      png_write_chunk_data(png_ptr, buf, 2);
+   }
+
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#ifdef PNG_WRITE_tEXt_SUPPORTED
+/* Write a tEXt chunk */
+void /* PRIVATE */
+png_write_tEXt(png_structrp png_ptr, png_const_charp key, png_const_charp text,
+    size_t text_len)
+{
+   png_uint_32 key_len;
+   png_byte new_key[80];
+
+   png_debug(1, "in png_write_tEXt");
+
+   key_len = png_check_keyword(png_ptr, key, new_key);
+
+   if (key_len == 0)
+      png_error(png_ptr, "tEXt: invalid keyword");
+
+   if (text == NULL || *text == '\0')
+      text_len = 0;
+
+   else
+      text_len = strlen(text);
+
+   if (text_len > PNG_UINT_31_MAX - (key_len+1))
+      png_error(png_ptr, "tEXt: text too long");
+
+   /* Make sure we include the 0 after the key */
+   png_write_chunk_header(png_ptr, png_tEXt,
+       (png_uint_32)/*checked above*/(key_len + text_len + 1));
+   /*
+    * We leave it to the application to meet PNG-1.0 requirements on the
+    * contents of the text.  PNG-1.0 through PNG-1.2 discourage the use of
+    * any non-Latin-1 characters except for NEWLINE.  ISO PNG will forbid them.
+    * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG.
+    */
+   png_write_chunk_data(png_ptr, new_key, key_len + 1);
+
+   if (text_len != 0)
+      png_write_chunk_data(png_ptr, (png_const_bytep)text, text_len);
+
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#ifdef PNG_WRITE_zTXt_SUPPORTED
+/* Write a compressed text chunk */
+void /* PRIVATE */
+png_write_zTXt(png_structrp png_ptr, png_const_charp key, png_const_charp text,
+    int compression)
+{
+   png_uint_32 key_len;
+   png_byte new_key[81];
+   compression_state comp;
+
+   png_debug(1, "in png_write_zTXt");
+
+   if (compression == PNG_TEXT_COMPRESSION_NONE)
+   {
+      png_write_tEXt(png_ptr, key, text, 0);
+      return;
+   }
+
+   if (compression != PNG_TEXT_COMPRESSION_zTXt)
+      png_error(png_ptr, "zTXt: invalid compression type");
+
+   key_len = png_check_keyword(png_ptr, key, new_key);
+
+   if (key_len == 0)
+      png_error(png_ptr, "zTXt: invalid keyword");
+
+   /* Add the compression method and 1 for the keyword separator. */
+   new_key[++key_len] = PNG_COMPRESSION_TYPE_BASE;
+   ++key_len;
+
+   /* Compute the compressed data; do it now for the length */
+   png_text_compress_init(&comp, (png_const_bytep)text,
+       text == NULL ? 0 : strlen(text));
+
+   if (png_text_compress(png_ptr, png_zTXt, &comp, key_len) != Z_OK)
+      png_error(png_ptr, png_ptr->zstream.msg);
+
+   /* Write start of chunk */
+   png_write_chunk_header(png_ptr, png_zTXt, key_len + comp.output_len);
+
+   /* Write key */
+   png_write_chunk_data(png_ptr, new_key, key_len);
+
+   /* Write the compressed data */
+   png_write_compressed_data_out(png_ptr, &comp);
+
+   /* Close the chunk */
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#ifdef PNG_WRITE_iTXt_SUPPORTED
+/* Write an iTXt chunk */
+void /* PRIVATE */
+png_write_iTXt(png_structrp png_ptr, int compression, png_const_charp key,
+    png_const_charp lang, png_const_charp lang_key, png_const_charp text)
+{
+   png_uint_32 key_len, prefix_len;
+   size_t lang_len, lang_key_len;
+   png_byte new_key[82];
+   compression_state comp;
+
+   png_debug(1, "in png_write_iTXt");
+
+   key_len = png_check_keyword(png_ptr, key, new_key);
+
+   if (key_len == 0)
+      png_error(png_ptr, "iTXt: invalid keyword");
+
+   /* Set the compression flag */
+   switch (compression)
+   {
+      case PNG_ITXT_COMPRESSION_NONE:
+      case PNG_TEXT_COMPRESSION_NONE:
+         compression = new_key[++key_len] = 0; /* no compression */
+         break;
+
+      case PNG_TEXT_COMPRESSION_zTXt:
+      case PNG_ITXT_COMPRESSION_zTXt:
+         compression = new_key[++key_len] = 1; /* compressed */
+         break;
+
+      default:
+         png_error(png_ptr, "iTXt: invalid compression");
+   }
+
+   new_key[++key_len] = PNG_COMPRESSION_TYPE_BASE;
+   ++key_len; /* for the keywod separator */
+
+   /* We leave it to the application to meet PNG-1.0 requirements on the
+    * contents of the text.  PNG-1.0 through PNG-1.2 discourage the use of
+    * any non-Latin-1 characters except for NEWLINE.  ISO PNG, however,
+    * specifies that the text is UTF-8 and this really doesn't require any
+    * checking.
+    *
+    * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG.
+    *
+    * TODO: validate the language tag correctly (see the spec.)
+    */
+   if (lang == NULL) lang = ""; /* empty language is valid */
+   lang_len = strlen(lang)+1;
+   if (lang_key == NULL) lang_key = ""; /* may be empty */
+   lang_key_len = strlen(lang_key)+1;
+   if (text == NULL) text = ""; /* may be empty */
+
+   prefix_len = key_len;
+   if (lang_len > PNG_UINT_31_MAX-prefix_len)
+      prefix_len = PNG_UINT_31_MAX;
+   else
+      prefix_len = (png_uint_32)(prefix_len + lang_len);
+
+   if (lang_key_len > PNG_UINT_31_MAX-prefix_len)
+      prefix_len = PNG_UINT_31_MAX;
+   else
+      prefix_len = (png_uint_32)(prefix_len + lang_key_len);
+
+   png_text_compress_init(&comp, (png_const_bytep)text, strlen(text));
+
+   if (compression != 0)
+   {
+      if (png_text_compress(png_ptr, png_iTXt, &comp, prefix_len) != Z_OK)
+         png_error(png_ptr, png_ptr->zstream.msg);
+   }
+
+   else
+   {
+      if (comp.input_len > PNG_UINT_31_MAX-prefix_len)
+         png_error(png_ptr, "iTXt: uncompressed text too long");
+
+      /* So the string will fit in a chunk: */
+      comp.output_len = (png_uint_32)/*SAFE*/comp.input_len;
+   }
+
+   png_write_chunk_header(png_ptr, png_iTXt, comp.output_len + prefix_len);
+
+   png_write_chunk_data(png_ptr, new_key, key_len);
+
+   png_write_chunk_data(png_ptr, (png_const_bytep)lang, lang_len);
+
+   png_write_chunk_data(png_ptr, (png_const_bytep)lang_key, lang_key_len);
+
+   if (compression != 0)
+      png_write_compressed_data_out(png_ptr, &comp);
+
+   else
+      png_write_chunk_data(png_ptr, (png_const_bytep)text, comp.output_len);
+
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#ifdef PNG_WRITE_oFFs_SUPPORTED
+/* Write the oFFs chunk */
+void /* PRIVATE */
+png_write_oFFs(png_structrp png_ptr, png_int_32 x_offset, png_int_32 y_offset,
+    int unit_type)
+{
+   png_byte buf[9];
+
+   png_debug(1, "in png_write_oFFs");
+
+   if (unit_type >= PNG_OFFSET_LAST)
+      png_warning(png_ptr, "Unrecognized unit type for oFFs chunk");
+
+   png_save_int_32(buf, x_offset);
+   png_save_int_32(buf + 4, y_offset);
+   buf[8] = (png_byte)unit_type;
+
+   png_write_complete_chunk(png_ptr, png_oFFs, buf, 9);
+}
+#endif
+#ifdef PNG_WRITE_pCAL_SUPPORTED
+/* Write the pCAL chunk (described in the PNG extensions document) */
+void /* PRIVATE */
+png_write_pCAL(png_structrp png_ptr, png_charp purpose, png_int_32 X0,
+    png_int_32 X1, int type, int nparams, png_const_charp units,
+    png_charpp params)
+{
+   png_uint_32 purpose_len;
+   size_t units_len, total_len;
+   size_t *params_len;
+   png_byte buf[10];
+   png_byte new_purpose[80];
+   int i;
+
+   png_debug1(1, "in png_write_pCAL (%d parameters)", nparams);
+
+   if (type >= PNG_EQUATION_LAST)
+      png_error(png_ptr, "Unrecognized equation type for pCAL chunk");
+
+   purpose_len = png_check_keyword(png_ptr, purpose, new_purpose);
+
+   if (purpose_len == 0)
+      png_error(png_ptr, "pCAL: invalid keyword");
+
+   ++purpose_len; /* terminator */
+
+   png_debug1(3, "pCAL purpose length = %d", (int)purpose_len);
+   units_len = strlen(units) + (nparams == 0 ? 0 : 1);
+   png_debug1(3, "pCAL units length = %d", (int)units_len);
+   total_len = purpose_len + units_len + 10;
+
+   params_len = (size_t *)png_malloc(png_ptr,
+       (png_alloc_size_t)((png_alloc_size_t)nparams * (sizeof (size_t))));
+
+   /* Find the length of each parameter, making sure we don't count the
+    * null terminator for the last parameter.
+    */
+   for (i = 0; i < nparams; i++)
+   {
+      params_len[i] = strlen(params[i]) + (i == nparams - 1 ? 0 : 1);
+      png_debug2(3, "pCAL parameter %d length = %lu", i,
+          (unsigned long)params_len[i]);
+      total_len += params_len[i];
+   }
+
+   png_debug1(3, "pCAL total length = %d", (int)total_len);
+   png_write_chunk_header(png_ptr, png_pCAL, (png_uint_32)total_len);
+   png_write_chunk_data(png_ptr, new_purpose, purpose_len);
+   png_save_int_32(buf, X0);
+   png_save_int_32(buf + 4, X1);
+   buf[8] = (png_byte)type;
+   buf[9] = (png_byte)nparams;
+   png_write_chunk_data(png_ptr, buf, 10);
+   png_write_chunk_data(png_ptr, (png_const_bytep)units, (size_t)units_len);
+
+   for (i = 0; i < nparams; i++)
+   {
+      png_write_chunk_data(png_ptr, (png_const_bytep)params[i], params_len[i]);
+   }
+
+   png_free(png_ptr, params_len);
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#ifdef PNG_WRITE_sCAL_SUPPORTED
+/* Write the sCAL chunk */
+void /* PRIVATE */
+png_write_sCAL_s(png_structrp png_ptr, int unit, png_const_charp width,
+    png_const_charp height)
+{
+   png_byte buf[64];
+   size_t wlen, hlen, total_len;
+
+   png_debug(1, "in png_write_sCAL_s");
+
+   wlen = strlen(width);
+   hlen = strlen(height);
+   total_len = wlen + hlen + 2;
+
+   if (total_len > 64)
+   {
+      png_warning(png_ptr, "Can't write sCAL (buffer too small)");
+      return;
+   }
+
+   buf[0] = (png_byte)unit;
+   memcpy(buf + 1, width, wlen + 1);      /* Append the '\0' here */
+   memcpy(buf + wlen + 2, height, hlen);  /* Do NOT append the '\0' here */
+
+   png_debug1(3, "sCAL total length = %u", (unsigned int)total_len);
+   png_write_complete_chunk(png_ptr, png_sCAL, buf, total_len);
+}
+#endif
+
+#ifdef PNG_WRITE_pHYs_SUPPORTED
+/* Write the pHYs chunk */
+void /* PRIVATE */
+png_write_pHYs(png_structrp png_ptr, png_uint_32 x_pixels_per_unit,
+    png_uint_32 y_pixels_per_unit,
+    int unit_type)
+{
+   png_byte buf[9];
+
+   png_debug(1, "in png_write_pHYs");
+
+   if (unit_type >= PNG_RESOLUTION_LAST)
+      png_warning(png_ptr, "Unrecognized unit type for pHYs chunk");
+
+   png_save_uint_32(buf, x_pixels_per_unit);
+   png_save_uint_32(buf + 4, y_pixels_per_unit);
+   buf[8] = (png_byte)unit_type;
+
+   png_write_complete_chunk(png_ptr, png_pHYs, buf, 9);
+}
+#endif
+
+#ifdef PNG_WRITE_tIME_SUPPORTED
+/* Write the tIME chunk.  Use either png_convert_from_struct_tm()
+ * or png_convert_from_time_t(), or fill in the structure yourself.
+ */
+void /* PRIVATE */
+png_write_tIME(png_structrp png_ptr, png_const_timep mod_time)
+{
+   png_byte buf[7];
+
+   png_debug(1, "in png_write_tIME");
+
+   if (mod_time->month  > 12 || mod_time->month  < 1 ||
+       mod_time->day    > 31 || mod_time->day    < 1 ||
+       mod_time->hour   > 23 || mod_time->second > 60)
+   {
+      png_warning(png_ptr, "Invalid time specified for tIME chunk");
+      return;
+   }
+
+   png_save_uint_16(buf, mod_time->year);
+   buf[2] = mod_time->month;
+   buf[3] = mod_time->day;
+   buf[4] = mod_time->hour;
+   buf[5] = mod_time->minute;
+   buf[6] = mod_time->second;
+
+   png_write_complete_chunk(png_ptr, png_tIME, buf, 7);
+}
+#endif
+
+/* Initializes the row writing capability of libpng */
+void /* PRIVATE */
+png_write_start_row(png_structrp png_ptr)
+{
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* Start of interlace block */
+   static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* Offset to next interlace block */
+   static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* Start of interlace block in the y direction */
+   static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* Offset to next interlace block in the y direction */
+   static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+#endif
+
+   png_alloc_size_t buf_size;
+   int usr_pixel_depth;
+
+#ifdef PNG_WRITE_FILTER_SUPPORTED
+   png_byte filters;
+#endif
+
+   png_debug(1, "in png_write_start_row");
+
+   usr_pixel_depth = png_ptr->usr_channels * png_ptr->usr_bit_depth;
+   buf_size = PNG_ROWBYTES(usr_pixel_depth, png_ptr->width) + 1;
+
+   /* 1.5.6: added to allow checking in the row write code. */
+   png_ptr->transformed_pixel_depth = png_ptr->pixel_depth;
+   png_ptr->maximum_pixel_depth = (png_byte)usr_pixel_depth;
+
+   /* Set up row buffer */
+   png_ptr->row_buf = png_voidcast(png_bytep, png_malloc(png_ptr, buf_size));
+
+   png_ptr->row_buf[0] = PNG_FILTER_VALUE_NONE;
+
+#ifdef PNG_WRITE_FILTER_SUPPORTED
+   filters = png_ptr->do_filter;
+
+   if (png_ptr->height == 1)
+      filters &= 0xff & ~(PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH);
+
+   if (png_ptr->width == 1)
+      filters &= 0xff & ~(PNG_FILTER_SUB|PNG_FILTER_AVG|PNG_FILTER_PAETH);
+
+   if (filters == 0)
+      filters = PNG_FILTER_NONE;
+
+   png_ptr->do_filter = filters;
+
+   if (((filters & (PNG_FILTER_SUB | PNG_FILTER_UP | PNG_FILTER_AVG |
+       PNG_FILTER_PAETH)) != 0) && png_ptr->try_row == NULL)
+   {
+      int num_filters = 0;
+
+      png_ptr->try_row = png_voidcast(png_bytep, png_malloc(png_ptr, buf_size));
+
+      if (filters & PNG_FILTER_SUB)
+         num_filters++;
+
+      if (filters & PNG_FILTER_UP)
+         num_filters++;
+
+      if (filters & PNG_FILTER_AVG)
+         num_filters++;
+
+      if (filters & PNG_FILTER_PAETH)
+         num_filters++;
+
+      if (num_filters > 1)
+         png_ptr->tst_row = png_voidcast(png_bytep, png_malloc(png_ptr,
+             buf_size));
+   }
+
+   /* We only need to keep the previous row if we are using one of the following
+    * filters.
+    */
+   if ((filters & (PNG_FILTER_AVG | PNG_FILTER_UP | PNG_FILTER_PAETH)) != 0)
+      png_ptr->prev_row = png_voidcast(png_bytep,
+          png_calloc(png_ptr, buf_size));
+#endif /* WRITE_FILTER */
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* If interlaced, we need to set up width and height of pass */
+   if (png_ptr->interlaced != 0)
+   {
+      if ((png_ptr->transformations & PNG_INTERLACE) == 0)
+      {
+         png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 -
+             png_pass_ystart[0]) / png_pass_yinc[0];
+
+         png_ptr->usr_width = (png_ptr->width + png_pass_inc[0] - 1 -
+             png_pass_start[0]) / png_pass_inc[0];
+      }
+
+      else
+      {
+         png_ptr->num_rows = png_ptr->height;
+         png_ptr->usr_width = png_ptr->width;
+      }
+   }
+
+   else
+#endif
+   {
+      png_ptr->num_rows = png_ptr->height;
+      png_ptr->usr_width = png_ptr->width;
+   }
+}
+
+/* Internal use only.  Called when finished processing a row of data. */
+void /* PRIVATE */
+png_write_finish_row(png_structrp png_ptr)
+{
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* Start of interlace block */
+   static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* Offset to next interlace block */
+   static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* Start of interlace block in the y direction */
+   static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* Offset to next interlace block in the y direction */
+   static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+#endif
+
+   png_debug(1, "in png_write_finish_row");
+
+   /* Next row */
+   png_ptr->row_number++;
+
+   /* See if we are done */
+   if (png_ptr->row_number < png_ptr->num_rows)
+      return;
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* If interlaced, go to next pass */
+   if (png_ptr->interlaced != 0)
+   {
+      png_ptr->row_number = 0;
+      if ((png_ptr->transformations & PNG_INTERLACE) != 0)
+      {
+         png_ptr->pass++;
+      }
+
+      else
+      {
+         /* Loop until we find a non-zero width or height pass */
+         do
+         {
+            png_ptr->pass++;
+
+            if (png_ptr->pass >= 7)
+               break;
+
+            png_ptr->usr_width = (png_ptr->width +
+                png_pass_inc[png_ptr->pass] - 1 -
+                png_pass_start[png_ptr->pass]) /
+                png_pass_inc[png_ptr->pass];
+
+            png_ptr->num_rows = (png_ptr->height +
+                png_pass_yinc[png_ptr->pass] - 1 -
+                png_pass_ystart[png_ptr->pass]) /
+                png_pass_yinc[png_ptr->pass];
+
+            if ((png_ptr->transformations & PNG_INTERLACE) != 0)
+               break;
+
+         } while (png_ptr->usr_width == 0 || png_ptr->num_rows == 0);
+
+      }
+
+      /* Reset the row above the image for the next pass */
+      if (png_ptr->pass < 7)
+      {
+         if (png_ptr->prev_row != NULL)
+            memset(png_ptr->prev_row, 0,
+                PNG_ROWBYTES(png_ptr->usr_channels *
+                png_ptr->usr_bit_depth, png_ptr->width) + 1);
+
+         return;
+      }
+   }
+#endif
+
+   /* If we get here, we've just written the last row, so we need
+      to flush the compressor */
+   png_compress_IDAT(png_ptr, NULL, 0, Z_FINISH);
+}
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+/* Pick out the correct pixels for the interlace pass.
+ * The basic idea here is to go through the row with a source
+ * pointer and a destination pointer (sp and dp), and copy the
+ * correct pixels for the pass.  As the row gets compacted,
+ * sp will always be >= dp, so we should never overwrite anything.
+ * See the default: case for the easiest code to understand.
+ */
+void /* PRIVATE */
+png_do_write_interlace(png_row_infop row_info, png_bytep row, int pass)
+{
+   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* Start of interlace block */
+   static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* Offset to next interlace block */
+   static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   png_debug(1, "in png_do_write_interlace");
+
+   /* We don't have to do anything on the last pass (6) */
+   if (pass < 6)
+   {
+      /* Each pixel depth is handled separately */
+      switch (row_info->pixel_depth)
+      {
+         case 1:
+         {
+            png_bytep sp;
+            png_bytep dp;
+            unsigned int shift;
+            int d;
+            int value;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            dp = row;
+            d = 0;
+            shift = 7;
+
+            for (i = png_pass_start[pass]; i < row_width;
+               i += png_pass_inc[pass])
+            {
+               sp = row + (size_t)(i >> 3);
+               value = (int)(*sp >> (7 - (int)(i & 0x07))) & 0x01;
+               d |= (value << shift);
+
+               if (shift == 0)
+               {
+                  shift = 7;
+                  *dp++ = (png_byte)d;
+                  d = 0;
+               }
+
+               else
+                  shift--;
+
+            }
+            if (shift != 7)
+               *dp = (png_byte)d;
+
+            break;
+         }
+
+         case 2:
+         {
+            png_bytep sp;
+            png_bytep dp;
+            unsigned int shift;
+            int d;
+            int value;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            dp = row;
+            shift = 6;
+            d = 0;
+
+            for (i = png_pass_start[pass]; i < row_width;
+               i += png_pass_inc[pass])
+            {
+               sp = row + (size_t)(i >> 2);
+               value = (*sp >> ((3 - (int)(i & 0x03)) << 1)) & 0x03;
+               d |= (value << shift);
+
+               if (shift == 0)
+               {
+                  shift = 6;
+                  *dp++ = (png_byte)d;
+                  d = 0;
+               }
+
+               else
+                  shift -= 2;
+            }
+            if (shift != 6)
+               *dp = (png_byte)d;
+
+            break;
+         }
+
+         case 4:
+         {
+            png_bytep sp;
+            png_bytep dp;
+            unsigned int shift;
+            int d;
+            int value;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            dp = row;
+            shift = 4;
+            d = 0;
+            for (i = png_pass_start[pass]; i < row_width;
+                i += png_pass_inc[pass])
+            {
+               sp = row + (size_t)(i >> 1);
+               value = (*sp >> ((1 - (int)(i & 0x01)) << 2)) & 0x0f;
+               d |= (value << shift);
+
+               if (shift == 0)
+               {
+                  shift = 4;
+                  *dp++ = (png_byte)d;
+                  d = 0;
+               }
+
+               else
+                  shift -= 4;
+            }
+            if (shift != 4)
+               *dp = (png_byte)d;
+
+            break;
+         }
+
+         default:
+         {
+            png_bytep sp;
+            png_bytep dp;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+            size_t pixel_bytes;
+
+            /* Start at the beginning */
+            dp = row;
+
+            /* Find out how many bytes each pixel takes up */
+            pixel_bytes = (row_info->pixel_depth >> 3);
+
+            /* Loop through the row, only looking at the pixels that matter */
+            for (i = png_pass_start[pass]; i < row_width;
+               i += png_pass_inc[pass])
+            {
+               /* Find out where the original pixel is */
+               sp = row + (size_t)i * pixel_bytes;
+
+               /* Move the pixel */
+               if (dp != sp)
+                  memcpy(dp, sp, pixel_bytes);
+
+               /* Next pixel */
+               dp += pixel_bytes;
+            }
+            break;
+         }
+      }
+      /* Set new row width */
+      row_info->width = (row_info->width +
+          png_pass_inc[pass] - 1 -
+          png_pass_start[pass]) /
+          png_pass_inc[pass];
+
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
+          row_info->width);
+   }
+}
+#endif
+
+
+/* This filters the row, chooses which filter to use, if it has not already
+ * been specified by the application, and then writes the row out with the
+ * chosen filter.
+ */
+static void /* PRIVATE */
+png_write_filtered_row(png_structrp png_ptr, png_bytep filtered_row,
+    size_t row_bytes);
+
+#ifdef PNG_WRITE_FILTER_SUPPORTED
+static size_t /* PRIVATE */
+png_setup_sub_row(png_structrp png_ptr, png_uint_32 bpp,
+    size_t row_bytes, size_t lmins)
+{
+   png_bytep rp, dp, lp;
+   size_t i;
+   size_t sum = 0;
+   unsigned int v;
+
+   png_ptr->try_row[0] = PNG_FILTER_VALUE_SUB;
+
+   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1; i < bpp;
+        i++, rp++, dp++)
+   {
+      v = *dp = *rp;
+#ifdef PNG_USE_ABS
+      sum += 128 - abs((int)v - 128);
+#else
+      sum += (v < 128) ? v : 256 - v;
+#endif
+   }
+
+   for (lp = png_ptr->row_buf + 1; i < row_bytes;
+      i++, rp++, lp++, dp++)
+   {
+      v = *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
+#ifdef PNG_USE_ABS
+      sum += 128 - abs((int)v - 128);
+#else
+      sum += (v < 128) ? v : 256 - v;
+#endif
+
+      if (sum > lmins)  /* We are already worse, don't continue. */
+        break;
+   }
+
+   return sum;
+}
+
+static void /* PRIVATE */
+png_setup_sub_row_only(png_structrp png_ptr, png_uint_32 bpp,
+    size_t row_bytes)
+{
+   png_bytep rp, dp, lp;
+   size_t i;
+
+   png_ptr->try_row[0] = PNG_FILTER_VALUE_SUB;
+
+   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1; i < bpp;
+        i++, rp++, dp++)
+   {
+      *dp = *rp;
+   }
+
+   for (lp = png_ptr->row_buf + 1; i < row_bytes;
+      i++, rp++, lp++, dp++)
+   {
+      *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
+   }
+}
+
+static size_t /* PRIVATE */
+png_setup_up_row(png_structrp png_ptr, size_t row_bytes, size_t lmins)
+{
+   png_bytep rp, dp, pp;
+   size_t i;
+   size_t sum = 0;
+   unsigned int v;
+
+   png_ptr->try_row[0] = PNG_FILTER_VALUE_UP;
+
+   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
+       pp = png_ptr->prev_row + 1; i < row_bytes;
+       i++, rp++, pp++, dp++)
+   {
+      v = *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff);
+#ifdef PNG_USE_ABS
+      sum += 128 - abs((int)v - 128);
+#else
+      sum += (v < 128) ? v : 256 - v;
+#endif
+
+      if (sum > lmins)  /* We are already worse, don't continue. */
+        break;
+   }
+
+   return sum;
+}
+static void /* PRIVATE */
+png_setup_up_row_only(png_structrp png_ptr, size_t row_bytes)
+{
+   png_bytep rp, dp, pp;
+   size_t i;
+
+   png_ptr->try_row[0] = PNG_FILTER_VALUE_UP;
+
+   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
+       pp = png_ptr->prev_row + 1; i < row_bytes;
+       i++, rp++, pp++, dp++)
+   {
+      *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff);
+   }
+}
+
+static size_t /* PRIVATE */
+png_setup_avg_row(png_structrp png_ptr, png_uint_32 bpp,
+    size_t row_bytes, size_t lmins)
+{
+   png_bytep rp, dp, pp, lp;
+   png_uint_32 i;
+   size_t sum = 0;
+   unsigned int v;
+
+   png_ptr->try_row[0] = PNG_FILTER_VALUE_AVG;
+
+   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
+       pp = png_ptr->prev_row + 1; i < bpp; i++)
+   {
+      v = *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff);
+
+#ifdef PNG_USE_ABS
+      sum += 128 - abs((int)v - 128);
+#else
+      sum += (v < 128) ? v : 256 - v;
+#endif
+   }
+
+   for (lp = png_ptr->row_buf + 1; i < row_bytes; i++)
+   {
+      v = *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2))
+          & 0xff);
+
+#ifdef PNG_USE_ABS
+      sum += 128 - abs((int)v - 128);
+#else
+      sum += (v < 128) ? v : 256 - v;
+#endif
+
+      if (sum > lmins)  /* We are already worse, don't continue. */
+        break;
+   }
+
+   return sum;
+}
+static void /* PRIVATE */
+png_setup_avg_row_only(png_structrp png_ptr, png_uint_32 bpp,
+    size_t row_bytes)
+{
+   png_bytep rp, dp, pp, lp;
+   png_uint_32 i;
+
+   png_ptr->try_row[0] = PNG_FILTER_VALUE_AVG;
+
+   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
+       pp = png_ptr->prev_row + 1; i < bpp; i++)
+   {
+      *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff);
+   }
+
+   for (lp = png_ptr->row_buf + 1; i < row_bytes; i++)
+   {
+      *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2))
+          & 0xff);
+   }
+}
+
+static size_t /* PRIVATE */
+png_setup_paeth_row(png_structrp png_ptr, png_uint_32 bpp,
+    size_t row_bytes, size_t lmins)
+{
+   png_bytep rp, dp, pp, cp, lp;
+   size_t i;
+   size_t sum = 0;
+   unsigned int v;
+
+   png_ptr->try_row[0] = PNG_FILTER_VALUE_PAETH;
+
+   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
+       pp = png_ptr->prev_row + 1; i < bpp; i++)
+   {
+      v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
+
+#ifdef PNG_USE_ABS
+      sum += 128 - abs((int)v - 128);
+#else
+      sum += (v < 128) ? v : 256 - v;
+#endif
+   }
+
+   for (lp = png_ptr->row_buf + 1, cp = png_ptr->prev_row + 1; i < row_bytes;
+        i++)
+   {
+      int a, b, c, pa, pb, pc, p;
+
+      b = *pp++;
+      c = *cp++;
+      a = *lp++;
+
+      p = b - c;
+      pc = a - c;
+
+#ifdef PNG_USE_ABS
+      pa = abs(p);
+      pb = abs(pc);
+      pc = abs(p + pc);
+#else
+      pa = p < 0 ? -p : p;
+      pb = pc < 0 ? -pc : pc;
+      pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+
+      p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
+
+      v = *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
+
+#ifdef PNG_USE_ABS
+      sum += 128 - abs((int)v - 128);
+#else
+      sum += (v < 128) ? v : 256 - v;
+#endif
+
+      if (sum > lmins)  /* We are already worse, don't continue. */
+        break;
+   }
+
+   return sum;
+}
+static void /* PRIVATE */
+png_setup_paeth_row_only(png_structrp png_ptr, png_uint_32 bpp,
+    size_t row_bytes)
+{
+   png_bytep rp, dp, pp, cp, lp;
+   size_t i;
+
+   png_ptr->try_row[0] = PNG_FILTER_VALUE_PAETH;
+
+   for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
+       pp = png_ptr->prev_row + 1; i < bpp; i++)
+   {
+      *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
+   }
+
+   for (lp = png_ptr->row_buf + 1, cp = png_ptr->prev_row + 1; i < row_bytes;
+        i++)
+   {
+      int a, b, c, pa, pb, pc, p;
+
+      b = *pp++;
+      c = *cp++;
+      a = *lp++;
+
+      p = b - c;
+      pc = a - c;
+
+#ifdef PNG_USE_ABS
+      pa = abs(p);
+      pb = abs(pc);
+      pc = abs(p + pc);
+#else
+      pa = p < 0 ? -p : p;
+      pb = pc < 0 ? -pc : pc;
+      pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+
+      p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
+
+      *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
+   }
+}
+#endif /* WRITE_FILTER */
+
+void /* PRIVATE */
+png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
+{
+#ifndef PNG_WRITE_FILTER_SUPPORTED
+   png_write_filtered_row(png_ptr, png_ptr->row_buf, row_info->rowbytes+1);
+#else
+   unsigned int filter_to_do = png_ptr->do_filter;
+   png_bytep row_buf;
+   png_bytep best_row;
+   png_uint_32 bpp;
+   size_t mins;
+   size_t row_bytes = row_info->rowbytes;
+
+   png_debug(1, "in png_write_find_filter");
+
+   /* Find out how many bytes offset each pixel is */
+   bpp = (row_info->pixel_depth + 7) >> 3;
+
+   row_buf = png_ptr->row_buf;
+   mins = PNG_SIZE_MAX - 256/* so we can detect potential overflow of the
+                               running sum */;
+
+   /* The prediction method we use is to find which method provides the
+    * smallest value when summing the absolute values of the distances
+    * from zero, using anything >= 128 as negative numbers.  This is known
+    * as the "minimum sum of absolute differences" heuristic.  Other
+    * heuristics are the "weighted minimum sum of absolute differences"
+    * (experimental and can in theory improve compression), and the "zlib
+    * predictive" method (not implemented yet), which does test compressions
+    * of lines using different filter methods, and then chooses the
+    * (series of) filter(s) that give minimum compressed data size (VERY
+    * computationally expensive).
+    *
+    * GRR 980525:  consider also
+    *
+    *   (1) minimum sum of absolute differences from running average (i.e.,
+    *       keep running sum of non-absolute differences & count of bytes)
+    *       [track dispersion, too?  restart average if dispersion too large?]
+    *
+    *  (1b) minimum sum of absolute differences from sliding average, probably
+    *       with window size <= deflate window (usually 32K)
+    *
+    *   (2) minimum sum of squared differences from zero or running average
+    *       (i.e., ~ root-mean-square approach)
+    */
+
+
+   /* We don't need to test the 'no filter' case if this is the only filter
+    * that has been chosen, as it doesn't actually do anything to the data.
+    */
+   best_row = png_ptr->row_buf;
+
+   if (PNG_SIZE_MAX/128 <= row_bytes)
+   {
+      /* Overflow can occur in the calculation, just select the lowest set
+       * filter.
+       */
+      filter_to_do &= 0U-filter_to_do;
+   }
+   else if ((filter_to_do & PNG_FILTER_NONE) != 0 &&
+         filter_to_do != PNG_FILTER_NONE)
+   {
+      /* Overflow not possible and multiple filters in the list, including the
+       * 'none' filter.
+       */
+      png_bytep rp;
+      size_t sum = 0;
+      size_t i;
+      unsigned int v;
+
+      {
+         for (i = 0, rp = row_buf + 1; i < row_bytes; i++, rp++)
+         {
+            v = *rp;
+#ifdef PNG_USE_ABS
+            sum += 128 - abs((int)v - 128);
+#else
+            sum += (v < 128) ? v : 256 - v;
+#endif
+         }
+      }
+
+      mins = sum;
+   }
+
+   /* Sub filter */
+   if (filter_to_do == PNG_FILTER_SUB)
+   /* It's the only filter so no testing is needed */
+   {
+      png_setup_sub_row_only(png_ptr, bpp, row_bytes);
+      best_row = png_ptr->try_row;
+   }
+
+   else if ((filter_to_do & PNG_FILTER_SUB) != 0)
+   {
+      size_t sum;
+      size_t lmins = mins;
+
+      sum = png_setup_sub_row(png_ptr, bpp, row_bytes, lmins);
+
+      if (sum < mins)
+      {
+         mins = sum;
+         best_row = png_ptr->try_row;
+         if (png_ptr->tst_row != NULL)
+         {
+            png_ptr->try_row = png_ptr->tst_row;
+            png_ptr->tst_row = best_row;
+         }
+      }
+   }
+
+   /* Up filter */
+   if (filter_to_do == PNG_FILTER_UP)
+   {
+      png_setup_up_row_only(png_ptr, row_bytes);
+      best_row = png_ptr->try_row;
+   }
+
+   else if ((filter_to_do & PNG_FILTER_UP) != 0)
+   {
+      size_t sum;
+      size_t lmins = mins;
+
+      sum = png_setup_up_row(png_ptr, row_bytes, lmins);
+
+      if (sum < mins)
+      {
+         mins = sum;
+         best_row = png_ptr->try_row;
+         if (png_ptr->tst_row != NULL)
+         {
+            png_ptr->try_row = png_ptr->tst_row;
+            png_ptr->tst_row = best_row;
+         }
+      }
+   }
+
+   /* Avg filter */
+   if (filter_to_do == PNG_FILTER_AVG)
+   {
+      png_setup_avg_row_only(png_ptr, bpp, row_bytes);
+      best_row = png_ptr->try_row;
+   }
+
+   else if ((filter_to_do & PNG_FILTER_AVG) != 0)
+   {
+      size_t sum;
+      size_t lmins = mins;
+
+      sum= png_setup_avg_row(png_ptr, bpp, row_bytes, lmins);
+
+      if (sum < mins)
+      {
+         mins = sum;
+         best_row = png_ptr->try_row;
+         if (png_ptr->tst_row != NULL)
+         {
+            png_ptr->try_row = png_ptr->tst_row;
+            png_ptr->tst_row = best_row;
+         }
+      }
+   }
+
+   /* Paeth filter */
+   if (filter_to_do == PNG_FILTER_PAETH)
+   {
+      png_setup_paeth_row_only(png_ptr, bpp, row_bytes);
+      best_row = png_ptr->try_row;
+   }
+
+   else if ((filter_to_do & PNG_FILTER_PAETH) != 0)
+   {
+      size_t sum;
+      size_t lmins = mins;
+
+      sum = png_setup_paeth_row(png_ptr, bpp, row_bytes, lmins);
+
+      if (sum < mins)
+      {
+         best_row = png_ptr->try_row;
+         if (png_ptr->tst_row != NULL)
+         {
+            png_ptr->try_row = png_ptr->tst_row;
+            png_ptr->tst_row = best_row;
+         }
+      }
+   }
+
+   /* Do the actual writing of the filtered row data from the chosen filter. */
+   png_write_filtered_row(png_ptr, best_row, row_info->rowbytes+1);
+
+#endif /* WRITE_FILTER */
+}
+
+
+/* Do the actual writing of a previously filtered row. */
+static void
+png_write_filtered_row(png_structrp png_ptr, png_bytep filtered_row,
+    size_t full_row_length/*includes filter byte*/)
+{
+   png_debug(1, "in png_write_filtered_row");
+
+   png_debug1(2, "filter = %d", filtered_row[0]);
+
+   png_compress_IDAT(png_ptr, filtered_row, full_row_length, Z_NO_FLUSH);
+
+#ifdef PNG_WRITE_FILTER_SUPPORTED
+   /* Swap the current and previous rows */
+   if (png_ptr->prev_row != NULL)
+   {
+      png_bytep tptr;
+
+      tptr = png_ptr->prev_row;
+      png_ptr->prev_row = png_ptr->row_buf;
+      png_ptr->row_buf = tptr;
+   }
+#endif /* WRITE_FILTER */
+
+   /* Finish row - updates counters and flushes zlib if last row */
+   png_write_finish_row(png_ptr);
+
+#ifdef PNG_WRITE_FLUSH_SUPPORTED
+   png_ptr->flush_rows++;
+
+   if (png_ptr->flush_dist > 0 &&
+       png_ptr->flush_rows >= png_ptr->flush_dist)
+   {
+      png_write_flush(png_ptr);
+   }
+#endif /* WRITE_FLUSH */
+}
+#endif /* WRITE */
diff --git a/reg-io/png/lpng1510/LICENSE b/reg-io/png/lpng1510/LICENSE
deleted file mode 100644
index 56b136da..00000000
--- a/reg-io/png/lpng1510/LICENSE
+++ /dev/null
@@ -1,111 +0,0 @@
-
-This copy of the libpng notices is provided for your convenience.  In case of
-any discrepancy between this copy and the notices in the file png.h that is
-included in the libpng distribution, the latter shall prevail.
-
-COPYRIGHT NOTICE, DISCLAIMER, and LICENSE:
-
-If you modify libpng you may insert additional notices immediately following
-this sentence.
-
-This code is released under the libpng license.
-
-libpng versions 1.2.6, August 15, 2004, through 1.5.10, March 29, 2012, are
-Copyright (c) 2004, 2006-2011 Glenn Randers-Pehrson, and are
-distributed according to the same disclaimer and license as libpng-1.2.5
-with the following individual added to the list of Contributing Authors
-
-   Cosmin Truta
-
-libpng versions 1.0.7, July 1, 2000, through 1.2.5 - October 3, 2002, are
-Copyright (c) 2000-2002 Glenn Randers-Pehrson, and are
-distributed according to the same disclaimer and license as libpng-1.0.6
-with the following individuals added to the list of Contributing Authors
-
-   Simon-Pierre Cadieux
-   Eric S. Raymond
-   Gilles Vollant
-
-and with the following additions to the disclaimer:
-
-   There is no warranty against interference with your enjoyment of the
-   library or against infringement.  There is no warranty that our
-   efforts or the library will fulfill any of your particular purposes
-   or needs.  This library is provided with all faults, and the entire
-   risk of satisfactory quality, performance, accuracy, and effort is with
-   the user.
-
-libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are
-Copyright (c) 1998, 1999 Glenn Randers-Pehrson, and are
-distributed according to the same disclaimer and license as libpng-0.96,
-with the following individuals added to the list of Contributing Authors:
-
-   Tom Lane
-   Glenn Randers-Pehrson
-   Willem van Schaik
-
-libpng versions 0.89, June 1996, through 0.96, May 1997, are
-Copyright (c) 1996, 1997 Andreas Dilger
-Distributed according to the same disclaimer and license as libpng-0.88,
-with the following individuals added to the list of Contributing Authors:
-
-   John Bowler
-   Kevin Bracey
-   Sam Bushell
-   Magnus Holmgren
-   Greg Roelofs
-   Tom Tanner
-
-libpng versions 0.5, May 1995, through 0.88, January 1996, are
-Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
-
-For the purposes of this copyright and license, "Contributing Authors"
-is defined as the following set of individuals:
-
-   Andreas Dilger
-   Dave Martindale
-   Guy Eric Schalnat
-   Paul Schmidt
-   Tim Wegner
-
-The PNG Reference Library is supplied "AS IS".  The Contributing Authors
-and Group 42, Inc. disclaim all warranties, expressed or implied,
-including, without limitation, the warranties of merchantability and of
-fitness for any purpose.  The Contributing Authors and Group 42, Inc.
-assume no liability for direct, indirect, incidental, special, exemplary,
-or consequential damages, which may result from the use of the PNG
-Reference Library, even if advised of the possibility of such damage.
-
-Permission is hereby granted to use, copy, modify, and distribute this
-source code, or portions hereof, for any purpose, without fee, subject
-to the following restrictions:
-
-1. The origin of this source code must not be misrepresented.
-
-2. Altered versions must be plainly marked as such and must not
-   be misrepresented as being the original source.
-
-3. This Copyright notice may not be removed or altered from any
-   source or altered source distribution.
-
-The Contributing Authors and Group 42, Inc. specifically permit, without
-fee, and encourage the use of this source code as a component to
-supporting the PNG file format in commercial products.  If you use this
-source code in a product, acknowledgment is not required but would be
-appreciated.
-
-
-A "png_get_copyright" function is available, for convenient use in "about"
-boxes and the like:
-
-   printf("%s",png_get_copyright(NULL));
-
-Also, the PNG logo (in PNG format, of course) is supplied in the
-files "pngbar.png" and "pngbar.jpg (88x31) and "pngnow.png" (98x31).
-
-Libpng is OSI Certified Open Source Software.  OSI Certified Open Source is a
-certification mark of the Open Source Initiative.
-
-Glenn Randers-Pehrson
-glennrp at users.sourceforge.net
-March 29, 2012
diff --git a/reg-io/png/lpng1510/png.c b/reg-io/png/lpng1510/png.c
deleted file mode 100644
index c8bff0c1..00000000
--- a/reg-io/png/lpng1510/png.c
+++ /dev/null
@@ -1,2874 +0,0 @@
-
-/* png.c - location for general purpose libpng functions
- *
- * Last changed in libpng 1.5.10 [March 8, 2012]
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "pngpriv.h"
-
-/* Generate a compiler error if there is an old png.h in the search path. */
-typedef png_libpng_version_1_5_10 Your_png_h_is_not_version_1_5_10;
-
-/* Tells libpng that we have already handled the first "num_bytes" bytes
- * of the PNG file signature.  If the PNG data is embedded into another
- * stream we can set num_bytes = 8 so that libpng will not attempt to read
- * or write any of the magic bytes before it starts on the IHDR.
- */
-
-#ifdef PNG_READ_SUPPORTED
-void PNGAPI
-png_set_sig_bytes(png_structp png_ptr, int num_bytes)
-{
-   png_debug(1, "in png_set_sig_bytes");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (num_bytes > 8)
-      png_error(png_ptr, "Too many bytes for PNG signature");
-
-   png_ptr->sig_bytes = (png_byte)(num_bytes < 0 ? 0 : num_bytes);
-}
-
-/* Checks whether the supplied bytes match the PNG signature.  We allow
- * checking less than the full 8-byte signature so that those apps that
- * already read the first few bytes of a file to determine the file type
- * can simply check the remaining bytes for extra assurance.  Returns
- * an integer less than, equal to, or greater than zero if sig is found,
- * respectively, to be less than, to match, or be greater than the correct
- * PNG signature (this is the same behavior as strcmp, memcmp, etc).
- */
-int PNGAPI
-png_sig_cmp(png_const_bytep sig, png_size_t start, png_size_t num_to_check)
-{
-   png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
-
-   if (num_to_check > 8)
-      num_to_check = 8;
-
-   else if (num_to_check < 1)
-      return (-1);
-
-   if (start > 7)
-      return (-1);
-
-   if (start + num_to_check > 8)
-      num_to_check = 8 - start;
-
-   return ((int)(png_memcmp(&sig[start], &png_signature[start], num_to_check)));
-}
-
-#endif /* PNG_READ_SUPPORTED */
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-/* Function to allocate memory for zlib */
-PNG_FUNCTION(voidpf /* PRIVATE */,
-png_zalloc,(voidpf png_ptr, uInt items, uInt size),PNG_ALLOCATED)
-{
-   png_voidp ptr;
-   png_structp p=(png_structp)png_ptr;
-   png_uint_32 save_flags=p->flags;
-   png_alloc_size_t num_bytes;
-
-   if (png_ptr == NULL)
-      return (NULL);
-
-   if (items > PNG_UINT_32_MAX/size)
-   {
-     png_warning (p, "Potential overflow in png_zalloc()");
-     return (NULL);
-   }
-   num_bytes = (png_alloc_size_t)items * size;
-
-   p->flags|=PNG_FLAG_MALLOC_NULL_MEM_OK;
-   ptr = (png_voidp)png_malloc((png_structp)png_ptr, num_bytes);
-   p->flags=save_flags;
-
-   return ((voidpf)ptr);
-}
-
-/* Function to free memory for zlib */
-void /* PRIVATE */
-png_zfree(voidpf png_ptr, voidpf ptr)
-{
-   png_free((png_structp)png_ptr, (png_voidp)ptr);
-}
-
-/* Reset the CRC variable to 32 bits of 1's.  Care must be taken
- * in case CRC is > 32 bits to leave the top bits 0.
- */
-void /* PRIVATE */
-png_reset_crc(png_structp png_ptr)
-{
-   /* The cast is safe because the crc is a 32 bit value. */
-   png_ptr->crc = (png_uint_32)crc32(0, Z_NULL, 0);
-}
-
-/* Calculate the CRC over a section of data.  We can only pass as
- * much data to this routine as the largest single buffer size.  We
- * also check that this data will actually be used before going to the
- * trouble of calculating it.
- */
-void /* PRIVATE */
-png_calculate_crc(png_structp png_ptr, png_const_bytep ptr, png_size_t length)
-{
-   int need_crc = 1;
-
-   if (PNG_CHUNK_ANCILLIARY(png_ptr->chunk_name))
-   {
-      if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) ==
-          (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN))
-         need_crc = 0;
-   }
-
-   else /* critical */
-   {
-      if (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE)
-         need_crc = 0;
-   }
-
-   /* 'uLong' is defined as unsigned long, this means that on some systems it is
-    * a 64 bit value.  crc32, however, returns 32 bits so the following cast is
-    * safe.  'uInt' may be no more than 16 bits, so it is necessary to perform a
-    * loop here.
-    */
-   if (need_crc && length > 0)
-   {
-      uLong crc = png_ptr->crc; /* Should never issue a warning */
-
-      do
-      {
-         uInt safeLength = (uInt)length;
-         if (safeLength == 0)
-            safeLength = (uInt)-1; /* evil, but safe */
-
-         crc = crc32(crc, ptr, safeLength);
-
-         /* The following should never issue compiler warnings, if they do the
-          * target system has characteristics that will probably violate other
-          * assumptions within the libpng code.
-          */
-         ptr += safeLength;
-         length -= safeLength;
-      }
-      while (length > 0);
-
-      /* And the following is always safe because the crc is only 32 bits. */
-      png_ptr->crc = (png_uint_32)crc;
-   }
-}
-
-/* Check a user supplied version number, called from both read and write
- * functions that create a png_struct
- */
-int
-png_user_version_check(png_structp png_ptr, png_const_charp user_png_ver)
-{
-   if (user_png_ver)
-   {
-      int i = 0;
-
-      do
-      {
-         if (user_png_ver[i] != png_libpng_ver[i])
-            png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
-      } while (png_libpng_ver[i++]);
-   }
-
-   else
-      png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
-
-   if (png_ptr->flags & PNG_FLAG_LIBRARY_MISMATCH)
-   {
-     /* Libpng 0.90 and later are binary incompatible with libpng 0.89, so
-      * we must recompile any applications that use any older library version.
-      * For versions after libpng 1.0, we will be compatible, so we need
-      * only check the first digit.
-      */
-      if (user_png_ver == NULL || user_png_ver[0] != png_libpng_ver[0] ||
-          (user_png_ver[0] == '1' && user_png_ver[2] != png_libpng_ver[2]) ||
-          (user_png_ver[0] == '0' && user_png_ver[2] < '9'))
-      {
-#ifdef PNG_WARNINGS_SUPPORTED
-         size_t pos = 0;
-         char m[128];
-
-         pos = png_safecat(m, sizeof m, pos, "Application built with libpng-");
-         pos = png_safecat(m, sizeof m, pos, user_png_ver);
-         pos = png_safecat(m, sizeof m, pos, " but running with ");
-         pos = png_safecat(m, sizeof m, pos, png_libpng_ver);
-
-         png_warning(png_ptr, m);
-#endif
-
-#ifdef PNG_ERROR_NUMBERS_SUPPORTED
-         png_ptr->flags = 0;
-#endif
-
-         return 0;
-      }
-   }
-
-   /* Success return. */
-   return 1;
-}
-
-/* Allocate the memory for an info_struct for the application.  We don't
- * really need the png_ptr, but it could potentially be useful in the
- * future.  This should be used in favour of malloc(png_sizeof(png_info))
- * and png_info_init() so that applications that want to use a shared
- * libpng don't have to be recompiled if png_info changes size.
- */
-PNG_FUNCTION(png_infop,PNGAPI
-png_create_info_struct,(png_structp png_ptr),PNG_ALLOCATED)
-{
-   png_infop info_ptr;
-
-   png_debug(1, "in png_create_info_struct");
-
-   if (png_ptr == NULL)
-      return (NULL);
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   info_ptr = (png_infop)png_create_struct_2(PNG_STRUCT_INFO,
-      png_ptr->malloc_fn, png_ptr->mem_ptr);
-#else
-   info_ptr = (png_infop)png_create_struct(PNG_STRUCT_INFO);
-#endif
-   if (info_ptr != NULL)
-      png_info_init_3(&info_ptr, png_sizeof(png_info));
-
-   return (info_ptr);
-}
-
-/* This function frees the memory associated with a single info struct.
- * Normally, one would use either png_destroy_read_struct() or
- * png_destroy_write_struct() to free an info struct, but this may be
- * useful for some applications.
- */
-void PNGAPI
-png_destroy_info_struct(png_structp png_ptr, png_infopp info_ptr_ptr)
-{
-   png_infop info_ptr = NULL;
-
-   png_debug(1, "in png_destroy_info_struct");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (info_ptr_ptr != NULL)
-      info_ptr = *info_ptr_ptr;
-
-   if (info_ptr != NULL)
-   {
-      png_info_destroy(png_ptr, info_ptr);
-
-#ifdef PNG_USER_MEM_SUPPORTED
-      png_destroy_struct_2((png_voidp)info_ptr, png_ptr->free_fn,
-          png_ptr->mem_ptr);
-#else
-      png_destroy_struct((png_voidp)info_ptr);
-#endif
-      *info_ptr_ptr = NULL;
-   }
-}
-
-/* Initialize the info structure.  This is now an internal function (0.89)
- * and applications using it are urged to use png_create_info_struct()
- * instead.
- */
-
-void PNGAPI
-png_info_init_3(png_infopp ptr_ptr, png_size_t png_info_struct_size)
-{
-   png_infop info_ptr = *ptr_ptr;
-
-   png_debug(1, "in png_info_init_3");
-
-   if (info_ptr == NULL)
-      return;
-
-   if (png_sizeof(png_info) > png_info_struct_size)
-   {
-      png_destroy_struct(info_ptr);
-      info_ptr = (png_infop)png_create_struct(PNG_STRUCT_INFO);
-      *ptr_ptr = info_ptr;
-   }
-
-   /* Set everything to 0 */
-   png_memset(info_ptr, 0, png_sizeof(png_info));
-}
-
-void PNGAPI
-png_data_freer(png_structp png_ptr, png_infop info_ptr,
-   int freer, png_uint_32 mask)
-{
-   png_debug(1, "in png_data_freer");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if (freer == PNG_DESTROY_WILL_FREE_DATA)
-      info_ptr->free_me |= mask;
-
-   else if (freer == PNG_USER_WILL_FREE_DATA)
-      info_ptr->free_me &= ~mask;
-
-   else
-      png_warning(png_ptr,
-         "Unknown freer parameter in png_data_freer");
-}
-
-void PNGAPI
-png_free_data(png_structp png_ptr, png_infop info_ptr, png_uint_32 mask,
-   int num)
-{
-   png_debug(1, "in png_free_data");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-#ifdef PNG_TEXT_SUPPORTED
-   /* Free text item num or (if num == -1) all text items */
-   if ((mask & PNG_FREE_TEXT) & info_ptr->free_me)
-   {
-      if (num != -1)
-      {
-         if (info_ptr->text && info_ptr->text[num].key)
-         {
-            png_free(png_ptr, info_ptr->text[num].key);
-            info_ptr->text[num].key = NULL;
-         }
-      }
-
-      else
-      {
-         int i;
-         for (i = 0; i < info_ptr->num_text; i++)
-             png_free_data(png_ptr, info_ptr, PNG_FREE_TEXT, i);
-         png_free(png_ptr, info_ptr->text);
-         info_ptr->text = NULL;
-         info_ptr->num_text=0;
-      }
-   }
-#endif
-
-#ifdef PNG_tRNS_SUPPORTED
-   /* Free any tRNS entry */
-   if ((mask & PNG_FREE_TRNS) & info_ptr->free_me)
-   {
-      png_free(png_ptr, info_ptr->trans_alpha);
-      info_ptr->trans_alpha = NULL;
-      info_ptr->valid &= ~PNG_INFO_tRNS;
-   }
-#endif
-
-#ifdef PNG_sCAL_SUPPORTED
-   /* Free any sCAL entry */
-   if ((mask & PNG_FREE_SCAL) & info_ptr->free_me)
-   {
-      png_free(png_ptr, info_ptr->scal_s_width);
-      png_free(png_ptr, info_ptr->scal_s_height);
-      info_ptr->scal_s_width = NULL;
-      info_ptr->scal_s_height = NULL;
-      info_ptr->valid &= ~PNG_INFO_sCAL;
-   }
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-   /* Free any pCAL entry */
-   if ((mask & PNG_FREE_PCAL) & info_ptr->free_me)
-   {
-      png_free(png_ptr, info_ptr->pcal_purpose);
-      png_free(png_ptr, info_ptr->pcal_units);
-      info_ptr->pcal_purpose = NULL;
-      info_ptr->pcal_units = NULL;
-      if (info_ptr->pcal_params != NULL)
-         {
-            int i;
-            for (i = 0; i < (int)info_ptr->pcal_nparams; i++)
-            {
-               png_free(png_ptr, info_ptr->pcal_params[i]);
-               info_ptr->pcal_params[i] = NULL;
-            }
-            png_free(png_ptr, info_ptr->pcal_params);
-            info_ptr->pcal_params = NULL;
-         }
-      info_ptr->valid &= ~PNG_INFO_pCAL;
-   }
-#endif
-
-#ifdef PNG_iCCP_SUPPORTED
-   /* Free any iCCP entry */
-   if ((mask & PNG_FREE_ICCP) & info_ptr->free_me)
-   {
-      png_free(png_ptr, info_ptr->iccp_name);
-      png_free(png_ptr, info_ptr->iccp_profile);
-      info_ptr->iccp_name = NULL;
-      info_ptr->iccp_profile = NULL;
-      info_ptr->valid &= ~PNG_INFO_iCCP;
-   }
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-   /* Free a given sPLT entry, or (if num == -1) all sPLT entries */
-   if ((mask & PNG_FREE_SPLT) & info_ptr->free_me)
-   {
-      if (num != -1)
-      {
-         if (info_ptr->splt_palettes)
-         {
-            png_free(png_ptr, info_ptr->splt_palettes[num].name);
-            png_free(png_ptr, info_ptr->splt_palettes[num].entries);
-            info_ptr->splt_palettes[num].name = NULL;
-            info_ptr->splt_palettes[num].entries = NULL;
-         }
-      }
-
-      else
-      {
-         if (info_ptr->splt_palettes_num)
-         {
-            int i;
-            for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
-               png_free_data(png_ptr, info_ptr, PNG_FREE_SPLT, i);
-
-            png_free(png_ptr, info_ptr->splt_palettes);
-            info_ptr->splt_palettes = NULL;
-            info_ptr->splt_palettes_num = 0;
-         }
-         info_ptr->valid &= ~PNG_INFO_sPLT;
-      }
-   }
-#endif
-
-#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
-   if (png_ptr->unknown_chunk.data)
-   {
-      png_free(png_ptr, png_ptr->unknown_chunk.data);
-      png_ptr->unknown_chunk.data = NULL;
-   }
-
-   if ((mask & PNG_FREE_UNKN) & info_ptr->free_me)
-   {
-      if (num != -1)
-      {
-          if (info_ptr->unknown_chunks)
-          {
-             png_free(png_ptr, info_ptr->unknown_chunks[num].data);
-             info_ptr->unknown_chunks[num].data = NULL;
-          }
-      }
-
-      else
-      {
-         int i;
-
-         if (info_ptr->unknown_chunks_num)
-         {
-            for (i = 0; i < info_ptr->unknown_chunks_num; i++)
-               png_free_data(png_ptr, info_ptr, PNG_FREE_UNKN, i);
-
-            png_free(png_ptr, info_ptr->unknown_chunks);
-            info_ptr->unknown_chunks = NULL;
-            info_ptr->unknown_chunks_num = 0;
-         }
-      }
-   }
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-   /* Free any hIST entry */
-   if ((mask & PNG_FREE_HIST)  & info_ptr->free_me)
-   {
-      png_free(png_ptr, info_ptr->hist);
-      info_ptr->hist = NULL;
-      info_ptr->valid &= ~PNG_INFO_hIST;
-   }
-#endif
-
-   /* Free any PLTE entry that was internally allocated */
-   if ((mask & PNG_FREE_PLTE) & info_ptr->free_me)
-   {
-      png_zfree(png_ptr, info_ptr->palette);
-      info_ptr->palette = NULL;
-      info_ptr->valid &= ~PNG_INFO_PLTE;
-      info_ptr->num_palette = 0;
-   }
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-   /* Free any image bits attached to the info structure */
-   if ((mask & PNG_FREE_ROWS) & info_ptr->free_me)
-   {
-      if (info_ptr->row_pointers)
-      {
-         int row;
-         for (row = 0; row < (int)info_ptr->height; row++)
-         {
-            png_free(png_ptr, info_ptr->row_pointers[row]);
-            info_ptr->row_pointers[row] = NULL;
-         }
-         png_free(png_ptr, info_ptr->row_pointers);
-         info_ptr->row_pointers = NULL;
-      }
-      info_ptr->valid &= ~PNG_INFO_IDAT;
-   }
-#endif
-
-   if (num != -1)
-      mask &= ~PNG_FREE_MUL;
-
-   info_ptr->free_me &= ~mask;
-}
-
-/* This is an internal routine to free any memory that the info struct is
- * pointing to before re-using it or freeing the struct itself.  Recall
- * that png_free() checks for NULL pointers for us.
- */
-void /* PRIVATE */
-png_info_destroy(png_structp png_ptr, png_infop info_ptr)
-{
-   png_debug(1, "in png_info_destroy");
-
-   png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   if (png_ptr->num_chunk_list)
-   {
-      png_free(png_ptr, png_ptr->chunk_list);
-      png_ptr->chunk_list = NULL;
-      png_ptr->num_chunk_list = 0;
-   }
-#endif
-
-   png_info_init_3(&info_ptr, png_sizeof(png_info));
-}
-#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
-
-/* This function returns a pointer to the io_ptr associated with the user
- * functions.  The application should free any memory associated with this
- * pointer before png_write_destroy() or png_read_destroy() are called.
- */
-png_voidp PNGAPI
-png_get_io_ptr(png_structp png_ptr)
-{
-   if (png_ptr == NULL)
-      return (NULL);
-
-   return (png_ptr->io_ptr);
-}
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-#  ifdef PNG_STDIO_SUPPORTED
-/* Initialize the default input/output functions for the PNG file.  If you
- * use your own read or write routines, you can call either png_set_read_fn()
- * or png_set_write_fn() instead of png_init_io().  If you have defined
- * PNG_NO_STDIO or otherwise disabled PNG_STDIO_SUPPORTED, you must use a
- * function of your own because "FILE *" isn't necessarily available.
- */
-void PNGAPI
-png_init_io(png_structp png_ptr, png_FILE_p fp)
-{
-   png_debug(1, "in png_init_io");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->io_ptr = (png_voidp)fp;
-}
-#  endif
-
-#  ifdef PNG_TIME_RFC1123_SUPPORTED
-/* Convert the supplied time into an RFC 1123 string suitable for use in
- * a "Creation Time" or other text-based time string.
- */
-png_const_charp PNGAPI
-png_convert_to_rfc1123(png_structp png_ptr, png_const_timep ptime)
-{
-   static PNG_CONST char short_months[12][4] =
-        {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
-         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
-
-   if (png_ptr == NULL)
-      return (NULL);
-
-   if (ptime->year > 9999 /* RFC1123 limitation */ ||
-       ptime->month == 0    ||  ptime->month > 12  ||
-       ptime->day   == 0    ||  ptime->day   > 31  ||
-       ptime->hour  > 23    ||  ptime->minute > 59 ||
-       ptime->second > 60)
-   {
-      png_warning(png_ptr, "Ignoring invalid time value");
-      return (NULL);
-   }
-
-   {
-      size_t pos = 0;
-      char number_buf[5]; /* enough for a four-digit year */
-
-#     define APPEND_STRING(string)\
-         pos = png_safecat(png_ptr->time_buffer, sizeof png_ptr->time_buffer,\
-            pos, (string))
-#     define APPEND_NUMBER(format, value)\
-         APPEND_STRING(PNG_FORMAT_NUMBER(number_buf, format, (value)))
-#     define APPEND(ch)\
-         if (pos < (sizeof png_ptr->time_buffer)-1)\
-            png_ptr->time_buffer[pos++] = (ch)
-
-      APPEND_NUMBER(PNG_NUMBER_FORMAT_u, (unsigned)ptime->day);
-      APPEND(' ');
-      APPEND_STRING(short_months[(ptime->month - 1)]);
-      APPEND(' ');
-      APPEND_NUMBER(PNG_NUMBER_FORMAT_u, ptime->year);
-      APPEND(' ');
-      APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->hour);
-      APPEND(':');
-      APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->minute);
-      APPEND(':');
-      APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->second);
-      APPEND_STRING(" +0000"); /* This reliably terminates the buffer */
-
-#     undef APPEND
-#     undef APPEND_NUMBER
-#     undef APPEND_STRING
-   }
-
-   return png_ptr->time_buffer;
-}
-#  endif /* PNG_TIME_RFC1123_SUPPORTED */
-
-#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
-
-png_const_charp PNGAPI
-png_get_copyright(png_const_structp png_ptr)
-{
-   PNG_UNUSED(png_ptr)  /* Silence compiler warning about unused png_ptr */
-#ifdef PNG_STRING_COPYRIGHT
-   return PNG_STRING_COPYRIGHT
-#else
-#  ifdef __STDC__
-   return PNG_STRING_NEWLINE \
-     "libpng version 1.5.10 - March 29, 2012" PNG_STRING_NEWLINE \
-     "Copyright (c) 1998-2011 Glenn Randers-Pehrson" PNG_STRING_NEWLINE \
-     "Copyright (c) 1996-1997 Andreas Dilger" PNG_STRING_NEWLINE \
-     "Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc." \
-     PNG_STRING_NEWLINE;
-#  else
-      return "libpng version 1.5.10 - March 29, 2012\
-      Copyright (c) 1998-2011 Glenn Randers-Pehrson\
-      Copyright (c) 1996-1997 Andreas Dilger\
-      Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.";
-#  endif
-#endif
-}
-
-/* The following return the library version as a short string in the
- * format 1.0.0 through 99.99.99zz.  To get the version of *.h files
- * used with your application, print out PNG_LIBPNG_VER_STRING, which
- * is defined in png.h.
- * Note: now there is no difference between png_get_libpng_ver() and
- * png_get_header_ver().  Due to the version_nn_nn_nn typedef guard,
- * it is guaranteed that png.c uses the correct version of png.h.
- */
-png_const_charp PNGAPI
-png_get_libpng_ver(png_const_structp png_ptr)
-{
-   /* Version of *.c files used when building libpng */
-   return png_get_header_ver(png_ptr);
-}
-
-png_const_charp PNGAPI
-png_get_header_ver(png_const_structp png_ptr)
-{
-   /* Version of *.h files used when building libpng */
-   PNG_UNUSED(png_ptr)  /* Silence compiler warning about unused png_ptr */
-   return PNG_LIBPNG_VER_STRING;
-}
-
-png_const_charp PNGAPI
-png_get_header_version(png_const_structp png_ptr)
-{
-   /* Returns longer string containing both version and date */
-   PNG_UNUSED(png_ptr)  /* Silence compiler warning about unused png_ptr */
-#ifdef __STDC__
-   return PNG_HEADER_VERSION_STRING
-#  ifndef PNG_READ_SUPPORTED
-   "     (NO READ SUPPORT)"
-#  endif
-   PNG_STRING_NEWLINE;
-#else
-   return PNG_HEADER_VERSION_STRING;
-#endif
-}
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-int PNGAPI
-png_handle_as_unknown(png_structp png_ptr, png_const_bytep chunk_name)
-{
-   /* Check chunk_name and return "keep" value if it's on the list, else 0 */
-   png_const_bytep p, p_end;
-
-   if (png_ptr == NULL || chunk_name == NULL || png_ptr->num_chunk_list <= 0)
-      return PNG_HANDLE_CHUNK_AS_DEFAULT;
-
-   p_end = png_ptr->chunk_list;
-   p = p_end + png_ptr->num_chunk_list*5; /* beyond end */
-
-   /* The code is the fifth byte after each four byte string.  Historically this
-    * code was always searched from the end of the list, so it should continue
-    * to do so in case there are duplicated entries.
-    */
-   do /* num_chunk_list > 0, so at least one */
-   {
-      p -= 5;
-      if (!png_memcmp(chunk_name, p, 4))
-         return p[4];
-   }
-   while (p > p_end);
-
-   return PNG_HANDLE_CHUNK_AS_DEFAULT;
-}
-
-int /* PRIVATE */
-png_chunk_unknown_handling(png_structp png_ptr, png_uint_32 chunk_name)
-{
-   png_byte chunk_string[5];
-
-   PNG_CSTRING_FROM_CHUNK(chunk_string, chunk_name);
-   return png_handle_as_unknown(png_ptr, chunk_string);
-}
-#endif
-
-#ifdef PNG_READ_SUPPORTED
-/* This function, added to libpng-1.0.6g, is untested. */
-int PNGAPI
-png_reset_zstream(png_structp png_ptr)
-{
-   if (png_ptr == NULL)
-      return Z_STREAM_ERROR;
-
-   return (inflateReset(&png_ptr->zstream));
-}
-#endif /* PNG_READ_SUPPORTED */
-
-/* This function was added to libpng-1.0.7 */
-png_uint_32 PNGAPI
-png_access_version_number(void)
-{
-   /* Version of *.c files used when building libpng */
-   return((png_uint_32)PNG_LIBPNG_VER);
-}
-
-
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-/* png_convert_size: a PNGAPI but no longer in png.h, so deleted
- * at libpng 1.5.5!
- */
-
-/* Added at libpng version 1.2.34 and 1.4.0 (moved from pngset.c) */
-#  ifdef PNG_CHECK_cHRM_SUPPORTED
-
-int /* PRIVATE */
-png_check_cHRM_fixed(png_structp png_ptr,
-   png_fixed_point white_x, png_fixed_point white_y, png_fixed_point red_x,
-   png_fixed_point red_y, png_fixed_point green_x, png_fixed_point green_y,
-   png_fixed_point blue_x, png_fixed_point blue_y)
-{
-   int ret = 1;
-   unsigned long xy_hi,xy_lo,yx_hi,yx_lo;
-
-   png_debug(1, "in function png_check_cHRM_fixed");
-
-   if (png_ptr == NULL)
-      return 0;
-
-   /* (x,y,z) values are first limited to 0..100000 (PNG_FP_1), the white
-    * y must also be greater than 0.  To test for the upper limit calculate
-    * (PNG_FP_1-y) - x must be <= to this for z to be >= 0 (and the expression
-    * cannot overflow.)  At this point we know x and y are >= 0 and (x+y) is
-    * <= PNG_FP_1.  The previous test on PNG_MAX_UINT_31 is removed because it
-    * pointless (and it produces compiler warnings!)
-    */
-   if (white_x < 0 || white_y <= 0 ||
-         red_x < 0 ||   red_y <  0 ||
-       green_x < 0 || green_y <  0 ||
-        blue_x < 0 ||  blue_y <  0)
-   {
-      png_warning(png_ptr,
-        "Ignoring attempt to set negative chromaticity value");
-      ret = 0;
-   }
-   /* And (x+y) must be <= PNG_FP_1 (so z is >= 0) */
-   if (white_x > PNG_FP_1 - white_y)
-   {
-      png_warning(png_ptr, "Invalid cHRM white point");
-      ret = 0;
-   }
-
-   if (red_x > PNG_FP_1 - red_y)
-   {
-      png_warning(png_ptr, "Invalid cHRM red point");
-      ret = 0;
-   }
-
-   if (green_x > PNG_FP_1 - green_y)
-   {
-      png_warning(png_ptr, "Invalid cHRM green point");
-      ret = 0;
-   }
-
-   if (blue_x > PNG_FP_1 - blue_y)
-   {
-      png_warning(png_ptr, "Invalid cHRM blue point");
-      ret = 0;
-   }
-
-   png_64bit_product(green_x - red_x, blue_y - red_y, &xy_hi, &xy_lo);
-   png_64bit_product(green_y - red_y, blue_x - red_x, &yx_hi, &yx_lo);
-
-   if (xy_hi == yx_hi && xy_lo == yx_lo)
-   {
-      png_warning(png_ptr,
-         "Ignoring attempt to set cHRM RGB triangle with zero area");
-      ret = 0;
-   }
-
-   return ret;
-}
-#  endif /* PNG_CHECK_cHRM_SUPPORTED */
-
-#ifdef PNG_cHRM_SUPPORTED
-/* Added at libpng-1.5.5 to support read and write of true CIEXYZ values for
- * cHRM, as opposed to using chromaticities.  These internal APIs return
- * non-zero on a parameter error.  The X, Y and Z values are required to be
- * positive and less than 1.0.
- */
-int png_xy_from_XYZ(png_xy *xy, png_XYZ XYZ)
-{
-   png_int_32 d, dwhite, whiteX, whiteY;
-
-   d = XYZ.redX + XYZ.redY + XYZ.redZ;
-   if (!png_muldiv(&xy->redx, XYZ.redX, PNG_FP_1, d)) return 1;
-   if (!png_muldiv(&xy->redy, XYZ.redY, PNG_FP_1, d)) return 1;
-   dwhite = d;
-   whiteX = XYZ.redX;
-   whiteY = XYZ.redY;
-
-   d = XYZ.greenX + XYZ.greenY + XYZ.greenZ;
-   if (!png_muldiv(&xy->greenx, XYZ.greenX, PNG_FP_1, d)) return 1;
-   if (!png_muldiv(&xy->greeny, XYZ.greenY, PNG_FP_1, d)) return 1;
-   dwhite += d;
-   whiteX += XYZ.greenX;
-   whiteY += XYZ.greenY;
-
-   d = XYZ.blueX + XYZ.blueY + XYZ.blueZ;
-   if (!png_muldiv(&xy->bluex, XYZ.blueX, PNG_FP_1, d)) return 1;
-   if (!png_muldiv(&xy->bluey, XYZ.blueY, PNG_FP_1, d)) return 1;
-   dwhite += d;
-   whiteX += XYZ.blueX;
-   whiteY += XYZ.blueY;
-
-   /* The reference white is simply the same of the end-point (X,Y,Z) vectors,
-    * thus:
-    */
-   if (!png_muldiv(&xy->whitex, whiteX, PNG_FP_1, dwhite)) return 1;
-   if (!png_muldiv(&xy->whitey, whiteY, PNG_FP_1, dwhite)) return 1;
-
-   return 0;
-}
-
-int png_XYZ_from_xy(png_XYZ *XYZ, png_xy xy)
-{
-   png_fixed_point red_inverse, green_inverse, blue_scale;
-   png_fixed_point left, right, denominator;
-
-   /* Check xy and, implicitly, z.  Note that wide gamut color spaces typically
-    * have end points with 0 tristimulus values (these are impossible end
-    * points, but they are used to cover the possible colors.)
-    */
-   if (xy.redx < 0 || xy.redx > PNG_FP_1) return 1;
-   if (xy.redy < 0 || xy.redy > PNG_FP_1-xy.redx) return 1;
-   if (xy.greenx < 0 || xy.greenx > PNG_FP_1) return 1;
-   if (xy.greeny < 0 || xy.greeny > PNG_FP_1-xy.greenx) return 1;
-   if (xy.bluex < 0 || xy.bluex > PNG_FP_1) return 1;
-   if (xy.bluey < 0 || xy.bluey > PNG_FP_1-xy.bluex) return 1;
-   if (xy.whitex < 0 || xy.whitex > PNG_FP_1) return 1;
-   if (xy.whitey < 0 || xy.whitey > PNG_FP_1-xy.whitex) return 1;
-
-   /* The reverse calculation is more difficult because the original tristimulus
-    * value had 9 independent values (red,green,blue)x(X,Y,Z) however only 8
-    * derived values were recorded in the cHRM chunk;
-    * (red,green,blue,white)x(x,y).  This loses one degree of freedom and
-    * therefore an arbitrary ninth value has to be introduced to undo the
-    * original transformations.
-    *
-    * Think of the original end-points as points in (X,Y,Z) space.  The
-    * chromaticity values (c) have the property:
-    *
-    *           C
-    *   c = ---------
-    *       X + Y + Z
-    *
-    * For each c (x,y,z) from the corresponding original C (X,Y,Z).  Thus the
-    * three chromaticity values (x,y,z) for each end-point obey the
-    * relationship:
-    *
-    *   x + y + z = 1
-    *
-    * This describes the plane in (X,Y,Z) space that intersects each axis at the
-    * value 1.0; call this the chromaticity plane.  Thus the chromaticity
-    * calculation has scaled each end-point so that it is on the x+y+z=1 plane
-    * and chromaticity is the intersection of the vector from the origin to the
-    * (X,Y,Z) value with the chromaticity plane.
-    *
-    * To fully invert the chromaticity calculation we would need the three
-    * end-point scale factors, (red-scale, green-scale, blue-scale), but these
-    * were not recorded.  Instead we calculated the reference white (X,Y,Z) and
-    * recorded the chromaticity of this.  The reference white (X,Y,Z) would have
-    * given all three of the scale factors since:
-    *
-    *    color-C = color-c * color-scale
-    *    white-C = red-C + green-C + blue-C
-    *            = red-c*red-scale + green-c*green-scale + blue-c*blue-scale
-    *
-    * But cHRM records only white-x and white-y, so we have lost the white scale
-    * factor:
-    *
-    *    white-C = white-c*white-scale
-    *
-    * To handle this the inverse transformation makes an arbitrary assumption
-    * about white-scale:
-    *
-    *    Assume: white-Y = 1.0
-    *    Hence:  white-scale = 1/white-y
-    *    Or:     red-Y + green-Y + blue-Y = 1.0
-    *
-    * Notice the last statement of the assumption gives an equation in three of
-    * the nine values we want to calculate.  8 more equations come from the
-    * above routine as summarised at the top above (the chromaticity
-    * calculation):
-    *
-    *    Given: color-x = color-X / (color-X + color-Y + color-Z)
-    *    Hence: (color-x - 1)*color-X + color.x*color-Y + color.x*color-Z = 0
-    *
-    * This is 9 simultaneous equations in the 9 variables "color-C" and can be
-    * solved by Cramer's rule.  Cramer's rule requires calculating 10 9x9 matrix
-    * determinants, however this is not as bad as it seems because only 28 of
-    * the total of 90 terms in the various matrices are non-zero.  Nevertheless
-    * Cramer's rule is notoriously numerically unstable because the determinant
-    * calculation involves the difference of large, but similar, numbers.  It is
-    * difficult to be sure that the calculation is stable for real world values
-    * and it is certain that it becomes unstable where the end points are close
-    * together.
-    *
-    * So this code uses the perhaps slighly less optimal but more understandable
-    * and totally obvious approach of calculating color-scale.
-    *
-    * This algorithm depends on the precision in white-scale and that is
-    * (1/white-y), so we can immediately see that as white-y approaches 0 the
-    * accuracy inherent in the cHRM chunk drops off substantially.
-    *
-    * libpng arithmetic: a simple invertion of the above equations
-    * ------------------------------------------------------------
-    *
-    *    white_scale = 1/white-y
-    *    white-X = white-x * white-scale
-    *    white-Y = 1.0
-    *    white-Z = (1 - white-x - white-y) * white_scale
-    *
-    *    white-C = red-C + green-C + blue-C
-    *            = red-c*red-scale + green-c*green-scale + blue-c*blue-scale
-    *
-    * This gives us three equations in (red-scale,green-scale,blue-scale) where
-    * all the coefficients are now known:
-    *
-    *    red-x*red-scale + green-x*green-scale + blue-x*blue-scale
-    *       = white-x/white-y
-    *    red-y*red-scale + green-y*green-scale + blue-y*blue-scale = 1
-    *    red-z*red-scale + green-z*green-scale + blue-z*blue-scale
-    *       = (1 - white-x - white-y)/white-y
-    *
-    * In the last equation color-z is (1 - color-x - color-y) so we can add all
-    * three equations together to get an alternative third:
-    *
-    *    red-scale + green-scale + blue-scale = 1/white-y = white-scale
-    *
-    * So now we have a Cramer's rule solution where the determinants are just
-    * 3x3 - far more tractible.  Unfortunately 3x3 determinants still involve
-    * multiplication of three coefficients so we can't guarantee to avoid
-    * overflow in the libpng fixed point representation.  Using Cramer's rule in
-    * floating point is probably a good choice here, but it's not an option for
-    * fixed point.  Instead proceed to simplify the first two equations by
-    * eliminating what is likely to be the largest value, blue-scale:
-    *
-    *    blue-scale = white-scale - red-scale - green-scale
-    *
-    * Hence:
-    *
-    *    (red-x - blue-x)*red-scale + (green-x - blue-x)*green-scale =
-    *                (white-x - blue-x)*white-scale
-    *
-    *    (red-y - blue-y)*red-scale + (green-y - blue-y)*green-scale =
-    *                1 - blue-y*white-scale
-    *
-    * And now we can trivially solve for (red-scale,green-scale):
-    *
-    *    green-scale =
-    *                (white-x - blue-x)*white-scale - (red-x - blue-x)*red-scale
-    *                -----------------------------------------------------------
-    *                                  green-x - blue-x
-    *
-    *    red-scale =
-    *                1 - blue-y*white-scale - (green-y - blue-y) * green-scale
-    *                ---------------------------------------------------------
-    *                                  red-y - blue-y
-    *
-    * Hence:
-    *
-    *    red-scale =
-    *          ( (green-x - blue-x) * (white-y - blue-y) -
-    *            (green-y - blue-y) * (white-x - blue-x) ) / white-y
-    * -------------------------------------------------------------------------
-    *  (green-x - blue-x)*(red-y - blue-y)-(green-y - blue-y)*(red-x - blue-x)
-    *
-    *    green-scale =
-    *          ( (red-y - blue-y) * (white-x - blue-x) -
-    *            (red-x - blue-x) * (white-y - blue-y) ) / white-y
-    * -------------------------------------------------------------------------
-    *  (green-x - blue-x)*(red-y - blue-y)-(green-y - blue-y)*(red-x - blue-x)
-    *
-    * Accuracy:
-    * The input values have 5 decimal digits of accuracy.  The values are all in
-    * the range 0 < value < 1, so simple products are in the same range but may
-    * need up to 10 decimal digits to preserve the original precision and avoid
-    * underflow.  Because we are using a 32-bit signed representation we cannot
-    * match this; the best is a little over 9 decimal digits, less than 10.
-    *
-    * The approach used here is to preserve the maximum precision within the
-    * signed representation.  Because the red-scale calculation above uses the
-    * difference between two products of values that must be in the range -1..+1
-    * it is sufficient to divide the product by 7; ceil(100,000/32767*2).  The
-    * factor is irrelevant in the calculation because it is applied to both
-    * numerator and denominator.
-    *
-    * Note that the values of the differences of the products of the
-    * chromaticities in the above equations tend to be small, for example for
-    * the sRGB chromaticities they are:
-    *
-    * red numerator:    -0.04751
-    * green numerator:  -0.08788
-    * denominator:      -0.2241 (without white-y multiplication)
-    *
-    *  The resultant Y coefficients from the chromaticities of some widely used
-    *  color space definitions are (to 15 decimal places):
-    *
-    *  sRGB
-    *    0.212639005871510 0.715168678767756 0.072192315360734
-    *  Kodak ProPhoto
-    *    0.288071128229293 0.711843217810102 0.000085653960605
-    *  Adobe RGB
-    *    0.297344975250536 0.627363566255466 0.075291458493998
-    *  Adobe Wide Gamut RGB
-    *    0.258728243040113 0.724682314948566 0.016589442011321
-    */
-   /* By the argument, above overflow should be impossible here. The return
-    * value of 2 indicates an internal error to the caller.
-    */
-   if (!png_muldiv(&left, xy.greenx-xy.bluex, xy.redy - xy.bluey, 7)) return 2;
-   if (!png_muldiv(&right, xy.greeny-xy.bluey, xy.redx - xy.bluex, 7)) return 2;
-   denominator = left - right;
-
-   /* Now find the red numerator. */
-   if (!png_muldiv(&left, xy.greenx-xy.bluex, xy.whitey-xy.bluey, 7)) return 2;
-   if (!png_muldiv(&right, xy.greeny-xy.bluey, xy.whitex-xy.bluex, 7)) return 2;
-
-   /* Overflow is possible here and it indicates an extreme set of PNG cHRM
-    * chunk values.  This calculation actually returns the reciprocal of the
-    * scale value because this allows us to delay the multiplication of white-y
-    * into the denominator, which tends to produce a small number.
-    */
-   if (!png_muldiv(&red_inverse, xy.whitey, denominator, left-right) ||
-       red_inverse <= xy.whitey /* r+g+b scales = white scale */)
-      return 1;
-
-   /* Similarly for green_inverse: */
-   if (!png_muldiv(&left, xy.redy-xy.bluey, xy.whitex-xy.bluex, 7)) return 2;
-   if (!png_muldiv(&right, xy.redx-xy.bluex, xy.whitey-xy.bluey, 7)) return 2;
-   if (!png_muldiv(&green_inverse, xy.whitey, denominator, left-right) ||
-       green_inverse <= xy.whitey)
-      return 1;
-
-   /* And the blue scale, the checks above guarantee this can't overflow but it
-    * can still produce 0 for extreme cHRM values.
-    */
-   blue_scale = png_reciprocal(xy.whitey) - png_reciprocal(red_inverse) -
-      png_reciprocal(green_inverse);
-   if (blue_scale <= 0) return 1;
-
-
-   /* And fill in the png_XYZ: */
-   if (!png_muldiv(&XYZ->redX, xy.redx, PNG_FP_1, red_inverse)) return 1;
-   if (!png_muldiv(&XYZ->redY, xy.redy, PNG_FP_1, red_inverse)) return 1;
-   if (!png_muldiv(&XYZ->redZ, PNG_FP_1 - xy.redx - xy.redy, PNG_FP_1,
-      red_inverse))
-      return 1;
-
-   if (!png_muldiv(&XYZ->greenX, xy.greenx, PNG_FP_1, green_inverse)) return 1;
-   if (!png_muldiv(&XYZ->greenY, xy.greeny, PNG_FP_1, green_inverse)) return 1;
-   if (!png_muldiv(&XYZ->greenZ, PNG_FP_1 - xy.greenx - xy.greeny, PNG_FP_1,
-      green_inverse))
-      return 1;
-
-   if (!png_muldiv(&XYZ->blueX, xy.bluex, blue_scale, PNG_FP_1)) return 1;
-   if (!png_muldiv(&XYZ->blueY, xy.bluey, blue_scale, PNG_FP_1)) return 1;
-   if (!png_muldiv(&XYZ->blueZ, PNG_FP_1 - xy.bluex - xy.bluey, blue_scale,
-      PNG_FP_1))
-      return 1;
-
-   return 0; /*success*/
-}
-
-int png_XYZ_from_xy_checked(png_structp png_ptr, png_XYZ *XYZ, png_xy xy)
-{
-   switch (png_XYZ_from_xy(XYZ, xy))
-   {
-      case 0: /* success */
-         return 1;
-
-      case 1:
-         /* The chunk may be technically valid, but we got png_fixed_point
-          * overflow while trying to get XYZ values out of it.  This is
-          * entirely benign - the cHRM chunk is pretty extreme.
-          */
-         png_warning(png_ptr,
-            "extreme cHRM chunk cannot be converted to tristimulus values");
-         break;
-
-      default:
-         /* libpng is broken; this should be a warning but if it happens we
-          * want error reports so for the moment it is an error.
-          */
-         png_error(png_ptr, "internal error in png_XYZ_from_xy");
-         break;
-   }
-
-   /* ERROR RETURN */
-   return 0;
-}
-#endif
-
-void /* PRIVATE */
-png_check_IHDR(png_structp png_ptr,
-   png_uint_32 width, png_uint_32 height, int bit_depth,
-   int color_type, int interlace_type, int compression_type,
-   int filter_type)
-{
-   int error = 0;
-
-   /* Check for width and height valid values */
-   if (width == 0)
-   {
-      png_warning(png_ptr, "Image width is zero in IHDR");
-      error = 1;
-   }
-
-   if (height == 0)
-   {
-      png_warning(png_ptr, "Image height is zero in IHDR");
-      error = 1;
-   }
-
-#  ifdef PNG_SET_USER_LIMITS_SUPPORTED
-   if (width > png_ptr->user_width_max)
-
-#  else
-   if (width > PNG_USER_WIDTH_MAX)
-#  endif
-   {
-      png_warning(png_ptr, "Image width exceeds user limit in IHDR");
-      error = 1;
-   }
-
-#  ifdef PNG_SET_USER_LIMITS_SUPPORTED
-   if (height > png_ptr->user_height_max)
-#  else
-   if (height > PNG_USER_HEIGHT_MAX)
-#  endif
-   {
-      png_warning(png_ptr, "Image height exceeds user limit in IHDR");
-      error = 1;
-   }
-
-   if (width > PNG_UINT_31_MAX)
-   {
-      png_warning(png_ptr, "Invalid image width in IHDR");
-      error = 1;
-   }
-
-   if (height > PNG_UINT_31_MAX)
-   {
-      png_warning(png_ptr, "Invalid image height in IHDR");
-      error = 1;
-   }
-
-   if (width > (PNG_UINT_32_MAX
-                 >> 3)      /* 8-byte RGBA pixels */
-                 - 48       /* bigrowbuf hack */
-                 - 1        /* filter byte */
-                 - 7*8      /* rounding of width to multiple of 8 pixels */
-                 - 8)       /* extra max_pixel_depth pad */
-      png_warning(png_ptr, "Width is too large for libpng to process pixels");
-
-   /* Check other values */
-   if (bit_depth != 1 && bit_depth != 2 && bit_depth != 4 &&
-       bit_depth != 8 && bit_depth != 16)
-   {
-      png_warning(png_ptr, "Invalid bit depth in IHDR");
-      error = 1;
-   }
-
-   if (color_type < 0 || color_type == 1 ||
-       color_type == 5 || color_type > 6)
-   {
-      png_warning(png_ptr, "Invalid color type in IHDR");
-      error = 1;
-   }
-
-   if (((color_type == PNG_COLOR_TYPE_PALETTE) && bit_depth > 8) ||
-       ((color_type == PNG_COLOR_TYPE_RGB ||
-         color_type == PNG_COLOR_TYPE_GRAY_ALPHA ||
-         color_type == PNG_COLOR_TYPE_RGB_ALPHA) && bit_depth < 8))
-   {
-      png_warning(png_ptr, "Invalid color type/bit depth combination in IHDR");
-      error = 1;
-   }
-
-   if (interlace_type >= PNG_INTERLACE_LAST)
-   {
-      png_warning(png_ptr, "Unknown interlace method in IHDR");
-      error = 1;
-   }
-
-   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
-   {
-      png_warning(png_ptr, "Unknown compression method in IHDR");
-      error = 1;
-   }
-
-#  ifdef PNG_MNG_FEATURES_SUPPORTED
-   /* Accept filter_method 64 (intrapixel differencing) only if
-    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
-    * 2. Libpng did not read a PNG signature (this filter_method is only
-    *    used in PNG datastreams that are embedded in MNG datastreams) and
-    * 3. The application called png_permit_mng_features with a mask that
-    *    included PNG_FLAG_MNG_FILTER_64 and
-    * 4. The filter_method is 64 and
-    * 5. The color_type is RGB or RGBA
-    */
-   if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) &&
-       png_ptr->mng_features_permitted)
-      png_warning(png_ptr, "MNG features are not allowed in a PNG datastream");
-
-   if (filter_type != PNG_FILTER_TYPE_BASE)
-   {
-      if (!((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
-          (filter_type == PNG_INTRAPIXEL_DIFFERENCING) &&
-          ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) == 0) &&
-          (color_type == PNG_COLOR_TYPE_RGB ||
-          color_type == PNG_COLOR_TYPE_RGB_ALPHA)))
-      {
-         png_warning(png_ptr, "Unknown filter method in IHDR");
-         error = 1;
-      }
-
-      if (png_ptr->mode & PNG_HAVE_PNG_SIGNATURE)
-      {
-         png_warning(png_ptr, "Invalid filter method in IHDR");
-         error = 1;
-      }
-   }
-
-#  else
-   if (filter_type != PNG_FILTER_TYPE_BASE)
-   {
-      png_warning(png_ptr, "Unknown filter method in IHDR");
-      error = 1;
-   }
-#  endif
-
-   if (error == 1)
-      png_error(png_ptr, "Invalid IHDR data");
-}
-
-#if defined(PNG_sCAL_SUPPORTED) || defined(PNG_pCAL_SUPPORTED)
-/* ASCII to fp functions */
-/* Check an ASCII formated floating point value, see the more detailed
- * comments in pngpriv.h
- */
-/* The following is used internally to preserve the sticky flags */
-#define png_fp_add(state, flags) ((state) |= (flags))
-#define png_fp_set(state, value) ((state) = (value) | ((state) & PNG_FP_STICKY))
-
-int /* PRIVATE */
-png_check_fp_number(png_const_charp string, png_size_t size, int *statep,
-   png_size_tp whereami)
-{
-   int state = *statep;
-   png_size_t i = *whereami;
-
-   while (i < size)
-   {
-      int type;
-      /* First find the type of the next character */
-      switch (string[i])
-      {
-      case 43:  type = PNG_FP_SAW_SIGN;                   break;
-      case 45:  type = PNG_FP_SAW_SIGN + PNG_FP_NEGATIVE; break;
-      case 46:  type = PNG_FP_SAW_DOT;                    break;
-      case 48:  type = PNG_FP_SAW_DIGIT;                  break;
-      case 49: case 50: case 51: case 52:
-      case 53: case 54: case 55: case 56:
-      case 57:  type = PNG_FP_SAW_DIGIT + PNG_FP_NONZERO; break;
-      case 69:
-      case 101: type = PNG_FP_SAW_E;                      break;
-      default:  goto PNG_FP_End;
-      }
-
-      /* Now deal with this type according to the current
-       * state, the type is arranged to not overlap the
-       * bits of the PNG_FP_STATE.
-       */
-      switch ((state & PNG_FP_STATE) + (type & PNG_FP_SAW_ANY))
-      {
-      case PNG_FP_INTEGER + PNG_FP_SAW_SIGN:
-         if (state & PNG_FP_SAW_ANY)
-            goto PNG_FP_End; /* not a part of the number */
-
-         png_fp_add(state, type);
-         break;
-
-      case PNG_FP_INTEGER + PNG_FP_SAW_DOT:
-         /* Ok as trailer, ok as lead of fraction. */
-         if (state & PNG_FP_SAW_DOT) /* two dots */
-            goto PNG_FP_End;
-
-         else if (state & PNG_FP_SAW_DIGIT) /* trailing dot? */
-            png_fp_add(state, type);
-
-         else
-            png_fp_set(state, PNG_FP_FRACTION | type);
-
-         break;
-
-      case PNG_FP_INTEGER + PNG_FP_SAW_DIGIT:
-         if (state & PNG_FP_SAW_DOT) /* delayed fraction */
-            png_fp_set(state, PNG_FP_FRACTION | PNG_FP_SAW_DOT);
-
-         png_fp_add(state, type | PNG_FP_WAS_VALID);
-
-         break;
-
-      case PNG_FP_INTEGER + PNG_FP_SAW_E:
-         if ((state & PNG_FP_SAW_DIGIT) == 0)
-            goto PNG_FP_End;
-
-         png_fp_set(state, PNG_FP_EXPONENT);
-
-         break;
-
-   /* case PNG_FP_FRACTION + PNG_FP_SAW_SIGN:
-         goto PNG_FP_End; ** no sign in fraction */
-
-   /* case PNG_FP_FRACTION + PNG_FP_SAW_DOT:
-         goto PNG_FP_End; ** Because SAW_DOT is always set */
-
-      case PNG_FP_FRACTION + PNG_FP_SAW_DIGIT:
-         png_fp_add(state, type | PNG_FP_WAS_VALID);
-         break;
-
-      case PNG_FP_FRACTION + PNG_FP_SAW_E:
-         /* This is correct because the trailing '.' on an
-          * integer is handled above - so we can only get here
-          * with the sequence ".E" (with no preceding digits).
-          */
-         if ((state & PNG_FP_SAW_DIGIT) == 0)
-            goto PNG_FP_End;
-
-         png_fp_set(state, PNG_FP_EXPONENT);
-
-         break;
-
-      case PNG_FP_EXPONENT + PNG_FP_SAW_SIGN:
-         if (state & PNG_FP_SAW_ANY)
-            goto PNG_FP_End; /* not a part of the number */
-
-         png_fp_add(state, PNG_FP_SAW_SIGN);
-
-         break;
-
-   /* case PNG_FP_EXPONENT + PNG_FP_SAW_DOT:
-         goto PNG_FP_End; */
-
-      case PNG_FP_EXPONENT + PNG_FP_SAW_DIGIT:
-         png_fp_add(state, PNG_FP_SAW_DIGIT | PNG_FP_WAS_VALID);
-
-         break;
-
-   /* case PNG_FP_EXPONEXT + PNG_FP_SAW_E:
-         goto PNG_FP_End; */
-
-      default: goto PNG_FP_End; /* I.e. break 2 */
-      }
-
-      /* The character seems ok, continue. */
-      ++i;
-   }
-
-PNG_FP_End:
-   /* Here at the end, update the state and return the correct
-    * return code.
-    */
-   *statep = state;
-   *whereami = i;
-
-   return (state & PNG_FP_SAW_DIGIT) != 0;
-}
-
-
-/* The same but for a complete string. */
-int
-png_check_fp_string(png_const_charp string, png_size_t size)
-{
-   int        state=0;
-   png_size_t char_index=0;
-
-   if (png_check_fp_number(string, size, &state, &char_index) &&
-      (char_index == size || string[char_index] == 0))
-      return state /* must be non-zero - see above */;
-
-   return 0; /* i.e. fail */
-}
-#endif /* pCAL or sCAL */
-
-#ifdef PNG_READ_sCAL_SUPPORTED
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-/* Utility used below - a simple accurate power of ten from an integral
- * exponent.
- */
-static double
-png_pow10(int power)
-{
-   int recip = 0;
-   double d = 1.0;
-
-   /* Handle negative exponent with a reciprocal at the end because
-    * 10 is exact whereas .1 is inexact in base 2
-    */
-   if (power < 0)
-   {
-      if (power < DBL_MIN_10_EXP) return 0;
-      recip = 1, power = -power;
-   }
-
-   if (power > 0)
-   {
-      /* Decompose power bitwise. */
-      double mult = 10.0;
-      do
-      {
-         if (power & 1) d *= mult;
-         mult *= mult;
-         power >>= 1;
-      }
-      while (power > 0);
-
-      if (recip) d = 1/d;
-   }
-   /* else power is 0 and d is 1 */
-
-   return d;
-}
-
-/* Function to format a floating point value in ASCII with a given
- * precision.
- */
-void /* PRIVATE */
-png_ascii_from_fp(png_structp png_ptr, png_charp ascii, png_size_t size,
-    double fp, unsigned int precision)
-{
-   /* We use standard functions from math.h, but not printf because
-    * that would require stdio.  The caller must supply a buffer of
-    * sufficient size or we will png_error.  The tests on size and
-    * the space in ascii[] consumed are indicated below.
-    */
-   if (precision < 1)
-      precision = DBL_DIG;
-
-   /* Enforce the limit of the implementation precision too. */
-   if (precision > DBL_DIG+1)
-      precision = DBL_DIG+1;
-
-   /* Basic sanity checks */
-   if (size >= precision+5) /* See the requirements below. */
-   {
-      if (fp < 0)
-      {
-         fp = -fp;
-         *ascii++ = 45; /* '-'  PLUS 1 TOTAL 1 */
-         --size;
-      }
-
-      if (fp >= DBL_MIN && fp <= DBL_MAX)
-      {
-         int exp_b10;       /* A base 10 exponent */
-         double base;   /* 10^exp_b10 */
-
-         /* First extract a base 10 exponent of the number,
-          * the calculation below rounds down when converting
-          * from base 2 to base 10 (multiply by log10(2) -
-          * 0.3010, but 77/256 is 0.3008, so exp_b10 needs to
-          * be increased.  Note that the arithmetic shift
-          * performs a floor() unlike C arithmetic - using a
-          * C multiply would break the following for negative
-          * exponents.
-          */
-         (void)frexp(fp, &exp_b10); /* exponent to base 2 */
-
-         exp_b10 = (exp_b10 * 77) >> 8; /* <= exponent to base 10 */
-
-         /* Avoid underflow here. */
-         base = png_pow10(exp_b10); /* May underflow */
-
-         while (base < DBL_MIN || base < fp)
-         {
-            /* And this may overflow. */
-            double test = png_pow10(exp_b10+1);
-
-            if (test <= DBL_MAX)
-               ++exp_b10, base = test;
-
-            else
-               break;
-         }
-
-         /* Normalize fp and correct exp_b10, after this fp is in the
-          * range [.1,1) and exp_b10 is both the exponent and the digit
-          * *before* which the decimal point should be inserted
-          * (starting with 0 for the first digit).  Note that this
-          * works even if 10^exp_b10 is out of range because of the
-          * test on DBL_MAX above.
-          */
-         fp /= base;
-         while (fp >= 1) fp /= 10, ++exp_b10;
-
-         /* Because of the code above fp may, at this point, be
-          * less than .1, this is ok because the code below can
-          * handle the leading zeros this generates, so no attempt
-          * is made to correct that here.
-          */
-
-         {
-            int czero, clead, cdigits;
-            char exponent[10];
-
-            /* Allow up to two leading zeros - this will not lengthen
-             * the number compared to using E-n.
-             */
-            if (exp_b10 < 0 && exp_b10 > -3) /* PLUS 3 TOTAL 4 */
-            {
-               czero = -exp_b10; /* PLUS 2 digits: TOTAL 3 */
-               exp_b10 = 0;      /* Dot added below before first output. */
-            }
-            else
-               czero = 0;    /* No zeros to add */
-
-            /* Generate the digit list, stripping trailing zeros and
-             * inserting a '.' before a digit if the exponent is 0.
-             */
-            clead = czero; /* Count of leading zeros */
-            cdigits = 0;   /* Count of digits in list. */
-
-            do
-            {
-               double d;
-
-               fp *= 10.0;
-
-               /* Use modf here, not floor and subtract, so that
-                * the separation is done in one step.  At the end
-                * of the loop don't break the number into parts so
-                * that the final digit is rounded.
-                */
-               if (cdigits+czero-clead+1 < (int)precision)
-                  fp = modf(fp, &d);
-
-               else
-               {
-                  d = floor(fp + .5);
-
-                  if (d > 9.0)
-                  {
-                     /* Rounding up to 10, handle that here. */
-                     if (czero > 0)
-                     {
-                        --czero, d = 1;
-                        if (cdigits == 0) --clead;
-                     }
-
-                     else
-                     {
-                        while (cdigits > 0 && d > 9.0)
-                        {
-                           int ch = *--ascii;
-
-                           if (exp_b10 != (-1))
-                              ++exp_b10;
-
-                           else if (ch == 46)
-                           {
-                              ch = *--ascii, ++size;
-                              /* Advance exp_b10 to '1', so that the
-                               * decimal point happens after the
-                               * previous digit.
-                               */
-                              exp_b10 = 1;
-                           }
-
-                           --cdigits;
-                           d = ch - 47;  /* I.e. 1+(ch-48) */
-                        }
-
-                        /* Did we reach the beginning? If so adjust the
-                         * exponent but take into account the leading
-                         * decimal point.
-                         */
-                        if (d > 9.0)  /* cdigits == 0 */
-                        {
-                           if (exp_b10 == (-1))
-                           {
-                              /* Leading decimal point (plus zeros?), if
-                               * we lose the decimal point here it must
-                               * be reentered below.
-                               */
-                              int ch = *--ascii;
-
-                              if (ch == 46)
-                                 ++size, exp_b10 = 1;
-
-                              /* Else lost a leading zero, so 'exp_b10' is
-                               * still ok at (-1)
-                               */
-                           }
-                           else
-                              ++exp_b10;
-
-                           /* In all cases we output a '1' */
-                           d = 1.0;
-                        }
-                     }
-                  }
-                  fp = 0; /* Guarantees termination below. */
-               }
-
-               if (d == 0.0)
-               {
-                  ++czero;
-                  if (cdigits == 0) ++clead;
-               }
-
-               else
-               {
-                  /* Included embedded zeros in the digit count. */
-                  cdigits += czero - clead;
-                  clead = 0;
-
-                  while (czero > 0)
-                  {
-                     /* exp_b10 == (-1) means we just output the decimal
-                      * place - after the DP don't adjust 'exp_b10' any
-                      * more!
-                      */
-                     if (exp_b10 != (-1))
-                     {
-                        if (exp_b10 == 0) *ascii++ = 46, --size;
-                        /* PLUS 1: TOTAL 4 */
-                        --exp_b10;
-                     }
-                     *ascii++ = 48, --czero;
-                  }
-
-                  if (exp_b10 != (-1))
-                  {
-                     if (exp_b10 == 0) *ascii++ = 46, --size; /* counted
-                                                                 above */
-                     --exp_b10;
-                  }
-
-                  *ascii++ = (char)(48 + (int)d), ++cdigits;
-               }
-            }
-            while (cdigits+czero-clead < (int)precision && fp > DBL_MIN);
-
-            /* The total output count (max) is now 4+precision */
-
-            /* Check for an exponent, if we don't need one we are
-             * done and just need to terminate the string.  At
-             * this point exp_b10==(-1) is effectively if flag - it got
-             * to '-1' because of the decrement after outputing
-             * the decimal point above (the exponent required is
-             * *not* -1!)
-             */
-            if (exp_b10 >= (-1) && exp_b10 <= 2)
-            {
-               /* The following only happens if we didn't output the
-                * leading zeros above for negative exponent, so this
-                * doest add to the digit requirement.  Note that the
-                * two zeros here can only be output if the two leading
-                * zeros were *not* output, so this doesn't increase
-                * the output count.
-                */
-               while (--exp_b10 >= 0) *ascii++ = 48;
-
-               *ascii = 0;
-
-               /* Total buffer requirement (including the '\0') is
-                * 5+precision - see check at the start.
-                */
-               return;
-            }
-
-            /* Here if an exponent is required, adjust size for
-             * the digits we output but did not count.  The total
-             * digit output here so far is at most 1+precision - no
-             * decimal point and no leading or trailing zeros have
-             * been output.
-             */
-            size -= cdigits;
-
-            *ascii++ = 69, --size;    /* 'E': PLUS 1 TOTAL 2+precision */
-
-            /* The following use of an unsigned temporary avoids ambiguities in
-             * the signed arithmetic on exp_b10 and permits GCC at least to do
-             * better optimization.
-             */
-            {
-               unsigned int uexp_b10;
-
-               if (exp_b10 < 0)
-               {
-                  *ascii++ = 45, --size; /* '-': PLUS 1 TOTAL 3+precision */
-                  uexp_b10 = -exp_b10;
-               }
-
-               else
-                  uexp_b10 = exp_b10;
-
-               cdigits = 0;
-
-               while (uexp_b10 > 0)
-               {
-                  exponent[cdigits++] = (char)(48 + uexp_b10 % 10);
-                  uexp_b10 /= 10;
-               }
-            }
-
-            /* Need another size check here for the exponent digits, so
-             * this need not be considered above.
-             */
-            if ((int)size > cdigits)
-            {
-               while (cdigits > 0) *ascii++ = exponent[--cdigits];
-
-               *ascii = 0;
-
-               return;
-            }
-         }
-      }
-      else if (!(fp >= DBL_MIN))
-      {
-         *ascii++ = 48; /* '0' */
-         *ascii = 0;
-         return;
-      }
-      else
-      {
-         *ascii++ = 105; /* 'i' */
-         *ascii++ = 110; /* 'n' */
-         *ascii++ = 102; /* 'f' */
-         *ascii = 0;
-         return;
-      }
-   }
-
-   /* Here on buffer too small. */
-   png_error(png_ptr, "ASCII conversion buffer too small");
-}
-
-#  endif /* FLOATING_POINT */
-
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-/* Function to format a fixed point value in ASCII.
- */
-void /* PRIVATE */
-png_ascii_from_fixed(png_structp png_ptr, png_charp ascii, png_size_t size,
-    png_fixed_point fp)
-{
-   /* Require space for 10 decimal digits, a decimal point, a minus sign and a
-    * trailing \0, 13 characters:
-    */
-   if (size > 12)
-   {
-      png_uint_32 num;
-
-      /* Avoid overflow here on the minimum integer. */
-      if (fp < 0)
-         *ascii++ = 45, --size, num = -fp;
-      else
-         num = fp;
-
-      if (num <= 0x80000000) /* else overflowed */
-      {
-         unsigned int ndigits = 0, first = 16 /* flag value */;
-         char digits[10];
-
-         while (num)
-         {
-            /* Split the low digit off num: */
-            unsigned int tmp = num/10;
-            num -= tmp*10;
-            digits[ndigits++] = (char)(48 + num);
-            /* Record the first non-zero digit, note that this is a number
-             * starting at 1, it's not actually the array index.
-             */
-            if (first == 16 && num > 0)
-               first = ndigits;
-            num = tmp;
-         }
-
-         if (ndigits > 0)
-         {
-            while (ndigits > 5) *ascii++ = digits[--ndigits];
-            /* The remaining digits are fractional digits, ndigits is '5' or
-             * smaller at this point.  It is certainly not zero.  Check for a
-             * non-zero fractional digit:
-             */
-            if (first <= 5)
-            {
-               unsigned int i;
-               *ascii++ = 46; /* decimal point */
-               /* ndigits may be <5 for small numbers, output leading zeros
-                * then ndigits digits to first:
-                */
-               i = 5;
-               while (ndigits < i) *ascii++ = 48, --i;
-               while (ndigits >= first) *ascii++ = digits[--ndigits];
-               /* Don't output the trailing zeros! */
-            }
-         }
-         else
-            *ascii++ = 48;
-
-         /* And null terminate the string: */
-         *ascii = 0;
-         return;
-      }
-   }
-
-   /* Here on buffer too small. */
-   png_error(png_ptr, "ASCII conversion buffer too small");
-}
-#   endif /* FIXED_POINT */
-#endif /* READ_SCAL */
-
-#if defined(PNG_FLOATING_POINT_SUPPORTED) && \
-   !defined(PNG_FIXED_POINT_MACRO_SUPPORTED)
-png_fixed_point
-png_fixed(png_structp png_ptr, double fp, png_const_charp text)
-{
-   double r = floor(100000 * fp + .5);
-
-   if (r > 2147483647. || r < -2147483648.)
-      png_fixed_error(png_ptr, text);
-
-   return (png_fixed_point)r;
-}
-#endif
-
-#if defined(PNG_READ_GAMMA_SUPPORTED) || \
-    defined(PNG_INCH_CONVERSIONS_SUPPORTED) || defined(PNG__READ_pHYs_SUPPORTED)
-/* muldiv functions */
-/* This API takes signed arguments and rounds the result to the nearest
- * integer (or, for a fixed point number - the standard argument - to
- * the nearest .00001).  Overflow and divide by zero are signalled in
- * the result, a boolean - true on success, false on overflow.
- */
-int
-png_muldiv(png_fixed_point_p res, png_fixed_point a, png_int_32 times,
-    png_int_32 divisor)
-{
-   /* Return a * times / divisor, rounded. */
-   if (divisor != 0)
-   {
-      if (a == 0 || times == 0)
-      {
-         *res = 0;
-         return 1;
-      }
-      else
-      {
-#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-         double r = a;
-         r *= times;
-         r /= divisor;
-         r = floor(r+.5);
-
-         /* A png_fixed_point is a 32-bit integer. */
-         if (r <= 2147483647. && r >= -2147483648.)
-         {
-            *res = (png_fixed_point)r;
-            return 1;
-         }
-#else
-         int negative = 0;
-         png_uint_32 A, T, D;
-         png_uint_32 s16, s32, s00;
-
-         if (a < 0)
-            negative = 1, A = -a;
-         else
-            A = a;
-
-         if (times < 0)
-            negative = !negative, T = -times;
-         else
-            T = times;
-
-         if (divisor < 0)
-            negative = !negative, D = -divisor;
-         else
-            D = divisor;
-
-         /* Following can't overflow because the arguments only
-          * have 31 bits each, however the result may be 32 bits.
-          */
-         s16 = (A >> 16) * (T & 0xffff) +
-                           (A & 0xffff) * (T >> 16);
-         /* Can't overflow because the a*times bit is only 30
-          * bits at most.
-          */
-         s32 = (A >> 16) * (T >> 16) + (s16 >> 16);
-         s00 = (A & 0xffff) * (T & 0xffff);
-
-         s16 = (s16 & 0xffff) << 16;
-         s00 += s16;
-
-         if (s00 < s16)
-            ++s32; /* carry */
-
-         if (s32 < D) /* else overflow */
-         {
-            /* s32.s00 is now the 64-bit product, do a standard
-             * division, we know that s32 < D, so the maximum
-             * required shift is 31.
-             */
-            int bitshift = 32;
-            png_fixed_point result = 0; /* NOTE: signed */
-
-            while (--bitshift >= 0)
-            {
-               png_uint_32 d32, d00;
-
-               if (bitshift > 0)
-                  d32 = D >> (32-bitshift), d00 = D << bitshift;
-
-               else
-                  d32 = 0, d00 = D;
-
-               if (s32 > d32)
-               {
-                  if (s00 < d00) --s32; /* carry */
-                  s32 -= d32, s00 -= d00, result += 1<<bitshift;
-               }
-
-               else
-                  if (s32 == d32 && s00 >= d00)
-                     s32 = 0, s00 -= d00, result += 1<<bitshift;
-            }
-
-            /* Handle the rounding. */
-            if (s00 >= (D >> 1))
-               ++result;
-
-            if (negative)
-               result = -result;
-
-            /* Check for overflow. */
-            if ((negative && result <= 0) || (!negative && result >= 0))
-            {
-               *res = result;
-               return 1;
-            }
-         }
-#endif
-      }
-   }
-
-   return 0;
-}
-#endif /* READ_GAMMA || INCH_CONVERSIONS */
-
-#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_INCH_CONVERSIONS_SUPPORTED)
-/* The following is for when the caller doesn't much care about the
- * result.
- */
-png_fixed_point
-png_muldiv_warn(png_structp png_ptr, png_fixed_point a, png_int_32 times,
-    png_int_32 divisor)
-{
-   png_fixed_point result;
-
-   if (png_muldiv(&result, a, times, divisor))
-      return result;
-
-   png_warning(png_ptr, "fixed point overflow ignored");
-   return 0;
-}
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED /* more fixed point functions for gammma */
-/* Calculate a reciprocal, return 0 on div-by-zero or overflow. */
-png_fixed_point
-png_reciprocal(png_fixed_point a)
-{
-#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-   double r = floor(1E10/a+.5);
-
-   if (r <= 2147483647. && r >= -2147483648.)
-      return (png_fixed_point)r;
-#else
-   png_fixed_point res;
-
-   if (png_muldiv(&res, 100000, 100000, a))
-      return res;
-#endif
-
-   return 0; /* error/overflow */
-}
-
-/* A local convenience routine. */
-static png_fixed_point
-png_product2(png_fixed_point a, png_fixed_point b)
-{
-   /* The required result is 1/a * 1/b; the following preserves accuracy. */
-#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-   double r = a * 1E-5;
-   r *= b;
-   r = floor(r+.5);
-
-   if (r <= 2147483647. && r >= -2147483648.)
-      return (png_fixed_point)r;
-#else
-   png_fixed_point res;
-
-   if (png_muldiv(&res, a, b, 100000))
-      return res;
-#endif
-
-   return 0; /* overflow */
-}
-
-/* The inverse of the above. */
-png_fixed_point
-png_reciprocal2(png_fixed_point a, png_fixed_point b)
-{
-   /* The required result is 1/a * 1/b; the following preserves accuracy. */
-#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-   double r = 1E15/a;
-   r /= b;
-   r = floor(r+.5);
-
-   if (r <= 2147483647. && r >= -2147483648.)
-      return (png_fixed_point)r;
-#else
-   /* This may overflow because the range of png_fixed_point isn't symmetric,
-    * but this API is only used for the product of file and screen gamma so it
-    * doesn't matter that the smallest number it can produce is 1/21474, not
-    * 1/100000
-    */
-   png_fixed_point res = png_product2(a, b);
-
-   if (res != 0)
-      return png_reciprocal(res);
-#endif
-
-   return 0; /* overflow */
-}
-#endif /* READ_GAMMA */
-
-#ifdef PNG_CHECK_cHRM_SUPPORTED
-/* Added at libpng version 1.2.34 (Dec 8, 2008) and 1.4.0 (Jan 2,
- * 2010: moved from pngset.c) */
-/*
- *    Multiply two 32-bit numbers, V1 and V2, using 32-bit
- *    arithmetic, to produce a 64-bit result in the HI/LO words.
- *
- *                  A B
- *                x C D
- *               ------
- *              AD || BD
- *        AC || CB || 0
- *
- *    where A and B are the high and low 16-bit words of V1,
- *    C and D are the 16-bit words of V2, AD is the product of
- *    A and D, and X || Y is (X << 16) + Y.
-*/
-
-void /* PRIVATE */
-png_64bit_product (long v1, long v2, unsigned long *hi_product,
-    unsigned long *lo_product)
-{
-   int a, b, c, d;
-   long lo, hi, x, y;
-
-   a = (v1 >> 16) & 0xffff;
-   b = v1 & 0xffff;
-   c = (v2 >> 16) & 0xffff;
-   d = v2 & 0xffff;
-
-   lo = b * d;                   /* BD */
-   x = a * d + c * b;            /* AD + CB */
-   y = ((lo >> 16) & 0xffff) + x;
-
-   lo = (lo & 0xffff) | ((y & 0xffff) << 16);
-   hi = (y >> 16) & 0xffff;
-
-   hi += a * c;                  /* AC */
-
-   *hi_product = (unsigned long)hi;
-   *lo_product = (unsigned long)lo;
-}
-#endif /* CHECK_cHRM */
-
-#ifdef PNG_READ_GAMMA_SUPPORTED /* gamma table code */
-#ifndef PNG_FLOATING_ARITHMETIC_SUPPORTED
-/* Fixed point gamma.
- *
- * To calculate gamma this code implements fast log() and exp() calls using only
- * fixed point arithmetic.  This code has sufficient precision for either 8-bit
- * or 16-bit sample values.
- *
- * The tables used here were calculated using simple 'bc' programs, but C double
- * precision floating point arithmetic would work fine.  The programs are given
- * at the head of each table.
- *
- * 8-bit log table
- *   This is a table of -log(value/255)/log(2) for 'value' in the range 128 to
- *   255, so it's the base 2 logarithm of a normalized 8-bit floating point
- *   mantissa.  The numbers are 32-bit fractions.
- */
-static png_uint_32
-png_8bit_l2[128] =
-{
-#  ifdef PNG_DO_BC
-      for (i=128;i<256;++i) { .5 - l(i/255)/l(2)*65536*65536; }
-#  else
-   4270715492U, 4222494797U, 4174646467U, 4127164793U, 4080044201U, 4033279239U,
-   3986864580U, 3940795015U, 3895065449U, 3849670902U, 3804606499U, 3759867474U,
-   3715449162U, 3671346997U, 3627556511U, 3584073329U, 3540893168U, 3498011834U,
-   3455425220U, 3413129301U, 3371120137U, 3329393864U, 3287946700U, 3246774933U,
-   3205874930U, 3165243125U, 3124876025U, 3084770202U, 3044922296U, 3005329011U,
-   2965987113U, 2926893432U, 2888044853U, 2849438323U, 2811070844U, 2772939474U,
-   2735041326U, 2697373562U, 2659933400U, 2622718104U, 2585724991U, 2548951424U,
-   2512394810U, 2476052606U, 2439922311U, 2404001468U, 2368287663U, 2332778523U,
-   2297471715U, 2262364947U, 2227455964U, 2192742551U, 2158222529U, 2123893754U,
-   2089754119U, 2055801552U, 2022034013U, 1988449497U, 1955046031U, 1921821672U,
-   1888774511U, 1855902668U, 1823204291U, 1790677560U, 1758320682U, 1726131893U,
-   1694109454U, 1662251657U, 1630556815U, 1599023271U, 1567649391U, 1536433567U,
-   1505374214U, 1474469770U, 1443718700U, 1413119487U, 1382670639U, 1352370686U,
-   1322218179U, 1292211689U, 1262349810U, 1232631153U, 1203054352U, 1173618059U,
-   1144320946U, 1115161701U, 1086139034U, 1057251672U, 1028498358U, 999877854U,
-   971388940U, 943030410U, 914801076U, 886699767U, 858725327U, 830876614U,
-   803152505U, 775551890U, 748073672U, 720716771U, 693480120U, 666362667U,
-   639363374U, 612481215U, 585715177U, 559064263U, 532527486U, 506103872U,
-   479792461U, 453592303U, 427502463U, 401522014U, 375650043U, 349885648U,
-   324227938U, 298676034U, 273229066U, 247886176U, 222646516U, 197509248U,
-   172473545U, 147538590U, 122703574U, 97967701U, 73330182U, 48790236U,
-   24347096U, 0U
-#  endif
-
-#if 0
-   /* The following are the values for 16-bit tables - these work fine for the
-    * 8-bit conversions but produce very slightly larger errors in the 16-bit
-    * log (about 1.2 as opposed to 0.7 absolute error in the final value).  To
-    * use these all the shifts below must be adjusted appropriately.
-    */
-   65166, 64430, 63700, 62976, 62257, 61543, 60835, 60132, 59434, 58741, 58054,
-   57371, 56693, 56020, 55352, 54689, 54030, 53375, 52726, 52080, 51439, 50803,
-   50170, 49542, 48918, 48298, 47682, 47070, 46462, 45858, 45257, 44661, 44068,
-   43479, 42894, 42312, 41733, 41159, 40587, 40020, 39455, 38894, 38336, 37782,
-   37230, 36682, 36137, 35595, 35057, 34521, 33988, 33459, 32932, 32408, 31887,
-   31369, 30854, 30341, 29832, 29325, 28820, 28319, 27820, 27324, 26830, 26339,
-   25850, 25364, 24880, 24399, 23920, 23444, 22970, 22499, 22029, 21562, 21098,
-   20636, 20175, 19718, 19262, 18808, 18357, 17908, 17461, 17016, 16573, 16132,
-   15694, 15257, 14822, 14390, 13959, 13530, 13103, 12678, 12255, 11834, 11415,
-   10997, 10582, 10168, 9756, 9346, 8937, 8531, 8126, 7723, 7321, 6921, 6523,
-   6127, 5732, 5339, 4947, 4557, 4169, 3782, 3397, 3014, 2632, 2251, 1872, 1495,
-   1119, 744, 372
-#endif
-};
-
-PNG_STATIC png_int_32
-png_log8bit(unsigned int x)
-{
-   unsigned int lg2 = 0;
-   /* Each time 'x' is multiplied by 2, 1 must be subtracted off the final log,
-    * because the log is actually negate that means adding 1.  The final
-    * returned value thus has the range 0 (for 255 input) to 7.994 (for 1
-    * input), return 7.99998 for the overflow (log 0) case - so the result is
-    * always at most 19 bits.
-    */
-   if ((x &= 0xff) == 0)
-      return 0xffffffff;
-
-   if ((x & 0xf0) == 0)
-      lg2  = 4, x <<= 4;
-
-   if ((x & 0xc0) == 0)
-      lg2 += 2, x <<= 2;
-
-   if ((x & 0x80) == 0)
-      lg2 += 1, x <<= 1;
-
-   /* result is at most 19 bits, so this cast is safe: */
-   return (png_int_32)((lg2 << 16) + ((png_8bit_l2[x-128]+32768)>>16));
-}
-
-/* The above gives exact (to 16 binary places) log2 values for 8-bit images,
- * for 16-bit images we use the most significant 8 bits of the 16-bit value to
- * get an approximation then multiply the approximation by a correction factor
- * determined by the remaining up to 8 bits.  This requires an additional step
- * in the 16-bit case.
- *
- * We want log2(value/65535), we have log2(v'/255), where:
- *
- *    value = v' * 256 + v''
- *          = v' * f
- *
- * So f is value/v', which is equal to (256+v''/v') since v' is in the range 128
- * to 255 and v'' is in the range 0 to 255 f will be in the range 256 to less
- * than 258.  The final factor also needs to correct for the fact that our 8-bit
- * value is scaled by 255, whereas the 16-bit values must be scaled by 65535.
- *
- * This gives a final formula using a calculated value 'x' which is value/v' and
- * scaling by 65536 to match the above table:
- *
- *   log2(x/257) * 65536
- *
- * Since these numbers are so close to '1' we can use simple linear
- * interpolation between the two end values 256/257 (result -368.61) and 258/257
- * (result 367.179).  The values used below are scaled by a further 64 to give
- * 16-bit precision in the interpolation:
- *
- * Start (256): -23591
- * Zero  (257):      0
- * End   (258):  23499
- */
-PNG_STATIC png_int_32
-png_log16bit(png_uint_32 x)
-{
-   unsigned int lg2 = 0;
-
-   /* As above, but now the input has 16 bits. */
-   if ((x &= 0xffff) == 0)
-      return 0xffffffff;
-
-   if ((x & 0xff00) == 0)
-      lg2  = 8, x <<= 8;
-
-   if ((x & 0xf000) == 0)
-      lg2 += 4, x <<= 4;
-
-   if ((x & 0xc000) == 0)
-      lg2 += 2, x <<= 2;
-
-   if ((x & 0x8000) == 0)
-      lg2 += 1, x <<= 1;
-
-   /* Calculate the base logarithm from the top 8 bits as a 28-bit fractional
-    * value.
-    */
-   lg2 <<= 28;
-   lg2 += (png_8bit_l2[(x>>8)-128]+8) >> 4;
-
-   /* Now we need to interpolate the factor, this requires a division by the top
-    * 8 bits.  Do this with maximum precision.
-    */
-   x = ((x << 16) + (x >> 9)) / (x >> 8);
-
-   /* Since we divided by the top 8 bits of 'x' there will be a '1' at 1<<24,
-    * the value at 1<<16 (ignoring this) will be 0 or 1; this gives us exactly
-    * 16 bits to interpolate to get the low bits of the result.  Round the
-    * answer.  Note that the end point values are scaled by 64 to retain overall
-    * precision and that 'lg2' is current scaled by an extra 12 bits, so adjust
-    * the overall scaling by 6-12.  Round at every step.
-    */
-   x -= 1U << 24;
-
-   if (x <= 65536U) /* <= '257' */
-      lg2 += ((23591U * (65536U-x)) + (1U << (16+6-12-1))) >> (16+6-12);
-
-   else
-      lg2 -= ((23499U * (x-65536U)) + (1U << (16+6-12-1))) >> (16+6-12);
-
-   /* Safe, because the result can't have more than 20 bits: */
-   return (png_int_32)((lg2 + 2048) >> 12);
-}
-
-/* The 'exp()' case must invert the above, taking a 20-bit fixed point
- * logarithmic value and returning a 16 or 8-bit number as appropriate.  In
- * each case only the low 16 bits are relevant - the fraction - since the
- * integer bits (the top 4) simply determine a shift.
- *
- * The worst case is the 16-bit distinction between 65535 and 65534, this
- * requires perhaps spurious accuracy in the decoding of the logarithm to
- * distinguish log2(65535/65534.5) - 10^-5 or 17 bits.  There is little chance
- * of getting this accuracy in practice.
- *
- * To deal with this the following exp() function works out the exponent of the
- * frational part of the logarithm by using an accurate 32-bit value from the
- * top four fractional bits then multiplying in the remaining bits.
- */
-static png_uint_32
-png_32bit_exp[16] =
-{
-#  ifdef PNG_DO_BC
-      for (i=0;i<16;++i) { .5 + e(-i/16*l(2))*2^32; }
-#  else
-   /* NOTE: the first entry is deliberately set to the maximum 32-bit value. */
-   4294967295U, 4112874773U, 3938502376U, 3771522796U, 3611622603U, 3458501653U,
-   3311872529U, 3171459999U, 3037000500U, 2908241642U, 2784941738U, 2666869345U,
-   2553802834U, 2445529972U, 2341847524U, 2242560872U
-#  endif
-};
-
-/* Adjustment table; provided to explain the numbers in the code below. */
-#ifdef PNG_DO_BC
-for (i=11;i>=0;--i){ print i, " ", (1 - e(-(2^i)/65536*l(2))) * 2^(32-i), "\n"}
-   11 44937.64284865548751208448
-   10 45180.98734845585101160448
-    9 45303.31936980687359311872
-    8 45364.65110595323018870784
-    7 45395.35850361789624614912
-    6 45410.72259715102037508096
-    5 45418.40724413220722311168
-    4 45422.25021786898173001728
-    3 45424.17186732298419044352
-    2 45425.13273269940811464704
-    1 45425.61317555035558641664
-    0 45425.85339951654943850496
-#endif
-
-PNG_STATIC png_uint_32
-png_exp(png_fixed_point x)
-{
-   if (x > 0 && x <= 0xfffff) /* Else overflow or zero (underflow) */
-   {
-      /* Obtain a 4-bit approximation */
-      png_uint_32 e = png_32bit_exp[(x >> 12) & 0xf];
-
-      /* Incorporate the low 12 bits - these decrease the returned value by
-       * multiplying by a number less than 1 if the bit is set.  The multiplier
-       * is determined by the above table and the shift. Notice that the values
-       * converge on 45426 and this is used to allow linear interpolation of the
-       * low bits.
-       */
-      if (x & 0x800)
-         e -= (((e >> 16) * 44938U) +  16U) >> 5;
-
-      if (x & 0x400)
-         e -= (((e >> 16) * 45181U) +  32U) >> 6;
-
-      if (x & 0x200)
-         e -= (((e >> 16) * 45303U) +  64U) >> 7;
-
-      if (x & 0x100)
-         e -= (((e >> 16) * 45365U) + 128U) >> 8;
-
-      if (x & 0x080)
-         e -= (((e >> 16) * 45395U) + 256U) >> 9;
-
-      if (x & 0x040)
-         e -= (((e >> 16) * 45410U) + 512U) >> 10;
-
-      /* And handle the low 6 bits in a single block. */
-      e -= (((e >> 16) * 355U * (x & 0x3fU)) + 256U) >> 9;
-
-      /* Handle the upper bits of x. */
-      e >>= x >> 16;
-      return e;
-   }
-
-   /* Check for overflow */
-   if (x <= 0)
-      return png_32bit_exp[0];
-
-   /* Else underflow */
-   return 0;
-}
-
-PNG_STATIC png_byte
-png_exp8bit(png_fixed_point lg2)
-{
-   /* Get a 32-bit value: */
-   png_uint_32 x = png_exp(lg2);
-
-   /* Convert the 32-bit value to 0..255 by multiplying by 256-1, note that the
-    * second, rounding, step can't overflow because of the first, subtraction,
-    * step.
-    */
-   x -= x >> 8;
-   return (png_byte)((x + 0x7fffffU) >> 24);
-}
-
-PNG_STATIC png_uint_16
-png_exp16bit(png_fixed_point lg2)
-{
-   /* Get a 32-bit value: */
-   png_uint_32 x = png_exp(lg2);
-
-   /* Convert the 32-bit value to 0..65535 by multiplying by 65536-1: */
-   x -= x >> 16;
-   return (png_uint_16)((x + 32767U) >> 16);
-}
-#endif /* FLOATING_ARITHMETIC */
-
-png_byte
-png_gamma_8bit_correct(unsigned int value, png_fixed_point gamma_val)
-{
-   if (value > 0 && value < 255)
-   {
-#     ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-         double r = floor(255*pow(value/255.,gamma_val*.00001)+.5);
-         return (png_byte)r;
-#     else
-         png_int_32 lg2 = png_log8bit(value);
-         png_fixed_point res;
-
-         if (png_muldiv(&res, gamma_val, lg2, PNG_FP_1))
-            return png_exp8bit(res);
-
-         /* Overflow. */
-         value = 0;
-#     endif
-   }
-
-   return (png_byte)value;
-}
-
-png_uint_16
-png_gamma_16bit_correct(unsigned int value, png_fixed_point gamma_val)
-{
-   if (value > 0 && value < 65535)
-   {
-#     ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-         double r = floor(65535*pow(value/65535.,gamma_val*.00001)+.5);
-         return (png_uint_16)r;
-#     else
-         png_int_32 lg2 = png_log16bit(value);
-         png_fixed_point res;
-
-         if (png_muldiv(&res, gamma_val, lg2, PNG_FP_1))
-            return png_exp16bit(res);
-
-         /* Overflow. */
-         value = 0;
-#     endif
-   }
-
-   return (png_uint_16)value;
-}
-
-/* This does the right thing based on the bit_depth field of the
- * png_struct, interpreting values as 8-bit or 16-bit.  While the result
- * is nominally a 16-bit value if bit depth is 8 then the result is
- * 8-bit (as are the arguments.)
- */
-png_uint_16 /* PRIVATE */
-png_gamma_correct(png_structp png_ptr, unsigned int value,
-    png_fixed_point gamma_val)
-{
-   if (png_ptr->bit_depth == 8)
-      return png_gamma_8bit_correct(value, gamma_val);
-
-   else
-      return png_gamma_16bit_correct(value, gamma_val);
-}
-
-/* This is the shared test on whether a gamma value is 'significant' - whether
- * it is worth doing gamma correction.
- */
-int /* PRIVATE */
-png_gamma_significant(png_fixed_point gamma_val)
-{
-   return gamma_val < PNG_FP_1 - PNG_GAMMA_THRESHOLD_FIXED ||
-       gamma_val > PNG_FP_1 + PNG_GAMMA_THRESHOLD_FIXED;
-}
-
-/* Internal function to build a single 16-bit table - the table consists of
- * 'num' 256-entry subtables, where 'num' is determined by 'shift' - the amount
- * to shift the input values right (or 16-number_of_signifiant_bits).
- *
- * The caller is responsible for ensuring that the table gets cleaned up on
- * png_error (i.e. if one of the mallocs below fails) - i.e. the *table argument
- * should be somewhere that will be cleaned.
- */
-static void
-png_build_16bit_table(png_structp png_ptr, png_uint_16pp *ptable,
-   PNG_CONST unsigned int shift, PNG_CONST png_fixed_point gamma_val)
-{
-   /* Various values derived from 'shift': */
-   PNG_CONST unsigned int num = 1U << (8U - shift);
-   PNG_CONST unsigned int max = (1U << (16U - shift))-1U;
-   PNG_CONST unsigned int max_by_2 = 1U << (15U-shift);
-   unsigned int i;
-
-   png_uint_16pp table = *ptable =
-       (png_uint_16pp)png_calloc(png_ptr, num * png_sizeof(png_uint_16p));
-
-   for (i = 0; i < num; i++)
-   {
-      png_uint_16p sub_table = table[i] =
-          (png_uint_16p)png_malloc(png_ptr, 256 * png_sizeof(png_uint_16));
-
-      /* The 'threshold' test is repeated here because it can arise for one of
-       * the 16-bit tables even if the others don't hit it.
-       */
-      if (png_gamma_significant(gamma_val))
-      {
-         /* The old code would overflow at the end and this would cause the
-          * 'pow' function to return a result >1, resulting in an
-          * arithmetic error.  This code follows the spec exactly; ig is
-          * the recovered input sample, it always has 8-16 bits.
-          *
-          * We want input * 65535/max, rounded, the arithmetic fits in 32
-          * bits (unsigned) so long as max <= 32767.
-          */
-         unsigned int j;
-         for (j = 0; j < 256; j++)
-         {
-            png_uint_32 ig = (j << (8-shift)) + i;
-#           ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-               /* Inline the 'max' scaling operation: */
-               double d = floor(65535*pow(ig/(double)max, gamma_val*.00001)+.5);
-               sub_table[j] = (png_uint_16)d;
-#           else
-               if (shift)
-                  ig = (ig * 65535U + max_by_2)/max;
-
-               sub_table[j] = png_gamma_16bit_correct(ig, gamma_val);
-#           endif
-         }
-      }
-      else
-      {
-         /* We must still build a table, but do it the fast way. */
-         unsigned int j;
-
-         for (j = 0; j < 256; j++)
-         {
-            png_uint_32 ig = (j << (8-shift)) + i;
-
-            if (shift)
-               ig = (ig * 65535U + max_by_2)/max;
-
-            sub_table[j] = (png_uint_16)ig;
-         }
-      }
-   }
-}
-
-/* NOTE: this function expects the *inverse* of the overall gamma transformation
- * required.
- */
-static void
-png_build_16to8_table(png_structp png_ptr, png_uint_16pp *ptable,
-   PNG_CONST unsigned int shift, PNG_CONST png_fixed_point gamma_val)
-{
-   PNG_CONST unsigned int num = 1U << (8U - shift);
-   PNG_CONST unsigned int max = (1U << (16U - shift))-1U;
-   unsigned int i;
-   png_uint_32 last;
-
-   png_uint_16pp table = *ptable =
-       (png_uint_16pp)png_calloc(png_ptr, num * png_sizeof(png_uint_16p));
-
-   /* 'num' is the number of tables and also the number of low bits of the
-    * input 16-bit value used to select a table.  Each table is itself indexed
-    * by the high 8 bits of the value.
-    */
-   for (i = 0; i < num; i++)
-      table[i] = (png_uint_16p)png_malloc(png_ptr,
-          256 * png_sizeof(png_uint_16));
-
-   /* 'gamma_val' is set to the reciprocal of the value calculated above, so
-    * pow(out,g) is an *input* value.  'last' is the last input value set.
-    *
-    * In the loop 'i' is used to find output values.  Since the output is
-    * 8-bit there are only 256 possible values.  The tables are set up to
-    * select the closest possible output value for each input by finding
-    * the input value at the boundary between each pair of output values
-    * and filling the table up to that boundary with the lower output
-    * value.
-    *
-    * The boundary values are 0.5,1.5..253.5,254.5.  Since these are 9-bit
-    * values the code below uses a 16-bit value in i; the values start at
-    * 128.5 (for 0.5) and step by 257, for a total of 254 values (the last
-    * entries are filled with 255).  Start i at 128 and fill all 'last'
-    * table entries <= 'max'
-    */
-   last = 0;
-   for (i = 0; i < 255; ++i) /* 8-bit output value */
-   {
-      /* Find the corresponding maximum input value */
-      png_uint_16 out = (png_uint_16)(i * 257U); /* 16-bit output value */
-
-      /* Find the boundary value in 16 bits: */
-      png_uint_32 bound = png_gamma_16bit_correct(out+128U, gamma_val);
-
-      /* Adjust (round) to (16-shift) bits: */
-      bound = (bound * max + 32768U)/65535U + 1U;
-
-      while (last < bound)
-      {
-         table[last & (0xffU >> shift)][last >> (8U - shift)] = out;
-         last++;
-      }
-   }
-
-   /* And fill in the final entries. */
-   while (last < (num << 8))
-   {
-      table[last & (0xff >> shift)][last >> (8U - shift)] = 65535U;
-      last++;
-   }
-}
-
-/* Build a single 8-bit table: same as the 16-bit case but much simpler (and
- * typically much faster).  Note that libpng currently does no sBIT processing
- * (apparently contrary to the spec) so a 256-entry table is always generated.
- */
-static void
-png_build_8bit_table(png_structp png_ptr, png_bytepp ptable,
-   PNG_CONST png_fixed_point gamma_val)
-{
-   unsigned int i;
-   png_bytep table = *ptable = (png_bytep)png_malloc(png_ptr, 256);
-
-   if (png_gamma_significant(gamma_val)) for (i=0; i<256; i++)
-      table[i] = png_gamma_8bit_correct(i, gamma_val);
-
-   else for (i=0; i<256; ++i)
-      table[i] = (png_byte)i;
-}
-
-/* Used from png_read_destroy and below to release the memory used by the gamma
- * tables.
- */
-void /* PRIVATE */
-png_destroy_gamma_table(png_structp png_ptr)
-{
-   png_free(png_ptr, png_ptr->gamma_table);
-   png_ptr->gamma_table = NULL;
-
-   if (png_ptr->gamma_16_table != NULL)
-   {
-      int i;
-      int istop = (1 << (8 - png_ptr->gamma_shift));
-      for (i = 0; i < istop; i++)
-      {
-         png_free(png_ptr, png_ptr->gamma_16_table[i]);
-      }
-   png_free(png_ptr, png_ptr->gamma_16_table);
-   png_ptr->gamma_16_table = NULL;
-   }
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
-   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
-   png_free(png_ptr, png_ptr->gamma_from_1);
-   png_ptr->gamma_from_1 = NULL;
-   png_free(png_ptr, png_ptr->gamma_to_1);
-   png_ptr->gamma_to_1 = NULL;
-
-   if (png_ptr->gamma_16_from_1 != NULL)
-   {
-      int i;
-      int istop = (1 << (8 - png_ptr->gamma_shift));
-      for (i = 0; i < istop; i++)
-      {
-         png_free(png_ptr, png_ptr->gamma_16_from_1[i]);
-      }
-   png_free(png_ptr, png_ptr->gamma_16_from_1);
-   png_ptr->gamma_16_from_1 = NULL;
-   }
-   if (png_ptr->gamma_16_to_1 != NULL)
-   {
-      int i;
-      int istop = (1 << (8 - png_ptr->gamma_shift));
-      for (i = 0; i < istop; i++)
-      {
-         png_free(png_ptr, png_ptr->gamma_16_to_1[i]);
-      }
-   png_free(png_ptr, png_ptr->gamma_16_to_1);
-   png_ptr->gamma_16_to_1 = NULL;
-   }
-#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
-}
-
-/* We build the 8- or 16-bit gamma tables here.  Note that for 16-bit
- * tables, we don't make a full table if we are reducing to 8-bit in
- * the future.  Note also how the gamma_16 tables are segmented so that
- * we don't need to allocate > 64K chunks for a full 16-bit table.
- */
-void /* PRIVATE */
-png_build_gamma_table(png_structp png_ptr, int bit_depth)
-{
-  png_debug(1, "in png_build_gamma_table");
-
-  /* Remove any existing table; this copes with multiple calls to
-   * png_read_update_info.  The warning is because building the gamma tables
-   * multiple times is a performance hit - it's harmless but the ability to call
-   * png_read_update_info() multiple times is new in 1.5.6 so it seems sensible
-   * to warn if the app introduces such a hit.
-   */
-  if (png_ptr->gamma_table != NULL || png_ptr->gamma_16_table != NULL)
-  {
-    png_warning(png_ptr, "gamma table being rebuilt");
-    png_destroy_gamma_table(png_ptr);
-  }
-
-  if (bit_depth <= 8)
-  {
-     png_build_8bit_table(png_ptr, &png_ptr->gamma_table,
-         png_ptr->screen_gamma > 0 ?  png_reciprocal2(png_ptr->gamma,
-         png_ptr->screen_gamma) : PNG_FP_1);
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
-   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
-     if (png_ptr->transformations & (PNG_COMPOSE | PNG_RGB_TO_GRAY))
-     {
-        png_build_8bit_table(png_ptr, &png_ptr->gamma_to_1,
-            png_reciprocal(png_ptr->gamma));
-
-        png_build_8bit_table(png_ptr, &png_ptr->gamma_from_1,
-            png_ptr->screen_gamma > 0 ?  png_reciprocal(png_ptr->screen_gamma) :
-            png_ptr->gamma/* Probably doing rgb_to_gray */);
-     }
-#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
-  }
-  else
-  {
-     png_byte shift, sig_bit;
-
-     if (png_ptr->color_type & PNG_COLOR_MASK_COLOR)
-     {
-        sig_bit = png_ptr->sig_bit.red;
-
-        if (png_ptr->sig_bit.green > sig_bit)
-           sig_bit = png_ptr->sig_bit.green;
-
-        if (png_ptr->sig_bit.blue > sig_bit)
-           sig_bit = png_ptr->sig_bit.blue;
-     }
-     else
-        sig_bit = png_ptr->sig_bit.gray;
-
-     /* 16-bit gamma code uses this equation:
-      *
-      *   ov = table[(iv & 0xff) >> gamma_shift][iv >> 8]
-      *
-      * Where 'iv' is the input color value and 'ov' is the output value -
-      * pow(iv, gamma).
-      *
-      * Thus the gamma table consists of up to 256 256-entry tables.  The table
-      * is selected by the (8-gamma_shift) most significant of the low 8 bits of
-      * the color value then indexed by the upper 8 bits:
-      *
-      *   table[low bits][high 8 bits]
-      *
-      * So the table 'n' corresponds to all those 'iv' of:
-      *
-      *   <all high 8-bit values><n << gamma_shift>..<(n+1 << gamma_shift)-1>
-      *
-      */
-     if (sig_bit > 0 && sig_bit < 16U)
-        shift = (png_byte)(16U - sig_bit); /* shift == insignificant bits */
-
-     else
-        shift = 0; /* keep all 16 bits */
-
-     if (png_ptr->transformations & (PNG_16_TO_8 | PNG_SCALE_16_TO_8))
-     {
-        /* PNG_MAX_GAMMA_8 is the number of bits to keep - effectively
-         * the significant bits in the *input* when the output will
-         * eventually be 8 bits.  By default it is 11.
-         */
-        if (shift < (16U - PNG_MAX_GAMMA_8))
-           shift = (16U - PNG_MAX_GAMMA_8);
-     }
-
-     if (shift > 8U)
-        shift = 8U; /* Guarantees at least one table! */
-
-     png_ptr->gamma_shift = shift;
-
-#ifdef PNG_16BIT_SUPPORTED
-     /* NOTE: prior to 1.5.4 this test used to include PNG_BACKGROUND (now
-      * PNG_COMPOSE).  This effectively smashed the background calculation for
-      * 16-bit output because the 8-bit table assumes the result will be reduced
-      * to 8 bits.
-      */
-     if (png_ptr->transformations & (PNG_16_TO_8 | PNG_SCALE_16_TO_8))
-#endif
-         png_build_16to8_table(png_ptr, &png_ptr->gamma_16_table, shift,
-         png_ptr->screen_gamma > 0 ? png_product2(png_ptr->gamma,
-         png_ptr->screen_gamma) : PNG_FP_1);
-
-#ifdef PNG_16BIT_SUPPORTED
-     else
-         png_build_16bit_table(png_ptr, &png_ptr->gamma_16_table, shift,
-         png_ptr->screen_gamma > 0 ? png_reciprocal2(png_ptr->gamma,
-         png_ptr->screen_gamma) : PNG_FP_1);
-#endif
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
-   defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \
-   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
-     if (png_ptr->transformations & (PNG_COMPOSE | PNG_RGB_TO_GRAY))
-     {
-        png_build_16bit_table(png_ptr, &png_ptr->gamma_16_to_1, shift,
-            png_reciprocal(png_ptr->gamma));
-
-        /* Notice that the '16 from 1' table should be full precision, however
-         * the lookup on this table still uses gamma_shift, so it can't be.
-         * TODO: fix this.
-         */
-        png_build_16bit_table(png_ptr, &png_ptr->gamma_16_from_1, shift,
-            png_ptr->screen_gamma > 0 ? png_reciprocal(png_ptr->screen_gamma) :
-            png_ptr->gamma/* Probably doing rgb_to_gray */);
-     }
-#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */
-  }
-}
-#endif /* READ_GAMMA */
-#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
diff --git a/reg-io/png/lpng1510/png.h b/reg-io/png/lpng1510/png.h
deleted file mode 100644
index b4da5bb2..00000000
--- a/reg-io/png/lpng1510/png.h
+++ /dev/null
@@ -1,2664 +0,0 @@
-
-/* png.h - header file for PNG reference library
- *
- * libpng version 1.5.10 - March 29, 2012
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license (See LICENSE, below)
- *
- * Authors and maintainers:
- *   libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat
- *   libpng versions 0.89c, June 1996, through 0.96, May 1997: Andreas Dilger
- *   libpng versions 0.97, January 1998, through 1.5.10 - March 29, 2012: Glenn
- *   See also "Contributing Authors", below.
- *
- * Note about libpng version numbers:
- *
- *   Due to various miscommunications, unforeseen code incompatibilities
- *   and occasional factors outside the authors' control, version numbering
- *   on the library has not always been consistent and straightforward.
- *   The following table summarizes matters since version 0.89c, which was
- *   the first widely used release:
- *
- *    source                 png.h  png.h  shared-lib
- *    version                string   int  version
- *    -------                ------ -----  ----------
- *    0.89c "1.0 beta 3"     0.89      89  1.0.89
- *    0.90  "1.0 beta 4"     0.90      90  0.90  [should have been 2.0.90]
- *    0.95  "1.0 beta 5"     0.95      95  0.95  [should have been 2.0.95]
- *    0.96  "1.0 beta 6"     0.96      96  0.96  [should have been 2.0.96]
- *    0.97b "1.00.97 beta 7" 1.00.97   97  1.0.1 [should have been 2.0.97]
- *    0.97c                  0.97      97  2.0.97
- *    0.98                   0.98      98  2.0.98
- *    0.99                   0.99      98  2.0.99
- *    0.99a-m                0.99      99  2.0.99
- *    1.00                   1.00     100  2.1.0 [100 should be 10000]
- *    1.0.0      (from here on, the   100  2.1.0 [100 should be 10000]
- *    1.0.1       png.h string is   10001  2.1.0
- *    1.0.1a-e    identical to the  10002  from here on, the shared library
- *    1.0.2       source version)   10002  is 2.V where V is the source code
- *    1.0.2a-b                      10003  version, except as noted.
- *    1.0.3                         10003
- *    1.0.3a-d                      10004
- *    1.0.4                         10004
- *    1.0.4a-f                      10005
- *    1.0.5 (+ 2 patches)           10005
- *    1.0.5a-d                      10006
- *    1.0.5e-r                      10100 (not source compatible)
- *    1.0.5s-v                      10006 (not binary compatible)
- *    1.0.6 (+ 3 patches)           10006 (still binary incompatible)
- *    1.0.6d-f                      10007 (still binary incompatible)
- *    1.0.6g                        10007
- *    1.0.6h                        10007  10.6h (testing xy.z so-numbering)
- *    1.0.6i                        10007  10.6i
- *    1.0.6j                        10007  2.1.0.6j (incompatible with 1.0.0)
- *    1.0.7beta11-14        DLLNUM  10007  2.1.0.7beta11-14 (binary compatible)
- *    1.0.7beta15-18           1    10007  2.1.0.7beta15-18 (binary compatible)
- *    1.0.7rc1-2               1    10007  2.1.0.7rc1-2 (binary compatible)
- *    1.0.7                    1    10007  (still compatible)
- *    1.0.8beta1-4             1    10008  2.1.0.8beta1-4
- *    1.0.8rc1                 1    10008  2.1.0.8rc1
- *    1.0.8                    1    10008  2.1.0.8
- *    1.0.9beta1-6             1    10009  2.1.0.9beta1-6
- *    1.0.9rc1                 1    10009  2.1.0.9rc1
- *    1.0.9beta7-10            1    10009  2.1.0.9beta7-10
- *    1.0.9rc2                 1    10009  2.1.0.9rc2
- *    1.0.9                    1    10009  2.1.0.9
- *    1.0.10beta1              1    10010  2.1.0.10beta1
- *    1.0.10rc1                1    10010  2.1.0.10rc1
- *    1.0.10                   1    10010  2.1.0.10
- *    1.0.11beta1-3            1    10011  2.1.0.11beta1-3
- *    1.0.11rc1                1    10011  2.1.0.11rc1
- *    1.0.11                   1    10011  2.1.0.11
- *    1.0.12beta1-2            2    10012  2.1.0.12beta1-2
- *    1.0.12rc1                2    10012  2.1.0.12rc1
- *    1.0.12                   2    10012  2.1.0.12
- *    1.1.0a-f                 -    10100  2.1.1.0a-f (branch abandoned)
- *    1.2.0beta1-2             2    10200  2.1.2.0beta1-2
- *    1.2.0beta3-5             3    10200  3.1.2.0beta3-5
- *    1.2.0rc1                 3    10200  3.1.2.0rc1
- *    1.2.0                    3    10200  3.1.2.0
- *    1.2.1beta1-4             3    10201  3.1.2.1beta1-4
- *    1.2.1rc1-2               3    10201  3.1.2.1rc1-2
- *    1.2.1                    3    10201  3.1.2.1
- *    1.2.2beta1-6            12    10202  12.so.0.1.2.2beta1-6
- *    1.0.13beta1             10    10013  10.so.0.1.0.13beta1
- *    1.0.13rc1               10    10013  10.so.0.1.0.13rc1
- *    1.2.2rc1                12    10202  12.so.0.1.2.2rc1
- *    1.0.13                  10    10013  10.so.0.1.0.13
- *    1.2.2                   12    10202  12.so.0.1.2.2
- *    1.2.3rc1-6              12    10203  12.so.0.1.2.3rc1-6
- *    1.2.3                   12    10203  12.so.0.1.2.3
- *    1.2.4beta1-3            13    10204  12.so.0.1.2.4beta1-3
- *    1.0.14rc1               13    10014  10.so.0.1.0.14rc1
- *    1.2.4rc1                13    10204  12.so.0.1.2.4rc1
- *    1.0.14                  10    10014  10.so.0.1.0.14
- *    1.2.4                   13    10204  12.so.0.1.2.4
- *    1.2.5beta1-2            13    10205  12.so.0.1.2.5beta1-2
- *    1.0.15rc1-3             10    10015  10.so.0.1.0.15rc1-3
- *    1.2.5rc1-3              13    10205  12.so.0.1.2.5rc1-3
- *    1.0.15                  10    10015  10.so.0.1.0.15
- *    1.2.5                   13    10205  12.so.0.1.2.5
- *    1.2.6beta1-4            13    10206  12.so.0.1.2.6beta1-4
- *    1.0.16                  10    10016  10.so.0.1.0.16
- *    1.2.6                   13    10206  12.so.0.1.2.6
- *    1.2.7beta1-2            13    10207  12.so.0.1.2.7beta1-2
- *    1.0.17rc1               10    10017  12.so.0.1.0.17rc1
- *    1.2.7rc1                13    10207  12.so.0.1.2.7rc1
- *    1.0.17                  10    10017  12.so.0.1.0.17
- *    1.2.7                   13    10207  12.so.0.1.2.7
- *    1.2.8beta1-5            13    10208  12.so.0.1.2.8beta1-5
- *    1.0.18rc1-5             10    10018  12.so.0.1.0.18rc1-5
- *    1.2.8rc1-5              13    10208  12.so.0.1.2.8rc1-5
- *    1.0.18                  10    10018  12.so.0.1.0.18
- *    1.2.8                   13    10208  12.so.0.1.2.8
- *    1.2.9beta1-3            13    10209  12.so.0.1.2.9beta1-3
- *    1.2.9beta4-11           13    10209  12.so.0.9[.0]
- *    1.2.9rc1                13    10209  12.so.0.9[.0]
- *    1.2.9                   13    10209  12.so.0.9[.0]
- *    1.2.10beta1-7           13    10210  12.so.0.10[.0]
- *    1.2.10rc1-2             13    10210  12.so.0.10[.0]
- *    1.2.10                  13    10210  12.so.0.10[.0]
- *    1.4.0beta1-5            14    10400  14.so.0.0[.0]
- *    1.2.11beta1-4           13    10211  12.so.0.11[.0]
- *    1.4.0beta7-8            14    10400  14.so.0.0[.0]
- *    1.2.11                  13    10211  12.so.0.11[.0]
- *    1.2.12                  13    10212  12.so.0.12[.0]
- *    1.4.0beta9-14           14    10400  14.so.0.0[.0]
- *    1.2.13                  13    10213  12.so.0.13[.0]
- *    1.4.0beta15-36          14    10400  14.so.0.0[.0]
- *    1.4.0beta37-87          14    10400  14.so.14.0[.0]
- *    1.4.0rc01               14    10400  14.so.14.0[.0]
- *    1.4.0beta88-109         14    10400  14.so.14.0[.0]
- *    1.4.0rc02-08            14    10400  14.so.14.0[.0]
- *    1.4.0                   14    10400  14.so.14.0[.0]
- *    1.4.1beta01-03          14    10401  14.so.14.1[.0]
- *    1.4.1rc01               14    10401  14.so.14.1[.0]
- *    1.4.1beta04-12          14    10401  14.so.14.1[.0]
- *    1.4.1                   14    10401  14.so.14.1[.0]
- *    1.4.2                   14    10402  14.so.14.2[.0]
- *    1.4.3                   14    10403  14.so.14.3[.0]
- *    1.4.4                   14    10404  14.so.14.4[.0]
- *    1.5.0beta01-58          15    10500  15.so.15.0[.0]
- *    1.5.0rc01-07            15    10500  15.so.15.0[.0]
- *    1.5.0                   15    10500  15.so.15.0[.0]
- *    1.5.1beta01-11          15    10501  15.so.15.1[.0]
- *    1.5.1rc01-02            15    10501  15.so.15.1[.0]
- *    1.5.1                   15    10501  15.so.15.1[.0]
- *    1.5.2beta01-03          15    10502  15.so.15.2[.0]
- *    1.5.2rc01-03            15    10502  15.so.15.2[.0]
- *    1.5.2                   15    10502  15.so.15.2[.0]
- *    1.5.3beta01-10          15    10503  15.so.15.3[.0]
- *    1.5.3rc01-02            15    10503  15.so.15.3[.0]
- *    1.5.3beta11             15    10503  15.so.15.3[.0]
- *    1.5.3 [omitted]
- *    1.5.4beta01-08          15    10504  15.so.15.4[.0]
- *    1.5.4rc01               15    10504  15.so.15.4[.0]
- *    1.5.4                   15    10504  15.so.15.4[.0]
- *    1.5.5beta01-08          15    10505  15.so.15.5[.0]
- *    1.5.5rc01               15    10505  15.so.15.5[.0]
- *    1.5.5                   15    10505  15.so.15.5[.0]
- *    1.5.6beta01-07          15    10506  15.so.15.6[.0]
- *    1.5.6rc01-03            15    10506  15.so.15.6[.0]
- *    1.5.6                   15    10506  15.so.15.6[.0]
- *    1.5.7beta01-05          15    10507  15.so.15.7[.0]
- *    1.5.7rc01-03            15    10507  15.so.15.7[.0]
- *    1.5.7                   15    10507  15.so.15.7[.0]
- *    1.5.8beta01             15    10508  15.so.15.8[.0]
- *    1.5.8rc01               15    10508  15.so.15.8[.0]
- *    1.5.8                   15    10508  15.so.15.8[.0]
- *    1.5.9beta01-02          15    10509  15.so.15.9[.0]
- *    1.5.9rc01               15    10509  15.so.15.9[.0]
- *    1.5.9                   15    10509  15.so.15.9[.0]
- *    1.5.10beta01-05         15    10510  15.so.15.10[.0]
- *    1.5.10                  15    10510  15.so.15.10[.0]
- *
- *   Henceforth the source version will match the shared-library major
- *   and minor numbers; the shared-library major version number will be
- *   used for changes in backward compatibility, as it is intended.  The
- *   PNG_LIBPNG_VER macro, which is not used within libpng but is available
- *   for applications, is an unsigned integer of the form xyyzz corresponding
- *   to the source version x.y.z (leading zeros in y and z).  Beta versions
- *   were given the previous public release number plus a letter, until
- *   version 1.0.6j; from then on they were given the upcoming public
- *   release number plus "betaNN" or "rcNN".
- *
- *   Binary incompatibility exists only when applications make direct access
- *   to the info_ptr or png_ptr members through png.h, and the compiled
- *   application is loaded with a different version of the library.
- *
- *   DLLNUM will change each time there are forward or backward changes
- *   in binary compatibility (e.g., when a new feature is added).
- *
- * See libpng-manual.txt or libpng.3 for more information.  The PNG
- * specification is available as a W3C Recommendation and as an ISO
- * Specification, <http://www.w3.org/TR/2003/REC-PNG-20031110/
- */
-
-/*
- * COPYRIGHT NOTICE, DISCLAIMER, and LICENSE:
- *
- * If you modify libpng you may insert additional notices immediately following
- * this sentence.
- *
- * This code is released under the libpng license.
- *
- * libpng versions 1.2.6, August 15, 2004, through 1.5.10, March 29, 2012, are
- * Copyright (c) 2004, 2006-2012 Glenn Randers-Pehrson, and are
- * distributed according to the same disclaimer and license as libpng-1.2.5
- * with the following individual added to the list of Contributing Authors:
- *
- *    Cosmin Truta
- *
- * libpng versions 1.0.7, July 1, 2000, through 1.2.5, October 3, 2002, are
- * Copyright (c) 2000-2002 Glenn Randers-Pehrson, and are
- * distributed according to the same disclaimer and license as libpng-1.0.6
- * with the following individuals added to the list of Contributing Authors:
- *
- *    Simon-Pierre Cadieux
- *    Eric S. Raymond
- *    Gilles Vollant
- *
- * and with the following additions to the disclaimer:
- *
- *    There is no warranty against interference with your enjoyment of the
- *    library or against infringement.  There is no warranty that our
- *    efforts or the library will fulfill any of your particular purposes
- *    or needs.  This library is provided with all faults, and the entire
- *    risk of satisfactory quality, performance, accuracy, and effort is with
- *    the user.
- *
- * libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are
- * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson, and are
- * distributed according to the same disclaimer and license as libpng-0.96,
- * with the following individuals added to the list of Contributing Authors:
- *
- *    Tom Lane
- *    Glenn Randers-Pehrson
- *    Willem van Schaik
- *
- * libpng versions 0.89, June 1996, through 0.96, May 1997, are
- * Copyright (c) 1996, 1997 Andreas Dilger
- * Distributed according to the same disclaimer and license as libpng-0.88,
- * with the following individuals added to the list of Contributing Authors:
- *
- *    John Bowler
- *    Kevin Bracey
- *    Sam Bushell
- *    Magnus Holmgren
- *    Greg Roelofs
- *    Tom Tanner
- *
- * libpng versions 0.5, May 1995, through 0.88, January 1996, are
- * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
- *
- * For the purposes of this copyright and license, "Contributing Authors"
- * is defined as the following set of individuals:
- *
- *    Andreas Dilger
- *    Dave Martindale
- *    Guy Eric Schalnat
- *    Paul Schmidt
- *    Tim Wegner
- *
- * The PNG Reference Library is supplied "AS IS".  The Contributing Authors
- * and Group 42, Inc. disclaim all warranties, expressed or implied,
- * including, without limitation, the warranties of merchantability and of
- * fitness for any purpose.  The Contributing Authors and Group 42, Inc.
- * assume no liability for direct, indirect, incidental, special, exemplary,
- * or consequential damages, which may result from the use of the PNG
- * Reference Library, even if advised of the possibility of such damage.
- *
- * Permission is hereby granted to use, copy, modify, and distribute this
- * source code, or portions hereof, for any purpose, without fee, subject
- * to the following restrictions:
- *
- *   1. The origin of this source code must not be misrepresented.
- *
- *   2. Altered versions must be plainly marked as such and must not
- *      be misrepresented as being the original source.
- *
- *   3. This Copyright notice may not be removed or altered from
- *      any source or altered source distribution.
- *
- * The Contributing Authors and Group 42, Inc. specifically permit, without
- * fee, and encourage the use of this source code as a component to
- * supporting the PNG file format in commercial products.  If you use this
- * source code in a product, acknowledgment is not required but would be
- * appreciated.
- */
-
-/*
- * A "png_get_copyright" function is available, for convenient use in "about"
- * boxes and the like:
- *
- *     printf("%s", png_get_copyright(NULL));
- *
- * Also, the PNG logo (in PNG format, of course) is supplied in the
- * files "pngbar.png" and "pngbar.jpg (88x31) and "pngnow.png" (98x31).
- */
-
-/*
- * Libpng is OSI Certified Open Source Software.  OSI Certified is a
- * certification mark of the Open Source Initiative.
- */
-
-/*
- * The contributing authors would like to thank all those who helped
- * with testing, bug fixes, and patience.  This wouldn't have been
- * possible without all of you.
- *
- * Thanks to Frank J. T. Wojcik for helping with the documentation.
- */
-
-/*
- * Y2K compliance in libpng:
- * =========================
- *
- *    March 29, 2012
- *
- *    Since the PNG Development group is an ad-hoc body, we can't make
- *    an official declaration.
- *
- *    This is your unofficial assurance that libpng from version 0.71 and
- *    upward through 1.5.10 are Y2K compliant.  It is my belief that
- *    earlier versions were also Y2K compliant.
- *
- *    Libpng only has two year fields.  One is a 2-byte unsigned integer
- *    that will hold years up to 65535.  The other holds the date in text
- *    format, and will hold years up to 9999.
- *
- *    The integer is
- *        "png_uint_16 year" in png_time_struct.
- *
- *    The string is
- *        "png_char time_buffer" in png_struct
- *
- *    There are seven time-related functions:
- *        png.c: png_convert_to_rfc_1123() in png.c
- *          (formerly png_convert_to_rfc_1152() in error)
- *        png_convert_from_struct_tm() in pngwrite.c, called in pngwrite.c
- *        png_convert_from_time_t() in pngwrite.c
- *        png_get_tIME() in pngget.c
- *        png_handle_tIME() in pngrutil.c, called in pngread.c
- *        png_set_tIME() in pngset.c
- *        png_write_tIME() in pngwutil.c, called in pngwrite.c
- *
- *    All handle dates properly in a Y2K environment.  The
- *    png_convert_from_time_t() function calls gmtime() to convert from system
- *    clock time, which returns (year - 1900), which we properly convert to
- *    the full 4-digit year.  There is a possibility that applications using
- *    libpng are not passing 4-digit years into the png_convert_to_rfc_1123()
- *    function, or that they are incorrectly passing only a 2-digit year
- *    instead of "year - 1900" into the png_convert_from_struct_tm() function,
- *    but this is not under our control.  The libpng documentation has always
- *    stated that it works with 4-digit years, and the APIs have been
- *    documented as such.
- *
- *    The tIME chunk itself is also Y2K compliant.  It uses a 2-byte unsigned
- *    integer to hold the year, and can hold years as large as 65535.
- *
- *    zlib, upon which libpng depends, is also Y2K compliant.  It contains
- *    no date-related code.
- *
- *       Glenn Randers-Pehrson
- *       libpng maintainer
- *       PNG Development Group
- */
-
-#pragma once
-
-/* This is not the place to learn how to use libpng. The file libpng-manual.txt
- * describes how to use libpng, and the file example.c summarizes it
- * with some code on which to build.  This file is useful for looking
- * at the actual function definitions and structure components.
- *
- * If you just need to read a PNG file and don't want to read the documentation
- * skip to the end of this file and read the section entitled 'simplified API'.
- */
-
-/* Version information for png.h - this should match the version in png.c */
-#define PNG_LIBPNG_VER_STRING "1.5.10"
-#define PNG_HEADER_VERSION_STRING \
-     " libpng version 1.5.10 - March 29, 2012\n"
-
-#define PNG_LIBPNG_VER_SONUM   15
-#define PNG_LIBPNG_VER_DLLNUM  15
-
-/* These should match the first 3 components of PNG_LIBPNG_VER_STRING: */
-#define PNG_LIBPNG_VER_MAJOR   1
-#define PNG_LIBPNG_VER_MINOR   5
-#define PNG_LIBPNG_VER_RELEASE 10
-
-/* This should match the numeric part of the final component of
- * PNG_LIBPNG_VER_STRING, omitting any leading zero:
- */
-
-#define PNG_LIBPNG_VER_BUILD  0
-
-/* Release Status */
-#define PNG_LIBPNG_BUILD_ALPHA    1
-#define PNG_LIBPNG_BUILD_BETA     2
-#define PNG_LIBPNG_BUILD_RC       3
-#define PNG_LIBPNG_BUILD_STABLE   4
-#define PNG_LIBPNG_BUILD_RELEASE_STATUS_MASK 7
-
-/* Release-Specific Flags */
-#define PNG_LIBPNG_BUILD_PATCH    8 /* Can be OR'ed with
-PNG_LIBPNG_BUILD_STABLE only */
-#define PNG_LIBPNG_BUILD_PRIVATE 16 /* Cannot be OR'ed with
-PNG_LIBPNG_BUILD_SPECIAL */
-#define PNG_LIBPNG_BUILD_SPECIAL 32 /* Cannot be OR'ed with
-PNG_LIBPNG_BUILD_PRIVATE */
-
-#define PNG_LIBPNG_BUILD_BASE_TYPE PNG_LIBPNG_BUILD_STABLE
-
-/* Careful here.  At one time, Guy wanted to use 082, but that would be octal.
- * We must not include leading zeros.
- * Versions 0.7 through 1.0.0 were in the range 0 to 100 here (only
- * version 1.0.0 was mis-numbered 100 instead of 10000).  From
- * version 1.0.1 it's    xxyyzz, where x=major, y=minor, z=release
- */
-#define PNG_LIBPNG_VER 10510 /* 1.5.10 */
-
-/* Library configuration: these options cannot be changed after
- * the library has been built.
- */
-#ifndef PNGLCONF_H
-/* If pnglibconf.h is missing, you can
- * copy scripts/pnglibconf.h.prebuilt to pnglibconf.h
- */
-#   include "pnglibconf.h"
-#endif
-
-#ifndef PNG_VERSION_INFO_ONLY
-#  ifndef PNG_BUILDING_SYMBOL_TABLE
-/*
- *   Standard header files (not needed for the version info or while
- *   building symbol table -- see scripts/pnglibconf.dfa)
- */
-#    ifdef PNG_SETJMP_SUPPORTED
-#      include <setjmp.h>
-#    endif
-
-/* Need the time information for converting tIME chunks, it
- * defines struct tm:
- */
-#    ifdef PNG_CONVERT_tIME_SUPPORTED
-/* "time.h" functions are not supported on all operating systems */
-#      include <time.h>
-#    endif
-#  endif
-
-/* Machine specific configuration. */
-#  include "pngconf.h"
-#endif
-
-/*
- * Added at libpng-1.2.8
- *
- * Ref MSDN: Private as priority over Special
- * VS_FF_PRIVATEBUILD File *was not* built using standard release
- * procedures. If this value is given, the StringFileInfo block must
- * contain a PrivateBuild string.
- *
- * VS_FF_SPECIALBUILD File *was* built by the original company using
- * standard release procedures but is a variation of the standard
- * file of the same version number. If this value is given, the
- * StringFileInfo block must contain a SpecialBuild string.
- */
-
-#ifdef PNG_USER_PRIVATEBUILD /* From pnglibconf.h */
-#  define PNG_LIBPNG_BUILD_TYPE \
-       (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_PRIVATE)
-#else
-#  ifdef PNG_LIBPNG_SPECIALBUILD
-#    define PNG_LIBPNG_BUILD_TYPE \
-         (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_SPECIAL)
-#  else
-#    define PNG_LIBPNG_BUILD_TYPE (PNG_LIBPNG_BUILD_BASE_TYPE)
-#  endif
-#endif
-
-#ifndef PNG_VERSION_INFO_ONLY
-
-/* Inhibit C++ name-mangling for libpng functions but not for system calls. */
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-   /* Version information for C files, stored in png.c.  This had better match
-    * the version above.
-    */
-#define png_libpng_ver png_get_header_ver(NULL)
-
-   /* This file is arranged in several sections:
-    *
-    * 1. Any configuration options that can be specified by for the application
-    *    code when it is built.  (Build time configuration is in pnglibconf.h)
-    * 2. Type definitions (base types are defined in pngconf.h), structure
-    *    definitions.
-    * 3. Exported library functions.
-    *
-    * The library source code has additional files (principally pngpriv.h) that
-    * allow configuration of the library.
-    */
-   /* Section 1: run time configuration
-    * See pnglibconf.h for build time configuration
-    *
-    * Run time configuration allows the application to choose between
-    * implementations of certain arithmetic APIs.  The default is set
-    * at build time and recorded in pnglibconf.h, but it is safe to
-    * override these (and only these) settings.  Note that this won't
-    * change what the library does, only application code, and the
-    * settings can (and probably should) be made on a per-file basis
-    * by setting the #defines before including png.h
-    *
-    * Use macros to read integers from PNG data or use the exported
-    * functions?
-    *   PNG_USE_READ_MACROS: use the macros (see below)  Note that
-    *     the macros evaluate their argument multiple times.
-    *   PNG_NO_USE_READ_MACROS: call the relevant library function.
-    *
-    * Use the alternative algorithm for compositing alpha samples that
-    * does not use division?
-    *   PNG_READ_COMPOSITE_NODIV_SUPPORTED: use the 'no division'
-    *      algorithm.
-    *   PNG_NO_READ_COMPOSITE_NODIV: use the 'division' algorithm.
-    *
-    * How to handle benign errors if PNG_ALLOW_BENIGN_ERRORS is
-    * false?
-    *   PNG_ALLOW_BENIGN_ERRORS: map calls to the benign error
-    *      APIs to png_warning.
-    * Otherwise the calls are mapped to png_error.
-    */
-
-   /* Section 2: type definitions, including structures and compile time
-    * constants.
-    * See pngconf.h for base types that vary by machine/system
-    */
-
-   /* This triggers a compiler error in png.c, if png.c and png.h
-    * do not agree upon the version number.
-    */
-   typedef char* png_libpng_version_1_5_10;
-
-   /* Three color definitions.  The order of the red, green, and blue, (and the
-    * exact size) is not important, although the size of the fields need to
-    * be png_byte or png_uint_16 (as defined below).
-    */
-   typedef struct png_color_struct
-   {
-      png_byte red;
-      png_byte green;
-      png_byte blue;
-   } png_color;
-   typedef png_color FAR * png_colorp;
-   typedef PNG_CONST png_color FAR * png_const_colorp;
-   typedef png_color FAR * FAR * png_colorpp;
-
-   typedef struct png_color_16_struct
-   {
-      png_byte index;    /* used for palette files */
-      png_uint_16 red;   /* for use in red green blue files */
-      png_uint_16 green;
-      png_uint_16 blue;
-      png_uint_16 gray;  /* for use in grayscale files */
-   } png_color_16;
-   typedef png_color_16 FAR * png_color_16p;
-   typedef PNG_CONST png_color_16 FAR * png_const_color_16p;
-   typedef png_color_16 FAR * FAR * png_color_16pp;
-
-   typedef struct png_color_8_struct
-   {
-      png_byte red;   /* for use in red green blue files */
-      png_byte green;
-      png_byte blue;
-      png_byte gray;  /* for use in grayscale files */
-      png_byte alpha; /* for alpha channel files */
-   } png_color_8;
-   typedef png_color_8 FAR * png_color_8p;
-   typedef PNG_CONST png_color_8 FAR * png_const_color_8p;
-   typedef png_color_8 FAR * FAR * png_color_8pp;
-
-   /*
-    * The following two structures are used for the in-core representation
-    * of sPLT chunks.
-    */
-   typedef struct png_sPLT_entry_struct
-   {
-      png_uint_16 red;
-      png_uint_16 green;
-      png_uint_16 blue;
-      png_uint_16 alpha;
-      png_uint_16 frequency;
-   } png_sPLT_entry;
-   typedef png_sPLT_entry FAR * png_sPLT_entryp;
-   typedef PNG_CONST png_sPLT_entry FAR * png_const_sPLT_entryp;
-   typedef png_sPLT_entry FAR * FAR * png_sPLT_entrypp;
-
-   /*  When the depth of the sPLT palette is 8 bits, the color and alpha samples
-    *  occupy the LSB of their respective members, and the MSB of each member
-    *  is zero-filled.  The frequency member always occupies the full 16 bits.
-    */
-
-   typedef struct png_sPLT_struct
-   {
-      png_charp name;           /* palette name */
-      png_byte depth;           /* depth of palette samples */
-      png_sPLT_entryp entries;  /* palette entries */
-      png_int_32 nentries;      /* number of palette entries */
-   } png_sPLT_t;
-   typedef png_sPLT_t FAR * png_sPLT_tp;
-   typedef PNG_CONST png_sPLT_t FAR * png_const_sPLT_tp;
-   typedef png_sPLT_t FAR * FAR * png_sPLT_tpp;
-
-#ifdef PNG_TEXT_SUPPORTED
-   /* png_text holds the contents of a text/ztxt/itxt chunk in a PNG file,
-    * and whether that contents is compressed or not.  The "key" field
-    * points to a regular zero-terminated C string.  The "text" fields can be a
-    * regular C string, an empty string, or a NULL pointer.
-    * However, the structure returned by png_get_text() will always contain
-    * the "text" field as a regular zero-terminated C string (possibly
-    * empty), never a NULL pointer, so it can be safely used in printf() and
-    * other string-handling functions.  Note that the "itxt_length", "lang", and
-    * "lang_key" members of the structure only exist when the library is built
-    * with iTXt chunk support.  Prior to libpng-1.4.0 the library was built by
-    * default without iTXt support. Also note that when iTXt *is* supported,
-    * the "lang" and "lang_key" fields contain NULL pointers when the
-    * "compression" field contains * PNG_TEXT_COMPRESSION_NONE or
-    * PNG_TEXT_COMPRESSION_zTXt. Note that the "compression value" is not the
-    * same as what appears in the PNG tEXt/zTXt/iTXt chunk's "compression flag"
-    * which is always 0 or 1, or its "compression method" which is always 0.
-    */
-   typedef struct png_text_struct
-   {
-      int  compression;       /* compression value:
-                             -1: tEXt, none
-                              0: zTXt, deflate
-                              1: iTXt, none
-                              2: iTXt, deflate  */
-      png_charp key;          /* keyword, 1-79 character description of "text" */
-      png_charp text;         /* comment, may be an empty string (ie "")
-                              or a NULL pointer */
-      png_size_t text_length; /* length of the text string */
-      png_size_t itxt_length; /* length of the itxt string */
-      png_charp lang;         /* language code, 0-79 characters
-                              or a NULL pointer */
-      png_charp lang_key;     /* keyword translated UTF-8 string, 0 or more
-                              chars or a NULL pointer */
-   } png_text;
-   typedef png_text FAR * png_textp;
-   typedef PNG_CONST png_text FAR * png_const_textp;
-   typedef png_text FAR * FAR * png_textpp;
-#endif
-
-   /* Supported compression types for text in PNG files (tEXt, and zTXt).
-    * The values of the PNG_TEXT_COMPRESSION_ defines should NOT be changed. */
-#define PNG_TEXT_COMPRESSION_NONE_WR -3
-#define PNG_TEXT_COMPRESSION_zTXt_WR -2
-#define PNG_TEXT_COMPRESSION_NONE    -1
-#define PNG_TEXT_COMPRESSION_zTXt     0
-#define PNG_ITXT_COMPRESSION_NONE     1
-#define PNG_ITXT_COMPRESSION_zTXt     2
-#define PNG_TEXT_COMPRESSION_LAST     3  /* Not a valid value */
-
-   /* png_time is a way to hold the time in an machine independent way.
-    * Two conversions are provided, both from time_t and struct tm.  There
-    * is no portable way to convert to either of these structures, as far
-    * as I know.  If you know of a portable way, send it to me.  As a side
-    * note - PNG has always been Year 2000 compliant!
-    */
-   typedef struct png_time_struct
-   {
-      png_uint_16 year; /* full year, as in, 1995 */
-      png_byte month;   /* month of year, 1 - 12 */
-      png_byte day;     /* day of month, 1 - 31 */
-      png_byte hour;    /* hour of day, 0 - 23 */
-      png_byte minute;  /* minute of hour, 0 - 59 */
-      png_byte second;  /* second of minute, 0 - 60 (for leap seconds) */
-   } png_time;
-   typedef png_time FAR * png_timep;
-   typedef PNG_CONST png_time FAR * png_const_timep;
-   typedef png_time FAR * FAR * png_timepp;
-
-#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED) || \
-    defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED)
-   /* png_unknown_chunk is a structure to hold queued chunks for which there is
-    * no specific support.  The idea is that we can use this to queue
-    * up private chunks for output even though the library doesn't actually
-    * know about their semantics.
-    */
-   typedef struct png_unknown_chunk_t
-   {
-      png_byte name[5];
-      png_byte *data;
-      png_size_t size;
-
-      /* libpng-using applications should NOT directly modify this byte. */
-      png_byte location; /* mode of operation at read time */
-   }
-
-
-   png_unknown_chunk;
-   typedef png_unknown_chunk FAR * png_unknown_chunkp;
-   typedef PNG_CONST png_unknown_chunk FAR * png_const_unknown_chunkp;
-   typedef png_unknown_chunk FAR * FAR * png_unknown_chunkpp;
-#endif
-
-   /* Values for the unknown chunk location byte */
-
-#define PNG_HAVE_IHDR  0x01
-#define PNG_HAVE_PLTE  0x02
-#define PNG_AFTER_IDAT 0x08
-
-   /* The complete definition of png_info has, as of libpng-1.5.0,
-    * been moved into a separate header file that is not accessible to
-    * applications.  Read libpng-manual.txt or libpng.3 for more info.
-    */
-   typedef struct png_info_def png_info;
-   typedef png_info FAR * png_infop;
-   typedef PNG_CONST png_info FAR * png_const_infop;
-   typedef png_info FAR * FAR * png_infopp;
-
-   /* Maximum positive integer used in PNG is (2^31)-1 */
-#define PNG_UINT_31_MAX ((png_uint_32)0x7fffffffL)
-#define PNG_UINT_32_MAX ((png_uint_32)(-1))
-#define PNG_SIZE_MAX ((png_size_t)(-1))
-
-   /* These are constants for fixed point values encoded in the
-    * PNG specification manner (x100000)
-    */
-#define PNG_FP_1    100000
-#define PNG_FP_HALF  50000
-#define PNG_FP_MAX  ((png_fixed_point)0x7fffffffL)
-#define PNG_FP_MIN  (-PNG_FP_MAX)
-
-   /* These describe the color_type field in png_info. */
-   /* color type masks */
-#define PNG_COLOR_MASK_PALETTE    1
-#define PNG_COLOR_MASK_COLOR      2
-#define PNG_COLOR_MASK_ALPHA      4
-
-   /* color types.  Note that not all combinations are legal */
-#define PNG_COLOR_TYPE_GRAY 0
-#define PNG_COLOR_TYPE_PALETTE  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_PALETTE)
-#define PNG_COLOR_TYPE_RGB        (PNG_COLOR_MASK_COLOR)
-#define PNG_COLOR_TYPE_RGB_ALPHA  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_ALPHA)
-#define PNG_COLOR_TYPE_GRAY_ALPHA (PNG_COLOR_MASK_ALPHA)
-   /* aliases */
-#define PNG_COLOR_TYPE_RGBA  PNG_COLOR_TYPE_RGB_ALPHA
-#define PNG_COLOR_TYPE_GA  PNG_COLOR_TYPE_GRAY_ALPHA
-
-   /* This is for compression type. PNG 1.0-1.2 only define the single type. */
-#define PNG_COMPRESSION_TYPE_BASE 0 /* Deflate method 8, 32K window */
-#define PNG_COMPRESSION_TYPE_DEFAULT PNG_COMPRESSION_TYPE_BASE
-
-   /* This is for filter type. PNG 1.0-1.2 only define the single type. */
-#define PNG_FILTER_TYPE_BASE      0 /* Single row per-byte filtering */
-#define PNG_INTRAPIXEL_DIFFERENCING 64 /* Used only in MNG datastreams */
-#define PNG_FILTER_TYPE_DEFAULT   PNG_FILTER_TYPE_BASE
-
-   /* These are for the interlacing type.  These values should NOT be changed. */
-#define PNG_INTERLACE_NONE        0 /* Non-interlaced image */
-#define PNG_INTERLACE_ADAM7       1 /* Adam7 interlacing */
-#define PNG_INTERLACE_LAST        2 /* Not a valid value */
-
-   /* These are for the oFFs chunk.  These values should NOT be changed. */
-#define PNG_OFFSET_PIXEL          0 /* Offset in pixels */
-#define PNG_OFFSET_MICROMETER     1 /* Offset in micrometers (1/10^6 meter) */
-#define PNG_OFFSET_LAST           2 /* Not a valid value */
-
-   /* These are for the pCAL chunk.  These values should NOT be changed. */
-#define PNG_EQUATION_LINEAR       0 /* Linear transformation */
-#define PNG_EQUATION_BASE_E       1 /* Exponential base e transform */
-#define PNG_EQUATION_ARBITRARY    2 /* Arbitrary base exponential transform */
-#define PNG_EQUATION_HYPERBOLIC   3 /* Hyperbolic sine transformation */
-#define PNG_EQUATION_LAST         4 /* Not a valid value */
-
-   /* These are for the sCAL chunk.  These values should NOT be changed. */
-#define PNG_SCALE_UNKNOWN         0 /* unknown unit (image scale) */
-#define PNG_SCALE_METER           1 /* meters per pixel */
-#define PNG_SCALE_RADIAN          2 /* radians per pixel */
-#define PNG_SCALE_LAST            3 /* Not a valid value */
-
-   /* These are for the pHYs chunk.  These values should NOT be changed. */
-#define PNG_RESOLUTION_UNKNOWN    0 /* pixels/unknown unit (aspect ratio) */
-#define PNG_RESOLUTION_METER      1 /* pixels/meter */
-#define PNG_RESOLUTION_LAST       2 /* Not a valid value */
-
-   /* These are for the sRGB chunk.  These values should NOT be changed. */
-#define PNG_sRGB_INTENT_PERCEPTUAL 0
-#define PNG_sRGB_INTENT_RELATIVE   1
-#define PNG_sRGB_INTENT_SATURATION 2
-#define PNG_sRGB_INTENT_ABSOLUTE   3
-#define PNG_sRGB_INTENT_LAST       4 /* Not a valid value */
-
-   /* This is for text chunks */
-#define PNG_KEYWORD_MAX_LENGTH     79
-
-   /* Maximum number of entries in PLTE/sPLT/tRNS arrays */
-#define PNG_MAX_PALETTE_LENGTH    256
-
-   /* These determine if an ancillary chunk's data has been successfully read
-    * from the PNG header, or if the application has filled in the corresponding
-    * data in the info_struct to be written into the output file.  The values
-    * of the PNG_INFO_<chunk> defines should NOT be changed.
-    */
-#define PNG_INFO_gAMA 0x0001
-#define PNG_INFO_sBIT 0x0002
-#define PNG_INFO_cHRM 0x0004
-#define PNG_INFO_PLTE 0x0008
-#define PNG_INFO_tRNS 0x0010
-#define PNG_INFO_bKGD 0x0020
-#define PNG_INFO_hIST 0x0040
-#define PNG_INFO_pHYs 0x0080
-#define PNG_INFO_oFFs 0x0100
-#define PNG_INFO_tIME 0x0200
-#define PNG_INFO_pCAL 0x0400
-#define PNG_INFO_sRGB 0x0800   /* GR-P, 0.96a */
-#define PNG_INFO_iCCP 0x1000   /* ESR, 1.0.6 */
-#define PNG_INFO_sPLT 0x2000   /* ESR, 1.0.6 */
-#define PNG_INFO_sCAL 0x4000   /* ESR, 1.0.6 */
-#define PNG_INFO_IDAT 0x8000   /* ESR, 1.0.6 */
-
-   /* This is used for the transformation routines, as some of them
-    * change these values for the row.  It also should enable using
-    * the routines for other purposes.
-    */
-   typedef struct png_row_info_struct
-   {
-      png_uint_32 width;    /* width of row */
-      png_size_t rowbytes;  /* number of bytes in row */
-      png_byte color_type;  /* color type of row */
-      png_byte bit_depth;   /* bit depth of row */
-      png_byte channels;    /* number of channels (1, 2, 3, or 4) */
-      png_byte pixel_depth; /* bits per pixel (depth * channels) */
-   } png_row_info;
-
-   typedef png_row_info FAR * png_row_infop;
-   typedef png_row_info FAR * FAR * png_row_infopp;
-
-   /* The complete definition of png_struct has, as of libpng-1.5.0,
-    * been moved into a separate header file that is not accessible to
-    * applications.  Read libpng-manual.txt or libpng.3 for more info.
-    */
-   typedef struct png_struct_def png_struct;
-   typedef PNG_CONST png_struct FAR * png_const_structp;
-   typedef png_struct FAR * png_structp;
-
-   /* These are the function types for the I/O functions and for the functions
-    * that allow the user to override the default I/O functions with his or her
-    * own.  The png_error_ptr type should match that of user-supplied warning
-    * and error functions, while the png_rw_ptr type should match that of the
-    * user read/write data functions.  Note that the 'write' function must not
-    * modify the buffer it is passed. The 'read' function, on the other hand, is
-    * expected to return the read data in the buffer.
-    */
-   typedef PNG_CALLBACK(void, *png_error_ptr, (png_structp, png_const_charp));
-   typedef PNG_CALLBACK(void, *png_rw_ptr, (png_structp, png_bytep, png_size_t));
-   typedef PNG_CALLBACK(void, *png_flush_ptr, (png_structp));
-   typedef PNG_CALLBACK(void, *png_read_status_ptr, (png_structp, png_uint_32,
-                        int));
-   typedef PNG_CALLBACK(void, *png_write_status_ptr, (png_structp, png_uint_32,
-                        int));
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-   typedef PNG_CALLBACK(void, *png_progressive_info_ptr, (png_structp, png_infop));
-   typedef PNG_CALLBACK(void, *png_progressive_end_ptr, (png_structp, png_infop));
-
-   /* The following callback receives png_uint_32 row_number, int pass for the
-    * png_bytep data of the row.  When transforming an interlaced image the
-    * row number is the row number within the sub-image of the interlace pass, so
-    * the value will increase to the height of the sub-image (not the full image)
-    * then reset to 0 for the next pass.
-    *
-    * Use PNG_ROW_FROM_PASS_ROW(row, pass) and PNG_COL_FROM_PASS_COL(col, pass) to
-    * find the output pixel (x,y) given an interlaced sub-image pixel
-    * (row,col,pass).  (See below for these macros.)
-    */
-   typedef PNG_CALLBACK(void, *png_progressive_row_ptr, (png_structp, png_bytep,
-                        png_uint_32, int));
-#endif
-
-#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
-    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
-   typedef PNG_CALLBACK(void, *png_user_transform_ptr, (png_structp, png_row_infop,
-                        png_bytep));
-#endif
-
-#ifdef PNG_USER_CHUNKS_SUPPORTED
-   typedef PNG_CALLBACK(int, *png_user_chunk_ptr, (png_structp,
-                        png_unknown_chunkp));
-#endif
-#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
-   typedef PNG_CALLBACK(void, *png_unknown_chunk_ptr, (png_structp));
-#endif
-
-#ifdef PNG_SETJMP_SUPPORTED
-   /* This must match the function definition in <setjmp.h>, and the application
-    * must include this before png.h to obtain the definition of jmp_buf.  The
-    * function is required to be PNG_NORETURN, but this is not checked.  If the
-    * function does return the application will crash via an abort() or similar
-    * system level call.
-    *
-    * If you get a warning here while building the library you may need to make
-    * changes to ensure that pnglibconf.h records the calling convention used by
-    * your compiler.  This may be very difficult - try using a different compiler
-    * to build the library!
-    */
-   PNG_FUNCTION(void, (PNGCAPI *png_longjmp_ptr), PNGARG((jmp_buf, int)), typedef);
-#endif
-
-   /* Transform masks for the high-level interface */
-#define PNG_TRANSFORM_IDENTITY       0x0000    /* read and write */
-#define PNG_TRANSFORM_STRIP_16       0x0001    /* read only */
-#define PNG_TRANSFORM_STRIP_ALPHA    0x0002    /* read only */
-#define PNG_TRANSFORM_PACKING        0x0004    /* read and write */
-#define PNG_TRANSFORM_PACKSWAP       0x0008    /* read and write */
-#define PNG_TRANSFORM_EXPAND         0x0010    /* read only */
-#define PNG_TRANSFORM_INVERT_MONO    0x0020    /* read and write */
-#define PNG_TRANSFORM_SHIFT          0x0040    /* read and write */
-#define PNG_TRANSFORM_BGR            0x0080    /* read and write */
-#define PNG_TRANSFORM_SWAP_ALPHA     0x0100    /* read and write */
-#define PNG_TRANSFORM_SWAP_ENDIAN    0x0200    /* read and write */
-#define PNG_TRANSFORM_INVERT_ALPHA   0x0400    /* read and write */
-#define PNG_TRANSFORM_STRIP_FILLER   0x0800    /* write only */
-   /* Added to libpng-1.2.34 */
-#define PNG_TRANSFORM_STRIP_FILLER_BEFORE PNG_TRANSFORM_STRIP_FILLER
-#define PNG_TRANSFORM_STRIP_FILLER_AFTER 0x1000 /* write only */
-   /* Added to libpng-1.4.0 */
-#define PNG_TRANSFORM_GRAY_TO_RGB   0x2000      /* read only */
-   /* Added to libpng-1.5.4 */
-#define PNG_TRANSFORM_EXPAND_16     0x4000      /* read only */
-#define PNG_TRANSFORM_SCALE_16      0x8000      /* read only */
-
-   /* Flags for MNG supported features */
-#define PNG_FLAG_MNG_EMPTY_PLTE     0x01
-#define PNG_FLAG_MNG_FILTER_64      0x04
-#define PNG_ALL_MNG_FEATURES        0x05
-
-   /* NOTE: prior to 1.5 these functions had no 'API' style declaration,
-    * this allowed the zlib default functions to be used on Windows
-    * platforms.  In 1.5 the zlib default malloc (which just calls malloc and
-    * ignores the first argument) should be completely compatible with the
-    * following.
-    */
-   typedef PNG_CALLBACK(png_voidp, *png_malloc_ptr, (png_structp,
-                        png_alloc_size_t));
-   typedef PNG_CALLBACK(void, *png_free_ptr, (png_structp, png_voidp));
-
-   typedef png_struct FAR * FAR * png_structpp;
-
-   /* Section 3: exported functions
-    * Here are the function definitions most commonly used.  This is not
-    * the place to find out how to use libpng.  See libpng-manual.txt for the
-    * full explanation, see example.c for the summary.  This just provides
-    * a simple one line description of the use of each function.
-    *
-    * The PNG_EXPORT() and PNG_EXPORTA() macros used below are defined in
-    * pngconf.h and in the *.dfn files in the scripts directory.
-    *
-    *   PNG_EXPORT(ordinal, type, name, (args));
-    *
-    *       ordinal:    ordinal that is used while building
-    *                   *.def files. The ordinal value is only
-    *                   relevant when preprocessing png.h with
-    *                   the *.dfn files for building symbol table
-    *                   entries, and are removed by pngconf.h.
-    *       type:       return type of the function
-    *       name:       function name
-    *       args:       function arguments, with types
-    *
-    * When we wish to append attributes to a function prototype we use
-    * the PNG_EXPORTA() macro instead.
-    *
-    *   PNG_EXPORTA(ordinal, type, name, (args), attributes);
-    *
-    *       ordinal, type, name, and args: same as in PNG_EXPORT().
-    *       attributes: function attributes
-    */
-
-   /* Returns the version number of the library */
-   PNG_EXPORT(1, png_uint_32, png_access_version_number, (void));
-
-   /* Tell lib we have already handled the first <num_bytes> magic bytes.
-    * Handling more than 8 bytes from the beginning of the file is an error.
-    */
-   PNG_EXPORT(2, void, png_set_sig_bytes, (png_structp png_ptr, int num_bytes));
-
-   /* Check sig[start] through sig[start + num_to_check - 1] to see if it's a
-    * PNG file.  Returns zero if the supplied bytes match the 8-byte PNG
-    * signature, and non-zero otherwise.  Having num_to_check == 0 or
-    * start > 7 will always fail (ie return non-zero).
-    */
-   PNG_EXPORT(3, int, png_sig_cmp, (png_const_bytep sig, png_size_t start,
-                                    png_size_t num_to_check));
-
-   /* Simple signature checking function.  This is the same as calling
-    * png_check_sig(sig, n) := !png_sig_cmp(sig, 0, n).
-    */
-#define png_check_sig(sig, n) !png_sig_cmp((sig), 0, (n))
-
-   /* Allocate and initialize png_ptr struct for reading, and any other memory. */
-   PNG_EXPORTA(4, png_structp, png_create_read_struct,
-               (png_const_charp user_png_ver, png_voidp error_ptr,
-                png_error_ptr error_fn, png_error_ptr warn_fn),
-               PNG_ALLOCATED);
-
-   /* Allocate and initialize png_ptr struct for writing, and any other memory */
-   PNG_EXPORTA(5, png_structp, png_create_write_struct,
-               (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
-                png_error_ptr warn_fn),
-               PNG_ALLOCATED);
-
-   PNG_EXPORT(6, png_size_t, png_get_compression_buffer_size,
-              (png_const_structp png_ptr));
-
-   PNG_EXPORT(7, void, png_set_compression_buffer_size, (png_structp png_ptr,
-              png_size_t size));
-
-   /* Moved from pngconf.h in 1.4.0 and modified to ensure setjmp/longjmp
-    * match up.
-    */
-#ifdef PNG_SETJMP_SUPPORTED
-   /* This function returns the jmp_buf built in to *png_ptr.  It must be
-    * supplied with an appropriate 'longjmp' function to use on that jmp_buf
-    * unless the default error function is overridden in which case NULL is
-    * acceptable.  The size of the jmp_buf is checked against the actual size
-    * allocated by the library - the call will return NULL on a mismatch
-    * indicating an ABI mismatch.
-    */
-   PNG_EXPORT(8, jmp_buf*, png_set_longjmp_fn, (png_structp png_ptr,
-              png_longjmp_ptr longjmp_fn, size_t jmp_buf_size));
-#  define png_jmpbuf(png_ptr) \
-      (*png_set_longjmp_fn((png_ptr), longjmp, sizeof (jmp_buf)))
-#else
-#  define png_jmpbuf(png_ptr) \
-      (LIBPNG_WAS_COMPILED_WITH__PNG_NO_SETJMP)
-#endif
-   /* This function should be used by libpng applications in place of
-    * longjmp(png_ptr->jmpbuf, val).  If longjmp_fn() has been set, it
-    * will use it; otherwise it will call PNG_ABORT().  This function was
-    * added in libpng-1.5.0.
-    */
-   PNG_EXPORTA(9, void, png_longjmp, (png_structp png_ptr, int val),
-               PNG_NORETURN);
-
-#ifdef PNG_READ_SUPPORTED
-   /* Reset the compression stream */
-   PNG_EXPORT(10, int, png_reset_zstream, (png_structp png_ptr));
-#endif
-
-   /* New functions added in libpng-1.0.2 (not enabled by default until 1.2.0) */
-#ifdef PNG_USER_MEM_SUPPORTED
-   PNG_EXPORTA(11, png_structp, png_create_read_struct_2,
-               (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
-                png_error_ptr warn_fn,
-                png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn),
-               PNG_ALLOCATED);
-   PNG_EXPORTA(12, png_structp, png_create_write_struct_2,
-               (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn,
-                png_error_ptr warn_fn,
-                png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn),
-               PNG_ALLOCATED);
-#endif
-
-   /* Write the PNG file signature. */
-   PNG_EXPORT(13, void, png_write_sig, (png_structp png_ptr));
-
-   /* Write a PNG chunk - size, type, (optional) data, CRC. */
-   PNG_EXPORT(14, void, png_write_chunk, (png_structp png_ptr, png_const_bytep
-                                          chunk_name, png_const_bytep data, png_size_t length));
-
-   /* Write the start of a PNG chunk - length and chunk name. */
-   PNG_EXPORT(15, void, png_write_chunk_start, (png_structp png_ptr,
-              png_const_bytep chunk_name, png_uint_32 length));
-
-   /* Write the data of a PNG chunk started with png_write_chunk_start(). */
-   PNG_EXPORT(16, void, png_write_chunk_data, (png_structp png_ptr,
-              png_const_bytep data, png_size_t length));
-
-   /* Finish a chunk started with png_write_chunk_start() (includes CRC). */
-   PNG_EXPORT(17, void, png_write_chunk_end, (png_structp png_ptr));
-
-   /* Allocate and initialize the info structure */
-   PNG_EXPORTA(18, png_infop, png_create_info_struct, (png_structp png_ptr),
-               PNG_ALLOCATED);
-
-   PNG_EXPORT(19, void, png_info_init_3, (png_infopp info_ptr,
-                                          png_size_t png_info_struct_size));
-
-   /* Writes all the PNG information before the image. */
-   PNG_EXPORT(20, void, png_write_info_before_PLTE,
-              (png_structp png_ptr, png_infop info_ptr));
-   PNG_EXPORT(21, void, png_write_info,
-              (png_structp png_ptr, png_infop info_ptr));
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-   /* Read the information before the actual image data. */
-   PNG_EXPORT(22, void, png_read_info,
-              (png_structp png_ptr, png_infop info_ptr));
-#endif
-
-#ifdef PNG_TIME_RFC1123_SUPPORTED
-   PNG_EXPORT(23, png_const_charp, png_convert_to_rfc1123,
-              (png_structp png_ptr,
-               png_const_timep ptime));
-#endif
-
-#ifdef PNG_CONVERT_tIME_SUPPORTED
-   /* Convert from a struct tm to png_time */
-   PNG_EXPORT(24, void, png_convert_from_struct_tm, (png_timep ptime,
-              PNG_CONST struct tm FAR * ttime));
-
-   /* Convert from time_t to png_time.  Uses gmtime() */
-   PNG_EXPORT(25, void, png_convert_from_time_t,
-              (png_timep ptime, time_t ttime));
-#endif /* PNG_CONVERT_tIME_SUPPORTED */
-
-#ifdef PNG_READ_EXPAND_SUPPORTED
-   /* Expand data to 24-bit RGB, or 8-bit grayscale, with alpha if available. */
-   PNG_EXPORT(26, void, png_set_expand, (png_structp png_ptr));
-   PNG_EXPORT(27, void, png_set_expand_gray_1_2_4_to_8, (png_structp png_ptr));
-   PNG_EXPORT(28, void, png_set_palette_to_rgb, (png_structp png_ptr));
-   PNG_EXPORT(29, void, png_set_tRNS_to_alpha, (png_structp png_ptr));
-#endif
-
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-   /* Expand to 16-bit channels, forces conversion of palette to RGB and expansion
-    * of a tRNS chunk if present.
-    */
-   PNG_EXPORT(221, void, png_set_expand_16, (png_structp png_ptr));
-#endif
-
-#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
-   /* Use blue, green, red order for pixels. */
-   PNG_EXPORT(30, void, png_set_bgr, (png_structp png_ptr));
-#endif
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   /* Expand the grayscale to 24-bit RGB if necessary. */
-   PNG_EXPORT(31, void, png_set_gray_to_rgb, (png_structp png_ptr));
-#endif
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-   /* Reduce RGB to grayscale. */
-#define PNG_ERROR_ACTION_NONE  1
-#define PNG_ERROR_ACTION_WARN  2
-#define PNG_ERROR_ACTION_ERROR 3
-#define PNG_RGB_TO_GRAY_DEFAULT (-1)/*for red/green coefficients*/
-
-   PNG_FP_EXPORT(32, void, png_set_rgb_to_gray, (png_structp png_ptr,
-                 int error_action, double red, double green));
-   PNG_FIXED_EXPORT(33, void, png_set_rgb_to_gray_fixed, (png_structp png_ptr,
-                    int error_action, png_fixed_point red, png_fixed_point green));
-
-   PNG_EXPORT(34, png_byte, png_get_rgb_to_gray_status, (png_const_structp
-              png_ptr));
-#endif
-
-#ifdef PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED
-   PNG_EXPORT(35, void, png_build_grayscale_palette, (int bit_depth,
-              png_colorp palette));
-#endif
-
-#ifdef PNG_READ_ALPHA_MODE_SUPPORTED
-   /* How the alpha channel is interpreted - this affects how the color channels of
-    * a PNG file are returned when an alpha channel, or tRNS chunk in a palette
-    * file, is present.
-    *
-    * This has no effect on the way pixels are written into a PNG output
-    * datastream. The color samples in a PNG datastream are never premultiplied
-    * with the alpha samples.
-    *
-    * The default is to return data according to the PNG specification: the alpha
-    * channel is a linear measure of the contribution of the pixel to the
-    * corresponding composited pixel.  The gamma encoded color channels must be
-    * scaled according to the contribution and to do this it is necessary to undo
-    * the encoding, scale the color values, perform the composition and reencode
-    * the values.  This is the 'PNG' mode.
-    *
-    * The alternative is to 'associate' the alpha with the color information by
-    * storing color channel values that have been scaled by the alpha.  The
-    * advantage is that the color channels can be resampled (the image can be
-    * scaled) in this form.  The disadvantage is that normal practice is to store
-    * linear, not (gamma) encoded, values and this requires 16-bit channels for
-    * still images rather than the 8-bit channels that are just about sufficient if
-    * gamma encoding is used.  In addition all non-transparent pixel values,
-    * including completely opaque ones, must be gamma encoded to produce the final
-    * image.  This is the 'STANDARD', 'ASSOCIATED' or 'PREMULTIPLIED' mode (the
-    * latter being the two common names for associated alpha color channels.)
-    *
-    * Since it is not necessary to perform arithmetic on opaque color values so
-    * long as they are not to be resampled and are in the final color space it is
-    * possible to optimize the handling of alpha by storing the opaque pixels in
-    * the PNG format (adjusted for the output color space) while storing partially
-    * opaque pixels in the standard, linear, format.  The accuracy required for
-    * standard alpha composition is relatively low, because the pixels are
-    * isolated, therefore typically the accuracy loss in storing 8-bit linear
-    * values is acceptable.  (This is not true if the alpha channel is used to
-    * simulate transparency over large areas - use 16 bits or the PNG mode in
-    * this case!)  This is the 'OPTIMIZED' mode.  For this mode a pixel is
-    * treated as opaque only if the alpha value is equal to the maximum value.
-    *
-    * The final choice is to gamma encode the alpha channel as well.  This is
-    * broken because, in practice, no implementation that uses this choice
-    * correctly undoes the encoding before handling alpha composition.  Use this
-    * choice only if other serious errors in the software or hardware you use
-    * mandate it; the typical serious error is for dark halos to appear around
-    * opaque areas of the composited PNG image because of arithmetic overflow.
-    *
-    * The API function png_set_alpha_mode specifies which of these choices to use
-    * with an enumerated 'mode' value and the gamma of the required output:
-    */
-#define PNG_ALPHA_PNG           0 /* according to the PNG standard */
-#define PNG_ALPHA_STANDARD      1 /* according to Porter/Duff */
-#define PNG_ALPHA_ASSOCIATED    1 /* as above; this is the normal practice */
-#define PNG_ALPHA_PREMULTIPLIED 1 /* as above */
-#define PNG_ALPHA_OPTIMIZED     2 /* 'PNG' for opaque pixels, else 'STANDARD' */
-#define PNG_ALPHA_BROKEN        3 /* the alpha channel is gamma encoded */
-
-   PNG_FP_EXPORT(227, void, png_set_alpha_mode, (png_structp png_ptr, int mode,
-                 double output_gamma));
-   PNG_FIXED_EXPORT(228, void, png_set_alpha_mode_fixed, (png_structp png_ptr,
-                    int mode, png_fixed_point output_gamma));
-#endif
-
-#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_ALPHA_MODE_SUPPORTED)
-   /* The output_gamma value is a screen gamma in libpng terminology: it expresses
-    * how to decode the output values, not how they are encoded.  The values used
-    * correspond to the normal numbers used to describe the overall gamma of a
-    * computer display system; for example 2.2 for an sRGB conformant system.  The
-    * values are scaled by 100000 in the _fixed version of the API (so 220000 for
-    * sRGB.)
-    *
-    * The inverse of the value is always used to provide a default for the PNG file
-    * encoding if it has no gAMA chunk and if png_set_gamma() has not been called
-    * to override the PNG gamma information.
-    *
-    * When the ALPHA_OPTIMIZED mode is selected the output gamma is used to encode
-    * opaque pixels however pixels with lower alpha values are not encoded,
-    * regardless of the output gamma setting.
-    *
-    * When the standard Porter Duff handling is requested with mode 1 the output
-    * encoding is set to be linear and the output_gamma value is only relevant
-    * as a default for input data that has no gamma information.  The linear output
-    * encoding will be overridden if png_set_gamma() is called - the results may be
-    * highly unexpected!
-    *
-    * The following numbers are derived from the sRGB standard and the research
-    * behind it.  sRGB is defined to be approximated by a PNG gAMA chunk value of
-    * 0.45455 (1/2.2) for PNG.  The value implicitly includes any viewing
-    * correction required to take account of any differences in the color
-    * environment of the original scene and the intended display environment; the
-    * value expresses how to *decode* the image for display, not how the original
-    * data was *encoded*.
-    *
-    * sRGB provides a peg for the PNG standard by defining a viewing environment.
-    * sRGB itself, and earlier TV standards, actually use a more complex transform
-    * (a linear portion then a gamma 2.4 power law) than PNG can express.  (PNG is
-    * limited to simple power laws.)  By saying that an image for direct display on
-    * an sRGB conformant system should be stored with a gAMA chunk value of 45455
-    * (11.3.3.2 and 11.3.3.5 of the ISO PNG specification) the PNG specification
-    * makes it possible to derive values for other display systems and
-    * environments.
-    *
-    * The Mac value is deduced from the sRGB based on an assumption that the actual
-    * extra viewing correction used in early Mac display systems was implemented as
-    * a power 1.45 lookup table.
-    *
-    * Any system where a programmable lookup table is used or where the behavior of
-    * the final display device characteristics can be changed requires system
-    * specific code to obtain the current characteristic.  However this can be
-    * difficult and most PNG gamma correction only requires an approximate value.
-    *
-    * By default, if png_set_alpha_mode() is not called, libpng assumes that all
-    * values are unencoded, linear, values and that the output device also has a
-    * linear characteristic.  This is only very rarely correct - it is invariably
-    * better to call png_set_alpha_mode() with PNG_DEFAULT_sRGB than rely on the
-    * default if you don't know what the right answer is!
-    *
-    * The special value PNG_GAMMA_MAC_18 indicates an older Mac system (pre Mac OS
-    * 10.6) which used a correction table to implement a somewhat lower gamma on an
-    * otherwise sRGB system.
-    *
-    * Both these values are reserved (not simple gamma values) in order to allow
-    * more precise correction internally in the future.
-    *
-    * NOTE: the following values can be passed to either the fixed or floating
-    * point APIs, but the floating point API will also accept floating point
-    * values.
-    */
-#define PNG_DEFAULT_sRGB -1       /* sRGB gamma and color space */
-#define PNG_GAMMA_MAC_18 -2       /* Old Mac '1.8' gamma and color space */
-#define PNG_GAMMA_sRGB   220000   /* Television standards--matches sRGB gamma */
-#define PNG_GAMMA_LINEAR PNG_FP_1 /* Linear */
-#endif
-
-   /* The following are examples of calls to png_set_alpha_mode to achieve the
-    * required overall gamma correction and, where necessary, alpha
-    * premultiplication.
-    *
-    * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_DEFAULT_sRGB);
-    *    This is the default libpng handling of the alpha channel - it is not
-    *    pre-multiplied into the color components.  In addition the call states
-    *    that the output is for a sRGB system and causes all PNG files without gAMA
-    *    chunks to be assumed to be encoded using sRGB.
-    *
-    * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_GAMMA_MAC);
-    *    In this case the output is assumed to be something like an sRGB conformant
-    *    display preceeded by a power-law lookup table of power 1.45.  This is how
-    *    early Mac systems behaved.
-    *
-    * png_set_alpha_mode(pp, PNG_ALPHA_STANDARD, PNG_GAMMA_LINEAR);
-    *    This is the classic Jim Blinn approach and will work in academic
-    *    environments where everything is done by the book.  It has the shortcoming
-    *    of assuming that input PNG data with no gamma information is linear - this
-    *    is unlikely to be correct unless the PNG files where generated locally.
-    *    Most of the time the output precision will be so low as to show
-    *    significant banding in dark areas of the image.
-    *
-    * png_set_expand_16(pp);
-    * png_set_alpha_mode(pp, PNG_ALPHA_STANDARD, PNG_DEFAULT_sRGB);
-    *    This is a somewhat more realistic Jim Blinn inspired approach.  PNG files
-    *    are assumed to have the sRGB encoding if not marked with a gamma value and
-    *    the output is always 16 bits per component.  This permits accurate scaling
-    *    and processing of the data.  If you know that your input PNG files were
-    *    generated locally you might need to replace PNG_DEFAULT_sRGB with the
-    *    correct value for your system.
-    *
-    * png_set_alpha_mode(pp, PNG_ALPHA_OPTIMIZED, PNG_DEFAULT_sRGB);
-    *    If you just need to composite the PNG image onto an existing background
-    *    and if you control the code that does this you can use the optimization
-    *    setting.  In this case you just copy completely opaque pixels to the
-    *    output.  For pixels that are not completely transparent (you just skip
-    *    those) you do the composition math using png_composite or png_composite_16
-    *    below then encode the resultant 8-bit or 16-bit values to match the output
-    *    encoding.
-    *
-    * Other cases
-    *    If neither the PNG nor the standard linear encoding work for you because
-    *    of the software or hardware you use then you have a big problem.  The PNG
-    *    case will probably result in halos around the image.  The linear encoding
-    *    will probably result in a washed out, too bright, image (it's actually too
-    *    contrasty.)  Try the ALPHA_OPTIMIZED mode above - this will probably
-    *    substantially reduce the halos.  Alternatively try:
-    *
-    * png_set_alpha_mode(pp, PNG_ALPHA_BROKEN, PNG_DEFAULT_sRGB);
-    *    This option will also reduce the halos, but there will be slight dark
-    *    halos round the opaque parts of the image where the background is light.
-    *    In the OPTIMIZED mode the halos will be light halos where the background
-    *    is dark.  Take your pick - the halos are unavoidable unless you can get
-    *    your hardware/software fixed!  (The OPTIMIZED approach is slightly
-    *    faster.)
-    *
-    * When the default gamma of PNG files doesn't match the output gamma.
-    *    If you have PNG files with no gamma information png_set_alpha_mode allows
-    *    you to provide a default gamma, but it also sets the ouput gamma to the
-    *    matching value.  If you know your PNG files have a gamma that doesn't
-    *    match the output you can take advantage of the fact that
-    *    png_set_alpha_mode always sets the output gamma but only sets the PNG
-    *    default if it is not already set:
-    *
-    * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_DEFAULT_sRGB);
-    * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_GAMMA_MAC);
-    *    The first call sets both the default and the output gamma values, the
-    *    second call overrides the output gamma without changing the default.  This
-    *    is easier than achieving the same effect with png_set_gamma.  You must use
-    *    PNG_ALPHA_PNG for the first call - internal checking in png_set_alpha will
-    *    fire if more than one call to png_set_alpha_mode and png_set_background is
-    *    made in the same read operation, however multiple calls with PNG_ALPHA_PNG
-    *    are ignored.
-    */
-
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-   PNG_EXPORT(36, void, png_set_strip_alpha, (png_structp png_ptr));
-#endif
-
-#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \
-    defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
-   PNG_EXPORT(37, void, png_set_swap_alpha, (png_structp png_ptr));
-#endif
-
-#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \
-    defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
-   PNG_EXPORT(38, void, png_set_invert_alpha, (png_structp png_ptr));
-#endif
-
-#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
-   /* Add a filler byte to 8-bit Gray or 24-bit RGB images. */
-   PNG_EXPORT(39, void, png_set_filler, (png_structp png_ptr, png_uint_32 filler,
-                                         int flags));
-   /* The values of the PNG_FILLER_ defines should NOT be changed */
-#  define PNG_FILLER_BEFORE 0
-#  define PNG_FILLER_AFTER 1
-   /* Add an alpha byte to 8-bit Gray or 24-bit RGB images. */
-   PNG_EXPORT(40, void, png_set_add_alpha,
-              (png_structp png_ptr, png_uint_32 filler,
-               int flags));
-#endif /* PNG_READ_FILLER_SUPPORTED || PNG_WRITE_FILLER_SUPPORTED */
-
-#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
-   /* Swap bytes in 16-bit depth files. */
-   PNG_EXPORT(41, void, png_set_swap, (png_structp png_ptr));
-#endif
-
-#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED)
-   /* Use 1 byte per pixel in 1, 2, or 4-bit depth files. */
-   PNG_EXPORT(42, void, png_set_packing, (png_structp png_ptr));
-#endif
-
-#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \
-    defined(PNG_WRITE_PACKSWAP_SUPPORTED)
-   /* Swap packing order of pixels in bytes. */
-   PNG_EXPORT(43, void, png_set_packswap, (png_structp png_ptr));
-#endif
-
-#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
-   /* Converts files to legal bit depths. */
-   PNG_EXPORT(44, void, png_set_shift, (png_structp png_ptr, png_const_color_8p
-                                        true_bits));
-#endif
-
-#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
-    defined(PNG_WRITE_INTERLACING_SUPPORTED)
-   /* Have the code handle the interlacing.  Returns the number of passes.
-    * MUST be called before png_read_update_info or png_start_read_image,
-    * otherwise it will not have the desired effect.  Note that it is still
-    * necessary to call png_read_row or png_read_rows png_get_image_height
-    * times for each pass.
-   */
-   PNG_EXPORT(45, int, png_set_interlace_handling, (png_structp png_ptr));
-#endif
-
-#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
-   /* Invert monochrome files */
-   PNG_EXPORT(46, void, png_set_invert_mono, (png_structp png_ptr));
-#endif
-
-#ifdef PNG_READ_BACKGROUND_SUPPORTED
-   /* Handle alpha and tRNS by replacing with a background color.  Prior to
-    * libpng-1.5.4 this API must not be called before the PNG file header has been
-    * read.  Doing so will result in unexpected behavior and possible warnings or
-    * errors if the PNG file contains a bKGD chunk.
-    */
-   PNG_FP_EXPORT(47, void, png_set_background, (png_structp png_ptr,
-                 png_const_color_16p background_color, int background_gamma_code,
-                 int need_expand, double background_gamma));
-   PNG_FIXED_EXPORT(215, void, png_set_background_fixed, (png_structp png_ptr,
-                    png_const_color_16p background_color, int background_gamma_code,
-                    int need_expand, png_fixed_point background_gamma));
-#endif
-#ifdef PNG_READ_BACKGROUND_SUPPORTED
-#  define PNG_BACKGROUND_GAMMA_UNKNOWN 0
-#  define PNG_BACKGROUND_GAMMA_SCREEN  1
-#  define PNG_BACKGROUND_GAMMA_FILE    2
-#  define PNG_BACKGROUND_GAMMA_UNIQUE  3
-#endif
-
-#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-   /* Scale a 16-bit depth file down to 8-bit, accurately. */
-   PNG_EXPORT(229, void, png_set_scale_16, (png_structp png_ptr));
-#endif
-
-#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-#define PNG_READ_16_TO_8 SUPPORTED /* Name prior to 1.5.4 */
-   /* Strip the second byte of information from a 16-bit depth file. */
-   PNG_EXPORT(48, void, png_set_strip_16, (png_structp png_ptr));
-#endif
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-   /* Turn on quantizing, and reduce the palette to the number of colors
-    * available.
-    */
-   PNG_EXPORT(49, void, png_set_quantize,
-              (png_structp png_ptr, png_colorp palette,
-               int num_palette, int maximum_colors, png_const_uint_16p histogram,
-               int full_quantize));
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   /* The threshold on gamma processing is configurable but hard-wired into the
-    * library.  The following is the floating point variant.
-    */
-#define PNG_GAMMA_THRESHOLD (PNG_GAMMA_THRESHOLD_FIXED*.00001)
-
-   /* Handle gamma correction. Screen_gamma=(display_exponent).
-    * NOTE: this API simply sets the screen and file gamma values. It will
-    * therefore override the value for gamma in a PNG file if it is called after
-    * the file header has been read - use with care  - call before reading the PNG
-    * file for best results!
-    *
-    * These routines accept the same gamma values as png_set_alpha_mode (described
-    * above).  The PNG_GAMMA_ defines and PNG_DEFAULT_sRGB can be passed to either
-    * API (floating point or fixed.)  Notice, however, that the 'file_gamma' value
-    * is the inverse of a 'screen gamma' value.
-    */
-   PNG_FP_EXPORT(50, void, png_set_gamma,
-                 (png_structp png_ptr, double screen_gamma,
-                  double override_file_gamma));
-   PNG_FIXED_EXPORT(208, void, png_set_gamma_fixed, (png_structp png_ptr,
-                    png_fixed_point screen_gamma, png_fixed_point override_file_gamma));
-#endif
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-   /* Set how many lines between output flushes - 0 for no flushing */
-   PNG_EXPORT(51, void, png_set_flush, (png_structp png_ptr, int nrows));
-   /* Flush the current PNG output buffer */
-   PNG_EXPORT(52, void, png_write_flush, (png_structp png_ptr));
-#endif
-
-   /* Optional update palette with requested transformations */
-   PNG_EXPORT(53, void, png_start_read_image, (png_structp png_ptr));
-
-   /* Optional call to update the users info structure */
-   PNG_EXPORT(54, void, png_read_update_info,
-              (png_structp png_ptr, png_infop info_ptr));
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-   /* Read one or more rows of image data. */
-   PNG_EXPORT(55, void, png_read_rows, (png_structp png_ptr, png_bytepp row,
-                                        png_bytepp display_row, png_uint_32 num_rows));
-#endif
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-   /* Read a row of data. */
-   PNG_EXPORT(56, void, png_read_row, (png_structp png_ptr, png_bytep row,
-                                       png_bytep display_row));
-#endif
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-   /* Read the whole image into memory at once. */
-   PNG_EXPORT(57, void, png_read_image, (png_structp png_ptr, png_bytepp image));
-#endif
-
-   /* Write a row of image data */
-   PNG_EXPORT(58, void, png_write_row,
-              (png_structp png_ptr, png_const_bytep row));
-
-   /* Write a few rows of image data: (*row) is not written; however, the type
-    * is declared as writeable to maintain compatibility with previous versions
-    * of libpng and to allow the 'display_row' array from read_rows to be passed
-    * unchanged to write_rows.
-    */
-   PNG_EXPORT(59, void, png_write_rows, (png_structp png_ptr, png_bytepp row,
-                                         png_uint_32 num_rows));
-
-   /* Write the image data */
-   PNG_EXPORT(60, void, png_write_image,
-              (png_structp png_ptr, png_bytepp image));
-
-   /* Write the end of the PNG file. */
-   PNG_EXPORT(61, void, png_write_end,
-              (png_structp png_ptr, png_infop info_ptr));
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-   /* Read the end of the PNG file. */
-   PNG_EXPORT(62, void, png_read_end, (png_structp png_ptr, png_infop info_ptr));
-#endif
-
-   /* Free any memory associated with the png_info_struct */
-   PNG_EXPORT(63, void, png_destroy_info_struct, (png_structp png_ptr,
-              png_infopp info_ptr_ptr));
-
-   /* Free any memory associated with the png_struct and the png_info_structs */
-   PNG_EXPORT(64, void, png_destroy_read_struct, (png_structpp png_ptr_ptr,
-              png_infopp info_ptr_ptr, png_infopp end_info_ptr_ptr));
-
-   /* Free any memory associated with the png_struct and the png_info_structs */
-   PNG_EXPORT(65, void, png_destroy_write_struct, (png_structpp png_ptr_ptr,
-              png_infopp info_ptr_ptr));
-
-   /* Set the libpng method of handling chunk CRC errors */
-   PNG_EXPORT(66, void, png_set_crc_action,
-              (png_structp png_ptr, int crit_action, int ancil_action));
-
-   /* Values for png_set_crc_action() say how to handle CRC errors in
-    * ancillary and critical chunks, and whether to use the data contained
-    * therein.  Note that it is impossible to "discard" data in a critical
-    * chunk.  For versions prior to 0.90, the action was always error/quit,
-    * whereas in version 0.90 and later, the action for CRC errors in ancillary
-    * chunks is warn/discard.  These values should NOT be changed.
-    *
-    *      value                       action:critical     action:ancillary
-    */
-#define PNG_CRC_DEFAULT       0  /* error/quit          warn/discard data */
-#define PNG_CRC_ERROR_QUIT    1  /* error/quit          error/quit        */
-#define PNG_CRC_WARN_DISCARD  2  /* (INVALID)           warn/discard data */
-#define PNG_CRC_WARN_USE      3  /* warn/use data       warn/use data     */
-#define PNG_CRC_QUIET_USE     4  /* quiet/use data      quiet/use data    */
-#define PNG_CRC_NO_CHANGE     5  /* use current value   use current value */
-
-   /* These functions give the user control over the scan-line filtering in
-    * libpng and the compression methods used by zlib.  These functions are
-    * mainly useful for testing, as the defaults should work with most users.
-    * Those users who are tight on memory or want faster performance at the
-    * expense of compression can modify them.  See the compression library
-    * header file (zlib.h) for an explination of the compression functions.
-    */
-
-   /* Set the filtering method(s) used by libpng.  Currently, the only valid
-    * value for "method" is 0.
-    */
-   PNG_EXPORT(67, void, png_set_filter,
-              (png_structp png_ptr, int method, int filters));
-
-   /* Flags for png_set_filter() to say which filters to use.  The flags
-    * are chosen so that they don't conflict with real filter types
-    * below, in case they are supplied instead of the #defined constants.
-    * These values should NOT be changed.
-    */
-#define PNG_NO_FILTERS     0x00
-#define PNG_FILTER_NONE    0x08
-#define PNG_FILTER_SUB     0x10
-#define PNG_FILTER_UP      0x20
-#define PNG_FILTER_AVG     0x40
-#define PNG_FILTER_PAETH   0x80
-#define PNG_ALL_FILTERS (PNG_FILTER_NONE | PNG_FILTER_SUB | PNG_FILTER_UP | \
-                         PNG_FILTER_AVG | PNG_FILTER_PAETH)
-
-   /* Filter values (not flags) - used in pngwrite.c, pngwutil.c for now.
-    * These defines should NOT be changed.
-    */
-#define PNG_FILTER_VALUE_NONE  0
-#define PNG_FILTER_VALUE_SUB   1
-#define PNG_FILTER_VALUE_UP    2
-#define PNG_FILTER_VALUE_AVG   3
-#define PNG_FILTER_VALUE_PAETH 4
-#define PNG_FILTER_VALUE_LAST  5
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* EXPERIMENTAL */
-   /* The "heuristic_method" is given by one of the PNG_FILTER_HEURISTIC_
-    * defines, either the default (minimum-sum-of-absolute-differences), or
-    * the experimental method (weighted-minimum-sum-of-absolute-differences).
-    *
-    * Weights are factors >= 1.0, indicating how important it is to keep the
-    * filter type consistent between rows.  Larger numbers mean the current
-    * filter is that many times as likely to be the same as the "num_weights"
-    * previous filters.  This is cumulative for each previous row with a weight.
-    * There needs to be "num_weights" values in "filter_weights", or it can be
-    * NULL if the weights aren't being specified.  Weights have no influence on
-    * the selection of the first row filter.  Well chosen weights can (in theory)
-    * improve the compression for a given image.
-    *
-    * Costs are factors >= 1.0 indicating the relative decoding costs of a
-    * filter type.  Higher costs indicate more decoding expense, and are
-    * therefore less likely to be selected over a filter with lower computational
-    * costs.  There needs to be a value in "filter_costs" for each valid filter
-    * type (given by PNG_FILTER_VALUE_LAST), or it can be NULL if you aren't
-    * setting the costs.  Costs try to improve the speed of decompression without
-    * unduly increasing the compressed image size.
-    *
-    * A negative weight or cost indicates the default value is to be used, and
-    * values in the range [0.0, 1.0) indicate the value is to remain unchanged.
-    * The default values for both weights and costs are currently 1.0, but may
-    * change if good general weighting/cost heuristics can be found.  If both
-    * the weights and costs are set to 1.0, this degenerates the WEIGHTED method
-    * to the UNWEIGHTED method, but with added encoding time/computation.
-    */
-   PNG_FP_EXPORT(68, void, png_set_filter_heuristics, (png_structp png_ptr,
-                 int heuristic_method, int num_weights, png_const_doublep filter_weights,
-                 png_const_doublep filter_costs));
-   PNG_FIXED_EXPORT(209, void, png_set_filter_heuristics_fixed,
-                    (png_structp png_ptr,
-                     int heuristic_method, int num_weights, png_const_fixed_point_p
-                     filter_weights, png_const_fixed_point_p filter_costs));
-#endif /*  PNG_WRITE_WEIGHTED_FILTER_SUPPORTED */
-
-   /* Heuristic used for row filter selection.  These defines should NOT be
-    * changed.
-    */
-#define PNG_FILTER_HEURISTIC_DEFAULT    0  /* Currently "UNWEIGHTED" */
-#define PNG_FILTER_HEURISTIC_UNWEIGHTED 1  /* Used by libpng < 0.95 */
-#define PNG_FILTER_HEURISTIC_WEIGHTED   2  /* Experimental feature */
-#define PNG_FILTER_HEURISTIC_LAST       3  /* Not a valid value */
-
-#ifdef PNG_WRITE_SUPPORTED
-   /* Set the library compression level.  Currently, valid values range from
-    * 0 - 9, corresponding directly to the zlib compression levels 0 - 9
-    * (0 - no compression, 9 - "maximal" compression).  Note that tests have
-    * shown that zlib compression levels 3-6 usually perform as well as level 9
-    * for PNG images, and do considerably fewer caclulations.  In the future,
-    * these values may not correspond directly to the zlib compression levels.
-    */
-   PNG_EXPORT(69, void, png_set_compression_level,
-              (png_structp png_ptr, int level));
-
-   PNG_EXPORT(70, void, png_set_compression_mem_level, (png_structp png_ptr,
-              int mem_level));
-
-   PNG_EXPORT(71, void, png_set_compression_strategy, (png_structp png_ptr,
-              int strategy));
-
-   /* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
-    * smaller value of window_bits if it can do so safely.
-    */
-   PNG_EXPORT(72, void, png_set_compression_window_bits, (png_structp png_ptr,
-              int window_bits));
-
-   PNG_EXPORT(73, void, png_set_compression_method, (png_structp png_ptr,
-              int method));
-#endif
-
-#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
-   /* Also set zlib parameters for compressing non-IDAT chunks */
-   PNG_EXPORT(222, void, png_set_text_compression_level,
-              (png_structp png_ptr, int level));
-
-   PNG_EXPORT(223, void, png_set_text_compression_mem_level, (png_structp png_ptr,
-              int mem_level));
-
-   PNG_EXPORT(224, void, png_set_text_compression_strategy, (png_structp png_ptr,
-              int strategy));
-
-   /* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
-    * smaller value of window_bits if it can do so safely.
-    */
-   PNG_EXPORT(225, void, png_set_text_compression_window_bits, (png_structp
-              png_ptr, int window_bits));
-
-   PNG_EXPORT(226, void, png_set_text_compression_method, (png_structp png_ptr,
-              int method));
-#endif /* PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED */
-
-   /* These next functions are called for input/output, memory, and error
-    * handling.  They are in the file pngrio.c, pngwio.c, and pngerror.c,
-    * and call standard C I/O routines such as fread(), fwrite(), and
-    * fprintf().  These functions can be made to use other I/O routines
-    * at run time for those applications that need to handle I/O in a
-    * different manner by calling png_set_???_fn().  See libpng-manual.txt for
-    * more information.
-    */
-
-#ifdef PNG_STDIO_SUPPORTED
-   /* Initialize the input/output for the PNG file to the default functions. */
-   PNG_EXPORT(74, void, png_init_io, (png_structp png_ptr, png_FILE_p fp));
-#endif
-
-   /* Replace the (error and abort), and warning functions with user
-    * supplied functions.  If no messages are to be printed you must still
-    * write and use replacement functions. The replacement error_fn should
-    * still do a longjmp to the last setjmp location if you are using this
-    * method of error handling.  If error_fn or warning_fn is NULL, the
-    * default function will be used.
-    */
-
-   PNG_EXPORT(75, void, png_set_error_fn,
-              (png_structp png_ptr, png_voidp error_ptr,
-               png_error_ptr error_fn, png_error_ptr warning_fn));
-
-   /* Return the user pointer associated with the error functions */
-   PNG_EXPORT(76, png_voidp, png_get_error_ptr, (png_const_structp png_ptr));
-
-   /* Replace the default data output functions with a user supplied one(s).
-    * If buffered output is not used, then output_flush_fn can be set to NULL.
-    * If PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile time
-    * output_flush_fn will be ignored (and thus can be NULL).
-    * It is probably a mistake to use NULL for output_flush_fn if
-    * write_data_fn is not also NULL unless you have built libpng with
-    * PNG_WRITE_FLUSH_SUPPORTED undefined, because in this case libpng's
-    * default flush function, which uses the standard *FILE structure, will
-    * be used.
-    */
-   PNG_EXPORT(77, void, png_set_write_fn, (png_structp png_ptr, png_voidp io_ptr,
-                                           png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn));
-
-   /* Replace the default data input function with a user supplied one. */
-   PNG_EXPORT(78, void, png_set_read_fn, (png_structp png_ptr, png_voidp io_ptr,
-                                          png_rw_ptr read_data_fn));
-
-   /* Return the user pointer associated with the I/O functions */
-   PNG_EXPORT(79, png_voidp, png_get_io_ptr, (png_structp png_ptr));
-
-   PNG_EXPORT(80, void, png_set_read_status_fn, (png_structp png_ptr,
-              png_read_status_ptr read_row_fn));
-
-   PNG_EXPORT(81, void, png_set_write_status_fn, (png_structp png_ptr,
-              png_write_status_ptr write_row_fn));
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   /* Replace the default memory allocation functions with user supplied one(s). */
-   PNG_EXPORT(82, void, png_set_mem_fn, (png_structp png_ptr, png_voidp mem_ptr,
-                                         png_malloc_ptr malloc_fn, png_free_ptr free_fn));
-   /* Return the user pointer associated with the memory functions */
-   PNG_EXPORT(83, png_voidp, png_get_mem_ptr, (png_const_structp png_ptr));
-#endif
-
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-   PNG_EXPORT(84, void, png_set_read_user_transform_fn, (png_structp png_ptr,
-              png_user_transform_ptr read_user_transform_fn));
-#endif
-
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-   PNG_EXPORT(85, void, png_set_write_user_transform_fn, (png_structp png_ptr,
-              png_user_transform_ptr write_user_transform_fn));
-#endif
-
-#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
-   PNG_EXPORT(86, void, png_set_user_transform_info, (png_structp png_ptr,
-              png_voidp user_transform_ptr, int user_transform_depth,
-              int user_transform_channels));
-   /* Return the user pointer associated with the user transform functions */
-   PNG_EXPORT(87, png_voidp, png_get_user_transform_ptr,
-              (png_const_structp png_ptr));
-#endif
-
-#ifdef PNG_USER_TRANSFORM_INFO_SUPPORTED
-   /* Return information about the row currently being processed.  Note that these
-    * APIs do not fail but will return unexpected results if called outside a user
-    * transform callback.  Also note that when transforming an interlaced image the
-    * row number is the row number within the sub-image of the interlace pass, so
-    * the value will increase to the height of the sub-image (not the full image)
-    * then reset to 0 for the next pass.
-    *
-    * Use PNG_ROW_FROM_PASS_ROW(row, pass) and PNG_COL_FROM_PASS_COL(col, pass) to
-    * find the output pixel (x,y) given an interlaced sub-image pixel
-    * (row,col,pass).  (See below for these macros.)
-    */
-   PNG_EXPORT(217, png_uint_32, png_get_current_row_number, (png_const_structp));
-   PNG_EXPORT(218, png_byte, png_get_current_pass_number, (png_const_structp));
-#endif
-
-#ifdef PNG_USER_CHUNKS_SUPPORTED
-   PNG_EXPORT(88, void, png_set_read_user_chunk_fn, (png_structp png_ptr,
-              png_voidp user_chunk_ptr, png_user_chunk_ptr read_user_chunk_fn));
-   PNG_EXPORT(89, png_voidp, png_get_user_chunk_ptr, (png_const_structp png_ptr));
-#endif
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-   /* Sets the function callbacks for the push reader, and a pointer to a
-    * user-defined structure available to the callback functions.
-    */
-   PNG_EXPORT(90, void, png_set_progressive_read_fn, (png_structp png_ptr,
-              png_voidp progressive_ptr, png_progressive_info_ptr info_fn,
-              png_progressive_row_ptr row_fn, png_progressive_end_ptr end_fn));
-
-   /* Returns the user pointer associated with the push read functions */
-   PNG_EXPORT(91, png_voidp, png_get_progressive_ptr, (png_const_structp png_ptr));
-
-   /* Function to be called when data becomes available */
-   PNG_EXPORT(92, void, png_process_data,
-              (png_structp png_ptr, png_infop info_ptr,
-               png_bytep buffer, png_size_t buffer_size));
-
-   /* A function which may be called *only* within png_process_data to stop the
-    * processing of any more data.  The function returns the number of bytes
-    * remaining, excluding any that libpng has cached internally.  A subsequent
-    * call to png_process_data must supply these bytes again.  If the argument
-    * 'save' is set to true the routine will first save all the pending data and
-    * will always return 0.
-    */
-   PNG_EXPORT(219, png_size_t, png_process_data_pause, (png_structp, int save));
-
-   /* A function which may be called *only* outside (after) a call to
-    * png_process_data.  It returns the number of bytes of data to skip in the
-    * input.  Normally it will return 0, but if it returns a non-zero value the
-    * application must skip than number of bytes of input data and pass the
-    * following data to the next call to png_process_data.
-    */
-   PNG_EXPORT(220, png_uint_32, png_process_data_skip, (png_structp));
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* Function that combines rows.  'new_row' is a flag that should come from
-    * the callback and be non-NULL if anything needs to be done; the library
-    * stores its own version of the new data internally and ignores the passed
-    * in value.
-    */
-   PNG_EXPORT(93, void, png_progressive_combine_row, (png_structp png_ptr,
-              png_bytep old_row, png_const_bytep new_row));
-#endif /* PNG_READ_INTERLACING_SUPPORTED */
-#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
-
-   PNG_EXPORTA(94, png_voidp, png_malloc,
-               (png_structp png_ptr, png_alloc_size_t size),
-               PNG_ALLOCATED);
-   /* Added at libpng version 1.4.0 */
-   PNG_EXPORTA(95, png_voidp, png_calloc,
-               (png_structp png_ptr, png_alloc_size_t size),
-               PNG_ALLOCATED);
-
-   /* Added at libpng version 1.2.4 */
-   PNG_EXPORTA(96, png_voidp, png_malloc_warn, (png_structp png_ptr,
-               png_alloc_size_t size), PNG_ALLOCATED);
-
-   /* Frees a pointer allocated by png_malloc() */
-   PNG_EXPORT(97, void, png_free, (png_structp png_ptr, png_voidp ptr));
-
-   /* Free data that was allocated internally */
-   PNG_EXPORT(98, void, png_free_data,
-              (png_structp png_ptr, png_infop info_ptr, png_uint_32 free_me, int num));
-
-   /* Reassign responsibility for freeing existing data, whether allocated
-    * by libpng or by the application */
-   PNG_EXPORT(99, void, png_data_freer,
-              (png_structp png_ptr, png_infop info_ptr, int freer, png_uint_32 mask));
-
-   /* Assignments for png_data_freer */
-#define PNG_DESTROY_WILL_FREE_DATA 1
-#define PNG_SET_WILL_FREE_DATA 1
-#define PNG_USER_WILL_FREE_DATA 2
-   /* Flags for png_ptr->free_me and info_ptr->free_me */
-#define PNG_FREE_HIST 0x0008
-#define PNG_FREE_ICCP 0x0010
-#define PNG_FREE_SPLT 0x0020
-#define PNG_FREE_ROWS 0x0040
-#define PNG_FREE_PCAL 0x0080
-#define PNG_FREE_SCAL 0x0100
-#define PNG_FREE_UNKN 0x0200
-#define PNG_FREE_LIST 0x0400
-#define PNG_FREE_PLTE 0x1000
-#define PNG_FREE_TRNS 0x2000
-#define PNG_FREE_TEXT 0x4000
-#define PNG_FREE_ALL  0x7fff
-#define PNG_FREE_MUL  0x4220 /* PNG_FREE_SPLT|PNG_FREE_TEXT|PNG_FREE_UNKN */
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   PNG_EXPORTA(100, png_voidp, png_malloc_default, (png_structp png_ptr,
-               png_alloc_size_t size), PNG_ALLOCATED);
-   PNG_EXPORT(101, void, png_free_default, (png_structp png_ptr, png_voidp ptr));
-#endif
-
-#ifdef PNG_ERROR_TEXT_SUPPORTED
-   /* Fatal error in PNG image of libpng - can't continue */
-   PNG_EXPORTA(102, void, png_error,
-               (png_structp png_ptr, png_const_charp error_message),
-               PNG_NORETURN);
-
-   /* The same, but the chunk name is prepended to the error string. */
-   PNG_EXPORTA(103, void, png_chunk_error, (png_structp png_ptr,
-               png_const_charp error_message), PNG_NORETURN);
-
-#else
-   /* Fatal error in PNG image of libpng - can't continue */
-   PNG_EXPORTA(104, void, png_err, (png_structp png_ptr), PNG_NORETURN);
-#endif
-
-#ifdef PNG_WARNINGS_SUPPORTED
-   /* Non-fatal error in libpng.  Can continue, but may have a problem. */
-   PNG_EXPORT(105, void, png_warning, (png_structp png_ptr,
-                                       png_const_charp warning_message));
-
-   /* Non-fatal error in libpng, chunk name is prepended to message. */
-   PNG_EXPORT(106, void, png_chunk_warning, (png_structp png_ptr,
-              png_const_charp warning_message));
-#endif
-
-#ifdef PNG_BENIGN_ERRORS_SUPPORTED
-   /* Benign error in libpng.  Can continue, but may have a problem.
-    * User can choose whether to handle as a fatal error or as a warning. */
-#  undef png_benign_error
-   PNG_EXPORT(107, void, png_benign_error, (png_structp png_ptr,
-              png_const_charp warning_message));
-
-   /* Same, chunk name is prepended to message. */
-#  undef png_chunk_benign_error
-   PNG_EXPORT(108, void, png_chunk_benign_error, (png_structp png_ptr,
-              png_const_charp warning_message));
-
-   PNG_EXPORT(109, void, png_set_benign_errors,
-              (png_structp png_ptr, int allowed));
-#else
-#  ifdef PNG_ALLOW_BENIGN_ERRORS
-#    define png_benign_error png_warning
-#    define png_chunk_benign_error png_chunk_warning
-#  else
-#    define png_benign_error png_error
-#    define png_chunk_benign_error png_chunk_error
-#  endif
-#endif
-
-   /* The png_set_<chunk> functions are for storing values in the png_info_struct.
-    * Similarly, the png_get_<chunk> calls are used to read values from the
-    * png_info_struct, either storing the parameters in the passed variables, or
-    * setting pointers into the png_info_struct where the data is stored.  The
-    * png_get_<chunk> functions return a non-zero value if the data was available
-    * in info_ptr, or return zero and do not change any of the parameters if the
-    * data was not available.
-    *
-    * These functions should be used instead of directly accessing png_info
-    * to avoid problems with future changes in the size and internal layout of
-    * png_info_struct.
-    */
-   /* Returns "flag" if chunk data is valid in info_ptr. */
-   PNG_EXPORT(110, png_uint_32, png_get_valid,
-              (png_const_structp png_ptr, png_const_infop info_ptr,
-               png_uint_32 flag));
-
-   /* Returns number of bytes needed to hold a transformed row. */
-   PNG_EXPORT(111, png_size_t, png_get_rowbytes, (png_const_structp png_ptr,
-              png_const_infop info_ptr));
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-   /* Returns row_pointers, which is an array of pointers to scanlines that was
-    * returned from png_read_png().
-    */
-   PNG_EXPORT(112, png_bytepp, png_get_rows,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-   /* Set row_pointers, which is an array of pointers to scanlines for use
-    * by png_write_png().
-    */
-   PNG_EXPORT(113, void, png_set_rows, (png_structp png_ptr,
-                                        png_infop info_ptr, png_bytepp row_pointers));
-#endif
-
-   /* Returns number of color channels in image. */
-   PNG_EXPORT(114, png_byte, png_get_channels,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-
-#ifdef PNG_EASY_ACCESS_SUPPORTED
-   /* Returns image width in pixels. */
-   PNG_EXPORT(115, png_uint_32, png_get_image_width, (png_const_structp png_ptr,
-              png_const_infop info_ptr));
-
-   /* Returns image height in pixels. */
-   PNG_EXPORT(116, png_uint_32, png_get_image_height, (png_const_structp png_ptr,
-              png_const_infop info_ptr));
-
-   /* Returns image bit_depth. */
-   PNG_EXPORT(117, png_byte, png_get_bit_depth,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-
-   /* Returns image color_type. */
-   PNG_EXPORT(118, png_byte, png_get_color_type, (png_const_structp png_ptr,
-              png_const_infop info_ptr));
-
-   /* Returns image filter_type. */
-   PNG_EXPORT(119, png_byte, png_get_filter_type, (png_const_structp png_ptr,
-              png_const_infop info_ptr));
-
-   /* Returns image interlace_type. */
-   PNG_EXPORT(120, png_byte, png_get_interlace_type, (png_const_structp png_ptr,
-              png_const_infop info_ptr));
-
-   /* Returns image compression_type. */
-   PNG_EXPORT(121, png_byte, png_get_compression_type, (png_const_structp png_ptr,
-              png_const_infop info_ptr));
-
-   /* Returns image resolution in pixels per meter, from pHYs chunk data. */
-   PNG_EXPORT(122, png_uint_32, png_get_pixels_per_meter,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-   PNG_EXPORT(123, png_uint_32, png_get_x_pixels_per_meter,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-   PNG_EXPORT(124, png_uint_32, png_get_y_pixels_per_meter,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-
-   /* Returns pixel aspect ratio, computed from pHYs chunk data.  */
-   PNG_FP_EXPORT(125, float, png_get_pixel_aspect_ratio,
-                 (png_const_structp png_ptr, png_const_infop info_ptr));
-   PNG_FIXED_EXPORT(210, png_fixed_point, png_get_pixel_aspect_ratio_fixed,
-                    (png_const_structp png_ptr, png_const_infop info_ptr));
-
-   /* Returns image x, y offset in pixels or microns, from oFFs chunk data. */
-   PNG_EXPORT(126, png_int_32, png_get_x_offset_pixels,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-   PNG_EXPORT(127, png_int_32, png_get_y_offset_pixels,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-   PNG_EXPORT(128, png_int_32, png_get_x_offset_microns,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-   PNG_EXPORT(129, png_int_32, png_get_y_offset_microns,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-
-#endif /* PNG_EASY_ACCESS_SUPPORTED */
-
-   /* Returns pointer to signature string read from PNG header */
-   PNG_EXPORT(130, png_const_bytep, png_get_signature,
-              (png_const_structp png_ptr, png_infop info_ptr));
-
-#ifdef PNG_bKGD_SUPPORTED
-   PNG_EXPORT(131, png_uint_32, png_get_bKGD,
-              (png_const_structp png_ptr, png_infop info_ptr,
-               png_color_16p *background));
-#endif
-
-#ifdef PNG_bKGD_SUPPORTED
-   PNG_EXPORT(132, void, png_set_bKGD, (png_structp png_ptr, png_infop info_ptr,
-                                        png_const_color_16p background));
-#endif
-
-#ifdef PNG_cHRM_SUPPORTED
-   PNG_FP_EXPORT(133, png_uint_32, png_get_cHRM, (png_const_structp png_ptr,
-                 png_const_infop info_ptr, double *white_x, double *white_y, double *red_x,
-                 double *red_y, double *green_x, double *green_y, double *blue_x,
-                 double *blue_y));
-   PNG_FP_EXPORT(230, png_uint_32, png_get_cHRM_XYZ, (png_structp png_ptr,
-                 png_const_infop info_ptr, double *red_X, double *red_Y, double *red_Z,
-                 double *green_X, double *green_Y, double *green_Z, double *blue_X,
-                 double *blue_Y, double *blue_Z));
-#ifdef PNG_FIXED_POINT_SUPPORTED /* Otherwise not implemented */
-   PNG_FIXED_EXPORT(134, png_uint_32, png_get_cHRM_fixed,
-                    (png_const_structp png_ptr,
-                     png_const_infop info_ptr, png_fixed_point *int_white_x,
-                     png_fixed_point *int_white_y, png_fixed_point *int_red_x,
-                     png_fixed_point *int_red_y, png_fixed_point *int_green_x,
-                     png_fixed_point *int_green_y, png_fixed_point *int_blue_x,
-                     png_fixed_point *int_blue_y));
-#endif
-   PNG_FIXED_EXPORT(231, png_uint_32, png_get_cHRM_XYZ_fixed,
-                    (png_structp png_ptr, png_const_infop info_ptr,
-                     png_fixed_point *int_red_X, png_fixed_point *int_red_Y,
-                     png_fixed_point *int_red_Z, png_fixed_point *int_green_X,
-                     png_fixed_point *int_green_Y, png_fixed_point *int_green_Z,
-                     png_fixed_point *int_blue_X, png_fixed_point *int_blue_Y,
-                     png_fixed_point *int_blue_Z));
-#endif
-
-#ifdef PNG_cHRM_SUPPORTED
-   PNG_FP_EXPORT(135, void, png_set_cHRM,
-                 (png_structp png_ptr, png_infop info_ptr,
-                  double white_x, double white_y, double red_x, double red_y, double green_x,
-                  double green_y, double blue_x, double blue_y));
-   PNG_FP_EXPORT(232, void, png_set_cHRM_XYZ, (png_structp png_ptr,
-                 png_infop info_ptr, double red_X, double red_Y, double red_Z,
-                 double green_X, double green_Y, double green_Z, double blue_X,
-                 double blue_Y, double blue_Z));
-   PNG_FIXED_EXPORT(136, void, png_set_cHRM_fixed, (png_structp png_ptr,
-                    png_infop info_ptr, png_fixed_point int_white_x,
-                    png_fixed_point int_white_y, png_fixed_point int_red_x,
-                    png_fixed_point int_red_y, png_fixed_point int_green_x,
-                    png_fixed_point int_green_y, png_fixed_point int_blue_x,
-                    png_fixed_point int_blue_y));
-   PNG_FIXED_EXPORT(233, void, png_set_cHRM_XYZ_fixed, (png_structp png_ptr,
-                    png_infop info_ptr, png_fixed_point int_red_X, png_fixed_point int_red_Y,
-                    png_fixed_point int_red_Z, png_fixed_point int_green_X,
-                    png_fixed_point int_green_Y, png_fixed_point int_green_Z,
-                    png_fixed_point int_blue_X, png_fixed_point int_blue_Y,
-                    png_fixed_point int_blue_Z));
-#endif
-
-#ifdef PNG_gAMA_SUPPORTED
-   PNG_FP_EXPORT(137, png_uint_32, png_get_gAMA,
-                 (png_const_structp png_ptr, png_const_infop info_ptr,
-                  double *file_gamma));
-   PNG_FIXED_EXPORT(138, png_uint_32, png_get_gAMA_fixed,
-                    (png_const_structp png_ptr, png_const_infop info_ptr,
-                     png_fixed_point *int_file_gamma));
-#endif
-
-#ifdef PNG_gAMA_SUPPORTED
-   PNG_FP_EXPORT(139, void, png_set_gAMA, (png_structp png_ptr,
-                                           png_infop info_ptr, double file_gamma));
-   PNG_FIXED_EXPORT(140, void, png_set_gAMA_fixed, (png_structp png_ptr,
-                    png_infop info_ptr, png_fixed_point int_file_gamma));
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-   PNG_EXPORT(141, png_uint_32, png_get_hIST,
-              (png_const_structp png_ptr, png_const_infop info_ptr,
-               png_uint_16p *hist));
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-   PNG_EXPORT(142, void, png_set_hIST, (png_structp png_ptr,
-                                        png_infop info_ptr, png_const_uint_16p hist));
-#endif
-
-   PNG_EXPORT(143, png_uint_32, png_get_IHDR,
-              (png_structp png_ptr, png_infop info_ptr,
-               png_uint_32 *width, png_uint_32 *height, int *bit_depth, int *color_type,
-               int *interlace_method, int *compression_method, int *filter_method));
-
-   PNG_EXPORT(144, void, png_set_IHDR,
-              (png_structp png_ptr, png_infop info_ptr,
-               png_uint_32 width, png_uint_32 height, int bit_depth, int color_type,
-               int interlace_method, int compression_method, int filter_method));
-
-#ifdef PNG_oFFs_SUPPORTED
-   PNG_EXPORT(145, png_uint_32, png_get_oFFs,
-              (png_const_structp png_ptr, png_const_infop info_ptr,
-               png_int_32 *offset_x, png_int_32 *offset_y, int *unit_type));
-#endif
-
-#ifdef PNG_oFFs_SUPPORTED
-   PNG_EXPORT(146, void, png_set_oFFs,
-              (png_structp png_ptr, png_infop info_ptr,
-               png_int_32 offset_x, png_int_32 offset_y, int unit_type));
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-   PNG_EXPORT(147, png_uint_32, png_get_pCAL,
-              (png_const_structp png_ptr, png_const_infop info_ptr,
-               png_charp *purpose, png_int_32 *X0, png_int_32 *X1, int *type,
-               int *nparams,
-               png_charp *units, png_charpp *params));
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-   PNG_EXPORT(148, void, png_set_pCAL, (png_structp png_ptr,
-                                        png_infop info_ptr,
-                                        png_const_charp purpose, png_int_32 X0, png_int_32 X1, int type,
-                                        int nparams, png_const_charp units, png_charpp params));
-#endif
-
-#ifdef PNG_pHYs_SUPPORTED
-   PNG_EXPORT(149, png_uint_32, png_get_pHYs,
-              (png_const_structp png_ptr, png_const_infop info_ptr,
-               png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type));
-#endif
-
-#ifdef PNG_pHYs_SUPPORTED
-   PNG_EXPORT(150, void, png_set_pHYs,
-              (png_structp png_ptr, png_infop info_ptr,
-               png_uint_32 res_x, png_uint_32 res_y, int unit_type));
-#endif
-
-   PNG_EXPORT(151, png_uint_32, png_get_PLTE,
-              (png_const_structp png_ptr, png_const_infop info_ptr,
-               png_colorp *palette, int *num_palette));
-
-   PNG_EXPORT(152, void, png_set_PLTE,
-              (png_structp png_ptr, png_infop info_ptr,
-               png_const_colorp palette, int num_palette));
-
-#ifdef PNG_sBIT_SUPPORTED
-   PNG_EXPORT(153, png_uint_32, png_get_sBIT,
-              (png_const_structp png_ptr, png_infop info_ptr,
-               png_color_8p *sig_bit));
-#endif
-
-#ifdef PNG_sBIT_SUPPORTED
-   PNG_EXPORT(154, void, png_set_sBIT,
-              (png_structp png_ptr, png_infop info_ptr, png_const_color_8p sig_bit));
-#endif
-
-#ifdef PNG_sRGB_SUPPORTED
-   PNG_EXPORT(155, png_uint_32, png_get_sRGB, (png_const_structp png_ptr,
-              png_const_infop info_ptr, int *file_srgb_intent));
-#endif
-
-#ifdef PNG_sRGB_SUPPORTED
-   PNG_EXPORT(156, void, png_set_sRGB,
-              (png_structp png_ptr, png_infop info_ptr, int srgb_intent));
-   PNG_EXPORT(157, void, png_set_sRGB_gAMA_and_cHRM, (png_structp png_ptr,
-              png_infop info_ptr, int srgb_intent));
-#endif
-
-#ifdef PNG_iCCP_SUPPORTED
-   PNG_EXPORT(158, png_uint_32, png_get_iCCP,
-              (png_const_structp png_ptr, png_const_infop info_ptr,
-               png_charpp name, int *compression_type, png_bytepp profile,
-               png_uint_32 *proflen));
-#endif
-
-#ifdef PNG_iCCP_SUPPORTED
-   PNG_EXPORT(159, void, png_set_iCCP,
-              (png_structp png_ptr, png_infop info_ptr,
-               png_const_charp name, int compression_type, png_const_bytep profile,
-               png_uint_32 proflen));
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-   PNG_EXPORT(160, png_uint_32, png_get_sPLT,
-              (png_const_structp png_ptr, png_const_infop info_ptr,
-               png_sPLT_tpp entries));
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-   PNG_EXPORT(161, void, png_set_sPLT,
-              (png_structp png_ptr, png_infop info_ptr,
-               png_const_sPLT_tp entries, int nentries));
-#endif
-
-#ifdef PNG_TEXT_SUPPORTED
-   /* png_get_text also returns the number of text chunks in *num_text */
-   PNG_EXPORT(162, png_uint_32, png_get_text,
-              (png_const_structp png_ptr, png_const_infop info_ptr,
-               png_textp *text_ptr, int *num_text));
-#endif
-
-   /* Note while png_set_text() will accept a structure whose text,
-    * language, and  translated keywords are NULL pointers, the structure
-    * returned by png_get_text will always contain regular
-    * zero-terminated C strings.  They might be empty strings but
-    * they will never be NULL pointers.
-    */
-
-#ifdef PNG_TEXT_SUPPORTED
-   PNG_EXPORT(163, void, png_set_text,
-              (png_structp png_ptr, png_infop info_ptr,
-               png_const_textp text_ptr, int num_text));
-#endif
-
-#ifdef PNG_tIME_SUPPORTED
-   PNG_EXPORT(164, png_uint_32, png_get_tIME,
-              (png_const_structp png_ptr, png_infop info_ptr, png_timep *mod_time));
-#endif
-
-#ifdef PNG_tIME_SUPPORTED
-   PNG_EXPORT(165, void, png_set_tIME,
-              (png_structp png_ptr, png_infop info_ptr, png_const_timep mod_time));
-#endif
-
-#ifdef PNG_tRNS_SUPPORTED
-   PNG_EXPORT(166, png_uint_32, png_get_tRNS,
-              (png_const_structp png_ptr, png_infop info_ptr,
-               png_bytep *trans_alpha, int *num_trans, png_color_16p *trans_color));
-#endif
-
-#ifdef PNG_tRNS_SUPPORTED
-   PNG_EXPORT(167, void, png_set_tRNS,
-              (png_structp png_ptr, png_infop info_ptr,
-               png_const_bytep trans_alpha, int num_trans,
-               png_const_color_16p trans_color));
-#endif
-
-#ifdef PNG_sCAL_SUPPORTED
-   PNG_FP_EXPORT(168, png_uint_32, png_get_sCAL,
-                 (png_const_structp png_ptr, png_const_infop info_ptr,
-                  int *unit, double *width, double *height));
-#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-   /* NOTE: this API is currently implemented using floating point arithmetic,
-    * consequently it can only be used on systems with floating point support.
-    * In any case the range of values supported by png_fixed_point is small and it
-    * is highly recommended that png_get_sCAL_s be used instead.
-    */
-   PNG_FIXED_EXPORT(214, png_uint_32, png_get_sCAL_fixed,
-                    (png_structp png_ptr, png_const_infop info_ptr, int *unit,
-                     png_fixed_point *width,
-                     png_fixed_point *height));
-#endif
-   PNG_EXPORT(169, png_uint_32, png_get_sCAL_s,
-              (png_const_structp png_ptr, png_const_infop info_ptr,
-               int *unit, png_charpp swidth, png_charpp sheight));
-
-   PNG_FP_EXPORT(170, void, png_set_sCAL,
-                 (png_structp png_ptr, png_infop info_ptr,
-                  int unit, double width, double height));
-   PNG_FIXED_EXPORT(213, void, png_set_sCAL_fixed, (png_structp png_ptr,
-                    png_infop info_ptr, int unit, png_fixed_point width,
-                    png_fixed_point height));
-   PNG_EXPORT(171, void, png_set_sCAL_s,
-              (png_structp png_ptr, png_infop info_ptr,
-               int unit, png_const_charp swidth, png_const_charp sheight));
-#endif /* PNG_sCAL_SUPPORTED */
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   /* Provide a list of chunks and how they are to be handled, if the built-in
-      handling or default unknown chunk handling is not desired.  Any chunks not
-      listed will be handled in the default manner.  The IHDR and IEND chunks
-      must not be listed.  Because this turns off the default handling for chunks
-      that would otherwise be recognized the behavior of libpng transformations may
-      well become incorrect!
-         keep = 0: PNG_HANDLE_CHUNK_AS_DEFAULT: follow default behavior
-              = 1: PNG_HANDLE_CHUNK_NEVER:      do not keep
-              = 2: PNG_HANDLE_CHUNK_IF_SAFE:    keep only if safe-to-copy
-              = 3: PNG_HANDLE_CHUNK_ALWAYS:     keep even if unsafe-to-copy
-   */
-   PNG_EXPORT(172, void, png_set_keep_unknown_chunks,
-              (png_structp png_ptr, int keep,
-               png_const_bytep chunk_list, int num_chunks));
-
-   /* The handling code is returned; the result is therefore true (non-zero) if
-    * special handling is required, false for the default handling.
-    */
-   PNG_EXPORT(173, int, png_handle_as_unknown, (png_structp png_ptr,
-              png_const_bytep chunk_name));
-#endif
-#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
-   PNG_EXPORT(174, void, png_set_unknown_chunks, (png_structp png_ptr,
-              png_infop info_ptr, png_const_unknown_chunkp unknowns,
-              int num_unknowns));
-   PNG_EXPORT(175, void, png_set_unknown_chunk_location,
-              (png_structp png_ptr, png_infop info_ptr, int chunk, int location));
-   PNG_EXPORT(176, int, png_get_unknown_chunks, (png_const_structp png_ptr,
-              png_const_infop info_ptr, png_unknown_chunkpp entries));
-#endif
-
-   /* Png_free_data() will turn off the "valid" flag for anything it frees.
-    * If you need to turn it off for a chunk that your application has freed,
-    * you can use png_set_invalid(png_ptr, info_ptr, PNG_INFO_CHNK);
-    */
-   PNG_EXPORT(177, void, png_set_invalid,
-              (png_structp png_ptr, png_infop info_ptr, int mask));
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-   /* The "params" pointer is currently not used and is for future expansion. */
-   PNG_EXPORT(178, void, png_read_png, (png_structp png_ptr, png_infop info_ptr,
-                                        int transforms, png_voidp params));
-   PNG_EXPORT(179, void, png_write_png, (png_structp png_ptr, png_infop info_ptr,
-                                         int transforms, png_voidp params));
-#endif
-
-   PNG_EXPORT(180, png_const_charp, png_get_copyright,
-              (png_const_structp png_ptr));
-   PNG_EXPORT(181, png_const_charp, png_get_header_ver,
-              (png_const_structp png_ptr));
-   PNG_EXPORT(182, png_const_charp, png_get_header_version,
-              (png_const_structp png_ptr));
-   PNG_EXPORT(183, png_const_charp, png_get_libpng_ver,
-              (png_const_structp png_ptr));
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   PNG_EXPORT(184, png_uint_32, png_permit_mng_features, (png_structp png_ptr,
-              png_uint_32 mng_features_permitted));
-#endif
-
-   /* For use in png_set_keep_unknown, added to version 1.2.6 */
-#define PNG_HANDLE_CHUNK_AS_DEFAULT   0
-#define PNG_HANDLE_CHUNK_NEVER        1
-#define PNG_HANDLE_CHUNK_IF_SAFE      2
-#define PNG_HANDLE_CHUNK_ALWAYS       3
-
-   /* Strip the prepended error numbers ("#nnn ") from error and warning
-    * messages before passing them to the error or warning handler.
-    */
-#ifdef PNG_ERROR_NUMBERS_SUPPORTED
-   PNG_EXPORT(185, void, png_set_strip_error_numbers,
-              (png_structp png_ptr,
-               png_uint_32 strip_mode));
-#endif
-
-   /* Added in libpng-1.2.6 */
-#ifdef PNG_SET_USER_LIMITS_SUPPORTED
-   PNG_EXPORT(186, void, png_set_user_limits, (png_structp png_ptr,
-              png_uint_32 user_width_max, png_uint_32 user_height_max));
-   PNG_EXPORT(187, png_uint_32, png_get_user_width_max,
-              (png_const_structp png_ptr));
-   PNG_EXPORT(188, png_uint_32, png_get_user_height_max,
-              (png_const_structp png_ptr));
-   /* Added in libpng-1.4.0 */
-   PNG_EXPORT(189, void, png_set_chunk_cache_max, (png_structp png_ptr,
-              png_uint_32 user_chunk_cache_max));
-   PNG_EXPORT(190, png_uint_32, png_get_chunk_cache_max,
-              (png_const_structp png_ptr));
-   /* Added in libpng-1.4.1 */
-   PNG_EXPORT(191, void, png_set_chunk_malloc_max, (png_structp png_ptr,
-              png_alloc_size_t user_chunk_cache_max));
-   PNG_EXPORT(192, png_alloc_size_t, png_get_chunk_malloc_max,
-              (png_const_structp png_ptr));
-#endif
-
-#if defined(PNG_INCH_CONVERSIONS_SUPPORTED)
-   PNG_EXPORT(193, png_uint_32, png_get_pixels_per_inch,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-
-   PNG_EXPORT(194, png_uint_32, png_get_x_pixels_per_inch,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-
-   PNG_EXPORT(195, png_uint_32, png_get_y_pixels_per_inch,
-              (png_const_structp png_ptr, png_const_infop info_ptr));
-
-   PNG_FP_EXPORT(196, float, png_get_x_offset_inches,
-                 (png_const_structp png_ptr, png_const_infop info_ptr));
-#ifdef PNG_FIXED_POINT_SUPPORTED /* otherwise not implemented. */
-   PNG_FIXED_EXPORT(211, png_fixed_point, png_get_x_offset_inches_fixed,
-                    (png_structp png_ptr, png_const_infop info_ptr));
-#endif
-
-   PNG_FP_EXPORT(197, float, png_get_y_offset_inches, (png_const_structp png_ptr,
-                 png_const_infop info_ptr));
-#ifdef PNG_FIXED_POINT_SUPPORTED /* otherwise not implemented. */
-   PNG_FIXED_EXPORT(212, png_fixed_point, png_get_y_offset_inches_fixed,
-                    (png_structp png_ptr, png_const_infop info_ptr));
-#endif
-
-#  ifdef PNG_pHYs_SUPPORTED
-   PNG_EXPORT(198, png_uint_32, png_get_pHYs_dpi, (png_const_structp png_ptr,
-              png_const_infop info_ptr, png_uint_32 *res_x, png_uint_32 *res_y,
-              int *unit_type));
-#  endif /* PNG_pHYs_SUPPORTED */
-#endif  /* PNG_INCH_CONVERSIONS_SUPPORTED */
-
-   /* Added in libpng-1.4.0 */
-#ifdef PNG_IO_STATE_SUPPORTED
-   PNG_EXPORT(199, png_uint_32, png_get_io_state, (png_structp png_ptr));
-
-   PNG_EXPORTA(200, png_const_bytep, png_get_io_chunk_name,
-               (png_structp png_ptr), PNG_DEPRECATED);
-   PNG_EXPORT(216, png_uint_32, png_get_io_chunk_type,
-              (png_const_structp png_ptr));
-
-   /* The flags returned by png_get_io_state() are the following: */
-#  define PNG_IO_NONE        0x0000   /* no I/O at this moment */
-#  define PNG_IO_READING     0x0001   /* currently reading */
-#  define PNG_IO_WRITING     0x0002   /* currently writing */
-#  define PNG_IO_SIGNATURE   0x0010   /* currently at the file signature */
-#  define PNG_IO_CHUNK_HDR   0x0020   /* currently at the chunk header */
-#  define PNG_IO_CHUNK_DATA  0x0040   /* currently at the chunk data */
-#  define PNG_IO_CHUNK_CRC   0x0080   /* currently at the chunk crc */
-#  define PNG_IO_MASK_OP     0x000f   /* current operation: reading/writing */
-#  define PNG_IO_MASK_LOC    0x00f0   /* current location: sig/hdr/data/crc */
-#endif /* ?PNG_IO_STATE_SUPPORTED */
-
-   /* Interlace support.  The following macros are always defined so that if
-    * libpng interlace handling is turned off the macros may be used to handle
-    * interlaced images within the application.
-    */
-#define PNG_INTERLACE_ADAM7_PASSES 7
-
-   /* Two macros to return the first row and first column of the original,
-    * full, image which appears in a given pass.  'pass' is in the range 0
-    * to 6 and the result is in the range 0 to 7.
-    */
-#define PNG_PASS_START_ROW(pass) (((1&~(pass))<<(3-((pass)>>1)))&7)
-#define PNG_PASS_START_COL(pass) (((1& (pass))<<(3-(((pass)+1)>>1)))&7)
-
-   /* A macro to return the offset between pixels in the output row for a pair of
-    * pixels in the input - effectively the inverse of the 'COL_SHIFT' macro that
-    * follows.  Note that ROW_OFFSET is the offset from one row to the next whereas
-    * COL_OFFSET is from one column to the next, within a row.
-    */
-#define PNG_PASS_ROW_OFFSET(pass) ((pass)>2?(8>>(((pass)-1)>>1)):8)
-#define PNG_PASS_COL_OFFSET(pass) (1<<((7-(pass))>>1))
-
-   /* Two macros to help evaluate the number of rows or columns in each
-    * pass.  This is expressed as a shift - effectively log2 of the number or
-    * rows or columns in each 8x8 tile of the original image.
-    */
-#define PNG_PASS_ROW_SHIFT(pass) ((pass)>2?(8-(pass))>>1:3)
-#define PNG_PASS_COL_SHIFT(pass) ((pass)>1?(7-(pass))>>1:3)
-
-   /* Hence two macros to determine the number of rows or columns in a given
-    * pass of an image given its height or width.  In fact these macros may
-    * return non-zero even though the sub-image is empty, because the other
-    * dimension may be empty for a small image.
-    */
-#define PNG_PASS_ROWS(height, pass) (((height)+(((1<<PNG_PASS_ROW_SHIFT(pass))\
-   -1)-PNG_PASS_START_ROW(pass)))>>PNG_PASS_ROW_SHIFT(pass))
-#define PNG_PASS_COLS(width, pass) (((width)+(((1<<PNG_PASS_COL_SHIFT(pass))\
-   -1)-PNG_PASS_START_COL(pass)))>>PNG_PASS_COL_SHIFT(pass))
-
-   /* For the reader row callbacks (both progressive and sequential) it is
-    * necessary to find the row in the output image given a row in an interlaced
-    * image, so two more macros:
-    */
-#define PNG_ROW_FROM_PASS_ROW(yIn, pass) \
-   (((yIn)<<PNG_PASS_ROW_SHIFT(pass))+PNG_PASS_START_ROW(pass))
-#define PNG_COL_FROM_PASS_COL(xIn, pass) \
-   (((xIn)<<PNG_PASS_COL_SHIFT(pass))+PNG_PASS_START_COL(pass))
-
-   /* Two macros which return a boolean (0 or 1) saying whether the given row
-    * or column is in a particular pass.  These use a common utility macro that
-    * returns a mask for a given pass - the offset 'off' selects the row or
-    * column version.  The mask has the appropriate bit set for each column in
-    * the tile.
-    */
-#define PNG_PASS_MASK(pass,off) ( \
-   ((0x110145AF>>(((7-(off))-(pass))<<2)) & 0xF) | \
-   ((0x01145AF0>>(((7-(off))-(pass))<<2)) & 0xF0))
-
-#define PNG_ROW_IN_INTERLACE_PASS(y, pass) \
-   ((PNG_PASS_MASK(pass,0) >> ((y)&7)) & 1)
-#define PNG_COL_IN_INTERLACE_PASS(x, pass) \
-   ((PNG_PASS_MASK(pass,1) >> ((x)&7)) & 1)
-
-#ifdef PNG_READ_COMPOSITE_NODIV_SUPPORTED
-   /* With these routines we avoid an integer divide, which will be slower on
-    * most machines.  However, it does take more operations than the corresponding
-    * divide method, so it may be slower on a few RISC systems.  There are two
-    * shifts (by 8 or 16 bits) and an addition, versus a single integer divide.
-    *
-    * Note that the rounding factors are NOT supposed to be the same!  128 and
-    * 32768 are correct for the NODIV code; 127 and 32767 are correct for the
-    * standard method.
-    *
-    * [Optimized code by Greg Roelofs and Mark Adler...blame us for bugs. :-) ]
-    */
-
-   /* fg and bg should be in `gamma 1.0' space; alpha is the opacity */
-
-#  define png_composite(composite, fg, alpha, bg)         \
-     { png_uint_16 temp = (png_uint_16)((png_uint_16)(fg) \
-           * (png_uint_16)(alpha)                         \
-           + (png_uint_16)(bg)*(png_uint_16)(255          \
-           - (png_uint_16)(alpha)) + 128);                \
-       (composite) = (png_byte)((temp + (temp >> 8)) >> 8); }
-
-#  define png_composite_16(composite, fg, alpha, bg)       \
-     { png_uint_32 temp = (png_uint_32)((png_uint_32)(fg)  \
-           * (png_uint_32)(alpha)                          \
-           + (png_uint_32)(bg)*(65535                      \
-           - (png_uint_32)(alpha)) + 32768);               \
-       (composite) = (png_uint_16)((temp + (temp >> 16)) >> 16); }
-
-#else  /* Standard method using integer division */
-
-#  define png_composite(composite, fg, alpha, bg)                          \
-     (composite) = (png_byte)(((png_uint_16)(fg) * (png_uint_16)(alpha) +  \
-     (png_uint_16)(bg) * (png_uint_16)(255 - (png_uint_16)(alpha)) +       \
-     127) / 255)
-
-#  define png_composite_16(composite, fg, alpha, bg)                         \
-     (composite) = (png_uint_16)(((png_uint_32)(fg) * (png_uint_32)(alpha) + \
-     (png_uint_32)(bg)*(png_uint_32)(65535 - (png_uint_32)(alpha)) +         \
-     32767) / 65535)
-#endif /* PNG_READ_COMPOSITE_NODIV_SUPPORTED */
-
-#ifdef PNG_READ_INT_FUNCTIONS_SUPPORTED
-   PNG_EXPORT(201, png_uint_32, png_get_uint_32, (png_const_bytep buf));
-   PNG_EXPORT(202, png_uint_16, png_get_uint_16, (png_const_bytep buf));
-   PNG_EXPORT(203, png_int_32, png_get_int_32, (png_const_bytep buf));
-#endif
-
-   PNG_EXPORT(204, png_uint_32, png_get_uint_31, (png_structp png_ptr,
-              png_const_bytep buf));
-   /* No png_get_int_16 -- may be added if there's a real need for it. */
-
-   /* Place a 32-bit number into a buffer in PNG byte order (big-endian). */
-#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED
-   PNG_EXPORT(205, void, png_save_uint_32, (png_bytep buf, png_uint_32 i));
-#endif
-#ifdef PNG_SAVE_INT_32_SUPPORTED
-   PNG_EXPORT(206, void, png_save_int_32, (png_bytep buf, png_int_32 i));
-#endif
-
-   /* Place a 16-bit number into a buffer in PNG byte order.
-    * The parameter is declared unsigned int, not png_uint_16,
-    * just to avoid potential problems on pre-ANSI C compilers.
-    */
-#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED
-   PNG_EXPORT(207, void, png_save_uint_16, (png_bytep buf, unsigned int i));
-   /* No png_save_int_16 -- may be added if there's a real need for it. */
-#endif
-
-#ifdef PNG_USE_READ_MACROS
-   /* Inline macros to do direct reads of bytes from the input buffer.
-    * The png_get_int_32() routine assumes we are using two's complement
-    * format for negative values, which is almost certainly true.
-    */
-#  define png_get_uint_32(buf) \
-     (((png_uint_32)(*(buf)) << 24) + \
-      ((png_uint_32)(*((buf) + 1)) << 16) + \
-      ((png_uint_32)(*((buf) + 2)) << 8) + \
-      ((png_uint_32)(*((buf) + 3))))
-
-   /* From libpng-1.4.0 until 1.4.4, the png_get_uint_16 macro (but not the
-    * function) incorrectly returned a value of type png_uint_32.
-    */
-#  define png_get_uint_16(buf) \
-     ((png_uint_16) \
-      (((unsigned int)(*(buf)) << 8) + \
-       ((unsigned int)(*((buf) + 1)))))
-
-#  define png_get_int_32(buf) \
-     ((png_int_32)((*(buf) & 0x80) \
-      ? -((png_int_32)((png_get_uint_32(buf) ^ 0xffffffffL) + 1)) \
-      : (png_int_32)png_get_uint_32(buf)))
-#endif
-
-#if defined(PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED) || \
-    defined(PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED)
-   PNG_EXPORT(234, void, png_set_check_for_invalid_index, (png_structp png_ptr,
-              int allowed));
-#endif
-
-   /* Maintainer: Put new public prototypes here ^, in libpng.3, and project
-    * defs
-    */
-
-   /* The last ordinal number (this is the *last* one already used; the next
-    * one to use is one more than this.)  Maintainer, remember to add an entry to
-    * scripts/symbols.def as well.
-    */
-#ifdef PNG_EXPORT_LAST_ORDINAL
-   PNG_EXPORT_LAST_ORDINAL(234);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* PNG_VERSION_INFO_ONLY */
-/* Do not put anything past this line */
diff --git a/reg-io/png/lpng1510/pngconf.h b/reg-io/png/lpng1510/pngconf.h
deleted file mode 100644
index d89e1206..00000000
--- a/reg-io/png/lpng1510/pngconf.h
+++ /dev/null
@@ -1,594 +0,0 @@
-
-/* pngconf.h - machine configurable file for libpng
- *
- * libpng version 1.5.10 - March 29, 2012
- *
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- */
-
-/* Any machine specific code is near the front of this file, so if you
- * are configuring libpng for a machine, you may want to read the section
- * starting here down to where it starts to typedef png_color, png_text,
- * and png_info.
- */
-
-#pragma once
-
-#ifndef PNG_BUILDING_SYMBOL_TABLE
-/* PNG_NO_LIMITS_H may be used to turn off the use of the standard C
- * definition file for  machine specific limits, this may impact the
- * correctness of the definitons below (see uses of INT_MAX).
- */
-#  ifndef PNG_NO_LIMITS_H
-#    include <limits.h>
-#  endif
-
-/* For the memory copy APIs (i.e. the standard definitions of these),
- * because this file defines png_memcpy and so on the base APIs must
- * be defined here.
- */
-#  ifdef BSD
-#    include <strings.h>
-#  else
-#    include <string.h>
-#  endif
-
-/* For png_FILE_p - this provides the standard definition of a
- * FILE
- */
-#  ifdef PNG_STDIO_SUPPORTED
-#    include <stdio.h>
-#  endif
-#endif
-
-/* This controls optimization of the reading of 16 and 32 bit values
- * from PNG files.  It can be set on a per-app-file basis - it
- * just changes whether a macro is used when the function is called.
- * The library builder sets the default; if read functions are not
- * built into the library the macro implementation is forced on.
- */
-#ifndef PNG_READ_INT_FUNCTIONS_SUPPORTED
-#  define PNG_USE_READ_MACROS
-#endif
-#if !defined(PNG_NO_USE_READ_MACROS) && !defined(PNG_USE_READ_MACROS)
-#  if PNG_DEFAULT_READ_MACROS
-#    define PNG_USE_READ_MACROS
-#  endif
-#endif
-
-/* COMPILER SPECIFIC OPTIONS.
- *
- * These options are provided so that a variety of difficult compilers
- * can be used.  Some are fixed at build time (e.g. PNG_API_RULE
- * below) but still have compiler specific implementations, others
- * may be changed on a per-file basis when compiling against libpng.
- */
-
-/* The PNGARG macro protects us against machines that don't have function
- * prototypes (ie K&R style headers).  If your compiler does not handle
- * function prototypes, define this macro and use the included ansi2knr.
- * I've always been able to use _NO_PROTO as the indicator, but you may
- * need to drag the empty declaration out in front of here, or change the
- * ifdef to suit your own needs.
- */
-#ifndef PNGARG
-
-#  ifdef OF /* zlib prototype munger */
-#    define PNGARG(arglist) OF(arglist)
-#  else
-
-#    ifdef _NO_PROTO
-#      define PNGARG(arglist) ()
-#    else
-#      define PNGARG(arglist) arglist
-#    endif /* _NO_PROTO */
-
-#  endif /* OF */
-
-#endif /* PNGARG */
-
-/* Function calling conventions.
- * =============================
- * Normally it is not necessary to specify to the compiler how to call
- * a function - it just does it - however on x86 systems derived from
- * Microsoft and Borland C compilers ('IBM PC', 'DOS', 'Windows' systems
- * and some others) there are multiple ways to call a function and the
- * default can be changed on the compiler command line.  For this reason
- * libpng specifies the calling convention of every exported function and
- * every function called via a user supplied function pointer.  This is
- * done in this file by defining the following macros:
- *
- * PNGAPI    Calling convention for exported functions.
- * PNGCBAPI  Calling convention for user provided (callback) functions.
- * PNGCAPI   Calling convention used by the ANSI-C library (required
- *           for longjmp callbacks and sometimes used internally to
- *           specify the calling convention for zlib).
- *
- * These macros should never be overridden.  If it is necessary to
- * change calling convention in a private build this can be done
- * by setting PNG_API_RULE (which defaults to 0) to one of the values
- * below to select the correct 'API' variants.
- *
- * PNG_API_RULE=0 Use PNGCAPI - the 'C' calling convention - throughout.
- *                This is correct in every known environment.
- * PNG_API_RULE=1 Use the operating system convention for PNGAPI and
- *                the 'C' calling convention (from PNGCAPI) for
- *                callbacks (PNGCBAPI).  This is no longer required
- *                in any known environment - if it has to be used
- *                please post an explanation of the problem to the
- *                libpng mailing list.
- *
- * These cases only differ if the operating system does not use the C
- * calling convention, at present this just means the above cases
- * (x86 DOS/Windows sytems) and, even then, this does not apply to
- * Cygwin running on those systems.
- *
- * Note that the value must be defined in pnglibconf.h so that what
- * the application uses to call the library matches the conventions
- * set when building the library.
- */
-
-/* Symbol export
- * =============
- * When building a shared library it is almost always necessary to tell
- * the compiler which symbols to export.  The png.h macro 'PNG_EXPORT'
- * is used to mark the symbols.  On some systems these symbols can be
- * extracted at link time and need no special processing by the compiler,
- * on other systems the symbols are flagged by the compiler and just
- * the declaration requires a special tag applied (unfortunately) in a
- * compiler dependent way.  Some systems can do either.
- *
- * A small number of older systems also require a symbol from a DLL to
- * be flagged to the program that calls it.  This is a problem because
- * we do not know in the header file included by application code that
- * the symbol will come from a shared library, as opposed to a statically
- * linked one.  For this reason the application must tell us by setting
- * the magic flag PNG_USE_DLL to turn on the special processing before
- * it includes png.h.
- *
- * Four additional macros are used to make this happen:
- *
- * PNG_IMPEXP The magic (if any) to cause a symbol to be exported from
- *            the build or imported if PNG_USE_DLL is set - compiler
- *            and system specific.
- *
- * PNG_EXPORT_TYPE(type) A macro that pre or appends PNG_IMPEXP to
- *                       'type', compiler specific.
- *
- * PNG_DLL_EXPORT Set to the magic to use during a libpng build to
- *                make a symbol exported from the DLL.  Not used in the
- *                public header files; see pngpriv.h for how it is used
- *                in the libpng build.
- *
- * PNG_DLL_IMPORT Set to the magic to force the libpng symbols to come
- *                from a DLL - used to define PNG_IMPEXP when
- *                PNG_USE_DLL is set.
- */
-
-/* System specific discovery.
- * ==========================
- * This code is used at build time to find PNG_IMPEXP, the API settings
- * and PNG_EXPORT_TYPE(), it may also set a macro to indicate the DLL
- * import processing is possible.  On Windows/x86 systems it also sets
- * compiler-specific macros to the values required to change the calling
- * conventions of the various functions.
- */
-#if ( defined(_Windows) || defined(_WINDOWS) || defined(WIN32) ||\
-      defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) ) &&\
-    ( defined(_X86_) || defined(_X64_) || defined(_M_IX86) ||\
-      defined(_M_X64) || defined(_M_IA64) )
-/* Windows system (DOS doesn't support DLLs) running on x86/x64.  Includes
- * builds under Cygwin or MinGW.  Also includes Watcom builds but these need
- * special treatment because they are not compatible with GCC or Visual C
- * because of different calling conventions.
- */
-#  if PNG_API_RULE == 2
-/* If this line results in an error, either because __watcall is not
- * understood or because of a redefine just below you cannot use *this*
- * build of the library with the compiler you are using.  *This* build was
- * build using Watcom and applications must also be built using Watcom!
- */
-#    define PNGCAPI __watcall
-#  endif
-
-#  if defined(__GNUC__) || (defined (_MSC_VER) && (_MSC_VER >= 800))
-#    define PNGCAPI __cdecl
-#    if PNG_API_RULE == 1
-#      define PNGAPI __stdcall
-#    endif
-#  else
-/* An older compiler, or one not detected (erroneously) above,
- * if necessary override on the command line to get the correct
- * variants for the compiler.
- */
-#    ifndef PNGCAPI
-#      define PNGCAPI _cdecl
-#    endif
-#    if PNG_API_RULE == 1 && !defined(PNGAPI)
-#      define PNGAPI _stdcall
-#    endif
-#  endif /* compiler/api */
-/* NOTE: PNGCBAPI always defaults to PNGCAPI. */
-
-#  if defined(PNGAPI) && !defined(PNG_USER_PRIVATEBUILD)
-ERROR:
-PNG_USER_PRIVATEBUILD must be defined if PNGAPI is changed
-#  endif
-
-#  if (defined(_MSC_VER) && _MSC_VER < 800) ||\
-      (defined(__BORLANDC__) && __BORLANDC__ < 0x500)
-/* older Borland and MSC
- * compilers used '__export' and required this to be after
- * the type.
- */
-#    ifndef PNG_EXPORT_TYPE
-#      define PNG_EXPORT_TYPE(type) type PNG_IMPEXP
-#    endif
-#    define PNG_DLL_EXPORT __export
-#  else /* newer compiler */
-#    define PNG_DLL_EXPORT __declspec(dllexport)
-#    ifndef PNG_DLL_IMPORT
-#      define PNG_DLL_IMPORT __declspec(dllimport)
-#    endif
-#  endif /* compiler */
-
-#else /* !Windows/x86 */
-#  if (defined(__IBMC__) || defined(__IBMCPP__)) && defined(__OS2__)
-#    define PNGAPI _System
-#  else /* !Windows/x86 && !OS/2 */
-/* Use the defaults, or define PNG*API on the command line (but
- * this will have to be done for every compile!)
- */
-#  endif /* other system, !OS/2 */
-#endif /* !Windows/x86 */
-
-/* Now do all the defaulting . */
-#ifndef PNGCAPI
-#  define PNGCAPI
-#endif
-#ifndef PNGCBAPI
-#  define PNGCBAPI PNGCAPI
-#endif
-#ifndef PNGAPI
-#  define PNGAPI PNGCAPI
-#endif
-
-/* PNG_IMPEXP may be set on the compilation system command line or (if not set)
- * then in an internal header file when building the library, otherwise (when
- * using the library) it is set here.
- */
-#ifndef PNG_IMPEXP
-#  if defined(PNG_USE_DLL) && defined(PNG_DLL_IMPORT)
-/* This forces use of a DLL, disallowing static linking */
-#    define PNG_IMPEXP PNG_DLL_IMPORT
-#  endif
-
-#  ifndef PNG_IMPEXP
-#    define PNG_IMPEXP
-#  endif
-#endif
-
-/* In 1.5.2 the definition of PNG_FUNCTION has been changed to always treat
- * 'attributes' as a storage class - the attributes go at the start of the
- * function definition, and attributes are always appended regardless of the
- * compiler.  This considerably simplifies these macros but may cause problems
- * if any compilers both need function attributes and fail to handle them as
- * a storage class (this is unlikely.)
- */
-#ifndef PNG_FUNCTION
-#  define PNG_FUNCTION(type, name, args, attributes) attributes type name args
-#endif
-
-#ifndef PNG_EXPORT_TYPE
-#  define PNG_EXPORT_TYPE(type) PNG_IMPEXP type
-#endif
-
-/* The ordinal value is only relevant when preprocessing png.h for symbol
- * table entries, so we discard it here.  See the .dfn files in the
- * scripts directory.
- */
-#ifndef PNG_EXPORTA
-
-#  define PNG_EXPORTA(ordinal, type, name, args, attributes)\
-      PNG_FUNCTION(PNG_EXPORT_TYPE(type),(PNGAPI name),PNGARG(args), \
-        extern attributes)
-#endif
-
-/* ANSI-C (C90) does not permit a macro to be invoked with an empty argument,
- * so make something non-empty to satisfy the requirement:
- */
-#define PNG_EMPTY /*empty list*/
-
-#define PNG_EXPORT(ordinal, type, name, args)\
-   PNG_EXPORTA(ordinal, type, name, args, PNG_EMPTY)
-
-/* Use PNG_REMOVED to comment out a removed interface. */
-#ifndef PNG_REMOVED
-#  define PNG_REMOVED(ordinal, type, name, args, attributes)
-#endif
-
-#ifndef PNG_CALLBACK
-#  define PNG_CALLBACK(type, name, args) type (PNGCBAPI name) PNGARG(args)
-#endif
-
-/* Support for compiler specific function attributes.  These are used
- * so that where compiler support is available incorrect use of API
- * functions in png.h will generate compiler warnings.
- *
- * Added at libpng-1.2.41.
- */
-
-#ifndef PNG_NO_PEDANTIC_WARNINGS
-#  ifndef PNG_PEDANTIC_WARNINGS_SUPPORTED
-#    define PNG_PEDANTIC_WARNINGS_SUPPORTED
-#  endif
-#endif
-
-#ifdef PNG_PEDANTIC_WARNINGS_SUPPORTED
-/* Support for compiler specific function attributes.  These are used
- * so that where compiler support is available incorrect use of API
- * functions in png.h will generate compiler warnings.  Added at libpng
- * version 1.2.41.
- */
-#  if defined(__GNUC__)
-#    ifndef PNG_USE_RESULT
-#      define PNG_USE_RESULT __attribute__((__warn_unused_result__))
-#    endif
-#    ifndef PNG_NORETURN
-#      define PNG_NORETURN   __attribute__((__noreturn__))
-#    endif
-#    ifndef PNG_ALLOCATED
-#      define PNG_ALLOCATED  __attribute__((__malloc__))
-#    endif
-#    ifndef PNG_DEPRECATED
-#      define PNG_DEPRECATED __attribute__((__deprecated__))
-#    endif
-#    ifndef PNG_PRIVATE
-#      if 0 /* Doesn't work so we use deprecated instead*/
-#        define PNG_PRIVATE \
-          __attribute__((warning("This function is not exported by libpng.")))
-#      else
-#        define PNG_PRIVATE \
-          __attribute__((__deprecated__))
-#      endif
-#    endif
-#  endif /* __GNUC__ */
-
-#  if defined(_MSC_VER)  && (_MSC_VER >= 1300)
-#    ifndef PNG_USE_RESULT
-#      define PNG_USE_RESULT /* not supported */
-#    endif
-#    ifndef PNG_NORETURN
-#      define PNG_NORETURN __declspec(noreturn)
-#    endif
-#    ifndef PNG_ALLOCATED
-#      if (_MSC_VER >= 1400)
-#        define PNG_ALLOCATED __declspec(restrict)
-#      endif
-#    endif
-#    ifndef PNG_DEPRECATED
-#      define PNG_DEPRECATED __declspec(deprecated)
-#    endif
-#    ifndef PNG_PRIVATE
-#      define PNG_PRIVATE __declspec(deprecated)
-#    endif
-#  endif /* _MSC_VER */
-#endif /* PNG_PEDANTIC_WARNINGS */
-
-#ifndef PNG_DEPRECATED
-#  define PNG_DEPRECATED  /* Use of this function is deprecated */
-#endif
-#ifndef PNG_USE_RESULT
-#  define PNG_USE_RESULT  /* The result of this function must be checked */
-#endif
-#ifndef PNG_NORETURN
-#  define PNG_NORETURN    /* This function does not return */
-#endif
-#ifndef PNG_ALLOCATED
-#  define PNG_ALLOCATED   /* The result of the function is new memory */
-#endif
-#ifndef PNG_PRIVATE
-#  define PNG_PRIVATE     /* This is a private libpng function */
-#endif
-#ifndef PNG_FP_EXPORT     /* A floating point API. */
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-#     define PNG_FP_EXPORT(ordinal, type, name, args)\
-         PNG_EXPORT(ordinal, type, name, args)
-#  else                   /* No floating point APIs */
-#     define PNG_FP_EXPORT(ordinal, type, name, args)
-#  endif
-#endif
-#ifndef PNG_FIXED_EXPORT  /* A fixed point API. */
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-#     define PNG_FIXED_EXPORT(ordinal, type, name, args)\
-         PNG_EXPORT(ordinal, type, name, args)
-#  else                   /* No fixed point APIs */
-#     define PNG_FIXED_EXPORT(ordinal, type, name, args)
-#  endif
-#endif
-
-/* The following uses const char * instead of char * for error
- * and warning message functions, so some compilers won't complain.
- * If you do not want to use const, define PNG_NO_CONST here.
- *
- * This should not change how the APIs are called, so it can be done
- * on a per-file basis in the application.
- */
-#ifndef PNG_CONST
-#  ifndef PNG_NO_CONST
-#    define PNG_CONST const
-#  else
-#    define PNG_CONST
-#  endif
-#endif
-
-/* Some typedefs to get us started.  These should be safe on most of the
- * common platforms.  The typedefs should be at least as large as the
- * numbers suggest (a png_uint_32 must be at least 32 bits long), but they
- * don't have to be exactly that size.  Some compilers dislike passing
- * unsigned shorts as function parameters, so you may be better off using
- * unsigned int for png_uint_16.
- */
-
-#if defined(INT_MAX) && (INT_MAX > 0x7ffffffeL)
-typedef unsigned int png_uint_32;
-typedef int png_int_32;
-#else
-typedef unsigned long png_uint_32;
-typedef long png_int_32;
-#endif
-typedef unsigned short png_uint_16;
-typedef short png_int_16;
-typedef unsigned char png_byte;
-
-#ifdef PNG_NO_SIZE_T
-typedef unsigned int png_size_t;
-#else
-typedef size_t png_size_t;
-#endif
-#define png_sizeof(x) (sizeof (x))
-
-/* The following is needed for medium model support.  It cannot be in the
- * pngpriv.h header.  Needs modification for other compilers besides
- * MSC.  Model independent support declares all arrays and pointers to be
- * large using the far keyword.  The zlib version used must also support
- * model independent data.  As of version zlib 1.0.4, the necessary changes
- * have been made in zlib.  The USE_FAR_KEYWORD define triggers other
- * changes that are needed. (Tim Wegner)
- */
-
-/* Separate compiler dependencies (problem here is that zlib.h always
- * defines FAR. (SJT)
- */
-#ifdef __BORLANDC__
-#  if defined(__LARGE__) || defined(__HUGE__) || defined(__COMPACT__)
-#    define LDATA 1
-#  else
-#    define LDATA 0
-#  endif
-/* GRR:  why is Cygwin in here?  Cygwin is not Borland C... */
-#  if !defined(__WIN32__) && !defined(__FLAT__) && !defined(__CYGWIN__)
-#    define PNG_MAX_MALLOC_64K /* only used in build */
-#    if (LDATA != 1)
-#      ifndef FAR
-#        define FAR __far
-#      endif
-#      define USE_FAR_KEYWORD
-#    endif   /* LDATA != 1 */
-/* Possibly useful for moving data out of default segment.
- * Uncomment it if you want. Could also define FARDATA as
- * const if your compiler supports it. (SJT)
-#        define FARDATA FAR
- */
-#  endif  /* __WIN32__, __FLAT__, __CYGWIN__ */
-#endif   /* __BORLANDC__ */
-
-
-/* Suggest testing for specific compiler first before testing for
- * FAR.  The Watcom compiler defines both __MEDIUM__ and M_I86MM,
- * making reliance oncertain keywords suspect. (SJT)
- */
-
-/* MSC Medium model */
-#ifdef FAR
-#  ifdef M_I86MM
-#    define USE_FAR_KEYWORD
-#    define FARDATA FAR
-#    include <dos.h>
-#  endif
-#endif
-
-/* SJT: default case */
-#ifndef FAR
-#  define FAR
-#endif
-
-/* At this point FAR is always defined */
-#ifndef FARDATA
-#  define FARDATA
-#endif
-
-/* Typedef for floating-point numbers that are converted
- * to fixed-point with a multiple of 100,000, e.g., gamma
- */
-typedef png_int_32 png_fixed_point;
-
-/* Add typedefs for pointers */
-typedef void                      FAR * png_voidp;
-typedef PNG_CONST void            FAR * png_const_voidp;
-typedef png_byte                  FAR * png_bytep;
-typedef PNG_CONST png_byte        FAR * png_const_bytep;
-typedef png_uint_32               FAR * png_uint_32p;
-typedef PNG_CONST png_uint_32     FAR * png_const_uint_32p;
-typedef png_int_32                FAR * png_int_32p;
-typedef PNG_CONST png_int_32      FAR * png_const_int_32p;
-typedef png_uint_16               FAR * png_uint_16p;
-typedef PNG_CONST png_uint_16     FAR * png_const_uint_16p;
-typedef png_int_16                FAR * png_int_16p;
-typedef PNG_CONST png_int_16      FAR * png_const_int_16p;
-typedef char                      FAR * png_charp;
-typedef PNG_CONST char            FAR * png_const_charp;
-typedef png_fixed_point           FAR * png_fixed_point_p;
-typedef PNG_CONST png_fixed_point FAR * png_const_fixed_point_p;
-typedef png_size_t                FAR * png_size_tp;
-typedef PNG_CONST png_size_t      FAR * png_const_size_tp;
-
-#ifdef PNG_STDIO_SUPPORTED
-typedef FILE            * png_FILE_p;
-#endif
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-typedef double           FAR * png_doublep;
-typedef PNG_CONST double FAR * png_const_doublep;
-#endif
-
-/* Pointers to pointers; i.e. arrays */
-typedef png_byte        FAR * FAR * png_bytepp;
-typedef png_uint_32     FAR * FAR * png_uint_32pp;
-typedef png_int_32      FAR * FAR * png_int_32pp;
-typedef png_uint_16     FAR * FAR * png_uint_16pp;
-typedef png_int_16      FAR * FAR * png_int_16pp;
-typedef PNG_CONST char  FAR * FAR * png_const_charpp;
-typedef char            FAR * FAR * png_charpp;
-typedef png_fixed_point FAR * FAR * png_fixed_point_pp;
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-typedef double          FAR * FAR * png_doublepp;
-#endif
-
-/* Pointers to pointers to pointers; i.e., pointer to array */
-typedef char            FAR * FAR * FAR * png_charppp;
-
-/* png_alloc_size_t is guaranteed to be no smaller than png_size_t,
- * and no smaller than png_uint_32.  Casts from png_size_t or png_uint_32
- * to png_alloc_size_t are not necessary; in fact, it is recommended
- * not to use them at all so that the compiler can complain when something
- * turns out to be problematic.
- * Casts in the other direction (from png_alloc_size_t to png_size_t or
- * png_uint_32) should be explicitly applied; however, we do not expect
- * to encounter practical situations that require such conversions.
- */
-#if defined(__TURBOC__) && !defined(__FLAT__)
-typedef unsigned long png_alloc_size_t;
-#else
-#  if defined(_MSC_VER) && defined(MAXSEG_64K)
-typedef unsigned long    png_alloc_size_t;
-#  else
-/* This is an attempt to detect an old Windows system where (int) is
- * actually 16 bits, in that case png_malloc must have an argument with a
- * bigger size to accomodate the requirements of the library.
- */
-#    if (defined(_Windows) || defined(_WINDOWS) || defined(_WINDOWS_)) && \
-        (!defined(INT_MAX) || INT_MAX <= 0x7ffffffeL)
-typedef DWORD         png_alloc_size_t;
-#    else
-typedef png_size_t    png_alloc_size_t;
-#    endif
-#  endif
-#endif
diff --git a/reg-io/png/lpng1510/pngget.c b/reg-io/png/lpng1510/pngget.c
deleted file mode 100644
index 1889e990..00000000
--- a/reg-io/png/lpng1510/pngget.c
+++ /dev/null
@@ -1,1124 +0,0 @@
-
-/* pngget.c - retrieval of values from info struct
- *
- * Last changed in libpng 1.5.7 [December 15, 2011]
- * Copyright (c) 1998-2011 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- */
-
-#include "pngpriv.h"
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-
-png_uint_32 PNGAPI
-png_get_valid(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_uint_32 flag)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return(info_ptr->valid & flag);
-
-   return(0);
-}
-
-png_size_t PNGAPI
-png_get_rowbytes(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return(info_ptr->rowbytes);
-
-   return(0);
-}
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-png_bytepp PNGAPI
-png_get_rows(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return(info_ptr->row_pointers);
-
-   return(0);
-}
-#endif
-
-#ifdef PNG_EASY_ACCESS_SUPPORTED
-/* Easy access to info, added in libpng-0.99 */
-png_uint_32 PNGAPI
-png_get_image_width(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->width;
-
-   return (0);
-}
-
-png_uint_32 PNGAPI
-png_get_image_height(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->height;
-
-   return (0);
-}
-
-png_byte PNGAPI
-png_get_bit_depth(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->bit_depth;
-
-   return (0);
-}
-
-png_byte PNGAPI
-png_get_color_type(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->color_type;
-
-   return (0);
-}
-
-png_byte PNGAPI
-png_get_filter_type(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->filter_type;
-
-   return (0);
-}
-
-png_byte PNGAPI
-png_get_interlace_type(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->interlace_type;
-
-   return (0);
-}
-
-png_byte PNGAPI
-png_get_compression_type(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return info_ptr->compression_type;
-
-   return (0);
-}
-
-png_uint_32 PNGAPI
-png_get_x_pixels_per_meter(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-#ifdef PNG_pHYs_SUPPORTED
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs))
-      {
-         png_debug1(1, "in %s retrieval function",
-             "png_get_x_pixels_per_meter");
-
-         if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER)
-            return (info_ptr->x_pixels_per_unit);
-      }
-#endif
-
-   return (0);
-}
-
-png_uint_32 PNGAPI
-png_get_y_pixels_per_meter(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-#ifdef PNG_pHYs_SUPPORTED
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs))
-   {
-      png_debug1(1, "in %s retrieval function",
-          "png_get_y_pixels_per_meter");
-
-      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER)
-         return (info_ptr->y_pixels_per_unit);
-   }
-#endif
-
-   return (0);
-}
-
-png_uint_32 PNGAPI
-png_get_pixels_per_meter(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-#ifdef PNG_pHYs_SUPPORTED
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs))
-   {
-      png_debug1(1, "in %s retrieval function", "png_get_pixels_per_meter");
-
-      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER &&
-          info_ptr->x_pixels_per_unit == info_ptr->y_pixels_per_unit)
-         return (info_ptr->x_pixels_per_unit);
-   }
-#endif
-
-   return (0);
-}
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-float PNGAPI
-png_get_pixel_aspect_ratio(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-#ifdef PNG_READ_pHYs_SUPPORTED
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs))
-   {
-      png_debug1(1, "in %s retrieval function", "png_get_aspect_ratio");
-
-      if (info_ptr->x_pixels_per_unit != 0)
-         return ((float)((float)info_ptr->y_pixels_per_unit
-             /(float)info_ptr->x_pixels_per_unit));
-   }
-#endif
-
-   return ((float)0.0);
-}
-#endif
-
-#ifdef PNG_FIXED_POINT_SUPPORTED
-png_fixed_point PNGAPI
-png_get_pixel_aspect_ratio_fixed(png_const_structp png_ptr,
-    png_const_infop info_ptr)
-{
-#ifdef PNG_READ_pHYs_SUPPORTED
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs)
-       && info_ptr->x_pixels_per_unit > 0 && info_ptr->y_pixels_per_unit > 0
-       && info_ptr->x_pixels_per_unit <= PNG_UINT_31_MAX
-       && info_ptr->y_pixels_per_unit <= PNG_UINT_31_MAX)
-   {
-      png_fixed_point res;
-
-      png_debug1(1, "in %s retrieval function", "png_get_aspect_ratio_fixed");
-
-      /* The following casts work because a PNG 4 byte integer only has a valid
-       * range of 0..2^31-1; otherwise the cast might overflow.
-       */
-      if (png_muldiv(&res, (png_int_32)info_ptr->y_pixels_per_unit, PNG_FP_1,
-          (png_int_32)info_ptr->x_pixels_per_unit))
-         return res;
-   }
-#endif
-
-   return 0;
-}
-#endif
-
-png_int_32 PNGAPI
-png_get_x_offset_microns(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-#ifdef PNG_oFFs_SUPPORTED
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs))
-   {
-      png_debug1(1, "in %s retrieval function", "png_get_x_offset_microns");
-
-      if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER)
-         return (info_ptr->x_offset);
-   }
-#endif
-
-   return (0);
-}
-
-png_int_32 PNGAPI
-png_get_y_offset_microns(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-#ifdef PNG_oFFs_SUPPORTED
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs))
-   {
-      png_debug1(1, "in %s retrieval function", "png_get_y_offset_microns");
-
-      if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER)
-         return (info_ptr->y_offset);
-   }
-#endif
-
-   return (0);
-}
-
-png_int_32 PNGAPI
-png_get_x_offset_pixels(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-#ifdef PNG_oFFs_SUPPORTED
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs))
-   {
-      png_debug1(1, "in %s retrieval function", "png_get_x_offset_pixels");
-
-      if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL)
-         return (info_ptr->x_offset);
-   }
-#endif
-
-   return (0);
-}
-
-png_int_32 PNGAPI
-png_get_y_offset_pixels(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-#ifdef PNG_oFFs_SUPPORTED
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs))
-   {
-      png_debug1(1, "in %s retrieval function", "png_get_y_offset_pixels");
-
-      if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL)
-         return (info_ptr->y_offset);
-   }
-#endif
-
-   return (0);
-}
-
-#ifdef PNG_INCH_CONVERSIONS_SUPPORTED
-static png_uint_32
-ppi_from_ppm(png_uint_32 ppm)
-{
-#if 0
-   /* The conversion is *(2.54/100), in binary (32 digits):
-    * .00000110100000001001110101001001
-    */
-   png_uint_32 t1001, t1101;
-   ppm >>= 1;                  /* .1 */
-   t1001 = ppm + (ppm >> 3);   /* .1001 */
-   t1101 = t1001 + (ppm >> 1); /* .1101 */
-   ppm >>= 20;                 /* .000000000000000000001 */
-   t1101 += t1101 >> 15;       /* .1101000000000001101 */
-   t1001 >>= 11;               /* .000000000001001 */
-   t1001 += t1001 >> 12;       /* .000000000001001000000001001 */
-   ppm += t1001;               /* .000000000001001000001001001 */
-   ppm += t1101;               /* .110100000001001110101001001 */
-   return (ppm + 16) >> 5;/* .00000110100000001001110101001001 */
-#else
-   /* The argument is a PNG unsigned integer, so it is not permitted
-    * to be bigger than 2^31.
-    */
-   png_fixed_point result;
-   if (ppm <= PNG_UINT_31_MAX && png_muldiv(&result, (png_int_32)ppm, 127,
-       5000))
-      return result;
-
-   /* Overflow. */
-   return 0;
-#endif
-}
-
-png_uint_32 PNGAPI
-png_get_pixels_per_inch(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   return ppi_from_ppm(png_get_pixels_per_meter(png_ptr, info_ptr));
-}
-
-png_uint_32 PNGAPI
-png_get_x_pixels_per_inch(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   return ppi_from_ppm(png_get_x_pixels_per_meter(png_ptr, info_ptr));
-}
-
-png_uint_32 PNGAPI
-png_get_y_pixels_per_inch(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   return ppi_from_ppm(png_get_y_pixels_per_meter(png_ptr, info_ptr));
-}
-
-#ifdef PNG_FIXED_POINT_SUPPORTED
-static png_fixed_point
-png_fixed_inches_from_microns(png_structp png_ptr, png_int_32 microns)
-{
-   /* Convert from metres * 1,000,000 to inches * 100,000, meters to
-    * inches is simply *(100/2.54), so we want *(10/2.54) == 500/127.
-    * Notice that this can overflow - a warning is output and 0 is
-    * returned.
-    */
-   return png_muldiv_warn(png_ptr, microns, 500, 127);
-}
-
-png_fixed_point PNGAPI
-png_get_x_offset_inches_fixed(png_structp png_ptr,
-    png_const_infop info_ptr)
-{
-   return png_fixed_inches_from_microns(png_ptr,
-       png_get_x_offset_microns(png_ptr, info_ptr));
-}
-#endif
-
-#ifdef PNG_FIXED_POINT_SUPPORTED
-png_fixed_point PNGAPI
-png_get_y_offset_inches_fixed(png_structp png_ptr,
-    png_const_infop info_ptr)
-{
-   return png_fixed_inches_from_microns(png_ptr,
-       png_get_y_offset_microns(png_ptr, info_ptr));
-}
-#endif
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-float PNGAPI
-png_get_x_offset_inches(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   /* To avoid the overflow do the conversion directly in floating
-    * point.
-    */
-   return (float)(png_get_x_offset_microns(png_ptr, info_ptr) * .00003937);
-}
-#endif
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-float PNGAPI
-png_get_y_offset_inches(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   /* To avoid the overflow do the conversion directly in floating
-    * point.
-    */
-   return (float)(png_get_y_offset_microns(png_ptr, info_ptr) * .00003937);
-}
-#endif
-
-#ifdef PNG_pHYs_SUPPORTED
-png_uint_32 PNGAPI
-png_get_pHYs_dpi(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)
-{
-   png_uint_32 retval = 0;
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs))
-   {
-      png_debug1(1, "in %s retrieval function", "pHYs");
-
-      if (res_x != NULL)
-      {
-         *res_x = info_ptr->x_pixels_per_unit;
-         retval |= PNG_INFO_pHYs;
-      }
-
-      if (res_y != NULL)
-      {
-         *res_y = info_ptr->y_pixels_per_unit;
-         retval |= PNG_INFO_pHYs;
-      }
-
-      if (unit_type != NULL)
-      {
-         *unit_type = (int)info_ptr->phys_unit_type;
-         retval |= PNG_INFO_pHYs;
-
-         if (*unit_type == 1)
-         {
-            if (res_x != NULL) *res_x = (png_uint_32)(*res_x * .0254 + .50);
-            if (res_y != NULL) *res_y = (png_uint_32)(*res_y * .0254 + .50);
-         }
-      }
-   }
-
-   return (retval);
-}
-#endif /* PNG_pHYs_SUPPORTED */
-#endif  /* PNG_INCH_CONVERSIONS_SUPPORTED */
-
-/* png_get_channels really belongs in here, too, but it's been around longer */
-
-#endif  /* PNG_EASY_ACCESS_SUPPORTED */
-
-png_byte PNGAPI
-png_get_channels(png_const_structp png_ptr, png_const_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return(info_ptr->channels);
-
-   return (0);
-}
-
-png_const_bytep PNGAPI
-png_get_signature(png_const_structp png_ptr, png_infop info_ptr)
-{
-   if (png_ptr != NULL && info_ptr != NULL)
-      return(info_ptr->signature);
-
-   return (NULL);
-}
-
-#ifdef PNG_bKGD_SUPPORTED
-png_uint_32 PNGAPI
-png_get_bKGD(png_const_structp png_ptr, png_infop info_ptr,
-   png_color_16p *background)
-{
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD)
-       && background != NULL)
-   {
-      png_debug1(1, "in %s retrieval function", "bKGD");
-
-      *background = &(info_ptr->background);
-      return (PNG_INFO_bKGD);
-   }
-
-   return (0);
-}
-#endif
-
-#ifdef PNG_cHRM_SUPPORTED
-/* The XYZ APIs were added in 1.5.5 to take advantage of the code added at the
- * same time to correct the rgb grayscale coefficient defaults obtained from the
- * cHRM chunk in 1.5.4
- */
-png_uint_32 PNGFAPI
-png_get_cHRM_XYZ_fixed(png_structp png_ptr, png_const_infop info_ptr,
-    png_fixed_point *int_red_X, png_fixed_point *int_red_Y,
-    png_fixed_point *int_red_Z, png_fixed_point *int_green_X,
-    png_fixed_point *int_green_Y, png_fixed_point *int_green_Z,
-    png_fixed_point *int_blue_X, png_fixed_point *int_blue_Y,
-    png_fixed_point *int_blue_Z)
-{
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM))
-   {
-      png_xy xy;
-      png_XYZ XYZ;
-
-      png_debug1(1, "in %s retrieval function", "cHRM_XYZ");
-
-      xy.whitex = info_ptr->x_white;
-      xy.whitey = info_ptr->y_white;
-      xy.redx = info_ptr->x_red;
-      xy.redy = info_ptr->y_red;
-      xy.greenx = info_ptr->x_green;
-      xy.greeny = info_ptr->y_green;
-      xy.bluex = info_ptr->x_blue;
-      xy.bluey = info_ptr->y_blue;
-
-      /* The *_checked function handles error reporting, so just return 0 if
-       * there is a failure here.
-       */
-      if (png_XYZ_from_xy_checked(png_ptr, &XYZ, xy))
-      {
-         if (int_red_X != NULL)
-            *int_red_X = XYZ.redX;
-         if (int_red_Y != NULL)
-            *int_red_Y = XYZ.redY;
-         if (int_red_Z != NULL)
-            *int_red_Z = XYZ.redZ;
-         if (int_green_X != NULL)
-            *int_green_X = XYZ.greenX;
-         if (int_green_Y != NULL)
-            *int_green_Y = XYZ.greenY;
-         if (int_green_Z != NULL)
-            *int_green_Z = XYZ.greenZ;
-         if (int_blue_X != NULL)
-            *int_blue_X = XYZ.blueX;
-         if (int_blue_Y != NULL)
-            *int_blue_Y = XYZ.blueY;
-         if (int_blue_Z != NULL)
-            *int_blue_Z = XYZ.blueZ;
-
-         return (PNG_INFO_cHRM);
-      }
-   }
-
-   return (0);
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_cHRM(png_const_structp png_ptr, png_const_infop info_ptr,
-    double *white_x, double *white_y, double *red_x, double *red_y,
-    double *green_x, double *green_y, double *blue_x, double *blue_y)
-{
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM))
-   {
-      png_debug1(1, "in %s retrieval function", "cHRM");
-
-      if (white_x != NULL)
-         *white_x = png_float(png_ptr, info_ptr->x_white, "cHRM white X");
-      if (white_y != NULL)
-         *white_y = png_float(png_ptr, info_ptr->y_white, "cHRM white Y");
-      if (red_x != NULL)
-         *red_x = png_float(png_ptr, info_ptr->x_red, "cHRM red X");
-      if (red_y != NULL)
-         *red_y = png_float(png_ptr, info_ptr->y_red, "cHRM red Y");
-      if (green_x != NULL)
-         *green_x = png_float(png_ptr, info_ptr->x_green, "cHRM green X");
-      if (green_y != NULL)
-         *green_y = png_float(png_ptr, info_ptr->y_green, "cHRM green Y");
-      if (blue_x != NULL)
-         *blue_x = png_float(png_ptr, info_ptr->x_blue, "cHRM blue X");
-      if (blue_y != NULL)
-         *blue_y = png_float(png_ptr, info_ptr->y_blue, "cHRM blue Y");
-      return (PNG_INFO_cHRM);
-   }
-
-   return (0);
-}
-
-png_uint_32 PNGAPI
-png_get_cHRM_XYZ(png_structp png_ptr, png_const_infop info_ptr,
-   double *red_X, double *red_Y, double *red_Z, double *green_X,
-   double *green_Y, double *green_Z, double *blue_X, double *blue_Y,
-   double *blue_Z)
-{
-   png_XYZ XYZ;
-
-   if (png_get_cHRM_XYZ_fixed(png_ptr, info_ptr,
-      &XYZ.redX, &XYZ.redY, &XYZ.redZ, &XYZ.greenX, &XYZ.greenY, &XYZ.greenZ,
-      &XYZ.blueX, &XYZ.blueY, &XYZ.blueZ) & PNG_INFO_cHRM)
-   {
-      if (red_X != NULL)
-         *red_X = png_float(png_ptr, XYZ.redX, "cHRM red X");
-      if (red_Y != NULL)
-         *red_Y = png_float(png_ptr, XYZ.redY, "cHRM red Y");
-      if (red_Z != NULL)
-         *red_Z = png_float(png_ptr, XYZ.redZ, "cHRM red Z");
-      if (green_X != NULL)
-         *green_X = png_float(png_ptr, XYZ.greenX, "cHRM green X");
-      if (green_Y != NULL)
-         *green_Y = png_float(png_ptr, XYZ.greenY, "cHRM green Y");
-      if (green_Z != NULL)
-         *green_Z = png_float(png_ptr, XYZ.greenZ, "cHRM green Z");
-      if (blue_X != NULL)
-         *blue_X = png_float(png_ptr, XYZ.blueX, "cHRM blue X");
-      if (blue_Y != NULL)
-         *blue_Y = png_float(png_ptr, XYZ.blueY, "cHRM blue Y");
-      if (blue_Z != NULL)
-         *blue_Z = png_float(png_ptr, XYZ.blueZ, "cHRM blue Z");
-      return (PNG_INFO_cHRM);
-   }
-
-   return (0);
-}
-#  endif
-
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_cHRM_fixed(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_fixed_point *white_x, png_fixed_point *white_y, png_fixed_point *red_x,
-    png_fixed_point *red_y, png_fixed_point *green_x, png_fixed_point *green_y,
-    png_fixed_point *blue_x, png_fixed_point *blue_y)
-{
-   png_debug1(1, "in %s retrieval function", "cHRM");
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM))
-   {
-      if (white_x != NULL)
-         *white_x = info_ptr->x_white;
-      if (white_y != NULL)
-         *white_y = info_ptr->y_white;
-      if (red_x != NULL)
-         *red_x = info_ptr->x_red;
-      if (red_y != NULL)
-         *red_y = info_ptr->y_red;
-      if (green_x != NULL)
-         *green_x = info_ptr->x_green;
-      if (green_y != NULL)
-         *green_y = info_ptr->y_green;
-      if (blue_x != NULL)
-         *blue_x = info_ptr->x_blue;
-      if (blue_y != NULL)
-         *blue_y = info_ptr->y_blue;
-      return (PNG_INFO_cHRM);
-   }
-
-   return (0);
-}
-#  endif
-#endif
-
-#ifdef PNG_gAMA_SUPPORTED
-png_uint_32 PNGFAPI
-png_get_gAMA_fixed(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_fixed_point *file_gamma)
-{
-   png_debug1(1, "in %s retrieval function", "gAMA");
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_gAMA)
-       && file_gamma != NULL)
-   {
-      *file_gamma = info_ptr->gamma;
-      return (PNG_INFO_gAMA);
-   }
-
-   return (0);
-}
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_gAMA(png_const_structp png_ptr, png_const_infop info_ptr,
-    double *file_gamma)
-{
-   png_fixed_point igamma;
-   png_uint_32 ok = png_get_gAMA_fixed(png_ptr, info_ptr, &igamma);
-
-   if (ok)
-      *file_gamma = png_float(png_ptr, igamma, "png_get_gAMA");
-
-   return ok;
-}
-
-#  endif
-#endif
-
-#ifdef PNG_sRGB_SUPPORTED
-png_uint_32 PNGAPI
-png_get_sRGB(png_const_structp png_ptr, png_const_infop info_ptr,
-    int *file_srgb_intent)
-{
-   png_debug1(1, "in %s retrieval function", "sRGB");
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_sRGB)
-       && file_srgb_intent != NULL)
-   {
-      *file_srgb_intent = (int)info_ptr->srgb_intent;
-      return (PNG_INFO_sRGB);
-   }
-
-   return (0);
-}
-#endif
-
-#ifdef PNG_iCCP_SUPPORTED
-png_uint_32 PNGAPI
-png_get_iCCP(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_charpp name, int *compression_type,
-    png_bytepp profile, png_uint_32 *proflen)
-{
-   png_debug1(1, "in %s retrieval function", "iCCP");
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_iCCP)
-       && name != NULL && compression_type != NULL && profile != NULL &&
-		 proflen != NULL)
-   {
-      *name = info_ptr->iccp_name;
-      *profile = info_ptr->iccp_profile;
-      /* Compression_type is a dummy so the API won't have to change
-       * if we introduce multiple compression types later.
-       */
-      *proflen = info_ptr->iccp_proflen;
-      *compression_type = info_ptr->iccp_compression;
-      return (PNG_INFO_iCCP);
-   }
-
-   return (0);
-}
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_sPLT(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_sPLT_tpp spalettes)
-{
-   if (png_ptr != NULL && info_ptr != NULL && spalettes != NULL)
-   {
-      *spalettes = info_ptr->splt_palettes;
-      return ((png_uint_32)info_ptr->splt_palettes_num);
-   }
-
-   return (0);
-}
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-png_uint_32 PNGAPI
-png_get_hIST(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_uint_16p *hist)
-{
-   png_debug1(1, "in %s retrieval function", "hIST");
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST)
-       && hist != NULL)
-   {
-      *hist = info_ptr->hist;
-      return (PNG_INFO_hIST);
-   }
-
-   return (0);
-}
-#endif
-
-png_uint_32 PNGAPI
-png_get_IHDR(png_structp png_ptr, png_infop info_ptr,
-    png_uint_32 *width, png_uint_32 *height, int *bit_depth,
-    int *color_type, int *interlace_type, int *compression_type,
-    int *filter_type)
-
-{
-   png_debug1(1, "in %s retrieval function", "IHDR");
-
-   if (png_ptr == NULL || info_ptr == NULL || width == NULL ||
-       height == NULL || bit_depth == NULL || color_type == NULL)
-      return (0);
-
-   *width = info_ptr->width;
-   *height = info_ptr->height;
-   *bit_depth = info_ptr->bit_depth;
-   *color_type = info_ptr->color_type;
-
-   if (compression_type != NULL)
-      *compression_type = info_ptr->compression_type;
-
-   if (filter_type != NULL)
-      *filter_type = info_ptr->filter_type;
-
-   if (interlace_type != NULL)
-      *interlace_type = info_ptr->interlace_type;
-
-   /* This is redundant if we can be sure that the info_ptr values were all
-    * assigned in png_set_IHDR().  We do the check anyhow in case an
-    * application has ignored our advice not to mess with the members
-    * of info_ptr directly.
-    */
-   png_check_IHDR (png_ptr, info_ptr->width, info_ptr->height,
-       info_ptr->bit_depth, info_ptr->color_type, info_ptr->interlace_type,
-       info_ptr->compression_type, info_ptr->filter_type);
-
-   return (1);
-}
-
-#ifdef PNG_oFFs_SUPPORTED
-png_uint_32 PNGAPI
-png_get_oFFs(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_int_32 *offset_x, png_int_32 *offset_y, int *unit_type)
-{
-   png_debug1(1, "in %s retrieval function", "oFFs");
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs)
-       && offset_x != NULL && offset_y != NULL && unit_type != NULL)
-   {
-      *offset_x = info_ptr->x_offset;
-      *offset_y = info_ptr->y_offset;
-      *unit_type = (int)info_ptr->offset_unit_type;
-      return (PNG_INFO_oFFs);
-   }
-
-   return (0);
-}
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-png_uint_32 PNGAPI
-png_get_pCAL(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_charp *purpose, png_int_32 *X0, png_int_32 *X1, int *type, int *nparams,
-    png_charp *units, png_charpp *params)
-{
-   png_debug1(1, "in %s retrieval function", "pCAL");
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pCAL)
-       && purpose != NULL && X0 != NULL && X1 != NULL && type != NULL &&
-       nparams != NULL && units != NULL && params != NULL)
-   {
-      *purpose = info_ptr->pcal_purpose;
-      *X0 = info_ptr->pcal_X0;
-      *X1 = info_ptr->pcal_X1;
-      *type = (int)info_ptr->pcal_type;
-      *nparams = (int)info_ptr->pcal_nparams;
-      *units = info_ptr->pcal_units;
-      *params = info_ptr->pcal_params;
-      return (PNG_INFO_pCAL);
-   }
-
-   return (0);
-}
-#endif
-
-#ifdef PNG_sCAL_SUPPORTED
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-#    ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED
-png_uint_32 PNGAPI
-png_get_sCAL_fixed(png_structp png_ptr, png_const_infop info_ptr,
-    int *unit, png_fixed_point *width, png_fixed_point *height)
-{
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_sCAL))
-   {
-      *unit = info_ptr->scal_unit;
-      /*TODO: make this work without FP support */
-      *width = png_fixed(png_ptr, atof(info_ptr->scal_s_width), "sCAL width");
-      *height = png_fixed(png_ptr, atof(info_ptr->scal_s_height),
-         "sCAL height");
-      return (PNG_INFO_sCAL);
-   }
-
-   return(0);
-}
-#    endif /* FLOATING_ARITHMETIC */
-#  endif /* FIXED_POINT */
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_sCAL(png_const_structp png_ptr, png_const_infop info_ptr,
-    int *unit, double *width, double *height)
-{
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_sCAL))
-   {
-      *unit = info_ptr->scal_unit;
-      *width = atof(info_ptr->scal_s_width);
-      *height = atof(info_ptr->scal_s_height);
-      return (PNG_INFO_sCAL);
-   }
-
-   return(0);
-}
-#  endif /* FLOATING POINT */
-png_uint_32 PNGAPI
-png_get_sCAL_s(png_const_structp png_ptr, png_const_infop info_ptr,
-    int *unit, png_charpp width, png_charpp height)
-{
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_sCAL))
-   {
-      *unit = info_ptr->scal_unit;
-      *width = info_ptr->scal_s_width;
-      *height = info_ptr->scal_s_height;
-      return (PNG_INFO_sCAL);
-   }
-
-   return(0);
-}
-#endif /* sCAL */
-
-#ifdef PNG_pHYs_SUPPORTED
-png_uint_32 PNGAPI
-png_get_pHYs(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)
-{
-   png_uint_32 retval = 0;
-
-   png_debug1(1, "in %s retrieval function", "pHYs");
-
-   if (png_ptr != NULL && info_ptr != NULL &&
-       (info_ptr->valid & PNG_INFO_pHYs))
-   {
-      if (res_x != NULL)
-      {
-         *res_x = info_ptr->x_pixels_per_unit;
-         retval |= PNG_INFO_pHYs;
-      }
-
-      if (res_y != NULL)
-      {
-         *res_y = info_ptr->y_pixels_per_unit;
-         retval |= PNG_INFO_pHYs;
-      }
-
-      if (unit_type != NULL)
-      {
-         *unit_type = (int)info_ptr->phys_unit_type;
-         retval |= PNG_INFO_pHYs;
-      }
-   }
-
-   return (retval);
-}
-#endif /* pHYs */
-
-png_uint_32 PNGAPI
-png_get_PLTE(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_colorp *palette, int *num_palette)
-{
-   png_debug1(1, "in %s retrieval function", "PLTE");
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_PLTE)
-       && palette != NULL)
-   {
-      *palette = info_ptr->palette;
-      *num_palette = info_ptr->num_palette;
-      png_debug1(3, "num_palette = %d", *num_palette);
-      return (PNG_INFO_PLTE);
-   }
-
-   return (0);
-}
-
-#ifdef PNG_sBIT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_sBIT(png_const_structp png_ptr, png_infop info_ptr,
-    png_color_8p *sig_bit)
-{
-   png_debug1(1, "in %s retrieval function", "sBIT");
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_sBIT)
-       && sig_bit != NULL)
-   {
-      *sig_bit = &(info_ptr->sig_bit);
-      return (PNG_INFO_sBIT);
-   }
-
-   return (0);
-}
-#endif
-
-#ifdef PNG_TEXT_SUPPORTED
-png_uint_32 PNGAPI
-png_get_text(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_textp *text_ptr, int *num_text)
-{
-   if (png_ptr != NULL && info_ptr != NULL && info_ptr->num_text > 0)
-   {
-      png_debug1(1, "in 0x%lx retrieval function",
-         (unsigned long)png_ptr->chunk_name);
-
-      if (text_ptr != NULL)
-         *text_ptr = info_ptr->text;
-
-      if (num_text != NULL)
-         *num_text = info_ptr->num_text;
-
-      return ((png_uint_32)info_ptr->num_text);
-   }
-
-   if (num_text != NULL)
-      *num_text = 0;
-
-   return(0);
-}
-#endif
-
-#ifdef PNG_tIME_SUPPORTED
-png_uint_32 PNGAPI
-png_get_tIME(png_const_structp png_ptr, png_infop info_ptr, png_timep *mod_time)
-{
-   png_debug1(1, "in %s retrieval function", "tIME");
-
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_tIME)
-       && mod_time != NULL)
-   {
-      *mod_time = &(info_ptr->mod_time);
-      return (PNG_INFO_tIME);
-   }
-
-   return (0);
-}
-#endif
-
-#ifdef PNG_tRNS_SUPPORTED
-png_uint_32 PNGAPI
-png_get_tRNS(png_const_structp png_ptr, png_infop info_ptr,
-    png_bytep *trans_alpha, int *num_trans, png_color_16p *trans_color)
-{
-   png_uint_32 retval = 0;
-   if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS))
-   {
-      png_debug1(1, "in %s retrieval function", "tRNS");
-
-      if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         if (trans_alpha != NULL)
-         {
-            *trans_alpha = info_ptr->trans_alpha;
-            retval |= PNG_INFO_tRNS;
-         }
-
-         if (trans_color != NULL)
-            *trans_color = &(info_ptr->trans_color);
-      }
-
-      else /* if (info_ptr->color_type != PNG_COLOR_TYPE_PALETTE) */
-      {
-         if (trans_color != NULL)
-         {
-            *trans_color = &(info_ptr->trans_color);
-            retval |= PNG_INFO_tRNS;
-         }
-
-         if (trans_alpha != NULL)
-            *trans_alpha = NULL;
-      }
-
-      if (num_trans != NULL)
-      {
-         *num_trans = info_ptr->num_trans;
-         retval |= PNG_INFO_tRNS;
-      }
-   }
-
-   return (retval);
-}
-#endif
-
-#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
-int PNGAPI
-png_get_unknown_chunks(png_const_structp png_ptr, png_const_infop info_ptr,
-    png_unknown_chunkpp unknowns)
-{
-   if (png_ptr != NULL && info_ptr != NULL && unknowns != NULL)
-   {
-      *unknowns = info_ptr->unknown_chunks;
-      return info_ptr->unknown_chunks_num;
-   }
-
-   return (0);
-}
-#endif
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-png_byte PNGAPI
-png_get_rgb_to_gray_status (png_const_structp png_ptr)
-{
-   return (png_byte)(png_ptr ? png_ptr->rgb_to_gray_status : 0);
-}
-#endif
-
-#ifdef PNG_USER_CHUNKS_SUPPORTED
-png_voidp PNGAPI
-png_get_user_chunk_ptr(png_const_structp png_ptr)
-{
-   return (png_ptr ? png_ptr->user_chunk_ptr : NULL);
-}
-#endif
-
-png_size_t PNGAPI
-png_get_compression_buffer_size(png_const_structp png_ptr)
-{
-   return (png_ptr ? png_ptr->zbuf_size : 0);
-}
-
-#ifdef PNG_SET_USER_LIMITS_SUPPORTED
-/* These functions were added to libpng 1.2.6 and were enabled
- * by default in libpng-1.4.0 */
-png_uint_32 PNGAPI
-png_get_user_width_max (png_const_structp png_ptr)
-{
-   return (png_ptr ? png_ptr->user_width_max : 0);
-}
-
-png_uint_32 PNGAPI
-png_get_user_height_max (png_const_structp png_ptr)
-{
-   return (png_ptr ? png_ptr->user_height_max : 0);
-}
-
-/* This function was added to libpng 1.4.0 */
-png_uint_32 PNGAPI
-png_get_chunk_cache_max (png_const_structp png_ptr)
-{
-   return (png_ptr ? png_ptr->user_chunk_cache_max : 0);
-}
-
-/* This function was added to libpng 1.4.1 */
-png_alloc_size_t PNGAPI
-png_get_chunk_malloc_max (png_const_structp png_ptr)
-{
-   return (png_ptr ? png_ptr->user_chunk_malloc_max : 0);
-}
-#endif /* ?PNG_SET_USER_LIMITS_SUPPORTED */
-
-/* These functions were added to libpng 1.4.0 */
-#ifdef PNG_IO_STATE_SUPPORTED
-png_uint_32 PNGAPI
-png_get_io_state (png_structp png_ptr)
-{
-   return png_ptr->io_state;
-}
-
-png_uint_32 PNGAPI
-png_get_io_chunk_type (png_const_structp png_ptr)
-{
-   return png_ptr->chunk_name;
-}
-
-png_const_bytep PNGAPI
-png_get_io_chunk_name (png_structp png_ptr)
-{
-   PNG_CSTRING_FROM_CHUNK(png_ptr->io_chunk_string, png_ptr->chunk_name);
-   return png_ptr->io_chunk_string;
-}
-#endif /* ?PNG_IO_STATE_SUPPORTED */
-
-#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/reg-io/png/lpng1510/pngmem.c b/reg-io/png/lpng1510/pngmem.c
deleted file mode 100644
index 25b5c735..00000000
--- a/reg-io/png/lpng1510/pngmem.c
+++ /dev/null
@@ -1,667 +0,0 @@
-
-/* pngmem.c - stub functions for memory allocation
- *
- * Last changed in libpng 1.5.7 [December 15, 2011]
- * Copyright (c) 1998-2011 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This file provides a location for all memory allocation.  Users who
- * need special memory handling are expected to supply replacement
- * functions for png_malloc() and png_free(), and to use
- * png_create_read_struct_2() and png_create_write_struct_2() to
- * identify the replacement functions.
- */
-
-#include "pngpriv.h"
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-
-/* Borland DOS special memory handler */
-#if defined(__TURBOC__) && !defined(_Windows) && !defined(__FLAT__)
-/* If you change this, be sure to change the one in png.h also */
-
-/* Allocate memory for a png_struct.  The malloc and memset can be replaced
-   by a single call to calloc() if this is thought to improve performance. */
-PNG_FUNCTION(png_voidp /* PRIVATE */,
-png_create_struct,(int type),PNG_ALLOCATED)
-{
-#  ifdef PNG_USER_MEM_SUPPORTED
-   return (png_create_struct_2(type, NULL, NULL));
-}
-
-/* Alternate version of png_create_struct, for use with user-defined malloc. */
-PNG_FUNCTION(png_voidp /* PRIVATE */,
-png_create_struct_2,(int type, png_malloc_ptr malloc_fn, png_voidp mem_ptr),
-   PNG_ALLOCATED)
-{
-#  endif /* PNG_USER_MEM_SUPPORTED */
-   png_size_t size;
-   png_voidp struct_ptr;
-
-   if (type == PNG_STRUCT_INFO)
-      size = png_sizeof(png_info);
-
-   else if (type == PNG_STRUCT_PNG)
-      size = png_sizeof(png_struct);
-
-   else
-      return (png_get_copyright(NULL));
-
-#  ifdef PNG_USER_MEM_SUPPORTED
-   if (malloc_fn != NULL)
-   {
-      png_struct dummy_struct;
-      memset(&dummy_struct, 0, sizeof dummy_struct);
-      dummy_struct.mem_ptr=mem_ptr;
-      struct_ptr = (*(malloc_fn))(&dummy_struct, (png_alloc_size_t)size);
-   }
-
-   else
-#  endif /* PNG_USER_MEM_SUPPORTED */
-   struct_ptr = (png_voidp)farmalloc(size);
-   if (struct_ptr != NULL)
-      png_memset(struct_ptr, 0, size);
-
-   return (struct_ptr);
-}
-
-/* Free memory allocated by a png_create_struct() call */
-void /* PRIVATE */
-png_destroy_struct(png_voidp struct_ptr)
-{
-#  ifdef PNG_USER_MEM_SUPPORTED
-   png_destroy_struct_2(struct_ptr, NULL, NULL);
-}
-
-/* Free memory allocated by a png_create_struct() call */
-void /* PRIVATE */
-png_destroy_struct_2(png_voidp struct_ptr, png_free_ptr free_fn,
-    png_voidp mem_ptr)
-{
-#  endif
-   if (struct_ptr != NULL)
-   {
-#  ifdef PNG_USER_MEM_SUPPORTED
-      if (free_fn != NULL)
-      {
-         png_struct dummy_struct;
-         memset(&dummy_struct, 0, sizeof dummy_struct);
-         dummy_struct.mem_ptr=mem_ptr;
-         (*(free_fn))(&dummy_struct, struct_ptr);
-         return;
-      }
-
-#  endif /* PNG_USER_MEM_SUPPORTED */
-      farfree (struct_ptr);
-   }
-}
-
-/* Allocate memory.  For reasonable files, size should never exceed
- * 64K.  However, zlib may allocate more then 64K if you don't tell
- * it not to.  See zconf.h and png.h for more information. zlib does
- * need to allocate exactly 64K, so whatever you call here must
- * have the ability to do that.
- *
- * Borland seems to have a problem in DOS mode for exactly 64K.
- * It gives you a segment with an offset of 8 (perhaps to store its
- * memory stuff).  zlib doesn't like this at all, so we have to
- * detect and deal with it.  This code should not be needed in
- * Windows or OS/2 modes, and only in 16 bit mode.  This code has
- * been updated by Alexander Lehmann for version 0.89 to waste less
- * memory.
- *
- * Note that we can't use png_size_t for the "size" declaration,
- * since on some systems a png_size_t is a 16-bit quantity, and as a
- * result, we would be truncating potentially larger memory requests
- * (which should cause a fatal error) and introducing major problems.
- */
-PNG_FUNCTION(png_voidp,PNGAPI
-png_calloc,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
-{
-   png_voidp ret;
-
-   ret = (png_malloc(png_ptr, size));
-
-   if (ret != NULL)
-      png_memset(ret,0,(png_size_t)size);
-
-   return (ret);
-}
-
-PNG_FUNCTION(png_voidp,PNGAPI
-png_malloc,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
-{
-   png_voidp ret;
-
-   if (png_ptr == NULL || size == 0)
-      return (NULL);
-
-#  ifdef PNG_USER_MEM_SUPPORTED
-   if (png_ptr->malloc_fn != NULL)
-      ret = ((png_voidp)(*(png_ptr->malloc_fn))(png_ptr, size));
-
-   else
-      ret = (png_malloc_default(png_ptr, size));
-
-   if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
-       png_error(png_ptr, "Out of memory");
-
-   return (ret);
-}
-
-PNG_FUNCTION(png_voidp,PNGAPI
-png_malloc_default,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
-{
-   png_voidp ret;
-#  endif /* PNG_USER_MEM_SUPPORTED */
-
-   if (png_ptr == NULL || size == 0)
-      return (NULL);
-
-#  ifdef PNG_MAX_MALLOC_64K
-   if (size > (png_uint_32)65536L)
-   {
-      png_warning(png_ptr, "Cannot Allocate > 64K");
-      ret = NULL;
-   }
-
-   else
-#  endif
-
-   if (size != (size_t)size)
-      ret = NULL;
-
-   else if (size == (png_uint_32)65536L)
-   {
-      if (png_ptr->offset_table == NULL)
-      {
-         /* Try to see if we need to do any of this fancy stuff */
-         ret = farmalloc(size);
-         if (ret == NULL || ((png_size_t)ret & 0xffff))
-         {
-            int num_blocks;
-            png_uint_32 total_size;
-            png_bytep table;
-            int i, mem_level, window_bits;
-            png_byte huge * hptr;
-            int window_bits
-
-            if (ret != NULL)
-            {
-               farfree(ret);
-               ret = NULL;
-            }
-
-            window_bits =
-                png_ptr->zlib_window_bits >= png_ptr->zlib_text_window_bits ?
-                png_ptr->zlib_window_bits : png_ptr->zlib_text_window_bits;
-
-            if (window_bits > 14)
-               num_blocks = (int)(1 << (window_bits - 14));
-
-            else
-               num_blocks = 1;
-
-            mem_level =
-                png_ptr->zlib_mem_level >= png_ptr->zlib_text_mem_level ?
-                png_ptr->zlib_mem_level : png_ptr->zlib_text_mem_level;
-
-            if (mem_level >= 7)
-               num_blocks += (int)(1 << (mem_level - 7));
-
-            else
-               num_blocks++;
-
-            total_size = ((png_uint_32)65536L) * (png_uint_32)num_blocks+16;
-
-            table = farmalloc(total_size);
-
-            if (table == NULL)
-            {
-#  ifndef PNG_USER_MEM_SUPPORTED
-               if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
-                  png_error(png_ptr, "Out Of Memory"); /* Note "O", "M" */
-
-               else
-                  png_warning(png_ptr, "Out Of Memory");
-#  endif
-               return (NULL);
-            }
-
-            if ((png_size_t)table & 0xfff0)
-            {
-#  ifndef PNG_USER_MEM_SUPPORTED
-               if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
-                  png_error(png_ptr,
-                    "Farmalloc didn't return normalized pointer");
-
-               else
-                  png_warning(png_ptr,
-                    "Farmalloc didn't return normalized pointer");
-#  endif
-               return (NULL);
-            }
-
-            png_ptr->offset_table = table;
-            png_ptr->offset_table_ptr = farmalloc(num_blocks *
-               png_sizeof(png_bytep));
-
-            if (png_ptr->offset_table_ptr == NULL)
-            {
-#  ifndef PNG_USER_MEM_SUPPORTED
-               if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
-                  png_error(png_ptr, "Out Of memory"); /* Note "O", "m" */
-
-               else
-                  png_warning(png_ptr, "Out Of memory");
-#  endif
-               return (NULL);
-            }
-
-            hptr = (png_byte huge *)table;
-            if ((png_size_t)hptr & 0xf)
-            {
-               hptr = (png_byte huge *)((long)(hptr) & 0xfffffff0L);
-               hptr = hptr + 16L;  /* "hptr += 16L" fails on Turbo C++ 3.0 */
-            }
-
-            for (i = 0; i < num_blocks; i++)
-            {
-               png_ptr->offset_table_ptr[i] = (png_bytep)hptr;
-               hptr = hptr + (png_uint_32)65536L;  /* "+=" fails on TC++3.0 */
-            }
-
-            png_ptr->offset_table_number = num_blocks;
-            png_ptr->offset_table_count = 0;
-            png_ptr->offset_table_count_free = 0;
-         }
-      }
-
-      if (png_ptr->offset_table_count >= png_ptr->offset_table_number)
-      {
-#  ifndef PNG_USER_MEM_SUPPORTED
-         if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
-            png_error(png_ptr, "Out of Memory"); /* Note "O" and "M" */
-
-         else
-            png_warning(png_ptr, "Out of Memory");
-#  endif
-         return (NULL);
-      }
-
-      ret = png_ptr->offset_table_ptr[png_ptr->offset_table_count++];
-   }
-
-   else
-      ret = farmalloc(size);
-
-#  ifndef PNG_USER_MEM_SUPPORTED
-   if (ret == NULL)
-   {
-      if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
-         png_error(png_ptr, "Out of memory"); /* Note "o" and "m" */
-
-      else
-         png_warning(png_ptr, "Out of memory"); /* Note "o" and "m" */
-   }
-#  endif
-
-   return (ret);
-}
-
-/* Free a pointer allocated by png_malloc().  In the default
- * configuration, png_ptr is not used, but is passed in case it
- * is needed.  If ptr is NULL, return without taking any action.
- */
-void PNGAPI
-png_free(png_structp png_ptr, png_voidp ptr)
-{
-   if (png_ptr == NULL || ptr == NULL)
-      return;
-
-#  ifdef PNG_USER_MEM_SUPPORTED
-   if (png_ptr->free_fn != NULL)
-   {
-      (*(png_ptr->free_fn))(png_ptr, ptr);
-      return;
-   }
-
-   else
-      png_free_default(png_ptr, ptr);
-}
-
-void PNGAPI
-png_free_default(png_structp png_ptr, png_voidp ptr)
-{
-#  endif /* PNG_USER_MEM_SUPPORTED */
-
-   if (png_ptr == NULL || ptr == NULL)
-      return;
-
-   if (png_ptr->offset_table != NULL)
-   {
-      int i;
-
-      for (i = 0; i < png_ptr->offset_table_count; i++)
-      {
-         if (ptr == png_ptr->offset_table_ptr[i])
-         {
-            ptr = NULL;
-            png_ptr->offset_table_count_free++;
-            break;
-         }
-      }
-      if (png_ptr->offset_table_count_free == png_ptr->offset_table_count)
-      {
-         farfree(png_ptr->offset_table);
-         farfree(png_ptr->offset_table_ptr);
-         png_ptr->offset_table = NULL;
-         png_ptr->offset_table_ptr = NULL;
-      }
-   }
-
-   if (ptr != NULL)
-      farfree(ptr);
-}
-
-#else /* Not the Borland DOS special memory handler */
-
-/* Allocate memory for a png_struct or a png_info.  The malloc and
-   memset can be replaced by a single call to calloc() if this is thought
-   to improve performance noticably. */
-PNG_FUNCTION(png_voidp /* PRIVATE */,
-png_create_struct,(int type),PNG_ALLOCATED)
-{
-#  ifdef PNG_USER_MEM_SUPPORTED
-   return (png_create_struct_2(type, NULL, NULL));
-}
-
-/* Allocate memory for a png_struct or a png_info.  The malloc and
-   memset can be replaced by a single call to calloc() if this is thought
-   to improve performance noticably. */
-PNG_FUNCTION(png_voidp /* PRIVATE */,
-png_create_struct_2,(int type, png_malloc_ptr malloc_fn, png_voidp mem_ptr),
-   PNG_ALLOCATED)
-{
-#  endif /* PNG_USER_MEM_SUPPORTED */
-   png_size_t size;
-   png_voidp struct_ptr;
-
-   if (type == PNG_STRUCT_INFO)
-      size = png_sizeof(png_info);
-
-   else if (type == PNG_STRUCT_PNG)
-      size = png_sizeof(png_struct);
-
-   else
-      return (NULL);
-
-#  ifdef PNG_USER_MEM_SUPPORTED
-   if (malloc_fn != NULL)
-   {
-      png_struct dummy_struct;
-      png_structp png_ptr = &dummy_struct;
-      png_ptr->mem_ptr=mem_ptr;
-      struct_ptr = (*(malloc_fn))(png_ptr, size);
-
-      if (struct_ptr != NULL)
-         png_memset(struct_ptr, 0, size);
-
-      return (struct_ptr);
-   }
-#  endif /* PNG_USER_MEM_SUPPORTED */
-
-#  if defined(__TURBOC__) && !defined(__FLAT__)
-   struct_ptr = (png_voidp)farmalloc(size);
-#  else
-#    if defined(_MSC_VER) && defined(MAXSEG_64K)
-   struct_ptr = (png_voidp)halloc(size, 1);
-#    else
-   struct_ptr = (png_voidp)malloc(size);
-#    endif
-#  endif
-
-   if (struct_ptr != NULL)
-      png_memset(struct_ptr, 0, size);
-
-   return (struct_ptr);
-}
-
-
-/* Free memory allocated by a png_create_struct() call */
-void /* PRIVATE */
-png_destroy_struct(png_voidp struct_ptr)
-{
-#  ifdef PNG_USER_MEM_SUPPORTED
-   png_destroy_struct_2(struct_ptr, NULL, NULL);
-}
-
-/* Free memory allocated by a png_create_struct() call */
-void /* PRIVATE */
-png_destroy_struct_2(png_voidp struct_ptr, png_free_ptr free_fn,
-    png_voidp mem_ptr)
-{
-#  endif /* PNG_USER_MEM_SUPPORTED */
-   if (struct_ptr != NULL)
-   {
-#  ifdef PNG_USER_MEM_SUPPORTED
-      if (free_fn != NULL)
-      {
-         png_struct dummy_struct;
-         png_structp png_ptr = &dummy_struct;
-         png_ptr->mem_ptr=mem_ptr;
-         (*(free_fn))(png_ptr, struct_ptr);
-         return;
-      }
-#  endif /* PNG_USER_MEM_SUPPORTED */
-#  if defined(__TURBOC__) && !defined(__FLAT__)
-      farfree(struct_ptr);
-
-#  else
-#    if defined(_MSC_VER) && defined(MAXSEG_64K)
-      hfree(struct_ptr);
-
-#    else
-      free(struct_ptr);
-
-#    endif
-#  endif
-   }
-}
-
-/* Allocate memory.  For reasonable files, size should never exceed
- * 64K.  However, zlib may allocate more then 64K if you don't tell
- * it not to.  See zconf.h and png.h for more information.  zlib does
- * need to allocate exactly 64K, so whatever you call here must
- * have the ability to do that.
- */
-
-PNG_FUNCTION(png_voidp,PNGAPI
-png_calloc,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
-{
-   png_voidp ret;
-
-   ret = (png_malloc(png_ptr, size));
-
-   if (ret != NULL)
-      png_memset(ret,0,(png_size_t)size);
-
-   return (ret);
-}
-
-PNG_FUNCTION(png_voidp,PNGAPI
-png_malloc,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
-{
-   png_voidp ret;
-
-#  ifdef PNG_USER_MEM_SUPPORTED
-   if (png_ptr == NULL || size == 0)
-      return (NULL);
-
-   if (png_ptr->malloc_fn != NULL)
-      ret = ((png_voidp)(*(png_ptr->malloc_fn))(png_ptr, (png_size_t)size));
-
-   else
-      ret = (png_malloc_default(png_ptr, size));
-
-   if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
-       png_error(png_ptr, "Out of Memory");
-
-   return (ret);
-}
-
-PNG_FUNCTION(png_voidp,PNGAPI
-png_malloc_default,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
-{
-   png_voidp ret;
-#  endif /* PNG_USER_MEM_SUPPORTED */
-
-   if (png_ptr == NULL || size == 0)
-      return (NULL);
-
-#  ifdef PNG_MAX_MALLOC_64K
-   if (size > (png_uint_32)65536L)
-   {
-#    ifndef PNG_USER_MEM_SUPPORTED
-      if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
-         png_error(png_ptr, "Cannot Allocate > 64K");
-
-      else
-#    endif
-         return NULL;
-   }
-#  endif
-
-   /* Check for overflow */
-#  if defined(__TURBOC__) && !defined(__FLAT__)
-
-   if (size != (unsigned long)size)
-      ret = NULL;
-
-   else
-      ret = farmalloc(size);
-
-#  else
-#    if defined(_MSC_VER) && defined(MAXSEG_64K)
-   if (size != (unsigned long)size)
-      ret = NULL;
-
-   else
-      ret = halloc(size, 1);
-
-#    else
-   if (size != (size_t)size)
-      ret = NULL;
-
-   else
-      ret = malloc((size_t)size);
-#    endif
-#  endif
-
-#  ifndef PNG_USER_MEM_SUPPORTED
-   if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
-      png_error(png_ptr, "Out of Memory");
-#  endif
-
-   return (ret);
-}
-
-/* Free a pointer allocated by png_malloc().  If ptr is NULL, return
- * without taking any action.
- */
-void PNGAPI
-png_free(png_structp png_ptr, png_voidp ptr)
-{
-   if (png_ptr == NULL || ptr == NULL)
-      return;
-
-#  ifdef PNG_USER_MEM_SUPPORTED
-   if (png_ptr->free_fn != NULL)
-   {
-      (*(png_ptr->free_fn))(png_ptr, ptr);
-      return;
-   }
-
-   else
-      png_free_default(png_ptr, ptr);
-}
-
-void PNGAPI
-png_free_default(png_structp png_ptr, png_voidp ptr)
-{
-   if (png_ptr == NULL || ptr == NULL)
-      return;
-
-#  endif /* PNG_USER_MEM_SUPPORTED */
-
-#  if defined(__TURBOC__) && !defined(__FLAT__)
-   farfree(ptr);
-
-#  else
-#    if defined(_MSC_VER) && defined(MAXSEG_64K)
-   hfree(ptr);
-
-#    else
-   free(ptr);
-
-#    endif
-#  endif
-}
-#endif /* Not Borland DOS special memory handler */
-
-/* This function was added at libpng version 1.2.3.  The png_malloc_warn()
- * function will set up png_malloc() to issue a png_warning and return NULL
- * instead of issuing a png_error, if it fails to allocate the requested
- * memory.
- */
-PNG_FUNCTION(png_voidp,PNGAPI
-png_malloc_warn,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED)
-{
-   png_voidp ptr;
-   png_uint_32 save_flags;
-   if (png_ptr == NULL)
-      return (NULL);
-
-   save_flags = png_ptr->flags;
-   png_ptr->flags|=PNG_FLAG_MALLOC_NULL_MEM_OK;
-   ptr = (png_voidp)png_malloc((png_structp)png_ptr, size);
-   png_ptr->flags=save_flags;
-   return(ptr);
-}
-
-
-#ifdef PNG_USER_MEM_SUPPORTED
-/* This function is called when the application wants to use another method
- * of allocating and freeing memory.
- */
-void PNGAPI
-png_set_mem_fn(png_structp png_ptr, png_voidp mem_ptr, png_malloc_ptr
-  malloc_fn, png_free_ptr free_fn)
-{
-   if (png_ptr != NULL)
-   {
-      png_ptr->mem_ptr = mem_ptr;
-      png_ptr->malloc_fn = malloc_fn;
-      png_ptr->free_fn = free_fn;
-   }
-}
-
-/* This function returns a pointer to the mem_ptr associated with the user
- * functions.  The application should free any memory associated with this
- * pointer before png_write_destroy and png_read_destroy are called.
- */
-png_voidp PNGAPI
-png_get_mem_ptr(png_const_structp png_ptr)
-{
-   if (png_ptr == NULL)
-      return (NULL);
-
-   return ((png_voidp)png_ptr->mem_ptr);
-}
-#endif /* PNG_USER_MEM_SUPPORTED */
-#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/reg-io/png/lpng1510/pngpriv.h b/reg-io/png/lpng1510/pngpriv.h
deleted file mode 100644
index d64d47ed..00000000
--- a/reg-io/png/lpng1510/pngpriv.h
+++ /dev/null
@@ -1,1674 +0,0 @@
-
-/* pngpriv.h - private declarations for use inside libpng
- *
- * For conditions of distribution and use, see copyright notice in png.h
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * Last changed in libpng 1.5.10 [March 29, 2012]
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-/* The symbols declared in this file (including the functions declared
- * as PNG_EXTERN) are PRIVATE.  They are not part of the libpng public
- * interface, and are not recommended for use by regular applications.
- * Some of them may become public in the future; others may stay private,
- * change in an incompatible way, or even disappear.
- * Although the libpng users are not forbidden to include this header,
- * they should be well aware of the issues that may arise from doing so.
- */
-
-#pragma once
-
-/* Feature Test Macros.  The following are defined here to ensure that correctly
- * implemented libraries reveal the APIs libpng needs to build and hide those
- * that are not needed and potentially damaging to the compilation.
- *
- * Feature Test Macros must be defined before any system header is included (see
- * POSIX 1003.1 2.8.2 "POSIX Symbols."
- *
- * These macros only have an effect if the operating system supports either
- * POSIX 1003.1 or C99, or both.  On other operating systems (particularly
- * Windows/Visual Studio) there is no effect; the OS specific tests below are
- * still required (as of 2011-05-02.)
- */
-#define _POSIX_SOURCE 1 /* Just the POSIX 1003.1 and C89 APIs */
-
-/* This is required for the definition of abort(), used as a last ditch
- * error handler when all else fails.
- */
-#include <stdlib.h>
-
-/* This is used to find 'offsetof', used below for alignment tests. */
-#include <stddef.h>
-
-#define PNGLIB_BUILD /*libpng is being built, not used*/
-
-#ifdef PNG_USER_CONFIG
-#  include "pngusr.h"
-/* These should have been defined in pngusr.h */
-#  ifndef PNG_USER_PRIVATEBUILD
-#    define PNG_USER_PRIVATEBUILD "Custom libpng build"
-#  endif
-#  ifndef PNG_USER_DLLFNAME_POSTFIX
-#    define PNG_USER_DLLFNAME_POSTFIX "Cb"
-#  endif
-#endif
-
-/* Is this a build of a DLL where compilation of the object modules requires
- * different preprocessor settings to those required for a simple library?  If
- * so PNG_BUILD_DLL must be set.
- *
- * If libpng is used inside a DLL but that DLL does not export the libpng APIs
- * PNG_BUILD_DLL must not be set.  To avoid the code below kicking in build a
- * static library of libpng then link the DLL against that.
- */
-#ifndef PNG_BUILD_DLL
-#  ifdef DLL_EXPORT
-/* This is set by libtool when files are compiled for a DLL; libtool
- * always compiles twice, even on systems where it isn't necessary.  Set
- * PNG_BUILD_DLL in case it is necessary:
- */
-#     define PNG_BUILD_DLL
-#  else
-#     ifdef _WINDLL
-/* This is set by the Microsoft Visual Studio IDE in projects that
- * build a DLL.  It can't easily be removed from those projects (it
- * isn't visible in the Visual Studio UI) so it is a fairly reliable
- * indication that PNG_IMPEXP needs to be set to the DLL export
- * attributes.
- */
-#        define PNG_BUILD_DLL
-#     else
-#        ifdef __DLL__
-/* This is set by the Borland C system when compiling for a DLL
- * (as above.)
- */
-#           define PNG_BUILD_DLL
-#        else
-/* Add additional compiler cases here. */
-#        endif
-#     endif
-#  endif
-#endif /* Setting PNG_BUILD_DLL if required */
-
-/* See pngconf.h for more details: the builder of the library may set this on
- * the command line to the right thing for the specific compilation system or it
- * may be automagically set above (at present we know of no system where it does
- * need to be set on the command line.)
- *
- * PNG_IMPEXP must be set here when building the library to prevent pngconf.h
- * setting it to the "import" setting for a DLL build.
- */
-#ifndef PNG_IMPEXP
-#  ifdef PNG_BUILD_DLL
-#     define PNG_IMPEXP PNG_DLL_EXPORT
-#  else
-/* Not building a DLL, or the DLL doesn't require specific export
- * definitions.
- */
-#     define PNG_IMPEXP
-#  endif
-#endif
-
-/* No warnings for private or deprecated functions in the build: */
-#ifndef PNG_DEPRECATED
-#  define PNG_DEPRECATED
-#endif
-#ifndef PNG_PRIVATE
-#  define PNG_PRIVATE
-#endif
-
-#include "png.h"
-#include "pnginfo.h"
-#include "pngstruct.h"
-
-/* pngconf.h does not set PNG_DLL_EXPORT unless it is required, so: */
-#ifndef PNG_DLL_EXPORT
-#  define PNG_DLL_EXPORT
-#endif
-
-/* SECURITY and SAFETY:
- *
- * By default libpng is built without any internal limits on image size,
- * individual heap (png_malloc) allocations or the total amount of memory used.
- * If PNG_SAFE_LIMITS_SUPPORTED is defined, however, the limits below are used
- * (unless individually overridden).  These limits are believed to be fairly
- * safe, but builders of secure systems should verify the values against the
- * real system capabilities.
- */
-
-#ifdef PNG_SAFE_LIMITS_SUPPORTED
-/* 'safe' limits */
-#  ifndef PNG_USER_WIDTH_MAX
-#     define PNG_USER_WIDTH_MAX 1000000
-#  endif
-#  ifndef PNG_USER_HEIGHT_MAX
-#     define PNG_USER_HEIGHT_MAX 1000000
-#  endif
-#  ifndef PNG_USER_CHUNK_CACHE_MAX
-#     define PNG_USER_CHUNK_CACHE_MAX 128
-#  endif
-#  ifndef PNG_USER_CHUNK_MALLOC_MAX
-#     define PNG_USER_CHUNK_MALLOC_MAX 8000000
-#  endif
-#else
-/* values for no limits */
-#  ifndef PNG_USER_WIDTH_MAX
-#     define PNG_USER_WIDTH_MAX 0x7fffffff
-#  endif
-#  ifndef PNG_USER_HEIGHT_MAX
-#     define PNG_USER_HEIGHT_MAX 0x7fffffff
-#  endif
-#  ifndef PNG_USER_CHUNK_CACHE_MAX
-#     define PNG_USER_CHUNK_CACHE_MAX 0
-#  endif
-#  ifndef PNG_USER_CHUNK_MALLOC_MAX
-#     define PNG_USER_CHUNK_MALLOC_MAX 0
-#  endif
-#endif
-
-/* This is used for 16 bit gamma tables - only the top level pointers are const,
- * this could be changed:
- */
-typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
-
-/* Added at libpng-1.2.9 */
-/* Moved to pngpriv.h at libpng-1.5.0 */
-
-/* config.h is created by and PNG_CONFIGURE_LIBPNG is set by the "configure"
- * script.  We may need it here to get the correct configuration on things
- * like limits.
- */
-#ifdef PNG_CONFIGURE_LIBPNG
-#  ifdef HAVE_CONFIG_H
-#    include "config.h"
-#  endif
-#endif
-
-/* Moved to pngpriv.h at libpng-1.5.0 */
-/* NOTE: some of these may have been used in external applications as
- * these definitions were exposed in pngconf.h prior to 1.5.
- */
-
-/* If you are running on a machine where you cannot allocate more
- * than 64K of memory at once, uncomment this.  While libpng will not
- * normally need that much memory in a chunk (unless you load up a very
- * large file), zlib needs to know how big of a chunk it can use, and
- * libpng thus makes sure to check any memory allocation to verify it
- * will fit into memory.
- *
- * zlib provides 'MAXSEG_64K' which, if defined, indicates the
- * same limit and pngconf.h (already included) sets the limit
- * if certain operating systems are detected.
- */
-#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K)
-#  define PNG_MAX_MALLOC_64K
-#endif
-
-#ifndef PNG_UNUSED
-/* Unused formal parameter warnings are silenced using the following macro
- * which is expected to have no bad effects on performance (optimizing
- * compilers will probably remove it entirely).  Note that if you replace
- * it with something other than whitespace, you must include the terminating
- * semicolon.
- */
-#  define PNG_UNUSED(param) (void)param;
-#endif
-
-/* Just a little check that someone hasn't tried to define something
- * contradictory.
- */
-#if (PNG_ZBUF_SIZE > 65536L) && defined(PNG_MAX_MALLOC_64K)
-#  undef PNG_ZBUF_SIZE
-#  define PNG_ZBUF_SIZE 65536L
-#endif
-
-/* PNG_STATIC is used to mark internal file scope functions if they need to be
- * accessed for implementation tests (see the code in tests/?*).
- */
-#ifndef PNG_STATIC
-#   define PNG_STATIC static
-#endif
-
-/* C99 restrict is used where possible, to do this 'restrict' is defined as
- * empty if we can't be sure it is supported.  configure builds have already
- * done this work.
- */
-#ifdef PNG_CONFIGURE_LIBPNG
-#  define PNG_RESTRICT restrict
-#else
-/* Modern compilers support restrict, but assume not for anything not
- * recognized here:
- */
-#  if defined __GNUC__ || defined _MSC_VER || defined __WATCOMC__
-#     define PNG_RESTRICT restrict
-#  else
-#     define PNG_RESTRICT
-#  endif
-#endif
-
-/* If warnings or errors are turned off the code is disabled or redirected here.
- * From 1.5.4 functions have been added to allow very limited formatting of
- * error and warning messages - this code will also be disabled here.
- */
-#ifdef PNG_WARNINGS_SUPPORTED
-#  define PNG_WARNING_PARAMETERS(p) png_warning_parameters p;
-#else
-#  define png_warning(s1,s2) ((void)(s1))
-#  define png_chunk_warning(s1,s2) ((void)(s1))
-#  define png_warning_parameter(p,number,string) ((void)0)
-#  define png_warning_parameter_unsigned(p,number,format,value) ((void)0)
-#  define png_warning_parameter_signed(p,number,format,value) ((void)0)
-#  define png_formatted_warning(pp,p,message) ((void)(pp))
-#  define PNG_WARNING_PARAMETERS(p)
-#endif
-#ifndef PNG_ERROR_TEXT_SUPPORTED
-#  define png_error(s1,s2) png_err(s1)
-#  define png_chunk_error(s1,s2) png_err(s1)
-#  define png_fixed_error(s1,s2) png_err(s1)
-#endif
-
-/* C allows up-casts from (void*) to any pointer and (const void*) to any
- * pointer to a const object.  C++ regards this as a type error and requires an
- * explicit, static, cast and provides the static_cast<> rune to ensure that
- * const is not cast away.
- */
-#ifdef __cplusplus
-#  define png_voidcast(type, value) static_cast<type>(value)
-#else
-#  define png_voidcast(type, value) (value)
-#endif /* __cplusplus */
-
-#ifndef PNG_EXTERN
-/* The functions exported by PNG_EXTERN are internal functions, which
- * aren't usually used outside the library (as far as I know), so it is
- * debatable if they should be exported at all.  In the future, when it
- * is possible to have run-time registry of chunk-handling functions,
- * some of these might be made available again.
- *
- * 1.5.7: turned the use of 'extern' back on, since it is localized to pngpriv.h
- * it should be safe now (it is unclear why it was turned off.)
- */
-#  define PNG_EXTERN extern
-#endif
-
-/* Some fixed point APIs are still required even if not exported because
- * they get used by the corresponding floating point APIs.  This magic
- * deals with this:
- */
-#ifdef PNG_FIXED_POINT_SUPPORTED
-#  define PNGFAPI PNGAPI
-#else
-#  define PNGFAPI /* PRIVATE */
-#endif
-
-/* Other defines specific to compilers can go here.  Try to keep
- * them inside an appropriate ifdef/endif pair for portability.
- */
-#if defined(PNG_FLOATING_POINT_SUPPORTED) ||\
-    defined(PNG_FLOATING_ARITHMETIC_SUPPORTED)
-/* png.c requires the following ANSI-C constants if the conversion of
- * floating point to ASCII is implemented therein:
- *
- *  DBL_DIG  Maximum number of decimal digits (can be set to any constant)
- *  DBL_MIN  Smallest normalized fp number (can be set to an arbitrary value)
- *  DBL_MAX  Maximum floating point number (can be set to an arbitrary value)
- */
-#  include <float.h>
-
-#  if (defined(__MWERKS__) && defined(macintosh)) || defined(applec) || \
-    defined(THINK_C) || defined(__SC__) || defined(TARGET_OS_MAC)
-/* We need to check that <math.h> hasn't already been included earlier
- * as it seems it doesn't agree with <fp.h>, yet we should really use
- * <fp.h> if possible.
- */
-#    if !defined(__MATH_H__) && !defined(__MATH_H) && !defined(__cmath__)
-#      include <fp.h>
-#    endif
-#  else
-#    include <math.h>
-#  endif
-#  if defined(_AMIGA) && defined(__SASC) && defined(_M68881)
-/* Amiga SAS/C: We must include builtin FPU functions when compiling using
- * MATH=68881
- */
-#    include <m68881.h>
-#  endif
-#endif
-
-/* This provides the non-ANSI (far) memory allocation routines. */
-#if defined(__TURBOC__) && defined(__MSDOS__)
-#  include <mem.h>
-#  include <alloc.h>
-#endif
-
-#if defined(WIN32) || defined(_Windows) || defined(_WINDOWS) || \
-    defined(_WIN32) || defined(__WIN32__)
-#  include <windows.h>  /* defines _WINDOWS_ macro */
-#endif
-
-/* Moved here around 1.5.0beta36 from pngconf.h */
-/* Users may want to use these so they are not private.  Any library
- * functions that are passed far data must be model-independent.
- */
-
-/* Memory model/platform independent fns */
-#ifndef PNG_ABORT
-#  ifdef _WINDOWS_
-#    define PNG_ABORT() ExitProcess(0)
-#  else
-#    define PNG_ABORT() abort()
-#  endif
-#endif
-
-#ifdef USE_FAR_KEYWORD
-/* Use this to make far-to-near assignments */
-#  define CHECK   1
-#  define NOCHECK 0
-#  define CVT_PTR(ptr) (png_far_to_near(png_ptr,ptr,CHECK))
-#  define CVT_PTR_NOCHECK(ptr) (png_far_to_near(png_ptr,ptr,NOCHECK))
-#  define png_strlen  _fstrlen
-#  define png_memcmp  _fmemcmp    /* SJT: added */
-#  define png_memcpy  _fmemcpy
-#  define png_memset  _fmemset
-#else
-#  ifdef _WINDOWS_  /* Favor Windows over C runtime fns */
-#    define CVT_PTR(ptr)         (ptr)
-#    define CVT_PTR_NOCHECK(ptr) (ptr)
-#    define png_strlen  lstrlenA
-#    define png_memcmp  memcmp
-#    define png_memcpy  CopyMemory
-#    define png_memset  memset
-#  else
-#    define CVT_PTR(ptr)         (ptr)
-#    define CVT_PTR_NOCHECK(ptr) (ptr)
-#    define png_strlen  strlen
-#    define png_memcmp  memcmp      /* SJT: added */
-#    define png_memcpy  memcpy
-#    define png_memset  memset
-#  endif
-#endif
-
-/* These macros may need to be architecture dependent. */
-#define PNG_ALIGN_NONE   0 /* do not use data alignment */
-#define PNG_ALIGN_ALWAYS 1 /* assume unaligned accesses are OK */
-#ifdef offsetof
-#  define PNG_ALIGN_OFFSET 2 /* use offsetof to determine alignment */
-#else
-#  define PNG_ALIGN_OFFSET -1 /* prevent the use of this */
-#endif
-#define PNG_ALIGN_SIZE   3 /* use sizeof to determine alignment */
-
-#ifndef PNG_ALIGN_TYPE
-/* Default to using aligned access optimizations and requiring alignment to a
- * multiple of the data type size.  Override in a compiler specific fashion
- * if necessary by inserting tests here:
- */
-#  define PNG_ALIGN_TYPE PNG_ALIGN_SIZE
-#endif
-
-#if PNG_ALIGN_TYPE == PNG_ALIGN_SIZE
-/* This is used because in some compiler implementations non-aligned
- * structure members are supported, so the offsetof approach below fails.
- * Set PNG_ALIGN_TO_SIZE=0 for compiler combinations where unaligned access
- * is good for performance.  Do not do this unless you have tested the result
- * and understand it.
- */
-#  define png_alignof(type) (sizeof (type))
-#else
-#  if PNG_ALIGN_TYPE == PNG_ALIGN_OFFSET
-#     define png_alignof(type) offsetof(struct{char c; type t;}, t)
-#  else
-#     if PNG_ALIGN_TYPE == PNG_ALIGN_ALWAYS
-#        define png_alignof(type) (1)
-#     endif
-/* Else leave png_alignof undefined to prevent use thereof */
-#  endif
-#endif
-
-/* This implicitly assumes alignment is always to a power of 2. */
-#ifdef png_alignof
-#  define png_isaligned(ptr, type)\
-   ((((const char*)ptr-(const char*)0) & (png_alignof(type)-1)) == 0)
-#else
-#  define png_isaligned(ptr, type) 0
-#endif
-
-/* End of memory model/platform independent support */
-/* End of 1.5.0beta36 move from pngconf.h */
-
-/* CONSTANTS and UTILITY MACROS
- * These are used internally by libpng and not exposed in the API
- */
-
-/* Various modes of operation.  Note that after an init, mode is set to
- * zero automatically when the structure is created.  Three of these
- * are defined in png.h because they need to be visible to applications
- * that call png_set_unknown_chunk().
- */
-/* #define PNG_HAVE_IHDR            0x01 (defined in png.h) */
-/* #define PNG_HAVE_PLTE            0x02 (defined in png.h) */
-#define PNG_HAVE_IDAT               0x04
-/* #define PNG_AFTER_IDAT           0x08 (defined in png.h) */
-#define PNG_HAVE_IEND               0x10
-#define PNG_HAVE_gAMA               0x20
-#define PNG_HAVE_cHRM               0x40
-#define PNG_HAVE_sRGB               0x80
-#define PNG_HAVE_CHUNK_HEADER      0x100
-#define PNG_WROTE_tIME             0x200
-#define PNG_WROTE_INFO_BEFORE_PLTE 0x400
-#define PNG_BACKGROUND_IS_GRAY     0x800
-#define PNG_HAVE_PNG_SIGNATURE    0x1000
-#define PNG_HAVE_CHUNK_AFTER_IDAT 0x2000 /* Have another chunk after IDAT */
-#define PNG_HAVE_iCCP             0x4000
-
-/* Flags for the transformations the PNG library does on the image data */
-#define PNG_BGR                 0x0001
-#define PNG_INTERLACE           0x0002
-#define PNG_PACK                0x0004
-#define PNG_SHIFT               0x0008
-#define PNG_SWAP_BYTES          0x0010
-#define PNG_INVERT_MONO         0x0020
-#define PNG_QUANTIZE            0x0040
-#define PNG_COMPOSE             0x0080     /* Was PNG_BACKGROUND */
-#define PNG_BACKGROUND_EXPAND   0x0100
-#define PNG_EXPAND_16           0x0200     /* Added to libpng 1.5.2 */
-#define PNG_16_TO_8             0x0400     /* Becomes 'chop' in 1.5.4 */
-#define PNG_RGBA                0x0800
-#define PNG_EXPAND              0x1000
-#define PNG_GAMMA               0x2000
-#define PNG_GRAY_TO_RGB         0x4000
-#define PNG_FILLER              0x8000
-#define PNG_PACKSWAP           0x10000
-#define PNG_SWAP_ALPHA         0x20000
-#define PNG_STRIP_ALPHA        0x40000
-#define PNG_INVERT_ALPHA       0x80000
-#define PNG_USER_TRANSFORM    0x100000
-#define PNG_RGB_TO_GRAY_ERR   0x200000
-#define PNG_RGB_TO_GRAY_WARN  0x400000
-#define PNG_RGB_TO_GRAY       0x600000 /* two bits, RGB_TO_GRAY_ERR|WARN */
-#define PNG_ENCODE_ALPHA      0x800000 /* Added to libpng-1.5.4 */
-#define PNG_ADD_ALPHA         0x1000000 /* Added to libpng-1.2.7 */
-#define PNG_EXPAND_tRNS       0x2000000 /* Added to libpng-1.2.9 */
-#define PNG_SCALE_16_TO_8     0x4000000 /* Added to libpng-1.5.4 */
-/*   0x8000000 unused */
-/*  0x10000000 unused */
-/*  0x20000000 unused */
-/*  0x40000000 unused */
-/* Flags for png_create_struct */
-#define PNG_STRUCT_PNG   0x0001
-#define PNG_STRUCT_INFO  0x0002
-
-/* Scaling factor for filter heuristic weighting calculations */
-#define PNG_WEIGHT_FACTOR (1<<(PNG_WEIGHT_SHIFT))
-#define PNG_COST_FACTOR (1<<(PNG_COST_SHIFT))
-
-/* Flags for the png_ptr->flags rather than declaring a byte for each one */
-#define PNG_FLAG_ZLIB_CUSTOM_STRATEGY     0x0001
-#define PNG_FLAG_ZLIB_CUSTOM_LEVEL        0x0002
-#define PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL    0x0004
-#define PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS  0x0008
-#define PNG_FLAG_ZLIB_CUSTOM_METHOD       0x0010
-#define PNG_FLAG_ZLIB_FINISHED            0x0020
-#define PNG_FLAG_ROW_INIT                 0x0040
-#define PNG_FLAG_FILLER_AFTER             0x0080
-#define PNG_FLAG_CRC_ANCILLARY_USE        0x0100
-#define PNG_FLAG_CRC_ANCILLARY_NOWARN     0x0200
-#define PNG_FLAG_CRC_CRITICAL_USE         0x0400
-#define PNG_FLAG_CRC_CRITICAL_IGNORE      0x0800
-#define PNG_FLAG_ASSUME_sRGB              0x1000  /* Added to libpng-1.5.4 */
-#define PNG_FLAG_OPTIMIZE_ALPHA           0x2000  /* Added to libpng-1.5.4 */
-#define PNG_FLAG_DETECT_UNINITIALIZED     0x4000  /* Added to libpng-1.5.4 */
-#define PNG_FLAG_KEEP_UNKNOWN_CHUNKS      0x8000
-#define PNG_FLAG_KEEP_UNSAFE_CHUNKS       0x10000
-#define PNG_FLAG_LIBRARY_MISMATCH         0x20000
-#define PNG_FLAG_STRIP_ERROR_NUMBERS      0x40000
-#define PNG_FLAG_STRIP_ERROR_TEXT         0x80000
-#define PNG_FLAG_MALLOC_NULL_MEM_OK       0x100000
-/*      0x200000  unused */
-/*      0x400000  unused */
-#define PNG_FLAG_BENIGN_ERRORS_WARN       0x800000  /* Added to libpng-1.4.0 */
-#define PNG_FLAG_ZTXT_CUSTOM_STRATEGY    0x1000000  /* 5 lines added */
-#define PNG_FLAG_ZTXT_CUSTOM_LEVEL       0x2000000  /* to libpng-1.5.4 */
-#define PNG_FLAG_ZTXT_CUSTOM_MEM_LEVEL   0x4000000
-#define PNG_FLAG_ZTXT_CUSTOM_WINDOW_BITS 0x8000000
-#define PNG_FLAG_ZTXT_CUSTOM_METHOD      0x10000000
-/*     0x20000000  unused */
-/*     0x40000000  unused */
-
-#define PNG_FLAG_CRC_ANCILLARY_MASK (PNG_FLAG_CRC_ANCILLARY_USE | \
-                                     PNG_FLAG_CRC_ANCILLARY_NOWARN)
-
-#define PNG_FLAG_CRC_CRITICAL_MASK  (PNG_FLAG_CRC_CRITICAL_USE | \
-                                     PNG_FLAG_CRC_CRITICAL_IGNORE)
-
-#define PNG_FLAG_CRC_MASK           (PNG_FLAG_CRC_ANCILLARY_MASK | \
-                                     PNG_FLAG_CRC_CRITICAL_MASK)
-
-/* zlib.h declares a magic type 'uInt' that limits the amount of data that zlib
- * can handle at once.  This type need be no larger than 16 bits (so maximum of
- * 65535), this define allows us to discover how big it is, but limited by the
- * maximuum for png_size_t.  The value can be overriden in a library build
- * (pngusr.h, or set it in CPPFLAGS) and it works to set it to a considerably
- * lower value (e.g. 255 works).  A lower value may help memory usage (slightly)
- * and may even improve performance on some systems (and degrade it on others.)
- */
-#ifndef ZLIB_IO_MAX
-#  define ZLIB_IO_MAX ((uInt)-1)
-#endif
-
-/* Save typing and make code easier to understand */
-
-#define PNG_COLOR_DIST(c1, c2) (abs((int)((c1).red) - (int)((c2).red)) + \
-   abs((int)((c1).green) - (int)((c2).green)) + \
-   abs((int)((c1).blue) - (int)((c2).blue)))
-
-/* Added to libpng-1.2.6 JB */
-#define PNG_ROWBYTES(pixel_bits, width) \
-    ((pixel_bits) >= 8 ? \
-    ((png_size_t)(width) * (((png_size_t)(pixel_bits)) >> 3)) : \
-    (( ((png_size_t)(width) * ((png_size_t)(pixel_bits))) + 7) >> 3) )
-
-/* PNG_OUT_OF_RANGE returns true if value is outside the range
- * ideal-delta..ideal+delta.  Each argument is evaluated twice.
- * "ideal" and "delta" should be constants, normally simple
- * integers, "value" a variable. Added to libpng-1.2.6 JB
- */
-#define PNG_OUT_OF_RANGE(value, ideal, delta) \
-   ( (value) < (ideal)-(delta) || (value) > (ideal)+(delta) )
-
-/* Conversions between fixed and floating point, only defined if
- * required (to make sure the code doesn't accidentally use float
- * when it is supposedly disabled.)
- */
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-/* The floating point conversion can't overflow, though it can and
- * does lose accuracy relative to the original fixed point value.
- * In practice this doesn't matter because png_fixed_point only
- * stores numbers with very low precision.  The png_ptr and s
- * arguments are unused by default but are there in case error
- * checking becomes a requirement.
- */
-#define png_float(png_ptr, fixed, s) (.00001 * (fixed))
-
-/* The fixed point conversion performs range checking and evaluates
- * its argument multiple times, so must be used with care.  The
- * range checking uses the PNG specification values for a signed
- * 32 bit fixed point value except that the values are deliberately
- * rounded-to-zero to an integral value - 21474 (21474.83 is roughly
- * (2^31-1) * 100000). 's' is a string that describes the value being
- * converted.
- *
- * NOTE: this macro will raise a png_error if the range check fails,
- * therefore it is normally only appropriate to use this on values
- * that come from API calls or other sources where an out of range
- * error indicates a programming error, not a data error!
- *
- * NOTE: by default this is off - the macro is not used - because the
- * function call saves a lot of code.
- */
-#ifdef PNG_FIXED_POINT_MACRO_SUPPORTED
-#define png_fixed(png_ptr, fp, s) ((fp) <= 21474 && (fp) >= -21474 ?\
-    ((png_fixed_point)(100000 * (fp))) : (png_fixed_error(png_ptr, s),0))
-#else
-PNG_EXTERN png_fixed_point png_fixed PNGARG((png_structp png_ptr, double fp,
-      png_const_charp text));
-#endif
-#endif
-
-/* Constants for known chunk types.  If you need to add a chunk, define the name
- * here.  For historical reasons these constants have the form png_<name>; i.e.
- * the prefix is lower case.  Please use decimal values as the parameters to
- * match the ISO PNG specification and to avoid relying on the C locale
- * interpretation of character values.
- *
- * Prior to 1.5.6 these constants were strings, as of 1.5.6 png_uint_32 values
- * are computed and a new macro (PNG_STRING_FROM_CHUNK) added to allow a string
- * to be generated if required.
- *
- * PNG_32b correctly produces a value shifted by up to 24 bits, even on
- * architectures where (int) is only 16 bits.
- */
-#define PNG_32b(b,s) ((png_uint_32)(b) << (s))
-#define PNG_CHUNK(b1,b2,b3,b4) \
-   (PNG_32b(b1,24) | PNG_32b(b2,16) | PNG_32b(b3,8) | PNG_32b(b4,0))
-
-#define png_IHDR PNG_CHUNK( 73,  72,  68,  82)
-#define png_IDAT PNG_CHUNK( 73,  68,  65,  84)
-#define png_IEND PNG_CHUNK( 73,  69,  78,  68)
-#define png_PLTE PNG_CHUNK( 80,  76,  84,  69)
-#define png_bKGD PNG_CHUNK( 98,  75,  71,  68)
-#define png_cHRM PNG_CHUNK( 99,  72,  82,  77)
-#define png_gAMA PNG_CHUNK(103,  65,  77,  65)
-#define png_hIST PNG_CHUNK(104,  73,  83,  84)
-#define png_iCCP PNG_CHUNK(105,  67,  67,  80)
-#define png_iTXt PNG_CHUNK(105,  84,  88, 116)
-#define png_oFFs PNG_CHUNK(111,  70,  70, 115)
-#define png_pCAL PNG_CHUNK(112,  67,  65,  76)
-#define png_sCAL PNG_CHUNK(115,  67,  65,  76)
-#define png_pHYs PNG_CHUNK(112,  72,  89, 115)
-#define png_sBIT PNG_CHUNK(115,  66,  73,  84)
-#define png_sPLT PNG_CHUNK(115,  80,  76,  84)
-#define png_sRGB PNG_CHUNK(115,  82,  71,  66)
-#define png_sTER PNG_CHUNK(115,  84,  69,  82)
-#define png_tEXt PNG_CHUNK(116,  69,  88, 116)
-#define png_tIME PNG_CHUNK(116,  73,  77,  69)
-#define png_tRNS PNG_CHUNK(116,  82,  78,  83)
-#define png_zTXt PNG_CHUNK(122,  84,  88, 116)
-
-/* The following will work on (signed char*) strings, whereas the get_uint_32
- * macro will fail on top-bit-set values because of the sign extension.
- */
-#define PNG_CHUNK_FROM_STRING(s)\
-   PNG_CHUNK(0xff&(s)[0], 0xff&(s)[1], 0xff&(s)[2], 0xff&(s)[3])
-
-/* This uses (char), not (png_byte) to avoid warnings on systems where (char) is
- * signed and the argument is a (char[])  This macro will fail miserably on
- * systems where (char) is more than 8 bits.
- */
-#define PNG_STRING_FROM_CHUNK(s,c)\
-   (void)(((char*)(s))[0]=(char)((c)>>24), ((char*)(s))[1]=(char)((c)>>16),\
-   ((char*)(s))[2]=(char)((c)>>8), ((char*)(s))[3]=(char)((c)))
-
-/* Do the same but terminate with a null character. */
-#define PNG_CSTRING_FROM_CHUNK(s,c)\
-   (void)(PNG_STRING_FROM_CHUNK(s,c), ((char*)(s))[4] = 0)
-
-/* Test on flag values as defined in the spec (section 5.4): */
-#define PNG_CHUNK_ANCILLIARY(c)   (1 & ((c) >> 29))
-#define PNG_CHUNK_CRITICAL(c)     (!PNG_CHUNK_ANCILLIARY(c))
-#define PNG_CHUNK_PRIVATE(c)      (1 & ((c) >> 21))
-#define PNG_CHUNK_RESERVED(c)     (1 & ((c) >> 13))
-#define PNG_CHUNK_SAFE_TO_COPY(c) (1 & ((c) >>  5))
-
-/* Gamma values (new at libpng-1.5.4): */
-#define PNG_GAMMA_MAC_OLD 151724  /* Assume '1.8' is really 2.2/1.45! */
-#define PNG_GAMMA_MAC_INVERSE 65909
-#define PNG_GAMMA_sRGB_INVERSE 45455
-
-
-/* Inhibit C++ name-mangling for libpng functions but not for system calls. */
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-   /* These functions are used internally in the code.  They generally
-    * shouldn't be used unless you are writing code to add or replace some
-    * functionality in libpng.  More information about most functions can
-    * be found in the files where the functions are located.
-    */
-
-   /* Check the user version string for compatibility, returns false if the version
-    * numbers aren't compatible.
-    */
-   PNG_EXTERN int png_user_version_check(png_structp png_ptr,
-                                         png_const_charp user_png_ver);
-
-   /* Allocate memory for an internal libpng struct */
-   PNG_EXTERN PNG_FUNCTION(png_voidp,png_create_struct,PNGARG((int type)),
-                           PNG_ALLOCATED);
-
-   /* Free memory from internal libpng struct */
-   PNG_EXTERN void png_destroy_struct PNGARG((png_voidp struct_ptr));
-
-   PNG_EXTERN PNG_FUNCTION(png_voidp,png_create_struct_2,
-                           PNGARG((int type, png_malloc_ptr malloc_fn, png_voidp mem_ptr)),
-                           PNG_ALLOCATED);
-   PNG_EXTERN void png_destroy_struct_2 PNGARG((png_voidp struct_ptr,
-         png_free_ptr free_fn, png_voidp mem_ptr));
-
-   /* Free any memory that info_ptr points to and reset struct. */
-   PNG_EXTERN void png_info_destroy PNGARG((png_structp png_ptr,
-                                           png_infop info_ptr));
-
-   /* Function to allocate memory for zlib.  PNGAPI is disallowed. */
-   PNG_EXTERN PNG_FUNCTION(voidpf,png_zalloc,PNGARG((voidpf png_ptr, uInt items,
-                           uInt size)),PNG_ALLOCATED);
-
-   /* Function to free memory for zlib.  PNGAPI is disallowed. */
-   PNG_EXTERN void png_zfree PNGARG((voidpf png_ptr, voidpf ptr));
-
-   /* Next four functions are used internally as callbacks.  PNGCBAPI is required
-    * but not PNG_EXPORT.  PNGAPI added at libpng version 1.2.3, changed to
-    * PNGCBAPI at 1.5.0
-    */
-
-   PNG_EXTERN void PNGCBAPI png_default_read_data PNGARG((png_structp png_ptr,
-         png_bytep data, png_size_t length));
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-   PNG_EXTERN void PNGCBAPI png_push_fill_buffer PNGARG((png_structp png_ptr,
-         png_bytep buffer, png_size_t length));
-#endif
-
-   PNG_EXTERN void PNGCBAPI png_default_write_data PNGARG((png_structp png_ptr,
-         png_bytep data, png_size_t length));
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-#  ifdef PNG_STDIO_SUPPORTED
-   PNG_EXTERN void PNGCBAPI png_default_flush PNGARG((png_structp png_ptr));
-#  endif
-#endif
-
-   /* Reset the CRC variable */
-   PNG_EXTERN void png_reset_crc PNGARG((png_structp png_ptr));
-
-   /* Write the "data" buffer to whatever output you are using */
-   PNG_EXTERN void png_write_data PNGARG((png_structp png_ptr,
-                                          png_const_bytep data, png_size_t length));
-
-   /* Read and check the PNG file signature */
-   PNG_EXTERN void png_read_sig PNGARG((png_structp png_ptr, png_infop info_ptr));
-
-   /* Read the chunk header (length + type name) */
-   PNG_EXTERN png_uint_32 png_read_chunk_header PNGARG((png_structp png_ptr));
-
-   /* Read data from whatever input you are using into the "data" buffer */
-   PNG_EXTERN void png_read_data PNGARG((png_structp png_ptr, png_bytep data,
-                                         png_size_t length));
-
-   /* Read bytes into buf, and update png_ptr->crc */
-   PNG_EXTERN void png_crc_read PNGARG((png_structp png_ptr, png_bytep buf,
-                                        png_size_t length));
-
-   /* Decompress data in a chunk that uses compression */
-#if defined(PNG_READ_COMPRESSED_TEXT_SUPPORTED)
-   PNG_EXTERN void png_decompress_chunk PNGARG((png_structp png_ptr,
-         int comp_type, png_size_t chunklength, png_size_t prefix_length,
-         png_size_t *data_length));
-#endif
-
-   /* Read "skip" bytes, read the file crc, and (optionally) verify png_ptr->crc */
-   PNG_EXTERN int png_crc_finish PNGARG((png_structp png_ptr, png_uint_32 skip));
-
-   /* Read the CRC from the file and compare it to the libpng calculated CRC */
-   PNG_EXTERN int png_crc_error PNGARG((png_structp png_ptr));
-
-   /* Calculate the CRC over a section of data.  Note that we are only
-    * passing a maximum of 64K on systems that have this as a memory limit,
-    * since this is the maximum buffer size we can specify.
-    */
-   PNG_EXTERN void png_calculate_crc PNGARG((png_structp png_ptr,
-         png_const_bytep ptr, png_size_t length));
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-   PNG_EXTERN void png_flush PNGARG((png_structp png_ptr));
-#endif
-
-   /* Write various chunks */
-
-   /* Write the IHDR chunk, and update the png_struct with the necessary
-    * information.
-    */
-   PNG_EXTERN void png_write_IHDR PNGARG((png_structp png_ptr, png_uint_32 width,
-                                          png_uint_32 height,
-                                          int bit_depth, int color_type, int compression_method, int filter_method,
-                                          int interlace_method));
-
-   PNG_EXTERN void png_write_PLTE PNGARG((png_structp png_ptr,
-                                          png_const_colorp palette, png_uint_32 num_pal));
-
-   PNG_EXTERN void png_write_IDAT PNGARG((png_structp png_ptr, png_bytep data,
-                                          png_size_t length));
-
-   PNG_EXTERN void png_write_IEND PNGARG((png_structp png_ptr));
-
-#ifdef PNG_WRITE_gAMA_SUPPORTED
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-   PNG_EXTERN void png_write_gAMA PNGARG((png_structp png_ptr, double file_gamma));
-#  endif
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-   PNG_EXTERN void png_write_gAMA_fixed PNGARG((png_structp png_ptr,
-         png_fixed_point file_gamma));
-#  endif
-#endif
-
-#ifdef PNG_WRITE_sBIT_SUPPORTED
-   PNG_EXTERN void png_write_sBIT PNGARG((png_structp png_ptr,
-                                          png_const_color_8p sbit, int color_type));
-#endif
-
-#ifdef PNG_WRITE_cHRM_SUPPORTED
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-   PNG_EXTERN void png_write_cHRM PNGARG((png_structp png_ptr,
-                                          double white_x, double white_y,
-                                          double red_x, double red_y, double green_x, double green_y,
-                                          double blue_x, double blue_y));
-#  endif
-   PNG_EXTERN void png_write_cHRM_fixed PNGARG((png_structp png_ptr,
-         png_fixed_point int_white_x, png_fixed_point int_white_y,
-         png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point
-         int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x,
-         png_fixed_point int_blue_y));
-#endif
-
-#ifdef PNG_WRITE_sRGB_SUPPORTED
-   PNG_EXTERN void png_write_sRGB PNGARG((png_structp png_ptr,
-                                          int intent));
-#endif
-
-#ifdef PNG_WRITE_iCCP_SUPPORTED
-   PNG_EXTERN void png_write_iCCP PNGARG((png_structp png_ptr,
-                                          png_const_charp name, int compression_type,
-                                          png_const_charp profile, int proflen));
-   /* Note to maintainer: profile should be png_bytep */
-#endif
-
-#ifdef PNG_WRITE_sPLT_SUPPORTED
-   PNG_EXTERN void png_write_sPLT PNGARG((png_structp png_ptr,
-                                          png_const_sPLT_tp palette));
-#endif
-
-#ifdef PNG_WRITE_tRNS_SUPPORTED
-   PNG_EXTERN void png_write_tRNS PNGARG((png_structp png_ptr,
-                                          png_const_bytep trans, png_const_color_16p values, int number,
-                                          int color_type));
-#endif
-
-#ifdef PNG_WRITE_bKGD_SUPPORTED
-   PNG_EXTERN void png_write_bKGD PNGARG((png_structp png_ptr,
-                                          png_const_color_16p values, int color_type));
-#endif
-
-#ifdef PNG_WRITE_hIST_SUPPORTED
-   PNG_EXTERN void png_write_hIST PNGARG((png_structp png_ptr,
-                                          png_const_uint_16p hist, int num_hist));
-#endif
-
-   /* Chunks that have keywords */
-#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_pCAL_SUPPORTED) || \
-    defined(PNG_WRITE_iCCP_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
-   PNG_EXTERN png_size_t png_check_keyword PNGARG((png_structp png_ptr,
-         png_const_charp key, png_charpp new_key));
-#endif
-
-#ifdef PNG_WRITE_tEXt_SUPPORTED
-   PNG_EXTERN void png_write_tEXt PNGARG((png_structp png_ptr, png_const_charp key,
-                                          png_const_charp text, png_size_t text_len));
-#endif
-
-#ifdef PNG_WRITE_zTXt_SUPPORTED
-   PNG_EXTERN void png_write_zTXt PNGARG((png_structp png_ptr, png_const_charp key,
-                                          png_const_charp text, png_size_t text_len, int compression));
-#endif
-
-#ifdef PNG_WRITE_iTXt_SUPPORTED
-   PNG_EXTERN void png_write_iTXt PNGARG((png_structp png_ptr,
-                                          int compression, png_const_charp key, png_const_charp lang,
-                                          png_const_charp lang_key, png_const_charp text));
-#endif
-
-#ifdef PNG_TEXT_SUPPORTED  /* Added at version 1.0.14 and 1.2.4 */
-   PNG_EXTERN int png_set_text_2 PNGARG((png_structp png_ptr,
-                                         png_infop info_ptr, png_const_textp text_ptr, int num_text));
-#endif
-
-#ifdef PNG_WRITE_oFFs_SUPPORTED
-   PNG_EXTERN void png_write_oFFs PNGARG((png_structp png_ptr,
-                                          png_int_32 x_offset, png_int_32 y_offset, int unit_type));
-#endif
-
-#ifdef PNG_WRITE_pCAL_SUPPORTED
-   PNG_EXTERN void png_write_pCAL PNGARG((png_structp png_ptr, png_charp purpose,
-                                          png_int_32 X0, png_int_32 X1, int type, int nparams,
-                                          png_const_charp units, png_charpp params));
-#endif
-
-#ifdef PNG_WRITE_pHYs_SUPPORTED
-   PNG_EXTERN void png_write_pHYs PNGARG((png_structp png_ptr,
-                                          png_uint_32 x_pixels_per_unit, png_uint_32 y_pixels_per_unit,
-                                          int unit_type));
-#endif
-
-#ifdef PNG_WRITE_tIME_SUPPORTED
-   PNG_EXTERN void png_write_tIME PNGARG((png_structp png_ptr,
-                                          png_const_timep mod_time));
-#endif
-
-#ifdef PNG_WRITE_sCAL_SUPPORTED
-   PNG_EXTERN void png_write_sCAL_s PNGARG((png_structp png_ptr,
-                                           int unit, png_const_charp width, png_const_charp height));
-#endif
-
-   /* Called when finished processing a row of data */
-   PNG_EXTERN void png_write_finish_row PNGARG((png_structp png_ptr));
-
-   /* Internal use only.   Called before first row of data */
-   PNG_EXTERN void png_write_start_row PNGARG((png_structp png_ptr));
-
-   /* Combine a row of data, dealing with alpha, etc. if requested.  'row' is an
-    * array of png_ptr->width pixels.  If the image is not interlaced or this
-    * is the final pass this just does a png_memcpy, otherwise the "display" flag
-    * is used to determine whether to copy pixels that are not in the current pass.
-    *
-    * Because 'png_do_read_interlace' (below) replicates pixels this allows this
-    * function to achieve the documented 'blocky' appearance during interlaced read
-    * if display is 1 and the 'sparkle' appearance, where existing pixels in 'row'
-    * are not changed if they are not in the current pass, when display is 0.
-    *
-    * 'display' must be 0 or 1, otherwise the memcpy will be done regardless.
-    *
-    * The API always reads from the png_struct row buffer and always assumes that
-    * it is full width (png_do_read_interlace has already been called.)
-    *
-    * This function is only ever used to write to row buffers provided by the
-    * caller of the relevant libpng API and the row must have already been
-    * transformed by the read transformations.
-    *
-    * The PNG_USE_COMPILE_TIME_MASKS option causes generation of pre-computed
-    * bitmasks for use within the code, otherwise runtime generated masks are used.
-    * The default is compile time masks.
-    */
-#ifndef PNG_USE_COMPILE_TIME_MASKS
-#  define PNG_USE_COMPILE_TIME_MASKS 1
-#endif
-   PNG_EXTERN void png_combine_row PNGARG((png_structp png_ptr, png_bytep row,
-                                           int display));
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* Expand an interlaced row: the 'row_info' describes the pass data that has
-    * been read in and must correspond to the pixels in 'row', the pixels are
-    * expanded (moved apart) in 'row' to match the final layout, when doing this
-    * the pixels are *replicated* to the intervening space.  This is essential for
-    * the correct operation of png_combine_row, above.
-    */
-   PNG_EXTERN void png_do_read_interlace PNGARG((png_row_infop row_info,
-         png_bytep row, int pass, png_uint_32 transformations));
-#endif
-
-   /* GRR TO DO (2.0 or whenever):  simplify other internal calling interfaces */
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* Grab pixels out of a row for an interlaced pass */
-   PNG_EXTERN void png_do_write_interlace PNGARG((png_row_infop row_info,
-         png_bytep row, int pass));
-#endif
-
-   /* Unfilter a row: check the filter value before calling this, there is no point
-    * calling it for PNG_FILTER_VALUE_NONE.
-    */
-   PNG_EXTERN void png_read_filter_row PNGARG((png_structp pp, png_row_infop row_info,
-         png_bytep row, png_const_bytep prev_row, int filter));
-
-   PNG_EXTERN void png_read_filter_row_up_neon PNGARG((png_row_infop row_info,
-         png_bytep row, png_const_bytep prev_row));
-   PNG_EXTERN void png_read_filter_row_sub3_neon PNGARG((png_row_infop row_info,
-         png_bytep row, png_const_bytep prev_row));
-   PNG_EXTERN void png_read_filter_row_sub4_neon PNGARG((png_row_infop row_info,
-         png_bytep row, png_const_bytep prev_row));
-   PNG_EXTERN void png_read_filter_row_avg3_neon PNGARG((png_row_infop row_info,
-         png_bytep row, png_const_bytep prev_row));
-   PNG_EXTERN void png_read_filter_row_avg4_neon PNGARG((png_row_infop row_info,
-         png_bytep row, png_const_bytep prev_row));
-   PNG_EXTERN void png_read_filter_row_paeth3_neon PNGARG((png_row_infop row_info,
-         png_bytep row, png_const_bytep prev_row));
-   PNG_EXTERN void png_read_filter_row_paeth4_neon PNGARG((png_row_infop row_info,
-         png_bytep row, png_const_bytep prev_row));
-
-   /* Choose the best filter to use and filter the row data */
-   PNG_EXTERN void png_write_find_filter PNGARG((png_structp png_ptr,
-         png_row_infop row_info));
-
-   /* Finish a row while reading, dealing with interlacing passes, etc. */
-   PNG_EXTERN void png_read_finish_row PNGARG((png_structp png_ptr));
-
-   /* Initialize the row buffers, etc. */
-   PNG_EXTERN void png_read_start_row PNGARG((png_structp png_ptr));
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-   /* Optional call to update the users info structure */
-   PNG_EXTERN void png_read_transform_info PNGARG((png_structp png_ptr,
-         png_infop info_ptr));
-#endif
-
-   /* These are the functions that do the transformations */
-#ifdef PNG_READ_FILLER_SUPPORTED
-   PNG_EXTERN void png_do_read_filler PNGARG((png_row_infop row_info,
-         png_bytep row, png_uint_32 filler, png_uint_32 flags));
-#endif
-
-#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED
-   PNG_EXTERN void png_do_read_swap_alpha PNGARG((png_row_infop row_info,
-         png_bytep row));
-#endif
-
-#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED
-   PNG_EXTERN void png_do_write_swap_alpha PNGARG((png_row_infop row_info,
-         png_bytep row));
-#endif
-
-#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
-   PNG_EXTERN void png_do_read_invert_alpha PNGARG((png_row_infop row_info,
-         png_bytep row));
-#endif
-
-#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
-   PNG_EXTERN void png_do_write_invert_alpha PNGARG((png_row_infop row_info,
-         png_bytep row));
-#endif
-
-#if defined(PNG_WRITE_FILLER_SUPPORTED) || \
-    defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
-   PNG_EXTERN void png_do_strip_channel PNGARG((png_row_infop row_info,
-         png_bytep row, int at_start));
-#endif
-
-#ifdef PNG_16BIT_SUPPORTED
-#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
-   PNG_EXTERN void png_do_swap PNGARG((png_row_infop row_info,
-                                       png_bytep row));
-#endif
-#endif
-
-#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \
-    defined(PNG_WRITE_PACKSWAP_SUPPORTED)
-   PNG_EXTERN void png_do_packswap PNGARG((png_row_infop row_info,
-                                           png_bytep row));
-#endif
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-   PNG_EXTERN int png_do_rgb_to_gray PNGARG((png_structp png_ptr,
-         png_row_infop row_info, png_bytep row));
-#endif
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   PNG_EXTERN void png_do_gray_to_rgb PNGARG((png_row_infop row_info,
-         png_bytep row));
-#endif
-
-#ifdef PNG_READ_PACK_SUPPORTED
-   PNG_EXTERN void png_do_unpack PNGARG((png_row_infop row_info,
-                                         png_bytep row));
-#endif
-
-#ifdef PNG_READ_SHIFT_SUPPORTED
-   PNG_EXTERN void png_do_unshift PNGARG((png_row_infop row_info,
-                                          png_bytep row, png_const_color_8p sig_bits));
-#endif
-
-#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
-   PNG_EXTERN void png_do_invert PNGARG((png_row_infop row_info,
-                                         png_bytep row));
-#endif
-
-#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-   PNG_EXTERN void png_do_scale_16_to_8 PNGARG((png_row_infop row_info,
-         png_bytep row));
-#endif
-
-#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-   PNG_EXTERN void png_do_chop PNGARG((png_row_infop row_info,
-                                       png_bytep row));
-#endif
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-   PNG_EXTERN void png_do_quantize PNGARG((png_row_infop row_info,
-                                           png_bytep row, png_const_bytep palette_lookup,
-                                           png_const_bytep quantize_lookup));
-
-#  ifdef PNG_CORRECT_PALETTE_SUPPORTED
-   PNG_EXTERN void png_correct_palette PNGARG((png_structp png_ptr,
-         png_colorp palette, int num_palette));
-#  endif
-#endif
-
-#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
-   PNG_EXTERN void png_do_bgr PNGARG((png_row_infop row_info,
-                                      png_bytep row));
-#endif
-
-#ifdef PNG_WRITE_PACK_SUPPORTED
-   PNG_EXTERN void png_do_pack PNGARG((png_row_infop row_info,
-                                       png_bytep row, png_uint_32 bit_depth));
-#endif
-
-#ifdef PNG_WRITE_SHIFT_SUPPORTED
-   PNG_EXTERN void png_do_shift PNGARG((png_row_infop row_info,
-                                        png_bytep row, png_const_color_8p bit_depth));
-#endif
-
-#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\
-    defined(PNG_READ_ALPHA_MODE_SUPPORTED)
-   PNG_EXTERN void png_do_compose PNGARG((png_row_infop row_info,
-                                          png_bytep row, png_structp png_ptr));
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   PNG_EXTERN void png_do_gamma PNGARG((png_row_infop row_info,
-                                        png_bytep row, png_structp png_ptr));
-#endif
-
-#ifdef PNG_READ_ALPHA_MODE_SUPPORTED
-   PNG_EXTERN void png_do_encode_alpha PNGARG((png_row_infop row_info,
-         png_bytep row, png_structp png_ptr));
-#endif
-
-#ifdef PNG_READ_EXPAND_SUPPORTED
-   PNG_EXTERN void png_do_expand_palette PNGARG((png_row_infop row_info,
-         png_bytep row, png_const_colorp palette, png_const_bytep trans,
-         int num_trans));
-   PNG_EXTERN void png_do_expand PNGARG((png_row_infop row_info,
-                                         png_bytep row, png_const_color_16p trans_color));
-#endif
-
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-   PNG_EXTERN void png_do_expand_16 PNGARG((png_row_infop row_info,
-                                           png_bytep row));
-#endif
-
-   /* The following decodes the appropriate chunks, and does error correction,
-    * then calls the appropriate callback for the chunk if it is valid.
-    */
-
-   /* Decode the IHDR chunk */
-   PNG_EXTERN void png_handle_IHDR PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-   PNG_EXTERN void png_handle_PLTE PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-   PNG_EXTERN void png_handle_IEND PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-
-#ifdef PNG_READ_bKGD_SUPPORTED
-   PNG_EXTERN void png_handle_bKGD PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_cHRM_SUPPORTED
-   PNG_EXTERN void png_handle_cHRM PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_gAMA_SUPPORTED
-   PNG_EXTERN void png_handle_gAMA PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_hIST_SUPPORTED
-   PNG_EXTERN void png_handle_hIST PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_iCCP_SUPPORTED
-   PNG_EXTERN void png_handle_iCCP PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif /* PNG_READ_iCCP_SUPPORTED */
-
-#ifdef PNG_READ_iTXt_SUPPORTED
-   PNG_EXTERN void png_handle_iTXt PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_oFFs_SUPPORTED
-   PNG_EXTERN void png_handle_oFFs PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_pCAL_SUPPORTED
-   PNG_EXTERN void png_handle_pCAL PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_pHYs_SUPPORTED
-   PNG_EXTERN void png_handle_pHYs PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_sBIT_SUPPORTED
-   PNG_EXTERN void png_handle_sBIT PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_sCAL_SUPPORTED
-   PNG_EXTERN void png_handle_sCAL PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_sPLT_SUPPORTED
-   PNG_EXTERN void png_handle_sPLT PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif /* PNG_READ_sPLT_SUPPORTED */
-
-#ifdef PNG_READ_sRGB_SUPPORTED
-   PNG_EXTERN void png_handle_sRGB PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_tEXt_SUPPORTED
-   PNG_EXTERN void png_handle_tEXt PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_tIME_SUPPORTED
-   PNG_EXTERN void png_handle_tIME PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_tRNS_SUPPORTED
-   PNG_EXTERN void png_handle_tRNS PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_READ_zTXt_SUPPORTED
-   PNG_EXTERN void png_handle_zTXt PNGARG((png_structp png_ptr, png_infop info_ptr,
-                                           png_uint_32 length));
-#endif
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   PNG_EXTERN void png_handle_unknown PNGARG((png_structp png_ptr,
-         png_infop info_ptr, png_uint_32 length));
-#endif
-
-   PNG_EXTERN void png_check_chunk_name PNGARG((png_structp png_ptr,
-         png_uint_32 chunk_name));
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-   /* Exactly as png_handle_as_unknown() except that the argument is a 32-bit chunk
-    * name, not a string.
-    */
-   PNG_EXTERN int png_chunk_unknown_handling PNGARG((png_structp png_ptr,
-         png_uint_32 chunk_name));
-#endif
-
-   /* Handle the transformations for reading and writing */
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-   PNG_EXTERN void png_do_read_transformations PNGARG((png_structp png_ptr,
-         png_row_infop row_info));
-#endif
-#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
-   PNG_EXTERN void png_do_write_transformations PNGARG((png_structp png_ptr,
-         png_row_infop row_info));
-#endif
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-   PNG_EXTERN void png_init_read_transformations PNGARG((png_structp png_ptr));
-#endif
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-   PNG_EXTERN void png_push_read_chunk PNGARG((png_structp png_ptr,
-         png_infop info_ptr));
-   PNG_EXTERN void png_push_read_sig PNGARG((png_structp png_ptr,
-         png_infop info_ptr));
-   PNG_EXTERN void png_push_check_crc PNGARG((png_structp png_ptr));
-   PNG_EXTERN void png_push_crc_skip PNGARG((png_structp png_ptr,
-         png_uint_32 length));
-   PNG_EXTERN void png_push_crc_finish PNGARG((png_structp png_ptr));
-   PNG_EXTERN void png_push_save_buffer PNGARG((png_structp png_ptr));
-   PNG_EXTERN void png_push_restore_buffer PNGARG((png_structp png_ptr,
-         png_bytep buffer, png_size_t buffer_length));
-   PNG_EXTERN void png_push_read_IDAT PNGARG((png_structp png_ptr));
-   PNG_EXTERN void png_process_IDAT_data PNGARG((png_structp png_ptr,
-         png_bytep buffer, png_size_t buffer_length));
-   PNG_EXTERN void png_push_process_row PNGARG((png_structp png_ptr));
-   PNG_EXTERN void png_push_handle_unknown PNGARG((png_structp png_ptr,
-         png_infop info_ptr, png_uint_32 length));
-   PNG_EXTERN void png_push_have_info PNGARG((png_structp png_ptr,
-         png_infop info_ptr));
-   PNG_EXTERN void png_push_have_end PNGARG((png_structp png_ptr,
-         png_infop info_ptr));
-   PNG_EXTERN void png_push_have_row PNGARG((png_structp png_ptr, png_bytep row));
-   PNG_EXTERN void png_push_read_end PNGARG((png_structp png_ptr,
-         png_infop info_ptr));
-   PNG_EXTERN void png_process_some_data PNGARG((png_structp png_ptr,
-         png_infop info_ptr));
-   PNG_EXTERN void png_read_push_finish_row PNGARG((png_structp png_ptr));
-#  ifdef PNG_READ_tEXt_SUPPORTED
-   PNG_EXTERN void png_push_handle_tEXt PNGARG((png_structp png_ptr,
-         png_infop info_ptr, png_uint_32 length));
-   PNG_EXTERN void png_push_read_tEXt PNGARG((png_structp png_ptr,
-         png_infop info_ptr));
-#  endif
-#  ifdef PNG_READ_zTXt_SUPPORTED
-   PNG_EXTERN void png_push_handle_zTXt PNGARG((png_structp png_ptr,
-         png_infop info_ptr, png_uint_32 length));
-   PNG_EXTERN void png_push_read_zTXt PNGARG((png_structp png_ptr,
-         png_infop info_ptr));
-#  endif
-#  ifdef PNG_READ_iTXt_SUPPORTED
-   PNG_EXTERN void png_push_handle_iTXt PNGARG((png_structp png_ptr,
-         png_infop info_ptr, png_uint_32 length));
-   PNG_EXTERN void png_push_read_iTXt PNGARG((png_structp png_ptr,
-         png_infop info_ptr));
-#  endif
-
-#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   PNG_EXTERN void png_do_read_intrapixel PNGARG((png_row_infop row_info,
-         png_bytep row));
-   PNG_EXTERN void png_do_write_intrapixel PNGARG((png_row_infop row_info,
-         png_bytep row));
-#endif
-
-   /* Added at libpng version 1.4.0 */
-#ifdef PNG_CHECK_cHRM_SUPPORTED
-   PNG_EXTERN int png_check_cHRM_fixed PNGARG((png_structp png_ptr,
-         png_fixed_point int_white_x, png_fixed_point int_white_y,
-         png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point
-         int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x,
-         png_fixed_point int_blue_y));
-#endif
-
-#ifdef PNG_CHECK_cHRM_SUPPORTED
-   /* Added at libpng version 1.2.34 and 1.4.0 */
-   /* Currently only used by png_check_cHRM_fixed */
-   PNG_EXTERN void png_64bit_product PNGARG((long v1, long v2,
-         unsigned long *hi_product, unsigned long *lo_product));
-#endif
-
-#ifdef PNG_cHRM_SUPPORTED
-   /* Added at libpng version 1.5.5 */
-   typedef struct png_xy
-   {
-      png_fixed_point redx, redy;
-      png_fixed_point greenx, greeny;
-      png_fixed_point bluex, bluey;
-      png_fixed_point whitex, whitey;
-   } png_xy;
-
-   typedef struct png_XYZ
-   {
-      png_fixed_point redX, redY, redZ;
-      png_fixed_point greenX, greenY, greenZ;
-      png_fixed_point blueX, blueY, blueZ;
-   } png_XYZ;
-
-   /* The conversion APIs return 0 on success, non-zero on a parameter error. They
-    * allow conversion between the above representations of a color encoding.  When
-    * converting from XYZ end points to chromaticities the absolute magnitude of
-    * the end points is lost, when converting back the sum of the Y values of the
-    * three end points will be 1.0
-    */
-   PNG_EXTERN int png_xy_from_XYZ PNGARG((png_xy *xy, png_XYZ XYZ));
-   PNG_EXTERN int png_XYZ_from_xy PNGARG((png_XYZ *XYZ, png_xy xy));
-   PNG_EXTERN int png_XYZ_from_xy_checked PNGARG((png_structp png_ptr,
-         png_XYZ *XYZ, png_xy xy));
-#endif
-
-   /* Added at libpng version 1.4.0 */
-   PNG_EXTERN void png_check_IHDR PNGARG((png_structp png_ptr,
-                                          png_uint_32 width, png_uint_32 height, int bit_depth,
-                                          int color_type, int interlace_type, int compression_type,
-                                          int filter_type));
-
-   /* Added at libpng version 1.5.10 */
-#if defined(PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED) || \
-    defined(PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED)
-   PNG_EXTERN void png_do_check_palette_indexes PNGARG((png_structp png_ptr,
-         png_row_infop row_info));
-#endif
-
-   /* Free all memory used by the read (old method - NOT DLL EXPORTED) */
-   PNG_EXTERN void png_read_destroy PNGARG((png_structp png_ptr,
-                                           png_infop info_ptr, png_infop end_info_ptr));
-
-   /* Free any memory used in png_ptr struct (old method - NOT DLL EXPORTED) */
-   PNG_EXTERN void png_write_destroy PNGARG((png_structp png_ptr));
-
-#ifdef USE_FAR_KEYWORD  /* memory model conversion function */
-   PNG_EXTERN void *png_far_to_near PNGARG((png_structp png_ptr, png_voidp ptr,
-                                           int check));
-#endif /* USE_FAR_KEYWORD */
-
-#if defined(PNG_FLOATING_POINT_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED)
-   PNG_EXTERN PNG_FUNCTION(void, png_fixed_error, (png_structp png_ptr,
-                           png_const_charp name),PNG_NORETURN);
-#endif
-
-   /* Puts 'string' into 'buffer' at buffer[pos], taking care never to overwrite
-    * the end.  Always leaves the buffer nul terminated.  Never errors out (and
-    * there is no error code.)
-    */
-   PNG_EXTERN size_t png_safecat(png_charp buffer, size_t bufsize, size_t pos,
-                                 png_const_charp string);
-
-   /* Various internal functions to handle formatted warning messages, currently
-    * only implemented for warnings.
-    */
-#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_TIME_RFC1123_SUPPORTED)
-   /* Utility to dump an unsigned value into a buffer, given a start pointer and
-    * and end pointer (which should point just *beyond* the end of the buffer!)
-    * Returns the pointer to the start of the formatted string.  This utility only
-    * does unsigned values.
-    */
-   PNG_EXTERN png_charp png_format_number(png_const_charp start, png_charp end,
-                                          int format, png_alloc_size_t number);
-
-   /* Convenience macro that takes an array: */
-#define PNG_FORMAT_NUMBER(buffer,format,number) \
-   png_format_number(buffer, buffer + (sizeof buffer), format, number)
-
-   /* Suggested size for a number buffer (enough for 64 bits and a sign!) */
-#define PNG_NUMBER_BUFFER_SIZE 24
-
-   /* These are the integer formats currently supported, the name is formed from
-    * the standard printf(3) format string.
-    */
-#define PNG_NUMBER_FORMAT_u     1 /* chose unsigned API! */
-#define PNG_NUMBER_FORMAT_02u   2
-#define PNG_NUMBER_FORMAT_d     1 /* chose signed API! */
-#define PNG_NUMBER_FORMAT_02d   2
-#define PNG_NUMBER_FORMAT_x     3
-#define PNG_NUMBER_FORMAT_02x   4
-#define PNG_NUMBER_FORMAT_fixed 5 /* choose the signed API */
-#endif
-
-#ifdef PNG_WARNINGS_SUPPORTED
-   /* New defines and members adding in libpng-1.5.4 */
-#  define PNG_WARNING_PARAMETER_SIZE 32
-#  define PNG_WARNING_PARAMETER_COUNT 8
-
-   /* An l-value of this type has to be passed to the APIs below to cache the
-    * values of the parameters to a formatted warning message.
-    */
-   typedef char png_warning_parameters[PNG_WARNING_PARAMETER_COUNT][
-      PNG_WARNING_PARAMETER_SIZE];
-
-   PNG_EXTERN void png_warning_parameter(png_warning_parameters p, int number,
-                                         png_const_charp string);
-   /* Parameters are limited in size to PNG_WARNING_PARAMETER_SIZE characters,
-    * including the trailing '\0'.
-    */
-   PNG_EXTERN void png_warning_parameter_unsigned(png_warning_parameters p,
-         int number, int format, png_alloc_size_t value);
-   /* Use png_alloc_size_t because it is an unsigned type as big as any we
-    * need to output.  Use the following for a signed value.
-    */
-   PNG_EXTERN void png_warning_parameter_signed(png_warning_parameters p,
-         int number, int format, png_int_32 value);
-
-   PNG_EXTERN void png_formatted_warning(png_structp png_ptr,
-                                         png_warning_parameters p, png_const_charp message);
-   /* 'message' follows the X/Open approach of using @1, @2 to insert
-    * parameters previously supplied using the above functions.  Errors in
-    * specifying the paramters will simple result in garbage substitutions.
-    */
-#endif
-
-   /* ASCII to FP interfaces, currently only implemented if sCAL
-    * support is required.
-    */
-#if defined(PNG_READ_sCAL_SUPPORTED)
-   /* MAX_DIGITS is actually the maximum number of characters in an sCAL
-    * width or height, derived from the precision (number of significant
-    * digits - a build time settable option) and assumpitions about the
-    * maximum ridiculous exponent.
-    */
-#define PNG_sCAL_MAX_DIGITS (PNG_sCAL_PRECISION+1/*.*/+1/*E*/+10/*exponent*/)
-
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-   PNG_EXTERN void png_ascii_from_fp PNGARG((png_structp png_ptr, png_charp ascii,
-         png_size_t size, double fp, unsigned int precision));
-#endif /* FLOATING_POINT */
-
-#ifdef PNG_FIXED_POINT_SUPPORTED
-   PNG_EXTERN void png_ascii_from_fixed PNGARG((png_structp png_ptr,
-         png_charp ascii, png_size_t size, png_fixed_point fp));
-#endif /* FIXED_POINT */
-#endif /* READ_sCAL */
-
-#if defined(PNG_sCAL_SUPPORTED) || defined(PNG_pCAL_SUPPORTED)
-   /* An internal API to validate the format of a floating point number.
-    * The result is the index of the next character.  If the number is
-    * not valid it will be the index of a character in the supposed number.
-    *
-    * The format of a number is defined in the PNG extensions specification
-    * and this API is strictly conformant to that spec, not anyone elses!
-    *
-    * The format as a regular expression is:
-    *
-    * [+-]?[0-9]+.?([Ee][+-]?[0-9]+)?
-    *
-    * or:
-    *
-    * [+-]?.[0-9]+(.[0-9]+)?([Ee][+-]?[0-9]+)?
-    *
-    * The complexity is that either integer or fraction must be present and the
-    * fraction is permitted to have no digits only if the integer is present.
-    *
-    * NOTE: The dangling E problem.
-    *   There is a PNG valid floating point number in the following:
-    *
-    *       PNG floating point numb1.ers are not greedy.
-    *
-    *   Working this out requires *TWO* character lookahead (because of the
-    *   sign), the parser does not do this - it will fail at the 'r' - this
-    *   doesn't matter for PNG sCAL chunk values, but it requires more care
-    *   if the value were ever to be embedded in something more complex.  Use
-    *   ANSI-C strtod if you need the lookahead.
-    */
-   /* State table for the parser. */
-#define PNG_FP_INTEGER    0  /* before or in integer */
-#define PNG_FP_FRACTION   1  /* before or in fraction */
-#define PNG_FP_EXPONENT   2  /* before or in exponent */
-#define PNG_FP_STATE      3  /* mask for the above */
-#define PNG_FP_SAW_SIGN   4  /* Saw +/- in current state */
-#define PNG_FP_SAW_DIGIT  8  /* Saw a digit in current state */
-#define PNG_FP_SAW_DOT   16  /* Saw a dot in current state */
-#define PNG_FP_SAW_E     32  /* Saw an E (or e) in current state */
-#define PNG_FP_SAW_ANY   60  /* Saw any of the above 4 */
-
-   /* These three values don't affect the parser.  They are set but not used.
-    */
-#define PNG_FP_WAS_VALID 64  /* Preceding substring is a valid fp number */
-#define PNG_FP_NEGATIVE 128  /* A negative number, including "-0" */
-#define PNG_FP_NONZERO  256  /* A non-zero value */
-#define PNG_FP_STICKY   448  /* The above three flags */
-
-   /* This is available for the caller to store in 'state' if required.  Do not
-    * call the parser after setting it (the parser sometimes clears it.)
-    */
-#define PNG_FP_INVALID  512  /* Available for callers as a distinct value */
-
-   /* Result codes for the parser (boolean - true meants ok, false means
-    * not ok yet.)
-    */
-#define PNG_FP_MAYBE      0  /* The number may be valid in the future */
-#define PNG_FP_OK         1  /* The number is valid */
-
-   /* Tests on the sticky non-zero and negative flags.  To pass these checks
-    * the state must also indicate that the whole number is valid - this is
-    * achieved by testing PNG_FP_SAW_DIGIT (see the implementation for why this
-    * is equivalent to PNG_FP_OK above.)
-    */
-#define PNG_FP_NZ_MASK (PNG_FP_SAW_DIGIT | PNG_FP_NEGATIVE | PNG_FP_NONZERO)
-   /* NZ_MASK: the string is valid and a non-zero negative value */
-#define PNG_FP_Z_MASK (PNG_FP_SAW_DIGIT | PNG_FP_NONZERO)
-   /* Z MASK: the string is valid and a non-zero value. */
-   /* PNG_FP_SAW_DIGIT: the string is valid. */
-#define PNG_FP_IS_ZERO(state) (((state) & PNG_FP_Z_MASK) == PNG_FP_SAW_DIGIT)
-#define PNG_FP_IS_POSITIVE(state) (((state) & PNG_FP_NZ_MASK) == PNG_FP_Z_MASK)
-#define PNG_FP_IS_NEGATIVE(state) (((state) & PNG_FP_NZ_MASK) == PNG_FP_NZ_MASK)
-
-   /* The actual parser.  This can be called repeatedly, it updates
-    * the index into the string and the state variable (which must
-    * be initialzed to 0).  It returns a result code, as above.  There
-    * is no point calling the parser any more if it fails to advance to
-    * the end of the string - it is stuck on an invalid character (or
-    * terminated by '\0').
-    *
-    * Note that the pointer will consume an E or even an E+ then leave
-    * a 'maybe' state even though a preceding integer.fraction is valid.
-    * The PNG_FP_WAS_VALID flag indicates that a preceding substring was
-    * a valid number.  It's possible to recover from this by calling
-    * the parser again (from the start, with state 0) but with a string
-    * that omits the last character (i.e. set the size to the index of
-    * the problem character.)  This has not been tested within libpng.
-    */
-   PNG_EXTERN int png_check_fp_number PNGARG((png_const_charp string,
-         png_size_t size, int *statep, png_size_tp whereami));
-
-   /* This is the same but it checks a complete string and returns true
-    * only if it just contains a floating point number.  As of 1.5.4 this
-    * function also returns the state at the end of parsing the number if
-    * it was valid (otherwise it returns 0.)  This can be used for testing
-    * for negative or zero values using the sticky flag.
-    */
-   PNG_EXTERN int png_check_fp_string PNGARG((png_const_charp string,
-         png_size_t size));
-#endif /* pCAL || sCAL */
-
-#if defined(PNG_READ_GAMMA_SUPPORTED) ||\
-    defined(PNG_INCH_CONVERSIONS_SUPPORTED) || defined(PNG_READ_pHYs_SUPPORTED)
-   /* Added at libpng version 1.5.0 */
-   /* This is a utility to provide a*times/div (rounded) and indicate
-    * if there is an overflow.  The result is a boolean - false (0)
-    * for overflow, true (1) if no overflow, in which case *res
-    * holds the result.
-    */
-   PNG_EXTERN int png_muldiv PNGARG((png_fixed_point_p res, png_fixed_point a,
-                                     png_int_32 multiplied_by, png_int_32 divided_by));
-#endif
-
-#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_INCH_CONVERSIONS_SUPPORTED)
-   /* Same deal, but issue a warning on overflow and return 0. */
-   PNG_EXTERN png_fixed_point png_muldiv_warn PNGARG((png_structp png_ptr,
-         png_fixed_point a, png_int_32 multiplied_by, png_int_32 divided_by));
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   /* Calculate a reciprocal - used for gamma values.  This returns
-    * 0 if the argument is 0 in order to maintain an undefined value,
-    * there are no warnings.
-    */
-   PNG_EXTERN png_fixed_point png_reciprocal PNGARG((png_fixed_point a));
-
-   /* The same but gives a reciprocal of the product of two fixed point
-    * values.  Accuracy is suitable for gamma calculations but this is
-    * not exact - use png_muldiv for that.
-    */
-   PNG_EXTERN png_fixed_point png_reciprocal2 PNGARG((png_fixed_point a,
-         png_fixed_point b));
-#endif
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   /* Internal fixed point gamma correction.  These APIs are called as
-    * required to convert single values - they don't need to be fast,
-    * they are not used when processing image pixel values.
-    *
-    * While the input is an 'unsigned' value it must actually be the
-    * correct bit value - 0..255 or 0..65535 as required.
-    */
-   PNG_EXTERN png_uint_16 png_gamma_correct PNGARG((png_structp png_ptr,
-         unsigned int value, png_fixed_point gamma_value));
-   PNG_EXTERN int png_gamma_significant PNGARG((png_fixed_point gamma_value));
-   PNG_EXTERN png_uint_16 png_gamma_16bit_correct PNGARG((unsigned int value,
-         png_fixed_point gamma_value));
-   PNG_EXTERN png_byte png_gamma_8bit_correct PNGARG((unsigned int value,
-         png_fixed_point gamma_value));
-   PNG_EXTERN void png_destroy_gamma_table(png_structp png_ptr);
-   PNG_EXTERN void png_build_gamma_table PNGARG((png_structp png_ptr,
-         int bit_depth));
-#endif
-
-   /* Maintainer: Put new private prototypes here ^ and in libpngpf.3 */
-
-#include "pngdebug.h"
-
-#ifdef __cplusplus
-}
-#endif
diff --git a/reg-io/png/lpng1510/pngread.c b/reg-io/png/lpng1510/pngread.c
deleted file mode 100644
index 96a2a566..00000000
--- a/reg-io/png/lpng1510/pngread.c
+++ /dev/null
@@ -1,1305 +0,0 @@
-
-/* pngread.c - read a PNG file
- *
- * Last changed in libpng 1.5.10 [March 8, 2012]
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This file contains routines that an application calls directly to
- * read a PNG file or stream.
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_READ_SUPPORTED
-
-/* Create a PNG structure for reading, and allocate any memory needed. */
-PNG_FUNCTION(png_structp,PNGAPI
-png_create_read_struct,(png_const_charp user_png_ver, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warn_fn),PNG_ALLOCATED)
-{
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   return (png_create_read_struct_2(user_png_ver, error_ptr, error_fn,
-       warn_fn, NULL, NULL, NULL));
-}
-
-/* Alternate create PNG structure for reading, and allocate any memory
- * needed.
- */
-PNG_FUNCTION(png_structp,PNGAPI
-png_create_read_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
-    png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED)
-{
-#endif /* PNG_USER_MEM_SUPPORTED */
-
-#ifdef PNG_SETJMP_SUPPORTED
-   volatile
-#endif
-   png_structp png_ptr;
-   volatile int png_cleanup_needed = 0;
-
-#ifdef PNG_SETJMP_SUPPORTED
-#ifdef USE_FAR_KEYWORD
-   jmp_buf tmp_jmpbuf;
-#endif
-#endif
-
-   png_debug(1, "in png_create_read_struct");
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_ptr = (png_structp)png_create_struct_2(PNG_STRUCT_PNG,
-       malloc_fn, mem_ptr);
-#else
-   png_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG);
-#endif
-   if (png_ptr == NULL)
-      return (NULL);
-
-   /* Added at libpng-1.2.6 */
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   png_ptr->user_width_max = PNG_USER_WIDTH_MAX;
-   png_ptr->user_height_max = PNG_USER_HEIGHT_MAX;
-
-   /* Added at libpng-1.2.43 and 1.4.0 */
-   png_ptr->user_chunk_cache_max = PNG_USER_CHUNK_CACHE_MAX;
-
-   /* Added at libpng-1.2.43 and 1.4.1 */
-   png_ptr->user_chunk_malloc_max = PNG_USER_CHUNK_MALLOC_MAX;
-#endif
-
-#ifdef PNG_SETJMP_SUPPORTED
-/* Applications that neglect to set up their own setjmp() and then
- * encounter a png_error() will longjmp here.  Since the jmpbuf is
- * then meaningless we abort instead of returning.
- */
-#ifdef USE_FAR_KEYWORD
-   if (setjmp(tmp_jmpbuf))
-#else
-   if (setjmp(png_jmpbuf(png_ptr))) /* Sets longjmp to match setjmp */
-#endif
-      PNG_ABORT();
-#ifdef USE_FAR_KEYWORD
-   png_memcpy(png_jmpbuf(png_ptr), tmp_jmpbuf, png_sizeof(jmp_buf));
-#endif
-#endif /* PNG_SETJMP_SUPPORTED */
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_set_mem_fn(png_ptr, mem_ptr, malloc_fn, free_fn);
-#endif
-
-   png_set_error_fn(png_ptr, error_ptr, error_fn, warn_fn);
-
-   /* Call the general version checker (shared with read and write code): */
-   if (!png_user_version_check(png_ptr, user_png_ver))
-      png_cleanup_needed = 1;
-
-   if (!png_cleanup_needed)
-   {
-   /* Initialize zbuf - compression buffer */
-   png_ptr->zbuf_size = PNG_ZBUF_SIZE;
-   png_ptr->zbuf = (png_bytep)png_malloc_warn(png_ptr, png_ptr->zbuf_size);
-
-   if (png_ptr->zbuf == NULL)
-      png_cleanup_needed = 1;
-   }
-
-   png_ptr->zstream.zalloc = png_zalloc;
-   png_ptr->zstream.zfree = png_zfree;
-   png_ptr->zstream.opaque = (voidpf)png_ptr;
-
-   if (!png_cleanup_needed)
-   {
-      switch (inflateInit(&png_ptr->zstream))
-      {
-         case Z_OK:
-            break; /* Do nothing */
-
-         case Z_MEM_ERROR:
-            png_warning(png_ptr, "zlib memory error");
-            png_cleanup_needed = 1;
-            break;
-
-         case Z_STREAM_ERROR:
-            png_warning(png_ptr, "zlib stream error");
-            png_cleanup_needed = 1;
-            break;
-
-         case Z_VERSION_ERROR:
-            png_warning(png_ptr, "zlib version error");
-            png_cleanup_needed = 1;
-            break;
-
-         default: png_warning(png_ptr, "Unknown zlib error");
-            png_cleanup_needed = 1;
-      }
-   }
-
-   if (png_cleanup_needed)
-   {
-      /* Clean up PNG structure and deallocate any memory. */
-      png_free(png_ptr, png_ptr->zbuf);
-      png_ptr->zbuf = NULL;
-#ifdef PNG_USER_MEM_SUPPORTED
-      png_destroy_struct_2((png_voidp)png_ptr,
-          (png_free_ptr)free_fn, (png_voidp)mem_ptr);
-#else
-      png_destroy_struct((png_voidp)png_ptr);
-#endif
-      return (NULL);
-   }
-
-   png_ptr->zstream.next_out = png_ptr->zbuf;
-   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
-
-   png_set_read_fn(png_ptr, NULL, NULL);
-
-
-   return (png_ptr);
-}
-
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read the information before the actual image data.  This has been
- * changed in v0.90 to allow reading a file that already has the magic
- * bytes read from the stream.  You can tell libpng how many bytes have
- * been read from the beginning of the stream (up to the maximum of 8)
- * via png_set_sig_bytes(), and we will only check the remaining bytes
- * here.  The application can then have access to the signature bytes we
- * read if it is determined that this isn't a valid PNG file.
- */
-void PNGAPI
-png_read_info(png_structp png_ptr, png_infop info_ptr)
-{
-   png_debug(1, "in png_read_info");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   /* Read and check the PNG file signature. */
-   png_read_sig(png_ptr, info_ptr);
-
-   for (;;)
-   {
-      png_uint_32 length = png_read_chunk_header(png_ptr);
-      png_uint_32 chunk_name = png_ptr->chunk_name;
-
-      /* This should be a binary subdivision search or a hash for
-       * matching the chunk name rather than a linear search.
-       */
-      if (chunk_name == png_IDAT)
-         if (png_ptr->mode & PNG_AFTER_IDAT)
-            png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
-
-      if (chunk_name == png_IHDR)
-         png_handle_IHDR(png_ptr, info_ptr, length);
-
-      else if (chunk_name == png_IEND)
-         png_handle_IEND(png_ptr, info_ptr, length);
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-      else if (png_chunk_unknown_handling(png_ptr, chunk_name) !=
-         PNG_HANDLE_CHUNK_AS_DEFAULT)
-      {
-         if (chunk_name == png_IDAT)
-            png_ptr->mode |= PNG_HAVE_IDAT;
-
-         png_handle_unknown(png_ptr, info_ptr, length);
-
-         if (chunk_name == png_PLTE)
-            png_ptr->mode |= PNG_HAVE_PLTE;
-
-         else if (chunk_name == png_IDAT)
-         {
-            if (!(png_ptr->mode & PNG_HAVE_IHDR))
-               png_error(png_ptr, "Missing IHDR before IDAT");
-
-            else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-                !(png_ptr->mode & PNG_HAVE_PLTE))
-               png_error(png_ptr, "Missing PLTE before IDAT");
-
-            break;
-         }
-      }
-#endif
-      else if (chunk_name == png_PLTE)
-         png_handle_PLTE(png_ptr, info_ptr, length);
-
-      else if (chunk_name == png_IDAT)
-      {
-         if (!(png_ptr->mode & PNG_HAVE_IHDR))
-            png_error(png_ptr, "Missing IHDR before IDAT");
-
-         else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-             !(png_ptr->mode & PNG_HAVE_PLTE))
-            png_error(png_ptr, "Missing PLTE before IDAT");
-
-         png_ptr->idat_size = length;
-         png_ptr->mode |= PNG_HAVE_IDAT;
-         break;
-      }
-
-#ifdef PNG_READ_bKGD_SUPPORTED
-      else if (chunk_name == png_bKGD)
-         png_handle_bKGD(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_cHRM_SUPPORTED
-      else if (chunk_name == png_cHRM)
-         png_handle_cHRM(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_gAMA_SUPPORTED
-      else if (chunk_name == png_gAMA)
-         png_handle_gAMA(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_hIST_SUPPORTED
-      else if (chunk_name == png_hIST)
-         png_handle_hIST(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_oFFs_SUPPORTED
-      else if (chunk_name == png_oFFs)
-         png_handle_oFFs(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_pCAL_SUPPORTED
-      else if (chunk_name == png_pCAL)
-         png_handle_pCAL(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sCAL_SUPPORTED
-      else if (chunk_name == png_sCAL)
-         png_handle_sCAL(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_pHYs_SUPPORTED
-      else if (chunk_name == png_pHYs)
-         png_handle_pHYs(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sBIT_SUPPORTED
-      else if (chunk_name == png_sBIT)
-         png_handle_sBIT(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sRGB_SUPPORTED
-      else if (chunk_name == png_sRGB)
-         png_handle_sRGB(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_iCCP_SUPPORTED
-      else if (chunk_name == png_iCCP)
-         png_handle_iCCP(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sPLT_SUPPORTED
-      else if (chunk_name == png_sPLT)
-         png_handle_sPLT(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tEXt_SUPPORTED
-      else if (chunk_name == png_tEXt)
-         png_handle_tEXt(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tIME_SUPPORTED
-      else if (chunk_name == png_tIME)
-         png_handle_tIME(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tRNS_SUPPORTED
-      else if (chunk_name == png_tRNS)
-         png_handle_tRNS(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_zTXt_SUPPORTED
-      else if (chunk_name == png_zTXt)
-         png_handle_zTXt(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_iTXt_SUPPORTED
-      else if (chunk_name == png_iTXt)
-         png_handle_iTXt(png_ptr, info_ptr, length);
-#endif
-
-      else
-         png_handle_unknown(png_ptr, info_ptr, length);
-   }
-}
-#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */
-
-/* Optional call to update the users info_ptr structure */
-void PNGAPI
-png_read_update_info(png_structp png_ptr, png_infop info_ptr)
-{
-   png_debug(1, "in png_read_update_info");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_read_start_row(png_ptr);
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-   png_read_transform_info(png_ptr, info_ptr);
-#else
-   PNG_UNUSED(info_ptr)
-#endif
-}
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Initialize palette, background, etc, after transformations
- * are set, but before any reading takes place.  This allows
- * the user to obtain a gamma-corrected palette, for example.
- * If the user doesn't call this, we will do it ourselves.
- */
-void PNGAPI
-png_start_read_image(png_structp png_ptr)
-{
-   png_debug(1, "in png_start_read_image");
-
-   if (png_ptr != NULL)
-     png_read_start_row(png_ptr);
-}
-#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-void PNGAPI
-png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
-{
-   int ret;
-
-   png_row_info row_info;
-
-   if (png_ptr == NULL)
-      return;
-
-   png_debug2(1, "in png_read_row (row %lu, pass %d)",
-       (unsigned long)png_ptr->row_number, png_ptr->pass);
-
-   /* png_read_start_row sets the information (in particular iwidth) for this
-    * interlace pass.
-    */
-   if (!(png_ptr->flags & PNG_FLAG_ROW_INIT))
-      png_read_start_row(png_ptr);
-
-   /* 1.5.6: row_info moved out of png_struct to a local here. */
-   row_info.width = png_ptr->iwidth; /* NOTE: width of current interlaced row */
-   row_info.color_type = png_ptr->color_type;
-   row_info.bit_depth = png_ptr->bit_depth;
-   row_info.channels = png_ptr->channels;
-   row_info.pixel_depth = png_ptr->pixel_depth;
-   row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width);
-
-   if (png_ptr->row_number == 0 && png_ptr->pass == 0)
-   {
-   /* Check for transforms that have been set but were defined out */
-#if defined(PNG_WRITE_INVERT_SUPPORTED) && !defined(PNG_READ_INVERT_SUPPORTED)
-   if (png_ptr->transformations & PNG_INVERT_MONO)
-      png_warning(png_ptr, "PNG_READ_INVERT_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_FILLER_SUPPORTED) && !defined(PNG_READ_FILLER_SUPPORTED)
-   if (png_ptr->transformations & PNG_FILLER)
-      png_warning(png_ptr, "PNG_READ_FILLER_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \
-    !defined(PNG_READ_PACKSWAP_SUPPORTED)
-   if (png_ptr->transformations & PNG_PACKSWAP)
-      png_warning(png_ptr, "PNG_READ_PACKSWAP_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_PACK_SUPPORTED) && !defined(PNG_READ_PACK_SUPPORTED)
-   if (png_ptr->transformations & PNG_PACK)
-      png_warning(png_ptr, "PNG_READ_PACK_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_SHIFT_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED)
-   if (png_ptr->transformations & PNG_SHIFT)
-      png_warning(png_ptr, "PNG_READ_SHIFT_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_BGR_SUPPORTED) && !defined(PNG_READ_BGR_SUPPORTED)
-   if (png_ptr->transformations & PNG_BGR)
-      png_warning(png_ptr, "PNG_READ_BGR_SUPPORTED is not defined");
-#endif
-
-#if defined(PNG_WRITE_SWAP_SUPPORTED) && !defined(PNG_READ_SWAP_SUPPORTED)
-   if (png_ptr->transformations & PNG_SWAP_BYTES)
-      png_warning(png_ptr, "PNG_READ_SWAP_SUPPORTED is not defined");
-#endif
-   }
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* If interlaced and we do not need a new row, combine row and return.
-    * Notice that the pixels we have from previous rows have been transformed
-    * already; we can only combine like with like (transformed or
-    * untransformed) and, because of the libpng API for interlaced images, this
-    * means we must transform before de-interlacing.
-    */
-   if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE))
-   {
-      switch (png_ptr->pass)
-      {
-         case 0:
-            if (png_ptr->row_number & 0x07)
-            {
-               if (dsp_row != NULL)
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 1:
-            if ((png_ptr->row_number & 0x07) || png_ptr->width < 5)
-            {
-               if (dsp_row != NULL)
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 2:
-            if ((png_ptr->row_number & 0x07) != 4)
-            {
-               if (dsp_row != NULL && (png_ptr->row_number & 4))
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 3:
-            if ((png_ptr->row_number & 3) || png_ptr->width < 3)
-            {
-               if (dsp_row != NULL)
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 4:
-            if ((png_ptr->row_number & 3) != 2)
-            {
-               if (dsp_row != NULL && (png_ptr->row_number & 2))
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-         case 5:
-            if ((png_ptr->row_number & 1) || png_ptr->width < 2)
-            {
-               if (dsp_row != NULL)
-                  png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         default:
-         case 6:
-            if (!(png_ptr->row_number & 1))
-            {
-               png_read_finish_row(png_ptr);
-               return;
-            }
-            break;
-      }
-   }
-#endif
-
-   if (!(png_ptr->mode & PNG_HAVE_IDAT))
-      png_error(png_ptr, "Invalid attempt to read row data");
-
-   png_ptr->zstream.next_out = png_ptr->row_buf;
-   png_ptr->zstream.avail_out =
-       (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth,
-       png_ptr->iwidth) + 1);
-
-   do
-   {
-      if (!(png_ptr->zstream.avail_in))
-      {
-         while (!png_ptr->idat_size)
-         {
-            png_crc_finish(png_ptr, 0);
-
-            png_ptr->idat_size = png_read_chunk_header(png_ptr);
-            if (png_ptr->chunk_name != png_IDAT)
-               png_error(png_ptr, "Not enough image data");
-         }
-         png_ptr->zstream.avail_in = (uInt)png_ptr->zbuf_size;
-         png_ptr->zstream.next_in = png_ptr->zbuf;
-         if (png_ptr->zbuf_size > png_ptr->idat_size)
-            png_ptr->zstream.avail_in = (uInt)png_ptr->idat_size;
-         png_crc_read(png_ptr, png_ptr->zbuf,
-             (png_size_t)png_ptr->zstream.avail_in);
-         png_ptr->idat_size -= png_ptr->zstream.avail_in;
-      }
-
-      ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
-
-      if (ret == Z_STREAM_END)
-      {
-         if (png_ptr->zstream.avail_out || png_ptr->zstream.avail_in ||
-            png_ptr->idat_size)
-            png_benign_error(png_ptr, "Extra compressed data");
-         png_ptr->mode |= PNG_AFTER_IDAT;
-         png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
-         break;
-      }
-
-      if (ret != Z_OK)
-         png_error(png_ptr, png_ptr->zstream.msg ? png_ptr->zstream.msg :
-             "Decompression error");
-
-   } while (png_ptr->zstream.avail_out);
-
-   if (png_ptr->row_buf[0] > PNG_FILTER_VALUE_NONE)
-   {
-      if (png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST)
-         png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1,
-            png_ptr->prev_row + 1, png_ptr->row_buf[0]);
-      else
-         png_error(png_ptr, "bad adaptive filter value");
-   }
-
-   /* libpng 1.5.6: the following line was copying png_ptr->rowbytes before
-    * 1.5.6, while the buffer really is this big in current versions of libpng
-    * it may not be in the future, so this was changed just to copy the
-    * interlaced count:
-    */
-   png_memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1);
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
-       (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
-   {
-      /* Intrapixel differencing */
-      png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1);
-   }
-#endif
-
-
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-   if (png_ptr->transformations)
-      png_do_read_transformations(png_ptr, &row_info);
-#endif
-
-   /* The transformed pixel depth should match the depth now in row_info. */
-   if (png_ptr->transformed_pixel_depth == 0)
-   {
-      png_ptr->transformed_pixel_depth = row_info.pixel_depth;
-      if (row_info.pixel_depth > png_ptr->maximum_pixel_depth)
-         png_error(png_ptr, "sequential row overflow");
-   }
-
-   else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth)
-      png_error(png_ptr, "internal sequential row size calculation error");
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* Blow up interlaced rows to full size */
-   if (png_ptr->interlaced &&
-      (png_ptr->transformations & PNG_INTERLACE))
-   {
-      if (png_ptr->pass < 6)
-         png_do_read_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass,
-            png_ptr->transformations);
-
-      if (dsp_row != NULL)
-         png_combine_row(png_ptr, dsp_row, 1/*display*/);
-
-      if (row != NULL)
-         png_combine_row(png_ptr, row, 0/*row*/);
-   }
-
-   else
-#endif
-   {
-      if (row != NULL)
-         png_combine_row(png_ptr, row, -1/*ignored*/);
-
-      if (dsp_row != NULL)
-         png_combine_row(png_ptr, dsp_row, -1/*ignored*/);
-   }
-   png_read_finish_row(png_ptr);
-
-   if (png_ptr->read_row_fn != NULL)
-      (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
-}
-#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read one or more rows of image data.  If the image is interlaced,
- * and png_set_interlace_handling() has been called, the rows need to
- * contain the contents of the rows from the previous pass.  If the
- * image has alpha or transparency, and png_handle_alpha()[*] has been
- * called, the rows contents must be initialized to the contents of the
- * screen.
- *
- * "row" holds the actual image, and pixels are placed in it
- * as they arrive.  If the image is displayed after each pass, it will
- * appear to "sparkle" in.  "display_row" can be used to display a
- * "chunky" progressive image, with finer detail added as it becomes
- * available.  If you do not want this "chunky" display, you may pass
- * NULL for display_row.  If you do not want the sparkle display, and
- * you have not called png_handle_alpha(), you may pass NULL for rows.
- * If you have called png_handle_alpha(), and the image has either an
- * alpha channel or a transparency chunk, you must provide a buffer for
- * rows.  In this case, you do not have to provide a display_row buffer
- * also, but you may.  If the image is not interlaced, or if you have
- * not called png_set_interlace_handling(), the display_row buffer will
- * be ignored, so pass NULL to it.
- *
- * [*] png_handle_alpha() does not exist yet, as of this version of libpng
- */
-
-void PNGAPI
-png_read_rows(png_structp png_ptr, png_bytepp row,
-    png_bytepp display_row, png_uint_32 num_rows)
-{
-   png_uint_32 i;
-   png_bytepp rp;
-   png_bytepp dp;
-
-   png_debug(1, "in png_read_rows");
-
-   if (png_ptr == NULL)
-      return;
-
-   rp = row;
-   dp = display_row;
-   if (rp != NULL && dp != NULL)
-      for (i = 0; i < num_rows; i++)
-      {
-         png_bytep rptr = *rp++;
-         png_bytep dptr = *dp++;
-
-         png_read_row(png_ptr, rptr, dptr);
-      }
-
-   else if (rp != NULL)
-      for (i = 0; i < num_rows; i++)
-      {
-         png_bytep rptr = *rp;
-         png_read_row(png_ptr, rptr, NULL);
-         rp++;
-      }
-
-   else if (dp != NULL)
-      for (i = 0; i < num_rows; i++)
-      {
-         png_bytep dptr = *dp;
-         png_read_row(png_ptr, NULL, dptr);
-         dp++;
-      }
-}
-#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read the entire image.  If the image has an alpha channel or a tRNS
- * chunk, and you have called png_handle_alpha()[*], you will need to
- * initialize the image to the current image that PNG will be overlaying.
- * We set the num_rows again here, in case it was incorrectly set in
- * png_read_start_row() by a call to png_read_update_info() or
- * png_start_read_image() if png_set_interlace_handling() wasn't called
- * prior to either of these functions like it should have been.  You can
- * only call this function once.  If you desire to have an image for
- * each pass of a interlaced image, use png_read_rows() instead.
- *
- * [*] png_handle_alpha() does not exist yet, as of this version of libpng
- */
-void PNGAPI
-png_read_image(png_structp png_ptr, png_bytepp image)
-{
-   png_uint_32 i, image_height;
-   int pass, j;
-   png_bytepp rp;
-
-   png_debug(1, "in png_read_image");
-
-   if (png_ptr == NULL)
-      return;
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   if (!(png_ptr->flags & PNG_FLAG_ROW_INIT))
-   {
-      pass = png_set_interlace_handling(png_ptr);
-      /* And make sure transforms are initialized. */
-      png_start_read_image(png_ptr);
-   }
-   else
-   {
-      if (png_ptr->interlaced && !(png_ptr->transformations & PNG_INTERLACE))
-      {
-         /* Caller called png_start_read_image or png_read_update_info without
-          * first turning on the PNG_INTERLACE transform.  We can fix this here,
-          * but the caller should do it!
-          */
-         png_warning(png_ptr, "Interlace handling should be turned on when "
-            "using png_read_image");
-         /* Make sure this is set correctly */
-         png_ptr->num_rows = png_ptr->height;
-      }
-
-      /* Obtain the pass number, which also turns on the PNG_INTERLACE flag in
-       * the above error case.
-       */
-      pass = png_set_interlace_handling(png_ptr);
-   }
-#else
-   if (png_ptr->interlaced)
-      png_error(png_ptr,
-          "Cannot read interlaced image -- interlace handler disabled");
-
-   pass = 1;
-#endif
-
-   image_height=png_ptr->height;
-
-   for (j = 0; j < pass; j++)
-   {
-      rp = image;
-      for (i = 0; i < image_height; i++)
-      {
-         png_read_row(png_ptr, *rp, NULL);
-         rp++;
-      }
-   }
-}
-#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-/* Read the end of the PNG file.  Will not read past the end of the
- * file, will verify the end is accurate, and will read any comments
- * or time information at the end of the file, if info is not NULL.
- */
-void PNGAPI
-png_read_end(png_structp png_ptr, png_infop info_ptr)
-{
-   png_debug(1, "in png_read_end");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_crc_finish(png_ptr, 0); /* Finish off CRC from last IDAT chunk */
-
-#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   /* Report invalid palette index; added at libng-1.5.10 */
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-      png_ptr->num_palette_max > png_ptr->num_palette)
-     png_benign_error(png_ptr, "Read palette index exceeding num_palette");
-#endif
-
-   do
-   {
-      png_uint_32 length = png_read_chunk_header(png_ptr);
-      png_uint_32 chunk_name = png_ptr->chunk_name;
-
-      if (chunk_name == png_IHDR)
-         png_handle_IHDR(png_ptr, info_ptr, length);
-
-      else if (chunk_name == png_IEND)
-         png_handle_IEND(png_ptr, info_ptr, length);
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-      else if (png_chunk_unknown_handling(png_ptr, chunk_name) !=
-         PNG_HANDLE_CHUNK_AS_DEFAULT)
-      {
-         if (chunk_name == png_IDAT)
-         {
-            if ((length > 0) || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT))
-               png_benign_error(png_ptr, "Too many IDATs found");
-         }
-         png_handle_unknown(png_ptr, info_ptr, length);
-         if (chunk_name == png_PLTE)
-            png_ptr->mode |= PNG_HAVE_PLTE;
-      }
-#endif
-
-      else if (chunk_name == png_IDAT)
-      {
-         /* Zero length IDATs are legal after the last IDAT has been
-          * read, but not after other chunks have been read.
-          */
-         if ((length > 0) || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT))
-            png_benign_error(png_ptr, "Too many IDATs found");
-
-         png_crc_finish(png_ptr, length);
-      }
-      else if (chunk_name == png_PLTE)
-         png_handle_PLTE(png_ptr, info_ptr, length);
-
-#ifdef PNG_READ_bKGD_SUPPORTED
-      else if (chunk_name == png_bKGD)
-         png_handle_bKGD(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_cHRM_SUPPORTED
-      else if (chunk_name == png_cHRM)
-         png_handle_cHRM(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_gAMA_SUPPORTED
-      else if (chunk_name == png_gAMA)
-         png_handle_gAMA(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_hIST_SUPPORTED
-      else if (chunk_name == png_hIST)
-         png_handle_hIST(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_oFFs_SUPPORTED
-      else if (chunk_name == png_oFFs)
-         png_handle_oFFs(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_pCAL_SUPPORTED
-      else if (chunk_name == png_pCAL)
-         png_handle_pCAL(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sCAL_SUPPORTED
-      else if (chunk_name == png_sCAL)
-         png_handle_sCAL(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_pHYs_SUPPORTED
-      else if (chunk_name == png_pHYs)
-         png_handle_pHYs(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sBIT_SUPPORTED
-      else if (chunk_name == png_sBIT)
-         png_handle_sBIT(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sRGB_SUPPORTED
-      else if (chunk_name == png_sRGB)
-         png_handle_sRGB(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_iCCP_SUPPORTED
-      else if (chunk_name == png_iCCP)
-         png_handle_iCCP(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_sPLT_SUPPORTED
-      else if (chunk_name == png_sPLT)
-         png_handle_sPLT(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tEXt_SUPPORTED
-      else if (chunk_name == png_tEXt)
-         png_handle_tEXt(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tIME_SUPPORTED
-      else if (chunk_name == png_tIME)
-         png_handle_tIME(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_tRNS_SUPPORTED
-      else if (chunk_name == png_tRNS)
-         png_handle_tRNS(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_zTXt_SUPPORTED
-      else if (chunk_name == png_zTXt)
-         png_handle_zTXt(png_ptr, info_ptr, length);
-#endif
-
-#ifdef PNG_READ_iTXt_SUPPORTED
-      else if (chunk_name == png_iTXt)
-         png_handle_iTXt(png_ptr, info_ptr, length);
-#endif
-
-      else
-         png_handle_unknown(png_ptr, info_ptr, length);
-   } while (!(png_ptr->mode & PNG_HAVE_IEND));
-}
-#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */
-
-/* Free all memory used by the read */
-void PNGAPI
-png_destroy_read_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr,
-    png_infopp end_info_ptr_ptr)
-{
-   png_structp png_ptr = NULL;
-   png_infop info_ptr = NULL, end_info_ptr = NULL;
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_free_ptr free_fn = NULL;
-   png_voidp mem_ptr = NULL;
-#endif
-
-   png_debug(1, "in png_destroy_read_struct");
-
-   if (png_ptr_ptr != NULL)
-      png_ptr = *png_ptr_ptr;
-   if (png_ptr == NULL)
-      return;
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   free_fn = png_ptr->free_fn;
-   mem_ptr = png_ptr->mem_ptr;
-#endif
-
-   if (info_ptr_ptr != NULL)
-      info_ptr = *info_ptr_ptr;
-
-   if (end_info_ptr_ptr != NULL)
-      end_info_ptr = *end_info_ptr_ptr;
-
-   png_read_destroy(png_ptr, info_ptr, end_info_ptr);
-
-   if (info_ptr != NULL)
-   {
-#ifdef PNG_TEXT_SUPPORTED
-      png_free_data(png_ptr, info_ptr, PNG_FREE_TEXT, -1);
-#endif
-
-#ifdef PNG_USER_MEM_SUPPORTED
-      png_destroy_struct_2((png_voidp)info_ptr, (png_free_ptr)free_fn,
-          (png_voidp)mem_ptr);
-#else
-      png_destroy_struct((png_voidp)info_ptr);
-#endif
-      *info_ptr_ptr = NULL;
-   }
-
-   if (end_info_ptr != NULL)
-   {
-#ifdef PNG_READ_TEXT_SUPPORTED
-      png_free_data(png_ptr, end_info_ptr, PNG_FREE_TEXT, -1);
-#endif
-#ifdef PNG_USER_MEM_SUPPORTED
-      png_destroy_struct_2((png_voidp)end_info_ptr, (png_free_ptr)free_fn,
-          (png_voidp)mem_ptr);
-#else
-      png_destroy_struct((png_voidp)end_info_ptr);
-#endif
-      *end_info_ptr_ptr = NULL;
-   }
-
-   if (png_ptr != NULL)
-   {
-#ifdef PNG_USER_MEM_SUPPORTED
-      png_destroy_struct_2((png_voidp)png_ptr, (png_free_ptr)free_fn,
-          (png_voidp)mem_ptr);
-#else
-      png_destroy_struct((png_voidp)png_ptr);
-#endif
-      *png_ptr_ptr = NULL;
-   }
-}
-
-/* Free all memory used by the read (old method) */
-void /* PRIVATE */
-png_read_destroy(png_structp png_ptr, png_infop info_ptr,
-    png_infop end_info_ptr)
-{
-#ifdef PNG_SETJMP_SUPPORTED
-   jmp_buf tmp_jmp;
-#endif
-   png_error_ptr error_fn;
-#ifdef PNG_WARNINGS_SUPPORTED
-   png_error_ptr warning_fn;
-#endif
-   png_voidp error_ptr;
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_free_ptr free_fn;
-#endif
-
-   png_debug(1, "in png_read_destroy");
-
-   if (info_ptr != NULL)
-      png_info_destroy(png_ptr, info_ptr);
-
-   if (end_info_ptr != NULL)
-      png_info_destroy(png_ptr, end_info_ptr);
-
-#ifdef PNG_READ_GAMMA_SUPPORTED
-   png_destroy_gamma_table(png_ptr);
-#endif
-
-   png_free(png_ptr, png_ptr->zbuf);
-   png_free(png_ptr, png_ptr->big_row_buf);
-   png_free(png_ptr, png_ptr->big_prev_row);
-   png_free(png_ptr, png_ptr->chunkdata);
-
-#ifdef PNG_READ_QUANTIZE_SUPPORTED
-   png_free(png_ptr, png_ptr->palette_lookup);
-   png_free(png_ptr, png_ptr->quantize_index);
-#endif
-
-   if (png_ptr->free_me & PNG_FREE_PLTE)
-      png_zfree(png_ptr, png_ptr->palette);
-   png_ptr->free_me &= ~PNG_FREE_PLTE;
-
-#if defined(PNG_tRNS_SUPPORTED) || \
-    defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
-   if (png_ptr->free_me & PNG_FREE_TRNS)
-      png_free(png_ptr, png_ptr->trans_alpha);
-   png_ptr->free_me &= ~PNG_FREE_TRNS;
-#endif
-
-#ifdef PNG_READ_hIST_SUPPORTED
-   if (png_ptr->free_me & PNG_FREE_HIST)
-      png_free(png_ptr, png_ptr->hist);
-   png_ptr->free_me &= ~PNG_FREE_HIST;
-#endif
-
-   inflateEnd(&png_ptr->zstream);
-
-#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
-   png_free(png_ptr, png_ptr->save_buffer);
-#endif
-
-   /* Save the important info out of the png_struct, in case it is
-    * being used again.
-    */
-#ifdef PNG_SETJMP_SUPPORTED
-   png_memcpy(tmp_jmp, png_ptr->longjmp_buffer, png_sizeof(jmp_buf));
-#endif
-
-   error_fn = png_ptr->error_fn;
-#ifdef PNG_WARNINGS_SUPPORTED
-   warning_fn = png_ptr->warning_fn;
-#endif
-   error_ptr = png_ptr->error_ptr;
-#ifdef PNG_USER_MEM_SUPPORTED
-   free_fn = png_ptr->free_fn;
-#endif
-
-   png_memset(png_ptr, 0, png_sizeof(png_struct));
-
-   png_ptr->error_fn = error_fn;
-#ifdef PNG_WARNINGS_SUPPORTED
-   png_ptr->warning_fn = warning_fn;
-#endif
-   png_ptr->error_ptr = error_ptr;
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_ptr->free_fn = free_fn;
-#endif
-
-#ifdef PNG_SETJMP_SUPPORTED
-   png_memcpy(png_ptr->longjmp_buffer, tmp_jmp, png_sizeof(jmp_buf));
-#endif
-
-}
-
-void PNGAPI
-png_set_read_status_fn(png_structp png_ptr, png_read_status_ptr read_row_fn)
-{
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->read_row_fn = read_row_fn;
-}
-
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-void PNGAPI
-png_read_png(png_structp png_ptr, png_infop info_ptr,
-                           int transforms,
-                           voidp params)
-{
-   int row;
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   /* png_read_info() gives us all of the information from the
-    * PNG file before the first IDAT (image data chunk).
-    */
-   png_read_info(png_ptr, info_ptr);
-   if (info_ptr->height > PNG_UINT_32_MAX/png_sizeof(png_bytep))
-      png_error(png_ptr, "Image is too high to process with png_read_png()");
-
-   /* -------------- image transformations start here ------------------- */
-
-#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED
-   /* Tell libpng to strip 16-bit/color files down to 8 bits per color.
-    */
-   if (transforms & PNG_TRANSFORM_SCALE_16)
-   {
-     /* Added at libpng-1.5.4. "strip_16" produces the same result that it
-      * did in earlier versions, while "scale_16" is now more accurate.
-      */
-      png_set_scale_16(png_ptr);
-   }
-#endif
-
-#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED
-   /* If both SCALE and STRIP are required pngrtran will effectively cancel the
-    * latter by doing SCALE first.  This is ok and allows apps not to check for
-    * which is supported to get the right answer.
-    */
-   if (transforms & PNG_TRANSFORM_STRIP_16)
-      png_set_strip_16(png_ptr);
-#endif
-
-#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
-   /* Strip alpha bytes from the input data without combining with
-    * the background (not recommended).
-    */
-   if (transforms & PNG_TRANSFORM_STRIP_ALPHA)
-      png_set_strip_alpha(png_ptr);
-#endif
-
-#if defined(PNG_READ_PACK_SUPPORTED) && !defined(PNG_READ_EXPAND_SUPPORTED)
-   /* Extract multiple pixels with bit depths of 1, 2, or 4 from a single
-    * byte into separate bytes (useful for paletted and grayscale images).
-    */
-   if (transforms & PNG_TRANSFORM_PACKING)
-      png_set_packing(png_ptr);
-#endif
-
-#ifdef PNG_READ_PACKSWAP_SUPPORTED
-   /* Change the order of packed pixels to least significant bit first
-    * (not useful if you are using png_set_packing).
-    */
-   if (transforms & PNG_TRANSFORM_PACKSWAP)
-      png_set_packswap(png_ptr);
-#endif
-
-#ifdef PNG_READ_EXPAND_SUPPORTED
-   /* Expand paletted colors into true RGB triplets
-    * Expand grayscale images to full 8 bits from 1, 2, or 4 bits/pixel
-    * Expand paletted or RGB images with transparency to full alpha
-    * channels so the data will be available as RGBA quartets.
-    */
-   if (transforms & PNG_TRANSFORM_EXPAND)
-      if ((png_ptr->bit_depth < 8) ||
-          (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ||
-          (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)))
-         png_set_expand(png_ptr);
-#endif
-
-   /* We don't handle background color or gamma transformation or quantizing.
-    */
-
-#ifdef PNG_READ_INVERT_SUPPORTED
-   /* Invert monochrome files to have 0 as white and 1 as black
-    */
-   if (transforms & PNG_TRANSFORM_INVERT_MONO)
-      png_set_invert_mono(png_ptr);
-#endif
-
-#ifdef PNG_READ_SHIFT_SUPPORTED
-   /* If you want to shift the pixel values from the range [0,255] or
-    * [0,65535] to the original [0,7] or [0,31], or whatever range the
-    * colors were originally in:
-    */
-   if ((transforms & PNG_TRANSFORM_SHIFT)
-       && png_get_valid(png_ptr, info_ptr, PNG_INFO_sBIT))
-   {
-      png_color_8p sig_bit;
-
-      png_get_sBIT(png_ptr, info_ptr, &sig_bit);
-      png_set_shift(png_ptr, sig_bit);
-   }
-#endif
-
-#ifdef PNG_READ_BGR_SUPPORTED
-   /* Flip the RGB pixels to BGR (or RGBA to BGRA) */
-   if (transforms & PNG_TRANSFORM_BGR)
-      png_set_bgr(png_ptr);
-#endif
-
-#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED
-   /* Swap the RGBA or GA data to ARGB or AG (or BGRA to ABGR) */
-   if (transforms & PNG_TRANSFORM_SWAP_ALPHA)
-      png_set_swap_alpha(png_ptr);
-#endif
-
-#ifdef PNG_READ_SWAP_SUPPORTED
-   /* Swap bytes of 16-bit files to least significant byte first */
-   if (transforms & PNG_TRANSFORM_SWAP_ENDIAN)
-      png_set_swap(png_ptr);
-#endif
-
-/* Added at libpng-1.2.41 */
-#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED
-   /* Invert the alpha channel from opacity to transparency */
-   if (transforms & PNG_TRANSFORM_INVERT_ALPHA)
-      png_set_invert_alpha(png_ptr);
-#endif
-
-/* Added at libpng-1.2.41 */
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   /* Expand grayscale image to RGB */
-   if (transforms & PNG_TRANSFORM_GRAY_TO_RGB)
-      png_set_gray_to_rgb(png_ptr);
-#endif
-
-/* Added at libpng-1.5.4 */
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-   if (transforms & PNG_TRANSFORM_EXPAND_16)
-      png_set_expand_16(png_ptr);
-#endif
-
-   /* We don't handle adding filler bytes */
-
-   /* We use png_read_image and rely on that for interlace handling, but we also
-    * call png_read_update_info therefore must turn on interlace handling now:
-    */
-   (void)png_set_interlace_handling(png_ptr);
-
-   /* Optional call to gamma correct and add the background to the palette
-    * and update info structure.  REQUIRED if you are expecting libpng to
-    * update the palette for you (i.e., you selected such a transform above).
-    */
-   png_read_update_info(png_ptr, info_ptr);
-
-   /* -------------- image transformations end here ------------------- */
-
-   png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0);
-   if (info_ptr->row_pointers == NULL)
-   {
-      png_uint_32 iptr;
-
-      info_ptr->row_pointers = (png_bytepp)png_malloc(png_ptr,
-          info_ptr->height * png_sizeof(png_bytep));
-      for (iptr=0; iptr<info_ptr->height; iptr++)
-         info_ptr->row_pointers[iptr] = NULL;
-
-      info_ptr->free_me |= PNG_FREE_ROWS;
-
-      for (row = 0; row < (int)info_ptr->height; row++)
-         info_ptr->row_pointers[row] = (png_bytep)png_malloc(png_ptr,
-            png_get_rowbytes(png_ptr, info_ptr));
-   }
-
-   png_read_image(png_ptr, info_ptr->row_pointers);
-   info_ptr->valid |= PNG_INFO_IDAT;
-
-   /* Read rest of file, and get additional chunks in info_ptr - REQUIRED */
-   png_read_end(png_ptr, info_ptr);
-
-   PNG_UNUSED(transforms)   /* Quiet compiler warnings */
-   PNG_UNUSED(params)
-
-}
-#endif /* PNG_INFO_IMAGE_SUPPORTED */
-#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */
-#endif /* PNG_READ_SUPPORTED */
diff --git a/reg-io/png/lpng1510/pngrutil.c b/reg-io/png/lpng1510/pngrutil.c
deleted file mode 100644
index 059b3724..00000000
--- a/reg-io/png/lpng1510/pngrutil.c
+++ /dev/null
@@ -1,4159 +0,0 @@
-
-/* pngrutil.c - utilities to read a PNG file
- *
- * Last changed in libpng 1.5.10 [March 8, 2012]
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This file contains routines that are only called from within
- * libpng itself during the course of reading an image.
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_READ_SUPPORTED
-
-#define png_strtod(p,a,b) strtod(a,b)
-
-png_uint_32 PNGAPI
-png_get_uint_31(png_structp png_ptr, png_const_bytep buf)
-{
-   png_uint_32 uval = png_get_uint_32(buf);
-
-   if (uval > PNG_UINT_31_MAX)
-      png_error(png_ptr, "PNG unsigned integer out of range");
-
-   return (uval);
-}
-
-#if defined(PNG_READ_gAMA_SUPPORTED) || defined(PNG_READ_cHRM_SUPPORTED)
-/* The following is a variation on the above for use with the fixed
- * point values used for gAMA and cHRM.  Instead of png_error it
- * issues a warning and returns (-1) - an invalid value because both
- * gAMA and cHRM use *unsigned* integers for fixed point values.
- */
-#define PNG_FIXED_ERROR (-1)
-
-static png_fixed_point /* PRIVATE */
-png_get_fixed_point(png_structp png_ptr, png_const_bytep buf)
-{
-   png_uint_32 uval = png_get_uint_32(buf);
-
-   if (uval <= PNG_UINT_31_MAX)
-      return (png_fixed_point)uval; /* known to be in range */
-
-   /* The caller can turn off the warning by passing NULL. */
-   if (png_ptr != NULL)
-      png_warning(png_ptr, "PNG fixed point integer out of range");
-
-   return PNG_FIXED_ERROR;
-}
-#endif
-
-#ifdef PNG_READ_INT_FUNCTIONS_SUPPORTED
-/* NOTE: the read macros will obscure these definitions, so that if
- * PNG_USE_READ_MACROS is set the library will not use them internally,
- * but the APIs will still be available externally.
- *
- * The parentheses around "PNGAPI function_name" in the following three
- * functions are necessary because they allow the macros to co-exist with
- * these (unused but exported) functions.
- */
-
-/* Grab an unsigned 32-bit integer from a buffer in big-endian format. */
-png_uint_32 (PNGAPI
-png_get_uint_32)(png_const_bytep buf)
-{
-   png_uint_32 uval =
-       ((png_uint_32)(*(buf    )) << 24) +
-       ((png_uint_32)(*(buf + 1)) << 16) +
-       ((png_uint_32)(*(buf + 2)) <<  8) +
-       ((png_uint_32)(*(buf + 3))      ) ;
-
-   return uval;
-}
-
-/* Grab a signed 32-bit integer from a buffer in big-endian format.  The
- * data is stored in the PNG file in two's complement format and there
- * is no guarantee that a 'png_int_32' is exactly 32 bits, therefore
- * the following code does a two's complement to native conversion.
- */
-png_int_32 (PNGAPI
-png_get_int_32)(png_const_bytep buf)
-{
-   png_uint_32 uval = png_get_uint_32(buf);
-   if ((uval & 0x80000000) == 0) /* non-negative */
-      return uval;
-
-   uval = (uval ^ 0xffffffff) + 1;  /* 2's complement: -x = ~x+1 */
-   return -(png_int_32)uval;
-}
-
-/* Grab an unsigned 16-bit integer from a buffer in big-endian format. */
-png_uint_16 (PNGAPI
-png_get_uint_16)(png_const_bytep buf)
-{
-   /* ANSI-C requires an int value to accomodate at least 16 bits so this
-    * works and allows the compiler not to worry about possible narrowing
-    * on 32 bit systems.  (Pre-ANSI systems did not make integers smaller
-    * than 16 bits either.)
-    */
-   unsigned int val =
-       ((unsigned int)(*buf) << 8) +
-       ((unsigned int)(*(buf + 1)));
-
-   return (png_uint_16)val;
-}
-
-#endif /* PNG_READ_INT_FUNCTIONS_SUPPORTED */
-
-/* Read and check the PNG file signature */
-void /* PRIVATE */
-png_read_sig(png_structp png_ptr, png_infop info_ptr)
-{
-   png_size_t num_checked, num_to_check;
-
-   /* Exit if the user application does not expect a signature. */
-   if (png_ptr->sig_bytes >= 8)
-      return;
-
-   num_checked = png_ptr->sig_bytes;
-   num_to_check = 8 - num_checked;
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   png_ptr->io_state = PNG_IO_READING | PNG_IO_SIGNATURE;
-#endif
-
-   /* The signature must be serialized in a single I/O call. */
-   png_read_data(png_ptr, &(info_ptr->signature[num_checked]), num_to_check);
-   png_ptr->sig_bytes = 8;
-
-   if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check))
-   {
-      if (num_checked < 4 &&
-          png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4))
-         png_error(png_ptr, "Not a PNG file");
-      else
-         png_error(png_ptr, "PNG file corrupted by ASCII conversion");
-   }
-   if (num_checked < 3)
-      png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE;
-}
-
-/* Read the chunk header (length + type name).
- * Put the type name into png_ptr->chunk_name, and return the length.
- */
-png_uint_32 /* PRIVATE */
-png_read_chunk_header(png_structp png_ptr)
-{
-   png_byte buf[8];
-   png_uint_32 length;
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_HDR;
-#endif
-
-   /* Read the length and the chunk name.
-    * This must be performed in a single I/O call.
-    */
-   png_read_data(png_ptr, buf, 8);
-   length = png_get_uint_31(png_ptr, buf);
-
-   /* Put the chunk name into png_ptr->chunk_name. */
-   png_ptr->chunk_name = PNG_CHUNK_FROM_STRING(buf+4);
-
-   png_debug2(0, "Reading %lx chunk, length = %lu",
-       (unsigned long)png_ptr->chunk_name, (unsigned long)length);
-
-   /* Reset the crc and run it over the chunk name. */
-   png_reset_crc(png_ptr);
-   png_calculate_crc(png_ptr, buf + 4, 4);
-
-   /* Check to see if chunk name is valid. */
-   png_check_chunk_name(png_ptr, png_ptr->chunk_name);
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_DATA;
-#endif
-
-   return length;
-}
-
-/* Read data, and (optionally) run it through the CRC. */
-void /* PRIVATE */
-png_crc_read(png_structp png_ptr, png_bytep buf, png_size_t length)
-{
-   if (png_ptr == NULL)
-      return;
-
-   png_read_data(png_ptr, buf, length);
-   png_calculate_crc(png_ptr, buf, length);
-}
-
-/* Optionally skip data and then check the CRC.  Depending on whether we
- * are reading a ancillary or critical chunk, and how the program has set
- * things up, we may calculate the CRC on the data and print a message.
- * Returns '1' if there was a CRC error, '0' otherwise.
- */
-int /* PRIVATE */
-png_crc_finish(png_structp png_ptr, png_uint_32 skip)
-{
-   png_size_t i;
-   png_size_t istop = png_ptr->zbuf_size;
-
-   for (i = (png_size_t)skip; i > istop; i -= istop)
-   {
-      png_crc_read(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size);
-   }
-
-   if (i)
-   {
-      png_crc_read(png_ptr, png_ptr->zbuf, i);
-   }
-
-   if (png_crc_error(png_ptr))
-   {
-      if (PNG_CHUNK_ANCILLIARY(png_ptr->chunk_name) ?
-          !(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) :
-          (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_USE))
-      {
-         png_chunk_warning(png_ptr, "CRC error");
-      }
-
-      else
-      {
-         png_chunk_benign_error(png_ptr, "CRC error");
-         return (0);
-      }
-
-      return (1);
-   }
-
-   return (0);
-}
-
-/* Compare the CRC stored in the PNG file with that calculated by libpng from
- * the data it has read thus far.
- */
-int /* PRIVATE */
-png_crc_error(png_structp png_ptr)
-{
-   png_byte crc_bytes[4];
-   png_uint_32 crc;
-   int need_crc = 1;
-
-   if (PNG_CHUNK_ANCILLIARY(png_ptr->chunk_name))
-   {
-      if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) ==
-          (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN))
-         need_crc = 0;
-   }
-
-   else /* critical */
-   {
-      if (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE)
-         need_crc = 0;
-   }
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_CRC;
-#endif
-
-   /* The chunk CRC must be serialized in a single I/O call. */
-   png_read_data(png_ptr, crc_bytes, 4);
-
-   if (need_crc)
-   {
-      crc = png_get_uint_32(crc_bytes);
-      return ((int)(crc != png_ptr->crc));
-   }
-
-   else
-      return (0);
-}
-
-#ifdef PNG_READ_COMPRESSED_TEXT_SUPPORTED
-static png_size_t
-png_inflate(png_structp png_ptr, png_bytep data, png_size_t size,
-    png_bytep output, png_size_t output_size)
-{
-   png_size_t count = 0;
-
-   /* zlib can't necessarily handle more than 65535 bytes at once (i.e. it can't
-    * even necessarily handle 65536 bytes) because the type uInt is "16 bits or
-    * more".  Consequently it is necessary to chunk the input to zlib.  This
-    * code uses ZLIB_IO_MAX, from pngpriv.h, as the maximum (the maximum value
-    * that can be stored in a uInt.)  It is possible to set ZLIB_IO_MAX to a
-    * lower value in pngpriv.h and this may sometimes have a performance
-    * advantage, because it forces access of the input data to be separated from
-    * at least some of the use by some period of time.
-    */
-   png_ptr->zstream.next_in = data;
-   /* avail_in is set below from 'size' */
-   png_ptr->zstream.avail_in = 0;
-
-   while (1)
-   {
-      int ret, avail;
-
-      /* The setting of 'avail_in' used to be outside the loop; by setting it
-       * inside it is possible to chunk the input to zlib and simply rely on
-       * zlib to advance the 'next_in' pointer.  This allows arbitrary amounts o
-       * data to be passed through zlib at the unavoidable cost of requiring a
-       * window save (memcpy of up to 32768 output bytes) every ZLIB_IO_MAX
-       * input bytes.
-       */
-      if (png_ptr->zstream.avail_in == 0 && size > 0)
-      {
-         if (size <= ZLIB_IO_MAX)
-         {
-            /* The value is less than ZLIB_IO_MAX so the cast is safe: */
-            png_ptr->zstream.avail_in = (uInt)size;
-            size = 0;
-         }
-
-         else
-         {
-            png_ptr->zstream.avail_in = ZLIB_IO_MAX;
-            size -= ZLIB_IO_MAX;
-         }
-      }
-
-      /* Reset the output buffer each time round - we empty it
-       * after every inflate call.
-       */
-      png_ptr->zstream.next_out = png_ptr->zbuf;
-      png_ptr->zstream.avail_out = png_ptr->zbuf_size;
-
-      ret = inflate(&png_ptr->zstream, Z_NO_FLUSH);
-      avail = png_ptr->zbuf_size - png_ptr->zstream.avail_out;
-
-      /* First copy/count any new output - but only if we didn't
-       * get an error code.
-       */
-      if ((ret == Z_OK || ret == Z_STREAM_END) && avail > 0)
-      {
-         png_size_t space = avail; /* > 0, see above */
-
-         if (output != 0 && output_size > count)
-         {
-            png_size_t copy = output_size - count;
-
-            if (space < copy)
-               copy = space;
-
-            png_memcpy(output + count, png_ptr->zbuf, copy);
-         }
-         count += space;
-      }
-
-      if (ret == Z_OK)
-         continue;
-
-      /* Termination conditions - always reset the zstream, it
-       * must be left in inflateInit state.
-       */
-      png_ptr->zstream.avail_in = 0;
-      inflateReset(&png_ptr->zstream);
-
-      if (ret == Z_STREAM_END)
-         return count; /* NOTE: may be zero. */
-
-      /* Now handle the error codes - the API always returns 0
-       * and the error message is dumped into the uncompressed
-       * buffer if available.
-       */
-#     ifdef PNG_WARNINGS_SUPPORTED
-      {
-         png_const_charp msg;
-
-         if (png_ptr->zstream.msg != 0)
-            msg = png_ptr->zstream.msg;
-
-         else switch (ret)
-         {
-            case Z_BUF_ERROR:
-               msg = "Buffer error in compressed datastream";
-               break;
-
-            case Z_DATA_ERROR:
-               msg = "Data error in compressed datastream";
-               break;
-
-            default:
-               msg = "Incomplete compressed datastream";
-               break;
-         }
-
-         png_chunk_warning(png_ptr, msg);
-      }
-#     endif
-
-      /* 0 means an error - notice that this code simply ignores
-       * zero length compressed chunks as a result.
-       */
-      return 0;
-   }
-}
-
-/*
- * Decompress trailing data in a chunk.  The assumption is that chunkdata
- * points at an allocated area holding the contents of a chunk with a
- * trailing compressed part.  What we get back is an allocated area
- * holding the original prefix part and an uncompressed version of the
- * trailing part (the malloc area passed in is freed).
- */
-void /* PRIVATE */
-png_decompress_chunk(png_structp png_ptr, int comp_type,
-    png_size_t chunklength,
-    png_size_t prefix_size, png_size_t *newlength)
-{
-   /* The caller should guarantee this */
-   if (prefix_size > chunklength)
-   {
-      /* The recovery is to delete the chunk. */
-      png_warning(png_ptr, "invalid chunklength");
-      prefix_size = 0; /* To delete everything */
-   }
-
-   else if (comp_type == PNG_COMPRESSION_TYPE_BASE)
-   {
-      png_size_t expanded_size = png_inflate(png_ptr,
-          (png_bytep)(png_ptr->chunkdata + prefix_size),
-          chunklength - prefix_size,
-          0,            /* output */
-          0);           /* output size */
-
-      /* Now check the limits on this chunk - if the limit fails the
-       * compressed data will be removed, the prefix will remain.
-       */
-      if (prefix_size >= (~(png_size_t)0) - 1 ||
-         expanded_size >= (~(png_size_t)0) - 1 - prefix_size
-#ifdef PNG_USER_LIMITS_SUPPORTED
-         || (png_ptr->user_chunk_malloc_max &&
-          (prefix_size + expanded_size >= png_ptr->user_chunk_malloc_max - 1))
-#else
-         || ((PNG_USER_CHUNK_MALLOC_MAX > 0) &&
-          prefix_size + expanded_size >= PNG_USER_CHUNK_MALLOC_MAX - 1)
-#endif
-         )
-         png_warning(png_ptr, "Exceeded size limit while expanding chunk");
-
-      /* If the size is zero either there was an error and a message
-       * has already been output (warning) or the size really is zero
-       * and we have nothing to do - the code will exit through the
-       * error case below.
-       */
-      else if (expanded_size > 0)
-      {
-         /* Success (maybe) - really uncompress the chunk. */
-         png_size_t new_size = 0;
-         png_charp text = (png_charp)png_malloc_warn(png_ptr,
-             prefix_size + expanded_size + 1);
-
-         if (text != NULL)
-         {
-            png_memcpy(text, png_ptr->chunkdata, prefix_size);
-            new_size = png_inflate(png_ptr,
-                (png_bytep)(png_ptr->chunkdata + prefix_size),
-                chunklength - prefix_size,
-                (png_bytep)(text + prefix_size), expanded_size);
-            text[prefix_size + expanded_size] = 0; /* just in case */
-
-            if (new_size == expanded_size)
-            {
-               png_free(png_ptr, png_ptr->chunkdata);
-               png_ptr->chunkdata = text;
-               *newlength = prefix_size + expanded_size;
-               return; /* The success return! */
-            }
-
-            png_warning(png_ptr, "png_inflate logic error");
-            png_free(png_ptr, text);
-         }
-
-         else
-            png_warning(png_ptr, "Not enough memory to decompress chunk");
-      }
-   }
-
-   else /* if (comp_type != PNG_COMPRESSION_TYPE_BASE) */
-   {
-      PNG_WARNING_PARAMETERS(p)
-      png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_d, comp_type);
-      png_formatted_warning(png_ptr, p, "Unknown compression type @1");
-
-      /* The recovery is to simply drop the data. */
-   }
-
-   /* Generic error return - leave the prefix, delete the compressed
-    * data, reallocate the chunkdata to remove the potentially large
-    * amount of compressed data.
-    */
-   {
-      png_charp text = (png_charp)png_malloc_warn(png_ptr, prefix_size + 1);
-
-      if (text != NULL)
-      {
-         if (prefix_size > 0)
-            png_memcpy(text, png_ptr->chunkdata, prefix_size);
-
-         png_free(png_ptr, png_ptr->chunkdata);
-         png_ptr->chunkdata = text;
-
-         /* This is an extra zero in the 'uncompressed' part. */
-         *(png_ptr->chunkdata + prefix_size) = 0x00;
-      }
-      /* Ignore a malloc error here - it is safe. */
-   }
-
-   *newlength = prefix_size;
-}
-#endif /* PNG_READ_COMPRESSED_TEXT_SUPPORTED */
-
-/* Read and check the IDHR chunk */
-void /* PRIVATE */
-png_handle_IHDR(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_byte buf[13];
-   png_uint_32 width, height;
-   int bit_depth, color_type, compression_type, filter_type;
-   int interlace_type;
-
-   png_debug(1, "in png_handle_IHDR");
-
-   if (png_ptr->mode & PNG_HAVE_IHDR)
-      png_error(png_ptr, "Out of place IHDR");
-
-   /* Check the length */
-   if (length != 13)
-      png_error(png_ptr, "Invalid IHDR chunk");
-
-   png_ptr->mode |= PNG_HAVE_IHDR;
-
-   png_crc_read(png_ptr, buf, 13);
-   png_crc_finish(png_ptr, 0);
-
-   width = png_get_uint_31(png_ptr, buf);
-   height = png_get_uint_31(png_ptr, buf + 4);
-   bit_depth = buf[8];
-   color_type = buf[9];
-   compression_type = buf[10];
-   filter_type = buf[11];
-   interlace_type = buf[12];
-
-   /* Set internal variables */
-   png_ptr->width = width;
-   png_ptr->height = height;
-   png_ptr->bit_depth = (png_byte)bit_depth;
-   png_ptr->interlaced = (png_byte)interlace_type;
-   png_ptr->color_type = (png_byte)color_type;
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   png_ptr->filter_type = (png_byte)filter_type;
-#endif
-   png_ptr->compression_type = (png_byte)compression_type;
-
-   /* Find number of channels */
-   switch (png_ptr->color_type)
-   {
-      default: /* invalid, png_set_IHDR calls png_error */
-      case PNG_COLOR_TYPE_GRAY:
-      case PNG_COLOR_TYPE_PALETTE:
-         png_ptr->channels = 1;
-         break;
-
-      case PNG_COLOR_TYPE_RGB:
-         png_ptr->channels = 3;
-         break;
-
-      case PNG_COLOR_TYPE_GRAY_ALPHA:
-         png_ptr->channels = 2;
-         break;
-
-      case PNG_COLOR_TYPE_RGB_ALPHA:
-         png_ptr->channels = 4;
-         break;
-   }
-
-   /* Set up other useful info */
-   png_ptr->pixel_depth = (png_byte)(png_ptr->bit_depth *
-   png_ptr->channels);
-   png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, png_ptr->width);
-   png_debug1(3, "bit_depth = %d", png_ptr->bit_depth);
-   png_debug1(3, "channels = %d", png_ptr->channels);
-   png_debug1(3, "rowbytes = %lu", (unsigned long)png_ptr->rowbytes);
-   png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth,
-       color_type, interlace_type, compression_type, filter_type);
-}
-
-/* Read and check the palette */
-void /* PRIVATE */
-png_handle_PLTE(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_color palette[PNG_MAX_PALETTE_LENGTH];
-   int num, i;
-#ifdef PNG_POINTER_INDEXING_SUPPORTED
-   png_colorp pal_ptr;
-#endif
-
-   png_debug(1, "in png_handle_PLTE");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before PLTE");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid PLTE after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (png_ptr->mode & PNG_HAVE_PLTE)
-      png_error(png_ptr, "Duplicate PLTE chunk");
-
-   png_ptr->mode |= PNG_HAVE_PLTE;
-
-   if (!(png_ptr->color_type&PNG_COLOR_MASK_COLOR))
-   {
-      png_warning(png_ptr,
-          "Ignoring PLTE chunk in grayscale PNG");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-#ifndef PNG_READ_OPT_PLTE_SUPPORTED
-   if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
-   {
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-#endif
-
-   if (length > 3*PNG_MAX_PALETTE_LENGTH || length % 3)
-   {
-      if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
-      {
-         png_warning(png_ptr, "Invalid palette chunk");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      else
-      {
-         png_error(png_ptr, "Invalid palette chunk");
-      }
-   }
-
-   num = (int)length / 3;
-
-#ifdef PNG_POINTER_INDEXING_SUPPORTED
-   for (i = 0, pal_ptr = palette; i < num; i++, pal_ptr++)
-   {
-      png_byte buf[3];
-
-      png_crc_read(png_ptr, buf, 3);
-      pal_ptr->red = buf[0];
-      pal_ptr->green = buf[1];
-      pal_ptr->blue = buf[2];
-   }
-#else
-   for (i = 0; i < num; i++)
-   {
-      png_byte buf[3];
-
-      png_crc_read(png_ptr, buf, 3);
-      /* Don't depend upon png_color being any order */
-      palette[i].red = buf[0];
-      palette[i].green = buf[1];
-      palette[i].blue = buf[2];
-   }
-#endif
-
-   /* If we actually need the PLTE chunk (ie for a paletted image), we do
-    * whatever the normal CRC configuration tells us.  However, if we
-    * have an RGB image, the PLTE can be considered ancillary, so
-    * we will act as though it is.
-    */
-#ifndef PNG_READ_OPT_PLTE_SUPPORTED
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-#endif
-   {
-      png_crc_finish(png_ptr, 0);
-   }
-
-#ifndef PNG_READ_OPT_PLTE_SUPPORTED
-   else if (png_crc_error(png_ptr))  /* Only if we have a CRC error */
-   {
-      /* If we don't want to use the data from an ancillary chunk,
-       * we have two options: an error abort, or a warning and we
-       * ignore the data in this chunk (which should be OK, since
-       * it's considered ancillary for a RGB or RGBA image).
-       */
-      if (!(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_USE))
-      {
-         if (png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN)
-         {
-            png_chunk_benign_error(png_ptr, "CRC error");
-         }
-
-         else
-         {
-            png_chunk_warning(png_ptr, "CRC error");
-            return;
-         }
-      }
-
-      /* Otherwise, we (optionally) emit a warning and use the chunk. */
-      else if (!(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN))
-      {
-         png_chunk_warning(png_ptr, "CRC error");
-      }
-   }
-#endif
-
-   png_set_PLTE(png_ptr, info_ptr, palette, num);
-
-#ifdef PNG_READ_tRNS_SUPPORTED
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS))
-      {
-         if (png_ptr->num_trans > (png_uint_16)num)
-         {
-            png_warning(png_ptr, "Truncating incorrect tRNS chunk length");
-            png_ptr->num_trans = (png_uint_16)num;
-         }
-
-         if (info_ptr->num_trans > (png_uint_16)num)
-         {
-            png_warning(png_ptr, "Truncating incorrect info tRNS chunk length");
-            info_ptr->num_trans = (png_uint_16)num;
-         }
-      }
-   }
-#endif
-
-}
-
-void /* PRIVATE */
-png_handle_IEND(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_debug(1, "in png_handle_IEND");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR) || !(png_ptr->mode & PNG_HAVE_IDAT))
-   {
-      png_error(png_ptr, "No image in file");
-   }
-
-   png_ptr->mode |= (PNG_AFTER_IDAT | PNG_HAVE_IEND);
-
-   if (length != 0)
-   {
-      png_warning(png_ptr, "Incorrect IEND chunk length");
-   }
-
-   png_crc_finish(png_ptr, length);
-
-   PNG_UNUSED(info_ptr) /* Quiet compiler warnings about unused info_ptr */
-}
-
-#ifdef PNG_READ_gAMA_SUPPORTED
-void /* PRIVATE */
-png_handle_gAMA(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_fixed_point igamma;
-   png_byte buf[4];
-
-   png_debug(1, "in png_handle_gAMA");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before gAMA");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid gAMA after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (png_ptr->mode & PNG_HAVE_PLTE)
-      /* Should be an error, but we can cope with it */
-      png_warning(png_ptr, "Out of place gAMA chunk");
-
-   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_gAMA)
-#ifdef PNG_READ_sRGB_SUPPORTED
-       && !(info_ptr->valid & PNG_INFO_sRGB)
-#endif
-       )
-   {
-      png_warning(png_ptr, "Duplicate gAMA chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (length != 4)
-   {
-      png_warning(png_ptr, "Incorrect gAMA chunk length");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 4);
-
-   if (png_crc_finish(png_ptr, 0))
-      return;
-
-   igamma = png_get_fixed_point(NULL, buf);
-
-   /* Check for zero gamma or an error. */
-   if (igamma <= 0)
-   {
-      png_warning(png_ptr,
-          "Ignoring gAMA chunk with out of range gamma");
-
-      return;
-   }
-
-#  ifdef PNG_READ_sRGB_SUPPORTED
-   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sRGB))
-   {
-      if (PNG_OUT_OF_RANGE(igamma, 45500, 500))
-      {
-         PNG_WARNING_PARAMETERS(p)
-         png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_fixed, igamma);
-         png_formatted_warning(png_ptr, p,
-             "Ignoring incorrect gAMA value @1 when sRGB is also present");
-         return;
-      }
-   }
-#  endif /* PNG_READ_sRGB_SUPPORTED */
-
-#  ifdef PNG_READ_GAMMA_SUPPORTED
-   /* Gamma correction on read is supported. */
-   png_ptr->gamma = igamma;
-#  endif
-   /* And set the 'info' structure members. */
-   png_set_gAMA_fixed(png_ptr, info_ptr, igamma);
-}
-#endif
-
-#ifdef PNG_READ_sBIT_SUPPORTED
-void /* PRIVATE */
-png_handle_sBIT(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_size_t truelen;
-   png_byte buf[4];
-
-   png_debug(1, "in png_handle_sBIT");
-
-   buf[0] = buf[1] = buf[2] = buf[3] = 0;
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before sBIT");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid sBIT after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (png_ptr->mode & PNG_HAVE_PLTE)
-   {
-      /* Should be an error, but we can cope with it */
-      png_warning(png_ptr, "Out of place sBIT chunk");
-   }
-
-   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sBIT))
-   {
-      png_warning(png_ptr, "Duplicate sBIT chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      truelen = 3;
-
-   else
-      truelen = (png_size_t)png_ptr->channels;
-
-   if (length != truelen || length > 4)
-   {
-      png_warning(png_ptr, "Incorrect sBIT chunk length");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, truelen);
-
-   if (png_crc_finish(png_ptr, 0))
-      return;
-
-   if (png_ptr->color_type & PNG_COLOR_MASK_COLOR)
-   {
-      png_ptr->sig_bit.red = buf[0];
-      png_ptr->sig_bit.green = buf[1];
-      png_ptr->sig_bit.blue = buf[2];
-      png_ptr->sig_bit.alpha = buf[3];
-   }
-
-   else
-   {
-      png_ptr->sig_bit.gray = buf[0];
-      png_ptr->sig_bit.red = buf[0];
-      png_ptr->sig_bit.green = buf[0];
-      png_ptr->sig_bit.blue = buf[0];
-      png_ptr->sig_bit.alpha = buf[1];
-   }
-
-   png_set_sBIT(png_ptr, info_ptr, &(png_ptr->sig_bit));
-}
-#endif
-
-#ifdef PNG_READ_cHRM_SUPPORTED
-void /* PRIVATE */
-png_handle_cHRM(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_byte buf[32];
-   png_fixed_point x_white, y_white, x_red, y_red, x_green, y_green, x_blue,
-      y_blue;
-
-   png_debug(1, "in png_handle_cHRM");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before cHRM");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid cHRM after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (png_ptr->mode & PNG_HAVE_PLTE)
-      /* Should be an error, but we can cope with it */
-      png_warning(png_ptr, "Out of place cHRM chunk");
-
-   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM)
-#  ifdef PNG_READ_sRGB_SUPPORTED
-       && !(info_ptr->valid & PNG_INFO_sRGB)
-#  endif
-      )
-   {
-      png_warning(png_ptr, "Duplicate cHRM chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (length != 32)
-   {
-      png_warning(png_ptr, "Incorrect cHRM chunk length");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 32);
-
-   if (png_crc_finish(png_ptr, 0))
-      return;
-
-   x_white = png_get_fixed_point(NULL, buf);
-   y_white = png_get_fixed_point(NULL, buf + 4);
-   x_red   = png_get_fixed_point(NULL, buf + 8);
-   y_red   = png_get_fixed_point(NULL, buf + 12);
-   x_green = png_get_fixed_point(NULL, buf + 16);
-   y_green = png_get_fixed_point(NULL, buf + 20);
-   x_blue  = png_get_fixed_point(NULL, buf + 24);
-   y_blue  = png_get_fixed_point(NULL, buf + 28);
-
-   if (x_white == PNG_FIXED_ERROR ||
-       y_white == PNG_FIXED_ERROR ||
-       x_red   == PNG_FIXED_ERROR ||
-       y_red   == PNG_FIXED_ERROR ||
-       x_green == PNG_FIXED_ERROR ||
-       y_green == PNG_FIXED_ERROR ||
-       x_blue  == PNG_FIXED_ERROR ||
-       y_blue  == PNG_FIXED_ERROR)
-   {
-      png_warning(png_ptr, "Ignoring cHRM chunk with negative chromaticities");
-      return;
-   }
-
-#ifdef PNG_READ_sRGB_SUPPORTED
-   if ((info_ptr != NULL) && (info_ptr->valid & PNG_INFO_sRGB))
-   {
-      if (PNG_OUT_OF_RANGE(x_white, 31270,  1000) ||
-          PNG_OUT_OF_RANGE(y_white, 32900,  1000) ||
-          PNG_OUT_OF_RANGE(x_red,   64000,  1000) ||
-          PNG_OUT_OF_RANGE(y_red,   33000,  1000) ||
-          PNG_OUT_OF_RANGE(x_green, 30000,  1000) ||
-          PNG_OUT_OF_RANGE(y_green, 60000,  1000) ||
-          PNG_OUT_OF_RANGE(x_blue,  15000,  1000) ||
-          PNG_OUT_OF_RANGE(y_blue,   6000,  1000))
-      {
-         PNG_WARNING_PARAMETERS(p)
-
-         png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_fixed, x_white);
-         png_warning_parameter_signed(p, 2, PNG_NUMBER_FORMAT_fixed, y_white);
-         png_warning_parameter_signed(p, 3, PNG_NUMBER_FORMAT_fixed, x_red);
-         png_warning_parameter_signed(p, 4, PNG_NUMBER_FORMAT_fixed, y_red);
-         png_warning_parameter_signed(p, 5, PNG_NUMBER_FORMAT_fixed, x_green);
-         png_warning_parameter_signed(p, 6, PNG_NUMBER_FORMAT_fixed, y_green);
-         png_warning_parameter_signed(p, 7, PNG_NUMBER_FORMAT_fixed, x_blue);
-         png_warning_parameter_signed(p, 8, PNG_NUMBER_FORMAT_fixed, y_blue);
-
-         png_formatted_warning(png_ptr, p,
-             "Ignoring incorrect cHRM white(@1,@2) r(@3,@4)g(@5,@6)b(@7,@8) "
-             "when sRGB is also present");
-      }
-      return;
-   }
-#endif /* PNG_READ_sRGB_SUPPORTED */
-
-#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-   /* Store the _white values as default coefficients for the rgb to gray
-    * operation if it is supported.  Check if the transform is already set to
-    * avoid destroying the transform values.
-    */
-   if (!png_ptr->rgb_to_gray_coefficients_set)
-   {
-      /* png_set_background has not been called and we haven't seen an sRGB
-       * chunk yet.  Find the XYZ of the three end points.
-       */
-      png_XYZ XYZ;
-      png_xy xy;
-
-      xy.redx = x_red;
-      xy.redy = y_red;
-      xy.greenx = x_green;
-      xy.greeny = y_green;
-      xy.bluex = x_blue;
-      xy.bluey = y_blue;
-      xy.whitex = x_white;
-      xy.whitey = y_white;
-
-      if (png_XYZ_from_xy_checked(png_ptr, &XYZ, xy))
-      {
-         /* The success case, because XYZ_from_xy normalises to a reference
-          * white Y of 1.0 we just need to scale the numbers.  This should
-          * always work just fine. It is an internal error if this overflows.
-          */
-         {
-            png_fixed_point r, g, b;
-            if (png_muldiv(&r, XYZ.redY, 32768, PNG_FP_1) &&
-               r >= 0 && r <= 32768 &&
-               png_muldiv(&g, XYZ.greenY, 32768, PNG_FP_1) &&
-               g >= 0 && g <= 32768 &&
-               png_muldiv(&b, XYZ.blueY, 32768, PNG_FP_1) &&
-               b >= 0 && b <= 32768 &&
-               r+g+b <= 32769)
-            {
-               /* We allow 0 coefficients here.  r+g+b may be 32769 if two or
-                * all of the coefficients were rounded up.  Handle this by
-                * reducing the *largest* coefficient by 1; this matches the
-                * approach used for the default coefficients in pngrtran.c
-                */
-               int add = 0;
-
-               if (r+g+b > 32768)
-                  add = -1;
-               else if (r+g+b < 32768)
-                  add = 1;
-
-               if (add != 0)
-               {
-                  if (g >= r && g >= b)
-                     g += add;
-                  else if (r >= g && r >= b)
-                     r += add;
-                  else
-                     b += add;
-               }
-
-               /* Check for an internal error. */
-               if (r+g+b != 32768)
-                  png_error(png_ptr,
-                     "internal error handling cHRM coefficients");
-
-               png_ptr->rgb_to_gray_red_coeff   = (png_uint_16)r;
-               png_ptr->rgb_to_gray_green_coeff = (png_uint_16)g;
-            }
-
-            /* This is a png_error at present even though it could be ignored -
-             * it should never happen, but it is important that if it does, the
-             * bug is fixed.
-             */
-            else
-               png_error(png_ptr, "internal error handling cHRM->XYZ");
-         }
-      }
-   }
-#endif
-
-   png_set_cHRM_fixed(png_ptr, info_ptr, x_white, y_white, x_red, y_red,
-      x_green, y_green, x_blue, y_blue);
-}
-#endif
-
-#ifdef PNG_READ_sRGB_SUPPORTED
-void /* PRIVATE */
-png_handle_sRGB(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   int intent;
-   png_byte buf[1];
-
-   png_debug(1, "in png_handle_sRGB");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before sRGB");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid sRGB after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (png_ptr->mode & PNG_HAVE_PLTE)
-      /* Should be an error, but we can cope with it */
-      png_warning(png_ptr, "Out of place sRGB chunk");
-
-   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sRGB))
-   {
-      png_warning(png_ptr, "Duplicate sRGB chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (length != 1)
-   {
-      png_warning(png_ptr, "Incorrect sRGB chunk length");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 1);
-
-   if (png_crc_finish(png_ptr, 0))
-      return;
-
-   intent = buf[0];
-
-   /* Check for bad intent */
-   if (intent >= PNG_sRGB_INTENT_LAST)
-   {
-      png_warning(png_ptr, "Unknown sRGB intent");
-      return;
-   }
-
-#if defined(PNG_READ_gAMA_SUPPORTED) && defined(PNG_READ_GAMMA_SUPPORTED)
-   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_gAMA))
-   {
-      if (PNG_OUT_OF_RANGE(info_ptr->gamma, 45500, 500))
-      {
-         PNG_WARNING_PARAMETERS(p)
-
-         png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_fixed,
-            info_ptr->gamma);
-
-         png_formatted_warning(png_ptr, p,
-             "Ignoring incorrect gAMA value @1 when sRGB is also present");
-      }
-   }
-#endif /* PNG_READ_gAMA_SUPPORTED */
-
-#ifdef PNG_READ_cHRM_SUPPORTED
-   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM))
-      if (PNG_OUT_OF_RANGE(info_ptr->x_white, 31270,  1000) ||
-          PNG_OUT_OF_RANGE(info_ptr->y_white, 32900,  1000) ||
-          PNG_OUT_OF_RANGE(info_ptr->x_red,   64000,  1000) ||
-          PNG_OUT_OF_RANGE(info_ptr->y_red,   33000,  1000) ||
-          PNG_OUT_OF_RANGE(info_ptr->x_green, 30000,  1000) ||
-          PNG_OUT_OF_RANGE(info_ptr->y_green, 60000,  1000) ||
-          PNG_OUT_OF_RANGE(info_ptr->x_blue,  15000,  1000) ||
-          PNG_OUT_OF_RANGE(info_ptr->y_blue,   6000,  1000))
-      {
-         png_warning(png_ptr,
-             "Ignoring incorrect cHRM value when sRGB is also present");
-      }
-#endif /* PNG_READ_cHRM_SUPPORTED */
-
-   /* This is recorded for use when handling the cHRM chunk above.  An sRGB
-    * chunk unconditionally overwrites the coefficients for grayscale conversion
-    * too.
-    */
-   png_ptr->is_sRGB = 1;
-
-#  ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
-      /* Don't overwrite user supplied values: */
-      if (!png_ptr->rgb_to_gray_coefficients_set)
-      {
-         /* These numbers come from the sRGB specification (or, since one has to
-          * pay much money to get a copy, the wikipedia sRGB page) the
-          * chromaticity values quoted have been inverted to get the reverse
-          * transformation from RGB to XYZ and the 'Y' coefficients scaled by
-          * 32768 (then rounded).
-          *
-          * sRGB and ITU Rec-709 both truncate the values for the D65 white
-          * point to four digits and, even though it actually stores five
-          * digits, the PNG spec gives the truncated value.
-          *
-          * This means that when the chromaticities are converted back to XYZ
-          * end points we end up with (6968,23435,2366), which, as described in
-          * pngrtran.c, would overflow.  If the five digit precision and up is
-          * used we get, instead:
-          *
-          *    6968*R + 23435*G + 2365*B
-          *
-          * (Notice that this rounds the blue coefficient down, rather than the
-          * choice used in pngrtran.c which is to round the green one down.)
-          */
-         png_ptr->rgb_to_gray_red_coeff   =  6968; /* 0.212639005871510 */
-         png_ptr->rgb_to_gray_green_coeff = 23434; /* 0.715168678767756 */
-         /* png_ptr->rgb_to_gray_blue_coeff  =  2366; 0.072192315360734	*/
-
-         /* The following keeps the cHRM chunk from destroying the
-          * coefficients again in the event that it follows the sRGB chunk.
-          */
-         png_ptr->rgb_to_gray_coefficients_set = 1;
-      }
-#  endif
-
-   png_set_sRGB_gAMA_and_cHRM(png_ptr, info_ptr, intent);
-}
-#endif /* PNG_READ_sRGB_SUPPORTED */
-
-#ifdef PNG_READ_iCCP_SUPPORTED
-void /* PRIVATE */
-png_handle_iCCP(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-/* Note: this does not properly handle chunks that are > 64K under DOS */
-{
-   png_byte compression_type;
-   png_bytep pC;
-   png_charp profile;
-   png_uint_32 skip = 0;
-   png_uint_32 profile_size;
-   png_alloc_size_t profile_length;
-   png_size_t slength, prefix_length, data_length;
-
-   png_debug(1, "in png_handle_iCCP");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before iCCP");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid iCCP after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (png_ptr->mode & PNG_HAVE_PLTE)
-      /* Should be an error, but we can cope with it */
-      png_warning(png_ptr, "Out of place iCCP chunk");
-
-   if ((png_ptr->mode & PNG_HAVE_iCCP) || (info_ptr != NULL &&
-      (info_ptr->valid & (PNG_INFO_iCCP|PNG_INFO_sRGB))))
-   {
-      png_warning(png_ptr, "Duplicate iCCP chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_ptr->mode |= PNG_HAVE_iCCP;
-
-#ifdef PNG_MAX_MALLOC_64K
-   if (length > (png_uint_32)65535L)
-   {
-      png_warning(png_ptr, "iCCP chunk too large to fit in memory");
-      skip = length - (png_uint_32)65535L;
-      length = (png_uint_32)65535L;
-   }
-#endif
-
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = (png_charp)png_malloc(png_ptr, length + 1);
-   slength = length;
-   png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength);
-
-   if (png_crc_finish(png_ptr, skip))
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   png_ptr->chunkdata[slength] = 0x00;
-
-   for (profile = png_ptr->chunkdata; *profile; profile++)
-      /* Empty loop to find end of name */ ;
-
-   ++profile;
-
-   /* There should be at least one zero (the compression type byte)
-    * following the separator, and we should be on it
-    */
-   if (profile >= png_ptr->chunkdata + slength - 1)
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      png_warning(png_ptr, "Malformed iCCP chunk");
-      return;
-   }
-
-   /* Compression_type should always be zero */
-   compression_type = *profile++;
-
-   if (compression_type)
-   {
-      png_warning(png_ptr, "Ignoring nonzero compression type in iCCP chunk");
-      compression_type = 0x00;  /* Reset it to zero (libpng-1.0.6 through 1.0.8
-                                 wrote nonzero) */
-   }
-
-   prefix_length = profile - png_ptr->chunkdata;
-   png_decompress_chunk(png_ptr, compression_type,
-       slength, prefix_length, &data_length);
-
-   profile_length = data_length - prefix_length;
-
-   if (prefix_length > data_length || profile_length < 4)
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      png_warning(png_ptr, "Profile size field missing from iCCP chunk");
-      return;
-   }
-
-   /* Check the profile_size recorded in the first 32 bits of the ICC profile */
-   pC = (png_bytep)(png_ptr->chunkdata + prefix_length);
-   profile_size = ((*(pC    )) << 24) |
-                  ((*(pC + 1)) << 16) |
-                  ((*(pC + 2)) <<  8) |
-                  ((*(pC + 3))      );
-
-   /* NOTE: the following guarantees that 'profile_length' fits into 32 bits,
-    * because profile_size is a 32 bit value.
-    */
-   if (profile_size < profile_length)
-      profile_length = profile_size;
-
-   /* And the following guarantees that profile_size == profile_length. */
-   if (profile_size > profile_length)
-   {
-      PNG_WARNING_PARAMETERS(p)
-
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-
-      png_warning_parameter_unsigned(p, 1, PNG_NUMBER_FORMAT_u, profile_size);
-      png_warning_parameter_unsigned(p, 2, PNG_NUMBER_FORMAT_u, profile_length);
-      png_formatted_warning(png_ptr, p,
-         "Ignoring iCCP chunk with declared size = @1 and actual length = @2");
-      return;
-   }
-
-   png_set_iCCP(png_ptr, info_ptr, png_ptr->chunkdata,
-       compression_type, (png_bytep)png_ptr->chunkdata + prefix_length,
-       profile_size);
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = NULL;
-}
-#endif /* PNG_READ_iCCP_SUPPORTED */
-
-#ifdef PNG_READ_sPLT_SUPPORTED
-void /* PRIVATE */
-png_handle_sPLT(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-/* Note: this does not properly handle chunks that are > 64K under DOS */
-{
-   png_bytep entry_start;
-   png_sPLT_t new_palette;
-   png_sPLT_entryp pp;
-   png_uint_32 data_length;
-   int entry_size, i;
-   png_uint_32 skip = 0;
-   png_size_t slength;
-   png_uint_32 dl;
-   png_size_t max_dl;
-
-   png_debug(1, "in png_handle_sPLT");
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-
-   if (png_ptr->user_chunk_cache_max != 0)
-   {
-      if (png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      if (--png_ptr->user_chunk_cache_max == 1)
-      {
-         png_warning(png_ptr, "No space in chunk cache for sPLT");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-   }
-#endif
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before sPLT");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid sPLT after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-#ifdef PNG_MAX_MALLOC_64K
-   if (length > (png_uint_32)65535L)
-   {
-      png_warning(png_ptr, "sPLT chunk too large to fit in memory");
-      skip = length - (png_uint_32)65535L;
-      length = (png_uint_32)65535L;
-   }
-#endif
-
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = (png_charp)png_malloc(png_ptr, length + 1);
-
-   /* WARNING: this may break if size_t is less than 32 bits; it is assumed
-    * that the PNG_MAX_MALLOC_64K test is enabled in this case, but this is a
-    * potential breakage point if the types in pngconf.h aren't exactly right.
-    */
-   slength = length;
-   png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength);
-
-   if (png_crc_finish(png_ptr, skip))
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   png_ptr->chunkdata[slength] = 0x00;
-
-   for (entry_start = (png_bytep)png_ptr->chunkdata; *entry_start;
-       entry_start++)
-      /* Empty loop to find end of name */ ;
-
-   ++entry_start;
-
-   /* A sample depth should follow the separator, and we should be on it  */
-   if (entry_start > (png_bytep)png_ptr->chunkdata + slength - 2)
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      png_warning(png_ptr, "malformed sPLT chunk");
-      return;
-   }
-
-   new_palette.depth = *entry_start++;
-   entry_size = (new_palette.depth == 8 ? 6 : 10);
-   /* This must fit in a png_uint_32 because it is derived from the original
-    * chunk data length (and use 'length', not 'slength' here for clarity -
-    * they are guaranteed to be the same, see the tests above.)
-    */
-   data_length = length - (png_uint_32)(entry_start -
-      (png_bytep)png_ptr->chunkdata);
-
-   /* Integrity-check the data length */
-   if (data_length % entry_size)
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      png_warning(png_ptr, "sPLT chunk has bad length");
-      return;
-   }
-
-   dl = (png_int_32)(data_length / entry_size);
-   max_dl = PNG_SIZE_MAX / png_sizeof(png_sPLT_entry);
-
-   if (dl > max_dl)
-   {
-       png_warning(png_ptr, "sPLT chunk too long");
-       return;
-   }
-
-   new_palette.nentries = (png_int_32)(data_length / entry_size);
-
-   new_palette.entries = (png_sPLT_entryp)png_malloc_warn(
-       png_ptr, new_palette.nentries * png_sizeof(png_sPLT_entry));
-
-   if (new_palette.entries == NULL)
-   {
-       png_warning(png_ptr, "sPLT chunk requires too much memory");
-       return;
-   }
-
-#ifdef PNG_POINTER_INDEXING_SUPPORTED
-   for (i = 0; i < new_palette.nentries; i++)
-   {
-      pp = new_palette.entries + i;
-
-      if (new_palette.depth == 8)
-      {
-         pp->red = *entry_start++;
-         pp->green = *entry_start++;
-         pp->blue = *entry_start++;
-         pp->alpha = *entry_start++;
-      }
-
-      else
-      {
-         pp->red   = png_get_uint_16(entry_start); entry_start += 2;
-         pp->green = png_get_uint_16(entry_start); entry_start += 2;
-         pp->blue  = png_get_uint_16(entry_start); entry_start += 2;
-         pp->alpha = png_get_uint_16(entry_start); entry_start += 2;
-      }
-
-      pp->frequency = png_get_uint_16(entry_start); entry_start += 2;
-   }
-#else
-   pp = new_palette.entries;
-
-   for (i = 0; i < new_palette.nentries; i++)
-   {
-
-      if (new_palette.depth == 8)
-      {
-         pp[i].red   = *entry_start++;
-         pp[i].green = *entry_start++;
-         pp[i].blue  = *entry_start++;
-         pp[i].alpha = *entry_start++;
-      }
-
-      else
-      {
-         pp[i].red   = png_get_uint_16(entry_start); entry_start += 2;
-         pp[i].green = png_get_uint_16(entry_start); entry_start += 2;
-         pp[i].blue  = png_get_uint_16(entry_start); entry_start += 2;
-         pp[i].alpha = png_get_uint_16(entry_start); entry_start += 2;
-      }
-
-      pp[i].frequency = png_get_uint_16(entry_start); entry_start += 2;
-   }
-#endif
-
-   /* Discard all chunk data except the name and stash that */
-   new_palette.name = png_ptr->chunkdata;
-
-   png_set_sPLT(png_ptr, info_ptr, &new_palette, 1);
-
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = NULL;
-   png_free(png_ptr, new_palette.entries);
-}
-#endif /* PNG_READ_sPLT_SUPPORTED */
-
-#ifdef PNG_READ_tRNS_SUPPORTED
-void /* PRIVATE */
-png_handle_tRNS(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_byte readbuf[PNG_MAX_PALETTE_LENGTH];
-
-   png_debug(1, "in png_handle_tRNS");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before tRNS");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid tRNS after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS))
-   {
-      png_warning(png_ptr, "Duplicate tRNS chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
-   {
-      png_byte buf[2];
-
-      if (length != 2)
-      {
-         png_warning(png_ptr, "Incorrect tRNS chunk length");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      png_crc_read(png_ptr, buf, 2);
-      png_ptr->num_trans = 1;
-      png_ptr->trans_color.gray = png_get_uint_16(buf);
-   }
-
-   else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
-   {
-      png_byte buf[6];
-
-      if (length != 6)
-      {
-         png_warning(png_ptr, "Incorrect tRNS chunk length");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      png_crc_read(png_ptr, buf, (png_size_t)length);
-      png_ptr->num_trans = 1;
-      png_ptr->trans_color.red = png_get_uint_16(buf);
-      png_ptr->trans_color.green = png_get_uint_16(buf + 2);
-      png_ptr->trans_color.blue = png_get_uint_16(buf + 4);
-   }
-
-   else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      if (!(png_ptr->mode & PNG_HAVE_PLTE))
-      {
-         /* Should be an error, but we can cope with it. */
-         png_warning(png_ptr, "Missing PLTE before tRNS");
-      }
-
-      if (length > (png_uint_32)png_ptr->num_palette ||
-          length > PNG_MAX_PALETTE_LENGTH)
-      {
-         png_warning(png_ptr, "Incorrect tRNS chunk length");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      if (length == 0)
-      {
-         png_warning(png_ptr, "Zero length tRNS chunk");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      png_crc_read(png_ptr, readbuf, (png_size_t)length);
-      png_ptr->num_trans = (png_uint_16)length;
-   }
-
-   else
-   {
-      png_warning(png_ptr, "tRNS chunk not allowed with alpha channel");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (png_crc_finish(png_ptr, 0))
-   {
-      png_ptr->num_trans = 0;
-      return;
-   }
-
-   png_set_tRNS(png_ptr, info_ptr, readbuf, png_ptr->num_trans,
-       &(png_ptr->trans_color));
-}
-#endif
-
-#ifdef PNG_READ_bKGD_SUPPORTED
-void /* PRIVATE */
-png_handle_bKGD(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_size_t truelen;
-   png_byte buf[6];
-   png_color_16 background;
-
-   png_debug(1, "in png_handle_bKGD");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before bKGD");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid bKGD after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
-       !(png_ptr->mode & PNG_HAVE_PLTE))
-   {
-      png_warning(png_ptr, "Missing PLTE before bKGD");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD))
-   {
-      png_warning(png_ptr, "Duplicate bKGD chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      truelen = 1;
-
-   else if (png_ptr->color_type & PNG_COLOR_MASK_COLOR)
-      truelen = 6;
-
-   else
-      truelen = 2;
-
-   if (length != truelen)
-   {
-      png_warning(png_ptr, "Incorrect bKGD chunk length");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, truelen);
-
-   if (png_crc_finish(png_ptr, 0))
-      return;
-
-   /* We convert the index value into RGB components so that we can allow
-    * arbitrary RGB values for background when we have transparency, and
-    * so it is easy to determine the RGB values of the background color
-    * from the info_ptr struct.
-    */
-   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      background.index = buf[0];
-
-      if (info_ptr && info_ptr->num_palette)
-      {
-         if (buf[0] >= info_ptr->num_palette)
-         {
-            png_warning(png_ptr, "Incorrect bKGD chunk index value");
-            return;
-         }
-
-         background.red = (png_uint_16)png_ptr->palette[buf[0]].red;
-         background.green = (png_uint_16)png_ptr->palette[buf[0]].green;
-         background.blue = (png_uint_16)png_ptr->palette[buf[0]].blue;
-      }
-
-      else
-         background.red = background.green = background.blue = 0;
-
-      background.gray = 0;
-   }
-
-   else if (!(png_ptr->color_type & PNG_COLOR_MASK_COLOR)) /* GRAY */
-   {
-      background.index = 0;
-      background.red =
-      background.green =
-      background.blue =
-      background.gray = png_get_uint_16(buf);
-   }
-
-   else
-   {
-      background.index = 0;
-      background.red = png_get_uint_16(buf);
-      background.green = png_get_uint_16(buf + 2);
-      background.blue = png_get_uint_16(buf + 4);
-      background.gray = 0;
-   }
-
-   png_set_bKGD(png_ptr, info_ptr, &background);
-}
-#endif
-
-#ifdef PNG_READ_hIST_SUPPORTED
-void /* PRIVATE */
-png_handle_hIST(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   unsigned int num, i;
-   png_uint_16 readbuf[PNG_MAX_PALETTE_LENGTH];
-
-   png_debug(1, "in png_handle_hIST");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before hIST");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid hIST after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (!(png_ptr->mode & PNG_HAVE_PLTE))
-   {
-      png_warning(png_ptr, "Missing PLTE before hIST");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST))
-   {
-      png_warning(png_ptr, "Duplicate hIST chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (length > 2*PNG_MAX_PALETTE_LENGTH ||
-       length != (unsigned int) (2*png_ptr->num_palette))
-   {
-      png_warning(png_ptr, "Incorrect hIST chunk length");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   num = length / 2 ;
-
-   for (i = 0; i < num; i++)
-   {
-      png_byte buf[2];
-
-      png_crc_read(png_ptr, buf, 2);
-      readbuf[i] = png_get_uint_16(buf);
-   }
-
-   if (png_crc_finish(png_ptr, 0))
-      return;
-
-   png_set_hIST(png_ptr, info_ptr, readbuf);
-}
-#endif
-
-#ifdef PNG_READ_pHYs_SUPPORTED
-void /* PRIVATE */
-png_handle_pHYs(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_byte buf[9];
-   png_uint_32 res_x, res_y;
-   int unit_type;
-
-   png_debug(1, "in png_handle_pHYs");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before pHYs");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid pHYs after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs))
-   {
-      png_warning(png_ptr, "Duplicate pHYs chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (length != 9)
-   {
-      png_warning(png_ptr, "Incorrect pHYs chunk length");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 9);
-
-   if (png_crc_finish(png_ptr, 0))
-      return;
-
-   res_x = png_get_uint_32(buf);
-   res_y = png_get_uint_32(buf + 4);
-   unit_type = buf[8];
-   png_set_pHYs(png_ptr, info_ptr, res_x, res_y, unit_type);
-}
-#endif
-
-#ifdef PNG_READ_oFFs_SUPPORTED
-void /* PRIVATE */
-png_handle_oFFs(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_byte buf[9];
-   png_int_32 offset_x, offset_y;
-   int unit_type;
-
-   png_debug(1, "in png_handle_oFFs");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before oFFs");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid oFFs after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs))
-   {
-      png_warning(png_ptr, "Duplicate oFFs chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (length != 9)
-   {
-      png_warning(png_ptr, "Incorrect oFFs chunk length");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 9);
-
-   if (png_crc_finish(png_ptr, 0))
-      return;
-
-   offset_x = png_get_int_32(buf);
-   offset_y = png_get_int_32(buf + 4);
-   unit_type = buf[8];
-   png_set_oFFs(png_ptr, info_ptr, offset_x, offset_y, unit_type);
-}
-#endif
-
-#ifdef PNG_READ_pCAL_SUPPORTED
-/* Read the pCAL chunk (described in the PNG Extensions document) */
-void /* PRIVATE */
-png_handle_pCAL(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_int_32 X0, X1;
-   png_byte type, nparams;
-   png_charp buf, units, endptr;
-   png_charpp params;
-   png_size_t slength;
-   int i;
-
-   png_debug(1, "in png_handle_pCAL");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before pCAL");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid pCAL after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pCAL))
-   {
-      png_warning(png_ptr, "Duplicate pCAL chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_debug1(2, "Allocating and reading pCAL chunk data (%u bytes)",
-       length + 1);
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1);
-
-   if (png_ptr->chunkdata == NULL)
-   {
-      png_warning(png_ptr, "No memory for pCAL purpose");
-      return;
-   }
-
-   slength = length;
-   png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength);
-
-   if (png_crc_finish(png_ptr, 0))
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   png_ptr->chunkdata[slength] = 0x00; /* Null terminate the last string */
-
-   png_debug(3, "Finding end of pCAL purpose string");
-   for (buf = png_ptr->chunkdata; *buf; buf++)
-      /* Empty loop */ ;
-
-   endptr = png_ptr->chunkdata + slength;
-
-   /* We need to have at least 12 bytes after the purpose string
-    * in order to get the parameter information.
-    */
-   if (endptr <= buf + 12)
-   {
-      png_warning(png_ptr, "Invalid pCAL data");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   png_debug(3, "Reading pCAL X0, X1, type, nparams, and units");
-   X0 = png_get_int_32((png_bytep)buf+1);
-   X1 = png_get_int_32((png_bytep)buf+5);
-   type = buf[9];
-   nparams = buf[10];
-   units = buf + 11;
-
-   png_debug(3, "Checking pCAL equation type and number of parameters");
-   /* Check that we have the right number of parameters for known
-    * equation types.
-    */
-   if ((type == PNG_EQUATION_LINEAR && nparams != 2) ||
-       (type == PNG_EQUATION_BASE_E && nparams != 3) ||
-       (type == PNG_EQUATION_ARBITRARY && nparams != 3) ||
-       (type == PNG_EQUATION_HYPERBOLIC && nparams != 4))
-   {
-      png_warning(png_ptr, "Invalid pCAL parameters for equation type");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   else if (type >= PNG_EQUATION_LAST)
-   {
-      png_warning(png_ptr, "Unrecognized equation type for pCAL chunk");
-   }
-
-   for (buf = units; *buf; buf++)
-      /* Empty loop to move past the units string. */ ;
-
-   png_debug(3, "Allocating pCAL parameters array");
-
-   params = (png_charpp)png_malloc_warn(png_ptr,
-       (png_size_t)(nparams * png_sizeof(png_charp)));
-
-   if (params == NULL)
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      png_warning(png_ptr, "No memory for pCAL params");
-      return;
-   }
-
-   /* Get pointers to the start of each parameter string. */
-   for (i = 0; i < (int)nparams; i++)
-   {
-      buf++; /* Skip the null string terminator from previous parameter. */
-
-      png_debug1(3, "Reading pCAL parameter %d", i);
-
-      for (params[i] = buf; buf <= endptr && *buf != 0x00; buf++)
-         /* Empty loop to move past each parameter string */ ;
-
-      /* Make sure we haven't run out of data yet */
-      if (buf > endptr)
-      {
-         png_warning(png_ptr, "Invalid pCAL data");
-         png_free(png_ptr, png_ptr->chunkdata);
-         png_ptr->chunkdata = NULL;
-         png_free(png_ptr, params);
-         return;
-      }
-   }
-
-   png_set_pCAL(png_ptr, info_ptr, png_ptr->chunkdata, X0, X1, type, nparams,
-      units, params);
-
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = NULL;
-   png_free(png_ptr, params);
-}
-#endif
-
-#ifdef PNG_READ_sCAL_SUPPORTED
-/* Read the sCAL chunk */
-void /* PRIVATE */
-png_handle_sCAL(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_size_t slength, i;
-   int state;
-
-   png_debug(1, "in png_handle_sCAL");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before sCAL");
-
-   else if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      png_warning(png_ptr, "Invalid sCAL after IDAT");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sCAL))
-   {
-      png_warning(png_ptr, "Duplicate sCAL chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   /* Need unit type, width, \0, height: minimum 4 bytes */
-   else if (length < 4)
-   {
-      png_warning(png_ptr, "sCAL chunk too short");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_debug1(2, "Allocating and reading sCAL chunk data (%u bytes)",
-      length + 1);
-
-   png_ptr->chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1);
-
-   if (png_ptr->chunkdata == NULL)
-   {
-      png_warning(png_ptr, "Out of memory while processing sCAL chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   slength = length;
-   png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength);
-   png_ptr->chunkdata[slength] = 0x00; /* Null terminate the last string */
-
-   if (png_crc_finish(png_ptr, 0))
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   /* Validate the unit. */
-   if (png_ptr->chunkdata[0] != 1 && png_ptr->chunkdata[0] != 2)
-   {
-      png_warning(png_ptr, "Invalid sCAL ignored: invalid unit");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   /* Validate the ASCII numbers, need two ASCII numbers separated by
-    * a '\0' and they need to fit exactly in the chunk data.
-    */
-   i = 1;
-   state = 0;
-
-   if (!png_check_fp_number(png_ptr->chunkdata, slength, &state, &i) ||
-       i >= slength || png_ptr->chunkdata[i++] != 0)
-      png_warning(png_ptr, "Invalid sCAL chunk ignored: bad width format");
-
-   else if (!PNG_FP_IS_POSITIVE(state))
-      png_warning(png_ptr, "Invalid sCAL chunk ignored: non-positive width");
-
-   else
-   {
-      png_size_t heighti = i;
-
-      state = 0;
-      if (!png_check_fp_number(png_ptr->chunkdata, slength, &state, &i) ||
-          i != slength)
-         png_warning(png_ptr, "Invalid sCAL chunk ignored: bad height format");
-
-      else if (!PNG_FP_IS_POSITIVE(state))
-         png_warning(png_ptr,
-            "Invalid sCAL chunk ignored: non-positive height");
-
-      else
-         /* This is the (only) success case. */
-         png_set_sCAL_s(png_ptr, info_ptr, png_ptr->chunkdata[0],
-            png_ptr->chunkdata+1, png_ptr->chunkdata+heighti);
-   }
-
-   /* Clean up - just free the temporarily allocated buffer. */
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = NULL;
-}
-#endif
-
-#ifdef PNG_READ_tIME_SUPPORTED
-void /* PRIVATE */
-png_handle_tIME(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_byte buf[7];
-   png_time mod_time;
-
-   png_debug(1, "in png_handle_tIME");
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Out of place tIME chunk");
-
-   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tIME))
-   {
-      png_warning(png_ptr, "Duplicate tIME chunk");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   if (png_ptr->mode & PNG_HAVE_IDAT)
-      png_ptr->mode |= PNG_AFTER_IDAT;
-
-   if (length != 7)
-   {
-      png_warning(png_ptr, "Incorrect tIME chunk length");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-
-   png_crc_read(png_ptr, buf, 7);
-
-   if (png_crc_finish(png_ptr, 0))
-      return;
-
-   mod_time.second = buf[6];
-   mod_time.minute = buf[5];
-   mod_time.hour = buf[4];
-   mod_time.day = buf[3];
-   mod_time.month = buf[2];
-   mod_time.year = png_get_uint_16(buf);
-
-   png_set_tIME(png_ptr, info_ptr, &mod_time);
-}
-#endif
-
-#ifdef PNG_READ_tEXt_SUPPORTED
-/* Note: this does not properly handle chunks that are > 64K under DOS */
-void /* PRIVATE */
-png_handle_tEXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_textp text_ptr;
-   png_charp key;
-   png_charp text;
-   png_uint_32 skip = 0;
-   png_size_t slength;
-   int ret;
-
-   png_debug(1, "in png_handle_tEXt");
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_cache_max != 0)
-   {
-      if (png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      if (--png_ptr->user_chunk_cache_max == 1)
-      {
-         png_warning(png_ptr, "No space in chunk cache for tEXt");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-   }
-#endif
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before tEXt");
-
-   if (png_ptr->mode & PNG_HAVE_IDAT)
-      png_ptr->mode |= PNG_AFTER_IDAT;
-
-#ifdef PNG_MAX_MALLOC_64K
-   if (length > (png_uint_32)65535L)
-   {
-      png_warning(png_ptr, "tEXt chunk too large to fit in memory");
-      skip = length - (png_uint_32)65535L;
-      length = (png_uint_32)65535L;
-   }
-#endif
-
-   png_free(png_ptr, png_ptr->chunkdata);
-
-   png_ptr->chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1);
-
-   if (png_ptr->chunkdata == NULL)
-   {
-     png_warning(png_ptr, "No memory to process text chunk");
-     return;
-   }
-
-   slength = length;
-   png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength);
-
-   if (png_crc_finish(png_ptr, skip))
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   key = png_ptr->chunkdata;
-
-   key[slength] = 0x00;
-
-   for (text = key; *text; text++)
-      /* Empty loop to find end of key */ ;
-
-   if (text != key + slength)
-      text++;
-
-   text_ptr = (png_textp)png_malloc_warn(png_ptr,
-       png_sizeof(png_text));
-
-   if (text_ptr == NULL)
-   {
-      png_warning(png_ptr, "Not enough memory to process text chunk");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   text_ptr->compression = PNG_TEXT_COMPRESSION_NONE;
-   text_ptr->key = key;
-   text_ptr->lang = NULL;
-   text_ptr->lang_key = NULL;
-   text_ptr->itxt_length = 0;
-   text_ptr->text = text;
-   text_ptr->text_length = png_strlen(text);
-
-   ret = png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
-
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = NULL;
-   png_free(png_ptr, text_ptr);
-
-   if (ret)
-      png_warning(png_ptr, "Insufficient memory to process text chunk");
-}
-#endif
-
-#ifdef PNG_READ_zTXt_SUPPORTED
-/* Note: this does not correctly handle chunks that are > 64K under DOS */
-void /* PRIVATE */
-png_handle_zTXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_textp text_ptr;
-   png_charp text;
-   int comp_type;
-   int ret;
-   png_size_t slength, prefix_len, data_len;
-
-   png_debug(1, "in png_handle_zTXt");
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_cache_max != 0)
-   {
-      if (png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      if (--png_ptr->user_chunk_cache_max == 1)
-      {
-         png_warning(png_ptr, "No space in chunk cache for zTXt");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-   }
-#endif
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before zTXt");
-
-   if (png_ptr->mode & PNG_HAVE_IDAT)
-      png_ptr->mode |= PNG_AFTER_IDAT;
-
-#ifdef PNG_MAX_MALLOC_64K
-   /* We will no doubt have problems with chunks even half this size, but
-    * there is no hard and fast rule to tell us where to stop.
-    */
-   if (length > (png_uint_32)65535L)
-   {
-      png_warning(png_ptr, "zTXt chunk too large to fit in memory");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-#endif
-
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1);
-
-   if (png_ptr->chunkdata == NULL)
-   {
-      png_warning(png_ptr, "Out of memory processing zTXt chunk");
-      return;
-   }
-
-   slength = length;
-   png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength);
-
-   if (png_crc_finish(png_ptr, 0))
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   png_ptr->chunkdata[slength] = 0x00;
-
-   for (text = png_ptr->chunkdata; *text; text++)
-      /* Empty loop */ ;
-
-   /* zTXt must have some text after the chunkdataword */
-   if (text >= png_ptr->chunkdata + slength - 2)
-   {
-      png_warning(png_ptr, "Truncated zTXt chunk");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   else
-   {
-       comp_type = *(++text);
-
-       if (comp_type != PNG_TEXT_COMPRESSION_zTXt)
-       {
-          png_warning(png_ptr, "Unknown compression type in zTXt chunk");
-          comp_type = PNG_TEXT_COMPRESSION_zTXt;
-       }
-
-       text++;        /* Skip the compression_method byte */
-   }
-
-   prefix_len = text - png_ptr->chunkdata;
-
-   png_decompress_chunk(png_ptr, comp_type,
-       (png_size_t)length, prefix_len, &data_len);
-
-   text_ptr = (png_textp)png_malloc_warn(png_ptr,
-       png_sizeof(png_text));
-
-   if (text_ptr == NULL)
-   {
-      png_warning(png_ptr, "Not enough memory to process zTXt chunk");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   text_ptr->compression = comp_type;
-   text_ptr->key = png_ptr->chunkdata;
-   text_ptr->lang = NULL;
-   text_ptr->lang_key = NULL;
-   text_ptr->itxt_length = 0;
-   text_ptr->text = png_ptr->chunkdata + prefix_len;
-   text_ptr->text_length = data_len;
-
-   ret = png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
-
-   png_free(png_ptr, text_ptr);
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = NULL;
-
-   if (ret)
-      png_error(png_ptr, "Insufficient memory to store zTXt chunk");
-}
-#endif
-
-#ifdef PNG_READ_iTXt_SUPPORTED
-/* Note: this does not correctly handle chunks that are > 64K under DOS */
-void /* PRIVATE */
-png_handle_iTXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_textp text_ptr;
-   png_charp key, lang, text, lang_key;
-   int comp_flag;
-   int comp_type = 0;
-   int ret;
-   png_size_t slength, prefix_len, data_len;
-
-   png_debug(1, "in png_handle_iTXt");
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_cache_max != 0)
-   {
-      if (png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      if (--png_ptr->user_chunk_cache_max == 1)
-      {
-         png_warning(png_ptr, "No space in chunk cache for iTXt");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-   }
-#endif
-
-   if (!(png_ptr->mode & PNG_HAVE_IHDR))
-      png_error(png_ptr, "Missing IHDR before iTXt");
-
-   if (png_ptr->mode & PNG_HAVE_IDAT)
-      png_ptr->mode |= PNG_AFTER_IDAT;
-
-#ifdef PNG_MAX_MALLOC_64K
-   /* We will no doubt have problems with chunks even half this size, but
-    * there is no hard and fast rule to tell us where to stop.
-    */
-   if (length > (png_uint_32)65535L)
-   {
-      png_warning(png_ptr, "iTXt chunk too large to fit in memory");
-      png_crc_finish(png_ptr, length);
-      return;
-   }
-#endif
-
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1);
-
-   if (png_ptr->chunkdata == NULL)
-   {
-      png_warning(png_ptr, "No memory to process iTXt chunk");
-      return;
-   }
-
-   slength = length;
-   png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength);
-
-   if (png_crc_finish(png_ptr, 0))
-   {
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   png_ptr->chunkdata[slength] = 0x00;
-
-   for (lang = png_ptr->chunkdata; *lang; lang++)
-      /* Empty loop */ ;
-
-   lang++;        /* Skip NUL separator */
-
-   /* iTXt must have a language tag (possibly empty), two compression bytes,
-    * translated keyword (possibly empty), and possibly some text after the
-    * keyword
-    */
-
-   if (lang >= png_ptr->chunkdata + slength - 3)
-   {
-      png_warning(png_ptr, "Truncated iTXt chunk");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   else
-   {
-      comp_flag = *lang++;
-      comp_type = *lang++;
-   }
-
-   if (comp_type || (comp_flag && comp_flag != PNG_TEXT_COMPRESSION_zTXt))
-   {
-      png_warning(png_ptr, "Unknown iTXt compression type or method");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   for (lang_key = lang; *lang_key; lang_key++)
-      /* Empty loop */ ;
-
-   lang_key++;        /* Skip NUL separator */
-
-   if (lang_key >= png_ptr->chunkdata + slength)
-   {
-      png_warning(png_ptr, "Truncated iTXt chunk");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   for (text = lang_key; *text; text++)
-      /* Empty loop */ ;
-
-   text++;        /* Skip NUL separator */
-
-   if (text >= png_ptr->chunkdata + slength)
-   {
-      png_warning(png_ptr, "Malformed iTXt chunk");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   prefix_len = text - png_ptr->chunkdata;
-
-   key=png_ptr->chunkdata;
-
-   if (comp_flag)
-      png_decompress_chunk(png_ptr, comp_type,
-          (size_t)length, prefix_len, &data_len);
-
-   else
-      data_len = png_strlen(png_ptr->chunkdata + prefix_len);
-
-   text_ptr = (png_textp)png_malloc_warn(png_ptr,
-       png_sizeof(png_text));
-
-   if (text_ptr == NULL)
-   {
-      png_warning(png_ptr, "Not enough memory to process iTXt chunk");
-      png_free(png_ptr, png_ptr->chunkdata);
-      png_ptr->chunkdata = NULL;
-      return;
-   }
-
-   text_ptr->compression = (int)comp_flag + 1;
-   text_ptr->lang_key = png_ptr->chunkdata + (lang_key - key);
-   text_ptr->lang = png_ptr->chunkdata + (lang - key);
-   text_ptr->itxt_length = data_len;
-   text_ptr->text_length = 0;
-   text_ptr->key = png_ptr->chunkdata;
-   text_ptr->text = png_ptr->chunkdata + prefix_len;
-
-   ret = png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
-
-   png_free(png_ptr, text_ptr);
-   png_free(png_ptr, png_ptr->chunkdata);
-   png_ptr->chunkdata = NULL;
-
-   if (ret)
-      png_error(png_ptr, "Insufficient memory to store iTXt chunk");
-}
-#endif
-
-/* This function is called when we haven't found a handler for a
- * chunk.  If there isn't a problem with the chunk itself (ie bad
- * chunk name, CRC, or a critical chunk), the chunk is silently ignored
- * -- unless the PNG_FLAG_UNKNOWN_CHUNKS_SUPPORTED flag is on in which
- * case it will be saved away to be written out later.
- */
-void /* PRIVATE */
-png_handle_unknown(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
-{
-   png_uint_32 skip = 0;
-
-   png_debug(1, "in png_handle_unknown");
-
-#ifdef PNG_USER_LIMITS_SUPPORTED
-   if (png_ptr->user_chunk_cache_max != 0)
-   {
-      if (png_ptr->user_chunk_cache_max == 1)
-      {
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-
-      if (--png_ptr->user_chunk_cache_max == 1)
-      {
-         png_warning(png_ptr, "No space in chunk cache for unknown chunk");
-         png_crc_finish(png_ptr, length);
-         return;
-      }
-   }
-#endif
-
-   if (png_ptr->mode & PNG_HAVE_IDAT)
-   {
-      if (png_ptr->chunk_name != png_IDAT)
-         png_ptr->mode |= PNG_AFTER_IDAT;
-   }
-
-   if (PNG_CHUNK_CRITICAL(png_ptr->chunk_name))
-   {
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-      if (png_chunk_unknown_handling(png_ptr, png_ptr->chunk_name) !=
-          PNG_HANDLE_CHUNK_ALWAYS
-#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
-          && png_ptr->read_user_chunk_fn == NULL
-#endif
-          )
-#endif
-         png_chunk_error(png_ptr, "unknown critical chunk");
-   }
-
-#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
-   if ((png_ptr->flags & PNG_FLAG_KEEP_UNKNOWN_CHUNKS)
-#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
-       || (png_ptr->read_user_chunk_fn != NULL)
-#endif
-       )
-   {
-#ifdef PNG_MAX_MALLOC_64K
-      if (length > 65535)
-      {
-         png_warning(png_ptr, "unknown chunk too large to fit in memory");
-         skip = length - 65535;
-         length = 65535;
-      }
-#endif
-
-      /* TODO: this code is very close to the unknown handling in pngpread.c,
-       * maybe it can be put into a common utility routine?
-       * png_struct::unknown_chunk is just used as a temporary variable, along
-       * with the data into which the chunk is read.  These can be eliminated.
-       */
-      PNG_CSTRING_FROM_CHUNK(png_ptr->unknown_chunk.name, png_ptr->chunk_name);
-      png_ptr->unknown_chunk.size = (png_size_t)length;
-
-      if (length == 0)
-         png_ptr->unknown_chunk.data = NULL;
-
-      else
-      {
-         png_ptr->unknown_chunk.data = (png_bytep)png_malloc(png_ptr, length);
-         png_crc_read(png_ptr, png_ptr->unknown_chunk.data, length);
-      }
-
-#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
-      if (png_ptr->read_user_chunk_fn != NULL)
-      {
-         /* Callback to user unknown chunk handler */
-         int ret;
-
-         ret = (*(png_ptr->read_user_chunk_fn))
-             (png_ptr, &png_ptr->unknown_chunk);
-
-         if (ret < 0)
-            png_chunk_error(png_ptr, "error in user chunk");
-
-         if (ret == 0)
-         {
-            if (PNG_CHUNK_CRITICAL(png_ptr->chunk_name))
-            {
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-               if (png_chunk_unknown_handling(png_ptr, png_ptr->chunk_name) !=
-                   PNG_HANDLE_CHUNK_ALWAYS)
-#endif
-                  png_chunk_error(png_ptr, "unknown critical chunk");
-            }
-
-            png_set_unknown_chunks(png_ptr, info_ptr,
-                &png_ptr->unknown_chunk, 1);
-         }
-      }
-
-      else
-#endif
-         png_set_unknown_chunks(png_ptr, info_ptr, &png_ptr->unknown_chunk, 1);
-
-      png_free(png_ptr, png_ptr->unknown_chunk.data);
-      png_ptr->unknown_chunk.data = NULL;
-   }
-
-   else
-#endif
-      skip = length;
-
-   png_crc_finish(png_ptr, skip);
-
-#ifndef PNG_READ_USER_CHUNKS_SUPPORTED
-   PNG_UNUSED(info_ptr) /* Quiet compiler warnings about unused info_ptr */
-#endif
-}
-
-/* This function is called to verify that a chunk name is valid.
- * This function can't have the "critical chunk check" incorporated
- * into it, since in the future we will need to be able to call user
- * functions to handle unknown critical chunks after we check that
- * the chunk name itself is valid.
- */
-
-/* Bit hacking: the test for an invalid byte in the 4 byte chunk name is:
- *
- * ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97))
- */
-
-void /* PRIVATE */
-png_check_chunk_name(png_structp png_ptr, png_uint_32 chunk_name)
-{
-   int i;
-
-   png_debug(1, "in png_check_chunk_name");
-
-   for (i=1; i<=4; ++i)
-   {
-      int c = chunk_name & 0xff;
-
-      if (c < 65 || c > 122 || (c > 90 && c < 97))
-         png_chunk_error(png_ptr, "invalid chunk type");
-
-      chunk_name >>= 8;
-   }
-}
-
-/* Combines the row recently read in with the existing pixels in the row.  This
- * routine takes care of alpha and transparency if requested.  This routine also
- * handles the two methods of progressive display of interlaced images,
- * depending on the 'display' value; if 'display' is true then the whole row
- * (dp) is filled from the start by replicating the available pixels.  If
- * 'display' is false only those pixels present in the pass are filled in.
- */
-void /* PRIVATE */
-png_combine_row(png_structp png_ptr, png_bytep dp, int display)
-{
-   unsigned int pixel_depth = png_ptr->transformed_pixel_depth;
-   png_const_bytep sp = png_ptr->row_buf + 1;
-   png_uint_32 row_width = png_ptr->width;
-   unsigned int pass = png_ptr->pass;
-   png_bytep end_ptr = 0;
-   png_byte end_byte = 0;
-   unsigned int end_mask;
-
-   png_debug(1, "in png_combine_row");
-
-   /* Added in 1.5.6: it should not be possible to enter this routine until at
-    * least one row has been read from the PNG data and transformed.
-    */
-   if (pixel_depth == 0)
-      png_error(png_ptr, "internal row logic error");
-
-   /* Added in 1.5.4: the pixel depth should match the information returned by
-    * any call to png_read_update_info at this point.  Do not continue if we got
-    * this wrong.
-    */
-   if (png_ptr->info_rowbytes != 0 && png_ptr->info_rowbytes !=
-          PNG_ROWBYTES(pixel_depth, row_width))
-      png_error(png_ptr, "internal row size calculation error");
-
-   /* Don't expect this to ever happen: */
-   if (row_width == 0)
-      png_error(png_ptr, "internal row width error");
-
-   /* Preserve the last byte in cases where only part of it will be overwritten,
-    * the multiply below may overflow, we don't care because ANSI-C guarantees
-    * we get the low bits.
-    */
-   end_mask = (pixel_depth * row_width) & 7;
-   if (end_mask != 0)
-   {
-      /* end_ptr == NULL is a flag to say do nothing */
-      end_ptr = dp + PNG_ROWBYTES(pixel_depth, row_width) - 1;
-      end_byte = *end_ptr;
-#     ifdef PNG_READ_PACKSWAP_SUPPORTED
-         if (png_ptr->transformations & PNG_PACKSWAP) /* little-endian byte */
-            end_mask = 0xff << end_mask;
-
-         else /* big-endian byte */
-#     endif
-         end_mask = 0xff >> end_mask;
-      /* end_mask is now the bits to *keep* from the destination row */
-   }
-
-   /* For non-interlaced images this reduces to a png_memcpy(). A png_memcpy()
-    * will also happen if interlacing isn't supported or if the application
-    * does not call png_set_interlace_handling().  In the latter cases the
-    * caller just gets a sequence of the unexpanded rows from each interlace
-    * pass.
-    */
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE) &&
-      pass < 6 && (display == 0 ||
-      /* The following copies everything for 'display' on passes 0, 2 and 4. */
-      (display == 1 && (pass & 1) != 0)))
-   {
-      /* Narrow images may have no bits in a pass; the caller should handle
-       * this, but this test is cheap:
-       */
-      if (row_width <= PNG_PASS_START_COL(pass))
-         return;
-
-      if (pixel_depth < 8)
-      {
-         /* For pixel depths up to 4 bpp the 8-pixel mask can be expanded to fit
-          * into 32 bits, then a single loop over the bytes using the four byte
-          * values in the 32-bit mask can be used.  For the 'display' option the
-          * expanded mask may also not require any masking within a byte.  To
-          * make this work the PACKSWAP option must be taken into account - it
-          * simply requires the pixels to be reversed in each byte.
-          *
-          * The 'regular' case requires a mask for each of the first 6 passes,
-          * the 'display' case does a copy for the even passes in the range
-          * 0..6.  This has already been handled in the test above.
-          *
-          * The masks are arranged as four bytes with the first byte to use in
-          * the lowest bits (little-endian) regardless of the order (PACKSWAP or
-          * not) of the pixels in each byte.
-          *
-          * NOTE: the whole of this logic depends on the caller of this function
-          * only calling it on rows appropriate to the pass.  This function only
-          * understands the 'x' logic; the 'y' logic is handled by the caller.
-          *
-          * The following defines allow generation of compile time constant bit
-          * masks for each pixel depth and each possibility of swapped or not
-          * swapped bytes.  Pass 'p' is in the range 0..6; 'x', a pixel index,
-          * is in the range 0..7; and the result is 1 if the pixel is to be
-          * copied in the pass, 0 if not.  'S' is for the sparkle method, 'B'
-          * for the block method.
-          *
-          * With some compilers a compile time expression of the general form:
-          *
-          *    (shift >= 32) ? (a >> (shift-32)) : (b >> shift)
-          *
-          * Produces warnings with values of 'shift' in the range 33 to 63
-          * because the right hand side of the ?: expression is evaluated by
-          * the compiler even though it isn't used.  Microsoft Visual C (various
-          * versions) and the Intel C compiler are known to do this.  To avoid
-          * this the following macros are used in 1.5.6.  This is a temporary
-          * solution to avoid destabilizing the code during the release process.
-          */
-#        if PNG_USE_COMPILE_TIME_MASKS
-#           define PNG_LSR(x,s) ((x)>>((s) & 0x1f))
-#           define PNG_LSL(x,s) ((x)<<((s) & 0x1f))
-#        else
-#           define PNG_LSR(x,s) ((x)>>(s))
-#           define PNG_LSL(x,s) ((x)<<(s))
-#        endif
-#        define S_COPY(p,x) (((p)<4 ? PNG_LSR(0x80088822,(3-(p))*8+(7-(x))) :\
-           PNG_LSR(0xaa55ff00,(7-(p))*8+(7-(x)))) & 1)
-#        define B_COPY(p,x) (((p)<4 ? PNG_LSR(0xff0fff33,(3-(p))*8+(7-(x))) :\
-           PNG_LSR(0xff55ff00,(7-(p))*8+(7-(x)))) & 1)
-
-         /* Return a mask for pass 'p' pixel 'x' at depth 'd'.  The mask is
-          * little endian - the first pixel is at bit 0 - however the extra
-          * parameter 's' can be set to cause the mask position to be swapped
-          * within each byte, to match the PNG format.  This is done by XOR of
-          * the shift with 7, 6 or 4 for bit depths 1, 2 and 4.
-          */
-#        define PIXEL_MASK(p,x,d,s) \
-            (PNG_LSL(((PNG_LSL(1U,(d)))-1),(((x)*(d))^((s)?8-(d):0))))
-
-         /* Hence generate the appropriate 'block' or 'sparkle' pixel copy mask.
-          */
-#        define S_MASKx(p,x,d,s) (S_COPY(p,x)?PIXEL_MASK(p,x,d,s):0)
-#        define B_MASKx(p,x,d,s) (B_COPY(p,x)?PIXEL_MASK(p,x,d,s):0)
-
-         /* Combine 8 of these to get the full mask.  For the 1-bpp and 2-bpp
-          * cases the result needs replicating, for the 4-bpp case the above
-          * generates a full 32 bits.
-          */
-#        define MASK_EXPAND(m,d) ((m)*((d)==1?0x01010101:((d)==2?0x00010001:1)))
-
-#        define S_MASK(p,d,s) MASK_EXPAND(S_MASKx(p,0,d,s) + S_MASKx(p,1,d,s) +\
-            S_MASKx(p,2,d,s) + S_MASKx(p,3,d,s) + S_MASKx(p,4,d,s) +\
-            S_MASKx(p,5,d,s) + S_MASKx(p,6,d,s) + S_MASKx(p,7,d,s), d)
-
-#        define B_MASK(p,d,s) MASK_EXPAND(B_MASKx(p,0,d,s) + B_MASKx(p,1,d,s) +\
-            B_MASKx(p,2,d,s) + B_MASKx(p,3,d,s) + B_MASKx(p,4,d,s) +\
-            B_MASKx(p,5,d,s) + B_MASKx(p,6,d,s) + B_MASKx(p,7,d,s), d)
-
-#if PNG_USE_COMPILE_TIME_MASKS
-         /* Utility macros to construct all the masks for a depth/swap
-          * combination.  The 's' parameter says whether the format is PNG
-          * (big endian bytes) or not.  Only the three odd-numbered passes are
-          * required for the display/block algorithm.
-          */
-#        define S_MASKS(d,s) { S_MASK(0,d,s), S_MASK(1,d,s), S_MASK(2,d,s),\
-            S_MASK(3,d,s), S_MASK(4,d,s), S_MASK(5,d,s) }
-
-#        define B_MASKS(d,s) { B_MASK(1,d,s), S_MASK(3,d,s), S_MASK(5,d,s) }
-
-#        define DEPTH_INDEX(d) ((d)==1?0:((d)==2?1:2))
-
-         /* Hence the pre-compiled masks indexed by PACKSWAP (or not), depth and
-          * then pass:
-          */
-         static PNG_CONST png_uint_32 row_mask[2/*PACKSWAP*/][3/*depth*/][6] =
-         {
-            /* Little-endian byte masks for PACKSWAP */
-            { S_MASKS(1,0), S_MASKS(2,0), S_MASKS(4,0) },
-            /* Normal (big-endian byte) masks - PNG format */
-            { S_MASKS(1,1), S_MASKS(2,1), S_MASKS(4,1) }
-         };
-
-         /* display_mask has only three entries for the odd passes, so index by
-          * pass>>1.
-          */
-         static PNG_CONST png_uint_32 display_mask[2][3][3] =
-         {
-            /* Little-endian byte masks for PACKSWAP */
-            { B_MASKS(1,0), B_MASKS(2,0), B_MASKS(4,0) },
-            /* Normal (big-endian byte) masks - PNG format */
-            { B_MASKS(1,1), B_MASKS(2,1), B_MASKS(4,1) }
-         };
-
-#        define MASK(pass,depth,display,png)\
-            ((display)?display_mask[png][DEPTH_INDEX(depth)][pass>>1]:\
-               row_mask[png][DEPTH_INDEX(depth)][pass])
-
-#else /* !PNG_USE_COMPILE_TIME_MASKS */
-         /* This is the runtime alternative: it seems unlikely that this will
-          * ever be either smaller or faster than the compile time approach.
-          */
-#        define MASK(pass,depth,display,png)\
-            ((display)?B_MASK(pass,depth,png):S_MASK(pass,depth,png))
-#endif /* !PNG_USE_COMPILE_TIME_MASKS */
-
-         /* Use the appropriate mask to copy the required bits.  In some cases
-          * the byte mask will be 0 or 0xff, optimize these cases.  row_width is
-          * the number of pixels, but the code copies bytes, so it is necessary
-          * to special case the end.
-          */
-         png_uint_32 pixels_per_byte = 8 / pixel_depth;
-         png_uint_32 mask;
-
-#        ifdef PNG_READ_PACKSWAP_SUPPORTED
-            if (png_ptr->transformations & PNG_PACKSWAP)
-               mask = MASK(pass, pixel_depth, display, 0);
-
-            else
-#        endif
-            mask = MASK(pass, pixel_depth, display, 1);
-
-         for (;;)
-         {
-            png_uint_32 m;
-
-            /* It doesn't matter in the following if png_uint_32 has more than
-             * 32 bits because the high bits always match those in m<<24; it is,
-             * however, essential to use OR here, not +, because of this.
-             */
-            m = mask;
-            mask = (m >> 8) | (m << 24); /* rotate right to good compilers */
-            m &= 0xff;
-
-            if (m != 0) /* something to copy */
-            {
-               if (m != 0xff)
-                  *dp = (png_byte)((*dp & ~m) | (*sp & m));
-               else
-                  *dp = *sp;
-            }
-
-            /* NOTE: this may overwrite the last byte with garbage if the image
-             * is not an exact number of bytes wide; libpng has always done
-             * this.
-             */
-            if (row_width <= pixels_per_byte)
-               break; /* May need to restore part of the last byte */
-
-            row_width -= pixels_per_byte;
-            ++dp;
-            ++sp;
-         }
-      }
-
-      else /* pixel_depth >= 8 */
-      {
-         unsigned int bytes_to_copy, bytes_to_jump;
-
-         /* Validate the depth - it must be a multiple of 8 */
-         if (pixel_depth & 7)
-            png_error(png_ptr, "invalid user transform pixel depth");
-
-         pixel_depth >>= 3; /* now in bytes */
-         row_width *= pixel_depth;
-
-         /* Regardless of pass number the Adam 7 interlace always results in a
-          * fixed number of pixels to copy then to skip.  There may be a
-          * different number of pixels to skip at the start though.
-          */
-         {
-            unsigned int offset = PNG_PASS_START_COL(pass) * pixel_depth;
-
-            row_width -= offset;
-            dp += offset;
-            sp += offset;
-         }
-
-         /* Work out the bytes to copy. */
-         if (display)
-         {
-            /* When doing the 'block' algorithm the pixel in the pass gets
-             * replicated to adjacent pixels.  This is why the even (0,2,4,6)
-             * passes are skipped above - the entire expanded row is copied.
-             */
-            bytes_to_copy = (1<<((6-pass)>>1)) * pixel_depth;
-
-            /* But don't allow this number to exceed the actual row width. */
-            if (bytes_to_copy > row_width)
-               bytes_to_copy = row_width;
-         }
-
-         else /* normal row; Adam7 only ever gives us one pixel to copy. */
-            bytes_to_copy = pixel_depth;
-
-         /* In Adam7 there is a constant offset between where the pixels go. */
-         bytes_to_jump = PNG_PASS_COL_OFFSET(pass) * pixel_depth;
-
-         /* And simply copy these bytes.  Some optimization is possible here,
-          * depending on the value of 'bytes_to_copy'.  Special case the low
-          * byte counts, which we know to be frequent.
-          *
-          * Notice that these cases all 'return' rather than 'break' - this
-          * avoids an unnecessary test on whether to restore the last byte
-          * below.
-          */
-         switch (bytes_to_copy)
-         {
-            case 1:
-               for (;;)
-               {
-                  *dp = *sp;
-
-                  if (row_width <= bytes_to_jump)
-                     return;
-
-                  dp += bytes_to_jump;
-                  sp += bytes_to_jump;
-                  row_width -= bytes_to_jump;
-               }
-
-            case 2:
-               /* There is a possibility of a partial copy at the end here; this
-                * slows the code down somewhat.
-                */
-               do
-               {
-                  dp[0] = sp[0], dp[1] = sp[1];
-
-                  if (row_width <= bytes_to_jump)
-                     return;
-
-                  sp += bytes_to_jump;
-                  dp += bytes_to_jump;
-                  row_width -= bytes_to_jump;
-               }
-               while (row_width > 1);
-
-               /* And there can only be one byte left at this point: */
-               *dp = *sp;
-               return;
-
-            case 3:
-               /* This can only be the RGB case, so each copy is exactly one
-                * pixel and it is not necessary to check for a partial copy.
-                */
-               for(;;)
-               {
-                  dp[0] = sp[0], dp[1] = sp[1], dp[2] = sp[2];
-
-                  if (row_width <= bytes_to_jump)
-                     return;
-
-                  sp += bytes_to_jump;
-                  dp += bytes_to_jump;
-                  row_width -= bytes_to_jump;
-               }
-
-            default:
-#if PNG_ALIGN_TYPE != PNG_ALIGN_NONE
-               /* Check for double byte alignment and, if possible, use a
-                * 16-bit copy.  Don't attempt this for narrow images - ones that
-                * are less than an interlace panel wide.  Don't attempt it for
-                * wide bytes_to_copy either - use the png_memcpy there.
-                */
-               if (bytes_to_copy < 16 /*else use png_memcpy*/ &&
-                  png_isaligned(dp, png_uint_16) &&
-                  png_isaligned(sp, png_uint_16) &&
-                  bytes_to_copy % sizeof (png_uint_16) == 0 &&
-                  bytes_to_jump % sizeof (png_uint_16) == 0)
-               {
-                  /* Everything is aligned for png_uint_16 copies, but try for
-                   * png_uint_32 first.
-                   */
-                  if (png_isaligned(dp, png_uint_32) &&
-                     png_isaligned(sp, png_uint_32) &&
-                     bytes_to_copy % sizeof (png_uint_32) == 0 &&
-                     bytes_to_jump % sizeof (png_uint_32) == 0)
-                  {
-                     png_uint_32p dp32 = (png_uint_32p)dp;
-                     png_const_uint_32p sp32 = (png_const_uint_32p)sp;
-                     unsigned int skip = (bytes_to_jump-bytes_to_copy) /
-                        sizeof (png_uint_32);
-
-                     do
-                     {
-                        size_t c = bytes_to_copy;
-                        do
-                        {
-                           *dp32++ = *sp32++;
-                           c -= sizeof (png_uint_32);
-                        }
-                        while (c > 0);
-
-                        if (row_width <= bytes_to_jump)
-                           return;
-
-                        dp32 += skip;
-                        sp32 += skip;
-                        row_width -= bytes_to_jump;
-                     }
-                     while (bytes_to_copy <= row_width);
-
-                     /* Get to here when the row_width truncates the final copy.
-                      * There will be 1-3 bytes left to copy, so don't try the
-                      * 16-bit loop below.
-                      */
-                     dp = (png_bytep)dp32;
-                     sp = (png_const_bytep)sp32;
-                     do
-                        *dp++ = *sp++;
-                     while (--row_width > 0);
-                     return;
-                  }
-
-                  /* Else do it in 16-bit quantities, but only if the size is
-                   * not too large.
-                   */
-                  else
-                  {
-                     png_uint_16p dp16 = (png_uint_16p)dp;
-                     png_const_uint_16p sp16 = (png_const_uint_16p)sp;
-                     unsigned int skip = (bytes_to_jump-bytes_to_copy) /
-                        sizeof (png_uint_16);
-
-                     do
-                     {
-                        size_t c = bytes_to_copy;
-                        do
-                        {
-                           *dp16++ = *sp16++;
-                           c -= sizeof (png_uint_16);
-                        }
-                        while (c > 0);
-
-                        if (row_width <= bytes_to_jump)
-                           return;
-
-                        dp16 += skip;
-                        sp16 += skip;
-                        row_width -= bytes_to_jump;
-                     }
-                     while (bytes_to_copy <= row_width);
-
-                     /* End of row - 1 byte left, bytes_to_copy > row_width: */
-                     dp = (png_bytep)dp16;
-                     sp = (png_const_bytep)sp16;
-                     do
-                        *dp++ = *sp++;
-                     while (--row_width > 0);
-                     return;
-                  }
-               }
-#endif /* PNG_ALIGN_ code */
-
-               /* The true default - use a png_memcpy: */
-               for (;;)
-               {
-                  png_memcpy(dp, sp, bytes_to_copy);
-
-                  if (row_width <= bytes_to_jump)
-                     return;
-
-                  sp += bytes_to_jump;
-                  dp += bytes_to_jump;
-                  row_width -= bytes_to_jump;
-                  if (bytes_to_copy > row_width)
-                     bytes_to_copy = row_width;
-               }
-         }
-
-         /* NOT REACHED*/
-      } /* pixel_depth >= 8 */
-
-      /* Here if pixel_depth < 8 to check 'end_ptr' below. */
-   }
-   else
-#endif
-
-   /* If here then the switch above wasn't used so just png_memcpy the whole row
-    * from the temporary row buffer (notice that this overwrites the end of the
-    * destination row if it is a partial byte.)
-    */
-   png_memcpy(dp, sp, PNG_ROWBYTES(pixel_depth, row_width));
-
-   /* Restore the overwritten bits from the last byte if necessary. */
-   if (end_ptr != NULL)
-      *end_ptr = (png_byte)((end_byte & end_mask) | (*end_ptr & ~end_mask));
-}
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-void /* PRIVATE */
-png_do_read_interlace(png_row_infop row_info, png_bytep row, int pass,
-   png_uint_32 transformations /* Because these may affect the byte layout */)
-{
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-   /* Offset to next interlace block */
-   static PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   png_debug(1, "in png_do_read_interlace");
-   if (row != NULL && row_info != NULL)
-   {
-      png_uint_32 final_width;
-
-      final_width = row_info->width * png_pass_inc[pass];
-
-      switch (row_info->pixel_depth)
-      {
-         case 1:
-         {
-            png_bytep sp = row + (png_size_t)((row_info->width - 1) >> 3);
-            png_bytep dp = row + (png_size_t)((final_width - 1) >> 3);
-            int sshift, dshift;
-            int s_start, s_end, s_inc;
-            int jstop = png_pass_inc[pass];
-            png_byte v;
-            png_uint_32 i;
-            int j;
-
-#ifdef PNG_READ_PACKSWAP_SUPPORTED
-            if (transformations & PNG_PACKSWAP)
-            {
-                sshift = (int)((row_info->width + 7) & 0x07);
-                dshift = (int)((final_width + 7) & 0x07);
-                s_start = 7;
-                s_end = 0;
-                s_inc = -1;
-            }
-
-            else
-#endif
-            {
-                sshift = 7 - (int)((row_info->width + 7) & 0x07);
-                dshift = 7 - (int)((final_width + 7) & 0x07);
-                s_start = 0;
-                s_end = 7;
-                s_inc = 1;
-            }
-
-            for (i = 0; i < row_info->width; i++)
-            {
-               v = (png_byte)((*sp >> sshift) & 0x01);
-               for (j = 0; j < jstop; j++)
-               {
-                  *dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff);
-                  *dp |= (png_byte)(v << dshift);
-
-                  if (dshift == s_end)
-                  {
-                     dshift = s_start;
-                     dp--;
-                  }
-
-                  else
-                     dshift += s_inc;
-               }
-
-               if (sshift == s_end)
-               {
-                  sshift = s_start;
-                  sp--;
-               }
-
-               else
-                  sshift += s_inc;
-            }
-            break;
-         }
-
-         case 2:
-         {
-            png_bytep sp = row + (png_uint_32)((row_info->width - 1) >> 2);
-            png_bytep dp = row + (png_uint_32)((final_width - 1) >> 2);
-            int sshift, dshift;
-            int s_start, s_end, s_inc;
-            int jstop = png_pass_inc[pass];
-            png_uint_32 i;
-
-#ifdef PNG_READ_PACKSWAP_SUPPORTED
-            if (transformations & PNG_PACKSWAP)
-            {
-               sshift = (int)(((row_info->width + 3) & 0x03) << 1);
-               dshift = (int)(((final_width + 3) & 0x03) << 1);
-               s_start = 6;
-               s_end = 0;
-               s_inc = -2;
-            }
-
-            else
-#endif
-            {
-               sshift = (int)((3 - ((row_info->width + 3) & 0x03)) << 1);
-               dshift = (int)((3 - ((final_width + 3) & 0x03)) << 1);
-               s_start = 0;
-               s_end = 6;
-               s_inc = 2;
-            }
-
-            for (i = 0; i < row_info->width; i++)
-            {
-               png_byte v;
-               int j;
-
-               v = (png_byte)((*sp >> sshift) & 0x03);
-               for (j = 0; j < jstop; j++)
-               {
-                  *dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff);
-                  *dp |= (png_byte)(v << dshift);
-
-                  if (dshift == s_end)
-                  {
-                     dshift = s_start;
-                     dp--;
-                  }
-
-                  else
-                     dshift += s_inc;
-               }
-
-               if (sshift == s_end)
-               {
-                  sshift = s_start;
-                  sp--;
-               }
-
-               else
-                  sshift += s_inc;
-            }
-            break;
-         }
-
-         case 4:
-         {
-            png_bytep sp = row + (png_size_t)((row_info->width - 1) >> 1);
-            png_bytep dp = row + (png_size_t)((final_width - 1) >> 1);
-            int sshift, dshift;
-            int s_start, s_end, s_inc;
-            png_uint_32 i;
-            int jstop = png_pass_inc[pass];
-
-#ifdef PNG_READ_PACKSWAP_SUPPORTED
-            if (transformations & PNG_PACKSWAP)
-            {
-               sshift = (int)(((row_info->width + 1) & 0x01) << 2);
-               dshift = (int)(((final_width + 1) & 0x01) << 2);
-               s_start = 4;
-               s_end = 0;
-               s_inc = -4;
-            }
-
-            else
-#endif
-            {
-               sshift = (int)((1 - ((row_info->width + 1) & 0x01)) << 2);
-               dshift = (int)((1 - ((final_width + 1) & 0x01)) << 2);
-               s_start = 0;
-               s_end = 4;
-               s_inc = 4;
-            }
-
-            for (i = 0; i < row_info->width; i++)
-            {
-               png_byte v = (png_byte)((*sp >> sshift) & 0x0f);
-               int j;
-
-               for (j = 0; j < jstop; j++)
-               {
-                  *dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff);
-                  *dp |= (png_byte)(v << dshift);
-
-                  if (dshift == s_end)
-                  {
-                     dshift = s_start;
-                     dp--;
-                  }
-
-                  else
-                     dshift += s_inc;
-               }
-
-               if (sshift == s_end)
-               {
-                  sshift = s_start;
-                  sp--;
-               }
-
-               else
-                  sshift += s_inc;
-            }
-            break;
-         }
-
-         default:
-         {
-            png_size_t pixel_bytes = (row_info->pixel_depth >> 3);
-
-            png_bytep sp = row + (png_size_t)(row_info->width - 1)
-                * pixel_bytes;
-
-            png_bytep dp = row + (png_size_t)(final_width - 1) * pixel_bytes;
-
-            int jstop = png_pass_inc[pass];
-            png_uint_32 i;
-
-            for (i = 0; i < row_info->width; i++)
-            {
-               png_byte v[8];
-               int j;
-
-               png_memcpy(v, sp, pixel_bytes);
-
-               for (j = 0; j < jstop; j++)
-               {
-                  png_memcpy(dp, v, pixel_bytes);
-                  dp -= pixel_bytes;
-               }
-
-               sp -= pixel_bytes;
-            }
-            break;
-         }
-      }
-
-      row_info->width = final_width;
-      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, final_width);
-   }
-#ifndef PNG_READ_PACKSWAP_SUPPORTED
-   PNG_UNUSED(transformations)  /* Silence compiler warning */
-#endif
-}
-#endif /* PNG_READ_INTERLACING_SUPPORTED */
-
-static void
-png_read_filter_row_sub(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_size_t i;
-   png_size_t istop = row_info->rowbytes;
-   unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
-   png_bytep rp = row + bpp;
-
-   PNG_UNUSED(prev_row)
-
-   for (i = bpp; i < istop; i++)
-   {
-      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
-      rp++;
-   }
-}
-
-static void
-png_read_filter_row_up(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_size_t i;
-   png_size_t istop = row_info->rowbytes;
-   png_bytep rp = row;
-   png_const_bytep pp = prev_row;
-
-   for (i = 0; i < istop; i++)
-   {
-      *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
-      rp++;
-   }
-}
-
-static void
-png_read_filter_row_avg(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_size_t i;
-   png_bytep rp = row;
-   png_const_bytep pp = prev_row;
-   unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
-   png_size_t istop = row_info->rowbytes - bpp;
-
-   for (i = 0; i < bpp; i++)
-   {
-      *rp = (png_byte)(((int)(*rp) +
-         ((int)(*pp++) / 2 )) & 0xff);
-
-      rp++;
-   }
-
-   for (i = 0; i < istop; i++)
-   {
-      *rp = (png_byte)(((int)(*rp) +
-         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
-
-      rp++;
-   }
-}
-
-static void
-png_read_filter_row_paeth_1byte_pixel(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   png_bytep rp_end = row + row_info->rowbytes;
-   int a, c;
-
-   /* First pixel/byte */
-   c = *prev_row++;
-   a = *row + c;
-   *row++ = (png_byte)a;
-
-   /* Remainder */
-   while (row < rp_end)
-   {
-      int b, pa, pb, pc, p;
-
-      a &= 0xff; /* From previous iteration or start */
-      b = *prev_row++;
-
-      p = b - c;
-      pc = a - c;
-
-#     ifdef PNG_USE_ABS
-         pa = abs(p);
-         pb = abs(pc);
-         pc = abs(p + pc);
-#     else
-         pa = p < 0 ? -p : p;
-         pb = pc < 0 ? -pc : pc;
-         pc = (p + pc) < 0 ? -(p + pc) : p + pc;
-#     endif
-
-      /* Find the best predictor, the least of pa, pb, pc favoring the earlier
-       * ones in the case of a tie.
-       */
-      if (pb < pa) pa = pb, a = b;
-      if (pc < pa) a = c;
-
-      /* Calculate the current pixel in a, and move the previous row pixel to c
-       * for the next time round the loop
-       */
-      c = b;
-      a += *row;
-      *row++ = (png_byte)a;
-   }
-}
-
-static void
-png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row)
-{
-   int bpp = (row_info->pixel_depth + 7) >> 3;
-   png_bytep rp_end = row + bpp;
-
-   /* Process the first pixel in the row completely (this is the same as 'up'
-    * because there is only one candidate predictor for the first row).
-    */
-   while (row < rp_end)
-   {
-      int a = *row + *prev_row++;
-      *row++ = (png_byte)a;
-   }
-
-   /* Remainder */
-   rp_end += row_info->rowbytes - bpp;
-
-   while (row < rp_end)
-   {
-      int a, b, c, pa, pb, pc, p;
-
-      c = *(prev_row - bpp);
-      a = *(row - bpp);
-      b = *prev_row++;
-
-      p = b - c;
-      pc = a - c;
-
-#     ifdef PNG_USE_ABS
-         pa = abs(p);
-         pb = abs(pc);
-         pc = abs(p + pc);
-#     else
-         pa = p < 0 ? -p : p;
-         pb = pc < 0 ? -pc : pc;
-         pc = (p + pc) < 0 ? -(p + pc) : p + pc;
-#     endif
-
-      if (pb < pa) pa = pb, a = b;
-      if (pc < pa) a = c;
-
-      c = b;
-      a += *row;
-      *row++ = (png_byte)a;
-   }
-}
-
-#ifdef PNG_ARM_NEON
-
-#ifdef __linux__
-#include <stdio.h>
-#include <elf.h>
-#include <asm/hwcap.h>
-
-static int png_have_hwcap(unsigned cap)
-{
-   FILE *f = fopen("/proc/self/auxv", "r");
-   Elf32_auxv_t aux;
-   int have_cap = 0;
-
-   if (!f)
-      return 0;
-
-   while (fread(&aux, sizeof(aux), 1, f) > 0)
-   {
-      if (aux.a_type == AT_HWCAP &&
-          aux.a_un.a_val & cap)
-      {
-         have_cap = 1;
-         break;
-      }
-   }
-
-   fclose(f);
-
-   return have_cap;
-}
-#endif /* __linux__ */
-
-static void
-png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
-{
-#ifdef __linux__
-   if (!png_have_hwcap(HWCAP_NEON))
-      return;
-#endif
-
-   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
-
-   if (bpp == 3)
-   {
-      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
-      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
-      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = 
-         png_read_filter_row_paeth3_neon;
-   }
-
-   else if (bpp == 4)
-   {
-      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
-      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
-      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
-          png_read_filter_row_paeth4_neon;
-   }
-}
-#endif /* PNG_ARM_NEON */
-
-static void
-png_init_filter_functions(png_structp pp)
-{
-   unsigned int bpp = (pp->pixel_depth + 7) >> 3;
-
-   pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub;
-   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up;
-   pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg;
-   if (bpp == 1)
-      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
-         png_read_filter_row_paeth_1byte_pixel;
-   else
-      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
-         png_read_filter_row_paeth_multibyte_pixel;
-
-#ifdef PNG_ARM_NEON
-   png_init_filter_functions_neon(pp, bpp);
-#endif
-}
-
-void /* PRIVATE */
-png_read_filter_row(png_structp pp, png_row_infop row_info, png_bytep row,
-   png_const_bytep prev_row, int filter)
-{
-   if (pp->read_filter[0] == NULL)
-      png_init_filter_functions(pp);
-   if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST)
-      pp->read_filter[filter-1](row_info, row, prev_row);
-}
-
-#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
-void /* PRIVATE */
-png_read_finish_row(png_structp png_ptr)
-{
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static PNG_CONST png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static PNG_CONST png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   /* Start of interlace block in the y direction */
-   static PNG_CONST png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-
-   /* Offset to next interlace block in the y direction */
-   static PNG_CONST png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
-#endif /* PNG_READ_INTERLACING_SUPPORTED */
-
-   png_debug(1, "in png_read_finish_row");
-   png_ptr->row_number++;
-   if (png_ptr->row_number < png_ptr->num_rows)
-      return;
-
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   if (png_ptr->interlaced)
-   {
-      png_ptr->row_number = 0;
-
-      /* TO DO: don't do this if prev_row isn't needed (requires
-       * read-ahead of the next row's filter byte.
-       */
-      png_memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
-
-      do
-      {
-         png_ptr->pass++;
-
-         if (png_ptr->pass >= 7)
-            break;
-
-         png_ptr->iwidth = (png_ptr->width +
-            png_pass_inc[png_ptr->pass] - 1 -
-            png_pass_start[png_ptr->pass]) /
-            png_pass_inc[png_ptr->pass];
-
-         if (!(png_ptr->transformations & PNG_INTERLACE))
-         {
-            png_ptr->num_rows = (png_ptr->height +
-                png_pass_yinc[png_ptr->pass] - 1 -
-                png_pass_ystart[png_ptr->pass]) /
-                png_pass_yinc[png_ptr->pass];
-         }
-
-         else  /* if (png_ptr->transformations & PNG_INTERLACE) */
-            break; /* libpng deinterlacing sees every row */
-
-      } while (png_ptr->num_rows == 0 || png_ptr->iwidth == 0);
-
-      if (png_ptr->pass < 7)
-         return;
-   }
-#endif /* PNG_READ_INTERLACING_SUPPORTED */
-
-   if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
-   {
-      char extra;
-      int ret;
-
-      png_ptr->zstream.next_out = (Byte *)&extra;
-      png_ptr->zstream.avail_out = (uInt)1;
-
-      for (;;)
-      {
-         if (!(png_ptr->zstream.avail_in))
-         {
-            while (!png_ptr->idat_size)
-            {
-               png_crc_finish(png_ptr, 0);
-               png_ptr->idat_size = png_read_chunk_header(png_ptr);
-               if (png_ptr->chunk_name != png_IDAT)
-                  png_error(png_ptr, "Not enough image data");
-            }
-
-            png_ptr->zstream.avail_in = (uInt)png_ptr->zbuf_size;
-            png_ptr->zstream.next_in = png_ptr->zbuf;
-
-            if (png_ptr->zbuf_size > png_ptr->idat_size)
-               png_ptr->zstream.avail_in = (uInt)png_ptr->idat_size;
-
-            png_crc_read(png_ptr, png_ptr->zbuf, png_ptr->zstream.avail_in);
-            png_ptr->idat_size -= png_ptr->zstream.avail_in;
-         }
-
-         ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
-
-         if (ret == Z_STREAM_END)
-         {
-            if (!(png_ptr->zstream.avail_out) || png_ptr->zstream.avail_in ||
-                png_ptr->idat_size)
-               png_warning(png_ptr, "Extra compressed data");
-
-            png_ptr->mode |= PNG_AFTER_IDAT;
-            png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
-            break;
-         }
-
-         if (ret != Z_OK)
-            png_error(png_ptr, png_ptr->zstream.msg ? png_ptr->zstream.msg :
-                "Decompression Error");
-
-         if (!(png_ptr->zstream.avail_out))
-         {
-            png_warning(png_ptr, "Extra compressed data");
-            png_ptr->mode |= PNG_AFTER_IDAT;
-            png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
-            break;
-         }
-
-      }
-      png_ptr->zstream.avail_out = 0;
-   }
-
-   if (png_ptr->idat_size || png_ptr->zstream.avail_in)
-      png_warning(png_ptr, "Extra compression data");
-
-   inflateReset(&png_ptr->zstream);
-
-   png_ptr->mode |= PNG_AFTER_IDAT;
-}
-#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */
-
-void /* PRIVATE */
-png_read_start_row(png_structp png_ptr)
-{
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static PNG_CONST png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static PNG_CONST png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   /* Start of interlace block in the y direction */
-   static PNG_CONST png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-
-   /* Offset to next interlace block in the y direction */
-   static PNG_CONST png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
-#endif
-
-   int max_pixel_depth;
-   png_size_t row_bytes;
-
-   png_debug(1, "in png_read_start_row");
-   png_ptr->zstream.avail_in = 0;
-#ifdef PNG_READ_TRANSFORMS_SUPPORTED
-   png_init_read_transformations(png_ptr);
-#endif
-#ifdef PNG_READ_INTERLACING_SUPPORTED
-   if (png_ptr->interlaced)
-   {
-      if (!(png_ptr->transformations & PNG_INTERLACE))
-         png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 -
-             png_pass_ystart[0]) / png_pass_yinc[0];
-
-      else
-         png_ptr->num_rows = png_ptr->height;
-
-      png_ptr->iwidth = (png_ptr->width +
-          png_pass_inc[png_ptr->pass] - 1 -
-          png_pass_start[png_ptr->pass]) /
-          png_pass_inc[png_ptr->pass];
-   }
-
-   else
-#endif /* PNG_READ_INTERLACING_SUPPORTED */
-   {
-      png_ptr->num_rows = png_ptr->height;
-      png_ptr->iwidth = png_ptr->width;
-   }
-
-   max_pixel_depth = png_ptr->pixel_depth;
-
-   /* WARNING: * png_read_transform_info (pngrtran.c) performs a simpliar set of
-    * calculations to calculate the final pixel depth, then
-    * png_do_read_transforms actually does the transforms.  This means that the
-    * code which effectively calculates this value is actually repeated in three
-    * separate places.  They must all match.  Innocent changes to the order of
-    * transformations can and will break libpng in a way that causes memory
-    * overwrites.
-    *
-    * TODO: fix this.
-    */
-#ifdef PNG_READ_PACK_SUPPORTED
-   if ((png_ptr->transformations & PNG_PACK) && png_ptr->bit_depth < 8)
-      max_pixel_depth = 8;
-#endif
-
-#ifdef PNG_READ_EXPAND_SUPPORTED
-   if (png_ptr->transformations & PNG_EXPAND)
-   {
-      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         if (png_ptr->num_trans)
-            max_pixel_depth = 32;
-
-         else
-            max_pixel_depth = 24;
-      }
-
-      else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
-      {
-         if (max_pixel_depth < 8)
-            max_pixel_depth = 8;
-
-         if (png_ptr->num_trans)
-            max_pixel_depth *= 2;
-      }
-
-      else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
-      {
-         if (png_ptr->num_trans)
-         {
-            max_pixel_depth *= 4;
-            max_pixel_depth /= 3;
-         }
-      }
-   }
-#endif
-
-#ifdef PNG_READ_EXPAND_16_SUPPORTED
-   if (png_ptr->transformations & PNG_EXPAND_16)
-   {
-#     ifdef PNG_READ_EXPAND_SUPPORTED
-         /* In fact it is an error if it isn't supported, but checking is
-          * the safe way.
-          */
-         if (png_ptr->transformations & PNG_EXPAND)
-         {
-            if (png_ptr->bit_depth < 16)
-               max_pixel_depth *= 2;
-         }
-         else
-#     endif
-         png_ptr->transformations &= ~PNG_EXPAND_16;
-   }
-#endif
-
-#ifdef PNG_READ_FILLER_SUPPORTED
-   if (png_ptr->transformations & (PNG_FILLER))
-   {
-      if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
-      {
-         if (max_pixel_depth <= 8)
-            max_pixel_depth = 16;
-
-         else
-            max_pixel_depth = 32;
-      }
-
-      else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB ||
-         png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         if (max_pixel_depth <= 32)
-            max_pixel_depth = 32;
-
-         else
-            max_pixel_depth = 64;
-      }
-   }
-#endif
-
-#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
-   if (png_ptr->transformations & PNG_GRAY_TO_RGB)
-   {
-      if (
-#ifdef PNG_READ_EXPAND_SUPPORTED
-          (png_ptr->num_trans && (png_ptr->transformations & PNG_EXPAND)) ||
-#endif
-#ifdef PNG_READ_FILLER_SUPPORTED
-          (png_ptr->transformations & (PNG_FILLER)) ||
-#endif
-          png_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-      {
-         if (max_pixel_depth <= 16)
-            max_pixel_depth = 32;
-
-         else
-            max_pixel_depth = 64;
-      }
-
-      else
-      {
-         if (max_pixel_depth <= 8)
-         {
-            if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-               max_pixel_depth = 32;
-
-            else
-               max_pixel_depth = 24;
-         }
-
-         else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
-            max_pixel_depth = 64;
-
-         else
-            max_pixel_depth = 48;
-      }
-   }
-#endif
-
-#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) && \
-defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
-   if (png_ptr->transformations & PNG_USER_TRANSFORM)
-   {
-      int user_pixel_depth = png_ptr->user_transform_depth *
-         png_ptr->user_transform_channels;
-
-      if (user_pixel_depth > max_pixel_depth)
-         max_pixel_depth = user_pixel_depth;
-   }
-#endif
-
-   /* This value is stored in png_struct and double checked in the row read
-    * code.
-    */
-   png_ptr->maximum_pixel_depth = (png_byte)max_pixel_depth;
-   png_ptr->transformed_pixel_depth = 0; /* calculated on demand */
-
-   /* Align the width on the next larger 8 pixels.  Mainly used
-    * for interlacing
-    */
-   row_bytes = ((png_ptr->width + 7) & ~((png_uint_32)7));
-   /* Calculate the maximum bytes needed, adding a byte and a pixel
-    * for safety's sake
-    */
-   row_bytes = PNG_ROWBYTES(max_pixel_depth, row_bytes) +
-       1 + ((max_pixel_depth + 7) >> 3);
-
-#ifdef PNG_MAX_MALLOC_64K
-   if (row_bytes > (png_uint_32)65536L)
-      png_error(png_ptr, "This image requires a row greater than 64KB");
-#endif
-
-   if (row_bytes + 48 > png_ptr->old_big_row_buf_size)
-   {
-     png_free(png_ptr, png_ptr->big_row_buf);
-     png_free(png_ptr, png_ptr->big_prev_row);
-
-     if (png_ptr->interlaced)
-        png_ptr->big_row_buf = (png_bytep)png_calloc(png_ptr,
-            row_bytes + 48);
-
-     else
-        png_ptr->big_row_buf = (png_bytep)png_malloc(png_ptr, row_bytes + 48);
-
-     png_ptr->big_prev_row = (png_bytep)png_malloc(png_ptr, row_bytes + 48);
-
-#ifdef PNG_ALIGNED_MEMORY_SUPPORTED
-     /* Use 16-byte aligned memory for row_buf with at least 16 bytes
-      * of padding before and after row_buf; treat prev_row similarly.
-      * NOTE: the alignment is to the start of the pixels, one beyond the start
-      * of the buffer, because of the filter byte.  Prior to libpng 1.5.6 this
-      * was incorrect; the filter byte was aligned, which had the exact
-      * opposite effect of that intended.
-      */
-     {
-        png_bytep temp = png_ptr->big_row_buf + 32;
-        int extra = (int)((temp - (png_bytep)0) & 0x0f);
-        png_ptr->row_buf = temp - extra - 1/*filter byte*/;
-
-        temp = png_ptr->big_prev_row + 32;
-        extra = (int)((temp - (png_bytep)0) & 0x0f);
-        png_ptr->prev_row = temp - extra - 1/*filter byte*/;
-     }
-
-#else
-     /* Use 31 bytes of padding before and 17 bytes after row_buf. */
-     png_ptr->row_buf = png_ptr->big_row_buf + 31;
-     png_ptr->prev_row = png_ptr->big_prev_row + 31;
-#endif
-     png_ptr->old_big_row_buf_size = row_bytes + 48;
-   }
-
-#ifdef PNG_MAX_MALLOC_64K
-   if (png_ptr->rowbytes > 65535)
-      png_error(png_ptr, "This image requires a row greater than 64KB");
-
-#endif
-   if (png_ptr->rowbytes > (PNG_SIZE_MAX - 1))
-      png_error(png_ptr, "Row has too many bytes to allocate in memory");
-
-   png_memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
-
-   png_debug1(3, "width = %u,", png_ptr->width);
-   png_debug1(3, "height = %u,", png_ptr->height);
-   png_debug1(3, "iwidth = %u,", png_ptr->iwidth);
-   png_debug1(3, "num_rows = %u,", png_ptr->num_rows);
-   png_debug1(3, "rowbytes = %lu,", (unsigned long)png_ptr->rowbytes);
-   png_debug1(3, "irowbytes = %lu",
-       (unsigned long)PNG_ROWBYTES(png_ptr->pixel_depth, png_ptr->iwidth) + 1);
-
-   png_ptr->flags |= PNG_FLAG_ROW_INIT;
-}
-#endif /* PNG_READ_SUPPORTED */
diff --git a/reg-io/png/lpng1510/pngset.c b/reg-io/png/lpng1510/pngset.c
deleted file mode 100644
index 2bcd96d1..00000000
--- a/reg-io/png/lpng1510/pngset.c
+++ /dev/null
@@ -1,1309 +0,0 @@
-
-/* pngset.c - storage of image information into info struct
- *
- * Last changed in libpng 1.5.10 [(PENDING RELEASE)]
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * The functions here are used during reads to store data from the file
- * into the info struct, and during writes to store application data
- * into the info struct for writing into the file.  This abstracts the
- * info struct and allows us to change the structure in the future.
- */
-
-#include "pngpriv.h"
-
-#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
-
-#ifdef PNG_bKGD_SUPPORTED
-void PNGAPI
-png_set_bKGD(png_structp png_ptr, png_infop info_ptr,
-    png_const_color_16p background)
-{
-   png_debug1(1, "in %s storage function", "bKGD");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   png_memcpy(&(info_ptr->background), background, png_sizeof(png_color_16));
-   info_ptr->valid |= PNG_INFO_bKGD;
-}
-#endif
-
-#ifdef PNG_cHRM_SUPPORTED
-void PNGFAPI
-png_set_cHRM_fixed(png_structp png_ptr, png_infop info_ptr,
-    png_fixed_point white_x, png_fixed_point white_y, png_fixed_point red_x,
-    png_fixed_point red_y, png_fixed_point green_x, png_fixed_point green_y,
-    png_fixed_point blue_x, png_fixed_point blue_y)
-{
-   png_debug1(1, "in %s storage function", "cHRM fixed");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-#  ifdef PNG_CHECK_cHRM_SUPPORTED
-   if (png_check_cHRM_fixed(png_ptr,
-       white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y))
-#  endif
-   {
-      info_ptr->x_white = white_x;
-      info_ptr->y_white = white_y;
-      info_ptr->x_red   = red_x;
-      info_ptr->y_red   = red_y;
-      info_ptr->x_green = green_x;
-      info_ptr->y_green = green_y;
-      info_ptr->x_blue  = blue_x;
-      info_ptr->y_blue  = blue_y;
-      info_ptr->valid |= PNG_INFO_cHRM;
-   }
-}
-
-void PNGFAPI
-png_set_cHRM_XYZ_fixed(png_structp png_ptr, png_infop info_ptr,
-    png_fixed_point int_red_X, png_fixed_point int_red_Y,
-    png_fixed_point int_red_Z, png_fixed_point int_green_X,
-    png_fixed_point int_green_Y, png_fixed_point int_green_Z,
-    png_fixed_point int_blue_X, png_fixed_point int_blue_Y,
-    png_fixed_point int_blue_Z)
-{
-   png_XYZ XYZ;
-   png_xy xy;
-
-   png_debug1(1, "in %s storage function", "cHRM XYZ fixed");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   XYZ.redX = int_red_X;
-   XYZ.redY = int_red_Y;
-   XYZ.redZ = int_red_Z;
-   XYZ.greenX = int_green_X;
-   XYZ.greenY = int_green_Y;
-   XYZ.greenZ = int_green_Z;
-   XYZ.blueX = int_blue_X;
-   XYZ.blueY = int_blue_Y;
-   XYZ.blueZ = int_blue_Z;
-
-   if (png_xy_from_XYZ(&xy, XYZ))
-      png_error(png_ptr, "XYZ values out of representable range");
-
-   png_set_cHRM_fixed(png_ptr, info_ptr, xy.whitex, xy.whitey, xy.redx, xy.redy,
-      xy.greenx, xy.greeny, xy.bluex, xy.bluey);
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_cHRM(png_structp png_ptr, png_infop info_ptr,
-    double white_x, double white_y, double red_x, double red_y,
-    double green_x, double green_y, double blue_x, double blue_y)
-{
-   png_set_cHRM_fixed(png_ptr, info_ptr,
-      png_fixed(png_ptr, white_x, "cHRM White X"),
-      png_fixed(png_ptr, white_y, "cHRM White Y"),
-      png_fixed(png_ptr, red_x, "cHRM Red X"),
-      png_fixed(png_ptr, red_y, "cHRM Red Y"),
-      png_fixed(png_ptr, green_x, "cHRM Green X"),
-      png_fixed(png_ptr, green_y, "cHRM Green Y"),
-      png_fixed(png_ptr, blue_x, "cHRM Blue X"),
-      png_fixed(png_ptr, blue_y, "cHRM Blue Y"));
-}
-
-void PNGAPI
-png_set_cHRM_XYZ(png_structp png_ptr, png_infop info_ptr, double red_X,
-    double red_Y, double red_Z, double green_X, double green_Y, double green_Z,
-    double blue_X, double blue_Y, double blue_Z)
-{
-   png_set_cHRM_XYZ_fixed(png_ptr, info_ptr,
-      png_fixed(png_ptr, red_X, "cHRM Red X"),
-      png_fixed(png_ptr, red_Y, "cHRM Red Y"),
-      png_fixed(png_ptr, red_Z, "cHRM Red Z"),
-      png_fixed(png_ptr, green_X, "cHRM Red X"),
-      png_fixed(png_ptr, green_Y, "cHRM Red Y"),
-      png_fixed(png_ptr, green_Z, "cHRM Red Z"),
-      png_fixed(png_ptr, blue_X, "cHRM Red X"),
-      png_fixed(png_ptr, blue_Y, "cHRM Red Y"),
-      png_fixed(png_ptr, blue_Z, "cHRM Red Z"));
-}
-#  endif /* PNG_FLOATING_POINT_SUPPORTED */
-
-#endif /* PNG_cHRM_SUPPORTED */
-
-#ifdef PNG_gAMA_SUPPORTED
-void PNGFAPI
-png_set_gAMA_fixed(png_structp png_ptr, png_infop info_ptr, png_fixed_point
-    file_gamma)
-{
-   png_debug1(1, "in %s storage function", "gAMA");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   /* Changed in libpng-1.5.4 to limit the values to ensure overflow can't
-    * occur.  Since the fixed point representation is assymetrical it is
-    * possible for 1/gamma to overflow the limit of 21474 and this means the
-    * gamma value must be at least 5/100000 and hence at most 20000.0.  For
-    * safety the limits here are a little narrower.  The values are 0.00016 to
-    * 6250.0, which are truly ridiculous gammma values (and will produce
-    * displays that are all black or all white.)
-    */
-   if (file_gamma < 16 || file_gamma > 625000000)
-      png_warning(png_ptr, "Out of range gamma value ignored");
-
-   else
-   {
-      info_ptr->gamma = file_gamma;
-      info_ptr->valid |= PNG_INFO_gAMA;
-   }
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_gAMA(png_structp png_ptr, png_infop info_ptr, double file_gamma)
-{
-   png_set_gAMA_fixed(png_ptr, info_ptr, png_fixed(png_ptr, file_gamma,
-       "png_set_gAMA"));
-}
-#  endif
-#endif
-
-#ifdef PNG_hIST_SUPPORTED
-void PNGAPI
-png_set_hIST(png_structp png_ptr, png_infop info_ptr, png_const_uint_16p hist)
-{
-   int i;
-
-   png_debug1(1, "in %s storage function", "hIST");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if (info_ptr->num_palette == 0 || info_ptr->num_palette
-       > PNG_MAX_PALETTE_LENGTH)
-   {
-      png_warning(png_ptr,
-          "Invalid palette size, hIST allocation skipped");
-
-      return;
-   }
-
-   png_free_data(png_ptr, info_ptr, PNG_FREE_HIST, 0);
-
-   /* Changed from info->num_palette to PNG_MAX_PALETTE_LENGTH in
-    * version 1.2.1
-    */
-   png_ptr->hist = (png_uint_16p)png_malloc_warn(png_ptr,
-       PNG_MAX_PALETTE_LENGTH * png_sizeof(png_uint_16));
-
-   if (png_ptr->hist == NULL)
-   {
-      png_warning(png_ptr, "Insufficient memory for hIST chunk data");
-      return;
-   }
-
-   for (i = 0; i < info_ptr->num_palette; i++)
-      png_ptr->hist[i] = hist[i];
-
-   info_ptr->hist = png_ptr->hist;
-   info_ptr->valid |= PNG_INFO_hIST;
-   info_ptr->free_me |= PNG_FREE_HIST;
-}
-#endif
-
-void PNGAPI
-png_set_IHDR(png_structp png_ptr, png_infop info_ptr,
-    png_uint_32 width, png_uint_32 height, int bit_depth,
-    int color_type, int interlace_type, int compression_type,
-    int filter_type)
-{
-   png_debug1(1, "in %s storage function", "IHDR");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   info_ptr->width = width;
-   info_ptr->height = height;
-   info_ptr->bit_depth = (png_byte)bit_depth;
-   info_ptr->color_type = (png_byte)color_type;
-   info_ptr->compression_type = (png_byte)compression_type;
-   info_ptr->filter_type = (png_byte)filter_type;
-   info_ptr->interlace_type = (png_byte)interlace_type;
-
-   png_check_IHDR (png_ptr, info_ptr->width, info_ptr->height,
-       info_ptr->bit_depth, info_ptr->color_type, info_ptr->interlace_type,
-       info_ptr->compression_type, info_ptr->filter_type);
-
-   if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      info_ptr->channels = 1;
-
-   else if (info_ptr->color_type & PNG_COLOR_MASK_COLOR)
-      info_ptr->channels = 3;
-
-   else
-      info_ptr->channels = 1;
-
-   if (info_ptr->color_type & PNG_COLOR_MASK_ALPHA)
-      info_ptr->channels++;
-
-   info_ptr->pixel_depth = (png_byte)(info_ptr->channels * info_ptr->bit_depth);
-
-   /* Check for potential overflow */
-   if (width >
-       (PNG_UINT_32_MAX >> 3)      /* 8-byte RRGGBBAA pixels */
-       - 48       /* bigrowbuf hack */
-       - 1        /* filter byte */
-       - 7*8      /* rounding of width to multiple of 8 pixels */
-       - 8)       /* extra max_pixel_depth pad */
-      info_ptr->rowbytes = 0;
-   else
-      info_ptr->rowbytes = PNG_ROWBYTES(info_ptr->pixel_depth, width);
-}
-
-#ifdef PNG_oFFs_SUPPORTED
-void PNGAPI
-png_set_oFFs(png_structp png_ptr, png_infop info_ptr,
-    png_int_32 offset_x, png_int_32 offset_y, int unit_type)
-{
-   png_debug1(1, "in %s storage function", "oFFs");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   info_ptr->x_offset = offset_x;
-   info_ptr->y_offset = offset_y;
-   info_ptr->offset_unit_type = (png_byte)unit_type;
-   info_ptr->valid |= PNG_INFO_oFFs;
-}
-#endif
-
-#ifdef PNG_pCAL_SUPPORTED
-void PNGAPI
-png_set_pCAL(png_structp png_ptr, png_infop info_ptr,
-    png_const_charp purpose, png_int_32 X0, png_int_32 X1, int type,
-    int nparams, png_const_charp units, png_charpp params)
-{
-   png_size_t length;
-   int i;
-
-   png_debug1(1, "in %s storage function", "pCAL");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   length = png_strlen(purpose) + 1;
-   png_debug1(3, "allocating purpose for info (%lu bytes)",
-       (unsigned long)length);
-
-   /* TODO: validate format of calibration name and unit name */
-
-   /* Check that the type matches the specification. */
-   if (type < 0 || type > 3)
-      png_error(png_ptr, "Invalid pCAL equation type");
-
-   /* Validate params[nparams] */
-   for (i=0; i<nparams; ++i)
-      if (!png_check_fp_string(params[i], png_strlen(params[i])))
-         png_error(png_ptr, "Invalid format for pCAL parameter");
-
-   info_ptr->pcal_purpose = (png_charp)png_malloc_warn(png_ptr, length);
-
-   if (info_ptr->pcal_purpose == NULL)
-   {
-      png_warning(png_ptr, "Insufficient memory for pCAL purpose");
-      return;
-   }
-
-   png_memcpy(info_ptr->pcal_purpose, purpose, length);
-
-   png_debug(3, "storing X0, X1, type, and nparams in info");
-   info_ptr->pcal_X0 = X0;
-   info_ptr->pcal_X1 = X1;
-   info_ptr->pcal_type = (png_byte)type;
-   info_ptr->pcal_nparams = (png_byte)nparams;
-
-   length = png_strlen(units) + 1;
-   png_debug1(3, "allocating units for info (%lu bytes)",
-     (unsigned long)length);
-
-   info_ptr->pcal_units = (png_charp)png_malloc_warn(png_ptr, length);
-
-   if (info_ptr->pcal_units == NULL)
-   {
-      png_warning(png_ptr, "Insufficient memory for pCAL units");
-      return;
-   }
-
-   png_memcpy(info_ptr->pcal_units, units, length);
-
-   info_ptr->pcal_params = (png_charpp)png_malloc_warn(png_ptr,
-       (png_size_t)((nparams + 1) * png_sizeof(png_charp)));
-
-   if (info_ptr->pcal_params == NULL)
-   {
-      png_warning(png_ptr, "Insufficient memory for pCAL params");
-      return;
-   }
-
-   png_memset(info_ptr->pcal_params, 0, (nparams + 1) * png_sizeof(png_charp));
-
-   for (i = 0; i < nparams; i++)
-   {
-      length = png_strlen(params[i]) + 1;
-      png_debug2(3, "allocating parameter %d for info (%lu bytes)", i,
-          (unsigned long)length);
-
-      info_ptr->pcal_params[i] = (png_charp)png_malloc_warn(png_ptr, length);
-
-      if (info_ptr->pcal_params[i] == NULL)
-      {
-         png_warning(png_ptr, "Insufficient memory for pCAL parameter");
-         return;
-      }
-
-      png_memcpy(info_ptr->pcal_params[i], params[i], length);
-   }
-
-   info_ptr->valid |= PNG_INFO_pCAL;
-   info_ptr->free_me |= PNG_FREE_PCAL;
-}
-#endif
-
-#ifdef PNG_sCAL_SUPPORTED
-void PNGAPI
-png_set_sCAL_s(png_structp png_ptr, png_infop info_ptr,
-    int unit, png_const_charp swidth, png_const_charp sheight)
-{
-   png_size_t lengthw = 0, lengthh = 0;
-
-   png_debug1(1, "in %s storage function", "sCAL");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   /* Double check the unit (should never get here with an invalid
-    * unit unless this is an API call.)
-    */
-   if (unit != 1 && unit != 2)
-      png_error(png_ptr, "Invalid sCAL unit");
-
-   if (swidth == NULL || (lengthw = png_strlen(swidth)) == 0 ||
-       swidth[0] == 45 /* '-' */ || !png_check_fp_string(swidth, lengthw))
-      png_error(png_ptr, "Invalid sCAL width");
-
-   if (sheight == NULL || (lengthh = png_strlen(sheight)) == 0 ||
-       sheight[0] == 45 /* '-' */ || !png_check_fp_string(sheight, lengthh))
-      png_error(png_ptr, "Invalid sCAL height");
-
-   info_ptr->scal_unit = (png_byte)unit;
-
-   ++lengthw;
-
-   png_debug1(3, "allocating unit for info (%u bytes)", (unsigned int)lengthw);
-
-   info_ptr->scal_s_width = (png_charp)png_malloc_warn(png_ptr, lengthw);
-
-   if (info_ptr->scal_s_width == NULL)
-   {
-      png_warning(png_ptr, "Memory allocation failed while processing sCAL");
-      return;
-   }
-
-   png_memcpy(info_ptr->scal_s_width, swidth, lengthw);
-
-   ++lengthh;
-
-   png_debug1(3, "allocating unit for info (%u bytes)", (unsigned int)lengthh);
-
-   info_ptr->scal_s_height = (png_charp)png_malloc_warn(png_ptr, lengthh);
-
-   if (info_ptr->scal_s_height == NULL)
-   {
-      png_free (png_ptr, info_ptr->scal_s_width);
-      info_ptr->scal_s_width = NULL;
-
-      png_warning(png_ptr, "Memory allocation failed while processing sCAL");
-      return;
-   }
-
-   png_memcpy(info_ptr->scal_s_height, sheight, lengthh);
-
-   info_ptr->valid |= PNG_INFO_sCAL;
-   info_ptr->free_me |= PNG_FREE_SCAL;
-}
-
-#  ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_sCAL(png_structp png_ptr, png_infop info_ptr, int unit, double width,
-    double height)
-{
-   png_debug1(1, "in %s storage function", "sCAL");
-
-   /* Check the arguments. */
-   if (width <= 0)
-      png_warning(png_ptr, "Invalid sCAL width ignored");
-
-   else if (height <= 0)
-      png_warning(png_ptr, "Invalid sCAL height ignored");
-
-   else
-   {
-      /* Convert 'width' and 'height' to ASCII. */
-      char swidth[PNG_sCAL_MAX_DIGITS+1];
-      char sheight[PNG_sCAL_MAX_DIGITS+1];
-
-      png_ascii_from_fp(png_ptr, swidth, sizeof swidth, width,
-         PNG_sCAL_PRECISION);
-      png_ascii_from_fp(png_ptr, sheight, sizeof sheight, height,
-         PNG_sCAL_PRECISION);
-
-      png_set_sCAL_s(png_ptr, info_ptr, unit, swidth, sheight);
-   }
-}
-#  endif
-
-#  ifdef PNG_FIXED_POINT_SUPPORTED
-void PNGAPI
-png_set_sCAL_fixed(png_structp png_ptr, png_infop info_ptr, int unit,
-    png_fixed_point width, png_fixed_point height)
-{
-   png_debug1(1, "in %s storage function", "sCAL");
-
-   /* Check the arguments. */
-   if (width <= 0)
-      png_warning(png_ptr, "Invalid sCAL width ignored");
-
-   else if (height <= 0)
-      png_warning(png_ptr, "Invalid sCAL height ignored");
-
-   else
-   {
-      /* Convert 'width' and 'height' to ASCII. */
-      char swidth[PNG_sCAL_MAX_DIGITS+1];
-      char sheight[PNG_sCAL_MAX_DIGITS+1];
-
-      png_ascii_from_fixed(png_ptr, swidth, sizeof swidth, width);
-      png_ascii_from_fixed(png_ptr, sheight, sizeof sheight, height);
-
-      png_set_sCAL_s(png_ptr, info_ptr, unit, swidth, sheight);
-   }
-}
-#  endif
-#endif
-
-#ifdef PNG_pHYs_SUPPORTED
-void PNGAPI
-png_set_pHYs(png_structp png_ptr, png_infop info_ptr,
-    png_uint_32 res_x, png_uint_32 res_y, int unit_type)
-{
-   png_debug1(1, "in %s storage function", "pHYs");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   info_ptr->x_pixels_per_unit = res_x;
-   info_ptr->y_pixels_per_unit = res_y;
-   info_ptr->phys_unit_type = (png_byte)unit_type;
-   info_ptr->valid |= PNG_INFO_pHYs;
-}
-#endif
-
-void PNGAPI
-png_set_PLTE(png_structp png_ptr, png_infop info_ptr,
-    png_const_colorp palette, int num_palette)
-{
-
-   png_debug1(1, "in %s storage function", "PLTE");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if (num_palette < 0 || num_palette > PNG_MAX_PALETTE_LENGTH)
-   {
-      if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-         png_error(png_ptr, "Invalid palette length");
-
-      else
-      {
-         png_warning(png_ptr, "Invalid palette length");
-         return;
-      }
-   }
-
-   /* It may not actually be necessary to set png_ptr->palette here;
-    * we do it for backward compatibility with the way the png_handle_tRNS
-    * function used to do the allocation.
-    */
-   png_free_data(png_ptr, info_ptr, PNG_FREE_PLTE, 0);
-
-   /* Changed in libpng-1.2.1 to allocate PNG_MAX_PALETTE_LENGTH instead
-    * of num_palette entries, in case of an invalid PNG file that has
-    * too-large sample values.
-    */
-   png_ptr->palette = (png_colorp)png_calloc(png_ptr,
-       PNG_MAX_PALETTE_LENGTH * png_sizeof(png_color));
-
-   png_memcpy(png_ptr->palette, palette, num_palette * png_sizeof(png_color));
-   info_ptr->palette = png_ptr->palette;
-   info_ptr->num_palette = png_ptr->num_palette = (png_uint_16)num_palette;
-
-   info_ptr->free_me |= PNG_FREE_PLTE;
-
-   info_ptr->valid |= PNG_INFO_PLTE;
-}
-
-#ifdef PNG_sBIT_SUPPORTED
-void PNGAPI
-png_set_sBIT(png_structp png_ptr, png_infop info_ptr,
-    png_const_color_8p sig_bit)
-{
-   png_debug1(1, "in %s storage function", "sBIT");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   png_memcpy(&(info_ptr->sig_bit), sig_bit, png_sizeof(png_color_8));
-   info_ptr->valid |= PNG_INFO_sBIT;
-}
-#endif
-
-#ifdef PNG_sRGB_SUPPORTED
-void PNGAPI
-png_set_sRGB(png_structp png_ptr, png_infop info_ptr, int srgb_intent)
-{
-   png_debug1(1, "in %s storage function", "sRGB");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   info_ptr->srgb_intent = (png_byte)srgb_intent;
-   info_ptr->valid |= PNG_INFO_sRGB;
-}
-
-void PNGAPI
-png_set_sRGB_gAMA_and_cHRM(png_structp png_ptr, png_infop info_ptr,
-    int srgb_intent)
-{
-   png_debug1(1, "in %s storage function", "sRGB_gAMA_and_cHRM");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   png_set_sRGB(png_ptr, info_ptr, srgb_intent);
-
-#  ifdef PNG_gAMA_SUPPORTED
-   png_set_gAMA_fixed(png_ptr, info_ptr, PNG_GAMMA_sRGB_INVERSE);
-#  endif
-
-#  ifdef PNG_cHRM_SUPPORTED
-   png_set_cHRM_fixed(png_ptr, info_ptr,
-      /* color      x       y */
-      /* white */ 31270, 32900,
-      /* red   */ 64000, 33000,
-      /* green */ 30000, 60000,
-      /* blue  */ 15000,  6000
-   );
-#  endif /* cHRM */
-}
-#endif /* sRGB */
-
-
-#ifdef PNG_iCCP_SUPPORTED
-void PNGAPI
-png_set_iCCP(png_structp png_ptr, png_infop info_ptr,
-    png_const_charp name, int compression_type,
-    png_const_bytep profile, png_uint_32 proflen)
-{
-   png_charp new_iccp_name;
-   png_bytep new_iccp_profile;
-   png_size_t length;
-
-   png_debug1(1, "in %s storage function", "iCCP");
-
-   if (png_ptr == NULL || info_ptr == NULL || name == NULL || profile == NULL)
-      return;
-
-   length = png_strlen(name)+1;
-   new_iccp_name = (png_charp)png_malloc_warn(png_ptr, length);
-
-   if (new_iccp_name == NULL)
-   {
-        png_warning(png_ptr, "Insufficient memory to process iCCP chunk");
-      return;
-   }
-
-   png_memcpy(new_iccp_name, name, length);
-   new_iccp_profile = (png_bytep)png_malloc_warn(png_ptr, proflen);
-
-   if (new_iccp_profile == NULL)
-   {
-      png_free (png_ptr, new_iccp_name);
-      png_warning(png_ptr,
-          "Insufficient memory to process iCCP profile");
-      return;
-   }
-
-   png_memcpy(new_iccp_profile, profile, (png_size_t)proflen);
-
-   png_free_data(png_ptr, info_ptr, PNG_FREE_ICCP, 0);
-
-   info_ptr->iccp_proflen = proflen;
-   info_ptr->iccp_name = new_iccp_name;
-   info_ptr->iccp_profile = new_iccp_profile;
-   /* Compression is always zero but is here so the API and info structure
-    * does not have to change if we introduce multiple compression types
-    */
-   info_ptr->iccp_compression = (png_byte)compression_type;
-   info_ptr->free_me |= PNG_FREE_ICCP;
-   info_ptr->valid |= PNG_INFO_iCCP;
-}
-#endif
-
-#ifdef PNG_TEXT_SUPPORTED
-void PNGAPI
-png_set_text(png_structp png_ptr, png_infop info_ptr, png_const_textp text_ptr,
-    int num_text)
-{
-   int ret;
-   ret = png_set_text_2(png_ptr, info_ptr, text_ptr, num_text);
-
-   if (ret)
-      png_error(png_ptr, "Insufficient memory to store text");
-}
-
-int /* PRIVATE */
-png_set_text_2(png_structp png_ptr, png_infop info_ptr,
-    png_const_textp text_ptr, int num_text)
-{
-   int i;
-
-   png_debug1(1, "in %lx storage function", png_ptr == NULL ? "unexpected" :
-      (unsigned long)png_ptr->chunk_name);
-
-   if (png_ptr == NULL || info_ptr == NULL || num_text == 0)
-      return(0);
-
-   /* Make sure we have enough space in the "text" array in info_struct
-    * to hold all of the incoming text_ptr objects.
-    */
-   if (info_ptr->num_text + num_text > info_ptr->max_text)
-   {
-      int old_max_text = info_ptr->max_text;
-      int old_num_text = info_ptr->num_text;
-
-      if (info_ptr->text != NULL)
-      {
-         png_textp old_text;
-
-         info_ptr->max_text = info_ptr->num_text + num_text + 8;
-         old_text = info_ptr->text;
-
-         info_ptr->text = (png_textp)png_malloc_warn(png_ptr,
-            (png_size_t)(info_ptr->max_text * png_sizeof(png_text)));
-
-         if (info_ptr->text == NULL)
-         {
-            /* Restore to previous condition */
-            info_ptr->max_text = old_max_text;
-            info_ptr->text = old_text;
-            return(1);
-         }
-
-         png_memcpy(info_ptr->text, old_text, (png_size_t)(old_max_text *
-             png_sizeof(png_text)));
-         png_free(png_ptr, old_text);
-      }
-
-      else
-      {
-         info_ptr->max_text = num_text + 8;
-         info_ptr->num_text = 0;
-         info_ptr->text = (png_textp)png_malloc_warn(png_ptr,
-             (png_size_t)(info_ptr->max_text * png_sizeof(png_text)));
-         if (info_ptr->text == NULL)
-         {
-            /* Restore to previous condition */
-            info_ptr->num_text = old_num_text;
-            info_ptr->max_text = old_max_text;
-            return(1);
-         }
-         info_ptr->free_me |= PNG_FREE_TEXT;
-      }
-
-      png_debug1(3, "allocated %d entries for info_ptr->text",
-          info_ptr->max_text);
-   }
-   for (i = 0; i < num_text; i++)
-   {
-      png_size_t text_length, key_len;
-      png_size_t lang_len, lang_key_len;
-      png_textp textp = &(info_ptr->text[info_ptr->num_text]);
-
-      if (text_ptr[i].key == NULL)
-          continue;
-
-      if (text_ptr[i].compression < PNG_TEXT_COMPRESSION_NONE ||
-          text_ptr[i].compression >= PNG_TEXT_COMPRESSION_LAST)
-      {
-         png_warning(png_ptr, "text compression mode is out of range");
-         continue;
-      }
-
-      key_len = png_strlen(text_ptr[i].key);
-
-      if (text_ptr[i].compression <= 0)
-      {
-         lang_len = 0;
-         lang_key_len = 0;
-      }
-
-      else
-#  ifdef PNG_iTXt_SUPPORTED
-      {
-         /* Set iTXt data */
-
-         if (text_ptr[i].lang != NULL)
-            lang_len = png_strlen(text_ptr[i].lang);
-
-         else
-            lang_len = 0;
-
-         if (text_ptr[i].lang_key != NULL)
-            lang_key_len = png_strlen(text_ptr[i].lang_key);
-
-         else
-            lang_key_len = 0;
-      }
-#  else /* PNG_iTXt_SUPPORTED */
-      {
-         png_warning(png_ptr, "iTXt chunk not supported");
-         continue;
-      }
-#  endif
-
-      if (text_ptr[i].text == NULL || text_ptr[i].text[0] == '\0')
-      {
-         text_length = 0;
-#  ifdef PNG_iTXt_SUPPORTED
-         if (text_ptr[i].compression > 0)
-            textp->compression = PNG_ITXT_COMPRESSION_NONE;
-
-         else
-#  endif
-            textp->compression = PNG_TEXT_COMPRESSION_NONE;
-      }
-
-      else
-      {
-         text_length = png_strlen(text_ptr[i].text);
-         textp->compression = text_ptr[i].compression;
-      }
-
-      textp->key = (png_charp)png_malloc_warn(png_ptr,
-          (png_size_t)
-          (key_len + text_length + lang_len + lang_key_len + 4));
-
-      if (textp->key == NULL)
-         return(1);
-
-      png_debug2(2, "Allocated %lu bytes at %p in png_set_text",
-          (unsigned long)(png_uint_32)
-          (key_len + lang_len + lang_key_len + text_length + 4),
-          textp->key);
-
-      png_memcpy(textp->key, text_ptr[i].key,(png_size_t)(key_len));
-      *(textp->key + key_len) = '\0';
-
-      if (text_ptr[i].compression > 0)
-      {
-         textp->lang = textp->key + key_len + 1;
-         png_memcpy(textp->lang, text_ptr[i].lang, lang_len);
-         *(textp->lang + lang_len) = '\0';
-         textp->lang_key = textp->lang + lang_len + 1;
-         png_memcpy(textp->lang_key, text_ptr[i].lang_key, lang_key_len);
-         *(textp->lang_key + lang_key_len) = '\0';
-         textp->text = textp->lang_key + lang_key_len + 1;
-      }
-
-      else
-      {
-         textp->lang=NULL;
-         textp->lang_key=NULL;
-         textp->text = textp->key + key_len + 1;
-      }
-
-      if (text_length)
-         png_memcpy(textp->text, text_ptr[i].text,
-             (png_size_t)(text_length));
-
-      *(textp->text + text_length) = '\0';
-
-#  ifdef PNG_iTXt_SUPPORTED
-      if (textp->compression > 0)
-      {
-         textp->text_length = 0;
-         textp->itxt_length = text_length;
-      }
-
-      else
-#  endif
-      {
-         textp->text_length = text_length;
-         textp->itxt_length = 0;
-      }
-
-      info_ptr->num_text++;
-      png_debug1(3, "transferred text chunk %d", info_ptr->num_text);
-   }
-   return(0);
-}
-#endif
-
-#ifdef PNG_tIME_SUPPORTED
-void PNGAPI
-png_set_tIME(png_structp png_ptr, png_infop info_ptr, png_const_timep mod_time)
-{
-   png_debug1(1, "in %s storage function", "tIME");
-
-   if (png_ptr == NULL || info_ptr == NULL ||
-       (png_ptr->mode & PNG_WROTE_tIME))
-      return;
-
-   if (mod_time->month == 0   || mod_time->month > 12  ||
-       mod_time->day   == 0   || mod_time->day   > 31  ||
-       mod_time->hour  > 23   || mod_time->minute > 59 ||
-       mod_time->second > 60)
-   {
-      png_warning(png_ptr, "Ignoring invalid time value");
-      return;
-   }
-
-   png_memcpy(&(info_ptr->mod_time), mod_time, png_sizeof(png_time));
-   info_ptr->valid |= PNG_INFO_tIME;
-}
-#endif
-
-#ifdef PNG_tRNS_SUPPORTED
-void PNGAPI
-png_set_tRNS(png_structp png_ptr, png_infop info_ptr,
-    png_const_bytep trans_alpha, int num_trans, png_const_color_16p trans_color)
-{
-   png_debug1(1, "in %s storage function", "tRNS");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if (trans_alpha != NULL)
-   {
-       /* It may not actually be necessary to set png_ptr->trans_alpha here;
-        * we do it for backward compatibility with the way the png_handle_tRNS
-        * function used to do the allocation.
-        */
-
-       png_free_data(png_ptr, info_ptr, PNG_FREE_TRNS, 0);
-
-       /* Changed from num_trans to PNG_MAX_PALETTE_LENGTH in version 1.2.1 */
-       png_ptr->trans_alpha = info_ptr->trans_alpha =
-           (png_bytep)png_malloc(png_ptr, (png_size_t)PNG_MAX_PALETTE_LENGTH);
-
-       if (num_trans > 0 && num_trans <= PNG_MAX_PALETTE_LENGTH)
-          png_memcpy(info_ptr->trans_alpha, trans_alpha, (png_size_t)num_trans);
-   }
-
-   if (trans_color != NULL)
-   {
-      int sample_max = (1 << info_ptr->bit_depth);
-
-      if ((info_ptr->color_type == PNG_COLOR_TYPE_GRAY &&
-          (int)trans_color->gray > sample_max) ||
-          (info_ptr->color_type == PNG_COLOR_TYPE_RGB &&
-          ((int)trans_color->red > sample_max ||
-          (int)trans_color->green > sample_max ||
-          (int)trans_color->blue > sample_max)))
-         png_warning(png_ptr,
-            "tRNS chunk has out-of-range samples for bit_depth");
-
-      png_memcpy(&(info_ptr->trans_color), trans_color,
-         png_sizeof(png_color_16));
-
-      if (num_trans == 0)
-         num_trans = 1;
-   }
-
-   info_ptr->num_trans = (png_uint_16)num_trans;
-
-   if (num_trans != 0)
-   {
-      info_ptr->valid |= PNG_INFO_tRNS;
-      info_ptr->free_me |= PNG_FREE_TRNS;
-   }
-}
-#endif
-
-#ifdef PNG_sPLT_SUPPORTED
-void PNGAPI
-png_set_sPLT(png_structp png_ptr,
-    png_infop info_ptr, png_const_sPLT_tp entries, int nentries)
-/*
- *  entries        - array of png_sPLT_t structures
- *                   to be added to the list of palettes
- *                   in the info structure.
- *
- *  nentries       - number of palette structures to be
- *                   added.
- */
-{
-   png_sPLT_tp np;
-   int i;
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   np = (png_sPLT_tp)png_malloc_warn(png_ptr,
-       (info_ptr->splt_palettes_num + nentries) *
-       (png_size_t)png_sizeof(png_sPLT_t));
-
-   if (np == NULL)
-   {
-      png_warning(png_ptr, "No memory for sPLT palettes");
-      return;
-   }
-
-   png_memcpy(np, info_ptr->splt_palettes,
-       info_ptr->splt_palettes_num * png_sizeof(png_sPLT_t));
-
-   png_free(png_ptr, info_ptr->splt_palettes);
-   info_ptr->splt_palettes=NULL;
-
-   for (i = 0; i < nentries; i++)
-   {
-      png_sPLT_tp to = np + info_ptr->splt_palettes_num + i;
-      png_const_sPLT_tp from = entries + i;
-      png_size_t length;
-
-      length = png_strlen(from->name) + 1;
-      to->name = (png_charp)png_malloc_warn(png_ptr, length);
-
-      if (to->name == NULL)
-      {
-         png_warning(png_ptr,
-             "Out of memory while processing sPLT chunk");
-         continue;
-      }
-
-      png_memcpy(to->name, from->name, length);
-      to->entries = (png_sPLT_entryp)png_malloc_warn(png_ptr,
-          from->nentries * png_sizeof(png_sPLT_entry));
-
-      if (to->entries == NULL)
-      {
-         png_warning(png_ptr,
-             "Out of memory while processing sPLT chunk");
-         png_free(png_ptr, to->name);
-         to->name = NULL;
-         continue;
-      }
-
-      png_memcpy(to->entries, from->entries,
-          from->nentries * png_sizeof(png_sPLT_entry));
-
-      to->nentries = from->nentries;
-      to->depth = from->depth;
-   }
-
-   info_ptr->splt_palettes = np;
-   info_ptr->splt_palettes_num += nentries;
-   info_ptr->valid |= PNG_INFO_sPLT;
-   info_ptr->free_me |= PNG_FREE_SPLT;
-}
-#endif /* PNG_sPLT_SUPPORTED */
-
-#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
-void PNGAPI
-png_set_unknown_chunks(png_structp png_ptr,
-   png_infop info_ptr, png_const_unknown_chunkp unknowns, int num_unknowns)
-{
-   png_unknown_chunkp np;
-   int i;
-
-   if (png_ptr == NULL || info_ptr == NULL || num_unknowns == 0)
-      return;
-
-   np = (png_unknown_chunkp)png_malloc_warn(png_ptr,
-       (png_size_t)(info_ptr->unknown_chunks_num + num_unknowns) *
-       png_sizeof(png_unknown_chunk));
-
-   if (np == NULL)
-   {
-      png_warning(png_ptr,
-          "Out of memory while processing unknown chunk");
-      return;
-   }
-
-   png_memcpy(np, info_ptr->unknown_chunks,
-       (png_size_t)info_ptr->unknown_chunks_num *
-       png_sizeof(png_unknown_chunk));
-
-   png_free(png_ptr, info_ptr->unknown_chunks);
-   info_ptr->unknown_chunks = NULL;
-
-   for (i = 0; i < num_unknowns; i++)
-   {
-      png_unknown_chunkp to = np + info_ptr->unknown_chunks_num + i;
-      png_const_unknown_chunkp from = unknowns + i;
-
-      png_memcpy(to->name, from->name, png_sizeof(from->name));
-      to->name[png_sizeof(to->name)-1] = '\0';
-      to->size = from->size;
-
-      /* Note our location in the read or write sequence */
-      to->location = (png_byte)(png_ptr->mode & 0xff);
-
-      if (from->size == 0)
-         to->data=NULL;
-
-      else
-      {
-         to->data = (png_bytep)png_malloc_warn(png_ptr,
-             (png_size_t)from->size);
-
-         if (to->data == NULL)
-         {
-            png_warning(png_ptr,
-                "Out of memory while processing unknown chunk");
-            to->size = 0;
-         }
-
-         else
-            png_memcpy(to->data, from->data, from->size);
-      }
-   }
-
-   info_ptr->unknown_chunks = np;
-   info_ptr->unknown_chunks_num += num_unknowns;
-   info_ptr->free_me |= PNG_FREE_UNKN;
-}
-
-void PNGAPI
-png_set_unknown_chunk_location(png_structp png_ptr, png_infop info_ptr,
-    int chunk, int location)
-{
-   if (png_ptr != NULL && info_ptr != NULL && chunk >= 0 && chunk <
-       info_ptr->unknown_chunks_num)
-      info_ptr->unknown_chunks[chunk].location = (png_byte)location;
-}
-#endif
-
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-png_uint_32 PNGAPI
-png_permit_mng_features (png_structp png_ptr, png_uint_32 mng_features)
-{
-   png_debug(1, "in png_permit_mng_features");
-
-   if (png_ptr == NULL)
-      return (png_uint_32)0;
-
-   png_ptr->mng_features_permitted =
-       (png_byte)(mng_features & PNG_ALL_MNG_FEATURES);
-
-   return (png_uint_32)png_ptr->mng_features_permitted;
-}
-#endif
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-void PNGAPI
-png_set_keep_unknown_chunks(png_structp png_ptr, int keep, png_const_bytep
-    chunk_list, int num_chunks)
-{
-   png_bytep new_list, p;
-   int i, old_num_chunks;
-   if (png_ptr == NULL)
-      return;
-
-   if (num_chunks == 0)
-   {
-      if (keep == PNG_HANDLE_CHUNK_ALWAYS || keep == PNG_HANDLE_CHUNK_IF_SAFE)
-         png_ptr->flags |= PNG_FLAG_KEEP_UNKNOWN_CHUNKS;
-
-      else
-         png_ptr->flags &= ~PNG_FLAG_KEEP_UNKNOWN_CHUNKS;
-
-      if (keep == PNG_HANDLE_CHUNK_ALWAYS)
-         png_ptr->flags |= PNG_FLAG_KEEP_UNSAFE_CHUNKS;
-
-      else
-         png_ptr->flags &= ~PNG_FLAG_KEEP_UNSAFE_CHUNKS;
-
-      return;
-   }
-
-   if (chunk_list == NULL)
-      return;
-
-   old_num_chunks = png_ptr->num_chunk_list;
-   new_list=(png_bytep)png_malloc(png_ptr,
-       (png_size_t)(5*(num_chunks + old_num_chunks)));
-
-   if (png_ptr->chunk_list != NULL)
-   {
-      png_memcpy(new_list, png_ptr->chunk_list,
-          (png_size_t)(5*old_num_chunks));
-      png_free(png_ptr, png_ptr->chunk_list);
-      png_ptr->chunk_list=NULL;
-   }
-
-   png_memcpy(new_list + 5*old_num_chunks, chunk_list,
-       (png_size_t)(5*num_chunks));
-
-   for (p = new_list + 5*old_num_chunks + 4, i = 0; i<num_chunks; i++, p += 5)
-      *p=(png_byte)keep;
-
-   png_ptr->num_chunk_list = old_num_chunks + num_chunks;
-   png_ptr->chunk_list = new_list;
-   png_ptr->free_me |= PNG_FREE_LIST;
-}
-#endif
-
-#ifdef PNG_READ_USER_CHUNKS_SUPPORTED
-void PNGAPI
-png_set_read_user_chunk_fn(png_structp png_ptr, png_voidp user_chunk_ptr,
-    png_user_chunk_ptr read_user_chunk_fn)
-{
-   png_debug(1, "in png_set_read_user_chunk_fn");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->read_user_chunk_fn = read_user_chunk_fn;
-   png_ptr->user_chunk_ptr = user_chunk_ptr;
-}
-#endif
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-void PNGAPI
-png_set_rows(png_structp png_ptr, png_infop info_ptr, png_bytepp row_pointers)
-{
-   png_debug1(1, "in %s storage function", "rows");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if (info_ptr->row_pointers && (info_ptr->row_pointers != row_pointers))
-      png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0);
-
-   info_ptr->row_pointers = row_pointers;
-
-   if (row_pointers)
-      info_ptr->valid |= PNG_INFO_IDAT;
-}
-#endif
-
-void PNGAPI
-png_set_compression_buffer_size(png_structp png_ptr, png_size_t size)
-{
-    if (png_ptr == NULL)
-       return;
-
-    png_free(png_ptr, png_ptr->zbuf);
-
-    if (size > ZLIB_IO_MAX)
-    {
-       png_warning(png_ptr, "Attempt to set buffer size beyond max ignored");
-       png_ptr->zbuf_size = ZLIB_IO_MAX;
-       size = ZLIB_IO_MAX; /* must fit */
-    }
-
-    else
-       png_ptr->zbuf_size = (uInt)size;
-
-    png_ptr->zbuf = (png_bytep)png_malloc(png_ptr, size);
-
-    /* The following ensures a relatively safe failure if this gets called while
-     * the buffer is actually in use.
-     */
-    png_ptr->zstream.next_out = png_ptr->zbuf;
-    png_ptr->zstream.avail_out = 0;
-    png_ptr->zstream.avail_in = 0;
-}
-
-void PNGAPI
-png_set_invalid(png_structp png_ptr, png_infop info_ptr, int mask)
-{
-   if (png_ptr && info_ptr)
-      info_ptr->valid &= ~mask;
-}
-
-
-
-#ifdef PNG_SET_USER_LIMITS_SUPPORTED
-/* This function was added to libpng 1.2.6 */
-void PNGAPI
-png_set_user_limits (png_structp png_ptr, png_uint_32 user_width_max,
-    png_uint_32 user_height_max)
-{
-   /* Images with dimensions larger than these limits will be
-    * rejected by png_set_IHDR().  To accept any PNG datastream
-    * regardless of dimensions, set both limits to 0x7ffffffL.
-    */
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->user_width_max = user_width_max;
-   png_ptr->user_height_max = user_height_max;
-}
-
-/* This function was added to libpng 1.4.0 */
-void PNGAPI
-png_set_chunk_cache_max (png_structp png_ptr,
-   png_uint_32 user_chunk_cache_max)
-{
-    if (png_ptr)
-       png_ptr->user_chunk_cache_max = user_chunk_cache_max;
-}
-
-/* This function was added to libpng 1.4.1 */
-void PNGAPI
-png_set_chunk_malloc_max (png_structp png_ptr,
-    png_alloc_size_t user_chunk_malloc_max)
-{
-   if (png_ptr)
-      png_ptr->user_chunk_malloc_max = user_chunk_malloc_max;
-}
-#endif /* ?PNG_SET_USER_LIMITS_SUPPORTED */
-
-
-#ifdef PNG_BENIGN_ERRORS_SUPPORTED
-void PNGAPI
-png_set_benign_errors(png_structp png_ptr, int allowed)
-{
-   png_debug(1, "in png_set_benign_errors");
-
-   if (allowed)
-      png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN;
-
-   else
-      png_ptr->flags &= ~PNG_FLAG_BENIGN_ERRORS_WARN;
-}
-#endif /* PNG_BENIGN_ERRORS_SUPPORTED */
-
-#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   /* Do not report invalid palette index; added at libng-1.5.10 */
-void PNGAPI
-png_set_check_for_invalid_index(png_structp png_ptr, int allowed)
-{
-   png_debug(1, "in png_set_check_for_invalid_index");
-
-   if (allowed)
-      png_ptr->num_palette_max = 0;
-
-   else
-      png_ptr->num_palette_max = -1;
-}
-#endif
-
-#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/reg-io/png/lpng1510/pngtest.c b/reg-io/png/lpng1510/pngtest.c
deleted file mode 100644
index ed84f88f..00000000
--- a/reg-io/png/lpng1510/pngtest.c
+++ /dev/null
@@ -1,1820 +0,0 @@
-
-/* pngtest.c - a simple test program to test libpng
- *
- * Last changed in libpng 1.5.6 [November 3, 2011]
- * Copyright (c) 1998-2011 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- *
- * This program reads in a PNG image, writes it out again, and then
- * compares the two files.  If the files are identical, this shows that
- * the basic chunk handling, filtering, and (de)compression code is working
- * properly.  It does not currently test all of the transforms, although
- * it probably should.
- *
- * The program will report "FAIL" in certain legitimate cases:
- * 1) when the compression level or filter selection method is changed.
- * 2) when the maximum IDAT size (PNG_ZBUF_SIZE in pngconf.h) is not 8192.
- * 3) unknown unsafe-to-copy ancillary chunks or unknown critical chunks
- *    exist in the input file.
- * 4) others not listed here...
- * In these cases, it is best to check with another tool such as "pngcheck"
- * to see what the differences between the two files are.
- *
- * If a filename is given on the command-line, then this file is used
- * for the input, rather than the default "pngtest.png".  This allows
- * testing a wide variety of files easily.  You can also test a number
- * of files at once by typing "pngtest -m file1.png file2.png ..."
- */
-
-#define _POSIX_SOURCE 1
-
-#include "zlib.h"
-#include "png.h"
-/* Copied from pngpriv.h but only used in error messages below. */
-#ifndef PNG_ZBUF_SIZE
-#  define PNG_ZBUF_SIZE 8192
-#endif
-#  include <stdio.h>
-#  include <stdlib.h>
-#  include <string.h>
-#  define FCLOSE(file) fclose(file)
-
-#ifndef PNG_STDIO_SUPPORTED
-typedef FILE                * png_FILE_p;
-#endif
-
-/* Makes pngtest verbose so we can find problems. */
-#ifndef PNG_DEBUG
-#  define PNG_DEBUG 0
-#endif
-
-#if PNG_DEBUG > 1
-#  define pngtest_debug(m)        ((void)fprintf(stderr, m "\n"))
-#  define pngtest_debug1(m,p1)    ((void)fprintf(stderr, m "\n", p1))
-#  define pngtest_debug2(m,p1,p2) ((void)fprintf(stderr, m "\n", p1, p2))
-#else
-#  define pngtest_debug(m)        ((void)0)
-#  define pngtest_debug1(m,p1)    ((void)0)
-#  define pngtest_debug2(m,p1,p2) ((void)0)
-#endif
-
-#if !PNG_DEBUG
-#  define SINGLE_ROWBUF_ALLOC  /* Makes buffer overruns easier to nail */
-#endif
-
-/* The code uses memcmp and memcpy on large objects (typically row pointers) so
- * it is necessary to do soemthing special on certain architectures, note that
- * the actual support for this was effectively removed in 1.4, so only the
- * memory remains in this program:
- */
-#define CVT_PTR(ptr)         (ptr)
-#define CVT_PTR_NOCHECK(ptr) (ptr)
-#define png_memcmp  memcmp
-#define png_memcpy  memcpy
-#define png_memset  memset
-
-/* Turn on CPU timing
-#define PNGTEST_TIMING
-*/
-
-#ifndef PNG_FLOATING_POINT_SUPPORTED
-#undef PNGTEST_TIMING
-#endif
-
-#ifdef PNGTEST_TIMING
-static float t_start, t_stop, t_decode, t_encode, t_misc;
-#include <time.h>
-#endif
-
-#ifdef PNG_TIME_RFC1123_SUPPORTED
-#define PNG_tIME_STRING_LENGTH 29
-static int tIME_chunk_present = 0;
-static char tIME_string[PNG_tIME_STRING_LENGTH] = "tIME chunk is not present";
-#endif
-
-static int verbose = 0;
-static int strict = 0;
-
-int test_one_file PNGARG((PNG_CONST char *inname, PNG_CONST char *outname));
-
-#ifdef __TURBOC__
-#include <mem.h>
-#endif
-
-/* Defined so I can write to a file on gui/windowing platforms */
-/*  #define STDERR stderr  */
-#define STDERR stdout   /* For DOS */
-
-/* Define png_jmpbuf() in case we are using a pre-1.0.6 version of libpng */
-#ifndef png_jmpbuf
-#  define png_jmpbuf(png_ptr) png_ptr->jmpbuf
-#endif
-
-/* Example of using row callbacks to make a simple progress meter */
-static int status_pass = 1;
-static int status_dots_requested = 0;
-static int status_dots = 1;
-
-void PNGCBAPI
-read_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass);
-void PNGCBAPI
-read_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass)
-{
-   if (png_ptr == NULL || row_number > PNG_UINT_31_MAX)
-      return;
-
-   if (status_pass != pass)
-   {
-      fprintf(stdout, "\n Pass %d: ", pass);
-      status_pass = pass;
-      status_dots = 31;
-   }
-
-   status_dots--;
-
-   if (status_dots == 0)
-   {
-      fprintf(stdout, "\n         ");
-      status_dots=30;
-   }
-
-   fprintf(stdout, "r");
-}
-
-void PNGCBAPI
-write_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass);
-void PNGCBAPI
-write_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass)
-{
-   if (png_ptr == NULL || row_number > PNG_UINT_31_MAX || pass > 7)
-      return;
-
-   fprintf(stdout, "w");
-}
-
-
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-/* Example of using user transform callback (we don't transform anything,
- * but merely examine the row filters.  We set this to 256 rather than
- * 5 in case illegal filter values are present.)
- */
-static png_uint_32 filters_used[256];
-void PNGCBAPI
-count_filters(png_structp png_ptr, png_row_infop row_info, png_bytep data);
-void PNGCBAPI
-count_filters(png_structp png_ptr, png_row_infop row_info, png_bytep data)
-{
-   if (png_ptr != NULL && row_info != NULL)
-      ++filters_used[*(data - 1)];
-}
-#endif
-
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-/* Example of using user transform callback (we don't transform anything,
- * but merely count the zero samples)
- */
-
-static png_uint_32 zero_samples;
-
-void PNGCBAPI
-count_zero_samples(png_structp png_ptr, png_row_infop row_info, png_bytep data);
-void PNGCBAPI
-count_zero_samples(png_structp png_ptr, png_row_infop row_info, png_bytep data)
-{
-   png_bytep dp = data;
-   if (png_ptr == NULL)
-      return;
-
-   /* Contents of row_info:
-    *  png_uint_32 width      width of row
-    *  png_uint_32 rowbytes   number of bytes in row
-    *  png_byte color_type    color type of pixels
-    *  png_byte bit_depth     bit depth of samples
-    *  png_byte channels      number of channels (1-4)
-    *  png_byte pixel_depth   bits per pixel (depth*channels)
-    */
-
-    /* Counts the number of zero samples (or zero pixels if color_type is 3 */
-
-    if (row_info->color_type == 0 || row_info->color_type == 3)
-    {
-       int pos = 0;
-       png_uint_32 n, nstop;
-
-       for (n = 0, nstop=row_info->width; n<nstop; n++)
-       {
-          if (row_info->bit_depth == 1)
-          {
-             if (((*dp << pos++ ) & 0x80) == 0)
-                zero_samples++;
-
-             if (pos == 8)
-             {
-                pos = 0;
-                dp++;
-             }
-          }
-
-          if (row_info->bit_depth == 2)
-          {
-             if (((*dp << (pos+=2)) & 0xc0) == 0)
-                zero_samples++;
-
-             if (pos == 8)
-             {
-                pos = 0;
-                dp++;
-             }
-          }
-
-          if (row_info->bit_depth == 4)
-          {
-             if (((*dp << (pos+=4)) & 0xf0) == 0)
-                zero_samples++;
-
-             if (pos == 8)
-             {
-                pos = 0;
-                dp++;
-             }
-          }
-
-          if (row_info->bit_depth == 8)
-             if (*dp++ == 0)
-                zero_samples++;
-
-          if (row_info->bit_depth == 16)
-          {
-             if ((*dp | *(dp+1)) == 0)
-                zero_samples++;
-             dp+=2;
-          }
-       }
-    }
-    else /* Other color types */
-    {
-       png_uint_32 n, nstop;
-       int channel;
-       int color_channels = row_info->channels;
-       if (row_info->color_type > 3)color_channels--;
-
-       for (n = 0, nstop=row_info->width; n<nstop; n++)
-       {
-          for (channel = 0; channel < color_channels; channel++)
-          {
-             if (row_info->bit_depth == 8)
-                if (*dp++ == 0)
-                   zero_samples++;
-
-             if (row_info->bit_depth == 16)
-             {
-                if ((*dp | *(dp+1)) == 0)
-                   zero_samples++;
-
-                dp+=2;
-             }
-          }
-          if (row_info->color_type > 3)
-          {
-             dp++;
-             if (row_info->bit_depth == 16)
-                dp++;
-          }
-       }
-    }
-}
-#endif /* PNG_WRITE_USER_TRANSFORM_SUPPORTED */
-
-static int wrote_question = 0;
-
-#ifndef PNG_STDIO_SUPPORTED
-/* START of code to validate stdio-free compilation */
-/* These copies of the default read/write functions come from pngrio.c and
- * pngwio.c.  They allow "don't include stdio" testing of the library.
- * This is the function that does the actual reading of data.  If you are
- * not reading from a standard C stream, you should create a replacement
- * read_data function and use it at run time with png_set_read_fn(), rather
- * than changing the library.
- */
-
-#ifdef PNG_IO_STATE_SUPPORTED
-void
-pngtest_check_io_state(png_structp png_ptr, png_size_t data_length,
-   png_uint_32 io_op);
-void
-pngtest_check_io_state(png_structp png_ptr, png_size_t data_length,
-   png_uint_32 io_op)
-{
-   png_uint_32 io_state = png_get_io_state(png_ptr);
-   int err = 0;
-
-   /* Check if the current operation (reading / writing) is as expected. */
-   if ((io_state & PNG_IO_MASK_OP) != io_op)
-      png_error(png_ptr, "Incorrect operation in I/O state");
-
-   /* Check if the buffer size specific to the current location
-    * (file signature / header / data / crc) is as expected.
-    */
-   switch (io_state & PNG_IO_MASK_LOC)
-   {
-   case PNG_IO_SIGNATURE:
-      if (data_length > 8)
-         err = 1;
-      break;
-   case PNG_IO_CHUNK_HDR:
-      if (data_length != 8)
-         err = 1;
-      break;
-   case PNG_IO_CHUNK_DATA:
-      break;  /* no restrictions here */
-   case PNG_IO_CHUNK_CRC:
-      if (data_length != 4)
-         err = 1;
-      break;
-   default:
-      err = 1;  /* uninitialized */
-   }
-   if (err)
-      png_error(png_ptr, "Bad I/O state or buffer size");
-}
-#endif
-
-#ifndef USE_FAR_KEYWORD
-static void PNGCBAPI
-pngtest_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
-{
-   png_size_t check = 0;
-   png_voidp io_ptr;
-
-   /* fread() returns 0 on error, so it is OK to store this in a png_size_t
-    * instead of an int, which is what fread() actually returns.
-    */
-   io_ptr = png_get_io_ptr(png_ptr);
-   if (io_ptr != NULL)
-   {
-      check = fread(data, 1, length, (png_FILE_p)io_ptr);
-   }
-
-   if (check != length)
-   {
-      png_error(png_ptr, "Read Error");
-   }
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   pngtest_check_io_state(png_ptr, length, PNG_IO_READING);
-#endif
-}
-#else
-/* This is the model-independent version. Since the standard I/O library
-   can't handle far buffers in the medium and small models, we have to copy
-   the data.
-*/
-
-#define NEAR_BUF_SIZE 1024
-#define MIN(a,b) (a <= b ? a : b)
-
-static void PNGCBAPI
-pngtest_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
-{
-   png_size_t check;
-   png_byte *n_data;
-   png_FILE_p io_ptr;
-
-   /* Check if data really is near. If so, use usual code. */
-   n_data = (png_byte *)CVT_PTR_NOCHECK(data);
-   io_ptr = (png_FILE_p)CVT_PTR(png_get_io_ptr(png_ptr));
-   if ((png_bytep)n_data == data)
-   {
-      check = fread(n_data, 1, length, io_ptr);
-   }
-   else
-   {
-      png_byte buf[NEAR_BUF_SIZE];
-      png_size_t read, remaining, err;
-      check = 0;
-      remaining = length;
-
-      do
-      {
-         read = MIN(NEAR_BUF_SIZE, remaining);
-         err = fread(buf, 1, 1, io_ptr);
-         png_memcpy(data, buf, read); /* Copy far buffer to near buffer */
-         if (err != read)
-            break;
-         else
-            check += err;
-         data += read;
-         remaining -= read;
-      }
-      while (remaining != 0);
-   }
-
-   if (check != length)
-      png_error(png_ptr, "Read Error");
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   pngtest_check_io_state(png_ptr, length, PNG_IO_READING);
-#endif
-}
-#endif /* USE_FAR_KEYWORD */
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-static void PNGCBAPI
-pngtest_flush(png_structp png_ptr)
-{
-   /* Do nothing; fflush() is said to be just a waste of energy. */
-   PNG_UNUSED(png_ptr)   /* Stifle compiler warning */
-}
-#endif
-
-/* This is the function that does the actual writing of data.  If you are
- * not writing to a standard C stream, you should create a replacement
- * write_data function and use it at run time with png_set_write_fn(), rather
- * than changing the library.
- */
-#ifndef USE_FAR_KEYWORD
-static void PNGCBAPI
-pngtest_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
-{
-   png_size_t check;
-
-   check = fwrite(data, 1, length, (png_FILE_p)png_get_io_ptr(png_ptr));
-
-   if (check != length)
-   {
-      png_error(png_ptr, "Write Error");
-   }
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   pngtest_check_io_state(png_ptr, length, PNG_IO_WRITING);
-#endif
-}
-#else
-/* This is the model-independent version. Since the standard I/O library
-   can't handle far buffers in the medium and small models, we have to copy
-   the data.
-*/
-
-#define NEAR_BUF_SIZE 1024
-#define MIN(a,b) (a <= b ? a : b)
-
-static void PNGCBAPI
-pngtest_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
-{
-   png_size_t check;
-   png_byte *near_data;  /* Needs to be "png_byte *" instead of "png_bytep" */
-   png_FILE_p io_ptr;
-
-   /* Check if data really is near. If so, use usual code. */
-   near_data = (png_byte *)CVT_PTR_NOCHECK(data);
-   io_ptr = (png_FILE_p)CVT_PTR(png_get_io_ptr(png_ptr));
-
-   if ((png_bytep)near_data == data)
-   {
-      check = fwrite(near_data, 1, length, io_ptr);
-   }
-
-   else
-   {
-      png_byte buf[NEAR_BUF_SIZE];
-      png_size_t written, remaining, err;
-      check = 0;
-      remaining = length;
-
-      do
-      {
-         written = MIN(NEAR_BUF_SIZE, remaining);
-         png_memcpy(buf, data, written); /* Copy far buffer to near buffer */
-         err = fwrite(buf, 1, written, io_ptr);
-         if (err != written)
-            break;
-         else
-            check += err;
-         data += written;
-         remaining -= written;
-      }
-      while (remaining != 0);
-   }
-
-   if (check != length)
-   {
-      png_error(png_ptr, "Write Error");
-   }
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   pngtest_check_io_state(png_ptr, length, PNG_IO_WRITING);
-#endif
-}
-#endif /* USE_FAR_KEYWORD */
-
-/* This function is called when there is a warning, but the library thinks
- * it can continue anyway.  Replacement functions don't have to do anything
- * here if you don't want to.  In the default configuration, png_ptr is
- * not used, but it is passed in case it may be useful.
- */
-static void PNGCBAPI
-pngtest_warning(png_structp png_ptr, png_const_charp message)
-{
-   PNG_CONST char *name = "UNKNOWN (ERROR!)";
-   char *test;
-   test = png_get_error_ptr(png_ptr);
-
-   if (test == NULL)
-     fprintf(STDERR, "%s: libpng warning: %s\n", name, message);
-
-   else
-     fprintf(STDERR, "%s: libpng warning: %s\n", test, message);
-}
-
-/* This is the default error handling function.  Note that replacements for
- * this function MUST NOT RETURN, or the program will likely crash.  This
- * function is used by default, or if the program supplies NULL for the
- * error function pointer in png_set_error_fn().
- */
-static void PNGCBAPI
-pngtest_error(png_structp png_ptr, png_const_charp message)
-{
-   pngtest_warning(png_ptr, message);
-   /* We can return because png_error calls the default handler, which is
-    * actually OK in this case.
-    */
-}
-#endif /* !PNG_STDIO_SUPPORTED */
-/* END of code to validate stdio-free compilation */
-
-/* START of code to validate memory allocation and deallocation */
-#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
-
-/* Allocate memory.  For reasonable files, size should never exceed
- * 64K.  However, zlib may allocate more then 64K if you don't tell
- * it not to.  See zconf.h and png.h for more information.  zlib does
- * need to allocate exactly 64K, so whatever you call here must
- * have the ability to do that.
- *
- * This piece of code can be compiled to validate max 64K allocations
- * by setting MAXSEG_64K in zlib zconf.h *or* PNG_MAX_MALLOC_64K.
- */
-typedef struct memory_information
-{
-   png_alloc_size_t          size;
-   png_voidp                 pointer;
-   struct memory_information FAR *next;
-} memory_information;
-typedef memory_information FAR *memory_infop;
-
-static memory_infop pinformation = NULL;
-static int current_allocation = 0;
-static int maximum_allocation = 0;
-static int total_allocation = 0;
-static int num_allocations = 0;
-
-png_voidp PNGCBAPI png_debug_malloc PNGARG((png_structp png_ptr,
-    png_alloc_size_t size));
-void PNGCBAPI png_debug_free PNGARG((png_structp png_ptr, png_voidp ptr));
-
-png_voidp
-PNGCBAPI png_debug_malloc(png_structp png_ptr, png_alloc_size_t size)
-{
-
-   /* png_malloc has already tested for NULL; png_create_struct calls
-    * png_debug_malloc directly, with png_ptr == NULL which is OK
-    */
-
-   if (size == 0)
-      return (NULL);
-
-   /* This calls the library allocator twice, once to get the requested
-      buffer and once to get a new free list entry. */
-   {
-      /* Disable malloc_fn and free_fn */
-      memory_infop pinfo;
-      png_set_mem_fn(png_ptr, NULL, NULL, NULL);
-      pinfo = (memory_infop)png_malloc(png_ptr,
-         png_sizeof(*pinfo));
-      pinfo->size = size;
-      current_allocation += size;
-      total_allocation += size;
-      num_allocations ++;
-
-      if (current_allocation > maximum_allocation)
-         maximum_allocation = current_allocation;
-
-      pinfo->pointer = png_malloc(png_ptr, size);
-      /* Restore malloc_fn and free_fn */
-
-      png_set_mem_fn(png_ptr,
-          NULL, png_debug_malloc, png_debug_free);
-
-      if (size != 0 && pinfo->pointer == NULL)
-      {
-         current_allocation -= size;
-         total_allocation -= size;
-         png_error(png_ptr,
-           "out of memory in pngtest->png_debug_malloc");
-      }
-
-      pinfo->next = pinformation;
-      pinformation = pinfo;
-      /* Make sure the caller isn't assuming zeroed memory. */
-      png_memset(pinfo->pointer, 0xdd, pinfo->size);
-
-      if (verbose)
-         printf("png_malloc %lu bytes at %p\n", (unsigned long)size,
-            pinfo->pointer);
-
-      return (png_voidp)(pinfo->pointer);
-   }
-}
-
-/* Free a pointer.  It is removed from the list at the same time. */
-void PNGCBAPI
-png_debug_free(png_structp png_ptr, png_voidp ptr)
-{
-   if (png_ptr == NULL)
-      fprintf(STDERR, "NULL pointer to png_debug_free.\n");
-
-   if (ptr == 0)
-   {
-#if 0 /* This happens all the time. */
-      fprintf(STDERR, "WARNING: freeing NULL pointer\n");
-#endif
-      return;
-   }
-
-   /* Unlink the element from the list. */
-   {
-      memory_infop FAR *ppinfo = &pinformation;
-
-      for (;;)
-      {
-         memory_infop pinfo = *ppinfo;
-
-         if (pinfo->pointer == ptr)
-         {
-            *ppinfo = pinfo->next;
-            current_allocation -= pinfo->size;
-            if (current_allocation < 0)
-               fprintf(STDERR, "Duplicate free of memory\n");
-            /* We must free the list element too, but first kill
-               the memory that is to be freed. */
-            png_memset(ptr, 0x55, pinfo->size);
-            png_free_default(png_ptr, pinfo);
-            pinfo = NULL;
-            break;
-         }
-
-         if (pinfo->next == NULL)
-         {
-            fprintf(STDERR, "Pointer %x not found\n", (unsigned int)ptr);
-            break;
-         }
-
-         ppinfo = &pinfo->next;
-      }
-   }
-
-   /* Finally free the data. */
-   if (verbose)
-      printf("Freeing %p\n", ptr);
-
-   png_free_default(png_ptr, ptr);
-   ptr = NULL;
-}
-#endif /* PNG_USER_MEM_SUPPORTED && PNG_DEBUG */
-/* END of code to test memory allocation/deallocation */
-
-
-/* Demonstration of user chunk support of the sTER and vpAg chunks */
-#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
-
-/* (sTER is a public chunk not yet known by libpng.  vpAg is a private
-chunk used in ImageMagick to store "virtual page" size).  */
-
-static png_uint_32 user_chunk_data[4];
-
-    /* 0: sTER mode + 1
-     * 1: vpAg width
-     * 2: vpAg height
-     * 3: vpAg units
-     */
-
-static int PNGCBAPI read_user_chunk_callback(png_struct *png_ptr,
-   png_unknown_chunkp chunk)
-{
-   png_uint_32
-     *my_user_chunk_data;
-
-   /* Return one of the following:
-    *    return (-n);  chunk had an error
-    *    return (0);  did not recognize
-    *    return (n);  success
-    *
-    * The unknown chunk structure contains the chunk data:
-    * png_byte name[5];
-    * png_byte *data;
-    * png_size_t size;
-    *
-    * Note that libpng has already taken care of the CRC handling.
-    */
-
-   if (chunk->name[0] == 115 && chunk->name[1] ==  84 &&     /* s  T */
-       chunk->name[2] ==  69 && chunk->name[3] ==  82)       /* E  R */
-      {
-         /* Found sTER chunk */
-         if (chunk->size != 1)
-            return (-1); /* Error return */
-
-         if (chunk->data[0] != 0 && chunk->data[0] != 1)
-            return (-1);  /* Invalid mode */
-
-         my_user_chunk_data=(png_uint_32 *) png_get_user_chunk_ptr(png_ptr);
-         my_user_chunk_data[0]=chunk->data[0]+1;
-         return (1);
-      }
-
-   if (chunk->name[0] != 118 || chunk->name[1] != 112 ||    /* v  p */
-       chunk->name[2] !=  65 || chunk->name[3] != 103)      /* A  g */
-      return (0); /* Did not recognize */
-
-   /* Found ImageMagick vpAg chunk */
-
-   if (chunk->size != 9)
-      return (-1); /* Error return */
-
-   my_user_chunk_data=(png_uint_32 *) png_get_user_chunk_ptr(png_ptr);
-
-   my_user_chunk_data[1]=png_get_uint_31(png_ptr, chunk->data);
-   my_user_chunk_data[2]=png_get_uint_31(png_ptr, chunk->data + 4);
-   my_user_chunk_data[3]=(png_uint_32)chunk->data[8];
-
-   return (1);
-
-}
-#endif
-/* END of code to demonstrate user chunk support */
-
-/* Test one file */
-int
-test_one_file(PNG_CONST char *inname, PNG_CONST char *outname)
-{
-   static png_FILE_p fpin;
-   static png_FILE_p fpout;  /* "static" prevents setjmp corruption */
-   png_structp read_ptr;
-   png_infop read_info_ptr, end_info_ptr;
-#ifdef PNG_WRITE_SUPPORTED
-   png_structp write_ptr;
-   png_infop write_info_ptr;
-   png_infop write_end_info_ptr;
-#else
-   png_structp write_ptr = NULL;
-   png_infop write_info_ptr = NULL;
-   png_infop write_end_info_ptr = NULL;
-#endif
-   png_bytep row_buf;
-   png_uint_32 y;
-   png_uint_32 width, height;
-   int num_pass, pass;
-   int bit_depth, color_type;
-#ifdef PNG_SETJMP_SUPPORTED
-#ifdef USE_FAR_KEYWORD
-   jmp_buf tmp_jmpbuf;
-#endif
-#endif
-
-   char inbuf[256], outbuf[256];
-
-   row_buf = NULL;
-
-   if ((fpin = fopen(inname, "rb")) == NULL)
-   {
-      fprintf(STDERR, "Could not find input file %s\n", inname);
-      return (1);
-   }
-
-   if ((fpout = fopen(outname, "wb")) == NULL)
-   {
-      fprintf(STDERR, "Could not open output file %s\n", outname);
-      FCLOSE(fpin);
-      return (1);
-   }
-
-   pngtest_debug("Allocating read and write structures");
-#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
-   read_ptr =
-      png_create_read_struct_2(PNG_LIBPNG_VER_STRING, NULL,
-      NULL, NULL, NULL, png_debug_malloc, png_debug_free);
-#else
-   read_ptr =
-      png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
-#endif
-#ifndef PNG_STDIO_SUPPORTED
-   png_set_error_fn(read_ptr, (png_voidp)inname, pngtest_error,
-       pngtest_warning);
-#endif
-
-#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
-   user_chunk_data[0] = 0;
-   user_chunk_data[1] = 0;
-   user_chunk_data[2] = 0;
-   user_chunk_data[3] = 0;
-   png_set_read_user_chunk_fn(read_ptr, user_chunk_data,
-     read_user_chunk_callback);
-
-#endif
-#ifdef PNG_WRITE_SUPPORTED
-#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
-   write_ptr =
-      png_create_write_struct_2(PNG_LIBPNG_VER_STRING, NULL,
-      NULL, NULL, NULL, png_debug_malloc, png_debug_free);
-#else
-   write_ptr =
-      png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
-#endif
-#ifndef PNG_STDIO_SUPPORTED
-   png_set_error_fn(write_ptr, (png_voidp)inname, pngtest_error,
-       pngtest_warning);
-#endif
-#endif
-   pngtest_debug("Allocating read_info, write_info and end_info structures");
-   read_info_ptr = png_create_info_struct(read_ptr);
-   end_info_ptr = png_create_info_struct(read_ptr);
-#ifdef PNG_WRITE_SUPPORTED
-   write_info_ptr = png_create_info_struct(write_ptr);
-   write_end_info_ptr = png_create_info_struct(write_ptr);
-#endif
-
-#ifdef PNG_SETJMP_SUPPORTED
-   pngtest_debug("Setting jmpbuf for read struct");
-#ifdef USE_FAR_KEYWORD
-   if (setjmp(tmp_jmpbuf))
-#else
-   if (setjmp(png_jmpbuf(read_ptr)))
-#endif
-   {
-      fprintf(STDERR, "%s -> %s: libpng read error\n", inname, outname);
-      png_free(read_ptr, row_buf);
-      row_buf = NULL;
-      png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr);
-#ifdef PNG_WRITE_SUPPORTED
-      png_destroy_info_struct(write_ptr, &write_end_info_ptr);
-      png_destroy_write_struct(&write_ptr, &write_info_ptr);
-#endif
-      FCLOSE(fpin);
-      FCLOSE(fpout);
-      return (1);
-   }
-#ifdef USE_FAR_KEYWORD
-   png_memcpy(png_jmpbuf(read_ptr), tmp_jmpbuf, png_sizeof(jmp_buf));
-#endif
-
-#ifdef PNG_WRITE_SUPPORTED
-   pngtest_debug("Setting jmpbuf for write struct");
-#ifdef USE_FAR_KEYWORD
-
-   if (setjmp(tmp_jmpbuf))
-#else
-   if (setjmp(png_jmpbuf(write_ptr)))
-#endif
-   {
-      fprintf(STDERR, "%s -> %s: libpng write error\n", inname, outname);
-      png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr);
-      png_destroy_info_struct(write_ptr, &write_end_info_ptr);
-#ifdef PNG_WRITE_SUPPORTED
-      png_destroy_write_struct(&write_ptr, &write_info_ptr);
-#endif
-      FCLOSE(fpin);
-      FCLOSE(fpout);
-      return (1);
-   }
-
-#ifdef USE_FAR_KEYWORD
-   png_memcpy(png_jmpbuf(write_ptr), tmp_jmpbuf, png_sizeof(jmp_buf));
-#endif
-#endif
-#endif
-
-   pngtest_debug("Initializing input and output streams");
-#ifdef PNG_STDIO_SUPPORTED
-   png_init_io(read_ptr, fpin);
-#  ifdef PNG_WRITE_SUPPORTED
-   png_init_io(write_ptr, fpout);
-#  endif
-#else
-   png_set_read_fn(read_ptr, (png_voidp)fpin, pngtest_read_data);
-#  ifdef PNG_WRITE_SUPPORTED
-   png_set_write_fn(write_ptr, (png_voidp)fpout,  pngtest_write_data,
-#    ifdef PNG_WRITE_FLUSH_SUPPORTED
-      pngtest_flush);
-#    else
-      NULL);
-#    endif
-#  endif
-#endif
-
-#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
-   /* Normally one would use Z_DEFAULT_STRATEGY for text compression.
-    * This is here just to make pngtest replicate the results from libpng
-    * versions prior to 1.5.4, and to test this new API.
-    */
-   png_set_text_compression_strategy(write_ptr, Z_FILTERED);
-#endif
-
-   if (status_dots_requested == 1)
-   {
-#ifdef PNG_WRITE_SUPPORTED
-      png_set_write_status_fn(write_ptr, write_row_callback);
-#endif
-      png_set_read_status_fn(read_ptr, read_row_callback);
-   }
-
-   else
-   {
-#ifdef PNG_WRITE_SUPPORTED
-      png_set_write_status_fn(write_ptr, NULL);
-#endif
-      png_set_read_status_fn(read_ptr, NULL);
-   }
-
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-   {
-      int i;
-
-      for (i = 0; i<256; i++)
-         filters_used[i] = 0;
-
-      png_set_read_user_transform_fn(read_ptr, count_filters);
-   }
-#endif
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-   zero_samples = 0;
-   png_set_write_user_transform_fn(write_ptr, count_zero_samples);
-#endif
-
-#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
-#  ifndef PNG_HANDLE_CHUNK_ALWAYS
-#    define PNG_HANDLE_CHUNK_ALWAYS       3
-#  endif
-   png_set_keep_unknown_chunks(read_ptr, PNG_HANDLE_CHUNK_ALWAYS,
-      NULL, 0);
-#endif
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-#  ifndef PNG_HANDLE_CHUNK_IF_SAFE
-#    define PNG_HANDLE_CHUNK_IF_SAFE      2
-#  endif
-   png_set_keep_unknown_chunks(write_ptr, PNG_HANDLE_CHUNK_IF_SAFE,
-      NULL, 0);
-#endif
-
-   pngtest_debug("Reading info struct");
-   png_read_info(read_ptr, read_info_ptr);
-
-   pngtest_debug("Transferring info struct");
-   {
-      int interlace_type, compression_type, filter_type;
-
-      if (png_get_IHDR(read_ptr, read_info_ptr, &width, &height, &bit_depth,
-          &color_type, &interlace_type, &compression_type, &filter_type))
-      {
-         png_set_IHDR(write_ptr, write_info_ptr, width, height, bit_depth,
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-            color_type, interlace_type, compression_type, filter_type);
-#else
-            color_type, PNG_INTERLACE_NONE, compression_type, filter_type);
-#endif
-      }
-   }
-#ifdef PNG_FIXED_POINT_SUPPORTED
-#ifdef PNG_cHRM_SUPPORTED
-   {
-      png_fixed_point white_x, white_y, red_x, red_y, green_x, green_y, blue_x,
-         blue_y;
-
-      if (png_get_cHRM_fixed(read_ptr, read_info_ptr, &white_x, &white_y,
-         &red_x, &red_y, &green_x, &green_y, &blue_x, &blue_y))
-      {
-         png_set_cHRM_fixed(write_ptr, write_info_ptr, white_x, white_y, red_x,
-            red_y, green_x, green_y, blue_x, blue_y);
-      }
-   }
-#endif
-#ifdef PNG_gAMA_SUPPORTED
-   {
-      png_fixed_point gamma;
-
-      if (png_get_gAMA_fixed(read_ptr, read_info_ptr, &gamma))
-         png_set_gAMA_fixed(write_ptr, write_info_ptr, gamma);
-   }
-#endif
-#else /* Use floating point versions */
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-#ifdef PNG_cHRM_SUPPORTED
-   {
-      double white_x, white_y, red_x, red_y, green_x, green_y, blue_x,
-         blue_y;
-
-      if (png_get_cHRM(read_ptr, read_info_ptr, &white_x, &white_y, &red_x,
-         &red_y, &green_x, &green_y, &blue_x, &blue_y))
-      {
-         png_set_cHRM(write_ptr, write_info_ptr, white_x, white_y, red_x,
-            red_y, green_x, green_y, blue_x, blue_y);
-      }
-   }
-#endif
-#ifdef PNG_gAMA_SUPPORTED
-   {
-      double gamma;
-
-      if (png_get_gAMA(read_ptr, read_info_ptr, &gamma))
-         png_set_gAMA(write_ptr, write_info_ptr, gamma);
-   }
-#endif
-#endif /* Floating point */
-#endif /* Fixed point */
-#ifdef PNG_iCCP_SUPPORTED
-   {
-      png_charp name;
-      png_bytep profile;
-      png_uint_32 proflen;
-      int compression_type;
-
-      if (png_get_iCCP(read_ptr, read_info_ptr, &name, &compression_type,
-                      &profile, &proflen))
-      {
-         png_set_iCCP(write_ptr, write_info_ptr, name, compression_type,
-                      profile, proflen);
-      }
-   }
-#endif
-#ifdef PNG_sRGB_SUPPORTED
-   {
-      int intent;
-
-      if (png_get_sRGB(read_ptr, read_info_ptr, &intent))
-         png_set_sRGB(write_ptr, write_info_ptr, intent);
-   }
-#endif
-   {
-      png_colorp palette;
-      int num_palette;
-
-      if (png_get_PLTE(read_ptr, read_info_ptr, &palette, &num_palette))
-         png_set_PLTE(write_ptr, write_info_ptr, palette, num_palette);
-   }
-#ifdef PNG_bKGD_SUPPORTED
-   {
-      png_color_16p background;
-
-      if (png_get_bKGD(read_ptr, read_info_ptr, &background))
-      {
-         png_set_bKGD(write_ptr, write_info_ptr, background);
-      }
-   }
-#endif
-#ifdef PNG_hIST_SUPPORTED
-   {
-      png_uint_16p hist;
-
-      if (png_get_hIST(read_ptr, read_info_ptr, &hist))
-         png_set_hIST(write_ptr, write_info_ptr, hist);
-   }
-#endif
-#ifdef PNG_oFFs_SUPPORTED
-   {
-      png_int_32 offset_x, offset_y;
-      int unit_type;
-
-      if (png_get_oFFs(read_ptr, read_info_ptr, &offset_x, &offset_y,
-          &unit_type))
-      {
-         png_set_oFFs(write_ptr, write_info_ptr, offset_x, offset_y, unit_type);
-      }
-   }
-#endif
-#ifdef PNG_pCAL_SUPPORTED
-   {
-      png_charp purpose, units;
-      png_charpp params;
-      png_int_32 X0, X1;
-      int type, nparams;
-
-      if (png_get_pCAL(read_ptr, read_info_ptr, &purpose, &X0, &X1, &type,
-         &nparams, &units, &params))
-      {
-         png_set_pCAL(write_ptr, write_info_ptr, purpose, X0, X1, type,
-            nparams, units, params);
-      }
-   }
-#endif
-#ifdef PNG_pHYs_SUPPORTED
-   {
-      png_uint_32 res_x, res_y;
-      int unit_type;
-
-      if (png_get_pHYs(read_ptr, read_info_ptr, &res_x, &res_y, &unit_type))
-         png_set_pHYs(write_ptr, write_info_ptr, res_x, res_y, unit_type);
-   }
-#endif
-#ifdef PNG_sBIT_SUPPORTED
-   {
-      png_color_8p sig_bit;
-
-      if (png_get_sBIT(read_ptr, read_info_ptr, &sig_bit))
-         png_set_sBIT(write_ptr, write_info_ptr, sig_bit);
-   }
-#endif
-#ifdef PNG_sCAL_SUPPORTED
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-   {
-      int unit;
-      double scal_width, scal_height;
-
-      if (png_get_sCAL(read_ptr, read_info_ptr, &unit, &scal_width,
-         &scal_height))
-      {
-         png_set_sCAL(write_ptr, write_info_ptr, unit, scal_width, scal_height);
-      }
-   }
-#else
-#ifdef PNG_FIXED_POINT_SUPPORTED
-   {
-      int unit;
-      png_charp scal_width, scal_height;
-
-      if (png_get_sCAL_s(read_ptr, read_info_ptr, &unit, &scal_width,
-          &scal_height))
-      {
-         png_set_sCAL_s(write_ptr, write_info_ptr, unit, scal_width,
-             scal_height);
-      }
-   }
-#endif
-#endif
-#endif
-#ifdef PNG_TEXT_SUPPORTED
-   {
-      png_textp text_ptr;
-      int num_text;
-
-      if (png_get_text(read_ptr, read_info_ptr, &text_ptr, &num_text) > 0)
-      {
-         pngtest_debug1("Handling %d iTXt/tEXt/zTXt chunks", num_text);
-
-         if (verbose)
-            printf("\n Text compression=%d\n", text_ptr->compression);
-
-         png_set_text(write_ptr, write_info_ptr, text_ptr, num_text);
-      }
-   }
-#endif
-#ifdef PNG_tIME_SUPPORTED
-   {
-      png_timep mod_time;
-
-      if (png_get_tIME(read_ptr, read_info_ptr, &mod_time))
-      {
-         png_set_tIME(write_ptr, write_info_ptr, mod_time);
-#ifdef PNG_TIME_RFC1123_SUPPORTED
-         /* We have to use png_memcpy instead of "=" because the string
-          * pointed to by png_convert_to_rfc1123() gets free'ed before
-          * we use it.
-          */
-         png_memcpy(tIME_string,
-                    png_convert_to_rfc1123(read_ptr, mod_time),
-                    png_sizeof(tIME_string));
-
-         tIME_string[png_sizeof(tIME_string) - 1] = '\0';
-         tIME_chunk_present++;
-#endif /* PNG_TIME_RFC1123_SUPPORTED */
-      }
-   }
-#endif
-#ifdef PNG_tRNS_SUPPORTED
-   {
-      png_bytep trans_alpha;
-      int num_trans;
-      png_color_16p trans_color;
-
-      if (png_get_tRNS(read_ptr, read_info_ptr, &trans_alpha, &num_trans,
-         &trans_color))
-      {
-         int sample_max = (1 << bit_depth);
-         /* libpng doesn't reject a tRNS chunk with out-of-range samples */
-         if (!((color_type == PNG_COLOR_TYPE_GRAY &&
-             (int)trans_color->gray > sample_max) ||
-             (color_type == PNG_COLOR_TYPE_RGB &&
-             ((int)trans_color->red > sample_max ||
-             (int)trans_color->green > sample_max ||
-             (int)trans_color->blue > sample_max))))
-            png_set_tRNS(write_ptr, write_info_ptr, trans_alpha, num_trans,
-               trans_color);
-      }
-   }
-#endif
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-   {
-      png_unknown_chunkp unknowns;
-      int num_unknowns = png_get_unknown_chunks(read_ptr, read_info_ptr,
-         &unknowns);
-
-      if (num_unknowns)
-      {
-         int i;
-         png_set_unknown_chunks(write_ptr, write_info_ptr, unknowns,
-           num_unknowns);
-         /* Copy the locations from the read_info_ptr.  The automatically
-          * generated locations in write_info_ptr are wrong because we
-          * haven't written anything yet.
-          */
-         for (i = 0; i < num_unknowns; i++)
-           png_set_unknown_chunk_location(write_ptr, write_info_ptr, i,
-             unknowns[i].location);
-      }
-   }
-#endif
-
-#ifdef PNG_WRITE_SUPPORTED
-   pngtest_debug("Writing info struct");
-
-/* If we wanted, we could write info in two steps:
- * png_write_info_before_PLTE(write_ptr, write_info_ptr);
- */
-   png_write_info(write_ptr, write_info_ptr);
-
-#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
-   if (user_chunk_data[0] != 0)
-   {
-      png_byte png_sTER[5] = {115,  84,  69,  82, '\0'};
-
-      unsigned char
-        ster_chunk_data[1];
-
-      if (verbose)
-         fprintf(STDERR, "\n stereo mode = %lu\n",
-           (unsigned long)(user_chunk_data[0] - 1));
-
-      ster_chunk_data[0]=(unsigned char)(user_chunk_data[0] - 1);
-      png_write_chunk(write_ptr, png_sTER, ster_chunk_data, 1);
-   }
-
-   if (user_chunk_data[1] != 0 || user_chunk_data[2] != 0)
-   {
-      png_byte png_vpAg[5] = {118, 112,  65, 103, '\0'};
-
-      unsigned char
-        vpag_chunk_data[9];
-
-      if (verbose)
-         fprintf(STDERR, " vpAg = %lu x %lu, units = %lu\n",
-           (unsigned long)user_chunk_data[1],
-           (unsigned long)user_chunk_data[2],
-           (unsigned long)user_chunk_data[3]);
-
-      png_save_uint_32(vpag_chunk_data, user_chunk_data[1]);
-      png_save_uint_32(vpag_chunk_data + 4, user_chunk_data[2]);
-      vpag_chunk_data[8] = (unsigned char)(user_chunk_data[3] & 0xff);
-      png_write_chunk(write_ptr, png_vpAg, vpag_chunk_data, 9);
-   }
-
-#endif
-#endif
-
-#ifdef SINGLE_ROWBUF_ALLOC
-   pngtest_debug("Allocating row buffer...");
-   row_buf = (png_bytep)png_malloc(read_ptr,
-      png_get_rowbytes(read_ptr, read_info_ptr));
-
-   pngtest_debug1("\t0x%08lx", (unsigned long)row_buf);
-#endif /* SINGLE_ROWBUF_ALLOC */
-   pngtest_debug("Writing row data");
-
-#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
-  defined(PNG_WRITE_INTERLACING_SUPPORTED)
-   num_pass = png_set_interlace_handling(read_ptr);
-#  ifdef PNG_WRITE_SUPPORTED
-   png_set_interlace_handling(write_ptr);
-#  endif
-#else
-   num_pass = 1;
-#endif
-
-#ifdef PNGTEST_TIMING
-   t_stop = (float)clock();
-   t_misc += (t_stop - t_start);
-   t_start = t_stop;
-#endif
-   for (pass = 0; pass < num_pass; pass++)
-   {
-      pngtest_debug1("Writing row data for pass %d", pass);
-      for (y = 0; y < height; y++)
-      {
-#ifndef SINGLE_ROWBUF_ALLOC
-         pngtest_debug2("Allocating row buffer (pass %d, y = %u)...", pass, y);
-         row_buf = (png_bytep)png_malloc(read_ptr,
-            png_get_rowbytes(read_ptr, read_info_ptr));
-
-         pngtest_debug2("\t0x%08lx (%u bytes)", (unsigned long)row_buf,
-            png_get_rowbytes(read_ptr, read_info_ptr));
-
-#endif /* !SINGLE_ROWBUF_ALLOC */
-         png_read_rows(read_ptr, (png_bytepp)&row_buf, NULL, 1);
-
-#ifdef PNG_WRITE_SUPPORTED
-#ifdef PNGTEST_TIMING
-         t_stop = (float)clock();
-         t_decode += (t_stop - t_start);
-         t_start = t_stop;
-#endif
-         png_write_rows(write_ptr, (png_bytepp)&row_buf, 1);
-#ifdef PNGTEST_TIMING
-         t_stop = (float)clock();
-         t_encode += (t_stop - t_start);
-         t_start = t_stop;
-#endif
-#endif /* PNG_WRITE_SUPPORTED */
-
-#ifndef SINGLE_ROWBUF_ALLOC
-         pngtest_debug2("Freeing row buffer (pass %d, y = %u)", pass, y);
-         png_free(read_ptr, row_buf);
-         row_buf = NULL;
-#endif /* !SINGLE_ROWBUF_ALLOC */
-      }
-   }
-
-#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
-   png_free_data(read_ptr, read_info_ptr, PNG_FREE_UNKN, -1);
-#endif
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-   png_free_data(write_ptr, write_info_ptr, PNG_FREE_UNKN, -1);
-#endif
-
-   pngtest_debug("Reading and writing end_info data");
-
-   png_read_end(read_ptr, end_info_ptr);
-#ifdef PNG_TEXT_SUPPORTED
-   {
-      png_textp text_ptr;
-      int num_text;
-
-      if (png_get_text(read_ptr, end_info_ptr, &text_ptr, &num_text) > 0)
-      {
-         pngtest_debug1("Handling %d iTXt/tEXt/zTXt chunks", num_text);
-         png_set_text(write_ptr, write_end_info_ptr, text_ptr, num_text);
-      }
-   }
-#endif
-#ifdef PNG_tIME_SUPPORTED
-   {
-      png_timep mod_time;
-
-      if (png_get_tIME(read_ptr, end_info_ptr, &mod_time))
-      {
-         png_set_tIME(write_ptr, write_end_info_ptr, mod_time);
-#ifdef PNG_TIME_RFC1123_SUPPORTED
-         /* We have to use png_memcpy instead of "=" because the string
-            pointed to by png_convert_to_rfc1123() gets free'ed before
-            we use it */
-         png_memcpy(tIME_string,
-                    png_convert_to_rfc1123(read_ptr, mod_time),
-                    png_sizeof(tIME_string));
-
-         tIME_string[png_sizeof(tIME_string) - 1] = '\0';
-         tIME_chunk_present++;
-#endif /* PNG_TIME_RFC1123_SUPPORTED */
-      }
-   }
-#endif
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-   {
-      png_unknown_chunkp unknowns;
-      int num_unknowns = png_get_unknown_chunks(read_ptr, end_info_ptr,
-         &unknowns);
-
-      if (num_unknowns)
-      {
-         int i;
-         png_set_unknown_chunks(write_ptr, write_end_info_ptr, unknowns,
-           num_unknowns);
-         /* Copy the locations from the read_info_ptr.  The automatically
-          * generated locations in write_end_info_ptr are wrong because we
-          * haven't written the end_info yet.
-          */
-         for (i = 0; i < num_unknowns; i++)
-           png_set_unknown_chunk_location(write_ptr, write_end_info_ptr, i,
-             unknowns[i].location);
-      }
-   }
-#endif
-#ifdef PNG_WRITE_SUPPORTED
-   png_write_end(write_ptr, write_end_info_ptr);
-#endif
-
-#ifdef PNG_EASY_ACCESS_SUPPORTED
-   if (verbose)
-   {
-      png_uint_32 iwidth, iheight;
-      iwidth = png_get_image_width(write_ptr, write_info_ptr);
-      iheight = png_get_image_height(write_ptr, write_info_ptr);
-      fprintf(STDERR, "\n Image width = %lu, height = %lu\n",
-         (unsigned long)iwidth, (unsigned long)iheight);
-   }
-#endif
-
-   pngtest_debug("Destroying data structs");
-#ifdef SINGLE_ROWBUF_ALLOC
-   pngtest_debug("destroying row_buf for read_ptr");
-   png_free(read_ptr, row_buf);
-   row_buf = NULL;
-#endif /* SINGLE_ROWBUF_ALLOC */
-   pngtest_debug("destroying read_ptr, read_info_ptr, end_info_ptr");
-   png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr);
-#ifdef PNG_WRITE_SUPPORTED
-   pngtest_debug("destroying write_end_info_ptr");
-   png_destroy_info_struct(write_ptr, &write_end_info_ptr);
-   pngtest_debug("destroying write_ptr, write_info_ptr");
-   png_destroy_write_struct(&write_ptr, &write_info_ptr);
-#endif
-   pngtest_debug("Destruction complete.");
-
-   FCLOSE(fpin);
-   FCLOSE(fpout);
-
-   pngtest_debug("Opening files for comparison");
-   if ((fpin = fopen(inname, "rb")) == NULL)
-   {
-      fprintf(STDERR, "Could not find file %s\n", inname);
-      return (1);
-   }
-
-   if ((fpout = fopen(outname, "rb")) == NULL)
-   {
-      fprintf(STDERR, "Could not find file %s\n", outname);
-      FCLOSE(fpin);
-      return (1);
-   }
-
-   for (;;)
-   {
-      png_size_t num_in, num_out;
-
-         num_in = fread(inbuf, 1, 1, fpin);
-         num_out = fread(outbuf, 1, 1, fpout);
-
-      if (num_in != num_out)
-      {
-         fprintf(STDERR, "\nFiles %s and %s are of a different size\n",
-                 inname, outname);
-
-         if (wrote_question == 0)
-         {
-            fprintf(STDERR,
-         "   Was %s written with the same maximum IDAT chunk size (%d bytes),",
-              inname, PNG_ZBUF_SIZE);
-            fprintf(STDERR,
-              "\n   filtering heuristic (libpng default), compression");
-            fprintf(STDERR,
-              " level (zlib default),\n   and zlib version (%s)?\n\n",
-              ZLIB_VERSION);
-            wrote_question = 1;
-         }
-
-         FCLOSE(fpin);
-         FCLOSE(fpout);
-
-         if (strict != 0)
-           return (1);
-
-         else
-           return (0);
-      }
-
-      if (!num_in)
-         break;
-
-      if (png_memcmp(inbuf, outbuf, num_in))
-      {
-         fprintf(STDERR, "\nFiles %s and %s are different\n", inname, outname);
-
-         if (wrote_question == 0)
-         {
-            fprintf(STDERR,
-         "   Was %s written with the same maximum IDAT chunk size (%d bytes),",
-                 inname, PNG_ZBUF_SIZE);
-            fprintf(STDERR,
-              "\n   filtering heuristic (libpng default), compression");
-            fprintf(STDERR,
-              " level (zlib default),\n   and zlib version (%s)?\n\n",
-              ZLIB_VERSION);
-            wrote_question = 1;
-         }
-
-         FCLOSE(fpin);
-         FCLOSE(fpout);
-
-         if (strict != 0)
-           return (1);
-
-         else
-           return (0);
-      }
-   }
-
-   FCLOSE(fpin);
-   FCLOSE(fpout);
-
-   return (0);
-}
-
-/* Input and output filenames */
-#ifdef RISCOS
-static PNG_CONST char *inname = "pngtest/png";
-static PNG_CONST char *outname = "pngout/png";
-#else
-static PNG_CONST char *inname = "pngtest.png";
-static PNG_CONST char *outname = "pngout.png";
-#endif
-
-int
-main(int argc, char *argv[])
-{
-   int multiple = 0;
-   int ierror = 0;
-
-   fprintf(STDERR, "\n Testing libpng version %s\n", PNG_LIBPNG_VER_STRING);
-   fprintf(STDERR, "   with zlib   version %s\n", ZLIB_VERSION);
-   fprintf(STDERR, "%s", png_get_copyright(NULL));
-   /* Show the version of libpng used in building the library */
-   fprintf(STDERR, " library (%lu):%s",
-      (unsigned long)png_access_version_number(),
-      png_get_header_version(NULL));
-
-   /* Show the version of libpng used in building the application */
-   fprintf(STDERR, " pngtest (%lu):%s", (unsigned long)PNG_LIBPNG_VER,
-      PNG_HEADER_VERSION_STRING);
-
-   /* Do some consistency checking on the memory allocation settings, I'm
-    * not sure this matters, but it is nice to know, the first of these
-    * tests should be impossible because of the way the macros are set
-    * in pngconf.h
-    */
-#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K)
-      fprintf(STDERR, " NOTE: Zlib compiled for max 64k, libpng not\n");
-#endif
-   /* I think the following can happen. */
-#if !defined(MAXSEG_64K) && defined(PNG_MAX_MALLOC_64K)
-      fprintf(STDERR, " NOTE: libpng compiled for max 64k, zlib not\n");
-#endif
-
-   if (strcmp(png_libpng_ver, PNG_LIBPNG_VER_STRING))
-   {
-      fprintf(STDERR,
-         "Warning: versions are different between png.h and png.c\n");
-      fprintf(STDERR, "  png.h version: %s\n", PNG_LIBPNG_VER_STRING);
-      fprintf(STDERR, "  png.c version: %s\n\n", png_libpng_ver);
-      ++ierror;
-   }
-
-   if (argc > 1)
-   {
-      if (strcmp(argv[1], "-m") == 0)
-      {
-         multiple = 1;
-         status_dots_requested = 0;
-      }
-
-      else if (strcmp(argv[1], "-mv") == 0 ||
-               strcmp(argv[1], "-vm") == 0 )
-      {
-         multiple = 1;
-         verbose = 1;
-         status_dots_requested = 1;
-      }
-
-      else if (strcmp(argv[1], "-v") == 0)
-      {
-         verbose = 1;
-         status_dots_requested = 1;
-         inname = argv[2];
-      }
-
-      else if (strcmp(argv[1], "--strict") == 0)
-      {
-         status_dots_requested = 0;
-         verbose = 1;
-         inname = argv[2];
-         strict++;
-      }
-
-      else
-      {
-         inname = argv[1];
-         status_dots_requested = 0;
-      }
-   }
-
-   if (!multiple && argc == 3 + verbose)
-     outname = argv[2 + verbose];
-
-   if ((!multiple && argc > 3 + verbose) || (multiple && argc < 2))
-   {
-     fprintf(STDERR,
-       "usage: %s [infile.png] [outfile.png]\n\t%s -m {infile.png}\n",
-        argv[0], argv[0]);
-     fprintf(STDERR,
-       "  reads/writes one PNG file (without -m) or multiple files (-m)\n");
-     fprintf(STDERR,
-       "  with -m %s is used as a temporary file\n", outname);
-     exit(1);
-   }
-
-   if (multiple)
-   {
-      int i;
-#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
-      int allocation_now = current_allocation;
-#endif
-      for (i=2; i<argc; ++i)
-      {
-         int kerror;
-         fprintf(STDERR, "\n Testing %s:", argv[i]);
-         kerror = test_one_file(argv[i], outname);
-         if (kerror == 0)
-         {
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-            int k;
-#endif
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-            fprintf(STDERR, "\n PASS (%lu zero samples)\n",
-               (unsigned long)zero_samples);
-#else
-            fprintf(STDERR, " PASS\n");
-#endif
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-            for (k = 0; k<256; k++)
-               if (filters_used[k])
-                  fprintf(STDERR, " Filter %d was used %lu times\n",
-                     k, (unsigned long)filters_used[k]);
-#endif
-#ifdef PNG_TIME_RFC1123_SUPPORTED
-         if (tIME_chunk_present != 0)
-            fprintf(STDERR, " tIME = %s\n", tIME_string);
-
-         tIME_chunk_present = 0;
-#endif /* PNG_TIME_RFC1123_SUPPORTED */
-         }
-
-         else
-         {
-            fprintf(STDERR, " FAIL\n");
-            ierror += kerror;
-         }
-#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
-         if (allocation_now != current_allocation)
-            fprintf(STDERR, "MEMORY ERROR: %d bytes lost\n",
-               current_allocation - allocation_now);
-
-         if (current_allocation != 0)
-         {
-            memory_infop pinfo = pinformation;
-
-            fprintf(STDERR, "MEMORY ERROR: %d bytes still allocated\n",
-               current_allocation);
-
-            while (pinfo != NULL)
-            {
-               fprintf(STDERR, " %lu bytes at %x\n",
-                 (unsigned long)pinfo->size,
-                 (unsigned int)pinfo->pointer);
-               pinfo = pinfo->next;
-            }
-         }
-#endif
-      }
-#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
-         fprintf(STDERR, " Current memory allocation: %10d bytes\n",
-            current_allocation);
-         fprintf(STDERR, " Maximum memory allocation: %10d bytes\n",
-            maximum_allocation);
-         fprintf(STDERR, " Total   memory allocation: %10d bytes\n",
-            total_allocation);
-         fprintf(STDERR, "     Number of allocations: %10d\n",
-            num_allocations);
-#endif
-   }
-
-   else
-   {
-      int i;
-      for (i = 0; i<3; ++i)
-      {
-         int kerror;
-#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
-         int allocation_now = current_allocation;
-#endif
-         if (i == 1)
-            status_dots_requested = 1;
-
-         else if (verbose == 0)
-            status_dots_requested = 0;
-
-         if (i == 0 || verbose == 1 || ierror != 0)
-            fprintf(STDERR, "\n Testing %s:", inname);
-
-         kerror = test_one_file(inname, outname);
-
-         if (kerror == 0)
-         {
-            if (verbose == 1 || i == 2)
-            {
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-                int k;
-#endif
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-                fprintf(STDERR, "\n PASS (%lu zero samples)\n",
-                   (unsigned long)zero_samples);
-#else
-                fprintf(STDERR, " PASS\n");
-#endif
-#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
-                for (k = 0; k<256; k++)
-                   if (filters_used[k])
-                      fprintf(STDERR, " Filter %d was used %lu times\n",
-                         k, (unsigned long)filters_used[k]);
-#endif
-#ifdef PNG_TIME_RFC1123_SUPPORTED
-             if (tIME_chunk_present != 0)
-                fprintf(STDERR, " tIME = %s\n", tIME_string);
-#endif /* PNG_TIME_RFC1123_SUPPORTED */
-            }
-         }
-
-         else
-         {
-            if (verbose == 0 && i != 2)
-               fprintf(STDERR, "\n Testing %s:", inname);
-
-            fprintf(STDERR, " FAIL\n");
-            ierror += kerror;
-         }
-#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
-         if (allocation_now != current_allocation)
-             fprintf(STDERR, "MEMORY ERROR: %d bytes lost\n",
-               current_allocation - allocation_now);
-
-         if (current_allocation != 0)
-         {
-             memory_infop pinfo = pinformation;
-
-             fprintf(STDERR, "MEMORY ERROR: %d bytes still allocated\n",
-                current_allocation);
-
-             while (pinfo != NULL)
-             {
-                fprintf(STDERR, " %lu bytes at %x\n",
-                   (unsigned long)pinfo->size, (unsigned int)pinfo->pointer);
-                pinfo = pinfo->next;
-             }
-          }
-#endif
-       }
-#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG
-       fprintf(STDERR, " Current memory allocation: %10d bytes\n",
-          current_allocation);
-       fprintf(STDERR, " Maximum memory allocation: %10d bytes\n",
-          maximum_allocation);
-       fprintf(STDERR, " Total   memory allocation: %10d bytes\n",
-          total_allocation);
-       fprintf(STDERR, "     Number of allocations: %10d\n",
-            num_allocations);
-#endif
-   }
-
-#ifdef PNGTEST_TIMING
-   t_stop = (float)clock();
-   t_misc += (t_stop - t_start);
-   t_start = t_stop;
-   fprintf(STDERR, " CPU time used = %.3f seconds",
-      (t_misc+t_decode+t_encode)/(float)CLOCKS_PER_SEC);
-   fprintf(STDERR, " (decoding %.3f,\n",
-      t_decode/(float)CLOCKS_PER_SEC);
-   fprintf(STDERR, "        encoding %.3f ,",
-      t_encode/(float)CLOCKS_PER_SEC);
-   fprintf(STDERR, " other %.3f seconds)\n\n",
-      t_misc/(float)CLOCKS_PER_SEC);
-#endif
-
-   if (ierror == 0)
-      fprintf(STDERR, " libpng passes test\n");
-
-   else
-      fprintf(STDERR, " libpng FAILS test\n");
-
-   return (int)(ierror != 0);
-}
-
-/* Generate a compiler error if there is an old png.h in the search path. */
-typedef png_libpng_version_1_5_10 Your_png_h_is_not_version_1_5_10;
diff --git a/reg-io/png/lpng1510/pngwrite.c b/reg-io/png/lpng1510/pngwrite.c
deleted file mode 100644
index 9a154445..00000000
--- a/reg-io/png/lpng1510/pngwrite.c
+++ /dev/null
@@ -1,1667 +0,0 @@
-
-/* pngwrite.c - general routines to write a PNG file
- *
- * Last changed in libpng 1.5.10 [March 8, 2012]
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_WRITE_SUPPORTED
-
-/* Writes all the PNG information.  This is the suggested way to use the
- * library.  If you have a new chunk to add, make a function to write it,
- * and put it in the correct location here.  If you want the chunk written
- * after the image data, put it in png_write_end().  I strongly encourage
- * you to supply a PNG_INFO_ flag, and check info_ptr->valid before writing
- * the chunk, as that will keep the code from breaking if you want to just
- * write a plain PNG file.  If you have long comments, I suggest writing
- * them in png_write_end(), and compressing them.
- */
-void PNGAPI
-png_write_info_before_PLTE(png_structp png_ptr, png_infop info_ptr)
-{
-   png_debug(1, "in png_write_info_before_PLTE");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   if (!(png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE))
-   {
-   /* Write PNG signature */
-   png_write_sig(png_ptr);
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   if ((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE) && \
-       (png_ptr->mng_features_permitted))
-   {
-      png_warning(png_ptr, "MNG features are not allowed in a PNG datastream");
-      png_ptr->mng_features_permitted = 0;
-   }
-#endif
-
-   /* Write IHDR information. */
-   png_write_IHDR(png_ptr, info_ptr->width, info_ptr->height,
-       info_ptr->bit_depth, info_ptr->color_type, info_ptr->compression_type,
-       info_ptr->filter_type,
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-       info_ptr->interlace_type);
-#else
-       0);
-#endif
-   /* The rest of these check to see if the valid field has the appropriate
-    * flag set, and if it does, writes the chunk.
-    */
-#ifdef PNG_WRITE_gAMA_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_gAMA)
-      png_write_gAMA_fixed(png_ptr, info_ptr->gamma);
-#endif
-#ifdef PNG_WRITE_sRGB_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_sRGB)
-      png_write_sRGB(png_ptr, (int)info_ptr->srgb_intent);
-#endif
-
-#ifdef PNG_WRITE_iCCP_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_iCCP)
-      png_write_iCCP(png_ptr, info_ptr->iccp_name, PNG_COMPRESSION_TYPE_BASE,
-          (png_charp)info_ptr->iccp_profile, (int)info_ptr->iccp_proflen);
-#endif
-#ifdef PNG_WRITE_sBIT_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_sBIT)
-      png_write_sBIT(png_ptr, &(info_ptr->sig_bit), info_ptr->color_type);
-#endif
-#ifdef PNG_WRITE_cHRM_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_cHRM)
-      png_write_cHRM_fixed(png_ptr,
-          info_ptr->x_white, info_ptr->y_white,
-          info_ptr->x_red, info_ptr->y_red,
-          info_ptr->x_green, info_ptr->y_green,
-          info_ptr->x_blue, info_ptr->y_blue);
-#endif
-
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-   if (info_ptr->unknown_chunks_num)
-   {
-      png_unknown_chunk *up;
-
-      png_debug(5, "writing extra chunks");
-
-      for (up = info_ptr->unknown_chunks;
-           up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
-           up++)
-      {
-         int keep = png_handle_as_unknown(png_ptr, up->name);
-
-         if (keep != PNG_HANDLE_CHUNK_NEVER &&
-             up->location &&
-             !(up->location & PNG_HAVE_PLTE) &&
-             !(up->location & PNG_HAVE_IDAT) &&
-             !(up->location & PNG_AFTER_IDAT) &&
-             ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS ||
-             (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS)))
-         {
-            if (up->size == 0)
-               png_warning(png_ptr, "Writing zero-length unknown chunk");
-
-            png_write_chunk(png_ptr, up->name, up->data, up->size);
-         }
-      }
-   }
-#endif
-      png_ptr->mode |= PNG_WROTE_INFO_BEFORE_PLTE;
-   }
-}
-
-void PNGAPI
-png_write_info(png_structp png_ptr, png_infop info_ptr)
-{
-#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
-   int i;
-#endif
-
-   png_debug(1, "in png_write_info");
-
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   png_write_info_before_PLTE(png_ptr, info_ptr);
-
-   if (info_ptr->valid & PNG_INFO_PLTE)
-      png_write_PLTE(png_ptr, info_ptr->palette,
-          (png_uint_32)info_ptr->num_palette);
-
-   else if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      png_error(png_ptr, "Valid palette required for paletted images");
-
-#ifdef PNG_WRITE_tRNS_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_tRNS)
-   {
-#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
-      /* Invert the alpha channel (in tRNS) */
-      if ((png_ptr->transformations & PNG_INVERT_ALPHA) &&
-          info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         int j;
-         for (j = 0; j<(int)info_ptr->num_trans; j++)
-            info_ptr->trans_alpha[j] =
-               (png_byte)(255 - info_ptr->trans_alpha[j]);
-      }
-#endif
-      png_write_tRNS(png_ptr, info_ptr->trans_alpha, &(info_ptr->trans_color),
-          info_ptr->num_trans, info_ptr->color_type);
-   }
-#endif
-#ifdef PNG_WRITE_bKGD_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_bKGD)
-      png_write_bKGD(png_ptr, &(info_ptr->background), info_ptr->color_type);
-#endif
-
-#ifdef PNG_WRITE_hIST_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_hIST)
-      png_write_hIST(png_ptr, info_ptr->hist, info_ptr->num_palette);
-#endif
-
-#ifdef PNG_WRITE_oFFs_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_oFFs)
-      png_write_oFFs(png_ptr, info_ptr->x_offset, info_ptr->y_offset,
-          info_ptr->offset_unit_type);
-#endif
-
-#ifdef PNG_WRITE_pCAL_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_pCAL)
-      png_write_pCAL(png_ptr, info_ptr->pcal_purpose, info_ptr->pcal_X0,
-          info_ptr->pcal_X1, info_ptr->pcal_type, info_ptr->pcal_nparams,
-          info_ptr->pcal_units, info_ptr->pcal_params);
-#endif
-
-#ifdef PNG_WRITE_sCAL_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_sCAL)
-      png_write_sCAL_s(png_ptr, (int)info_ptr->scal_unit,
-          info_ptr->scal_s_width, info_ptr->scal_s_height);
-#endif /* sCAL */
-
-#ifdef PNG_WRITE_pHYs_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_pHYs)
-      png_write_pHYs(png_ptr, info_ptr->x_pixels_per_unit,
-          info_ptr->y_pixels_per_unit, info_ptr->phys_unit_type);
-#endif /* pHYs */
-
-#ifdef PNG_WRITE_tIME_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_tIME)
-   {
-      png_write_tIME(png_ptr, &(info_ptr->mod_time));
-      png_ptr->mode |= PNG_WROTE_tIME;
-   }
-#endif /* tIME */
-
-#ifdef PNG_WRITE_sPLT_SUPPORTED
-   if (info_ptr->valid & PNG_INFO_sPLT)
-      for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
-         png_write_sPLT(png_ptr, info_ptr->splt_palettes + i);
-#endif /* sPLT */
-
-#ifdef PNG_WRITE_TEXT_SUPPORTED
-   /* Check to see if we need to write text chunks */
-   for (i = 0; i < info_ptr->num_text; i++)
-   {
-      png_debug2(2, "Writing header text chunk %d, type %d", i,
-          info_ptr->text[i].compression);
-      /* An internationalized chunk? */
-      if (info_ptr->text[i].compression > 0)
-      {
-#ifdef PNG_WRITE_iTXt_SUPPORTED
-         /* Write international chunk */
-         png_write_iTXt(png_ptr,
-             info_ptr->text[i].compression,
-             info_ptr->text[i].key,
-             info_ptr->text[i].lang,
-             info_ptr->text[i].lang_key,
-             info_ptr->text[i].text);
-#else
-          png_warning(png_ptr, "Unable to write international text");
-#endif
-          /* Mark this chunk as written */
-          info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
-      }
-
-      /* If we want a compressed text chunk */
-      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_zTXt)
-      {
-#ifdef PNG_WRITE_zTXt_SUPPORTED
-         /* Write compressed chunk */
-         png_write_zTXt(png_ptr, info_ptr->text[i].key,
-             info_ptr->text[i].text, 0,
-             info_ptr->text[i].compression);
-#else
-         png_warning(png_ptr, "Unable to write compressed text");
-#endif
-         /* Mark this chunk as written */
-         info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
-      }
-
-      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
-      {
-#ifdef PNG_WRITE_tEXt_SUPPORTED
-         /* Write uncompressed chunk */
-         png_write_tEXt(png_ptr, info_ptr->text[i].key,
-             info_ptr->text[i].text,
-             0);
-         /* Mark this chunk as written */
-         info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
-#else
-         /* Can't get here */
-         png_warning(png_ptr, "Unable to write uncompressed text");
-#endif
-      }
-   }
-#endif /* tEXt */
-
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-   if (info_ptr->unknown_chunks_num)
-   {
-      png_unknown_chunk *up;
-
-      png_debug(5, "writing extra chunks");
-
-      for (up = info_ptr->unknown_chunks;
-           up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
-           up++)
-      {
-         int keep = png_handle_as_unknown(png_ptr, up->name);
-         if (keep != PNG_HANDLE_CHUNK_NEVER &&
-             up->location &&
-             (up->location & PNG_HAVE_PLTE) &&
-             !(up->location & PNG_HAVE_IDAT) &&
-             !(up->location & PNG_AFTER_IDAT) &&
-             ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS ||
-             (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS)))
-         {
-            png_write_chunk(png_ptr, up->name, up->data, up->size);
-         }
-      }
-   }
-#endif
-}
-
-/* Writes the end of the PNG file.  If you don't want to write comments or
- * time information, you can pass NULL for info.  If you already wrote these
- * in png_write_info(), do not write them again here.  If you have long
- * comments, I suggest writing them here, and compressing them.
- */
-void PNGAPI
-png_write_end(png_structp png_ptr, png_infop info_ptr)
-{
-   png_debug(1, "in png_write_end");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (!(png_ptr->mode & PNG_HAVE_IDAT))
-      png_error(png_ptr, "No IDATs written into file");
-
-#ifdef PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   if (png_ptr->num_palette_max > png_ptr->num_palette)
-      png_benign_error(png_ptr, "Wrote palette index exceeding num_palette");
-#endif
-
-   /* See if user wants us to write information chunks */
-   if (info_ptr != NULL)
-   {
-#ifdef PNG_WRITE_TEXT_SUPPORTED
-      int i; /* local index variable */
-#endif
-#ifdef PNG_WRITE_tIME_SUPPORTED
-      /* Check to see if user has supplied a time chunk */
-      if ((info_ptr->valid & PNG_INFO_tIME) &&
-          !(png_ptr->mode & PNG_WROTE_tIME))
-         png_write_tIME(png_ptr, &(info_ptr->mod_time));
-
-#endif
-#ifdef PNG_WRITE_TEXT_SUPPORTED
-      /* Loop through comment chunks */
-      for (i = 0; i < info_ptr->num_text; i++)
-      {
-         png_debug2(2, "Writing trailer text chunk %d, type %d", i,
-            info_ptr->text[i].compression);
-         /* An internationalized chunk? */
-         if (info_ptr->text[i].compression > 0)
-         {
-#ifdef PNG_WRITE_iTXt_SUPPORTED
-            /* Write international chunk */
-            png_write_iTXt(png_ptr,
-                info_ptr->text[i].compression,
-                info_ptr->text[i].key,
-                info_ptr->text[i].lang,
-                info_ptr->text[i].lang_key,
-                info_ptr->text[i].text);
-#else
-            png_warning(png_ptr, "Unable to write international text");
-#endif
-            /* Mark this chunk as written */
-            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
-         }
-
-         else if (info_ptr->text[i].compression >= PNG_TEXT_COMPRESSION_zTXt)
-         {
-#ifdef PNG_WRITE_zTXt_SUPPORTED
-            /* Write compressed chunk */
-            png_write_zTXt(png_ptr, info_ptr->text[i].key,
-                info_ptr->text[i].text, 0,
-                info_ptr->text[i].compression);
-#else
-            png_warning(png_ptr, "Unable to write compressed text");
-#endif
-            /* Mark this chunk as written */
-            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
-         }
-
-         else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
-         {
-#ifdef PNG_WRITE_tEXt_SUPPORTED
-            /* Write uncompressed chunk */
-            png_write_tEXt(png_ptr, info_ptr->text[i].key,
-                info_ptr->text[i].text, 0);
-#else
-            png_warning(png_ptr, "Unable to write uncompressed text");
-#endif
-
-            /* Mark this chunk as written */
-            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
-         }
-      }
-#endif
-#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
-   if (info_ptr->unknown_chunks_num)
-   {
-      png_unknown_chunk *up;
-
-      png_debug(5, "writing extra chunks");
-
-      for (up = info_ptr->unknown_chunks;
-           up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
-           up++)
-      {
-         int keep = png_handle_as_unknown(png_ptr, up->name);
-         if (keep != PNG_HANDLE_CHUNK_NEVER &&
-             up->location &&
-             (up->location & PNG_AFTER_IDAT) &&
-             ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS ||
-             (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS)))
-         {
-            png_write_chunk(png_ptr, up->name, up->data, up->size);
-         }
-      }
-   }
-#endif
-   }
-
-   png_ptr->mode |= PNG_AFTER_IDAT;
-
-   /* Write end of PNG file */
-   png_write_IEND(png_ptr);
-   /* This flush, added in libpng-1.0.8, removed from libpng-1.0.9beta03,
-    * and restored again in libpng-1.2.30, may cause some applications that
-    * do not set png_ptr->output_flush_fn to crash.  If your application
-    * experiences a problem, please try building libpng with
-    * PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED defined, and report the event to
-    * png-mng-implement at lists.sf.net .
-    */
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-#  ifdef PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED
-   png_flush(png_ptr);
-#  endif
-#endif
-}
-
-#ifdef PNG_CONVERT_tIME_SUPPORTED
-/* "tm" structure is not supported on WindowsCE */
-void PNGAPI
-png_convert_from_struct_tm(png_timep ptime, PNG_CONST struct tm FAR * ttime)
-{
-   png_debug(1, "in png_convert_from_struct_tm");
-
-   ptime->year = (png_uint_16)(1900 + ttime->tm_year);
-   ptime->month = (png_byte)(ttime->tm_mon + 1);
-   ptime->day = (png_byte)ttime->tm_mday;
-   ptime->hour = (png_byte)ttime->tm_hour;
-   ptime->minute = (png_byte)ttime->tm_min;
-   ptime->second = (png_byte)ttime->tm_sec;
-}
-
-void PNGAPI
-png_convert_from_time_t(png_timep ptime, time_t ttime)
-{
-   struct tm *tbuf;
-
-   png_debug(1, "in png_convert_from_time_t");
-
-   tbuf = gmtime(&ttime);
-   png_convert_from_struct_tm(ptime, tbuf);
-}
-#endif
-
-/* Initialize png_ptr structure, and allocate any memory needed */
-PNG_FUNCTION(png_structp,PNGAPI
-png_create_write_struct,(png_const_charp user_png_ver, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warn_fn),PNG_ALLOCATED)
-{
-#ifdef PNG_USER_MEM_SUPPORTED
-   return (png_create_write_struct_2(user_png_ver, error_ptr, error_fn,
-       warn_fn, NULL, NULL, NULL));
-}
-
-/* Alternate initialize png_ptr structure, and allocate any memory needed */
-static void png_reset_filter_heuristics(png_structp png_ptr); /* forward decl */
-
-PNG_FUNCTION(png_structp,PNGAPI
-png_create_write_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr,
-    png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
-    png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED)
-{
-#endif /* PNG_USER_MEM_SUPPORTED */
-   volatile int png_cleanup_needed = 0;
-#ifdef PNG_SETJMP_SUPPORTED
-   volatile
-#endif
-   png_structp png_ptr;
-#ifdef PNG_SETJMP_SUPPORTED
-#ifdef USE_FAR_KEYWORD
-   jmp_buf tmp_jmpbuf;
-#endif
-#endif
-
-   png_debug(1, "in png_create_write_struct");
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_ptr = (png_structp)png_create_struct_2(PNG_STRUCT_PNG,
-       (png_malloc_ptr)malloc_fn, (png_voidp)mem_ptr);
-#else
-   png_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG);
-#endif /* PNG_USER_MEM_SUPPORTED */
-   if (png_ptr == NULL)
-      return (NULL);
-
-   /* Added at libpng-1.2.6 */
-#ifdef PNG_SET_USER_LIMITS_SUPPORTED
-   png_ptr->user_width_max = PNG_USER_WIDTH_MAX;
-   png_ptr->user_height_max = PNG_USER_HEIGHT_MAX;
-#endif
-
-#ifdef PNG_SETJMP_SUPPORTED
-/* Applications that neglect to set up their own setjmp() and then
- * encounter a png_error() will longjmp here.  Since the jmpbuf is
- * then meaningless we abort instead of returning.
- */
-#ifdef USE_FAR_KEYWORD
-   if (setjmp(tmp_jmpbuf))
-#else
-   if (setjmp(png_jmpbuf(png_ptr))) /* sets longjmp to match setjmp */
-#endif
-#ifdef USE_FAR_KEYWORD
-   png_memcpy(png_jmpbuf(png_ptr), tmp_jmpbuf, png_sizeof(jmp_buf));
-#endif
-      PNG_ABORT();
-#endif
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_set_mem_fn(png_ptr, mem_ptr, malloc_fn, free_fn);
-#endif /* PNG_USER_MEM_SUPPORTED */
-   png_set_error_fn(png_ptr, error_ptr, error_fn, warn_fn);
-
-   if (!png_user_version_check(png_ptr, user_png_ver))
-      png_cleanup_needed = 1;
-
-   /* Initialize zbuf - compression buffer */
-   png_ptr->zbuf_size = PNG_ZBUF_SIZE;
-
-   if (!png_cleanup_needed)
-   {
-      png_ptr->zbuf = (png_bytep)png_malloc_warn(png_ptr,
-          png_ptr->zbuf_size);
-      if (png_ptr->zbuf == NULL)
-         png_cleanup_needed = 1;
-   }
-
-   if (png_cleanup_needed)
-   {
-       /* Clean up PNG structure and deallocate any memory. */
-       png_free(png_ptr, png_ptr->zbuf);
-       png_ptr->zbuf = NULL;
-#ifdef PNG_USER_MEM_SUPPORTED
-       png_destroy_struct_2((png_voidp)png_ptr,
-           (png_free_ptr)free_fn, (png_voidp)mem_ptr);
-#else
-       png_destroy_struct((png_voidp)png_ptr);
-#endif
-       return (NULL);
-   }
-
-   png_set_write_fn(png_ptr, NULL, NULL, NULL);
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-   png_reset_filter_heuristics(png_ptr);
-#endif
-
-   return (png_ptr);
-}
-
-
-/* Write a few rows of image data.  If the image is interlaced,
- * either you will have to write the 7 sub images, or, if you
- * have called png_set_interlace_handling(), you will have to
- * "write" the image seven times.
- */
-void PNGAPI
-png_write_rows(png_structp png_ptr, png_bytepp row,
-    png_uint_32 num_rows)
-{
-   png_uint_32 i; /* row counter */
-   png_bytepp rp; /* row pointer */
-
-   png_debug(1, "in png_write_rows");
-
-   if (png_ptr == NULL)
-      return;
-
-   /* Loop through the rows */
-   for (i = 0, rp = row; i < num_rows; i++, rp++)
-   {
-      png_write_row(png_ptr, *rp);
-   }
-}
-
-/* Write the image.  You only need to call this function once, even
- * if you are writing an interlaced image.
- */
-void PNGAPI
-png_write_image(png_structp png_ptr, png_bytepp image)
-{
-   png_uint_32 i; /* row index */
-   int pass, num_pass; /* pass variables */
-   png_bytepp rp; /* points to current row */
-
-   if (png_ptr == NULL)
-      return;
-
-   png_debug(1, "in png_write_image");
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* Initialize interlace handling.  If image is not interlaced,
-    * this will set pass to 1
-    */
-   num_pass = png_set_interlace_handling(png_ptr);
-#else
-   num_pass = 1;
-#endif
-   /* Loop through passes */
-   for (pass = 0; pass < num_pass; pass++)
-   {
-      /* Loop through image */
-      for (i = 0, rp = image; i < png_ptr->height; i++, rp++)
-      {
-         png_write_row(png_ptr, *rp);
-      }
-   }
-}
-
-/* Called by user to write a row of image data */
-void PNGAPI
-png_write_row(png_structp png_ptr, png_const_bytep row)
-{
-   /* 1.5.6: moved from png_struct to be a local structure: */
-   png_row_info row_info;
-
-   if (png_ptr == NULL)
-      return;
-
-   png_debug2(1, "in png_write_row (row %u, pass %d)",
-      png_ptr->row_number, png_ptr->pass);
-
-   /* Initialize transformations and other stuff if first time */
-   if (png_ptr->row_number == 0 && png_ptr->pass == 0)
-   {
-      /* Make sure we wrote the header info */
-      if (!(png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE))
-         png_error(png_ptr,
-             "png_write_info was never called before png_write_row");
-
-      /* Check for transforms that have been set but were defined out */
-#if !defined(PNG_WRITE_INVERT_SUPPORTED) && defined(PNG_READ_INVERT_SUPPORTED)
-      if (png_ptr->transformations & PNG_INVERT_MONO)
-         png_warning(png_ptr, "PNG_WRITE_INVERT_SUPPORTED is not defined");
-#endif
-
-#if !defined(PNG_WRITE_FILLER_SUPPORTED) && defined(PNG_READ_FILLER_SUPPORTED)
-      if (png_ptr->transformations & PNG_FILLER)
-         png_warning(png_ptr, "PNG_WRITE_FILLER_SUPPORTED is not defined");
-#endif
-#if !defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \
-    defined(PNG_READ_PACKSWAP_SUPPORTED)
-      if (png_ptr->transformations & PNG_PACKSWAP)
-         png_warning(png_ptr,
-             "PNG_WRITE_PACKSWAP_SUPPORTED is not defined");
-#endif
-
-#if !defined(PNG_WRITE_PACK_SUPPORTED) && defined(PNG_READ_PACK_SUPPORTED)
-      if (png_ptr->transformations & PNG_PACK)
-         png_warning(png_ptr, "PNG_WRITE_PACK_SUPPORTED is not defined");
-#endif
-
-#if !defined(PNG_WRITE_SHIFT_SUPPORTED) && defined(PNG_READ_SHIFT_SUPPORTED)
-      if (png_ptr->transformations & PNG_SHIFT)
-         png_warning(png_ptr, "PNG_WRITE_SHIFT_SUPPORTED is not defined");
-#endif
-
-#if !defined(PNG_WRITE_BGR_SUPPORTED) && defined(PNG_READ_BGR_SUPPORTED)
-      if (png_ptr->transformations & PNG_BGR)
-         png_warning(png_ptr, "PNG_WRITE_BGR_SUPPORTED is not defined");
-#endif
-
-#if !defined(PNG_WRITE_SWAP_SUPPORTED) && defined(PNG_READ_SWAP_SUPPORTED)
-      if (png_ptr->transformations & PNG_SWAP_BYTES)
-         png_warning(png_ptr, "PNG_WRITE_SWAP_SUPPORTED is not defined");
-#endif
-
-      png_write_start_row(png_ptr);
-   }
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* If interlaced and not interested in row, return */
-   if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE))
-   {
-      switch (png_ptr->pass)
-      {
-         case 0:
-            if (png_ptr->row_number & 0x07)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 1:
-            if ((png_ptr->row_number & 0x07) || png_ptr->width < 5)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 2:
-            if ((png_ptr->row_number & 0x07) != 4)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 3:
-            if ((png_ptr->row_number & 0x03) || png_ptr->width < 3)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 4:
-            if ((png_ptr->row_number & 0x03) != 2)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 5:
-            if ((png_ptr->row_number & 0x01) || png_ptr->width < 2)
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         case 6:
-            if (!(png_ptr->row_number & 0x01))
-            {
-               png_write_finish_row(png_ptr);
-               return;
-            }
-            break;
-
-         default: /* error: ignore it */
-            break;
-      }
-   }
-#endif
-
-   /* Set up row info for transformations */
-   row_info.color_type = png_ptr->color_type;
-   row_info.width = png_ptr->usr_width;
-   row_info.channels = png_ptr->usr_channels;
-   row_info.bit_depth = png_ptr->usr_bit_depth;
-   row_info.pixel_depth = (png_byte)(row_info.bit_depth * row_info.channels);
-   row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width);
-
-   png_debug1(3, "row_info->color_type = %d", row_info.color_type);
-   png_debug1(3, "row_info->width = %u", row_info.width);
-   png_debug1(3, "row_info->channels = %d", row_info.channels);
-   png_debug1(3, "row_info->bit_depth = %d", row_info.bit_depth);
-   png_debug1(3, "row_info->pixel_depth = %d", row_info.pixel_depth);
-   png_debug1(3, "row_info->rowbytes = %lu", (unsigned long)row_info.rowbytes);
-
-   /* Copy user's row into buffer, leaving room for filter byte. */
-   png_memcpy(png_ptr->row_buf + 1, row, row_info.rowbytes);
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* Handle interlacing */
-   if (png_ptr->interlaced && png_ptr->pass < 6 &&
-       (png_ptr->transformations & PNG_INTERLACE))
-   {
-      png_do_write_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass);
-      /* This should always get caught above, but still ... */
-      if (!(row_info.width))
-      {
-         png_write_finish_row(png_ptr);
-         return;
-      }
-   }
-#endif
-
-#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
-   /* Handle other transformations */
-   if (png_ptr->transformations)
-      png_do_write_transformations(png_ptr, &row_info);
-#endif
-
-   /* At this point the row_info pixel depth must match the 'transformed' depth,
-    * which is also the output depth.
-    */
-   if (row_info.pixel_depth != png_ptr->pixel_depth ||
-      row_info.pixel_depth != png_ptr->transformed_pixel_depth)
-      png_error(png_ptr, "internal write transform logic error");
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   /* Write filter_method 64 (intrapixel differencing) only if
-    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
-    * 2. Libpng did not write a PNG signature (this filter_method is only
-    *    used in PNG datastreams that are embedded in MNG datastreams) and
-    * 3. The application called png_permit_mng_features with a mask that
-    *    included PNG_FLAG_MNG_FILTER_64 and
-    * 4. The filter_method is 64 and
-    * 5. The color_type is RGB or RGBA
-    */
-   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
-       (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
-   {
-      /* Intrapixel differencing */
-      png_do_write_intrapixel(&row_info, png_ptr->row_buf + 1);
-   }
-#endif
-
-/* Added at libpng-1.5.10 */
-#ifdef PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED
-   /* Check for out-of-range palette index */
-   if(row_info.color_type == PNG_COLOR_TYPE_PALETTE)
-      png_do_check_palette_indexes(png_ptr, &row_info);
-#endif
-
-   /* Find a filter if necessary, filter the row and write it out. */
-   png_write_find_filter(png_ptr, &row_info);
-
-   if (png_ptr->write_row_fn != NULL)
-      (*(png_ptr->write_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
-}
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-/* Set the automatic flush interval or 0 to turn flushing off */
-void PNGAPI
-png_set_flush(png_structp png_ptr, int nrows)
-{
-   png_debug(1, "in png_set_flush");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->flush_dist = (nrows < 0 ? 0 : nrows);
-}
-
-/* Flush the current output buffers now */
-void PNGAPI
-png_write_flush(png_structp png_ptr)
-{
-   int wrote_IDAT;
-
-   png_debug(1, "in png_write_flush");
-
-   if (png_ptr == NULL)
-      return;
-
-   /* We have already written out all of the data */
-   if (png_ptr->row_number >= png_ptr->num_rows)
-      return;
-
-   do
-   {
-      int ret;
-
-      /* Compress the data */
-      ret = deflate(&png_ptr->zstream, Z_SYNC_FLUSH);
-      wrote_IDAT = 0;
-
-      /* Check for compression errors */
-      if (ret != Z_OK)
-      {
-         if (png_ptr->zstream.msg != NULL)
-            png_error(png_ptr, png_ptr->zstream.msg);
-
-         else
-            png_error(png_ptr, "zlib error");
-      }
-
-      if (!(png_ptr->zstream.avail_out))
-      {
-         /* Write the IDAT and reset the zlib output buffer */
-         png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size);
-         wrote_IDAT = 1;
-      }
-   } while (wrote_IDAT == 1);
-
-   /* If there is any data left to be output, write it into a new IDAT */
-   if (png_ptr->zbuf_size != png_ptr->zstream.avail_out)
-   {
-      /* Write the IDAT and reset the zlib output buffer */
-      png_write_IDAT(png_ptr, png_ptr->zbuf,
-          png_ptr->zbuf_size - png_ptr->zstream.avail_out);
-   }
-   png_ptr->flush_rows = 0;
-   png_flush(png_ptr);
-}
-#endif /* PNG_WRITE_FLUSH_SUPPORTED */
-
-/* Free all memory used by the write */
-void PNGAPI
-png_destroy_write_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr)
-{
-   png_structp png_ptr = NULL;
-   png_infop info_ptr = NULL;
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_free_ptr free_fn = NULL;
-   png_voidp mem_ptr = NULL;
-#endif
-
-   png_debug(1, "in png_destroy_write_struct");
-
-   if (png_ptr_ptr != NULL)
-      png_ptr = *png_ptr_ptr;
-
-#ifdef PNG_USER_MEM_SUPPORTED
-   if (png_ptr != NULL)
-   {
-      free_fn = png_ptr->free_fn;
-      mem_ptr = png_ptr->mem_ptr;
-   }
-#endif
-
-   if (info_ptr_ptr != NULL)
-      info_ptr = *info_ptr_ptr;
-
-   if (info_ptr != NULL)
-   {
-      if (png_ptr != NULL)
-      {
-         png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
-
-#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
-         if (png_ptr->num_chunk_list)
-         {
-            png_free(png_ptr, png_ptr->chunk_list);
-            png_ptr->num_chunk_list = 0;
-         }
-#endif
-      }
-
-#ifdef PNG_USER_MEM_SUPPORTED
-      png_destroy_struct_2((png_voidp)info_ptr, (png_free_ptr)free_fn,
-          (png_voidp)mem_ptr);
-#else
-      png_destroy_struct((png_voidp)info_ptr);
-#endif
-      *info_ptr_ptr = NULL;
-   }
-
-   if (png_ptr != NULL)
-   {
-      png_write_destroy(png_ptr);
-#ifdef PNG_USER_MEM_SUPPORTED
-      png_destroy_struct_2((png_voidp)png_ptr, (png_free_ptr)free_fn,
-          (png_voidp)mem_ptr);
-#else
-      png_destroy_struct((png_voidp)png_ptr);
-#endif
-      *png_ptr_ptr = NULL;
-   }
-}
-
-
-/* Free any memory used in png_ptr struct (old method) */
-void /* PRIVATE */
-png_write_destroy(png_structp png_ptr)
-{
-#ifdef PNG_SETJMP_SUPPORTED
-   jmp_buf tmp_jmp; /* Save jump buffer */
-#endif
-   png_error_ptr error_fn;
-#ifdef PNG_WARNINGS_SUPPORTED
-   png_error_ptr warning_fn;
-#endif
-   png_voidp error_ptr;
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_free_ptr free_fn;
-#endif
-
-   png_debug(1, "in png_write_destroy");
-
-   /* Free any memory zlib uses */
-   if (png_ptr->zlib_state != PNG_ZLIB_UNINITIALIZED)
-      deflateEnd(&png_ptr->zstream);
-
-   /* Free our memory.  png_free checks NULL for us. */
-   png_free(png_ptr, png_ptr->zbuf);
-   png_free(png_ptr, png_ptr->row_buf);
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-   png_free(png_ptr, png_ptr->prev_row);
-   png_free(png_ptr, png_ptr->sub_row);
-   png_free(png_ptr, png_ptr->up_row);
-   png_free(png_ptr, png_ptr->avg_row);
-   png_free(png_ptr, png_ptr->paeth_row);
-#endif
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-   /* Use this to save a little code space, it doesn't free the filter_costs */
-   png_reset_filter_heuristics(png_ptr);
-   png_free(png_ptr, png_ptr->filter_costs);
-   png_free(png_ptr, png_ptr->inv_filter_costs);
-#endif
-
-#ifdef PNG_SETJMP_SUPPORTED
-   /* Reset structure */
-   png_memcpy(tmp_jmp, png_ptr->longjmp_buffer, png_sizeof(jmp_buf));
-#endif
-
-   error_fn = png_ptr->error_fn;
-#ifdef PNG_WARNINGS_SUPPORTED
-   warning_fn = png_ptr->warning_fn;
-#endif
-   error_ptr = png_ptr->error_ptr;
-#ifdef PNG_USER_MEM_SUPPORTED
-   free_fn = png_ptr->free_fn;
-#endif
-
-   png_memset(png_ptr, 0, png_sizeof(png_struct));
-
-   png_ptr->error_fn = error_fn;
-#ifdef PNG_WARNINGS_SUPPORTED
-   png_ptr->warning_fn = warning_fn;
-#endif
-   png_ptr->error_ptr = error_ptr;
-#ifdef PNG_USER_MEM_SUPPORTED
-   png_ptr->free_fn = free_fn;
-#endif
-
-#ifdef PNG_SETJMP_SUPPORTED
-   png_memcpy(png_ptr->longjmp_buffer, tmp_jmp, png_sizeof(jmp_buf));
-#endif
-}
-
-/* Allow the application to select one or more row filters to use. */
-void PNGAPI
-png_set_filter(png_structp png_ptr, int method, int filters)
-{
-   png_debug(1, "in png_set_filter");
-
-   if (png_ptr == NULL)
-      return;
-
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
-       (method == PNG_INTRAPIXEL_DIFFERENCING))
-      method = PNG_FILTER_TYPE_BASE;
-
-#endif
-   if (method == PNG_FILTER_TYPE_BASE)
-   {
-      switch (filters & (PNG_ALL_FILTERS | 0x07))
-      {
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-         case 5:
-         case 6:
-         case 7: png_warning(png_ptr, "Unknown row filter for method 0");
-#endif /* PNG_WRITE_FILTER_SUPPORTED */
-         case PNG_FILTER_VALUE_NONE:
-            png_ptr->do_filter = PNG_FILTER_NONE; break;
-
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-         case PNG_FILTER_VALUE_SUB:
-            png_ptr->do_filter = PNG_FILTER_SUB; break;
-
-         case PNG_FILTER_VALUE_UP:
-            png_ptr->do_filter = PNG_FILTER_UP; break;
-
-         case PNG_FILTER_VALUE_AVG:
-            png_ptr->do_filter = PNG_FILTER_AVG; break;
-
-         case PNG_FILTER_VALUE_PAETH:
-            png_ptr->do_filter = PNG_FILTER_PAETH; break;
-
-         default:
-            png_ptr->do_filter = (png_byte)filters; break;
-#else
-         default:
-            png_warning(png_ptr, "Unknown row filter for method 0");
-#endif /* PNG_WRITE_FILTER_SUPPORTED */
-      }
-
-      /* If we have allocated the row_buf, this means we have already started
-       * with the image and we should have allocated all of the filter buffers
-       * that have been selected.  If prev_row isn't already allocated, then
-       * it is too late to start using the filters that need it, since we
-       * will be missing the data in the previous row.  If an application
-       * wants to start and stop using particular filters during compression,
-       * it should start out with all of the filters, and then add and
-       * remove them after the start of compression.
-       */
-      if (png_ptr->row_buf != NULL)
-      {
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-         if ((png_ptr->do_filter & PNG_FILTER_SUB) && png_ptr->sub_row == NULL)
-         {
-            png_ptr->sub_row = (png_bytep)png_malloc(png_ptr,
-                (png_ptr->rowbytes + 1));
-            png_ptr->sub_row[0] = PNG_FILTER_VALUE_SUB;
-         }
-
-         if ((png_ptr->do_filter & PNG_FILTER_UP) && png_ptr->up_row == NULL)
-         {
-            if (png_ptr->prev_row == NULL)
-            {
-               png_warning(png_ptr, "Can't add Up filter after starting");
-               png_ptr->do_filter = (png_byte)(png_ptr->do_filter &
-                   ~PNG_FILTER_UP);
-            }
-
-            else
-            {
-               png_ptr->up_row = (png_bytep)png_malloc(png_ptr,
-                   (png_ptr->rowbytes + 1));
-               png_ptr->up_row[0] = PNG_FILTER_VALUE_UP;
-            }
-         }
-
-         if ((png_ptr->do_filter & PNG_FILTER_AVG) && png_ptr->avg_row == NULL)
-         {
-            if (png_ptr->prev_row == NULL)
-            {
-               png_warning(png_ptr, "Can't add Average filter after starting");
-               png_ptr->do_filter = (png_byte)(png_ptr->do_filter &
-                   ~PNG_FILTER_AVG);
-            }
-
-            else
-            {
-               png_ptr->avg_row = (png_bytep)png_malloc(png_ptr,
-                   (png_ptr->rowbytes + 1));
-               png_ptr->avg_row[0] = PNG_FILTER_VALUE_AVG;
-            }
-         }
-
-         if ((png_ptr->do_filter & PNG_FILTER_PAETH) &&
-             png_ptr->paeth_row == NULL)
-         {
-            if (png_ptr->prev_row == NULL)
-            {
-               png_warning(png_ptr, "Can't add Paeth filter after starting");
-               png_ptr->do_filter &= (png_byte)(~PNG_FILTER_PAETH);
-            }
-
-            else
-            {
-               png_ptr->paeth_row = (png_bytep)png_malloc(png_ptr,
-                   (png_ptr->rowbytes + 1));
-               png_ptr->paeth_row[0] = PNG_FILTER_VALUE_PAETH;
-            }
-         }
-
-         if (png_ptr->do_filter == PNG_NO_FILTERS)
-#endif /* PNG_WRITE_FILTER_SUPPORTED */
-            png_ptr->do_filter = PNG_FILTER_NONE;
-      }
-   }
-   else
-      png_error(png_ptr, "Unknown custom filter method");
-}
-
-/* This allows us to influence the way in which libpng chooses the "best"
- * filter for the current scanline.  While the "minimum-sum-of-absolute-
- * differences metric is relatively fast and effective, there is some
- * question as to whether it can be improved upon by trying to keep the
- * filtered data going to zlib more consistent, hopefully resulting in
- * better compression.
- */
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED      /* GRR 970116 */
-/* Convenience reset API. */
-static void
-png_reset_filter_heuristics(png_structp png_ptr)
-{
-   /* Clear out any old values in the 'weights' - this must be done because if
-    * the app calls set_filter_heuristics multiple times with different
-    * 'num_weights' values we would otherwise potentially have wrong sized
-    * arrays.
-    */
-   png_ptr->num_prev_filters = 0;
-   png_ptr->heuristic_method = PNG_FILTER_HEURISTIC_UNWEIGHTED;
-   if (png_ptr->prev_filters != NULL)
-   {
-      png_bytep old = png_ptr->prev_filters;
-      png_ptr->prev_filters = NULL;
-      png_free(png_ptr, old);
-   }
-   if (png_ptr->filter_weights != NULL)
-   {
-      png_uint_16p old = png_ptr->filter_weights;
-      png_ptr->filter_weights = NULL;
-      png_free(png_ptr, old);
-   }
-
-   if (png_ptr->inv_filter_weights != NULL)
-   {
-      png_uint_16p old = png_ptr->inv_filter_weights;
-      png_ptr->inv_filter_weights = NULL;
-      png_free(png_ptr, old);
-   }
-
-   /* Leave the filter_costs - this array is fixed size. */
-}
-
-static int
-png_init_filter_heuristics(png_structp png_ptr, int heuristic_method,
-   int num_weights)
-{
-   if (png_ptr == NULL)
-      return 0;
-
-   /* Clear out the arrays */
-   png_reset_filter_heuristics(png_ptr);
-
-   /* Check arguments; the 'reset' function makes the correct settings for the
-    * unweighted case, but we must handle the weight case by initializing the
-    * arrays for the caller.
-    */
-   if (heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-   {
-      int i;
-
-      if (num_weights > 0)
-      {
-         png_ptr->prev_filters = (png_bytep)png_malloc(png_ptr,
-             (png_uint_32)(png_sizeof(png_byte) * num_weights));
-
-         /* To make sure that the weighting starts out fairly */
-         for (i = 0; i < num_weights; i++)
-         {
-            png_ptr->prev_filters[i] = 255;
-         }
-
-         png_ptr->filter_weights = (png_uint_16p)png_malloc(png_ptr,
-             (png_uint_32)(png_sizeof(png_uint_16) * num_weights));
-
-         png_ptr->inv_filter_weights = (png_uint_16p)png_malloc(png_ptr,
-             (png_uint_32)(png_sizeof(png_uint_16) * num_weights));
-
-         for (i = 0; i < num_weights; i++)
-         {
-            png_ptr->inv_filter_weights[i] =
-            png_ptr->filter_weights[i] = PNG_WEIGHT_FACTOR;
-         }
-
-         /* Safe to set this now */
-         png_ptr->num_prev_filters = (png_byte)num_weights;
-      }
-
-      /* If, in the future, there are other filter methods, this would
-       * need to be based on png_ptr->filter.
-       */
-      if (png_ptr->filter_costs == NULL)
-      {
-         png_ptr->filter_costs = (png_uint_16p)png_malloc(png_ptr,
-             (png_uint_32)(png_sizeof(png_uint_16) * PNG_FILTER_VALUE_LAST));
-
-         png_ptr->inv_filter_costs = (png_uint_16p)png_malloc(png_ptr,
-             (png_uint_32)(png_sizeof(png_uint_16) * PNG_FILTER_VALUE_LAST));
-      }
-
-      for (i = 0; i < PNG_FILTER_VALUE_LAST; i++)
-      {
-         png_ptr->inv_filter_costs[i] =
-         png_ptr->filter_costs[i] = PNG_COST_FACTOR;
-      }
-
-      /* All the arrays are inited, safe to set this: */
-      png_ptr->heuristic_method = PNG_FILTER_HEURISTIC_WEIGHTED;
-
-      /* Return the 'ok' code. */
-      return 1;
-   }
-   else if (heuristic_method == PNG_FILTER_HEURISTIC_DEFAULT ||
-      heuristic_method == PNG_FILTER_HEURISTIC_UNWEIGHTED)
-   {
-      return 1;
-   }
-   else
-   {
-      png_warning(png_ptr, "Unknown filter heuristic method");
-      return 0;
-   }
-}
-
-/* Provide floating and fixed point APIs */
-#ifdef PNG_FLOATING_POINT_SUPPORTED
-void PNGAPI
-png_set_filter_heuristics(png_structp png_ptr, int heuristic_method,
-    int num_weights, png_const_doublep filter_weights,
-    png_const_doublep filter_costs)
-{
-   png_debug(1, "in png_set_filter_heuristics");
-
-   /* The internal API allocates all the arrays and ensures that the elements of
-    * those arrays are set to the default value.
-    */
-   if (!png_init_filter_heuristics(png_ptr, heuristic_method, num_weights))
-      return;
-
-   /* If using the weighted method copy in the weights. */
-   if (heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-   {
-      int i;
-      for (i = 0; i < num_weights; i++)
-      {
-         if (filter_weights[i] <= 0.0)
-         {
-            png_ptr->inv_filter_weights[i] =
-            png_ptr->filter_weights[i] = PNG_WEIGHT_FACTOR;
-         }
-
-         else
-         {
-            png_ptr->inv_filter_weights[i] =
-                (png_uint_16)(PNG_WEIGHT_FACTOR*filter_weights[i]+.5);
-
-            png_ptr->filter_weights[i] =
-                (png_uint_16)(PNG_WEIGHT_FACTOR/filter_weights[i]+.5);
-         }
-      }
-
-      /* Here is where we set the relative costs of the different filters.  We
-       * should take the desired compression level into account when setting
-       * the costs, so that Paeth, for instance, has a high relative cost at low
-       * compression levels, while it has a lower relative cost at higher
-       * compression settings.  The filter types are in order of increasing
-       * relative cost, so it would be possible to do this with an algorithm.
-       */
-      for (i = 0; i < PNG_FILTER_VALUE_LAST; i++) if (filter_costs[i] >= 1.0)
-      {
-         png_ptr->inv_filter_costs[i] =
-             (png_uint_16)(PNG_COST_FACTOR / filter_costs[i] + .5);
-
-         png_ptr->filter_costs[i] =
-             (png_uint_16)(PNG_COST_FACTOR * filter_costs[i] + .5);
-      }
-   }
-}
-#endif /* FLOATING_POINT */
-
-#ifdef PNG_FIXED_POINT_SUPPORTED
-void PNGAPI
-png_set_filter_heuristics_fixed(png_structp png_ptr, int heuristic_method,
-    int num_weights, png_const_fixed_point_p filter_weights,
-    png_const_fixed_point_p filter_costs)
-{
-   png_debug(1, "in png_set_filter_heuristics_fixed");
-
-   /* The internal API allocates all the arrays and ensures that the elements of
-    * those arrays are set to the default value.
-    */
-   if (!png_init_filter_heuristics(png_ptr, heuristic_method, num_weights))
-      return;
-
-   /* If using the weighted method copy in the weights. */
-   if (heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-   {
-      int i;
-      for (i = 0; i < num_weights; i++)
-      {
-         if (filter_weights[i] <= 0)
-         {
-            png_ptr->inv_filter_weights[i] =
-            png_ptr->filter_weights[i] = PNG_WEIGHT_FACTOR;
-         }
-
-         else
-         {
-            png_ptr->inv_filter_weights[i] = (png_uint_16)
-               ((PNG_WEIGHT_FACTOR*filter_weights[i]+PNG_FP_HALF)/PNG_FP_1);
-
-            png_ptr->filter_weights[i] = (png_uint_16)((PNG_WEIGHT_FACTOR*
-               PNG_FP_1+(filter_weights[i]/2))/filter_weights[i]);
-         }
-      }
-
-      /* Here is where we set the relative costs of the different filters.  We
-       * should take the desired compression level into account when setting
-       * the costs, so that Paeth, for instance, has a high relative cost at low
-       * compression levels, while it has a lower relative cost at higher
-       * compression settings.  The filter types are in order of increasing
-       * relative cost, so it would be possible to do this with an algorithm.
-       */
-      for (i = 0; i < PNG_FILTER_VALUE_LAST; i++)
-         if (filter_costs[i] >= PNG_FP_1)
-      {
-         png_uint_32 tmp;
-
-         /* Use a 32 bit unsigned temporary here because otherwise the
-          * intermediate value will be a 32 bit *signed* integer (ANSI rules)
-          * and this will get the wrong answer on division.
-          */
-         tmp = PNG_COST_FACTOR*PNG_FP_1 + (filter_costs[i]/2);
-         tmp /= filter_costs[i];
-
-         png_ptr->inv_filter_costs[i] = (png_uint_16)tmp;
-
-         tmp = PNG_COST_FACTOR * filter_costs[i] + PNG_FP_HALF;
-         tmp /= PNG_FP_1;
-
-         png_ptr->filter_costs[i] = (png_uint_16)tmp;
-      }
-   }
-}
-#endif /* FIXED_POINT */
-#endif /* PNG_WRITE_WEIGHTED_FILTER_SUPPORTED */
-
-void PNGAPI
-png_set_compression_level(png_structp png_ptr, int level)
-{
-   png_debug(1, "in png_set_compression_level");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_LEVEL;
-   png_ptr->zlib_level = level;
-}
-
-void PNGAPI
-png_set_compression_mem_level(png_structp png_ptr, int mem_level)
-{
-   png_debug(1, "in png_set_compression_mem_level");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL;
-   png_ptr->zlib_mem_level = mem_level;
-}
-
-void PNGAPI
-png_set_compression_strategy(png_structp png_ptr, int strategy)
-{
-   png_debug(1, "in png_set_compression_strategy");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_STRATEGY;
-   png_ptr->zlib_strategy = strategy;
-}
-
-/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
- * smaller value of window_bits if it can do so safely.
- */
-void PNGAPI
-png_set_compression_window_bits(png_structp png_ptr, int window_bits)
-{
-   if (png_ptr == NULL)
-      return;
-
-   if (window_bits > 15)
-      png_warning(png_ptr, "Only compression windows <= 32k supported by PNG");
-
-   else if (window_bits < 8)
-      png_warning(png_ptr, "Only compression windows >= 256 supported by PNG");
-
-#ifndef WBITS_8_OK
-   /* Avoid libpng bug with 256-byte windows */
-   if (window_bits == 8)
-      {
-        png_warning(png_ptr, "Compression window is being reset to 512");
-        window_bits = 9;
-      }
-
-#endif
-   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS;
-   png_ptr->zlib_window_bits = window_bits;
-}
-
-void PNGAPI
-png_set_compression_method(png_structp png_ptr, int method)
-{
-   png_debug(1, "in png_set_compression_method");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (method != 8)
-      png_warning(png_ptr, "Only compression method 8 is supported by PNG");
-
-   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_METHOD;
-   png_ptr->zlib_method = method;
-}
-
-/* The following were added to libpng-1.5.4 */
-#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
-void PNGAPI
-png_set_text_compression_level(png_structp png_ptr, int level)
-{
-   png_debug(1, "in png_set_text_compression_level");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->flags |= PNG_FLAG_ZTXT_CUSTOM_LEVEL;
-   png_ptr->zlib_text_level = level;
-}
-
-void PNGAPI
-png_set_text_compression_mem_level(png_structp png_ptr, int mem_level)
-{
-   png_debug(1, "in png_set_text_compression_mem_level");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->flags |= PNG_FLAG_ZTXT_CUSTOM_MEM_LEVEL;
-   png_ptr->zlib_text_mem_level = mem_level;
-}
-
-void PNGAPI
-png_set_text_compression_strategy(png_structp png_ptr, int strategy)
-{
-   png_debug(1, "in png_set_text_compression_strategy");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->flags |= PNG_FLAG_ZTXT_CUSTOM_STRATEGY;
-   png_ptr->zlib_text_strategy = strategy;
-}
-
-/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a
- * smaller value of window_bits if it can do so safely.
- */
-void PNGAPI
-png_set_text_compression_window_bits(png_structp png_ptr, int window_bits)
-{
-   if (png_ptr == NULL)
-      return;
-
-   if (window_bits > 15)
-      png_warning(png_ptr, "Only compression windows <= 32k supported by PNG");
-
-   else if (window_bits < 8)
-      png_warning(png_ptr, "Only compression windows >= 256 supported by PNG");
-
-#ifndef WBITS_8_OK
-   /* Avoid libpng bug with 256-byte windows */
-   if (window_bits == 8)
-      {
-        png_warning(png_ptr, "Text compression window is being reset to 512");
-        window_bits = 9;
-      }
-
-#endif
-   png_ptr->flags |= PNG_FLAG_ZTXT_CUSTOM_WINDOW_BITS;
-   png_ptr->zlib_text_window_bits = window_bits;
-}
-
-void PNGAPI
-png_set_text_compression_method(png_structp png_ptr, int method)
-{
-   png_debug(1, "in png_set_text_compression_method");
-
-   if (png_ptr == NULL)
-      return;
-
-   if (method != 8)
-      png_warning(png_ptr, "Only compression method 8 is supported by PNG");
-
-   png_ptr->flags |= PNG_FLAG_ZTXT_CUSTOM_METHOD;
-   png_ptr->zlib_text_method = method;
-}
-#endif /* PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED */
-/* end of API added to libpng-1.5.4 */
-
-void PNGAPI
-png_set_write_status_fn(png_structp png_ptr, png_write_status_ptr write_row_fn)
-{
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->write_row_fn = write_row_fn;
-}
-
-#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
-void PNGAPI
-png_set_write_user_transform_fn(png_structp png_ptr, png_user_transform_ptr
-    write_user_transform_fn)
-{
-   png_debug(1, "in png_set_write_user_transform_fn");
-
-   if (png_ptr == NULL)
-      return;
-
-   png_ptr->transformations |= PNG_USER_TRANSFORM;
-   png_ptr->write_user_transform_fn = write_user_transform_fn;
-}
-#endif
-
-
-#ifdef PNG_INFO_IMAGE_SUPPORTED
-void PNGAPI
-png_write_png(png_structp png_ptr, png_infop info_ptr,
-    int transforms, voidp params)
-{
-   if (png_ptr == NULL || info_ptr == NULL)
-      return;
-
-   /* Write the file header information. */
-   png_write_info(png_ptr, info_ptr);
-
-   /* ------ these transformations don't touch the info structure ------- */
-
-#ifdef PNG_WRITE_INVERT_SUPPORTED
-   /* Invert monochrome pixels */
-   if (transforms & PNG_TRANSFORM_INVERT_MONO)
-      png_set_invert_mono(png_ptr);
-#endif
-
-#ifdef PNG_WRITE_SHIFT_SUPPORTED
-   /* Shift the pixels up to a legal bit depth and fill in
-    * as appropriate to correctly scale the image.
-    */
-   if ((transforms & PNG_TRANSFORM_SHIFT)
-       && (info_ptr->valid & PNG_INFO_sBIT))
-      png_set_shift(png_ptr, &info_ptr->sig_bit);
-#endif
-
-#ifdef PNG_WRITE_PACK_SUPPORTED
-   /* Pack pixels into bytes */
-   if (transforms & PNG_TRANSFORM_PACKING)
-       png_set_packing(png_ptr);
-#endif
-
-#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED
-   /* Swap location of alpha bytes from ARGB to RGBA */
-   if (transforms & PNG_TRANSFORM_SWAP_ALPHA)
-      png_set_swap_alpha(png_ptr);
-#endif
-
-#ifdef PNG_WRITE_FILLER_SUPPORTED
-   /* Pack XRGB/RGBX/ARGB/RGBA into RGB (4 channels -> 3 channels) */
-   if (transforms & PNG_TRANSFORM_STRIP_FILLER_AFTER)
-      png_set_filler(png_ptr, 0, PNG_FILLER_AFTER);
-
-   else if (transforms & PNG_TRANSFORM_STRIP_FILLER_BEFORE)
-      png_set_filler(png_ptr, 0, PNG_FILLER_BEFORE);
-#endif
-
-#ifdef PNG_WRITE_BGR_SUPPORTED
-   /* Flip BGR pixels to RGB */
-   if (transforms & PNG_TRANSFORM_BGR)
-      png_set_bgr(png_ptr);
-#endif
-
-#ifdef PNG_WRITE_SWAP_SUPPORTED
-   /* Swap bytes of 16-bit files to most significant byte first */
-   if (transforms & PNG_TRANSFORM_SWAP_ENDIAN)
-      png_set_swap(png_ptr);
-#endif
-
-#ifdef PNG_WRITE_PACKSWAP_SUPPORTED
-   /* Swap bits of 1, 2, 4 bit packed pixel formats */
-   if (transforms & PNG_TRANSFORM_PACKSWAP)
-      png_set_packswap(png_ptr);
-#endif
-
-#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED
-   /* Invert the alpha channel from opacity to transparency */
-   if (transforms & PNG_TRANSFORM_INVERT_ALPHA)
-      png_set_invert_alpha(png_ptr);
-#endif
-
-   /* ----------------------- end of transformations ------------------- */
-
-   /* Write the bits */
-   if (info_ptr->valid & PNG_INFO_IDAT)
-       png_write_image(png_ptr, info_ptr->row_pointers);
-
-   /* It is REQUIRED to call this to finish writing the rest of the file */
-   png_write_end(png_ptr, info_ptr);
-
-   PNG_UNUSED(transforms)   /* Quiet compiler warnings */
-   PNG_UNUSED(params)
-}
-#endif
-#endif /* PNG_WRITE_SUPPORTED */
diff --git a/reg-io/png/lpng1510/pngwutil.c b/reg-io/png/lpng1510/pngwutil.c
deleted file mode 100644
index c289e66f..00000000
--- a/reg-io/png/lpng1510/pngwutil.c
+++ /dev/null
@@ -1,3179 +0,0 @@
-
-/* pngwutil.c - utilities to write a PNG file
- *
- * Last changed in libpng 1.5.10 [March 8, 2012]
- * Copyright (c) 1998-2012 Glenn Randers-Pehrson
- * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
- * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
- *
- * This code is released under the libpng license.
- * For conditions of distribution and use, see the disclaimer
- * and license in png.h
- */
-
-#include "pngpriv.h"
-
-#ifdef PNG_WRITE_SUPPORTED
-
-#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED
-/* Place a 32-bit number into a buffer in PNG byte order.  We work
- * with unsigned numbers for convenience, although one supported
- * ancillary chunk uses signed (two's complement) numbers.
- */
-void PNGAPI
-png_save_uint_32(png_bytep buf, png_uint_32 i)
-{
-   buf[0] = (png_byte)((i >> 24) & 0xff);
-   buf[1] = (png_byte)((i >> 16) & 0xff);
-   buf[2] = (png_byte)((i >> 8) & 0xff);
-   buf[3] = (png_byte)(i & 0xff);
-}
-
-#ifdef PNG_SAVE_INT_32_SUPPORTED
-/* The png_save_int_32 function assumes integers are stored in two's
- * complement format.  If this isn't the case, then this routine needs to
- * be modified to write data in two's complement format.  Note that,
- * the following works correctly even if png_int_32 has more than 32 bits
- * (compare the more complex code required on read for sign extention.)
- */
-void PNGAPI
-png_save_int_32(png_bytep buf, png_int_32 i)
-{
-   buf[0] = (png_byte)((i >> 24) & 0xff);
-   buf[1] = (png_byte)((i >> 16) & 0xff);
-   buf[2] = (png_byte)((i >> 8) & 0xff);
-   buf[3] = (png_byte)(i & 0xff);
-}
-#endif
-
-/* Place a 16-bit number into a buffer in PNG byte order.
- * The parameter is declared unsigned int, not png_uint_16,
- * just to avoid potential problems on pre-ANSI C compilers.
- */
-void PNGAPI
-png_save_uint_16(png_bytep buf, unsigned int i)
-{
-   buf[0] = (png_byte)((i >> 8) & 0xff);
-   buf[1] = (png_byte)(i & 0xff);
-}
-#endif
-
-/* Simple function to write the signature.  If we have already written
- * the magic bytes of the signature, or more likely, the PNG stream is
- * being embedded into another stream and doesn't need its own signature,
- * we should call png_set_sig_bytes() to tell libpng how many of the
- * bytes have already been written.
- */
-void PNGAPI
-png_write_sig(png_structp png_ptr)
-{
-   png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   /* Inform the I/O callback that the signature is being written */
-   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_SIGNATURE;
-#endif
-
-   /* Write the rest of the 8 byte signature */
-   png_write_data(png_ptr, &png_signature[png_ptr->sig_bytes],
-      (png_size_t)(8 - png_ptr->sig_bytes));
-
-   if (png_ptr->sig_bytes < 3)
-      png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE;
-}
-
-/* Write the start of a PNG chunk.  The type is the chunk type.
- * The total_length is the sum of the lengths of all the data you will be
- * passing in png_write_chunk_data().
- */
-static void
-png_write_chunk_header(png_structp png_ptr, png_uint_32 chunk_name,
-    png_uint_32 length)
-{
-   png_byte buf[8];
-
-#if defined(PNG_DEBUG) && (PNG_DEBUG > 0)
-   PNG_CSTRING_FROM_CHUNK(buf, chunk_name);
-   png_debug2(0, "Writing %s chunk, length = %lu", buf, (unsigned long)length);
-#endif
-
-   if (png_ptr == NULL)
-      return;
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   /* Inform the I/O callback that the chunk header is being written.
-    * PNG_IO_CHUNK_HDR requires a single I/O call.
-    */
-   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_HDR;
-#endif
-
-   /* Write the length and the chunk name */
-   png_save_uint_32(buf, length);
-   png_save_uint_32(buf + 4, chunk_name);
-   png_write_data(png_ptr, buf, 8);
-
-   /* Put the chunk name into png_ptr->chunk_name */
-   png_ptr->chunk_name = chunk_name;
-
-   /* Reset the crc and run it over the chunk name */
-   png_reset_crc(png_ptr);
-
-   png_calculate_crc(png_ptr, buf + 4, 4);
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   /* Inform the I/O callback that chunk data will (possibly) be written.
-    * PNG_IO_CHUNK_DATA does NOT require a specific number of I/O calls.
-    */
-   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_DATA;
-#endif
-}
-
-void PNGAPI
-png_write_chunk_start(png_structp png_ptr, png_const_bytep chunk_string,
-    png_uint_32 length)
-{
-   png_write_chunk_header(png_ptr, PNG_CHUNK_FROM_STRING(chunk_string), length);
-}
-
-/* Write the data of a PNG chunk started with png_write_chunk_header().
- * Note that multiple calls to this function are allowed, and that the
- * sum of the lengths from these calls *must* add up to the total_length
- * given to png_write_chunk_header().
- */
-void PNGAPI
-png_write_chunk_data(png_structp png_ptr, png_const_bytep data,
-    png_size_t length)
-{
-   /* Write the data, and run the CRC over it */
-   if (png_ptr == NULL)
-      return;
-
-   if (data != NULL && length > 0)
-   {
-      png_write_data(png_ptr, data, length);
-
-      /* Update the CRC after writing the data,
-       * in case that the user I/O routine alters it.
-       */
-      png_calculate_crc(png_ptr, data, length);
-   }
-}
-
-/* Finish a chunk started with png_write_chunk_header(). */
-void PNGAPI
-png_write_chunk_end(png_structp png_ptr)
-{
-   png_byte buf[4];
-
-   if (png_ptr == NULL) return;
-
-#ifdef PNG_IO_STATE_SUPPORTED
-   /* Inform the I/O callback that the chunk CRC is being written.
-    * PNG_IO_CHUNK_CRC requires a single I/O function call.
-    */
-   png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_CRC;
-#endif
-
-   /* Write the crc in a single operation */
-   png_save_uint_32(buf, png_ptr->crc);
-
-   png_write_data(png_ptr, buf, (png_size_t)4);
-}
-
-/* Write a PNG chunk all at once.  The type is an array of ASCII characters
- * representing the chunk name.  The array must be at least 4 bytes in
- * length, and does not need to be null terminated.  To be safe, pass the
- * pre-defined chunk names here, and if you need a new one, define it
- * where the others are defined.  The length is the length of the data.
- * All the data must be present.  If that is not possible, use the
- * png_write_chunk_start(), png_write_chunk_data(), and png_write_chunk_end()
- * functions instead.
- */
-static void
-png_write_complete_chunk(png_structp png_ptr, png_uint_32 chunk_name,
-   png_const_bytep data, png_size_t length)
-{
-   if (png_ptr == NULL)
-      return;
-
-   /* On 64 bit architectures 'length' may not fit in a png_uint_32. */
-   if (length > PNG_UINT_32_MAX)
-      png_error(png_ptr, "length exceeds PNG maxima");
-
-   png_write_chunk_header(png_ptr, chunk_name, (png_uint_32)length);
-   png_write_chunk_data(png_ptr, data, length);
-   png_write_chunk_end(png_ptr);
-}
-
-/* This is the API that calls the internal function above. */
-void PNGAPI
-png_write_chunk(png_structp png_ptr, png_const_bytep chunk_string,
-   png_const_bytep data, png_size_t length)
-{
-   png_write_complete_chunk(png_ptr, PNG_CHUNK_FROM_STRING(chunk_string), data,
-      length);
-}
-
-/* Initialize the compressor for the appropriate type of compression. */
-static void
-png_zlib_claim(png_structp png_ptr, png_uint_32 state)
-{
-   if (!(png_ptr->zlib_state & PNG_ZLIB_IN_USE))
-   {
-      /* If already initialized for 'state' do not re-init. */
-      if (png_ptr->zlib_state != state)
-      {
-         int ret = Z_OK;
-         png_const_charp who = "-";
-
-         /* If actually initialized for another state do a deflateEnd. */
-         if (png_ptr->zlib_state != PNG_ZLIB_UNINITIALIZED)
-         {
-            ret = deflateEnd(&png_ptr->zstream);
-            who = "end";
-            png_ptr->zlib_state = PNG_ZLIB_UNINITIALIZED;
-         }
-
-         /* zlib itself detects an incomplete state on deflateEnd */
-         if (ret == Z_OK) switch (state)
-         {
-#           ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED
-               case PNG_ZLIB_FOR_TEXT:
-                  ret = deflateInit2(&png_ptr->zstream,
-                     png_ptr->zlib_text_level, png_ptr->zlib_text_method,
-                     png_ptr->zlib_text_window_bits,
-                     png_ptr->zlib_text_mem_level, png_ptr->zlib_text_strategy);
-                  who = "text";
-                  break;
-#           endif
-
-            case PNG_ZLIB_FOR_IDAT:
-               ret = deflateInit2(&png_ptr->zstream, png_ptr->zlib_level,
-                   png_ptr->zlib_method, png_ptr->zlib_window_bits,
-                   png_ptr->zlib_mem_level, png_ptr->zlib_strategy);
-               who = "IDAT";
-               break;
-
-            default:
-               png_error(png_ptr, "invalid zlib state");
-         }
-
-         if (ret == Z_OK)
-            png_ptr->zlib_state = state;
-
-         else /* an error in deflateEnd or deflateInit2 */
-         {
-            size_t pos = 0;
-            char msg[64];
-
-            pos = png_safecat(msg, sizeof msg, pos,
-               "zlib failed to initialize compressor (");
-            pos = png_safecat(msg, sizeof msg, pos, who);
-
-            switch (ret)
-            {
-               case Z_VERSION_ERROR:
-                  pos = png_safecat(msg, sizeof msg, pos, ") version error");
-                  break;
-
-               case Z_STREAM_ERROR:
-                  pos = png_safecat(msg, sizeof msg, pos, ") stream error");
-                  break;
-
-               case Z_MEM_ERROR:
-                  pos = png_safecat(msg, sizeof msg, pos, ") memory error");
-                  break;
-
-               default:
-                  pos = png_safecat(msg, sizeof msg, pos, ") unknown error");
-                  break;
-            }
-
-            png_error(png_ptr, msg);
-         }
-      }
-
-      /* Here on success, claim the zstream: */
-      png_ptr->zlib_state |= PNG_ZLIB_IN_USE;
-   }
-
-   else
-      png_error(png_ptr, "zstream already in use (internal error)");
-}
-
-/* The opposite: release the stream.  It is also reset, this API will warn on
- * error but will not fail.
- */
-static void
-png_zlib_release(png_structp png_ptr)
-{
-   if (png_ptr->zlib_state & PNG_ZLIB_IN_USE)
-   {
-      int ret = deflateReset(&png_ptr->zstream);
-
-      png_ptr->zlib_state &= ~PNG_ZLIB_IN_USE;
-
-      if (ret != Z_OK)
-      {
-         png_const_charp err;
-         PNG_WARNING_PARAMETERS(p)
-
-         switch (ret)
-         {
-            case Z_VERSION_ERROR:
-               err = "version";
-               break;
-
-            case Z_STREAM_ERROR:
-               err = "stream";
-               break;
-
-            case Z_MEM_ERROR:
-               err = "memory";
-               break;
-
-            default:
-               err = "unknown";
-               break;
-         }
-
-         png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_d, ret);
-         png_warning_parameter(p, 2, err);
-
-         if (png_ptr->zstream.msg)
-            err = png_ptr->zstream.msg;
-         else
-            err = "[no zlib message]";
-
-         png_warning_parameter(p, 3, err);
-
-         png_formatted_warning(png_ptr, p,
-            "zlib failed to reset compressor: @1(@2): @3");
-      }
-   }
-
-   else
-      png_warning(png_ptr, "zstream not in use (internal error)");
-}
-
-#ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED
-/* This pair of functions encapsulates the operation of (a) compressing a
- * text string, and (b) issuing it later as a series of chunk data writes.
- * The compression_state structure is shared context for these functions
- * set up by the caller in order to make the whole mess thread-safe.
- */
-
-typedef struct
-{
-   png_const_bytep input;   /* The uncompressed input data */
-   png_size_t input_len;    /* Its length */
-   int num_output_ptr;      /* Number of output pointers used */
-   int max_output_ptr;      /* Size of output_ptr */
-   png_bytep *output_ptr;   /* Array of pointers to output */
-} compression_state;
-
-/* Compress given text into storage in the png_ptr structure */
-static int /* PRIVATE */
-png_text_compress(png_structp png_ptr,
-    png_const_charp text, png_size_t text_len, int compression,
-    compression_state *comp)
-{
-   int ret;
-
-   comp->num_output_ptr = 0;
-   comp->max_output_ptr = 0;
-   comp->output_ptr = NULL;
-   comp->input = NULL;
-   comp->input_len = text_len;
-
-   /* We may just want to pass the text right through */
-   if (compression == PNG_TEXT_COMPRESSION_NONE)
-   {
-      comp->input = (png_const_bytep)text;
-      return((int)text_len);
-   }
-
-   if (compression >= PNG_TEXT_COMPRESSION_LAST)
-   {
-      PNG_WARNING_PARAMETERS(p)
-
-      png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_d,
-         compression);
-      png_formatted_warning(png_ptr, p, "Unknown compression type @1");
-   }
-
-   /* We can't write the chunk until we find out how much data we have,
-    * which means we need to run the compressor first and save the
-    * output.  This shouldn't be a problem, as the vast majority of
-    * comments should be reasonable, but we will set up an array of
-    * malloc'd pointers to be sure.
-    *
-    * If we knew the application was well behaved, we could simplify this
-    * greatly by assuming we can always malloc an output buffer large
-    * enough to hold the compressed text ((1001 * text_len / 1000) + 12)
-    * and malloc this directly.  The only time this would be a bad idea is
-    * if we can't malloc more than 64K and we have 64K of random input
-    * data, or if the input string is incredibly large (although this
-    * wouldn't cause a failure, just a slowdown due to swapping).
-    */
-   png_zlib_claim(png_ptr, PNG_ZLIB_FOR_TEXT);
-
-   /* Set up the compression buffers */
-   /* TODO: the following cast hides a potential overflow problem. */
-   png_ptr->zstream.avail_in = (uInt)text_len;
-
-   /* NOTE: assume zlib doesn't overwrite the input */
-   png_ptr->zstream.next_in = (Bytef *)text;
-   png_ptr->zstream.avail_out = png_ptr->zbuf_size;
-   png_ptr->zstream.next_out = png_ptr->zbuf;
-
-   /* This is the same compression loop as in png_write_row() */
-   do
-   {
-      /* Compress the data */
-      ret = deflate(&png_ptr->zstream, Z_NO_FLUSH);
-
-      if (ret != Z_OK)
-      {
-         /* Error */
-         if (png_ptr->zstream.msg != NULL)
-            png_error(png_ptr, png_ptr->zstream.msg);
-
-         else
-            png_error(png_ptr, "zlib error");
-      }
-
-      /* Check to see if we need more room */
-      if (!(png_ptr->zstream.avail_out))
-      {
-         /* Make sure the output array has room */
-         if (comp->num_output_ptr >= comp->max_output_ptr)
-         {
-            int old_max;
-
-            old_max = comp->max_output_ptr;
-            comp->max_output_ptr = comp->num_output_ptr + 4;
-            if (comp->output_ptr != NULL)
-            {
-               png_bytepp old_ptr;
-
-               old_ptr = comp->output_ptr;
-
-               comp->output_ptr = (png_bytepp)png_malloc(png_ptr,
-                   (png_alloc_size_t)
-                   (comp->max_output_ptr * png_sizeof(png_charpp)));
-
-               png_memcpy(comp->output_ptr, old_ptr, old_max
-                   * png_sizeof(png_charp));
-
-               png_free(png_ptr, old_ptr);
-            }
-            else
-               comp->output_ptr = (png_bytepp)png_malloc(png_ptr,
-                   (png_alloc_size_t)
-                   (comp->max_output_ptr * png_sizeof(png_charp)));
-         }
-
-         /* Save the data */
-         comp->output_ptr[comp->num_output_ptr] =
-             (png_bytep)png_malloc(png_ptr,
-             (png_alloc_size_t)png_ptr->zbuf_size);
-
-         png_memcpy(comp->output_ptr[comp->num_output_ptr], png_ptr->zbuf,
-             png_ptr->zbuf_size);
-
-         comp->num_output_ptr++;
-
-         /* and reset the buffer */
-         png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
-         png_ptr->zstream.next_out = png_ptr->zbuf;
-      }
-   /* Continue until we don't have any more to compress */
-   } while (png_ptr->zstream.avail_in);
-
-   /* Finish the compression */
-   do
-   {
-      /* Tell zlib we are finished */
-      ret = deflate(&png_ptr->zstream, Z_FINISH);
-
-      if (ret == Z_OK)
-      {
-         /* Check to see if we need more room */
-         if (!(png_ptr->zstream.avail_out))
-         {
-            /* Check to make sure our output array has room */
-            if (comp->num_output_ptr >= comp->max_output_ptr)
-            {
-               int old_max;
-
-               old_max = comp->max_output_ptr;
-               comp->max_output_ptr = comp->num_output_ptr + 4;
-               if (comp->output_ptr != NULL)
-               {
-                  png_bytepp old_ptr;
-
-                  old_ptr = comp->output_ptr;
-
-                  /* This could be optimized to realloc() */
-                  comp->output_ptr = (png_bytepp)png_malloc(png_ptr,
-                      (png_alloc_size_t)(comp->max_output_ptr *
-                      png_sizeof(png_charp)));
-
-                  png_memcpy(comp->output_ptr, old_ptr,
-                      old_max * png_sizeof(png_charp));
-
-                  png_free(png_ptr, old_ptr);
-               }
-
-               else
-                  comp->output_ptr = (png_bytepp)png_malloc(png_ptr,
-                      (png_alloc_size_t)(comp->max_output_ptr *
-                      png_sizeof(png_charp)));
-            }
-
-            /* Save the data */
-            comp->output_ptr[comp->num_output_ptr] =
-                (png_bytep)png_malloc(png_ptr,
-                (png_alloc_size_t)png_ptr->zbuf_size);
-
-            png_memcpy(comp->output_ptr[comp->num_output_ptr], png_ptr->zbuf,
-                png_ptr->zbuf_size);
-
-            comp->num_output_ptr++;
-
-            /* and reset the buffer pointers */
-            png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
-            png_ptr->zstream.next_out = png_ptr->zbuf;
-         }
-      }
-      else if (ret != Z_STREAM_END)
-      {
-         /* We got an error */
-         if (png_ptr->zstream.msg != NULL)
-            png_error(png_ptr, png_ptr->zstream.msg);
-
-         else
-            png_error(png_ptr, "zlib error");
-      }
-   } while (ret != Z_STREAM_END);
-
-   /* Text length is number of buffers plus last buffer */
-   text_len = png_ptr->zbuf_size * comp->num_output_ptr;
-
-   if (png_ptr->zstream.avail_out < png_ptr->zbuf_size)
-      text_len += png_ptr->zbuf_size - (png_size_t)png_ptr->zstream.avail_out;
-
-   return((int)text_len);
-}
-
-/* Ship the compressed text out via chunk writes */
-static void /* PRIVATE */
-png_write_compressed_data_out(png_structp png_ptr, compression_state *comp,
-   png_size_t data_len)
-{
-   int i;
-
-   /* Handle the no-compression case */
-   if (comp->input)
-   {
-      png_write_chunk_data(png_ptr, comp->input, data_len);
-
-      return;
-   }
-
-#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
-   /* The zbuf_size test is because the code below doesn't work if zbuf_size is
-    * '1'; simply skip it to avoid memory overwrite.
-    */
-   if (data_len >= 2 && comp->input_len < 16384 && png_ptr->zbuf_size > 1)
-   {
-      unsigned int z_cmf;  /* zlib compression method and flags */
-
-      /* Optimize the CMF field in the zlib stream.  This hack of the zlib
-       * stream is compliant to the stream specification.
-       */
-
-      if (comp->num_output_ptr)
-        z_cmf = comp->output_ptr[0][0];
-      else
-        z_cmf = png_ptr->zbuf[0];
-
-      if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70)
-      {
-         unsigned int z_cinfo;
-         unsigned int half_z_window_size;
-         png_size_t uncompressed_text_size = comp->input_len;
-
-         z_cinfo = z_cmf >> 4;
-         half_z_window_size = 1 << (z_cinfo + 7);
-
-         while (uncompressed_text_size <= half_z_window_size &&
-             half_z_window_size >= 256)
-         {
-            z_cinfo--;
-            half_z_window_size >>= 1;
-         }
-
-         z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4);
-
-         if (comp->num_output_ptr)
-         {
-
-           if (comp->output_ptr[0][0] != z_cmf)
-           {
-              int tmp;
-
-              comp->output_ptr[0][0] = (png_byte)z_cmf;
-              tmp = comp->output_ptr[0][1] & 0xe0;
-              tmp += 0x1f - ((z_cmf << 8) + tmp) % 0x1f;
-              comp->output_ptr[0][1] = (png_byte)tmp;
-           }
-         }
-         else
-         {
-            int tmp;
-
-            png_ptr->zbuf[0] = (png_byte)z_cmf;
-            tmp = png_ptr->zbuf[1] & 0xe0;
-            tmp += 0x1f - ((z_cmf << 8) + tmp) % 0x1f;
-            png_ptr->zbuf[1] = (png_byte)tmp;
-         }
-      }
-
-      else
-         png_error(png_ptr,
-             "Invalid zlib compression method or flags in non-IDAT chunk");
-   }
-#endif /* PNG_WRITE_OPTIMIZE_CMF_SUPPORTED */
-
-   /* Write saved output buffers, if any */
-   for (i = 0; i < comp->num_output_ptr; i++)
-   {
-      png_write_chunk_data(png_ptr, comp->output_ptr[i],
-          (png_size_t)png_ptr->zbuf_size);
-
-      png_free(png_ptr, comp->output_ptr[i]);
-   }
-
-   if (comp->max_output_ptr != 0)
-      png_free(png_ptr, comp->output_ptr);
-
-   /* Write anything left in zbuf */
-   if (png_ptr->zstream.avail_out < (png_uint_32)png_ptr->zbuf_size)
-      png_write_chunk_data(png_ptr, png_ptr->zbuf,
-          (png_size_t)(png_ptr->zbuf_size - png_ptr->zstream.avail_out));
-
-   /* Reset zlib for another zTXt/iTXt or image data */
-   png_zlib_release(png_ptr);
-}
-#endif /* PNG_WRITE_COMPRESSED_TEXT_SUPPORTED */
-
-/* Write the IHDR chunk, and update the png_struct with the necessary
- * information.  Note that the rest of this code depends upon this
- * information being correct.
- */
-void /* PRIVATE */
-png_write_IHDR(png_structp png_ptr, png_uint_32 width, png_uint_32 height,
-    int bit_depth, int color_type, int compression_type, int filter_type,
-    int interlace_type)
-{
-   png_byte buf[13]; /* Buffer to store the IHDR info */
-
-   png_debug(1, "in png_write_IHDR");
-
-   /* Check that we have valid input data from the application info */
-   switch (color_type)
-   {
-      case PNG_COLOR_TYPE_GRAY:
-         switch (bit_depth)
-         {
-            case 1:
-            case 2:
-            case 4:
-            case 8:
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-            case 16:
-#endif
-               png_ptr->channels = 1; break;
-
-            default:
-               png_error(png_ptr,
-                   "Invalid bit depth for grayscale image");
-         }
-         break;
-
-      case PNG_COLOR_TYPE_RGB:
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-         if (bit_depth != 8 && bit_depth != 16)
-#else
-         if (bit_depth != 8)
-#endif
-            png_error(png_ptr, "Invalid bit depth for RGB image");
-
-         png_ptr->channels = 3;
-         break;
-
-      case PNG_COLOR_TYPE_PALETTE:
-         switch (bit_depth)
-         {
-            case 1:
-            case 2:
-            case 4:
-            case 8:
-               png_ptr->channels = 1;
-               break;
-
-            default:
-               png_error(png_ptr, "Invalid bit depth for paletted image");
-         }
-         break;
-
-      case PNG_COLOR_TYPE_GRAY_ALPHA:
-         if (bit_depth != 8 && bit_depth != 16)
-            png_error(png_ptr, "Invalid bit depth for grayscale+alpha image");
-
-         png_ptr->channels = 2;
-         break;
-
-      case PNG_COLOR_TYPE_RGB_ALPHA:
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-         if (bit_depth != 8 && bit_depth != 16)
-#else
-         if (bit_depth != 8)
-#endif
-            png_error(png_ptr, "Invalid bit depth for RGBA image");
-
-         png_ptr->channels = 4;
-         break;
-
-      default:
-         png_error(png_ptr, "Invalid image color type specified");
-   }
-
-   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
-   {
-      png_warning(png_ptr, "Invalid compression type specified");
-      compression_type = PNG_COMPRESSION_TYPE_BASE;
-   }
-
-   /* Write filter_method 64 (intrapixel differencing) only if
-    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
-    * 2. Libpng did not write a PNG signature (this filter_method is only
-    *    used in PNG datastreams that are embedded in MNG datastreams) and
-    * 3. The application called png_permit_mng_features with a mask that
-    *    included PNG_FLAG_MNG_FILTER_64 and
-    * 4. The filter_method is 64 and
-    * 5. The color_type is RGB or RGBA
-    */
-   if (
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-       !((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
-       ((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE) == 0) &&
-       (color_type == PNG_COLOR_TYPE_RGB ||
-        color_type == PNG_COLOR_TYPE_RGB_ALPHA) &&
-       (filter_type == PNG_INTRAPIXEL_DIFFERENCING)) &&
-#endif
-       filter_type != PNG_FILTER_TYPE_BASE)
-   {
-      png_warning(png_ptr, "Invalid filter type specified");
-      filter_type = PNG_FILTER_TYPE_BASE;
-   }
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   if (interlace_type != PNG_INTERLACE_NONE &&
-       interlace_type != PNG_INTERLACE_ADAM7)
-   {
-      png_warning(png_ptr, "Invalid interlace type specified");
-      interlace_type = PNG_INTERLACE_ADAM7;
-   }
-#else
-   interlace_type=PNG_INTERLACE_NONE;
-#endif
-
-   /* Save the relevent information */
-   png_ptr->bit_depth = (png_byte)bit_depth;
-   png_ptr->color_type = (png_byte)color_type;
-   png_ptr->interlaced = (png_byte)interlace_type;
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-   png_ptr->filter_type = (png_byte)filter_type;
-#endif
-   png_ptr->compression_type = (png_byte)compression_type;
-   png_ptr->width = width;
-   png_ptr->height = height;
-
-   png_ptr->pixel_depth = (png_byte)(bit_depth * png_ptr->channels);
-   png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, width);
-   /* Set the usr info, so any transformations can modify it */
-   png_ptr->usr_width = png_ptr->width;
-   png_ptr->usr_bit_depth = png_ptr->bit_depth;
-   png_ptr->usr_channels = png_ptr->channels;
-
-   /* Pack the header information into the buffer */
-   png_save_uint_32(buf, width);
-   png_save_uint_32(buf + 4, height);
-   buf[8] = (png_byte)bit_depth;
-   buf[9] = (png_byte)color_type;
-   buf[10] = (png_byte)compression_type;
-   buf[11] = (png_byte)filter_type;
-   buf[12] = (png_byte)interlace_type;
-
-   /* Write the chunk */
-   png_write_complete_chunk(png_ptr, png_IHDR, buf, (png_size_t)13);
-
-   /* Initialize zlib with PNG info */
-   png_ptr->zstream.zalloc = png_zalloc;
-   png_ptr->zstream.zfree = png_zfree;
-   png_ptr->zstream.opaque = (voidpf)png_ptr;
-
-   if (!(png_ptr->do_filter))
-   {
-      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE ||
-          png_ptr->bit_depth < 8)
-         png_ptr->do_filter = PNG_FILTER_NONE;
-
-      else
-         png_ptr->do_filter = PNG_ALL_FILTERS;
-   }
-
-   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_STRATEGY))
-   {
-      if (png_ptr->do_filter != PNG_FILTER_NONE)
-         png_ptr->zlib_strategy = Z_FILTERED;
-
-      else
-         png_ptr->zlib_strategy = Z_DEFAULT_STRATEGY;
-   }
-
-   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_LEVEL))
-      png_ptr->zlib_level = Z_DEFAULT_COMPRESSION;
-
-   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL))
-      png_ptr->zlib_mem_level = 8;
-
-   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS))
-      png_ptr->zlib_window_bits = 15;
-
-   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_METHOD))
-      png_ptr->zlib_method = 8;
-
-#ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED
-#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED
-   if (!(png_ptr->flags & PNG_FLAG_ZTXT_CUSTOM_STRATEGY))
-      png_ptr->zlib_text_strategy = Z_DEFAULT_STRATEGY;
-
-   if (!(png_ptr->flags & PNG_FLAG_ZTXT_CUSTOM_LEVEL))
-      png_ptr->zlib_text_level = png_ptr->zlib_level;
-
-   if (!(png_ptr->flags & PNG_FLAG_ZTXT_CUSTOM_MEM_LEVEL))
-      png_ptr->zlib_text_mem_level = png_ptr->zlib_mem_level;
-
-   if (!(png_ptr->flags & PNG_FLAG_ZTXT_CUSTOM_WINDOW_BITS))
-      png_ptr->zlib_text_window_bits = png_ptr->zlib_window_bits;
-
-   if (!(png_ptr->flags & PNG_FLAG_ZTXT_CUSTOM_METHOD))
-      png_ptr->zlib_text_method = png_ptr->zlib_method;
-#else
-   png_ptr->zlib_text_strategy = Z_DEFAULT_STRATEGY;
-   png_ptr->zlib_text_level = png_ptr->zlib_level;
-   png_ptr->zlib_text_mem_level = png_ptr->zlib_mem_level;
-   png_ptr->zlib_text_window_bits = png_ptr->zlib_window_bits;
-   png_ptr->zlib_text_method = png_ptr->zlib_method;
-#endif /* PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED */
-#endif /* PNG_WRITE_COMPRESSED_TEXT_SUPPORTED */
-
-   /* Record that the compressor has not yet been initialized. */
-   png_ptr->zlib_state = PNG_ZLIB_UNINITIALIZED;
-
-   png_ptr->mode = PNG_HAVE_IHDR; /* not READY_FOR_ZTXT */
-}
-
-/* Write the palette.  We are careful not to trust png_color to be in the
- * correct order for PNG, so people can redefine it to any convenient
- * structure.
- */
-void /* PRIVATE */
-png_write_PLTE(png_structp png_ptr, png_const_colorp palette,
-    png_uint_32 num_pal)
-{
-   png_uint_32 i;
-   png_const_colorp pal_ptr;
-   png_byte buf[3];
-
-   png_debug(1, "in png_write_PLTE");
-
-   if ((
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-       !(png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) &&
-#endif
-       num_pal == 0) || num_pal > 256)
-   {
-      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
-      {
-         png_error(png_ptr, "Invalid number of colors in palette");
-      }
-
-      else
-      {
-         png_warning(png_ptr, "Invalid number of colors in palette");
-         return;
-      }
-   }
-
-   if (!(png_ptr->color_type&PNG_COLOR_MASK_COLOR))
-   {
-      png_warning(png_ptr,
-          "Ignoring request to write a PLTE chunk in grayscale PNG");
-
-      return;
-   }
-
-   png_ptr->num_palette = (png_uint_16)num_pal;
-   png_debug1(3, "num_palette = %d", png_ptr->num_palette);
-
-   png_write_chunk_header(png_ptr, png_PLTE, (png_uint_32)(num_pal * 3));
-#ifdef PNG_POINTER_INDEXING_SUPPORTED
-
-   for (i = 0, pal_ptr = palette; i < num_pal; i++, pal_ptr++)
-   {
-      buf[0] = pal_ptr->red;
-      buf[1] = pal_ptr->green;
-      buf[2] = pal_ptr->blue;
-      png_write_chunk_data(png_ptr, buf, (png_size_t)3);
-   }
-
-#else
-   /* This is a little slower but some buggy compilers need to do this
-    * instead
-    */
-   pal_ptr=palette;
-
-   for (i = 0; i < num_pal; i++)
-   {
-      buf[0] = pal_ptr[i].red;
-      buf[1] = pal_ptr[i].green;
-      buf[2] = pal_ptr[i].blue;
-      png_write_chunk_data(png_ptr, buf, (png_size_t)3);
-   }
-
-#endif
-   png_write_chunk_end(png_ptr);
-   png_ptr->mode |= PNG_HAVE_PLTE;
-}
-
-/* Write an IDAT chunk */
-void /* PRIVATE */
-png_write_IDAT(png_structp png_ptr, png_bytep data, png_size_t length)
-{
-   png_debug(1, "in png_write_IDAT");
-
-#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED
-   if (!(png_ptr->mode & PNG_HAVE_IDAT) &&
-       png_ptr->compression_type == PNG_COMPRESSION_TYPE_BASE)
-   {
-      /* Optimize the CMF field in the zlib stream.  This hack of the zlib
-       * stream is compliant to the stream specification.
-       */
-      unsigned int z_cmf = data[0];  /* zlib compression method and flags */
-
-      if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70)
-      {
-         /* Avoid memory underflows and multiplication overflows.
-          *
-          * The conditions below are practically always satisfied;
-          * however, they still must be checked.
-          */
-         if (length >= 2 &&
-             png_ptr->height < 16384 && png_ptr->width < 16384)
-         {
-            /* Compute the maximum possible length of the datastream */
-
-            /* Number of pixels, plus for each row a filter byte
-             * and possibly a padding byte, so increase the maximum
-             * size to account for these.
-             */
-            unsigned int z_cinfo;
-            unsigned int half_z_window_size;
-            png_uint_32 uncompressed_idat_size = png_ptr->height *
-                ((png_ptr->width *
-                png_ptr->channels * png_ptr->bit_depth + 15) >> 3);
-
-            /* If it's interlaced, each block of 8 rows is sent as up to
-             * 14 rows, i.e., 6 additional rows, each with a filter byte
-             * and possibly a padding byte
-             */
-            if (png_ptr->interlaced)
-               uncompressed_idat_size += ((png_ptr->height + 7)/8) *
-                   (png_ptr->bit_depth < 8 ? 12 : 6);
-
-            z_cinfo = z_cmf >> 4;
-            half_z_window_size = 1 << (z_cinfo + 7);
-
-            while (uncompressed_idat_size <= half_z_window_size &&
-                half_z_window_size >= 256)
-            {
-               z_cinfo--;
-               half_z_window_size >>= 1;
-            }
-
-            z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4);
-
-            if (data[0] != z_cmf)
-            {
-               int tmp;
-               data[0] = (png_byte)z_cmf;
-               tmp = data[1] & 0xe0;
-               tmp += 0x1f - ((z_cmf << 8) + tmp) % 0x1f;
-               data[1] = (png_byte)tmp;
-            }
-         }
-      }
-
-      else
-         png_error(png_ptr,
-             "Invalid zlib compression method or flags in IDAT");
-   }
-#endif /* PNG_WRITE_OPTIMIZE_CMF_SUPPORTED */
-
-   png_write_complete_chunk(png_ptr, png_IDAT, data, length);
-   png_ptr->mode |= PNG_HAVE_IDAT;
-
-   /* Prior to 1.5.4 this code was replicated in every caller (except at the
-    * end, where it isn't technically necessary).  Since this function has
-    * flushed the data we can safely reset the zlib output buffer here.
-    */
-   png_ptr->zstream.next_out = png_ptr->zbuf;
-   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
-}
-
-/* Write an IEND chunk */
-void /* PRIVATE */
-png_write_IEND(png_structp png_ptr)
-{
-   png_debug(1, "in png_write_IEND");
-
-   png_write_complete_chunk(png_ptr, png_IEND, NULL, (png_size_t)0);
-   png_ptr->mode |= PNG_HAVE_IEND;
-}
-
-#ifdef PNG_WRITE_gAMA_SUPPORTED
-/* Write a gAMA chunk */
-void /* PRIVATE */
-png_write_gAMA_fixed(png_structp png_ptr, png_fixed_point file_gamma)
-{
-   png_byte buf[4];
-
-   png_debug(1, "in png_write_gAMA");
-
-   /* file_gamma is saved in 1/100,000ths */
-   png_save_uint_32(buf, (png_uint_32)file_gamma);
-   png_write_complete_chunk(png_ptr, png_gAMA, buf, (png_size_t)4);
-}
-#endif
-
-#ifdef PNG_WRITE_sRGB_SUPPORTED
-/* Write a sRGB chunk */
-void /* PRIVATE */
-png_write_sRGB(png_structp png_ptr, int srgb_intent)
-{
-   png_byte buf[1];
-
-   png_debug(1, "in png_write_sRGB");
-
-   if (srgb_intent >= PNG_sRGB_INTENT_LAST)
-      png_warning(png_ptr,
-          "Invalid sRGB rendering intent specified");
-
-   buf[0]=(png_byte)srgb_intent;
-   png_write_complete_chunk(png_ptr, png_sRGB, buf, (png_size_t)1);
-}
-#endif
-
-#ifdef PNG_WRITE_iCCP_SUPPORTED
-/* Write an iCCP chunk */
-void /* PRIVATE */
-png_write_iCCP(png_structp png_ptr, png_const_charp name, int compression_type,
-    png_const_charp profile, int profile_len)
-{
-   png_size_t name_len;
-   png_charp new_name;
-   compression_state comp;
-   int embedded_profile_len = 0;
-
-   png_debug(1, "in png_write_iCCP");
-
-   comp.num_output_ptr = 0;
-   comp.max_output_ptr = 0;
-   comp.output_ptr = NULL;
-   comp.input = NULL;
-   comp.input_len = 0;
-
-   if ((name_len = png_check_keyword(png_ptr, name, &new_name)) == 0)
-      return;
-
-   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
-      png_warning(png_ptr, "Unknown compression type in iCCP chunk");
-
-   if (profile == NULL)
-      profile_len = 0;
-
-   if (profile_len > 3)
-      embedded_profile_len =
-          ((*( (png_const_bytep)profile    ))<<24) |
-          ((*( (png_const_bytep)profile + 1))<<16) |
-          ((*( (png_const_bytep)profile + 2))<< 8) |
-          ((*( (png_const_bytep)profile + 3))    );
-
-   if (embedded_profile_len < 0)
-   {
-      png_warning(png_ptr,
-          "Embedded profile length in iCCP chunk is negative");
-
-      png_free(png_ptr, new_name);
-      return;
-   }
-
-   if (profile_len < embedded_profile_len)
-   {
-      png_warning(png_ptr,
-          "Embedded profile length too large in iCCP chunk");
-
-      png_free(png_ptr, new_name);
-      return;
-   }
-
-   if (profile_len > embedded_profile_len)
-   {
-      png_warning(png_ptr,
-          "Truncating profile to actual length in iCCP chunk");
-
-      profile_len = embedded_profile_len;
-   }
-
-   if (profile_len)
-      profile_len = png_text_compress(png_ptr, profile,
-          (png_size_t)profile_len, PNG_COMPRESSION_TYPE_BASE, &comp);
-
-   /* Make sure we include the NULL after the name and the compression type */
-   png_write_chunk_header(png_ptr, png_iCCP,
-       (png_uint_32)(name_len + profile_len + 2));
-
-   new_name[name_len + 1] = 0x00;
-
-   png_write_chunk_data(png_ptr, (png_bytep)new_name,
-       (png_size_t)(name_len + 2));
-
-   if (profile_len)
-   {
-      png_write_compressed_data_out(png_ptr, &comp, profile_len);
-   }
-
-   png_write_chunk_end(png_ptr);
-   png_free(png_ptr, new_name);
-}
-#endif
-
-#ifdef PNG_WRITE_sPLT_SUPPORTED
-/* Write a sPLT chunk */
-void /* PRIVATE */
-png_write_sPLT(png_structp png_ptr, png_const_sPLT_tp spalette)
-{
-   png_size_t name_len;
-   png_charp new_name;
-   png_byte entrybuf[10];
-   png_size_t entry_size = (spalette->depth == 8 ? 6 : 10);
-   png_size_t palette_size = entry_size * spalette->nentries;
-   png_sPLT_entryp ep;
-#ifndef PNG_POINTER_INDEXING_SUPPORTED
-   int i;
-#endif
-
-   png_debug(1, "in png_write_sPLT");
-
-   if ((name_len = png_check_keyword(png_ptr,spalette->name, &new_name))==0)
-      return;
-
-   /* Make sure we include the NULL after the name */
-   png_write_chunk_header(png_ptr, png_sPLT,
-       (png_uint_32)(name_len + 2 + palette_size));
-
-   png_write_chunk_data(png_ptr, (png_bytep)new_name,
-       (png_size_t)(name_len + 1));
-
-   png_write_chunk_data(png_ptr, &spalette->depth, (png_size_t)1);
-
-   /* Loop through each palette entry, writing appropriately */
-#ifdef PNG_POINTER_INDEXING_SUPPORTED
-   for (ep = spalette->entries; ep<spalette->entries + spalette->nentries; ep++)
-   {
-      if (spalette->depth == 8)
-      {
-         entrybuf[0] = (png_byte)ep->red;
-         entrybuf[1] = (png_byte)ep->green;
-         entrybuf[2] = (png_byte)ep->blue;
-         entrybuf[3] = (png_byte)ep->alpha;
-         png_save_uint_16(entrybuf + 4, ep->frequency);
-      }
-
-      else
-      {
-         png_save_uint_16(entrybuf + 0, ep->red);
-         png_save_uint_16(entrybuf + 2, ep->green);
-         png_save_uint_16(entrybuf + 4, ep->blue);
-         png_save_uint_16(entrybuf + 6, ep->alpha);
-         png_save_uint_16(entrybuf + 8, ep->frequency);
-      }
-
-      png_write_chunk_data(png_ptr, entrybuf, (png_size_t)entry_size);
-   }
-#else
-   ep=spalette->entries;
-   for (i = 0; i>spalette->nentries; i++)
-   {
-      if (spalette->depth == 8)
-      {
-         entrybuf[0] = (png_byte)ep[i].red;
-         entrybuf[1] = (png_byte)ep[i].green;
-         entrybuf[2] = (png_byte)ep[i].blue;
-         entrybuf[3] = (png_byte)ep[i].alpha;
-         png_save_uint_16(entrybuf + 4, ep[i].frequency);
-      }
-
-      else
-      {
-         png_save_uint_16(entrybuf + 0, ep[i].red);
-         png_save_uint_16(entrybuf + 2, ep[i].green);
-         png_save_uint_16(entrybuf + 4, ep[i].blue);
-         png_save_uint_16(entrybuf + 6, ep[i].alpha);
-         png_save_uint_16(entrybuf + 8, ep[i].frequency);
-      }
-
-      png_write_chunk_data(png_ptr, entrybuf, (png_size_t)entry_size);
-   }
-#endif
-
-   png_write_chunk_end(png_ptr);
-   png_free(png_ptr, new_name);
-}
-#endif
-
-#ifdef PNG_WRITE_sBIT_SUPPORTED
-/* Write the sBIT chunk */
-void /* PRIVATE */
-png_write_sBIT(png_structp png_ptr, png_const_color_8p sbit, int color_type)
-{
-   png_byte buf[4];
-   png_size_t size;
-
-   png_debug(1, "in png_write_sBIT");
-
-   /* Make sure we don't depend upon the order of PNG_COLOR_8 */
-   if (color_type & PNG_COLOR_MASK_COLOR)
-   {
-      png_byte maxbits;
-
-      maxbits = (png_byte)(color_type==PNG_COLOR_TYPE_PALETTE ? 8 :
-          png_ptr->usr_bit_depth);
-
-      if (sbit->red == 0 || sbit->red > maxbits ||
-          sbit->green == 0 || sbit->green > maxbits ||
-          sbit->blue == 0 || sbit->blue > maxbits)
-      {
-         png_warning(png_ptr, "Invalid sBIT depth specified");
-         return;
-      }
-
-      buf[0] = sbit->red;
-      buf[1] = sbit->green;
-      buf[2] = sbit->blue;
-      size = 3;
-   }
-
-   else
-   {
-      if (sbit->gray == 0 || sbit->gray > png_ptr->usr_bit_depth)
-      {
-         png_warning(png_ptr, "Invalid sBIT depth specified");
-         return;
-      }
-
-      buf[0] = sbit->gray;
-      size = 1;
-   }
-
-   if (color_type & PNG_COLOR_MASK_ALPHA)
-   {
-      if (sbit->alpha == 0 || sbit->alpha > png_ptr->usr_bit_depth)
-      {
-         png_warning(png_ptr, "Invalid sBIT depth specified");
-         return;
-      }
-
-      buf[size++] = sbit->alpha;
-   }
-
-   png_write_complete_chunk(png_ptr, png_sBIT, buf, size);
-}
-#endif
-
-#ifdef PNG_WRITE_cHRM_SUPPORTED
-/* Write the cHRM chunk */
-void /* PRIVATE */
-png_write_cHRM_fixed(png_structp png_ptr, png_fixed_point white_x,
-    png_fixed_point white_y, png_fixed_point red_x, png_fixed_point red_y,
-    png_fixed_point green_x, png_fixed_point green_y, png_fixed_point blue_x,
-    png_fixed_point blue_y)
-{
-   png_byte buf[32];
-
-   png_debug(1, "in png_write_cHRM");
-
-   /* Each value is saved in 1/100,000ths */
-#ifdef PNG_CHECK_cHRM_SUPPORTED
-   if (png_check_cHRM_fixed(png_ptr, white_x, white_y, red_x, red_y,
-       green_x, green_y, blue_x, blue_y))
-#endif
-   {
-      png_save_uint_32(buf, (png_uint_32)white_x);
-      png_save_uint_32(buf + 4, (png_uint_32)white_y);
-
-      png_save_uint_32(buf + 8, (png_uint_32)red_x);
-      png_save_uint_32(buf + 12, (png_uint_32)red_y);
-
-      png_save_uint_32(buf + 16, (png_uint_32)green_x);
-      png_save_uint_32(buf + 20, (png_uint_32)green_y);
-
-      png_save_uint_32(buf + 24, (png_uint_32)blue_x);
-      png_save_uint_32(buf + 28, (png_uint_32)blue_y);
-
-      png_write_complete_chunk(png_ptr, png_cHRM, buf, (png_size_t)32);
-   }
-}
-#endif
-
-#ifdef PNG_WRITE_tRNS_SUPPORTED
-/* Write the tRNS chunk */
-void /* PRIVATE */
-png_write_tRNS(png_structp png_ptr, png_const_bytep trans_alpha,
-    png_const_color_16p tran, int num_trans, int color_type)
-{
-   png_byte buf[6];
-
-   png_debug(1, "in png_write_tRNS");
-
-   if (color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      if (num_trans <= 0 || num_trans > (int)png_ptr->num_palette)
-      {
-         png_warning(png_ptr, "Invalid number of transparent colors specified");
-         return;
-      }
-
-      /* Write the chunk out as it is */
-      png_write_complete_chunk(png_ptr, png_tRNS, trans_alpha, (png_size_t)num_trans);
-   }
-
-   else if (color_type == PNG_COLOR_TYPE_GRAY)
-   {
-      /* One 16 bit value */
-      if (tran->gray >= (1 << png_ptr->bit_depth))
-      {
-         png_warning(png_ptr,
-             "Ignoring attempt to write tRNS chunk out-of-range for bit_depth");
-
-         return;
-      }
-
-      png_save_uint_16(buf, tran->gray);
-      png_write_complete_chunk(png_ptr, png_tRNS, buf, (png_size_t)2);
-   }
-
-   else if (color_type == PNG_COLOR_TYPE_RGB)
-   {
-      /* Three 16 bit values */
-      png_save_uint_16(buf, tran->red);
-      png_save_uint_16(buf + 2, tran->green);
-      png_save_uint_16(buf + 4, tran->blue);
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-      if (png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4]))
-#else
-      if (buf[0] | buf[2] | buf[4])
-#endif
-      {
-         png_warning(png_ptr,
-           "Ignoring attempt to write 16-bit tRNS chunk when bit_depth is 8");
-         return;
-      }
-
-      png_write_complete_chunk(png_ptr, png_tRNS, buf, (png_size_t)6);
-   }
-
-   else
-   {
-      png_warning(png_ptr, "Can't write tRNS with an alpha channel");
-   }
-}
-#endif
-
-#ifdef PNG_WRITE_bKGD_SUPPORTED
-/* Write the background chunk */
-void /* PRIVATE */
-png_write_bKGD(png_structp png_ptr, png_const_color_16p back, int color_type)
-{
-   png_byte buf[6];
-
-   png_debug(1, "in png_write_bKGD");
-
-   if (color_type == PNG_COLOR_TYPE_PALETTE)
-   {
-      if (
-#ifdef PNG_MNG_FEATURES_SUPPORTED
-          (png_ptr->num_palette ||
-          (!(png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE))) &&
-#endif
-         back->index >= png_ptr->num_palette)
-      {
-         png_warning(png_ptr, "Invalid background palette index");
-         return;
-      }
-
-      buf[0] = back->index;
-      png_write_complete_chunk(png_ptr, png_bKGD, buf, (png_size_t)1);
-   }
-
-   else if (color_type & PNG_COLOR_MASK_COLOR)
-   {
-      png_save_uint_16(buf, back->red);
-      png_save_uint_16(buf + 2, back->green);
-      png_save_uint_16(buf + 4, back->blue);
-#ifdef PNG_WRITE_16BIT_SUPPORTED
-      if (png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4]))
-#else
-      if (buf[0] | buf[2] | buf[4])
-#endif
-      {
-         png_warning(png_ptr,
-             "Ignoring attempt to write 16-bit bKGD chunk when bit_depth is 8");
-
-         return;
-      }
-
-      png_write_complete_chunk(png_ptr, png_bKGD, buf, (png_size_t)6);
-   }
-
-   else
-   {
-      if (back->gray >= (1 << png_ptr->bit_depth))
-      {
-         png_warning(png_ptr,
-             "Ignoring attempt to write bKGD chunk out-of-range for bit_depth");
-
-         return;
-      }
-
-      png_save_uint_16(buf, back->gray);
-      png_write_complete_chunk(png_ptr, png_bKGD, buf, (png_size_t)2);
-   }
-}
-#endif
-
-#ifdef PNG_WRITE_hIST_SUPPORTED
-/* Write the histogram */
-void /* PRIVATE */
-png_write_hIST(png_structp png_ptr, png_const_uint_16p hist, int num_hist)
-{
-   int i;
-   png_byte buf[3];
-
-   png_debug(1, "in png_write_hIST");
-
-   if (num_hist > (int)png_ptr->num_palette)
-   {
-      png_debug2(3, "num_hist = %d, num_palette = %d", num_hist,
-          png_ptr->num_palette);
-
-      png_warning(png_ptr, "Invalid number of histogram entries specified");
-      return;
-   }
-
-   png_write_chunk_header(png_ptr, png_hIST, (png_uint_32)(num_hist * 2));
-
-   for (i = 0; i < num_hist; i++)
-   {
-      png_save_uint_16(buf, hist[i]);
-      png_write_chunk_data(png_ptr, buf, (png_size_t)2);
-   }
-
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_pCAL_SUPPORTED) || \
-    defined(PNG_WRITE_iCCP_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
-/* Check that the tEXt or zTXt keyword is valid per PNG 1.0 specification,
- * and if invalid, correct the keyword rather than discarding the entire
- * chunk.  The PNG 1.0 specification requires keywords 1-79 characters in
- * length, forbids leading or trailing whitespace, multiple internal spaces,
- * and the non-break space (0x80) from ISO 8859-1.  Returns keyword length.
- *
- * The new_key is allocated to hold the corrected keyword and must be freed
- * by the calling routine.  This avoids problems with trying to write to
- * static keywords without having to have duplicate copies of the strings.
- */
-png_size_t /* PRIVATE */
-png_check_keyword(png_structp png_ptr, png_const_charp key, png_charpp new_key)
-{
-   png_size_t key_len;
-   png_const_charp ikp;
-   png_charp kp, dp;
-   int kflag;
-   int kwarn=0;
-
-   png_debug(1, "in png_check_keyword");
-
-   *new_key = NULL;
-
-   if (key == NULL || (key_len = png_strlen(key)) == 0)
-   {
-      png_warning(png_ptr, "zero length keyword");
-      return ((png_size_t)0);
-   }
-
-   png_debug1(2, "Keyword to be checked is '%s'", key);
-
-   *new_key = (png_charp)png_malloc_warn(png_ptr, (png_uint_32)(key_len + 2));
-
-   if (*new_key == NULL)
-   {
-      png_warning(png_ptr, "Out of memory while procesing keyword");
-      return ((png_size_t)0);
-   }
-
-   /* Replace non-printing characters with a blank and print a warning */
-   for (ikp = key, dp = *new_key; *ikp != '\0'; ikp++, dp++)
-   {
-      if ((png_byte)*ikp < 0x20 ||
-         ((png_byte)*ikp > 0x7E && (png_byte)*ikp < 0xA1))
-      {
-         PNG_WARNING_PARAMETERS(p)
-
-         png_warning_parameter_unsigned(p, 1, PNG_NUMBER_FORMAT_02x,
-            (png_byte)*ikp);
-         png_formatted_warning(png_ptr, p, "invalid keyword character 0x@1");
-         *dp = ' ';
-      }
-
-      else
-      {
-         *dp = *ikp;
-      }
-   }
-   *dp = '\0';
-
-   /* Remove any trailing white space. */
-   kp = *new_key + key_len - 1;
-   if (*kp == ' ')
-   {
-      png_warning(png_ptr, "trailing spaces removed from keyword");
-
-      while (*kp == ' ')
-      {
-         *(kp--) = '\0';
-         key_len--;
-      }
-   }
-
-   /* Remove any leading white space. */
-   kp = *new_key;
-   if (*kp == ' ')
-   {
-      png_warning(png_ptr, "leading spaces removed from keyword");
-
-      while (*kp == ' ')
-      {
-         kp++;
-         key_len--;
-      }
-   }
-
-   png_debug1(2, "Checking for multiple internal spaces in '%s'", kp);
-
-   /* Remove multiple internal spaces. */
-   for (kflag = 0, dp = *new_key; *kp != '\0'; kp++)
-   {
-      if (*kp == ' ' && kflag == 0)
-      {
-         *(dp++) = *kp;
-         kflag = 1;
-      }
-
-      else if (*kp == ' ')
-      {
-         key_len--;
-         kwarn = 1;
-      }
-
-      else
-      {
-         *(dp++) = *kp;
-         kflag = 0;
-      }
-   }
-   *dp = '\0';
-   if (kwarn)
-      png_warning(png_ptr, "extra interior spaces removed from keyword");
-
-   if (key_len == 0)
-   {
-      png_free(png_ptr, *new_key);
-      png_warning(png_ptr, "Zero length keyword");
-   }
-
-   if (key_len > 79)
-   {
-      png_warning(png_ptr, "keyword length must be 1 - 79 characters");
-      (*new_key)[79] = '\0';
-      key_len = 79;
-   }
-
-   return (key_len);
-}
-#endif
-
-#ifdef PNG_WRITE_tEXt_SUPPORTED
-/* Write a tEXt chunk */
-void /* PRIVATE */
-png_write_tEXt(png_structp png_ptr, png_const_charp key, png_const_charp text,
-    png_size_t text_len)
-{
-   png_size_t key_len;
-   png_charp new_key;
-
-   png_debug(1, "in png_write_tEXt");
-
-   if ((key_len = png_check_keyword(png_ptr, key, &new_key))==0)
-      return;
-
-   if (text == NULL || *text == '\0')
-      text_len = 0;
-
-   else
-      text_len = png_strlen(text);
-
-   /* Make sure we include the 0 after the key */
-   png_write_chunk_header(png_ptr, png_tEXt,
-       (png_uint_32)(key_len + text_len + 1));
-   /*
-    * We leave it to the application to meet PNG-1.0 requirements on the
-    * contents of the text.  PNG-1.0 through PNG-1.2 discourage the use of
-    * any non-Latin-1 characters except for NEWLINE.  ISO PNG will forbid them.
-    * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG.
-    */
-   png_write_chunk_data(png_ptr, (png_bytep)new_key,
-       (png_size_t)(key_len + 1));
-
-   if (text_len)
-      png_write_chunk_data(png_ptr, (png_const_bytep)text,
-          (png_size_t)text_len);
-
-   png_write_chunk_end(png_ptr);
-   png_free(png_ptr, new_key);
-}
-#endif
-
-#ifdef PNG_WRITE_zTXt_SUPPORTED
-/* Write a compressed text chunk */
-void /* PRIVATE */
-png_write_zTXt(png_structp png_ptr, png_const_charp key, png_const_charp text,
-    png_size_t text_len, int compression)
-{
-   png_size_t key_len;
-   png_byte buf;
-   png_charp new_key;
-   compression_state comp;
-
-   png_debug(1, "in png_write_zTXt");
-
-   comp.num_output_ptr = 0;
-   comp.max_output_ptr = 0;
-   comp.output_ptr = NULL;
-   comp.input = NULL;
-   comp.input_len = 0;
-
-   if ((key_len = png_check_keyword(png_ptr, key, &new_key)) == 0)
-   {
-      png_free(png_ptr, new_key);
-      return;
-   }
-
-   if (text == NULL || *text == '\0' || compression==PNG_TEXT_COMPRESSION_NONE)
-   {
-      png_write_tEXt(png_ptr, new_key, text, (png_size_t)0);
-      png_free(png_ptr, new_key);
-      return;
-   }
-
-   text_len = png_strlen(text);
-
-   /* Compute the compressed data; do it now for the length */
-   text_len = png_text_compress(png_ptr, text, text_len, compression,
-       &comp);
-
-   /* Write start of chunk */
-   png_write_chunk_header(png_ptr, png_zTXt,
-       (png_uint_32)(key_len+text_len + 2));
-
-   /* Write key */
-   png_write_chunk_data(png_ptr, (png_bytep)new_key,
-       (png_size_t)(key_len + 1));
-
-   png_free(png_ptr, new_key);
-
-   buf = (png_byte)compression;
-
-   /* Write compression */
-   png_write_chunk_data(png_ptr, &buf, (png_size_t)1);
-
-   /* Write the compressed data */
-   png_write_compressed_data_out(png_ptr, &comp, text_len);
-
-   /* Close the chunk */
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#ifdef PNG_WRITE_iTXt_SUPPORTED
-/* Write an iTXt chunk */
-void /* PRIVATE */
-png_write_iTXt(png_structp png_ptr, int compression, png_const_charp key,
-    png_const_charp lang, png_const_charp lang_key, png_const_charp text)
-{
-   png_size_t lang_len, key_len, lang_key_len, text_len;
-   png_charp new_lang;
-   png_charp new_key = NULL;
-   png_byte cbuf[2];
-   compression_state comp;
-
-   png_debug(1, "in png_write_iTXt");
-
-   comp.num_output_ptr = 0;
-   comp.max_output_ptr = 0;
-   comp.output_ptr = NULL;
-   comp.input = NULL;
-
-   if ((key_len = png_check_keyword(png_ptr, key, &new_key)) == 0)
-      return;
-
-   if ((lang_len = png_check_keyword(png_ptr, lang, &new_lang)) == 0)
-   {
-      png_warning(png_ptr, "Empty language field in iTXt chunk");
-      new_lang = NULL;
-      lang_len = 0;
-   }
-
-   if (lang_key == NULL)
-      lang_key_len = 0;
-
-   else
-      lang_key_len = png_strlen(lang_key);
-
-   if (text == NULL)
-      text_len = 0;
-
-   else
-      text_len = png_strlen(text);
-
-   /* Compute the compressed data; do it now for the length */
-   text_len = png_text_compress(png_ptr, text, text_len, compression - 2,
-       &comp);
-
-
-   /* Make sure we include the compression flag, the compression byte,
-    * and the NULs after the key, lang, and lang_key parts
-    */
-
-   png_write_chunk_header(png_ptr, png_iTXt, (png_uint_32)(
-        5 /* comp byte, comp flag, terminators for key, lang and lang_key */
-        + key_len
-        + lang_len
-        + lang_key_len
-        + text_len));
-
-   /* We leave it to the application to meet PNG-1.0 requirements on the
-    * contents of the text.  PNG-1.0 through PNG-1.2 discourage the use of
-    * any non-Latin-1 characters except for NEWLINE.  ISO PNG will forbid them.
-    * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG.
-    */
-   png_write_chunk_data(png_ptr, (png_bytep)new_key, (png_size_t)(key_len + 1));
-
-   /* Set the compression flag */
-   if (compression == PNG_ITXT_COMPRESSION_NONE ||
-       compression == PNG_TEXT_COMPRESSION_NONE)
-      cbuf[0] = 0;
-
-   else /* compression == PNG_ITXT_COMPRESSION_zTXt */
-      cbuf[0] = 1;
-
-   /* Set the compression method */
-   cbuf[1] = 0;
-
-   png_write_chunk_data(png_ptr, cbuf, (png_size_t)2);
-
-   cbuf[0] = 0;
-   png_write_chunk_data(png_ptr, (new_lang ? (png_const_bytep)new_lang : cbuf),
-       (png_size_t)(lang_len + 1));
-
-   png_write_chunk_data(png_ptr, (lang_key ? (png_const_bytep)lang_key : cbuf),
-       (png_size_t)(lang_key_len + 1));
-
-   png_write_compressed_data_out(png_ptr, &comp, text_len);
-
-   png_write_chunk_end(png_ptr);
-
-   png_free(png_ptr, new_key);
-   png_free(png_ptr, new_lang);
-}
-#endif
-
-#ifdef PNG_WRITE_oFFs_SUPPORTED
-/* Write the oFFs chunk */
-void /* PRIVATE */
-png_write_oFFs(png_structp png_ptr, png_int_32 x_offset, png_int_32 y_offset,
-    int unit_type)
-{
-   png_byte buf[9];
-
-   png_debug(1, "in png_write_oFFs");
-
-   if (unit_type >= PNG_OFFSET_LAST)
-      png_warning(png_ptr, "Unrecognized unit type for oFFs chunk");
-
-   png_save_int_32(buf, x_offset);
-   png_save_int_32(buf + 4, y_offset);
-   buf[8] = (png_byte)unit_type;
-
-   png_write_complete_chunk(png_ptr, png_oFFs, buf, (png_size_t)9);
-}
-#endif
-#ifdef PNG_WRITE_pCAL_SUPPORTED
-/* Write the pCAL chunk (described in the PNG extensions document) */
-void /* PRIVATE */
-png_write_pCAL(png_structp png_ptr, png_charp purpose, png_int_32 X0,
-    png_int_32 X1, int type, int nparams, png_const_charp units,
-    png_charpp params)
-{
-   png_size_t purpose_len, units_len, total_len;
-   png_size_tp params_len;
-   png_byte buf[10];
-   png_charp new_purpose;
-   int i;
-
-   png_debug1(1, "in png_write_pCAL (%d parameters)", nparams);
-
-   if (type >= PNG_EQUATION_LAST)
-      png_warning(png_ptr, "Unrecognized equation type for pCAL chunk");
-
-   purpose_len = png_check_keyword(png_ptr, purpose, &new_purpose) + 1;
-   png_debug1(3, "pCAL purpose length = %d", (int)purpose_len);
-   units_len = png_strlen(units) + (nparams == 0 ? 0 : 1);
-   png_debug1(3, "pCAL units length = %d", (int)units_len);
-   total_len = purpose_len + units_len + 10;
-
-   params_len = (png_size_tp)png_malloc(png_ptr,
-       (png_alloc_size_t)(nparams * png_sizeof(png_size_t)));
-
-   /* Find the length of each parameter, making sure we don't count the
-    * null terminator for the last parameter.
-    */
-   for (i = 0; i < nparams; i++)
-   {
-      params_len[i] = png_strlen(params[i]) + (i == nparams - 1 ? 0 : 1);
-      png_debug2(3, "pCAL parameter %d length = %lu", i,
-          (unsigned long)params_len[i]);
-      total_len += params_len[i];
-   }
-
-   png_debug1(3, "pCAL total length = %d", (int)total_len);
-   png_write_chunk_header(png_ptr, png_pCAL, (png_uint_32)total_len);
-   png_write_chunk_data(png_ptr, (png_const_bytep)new_purpose, purpose_len);
-   png_save_int_32(buf, X0);
-   png_save_int_32(buf + 4, X1);
-   buf[8] = (png_byte)type;
-   buf[9] = (png_byte)nparams;
-   png_write_chunk_data(png_ptr, buf, (png_size_t)10);
-   png_write_chunk_data(png_ptr, (png_const_bytep)units, (png_size_t)units_len);
-
-   png_free(png_ptr, new_purpose);
-
-   for (i = 0; i < nparams; i++)
-   {
-      png_write_chunk_data(png_ptr, (png_const_bytep)params[i], params_len[i]);
-   }
-
-   png_free(png_ptr, params_len);
-   png_write_chunk_end(png_ptr);
-}
-#endif
-
-#ifdef PNG_WRITE_sCAL_SUPPORTED
-/* Write the sCAL chunk */
-void /* PRIVATE */
-png_write_sCAL_s(png_structp png_ptr, int unit, png_const_charp width,
-    png_const_charp height)
-{
-   png_byte buf[64];
-   png_size_t wlen, hlen, total_len;
-
-   png_debug(1, "in png_write_sCAL_s");
-
-   wlen = png_strlen(width);
-   hlen = png_strlen(height);
-   total_len = wlen + hlen + 2;
-
-   if (total_len > 64)
-   {
-      png_warning(png_ptr, "Can't write sCAL (buffer too small)");
-      return;
-   }
-
-   buf[0] = (png_byte)unit;
-   png_memcpy(buf + 1, width, wlen + 1);      /* Append the '\0' here */
-   png_memcpy(buf + wlen + 2, height, hlen);  /* Do NOT append the '\0' here */
-
-   png_debug1(3, "sCAL total length = %u", (unsigned int)total_len);
-   png_write_complete_chunk(png_ptr, png_sCAL, buf, total_len);
-}
-#endif
-
-#ifdef PNG_WRITE_pHYs_SUPPORTED
-/* Write the pHYs chunk */
-void /* PRIVATE */
-png_write_pHYs(png_structp png_ptr, png_uint_32 x_pixels_per_unit,
-    png_uint_32 y_pixels_per_unit,
-    int unit_type)
-{
-   png_byte buf[9];
-
-   png_debug(1, "in png_write_pHYs");
-
-   if (unit_type >= PNG_RESOLUTION_LAST)
-      png_warning(png_ptr, "Unrecognized unit type for pHYs chunk");
-
-   png_save_uint_32(buf, x_pixels_per_unit);
-   png_save_uint_32(buf + 4, y_pixels_per_unit);
-   buf[8] = (png_byte)unit_type;
-
-   png_write_complete_chunk(png_ptr, png_pHYs, buf, (png_size_t)9);
-}
-#endif
-
-#ifdef PNG_WRITE_tIME_SUPPORTED
-/* Write the tIME chunk.  Use either png_convert_from_struct_tm()
- * or png_convert_from_time_t(), or fill in the structure yourself.
- */
-void /* PRIVATE */
-png_write_tIME(png_structp png_ptr, png_const_timep mod_time)
-{
-   png_byte buf[7];
-
-   png_debug(1, "in png_write_tIME");
-
-   if (mod_time->month  > 12 || mod_time->month  < 1 ||
-       mod_time->day    > 31 || mod_time->day    < 1 ||
-       mod_time->hour   > 23 || mod_time->second > 60)
-   {
-      png_warning(png_ptr, "Invalid time specified for tIME chunk");
-      return;
-   }
-
-   png_save_uint_16(buf, mod_time->year);
-   buf[2] = mod_time->month;
-   buf[3] = mod_time->day;
-   buf[4] = mod_time->hour;
-   buf[5] = mod_time->minute;
-   buf[6] = mod_time->second;
-
-   png_write_complete_chunk(png_ptr, png_tIME, buf, (png_size_t)7);
-}
-#endif
-
-/* Initializes the row writing capability of libpng */
-void /* PRIVATE */
-png_write_start_row(png_structp png_ptr)
-{
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static PNG_CONST png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static PNG_CONST png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   /* Start of interlace block in the y direction */
-   static PNG_CONST png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-
-   /* Offset to next interlace block in the y direction */
-   static PNG_CONST png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
-#endif
-
-   png_alloc_size_t buf_size;
-   int usr_pixel_depth;
-
-   png_debug(1, "in png_write_start_row");
-
-   usr_pixel_depth = png_ptr->usr_channels * png_ptr->usr_bit_depth;
-   buf_size = PNG_ROWBYTES(usr_pixel_depth, png_ptr->width) + 1;
-
-   /* 1.5.6: added to allow checking in the row write code. */
-   png_ptr->transformed_pixel_depth = png_ptr->pixel_depth;
-   png_ptr->maximum_pixel_depth = (png_byte)usr_pixel_depth;
-
-   /* Set up row buffer */
-   png_ptr->row_buf = (png_bytep)png_malloc(png_ptr, buf_size);
-
-   png_ptr->row_buf[0] = PNG_FILTER_VALUE_NONE;
-
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-   /* Set up filtering buffer, if using this filter */
-   if (png_ptr->do_filter & PNG_FILTER_SUB)
-   {
-      png_ptr->sub_row = (png_bytep)png_malloc(png_ptr, png_ptr->rowbytes + 1);
-
-      png_ptr->sub_row[0] = PNG_FILTER_VALUE_SUB;
-   }
-
-   /* We only need to keep the previous row if we are using one of these. */
-   if (png_ptr->do_filter & (PNG_FILTER_AVG | PNG_FILTER_UP | PNG_FILTER_PAETH))
-   {
-      /* Set up previous row buffer */
-      png_ptr->prev_row = (png_bytep)png_calloc(png_ptr, buf_size);
-
-      if (png_ptr->do_filter & PNG_FILTER_UP)
-      {
-         png_ptr->up_row = (png_bytep)png_malloc(png_ptr,
-            png_ptr->rowbytes + 1);
-
-         png_ptr->up_row[0] = PNG_FILTER_VALUE_UP;
-      }
-
-      if (png_ptr->do_filter & PNG_FILTER_AVG)
-      {
-         png_ptr->avg_row = (png_bytep)png_malloc(png_ptr,
-             png_ptr->rowbytes + 1);
-
-         png_ptr->avg_row[0] = PNG_FILTER_VALUE_AVG;
-      }
-
-      if (png_ptr->do_filter & PNG_FILTER_PAETH)
-      {
-         png_ptr->paeth_row = (png_bytep)png_malloc(png_ptr,
-             png_ptr->rowbytes + 1);
-
-         png_ptr->paeth_row[0] = PNG_FILTER_VALUE_PAETH;
-      }
-   }
-#endif /* PNG_WRITE_FILTER_SUPPORTED */
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* If interlaced, we need to set up width and height of pass */
-   if (png_ptr->interlaced)
-   {
-      if (!(png_ptr->transformations & PNG_INTERLACE))
-      {
-         png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 -
-             png_pass_ystart[0]) / png_pass_yinc[0];
-
-         png_ptr->usr_width = (png_ptr->width + png_pass_inc[0] - 1 -
-             png_pass_start[0]) / png_pass_inc[0];
-      }
-
-      else
-      {
-         png_ptr->num_rows = png_ptr->height;
-         png_ptr->usr_width = png_ptr->width;
-      }
-   }
-
-   else
-#endif
-   {
-      png_ptr->num_rows = png_ptr->height;
-      png_ptr->usr_width = png_ptr->width;
-   }
-
-   png_zlib_claim(png_ptr, PNG_ZLIB_FOR_IDAT);
-   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
-   png_ptr->zstream.next_out = png_ptr->zbuf;
-}
-
-/* Internal use only.  Called when finished processing a row of data. */
-void /* PRIVATE */
-png_write_finish_row(png_structp png_ptr)
-{
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static PNG_CONST png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static PNG_CONST png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   /* Start of interlace block in the y direction */
-   static PNG_CONST png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
-
-   /* Offset to next interlace block in the y direction */
-   static PNG_CONST png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
-#endif
-
-   int ret;
-
-   png_debug(1, "in png_write_finish_row");
-
-   /* Next row */
-   png_ptr->row_number++;
-
-   /* See if we are done */
-   if (png_ptr->row_number < png_ptr->num_rows)
-      return;
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-   /* If interlaced, go to next pass */
-   if (png_ptr->interlaced)
-   {
-      png_ptr->row_number = 0;
-      if (png_ptr->transformations & PNG_INTERLACE)
-      {
-         png_ptr->pass++;
-      }
-
-      else
-      {
-         /* Loop until we find a non-zero width or height pass */
-         do
-         {
-            png_ptr->pass++;
-
-            if (png_ptr->pass >= 7)
-               break;
-
-            png_ptr->usr_width = (png_ptr->width +
-                png_pass_inc[png_ptr->pass] - 1 -
-                png_pass_start[png_ptr->pass]) /
-                png_pass_inc[png_ptr->pass];
-
-            png_ptr->num_rows = (png_ptr->height +
-                png_pass_yinc[png_ptr->pass] - 1 -
-                png_pass_ystart[png_ptr->pass]) /
-                png_pass_yinc[png_ptr->pass];
-
-            if (png_ptr->transformations & PNG_INTERLACE)
-               break;
-
-         } while (png_ptr->usr_width == 0 || png_ptr->num_rows == 0);
-
-      }
-
-      /* Reset the row above the image for the next pass */
-      if (png_ptr->pass < 7)
-      {
-         if (png_ptr->prev_row != NULL)
-            png_memset(png_ptr->prev_row, 0,
-                (png_size_t)(PNG_ROWBYTES(png_ptr->usr_channels*
-                png_ptr->usr_bit_depth, png_ptr->width)) + 1);
-
-         return;
-      }
-   }
-#endif
-
-   /* If we get here, we've just written the last row, so we need
-      to flush the compressor */
-   do
-   {
-      /* Tell the compressor we are done */
-      ret = deflate(&png_ptr->zstream, Z_FINISH);
-
-      /* Check for an error */
-      if (ret == Z_OK)
-      {
-         /* Check to see if we need more room */
-         if (!(png_ptr->zstream.avail_out))
-         {
-            png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size);
-            png_ptr->zstream.next_out = png_ptr->zbuf;
-            png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
-         }
-      }
-
-      else if (ret != Z_STREAM_END)
-      {
-         if (png_ptr->zstream.msg != NULL)
-            png_error(png_ptr, png_ptr->zstream.msg);
-
-         else
-            png_error(png_ptr, "zlib error");
-      }
-   } while (ret != Z_STREAM_END);
-
-   /* Write any extra space */
-   if (png_ptr->zstream.avail_out < png_ptr->zbuf_size)
-   {
-      png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size -
-          png_ptr->zstream.avail_out);
-   }
-
-   png_zlib_release(png_ptr);
-   png_ptr->zstream.data_type = Z_BINARY;
-}
-
-#ifdef PNG_WRITE_INTERLACING_SUPPORTED
-/* Pick out the correct pixels for the interlace pass.
- * The basic idea here is to go through the row with a source
- * pointer and a destination pointer (sp and dp), and copy the
- * correct pixels for the pass.  As the row gets compacted,
- * sp will always be >= dp, so we should never overwrite anything.
- * See the default: case for the easiest code to understand.
- */
-void /* PRIVATE */
-png_do_write_interlace(png_row_infop row_info, png_bytep row, int pass)
-{
-   /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */
-
-   /* Start of interlace block */
-   static PNG_CONST png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
-
-   /* Offset to next interlace block */
-   static PNG_CONST png_byte  png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
-
-   png_debug(1, "in png_do_write_interlace");
-
-   /* We don't have to do anything on the last pass (6) */
-   if (pass < 6)
-   {
-      /* Each pixel depth is handled separately */
-      switch (row_info->pixel_depth)
-      {
-         case 1:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            int shift;
-            int d;
-            int value;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            dp = row;
-            d = 0;
-            shift = 7;
-
-            for (i = png_pass_start[pass]; i < row_width;
-               i += png_pass_inc[pass])
-            {
-               sp = row + (png_size_t)(i >> 3);
-               value = (int)(*sp >> (7 - (int)(i & 0x07))) & 0x01;
-               d |= (value << shift);
-
-               if (shift == 0)
-               {
-                  shift = 7;
-                  *dp++ = (png_byte)d;
-                  d = 0;
-               }
-
-               else
-                  shift--;
-
-            }
-            if (shift != 7)
-               *dp = (png_byte)d;
-
-            break;
-         }
-
-         case 2:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            int shift;
-            int d;
-            int value;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            dp = row;
-            shift = 6;
-            d = 0;
-
-            for (i = png_pass_start[pass]; i < row_width;
-               i += png_pass_inc[pass])
-            {
-               sp = row + (png_size_t)(i >> 2);
-               value = (*sp >> ((3 - (int)(i & 0x03)) << 1)) & 0x03;
-               d |= (value << shift);
-
-               if (shift == 0)
-               {
-                  shift = 6;
-                  *dp++ = (png_byte)d;
-                  d = 0;
-               }
-
-               else
-                  shift -= 2;
-            }
-            if (shift != 6)
-               *dp = (png_byte)d;
-
-            break;
-         }
-
-         case 4:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            int shift;
-            int d;
-            int value;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-
-            dp = row;
-            shift = 4;
-            d = 0;
-            for (i = png_pass_start[pass]; i < row_width;
-                i += png_pass_inc[pass])
-            {
-               sp = row + (png_size_t)(i >> 1);
-               value = (*sp >> ((1 - (int)(i & 0x01)) << 2)) & 0x0f;
-               d |= (value << shift);
-
-               if (shift == 0)
-               {
-                  shift = 4;
-                  *dp++ = (png_byte)d;
-                  d = 0;
-               }
-
-               else
-                  shift -= 4;
-            }
-            if (shift != 4)
-               *dp = (png_byte)d;
-
-            break;
-         }
-
-         default:
-         {
-            png_bytep sp;
-            png_bytep dp;
-            png_uint_32 i;
-            png_uint_32 row_width = row_info->width;
-            png_size_t pixel_bytes;
-
-            /* Start at the beginning */
-            dp = row;
-
-            /* Find out how many bytes each pixel takes up */
-            pixel_bytes = (row_info->pixel_depth >> 3);
-
-            /* Loop through the row, only looking at the pixels that matter */
-            for (i = png_pass_start[pass]; i < row_width;
-               i += png_pass_inc[pass])
-            {
-               /* Find out where the original pixel is */
-               sp = row + (png_size_t)i * pixel_bytes;
-
-               /* Move the pixel */
-               if (dp != sp)
-                  png_memcpy(dp, sp, pixel_bytes);
-
-               /* Next pixel */
-               dp += pixel_bytes;
-            }
-            break;
-         }
-      }
-      /* Set new row width */
-      row_info->width = (row_info->width +
-          png_pass_inc[pass] - 1 -
-          png_pass_start[pass]) /
-          png_pass_inc[pass];
-
-      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
-          row_info->width);
-   }
-}
-#endif
-
-/* This filters the row, chooses which filter to use, if it has not already
- * been specified by the application, and then writes the row out with the
- * chosen filter.
- */
-static void png_write_filtered_row(png_structp png_ptr, png_bytep filtered_row,
-   png_size_t row_bytes);
-
-#define PNG_MAXSUM (((png_uint_32)(-1)) >> 1)
-#define PNG_HISHIFT 10
-#define PNG_LOMASK ((png_uint_32)0xffffL)
-#define PNG_HIMASK ((png_uint_32)(~PNG_LOMASK >> PNG_HISHIFT))
-void /* PRIVATE */
-png_write_find_filter(png_structp png_ptr, png_row_infop row_info)
-{
-   png_bytep best_row;
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-   png_bytep prev_row, row_buf;
-   png_uint_32 mins, bpp;
-   png_byte filter_to_do = png_ptr->do_filter;
-   png_size_t row_bytes = row_info->rowbytes;
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-   int num_p_filters = png_ptr->num_prev_filters;
-#endif
-
-   png_debug(1, "in png_write_find_filter");
-
-#ifndef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-  if (png_ptr->row_number == 0 && filter_to_do == PNG_ALL_FILTERS)
-  {
-     /* These will never be selected so we need not test them. */
-     filter_to_do &= ~(PNG_FILTER_UP | PNG_FILTER_PAETH);
-  }
-#endif
-
-   /* Find out how many bytes offset each pixel is */
-   bpp = (row_info->pixel_depth + 7) >> 3;
-
-   prev_row = png_ptr->prev_row;
-#endif
-   best_row = png_ptr->row_buf;
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-   row_buf = best_row;
-   mins = PNG_MAXSUM;
-
-   /* The prediction method we use is to find which method provides the
-    * smallest value when summing the absolute values of the distances
-    * from zero, using anything >= 128 as negative numbers.  This is known
-    * as the "minimum sum of absolute differences" heuristic.  Other
-    * heuristics are the "weighted minimum sum of absolute differences"
-    * (experimental and can in theory improve compression), and the "zlib
-    * predictive" method (not implemented yet), which does test compressions
-    * of lines using different filter methods, and then chooses the
-    * (series of) filter(s) that give minimum compressed data size (VERY
-    * computationally expensive).
-    *
-    * GRR 980525:  consider also
-    *
-    *   (1) minimum sum of absolute differences from running average (i.e.,
-    *       keep running sum of non-absolute differences & count of bytes)
-    *       [track dispersion, too?  restart average if dispersion too large?]
-    *
-    *  (1b) minimum sum of absolute differences from sliding average, probably
-    *       with window size <= deflate window (usually 32K)
-    *
-    *   (2) minimum sum of squared differences from zero or running average
-    *       (i.e., ~ root-mean-square approach)
-    */
-
-
-   /* We don't need to test the 'no filter' case if this is the only filter
-    * that has been chosen, as it doesn't actually do anything to the data.
-    */
-   if ((filter_to_do & PNG_FILTER_NONE) && filter_to_do != PNG_FILTER_NONE)
-   {
-      png_bytep rp;
-      png_uint_32 sum = 0;
-      png_size_t i;
-      int v;
-
-      for (i = 0, rp = row_buf + 1; i < row_bytes; i++, rp++)
-      {
-         v = *rp;
-         sum += (v < 128) ? v : 256 - v;
-      }
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-      {
-         png_uint_32 sumhi, sumlo;
-         int j;
-         sumlo = sum & PNG_LOMASK;
-         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK; /* Gives us some footroom */
-
-         /* Reduce the sum if we match any of the previous rows */
-         for (j = 0; j < num_p_filters; j++)
-         {
-            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_NONE)
-            {
-               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-
-               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-            }
-         }
-
-         /* Factor in the cost of this filter (this is here for completeness,
-          * but it makes no sense to have a "cost" for the NONE filter, as
-          * it has the minimum possible computational cost - none).
-          */
-         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_NONE]) >>
-             PNG_COST_SHIFT;
-
-         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_NONE]) >>
-             PNG_COST_SHIFT;
-
-         if (sumhi > PNG_HIMASK)
-            sum = PNG_MAXSUM;
-
-         else
-            sum = (sumhi << PNG_HISHIFT) + sumlo;
-      }
-#endif
-      mins = sum;
-   }
-
-   /* Sub filter */
-   if (filter_to_do == PNG_FILTER_SUB)
-   /* It's the only filter so no testing is needed */
-   {
-      png_bytep rp, lp, dp;
-      png_size_t i;
-
-      for (i = 0, rp = row_buf + 1, dp = png_ptr->sub_row + 1; i < bpp;
-           i++, rp++, dp++)
-      {
-         *dp = *rp;
-      }
-
-      for (lp = row_buf + 1; i < row_bytes;
-         i++, rp++, lp++, dp++)
-      {
-         *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
-      }
-
-      best_row = png_ptr->sub_row;
-   }
-
-   else if (filter_to_do & PNG_FILTER_SUB)
-   {
-      png_bytep rp, dp, lp;
-      png_uint_32 sum = 0, lmins = mins;
-      png_size_t i;
-      int v;
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-      /* We temporarily increase the "minimum sum" by the factor we
-       * would reduce the sum of this filter, so that we can do the
-       * early exit comparison without scaling the sum each time.
-       */
-      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-      {
-         int j;
-         png_uint_32 lmhi, lmlo;
-         lmlo = lmins & PNG_LOMASK;
-         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
-
-         for (j = 0; j < num_p_filters; j++)
-         {
-            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_SUB)
-            {
-               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-
-               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-            }
-         }
-
-         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >>
-             PNG_COST_SHIFT;
-
-         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >>
-             PNG_COST_SHIFT;
-
-         if (lmhi > PNG_HIMASK)
-            lmins = PNG_MAXSUM;
-
-         else
-            lmins = (lmhi << PNG_HISHIFT) + lmlo;
-      }
-#endif
-
-      for (i = 0, rp = row_buf + 1, dp = png_ptr->sub_row + 1; i < bpp;
-           i++, rp++, dp++)
-      {
-         v = *dp = *rp;
-
-         sum += (v < 128) ? v : 256 - v;
-      }
-
-      for (lp = row_buf + 1; i < row_bytes;
-         i++, rp++, lp++, dp++)
-      {
-         v = *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
-
-         sum += (v < 128) ? v : 256 - v;
-
-         if (sum > lmins)  /* We are already worse, don't continue. */
-            break;
-      }
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-      {
-         int j;
-         png_uint_32 sumhi, sumlo;
-         sumlo = sum & PNG_LOMASK;
-         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
-
-         for (j = 0; j < num_p_filters; j++)
-         {
-            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_SUB)
-            {
-               sumlo = (sumlo * png_ptr->inv_filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-
-               sumhi = (sumhi * png_ptr->inv_filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-            }
-         }
-
-         sumlo = (sumlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >>
-             PNG_COST_SHIFT;
-
-         sumhi = (sumhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >>
-             PNG_COST_SHIFT;
-
-         if (sumhi > PNG_HIMASK)
-            sum = PNG_MAXSUM;
-
-         else
-            sum = (sumhi << PNG_HISHIFT) + sumlo;
-      }
-#endif
-
-      if (sum < mins)
-      {
-         mins = sum;
-         best_row = png_ptr->sub_row;
-      }
-   }
-
-   /* Up filter */
-   if (filter_to_do == PNG_FILTER_UP)
-   {
-      png_bytep rp, dp, pp;
-      png_size_t i;
-
-      for (i = 0, rp = row_buf + 1, dp = png_ptr->up_row + 1,
-          pp = prev_row + 1; i < row_bytes;
-          i++, rp++, pp++, dp++)
-      {
-         *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff);
-      }
-
-      best_row = png_ptr->up_row;
-   }
-
-   else if (filter_to_do & PNG_FILTER_UP)
-   {
-      png_bytep rp, dp, pp;
-      png_uint_32 sum = 0, lmins = mins;
-      png_size_t i;
-      int v;
-
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-      {
-         int j;
-         png_uint_32 lmhi, lmlo;
-         lmlo = lmins & PNG_LOMASK;
-         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
-
-         for (j = 0; j < num_p_filters; j++)
-         {
-            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_UP)
-            {
-               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-
-               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-            }
-         }
-
-         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_UP]) >>
-             PNG_COST_SHIFT;
-
-         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_UP]) >>
-             PNG_COST_SHIFT;
-
-         if (lmhi > PNG_HIMASK)
-            lmins = PNG_MAXSUM;
-
-         else
-            lmins = (lmhi << PNG_HISHIFT) + lmlo;
-      }
-#endif
-
-      for (i = 0, rp = row_buf + 1, dp = png_ptr->up_row + 1,
-          pp = prev_row + 1; i < row_bytes; i++)
-      {
-         v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
-
-         sum += (v < 128) ? v : 256 - v;
-
-         if (sum > lmins)  /* We are already worse, don't continue. */
-            break;
-      }
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-      {
-         int j;
-         png_uint_32 sumhi, sumlo;
-         sumlo = sum & PNG_LOMASK;
-         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
-
-         for (j = 0; j < num_p_filters; j++)
-         {
-            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_UP)
-            {
-               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-
-               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-            }
-         }
-
-         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_UP]) >>
-             PNG_COST_SHIFT;
-
-         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_UP]) >>
-             PNG_COST_SHIFT;
-
-         if (sumhi > PNG_HIMASK)
-            sum = PNG_MAXSUM;
-
-         else
-            sum = (sumhi << PNG_HISHIFT) + sumlo;
-      }
-#endif
-
-      if (sum < mins)
-      {
-         mins = sum;
-         best_row = png_ptr->up_row;
-      }
-   }
-
-   /* Avg filter */
-   if (filter_to_do == PNG_FILTER_AVG)
-   {
-      png_bytep rp, dp, pp, lp;
-      png_uint_32 i;
-
-      for (i = 0, rp = row_buf + 1, dp = png_ptr->avg_row + 1,
-           pp = prev_row + 1; i < bpp; i++)
-      {
-         *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff);
-      }
-
-      for (lp = row_buf + 1; i < row_bytes; i++)
-      {
-         *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2))
-                 & 0xff);
-      }
-      best_row = png_ptr->avg_row;
-   }
-
-   else if (filter_to_do & PNG_FILTER_AVG)
-   {
-      png_bytep rp, dp, pp, lp;
-      png_uint_32 sum = 0, lmins = mins;
-      png_size_t i;
-      int v;
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-      {
-         int j;
-         png_uint_32 lmhi, lmlo;
-         lmlo = lmins & PNG_LOMASK;
-         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
-
-         for (j = 0; j < num_p_filters; j++)
-         {
-            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_AVG)
-            {
-               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-
-               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-            }
-         }
-
-         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_AVG]) >>
-             PNG_COST_SHIFT;
-
-         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_AVG]) >>
-             PNG_COST_SHIFT;
-
-         if (lmhi > PNG_HIMASK)
-            lmins = PNG_MAXSUM;
-
-         else
-            lmins = (lmhi << PNG_HISHIFT) + lmlo;
-      }
-#endif
-
-      for (i = 0, rp = row_buf + 1, dp = png_ptr->avg_row + 1,
-           pp = prev_row + 1; i < bpp; i++)
-      {
-         v = *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff);
-
-         sum += (v < 128) ? v : 256 - v;
-      }
-
-      for (lp = row_buf + 1; i < row_bytes; i++)
-      {
-         v = *dp++ =
-             (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2)) & 0xff);
-
-         sum += (v < 128) ? v : 256 - v;
-
-         if (sum > lmins)  /* We are already worse, don't continue. */
-            break;
-      }
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-      {
-         int j;
-         png_uint_32 sumhi, sumlo;
-         sumlo = sum & PNG_LOMASK;
-         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
-
-         for (j = 0; j < num_p_filters; j++)
-         {
-            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_NONE)
-            {
-               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-
-               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-            }
-         }
-
-         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_AVG]) >>
-             PNG_COST_SHIFT;
-
-         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_AVG]) >>
-             PNG_COST_SHIFT;
-
-         if (sumhi > PNG_HIMASK)
-            sum = PNG_MAXSUM;
-
-         else
-            sum = (sumhi << PNG_HISHIFT) + sumlo;
-      }
-#endif
-
-      if (sum < mins)
-      {
-         mins = sum;
-         best_row = png_ptr->avg_row;
-      }
-   }
-
-   /* Paeth filter */
-   if (filter_to_do == PNG_FILTER_PAETH)
-   {
-      png_bytep rp, dp, pp, cp, lp;
-      png_size_t i;
-
-      for (i = 0, rp = row_buf + 1, dp = png_ptr->paeth_row + 1,
-          pp = prev_row + 1; i < bpp; i++)
-      {
-         *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
-      }
-
-      for (lp = row_buf + 1, cp = prev_row + 1; i < row_bytes; i++)
-      {
-         int a, b, c, pa, pb, pc, p;
-
-         b = *pp++;
-         c = *cp++;
-         a = *lp++;
-
-         p = b - c;
-         pc = a - c;
-
-#ifdef PNG_USE_ABS
-         pa = abs(p);
-         pb = abs(pc);
-         pc = abs(p + pc);
-#else
-         pa = p < 0 ? -p : p;
-         pb = pc < 0 ? -pc : pc;
-         pc = (p + pc) < 0 ? -(p + pc) : p + pc;
-#endif
-
-         p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
-
-         *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
-      }
-      best_row = png_ptr->paeth_row;
-   }
-
-   else if (filter_to_do & PNG_FILTER_PAETH)
-   {
-      png_bytep rp, dp, pp, cp, lp;
-      png_uint_32 sum = 0, lmins = mins;
-      png_size_t i;
-      int v;
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-      {
-         int j;
-         png_uint_32 lmhi, lmlo;
-         lmlo = lmins & PNG_LOMASK;
-         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
-
-         for (j = 0; j < num_p_filters; j++)
-         {
-            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_PAETH)
-            {
-               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-
-               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-            }
-         }
-
-         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_PAETH]) >>
-             PNG_COST_SHIFT;
-
-         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_PAETH]) >>
-             PNG_COST_SHIFT;
-
-         if (lmhi > PNG_HIMASK)
-            lmins = PNG_MAXSUM;
-
-         else
-            lmins = (lmhi << PNG_HISHIFT) + lmlo;
-      }
-#endif
-
-      for (i = 0, rp = row_buf + 1, dp = png_ptr->paeth_row + 1,
-          pp = prev_row + 1; i < bpp; i++)
-      {
-         v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
-
-         sum += (v < 128) ? v : 256 - v;
-      }
-
-      for (lp = row_buf + 1, cp = prev_row + 1; i < row_bytes; i++)
-      {
-         int a, b, c, pa, pb, pc, p;
-
-         b = *pp++;
-         c = *cp++;
-         a = *lp++;
-
-#ifndef PNG_SLOW_PAETH
-         p = b - c;
-         pc = a - c;
-#ifdef PNG_USE_ABS
-         pa = abs(p);
-         pb = abs(pc);
-         pc = abs(p + pc);
-#else
-         pa = p < 0 ? -p : p;
-         pb = pc < 0 ? -pc : pc;
-         pc = (p + pc) < 0 ? -(p + pc) : p + pc;
-#endif
-         p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
-#else /* PNG_SLOW_PAETH */
-         p = a + b - c;
-         pa = abs(p - a);
-         pb = abs(p - b);
-         pc = abs(p - c);
-
-         if (pa <= pb && pa <= pc)
-            p = a;
-
-         else if (pb <= pc)
-            p = b;
-
-         else
-            p = c;
-#endif /* PNG_SLOW_PAETH */
-
-         v = *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
-
-         sum += (v < 128) ? v : 256 - v;
-
-         if (sum > lmins)  /* We are already worse, don't continue. */
-            break;
-      }
-
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
-      {
-         int j;
-         png_uint_32 sumhi, sumlo;
-         sumlo = sum & PNG_LOMASK;
-         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
-
-         for (j = 0; j < num_p_filters; j++)
-         {
-            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_PAETH)
-            {
-               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-
-               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
-                   PNG_WEIGHT_SHIFT;
-            }
-         }
-
-         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_PAETH]) >>
-             PNG_COST_SHIFT;
-
-         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_PAETH]) >>
-             PNG_COST_SHIFT;
-
-         if (sumhi > PNG_HIMASK)
-            sum = PNG_MAXSUM;
-
-         else
-            sum = (sumhi << PNG_HISHIFT) + sumlo;
-      }
-#endif
-
-      if (sum < mins)
-      {
-         best_row = png_ptr->paeth_row;
-      }
-   }
-#endif /* PNG_WRITE_FILTER_SUPPORTED */
-
-   /* Do the actual writing of the filtered row data from the chosen filter. */
-   png_write_filtered_row(png_ptr, best_row, row_info->rowbytes+1);
-
-#ifdef PNG_WRITE_FILTER_SUPPORTED
-#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
-   /* Save the type of filter we picked this time for future calculations */
-   if (png_ptr->num_prev_filters > 0)
-   {
-      int j;
-
-      for (j = 1; j < num_p_filters; j++)
-      {
-         png_ptr->prev_filters[j] = png_ptr->prev_filters[j - 1];
-      }
-
-      png_ptr->prev_filters[j] = best_row[0];
-   }
-#endif
-#endif /* PNG_WRITE_FILTER_SUPPORTED */
-}
-
-
-/* Do the actual writing of a previously filtered row. */
-static void
-png_write_filtered_row(png_structp png_ptr, png_bytep filtered_row,
-   png_size_t avail/*includes filter byte*/)
-{
-   png_debug(1, "in png_write_filtered_row");
-
-   png_debug1(2, "filter = %d", filtered_row[0]);
-   /* Set up the zlib input buffer */
-
-   png_ptr->zstream.next_in = filtered_row;
-   png_ptr->zstream.avail_in = 0;
-   /* Repeat until we have compressed all the data */
-   do
-   {
-      int ret; /* Return of zlib */
-
-      /* Record the number of bytes available - zlib supports at least 65535
-       * bytes at one step, depending on the size of the zlib type 'uInt', the
-       * maximum size zlib can write at once is ZLIB_IO_MAX (from pngpriv.h).
-       * Use this because on 16 bit systems 'rowbytes' can be up to 65536 (i.e.
-       * one more than 16 bits) and, in this case 'rowbytes+1' can overflow a
-       * uInt.  ZLIB_IO_MAX can be safely reduced to cause zlib to be called
-       * with smaller chunks of data.
-       */
-      if (png_ptr->zstream.avail_in == 0)
-      {
-         if (avail > ZLIB_IO_MAX)
-         {
-            png_ptr->zstream.avail_in  = ZLIB_IO_MAX;
-            avail -= ZLIB_IO_MAX;
-         }
-
-         else
-         {
-            /* So this will fit in the available uInt space: */
-            png_ptr->zstream.avail_in = (uInt)avail;
-            avail = 0;
-         }
-      }
-
-      /* Compress the data */
-      ret = deflate(&png_ptr->zstream, Z_NO_FLUSH);
-
-      /* Check for compression errors */
-      if (ret != Z_OK)
-      {
-         if (png_ptr->zstream.msg != NULL)
-            png_error(png_ptr, png_ptr->zstream.msg);
-
-         else
-            png_error(png_ptr, "zlib error");
-      }
-
-      /* See if it is time to write another IDAT */
-      if (!(png_ptr->zstream.avail_out))
-      {
-         /* Write the IDAT and reset the zlib output buffer */
-         png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size);
-      }
-   /* Repeat until all data has been compressed */
-   } while (avail > 0 || png_ptr->zstream.avail_in > 0);
-
-   /* Swap the current and previous rows */
-   if (png_ptr->prev_row != NULL)
-   {
-      png_bytep tptr;
-
-      tptr = png_ptr->prev_row;
-      png_ptr->prev_row = png_ptr->row_buf;
-      png_ptr->row_buf = tptr;
-   }
-
-   /* Finish row - updates counters and flushes zlib if last row */
-   png_write_finish_row(png_ptr);
-
-#ifdef PNG_WRITE_FLUSH_SUPPORTED
-   png_ptr->flush_rows++;
-
-   if (png_ptr->flush_dist > 0 &&
-       png_ptr->flush_rows >= png_ptr->flush_dist)
-   {
-      png_write_flush(png_ptr);
-   }
-#endif
-}
-#endif /* PNG_WRITE_SUPPORTED */

From a74e4367b07f24b70efb9b044222b87d9fe1f6eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 15 Feb 2024 17:11:24 +0000
Subject: [PATCH 287/314] Upgrade zlib to 1.3.1

---
 niftyreg_build_version.txt |    2 +-
 reg-io/zlib/CMakeLists.txt |    2 +-
 reg-io/zlib/FAQ            |  339 --
 reg-io/zlib/README         |  100 +-
 reg-io/zlib/adler32.c      |  103 +-
 reg-io/zlib/compress.c     |   66 +-
 reg-io/zlib/crc32.c        | 1252 +++--
 reg-io/zlib/crc32.h        | 9877 ++++++++++++++++++++++++++++++++++--
 reg-io/zlib/deflate.c      | 1901 ++++---
 reg-io/zlib/deflate.h      |  467 +-
 reg-io/zlib/gzclose.c      |   23 +
 reg-io/zlib/gzguts.h       |  214 +
 reg-io/zlib/gzio.c         | 1026 ----
 reg-io/zlib/gzlib.c        |  582 +++
 reg-io/zlib/gzread.c       |  602 +++
 reg-io/zlib/gzwrite.c      |  631 +++
 reg-io/zlib/infback.c      |  155 +-
 reg-io/zlib/inffast.c      |  162 +-
 reg-io/zlib/inffast.h      |    4 +-
 reg-io/zlib/inffixed.h     |  184 +-
 reg-io/zlib/inflate.c      |  604 ++-
 reg-io/zlib/inflate.h      |  171 +-
 reg-io/zlib/inftrees.c     |  118 +-
 reg-io/zlib/inftrees.h     |   49 +-
 reg-io/zlib/minigzip.c     |  322 --
 reg-io/zlib/trees.c        |  786 ++-
 reg-io/zlib/trees.h        |  232 +-
 reg-io/zlib/uncompr.c      |  100 +-
 reg-io/zlib/zconf.h        |  369 +-
 reg-io/zlib/zlib.h         | 3136 +++++++-----
 reg-io/zlib/zutil.c        |  151 +-
 reg-io/zlib/zutil.h        |  223 +-
 32 files changed, 17605 insertions(+), 6348 deletions(-)
 delete mode 100644 reg-io/zlib/FAQ
 create mode 100644 reg-io/zlib/gzclose.c
 create mode 100644 reg-io/zlib/gzguts.h
 delete mode 100644 reg-io/zlib/gzio.c
 create mode 100644 reg-io/zlib/gzlib.c
 create mode 100644 reg-io/zlib/gzread.c
 create mode 100644 reg-io/zlib/gzwrite.c
 delete mode 100644 reg-io/zlib/minigzip.c

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index ec8785ec..c8f0fcc6 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-405
+406
diff --git a/reg-io/zlib/CMakeLists.txt b/reg-io/zlib/CMakeLists.txt
index ef827947..79533400 100644
--- a/reg-io/zlib/CMakeLists.txt
+++ b/reg-io/zlib/CMakeLists.txt
@@ -1,6 +1,6 @@
 #-----------------------------------------------------------------------------
 if(NOT ZLIB_FOUND)
-    add_library(z adler32.c compress.c crc32.c gzio.c uncompr.c deflate.c trees.c zutil.c inflate.c infback.c inftrees.c inffast.c)
+    add_library(z adler32.c compress.c crc32.c deflate.c gzclose.c gzlib.c gzread.c gzwrite.c infback.c inffast.c inflate.c inftrees.c trees.c uncompr.c zutil.c)
     install(TARGETS z
             RUNTIME DESTINATION bin COMPONENT Development
             LIBRARY DESTINATION lib COMPONENT Development
diff --git a/reg-io/zlib/FAQ b/reg-io/zlib/FAQ
deleted file mode 100644
index 441d910d..00000000
--- a/reg-io/zlib/FAQ
+++ /dev/null
@@ -1,339 +0,0 @@
-
-                Frequently Asked Questions about zlib
-
-
-If your question is not there, please check the zlib home page
-http://www.zlib.org which may have more recent information.
-The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html
-
-
- 1. Is zlib Y2K-compliant?
-
-    Yes. zlib doesn't handle dates.
-
- 2. Where can I get a Windows DLL version?
-
-    The zlib sources can be compiled without change to produce a DLL.
-    See the file win32/DLL_FAQ.txt in the zlib distribution.
-    Pointers to the precompiled DLL are found in the zlib web site at
-    http://www.zlib.org.
-
- 3. Where can I get a Visual Basic interface to zlib?
-
-    See
-        * http://www.dogma.net/markn/articles/zlibtool/zlibtool.htm
-        * contrib/visual-basic.txt in the zlib distribution
-        * win32/DLL_FAQ.txt in the zlib distribution
-
- 4. compress() returns Z_BUF_ERROR.
-
-    Make sure that before the call of compress, the length of the compressed
-    buffer is equal to the total size of the compressed buffer and not
-    zero. For Visual Basic, check that this parameter is passed by reference
-    ("as any"), not by value ("as long").
-
- 5. deflate() or inflate() returns Z_BUF_ERROR.
-
-    Before making the call, make sure that avail_in and avail_out are not
-    zero. When setting the parameter flush equal to Z_FINISH, also make sure
-    that avail_out is big enough to allow processing all pending input.
-    Note that a Z_BUF_ERROR is not fatal--another call to deflate() or
-    inflate() can be made with more input or output space. A Z_BUF_ERROR
-    may in fact be unavoidable depending on how the functions are used, since
-    it is not possible to tell whether or not there is more output pending
-    when strm.avail_out returns with zero.
-
- 6. Where's the zlib documentation (man pages, etc.)?
-
-    It's in zlib.h for the moment, and Francis S. Lin has converted it to a
-    web page zlib.html. Volunteers to transform this to Unix-style man pages,
-    please contact us (zlib@gzip.org). Examples of zlib usage are in the files
-    example.c and minigzip.c.
-
- 7. Why don't you use GNU autoconf or libtool or ...?
-
-    Because we would like to keep zlib as a very small and simple
-    package. zlib is rather portable and doesn't need much configuration.
-
- 8. I found a bug in zlib.
-
-    Most of the time, such problems are due to an incorrect usage of
-    zlib. Please try to reproduce the problem with a small program and send
-    the corresponding source to us at zlib@gzip.org . Do not send
-    multi-megabyte data files without prior agreement.
-
- 9. Why do I get "undefined reference to gzputc"?
-
-    If "make test" produces something like
-
-       example.o(.text+0x154): undefined reference to `gzputc'
-
-    check that you don't have old files libz.* in /usr/lib, /usr/local/lib or
-    /usr/X11R6/lib. Remove any old versions, then do "make install".
-
-10. I need a Delphi interface to zlib.
-
-    See the contrib/delphi directory in the zlib distribution.
-
-11. Can zlib handle .zip archives?
-
-    Not by itself, no.  See the directory contrib/minizip in the zlib
-    distribution.
-
-12. Can zlib handle .Z files?
-
-    No, sorry. You have to spawn an uncompress or gunzip subprocess, or adapt
-    the code of uncompress on your own.
-
-13. How can I make a Unix shared library?
-
-    make clean
-    ./configure -s
-    make
-
-14. How do I install a shared zlib library on Unix?
-
-    After the above, then:
-
-    make install
-
-    However, many flavors of Unix come with a shared zlib already installed.
-    Before going to the trouble of compiling a shared version of zlib and
-    trying to install it, you may want to check if it's already there! If you
-    can #include <zlib.h>, it's there. The -lz option will probably link to it.
-
-15. I have a question about OttoPDF.
-
-    We are not the authors of OttoPDF. The real author is on the OttoPDF web
-    site: Joel Hainley, jhainley@myndkryme.com.
-
-16. Can zlib decode Flate data in an Adobe PDF file?
-
-    Yes. See http://www.fastio.com/ (ClibPDF), or http://www.pdflib.com/ .
-    To modify PDF forms, see http://sourceforge.net/projects/acroformtool/ .
-
-17. Why am I getting this "register_frame_info not found" error on Solaris?
-
-    After installing zlib 1.1.4 on Solaris 2.6, running applications using zlib
-    generates an error such as:
-
-        ld.so.1: rpm: fatal: relocation error: file /usr/local/lib/libz.so:
-        symbol __register_frame_info: referenced symbol not found
-
-    The symbol __register_frame_info is not part of zlib, it is generated by
-    the C compiler (cc or gcc). You must recompile applications using zlib
-    which have this problem. This problem is specific to Solaris. See
-    http://www.sunfreeware.com for Solaris versions of zlib and applications
-    using zlib.
-
-18. Why does gzip give an error on a file I make with compress/deflate?
-
-    The compress and deflate functions produce data in the zlib format, which
-    is different and incompatible with the gzip format. The gz* functions in
-    zlib on the other hand use the gzip format. Both the zlib and gzip
-    formats use the same compressed data format internally, but have different
-    headers and trailers around the compressed data.
-
-19. Ok, so why are there two different formats?
-
-    The gzip format was designed to retain the directory information about
-    a single file, such as the name and last modification date. The zlib
-    format on the other hand was designed for in-memory and communication
-    channel applications, and has a much more compact header and trailer and
-    uses a faster integrity check than gzip.
-
-20. Well that's nice, but how do I make a gzip file in memory?
-
-    You can request that deflate write the gzip format instead of the zlib
-    format using deflateInit2(). You can also request that inflate decode
-    the gzip format using inflateInit2(). Read zlib.h for more details.
-
-21. Is zlib thread-safe?
-
-    Yes. However any library routines that zlib uses and any application-
-    provided memory allocation routines must also be thread-safe. zlib's gz*
-    functions use stdio library routines, and most of zlib's functions use the
-    library memory allocation routines by default. zlib's Init functions allow
-    for the application to provide custom memory allocation routines.
-
-    Of course, you should only operate on any given zlib or gzip stream from a
-    single thread at a time.
-
-22. Can I use zlib in my commercial application?
-
-    Yes. Please read the license in zlib.h.
-
-23. Is zlib under the GNU license?
-
-    No. Please read the license in zlib.h.
-
-24. The license says that altered source versions must be "plainly marked". So
-    what exactly do I need to do to meet that requirement?
-
-    You need to change the ZLIB_VERSION and ZLIB_VERNUM #defines in zlib.h. In
-    particular, the final version number needs to be changed to "f", and an
-    identification string should be appended to ZLIB_VERSION. Version numbers
-    x.x.x.f are reserved for modifications to zlib by others than the zlib
-    maintainers. For example, if the version of the base zlib you are altering
-    is "1.2.3.4", then in zlib.h you should change ZLIB_VERNUM to 0x123f, and
-    ZLIB_VERSION to something like "1.2.3.f-zachary-mods-v3". You can also
-    update the version strings in deflate.c and inftrees.c.
-
-    For altered source distributions, you should also note the origin and
-    nature of the changes in zlib.h, as well as in ChangeLog and README, along
-    with the dates of the alterations. The origin should include at least your
-    name (or your company's name), and an email address to contact for help or
-    issues with the library.
-
-    Note that distributing a compiled zlib library along with zlib.h and
-    zconf.h is also a source distribution, and so you should change
-    ZLIB_VERSION and ZLIB_VERNUM and note the origin and nature of the changes
-    in zlib.h as you would for a full source distribution.
-
-25. Will zlib work on a big-endian or little-endian architecture, and can I
-    exchange compressed data between them?
-
-    Yes and yes.
-
-26. Will zlib work on a 64-bit machine?
-
-    It should. It has been tested on 64-bit machines, and has no dependence
-    on any data types being limited to 32-bits in length. If you have any
-    difficulties, please provide a complete problem report to zlib@gzip.org
-
-27. Will zlib decompress data from the PKWare Data Compression Library?
-
-    No. The PKWare DCL uses a completely different compressed data format
-    than does PKZIP and zlib. However, you can look in zlib's contrib/blast
-    directory for a possible solution to your problem.
-
-28. Can I access data randomly in a compressed stream?
-
-    No, not without some preparation. If when compressing you periodically
-    use Z_FULL_FLUSH, carefully write all the pending data at those points,
-    and keep an index of those locations, then you can start decompression
-    at those points. You have to be careful to not use Z_FULL_FLUSH too
-    often, since it can significantly degrade compression.
-
-29. Does zlib work on MVS, OS/390, CICS, etc.?
-
-    We don't know for sure. We have heard occasional reports of success on
-    these systems. If you do use it on one of these, please provide us with
-    a report, instructions, and patches that we can reference when we get
-    these questions. Thanks.
-
-30. Is there some simpler, easier to read version of inflate I can look at
-    to understand the deflate format?
-
-    First off, you should read RFC 1951. Second, yes. Look in zlib's
-    contrib/puff directory.
-
-31. Does zlib infringe on any patents?
-
-    As far as we know, no. In fact, that was originally the whole point behind
-    zlib. Look here for some more information:
-
-    http://www.gzip.org/#faq11
-
-32. Can zlib work with greater than 4 GB of data?
-
-    Yes. inflate() and deflate() will process any amount of data correctly.
-    Each call of inflate() or deflate() is limited to input and output chunks
-    of the maximum value that can be stored in the compiler's "unsigned int"
-    type, but there is no limit to the number of chunks. Note however that the
-    strm.total_in and strm_total_out counters may be limited to 4 GB. These
-    counters are provided as a convenience and are not used internally by
-    inflate() or deflate(). The application can easily set up its own counters
-    updated after each call of inflate() or deflate() to count beyond 4 GB.
-    compress() and uncompress() may be limited to 4 GB, since they operate in a
-    single call. gzseek() and gztell() may be limited to 4 GB depending on how
-    zlib is compiled. See the zlibCompileFlags() function in zlib.h.
-
-    The word "may" appears several times above since there is a 4 GB limit
-    only if the compiler's "long" type is 32 bits. If the compiler's "long"
-    type is 64 bits, then the limit is 16 exabytes.
-
-33. Does zlib have any security vulnerabilities?
-
-    The only one that we are aware of is potentially in gzprintf(). If zlib
-    is compiled to use sprintf() or vsprintf(), then there is no protection
-    against a buffer overflow of a 4K string space, other than the caller of
-    gzprintf() assuring that the output will not exceed 4K. On the other
-    hand, if zlib is compiled to use snprintf() or vsnprintf(), which should
-    normally be the case, then there is no vulnerability. The ./configure
-    script will display warnings if an insecure variation of sprintf() will
-    be used by gzprintf(). Also the zlibCompileFlags() function will return
-    information on what variant of sprintf() is used by gzprintf().
-
-    If you don't have snprintf() or vsnprintf() and would like one, you can
-    find a portable implementation here:
-
-        http://www.ijs.si/software/snprintf/
-
-    Note that you should be using the most recent version of zlib. Versions
-    1.1.3 and before were subject to a double-free vulnerability.
-
-34. Is there a Java version of zlib?
-
-    Probably what you want is to use zlib in Java. zlib is already included
-    as part of the Java SDK in the java.util.zip package. If you really want
-    a version of zlib written in the Java language, look on the zlib home
-    page for links: http://www.zlib.org/
-
-35. I get this or that compiler or source-code scanner warning when I crank it
-    up to maximally-pedantic. Can't you guys write proper code?
-
-    Many years ago, we gave up attempting to avoid warnings on every compiler
-    in the universe. It just got to be a waste of time, and some compilers
-    were downright silly. So now, we simply make sure that the code always
-    works.
-
-36. Valgrind (or some similar memory access checker) says that deflate is
-    performing a conditional jump that depends on an uninitialized value.
-    Isn't that a bug?
-
-    No.  That is intentional for performance reasons, and the output of
-    deflate is not affected.  This only started showing up recently since
-    zlib 1.2.x uses malloc() by default for allocations, whereas earlier
-    versions used calloc(), which zeros out the allocated memory.
-
-37. Will zlib read the (insert any ancient or arcane format here) compressed
-    data format?
-
-    Probably not. Look in the comp.compression FAQ for pointers to various
-    formats and associated software.
-
-38. How can I encrypt/decrypt zip files with zlib?
-
-    zlib doesn't support encryption. The original PKZIP encryption is very weak
-    and can be broken with freely available programs. To get strong encryption,
-    use GnuPG, http://www.gnupg.org/ , which already includes zlib compression.
-    For PKZIP compatible "encryption", look at http://www.info-zip.org/
-
-39. What's the difference between the "gzip" and "deflate" HTTP 1.1 encodings?
-
-    "gzip" is the gzip format, and "deflate" is the zlib format. They should
-    probably have called the second one "zlib" instead to avoid confusion
-    with the raw deflate compressed data format. While the HTTP 1.1 RFC 2616
-    correctly points to the zlib specification in RFC 1950 for the "deflate"
-    transfer encoding, there have been reports of servers and browsers that
-    incorrectly produce or expect raw deflate data per the deflate
-    specficiation in RFC 1951, most notably Microsoft. So even though the
-    "deflate" transfer encoding using the zlib format would be the more
-    efficient approach (and in fact exactly what the zlib format was designed
-    for), using the "gzip" transfer encoding is probably more reliable due to
-    an unfortunate choice of name on the part of the HTTP 1.1 authors.
-
-    Bottom line: use the gzip format for HTTP 1.1 encoding.
-
-40. Does zlib support the new "Deflate64" format introduced by PKWare?
-
-    No. PKWare has apparently decided to keep that format proprietary, since
-    they have not documented it as they have previous compression formats.
-    In any case, the compression improvements are so modest compared to other
-    more modern approaches, that it's not worth the effort to implement.
-
-41. Can you please sign these lengthy legal documents and fax them back to us
-    so that we can use your software in our product?
-
-    No. Go away. Shoo.
diff --git a/reg-io/zlib/README b/reg-io/zlib/README
index 758cc500..c5f91754 100644
--- a/reg-io/zlib/README
+++ b/reg-io/zlib/README
@@ -1,56 +1,51 @@
 ZLIB DATA COMPRESSION LIBRARY
 
-zlib 1.2.3 is a general purpose data compression library.  All the code is
+zlib 1.3.1 is a general purpose data compression library.  All the code is
 thread safe.  The data format used by the zlib library is described by RFCs
 (Request for Comments) 1950 to 1952 in the files
-http://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format)
-and rfc1952.txt (gzip format). These documents are also available in other
-formats from ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html
+http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
+rfc1952 (gzip format).
 
 All functions of the compression library are documented in the file zlib.h
-(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example
-of the library is given in the file example.c which also tests that the library
-is working correctly. Another example is given in the file minigzip.c. The
-compression library itself is composed of all source files except example.c and
-minigzip.c.
+(volunteer to write man pages welcome, contact zlib@gzip.org).  A usage example
+of the library is given in the file test/example.c which also tests that
+the library is working correctly.  Another example is given in the file
+test/minigzip.c.  The compression library itself is composed of all source
+files in the root directory.
 
 To compile all files and run the test program, follow the instructions given at
-the top of Makefile. In short "make test; make install" should work for most
-machines. For Unix: "./configure; make test; make install". For MSDOS, use one
-of the special makefiles such as Makefile.msc. For VMS, use make_vms.com.
+the top of Makefile.in.  In short "./configure; make test", and if that goes
+well, "make install" should work for most flavors of Unix.  For Windows, use
+one of the special makefiles in win32/ or contrib/vstudio/ .  For VMS, use
+make_vms.com.
 
 Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant
-<info@winimage.com> for the Windows DLL version. The zlib home page is
-http://www.zlib.org or http://www.gzip.org/zlib/ Before reporting a problem,
-please check this site to verify that you have the latest version of zlib;
-otherwise get the latest version and check whether the problem still exists or
-not.
+<info@winimage.com> for the Windows DLL version.  The zlib home page is
+http://zlib.net/ .  Before reporting a problem, please check this site to
+verify that you have the latest version of zlib; otherwise get the latest
+version and check whether the problem still exists or not.
 
-PLEASE read the zlib FAQ http://www.gzip.org/zlib/zlib_faq.html before asking
-for help.
+PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.
 
-Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan. 1997
-issue of  Dr. Dobb's Journal; a copy of the article is available in
-http://dogma.net/markn/articles/zlibtool/zlibtool.htm
+Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan.  1997
+issue of Dr.  Dobb's Journal; a copy of the article is available at
+https://marknelson.us/posts/1997/01/01/zlib-engine.html .
 
-The changes made in version 1.2.3 are documented in the file ChangeLog.
+The changes made in version 1.3.1 are documented in the file ChangeLog.
 
-Unsupported third party contributions are provided in directory "contrib".
+Unsupported third party contributions are provided in directory contrib/ .
 
-A Java implementation of zlib is available in the Java Development Kit
-http://java.sun.com/j2se/1.4.2/docs/api/java/util/zip/package-summary.html
-See the zlib home page http://www.zlib.org for details.
+zlib is available in Java using the java.util.zip package. Follow the API
+Documentation link at: https://docs.oracle.com/search/?q=java.util.zip .
 
-A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is in the
-CPAN (Comprehensive Perl Archive Network) sites
-http://www.cpan.org/modules/by-module/Compress/
+A Perl interface to zlib and bzip2 written by Paul Marquess <pmqs@cpan.org>
+can be found at https://github.com/pmqs/IO-Compress .
 
 A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
 available in Python 1.5 and later versions, see
-http://www.python.org/doc/lib/module-zlib.html
+http://docs.python.org/library/zlib.html .
 
-A zlib binding for TCL written by Andreas Kupries <a.kupries@westend.com> is
-availlable at http://www.oche.de/~akupries/soft/trf/trf_zip.html
+zlib is built into tcl: http://wiki.tcl.tk/4610 .
 
 An experimental package to read and write files in .zip format, written on top
 of zlib by Gilles Vollant <info@winimage.com>, is available in the
@@ -68,31 +63,27 @@ Notes for some targets:
 - zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works
   when compiled with cc.
 
-- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is
+- On Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1 is
   necessary to get gzprintf working correctly. This is done by configure.
 
 - zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
   other compilers. Use "make test" to check your compiler.
 
-- gzdopen is not supported on RISCOS, BEOS and by some Mac compilers.
+- gzdopen is not supported on RISCOS or BEOS.
 
 - For PalmOs, see http://palmzlib.sourceforge.net/
 
-- When building a shared, i.e. dynamic library on Mac OS X, the library must be
-  installed before testing (do "make install" before "make test"), since the
-  library location is specified in the library.
-
 
 Acknowledgments:
 
-  The deflate format used by zlib was defined by Phil Katz. The deflate
-  and zlib specifications were written by L. Peter Deutsch. Thanks to all the
-  people who reported problems and suggested various improvements in zlib;
-  they are too numerous to cite here.
+  The deflate format used by zlib was defined by Phil Katz.  The deflate and
+  zlib specifications were written by L.  Peter Deutsch.  Thanks to all the
+  people who reported problems and suggested various improvements in zlib; they
+  are too numerous to cite here.
 
 Copyright notice:
 
- (C) 1995-2004 Jean-loup Gailly and Mark Adler
+ (C) 1995-2024 Jean-loup Gailly and Mark Adler
 
   This software is provided 'as-is', without any express or implied
   warranty.  In no event will the authors be held liable for any damages
@@ -113,13 +104,14 @@ Copyright notice:
   Jean-loup Gailly        Mark Adler
   jloup@gzip.org          madler@alumni.caltech.edu
 
-If you use the zlib library in a product, we would appreciate *not*
-receiving lengthy legal documents to sign. The sources are provided
-for free but without warranty of any kind.  The library has been
-entirely written by Jean-loup Gailly and Mark Adler; it does not
-include third-party code.
-
-If you redistribute modified sources, we would appreciate that you include
-in the file ChangeLog history information documenting your changes. Please
-read the FAQ for more information on the distribution of modified source
-versions.
+If you use the zlib library in a product, we would appreciate *not* receiving
+lengthy legal documents to sign.  The sources are provided for free but without
+warranty of any kind.  The library has been entirely written by Jean-loup
+Gailly and Mark Adler; it does not include third-party code.  We make all
+contributions to and distributions of this project solely in our personal
+capacity, and are not conveying any rights to any intellectual property of
+any third parties.
+
+If you redistribute modified sources, we would appreciate that you include in
+the file ChangeLog history information documenting your changes.  Please read
+the FAQ for more information on the distribution of modified source versions.
diff --git a/reg-io/zlib/adler32.c b/reg-io/zlib/adler32.c
index 007ba262..04b81d29 100644
--- a/reg-io/zlib/adler32.c
+++ b/reg-io/zlib/adler32.c
@@ -1,14 +1,13 @@
 /* adler32.c -- compute the Adler-32 checksum of a data stream
- * Copyright (C) 1995-2004 Mark Adler
+ * Copyright (C) 1995-2011, 2016 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
 /* @(#) $Id$ */
 
-#define ZLIB_INTERNAL
-#include "zlib.h"
+#include "zutil.h"
 
-#define BASE 65521UL    /* largest prime smaller than 65536 */
+#define BASE 65521U     /* largest prime smaller than 65536 */
 #define NMAX 5552
 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
 
@@ -18,47 +17,48 @@
 #define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
 #define DO16(buf)   DO8(buf,0); DO8(buf,8);
 
-/* use NO_DIVIDE if your processor does not do division in hardware */
+/* use NO_DIVIDE if your processor does not do division in hardware --
+   try it both ways to see which is faster */
 #ifdef NO_DIVIDE
-#  define MOD(a) \
+/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
+   (thank you to John Reiser for pointing this out) */
+#  define CHOP(a) \
+    do { \
+        unsigned long tmp = a >> 16; \
+        a &= 0xffffUL; \
+        a += (tmp << 4) - tmp; \
+    } while (0)
+#  define MOD28(a) \
     do { \
-        if (a >= (BASE << 16)) a -= (BASE << 16); \
-        if (a >= (BASE << 15)) a -= (BASE << 15); \
-        if (a >= (BASE << 14)) a -= (BASE << 14); \
-        if (a >= (BASE << 13)) a -= (BASE << 13); \
-        if (a >= (BASE << 12)) a -= (BASE << 12); \
-        if (a >= (BASE << 11)) a -= (BASE << 11); \
-        if (a >= (BASE << 10)) a -= (BASE << 10); \
-        if (a >= (BASE << 9)) a -= (BASE << 9); \
-        if (a >= (BASE << 8)) a -= (BASE << 8); \
-        if (a >= (BASE << 7)) a -= (BASE << 7); \
-        if (a >= (BASE << 6)) a -= (BASE << 6); \
-        if (a >= (BASE << 5)) a -= (BASE << 5); \
-        if (a >= (BASE << 4)) a -= (BASE << 4); \
-        if (a >= (BASE << 3)) a -= (BASE << 3); \
-        if (a >= (BASE << 2)) a -= (BASE << 2); \
-        if (a >= (BASE << 1)) a -= (BASE << 1); \
+        CHOP(a); \
         if (a >= BASE) a -= BASE; \
     } while (0)
-#  define MOD4(a) \
+#  define MOD(a) \
     do { \
-        if (a >= (BASE << 4)) a -= (BASE << 4); \
-        if (a >= (BASE << 3)) a -= (BASE << 3); \
-        if (a >= (BASE << 2)) a -= (BASE << 2); \
-        if (a >= (BASE << 1)) a -= (BASE << 1); \
+        CHOP(a); \
+        MOD28(a); \
+    } while (0)
+#  define MOD63(a) \
+    do { /* this assumes a is not negative */ \
+        z_off64_t tmp = a >> 32; \
+        a &= 0xffffffffL; \
+        a += (tmp << 8) - (tmp << 5) + tmp; \
+        tmp = a >> 16; \
+        a &= 0xffffL; \
+        a += (tmp << 4) - tmp; \
+        tmp = a >> 16; \
+        a &= 0xffffL; \
+        a += (tmp << 4) - tmp; \
         if (a >= BASE) a -= BASE; \
     } while (0)
 #else
 #  define MOD(a) a %= BASE
-#  define MOD4(a) a %= BASE
+#  define MOD28(a) a %= BASE
+#  define MOD63(a) a %= BASE
 #endif
 
 /* ========================================================================= */
-uLong ZEXPORT adler32(adler, buf, len)
-    uLong adler;
-    const Bytef *buf;
-    uInt len;
-{
+uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, z_size_t len) {
     unsigned long sum2;
     unsigned n;
 
@@ -89,7 +89,7 @@ uLong ZEXPORT adler32(adler, buf, len)
         }
         if (adler >= BASE)
             adler -= BASE;
-        MOD4(sum2);             /* only added so many BASE's */
+        MOD28(sum2);            /* only added so many BASE's */
         return adler | (sum2 << 16);
     }
 
@@ -125,25 +125,40 @@ uLong ZEXPORT adler32(adler, buf, len)
 }
 
 /* ========================================================================= */
-uLong ZEXPORT adler32_combine(adler1, adler2, len2)
-    uLong adler1;
-    uLong adler2;
-    z_off_t len2;
-{
+uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len) {
+    return adler32_z(adler, buf, len);
+}
+
+/* ========================================================================= */
+local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2) {
     unsigned long sum1;
     unsigned long sum2;
     unsigned rem;
 
+    /* for negative len, return invalid adler32 as a clue for debugging */
+    if (len2 < 0)
+        return 0xffffffffUL;
+
     /* the derivation of this formula is left as an exercise for the reader */
-    rem = (unsigned)(len2 % BASE);
+    MOD63(len2);                /* assumes len2 >= 0 */
+    rem = (unsigned)len2;
     sum1 = adler1 & 0xffff;
     sum2 = rem * sum1;
     MOD(sum2);
     sum1 += (adler2 & 0xffff) + BASE - 1;
     sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
-    if (sum1 > BASE) sum1 -= BASE;
-    if (sum1 > BASE) sum1 -= BASE;
-    if (sum2 > (BASE << 1)) sum2 -= (BASE << 1);
-    if (sum2 > BASE) sum2 -= BASE;
+    if (sum1 >= BASE) sum1 -= BASE;
+    if (sum1 >= BASE) sum1 -= BASE;
+    if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
+    if (sum2 >= BASE) sum2 -= BASE;
     return sum1 | (sum2 << 16);
 }
+
+/* ========================================================================= */
+uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2) {
+    return adler32_combine_(adler1, adler2, len2);
+}
+
+uLong ZEXPORT adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2) {
+    return adler32_combine_(adler1, adler2, len2);
+}
diff --git a/reg-io/zlib/compress.c b/reg-io/zlib/compress.c
index df04f014..f43bacf7 100644
--- a/reg-io/zlib/compress.c
+++ b/reg-io/zlib/compress.c
@@ -1,5 +1,5 @@
 /* compress.c -- compress a memory buffer
- * Copyright (C) 1995-2003 Jean-loup Gailly.
+ * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -19,25 +19,15 @@
    memory, Z_BUF_ERROR if there was not enough room in the output buffer,
    Z_STREAM_ERROR if the level parameter is invalid.
 */
-int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-    int level;
-{
+int ZEXPORT compress2(Bytef *dest, uLongf *destLen, const Bytef *source,
+                      uLong sourceLen, int level) {
     z_stream stream;
     int err;
+    const uInt max = (uInt)-1;
+    uLong left;
 
-    stream.next_in = (Bytef*)source;
-    stream.avail_in = (uInt)sourceLen;
-#ifdef MAXSEG_64K
-    /* Check for source > 64K on 16-bit machine: */
-    if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;
-#endif
-    stream.next_out = dest;
-    stream.avail_out = (uInt)*destLen;
-    if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
+    left = *destLen;
+    *destLen = 0;
 
     stream.zalloc = (alloc_func)0;
     stream.zfree = (free_func)0;
@@ -46,25 +36,32 @@ int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
     err = deflateInit(&stream, level);
     if (err != Z_OK) return err;
 
-    err = deflate(&stream, Z_FINISH);
-    if (err != Z_STREAM_END) {
-        deflateEnd(&stream);
-        return err == Z_OK ? Z_BUF_ERROR : err;
-    }
-    *destLen = stream.total_out;
+    stream.next_out = dest;
+    stream.avail_out = 0;
+    stream.next_in = (z_const Bytef *)source;
+    stream.avail_in = 0;
 
-    err = deflateEnd(&stream);
-    return err;
+    do {
+        if (stream.avail_out == 0) {
+            stream.avail_out = left > (uLong)max ? max : (uInt)left;
+            left -= stream.avail_out;
+        }
+        if (stream.avail_in == 0) {
+            stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen;
+            sourceLen -= stream.avail_in;
+        }
+        err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH);
+    } while (err == Z_OK);
+
+    *destLen = stream.total_out;
+    deflateEnd(&stream);
+    return err == Z_STREAM_END ? Z_OK : err;
 }
 
 /* ===========================================================================
  */
-int ZEXPORT compress (dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-{
+int ZEXPORT compress(Bytef *dest, uLongf *destLen, const Bytef *source,
+                     uLong sourceLen) {
     return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
 }
 
@@ -72,8 +69,7 @@ int ZEXPORT compress (dest, destLen, source, sourceLen)
      If the default memLevel or windowBits for deflateInit() is changed, then
    this function needs to be updated.
  */
-uLong ZEXPORT compressBound (sourceLen)
-    uLong sourceLen;
-{
-    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + 11;
+uLong ZEXPORT compressBound(uLong sourceLen) {
+    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
+           (sourceLen >> 25) + 13;
 }
diff --git a/reg-io/zlib/crc32.c b/reg-io/zlib/crc32.c
index f658a9ef..6c38f5c0 100644
--- a/reg-io/zlib/crc32.c
+++ b/reg-io/zlib/crc32.c
@@ -1,12 +1,10 @@
 /* crc32.c -- compute the CRC-32 of a data stream
- * Copyright (C) 1995-2005 Mark Adler
+ * Copyright (C) 1995-2022 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  *
- * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
- * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
- * tables for updating the shift register in one step with three exclusive-ors
- * instead of four steps with four exclusive-ors.  This results in about a
- * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
+ * This interleaved implementation of a CRC makes use of pipelined multiple
+ * arithmetic-logic units, commonly found in modern CPU cores. It is due to
+ * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
  */
 
 /* @(#) $Id$ */
@@ -14,9 +12,12 @@
 /*
   Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
   protection on the static variables used to control the first-use generation
-  of the crc tables.  Therefore, if you #define DYNAMIC_CRC_TABLE, you should
+  of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
   first call get_crc_table() to initialize the tables before allowing more than
   one thread to use crc32().
+
+  MAKECRCH can be #defined to write out crc32.h. A main() routine is also
+  produced, so that this one source file can be compiled to an executable.
  */
 
 #ifdef MAKECRCH
@@ -26,398 +27,1023 @@
 #  endif /* !DYNAMIC_CRC_TABLE */
 #endif /* MAKECRCH */
 
-#include "zutil.h"      /* for STDC and FAR definitions */
+#include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
+
+ /*
+  A CRC of a message is computed on N braids of words in the message, where
+  each word consists of W bytes (4 or 8). If N is 3, for example, then three
+  running sparse CRCs are calculated respectively on each braid, at these
+  indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
+  This is done starting at a word boundary, and continues until as many blocks
+  of N * W bytes as are available have been processed. The results are combined
+  into a single CRC at the end. For this code, N must be in the range 1..6 and
+  W must be 4 or 8. The upper limit on N can be increased if desired by adding
+  more #if blocks, extending the patterns apparent in the code. In addition,
+  crc32.h would need to be regenerated, if the maximum N value is increased.
+
+  N and W are chosen empirically by benchmarking the execution time on a given
+  processor. The choices for N and W below were based on testing on Intel Kaby
+  Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
+  Octeon II processors. The Intel, AMD, and ARM processors were all fastest
+  with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
+  They were all tested with either gcc or clang, all using the -O3 optimization
+  level. Your mileage may vary.
+ */
+
+/* Define N */
+#ifdef Z_TESTN
+#  define N Z_TESTN
+#else
+#  define N 5
+#endif
+#if N < 1 || N > 6
+#  error N must be in 1..6
+#endif
 
-#define local static
+/*
+  z_crc_t must be at least 32 bits. z_word_t must be at least as long as
+  z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
+  that bytes are eight bits.
+ */
 
-/* Find a four-byte integer type for crc32_little() and crc32_big(). */
-#ifndef NOBYFOUR
-#  ifdef STDC           /* need ANSI C limits.h to determine sizes */
-#    include <limits.h>
-#    define BYFOUR
-#    if (UINT_MAX == 0xffffffffUL)
-       typedef unsigned int u4;
+/*
+  Define W and the associated z_word_t type. If W is not defined, then a
+  braided calculation is not used, and the associated tables and code are not
+  compiled.
+ */
+#ifdef Z_TESTW
+#  if Z_TESTW-1 != -1
+#    define W Z_TESTW
+#  endif
+#else
+#  ifdef MAKECRCH
+#    define W 8         /* required for MAKECRCH */
+#  else
+#    if defined(__x86_64__) || defined(__aarch64__)
+#      define W 8
 #    else
-#      if (ULONG_MAX == 0xffffffffUL)
-         typedef unsigned long u4;
-#      else
-#        if (USHRT_MAX == 0xffffffffUL)
-           typedef unsigned short u4;
-#        else
-#          undef BYFOUR     /* can't find a four-byte integer type! */
-#        endif
-#      endif
+#      define W 4
 #    endif
-#  endif /* STDC */
-#endif /* !NOBYFOUR */
-
-/* Definitions for doing the crc four data bytes at a time. */
-#ifdef BYFOUR
-#  define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \
-                (((w)&0xff00)<<8)+(((w)&0xff)<<24))
-   local unsigned long crc32_little OF((unsigned long,
-                        const unsigned char FAR *, unsigned));
-   local unsigned long crc32_big OF((unsigned long,
-                        const unsigned char FAR *, unsigned));
-#  define TBLS 8
-#else
-#  define TBLS 1
-#endif /* BYFOUR */
+#  endif
+#endif
+#ifdef W
+#  if W == 8 && defined(Z_U8)
+     typedef Z_U8 z_word_t;
+#  elif defined(Z_U4)
+#    undef W
+#    define W 4
+     typedef Z_U4 z_word_t;
+#  else
+#    undef W
+#  endif
+#endif
 
-/* Local functions for crc concatenation */
-local unsigned long gf2_matrix_times OF((unsigned long *mat,
-                                         unsigned long vec));
-local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
+/* If available, use the ARM processor CRC32 instruction. */
+#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
+#  define ARMCRC32
+#endif
+
+#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
+/*
+  Swap the bytes in a z_word_t to convert between little and big endian. Any
+  self-respecting compiler will optimize this to a single machine byte-swap
+  instruction, if one is available. This assumes that word_t is either 32 bits
+  or 64 bits.
+ */
+local z_word_t byte_swap(z_word_t word) {
+#  if W == 8
+    return
+        (word & 0xff00000000000000) >> 56 |
+        (word & 0xff000000000000) >> 40 |
+        (word & 0xff0000000000) >> 24 |
+        (word & 0xff00000000) >> 8 |
+        (word & 0xff000000) << 8 |
+        (word & 0xff0000) << 24 |
+        (word & 0xff00) << 40 |
+        (word & 0xff) << 56;
+#  else   /* W == 4 */
+    return
+        (word & 0xff000000) >> 24 |
+        (word & 0xff0000) >> 8 |
+        (word & 0xff00) << 8 |
+        (word & 0xff) << 24;
+#  endif
+}
+#endif
 
 #ifdef DYNAMIC_CRC_TABLE
+/* =========================================================================
+ * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
+ * below.
+ */
+   local z_crc_t FAR x2n_table[32];
+#else
+/* =========================================================================
+ * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
+ * of x for combining CRC-32s, all made by make_crc_table().
+ */
+#  include "crc32.h"
+#endif
 
-local volatile int crc_table_empty = 1;
-local unsigned long FAR crc_table[TBLS][256];
-local void make_crc_table OF((void));
+/* CRC polynomial. */
+#define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
+
+/*
+  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
+  reflected. For speed, this requires that a not be zero.
+ */
+local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
+    z_crc_t m, p;
+
+    m = (z_crc_t)1 << 31;
+    p = 0;
+    for (;;) {
+        if (a & m) {
+            p ^= b;
+            if ((a & (m - 1)) == 0)
+                break;
+        }
+        m >>= 1;
+        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
+    }
+    return p;
+}
+
+/*
+  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
+  initialized.
+ */
+local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
+    z_crc_t p;
+
+    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
+    while (n) {
+        if (n & 1)
+            p = multmodp(x2n_table[k & 31], p);
+        n >>= 1;
+        k++;
+    }
+    return p;
+}
+
+#ifdef DYNAMIC_CRC_TABLE
+/* =========================================================================
+ * Build the tables for byte-wise and braided CRC-32 calculations, and a table
+ * of powers of x for combining CRC-32s.
+ */
+local z_crc_t FAR crc_table[256];
+#ifdef W
+   local z_word_t FAR crc_big_table[256];
+   local z_crc_t FAR crc_braid_table[W][256];
+   local z_word_t FAR crc_braid_big_table[W][256];
+   local void braid(z_crc_t [][256], z_word_t [][256], int, int);
+#endif
 #ifdef MAKECRCH
-   local void write_table OF((FILE *, const unsigned long FAR *));
+   local void write_table(FILE *, const z_crc_t FAR *, int);
+   local void write_table32hi(FILE *, const z_word_t FAR *, int);
+   local void write_table64(FILE *, const z_word_t FAR *, int);
 #endif /* MAKECRCH */
+
+/*
+  Define a once() function depending on the availability of atomics. If this is
+  compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
+  multiple threads, and if atomics are not available, then get_crc_table() must
+  be called to initialize the tables and must return before any threads are
+  allowed to compute or combine CRCs.
+ */
+
+/* Definition of once functionality. */
+typedef struct once_s once_t;
+
+/* Check for the availability of atomics. */
+#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
+    !defined(__STDC_NO_ATOMICS__)
+
+#include <stdatomic.h>
+
+/* Structure for once(), which must be initialized with ONCE_INIT. */
+struct once_s {
+    atomic_flag begun;
+    atomic_int done;
+};
+#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
+
+/*
+  Run the provided init() function exactly once, even if multiple threads
+  invoke once() at the same time. The state must be a once_t initialized with
+  ONCE_INIT.
+ */
+local void once(once_t *state, void (*init)(void)) {
+    if (!atomic_load(&state->done)) {
+        if (atomic_flag_test_and_set(&state->begun))
+            while (!atomic_load(&state->done))
+                ;
+        else {
+            init();
+            atomic_store(&state->done, 1);
+        }
+    }
+}
+
+#else   /* no atomics */
+
+/* Structure for once(), which must be initialized with ONCE_INIT. */
+struct once_s {
+    volatile int begun;
+    volatile int done;
+};
+#define ONCE_INIT {0, 0}
+
+/* Test and set. Alas, not atomic, but tries to minimize the period of
+   vulnerability. */
+local int test_and_set(int volatile *flag) {
+    int was;
+
+    was = *flag;
+    *flag = 1;
+    return was;
+}
+
+/* Run the provided init() function once. This is not thread-safe. */
+local void once(once_t *state, void (*init)(void)) {
+    if (!state->done) {
+        if (test_and_set(&state->begun))
+            while (!state->done)
+                ;
+        else {
+            init();
+            state->done = 1;
+        }
+    }
+}
+
+#endif
+
+/* State for once(). */
+local once_t made = ONCE_INIT;
+
 /*
   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
 
   Polynomials over GF(2) are represented in binary, one bit per coefficient,
-  with the lowest powers in the most significant bit.  Then adding polynomials
+  with the lowest powers in the most significant bit. Then adding polynomials
   is just exclusive-or, and multiplying a polynomial by x is a right shift by
-  one.  If we call the above polynomial p, and represent a byte as the
+  one. If we call the above polynomial p, and represent a byte as the
   polynomial q, also with the lowest power in the most significant bit (so the
-  byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
+  byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
   where a mod b means the remainder after dividing a by b.
 
   This calculation is done using the shift-register method of multiplying and
-  taking the remainder.  The register is initialized to zero, and for each
+  taking the remainder. The register is initialized to zero, and for each
   incoming bit, x^32 is added mod p to the register if the bit is a one (where
-  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
-  x (which is shifting right by one and adding x^32 mod p if the bit shifted
-  out is a one).  We start with the highest power (least significant bit) of
-  q and repeat for all eight bits of q.
-
-  The first table is simply the CRC of all possible eight bit values.  This is
-  all the information needed to generate CRCs on data a byte at a time for all
-  combinations of CRC register values and incoming bytes.  The remaining tables
-  allow for word-at-a-time CRC calculation for both big-endian and little-
-  endian machines, where a word is four bytes.
-*/
-local void make_crc_table()
-{
-    unsigned long c;
-    int n, k;
-    unsigned long poly;                 /* polynomial exclusive-or pattern */
-    /* terms of polynomial defining this crc (except x^32): */
-    static volatile int first = 1;      /* flag to limit concurrent making */
-    static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
-
-    /* See if another task is already doing this (not thread-safe, but better
-       than nothing -- significantly reduces duration of vulnerability in
-       case the advice about DYNAMIC_CRC_TABLE is ignored) */
-    if (first) {
-        first = 0;
-
-        /* make exclusive-or pattern from polynomial (0xedb88320UL) */
-        poly = 0UL;
-        for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++)
-            poly |= 1UL << (31 - p[n]);
-
-        /* generate a crc for every 8-bit value */
-        for (n = 0; n < 256; n++) {
-            c = (unsigned long)n;
-            for (k = 0; k < 8; k++)
-                c = c & 1 ? poly ^ (c >> 1) : c >> 1;
-            crc_table[0][n] = c;
-        }
+  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
+  (which is shifting right by one and adding x^32 mod p if the bit shifted out
+  is a one). We start with the highest power (least significant bit) of q and
+  repeat for all eight bits of q.
 
-#ifdef BYFOUR
-        /* generate crc for each value followed by one, two, and three zeros,
-           and then the byte reversal of those as well as the first table */
-        for (n = 0; n < 256; n++) {
-            c = crc_table[0][n];
-            crc_table[4][n] = REV(c);
-            for (k = 1; k < 4; k++) {
-                c = crc_table[0][c & 0xff] ^ (c >> 8);
-                crc_table[k][n] = c;
-                crc_table[k + 4][n] = REV(c);
-            }
-        }
-#endif /* BYFOUR */
+  The table is simply the CRC of all possible eight bit values. This is all the
+  information needed to generate CRCs on data a byte at a time for all
+  combinations of CRC register values and incoming bytes.
+ */
 
-        crc_table_empty = 0;
-    }
-    else {      /* not first */
-        /* wait for the other guy to finish (not efficient, but rare) */
-        while (crc_table_empty)
-            ;
+local void make_crc_table(void) {
+    unsigned i, j, n;
+    z_crc_t p;
+
+    /* initialize the CRC of bytes tables */
+    for (i = 0; i < 256; i++) {
+        p = i;
+        for (j = 0; j < 8; j++)
+            p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
+        crc_table[i] = p;
+#ifdef W
+        crc_big_table[i] = byte_swap(p);
+#endif
     }
 
+    /* initialize the x^2^n mod p(x) table */
+    p = (z_crc_t)1 << 30;         /* x^1 */
+    x2n_table[0] = p;
+    for (n = 1; n < 32; n++)
+        x2n_table[n] = p = multmodp(p, p);
+
+#ifdef W
+    /* initialize the braiding tables -- needs x2n_table[] */
+    braid(crc_braid_table, crc_braid_big_table, N, W);
+#endif
+
 #ifdef MAKECRCH
-    /* write out CRC tables to crc32.h */
     {
+        /*
+          The crc32.h header file contains tables for both 32-bit and 64-bit
+          z_word_t's, and so requires a 64-bit type be available. In that case,
+          z_word_t must be defined to be 64-bits. This code then also generates
+          and writes out the tables for the case that z_word_t is 32 bits.
+         */
+#if !defined(W) || W != 8
+#  error Need a 64-bit integer type in order to generate crc32.h.
+#endif
         FILE *out;
+        int k, n;
+        z_crc_t ltl[8][256];
+        z_word_t big[8][256];
 
         out = fopen("crc32.h", "w");
         if (out == NULL) return;
-        fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
-        fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
-        fprintf(out, "local const unsigned long FAR ");
-        fprintf(out, "crc_table[TBLS][256] =\n{\n  {\n");
-        write_table(out, crc_table[0]);
-#  ifdef BYFOUR
-        fprintf(out, "#ifdef BYFOUR\n");
-        for (k = 1; k < 8; k++) {
-            fprintf(out, "  },\n  {\n");
-            write_table(out, crc_table[k]);
+
+        /* write out little-endian CRC table to crc32.h */
+        fprintf(out,
+            "/* crc32.h -- tables for rapid CRC calculation\n"
+            " * Generated automatically by crc32.c\n */\n"
+            "\n"
+            "local const z_crc_t FAR crc_table[] = {\n"
+            "    ");
+        write_table(out, crc_table, 256);
+        fprintf(out,
+            "};\n");
+
+        /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
+        fprintf(out,
+            "\n"
+            "#ifdef W\n"
+            "\n"
+            "#if W == 8\n"
+            "\n"
+            "local const z_word_t FAR crc_big_table[] = {\n"
+            "    ");
+        write_table64(out, crc_big_table, 256);
+        fprintf(out,
+            "};\n");
+
+        /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
+        fprintf(out,
+            "\n"
+            "#else /* W == 4 */\n"
+            "\n"
+            "local const z_word_t FAR crc_big_table[] = {\n"
+            "    ");
+        write_table32hi(out, crc_big_table, 256);
+        fprintf(out,
+            "};\n"
+            "\n"
+            "#endif\n");
+
+        /* write out braid tables for each value of N */
+        for (n = 1; n <= 6; n++) {
+            fprintf(out,
+            "\n"
+            "#if N == %d\n", n);
+
+            /* compute braid tables for this N and 64-bit word_t */
+            braid(ltl, big, n, 8);
+
+            /* write out braid tables for 64-bit z_word_t to crc32.h */
+            fprintf(out,
+            "\n"
+            "#if W == 8\n"
+            "\n"
+            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
+            for (k = 0; k < 8; k++) {
+                fprintf(out, "   {");
+                write_table(out, ltl[k], 256);
+                fprintf(out, "}%s", k < 7 ? ",\n" : "");
+            }
+            fprintf(out,
+            "};\n"
+            "\n"
+            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
+            for (k = 0; k < 8; k++) {
+                fprintf(out, "   {");
+                write_table64(out, big[k], 256);
+                fprintf(out, "}%s", k < 7 ? ",\n" : "");
+            }
+            fprintf(out,
+            "};\n");
+
+            /* compute braid tables for this N and 32-bit word_t */
+            braid(ltl, big, n, 4);
+
+            /* write out braid tables for 32-bit z_word_t to crc32.h */
+            fprintf(out,
+            "\n"
+            "#else /* W == 4 */\n"
+            "\n"
+            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
+            for (k = 0; k < 4; k++) {
+                fprintf(out, "   {");
+                write_table(out, ltl[k], 256);
+                fprintf(out, "}%s", k < 3 ? ",\n" : "");
+            }
+            fprintf(out,
+            "};\n"
+            "\n"
+            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
+            for (k = 0; k < 4; k++) {
+                fprintf(out, "   {");
+                write_table32hi(out, big[k], 256);
+                fprintf(out, "}%s", k < 3 ? ",\n" : "");
+            }
+            fprintf(out,
+            "};\n"
+            "\n"
+            "#endif\n"
+            "\n"
+            "#endif\n");
         }
-        fprintf(out, "#endif\n");
-#  endif /* BYFOUR */
-        fprintf(out, "  }\n};\n");
+        fprintf(out,
+            "\n"
+            "#endif\n");
+
+        /* write out zeros operator table to crc32.h */
+        fprintf(out,
+            "\n"
+            "local const z_crc_t FAR x2n_table[] = {\n"
+            "    ");
+        write_table(out, x2n_table, 32);
+        fprintf(out,
+            "};\n");
         fclose(out);
     }
 #endif /* MAKECRCH */
 }
 
 #ifdef MAKECRCH
-local void write_table(out, table)
-    FILE *out;
-    const unsigned long FAR *table;
-{
+
+/*
+   Write the 32-bit values in table[0..k-1] to out, five per line in
+   hexadecimal separated by commas.
+ */
+local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
+    int n;
+
+    for (n = 0; n < k; n++)
+        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
+                (unsigned long)(table[n]),
+                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
+}
+
+/*
+   Write the high 32-bits of each value in table[0..k-1] to out, five per line
+   in hexadecimal separated by commas.
+ */
+local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
     int n;
 
-    for (n = 0; n < 256; n++)
-        fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : "    ", table[n],
-                n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
+    for (n = 0; n < k; n++)
+        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
+                (unsigned long)(table[n] >> 32),
+                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
 }
+
+/*
+  Write the 64-bit values in table[0..k-1] to out, three per line in
+  hexadecimal separated by commas. This assumes that if there is a 64-bit
+  type, then there is also a long long integer type, and it is at least 64
+  bits. If not, then the type cast and format string can be adjusted
+  accordingly.
+ */
+local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
+    int n;
+
+    for (n = 0; n < k; n++)
+        fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
+                (unsigned long long)(table[n]),
+                n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
+}
+
+/* Actually do the deed. */
+int main(void) {
+    make_crc_table();
+    return 0;
+}
+
 #endif /* MAKECRCH */
 
-#else /* !DYNAMIC_CRC_TABLE */
-/* ========================================================================
- * Tables of CRC-32s of all single-byte values, made by make_crc_table().
+#ifdef W
+/*
+  Generate the little and big-endian braid tables for the given n and z_word_t
+  size w. Each array must have room for w blocks of 256 elements.
  */
-#include "crc32.h"
+local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
+    int k;
+    z_crc_t i, p, q;
+    for (k = 0; k < w; k++) {
+        p = x2nmodp((n * w + 3 - k) << 3, 0);
+        ltl[k][0] = 0;
+        big[w - 1 - k][0] = 0;
+        for (i = 1; i < 256; i++) {
+            ltl[k][i] = q = multmodp(i << 24, p);
+            big[w - 1 - k][i] = byte_swap(q);
+        }
+    }
+}
+#endif
+
 #endif /* DYNAMIC_CRC_TABLE */
 
 /* =========================================================================
- * This function can be used by asm versions of crc32()
+ * This function can be used by asm versions of crc32(), and to force the
+ * generation of the CRC tables in a threaded application.
  */
-const unsigned long FAR * ZEXPORT get_crc_table()
-{
+const z_crc_t FAR * ZEXPORT get_crc_table(void) {
 #ifdef DYNAMIC_CRC_TABLE
-    if (crc_table_empty)
-        make_crc_table();
+    once(&made, make_crc_table);
 #endif /* DYNAMIC_CRC_TABLE */
-    return (const unsigned long FAR *)crc_table;
+    return (const z_crc_t FAR *)crc_table;
 }
 
-/* ========================================================================= */
-#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
-#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
+/* =========================================================================
+ * Use ARM machine instructions if available. This will compute the CRC about
+ * ten times faster than the braided calculation. This code does not check for
+ * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
+ * only be defined if the compilation specifies an ARM processor architecture
+ * that has the instructions. For example, compiling with -march=armv8.1-a or
+ * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
+ * instructions.
+ */
+#ifdef ARMCRC32
 
-/* ========================================================================= */
-unsigned long ZEXPORT crc32(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    unsigned len;
-{
-    if (buf == Z_NULL) return 0UL;
+/*
+   Constants empirically determined to maximize speed. These values are from
+   measurements on a Cortex-A57. Your mileage may vary.
+ */
+#define Z_BATCH 3990                /* number of words in a batch */
+#define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
+#define Z_BATCH_MIN 800             /* fewest words in a final batch */
+
+unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
+                              z_size_t len) {
+    z_crc_t val;
+    z_word_t crc1, crc2;
+    const z_word_t *word;
+    z_word_t val0, val1, val2;
+    z_size_t last, last2, i;
+    z_size_t num;
+
+    /* Return initial CRC, if requested. */
+    if (buf == Z_NULL) return 0;
 
 #ifdef DYNAMIC_CRC_TABLE
-    if (crc_table_empty)
-        make_crc_table();
+    once(&made, make_crc_table);
 #endif /* DYNAMIC_CRC_TABLE */
 
-#ifdef BYFOUR
-    if (sizeof(void *) == sizeof(ptrdiff_t)) {
-        u4 endian;
+    /* Pre-condition the CRC */
+    crc = (~crc) & 0xffffffff;
 
-        endian = 1;
-        if (*((unsigned char *)(&endian)))
-            return crc32_little(crc, buf, len);
-        else
-            return crc32_big(crc, buf, len);
-    }
-#endif /* BYFOUR */
-    crc = crc ^ 0xffffffffUL;
-    while (len >= 8) {
-        DO8;
-        len -= 8;
+    /* Compute the CRC up to a word boundary. */
+    while (len && ((z_size_t)buf & 7) != 0) {
+        len--;
+        val = *buf++;
+        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
     }
-    if (len) do {
-        DO1;
-    } while (--len);
-    return crc ^ 0xffffffffUL;
-}
 
-#ifdef BYFOUR
+    /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
+    word = (z_word_t const *)buf;
+    num = len >> 3;
+    len &= 7;
 
-/* ========================================================================= */
-#define DOLIT4 c ^= *buf4++; \
-        c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
-            crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
-#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
+    /* Do three interleaved CRCs to realize the throughput of one crc32x
+       instruction per cycle. Each CRC is calculated on Z_BATCH words. The
+       three CRCs are combined into a single CRC after each set of batches. */
+    while (num >= 3 * Z_BATCH) {
+        crc1 = 0;
+        crc2 = 0;
+        for (i = 0; i < Z_BATCH; i++) {
+            val0 = word[i];
+            val1 = word[i + Z_BATCH];
+            val2 = word[i + 2 * Z_BATCH];
+            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
+            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
+            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
+        }
+        word += 3 * Z_BATCH;
+        num -= 3 * Z_BATCH;
+        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
+        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
+    }
 
-/* ========================================================================= */
-local unsigned long crc32_little(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    unsigned len;
-{
-    register u4 c;
-    register const u4 FAR *buf4;
-
-    c = (u4)crc;
-    c = ~c;
-    while (len && ((ptrdiff_t)buf & 3)) {
-        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
-        len--;
+    /* Do one last smaller batch with the remaining words, if there are enough
+       to pay for the combination of CRCs. */
+    last = num / 3;
+    if (last >= Z_BATCH_MIN) {
+        last2 = last << 1;
+        crc1 = 0;
+        crc2 = 0;
+        for (i = 0; i < last; i++) {
+            val0 = word[i];
+            val1 = word[i + last];
+            val2 = word[i + last2];
+            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
+            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
+            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
+        }
+        word += 3 * last;
+        num -= 3 * last;
+        val = x2nmodp(last, 6);
+        crc = multmodp(val, crc) ^ crc1;
+        crc = multmodp(val, crc) ^ crc2;
     }
 
-    buf4 = (const u4 FAR *)(const void FAR *)buf;
-    while (len >= 32) {
-        DOLIT32;
-        len -= 32;
+    /* Compute the CRC on any remaining words. */
+    for (i = 0; i < num; i++) {
+        val0 = word[i];
+        __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
     }
-    while (len >= 4) {
-        DOLIT4;
-        len -= 4;
+    word += num;
+
+    /* Complete the CRC on any remaining bytes. */
+    buf = (const unsigned char FAR *)word;
+    while (len) {
+        len--;
+        val = *buf++;
+        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
     }
-    buf = (const unsigned char FAR *)buf4;
 
-    if (len) do {
-        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
-    } while (--len);
-    c = ~c;
-    return (unsigned long)c;
+    /* Return the CRC, post-conditioned. */
+    return crc ^ 0xffffffff;
 }
 
-/* ========================================================================= */
-#define DOBIG4 c ^= *++buf4; \
-        c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
-            crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
-#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+#else
+
+#ifdef W
+
+/*
+  Return the CRC of the W bytes in the word_t data, taking the
+  least-significant byte of the word as the first byte of data, without any pre
+  or post conditioning. This is used to combine the CRCs of each braid.
+ */
+local z_crc_t crc_word(z_word_t data) {
+    int k;
+    for (k = 0; k < W; k++)
+        data = (data >> 8) ^ crc_table[data & 0xff];
+    return (z_crc_t)data;
+}
+
+local z_word_t crc_word_big(z_word_t data) {
+    int k;
+    for (k = 0; k < W; k++)
+        data = (data << 8) ^
+            crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
+    return data;
+}
+
+#endif
 
 /* ========================================================================= */
-local unsigned long crc32_big(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    unsigned len;
-{
-    register u4 c;
-    register const u4 FAR *buf4;
-
-    c = REV((u4)crc);
-    c = ~c;
-    while (len && ((ptrdiff_t)buf & 3)) {
-        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
-        len--;
+unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
+                              z_size_t len) {
+    /* Return initial CRC, if requested. */
+    if (buf == Z_NULL) return 0;
+
+#ifdef DYNAMIC_CRC_TABLE
+    once(&made, make_crc_table);
+#endif /* DYNAMIC_CRC_TABLE */
+
+    /* Pre-condition the CRC */
+    crc = (~crc) & 0xffffffff;
+
+#ifdef W
+
+    /* If provided enough bytes, do a braided CRC calculation. */
+    if (len >= N * W + W - 1) {
+        z_size_t blks;
+        z_word_t const *words;
+        unsigned endian;
+        int k;
+
+        /* Compute the CRC up to a z_word_t boundary. */
+        while (len && ((z_size_t)buf & (W - 1)) != 0) {
+            len--;
+            crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
+        }
+
+        /* Compute the CRC on as many N z_word_t blocks as are available. */
+        blks = len / (N * W);
+        len -= blks * N * W;
+        words = (z_word_t const *)buf;
+
+        /* Do endian check at execution time instead of compile time, since ARM
+           processors can change the endianness at execution time. If the
+           compiler knows what the endianness will be, it can optimize out the
+           check and the unused branch. */
+        endian = 1;
+        if (*(unsigned char *)&endian) {
+            /* Little endian. */
+
+            z_crc_t crc0;
+            z_word_t word0;
+#if N > 1
+            z_crc_t crc1;
+            z_word_t word1;
+#if N > 2
+            z_crc_t crc2;
+            z_word_t word2;
+#if N > 3
+            z_crc_t crc3;
+            z_word_t word3;
+#if N > 4
+            z_crc_t crc4;
+            z_word_t word4;
+#if N > 5
+            z_crc_t crc5;
+            z_word_t word5;
+#endif
+#endif
+#endif
+#endif
+#endif
+
+            /* Initialize the CRC for each braid. */
+            crc0 = crc;
+#if N > 1
+            crc1 = 0;
+#if N > 2
+            crc2 = 0;
+#if N > 3
+            crc3 = 0;
+#if N > 4
+            crc4 = 0;
+#if N > 5
+            crc5 = 0;
+#endif
+#endif
+#endif
+#endif
+#endif
+
+            /*
+              Process the first blks-1 blocks, computing the CRCs on each braid
+              independently.
+             */
+            while (--blks) {
+                /* Load the word for each braid into registers. */
+                word0 = crc0 ^ words[0];
+#if N > 1
+                word1 = crc1 ^ words[1];
+#if N > 2
+                word2 = crc2 ^ words[2];
+#if N > 3
+                word3 = crc3 ^ words[3];
+#if N > 4
+                word4 = crc4 ^ words[4];
+#if N > 5
+                word5 = crc5 ^ words[5];
+#endif
+#endif
+#endif
+#endif
+#endif
+                words += N;
+
+                /* Compute and update the CRC for each word. The loop should
+                   get unrolled. */
+                crc0 = crc_braid_table[0][word0 & 0xff];
+#if N > 1
+                crc1 = crc_braid_table[0][word1 & 0xff];
+#if N > 2
+                crc2 = crc_braid_table[0][word2 & 0xff];
+#if N > 3
+                crc3 = crc_braid_table[0][word3 & 0xff];
+#if N > 4
+                crc4 = crc_braid_table[0][word4 & 0xff];
+#if N > 5
+                crc5 = crc_braid_table[0][word5 & 0xff];
+#endif
+#endif
+#endif
+#endif
+#endif
+                for (k = 1; k < W; k++) {
+                    crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
+#if N > 1
+                    crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
+#if N > 2
+                    crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
+#if N > 3
+                    crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
+#if N > 4
+                    crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
+#if N > 5
+                    crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
+#endif
+#endif
+#endif
+#endif
+#endif
+                }
+            }
+
+            /*
+              Process the last block, combining the CRCs of the N braids at the
+              same time.
+             */
+            crc = crc_word(crc0 ^ words[0]);
+#if N > 1
+            crc = crc_word(crc1 ^ words[1] ^ crc);
+#if N > 2
+            crc = crc_word(crc2 ^ words[2] ^ crc);
+#if N > 3
+            crc = crc_word(crc3 ^ words[3] ^ crc);
+#if N > 4
+            crc = crc_word(crc4 ^ words[4] ^ crc);
+#if N > 5
+            crc = crc_word(crc5 ^ words[5] ^ crc);
+#endif
+#endif
+#endif
+#endif
+#endif
+            words += N;
+        }
+        else {
+            /* Big endian. */
+
+            z_word_t crc0, word0, comb;
+#if N > 1
+            z_word_t crc1, word1;
+#if N > 2
+            z_word_t crc2, word2;
+#if N > 3
+            z_word_t crc3, word3;
+#if N > 4
+            z_word_t crc4, word4;
+#if N > 5
+            z_word_t crc5, word5;
+#endif
+#endif
+#endif
+#endif
+#endif
+
+            /* Initialize the CRC for each braid. */
+            crc0 = byte_swap(crc);
+#if N > 1
+            crc1 = 0;
+#if N > 2
+            crc2 = 0;
+#if N > 3
+            crc3 = 0;
+#if N > 4
+            crc4 = 0;
+#if N > 5
+            crc5 = 0;
+#endif
+#endif
+#endif
+#endif
+#endif
+
+            /*
+              Process the first blks-1 blocks, computing the CRCs on each braid
+              independently.
+             */
+            while (--blks) {
+                /* Load the word for each braid into registers. */
+                word0 = crc0 ^ words[0];
+#if N > 1
+                word1 = crc1 ^ words[1];
+#if N > 2
+                word2 = crc2 ^ words[2];
+#if N > 3
+                word3 = crc3 ^ words[3];
+#if N > 4
+                word4 = crc4 ^ words[4];
+#if N > 5
+                word5 = crc5 ^ words[5];
+#endif
+#endif
+#endif
+#endif
+#endif
+                words += N;
+
+                /* Compute and update the CRC for each word. The loop should
+                   get unrolled. */
+                crc0 = crc_braid_big_table[0][word0 & 0xff];
+#if N > 1
+                crc1 = crc_braid_big_table[0][word1 & 0xff];
+#if N > 2
+                crc2 = crc_braid_big_table[0][word2 & 0xff];
+#if N > 3
+                crc3 = crc_braid_big_table[0][word3 & 0xff];
+#if N > 4
+                crc4 = crc_braid_big_table[0][word4 & 0xff];
+#if N > 5
+                crc5 = crc_braid_big_table[0][word5 & 0xff];
+#endif
+#endif
+#endif
+#endif
+#endif
+                for (k = 1; k < W; k++) {
+                    crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
+#if N > 1
+                    crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
+#if N > 2
+                    crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
+#if N > 3
+                    crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
+#if N > 4
+                    crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
+#if N > 5
+                    crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
+#endif
+#endif
+#endif
+#endif
+#endif
+                }
+            }
+
+            /*
+              Process the last block, combining the CRCs of the N braids at the
+              same time.
+             */
+            comb = crc_word_big(crc0 ^ words[0]);
+#if N > 1
+            comb = crc_word_big(crc1 ^ words[1] ^ comb);
+#if N > 2
+            comb = crc_word_big(crc2 ^ words[2] ^ comb);
+#if N > 3
+            comb = crc_word_big(crc3 ^ words[3] ^ comb);
+#if N > 4
+            comb = crc_word_big(crc4 ^ words[4] ^ comb);
+#if N > 5
+            comb = crc_word_big(crc5 ^ words[5] ^ comb);
+#endif
+#endif
+#endif
+#endif
+#endif
+            words += N;
+            crc = byte_swap(comb);
+        }
+
+        /*
+          Update the pointer to the remaining bytes to process.
+         */
+        buf = (unsigned char const *)words;
     }
 
-    buf4 = (const u4 FAR *)(const void FAR *)buf;
-    buf4--;
-    while (len >= 32) {
-        DOBIG32;
-        len -= 32;
+#endif /* W */
+
+    /* Complete the computation of the CRC on any remaining bytes. */
+    while (len >= 8) {
+        len -= 8;
+        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
+        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
+        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
+        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
+        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
+        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
+        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
+        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
     }
-    while (len >= 4) {
-        DOBIG4;
-        len -= 4;
+    while (len) {
+        len--;
+        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
     }
-    buf4++;
-    buf = (const unsigned char FAR *)buf4;
-
-    if (len) do {
-        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
-    } while (--len);
-    c = ~c;
-    return (unsigned long)(REV(c));
+
+    /* Return the CRC, post-conditioned. */
+    return crc ^ 0xffffffff;
 }
 
-#endif /* BYFOUR */
+#endif
 
-#define GF2_DIM 32      /* dimension of GF(2) vectors (length of CRC) */
+/* ========================================================================= */
+unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
+                            uInt len) {
+    return crc32_z(crc, buf, len);
+}
 
 /* ========================================================================= */
-local unsigned long gf2_matrix_times(mat, vec)
-    unsigned long *mat;
-    unsigned long vec;
-{
-    unsigned long sum;
-
-    sum = 0;
-    while (vec) {
-        if (vec & 1)
-            sum ^= *mat;
-        vec >>= 1;
-        mat++;
-    }
-    return sum;
+uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
+#ifdef DYNAMIC_CRC_TABLE
+    once(&made, make_crc_table);
+#endif /* DYNAMIC_CRC_TABLE */
+    return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
 }
 
 /* ========================================================================= */
-local void gf2_matrix_square(square, mat)
-    unsigned long *square;
-    unsigned long *mat;
-{
-    int n;
+uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
+    return crc32_combine64(crc1, crc2, (z_off64_t)len2);
+}
 
-    for (n = 0; n < GF2_DIM; n++)
-        square[n] = gf2_matrix_times(mat, mat[n]);
+/* ========================================================================= */
+uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
+#ifdef DYNAMIC_CRC_TABLE
+    once(&made, make_crc_table);
+#endif /* DYNAMIC_CRC_TABLE */
+    return x2nmodp(len2, 3);
 }
 
 /* ========================================================================= */
-uLong ZEXPORT crc32_combine(crc1, crc2, len2)
-    uLong crc1;
-    uLong crc2;
-    z_off_t len2;
-{
-    int n;
-    unsigned long row;
-    unsigned long even[GF2_DIM];    /* even-power-of-two zeros operator */
-    unsigned long odd[GF2_DIM];     /* odd-power-of-two zeros operator */
-
-    /* degenerate case */
-    if (len2 == 0)
-        return crc1;
-
-    /* put operator for one zero bit in odd */
-    odd[0] = 0xedb88320L;           /* CRC-32 polynomial */
-    row = 1;
-    for (n = 1; n < GF2_DIM; n++) {
-        odd[n] = row;
-        row <<= 1;
-    }
+uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
+    return crc32_combine_gen64((z_off64_t)len2);
+}
 
-    /* put operator for two zero bits in even */
-    gf2_matrix_square(even, odd);
-
-    /* put operator for four zero bits in odd */
-    gf2_matrix_square(odd, even);
-
-    /* apply len2 zeros to crc1 (first square will put the operator for one
-       zero byte, eight zero bits, in even) */
-    do {
-        /* apply zeros operator for this bit of len2 */
-        gf2_matrix_square(even, odd);
-        if (len2 & 1)
-            crc1 = gf2_matrix_times(even, crc1);
-        len2 >>= 1;
-
-        /* if no more bits set, then done */
-        if (len2 == 0)
-            break;
-
-        /* another iteration of the loop with odd and even swapped */
-        gf2_matrix_square(odd, even);
-        if (len2 & 1)
-            crc1 = gf2_matrix_times(odd, crc1);
-        len2 >>= 1;
-
-        /* if no more bits set, then done */
-    } while (len2 != 0);
-
-    /* return combined crc */
-    crc1 ^= crc2;
-    return crc1;
+/* ========================================================================= */
+uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
+    return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
 }
diff --git a/reg-io/zlib/crc32.h b/reg-io/zlib/crc32.h
index 6080fa25..137df68d 100644
--- a/reg-io/zlib/crc32.h
+++ b/reg-io/zlib/crc32.h
@@ -2,440 +2,9445 @@
  * Generated automatically by crc32.c
  */
 
-local const unsigned long FAR crc_table[TBLS][256] =
-{
-   {
-      0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
-      0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
-      0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
-      0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
-      0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
-      0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
-      0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
-      0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
-      0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
-      0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
-      0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
-      0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
-      0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
-      0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
-      0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
-      0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
-      0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
-      0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
-      0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
-      0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
-      0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
-      0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
-      0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
-      0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
-      0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
-      0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
-      0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
-      0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
-      0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
-      0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
-      0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
-      0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
-      0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
-      0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
-      0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
-      0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
-      0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
-      0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
-      0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
-      0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
-      0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
-      0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
-      0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
-      0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
-      0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
-      0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
-      0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
-      0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
-      0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
-      0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
-      0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
-      0x2d02ef8dUL
-#ifdef BYFOUR
-   },
-   {
-      0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
-      0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
-      0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
-      0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
-      0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
-      0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
-      0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
-      0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
-      0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
-      0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
-      0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
-      0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
-      0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
-      0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
-      0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
-      0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
-      0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
-      0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
-      0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
-      0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
-      0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
-      0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
-      0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
-      0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
-      0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
-      0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
-      0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
-      0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
-      0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
-      0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
-      0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
-      0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
-      0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
-      0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
-      0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
-      0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
-      0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
-      0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
-      0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
-      0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
-      0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
-      0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
-      0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
-      0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
-      0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
-      0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
-      0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
-      0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
-      0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
-      0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
-      0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
-      0x9324fd72UL
-   },
-   {
-      0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
-      0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
-      0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
-      0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
-      0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
-      0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
-      0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
-      0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
-      0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
-      0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
-      0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
-      0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
-      0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
-      0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
-      0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
-      0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
-      0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
-      0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
-      0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
-      0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
-      0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
-      0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
-      0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
-      0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
-      0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
-      0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
-      0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
-      0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
-      0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
-      0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
-      0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
-      0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
-      0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
-      0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
-      0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
-      0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
-      0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
-      0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
-      0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
-      0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
-      0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
-      0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
-      0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
-      0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
-      0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
-      0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
-      0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
-      0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
-      0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
-      0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
-      0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
-      0xbe9834edUL
-   },
-   {
-      0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
-      0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
-      0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
-      0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
-      0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
-      0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
-      0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
-      0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
-      0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
-      0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
-      0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
-      0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
-      0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
-      0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
-      0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
-      0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
-      0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
-      0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
-      0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
-      0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
-      0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
-      0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
-      0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
-      0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
-      0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
-      0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
-      0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
-      0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
-      0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
-      0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
-      0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
-      0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
-      0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
-      0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
-      0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
-      0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
-      0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
-      0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
-      0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
-      0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
-      0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
-      0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
-      0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
-      0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
-      0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
-      0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
-      0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
-      0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
-      0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
-      0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
-      0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
-      0xde0506f1UL
-   },
-   {
-      0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
-      0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
-      0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
-      0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
-      0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
-      0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
-      0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
-      0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
-      0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
-      0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
-      0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
-      0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
-      0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
-      0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
-      0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
-      0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
-      0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
-      0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
-      0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
-      0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
-      0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
-      0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
-      0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
-      0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
-      0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
-      0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
-      0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
-      0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
-      0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
-      0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
-      0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
-      0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
-      0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
-      0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
-      0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
-      0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
-      0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
-      0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
-      0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
-      0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
-      0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
-      0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
-      0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
-      0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
-      0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
-      0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
-      0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
-      0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
-      0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
-      0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
-      0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
-      0x8def022dUL
-   },
-   {
-      0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
-      0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
-      0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
-      0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
-      0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
-      0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
-      0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
-      0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
-      0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
-      0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
-      0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
-      0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
-      0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
-      0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
-      0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
-      0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
-      0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
-      0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
-      0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
-      0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
-      0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
-      0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
-      0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
-      0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
-      0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
-      0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
-      0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
-      0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
-      0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
-      0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
-      0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
-      0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
-      0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
-      0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
-      0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
-      0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
-      0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
-      0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
-      0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
-      0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
-      0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
-      0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
-      0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
-      0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
-      0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
-      0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
-      0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
-      0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
-      0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
-      0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
-      0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
-      0x72fd2493UL
-   },
-   {
-      0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
-      0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
-      0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
-      0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
-      0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
-      0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
-      0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
-      0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
-      0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
-      0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
-      0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
-      0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
-      0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
-      0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
-      0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
-      0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
-      0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
-      0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
-      0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
-      0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
-      0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
-      0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
-      0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
-      0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
-      0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
-      0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
-      0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
-      0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
-      0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
-      0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
-      0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
-      0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
-      0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
-      0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
-      0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
-      0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
-      0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
-      0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
-      0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
-      0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
-      0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
-      0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
-      0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
-      0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
-      0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
-      0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
-      0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
-      0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
-      0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
-      0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
-      0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
-      0xed3498beUL
-   },
-   {
-      0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
-      0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
-      0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
-      0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
-      0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
-      0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
-      0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
-      0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
-      0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
-      0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
-      0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
-      0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
-      0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
-      0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
-      0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
-      0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
-      0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
-      0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
-      0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
-      0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
-      0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
-      0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
-      0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
-      0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
-      0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
-      0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
-      0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
-      0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
-      0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
-      0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
-      0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
-      0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
-      0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
-      0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
-      0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
-      0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
-      0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
-      0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
-      0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
-      0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
-      0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
-      0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
-      0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
-      0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
-      0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
-      0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
-      0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
-      0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
-      0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
-      0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
-      0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
-      0xf10605deUL
+local const z_crc_t FAR crc_table[] = {
+    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+    0x2d02ef8d};
+
+#ifdef W
+
+#if W == 8
+
+local const z_word_t FAR crc_big_table[] = {
+    0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000,
+    0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000,
+    0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000,
+    0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000,
+    0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000,
+    0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000,
+    0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000,
+    0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000,
+    0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000,
+    0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000,
+    0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000,
+    0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000,
+    0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000,
+    0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000,
+    0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000,
+    0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000,
+    0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000,
+    0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000,
+    0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000,
+    0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000,
+    0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000,
+    0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000,
+    0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000,
+    0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000,
+    0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000,
+    0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000,
+    0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000,
+    0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000,
+    0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000,
+    0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000,
+    0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000,
+    0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000,
+    0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000,
+    0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000,
+    0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000,
+    0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000,
+    0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000,
+    0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000,
+    0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000,
+    0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000,
+    0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000,
+    0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000,
+    0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000,
+    0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000,
+    0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000,
+    0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000,
+    0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000,
+    0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000,
+    0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000,
+    0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000,
+    0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000,
+    0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000,
+    0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000,
+    0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000,
+    0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000,
+    0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000,
+    0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000,
+    0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000,
+    0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000,
+    0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000,
+    0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000,
+    0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000,
+    0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000,
+    0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000,
+    0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000,
+    0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000,
+    0x8567077200000000, 0x1357000500000000, 0x824abf9500000000,
+    0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000,
+    0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000,
+    0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000,
+    0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000,
+    0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000,
+    0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000,
+    0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000,
+    0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000,
+    0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000,
+    0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000,
+    0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000,
+    0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000,
+    0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000,
+    0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000,
+    0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000,
+    0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000,
+    0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000,
+    0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000,
+    0x8def022d00000000};
+
+#else /* W == 4 */
+
+local const z_word_t FAR crc_big_table[] = {
+    0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07,
+    0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79,
+    0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7,
+    0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84,
+    0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13,
+    0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663,
+    0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5,
+    0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5,
+    0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832,
+    0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51,
+    0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf,
+    0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1,
+    0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76,
+    0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606,
+    0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996,
+    0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6,
+    0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c,
+    0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712,
+    0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c,
+    0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4,
+    0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943,
+    0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333,
+    0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe,
+    0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce,
+    0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359,
+    0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a,
+    0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04,
+    0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a,
+    0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0,
+    0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580,
+    0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10,
+    0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060,
+    0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1,
+    0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf,
+    0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31,
+    0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852,
+    0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5,
+    0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5,
+    0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75,
+    0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005,
+    0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292,
+    0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1,
+    0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f,
+    0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111,
+    0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0,
+    0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0,
+    0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40,
+    0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530,
+    0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba,
+    0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4,
+    0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a,
+    0x8def022d};
+
+#endif
+
+#if N == 1
+
+#if W == 8
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa,
+    0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b,
+    0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232,
+    0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8,
+    0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e,
+    0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa,
+    0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b,
+    0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f,
+    0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719,
+    0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3,
+    0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa,
+    0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b,
+    0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed,
+    0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89,
+    0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25,
+    0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041,
+    0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c,
+    0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed,
+    0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4,
+    0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758,
+    0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e,
+    0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a,
+    0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed,
+    0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889,
+    0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df,
+    0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544,
+    0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d,
+    0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c,
+    0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1,
+    0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95,
+    0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839,
+    0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d,
+    0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976,
+    0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7,
+    0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be,
+    0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144,
+    0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12,
+    0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376,
+    0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a,
+    0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e,
+    0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278,
+    0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682,
+    0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b,
+    0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a,
+    0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561,
+    0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05,
+    0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9,
+    0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd,
+    0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0,
+    0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61,
+    0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678,
+    0x264b06e6},
+   {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413,
+    0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3,
+    0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d,
+    0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653,
+    0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9,
+    0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e,
+    0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5,
+    0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712,
+    0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8,
+    0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6,
+    0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068,
+    0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8,
+    0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579,
+    0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade,
+    0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37,
+    0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590,
+    0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4,
+    0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64,
+    0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea,
+    0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678,
+    0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282,
+    0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25,
+    0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102,
+    0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5,
+    0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f,
+    0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146,
+    0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8,
+    0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08,
+    0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c,
+    0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b,
+    0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972,
+    0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5,
+    0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d,
+    0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd,
+    0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833,
+    0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d,
+    0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7,
+    0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60,
+    0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2,
+    0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105,
+    0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff,
+    0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1,
+    0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f,
+    0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf,
+    0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617,
+    0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0,
+    0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959,
+    0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe,
+    0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca,
+    0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a,
+    0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184,
+    0x92364a30},
+   {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216,
+    0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8,
+    0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170,
+    0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035,
+    0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6,
+    0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145,
+    0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d,
+    0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e,
+    0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d,
+    0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408,
+    0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0,
+    0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e,
+    0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c,
+    0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf,
+    0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a,
+    0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9,
+    0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1,
+    0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f,
+    0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987,
+    0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4,
+    0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37,
+    0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84,
+    0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca,
+    0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79,
+    0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba,
+    0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d,
+    0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5,
+    0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b,
+    0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643,
+    0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0,
+    0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525,
+    0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496,
+    0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8,
+    0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026,
+    0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e,
+    0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db,
+    0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118,
+    0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab,
+    0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf,
+    0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c,
+    0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf,
+    0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a,
+    0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32,
+    0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec,
+    0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82,
+    0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31,
+    0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4,
+    0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957,
+    0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f,
+    0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1,
+    0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869,
+    0xe4c4abcc},
+   {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0,
+    0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271,
+    0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61,
+    0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52,
+    0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43,
+    0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333,
+    0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64,
+    0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314,
+    0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205,
+    0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136,
+    0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26,
+    0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997,
+    0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849,
+    0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739,
+    0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8,
+    0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98,
+    0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b,
+    0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba,
+    0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa,
+    0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d,
+    0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c,
+    0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc,
+    0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af,
+    0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf,
+    0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce,
+    0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922,
+    0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532,
+    0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183,
+    0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710,
+    0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860,
+    0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1,
+    0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1,
+    0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956,
+    0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7,
+    0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7,
+    0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4,
+    0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5,
+    0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5,
+    0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb,
+    0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb,
+    0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da,
+    0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9,
+    0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9,
+    0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48,
+    0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df,
+    0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af,
+    0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e,
+    0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e,
+    0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d,
+    0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c,
+    0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c,
+    0xca64c78c},
+   {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757,
+    0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a,
+    0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733,
+    0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871,
+    0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70,
+    0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42,
+    0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5,
+    0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787,
+    0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086,
+    0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4,
+    0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d,
+    0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0,
+    0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d,
+    0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f,
+    0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859,
+    0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b,
+    0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5,
+    0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028,
+    0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891,
+    0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed,
+    0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec,
+    0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde,
+    0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817,
+    0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825,
+    0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24,
+    0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e,
+    0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7,
+    0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a,
+    0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4,
+    0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196,
+    0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0,
+    0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2,
+    0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52,
+    0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f,
+    0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36,
+    0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174,
+    0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675,
+    0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647,
+    0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d,
+    0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf,
+    0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be,
+    0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc,
+    0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645,
+    0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98,
+    0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138,
+    0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a,
+    0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c,
+    0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e,
+    0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0,
+    0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d,
+    0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194,
+    0xde0506f1},
+   {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc,
+    0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f,
+    0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a,
+    0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29,
+    0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8,
+    0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023,
+    0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e,
+    0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065,
+    0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84,
+    0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7,
+    0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922,
+    0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71,
+    0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0,
+    0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b,
+    0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816,
+    0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd,
+    0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c,
+    0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f,
+    0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba,
+    0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579,
+    0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98,
+    0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873,
+    0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e,
+    0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5,
+    0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134,
+    0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7,
+    0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732,
+    0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461,
+    0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0,
+    0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b,
+    0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26,
+    0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd,
+    0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc,
+    0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef,
+    0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a,
+    0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049,
+    0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8,
+    0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43,
+    0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e,
+    0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5,
+    0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24,
+    0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07,
+    0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982,
+    0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1,
+    0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0,
+    0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b,
+    0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576,
+    0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d,
+    0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c,
+    0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f,
+    0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda,
+    0xbe9834ed},
+   {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504,
+    0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49,
+    0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e,
+    0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192,
+    0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859,
+    0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c,
+    0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620,
+    0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265,
+    0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae,
+    0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2,
+    0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175,
+    0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38,
+    0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05,
+    0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40,
+    0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f,
+    0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca,
+    0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850,
+    0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d,
+    0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da,
+    0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864,
+    0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af,
+    0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea,
+    0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74,
+    0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31,
+    0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa,
+    0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a,
+    0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd,
+    0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180,
+    0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a,
+    0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f,
+    0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290,
+    0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5,
+    0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed,
+    0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0,
+    0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167,
+    0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b,
+    0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0,
+    0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5,
+    0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc,
+    0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189,
+    0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842,
+    0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e,
+    0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299,
+    0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4,
+    0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec,
+    0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9,
+    0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66,
+    0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23,
+    0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9,
+    0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4,
+    0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33,
+    0x9324fd72},
+   {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+    0x2d02ef8d}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000,
+    0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000,
+    0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000,
+    0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000,
+    0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000,
+    0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000,
+    0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000,
+    0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000,
+    0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000,
+    0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000,
+    0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000,
+    0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000,
+    0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000,
+    0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000,
+    0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000,
+    0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000,
+    0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000,
+    0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000,
+    0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000,
+    0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000,
+    0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000,
+    0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000,
+    0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000,
+    0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000,
+    0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000,
+    0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000,
+    0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000,
+    0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000,
+    0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000,
+    0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000,
+    0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000,
+    0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000,
+    0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000,
+    0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000,
+    0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000,
+    0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000,
+    0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000,
+    0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000,
+    0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000,
+    0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000,
+    0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000,
+    0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000,
+    0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000,
+    0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000,
+    0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000,
+    0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000,
+    0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000,
+    0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000,
+    0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000,
+    0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000,
+    0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000,
+    0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000,
+    0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000,
+    0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000,
+    0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000,
+    0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000,
+    0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000,
+    0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000,
+    0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000,
+    0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000,
+    0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000,
+    0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000,
+    0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000,
+    0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000,
+    0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000,
+    0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000,
+    0x8567077200000000, 0x1357000500000000, 0x824abf9500000000,
+    0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000,
+    0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000,
+    0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000,
+    0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000,
+    0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000,
+    0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000,
+    0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000,
+    0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000,
+    0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000,
+    0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000,
+    0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000,
+    0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000,
+    0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000,
+    0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000,
+    0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000,
+    0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000,
+    0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000,
+    0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000,
+    0x8def022d00000000},
+   {0x0000000000000000, 0x41311b1900000000, 0x8262363200000000,
+    0xc3532d2b00000000, 0x04c56c6400000000, 0x45f4777d00000000,
+    0x86a75a5600000000, 0xc796414f00000000, 0x088ad9c800000000,
+    0x49bbc2d100000000, 0x8ae8effa00000000, 0xcbd9f4e300000000,
+    0x0c4fb5ac00000000, 0x4d7eaeb500000000, 0x8e2d839e00000000,
+    0xcf1c988700000000, 0x5112c24a00000000, 0x1023d95300000000,
+    0xd370f47800000000, 0x9241ef6100000000, 0x55d7ae2e00000000,
+    0x14e6b53700000000, 0xd7b5981c00000000, 0x9684830500000000,
+    0x59981b8200000000, 0x18a9009b00000000, 0xdbfa2db000000000,
+    0x9acb36a900000000, 0x5d5d77e600000000, 0x1c6c6cff00000000,
+    0xdf3f41d400000000, 0x9e0e5acd00000000, 0xa224849500000000,
+    0xe3159f8c00000000, 0x2046b2a700000000, 0x6177a9be00000000,
+    0xa6e1e8f100000000, 0xe7d0f3e800000000, 0x2483dec300000000,
+    0x65b2c5da00000000, 0xaaae5d5d00000000, 0xeb9f464400000000,
+    0x28cc6b6f00000000, 0x69fd707600000000, 0xae6b313900000000,
+    0xef5a2a2000000000, 0x2c09070b00000000, 0x6d381c1200000000,
+    0xf33646df00000000, 0xb2075dc600000000, 0x715470ed00000000,
+    0x30656bf400000000, 0xf7f32abb00000000, 0xb6c231a200000000,
+    0x75911c8900000000, 0x34a0079000000000, 0xfbbc9f1700000000,
+    0xba8d840e00000000, 0x79dea92500000000, 0x38efb23c00000000,
+    0xff79f37300000000, 0xbe48e86a00000000, 0x7d1bc54100000000,
+    0x3c2ade5800000000, 0x054f79f000000000, 0x447e62e900000000,
+    0x872d4fc200000000, 0xc61c54db00000000, 0x018a159400000000,
+    0x40bb0e8d00000000, 0x83e823a600000000, 0xc2d938bf00000000,
+    0x0dc5a03800000000, 0x4cf4bb2100000000, 0x8fa7960a00000000,
+    0xce968d1300000000, 0x0900cc5c00000000, 0x4831d74500000000,
+    0x8b62fa6e00000000, 0xca53e17700000000, 0x545dbbba00000000,
+    0x156ca0a300000000, 0xd63f8d8800000000, 0x970e969100000000,
+    0x5098d7de00000000, 0x11a9ccc700000000, 0xd2fae1ec00000000,
+    0x93cbfaf500000000, 0x5cd7627200000000, 0x1de6796b00000000,
+    0xdeb5544000000000, 0x9f844f5900000000, 0x58120e1600000000,
+    0x1923150f00000000, 0xda70382400000000, 0x9b41233d00000000,
+    0xa76bfd6500000000, 0xe65ae67c00000000, 0x2509cb5700000000,
+    0x6438d04e00000000, 0xa3ae910100000000, 0xe29f8a1800000000,
+    0x21cca73300000000, 0x60fdbc2a00000000, 0xafe124ad00000000,
+    0xeed03fb400000000, 0x2d83129f00000000, 0x6cb2098600000000,
+    0xab2448c900000000, 0xea1553d000000000, 0x29467efb00000000,
+    0x687765e200000000, 0xf6793f2f00000000, 0xb748243600000000,
+    0x741b091d00000000, 0x352a120400000000, 0xf2bc534b00000000,
+    0xb38d485200000000, 0x70de657900000000, 0x31ef7e6000000000,
+    0xfef3e6e700000000, 0xbfc2fdfe00000000, 0x7c91d0d500000000,
+    0x3da0cbcc00000000, 0xfa368a8300000000, 0xbb07919a00000000,
+    0x7854bcb100000000, 0x3965a7a800000000, 0x4b98833b00000000,
+    0x0aa9982200000000, 0xc9fab50900000000, 0x88cbae1000000000,
+    0x4f5def5f00000000, 0x0e6cf44600000000, 0xcd3fd96d00000000,
+    0x8c0ec27400000000, 0x43125af300000000, 0x022341ea00000000,
+    0xc1706cc100000000, 0x804177d800000000, 0x47d7369700000000,
+    0x06e62d8e00000000, 0xc5b500a500000000, 0x84841bbc00000000,
+    0x1a8a417100000000, 0x5bbb5a6800000000, 0x98e8774300000000,
+    0xd9d96c5a00000000, 0x1e4f2d1500000000, 0x5f7e360c00000000,
+    0x9c2d1b2700000000, 0xdd1c003e00000000, 0x120098b900000000,
+    0x533183a000000000, 0x9062ae8b00000000, 0xd153b59200000000,
+    0x16c5f4dd00000000, 0x57f4efc400000000, 0x94a7c2ef00000000,
+    0xd596d9f600000000, 0xe9bc07ae00000000, 0xa88d1cb700000000,
+    0x6bde319c00000000, 0x2aef2a8500000000, 0xed796bca00000000,
+    0xac4870d300000000, 0x6f1b5df800000000, 0x2e2a46e100000000,
+    0xe136de6600000000, 0xa007c57f00000000, 0x6354e85400000000,
+    0x2265f34d00000000, 0xe5f3b20200000000, 0xa4c2a91b00000000,
+    0x6791843000000000, 0x26a09f2900000000, 0xb8aec5e400000000,
+    0xf99fdefd00000000, 0x3accf3d600000000, 0x7bfde8cf00000000,
+    0xbc6ba98000000000, 0xfd5ab29900000000, 0x3e099fb200000000,
+    0x7f3884ab00000000, 0xb0241c2c00000000, 0xf115073500000000,
+    0x32462a1e00000000, 0x7377310700000000, 0xb4e1704800000000,
+    0xf5d06b5100000000, 0x3683467a00000000, 0x77b25d6300000000,
+    0x4ed7facb00000000, 0x0fe6e1d200000000, 0xccb5ccf900000000,
+    0x8d84d7e000000000, 0x4a1296af00000000, 0x0b238db600000000,
+    0xc870a09d00000000, 0x8941bb8400000000, 0x465d230300000000,
+    0x076c381a00000000, 0xc43f153100000000, 0x850e0e2800000000,
+    0x42984f6700000000, 0x03a9547e00000000, 0xc0fa795500000000,
+    0x81cb624c00000000, 0x1fc5388100000000, 0x5ef4239800000000,
+    0x9da70eb300000000, 0xdc9615aa00000000, 0x1b0054e500000000,
+    0x5a314ffc00000000, 0x996262d700000000, 0xd85379ce00000000,
+    0x174fe14900000000, 0x567efa5000000000, 0x952dd77b00000000,
+    0xd41ccc6200000000, 0x138a8d2d00000000, 0x52bb963400000000,
+    0x91e8bb1f00000000, 0xd0d9a00600000000, 0xecf37e5e00000000,
+    0xadc2654700000000, 0x6e91486c00000000, 0x2fa0537500000000,
+    0xe836123a00000000, 0xa907092300000000, 0x6a54240800000000,
+    0x2b653f1100000000, 0xe479a79600000000, 0xa548bc8f00000000,
+    0x661b91a400000000, 0x272a8abd00000000, 0xe0bccbf200000000,
+    0xa18dd0eb00000000, 0x62defdc000000000, 0x23efe6d900000000,
+    0xbde1bc1400000000, 0xfcd0a70d00000000, 0x3f838a2600000000,
+    0x7eb2913f00000000, 0xb924d07000000000, 0xf815cb6900000000,
+    0x3b46e64200000000, 0x7a77fd5b00000000, 0xb56b65dc00000000,
+    0xf45a7ec500000000, 0x370953ee00000000, 0x763848f700000000,
+    0xb1ae09b800000000, 0xf09f12a100000000, 0x33cc3f8a00000000,
+    0x72fd249300000000},
+   {0x0000000000000000, 0x376ac20100000000, 0x6ed4840300000000,
+    0x59be460200000000, 0xdca8090700000000, 0xebc2cb0600000000,
+    0xb27c8d0400000000, 0x85164f0500000000, 0xb851130e00000000,
+    0x8f3bd10f00000000, 0xd685970d00000000, 0xe1ef550c00000000,
+    0x64f91a0900000000, 0x5393d80800000000, 0x0a2d9e0a00000000,
+    0x3d475c0b00000000, 0x70a3261c00000000, 0x47c9e41d00000000,
+    0x1e77a21f00000000, 0x291d601e00000000, 0xac0b2f1b00000000,
+    0x9b61ed1a00000000, 0xc2dfab1800000000, 0xf5b5691900000000,
+    0xc8f2351200000000, 0xff98f71300000000, 0xa626b11100000000,
+    0x914c731000000000, 0x145a3c1500000000, 0x2330fe1400000000,
+    0x7a8eb81600000000, 0x4de47a1700000000, 0xe0464d3800000000,
+    0xd72c8f3900000000, 0x8e92c93b00000000, 0xb9f80b3a00000000,
+    0x3cee443f00000000, 0x0b84863e00000000, 0x523ac03c00000000,
+    0x6550023d00000000, 0x58175e3600000000, 0x6f7d9c3700000000,
+    0x36c3da3500000000, 0x01a9183400000000, 0x84bf573100000000,
+    0xb3d5953000000000, 0xea6bd33200000000, 0xdd01113300000000,
+    0x90e56b2400000000, 0xa78fa92500000000, 0xfe31ef2700000000,
+    0xc95b2d2600000000, 0x4c4d622300000000, 0x7b27a02200000000,
+    0x2299e62000000000, 0x15f3242100000000, 0x28b4782a00000000,
+    0x1fdeba2b00000000, 0x4660fc2900000000, 0x710a3e2800000000,
+    0xf41c712d00000000, 0xc376b32c00000000, 0x9ac8f52e00000000,
+    0xada2372f00000000, 0xc08d9a7000000000, 0xf7e7587100000000,
+    0xae591e7300000000, 0x9933dc7200000000, 0x1c25937700000000,
+    0x2b4f517600000000, 0x72f1177400000000, 0x459bd57500000000,
+    0x78dc897e00000000, 0x4fb64b7f00000000, 0x16080d7d00000000,
+    0x2162cf7c00000000, 0xa474807900000000, 0x931e427800000000,
+    0xcaa0047a00000000, 0xfdcac67b00000000, 0xb02ebc6c00000000,
+    0x87447e6d00000000, 0xdefa386f00000000, 0xe990fa6e00000000,
+    0x6c86b56b00000000, 0x5bec776a00000000, 0x0252316800000000,
+    0x3538f36900000000, 0x087faf6200000000, 0x3f156d6300000000,
+    0x66ab2b6100000000, 0x51c1e96000000000, 0xd4d7a66500000000,
+    0xe3bd646400000000, 0xba03226600000000, 0x8d69e06700000000,
+    0x20cbd74800000000, 0x17a1154900000000, 0x4e1f534b00000000,
+    0x7975914a00000000, 0xfc63de4f00000000, 0xcb091c4e00000000,
+    0x92b75a4c00000000, 0xa5dd984d00000000, 0x989ac44600000000,
+    0xaff0064700000000, 0xf64e404500000000, 0xc124824400000000,
+    0x4432cd4100000000, 0x73580f4000000000, 0x2ae6494200000000,
+    0x1d8c8b4300000000, 0x5068f15400000000, 0x6702335500000000,
+    0x3ebc755700000000, 0x09d6b75600000000, 0x8cc0f85300000000,
+    0xbbaa3a5200000000, 0xe2147c5000000000, 0xd57ebe5100000000,
+    0xe839e25a00000000, 0xdf53205b00000000, 0x86ed665900000000,
+    0xb187a45800000000, 0x3491eb5d00000000, 0x03fb295c00000000,
+    0x5a456f5e00000000, 0x6d2fad5f00000000, 0x801b35e100000000,
+    0xb771f7e000000000, 0xeecfb1e200000000, 0xd9a573e300000000,
+    0x5cb33ce600000000, 0x6bd9fee700000000, 0x3267b8e500000000,
+    0x050d7ae400000000, 0x384a26ef00000000, 0x0f20e4ee00000000,
+    0x569ea2ec00000000, 0x61f460ed00000000, 0xe4e22fe800000000,
+    0xd388ede900000000, 0x8a36abeb00000000, 0xbd5c69ea00000000,
+    0xf0b813fd00000000, 0xc7d2d1fc00000000, 0x9e6c97fe00000000,
+    0xa90655ff00000000, 0x2c101afa00000000, 0x1b7ad8fb00000000,
+    0x42c49ef900000000, 0x75ae5cf800000000, 0x48e900f300000000,
+    0x7f83c2f200000000, 0x263d84f000000000, 0x115746f100000000,
+    0x944109f400000000, 0xa32bcbf500000000, 0xfa958df700000000,
+    0xcdff4ff600000000, 0x605d78d900000000, 0x5737bad800000000,
+    0x0e89fcda00000000, 0x39e33edb00000000, 0xbcf571de00000000,
+    0x8b9fb3df00000000, 0xd221f5dd00000000, 0xe54b37dc00000000,
+    0xd80c6bd700000000, 0xef66a9d600000000, 0xb6d8efd400000000,
+    0x81b22dd500000000, 0x04a462d000000000, 0x33cea0d100000000,
+    0x6a70e6d300000000, 0x5d1a24d200000000, 0x10fe5ec500000000,
+    0x27949cc400000000, 0x7e2adac600000000, 0x494018c700000000,
+    0xcc5657c200000000, 0xfb3c95c300000000, 0xa282d3c100000000,
+    0x95e811c000000000, 0xa8af4dcb00000000, 0x9fc58fca00000000,
+    0xc67bc9c800000000, 0xf1110bc900000000, 0x740744cc00000000,
+    0x436d86cd00000000, 0x1ad3c0cf00000000, 0x2db902ce00000000,
+    0x4096af9100000000, 0x77fc6d9000000000, 0x2e422b9200000000,
+    0x1928e99300000000, 0x9c3ea69600000000, 0xab54649700000000,
+    0xf2ea229500000000, 0xc580e09400000000, 0xf8c7bc9f00000000,
+    0xcfad7e9e00000000, 0x9613389c00000000, 0xa179fa9d00000000,
+    0x246fb59800000000, 0x1305779900000000, 0x4abb319b00000000,
+    0x7dd1f39a00000000, 0x3035898d00000000, 0x075f4b8c00000000,
+    0x5ee10d8e00000000, 0x698bcf8f00000000, 0xec9d808a00000000,
+    0xdbf7428b00000000, 0x8249048900000000, 0xb523c68800000000,
+    0x88649a8300000000, 0xbf0e588200000000, 0xe6b01e8000000000,
+    0xd1dadc8100000000, 0x54cc938400000000, 0x63a6518500000000,
+    0x3a18178700000000, 0x0d72d58600000000, 0xa0d0e2a900000000,
+    0x97ba20a800000000, 0xce0466aa00000000, 0xf96ea4ab00000000,
+    0x7c78ebae00000000, 0x4b1229af00000000, 0x12ac6fad00000000,
+    0x25c6adac00000000, 0x1881f1a700000000, 0x2feb33a600000000,
+    0x765575a400000000, 0x413fb7a500000000, 0xc429f8a000000000,
+    0xf3433aa100000000, 0xaafd7ca300000000, 0x9d97bea200000000,
+    0xd073c4b500000000, 0xe71906b400000000, 0xbea740b600000000,
+    0x89cd82b700000000, 0x0cdbcdb200000000, 0x3bb10fb300000000,
+    0x620f49b100000000, 0x55658bb000000000, 0x6822d7bb00000000,
+    0x5f4815ba00000000, 0x06f653b800000000, 0x319c91b900000000,
+    0xb48adebc00000000, 0x83e01cbd00000000, 0xda5e5abf00000000,
+    0xed3498be00000000},
+   {0x0000000000000000, 0x6567bcb800000000, 0x8bc809aa00000000,
+    0xeeafb51200000000, 0x5797628f00000000, 0x32f0de3700000000,
+    0xdc5f6b2500000000, 0xb938d79d00000000, 0xef28b4c500000000,
+    0x8a4f087d00000000, 0x64e0bd6f00000000, 0x018701d700000000,
+    0xb8bfd64a00000000, 0xddd86af200000000, 0x3377dfe000000000,
+    0x5610635800000000, 0x9f57195000000000, 0xfa30a5e800000000,
+    0x149f10fa00000000, 0x71f8ac4200000000, 0xc8c07bdf00000000,
+    0xada7c76700000000, 0x4308727500000000, 0x266fcecd00000000,
+    0x707fad9500000000, 0x1518112d00000000, 0xfbb7a43f00000000,
+    0x9ed0188700000000, 0x27e8cf1a00000000, 0x428f73a200000000,
+    0xac20c6b000000000, 0xc9477a0800000000, 0x3eaf32a000000000,
+    0x5bc88e1800000000, 0xb5673b0a00000000, 0xd00087b200000000,
+    0x6938502f00000000, 0x0c5fec9700000000, 0xe2f0598500000000,
+    0x8797e53d00000000, 0xd187866500000000, 0xb4e03add00000000,
+    0x5a4f8fcf00000000, 0x3f28337700000000, 0x8610e4ea00000000,
+    0xe377585200000000, 0x0dd8ed4000000000, 0x68bf51f800000000,
+    0xa1f82bf000000000, 0xc49f974800000000, 0x2a30225a00000000,
+    0x4f579ee200000000, 0xf66f497f00000000, 0x9308f5c700000000,
+    0x7da740d500000000, 0x18c0fc6d00000000, 0x4ed09f3500000000,
+    0x2bb7238d00000000, 0xc518969f00000000, 0xa07f2a2700000000,
+    0x1947fdba00000000, 0x7c20410200000000, 0x928ff41000000000,
+    0xf7e848a800000000, 0x3d58149b00000000, 0x583fa82300000000,
+    0xb6901d3100000000, 0xd3f7a18900000000, 0x6acf761400000000,
+    0x0fa8caac00000000, 0xe1077fbe00000000, 0x8460c30600000000,
+    0xd270a05e00000000, 0xb7171ce600000000, 0x59b8a9f400000000,
+    0x3cdf154c00000000, 0x85e7c2d100000000, 0xe0807e6900000000,
+    0x0e2fcb7b00000000, 0x6b4877c300000000, 0xa20f0dcb00000000,
+    0xc768b17300000000, 0x29c7046100000000, 0x4ca0b8d900000000,
+    0xf5986f4400000000, 0x90ffd3fc00000000, 0x7e5066ee00000000,
+    0x1b37da5600000000, 0x4d27b90e00000000, 0x284005b600000000,
+    0xc6efb0a400000000, 0xa3880c1c00000000, 0x1ab0db8100000000,
+    0x7fd7673900000000, 0x9178d22b00000000, 0xf41f6e9300000000,
+    0x03f7263b00000000, 0x66909a8300000000, 0x883f2f9100000000,
+    0xed58932900000000, 0x546044b400000000, 0x3107f80c00000000,
+    0xdfa84d1e00000000, 0xbacff1a600000000, 0xecdf92fe00000000,
+    0x89b82e4600000000, 0x67179b5400000000, 0x027027ec00000000,
+    0xbb48f07100000000, 0xde2f4cc900000000, 0x3080f9db00000000,
+    0x55e7456300000000, 0x9ca03f6b00000000, 0xf9c783d300000000,
+    0x176836c100000000, 0x720f8a7900000000, 0xcb375de400000000,
+    0xae50e15c00000000, 0x40ff544e00000000, 0x2598e8f600000000,
+    0x73888bae00000000, 0x16ef371600000000, 0xf840820400000000,
+    0x9d273ebc00000000, 0x241fe92100000000, 0x4178559900000000,
+    0xafd7e08b00000000, 0xcab05c3300000000, 0x3bb659ed00000000,
+    0x5ed1e55500000000, 0xb07e504700000000, 0xd519ecff00000000,
+    0x6c213b6200000000, 0x094687da00000000, 0xe7e932c800000000,
+    0x828e8e7000000000, 0xd49eed2800000000, 0xb1f9519000000000,
+    0x5f56e48200000000, 0x3a31583a00000000, 0x83098fa700000000,
+    0xe66e331f00000000, 0x08c1860d00000000, 0x6da63ab500000000,
+    0xa4e140bd00000000, 0xc186fc0500000000, 0x2f29491700000000,
+    0x4a4ef5af00000000, 0xf376223200000000, 0x96119e8a00000000,
+    0x78be2b9800000000, 0x1dd9972000000000, 0x4bc9f47800000000,
+    0x2eae48c000000000, 0xc001fdd200000000, 0xa566416a00000000,
+    0x1c5e96f700000000, 0x79392a4f00000000, 0x97969f5d00000000,
+    0xf2f123e500000000, 0x05196b4d00000000, 0x607ed7f500000000,
+    0x8ed162e700000000, 0xebb6de5f00000000, 0x528e09c200000000,
+    0x37e9b57a00000000, 0xd946006800000000, 0xbc21bcd000000000,
+    0xea31df8800000000, 0x8f56633000000000, 0x61f9d62200000000,
+    0x049e6a9a00000000, 0xbda6bd0700000000, 0xd8c101bf00000000,
+    0x366eb4ad00000000, 0x5309081500000000, 0x9a4e721d00000000,
+    0xff29cea500000000, 0x11867bb700000000, 0x74e1c70f00000000,
+    0xcdd9109200000000, 0xa8beac2a00000000, 0x4611193800000000,
+    0x2376a58000000000, 0x7566c6d800000000, 0x10017a6000000000,
+    0xfeaecf7200000000, 0x9bc973ca00000000, 0x22f1a45700000000,
+    0x479618ef00000000, 0xa939adfd00000000, 0xcc5e114500000000,
+    0x06ee4d7600000000, 0x6389f1ce00000000, 0x8d2644dc00000000,
+    0xe841f86400000000, 0x51792ff900000000, 0x341e934100000000,
+    0xdab1265300000000, 0xbfd69aeb00000000, 0xe9c6f9b300000000,
+    0x8ca1450b00000000, 0x620ef01900000000, 0x07694ca100000000,
+    0xbe519b3c00000000, 0xdb36278400000000, 0x3599929600000000,
+    0x50fe2e2e00000000, 0x99b9542600000000, 0xfcdee89e00000000,
+    0x12715d8c00000000, 0x7716e13400000000, 0xce2e36a900000000,
+    0xab498a1100000000, 0x45e63f0300000000, 0x208183bb00000000,
+    0x7691e0e300000000, 0x13f65c5b00000000, 0xfd59e94900000000,
+    0x983e55f100000000, 0x2106826c00000000, 0x44613ed400000000,
+    0xaace8bc600000000, 0xcfa9377e00000000, 0x38417fd600000000,
+    0x5d26c36e00000000, 0xb389767c00000000, 0xd6eecac400000000,
+    0x6fd61d5900000000, 0x0ab1a1e100000000, 0xe41e14f300000000,
+    0x8179a84b00000000, 0xd769cb1300000000, 0xb20e77ab00000000,
+    0x5ca1c2b900000000, 0x39c67e0100000000, 0x80fea99c00000000,
+    0xe599152400000000, 0x0b36a03600000000, 0x6e511c8e00000000,
+    0xa716668600000000, 0xc271da3e00000000, 0x2cde6f2c00000000,
+    0x49b9d39400000000, 0xf081040900000000, 0x95e6b8b100000000,
+    0x7b490da300000000, 0x1e2eb11b00000000, 0x483ed24300000000,
+    0x2d596efb00000000, 0xc3f6dbe900000000, 0xa691675100000000,
+    0x1fa9b0cc00000000, 0x7ace0c7400000000, 0x9461b96600000000,
+    0xf10605de00000000},
+   {0x0000000000000000, 0xb029603d00000000, 0x6053c07a00000000,
+    0xd07aa04700000000, 0xc0a680f500000000, 0x708fe0c800000000,
+    0xa0f5408f00000000, 0x10dc20b200000000, 0xc14b703000000000,
+    0x7162100d00000000, 0xa118b04a00000000, 0x1131d07700000000,
+    0x01edf0c500000000, 0xb1c490f800000000, 0x61be30bf00000000,
+    0xd197508200000000, 0x8297e06000000000, 0x32be805d00000000,
+    0xe2c4201a00000000, 0x52ed402700000000, 0x4231609500000000,
+    0xf21800a800000000, 0x2262a0ef00000000, 0x924bc0d200000000,
+    0x43dc905000000000, 0xf3f5f06d00000000, 0x238f502a00000000,
+    0x93a6301700000000, 0x837a10a500000000, 0x3353709800000000,
+    0xe329d0df00000000, 0x5300b0e200000000, 0x042fc1c100000000,
+    0xb406a1fc00000000, 0x647c01bb00000000, 0xd455618600000000,
+    0xc489413400000000, 0x74a0210900000000, 0xa4da814e00000000,
+    0x14f3e17300000000, 0xc564b1f100000000, 0x754dd1cc00000000,
+    0xa537718b00000000, 0x151e11b600000000, 0x05c2310400000000,
+    0xb5eb513900000000, 0x6591f17e00000000, 0xd5b8914300000000,
+    0x86b821a100000000, 0x3691419c00000000, 0xe6ebe1db00000000,
+    0x56c281e600000000, 0x461ea15400000000, 0xf637c16900000000,
+    0x264d612e00000000, 0x9664011300000000, 0x47f3519100000000,
+    0xf7da31ac00000000, 0x27a091eb00000000, 0x9789f1d600000000,
+    0x8755d16400000000, 0x377cb15900000000, 0xe706111e00000000,
+    0x572f712300000000, 0x4958f35800000000, 0xf971936500000000,
+    0x290b332200000000, 0x9922531f00000000, 0x89fe73ad00000000,
+    0x39d7139000000000, 0xe9adb3d700000000, 0x5984d3ea00000000,
+    0x8813836800000000, 0x383ae35500000000, 0xe840431200000000,
+    0x5869232f00000000, 0x48b5039d00000000, 0xf89c63a000000000,
+    0x28e6c3e700000000, 0x98cfa3da00000000, 0xcbcf133800000000,
+    0x7be6730500000000, 0xab9cd34200000000, 0x1bb5b37f00000000,
+    0x0b6993cd00000000, 0xbb40f3f000000000, 0x6b3a53b700000000,
+    0xdb13338a00000000, 0x0a84630800000000, 0xbaad033500000000,
+    0x6ad7a37200000000, 0xdafec34f00000000, 0xca22e3fd00000000,
+    0x7a0b83c000000000, 0xaa71238700000000, 0x1a5843ba00000000,
+    0x4d77329900000000, 0xfd5e52a400000000, 0x2d24f2e300000000,
+    0x9d0d92de00000000, 0x8dd1b26c00000000, 0x3df8d25100000000,
+    0xed82721600000000, 0x5dab122b00000000, 0x8c3c42a900000000,
+    0x3c15229400000000, 0xec6f82d300000000, 0x5c46e2ee00000000,
+    0x4c9ac25c00000000, 0xfcb3a26100000000, 0x2cc9022600000000,
+    0x9ce0621b00000000, 0xcfe0d2f900000000, 0x7fc9b2c400000000,
+    0xafb3128300000000, 0x1f9a72be00000000, 0x0f46520c00000000,
+    0xbf6f323100000000, 0x6f15927600000000, 0xdf3cf24b00000000,
+    0x0eaba2c900000000, 0xbe82c2f400000000, 0x6ef862b300000000,
+    0xded1028e00000000, 0xce0d223c00000000, 0x7e24420100000000,
+    0xae5ee24600000000, 0x1e77827b00000000, 0x92b0e6b100000000,
+    0x2299868c00000000, 0xf2e326cb00000000, 0x42ca46f600000000,
+    0x5216664400000000, 0xe23f067900000000, 0x3245a63e00000000,
+    0x826cc60300000000, 0x53fb968100000000, 0xe3d2f6bc00000000,
+    0x33a856fb00000000, 0x838136c600000000, 0x935d167400000000,
+    0x2374764900000000, 0xf30ed60e00000000, 0x4327b63300000000,
+    0x102706d100000000, 0xa00e66ec00000000, 0x7074c6ab00000000,
+    0xc05da69600000000, 0xd081862400000000, 0x60a8e61900000000,
+    0xb0d2465e00000000, 0x00fb266300000000, 0xd16c76e100000000,
+    0x614516dc00000000, 0xb13fb69b00000000, 0x0116d6a600000000,
+    0x11caf61400000000, 0xa1e3962900000000, 0x7199366e00000000,
+    0xc1b0565300000000, 0x969f277000000000, 0x26b6474d00000000,
+    0xf6cce70a00000000, 0x46e5873700000000, 0x5639a78500000000,
+    0xe610c7b800000000, 0x366a67ff00000000, 0x864307c200000000,
+    0x57d4574000000000, 0xe7fd377d00000000, 0x3787973a00000000,
+    0x87aef70700000000, 0x9772d7b500000000, 0x275bb78800000000,
+    0xf72117cf00000000, 0x470877f200000000, 0x1408c71000000000,
+    0xa421a72d00000000, 0x745b076a00000000, 0xc472675700000000,
+    0xd4ae47e500000000, 0x648727d800000000, 0xb4fd879f00000000,
+    0x04d4e7a200000000, 0xd543b72000000000, 0x656ad71d00000000,
+    0xb510775a00000000, 0x0539176700000000, 0x15e537d500000000,
+    0xa5cc57e800000000, 0x75b6f7af00000000, 0xc59f979200000000,
+    0xdbe815e900000000, 0x6bc175d400000000, 0xbbbbd59300000000,
+    0x0b92b5ae00000000, 0x1b4e951c00000000, 0xab67f52100000000,
+    0x7b1d556600000000, 0xcb34355b00000000, 0x1aa365d900000000,
+    0xaa8a05e400000000, 0x7af0a5a300000000, 0xcad9c59e00000000,
+    0xda05e52c00000000, 0x6a2c851100000000, 0xba56255600000000,
+    0x0a7f456b00000000, 0x597ff58900000000, 0xe95695b400000000,
+    0x392c35f300000000, 0x890555ce00000000, 0x99d9757c00000000,
+    0x29f0154100000000, 0xf98ab50600000000, 0x49a3d53b00000000,
+    0x983485b900000000, 0x281de58400000000, 0xf86745c300000000,
+    0x484e25fe00000000, 0x5892054c00000000, 0xe8bb657100000000,
+    0x38c1c53600000000, 0x88e8a50b00000000, 0xdfc7d42800000000,
+    0x6feeb41500000000, 0xbf94145200000000, 0x0fbd746f00000000,
+    0x1f6154dd00000000, 0xaf4834e000000000, 0x7f3294a700000000,
+    0xcf1bf49a00000000, 0x1e8ca41800000000, 0xaea5c42500000000,
+    0x7edf646200000000, 0xcef6045f00000000, 0xde2a24ed00000000,
+    0x6e0344d000000000, 0xbe79e49700000000, 0x0e5084aa00000000,
+    0x5d50344800000000, 0xed79547500000000, 0x3d03f43200000000,
+    0x8d2a940f00000000, 0x9df6b4bd00000000, 0x2ddfd48000000000,
+    0xfda574c700000000, 0x4d8c14fa00000000, 0x9c1b447800000000,
+    0x2c32244500000000, 0xfc48840200000000, 0x4c61e43f00000000,
+    0x5cbdc48d00000000, 0xec94a4b000000000, 0x3cee04f700000000,
+    0x8cc764ca00000000},
+   {0x0000000000000000, 0xa5d35ccb00000000, 0x0ba1c84d00000000,
+    0xae72948600000000, 0x1642919b00000000, 0xb391cd5000000000,
+    0x1de359d600000000, 0xb830051d00000000, 0x6d8253ec00000000,
+    0xc8510f2700000000, 0x66239ba100000000, 0xc3f0c76a00000000,
+    0x7bc0c27700000000, 0xde139ebc00000000, 0x70610a3a00000000,
+    0xd5b256f100000000, 0x9b02d60300000000, 0x3ed18ac800000000,
+    0x90a31e4e00000000, 0x3570428500000000, 0x8d40479800000000,
+    0x28931b5300000000, 0x86e18fd500000000, 0x2332d31e00000000,
+    0xf68085ef00000000, 0x5353d92400000000, 0xfd214da200000000,
+    0x58f2116900000000, 0xe0c2147400000000, 0x451148bf00000000,
+    0xeb63dc3900000000, 0x4eb080f200000000, 0x3605ac0700000000,
+    0x93d6f0cc00000000, 0x3da4644a00000000, 0x9877388100000000,
+    0x20473d9c00000000, 0x8594615700000000, 0x2be6f5d100000000,
+    0x8e35a91a00000000, 0x5b87ffeb00000000, 0xfe54a32000000000,
+    0x502637a600000000, 0xf5f56b6d00000000, 0x4dc56e7000000000,
+    0xe81632bb00000000, 0x4664a63d00000000, 0xe3b7faf600000000,
+    0xad077a0400000000, 0x08d426cf00000000, 0xa6a6b24900000000,
+    0x0375ee8200000000, 0xbb45eb9f00000000, 0x1e96b75400000000,
+    0xb0e423d200000000, 0x15377f1900000000, 0xc08529e800000000,
+    0x6556752300000000, 0xcb24e1a500000000, 0x6ef7bd6e00000000,
+    0xd6c7b87300000000, 0x7314e4b800000000, 0xdd66703e00000000,
+    0x78b52cf500000000, 0x6c0a580f00000000, 0xc9d904c400000000,
+    0x67ab904200000000, 0xc278cc8900000000, 0x7a48c99400000000,
+    0xdf9b955f00000000, 0x71e901d900000000, 0xd43a5d1200000000,
+    0x01880be300000000, 0xa45b572800000000, 0x0a29c3ae00000000,
+    0xaffa9f6500000000, 0x17ca9a7800000000, 0xb219c6b300000000,
+    0x1c6b523500000000, 0xb9b80efe00000000, 0xf7088e0c00000000,
+    0x52dbd2c700000000, 0xfca9464100000000, 0x597a1a8a00000000,
+    0xe14a1f9700000000, 0x4499435c00000000, 0xeaebd7da00000000,
+    0x4f388b1100000000, 0x9a8adde000000000, 0x3f59812b00000000,
+    0x912b15ad00000000, 0x34f8496600000000, 0x8cc84c7b00000000,
+    0x291b10b000000000, 0x8769843600000000, 0x22bad8fd00000000,
+    0x5a0ff40800000000, 0xffdca8c300000000, 0x51ae3c4500000000,
+    0xf47d608e00000000, 0x4c4d659300000000, 0xe99e395800000000,
+    0x47ecadde00000000, 0xe23ff11500000000, 0x378da7e400000000,
+    0x925efb2f00000000, 0x3c2c6fa900000000, 0x99ff336200000000,
+    0x21cf367f00000000, 0x841c6ab400000000, 0x2a6efe3200000000,
+    0x8fbda2f900000000, 0xc10d220b00000000, 0x64de7ec000000000,
+    0xcaacea4600000000, 0x6f7fb68d00000000, 0xd74fb39000000000,
+    0x729cef5b00000000, 0xdcee7bdd00000000, 0x793d271600000000,
+    0xac8f71e700000000, 0x095c2d2c00000000, 0xa72eb9aa00000000,
+    0x02fde56100000000, 0xbacde07c00000000, 0x1f1ebcb700000000,
+    0xb16c283100000000, 0x14bf74fa00000000, 0xd814b01e00000000,
+    0x7dc7ecd500000000, 0xd3b5785300000000, 0x7666249800000000,
+    0xce56218500000000, 0x6b857d4e00000000, 0xc5f7e9c800000000,
+    0x6024b50300000000, 0xb596e3f200000000, 0x1045bf3900000000,
+    0xbe372bbf00000000, 0x1be4777400000000, 0xa3d4726900000000,
+    0x06072ea200000000, 0xa875ba2400000000, 0x0da6e6ef00000000,
+    0x4316661d00000000, 0xe6c53ad600000000, 0x48b7ae5000000000,
+    0xed64f29b00000000, 0x5554f78600000000, 0xf087ab4d00000000,
+    0x5ef53fcb00000000, 0xfb26630000000000, 0x2e9435f100000000,
+    0x8b47693a00000000, 0x2535fdbc00000000, 0x80e6a17700000000,
+    0x38d6a46a00000000, 0x9d05f8a100000000, 0x33776c2700000000,
+    0x96a430ec00000000, 0xee111c1900000000, 0x4bc240d200000000,
+    0xe5b0d45400000000, 0x4063889f00000000, 0xf8538d8200000000,
+    0x5d80d14900000000, 0xf3f245cf00000000, 0x5621190400000000,
+    0x83934ff500000000, 0x2640133e00000000, 0x883287b800000000,
+    0x2de1db7300000000, 0x95d1de6e00000000, 0x300282a500000000,
+    0x9e70162300000000, 0x3ba34ae800000000, 0x7513ca1a00000000,
+    0xd0c096d100000000, 0x7eb2025700000000, 0xdb615e9c00000000,
+    0x63515b8100000000, 0xc682074a00000000, 0x68f093cc00000000,
+    0xcd23cf0700000000, 0x189199f600000000, 0xbd42c53d00000000,
+    0x133051bb00000000, 0xb6e30d7000000000, 0x0ed3086d00000000,
+    0xab0054a600000000, 0x0572c02000000000, 0xa0a19ceb00000000,
+    0xb41ee81100000000, 0x11cdb4da00000000, 0xbfbf205c00000000,
+    0x1a6c7c9700000000, 0xa25c798a00000000, 0x078f254100000000,
+    0xa9fdb1c700000000, 0x0c2eed0c00000000, 0xd99cbbfd00000000,
+    0x7c4fe73600000000, 0xd23d73b000000000, 0x77ee2f7b00000000,
+    0xcfde2a6600000000, 0x6a0d76ad00000000, 0xc47fe22b00000000,
+    0x61acbee000000000, 0x2f1c3e1200000000, 0x8acf62d900000000,
+    0x24bdf65f00000000, 0x816eaa9400000000, 0x395eaf8900000000,
+    0x9c8df34200000000, 0x32ff67c400000000, 0x972c3b0f00000000,
+    0x429e6dfe00000000, 0xe74d313500000000, 0x493fa5b300000000,
+    0xececf97800000000, 0x54dcfc6500000000, 0xf10fa0ae00000000,
+    0x5f7d342800000000, 0xfaae68e300000000, 0x821b441600000000,
+    0x27c818dd00000000, 0x89ba8c5b00000000, 0x2c69d09000000000,
+    0x9459d58d00000000, 0x318a894600000000, 0x9ff81dc000000000,
+    0x3a2b410b00000000, 0xef9917fa00000000, 0x4a4a4b3100000000,
+    0xe438dfb700000000, 0x41eb837c00000000, 0xf9db866100000000,
+    0x5c08daaa00000000, 0xf27a4e2c00000000, 0x57a912e700000000,
+    0x1919921500000000, 0xbccacede00000000, 0x12b85a5800000000,
+    0xb76b069300000000, 0x0f5b038e00000000, 0xaa885f4500000000,
+    0x04facbc300000000, 0xa129970800000000, 0x749bc1f900000000,
+    0xd1489d3200000000, 0x7f3a09b400000000, 0xdae9557f00000000,
+    0x62d9506200000000, 0xc70a0ca900000000, 0x6978982f00000000,
+    0xccabc4e400000000},
+   {0x0000000000000000, 0xb40b77a600000000, 0x29119f9700000000,
+    0x9d1ae83100000000, 0x13244ff400000000, 0xa72f385200000000,
+    0x3a35d06300000000, 0x8e3ea7c500000000, 0x674eef3300000000,
+    0xd345989500000000, 0x4e5f70a400000000, 0xfa54070200000000,
+    0x746aa0c700000000, 0xc061d76100000000, 0x5d7b3f5000000000,
+    0xe97048f600000000, 0xce9cde6700000000, 0x7a97a9c100000000,
+    0xe78d41f000000000, 0x5386365600000000, 0xddb8919300000000,
+    0x69b3e63500000000, 0xf4a90e0400000000, 0x40a279a200000000,
+    0xa9d2315400000000, 0x1dd946f200000000, 0x80c3aec300000000,
+    0x34c8d96500000000, 0xbaf67ea000000000, 0x0efd090600000000,
+    0x93e7e13700000000, 0x27ec969100000000, 0x9c39bdcf00000000,
+    0x2832ca6900000000, 0xb528225800000000, 0x012355fe00000000,
+    0x8f1df23b00000000, 0x3b16859d00000000, 0xa60c6dac00000000,
+    0x12071a0a00000000, 0xfb7752fc00000000, 0x4f7c255a00000000,
+    0xd266cd6b00000000, 0x666dbacd00000000, 0xe8531d0800000000,
+    0x5c586aae00000000, 0xc142829f00000000, 0x7549f53900000000,
+    0x52a563a800000000, 0xe6ae140e00000000, 0x7bb4fc3f00000000,
+    0xcfbf8b9900000000, 0x41812c5c00000000, 0xf58a5bfa00000000,
+    0x6890b3cb00000000, 0xdc9bc46d00000000, 0x35eb8c9b00000000,
+    0x81e0fb3d00000000, 0x1cfa130c00000000, 0xa8f164aa00000000,
+    0x26cfc36f00000000, 0x92c4b4c900000000, 0x0fde5cf800000000,
+    0xbbd52b5e00000000, 0x79750b4400000000, 0xcd7e7ce200000000,
+    0x506494d300000000, 0xe46fe37500000000, 0x6a5144b000000000,
+    0xde5a331600000000, 0x4340db2700000000, 0xf74bac8100000000,
+    0x1e3be47700000000, 0xaa3093d100000000, 0x372a7be000000000,
+    0x83210c4600000000, 0x0d1fab8300000000, 0xb914dc2500000000,
+    0x240e341400000000, 0x900543b200000000, 0xb7e9d52300000000,
+    0x03e2a28500000000, 0x9ef84ab400000000, 0x2af33d1200000000,
+    0xa4cd9ad700000000, 0x10c6ed7100000000, 0x8ddc054000000000,
+    0x39d772e600000000, 0xd0a73a1000000000, 0x64ac4db600000000,
+    0xf9b6a58700000000, 0x4dbdd22100000000, 0xc38375e400000000,
+    0x7788024200000000, 0xea92ea7300000000, 0x5e999dd500000000,
+    0xe54cb68b00000000, 0x5147c12d00000000, 0xcc5d291c00000000,
+    0x78565eba00000000, 0xf668f97f00000000, 0x42638ed900000000,
+    0xdf7966e800000000, 0x6b72114e00000000, 0x820259b800000000,
+    0x36092e1e00000000, 0xab13c62f00000000, 0x1f18b18900000000,
+    0x9126164c00000000, 0x252d61ea00000000, 0xb83789db00000000,
+    0x0c3cfe7d00000000, 0x2bd068ec00000000, 0x9fdb1f4a00000000,
+    0x02c1f77b00000000, 0xb6ca80dd00000000, 0x38f4271800000000,
+    0x8cff50be00000000, 0x11e5b88f00000000, 0xa5eecf2900000000,
+    0x4c9e87df00000000, 0xf895f07900000000, 0x658f184800000000,
+    0xd1846fee00000000, 0x5fbac82b00000000, 0xebb1bf8d00000000,
+    0x76ab57bc00000000, 0xc2a0201a00000000, 0xf2ea168800000000,
+    0x46e1612e00000000, 0xdbfb891f00000000, 0x6ff0feb900000000,
+    0xe1ce597c00000000, 0x55c52eda00000000, 0xc8dfc6eb00000000,
+    0x7cd4b14d00000000, 0x95a4f9bb00000000, 0x21af8e1d00000000,
+    0xbcb5662c00000000, 0x08be118a00000000, 0x8680b64f00000000,
+    0x328bc1e900000000, 0xaf9129d800000000, 0x1b9a5e7e00000000,
+    0x3c76c8ef00000000, 0x887dbf4900000000, 0x1567577800000000,
+    0xa16c20de00000000, 0x2f52871b00000000, 0x9b59f0bd00000000,
+    0x0643188c00000000, 0xb2486f2a00000000, 0x5b3827dc00000000,
+    0xef33507a00000000, 0x7229b84b00000000, 0xc622cfed00000000,
+    0x481c682800000000, 0xfc171f8e00000000, 0x610df7bf00000000,
+    0xd506801900000000, 0x6ed3ab4700000000, 0xdad8dce100000000,
+    0x47c234d000000000, 0xf3c9437600000000, 0x7df7e4b300000000,
+    0xc9fc931500000000, 0x54e67b2400000000, 0xe0ed0c8200000000,
+    0x099d447400000000, 0xbd9633d200000000, 0x208cdbe300000000,
+    0x9487ac4500000000, 0x1ab90b8000000000, 0xaeb27c2600000000,
+    0x33a8941700000000, 0x87a3e3b100000000, 0xa04f752000000000,
+    0x1444028600000000, 0x895eeab700000000, 0x3d559d1100000000,
+    0xb36b3ad400000000, 0x07604d7200000000, 0x9a7aa54300000000,
+    0x2e71d2e500000000, 0xc7019a1300000000, 0x730aedb500000000,
+    0xee10058400000000, 0x5a1b722200000000, 0xd425d5e700000000,
+    0x602ea24100000000, 0xfd344a7000000000, 0x493f3dd600000000,
+    0x8b9f1dcc00000000, 0x3f946a6a00000000, 0xa28e825b00000000,
+    0x1685f5fd00000000, 0x98bb523800000000, 0x2cb0259e00000000,
+    0xb1aacdaf00000000, 0x05a1ba0900000000, 0xecd1f2ff00000000,
+    0x58da855900000000, 0xc5c06d6800000000, 0x71cb1ace00000000,
+    0xfff5bd0b00000000, 0x4bfecaad00000000, 0xd6e4229c00000000,
+    0x62ef553a00000000, 0x4503c3ab00000000, 0xf108b40d00000000,
+    0x6c125c3c00000000, 0xd8192b9a00000000, 0x56278c5f00000000,
+    0xe22cfbf900000000, 0x7f3613c800000000, 0xcb3d646e00000000,
+    0x224d2c9800000000, 0x96465b3e00000000, 0x0b5cb30f00000000,
+    0xbf57c4a900000000, 0x3169636c00000000, 0x856214ca00000000,
+    0x1878fcfb00000000, 0xac738b5d00000000, 0x17a6a00300000000,
+    0xa3add7a500000000, 0x3eb73f9400000000, 0x8abc483200000000,
+    0x0482eff700000000, 0xb089985100000000, 0x2d93706000000000,
+    0x999807c600000000, 0x70e84f3000000000, 0xc4e3389600000000,
+    0x59f9d0a700000000, 0xedf2a70100000000, 0x63cc00c400000000,
+    0xd7c7776200000000, 0x4add9f5300000000, 0xfed6e8f500000000,
+    0xd93a7e6400000000, 0x6d3109c200000000, 0xf02be1f300000000,
+    0x4420965500000000, 0xca1e319000000000, 0x7e15463600000000,
+    0xe30fae0700000000, 0x5704d9a100000000, 0xbe74915700000000,
+    0x0a7fe6f100000000, 0x97650ec000000000, 0x236e796600000000,
+    0xad50dea300000000, 0x195ba90500000000, 0x8441413400000000,
+    0x304a369200000000},
+   {0x0000000000000000, 0x9e00aacc00000000, 0x7d07254200000000,
+    0xe3078f8e00000000, 0xfa0e4a8400000000, 0x640ee04800000000,
+    0x87096fc600000000, 0x1909c50a00000000, 0xb51be5d300000000,
+    0x2b1b4f1f00000000, 0xc81cc09100000000, 0x561c6a5d00000000,
+    0x4f15af5700000000, 0xd115059b00000000, 0x32128a1500000000,
+    0xac1220d900000000, 0x2b31bb7c00000000, 0xb53111b000000000,
+    0x56369e3e00000000, 0xc83634f200000000, 0xd13ff1f800000000,
+    0x4f3f5b3400000000, 0xac38d4ba00000000, 0x32387e7600000000,
+    0x9e2a5eaf00000000, 0x002af46300000000, 0xe32d7bed00000000,
+    0x7d2dd12100000000, 0x6424142b00000000, 0xfa24bee700000000,
+    0x1923316900000000, 0x87239ba500000000, 0x566276f900000000,
+    0xc862dc3500000000, 0x2b6553bb00000000, 0xb565f97700000000,
+    0xac6c3c7d00000000, 0x326c96b100000000, 0xd16b193f00000000,
+    0x4f6bb3f300000000, 0xe379932a00000000, 0x7d7939e600000000,
+    0x9e7eb66800000000, 0x007e1ca400000000, 0x1977d9ae00000000,
+    0x8777736200000000, 0x6470fcec00000000, 0xfa70562000000000,
+    0x7d53cd8500000000, 0xe353674900000000, 0x0054e8c700000000,
+    0x9e54420b00000000, 0x875d870100000000, 0x195d2dcd00000000,
+    0xfa5aa24300000000, 0x645a088f00000000, 0xc848285600000000,
+    0x5648829a00000000, 0xb54f0d1400000000, 0x2b4fa7d800000000,
+    0x324662d200000000, 0xac46c81e00000000, 0x4f41479000000000,
+    0xd141ed5c00000000, 0xedc29d2900000000, 0x73c237e500000000,
+    0x90c5b86b00000000, 0x0ec512a700000000, 0x17ccd7ad00000000,
+    0x89cc7d6100000000, 0x6acbf2ef00000000, 0xf4cb582300000000,
+    0x58d978fa00000000, 0xc6d9d23600000000, 0x25de5db800000000,
+    0xbbdef77400000000, 0xa2d7327e00000000, 0x3cd798b200000000,
+    0xdfd0173c00000000, 0x41d0bdf000000000, 0xc6f3265500000000,
+    0x58f38c9900000000, 0xbbf4031700000000, 0x25f4a9db00000000,
+    0x3cfd6cd100000000, 0xa2fdc61d00000000, 0x41fa499300000000,
+    0xdffae35f00000000, 0x73e8c38600000000, 0xede8694a00000000,
+    0x0eefe6c400000000, 0x90ef4c0800000000, 0x89e6890200000000,
+    0x17e623ce00000000, 0xf4e1ac4000000000, 0x6ae1068c00000000,
+    0xbba0ebd000000000, 0x25a0411c00000000, 0xc6a7ce9200000000,
+    0x58a7645e00000000, 0x41aea15400000000, 0xdfae0b9800000000,
+    0x3ca9841600000000, 0xa2a92eda00000000, 0x0ebb0e0300000000,
+    0x90bba4cf00000000, 0x73bc2b4100000000, 0xedbc818d00000000,
+    0xf4b5448700000000, 0x6ab5ee4b00000000, 0x89b261c500000000,
+    0x17b2cb0900000000, 0x909150ac00000000, 0x0e91fa6000000000,
+    0xed9675ee00000000, 0x7396df2200000000, 0x6a9f1a2800000000,
+    0xf49fb0e400000000, 0x17983f6a00000000, 0x899895a600000000,
+    0x258ab57f00000000, 0xbb8a1fb300000000, 0x588d903d00000000,
+    0xc68d3af100000000, 0xdf84fffb00000000, 0x4184553700000000,
+    0xa283dab900000000, 0x3c83707500000000, 0xda853b5300000000,
+    0x4485919f00000000, 0xa7821e1100000000, 0x3982b4dd00000000,
+    0x208b71d700000000, 0xbe8bdb1b00000000, 0x5d8c549500000000,
+    0xc38cfe5900000000, 0x6f9ede8000000000, 0xf19e744c00000000,
+    0x1299fbc200000000, 0x8c99510e00000000, 0x9590940400000000,
+    0x0b903ec800000000, 0xe897b14600000000, 0x76971b8a00000000,
+    0xf1b4802f00000000, 0x6fb42ae300000000, 0x8cb3a56d00000000,
+    0x12b30fa100000000, 0x0bbacaab00000000, 0x95ba606700000000,
+    0x76bdefe900000000, 0xe8bd452500000000, 0x44af65fc00000000,
+    0xdaafcf3000000000, 0x39a840be00000000, 0xa7a8ea7200000000,
+    0xbea12f7800000000, 0x20a185b400000000, 0xc3a60a3a00000000,
+    0x5da6a0f600000000, 0x8ce74daa00000000, 0x12e7e76600000000,
+    0xf1e068e800000000, 0x6fe0c22400000000, 0x76e9072e00000000,
+    0xe8e9ade200000000, 0x0bee226c00000000, 0x95ee88a000000000,
+    0x39fca87900000000, 0xa7fc02b500000000, 0x44fb8d3b00000000,
+    0xdafb27f700000000, 0xc3f2e2fd00000000, 0x5df2483100000000,
+    0xbef5c7bf00000000, 0x20f56d7300000000, 0xa7d6f6d600000000,
+    0x39d65c1a00000000, 0xdad1d39400000000, 0x44d1795800000000,
+    0x5dd8bc5200000000, 0xc3d8169e00000000, 0x20df991000000000,
+    0xbedf33dc00000000, 0x12cd130500000000, 0x8ccdb9c900000000,
+    0x6fca364700000000, 0xf1ca9c8b00000000, 0xe8c3598100000000,
+    0x76c3f34d00000000, 0x95c47cc300000000, 0x0bc4d60f00000000,
+    0x3747a67a00000000, 0xa9470cb600000000, 0x4a40833800000000,
+    0xd44029f400000000, 0xcd49ecfe00000000, 0x5349463200000000,
+    0xb04ec9bc00000000, 0x2e4e637000000000, 0x825c43a900000000,
+    0x1c5ce96500000000, 0xff5b66eb00000000, 0x615bcc2700000000,
+    0x7852092d00000000, 0xe652a3e100000000, 0x05552c6f00000000,
+    0x9b5586a300000000, 0x1c761d0600000000, 0x8276b7ca00000000,
+    0x6171384400000000, 0xff71928800000000, 0xe678578200000000,
+    0x7878fd4e00000000, 0x9b7f72c000000000, 0x057fd80c00000000,
+    0xa96df8d500000000, 0x376d521900000000, 0xd46add9700000000,
+    0x4a6a775b00000000, 0x5363b25100000000, 0xcd63189d00000000,
+    0x2e64971300000000, 0xb0643ddf00000000, 0x6125d08300000000,
+    0xff257a4f00000000, 0x1c22f5c100000000, 0x82225f0d00000000,
+    0x9b2b9a0700000000, 0x052b30cb00000000, 0xe62cbf4500000000,
+    0x782c158900000000, 0xd43e355000000000, 0x4a3e9f9c00000000,
+    0xa939101200000000, 0x3739bade00000000, 0x2e307fd400000000,
+    0xb030d51800000000, 0x53375a9600000000, 0xcd37f05a00000000,
+    0x4a146bff00000000, 0xd414c13300000000, 0x37134ebd00000000,
+    0xa913e47100000000, 0xb01a217b00000000, 0x2e1a8bb700000000,
+    0xcd1d043900000000, 0x531daef500000000, 0xff0f8e2c00000000,
+    0x610f24e000000000, 0x8208ab6e00000000, 0x1c0801a200000000,
+    0x0501c4a800000000, 0x9b016e6400000000, 0x7806e1ea00000000,
+    0xe6064b2600000000}};
+
+#else /* W == 4 */
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757,
+    0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a,
+    0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733,
+    0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871,
+    0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70,
+    0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42,
+    0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5,
+    0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787,
+    0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086,
+    0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4,
+    0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d,
+    0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0,
+    0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d,
+    0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f,
+    0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859,
+    0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b,
+    0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5,
+    0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028,
+    0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891,
+    0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed,
+    0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec,
+    0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde,
+    0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817,
+    0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825,
+    0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24,
+    0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e,
+    0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7,
+    0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a,
+    0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4,
+    0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196,
+    0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0,
+    0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2,
+    0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52,
+    0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f,
+    0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36,
+    0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174,
+    0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675,
+    0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647,
+    0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d,
+    0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf,
+    0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be,
+    0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc,
+    0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645,
+    0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98,
+    0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138,
+    0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a,
+    0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c,
+    0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e,
+    0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0,
+    0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d,
+    0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194,
+    0xde0506f1},
+   {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc,
+    0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f,
+    0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a,
+    0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29,
+    0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8,
+    0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023,
+    0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e,
+    0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065,
+    0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84,
+    0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7,
+    0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922,
+    0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71,
+    0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0,
+    0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b,
+    0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816,
+    0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd,
+    0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c,
+    0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f,
+    0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba,
+    0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579,
+    0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98,
+    0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873,
+    0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e,
+    0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5,
+    0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134,
+    0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7,
+    0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732,
+    0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461,
+    0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0,
+    0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b,
+    0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26,
+    0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd,
+    0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc,
+    0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef,
+    0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a,
+    0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049,
+    0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8,
+    0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43,
+    0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e,
+    0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5,
+    0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24,
+    0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07,
+    0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982,
+    0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1,
+    0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0,
+    0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b,
+    0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576,
+    0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d,
+    0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c,
+    0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f,
+    0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda,
+    0xbe9834ed},
+   {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504,
+    0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49,
+    0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e,
+    0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192,
+    0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859,
+    0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c,
+    0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620,
+    0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265,
+    0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae,
+    0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2,
+    0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175,
+    0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38,
+    0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05,
+    0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40,
+    0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f,
+    0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca,
+    0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850,
+    0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d,
+    0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da,
+    0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864,
+    0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af,
+    0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea,
+    0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74,
+    0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31,
+    0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa,
+    0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a,
+    0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd,
+    0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180,
+    0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a,
+    0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f,
+    0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290,
+    0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5,
+    0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed,
+    0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0,
+    0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167,
+    0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b,
+    0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0,
+    0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5,
+    0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc,
+    0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189,
+    0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842,
+    0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e,
+    0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299,
+    0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4,
+    0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec,
+    0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9,
+    0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66,
+    0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23,
+    0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9,
+    0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4,
+    0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33,
+    0x9324fd72},
+   {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+    0x2d02ef8d}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07,
+    0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79,
+    0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7,
+    0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84,
+    0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13,
+    0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663,
+    0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5,
+    0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5,
+    0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832,
+    0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51,
+    0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf,
+    0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1,
+    0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76,
+    0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606,
+    0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996,
+    0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6,
+    0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c,
+    0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712,
+    0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c,
+    0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4,
+    0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943,
+    0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333,
+    0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe,
+    0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce,
+    0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359,
+    0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a,
+    0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04,
+    0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a,
+    0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0,
+    0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580,
+    0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10,
+    0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060,
+    0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1,
+    0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf,
+    0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31,
+    0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852,
+    0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5,
+    0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5,
+    0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75,
+    0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005,
+    0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292,
+    0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1,
+    0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f,
+    0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111,
+    0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0,
+    0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0,
+    0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40,
+    0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530,
+    0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba,
+    0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4,
+    0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a,
+    0x8def022d},
+   {0x00000000, 0x41311b19, 0x82623632, 0xc3532d2b, 0x04c56c64,
+    0x45f4777d, 0x86a75a56, 0xc796414f, 0x088ad9c8, 0x49bbc2d1,
+    0x8ae8effa, 0xcbd9f4e3, 0x0c4fb5ac, 0x4d7eaeb5, 0x8e2d839e,
+    0xcf1c9887, 0x5112c24a, 0x1023d953, 0xd370f478, 0x9241ef61,
+    0x55d7ae2e, 0x14e6b537, 0xd7b5981c, 0x96848305, 0x59981b82,
+    0x18a9009b, 0xdbfa2db0, 0x9acb36a9, 0x5d5d77e6, 0x1c6c6cff,
+    0xdf3f41d4, 0x9e0e5acd, 0xa2248495, 0xe3159f8c, 0x2046b2a7,
+    0x6177a9be, 0xa6e1e8f1, 0xe7d0f3e8, 0x2483dec3, 0x65b2c5da,
+    0xaaae5d5d, 0xeb9f4644, 0x28cc6b6f, 0x69fd7076, 0xae6b3139,
+    0xef5a2a20, 0x2c09070b, 0x6d381c12, 0xf33646df, 0xb2075dc6,
+    0x715470ed, 0x30656bf4, 0xf7f32abb, 0xb6c231a2, 0x75911c89,
+    0x34a00790, 0xfbbc9f17, 0xba8d840e, 0x79dea925, 0x38efb23c,
+    0xff79f373, 0xbe48e86a, 0x7d1bc541, 0x3c2ade58, 0x054f79f0,
+    0x447e62e9, 0x872d4fc2, 0xc61c54db, 0x018a1594, 0x40bb0e8d,
+    0x83e823a6, 0xc2d938bf, 0x0dc5a038, 0x4cf4bb21, 0x8fa7960a,
+    0xce968d13, 0x0900cc5c, 0x4831d745, 0x8b62fa6e, 0xca53e177,
+    0x545dbbba, 0x156ca0a3, 0xd63f8d88, 0x970e9691, 0x5098d7de,
+    0x11a9ccc7, 0xd2fae1ec, 0x93cbfaf5, 0x5cd76272, 0x1de6796b,
+    0xdeb55440, 0x9f844f59, 0x58120e16, 0x1923150f, 0xda703824,
+    0x9b41233d, 0xa76bfd65, 0xe65ae67c, 0x2509cb57, 0x6438d04e,
+    0xa3ae9101, 0xe29f8a18, 0x21cca733, 0x60fdbc2a, 0xafe124ad,
+    0xeed03fb4, 0x2d83129f, 0x6cb20986, 0xab2448c9, 0xea1553d0,
+    0x29467efb, 0x687765e2, 0xf6793f2f, 0xb7482436, 0x741b091d,
+    0x352a1204, 0xf2bc534b, 0xb38d4852, 0x70de6579, 0x31ef7e60,
+    0xfef3e6e7, 0xbfc2fdfe, 0x7c91d0d5, 0x3da0cbcc, 0xfa368a83,
+    0xbb07919a, 0x7854bcb1, 0x3965a7a8, 0x4b98833b, 0x0aa99822,
+    0xc9fab509, 0x88cbae10, 0x4f5def5f, 0x0e6cf446, 0xcd3fd96d,
+    0x8c0ec274, 0x43125af3, 0x022341ea, 0xc1706cc1, 0x804177d8,
+    0x47d73697, 0x06e62d8e, 0xc5b500a5, 0x84841bbc, 0x1a8a4171,
+    0x5bbb5a68, 0x98e87743, 0xd9d96c5a, 0x1e4f2d15, 0x5f7e360c,
+    0x9c2d1b27, 0xdd1c003e, 0x120098b9, 0x533183a0, 0x9062ae8b,
+    0xd153b592, 0x16c5f4dd, 0x57f4efc4, 0x94a7c2ef, 0xd596d9f6,
+    0xe9bc07ae, 0xa88d1cb7, 0x6bde319c, 0x2aef2a85, 0xed796bca,
+    0xac4870d3, 0x6f1b5df8, 0x2e2a46e1, 0xe136de66, 0xa007c57f,
+    0x6354e854, 0x2265f34d, 0xe5f3b202, 0xa4c2a91b, 0x67918430,
+    0x26a09f29, 0xb8aec5e4, 0xf99fdefd, 0x3accf3d6, 0x7bfde8cf,
+    0xbc6ba980, 0xfd5ab299, 0x3e099fb2, 0x7f3884ab, 0xb0241c2c,
+    0xf1150735, 0x32462a1e, 0x73773107, 0xb4e17048, 0xf5d06b51,
+    0x3683467a, 0x77b25d63, 0x4ed7facb, 0x0fe6e1d2, 0xccb5ccf9,
+    0x8d84d7e0, 0x4a1296af, 0x0b238db6, 0xc870a09d, 0x8941bb84,
+    0x465d2303, 0x076c381a, 0xc43f1531, 0x850e0e28, 0x42984f67,
+    0x03a9547e, 0xc0fa7955, 0x81cb624c, 0x1fc53881, 0x5ef42398,
+    0x9da70eb3, 0xdc9615aa, 0x1b0054e5, 0x5a314ffc, 0x996262d7,
+    0xd85379ce, 0x174fe149, 0x567efa50, 0x952dd77b, 0xd41ccc62,
+    0x138a8d2d, 0x52bb9634, 0x91e8bb1f, 0xd0d9a006, 0xecf37e5e,
+    0xadc26547, 0x6e91486c, 0x2fa05375, 0xe836123a, 0xa9070923,
+    0x6a542408, 0x2b653f11, 0xe479a796, 0xa548bc8f, 0x661b91a4,
+    0x272a8abd, 0xe0bccbf2, 0xa18dd0eb, 0x62defdc0, 0x23efe6d9,
+    0xbde1bc14, 0xfcd0a70d, 0x3f838a26, 0x7eb2913f, 0xb924d070,
+    0xf815cb69, 0x3b46e642, 0x7a77fd5b, 0xb56b65dc, 0xf45a7ec5,
+    0x370953ee, 0x763848f7, 0xb1ae09b8, 0xf09f12a1, 0x33cc3f8a,
+    0x72fd2493},
+   {0x00000000, 0x376ac201, 0x6ed48403, 0x59be4602, 0xdca80907,
+    0xebc2cb06, 0xb27c8d04, 0x85164f05, 0xb851130e, 0x8f3bd10f,
+    0xd685970d, 0xe1ef550c, 0x64f91a09, 0x5393d808, 0x0a2d9e0a,
+    0x3d475c0b, 0x70a3261c, 0x47c9e41d, 0x1e77a21f, 0x291d601e,
+    0xac0b2f1b, 0x9b61ed1a, 0xc2dfab18, 0xf5b56919, 0xc8f23512,
+    0xff98f713, 0xa626b111, 0x914c7310, 0x145a3c15, 0x2330fe14,
+    0x7a8eb816, 0x4de47a17, 0xe0464d38, 0xd72c8f39, 0x8e92c93b,
+    0xb9f80b3a, 0x3cee443f, 0x0b84863e, 0x523ac03c, 0x6550023d,
+    0x58175e36, 0x6f7d9c37, 0x36c3da35, 0x01a91834, 0x84bf5731,
+    0xb3d59530, 0xea6bd332, 0xdd011133, 0x90e56b24, 0xa78fa925,
+    0xfe31ef27, 0xc95b2d26, 0x4c4d6223, 0x7b27a022, 0x2299e620,
+    0x15f32421, 0x28b4782a, 0x1fdeba2b, 0x4660fc29, 0x710a3e28,
+    0xf41c712d, 0xc376b32c, 0x9ac8f52e, 0xada2372f, 0xc08d9a70,
+    0xf7e75871, 0xae591e73, 0x9933dc72, 0x1c259377, 0x2b4f5176,
+    0x72f11774, 0x459bd575, 0x78dc897e, 0x4fb64b7f, 0x16080d7d,
+    0x2162cf7c, 0xa4748079, 0x931e4278, 0xcaa0047a, 0xfdcac67b,
+    0xb02ebc6c, 0x87447e6d, 0xdefa386f, 0xe990fa6e, 0x6c86b56b,
+    0x5bec776a, 0x02523168, 0x3538f369, 0x087faf62, 0x3f156d63,
+    0x66ab2b61, 0x51c1e960, 0xd4d7a665, 0xe3bd6464, 0xba032266,
+    0x8d69e067, 0x20cbd748, 0x17a11549, 0x4e1f534b, 0x7975914a,
+    0xfc63de4f, 0xcb091c4e, 0x92b75a4c, 0xa5dd984d, 0x989ac446,
+    0xaff00647, 0xf64e4045, 0xc1248244, 0x4432cd41, 0x73580f40,
+    0x2ae64942, 0x1d8c8b43, 0x5068f154, 0x67023355, 0x3ebc7557,
+    0x09d6b756, 0x8cc0f853, 0xbbaa3a52, 0xe2147c50, 0xd57ebe51,
+    0xe839e25a, 0xdf53205b, 0x86ed6659, 0xb187a458, 0x3491eb5d,
+    0x03fb295c, 0x5a456f5e, 0x6d2fad5f, 0x801b35e1, 0xb771f7e0,
+    0xeecfb1e2, 0xd9a573e3, 0x5cb33ce6, 0x6bd9fee7, 0x3267b8e5,
+    0x050d7ae4, 0x384a26ef, 0x0f20e4ee, 0x569ea2ec, 0x61f460ed,
+    0xe4e22fe8, 0xd388ede9, 0x8a36abeb, 0xbd5c69ea, 0xf0b813fd,
+    0xc7d2d1fc, 0x9e6c97fe, 0xa90655ff, 0x2c101afa, 0x1b7ad8fb,
+    0x42c49ef9, 0x75ae5cf8, 0x48e900f3, 0x7f83c2f2, 0x263d84f0,
+    0x115746f1, 0x944109f4, 0xa32bcbf5, 0xfa958df7, 0xcdff4ff6,
+    0x605d78d9, 0x5737bad8, 0x0e89fcda, 0x39e33edb, 0xbcf571de,
+    0x8b9fb3df, 0xd221f5dd, 0xe54b37dc, 0xd80c6bd7, 0xef66a9d6,
+    0xb6d8efd4, 0x81b22dd5, 0x04a462d0, 0x33cea0d1, 0x6a70e6d3,
+    0x5d1a24d2, 0x10fe5ec5, 0x27949cc4, 0x7e2adac6, 0x494018c7,
+    0xcc5657c2, 0xfb3c95c3, 0xa282d3c1, 0x95e811c0, 0xa8af4dcb,
+    0x9fc58fca, 0xc67bc9c8, 0xf1110bc9, 0x740744cc, 0x436d86cd,
+    0x1ad3c0cf, 0x2db902ce, 0x4096af91, 0x77fc6d90, 0x2e422b92,
+    0x1928e993, 0x9c3ea696, 0xab546497, 0xf2ea2295, 0xc580e094,
+    0xf8c7bc9f, 0xcfad7e9e, 0x9613389c, 0xa179fa9d, 0x246fb598,
+    0x13057799, 0x4abb319b, 0x7dd1f39a, 0x3035898d, 0x075f4b8c,
+    0x5ee10d8e, 0x698bcf8f, 0xec9d808a, 0xdbf7428b, 0x82490489,
+    0xb523c688, 0x88649a83, 0xbf0e5882, 0xe6b01e80, 0xd1dadc81,
+    0x54cc9384, 0x63a65185, 0x3a181787, 0x0d72d586, 0xa0d0e2a9,
+    0x97ba20a8, 0xce0466aa, 0xf96ea4ab, 0x7c78ebae, 0x4b1229af,
+    0x12ac6fad, 0x25c6adac, 0x1881f1a7, 0x2feb33a6, 0x765575a4,
+    0x413fb7a5, 0xc429f8a0, 0xf3433aa1, 0xaafd7ca3, 0x9d97bea2,
+    0xd073c4b5, 0xe71906b4, 0xbea740b6, 0x89cd82b7, 0x0cdbcdb2,
+    0x3bb10fb3, 0x620f49b1, 0x55658bb0, 0x6822d7bb, 0x5f4815ba,
+    0x06f653b8, 0x319c91b9, 0xb48adebc, 0x83e01cbd, 0xda5e5abf,
+    0xed3498be},
+   {0x00000000, 0x6567bcb8, 0x8bc809aa, 0xeeafb512, 0x5797628f,
+    0x32f0de37, 0xdc5f6b25, 0xb938d79d, 0xef28b4c5, 0x8a4f087d,
+    0x64e0bd6f, 0x018701d7, 0xb8bfd64a, 0xddd86af2, 0x3377dfe0,
+    0x56106358, 0x9f571950, 0xfa30a5e8, 0x149f10fa, 0x71f8ac42,
+    0xc8c07bdf, 0xada7c767, 0x43087275, 0x266fcecd, 0x707fad95,
+    0x1518112d, 0xfbb7a43f, 0x9ed01887, 0x27e8cf1a, 0x428f73a2,
+    0xac20c6b0, 0xc9477a08, 0x3eaf32a0, 0x5bc88e18, 0xb5673b0a,
+    0xd00087b2, 0x6938502f, 0x0c5fec97, 0xe2f05985, 0x8797e53d,
+    0xd1878665, 0xb4e03add, 0x5a4f8fcf, 0x3f283377, 0x8610e4ea,
+    0xe3775852, 0x0dd8ed40, 0x68bf51f8, 0xa1f82bf0, 0xc49f9748,
+    0x2a30225a, 0x4f579ee2, 0xf66f497f, 0x9308f5c7, 0x7da740d5,
+    0x18c0fc6d, 0x4ed09f35, 0x2bb7238d, 0xc518969f, 0xa07f2a27,
+    0x1947fdba, 0x7c204102, 0x928ff410, 0xf7e848a8, 0x3d58149b,
+    0x583fa823, 0xb6901d31, 0xd3f7a189, 0x6acf7614, 0x0fa8caac,
+    0xe1077fbe, 0x8460c306, 0xd270a05e, 0xb7171ce6, 0x59b8a9f4,
+    0x3cdf154c, 0x85e7c2d1, 0xe0807e69, 0x0e2fcb7b, 0x6b4877c3,
+    0xa20f0dcb, 0xc768b173, 0x29c70461, 0x4ca0b8d9, 0xf5986f44,
+    0x90ffd3fc, 0x7e5066ee, 0x1b37da56, 0x4d27b90e, 0x284005b6,
+    0xc6efb0a4, 0xa3880c1c, 0x1ab0db81, 0x7fd76739, 0x9178d22b,
+    0xf41f6e93, 0x03f7263b, 0x66909a83, 0x883f2f91, 0xed589329,
+    0x546044b4, 0x3107f80c, 0xdfa84d1e, 0xbacff1a6, 0xecdf92fe,
+    0x89b82e46, 0x67179b54, 0x027027ec, 0xbb48f071, 0xde2f4cc9,
+    0x3080f9db, 0x55e74563, 0x9ca03f6b, 0xf9c783d3, 0x176836c1,
+    0x720f8a79, 0xcb375de4, 0xae50e15c, 0x40ff544e, 0x2598e8f6,
+    0x73888bae, 0x16ef3716, 0xf8408204, 0x9d273ebc, 0x241fe921,
+    0x41785599, 0xafd7e08b, 0xcab05c33, 0x3bb659ed, 0x5ed1e555,
+    0xb07e5047, 0xd519ecff, 0x6c213b62, 0x094687da, 0xe7e932c8,
+    0x828e8e70, 0xd49eed28, 0xb1f95190, 0x5f56e482, 0x3a31583a,
+    0x83098fa7, 0xe66e331f, 0x08c1860d, 0x6da63ab5, 0xa4e140bd,
+    0xc186fc05, 0x2f294917, 0x4a4ef5af, 0xf3762232, 0x96119e8a,
+    0x78be2b98, 0x1dd99720, 0x4bc9f478, 0x2eae48c0, 0xc001fdd2,
+    0xa566416a, 0x1c5e96f7, 0x79392a4f, 0x97969f5d, 0xf2f123e5,
+    0x05196b4d, 0x607ed7f5, 0x8ed162e7, 0xebb6de5f, 0x528e09c2,
+    0x37e9b57a, 0xd9460068, 0xbc21bcd0, 0xea31df88, 0x8f566330,
+    0x61f9d622, 0x049e6a9a, 0xbda6bd07, 0xd8c101bf, 0x366eb4ad,
+    0x53090815, 0x9a4e721d, 0xff29cea5, 0x11867bb7, 0x74e1c70f,
+    0xcdd91092, 0xa8beac2a, 0x46111938, 0x2376a580, 0x7566c6d8,
+    0x10017a60, 0xfeaecf72, 0x9bc973ca, 0x22f1a457, 0x479618ef,
+    0xa939adfd, 0xcc5e1145, 0x06ee4d76, 0x6389f1ce, 0x8d2644dc,
+    0xe841f864, 0x51792ff9, 0x341e9341, 0xdab12653, 0xbfd69aeb,
+    0xe9c6f9b3, 0x8ca1450b, 0x620ef019, 0x07694ca1, 0xbe519b3c,
+    0xdb362784, 0x35999296, 0x50fe2e2e, 0x99b95426, 0xfcdee89e,
+    0x12715d8c, 0x7716e134, 0xce2e36a9, 0xab498a11, 0x45e63f03,
+    0x208183bb, 0x7691e0e3, 0x13f65c5b, 0xfd59e949, 0x983e55f1,
+    0x2106826c, 0x44613ed4, 0xaace8bc6, 0xcfa9377e, 0x38417fd6,
+    0x5d26c36e, 0xb389767c, 0xd6eecac4, 0x6fd61d59, 0x0ab1a1e1,
+    0xe41e14f3, 0x8179a84b, 0xd769cb13, 0xb20e77ab, 0x5ca1c2b9,
+    0x39c67e01, 0x80fea99c, 0xe5991524, 0x0b36a036, 0x6e511c8e,
+    0xa7166686, 0xc271da3e, 0x2cde6f2c, 0x49b9d394, 0xf0810409,
+    0x95e6b8b1, 0x7b490da3, 0x1e2eb11b, 0x483ed243, 0x2d596efb,
+    0xc3f6dbe9, 0xa6916751, 0x1fa9b0cc, 0x7ace0c74, 0x9461b966,
+    0xf10605de}};
+
+#endif
+
+#endif
+
+#if N == 2
+
+#if W == 8
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87,
+    0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede,
+    0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab,
+    0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c,
+    0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1,
+    0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7,
+    0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e,
+    0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308,
+    0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5,
+    0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472,
+    0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07,
+    0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e,
+    0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa,
+    0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec,
+    0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6,
+    0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0,
+    0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3,
+    0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba,
+    0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf,
+    0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975,
+    0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8,
+    0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde,
+    0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a,
+    0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c,
+    0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1,
+    0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65,
+    0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410,
+    0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649,
+    0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a,
+    0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c,
+    0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946,
+    0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450,
+    0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e,
+    0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857,
+    0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022,
+    0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5,
+    0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758,
+    0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e,
+    0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d,
+    0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b,
+    0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6,
+    0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401,
+    0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74,
+    0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d,
+    0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073,
+    0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65,
+    0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f,
+    0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749,
+    0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a,
+    0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033,
+    0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846,
+    0x0d7139d7},
+   {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563,
+    0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f,
+    0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875,
+    0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536,
+    0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8,
+    0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43,
+    0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f,
+    0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184,
+    0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a,
+    0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39,
+    0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523,
+    0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f,
+    0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d,
+    0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6,
+    0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b,
+    0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0,
+    0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151,
+    0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d,
+    0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47,
+    0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a,
+    0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964,
+    0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef,
+    0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d,
+    0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6,
+    0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348,
+    0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53,
+    0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449,
+    0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645,
+    0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4,
+    0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f,
+    0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2,
+    0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69,
+    0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46,
+    0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a,
+    0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650,
+    0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13,
+    0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded,
+    0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366,
+    0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57,
+    0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc,
+    0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222,
+    0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61,
+    0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b,
+    0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277,
+    0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558,
+    0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3,
+    0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e,
+    0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5,
+    0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74,
+    0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78,
+    0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262,
+    0x1c53e98a},
+   {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b,
+    0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40,
+    0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580,
+    0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7,
+    0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a,
+    0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37,
+    0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75,
+    0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218,
+    0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5,
+    0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2,
+    0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02,
+    0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59,
+    0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1,
+    0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c,
+    0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a,
+    0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307,
+    0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486,
+    0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd,
+    0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d,
+    0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2,
+    0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f,
+    0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72,
+    0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8,
+    0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985,
+    0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268,
+    0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94,
+    0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454,
+    0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f,
+    0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e,
+    0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3,
+    0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915,
+    0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778,
+    0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821,
+    0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a,
+    0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba,
+    0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d,
+    0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560,
+    0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d,
+    0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe,
+    0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3,
+    0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e,
+    0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509,
+    0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9,
+    0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92,
+    0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb,
+    0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6,
+    0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50,
+    0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d,
+    0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc,
+    0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7,
+    0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927,
+    0x3f88e851},
+   {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96,
+    0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8,
+    0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0,
+    0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14,
+    0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7,
+    0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4,
+    0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe,
+    0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad,
+    0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e,
+    0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa,
+    0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2,
+    0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c,
+    0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab,
+    0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8,
+    0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d,
+    0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e,
+    0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7,
+    0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99,
+    0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1,
+    0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690,
+    0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933,
+    0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20,
+    0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf,
+    0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc,
+    0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f,
+    0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92,
+    0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca,
+    0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4,
+    0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd,
+    0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de,
+    0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb,
+    0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8,
+    0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474,
+    0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a,
+    0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252,
+    0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6,
+    0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55,
+    0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846,
+    0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7,
+    0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4,
+    0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47,
+    0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3,
+    0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb,
+    0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5,
+    0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49,
+    0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a,
+    0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f,
+    0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c,
+    0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305,
+    0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b,
+    0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523,
+    0x3dee8ca6},
+   {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f,
+    0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91,
+    0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e,
+    0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c,
+    0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02,
+    0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12,
+    0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567,
+    0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277,
+    0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679,
+    0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b,
+    0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4,
+    0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a,
+    0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0,
+    0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0,
+    0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91,
+    0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881,
+    0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173,
+    0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d,
+    0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912,
+    0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8,
+    0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6,
+    0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6,
+    0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b,
+    0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b,
+    0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75,
+    0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f,
+    0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00,
+    0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee,
+    0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c,
+    0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c,
+    0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d,
+    0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d,
+    0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67,
+    0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89,
+    0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706,
+    0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14,
+    0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a,
+    0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a,
+    0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f,
+    0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f,
+    0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591,
+    0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983,
+    0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c,
+    0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2,
+    0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8,
+    0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8,
+    0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89,
+    0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99,
+    0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b,
+    0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485,
+    0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a,
+    0x36197165},
+   {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382,
+    0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85,
+    0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06,
+    0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca,
+    0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e,
+    0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc,
+    0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616,
+    0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54,
+    0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10,
+    0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc,
+    0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f,
+    0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58,
+    0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef,
+    0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad,
+    0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b,
+    0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29,
+    0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6,
+    0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1,
+    0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622,
+    0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039,
+    0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d,
+    0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f,
+    0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32,
+    0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770,
+    0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034,
+    0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f,
+    0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc,
+    0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db,
+    0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154,
+    0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16,
+    0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0,
+    0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592,
+    0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca,
+    0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd,
+    0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e,
+    0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882,
+    0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6,
+    0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384,
+    0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1,
+    0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3,
+    0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7,
+    0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b,
+    0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8,
+    0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff,
+    0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7,
+    0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5,
+    0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23,
+    0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761,
+    0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee,
+    0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9,
+    0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a,
+    0x1a3b93aa},
+   {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a,
+    0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca,
+    0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3,
+    0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb,
+    0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c,
+    0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58,
+    0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed,
+    0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9,
+    0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e,
+    0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906,
+    0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f,
+    0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf,
+    0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0,
+    0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4,
+    0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769,
+    0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d,
+    0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632,
+    0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82,
+    0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb,
+    0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73,
+    0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484,
+    0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0,
+    0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5,
+    0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1,
+    0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516,
+    0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f,
+    0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946,
+    0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6,
+    0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9,
+    0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad,
+    0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820,
+    0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364,
+    0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab,
+    0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b,
+    0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62,
+    0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a,
+    0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd,
+    0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089,
+    0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c,
+    0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8,
+    0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f,
+    0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477,
+    0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e,
+    0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be,
+    0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71,
+    0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635,
+    0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8,
+    0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc,
+    0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3,
+    0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753,
+    0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a,
+    0xe147d714},
+   {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c,
+    0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b,
+    0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92,
+    0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4,
+    0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069,
+    0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526,
+    0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25,
+    0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a,
+    0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7,
+    0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491,
+    0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958,
+    0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f,
+    0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307,
+    0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648,
+    0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999,
+    0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6,
+    0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a,
+    0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d,
+    0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4,
+    0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61,
+    0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc,
+    0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3,
+    0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53,
+    0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c,
+    0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1,
+    0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c,
+    0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5,
+    0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92,
+    0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e,
+    0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771,
+    0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0,
+    0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def,
+    0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0,
+    0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7,
+    0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e,
+    0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58,
+    0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285,
+    0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca,
+    0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce,
+    0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81,
+    0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c,
+    0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a,
+    0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3,
+    0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4,
+    0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb,
+    0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4,
+    0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75,
+    0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a,
+    0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296,
+    0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1,
+    0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808,
+    0x494f0c4b}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x0000000000000000, 0x43147b1700000000, 0x8628f62e00000000,
+    0xc53c8d3900000000, 0x0c51ec5d00000000, 0x4f45974a00000000,
+    0x8a791a7300000000, 0xc96d616400000000, 0x18a2d8bb00000000,
+    0x5bb6a3ac00000000, 0x9e8a2e9500000000, 0xdd9e558200000000,
+    0x14f334e600000000, 0x57e74ff100000000, 0x92dbc2c800000000,
+    0xd1cfb9df00000000, 0x7142c0ac00000000, 0x3256bbbb00000000,
+    0xf76a368200000000, 0xb47e4d9500000000, 0x7d132cf100000000,
+    0x3e0757e600000000, 0xfb3bdadf00000000, 0xb82fa1c800000000,
+    0x69e0181700000000, 0x2af4630000000000, 0xefc8ee3900000000,
+    0xacdc952e00000000, 0x65b1f44a00000000, 0x26a58f5d00000000,
+    0xe399026400000000, 0xa08d797300000000, 0xa382f18200000000,
+    0xe0968a9500000000, 0x25aa07ac00000000, 0x66be7cbb00000000,
+    0xafd31ddf00000000, 0xecc766c800000000, 0x29fbebf100000000,
+    0x6aef90e600000000, 0xbb20293900000000, 0xf834522e00000000,
+    0x3d08df1700000000, 0x7e1ca40000000000, 0xb771c56400000000,
+    0xf465be7300000000, 0x3159334a00000000, 0x724d485d00000000,
+    0xd2c0312e00000000, 0x91d44a3900000000, 0x54e8c70000000000,
+    0x17fcbc1700000000, 0xde91dd7300000000, 0x9d85a66400000000,
+    0x58b92b5d00000000, 0x1bad504a00000000, 0xca62e99500000000,
+    0x8976928200000000, 0x4c4a1fbb00000000, 0x0f5e64ac00000000,
+    0xc63305c800000000, 0x85277edf00000000, 0x401bf3e600000000,
+    0x030f88f100000000, 0x070392de00000000, 0x4417e9c900000000,
+    0x812b64f000000000, 0xc23f1fe700000000, 0x0b527e8300000000,
+    0x4846059400000000, 0x8d7a88ad00000000, 0xce6ef3ba00000000,
+    0x1fa14a6500000000, 0x5cb5317200000000, 0x9989bc4b00000000,
+    0xda9dc75c00000000, 0x13f0a63800000000, 0x50e4dd2f00000000,
+    0x95d8501600000000, 0xd6cc2b0100000000, 0x7641527200000000,
+    0x3555296500000000, 0xf069a45c00000000, 0xb37ddf4b00000000,
+    0x7a10be2f00000000, 0x3904c53800000000, 0xfc38480100000000,
+    0xbf2c331600000000, 0x6ee38ac900000000, 0x2df7f1de00000000,
+    0xe8cb7ce700000000, 0xabdf07f000000000, 0x62b2669400000000,
+    0x21a61d8300000000, 0xe49a90ba00000000, 0xa78eebad00000000,
+    0xa481635c00000000, 0xe795184b00000000, 0x22a9957200000000,
+    0x61bdee6500000000, 0xa8d08f0100000000, 0xebc4f41600000000,
+    0x2ef8792f00000000, 0x6dec023800000000, 0xbc23bbe700000000,
+    0xff37c0f000000000, 0x3a0b4dc900000000, 0x791f36de00000000,
+    0xb07257ba00000000, 0xf3662cad00000000, 0x365aa19400000000,
+    0x754eda8300000000, 0xd5c3a3f000000000, 0x96d7d8e700000000,
+    0x53eb55de00000000, 0x10ff2ec900000000, 0xd9924fad00000000,
+    0x9a8634ba00000000, 0x5fbab98300000000, 0x1caec29400000000,
+    0xcd617b4b00000000, 0x8e75005c00000000, 0x4b498d6500000000,
+    0x085df67200000000, 0xc130971600000000, 0x8224ec0100000000,
+    0x4718613800000000, 0x040c1a2f00000000, 0x4f00556600000000,
+    0x0c142e7100000000, 0xc928a34800000000, 0x8a3cd85f00000000,
+    0x4351b93b00000000, 0x0045c22c00000000, 0xc5794f1500000000,
+    0x866d340200000000, 0x57a28ddd00000000, 0x14b6f6ca00000000,
+    0xd18a7bf300000000, 0x929e00e400000000, 0x5bf3618000000000,
+    0x18e71a9700000000, 0xdddb97ae00000000, 0x9ecfecb900000000,
+    0x3e4295ca00000000, 0x7d56eedd00000000, 0xb86a63e400000000,
+    0xfb7e18f300000000, 0x3213799700000000, 0x7107028000000000,
+    0xb43b8fb900000000, 0xf72ff4ae00000000, 0x26e04d7100000000,
+    0x65f4366600000000, 0xa0c8bb5f00000000, 0xe3dcc04800000000,
+    0x2ab1a12c00000000, 0x69a5da3b00000000, 0xac99570200000000,
+    0xef8d2c1500000000, 0xec82a4e400000000, 0xaf96dff300000000,
+    0x6aaa52ca00000000, 0x29be29dd00000000, 0xe0d348b900000000,
+    0xa3c733ae00000000, 0x66fbbe9700000000, 0x25efc58000000000,
+    0xf4207c5f00000000, 0xb734074800000000, 0x72088a7100000000,
+    0x311cf16600000000, 0xf871900200000000, 0xbb65eb1500000000,
+    0x7e59662c00000000, 0x3d4d1d3b00000000, 0x9dc0644800000000,
+    0xded41f5f00000000, 0x1be8926600000000, 0x58fce97100000000,
+    0x9191881500000000, 0xd285f30200000000, 0x17b97e3b00000000,
+    0x54ad052c00000000, 0x8562bcf300000000, 0xc676c7e400000000,
+    0x034a4add00000000, 0x405e31ca00000000, 0x893350ae00000000,
+    0xca272bb900000000, 0x0f1ba68000000000, 0x4c0fdd9700000000,
+    0x4803c7b800000000, 0x0b17bcaf00000000, 0xce2b319600000000,
+    0x8d3f4a8100000000, 0x44522be500000000, 0x074650f200000000,
+    0xc27addcb00000000, 0x816ea6dc00000000, 0x50a11f0300000000,
+    0x13b5641400000000, 0xd689e92d00000000, 0x959d923a00000000,
+    0x5cf0f35e00000000, 0x1fe4884900000000, 0xdad8057000000000,
+    0x99cc7e6700000000, 0x3941071400000000, 0x7a557c0300000000,
+    0xbf69f13a00000000, 0xfc7d8a2d00000000, 0x3510eb4900000000,
+    0x7604905e00000000, 0xb3381d6700000000, 0xf02c667000000000,
+    0x21e3dfaf00000000, 0x62f7a4b800000000, 0xa7cb298100000000,
+    0xe4df529600000000, 0x2db233f200000000, 0x6ea648e500000000,
+    0xab9ac5dc00000000, 0xe88ebecb00000000, 0xeb81363a00000000,
+    0xa8954d2d00000000, 0x6da9c01400000000, 0x2ebdbb0300000000,
+    0xe7d0da6700000000, 0xa4c4a17000000000, 0x61f82c4900000000,
+    0x22ec575e00000000, 0xf323ee8100000000, 0xb037959600000000,
+    0x750b18af00000000, 0x361f63b800000000, 0xff7202dc00000000,
+    0xbc6679cb00000000, 0x795af4f200000000, 0x3a4e8fe500000000,
+    0x9ac3f69600000000, 0xd9d78d8100000000, 0x1ceb00b800000000,
+    0x5fff7baf00000000, 0x96921acb00000000, 0xd58661dc00000000,
+    0x10baece500000000, 0x53ae97f200000000, 0x82612e2d00000000,
+    0xc175553a00000000, 0x0449d80300000000, 0x475da31400000000,
+    0x8e30c27000000000, 0xcd24b96700000000, 0x0818345e00000000,
+    0x4b0c4f4900000000},
+   {0x0000000000000000, 0x3e6bc2ef00000000, 0x3dd0f50400000000,
+    0x03bb37eb00000000, 0x7aa0eb0900000000, 0x44cb29e600000000,
+    0x47701e0d00000000, 0x791bdce200000000, 0xf440d71300000000,
+    0xca2b15fc00000000, 0xc990221700000000, 0xf7fbe0f800000000,
+    0x8ee03c1a00000000, 0xb08bfef500000000, 0xb330c91e00000000,
+    0x8d5b0bf100000000, 0xe881ae2700000000, 0xd6ea6cc800000000,
+    0xd5515b2300000000, 0xeb3a99cc00000000, 0x9221452e00000000,
+    0xac4a87c100000000, 0xaff1b02a00000000, 0x919a72c500000000,
+    0x1cc1793400000000, 0x22aabbdb00000000, 0x21118c3000000000,
+    0x1f7a4edf00000000, 0x6661923d00000000, 0x580a50d200000000,
+    0x5bb1673900000000, 0x65daa5d600000000, 0xd0035d4f00000000,
+    0xee689fa000000000, 0xedd3a84b00000000, 0xd3b86aa400000000,
+    0xaaa3b64600000000, 0x94c874a900000000, 0x9773434200000000,
+    0xa91881ad00000000, 0x24438a5c00000000, 0x1a2848b300000000,
+    0x19937f5800000000, 0x27f8bdb700000000, 0x5ee3615500000000,
+    0x6088a3ba00000000, 0x6333945100000000, 0x5d5856be00000000,
+    0x3882f36800000000, 0x06e9318700000000, 0x0552066c00000000,
+    0x3b39c48300000000, 0x4222186100000000, 0x7c49da8e00000000,
+    0x7ff2ed6500000000, 0x41992f8a00000000, 0xccc2247b00000000,
+    0xf2a9e69400000000, 0xf112d17f00000000, 0xcf79139000000000,
+    0xb662cf7200000000, 0x88090d9d00000000, 0x8bb23a7600000000,
+    0xb5d9f89900000000, 0xa007ba9e00000000, 0x9e6c787100000000,
+    0x9dd74f9a00000000, 0xa3bc8d7500000000, 0xdaa7519700000000,
+    0xe4cc937800000000, 0xe777a49300000000, 0xd91c667c00000000,
+    0x54476d8d00000000, 0x6a2caf6200000000, 0x6997988900000000,
+    0x57fc5a6600000000, 0x2ee7868400000000, 0x108c446b00000000,
+    0x1337738000000000, 0x2d5cb16f00000000, 0x488614b900000000,
+    0x76edd65600000000, 0x7556e1bd00000000, 0x4b3d235200000000,
+    0x3226ffb000000000, 0x0c4d3d5f00000000, 0x0ff60ab400000000,
+    0x319dc85b00000000, 0xbcc6c3aa00000000, 0x82ad014500000000,
+    0x811636ae00000000, 0xbf7df44100000000, 0xc66628a300000000,
+    0xf80dea4c00000000, 0xfbb6dda700000000, 0xc5dd1f4800000000,
+    0x7004e7d100000000, 0x4e6f253e00000000, 0x4dd412d500000000,
+    0x73bfd03a00000000, 0x0aa40cd800000000, 0x34cfce3700000000,
+    0x3774f9dc00000000, 0x091f3b3300000000, 0x844430c200000000,
+    0xba2ff22d00000000, 0xb994c5c600000000, 0x87ff072900000000,
+    0xfee4dbcb00000000, 0xc08f192400000000, 0xc3342ecf00000000,
+    0xfd5fec2000000000, 0x988549f600000000, 0xa6ee8b1900000000,
+    0xa555bcf200000000, 0x9b3e7e1d00000000, 0xe225a2ff00000000,
+    0xdc4e601000000000, 0xdff557fb00000000, 0xe19e951400000000,
+    0x6cc59ee500000000, 0x52ae5c0a00000000, 0x51156be100000000,
+    0x6f7ea90e00000000, 0x166575ec00000000, 0x280eb70300000000,
+    0x2bb580e800000000, 0x15de420700000000, 0x010905e600000000,
+    0x3f62c70900000000, 0x3cd9f0e200000000, 0x02b2320d00000000,
+    0x7ba9eeef00000000, 0x45c22c0000000000, 0x46791beb00000000,
+    0x7812d90400000000, 0xf549d2f500000000, 0xcb22101a00000000,
+    0xc89927f100000000, 0xf6f2e51e00000000, 0x8fe939fc00000000,
+    0xb182fb1300000000, 0xb239ccf800000000, 0x8c520e1700000000,
+    0xe988abc100000000, 0xd7e3692e00000000, 0xd4585ec500000000,
+    0xea339c2a00000000, 0x932840c800000000, 0xad43822700000000,
+    0xaef8b5cc00000000, 0x9093772300000000, 0x1dc87cd200000000,
+    0x23a3be3d00000000, 0x201889d600000000, 0x1e734b3900000000,
+    0x676897db00000000, 0x5903553400000000, 0x5ab862df00000000,
+    0x64d3a03000000000, 0xd10a58a900000000, 0xef619a4600000000,
+    0xecdaadad00000000, 0xd2b16f4200000000, 0xabaab3a000000000,
+    0x95c1714f00000000, 0x967a46a400000000, 0xa811844b00000000,
+    0x254a8fba00000000, 0x1b214d5500000000, 0x189a7abe00000000,
+    0x26f1b85100000000, 0x5fea64b300000000, 0x6181a65c00000000,
+    0x623a91b700000000, 0x5c51535800000000, 0x398bf68e00000000,
+    0x07e0346100000000, 0x045b038a00000000, 0x3a30c16500000000,
+    0x432b1d8700000000, 0x7d40df6800000000, 0x7efbe88300000000,
+    0x40902a6c00000000, 0xcdcb219d00000000, 0xf3a0e37200000000,
+    0xf01bd49900000000, 0xce70167600000000, 0xb76bca9400000000,
+    0x8900087b00000000, 0x8abb3f9000000000, 0xb4d0fd7f00000000,
+    0xa10ebf7800000000, 0x9f657d9700000000, 0x9cde4a7c00000000,
+    0xa2b5889300000000, 0xdbae547100000000, 0xe5c5969e00000000,
+    0xe67ea17500000000, 0xd815639a00000000, 0x554e686b00000000,
+    0x6b25aa8400000000, 0x689e9d6f00000000, 0x56f55f8000000000,
+    0x2fee836200000000, 0x1185418d00000000, 0x123e766600000000,
+    0x2c55b48900000000, 0x498f115f00000000, 0x77e4d3b000000000,
+    0x745fe45b00000000, 0x4a3426b400000000, 0x332ffa5600000000,
+    0x0d4438b900000000, 0x0eff0f5200000000, 0x3094cdbd00000000,
+    0xbdcfc64c00000000, 0x83a404a300000000, 0x801f334800000000,
+    0xbe74f1a700000000, 0xc76f2d4500000000, 0xf904efaa00000000,
+    0xfabfd84100000000, 0xc4d41aae00000000, 0x710de23700000000,
+    0x4f6620d800000000, 0x4cdd173300000000, 0x72b6d5dc00000000,
+    0x0bad093e00000000, 0x35c6cbd100000000, 0x367dfc3a00000000,
+    0x08163ed500000000, 0x854d352400000000, 0xbb26f7cb00000000,
+    0xb89dc02000000000, 0x86f602cf00000000, 0xffedde2d00000000,
+    0xc1861cc200000000, 0xc23d2b2900000000, 0xfc56e9c600000000,
+    0x998c4c1000000000, 0xa7e78eff00000000, 0xa45cb91400000000,
+    0x9a377bfb00000000, 0xe32ca71900000000, 0xdd4765f600000000,
+    0xdefc521d00000000, 0xe09790f200000000, 0x6dcc9b0300000000,
+    0x53a759ec00000000, 0x501c6e0700000000, 0x6e77ace800000000,
+    0x176c700a00000000, 0x2907b2e500000000, 0x2abc850e00000000,
+    0x14d747e100000000},
+   {0x0000000000000000, 0xc0df8ec100000000, 0xc1b96c5800000000,
+    0x0166e29900000000, 0x8273d9b000000000, 0x42ac577100000000,
+    0x43cab5e800000000, 0x83153b2900000000, 0x45e1c3ba00000000,
+    0x853e4d7b00000000, 0x8458afe200000000, 0x4487212300000000,
+    0xc7921a0a00000000, 0x074d94cb00000000, 0x062b765200000000,
+    0xc6f4f89300000000, 0xcbc4f6ae00000000, 0x0b1b786f00000000,
+    0x0a7d9af600000000, 0xcaa2143700000000, 0x49b72f1e00000000,
+    0x8968a1df00000000, 0x880e434600000000, 0x48d1cd8700000000,
+    0x8e25351400000000, 0x4efabbd500000000, 0x4f9c594c00000000,
+    0x8f43d78d00000000, 0x0c56eca400000000, 0xcc89626500000000,
+    0xcdef80fc00000000, 0x0d300e3d00000000, 0xd78f9c8600000000,
+    0x1750124700000000, 0x1636f0de00000000, 0xd6e97e1f00000000,
+    0x55fc453600000000, 0x9523cbf700000000, 0x9445296e00000000,
+    0x549aa7af00000000, 0x926e5f3c00000000, 0x52b1d1fd00000000,
+    0x53d7336400000000, 0x9308bda500000000, 0x101d868c00000000,
+    0xd0c2084d00000000, 0xd1a4ead400000000, 0x117b641500000000,
+    0x1c4b6a2800000000, 0xdc94e4e900000000, 0xddf2067000000000,
+    0x1d2d88b100000000, 0x9e38b39800000000, 0x5ee73d5900000000,
+    0x5f81dfc000000000, 0x9f5e510100000000, 0x59aaa99200000000,
+    0x9975275300000000, 0x9813c5ca00000000, 0x58cc4b0b00000000,
+    0xdbd9702200000000, 0x1b06fee300000000, 0x1a601c7a00000000,
+    0xdabf92bb00000000, 0xef1948d600000000, 0x2fc6c61700000000,
+    0x2ea0248e00000000, 0xee7faa4f00000000, 0x6d6a916600000000,
+    0xadb51fa700000000, 0xacd3fd3e00000000, 0x6c0c73ff00000000,
+    0xaaf88b6c00000000, 0x6a2705ad00000000, 0x6b41e73400000000,
+    0xab9e69f500000000, 0x288b52dc00000000, 0xe854dc1d00000000,
+    0xe9323e8400000000, 0x29edb04500000000, 0x24ddbe7800000000,
+    0xe40230b900000000, 0xe564d22000000000, 0x25bb5ce100000000,
+    0xa6ae67c800000000, 0x6671e90900000000, 0x67170b9000000000,
+    0xa7c8855100000000, 0x613c7dc200000000, 0xa1e3f30300000000,
+    0xa085119a00000000, 0x605a9f5b00000000, 0xe34fa47200000000,
+    0x23902ab300000000, 0x22f6c82a00000000, 0xe22946eb00000000,
+    0x3896d45000000000, 0xf8495a9100000000, 0xf92fb80800000000,
+    0x39f036c900000000, 0xbae50de000000000, 0x7a3a832100000000,
+    0x7b5c61b800000000, 0xbb83ef7900000000, 0x7d7717ea00000000,
+    0xbda8992b00000000, 0xbcce7bb200000000, 0x7c11f57300000000,
+    0xff04ce5a00000000, 0x3fdb409b00000000, 0x3ebda20200000000,
+    0xfe622cc300000000, 0xf35222fe00000000, 0x338dac3f00000000,
+    0x32eb4ea600000000, 0xf234c06700000000, 0x7121fb4e00000000,
+    0xb1fe758f00000000, 0xb098971600000000, 0x704719d700000000,
+    0xb6b3e14400000000, 0x766c6f8500000000, 0x770a8d1c00000000,
+    0xb7d503dd00000000, 0x34c038f400000000, 0xf41fb63500000000,
+    0xf57954ac00000000, 0x35a6da6d00000000, 0x9f35e17700000000,
+    0x5fea6fb600000000, 0x5e8c8d2f00000000, 0x9e5303ee00000000,
+    0x1d4638c700000000, 0xdd99b60600000000, 0xdcff549f00000000,
+    0x1c20da5e00000000, 0xdad422cd00000000, 0x1a0bac0c00000000,
+    0x1b6d4e9500000000, 0xdbb2c05400000000, 0x58a7fb7d00000000,
+    0x987875bc00000000, 0x991e972500000000, 0x59c119e400000000,
+    0x54f117d900000000, 0x942e991800000000, 0x95487b8100000000,
+    0x5597f54000000000, 0xd682ce6900000000, 0x165d40a800000000,
+    0x173ba23100000000, 0xd7e42cf000000000, 0x1110d46300000000,
+    0xd1cf5aa200000000, 0xd0a9b83b00000000, 0x107636fa00000000,
+    0x93630dd300000000, 0x53bc831200000000, 0x52da618b00000000,
+    0x9205ef4a00000000, 0x48ba7df100000000, 0x8865f33000000000,
+    0x890311a900000000, 0x49dc9f6800000000, 0xcac9a44100000000,
+    0x0a162a8000000000, 0x0b70c81900000000, 0xcbaf46d800000000,
+    0x0d5bbe4b00000000, 0xcd84308a00000000, 0xcce2d21300000000,
+    0x0c3d5cd200000000, 0x8f2867fb00000000, 0x4ff7e93a00000000,
+    0x4e910ba300000000, 0x8e4e856200000000, 0x837e8b5f00000000,
+    0x43a1059e00000000, 0x42c7e70700000000, 0x821869c600000000,
+    0x010d52ef00000000, 0xc1d2dc2e00000000, 0xc0b43eb700000000,
+    0x006bb07600000000, 0xc69f48e500000000, 0x0640c62400000000,
+    0x072624bd00000000, 0xc7f9aa7c00000000, 0x44ec915500000000,
+    0x84331f9400000000, 0x8555fd0d00000000, 0x458a73cc00000000,
+    0x702ca9a100000000, 0xb0f3276000000000, 0xb195c5f900000000,
+    0x714a4b3800000000, 0xf25f701100000000, 0x3280fed000000000,
+    0x33e61c4900000000, 0xf339928800000000, 0x35cd6a1b00000000,
+    0xf512e4da00000000, 0xf474064300000000, 0x34ab888200000000,
+    0xb7beb3ab00000000, 0x77613d6a00000000, 0x7607dff300000000,
+    0xb6d8513200000000, 0xbbe85f0f00000000, 0x7b37d1ce00000000,
+    0x7a51335700000000, 0xba8ebd9600000000, 0x399b86bf00000000,
+    0xf944087e00000000, 0xf822eae700000000, 0x38fd642600000000,
+    0xfe099cb500000000, 0x3ed6127400000000, 0x3fb0f0ed00000000,
+    0xff6f7e2c00000000, 0x7c7a450500000000, 0xbca5cbc400000000,
+    0xbdc3295d00000000, 0x7d1ca79c00000000, 0xa7a3352700000000,
+    0x677cbbe600000000, 0x661a597f00000000, 0xa6c5d7be00000000,
+    0x25d0ec9700000000, 0xe50f625600000000, 0xe46980cf00000000,
+    0x24b60e0e00000000, 0xe242f69d00000000, 0x229d785c00000000,
+    0x23fb9ac500000000, 0xe324140400000000, 0x60312f2d00000000,
+    0xa0eea1ec00000000, 0xa188437500000000, 0x6157cdb400000000,
+    0x6c67c38900000000, 0xacb84d4800000000, 0xaddeafd100000000,
+    0x6d01211000000000, 0xee141a3900000000, 0x2ecb94f800000000,
+    0x2fad766100000000, 0xef72f8a000000000, 0x2986003300000000,
+    0xe9598ef200000000, 0xe83f6c6b00000000, 0x28e0e2aa00000000,
+    0xabf5d98300000000, 0x6b2a574200000000, 0x6a4cb5db00000000,
+    0xaa933b1a00000000},
+   {0x0000000000000000, 0x6f4ca59b00000000, 0x9f9e3bec00000000,
+    0xf0d29e7700000000, 0x7f3b060300000000, 0x1077a39800000000,
+    0xe0a53def00000000, 0x8fe9987400000000, 0xfe760c0600000000,
+    0x913aa99d00000000, 0x61e837ea00000000, 0x0ea4927100000000,
+    0x814d0a0500000000, 0xee01af9e00000000, 0x1ed331e900000000,
+    0x719f947200000000, 0xfced180c00000000, 0x93a1bd9700000000,
+    0x637323e000000000, 0x0c3f867b00000000, 0x83d61e0f00000000,
+    0xec9abb9400000000, 0x1c4825e300000000, 0x7304807800000000,
+    0x029b140a00000000, 0x6dd7b19100000000, 0x9d052fe600000000,
+    0xf2498a7d00000000, 0x7da0120900000000, 0x12ecb79200000000,
+    0xe23e29e500000000, 0x8d728c7e00000000, 0xf8db311800000000,
+    0x9797948300000000, 0x67450af400000000, 0x0809af6f00000000,
+    0x87e0371b00000000, 0xe8ac928000000000, 0x187e0cf700000000,
+    0x7732a96c00000000, 0x06ad3d1e00000000, 0x69e1988500000000,
+    0x993306f200000000, 0xf67fa36900000000, 0x79963b1d00000000,
+    0x16da9e8600000000, 0xe60800f100000000, 0x8944a56a00000000,
+    0x0436291400000000, 0x6b7a8c8f00000000, 0x9ba812f800000000,
+    0xf4e4b76300000000, 0x7b0d2f1700000000, 0x14418a8c00000000,
+    0xe49314fb00000000, 0x8bdfb16000000000, 0xfa40251200000000,
+    0x950c808900000000, 0x65de1efe00000000, 0x0a92bb6500000000,
+    0x857b231100000000, 0xea37868a00000000, 0x1ae518fd00000000,
+    0x75a9bd6600000000, 0xf0b7633000000000, 0x9ffbc6ab00000000,
+    0x6f2958dc00000000, 0x0065fd4700000000, 0x8f8c653300000000,
+    0xe0c0c0a800000000, 0x10125edf00000000, 0x7f5efb4400000000,
+    0x0ec16f3600000000, 0x618dcaad00000000, 0x915f54da00000000,
+    0xfe13f14100000000, 0x71fa693500000000, 0x1eb6ccae00000000,
+    0xee6452d900000000, 0x8128f74200000000, 0x0c5a7b3c00000000,
+    0x6316dea700000000, 0x93c440d000000000, 0xfc88e54b00000000,
+    0x73617d3f00000000, 0x1c2dd8a400000000, 0xecff46d300000000,
+    0x83b3e34800000000, 0xf22c773a00000000, 0x9d60d2a100000000,
+    0x6db24cd600000000, 0x02fee94d00000000, 0x8d17713900000000,
+    0xe25bd4a200000000, 0x12894ad500000000, 0x7dc5ef4e00000000,
+    0x086c522800000000, 0x6720f7b300000000, 0x97f269c400000000,
+    0xf8becc5f00000000, 0x7757542b00000000, 0x181bf1b000000000,
+    0xe8c96fc700000000, 0x8785ca5c00000000, 0xf61a5e2e00000000,
+    0x9956fbb500000000, 0x698465c200000000, 0x06c8c05900000000,
+    0x8921582d00000000, 0xe66dfdb600000000, 0x16bf63c100000000,
+    0x79f3c65a00000000, 0xf4814a2400000000, 0x9bcdefbf00000000,
+    0x6b1f71c800000000, 0x0453d45300000000, 0x8bba4c2700000000,
+    0xe4f6e9bc00000000, 0x142477cb00000000, 0x7b68d25000000000,
+    0x0af7462200000000, 0x65bbe3b900000000, 0x95697dce00000000,
+    0xfa25d85500000000, 0x75cc402100000000, 0x1a80e5ba00000000,
+    0xea527bcd00000000, 0x851ede5600000000, 0xe06fc76000000000,
+    0x8f2362fb00000000, 0x7ff1fc8c00000000, 0x10bd591700000000,
+    0x9f54c16300000000, 0xf01864f800000000, 0x00cafa8f00000000,
+    0x6f865f1400000000, 0x1e19cb6600000000, 0x71556efd00000000,
+    0x8187f08a00000000, 0xeecb551100000000, 0x6122cd6500000000,
+    0x0e6e68fe00000000, 0xfebcf68900000000, 0x91f0531200000000,
+    0x1c82df6c00000000, 0x73ce7af700000000, 0x831ce48000000000,
+    0xec50411b00000000, 0x63b9d96f00000000, 0x0cf57cf400000000,
+    0xfc27e28300000000, 0x936b471800000000, 0xe2f4d36a00000000,
+    0x8db876f100000000, 0x7d6ae88600000000, 0x12264d1d00000000,
+    0x9dcfd56900000000, 0xf28370f200000000, 0x0251ee8500000000,
+    0x6d1d4b1e00000000, 0x18b4f67800000000, 0x77f853e300000000,
+    0x872acd9400000000, 0xe866680f00000000, 0x678ff07b00000000,
+    0x08c355e000000000, 0xf811cb9700000000, 0x975d6e0c00000000,
+    0xe6c2fa7e00000000, 0x898e5fe500000000, 0x795cc19200000000,
+    0x1610640900000000, 0x99f9fc7d00000000, 0xf6b559e600000000,
+    0x0667c79100000000, 0x692b620a00000000, 0xe459ee7400000000,
+    0x8b154bef00000000, 0x7bc7d59800000000, 0x148b700300000000,
+    0x9b62e87700000000, 0xf42e4dec00000000, 0x04fcd39b00000000,
+    0x6bb0760000000000, 0x1a2fe27200000000, 0x756347e900000000,
+    0x85b1d99e00000000, 0xeafd7c0500000000, 0x6514e47100000000,
+    0x0a5841ea00000000, 0xfa8adf9d00000000, 0x95c67a0600000000,
+    0x10d8a45000000000, 0x7f9401cb00000000, 0x8f469fbc00000000,
+    0xe00a3a2700000000, 0x6fe3a25300000000, 0x00af07c800000000,
+    0xf07d99bf00000000, 0x9f313c2400000000, 0xeeaea85600000000,
+    0x81e20dcd00000000, 0x713093ba00000000, 0x1e7c362100000000,
+    0x9195ae5500000000, 0xfed90bce00000000, 0x0e0b95b900000000,
+    0x6147302200000000, 0xec35bc5c00000000, 0x837919c700000000,
+    0x73ab87b000000000, 0x1ce7222b00000000, 0x930eba5f00000000,
+    0xfc421fc400000000, 0x0c9081b300000000, 0x63dc242800000000,
+    0x1243b05a00000000, 0x7d0f15c100000000, 0x8ddd8bb600000000,
+    0xe2912e2d00000000, 0x6d78b65900000000, 0x023413c200000000,
+    0xf2e68db500000000, 0x9daa282e00000000, 0xe803954800000000,
+    0x874f30d300000000, 0x779daea400000000, 0x18d10b3f00000000,
+    0x9738934b00000000, 0xf87436d000000000, 0x08a6a8a700000000,
+    0x67ea0d3c00000000, 0x1675994e00000000, 0x79393cd500000000,
+    0x89eba2a200000000, 0xe6a7073900000000, 0x694e9f4d00000000,
+    0x06023ad600000000, 0xf6d0a4a100000000, 0x999c013a00000000,
+    0x14ee8d4400000000, 0x7ba228df00000000, 0x8b70b6a800000000,
+    0xe43c133300000000, 0x6bd58b4700000000, 0x04992edc00000000,
+    0xf44bb0ab00000000, 0x9b07153000000000, 0xea98814200000000,
+    0x85d424d900000000, 0x7506baae00000000, 0x1a4a1f3500000000,
+    0x95a3874100000000, 0xfaef22da00000000, 0x0a3dbcad00000000,
+    0x6571193600000000},
+   {0x0000000000000000, 0x85d996dd00000000, 0x4bb55c6000000000,
+    0xce6ccabd00000000, 0x966ab9c000000000, 0x13b32f1d00000000,
+    0xdddfe5a000000000, 0x5806737d00000000, 0x6dd3035a00000000,
+    0xe80a958700000000, 0x26665f3a00000000, 0xa3bfc9e700000000,
+    0xfbb9ba9a00000000, 0x7e602c4700000000, 0xb00ce6fa00000000,
+    0x35d5702700000000, 0xdaa607b400000000, 0x5f7f916900000000,
+    0x91135bd400000000, 0x14cacd0900000000, 0x4cccbe7400000000,
+    0xc91528a900000000, 0x0779e21400000000, 0x82a074c900000000,
+    0xb77504ee00000000, 0x32ac923300000000, 0xfcc0588e00000000,
+    0x7919ce5300000000, 0x211fbd2e00000000, 0xa4c62bf300000000,
+    0x6aaae14e00000000, 0xef73779300000000, 0xf54b7eb300000000,
+    0x7092e86e00000000, 0xbefe22d300000000, 0x3b27b40e00000000,
+    0x6321c77300000000, 0xe6f851ae00000000, 0x28949b1300000000,
+    0xad4d0dce00000000, 0x98987de900000000, 0x1d41eb3400000000,
+    0xd32d218900000000, 0x56f4b75400000000, 0x0ef2c42900000000,
+    0x8b2b52f400000000, 0x4547984900000000, 0xc09e0e9400000000,
+    0x2fed790700000000, 0xaa34efda00000000, 0x6458256700000000,
+    0xe181b3ba00000000, 0xb987c0c700000000, 0x3c5e561a00000000,
+    0xf2329ca700000000, 0x77eb0a7a00000000, 0x423e7a5d00000000,
+    0xc7e7ec8000000000, 0x098b263d00000000, 0x8c52b0e000000000,
+    0xd454c39d00000000, 0x518d554000000000, 0x9fe19ffd00000000,
+    0x1a38092000000000, 0xab918dbd00000000, 0x2e481b6000000000,
+    0xe024d1dd00000000, 0x65fd470000000000, 0x3dfb347d00000000,
+    0xb822a2a000000000, 0x764e681d00000000, 0xf397fec000000000,
+    0xc6428ee700000000, 0x439b183a00000000, 0x8df7d28700000000,
+    0x082e445a00000000, 0x5028372700000000, 0xd5f1a1fa00000000,
+    0x1b9d6b4700000000, 0x9e44fd9a00000000, 0x71378a0900000000,
+    0xf4ee1cd400000000, 0x3a82d66900000000, 0xbf5b40b400000000,
+    0xe75d33c900000000, 0x6284a51400000000, 0xace86fa900000000,
+    0x2931f97400000000, 0x1ce4895300000000, 0x993d1f8e00000000,
+    0x5751d53300000000, 0xd28843ee00000000, 0x8a8e309300000000,
+    0x0f57a64e00000000, 0xc13b6cf300000000, 0x44e2fa2e00000000,
+    0x5edaf30e00000000, 0xdb0365d300000000, 0x156faf6e00000000,
+    0x90b639b300000000, 0xc8b04ace00000000, 0x4d69dc1300000000,
+    0x830516ae00000000, 0x06dc807300000000, 0x3309f05400000000,
+    0xb6d0668900000000, 0x78bcac3400000000, 0xfd653ae900000000,
+    0xa563499400000000, 0x20badf4900000000, 0xeed615f400000000,
+    0x6b0f832900000000, 0x847cf4ba00000000, 0x01a5626700000000,
+    0xcfc9a8da00000000, 0x4a103e0700000000, 0x12164d7a00000000,
+    0x97cfdba700000000, 0x59a3111a00000000, 0xdc7a87c700000000,
+    0xe9aff7e000000000, 0x6c76613d00000000, 0xa21aab8000000000,
+    0x27c33d5d00000000, 0x7fc54e2000000000, 0xfa1cd8fd00000000,
+    0x3470124000000000, 0xb1a9849d00000000, 0x17256aa000000000,
+    0x92fcfc7d00000000, 0x5c9036c000000000, 0xd949a01d00000000,
+    0x814fd36000000000, 0x049645bd00000000, 0xcafa8f0000000000,
+    0x4f2319dd00000000, 0x7af669fa00000000, 0xff2fff2700000000,
+    0x3143359a00000000, 0xb49aa34700000000, 0xec9cd03a00000000,
+    0x694546e700000000, 0xa7298c5a00000000, 0x22f01a8700000000,
+    0xcd836d1400000000, 0x485afbc900000000, 0x8636317400000000,
+    0x03efa7a900000000, 0x5be9d4d400000000, 0xde30420900000000,
+    0x105c88b400000000, 0x95851e6900000000, 0xa0506e4e00000000,
+    0x2589f89300000000, 0xebe5322e00000000, 0x6e3ca4f300000000,
+    0x363ad78e00000000, 0xb3e3415300000000, 0x7d8f8bee00000000,
+    0xf8561d3300000000, 0xe26e141300000000, 0x67b782ce00000000,
+    0xa9db487300000000, 0x2c02deae00000000, 0x7404add300000000,
+    0xf1dd3b0e00000000, 0x3fb1f1b300000000, 0xba68676e00000000,
+    0x8fbd174900000000, 0x0a64819400000000, 0xc4084b2900000000,
+    0x41d1ddf400000000, 0x19d7ae8900000000, 0x9c0e385400000000,
+    0x5262f2e900000000, 0xd7bb643400000000, 0x38c813a700000000,
+    0xbd11857a00000000, 0x737d4fc700000000, 0xf6a4d91a00000000,
+    0xaea2aa6700000000, 0x2b7b3cba00000000, 0xe517f60700000000,
+    0x60ce60da00000000, 0x551b10fd00000000, 0xd0c2862000000000,
+    0x1eae4c9d00000000, 0x9b77da4000000000, 0xc371a93d00000000,
+    0x46a83fe000000000, 0x88c4f55d00000000, 0x0d1d638000000000,
+    0xbcb4e71d00000000, 0x396d71c000000000, 0xf701bb7d00000000,
+    0x72d82da000000000, 0x2ade5edd00000000, 0xaf07c80000000000,
+    0x616b02bd00000000, 0xe4b2946000000000, 0xd167e44700000000,
+    0x54be729a00000000, 0x9ad2b82700000000, 0x1f0b2efa00000000,
+    0x470d5d8700000000, 0xc2d4cb5a00000000, 0x0cb801e700000000,
+    0x8961973a00000000, 0x6612e0a900000000, 0xe3cb767400000000,
+    0x2da7bcc900000000, 0xa87e2a1400000000, 0xf078596900000000,
+    0x75a1cfb400000000, 0xbbcd050900000000, 0x3e1493d400000000,
+    0x0bc1e3f300000000, 0x8e18752e00000000, 0x4074bf9300000000,
+    0xc5ad294e00000000, 0x9dab5a3300000000, 0x1872ccee00000000,
+    0xd61e065300000000, 0x53c7908e00000000, 0x49ff99ae00000000,
+    0xcc260f7300000000, 0x024ac5ce00000000, 0x8793531300000000,
+    0xdf95206e00000000, 0x5a4cb6b300000000, 0x94207c0e00000000,
+    0x11f9ead300000000, 0x242c9af400000000, 0xa1f50c2900000000,
+    0x6f99c69400000000, 0xea40504900000000, 0xb246233400000000,
+    0x379fb5e900000000, 0xf9f37f5400000000, 0x7c2ae98900000000,
+    0x93599e1a00000000, 0x168008c700000000, 0xd8ecc27a00000000,
+    0x5d3554a700000000, 0x053327da00000000, 0x80eab10700000000,
+    0x4e867bba00000000, 0xcb5fed6700000000, 0xfe8a9d4000000000,
+    0x7b530b9d00000000, 0xb53fc12000000000, 0x30e657fd00000000,
+    0x68e0248000000000, 0xed39b25d00000000, 0x235578e000000000,
+    0xa68cee3d00000000},
+   {0x0000000000000000, 0x76e10f9d00000000, 0xadc46ee100000000,
+    0xdb25617c00000000, 0x1b8fac1900000000, 0x6d6ea38400000000,
+    0xb64bc2f800000000, 0xc0aacd6500000000, 0x361e593300000000,
+    0x40ff56ae00000000, 0x9bda37d200000000, 0xed3b384f00000000,
+    0x2d91f52a00000000, 0x5b70fab700000000, 0x80559bcb00000000,
+    0xf6b4945600000000, 0x6c3cb26600000000, 0x1addbdfb00000000,
+    0xc1f8dc8700000000, 0xb719d31a00000000, 0x77b31e7f00000000,
+    0x015211e200000000, 0xda77709e00000000, 0xac967f0300000000,
+    0x5a22eb5500000000, 0x2cc3e4c800000000, 0xf7e685b400000000,
+    0x81078a2900000000, 0x41ad474c00000000, 0x374c48d100000000,
+    0xec6929ad00000000, 0x9a88263000000000, 0xd87864cd00000000,
+    0xae996b5000000000, 0x75bc0a2c00000000, 0x035d05b100000000,
+    0xc3f7c8d400000000, 0xb516c74900000000, 0x6e33a63500000000,
+    0x18d2a9a800000000, 0xee663dfe00000000, 0x9887326300000000,
+    0x43a2531f00000000, 0x35435c8200000000, 0xf5e991e700000000,
+    0x83089e7a00000000, 0x582dff0600000000, 0x2eccf09b00000000,
+    0xb444d6ab00000000, 0xc2a5d93600000000, 0x1980b84a00000000,
+    0x6f61b7d700000000, 0xafcb7ab200000000, 0xd92a752f00000000,
+    0x020f145300000000, 0x74ee1bce00000000, 0x825a8f9800000000,
+    0xf4bb800500000000, 0x2f9ee17900000000, 0x597feee400000000,
+    0x99d5238100000000, 0xef342c1c00000000, 0x34114d6000000000,
+    0x42f042fd00000000, 0xf1f7b94100000000, 0x8716b6dc00000000,
+    0x5c33d7a000000000, 0x2ad2d83d00000000, 0xea78155800000000,
+    0x9c991ac500000000, 0x47bc7bb900000000, 0x315d742400000000,
+    0xc7e9e07200000000, 0xb108efef00000000, 0x6a2d8e9300000000,
+    0x1ccc810e00000000, 0xdc664c6b00000000, 0xaa8743f600000000,
+    0x71a2228a00000000, 0x07432d1700000000, 0x9dcb0b2700000000,
+    0xeb2a04ba00000000, 0x300f65c600000000, 0x46ee6a5b00000000,
+    0x8644a73e00000000, 0xf0a5a8a300000000, 0x2b80c9df00000000,
+    0x5d61c64200000000, 0xabd5521400000000, 0xdd345d8900000000,
+    0x06113cf500000000, 0x70f0336800000000, 0xb05afe0d00000000,
+    0xc6bbf19000000000, 0x1d9e90ec00000000, 0x6b7f9f7100000000,
+    0x298fdd8c00000000, 0x5f6ed21100000000, 0x844bb36d00000000,
+    0xf2aabcf000000000, 0x3200719500000000, 0x44e17e0800000000,
+    0x9fc41f7400000000, 0xe92510e900000000, 0x1f9184bf00000000,
+    0x69708b2200000000, 0xb255ea5e00000000, 0xc4b4e5c300000000,
+    0x041e28a600000000, 0x72ff273b00000000, 0xa9da464700000000,
+    0xdf3b49da00000000, 0x45b36fea00000000, 0x3352607700000000,
+    0xe877010b00000000, 0x9e960e9600000000, 0x5e3cc3f300000000,
+    0x28ddcc6e00000000, 0xf3f8ad1200000000, 0x8519a28f00000000,
+    0x73ad36d900000000, 0x054c394400000000, 0xde69583800000000,
+    0xa88857a500000000, 0x68229ac000000000, 0x1ec3955d00000000,
+    0xc5e6f42100000000, 0xb307fbbc00000000, 0xe2ef738300000000,
+    0x940e7c1e00000000, 0x4f2b1d6200000000, 0x39ca12ff00000000,
+    0xf960df9a00000000, 0x8f81d00700000000, 0x54a4b17b00000000,
+    0x2245bee600000000, 0xd4f12ab000000000, 0xa210252d00000000,
+    0x7935445100000000, 0x0fd44bcc00000000, 0xcf7e86a900000000,
+    0xb99f893400000000, 0x62bae84800000000, 0x145be7d500000000,
+    0x8ed3c1e500000000, 0xf832ce7800000000, 0x2317af0400000000,
+    0x55f6a09900000000, 0x955c6dfc00000000, 0xe3bd626100000000,
+    0x3898031d00000000, 0x4e790c8000000000, 0xb8cd98d600000000,
+    0xce2c974b00000000, 0x1509f63700000000, 0x63e8f9aa00000000,
+    0xa34234cf00000000, 0xd5a33b5200000000, 0x0e865a2e00000000,
+    0x786755b300000000, 0x3a97174e00000000, 0x4c7618d300000000,
+    0x975379af00000000, 0xe1b2763200000000, 0x2118bb5700000000,
+    0x57f9b4ca00000000, 0x8cdcd5b600000000, 0xfa3dda2b00000000,
+    0x0c894e7d00000000, 0x7a6841e000000000, 0xa14d209c00000000,
+    0xd7ac2f0100000000, 0x1706e26400000000, 0x61e7edf900000000,
+    0xbac28c8500000000, 0xcc23831800000000, 0x56aba52800000000,
+    0x204aaab500000000, 0xfb6fcbc900000000, 0x8d8ec45400000000,
+    0x4d24093100000000, 0x3bc506ac00000000, 0xe0e067d000000000,
+    0x9601684d00000000, 0x60b5fc1b00000000, 0x1654f38600000000,
+    0xcd7192fa00000000, 0xbb909d6700000000, 0x7b3a500200000000,
+    0x0ddb5f9f00000000, 0xd6fe3ee300000000, 0xa01f317e00000000,
+    0x1318cac200000000, 0x65f9c55f00000000, 0xbedca42300000000,
+    0xc83dabbe00000000, 0x089766db00000000, 0x7e76694600000000,
+    0xa553083a00000000, 0xd3b207a700000000, 0x250693f100000000,
+    0x53e79c6c00000000, 0x88c2fd1000000000, 0xfe23f28d00000000,
+    0x3e893fe800000000, 0x4868307500000000, 0x934d510900000000,
+    0xe5ac5e9400000000, 0x7f2478a400000000, 0x09c5773900000000,
+    0xd2e0164500000000, 0xa40119d800000000, 0x64abd4bd00000000,
+    0x124adb2000000000, 0xc96fba5c00000000, 0xbf8eb5c100000000,
+    0x493a219700000000, 0x3fdb2e0a00000000, 0xe4fe4f7600000000,
+    0x921f40eb00000000, 0x52b58d8e00000000, 0x2454821300000000,
+    0xff71e36f00000000, 0x8990ecf200000000, 0xcb60ae0f00000000,
+    0xbd81a19200000000, 0x66a4c0ee00000000, 0x1045cf7300000000,
+    0xd0ef021600000000, 0xa60e0d8b00000000, 0x7d2b6cf700000000,
+    0x0bca636a00000000, 0xfd7ef73c00000000, 0x8b9ff8a100000000,
+    0x50ba99dd00000000, 0x265b964000000000, 0xe6f15b2500000000,
+    0x901054b800000000, 0x4b3535c400000000, 0x3dd43a5900000000,
+    0xa75c1c6900000000, 0xd1bd13f400000000, 0x0a98728800000000,
+    0x7c797d1500000000, 0xbcd3b07000000000, 0xca32bfed00000000,
+    0x1117de9100000000, 0x67f6d10c00000000, 0x9142455a00000000,
+    0xe7a34ac700000000, 0x3c862bbb00000000, 0x4a67242600000000,
+    0x8acde94300000000, 0xfc2ce6de00000000, 0x270987a200000000,
+    0x51e8883f00000000},
+   {0x0000000000000000, 0xe8dbfbb900000000, 0x91b186a800000000,
+    0x796a7d1100000000, 0x63657c8a00000000, 0x8bbe873300000000,
+    0xf2d4fa2200000000, 0x1a0f019b00000000, 0x87cc89cf00000000,
+    0x6f17727600000000, 0x167d0f6700000000, 0xfea6f4de00000000,
+    0xe4a9f54500000000, 0x0c720efc00000000, 0x751873ed00000000,
+    0x9dc3885400000000, 0x4f9f624400000000, 0xa74499fd00000000,
+    0xde2ee4ec00000000, 0x36f51f5500000000, 0x2cfa1ece00000000,
+    0xc421e57700000000, 0xbd4b986600000000, 0x559063df00000000,
+    0xc853eb8b00000000, 0x2088103200000000, 0x59e26d2300000000,
+    0xb139969a00000000, 0xab36970100000000, 0x43ed6cb800000000,
+    0x3a8711a900000000, 0xd25cea1000000000, 0x9e3ec58800000000,
+    0x76e53e3100000000, 0x0f8f432000000000, 0xe754b89900000000,
+    0xfd5bb90200000000, 0x158042bb00000000, 0x6cea3faa00000000,
+    0x8431c41300000000, 0x19f24c4700000000, 0xf129b7fe00000000,
+    0x8843caef00000000, 0x6098315600000000, 0x7a9730cd00000000,
+    0x924ccb7400000000, 0xeb26b66500000000, 0x03fd4ddc00000000,
+    0xd1a1a7cc00000000, 0x397a5c7500000000, 0x4010216400000000,
+    0xa8cbdadd00000000, 0xb2c4db4600000000, 0x5a1f20ff00000000,
+    0x23755dee00000000, 0xcbaea65700000000, 0x566d2e0300000000,
+    0xbeb6d5ba00000000, 0xc7dca8ab00000000, 0x2f07531200000000,
+    0x3508528900000000, 0xddd3a93000000000, 0xa4b9d42100000000,
+    0x4c622f9800000000, 0x7d7bfbca00000000, 0x95a0007300000000,
+    0xecca7d6200000000, 0x041186db00000000, 0x1e1e874000000000,
+    0xf6c57cf900000000, 0x8faf01e800000000, 0x6774fa5100000000,
+    0xfab7720500000000, 0x126c89bc00000000, 0x6b06f4ad00000000,
+    0x83dd0f1400000000, 0x99d20e8f00000000, 0x7109f53600000000,
+    0x0863882700000000, 0xe0b8739e00000000, 0x32e4998e00000000,
+    0xda3f623700000000, 0xa3551f2600000000, 0x4b8ee49f00000000,
+    0x5181e50400000000, 0xb95a1ebd00000000, 0xc03063ac00000000,
+    0x28eb981500000000, 0xb528104100000000, 0x5df3ebf800000000,
+    0x249996e900000000, 0xcc426d5000000000, 0xd64d6ccb00000000,
+    0x3e96977200000000, 0x47fcea6300000000, 0xaf2711da00000000,
+    0xe3453e4200000000, 0x0b9ec5fb00000000, 0x72f4b8ea00000000,
+    0x9a2f435300000000, 0x802042c800000000, 0x68fbb97100000000,
+    0x1191c46000000000, 0xf94a3fd900000000, 0x6489b78d00000000,
+    0x8c524c3400000000, 0xf538312500000000, 0x1de3ca9c00000000,
+    0x07eccb0700000000, 0xef3730be00000000, 0x965d4daf00000000,
+    0x7e86b61600000000, 0xacda5c0600000000, 0x4401a7bf00000000,
+    0x3d6bdaae00000000, 0xd5b0211700000000, 0xcfbf208c00000000,
+    0x2764db3500000000, 0x5e0ea62400000000, 0xb6d55d9d00000000,
+    0x2b16d5c900000000, 0xc3cd2e7000000000, 0xbaa7536100000000,
+    0x527ca8d800000000, 0x4873a94300000000, 0xa0a852fa00000000,
+    0xd9c22feb00000000, 0x3119d45200000000, 0xbbf0874e00000000,
+    0x532b7cf700000000, 0x2a4101e600000000, 0xc29afa5f00000000,
+    0xd895fbc400000000, 0x304e007d00000000, 0x49247d6c00000000,
+    0xa1ff86d500000000, 0x3c3c0e8100000000, 0xd4e7f53800000000,
+    0xad8d882900000000, 0x4556739000000000, 0x5f59720b00000000,
+    0xb78289b200000000, 0xcee8f4a300000000, 0x26330f1a00000000,
+    0xf46fe50a00000000, 0x1cb41eb300000000, 0x65de63a200000000,
+    0x8d05981b00000000, 0x970a998000000000, 0x7fd1623900000000,
+    0x06bb1f2800000000, 0xee60e49100000000, 0x73a36cc500000000,
+    0x9b78977c00000000, 0xe212ea6d00000000, 0x0ac911d400000000,
+    0x10c6104f00000000, 0xf81debf600000000, 0x817796e700000000,
+    0x69ac6d5e00000000, 0x25ce42c600000000, 0xcd15b97f00000000,
+    0xb47fc46e00000000, 0x5ca43fd700000000, 0x46ab3e4c00000000,
+    0xae70c5f500000000, 0xd71ab8e400000000, 0x3fc1435d00000000,
+    0xa202cb0900000000, 0x4ad930b000000000, 0x33b34da100000000,
+    0xdb68b61800000000, 0xc167b78300000000, 0x29bc4c3a00000000,
+    0x50d6312b00000000, 0xb80dca9200000000, 0x6a51208200000000,
+    0x828adb3b00000000, 0xfbe0a62a00000000, 0x133b5d9300000000,
+    0x09345c0800000000, 0xe1efa7b100000000, 0x9885daa000000000,
+    0x705e211900000000, 0xed9da94d00000000, 0x054652f400000000,
+    0x7c2c2fe500000000, 0x94f7d45c00000000, 0x8ef8d5c700000000,
+    0x66232e7e00000000, 0x1f49536f00000000, 0xf792a8d600000000,
+    0xc68b7c8400000000, 0x2e50873d00000000, 0x573afa2c00000000,
+    0xbfe1019500000000, 0xa5ee000e00000000, 0x4d35fbb700000000,
+    0x345f86a600000000, 0xdc847d1f00000000, 0x4147f54b00000000,
+    0xa99c0ef200000000, 0xd0f673e300000000, 0x382d885a00000000,
+    0x222289c100000000, 0xcaf9727800000000, 0xb3930f6900000000,
+    0x5b48f4d000000000, 0x89141ec000000000, 0x61cfe57900000000,
+    0x18a5986800000000, 0xf07e63d100000000, 0xea71624a00000000,
+    0x02aa99f300000000, 0x7bc0e4e200000000, 0x931b1f5b00000000,
+    0x0ed8970f00000000, 0xe6036cb600000000, 0x9f6911a700000000,
+    0x77b2ea1e00000000, 0x6dbdeb8500000000, 0x8566103c00000000,
+    0xfc0c6d2d00000000, 0x14d7969400000000, 0x58b5b90c00000000,
+    0xb06e42b500000000, 0xc9043fa400000000, 0x21dfc41d00000000,
+    0x3bd0c58600000000, 0xd30b3e3f00000000, 0xaa61432e00000000,
+    0x42bab89700000000, 0xdf7930c300000000, 0x37a2cb7a00000000,
+    0x4ec8b66b00000000, 0xa6134dd200000000, 0xbc1c4c4900000000,
+    0x54c7b7f000000000, 0x2dadcae100000000, 0xc576315800000000,
+    0x172adb4800000000, 0xfff120f100000000, 0x869b5de000000000,
+    0x6e40a65900000000, 0x744fa7c200000000, 0x9c945c7b00000000,
+    0xe5fe216a00000000, 0x0d25dad300000000, 0x90e6528700000000,
+    0x783da93e00000000, 0x0157d42f00000000, 0xe98c2f9600000000,
+    0xf3832e0d00000000, 0x1b58d5b400000000, 0x6232a8a500000000,
+    0x8ae9531c00000000},
+   {0x0000000000000000, 0x919168ae00000000, 0x6325a08700000000,
+    0xf2b4c82900000000, 0x874c31d400000000, 0x16dd597a00000000,
+    0xe469915300000000, 0x75f8f9fd00000000, 0x4f9f137300000000,
+    0xde0e7bdd00000000, 0x2cbab3f400000000, 0xbd2bdb5a00000000,
+    0xc8d322a700000000, 0x59424a0900000000, 0xabf6822000000000,
+    0x3a67ea8e00000000, 0x9e3e27e600000000, 0x0faf4f4800000000,
+    0xfd1b876100000000, 0x6c8aefcf00000000, 0x1972163200000000,
+    0x88e37e9c00000000, 0x7a57b6b500000000, 0xebc6de1b00000000,
+    0xd1a1349500000000, 0x40305c3b00000000, 0xb284941200000000,
+    0x2315fcbc00000000, 0x56ed054100000000, 0xc77c6def00000000,
+    0x35c8a5c600000000, 0xa459cd6800000000, 0x7d7b3f1700000000,
+    0xecea57b900000000, 0x1e5e9f9000000000, 0x8fcff73e00000000,
+    0xfa370ec300000000, 0x6ba6666d00000000, 0x9912ae4400000000,
+    0x0883c6ea00000000, 0x32e42c6400000000, 0xa37544ca00000000,
+    0x51c18ce300000000, 0xc050e44d00000000, 0xb5a81db000000000,
+    0x2439751e00000000, 0xd68dbd3700000000, 0x471cd59900000000,
+    0xe34518f100000000, 0x72d4705f00000000, 0x8060b87600000000,
+    0x11f1d0d800000000, 0x6409292500000000, 0xf598418b00000000,
+    0x072c89a200000000, 0x96bde10c00000000, 0xacda0b8200000000,
+    0x3d4b632c00000000, 0xcfffab0500000000, 0x5e6ec3ab00000000,
+    0x2b963a5600000000, 0xba0752f800000000, 0x48b39ad100000000,
+    0xd922f27f00000000, 0xfaf67e2e00000000, 0x6b67168000000000,
+    0x99d3dea900000000, 0x0842b60700000000, 0x7dba4ffa00000000,
+    0xec2b275400000000, 0x1e9fef7d00000000, 0x8f0e87d300000000,
+    0xb5696d5d00000000, 0x24f805f300000000, 0xd64ccdda00000000,
+    0x47dda57400000000, 0x32255c8900000000, 0xa3b4342700000000,
+    0x5100fc0e00000000, 0xc09194a000000000, 0x64c859c800000000,
+    0xf559316600000000, 0x07edf94f00000000, 0x967c91e100000000,
+    0xe384681c00000000, 0x721500b200000000, 0x80a1c89b00000000,
+    0x1130a03500000000, 0x2b574abb00000000, 0xbac6221500000000,
+    0x4872ea3c00000000, 0xd9e3829200000000, 0xac1b7b6f00000000,
+    0x3d8a13c100000000, 0xcf3edbe800000000, 0x5eafb34600000000,
+    0x878d413900000000, 0x161c299700000000, 0xe4a8e1be00000000,
+    0x7539891000000000, 0x00c170ed00000000, 0x9150184300000000,
+    0x63e4d06a00000000, 0xf275b8c400000000, 0xc812524a00000000,
+    0x59833ae400000000, 0xab37f2cd00000000, 0x3aa69a6300000000,
+    0x4f5e639e00000000, 0xdecf0b3000000000, 0x2c7bc31900000000,
+    0xbdeaabb700000000, 0x19b366df00000000, 0x88220e7100000000,
+    0x7a96c65800000000, 0xeb07aef600000000, 0x9eff570b00000000,
+    0x0f6e3fa500000000, 0xfddaf78c00000000, 0x6c4b9f2200000000,
+    0x562c75ac00000000, 0xc7bd1d0200000000, 0x3509d52b00000000,
+    0xa498bd8500000000, 0xd160447800000000, 0x40f12cd600000000,
+    0xb245e4ff00000000, 0x23d48c5100000000, 0xf4edfd5c00000000,
+    0x657c95f200000000, 0x97c85ddb00000000, 0x0659357500000000,
+    0x73a1cc8800000000, 0xe230a42600000000, 0x10846c0f00000000,
+    0x811504a100000000, 0xbb72ee2f00000000, 0x2ae3868100000000,
+    0xd8574ea800000000, 0x49c6260600000000, 0x3c3edffb00000000,
+    0xadafb75500000000, 0x5f1b7f7c00000000, 0xce8a17d200000000,
+    0x6ad3daba00000000, 0xfb42b21400000000, 0x09f67a3d00000000,
+    0x9867129300000000, 0xed9feb6e00000000, 0x7c0e83c000000000,
+    0x8eba4be900000000, 0x1f2b234700000000, 0x254cc9c900000000,
+    0xb4dda16700000000, 0x4669694e00000000, 0xd7f801e000000000,
+    0xa200f81d00000000, 0x339190b300000000, 0xc125589a00000000,
+    0x50b4303400000000, 0x8996c24b00000000, 0x1807aae500000000,
+    0xeab362cc00000000, 0x7b220a6200000000, 0x0edaf39f00000000,
+    0x9f4b9b3100000000, 0x6dff531800000000, 0xfc6e3bb600000000,
+    0xc609d13800000000, 0x5798b99600000000, 0xa52c71bf00000000,
+    0x34bd191100000000, 0x4145e0ec00000000, 0xd0d4884200000000,
+    0x2260406b00000000, 0xb3f128c500000000, 0x17a8e5ad00000000,
+    0x86398d0300000000, 0x748d452a00000000, 0xe51c2d8400000000,
+    0x90e4d47900000000, 0x0175bcd700000000, 0xf3c174fe00000000,
+    0x62501c5000000000, 0x5837f6de00000000, 0xc9a69e7000000000,
+    0x3b12565900000000, 0xaa833ef700000000, 0xdf7bc70a00000000,
+    0x4eeaafa400000000, 0xbc5e678d00000000, 0x2dcf0f2300000000,
+    0x0e1b837200000000, 0x9f8aebdc00000000, 0x6d3e23f500000000,
+    0xfcaf4b5b00000000, 0x8957b2a600000000, 0x18c6da0800000000,
+    0xea72122100000000, 0x7be37a8f00000000, 0x4184900100000000,
+    0xd015f8af00000000, 0x22a1308600000000, 0xb330582800000000,
+    0xc6c8a1d500000000, 0x5759c97b00000000, 0xa5ed015200000000,
+    0x347c69fc00000000, 0x9025a49400000000, 0x01b4cc3a00000000,
+    0xf300041300000000, 0x62916cbd00000000, 0x1769954000000000,
+    0x86f8fdee00000000, 0x744c35c700000000, 0xe5dd5d6900000000,
+    0xdfbab7e700000000, 0x4e2bdf4900000000, 0xbc9f176000000000,
+    0x2d0e7fce00000000, 0x58f6863300000000, 0xc967ee9d00000000,
+    0x3bd326b400000000, 0xaa424e1a00000000, 0x7360bc6500000000,
+    0xe2f1d4cb00000000, 0x10451ce200000000, 0x81d4744c00000000,
+    0xf42c8db100000000, 0x65bde51f00000000, 0x97092d3600000000,
+    0x0698459800000000, 0x3cffaf1600000000, 0xad6ec7b800000000,
+    0x5fda0f9100000000, 0xce4b673f00000000, 0xbbb39ec200000000,
+    0x2a22f66c00000000, 0xd8963e4500000000, 0x490756eb00000000,
+    0xed5e9b8300000000, 0x7ccff32d00000000, 0x8e7b3b0400000000,
+    0x1fea53aa00000000, 0x6a12aa5700000000, 0xfb83c2f900000000,
+    0x09370ad000000000, 0x98a6627e00000000, 0xa2c188f000000000,
+    0x3350e05e00000000, 0xc1e4287700000000, 0x507540d900000000,
+    0x258db92400000000, 0xb41cd18a00000000, 0x46a819a300000000,
+    0xd739710d00000000}};
+
+#else /* W == 4 */
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa,
+    0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b,
+    0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232,
+    0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8,
+    0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e,
+    0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa,
+    0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b,
+    0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f,
+    0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719,
+    0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3,
+    0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa,
+    0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b,
+    0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed,
+    0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89,
+    0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25,
+    0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041,
+    0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c,
+    0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed,
+    0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4,
+    0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758,
+    0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e,
+    0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a,
+    0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed,
+    0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889,
+    0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df,
+    0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544,
+    0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d,
+    0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c,
+    0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1,
+    0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95,
+    0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839,
+    0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d,
+    0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976,
+    0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7,
+    0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be,
+    0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144,
+    0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12,
+    0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376,
+    0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a,
+    0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e,
+    0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278,
+    0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682,
+    0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b,
+    0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a,
+    0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561,
+    0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05,
+    0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9,
+    0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd,
+    0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0,
+    0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61,
+    0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678,
+    0x264b06e6},
+   {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413,
+    0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3,
+    0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d,
+    0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653,
+    0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9,
+    0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e,
+    0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5,
+    0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712,
+    0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8,
+    0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6,
+    0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068,
+    0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8,
+    0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579,
+    0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade,
+    0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37,
+    0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590,
+    0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4,
+    0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64,
+    0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea,
+    0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678,
+    0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282,
+    0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25,
+    0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102,
+    0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5,
+    0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f,
+    0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146,
+    0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8,
+    0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08,
+    0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c,
+    0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b,
+    0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972,
+    0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5,
+    0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d,
+    0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd,
+    0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833,
+    0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d,
+    0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7,
+    0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60,
+    0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2,
+    0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105,
+    0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff,
+    0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1,
+    0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f,
+    0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf,
+    0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617,
+    0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0,
+    0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959,
+    0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe,
+    0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca,
+    0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a,
+    0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184,
+    0x92364a30},
+   {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216,
+    0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8,
+    0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170,
+    0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035,
+    0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6,
+    0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145,
+    0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d,
+    0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e,
+    0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d,
+    0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408,
+    0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0,
+    0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e,
+    0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c,
+    0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf,
+    0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a,
+    0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9,
+    0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1,
+    0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f,
+    0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987,
+    0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4,
+    0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37,
+    0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84,
+    0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca,
+    0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79,
+    0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba,
+    0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d,
+    0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5,
+    0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b,
+    0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643,
+    0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0,
+    0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525,
+    0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496,
+    0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8,
+    0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026,
+    0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e,
+    0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db,
+    0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118,
+    0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab,
+    0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf,
+    0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c,
+    0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf,
+    0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a,
+    0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32,
+    0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec,
+    0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82,
+    0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31,
+    0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4,
+    0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957,
+    0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f,
+    0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1,
+    0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869,
+    0xe4c4abcc},
+   {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0,
+    0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271,
+    0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61,
+    0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52,
+    0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43,
+    0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333,
+    0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64,
+    0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314,
+    0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205,
+    0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136,
+    0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26,
+    0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997,
+    0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849,
+    0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739,
+    0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8,
+    0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98,
+    0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b,
+    0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba,
+    0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa,
+    0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d,
+    0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c,
+    0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc,
+    0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af,
+    0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf,
+    0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce,
+    0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922,
+    0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532,
+    0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183,
+    0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710,
+    0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860,
+    0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1,
+    0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1,
+    0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956,
+    0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7,
+    0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7,
+    0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4,
+    0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5,
+    0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5,
+    0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb,
+    0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb,
+    0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da,
+    0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9,
+    0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9,
+    0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48,
+    0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df,
+    0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af,
+    0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e,
+    0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e,
+    0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d,
+    0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c,
+    0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c,
+    0xca64c78c}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x00000000, 0xb029603d, 0x6053c07a, 0xd07aa047, 0xc0a680f5,
+    0x708fe0c8, 0xa0f5408f, 0x10dc20b2, 0xc14b7030, 0x7162100d,
+    0xa118b04a, 0x1131d077, 0x01edf0c5, 0xb1c490f8, 0x61be30bf,
+    0xd1975082, 0x8297e060, 0x32be805d, 0xe2c4201a, 0x52ed4027,
+    0x42316095, 0xf21800a8, 0x2262a0ef, 0x924bc0d2, 0x43dc9050,
+    0xf3f5f06d, 0x238f502a, 0x93a63017, 0x837a10a5, 0x33537098,
+    0xe329d0df, 0x5300b0e2, 0x042fc1c1, 0xb406a1fc, 0x647c01bb,
+    0xd4556186, 0xc4894134, 0x74a02109, 0xa4da814e, 0x14f3e173,
+    0xc564b1f1, 0x754dd1cc, 0xa537718b, 0x151e11b6, 0x05c23104,
+    0xb5eb5139, 0x6591f17e, 0xd5b89143, 0x86b821a1, 0x3691419c,
+    0xe6ebe1db, 0x56c281e6, 0x461ea154, 0xf637c169, 0x264d612e,
+    0x96640113, 0x47f35191, 0xf7da31ac, 0x27a091eb, 0x9789f1d6,
+    0x8755d164, 0x377cb159, 0xe706111e, 0x572f7123, 0x4958f358,
+    0xf9719365, 0x290b3322, 0x9922531f, 0x89fe73ad, 0x39d71390,
+    0xe9adb3d7, 0x5984d3ea, 0x88138368, 0x383ae355, 0xe8404312,
+    0x5869232f, 0x48b5039d, 0xf89c63a0, 0x28e6c3e7, 0x98cfa3da,
+    0xcbcf1338, 0x7be67305, 0xab9cd342, 0x1bb5b37f, 0x0b6993cd,
+    0xbb40f3f0, 0x6b3a53b7, 0xdb13338a, 0x0a846308, 0xbaad0335,
+    0x6ad7a372, 0xdafec34f, 0xca22e3fd, 0x7a0b83c0, 0xaa712387,
+    0x1a5843ba, 0x4d773299, 0xfd5e52a4, 0x2d24f2e3, 0x9d0d92de,
+    0x8dd1b26c, 0x3df8d251, 0xed827216, 0x5dab122b, 0x8c3c42a9,
+    0x3c152294, 0xec6f82d3, 0x5c46e2ee, 0x4c9ac25c, 0xfcb3a261,
+    0x2cc90226, 0x9ce0621b, 0xcfe0d2f9, 0x7fc9b2c4, 0xafb31283,
+    0x1f9a72be, 0x0f46520c, 0xbf6f3231, 0x6f159276, 0xdf3cf24b,
+    0x0eaba2c9, 0xbe82c2f4, 0x6ef862b3, 0xded1028e, 0xce0d223c,
+    0x7e244201, 0xae5ee246, 0x1e77827b, 0x92b0e6b1, 0x2299868c,
+    0xf2e326cb, 0x42ca46f6, 0x52166644, 0xe23f0679, 0x3245a63e,
+    0x826cc603, 0x53fb9681, 0xe3d2f6bc, 0x33a856fb, 0x838136c6,
+    0x935d1674, 0x23747649, 0xf30ed60e, 0x4327b633, 0x102706d1,
+    0xa00e66ec, 0x7074c6ab, 0xc05da696, 0xd0818624, 0x60a8e619,
+    0xb0d2465e, 0x00fb2663, 0xd16c76e1, 0x614516dc, 0xb13fb69b,
+    0x0116d6a6, 0x11caf614, 0xa1e39629, 0x7199366e, 0xc1b05653,
+    0x969f2770, 0x26b6474d, 0xf6cce70a, 0x46e58737, 0x5639a785,
+    0xe610c7b8, 0x366a67ff, 0x864307c2, 0x57d45740, 0xe7fd377d,
+    0x3787973a, 0x87aef707, 0x9772d7b5, 0x275bb788, 0xf72117cf,
+    0x470877f2, 0x1408c710, 0xa421a72d, 0x745b076a, 0xc4726757,
+    0xd4ae47e5, 0x648727d8, 0xb4fd879f, 0x04d4e7a2, 0xd543b720,
+    0x656ad71d, 0xb510775a, 0x05391767, 0x15e537d5, 0xa5cc57e8,
+    0x75b6f7af, 0xc59f9792, 0xdbe815e9, 0x6bc175d4, 0xbbbbd593,
+    0x0b92b5ae, 0x1b4e951c, 0xab67f521, 0x7b1d5566, 0xcb34355b,
+    0x1aa365d9, 0xaa8a05e4, 0x7af0a5a3, 0xcad9c59e, 0xda05e52c,
+    0x6a2c8511, 0xba562556, 0x0a7f456b, 0x597ff589, 0xe95695b4,
+    0x392c35f3, 0x890555ce, 0x99d9757c, 0x29f01541, 0xf98ab506,
+    0x49a3d53b, 0x983485b9, 0x281de584, 0xf86745c3, 0x484e25fe,
+    0x5892054c, 0xe8bb6571, 0x38c1c536, 0x88e8a50b, 0xdfc7d428,
+    0x6feeb415, 0xbf941452, 0x0fbd746f, 0x1f6154dd, 0xaf4834e0,
+    0x7f3294a7, 0xcf1bf49a, 0x1e8ca418, 0xaea5c425, 0x7edf6462,
+    0xcef6045f, 0xde2a24ed, 0x6e0344d0, 0xbe79e497, 0x0e5084aa,
+    0x5d503448, 0xed795475, 0x3d03f432, 0x8d2a940f, 0x9df6b4bd,
+    0x2ddfd480, 0xfda574c7, 0x4d8c14fa, 0x9c1b4478, 0x2c322445,
+    0xfc488402, 0x4c61e43f, 0x5cbdc48d, 0xec94a4b0, 0x3cee04f7,
+    0x8cc764ca},
+   {0x00000000, 0xa5d35ccb, 0x0ba1c84d, 0xae729486, 0x1642919b,
+    0xb391cd50, 0x1de359d6, 0xb830051d, 0x6d8253ec, 0xc8510f27,
+    0x66239ba1, 0xc3f0c76a, 0x7bc0c277, 0xde139ebc, 0x70610a3a,
+    0xd5b256f1, 0x9b02d603, 0x3ed18ac8, 0x90a31e4e, 0x35704285,
+    0x8d404798, 0x28931b53, 0x86e18fd5, 0x2332d31e, 0xf68085ef,
+    0x5353d924, 0xfd214da2, 0x58f21169, 0xe0c21474, 0x451148bf,
+    0xeb63dc39, 0x4eb080f2, 0x3605ac07, 0x93d6f0cc, 0x3da4644a,
+    0x98773881, 0x20473d9c, 0x85946157, 0x2be6f5d1, 0x8e35a91a,
+    0x5b87ffeb, 0xfe54a320, 0x502637a6, 0xf5f56b6d, 0x4dc56e70,
+    0xe81632bb, 0x4664a63d, 0xe3b7faf6, 0xad077a04, 0x08d426cf,
+    0xa6a6b249, 0x0375ee82, 0xbb45eb9f, 0x1e96b754, 0xb0e423d2,
+    0x15377f19, 0xc08529e8, 0x65567523, 0xcb24e1a5, 0x6ef7bd6e,
+    0xd6c7b873, 0x7314e4b8, 0xdd66703e, 0x78b52cf5, 0x6c0a580f,
+    0xc9d904c4, 0x67ab9042, 0xc278cc89, 0x7a48c994, 0xdf9b955f,
+    0x71e901d9, 0xd43a5d12, 0x01880be3, 0xa45b5728, 0x0a29c3ae,
+    0xaffa9f65, 0x17ca9a78, 0xb219c6b3, 0x1c6b5235, 0xb9b80efe,
+    0xf7088e0c, 0x52dbd2c7, 0xfca94641, 0x597a1a8a, 0xe14a1f97,
+    0x4499435c, 0xeaebd7da, 0x4f388b11, 0x9a8adde0, 0x3f59812b,
+    0x912b15ad, 0x34f84966, 0x8cc84c7b, 0x291b10b0, 0x87698436,
+    0x22bad8fd, 0x5a0ff408, 0xffdca8c3, 0x51ae3c45, 0xf47d608e,
+    0x4c4d6593, 0xe99e3958, 0x47ecadde, 0xe23ff115, 0x378da7e4,
+    0x925efb2f, 0x3c2c6fa9, 0x99ff3362, 0x21cf367f, 0x841c6ab4,
+    0x2a6efe32, 0x8fbda2f9, 0xc10d220b, 0x64de7ec0, 0xcaacea46,
+    0x6f7fb68d, 0xd74fb390, 0x729cef5b, 0xdcee7bdd, 0x793d2716,
+    0xac8f71e7, 0x095c2d2c, 0xa72eb9aa, 0x02fde561, 0xbacde07c,
+    0x1f1ebcb7, 0xb16c2831, 0x14bf74fa, 0xd814b01e, 0x7dc7ecd5,
+    0xd3b57853, 0x76662498, 0xce562185, 0x6b857d4e, 0xc5f7e9c8,
+    0x6024b503, 0xb596e3f2, 0x1045bf39, 0xbe372bbf, 0x1be47774,
+    0xa3d47269, 0x06072ea2, 0xa875ba24, 0x0da6e6ef, 0x4316661d,
+    0xe6c53ad6, 0x48b7ae50, 0xed64f29b, 0x5554f786, 0xf087ab4d,
+    0x5ef53fcb, 0xfb266300, 0x2e9435f1, 0x8b47693a, 0x2535fdbc,
+    0x80e6a177, 0x38d6a46a, 0x9d05f8a1, 0x33776c27, 0x96a430ec,
+    0xee111c19, 0x4bc240d2, 0xe5b0d454, 0x4063889f, 0xf8538d82,
+    0x5d80d149, 0xf3f245cf, 0x56211904, 0x83934ff5, 0x2640133e,
+    0x883287b8, 0x2de1db73, 0x95d1de6e, 0x300282a5, 0x9e701623,
+    0x3ba34ae8, 0x7513ca1a, 0xd0c096d1, 0x7eb20257, 0xdb615e9c,
+    0x63515b81, 0xc682074a, 0x68f093cc, 0xcd23cf07, 0x189199f6,
+    0xbd42c53d, 0x133051bb, 0xb6e30d70, 0x0ed3086d, 0xab0054a6,
+    0x0572c020, 0xa0a19ceb, 0xb41ee811, 0x11cdb4da, 0xbfbf205c,
+    0x1a6c7c97, 0xa25c798a, 0x078f2541, 0xa9fdb1c7, 0x0c2eed0c,
+    0xd99cbbfd, 0x7c4fe736, 0xd23d73b0, 0x77ee2f7b, 0xcfde2a66,
+    0x6a0d76ad, 0xc47fe22b, 0x61acbee0, 0x2f1c3e12, 0x8acf62d9,
+    0x24bdf65f, 0x816eaa94, 0x395eaf89, 0x9c8df342, 0x32ff67c4,
+    0x972c3b0f, 0x429e6dfe, 0xe74d3135, 0x493fa5b3, 0xececf978,
+    0x54dcfc65, 0xf10fa0ae, 0x5f7d3428, 0xfaae68e3, 0x821b4416,
+    0x27c818dd, 0x89ba8c5b, 0x2c69d090, 0x9459d58d, 0x318a8946,
+    0x9ff81dc0, 0x3a2b410b, 0xef9917fa, 0x4a4a4b31, 0xe438dfb7,
+    0x41eb837c, 0xf9db8661, 0x5c08daaa, 0xf27a4e2c, 0x57a912e7,
+    0x19199215, 0xbccacede, 0x12b85a58, 0xb76b0693, 0x0f5b038e,
+    0xaa885f45, 0x04facbc3, 0xa1299708, 0x749bc1f9, 0xd1489d32,
+    0x7f3a09b4, 0xdae9557f, 0x62d95062, 0xc70a0ca9, 0x6978982f,
+    0xccabc4e4},
+   {0x00000000, 0xb40b77a6, 0x29119f97, 0x9d1ae831, 0x13244ff4,
+    0xa72f3852, 0x3a35d063, 0x8e3ea7c5, 0x674eef33, 0xd3459895,
+    0x4e5f70a4, 0xfa540702, 0x746aa0c7, 0xc061d761, 0x5d7b3f50,
+    0xe97048f6, 0xce9cde67, 0x7a97a9c1, 0xe78d41f0, 0x53863656,
+    0xddb89193, 0x69b3e635, 0xf4a90e04, 0x40a279a2, 0xa9d23154,
+    0x1dd946f2, 0x80c3aec3, 0x34c8d965, 0xbaf67ea0, 0x0efd0906,
+    0x93e7e137, 0x27ec9691, 0x9c39bdcf, 0x2832ca69, 0xb5282258,
+    0x012355fe, 0x8f1df23b, 0x3b16859d, 0xa60c6dac, 0x12071a0a,
+    0xfb7752fc, 0x4f7c255a, 0xd266cd6b, 0x666dbacd, 0xe8531d08,
+    0x5c586aae, 0xc142829f, 0x7549f539, 0x52a563a8, 0xe6ae140e,
+    0x7bb4fc3f, 0xcfbf8b99, 0x41812c5c, 0xf58a5bfa, 0x6890b3cb,
+    0xdc9bc46d, 0x35eb8c9b, 0x81e0fb3d, 0x1cfa130c, 0xa8f164aa,
+    0x26cfc36f, 0x92c4b4c9, 0x0fde5cf8, 0xbbd52b5e, 0x79750b44,
+    0xcd7e7ce2, 0x506494d3, 0xe46fe375, 0x6a5144b0, 0xde5a3316,
+    0x4340db27, 0xf74bac81, 0x1e3be477, 0xaa3093d1, 0x372a7be0,
+    0x83210c46, 0x0d1fab83, 0xb914dc25, 0x240e3414, 0x900543b2,
+    0xb7e9d523, 0x03e2a285, 0x9ef84ab4, 0x2af33d12, 0xa4cd9ad7,
+    0x10c6ed71, 0x8ddc0540, 0x39d772e6, 0xd0a73a10, 0x64ac4db6,
+    0xf9b6a587, 0x4dbdd221, 0xc38375e4, 0x77880242, 0xea92ea73,
+    0x5e999dd5, 0xe54cb68b, 0x5147c12d, 0xcc5d291c, 0x78565eba,
+    0xf668f97f, 0x42638ed9, 0xdf7966e8, 0x6b72114e, 0x820259b8,
+    0x36092e1e, 0xab13c62f, 0x1f18b189, 0x9126164c, 0x252d61ea,
+    0xb83789db, 0x0c3cfe7d, 0x2bd068ec, 0x9fdb1f4a, 0x02c1f77b,
+    0xb6ca80dd, 0x38f42718, 0x8cff50be, 0x11e5b88f, 0xa5eecf29,
+    0x4c9e87df, 0xf895f079, 0x658f1848, 0xd1846fee, 0x5fbac82b,
+    0xebb1bf8d, 0x76ab57bc, 0xc2a0201a, 0xf2ea1688, 0x46e1612e,
+    0xdbfb891f, 0x6ff0feb9, 0xe1ce597c, 0x55c52eda, 0xc8dfc6eb,
+    0x7cd4b14d, 0x95a4f9bb, 0x21af8e1d, 0xbcb5662c, 0x08be118a,
+    0x8680b64f, 0x328bc1e9, 0xaf9129d8, 0x1b9a5e7e, 0x3c76c8ef,
+    0x887dbf49, 0x15675778, 0xa16c20de, 0x2f52871b, 0x9b59f0bd,
+    0x0643188c, 0xb2486f2a, 0x5b3827dc, 0xef33507a, 0x7229b84b,
+    0xc622cfed, 0x481c6828, 0xfc171f8e, 0x610df7bf, 0xd5068019,
+    0x6ed3ab47, 0xdad8dce1, 0x47c234d0, 0xf3c94376, 0x7df7e4b3,
+    0xc9fc9315, 0x54e67b24, 0xe0ed0c82, 0x099d4474, 0xbd9633d2,
+    0x208cdbe3, 0x9487ac45, 0x1ab90b80, 0xaeb27c26, 0x33a89417,
+    0x87a3e3b1, 0xa04f7520, 0x14440286, 0x895eeab7, 0x3d559d11,
+    0xb36b3ad4, 0x07604d72, 0x9a7aa543, 0x2e71d2e5, 0xc7019a13,
+    0x730aedb5, 0xee100584, 0x5a1b7222, 0xd425d5e7, 0x602ea241,
+    0xfd344a70, 0x493f3dd6, 0x8b9f1dcc, 0x3f946a6a, 0xa28e825b,
+    0x1685f5fd, 0x98bb5238, 0x2cb0259e, 0xb1aacdaf, 0x05a1ba09,
+    0xecd1f2ff, 0x58da8559, 0xc5c06d68, 0x71cb1ace, 0xfff5bd0b,
+    0x4bfecaad, 0xd6e4229c, 0x62ef553a, 0x4503c3ab, 0xf108b40d,
+    0x6c125c3c, 0xd8192b9a, 0x56278c5f, 0xe22cfbf9, 0x7f3613c8,
+    0xcb3d646e, 0x224d2c98, 0x96465b3e, 0x0b5cb30f, 0xbf57c4a9,
+    0x3169636c, 0x856214ca, 0x1878fcfb, 0xac738b5d, 0x17a6a003,
+    0xa3add7a5, 0x3eb73f94, 0x8abc4832, 0x0482eff7, 0xb0899851,
+    0x2d937060, 0x999807c6, 0x70e84f30, 0xc4e33896, 0x59f9d0a7,
+    0xedf2a701, 0x63cc00c4, 0xd7c77762, 0x4add9f53, 0xfed6e8f5,
+    0xd93a7e64, 0x6d3109c2, 0xf02be1f3, 0x44209655, 0xca1e3190,
+    0x7e154636, 0xe30fae07, 0x5704d9a1, 0xbe749157, 0x0a7fe6f1,
+    0x97650ec0, 0x236e7966, 0xad50dea3, 0x195ba905, 0x84414134,
+    0x304a3692},
+   {0x00000000, 0x9e00aacc, 0x7d072542, 0xe3078f8e, 0xfa0e4a84,
+    0x640ee048, 0x87096fc6, 0x1909c50a, 0xb51be5d3, 0x2b1b4f1f,
+    0xc81cc091, 0x561c6a5d, 0x4f15af57, 0xd115059b, 0x32128a15,
+    0xac1220d9, 0x2b31bb7c, 0xb53111b0, 0x56369e3e, 0xc83634f2,
+    0xd13ff1f8, 0x4f3f5b34, 0xac38d4ba, 0x32387e76, 0x9e2a5eaf,
+    0x002af463, 0xe32d7bed, 0x7d2dd121, 0x6424142b, 0xfa24bee7,
+    0x19233169, 0x87239ba5, 0x566276f9, 0xc862dc35, 0x2b6553bb,
+    0xb565f977, 0xac6c3c7d, 0x326c96b1, 0xd16b193f, 0x4f6bb3f3,
+    0xe379932a, 0x7d7939e6, 0x9e7eb668, 0x007e1ca4, 0x1977d9ae,
+    0x87777362, 0x6470fcec, 0xfa705620, 0x7d53cd85, 0xe3536749,
+    0x0054e8c7, 0x9e54420b, 0x875d8701, 0x195d2dcd, 0xfa5aa243,
+    0x645a088f, 0xc8482856, 0x5648829a, 0xb54f0d14, 0x2b4fa7d8,
+    0x324662d2, 0xac46c81e, 0x4f414790, 0xd141ed5c, 0xedc29d29,
+    0x73c237e5, 0x90c5b86b, 0x0ec512a7, 0x17ccd7ad, 0x89cc7d61,
+    0x6acbf2ef, 0xf4cb5823, 0x58d978fa, 0xc6d9d236, 0x25de5db8,
+    0xbbdef774, 0xa2d7327e, 0x3cd798b2, 0xdfd0173c, 0x41d0bdf0,
+    0xc6f32655, 0x58f38c99, 0xbbf40317, 0x25f4a9db, 0x3cfd6cd1,
+    0xa2fdc61d, 0x41fa4993, 0xdffae35f, 0x73e8c386, 0xede8694a,
+    0x0eefe6c4, 0x90ef4c08, 0x89e68902, 0x17e623ce, 0xf4e1ac40,
+    0x6ae1068c, 0xbba0ebd0, 0x25a0411c, 0xc6a7ce92, 0x58a7645e,
+    0x41aea154, 0xdfae0b98, 0x3ca98416, 0xa2a92eda, 0x0ebb0e03,
+    0x90bba4cf, 0x73bc2b41, 0xedbc818d, 0xf4b54487, 0x6ab5ee4b,
+    0x89b261c5, 0x17b2cb09, 0x909150ac, 0x0e91fa60, 0xed9675ee,
+    0x7396df22, 0x6a9f1a28, 0xf49fb0e4, 0x17983f6a, 0x899895a6,
+    0x258ab57f, 0xbb8a1fb3, 0x588d903d, 0xc68d3af1, 0xdf84fffb,
+    0x41845537, 0xa283dab9, 0x3c837075, 0xda853b53, 0x4485919f,
+    0xa7821e11, 0x3982b4dd, 0x208b71d7, 0xbe8bdb1b, 0x5d8c5495,
+    0xc38cfe59, 0x6f9ede80, 0xf19e744c, 0x1299fbc2, 0x8c99510e,
+    0x95909404, 0x0b903ec8, 0xe897b146, 0x76971b8a, 0xf1b4802f,
+    0x6fb42ae3, 0x8cb3a56d, 0x12b30fa1, 0x0bbacaab, 0x95ba6067,
+    0x76bdefe9, 0xe8bd4525, 0x44af65fc, 0xdaafcf30, 0x39a840be,
+    0xa7a8ea72, 0xbea12f78, 0x20a185b4, 0xc3a60a3a, 0x5da6a0f6,
+    0x8ce74daa, 0x12e7e766, 0xf1e068e8, 0x6fe0c224, 0x76e9072e,
+    0xe8e9ade2, 0x0bee226c, 0x95ee88a0, 0x39fca879, 0xa7fc02b5,
+    0x44fb8d3b, 0xdafb27f7, 0xc3f2e2fd, 0x5df24831, 0xbef5c7bf,
+    0x20f56d73, 0xa7d6f6d6, 0x39d65c1a, 0xdad1d394, 0x44d17958,
+    0x5dd8bc52, 0xc3d8169e, 0x20df9910, 0xbedf33dc, 0x12cd1305,
+    0x8ccdb9c9, 0x6fca3647, 0xf1ca9c8b, 0xe8c35981, 0x76c3f34d,
+    0x95c47cc3, 0x0bc4d60f, 0x3747a67a, 0xa9470cb6, 0x4a408338,
+    0xd44029f4, 0xcd49ecfe, 0x53494632, 0xb04ec9bc, 0x2e4e6370,
+    0x825c43a9, 0x1c5ce965, 0xff5b66eb, 0x615bcc27, 0x7852092d,
+    0xe652a3e1, 0x05552c6f, 0x9b5586a3, 0x1c761d06, 0x8276b7ca,
+    0x61713844, 0xff719288, 0xe6785782, 0x7878fd4e, 0x9b7f72c0,
+    0x057fd80c, 0xa96df8d5, 0x376d5219, 0xd46add97, 0x4a6a775b,
+    0x5363b251, 0xcd63189d, 0x2e649713, 0xb0643ddf, 0x6125d083,
+    0xff257a4f, 0x1c22f5c1, 0x82225f0d, 0x9b2b9a07, 0x052b30cb,
+    0xe62cbf45, 0x782c1589, 0xd43e3550, 0x4a3e9f9c, 0xa9391012,
+    0x3739bade, 0x2e307fd4, 0xb030d518, 0x53375a96, 0xcd37f05a,
+    0x4a146bff, 0xd414c133, 0x37134ebd, 0xa913e471, 0xb01a217b,
+    0x2e1a8bb7, 0xcd1d0439, 0x531daef5, 0xff0f8e2c, 0x610f24e0,
+    0x8208ab6e, 0x1c0801a2, 0x0501c4a8, 0x9b016e64, 0x7806e1ea,
+    0xe6064b26}};
+
 #endif
-   }
-};
+
+#endif
+
+#if N == 3
+
+#if W == 8
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f,
+    0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999,
+    0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee,
+    0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615,
+    0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383,
+    0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb,
+    0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275,
+    0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d,
+    0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b,
+    0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460,
+    0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317,
+    0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1,
+    0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5,
+    0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd,
+    0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04,
+    0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c,
+    0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7,
+    0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11,
+    0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66,
+    0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7,
+    0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871,
+    0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309,
+    0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd,
+    0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85,
+    0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913,
+    0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d,
+    0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a,
+    0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc,
+    0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57,
+    0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f,
+    0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6,
+    0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e,
+    0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f,
+    0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289,
+    0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe,
+    0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05,
+    0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893,
+    0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb,
+    0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0,
+    0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8,
+    0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e,
+    0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5,
+    0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2,
+    0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574,
+    0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5,
+    0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add,
+    0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114,
+    0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c,
+    0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7,
+    0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701,
+    0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076,
+    0x09cd8551},
+   {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193,
+    0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2,
+    0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c,
+    0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71,
+    0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a,
+    0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d,
+    0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71,
+    0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436,
+    0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d,
+    0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000,
+    0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae,
+    0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf,
+    0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930,
+    0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277,
+    0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff,
+    0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8,
+    0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef,
+    0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e,
+    0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20,
+    0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95,
+    0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e,
+    0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9,
+    0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 0xebcb400d,
+    0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a,
+    0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151,
+    0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4,
+    0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a,
+    0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b,
+    0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c,
+    0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b,
+    0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3,
+    0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4,
+    0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b,
+    0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a,
+    0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4,
+    0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189,
+    0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92,
+    0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5,
+    0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9,
+    0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe,
+    0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5,
+    0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8,
+    0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66,
+    0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707,
+    0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8,
+    0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f,
+    0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707,
+    0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40,
+    0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017,
+    0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876,
+    0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8,
+    0x7bc97a0c},
+   {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300,
+    0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0,
+    0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80,
+    0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701,
+    0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41,
+    0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81,
+    0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43,
+    0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83,
+    0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3,
+    0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42,
+    0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202,
+    0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2,
+    0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7,
+    0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407,
+    0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47,
+    0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87,
+    0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86,
+    0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46,
+    0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506,
+    0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44,
+    0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704,
+    0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4,
+    0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5,
+    0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505,
+    0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45,
+    0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f,
+    0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f,
+    0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f,
+    0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e,
+    0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e,
+    0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e,
+    0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce,
+    0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c,
+    0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc,
+    0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c,
+    0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d,
+    0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d,
+    0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d,
+    0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88,
+    0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48,
+    0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708,
+    0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89,
+    0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9,
+    0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309,
+    0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb,
+    0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b,
+    0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b,
+    0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b,
+    0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a,
+    0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a,
+    0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a,
+    0x7851a2ca},
+   {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb,
+    0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8,
+    0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0,
+    0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f,
+    0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a,
+    0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf,
+    0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5,
+    0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380,
+    0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815,
+    0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa,
+    0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2,
+    0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1,
+    0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1,
+    0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4,
+    0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa,
+    0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df,
+    0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6,
+    0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5,
+    0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad,
+    0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca,
+    0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f,
+    0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a,
+    0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8,
+    0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d,
+    0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708,
+    0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d,
+    0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865,
+    0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636,
+    0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f,
+    0xe68d0cd1, 0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a,
+    0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744,
+    0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061,
+    0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0,
+    0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293,
+    0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb,
+    0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874,
+    0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1,
+    0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4,
+    0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f,
+    0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a,
+    0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f,
+    0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120,
+    0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778,
+    0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b,
+    0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a,
+    0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af,
+    0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81,
+    0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4,
+    0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd,
+    0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e,
+    0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6,
+    0x566b6848},
+   {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59,
+    0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4,
+    0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67,
+    0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef,
+    0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97,
+    0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88,
+    0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687,
+    0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698,
+    0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0,
+    0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068,
+    0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb,
+    0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056,
+    0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016,
+    0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009,
+    0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028,
+    0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037,
+    0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a,
+    0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7,
+    0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054,
+    0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7,
+    0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af,
+    0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0,
+    0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4,
+    0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab,
+    0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3,
+    0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a,
+    0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9,
+    0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54,
+    0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09,
+    0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16,
+    0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37,
+    0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28,
+    0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e,
+    0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3,
+    0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40,
+    0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8,
+    0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0,
+    0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf,
+    0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6,
+    0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9,
+    0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1,
+    0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059,
+    0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca,
+    0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067,
+    0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031,
+    0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e,
+    0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f,
+    0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010,
+    0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d,
+    0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0,
+    0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073,
+    0xd8ac6b35},
+   {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2,
+    0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd,
+    0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696,
+    0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3,
+    0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f,
+    0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35,
+    0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5,
+    0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f,
+    0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673,
+    0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46,
+    0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d,
+    0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632,
+    0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28,
+    0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192,
+    0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c,
+    0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6,
+    0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0,
+    0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff,
+    0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4,
+    0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95,
+    0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9,
+    0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03,
+    0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7,
+    0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d,
+    0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151,
+    0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808,
+    0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343,
+    0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c,
+    0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a,
+    0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0,
+    0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e,
+    0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594,
+    0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6,
+    0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399,
+    0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2,
+    0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7,
+    0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb,
+    0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571,
+    0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289,
+    0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33,
+    0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f,
+    0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a,
+    0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461,
+    0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e,
+    0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c,
+    0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6,
+    0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918,
+    0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2,
+    0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484,
+    0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb,
+    0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0,
+    0xa140efa8},
+   {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706,
+    0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed,
+    0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289,
+    0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a,
+    0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214,
+    0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3,
+    0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3,
+    0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254,
+    0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a,
+    0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9,
+    0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad,
+    0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746,
+    0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060,
+    0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187,
+    0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef,
+    0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408,
+    0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e,
+    0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495,
+    0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1,
+    0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532,
+    0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c,
+    0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb,
+    0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb,
+    0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c,
+    0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42,
+    0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060,
+    0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04,
+    0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef,
+    0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99,
+    0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e,
+    0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16,
+    0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1,
+    0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7,
+    0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c,
+    0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38,
+    0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb,
+    0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5,
+    0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42,
+    0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62,
+    0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85,
+    0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb,
+    0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18,
+    0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c,
+    0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997,
+    0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1,
+    0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36,
+    0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e,
+    0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9,
+    0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf,
+    0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24,
+    0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040,
+    0x917cd6a1},
+   {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf,
+    0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd,
+    0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896,
+    0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9,
+    0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3,
+    0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f,
+    0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d,
+    0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1,
+    0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab,
+    0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4,
+    0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f,
+    0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d,
+    0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4,
+    0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978,
+    0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad,
+    0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621,
+    0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46,
+    0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854,
+    0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f,
+    0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a,
+    0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890,
+    0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c,
+    0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4,
+    0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238,
+    0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622,
+    0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab,
+    0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0,
+    0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2,
+    0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295,
+    0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19,
+    0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc,
+    0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140,
+    0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd,
+    0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf,
+    0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184,
+    0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb,
+    0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1,
+    0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d,
+    0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb,
+    0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257,
+    0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d,
+    0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22,
+    0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069,
+    0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b,
+    0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6,
+    0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a,
+    0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf,
+    0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33,
+    0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254,
+    0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 0xbf8e8146,
+    0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d,
+    0x18ba364e}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x0000000000000000, 0x43cba68700000000, 0xc7903cd400000000,
+    0x845b9a5300000000, 0xcf27087300000000, 0x8cecaef400000000,
+    0x08b734a700000000, 0x4b7c922000000000, 0x9e4f10e600000000,
+    0xdd84b66100000000, 0x59df2c3200000000, 0x1a148ab500000000,
+    0x5168189500000000, 0x12a3be1200000000, 0x96f8244100000000,
+    0xd53382c600000000, 0x7d99511700000000, 0x3e52f79000000000,
+    0xba096dc300000000, 0xf9c2cb4400000000, 0xb2be596400000000,
+    0xf175ffe300000000, 0x752e65b000000000, 0x36e5c33700000000,
+    0xe3d641f100000000, 0xa01de77600000000, 0x24467d2500000000,
+    0x678ddba200000000, 0x2cf1498200000000, 0x6f3aef0500000000,
+    0xeb61755600000000, 0xa8aad3d100000000, 0xfa32a32e00000000,
+    0xb9f905a900000000, 0x3da29ffa00000000, 0x7e69397d00000000,
+    0x3515ab5d00000000, 0x76de0dda00000000, 0xf285978900000000,
+    0xb14e310e00000000, 0x647db3c800000000, 0x27b6154f00000000,
+    0xa3ed8f1c00000000, 0xe026299b00000000, 0xab5abbbb00000000,
+    0xe8911d3c00000000, 0x6cca876f00000000, 0x2f0121e800000000,
+    0x87abf23900000000, 0xc46054be00000000, 0x403bceed00000000,
+    0x03f0686a00000000, 0x488cfa4a00000000, 0x0b475ccd00000000,
+    0x8f1cc69e00000000, 0xccd7601900000000, 0x19e4e2df00000000,
+    0x5a2f445800000000, 0xde74de0b00000000, 0x9dbf788c00000000,
+    0xd6c3eaac00000000, 0x95084c2b00000000, 0x1153d67800000000,
+    0x529870ff00000000, 0xf465465d00000000, 0xb7aee0da00000000,
+    0x33f57a8900000000, 0x703edc0e00000000, 0x3b424e2e00000000,
+    0x7889e8a900000000, 0xfcd272fa00000000, 0xbf19d47d00000000,
+    0x6a2a56bb00000000, 0x29e1f03c00000000, 0xadba6a6f00000000,
+    0xee71cce800000000, 0xa50d5ec800000000, 0xe6c6f84f00000000,
+    0x629d621c00000000, 0x2156c49b00000000, 0x89fc174a00000000,
+    0xca37b1cd00000000, 0x4e6c2b9e00000000, 0x0da78d1900000000,
+    0x46db1f3900000000, 0x0510b9be00000000, 0x814b23ed00000000,
+    0xc280856a00000000, 0x17b307ac00000000, 0x5478a12b00000000,
+    0xd0233b7800000000, 0x93e89dff00000000, 0xd8940fdf00000000,
+    0x9b5fa95800000000, 0x1f04330b00000000, 0x5ccf958c00000000,
+    0x0e57e57300000000, 0x4d9c43f400000000, 0xc9c7d9a700000000,
+    0x8a0c7f2000000000, 0xc170ed0000000000, 0x82bb4b8700000000,
+    0x06e0d1d400000000, 0x452b775300000000, 0x9018f59500000000,
+    0xd3d3531200000000, 0x5788c94100000000, 0x14436fc600000000,
+    0x5f3ffde600000000, 0x1cf45b6100000000, 0x98afc13200000000,
+    0xdb6467b500000000, 0x73ceb46400000000, 0x300512e300000000,
+    0xb45e88b000000000, 0xf7952e3700000000, 0xbce9bc1700000000,
+    0xff221a9000000000, 0x7b7980c300000000, 0x38b2264400000000,
+    0xed81a48200000000, 0xae4a020500000000, 0x2a11985600000000,
+    0x69da3ed100000000, 0x22a6acf100000000, 0x616d0a7600000000,
+    0xe536902500000000, 0xa6fd36a200000000, 0xe8cb8cba00000000,
+    0xab002a3d00000000, 0x2f5bb06e00000000, 0x6c9016e900000000,
+    0x27ec84c900000000, 0x6427224e00000000, 0xe07cb81d00000000,
+    0xa3b71e9a00000000, 0x76849c5c00000000, 0x354f3adb00000000,
+    0xb114a08800000000, 0xf2df060f00000000, 0xb9a3942f00000000,
+    0xfa6832a800000000, 0x7e33a8fb00000000, 0x3df80e7c00000000,
+    0x9552ddad00000000, 0xd6997b2a00000000, 0x52c2e17900000000,
+    0x110947fe00000000, 0x5a75d5de00000000, 0x19be735900000000,
+    0x9de5e90a00000000, 0xde2e4f8d00000000, 0x0b1dcd4b00000000,
+    0x48d66bcc00000000, 0xcc8df19f00000000, 0x8f46571800000000,
+    0xc43ac53800000000, 0x87f163bf00000000, 0x03aaf9ec00000000,
+    0x40615f6b00000000, 0x12f92f9400000000, 0x5132891300000000,
+    0xd569134000000000, 0x96a2b5c700000000, 0xddde27e700000000,
+    0x9e15816000000000, 0x1a4e1b3300000000, 0x5985bdb400000000,
+    0x8cb63f7200000000, 0xcf7d99f500000000, 0x4b2603a600000000,
+    0x08eda52100000000, 0x4391370100000000, 0x005a918600000000,
+    0x84010bd500000000, 0xc7caad5200000000, 0x6f607e8300000000,
+    0x2cabd80400000000, 0xa8f0425700000000, 0xeb3be4d000000000,
+    0xa04776f000000000, 0xe38cd07700000000, 0x67d74a2400000000,
+    0x241ceca300000000, 0xf12f6e6500000000, 0xb2e4c8e200000000,
+    0x36bf52b100000000, 0x7574f43600000000, 0x3e08661600000000,
+    0x7dc3c09100000000, 0xf9985ac200000000, 0xba53fc4500000000,
+    0x1caecae700000000, 0x5f656c6000000000, 0xdb3ef63300000000,
+    0x98f550b400000000, 0xd389c29400000000, 0x9042641300000000,
+    0x1419fe4000000000, 0x57d258c700000000, 0x82e1da0100000000,
+    0xc12a7c8600000000, 0x4571e6d500000000, 0x06ba405200000000,
+    0x4dc6d27200000000, 0x0e0d74f500000000, 0x8a56eea600000000,
+    0xc99d482100000000, 0x61379bf000000000, 0x22fc3d7700000000,
+    0xa6a7a72400000000, 0xe56c01a300000000, 0xae10938300000000,
+    0xeddb350400000000, 0x6980af5700000000, 0x2a4b09d000000000,
+    0xff788b1600000000, 0xbcb32d9100000000, 0x38e8b7c200000000,
+    0x7b23114500000000, 0x305f836500000000, 0x739425e200000000,
+    0xf7cfbfb100000000, 0xb404193600000000, 0xe69c69c900000000,
+    0xa557cf4e00000000, 0x210c551d00000000, 0x62c7f39a00000000,
+    0x29bb61ba00000000, 0x6a70c73d00000000, 0xee2b5d6e00000000,
+    0xade0fbe900000000, 0x78d3792f00000000, 0x3b18dfa800000000,
+    0xbf4345fb00000000, 0xfc88e37c00000000, 0xb7f4715c00000000,
+    0xf43fd7db00000000, 0x70644d8800000000, 0x33afeb0f00000000,
+    0x9b0538de00000000, 0xd8ce9e5900000000, 0x5c95040a00000000,
+    0x1f5ea28d00000000, 0x542230ad00000000, 0x17e9962a00000000,
+    0x93b20c7900000000, 0xd079aafe00000000, 0x054a283800000000,
+    0x46818ebf00000000, 0xc2da14ec00000000, 0x8111b26b00000000,
+    0xca6d204b00000000, 0x89a686cc00000000, 0x0dfd1c9f00000000,
+    0x4e36ba1800000000},
+   {0x0000000000000000, 0xe1b652ef00000000, 0x836bd40500000000,
+    0x62dd86ea00000000, 0x06d7a80b00000000, 0xe761fae400000000,
+    0x85bc7c0e00000000, 0x640a2ee100000000, 0x0cae511700000000,
+    0xed1803f800000000, 0x8fc5851200000000, 0x6e73d7fd00000000,
+    0x0a79f91c00000000, 0xebcfabf300000000, 0x89122d1900000000,
+    0x68a47ff600000000, 0x185ca32e00000000, 0xf9eaf1c100000000,
+    0x9b37772b00000000, 0x7a8125c400000000, 0x1e8b0b2500000000,
+    0xff3d59ca00000000, 0x9de0df2000000000, 0x7c568dcf00000000,
+    0x14f2f23900000000, 0xf544a0d600000000, 0x9799263c00000000,
+    0x762f74d300000000, 0x12255a3200000000, 0xf39308dd00000000,
+    0x914e8e3700000000, 0x70f8dcd800000000, 0x30b8465d00000000,
+    0xd10e14b200000000, 0xb3d3925800000000, 0x5265c0b700000000,
+    0x366fee5600000000, 0xd7d9bcb900000000, 0xb5043a5300000000,
+    0x54b268bc00000000, 0x3c16174a00000000, 0xdda045a500000000,
+    0xbf7dc34f00000000, 0x5ecb91a000000000, 0x3ac1bf4100000000,
+    0xdb77edae00000000, 0xb9aa6b4400000000, 0x581c39ab00000000,
+    0x28e4e57300000000, 0xc952b79c00000000, 0xab8f317600000000,
+    0x4a39639900000000, 0x2e334d7800000000, 0xcf851f9700000000,
+    0xad58997d00000000, 0x4ceecb9200000000, 0x244ab46400000000,
+    0xc5fce68b00000000, 0xa721606100000000, 0x4697328e00000000,
+    0x229d1c6f00000000, 0xc32b4e8000000000, 0xa1f6c86a00000000,
+    0x40409a8500000000, 0x60708dba00000000, 0x81c6df5500000000,
+    0xe31b59bf00000000, 0x02ad0b5000000000, 0x66a725b100000000,
+    0x8711775e00000000, 0xe5ccf1b400000000, 0x047aa35b00000000,
+    0x6cdedcad00000000, 0x8d688e4200000000, 0xefb508a800000000,
+    0x0e035a4700000000, 0x6a0974a600000000, 0x8bbf264900000000,
+    0xe962a0a300000000, 0x08d4f24c00000000, 0x782c2e9400000000,
+    0x999a7c7b00000000, 0xfb47fa9100000000, 0x1af1a87e00000000,
+    0x7efb869f00000000, 0x9f4dd47000000000, 0xfd90529a00000000,
+    0x1c26007500000000, 0x74827f8300000000, 0x95342d6c00000000,
+    0xf7e9ab8600000000, 0x165ff96900000000, 0x7255d78800000000,
+    0x93e3856700000000, 0xf13e038d00000000, 0x1088516200000000,
+    0x50c8cbe700000000, 0xb17e990800000000, 0xd3a31fe200000000,
+    0x32154d0d00000000, 0x561f63ec00000000, 0xb7a9310300000000,
+    0xd574b7e900000000, 0x34c2e50600000000, 0x5c669af000000000,
+    0xbdd0c81f00000000, 0xdf0d4ef500000000, 0x3ebb1c1a00000000,
+    0x5ab132fb00000000, 0xbb07601400000000, 0xd9dae6fe00000000,
+    0x386cb41100000000, 0x489468c900000000, 0xa9223a2600000000,
+    0xcbffbccc00000000, 0x2a49ee2300000000, 0x4e43c0c200000000,
+    0xaff5922d00000000, 0xcd2814c700000000, 0x2c9e462800000000,
+    0x443a39de00000000, 0xa58c6b3100000000, 0xc751eddb00000000,
+    0x26e7bf3400000000, 0x42ed91d500000000, 0xa35bc33a00000000,
+    0xc18645d000000000, 0x2030173f00000000, 0x81e66bae00000000,
+    0x6050394100000000, 0x028dbfab00000000, 0xe33bed4400000000,
+    0x8731c3a500000000, 0x6687914a00000000, 0x045a17a000000000,
+    0xe5ec454f00000000, 0x8d483ab900000000, 0x6cfe685600000000,
+    0x0e23eebc00000000, 0xef95bc5300000000, 0x8b9f92b200000000,
+    0x6a29c05d00000000, 0x08f446b700000000, 0xe942145800000000,
+    0x99bac88000000000, 0x780c9a6f00000000, 0x1ad11c8500000000,
+    0xfb674e6a00000000, 0x9f6d608b00000000, 0x7edb326400000000,
+    0x1c06b48e00000000, 0xfdb0e66100000000, 0x9514999700000000,
+    0x74a2cb7800000000, 0x167f4d9200000000, 0xf7c91f7d00000000,
+    0x93c3319c00000000, 0x7275637300000000, 0x10a8e59900000000,
+    0xf11eb77600000000, 0xb15e2df300000000, 0x50e87f1c00000000,
+    0x3235f9f600000000, 0xd383ab1900000000, 0xb78985f800000000,
+    0x563fd71700000000, 0x34e251fd00000000, 0xd554031200000000,
+    0xbdf07ce400000000, 0x5c462e0b00000000, 0x3e9ba8e100000000,
+    0xdf2dfa0e00000000, 0xbb27d4ef00000000, 0x5a91860000000000,
+    0x384c00ea00000000, 0xd9fa520500000000, 0xa9028edd00000000,
+    0x48b4dc3200000000, 0x2a695ad800000000, 0xcbdf083700000000,
+    0xafd526d600000000, 0x4e63743900000000, 0x2cbef2d300000000,
+    0xcd08a03c00000000, 0xa5acdfca00000000, 0x441a8d2500000000,
+    0x26c70bcf00000000, 0xc771592000000000, 0xa37b77c100000000,
+    0x42cd252e00000000, 0x2010a3c400000000, 0xc1a6f12b00000000,
+    0xe196e61400000000, 0x0020b4fb00000000, 0x62fd321100000000,
+    0x834b60fe00000000, 0xe7414e1f00000000, 0x06f71cf000000000,
+    0x642a9a1a00000000, 0x859cc8f500000000, 0xed38b70300000000,
+    0x0c8ee5ec00000000, 0x6e53630600000000, 0x8fe531e900000000,
+    0xebef1f0800000000, 0x0a594de700000000, 0x6884cb0d00000000,
+    0x893299e200000000, 0xf9ca453a00000000, 0x187c17d500000000,
+    0x7aa1913f00000000, 0x9b17c3d000000000, 0xff1ded3100000000,
+    0x1eabbfde00000000, 0x7c76393400000000, 0x9dc06bdb00000000,
+    0xf564142d00000000, 0x14d246c200000000, 0x760fc02800000000,
+    0x97b992c700000000, 0xf3b3bc2600000000, 0x1205eec900000000,
+    0x70d8682300000000, 0x916e3acc00000000, 0xd12ea04900000000,
+    0x3098f2a600000000, 0x5245744c00000000, 0xb3f326a300000000,
+    0xd7f9084200000000, 0x364f5aad00000000, 0x5492dc4700000000,
+    0xb5248ea800000000, 0xdd80f15e00000000, 0x3c36a3b100000000,
+    0x5eeb255b00000000, 0xbf5d77b400000000, 0xdb57595500000000,
+    0x3ae10bba00000000, 0x583c8d5000000000, 0xb98adfbf00000000,
+    0xc972036700000000, 0x28c4518800000000, 0x4a19d76200000000,
+    0xabaf858d00000000, 0xcfa5ab6c00000000, 0x2e13f98300000000,
+    0x4cce7f6900000000, 0xad782d8600000000, 0xc5dc527000000000,
+    0x246a009f00000000, 0x46b7867500000000, 0xa701d49a00000000,
+    0xc30bfa7b00000000, 0x22bda89400000000, 0x40602e7e00000000,
+    0xa1d67c9100000000},
+   {0x0000000000000000, 0x5880e2d700000000, 0xf106b47400000000,
+    0xa98656a300000000, 0xe20d68e900000000, 0xba8d8a3e00000000,
+    0x130bdc9d00000000, 0x4b8b3e4a00000000, 0x851da10900000000,
+    0xdd9d43de00000000, 0x741b157d00000000, 0x2c9bf7aa00000000,
+    0x6710c9e000000000, 0x3f902b3700000000, 0x96167d9400000000,
+    0xce969f4300000000, 0x0a3b421300000000, 0x52bba0c400000000,
+    0xfb3df66700000000, 0xa3bd14b000000000, 0xe8362afa00000000,
+    0xb0b6c82d00000000, 0x19309e8e00000000, 0x41b07c5900000000,
+    0x8f26e31a00000000, 0xd7a601cd00000000, 0x7e20576e00000000,
+    0x26a0b5b900000000, 0x6d2b8bf300000000, 0x35ab692400000000,
+    0x9c2d3f8700000000, 0xc4addd5000000000, 0x1476842600000000,
+    0x4cf666f100000000, 0xe570305200000000, 0xbdf0d28500000000,
+    0xf67beccf00000000, 0xaefb0e1800000000, 0x077d58bb00000000,
+    0x5ffdba6c00000000, 0x916b252f00000000, 0xc9ebc7f800000000,
+    0x606d915b00000000, 0x38ed738c00000000, 0x73664dc600000000,
+    0x2be6af1100000000, 0x8260f9b200000000, 0xdae01b6500000000,
+    0x1e4dc63500000000, 0x46cd24e200000000, 0xef4b724100000000,
+    0xb7cb909600000000, 0xfc40aedc00000000, 0xa4c04c0b00000000,
+    0x0d461aa800000000, 0x55c6f87f00000000, 0x9b50673c00000000,
+    0xc3d085eb00000000, 0x6a56d34800000000, 0x32d6319f00000000,
+    0x795d0fd500000000, 0x21dded0200000000, 0x885bbba100000000,
+    0xd0db597600000000, 0x28ec084d00000000, 0x706cea9a00000000,
+    0xd9eabc3900000000, 0x816a5eee00000000, 0xcae160a400000000,
+    0x9261827300000000, 0x3be7d4d000000000, 0x6367360700000000,
+    0xadf1a94400000000, 0xf5714b9300000000, 0x5cf71d3000000000,
+    0x0477ffe700000000, 0x4ffcc1ad00000000, 0x177c237a00000000,
+    0xbefa75d900000000, 0xe67a970e00000000, 0x22d74a5e00000000,
+    0x7a57a88900000000, 0xd3d1fe2a00000000, 0x8b511cfd00000000,
+    0xc0da22b700000000, 0x985ac06000000000, 0x31dc96c300000000,
+    0x695c741400000000, 0xa7caeb5700000000, 0xff4a098000000000,
+    0x56cc5f2300000000, 0x0e4cbdf400000000, 0x45c783be00000000,
+    0x1d47616900000000, 0xb4c137ca00000000, 0xec41d51d00000000,
+    0x3c9a8c6b00000000, 0x641a6ebc00000000, 0xcd9c381f00000000,
+    0x951cdac800000000, 0xde97e48200000000, 0x8617065500000000,
+    0x2f9150f600000000, 0x7711b22100000000, 0xb9872d6200000000,
+    0xe107cfb500000000, 0x4881991600000000, 0x10017bc100000000,
+    0x5b8a458b00000000, 0x030aa75c00000000, 0xaa8cf1ff00000000,
+    0xf20c132800000000, 0x36a1ce7800000000, 0x6e212caf00000000,
+    0xc7a77a0c00000000, 0x9f2798db00000000, 0xd4aca69100000000,
+    0x8c2c444600000000, 0x25aa12e500000000, 0x7d2af03200000000,
+    0xb3bc6f7100000000, 0xeb3c8da600000000, 0x42badb0500000000,
+    0x1a3a39d200000000, 0x51b1079800000000, 0x0931e54f00000000,
+    0xa0b7b3ec00000000, 0xf837513b00000000, 0x50d8119a00000000,
+    0x0858f34d00000000, 0xa1dea5ee00000000, 0xf95e473900000000,
+    0xb2d5797300000000, 0xea559ba400000000, 0x43d3cd0700000000,
+    0x1b532fd000000000, 0xd5c5b09300000000, 0x8d45524400000000,
+    0x24c304e700000000, 0x7c43e63000000000, 0x37c8d87a00000000,
+    0x6f483aad00000000, 0xc6ce6c0e00000000, 0x9e4e8ed900000000,
+    0x5ae3538900000000, 0x0263b15e00000000, 0xabe5e7fd00000000,
+    0xf365052a00000000, 0xb8ee3b6000000000, 0xe06ed9b700000000,
+    0x49e88f1400000000, 0x11686dc300000000, 0xdffef28000000000,
+    0x877e105700000000, 0x2ef846f400000000, 0x7678a42300000000,
+    0x3df39a6900000000, 0x657378be00000000, 0xccf52e1d00000000,
+    0x9475ccca00000000, 0x44ae95bc00000000, 0x1c2e776b00000000,
+    0xb5a821c800000000, 0xed28c31f00000000, 0xa6a3fd5500000000,
+    0xfe231f8200000000, 0x57a5492100000000, 0x0f25abf600000000,
+    0xc1b334b500000000, 0x9933d66200000000, 0x30b580c100000000,
+    0x6835621600000000, 0x23be5c5c00000000, 0x7b3ebe8b00000000,
+    0xd2b8e82800000000, 0x8a380aff00000000, 0x4e95d7af00000000,
+    0x1615357800000000, 0xbf9363db00000000, 0xe713810c00000000,
+    0xac98bf4600000000, 0xf4185d9100000000, 0x5d9e0b3200000000,
+    0x051ee9e500000000, 0xcb8876a600000000, 0x9308947100000000,
+    0x3a8ec2d200000000, 0x620e200500000000, 0x29851e4f00000000,
+    0x7105fc9800000000, 0xd883aa3b00000000, 0x800348ec00000000,
+    0x783419d700000000, 0x20b4fb0000000000, 0x8932ada300000000,
+    0xd1b24f7400000000, 0x9a39713e00000000, 0xc2b993e900000000,
+    0x6b3fc54a00000000, 0x33bf279d00000000, 0xfd29b8de00000000,
+    0xa5a95a0900000000, 0x0c2f0caa00000000, 0x54afee7d00000000,
+    0x1f24d03700000000, 0x47a432e000000000, 0xee22644300000000,
+    0xb6a2869400000000, 0x720f5bc400000000, 0x2a8fb91300000000,
+    0x8309efb000000000, 0xdb890d6700000000, 0x9002332d00000000,
+    0xc882d1fa00000000, 0x6104875900000000, 0x3984658e00000000,
+    0xf712facd00000000, 0xaf92181a00000000, 0x06144eb900000000,
+    0x5e94ac6e00000000, 0x151f922400000000, 0x4d9f70f300000000,
+    0xe419265000000000, 0xbc99c48700000000, 0x6c429df100000000,
+    0x34c27f2600000000, 0x9d44298500000000, 0xc5c4cb5200000000,
+    0x8e4ff51800000000, 0xd6cf17cf00000000, 0x7f49416c00000000,
+    0x27c9a3bb00000000, 0xe95f3cf800000000, 0xb1dfde2f00000000,
+    0x1859888c00000000, 0x40d96a5b00000000, 0x0b52541100000000,
+    0x53d2b6c600000000, 0xfa54e06500000000, 0xa2d402b200000000,
+    0x6679dfe200000000, 0x3ef93d3500000000, 0x977f6b9600000000,
+    0xcfff894100000000, 0x8474b70b00000000, 0xdcf455dc00000000,
+    0x7572037f00000000, 0x2df2e1a800000000, 0xe3647eeb00000000,
+    0xbbe49c3c00000000, 0x1262ca9f00000000, 0x4ae2284800000000,
+    0x0169160200000000, 0x59e9f4d500000000, 0xf06fa27600000000,
+    0xa8ef40a100000000},
+   {0x0000000000000000, 0x463b676500000000, 0x8c76ceca00000000,
+    0xca4da9af00000000, 0x59ebed4e00000000, 0x1fd08a2b00000000,
+    0xd59d238400000000, 0x93a644e100000000, 0xb2d6db9d00000000,
+    0xf4edbcf800000000, 0x3ea0155700000000, 0x789b723200000000,
+    0xeb3d36d300000000, 0xad0651b600000000, 0x674bf81900000000,
+    0x21709f7c00000000, 0x25abc6e000000000, 0x6390a18500000000,
+    0xa9dd082a00000000, 0xefe66f4f00000000, 0x7c402bae00000000,
+    0x3a7b4ccb00000000, 0xf036e56400000000, 0xb60d820100000000,
+    0x977d1d7d00000000, 0xd1467a1800000000, 0x1b0bd3b700000000,
+    0x5d30b4d200000000, 0xce96f03300000000, 0x88ad975600000000,
+    0x42e03ef900000000, 0x04db599c00000000, 0x0b50fc1a00000000,
+    0x4d6b9b7f00000000, 0x872632d000000000, 0xc11d55b500000000,
+    0x52bb115400000000, 0x1480763100000000, 0xdecddf9e00000000,
+    0x98f6b8fb00000000, 0xb986278700000000, 0xffbd40e200000000,
+    0x35f0e94d00000000, 0x73cb8e2800000000, 0xe06dcac900000000,
+    0xa656adac00000000, 0x6c1b040300000000, 0x2a20636600000000,
+    0x2efb3afa00000000, 0x68c05d9f00000000, 0xa28df43000000000,
+    0xe4b6935500000000, 0x7710d7b400000000, 0x312bb0d100000000,
+    0xfb66197e00000000, 0xbd5d7e1b00000000, 0x9c2de16700000000,
+    0xda16860200000000, 0x105b2fad00000000, 0x566048c800000000,
+    0xc5c60c2900000000, 0x83fd6b4c00000000, 0x49b0c2e300000000,
+    0x0f8ba58600000000, 0x16a0f83500000000, 0x509b9f5000000000,
+    0x9ad636ff00000000, 0xdced519a00000000, 0x4f4b157b00000000,
+    0x0970721e00000000, 0xc33ddbb100000000, 0x8506bcd400000000,
+    0xa47623a800000000, 0xe24d44cd00000000, 0x2800ed6200000000,
+    0x6e3b8a0700000000, 0xfd9dcee600000000, 0xbba6a98300000000,
+    0x71eb002c00000000, 0x37d0674900000000, 0x330b3ed500000000,
+    0x753059b000000000, 0xbf7df01f00000000, 0xf946977a00000000,
+    0x6ae0d39b00000000, 0x2cdbb4fe00000000, 0xe6961d5100000000,
+    0xa0ad7a3400000000, 0x81dde54800000000, 0xc7e6822d00000000,
+    0x0dab2b8200000000, 0x4b904ce700000000, 0xd836080600000000,
+    0x9e0d6f6300000000, 0x5440c6cc00000000, 0x127ba1a900000000,
+    0x1df0042f00000000, 0x5bcb634a00000000, 0x9186cae500000000,
+    0xd7bdad8000000000, 0x441be96100000000, 0x02208e0400000000,
+    0xc86d27ab00000000, 0x8e5640ce00000000, 0xaf26dfb200000000,
+    0xe91db8d700000000, 0x2350117800000000, 0x656b761d00000000,
+    0xf6cd32fc00000000, 0xb0f6559900000000, 0x7abbfc3600000000,
+    0x3c809b5300000000, 0x385bc2cf00000000, 0x7e60a5aa00000000,
+    0xb42d0c0500000000, 0xf2166b6000000000, 0x61b02f8100000000,
+    0x278b48e400000000, 0xedc6e14b00000000, 0xabfd862e00000000,
+    0x8a8d195200000000, 0xccb67e3700000000, 0x06fbd79800000000,
+    0x40c0b0fd00000000, 0xd366f41c00000000, 0x955d937900000000,
+    0x5f103ad600000000, 0x192b5db300000000, 0x2c40f16b00000000,
+    0x6a7b960e00000000, 0xa0363fa100000000, 0xe60d58c400000000,
+    0x75ab1c2500000000, 0x33907b4000000000, 0xf9ddd2ef00000000,
+    0xbfe6b58a00000000, 0x9e962af600000000, 0xd8ad4d9300000000,
+    0x12e0e43c00000000, 0x54db835900000000, 0xc77dc7b800000000,
+    0x8146a0dd00000000, 0x4b0b097200000000, 0x0d306e1700000000,
+    0x09eb378b00000000, 0x4fd050ee00000000, 0x859df94100000000,
+    0xc3a69e2400000000, 0x5000dac500000000, 0x163bbda000000000,
+    0xdc76140f00000000, 0x9a4d736a00000000, 0xbb3dec1600000000,
+    0xfd068b7300000000, 0x374b22dc00000000, 0x717045b900000000,
+    0xe2d6015800000000, 0xa4ed663d00000000, 0x6ea0cf9200000000,
+    0x289ba8f700000000, 0x27100d7100000000, 0x612b6a1400000000,
+    0xab66c3bb00000000, 0xed5da4de00000000, 0x7efbe03f00000000,
+    0x38c0875a00000000, 0xf28d2ef500000000, 0xb4b6499000000000,
+    0x95c6d6ec00000000, 0xd3fdb18900000000, 0x19b0182600000000,
+    0x5f8b7f4300000000, 0xcc2d3ba200000000, 0x8a165cc700000000,
+    0x405bf56800000000, 0x0660920d00000000, 0x02bbcb9100000000,
+    0x4480acf400000000, 0x8ecd055b00000000, 0xc8f6623e00000000,
+    0x5b5026df00000000, 0x1d6b41ba00000000, 0xd726e81500000000,
+    0x911d8f7000000000, 0xb06d100c00000000, 0xf656776900000000,
+    0x3c1bdec600000000, 0x7a20b9a300000000, 0xe986fd4200000000,
+    0xafbd9a2700000000, 0x65f0338800000000, 0x23cb54ed00000000,
+    0x3ae0095e00000000, 0x7cdb6e3b00000000, 0xb696c79400000000,
+    0xf0ada0f100000000, 0x630be41000000000, 0x2530837500000000,
+    0xef7d2ada00000000, 0xa9464dbf00000000, 0x8836d2c300000000,
+    0xce0db5a600000000, 0x04401c0900000000, 0x427b7b6c00000000,
+    0xd1dd3f8d00000000, 0x97e658e800000000, 0x5dabf14700000000,
+    0x1b90962200000000, 0x1f4bcfbe00000000, 0x5970a8db00000000,
+    0x933d017400000000, 0xd506661100000000, 0x46a022f000000000,
+    0x009b459500000000, 0xcad6ec3a00000000, 0x8ced8b5f00000000,
+    0xad9d142300000000, 0xeba6734600000000, 0x21ebdae900000000,
+    0x67d0bd8c00000000, 0xf476f96d00000000, 0xb24d9e0800000000,
+    0x780037a700000000, 0x3e3b50c200000000, 0x31b0f54400000000,
+    0x778b922100000000, 0xbdc63b8e00000000, 0xfbfd5ceb00000000,
+    0x685b180a00000000, 0x2e607f6f00000000, 0xe42dd6c000000000,
+    0xa216b1a500000000, 0x83662ed900000000, 0xc55d49bc00000000,
+    0x0f10e01300000000, 0x492b877600000000, 0xda8dc39700000000,
+    0x9cb6a4f200000000, 0x56fb0d5d00000000, 0x10c06a3800000000,
+    0x141b33a400000000, 0x522054c100000000, 0x986dfd6e00000000,
+    0xde569a0b00000000, 0x4df0deea00000000, 0x0bcbb98f00000000,
+    0xc186102000000000, 0x87bd774500000000, 0xa6cde83900000000,
+    0xe0f68f5c00000000, 0x2abb26f300000000, 0x6c80419600000000,
+    0xff26057700000000, 0xb91d621200000000, 0x7350cbbd00000000,
+    0x356bacd800000000},
+   {0x0000000000000000, 0x9e83da9f00000000, 0x7d01c4e400000000,
+    0xe3821e7b00000000, 0xbb04f91200000000, 0x2587238d00000000,
+    0xc6053df600000000, 0x5886e76900000000, 0x7609f22500000000,
+    0xe88a28ba00000000, 0x0b0836c100000000, 0x958bec5e00000000,
+    0xcd0d0b3700000000, 0x538ed1a800000000, 0xb00ccfd300000000,
+    0x2e8f154c00000000, 0xec12e44b00000000, 0x72913ed400000000,
+    0x911320af00000000, 0x0f90fa3000000000, 0x57161d5900000000,
+    0xc995c7c600000000, 0x2a17d9bd00000000, 0xb494032200000000,
+    0x9a1b166e00000000, 0x0498ccf100000000, 0xe71ad28a00000000,
+    0x7999081500000000, 0x211fef7c00000000, 0xbf9c35e300000000,
+    0x5c1e2b9800000000, 0xc29df10700000000, 0xd825c89700000000,
+    0x46a6120800000000, 0xa5240c7300000000, 0x3ba7d6ec00000000,
+    0x6321318500000000, 0xfda2eb1a00000000, 0x1e20f56100000000,
+    0x80a32ffe00000000, 0xae2c3ab200000000, 0x30afe02d00000000,
+    0xd32dfe5600000000, 0x4dae24c900000000, 0x1528c3a000000000,
+    0x8bab193f00000000, 0x6829074400000000, 0xf6aadddb00000000,
+    0x34372cdc00000000, 0xaab4f64300000000, 0x4936e83800000000,
+    0xd7b532a700000000, 0x8f33d5ce00000000, 0x11b00f5100000000,
+    0xf232112a00000000, 0x6cb1cbb500000000, 0x423edef900000000,
+    0xdcbd046600000000, 0x3f3f1a1d00000000, 0xa1bcc08200000000,
+    0xf93a27eb00000000, 0x67b9fd7400000000, 0x843be30f00000000,
+    0x1ab8399000000000, 0xf14de1f400000000, 0x6fce3b6b00000000,
+    0x8c4c251000000000, 0x12cfff8f00000000, 0x4a4918e600000000,
+    0xd4cac27900000000, 0x3748dc0200000000, 0xa9cb069d00000000,
+    0x874413d100000000, 0x19c7c94e00000000, 0xfa45d73500000000,
+    0x64c60daa00000000, 0x3c40eac300000000, 0xa2c3305c00000000,
+    0x41412e2700000000, 0xdfc2f4b800000000, 0x1d5f05bf00000000,
+    0x83dcdf2000000000, 0x605ec15b00000000, 0xfedd1bc400000000,
+    0xa65bfcad00000000, 0x38d8263200000000, 0xdb5a384900000000,
+    0x45d9e2d600000000, 0x6b56f79a00000000, 0xf5d52d0500000000,
+    0x1657337e00000000, 0x88d4e9e100000000, 0xd0520e8800000000,
+    0x4ed1d41700000000, 0xad53ca6c00000000, 0x33d010f300000000,
+    0x2968296300000000, 0xb7ebf3fc00000000, 0x5469ed8700000000,
+    0xcaea371800000000, 0x926cd07100000000, 0x0cef0aee00000000,
+    0xef6d149500000000, 0x71eece0a00000000, 0x5f61db4600000000,
+    0xc1e201d900000000, 0x22601fa200000000, 0xbce3c53d00000000,
+    0xe465225400000000, 0x7ae6f8cb00000000, 0x9964e6b000000000,
+    0x07e73c2f00000000, 0xc57acd2800000000, 0x5bf917b700000000,
+    0xb87b09cc00000000, 0x26f8d35300000000, 0x7e7e343a00000000,
+    0xe0fdeea500000000, 0x037ff0de00000000, 0x9dfc2a4100000000,
+    0xb3733f0d00000000, 0x2df0e59200000000, 0xce72fbe900000000,
+    0x50f1217600000000, 0x0877c61f00000000, 0x96f41c8000000000,
+    0x757602fb00000000, 0xebf5d86400000000, 0xa39db33200000000,
+    0x3d1e69ad00000000, 0xde9c77d600000000, 0x401fad4900000000,
+    0x18994a2000000000, 0x861a90bf00000000, 0x65988ec400000000,
+    0xfb1b545b00000000, 0xd594411700000000, 0x4b179b8800000000,
+    0xa89585f300000000, 0x36165f6c00000000, 0x6e90b80500000000,
+    0xf013629a00000000, 0x13917ce100000000, 0x8d12a67e00000000,
+    0x4f8f577900000000, 0xd10c8de600000000, 0x328e939d00000000,
+    0xac0d490200000000, 0xf48bae6b00000000, 0x6a0874f400000000,
+    0x898a6a8f00000000, 0x1709b01000000000, 0x3986a55c00000000,
+    0xa7057fc300000000, 0x448761b800000000, 0xda04bb2700000000,
+    0x82825c4e00000000, 0x1c0186d100000000, 0xff8398aa00000000,
+    0x6100423500000000, 0x7bb87ba500000000, 0xe53ba13a00000000,
+    0x06b9bf4100000000, 0x983a65de00000000, 0xc0bc82b700000000,
+    0x5e3f582800000000, 0xbdbd465300000000, 0x233e9ccc00000000,
+    0x0db1898000000000, 0x9332531f00000000, 0x70b04d6400000000,
+    0xee3397fb00000000, 0xb6b5709200000000, 0x2836aa0d00000000,
+    0xcbb4b47600000000, 0x55376ee900000000, 0x97aa9fee00000000,
+    0x0929457100000000, 0xeaab5b0a00000000, 0x7428819500000000,
+    0x2cae66fc00000000, 0xb22dbc6300000000, 0x51afa21800000000,
+    0xcf2c788700000000, 0xe1a36dcb00000000, 0x7f20b75400000000,
+    0x9ca2a92f00000000, 0x022173b000000000, 0x5aa794d900000000,
+    0xc4244e4600000000, 0x27a6503d00000000, 0xb9258aa200000000,
+    0x52d052c600000000, 0xcc53885900000000, 0x2fd1962200000000,
+    0xb1524cbd00000000, 0xe9d4abd400000000, 0x7757714b00000000,
+    0x94d56f3000000000, 0x0a56b5af00000000, 0x24d9a0e300000000,
+    0xba5a7a7c00000000, 0x59d8640700000000, 0xc75bbe9800000000,
+    0x9fdd59f100000000, 0x015e836e00000000, 0xe2dc9d1500000000,
+    0x7c5f478a00000000, 0xbec2b68d00000000, 0x20416c1200000000,
+    0xc3c3726900000000, 0x5d40a8f600000000, 0x05c64f9f00000000,
+    0x9b45950000000000, 0x78c78b7b00000000, 0xe64451e400000000,
+    0xc8cb44a800000000, 0x56489e3700000000, 0xb5ca804c00000000,
+    0x2b495ad300000000, 0x73cfbdba00000000, 0xed4c672500000000,
+    0x0ece795e00000000, 0x904da3c100000000, 0x8af59a5100000000,
+    0x147640ce00000000, 0xf7f45eb500000000, 0x6977842a00000000,
+    0x31f1634300000000, 0xaf72b9dc00000000, 0x4cf0a7a700000000,
+    0xd2737d3800000000, 0xfcfc687400000000, 0x627fb2eb00000000,
+    0x81fdac9000000000, 0x1f7e760f00000000, 0x47f8916600000000,
+    0xd97b4bf900000000, 0x3af9558200000000, 0xa47a8f1d00000000,
+    0x66e77e1a00000000, 0xf864a48500000000, 0x1be6bafe00000000,
+    0x8565606100000000, 0xdde3870800000000, 0x43605d9700000000,
+    0xa0e243ec00000000, 0x3e61997300000000, 0x10ee8c3f00000000,
+    0x8e6d56a000000000, 0x6def48db00000000, 0xf36c924400000000,
+    0xabea752d00000000, 0x3569afb200000000, 0xd6ebb1c900000000,
+    0x48686b5600000000},
+   {0x0000000000000000, 0xc064281700000000, 0x80c9502e00000000,
+    0x40ad783900000000, 0x0093a15c00000000, 0xc0f7894b00000000,
+    0x805af17200000000, 0x403ed96500000000, 0x002643b900000000,
+    0xc0426bae00000000, 0x80ef139700000000, 0x408b3b8000000000,
+    0x00b5e2e500000000, 0xc0d1caf200000000, 0x807cb2cb00000000,
+    0x40189adc00000000, 0x414af7a900000000, 0x812edfbe00000000,
+    0xc183a78700000000, 0x01e78f9000000000, 0x41d956f500000000,
+    0x81bd7ee200000000, 0xc11006db00000000, 0x01742ecc00000000,
+    0x416cb41000000000, 0x81089c0700000000, 0xc1a5e43e00000000,
+    0x01c1cc2900000000, 0x41ff154c00000000, 0x819b3d5b00000000,
+    0xc136456200000000, 0x01526d7500000000, 0xc3929f8800000000,
+    0x03f6b79f00000000, 0x435bcfa600000000, 0x833fe7b100000000,
+    0xc3013ed400000000, 0x036516c300000000, 0x43c86efa00000000,
+    0x83ac46ed00000000, 0xc3b4dc3100000000, 0x03d0f42600000000,
+    0x437d8c1f00000000, 0x8319a40800000000, 0xc3277d6d00000000,
+    0x0343557a00000000, 0x43ee2d4300000000, 0x838a055400000000,
+    0x82d8682100000000, 0x42bc403600000000, 0x0211380f00000000,
+    0xc275101800000000, 0x824bc97d00000000, 0x422fe16a00000000,
+    0x0282995300000000, 0xc2e6b14400000000, 0x82fe2b9800000000,
+    0x429a038f00000000, 0x02377bb600000000, 0xc25353a100000000,
+    0x826d8ac400000000, 0x4209a2d300000000, 0x02a4daea00000000,
+    0xc2c0f2fd00000000, 0xc7234eca00000000, 0x074766dd00000000,
+    0x47ea1ee400000000, 0x878e36f300000000, 0xc7b0ef9600000000,
+    0x07d4c78100000000, 0x4779bfb800000000, 0x871d97af00000000,
+    0xc7050d7300000000, 0x0761256400000000, 0x47cc5d5d00000000,
+    0x87a8754a00000000, 0xc796ac2f00000000, 0x07f2843800000000,
+    0x475ffc0100000000, 0x873bd41600000000, 0x8669b96300000000,
+    0x460d917400000000, 0x06a0e94d00000000, 0xc6c4c15a00000000,
+    0x86fa183f00000000, 0x469e302800000000, 0x0633481100000000,
+    0xc657600600000000, 0x864ffada00000000, 0x462bd2cd00000000,
+    0x0686aaf400000000, 0xc6e282e300000000, 0x86dc5b8600000000,
+    0x46b8739100000000, 0x06150ba800000000, 0xc67123bf00000000,
+    0x04b1d14200000000, 0xc4d5f95500000000, 0x8478816c00000000,
+    0x441ca97b00000000, 0x0422701e00000000, 0xc446580900000000,
+    0x84eb203000000000, 0x448f082700000000, 0x049792fb00000000,
+    0xc4f3baec00000000, 0x845ec2d500000000, 0x443aeac200000000,
+    0x040433a700000000, 0xc4601bb000000000, 0x84cd638900000000,
+    0x44a94b9e00000000, 0x45fb26eb00000000, 0x859f0efc00000000,
+    0xc53276c500000000, 0x05565ed200000000, 0x456887b700000000,
+    0x850cafa000000000, 0xc5a1d79900000000, 0x05c5ff8e00000000,
+    0x45dd655200000000, 0x85b94d4500000000, 0xc514357c00000000,
+    0x05701d6b00000000, 0x454ec40e00000000, 0x852aec1900000000,
+    0xc587942000000000, 0x05e3bc3700000000, 0xcf41ed4f00000000,
+    0x0f25c55800000000, 0x4f88bd6100000000, 0x8fec957600000000,
+    0xcfd24c1300000000, 0x0fb6640400000000, 0x4f1b1c3d00000000,
+    0x8f7f342a00000000, 0xcf67aef600000000, 0x0f0386e100000000,
+    0x4faefed800000000, 0x8fcad6cf00000000, 0xcff40faa00000000,
+    0x0f9027bd00000000, 0x4f3d5f8400000000, 0x8f59779300000000,
+    0x8e0b1ae600000000, 0x4e6f32f100000000, 0x0ec24ac800000000,
+    0xcea662df00000000, 0x8e98bbba00000000, 0x4efc93ad00000000,
+    0x0e51eb9400000000, 0xce35c38300000000, 0x8e2d595f00000000,
+    0x4e49714800000000, 0x0ee4097100000000, 0xce80216600000000,
+    0x8ebef80300000000, 0x4edad01400000000, 0x0e77a82d00000000,
+    0xce13803a00000000, 0x0cd372c700000000, 0xccb75ad000000000,
+    0x8c1a22e900000000, 0x4c7e0afe00000000, 0x0c40d39b00000000,
+    0xcc24fb8c00000000, 0x8c8983b500000000, 0x4cedaba200000000,
+    0x0cf5317e00000000, 0xcc91196900000000, 0x8c3c615000000000,
+    0x4c58494700000000, 0x0c66902200000000, 0xcc02b83500000000,
+    0x8cafc00c00000000, 0x4ccbe81b00000000, 0x4d99856e00000000,
+    0x8dfdad7900000000, 0xcd50d54000000000, 0x0d34fd5700000000,
+    0x4d0a243200000000, 0x8d6e0c2500000000, 0xcdc3741c00000000,
+    0x0da75c0b00000000, 0x4dbfc6d700000000, 0x8ddbeec000000000,
+    0xcd7696f900000000, 0x0d12beee00000000, 0x4d2c678b00000000,
+    0x8d484f9c00000000, 0xcde537a500000000, 0x0d811fb200000000,
+    0x0862a38500000000, 0xc8068b9200000000, 0x88abf3ab00000000,
+    0x48cfdbbc00000000, 0x08f102d900000000, 0xc8952ace00000000,
+    0x883852f700000000, 0x485c7ae000000000, 0x0844e03c00000000,
+    0xc820c82b00000000, 0x888db01200000000, 0x48e9980500000000,
+    0x08d7416000000000, 0xc8b3697700000000, 0x881e114e00000000,
+    0x487a395900000000, 0x4928542c00000000, 0x894c7c3b00000000,
+    0xc9e1040200000000, 0x09852c1500000000, 0x49bbf57000000000,
+    0x89dfdd6700000000, 0xc972a55e00000000, 0x09168d4900000000,
+    0x490e179500000000, 0x896a3f8200000000, 0xc9c747bb00000000,
+    0x09a36fac00000000, 0x499db6c900000000, 0x89f99ede00000000,
+    0xc954e6e700000000, 0x0930cef000000000, 0xcbf03c0d00000000,
+    0x0b94141a00000000, 0x4b396c2300000000, 0x8b5d443400000000,
+    0xcb639d5100000000, 0x0b07b54600000000, 0x4baacd7f00000000,
+    0x8bcee56800000000, 0xcbd67fb400000000, 0x0bb257a300000000,
+    0x4b1f2f9a00000000, 0x8b7b078d00000000, 0xcb45dee800000000,
+    0x0b21f6ff00000000, 0x4b8c8ec600000000, 0x8be8a6d100000000,
+    0x8abacba400000000, 0x4adee3b300000000, 0x0a739b8a00000000,
+    0xca17b39d00000000, 0x8a296af800000000, 0x4a4d42ef00000000,
+    0x0ae03ad600000000, 0xca8412c100000000, 0x8a9c881d00000000,
+    0x4af8a00a00000000, 0x0a55d83300000000, 0xca31f02400000000,
+    0x8a0f294100000000, 0x4a6b015600000000, 0x0ac6796f00000000,
+    0xcaa2517800000000},
+   {0x0000000000000000, 0xd4ea739b00000000, 0xe9d396ed00000000,
+    0x3d39e57600000000, 0x93a15c0000000000, 0x474b2f9b00000000,
+    0x7a72caed00000000, 0xae98b97600000000, 0x2643b90000000000,
+    0xf2a9ca9b00000000, 0xcf902fed00000000, 0x1b7a5c7600000000,
+    0xb5e2e50000000000, 0x6108969b00000000, 0x5c3173ed00000000,
+    0x88db007600000000, 0x4c86720100000000, 0x986c019a00000000,
+    0xa555e4ec00000000, 0x71bf977700000000, 0xdf272e0100000000,
+    0x0bcd5d9a00000000, 0x36f4b8ec00000000, 0xe21ecb7700000000,
+    0x6ac5cb0100000000, 0xbe2fb89a00000000, 0x83165dec00000000,
+    0x57fc2e7700000000, 0xf964970100000000, 0x2d8ee49a00000000,
+    0x10b701ec00000000, 0xc45d727700000000, 0x980ce50200000000,
+    0x4ce6969900000000, 0x71df73ef00000000, 0xa535007400000000,
+    0x0badb90200000000, 0xdf47ca9900000000, 0xe27e2fef00000000,
+    0x36945c7400000000, 0xbe4f5c0200000000, 0x6aa52f9900000000,
+    0x579ccaef00000000, 0x8376b97400000000, 0x2dee000200000000,
+    0xf904739900000000, 0xc43d96ef00000000, 0x10d7e57400000000,
+    0xd48a970300000000, 0x0060e49800000000, 0x3d5901ee00000000,
+    0xe9b3727500000000, 0x472bcb0300000000, 0x93c1b89800000000,
+    0xaef85dee00000000, 0x7a122e7500000000, 0xf2c92e0300000000,
+    0x26235d9800000000, 0x1b1ab8ee00000000, 0xcff0cb7500000000,
+    0x6168720300000000, 0xb582019800000000, 0x88bbe4ee00000000,
+    0x5c51977500000000, 0x3019ca0500000000, 0xe4f3b99e00000000,
+    0xd9ca5ce800000000, 0x0d202f7300000000, 0xa3b8960500000000,
+    0x7752e59e00000000, 0x4a6b00e800000000, 0x9e81737300000000,
+    0x165a730500000000, 0xc2b0009e00000000, 0xff89e5e800000000,
+    0x2b63967300000000, 0x85fb2f0500000000, 0x51115c9e00000000,
+    0x6c28b9e800000000, 0xb8c2ca7300000000, 0x7c9fb80400000000,
+    0xa875cb9f00000000, 0x954c2ee900000000, 0x41a65d7200000000,
+    0xef3ee40400000000, 0x3bd4979f00000000, 0x06ed72e900000000,
+    0xd207017200000000, 0x5adc010400000000, 0x8e36729f00000000,
+    0xb30f97e900000000, 0x67e5e47200000000, 0xc97d5d0400000000,
+    0x1d972e9f00000000, 0x20aecbe900000000, 0xf444b87200000000,
+    0xa8152f0700000000, 0x7cff5c9c00000000, 0x41c6b9ea00000000,
+    0x952cca7100000000, 0x3bb4730700000000, 0xef5e009c00000000,
+    0xd267e5ea00000000, 0x068d967100000000, 0x8e56960700000000,
+    0x5abce59c00000000, 0x678500ea00000000, 0xb36f737100000000,
+    0x1df7ca0700000000, 0xc91db99c00000000, 0xf4245cea00000000,
+    0x20ce2f7100000000, 0xe4935d0600000000, 0x30792e9d00000000,
+    0x0d40cbeb00000000, 0xd9aab87000000000, 0x7732010600000000,
+    0xa3d8729d00000000, 0x9ee197eb00000000, 0x4a0be47000000000,
+    0xc2d0e40600000000, 0x163a979d00000000, 0x2b0372eb00000000,
+    0xffe9017000000000, 0x5171b80600000000, 0x859bcb9d00000000,
+    0xb8a22eeb00000000, 0x6c485d7000000000, 0x6032940b00000000,
+    0xb4d8e79000000000, 0x89e102e600000000, 0x5d0b717d00000000,
+    0xf393c80b00000000, 0x2779bb9000000000, 0x1a405ee600000000,
+    0xceaa2d7d00000000, 0x46712d0b00000000, 0x929b5e9000000000,
+    0xafa2bbe600000000, 0x7b48c87d00000000, 0xd5d0710b00000000,
+    0x013a029000000000, 0x3c03e7e600000000, 0xe8e9947d00000000,
+    0x2cb4e60a00000000, 0xf85e959100000000, 0xc56770e700000000,
+    0x118d037c00000000, 0xbf15ba0a00000000, 0x6bffc99100000000,
+    0x56c62ce700000000, 0x822c5f7c00000000, 0x0af75f0a00000000,
+    0xde1d2c9100000000, 0xe324c9e700000000, 0x37ceba7c00000000,
+    0x9956030a00000000, 0x4dbc709100000000, 0x708595e700000000,
+    0xa46fe67c00000000, 0xf83e710900000000, 0x2cd4029200000000,
+    0x11ede7e400000000, 0xc507947f00000000, 0x6b9f2d0900000000,
+    0xbf755e9200000000, 0x824cbbe400000000, 0x56a6c87f00000000,
+    0xde7dc80900000000, 0x0a97bb9200000000, 0x37ae5ee400000000,
+    0xe3442d7f00000000, 0x4ddc940900000000, 0x9936e79200000000,
+    0xa40f02e400000000, 0x70e5717f00000000, 0xb4b8030800000000,
+    0x6052709300000000, 0x5d6b95e500000000, 0x8981e67e00000000,
+    0x27195f0800000000, 0xf3f32c9300000000, 0xcecac9e500000000,
+    0x1a20ba7e00000000, 0x92fbba0800000000, 0x4611c99300000000,
+    0x7b282ce500000000, 0xafc25f7e00000000, 0x015ae60800000000,
+    0xd5b0959300000000, 0xe88970e500000000, 0x3c63037e00000000,
+    0x502b5e0e00000000, 0x84c12d9500000000, 0xb9f8c8e300000000,
+    0x6d12bb7800000000, 0xc38a020e00000000, 0x1760719500000000,
+    0x2a5994e300000000, 0xfeb3e77800000000, 0x7668e70e00000000,
+    0xa282949500000000, 0x9fbb71e300000000, 0x4b51027800000000,
+    0xe5c9bb0e00000000, 0x3123c89500000000, 0x0c1a2de300000000,
+    0xd8f05e7800000000, 0x1cad2c0f00000000, 0xc8475f9400000000,
+    0xf57ebae200000000, 0x2194c97900000000, 0x8f0c700f00000000,
+    0x5be6039400000000, 0x66dfe6e200000000, 0xb235957900000000,
+    0x3aee950f00000000, 0xee04e69400000000, 0xd33d03e200000000,
+    0x07d7707900000000, 0xa94fc90f00000000, 0x7da5ba9400000000,
+    0x409c5fe200000000, 0x94762c7900000000, 0xc827bb0c00000000,
+    0x1ccdc89700000000, 0x21f42de100000000, 0xf51e5e7a00000000,
+    0x5b86e70c00000000, 0x8f6c949700000000, 0xb25571e100000000,
+    0x66bf027a00000000, 0xee64020c00000000, 0x3a8e719700000000,
+    0x07b794e100000000, 0xd35de77a00000000, 0x7dc55e0c00000000,
+    0xa92f2d9700000000, 0x9416c8e100000000, 0x40fcbb7a00000000,
+    0x84a1c90d00000000, 0x504bba9600000000, 0x6d725fe000000000,
+    0xb9982c7b00000000, 0x1700950d00000000, 0xc3eae69600000000,
+    0xfed303e000000000, 0x2a39707b00000000, 0xa2e2700d00000000,
+    0x7608039600000000, 0x4b31e6e000000000, 0x9fdb957b00000000,
+    0x31432c0d00000000, 0xe5a95f9600000000, 0xd890bae000000000,
+    0x0c7ac97b00000000},
+   {0x0000000000000000, 0x2765258100000000, 0x0fcc3bd900000000,
+    0x28a91e5800000000, 0x5f9e066900000000, 0x78fb23e800000000,
+    0x50523db000000000, 0x7737183100000000, 0xbe3c0dd200000000,
+    0x9959285300000000, 0xb1f0360b00000000, 0x9695138a00000000,
+    0xe1a20bbb00000000, 0xc6c72e3a00000000, 0xee6e306200000000,
+    0xc90b15e300000000, 0x3d7f6b7f00000000, 0x1a1a4efe00000000,
+    0x32b350a600000000, 0x15d6752700000000, 0x62e16d1600000000,
+    0x4584489700000000, 0x6d2d56cf00000000, 0x4a48734e00000000,
+    0x834366ad00000000, 0xa426432c00000000, 0x8c8f5d7400000000,
+    0xabea78f500000000, 0xdcdd60c400000000, 0xfbb8454500000000,
+    0xd3115b1d00000000, 0xf4747e9c00000000, 0x7afed6fe00000000,
+    0x5d9bf37f00000000, 0x7532ed2700000000, 0x5257c8a600000000,
+    0x2560d09700000000, 0x0205f51600000000, 0x2aaceb4e00000000,
+    0x0dc9cecf00000000, 0xc4c2db2c00000000, 0xe3a7fead00000000,
+    0xcb0ee0f500000000, 0xec6bc57400000000, 0x9b5cdd4500000000,
+    0xbc39f8c400000000, 0x9490e69c00000000, 0xb3f5c31d00000000,
+    0x4781bd8100000000, 0x60e4980000000000, 0x484d865800000000,
+    0x6f28a3d900000000, 0x181fbbe800000000, 0x3f7a9e6900000000,
+    0x17d3803100000000, 0x30b6a5b000000000, 0xf9bdb05300000000,
+    0xded895d200000000, 0xf6718b8a00000000, 0xd114ae0b00000000,
+    0xa623b63a00000000, 0x814693bb00000000, 0xa9ef8de300000000,
+    0x8e8aa86200000000, 0xb5fadc2600000000, 0x929ff9a700000000,
+    0xba36e7ff00000000, 0x9d53c27e00000000, 0xea64da4f00000000,
+    0xcd01ffce00000000, 0xe5a8e19600000000, 0xc2cdc41700000000,
+    0x0bc6d1f400000000, 0x2ca3f47500000000, 0x040aea2d00000000,
+    0x236fcfac00000000, 0x5458d79d00000000, 0x733df21c00000000,
+    0x5b94ec4400000000, 0x7cf1c9c500000000, 0x8885b75900000000,
+    0xafe092d800000000, 0x87498c8000000000, 0xa02ca90100000000,
+    0xd71bb13000000000, 0xf07e94b100000000, 0xd8d78ae900000000,
+    0xffb2af6800000000, 0x36b9ba8b00000000, 0x11dc9f0a00000000,
+    0x3975815200000000, 0x1e10a4d300000000, 0x6927bce200000000,
+    0x4e42996300000000, 0x66eb873b00000000, 0x418ea2ba00000000,
+    0xcf040ad800000000, 0xe8612f5900000000, 0xc0c8310100000000,
+    0xe7ad148000000000, 0x909a0cb100000000, 0xb7ff293000000000,
+    0x9f56376800000000, 0xb83312e900000000, 0x7138070a00000000,
+    0x565d228b00000000, 0x7ef43cd300000000, 0x5991195200000000,
+    0x2ea6016300000000, 0x09c324e200000000, 0x216a3aba00000000,
+    0x060f1f3b00000000, 0xf27b61a700000000, 0xd51e442600000000,
+    0xfdb75a7e00000000, 0xdad27fff00000000, 0xade567ce00000000,
+    0x8a80424f00000000, 0xa2295c1700000000, 0x854c799600000000,
+    0x4c476c7500000000, 0x6b2249f400000000, 0x438b57ac00000000,
+    0x64ee722d00000000, 0x13d96a1c00000000, 0x34bc4f9d00000000,
+    0x1c1551c500000000, 0x3b70744400000000, 0x6af5b94d00000000,
+    0x4d909ccc00000000, 0x6539829400000000, 0x425ca71500000000,
+    0x356bbf2400000000, 0x120e9aa500000000, 0x3aa784fd00000000,
+    0x1dc2a17c00000000, 0xd4c9b49f00000000, 0xf3ac911e00000000,
+    0xdb058f4600000000, 0xfc60aac700000000, 0x8b57b2f600000000,
+    0xac32977700000000, 0x849b892f00000000, 0xa3feacae00000000,
+    0x578ad23200000000, 0x70eff7b300000000, 0x5846e9eb00000000,
+    0x7f23cc6a00000000, 0x0814d45b00000000, 0x2f71f1da00000000,
+    0x07d8ef8200000000, 0x20bdca0300000000, 0xe9b6dfe000000000,
+    0xced3fa6100000000, 0xe67ae43900000000, 0xc11fc1b800000000,
+    0xb628d98900000000, 0x914dfc0800000000, 0xb9e4e25000000000,
+    0x9e81c7d100000000, 0x100b6fb300000000, 0x376e4a3200000000,
+    0x1fc7546a00000000, 0x38a271eb00000000, 0x4f9569da00000000,
+    0x68f04c5b00000000, 0x4059520300000000, 0x673c778200000000,
+    0xae37626100000000, 0x895247e000000000, 0xa1fb59b800000000,
+    0x869e7c3900000000, 0xf1a9640800000000, 0xd6cc418900000000,
+    0xfe655fd100000000, 0xd9007a5000000000, 0x2d7404cc00000000,
+    0x0a11214d00000000, 0x22b83f1500000000, 0x05dd1a9400000000,
+    0x72ea02a500000000, 0x558f272400000000, 0x7d26397c00000000,
+    0x5a431cfd00000000, 0x9348091e00000000, 0xb42d2c9f00000000,
+    0x9c8432c700000000, 0xbbe1174600000000, 0xccd60f7700000000,
+    0xebb32af600000000, 0xc31a34ae00000000, 0xe47f112f00000000,
+    0xdf0f656b00000000, 0xf86a40ea00000000, 0xd0c35eb200000000,
+    0xf7a67b3300000000, 0x8091630200000000, 0xa7f4468300000000,
+    0x8f5d58db00000000, 0xa8387d5a00000000, 0x613368b900000000,
+    0x46564d3800000000, 0x6eff536000000000, 0x499a76e100000000,
+    0x3ead6ed000000000, 0x19c84b5100000000, 0x3161550900000000,
+    0x1604708800000000, 0xe2700e1400000000, 0xc5152b9500000000,
+    0xedbc35cd00000000, 0xcad9104c00000000, 0xbdee087d00000000,
+    0x9a8b2dfc00000000, 0xb22233a400000000, 0x9547162500000000,
+    0x5c4c03c600000000, 0x7b29264700000000, 0x5380381f00000000,
+    0x74e51d9e00000000, 0x03d205af00000000, 0x24b7202e00000000,
+    0x0c1e3e7600000000, 0x2b7b1bf700000000, 0xa5f1b39500000000,
+    0x8294961400000000, 0xaa3d884c00000000, 0x8d58adcd00000000,
+    0xfa6fb5fc00000000, 0xdd0a907d00000000, 0xf5a38e2500000000,
+    0xd2c6aba400000000, 0x1bcdbe4700000000, 0x3ca89bc600000000,
+    0x1401859e00000000, 0x3364a01f00000000, 0x4453b82e00000000,
+    0x63369daf00000000, 0x4b9f83f700000000, 0x6cfaa67600000000,
+    0x988ed8ea00000000, 0xbfebfd6b00000000, 0x9742e33300000000,
+    0xb027c6b200000000, 0xc710de8300000000, 0xe075fb0200000000,
+    0xc8dce55a00000000, 0xefb9c0db00000000, 0x26b2d53800000000,
+    0x01d7f0b900000000, 0x297eeee100000000, 0x0e1bcb6000000000,
+    0x792cd35100000000, 0x5e49f6d000000000, 0x76e0e88800000000,
+    0x5185cd0900000000}};
+
+#else /* W == 4 */
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f,
+    0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91,
+    0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e,
+    0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c,
+    0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02,
+    0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12,
+    0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567,
+    0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277,
+    0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679,
+    0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b,
+    0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4,
+    0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a,
+    0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0,
+    0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0,
+    0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91,
+    0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881,
+    0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173,
+    0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d,
+    0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912,
+    0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8,
+    0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6,
+    0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6,
+    0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b,
+    0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b,
+    0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75,
+    0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f,
+    0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00,
+    0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee,
+    0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c,
+    0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c,
+    0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d,
+    0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d,
+    0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67,
+    0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89,
+    0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706,
+    0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14,
+    0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a,
+    0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a,
+    0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f,
+    0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f,
+    0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591,
+    0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983,
+    0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c,
+    0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2,
+    0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8,
+    0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8,
+    0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89,
+    0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99,
+    0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b,
+    0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485,
+    0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a,
+    0x36197165},
+   {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382,
+    0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85,
+    0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06,
+    0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca,
+    0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e,
+    0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc,
+    0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616,
+    0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54,
+    0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10,
+    0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc,
+    0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f,
+    0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58,
+    0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef,
+    0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad,
+    0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b,
+    0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29,
+    0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6,
+    0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1,
+    0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622,
+    0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039,
+    0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d,
+    0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f,
+    0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32,
+    0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770,
+    0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034,
+    0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f,
+    0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc,
+    0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db,
+    0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154,
+    0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16,
+    0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0,
+    0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592,
+    0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca,
+    0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd,
+    0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e,
+    0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882,
+    0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6,
+    0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384,
+    0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1,
+    0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3,
+    0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7,
+    0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b,
+    0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8,
+    0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff,
+    0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7,
+    0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5,
+    0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23,
+    0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761,
+    0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee,
+    0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9,
+    0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a,
+    0x1a3b93aa},
+   {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a,
+    0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca,
+    0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3,
+    0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb,
+    0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c,
+    0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58,
+    0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed,
+    0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9,
+    0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e,
+    0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906,
+    0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f,
+    0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf,
+    0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0,
+    0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4,
+    0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769,
+    0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d,
+    0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632,
+    0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82,
+    0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb,
+    0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73,
+    0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484,
+    0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0,
+    0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5,
+    0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1,
+    0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516,
+    0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f,
+    0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946,
+    0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6,
+    0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9,
+    0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad,
+    0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820,
+    0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364,
+    0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab,
+    0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b,
+    0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62,
+    0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a,
+    0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd,
+    0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089,
+    0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c,
+    0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8,
+    0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f,
+    0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477,
+    0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e,
+    0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be,
+    0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71,
+    0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635,
+    0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8,
+    0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc,
+    0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3,
+    0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753,
+    0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a,
+    0xe147d714},
+   {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c,
+    0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b,
+    0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92,
+    0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4,
+    0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069,
+    0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526,
+    0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25,
+    0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a,
+    0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7,
+    0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491,
+    0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958,
+    0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f,
+    0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307,
+    0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648,
+    0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999,
+    0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6,
+    0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a,
+    0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d,
+    0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4,
+    0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61,
+    0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc,
+    0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3,
+    0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53,
+    0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c,
+    0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1,
+    0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c,
+    0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5,
+    0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92,
+    0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e,
+    0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771,
+    0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0,
+    0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def,
+    0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0,
+    0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7,
+    0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e,
+    0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58,
+    0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285,
+    0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca,
+    0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce,
+    0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81,
+    0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c,
+    0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a,
+    0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3,
+    0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4,
+    0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb,
+    0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4,
+    0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75,
+    0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a,
+    0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296,
+    0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1,
+    0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808,
+    0x494f0c4b}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x00000000, 0x43147b17, 0x8628f62e, 0xc53c8d39, 0x0c51ec5d,
+    0x4f45974a, 0x8a791a73, 0xc96d6164, 0x18a2d8bb, 0x5bb6a3ac,
+    0x9e8a2e95, 0xdd9e5582, 0x14f334e6, 0x57e74ff1, 0x92dbc2c8,
+    0xd1cfb9df, 0x7142c0ac, 0x3256bbbb, 0xf76a3682, 0xb47e4d95,
+    0x7d132cf1, 0x3e0757e6, 0xfb3bdadf, 0xb82fa1c8, 0x69e01817,
+    0x2af46300, 0xefc8ee39, 0xacdc952e, 0x65b1f44a, 0x26a58f5d,
+    0xe3990264, 0xa08d7973, 0xa382f182, 0xe0968a95, 0x25aa07ac,
+    0x66be7cbb, 0xafd31ddf, 0xecc766c8, 0x29fbebf1, 0x6aef90e6,
+    0xbb202939, 0xf834522e, 0x3d08df17, 0x7e1ca400, 0xb771c564,
+    0xf465be73, 0x3159334a, 0x724d485d, 0xd2c0312e, 0x91d44a39,
+    0x54e8c700, 0x17fcbc17, 0xde91dd73, 0x9d85a664, 0x58b92b5d,
+    0x1bad504a, 0xca62e995, 0x89769282, 0x4c4a1fbb, 0x0f5e64ac,
+    0xc63305c8, 0x85277edf, 0x401bf3e6, 0x030f88f1, 0x070392de,
+    0x4417e9c9, 0x812b64f0, 0xc23f1fe7, 0x0b527e83, 0x48460594,
+    0x8d7a88ad, 0xce6ef3ba, 0x1fa14a65, 0x5cb53172, 0x9989bc4b,
+    0xda9dc75c, 0x13f0a638, 0x50e4dd2f, 0x95d85016, 0xd6cc2b01,
+    0x76415272, 0x35552965, 0xf069a45c, 0xb37ddf4b, 0x7a10be2f,
+    0x3904c538, 0xfc384801, 0xbf2c3316, 0x6ee38ac9, 0x2df7f1de,
+    0xe8cb7ce7, 0xabdf07f0, 0x62b26694, 0x21a61d83, 0xe49a90ba,
+    0xa78eebad, 0xa481635c, 0xe795184b, 0x22a99572, 0x61bdee65,
+    0xa8d08f01, 0xebc4f416, 0x2ef8792f, 0x6dec0238, 0xbc23bbe7,
+    0xff37c0f0, 0x3a0b4dc9, 0x791f36de, 0xb07257ba, 0xf3662cad,
+    0x365aa194, 0x754eda83, 0xd5c3a3f0, 0x96d7d8e7, 0x53eb55de,
+    0x10ff2ec9, 0xd9924fad, 0x9a8634ba, 0x5fbab983, 0x1caec294,
+    0xcd617b4b, 0x8e75005c, 0x4b498d65, 0x085df672, 0xc1309716,
+    0x8224ec01, 0x47186138, 0x040c1a2f, 0x4f005566, 0x0c142e71,
+    0xc928a348, 0x8a3cd85f, 0x4351b93b, 0x0045c22c, 0xc5794f15,
+    0x866d3402, 0x57a28ddd, 0x14b6f6ca, 0xd18a7bf3, 0x929e00e4,
+    0x5bf36180, 0x18e71a97, 0xdddb97ae, 0x9ecfecb9, 0x3e4295ca,
+    0x7d56eedd, 0xb86a63e4, 0xfb7e18f3, 0x32137997, 0x71070280,
+    0xb43b8fb9, 0xf72ff4ae, 0x26e04d71, 0x65f43666, 0xa0c8bb5f,
+    0xe3dcc048, 0x2ab1a12c, 0x69a5da3b, 0xac995702, 0xef8d2c15,
+    0xec82a4e4, 0xaf96dff3, 0x6aaa52ca, 0x29be29dd, 0xe0d348b9,
+    0xa3c733ae, 0x66fbbe97, 0x25efc580, 0xf4207c5f, 0xb7340748,
+    0x72088a71, 0x311cf166, 0xf8719002, 0xbb65eb15, 0x7e59662c,
+    0x3d4d1d3b, 0x9dc06448, 0xded41f5f, 0x1be89266, 0x58fce971,
+    0x91918815, 0xd285f302, 0x17b97e3b, 0x54ad052c, 0x8562bcf3,
+    0xc676c7e4, 0x034a4add, 0x405e31ca, 0x893350ae, 0xca272bb9,
+    0x0f1ba680, 0x4c0fdd97, 0x4803c7b8, 0x0b17bcaf, 0xce2b3196,
+    0x8d3f4a81, 0x44522be5, 0x074650f2, 0xc27addcb, 0x816ea6dc,
+    0x50a11f03, 0x13b56414, 0xd689e92d, 0x959d923a, 0x5cf0f35e,
+    0x1fe48849, 0xdad80570, 0x99cc7e67, 0x39410714, 0x7a557c03,
+    0xbf69f13a, 0xfc7d8a2d, 0x3510eb49, 0x7604905e, 0xb3381d67,
+    0xf02c6670, 0x21e3dfaf, 0x62f7a4b8, 0xa7cb2981, 0xe4df5296,
+    0x2db233f2, 0x6ea648e5, 0xab9ac5dc, 0xe88ebecb, 0xeb81363a,
+    0xa8954d2d, 0x6da9c014, 0x2ebdbb03, 0xe7d0da67, 0xa4c4a170,
+    0x61f82c49, 0x22ec575e, 0xf323ee81, 0xb0379596, 0x750b18af,
+    0x361f63b8, 0xff7202dc, 0xbc6679cb, 0x795af4f2, 0x3a4e8fe5,
+    0x9ac3f696, 0xd9d78d81, 0x1ceb00b8, 0x5fff7baf, 0x96921acb,
+    0xd58661dc, 0x10baece5, 0x53ae97f2, 0x82612e2d, 0xc175553a,
+    0x0449d803, 0x475da314, 0x8e30c270, 0xcd24b967, 0x0818345e,
+    0x4b0c4f49},
+   {0x00000000, 0x3e6bc2ef, 0x3dd0f504, 0x03bb37eb, 0x7aa0eb09,
+    0x44cb29e6, 0x47701e0d, 0x791bdce2, 0xf440d713, 0xca2b15fc,
+    0xc9902217, 0xf7fbe0f8, 0x8ee03c1a, 0xb08bfef5, 0xb330c91e,
+    0x8d5b0bf1, 0xe881ae27, 0xd6ea6cc8, 0xd5515b23, 0xeb3a99cc,
+    0x9221452e, 0xac4a87c1, 0xaff1b02a, 0x919a72c5, 0x1cc17934,
+    0x22aabbdb, 0x21118c30, 0x1f7a4edf, 0x6661923d, 0x580a50d2,
+    0x5bb16739, 0x65daa5d6, 0xd0035d4f, 0xee689fa0, 0xedd3a84b,
+    0xd3b86aa4, 0xaaa3b646, 0x94c874a9, 0x97734342, 0xa91881ad,
+    0x24438a5c, 0x1a2848b3, 0x19937f58, 0x27f8bdb7, 0x5ee36155,
+    0x6088a3ba, 0x63339451, 0x5d5856be, 0x3882f368, 0x06e93187,
+    0x0552066c, 0x3b39c483, 0x42221861, 0x7c49da8e, 0x7ff2ed65,
+    0x41992f8a, 0xccc2247b, 0xf2a9e694, 0xf112d17f, 0xcf791390,
+    0xb662cf72, 0x88090d9d, 0x8bb23a76, 0xb5d9f899, 0xa007ba9e,
+    0x9e6c7871, 0x9dd74f9a, 0xa3bc8d75, 0xdaa75197, 0xe4cc9378,
+    0xe777a493, 0xd91c667c, 0x54476d8d, 0x6a2caf62, 0x69979889,
+    0x57fc5a66, 0x2ee78684, 0x108c446b, 0x13377380, 0x2d5cb16f,
+    0x488614b9, 0x76edd656, 0x7556e1bd, 0x4b3d2352, 0x3226ffb0,
+    0x0c4d3d5f, 0x0ff60ab4, 0x319dc85b, 0xbcc6c3aa, 0x82ad0145,
+    0x811636ae, 0xbf7df441, 0xc66628a3, 0xf80dea4c, 0xfbb6dda7,
+    0xc5dd1f48, 0x7004e7d1, 0x4e6f253e, 0x4dd412d5, 0x73bfd03a,
+    0x0aa40cd8, 0x34cfce37, 0x3774f9dc, 0x091f3b33, 0x844430c2,
+    0xba2ff22d, 0xb994c5c6, 0x87ff0729, 0xfee4dbcb, 0xc08f1924,
+    0xc3342ecf, 0xfd5fec20, 0x988549f6, 0xa6ee8b19, 0xa555bcf2,
+    0x9b3e7e1d, 0xe225a2ff, 0xdc4e6010, 0xdff557fb, 0xe19e9514,
+    0x6cc59ee5, 0x52ae5c0a, 0x51156be1, 0x6f7ea90e, 0x166575ec,
+    0x280eb703, 0x2bb580e8, 0x15de4207, 0x010905e6, 0x3f62c709,
+    0x3cd9f0e2, 0x02b2320d, 0x7ba9eeef, 0x45c22c00, 0x46791beb,
+    0x7812d904, 0xf549d2f5, 0xcb22101a, 0xc89927f1, 0xf6f2e51e,
+    0x8fe939fc, 0xb182fb13, 0xb239ccf8, 0x8c520e17, 0xe988abc1,
+    0xd7e3692e, 0xd4585ec5, 0xea339c2a, 0x932840c8, 0xad438227,
+    0xaef8b5cc, 0x90937723, 0x1dc87cd2, 0x23a3be3d, 0x201889d6,
+    0x1e734b39, 0x676897db, 0x59035534, 0x5ab862df, 0x64d3a030,
+    0xd10a58a9, 0xef619a46, 0xecdaadad, 0xd2b16f42, 0xabaab3a0,
+    0x95c1714f, 0x967a46a4, 0xa811844b, 0x254a8fba, 0x1b214d55,
+    0x189a7abe, 0x26f1b851, 0x5fea64b3, 0x6181a65c, 0x623a91b7,
+    0x5c515358, 0x398bf68e, 0x07e03461, 0x045b038a, 0x3a30c165,
+    0x432b1d87, 0x7d40df68, 0x7efbe883, 0x40902a6c, 0xcdcb219d,
+    0xf3a0e372, 0xf01bd499, 0xce701676, 0xb76bca94, 0x8900087b,
+    0x8abb3f90, 0xb4d0fd7f, 0xa10ebf78, 0x9f657d97, 0x9cde4a7c,
+    0xa2b58893, 0xdbae5471, 0xe5c5969e, 0xe67ea175, 0xd815639a,
+    0x554e686b, 0x6b25aa84, 0x689e9d6f, 0x56f55f80, 0x2fee8362,
+    0x1185418d, 0x123e7666, 0x2c55b489, 0x498f115f, 0x77e4d3b0,
+    0x745fe45b, 0x4a3426b4, 0x332ffa56, 0x0d4438b9, 0x0eff0f52,
+    0x3094cdbd, 0xbdcfc64c, 0x83a404a3, 0x801f3348, 0xbe74f1a7,
+    0xc76f2d45, 0xf904efaa, 0xfabfd841, 0xc4d41aae, 0x710de237,
+    0x4f6620d8, 0x4cdd1733, 0x72b6d5dc, 0x0bad093e, 0x35c6cbd1,
+    0x367dfc3a, 0x08163ed5, 0x854d3524, 0xbb26f7cb, 0xb89dc020,
+    0x86f602cf, 0xffedde2d, 0xc1861cc2, 0xc23d2b29, 0xfc56e9c6,
+    0x998c4c10, 0xa7e78eff, 0xa45cb914, 0x9a377bfb, 0xe32ca719,
+    0xdd4765f6, 0xdefc521d, 0xe09790f2, 0x6dcc9b03, 0x53a759ec,
+    0x501c6e07, 0x6e77ace8, 0x176c700a, 0x2907b2e5, 0x2abc850e,
+    0x14d747e1},
+   {0x00000000, 0xc0df8ec1, 0xc1b96c58, 0x0166e299, 0x8273d9b0,
+    0x42ac5771, 0x43cab5e8, 0x83153b29, 0x45e1c3ba, 0x853e4d7b,
+    0x8458afe2, 0x44872123, 0xc7921a0a, 0x074d94cb, 0x062b7652,
+    0xc6f4f893, 0xcbc4f6ae, 0x0b1b786f, 0x0a7d9af6, 0xcaa21437,
+    0x49b72f1e, 0x8968a1df, 0x880e4346, 0x48d1cd87, 0x8e253514,
+    0x4efabbd5, 0x4f9c594c, 0x8f43d78d, 0x0c56eca4, 0xcc896265,
+    0xcdef80fc, 0x0d300e3d, 0xd78f9c86, 0x17501247, 0x1636f0de,
+    0xd6e97e1f, 0x55fc4536, 0x9523cbf7, 0x9445296e, 0x549aa7af,
+    0x926e5f3c, 0x52b1d1fd, 0x53d73364, 0x9308bda5, 0x101d868c,
+    0xd0c2084d, 0xd1a4ead4, 0x117b6415, 0x1c4b6a28, 0xdc94e4e9,
+    0xddf20670, 0x1d2d88b1, 0x9e38b398, 0x5ee73d59, 0x5f81dfc0,
+    0x9f5e5101, 0x59aaa992, 0x99752753, 0x9813c5ca, 0x58cc4b0b,
+    0xdbd97022, 0x1b06fee3, 0x1a601c7a, 0xdabf92bb, 0xef1948d6,
+    0x2fc6c617, 0x2ea0248e, 0xee7faa4f, 0x6d6a9166, 0xadb51fa7,
+    0xacd3fd3e, 0x6c0c73ff, 0xaaf88b6c, 0x6a2705ad, 0x6b41e734,
+    0xab9e69f5, 0x288b52dc, 0xe854dc1d, 0xe9323e84, 0x29edb045,
+    0x24ddbe78, 0xe40230b9, 0xe564d220, 0x25bb5ce1, 0xa6ae67c8,
+    0x6671e909, 0x67170b90, 0xa7c88551, 0x613c7dc2, 0xa1e3f303,
+    0xa085119a, 0x605a9f5b, 0xe34fa472, 0x23902ab3, 0x22f6c82a,
+    0xe22946eb, 0x3896d450, 0xf8495a91, 0xf92fb808, 0x39f036c9,
+    0xbae50de0, 0x7a3a8321, 0x7b5c61b8, 0xbb83ef79, 0x7d7717ea,
+    0xbda8992b, 0xbcce7bb2, 0x7c11f573, 0xff04ce5a, 0x3fdb409b,
+    0x3ebda202, 0xfe622cc3, 0xf35222fe, 0x338dac3f, 0x32eb4ea6,
+    0xf234c067, 0x7121fb4e, 0xb1fe758f, 0xb0989716, 0x704719d7,
+    0xb6b3e144, 0x766c6f85, 0x770a8d1c, 0xb7d503dd, 0x34c038f4,
+    0xf41fb635, 0xf57954ac, 0x35a6da6d, 0x9f35e177, 0x5fea6fb6,
+    0x5e8c8d2f, 0x9e5303ee, 0x1d4638c7, 0xdd99b606, 0xdcff549f,
+    0x1c20da5e, 0xdad422cd, 0x1a0bac0c, 0x1b6d4e95, 0xdbb2c054,
+    0x58a7fb7d, 0x987875bc, 0x991e9725, 0x59c119e4, 0x54f117d9,
+    0x942e9918, 0x95487b81, 0x5597f540, 0xd682ce69, 0x165d40a8,
+    0x173ba231, 0xd7e42cf0, 0x1110d463, 0xd1cf5aa2, 0xd0a9b83b,
+    0x107636fa, 0x93630dd3, 0x53bc8312, 0x52da618b, 0x9205ef4a,
+    0x48ba7df1, 0x8865f330, 0x890311a9, 0x49dc9f68, 0xcac9a441,
+    0x0a162a80, 0x0b70c819, 0xcbaf46d8, 0x0d5bbe4b, 0xcd84308a,
+    0xcce2d213, 0x0c3d5cd2, 0x8f2867fb, 0x4ff7e93a, 0x4e910ba3,
+    0x8e4e8562, 0x837e8b5f, 0x43a1059e, 0x42c7e707, 0x821869c6,
+    0x010d52ef, 0xc1d2dc2e, 0xc0b43eb7, 0x006bb076, 0xc69f48e5,
+    0x0640c624, 0x072624bd, 0xc7f9aa7c, 0x44ec9155, 0x84331f94,
+    0x8555fd0d, 0x458a73cc, 0x702ca9a1, 0xb0f32760, 0xb195c5f9,
+    0x714a4b38, 0xf25f7011, 0x3280fed0, 0x33e61c49, 0xf3399288,
+    0x35cd6a1b, 0xf512e4da, 0xf4740643, 0x34ab8882, 0xb7beb3ab,
+    0x77613d6a, 0x7607dff3, 0xb6d85132, 0xbbe85f0f, 0x7b37d1ce,
+    0x7a513357, 0xba8ebd96, 0x399b86bf, 0xf944087e, 0xf822eae7,
+    0x38fd6426, 0xfe099cb5, 0x3ed61274, 0x3fb0f0ed, 0xff6f7e2c,
+    0x7c7a4505, 0xbca5cbc4, 0xbdc3295d, 0x7d1ca79c, 0xa7a33527,
+    0x677cbbe6, 0x661a597f, 0xa6c5d7be, 0x25d0ec97, 0xe50f6256,
+    0xe46980cf, 0x24b60e0e, 0xe242f69d, 0x229d785c, 0x23fb9ac5,
+    0xe3241404, 0x60312f2d, 0xa0eea1ec, 0xa1884375, 0x6157cdb4,
+    0x6c67c389, 0xacb84d48, 0xaddeafd1, 0x6d012110, 0xee141a39,
+    0x2ecb94f8, 0x2fad7661, 0xef72f8a0, 0x29860033, 0xe9598ef2,
+    0xe83f6c6b, 0x28e0e2aa, 0xabf5d983, 0x6b2a5742, 0x6a4cb5db,
+    0xaa933b1a},
+   {0x00000000, 0x6f4ca59b, 0x9f9e3bec, 0xf0d29e77, 0x7f3b0603,
+    0x1077a398, 0xe0a53def, 0x8fe99874, 0xfe760c06, 0x913aa99d,
+    0x61e837ea, 0x0ea49271, 0x814d0a05, 0xee01af9e, 0x1ed331e9,
+    0x719f9472, 0xfced180c, 0x93a1bd97, 0x637323e0, 0x0c3f867b,
+    0x83d61e0f, 0xec9abb94, 0x1c4825e3, 0x73048078, 0x029b140a,
+    0x6dd7b191, 0x9d052fe6, 0xf2498a7d, 0x7da01209, 0x12ecb792,
+    0xe23e29e5, 0x8d728c7e, 0xf8db3118, 0x97979483, 0x67450af4,
+    0x0809af6f, 0x87e0371b, 0xe8ac9280, 0x187e0cf7, 0x7732a96c,
+    0x06ad3d1e, 0x69e19885, 0x993306f2, 0xf67fa369, 0x79963b1d,
+    0x16da9e86, 0xe60800f1, 0x8944a56a, 0x04362914, 0x6b7a8c8f,
+    0x9ba812f8, 0xf4e4b763, 0x7b0d2f17, 0x14418a8c, 0xe49314fb,
+    0x8bdfb160, 0xfa402512, 0x950c8089, 0x65de1efe, 0x0a92bb65,
+    0x857b2311, 0xea37868a, 0x1ae518fd, 0x75a9bd66, 0xf0b76330,
+    0x9ffbc6ab, 0x6f2958dc, 0x0065fd47, 0x8f8c6533, 0xe0c0c0a8,
+    0x10125edf, 0x7f5efb44, 0x0ec16f36, 0x618dcaad, 0x915f54da,
+    0xfe13f141, 0x71fa6935, 0x1eb6ccae, 0xee6452d9, 0x8128f742,
+    0x0c5a7b3c, 0x6316dea7, 0x93c440d0, 0xfc88e54b, 0x73617d3f,
+    0x1c2dd8a4, 0xecff46d3, 0x83b3e348, 0xf22c773a, 0x9d60d2a1,
+    0x6db24cd6, 0x02fee94d, 0x8d177139, 0xe25bd4a2, 0x12894ad5,
+    0x7dc5ef4e, 0x086c5228, 0x6720f7b3, 0x97f269c4, 0xf8becc5f,
+    0x7757542b, 0x181bf1b0, 0xe8c96fc7, 0x8785ca5c, 0xf61a5e2e,
+    0x9956fbb5, 0x698465c2, 0x06c8c059, 0x8921582d, 0xe66dfdb6,
+    0x16bf63c1, 0x79f3c65a, 0xf4814a24, 0x9bcdefbf, 0x6b1f71c8,
+    0x0453d453, 0x8bba4c27, 0xe4f6e9bc, 0x142477cb, 0x7b68d250,
+    0x0af74622, 0x65bbe3b9, 0x95697dce, 0xfa25d855, 0x75cc4021,
+    0x1a80e5ba, 0xea527bcd, 0x851ede56, 0xe06fc760, 0x8f2362fb,
+    0x7ff1fc8c, 0x10bd5917, 0x9f54c163, 0xf01864f8, 0x00cafa8f,
+    0x6f865f14, 0x1e19cb66, 0x71556efd, 0x8187f08a, 0xeecb5511,
+    0x6122cd65, 0x0e6e68fe, 0xfebcf689, 0x91f05312, 0x1c82df6c,
+    0x73ce7af7, 0x831ce480, 0xec50411b, 0x63b9d96f, 0x0cf57cf4,
+    0xfc27e283, 0x936b4718, 0xe2f4d36a, 0x8db876f1, 0x7d6ae886,
+    0x12264d1d, 0x9dcfd569, 0xf28370f2, 0x0251ee85, 0x6d1d4b1e,
+    0x18b4f678, 0x77f853e3, 0x872acd94, 0xe866680f, 0x678ff07b,
+    0x08c355e0, 0xf811cb97, 0x975d6e0c, 0xe6c2fa7e, 0x898e5fe5,
+    0x795cc192, 0x16106409, 0x99f9fc7d, 0xf6b559e6, 0x0667c791,
+    0x692b620a, 0xe459ee74, 0x8b154bef, 0x7bc7d598, 0x148b7003,
+    0x9b62e877, 0xf42e4dec, 0x04fcd39b, 0x6bb07600, 0x1a2fe272,
+    0x756347e9, 0x85b1d99e, 0xeafd7c05, 0x6514e471, 0x0a5841ea,
+    0xfa8adf9d, 0x95c67a06, 0x10d8a450, 0x7f9401cb, 0x8f469fbc,
+    0xe00a3a27, 0x6fe3a253, 0x00af07c8, 0xf07d99bf, 0x9f313c24,
+    0xeeaea856, 0x81e20dcd, 0x713093ba, 0x1e7c3621, 0x9195ae55,
+    0xfed90bce, 0x0e0b95b9, 0x61473022, 0xec35bc5c, 0x837919c7,
+    0x73ab87b0, 0x1ce7222b, 0x930eba5f, 0xfc421fc4, 0x0c9081b3,
+    0x63dc2428, 0x1243b05a, 0x7d0f15c1, 0x8ddd8bb6, 0xe2912e2d,
+    0x6d78b659, 0x023413c2, 0xf2e68db5, 0x9daa282e, 0xe8039548,
+    0x874f30d3, 0x779daea4, 0x18d10b3f, 0x9738934b, 0xf87436d0,
+    0x08a6a8a7, 0x67ea0d3c, 0x1675994e, 0x79393cd5, 0x89eba2a2,
+    0xe6a70739, 0x694e9f4d, 0x06023ad6, 0xf6d0a4a1, 0x999c013a,
+    0x14ee8d44, 0x7ba228df, 0x8b70b6a8, 0xe43c1333, 0x6bd58b47,
+    0x04992edc, 0xf44bb0ab, 0x9b071530, 0xea988142, 0x85d424d9,
+    0x7506baae, 0x1a4a1f35, 0x95a38741, 0xfaef22da, 0x0a3dbcad,
+    0x65711936}};
+
+#endif
+
+#endif
+
+#if N == 4
+
+#if W == 8
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0xf1da05aa, 0x38c50d15, 0xc91f08bf, 0x718a1a2a,
+    0x80501f80, 0x494f173f, 0xb8951295, 0xe3143454, 0x12ce31fe,
+    0xdbd13941, 0x2a0b3ceb, 0x929e2e7e, 0x63442bd4, 0xaa5b236b,
+    0x5b8126c1, 0x1d596ee9, 0xec836b43, 0x259c63fc, 0xd4466656,
+    0x6cd374c3, 0x9d097169, 0x541679d6, 0xa5cc7c7c, 0xfe4d5abd,
+    0x0f975f17, 0xc68857a8, 0x37525202, 0x8fc74097, 0x7e1d453d,
+    0xb7024d82, 0x46d84828, 0x3ab2ddd2, 0xcb68d878, 0x0277d0c7,
+    0xf3add56d, 0x4b38c7f8, 0xbae2c252, 0x73fdcaed, 0x8227cf47,
+    0xd9a6e986, 0x287cec2c, 0xe163e493, 0x10b9e139, 0xa82cf3ac,
+    0x59f6f606, 0x90e9feb9, 0x6133fb13, 0x27ebb33b, 0xd631b691,
+    0x1f2ebe2e, 0xeef4bb84, 0x5661a911, 0xa7bbacbb, 0x6ea4a404,
+    0x9f7ea1ae, 0xc4ff876f, 0x352582c5, 0xfc3a8a7a, 0x0de08fd0,
+    0xb5759d45, 0x44af98ef, 0x8db09050, 0x7c6a95fa, 0x7565bba4,
+    0x84bfbe0e, 0x4da0b6b1, 0xbc7ab31b, 0x04efa18e, 0xf535a424,
+    0x3c2aac9b, 0xcdf0a931, 0x96718ff0, 0x67ab8a5a, 0xaeb482e5,
+    0x5f6e874f, 0xe7fb95da, 0x16219070, 0xdf3e98cf, 0x2ee49d65,
+    0x683cd54d, 0x99e6d0e7, 0x50f9d858, 0xa123ddf2, 0x19b6cf67,
+    0xe86ccacd, 0x2173c272, 0xd0a9c7d8, 0x8b28e119, 0x7af2e4b3,
+    0xb3edec0c, 0x4237e9a6, 0xfaa2fb33, 0x0b78fe99, 0xc267f626,
+    0x33bdf38c, 0x4fd76676, 0xbe0d63dc, 0x77126b63, 0x86c86ec9,
+    0x3e5d7c5c, 0xcf8779f6, 0x06987149, 0xf74274e3, 0xacc35222,
+    0x5d195788, 0x94065f37, 0x65dc5a9d, 0xdd494808, 0x2c934da2,
+    0xe58c451d, 0x145640b7, 0x528e089f, 0xa3540d35, 0x6a4b058a,
+    0x9b910020, 0x230412b5, 0xd2de171f, 0x1bc11fa0, 0xea1b1a0a,
+    0xb19a3ccb, 0x40403961, 0x895f31de, 0x78853474, 0xc01026e1,
+    0x31ca234b, 0xf8d52bf4, 0x090f2e5e, 0xeacb7748, 0x1b1172e2,
+    0xd20e7a5d, 0x23d47ff7, 0x9b416d62, 0x6a9b68c8, 0xa3846077,
+    0x525e65dd, 0x09df431c, 0xf80546b6, 0x311a4e09, 0xc0c04ba3,
+    0x78555936, 0x898f5c9c, 0x40905423, 0xb14a5189, 0xf79219a1,
+    0x06481c0b, 0xcf5714b4, 0x3e8d111e, 0x8618038b, 0x77c20621,
+    0xbedd0e9e, 0x4f070b34, 0x14862df5, 0xe55c285f, 0x2c4320e0,
+    0xdd99254a, 0x650c37df, 0x94d63275, 0x5dc93aca, 0xac133f60,
+    0xd079aa9a, 0x21a3af30, 0xe8bca78f, 0x1966a225, 0xa1f3b0b0,
+    0x5029b51a, 0x9936bda5, 0x68ecb80f, 0x336d9ece, 0xc2b79b64,
+    0x0ba893db, 0xfa729671, 0x42e784e4, 0xb33d814e, 0x7a2289f1,
+    0x8bf88c5b, 0xcd20c473, 0x3cfac1d9, 0xf5e5c966, 0x043fcccc,
+    0xbcaade59, 0x4d70dbf3, 0x846fd34c, 0x75b5d6e6, 0x2e34f027,
+    0xdfeef58d, 0x16f1fd32, 0xe72bf898, 0x5fbeea0d, 0xae64efa7,
+    0x677be718, 0x96a1e2b2, 0x9faeccec, 0x6e74c946, 0xa76bc1f9,
+    0x56b1c453, 0xee24d6c6, 0x1ffed36c, 0xd6e1dbd3, 0x273bde79,
+    0x7cbaf8b8, 0x8d60fd12, 0x447ff5ad, 0xb5a5f007, 0x0d30e292,
+    0xfceae738, 0x35f5ef87, 0xc42fea2d, 0x82f7a205, 0x732da7af,
+    0xba32af10, 0x4be8aaba, 0xf37db82f, 0x02a7bd85, 0xcbb8b53a,
+    0x3a62b090, 0x61e39651, 0x903993fb, 0x59269b44, 0xa8fc9eee,
+    0x10698c7b, 0xe1b389d1, 0x28ac816e, 0xd97684c4, 0xa51c113e,
+    0x54c61494, 0x9dd91c2b, 0x6c031981, 0xd4960b14, 0x254c0ebe,
+    0xec530601, 0x1d8903ab, 0x4608256a, 0xb7d220c0, 0x7ecd287f,
+    0x8f172dd5, 0x37823f40, 0xc6583aea, 0x0f473255, 0xfe9d37ff,
+    0xb8457fd7, 0x499f7a7d, 0x808072c2, 0x715a7768, 0xc9cf65fd,
+    0x38156057, 0xf10a68e8, 0x00d06d42, 0x5b514b83, 0xaa8b4e29,
+    0x63944696, 0x924e433c, 0x2adb51a9, 0xdb015403, 0x121e5cbc,
+    0xe3c45916},
+   {0x00000000, 0x0ee7e8d1, 0x1dcfd1a2, 0x13283973, 0x3b9fa344,
+    0x35784b95, 0x265072e6, 0x28b79a37, 0x773f4688, 0x79d8ae59,
+    0x6af0972a, 0x64177ffb, 0x4ca0e5cc, 0x42470d1d, 0x516f346e,
+    0x5f88dcbf, 0xee7e8d10, 0xe09965c1, 0xf3b15cb2, 0xfd56b463,
+    0xd5e12e54, 0xdb06c685, 0xc82efff6, 0xc6c91727, 0x9941cb98,
+    0x97a62349, 0x848e1a3a, 0x8a69f2eb, 0xa2de68dc, 0xac39800d,
+    0xbf11b97e, 0xb1f651af, 0x078c1c61, 0x096bf4b0, 0x1a43cdc3,
+    0x14a42512, 0x3c13bf25, 0x32f457f4, 0x21dc6e87, 0x2f3b8656,
+    0x70b35ae9, 0x7e54b238, 0x6d7c8b4b, 0x639b639a, 0x4b2cf9ad,
+    0x45cb117c, 0x56e3280f, 0x5804c0de, 0xe9f29171, 0xe71579a0,
+    0xf43d40d3, 0xfadaa802, 0xd26d3235, 0xdc8adae4, 0xcfa2e397,
+    0xc1450b46, 0x9ecdd7f9, 0x902a3f28, 0x8302065b, 0x8de5ee8a,
+    0xa55274bd, 0xabb59c6c, 0xb89da51f, 0xb67a4dce, 0x0f1838c2,
+    0x01ffd013, 0x12d7e960, 0x1c3001b1, 0x34879b86, 0x3a607357,
+    0x29484a24, 0x27afa2f5, 0x78277e4a, 0x76c0969b, 0x65e8afe8,
+    0x6b0f4739, 0x43b8dd0e, 0x4d5f35df, 0x5e770cac, 0x5090e47d,
+    0xe166b5d2, 0xef815d03, 0xfca96470, 0xf24e8ca1, 0xdaf91696,
+    0xd41efe47, 0xc736c734, 0xc9d12fe5, 0x9659f35a, 0x98be1b8b,
+    0x8b9622f8, 0x8571ca29, 0xadc6501e, 0xa321b8cf, 0xb00981bc,
+    0xbeee696d, 0x089424a3, 0x0673cc72, 0x155bf501, 0x1bbc1dd0,
+    0x330b87e7, 0x3dec6f36, 0x2ec45645, 0x2023be94, 0x7fab622b,
+    0x714c8afa, 0x6264b389, 0x6c835b58, 0x4434c16f, 0x4ad329be,
+    0x59fb10cd, 0x571cf81c, 0xe6eaa9b3, 0xe80d4162, 0xfb257811,
+    0xf5c290c0, 0xdd750af7, 0xd392e226, 0xc0badb55, 0xce5d3384,
+    0x91d5ef3b, 0x9f3207ea, 0x8c1a3e99, 0x82fdd648, 0xaa4a4c7f,
+    0xa4ada4ae, 0xb7859ddd, 0xb962750c, 0x1e307184, 0x10d79955,
+    0x03ffa026, 0x0d1848f7, 0x25afd2c0, 0x2b483a11, 0x38600362,
+    0x3687ebb3, 0x690f370c, 0x67e8dfdd, 0x74c0e6ae, 0x7a270e7f,
+    0x52909448, 0x5c777c99, 0x4f5f45ea, 0x41b8ad3b, 0xf04efc94,
+    0xfea91445, 0xed812d36, 0xe366c5e7, 0xcbd15fd0, 0xc536b701,
+    0xd61e8e72, 0xd8f966a3, 0x8771ba1c, 0x899652cd, 0x9abe6bbe,
+    0x9459836f, 0xbcee1958, 0xb209f189, 0xa121c8fa, 0xafc6202b,
+    0x19bc6de5, 0x175b8534, 0x0473bc47, 0x0a945496, 0x2223cea1,
+    0x2cc42670, 0x3fec1f03, 0x310bf7d2, 0x6e832b6d, 0x6064c3bc,
+    0x734cfacf, 0x7dab121e, 0x551c8829, 0x5bfb60f8, 0x48d3598b,
+    0x4634b15a, 0xf7c2e0f5, 0xf9250824, 0xea0d3157, 0xe4ead986,
+    0xcc5d43b1, 0xc2baab60, 0xd1929213, 0xdf757ac2, 0x80fda67d,
+    0x8e1a4eac, 0x9d3277df, 0x93d59f0e, 0xbb620539, 0xb585ede8,
+    0xa6add49b, 0xa84a3c4a, 0x11284946, 0x1fcfa197, 0x0ce798e4,
+    0x02007035, 0x2ab7ea02, 0x245002d3, 0x37783ba0, 0x399fd371,
+    0x66170fce, 0x68f0e71f, 0x7bd8de6c, 0x753f36bd, 0x5d88ac8a,
+    0x536f445b, 0x40477d28, 0x4ea095f9, 0xff56c456, 0xf1b12c87,
+    0xe29915f4, 0xec7efd25, 0xc4c96712, 0xca2e8fc3, 0xd906b6b0,
+    0xd7e15e61, 0x886982de, 0x868e6a0f, 0x95a6537c, 0x9b41bbad,
+    0xb3f6219a, 0xbd11c94b, 0xae39f038, 0xa0de18e9, 0x16a45527,
+    0x1843bdf6, 0x0b6b8485, 0x058c6c54, 0x2d3bf663, 0x23dc1eb2,
+    0x30f427c1, 0x3e13cf10, 0x619b13af, 0x6f7cfb7e, 0x7c54c20d,
+    0x72b32adc, 0x5a04b0eb, 0x54e3583a, 0x47cb6149, 0x492c8998,
+    0xf8dad837, 0xf63d30e6, 0xe5150995, 0xebf2e144, 0xc3457b73,
+    0xcda293a2, 0xde8aaad1, 0xd06d4200, 0x8fe59ebf, 0x8102766e,
+    0x922a4f1d, 0x9ccda7cc, 0xb47a3dfb, 0xba9dd52a, 0xa9b5ec59,
+    0xa7520488},
+   {0x00000000, 0x3c60e308, 0x78c1c610, 0x44a12518, 0xf1838c20,
+    0xcde36f28, 0x89424a30, 0xb522a938, 0x38761e01, 0x0416fd09,
+    0x40b7d811, 0x7cd73b19, 0xc9f59221, 0xf5957129, 0xb1345431,
+    0x8d54b739, 0x70ec3c02, 0x4c8cdf0a, 0x082dfa12, 0x344d191a,
+    0x816fb022, 0xbd0f532a, 0xf9ae7632, 0xc5ce953a, 0x489a2203,
+    0x74fac10b, 0x305be413, 0x0c3b071b, 0xb919ae23, 0x85794d2b,
+    0xc1d86833, 0xfdb88b3b, 0xe1d87804, 0xddb89b0c, 0x9919be14,
+    0xa5795d1c, 0x105bf424, 0x2c3b172c, 0x689a3234, 0x54fad13c,
+    0xd9ae6605, 0xe5ce850d, 0xa16fa015, 0x9d0f431d, 0x282dea25,
+    0x144d092d, 0x50ec2c35, 0x6c8ccf3d, 0x91344406, 0xad54a70e,
+    0xe9f58216, 0xd595611e, 0x60b7c826, 0x5cd72b2e, 0x18760e36,
+    0x2416ed3e, 0xa9425a07, 0x9522b90f, 0xd1839c17, 0xede37f1f,
+    0x58c1d627, 0x64a1352f, 0x20001037, 0x1c60f33f, 0x18c1f649,
+    0x24a11541, 0x60003059, 0x5c60d351, 0xe9427a69, 0xd5229961,
+    0x9183bc79, 0xade35f71, 0x20b7e848, 0x1cd70b40, 0x58762e58,
+    0x6416cd50, 0xd1346468, 0xed548760, 0xa9f5a278, 0x95954170,
+    0x682dca4b, 0x544d2943, 0x10ec0c5b, 0x2c8cef53, 0x99ae466b,
+    0xa5cea563, 0xe16f807b, 0xdd0f6373, 0x505bd44a, 0x6c3b3742,
+    0x289a125a, 0x14faf152, 0xa1d8586a, 0x9db8bb62, 0xd9199e7a,
+    0xe5797d72, 0xf9198e4d, 0xc5796d45, 0x81d8485d, 0xbdb8ab55,
+    0x089a026d, 0x34fae165, 0x705bc47d, 0x4c3b2775, 0xc16f904c,
+    0xfd0f7344, 0xb9ae565c, 0x85ceb554, 0x30ec1c6c, 0x0c8cff64,
+    0x482dda7c, 0x744d3974, 0x89f5b24f, 0xb5955147, 0xf134745f,
+    0xcd549757, 0x78763e6f, 0x4416dd67, 0x00b7f87f, 0x3cd71b77,
+    0xb183ac4e, 0x8de34f46, 0xc9426a5e, 0xf5228956, 0x4000206e,
+    0x7c60c366, 0x38c1e67e, 0x04a10576, 0x3183ec92, 0x0de30f9a,
+    0x49422a82, 0x7522c98a, 0xc00060b2, 0xfc6083ba, 0xb8c1a6a2,
+    0x84a145aa, 0x09f5f293, 0x3595119b, 0x71343483, 0x4d54d78b,
+    0xf8767eb3, 0xc4169dbb, 0x80b7b8a3, 0xbcd75bab, 0x416fd090,
+    0x7d0f3398, 0x39ae1680, 0x05cef588, 0xb0ec5cb0, 0x8c8cbfb8,
+    0xc82d9aa0, 0xf44d79a8, 0x7919ce91, 0x45792d99, 0x01d80881,
+    0x3db8eb89, 0x889a42b1, 0xb4faa1b9, 0xf05b84a1, 0xcc3b67a9,
+    0xd05b9496, 0xec3b779e, 0xa89a5286, 0x94fab18e, 0x21d818b6,
+    0x1db8fbbe, 0x5919dea6, 0x65793dae, 0xe82d8a97, 0xd44d699f,
+    0x90ec4c87, 0xac8caf8f, 0x19ae06b7, 0x25cee5bf, 0x616fc0a7,
+    0x5d0f23af, 0xa0b7a894, 0x9cd74b9c, 0xd8766e84, 0xe4168d8c,
+    0x513424b4, 0x6d54c7bc, 0x29f5e2a4, 0x159501ac, 0x98c1b695,
+    0xa4a1559d, 0xe0007085, 0xdc60938d, 0x69423ab5, 0x5522d9bd,
+    0x1183fca5, 0x2de31fad, 0x29421adb, 0x1522f9d3, 0x5183dccb,
+    0x6de33fc3, 0xd8c196fb, 0xe4a175f3, 0xa00050eb, 0x9c60b3e3,
+    0x113404da, 0x2d54e7d2, 0x69f5c2ca, 0x559521c2, 0xe0b788fa,
+    0xdcd76bf2, 0x98764eea, 0xa416ade2, 0x59ae26d9, 0x65cec5d1,
+    0x216fe0c9, 0x1d0f03c1, 0xa82daaf9, 0x944d49f1, 0xd0ec6ce9,
+    0xec8c8fe1, 0x61d838d8, 0x5db8dbd0, 0x1919fec8, 0x25791dc0,
+    0x905bb4f8, 0xac3b57f0, 0xe89a72e8, 0xd4fa91e0, 0xc89a62df,
+    0xf4fa81d7, 0xb05ba4cf, 0x8c3b47c7, 0x3919eeff, 0x05790df7,
+    0x41d828ef, 0x7db8cbe7, 0xf0ec7cde, 0xcc8c9fd6, 0x882dbace,
+    0xb44d59c6, 0x016ff0fe, 0x3d0f13f6, 0x79ae36ee, 0x45ced5e6,
+    0xb8765edd, 0x8416bdd5, 0xc0b798cd, 0xfcd77bc5, 0x49f5d2fd,
+    0x759531f5, 0x313414ed, 0x0d54f7e5, 0x800040dc, 0xbc60a3d4,
+    0xf8c186cc, 0xc4a165c4, 0x7183ccfc, 0x4de32ff4, 0x09420aec,
+    0x3522e9e4},
+   {0x00000000, 0x6307d924, 0xc60fb248, 0xa5086b6c, 0x576e62d1,
+    0x3469bbf5, 0x9161d099, 0xf26609bd, 0xaedcc5a2, 0xcddb1c86,
+    0x68d377ea, 0x0bd4aece, 0xf9b2a773, 0x9ab57e57, 0x3fbd153b,
+    0x5cbacc1f, 0x86c88d05, 0xe5cf5421, 0x40c73f4d, 0x23c0e669,
+    0xd1a6efd4, 0xb2a136f0, 0x17a95d9c, 0x74ae84b8, 0x281448a7,
+    0x4b139183, 0xee1bfaef, 0x8d1c23cb, 0x7f7a2a76, 0x1c7df352,
+    0xb975983e, 0xda72411a, 0xd6e01c4b, 0xb5e7c56f, 0x10efae03,
+    0x73e87727, 0x818e7e9a, 0xe289a7be, 0x4781ccd2, 0x248615f6,
+    0x783cd9e9, 0x1b3b00cd, 0xbe336ba1, 0xdd34b285, 0x2f52bb38,
+    0x4c55621c, 0xe95d0970, 0x8a5ad054, 0x5028914e, 0x332f486a,
+    0x96272306, 0xf520fa22, 0x0746f39f, 0x64412abb, 0xc14941d7,
+    0xa24e98f3, 0xfef454ec, 0x9df38dc8, 0x38fbe6a4, 0x5bfc3f80,
+    0xa99a363d, 0xca9def19, 0x6f958475, 0x0c925d51, 0x76b13ed7,
+    0x15b6e7f3, 0xb0be8c9f, 0xd3b955bb, 0x21df5c06, 0x42d88522,
+    0xe7d0ee4e, 0x84d7376a, 0xd86dfb75, 0xbb6a2251, 0x1e62493d,
+    0x7d659019, 0x8f0399a4, 0xec044080, 0x490c2bec, 0x2a0bf2c8,
+    0xf079b3d2, 0x937e6af6, 0x3676019a, 0x5571d8be, 0xa717d103,
+    0xc4100827, 0x6118634b, 0x021fba6f, 0x5ea57670, 0x3da2af54,
+    0x98aac438, 0xfbad1d1c, 0x09cb14a1, 0x6acccd85, 0xcfc4a6e9,
+    0xacc37fcd, 0xa051229c, 0xc356fbb8, 0x665e90d4, 0x055949f0,
+    0xf73f404d, 0x94389969, 0x3130f205, 0x52372b21, 0x0e8de73e,
+    0x6d8a3e1a, 0xc8825576, 0xab858c52, 0x59e385ef, 0x3ae45ccb,
+    0x9fec37a7, 0xfcebee83, 0x2699af99, 0x459e76bd, 0xe0961dd1,
+    0x8391c4f5, 0x71f7cd48, 0x12f0146c, 0xb7f87f00, 0xd4ffa624,
+    0x88456a3b, 0xeb42b31f, 0x4e4ad873, 0x2d4d0157, 0xdf2b08ea,
+    0xbc2cd1ce, 0x1924baa2, 0x7a236386, 0xed627dae, 0x8e65a48a,
+    0x2b6dcfe6, 0x486a16c2, 0xba0c1f7f, 0xd90bc65b, 0x7c03ad37,
+    0x1f047413, 0x43beb80c, 0x20b96128, 0x85b10a44, 0xe6b6d360,
+    0x14d0dadd, 0x77d703f9, 0xd2df6895, 0xb1d8b1b1, 0x6baaf0ab,
+    0x08ad298f, 0xada542e3, 0xcea29bc7, 0x3cc4927a, 0x5fc34b5e,
+    0xfacb2032, 0x99ccf916, 0xc5763509, 0xa671ec2d, 0x03798741,
+    0x607e5e65, 0x921857d8, 0xf11f8efc, 0x5417e590, 0x37103cb4,
+    0x3b8261e5, 0x5885b8c1, 0xfd8dd3ad, 0x9e8a0a89, 0x6cec0334,
+    0x0febda10, 0xaae3b17c, 0xc9e46858, 0x955ea447, 0xf6597d63,
+    0x5351160f, 0x3056cf2b, 0xc230c696, 0xa1371fb2, 0x043f74de,
+    0x6738adfa, 0xbd4aece0, 0xde4d35c4, 0x7b455ea8, 0x1842878c,
+    0xea248e31, 0x89235715, 0x2c2b3c79, 0x4f2ce55d, 0x13962942,
+    0x7091f066, 0xd5999b0a, 0xb69e422e, 0x44f84b93, 0x27ff92b7,
+    0x82f7f9db, 0xe1f020ff, 0x9bd34379, 0xf8d49a5d, 0x5ddcf131,
+    0x3edb2815, 0xccbd21a8, 0xafbaf88c, 0x0ab293e0, 0x69b54ac4,
+    0x350f86db, 0x56085fff, 0xf3003493, 0x9007edb7, 0x6261e40a,
+    0x01663d2e, 0xa46e5642, 0xc7698f66, 0x1d1bce7c, 0x7e1c1758,
+    0xdb147c34, 0xb813a510, 0x4a75acad, 0x29727589, 0x8c7a1ee5,
+    0xef7dc7c1, 0xb3c70bde, 0xd0c0d2fa, 0x75c8b996, 0x16cf60b2,
+    0xe4a9690f, 0x87aeb02b, 0x22a6db47, 0x41a10263, 0x4d335f32,
+    0x2e348616, 0x8b3ced7a, 0xe83b345e, 0x1a5d3de3, 0x795ae4c7,
+    0xdc528fab, 0xbf55568f, 0xe3ef9a90, 0x80e843b4, 0x25e028d8,
+    0x46e7f1fc, 0xb481f841, 0xd7862165, 0x728e4a09, 0x1189932d,
+    0xcbfbd237, 0xa8fc0b13, 0x0df4607f, 0x6ef3b95b, 0x9c95b0e6,
+    0xff9269c2, 0x5a9a02ae, 0x399ddb8a, 0x65271795, 0x0620ceb1,
+    0xa328a5dd, 0xc02f7cf9, 0x32497544, 0x514eac60, 0xf446c70c,
+    0x97411e28},
+   {0x00000000, 0x01b5fd1d, 0x036bfa3a, 0x02de0727, 0x06d7f474,
+    0x07620969, 0x05bc0e4e, 0x0409f353, 0x0dafe8e8, 0x0c1a15f5,
+    0x0ec412d2, 0x0f71efcf, 0x0b781c9c, 0x0acde181, 0x0813e6a6,
+    0x09a61bbb, 0x1b5fd1d0, 0x1aea2ccd, 0x18342bea, 0x1981d6f7,
+    0x1d8825a4, 0x1c3dd8b9, 0x1ee3df9e, 0x1f562283, 0x16f03938,
+    0x1745c425, 0x159bc302, 0x142e3e1f, 0x1027cd4c, 0x11923051,
+    0x134c3776, 0x12f9ca6b, 0x36bfa3a0, 0x370a5ebd, 0x35d4599a,
+    0x3461a487, 0x306857d4, 0x31ddaac9, 0x3303adee, 0x32b650f3,
+    0x3b104b48, 0x3aa5b655, 0x387bb172, 0x39ce4c6f, 0x3dc7bf3c,
+    0x3c724221, 0x3eac4506, 0x3f19b81b, 0x2de07270, 0x2c558f6d,
+    0x2e8b884a, 0x2f3e7557, 0x2b378604, 0x2a827b19, 0x285c7c3e,
+    0x29e98123, 0x204f9a98, 0x21fa6785, 0x232460a2, 0x22919dbf,
+    0x26986eec, 0x272d93f1, 0x25f394d6, 0x244669cb, 0x6d7f4740,
+    0x6ccaba5d, 0x6e14bd7a, 0x6fa14067, 0x6ba8b334, 0x6a1d4e29,
+    0x68c3490e, 0x6976b413, 0x60d0afa8, 0x616552b5, 0x63bb5592,
+    0x620ea88f, 0x66075bdc, 0x67b2a6c1, 0x656ca1e6, 0x64d95cfb,
+    0x76209690, 0x77956b8d, 0x754b6caa, 0x74fe91b7, 0x70f762e4,
+    0x71429ff9, 0x739c98de, 0x722965c3, 0x7b8f7e78, 0x7a3a8365,
+    0x78e48442, 0x7951795f, 0x7d588a0c, 0x7ced7711, 0x7e337036,
+    0x7f868d2b, 0x5bc0e4e0, 0x5a7519fd, 0x58ab1eda, 0x591ee3c7,
+    0x5d171094, 0x5ca2ed89, 0x5e7ceaae, 0x5fc917b3, 0x566f0c08,
+    0x57daf115, 0x5504f632, 0x54b10b2f, 0x50b8f87c, 0x510d0561,
+    0x53d30246, 0x5266ff5b, 0x409f3530, 0x412ac82d, 0x43f4cf0a,
+    0x42413217, 0x4648c144, 0x47fd3c59, 0x45233b7e, 0x4496c663,
+    0x4d30ddd8, 0x4c8520c5, 0x4e5b27e2, 0x4feedaff, 0x4be729ac,
+    0x4a52d4b1, 0x488cd396, 0x49392e8b, 0xdafe8e80, 0xdb4b739d,
+    0xd99574ba, 0xd82089a7, 0xdc297af4, 0xdd9c87e9, 0xdf4280ce,
+    0xdef77dd3, 0xd7516668, 0xd6e49b75, 0xd43a9c52, 0xd58f614f,
+    0xd186921c, 0xd0336f01, 0xd2ed6826, 0xd358953b, 0xc1a15f50,
+    0xc014a24d, 0xc2caa56a, 0xc37f5877, 0xc776ab24, 0xc6c35639,
+    0xc41d511e, 0xc5a8ac03, 0xcc0eb7b8, 0xcdbb4aa5, 0xcf654d82,
+    0xced0b09f, 0xcad943cc, 0xcb6cbed1, 0xc9b2b9f6, 0xc80744eb,
+    0xec412d20, 0xedf4d03d, 0xef2ad71a, 0xee9f2a07, 0xea96d954,
+    0xeb232449, 0xe9fd236e, 0xe848de73, 0xe1eec5c8, 0xe05b38d5,
+    0xe2853ff2, 0xe330c2ef, 0xe73931bc, 0xe68ccca1, 0xe452cb86,
+    0xe5e7369b, 0xf71efcf0, 0xf6ab01ed, 0xf47506ca, 0xf5c0fbd7,
+    0xf1c90884, 0xf07cf599, 0xf2a2f2be, 0xf3170fa3, 0xfab11418,
+    0xfb04e905, 0xf9daee22, 0xf86f133f, 0xfc66e06c, 0xfdd31d71,
+    0xff0d1a56, 0xfeb8e74b, 0xb781c9c0, 0xb63434dd, 0xb4ea33fa,
+    0xb55fcee7, 0xb1563db4, 0xb0e3c0a9, 0xb23dc78e, 0xb3883a93,
+    0xba2e2128, 0xbb9bdc35, 0xb945db12, 0xb8f0260f, 0xbcf9d55c,
+    0xbd4c2841, 0xbf922f66, 0xbe27d27b, 0xacde1810, 0xad6be50d,
+    0xafb5e22a, 0xae001f37, 0xaa09ec64, 0xabbc1179, 0xa962165e,
+    0xa8d7eb43, 0xa171f0f8, 0xa0c40de5, 0xa21a0ac2, 0xa3aff7df,
+    0xa7a6048c, 0xa613f991, 0xa4cdfeb6, 0xa57803ab, 0x813e6a60,
+    0x808b977d, 0x8255905a, 0x83e06d47, 0x87e99e14, 0x865c6309,
+    0x8482642e, 0x85379933, 0x8c918288, 0x8d247f95, 0x8ffa78b2,
+    0x8e4f85af, 0x8a4676fc, 0x8bf38be1, 0x892d8cc6, 0x889871db,
+    0x9a61bbb0, 0x9bd446ad, 0x990a418a, 0x98bfbc97, 0x9cb64fc4,
+    0x9d03b2d9, 0x9fddb5fe, 0x9e6848e3, 0x97ce5358, 0x967bae45,
+    0x94a5a962, 0x9510547f, 0x9119a72c, 0x90ac5a31, 0x92725d16,
+    0x93c7a00b},
+   {0x00000000, 0x6e8c1b41, 0xdd183682, 0xb3942dc3, 0x61416b45,
+    0x0fcd7004, 0xbc595dc7, 0xd2d54686, 0xc282d68a, 0xac0ecdcb,
+    0x1f9ae008, 0x7116fb49, 0xa3c3bdcf, 0xcd4fa68e, 0x7edb8b4d,
+    0x1057900c, 0x5e74ab55, 0x30f8b014, 0x836c9dd7, 0xede08696,
+    0x3f35c010, 0x51b9db51, 0xe22df692, 0x8ca1edd3, 0x9cf67ddf,
+    0xf27a669e, 0x41ee4b5d, 0x2f62501c, 0xfdb7169a, 0x933b0ddb,
+    0x20af2018, 0x4e233b59, 0xbce956aa, 0xd2654deb, 0x61f16028,
+    0x0f7d7b69, 0xdda83def, 0xb32426ae, 0x00b00b6d, 0x6e3c102c,
+    0x7e6b8020, 0x10e79b61, 0xa373b6a2, 0xcdffade3, 0x1f2aeb65,
+    0x71a6f024, 0xc232dde7, 0xacbec6a6, 0xe29dfdff, 0x8c11e6be,
+    0x3f85cb7d, 0x5109d03c, 0x83dc96ba, 0xed508dfb, 0x5ec4a038,
+    0x3048bb79, 0x201f2b75, 0x4e933034, 0xfd071df7, 0x938b06b6,
+    0x415e4030, 0x2fd25b71, 0x9c4676b2, 0xf2ca6df3, 0xa2a3ab15,
+    0xcc2fb054, 0x7fbb9d97, 0x113786d6, 0xc3e2c050, 0xad6edb11,
+    0x1efaf6d2, 0x7076ed93, 0x60217d9f, 0x0ead66de, 0xbd394b1d,
+    0xd3b5505c, 0x016016da, 0x6fec0d9b, 0xdc782058, 0xb2f43b19,
+    0xfcd70040, 0x925b1b01, 0x21cf36c2, 0x4f432d83, 0x9d966b05,
+    0xf31a7044, 0x408e5d87, 0x2e0246c6, 0x3e55d6ca, 0x50d9cd8b,
+    0xe34de048, 0x8dc1fb09, 0x5f14bd8f, 0x3198a6ce, 0x820c8b0d,
+    0xec80904c, 0x1e4afdbf, 0x70c6e6fe, 0xc352cb3d, 0xadded07c,
+    0x7f0b96fa, 0x11878dbb, 0xa213a078, 0xcc9fbb39, 0xdcc82b35,
+    0xb2443074, 0x01d01db7, 0x6f5c06f6, 0xbd894070, 0xd3055b31,
+    0x609176f2, 0x0e1d6db3, 0x403e56ea, 0x2eb24dab, 0x9d266068,
+    0xf3aa7b29, 0x217f3daf, 0x4ff326ee, 0xfc670b2d, 0x92eb106c,
+    0x82bc8060, 0xec309b21, 0x5fa4b6e2, 0x3128ada3, 0xe3fdeb25,
+    0x8d71f064, 0x3ee5dda7, 0x5069c6e6, 0x9e36506b, 0xf0ba4b2a,
+    0x432e66e9, 0x2da27da8, 0xff773b2e, 0x91fb206f, 0x226f0dac,
+    0x4ce316ed, 0x5cb486e1, 0x32389da0, 0x81acb063, 0xef20ab22,
+    0x3df5eda4, 0x5379f6e5, 0xe0eddb26, 0x8e61c067, 0xc042fb3e,
+    0xaecee07f, 0x1d5acdbc, 0x73d6d6fd, 0xa103907b, 0xcf8f8b3a,
+    0x7c1ba6f9, 0x1297bdb8, 0x02c02db4, 0x6c4c36f5, 0xdfd81b36,
+    0xb1540077, 0x638146f1, 0x0d0d5db0, 0xbe997073, 0xd0156b32,
+    0x22df06c1, 0x4c531d80, 0xffc73043, 0x914b2b02, 0x439e6d84,
+    0x2d1276c5, 0x9e865b06, 0xf00a4047, 0xe05dd04b, 0x8ed1cb0a,
+    0x3d45e6c9, 0x53c9fd88, 0x811cbb0e, 0xef90a04f, 0x5c048d8c,
+    0x328896cd, 0x7cabad94, 0x1227b6d5, 0xa1b39b16, 0xcf3f8057,
+    0x1deac6d1, 0x7366dd90, 0xc0f2f053, 0xae7eeb12, 0xbe297b1e,
+    0xd0a5605f, 0x63314d9c, 0x0dbd56dd, 0xdf68105b, 0xb1e40b1a,
+    0x027026d9, 0x6cfc3d98, 0x3c95fb7e, 0x5219e03f, 0xe18dcdfc,
+    0x8f01d6bd, 0x5dd4903b, 0x33588b7a, 0x80cca6b9, 0xee40bdf8,
+    0xfe172df4, 0x909b36b5, 0x230f1b76, 0x4d830037, 0x9f5646b1,
+    0xf1da5df0, 0x424e7033, 0x2cc26b72, 0x62e1502b, 0x0c6d4b6a,
+    0xbff966a9, 0xd1757de8, 0x03a03b6e, 0x6d2c202f, 0xdeb80dec,
+    0xb03416ad, 0xa06386a1, 0xceef9de0, 0x7d7bb023, 0x13f7ab62,
+    0xc122ede4, 0xafaef6a5, 0x1c3adb66, 0x72b6c027, 0x807cadd4,
+    0xeef0b695, 0x5d649b56, 0x33e88017, 0xe13dc691, 0x8fb1ddd0,
+    0x3c25f013, 0x52a9eb52, 0x42fe7b5e, 0x2c72601f, 0x9fe64ddc,
+    0xf16a569d, 0x23bf101b, 0x4d330b5a, 0xfea72699, 0x902b3dd8,
+    0xde080681, 0xb0841dc0, 0x03103003, 0x6d9c2b42, 0xbf496dc4,
+    0xd1c57685, 0x62515b46, 0x0cdd4007, 0x1c8ad00b, 0x7206cb4a,
+    0xc192e689, 0xaf1efdc8, 0x7dcbbb4e, 0x1347a00f, 0xa0d38dcc,
+    0xce5f968d},
+   {0x00000000, 0xe71da697, 0x154a4b6f, 0xf257edf8, 0x2a9496de,
+    0xcd893049, 0x3fdeddb1, 0xd8c37b26, 0x55292dbc, 0xb2348b2b,
+    0x406366d3, 0xa77ec044, 0x7fbdbb62, 0x98a01df5, 0x6af7f00d,
+    0x8dea569a, 0xaa525b78, 0x4d4ffdef, 0xbf181017, 0x5805b680,
+    0x80c6cda6, 0x67db6b31, 0x958c86c9, 0x7291205e, 0xff7b76c4,
+    0x1866d053, 0xea313dab, 0x0d2c9b3c, 0xd5efe01a, 0x32f2468d,
+    0xc0a5ab75, 0x27b80de2, 0x8fd5b0b1, 0x68c81626, 0x9a9ffbde,
+    0x7d825d49, 0xa541266f, 0x425c80f8, 0xb00b6d00, 0x5716cb97,
+    0xdafc9d0d, 0x3de13b9a, 0xcfb6d662, 0x28ab70f5, 0xf0680bd3,
+    0x1775ad44, 0xe52240bc, 0x023fe62b, 0x2587ebc9, 0xc29a4d5e,
+    0x30cda0a6, 0xd7d00631, 0x0f137d17, 0xe80edb80, 0x1a593678,
+    0xfd4490ef, 0x70aec675, 0x97b360e2, 0x65e48d1a, 0x82f92b8d,
+    0x5a3a50ab, 0xbd27f63c, 0x4f701bc4, 0xa86dbd53, 0xc4da6723,
+    0x23c7c1b4, 0xd1902c4c, 0x368d8adb, 0xee4ef1fd, 0x0953576a,
+    0xfb04ba92, 0x1c191c05, 0x91f34a9f, 0x76eeec08, 0x84b901f0,
+    0x63a4a767, 0xbb67dc41, 0x5c7a7ad6, 0xae2d972e, 0x493031b9,
+    0x6e883c5b, 0x89959acc, 0x7bc27734, 0x9cdfd1a3, 0x441caa85,
+    0xa3010c12, 0x5156e1ea, 0xb64b477d, 0x3ba111e7, 0xdcbcb770,
+    0x2eeb5a88, 0xc9f6fc1f, 0x11358739, 0xf62821ae, 0x047fcc56,
+    0xe3626ac1, 0x4b0fd792, 0xac127105, 0x5e459cfd, 0xb9583a6a,
+    0x619b414c, 0x8686e7db, 0x74d10a23, 0x93ccacb4, 0x1e26fa2e,
+    0xf93b5cb9, 0x0b6cb141, 0xec7117d6, 0x34b26cf0, 0xd3afca67,
+    0x21f8279f, 0xc6e58108, 0xe15d8cea, 0x06402a7d, 0xf417c785,
+    0x130a6112, 0xcbc91a34, 0x2cd4bca3, 0xde83515b, 0x399ef7cc,
+    0xb474a156, 0x536907c1, 0xa13eea39, 0x46234cae, 0x9ee03788,
+    0x79fd911f, 0x8baa7ce7, 0x6cb7da70, 0x52c5c807, 0xb5d86e90,
+    0x478f8368, 0xa09225ff, 0x78515ed9, 0x9f4cf84e, 0x6d1b15b6,
+    0x8a06b321, 0x07ece5bb, 0xe0f1432c, 0x12a6aed4, 0xf5bb0843,
+    0x2d787365, 0xca65d5f2, 0x3832380a, 0xdf2f9e9d, 0xf897937f,
+    0x1f8a35e8, 0xedddd810, 0x0ac07e87, 0xd20305a1, 0x351ea336,
+    0xc7494ece, 0x2054e859, 0xadbebec3, 0x4aa31854, 0xb8f4f5ac,
+    0x5fe9533b, 0x872a281d, 0x60378e8a, 0x92606372, 0x757dc5e5,
+    0xdd1078b6, 0x3a0dde21, 0xc85a33d9, 0x2f47954e, 0xf784ee68,
+    0x109948ff, 0xe2cea507, 0x05d30390, 0x8839550a, 0x6f24f39d,
+    0x9d731e65, 0x7a6eb8f2, 0xa2adc3d4, 0x45b06543, 0xb7e788bb,
+    0x50fa2e2c, 0x774223ce, 0x905f8559, 0x620868a1, 0x8515ce36,
+    0x5dd6b510, 0xbacb1387, 0x489cfe7f, 0xaf8158e8, 0x226b0e72,
+    0xc576a8e5, 0x3721451d, 0xd03ce38a, 0x08ff98ac, 0xefe23e3b,
+    0x1db5d3c3, 0xfaa87554, 0x961faf24, 0x710209b3, 0x8355e44b,
+    0x644842dc, 0xbc8b39fa, 0x5b969f6d, 0xa9c17295, 0x4edcd402,
+    0xc3368298, 0x242b240f, 0xd67cc9f7, 0x31616f60, 0xe9a21446,
+    0x0ebfb2d1, 0xfce85f29, 0x1bf5f9be, 0x3c4df45c, 0xdb5052cb,
+    0x2907bf33, 0xce1a19a4, 0x16d96282, 0xf1c4c415, 0x039329ed,
+    0xe48e8f7a, 0x6964d9e0, 0x8e797f77, 0x7c2e928f, 0x9b333418,
+    0x43f04f3e, 0xa4ede9a9, 0x56ba0451, 0xb1a7a2c6, 0x19ca1f95,
+    0xfed7b902, 0x0c8054fa, 0xeb9df26d, 0x335e894b, 0xd4432fdc,
+    0x2614c224, 0xc10964b3, 0x4ce33229, 0xabfe94be, 0x59a97946,
+    0xbeb4dfd1, 0x6677a4f7, 0x816a0260, 0x733def98, 0x9420490f,
+    0xb39844ed, 0x5485e27a, 0xa6d20f82, 0x41cfa915, 0x990cd233,
+    0x7e1174a4, 0x8c46995c, 0x6b5b3fcb, 0xe6b16951, 0x01accfc6,
+    0xf3fb223e, 0x14e684a9, 0xcc25ff8f, 0x2b385918, 0xd96fb4e0,
+    0x3e721277},
+   {0x00000000, 0xa58b900e, 0x9066265d, 0x35edb653, 0xfbbd4afb,
+    0x5e36daf5, 0x6bdb6ca6, 0xce50fca8, 0x2c0b93b7, 0x898003b9,
+    0xbc6db5ea, 0x19e625e4, 0xd7b6d94c, 0x723d4942, 0x47d0ff11,
+    0xe25b6f1f, 0x5817276e, 0xfd9cb760, 0xc8710133, 0x6dfa913d,
+    0xa3aa6d95, 0x0621fd9b, 0x33cc4bc8, 0x9647dbc6, 0x741cb4d9,
+    0xd19724d7, 0xe47a9284, 0x41f1028a, 0x8fa1fe22, 0x2a2a6e2c,
+    0x1fc7d87f, 0xba4c4871, 0xb02e4edc, 0x15a5ded2, 0x20486881,
+    0x85c3f88f, 0x4b930427, 0xee189429, 0xdbf5227a, 0x7e7eb274,
+    0x9c25dd6b, 0x39ae4d65, 0x0c43fb36, 0xa9c86b38, 0x67989790,
+    0xc213079e, 0xf7feb1cd, 0x527521c3, 0xe83969b2, 0x4db2f9bc,
+    0x785f4fef, 0xddd4dfe1, 0x13842349, 0xb60fb347, 0x83e20514,
+    0x2669951a, 0xc432fa05, 0x61b96a0b, 0x5454dc58, 0xf1df4c56,
+    0x3f8fb0fe, 0x9a0420f0, 0xafe996a3, 0x0a6206ad, 0xbb2d9bf9,
+    0x1ea60bf7, 0x2b4bbda4, 0x8ec02daa, 0x4090d102, 0xe51b410c,
+    0xd0f6f75f, 0x757d6751, 0x9726084e, 0x32ad9840, 0x07402e13,
+    0xa2cbbe1d, 0x6c9b42b5, 0xc910d2bb, 0xfcfd64e8, 0x5976f4e6,
+    0xe33abc97, 0x46b12c99, 0x735c9aca, 0xd6d70ac4, 0x1887f66c,
+    0xbd0c6662, 0x88e1d031, 0x2d6a403f, 0xcf312f20, 0x6ababf2e,
+    0x5f57097d, 0xfadc9973, 0x348c65db, 0x9107f5d5, 0xa4ea4386,
+    0x0161d388, 0x0b03d525, 0xae88452b, 0x9b65f378, 0x3eee6376,
+    0xf0be9fde, 0x55350fd0, 0x60d8b983, 0xc553298d, 0x27084692,
+    0x8283d69c, 0xb76e60cf, 0x12e5f0c1, 0xdcb50c69, 0x793e9c67,
+    0x4cd32a34, 0xe958ba3a, 0x5314f24b, 0xf69f6245, 0xc372d416,
+    0x66f94418, 0xa8a9b8b0, 0x0d2228be, 0x38cf9eed, 0x9d440ee3,
+    0x7f1f61fc, 0xda94f1f2, 0xef7947a1, 0x4af2d7af, 0x84a22b07,
+    0x2129bb09, 0x14c40d5a, 0xb14f9d54, 0xad2a31b3, 0x08a1a1bd,
+    0x3d4c17ee, 0x98c787e0, 0x56977b48, 0xf31ceb46, 0xc6f15d15,
+    0x637acd1b, 0x8121a204, 0x24aa320a, 0x11478459, 0xb4cc1457,
+    0x7a9ce8ff, 0xdf1778f1, 0xeafacea2, 0x4f715eac, 0xf53d16dd,
+    0x50b686d3, 0x655b3080, 0xc0d0a08e, 0x0e805c26, 0xab0bcc28,
+    0x9ee67a7b, 0x3b6dea75, 0xd936856a, 0x7cbd1564, 0x4950a337,
+    0xecdb3339, 0x228bcf91, 0x87005f9f, 0xb2ede9cc, 0x176679c2,
+    0x1d047f6f, 0xb88fef61, 0x8d625932, 0x28e9c93c, 0xe6b93594,
+    0x4332a59a, 0x76df13c9, 0xd35483c7, 0x310fecd8, 0x94847cd6,
+    0xa169ca85, 0x04e25a8b, 0xcab2a623, 0x6f39362d, 0x5ad4807e,
+    0xff5f1070, 0x45135801, 0xe098c80f, 0xd5757e5c, 0x70feee52,
+    0xbeae12fa, 0x1b2582f4, 0x2ec834a7, 0x8b43a4a9, 0x6918cbb6,
+    0xcc935bb8, 0xf97eedeb, 0x5cf57de5, 0x92a5814d, 0x372e1143,
+    0x02c3a710, 0xa748371e, 0x1607aa4a, 0xb38c3a44, 0x86618c17,
+    0x23ea1c19, 0xedbae0b1, 0x483170bf, 0x7ddcc6ec, 0xd85756e2,
+    0x3a0c39fd, 0x9f87a9f3, 0xaa6a1fa0, 0x0fe18fae, 0xc1b17306,
+    0x643ae308, 0x51d7555b, 0xf45cc555, 0x4e108d24, 0xeb9b1d2a,
+    0xde76ab79, 0x7bfd3b77, 0xb5adc7df, 0x102657d1, 0x25cbe182,
+    0x8040718c, 0x621b1e93, 0xc7908e9d, 0xf27d38ce, 0x57f6a8c0,
+    0x99a65468, 0x3c2dc466, 0x09c07235, 0xac4be23b, 0xa629e496,
+    0x03a27498, 0x364fc2cb, 0x93c452c5, 0x5d94ae6d, 0xf81f3e63,
+    0xcdf28830, 0x6879183e, 0x8a227721, 0x2fa9e72f, 0x1a44517c,
+    0xbfcfc172, 0x719f3dda, 0xd414add4, 0xe1f91b87, 0x44728b89,
+    0xfe3ec3f8, 0x5bb553f6, 0x6e58e5a5, 0xcbd375ab, 0x05838903,
+    0xa008190d, 0x95e5af5e, 0x306e3f50, 0xd235504f, 0x77bec041,
+    0x42537612, 0xe7d8e61c, 0x29881ab4, 0x8c038aba, 0xb9ee3ce9,
+    0x1c65ace7}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x0000000000000000, 0x0e908ba500000000, 0x5d26669000000000,
+    0x53b6ed3500000000, 0xfb4abdfb00000000, 0xf5da365e00000000,
+    0xa66cdb6b00000000, 0xa8fc50ce00000000, 0xb7930b2c00000000,
+    0xb903808900000000, 0xeab56dbc00000000, 0xe425e61900000000,
+    0x4cd9b6d700000000, 0x42493d7200000000, 0x11ffd04700000000,
+    0x1f6f5be200000000, 0x6e27175800000000, 0x60b79cfd00000000,
+    0x330171c800000000, 0x3d91fa6d00000000, 0x956daaa300000000,
+    0x9bfd210600000000, 0xc84bcc3300000000, 0xc6db479600000000,
+    0xd9b41c7400000000, 0xd72497d100000000, 0x84927ae400000000,
+    0x8a02f14100000000, 0x22fea18f00000000, 0x2c6e2a2a00000000,
+    0x7fd8c71f00000000, 0x71484cba00000000, 0xdc4e2eb000000000,
+    0xd2dea51500000000, 0x8168482000000000, 0x8ff8c38500000000,
+    0x2704934b00000000, 0x299418ee00000000, 0x7a22f5db00000000,
+    0x74b27e7e00000000, 0x6bdd259c00000000, 0x654dae3900000000,
+    0x36fb430c00000000, 0x386bc8a900000000, 0x9097986700000000,
+    0x9e0713c200000000, 0xcdb1fef700000000, 0xc321755200000000,
+    0xb26939e800000000, 0xbcf9b24d00000000, 0xef4f5f7800000000,
+    0xe1dfd4dd00000000, 0x4923841300000000, 0x47b30fb600000000,
+    0x1405e28300000000, 0x1a95692600000000, 0x05fa32c400000000,
+    0x0b6ab96100000000, 0x58dc545400000000, 0x564cdff100000000,
+    0xfeb08f3f00000000, 0xf020049a00000000, 0xa396e9af00000000,
+    0xad06620a00000000, 0xf99b2dbb00000000, 0xf70ba61e00000000,
+    0xa4bd4b2b00000000, 0xaa2dc08e00000000, 0x02d1904000000000,
+    0x0c411be500000000, 0x5ff7f6d000000000, 0x51677d7500000000,
+    0x4e08269700000000, 0x4098ad3200000000, 0x132e400700000000,
+    0x1dbecba200000000, 0xb5429b6c00000000, 0xbbd210c900000000,
+    0xe864fdfc00000000, 0xe6f4765900000000, 0x97bc3ae300000000,
+    0x992cb14600000000, 0xca9a5c7300000000, 0xc40ad7d600000000,
+    0x6cf6871800000000, 0x62660cbd00000000, 0x31d0e18800000000,
+    0x3f406a2d00000000, 0x202f31cf00000000, 0x2ebfba6a00000000,
+    0x7d09575f00000000, 0x7399dcfa00000000, 0xdb658c3400000000,
+    0xd5f5079100000000, 0x8643eaa400000000, 0x88d3610100000000,
+    0x25d5030b00000000, 0x2b4588ae00000000, 0x78f3659b00000000,
+    0x7663ee3e00000000, 0xde9fbef000000000, 0xd00f355500000000,
+    0x83b9d86000000000, 0x8d2953c500000000, 0x9246082700000000,
+    0x9cd6838200000000, 0xcf606eb700000000, 0xc1f0e51200000000,
+    0x690cb5dc00000000, 0x679c3e7900000000, 0x342ad34c00000000,
+    0x3aba58e900000000, 0x4bf2145300000000, 0x45629ff600000000,
+    0x16d472c300000000, 0x1844f96600000000, 0xb0b8a9a800000000,
+    0xbe28220d00000000, 0xed9ecf3800000000, 0xe30e449d00000000,
+    0xfc611f7f00000000, 0xf2f194da00000000, 0xa14779ef00000000,
+    0xafd7f24a00000000, 0x072ba28400000000, 0x09bb292100000000,
+    0x5a0dc41400000000, 0x549d4fb100000000, 0xb3312aad00000000,
+    0xbda1a10800000000, 0xee174c3d00000000, 0xe087c79800000000,
+    0x487b975600000000, 0x46eb1cf300000000, 0x155df1c600000000,
+    0x1bcd7a6300000000, 0x04a2218100000000, 0x0a32aa2400000000,
+    0x5984471100000000, 0x5714ccb400000000, 0xffe89c7a00000000,
+    0xf17817df00000000, 0xa2cefaea00000000, 0xac5e714f00000000,
+    0xdd163df500000000, 0xd386b65000000000, 0x80305b6500000000,
+    0x8ea0d0c000000000, 0x265c800e00000000, 0x28cc0bab00000000,
+    0x7b7ae69e00000000, 0x75ea6d3b00000000, 0x6a8536d900000000,
+    0x6415bd7c00000000, 0x37a3504900000000, 0x3933dbec00000000,
+    0x91cf8b2200000000, 0x9f5f008700000000, 0xcce9edb200000000,
+    0xc279661700000000, 0x6f7f041d00000000, 0x61ef8fb800000000,
+    0x3259628d00000000, 0x3cc9e92800000000, 0x9435b9e600000000,
+    0x9aa5324300000000, 0xc913df7600000000, 0xc78354d300000000,
+    0xd8ec0f3100000000, 0xd67c849400000000, 0x85ca69a100000000,
+    0x8b5ae20400000000, 0x23a6b2ca00000000, 0x2d36396f00000000,
+    0x7e80d45a00000000, 0x70105fff00000000, 0x0158134500000000,
+    0x0fc898e000000000, 0x5c7e75d500000000, 0x52eefe7000000000,
+    0xfa12aebe00000000, 0xf482251b00000000, 0xa734c82e00000000,
+    0xa9a4438b00000000, 0xb6cb186900000000, 0xb85b93cc00000000,
+    0xebed7ef900000000, 0xe57df55c00000000, 0x4d81a59200000000,
+    0x43112e3700000000, 0x10a7c30200000000, 0x1e3748a700000000,
+    0x4aaa071600000000, 0x443a8cb300000000, 0x178c618600000000,
+    0x191cea2300000000, 0xb1e0baed00000000, 0xbf70314800000000,
+    0xecc6dc7d00000000, 0xe25657d800000000, 0xfd390c3a00000000,
+    0xf3a9879f00000000, 0xa01f6aaa00000000, 0xae8fe10f00000000,
+    0x0673b1c100000000, 0x08e33a6400000000, 0x5b55d75100000000,
+    0x55c55cf400000000, 0x248d104e00000000, 0x2a1d9beb00000000,
+    0x79ab76de00000000, 0x773bfd7b00000000, 0xdfc7adb500000000,
+    0xd157261000000000, 0x82e1cb2500000000, 0x8c71408000000000,
+    0x931e1b6200000000, 0x9d8e90c700000000, 0xce387df200000000,
+    0xc0a8f65700000000, 0x6854a69900000000, 0x66c42d3c00000000,
+    0x3572c00900000000, 0x3be24bac00000000, 0x96e429a600000000,
+    0x9874a20300000000, 0xcbc24f3600000000, 0xc552c49300000000,
+    0x6dae945d00000000, 0x633e1ff800000000, 0x3088f2cd00000000,
+    0x3e18796800000000, 0x2177228a00000000, 0x2fe7a92f00000000,
+    0x7c51441a00000000, 0x72c1cfbf00000000, 0xda3d9f7100000000,
+    0xd4ad14d400000000, 0x871bf9e100000000, 0x898b724400000000,
+    0xf8c33efe00000000, 0xf653b55b00000000, 0xa5e5586e00000000,
+    0xab75d3cb00000000, 0x0389830500000000, 0x0d1908a000000000,
+    0x5eafe59500000000, 0x503f6e3000000000, 0x4f5035d200000000,
+    0x41c0be7700000000, 0x1276534200000000, 0x1ce6d8e700000000,
+    0xb41a882900000000, 0xba8a038c00000000, 0xe93ceeb900000000,
+    0xe7ac651c00000000},
+   {0x0000000000000000, 0x97a61de700000000, 0x6f4b4a1500000000,
+    0xf8ed57f200000000, 0xde96942a00000000, 0x493089cd00000000,
+    0xb1ddde3f00000000, 0x267bc3d800000000, 0xbc2d295500000000,
+    0x2b8b34b200000000, 0xd366634000000000, 0x44c07ea700000000,
+    0x62bbbd7f00000000, 0xf51da09800000000, 0x0df0f76a00000000,
+    0x9a56ea8d00000000, 0x785b52aa00000000, 0xeffd4f4d00000000,
+    0x171018bf00000000, 0x80b6055800000000, 0xa6cdc68000000000,
+    0x316bdb6700000000, 0xc9868c9500000000, 0x5e20917200000000,
+    0xc4767bff00000000, 0x53d0661800000000, 0xab3d31ea00000000,
+    0x3c9b2c0d00000000, 0x1ae0efd500000000, 0x8d46f23200000000,
+    0x75aba5c000000000, 0xe20db82700000000, 0xb1b0d58f00000000,
+    0x2616c86800000000, 0xdefb9f9a00000000, 0x495d827d00000000,
+    0x6f2641a500000000, 0xf8805c4200000000, 0x006d0bb000000000,
+    0x97cb165700000000, 0x0d9dfcda00000000, 0x9a3be13d00000000,
+    0x62d6b6cf00000000, 0xf570ab2800000000, 0xd30b68f000000000,
+    0x44ad751700000000, 0xbc4022e500000000, 0x2be63f0200000000,
+    0xc9eb872500000000, 0x5e4d9ac200000000, 0xa6a0cd3000000000,
+    0x3106d0d700000000, 0x177d130f00000000, 0x80db0ee800000000,
+    0x7836591a00000000, 0xef9044fd00000000, 0x75c6ae7000000000,
+    0xe260b39700000000, 0x1a8de46500000000, 0x8d2bf98200000000,
+    0xab503a5a00000000, 0x3cf627bd00000000, 0xc41b704f00000000,
+    0x53bd6da800000000, 0x2367dac400000000, 0xb4c1c72300000000,
+    0x4c2c90d100000000, 0xdb8a8d3600000000, 0xfdf14eee00000000,
+    0x6a57530900000000, 0x92ba04fb00000000, 0x051c191c00000000,
+    0x9f4af39100000000, 0x08ecee7600000000, 0xf001b98400000000,
+    0x67a7a46300000000, 0x41dc67bb00000000, 0xd67a7a5c00000000,
+    0x2e972dae00000000, 0xb931304900000000, 0x5b3c886e00000000,
+    0xcc9a958900000000, 0x3477c27b00000000, 0xa3d1df9c00000000,
+    0x85aa1c4400000000, 0x120c01a300000000, 0xeae1565100000000,
+    0x7d474bb600000000, 0xe711a13b00000000, 0x70b7bcdc00000000,
+    0x885aeb2e00000000, 0x1ffcf6c900000000, 0x3987351100000000,
+    0xae2128f600000000, 0x56cc7f0400000000, 0xc16a62e300000000,
+    0x92d70f4b00000000, 0x057112ac00000000, 0xfd9c455e00000000,
+    0x6a3a58b900000000, 0x4c419b6100000000, 0xdbe7868600000000,
+    0x230ad17400000000, 0xb4accc9300000000, 0x2efa261e00000000,
+    0xb95c3bf900000000, 0x41b16c0b00000000, 0xd61771ec00000000,
+    0xf06cb23400000000, 0x67caafd300000000, 0x9f27f82100000000,
+    0x0881e5c600000000, 0xea8c5de100000000, 0x7d2a400600000000,
+    0x85c717f400000000, 0x12610a1300000000, 0x341ac9cb00000000,
+    0xa3bcd42c00000000, 0x5b5183de00000000, 0xccf79e3900000000,
+    0x56a174b400000000, 0xc107695300000000, 0x39ea3ea100000000,
+    0xae4c234600000000, 0x8837e09e00000000, 0x1f91fd7900000000,
+    0xe77caa8b00000000, 0x70dab76c00000000, 0x07c8c55200000000,
+    0x906ed8b500000000, 0x68838f4700000000, 0xff2592a000000000,
+    0xd95e517800000000, 0x4ef84c9f00000000, 0xb6151b6d00000000,
+    0x21b3068a00000000, 0xbbe5ec0700000000, 0x2c43f1e000000000,
+    0xd4aea61200000000, 0x4308bbf500000000, 0x6573782d00000000,
+    0xf2d565ca00000000, 0x0a38323800000000, 0x9d9e2fdf00000000,
+    0x7f9397f800000000, 0xe8358a1f00000000, 0x10d8dded00000000,
+    0x877ec00a00000000, 0xa10503d200000000, 0x36a31e3500000000,
+    0xce4e49c700000000, 0x59e8542000000000, 0xc3bebead00000000,
+    0x5418a34a00000000, 0xacf5f4b800000000, 0x3b53e95f00000000,
+    0x1d282a8700000000, 0x8a8e376000000000, 0x7263609200000000,
+    0xe5c57d7500000000, 0xb67810dd00000000, 0x21de0d3a00000000,
+    0xd9335ac800000000, 0x4e95472f00000000, 0x68ee84f700000000,
+    0xff48991000000000, 0x07a5cee200000000, 0x9003d30500000000,
+    0x0a55398800000000, 0x9df3246f00000000, 0x651e739d00000000,
+    0xf2b86e7a00000000, 0xd4c3ada200000000, 0x4365b04500000000,
+    0xbb88e7b700000000, 0x2c2efa5000000000, 0xce23427700000000,
+    0x59855f9000000000, 0xa168086200000000, 0x36ce158500000000,
+    0x10b5d65d00000000, 0x8713cbba00000000, 0x7ffe9c4800000000,
+    0xe85881af00000000, 0x720e6b2200000000, 0xe5a876c500000000,
+    0x1d45213700000000, 0x8ae33cd000000000, 0xac98ff0800000000,
+    0x3b3ee2ef00000000, 0xc3d3b51d00000000, 0x5475a8fa00000000,
+    0x24af1f9600000000, 0xb309027100000000, 0x4be4558300000000,
+    0xdc42486400000000, 0xfa398bbc00000000, 0x6d9f965b00000000,
+    0x9572c1a900000000, 0x02d4dc4e00000000, 0x988236c300000000,
+    0x0f242b2400000000, 0xf7c97cd600000000, 0x606f613100000000,
+    0x4614a2e900000000, 0xd1b2bf0e00000000, 0x295fe8fc00000000,
+    0xbef9f51b00000000, 0x5cf44d3c00000000, 0xcb5250db00000000,
+    0x33bf072900000000, 0xa4191ace00000000, 0x8262d91600000000,
+    0x15c4c4f100000000, 0xed29930300000000, 0x7a8f8ee400000000,
+    0xe0d9646900000000, 0x777f798e00000000, 0x8f922e7c00000000,
+    0x1834339b00000000, 0x3e4ff04300000000, 0xa9e9eda400000000,
+    0x5104ba5600000000, 0xc6a2a7b100000000, 0x951fca1900000000,
+    0x02b9d7fe00000000, 0xfa54800c00000000, 0x6df29deb00000000,
+    0x4b895e3300000000, 0xdc2f43d400000000, 0x24c2142600000000,
+    0xb36409c100000000, 0x2932e34c00000000, 0xbe94feab00000000,
+    0x4679a95900000000, 0xd1dfb4be00000000, 0xf7a4776600000000,
+    0x60026a8100000000, 0x98ef3d7300000000, 0x0f49209400000000,
+    0xed4498b300000000, 0x7ae2855400000000, 0x820fd2a600000000,
+    0x15a9cf4100000000, 0x33d20c9900000000, 0xa474117e00000000,
+    0x5c99468c00000000, 0xcb3f5b6b00000000, 0x5169b1e600000000,
+    0xc6cfac0100000000, 0x3e22fbf300000000, 0xa984e61400000000,
+    0x8fff25cc00000000, 0x1859382b00000000, 0xe0b46fd900000000,
+    0x7712723e00000000},
+   {0x0000000000000000, 0x411b8c6e00000000, 0x823618dd00000000,
+    0xc32d94b300000000, 0x456b416100000000, 0x0470cd0f00000000,
+    0xc75d59bc00000000, 0x8646d5d200000000, 0x8ad682c200000000,
+    0xcbcd0eac00000000, 0x08e09a1f00000000, 0x49fb167100000000,
+    0xcfbdc3a300000000, 0x8ea64fcd00000000, 0x4d8bdb7e00000000,
+    0x0c90571000000000, 0x55ab745e00000000, 0x14b0f83000000000,
+    0xd79d6c8300000000, 0x9686e0ed00000000, 0x10c0353f00000000,
+    0x51dbb95100000000, 0x92f62de200000000, 0xd3eda18c00000000,
+    0xdf7df69c00000000, 0x9e667af200000000, 0x5d4bee4100000000,
+    0x1c50622f00000000, 0x9a16b7fd00000000, 0xdb0d3b9300000000,
+    0x1820af2000000000, 0x593b234e00000000, 0xaa56e9bc00000000,
+    0xeb4d65d200000000, 0x2860f16100000000, 0x697b7d0f00000000,
+    0xef3da8dd00000000, 0xae2624b300000000, 0x6d0bb00000000000,
+    0x2c103c6e00000000, 0x20806b7e00000000, 0x619be71000000000,
+    0xa2b673a300000000, 0xe3adffcd00000000, 0x65eb2a1f00000000,
+    0x24f0a67100000000, 0xe7dd32c200000000, 0xa6c6beac00000000,
+    0xfffd9de200000000, 0xbee6118c00000000, 0x7dcb853f00000000,
+    0x3cd0095100000000, 0xba96dc8300000000, 0xfb8d50ed00000000,
+    0x38a0c45e00000000, 0x79bb483000000000, 0x752b1f2000000000,
+    0x3430934e00000000, 0xf71d07fd00000000, 0xb6068b9300000000,
+    0x30405e4100000000, 0x715bd22f00000000, 0xb276469c00000000,
+    0xf36dcaf200000000, 0x15aba3a200000000, 0x54b02fcc00000000,
+    0x979dbb7f00000000, 0xd686371100000000, 0x50c0e2c300000000,
+    0x11db6ead00000000, 0xd2f6fa1e00000000, 0x93ed767000000000,
+    0x9f7d216000000000, 0xde66ad0e00000000, 0x1d4b39bd00000000,
+    0x5c50b5d300000000, 0xda16600100000000, 0x9b0dec6f00000000,
+    0x582078dc00000000, 0x193bf4b200000000, 0x4000d7fc00000000,
+    0x011b5b9200000000, 0xc236cf2100000000, 0x832d434f00000000,
+    0x056b969d00000000, 0x44701af300000000, 0x875d8e4000000000,
+    0xc646022e00000000, 0xcad6553e00000000, 0x8bcdd95000000000,
+    0x48e04de300000000, 0x09fbc18d00000000, 0x8fbd145f00000000,
+    0xcea6983100000000, 0x0d8b0c8200000000, 0x4c9080ec00000000,
+    0xbffd4a1e00000000, 0xfee6c67000000000, 0x3dcb52c300000000,
+    0x7cd0dead00000000, 0xfa960b7f00000000, 0xbb8d871100000000,
+    0x78a013a200000000, 0x39bb9fcc00000000, 0x352bc8dc00000000,
+    0x743044b200000000, 0xb71dd00100000000, 0xf6065c6f00000000,
+    0x704089bd00000000, 0x315b05d300000000, 0xf276916000000000,
+    0xb36d1d0e00000000, 0xea563e4000000000, 0xab4db22e00000000,
+    0x6860269d00000000, 0x297baaf300000000, 0xaf3d7f2100000000,
+    0xee26f34f00000000, 0x2d0b67fc00000000, 0x6c10eb9200000000,
+    0x6080bc8200000000, 0x219b30ec00000000, 0xe2b6a45f00000000,
+    0xa3ad283100000000, 0x25ebfde300000000, 0x64f0718d00000000,
+    0xa7dde53e00000000, 0xe6c6695000000000, 0x6b50369e00000000,
+    0x2a4bbaf000000000, 0xe9662e4300000000, 0xa87da22d00000000,
+    0x2e3b77ff00000000, 0x6f20fb9100000000, 0xac0d6f2200000000,
+    0xed16e34c00000000, 0xe186b45c00000000, 0xa09d383200000000,
+    0x63b0ac8100000000, 0x22ab20ef00000000, 0xa4edf53d00000000,
+    0xe5f6795300000000, 0x26dbede000000000, 0x67c0618e00000000,
+    0x3efb42c000000000, 0x7fe0ceae00000000, 0xbccd5a1d00000000,
+    0xfdd6d67300000000, 0x7b9003a100000000, 0x3a8b8fcf00000000,
+    0xf9a61b7c00000000, 0xb8bd971200000000, 0xb42dc00200000000,
+    0xf5364c6c00000000, 0x361bd8df00000000, 0x770054b100000000,
+    0xf146816300000000, 0xb05d0d0d00000000, 0x737099be00000000,
+    0x326b15d000000000, 0xc106df2200000000, 0x801d534c00000000,
+    0x4330c7ff00000000, 0x022b4b9100000000, 0x846d9e4300000000,
+    0xc576122d00000000, 0x065b869e00000000, 0x47400af000000000,
+    0x4bd05de000000000, 0x0acbd18e00000000, 0xc9e6453d00000000,
+    0x88fdc95300000000, 0x0ebb1c8100000000, 0x4fa090ef00000000,
+    0x8c8d045c00000000, 0xcd96883200000000, 0x94adab7c00000000,
+    0xd5b6271200000000, 0x169bb3a100000000, 0x57803fcf00000000,
+    0xd1c6ea1d00000000, 0x90dd667300000000, 0x53f0f2c000000000,
+    0x12eb7eae00000000, 0x1e7b29be00000000, 0x5f60a5d000000000,
+    0x9c4d316300000000, 0xdd56bd0d00000000, 0x5b1068df00000000,
+    0x1a0be4b100000000, 0xd926700200000000, 0x983dfc6c00000000,
+    0x7efb953c00000000, 0x3fe0195200000000, 0xfccd8de100000000,
+    0xbdd6018f00000000, 0x3b90d45d00000000, 0x7a8b583300000000,
+    0xb9a6cc8000000000, 0xf8bd40ee00000000, 0xf42d17fe00000000,
+    0xb5369b9000000000, 0x761b0f2300000000, 0x3700834d00000000,
+    0xb146569f00000000, 0xf05ddaf100000000, 0x33704e4200000000,
+    0x726bc22c00000000, 0x2b50e16200000000, 0x6a4b6d0c00000000,
+    0xa966f9bf00000000, 0xe87d75d100000000, 0x6e3ba00300000000,
+    0x2f202c6d00000000, 0xec0db8de00000000, 0xad1634b000000000,
+    0xa18663a000000000, 0xe09defce00000000, 0x23b07b7d00000000,
+    0x62abf71300000000, 0xe4ed22c100000000, 0xa5f6aeaf00000000,
+    0x66db3a1c00000000, 0x27c0b67200000000, 0xd4ad7c8000000000,
+    0x95b6f0ee00000000, 0x569b645d00000000, 0x1780e83300000000,
+    0x91c63de100000000, 0xd0ddb18f00000000, 0x13f0253c00000000,
+    0x52eba95200000000, 0x5e7bfe4200000000, 0x1f60722c00000000,
+    0xdc4de69f00000000, 0x9d566af100000000, 0x1b10bf2300000000,
+    0x5a0b334d00000000, 0x9926a7fe00000000, 0xd83d2b9000000000,
+    0x810608de00000000, 0xc01d84b000000000, 0x0330100300000000,
+    0x422b9c6d00000000, 0xc46d49bf00000000, 0x8576c5d100000000,
+    0x465b516200000000, 0x0740dd0c00000000, 0x0bd08a1c00000000,
+    0x4acb067200000000, 0x89e692c100000000, 0xc8fd1eaf00000000,
+    0x4ebbcb7d00000000, 0x0fa0471300000000, 0xcc8dd3a000000000,
+    0x8d965fce00000000},
+   {0x0000000000000000, 0x1dfdb50100000000, 0x3afa6b0300000000,
+    0x2707de0200000000, 0x74f4d70600000000, 0x6909620700000000,
+    0x4e0ebc0500000000, 0x53f3090400000000, 0xe8e8af0d00000000,
+    0xf5151a0c00000000, 0xd212c40e00000000, 0xcfef710f00000000,
+    0x9c1c780b00000000, 0x81e1cd0a00000000, 0xa6e6130800000000,
+    0xbb1ba60900000000, 0xd0d15f1b00000000, 0xcd2cea1a00000000,
+    0xea2b341800000000, 0xf7d6811900000000, 0xa425881d00000000,
+    0xb9d83d1c00000000, 0x9edfe31e00000000, 0x8322561f00000000,
+    0x3839f01600000000, 0x25c4451700000000, 0x02c39b1500000000,
+    0x1f3e2e1400000000, 0x4ccd271000000000, 0x5130921100000000,
+    0x76374c1300000000, 0x6bcaf91200000000, 0xa0a3bf3600000000,
+    0xbd5e0a3700000000, 0x9a59d43500000000, 0x87a4613400000000,
+    0xd457683000000000, 0xc9aadd3100000000, 0xeead033300000000,
+    0xf350b63200000000, 0x484b103b00000000, 0x55b6a53a00000000,
+    0x72b17b3800000000, 0x6f4cce3900000000, 0x3cbfc73d00000000,
+    0x2142723c00000000, 0x0645ac3e00000000, 0x1bb8193f00000000,
+    0x7072e02d00000000, 0x6d8f552c00000000, 0x4a888b2e00000000,
+    0x57753e2f00000000, 0x0486372b00000000, 0x197b822a00000000,
+    0x3e7c5c2800000000, 0x2381e92900000000, 0x989a4f2000000000,
+    0x8567fa2100000000, 0xa260242300000000, 0xbf9d912200000000,
+    0xec6e982600000000, 0xf1932d2700000000, 0xd694f32500000000,
+    0xcb69462400000000, 0x40477f6d00000000, 0x5dbaca6c00000000,
+    0x7abd146e00000000, 0x6740a16f00000000, 0x34b3a86b00000000,
+    0x294e1d6a00000000, 0x0e49c36800000000, 0x13b4766900000000,
+    0xa8afd06000000000, 0xb552656100000000, 0x9255bb6300000000,
+    0x8fa80e6200000000, 0xdc5b076600000000, 0xc1a6b26700000000,
+    0xe6a16c6500000000, 0xfb5cd96400000000, 0x9096207600000000,
+    0x8d6b957700000000, 0xaa6c4b7500000000, 0xb791fe7400000000,
+    0xe462f77000000000, 0xf99f427100000000, 0xde989c7300000000,
+    0xc365297200000000, 0x787e8f7b00000000, 0x65833a7a00000000,
+    0x4284e47800000000, 0x5f79517900000000, 0x0c8a587d00000000,
+    0x1177ed7c00000000, 0x3670337e00000000, 0x2b8d867f00000000,
+    0xe0e4c05b00000000, 0xfd19755a00000000, 0xda1eab5800000000,
+    0xc7e31e5900000000, 0x9410175d00000000, 0x89eda25c00000000,
+    0xaeea7c5e00000000, 0xb317c95f00000000, 0x080c6f5600000000,
+    0x15f1da5700000000, 0x32f6045500000000, 0x2f0bb15400000000,
+    0x7cf8b85000000000, 0x61050d5100000000, 0x4602d35300000000,
+    0x5bff665200000000, 0x30359f4000000000, 0x2dc82a4100000000,
+    0x0acff44300000000, 0x1732414200000000, 0x44c1484600000000,
+    0x593cfd4700000000, 0x7e3b234500000000, 0x63c6964400000000,
+    0xd8dd304d00000000, 0xc520854c00000000, 0xe2275b4e00000000,
+    0xffdaee4f00000000, 0xac29e74b00000000, 0xb1d4524a00000000,
+    0x96d38c4800000000, 0x8b2e394900000000, 0x808efeda00000000,
+    0x9d734bdb00000000, 0xba7495d900000000, 0xa78920d800000000,
+    0xf47a29dc00000000, 0xe9879cdd00000000, 0xce8042df00000000,
+    0xd37df7de00000000, 0x686651d700000000, 0x759be4d600000000,
+    0x529c3ad400000000, 0x4f618fd500000000, 0x1c9286d100000000,
+    0x016f33d000000000, 0x2668edd200000000, 0x3b9558d300000000,
+    0x505fa1c100000000, 0x4da214c000000000, 0x6aa5cac200000000,
+    0x77587fc300000000, 0x24ab76c700000000, 0x3956c3c600000000,
+    0x1e511dc400000000, 0x03aca8c500000000, 0xb8b70ecc00000000,
+    0xa54abbcd00000000, 0x824d65cf00000000, 0x9fb0d0ce00000000,
+    0xcc43d9ca00000000, 0xd1be6ccb00000000, 0xf6b9b2c900000000,
+    0xeb4407c800000000, 0x202d41ec00000000, 0x3dd0f4ed00000000,
+    0x1ad72aef00000000, 0x072a9fee00000000, 0x54d996ea00000000,
+    0x492423eb00000000, 0x6e23fde900000000, 0x73de48e800000000,
+    0xc8c5eee100000000, 0xd5385be000000000, 0xf23f85e200000000,
+    0xefc230e300000000, 0xbc3139e700000000, 0xa1cc8ce600000000,
+    0x86cb52e400000000, 0x9b36e7e500000000, 0xf0fc1ef700000000,
+    0xed01abf600000000, 0xca0675f400000000, 0xd7fbc0f500000000,
+    0x8408c9f100000000, 0x99f57cf000000000, 0xbef2a2f200000000,
+    0xa30f17f300000000, 0x1814b1fa00000000, 0x05e904fb00000000,
+    0x22eedaf900000000, 0x3f136ff800000000, 0x6ce066fc00000000,
+    0x711dd3fd00000000, 0x561a0dff00000000, 0x4be7b8fe00000000,
+    0xc0c981b700000000, 0xdd3434b600000000, 0xfa33eab400000000,
+    0xe7ce5fb500000000, 0xb43d56b100000000, 0xa9c0e3b000000000,
+    0x8ec73db200000000, 0x933a88b300000000, 0x28212eba00000000,
+    0x35dc9bbb00000000, 0x12db45b900000000, 0x0f26f0b800000000,
+    0x5cd5f9bc00000000, 0x41284cbd00000000, 0x662f92bf00000000,
+    0x7bd227be00000000, 0x1018deac00000000, 0x0de56bad00000000,
+    0x2ae2b5af00000000, 0x371f00ae00000000, 0x64ec09aa00000000,
+    0x7911bcab00000000, 0x5e1662a900000000, 0x43ebd7a800000000,
+    0xf8f071a100000000, 0xe50dc4a000000000, 0xc20a1aa200000000,
+    0xdff7afa300000000, 0x8c04a6a700000000, 0x91f913a600000000,
+    0xb6fecda400000000, 0xab0378a500000000, 0x606a3e8100000000,
+    0x7d978b8000000000, 0x5a90558200000000, 0x476de08300000000,
+    0x149ee98700000000, 0x09635c8600000000, 0x2e64828400000000,
+    0x3399378500000000, 0x8882918c00000000, 0x957f248d00000000,
+    0xb278fa8f00000000, 0xaf854f8e00000000, 0xfc76468a00000000,
+    0xe18bf38b00000000, 0xc68c2d8900000000, 0xdb71988800000000,
+    0xb0bb619a00000000, 0xad46d49b00000000, 0x8a410a9900000000,
+    0x97bcbf9800000000, 0xc44fb69c00000000, 0xd9b2039d00000000,
+    0xfeb5dd9f00000000, 0xe348689e00000000, 0x5853ce9700000000,
+    0x45ae7b9600000000, 0x62a9a59400000000, 0x7f54109500000000,
+    0x2ca7199100000000, 0x315aac9000000000, 0x165d729200000000,
+    0x0ba0c79300000000},
+   {0x0000000000000000, 0x24d9076300000000, 0x48b20fc600000000,
+    0x6c6b08a500000000, 0xd1626e5700000000, 0xf5bb693400000000,
+    0x99d0619100000000, 0xbd0966f200000000, 0xa2c5dcae00000000,
+    0x861cdbcd00000000, 0xea77d36800000000, 0xceaed40b00000000,
+    0x73a7b2f900000000, 0x577eb59a00000000, 0x3b15bd3f00000000,
+    0x1fccba5c00000000, 0x058dc88600000000, 0x2154cfe500000000,
+    0x4d3fc74000000000, 0x69e6c02300000000, 0xd4efa6d100000000,
+    0xf036a1b200000000, 0x9c5da91700000000, 0xb884ae7400000000,
+    0xa748142800000000, 0x8391134b00000000, 0xeffa1bee00000000,
+    0xcb231c8d00000000, 0x762a7a7f00000000, 0x52f37d1c00000000,
+    0x3e9875b900000000, 0x1a4172da00000000, 0x4b1ce0d600000000,
+    0x6fc5e7b500000000, 0x03aeef1000000000, 0x2777e87300000000,
+    0x9a7e8e8100000000, 0xbea789e200000000, 0xd2cc814700000000,
+    0xf615862400000000, 0xe9d93c7800000000, 0xcd003b1b00000000,
+    0xa16b33be00000000, 0x85b234dd00000000, 0x38bb522f00000000,
+    0x1c62554c00000000, 0x70095de900000000, 0x54d05a8a00000000,
+    0x4e91285000000000, 0x6a482f3300000000, 0x0623279600000000,
+    0x22fa20f500000000, 0x9ff3460700000000, 0xbb2a416400000000,
+    0xd74149c100000000, 0xf3984ea200000000, 0xec54f4fe00000000,
+    0xc88df39d00000000, 0xa4e6fb3800000000, 0x803ffc5b00000000,
+    0x3d369aa900000000, 0x19ef9dca00000000, 0x7584956f00000000,
+    0x515d920c00000000, 0xd73eb17600000000, 0xf3e7b61500000000,
+    0x9f8cbeb000000000, 0xbb55b9d300000000, 0x065cdf2100000000,
+    0x2285d84200000000, 0x4eeed0e700000000, 0x6a37d78400000000,
+    0x75fb6dd800000000, 0x51226abb00000000, 0x3d49621e00000000,
+    0x1990657d00000000, 0xa499038f00000000, 0x804004ec00000000,
+    0xec2b0c4900000000, 0xc8f20b2a00000000, 0xd2b379f000000000,
+    0xf66a7e9300000000, 0x9a01763600000000, 0xbed8715500000000,
+    0x03d117a700000000, 0x270810c400000000, 0x4b63186100000000,
+    0x6fba1f0200000000, 0x7076a55e00000000, 0x54afa23d00000000,
+    0x38c4aa9800000000, 0x1c1dadfb00000000, 0xa114cb0900000000,
+    0x85cdcc6a00000000, 0xe9a6c4cf00000000, 0xcd7fc3ac00000000,
+    0x9c2251a000000000, 0xb8fb56c300000000, 0xd4905e6600000000,
+    0xf049590500000000, 0x4d403ff700000000, 0x6999389400000000,
+    0x05f2303100000000, 0x212b375200000000, 0x3ee78d0e00000000,
+    0x1a3e8a6d00000000, 0x765582c800000000, 0x528c85ab00000000,
+    0xef85e35900000000, 0xcb5ce43a00000000, 0xa737ec9f00000000,
+    0x83eeebfc00000000, 0x99af992600000000, 0xbd769e4500000000,
+    0xd11d96e000000000, 0xf5c4918300000000, 0x48cdf77100000000,
+    0x6c14f01200000000, 0x007ff8b700000000, 0x24a6ffd400000000,
+    0x3b6a458800000000, 0x1fb342eb00000000, 0x73d84a4e00000000,
+    0x57014d2d00000000, 0xea082bdf00000000, 0xced12cbc00000000,
+    0xa2ba241900000000, 0x8663237a00000000, 0xae7d62ed00000000,
+    0x8aa4658e00000000, 0xe6cf6d2b00000000, 0xc2166a4800000000,
+    0x7f1f0cba00000000, 0x5bc60bd900000000, 0x37ad037c00000000,
+    0x1374041f00000000, 0x0cb8be4300000000, 0x2861b92000000000,
+    0x440ab18500000000, 0x60d3b6e600000000, 0xdddad01400000000,
+    0xf903d77700000000, 0x9568dfd200000000, 0xb1b1d8b100000000,
+    0xabf0aa6b00000000, 0x8f29ad0800000000, 0xe342a5ad00000000,
+    0xc79ba2ce00000000, 0x7a92c43c00000000, 0x5e4bc35f00000000,
+    0x3220cbfa00000000, 0x16f9cc9900000000, 0x093576c500000000,
+    0x2dec71a600000000, 0x4187790300000000, 0x655e7e6000000000,
+    0xd857189200000000, 0xfc8e1ff100000000, 0x90e5175400000000,
+    0xb43c103700000000, 0xe561823b00000000, 0xc1b8855800000000,
+    0xadd38dfd00000000, 0x890a8a9e00000000, 0x3403ec6c00000000,
+    0x10daeb0f00000000, 0x7cb1e3aa00000000, 0x5868e4c900000000,
+    0x47a45e9500000000, 0x637d59f600000000, 0x0f16515300000000,
+    0x2bcf563000000000, 0x96c630c200000000, 0xb21f37a100000000,
+    0xde743f0400000000, 0xfaad386700000000, 0xe0ec4abd00000000,
+    0xc4354dde00000000, 0xa85e457b00000000, 0x8c87421800000000,
+    0x318e24ea00000000, 0x1557238900000000, 0x793c2b2c00000000,
+    0x5de52c4f00000000, 0x4229961300000000, 0x66f0917000000000,
+    0x0a9b99d500000000, 0x2e429eb600000000, 0x934bf84400000000,
+    0xb792ff2700000000, 0xdbf9f78200000000, 0xff20f0e100000000,
+    0x7943d39b00000000, 0x5d9ad4f800000000, 0x31f1dc5d00000000,
+    0x1528db3e00000000, 0xa821bdcc00000000, 0x8cf8baaf00000000,
+    0xe093b20a00000000, 0xc44ab56900000000, 0xdb860f3500000000,
+    0xff5f085600000000, 0x933400f300000000, 0xb7ed079000000000,
+    0x0ae4616200000000, 0x2e3d660100000000, 0x42566ea400000000,
+    0x668f69c700000000, 0x7cce1b1d00000000, 0x58171c7e00000000,
+    0x347c14db00000000, 0x10a513b800000000, 0xadac754a00000000,
+    0x8975722900000000, 0xe51e7a8c00000000, 0xc1c77def00000000,
+    0xde0bc7b300000000, 0xfad2c0d000000000, 0x96b9c87500000000,
+    0xb260cf1600000000, 0x0f69a9e400000000, 0x2bb0ae8700000000,
+    0x47dba62200000000, 0x6302a14100000000, 0x325f334d00000000,
+    0x1686342e00000000, 0x7aed3c8b00000000, 0x5e343be800000000,
+    0xe33d5d1a00000000, 0xc7e45a7900000000, 0xab8f52dc00000000,
+    0x8f5655bf00000000, 0x909aefe300000000, 0xb443e88000000000,
+    0xd828e02500000000, 0xfcf1e74600000000, 0x41f881b400000000,
+    0x652186d700000000, 0x094a8e7200000000, 0x2d93891100000000,
+    0x37d2fbcb00000000, 0x130bfca800000000, 0x7f60f40d00000000,
+    0x5bb9f36e00000000, 0xe6b0959c00000000, 0xc26992ff00000000,
+    0xae029a5a00000000, 0x8adb9d3900000000, 0x9517276500000000,
+    0xb1ce200600000000, 0xdda528a300000000, 0xf97c2fc000000000,
+    0x4475493200000000, 0x60ac4e5100000000, 0x0cc746f400000000,
+    0x281e419700000000},
+   {0x0000000000000000, 0x08e3603c00000000, 0x10c6c17800000000,
+    0x1825a14400000000, 0x208c83f100000000, 0x286fe3cd00000000,
+    0x304a428900000000, 0x38a922b500000000, 0x011e763800000000,
+    0x09fd160400000000, 0x11d8b74000000000, 0x193bd77c00000000,
+    0x2192f5c900000000, 0x297195f500000000, 0x315434b100000000,
+    0x39b7548d00000000, 0x023cec7000000000, 0x0adf8c4c00000000,
+    0x12fa2d0800000000, 0x1a194d3400000000, 0x22b06f8100000000,
+    0x2a530fbd00000000, 0x3276aef900000000, 0x3a95cec500000000,
+    0x03229a4800000000, 0x0bc1fa7400000000, 0x13e45b3000000000,
+    0x1b073b0c00000000, 0x23ae19b900000000, 0x2b4d798500000000,
+    0x3368d8c100000000, 0x3b8bb8fd00000000, 0x0478d8e100000000,
+    0x0c9bb8dd00000000, 0x14be199900000000, 0x1c5d79a500000000,
+    0x24f45b1000000000, 0x2c173b2c00000000, 0x34329a6800000000,
+    0x3cd1fa5400000000, 0x0566aed900000000, 0x0d85cee500000000,
+    0x15a06fa100000000, 0x1d430f9d00000000, 0x25ea2d2800000000,
+    0x2d094d1400000000, 0x352cec5000000000, 0x3dcf8c6c00000000,
+    0x0644349100000000, 0x0ea754ad00000000, 0x1682f5e900000000,
+    0x1e6195d500000000, 0x26c8b76000000000, 0x2e2bd75c00000000,
+    0x360e761800000000, 0x3eed162400000000, 0x075a42a900000000,
+    0x0fb9229500000000, 0x179c83d100000000, 0x1f7fe3ed00000000,
+    0x27d6c15800000000, 0x2f35a16400000000, 0x3710002000000000,
+    0x3ff3601c00000000, 0x49f6c11800000000, 0x4115a12400000000,
+    0x5930006000000000, 0x51d3605c00000000, 0x697a42e900000000,
+    0x619922d500000000, 0x79bc839100000000, 0x715fe3ad00000000,
+    0x48e8b72000000000, 0x400bd71c00000000, 0x582e765800000000,
+    0x50cd166400000000, 0x686434d100000000, 0x608754ed00000000,
+    0x78a2f5a900000000, 0x7041959500000000, 0x4bca2d6800000000,
+    0x43294d5400000000, 0x5b0cec1000000000, 0x53ef8c2c00000000,
+    0x6b46ae9900000000, 0x63a5cea500000000, 0x7b806fe100000000,
+    0x73630fdd00000000, 0x4ad45b5000000000, 0x42373b6c00000000,
+    0x5a129a2800000000, 0x52f1fa1400000000, 0x6a58d8a100000000,
+    0x62bbb89d00000000, 0x7a9e19d900000000, 0x727d79e500000000,
+    0x4d8e19f900000000, 0x456d79c500000000, 0x5d48d88100000000,
+    0x55abb8bd00000000, 0x6d029a0800000000, 0x65e1fa3400000000,
+    0x7dc45b7000000000, 0x75273b4c00000000, 0x4c906fc100000000,
+    0x44730ffd00000000, 0x5c56aeb900000000, 0x54b5ce8500000000,
+    0x6c1cec3000000000, 0x64ff8c0c00000000, 0x7cda2d4800000000,
+    0x74394d7400000000, 0x4fb2f58900000000, 0x475195b500000000,
+    0x5f7434f100000000, 0x579754cd00000000, 0x6f3e767800000000,
+    0x67dd164400000000, 0x7ff8b70000000000, 0x771bd73c00000000,
+    0x4eac83b100000000, 0x464fe38d00000000, 0x5e6a42c900000000,
+    0x568922f500000000, 0x6e20004000000000, 0x66c3607c00000000,
+    0x7ee6c13800000000, 0x7605a10400000000, 0x92ec833100000000,
+    0x9a0fe30d00000000, 0x822a424900000000, 0x8ac9227500000000,
+    0xb26000c000000000, 0xba8360fc00000000, 0xa2a6c1b800000000,
+    0xaa45a18400000000, 0x93f2f50900000000, 0x9b11953500000000,
+    0x8334347100000000, 0x8bd7544d00000000, 0xb37e76f800000000,
+    0xbb9d16c400000000, 0xa3b8b78000000000, 0xab5bd7bc00000000,
+    0x90d06f4100000000, 0x98330f7d00000000, 0x8016ae3900000000,
+    0x88f5ce0500000000, 0xb05cecb000000000, 0xb8bf8c8c00000000,
+    0xa09a2dc800000000, 0xa8794df400000000, 0x91ce197900000000,
+    0x992d794500000000, 0x8108d80100000000, 0x89ebb83d00000000,
+    0xb1429a8800000000, 0xb9a1fab400000000, 0xa1845bf000000000,
+    0xa9673bcc00000000, 0x96945bd000000000, 0x9e773bec00000000,
+    0x86529aa800000000, 0x8eb1fa9400000000, 0xb618d82100000000,
+    0xbefbb81d00000000, 0xa6de195900000000, 0xae3d796500000000,
+    0x978a2de800000000, 0x9f694dd400000000, 0x874cec9000000000,
+    0x8faf8cac00000000, 0xb706ae1900000000, 0xbfe5ce2500000000,
+    0xa7c06f6100000000, 0xaf230f5d00000000, 0x94a8b7a000000000,
+    0x9c4bd79c00000000, 0x846e76d800000000, 0x8c8d16e400000000,
+    0xb424345100000000, 0xbcc7546d00000000, 0xa4e2f52900000000,
+    0xac01951500000000, 0x95b6c19800000000, 0x9d55a1a400000000,
+    0x857000e000000000, 0x8d9360dc00000000, 0xb53a426900000000,
+    0xbdd9225500000000, 0xa5fc831100000000, 0xad1fe32d00000000,
+    0xdb1a422900000000, 0xd3f9221500000000, 0xcbdc835100000000,
+    0xc33fe36d00000000, 0xfb96c1d800000000, 0xf375a1e400000000,
+    0xeb5000a000000000, 0xe3b3609c00000000, 0xda04341100000000,
+    0xd2e7542d00000000, 0xcac2f56900000000, 0xc221955500000000,
+    0xfa88b7e000000000, 0xf26bd7dc00000000, 0xea4e769800000000,
+    0xe2ad16a400000000, 0xd926ae5900000000, 0xd1c5ce6500000000,
+    0xc9e06f2100000000, 0xc1030f1d00000000, 0xf9aa2da800000000,
+    0xf1494d9400000000, 0xe96cecd000000000, 0xe18f8cec00000000,
+    0xd838d86100000000, 0xd0dbb85d00000000, 0xc8fe191900000000,
+    0xc01d792500000000, 0xf8b45b9000000000, 0xf0573bac00000000,
+    0xe8729ae800000000, 0xe091fad400000000, 0xdf629ac800000000,
+    0xd781faf400000000, 0xcfa45bb000000000, 0xc7473b8c00000000,
+    0xffee193900000000, 0xf70d790500000000, 0xef28d84100000000,
+    0xe7cbb87d00000000, 0xde7cecf000000000, 0xd69f8ccc00000000,
+    0xceba2d8800000000, 0xc6594db400000000, 0xfef06f0100000000,
+    0xf6130f3d00000000, 0xee36ae7900000000, 0xe6d5ce4500000000,
+    0xdd5e76b800000000, 0xd5bd168400000000, 0xcd98b7c000000000,
+    0xc57bd7fc00000000, 0xfdd2f54900000000, 0xf531957500000000,
+    0xed14343100000000, 0xe5f7540d00000000, 0xdc40008000000000,
+    0xd4a360bc00000000, 0xcc86c1f800000000, 0xc465a1c400000000,
+    0xfccc837100000000, 0xf42fe34d00000000, 0xec0a420900000000,
+    0xe4e9223500000000},
+   {0x0000000000000000, 0xd1e8e70e00000000, 0xa2d1cf1d00000000,
+    0x7339281300000000, 0x44a39f3b00000000, 0x954b783500000000,
+    0xe672502600000000, 0x379ab72800000000, 0x88463f7700000000,
+    0x59aed87900000000, 0x2a97f06a00000000, 0xfb7f176400000000,
+    0xcce5a04c00000000, 0x1d0d474200000000, 0x6e346f5100000000,
+    0xbfdc885f00000000, 0x108d7eee00000000, 0xc16599e000000000,
+    0xb25cb1f300000000, 0x63b456fd00000000, 0x542ee1d500000000,
+    0x85c606db00000000, 0xf6ff2ec800000000, 0x2717c9c600000000,
+    0x98cb419900000000, 0x4923a69700000000, 0x3a1a8e8400000000,
+    0xebf2698a00000000, 0xdc68dea200000000, 0x0d8039ac00000000,
+    0x7eb911bf00000000, 0xaf51f6b100000000, 0x611c8c0700000000,
+    0xb0f46b0900000000, 0xc3cd431a00000000, 0x1225a41400000000,
+    0x25bf133c00000000, 0xf457f43200000000, 0x876edc2100000000,
+    0x56863b2f00000000, 0xe95ab37000000000, 0x38b2547e00000000,
+    0x4b8b7c6d00000000, 0x9a639b6300000000, 0xadf92c4b00000000,
+    0x7c11cb4500000000, 0x0f28e35600000000, 0xdec0045800000000,
+    0x7191f2e900000000, 0xa07915e700000000, 0xd3403df400000000,
+    0x02a8dafa00000000, 0x35326dd200000000, 0xe4da8adc00000000,
+    0x97e3a2cf00000000, 0x460b45c100000000, 0xf9d7cd9e00000000,
+    0x283f2a9000000000, 0x5b06028300000000, 0x8aeee58d00000000,
+    0xbd7452a500000000, 0x6c9cb5ab00000000, 0x1fa59db800000000,
+    0xce4d7ab600000000, 0xc238180f00000000, 0x13d0ff0100000000,
+    0x60e9d71200000000, 0xb101301c00000000, 0x869b873400000000,
+    0x5773603a00000000, 0x244a482900000000, 0xf5a2af2700000000,
+    0x4a7e277800000000, 0x9b96c07600000000, 0xe8afe86500000000,
+    0x39470f6b00000000, 0x0eddb84300000000, 0xdf355f4d00000000,
+    0xac0c775e00000000, 0x7de4905000000000, 0xd2b566e100000000,
+    0x035d81ef00000000, 0x7064a9fc00000000, 0xa18c4ef200000000,
+    0x9616f9da00000000, 0x47fe1ed400000000, 0x34c736c700000000,
+    0xe52fd1c900000000, 0x5af3599600000000, 0x8b1bbe9800000000,
+    0xf822968b00000000, 0x29ca718500000000, 0x1e50c6ad00000000,
+    0xcfb821a300000000, 0xbc8109b000000000, 0x6d69eebe00000000,
+    0xa324940800000000, 0x72cc730600000000, 0x01f55b1500000000,
+    0xd01dbc1b00000000, 0xe7870b3300000000, 0x366fec3d00000000,
+    0x4556c42e00000000, 0x94be232000000000, 0x2b62ab7f00000000,
+    0xfa8a4c7100000000, 0x89b3646200000000, 0x585b836c00000000,
+    0x6fc1344400000000, 0xbe29d34a00000000, 0xcd10fb5900000000,
+    0x1cf81c5700000000, 0xb3a9eae600000000, 0x62410de800000000,
+    0x117825fb00000000, 0xc090c2f500000000, 0xf70a75dd00000000,
+    0x26e292d300000000, 0x55dbbac000000000, 0x84335dce00000000,
+    0x3befd59100000000, 0xea07329f00000000, 0x993e1a8c00000000,
+    0x48d6fd8200000000, 0x7f4c4aaa00000000, 0xaea4ada400000000,
+    0xdd9d85b700000000, 0x0c7562b900000000, 0x8471301e00000000,
+    0x5599d71000000000, 0x26a0ff0300000000, 0xf748180d00000000,
+    0xc0d2af2500000000, 0x113a482b00000000, 0x6203603800000000,
+    0xb3eb873600000000, 0x0c370f6900000000, 0xdddfe86700000000,
+    0xaee6c07400000000, 0x7f0e277a00000000, 0x4894905200000000,
+    0x997c775c00000000, 0xea455f4f00000000, 0x3badb84100000000,
+    0x94fc4ef000000000, 0x4514a9fe00000000, 0x362d81ed00000000,
+    0xe7c566e300000000, 0xd05fd1cb00000000, 0x01b736c500000000,
+    0x728e1ed600000000, 0xa366f9d800000000, 0x1cba718700000000,
+    0xcd52968900000000, 0xbe6bbe9a00000000, 0x6f83599400000000,
+    0x5819eebc00000000, 0x89f109b200000000, 0xfac821a100000000,
+    0x2b20c6af00000000, 0xe56dbc1900000000, 0x34855b1700000000,
+    0x47bc730400000000, 0x9654940a00000000, 0xa1ce232200000000,
+    0x7026c42c00000000, 0x031fec3f00000000, 0xd2f70b3100000000,
+    0x6d2b836e00000000, 0xbcc3646000000000, 0xcffa4c7300000000,
+    0x1e12ab7d00000000, 0x29881c5500000000, 0xf860fb5b00000000,
+    0x8b59d34800000000, 0x5ab1344600000000, 0xf5e0c2f700000000,
+    0x240825f900000000, 0x57310dea00000000, 0x86d9eae400000000,
+    0xb1435dcc00000000, 0x60abbac200000000, 0x139292d100000000,
+    0xc27a75df00000000, 0x7da6fd8000000000, 0xac4e1a8e00000000,
+    0xdf77329d00000000, 0x0e9fd59300000000, 0x390562bb00000000,
+    0xe8ed85b500000000, 0x9bd4ada600000000, 0x4a3c4aa800000000,
+    0x4649281100000000, 0x97a1cf1f00000000, 0xe498e70c00000000,
+    0x3570000200000000, 0x02eab72a00000000, 0xd302502400000000,
+    0xa03b783700000000, 0x71d39f3900000000, 0xce0f176600000000,
+    0x1fe7f06800000000, 0x6cded87b00000000, 0xbd363f7500000000,
+    0x8aac885d00000000, 0x5b446f5300000000, 0x287d474000000000,
+    0xf995a04e00000000, 0x56c456ff00000000, 0x872cb1f100000000,
+    0xf41599e200000000, 0x25fd7eec00000000, 0x1267c9c400000000,
+    0xc38f2eca00000000, 0xb0b606d900000000, 0x615ee1d700000000,
+    0xde82698800000000, 0x0f6a8e8600000000, 0x7c53a69500000000,
+    0xadbb419b00000000, 0x9a21f6b300000000, 0x4bc911bd00000000,
+    0x38f039ae00000000, 0xe918dea000000000, 0x2755a41600000000,
+    0xf6bd431800000000, 0x85846b0b00000000, 0x546c8c0500000000,
+    0x63f63b2d00000000, 0xb21edc2300000000, 0xc127f43000000000,
+    0x10cf133e00000000, 0xaf139b6100000000, 0x7efb7c6f00000000,
+    0x0dc2547c00000000, 0xdc2ab37200000000, 0xebb0045a00000000,
+    0x3a58e35400000000, 0x4961cb4700000000, 0x98892c4900000000,
+    0x37d8daf800000000, 0xe6303df600000000, 0x950915e500000000,
+    0x44e1f2eb00000000, 0x737b45c300000000, 0xa293a2cd00000000,
+    0xd1aa8ade00000000, 0x00426dd000000000, 0xbf9ee58f00000000,
+    0x6e76028100000000, 0x1d4f2a9200000000, 0xcca7cd9c00000000,
+    0xfb3d7ab400000000, 0x2ad59dba00000000, 0x59ecb5a900000000,
+    0x880452a700000000},
+   {0x0000000000000000, 0xaa05daf100000000, 0x150dc53800000000,
+    0xbf081fc900000000, 0x2a1a8a7100000000, 0x801f508000000000,
+    0x3f174f4900000000, 0x951295b800000000, 0x543414e300000000,
+    0xfe31ce1200000000, 0x4139d1db00000000, 0xeb3c0b2a00000000,
+    0x7e2e9e9200000000, 0xd42b446300000000, 0x6b235baa00000000,
+    0xc126815b00000000, 0xe96e591d00000000, 0x436b83ec00000000,
+    0xfc639c2500000000, 0x566646d400000000, 0xc374d36c00000000,
+    0x6971099d00000000, 0xd679165400000000, 0x7c7ccca500000000,
+    0xbd5a4dfe00000000, 0x175f970f00000000, 0xa85788c600000000,
+    0x0252523700000000, 0x9740c78f00000000, 0x3d451d7e00000000,
+    0x824d02b700000000, 0x2848d84600000000, 0xd2ddb23a00000000,
+    0x78d868cb00000000, 0xc7d0770200000000, 0x6dd5adf300000000,
+    0xf8c7384b00000000, 0x52c2e2ba00000000, 0xedcafd7300000000,
+    0x47cf278200000000, 0x86e9a6d900000000, 0x2cec7c2800000000,
+    0x93e463e100000000, 0x39e1b91000000000, 0xacf32ca800000000,
+    0x06f6f65900000000, 0xb9fee99000000000, 0x13fb336100000000,
+    0x3bb3eb2700000000, 0x91b631d600000000, 0x2ebe2e1f00000000,
+    0x84bbf4ee00000000, 0x11a9615600000000, 0xbbacbba700000000,
+    0x04a4a46e00000000, 0xaea17e9f00000000, 0x6f87ffc400000000,
+    0xc582253500000000, 0x7a8a3afc00000000, 0xd08fe00d00000000,
+    0x459d75b500000000, 0xef98af4400000000, 0x5090b08d00000000,
+    0xfa956a7c00000000, 0xa4bb657500000000, 0x0ebebf8400000000,
+    0xb1b6a04d00000000, 0x1bb37abc00000000, 0x8ea1ef0400000000,
+    0x24a435f500000000, 0x9bac2a3c00000000, 0x31a9f0cd00000000,
+    0xf08f719600000000, 0x5a8aab6700000000, 0xe582b4ae00000000,
+    0x4f876e5f00000000, 0xda95fbe700000000, 0x7090211600000000,
+    0xcf983edf00000000, 0x659de42e00000000, 0x4dd53c6800000000,
+    0xe7d0e69900000000, 0x58d8f95000000000, 0xf2dd23a100000000,
+    0x67cfb61900000000, 0xcdca6ce800000000, 0x72c2732100000000,
+    0xd8c7a9d000000000, 0x19e1288b00000000, 0xb3e4f27a00000000,
+    0x0cecedb300000000, 0xa6e9374200000000, 0x33fba2fa00000000,
+    0x99fe780b00000000, 0x26f667c200000000, 0x8cf3bd3300000000,
+    0x7666d74f00000000, 0xdc630dbe00000000, 0x636b127700000000,
+    0xc96ec88600000000, 0x5c7c5d3e00000000, 0xf67987cf00000000,
+    0x4971980600000000, 0xe37442f700000000, 0x2252c3ac00000000,
+    0x8857195d00000000, 0x375f069400000000, 0x9d5adc6500000000,
+    0x084849dd00000000, 0xa24d932c00000000, 0x1d458ce500000000,
+    0xb740561400000000, 0x9f088e5200000000, 0x350d54a300000000,
+    0x8a054b6a00000000, 0x2000919b00000000, 0xb512042300000000,
+    0x1f17ded200000000, 0xa01fc11b00000000, 0x0a1a1bea00000000,
+    0xcb3c9ab100000000, 0x6139404000000000, 0xde315f8900000000,
+    0x7434857800000000, 0xe12610c000000000, 0x4b23ca3100000000,
+    0xf42bd5f800000000, 0x5e2e0f0900000000, 0x4877cbea00000000,
+    0xe272111b00000000, 0x5d7a0ed200000000, 0xf77fd42300000000,
+    0x626d419b00000000, 0xc8689b6a00000000, 0x776084a300000000,
+    0xdd655e5200000000, 0x1c43df0900000000, 0xb64605f800000000,
+    0x094e1a3100000000, 0xa34bc0c000000000, 0x3659557800000000,
+    0x9c5c8f8900000000, 0x2354904000000000, 0x89514ab100000000,
+    0xa11992f700000000, 0x0b1c480600000000, 0xb41457cf00000000,
+    0x1e118d3e00000000, 0x8b03188600000000, 0x2106c27700000000,
+    0x9e0eddbe00000000, 0x340b074f00000000, 0xf52d861400000000,
+    0x5f285ce500000000, 0xe020432c00000000, 0x4a2599dd00000000,
+    0xdf370c6500000000, 0x7532d69400000000, 0xca3ac95d00000000,
+    0x603f13ac00000000, 0x9aaa79d000000000, 0x30afa32100000000,
+    0x8fa7bce800000000, 0x25a2661900000000, 0xb0b0f3a100000000,
+    0x1ab5295000000000, 0xa5bd369900000000, 0x0fb8ec6800000000,
+    0xce9e6d3300000000, 0x649bb7c200000000, 0xdb93a80b00000000,
+    0x719672fa00000000, 0xe484e74200000000, 0x4e813db300000000,
+    0xf189227a00000000, 0x5b8cf88b00000000, 0x73c420cd00000000,
+    0xd9c1fa3c00000000, 0x66c9e5f500000000, 0xcccc3f0400000000,
+    0x59deaabc00000000, 0xf3db704d00000000, 0x4cd36f8400000000,
+    0xe6d6b57500000000, 0x27f0342e00000000, 0x8df5eedf00000000,
+    0x32fdf11600000000, 0x98f82be700000000, 0x0deabe5f00000000,
+    0xa7ef64ae00000000, 0x18e77b6700000000, 0xb2e2a19600000000,
+    0xecccae9f00000000, 0x46c9746e00000000, 0xf9c16ba700000000,
+    0x53c4b15600000000, 0xc6d624ee00000000, 0x6cd3fe1f00000000,
+    0xd3dbe1d600000000, 0x79de3b2700000000, 0xb8f8ba7c00000000,
+    0x12fd608d00000000, 0xadf57f4400000000, 0x07f0a5b500000000,
+    0x92e2300d00000000, 0x38e7eafc00000000, 0x87eff53500000000,
+    0x2dea2fc400000000, 0x05a2f78200000000, 0xafa72d7300000000,
+    0x10af32ba00000000, 0xbaaae84b00000000, 0x2fb87df300000000,
+    0x85bda70200000000, 0x3ab5b8cb00000000, 0x90b0623a00000000,
+    0x5196e36100000000, 0xfb93399000000000, 0x449b265900000000,
+    0xee9efca800000000, 0x7b8c691000000000, 0xd189b3e100000000,
+    0x6e81ac2800000000, 0xc48476d900000000, 0x3e111ca500000000,
+    0x9414c65400000000, 0x2b1cd99d00000000, 0x8119036c00000000,
+    0x140b96d400000000, 0xbe0e4c2500000000, 0x010653ec00000000,
+    0xab03891d00000000, 0x6a25084600000000, 0xc020d2b700000000,
+    0x7f28cd7e00000000, 0xd52d178f00000000, 0x403f823700000000,
+    0xea3a58c600000000, 0x5532470f00000000, 0xff379dfe00000000,
+    0xd77f45b800000000, 0x7d7a9f4900000000, 0xc272808000000000,
+    0x68775a7100000000, 0xfd65cfc900000000, 0x5760153800000000,
+    0xe8680af100000000, 0x426dd00000000000, 0x834b515b00000000,
+    0x294e8baa00000000, 0x9646946300000000, 0x3c434e9200000000,
+    0xa951db2a00000000, 0x035401db00000000, 0xbc5c1e1200000000,
+    0x1659c4e300000000}};
+
+#else /* W == 4 */
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87,
+    0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede,
+    0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab,
+    0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c,
+    0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1,
+    0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7,
+    0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e,
+    0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308,
+    0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5,
+    0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472,
+    0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07,
+    0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e,
+    0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa,
+    0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec,
+    0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6,
+    0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0,
+    0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3,
+    0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba,
+    0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf,
+    0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975,
+    0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8,
+    0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde,
+    0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a,
+    0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c,
+    0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1,
+    0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65,
+    0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410,
+    0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649,
+    0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a,
+    0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c,
+    0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946,
+    0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450,
+    0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e,
+    0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857,
+    0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022,
+    0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5,
+    0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758,
+    0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e,
+    0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d,
+    0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b,
+    0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6,
+    0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401,
+    0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74,
+    0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d,
+    0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073,
+    0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65,
+    0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f,
+    0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749,
+    0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a,
+    0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033,
+    0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846,
+    0x0d7139d7},
+   {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563,
+    0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f,
+    0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875,
+    0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536,
+    0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8,
+    0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43,
+    0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f,
+    0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184,
+    0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a,
+    0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39,
+    0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523,
+    0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f,
+    0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d,
+    0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6,
+    0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b,
+    0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0,
+    0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151,
+    0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d,
+    0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47,
+    0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a,
+    0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964,
+    0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef,
+    0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d,
+    0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6,
+    0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348,
+    0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53,
+    0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449,
+    0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645,
+    0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4,
+    0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f,
+    0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2,
+    0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69,
+    0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46,
+    0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a,
+    0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650,
+    0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13,
+    0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded,
+    0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366,
+    0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57,
+    0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc,
+    0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222,
+    0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61,
+    0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b,
+    0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277,
+    0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558,
+    0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3,
+    0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e,
+    0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5,
+    0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74,
+    0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78,
+    0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262,
+    0x1c53e98a},
+   {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b,
+    0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40,
+    0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580,
+    0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7,
+    0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a,
+    0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37,
+    0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75,
+    0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218,
+    0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5,
+    0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2,
+    0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02,
+    0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59,
+    0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1,
+    0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c,
+    0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a,
+    0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307,
+    0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486,
+    0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd,
+    0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d,
+    0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2,
+    0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f,
+    0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72,
+    0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8,
+    0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985,
+    0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268,
+    0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94,
+    0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454,
+    0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f,
+    0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e,
+    0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3,
+    0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915,
+    0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778,
+    0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821,
+    0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a,
+    0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba,
+    0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d,
+    0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560,
+    0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d,
+    0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe,
+    0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3,
+    0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e,
+    0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509,
+    0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9,
+    0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92,
+    0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb,
+    0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6,
+    0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50,
+    0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d,
+    0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc,
+    0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7,
+    0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927,
+    0x3f88e851},
+   {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96,
+    0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8,
+    0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0,
+    0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14,
+    0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7,
+    0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4,
+    0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe,
+    0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad,
+    0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e,
+    0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa,
+    0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2,
+    0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c,
+    0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab,
+    0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8,
+    0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d,
+    0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e,
+    0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7,
+    0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99,
+    0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1,
+    0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690,
+    0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933,
+    0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20,
+    0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf,
+    0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc,
+    0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f,
+    0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92,
+    0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca,
+    0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4,
+    0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd,
+    0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de,
+    0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb,
+    0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8,
+    0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474,
+    0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a,
+    0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252,
+    0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6,
+    0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55,
+    0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846,
+    0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7,
+    0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4,
+    0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47,
+    0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3,
+    0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb,
+    0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5,
+    0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49,
+    0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a,
+    0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f,
+    0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c,
+    0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305,
+    0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b,
+    0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523,
+    0x3dee8ca6}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x00000000, 0x85d996dd, 0x4bb55c60, 0xce6ccabd, 0x966ab9c0,
+    0x13b32f1d, 0xdddfe5a0, 0x5806737d, 0x6dd3035a, 0xe80a9587,
+    0x26665f3a, 0xa3bfc9e7, 0xfbb9ba9a, 0x7e602c47, 0xb00ce6fa,
+    0x35d57027, 0xdaa607b4, 0x5f7f9169, 0x91135bd4, 0x14cacd09,
+    0x4cccbe74, 0xc91528a9, 0x0779e214, 0x82a074c9, 0xb77504ee,
+    0x32ac9233, 0xfcc0588e, 0x7919ce53, 0x211fbd2e, 0xa4c62bf3,
+    0x6aaae14e, 0xef737793, 0xf54b7eb3, 0x7092e86e, 0xbefe22d3,
+    0x3b27b40e, 0x6321c773, 0xe6f851ae, 0x28949b13, 0xad4d0dce,
+    0x98987de9, 0x1d41eb34, 0xd32d2189, 0x56f4b754, 0x0ef2c429,
+    0x8b2b52f4, 0x45479849, 0xc09e0e94, 0x2fed7907, 0xaa34efda,
+    0x64582567, 0xe181b3ba, 0xb987c0c7, 0x3c5e561a, 0xf2329ca7,
+    0x77eb0a7a, 0x423e7a5d, 0xc7e7ec80, 0x098b263d, 0x8c52b0e0,
+    0xd454c39d, 0x518d5540, 0x9fe19ffd, 0x1a380920, 0xab918dbd,
+    0x2e481b60, 0xe024d1dd, 0x65fd4700, 0x3dfb347d, 0xb822a2a0,
+    0x764e681d, 0xf397fec0, 0xc6428ee7, 0x439b183a, 0x8df7d287,
+    0x082e445a, 0x50283727, 0xd5f1a1fa, 0x1b9d6b47, 0x9e44fd9a,
+    0x71378a09, 0xf4ee1cd4, 0x3a82d669, 0xbf5b40b4, 0xe75d33c9,
+    0x6284a514, 0xace86fa9, 0x2931f974, 0x1ce48953, 0x993d1f8e,
+    0x5751d533, 0xd28843ee, 0x8a8e3093, 0x0f57a64e, 0xc13b6cf3,
+    0x44e2fa2e, 0x5edaf30e, 0xdb0365d3, 0x156faf6e, 0x90b639b3,
+    0xc8b04ace, 0x4d69dc13, 0x830516ae, 0x06dc8073, 0x3309f054,
+    0xb6d06689, 0x78bcac34, 0xfd653ae9, 0xa5634994, 0x20badf49,
+    0xeed615f4, 0x6b0f8329, 0x847cf4ba, 0x01a56267, 0xcfc9a8da,
+    0x4a103e07, 0x12164d7a, 0x97cfdba7, 0x59a3111a, 0xdc7a87c7,
+    0xe9aff7e0, 0x6c76613d, 0xa21aab80, 0x27c33d5d, 0x7fc54e20,
+    0xfa1cd8fd, 0x34701240, 0xb1a9849d, 0x17256aa0, 0x92fcfc7d,
+    0x5c9036c0, 0xd949a01d, 0x814fd360, 0x049645bd, 0xcafa8f00,
+    0x4f2319dd, 0x7af669fa, 0xff2fff27, 0x3143359a, 0xb49aa347,
+    0xec9cd03a, 0x694546e7, 0xa7298c5a, 0x22f01a87, 0xcd836d14,
+    0x485afbc9, 0x86363174, 0x03efa7a9, 0x5be9d4d4, 0xde304209,
+    0x105c88b4, 0x95851e69, 0xa0506e4e, 0x2589f893, 0xebe5322e,
+    0x6e3ca4f3, 0x363ad78e, 0xb3e34153, 0x7d8f8bee, 0xf8561d33,
+    0xe26e1413, 0x67b782ce, 0xa9db4873, 0x2c02deae, 0x7404add3,
+    0xf1dd3b0e, 0x3fb1f1b3, 0xba68676e, 0x8fbd1749, 0x0a648194,
+    0xc4084b29, 0x41d1ddf4, 0x19d7ae89, 0x9c0e3854, 0x5262f2e9,
+    0xd7bb6434, 0x38c813a7, 0xbd11857a, 0x737d4fc7, 0xf6a4d91a,
+    0xaea2aa67, 0x2b7b3cba, 0xe517f607, 0x60ce60da, 0x551b10fd,
+    0xd0c28620, 0x1eae4c9d, 0x9b77da40, 0xc371a93d, 0x46a83fe0,
+    0x88c4f55d, 0x0d1d6380, 0xbcb4e71d, 0x396d71c0, 0xf701bb7d,
+    0x72d82da0, 0x2ade5edd, 0xaf07c800, 0x616b02bd, 0xe4b29460,
+    0xd167e447, 0x54be729a, 0x9ad2b827, 0x1f0b2efa, 0x470d5d87,
+    0xc2d4cb5a, 0x0cb801e7, 0x8961973a, 0x6612e0a9, 0xe3cb7674,
+    0x2da7bcc9, 0xa87e2a14, 0xf0785969, 0x75a1cfb4, 0xbbcd0509,
+    0x3e1493d4, 0x0bc1e3f3, 0x8e18752e, 0x4074bf93, 0xc5ad294e,
+    0x9dab5a33, 0x1872ccee, 0xd61e0653, 0x53c7908e, 0x49ff99ae,
+    0xcc260f73, 0x024ac5ce, 0x87935313, 0xdf95206e, 0x5a4cb6b3,
+    0x94207c0e, 0x11f9ead3, 0x242c9af4, 0xa1f50c29, 0x6f99c694,
+    0xea405049, 0xb2462334, 0x379fb5e9, 0xf9f37f54, 0x7c2ae989,
+    0x93599e1a, 0x168008c7, 0xd8ecc27a, 0x5d3554a7, 0x053327da,
+    0x80eab107, 0x4e867bba, 0xcb5fed67, 0xfe8a9d40, 0x7b530b9d,
+    0xb53fc120, 0x30e657fd, 0x68e02480, 0xed39b25d, 0x235578e0,
+    0xa68cee3d},
+   {0x00000000, 0x76e10f9d, 0xadc46ee1, 0xdb25617c, 0x1b8fac19,
+    0x6d6ea384, 0xb64bc2f8, 0xc0aacd65, 0x361e5933, 0x40ff56ae,
+    0x9bda37d2, 0xed3b384f, 0x2d91f52a, 0x5b70fab7, 0x80559bcb,
+    0xf6b49456, 0x6c3cb266, 0x1addbdfb, 0xc1f8dc87, 0xb719d31a,
+    0x77b31e7f, 0x015211e2, 0xda77709e, 0xac967f03, 0x5a22eb55,
+    0x2cc3e4c8, 0xf7e685b4, 0x81078a29, 0x41ad474c, 0x374c48d1,
+    0xec6929ad, 0x9a882630, 0xd87864cd, 0xae996b50, 0x75bc0a2c,
+    0x035d05b1, 0xc3f7c8d4, 0xb516c749, 0x6e33a635, 0x18d2a9a8,
+    0xee663dfe, 0x98873263, 0x43a2531f, 0x35435c82, 0xf5e991e7,
+    0x83089e7a, 0x582dff06, 0x2eccf09b, 0xb444d6ab, 0xc2a5d936,
+    0x1980b84a, 0x6f61b7d7, 0xafcb7ab2, 0xd92a752f, 0x020f1453,
+    0x74ee1bce, 0x825a8f98, 0xf4bb8005, 0x2f9ee179, 0x597feee4,
+    0x99d52381, 0xef342c1c, 0x34114d60, 0x42f042fd, 0xf1f7b941,
+    0x8716b6dc, 0x5c33d7a0, 0x2ad2d83d, 0xea781558, 0x9c991ac5,
+    0x47bc7bb9, 0x315d7424, 0xc7e9e072, 0xb108efef, 0x6a2d8e93,
+    0x1ccc810e, 0xdc664c6b, 0xaa8743f6, 0x71a2228a, 0x07432d17,
+    0x9dcb0b27, 0xeb2a04ba, 0x300f65c6, 0x46ee6a5b, 0x8644a73e,
+    0xf0a5a8a3, 0x2b80c9df, 0x5d61c642, 0xabd55214, 0xdd345d89,
+    0x06113cf5, 0x70f03368, 0xb05afe0d, 0xc6bbf190, 0x1d9e90ec,
+    0x6b7f9f71, 0x298fdd8c, 0x5f6ed211, 0x844bb36d, 0xf2aabcf0,
+    0x32007195, 0x44e17e08, 0x9fc41f74, 0xe92510e9, 0x1f9184bf,
+    0x69708b22, 0xb255ea5e, 0xc4b4e5c3, 0x041e28a6, 0x72ff273b,
+    0xa9da4647, 0xdf3b49da, 0x45b36fea, 0x33526077, 0xe877010b,
+    0x9e960e96, 0x5e3cc3f3, 0x28ddcc6e, 0xf3f8ad12, 0x8519a28f,
+    0x73ad36d9, 0x054c3944, 0xde695838, 0xa88857a5, 0x68229ac0,
+    0x1ec3955d, 0xc5e6f421, 0xb307fbbc, 0xe2ef7383, 0x940e7c1e,
+    0x4f2b1d62, 0x39ca12ff, 0xf960df9a, 0x8f81d007, 0x54a4b17b,
+    0x2245bee6, 0xd4f12ab0, 0xa210252d, 0x79354451, 0x0fd44bcc,
+    0xcf7e86a9, 0xb99f8934, 0x62bae848, 0x145be7d5, 0x8ed3c1e5,
+    0xf832ce78, 0x2317af04, 0x55f6a099, 0x955c6dfc, 0xe3bd6261,
+    0x3898031d, 0x4e790c80, 0xb8cd98d6, 0xce2c974b, 0x1509f637,
+    0x63e8f9aa, 0xa34234cf, 0xd5a33b52, 0x0e865a2e, 0x786755b3,
+    0x3a97174e, 0x4c7618d3, 0x975379af, 0xe1b27632, 0x2118bb57,
+    0x57f9b4ca, 0x8cdcd5b6, 0xfa3dda2b, 0x0c894e7d, 0x7a6841e0,
+    0xa14d209c, 0xd7ac2f01, 0x1706e264, 0x61e7edf9, 0xbac28c85,
+    0xcc238318, 0x56aba528, 0x204aaab5, 0xfb6fcbc9, 0x8d8ec454,
+    0x4d240931, 0x3bc506ac, 0xe0e067d0, 0x9601684d, 0x60b5fc1b,
+    0x1654f386, 0xcd7192fa, 0xbb909d67, 0x7b3a5002, 0x0ddb5f9f,
+    0xd6fe3ee3, 0xa01f317e, 0x1318cac2, 0x65f9c55f, 0xbedca423,
+    0xc83dabbe, 0x089766db, 0x7e766946, 0xa553083a, 0xd3b207a7,
+    0x250693f1, 0x53e79c6c, 0x88c2fd10, 0xfe23f28d, 0x3e893fe8,
+    0x48683075, 0x934d5109, 0xe5ac5e94, 0x7f2478a4, 0x09c57739,
+    0xd2e01645, 0xa40119d8, 0x64abd4bd, 0x124adb20, 0xc96fba5c,
+    0xbf8eb5c1, 0x493a2197, 0x3fdb2e0a, 0xe4fe4f76, 0x921f40eb,
+    0x52b58d8e, 0x24548213, 0xff71e36f, 0x8990ecf2, 0xcb60ae0f,
+    0xbd81a192, 0x66a4c0ee, 0x1045cf73, 0xd0ef0216, 0xa60e0d8b,
+    0x7d2b6cf7, 0x0bca636a, 0xfd7ef73c, 0x8b9ff8a1, 0x50ba99dd,
+    0x265b9640, 0xe6f15b25, 0x901054b8, 0x4b3535c4, 0x3dd43a59,
+    0xa75c1c69, 0xd1bd13f4, 0x0a987288, 0x7c797d15, 0xbcd3b070,
+    0xca32bfed, 0x1117de91, 0x67f6d10c, 0x9142455a, 0xe7a34ac7,
+    0x3c862bbb, 0x4a672426, 0x8acde943, 0xfc2ce6de, 0x270987a2,
+    0x51e8883f},
+   {0x00000000, 0xe8dbfbb9, 0x91b186a8, 0x796a7d11, 0x63657c8a,
+    0x8bbe8733, 0xf2d4fa22, 0x1a0f019b, 0x87cc89cf, 0x6f177276,
+    0x167d0f67, 0xfea6f4de, 0xe4a9f545, 0x0c720efc, 0x751873ed,
+    0x9dc38854, 0x4f9f6244, 0xa74499fd, 0xde2ee4ec, 0x36f51f55,
+    0x2cfa1ece, 0xc421e577, 0xbd4b9866, 0x559063df, 0xc853eb8b,
+    0x20881032, 0x59e26d23, 0xb139969a, 0xab369701, 0x43ed6cb8,
+    0x3a8711a9, 0xd25cea10, 0x9e3ec588, 0x76e53e31, 0x0f8f4320,
+    0xe754b899, 0xfd5bb902, 0x158042bb, 0x6cea3faa, 0x8431c413,
+    0x19f24c47, 0xf129b7fe, 0x8843caef, 0x60983156, 0x7a9730cd,
+    0x924ccb74, 0xeb26b665, 0x03fd4ddc, 0xd1a1a7cc, 0x397a5c75,
+    0x40102164, 0xa8cbdadd, 0xb2c4db46, 0x5a1f20ff, 0x23755dee,
+    0xcbaea657, 0x566d2e03, 0xbeb6d5ba, 0xc7dca8ab, 0x2f075312,
+    0x35085289, 0xddd3a930, 0xa4b9d421, 0x4c622f98, 0x7d7bfbca,
+    0x95a00073, 0xecca7d62, 0x041186db, 0x1e1e8740, 0xf6c57cf9,
+    0x8faf01e8, 0x6774fa51, 0xfab77205, 0x126c89bc, 0x6b06f4ad,
+    0x83dd0f14, 0x99d20e8f, 0x7109f536, 0x08638827, 0xe0b8739e,
+    0x32e4998e, 0xda3f6237, 0xa3551f26, 0x4b8ee49f, 0x5181e504,
+    0xb95a1ebd, 0xc03063ac, 0x28eb9815, 0xb5281041, 0x5df3ebf8,
+    0x249996e9, 0xcc426d50, 0xd64d6ccb, 0x3e969772, 0x47fcea63,
+    0xaf2711da, 0xe3453e42, 0x0b9ec5fb, 0x72f4b8ea, 0x9a2f4353,
+    0x802042c8, 0x68fbb971, 0x1191c460, 0xf94a3fd9, 0x6489b78d,
+    0x8c524c34, 0xf5383125, 0x1de3ca9c, 0x07eccb07, 0xef3730be,
+    0x965d4daf, 0x7e86b616, 0xacda5c06, 0x4401a7bf, 0x3d6bdaae,
+    0xd5b02117, 0xcfbf208c, 0x2764db35, 0x5e0ea624, 0xb6d55d9d,
+    0x2b16d5c9, 0xc3cd2e70, 0xbaa75361, 0x527ca8d8, 0x4873a943,
+    0xa0a852fa, 0xd9c22feb, 0x3119d452, 0xbbf0874e, 0x532b7cf7,
+    0x2a4101e6, 0xc29afa5f, 0xd895fbc4, 0x304e007d, 0x49247d6c,
+    0xa1ff86d5, 0x3c3c0e81, 0xd4e7f538, 0xad8d8829, 0x45567390,
+    0x5f59720b, 0xb78289b2, 0xcee8f4a3, 0x26330f1a, 0xf46fe50a,
+    0x1cb41eb3, 0x65de63a2, 0x8d05981b, 0x970a9980, 0x7fd16239,
+    0x06bb1f28, 0xee60e491, 0x73a36cc5, 0x9b78977c, 0xe212ea6d,
+    0x0ac911d4, 0x10c6104f, 0xf81debf6, 0x817796e7, 0x69ac6d5e,
+    0x25ce42c6, 0xcd15b97f, 0xb47fc46e, 0x5ca43fd7, 0x46ab3e4c,
+    0xae70c5f5, 0xd71ab8e4, 0x3fc1435d, 0xa202cb09, 0x4ad930b0,
+    0x33b34da1, 0xdb68b618, 0xc167b783, 0x29bc4c3a, 0x50d6312b,
+    0xb80dca92, 0x6a512082, 0x828adb3b, 0xfbe0a62a, 0x133b5d93,
+    0x09345c08, 0xe1efa7b1, 0x9885daa0, 0x705e2119, 0xed9da94d,
+    0x054652f4, 0x7c2c2fe5, 0x94f7d45c, 0x8ef8d5c7, 0x66232e7e,
+    0x1f49536f, 0xf792a8d6, 0xc68b7c84, 0x2e50873d, 0x573afa2c,
+    0xbfe10195, 0xa5ee000e, 0x4d35fbb7, 0x345f86a6, 0xdc847d1f,
+    0x4147f54b, 0xa99c0ef2, 0xd0f673e3, 0x382d885a, 0x222289c1,
+    0xcaf97278, 0xb3930f69, 0x5b48f4d0, 0x89141ec0, 0x61cfe579,
+    0x18a59868, 0xf07e63d1, 0xea71624a, 0x02aa99f3, 0x7bc0e4e2,
+    0x931b1f5b, 0x0ed8970f, 0xe6036cb6, 0x9f6911a7, 0x77b2ea1e,
+    0x6dbdeb85, 0x8566103c, 0xfc0c6d2d, 0x14d79694, 0x58b5b90c,
+    0xb06e42b5, 0xc9043fa4, 0x21dfc41d, 0x3bd0c586, 0xd30b3e3f,
+    0xaa61432e, 0x42bab897, 0xdf7930c3, 0x37a2cb7a, 0x4ec8b66b,
+    0xa6134dd2, 0xbc1c4c49, 0x54c7b7f0, 0x2dadcae1, 0xc5763158,
+    0x172adb48, 0xfff120f1, 0x869b5de0, 0x6e40a659, 0x744fa7c2,
+    0x9c945c7b, 0xe5fe216a, 0x0d25dad3, 0x90e65287, 0x783da93e,
+    0x0157d42f, 0xe98c2f96, 0xf3832e0d, 0x1b58d5b4, 0x6232a8a5,
+    0x8ae9531c},
+   {0x00000000, 0x919168ae, 0x6325a087, 0xf2b4c829, 0x874c31d4,
+    0x16dd597a, 0xe4699153, 0x75f8f9fd, 0x4f9f1373, 0xde0e7bdd,
+    0x2cbab3f4, 0xbd2bdb5a, 0xc8d322a7, 0x59424a09, 0xabf68220,
+    0x3a67ea8e, 0x9e3e27e6, 0x0faf4f48, 0xfd1b8761, 0x6c8aefcf,
+    0x19721632, 0x88e37e9c, 0x7a57b6b5, 0xebc6de1b, 0xd1a13495,
+    0x40305c3b, 0xb2849412, 0x2315fcbc, 0x56ed0541, 0xc77c6def,
+    0x35c8a5c6, 0xa459cd68, 0x7d7b3f17, 0xecea57b9, 0x1e5e9f90,
+    0x8fcff73e, 0xfa370ec3, 0x6ba6666d, 0x9912ae44, 0x0883c6ea,
+    0x32e42c64, 0xa37544ca, 0x51c18ce3, 0xc050e44d, 0xb5a81db0,
+    0x2439751e, 0xd68dbd37, 0x471cd599, 0xe34518f1, 0x72d4705f,
+    0x8060b876, 0x11f1d0d8, 0x64092925, 0xf598418b, 0x072c89a2,
+    0x96bde10c, 0xacda0b82, 0x3d4b632c, 0xcfffab05, 0x5e6ec3ab,
+    0x2b963a56, 0xba0752f8, 0x48b39ad1, 0xd922f27f, 0xfaf67e2e,
+    0x6b671680, 0x99d3dea9, 0x0842b607, 0x7dba4ffa, 0xec2b2754,
+    0x1e9fef7d, 0x8f0e87d3, 0xb5696d5d, 0x24f805f3, 0xd64ccdda,
+    0x47dda574, 0x32255c89, 0xa3b43427, 0x5100fc0e, 0xc09194a0,
+    0x64c859c8, 0xf5593166, 0x07edf94f, 0x967c91e1, 0xe384681c,
+    0x721500b2, 0x80a1c89b, 0x1130a035, 0x2b574abb, 0xbac62215,
+    0x4872ea3c, 0xd9e38292, 0xac1b7b6f, 0x3d8a13c1, 0xcf3edbe8,
+    0x5eafb346, 0x878d4139, 0x161c2997, 0xe4a8e1be, 0x75398910,
+    0x00c170ed, 0x91501843, 0x63e4d06a, 0xf275b8c4, 0xc812524a,
+    0x59833ae4, 0xab37f2cd, 0x3aa69a63, 0x4f5e639e, 0xdecf0b30,
+    0x2c7bc319, 0xbdeaabb7, 0x19b366df, 0x88220e71, 0x7a96c658,
+    0xeb07aef6, 0x9eff570b, 0x0f6e3fa5, 0xfddaf78c, 0x6c4b9f22,
+    0x562c75ac, 0xc7bd1d02, 0x3509d52b, 0xa498bd85, 0xd1604478,
+    0x40f12cd6, 0xb245e4ff, 0x23d48c51, 0xf4edfd5c, 0x657c95f2,
+    0x97c85ddb, 0x06593575, 0x73a1cc88, 0xe230a426, 0x10846c0f,
+    0x811504a1, 0xbb72ee2f, 0x2ae38681, 0xd8574ea8, 0x49c62606,
+    0x3c3edffb, 0xadafb755, 0x5f1b7f7c, 0xce8a17d2, 0x6ad3daba,
+    0xfb42b214, 0x09f67a3d, 0x98671293, 0xed9feb6e, 0x7c0e83c0,
+    0x8eba4be9, 0x1f2b2347, 0x254cc9c9, 0xb4dda167, 0x4669694e,
+    0xd7f801e0, 0xa200f81d, 0x339190b3, 0xc125589a, 0x50b43034,
+    0x8996c24b, 0x1807aae5, 0xeab362cc, 0x7b220a62, 0x0edaf39f,
+    0x9f4b9b31, 0x6dff5318, 0xfc6e3bb6, 0xc609d138, 0x5798b996,
+    0xa52c71bf, 0x34bd1911, 0x4145e0ec, 0xd0d48842, 0x2260406b,
+    0xb3f128c5, 0x17a8e5ad, 0x86398d03, 0x748d452a, 0xe51c2d84,
+    0x90e4d479, 0x0175bcd7, 0xf3c174fe, 0x62501c50, 0x5837f6de,
+    0xc9a69e70, 0x3b125659, 0xaa833ef7, 0xdf7bc70a, 0x4eeaafa4,
+    0xbc5e678d, 0x2dcf0f23, 0x0e1b8372, 0x9f8aebdc, 0x6d3e23f5,
+    0xfcaf4b5b, 0x8957b2a6, 0x18c6da08, 0xea721221, 0x7be37a8f,
+    0x41849001, 0xd015f8af, 0x22a13086, 0xb3305828, 0xc6c8a1d5,
+    0x5759c97b, 0xa5ed0152, 0x347c69fc, 0x9025a494, 0x01b4cc3a,
+    0xf3000413, 0x62916cbd, 0x17699540, 0x86f8fdee, 0x744c35c7,
+    0xe5dd5d69, 0xdfbab7e7, 0x4e2bdf49, 0xbc9f1760, 0x2d0e7fce,
+    0x58f68633, 0xc967ee9d, 0x3bd326b4, 0xaa424e1a, 0x7360bc65,
+    0xe2f1d4cb, 0x10451ce2, 0x81d4744c, 0xf42c8db1, 0x65bde51f,
+    0x97092d36, 0x06984598, 0x3cffaf16, 0xad6ec7b8, 0x5fda0f91,
+    0xce4b673f, 0xbbb39ec2, 0x2a22f66c, 0xd8963e45, 0x490756eb,
+    0xed5e9b83, 0x7ccff32d, 0x8e7b3b04, 0x1fea53aa, 0x6a12aa57,
+    0xfb83c2f9, 0x09370ad0, 0x98a6627e, 0xa2c188f0, 0x3350e05e,
+    0xc1e42877, 0x507540d9, 0x258db924, 0xb41cd18a, 0x46a819a3,
+    0xd739710d}};
+
+#endif
+
+#endif
+
+#if N == 5
+
+#if W == 8
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0xaf449247, 0x85f822cf, 0x2abcb088, 0xd08143df,
+    0x7fc5d198, 0x55796110, 0xfa3df357, 0x7a7381ff, 0xd53713b8,
+    0xff8ba330, 0x50cf3177, 0xaaf2c220, 0x05b65067, 0x2f0ae0ef,
+    0x804e72a8, 0xf4e703fe, 0x5ba391b9, 0x711f2131, 0xde5bb376,
+    0x24664021, 0x8b22d266, 0xa19e62ee, 0x0edaf0a9, 0x8e948201,
+    0x21d01046, 0x0b6ca0ce, 0xa4283289, 0x5e15c1de, 0xf1515399,
+    0xdbede311, 0x74a97156, 0x32bf01bd, 0x9dfb93fa, 0xb7472372,
+    0x1803b135, 0xe23e4262, 0x4d7ad025, 0x67c660ad, 0xc882f2ea,
+    0x48cc8042, 0xe7881205, 0xcd34a28d, 0x627030ca, 0x984dc39d,
+    0x370951da, 0x1db5e152, 0xb2f17315, 0xc6580243, 0x691c9004,
+    0x43a0208c, 0xece4b2cb, 0x16d9419c, 0xb99dd3db, 0x93216353,
+    0x3c65f114, 0xbc2b83bc, 0x136f11fb, 0x39d3a173, 0x96973334,
+    0x6caac063, 0xc3ee5224, 0xe952e2ac, 0x461670eb, 0x657e037a,
+    0xca3a913d, 0xe08621b5, 0x4fc2b3f2, 0xb5ff40a5, 0x1abbd2e2,
+    0x3007626a, 0x9f43f02d, 0x1f0d8285, 0xb04910c2, 0x9af5a04a,
+    0x35b1320d, 0xcf8cc15a, 0x60c8531d, 0x4a74e395, 0xe53071d2,
+    0x91990084, 0x3edd92c3, 0x1461224b, 0xbb25b00c, 0x4118435b,
+    0xee5cd11c, 0xc4e06194, 0x6ba4f3d3, 0xebea817b, 0x44ae133c,
+    0x6e12a3b4, 0xc15631f3, 0x3b6bc2a4, 0x942f50e3, 0xbe93e06b,
+    0x11d7722c, 0x57c102c7, 0xf8859080, 0xd2392008, 0x7d7db24f,
+    0x87404118, 0x2804d35f, 0x02b863d7, 0xadfcf190, 0x2db28338,
+    0x82f6117f, 0xa84aa1f7, 0x070e33b0, 0xfd33c0e7, 0x527752a0,
+    0x78cbe228, 0xd78f706f, 0xa3260139, 0x0c62937e, 0x26de23f6,
+    0x899ab1b1, 0x73a742e6, 0xdce3d0a1, 0xf65f6029, 0x591bf26e,
+    0xd95580c6, 0x76111281, 0x5cada209, 0xf3e9304e, 0x09d4c319,
+    0xa690515e, 0x8c2ce1d6, 0x23687391, 0xcafc06f4, 0x65b894b3,
+    0x4f04243b, 0xe040b67c, 0x1a7d452b, 0xb539d76c, 0x9f8567e4,
+    0x30c1f5a3, 0xb08f870b, 0x1fcb154c, 0x3577a5c4, 0x9a333783,
+    0x600ec4d4, 0xcf4a5693, 0xe5f6e61b, 0x4ab2745c, 0x3e1b050a,
+    0x915f974d, 0xbbe327c5, 0x14a7b582, 0xee9a46d5, 0x41ded492,
+    0x6b62641a, 0xc426f65d, 0x446884f5, 0xeb2c16b2, 0xc190a63a,
+    0x6ed4347d, 0x94e9c72a, 0x3bad556d, 0x1111e5e5, 0xbe5577a2,
+    0xf8430749, 0x5707950e, 0x7dbb2586, 0xd2ffb7c1, 0x28c24496,
+    0x8786d6d1, 0xad3a6659, 0x027ef41e, 0x823086b6, 0x2d7414f1,
+    0x07c8a479, 0xa88c363e, 0x52b1c569, 0xfdf5572e, 0xd749e7a6,
+    0x780d75e1, 0x0ca404b7, 0xa3e096f0, 0x895c2678, 0x2618b43f,
+    0xdc254768, 0x7361d52f, 0x59dd65a7, 0xf699f7e0, 0x76d78548,
+    0xd993170f, 0xf32fa787, 0x5c6b35c0, 0xa656c697, 0x091254d0,
+    0x23aee458, 0x8cea761f, 0xaf82058e, 0x00c697c9, 0x2a7a2741,
+    0x853eb506, 0x7f034651, 0xd047d416, 0xfafb649e, 0x55bff6d9,
+    0xd5f18471, 0x7ab51636, 0x5009a6be, 0xff4d34f9, 0x0570c7ae,
+    0xaa3455e9, 0x8088e561, 0x2fcc7726, 0x5b650670, 0xf4219437,
+    0xde9d24bf, 0x71d9b6f8, 0x8be445af, 0x24a0d7e8, 0x0e1c6760,
+    0xa158f527, 0x2116878f, 0x8e5215c8, 0xa4eea540, 0x0baa3707,
+    0xf197c450, 0x5ed35617, 0x746fe69f, 0xdb2b74d8, 0x9d3d0433,
+    0x32799674, 0x18c526fc, 0xb781b4bb, 0x4dbc47ec, 0xe2f8d5ab,
+    0xc8446523, 0x6700f764, 0xe74e85cc, 0x480a178b, 0x62b6a703,
+    0xcdf23544, 0x37cfc613, 0x988b5454, 0xb237e4dc, 0x1d73769b,
+    0x69da07cd, 0xc69e958a, 0xec222502, 0x4366b745, 0xb95b4412,
+    0x161fd655, 0x3ca366dd, 0x93e7f49a, 0x13a98632, 0xbced1475,
+    0x9651a4fd, 0x391536ba, 0xc328c5ed, 0x6c6c57aa, 0x46d0e722,
+    0xe9947565},
+   {0x00000000, 0x4e890ba9, 0x9d121752, 0xd39b1cfb, 0xe15528e5,
+    0xafdc234c, 0x7c473fb7, 0x32ce341e, 0x19db578b, 0x57525c22,
+    0x84c940d9, 0xca404b70, 0xf88e7f6e, 0xb60774c7, 0x659c683c,
+    0x2b156395, 0x33b6af16, 0x7d3fa4bf, 0xaea4b844, 0xe02db3ed,
+    0xd2e387f3, 0x9c6a8c5a, 0x4ff190a1, 0x01789b08, 0x2a6df89d,
+    0x64e4f334, 0xb77fefcf, 0xf9f6e466, 0xcb38d078, 0x85b1dbd1,
+    0x562ac72a, 0x18a3cc83, 0x676d5e2c, 0x29e45585, 0xfa7f497e,
+    0xb4f642d7, 0x863876c9, 0xc8b17d60, 0x1b2a619b, 0x55a36a32,
+    0x7eb609a7, 0x303f020e, 0xe3a41ef5, 0xad2d155c, 0x9fe32142,
+    0xd16a2aeb, 0x02f13610, 0x4c783db9, 0x54dbf13a, 0x1a52fa93,
+    0xc9c9e668, 0x8740edc1, 0xb58ed9df, 0xfb07d276, 0x289cce8d,
+    0x6615c524, 0x4d00a6b1, 0x0389ad18, 0xd012b1e3, 0x9e9bba4a,
+    0xac558e54, 0xe2dc85fd, 0x31479906, 0x7fce92af, 0xcedabc58,
+    0x8053b7f1, 0x53c8ab0a, 0x1d41a0a3, 0x2f8f94bd, 0x61069f14,
+    0xb29d83ef, 0xfc148846, 0xd701ebd3, 0x9988e07a, 0x4a13fc81,
+    0x049af728, 0x3654c336, 0x78ddc89f, 0xab46d464, 0xe5cfdfcd,
+    0xfd6c134e, 0xb3e518e7, 0x607e041c, 0x2ef70fb5, 0x1c393bab,
+    0x52b03002, 0x812b2cf9, 0xcfa22750, 0xe4b744c5, 0xaa3e4f6c,
+    0x79a55397, 0x372c583e, 0x05e26c20, 0x4b6b6789, 0x98f07b72,
+    0xd67970db, 0xa9b7e274, 0xe73ee9dd, 0x34a5f526, 0x7a2cfe8f,
+    0x48e2ca91, 0x066bc138, 0xd5f0ddc3, 0x9b79d66a, 0xb06cb5ff,
+    0xfee5be56, 0x2d7ea2ad, 0x63f7a904, 0x51399d1a, 0x1fb096b3,
+    0xcc2b8a48, 0x82a281e1, 0x9a014d62, 0xd48846cb, 0x07135a30,
+    0x499a5199, 0x7b546587, 0x35dd6e2e, 0xe64672d5, 0xa8cf797c,
+    0x83da1ae9, 0xcd531140, 0x1ec80dbb, 0x50410612, 0x628f320c,
+    0x2c0639a5, 0xff9d255e, 0xb1142ef7, 0x46c47ef1, 0x084d7558,
+    0xdbd669a3, 0x955f620a, 0xa7915614, 0xe9185dbd, 0x3a834146,
+    0x740a4aef, 0x5f1f297a, 0x119622d3, 0xc20d3e28, 0x8c843581,
+    0xbe4a019f, 0xf0c30a36, 0x235816cd, 0x6dd11d64, 0x7572d1e7,
+    0x3bfbda4e, 0xe860c6b5, 0xa6e9cd1c, 0x9427f902, 0xdaaef2ab,
+    0x0935ee50, 0x47bce5f9, 0x6ca9866c, 0x22208dc5, 0xf1bb913e,
+    0xbf329a97, 0x8dfcae89, 0xc375a520, 0x10eeb9db, 0x5e67b272,
+    0x21a920dd, 0x6f202b74, 0xbcbb378f, 0xf2323c26, 0xc0fc0838,
+    0x8e750391, 0x5dee1f6a, 0x136714c3, 0x38727756, 0x76fb7cff,
+    0xa5606004, 0xebe96bad, 0xd9275fb3, 0x97ae541a, 0x443548e1,
+    0x0abc4348, 0x121f8fcb, 0x5c968462, 0x8f0d9899, 0xc1849330,
+    0xf34aa72e, 0xbdc3ac87, 0x6e58b07c, 0x20d1bbd5, 0x0bc4d840,
+    0x454dd3e9, 0x96d6cf12, 0xd85fc4bb, 0xea91f0a5, 0xa418fb0c,
+    0x7783e7f7, 0x390aec5e, 0x881ec2a9, 0xc697c900, 0x150cd5fb,
+    0x5b85de52, 0x694bea4c, 0x27c2e1e5, 0xf459fd1e, 0xbad0f6b7,
+    0x91c59522, 0xdf4c9e8b, 0x0cd78270, 0x425e89d9, 0x7090bdc7,
+    0x3e19b66e, 0xed82aa95, 0xa30ba13c, 0xbba86dbf, 0xf5216616,
+    0x26ba7aed, 0x68337144, 0x5afd455a, 0x14744ef3, 0xc7ef5208,
+    0x896659a1, 0xa2733a34, 0xecfa319d, 0x3f612d66, 0x71e826cf,
+    0x432612d1, 0x0daf1978, 0xde340583, 0x90bd0e2a, 0xef739c85,
+    0xa1fa972c, 0x72618bd7, 0x3ce8807e, 0x0e26b460, 0x40afbfc9,
+    0x9334a332, 0xddbda89b, 0xf6a8cb0e, 0xb821c0a7, 0x6bbadc5c,
+    0x2533d7f5, 0x17fde3eb, 0x5974e842, 0x8aeff4b9, 0xc466ff10,
+    0xdcc53393, 0x924c383a, 0x41d724c1, 0x0f5e2f68, 0x3d901b76,
+    0x731910df, 0xa0820c24, 0xee0b078d, 0xc51e6418, 0x8b976fb1,
+    0x580c734a, 0x168578e3, 0x244b4cfd, 0x6ac24754, 0xb9595baf,
+    0xf7d05006},
+   {0x00000000, 0x8d88fde2, 0xc060fd85, 0x4de80067, 0x5bb0fd4b,
+    0xd63800a9, 0x9bd000ce, 0x1658fd2c, 0xb761fa96, 0x3ae90774,
+    0x77010713, 0xfa89faf1, 0xecd107dd, 0x6159fa3f, 0x2cb1fa58,
+    0xa13907ba, 0xb5b2f36d, 0x383a0e8f, 0x75d20ee8, 0xf85af30a,
+    0xee020e26, 0x638af3c4, 0x2e62f3a3, 0xa3ea0e41, 0x02d309fb,
+    0x8f5bf419, 0xc2b3f47e, 0x4f3b099c, 0x5963f4b0, 0xd4eb0952,
+    0x99030935, 0x148bf4d7, 0xb014e09b, 0x3d9c1d79, 0x70741d1e,
+    0xfdfce0fc, 0xeba41dd0, 0x662ce032, 0x2bc4e055, 0xa64c1db7,
+    0x07751a0d, 0x8afde7ef, 0xc715e788, 0x4a9d1a6a, 0x5cc5e746,
+    0xd14d1aa4, 0x9ca51ac3, 0x112de721, 0x05a613f6, 0x882eee14,
+    0xc5c6ee73, 0x484e1391, 0x5e16eebd, 0xd39e135f, 0x9e761338,
+    0x13feeeda, 0xb2c7e960, 0x3f4f1482, 0x72a714e5, 0xff2fe907,
+    0xe977142b, 0x64ffe9c9, 0x2917e9ae, 0xa49f144c, 0xbb58c777,
+    0x36d03a95, 0x7b383af2, 0xf6b0c710, 0xe0e83a3c, 0x6d60c7de,
+    0x2088c7b9, 0xad003a5b, 0x0c393de1, 0x81b1c003, 0xcc59c064,
+    0x41d13d86, 0x5789c0aa, 0xda013d48, 0x97e93d2f, 0x1a61c0cd,
+    0x0eea341a, 0x8362c9f8, 0xce8ac99f, 0x4302347d, 0x555ac951,
+    0xd8d234b3, 0x953a34d4, 0x18b2c936, 0xb98bce8c, 0x3403336e,
+    0x79eb3309, 0xf463ceeb, 0xe23b33c7, 0x6fb3ce25, 0x225bce42,
+    0xafd333a0, 0x0b4c27ec, 0x86c4da0e, 0xcb2cda69, 0x46a4278b,
+    0x50fcdaa7, 0xdd742745, 0x909c2722, 0x1d14dac0, 0xbc2ddd7a,
+    0x31a52098, 0x7c4d20ff, 0xf1c5dd1d, 0xe79d2031, 0x6a15ddd3,
+    0x27fdddb4, 0xaa752056, 0xbefed481, 0x33762963, 0x7e9e2904,
+    0xf316d4e6, 0xe54e29ca, 0x68c6d428, 0x252ed44f, 0xa8a629ad,
+    0x099f2e17, 0x8417d3f5, 0xc9ffd392, 0x44772e70, 0x522fd35c,
+    0xdfa72ebe, 0x924f2ed9, 0x1fc7d33b, 0xadc088af, 0x2048754d,
+    0x6da0752a, 0xe02888c8, 0xf67075e4, 0x7bf88806, 0x36108861,
+    0xbb987583, 0x1aa17239, 0x97298fdb, 0xdac18fbc, 0x5749725e,
+    0x41118f72, 0xcc997290, 0x817172f7, 0x0cf98f15, 0x18727bc2,
+    0x95fa8620, 0xd8128647, 0x559a7ba5, 0x43c28689, 0xce4a7b6b,
+    0x83a27b0c, 0x0e2a86ee, 0xaf138154, 0x229b7cb6, 0x6f737cd1,
+    0xe2fb8133, 0xf4a37c1f, 0x792b81fd, 0x34c3819a, 0xb94b7c78,
+    0x1dd46834, 0x905c95d6, 0xddb495b1, 0x503c6853, 0x4664957f,
+    0xcbec689d, 0x860468fa, 0x0b8c9518, 0xaab592a2, 0x273d6f40,
+    0x6ad56f27, 0xe75d92c5, 0xf1056fe9, 0x7c8d920b, 0x3165926c,
+    0xbced6f8e, 0xa8669b59, 0x25ee66bb, 0x680666dc, 0xe58e9b3e,
+    0xf3d66612, 0x7e5e9bf0, 0x33b69b97, 0xbe3e6675, 0x1f0761cf,
+    0x928f9c2d, 0xdf679c4a, 0x52ef61a8, 0x44b79c84, 0xc93f6166,
+    0x84d76101, 0x095f9ce3, 0x16984fd8, 0x9b10b23a, 0xd6f8b25d,
+    0x5b704fbf, 0x4d28b293, 0xc0a04f71, 0x8d484f16, 0x00c0b2f4,
+    0xa1f9b54e, 0x2c7148ac, 0x619948cb, 0xec11b529, 0xfa494805,
+    0x77c1b5e7, 0x3a29b580, 0xb7a14862, 0xa32abcb5, 0x2ea24157,
+    0x634a4130, 0xeec2bcd2, 0xf89a41fe, 0x7512bc1c, 0x38fabc7b,
+    0xb5724199, 0x144b4623, 0x99c3bbc1, 0xd42bbba6, 0x59a34644,
+    0x4ffbbb68, 0xc273468a, 0x8f9b46ed, 0x0213bb0f, 0xa68caf43,
+    0x2b0452a1, 0x66ec52c6, 0xeb64af24, 0xfd3c5208, 0x70b4afea,
+    0x3d5caf8d, 0xb0d4526f, 0x11ed55d5, 0x9c65a837, 0xd18da850,
+    0x5c0555b2, 0x4a5da89e, 0xc7d5557c, 0x8a3d551b, 0x07b5a8f9,
+    0x133e5c2e, 0x9eb6a1cc, 0xd35ea1ab, 0x5ed65c49, 0x488ea165,
+    0xc5065c87, 0x88ee5ce0, 0x0566a102, 0xa45fa6b8, 0x29d75b5a,
+    0x643f5b3d, 0xe9b7a6df, 0xffef5bf3, 0x7267a611, 0x3f8fa676,
+    0xb2075b94},
+   {0x00000000, 0x80f0171f, 0xda91287f, 0x5a613f60, 0x6e5356bf,
+    0xeea341a0, 0xb4c27ec0, 0x343269df, 0xdca6ad7e, 0x5c56ba61,
+    0x06378501, 0x86c7921e, 0xb2f5fbc1, 0x3205ecde, 0x6864d3be,
+    0xe894c4a1, 0x623c5cbd, 0xe2cc4ba2, 0xb8ad74c2, 0x385d63dd,
+    0x0c6f0a02, 0x8c9f1d1d, 0xd6fe227d, 0x560e3562, 0xbe9af1c3,
+    0x3e6ae6dc, 0x640bd9bc, 0xe4fbcea3, 0xd0c9a77c, 0x5039b063,
+    0x0a588f03, 0x8aa8981c, 0xc478b97a, 0x4488ae65, 0x1ee99105,
+    0x9e19861a, 0xaa2befc5, 0x2adbf8da, 0x70bac7ba, 0xf04ad0a5,
+    0x18de1404, 0x982e031b, 0xc24f3c7b, 0x42bf2b64, 0x768d42bb,
+    0xf67d55a4, 0xac1c6ac4, 0x2cec7ddb, 0xa644e5c7, 0x26b4f2d8,
+    0x7cd5cdb8, 0xfc25daa7, 0xc817b378, 0x48e7a467, 0x12869b07,
+    0x92768c18, 0x7ae248b9, 0xfa125fa6, 0xa07360c6, 0x208377d9,
+    0x14b11e06, 0x94410919, 0xce203679, 0x4ed02166, 0x538074b5,
+    0xd37063aa, 0x89115cca, 0x09e14bd5, 0x3dd3220a, 0xbd233515,
+    0xe7420a75, 0x67b21d6a, 0x8f26d9cb, 0x0fd6ced4, 0x55b7f1b4,
+    0xd547e6ab, 0xe1758f74, 0x6185986b, 0x3be4a70b, 0xbb14b014,
+    0x31bc2808, 0xb14c3f17, 0xeb2d0077, 0x6bdd1768, 0x5fef7eb7,
+    0xdf1f69a8, 0x857e56c8, 0x058e41d7, 0xed1a8576, 0x6dea9269,
+    0x378bad09, 0xb77bba16, 0x8349d3c9, 0x03b9c4d6, 0x59d8fbb6,
+    0xd928eca9, 0x97f8cdcf, 0x1708dad0, 0x4d69e5b0, 0xcd99f2af,
+    0xf9ab9b70, 0x795b8c6f, 0x233ab30f, 0xa3caa410, 0x4b5e60b1,
+    0xcbae77ae, 0x91cf48ce, 0x113f5fd1, 0x250d360e, 0xa5fd2111,
+    0xff9c1e71, 0x7f6c096e, 0xf5c49172, 0x7534866d, 0x2f55b90d,
+    0xafa5ae12, 0x9b97c7cd, 0x1b67d0d2, 0x4106efb2, 0xc1f6f8ad,
+    0x29623c0c, 0xa9922b13, 0xf3f31473, 0x7303036c, 0x47316ab3,
+    0xc7c17dac, 0x9da042cc, 0x1d5055d3, 0xa700e96a, 0x27f0fe75,
+    0x7d91c115, 0xfd61d60a, 0xc953bfd5, 0x49a3a8ca, 0x13c297aa,
+    0x933280b5, 0x7ba64414, 0xfb56530b, 0xa1376c6b, 0x21c77b74,
+    0x15f512ab, 0x950505b4, 0xcf643ad4, 0x4f942dcb, 0xc53cb5d7,
+    0x45cca2c8, 0x1fad9da8, 0x9f5d8ab7, 0xab6fe368, 0x2b9ff477,
+    0x71fecb17, 0xf10edc08, 0x199a18a9, 0x996a0fb6, 0xc30b30d6,
+    0x43fb27c9, 0x77c94e16, 0xf7395909, 0xad586669, 0x2da87176,
+    0x63785010, 0xe388470f, 0xb9e9786f, 0x39196f70, 0x0d2b06af,
+    0x8ddb11b0, 0xd7ba2ed0, 0x574a39cf, 0xbfdefd6e, 0x3f2eea71,
+    0x654fd511, 0xe5bfc20e, 0xd18dabd1, 0x517dbcce, 0x0b1c83ae,
+    0x8bec94b1, 0x01440cad, 0x81b41bb2, 0xdbd524d2, 0x5b2533cd,
+    0x6f175a12, 0xefe74d0d, 0xb586726d, 0x35766572, 0xdde2a1d3,
+    0x5d12b6cc, 0x077389ac, 0x87839eb3, 0xb3b1f76c, 0x3341e073,
+    0x6920df13, 0xe9d0c80c, 0xf4809ddf, 0x74708ac0, 0x2e11b5a0,
+    0xaee1a2bf, 0x9ad3cb60, 0x1a23dc7f, 0x4042e31f, 0xc0b2f400,
+    0x282630a1, 0xa8d627be, 0xf2b718de, 0x72470fc1, 0x4675661e,
+    0xc6857101, 0x9ce44e61, 0x1c14597e, 0x96bcc162, 0x164cd67d,
+    0x4c2de91d, 0xccddfe02, 0xf8ef97dd, 0x781f80c2, 0x227ebfa2,
+    0xa28ea8bd, 0x4a1a6c1c, 0xcaea7b03, 0x908b4463, 0x107b537c,
+    0x24493aa3, 0xa4b92dbc, 0xfed812dc, 0x7e2805c3, 0x30f824a5,
+    0xb00833ba, 0xea690cda, 0x6a991bc5, 0x5eab721a, 0xde5b6505,
+    0x843a5a65, 0x04ca4d7a, 0xec5e89db, 0x6cae9ec4, 0x36cfa1a4,
+    0xb63fb6bb, 0x820ddf64, 0x02fdc87b, 0x589cf71b, 0xd86ce004,
+    0x52c47818, 0xd2346f07, 0x88555067, 0x08a54778, 0x3c972ea7,
+    0xbc6739b8, 0xe60606d8, 0x66f611c7, 0x8e62d566, 0x0e92c279,
+    0x54f3fd19, 0xd403ea06, 0xe03183d9, 0x60c194c6, 0x3aa0aba6,
+    0xba50bcb9},
+   {0x00000000, 0x9570d495, 0xf190af6b, 0x64e07bfe, 0x38505897,
+    0xad208c02, 0xc9c0f7fc, 0x5cb02369, 0x70a0b12e, 0xe5d065bb,
+    0x81301e45, 0x1440cad0, 0x48f0e9b9, 0xdd803d2c, 0xb96046d2,
+    0x2c109247, 0xe141625c, 0x7431b6c9, 0x10d1cd37, 0x85a119a2,
+    0xd9113acb, 0x4c61ee5e, 0x288195a0, 0xbdf14135, 0x91e1d372,
+    0x049107e7, 0x60717c19, 0xf501a88c, 0xa9b18be5, 0x3cc15f70,
+    0x5821248e, 0xcd51f01b, 0x19f3c2f9, 0x8c83166c, 0xe8636d92,
+    0x7d13b907, 0x21a39a6e, 0xb4d34efb, 0xd0333505, 0x4543e190,
+    0x695373d7, 0xfc23a742, 0x98c3dcbc, 0x0db30829, 0x51032b40,
+    0xc473ffd5, 0xa093842b, 0x35e350be, 0xf8b2a0a5, 0x6dc27430,
+    0x09220fce, 0x9c52db5b, 0xc0e2f832, 0x55922ca7, 0x31725759,
+    0xa40283cc, 0x8812118b, 0x1d62c51e, 0x7982bee0, 0xecf26a75,
+    0xb042491c, 0x25329d89, 0x41d2e677, 0xd4a232e2, 0x33e785f2,
+    0xa6975167, 0xc2772a99, 0x5707fe0c, 0x0bb7dd65, 0x9ec709f0,
+    0xfa27720e, 0x6f57a69b, 0x434734dc, 0xd637e049, 0xb2d79bb7,
+    0x27a74f22, 0x7b176c4b, 0xee67b8de, 0x8a87c320, 0x1ff717b5,
+    0xd2a6e7ae, 0x47d6333b, 0x233648c5, 0xb6469c50, 0xeaf6bf39,
+    0x7f866bac, 0x1b661052, 0x8e16c4c7, 0xa2065680, 0x37768215,
+    0x5396f9eb, 0xc6e62d7e, 0x9a560e17, 0x0f26da82, 0x6bc6a17c,
+    0xfeb675e9, 0x2a14470b, 0xbf64939e, 0xdb84e860, 0x4ef43cf5,
+    0x12441f9c, 0x8734cb09, 0xe3d4b0f7, 0x76a46462, 0x5ab4f625,
+    0xcfc422b0, 0xab24594e, 0x3e548ddb, 0x62e4aeb2, 0xf7947a27,
+    0x937401d9, 0x0604d54c, 0xcb552557, 0x5e25f1c2, 0x3ac58a3c,
+    0xafb55ea9, 0xf3057dc0, 0x6675a955, 0x0295d2ab, 0x97e5063e,
+    0xbbf59479, 0x2e8540ec, 0x4a653b12, 0xdf15ef87, 0x83a5ccee,
+    0x16d5187b, 0x72356385, 0xe745b710, 0x67cf0be4, 0xf2bfdf71,
+    0x965fa48f, 0x032f701a, 0x5f9f5373, 0xcaef87e6, 0xae0ffc18,
+    0x3b7f288d, 0x176fbaca, 0x821f6e5f, 0xe6ff15a1, 0x738fc134,
+    0x2f3fe25d, 0xba4f36c8, 0xdeaf4d36, 0x4bdf99a3, 0x868e69b8,
+    0x13febd2d, 0x771ec6d3, 0xe26e1246, 0xbede312f, 0x2baee5ba,
+    0x4f4e9e44, 0xda3e4ad1, 0xf62ed896, 0x635e0c03, 0x07be77fd,
+    0x92cea368, 0xce7e8001, 0x5b0e5494, 0x3fee2f6a, 0xaa9efbff,
+    0x7e3cc91d, 0xeb4c1d88, 0x8fac6676, 0x1adcb2e3, 0x466c918a,
+    0xd31c451f, 0xb7fc3ee1, 0x228cea74, 0x0e9c7833, 0x9becaca6,
+    0xff0cd758, 0x6a7c03cd, 0x36cc20a4, 0xa3bcf431, 0xc75c8fcf,
+    0x522c5b5a, 0x9f7dab41, 0x0a0d7fd4, 0x6eed042a, 0xfb9dd0bf,
+    0xa72df3d6, 0x325d2743, 0x56bd5cbd, 0xc3cd8828, 0xefdd1a6f,
+    0x7aadcefa, 0x1e4db504, 0x8b3d6191, 0xd78d42f8, 0x42fd966d,
+    0x261ded93, 0xb36d3906, 0x54288e16, 0xc1585a83, 0xa5b8217d,
+    0x30c8f5e8, 0x6c78d681, 0xf9080214, 0x9de879ea, 0x0898ad7f,
+    0x24883f38, 0xb1f8ebad, 0xd5189053, 0x406844c6, 0x1cd867af,
+    0x89a8b33a, 0xed48c8c4, 0x78381c51, 0xb569ec4a, 0x201938df,
+    0x44f94321, 0xd18997b4, 0x8d39b4dd, 0x18496048, 0x7ca91bb6,
+    0xe9d9cf23, 0xc5c95d64, 0x50b989f1, 0x3459f20f, 0xa129269a,
+    0xfd9905f3, 0x68e9d166, 0x0c09aa98, 0x99797e0d, 0x4ddb4cef,
+    0xd8ab987a, 0xbc4be384, 0x293b3711, 0x758b1478, 0xe0fbc0ed,
+    0x841bbb13, 0x116b6f86, 0x3d7bfdc1, 0xa80b2954, 0xcceb52aa,
+    0x599b863f, 0x052ba556, 0x905b71c3, 0xf4bb0a3d, 0x61cbdea8,
+    0xac9a2eb3, 0x39eafa26, 0x5d0a81d8, 0xc87a554d, 0x94ca7624,
+    0x01baa2b1, 0x655ad94f, 0xf02a0dda, 0xdc3a9f9d, 0x494a4b08,
+    0x2daa30f6, 0xb8dae463, 0xe46ac70a, 0x711a139f, 0x15fa6861,
+    0x808abcf4},
+   {0x00000000, 0xcf9e17c8, 0x444d29d1, 0x8bd33e19, 0x889a53a2,
+    0x4704446a, 0xccd77a73, 0x03496dbb, 0xca45a105, 0x05dbb6cd,
+    0x8e0888d4, 0x41969f1c, 0x42dff2a7, 0x8d41e56f, 0x0692db76,
+    0xc90cccbe, 0x4ffa444b, 0x80645383, 0x0bb76d9a, 0xc4297a52,
+    0xc76017e9, 0x08fe0021, 0x832d3e38, 0x4cb329f0, 0x85bfe54e,
+    0x4a21f286, 0xc1f2cc9f, 0x0e6cdb57, 0x0d25b6ec, 0xc2bba124,
+    0x49689f3d, 0x86f688f5, 0x9ff48896, 0x506a9f5e, 0xdbb9a147,
+    0x1427b68f, 0x176edb34, 0xd8f0ccfc, 0x5323f2e5, 0x9cbde52d,
+    0x55b12993, 0x9a2f3e5b, 0x11fc0042, 0xde62178a, 0xdd2b7a31,
+    0x12b56df9, 0x996653e0, 0x56f84428, 0xd00eccdd, 0x1f90db15,
+    0x9443e50c, 0x5bddf2c4, 0x58949f7f, 0x970a88b7, 0x1cd9b6ae,
+    0xd347a166, 0x1a4b6dd8, 0xd5d57a10, 0x5e064409, 0x919853c1,
+    0x92d13e7a, 0x5d4f29b2, 0xd69c17ab, 0x19020063, 0xe498176d,
+    0x2b0600a5, 0xa0d53ebc, 0x6f4b2974, 0x6c0244cf, 0xa39c5307,
+    0x284f6d1e, 0xe7d17ad6, 0x2eddb668, 0xe143a1a0, 0x6a909fb9,
+    0xa50e8871, 0xa647e5ca, 0x69d9f202, 0xe20acc1b, 0x2d94dbd3,
+    0xab625326, 0x64fc44ee, 0xef2f7af7, 0x20b16d3f, 0x23f80084,
+    0xec66174c, 0x67b52955, 0xa82b3e9d, 0x6127f223, 0xaeb9e5eb,
+    0x256adbf2, 0xeaf4cc3a, 0xe9bda181, 0x2623b649, 0xadf08850,
+    0x626e9f98, 0x7b6c9ffb, 0xb4f28833, 0x3f21b62a, 0xf0bfa1e2,
+    0xf3f6cc59, 0x3c68db91, 0xb7bbe588, 0x7825f240, 0xb1293efe,
+    0x7eb72936, 0xf564172f, 0x3afa00e7, 0x39b36d5c, 0xf62d7a94,
+    0x7dfe448d, 0xb2605345, 0x3496dbb0, 0xfb08cc78, 0x70dbf261,
+    0xbf45e5a9, 0xbc0c8812, 0x73929fda, 0xf841a1c3, 0x37dfb60b,
+    0xfed37ab5, 0x314d6d7d, 0xba9e5364, 0x750044ac, 0x76492917,
+    0xb9d73edf, 0x320400c6, 0xfd9a170e, 0x1241289b, 0xdddf3f53,
+    0x560c014a, 0x99921682, 0x9adb7b39, 0x55456cf1, 0xde9652e8,
+    0x11084520, 0xd804899e, 0x179a9e56, 0x9c49a04f, 0x53d7b787,
+    0x509eda3c, 0x9f00cdf4, 0x14d3f3ed, 0xdb4de425, 0x5dbb6cd0,
+    0x92257b18, 0x19f64501, 0xd66852c9, 0xd5213f72, 0x1abf28ba,
+    0x916c16a3, 0x5ef2016b, 0x97fecdd5, 0x5860da1d, 0xd3b3e404,
+    0x1c2df3cc, 0x1f649e77, 0xd0fa89bf, 0x5b29b7a6, 0x94b7a06e,
+    0x8db5a00d, 0x422bb7c5, 0xc9f889dc, 0x06669e14, 0x052ff3af,
+    0xcab1e467, 0x4162da7e, 0x8efccdb6, 0x47f00108, 0x886e16c0,
+    0x03bd28d9, 0xcc233f11, 0xcf6a52aa, 0x00f44562, 0x8b277b7b,
+    0x44b96cb3, 0xc24fe446, 0x0dd1f38e, 0x8602cd97, 0x499cda5f,
+    0x4ad5b7e4, 0x854ba02c, 0x0e989e35, 0xc10689fd, 0x080a4543,
+    0xc794528b, 0x4c476c92, 0x83d97b5a, 0x809016e1, 0x4f0e0129,
+    0xc4dd3f30, 0x0b4328f8, 0xf6d93ff6, 0x3947283e, 0xb2941627,
+    0x7d0a01ef, 0x7e436c54, 0xb1dd7b9c, 0x3a0e4585, 0xf590524d,
+    0x3c9c9ef3, 0xf302893b, 0x78d1b722, 0xb74fa0ea, 0xb406cd51,
+    0x7b98da99, 0xf04be480, 0x3fd5f348, 0xb9237bbd, 0x76bd6c75,
+    0xfd6e526c, 0x32f045a4, 0x31b9281f, 0xfe273fd7, 0x75f401ce,
+    0xba6a1606, 0x7366dab8, 0xbcf8cd70, 0x372bf369, 0xf8b5e4a1,
+    0xfbfc891a, 0x34629ed2, 0xbfb1a0cb, 0x702fb703, 0x692db760,
+    0xa6b3a0a8, 0x2d609eb1, 0xe2fe8979, 0xe1b7e4c2, 0x2e29f30a,
+    0xa5facd13, 0x6a64dadb, 0xa3681665, 0x6cf601ad, 0xe7253fb4,
+    0x28bb287c, 0x2bf245c7, 0xe46c520f, 0x6fbf6c16, 0xa0217bde,
+    0x26d7f32b, 0xe949e4e3, 0x629adafa, 0xad04cd32, 0xae4da089,
+    0x61d3b741, 0xea008958, 0x259e9e90, 0xec92522e, 0x230c45e6,
+    0xa8df7bff, 0x67416c37, 0x6408018c, 0xab961644, 0x2045285d,
+    0xefdb3f95},
+   {0x00000000, 0x24825136, 0x4904a26c, 0x6d86f35a, 0x920944d8,
+    0xb68b15ee, 0xdb0de6b4, 0xff8fb782, 0xff638ff1, 0xdbe1dec7,
+    0xb6672d9d, 0x92e57cab, 0x6d6acb29, 0x49e89a1f, 0x246e6945,
+    0x00ec3873, 0x25b619a3, 0x01344895, 0x6cb2bbcf, 0x4830eaf9,
+    0xb7bf5d7b, 0x933d0c4d, 0xfebbff17, 0xda39ae21, 0xdad59652,
+    0xfe57c764, 0x93d1343e, 0xb7536508, 0x48dcd28a, 0x6c5e83bc,
+    0x01d870e6, 0x255a21d0, 0x4b6c3346, 0x6fee6270, 0x0268912a,
+    0x26eac01c, 0xd965779e, 0xfde726a8, 0x9061d5f2, 0xb4e384c4,
+    0xb40fbcb7, 0x908ded81, 0xfd0b1edb, 0xd9894fed, 0x2606f86f,
+    0x0284a959, 0x6f025a03, 0x4b800b35, 0x6eda2ae5, 0x4a587bd3,
+    0x27de8889, 0x035cd9bf, 0xfcd36e3d, 0xd8513f0b, 0xb5d7cc51,
+    0x91559d67, 0x91b9a514, 0xb53bf422, 0xd8bd0778, 0xfc3f564e,
+    0x03b0e1cc, 0x2732b0fa, 0x4ab443a0, 0x6e361296, 0x96d8668c,
+    0xb25a37ba, 0xdfdcc4e0, 0xfb5e95d6, 0x04d12254, 0x20537362,
+    0x4dd58038, 0x6957d10e, 0x69bbe97d, 0x4d39b84b, 0x20bf4b11,
+    0x043d1a27, 0xfbb2ada5, 0xdf30fc93, 0xb2b60fc9, 0x96345eff,
+    0xb36e7f2f, 0x97ec2e19, 0xfa6add43, 0xdee88c75, 0x21673bf7,
+    0x05e56ac1, 0x6863999b, 0x4ce1c8ad, 0x4c0df0de, 0x688fa1e8,
+    0x050952b2, 0x218b0384, 0xde04b406, 0xfa86e530, 0x9700166a,
+    0xb382475c, 0xddb455ca, 0xf93604fc, 0x94b0f7a6, 0xb032a690,
+    0x4fbd1112, 0x6b3f4024, 0x06b9b37e, 0x223be248, 0x22d7da3b,
+    0x06558b0d, 0x6bd37857, 0x4f512961, 0xb0de9ee3, 0x945ccfd5,
+    0xf9da3c8f, 0xdd586db9, 0xf8024c69, 0xdc801d5f, 0xb106ee05,
+    0x9584bf33, 0x6a0b08b1, 0x4e895987, 0x230faadd, 0x078dfbeb,
+    0x0761c398, 0x23e392ae, 0x4e6561f4, 0x6ae730c2, 0x95688740,
+    0xb1ead676, 0xdc6c252c, 0xf8ee741a, 0xf6c1cb59, 0xd2439a6f,
+    0xbfc56935, 0x9b473803, 0x64c88f81, 0x404adeb7, 0x2dcc2ded,
+    0x094e7cdb, 0x09a244a8, 0x2d20159e, 0x40a6e6c4, 0x6424b7f2,
+    0x9bab0070, 0xbf295146, 0xd2afa21c, 0xf62df32a, 0xd377d2fa,
+    0xf7f583cc, 0x9a737096, 0xbef121a0, 0x417e9622, 0x65fcc714,
+    0x087a344e, 0x2cf86578, 0x2c145d0b, 0x08960c3d, 0x6510ff67,
+    0x4192ae51, 0xbe1d19d3, 0x9a9f48e5, 0xf719bbbf, 0xd39bea89,
+    0xbdadf81f, 0x992fa929, 0xf4a95a73, 0xd02b0b45, 0x2fa4bcc7,
+    0x0b26edf1, 0x66a01eab, 0x42224f9d, 0x42ce77ee, 0x664c26d8,
+    0x0bcad582, 0x2f4884b4, 0xd0c73336, 0xf4456200, 0x99c3915a,
+    0xbd41c06c, 0x981be1bc, 0xbc99b08a, 0xd11f43d0, 0xf59d12e6,
+    0x0a12a564, 0x2e90f452, 0x43160708, 0x6794563e, 0x67786e4d,
+    0x43fa3f7b, 0x2e7ccc21, 0x0afe9d17, 0xf5712a95, 0xd1f37ba3,
+    0xbc7588f9, 0x98f7d9cf, 0x6019add5, 0x449bfce3, 0x291d0fb9,
+    0x0d9f5e8f, 0xf210e90d, 0xd692b83b, 0xbb144b61, 0x9f961a57,
+    0x9f7a2224, 0xbbf87312, 0xd67e8048, 0xf2fcd17e, 0x0d7366fc,
+    0x29f137ca, 0x4477c490, 0x60f595a6, 0x45afb476, 0x612de540,
+    0x0cab161a, 0x2829472c, 0xd7a6f0ae, 0xf324a198, 0x9ea252c2,
+    0xba2003f4, 0xbacc3b87, 0x9e4e6ab1, 0xf3c899eb, 0xd74ac8dd,
+    0x28c57f5f, 0x0c472e69, 0x61c1dd33, 0x45438c05, 0x2b759e93,
+    0x0ff7cfa5, 0x62713cff, 0x46f36dc9, 0xb97cda4b, 0x9dfe8b7d,
+    0xf0787827, 0xd4fa2911, 0xd4161162, 0xf0944054, 0x9d12b30e,
+    0xb990e238, 0x461f55ba, 0x629d048c, 0x0f1bf7d6, 0x2b99a6e0,
+    0x0ec38730, 0x2a41d606, 0x47c7255c, 0x6345746a, 0x9ccac3e8,
+    0xb84892de, 0xd5ce6184, 0xf14c30b2, 0xf1a008c1, 0xd52259f7,
+    0xb8a4aaad, 0x9c26fb9b, 0x63a94c19, 0x472b1d2f, 0x2aadee75,
+    0x0e2fbf43},
+   {0x00000000, 0x36f290f3, 0x6de521e6, 0x5b17b115, 0xdbca43cc,
+    0xed38d33f, 0xb62f622a, 0x80ddf2d9, 0x6ce581d9, 0x5a17112a,
+    0x0100a03f, 0x37f230cc, 0xb72fc215, 0x81dd52e6, 0xdacae3f3,
+    0xec387300, 0xd9cb03b2, 0xef399341, 0xb42e2254, 0x82dcb2a7,
+    0x0201407e, 0x34f3d08d, 0x6fe46198, 0x5916f16b, 0xb52e826b,
+    0x83dc1298, 0xd8cba38d, 0xee39337e, 0x6ee4c1a7, 0x58165154,
+    0x0301e041, 0x35f370b2, 0x68e70125, 0x5e1591d6, 0x050220c3,
+    0x33f0b030, 0xb32d42e9, 0x85dfd21a, 0xdec8630f, 0xe83af3fc,
+    0x040280fc, 0x32f0100f, 0x69e7a11a, 0x5f1531e9, 0xdfc8c330,
+    0xe93a53c3, 0xb22de2d6, 0x84df7225, 0xb12c0297, 0x87de9264,
+    0xdcc92371, 0xea3bb382, 0x6ae6415b, 0x5c14d1a8, 0x070360bd,
+    0x31f1f04e, 0xddc9834e, 0xeb3b13bd, 0xb02ca2a8, 0x86de325b,
+    0x0603c082, 0x30f15071, 0x6be6e164, 0x5d147197, 0xd1ce024a,
+    0xe73c92b9, 0xbc2b23ac, 0x8ad9b35f, 0x0a044186, 0x3cf6d175,
+    0x67e16060, 0x5113f093, 0xbd2b8393, 0x8bd91360, 0xd0cea275,
+    0xe63c3286, 0x66e1c05f, 0x501350ac, 0x0b04e1b9, 0x3df6714a,
+    0x080501f8, 0x3ef7910b, 0x65e0201e, 0x5312b0ed, 0xd3cf4234,
+    0xe53dd2c7, 0xbe2a63d2, 0x88d8f321, 0x64e08021, 0x521210d2,
+    0x0905a1c7, 0x3ff73134, 0xbf2ac3ed, 0x89d8531e, 0xd2cfe20b,
+    0xe43d72f8, 0xb929036f, 0x8fdb939c, 0xd4cc2289, 0xe23eb27a,
+    0x62e340a3, 0x5411d050, 0x0f066145, 0x39f4f1b6, 0xd5cc82b6,
+    0xe33e1245, 0xb829a350, 0x8edb33a3, 0x0e06c17a, 0x38f45189,
+    0x63e3e09c, 0x5511706f, 0x60e200dd, 0x5610902e, 0x0d07213b,
+    0x3bf5b1c8, 0xbb284311, 0x8ddad3e2, 0xd6cd62f7, 0xe03ff204,
+    0x0c078104, 0x3af511f7, 0x61e2a0e2, 0x57103011, 0xd7cdc2c8,
+    0xe13f523b, 0xba28e32e, 0x8cda73dd, 0x78ed02d5, 0x4e1f9226,
+    0x15082333, 0x23fab3c0, 0xa3274119, 0x95d5d1ea, 0xcec260ff,
+    0xf830f00c, 0x1408830c, 0x22fa13ff, 0x79eda2ea, 0x4f1f3219,
+    0xcfc2c0c0, 0xf9305033, 0xa227e126, 0x94d571d5, 0xa1260167,
+    0x97d49194, 0xccc32081, 0xfa31b072, 0x7aec42ab, 0x4c1ed258,
+    0x1709634d, 0x21fbf3be, 0xcdc380be, 0xfb31104d, 0xa026a158,
+    0x96d431ab, 0x1609c372, 0x20fb5381, 0x7bece294, 0x4d1e7267,
+    0x100a03f0, 0x26f89303, 0x7def2216, 0x4b1db2e5, 0xcbc0403c,
+    0xfd32d0cf, 0xa62561da, 0x90d7f129, 0x7cef8229, 0x4a1d12da,
+    0x110aa3cf, 0x27f8333c, 0xa725c1e5, 0x91d75116, 0xcac0e003,
+    0xfc3270f0, 0xc9c10042, 0xff3390b1, 0xa42421a4, 0x92d6b157,
+    0x120b438e, 0x24f9d37d, 0x7fee6268, 0x491cf29b, 0xa524819b,
+    0x93d61168, 0xc8c1a07d, 0xfe33308e, 0x7eeec257, 0x481c52a4,
+    0x130be3b1, 0x25f97342, 0xa923009f, 0x9fd1906c, 0xc4c62179,
+    0xf234b18a, 0x72e94353, 0x441bd3a0, 0x1f0c62b5, 0x29fef246,
+    0xc5c68146, 0xf33411b5, 0xa823a0a0, 0x9ed13053, 0x1e0cc28a,
+    0x28fe5279, 0x73e9e36c, 0x451b739f, 0x70e8032d, 0x461a93de,
+    0x1d0d22cb, 0x2bffb238, 0xab2240e1, 0x9dd0d012, 0xc6c76107,
+    0xf035f1f4, 0x1c0d82f4, 0x2aff1207, 0x71e8a312, 0x471a33e1,
+    0xc7c7c138, 0xf13551cb, 0xaa22e0de, 0x9cd0702d, 0xc1c401ba,
+    0xf7369149, 0xac21205c, 0x9ad3b0af, 0x1a0e4276, 0x2cfcd285,
+    0x77eb6390, 0x4119f363, 0xad218063, 0x9bd31090, 0xc0c4a185,
+    0xf6363176, 0x76ebc3af, 0x4019535c, 0x1b0ee249, 0x2dfc72ba,
+    0x180f0208, 0x2efd92fb, 0x75ea23ee, 0x4318b31d, 0xc3c541c4,
+    0xf537d137, 0xae206022, 0x98d2f0d1, 0x74ea83d1, 0x42181322,
+    0x190fa237, 0x2ffd32c4, 0xaf20c01d, 0x99d250ee, 0xc2c5e1fb,
+    0xf4377108}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x0000000000000000, 0xf390f23600000000, 0xe621e56d00000000,
+    0x15b1175b00000000, 0xcc43cadb00000000, 0x3fd338ed00000000,
+    0x2a622fb600000000, 0xd9f2dd8000000000, 0xd981e56c00000000,
+    0x2a11175a00000000, 0x3fa0000100000000, 0xcc30f23700000000,
+    0x15c22fb700000000, 0xe652dd8100000000, 0xf3e3cada00000000,
+    0x007338ec00000000, 0xb203cbd900000000, 0x419339ef00000000,
+    0x54222eb400000000, 0xa7b2dc8200000000, 0x7e40010200000000,
+    0x8dd0f33400000000, 0x9861e46f00000000, 0x6bf1165900000000,
+    0x6b822eb500000000, 0x9812dc8300000000, 0x8da3cbd800000000,
+    0x7e3339ee00000000, 0xa7c1e46e00000000, 0x5451165800000000,
+    0x41e0010300000000, 0xb270f33500000000, 0x2501e76800000000,
+    0xd691155e00000000, 0xc320020500000000, 0x30b0f03300000000,
+    0xe9422db300000000, 0x1ad2df8500000000, 0x0f63c8de00000000,
+    0xfcf33ae800000000, 0xfc80020400000000, 0x0f10f03200000000,
+    0x1aa1e76900000000, 0xe931155f00000000, 0x30c3c8df00000000,
+    0xc3533ae900000000, 0xd6e22db200000000, 0x2572df8400000000,
+    0x97022cb100000000, 0x6492de8700000000, 0x7123c9dc00000000,
+    0x82b33bea00000000, 0x5b41e66a00000000, 0xa8d1145c00000000,
+    0xbd60030700000000, 0x4ef0f13100000000, 0x4e83c9dd00000000,
+    0xbd133beb00000000, 0xa8a22cb000000000, 0x5b32de8600000000,
+    0x82c0030600000000, 0x7150f13000000000, 0x64e1e66b00000000,
+    0x9771145d00000000, 0x4a02ced100000000, 0xb9923ce700000000,
+    0xac232bbc00000000, 0x5fb3d98a00000000, 0x8641040a00000000,
+    0x75d1f63c00000000, 0x6060e16700000000, 0x93f0135100000000,
+    0x93832bbd00000000, 0x6013d98b00000000, 0x75a2ced000000000,
+    0x86323ce600000000, 0x5fc0e16600000000, 0xac50135000000000,
+    0xb9e1040b00000000, 0x4a71f63d00000000, 0xf801050800000000,
+    0x0b91f73e00000000, 0x1e20e06500000000, 0xedb0125300000000,
+    0x3442cfd300000000, 0xc7d23de500000000, 0xd2632abe00000000,
+    0x21f3d88800000000, 0x2180e06400000000, 0xd210125200000000,
+    0xc7a1050900000000, 0x3431f73f00000000, 0xedc32abf00000000,
+    0x1e53d88900000000, 0x0be2cfd200000000, 0xf8723de400000000,
+    0x6f0329b900000000, 0x9c93db8f00000000, 0x8922ccd400000000,
+    0x7ab23ee200000000, 0xa340e36200000000, 0x50d0115400000000,
+    0x4561060f00000000, 0xb6f1f43900000000, 0xb682ccd500000000,
+    0x45123ee300000000, 0x50a329b800000000, 0xa333db8e00000000,
+    0x7ac1060e00000000, 0x8951f43800000000, 0x9ce0e36300000000,
+    0x6f70115500000000, 0xdd00e26000000000, 0x2e90105600000000,
+    0x3b21070d00000000, 0xc8b1f53b00000000, 0x114328bb00000000,
+    0xe2d3da8d00000000, 0xf762cdd600000000, 0x04f23fe000000000,
+    0x0481070c00000000, 0xf711f53a00000000, 0xe2a0e26100000000,
+    0x1130105700000000, 0xc8c2cdd700000000, 0x3b523fe100000000,
+    0x2ee328ba00000000, 0xdd73da8c00000000, 0xd502ed7800000000,
+    0x26921f4e00000000, 0x3323081500000000, 0xc0b3fa2300000000,
+    0x194127a300000000, 0xead1d59500000000, 0xff60c2ce00000000,
+    0x0cf030f800000000, 0x0c83081400000000, 0xff13fa2200000000,
+    0xeaa2ed7900000000, 0x19321f4f00000000, 0xc0c0c2cf00000000,
+    0x335030f900000000, 0x26e127a200000000, 0xd571d59400000000,
+    0x670126a100000000, 0x9491d49700000000, 0x8120c3cc00000000,
+    0x72b031fa00000000, 0xab42ec7a00000000, 0x58d21e4c00000000,
+    0x4d63091700000000, 0xbef3fb2100000000, 0xbe80c3cd00000000,
+    0x4d1031fb00000000, 0x58a126a000000000, 0xab31d49600000000,
+    0x72c3091600000000, 0x8153fb2000000000, 0x94e2ec7b00000000,
+    0x67721e4d00000000, 0xf0030a1000000000, 0x0393f82600000000,
+    0x1622ef7d00000000, 0xe5b21d4b00000000, 0x3c40c0cb00000000,
+    0xcfd032fd00000000, 0xda6125a600000000, 0x29f1d79000000000,
+    0x2982ef7c00000000, 0xda121d4a00000000, 0xcfa30a1100000000,
+    0x3c33f82700000000, 0xe5c125a700000000, 0x1651d79100000000,
+    0x03e0c0ca00000000, 0xf07032fc00000000, 0x4200c1c900000000,
+    0xb19033ff00000000, 0xa42124a400000000, 0x57b1d69200000000,
+    0x8e430b1200000000, 0x7dd3f92400000000, 0x6862ee7f00000000,
+    0x9bf21c4900000000, 0x9b8124a500000000, 0x6811d69300000000,
+    0x7da0c1c800000000, 0x8e3033fe00000000, 0x57c2ee7e00000000,
+    0xa4521c4800000000, 0xb1e30b1300000000, 0x4273f92500000000,
+    0x9f0023a900000000, 0x6c90d19f00000000, 0x7921c6c400000000,
+    0x8ab134f200000000, 0x5343e97200000000, 0xa0d31b4400000000,
+    0xb5620c1f00000000, 0x46f2fe2900000000, 0x4681c6c500000000,
+    0xb51134f300000000, 0xa0a023a800000000, 0x5330d19e00000000,
+    0x8ac20c1e00000000, 0x7952fe2800000000, 0x6ce3e97300000000,
+    0x9f731b4500000000, 0x2d03e87000000000, 0xde931a4600000000,
+    0xcb220d1d00000000, 0x38b2ff2b00000000, 0xe14022ab00000000,
+    0x12d0d09d00000000, 0x0761c7c600000000, 0xf4f135f000000000,
+    0xf4820d1c00000000, 0x0712ff2a00000000, 0x12a3e87100000000,
+    0xe1331a4700000000, 0x38c1c7c700000000, 0xcb5135f100000000,
+    0xdee022aa00000000, 0x2d70d09c00000000, 0xba01c4c100000000,
+    0x499136f700000000, 0x5c2021ac00000000, 0xafb0d39a00000000,
+    0x76420e1a00000000, 0x85d2fc2c00000000, 0x9063eb7700000000,
+    0x63f3194100000000, 0x638021ad00000000, 0x9010d39b00000000,
+    0x85a1c4c000000000, 0x763136f600000000, 0xafc3eb7600000000,
+    0x5c53194000000000, 0x49e20e1b00000000, 0xba72fc2d00000000,
+    0x08020f1800000000, 0xfb92fd2e00000000, 0xee23ea7500000000,
+    0x1db3184300000000, 0xc441c5c300000000, 0x37d137f500000000,
+    0x226020ae00000000, 0xd1f0d29800000000, 0xd183ea7400000000,
+    0x2213184200000000, 0x37a20f1900000000, 0xc432fd2f00000000,
+    0x1dc020af00000000, 0xee50d29900000000, 0xfbe1c5c200000000,
+    0x087137f400000000},
+   {0x0000000000000000, 0x3651822400000000, 0x6ca2044900000000,
+    0x5af3866d00000000, 0xd844099200000000, 0xee158bb600000000,
+    0xb4e60ddb00000000, 0x82b78fff00000000, 0xf18f63ff00000000,
+    0xc7dee1db00000000, 0x9d2d67b600000000, 0xab7ce59200000000,
+    0x29cb6a6d00000000, 0x1f9ae84900000000, 0x45696e2400000000,
+    0x7338ec0000000000, 0xa319b62500000000, 0x9548340100000000,
+    0xcfbbb26c00000000, 0xf9ea304800000000, 0x7b5dbfb700000000,
+    0x4d0c3d9300000000, 0x17ffbbfe00000000, 0x21ae39da00000000,
+    0x5296d5da00000000, 0x64c757fe00000000, 0x3e34d19300000000,
+    0x086553b700000000, 0x8ad2dc4800000000, 0xbc835e6c00000000,
+    0xe670d80100000000, 0xd0215a2500000000, 0x46336c4b00000000,
+    0x7062ee6f00000000, 0x2a91680200000000, 0x1cc0ea2600000000,
+    0x9e7765d900000000, 0xa826e7fd00000000, 0xf2d5619000000000,
+    0xc484e3b400000000, 0xb7bc0fb400000000, 0x81ed8d9000000000,
+    0xdb1e0bfd00000000, 0xed4f89d900000000, 0x6ff8062600000000,
+    0x59a9840200000000, 0x035a026f00000000, 0x350b804b00000000,
+    0xe52ada6e00000000, 0xd37b584a00000000, 0x8988de2700000000,
+    0xbfd95c0300000000, 0x3d6ed3fc00000000, 0x0b3f51d800000000,
+    0x51ccd7b500000000, 0x679d559100000000, 0x14a5b99100000000,
+    0x22f43bb500000000, 0x7807bdd800000000, 0x4e563ffc00000000,
+    0xcce1b00300000000, 0xfab0322700000000, 0xa043b44a00000000,
+    0x9612366e00000000, 0x8c66d89600000000, 0xba375ab200000000,
+    0xe0c4dcdf00000000, 0xd6955efb00000000, 0x5422d10400000000,
+    0x6273532000000000, 0x3880d54d00000000, 0x0ed1576900000000,
+    0x7de9bb6900000000, 0x4bb8394d00000000, 0x114bbf2000000000,
+    0x271a3d0400000000, 0xa5adb2fb00000000, 0x93fc30df00000000,
+    0xc90fb6b200000000, 0xff5e349600000000, 0x2f7f6eb300000000,
+    0x192eec9700000000, 0x43dd6afa00000000, 0x758ce8de00000000,
+    0xf73b672100000000, 0xc16ae50500000000, 0x9b99636800000000,
+    0xadc8e14c00000000, 0xdef00d4c00000000, 0xe8a18f6800000000,
+    0xb252090500000000, 0x84038b2100000000, 0x06b404de00000000,
+    0x30e586fa00000000, 0x6a16009700000000, 0x5c4782b300000000,
+    0xca55b4dd00000000, 0xfc0436f900000000, 0xa6f7b09400000000,
+    0x90a632b000000000, 0x1211bd4f00000000, 0x24403f6b00000000,
+    0x7eb3b90600000000, 0x48e23b2200000000, 0x3bdad72200000000,
+    0x0d8b550600000000, 0x5778d36b00000000, 0x6129514f00000000,
+    0xe39edeb000000000, 0xd5cf5c9400000000, 0x8f3cdaf900000000,
+    0xb96d58dd00000000, 0x694c02f800000000, 0x5f1d80dc00000000,
+    0x05ee06b100000000, 0x33bf849500000000, 0xb1080b6a00000000,
+    0x8759894e00000000, 0xddaa0f2300000000, 0xebfb8d0700000000,
+    0x98c3610700000000, 0xae92e32300000000, 0xf461654e00000000,
+    0xc230e76a00000000, 0x4087689500000000, 0x76d6eab100000000,
+    0x2c256cdc00000000, 0x1a74eef800000000, 0x59cbc1f600000000,
+    0x6f9a43d200000000, 0x3569c5bf00000000, 0x0338479b00000000,
+    0x818fc86400000000, 0xb7de4a4000000000, 0xed2dcc2d00000000,
+    0xdb7c4e0900000000, 0xa844a20900000000, 0x9e15202d00000000,
+    0xc4e6a64000000000, 0xf2b7246400000000, 0x7000ab9b00000000,
+    0x465129bf00000000, 0x1ca2afd200000000, 0x2af32df600000000,
+    0xfad277d300000000, 0xcc83f5f700000000, 0x9670739a00000000,
+    0xa021f1be00000000, 0x22967e4100000000, 0x14c7fc6500000000,
+    0x4e347a0800000000, 0x7865f82c00000000, 0x0b5d142c00000000,
+    0x3d0c960800000000, 0x67ff106500000000, 0x51ae924100000000,
+    0xd3191dbe00000000, 0xe5489f9a00000000, 0xbfbb19f700000000,
+    0x89ea9bd300000000, 0x1ff8adbd00000000, 0x29a92f9900000000,
+    0x735aa9f400000000, 0x450b2bd000000000, 0xc7bca42f00000000,
+    0xf1ed260b00000000, 0xab1ea06600000000, 0x9d4f224200000000,
+    0xee77ce4200000000, 0xd8264c6600000000, 0x82d5ca0b00000000,
+    0xb484482f00000000, 0x3633c7d000000000, 0x006245f400000000,
+    0x5a91c39900000000, 0x6cc041bd00000000, 0xbce11b9800000000,
+    0x8ab099bc00000000, 0xd0431fd100000000, 0xe6129df500000000,
+    0x64a5120a00000000, 0x52f4902e00000000, 0x0807164300000000,
+    0x3e56946700000000, 0x4d6e786700000000, 0x7b3ffa4300000000,
+    0x21cc7c2e00000000, 0x179dfe0a00000000, 0x952a71f500000000,
+    0xa37bf3d100000000, 0xf98875bc00000000, 0xcfd9f79800000000,
+    0xd5ad196000000000, 0xe3fc9b4400000000, 0xb90f1d2900000000,
+    0x8f5e9f0d00000000, 0x0de910f200000000, 0x3bb892d600000000,
+    0x614b14bb00000000, 0x571a969f00000000, 0x24227a9f00000000,
+    0x1273f8bb00000000, 0x48807ed600000000, 0x7ed1fcf200000000,
+    0xfc66730d00000000, 0xca37f12900000000, 0x90c4774400000000,
+    0xa695f56000000000, 0x76b4af4500000000, 0x40e52d6100000000,
+    0x1a16ab0c00000000, 0x2c47292800000000, 0xaef0a6d700000000,
+    0x98a124f300000000, 0xc252a29e00000000, 0xf40320ba00000000,
+    0x873bccba00000000, 0xb16a4e9e00000000, 0xeb99c8f300000000,
+    0xddc84ad700000000, 0x5f7fc52800000000, 0x692e470c00000000,
+    0x33ddc16100000000, 0x058c434500000000, 0x939e752b00000000,
+    0xa5cff70f00000000, 0xff3c716200000000, 0xc96df34600000000,
+    0x4bda7cb900000000, 0x7d8bfe9d00000000, 0x277878f000000000,
+    0x1129fad400000000, 0x621116d400000000, 0x544094f000000000,
+    0x0eb3129d00000000, 0x38e290b900000000, 0xba551f4600000000,
+    0x8c049d6200000000, 0xd6f71b0f00000000, 0xe0a6992b00000000,
+    0x3087c30e00000000, 0x06d6412a00000000, 0x5c25c74700000000,
+    0x6a74456300000000, 0xe8c3ca9c00000000, 0xde9248b800000000,
+    0x8461ced500000000, 0xb2304cf100000000, 0xc108a0f100000000,
+    0xf75922d500000000, 0xadaaa4b800000000, 0x9bfb269c00000000,
+    0x194ca96300000000, 0x2f1d2b4700000000, 0x75eead2a00000000,
+    0x43bf2f0e00000000},
+   {0x0000000000000000, 0xc8179ecf00000000, 0xd1294d4400000000,
+    0x193ed38b00000000, 0xa2539a8800000000, 0x6a44044700000000,
+    0x737ad7cc00000000, 0xbb6d490300000000, 0x05a145ca00000000,
+    0xcdb6db0500000000, 0xd488088e00000000, 0x1c9f964100000000,
+    0xa7f2df4200000000, 0x6fe5418d00000000, 0x76db920600000000,
+    0xbecc0cc900000000, 0x4b44fa4f00000000, 0x8353648000000000,
+    0x9a6db70b00000000, 0x527a29c400000000, 0xe91760c700000000,
+    0x2100fe0800000000, 0x383e2d8300000000, 0xf029b34c00000000,
+    0x4ee5bf8500000000, 0x86f2214a00000000, 0x9fccf2c100000000,
+    0x57db6c0e00000000, 0xecb6250d00000000, 0x24a1bbc200000000,
+    0x3d9f684900000000, 0xf588f68600000000, 0x9688f49f00000000,
+    0x5e9f6a5000000000, 0x47a1b9db00000000, 0x8fb6271400000000,
+    0x34db6e1700000000, 0xfcccf0d800000000, 0xe5f2235300000000,
+    0x2de5bd9c00000000, 0x9329b15500000000, 0x5b3e2f9a00000000,
+    0x4200fc1100000000, 0x8a1762de00000000, 0x317a2bdd00000000,
+    0xf96db51200000000, 0xe053669900000000, 0x2844f85600000000,
+    0xddcc0ed000000000, 0x15db901f00000000, 0x0ce5439400000000,
+    0xc4f2dd5b00000000, 0x7f9f945800000000, 0xb7880a9700000000,
+    0xaeb6d91c00000000, 0x66a147d300000000, 0xd86d4b1a00000000,
+    0x107ad5d500000000, 0x0944065e00000000, 0xc153989100000000,
+    0x7a3ed19200000000, 0xb2294f5d00000000, 0xab179cd600000000,
+    0x6300021900000000, 0x6d1798e400000000, 0xa500062b00000000,
+    0xbc3ed5a000000000, 0x74294b6f00000000, 0xcf44026c00000000,
+    0x07539ca300000000, 0x1e6d4f2800000000, 0xd67ad1e700000000,
+    0x68b6dd2e00000000, 0xa0a143e100000000, 0xb99f906a00000000,
+    0x71880ea500000000, 0xcae547a600000000, 0x02f2d96900000000,
+    0x1bcc0ae200000000, 0xd3db942d00000000, 0x265362ab00000000,
+    0xee44fc6400000000, 0xf77a2fef00000000, 0x3f6db12000000000,
+    0x8400f82300000000, 0x4c1766ec00000000, 0x5529b56700000000,
+    0x9d3e2ba800000000, 0x23f2276100000000, 0xebe5b9ae00000000,
+    0xf2db6a2500000000, 0x3accf4ea00000000, 0x81a1bde900000000,
+    0x49b6232600000000, 0x5088f0ad00000000, 0x989f6e6200000000,
+    0xfb9f6c7b00000000, 0x3388f2b400000000, 0x2ab6213f00000000,
+    0xe2a1bff000000000, 0x59ccf6f300000000, 0x91db683c00000000,
+    0x88e5bbb700000000, 0x40f2257800000000, 0xfe3e29b100000000,
+    0x3629b77e00000000, 0x2f1764f500000000, 0xe700fa3a00000000,
+    0x5c6db33900000000, 0x947a2df600000000, 0x8d44fe7d00000000,
+    0x455360b200000000, 0xb0db963400000000, 0x78cc08fb00000000,
+    0x61f2db7000000000, 0xa9e545bf00000000, 0x12880cbc00000000,
+    0xda9f927300000000, 0xc3a141f800000000, 0x0bb6df3700000000,
+    0xb57ad3fe00000000, 0x7d6d4d3100000000, 0x64539eba00000000,
+    0xac44007500000000, 0x1729497600000000, 0xdf3ed7b900000000,
+    0xc600043200000000, 0x0e179afd00000000, 0x9b28411200000000,
+    0x533fdfdd00000000, 0x4a010c5600000000, 0x8216929900000000,
+    0x397bdb9a00000000, 0xf16c455500000000, 0xe85296de00000000,
+    0x2045081100000000, 0x9e8904d800000000, 0x569e9a1700000000,
+    0x4fa0499c00000000, 0x87b7d75300000000, 0x3cda9e5000000000,
+    0xf4cd009f00000000, 0xedf3d31400000000, 0x25e44ddb00000000,
+    0xd06cbb5d00000000, 0x187b259200000000, 0x0145f61900000000,
+    0xc95268d600000000, 0x723f21d500000000, 0xba28bf1a00000000,
+    0xa3166c9100000000, 0x6b01f25e00000000, 0xd5cdfe9700000000,
+    0x1dda605800000000, 0x04e4b3d300000000, 0xccf32d1c00000000,
+    0x779e641f00000000, 0xbf89fad000000000, 0xa6b7295b00000000,
+    0x6ea0b79400000000, 0x0da0b58d00000000, 0xc5b72b4200000000,
+    0xdc89f8c900000000, 0x149e660600000000, 0xaff32f0500000000,
+    0x67e4b1ca00000000, 0x7eda624100000000, 0xb6cdfc8e00000000,
+    0x0801f04700000000, 0xc0166e8800000000, 0xd928bd0300000000,
+    0x113f23cc00000000, 0xaa526acf00000000, 0x6245f40000000000,
+    0x7b7b278b00000000, 0xb36cb94400000000, 0x46e44fc200000000,
+    0x8ef3d10d00000000, 0x97cd028600000000, 0x5fda9c4900000000,
+    0xe4b7d54a00000000, 0x2ca04b8500000000, 0x359e980e00000000,
+    0xfd8906c100000000, 0x43450a0800000000, 0x8b5294c700000000,
+    0x926c474c00000000, 0x5a7bd98300000000, 0xe116908000000000,
+    0x29010e4f00000000, 0x303fddc400000000, 0xf828430b00000000,
+    0xf63fd9f600000000, 0x3e28473900000000, 0x271694b200000000,
+    0xef010a7d00000000, 0x546c437e00000000, 0x9c7bddb100000000,
+    0x85450e3a00000000, 0x4d5290f500000000, 0xf39e9c3c00000000,
+    0x3b8902f300000000, 0x22b7d17800000000, 0xeaa04fb700000000,
+    0x51cd06b400000000, 0x99da987b00000000, 0x80e44bf000000000,
+    0x48f3d53f00000000, 0xbd7b23b900000000, 0x756cbd7600000000,
+    0x6c526efd00000000, 0xa445f03200000000, 0x1f28b93100000000,
+    0xd73f27fe00000000, 0xce01f47500000000, 0x06166aba00000000,
+    0xb8da667300000000, 0x70cdf8bc00000000, 0x69f32b3700000000,
+    0xa1e4b5f800000000, 0x1a89fcfb00000000, 0xd29e623400000000,
+    0xcba0b1bf00000000, 0x03b72f7000000000, 0x60b72d6900000000,
+    0xa8a0b3a600000000, 0xb19e602d00000000, 0x7989fee200000000,
+    0xc2e4b7e100000000, 0x0af3292e00000000, 0x13cdfaa500000000,
+    0xdbda646a00000000, 0x651668a300000000, 0xad01f66c00000000,
+    0xb43f25e700000000, 0x7c28bb2800000000, 0xc745f22b00000000,
+    0x0f526ce400000000, 0x166cbf6f00000000, 0xde7b21a000000000,
+    0x2bf3d72600000000, 0xe3e449e900000000, 0xfada9a6200000000,
+    0x32cd04ad00000000, 0x89a04dae00000000, 0x41b7d36100000000,
+    0x588900ea00000000, 0x909e9e2500000000, 0x2e5292ec00000000,
+    0xe6450c2300000000, 0xff7bdfa800000000, 0x376c416700000000,
+    0x8c01086400000000, 0x441696ab00000000, 0x5d28452000000000,
+    0x953fdbef00000000},
+   {0x0000000000000000, 0x95d4709500000000, 0x6baf90f100000000,
+    0xfe7be06400000000, 0x9758503800000000, 0x028c20ad00000000,
+    0xfcf7c0c900000000, 0x6923b05c00000000, 0x2eb1a07000000000,
+    0xbb65d0e500000000, 0x451e308100000000, 0xd0ca401400000000,
+    0xb9e9f04800000000, 0x2c3d80dd00000000, 0xd24660b900000000,
+    0x4792102c00000000, 0x5c6241e100000000, 0xc9b6317400000000,
+    0x37cdd11000000000, 0xa219a18500000000, 0xcb3a11d900000000,
+    0x5eee614c00000000, 0xa095812800000000, 0x3541f1bd00000000,
+    0x72d3e19100000000, 0xe707910400000000, 0x197c716000000000,
+    0x8ca801f500000000, 0xe58bb1a900000000, 0x705fc13c00000000,
+    0x8e24215800000000, 0x1bf051cd00000000, 0xf9c2f31900000000,
+    0x6c16838c00000000, 0x926d63e800000000, 0x07b9137d00000000,
+    0x6e9aa32100000000, 0xfb4ed3b400000000, 0x053533d000000000,
+    0x90e1434500000000, 0xd773536900000000, 0x42a723fc00000000,
+    0xbcdcc39800000000, 0x2908b30d00000000, 0x402b035100000000,
+    0xd5ff73c400000000, 0x2b8493a000000000, 0xbe50e33500000000,
+    0xa5a0b2f800000000, 0x3074c26d00000000, 0xce0f220900000000,
+    0x5bdb529c00000000, 0x32f8e2c000000000, 0xa72c925500000000,
+    0x5957723100000000, 0xcc8302a400000000, 0x8b11128800000000,
+    0x1ec5621d00000000, 0xe0be827900000000, 0x756af2ec00000000,
+    0x1c4942b000000000, 0x899d322500000000, 0x77e6d24100000000,
+    0xe232a2d400000000, 0xf285e73300000000, 0x675197a600000000,
+    0x992a77c200000000, 0x0cfe075700000000, 0x65ddb70b00000000,
+    0xf009c79e00000000, 0x0e7227fa00000000, 0x9ba6576f00000000,
+    0xdc34474300000000, 0x49e037d600000000, 0xb79bd7b200000000,
+    0x224fa72700000000, 0x4b6c177b00000000, 0xdeb867ee00000000,
+    0x20c3878a00000000, 0xb517f71f00000000, 0xaee7a6d200000000,
+    0x3b33d64700000000, 0xc548362300000000, 0x509c46b600000000,
+    0x39bff6ea00000000, 0xac6b867f00000000, 0x5210661b00000000,
+    0xc7c4168e00000000, 0x805606a200000000, 0x1582763700000000,
+    0xebf9965300000000, 0x7e2de6c600000000, 0x170e569a00000000,
+    0x82da260f00000000, 0x7ca1c66b00000000, 0xe975b6fe00000000,
+    0x0b47142a00000000, 0x9e9364bf00000000, 0x60e884db00000000,
+    0xf53cf44e00000000, 0x9c1f441200000000, 0x09cb348700000000,
+    0xf7b0d4e300000000, 0x6264a47600000000, 0x25f6b45a00000000,
+    0xb022c4cf00000000, 0x4e5924ab00000000, 0xdb8d543e00000000,
+    0xb2aee46200000000, 0x277a94f700000000, 0xd901749300000000,
+    0x4cd5040600000000, 0x572555cb00000000, 0xc2f1255e00000000,
+    0x3c8ac53a00000000, 0xa95eb5af00000000, 0xc07d05f300000000,
+    0x55a9756600000000, 0xabd2950200000000, 0x3e06e59700000000,
+    0x7994f5bb00000000, 0xec40852e00000000, 0x123b654a00000000,
+    0x87ef15df00000000, 0xeecca58300000000, 0x7b18d51600000000,
+    0x8563357200000000, 0x10b745e700000000, 0xe40bcf6700000000,
+    0x71dfbff200000000, 0x8fa45f9600000000, 0x1a702f0300000000,
+    0x73539f5f00000000, 0xe687efca00000000, 0x18fc0fae00000000,
+    0x8d287f3b00000000, 0xcaba6f1700000000, 0x5f6e1f8200000000,
+    0xa115ffe600000000, 0x34c18f7300000000, 0x5de23f2f00000000,
+    0xc8364fba00000000, 0x364dafde00000000, 0xa399df4b00000000,
+    0xb8698e8600000000, 0x2dbdfe1300000000, 0xd3c61e7700000000,
+    0x46126ee200000000, 0x2f31debe00000000, 0xbae5ae2b00000000,
+    0x449e4e4f00000000, 0xd14a3eda00000000, 0x96d82ef600000000,
+    0x030c5e6300000000, 0xfd77be0700000000, 0x68a3ce9200000000,
+    0x01807ece00000000, 0x94540e5b00000000, 0x6a2fee3f00000000,
+    0xfffb9eaa00000000, 0x1dc93c7e00000000, 0x881d4ceb00000000,
+    0x7666ac8f00000000, 0xe3b2dc1a00000000, 0x8a916c4600000000,
+    0x1f451cd300000000, 0xe13efcb700000000, 0x74ea8c2200000000,
+    0x33789c0e00000000, 0xa6acec9b00000000, 0x58d70cff00000000,
+    0xcd037c6a00000000, 0xa420cc3600000000, 0x31f4bca300000000,
+    0xcf8f5cc700000000, 0x5a5b2c5200000000, 0x41ab7d9f00000000,
+    0xd47f0d0a00000000, 0x2a04ed6e00000000, 0xbfd09dfb00000000,
+    0xd6f32da700000000, 0x43275d3200000000, 0xbd5cbd5600000000,
+    0x2888cdc300000000, 0x6f1addef00000000, 0xfacead7a00000000,
+    0x04b54d1e00000000, 0x91613d8b00000000, 0xf8428dd700000000,
+    0x6d96fd4200000000, 0x93ed1d2600000000, 0x06396db300000000,
+    0x168e285400000000, 0x835a58c100000000, 0x7d21b8a500000000,
+    0xe8f5c83000000000, 0x81d6786c00000000, 0x140208f900000000,
+    0xea79e89d00000000, 0x7fad980800000000, 0x383f882400000000,
+    0xadebf8b100000000, 0x539018d500000000, 0xc644684000000000,
+    0xaf67d81c00000000, 0x3ab3a88900000000, 0xc4c848ed00000000,
+    0x511c387800000000, 0x4aec69b500000000, 0xdf38192000000000,
+    0x2143f94400000000, 0xb49789d100000000, 0xddb4398d00000000,
+    0x4860491800000000, 0xb61ba97c00000000, 0x23cfd9e900000000,
+    0x645dc9c500000000, 0xf189b95000000000, 0x0ff2593400000000,
+    0x9a2629a100000000, 0xf30599fd00000000, 0x66d1e96800000000,
+    0x98aa090c00000000, 0x0d7e799900000000, 0xef4cdb4d00000000,
+    0x7a98abd800000000, 0x84e34bbc00000000, 0x11373b2900000000,
+    0x78148b7500000000, 0xedc0fbe000000000, 0x13bb1b8400000000,
+    0x866f6b1100000000, 0xc1fd7b3d00000000, 0x54290ba800000000,
+    0xaa52ebcc00000000, 0x3f869b5900000000, 0x56a52b0500000000,
+    0xc3715b9000000000, 0x3d0abbf400000000, 0xa8decb6100000000,
+    0xb32e9aac00000000, 0x26faea3900000000, 0xd8810a5d00000000,
+    0x4d557ac800000000, 0x2476ca9400000000, 0xb1a2ba0100000000,
+    0x4fd95a6500000000, 0xda0d2af000000000, 0x9d9f3adc00000000,
+    0x084b4a4900000000, 0xf630aa2d00000000, 0x63e4dab800000000,
+    0x0ac76ae400000000, 0x9f131a7100000000, 0x6168fa1500000000,
+    0xf4bc8a8000000000},
+   {0x0000000000000000, 0x1f17f08000000000, 0x7f2891da00000000,
+    0x603f615a00000000, 0xbf56536e00000000, 0xa041a3ee00000000,
+    0xc07ec2b400000000, 0xdf69323400000000, 0x7eada6dc00000000,
+    0x61ba565c00000000, 0x0185370600000000, 0x1e92c78600000000,
+    0xc1fbf5b200000000, 0xdeec053200000000, 0xbed3646800000000,
+    0xa1c494e800000000, 0xbd5c3c6200000000, 0xa24bcce200000000,
+    0xc274adb800000000, 0xdd635d3800000000, 0x020a6f0c00000000,
+    0x1d1d9f8c00000000, 0x7d22fed600000000, 0x62350e5600000000,
+    0xc3f19abe00000000, 0xdce66a3e00000000, 0xbcd90b6400000000,
+    0xa3cefbe400000000, 0x7ca7c9d000000000, 0x63b0395000000000,
+    0x038f580a00000000, 0x1c98a88a00000000, 0x7ab978c400000000,
+    0x65ae884400000000, 0x0591e91e00000000, 0x1a86199e00000000,
+    0xc5ef2baa00000000, 0xdaf8db2a00000000, 0xbac7ba7000000000,
+    0xa5d04af000000000, 0x0414de1800000000, 0x1b032e9800000000,
+    0x7b3c4fc200000000, 0x642bbf4200000000, 0xbb428d7600000000,
+    0xa4557df600000000, 0xc46a1cac00000000, 0xdb7dec2c00000000,
+    0xc7e544a600000000, 0xd8f2b42600000000, 0xb8cdd57c00000000,
+    0xa7da25fc00000000, 0x78b317c800000000, 0x67a4e74800000000,
+    0x079b861200000000, 0x188c769200000000, 0xb948e27a00000000,
+    0xa65f12fa00000000, 0xc66073a000000000, 0xd977832000000000,
+    0x061eb11400000000, 0x1909419400000000, 0x793620ce00000000,
+    0x6621d04e00000000, 0xb574805300000000, 0xaa6370d300000000,
+    0xca5c118900000000, 0xd54be10900000000, 0x0a22d33d00000000,
+    0x153523bd00000000, 0x750a42e700000000, 0x6a1db26700000000,
+    0xcbd9268f00000000, 0xd4ced60f00000000, 0xb4f1b75500000000,
+    0xabe647d500000000, 0x748f75e100000000, 0x6b98856100000000,
+    0x0ba7e43b00000000, 0x14b014bb00000000, 0x0828bc3100000000,
+    0x173f4cb100000000, 0x77002deb00000000, 0x6817dd6b00000000,
+    0xb77eef5f00000000, 0xa8691fdf00000000, 0xc8567e8500000000,
+    0xd7418e0500000000, 0x76851aed00000000, 0x6992ea6d00000000,
+    0x09ad8b3700000000, 0x16ba7bb700000000, 0xc9d3498300000000,
+    0xd6c4b90300000000, 0xb6fbd85900000000, 0xa9ec28d900000000,
+    0xcfcdf89700000000, 0xd0da081700000000, 0xb0e5694d00000000,
+    0xaff299cd00000000, 0x709babf900000000, 0x6f8c5b7900000000,
+    0x0fb33a2300000000, 0x10a4caa300000000, 0xb1605e4b00000000,
+    0xae77aecb00000000, 0xce48cf9100000000, 0xd15f3f1100000000,
+    0x0e360d2500000000, 0x1121fda500000000, 0x711e9cff00000000,
+    0x6e096c7f00000000, 0x7291c4f500000000, 0x6d86347500000000,
+    0x0db9552f00000000, 0x12aea5af00000000, 0xcdc7979b00000000,
+    0xd2d0671b00000000, 0xb2ef064100000000, 0xadf8f6c100000000,
+    0x0c3c622900000000, 0x132b92a900000000, 0x7314f3f300000000,
+    0x6c03037300000000, 0xb36a314700000000, 0xac7dc1c700000000,
+    0xcc42a09d00000000, 0xd355501d00000000, 0x6ae900a700000000,
+    0x75fef02700000000, 0x15c1917d00000000, 0x0ad661fd00000000,
+    0xd5bf53c900000000, 0xcaa8a34900000000, 0xaa97c21300000000,
+    0xb580329300000000, 0x1444a67b00000000, 0x0b5356fb00000000,
+    0x6b6c37a100000000, 0x747bc72100000000, 0xab12f51500000000,
+    0xb405059500000000, 0xd43a64cf00000000, 0xcb2d944f00000000,
+    0xd7b53cc500000000, 0xc8a2cc4500000000, 0xa89dad1f00000000,
+    0xb78a5d9f00000000, 0x68e36fab00000000, 0x77f49f2b00000000,
+    0x17cbfe7100000000, 0x08dc0ef100000000, 0xa9189a1900000000,
+    0xb60f6a9900000000, 0xd6300bc300000000, 0xc927fb4300000000,
+    0x164ec97700000000, 0x095939f700000000, 0x696658ad00000000,
+    0x7671a82d00000000, 0x1050786300000000, 0x0f4788e300000000,
+    0x6f78e9b900000000, 0x706f193900000000, 0xaf062b0d00000000,
+    0xb011db8d00000000, 0xd02ebad700000000, 0xcf394a5700000000,
+    0x6efddebf00000000, 0x71ea2e3f00000000, 0x11d54f6500000000,
+    0x0ec2bfe500000000, 0xd1ab8dd100000000, 0xcebc7d5100000000,
+    0xae831c0b00000000, 0xb194ec8b00000000, 0xad0c440100000000,
+    0xb21bb48100000000, 0xd224d5db00000000, 0xcd33255b00000000,
+    0x125a176f00000000, 0x0d4de7ef00000000, 0x6d7286b500000000,
+    0x7265763500000000, 0xd3a1e2dd00000000, 0xccb6125d00000000,
+    0xac89730700000000, 0xb39e838700000000, 0x6cf7b1b300000000,
+    0x73e0413300000000, 0x13df206900000000, 0x0cc8d0e900000000,
+    0xdf9d80f400000000, 0xc08a707400000000, 0xa0b5112e00000000,
+    0xbfa2e1ae00000000, 0x60cbd39a00000000, 0x7fdc231a00000000,
+    0x1fe3424000000000, 0x00f4b2c000000000, 0xa130262800000000,
+    0xbe27d6a800000000, 0xde18b7f200000000, 0xc10f477200000000,
+    0x1e66754600000000, 0x017185c600000000, 0x614ee49c00000000,
+    0x7e59141c00000000, 0x62c1bc9600000000, 0x7dd64c1600000000,
+    0x1de92d4c00000000, 0x02feddcc00000000, 0xdd97eff800000000,
+    0xc2801f7800000000, 0xa2bf7e2200000000, 0xbda88ea200000000,
+    0x1c6c1a4a00000000, 0x037beaca00000000, 0x63448b9000000000,
+    0x7c537b1000000000, 0xa33a492400000000, 0xbc2db9a400000000,
+    0xdc12d8fe00000000, 0xc305287e00000000, 0xa524f83000000000,
+    0xba3308b000000000, 0xda0c69ea00000000, 0xc51b996a00000000,
+    0x1a72ab5e00000000, 0x05655bde00000000, 0x655a3a8400000000,
+    0x7a4dca0400000000, 0xdb895eec00000000, 0xc49eae6c00000000,
+    0xa4a1cf3600000000, 0xbbb63fb600000000, 0x64df0d8200000000,
+    0x7bc8fd0200000000, 0x1bf79c5800000000, 0x04e06cd800000000,
+    0x1878c45200000000, 0x076f34d200000000, 0x6750558800000000,
+    0x7847a50800000000, 0xa72e973c00000000, 0xb83967bc00000000,
+    0xd80606e600000000, 0xc711f66600000000, 0x66d5628e00000000,
+    0x79c2920e00000000, 0x19fdf35400000000, 0x06ea03d400000000,
+    0xd98331e000000000, 0xc694c16000000000, 0xa6aba03a00000000,
+    0xb9bc50ba00000000},
+   {0x0000000000000000, 0xe2fd888d00000000, 0x85fd60c000000000,
+    0x6700e84d00000000, 0x4bfdb05b00000000, 0xa90038d600000000,
+    0xce00d09b00000000, 0x2cfd581600000000, 0x96fa61b700000000,
+    0x7407e93a00000000, 0x1307017700000000, 0xf1fa89fa00000000,
+    0xdd07d1ec00000000, 0x3ffa596100000000, 0x58fab12c00000000,
+    0xba0739a100000000, 0x6df3b2b500000000, 0x8f0e3a3800000000,
+    0xe80ed27500000000, 0x0af35af800000000, 0x260e02ee00000000,
+    0xc4f38a6300000000, 0xa3f3622e00000000, 0x410eeaa300000000,
+    0xfb09d30200000000, 0x19f45b8f00000000, 0x7ef4b3c200000000,
+    0x9c093b4f00000000, 0xb0f4635900000000, 0x5209ebd400000000,
+    0x3509039900000000, 0xd7f48b1400000000, 0x9be014b000000000,
+    0x791d9c3d00000000, 0x1e1d747000000000, 0xfce0fcfd00000000,
+    0xd01da4eb00000000, 0x32e02c6600000000, 0x55e0c42b00000000,
+    0xb71d4ca600000000, 0x0d1a750700000000, 0xefe7fd8a00000000,
+    0x88e715c700000000, 0x6a1a9d4a00000000, 0x46e7c55c00000000,
+    0xa41a4dd100000000, 0xc31aa59c00000000, 0x21e72d1100000000,
+    0xf613a60500000000, 0x14ee2e8800000000, 0x73eec6c500000000,
+    0x91134e4800000000, 0xbdee165e00000000, 0x5f139ed300000000,
+    0x3813769e00000000, 0xdaeefe1300000000, 0x60e9c7b200000000,
+    0x82144f3f00000000, 0xe514a77200000000, 0x07e92fff00000000,
+    0x2b1477e900000000, 0xc9e9ff6400000000, 0xaee9172900000000,
+    0x4c149fa400000000, 0x77c758bb00000000, 0x953ad03600000000,
+    0xf23a387b00000000, 0x10c7b0f600000000, 0x3c3ae8e000000000,
+    0xdec7606d00000000, 0xb9c7882000000000, 0x5b3a00ad00000000,
+    0xe13d390c00000000, 0x03c0b18100000000, 0x64c059cc00000000,
+    0x863dd14100000000, 0xaac0895700000000, 0x483d01da00000000,
+    0x2f3de99700000000, 0xcdc0611a00000000, 0x1a34ea0e00000000,
+    0xf8c9628300000000, 0x9fc98ace00000000, 0x7d34024300000000,
+    0x51c95a5500000000, 0xb334d2d800000000, 0xd4343a9500000000,
+    0x36c9b21800000000, 0x8cce8bb900000000, 0x6e33033400000000,
+    0x0933eb7900000000, 0xebce63f400000000, 0xc7333be200000000,
+    0x25ceb36f00000000, 0x42ce5b2200000000, 0xa033d3af00000000,
+    0xec274c0b00000000, 0x0edac48600000000, 0x69da2ccb00000000,
+    0x8b27a44600000000, 0xa7dafc5000000000, 0x452774dd00000000,
+    0x22279c9000000000, 0xc0da141d00000000, 0x7add2dbc00000000,
+    0x9820a53100000000, 0xff204d7c00000000, 0x1dddc5f100000000,
+    0x31209de700000000, 0xd3dd156a00000000, 0xb4ddfd2700000000,
+    0x562075aa00000000, 0x81d4febe00000000, 0x6329763300000000,
+    0x04299e7e00000000, 0xe6d416f300000000, 0xca294ee500000000,
+    0x28d4c66800000000, 0x4fd42e2500000000, 0xad29a6a800000000,
+    0x172e9f0900000000, 0xf5d3178400000000, 0x92d3ffc900000000,
+    0x702e774400000000, 0x5cd32f5200000000, 0xbe2ea7df00000000,
+    0xd92e4f9200000000, 0x3bd3c71f00000000, 0xaf88c0ad00000000,
+    0x4d75482000000000, 0x2a75a06d00000000, 0xc88828e000000000,
+    0xe47570f600000000, 0x0688f87b00000000, 0x6188103600000000,
+    0x837598bb00000000, 0x3972a11a00000000, 0xdb8f299700000000,
+    0xbc8fc1da00000000, 0x5e72495700000000, 0x728f114100000000,
+    0x907299cc00000000, 0xf772718100000000, 0x158ff90c00000000,
+    0xc27b721800000000, 0x2086fa9500000000, 0x478612d800000000,
+    0xa57b9a5500000000, 0x8986c24300000000, 0x6b7b4ace00000000,
+    0x0c7ba28300000000, 0xee862a0e00000000, 0x548113af00000000,
+    0xb67c9b2200000000, 0xd17c736f00000000, 0x3381fbe200000000,
+    0x1f7ca3f400000000, 0xfd812b7900000000, 0x9a81c33400000000,
+    0x787c4bb900000000, 0x3468d41d00000000, 0xd6955c9000000000,
+    0xb195b4dd00000000, 0x53683c5000000000, 0x7f95644600000000,
+    0x9d68eccb00000000, 0xfa68048600000000, 0x18958c0b00000000,
+    0xa292b5aa00000000, 0x406f3d2700000000, 0x276fd56a00000000,
+    0xc5925de700000000, 0xe96f05f100000000, 0x0b928d7c00000000,
+    0x6c92653100000000, 0x8e6fedbc00000000, 0x599b66a800000000,
+    0xbb66ee2500000000, 0xdc66066800000000, 0x3e9b8ee500000000,
+    0x1266d6f300000000, 0xf09b5e7e00000000, 0x979bb63300000000,
+    0x75663ebe00000000, 0xcf61071f00000000, 0x2d9c8f9200000000,
+    0x4a9c67df00000000, 0xa861ef5200000000, 0x849cb74400000000,
+    0x66613fc900000000, 0x0161d78400000000, 0xe39c5f0900000000,
+    0xd84f981600000000, 0x3ab2109b00000000, 0x5db2f8d600000000,
+    0xbf4f705b00000000, 0x93b2284d00000000, 0x714fa0c000000000,
+    0x164f488d00000000, 0xf4b2c00000000000, 0x4eb5f9a100000000,
+    0xac48712c00000000, 0xcb48996100000000, 0x29b511ec00000000,
+    0x054849fa00000000, 0xe7b5c17700000000, 0x80b5293a00000000,
+    0x6248a1b700000000, 0xb5bc2aa300000000, 0x5741a22e00000000,
+    0x30414a6300000000, 0xd2bcc2ee00000000, 0xfe419af800000000,
+    0x1cbc127500000000, 0x7bbcfa3800000000, 0x994172b500000000,
+    0x23464b1400000000, 0xc1bbc39900000000, 0xa6bb2bd400000000,
+    0x4446a35900000000, 0x68bbfb4f00000000, 0x8a4673c200000000,
+    0xed469b8f00000000, 0x0fbb130200000000, 0x43af8ca600000000,
+    0xa152042b00000000, 0xc652ec6600000000, 0x24af64eb00000000,
+    0x08523cfd00000000, 0xeaafb47000000000, 0x8daf5c3d00000000,
+    0x6f52d4b000000000, 0xd555ed1100000000, 0x37a8659c00000000,
+    0x50a88dd100000000, 0xb255055c00000000, 0x9ea85d4a00000000,
+    0x7c55d5c700000000, 0x1b553d8a00000000, 0xf9a8b50700000000,
+    0x2e5c3e1300000000, 0xcca1b69e00000000, 0xaba15ed300000000,
+    0x495cd65e00000000, 0x65a18e4800000000, 0x875c06c500000000,
+    0xe05cee8800000000, 0x02a1660500000000, 0xb8a65fa400000000,
+    0x5a5bd72900000000, 0x3d5b3f6400000000, 0xdfa6b7e900000000,
+    0xf35befff00000000, 0x11a6677200000000, 0x76a68f3f00000000,
+    0x945b07b200000000},
+   {0x0000000000000000, 0xa90b894e00000000, 0x5217129d00000000,
+    0xfb1c9bd300000000, 0xe52855e100000000, 0x4c23dcaf00000000,
+    0xb73f477c00000000, 0x1e34ce3200000000, 0x8b57db1900000000,
+    0x225c525700000000, 0xd940c98400000000, 0x704b40ca00000000,
+    0x6e7f8ef800000000, 0xc77407b600000000, 0x3c689c6500000000,
+    0x9563152b00000000, 0x16afb63300000000, 0xbfa43f7d00000000,
+    0x44b8a4ae00000000, 0xedb32de000000000, 0xf387e3d200000000,
+    0x5a8c6a9c00000000, 0xa190f14f00000000, 0x089b780100000000,
+    0x9df86d2a00000000, 0x34f3e46400000000, 0xcfef7fb700000000,
+    0x66e4f6f900000000, 0x78d038cb00000000, 0xd1dbb18500000000,
+    0x2ac72a5600000000, 0x83cca31800000000, 0x2c5e6d6700000000,
+    0x8555e42900000000, 0x7e497ffa00000000, 0xd742f6b400000000,
+    0xc976388600000000, 0x607db1c800000000, 0x9b612a1b00000000,
+    0x326aa35500000000, 0xa709b67e00000000, 0x0e023f3000000000,
+    0xf51ea4e300000000, 0x5c152dad00000000, 0x4221e39f00000000,
+    0xeb2a6ad100000000, 0x1036f10200000000, 0xb93d784c00000000,
+    0x3af1db5400000000, 0x93fa521a00000000, 0x68e6c9c900000000,
+    0xc1ed408700000000, 0xdfd98eb500000000, 0x76d207fb00000000,
+    0x8dce9c2800000000, 0x24c5156600000000, 0xb1a6004d00000000,
+    0x18ad890300000000, 0xe3b112d000000000, 0x4aba9b9e00000000,
+    0x548e55ac00000000, 0xfd85dce200000000, 0x0699473100000000,
+    0xaf92ce7f00000000, 0x58bcdace00000000, 0xf1b7538000000000,
+    0x0aabc85300000000, 0xa3a0411d00000000, 0xbd948f2f00000000,
+    0x149f066100000000, 0xef839db200000000, 0x468814fc00000000,
+    0xd3eb01d700000000, 0x7ae0889900000000, 0x81fc134a00000000,
+    0x28f79a0400000000, 0x36c3543600000000, 0x9fc8dd7800000000,
+    0x64d446ab00000000, 0xcddfcfe500000000, 0x4e136cfd00000000,
+    0xe718e5b300000000, 0x1c047e6000000000, 0xb50ff72e00000000,
+    0xab3b391c00000000, 0x0230b05200000000, 0xf92c2b8100000000,
+    0x5027a2cf00000000, 0xc544b7e400000000, 0x6c4f3eaa00000000,
+    0x9753a57900000000, 0x3e582c3700000000, 0x206ce20500000000,
+    0x89676b4b00000000, 0x727bf09800000000, 0xdb7079d600000000,
+    0x74e2b7a900000000, 0xdde93ee700000000, 0x26f5a53400000000,
+    0x8ffe2c7a00000000, 0x91cae24800000000, 0x38c16b0600000000,
+    0xc3ddf0d500000000, 0x6ad6799b00000000, 0xffb56cb000000000,
+    0x56bee5fe00000000, 0xada27e2d00000000, 0x04a9f76300000000,
+    0x1a9d395100000000, 0xb396b01f00000000, 0x488a2bcc00000000,
+    0xe181a28200000000, 0x624d019a00000000, 0xcb4688d400000000,
+    0x305a130700000000, 0x99519a4900000000, 0x8765547b00000000,
+    0x2e6edd3500000000, 0xd57246e600000000, 0x7c79cfa800000000,
+    0xe91ada8300000000, 0x401153cd00000000, 0xbb0dc81e00000000,
+    0x1206415000000000, 0x0c328f6200000000, 0xa539062c00000000,
+    0x5e259dff00000000, 0xf72e14b100000000, 0xf17ec44600000000,
+    0x58754d0800000000, 0xa369d6db00000000, 0x0a625f9500000000,
+    0x145691a700000000, 0xbd5d18e900000000, 0x4641833a00000000,
+    0xef4a0a7400000000, 0x7a291f5f00000000, 0xd322961100000000,
+    0x283e0dc200000000, 0x8135848c00000000, 0x9f014abe00000000,
+    0x360ac3f000000000, 0xcd16582300000000, 0x641dd16d00000000,
+    0xe7d1727500000000, 0x4edafb3b00000000, 0xb5c660e800000000,
+    0x1ccde9a600000000, 0x02f9279400000000, 0xabf2aeda00000000,
+    0x50ee350900000000, 0xf9e5bc4700000000, 0x6c86a96c00000000,
+    0xc58d202200000000, 0x3e91bbf100000000, 0x979a32bf00000000,
+    0x89aefc8d00000000, 0x20a575c300000000, 0xdbb9ee1000000000,
+    0x72b2675e00000000, 0xdd20a92100000000, 0x742b206f00000000,
+    0x8f37bbbc00000000, 0x263c32f200000000, 0x3808fcc000000000,
+    0x9103758e00000000, 0x6a1fee5d00000000, 0xc314671300000000,
+    0x5677723800000000, 0xff7cfb7600000000, 0x046060a500000000,
+    0xad6be9eb00000000, 0xb35f27d900000000, 0x1a54ae9700000000,
+    0xe148354400000000, 0x4843bc0a00000000, 0xcb8f1f1200000000,
+    0x6284965c00000000, 0x99980d8f00000000, 0x309384c100000000,
+    0x2ea74af300000000, 0x87acc3bd00000000, 0x7cb0586e00000000,
+    0xd5bbd12000000000, 0x40d8c40b00000000, 0xe9d34d4500000000,
+    0x12cfd69600000000, 0xbbc45fd800000000, 0xa5f091ea00000000,
+    0x0cfb18a400000000, 0xf7e7837700000000, 0x5eec0a3900000000,
+    0xa9c21e8800000000, 0x00c997c600000000, 0xfbd50c1500000000,
+    0x52de855b00000000, 0x4cea4b6900000000, 0xe5e1c22700000000,
+    0x1efd59f400000000, 0xb7f6d0ba00000000, 0x2295c59100000000,
+    0x8b9e4cdf00000000, 0x7082d70c00000000, 0xd9895e4200000000,
+    0xc7bd907000000000, 0x6eb6193e00000000, 0x95aa82ed00000000,
+    0x3ca10ba300000000, 0xbf6da8bb00000000, 0x166621f500000000,
+    0xed7aba2600000000, 0x4471336800000000, 0x5a45fd5a00000000,
+    0xf34e741400000000, 0x0852efc700000000, 0xa159668900000000,
+    0x343a73a200000000, 0x9d31faec00000000, 0x662d613f00000000,
+    0xcf26e87100000000, 0xd112264300000000, 0x7819af0d00000000,
+    0x830534de00000000, 0x2a0ebd9000000000, 0x859c73ef00000000,
+    0x2c97faa100000000, 0xd78b617200000000, 0x7e80e83c00000000,
+    0x60b4260e00000000, 0xc9bfaf4000000000, 0x32a3349300000000,
+    0x9ba8bddd00000000, 0x0ecba8f600000000, 0xa7c021b800000000,
+    0x5cdcba6b00000000, 0xf5d7332500000000, 0xebe3fd1700000000,
+    0x42e8745900000000, 0xb9f4ef8a00000000, 0x10ff66c400000000,
+    0x9333c5dc00000000, 0x3a384c9200000000, 0xc124d74100000000,
+    0x682f5e0f00000000, 0x761b903d00000000, 0xdf10197300000000,
+    0x240c82a000000000, 0x8d070bee00000000, 0x18641ec500000000,
+    0xb16f978b00000000, 0x4a730c5800000000, 0xe378851600000000,
+    0xfd4c4b2400000000, 0x5447c26a00000000, 0xaf5b59b900000000,
+    0x0650d0f700000000},
+   {0x0000000000000000, 0x479244af00000000, 0xcf22f88500000000,
+    0x88b0bc2a00000000, 0xdf4381d000000000, 0x98d1c57f00000000,
+    0x1061795500000000, 0x57f33dfa00000000, 0xff81737a00000000,
+    0xb81337d500000000, 0x30a38bff00000000, 0x7731cf5000000000,
+    0x20c2f2aa00000000, 0x6750b60500000000, 0xefe00a2f00000000,
+    0xa8724e8000000000, 0xfe03e7f400000000, 0xb991a35b00000000,
+    0x31211f7100000000, 0x76b35bde00000000, 0x2140662400000000,
+    0x66d2228b00000000, 0xee629ea100000000, 0xa9f0da0e00000000,
+    0x0182948e00000000, 0x4610d02100000000, 0xcea06c0b00000000,
+    0x893228a400000000, 0xdec1155e00000000, 0x995351f100000000,
+    0x11e3eddb00000000, 0x5671a97400000000, 0xbd01bf3200000000,
+    0xfa93fb9d00000000, 0x722347b700000000, 0x35b1031800000000,
+    0x62423ee200000000, 0x25d07a4d00000000, 0xad60c66700000000,
+    0xeaf282c800000000, 0x4280cc4800000000, 0x051288e700000000,
+    0x8da234cd00000000, 0xca30706200000000, 0x9dc34d9800000000,
+    0xda51093700000000, 0x52e1b51d00000000, 0x1573f1b200000000,
+    0x430258c600000000, 0x04901c6900000000, 0x8c20a04300000000,
+    0xcbb2e4ec00000000, 0x9c41d91600000000, 0xdbd39db900000000,
+    0x5363219300000000, 0x14f1653c00000000, 0xbc832bbc00000000,
+    0xfb116f1300000000, 0x73a1d33900000000, 0x3433979600000000,
+    0x63c0aa6c00000000, 0x2452eec300000000, 0xace252e900000000,
+    0xeb70164600000000, 0x7a037e6500000000, 0x3d913aca00000000,
+    0xb52186e000000000, 0xf2b3c24f00000000, 0xa540ffb500000000,
+    0xe2d2bb1a00000000, 0x6a62073000000000, 0x2df0439f00000000,
+    0x85820d1f00000000, 0xc21049b000000000, 0x4aa0f59a00000000,
+    0x0d32b13500000000, 0x5ac18ccf00000000, 0x1d53c86000000000,
+    0x95e3744a00000000, 0xd27130e500000000, 0x8400999100000000,
+    0xc392dd3e00000000, 0x4b22611400000000, 0x0cb025bb00000000,
+    0x5b43184100000000, 0x1cd15cee00000000, 0x9461e0c400000000,
+    0xd3f3a46b00000000, 0x7b81eaeb00000000, 0x3c13ae4400000000,
+    0xb4a3126e00000000, 0xf33156c100000000, 0xa4c26b3b00000000,
+    0xe3502f9400000000, 0x6be093be00000000, 0x2c72d71100000000,
+    0xc702c15700000000, 0x809085f800000000, 0x082039d200000000,
+    0x4fb27d7d00000000, 0x1841408700000000, 0x5fd3042800000000,
+    0xd763b80200000000, 0x90f1fcad00000000, 0x3883b22d00000000,
+    0x7f11f68200000000, 0xf7a14aa800000000, 0xb0330e0700000000,
+    0xe7c033fd00000000, 0xa052775200000000, 0x28e2cb7800000000,
+    0x6f708fd700000000, 0x390126a300000000, 0x7e93620c00000000,
+    0xf623de2600000000, 0xb1b19a8900000000, 0xe642a77300000000,
+    0xa1d0e3dc00000000, 0x29605ff600000000, 0x6ef21b5900000000,
+    0xc68055d900000000, 0x8112117600000000, 0x09a2ad5c00000000,
+    0x4e30e9f300000000, 0x19c3d40900000000, 0x5e5190a600000000,
+    0xd6e12c8c00000000, 0x9173682300000000, 0xf406fcca00000000,
+    0xb394b86500000000, 0x3b24044f00000000, 0x7cb640e000000000,
+    0x2b457d1a00000000, 0x6cd739b500000000, 0xe467859f00000000,
+    0xa3f5c13000000000, 0x0b878fb000000000, 0x4c15cb1f00000000,
+    0xc4a5773500000000, 0x8337339a00000000, 0xd4c40e6000000000,
+    0x93564acf00000000, 0x1be6f6e500000000, 0x5c74b24a00000000,
+    0x0a051b3e00000000, 0x4d975f9100000000, 0xc527e3bb00000000,
+    0x82b5a71400000000, 0xd5469aee00000000, 0x92d4de4100000000,
+    0x1a64626b00000000, 0x5df626c400000000, 0xf584684400000000,
+    0xb2162ceb00000000, 0x3aa690c100000000, 0x7d34d46e00000000,
+    0x2ac7e99400000000, 0x6d55ad3b00000000, 0xe5e5111100000000,
+    0xa27755be00000000, 0x490743f800000000, 0x0e95075700000000,
+    0x8625bb7d00000000, 0xc1b7ffd200000000, 0x9644c22800000000,
+    0xd1d6868700000000, 0x59663aad00000000, 0x1ef47e0200000000,
+    0xb686308200000000, 0xf114742d00000000, 0x79a4c80700000000,
+    0x3e368ca800000000, 0x69c5b15200000000, 0x2e57f5fd00000000,
+    0xa6e749d700000000, 0xe1750d7800000000, 0xb704a40c00000000,
+    0xf096e0a300000000, 0x78265c8900000000, 0x3fb4182600000000,
+    0x684725dc00000000, 0x2fd5617300000000, 0xa765dd5900000000,
+    0xe0f799f600000000, 0x4885d77600000000, 0x0f1793d900000000,
+    0x87a72ff300000000, 0xc0356b5c00000000, 0x97c656a600000000,
+    0xd054120900000000, 0x58e4ae2300000000, 0x1f76ea8c00000000,
+    0x8e0582af00000000, 0xc997c60000000000, 0x41277a2a00000000,
+    0x06b53e8500000000, 0x5146037f00000000, 0x16d447d000000000,
+    0x9e64fbfa00000000, 0xd9f6bf5500000000, 0x7184f1d500000000,
+    0x3616b57a00000000, 0xbea6095000000000, 0xf9344dff00000000,
+    0xaec7700500000000, 0xe95534aa00000000, 0x61e5888000000000,
+    0x2677cc2f00000000, 0x7006655b00000000, 0x379421f400000000,
+    0xbf249dde00000000, 0xf8b6d97100000000, 0xaf45e48b00000000,
+    0xe8d7a02400000000, 0x60671c0e00000000, 0x27f558a100000000,
+    0x8f87162100000000, 0xc815528e00000000, 0x40a5eea400000000,
+    0x0737aa0b00000000, 0x50c497f100000000, 0x1756d35e00000000,
+    0x9fe66f7400000000, 0xd8742bdb00000000, 0x33043d9d00000000,
+    0x7496793200000000, 0xfc26c51800000000, 0xbbb481b700000000,
+    0xec47bc4d00000000, 0xabd5f8e200000000, 0x236544c800000000,
+    0x64f7006700000000, 0xcc854ee700000000, 0x8b170a4800000000,
+    0x03a7b66200000000, 0x4435f2cd00000000, 0x13c6cf3700000000,
+    0x54548b9800000000, 0xdce437b200000000, 0x9b76731d00000000,
+    0xcd07da6900000000, 0x8a959ec600000000, 0x022522ec00000000,
+    0x45b7664300000000, 0x12445bb900000000, 0x55d61f1600000000,
+    0xdd66a33c00000000, 0x9af4e79300000000, 0x3286a91300000000,
+    0x7514edbc00000000, 0xfda4519600000000, 0xba36153900000000,
+    0xedc528c300000000, 0xaa576c6c00000000, 0x22e7d04600000000,
+    0x657594e900000000}};
+
+#else /* W == 4 */
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59,
+    0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4,
+    0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67,
+    0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef,
+    0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97,
+    0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88,
+    0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687,
+    0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698,
+    0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0,
+    0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068,
+    0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb,
+    0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056,
+    0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016,
+    0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009,
+    0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028,
+    0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037,
+    0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a,
+    0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7,
+    0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054,
+    0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7,
+    0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af,
+    0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0,
+    0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4,
+    0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab,
+    0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3,
+    0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a,
+    0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9,
+    0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54,
+    0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09,
+    0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16,
+    0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37,
+    0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28,
+    0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e,
+    0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3,
+    0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40,
+    0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8,
+    0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0,
+    0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf,
+    0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6,
+    0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9,
+    0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1,
+    0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059,
+    0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca,
+    0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067,
+    0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031,
+    0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e,
+    0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f,
+    0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010,
+    0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d,
+    0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0,
+    0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073,
+    0xd8ac6b35},
+   {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2,
+    0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd,
+    0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696,
+    0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3,
+    0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f,
+    0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35,
+    0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5,
+    0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f,
+    0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673,
+    0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46,
+    0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d,
+    0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632,
+    0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28,
+    0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192,
+    0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c,
+    0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6,
+    0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0,
+    0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff,
+    0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4,
+    0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95,
+    0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9,
+    0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03,
+    0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7,
+    0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d,
+    0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151,
+    0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808,
+    0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343,
+    0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c,
+    0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a,
+    0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0,
+    0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e,
+    0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594,
+    0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6,
+    0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399,
+    0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2,
+    0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7,
+    0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb,
+    0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571,
+    0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289,
+    0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33,
+    0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f,
+    0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a,
+    0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461,
+    0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e,
+    0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c,
+    0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6,
+    0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918,
+    0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2,
+    0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484,
+    0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb,
+    0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0,
+    0xa140efa8},
+   {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706,
+    0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed,
+    0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289,
+    0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a,
+    0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214,
+    0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3,
+    0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3,
+    0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254,
+    0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a,
+    0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9,
+    0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad,
+    0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746,
+    0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060,
+    0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187,
+    0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef,
+    0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408,
+    0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e,
+    0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495,
+    0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1,
+    0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532,
+    0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c,
+    0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb,
+    0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb,
+    0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c,
+    0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42,
+    0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060,
+    0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04,
+    0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef,
+    0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99,
+    0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e,
+    0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16,
+    0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1,
+    0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7,
+    0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c,
+    0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38,
+    0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb,
+    0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5,
+    0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42,
+    0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62,
+    0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85,
+    0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb,
+    0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18,
+    0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c,
+    0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997,
+    0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1,
+    0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36,
+    0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e,
+    0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9,
+    0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf,
+    0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24,
+    0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040,
+    0x917cd6a1},
+   {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf,
+    0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd,
+    0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896,
+    0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9,
+    0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3,
+    0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f,
+    0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d,
+    0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1,
+    0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab,
+    0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4,
+    0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f,
+    0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d,
+    0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4,
+    0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978,
+    0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad,
+    0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621,
+    0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46,
+    0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854,
+    0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f,
+    0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a,
+    0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890,
+    0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c,
+    0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4,
+    0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238,
+    0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622,
+    0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab,
+    0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0,
+    0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2,
+    0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295,
+    0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19,
+    0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc,
+    0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140,
+    0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd,
+    0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf,
+    0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184,
+    0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb,
+    0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1,
+    0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d,
+    0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb,
+    0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257,
+    0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d,
+    0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22,
+    0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069,
+    0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b,
+    0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6,
+    0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a,
+    0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf,
+    0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33,
+    0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254,
+    0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 0xbf8e8146,
+    0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d,
+    0x18ba364e}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x00000000, 0x43cba687, 0xc7903cd4, 0x845b9a53, 0xcf270873,
+    0x8cecaef4, 0x08b734a7, 0x4b7c9220, 0x9e4f10e6, 0xdd84b661,
+    0x59df2c32, 0x1a148ab5, 0x51681895, 0x12a3be12, 0x96f82441,
+    0xd53382c6, 0x7d995117, 0x3e52f790, 0xba096dc3, 0xf9c2cb44,
+    0xb2be5964, 0xf175ffe3, 0x752e65b0, 0x36e5c337, 0xe3d641f1,
+    0xa01de776, 0x24467d25, 0x678ddba2, 0x2cf14982, 0x6f3aef05,
+    0xeb617556, 0xa8aad3d1, 0xfa32a32e, 0xb9f905a9, 0x3da29ffa,
+    0x7e69397d, 0x3515ab5d, 0x76de0dda, 0xf2859789, 0xb14e310e,
+    0x647db3c8, 0x27b6154f, 0xa3ed8f1c, 0xe026299b, 0xab5abbbb,
+    0xe8911d3c, 0x6cca876f, 0x2f0121e8, 0x87abf239, 0xc46054be,
+    0x403bceed, 0x03f0686a, 0x488cfa4a, 0x0b475ccd, 0x8f1cc69e,
+    0xccd76019, 0x19e4e2df, 0x5a2f4458, 0xde74de0b, 0x9dbf788c,
+    0xd6c3eaac, 0x95084c2b, 0x1153d678, 0x529870ff, 0xf465465d,
+    0xb7aee0da, 0x33f57a89, 0x703edc0e, 0x3b424e2e, 0x7889e8a9,
+    0xfcd272fa, 0xbf19d47d, 0x6a2a56bb, 0x29e1f03c, 0xadba6a6f,
+    0xee71cce8, 0xa50d5ec8, 0xe6c6f84f, 0x629d621c, 0x2156c49b,
+    0x89fc174a, 0xca37b1cd, 0x4e6c2b9e, 0x0da78d19, 0x46db1f39,
+    0x0510b9be, 0x814b23ed, 0xc280856a, 0x17b307ac, 0x5478a12b,
+    0xd0233b78, 0x93e89dff, 0xd8940fdf, 0x9b5fa958, 0x1f04330b,
+    0x5ccf958c, 0x0e57e573, 0x4d9c43f4, 0xc9c7d9a7, 0x8a0c7f20,
+    0xc170ed00, 0x82bb4b87, 0x06e0d1d4, 0x452b7753, 0x9018f595,
+    0xd3d35312, 0x5788c941, 0x14436fc6, 0x5f3ffde6, 0x1cf45b61,
+    0x98afc132, 0xdb6467b5, 0x73ceb464, 0x300512e3, 0xb45e88b0,
+    0xf7952e37, 0xbce9bc17, 0xff221a90, 0x7b7980c3, 0x38b22644,
+    0xed81a482, 0xae4a0205, 0x2a119856, 0x69da3ed1, 0x22a6acf1,
+    0x616d0a76, 0xe5369025, 0xa6fd36a2, 0xe8cb8cba, 0xab002a3d,
+    0x2f5bb06e, 0x6c9016e9, 0x27ec84c9, 0x6427224e, 0xe07cb81d,
+    0xa3b71e9a, 0x76849c5c, 0x354f3adb, 0xb114a088, 0xf2df060f,
+    0xb9a3942f, 0xfa6832a8, 0x7e33a8fb, 0x3df80e7c, 0x9552ddad,
+    0xd6997b2a, 0x52c2e179, 0x110947fe, 0x5a75d5de, 0x19be7359,
+    0x9de5e90a, 0xde2e4f8d, 0x0b1dcd4b, 0x48d66bcc, 0xcc8df19f,
+    0x8f465718, 0xc43ac538, 0x87f163bf, 0x03aaf9ec, 0x40615f6b,
+    0x12f92f94, 0x51328913, 0xd5691340, 0x96a2b5c7, 0xddde27e7,
+    0x9e158160, 0x1a4e1b33, 0x5985bdb4, 0x8cb63f72, 0xcf7d99f5,
+    0x4b2603a6, 0x08eda521, 0x43913701, 0x005a9186, 0x84010bd5,
+    0xc7caad52, 0x6f607e83, 0x2cabd804, 0xa8f04257, 0xeb3be4d0,
+    0xa04776f0, 0xe38cd077, 0x67d74a24, 0x241ceca3, 0xf12f6e65,
+    0xb2e4c8e2, 0x36bf52b1, 0x7574f436, 0x3e086616, 0x7dc3c091,
+    0xf9985ac2, 0xba53fc45, 0x1caecae7, 0x5f656c60, 0xdb3ef633,
+    0x98f550b4, 0xd389c294, 0x90426413, 0x1419fe40, 0x57d258c7,
+    0x82e1da01, 0xc12a7c86, 0x4571e6d5, 0x06ba4052, 0x4dc6d272,
+    0x0e0d74f5, 0x8a56eea6, 0xc99d4821, 0x61379bf0, 0x22fc3d77,
+    0xa6a7a724, 0xe56c01a3, 0xae109383, 0xeddb3504, 0x6980af57,
+    0x2a4b09d0, 0xff788b16, 0xbcb32d91, 0x38e8b7c2, 0x7b231145,
+    0x305f8365, 0x739425e2, 0xf7cfbfb1, 0xb4041936, 0xe69c69c9,
+    0xa557cf4e, 0x210c551d, 0x62c7f39a, 0x29bb61ba, 0x6a70c73d,
+    0xee2b5d6e, 0xade0fbe9, 0x78d3792f, 0x3b18dfa8, 0xbf4345fb,
+    0xfc88e37c, 0xb7f4715c, 0xf43fd7db, 0x70644d88, 0x33afeb0f,
+    0x9b0538de, 0xd8ce9e59, 0x5c95040a, 0x1f5ea28d, 0x542230ad,
+    0x17e9962a, 0x93b20c79, 0xd079aafe, 0x054a2838, 0x46818ebf,
+    0xc2da14ec, 0x8111b26b, 0xca6d204b, 0x89a686cc, 0x0dfd1c9f,
+    0x4e36ba18},
+   {0x00000000, 0xe1b652ef, 0x836bd405, 0x62dd86ea, 0x06d7a80b,
+    0xe761fae4, 0x85bc7c0e, 0x640a2ee1, 0x0cae5117, 0xed1803f8,
+    0x8fc58512, 0x6e73d7fd, 0x0a79f91c, 0xebcfabf3, 0x89122d19,
+    0x68a47ff6, 0x185ca32e, 0xf9eaf1c1, 0x9b37772b, 0x7a8125c4,
+    0x1e8b0b25, 0xff3d59ca, 0x9de0df20, 0x7c568dcf, 0x14f2f239,
+    0xf544a0d6, 0x9799263c, 0x762f74d3, 0x12255a32, 0xf39308dd,
+    0x914e8e37, 0x70f8dcd8, 0x30b8465d, 0xd10e14b2, 0xb3d39258,
+    0x5265c0b7, 0x366fee56, 0xd7d9bcb9, 0xb5043a53, 0x54b268bc,
+    0x3c16174a, 0xdda045a5, 0xbf7dc34f, 0x5ecb91a0, 0x3ac1bf41,
+    0xdb77edae, 0xb9aa6b44, 0x581c39ab, 0x28e4e573, 0xc952b79c,
+    0xab8f3176, 0x4a396399, 0x2e334d78, 0xcf851f97, 0xad58997d,
+    0x4ceecb92, 0x244ab464, 0xc5fce68b, 0xa7216061, 0x4697328e,
+    0x229d1c6f, 0xc32b4e80, 0xa1f6c86a, 0x40409a85, 0x60708dba,
+    0x81c6df55, 0xe31b59bf, 0x02ad0b50, 0x66a725b1, 0x8711775e,
+    0xe5ccf1b4, 0x047aa35b, 0x6cdedcad, 0x8d688e42, 0xefb508a8,
+    0x0e035a47, 0x6a0974a6, 0x8bbf2649, 0xe962a0a3, 0x08d4f24c,
+    0x782c2e94, 0x999a7c7b, 0xfb47fa91, 0x1af1a87e, 0x7efb869f,
+    0x9f4dd470, 0xfd90529a, 0x1c260075, 0x74827f83, 0x95342d6c,
+    0xf7e9ab86, 0x165ff969, 0x7255d788, 0x93e38567, 0xf13e038d,
+    0x10885162, 0x50c8cbe7, 0xb17e9908, 0xd3a31fe2, 0x32154d0d,
+    0x561f63ec, 0xb7a93103, 0xd574b7e9, 0x34c2e506, 0x5c669af0,
+    0xbdd0c81f, 0xdf0d4ef5, 0x3ebb1c1a, 0x5ab132fb, 0xbb076014,
+    0xd9dae6fe, 0x386cb411, 0x489468c9, 0xa9223a26, 0xcbffbccc,
+    0x2a49ee23, 0x4e43c0c2, 0xaff5922d, 0xcd2814c7, 0x2c9e4628,
+    0x443a39de, 0xa58c6b31, 0xc751eddb, 0x26e7bf34, 0x42ed91d5,
+    0xa35bc33a, 0xc18645d0, 0x2030173f, 0x81e66bae, 0x60503941,
+    0x028dbfab, 0xe33bed44, 0x8731c3a5, 0x6687914a, 0x045a17a0,
+    0xe5ec454f, 0x8d483ab9, 0x6cfe6856, 0x0e23eebc, 0xef95bc53,
+    0x8b9f92b2, 0x6a29c05d, 0x08f446b7, 0xe9421458, 0x99bac880,
+    0x780c9a6f, 0x1ad11c85, 0xfb674e6a, 0x9f6d608b, 0x7edb3264,
+    0x1c06b48e, 0xfdb0e661, 0x95149997, 0x74a2cb78, 0x167f4d92,
+    0xf7c91f7d, 0x93c3319c, 0x72756373, 0x10a8e599, 0xf11eb776,
+    0xb15e2df3, 0x50e87f1c, 0x3235f9f6, 0xd383ab19, 0xb78985f8,
+    0x563fd717, 0x34e251fd, 0xd5540312, 0xbdf07ce4, 0x5c462e0b,
+    0x3e9ba8e1, 0xdf2dfa0e, 0xbb27d4ef, 0x5a918600, 0x384c00ea,
+    0xd9fa5205, 0xa9028edd, 0x48b4dc32, 0x2a695ad8, 0xcbdf0837,
+    0xafd526d6, 0x4e637439, 0x2cbef2d3, 0xcd08a03c, 0xa5acdfca,
+    0x441a8d25, 0x26c70bcf, 0xc7715920, 0xa37b77c1, 0x42cd252e,
+    0x2010a3c4, 0xc1a6f12b, 0xe196e614, 0x0020b4fb, 0x62fd3211,
+    0x834b60fe, 0xe7414e1f, 0x06f71cf0, 0x642a9a1a, 0x859cc8f5,
+    0xed38b703, 0x0c8ee5ec, 0x6e536306, 0x8fe531e9, 0xebef1f08,
+    0x0a594de7, 0x6884cb0d, 0x893299e2, 0xf9ca453a, 0x187c17d5,
+    0x7aa1913f, 0x9b17c3d0, 0xff1ded31, 0x1eabbfde, 0x7c763934,
+    0x9dc06bdb, 0xf564142d, 0x14d246c2, 0x760fc028, 0x97b992c7,
+    0xf3b3bc26, 0x1205eec9, 0x70d86823, 0x916e3acc, 0xd12ea049,
+    0x3098f2a6, 0x5245744c, 0xb3f326a3, 0xd7f90842, 0x364f5aad,
+    0x5492dc47, 0xb5248ea8, 0xdd80f15e, 0x3c36a3b1, 0x5eeb255b,
+    0xbf5d77b4, 0xdb575955, 0x3ae10bba, 0x583c8d50, 0xb98adfbf,
+    0xc9720367, 0x28c45188, 0x4a19d762, 0xabaf858d, 0xcfa5ab6c,
+    0x2e13f983, 0x4cce7f69, 0xad782d86, 0xc5dc5270, 0x246a009f,
+    0x46b78675, 0xa701d49a, 0xc30bfa7b, 0x22bda894, 0x40602e7e,
+    0xa1d67c91},
+   {0x00000000, 0x5880e2d7, 0xf106b474, 0xa98656a3, 0xe20d68e9,
+    0xba8d8a3e, 0x130bdc9d, 0x4b8b3e4a, 0x851da109, 0xdd9d43de,
+    0x741b157d, 0x2c9bf7aa, 0x6710c9e0, 0x3f902b37, 0x96167d94,
+    0xce969f43, 0x0a3b4213, 0x52bba0c4, 0xfb3df667, 0xa3bd14b0,
+    0xe8362afa, 0xb0b6c82d, 0x19309e8e, 0x41b07c59, 0x8f26e31a,
+    0xd7a601cd, 0x7e20576e, 0x26a0b5b9, 0x6d2b8bf3, 0x35ab6924,
+    0x9c2d3f87, 0xc4addd50, 0x14768426, 0x4cf666f1, 0xe5703052,
+    0xbdf0d285, 0xf67beccf, 0xaefb0e18, 0x077d58bb, 0x5ffdba6c,
+    0x916b252f, 0xc9ebc7f8, 0x606d915b, 0x38ed738c, 0x73664dc6,
+    0x2be6af11, 0x8260f9b2, 0xdae01b65, 0x1e4dc635, 0x46cd24e2,
+    0xef4b7241, 0xb7cb9096, 0xfc40aedc, 0xa4c04c0b, 0x0d461aa8,
+    0x55c6f87f, 0x9b50673c, 0xc3d085eb, 0x6a56d348, 0x32d6319f,
+    0x795d0fd5, 0x21dded02, 0x885bbba1, 0xd0db5976, 0x28ec084d,
+    0x706cea9a, 0xd9eabc39, 0x816a5eee, 0xcae160a4, 0x92618273,
+    0x3be7d4d0, 0x63673607, 0xadf1a944, 0xf5714b93, 0x5cf71d30,
+    0x0477ffe7, 0x4ffcc1ad, 0x177c237a, 0xbefa75d9, 0xe67a970e,
+    0x22d74a5e, 0x7a57a889, 0xd3d1fe2a, 0x8b511cfd, 0xc0da22b7,
+    0x985ac060, 0x31dc96c3, 0x695c7414, 0xa7caeb57, 0xff4a0980,
+    0x56cc5f23, 0x0e4cbdf4, 0x45c783be, 0x1d476169, 0xb4c137ca,
+    0xec41d51d, 0x3c9a8c6b, 0x641a6ebc, 0xcd9c381f, 0x951cdac8,
+    0xde97e482, 0x86170655, 0x2f9150f6, 0x7711b221, 0xb9872d62,
+    0xe107cfb5, 0x48819916, 0x10017bc1, 0x5b8a458b, 0x030aa75c,
+    0xaa8cf1ff, 0xf20c1328, 0x36a1ce78, 0x6e212caf, 0xc7a77a0c,
+    0x9f2798db, 0xd4aca691, 0x8c2c4446, 0x25aa12e5, 0x7d2af032,
+    0xb3bc6f71, 0xeb3c8da6, 0x42badb05, 0x1a3a39d2, 0x51b10798,
+    0x0931e54f, 0xa0b7b3ec, 0xf837513b, 0x50d8119a, 0x0858f34d,
+    0xa1dea5ee, 0xf95e4739, 0xb2d57973, 0xea559ba4, 0x43d3cd07,
+    0x1b532fd0, 0xd5c5b093, 0x8d455244, 0x24c304e7, 0x7c43e630,
+    0x37c8d87a, 0x6f483aad, 0xc6ce6c0e, 0x9e4e8ed9, 0x5ae35389,
+    0x0263b15e, 0xabe5e7fd, 0xf365052a, 0xb8ee3b60, 0xe06ed9b7,
+    0x49e88f14, 0x11686dc3, 0xdffef280, 0x877e1057, 0x2ef846f4,
+    0x7678a423, 0x3df39a69, 0x657378be, 0xccf52e1d, 0x9475ccca,
+    0x44ae95bc, 0x1c2e776b, 0xb5a821c8, 0xed28c31f, 0xa6a3fd55,
+    0xfe231f82, 0x57a54921, 0x0f25abf6, 0xc1b334b5, 0x9933d662,
+    0x30b580c1, 0x68356216, 0x23be5c5c, 0x7b3ebe8b, 0xd2b8e828,
+    0x8a380aff, 0x4e95d7af, 0x16153578, 0xbf9363db, 0xe713810c,
+    0xac98bf46, 0xf4185d91, 0x5d9e0b32, 0x051ee9e5, 0xcb8876a6,
+    0x93089471, 0x3a8ec2d2, 0x620e2005, 0x29851e4f, 0x7105fc98,
+    0xd883aa3b, 0x800348ec, 0x783419d7, 0x20b4fb00, 0x8932ada3,
+    0xd1b24f74, 0x9a39713e, 0xc2b993e9, 0x6b3fc54a, 0x33bf279d,
+    0xfd29b8de, 0xa5a95a09, 0x0c2f0caa, 0x54afee7d, 0x1f24d037,
+    0x47a432e0, 0xee226443, 0xb6a28694, 0x720f5bc4, 0x2a8fb913,
+    0x8309efb0, 0xdb890d67, 0x9002332d, 0xc882d1fa, 0x61048759,
+    0x3984658e, 0xf712facd, 0xaf92181a, 0x06144eb9, 0x5e94ac6e,
+    0x151f9224, 0x4d9f70f3, 0xe4192650, 0xbc99c487, 0x6c429df1,
+    0x34c27f26, 0x9d442985, 0xc5c4cb52, 0x8e4ff518, 0xd6cf17cf,
+    0x7f49416c, 0x27c9a3bb, 0xe95f3cf8, 0xb1dfde2f, 0x1859888c,
+    0x40d96a5b, 0x0b525411, 0x53d2b6c6, 0xfa54e065, 0xa2d402b2,
+    0x6679dfe2, 0x3ef93d35, 0x977f6b96, 0xcfff8941, 0x8474b70b,
+    0xdcf455dc, 0x7572037f, 0x2df2e1a8, 0xe3647eeb, 0xbbe49c3c,
+    0x1262ca9f, 0x4ae22848, 0x01691602, 0x59e9f4d5, 0xf06fa276,
+    0xa8ef40a1},
+   {0x00000000, 0x463b6765, 0x8c76ceca, 0xca4da9af, 0x59ebed4e,
+    0x1fd08a2b, 0xd59d2384, 0x93a644e1, 0xb2d6db9d, 0xf4edbcf8,
+    0x3ea01557, 0x789b7232, 0xeb3d36d3, 0xad0651b6, 0x674bf819,
+    0x21709f7c, 0x25abc6e0, 0x6390a185, 0xa9dd082a, 0xefe66f4f,
+    0x7c402bae, 0x3a7b4ccb, 0xf036e564, 0xb60d8201, 0x977d1d7d,
+    0xd1467a18, 0x1b0bd3b7, 0x5d30b4d2, 0xce96f033, 0x88ad9756,
+    0x42e03ef9, 0x04db599c, 0x0b50fc1a, 0x4d6b9b7f, 0x872632d0,
+    0xc11d55b5, 0x52bb1154, 0x14807631, 0xdecddf9e, 0x98f6b8fb,
+    0xb9862787, 0xffbd40e2, 0x35f0e94d, 0x73cb8e28, 0xe06dcac9,
+    0xa656adac, 0x6c1b0403, 0x2a206366, 0x2efb3afa, 0x68c05d9f,
+    0xa28df430, 0xe4b69355, 0x7710d7b4, 0x312bb0d1, 0xfb66197e,
+    0xbd5d7e1b, 0x9c2de167, 0xda168602, 0x105b2fad, 0x566048c8,
+    0xc5c60c29, 0x83fd6b4c, 0x49b0c2e3, 0x0f8ba586, 0x16a0f835,
+    0x509b9f50, 0x9ad636ff, 0xdced519a, 0x4f4b157b, 0x0970721e,
+    0xc33ddbb1, 0x8506bcd4, 0xa47623a8, 0xe24d44cd, 0x2800ed62,
+    0x6e3b8a07, 0xfd9dcee6, 0xbba6a983, 0x71eb002c, 0x37d06749,
+    0x330b3ed5, 0x753059b0, 0xbf7df01f, 0xf946977a, 0x6ae0d39b,
+    0x2cdbb4fe, 0xe6961d51, 0xa0ad7a34, 0x81dde548, 0xc7e6822d,
+    0x0dab2b82, 0x4b904ce7, 0xd8360806, 0x9e0d6f63, 0x5440c6cc,
+    0x127ba1a9, 0x1df0042f, 0x5bcb634a, 0x9186cae5, 0xd7bdad80,
+    0x441be961, 0x02208e04, 0xc86d27ab, 0x8e5640ce, 0xaf26dfb2,
+    0xe91db8d7, 0x23501178, 0x656b761d, 0xf6cd32fc, 0xb0f65599,
+    0x7abbfc36, 0x3c809b53, 0x385bc2cf, 0x7e60a5aa, 0xb42d0c05,
+    0xf2166b60, 0x61b02f81, 0x278b48e4, 0xedc6e14b, 0xabfd862e,
+    0x8a8d1952, 0xccb67e37, 0x06fbd798, 0x40c0b0fd, 0xd366f41c,
+    0x955d9379, 0x5f103ad6, 0x192b5db3, 0x2c40f16b, 0x6a7b960e,
+    0xa0363fa1, 0xe60d58c4, 0x75ab1c25, 0x33907b40, 0xf9ddd2ef,
+    0xbfe6b58a, 0x9e962af6, 0xd8ad4d93, 0x12e0e43c, 0x54db8359,
+    0xc77dc7b8, 0x8146a0dd, 0x4b0b0972, 0x0d306e17, 0x09eb378b,
+    0x4fd050ee, 0x859df941, 0xc3a69e24, 0x5000dac5, 0x163bbda0,
+    0xdc76140f, 0x9a4d736a, 0xbb3dec16, 0xfd068b73, 0x374b22dc,
+    0x717045b9, 0xe2d60158, 0xa4ed663d, 0x6ea0cf92, 0x289ba8f7,
+    0x27100d71, 0x612b6a14, 0xab66c3bb, 0xed5da4de, 0x7efbe03f,
+    0x38c0875a, 0xf28d2ef5, 0xb4b64990, 0x95c6d6ec, 0xd3fdb189,
+    0x19b01826, 0x5f8b7f43, 0xcc2d3ba2, 0x8a165cc7, 0x405bf568,
+    0x0660920d, 0x02bbcb91, 0x4480acf4, 0x8ecd055b, 0xc8f6623e,
+    0x5b5026df, 0x1d6b41ba, 0xd726e815, 0x911d8f70, 0xb06d100c,
+    0xf6567769, 0x3c1bdec6, 0x7a20b9a3, 0xe986fd42, 0xafbd9a27,
+    0x65f03388, 0x23cb54ed, 0x3ae0095e, 0x7cdb6e3b, 0xb696c794,
+    0xf0ada0f1, 0x630be410, 0x25308375, 0xef7d2ada, 0xa9464dbf,
+    0x8836d2c3, 0xce0db5a6, 0x04401c09, 0x427b7b6c, 0xd1dd3f8d,
+    0x97e658e8, 0x5dabf147, 0x1b909622, 0x1f4bcfbe, 0x5970a8db,
+    0x933d0174, 0xd5066611, 0x46a022f0, 0x009b4595, 0xcad6ec3a,
+    0x8ced8b5f, 0xad9d1423, 0xeba67346, 0x21ebdae9, 0x67d0bd8c,
+    0xf476f96d, 0xb24d9e08, 0x780037a7, 0x3e3b50c2, 0x31b0f544,
+    0x778b9221, 0xbdc63b8e, 0xfbfd5ceb, 0x685b180a, 0x2e607f6f,
+    0xe42dd6c0, 0xa216b1a5, 0x83662ed9, 0xc55d49bc, 0x0f10e013,
+    0x492b8776, 0xda8dc397, 0x9cb6a4f2, 0x56fb0d5d, 0x10c06a38,
+    0x141b33a4, 0x522054c1, 0x986dfd6e, 0xde569a0b, 0x4df0deea,
+    0x0bcbb98f, 0xc1861020, 0x87bd7745, 0xa6cde839, 0xe0f68f5c,
+    0x2abb26f3, 0x6c804196, 0xff260577, 0xb91d6212, 0x7350cbbd,
+    0x356bacd8}};
+
+#endif
+
+#endif
+
+#if N == 6
+
+#if W == 8
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0x3db1ecdc, 0x7b63d9b8, 0x46d23564, 0xf6c7b370,
+    0xcb765fac, 0x8da46ac8, 0xb0158614, 0x36fe60a1, 0x0b4f8c7d,
+    0x4d9db919, 0x702c55c5, 0xc039d3d1, 0xfd883f0d, 0xbb5a0a69,
+    0x86ebe6b5, 0x6dfcc142, 0x504d2d9e, 0x169f18fa, 0x2b2ef426,
+    0x9b3b7232, 0xa68a9eee, 0xe058ab8a, 0xdde94756, 0x5b02a1e3,
+    0x66b34d3f, 0x2061785b, 0x1dd09487, 0xadc51293, 0x9074fe4f,
+    0xd6a6cb2b, 0xeb1727f7, 0xdbf98284, 0xe6486e58, 0xa09a5b3c,
+    0x9d2bb7e0, 0x2d3e31f4, 0x108fdd28, 0x565de84c, 0x6bec0490,
+    0xed07e225, 0xd0b60ef9, 0x96643b9d, 0xabd5d741, 0x1bc05155,
+    0x2671bd89, 0x60a388ed, 0x5d126431, 0xb60543c6, 0x8bb4af1a,
+    0xcd669a7e, 0xf0d776a2, 0x40c2f0b6, 0x7d731c6a, 0x3ba1290e,
+    0x0610c5d2, 0x80fb2367, 0xbd4acfbb, 0xfb98fadf, 0xc6291603,
+    0x763c9017, 0x4b8d7ccb, 0x0d5f49af, 0x30eea573, 0x6c820349,
+    0x5133ef95, 0x17e1daf1, 0x2a50362d, 0x9a45b039, 0xa7f45ce5,
+    0xe1266981, 0xdc97855d, 0x5a7c63e8, 0x67cd8f34, 0x211fba50,
+    0x1cae568c, 0xacbbd098, 0x910a3c44, 0xd7d80920, 0xea69e5fc,
+    0x017ec20b, 0x3ccf2ed7, 0x7a1d1bb3, 0x47acf76f, 0xf7b9717b,
+    0xca089da7, 0x8cdaa8c3, 0xb16b441f, 0x3780a2aa, 0x0a314e76,
+    0x4ce37b12, 0x715297ce, 0xc14711da, 0xfcf6fd06, 0xba24c862,
+    0x879524be, 0xb77b81cd, 0x8aca6d11, 0xcc185875, 0xf1a9b4a9,
+    0x41bc32bd, 0x7c0dde61, 0x3adfeb05, 0x076e07d9, 0x8185e16c,
+    0xbc340db0, 0xfae638d4, 0xc757d408, 0x7742521c, 0x4af3bec0,
+    0x0c218ba4, 0x31906778, 0xda87408f, 0xe736ac53, 0xa1e49937,
+    0x9c5575eb, 0x2c40f3ff, 0x11f11f23, 0x57232a47, 0x6a92c69b,
+    0xec79202e, 0xd1c8ccf2, 0x971af996, 0xaaab154a, 0x1abe935e,
+    0x270f7f82, 0x61dd4ae6, 0x5c6ca63a, 0xd9040692, 0xe4b5ea4e,
+    0xa267df2a, 0x9fd633f6, 0x2fc3b5e2, 0x1272593e, 0x54a06c5a,
+    0x69118086, 0xeffa6633, 0xd24b8aef, 0x9499bf8b, 0xa9285357,
+    0x193dd543, 0x248c399f, 0x625e0cfb, 0x5fefe027, 0xb4f8c7d0,
+    0x89492b0c, 0xcf9b1e68, 0xf22af2b4, 0x423f74a0, 0x7f8e987c,
+    0x395cad18, 0x04ed41c4, 0x8206a771, 0xbfb74bad, 0xf9657ec9,
+    0xc4d49215, 0x74c11401, 0x4970f8dd, 0x0fa2cdb9, 0x32132165,
+    0x02fd8416, 0x3f4c68ca, 0x799e5dae, 0x442fb172, 0xf43a3766,
+    0xc98bdbba, 0x8f59eede, 0xb2e80202, 0x3403e4b7, 0x09b2086b,
+    0x4f603d0f, 0x72d1d1d3, 0xc2c457c7, 0xff75bb1b, 0xb9a78e7f,
+    0x841662a3, 0x6f014554, 0x52b0a988, 0x14629cec, 0x29d37030,
+    0x99c6f624, 0xa4771af8, 0xe2a52f9c, 0xdf14c340, 0x59ff25f5,
+    0x644ec929, 0x229cfc4d, 0x1f2d1091, 0xaf389685, 0x92897a59,
+    0xd45b4f3d, 0xe9eaa3e1, 0xb58605db, 0x8837e907, 0xcee5dc63,
+    0xf35430bf, 0x4341b6ab, 0x7ef05a77, 0x38226f13, 0x059383cf,
+    0x8378657a, 0xbec989a6, 0xf81bbcc2, 0xc5aa501e, 0x75bfd60a,
+    0x480e3ad6, 0x0edc0fb2, 0x336de36e, 0xd87ac499, 0xe5cb2845,
+    0xa3191d21, 0x9ea8f1fd, 0x2ebd77e9, 0x130c9b35, 0x55deae51,
+    0x686f428d, 0xee84a438, 0xd33548e4, 0x95e77d80, 0xa856915c,
+    0x18431748, 0x25f2fb94, 0x6320cef0, 0x5e91222c, 0x6e7f875f,
+    0x53ce6b83, 0x151c5ee7, 0x28adb23b, 0x98b8342f, 0xa509d8f3,
+    0xe3dbed97, 0xde6a014b, 0x5881e7fe, 0x65300b22, 0x23e23e46,
+    0x1e53d29a, 0xae46548e, 0x93f7b852, 0xd5258d36, 0xe89461ea,
+    0x0383461d, 0x3e32aac1, 0x78e09fa5, 0x45517379, 0xf544f56d,
+    0xc8f519b1, 0x8e272cd5, 0xb396c009, 0x357d26bc, 0x08ccca60,
+    0x4e1eff04, 0x73af13d8, 0xc3ba95cc, 0xfe0b7910, 0xb8d94c74,
+    0x8568a0a8},
+   {0x00000000, 0x69790b65, 0xd2f216ca, 0xbb8b1daf, 0x7e952bd5,
+    0x17ec20b0, 0xac673d1f, 0xc51e367a, 0xfd2a57aa, 0x94535ccf,
+    0x2fd84160, 0x46a14a05, 0x83bf7c7f, 0xeac6771a, 0x514d6ab5,
+    0x383461d0, 0x2125a915, 0x485ca270, 0xf3d7bfdf, 0x9aaeb4ba,
+    0x5fb082c0, 0x36c989a5, 0x8d42940a, 0xe43b9f6f, 0xdc0ffebf,
+    0xb576f5da, 0x0efde875, 0x6784e310, 0xa29ad56a, 0xcbe3de0f,
+    0x7068c3a0, 0x1911c8c5, 0x424b522a, 0x2b32594f, 0x90b944e0,
+    0xf9c04f85, 0x3cde79ff, 0x55a7729a, 0xee2c6f35, 0x87556450,
+    0xbf610580, 0xd6180ee5, 0x6d93134a, 0x04ea182f, 0xc1f42e55,
+    0xa88d2530, 0x1306389f, 0x7a7f33fa, 0x636efb3f, 0x0a17f05a,
+    0xb19cedf5, 0xd8e5e690, 0x1dfbd0ea, 0x7482db8f, 0xcf09c620,
+    0xa670cd45, 0x9e44ac95, 0xf73da7f0, 0x4cb6ba5f, 0x25cfb13a,
+    0xe0d18740, 0x89a88c25, 0x3223918a, 0x5b5a9aef, 0x8496a454,
+    0xedefaf31, 0x5664b29e, 0x3f1db9fb, 0xfa038f81, 0x937a84e4,
+    0x28f1994b, 0x4188922e, 0x79bcf3fe, 0x10c5f89b, 0xab4ee534,
+    0xc237ee51, 0x0729d82b, 0x6e50d34e, 0xd5dbcee1, 0xbca2c584,
+    0xa5b30d41, 0xccca0624, 0x77411b8b, 0x1e3810ee, 0xdb262694,
+    0xb25f2df1, 0x09d4305e, 0x60ad3b3b, 0x58995aeb, 0x31e0518e,
+    0x8a6b4c21, 0xe3124744, 0x260c713e, 0x4f757a5b, 0xf4fe67f4,
+    0x9d876c91, 0xc6ddf67e, 0xafa4fd1b, 0x142fe0b4, 0x7d56ebd1,
+    0xb848ddab, 0xd131d6ce, 0x6abacb61, 0x03c3c004, 0x3bf7a1d4,
+    0x528eaab1, 0xe905b71e, 0x807cbc7b, 0x45628a01, 0x2c1b8164,
+    0x97909ccb, 0xfee997ae, 0xe7f85f6b, 0x8e81540e, 0x350a49a1,
+    0x5c7342c4, 0x996d74be, 0xf0147fdb, 0x4b9f6274, 0x22e66911,
+    0x1ad208c1, 0x73ab03a4, 0xc8201e0b, 0xa159156e, 0x64472314,
+    0x0d3e2871, 0xb6b535de, 0xdfcc3ebb, 0xd25c4ee9, 0xbb25458c,
+    0x00ae5823, 0x69d75346, 0xacc9653c, 0xc5b06e59, 0x7e3b73f6,
+    0x17427893, 0x2f761943, 0x460f1226, 0xfd840f89, 0x94fd04ec,
+    0x51e33296, 0x389a39f3, 0x8311245c, 0xea682f39, 0xf379e7fc,
+    0x9a00ec99, 0x218bf136, 0x48f2fa53, 0x8deccc29, 0xe495c74c,
+    0x5f1edae3, 0x3667d186, 0x0e53b056, 0x672abb33, 0xdca1a69c,
+    0xb5d8adf9, 0x70c69b83, 0x19bf90e6, 0xa2348d49, 0xcb4d862c,
+    0x90171cc3, 0xf96e17a6, 0x42e50a09, 0x2b9c016c, 0xee823716,
+    0x87fb3c73, 0x3c7021dc, 0x55092ab9, 0x6d3d4b69, 0x0444400c,
+    0xbfcf5da3, 0xd6b656c6, 0x13a860bc, 0x7ad16bd9, 0xc15a7676,
+    0xa8237d13, 0xb132b5d6, 0xd84bbeb3, 0x63c0a31c, 0x0ab9a879,
+    0xcfa79e03, 0xa6de9566, 0x1d5588c9, 0x742c83ac, 0x4c18e27c,
+    0x2561e919, 0x9eeaf4b6, 0xf793ffd3, 0x328dc9a9, 0x5bf4c2cc,
+    0xe07fdf63, 0x8906d406, 0x56caeabd, 0x3fb3e1d8, 0x8438fc77,
+    0xed41f712, 0x285fc168, 0x4126ca0d, 0xfaadd7a2, 0x93d4dcc7,
+    0xabe0bd17, 0xc299b672, 0x7912abdd, 0x106ba0b8, 0xd57596c2,
+    0xbc0c9da7, 0x07878008, 0x6efe8b6d, 0x77ef43a8, 0x1e9648cd,
+    0xa51d5562, 0xcc645e07, 0x097a687d, 0x60036318, 0xdb887eb7,
+    0xb2f175d2, 0x8ac51402, 0xe3bc1f67, 0x583702c8, 0x314e09ad,
+    0xf4503fd7, 0x9d2934b2, 0x26a2291d, 0x4fdb2278, 0x1481b897,
+    0x7df8b3f2, 0xc673ae5d, 0xaf0aa538, 0x6a149342, 0x036d9827,
+    0xb8e68588, 0xd19f8eed, 0xe9abef3d, 0x80d2e458, 0x3b59f9f7,
+    0x5220f292, 0x973ec4e8, 0xfe47cf8d, 0x45ccd222, 0x2cb5d947,
+    0x35a41182, 0x5cdd1ae7, 0xe7560748, 0x8e2f0c2d, 0x4b313a57,
+    0x22483132, 0x99c32c9d, 0xf0ba27f8, 0xc88e4628, 0xa1f74d4d,
+    0x1a7c50e2, 0x73055b87, 0xb61b6dfd, 0xdf626698, 0x64e97b37,
+    0x0d907052},
+   {0x00000000, 0x7fc99b93, 0xff933726, 0x805aacb5, 0x2457680d,
+    0x5b9ef39e, 0xdbc45f2b, 0xa40dc4b8, 0x48aed01a, 0x37674b89,
+    0xb73de73c, 0xc8f47caf, 0x6cf9b817, 0x13302384, 0x936a8f31,
+    0xeca314a2, 0x915da034, 0xee943ba7, 0x6ece9712, 0x11070c81,
+    0xb50ac839, 0xcac353aa, 0x4a99ff1f, 0x3550648c, 0xd9f3702e,
+    0xa63aebbd, 0x26604708, 0x59a9dc9b, 0xfda41823, 0x826d83b0,
+    0x02372f05, 0x7dfeb496, 0xf9ca4629, 0x8603ddba, 0x0659710f,
+    0x7990ea9c, 0xdd9d2e24, 0xa254b5b7, 0x220e1902, 0x5dc78291,
+    0xb1649633, 0xcead0da0, 0x4ef7a115, 0x313e3a86, 0x9533fe3e,
+    0xeafa65ad, 0x6aa0c918, 0x1569528b, 0x6897e61d, 0x175e7d8e,
+    0x9704d13b, 0xe8cd4aa8, 0x4cc08e10, 0x33091583, 0xb353b936,
+    0xcc9a22a5, 0x20393607, 0x5ff0ad94, 0xdfaa0121, 0xa0639ab2,
+    0x046e5e0a, 0x7ba7c599, 0xfbfd692c, 0x8434f2bf, 0x28e58a13,
+    0x572c1180, 0xd776bd35, 0xa8bf26a6, 0x0cb2e21e, 0x737b798d,
+    0xf321d538, 0x8ce84eab, 0x604b5a09, 0x1f82c19a, 0x9fd86d2f,
+    0xe011f6bc, 0x441c3204, 0x3bd5a997, 0xbb8f0522, 0xc4469eb1,
+    0xb9b82a27, 0xc671b1b4, 0x462b1d01, 0x39e28692, 0x9def422a,
+    0xe226d9b9, 0x627c750c, 0x1db5ee9f, 0xf116fa3d, 0x8edf61ae,
+    0x0e85cd1b, 0x714c5688, 0xd5419230, 0xaa8809a3, 0x2ad2a516,
+    0x551b3e85, 0xd12fcc3a, 0xaee657a9, 0x2ebcfb1c, 0x5175608f,
+    0xf578a437, 0x8ab13fa4, 0x0aeb9311, 0x75220882, 0x99811c20,
+    0xe64887b3, 0x66122b06, 0x19dbb095, 0xbdd6742d, 0xc21fefbe,
+    0x4245430b, 0x3d8cd898, 0x40726c0e, 0x3fbbf79d, 0xbfe15b28,
+    0xc028c0bb, 0x64250403, 0x1bec9f90, 0x9bb63325, 0xe47fa8b6,
+    0x08dcbc14, 0x77152787, 0xf74f8b32, 0x888610a1, 0x2c8bd419,
+    0x53424f8a, 0xd318e33f, 0xacd178ac, 0x51cb1426, 0x2e028fb5,
+    0xae582300, 0xd191b893, 0x759c7c2b, 0x0a55e7b8, 0x8a0f4b0d,
+    0xf5c6d09e, 0x1965c43c, 0x66ac5faf, 0xe6f6f31a, 0x993f6889,
+    0x3d32ac31, 0x42fb37a2, 0xc2a19b17, 0xbd680084, 0xc096b412,
+    0xbf5f2f81, 0x3f058334, 0x40cc18a7, 0xe4c1dc1f, 0x9b08478c,
+    0x1b52eb39, 0x649b70aa, 0x88386408, 0xf7f1ff9b, 0x77ab532e,
+    0x0862c8bd, 0xac6f0c05, 0xd3a69796, 0x53fc3b23, 0x2c35a0b0,
+    0xa801520f, 0xd7c8c99c, 0x57926529, 0x285bfeba, 0x8c563a02,
+    0xf39fa191, 0x73c50d24, 0x0c0c96b7, 0xe0af8215, 0x9f661986,
+    0x1f3cb533, 0x60f52ea0, 0xc4f8ea18, 0xbb31718b, 0x3b6bdd3e,
+    0x44a246ad, 0x395cf23b, 0x469569a8, 0xc6cfc51d, 0xb9065e8e,
+    0x1d0b9a36, 0x62c201a5, 0xe298ad10, 0x9d513683, 0x71f22221,
+    0x0e3bb9b2, 0x8e611507, 0xf1a88e94, 0x55a54a2c, 0x2a6cd1bf,
+    0xaa367d0a, 0xd5ffe699, 0x792e9e35, 0x06e705a6, 0x86bda913,
+    0xf9743280, 0x5d79f638, 0x22b06dab, 0xa2eac11e, 0xdd235a8d,
+    0x31804e2f, 0x4e49d5bc, 0xce137909, 0xb1dae29a, 0x15d72622,
+    0x6a1ebdb1, 0xea441104, 0x958d8a97, 0xe8733e01, 0x97baa592,
+    0x17e00927, 0x682992b4, 0xcc24560c, 0xb3edcd9f, 0x33b7612a,
+    0x4c7efab9, 0xa0ddee1b, 0xdf147588, 0x5f4ed93d, 0x208742ae,
+    0x848a8616, 0xfb431d85, 0x7b19b130, 0x04d02aa3, 0x80e4d81c,
+    0xff2d438f, 0x7f77ef3a, 0x00be74a9, 0xa4b3b011, 0xdb7a2b82,
+    0x5b208737, 0x24e91ca4, 0xc84a0806, 0xb7839395, 0x37d93f20,
+    0x4810a4b3, 0xec1d600b, 0x93d4fb98, 0x138e572d, 0x6c47ccbe,
+    0x11b97828, 0x6e70e3bb, 0xee2a4f0e, 0x91e3d49d, 0x35ee1025,
+    0x4a278bb6, 0xca7d2703, 0xb5b4bc90, 0x5917a832, 0x26de33a1,
+    0xa6849f14, 0xd94d0487, 0x7d40c03f, 0x02895bac, 0x82d3f719,
+    0xfd1a6c8a},
+   {0x00000000, 0xa396284c, 0x9c5d56d9, 0x3fcb7e95, 0xe3cbabf3,
+    0x405d83bf, 0x7f96fd2a, 0xdc00d566, 0x1ce651a7, 0xbf7079eb,
+    0x80bb077e, 0x232d2f32, 0xff2dfa54, 0x5cbbd218, 0x6370ac8d,
+    0xc0e684c1, 0x39cca34e, 0x9a5a8b02, 0xa591f597, 0x0607dddb,
+    0xda0708bd, 0x799120f1, 0x465a5e64, 0xe5cc7628, 0x252af2e9,
+    0x86bcdaa5, 0xb977a430, 0x1ae18c7c, 0xc6e1591a, 0x65777156,
+    0x5abc0fc3, 0xf92a278f, 0x7399469c, 0xd00f6ed0, 0xefc41045,
+    0x4c523809, 0x9052ed6f, 0x33c4c523, 0x0c0fbbb6, 0xaf9993fa,
+    0x6f7f173b, 0xcce93f77, 0xf32241e2, 0x50b469ae, 0x8cb4bcc8,
+    0x2f229484, 0x10e9ea11, 0xb37fc25d, 0x4a55e5d2, 0xe9c3cd9e,
+    0xd608b30b, 0x759e9b47, 0xa99e4e21, 0x0a08666d, 0x35c318f8,
+    0x965530b4, 0x56b3b475, 0xf5259c39, 0xcaeee2ac, 0x6978cae0,
+    0xb5781f86, 0x16ee37ca, 0x2925495f, 0x8ab36113, 0xe7328d38,
+    0x44a4a574, 0x7b6fdbe1, 0xd8f9f3ad, 0x04f926cb, 0xa76f0e87,
+    0x98a47012, 0x3b32585e, 0xfbd4dc9f, 0x5842f4d3, 0x67898a46,
+    0xc41fa20a, 0x181f776c, 0xbb895f20, 0x844221b5, 0x27d409f9,
+    0xdefe2e76, 0x7d68063a, 0x42a378af, 0xe13550e3, 0x3d358585,
+    0x9ea3adc9, 0xa168d35c, 0x02fefb10, 0xc2187fd1, 0x618e579d,
+    0x5e452908, 0xfdd30144, 0x21d3d422, 0x8245fc6e, 0xbd8e82fb,
+    0x1e18aab7, 0x94abcba4, 0x373de3e8, 0x08f69d7d, 0xab60b531,
+    0x77606057, 0xd4f6481b, 0xeb3d368e, 0x48ab1ec2, 0x884d9a03,
+    0x2bdbb24f, 0x1410ccda, 0xb786e496, 0x6b8631f0, 0xc81019bc,
+    0xf7db6729, 0x544d4f65, 0xad6768ea, 0x0ef140a6, 0x313a3e33,
+    0x92ac167f, 0x4eacc319, 0xed3aeb55, 0xd2f195c0, 0x7167bd8c,
+    0xb181394d, 0x12171101, 0x2ddc6f94, 0x8e4a47d8, 0x524a92be,
+    0xf1dcbaf2, 0xce17c467, 0x6d81ec2b, 0x15141c31, 0xb682347d,
+    0x89494ae8, 0x2adf62a4, 0xf6dfb7c2, 0x55499f8e, 0x6a82e11b,
+    0xc914c957, 0x09f24d96, 0xaa6465da, 0x95af1b4f, 0x36393303,
+    0xea39e665, 0x49afce29, 0x7664b0bc, 0xd5f298f0, 0x2cd8bf7f,
+    0x8f4e9733, 0xb085e9a6, 0x1313c1ea, 0xcf13148c, 0x6c853cc0,
+    0x534e4255, 0xf0d86a19, 0x303eeed8, 0x93a8c694, 0xac63b801,
+    0x0ff5904d, 0xd3f5452b, 0x70636d67, 0x4fa813f2, 0xec3e3bbe,
+    0x668d5aad, 0xc51b72e1, 0xfad00c74, 0x59462438, 0x8546f15e,
+    0x26d0d912, 0x191ba787, 0xba8d8fcb, 0x7a6b0b0a, 0xd9fd2346,
+    0xe6365dd3, 0x45a0759f, 0x99a0a0f9, 0x3a3688b5, 0x05fdf620,
+    0xa66bde6c, 0x5f41f9e3, 0xfcd7d1af, 0xc31caf3a, 0x608a8776,
+    0xbc8a5210, 0x1f1c7a5c, 0x20d704c9, 0x83412c85, 0x43a7a844,
+    0xe0318008, 0xdffafe9d, 0x7c6cd6d1, 0xa06c03b7, 0x03fa2bfb,
+    0x3c31556e, 0x9fa77d22, 0xf2269109, 0x51b0b945, 0x6e7bc7d0,
+    0xcdedef9c, 0x11ed3afa, 0xb27b12b6, 0x8db06c23, 0x2e26446f,
+    0xeec0c0ae, 0x4d56e8e2, 0x729d9677, 0xd10bbe3b, 0x0d0b6b5d,
+    0xae9d4311, 0x91563d84, 0x32c015c8, 0xcbea3247, 0x687c1a0b,
+    0x57b7649e, 0xf4214cd2, 0x282199b4, 0x8bb7b1f8, 0xb47ccf6d,
+    0x17eae721, 0xd70c63e0, 0x749a4bac, 0x4b513539, 0xe8c71d75,
+    0x34c7c813, 0x9751e05f, 0xa89a9eca, 0x0b0cb686, 0x81bfd795,
+    0x2229ffd9, 0x1de2814c, 0xbe74a900, 0x62747c66, 0xc1e2542a,
+    0xfe292abf, 0x5dbf02f3, 0x9d598632, 0x3ecfae7e, 0x0104d0eb,
+    0xa292f8a7, 0x7e922dc1, 0xdd04058d, 0xe2cf7b18, 0x41595354,
+    0xb87374db, 0x1be55c97, 0x242e2202, 0x87b80a4e, 0x5bb8df28,
+    0xf82ef764, 0xc7e589f1, 0x6473a1bd, 0xa495257c, 0x07030d30,
+    0x38c873a5, 0x9b5e5be9, 0x475e8e8f, 0xe4c8a6c3, 0xdb03d856,
+    0x7895f01a},
+   {0x00000000, 0x2a283862, 0x545070c4, 0x7e7848a6, 0xa8a0e188,
+    0x8288d9ea, 0xfcf0914c, 0xd6d8a92e, 0x8a30c551, 0xa018fd33,
+    0xde60b595, 0xf4488df7, 0x229024d9, 0x08b81cbb, 0x76c0541d,
+    0x5ce86c7f, 0xcf108ce3, 0xe538b481, 0x9b40fc27, 0xb168c445,
+    0x67b06d6b, 0x4d985509, 0x33e01daf, 0x19c825cd, 0x452049b2,
+    0x6f0871d0, 0x11703976, 0x3b580114, 0xed80a83a, 0xc7a89058,
+    0xb9d0d8fe, 0x93f8e09c, 0x45501f87, 0x6f7827e5, 0x11006f43,
+    0x3b285721, 0xedf0fe0f, 0xc7d8c66d, 0xb9a08ecb, 0x9388b6a9,
+    0xcf60dad6, 0xe548e2b4, 0x9b30aa12, 0xb1189270, 0x67c03b5e,
+    0x4de8033c, 0x33904b9a, 0x19b873f8, 0x8a409364, 0xa068ab06,
+    0xde10e3a0, 0xf438dbc2, 0x22e072ec, 0x08c84a8e, 0x76b00228,
+    0x5c983a4a, 0x00705635, 0x2a586e57, 0x542026f1, 0x7e081e93,
+    0xa8d0b7bd, 0x82f88fdf, 0xfc80c779, 0xd6a8ff1b, 0x8aa03f0e,
+    0xa088076c, 0xdef04fca, 0xf4d877a8, 0x2200de86, 0x0828e6e4,
+    0x7650ae42, 0x5c789620, 0x0090fa5f, 0x2ab8c23d, 0x54c08a9b,
+    0x7ee8b2f9, 0xa8301bd7, 0x821823b5, 0xfc606b13, 0xd6485371,
+    0x45b0b3ed, 0x6f988b8f, 0x11e0c329, 0x3bc8fb4b, 0xed105265,
+    0xc7386a07, 0xb94022a1, 0x93681ac3, 0xcf8076bc, 0xe5a84ede,
+    0x9bd00678, 0xb1f83e1a, 0x67209734, 0x4d08af56, 0x3370e7f0,
+    0x1958df92, 0xcff02089, 0xe5d818eb, 0x9ba0504d, 0xb188682f,
+    0x6750c101, 0x4d78f963, 0x3300b1c5, 0x192889a7, 0x45c0e5d8,
+    0x6fe8ddba, 0x1190951c, 0x3bb8ad7e, 0xed600450, 0xc7483c32,
+    0xb9307494, 0x93184cf6, 0x00e0ac6a, 0x2ac89408, 0x54b0dcae,
+    0x7e98e4cc, 0xa8404de2, 0x82687580, 0xfc103d26, 0xd6380544,
+    0x8ad0693b, 0xa0f85159, 0xde8019ff, 0xf4a8219d, 0x227088b3,
+    0x0858b0d1, 0x7620f877, 0x5c08c015, 0xce31785d, 0xe419403f,
+    0x9a610899, 0xb04930fb, 0x669199d5, 0x4cb9a1b7, 0x32c1e911,
+    0x18e9d173, 0x4401bd0c, 0x6e29856e, 0x1051cdc8, 0x3a79f5aa,
+    0xeca15c84, 0xc68964e6, 0xb8f12c40, 0x92d91422, 0x0121f4be,
+    0x2b09ccdc, 0x5571847a, 0x7f59bc18, 0xa9811536, 0x83a92d54,
+    0xfdd165f2, 0xd7f95d90, 0x8b1131ef, 0xa139098d, 0xdf41412b,
+    0xf5697949, 0x23b1d067, 0x0999e805, 0x77e1a0a3, 0x5dc998c1,
+    0x8b6167da, 0xa1495fb8, 0xdf31171e, 0xf5192f7c, 0x23c18652,
+    0x09e9be30, 0x7791f696, 0x5db9cef4, 0x0151a28b, 0x2b799ae9,
+    0x5501d24f, 0x7f29ea2d, 0xa9f14303, 0x83d97b61, 0xfda133c7,
+    0xd7890ba5, 0x4471eb39, 0x6e59d35b, 0x10219bfd, 0x3a09a39f,
+    0xecd10ab1, 0xc6f932d3, 0xb8817a75, 0x92a94217, 0xce412e68,
+    0xe469160a, 0x9a115eac, 0xb03966ce, 0x66e1cfe0, 0x4cc9f782,
+    0x32b1bf24, 0x18998746, 0x44914753, 0x6eb97f31, 0x10c13797,
+    0x3ae90ff5, 0xec31a6db, 0xc6199eb9, 0xb861d61f, 0x9249ee7d,
+    0xcea18202, 0xe489ba60, 0x9af1f2c6, 0xb0d9caa4, 0x6601638a,
+    0x4c295be8, 0x3251134e, 0x18792b2c, 0x8b81cbb0, 0xa1a9f3d2,
+    0xdfd1bb74, 0xf5f98316, 0x23212a38, 0x0909125a, 0x77715afc,
+    0x5d59629e, 0x01b10ee1, 0x2b993683, 0x55e17e25, 0x7fc94647,
+    0xa911ef69, 0x8339d70b, 0xfd419fad, 0xd769a7cf, 0x01c158d4,
+    0x2be960b6, 0x55912810, 0x7fb91072, 0xa961b95c, 0x8349813e,
+    0xfd31c998, 0xd719f1fa, 0x8bf19d85, 0xa1d9a5e7, 0xdfa1ed41,
+    0xf589d523, 0x23517c0d, 0x0979446f, 0x77010cc9, 0x5d2934ab,
+    0xced1d437, 0xe4f9ec55, 0x9a81a4f3, 0xb0a99c91, 0x667135bf,
+    0x4c590ddd, 0x3221457b, 0x18097d19, 0x44e11166, 0x6ec92904,
+    0x10b161a2, 0x3a9959c0, 0xec41f0ee, 0xc669c88c, 0xb811802a,
+    0x9239b848},
+   {0x00000000, 0x4713f6fb, 0x8e27edf6, 0xc9341b0d, 0xc73eddad,
+    0x802d2b56, 0x4919305b, 0x0e0ac6a0, 0x550cbd1b, 0x121f4be0,
+    0xdb2b50ed, 0x9c38a616, 0x923260b6, 0xd521964d, 0x1c158d40,
+    0x5b067bbb, 0xaa197a36, 0xed0a8ccd, 0x243e97c0, 0x632d613b,
+    0x6d27a79b, 0x2a345160, 0xe3004a6d, 0xa413bc96, 0xff15c72d,
+    0xb80631d6, 0x71322adb, 0x3621dc20, 0x382b1a80, 0x7f38ec7b,
+    0xb60cf776, 0xf11f018d, 0x8f43f22d, 0xc85004d6, 0x01641fdb,
+    0x4677e920, 0x487d2f80, 0x0f6ed97b, 0xc65ac276, 0x8149348d,
+    0xda4f4f36, 0x9d5cb9cd, 0x5468a2c0, 0x137b543b, 0x1d71929b,
+    0x5a626460, 0x93567f6d, 0xd4458996, 0x255a881b, 0x62497ee0,
+    0xab7d65ed, 0xec6e9316, 0xe26455b6, 0xa577a34d, 0x6c43b840,
+    0x2b504ebb, 0x70563500, 0x3745c3fb, 0xfe71d8f6, 0xb9622e0d,
+    0xb768e8ad, 0xf07b1e56, 0x394f055b, 0x7e5cf3a0, 0xc5f6e21b,
+    0x82e514e0, 0x4bd10fed, 0x0cc2f916, 0x02c83fb6, 0x45dbc94d,
+    0x8cefd240, 0xcbfc24bb, 0x90fa5f00, 0xd7e9a9fb, 0x1eddb2f6,
+    0x59ce440d, 0x57c482ad, 0x10d77456, 0xd9e36f5b, 0x9ef099a0,
+    0x6fef982d, 0x28fc6ed6, 0xe1c875db, 0xa6db8320, 0xa8d14580,
+    0xefc2b37b, 0x26f6a876, 0x61e55e8d, 0x3ae32536, 0x7df0d3cd,
+    0xb4c4c8c0, 0xf3d73e3b, 0xfdddf89b, 0xbace0e60, 0x73fa156d,
+    0x34e9e396, 0x4ab51036, 0x0da6e6cd, 0xc492fdc0, 0x83810b3b,
+    0x8d8bcd9b, 0xca983b60, 0x03ac206d, 0x44bfd696, 0x1fb9ad2d,
+    0x58aa5bd6, 0x919e40db, 0xd68db620, 0xd8877080, 0x9f94867b,
+    0x56a09d76, 0x11b36b8d, 0xe0ac6a00, 0xa7bf9cfb, 0x6e8b87f6,
+    0x2998710d, 0x2792b7ad, 0x60814156, 0xa9b55a5b, 0xeea6aca0,
+    0xb5a0d71b, 0xf2b321e0, 0x3b873aed, 0x7c94cc16, 0x729e0ab6,
+    0x358dfc4d, 0xfcb9e740, 0xbbaa11bb, 0x509cc277, 0x178f348c,
+    0xdebb2f81, 0x99a8d97a, 0x97a21fda, 0xd0b1e921, 0x1985f22c,
+    0x5e9604d7, 0x05907f6c, 0x42838997, 0x8bb7929a, 0xcca46461,
+    0xc2aea2c1, 0x85bd543a, 0x4c894f37, 0x0b9ab9cc, 0xfa85b841,
+    0xbd964eba, 0x74a255b7, 0x33b1a34c, 0x3dbb65ec, 0x7aa89317,
+    0xb39c881a, 0xf48f7ee1, 0xaf89055a, 0xe89af3a1, 0x21aee8ac,
+    0x66bd1e57, 0x68b7d8f7, 0x2fa42e0c, 0xe6903501, 0xa183c3fa,
+    0xdfdf305a, 0x98ccc6a1, 0x51f8ddac, 0x16eb2b57, 0x18e1edf7,
+    0x5ff21b0c, 0x96c60001, 0xd1d5f6fa, 0x8ad38d41, 0xcdc07bba,
+    0x04f460b7, 0x43e7964c, 0x4ded50ec, 0x0afea617, 0xc3cabd1a,
+    0x84d94be1, 0x75c64a6c, 0x32d5bc97, 0xfbe1a79a, 0xbcf25161,
+    0xb2f897c1, 0xf5eb613a, 0x3cdf7a37, 0x7bcc8ccc, 0x20caf777,
+    0x67d9018c, 0xaeed1a81, 0xe9feec7a, 0xe7f42ada, 0xa0e7dc21,
+    0x69d3c72c, 0x2ec031d7, 0x956a206c, 0xd279d697, 0x1b4dcd9a,
+    0x5c5e3b61, 0x5254fdc1, 0x15470b3a, 0xdc731037, 0x9b60e6cc,
+    0xc0669d77, 0x87756b8c, 0x4e417081, 0x0952867a, 0x075840da,
+    0x404bb621, 0x897fad2c, 0xce6c5bd7, 0x3f735a5a, 0x7860aca1,
+    0xb154b7ac, 0xf6474157, 0xf84d87f7, 0xbf5e710c, 0x766a6a01,
+    0x31799cfa, 0x6a7fe741, 0x2d6c11ba, 0xe4580ab7, 0xa34bfc4c,
+    0xad413aec, 0xea52cc17, 0x2366d71a, 0x647521e1, 0x1a29d241,
+    0x5d3a24ba, 0x940e3fb7, 0xd31dc94c, 0xdd170fec, 0x9a04f917,
+    0x5330e21a, 0x142314e1, 0x4f256f5a, 0x083699a1, 0xc10282ac,
+    0x86117457, 0x881bb2f7, 0xcf08440c, 0x063c5f01, 0x412fa9fa,
+    0xb030a877, 0xf7235e8c, 0x3e174581, 0x7904b37a, 0x770e75da,
+    0x301d8321, 0xf929982c, 0xbe3a6ed7, 0xe53c156c, 0xa22fe397,
+    0x6b1bf89a, 0x2c080e61, 0x2202c8c1, 0x65113e3a, 0xac252537,
+    0xeb36d3cc},
+   {0x00000000, 0xa13984ee, 0x99020f9d, 0x383b8b73, 0xe975197b,
+    0x484c9d95, 0x707716e6, 0xd14e9208, 0x099b34b7, 0xa8a2b059,
+    0x90993b2a, 0x31a0bfc4, 0xe0ee2dcc, 0x41d7a922, 0x79ec2251,
+    0xd8d5a6bf, 0x1336696e, 0xb20fed80, 0x8a3466f3, 0x2b0de21d,
+    0xfa437015, 0x5b7af4fb, 0x63417f88, 0xc278fb66, 0x1aad5dd9,
+    0xbb94d937, 0x83af5244, 0x2296d6aa, 0xf3d844a2, 0x52e1c04c,
+    0x6ada4b3f, 0xcbe3cfd1, 0x266cd2dc, 0x87555632, 0xbf6edd41,
+    0x1e5759af, 0xcf19cba7, 0x6e204f49, 0x561bc43a, 0xf72240d4,
+    0x2ff7e66b, 0x8ece6285, 0xb6f5e9f6, 0x17cc6d18, 0xc682ff10,
+    0x67bb7bfe, 0x5f80f08d, 0xfeb97463, 0x355abbb2, 0x94633f5c,
+    0xac58b42f, 0x0d6130c1, 0xdc2fa2c9, 0x7d162627, 0x452dad54,
+    0xe41429ba, 0x3cc18f05, 0x9df80beb, 0xa5c38098, 0x04fa0476,
+    0xd5b4967e, 0x748d1290, 0x4cb699e3, 0xed8f1d0d, 0x4cd9a5b8,
+    0xede02156, 0xd5dbaa25, 0x74e22ecb, 0xa5acbcc3, 0x0495382d,
+    0x3caeb35e, 0x9d9737b0, 0x4542910f, 0xe47b15e1, 0xdc409e92,
+    0x7d791a7c, 0xac378874, 0x0d0e0c9a, 0x353587e9, 0x940c0307,
+    0x5fefccd6, 0xfed64838, 0xc6edc34b, 0x67d447a5, 0xb69ad5ad,
+    0x17a35143, 0x2f98da30, 0x8ea15ede, 0x5674f861, 0xf74d7c8f,
+    0xcf76f7fc, 0x6e4f7312, 0xbf01e11a, 0x1e3865f4, 0x2603ee87,
+    0x873a6a69, 0x6ab57764, 0xcb8cf38a, 0xf3b778f9, 0x528efc17,
+    0x83c06e1f, 0x22f9eaf1, 0x1ac26182, 0xbbfbe56c, 0x632e43d3,
+    0xc217c73d, 0xfa2c4c4e, 0x5b15c8a0, 0x8a5b5aa8, 0x2b62de46,
+    0x13595535, 0xb260d1db, 0x79831e0a, 0xd8ba9ae4, 0xe0811197,
+    0x41b89579, 0x90f60771, 0x31cf839f, 0x09f408ec, 0xa8cd8c02,
+    0x70182abd, 0xd121ae53, 0xe91a2520, 0x4823a1ce, 0x996d33c6,
+    0x3854b728, 0x006f3c5b, 0xa156b8b5, 0x99b34b70, 0x388acf9e,
+    0x00b144ed, 0xa188c003, 0x70c6520b, 0xd1ffd6e5, 0xe9c45d96,
+    0x48fdd978, 0x90287fc7, 0x3111fb29, 0x092a705a, 0xa813f4b4,
+    0x795d66bc, 0xd864e252, 0xe05f6921, 0x4166edcf, 0x8a85221e,
+    0x2bbca6f0, 0x13872d83, 0xb2bea96d, 0x63f03b65, 0xc2c9bf8b,
+    0xfaf234f8, 0x5bcbb016, 0x831e16a9, 0x22279247, 0x1a1c1934,
+    0xbb259dda, 0x6a6b0fd2, 0xcb528b3c, 0xf369004f, 0x525084a1,
+    0xbfdf99ac, 0x1ee61d42, 0x26dd9631, 0x87e412df, 0x56aa80d7,
+    0xf7930439, 0xcfa88f4a, 0x6e910ba4, 0xb644ad1b, 0x177d29f5,
+    0x2f46a286, 0x8e7f2668, 0x5f31b460, 0xfe08308e, 0xc633bbfd,
+    0x670a3f13, 0xace9f0c2, 0x0dd0742c, 0x35ebff5f, 0x94d27bb1,
+    0x459ce9b9, 0xe4a56d57, 0xdc9ee624, 0x7da762ca, 0xa572c475,
+    0x044b409b, 0x3c70cbe8, 0x9d494f06, 0x4c07dd0e, 0xed3e59e0,
+    0xd505d293, 0x743c567d, 0xd56aeec8, 0x74536a26, 0x4c68e155,
+    0xed5165bb, 0x3c1ff7b3, 0x9d26735d, 0xa51df82e, 0x04247cc0,
+    0xdcf1da7f, 0x7dc85e91, 0x45f3d5e2, 0xe4ca510c, 0x3584c304,
+    0x94bd47ea, 0xac86cc99, 0x0dbf4877, 0xc65c87a6, 0x67650348,
+    0x5f5e883b, 0xfe670cd5, 0x2f299edd, 0x8e101a33, 0xb62b9140,
+    0x171215ae, 0xcfc7b311, 0x6efe37ff, 0x56c5bc8c, 0xf7fc3862,
+    0x26b2aa6a, 0x878b2e84, 0xbfb0a5f7, 0x1e892119, 0xf3063c14,
+    0x523fb8fa, 0x6a043389, 0xcb3db767, 0x1a73256f, 0xbb4aa181,
+    0x83712af2, 0x2248ae1c, 0xfa9d08a3, 0x5ba48c4d, 0x639f073e,
+    0xc2a683d0, 0x13e811d8, 0xb2d19536, 0x8aea1e45, 0x2bd39aab,
+    0xe030557a, 0x4109d194, 0x79325ae7, 0xd80bde09, 0x09454c01,
+    0xa87cc8ef, 0x9047439c, 0x317ec772, 0xe9ab61cd, 0x4892e523,
+    0x70a96e50, 0xd190eabe, 0x00de78b6, 0xa1e7fc58, 0x99dc772b,
+    0x38e5f3c5},
+   {0x00000000, 0xe81790a1, 0x0b5e2703, 0xe349b7a2, 0x16bc4e06,
+    0xfeabdea7, 0x1de26905, 0xf5f5f9a4, 0x2d789c0c, 0xc56f0cad,
+    0x2626bb0f, 0xce312bae, 0x3bc4d20a, 0xd3d342ab, 0x309af509,
+    0xd88d65a8, 0x5af13818, 0xb2e6a8b9, 0x51af1f1b, 0xb9b88fba,
+    0x4c4d761e, 0xa45ae6bf, 0x4713511d, 0xaf04c1bc, 0x7789a414,
+    0x9f9e34b5, 0x7cd78317, 0x94c013b6, 0x6135ea12, 0x89227ab3,
+    0x6a6bcd11, 0x827c5db0, 0xb5e27030, 0x5df5e091, 0xbebc5733,
+    0x56abc792, 0xa35e3e36, 0x4b49ae97, 0xa8001935, 0x40178994,
+    0x989aec3c, 0x708d7c9d, 0x93c4cb3f, 0x7bd35b9e, 0x8e26a23a,
+    0x6631329b, 0x85788539, 0x6d6f1598, 0xef134828, 0x0704d889,
+    0xe44d6f2b, 0x0c5aff8a, 0xf9af062e, 0x11b8968f, 0xf2f1212d,
+    0x1ae6b18c, 0xc26bd424, 0x2a7c4485, 0xc935f327, 0x21226386,
+    0xd4d79a22, 0x3cc00a83, 0xdf89bd21, 0x379e2d80, 0xb0b5e621,
+    0x58a27680, 0xbbebc122, 0x53fc5183, 0xa609a827, 0x4e1e3886,
+    0xad578f24, 0x45401f85, 0x9dcd7a2d, 0x75daea8c, 0x96935d2e,
+    0x7e84cd8f, 0x8b71342b, 0x6366a48a, 0x802f1328, 0x68388389,
+    0xea44de39, 0x02534e98, 0xe11af93a, 0x090d699b, 0xfcf8903f,
+    0x14ef009e, 0xf7a6b73c, 0x1fb1279d, 0xc73c4235, 0x2f2bd294,
+    0xcc626536, 0x2475f597, 0xd1800c33, 0x39979c92, 0xdade2b30,
+    0x32c9bb91, 0x05579611, 0xed4006b0, 0x0e09b112, 0xe61e21b3,
+    0x13ebd817, 0xfbfc48b6, 0x18b5ff14, 0xf0a26fb5, 0x282f0a1d,
+    0xc0389abc, 0x23712d1e, 0xcb66bdbf, 0x3e93441b, 0xd684d4ba,
+    0x35cd6318, 0xdddaf3b9, 0x5fa6ae09, 0xb7b13ea8, 0x54f8890a,
+    0xbcef19ab, 0x491ae00f, 0xa10d70ae, 0x4244c70c, 0xaa5357ad,
+    0x72de3205, 0x9ac9a2a4, 0x79801506, 0x919785a7, 0x64627c03,
+    0x8c75eca2, 0x6f3c5b00, 0x872bcba1, 0xba1aca03, 0x520d5aa2,
+    0xb144ed00, 0x59537da1, 0xaca68405, 0x44b114a4, 0xa7f8a306,
+    0x4fef33a7, 0x9762560f, 0x7f75c6ae, 0x9c3c710c, 0x742be1ad,
+    0x81de1809, 0x69c988a8, 0x8a803f0a, 0x6297afab, 0xe0ebf21b,
+    0x08fc62ba, 0xebb5d518, 0x03a245b9, 0xf657bc1d, 0x1e402cbc,
+    0xfd099b1e, 0x151e0bbf, 0xcd936e17, 0x2584feb6, 0xc6cd4914,
+    0x2edad9b5, 0xdb2f2011, 0x3338b0b0, 0xd0710712, 0x386697b3,
+    0x0ff8ba33, 0xe7ef2a92, 0x04a69d30, 0xecb10d91, 0x1944f435,
+    0xf1536494, 0x121ad336, 0xfa0d4397, 0x2280263f, 0xca97b69e,
+    0x29de013c, 0xc1c9919d, 0x343c6839, 0xdc2bf898, 0x3f624f3a,
+    0xd775df9b, 0x5509822b, 0xbd1e128a, 0x5e57a528, 0xb6403589,
+    0x43b5cc2d, 0xaba25c8c, 0x48ebeb2e, 0xa0fc7b8f, 0x78711e27,
+    0x90668e86, 0x732f3924, 0x9b38a985, 0x6ecd5021, 0x86dac080,
+    0x65937722, 0x8d84e783, 0x0aaf2c22, 0xe2b8bc83, 0x01f10b21,
+    0xe9e69b80, 0x1c136224, 0xf404f285, 0x174d4527, 0xff5ad586,
+    0x27d7b02e, 0xcfc0208f, 0x2c89972d, 0xc49e078c, 0x316bfe28,
+    0xd97c6e89, 0x3a35d92b, 0xd222498a, 0x505e143a, 0xb849849b,
+    0x5b003339, 0xb317a398, 0x46e25a3c, 0xaef5ca9d, 0x4dbc7d3f,
+    0xa5abed9e, 0x7d268836, 0x95311897, 0x7678af35, 0x9e6f3f94,
+    0x6b9ac630, 0x838d5691, 0x60c4e133, 0x88d37192, 0xbf4d5c12,
+    0x575accb3, 0xb4137b11, 0x5c04ebb0, 0xa9f11214, 0x41e682b5,
+    0xa2af3517, 0x4ab8a5b6, 0x9235c01e, 0x7a2250bf, 0x996be71d,
+    0x717c77bc, 0x84898e18, 0x6c9e1eb9, 0x8fd7a91b, 0x67c039ba,
+    0xe5bc640a, 0x0dabf4ab, 0xeee24309, 0x06f5d3a8, 0xf3002a0c,
+    0x1b17baad, 0xf85e0d0f, 0x10499dae, 0xc8c4f806, 0x20d368a7,
+    0xc39adf05, 0x2b8d4fa4, 0xde78b600, 0x366f26a1, 0xd5269103,
+    0x3d3101a2}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x0000000000000000, 0xa19017e800000000, 0x03275e0b00000000,
+    0xa2b749e300000000, 0x064ebc1600000000, 0xa7deabfe00000000,
+    0x0569e21d00000000, 0xa4f9f5f500000000, 0x0c9c782d00000000,
+    0xad0c6fc500000000, 0x0fbb262600000000, 0xae2b31ce00000000,
+    0x0ad2c43b00000000, 0xab42d3d300000000, 0x09f59a3000000000,
+    0xa8658dd800000000, 0x1838f15a00000000, 0xb9a8e6b200000000,
+    0x1b1faf5100000000, 0xba8fb8b900000000, 0x1e764d4c00000000,
+    0xbfe65aa400000000, 0x1d51134700000000, 0xbcc104af00000000,
+    0x14a4897700000000, 0xb5349e9f00000000, 0x1783d77c00000000,
+    0xb613c09400000000, 0x12ea356100000000, 0xb37a228900000000,
+    0x11cd6b6a00000000, 0xb05d7c8200000000, 0x3070e2b500000000,
+    0x91e0f55d00000000, 0x3357bcbe00000000, 0x92c7ab5600000000,
+    0x363e5ea300000000, 0x97ae494b00000000, 0x351900a800000000,
+    0x9489174000000000, 0x3cec9a9800000000, 0x9d7c8d7000000000,
+    0x3fcbc49300000000, 0x9e5bd37b00000000, 0x3aa2268e00000000,
+    0x9b32316600000000, 0x3985788500000000, 0x98156f6d00000000,
+    0x284813ef00000000, 0x89d8040700000000, 0x2b6f4de400000000,
+    0x8aff5a0c00000000, 0x2e06aff900000000, 0x8f96b81100000000,
+    0x2d21f1f200000000, 0x8cb1e61a00000000, 0x24d46bc200000000,
+    0x85447c2a00000000, 0x27f335c900000000, 0x8663222100000000,
+    0x229ad7d400000000, 0x830ac03c00000000, 0x21bd89df00000000,
+    0x802d9e3700000000, 0x21e6b5b000000000, 0x8076a25800000000,
+    0x22c1ebbb00000000, 0x8351fc5300000000, 0x27a809a600000000,
+    0x86381e4e00000000, 0x248f57ad00000000, 0x851f404500000000,
+    0x2d7acd9d00000000, 0x8ceada7500000000, 0x2e5d939600000000,
+    0x8fcd847e00000000, 0x2b34718b00000000, 0x8aa4666300000000,
+    0x28132f8000000000, 0x8983386800000000, 0x39de44ea00000000,
+    0x984e530200000000, 0x3af91ae100000000, 0x9b690d0900000000,
+    0x3f90f8fc00000000, 0x9e00ef1400000000, 0x3cb7a6f700000000,
+    0x9d27b11f00000000, 0x35423cc700000000, 0x94d22b2f00000000,
+    0x366562cc00000000, 0x97f5752400000000, 0x330c80d100000000,
+    0x929c973900000000, 0x302bdeda00000000, 0x91bbc93200000000,
+    0x1196570500000000, 0xb00640ed00000000, 0x12b1090e00000000,
+    0xb3211ee600000000, 0x17d8eb1300000000, 0xb648fcfb00000000,
+    0x14ffb51800000000, 0xb56fa2f000000000, 0x1d0a2f2800000000,
+    0xbc9a38c000000000, 0x1e2d712300000000, 0xbfbd66cb00000000,
+    0x1b44933e00000000, 0xbad484d600000000, 0x1863cd3500000000,
+    0xb9f3dadd00000000, 0x09aea65f00000000, 0xa83eb1b700000000,
+    0x0a89f85400000000, 0xab19efbc00000000, 0x0fe01a4900000000,
+    0xae700da100000000, 0x0cc7444200000000, 0xad5753aa00000000,
+    0x0532de7200000000, 0xa4a2c99a00000000, 0x0615807900000000,
+    0xa785979100000000, 0x037c626400000000, 0xa2ec758c00000000,
+    0x005b3c6f00000000, 0xa1cb2b8700000000, 0x03ca1aba00000000,
+    0xa25a0d5200000000, 0x00ed44b100000000, 0xa17d535900000000,
+    0x0584a6ac00000000, 0xa414b14400000000, 0x06a3f8a700000000,
+    0xa733ef4f00000000, 0x0f56629700000000, 0xaec6757f00000000,
+    0x0c713c9c00000000, 0xade12b7400000000, 0x0918de8100000000,
+    0xa888c96900000000, 0x0a3f808a00000000, 0xabaf976200000000,
+    0x1bf2ebe000000000, 0xba62fc0800000000, 0x18d5b5eb00000000,
+    0xb945a20300000000, 0x1dbc57f600000000, 0xbc2c401e00000000,
+    0x1e9b09fd00000000, 0xbf0b1e1500000000, 0x176e93cd00000000,
+    0xb6fe842500000000, 0x1449cdc600000000, 0xb5d9da2e00000000,
+    0x11202fdb00000000, 0xb0b0383300000000, 0x120771d000000000,
+    0xb397663800000000, 0x33baf80f00000000, 0x922aefe700000000,
+    0x309da60400000000, 0x910db1ec00000000, 0x35f4441900000000,
+    0x946453f100000000, 0x36d31a1200000000, 0x97430dfa00000000,
+    0x3f26802200000000, 0x9eb697ca00000000, 0x3c01de2900000000,
+    0x9d91c9c100000000, 0x39683c3400000000, 0x98f82bdc00000000,
+    0x3a4f623f00000000, 0x9bdf75d700000000, 0x2b82095500000000,
+    0x8a121ebd00000000, 0x28a5575e00000000, 0x893540b600000000,
+    0x2dccb54300000000, 0x8c5ca2ab00000000, 0x2eebeb4800000000,
+    0x8f7bfca000000000, 0x271e717800000000, 0x868e669000000000,
+    0x24392f7300000000, 0x85a9389b00000000, 0x2150cd6e00000000,
+    0x80c0da8600000000, 0x2277936500000000, 0x83e7848d00000000,
+    0x222caf0a00000000, 0x83bcb8e200000000, 0x210bf10100000000,
+    0x809be6e900000000, 0x2462131c00000000, 0x85f204f400000000,
+    0x27454d1700000000, 0x86d55aff00000000, 0x2eb0d72700000000,
+    0x8f20c0cf00000000, 0x2d97892c00000000, 0x8c079ec400000000,
+    0x28fe6b3100000000, 0x896e7cd900000000, 0x2bd9353a00000000,
+    0x8a4922d200000000, 0x3a145e5000000000, 0x9b8449b800000000,
+    0x3933005b00000000, 0x98a317b300000000, 0x3c5ae24600000000,
+    0x9dcaf5ae00000000, 0x3f7dbc4d00000000, 0x9eedaba500000000,
+    0x3688267d00000000, 0x9718319500000000, 0x35af787600000000,
+    0x943f6f9e00000000, 0x30c69a6b00000000, 0x91568d8300000000,
+    0x33e1c46000000000, 0x9271d38800000000, 0x125c4dbf00000000,
+    0xb3cc5a5700000000, 0x117b13b400000000, 0xb0eb045c00000000,
+    0x1412f1a900000000, 0xb582e64100000000, 0x1735afa200000000,
+    0xb6a5b84a00000000, 0x1ec0359200000000, 0xbf50227a00000000,
+    0x1de76b9900000000, 0xbc777c7100000000, 0x188e898400000000,
+    0xb91e9e6c00000000, 0x1ba9d78f00000000, 0xba39c06700000000,
+    0x0a64bce500000000, 0xabf4ab0d00000000, 0x0943e2ee00000000,
+    0xa8d3f50600000000, 0x0c2a00f300000000, 0xadba171b00000000,
+    0x0f0d5ef800000000, 0xae9d491000000000, 0x06f8c4c800000000,
+    0xa768d32000000000, 0x05df9ac300000000, 0xa44f8d2b00000000,
+    0x00b678de00000000, 0xa1266f3600000000, 0x039126d500000000,
+    0xa201313d00000000},
+   {0x0000000000000000, 0xee8439a100000000, 0x9d0f029900000000,
+    0x738b3b3800000000, 0x7b1975e900000000, 0x959d4c4800000000,
+    0xe616777000000000, 0x08924ed100000000, 0xb7349b0900000000,
+    0x59b0a2a800000000, 0x2a3b999000000000, 0xc4bfa03100000000,
+    0xcc2deee000000000, 0x22a9d74100000000, 0x5122ec7900000000,
+    0xbfa6d5d800000000, 0x6e69361300000000, 0x80ed0fb200000000,
+    0xf366348a00000000, 0x1de20d2b00000000, 0x157043fa00000000,
+    0xfbf47a5b00000000, 0x887f416300000000, 0x66fb78c200000000,
+    0xd95dad1a00000000, 0x37d994bb00000000, 0x4452af8300000000,
+    0xaad6962200000000, 0xa244d8f300000000, 0x4cc0e15200000000,
+    0x3f4bda6a00000000, 0xd1cfe3cb00000000, 0xdcd26c2600000000,
+    0x3256558700000000, 0x41dd6ebf00000000, 0xaf59571e00000000,
+    0xa7cb19cf00000000, 0x494f206e00000000, 0x3ac41b5600000000,
+    0xd44022f700000000, 0x6be6f72f00000000, 0x8562ce8e00000000,
+    0xf6e9f5b600000000, 0x186dcc1700000000, 0x10ff82c600000000,
+    0xfe7bbb6700000000, 0x8df0805f00000000, 0x6374b9fe00000000,
+    0xb2bb5a3500000000, 0x5c3f639400000000, 0x2fb458ac00000000,
+    0xc130610d00000000, 0xc9a22fdc00000000, 0x2726167d00000000,
+    0x54ad2d4500000000, 0xba2914e400000000, 0x058fc13c00000000,
+    0xeb0bf89d00000000, 0x9880c3a500000000, 0x7604fa0400000000,
+    0x7e96b4d500000000, 0x90128d7400000000, 0xe399b64c00000000,
+    0x0d1d8fed00000000, 0xb8a5d94c00000000, 0x5621e0ed00000000,
+    0x25aadbd500000000, 0xcb2ee27400000000, 0xc3bcaca500000000,
+    0x2d38950400000000, 0x5eb3ae3c00000000, 0xb037979d00000000,
+    0x0f91424500000000, 0xe1157be400000000, 0x929e40dc00000000,
+    0x7c1a797d00000000, 0x748837ac00000000, 0x9a0c0e0d00000000,
+    0xe987353500000000, 0x07030c9400000000, 0xd6ccef5f00000000,
+    0x3848d6fe00000000, 0x4bc3edc600000000, 0xa547d46700000000,
+    0xadd59ab600000000, 0x4351a31700000000, 0x30da982f00000000,
+    0xde5ea18e00000000, 0x61f8745600000000, 0x8f7c4df700000000,
+    0xfcf776cf00000000, 0x12734f6e00000000, 0x1ae101bf00000000,
+    0xf465381e00000000, 0x87ee032600000000, 0x696a3a8700000000,
+    0x6477b56a00000000, 0x8af38ccb00000000, 0xf978b7f300000000,
+    0x17fc8e5200000000, 0x1f6ec08300000000, 0xf1eaf92200000000,
+    0x8261c21a00000000, 0x6ce5fbbb00000000, 0xd3432e6300000000,
+    0x3dc717c200000000, 0x4e4c2cfa00000000, 0xa0c8155b00000000,
+    0xa85a5b8a00000000, 0x46de622b00000000, 0x3555591300000000,
+    0xdbd160b200000000, 0x0a1e837900000000, 0xe49abad800000000,
+    0x971181e000000000, 0x7995b84100000000, 0x7107f69000000000,
+    0x9f83cf3100000000, 0xec08f40900000000, 0x028ccda800000000,
+    0xbd2a187000000000, 0x53ae21d100000000, 0x20251ae900000000,
+    0xcea1234800000000, 0xc6336d9900000000, 0x28b7543800000000,
+    0x5b3c6f0000000000, 0xb5b856a100000000, 0x704bb39900000000,
+    0x9ecf8a3800000000, 0xed44b10000000000, 0x03c088a100000000,
+    0x0b52c67000000000, 0xe5d6ffd100000000, 0x965dc4e900000000,
+    0x78d9fd4800000000, 0xc77f289000000000, 0x29fb113100000000,
+    0x5a702a0900000000, 0xb4f413a800000000, 0xbc665d7900000000,
+    0x52e264d800000000, 0x21695fe000000000, 0xcfed664100000000,
+    0x1e22858a00000000, 0xf0a6bc2b00000000, 0x832d871300000000,
+    0x6da9beb200000000, 0x653bf06300000000, 0x8bbfc9c200000000,
+    0xf834f2fa00000000, 0x16b0cb5b00000000, 0xa9161e8300000000,
+    0x4792272200000000, 0x34191c1a00000000, 0xda9d25bb00000000,
+    0xd20f6b6a00000000, 0x3c8b52cb00000000, 0x4f0069f300000000,
+    0xa184505200000000, 0xac99dfbf00000000, 0x421de61e00000000,
+    0x3196dd2600000000, 0xdf12e48700000000, 0xd780aa5600000000,
+    0x390493f700000000, 0x4a8fa8cf00000000, 0xa40b916e00000000,
+    0x1bad44b600000000, 0xf5297d1700000000, 0x86a2462f00000000,
+    0x68267f8e00000000, 0x60b4315f00000000, 0x8e3008fe00000000,
+    0xfdbb33c600000000, 0x133f0a6700000000, 0xc2f0e9ac00000000,
+    0x2c74d00d00000000, 0x5fffeb3500000000, 0xb17bd29400000000,
+    0xb9e99c4500000000, 0x576da5e400000000, 0x24e69edc00000000,
+    0xca62a77d00000000, 0x75c472a500000000, 0x9b404b0400000000,
+    0xe8cb703c00000000, 0x064f499d00000000, 0x0edd074c00000000,
+    0xe0593eed00000000, 0x93d205d500000000, 0x7d563c7400000000,
+    0xc8ee6ad500000000, 0x266a537400000000, 0x55e1684c00000000,
+    0xbb6551ed00000000, 0xb3f71f3c00000000, 0x5d73269d00000000,
+    0x2ef81da500000000, 0xc07c240400000000, 0x7fdaf1dc00000000,
+    0x915ec87d00000000, 0xe2d5f34500000000, 0x0c51cae400000000,
+    0x04c3843500000000, 0xea47bd9400000000, 0x99cc86ac00000000,
+    0x7748bf0d00000000, 0xa6875cc600000000, 0x4803656700000000,
+    0x3b885e5f00000000, 0xd50c67fe00000000, 0xdd9e292f00000000,
+    0x331a108e00000000, 0x40912bb600000000, 0xae15121700000000,
+    0x11b3c7cf00000000, 0xff37fe6e00000000, 0x8cbcc55600000000,
+    0x6238fcf700000000, 0x6aaab22600000000, 0x842e8b8700000000,
+    0xf7a5b0bf00000000, 0x1921891e00000000, 0x143c06f300000000,
+    0xfab83f5200000000, 0x8933046a00000000, 0x67b73dcb00000000,
+    0x6f25731a00000000, 0x81a14abb00000000, 0xf22a718300000000,
+    0x1cae482200000000, 0xa3089dfa00000000, 0x4d8ca45b00000000,
+    0x3e079f6300000000, 0xd083a6c200000000, 0xd811e81300000000,
+    0x3695d1b200000000, 0x451eea8a00000000, 0xab9ad32b00000000,
+    0x7a5530e000000000, 0x94d1094100000000, 0xe75a327900000000,
+    0x09de0bd800000000, 0x014c450900000000, 0xefc87ca800000000,
+    0x9c43479000000000, 0x72c77e3100000000, 0xcd61abe900000000,
+    0x23e5924800000000, 0x506ea97000000000, 0xbeea90d100000000,
+    0xb678de0000000000, 0x58fce7a100000000, 0x2b77dc9900000000,
+    0xc5f3e53800000000},
+   {0x0000000000000000, 0xfbf6134700000000, 0xf6ed278e00000000,
+    0x0d1b34c900000000, 0xaddd3ec700000000, 0x562b2d8000000000,
+    0x5b30194900000000, 0xa0c60a0e00000000, 0x1bbd0c5500000000,
+    0xe04b1f1200000000, 0xed502bdb00000000, 0x16a6389c00000000,
+    0xb660329200000000, 0x4d9621d500000000, 0x408d151c00000000,
+    0xbb7b065b00000000, 0x367a19aa00000000, 0xcd8c0aed00000000,
+    0xc0973e2400000000, 0x3b612d6300000000, 0x9ba7276d00000000,
+    0x6051342a00000000, 0x6d4a00e300000000, 0x96bc13a400000000,
+    0x2dc715ff00000000, 0xd63106b800000000, 0xdb2a327100000000,
+    0x20dc213600000000, 0x801a2b3800000000, 0x7bec387f00000000,
+    0x76f70cb600000000, 0x8d011ff100000000, 0x2df2438f00000000,
+    0xd60450c800000000, 0xdb1f640100000000, 0x20e9774600000000,
+    0x802f7d4800000000, 0x7bd96e0f00000000, 0x76c25ac600000000,
+    0x8d34498100000000, 0x364f4fda00000000, 0xcdb95c9d00000000,
+    0xc0a2685400000000, 0x3b547b1300000000, 0x9b92711d00000000,
+    0x6064625a00000000, 0x6d7f569300000000, 0x968945d400000000,
+    0x1b885a2500000000, 0xe07e496200000000, 0xed657dab00000000,
+    0x16936eec00000000, 0xb65564e200000000, 0x4da377a500000000,
+    0x40b8436c00000000, 0xbb4e502b00000000, 0x0035567000000000,
+    0xfbc3453700000000, 0xf6d871fe00000000, 0x0d2e62b900000000,
+    0xade868b700000000, 0x561e7bf000000000, 0x5b054f3900000000,
+    0xa0f35c7e00000000, 0x1be2f6c500000000, 0xe014e58200000000,
+    0xed0fd14b00000000, 0x16f9c20c00000000, 0xb63fc80200000000,
+    0x4dc9db4500000000, 0x40d2ef8c00000000, 0xbb24fccb00000000,
+    0x005ffa9000000000, 0xfba9e9d700000000, 0xf6b2dd1e00000000,
+    0x0d44ce5900000000, 0xad82c45700000000, 0x5674d71000000000,
+    0x5b6fe3d900000000, 0xa099f09e00000000, 0x2d98ef6f00000000,
+    0xd66efc2800000000, 0xdb75c8e100000000, 0x2083dba600000000,
+    0x8045d1a800000000, 0x7bb3c2ef00000000, 0x76a8f62600000000,
+    0x8d5ee56100000000, 0x3625e33a00000000, 0xcdd3f07d00000000,
+    0xc0c8c4b400000000, 0x3b3ed7f300000000, 0x9bf8ddfd00000000,
+    0x600eceba00000000, 0x6d15fa7300000000, 0x96e3e93400000000,
+    0x3610b54a00000000, 0xcde6a60d00000000, 0xc0fd92c400000000,
+    0x3b0b818300000000, 0x9bcd8b8d00000000, 0x603b98ca00000000,
+    0x6d20ac0300000000, 0x96d6bf4400000000, 0x2dadb91f00000000,
+    0xd65baa5800000000, 0xdb409e9100000000, 0x20b68dd600000000,
+    0x807087d800000000, 0x7b86949f00000000, 0x769da05600000000,
+    0x8d6bb31100000000, 0x006aace000000000, 0xfb9cbfa700000000,
+    0xf6878b6e00000000, 0x0d71982900000000, 0xadb7922700000000,
+    0x5641816000000000, 0x5b5ab5a900000000, 0xa0aca6ee00000000,
+    0x1bd7a0b500000000, 0xe021b3f200000000, 0xed3a873b00000000,
+    0x16cc947c00000000, 0xb60a9e7200000000, 0x4dfc8d3500000000,
+    0x40e7b9fc00000000, 0xbb11aabb00000000, 0x77c29c5000000000,
+    0x8c348f1700000000, 0x812fbbde00000000, 0x7ad9a89900000000,
+    0xda1fa29700000000, 0x21e9b1d000000000, 0x2cf2851900000000,
+    0xd704965e00000000, 0x6c7f900500000000, 0x9789834200000000,
+    0x9a92b78b00000000, 0x6164a4cc00000000, 0xc1a2aec200000000,
+    0x3a54bd8500000000, 0x374f894c00000000, 0xccb99a0b00000000,
+    0x41b885fa00000000, 0xba4e96bd00000000, 0xb755a27400000000,
+    0x4ca3b13300000000, 0xec65bb3d00000000, 0x1793a87a00000000,
+    0x1a889cb300000000, 0xe17e8ff400000000, 0x5a0589af00000000,
+    0xa1f39ae800000000, 0xace8ae2100000000, 0x571ebd6600000000,
+    0xf7d8b76800000000, 0x0c2ea42f00000000, 0x013590e600000000,
+    0xfac383a100000000, 0x5a30dfdf00000000, 0xa1c6cc9800000000,
+    0xacddf85100000000, 0x572beb1600000000, 0xf7ede11800000000,
+    0x0c1bf25f00000000, 0x0100c69600000000, 0xfaf6d5d100000000,
+    0x418dd38a00000000, 0xba7bc0cd00000000, 0xb760f40400000000,
+    0x4c96e74300000000, 0xec50ed4d00000000, 0x17a6fe0a00000000,
+    0x1abdcac300000000, 0xe14bd98400000000, 0x6c4ac67500000000,
+    0x97bcd53200000000, 0x9aa7e1fb00000000, 0x6151f2bc00000000,
+    0xc197f8b200000000, 0x3a61ebf500000000, 0x377adf3c00000000,
+    0xcc8ccc7b00000000, 0x77f7ca2000000000, 0x8c01d96700000000,
+    0x811aedae00000000, 0x7aecfee900000000, 0xda2af4e700000000,
+    0x21dce7a000000000, 0x2cc7d36900000000, 0xd731c02e00000000,
+    0x6c206a9500000000, 0x97d679d200000000, 0x9acd4d1b00000000,
+    0x613b5e5c00000000, 0xc1fd545200000000, 0x3a0b471500000000,
+    0x371073dc00000000, 0xcce6609b00000000, 0x779d66c000000000,
+    0x8c6b758700000000, 0x8170414e00000000, 0x7a86520900000000,
+    0xda40580700000000, 0x21b64b4000000000, 0x2cad7f8900000000,
+    0xd75b6cce00000000, 0x5a5a733f00000000, 0xa1ac607800000000,
+    0xacb754b100000000, 0x574147f600000000, 0xf7874df800000000,
+    0x0c715ebf00000000, 0x016a6a7600000000, 0xfa9c793100000000,
+    0x41e77f6a00000000, 0xba116c2d00000000, 0xb70a58e400000000,
+    0x4cfc4ba300000000, 0xec3a41ad00000000, 0x17cc52ea00000000,
+    0x1ad7662300000000, 0xe121756400000000, 0x41d2291a00000000,
+    0xba243a5d00000000, 0xb73f0e9400000000, 0x4cc91dd300000000,
+    0xec0f17dd00000000, 0x17f9049a00000000, 0x1ae2305300000000,
+    0xe114231400000000, 0x5a6f254f00000000, 0xa199360800000000,
+    0xac8202c100000000, 0x5774118600000000, 0xf7b21b8800000000,
+    0x0c4408cf00000000, 0x015f3c0600000000, 0xfaa92f4100000000,
+    0x77a830b000000000, 0x8c5e23f700000000, 0x8145173e00000000,
+    0x7ab3047900000000, 0xda750e7700000000, 0x21831d3000000000,
+    0x2c9829f900000000, 0xd76e3abe00000000, 0x6c153ce500000000,
+    0x97e32fa200000000, 0x9af81b6b00000000, 0x610e082c00000000,
+    0xc1c8022200000000, 0x3a3e116500000000, 0x372525ac00000000,
+    0xccd336eb00000000},
+   {0x0000000000000000, 0x6238282a00000000, 0xc470505400000000,
+    0xa648787e00000000, 0x88e1a0a800000000, 0xead9888200000000,
+    0x4c91f0fc00000000, 0x2ea9d8d600000000, 0x51c5308a00000000,
+    0x33fd18a000000000, 0x95b560de00000000, 0xf78d48f400000000,
+    0xd924902200000000, 0xbb1cb80800000000, 0x1d54c07600000000,
+    0x7f6ce85c00000000, 0xe38c10cf00000000, 0x81b438e500000000,
+    0x27fc409b00000000, 0x45c468b100000000, 0x6b6db06700000000,
+    0x0955984d00000000, 0xaf1de03300000000, 0xcd25c81900000000,
+    0xb249204500000000, 0xd071086f00000000, 0x7639701100000000,
+    0x1401583b00000000, 0x3aa880ed00000000, 0x5890a8c700000000,
+    0xfed8d0b900000000, 0x9ce0f89300000000, 0x871f504500000000,
+    0xe527786f00000000, 0x436f001100000000, 0x2157283b00000000,
+    0x0ffef0ed00000000, 0x6dc6d8c700000000, 0xcb8ea0b900000000,
+    0xa9b6889300000000, 0xd6da60cf00000000, 0xb4e248e500000000,
+    0x12aa309b00000000, 0x709218b100000000, 0x5e3bc06700000000,
+    0x3c03e84d00000000, 0x9a4b903300000000, 0xf873b81900000000,
+    0x6493408a00000000, 0x06ab68a000000000, 0xa0e310de00000000,
+    0xc2db38f400000000, 0xec72e02200000000, 0x8e4ac80800000000,
+    0x2802b07600000000, 0x4a3a985c00000000, 0x3556700000000000,
+    0x576e582a00000000, 0xf126205400000000, 0x931e087e00000000,
+    0xbdb7d0a800000000, 0xdf8ff88200000000, 0x79c780fc00000000,
+    0x1bffa8d600000000, 0x0e3fa08a00000000, 0x6c0788a000000000,
+    0xca4ff0de00000000, 0xa877d8f400000000, 0x86de002200000000,
+    0xe4e6280800000000, 0x42ae507600000000, 0x2096785c00000000,
+    0x5ffa900000000000, 0x3dc2b82a00000000, 0x9b8ac05400000000,
+    0xf9b2e87e00000000, 0xd71b30a800000000, 0xb523188200000000,
+    0x136b60fc00000000, 0x715348d600000000, 0xedb3b04500000000,
+    0x8f8b986f00000000, 0x29c3e01100000000, 0x4bfbc83b00000000,
+    0x655210ed00000000, 0x076a38c700000000, 0xa12240b900000000,
+    0xc31a689300000000, 0xbc7680cf00000000, 0xde4ea8e500000000,
+    0x7806d09b00000000, 0x1a3ef8b100000000, 0x3497206700000000,
+    0x56af084d00000000, 0xf0e7703300000000, 0x92df581900000000,
+    0x8920f0cf00000000, 0xeb18d8e500000000, 0x4d50a09b00000000,
+    0x2f6888b100000000, 0x01c1506700000000, 0x63f9784d00000000,
+    0xc5b1003300000000, 0xa789281900000000, 0xd8e5c04500000000,
+    0xbadde86f00000000, 0x1c95901100000000, 0x7eadb83b00000000,
+    0x500460ed00000000, 0x323c48c700000000, 0x947430b900000000,
+    0xf64c189300000000, 0x6aace00000000000, 0x0894c82a00000000,
+    0xaedcb05400000000, 0xcce4987e00000000, 0xe24d40a800000000,
+    0x8075688200000000, 0x263d10fc00000000, 0x440538d600000000,
+    0x3b69d08a00000000, 0x5951f8a000000000, 0xff1980de00000000,
+    0x9d21a8f400000000, 0xb388702200000000, 0xd1b0580800000000,
+    0x77f8207600000000, 0x15c0085c00000000, 0x5d7831ce00000000,
+    0x3f4019e400000000, 0x9908619a00000000, 0xfb3049b000000000,
+    0xd599916600000000, 0xb7a1b94c00000000, 0x11e9c13200000000,
+    0x73d1e91800000000, 0x0cbd014400000000, 0x6e85296e00000000,
+    0xc8cd511000000000, 0xaaf5793a00000000, 0x845ca1ec00000000,
+    0xe66489c600000000, 0x402cf1b800000000, 0x2214d99200000000,
+    0xbef4210100000000, 0xdccc092b00000000, 0x7a84715500000000,
+    0x18bc597f00000000, 0x361581a900000000, 0x542da98300000000,
+    0xf265d1fd00000000, 0x905df9d700000000, 0xef31118b00000000,
+    0x8d0939a100000000, 0x2b4141df00000000, 0x497969f500000000,
+    0x67d0b12300000000, 0x05e8990900000000, 0xa3a0e17700000000,
+    0xc198c95d00000000, 0xda67618b00000000, 0xb85f49a100000000,
+    0x1e1731df00000000, 0x7c2f19f500000000, 0x5286c12300000000,
+    0x30bee90900000000, 0x96f6917700000000, 0xf4ceb95d00000000,
+    0x8ba2510100000000, 0xe99a792b00000000, 0x4fd2015500000000,
+    0x2dea297f00000000, 0x0343f1a900000000, 0x617bd98300000000,
+    0xc733a1fd00000000, 0xa50b89d700000000, 0x39eb714400000000,
+    0x5bd3596e00000000, 0xfd9b211000000000, 0x9fa3093a00000000,
+    0xb10ad1ec00000000, 0xd332f9c600000000, 0x757a81b800000000,
+    0x1742a99200000000, 0x682e41ce00000000, 0x0a1669e400000000,
+    0xac5e119a00000000, 0xce6639b000000000, 0xe0cfe16600000000,
+    0x82f7c94c00000000, 0x24bfb13200000000, 0x4687991800000000,
+    0x5347914400000000, 0x317fb96e00000000, 0x9737c11000000000,
+    0xf50fe93a00000000, 0xdba631ec00000000, 0xb99e19c600000000,
+    0x1fd661b800000000, 0x7dee499200000000, 0x0282a1ce00000000,
+    0x60ba89e400000000, 0xc6f2f19a00000000, 0xa4cad9b000000000,
+    0x8a63016600000000, 0xe85b294c00000000, 0x4e13513200000000,
+    0x2c2b791800000000, 0xb0cb818b00000000, 0xd2f3a9a100000000,
+    0x74bbd1df00000000, 0x1683f9f500000000, 0x382a212300000000,
+    0x5a12090900000000, 0xfc5a717700000000, 0x9e62595d00000000,
+    0xe10eb10100000000, 0x8336992b00000000, 0x257ee15500000000,
+    0x4746c97f00000000, 0x69ef11a900000000, 0x0bd7398300000000,
+    0xad9f41fd00000000, 0xcfa769d700000000, 0xd458c10100000000,
+    0xb660e92b00000000, 0x1028915500000000, 0x7210b97f00000000,
+    0x5cb961a900000000, 0x3e81498300000000, 0x98c931fd00000000,
+    0xfaf119d700000000, 0x859df18b00000000, 0xe7a5d9a100000000,
+    0x41eda1df00000000, 0x23d589f500000000, 0x0d7c512300000000,
+    0x6f44790900000000, 0xc90c017700000000, 0xab34295d00000000,
+    0x37d4d1ce00000000, 0x55ecf9e400000000, 0xf3a4819a00000000,
+    0x919ca9b000000000, 0xbf35716600000000, 0xdd0d594c00000000,
+    0x7b45213200000000, 0x197d091800000000, 0x6611e14400000000,
+    0x0429c96e00000000, 0xa261b11000000000, 0xc059993a00000000,
+    0xeef041ec00000000, 0x8cc869c600000000, 0x2a8011b800000000,
+    0x48b8399200000000},
+   {0x0000000000000000, 0x4c2896a300000000, 0xd9565d9c00000000,
+    0x957ecb3f00000000, 0xf3abcbe300000000, 0xbf835d4000000000,
+    0x2afd967f00000000, 0x66d500dc00000000, 0xa751e61c00000000,
+    0xeb7970bf00000000, 0x7e07bb8000000000, 0x322f2d2300000000,
+    0x54fa2dff00000000, 0x18d2bb5c00000000, 0x8dac706300000000,
+    0xc184e6c000000000, 0x4ea3cc3900000000, 0x028b5a9a00000000,
+    0x97f591a500000000, 0xdbdd070600000000, 0xbd0807da00000000,
+    0xf120917900000000, 0x645e5a4600000000, 0x2876cce500000000,
+    0xe9f22a2500000000, 0xa5dabc8600000000, 0x30a477b900000000,
+    0x7c8ce11a00000000, 0x1a59e1c600000000, 0x5671776500000000,
+    0xc30fbc5a00000000, 0x8f272af900000000, 0x9c46997300000000,
+    0xd06e0fd000000000, 0x4510c4ef00000000, 0x0938524c00000000,
+    0x6fed529000000000, 0x23c5c43300000000, 0xb6bb0f0c00000000,
+    0xfa9399af00000000, 0x3b177f6f00000000, 0x773fe9cc00000000,
+    0xe24122f300000000, 0xae69b45000000000, 0xc8bcb48c00000000,
+    0x8494222f00000000, 0x11eae91000000000, 0x5dc27fb300000000,
+    0xd2e5554a00000000, 0x9ecdc3e900000000, 0x0bb308d600000000,
+    0x479b9e7500000000, 0x214e9ea900000000, 0x6d66080a00000000,
+    0xf818c33500000000, 0xb430559600000000, 0x75b4b35600000000,
+    0x399c25f500000000, 0xace2eeca00000000, 0xe0ca786900000000,
+    0x861f78b500000000, 0xca37ee1600000000, 0x5f49252900000000,
+    0x1361b38a00000000, 0x388d32e700000000, 0x74a5a44400000000,
+    0xe1db6f7b00000000, 0xadf3f9d800000000, 0xcb26f90400000000,
+    0x870e6fa700000000, 0x1270a49800000000, 0x5e58323b00000000,
+    0x9fdcd4fb00000000, 0xd3f4425800000000, 0x468a896700000000,
+    0x0aa21fc400000000, 0x6c771f1800000000, 0x205f89bb00000000,
+    0xb521428400000000, 0xf909d42700000000, 0x762efede00000000,
+    0x3a06687d00000000, 0xaf78a34200000000, 0xe35035e100000000,
+    0x8585353d00000000, 0xc9ada39e00000000, 0x5cd368a100000000,
+    0x10fbfe0200000000, 0xd17f18c200000000, 0x9d578e6100000000,
+    0x0829455e00000000, 0x4401d3fd00000000, 0x22d4d32100000000,
+    0x6efc458200000000, 0xfb828ebd00000000, 0xb7aa181e00000000,
+    0xa4cbab9400000000, 0xe8e33d3700000000, 0x7d9df60800000000,
+    0x31b560ab00000000, 0x5760607700000000, 0x1b48f6d400000000,
+    0x8e363deb00000000, 0xc21eab4800000000, 0x039a4d8800000000,
+    0x4fb2db2b00000000, 0xdacc101400000000, 0x96e486b700000000,
+    0xf031866b00000000, 0xbc1910c800000000, 0x2967dbf700000000,
+    0x654f4d5400000000, 0xea6867ad00000000, 0xa640f10e00000000,
+    0x333e3a3100000000, 0x7f16ac9200000000, 0x19c3ac4e00000000,
+    0x55eb3aed00000000, 0xc095f1d200000000, 0x8cbd677100000000,
+    0x4d3981b100000000, 0x0111171200000000, 0x946fdc2d00000000,
+    0xd8474a8e00000000, 0xbe924a5200000000, 0xf2badcf100000000,
+    0x67c417ce00000000, 0x2bec816d00000000, 0x311c141500000000,
+    0x7d3482b600000000, 0xe84a498900000000, 0xa462df2a00000000,
+    0xc2b7dff600000000, 0x8e9f495500000000, 0x1be1826a00000000,
+    0x57c914c900000000, 0x964df20900000000, 0xda6564aa00000000,
+    0x4f1baf9500000000, 0x0333393600000000, 0x65e639ea00000000,
+    0x29ceaf4900000000, 0xbcb0647600000000, 0xf098f2d500000000,
+    0x7fbfd82c00000000, 0x33974e8f00000000, 0xa6e985b000000000,
+    0xeac1131300000000, 0x8c1413cf00000000, 0xc03c856c00000000,
+    0x55424e5300000000, 0x196ad8f000000000, 0xd8ee3e3000000000,
+    0x94c6a89300000000, 0x01b863ac00000000, 0x4d90f50f00000000,
+    0x2b45f5d300000000, 0x676d637000000000, 0xf213a84f00000000,
+    0xbe3b3eec00000000, 0xad5a8d6600000000, 0xe1721bc500000000,
+    0x740cd0fa00000000, 0x3824465900000000, 0x5ef1468500000000,
+    0x12d9d02600000000, 0x87a71b1900000000, 0xcb8f8dba00000000,
+    0x0a0b6b7a00000000, 0x4623fdd900000000, 0xd35d36e600000000,
+    0x9f75a04500000000, 0xf9a0a09900000000, 0xb588363a00000000,
+    0x20f6fd0500000000, 0x6cde6ba600000000, 0xe3f9415f00000000,
+    0xafd1d7fc00000000, 0x3aaf1cc300000000, 0x76878a6000000000,
+    0x10528abc00000000, 0x5c7a1c1f00000000, 0xc904d72000000000,
+    0x852c418300000000, 0x44a8a74300000000, 0x088031e000000000,
+    0x9dfefadf00000000, 0xd1d66c7c00000000, 0xb7036ca000000000,
+    0xfb2bfa0300000000, 0x6e55313c00000000, 0x227da79f00000000,
+    0x099126f200000000, 0x45b9b05100000000, 0xd0c77b6e00000000,
+    0x9cefedcd00000000, 0xfa3aed1100000000, 0xb6127bb200000000,
+    0x236cb08d00000000, 0x6f44262e00000000, 0xaec0c0ee00000000,
+    0xe2e8564d00000000, 0x77969d7200000000, 0x3bbe0bd100000000,
+    0x5d6b0b0d00000000, 0x11439dae00000000, 0x843d569100000000,
+    0xc815c03200000000, 0x4732eacb00000000, 0x0b1a7c6800000000,
+    0x9e64b75700000000, 0xd24c21f400000000, 0xb499212800000000,
+    0xf8b1b78b00000000, 0x6dcf7cb400000000, 0x21e7ea1700000000,
+    0xe0630cd700000000, 0xac4b9a7400000000, 0x3935514b00000000,
+    0x751dc7e800000000, 0x13c8c73400000000, 0x5fe0519700000000,
+    0xca9e9aa800000000, 0x86b60c0b00000000, 0x95d7bf8100000000,
+    0xd9ff292200000000, 0x4c81e21d00000000, 0x00a974be00000000,
+    0x667c746200000000, 0x2a54e2c100000000, 0xbf2a29fe00000000,
+    0xf302bf5d00000000, 0x3286599d00000000, 0x7eaecf3e00000000,
+    0xebd0040100000000, 0xa7f892a200000000, 0xc12d927e00000000,
+    0x8d0504dd00000000, 0x187bcfe200000000, 0x5453594100000000,
+    0xdb7473b800000000, 0x975ce51b00000000, 0x02222e2400000000,
+    0x4e0ab88700000000, 0x28dfb85b00000000, 0x64f72ef800000000,
+    0xf189e5c700000000, 0xbda1736400000000, 0x7c2595a400000000,
+    0x300d030700000000, 0xa573c83800000000, 0xe95b5e9b00000000,
+    0x8f8e5e4700000000, 0xc3a6c8e400000000, 0x56d803db00000000,
+    0x1af0957800000000},
+   {0x0000000000000000, 0x939bc97f00000000, 0x263793ff00000000,
+    0xb5ac5a8000000000, 0x0d68572400000000, 0x9ef39e5b00000000,
+    0x2b5fc4db00000000, 0xb8c40da400000000, 0x1ad0ae4800000000,
+    0x894b673700000000, 0x3ce73db700000000, 0xaf7cf4c800000000,
+    0x17b8f96c00000000, 0x8423301300000000, 0x318f6a9300000000,
+    0xa214a3ec00000000, 0x34a05d9100000000, 0xa73b94ee00000000,
+    0x1297ce6e00000000, 0x810c071100000000, 0x39c80ab500000000,
+    0xaa53c3ca00000000, 0x1fff994a00000000, 0x8c64503500000000,
+    0x2e70f3d900000000, 0xbdeb3aa600000000, 0x0847602600000000,
+    0x9bdca95900000000, 0x2318a4fd00000000, 0xb0836d8200000000,
+    0x052f370200000000, 0x96b4fe7d00000000, 0x2946caf900000000,
+    0xbadd038600000000, 0x0f71590600000000, 0x9cea907900000000,
+    0x242e9ddd00000000, 0xb7b554a200000000, 0x02190e2200000000,
+    0x9182c75d00000000, 0x339664b100000000, 0xa00dadce00000000,
+    0x15a1f74e00000000, 0x863a3e3100000000, 0x3efe339500000000,
+    0xad65faea00000000, 0x18c9a06a00000000, 0x8b52691500000000,
+    0x1de6976800000000, 0x8e7d5e1700000000, 0x3bd1049700000000,
+    0xa84acde800000000, 0x108ec04c00000000, 0x8315093300000000,
+    0x36b953b300000000, 0xa5229acc00000000, 0x0736392000000000,
+    0x94adf05f00000000, 0x2101aadf00000000, 0xb29a63a000000000,
+    0x0a5e6e0400000000, 0x99c5a77b00000000, 0x2c69fdfb00000000,
+    0xbff2348400000000, 0x138ae52800000000, 0x80112c5700000000,
+    0x35bd76d700000000, 0xa626bfa800000000, 0x1ee2b20c00000000,
+    0x8d797b7300000000, 0x38d521f300000000, 0xab4ee88c00000000,
+    0x095a4b6000000000, 0x9ac1821f00000000, 0x2f6dd89f00000000,
+    0xbcf611e000000000, 0x04321c4400000000, 0x97a9d53b00000000,
+    0x22058fbb00000000, 0xb19e46c400000000, 0x272ab8b900000000,
+    0xb4b171c600000000, 0x011d2b4600000000, 0x9286e23900000000,
+    0x2a42ef9d00000000, 0xb9d926e200000000, 0x0c757c6200000000,
+    0x9feeb51d00000000, 0x3dfa16f100000000, 0xae61df8e00000000,
+    0x1bcd850e00000000, 0x88564c7100000000, 0x309241d500000000,
+    0xa30988aa00000000, 0x16a5d22a00000000, 0x853e1b5500000000,
+    0x3acc2fd100000000, 0xa957e6ae00000000, 0x1cfbbc2e00000000,
+    0x8f60755100000000, 0x37a478f500000000, 0xa43fb18a00000000,
+    0x1193eb0a00000000, 0x8208227500000000, 0x201c819900000000,
+    0xb38748e600000000, 0x062b126600000000, 0x95b0db1900000000,
+    0x2d74d6bd00000000, 0xbeef1fc200000000, 0x0b43454200000000,
+    0x98d88c3d00000000, 0x0e6c724000000000, 0x9df7bb3f00000000,
+    0x285be1bf00000000, 0xbbc028c000000000, 0x0304256400000000,
+    0x909fec1b00000000, 0x2533b69b00000000, 0xb6a87fe400000000,
+    0x14bcdc0800000000, 0x8727157700000000, 0x328b4ff700000000,
+    0xa110868800000000, 0x19d48b2c00000000, 0x8a4f425300000000,
+    0x3fe318d300000000, 0xac78d1ac00000000, 0x2614cb5100000000,
+    0xb58f022e00000000, 0x002358ae00000000, 0x93b891d100000000,
+    0x2b7c9c7500000000, 0xb8e7550a00000000, 0x0d4b0f8a00000000,
+    0x9ed0c6f500000000, 0x3cc4651900000000, 0xaf5fac6600000000,
+    0x1af3f6e600000000, 0x89683f9900000000, 0x31ac323d00000000,
+    0xa237fb4200000000, 0x179ba1c200000000, 0x840068bd00000000,
+    0x12b496c000000000, 0x812f5fbf00000000, 0x3483053f00000000,
+    0xa718cc4000000000, 0x1fdcc1e400000000, 0x8c47089b00000000,
+    0x39eb521b00000000, 0xaa709b6400000000, 0x0864388800000000,
+    0x9bfff1f700000000, 0x2e53ab7700000000, 0xbdc8620800000000,
+    0x050c6fac00000000, 0x9697a6d300000000, 0x233bfc5300000000,
+    0xb0a0352c00000000, 0x0f5201a800000000, 0x9cc9c8d700000000,
+    0x2965925700000000, 0xbafe5b2800000000, 0x023a568c00000000,
+    0x91a19ff300000000, 0x240dc57300000000, 0xb7960c0c00000000,
+    0x1582afe000000000, 0x8619669f00000000, 0x33b53c1f00000000,
+    0xa02ef56000000000, 0x18eaf8c400000000, 0x8b7131bb00000000,
+    0x3edd6b3b00000000, 0xad46a24400000000, 0x3bf25c3900000000,
+    0xa869954600000000, 0x1dc5cfc600000000, 0x8e5e06b900000000,
+    0x369a0b1d00000000, 0xa501c26200000000, 0x10ad98e200000000,
+    0x8336519d00000000, 0x2122f27100000000, 0xb2b93b0e00000000,
+    0x0715618e00000000, 0x948ea8f100000000, 0x2c4aa55500000000,
+    0xbfd16c2a00000000, 0x0a7d36aa00000000, 0x99e6ffd500000000,
+    0x359e2e7900000000, 0xa605e70600000000, 0x13a9bd8600000000,
+    0x803274f900000000, 0x38f6795d00000000, 0xab6db02200000000,
+    0x1ec1eaa200000000, 0x8d5a23dd00000000, 0x2f4e803100000000,
+    0xbcd5494e00000000, 0x097913ce00000000, 0x9ae2dab100000000,
+    0x2226d71500000000, 0xb1bd1e6a00000000, 0x041144ea00000000,
+    0x978a8d9500000000, 0x013e73e800000000, 0x92a5ba9700000000,
+    0x2709e01700000000, 0xb492296800000000, 0x0c5624cc00000000,
+    0x9fcdedb300000000, 0x2a61b73300000000, 0xb9fa7e4c00000000,
+    0x1beedda000000000, 0x887514df00000000, 0x3dd94e5f00000000,
+    0xae42872000000000, 0x16868a8400000000, 0x851d43fb00000000,
+    0x30b1197b00000000, 0xa32ad00400000000, 0x1cd8e48000000000,
+    0x8f432dff00000000, 0x3aef777f00000000, 0xa974be0000000000,
+    0x11b0b3a400000000, 0x822b7adb00000000, 0x3787205b00000000,
+    0xa41ce92400000000, 0x06084ac800000000, 0x959383b700000000,
+    0x203fd93700000000, 0xb3a4104800000000, 0x0b601dec00000000,
+    0x98fbd49300000000, 0x2d578e1300000000, 0xbecc476c00000000,
+    0x2878b91100000000, 0xbbe3706e00000000, 0x0e4f2aee00000000,
+    0x9dd4e39100000000, 0x2510ee3500000000, 0xb68b274a00000000,
+    0x03277dca00000000, 0x90bcb4b500000000, 0x32a8175900000000,
+    0xa133de2600000000, 0x149f84a600000000, 0x87044dd900000000,
+    0x3fc0407d00000000, 0xac5b890200000000, 0x19f7d38200000000,
+    0x8a6c1afd00000000},
+   {0x0000000000000000, 0x650b796900000000, 0xca16f2d200000000,
+    0xaf1d8bbb00000000, 0xd52b957e00000000, 0xb020ec1700000000,
+    0x1f3d67ac00000000, 0x7a361ec500000000, 0xaa572afd00000000,
+    0xcf5c539400000000, 0x6041d82f00000000, 0x054aa14600000000,
+    0x7f7cbf8300000000, 0x1a77c6ea00000000, 0xb56a4d5100000000,
+    0xd061343800000000, 0x15a9252100000000, 0x70a25c4800000000,
+    0xdfbfd7f300000000, 0xbab4ae9a00000000, 0xc082b05f00000000,
+    0xa589c93600000000, 0x0a94428d00000000, 0x6f9f3be400000000,
+    0xbffe0fdc00000000, 0xdaf576b500000000, 0x75e8fd0e00000000,
+    0x10e3846700000000, 0x6ad59aa200000000, 0x0fdee3cb00000000,
+    0xa0c3687000000000, 0xc5c8111900000000, 0x2a524b4200000000,
+    0x4f59322b00000000, 0xe044b99000000000, 0x854fc0f900000000,
+    0xff79de3c00000000, 0x9a72a75500000000, 0x356f2cee00000000,
+    0x5064558700000000, 0x800561bf00000000, 0xe50e18d600000000,
+    0x4a13936d00000000, 0x2f18ea0400000000, 0x552ef4c100000000,
+    0x30258da800000000, 0x9f38061300000000, 0xfa337f7a00000000,
+    0x3ffb6e6300000000, 0x5af0170a00000000, 0xf5ed9cb100000000,
+    0x90e6e5d800000000, 0xead0fb1d00000000, 0x8fdb827400000000,
+    0x20c609cf00000000, 0x45cd70a600000000, 0x95ac449e00000000,
+    0xf0a73df700000000, 0x5fbab64c00000000, 0x3ab1cf2500000000,
+    0x4087d1e000000000, 0x258ca88900000000, 0x8a91233200000000,
+    0xef9a5a5b00000000, 0x54a4968400000000, 0x31afefed00000000,
+    0x9eb2645600000000, 0xfbb91d3f00000000, 0x818f03fa00000000,
+    0xe4847a9300000000, 0x4b99f12800000000, 0x2e92884100000000,
+    0xfef3bc7900000000, 0x9bf8c51000000000, 0x34e54eab00000000,
+    0x51ee37c200000000, 0x2bd8290700000000, 0x4ed3506e00000000,
+    0xe1cedbd500000000, 0x84c5a2bc00000000, 0x410db3a500000000,
+    0x2406cacc00000000, 0x8b1b417700000000, 0xee10381e00000000,
+    0x942626db00000000, 0xf12d5fb200000000, 0x5e30d40900000000,
+    0x3b3bad6000000000, 0xeb5a995800000000, 0x8e51e03100000000,
+    0x214c6b8a00000000, 0x444712e300000000, 0x3e710c2600000000,
+    0x5b7a754f00000000, 0xf467fef400000000, 0x916c879d00000000,
+    0x7ef6ddc600000000, 0x1bfda4af00000000, 0xb4e02f1400000000,
+    0xd1eb567d00000000, 0xabdd48b800000000, 0xced631d100000000,
+    0x61cbba6a00000000, 0x04c0c30300000000, 0xd4a1f73b00000000,
+    0xb1aa8e5200000000, 0x1eb705e900000000, 0x7bbc7c8000000000,
+    0x018a624500000000, 0x64811b2c00000000, 0xcb9c909700000000,
+    0xae97e9fe00000000, 0x6b5ff8e700000000, 0x0e54818e00000000,
+    0xa1490a3500000000, 0xc442735c00000000, 0xbe746d9900000000,
+    0xdb7f14f000000000, 0x74629f4b00000000, 0x1169e62200000000,
+    0xc108d21a00000000, 0xa403ab7300000000, 0x0b1e20c800000000,
+    0x6e1559a100000000, 0x1423476400000000, 0x71283e0d00000000,
+    0xde35b5b600000000, 0xbb3eccdf00000000, 0xe94e5cd200000000,
+    0x8c4525bb00000000, 0x2358ae0000000000, 0x4653d76900000000,
+    0x3c65c9ac00000000, 0x596eb0c500000000, 0xf6733b7e00000000,
+    0x9378421700000000, 0x4319762f00000000, 0x26120f4600000000,
+    0x890f84fd00000000, 0xec04fd9400000000, 0x9632e35100000000,
+    0xf3399a3800000000, 0x5c24118300000000, 0x392f68ea00000000,
+    0xfce779f300000000, 0x99ec009a00000000, 0x36f18b2100000000,
+    0x53faf24800000000, 0x29ccec8d00000000, 0x4cc795e400000000,
+    0xe3da1e5f00000000, 0x86d1673600000000, 0x56b0530e00000000,
+    0x33bb2a6700000000, 0x9ca6a1dc00000000, 0xf9add8b500000000,
+    0x839bc67000000000, 0xe690bf1900000000, 0x498d34a200000000,
+    0x2c864dcb00000000, 0xc31c179000000000, 0xa6176ef900000000,
+    0x090ae54200000000, 0x6c019c2b00000000, 0x163782ee00000000,
+    0x733cfb8700000000, 0xdc21703c00000000, 0xb92a095500000000,
+    0x694b3d6d00000000, 0x0c40440400000000, 0xa35dcfbf00000000,
+    0xc656b6d600000000, 0xbc60a81300000000, 0xd96bd17a00000000,
+    0x76765ac100000000, 0x137d23a800000000, 0xd6b532b100000000,
+    0xb3be4bd800000000, 0x1ca3c06300000000, 0x79a8b90a00000000,
+    0x039ea7cf00000000, 0x6695dea600000000, 0xc988551d00000000,
+    0xac832c7400000000, 0x7ce2184c00000000, 0x19e9612500000000,
+    0xb6f4ea9e00000000, 0xd3ff93f700000000, 0xa9c98d3200000000,
+    0xccc2f45b00000000, 0x63df7fe000000000, 0x06d4068900000000,
+    0xbdeaca5600000000, 0xd8e1b33f00000000, 0x77fc388400000000,
+    0x12f741ed00000000, 0x68c15f2800000000, 0x0dca264100000000,
+    0xa2d7adfa00000000, 0xc7dcd49300000000, 0x17bde0ab00000000,
+    0x72b699c200000000, 0xddab127900000000, 0xb8a06b1000000000,
+    0xc29675d500000000, 0xa79d0cbc00000000, 0x0880870700000000,
+    0x6d8bfe6e00000000, 0xa843ef7700000000, 0xcd48961e00000000,
+    0x62551da500000000, 0x075e64cc00000000, 0x7d687a0900000000,
+    0x1863036000000000, 0xb77e88db00000000, 0xd275f1b200000000,
+    0x0214c58a00000000, 0x671fbce300000000, 0xc802375800000000,
+    0xad094e3100000000, 0xd73f50f400000000, 0xb234299d00000000,
+    0x1d29a22600000000, 0x7822db4f00000000, 0x97b8811400000000,
+    0xf2b3f87d00000000, 0x5dae73c600000000, 0x38a50aaf00000000,
+    0x4293146a00000000, 0x27986d0300000000, 0x8885e6b800000000,
+    0xed8e9fd100000000, 0x3defabe900000000, 0x58e4d28000000000,
+    0xf7f9593b00000000, 0x92f2205200000000, 0xe8c43e9700000000,
+    0x8dcf47fe00000000, 0x22d2cc4500000000, 0x47d9b52c00000000,
+    0x8211a43500000000, 0xe71add5c00000000, 0x480756e700000000,
+    0x2d0c2f8e00000000, 0x573a314b00000000, 0x3231482200000000,
+    0x9d2cc39900000000, 0xf827baf000000000, 0x28468ec800000000,
+    0x4d4df7a100000000, 0xe2507c1a00000000, 0x875b057300000000,
+    0xfd6d1bb600000000, 0x986662df00000000, 0x377be96400000000,
+    0x5270900d00000000},
+   {0x0000000000000000, 0xdcecb13d00000000, 0xb8d9637b00000000,
+    0x6435d24600000000, 0x70b3c7f600000000, 0xac5f76cb00000000,
+    0xc86aa48d00000000, 0x148615b000000000, 0xa160fe3600000000,
+    0x7d8c4f0b00000000, 0x19b99d4d00000000, 0xc5552c7000000000,
+    0xd1d339c000000000, 0x0d3f88fd00000000, 0x690a5abb00000000,
+    0xb5e6eb8600000000, 0x42c1fc6d00000000, 0x9e2d4d5000000000,
+    0xfa189f1600000000, 0x26f42e2b00000000, 0x32723b9b00000000,
+    0xee9e8aa600000000, 0x8aab58e000000000, 0x5647e9dd00000000,
+    0xe3a1025b00000000, 0x3f4db36600000000, 0x5b78612000000000,
+    0x8794d01d00000000, 0x9312c5ad00000000, 0x4ffe749000000000,
+    0x2bcba6d600000000, 0xf72717eb00000000, 0x8482f9db00000000,
+    0x586e48e600000000, 0x3c5b9aa000000000, 0xe0b72b9d00000000,
+    0xf4313e2d00000000, 0x28dd8f1000000000, 0x4ce85d5600000000,
+    0x9004ec6b00000000, 0x25e207ed00000000, 0xf90eb6d000000000,
+    0x9d3b649600000000, 0x41d7d5ab00000000, 0x5551c01b00000000,
+    0x89bd712600000000, 0xed88a36000000000, 0x3164125d00000000,
+    0xc64305b600000000, 0x1aafb48b00000000, 0x7e9a66cd00000000,
+    0xa276d7f000000000, 0xb6f0c24000000000, 0x6a1c737d00000000,
+    0x0e29a13b00000000, 0xd2c5100600000000, 0x6723fb8000000000,
+    0xbbcf4abd00000000, 0xdffa98fb00000000, 0x031629c600000000,
+    0x17903c7600000000, 0xcb7c8d4b00000000, 0xaf495f0d00000000,
+    0x73a5ee3000000000, 0x4903826c00000000, 0x95ef335100000000,
+    0xf1dae11700000000, 0x2d36502a00000000, 0x39b0459a00000000,
+    0xe55cf4a700000000, 0x816926e100000000, 0x5d8597dc00000000,
+    0xe8637c5a00000000, 0x348fcd6700000000, 0x50ba1f2100000000,
+    0x8c56ae1c00000000, 0x98d0bbac00000000, 0x443c0a9100000000,
+    0x2009d8d700000000, 0xfce569ea00000000, 0x0bc27e0100000000,
+    0xd72ecf3c00000000, 0xb31b1d7a00000000, 0x6ff7ac4700000000,
+    0x7b71b9f700000000, 0xa79d08ca00000000, 0xc3a8da8c00000000,
+    0x1f446bb100000000, 0xaaa2803700000000, 0x764e310a00000000,
+    0x127be34c00000000, 0xce97527100000000, 0xda1147c100000000,
+    0x06fdf6fc00000000, 0x62c824ba00000000, 0xbe24958700000000,
+    0xcd817bb700000000, 0x116dca8a00000000, 0x755818cc00000000,
+    0xa9b4a9f100000000, 0xbd32bc4100000000, 0x61de0d7c00000000,
+    0x05ebdf3a00000000, 0xd9076e0700000000, 0x6ce1858100000000,
+    0xb00d34bc00000000, 0xd438e6fa00000000, 0x08d457c700000000,
+    0x1c52427700000000, 0xc0bef34a00000000, 0xa48b210c00000000,
+    0x7867903100000000, 0x8f4087da00000000, 0x53ac36e700000000,
+    0x3799e4a100000000, 0xeb75559c00000000, 0xfff3402c00000000,
+    0x231ff11100000000, 0x472a235700000000, 0x9bc6926a00000000,
+    0x2e2079ec00000000, 0xf2ccc8d100000000, 0x96f91a9700000000,
+    0x4a15abaa00000000, 0x5e93be1a00000000, 0x827f0f2700000000,
+    0xe64add6100000000, 0x3aa66c5c00000000, 0x920604d900000000,
+    0x4eeab5e400000000, 0x2adf67a200000000, 0xf633d69f00000000,
+    0xe2b5c32f00000000, 0x3e59721200000000, 0x5a6ca05400000000,
+    0x8680116900000000, 0x3366faef00000000, 0xef8a4bd200000000,
+    0x8bbf999400000000, 0x575328a900000000, 0x43d53d1900000000,
+    0x9f398c2400000000, 0xfb0c5e6200000000, 0x27e0ef5f00000000,
+    0xd0c7f8b400000000, 0x0c2b498900000000, 0x681e9bcf00000000,
+    0xb4f22af200000000, 0xa0743f4200000000, 0x7c988e7f00000000,
+    0x18ad5c3900000000, 0xc441ed0400000000, 0x71a7068200000000,
+    0xad4bb7bf00000000, 0xc97e65f900000000, 0x1592d4c400000000,
+    0x0114c17400000000, 0xddf8704900000000, 0xb9cda20f00000000,
+    0x6521133200000000, 0x1684fd0200000000, 0xca684c3f00000000,
+    0xae5d9e7900000000, 0x72b12f4400000000, 0x66373af400000000,
+    0xbadb8bc900000000, 0xdeee598f00000000, 0x0202e8b200000000,
+    0xb7e4033400000000, 0x6b08b20900000000, 0x0f3d604f00000000,
+    0xd3d1d17200000000, 0xc757c4c200000000, 0x1bbb75ff00000000,
+    0x7f8ea7b900000000, 0xa362168400000000, 0x5445016f00000000,
+    0x88a9b05200000000, 0xec9c621400000000, 0x3070d32900000000,
+    0x24f6c69900000000, 0xf81a77a400000000, 0x9c2fa5e200000000,
+    0x40c314df00000000, 0xf525ff5900000000, 0x29c94e6400000000,
+    0x4dfc9c2200000000, 0x91102d1f00000000, 0x859638af00000000,
+    0x597a899200000000, 0x3d4f5bd400000000, 0xe1a3eae900000000,
+    0xdb0586b500000000, 0x07e9378800000000, 0x63dce5ce00000000,
+    0xbf3054f300000000, 0xabb6414300000000, 0x775af07e00000000,
+    0x136f223800000000, 0xcf83930500000000, 0x7a65788300000000,
+    0xa689c9be00000000, 0xc2bc1bf800000000, 0x1e50aac500000000,
+    0x0ad6bf7500000000, 0xd63a0e4800000000, 0xb20fdc0e00000000,
+    0x6ee36d3300000000, 0x99c47ad800000000, 0x4528cbe500000000,
+    0x211d19a300000000, 0xfdf1a89e00000000, 0xe977bd2e00000000,
+    0x359b0c1300000000, 0x51aede5500000000, 0x8d426f6800000000,
+    0x38a484ee00000000, 0xe44835d300000000, 0x807de79500000000,
+    0x5c9156a800000000, 0x4817431800000000, 0x94fbf22500000000,
+    0xf0ce206300000000, 0x2c22915e00000000, 0x5f877f6e00000000,
+    0x836bce5300000000, 0xe75e1c1500000000, 0x3bb2ad2800000000,
+    0x2f34b89800000000, 0xf3d809a500000000, 0x97eddbe300000000,
+    0x4b016ade00000000, 0xfee7815800000000, 0x220b306500000000,
+    0x463ee22300000000, 0x9ad2531e00000000, 0x8e5446ae00000000,
+    0x52b8f79300000000, 0x368d25d500000000, 0xea6194e800000000,
+    0x1d46830300000000, 0xc1aa323e00000000, 0xa59fe07800000000,
+    0x7973514500000000, 0x6df544f500000000, 0xb119f5c800000000,
+    0xd52c278e00000000, 0x09c096b300000000, 0xbc267d3500000000,
+    0x60cacc0800000000, 0x04ff1e4e00000000, 0xd813af7300000000,
+    0xcc95bac300000000, 0x10790bfe00000000, 0x744cd9b800000000,
+    0xa8a0688500000000}};
+
+#else /* W == 4 */
+
+local const z_crc_t FAR crc_braid_table[][256] = {
+   {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f,
+    0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999,
+    0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee,
+    0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615,
+    0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383,
+    0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb,
+    0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275,
+    0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d,
+    0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b,
+    0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460,
+    0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317,
+    0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1,
+    0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5,
+    0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd,
+    0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04,
+    0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c,
+    0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7,
+    0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11,
+    0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66,
+    0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7,
+    0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871,
+    0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309,
+    0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd,
+    0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85,
+    0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913,
+    0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d,
+    0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a,
+    0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc,
+    0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57,
+    0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f,
+    0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6,
+    0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e,
+    0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f,
+    0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289,
+    0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe,
+    0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05,
+    0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893,
+    0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb,
+    0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0,
+    0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8,
+    0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e,
+    0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5,
+    0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2,
+    0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574,
+    0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5,
+    0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add,
+    0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114,
+    0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c,
+    0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7,
+    0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701,
+    0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076,
+    0x09cd8551},
+   {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193,
+    0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2,
+    0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c,
+    0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71,
+    0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a,
+    0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d,
+    0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71,
+    0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436,
+    0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d,
+    0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000,
+    0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae,
+    0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf,
+    0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930,
+    0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277,
+    0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff,
+    0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8,
+    0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef,
+    0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e,
+    0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20,
+    0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95,
+    0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e,
+    0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9,
+    0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 0xebcb400d,
+    0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a,
+    0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151,
+    0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4,
+    0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a,
+    0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b,
+    0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c,
+    0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b,
+    0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3,
+    0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4,
+    0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b,
+    0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a,
+    0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4,
+    0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189,
+    0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92,
+    0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5,
+    0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9,
+    0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe,
+    0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5,
+    0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8,
+    0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66,
+    0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707,
+    0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8,
+    0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f,
+    0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707,
+    0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40,
+    0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017,
+    0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876,
+    0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8,
+    0x7bc97a0c},
+   {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300,
+    0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0,
+    0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80,
+    0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701,
+    0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41,
+    0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81,
+    0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43,
+    0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83,
+    0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3,
+    0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42,
+    0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202,
+    0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2,
+    0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7,
+    0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407,
+    0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47,
+    0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87,
+    0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86,
+    0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46,
+    0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506,
+    0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44,
+    0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704,
+    0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4,
+    0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5,
+    0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505,
+    0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45,
+    0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f,
+    0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f,
+    0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f,
+    0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e,
+    0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e,
+    0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e,
+    0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce,
+    0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c,
+    0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc,
+    0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c,
+    0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d,
+    0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d,
+    0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d,
+    0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88,
+    0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48,
+    0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708,
+    0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89,
+    0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9,
+    0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309,
+    0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb,
+    0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b,
+    0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b,
+    0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b,
+    0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a,
+    0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a,
+    0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a,
+    0x7851a2ca},
+   {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb,
+    0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8,
+    0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0,
+    0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f,
+    0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a,
+    0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf,
+    0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5,
+    0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380,
+    0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815,
+    0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa,
+    0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2,
+    0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1,
+    0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1,
+    0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4,
+    0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa,
+    0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df,
+    0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6,
+    0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5,
+    0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad,
+    0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca,
+    0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f,
+    0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a,
+    0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8,
+    0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d,
+    0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708,
+    0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d,
+    0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865,
+    0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636,
+    0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f,
+    0xe68d0cd1, 0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a,
+    0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744,
+    0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061,
+    0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0,
+    0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293,
+    0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb,
+    0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874,
+    0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1,
+    0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4,
+    0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f,
+    0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a,
+    0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f,
+    0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120,
+    0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778,
+    0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b,
+    0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a,
+    0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af,
+    0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81,
+    0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4,
+    0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd,
+    0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e,
+    0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6,
+    0x566b6848}};
+
+local const z_word_t FAR crc_braid_big_table[][256] = {
+   {0x00000000, 0x9e83da9f, 0x7d01c4e4, 0xe3821e7b, 0xbb04f912,
+    0x2587238d, 0xc6053df6, 0x5886e769, 0x7609f225, 0xe88a28ba,
+    0x0b0836c1, 0x958bec5e, 0xcd0d0b37, 0x538ed1a8, 0xb00ccfd3,
+    0x2e8f154c, 0xec12e44b, 0x72913ed4, 0x911320af, 0x0f90fa30,
+    0x57161d59, 0xc995c7c6, 0x2a17d9bd, 0xb4940322, 0x9a1b166e,
+    0x0498ccf1, 0xe71ad28a, 0x79990815, 0x211fef7c, 0xbf9c35e3,
+    0x5c1e2b98, 0xc29df107, 0xd825c897, 0x46a61208, 0xa5240c73,
+    0x3ba7d6ec, 0x63213185, 0xfda2eb1a, 0x1e20f561, 0x80a32ffe,
+    0xae2c3ab2, 0x30afe02d, 0xd32dfe56, 0x4dae24c9, 0x1528c3a0,
+    0x8bab193f, 0x68290744, 0xf6aadddb, 0x34372cdc, 0xaab4f643,
+    0x4936e838, 0xd7b532a7, 0x8f33d5ce, 0x11b00f51, 0xf232112a,
+    0x6cb1cbb5, 0x423edef9, 0xdcbd0466, 0x3f3f1a1d, 0xa1bcc082,
+    0xf93a27eb, 0x67b9fd74, 0x843be30f, 0x1ab83990, 0xf14de1f4,
+    0x6fce3b6b, 0x8c4c2510, 0x12cfff8f, 0x4a4918e6, 0xd4cac279,
+    0x3748dc02, 0xa9cb069d, 0x874413d1, 0x19c7c94e, 0xfa45d735,
+    0x64c60daa, 0x3c40eac3, 0xa2c3305c, 0x41412e27, 0xdfc2f4b8,
+    0x1d5f05bf, 0x83dcdf20, 0x605ec15b, 0xfedd1bc4, 0xa65bfcad,
+    0x38d82632, 0xdb5a3849, 0x45d9e2d6, 0x6b56f79a, 0xf5d52d05,
+    0x1657337e, 0x88d4e9e1, 0xd0520e88, 0x4ed1d417, 0xad53ca6c,
+    0x33d010f3, 0x29682963, 0xb7ebf3fc, 0x5469ed87, 0xcaea3718,
+    0x926cd071, 0x0cef0aee, 0xef6d1495, 0x71eece0a, 0x5f61db46,
+    0xc1e201d9, 0x22601fa2, 0xbce3c53d, 0xe4652254, 0x7ae6f8cb,
+    0x9964e6b0, 0x07e73c2f, 0xc57acd28, 0x5bf917b7, 0xb87b09cc,
+    0x26f8d353, 0x7e7e343a, 0xe0fdeea5, 0x037ff0de, 0x9dfc2a41,
+    0xb3733f0d, 0x2df0e592, 0xce72fbe9, 0x50f12176, 0x0877c61f,
+    0x96f41c80, 0x757602fb, 0xebf5d864, 0xa39db332, 0x3d1e69ad,
+    0xde9c77d6, 0x401fad49, 0x18994a20, 0x861a90bf, 0x65988ec4,
+    0xfb1b545b, 0xd5944117, 0x4b179b88, 0xa89585f3, 0x36165f6c,
+    0x6e90b805, 0xf013629a, 0x13917ce1, 0x8d12a67e, 0x4f8f5779,
+    0xd10c8de6, 0x328e939d, 0xac0d4902, 0xf48bae6b, 0x6a0874f4,
+    0x898a6a8f, 0x1709b010, 0x3986a55c, 0xa7057fc3, 0x448761b8,
+    0xda04bb27, 0x82825c4e, 0x1c0186d1, 0xff8398aa, 0x61004235,
+    0x7bb87ba5, 0xe53ba13a, 0x06b9bf41, 0x983a65de, 0xc0bc82b7,
+    0x5e3f5828, 0xbdbd4653, 0x233e9ccc, 0x0db18980, 0x9332531f,
+    0x70b04d64, 0xee3397fb, 0xb6b57092, 0x2836aa0d, 0xcbb4b476,
+    0x55376ee9, 0x97aa9fee, 0x09294571, 0xeaab5b0a, 0x74288195,
+    0x2cae66fc, 0xb22dbc63, 0x51afa218, 0xcf2c7887, 0xe1a36dcb,
+    0x7f20b754, 0x9ca2a92f, 0x022173b0, 0x5aa794d9, 0xc4244e46,
+    0x27a6503d, 0xb9258aa2, 0x52d052c6, 0xcc538859, 0x2fd19622,
+    0xb1524cbd, 0xe9d4abd4, 0x7757714b, 0x94d56f30, 0x0a56b5af,
+    0x24d9a0e3, 0xba5a7a7c, 0x59d86407, 0xc75bbe98, 0x9fdd59f1,
+    0x015e836e, 0xe2dc9d15, 0x7c5f478a, 0xbec2b68d, 0x20416c12,
+    0xc3c37269, 0x5d40a8f6, 0x05c64f9f, 0x9b459500, 0x78c78b7b,
+    0xe64451e4, 0xc8cb44a8, 0x56489e37, 0xb5ca804c, 0x2b495ad3,
+    0x73cfbdba, 0xed4c6725, 0x0ece795e, 0x904da3c1, 0x8af59a51,
+    0x147640ce, 0xf7f45eb5, 0x6977842a, 0x31f16343, 0xaf72b9dc,
+    0x4cf0a7a7, 0xd2737d38, 0xfcfc6874, 0x627fb2eb, 0x81fdac90,
+    0x1f7e760f, 0x47f89166, 0xd97b4bf9, 0x3af95582, 0xa47a8f1d,
+    0x66e77e1a, 0xf864a485, 0x1be6bafe, 0x85656061, 0xdde38708,
+    0x43605d97, 0xa0e243ec, 0x3e619973, 0x10ee8c3f, 0x8e6d56a0,
+    0x6def48db, 0xf36c9244, 0xabea752d, 0x3569afb2, 0xd6ebb1c9,
+    0x48686b56},
+   {0x00000000, 0xc0642817, 0x80c9502e, 0x40ad7839, 0x0093a15c,
+    0xc0f7894b, 0x805af172, 0x403ed965, 0x002643b9, 0xc0426bae,
+    0x80ef1397, 0x408b3b80, 0x00b5e2e5, 0xc0d1caf2, 0x807cb2cb,
+    0x40189adc, 0x414af7a9, 0x812edfbe, 0xc183a787, 0x01e78f90,
+    0x41d956f5, 0x81bd7ee2, 0xc11006db, 0x01742ecc, 0x416cb410,
+    0x81089c07, 0xc1a5e43e, 0x01c1cc29, 0x41ff154c, 0x819b3d5b,
+    0xc1364562, 0x01526d75, 0xc3929f88, 0x03f6b79f, 0x435bcfa6,
+    0x833fe7b1, 0xc3013ed4, 0x036516c3, 0x43c86efa, 0x83ac46ed,
+    0xc3b4dc31, 0x03d0f426, 0x437d8c1f, 0x8319a408, 0xc3277d6d,
+    0x0343557a, 0x43ee2d43, 0x838a0554, 0x82d86821, 0x42bc4036,
+    0x0211380f, 0xc2751018, 0x824bc97d, 0x422fe16a, 0x02829953,
+    0xc2e6b144, 0x82fe2b98, 0x429a038f, 0x02377bb6, 0xc25353a1,
+    0x826d8ac4, 0x4209a2d3, 0x02a4daea, 0xc2c0f2fd, 0xc7234eca,
+    0x074766dd, 0x47ea1ee4, 0x878e36f3, 0xc7b0ef96, 0x07d4c781,
+    0x4779bfb8, 0x871d97af, 0xc7050d73, 0x07612564, 0x47cc5d5d,
+    0x87a8754a, 0xc796ac2f, 0x07f28438, 0x475ffc01, 0x873bd416,
+    0x8669b963, 0x460d9174, 0x06a0e94d, 0xc6c4c15a, 0x86fa183f,
+    0x469e3028, 0x06334811, 0xc6576006, 0x864ffada, 0x462bd2cd,
+    0x0686aaf4, 0xc6e282e3, 0x86dc5b86, 0x46b87391, 0x06150ba8,
+    0xc67123bf, 0x04b1d142, 0xc4d5f955, 0x8478816c, 0x441ca97b,
+    0x0422701e, 0xc4465809, 0x84eb2030, 0x448f0827, 0x049792fb,
+    0xc4f3baec, 0x845ec2d5, 0x443aeac2, 0x040433a7, 0xc4601bb0,
+    0x84cd6389, 0x44a94b9e, 0x45fb26eb, 0x859f0efc, 0xc53276c5,
+    0x05565ed2, 0x456887b7, 0x850cafa0, 0xc5a1d799, 0x05c5ff8e,
+    0x45dd6552, 0x85b94d45, 0xc514357c, 0x05701d6b, 0x454ec40e,
+    0x852aec19, 0xc5879420, 0x05e3bc37, 0xcf41ed4f, 0x0f25c558,
+    0x4f88bd61, 0x8fec9576, 0xcfd24c13, 0x0fb66404, 0x4f1b1c3d,
+    0x8f7f342a, 0xcf67aef6, 0x0f0386e1, 0x4faefed8, 0x8fcad6cf,
+    0xcff40faa, 0x0f9027bd, 0x4f3d5f84, 0x8f597793, 0x8e0b1ae6,
+    0x4e6f32f1, 0x0ec24ac8, 0xcea662df, 0x8e98bbba, 0x4efc93ad,
+    0x0e51eb94, 0xce35c383, 0x8e2d595f, 0x4e497148, 0x0ee40971,
+    0xce802166, 0x8ebef803, 0x4edad014, 0x0e77a82d, 0xce13803a,
+    0x0cd372c7, 0xccb75ad0, 0x8c1a22e9, 0x4c7e0afe, 0x0c40d39b,
+    0xcc24fb8c, 0x8c8983b5, 0x4cedaba2, 0x0cf5317e, 0xcc911969,
+    0x8c3c6150, 0x4c584947, 0x0c669022, 0xcc02b835, 0x8cafc00c,
+    0x4ccbe81b, 0x4d99856e, 0x8dfdad79, 0xcd50d540, 0x0d34fd57,
+    0x4d0a2432, 0x8d6e0c25, 0xcdc3741c, 0x0da75c0b, 0x4dbfc6d7,
+    0x8ddbeec0, 0xcd7696f9, 0x0d12beee, 0x4d2c678b, 0x8d484f9c,
+    0xcde537a5, 0x0d811fb2, 0x0862a385, 0xc8068b92, 0x88abf3ab,
+    0x48cfdbbc, 0x08f102d9, 0xc8952ace, 0x883852f7, 0x485c7ae0,
+    0x0844e03c, 0xc820c82b, 0x888db012, 0x48e99805, 0x08d74160,
+    0xc8b36977, 0x881e114e, 0x487a3959, 0x4928542c, 0x894c7c3b,
+    0xc9e10402, 0x09852c15, 0x49bbf570, 0x89dfdd67, 0xc972a55e,
+    0x09168d49, 0x490e1795, 0x896a3f82, 0xc9c747bb, 0x09a36fac,
+    0x499db6c9, 0x89f99ede, 0xc954e6e7, 0x0930cef0, 0xcbf03c0d,
+    0x0b94141a, 0x4b396c23, 0x8b5d4434, 0xcb639d51, 0x0b07b546,
+    0x4baacd7f, 0x8bcee568, 0xcbd67fb4, 0x0bb257a3, 0x4b1f2f9a,
+    0x8b7b078d, 0xcb45dee8, 0x0b21f6ff, 0x4b8c8ec6, 0x8be8a6d1,
+    0x8abacba4, 0x4adee3b3, 0x0a739b8a, 0xca17b39d, 0x8a296af8,
+    0x4a4d42ef, 0x0ae03ad6, 0xca8412c1, 0x8a9c881d, 0x4af8a00a,
+    0x0a55d833, 0xca31f024, 0x8a0f2941, 0x4a6b0156, 0x0ac6796f,
+    0xcaa25178},
+   {0x00000000, 0xd4ea739b, 0xe9d396ed, 0x3d39e576, 0x93a15c00,
+    0x474b2f9b, 0x7a72caed, 0xae98b976, 0x2643b900, 0xf2a9ca9b,
+    0xcf902fed, 0x1b7a5c76, 0xb5e2e500, 0x6108969b, 0x5c3173ed,
+    0x88db0076, 0x4c867201, 0x986c019a, 0xa555e4ec, 0x71bf9777,
+    0xdf272e01, 0x0bcd5d9a, 0x36f4b8ec, 0xe21ecb77, 0x6ac5cb01,
+    0xbe2fb89a, 0x83165dec, 0x57fc2e77, 0xf9649701, 0x2d8ee49a,
+    0x10b701ec, 0xc45d7277, 0x980ce502, 0x4ce69699, 0x71df73ef,
+    0xa5350074, 0x0badb902, 0xdf47ca99, 0xe27e2fef, 0x36945c74,
+    0xbe4f5c02, 0x6aa52f99, 0x579ccaef, 0x8376b974, 0x2dee0002,
+    0xf9047399, 0xc43d96ef, 0x10d7e574, 0xd48a9703, 0x0060e498,
+    0x3d5901ee, 0xe9b37275, 0x472bcb03, 0x93c1b898, 0xaef85dee,
+    0x7a122e75, 0xf2c92e03, 0x26235d98, 0x1b1ab8ee, 0xcff0cb75,
+    0x61687203, 0xb5820198, 0x88bbe4ee, 0x5c519775, 0x3019ca05,
+    0xe4f3b99e, 0xd9ca5ce8, 0x0d202f73, 0xa3b89605, 0x7752e59e,
+    0x4a6b00e8, 0x9e817373, 0x165a7305, 0xc2b0009e, 0xff89e5e8,
+    0x2b639673, 0x85fb2f05, 0x51115c9e, 0x6c28b9e8, 0xb8c2ca73,
+    0x7c9fb804, 0xa875cb9f, 0x954c2ee9, 0x41a65d72, 0xef3ee404,
+    0x3bd4979f, 0x06ed72e9, 0xd2070172, 0x5adc0104, 0x8e36729f,
+    0xb30f97e9, 0x67e5e472, 0xc97d5d04, 0x1d972e9f, 0x20aecbe9,
+    0xf444b872, 0xa8152f07, 0x7cff5c9c, 0x41c6b9ea, 0x952cca71,
+    0x3bb47307, 0xef5e009c, 0xd267e5ea, 0x068d9671, 0x8e569607,
+    0x5abce59c, 0x678500ea, 0xb36f7371, 0x1df7ca07, 0xc91db99c,
+    0xf4245cea, 0x20ce2f71, 0xe4935d06, 0x30792e9d, 0x0d40cbeb,
+    0xd9aab870, 0x77320106, 0xa3d8729d, 0x9ee197eb, 0x4a0be470,
+    0xc2d0e406, 0x163a979d, 0x2b0372eb, 0xffe90170, 0x5171b806,
+    0x859bcb9d, 0xb8a22eeb, 0x6c485d70, 0x6032940b, 0xb4d8e790,
+    0x89e102e6, 0x5d0b717d, 0xf393c80b, 0x2779bb90, 0x1a405ee6,
+    0xceaa2d7d, 0x46712d0b, 0x929b5e90, 0xafa2bbe6, 0x7b48c87d,
+    0xd5d0710b, 0x013a0290, 0x3c03e7e6, 0xe8e9947d, 0x2cb4e60a,
+    0xf85e9591, 0xc56770e7, 0x118d037c, 0xbf15ba0a, 0x6bffc991,
+    0x56c62ce7, 0x822c5f7c, 0x0af75f0a, 0xde1d2c91, 0xe324c9e7,
+    0x37ceba7c, 0x9956030a, 0x4dbc7091, 0x708595e7, 0xa46fe67c,
+    0xf83e7109, 0x2cd40292, 0x11ede7e4, 0xc507947f, 0x6b9f2d09,
+    0xbf755e92, 0x824cbbe4, 0x56a6c87f, 0xde7dc809, 0x0a97bb92,
+    0x37ae5ee4, 0xe3442d7f, 0x4ddc9409, 0x9936e792, 0xa40f02e4,
+    0x70e5717f, 0xb4b80308, 0x60527093, 0x5d6b95e5, 0x8981e67e,
+    0x27195f08, 0xf3f32c93, 0xcecac9e5, 0x1a20ba7e, 0x92fbba08,
+    0x4611c993, 0x7b282ce5, 0xafc25f7e, 0x015ae608, 0xd5b09593,
+    0xe88970e5, 0x3c63037e, 0x502b5e0e, 0x84c12d95, 0xb9f8c8e3,
+    0x6d12bb78, 0xc38a020e, 0x17607195, 0x2a5994e3, 0xfeb3e778,
+    0x7668e70e, 0xa2829495, 0x9fbb71e3, 0x4b510278, 0xe5c9bb0e,
+    0x3123c895, 0x0c1a2de3, 0xd8f05e78, 0x1cad2c0f, 0xc8475f94,
+    0xf57ebae2, 0x2194c979, 0x8f0c700f, 0x5be60394, 0x66dfe6e2,
+    0xb2359579, 0x3aee950f, 0xee04e694, 0xd33d03e2, 0x07d77079,
+    0xa94fc90f, 0x7da5ba94, 0x409c5fe2, 0x94762c79, 0xc827bb0c,
+    0x1ccdc897, 0x21f42de1, 0xf51e5e7a, 0x5b86e70c, 0x8f6c9497,
+    0xb25571e1, 0x66bf027a, 0xee64020c, 0x3a8e7197, 0x07b794e1,
+    0xd35de77a, 0x7dc55e0c, 0xa92f2d97, 0x9416c8e1, 0x40fcbb7a,
+    0x84a1c90d, 0x504bba96, 0x6d725fe0, 0xb9982c7b, 0x1700950d,
+    0xc3eae696, 0xfed303e0, 0x2a39707b, 0xa2e2700d, 0x76080396,
+    0x4b31e6e0, 0x9fdb957b, 0x31432c0d, 0xe5a95f96, 0xd890bae0,
+    0x0c7ac97b},
+   {0x00000000, 0x27652581, 0x0fcc3bd9, 0x28a91e58, 0x5f9e0669,
+    0x78fb23e8, 0x50523db0, 0x77371831, 0xbe3c0dd2, 0x99592853,
+    0xb1f0360b, 0x9695138a, 0xe1a20bbb, 0xc6c72e3a, 0xee6e3062,
+    0xc90b15e3, 0x3d7f6b7f, 0x1a1a4efe, 0x32b350a6, 0x15d67527,
+    0x62e16d16, 0x45844897, 0x6d2d56cf, 0x4a48734e, 0x834366ad,
+    0xa426432c, 0x8c8f5d74, 0xabea78f5, 0xdcdd60c4, 0xfbb84545,
+    0xd3115b1d, 0xf4747e9c, 0x7afed6fe, 0x5d9bf37f, 0x7532ed27,
+    0x5257c8a6, 0x2560d097, 0x0205f516, 0x2aaceb4e, 0x0dc9cecf,
+    0xc4c2db2c, 0xe3a7fead, 0xcb0ee0f5, 0xec6bc574, 0x9b5cdd45,
+    0xbc39f8c4, 0x9490e69c, 0xb3f5c31d, 0x4781bd81, 0x60e49800,
+    0x484d8658, 0x6f28a3d9, 0x181fbbe8, 0x3f7a9e69, 0x17d38031,
+    0x30b6a5b0, 0xf9bdb053, 0xded895d2, 0xf6718b8a, 0xd114ae0b,
+    0xa623b63a, 0x814693bb, 0xa9ef8de3, 0x8e8aa862, 0xb5fadc26,
+    0x929ff9a7, 0xba36e7ff, 0x9d53c27e, 0xea64da4f, 0xcd01ffce,
+    0xe5a8e196, 0xc2cdc417, 0x0bc6d1f4, 0x2ca3f475, 0x040aea2d,
+    0x236fcfac, 0x5458d79d, 0x733df21c, 0x5b94ec44, 0x7cf1c9c5,
+    0x8885b759, 0xafe092d8, 0x87498c80, 0xa02ca901, 0xd71bb130,
+    0xf07e94b1, 0xd8d78ae9, 0xffb2af68, 0x36b9ba8b, 0x11dc9f0a,
+    0x39758152, 0x1e10a4d3, 0x6927bce2, 0x4e429963, 0x66eb873b,
+    0x418ea2ba, 0xcf040ad8, 0xe8612f59, 0xc0c83101, 0xe7ad1480,
+    0x909a0cb1, 0xb7ff2930, 0x9f563768, 0xb83312e9, 0x7138070a,
+    0x565d228b, 0x7ef43cd3, 0x59911952, 0x2ea60163, 0x09c324e2,
+    0x216a3aba, 0x060f1f3b, 0xf27b61a7, 0xd51e4426, 0xfdb75a7e,
+    0xdad27fff, 0xade567ce, 0x8a80424f, 0xa2295c17, 0x854c7996,
+    0x4c476c75, 0x6b2249f4, 0x438b57ac, 0x64ee722d, 0x13d96a1c,
+    0x34bc4f9d, 0x1c1551c5, 0x3b707444, 0x6af5b94d, 0x4d909ccc,
+    0x65398294, 0x425ca715, 0x356bbf24, 0x120e9aa5, 0x3aa784fd,
+    0x1dc2a17c, 0xd4c9b49f, 0xf3ac911e, 0xdb058f46, 0xfc60aac7,
+    0x8b57b2f6, 0xac329777, 0x849b892f, 0xa3feacae, 0x578ad232,
+    0x70eff7b3, 0x5846e9eb, 0x7f23cc6a, 0x0814d45b, 0x2f71f1da,
+    0x07d8ef82, 0x20bdca03, 0xe9b6dfe0, 0xced3fa61, 0xe67ae439,
+    0xc11fc1b8, 0xb628d989, 0x914dfc08, 0xb9e4e250, 0x9e81c7d1,
+    0x100b6fb3, 0x376e4a32, 0x1fc7546a, 0x38a271eb, 0x4f9569da,
+    0x68f04c5b, 0x40595203, 0x673c7782, 0xae376261, 0x895247e0,
+    0xa1fb59b8, 0x869e7c39, 0xf1a96408, 0xd6cc4189, 0xfe655fd1,
+    0xd9007a50, 0x2d7404cc, 0x0a11214d, 0x22b83f15, 0x05dd1a94,
+    0x72ea02a5, 0x558f2724, 0x7d26397c, 0x5a431cfd, 0x9348091e,
+    0xb42d2c9f, 0x9c8432c7, 0xbbe11746, 0xccd60f77, 0xebb32af6,
+    0xc31a34ae, 0xe47f112f, 0xdf0f656b, 0xf86a40ea, 0xd0c35eb2,
+    0xf7a67b33, 0x80916302, 0xa7f44683, 0x8f5d58db, 0xa8387d5a,
+    0x613368b9, 0x46564d38, 0x6eff5360, 0x499a76e1, 0x3ead6ed0,
+    0x19c84b51, 0x31615509, 0x16047088, 0xe2700e14, 0xc5152b95,
+    0xedbc35cd, 0xcad9104c, 0xbdee087d, 0x9a8b2dfc, 0xb22233a4,
+    0x95471625, 0x5c4c03c6, 0x7b292647, 0x5380381f, 0x74e51d9e,
+    0x03d205af, 0x24b7202e, 0x0c1e3e76, 0x2b7b1bf7, 0xa5f1b395,
+    0x82949614, 0xaa3d884c, 0x8d58adcd, 0xfa6fb5fc, 0xdd0a907d,
+    0xf5a38e25, 0xd2c6aba4, 0x1bcdbe47, 0x3ca89bc6, 0x1401859e,
+    0x3364a01f, 0x4453b82e, 0x63369daf, 0x4b9f83f7, 0x6cfaa676,
+    0x988ed8ea, 0xbfebfd6b, 0x9742e333, 0xb027c6b2, 0xc710de83,
+    0xe075fb02, 0xc8dce55a, 0xefb9c0db, 0x26b2d538, 0x01d7f0b9,
+    0x297eeee1, 0x0e1bcb60, 0x792cd351, 0x5e49f6d0, 0x76e0e888,
+    0x5185cd09}};
+
+#endif
+
+#endif
+
+#endif
+
+local const z_crc_t FAR x2n_table[] = {
+    0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000,
+    0xedb88320, 0xb1e6b092, 0xa06a2517, 0xed627dae, 0x88d14467,
+    0xd7bbfe6a, 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0,
+    0x09fe548f, 0x83852d0f, 0x30362f1a, 0x7b5a9cc3, 0x31fec169,
+    0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e, 0xbad90e37,
+    0x2e4e5eef, 0x4eaba214, 0xa8a472c0, 0x429a969e, 0x148d302a,
+    0xc40ba6d0, 0xc4e22c3c};
diff --git a/reg-io/zlib/deflate.c b/reg-io/zlib/deflate.c
index 29ce1f64..012ea814 100644
--- a/reg-io/zlib/deflate.c
+++ b/reg-io/zlib/deflate.c
@@ -1,5 +1,5 @@
 /* deflate.c -- compress data using the deflation algorithm
- * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -37,7 +37,7 @@
  *  REFERENCES
  *
  *      Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
- *      Available in http://www.ietf.org/rfc/rfc1951.txt
+ *      Available in http://tools.ietf.org/html/rfc1951
  *
  *      A description of the Rabin and Karp algorithm is given in the book
  *         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
@@ -52,7 +52,7 @@
 #include "deflate.h"
 
 const char deflate_copyright[] =
-   " deflate 1.2.3 Copyright 1995-2005 Jean-loup Gailly ";
+   " deflate 1.3.1 Copyright 1995-2024 Jean-loup Gailly and Mark Adler ";
 /*
   If you use the zlib library in a product, an acknowledgment is welcome
   in the documentation of your product. If for some reason you cannot
@@ -60,9 +60,6 @@ const char deflate_copyright[] =
   copyright string in the executable of your product.
  */
 
-/* ===========================================================================
- *  Function prototypes.
- */
 typedef enum {
     need_more,      /* block not completed, need more input or more output */
     block_done,     /* block flush performed */
@@ -70,33 +67,16 @@ typedef enum {
     finish_done     /* finish done, accept no more input or output */
 } block_state;
 
-typedef block_state (*compress_func) OF((deflate_state *s, int flush));
+typedef block_state (*compress_func)(deflate_state *s, int flush);
 /* Compression function. Returns the block state after the call. */
 
-local void fill_window    OF((deflate_state *s));
-local block_state deflate_stored OF((deflate_state *s, int flush));
-local block_state deflate_fast   OF((deflate_state *s, int flush));
+local block_state deflate_stored(deflate_state *s, int flush);
+local block_state deflate_fast(deflate_state *s, int flush);
 #ifndef FASTEST
-local block_state deflate_slow   OF((deflate_state *s, int flush));
-#endif
-local void lm_init        OF((deflate_state *s));
-local void putShortMSB    OF((deflate_state *s, uInt b));
-local void flush_pending  OF((z_streamp strm));
-local int read_buf        OF((z_streamp strm, Bytef *buf, unsigned size));
-#ifndef FASTEST
-#ifdef ASMV
-      void match_init OF((void)); /* asm code initialization */
-      uInt longest_match  OF((deflate_state *s, IPos cur_match));
-#else
-local uInt longest_match  OF((deflate_state *s, IPos cur_match));
-#endif
-#endif
-local uInt longest_match_fast OF((deflate_state *s, IPos cur_match));
-
-#ifdef DEBUG
-local  void check_match OF((deflate_state *s, IPos start, IPos match,
-                            int length));
+local block_state deflate_slow(deflate_state *s, int flush);
 #endif
+local block_state deflate_rle(deflate_state *s, int flush);
+local block_state deflate_huff(deflate_state *s, int flush);
 
 /* ===========================================================================
  * Local data
@@ -110,11 +90,6 @@ local  void check_match OF((deflate_state *s, IPos start, IPos match,
 #endif
 /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
 
-#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
-/* Minimum amount of lookahead, except at the end of the input file.
- * See deflate.c for comments about the MIN_MATCH+1.
- */
-
 /* Values for max_lazy_match, good_match and max_chain_length, depending on
  * the desired pack level (0..9). The values given below have been tuned to
  * exclude worst case performance for pathological files. Better values may be
@@ -154,20 +129,16 @@ local const config configuration_table[10] = {
  * meaning.
  */
 
-#define EQUAL 0
-/* result of memcmp for equal strings */
-
-#ifndef NO_DUMMY_DECL
-struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
-#endif
+/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */
+#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0))
 
 /* ===========================================================================
  * Update a hash value with the given input byte
- * IN  assertion: all calls to to UPDATE_HASH are made with consecutive
- *    input characters, so that a running hash key can be computed from the
- *    previous key instead of complete recalculation each time.
+ * IN  assertion: all calls to UPDATE_HASH are made with consecutive input
+ *    characters, so that a running hash key can be computed from the previous
+ *    key instead of complete recalculation each time.
  */
-#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+#define UPDATE_HASH(s,h,c) (h = (((h) << s->hash_shift) ^ (c)) & s->hash_mask)
 
 
 /* ===========================================================================
@@ -176,9 +147,9 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
  * the previous length of the hash chain.
  * If this file is compiled with -DFASTEST, the compression level is forced
  * to 1, and no hash chains are maintained.
- * IN  assertion: all calls to to INSERT_STRING are made with consecutive
- *    input characters and the first MIN_MATCH bytes of str are valid
- *    (except for the last MIN_MATCH-1 bytes of the input file).
+ * IN  assertion: all calls to INSERT_STRING are made with consecutive input
+ *    characters and the first MIN_MATCH bytes of str are valid (except for
+ *    the last MIN_MATCH-1 bytes of the input file).
  */
 #ifdef FASTEST
 #define INSERT_STRING(s, str, match_head) \
@@ -197,42 +168,221 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
  * prev[] will be initialized on the fly.
  */
 #define CLEAR_HASH(s) \
-    s->head[s->hash_size-1] = NIL; \
-    zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+    do { \
+        s->head[s->hash_size - 1] = NIL; \
+        zmemzero((Bytef *)s->head, \
+                 (unsigned)(s->hash_size - 1)*sizeof(*s->head)); \
+    } while (0)
+
+/* ===========================================================================
+ * Slide the hash table when sliding the window down (could be avoided with 32
+ * bit values at the expense of memory usage). We slide even when level == 0 to
+ * keep the hash table consistent if we switch back to level > 0 later.
+ */
+#if defined(__has_feature)
+#  if __has_feature(memory_sanitizer)
+     __attribute__((no_sanitize("memory")))
+#  endif
+#endif
+local void slide_hash(deflate_state *s) {
+    unsigned n, m;
+    Posf *p;
+    uInt wsize = s->w_size;
+
+    n = s->hash_size;
+    p = &s->head[n];
+    do {
+        m = *--p;
+        *p = (Pos)(m >= wsize ? m - wsize : NIL);
+    } while (--n);
+    n = wsize;
+#ifndef FASTEST
+    p = &s->prev[n];
+    do {
+        m = *--p;
+        *p = (Pos)(m >= wsize ? m - wsize : NIL);
+        /* If n is not on any hash chain, prev[n] is garbage but
+         * its value will never be used.
+         */
+    } while (--n);
+#endif
+}
+
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read.  All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+local unsigned read_buf(z_streamp strm, Bytef *buf, unsigned size) {
+    unsigned len = strm->avail_in;
+
+    if (len > size) len = size;
+    if (len == 0) return 0;
+
+    strm->avail_in  -= len;
+
+    zmemcpy(buf, strm->next_in, len);
+    if (strm->state->wrap == 1) {
+        strm->adler = adler32(strm->adler, buf, len);
+    }
+#ifdef GZIP
+    else if (strm->state->wrap == 2) {
+        strm->adler = crc32(strm->adler, buf, len);
+    }
+#endif
+    strm->next_in  += len;
+    strm->total_in += len;
+
+    return len;
+}
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ *    At least one byte has been read, or avail_in == 0; reads are
+ *    performed for at least two bytes (required for the zip translate_eol
+ *    option -- not supported here).
+ */
+local void fill_window(deflate_state *s) {
+    unsigned n;
+    unsigned more;    /* Amount of free space at the end of the window. */
+    uInt wsize = s->w_size;
+
+    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
+
+    do {
+        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+        /* Deal with !@#$% 64K limit: */
+        if (sizeof(int) <= 2) {
+            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+                more = wsize;
+
+            } else if (more == (unsigned)(-1)) {
+                /* Very unlikely, but possible on 16 bit machine if
+                 * strstart == 0 && lookahead == 1 (input done a byte at time)
+                 */
+                more--;
+            }
+        }
+
+        /* If the window is almost full and there is insufficient lookahead,
+         * move the upper half to the lower one to make room in the upper half.
+         */
+        if (s->strstart >= wsize + MAX_DIST(s)) {
+
+            zmemcpy(s->window, s->window + wsize, (unsigned)wsize - more);
+            s->match_start -= wsize;
+            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
+            s->block_start -= (long) wsize;
+            if (s->insert > s->strstart)
+                s->insert = s->strstart;
+            slide_hash(s);
+            more += wsize;
+        }
+        if (s->strm->avail_in == 0) break;
+
+        /* If there was no sliding:
+         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+         *    more == window_size - lookahead - strstart
+         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+         * => more >= window_size - 2*WSIZE + 2
+         * In the BIG_MEM or MMAP case (not yet supported),
+         *   window_size == input_size + MIN_LOOKAHEAD  &&
+         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+         * Otherwise, window_size == 2*WSIZE so more >= 2.
+         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+         */
+        Assert(more >= 2, "more < 2");
+
+        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+        s->lookahead += n;
+
+        /* Initialize the hash value now that we have some input: */
+        if (s->lookahead + s->insert >= MIN_MATCH) {
+            uInt str = s->strstart - s->insert;
+            s->ins_h = s->window[str];
+            UPDATE_HASH(s, s->ins_h, s->window[str + 1]);
+#if MIN_MATCH != 3
+            Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+            while (s->insert) {
+                UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
+#ifndef FASTEST
+                s->prev[str & s->w_mask] = s->head[s->ins_h];
+#endif
+                s->head[s->ins_h] = (Pos)str;
+                str++;
+                s->insert--;
+                if (s->lookahead + s->insert < MIN_MATCH)
+                    break;
+            }
+        }
+        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+         * but this is not important since only literal bytes will be emitted.
+         */
+
+    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+
+    /* If the WIN_INIT bytes after the end of the current data have never been
+     * written, then zero those bytes in order to avoid memory check reports of
+     * the use of uninitialized (or uninitialised as Julian writes) bytes by
+     * the longest match routines.  Update the high water mark for the next
+     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
+     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
+     */
+    if (s->high_water < s->window_size) {
+        ulg curr = s->strstart + (ulg)(s->lookahead);
+        ulg init;
+
+        if (s->high_water < curr) {
+            /* Previous high water mark below current data -- zero WIN_INIT
+             * bytes or up to end of window, whichever is less.
+             */
+            init = s->window_size - curr;
+            if (init > WIN_INIT)
+                init = WIN_INIT;
+            zmemzero(s->window + curr, (unsigned)init);
+            s->high_water = curr + init;
+        }
+        else if (s->high_water < (ulg)curr + WIN_INIT) {
+            /* High water mark at or above current data, but below current data
+             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
+             * to end of window, whichever is less.
+             */
+            init = (ulg)curr + WIN_INIT - s->high_water;
+            if (init > s->window_size - s->high_water)
+                init = s->window_size - s->high_water;
+            zmemzero(s->window + s->high_water, (unsigned)init);
+            s->high_water += init;
+        }
+    }
+
+    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
+           "not enough room for search");
+}
 
 /* ========================================================================= */
-int ZEXPORT deflateInit_(strm, level, version, stream_size)
-    z_streamp strm;
-    int level;
-    const char *version;
-    int stream_size;
-{
+int ZEXPORT deflateInit_(z_streamp strm, int level, const char *version,
+                         int stream_size) {
     return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
                          Z_DEFAULT_STRATEGY, version, stream_size);
     /* To do: ignore strm->next_in if we use it as window */
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
-                  version, stream_size)
-    z_streamp strm;
-    int  level;
-    int  method;
-    int  windowBits;
-    int  memLevel;
-    int  strategy;
-    const char *version;
-    int stream_size;
-{
+int ZEXPORT deflateInit2_(z_streamp strm, int level, int method,
+                          int windowBits, int memLevel, int strategy,
+                          const char *version, int stream_size) {
     deflate_state *s;
     int wrap = 1;
     static const char my_version[] = ZLIB_VERSION;
 
-    ushf *overlay;
-    /* We overlay pending_buf and d_buf+l_buf. This works since the average
-     * output size for (length,distance) codes is <= 24 bits.
-     */
-
     if (version == Z_NULL || version[0] != my_version[0] ||
         stream_size != sizeof(z_stream)) {
         return Z_VERSION_ERROR;
@@ -241,10 +391,19 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
 
     strm->msg = Z_NULL;
     if (strm->zalloc == (alloc_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
         strm->zalloc = zcalloc;
         strm->opaque = (voidpf)0;
+#endif
     }
-    if (strm->zfree == (free_func)0) strm->zfree = zcfree;
+    if (strm->zfree == (free_func)0)
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zfree = zcfree;
+#endif
 
 #ifdef FASTEST
     if (level != 0) level = 1;
@@ -254,6 +413,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
 
     if (windowBits < 0) { /* suppress zlib wrapper */
         wrap = 0;
+        if (windowBits < -15)
+            return Z_STREAM_ERROR;
         windowBits = -windowBits;
     }
 #ifdef GZIP
@@ -264,7 +425,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
 #endif
     if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
         windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
-        strategy < 0 || strategy > Z_FIXED) {
+        strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) {
         return Z_STREAM_ERROR;
     }
     if (windowBits == 8) windowBits = 9;  /* until 256-byte window bug fixed */
@@ -272,37 +433,88 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
     if (s == Z_NULL) return Z_MEM_ERROR;
     strm->state = (struct internal_state FAR *)s;
     s->strm = strm;
+    s->status = INIT_STATE;     /* to pass state test in deflateReset() */
 
     s->wrap = wrap;
     s->gzhead = Z_NULL;
-    s->w_bits = windowBits;
+    s->w_bits = (uInt)windowBits;
     s->w_size = 1 << s->w_bits;
     s->w_mask = s->w_size - 1;
 
-    s->hash_bits = memLevel + 7;
+    s->hash_bits = (uInt)memLevel + 7;
     s->hash_size = 1 << s->hash_bits;
     s->hash_mask = s->hash_size - 1;
-    s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
+    s->hash_shift =  ((s->hash_bits + MIN_MATCH-1) / MIN_MATCH);
 
     s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
     s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
     s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));
 
+    s->high_water = 0;      /* nothing written to s->window yet */
+
     s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
 
-    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
-    s->pending_buf = (uchf *) overlay;
-    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
+    /* We overlay pending_buf and sym_buf. This works since the average size
+     * for length/distance pairs over any compressed block is assured to be 31
+     * bits or less.
+     *
+     * Analysis: The longest fixed codes are a length code of 8 bits plus 5
+     * extra bits, for lengths 131 to 257. The longest fixed distance codes are
+     * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest
+     * possible fixed-codes length/distance pair is then 31 bits total.
+     *
+     * sym_buf starts one-fourth of the way into pending_buf. So there are
+     * three bytes in sym_buf for every four bytes in pending_buf. Each symbol
+     * in sym_buf is three bytes -- two for the distance and one for the
+     * literal/length. As each symbol is consumed, the pointer to the next
+     * sym_buf value to read moves forward three bytes. From that symbol, up to
+     * 31 bits are written to pending_buf. The closest the written pending_buf
+     * bits gets to the next sym_buf symbol to read is just before the last
+     * code is written. At that time, 31*(n - 2) bits have been written, just
+     * after 24*(n - 2) bits have been consumed from sym_buf. sym_buf starts at
+     * 8*n bits into pending_buf. (Note that the symbol buffer fills when n - 1
+     * symbols are written.) The closest the writing gets to what is unread is
+     * then n + 14 bits. Here n is lit_bufsize, which is 16384 by default, and
+     * can range from 128 to 32768.
+     *
+     * Therefore, at a minimum, there are 142 bits of space between what is
+     * written and what is read in the overlain buffers, so the symbols cannot
+     * be overwritten by the compressed data. That space is actually 139 bits,
+     * due to the three-bit fixed-code block header.
+     *
+     * That covers the case where either Z_FIXED is specified, forcing fixed
+     * codes, or when the use of fixed codes is chosen, because that choice
+     * results in a smaller compressed block than dynamic codes. That latter
+     * condition then assures that the above analysis also covers all dynamic
+     * blocks. A dynamic-code block will only be chosen to be emitted if it has
+     * fewer bits than a fixed-code block would for the same set of symbols.
+     * Therefore its average symbol length is assured to be less than 31. So
+     * the compressed data for a dynamic block also cannot overwrite the
+     * symbols from which it is being constructed.
+     */
+
+    s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, LIT_BUFS);
+    s->pending_buf_size = (ulg)s->lit_bufsize * 4;
 
     if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
         s->pending_buf == Z_NULL) {
         s->status = FINISH_STATE;
-        strm->msg = (char*)ERR_MSG(Z_MEM_ERROR);
+        strm->msg = ERR_MSG(Z_MEM_ERROR);
         deflateEnd (strm);
         return Z_MEM_ERROR;
     }
-    s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
-    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
+#ifdef LIT_MEM
+    s->d_buf = (ushf *)(s->pending_buf + (s->lit_bufsize << 1));
+    s->l_buf = s->pending_buf + (s->lit_bufsize << 2);
+    s->sym_end = s->lit_bufsize - 1;
+#else
+    s->sym_buf = s->pending_buf + s->lit_bufsize;
+    s->sym_end = (s->lit_bufsize - 1) * 3;
+#endif
+    /* We avoid equality with lit_bufsize*3 because of wraparound at 64K
+     * on 16 bit machines and because stored blocks are restricted to
+     * 64K-1 bytes.
+     */
 
     s->level = level;
     s->strategy = strategy;
@@ -311,56 +523,119 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
     return deflateReset(strm);
 }
 
+/* =========================================================================
+ * Check for a valid deflate stream state. Return 0 if ok, 1 if not.
+ */
+local int deflateStateCheck(z_streamp strm) {
+    deflate_state *s;
+    if (strm == Z_NULL ||
+        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
+        return 1;
+    s = strm->state;
+    if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE &&
+#ifdef GZIP
+                                           s->status != GZIP_STATE &&
+#endif
+                                           s->status != EXTRA_STATE &&
+                                           s->status != NAME_STATE &&
+                                           s->status != COMMENT_STATE &&
+                                           s->status != HCRC_STATE &&
+                                           s->status != BUSY_STATE &&
+                                           s->status != FINISH_STATE))
+        return 1;
+    return 0;
+}
+
 /* ========================================================================= */
-int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
-    z_streamp strm;
-    const Bytef *dictionary;
-    uInt  dictLength;
-{
+int ZEXPORT deflateSetDictionary(z_streamp strm, const Bytef *dictionary,
+                                 uInt  dictLength) {
     deflate_state *s;
-    uInt length = dictLength;
-    uInt n;
-    IPos hash_head = 0;
+    uInt str, n;
+    int wrap;
+    unsigned avail;
+    z_const unsigned char *next;
 
-    if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL ||
-        strm->state->wrap == 2 ||
-        (strm->state->wrap == 1 && strm->state->status != INIT_STATE))
+    if (deflateStateCheck(strm) || dictionary == Z_NULL)
         return Z_STREAM_ERROR;
-
     s = strm->state;
-    if (s->wrap)
-        strm->adler = adler32(strm->adler, dictionary, dictLength);
+    wrap = s->wrap;
+    if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead)
+        return Z_STREAM_ERROR;
 
-    if (length < MIN_MATCH) return Z_OK;
-    if (length > MAX_DIST(s)) {
-        length = MAX_DIST(s);
-        dictionary += dictLength - length; /* use the tail of the dictionary */
+    /* when using zlib wrappers, compute Adler-32 for provided dictionary */
+    if (wrap == 1)
+        strm->adler = adler32(strm->adler, dictionary, dictLength);
+    s->wrap = 0;                    /* avoid computing Adler-32 in read_buf */
+
+    /* if dictionary would fill window, just replace the history */
+    if (dictLength >= s->w_size) {
+        if (wrap == 0) {            /* already empty otherwise */
+            CLEAR_HASH(s);
+            s->strstart = 0;
+            s->block_start = 0L;
+            s->insert = 0;
+        }
+        dictionary += dictLength - s->w_size;  /* use the tail */
+        dictLength = s->w_size;
     }
-    zmemcpy(s->window, dictionary, length);
-    s->strstart = length;
-    s->block_start = (long)length;
 
-    /* Insert all strings in the hash table (except for the last two bytes).
-     * s->lookahead stays null, so s->ins_h will be recomputed at the next
-     * call of fill_window.
-     */
-    s->ins_h = s->window[0];
-    UPDATE_HASH(s, s->ins_h, s->window[1]);
-    for (n = 0; n <= length - MIN_MATCH; n++) {
-        INSERT_STRING(s, n, hash_head);
+    /* insert dictionary into window and hash */
+    avail = strm->avail_in;
+    next = strm->next_in;
+    strm->avail_in = dictLength;
+    strm->next_in = (z_const Bytef *)dictionary;
+    fill_window(s);
+    while (s->lookahead >= MIN_MATCH) {
+        str = s->strstart;
+        n = s->lookahead - (MIN_MATCH-1);
+        do {
+            UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
+#ifndef FASTEST
+            s->prev[str & s->w_mask] = s->head[s->ins_h];
+#endif
+            s->head[s->ins_h] = (Pos)str;
+            str++;
+        } while (--n);
+        s->strstart = str;
+        s->lookahead = MIN_MATCH-1;
+        fill_window(s);
     }
-    if (hash_head) hash_head = 0;  /* to make compiler happy */
+    s->strstart += s->lookahead;
+    s->block_start = (long)s->strstart;
+    s->insert = s->lookahead;
+    s->lookahead = 0;
+    s->match_length = s->prev_length = MIN_MATCH-1;
+    s->match_available = 0;
+    strm->next_in = next;
+    strm->avail_in = avail;
+    s->wrap = wrap;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateGetDictionary(z_streamp strm, Bytef *dictionary,
+                                 uInt *dictLength) {
+    deflate_state *s;
+    uInt len;
+
+    if (deflateStateCheck(strm))
+        return Z_STREAM_ERROR;
+    s = strm->state;
+    len = s->strstart + s->lookahead;
+    if (len > s->w_size)
+        len = s->w_size;
+    if (dictionary != Z_NULL && len)
+        zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len);
+    if (dictLength != Z_NULL)
+        *dictLength = len;
     return Z_OK;
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateReset (strm)
-    z_streamp strm;
-{
+int ZEXPORT deflateResetKeep(z_streamp strm) {
     deflate_state *s;
 
-    if (strm == Z_NULL || strm->state == Z_NULL ||
-        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) {
+    if (deflateStateCheck(strm)) {
         return Z_STREAM_ERROR;
     }
 
@@ -375,54 +650,110 @@ int ZEXPORT deflateReset (strm)
     if (s->wrap < 0) {
         s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */
     }
-    s->status = s->wrap ? INIT_STATE : BUSY_STATE;
+    s->status =
+#ifdef GZIP
+        s->wrap == 2 ? GZIP_STATE :
+#endif
+        INIT_STATE;
     strm->adler =
 #ifdef GZIP
         s->wrap == 2 ? crc32(0L, Z_NULL, 0) :
 #endif
         adler32(0L, Z_NULL, 0);
-    s->last_flush = Z_NO_FLUSH;
+    s->last_flush = -2;
 
     _tr_init(s);
-    lm_init(s);
 
     return Z_OK;
 }
 
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ */
+local void lm_init(deflate_state *s) {
+    s->window_size = (ulg)2L*s->w_size;
+
+    CLEAR_HASH(s);
+
+    /* Set the default configuration parameters:
+     */
+    s->max_lazy_match   = configuration_table[s->level].max_lazy;
+    s->good_match       = configuration_table[s->level].good_length;
+    s->nice_match       = configuration_table[s->level].nice_length;
+    s->max_chain_length = configuration_table[s->level].max_chain;
+
+    s->strstart = 0;
+    s->block_start = 0L;
+    s->lookahead = 0;
+    s->insert = 0;
+    s->match_length = s->prev_length = MIN_MATCH-1;
+    s->match_available = 0;
+    s->ins_h = 0;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateReset(z_streamp strm) {
+    int ret;
+
+    ret = deflateResetKeep(strm);
+    if (ret == Z_OK)
+        lm_init(strm->state);
+    return ret;
+}
+
 /* ========================================================================= */
-int ZEXPORT deflateSetHeader (strm, head)
-    z_streamp strm;
-    gz_headerp head;
-{
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
-    if (strm->state->wrap != 2) return Z_STREAM_ERROR;
+int ZEXPORT deflateSetHeader(z_streamp strm, gz_headerp head) {
+    if (deflateStateCheck(strm) || strm->state->wrap != 2)
+        return Z_STREAM_ERROR;
     strm->state->gzhead = head;
     return Z_OK;
 }
 
 /* ========================================================================= */
-int ZEXPORT deflatePrime (strm, bits, value)
-    z_streamp strm;
-    int bits;
-    int value;
-{
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
-    strm->state->bi_valid = bits;
-    strm->state->bi_buf = (ush)(value & ((1 << bits) - 1));
+int ZEXPORT deflatePending(z_streamp strm, unsigned *pending, int *bits) {
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    if (pending != Z_NULL)
+        *pending = strm->state->pending;
+    if (bits != Z_NULL)
+        *bits = strm->state->bi_valid;
     return Z_OK;
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateParams(strm, level, strategy)
-    z_streamp strm;
-    int level;
-    int strategy;
-{
+int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) {
+    deflate_state *s;
+    int put;
+
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    s = strm->state;
+#ifdef LIT_MEM
+    if (bits < 0 || bits > 16 ||
+        (uchf *)s->d_buf < s->pending_out + ((Buf_size + 7) >> 3))
+        return Z_BUF_ERROR;
+#else
+    if (bits < 0 || bits > 16 ||
+        s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
+        return Z_BUF_ERROR;
+#endif
+    do {
+        put = Buf_size - s->bi_valid;
+        if (put > bits)
+            put = bits;
+        s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid);
+        s->bi_valid += put;
+        _tr_flush_bits(s);
+        value >>= put;
+        bits -= put;
+    } while (bits);
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateParams(z_streamp strm, int level, int strategy) {
     deflate_state *s;
     compress_func func;
-    int err = Z_OK;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     s = strm->state;
 
 #ifdef FASTEST
@@ -435,11 +766,23 @@ int ZEXPORT deflateParams(strm, level, strategy)
     }
     func = configuration_table[s->level].func;
 
-    if (func != configuration_table[level].func && strm->total_in != 0) {
+    if ((strategy != s->strategy || func != configuration_table[level].func) &&
+        s->last_flush != -2) {
         /* Flush the last buffer: */
-        err = deflate(strm, Z_PARTIAL_FLUSH);
+        int err = deflate(strm, Z_BLOCK);
+        if (err == Z_STREAM_ERROR)
+            return err;
+        if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead)
+            return Z_BUF_ERROR;
     }
     if (s->level != level) {
+        if (s->level == 0 && s->matches != 0) {
+            if (s->matches == 1)
+                slide_hash(s);
+            else
+                CLEAR_HASH(s);
+            s->matches = 0;
+        }
         s->level = level;
         s->max_lazy_match   = configuration_table[level].max_lazy;
         s->good_match       = configuration_table[level].good_length;
@@ -447,67 +790,110 @@ int ZEXPORT deflateParams(strm, level, strategy)
         s->max_chain_length = configuration_table[level].max_chain;
     }
     s->strategy = strategy;
-    return err;
+    return Z_OK;
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
-    z_streamp strm;
-    int good_length;
-    int max_lazy;
-    int nice_length;
-    int max_chain;
-{
+int ZEXPORT deflateTune(z_streamp strm, int good_length, int max_lazy,
+                        int nice_length, int max_chain) {
     deflate_state *s;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     s = strm->state;
-    s->good_match = good_length;
-    s->max_lazy_match = max_lazy;
+    s->good_match = (uInt)good_length;
+    s->max_lazy_match = (uInt)max_lazy;
     s->nice_match = nice_length;
-    s->max_chain_length = max_chain;
+    s->max_chain_length = (uInt)max_chain;
     return Z_OK;
 }
 
 /* =========================================================================
- * For the default windowBits of 15 and memLevel of 8, this function returns
- * a close to exact, as well as small, upper bound on the compressed size.
- * They are coded as constants here for a reason--if the #define's are
- * changed, then this function needs to be changed as well.  The return
- * value for 15 and 8 only works for those exact settings.
+ * For the default windowBits of 15 and memLevel of 8, this function returns a
+ * close to exact, as well as small, upper bound on the compressed size. This
+ * is an expansion of ~0.03%, plus a small constant.
  *
- * For any setting other than those defaults for windowBits and memLevel,
- * the value returned is a conservative worst case for the maximum expansion
- * resulting from using fixed blocks instead of stored blocks, which deflate
- * can emit on compressed data for some combinations of the parameters.
+ * For any setting other than those defaults for windowBits and memLevel, one
+ * of two worst case bounds is returned. This is at most an expansion of ~4% or
+ * ~13%, plus a small constant.
  *
- * This function could be more sophisticated to provide closer upper bounds
- * for every combination of windowBits and memLevel, as well as wrap.
- * But even the conservative upper bound of about 14% expansion does not
- * seem onerous for output buffer allocation.
+ * Both the 0.03% and 4% derive from the overhead of stored blocks. The first
+ * one is for stored blocks of 16383 bytes (memLevel == 8), whereas the second
+ * is for stored blocks of 127 bytes (the worst case memLevel == 1). The
+ * expansion results from five bytes of header for each stored block.
+ *
+ * The larger expansion of 13% results from a window size less than or equal to
+ * the symbols buffer size (windowBits <= memLevel + 7). In that case some of
+ * the data being compressed may have slid out of the sliding window, impeding
+ * a stored block from being emitted. Then the only choice is a fixed or
+ * dynamic block, where a fixed block limits the maximum expansion to 9 bits
+ * per 8-bit byte, plus 10 bits for every block. The smallest block size for
+ * which this can occur is 255 (memLevel == 2).
+ *
+ * Shifts are used to approximate divisions, for speed.
  */
-uLong ZEXPORT deflateBound(strm, sourceLen)
-    z_streamp strm;
-    uLong sourceLen;
-{
+uLong ZEXPORT deflateBound(z_streamp strm, uLong sourceLen) {
     deflate_state *s;
-    uLong destLen;
+    uLong fixedlen, storelen, wraplen;
 
-    /* conservative upper bound */
-    destLen = sourceLen +
-              ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 11;
+    /* upper bound for fixed blocks with 9-bit literals and length 255
+       (memLevel == 2, which is the lowest that may not use stored blocks) --
+       ~13% overhead plus a small constant */
+    fixedlen = sourceLen + (sourceLen >> 3) + (sourceLen >> 8) +
+               (sourceLen >> 9) + 4;
 
-    /* if can't get parameters, return conservative bound */
-    if (strm == Z_NULL || strm->state == Z_NULL)
-        return destLen;
+    /* upper bound for stored blocks with length 127 (memLevel == 1) --
+       ~4% overhead plus a small constant */
+    storelen = sourceLen + (sourceLen >> 5) + (sourceLen >> 7) +
+               (sourceLen >> 11) + 7;
 
-    /* if not default parameters, return conservative bound */
+    /* if can't get parameters, return larger bound plus a zlib wrapper */
+    if (deflateStateCheck(strm))
+        return (fixedlen > storelen ? fixedlen : storelen) + 6;
+
+    /* compute wrapper length */
     s = strm->state;
+    switch (s->wrap) {
+    case 0:                                 /* raw deflate */
+        wraplen = 0;
+        break;
+    case 1:                                 /* zlib wrapper */
+        wraplen = 6 + (s->strstart ? 4 : 0);
+        break;
+#ifdef GZIP
+    case 2:                                 /* gzip wrapper */
+        wraplen = 18;
+        if (s->gzhead != Z_NULL) {          /* user-supplied gzip header */
+            Bytef *str;
+            if (s->gzhead->extra != Z_NULL)
+                wraplen += 2 + s->gzhead->extra_len;
+            str = s->gzhead->name;
+            if (str != Z_NULL)
+                do {
+                    wraplen++;
+                } while (*str++);
+            str = s->gzhead->comment;
+            if (str != Z_NULL)
+                do {
+                    wraplen++;
+                } while (*str++);
+            if (s->gzhead->hcrc)
+                wraplen += 2;
+        }
+        break;
+#endif
+    default:                                /* for compiler happiness */
+        wraplen = 6;
+    }
+
+    /* if not default parameters, return one of the conservative bounds */
     if (s->w_bits != 15 || s->hash_bits != 8 + 7)
-        return destLen;
+        return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) +
+               wraplen;
 
-    /* default settings: return tight bound for that case */
-    return compressBound(sourceLen);
+    /* default settings: return tight bound for that case -- ~0.03% overhead
+       plus a small constant */
+    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
+           (sourceLen >> 25) + 13 - 6 + wraplen;
 }
 
 /* =========================================================================
@@ -515,271 +901,277 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
  * IN assertion: the stream state is correct and there is enough room in
  * pending_buf.
  */
-local void putShortMSB (s, b)
-    deflate_state *s;
-    uInt b;
-{
+local void putShortMSB(deflate_state *s, uInt b) {
     put_byte(s, (Byte)(b >> 8));
     put_byte(s, (Byte)(b & 0xff));
 }
 
 /* =========================================================================
- * Flush as much pending output as possible. All deflate() output goes
- * through this function so some applications may wish to modify it
- * to avoid allocating a large strm->next_out buffer and copying into it.
- * (See also read_buf()).
+ * Flush as much pending output as possible. All deflate() output, except for
+ * some deflate_stored() output, goes through this function so some
+ * applications may wish to modify it to avoid allocating a large
+ * strm->next_out buffer and copying into it. (See also read_buf()).
  */
-local void flush_pending(strm)
-    z_streamp strm;
-{
-    unsigned len = strm->state->pending;
+local void flush_pending(z_streamp strm) {
+    unsigned len;
+    deflate_state *s = strm->state;
 
+    _tr_flush_bits(s);
+    len = s->pending;
     if (len > strm->avail_out) len = strm->avail_out;
     if (len == 0) return;
 
-    zmemcpy(strm->next_out, strm->state->pending_out, len);
+    zmemcpy(strm->next_out, s->pending_out, len);
     strm->next_out  += len;
-    strm->state->pending_out  += len;
+    s->pending_out  += len;
     strm->total_out += len;
-    strm->avail_out  -= len;
-    strm->state->pending -= len;
-    if (strm->state->pending == 0) {
-        strm->state->pending_out = strm->state->pending_buf;
+    strm->avail_out -= len;
+    s->pending      -= len;
+    if (s->pending == 0) {
+        s->pending_out = s->pending_buf;
     }
 }
 
+/* ===========================================================================
+ * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1].
+ */
+#define HCRC_UPDATE(beg) \
+    do { \
+        if (s->gzhead->hcrc && s->pending > (beg)) \
+            strm->adler = crc32(strm->adler, s->pending_buf + (beg), \
+                                s->pending - (beg)); \
+    } while (0)
+
 /* ========================================================================= */
-int ZEXPORT deflate (strm, flush)
-    z_streamp strm;
-    int flush;
-{
+int ZEXPORT deflate(z_streamp strm, int flush) {
     int old_flush; /* value of flush param for previous deflate call */
     deflate_state *s;
 
-    if (strm == Z_NULL || strm->state == Z_NULL ||
-        flush > Z_FINISH || flush < 0) {
+    if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) {
         return Z_STREAM_ERROR;
     }
     s = strm->state;
 
     if (strm->next_out == Z_NULL ||
-        (strm->next_in == Z_NULL && strm->avail_in != 0) ||
+        (strm->avail_in != 0 && strm->next_in == Z_NULL) ||
         (s->status == FINISH_STATE && flush != Z_FINISH)) {
         ERR_RETURN(strm, Z_STREAM_ERROR);
     }
     if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
 
-    s->strm = strm; /* just in case */
     old_flush = s->last_flush;
     s->last_flush = flush;
 
+    /* Flush as much pending output as possible */
+    if (s->pending != 0) {
+        flush_pending(strm);
+        if (strm->avail_out == 0) {
+            /* Since avail_out is 0, deflate will be called again with
+             * more output space, but possibly with both pending and
+             * avail_in equal to zero. There won't be anything to do,
+             * but this is not an error situation so make sure we
+             * return OK instead of BUF_ERROR at next call of deflate:
+             */
+            s->last_flush = -1;
+            return Z_OK;
+        }
+
+    /* Make sure there is something to do and avoid duplicate consecutive
+     * flushes. For repeated and useless calls with Z_FINISH, we keep
+     * returning Z_STREAM_END instead of Z_BUF_ERROR.
+     */
+    } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) &&
+               flush != Z_FINISH) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* User must not provide more input after the first FINISH: */
+    if (s->status == FINISH_STATE && strm->avail_in != 0) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
     /* Write the header */
+    if (s->status == INIT_STATE && s->wrap == 0)
+        s->status = BUSY_STATE;
     if (s->status == INIT_STATE) {
-#ifdef GZIP
-        if (s->wrap == 2) {
-            strm->adler = crc32(0L, Z_NULL, 0);
-            put_byte(s, 31);
-            put_byte(s, 139);
-            put_byte(s, 8);
-            if (s->gzhead == NULL) {
-                put_byte(s, 0);
-                put_byte(s, 0);
-                put_byte(s, 0);
-                put_byte(s, 0);
-                put_byte(s, 0);
-                put_byte(s, s->level == 9 ? 2 :
-                            (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
-                             4 : 0));
-                put_byte(s, OS_CODE);
-                s->status = BUSY_STATE;
-            }
-            else {
-                put_byte(s, (s->gzhead->text ? 1 : 0) +
-                            (s->gzhead->hcrc ? 2 : 0) +
-                            (s->gzhead->extra == Z_NULL ? 0 : 4) +
-                            (s->gzhead->name == Z_NULL ? 0 : 8) +
-                            (s->gzhead->comment == Z_NULL ? 0 : 16)
-                        );
-                put_byte(s, (Byte)(s->gzhead->time & 0xff));
-                put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
-                put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
-                put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
-                put_byte(s, s->level == 9 ? 2 :
-                            (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
-                             4 : 0));
-                put_byte(s, s->gzhead->os & 0xff);
-                if (s->gzhead->extra != NULL) {
-                    put_byte(s, s->gzhead->extra_len & 0xff);
-                    put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
-                }
-                if (s->gzhead->hcrc)
-                    strm->adler = crc32(strm->adler, s->pending_buf,
-                                        s->pending);
-                s->gzindex = 0;
-                s->status = EXTRA_STATE;
-            }
-        }
+        /* zlib header */
+        uInt header = (Z_DEFLATED + ((s->w_bits - 8) << 4)) << 8;
+        uInt level_flags;
+
+        if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
+            level_flags = 0;
+        else if (s->level < 6)
+            level_flags = 1;
+        else if (s->level == 6)
+            level_flags = 2;
         else
-#endif
-        {
-            uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
-            uInt level_flags;
-
-            if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
-                level_flags = 0;
-            else if (s->level < 6)
-                level_flags = 1;
-            else if (s->level == 6)
-                level_flags = 2;
-            else
-                level_flags = 3;
-            header |= (level_flags << 6);
-            if (s->strstart != 0) header |= PRESET_DICT;
-            header += 31 - (header % 31);
+            level_flags = 3;
+        header |= (level_flags << 6);
+        if (s->strstart != 0) header |= PRESET_DICT;
+        header += 31 - (header % 31);
+
+        putShortMSB(s, header);
+
+        /* Save the adler32 of the preset dictionary: */
+        if (s->strstart != 0) {
+            putShortMSB(s, (uInt)(strm->adler >> 16));
+            putShortMSB(s, (uInt)(strm->adler & 0xffff));
+        }
+        strm->adler = adler32(0L, Z_NULL, 0);
+        s->status = BUSY_STATE;
 
+        /* Compression must start with an empty pending buffer */
+        flush_pending(strm);
+        if (s->pending != 0) {
+            s->last_flush = -1;
+            return Z_OK;
+        }
+    }
+#ifdef GZIP
+    if (s->status == GZIP_STATE) {
+        /* gzip header */
+        strm->adler = crc32(0L, Z_NULL, 0);
+        put_byte(s, 31);
+        put_byte(s, 139);
+        put_byte(s, 8);
+        if (s->gzhead == Z_NULL) {
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, s->level == 9 ? 2 :
+                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                      4 : 0));
+            put_byte(s, OS_CODE);
             s->status = BUSY_STATE;
-            putShortMSB(s, header);
 
-            /* Save the adler32 of the preset dictionary: */
-            if (s->strstart != 0) {
-                putShortMSB(s, (uInt)(strm->adler >> 16));
-                putShortMSB(s, (uInt)(strm->adler & 0xffff));
+            /* Compression must start with an empty pending buffer */
+            flush_pending(strm);
+            if (s->pending != 0) {
+                s->last_flush = -1;
+                return Z_OK;
             }
-            strm->adler = adler32(0L, Z_NULL, 0);
+        }
+        else {
+            put_byte(s, (s->gzhead->text ? 1 : 0) +
+                     (s->gzhead->hcrc ? 2 : 0) +
+                     (s->gzhead->extra == Z_NULL ? 0 : 4) +
+                     (s->gzhead->name == Z_NULL ? 0 : 8) +
+                     (s->gzhead->comment == Z_NULL ? 0 : 16)
+                     );
+            put_byte(s, (Byte)(s->gzhead->time & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
+            put_byte(s, s->level == 9 ? 2 :
+                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                      4 : 0));
+            put_byte(s, s->gzhead->os & 0xff);
+            if (s->gzhead->extra != Z_NULL) {
+                put_byte(s, s->gzhead->extra_len & 0xff);
+                put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
+            }
+            if (s->gzhead->hcrc)
+                strm->adler = crc32(strm->adler, s->pending_buf,
+                                    s->pending);
+            s->gzindex = 0;
+            s->status = EXTRA_STATE;
         }
     }
-#ifdef GZIP
     if (s->status == EXTRA_STATE) {
-        if (s->gzhead->extra != NULL) {
-            uInt beg = s->pending;  /* start of bytes to update crc */
-
-            while (s->gzindex < (s->gzhead->extra_len & 0xffff)) {
-                if (s->pending == s->pending_buf_size) {
-                    if (s->gzhead->hcrc && s->pending > beg)
-                        strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                            s->pending - beg);
-                    flush_pending(strm);
-                    beg = s->pending;
-                    if (s->pending == s->pending_buf_size)
-                        break;
+        if (s->gzhead->extra != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex;
+            while (s->pending + left > s->pending_buf_size) {
+                uInt copy = s->pending_buf_size - s->pending;
+                zmemcpy(s->pending_buf + s->pending,
+                        s->gzhead->extra + s->gzindex, copy);
+                s->pending = s->pending_buf_size;
+                HCRC_UPDATE(beg);
+                s->gzindex += copy;
+                flush_pending(strm);
+                if (s->pending != 0) {
+                    s->last_flush = -1;
+                    return Z_OK;
                 }
-                put_byte(s, s->gzhead->extra[s->gzindex]);
-                s->gzindex++;
-            }
-            if (s->gzhead->hcrc && s->pending > beg)
-                strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                    s->pending - beg);
-            if (s->gzindex == s->gzhead->extra_len) {
-                s->gzindex = 0;
-                s->status = NAME_STATE;
+                beg = 0;
+                left -= copy;
             }
+            zmemcpy(s->pending_buf + s->pending,
+                    s->gzhead->extra + s->gzindex, left);
+            s->pending += left;
+            HCRC_UPDATE(beg);
+            s->gzindex = 0;
         }
-        else
-            s->status = NAME_STATE;
+        s->status = NAME_STATE;
     }
     if (s->status == NAME_STATE) {
-        if (s->gzhead->name != NULL) {
-            uInt beg = s->pending;  /* start of bytes to update crc */
+        if (s->gzhead->name != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
             int val;
-
             do {
                 if (s->pending == s->pending_buf_size) {
-                    if (s->gzhead->hcrc && s->pending > beg)
-                        strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                            s->pending - beg);
+                    HCRC_UPDATE(beg);
                     flush_pending(strm);
-                    beg = s->pending;
-                    if (s->pending == s->pending_buf_size) {
-                        val = 1;
-                        break;
+                    if (s->pending != 0) {
+                        s->last_flush = -1;
+                        return Z_OK;
                     }
+                    beg = 0;
                 }
                 val = s->gzhead->name[s->gzindex++];
                 put_byte(s, val);
             } while (val != 0);
-            if (s->gzhead->hcrc && s->pending > beg)
-                strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                    s->pending - beg);
-            if (val == 0) {
-                s->gzindex = 0;
-                s->status = COMMENT_STATE;
-            }
+            HCRC_UPDATE(beg);
+            s->gzindex = 0;
         }
-        else
-            s->status = COMMENT_STATE;
+        s->status = COMMENT_STATE;
     }
     if (s->status == COMMENT_STATE) {
-        if (s->gzhead->comment != NULL) {
-            uInt beg = s->pending;  /* start of bytes to update crc */
+        if (s->gzhead->comment != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
             int val;
-
             do {
                 if (s->pending == s->pending_buf_size) {
-                    if (s->gzhead->hcrc && s->pending > beg)
-                        strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                            s->pending - beg);
+                    HCRC_UPDATE(beg);
                     flush_pending(strm);
-                    beg = s->pending;
-                    if (s->pending == s->pending_buf_size) {
-                        val = 1;
-                        break;
+                    if (s->pending != 0) {
+                        s->last_flush = -1;
+                        return Z_OK;
                     }
+                    beg = 0;
                 }
                 val = s->gzhead->comment[s->gzindex++];
                 put_byte(s, val);
             } while (val != 0);
-            if (s->gzhead->hcrc && s->pending > beg)
-                strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                    s->pending - beg);
-            if (val == 0)
-                s->status = HCRC_STATE;
+            HCRC_UPDATE(beg);
         }
-        else
-            s->status = HCRC_STATE;
+        s->status = HCRC_STATE;
     }
     if (s->status == HCRC_STATE) {
         if (s->gzhead->hcrc) {
-            if (s->pending + 2 > s->pending_buf_size)
+            if (s->pending + 2 > s->pending_buf_size) {
                 flush_pending(strm);
-            if (s->pending + 2 <= s->pending_buf_size) {
-                put_byte(s, (Byte)(strm->adler & 0xff));
-                put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
-                strm->adler = crc32(0L, Z_NULL, 0);
-                s->status = BUSY_STATE;
+                if (s->pending != 0) {
+                    s->last_flush = -1;
+                    return Z_OK;
+                }
             }
+            put_byte(s, (Byte)(strm->adler & 0xff));
+            put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
+            strm->adler = crc32(0L, Z_NULL, 0);
         }
-        else
-            s->status = BUSY_STATE;
-    }
-#endif
+        s->status = BUSY_STATE;
 
-    /* Flush as much pending output as possible */
-    if (s->pending != 0) {
+        /* Compression must start with an empty pending buffer */
         flush_pending(strm);
-        if (strm->avail_out == 0) {
-            /* Since avail_out is 0, deflate will be called again with
-             * more output space, but possibly with both pending and
-             * avail_in equal to zero. There won't be anything to do,
-             * but this is not an error situation so make sure we
-             * return OK instead of BUF_ERROR at next call of deflate:
-             */
+        if (s->pending != 0) {
             s->last_flush = -1;
             return Z_OK;
         }
-
-    /* Make sure there is something to do and avoid duplicate consecutive
-     * flushes. For repeated and useless calls with Z_FINISH, we keep
-     * returning Z_STREAM_END instead of Z_BUF_ERROR.
-     */
-    } else if (strm->avail_in == 0 && flush <= old_flush &&
-               flush != Z_FINISH) {
-        ERR_RETURN(strm, Z_BUF_ERROR);
-    }
-
-    /* User must not provide more input after the first FINISH: */
-    if (s->status == FINISH_STATE && strm->avail_in != 0) {
-        ERR_RETURN(strm, Z_BUF_ERROR);
     }
+#endif
 
     /* Start a new block or continue the current one.
      */
@@ -787,7 +1179,10 @@ int ZEXPORT deflate (strm, flush)
         (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
         block_state bstate;
 
-        bstate = (*(configuration_table[s->level].func))(s, flush);
+        bstate = s->level == 0 ? deflate_stored(s, flush) :
+                 s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
+                 s->strategy == Z_RLE ? deflate_rle(s, flush) :
+                 (*(configuration_table[s->level].func))(s, flush);
 
         if (bstate == finish_started || bstate == finish_done) {
             s->status = FINISH_STATE;
@@ -808,13 +1203,18 @@ int ZEXPORT deflate (strm, flush)
         if (bstate == block_done) {
             if (flush == Z_PARTIAL_FLUSH) {
                 _tr_align(s);
-            } else { /* FULL_FLUSH or SYNC_FLUSH */
+            } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */
                 _tr_stored_block(s, (char*)0, 0L, 0);
                 /* For a full flush, this empty block will be recognized
                  * as a special marker by inflate_sync().
                  */
                 if (flush == Z_FULL_FLUSH) {
                     CLEAR_HASH(s);             /* forget history */
+                    if (s->lookahead == 0) {
+                        s->strstart = 0;
+                        s->block_start = 0L;
+                        s->insert = 0;
+                    }
                 }
             }
             flush_pending(strm);
@@ -824,7 +1224,6 @@ int ZEXPORT deflate (strm, flush)
             }
         }
     }
-    Assert(strm->avail_out > 0, "bug2");
 
     if (flush != Z_FINISH) return Z_OK;
     if (s->wrap <= 0) return Z_STREAM_END;
@@ -856,23 +1255,12 @@ int ZEXPORT deflate (strm, flush)
 }
 
 /* ========================================================================= */
-int ZEXPORT deflateEnd (strm)
-    z_streamp strm;
-{
+int ZEXPORT deflateEnd(z_streamp strm) {
     int status;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
 
     status = strm->state->status;
-    if (status != INIT_STATE &&
-        status != EXTRA_STATE &&
-        status != NAME_STATE &&
-        status != COMMENT_STATE &&
-        status != HCRC_STATE &&
-        status != BUSY_STATE &&
-        status != FINISH_STATE) {
-      return Z_STREAM_ERROR;
-    }
 
     /* Deallocate in reverse order of allocations: */
     TRY_FREE(strm, strm->state->pending_buf);
@@ -891,37 +1279,34 @@ int ZEXPORT deflateEnd (strm)
  * To simplify the source, this is not supported for 16-bit MSDOS (which
  * doesn't have enough memory anyway to duplicate compression states).
  */
-int ZEXPORT deflateCopy (dest, source)
-    z_streamp dest;
-    z_streamp source;
-{
+int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) {
 #ifdef MAXSEG_64K
+    (void)dest;
+    (void)source;
     return Z_STREAM_ERROR;
 #else
     deflate_state *ds;
     deflate_state *ss;
-    ushf *overlay;
 
 
-    if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) {
+    if (deflateStateCheck(source) || dest == Z_NULL) {
         return Z_STREAM_ERROR;
     }
 
     ss = source->state;
 
-    zmemcpy(dest, source, sizeof(z_stream));
+    zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
 
     ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
     if (ds == Z_NULL) return Z_MEM_ERROR;
     dest->state = (struct internal_state FAR *) ds;
-    zmemcpy(ds, ss, sizeof(deflate_state));
+    zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state));
     ds->strm = dest;
 
     ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
     ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
     ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
-    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
-    ds->pending_buf = (uchf *) overlay;
+    ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, LIT_BUFS);
 
     if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
         ds->pending_buf == Z_NULL) {
@@ -930,13 +1315,17 @@ int ZEXPORT deflateCopy (dest, source)
     }
     /* following zmemcpy do not work for 16-bit MSDOS */
     zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
-    zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
-    zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
-    zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
+    zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos));
+    zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos));
+    zmemcpy(ds->pending_buf, ss->pending_buf, ds->lit_bufsize * LIT_BUFS);
 
     ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
-    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
-    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+#ifdef LIT_MEM
+    ds->d_buf = (ushf *)(ds->pending_buf + (ds->lit_bufsize << 1));
+    ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2);
+#else
+    ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
+#endif
 
     ds->l_desc.dyn_tree = ds->dyn_ltree;
     ds->d_desc.dyn_tree = ds->dyn_dtree;
@@ -946,70 +1335,6 @@ int ZEXPORT deflateCopy (dest, source)
 #endif /* MAXSEG_64K */
 }
 
-/* ===========================================================================
- * Read a new buffer from the current input stream, update the adler32
- * and total number of bytes read.  All deflate() input goes through
- * this function so some applications may wish to modify it to avoid
- * allocating a large strm->next_in buffer and copying from it.
- * (See also flush_pending()).
- */
-local int read_buf(strm, buf, size)
-    z_streamp strm;
-    Bytef *buf;
-    unsigned size;
-{
-    unsigned len = strm->avail_in;
-
-    if (len > size) len = size;
-    if (len == 0) return 0;
-
-    strm->avail_in  -= len;
-
-    if (strm->state->wrap == 1) {
-        strm->adler = adler32(strm->adler, strm->next_in, len);
-    }
-#ifdef GZIP
-    else if (strm->state->wrap == 2) {
-        strm->adler = crc32(strm->adler, strm->next_in, len);
-    }
-#endif
-    zmemcpy(buf, strm->next_in, len);
-    strm->next_in  += len;
-    strm->total_in += len;
-
-    return (int)len;
-}
-
-/* ===========================================================================
- * Initialize the "longest match" routines for a new zlib stream
- */
-local void lm_init (s)
-    deflate_state *s;
-{
-    s->window_size = (ulg)2L*s->w_size;
-
-    CLEAR_HASH(s);
-
-    /* Set the default configuration parameters:
-     */
-    s->max_lazy_match   = configuration_table[s->level].max_lazy;
-    s->good_match       = configuration_table[s->level].good_length;
-    s->nice_match       = configuration_table[s->level].nice_length;
-    s->max_chain_length = configuration_table[s->level].max_chain;
-
-    s->strstart = 0;
-    s->block_start = 0L;
-    s->lookahead = 0;
-    s->match_length = s->prev_length = MIN_MATCH-1;
-    s->match_available = 0;
-    s->ins_h = 0;
-#ifndef FASTEST
-#ifdef ASMV
-    match_init(); /* initialize the asm code */
-#endif
-#endif
-}
-
 #ifndef FASTEST
 /* ===========================================================================
  * Set match_start to the longest match starting at the given string and
@@ -1020,19 +1345,12 @@ local void lm_init (s)
  *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
  * OUT assertion: the match length is not greater than s->lookahead.
  */
-#ifndef ASMV
-/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
- * match.S. The code will be functionally equivalent.
- */
-local uInt longest_match(s, cur_match)
-    deflate_state *s;
-    IPos cur_match;                             /* current match */
-{
+local uInt longest_match(deflate_state *s, IPos cur_match) {
     unsigned chain_length = s->max_chain_length;/* max hash chain length */
     register Bytef *scan = s->window + s->strstart; /* current string */
-    register Bytef *match;                       /* matched string */
+    register Bytef *match;                      /* matched string */
     register int len;                           /* length of current match */
-    int best_len = s->prev_length;              /* best match length so far */
+    int best_len = (int)s->prev_length;         /* best match length so far */
     int nice_match = s->nice_match;             /* stop if match long enough */
     IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
         s->strstart - (IPos)MAX_DIST(s) : NIL;
@@ -1048,10 +1366,10 @@ local uInt longest_match(s, cur_match)
      */
     register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
     register ush scan_start = *(ushf*)scan;
-    register ush scan_end   = *(ushf*)(scan+best_len-1);
+    register ush scan_end   = *(ushf*)(scan + best_len - 1);
 #else
     register Bytef *strend = s->window + s->strstart + MAX_MATCH;
-    register Byte scan_end1  = scan[best_len-1];
+    register Byte scan_end1  = scan[best_len - 1];
     register Byte scan_end   = scan[best_len];
 #endif
 
@@ -1067,9 +1385,10 @@ local uInt longest_match(s, cur_match)
     /* Do not look for matches beyond the end of the input. This is necessary
      * to make deflate deterministic.
      */
-    if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+    if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead;
 
-    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
+           "need lookahead");
 
     do {
         Assert(cur_match < s->strstart, "no future");
@@ -1087,43 +1406,44 @@ local uInt longest_match(s, cur_match)
         /* This code assumes sizeof(unsigned short) == 2. Do not use
          * UNALIGNED_OK if your compiler uses a different size.
          */
-        if (*(ushf*)(match+best_len-1) != scan_end ||
+        if (*(ushf*)(match + best_len - 1) != scan_end ||
             *(ushf*)match != scan_start) continue;
 
         /* It is not necessary to compare scan[2] and match[2] since they are
          * always equal when the other bytes match, given that the hash keys
          * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
-         * strstart+3, +5, ... up to strstart+257. We check for insufficient
+         * strstart + 3, + 5, up to strstart + 257. We check for insufficient
          * lookahead only every 4th comparison; the 128th check will be made
-         * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+         * at strstart + 257. If MAX_MATCH-2 is not a multiple of 8, it is
          * necessary to put more guard bytes at the end of the window, or
          * to check more often for insufficient lookahead.
          */
         Assert(scan[2] == match[2], "scan[2]?");
         scan++, match++;
         do {
-        } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
-                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+        } while (*(ushf*)(scan += 2) == *(ushf*)(match += 2) &&
+                 *(ushf*)(scan += 2) == *(ushf*)(match += 2) &&
+                 *(ushf*)(scan += 2) == *(ushf*)(match += 2) &&
+                 *(ushf*)(scan += 2) == *(ushf*)(match += 2) &&
                  scan < strend);
         /* The funny "do {}" generates better code on most compilers */
 
-        /* Here, scan <= window+strstart+257 */
-        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+        /* Here, scan <= window + strstart + 257 */
+        Assert(scan <= s->window + (unsigned)(s->window_size - 1),
+               "wild scan");
         if (*scan == *match) scan++;
 
-        len = (MAX_MATCH - 1) - (int)(strend-scan);
+        len = (MAX_MATCH - 1) - (int)(strend - scan);
         scan = strend - (MAX_MATCH-1);
 
 #else /* UNALIGNED_OK */
 
-        if (match[best_len]   != scan_end  ||
-            match[best_len-1] != scan_end1 ||
-            *match            != *scan     ||
-            *++match          != scan[1])      continue;
+        if (match[best_len]     != scan_end  ||
+            match[best_len - 1] != scan_end1 ||
+            *match              != *scan     ||
+            *++match            != scan[1])      continue;
 
-        /* The check at best_len-1 can be removed because it will be made
+        /* The check at best_len - 1 can be removed because it will be made
          * again later. (This heuristic is not always a win.)
          * It is not necessary to compare scan[2] and match[2] since they
          * are always equal when the other bytes match, given that
@@ -1133,7 +1453,7 @@ local uInt longest_match(s, cur_match)
         Assert(*scan == *match, "match[2]?");
 
         /* We check for insufficient lookahead only every 8th comparison;
-         * the 256th check will be made at strstart+258.
+         * the 256th check will be made at strstart + 258.
          */
         do {
         } while (*++scan == *++match && *++scan == *++match &&
@@ -1142,7 +1462,8 @@ local uInt longest_match(s, cur_match)
                  *++scan == *++match && *++scan == *++match &&
                  scan < strend);
 
-        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+        Assert(scan <= s->window + (unsigned)(s->window_size - 1),
+               "wild scan");
 
         len = MAX_MATCH - (int)(strend - scan);
         scan = strend - MAX_MATCH;
@@ -1154,9 +1475,9 @@ local uInt longest_match(s, cur_match)
             best_len = len;
             if (len >= nice_match) break;
 #ifdef UNALIGNED_OK
-            scan_end = *(ushf*)(scan+best_len-1);
+            scan_end = *(ushf*)(scan + best_len - 1);
 #else
-            scan_end1  = scan[best_len-1];
+            scan_end1  = scan[best_len - 1];
             scan_end   = scan[best_len];
 #endif
         }
@@ -1166,16 +1487,13 @@ local uInt longest_match(s, cur_match)
     if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
     return s->lookahead;
 }
-#endif /* ASMV */
-#endif /* FASTEST */
+
+#else /* FASTEST */
 
 /* ---------------------------------------------------------------------------
- * Optimized version for level == 1 or strategy == Z_RLE only
+ * Optimized version for FASTEST only
  */
-local uInt longest_match_fast(s, cur_match)
-    deflate_state *s;
-    IPos cur_match;                             /* current match */
-{
+local uInt longest_match(deflate_state *s, IPos cur_match) {
     register Bytef *scan = s->window + s->strstart; /* current string */
     register Bytef *match;                       /* matched string */
     register int len;                           /* length of current match */
@@ -1186,7 +1504,8 @@ local uInt longest_match_fast(s, cur_match)
      */
     Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
 
-    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
+           "need lookahead");
 
     Assert(cur_match < s->strstart, "no future");
 
@@ -1196,7 +1515,7 @@ local uInt longest_match_fast(s, cur_match)
      */
     if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;
 
-    /* The check at best_len-1 can be removed because it will be made
+    /* The check at best_len - 1 can be removed because it will be made
      * again later. (This heuristic is not always a win.)
      * It is not necessary to compare scan[2] and match[2] since they
      * are always equal when the other bytes match, given that
@@ -1206,7 +1525,7 @@ local uInt longest_match_fast(s, cur_match)
     Assert(*scan == *match, "match[2]?");
 
     /* We check for insufficient lookahead only every 8th comparison;
-     * the 256th check will be made at strstart+258.
+     * the 256th check will be made at strstart + 258.
      */
     do {
     } while (*++scan == *++match && *++scan == *++match &&
@@ -1215,7 +1534,7 @@ local uInt longest_match_fast(s, cur_match)
              *++scan == *++match && *++scan == *++match &&
              scan < strend);
 
-    Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+    Assert(scan <= s->window + (unsigned)(s->window_size - 1), "wild scan");
 
     len = MAX_MATCH - (int)(strend - scan);
 
@@ -1225,217 +1544,261 @@ local uInt longest_match_fast(s, cur_match)
     return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead;
 }
 
-#ifdef DEBUG
+#endif /* FASTEST */
+
+#ifdef ZLIB_DEBUG
+
+#define EQUAL 0
+/* result of memcmp for equal strings */
+
 /* ===========================================================================
  * Check that the match at match_start is indeed a match.
  */
-local void check_match(s, start, match, length)
-    deflate_state *s;
-    IPos start, match;
-    int length;
-{
+local void check_match(deflate_state *s, IPos start, IPos match, int length) {
     /* check that the match is indeed a match */
-    if (zmemcmp(s->window + match,
-                s->window + start, length) != EQUAL) {
-        fprintf(stderr, " start %u, match %u, length %d\n",
-                start, match, length);
+    Bytef *back = s->window + (int)match, *here = s->window + start;
+    IPos len = length;
+    if (match == (IPos)-1) {
+        /* match starts one byte before the current window -- just compare the
+           subsequent length-1 bytes */
+        back++;
+        here++;
+        len--;
+    }
+    if (zmemcmp(back, here, len) != EQUAL) {
+        fprintf(stderr, " start %u, match %d, length %d\n",
+                start, (int)match, length);
         do {
-            fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
-        } while (--length != 0);
+            fprintf(stderr, "(%02x %02x)", *back++, *here++);
+        } while (--len != 0);
         z_error("invalid match");
     }
     if (z_verbose > 1) {
-        fprintf(stderr,"\\[%d,%d]", start-match, length);
+        fprintf(stderr,"\\[%d,%d]", start - match, length);
         do { putc(s->window[start++], stderr); } while (--length != 0);
     }
 }
 #else
 #  define check_match(s, start, match, length)
-#endif /* DEBUG */
-
-/* ===========================================================================
- * Fill the window when the lookahead becomes insufficient.
- * Updates strstart and lookahead.
- *
- * IN assertion: lookahead < MIN_LOOKAHEAD
- * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
- *    At least one byte has been read, or avail_in == 0; reads are
- *    performed for at least two bytes (required for the zip translate_eol
- *    option -- not supported here).
- */
-local void fill_window(s)
-    deflate_state *s;
-{
-    register unsigned n, m;
-    register Posf *p;
-    unsigned more;    /* Amount of free space at the end of the window. */
-    uInt wsize = s->w_size;
-
-    do {
-        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
-
-        /* Deal with !@#$% 64K limit: */
-        if (sizeof(int) <= 2) {
-            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
-                more = wsize;
-
-            } else if (more == (unsigned)(-1)) {
-                /* Very unlikely, but possible on 16 bit machine if
-                 * strstart == 0 && lookahead == 1 (input done a byte at time)
-                 */
-                more--;
-            }
-        }
-
-        /* If the window is almost full and there is insufficient lookahead,
-         * move the upper half to the lower one to make room in the upper half.
-         */
-        if (s->strstart >= wsize+MAX_DIST(s)) {
-
-            zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
-            s->match_start -= wsize;
-            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
-            s->block_start -= (long) wsize;
-
-            /* Slide the hash table (could be avoided with 32 bit values
-               at the expense of memory usage). We slide even when level == 0
-               to keep the hash table consistent if we switch back to level > 0
-               later. (Using level 0 permanently is not an optimal usage of
-               zlib, so we don't care about this pathological case.)
-             */
-            /* %%% avoid this when Z_RLE */
-            n = s->hash_size;
-            p = &s->head[n];
-            do {
-                m = *--p;
-                *p = (Pos)(m >= wsize ? m-wsize : NIL);
-            } while (--n);
-
-            n = wsize;
-#ifndef FASTEST
-            p = &s->prev[n];
-            do {
-                m = *--p;
-                *p = (Pos)(m >= wsize ? m-wsize : NIL);
-                /* If n is not on any hash chain, prev[n] is garbage but
-                 * its value will never be used.
-                 */
-            } while (--n);
-#endif
-            more += wsize;
-        }
-        if (s->strm->avail_in == 0) return;
-
-        /* If there was no sliding:
-         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
-         *    more == window_size - lookahead - strstart
-         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
-         * => more >= window_size - 2*WSIZE + 2
-         * In the BIG_MEM or MMAP case (not yet supported),
-         *   window_size == input_size + MIN_LOOKAHEAD  &&
-         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
-         * Otherwise, window_size == 2*WSIZE so more >= 2.
-         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
-         */
-        Assert(more >= 2, "more < 2");
-
-        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
-        s->lookahead += n;
-
-        /* Initialize the hash value now that we have some input: */
-        if (s->lookahead >= MIN_MATCH) {
-            s->ins_h = s->window[s->strstart];
-            UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
-#if MIN_MATCH != 3
-            Call UPDATE_HASH() MIN_MATCH-3 more times
-#endif
-        }
-        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
-         * but this is not important since only literal bytes will be emitted.
-         */
-
-    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
-}
+#endif /* ZLIB_DEBUG */
 
 /* ===========================================================================
  * Flush the current block, with given end-of-file flag.
  * IN assertion: strstart is set to the end of the current match.
  */
-#define FLUSH_BLOCK_ONLY(s, eof) { \
+#define FLUSH_BLOCK_ONLY(s, last) { \
    _tr_flush_block(s, (s->block_start >= 0L ? \
                    (charf *)&s->window[(unsigned)s->block_start] : \
                    (charf *)Z_NULL), \
                 (ulg)((long)s->strstart - s->block_start), \
-                (eof)); \
+                (last)); \
    s->block_start = s->strstart; \
    flush_pending(s->strm); \
    Tracev((stderr,"[FLUSH]")); \
 }
 
 /* Same but force premature exit if necessary. */
-#define FLUSH_BLOCK(s, eof) { \
-   FLUSH_BLOCK_ONLY(s, eof); \
-   if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
+#define FLUSH_BLOCK(s, last) { \
+   FLUSH_BLOCK_ONLY(s, last); \
+   if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \
 }
 
+/* Maximum stored block length in deflate format (not including header). */
+#define MAX_STORED 65535
+
+/* Minimum of a and b. */
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+
 /* ===========================================================================
  * Copy without compression as much as possible from the input stream, return
  * the current block state.
- * This function does not insert new strings in the dictionary since
- * uncompressible data is probably not useful. This function is used
- * only for the level=0 compression option.
- * NOTE: this function should be optimized to avoid extra copying from
- * window to pending_buf.
+ *
+ * In case deflateParams() is used to later switch to a non-zero compression
+ * level, s->matches (otherwise unused when storing) keeps track of the number
+ * of hash table slides to perform. If s->matches is 1, then one hash table
+ * slide will be done when switching. If s->matches is 2, the maximum value
+ * allowed here, then the hash table will be cleared, since two or more slides
+ * is the same as a clear.
+ *
+ * deflate_stored() is written to minimize the number of times an input byte is
+ * copied. It is most efficient with large input and output buffers, which
+ * maximizes the opportunities to have a single copy from next_in to next_out.
  */
-local block_state deflate_stored(s, flush)
-    deflate_state *s;
-    int flush;
-{
-    /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
-     * to pending_buf_size, and each stored block has a 5 byte header:
+local block_state deflate_stored(deflate_state *s, int flush) {
+    /* Smallest worthy block size when not flushing or finishing. By default
+     * this is 32K. This can be as small as 507 bytes for memLevel == 1. For
+     * large input and output buffers, the stored block size will be larger.
      */
-    ulg max_block_size = 0xffff;
-    ulg max_start;
-
-    if (max_block_size > s->pending_buf_size - 5) {
-        max_block_size = s->pending_buf_size - 5;
-    }
+    unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size);
 
-    /* Copy as much as possible from input to output: */
-    for (;;) {
-        /* Fill the window as much as possible: */
-        if (s->lookahead <= 1) {
-
-            Assert(s->strstart < s->w_size+MAX_DIST(s) ||
-                   s->block_start >= (long)s->w_size, "slide too late");
+    /* Copy as many min_block or larger stored blocks directly to next_out as
+     * possible. If flushing, copy the remaining available input to next_out as
+     * stored blocks, if there is enough space.
+     */
+    unsigned len, left, have, last = 0;
+    unsigned used = s->strm->avail_in;
+    do {
+        /* Set len to the maximum size block that we can copy directly with the
+         * available input data and output space. Set left to how much of that
+         * would be copied from what's left in the window.
+         */
+        len = MAX_STORED;       /* maximum deflate stored block length */
+        have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
+        if (s->strm->avail_out < have)          /* need room for header */
+            break;
+            /* maximum stored block length that will fit in avail_out: */
+        have = s->strm->avail_out - have;
+        left = s->strstart - s->block_start;    /* bytes left in window */
+        if (len > (ulg)left + s->strm->avail_in)
+            len = left + s->strm->avail_in;     /* limit len to the input */
+        if (len > have)
+            len = have;                         /* limit len to the output */
+
+        /* If the stored block would be less than min_block in length, or if
+         * unable to copy all of the available input when flushing, then try
+         * copying to the window and the pending buffer instead. Also don't
+         * write an empty block when flushing -- deflate() does that.
+         */
+        if (len < min_block && ((len == 0 && flush != Z_FINISH) ||
+                                flush == Z_NO_FLUSH ||
+                                len != left + s->strm->avail_in))
+            break;
 
-            fill_window(s);
-            if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more;
+        /* Make a dummy stored block in pending to get the header bytes,
+         * including any pending bits. This also updates the debugging counts.
+         */
+        last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0;
+        _tr_stored_block(s, (char *)0, 0L, last);
+
+        /* Replace the lengths in the dummy stored block with len. */
+        s->pending_buf[s->pending - 4] = len;
+        s->pending_buf[s->pending - 3] = len >> 8;
+        s->pending_buf[s->pending - 2] = ~len;
+        s->pending_buf[s->pending - 1] = ~len >> 8;
+
+        /* Write the stored block header bytes. */
+        flush_pending(s->strm);
+
+#ifdef ZLIB_DEBUG
+        /* Update debugging counts for the data about to be copied. */
+        s->compressed_len += len << 3;
+        s->bits_sent += len << 3;
+#endif
 
-            if (s->lookahead == 0) break; /* flush the current block */
+        /* Copy uncompressed bytes from the window to next_out. */
+        if (left) {
+            if (left > len)
+                left = len;
+            zmemcpy(s->strm->next_out, s->window + s->block_start, left);
+            s->strm->next_out += left;
+            s->strm->avail_out -= left;
+            s->strm->total_out += left;
+            s->block_start += left;
+            len -= left;
         }
-        Assert(s->block_start >= 0L, "block gone");
-
-        s->strstart += s->lookahead;
-        s->lookahead = 0;
-
-        /* Emit a stored block if pending_buf will be full: */
-        max_start = s->block_start + max_block_size;
-        if (s->strstart == 0 || (ulg)s->strstart >= max_start) {
-            /* strstart == 0 is possible when wraparound on 16-bit machine */
-            s->lookahead = (uInt)(s->strstart - max_start);
-            s->strstart = (uInt)max_start;
-            FLUSH_BLOCK(s, 0);
+
+        /* Copy uncompressed bytes directly from next_in to next_out, updating
+         * the check value.
+         */
+        if (len) {
+            read_buf(s->strm, s->strm->next_out, len);
+            s->strm->next_out += len;
+            s->strm->avail_out -= len;
+            s->strm->total_out += len;
         }
-        /* Flush if we may have to slide, otherwise block_start may become
-         * negative and the data will be gone:
+    } while (last == 0);
+
+    /* Update the sliding window with the last s->w_size bytes of the copied
+     * data, or append all of the copied data to the existing window if less
+     * than s->w_size bytes were copied. Also update the number of bytes to
+     * insert in the hash tables, in the event that deflateParams() switches to
+     * a non-zero compression level.
+     */
+    used -= s->strm->avail_in;      /* number of input bytes directly copied */
+    if (used) {
+        /* If any input was used, then no unused input remains in the window,
+         * therefore s->block_start == s->strstart.
          */
-        if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
-            FLUSH_BLOCK(s, 0);
+        if (used >= s->w_size) {    /* supplant the previous history */
+            s->matches = 2;         /* clear hash */
+            zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size);
+            s->strstart = s->w_size;
+            s->insert = s->strstart;
+        }
+        else {
+            if (s->window_size - s->strstart <= used) {
+                /* Slide the window down. */
+                s->strstart -= s->w_size;
+                zmemcpy(s->window, s->window + s->w_size, s->strstart);
+                if (s->matches < 2)
+                    s->matches++;   /* add a pending slide_hash() */
+                if (s->insert > s->strstart)
+                    s->insert = s->strstart;
+            }
+            zmemcpy(s->window + s->strstart, s->strm->next_in - used, used);
+            s->strstart += used;
+            s->insert += MIN(used, s->w_size - s->insert);
         }
+        s->block_start = s->strstart;
     }
-    FLUSH_BLOCK(s, flush == Z_FINISH);
-    return flush == Z_FINISH ? finish_done : block_done;
+    if (s->high_water < s->strstart)
+        s->high_water = s->strstart;
+
+    /* If the last block was written to next_out, then done. */
+    if (last)
+        return finish_done;
+
+    /* If flushing and all input has been consumed, then done. */
+    if (flush != Z_NO_FLUSH && flush != Z_FINISH &&
+        s->strm->avail_in == 0 && (long)s->strstart == s->block_start)
+        return block_done;
+
+    /* Fill the window with any remaining input. */
+    have = s->window_size - s->strstart;
+    if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) {
+        /* Slide the window down. */
+        s->block_start -= s->w_size;
+        s->strstart -= s->w_size;
+        zmemcpy(s->window, s->window + s->w_size, s->strstart);
+        if (s->matches < 2)
+            s->matches++;           /* add a pending slide_hash() */
+        have += s->w_size;          /* more space now */
+        if (s->insert > s->strstart)
+            s->insert = s->strstart;
+    }
+    if (have > s->strm->avail_in)
+        have = s->strm->avail_in;
+    if (have) {
+        read_buf(s->strm, s->window + s->strstart, have);
+        s->strstart += have;
+        s->insert += MIN(have, s->w_size - s->insert);
+    }
+    if (s->high_water < s->strstart)
+        s->high_water = s->strstart;
+
+    /* There was not enough avail_out to write a complete worthy or flushed
+     * stored block to next_out. Write a stored block to pending instead, if we
+     * have enough input for a worthy block, or if flushing and there is enough
+     * room for the remaining input as a stored block in the pending buffer.
+     */
+    have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
+        /* maximum stored block length that will fit in pending: */
+    have = MIN(s->pending_buf_size - have, MAX_STORED);
+    min_block = MIN(have, s->w_size);
+    left = s->strstart - s->block_start;
+    if (left >= min_block ||
+        ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH &&
+         s->strm->avail_in == 0 && left <= have)) {
+        len = MIN(left, have);
+        last = flush == Z_FINISH && s->strm->avail_in == 0 &&
+               len == left ? 1 : 0;
+        _tr_stored_block(s, (charf *)s->window + s->block_start, len, last);
+        s->block_start += len;
+        flush_pending(s->strm);
+    }
+
+    /* We've done all we can with the available input and output. */
+    return last ? finish_started : need_more;
 }
 
 /* ===========================================================================
@@ -1445,11 +1808,8 @@ local block_state deflate_stored(s, flush)
  * new strings in the dictionary only for unmatched strings or for short
  * matches. It is used only for the fast compression options.
  */
-local block_state deflate_fast(s, flush)
-    deflate_state *s;
-    int flush;
-{
-    IPos hash_head = NIL; /* head of the hash chain */
+local block_state deflate_fast(deflate_state *s, int flush) {
+    IPos hash_head;       /* head of the hash chain */
     int bflush;           /* set if current block must be flushed */
 
     for (;;) {
@@ -1466,9 +1826,10 @@ local block_state deflate_fast(s, flush)
             if (s->lookahead == 0) break; /* flush the current block */
         }
 
-        /* Insert the string window[strstart .. strstart+2] in the
+        /* Insert the string window[strstart .. strstart + 2] in the
          * dictionary, and set hash_head to the head of the hash chain:
          */
+        hash_head = NIL;
         if (s->lookahead >= MIN_MATCH) {
             INSERT_STRING(s, s->strstart, hash_head);
         }
@@ -1481,19 +1842,8 @@ local block_state deflate_fast(s, flush)
              * of window index 0 (in particular we have to avoid a match
              * of the string with itself at the start of the input file).
              */
-#ifdef FASTEST
-            if ((s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) ||
-                (s->strategy == Z_RLE && s->strstart - hash_head == 1)) {
-                s->match_length = longest_match_fast (s, hash_head);
-            }
-#else
-            if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) {
-                s->match_length = longest_match (s, hash_head);
-            } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) {
-                s->match_length = longest_match_fast (s, hash_head);
-            }
-#endif
-            /* longest_match() or longest_match_fast() sets match_start */
+            s->match_length = longest_match (s, hash_head);
+            /* longest_match() sets match_start */
         }
         if (s->match_length >= MIN_MATCH) {
             check_match(s, s->strstart, s->match_start, s->match_length);
@@ -1524,7 +1874,7 @@ local block_state deflate_fast(s, flush)
                 s->strstart += s->match_length;
                 s->match_length = 0;
                 s->ins_h = s->window[s->strstart];
-                UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+                UPDATE_HASH(s, s->ins_h, s->window[s->strstart + 1]);
 #if MIN_MATCH != 3
                 Call UPDATE_HASH() MIN_MATCH-3 more times
 #endif
@@ -1535,14 +1885,20 @@ local block_state deflate_fast(s, flush)
         } else {
             /* No match, output a literal byte */
             Tracevv((stderr,"%c", s->window[s->strstart]));
-            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            _tr_tally_lit(s, s->window[s->strstart], bflush);
             s->lookahead--;
             s->strstart++;
         }
         if (bflush) FLUSH_BLOCK(s, 0);
     }
-    FLUSH_BLOCK(s, flush == Z_FINISH);
-    return flush == Z_FINISH ? finish_done : block_done;
+    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->sym_next)
+        FLUSH_BLOCK(s, 0);
+    return block_done;
 }
 
 #ifndef FASTEST
@@ -1551,11 +1907,8 @@ local block_state deflate_fast(s, flush)
  * evaluation for matches: a match is finally adopted only if there is
  * no better match at the next window position.
  */
-local block_state deflate_slow(s, flush)
-    deflate_state *s;
-    int flush;
-{
-    IPos hash_head = NIL;    /* head of hash chain */
+local block_state deflate_slow(deflate_state *s, int flush) {
+    IPos hash_head;          /* head of hash chain */
     int bflush;              /* set if current block must be flushed */
 
     /* Process the input block. */
@@ -1573,9 +1926,10 @@ local block_state deflate_slow(s, flush)
             if (s->lookahead == 0) break; /* flush the current block */
         }
 
-        /* Insert the string window[strstart .. strstart+2] in the
+        /* Insert the string window[strstart .. strstart + 2] in the
          * dictionary, and set hash_head to the head of the hash chain:
          */
+        hash_head = NIL;
         if (s->lookahead >= MIN_MATCH) {
             INSERT_STRING(s, s->strstart, hash_head);
         }
@@ -1591,12 +1945,8 @@ local block_state deflate_slow(s, flush)
              * of window index 0 (in particular we have to avoid a match
              * of the string with itself at the start of the input file).
              */
-            if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) {
-                s->match_length = longest_match (s, hash_head);
-            } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) {
-                s->match_length = longest_match_fast (s, hash_head);
-            }
-            /* longest_match() or longest_match_fast() sets match_start */
+            s->match_length = longest_match (s, hash_head);
+            /* longest_match() sets match_start */
 
             if (s->match_length <= 5 && (s->strategy == Z_FILTERED
 #if TOO_FAR <= 32767
@@ -1618,17 +1968,17 @@ local block_state deflate_slow(s, flush)
             uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
             /* Do not insert strings in hash table beyond this. */
 
-            check_match(s, s->strstart-1, s->prev_match, s->prev_length);
+            check_match(s, s->strstart - 1, s->prev_match, s->prev_length);
 
-            _tr_tally_dist(s, s->strstart -1 - s->prev_match,
+            _tr_tally_dist(s, s->strstart - 1 - s->prev_match,
                            s->prev_length - MIN_MATCH, bflush);
 
             /* Insert in hash table all strings up to the end of the match.
-             * strstart-1 and strstart are already inserted. If there is not
+             * strstart - 1 and strstart are already inserted. If there is not
              * enough lookahead, the last two strings are not inserted in
              * the hash table.
              */
-            s->lookahead -= s->prev_length-1;
+            s->lookahead -= s->prev_length - 1;
             s->prev_length -= 2;
             do {
                 if (++s->strstart <= max_insert) {
@@ -1646,8 +1996,8 @@ local block_state deflate_slow(s, flush)
              * single literal. If there was a match but the current match
              * is longer, truncate the previous match to a single literal.
              */
-            Tracevv((stderr,"%c", s->window[s->strstart-1]));
-            _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+            Tracevv((stderr,"%c", s->window[s->strstart - 1]));
+            _tr_tally_lit(s, s->window[s->strstart - 1], bflush);
             if (bflush) {
                 FLUSH_BLOCK_ONLY(s, 0);
             }
@@ -1665,72 +2015,125 @@ local block_state deflate_slow(s, flush)
     }
     Assert (flush != Z_NO_FLUSH, "no flush?");
     if (s->match_available) {
-        Tracevv((stderr,"%c", s->window[s->strstart-1]));
-        _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+        Tracevv((stderr,"%c", s->window[s->strstart - 1]));
+        _tr_tally_lit(s, s->window[s->strstart - 1], bflush);
         s->match_available = 0;
     }
-    FLUSH_BLOCK(s, flush == Z_FINISH);
-    return flush == Z_FINISH ? finish_done : block_done;
+    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->sym_next)
+        FLUSH_BLOCK(s, 0);
+    return block_done;
 }
 #endif /* FASTEST */
 
-#if 0
 /* ===========================================================================
  * For Z_RLE, simply look for runs of bytes, generate matches only of distance
  * one.  Do not maintain a hash table.  (It will be regenerated if this run of
  * deflate switches away from Z_RLE.)
  */
-local block_state deflate_rle(s, flush)
-    deflate_state *s;
-    int flush;
-{
-    int bflush;         /* set if current block must be flushed */
-    uInt run;           /* length of run */
-    uInt max;           /* maximum length of run */
-    uInt prev;          /* byte at distance one to match */
-    Bytef *scan;        /* scan for end of run */
+local block_state deflate_rle(deflate_state *s, int flush) {
+    int bflush;             /* set if current block must be flushed */
+    uInt prev;              /* byte at distance one to match */
+    Bytef *scan, *strend;   /* scan goes up to strend for length of run */
 
     for (;;) {
         /* Make sure that we always have enough lookahead, except
          * at the end of the input file. We need MAX_MATCH bytes
-         * for the longest encodable run.
+         * for the longest run, plus one for the unrolled loop.
          */
-        if (s->lookahead < MAX_MATCH) {
+        if (s->lookahead <= MAX_MATCH) {
             fill_window(s);
-            if (s->lookahead < MAX_MATCH && flush == Z_NO_FLUSH) {
+            if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) {
                 return need_more;
             }
             if (s->lookahead == 0) break; /* flush the current block */
         }
 
         /* See how many times the previous byte repeats */
-        run = 0;
-        if (s->strstart > 0) {      /* if there is a previous byte, that is */
-            max = s->lookahead < MAX_MATCH ? s->lookahead : MAX_MATCH;
+        s->match_length = 0;
+        if (s->lookahead >= MIN_MATCH && s->strstart > 0) {
             scan = s->window + s->strstart - 1;
-            prev = *scan++;
-            do {
-                if (*scan++ != prev)
-                    break;
-            } while (++run < max);
+            prev = *scan;
+            if (prev == *++scan && prev == *++scan && prev == *++scan) {
+                strend = s->window + s->strstart + MAX_MATCH;
+                do {
+                } while (prev == *++scan && prev == *++scan &&
+                         prev == *++scan && prev == *++scan &&
+                         prev == *++scan && prev == *++scan &&
+                         prev == *++scan && prev == *++scan &&
+                         scan < strend);
+                s->match_length = MAX_MATCH - (uInt)(strend - scan);
+                if (s->match_length > s->lookahead)
+                    s->match_length = s->lookahead;
+            }
+            Assert(scan <= s->window + (uInt)(s->window_size - 1),
+                   "wild scan");
         }
 
         /* Emit match if have run of MIN_MATCH or longer, else emit literal */
-        if (run >= MIN_MATCH) {
-            check_match(s, s->strstart, s->strstart - 1, run);
-            _tr_tally_dist(s, 1, run - MIN_MATCH, bflush);
-            s->lookahead -= run;
-            s->strstart += run;
+        if (s->match_length >= MIN_MATCH) {
+            check_match(s, s->strstart, s->strstart - 1, s->match_length);
+
+            _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush);
+
+            s->lookahead -= s->match_length;
+            s->strstart += s->match_length;
+            s->match_length = 0;
         } else {
             /* No match, output a literal byte */
             Tracevv((stderr,"%c", s->window[s->strstart]));
-            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            _tr_tally_lit(s, s->window[s->strstart], bflush);
             s->lookahead--;
             s->strstart++;
         }
         if (bflush) FLUSH_BLOCK(s, 0);
     }
-    FLUSH_BLOCK(s, flush == Z_FINISH);
-    return flush == Z_FINISH ? finish_done : block_done;
+    s->insert = 0;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->sym_next)
+        FLUSH_BLOCK(s, 0);
+    return block_done;
+}
+
+/* ===========================================================================
+ * For Z_HUFFMAN_ONLY, do not look for matches.  Do not maintain a hash table.
+ * (It will be regenerated if this run of deflate switches away from Huffman.)
+ */
+local block_state deflate_huff(deflate_state *s, int flush) {
+    int bflush;             /* set if current block must be flushed */
+
+    for (;;) {
+        /* Make sure that we have a literal to write. */
+        if (s->lookahead == 0) {
+            fill_window(s);
+            if (s->lookahead == 0) {
+                if (flush == Z_NO_FLUSH)
+                    return need_more;
+                break;      /* flush the current block */
+            }
+        }
+
+        /* Output a literal byte */
+        s->match_length = 0;
+        Tracevv((stderr,"%c", s->window[s->strstart]));
+        _tr_tally_lit(s, s->window[s->strstart], bflush);
+        s->lookahead--;
+        s->strstart++;
+        if (bflush) FLUSH_BLOCK(s, 0);
+    }
+    s->insert = 0;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->sym_next)
+        FLUSH_BLOCK(s, 0);
+    return block_done;
 }
-#endif
diff --git a/reg-io/zlib/deflate.h b/reg-io/zlib/deflate.h
index 44e7a4a0..300c6ada 100644
--- a/reg-io/zlib/deflate.h
+++ b/reg-io/zlib/deflate.h
@@ -1,5 +1,5 @@
 /* deflate.h -- internal compression state
- * Copyright (C) 1995-2004 Jean-loup Gailly
+ * Copyright (C) 1995-2024 Jean-loup Gailly
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -23,6 +23,10 @@
 #  define GZIP
 #endif
 
+/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
+   the cost of a larger memory footprint */
+/* #define LIT_MEM */
+
 /* ===========================================================================
  * Internal compression state.
  */
@@ -48,29 +52,32 @@
 #define MAX_BITS 15
 /* All codes must not exceed MAX_BITS bits */
 
-#define INIT_STATE    42
-#define EXTRA_STATE   69
-#define NAME_STATE    73
-#define COMMENT_STATE 91
-#define HCRC_STATE   103
-#define BUSY_STATE   113
-#define FINISH_STATE 666
+#define Buf_size 16
+/* size of bit buffer in bi_buf */
+
+#define INIT_STATE    42    /* zlib header -> BUSY_STATE */
+#ifdef GZIP
+#  define GZIP_STATE  57    /* gzip header -> BUSY_STATE | EXTRA_STATE */
+#endif
+#define EXTRA_STATE   69    /* gzip extra block -> NAME_STATE */
+#define NAME_STATE    73    /* gzip file name -> COMMENT_STATE */
+#define COMMENT_STATE 91    /* gzip comment -> HCRC_STATE */
+#define HCRC_STATE   103    /* gzip header CRC -> BUSY_STATE */
+#define BUSY_STATE   113    /* deflate -> FINISH_STATE */
+#define FINISH_STATE 666    /* stream complete */
 /* Stream status */
 
 
 /* Data structure describing a single value and its code string. */
-typedef struct ct_data_s
-{
-   union
-   {
-      ush  freq;       /* frequency count */
-      ush  code;       /* bit string */
-   } fc;
-   union
-   {
-      ush  dad;        /* father node in Huffman tree */
-      ush  len;        /* length of bit string */
-   } dl;
+typedef struct ct_data_s {
+    union {
+        ush  freq;       /* frequency count */
+        ush  code;       /* bit string */
+    } fc;
+    union {
+        ush  dad;        /* father node in Huffman tree */
+        ush  len;        /* length of bit string */
+    } dl;
 } FAR ct_data;
 
 #define Freq fc.freq
@@ -80,11 +87,10 @@ typedef struct ct_data_s
 
 typedef struct static_tree_desc_s  static_tree_desc;
 
-typedef struct tree_desc_s
-{
-   ct_data *dyn_tree;           /* the dynamic tree */
-   int     max_code;            /* largest code with non zero frequency */
-   static_tree_desc *stat_desc; /* the corresponding static tree */
+typedef struct tree_desc_s {
+    ct_data *dyn_tree;           /* the dynamic tree */
+    int     max_code;            /* largest code with non zero frequency */
+    const static_tree_desc *stat_desc;  /* the corresponding static tree */
 } FAR tree_desc;
 
 typedef ush Pos;
@@ -95,182 +101,190 @@ typedef unsigned IPos;
  * save space in the various tables. IPos is used only for parameter passing.
  */
 
-typedef struct internal_state
-{
-   z_streamp strm;      /* pointer back to this zlib stream */
-   int   status;        /* as the name implies */
-   Bytef *pending_buf;  /* output still pending */
-   ulg   pending_buf_size; /* size of pending_buf */
-   Bytef *pending_out;  /* next pending byte to output to the stream */
-   uInt   pending;      /* nb of bytes in the pending buffer */
-   int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
-   gz_headerp  gzhead;  /* gzip header information to write */
-   uInt   gzindex;      /* where in extra, name, or comment */
-   Byte  method;        /* STORED (for zip only) or DEFLATED */
-   int   last_flush;    /* value of flush param for previous deflate call */
-
-   /* used by deflate.c: */
-
-   uInt  w_size;        /* LZ77 window size (32K by default) */
-   uInt  w_bits;        /* log2(w_size)  (8..16) */
-   uInt  w_mask;        /* w_size - 1 */
-
-   Bytef *window;
-   /* Sliding window. Input bytes are read into the second half of the window,
-    * and move to the first half later to keep a dictionary of at least wSize
-    * bytes. With this organization, matches are limited to a distance of
-    * wSize-MAX_MATCH bytes, but this ensures that IO is always
-    * performed with a length multiple of the block size. Also, it limits
-    * the window size to 64K, which is quite useful on MSDOS.
-    * To do: use the user input buffer as sliding window.
-    */
-
-   ulg window_size;
-   /* Actual size of window: 2*wSize, except when the user input buffer
-    * is directly used as sliding window.
-    */
-
-   Posf *prev;
-   /* Link to older string with same hash index. To limit the size of this
-    * array to 64K, this link is maintained only for the last 32K strings.
-    * An index in this array is thus a window index modulo 32K.
-    */
-
-   Posf *head; /* Heads of the hash chains or NIL. */
-
-   uInt  ins_h;          /* hash index of string to be inserted */
-   uInt  hash_size;      /* number of elements in hash table */
-   uInt  hash_bits;      /* log2(hash_size) */
-   uInt  hash_mask;      /* hash_size-1 */
-
-   uInt  hash_shift;
-   /* Number of bits by which ins_h must be shifted at each input
-    * step. It must be such that after MIN_MATCH steps, the oldest
-    * byte no longer takes part in the hash key, that is:
-    *   hash_shift * MIN_MATCH >= hash_bits
-    */
-
-   long block_start;
-   /* Window position at the beginning of the current output block. Gets
-    * negative when the window is moved backwards.
-    */
-
-   uInt match_length;           /* length of best match */
-   IPos prev_match;             /* previous match */
-   int match_available;         /* set if previous match exists */
-   uInt strstart;               /* start of string to insert */
-   uInt match_start;            /* start of matching string */
-   uInt lookahead;              /* number of valid bytes ahead in window */
-
-   uInt prev_length;
-   /* Length of the best match at previous step. Matches not greater than this
-    * are discarded. This is used in the lazy match evaluation.
-    */
-
-   uInt max_chain_length;
-   /* To speed up deflation, hash chains are never searched beyond this
-    * length.  A higher limit improves compression ratio but degrades the
-    * speed.
-    */
-
-   uInt max_lazy_match;
-   /* Attempt to find a better match only when the current match is strictly
-    * smaller than this value. This mechanism is used only for compression
-    * levels >= 4.
-    */
+typedef struct internal_state {
+    z_streamp strm;      /* pointer back to this zlib stream */
+    int   status;        /* as the name implies */
+    Bytef *pending_buf;  /* output still pending */
+    ulg   pending_buf_size; /* size of pending_buf */
+    Bytef *pending_out;  /* next pending byte to output to the stream */
+    ulg   pending;       /* nb of bytes in the pending buffer */
+    int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
+    gz_headerp  gzhead;  /* gzip header information to write */
+    ulg   gzindex;       /* where in extra, name, or comment */
+    Byte  method;        /* can only be DEFLATED */
+    int   last_flush;    /* value of flush param for previous deflate call */
+
+                /* used by deflate.c: */
+
+    uInt  w_size;        /* LZ77 window size (32K by default) */
+    uInt  w_bits;        /* log2(w_size)  (8..16) */
+    uInt  w_mask;        /* w_size - 1 */
+
+    Bytef *window;
+    /* Sliding window. Input bytes are read into the second half of the window,
+     * and move to the first half later to keep a dictionary of at least wSize
+     * bytes. With this organization, matches are limited to a distance of
+     * wSize-MAX_MATCH bytes, but this ensures that IO is always
+     * performed with a length multiple of the block size. Also, it limits
+     * the window size to 64K, which is quite useful on MSDOS.
+     * To do: use the user input buffer as sliding window.
+     */
+
+    ulg window_size;
+    /* Actual size of window: 2*wSize, except when the user input buffer
+     * is directly used as sliding window.
+     */
+
+    Posf *prev;
+    /* Link to older string with same hash index. To limit the size of this
+     * array to 64K, this link is maintained only for the last 32K strings.
+     * An index in this array is thus a window index modulo 32K.
+     */
+
+    Posf *head; /* Heads of the hash chains or NIL. */
+
+    uInt  ins_h;          /* hash index of string to be inserted */
+    uInt  hash_size;      /* number of elements in hash table */
+    uInt  hash_bits;      /* log2(hash_size) */
+    uInt  hash_mask;      /* hash_size-1 */
+
+    uInt  hash_shift;
+    /* Number of bits by which ins_h must be shifted at each input
+     * step. It must be such that after MIN_MATCH steps, the oldest
+     * byte no longer takes part in the hash key, that is:
+     *   hash_shift * MIN_MATCH >= hash_bits
+     */
+
+    long block_start;
+    /* Window position at the beginning of the current output block. Gets
+     * negative when the window is moved backwards.
+     */
+
+    uInt match_length;           /* length of best match */
+    IPos prev_match;             /* previous match */
+    int match_available;         /* set if previous match exists */
+    uInt strstart;               /* start of string to insert */
+    uInt match_start;            /* start of matching string */
+    uInt lookahead;              /* number of valid bytes ahead in window */
+
+    uInt prev_length;
+    /* Length of the best match at previous step. Matches not greater than this
+     * are discarded. This is used in the lazy match evaluation.
+     */
+
+    uInt max_chain_length;
+    /* To speed up deflation, hash chains are never searched beyond this
+     * length.  A higher limit improves compression ratio but degrades the
+     * speed.
+     */
+
+    uInt max_lazy_match;
+    /* Attempt to find a better match only when the current match is strictly
+     * smaller than this value. This mechanism is used only for compression
+     * levels >= 4.
+     */
 #   define max_insert_length  max_lazy_match
-   /* Insert new strings in the hash table only if the match length is not
-    * greater than this length. This saves time but degrades compression.
-    * max_insert_length is used only for compression levels <= 3.
-    */
-
-   int level;    /* compression level (1..9) */
-   int strategy; /* favor or force Huffman coding*/
-
-   uInt good_match;
-   /* Use a faster search when the previous match is longer than this */
-
-   int nice_match; /* Stop searching when current match exceeds this */
-
-   /* used by trees.c: */
-   /* Didn't use ct_data typedef below to supress compiler warning */
-   struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
-   struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
-   struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
-
-   struct tree_desc_s l_desc;               /* desc. for literal tree */
-   struct tree_desc_s d_desc;               /* desc. for distance tree */
-   struct tree_desc_s bl_desc;              /* desc. for bit length tree */
-
-   ush bl_count[MAX_BITS+1];
-   /* number of codes at each bit length for an optimal tree */
-
-   int heap[2*L_CODES+1];      /* heap used to build the Huffman trees */
-   int heap_len;               /* number of elements in the heap */
-   int heap_max;               /* element of largest frequency */
-   /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
-    * The same heap array is used to build all trees.
-    */
-
-   uch depth[2*L_CODES+1];
-   /* Depth of each subtree used as tie breaker for trees of equal frequency
-    */
-
-   uchf *l_buf;          /* buffer for literals or lengths */
-
-   uInt  lit_bufsize;
-   /* Size of match buffer for literals/lengths.  There are 4 reasons for
-    * limiting lit_bufsize to 64K:
-    *   - frequencies can be kept in 16 bit counters
-    *   - if compression is not successful for the first block, all input
-    *     data is still in the window so we can still emit a stored block even
-    *     when input comes from standard input.  (This can also be done for
-    *     all blocks if lit_bufsize is not greater than 32K.)
-    *   - if compression is not successful for a file smaller than 64K, we can
-    *     even emit a stored file instead of a stored block (saving 5 bytes).
-    *     This is applicable only for zip (not gzip or zlib).
-    *   - creating new Huffman trees less frequently may not provide fast
-    *     adaptation to changes in the input data statistics. (Take for
-    *     example a binary file with poorly compressible code followed by
-    *     a highly compressible string table.) Smaller buffer sizes give
-    *     fast adaptation but have of course the overhead of transmitting
-    *     trees more frequently.
-    *   - I can't count above 4
-    */
-
-   uInt last_lit;      /* running index in l_buf */
-
-   ushf *d_buf;
-   /* Buffer for distances. To simplify the code, d_buf and l_buf have
-    * the same number of elements. To use different lengths, an extra flag
-    * array would be necessary.
-    */
-
-   ulg opt_len;        /* bit length of current block with optimal trees */
-   ulg static_len;     /* bit length of current block with static trees */
-   uInt matches;       /* number of string matches in current block */
-   int last_eob_len;   /* bit length of EOB code for last block */
-
-#ifdef DEBUG
-   ulg compressed_len; /* total bit length of compressed file mod 2^32 */
-   ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
+    /* Insert new strings in the hash table only if the match length is not
+     * greater than this length. This saves time but degrades compression.
+     * max_insert_length is used only for compression levels <= 3.
+     */
+
+    int level;    /* compression level (1..9) */
+    int strategy; /* favor or force Huffman coding*/
+
+    uInt good_match;
+    /* Use a faster search when the previous match is longer than this */
+
+    int nice_match; /* Stop searching when current match exceeds this */
+
+                /* used by trees.c: */
+    /* Didn't use ct_data typedef below to suppress compiler warning */
+    struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
+    struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
+    struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
+
+    struct tree_desc_s l_desc;               /* desc. for literal tree */
+    struct tree_desc_s d_desc;               /* desc. for distance tree */
+    struct tree_desc_s bl_desc;              /* desc. for bit length tree */
+
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    int heap[2*L_CODES+1];      /* heap used to build the Huffman trees */
+    int heap_len;               /* number of elements in the heap */
+    int heap_max;               /* element of largest frequency */
+    /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
+     * The same heap array is used to build all trees.
+     */
+
+    uch depth[2*L_CODES+1];
+    /* Depth of each subtree used as tie breaker for trees of equal frequency
+     */
+
+#ifdef LIT_MEM
+#   define LIT_BUFS 5
+    ushf *d_buf;          /* buffer for distances */
+    uchf *l_buf;          /* buffer for literals/lengths */
+#else
+#   define LIT_BUFS 4
+    uchf *sym_buf;        /* buffer for distances and literals/lengths */
 #endif
 
-   ush bi_buf;
-   /* Output buffer. bits are inserted starting at the bottom (least
-    * significant bits).
-    */
-   int bi_valid;
-   /* Number of valid bits in bi_buf.  All bits above the last valid bit
-    * are always zero.
-    */
+    uInt  lit_bufsize;
+    /* Size of match buffer for literals/lengths.  There are 4 reasons for
+     * limiting lit_bufsize to 64K:
+     *   - frequencies can be kept in 16 bit counters
+     *   - if compression is not successful for the first block, all input
+     *     data is still in the window so we can still emit a stored block even
+     *     when input comes from standard input.  (This can also be done for
+     *     all blocks if lit_bufsize is not greater than 32K.)
+     *   - if compression is not successful for a file smaller than 64K, we can
+     *     even emit a stored file instead of a stored block (saving 5 bytes).
+     *     This is applicable only for zip (not gzip or zlib).
+     *   - creating new Huffman trees less frequently may not provide fast
+     *     adaptation to changes in the input data statistics. (Take for
+     *     example a binary file with poorly compressible code followed by
+     *     a highly compressible string table.) Smaller buffer sizes give
+     *     fast adaptation but have of course the overhead of transmitting
+     *     trees more frequently.
+     *   - I can't count above 4
+     */
+
+    uInt sym_next;      /* running index in symbol buffer */
+    uInt sym_end;       /* symbol table full when sym_next reaches this */
+
+    ulg opt_len;        /* bit length of current block with optimal trees */
+    ulg static_len;     /* bit length of current block with static trees */
+    uInt matches;       /* number of string matches in current block */
+    uInt insert;        /* bytes at end of window left to insert */
+
+#ifdef ZLIB_DEBUG
+    ulg compressed_len; /* total bit length of compressed file mod 2^32 */
+    ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
+#endif
+
+    ush bi_buf;
+    /* Output buffer. bits are inserted starting at the bottom (least
+     * significant bits).
+     */
+    int bi_valid;
+    /* Number of valid bits in bi_buf.  All bits above the last valid bit
+     * are always zero.
+     */
+
+    ulg high_water;
+    /* High water mark offset in window for initialized bytes -- bytes above
+     * this are set to zero in order to avoid memory check warnings when
+     * longest match routines access bytes past the input.  This is then
+     * updated to the new high water mark.
+     */
 
 } FAR deflate_state;
 
 /* Output a byte on the stream.
  * IN assertion: there is enough room in pending_buf.
  */
-#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
+#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);}
 
 
 #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
@@ -283,14 +297,19 @@ typedef struct internal_state
  * distances are limited to MAX_DIST instead of WSIZE.
  */
 
-/* in trees.c */
-void _tr_init         OF((deflate_state *s));
-int  _tr_tally        OF((deflate_state *s, unsigned dist, unsigned lc));
-void _tr_flush_block  OF((deflate_state *s, charf *buf, ulg stored_len,
-                          int eof));
-void _tr_align        OF((deflate_state *s));
-void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
-                          int eof));
+#define WIN_INIT MAX_MATCH
+/* Number of bytes after end of data in window to initialize in order to avoid
+   memory checker errors from longest match routines */
+
+        /* in trees.c */
+void ZLIB_INTERNAL _tr_init(deflate_state *s);
+int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc);
+void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
+                                   ulg stored_len, int last);
+void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s);
+void ZLIB_INTERNAL _tr_align(deflate_state *s);
+void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
+                                    ulg stored_len, int last);
 
 #define d_code(dist) \
    ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
@@ -299,34 +318,56 @@ void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
  * used.
  */
 
-#ifndef DEBUG
+#ifndef ZLIB_DEBUG
 /* Inline versions of _tr_tally for speed: */
 
 #if defined(GEN_TREES_H) || !defined(STDC)
-extern uch _length_code[];
-extern uch _dist_code[];
+  extern uch ZLIB_INTERNAL _length_code[];
+  extern uch ZLIB_INTERNAL _dist_code[];
 #else
-extern const uch _length_code[];
-extern const uch _dist_code[];
+  extern const uch ZLIB_INTERNAL _length_code[];
+  extern const uch ZLIB_INTERNAL _dist_code[];
 #endif
 
+#ifdef LIT_MEM
+# define _tr_tally_lit(s, c, flush) \
+  { uch cc = (c); \
+    s->d_buf[s->sym_next] = 0; \
+    s->l_buf[s->sym_next++] = cc; \
+    s->dyn_ltree[cc].Freq++; \
+    flush = (s->sym_next == s->sym_end); \
+   }
+# define _tr_tally_dist(s, distance, length, flush) \
+  { uch len = (uch)(length); \
+    ush dist = (ush)(distance); \
+    s->d_buf[s->sym_next] = dist; \
+    s->l_buf[s->sym_next++] = len; \
+    dist--; \
+    s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
+    s->dyn_dtree[d_code(dist)].Freq++; \
+    flush = (s->sym_next == s->sym_end); \
+  }
+#else
 # define _tr_tally_lit(s, c, flush) \
   { uch cc = (c); \
-    s->d_buf[s->last_lit] = 0; \
-    s->l_buf[s->last_lit++] = cc; \
+    s->sym_buf[s->sym_next++] = 0; \
+    s->sym_buf[s->sym_next++] = 0; \
+    s->sym_buf[s->sym_next++] = cc; \
     s->dyn_ltree[cc].Freq++; \
-    flush = (s->last_lit == s->lit_bufsize-1); \
+    flush = (s->sym_next == s->sym_end); \
    }
 # define _tr_tally_dist(s, distance, length, flush) \
-  { uch len = (length); \
-    ush dist = (distance); \
-    s->d_buf[s->last_lit] = dist; \
-    s->l_buf[s->last_lit++] = len; \
+  { uch len = (uch)(length); \
+    ush dist = (ush)(distance); \
+    s->sym_buf[s->sym_next++] = (uch)dist; \
+    s->sym_buf[s->sym_next++] = (uch)(dist >> 8); \
+    s->sym_buf[s->sym_next++] = len; \
     dist--; \
     s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
     s->dyn_dtree[d_code(dist)].Freq++; \
-    flush = (s->last_lit == s->lit_bufsize-1); \
+    flush = (s->sym_next == s->sym_end); \
   }
+#endif
 #else
 # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
 # define _tr_tally_dist(s, distance, length, flush) \
diff --git a/reg-io/zlib/gzclose.c b/reg-io/zlib/gzclose.c
new file mode 100644
index 00000000..48d6a86f
--- /dev/null
+++ b/reg-io/zlib/gzclose.c
@@ -0,0 +1,23 @@
+/* gzclose.c -- zlib gzclose() function
+ * Copyright (C) 2004, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "gzguts.h"
+
+/* gzclose() is in a separate file so that it is linked in only if it is used.
+   That way the other gzclose functions can be used instead to avoid linking in
+   unneeded compression or decompression routines. */
+int ZEXPORT gzclose(gzFile file) {
+#ifndef NO_GZCOMPRESS
+    gz_statep state;
+
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file);
+#else
+    return gzclose_r(file);
+#endif
+}
diff --git a/reg-io/zlib/gzguts.h b/reg-io/zlib/gzguts.h
new file mode 100644
index 00000000..eba72085
--- /dev/null
+++ b/reg-io/zlib/gzguts.h
@@ -0,0 +1,214 @@
+/* gzguts.h -- zlib internal header definitions for gz* operations
+ * Copyright (C) 2004-2024 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef _LARGEFILE64_SOURCE
+#  ifndef _LARGEFILE_SOURCE
+#    define _LARGEFILE_SOURCE 1
+#  endif
+#  undef _FILE_OFFSET_BITS
+#  undef _TIME_BITS
+#endif
+
+#ifdef HAVE_HIDDEN
+#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+#  define ZLIB_INTERNAL
+#endif
+
+#include <stdio.h>
+#include "zlib.h"
+#ifdef STDC
+#  include <string.h>
+#  include <stdlib.h>
+#  include <limits.h>
+#endif
+
+#ifndef _POSIX_SOURCE
+#  define _POSIX_SOURCE
+#endif
+#include <fcntl.h>
+
+#ifdef _WIN32
+#  include <stddef.h>
+#endif
+
+#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32)
+#  include <io.h>
+#endif
+
+#if defined(_WIN32)
+#  define WIDECHAR
+#endif
+
+#ifdef WINAPI_FAMILY
+#  define open _open
+#  define read _read
+#  define write _write
+#  define close _close
+#endif
+
+#ifdef NO_DEFLATE       /* for compatibility with old definition */
+#  define NO_GZCOMPRESS
+#endif
+
+#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#if defined(__CYGWIN__)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#ifndef HAVE_VSNPRINTF
+#  ifdef MSDOS
+/* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
+   but for now we just assume it doesn't. */
+#    define NO_vsnprintf
+#  endif
+#  ifdef __TURBOC__
+#    define NO_vsnprintf
+#  endif
+#  ifdef WIN32
+/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
+#    if !defined(vsnprintf) && !defined(NO_vsnprintf)
+#      if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 )
+#         define vsnprintf _vsnprintf
+#      endif
+#    endif
+#  endif
+#  ifdef __SASC
+#    define NO_vsnprintf
+#  endif
+#  ifdef VMS
+#    define NO_vsnprintf
+#  endif
+#  ifdef __OS400__
+#    define NO_vsnprintf
+#  endif
+#  ifdef __MVS__
+#    define NO_vsnprintf
+#  endif
+#endif
+
+/* unlike snprintf (which is required in C99), _snprintf does not guarantee
+   null termination of the result -- however this is only used in gzlib.c where
+   the result is assured to fit in the space provided */
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#  define snprintf _snprintf
+#endif
+
+#ifndef local
+#  define local static
+#endif
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
+
+/* gz* functions always use library allocation functions */
+#ifndef STDC
+  extern voidp  malloc(uInt size);
+  extern void   free(voidpf ptr);
+#endif
+
+/* get errno and strerror definition */
+#if defined UNDER_CE
+#  include <windows.h>
+#  define zstrerror() gz_strwinerror((DWORD)GetLastError())
+#else
+#  ifndef NO_STRERROR
+#    include <errno.h>
+#    define zstrerror() strerror(errno)
+#  else
+#    define zstrerror() "stdio error (consult errno)"
+#  endif
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
+    ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *);
+    ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int);
+    ZEXTERN z_off64_t ZEXPORT gztell64(gzFile);
+    ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile);
+#endif
+
+/* default memLevel */
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
+#endif
+
+/* default i/o buffer size -- double this for output when reading (this and
+   twice this must be able to fit in an unsigned type) */
+#define GZBUFSIZE 8192
+
+/* gzip modes, also provide a little integrity check on the passed structure */
+#define GZ_NONE 0
+#define GZ_READ 7247
+#define GZ_WRITE 31153
+#define GZ_APPEND 1     /* mode set to GZ_WRITE after the file is opened */
+
+/* values for gz_state how */
+#define LOOK 0      /* look for a gzip header */
+#define COPY 1      /* copy input directly */
+#define GZIP 2      /* decompress a gzip stream */
+
+/* internal gzip file state data structure */
+typedef struct {
+        /* exposed contents for gzgetc() macro */
+    struct gzFile_s x;      /* "x" for exposed */
+                            /* x.have: number of bytes available at x.next */
+                            /* x.next: next output data to deliver or write */
+                            /* x.pos: current position in uncompressed data */
+        /* used for both reading and writing */
+    int mode;               /* see gzip modes above */
+    int fd;                 /* file descriptor */
+    char *path;             /* path or fd for error messages */
+    unsigned size;          /* buffer size, zero if not allocated yet */
+    unsigned want;          /* requested buffer size, default is GZBUFSIZE */
+    unsigned char *in;      /* input buffer (double-sized when writing) */
+    unsigned char *out;     /* output buffer (double-sized when reading) */
+    int direct;             /* 0 if processing gzip, 1 if transparent */
+        /* just for reading */
+    int how;                /* 0: get header, 1: copy, 2: decompress */
+    z_off64_t start;        /* where the gzip data started, for rewinding */
+    int eof;                /* true if end of input file reached */
+    int past;               /* true if read requested past end */
+        /* just for writing */
+    int level;              /* compression level */
+    int strategy;           /* compression strategy */
+    int reset;              /* true if a reset is pending after a Z_FINISH */
+        /* seek request */
+    z_off64_t skip;         /* amount to skip (already rewound if backwards) */
+    int seek;               /* true if seek request pending */
+        /* error information */
+    int err;                /* error code */
+    char *msg;              /* error message */
+        /* zlib inflate or deflate stream */
+    z_stream strm;          /* stream structure in-place (not a pointer) */
+} gz_state;
+typedef gz_state FAR *gz_statep;
+
+/* shared functions */
+void ZLIB_INTERNAL gz_error(gz_statep, int, const char *);
+#if defined UNDER_CE
+char ZLIB_INTERNAL *gz_strwinerror(DWORD error);
+#endif
+
+/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t
+   value -- needed when comparing unsigned to z_off64_t, which is signed
+   (possible z_off64_t types off_t, off64_t, and long are all signed) */
+unsigned ZLIB_INTERNAL gz_intmax(void);
+#define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax())
diff --git a/reg-io/zlib/gzio.c b/reg-io/zlib/gzio.c
deleted file mode 100644
index 7e90f492..00000000
--- a/reg-io/zlib/gzio.c
+++ /dev/null
@@ -1,1026 +0,0 @@
-/* gzio.c -- IO on .gz files
- * Copyright (C) 1995-2005 Jean-loup Gailly.
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- * Compile this file with -DNO_GZCOMPRESS to avoid the compression code.
- */
-
-/* @(#) $Id$ */
-
-#include <stdio.h>
-
-#include "zutil.h"
-
-#ifdef NO_DEFLATE       /* for compatibility with old definition */
-#  define NO_GZCOMPRESS
-#endif
-
-#ifndef NO_DUMMY_DECL
-struct internal_state {int dummy;}; /* for buggy compilers */
-#endif
-
-#ifndef Z_BUFSIZE
-#  ifdef MAXSEG_64K
-#    define Z_BUFSIZE 4096 /* minimize memory usage for 16-bit DOS */
-#  else
-#    define Z_BUFSIZE 16384
-#  endif
-#endif
-#ifndef Z_PRINTF_BUFSIZE
-#  define Z_PRINTF_BUFSIZE 4096
-#endif
-
-#ifdef __MVS__
-#  pragma map (fdopen , "\174\174FDOPEN")
-   FILE *fdopen(int, const char *);
-#endif
-
-#ifndef STDC
-extern voidp  malloc OF((uInt size));
-extern void   free   OF((voidpf ptr));
-#endif
-
-#define ALLOC(size) malloc(size)
-#define TRYFREE(p) {if (p) free(p);}
-
-static int const gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
-
-/* gzip flag byte */
-#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
-#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
-#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
-#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
-#define COMMENT      0x10 /* bit 4 set: file comment present */
-#define RESERVED     0xE0 /* bits 5..7: reserved */
-
-typedef struct gz_stream {
-    z_stream stream;
-    int      z_err;   /* error code for last stream operation */
-    int      z_eof;   /* set if end of input file */
-    FILE     *file;   /* .gz file */
-    Byte     *inbuf;  /* input buffer */
-    Byte     *outbuf; /* output buffer */
-    uLong    crc;     /* crc32 of uncompressed data */
-    char     *msg;    /* error message */
-    char     *path;   /* path name for debugging only */
-    int      transparent; /* 1 if input file is not a .gz file */
-    char     mode;    /* 'w' or 'r' */
-    z_off_t  start;   /* start of compressed data in file (header skipped) */
-    z_off_t  in;      /* bytes into deflate or inflate */
-    z_off_t  out;     /* bytes out of deflate or inflate */
-    int      back;    /* one character push-back */
-    int      last;    /* true if push-back is last character */
-} gz_stream;
-
-
-local gzFile gz_open      OF((const char *path, const char *mode, int  fd));
-local int do_flush        OF((gzFile file, int flush));
-local int    get_byte     OF((gz_stream *s));
-local void   check_header OF((gz_stream *s));
-local int    destroy      OF((gz_stream *s));
-local void   putLong      OF((FILE *file, uLong x));
-local uLong  getLong      OF((gz_stream *s));
-
-/* ===========================================================================
-     Opens a gzip (.gz) file for reading or writing. The mode parameter
-   is as in fopen ("rb" or "wb"). The file is given either by file descriptor
-   or path name (if fd == -1).
-     gz_open returns NULL if the file could not be opened or if there was
-   insufficient memory to allocate the (de)compression state; errno
-   can be checked to distinguish the two cases (if errno is zero, the
-   zlib error is Z_MEM_ERROR).
-*/
-local gzFile gz_open (path, mode, fd)
-    const char *path;
-    const char *mode;
-    int  fd;
-{
-    int err;
-    int level = Z_DEFAULT_COMPRESSION; /* compression level */
-    int strategy = Z_DEFAULT_STRATEGY; /* compression strategy */
-    char *p = (char*)mode;
-    gz_stream *s;
-    char fmode[80]; /* copy of mode, without the compression level */
-    char *m = fmode;
-
-    if (!path || !mode) return Z_NULL;
-
-    s = (gz_stream *)ALLOC(sizeof(gz_stream));
-    if (!s) return Z_NULL;
-
-    s->stream.zalloc = (alloc_func)0;
-    s->stream.zfree = (free_func)0;
-    s->stream.opaque = (voidpf)0;
-    s->stream.next_in = s->inbuf = Z_NULL;
-    s->stream.next_out = s->outbuf = Z_NULL;
-    s->stream.avail_in = s->stream.avail_out = 0;
-    s->file = NULL;
-    s->z_err = Z_OK;
-    s->z_eof = 0;
-    s->in = 0;
-    s->out = 0;
-    s->back = EOF;
-    s->crc = crc32(0L, Z_NULL, 0);
-    s->msg = NULL;
-    s->transparent = 0;
-
-    s->path = (char*)ALLOC(strlen(path)+1);
-    if (s->path == NULL) {
-        return destroy(s), (gzFile)Z_NULL;
-    }
-    strcpy(s->path, path); /* do this early for debugging */
-
-    s->mode = '\0';
-    do {
-        if (*p == 'r') s->mode = 'r';
-        if (*p == 'w' || *p == 'a') s->mode = 'w';
-        if (*p >= '0' && *p <= '9') {
-            level = *p - '0';
-        } else if (*p == 'f') {
-          strategy = Z_FILTERED;
-        } else if (*p == 'h') {
-          strategy = Z_HUFFMAN_ONLY;
-        } else if (*p == 'R') {
-          strategy = Z_RLE;
-        } else {
-            *m++ = *p; /* copy the mode */
-        }
-    } while (*p++ && m != fmode + sizeof(fmode));
-    if (s->mode == '\0') return destroy(s), (gzFile)Z_NULL;
-
-    if (s->mode == 'w') {
-#ifdef NO_GZCOMPRESS
-        err = Z_STREAM_ERROR;
-#else
-        err = deflateInit2(&(s->stream), level,
-                           Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, strategy);
-        /* windowBits is passed < 0 to suppress zlib header */
-
-        s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE);
-#endif
-        if (err != Z_OK || s->outbuf == Z_NULL) {
-            return destroy(s), (gzFile)Z_NULL;
-        }
-    } else {
-        s->stream.next_in  = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE);
-
-        err = inflateInit2(&(s->stream), -MAX_WBITS);
-        /* windowBits is passed < 0 to tell that there is no zlib header.
-         * Note that in this case inflate *requires* an extra "dummy" byte
-         * after the compressed stream in order to complete decompression and
-         * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are
-         * present after the compressed stream.
-         */
-        if (err != Z_OK || s->inbuf == Z_NULL) {
-            return destroy(s), (gzFile)Z_NULL;
-        }
-    }
-    s->stream.avail_out = Z_BUFSIZE;
-
-    errno = 0;
-    s->file = fd < 0 ? F_OPEN(path, fmode) : (FILE*)fdopen(fd, fmode);
-
-    if (s->file == NULL) {
-        return destroy(s), (gzFile)Z_NULL;
-    }
-    if (s->mode == 'w') {
-        /* Write a very simple .gz header:
-         */
-        fprintf(s->file, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1],
-             Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE);
-        s->start = 10L;
-        /* We use 10L instead of ftell(s->file) to because ftell causes an
-         * fflush on some systems. This version of the library doesn't use
-         * start anyway in write mode, so this initialization is not
-         * necessary.
-         */
-    } else {
-        check_header(s); /* skip the .gz header */
-        s->start = ftell(s->file) - s->stream.avail_in;
-    }
-
-    return (gzFile)s;
-}
-
-/* ===========================================================================
-     Opens a gzip (.gz) file for reading or writing.
-*/
-gzFile ZEXPORT gzopen (path, mode)
-    const char *path;
-    const char *mode;
-{
-    return gz_open (path, mode, -1);
-}
-
-/* ===========================================================================
-     Associate a gzFile with the file descriptor fd. fd is not dup'ed here
-   to mimic the behavio(u)r of fdopen.
-*/
-gzFile ZEXPORT gzdopen (fd, mode)
-    int fd;
-    const char *mode;
-{
-    char name[46];      /* allow for up to 128-bit integers */
-
-    if (fd < 0) return (gzFile)Z_NULL;
-    sprintf(name, "<fd:%d>", fd); /* for debugging */
-
-    return gz_open (name, mode, fd);
-}
-
-/* ===========================================================================
- * Update the compression level and strategy
- */
-int ZEXPORT gzsetparams (file, level, strategy)
-    gzFile file;
-    int level;
-    int strategy;
-{
-    gz_stream *s = (gz_stream*)file;
-
-    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;
-
-    /* Make room to allow flushing */
-    if (s->stream.avail_out == 0) {
-
-        s->stream.next_out = s->outbuf;
-        if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) {
-            s->z_err = Z_ERRNO;
-        }
-        s->stream.avail_out = Z_BUFSIZE;
-    }
-
-    return deflateParams (&(s->stream), level, strategy);
-}
-
-/* ===========================================================================
-     Read a byte from a gz_stream; update next_in and avail_in. Return EOF
-   for end of file.
-   IN assertion: the stream s has been sucessfully opened for reading.
-*/
-local int get_byte(s)
-    gz_stream *s;
-{
-    if (s->z_eof) return EOF;
-    if (s->stream.avail_in == 0) {
-        errno = 0;
-        s->stream.avail_in = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file);
-        if (s->stream.avail_in == 0) {
-            s->z_eof = 1;
-            if (ferror(s->file)) s->z_err = Z_ERRNO;
-            return EOF;
-        }
-        s->stream.next_in = s->inbuf;
-    }
-    s->stream.avail_in--;
-    return *(s->stream.next_in)++;
-}
-
-/* ===========================================================================
-      Check the gzip header of a gz_stream opened for reading. Set the stream
-    mode to transparent if the gzip magic header is not present; set s->err
-    to Z_DATA_ERROR if the magic header is present but the rest of the header
-    is incorrect.
-    IN assertion: the stream s has already been created sucessfully;
-       s->stream.avail_in is zero for the first time, but may be non-zero
-       for concatenated .gz files.
-*/
-local void check_header(s)
-    gz_stream *s;
-{
-    int method; /* method byte */
-    int flags;  /* flags byte */
-    uInt len;
-    int c;
-
-    /* Assure two bytes in the buffer so we can peek ahead -- handle case
-       where first byte of header is at the end of the buffer after the last
-       gzip segment */
-    len = s->stream.avail_in;
-    if (len < 2) {
-        if (len) s->inbuf[0] = s->stream.next_in[0];
-        errno = 0;
-        len = (uInt)fread(s->inbuf + len, 1, Z_BUFSIZE >> len, s->file);
-        if (len == 0 && ferror(s->file)) s->z_err = Z_ERRNO;
-        s->stream.avail_in += len;
-        s->stream.next_in = s->inbuf;
-        if (s->stream.avail_in < 2) {
-            s->transparent = s->stream.avail_in;
-            return;
-        }
-    }
-
-    /* Peek ahead to check the gzip magic header */
-    if (s->stream.next_in[0] != gz_magic[0] ||
-        s->stream.next_in[1] != gz_magic[1]) {
-        s->transparent = 1;
-        return;
-    }
-    s->stream.avail_in -= 2;
-    s->stream.next_in += 2;
-
-    /* Check the rest of the gzip header */
-    method = get_byte(s);
-    flags = get_byte(s);
-    if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
-        s->z_err = Z_DATA_ERROR;
-        return;
-    }
-
-    /* Discard time, xflags and OS code: */
-    for (len = 0; len < 6; len++) (void)get_byte(s);
-
-    if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
-        len  =  (uInt)get_byte(s);
-        len += ((uInt)get_byte(s))<<8;
-        /* len is garbage if EOF but the loop below will quit anyway */
-        while (len-- != 0 && get_byte(s) != EOF) ;
-    }
-    if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
-        while ((c = get_byte(s)) != 0 && c != EOF) ;
-    }
-    if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
-        while ((c = get_byte(s)) != 0 && c != EOF) ;
-    }
-    if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
-        for (len = 0; len < 2; len++) (void)get_byte(s);
-    }
-    s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK;
-}
-
- /* ===========================================================================
- * Cleanup then free the given gz_stream. Return a zlib error code.
-   Try freeing in the reverse order of allocations.
- */
-local int destroy (s)
-    gz_stream *s;
-{
-    int err = Z_OK;
-
-    if (!s) return Z_STREAM_ERROR;
-
-    TRYFREE(s->msg);
-
-    if (s->stream.state != NULL) {
-        if (s->mode == 'w') {
-#ifdef NO_GZCOMPRESS
-            err = Z_STREAM_ERROR;
-#else
-            err = deflateEnd(&(s->stream));
-#endif
-        } else if (s->mode == 'r') {
-            err = inflateEnd(&(s->stream));
-        }
-    }
-    if (s->file != NULL && fclose(s->file)) {
-#ifdef ESPIPE
-        if (errno != ESPIPE) /* fclose is broken for pipes in HP/UX */
-#endif
-            err = Z_ERRNO;
-    }
-    if (s->z_err < 0) err = s->z_err;
-
-    TRYFREE(s->inbuf);
-    TRYFREE(s->outbuf);
-    TRYFREE(s->path);
-    TRYFREE(s);
-    return err;
-}
-
-/* ===========================================================================
-     Reads the given number of uncompressed bytes from the compressed file.
-   gzread returns the number of bytes actually read (0 for end of file).
-*/
-int ZEXPORT gzread (file, buf, len)
-    gzFile file;
-    voidp buf;
-    unsigned len;
-{
-    gz_stream *s = (gz_stream*)file;
-    Bytef *start = (Bytef*)buf; /* starting point for crc computation */
-    Byte  *next_out; /* == stream.next_out but not forced far (for MSDOS) */
-
-    if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR;
-
-    if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1;
-    if (s->z_err == Z_STREAM_END) return 0;  /* EOF */
-
-    next_out = (Byte*)buf;
-    s->stream.next_out = (Bytef*)buf;
-    s->stream.avail_out = len;
-
-    if (s->stream.avail_out && s->back != EOF) {
-        *next_out++ = s->back;
-        s->stream.next_out++;
-        s->stream.avail_out--;
-        s->back = EOF;
-        s->out++;
-        start++;
-        if (s->last) {
-            s->z_err = Z_STREAM_END;
-            return 1;
-        }
-    }
-
-    while (s->stream.avail_out != 0) {
-
-        if (s->transparent) {
-            /* Copy first the lookahead bytes: */
-            uInt n = s->stream.avail_in;
-            if (n > s->stream.avail_out) n = s->stream.avail_out;
-            if (n > 0) {
-                zmemcpy(s->stream.next_out, s->stream.next_in, n);
-                next_out += n;
-                s->stream.next_out = next_out;
-                s->stream.next_in   += n;
-                s->stream.avail_out -= n;
-                s->stream.avail_in  -= n;
-            }
-            if (s->stream.avail_out > 0) {
-                s->stream.avail_out -=
-                    (uInt)fread(next_out, 1, s->stream.avail_out, s->file);
-            }
-            len -= s->stream.avail_out;
-            s->in  += len;
-            s->out += len;
-            if (len == 0) s->z_eof = 1;
-            return (int)len;
-        }
-        if (s->stream.avail_in == 0 && !s->z_eof) {
-
-            errno = 0;
-            s->stream.avail_in = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file);
-            if (s->stream.avail_in == 0) {
-                s->z_eof = 1;
-                if (ferror(s->file)) {
-                    s->z_err = Z_ERRNO;
-                    break;
-                }
-            }
-            s->stream.next_in = s->inbuf;
-        }
-        s->in += s->stream.avail_in;
-        s->out += s->stream.avail_out;
-        s->z_err = inflate(&(s->stream), Z_NO_FLUSH);
-        s->in -= s->stream.avail_in;
-        s->out -= s->stream.avail_out;
-
-        if (s->z_err == Z_STREAM_END) {
-            /* Check CRC and original size */
-            s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start));
-            start = s->stream.next_out;
-
-            if (getLong(s) != s->crc) {
-                s->z_err = Z_DATA_ERROR;
-            } else {
-                (void)getLong(s);
-                /* The uncompressed length returned by above getlong() may be
-                 * different from s->out in case of concatenated .gz files.
-                 * Check for such files:
-                 */
-                check_header(s);
-                if (s->z_err == Z_OK) {
-                    inflateReset(&(s->stream));
-                    s->crc = crc32(0L, Z_NULL, 0);
-                }
-            }
-        }
-        if (s->z_err != Z_OK || s->z_eof) break;
-    }
-    s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start));
-
-    if (len == s->stream.avail_out &&
-        (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO))
-        return -1;
-    return (int)(len - s->stream.avail_out);
-}
-
-
-/* ===========================================================================
-      Reads one byte from the compressed file. gzgetc returns this byte
-   or -1 in case of end of file or error.
-*/
-int ZEXPORT gzgetc(file)
-    gzFile file;
-{
-    unsigned char c;
-
-    return gzread(file, &c, 1) == 1 ? c : -1;
-}
-
-
-/* ===========================================================================
-      Push one byte back onto the stream.
-*/
-int ZEXPORT gzungetc(c, file)
-    int c;
-    gzFile file;
-{
-    gz_stream *s = (gz_stream*)file;
-
-    if (s == NULL || s->mode != 'r' || c == EOF || s->back != EOF) return EOF;
-    s->back = c;
-    s->out--;
-    s->last = (s->z_err == Z_STREAM_END);
-    if (s->last) s->z_err = Z_OK;
-    s->z_eof = 0;
-    return c;
-}
-
-
-/* ===========================================================================
-      Reads bytes from the compressed file until len-1 characters are
-   read, or a newline character is read and transferred to buf, or an
-   end-of-file condition is encountered.  The string is then terminated
-   with a null character.
-      gzgets returns buf, or Z_NULL in case of error.
-
-      The current implementation is not optimized at all.
-*/
-char * ZEXPORT gzgets(file, buf, len)
-    gzFile file;
-    char *buf;
-    int len;
-{
-    char *b = buf;
-    if (buf == Z_NULL || len <= 0) return Z_NULL;
-
-    while (--len > 0 && gzread(file, buf, 1) == 1 && *buf++ != '\n') ;
-    *buf = '\0';
-    return b == buf && len > 0 ? Z_NULL : b;
-}
-
-
-#ifndef NO_GZCOMPRESS
-/* ===========================================================================
-     Writes the given number of uncompressed bytes into the compressed file.
-   gzwrite returns the number of bytes actually written (0 in case of error).
-*/
-int ZEXPORT gzwrite (file, buf, len)
-    gzFile file;
-    voidpc buf;
-    unsigned len;
-{
-    gz_stream *s = (gz_stream*)file;
-
-    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;
-
-    s->stream.next_in = (Bytef*)buf;
-    s->stream.avail_in = len;
-
-    while (s->stream.avail_in != 0) {
-
-        if (s->stream.avail_out == 0) {
-
-            s->stream.next_out = s->outbuf;
-            if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) {
-                s->z_err = Z_ERRNO;
-                break;
-            }
-            s->stream.avail_out = Z_BUFSIZE;
-        }
-        s->in += s->stream.avail_in;
-        s->out += s->stream.avail_out;
-        s->z_err = deflate(&(s->stream), Z_NO_FLUSH);
-        s->in -= s->stream.avail_in;
-        s->out -= s->stream.avail_out;
-        if (s->z_err != Z_OK) break;
-    }
-    s->crc = crc32(s->crc, (const Bytef *)buf, len);
-
-    return (int)(len - s->stream.avail_in);
-}
-
-
-/* ===========================================================================
-     Converts, formats, and writes the args to the compressed file under
-   control of the format string, as in fprintf. gzprintf returns the number of
-   uncompressed bytes actually written (0 in case of error).
-*/
-#ifdef STDC
-#include <stdarg.h>
-
-int ZEXPORTVA gzprintf (gzFile file, const char *format, /* args */ ...)
-{
-    char buf[Z_PRINTF_BUFSIZE];
-    va_list va;
-    int len;
-
-    buf[sizeof(buf) - 1] = 0;
-    va_start(va, format);
-#ifdef NO_vsnprintf
-#  ifdef HAS_vsprintf_void
-    (void)vsprintf(buf, format, va);
-    va_end(va);
-    for (len = 0; len < sizeof(buf); len++)
-        if (buf[len] == 0) break;
-#  else
-    len = vsprintf(buf, format, va);
-    va_end(va);
-#  endif
-#else
-#  ifdef HAS_vsnprintf_void
-    (void)vsnprintf(buf, sizeof(buf), format, va);
-    va_end(va);
-    len = strlen(buf);
-#  else
-    len = vsnprintf(buf, sizeof(buf), format, va);
-    va_end(va);
-#  endif
-#endif
-    if (len <= 0 || len >= (int)sizeof(buf) || buf[sizeof(buf) - 1] != 0)
-        return 0;
-    return gzwrite(file, buf, (unsigned)len);
-}
-#else /* not ANSI C */
-
-int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
-                       a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
-    gzFile file;
-    const char *format;
-    int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
-        a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
-{
-    char buf[Z_PRINTF_BUFSIZE];
-    int len;
-
-    buf[sizeof(buf) - 1] = 0;
-#ifdef NO_snprintf
-#  ifdef HAS_sprintf_void
-    sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8,
-            a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
-    for (len = 0; len < sizeof(buf); len++)
-        if (buf[len] == 0) break;
-#  else
-    len = sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8,
-                a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
-#  endif
-#else
-#  ifdef HAS_snprintf_void
-    snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8,
-             a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
-    len = strlen(buf);
-#  else
-    len = snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8,
-                 a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
-#  endif
-#endif
-    if (len <= 0 || len >= sizeof(buf) || buf[sizeof(buf) - 1] != 0)
-        return 0;
-    return gzwrite(file, buf, len);
-}
-#endif
-
-/* ===========================================================================
-      Writes c, converted to an unsigned char, into the compressed file.
-   gzputc returns the value that was written, or -1 in case of error.
-*/
-int ZEXPORT gzputc(file, c)
-    gzFile file;
-    int c;
-{
-    unsigned char cc = (unsigned char) c; /* required for big endian systems */
-
-    return gzwrite(file, &cc, 1) == 1 ? (int)cc : -1;
-}
-
-
-/* ===========================================================================
-      Writes the given null-terminated string to the compressed file, excluding
-   the terminating null character.
-      gzputs returns the number of characters written, or -1 in case of error.
-*/
-int ZEXPORT gzputs(file, s)
-    gzFile file;
-    const char *s;
-{
-    return gzwrite(file, (char*)s, (unsigned)strlen(s));
-}
-
-
-/* ===========================================================================
-     Flushes all pending output into the compressed file. The parameter
-   flush is as in the deflate() function.
-*/
-local int do_flush (file, flush)
-    gzFile file;
-    int flush;
-{
-    uInt len;
-    int done = 0;
-    gz_stream *s = (gz_stream*)file;
-
-    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;
-
-    s->stream.avail_in = 0; /* should be zero already anyway */
-
-    for (;;) {
-        len = Z_BUFSIZE - s->stream.avail_out;
-
-        if (len != 0) {
-            if ((uInt)fwrite(s->outbuf, 1, len, s->file) != len) {
-                s->z_err = Z_ERRNO;
-                return Z_ERRNO;
-            }
-            s->stream.next_out = s->outbuf;
-            s->stream.avail_out = Z_BUFSIZE;
-        }
-        if (done) break;
-        s->out += s->stream.avail_out;
-        s->z_err = deflate(&(s->stream), flush);
-        s->out -= s->stream.avail_out;
-
-        /* Ignore the second of two consecutive flushes: */
-        if (len == 0 && s->z_err == Z_BUF_ERROR) s->z_err = Z_OK;
-
-        /* deflate has finished flushing only when it hasn't used up
-         * all the available space in the output buffer:
-         */
-        done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END);
-
-        if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break;
-    }
-    return  s->z_err == Z_STREAM_END ? Z_OK : s->z_err;
-}
-
-int ZEXPORT gzflush (file, flush)
-     gzFile file;
-     int flush;
-{
-    gz_stream *s = (gz_stream*)file;
-    int err = do_flush (file, flush);
-
-    if (err) return err;
-    fflush(s->file);
-    return  s->z_err == Z_STREAM_END ? Z_OK : s->z_err;
-}
-#endif /* NO_GZCOMPRESS */
-
-/* ===========================================================================
-      Sets the starting position for the next gzread or gzwrite on the given
-   compressed file. The offset represents a number of bytes in the
-      gzseek returns the resulting offset location as measured in bytes from
-   the beginning of the uncompressed stream, or -1 in case of error.
-      SEEK_END is not implemented, returns error.
-      In this version of the library, gzseek can be extremely slow.
-*/
-z_off_t ZEXPORT gzseek (file, offset, whence)
-    gzFile file;
-    z_off_t offset;
-    int whence;
-{
-    gz_stream *s = (gz_stream*)file;
-
-    if (s == NULL || whence == SEEK_END ||
-        s->z_err == Z_ERRNO || s->z_err == Z_DATA_ERROR) {
-        return -1L;
-    }
-
-    if (s->mode == 'w') {
-#ifdef NO_GZCOMPRESS
-        return -1L;
-#else
-        if (whence == SEEK_SET) {
-            offset -= s->in;
-        }
-        if (offset < 0) return -1L;
-
-        /* At this point, offset is the number of zero bytes to write. */
-        if (s->inbuf == Z_NULL) {
-            s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); /* for seeking */
-            if (s->inbuf == Z_NULL) return -1L;
-            zmemzero(s->inbuf, Z_BUFSIZE);
-        }
-        while (offset > 0)  {
-            uInt size = Z_BUFSIZE;
-            if (offset < Z_BUFSIZE) size = (uInt)offset;
-
-            size = gzwrite(file, s->inbuf, size);
-            if (size == 0) return -1L;
-
-            offset -= size;
-        }
-        return s->in;
-#endif
-    }
-    /* Rest of function is for reading only */
-
-    /* compute absolute position */
-    if (whence == SEEK_CUR) {
-        offset += s->out;
-    }
-    if (offset < 0) return -1L;
-
-    if (s->transparent) {
-        /* map to fseek */
-        s->back = EOF;
-        s->stream.avail_in = 0;
-        s->stream.next_in = s->inbuf;
-        if (fseek(s->file, offset, SEEK_SET) < 0) return -1L;
-
-        s->in = s->out = offset;
-        return offset;
-    }
-
-    /* For a negative seek, rewind and use positive seek */
-    if (offset >= s->out) {
-        offset -= s->out;
-    } else if (gzrewind(file) < 0) {
-        return -1L;
-    }
-    /* offset is now the number of bytes to skip. */
-
-    if (offset != 0 && s->outbuf == Z_NULL) {
-        s->outbuf = (Byte*)ALLOC(Z_BUFSIZE);
-        if (s->outbuf == Z_NULL) return -1L;
-    }
-    if (offset && s->back != EOF) {
-        s->back = EOF;
-        s->out++;
-        offset--;
-        if (s->last) s->z_err = Z_STREAM_END;
-    }
-    while (offset > 0)  {
-        int size = Z_BUFSIZE;
-        if (offset < Z_BUFSIZE) size = (int)offset;
-
-        size = gzread(file, s->outbuf, (uInt)size);
-        if (size <= 0) return -1L;
-        offset -= size;
-    }
-    return s->out;
-}
-
-/* ===========================================================================
-     Rewinds input file.
-*/
-int ZEXPORT gzrewind (file)
-    gzFile file;
-{
-    gz_stream *s = (gz_stream*)file;
-
-    if (s == NULL || s->mode != 'r') return -1;
-
-    s->z_err = Z_OK;
-    s->z_eof = 0;
-    s->back = EOF;
-    s->stream.avail_in = 0;
-    s->stream.next_in = s->inbuf;
-    s->crc = crc32(0L, Z_NULL, 0);
-    if (!s->transparent) (void)inflateReset(&s->stream);
-    s->in = 0;
-    s->out = 0;
-    return fseek(s->file, s->start, SEEK_SET);
-}
-
-/* ===========================================================================
-     Returns the starting position for the next gzread or gzwrite on the
-   given compressed file. This position represents a number of bytes in the
-   uncompressed data stream.
-*/
-z_off_t ZEXPORT gztell (file)
-    gzFile file;
-{
-    return gzseek(file, 0L, SEEK_CUR);
-}
-
-/* ===========================================================================
-     Returns 1 when EOF has previously been detected reading the given
-   input stream, otherwise zero.
-*/
-int ZEXPORT gzeof (file)
-    gzFile file;
-{
-    gz_stream *s = (gz_stream*)file;
-
-    /* With concatenated compressed files that can have embedded
-     * crc trailers, z_eof is no longer the only/best indicator of EOF
-     * on a gz_stream. Handle end-of-stream error explicitly here.
-     */
-    if (s == NULL || s->mode != 'r') return 0;
-    if (s->z_eof) return 1;
-    return s->z_err == Z_STREAM_END;
-}
-
-/* ===========================================================================
-     Returns 1 if reading and doing so transparently, otherwise zero.
-*/
-int ZEXPORT gzdirect (file)
-    gzFile file;
-{
-    gz_stream *s = (gz_stream*)file;
-
-    if (s == NULL || s->mode != 'r') return 0;
-    return s->transparent;
-}
-
-/* ===========================================================================
-   Outputs a long in LSB order to the given file
-*/
-local void putLong (file, x)
-    FILE *file;
-    uLong x;
-{
-    int n;
-    for (n = 0; n < 4; n++) {
-        fputc((int)(x & 0xff), file);
-        x >>= 8;
-    }
-}
-
-/* ===========================================================================
-   Reads a long in LSB order from the given gz_stream. Sets z_err in case
-   of error.
-*/
-local uLong getLong (s)
-    gz_stream *s;
-{
-    uLong x = (uLong)get_byte(s);
-    int c;
-
-    x += ((uLong)get_byte(s))<<8;
-    x += ((uLong)get_byte(s))<<16;
-    c = get_byte(s);
-    if (c == EOF) s->z_err = Z_DATA_ERROR;
-    x += ((uLong)c)<<24;
-    return x;
-}
-
-/* ===========================================================================
-     Flushes all pending output if necessary, closes the compressed file
-   and deallocates all the (de)compression state.
-*/
-int ZEXPORT gzclose (file)
-    gzFile file;
-{
-    gz_stream *s = (gz_stream*)file;
-
-    if (s == NULL) return Z_STREAM_ERROR;
-
-    if (s->mode == 'w') {
-#ifdef NO_GZCOMPRESS
-        return Z_STREAM_ERROR;
-#else
-        if (do_flush (file, Z_FINISH) != Z_OK)
-            return destroy((gz_stream*)file);
-
-        putLong (s->file, s->crc);
-        putLong (s->file, (uLong)(s->in & 0xffffffff));
-#endif
-    }
-    return destroy((gz_stream*)file);
-}
-
-#ifdef STDC
-#  define zstrerror(errnum) strerror(errnum)
-#else
-#  define zstrerror(errnum) ""
-#endif
-
-/* ===========================================================================
-     Returns the error message for the last error which occurred on the
-   given compressed file. errnum is set to zlib error number. If an
-   error occurred in the file system and not in the compression library,
-   errnum is set to Z_ERRNO and the application may consult errno
-   to get the exact error code.
-*/
-const char * ZEXPORT gzerror (file, errnum)
-    gzFile file;
-    int *errnum;
-{
-    char *m;
-    gz_stream *s = (gz_stream*)file;
-
-    if (s == NULL) {
-        *errnum = Z_STREAM_ERROR;
-        return (const char*)ERR_MSG(Z_STREAM_ERROR);
-    }
-    *errnum = s->z_err;
-    if (*errnum == Z_OK) return (const char*)"";
-
-    m = (char*)(*errnum == Z_ERRNO ? zstrerror(errno) : s->stream.msg);
-
-    if (m == NULL || *m == '\0') m = (char*)ERR_MSG(s->z_err);
-
-    TRYFREE(s->msg);
-    s->msg = (char*)ALLOC(strlen(s->path) + strlen(m) + 3);
-    if (s->msg == Z_NULL) return (const char*)ERR_MSG(Z_MEM_ERROR);
-    strcpy(s->msg, s->path);
-    strcat(s->msg, ": ");
-    strcat(s->msg, m);
-    return (const char*)s->msg;
-}
-
-/* ===========================================================================
-     Clear the error and end-of-file flags, and do the same for the real file.
-*/
-void ZEXPORT gzclearerr (file)
-    gzFile file;
-{
-    gz_stream *s = (gz_stream*)file;
-
-    if (s == NULL) return;
-    if (s->z_err != Z_STREAM_END) s->z_err = Z_OK;
-    s->z_eof = 0;
-    clearerr(s->file);
-}
diff --git a/reg-io/zlib/gzlib.c b/reg-io/zlib/gzlib.c
new file mode 100644
index 00000000..983153cc
--- /dev/null
+++ b/reg-io/zlib/gzlib.c
@@ -0,0 +1,582 @@
+/* gzlib.c -- zlib functions common to reading and writing gzip files
+ * Copyright (C) 2004-2024 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "gzguts.h"
+
+#if defined(_WIN32) && !defined(__BORLANDC__)
+#  define LSEEK _lseeki64
+#else
+#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0
+#  define LSEEK lseek64
+#else
+#  define LSEEK lseek
+#endif
+#endif
+
+#if defined UNDER_CE
+
+/* Map the Windows error number in ERROR to a locale-dependent error message
+   string and return a pointer to it.  Typically, the values for ERROR come
+   from GetLastError.
+
+   The string pointed to shall not be modified by the application, but may be
+   overwritten by a subsequent call to gz_strwinerror
+
+   The gz_strwinerror function does not change the current setting of
+   GetLastError. */
+char ZLIB_INTERNAL *gz_strwinerror(DWORD error) {
+    static char buf[1024];
+
+    wchar_t *msgbuf;
+    DWORD lasterr = GetLastError();
+    DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM
+        | FORMAT_MESSAGE_ALLOCATE_BUFFER,
+        NULL,
+        error,
+        0, /* Default language */
+        (LPVOID)&msgbuf,
+        0,
+        NULL);
+    if (chars != 0) {
+        /* If there is an \r\n appended, zap it.  */
+        if (chars >= 2
+            && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') {
+            chars -= 2;
+            msgbuf[chars] = 0;
+        }
+
+        if (chars > sizeof (buf) - 1) {
+            chars = sizeof (buf) - 1;
+            msgbuf[chars] = 0;
+        }
+
+        wcstombs(buf, msgbuf, chars + 1);
+        LocalFree(msgbuf);
+    }
+    else {
+        sprintf(buf, "unknown win32 error (%ld)", error);
+    }
+
+    SetLastError(lasterr);
+    return buf;
+}
+
+#endif /* UNDER_CE */
+
+/* Reset gzip file state */
+local void gz_reset(gz_statep state) {
+    state->x.have = 0;              /* no output data available */
+    if (state->mode == GZ_READ) {   /* for reading ... */
+        state->eof = 0;             /* not at end of file */
+        state->past = 0;            /* have not read past end yet */
+        state->how = LOOK;          /* look for gzip header */
+    }
+    else                            /* for writing ... */
+        state->reset = 0;           /* no deflateReset pending */
+    state->seek = 0;                /* no seek request pending */
+    gz_error(state, Z_OK, NULL);    /* clear error */
+    state->x.pos = 0;               /* no uncompressed data yet */
+    state->strm.avail_in = 0;       /* no input data yet */
+}
+
+/* Open a gzip file either by name or file descriptor. */
+local gzFile gz_open(const void *path, int fd, const char *mode) {
+    gz_statep state;
+    z_size_t len;
+    int oflag;
+#ifdef O_CLOEXEC
+    int cloexec = 0;
+#endif
+#ifdef O_EXCL
+    int exclusive = 0;
+#endif
+
+    /* check input */
+    if (path == NULL)
+        return NULL;
+
+    /* allocate gzFile structure to return */
+    state = (gz_statep)malloc(sizeof(gz_state));
+    if (state == NULL)
+        return NULL;
+    state->size = 0;            /* no buffers allocated yet */
+    state->want = GZBUFSIZE;    /* requested buffer size */
+    state->msg = NULL;          /* no error message yet */
+
+    /* interpret mode */
+    state->mode = GZ_NONE;
+    state->level = Z_DEFAULT_COMPRESSION;
+    state->strategy = Z_DEFAULT_STRATEGY;
+    state->direct = 0;
+    while (*mode) {
+        if (*mode >= '0' && *mode <= '9')
+            state->level = *mode - '0';
+        else
+            switch (*mode) {
+            case 'r':
+                state->mode = GZ_READ;
+                break;
+#ifndef NO_GZCOMPRESS
+            case 'w':
+                state->mode = GZ_WRITE;
+                break;
+            case 'a':
+                state->mode = GZ_APPEND;
+                break;
+#endif
+            case '+':       /* can't read and write at the same time */
+                free(state);
+                return NULL;
+            case 'b':       /* ignore -- will request binary anyway */
+                break;
+#ifdef O_CLOEXEC
+            case 'e':
+                cloexec = 1;
+                break;
+#endif
+#ifdef O_EXCL
+            case 'x':
+                exclusive = 1;
+                break;
+#endif
+            case 'f':
+                state->strategy = Z_FILTERED;
+                break;
+            case 'h':
+                state->strategy = Z_HUFFMAN_ONLY;
+                break;
+            case 'R':
+                state->strategy = Z_RLE;
+                break;
+            case 'F':
+                state->strategy = Z_FIXED;
+                break;
+            case 'T':
+                state->direct = 1;
+                break;
+            default:        /* could consider as an error, but just ignore */
+                ;
+            }
+        mode++;
+    }
+
+    /* must provide an "r", "w", or "a" */
+    if (state->mode == GZ_NONE) {
+        free(state);
+        return NULL;
+    }
+
+    /* can't force transparent read */
+    if (state->mode == GZ_READ) {
+        if (state->direct) {
+            free(state);
+            return NULL;
+        }
+        state->direct = 1;      /* for empty file */
+    }
+
+    /* save the path name for error messages */
+#ifdef WIDECHAR
+    if (fd == -2) {
+        len = wcstombs(NULL, path, 0);
+        if (len == (z_size_t)-1)
+            len = 0;
+    }
+    else
+#endif
+        len = strlen((const char *)path);
+    state->path = (char *)malloc(len + 1);
+    if (state->path == NULL) {
+        free(state);
+        return NULL;
+    }
+#ifdef WIDECHAR
+    if (fd == -2)
+        if (len)
+            wcstombs(state->path, path, len + 1);
+        else
+            *(state->path) = 0;
+    else
+#endif
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+        (void)snprintf(state->path, len + 1, "%s", (const char *)path);
+#else
+        strcpy(state->path, path);
+#endif
+
+    /* compute the flags for open() */
+    oflag =
+#ifdef O_LARGEFILE
+        O_LARGEFILE |
+#endif
+#ifdef O_BINARY
+        O_BINARY |
+#endif
+#ifdef O_CLOEXEC
+        (cloexec ? O_CLOEXEC : 0) |
+#endif
+        (state->mode == GZ_READ ?
+         O_RDONLY :
+         (O_WRONLY | O_CREAT |
+#ifdef O_EXCL
+          (exclusive ? O_EXCL : 0) |
+#endif
+          (state->mode == GZ_WRITE ?
+           O_TRUNC :
+           O_APPEND)));
+
+    /* open the file with the appropriate flags (or just use fd) */
+    state->fd = fd > -1 ? fd : (
+#ifdef WIDECHAR
+        fd == -2 ? _wopen(path, oflag, 0666) :
+#endif
+        open((const char *)path, oflag, 0666));
+    if (state->fd == -1) {
+        free(state->path);
+        free(state);
+        return NULL;
+    }
+    if (state->mode == GZ_APPEND) {
+        LSEEK(state->fd, 0, SEEK_END);  /* so gzoffset() is correct */
+        state->mode = GZ_WRITE;         /* simplify later checks */
+    }
+
+    /* save the current position for rewinding (only if reading) */
+    if (state->mode == GZ_READ) {
+        state->start = LSEEK(state->fd, 0, SEEK_CUR);
+        if (state->start == -1) state->start = 0;
+    }
+
+    /* initialize stream */
+    gz_reset(state);
+
+    /* return stream */
+    return (gzFile)state;
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzopen(const char *path, const char *mode) {
+    return gz_open(path, -1, mode);
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzopen64(const char *path, const char *mode) {
+    return gz_open(path, -1, mode);
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzdopen(int fd, const char *mode) {
+    char *path;         /* identifier for error messages */
+    gzFile gz;
+
+    if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL)
+        return NULL;
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+    (void)snprintf(path, 7 + 3 * sizeof(int), "<fd:%d>", fd);
+#else
+    sprintf(path, "<fd:%d>", fd);   /* for debugging */
+#endif
+    gz = gz_open(path, fd, mode);
+    free(path);
+    return gz;
+}
+
+/* -- see zlib.h -- */
+#ifdef WIDECHAR
+gzFile ZEXPORT gzopen_w(const wchar_t *path, const char *mode) {
+    return gz_open(path, -2, mode);
+}
+#endif
+
+/* -- see zlib.h -- */
+int ZEXPORT gzbuffer(gzFile file, unsigned size) {
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return -1;
+
+    /* make sure we haven't already allocated memory */
+    if (state->size != 0)
+        return -1;
+
+    /* check and set requested size */
+    if ((size << 1) < size)
+        return -1;              /* need to be able to double it */
+    if (size < 8)
+        size = 8;               /* needed to behave well with flushing */
+    state->want = size;
+    return 0;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzrewind(gzFile file) {
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no error */
+    if (state->mode != GZ_READ ||
+            (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return -1;
+
+    /* back up and start over */
+    if (LSEEK(state->fd, state->start, SEEK_SET) == -1)
+        return -1;
+    gz_reset(state);
+    return 0;
+}
+
+/* -- see zlib.h -- */
+z_off64_t ZEXPORT gzseek64(gzFile file, z_off64_t offset, int whence) {
+    unsigned n;
+    z_off64_t ret;
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return -1;
+
+    /* check that there's no error */
+    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
+        return -1;
+
+    /* can only seek from start or relative to current position */
+    if (whence != SEEK_SET && whence != SEEK_CUR)
+        return -1;
+
+    /* normalize offset to a SEEK_CUR specification */
+    if (whence == SEEK_SET)
+        offset -= state->x.pos;
+    else if (state->seek)
+        offset += state->skip;
+    state->seek = 0;
+
+    /* if within raw area while reading, just go there */
+    if (state->mode == GZ_READ && state->how == COPY &&
+            state->x.pos + offset >= 0) {
+        ret = LSEEK(state->fd, offset - (z_off64_t)state->x.have, SEEK_CUR);
+        if (ret == -1)
+            return -1;
+        state->x.have = 0;
+        state->eof = 0;
+        state->past = 0;
+        state->seek = 0;
+        gz_error(state, Z_OK, NULL);
+        state->strm.avail_in = 0;
+        state->x.pos += offset;
+        return state->x.pos;
+    }
+
+    /* calculate skip amount, rewinding if needed for back seek when reading */
+    if (offset < 0) {
+        if (state->mode != GZ_READ)         /* writing -- can't go backwards */
+            return -1;
+        offset += state->x.pos;
+        if (offset < 0)                     /* before start of file! */
+            return -1;
+        if (gzrewind(file) == -1)           /* rewind, then skip to offset */
+            return -1;
+    }
+
+    /* if reading, skip what's in output buffer (one less gzgetc() check) */
+    if (state->mode == GZ_READ) {
+        n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > offset ?
+            (unsigned)offset : state->x.have;
+        state->x.have -= n;
+        state->x.next += n;
+        state->x.pos += n;
+        offset -= n;
+    }
+
+    /* request skip (if not zero) */
+    if (offset) {
+        state->seek = 1;
+        state->skip = offset;
+    }
+    return state->x.pos + offset;
+}
+
+/* -- see zlib.h -- */
+z_off_t ZEXPORT gzseek(gzFile file, z_off_t offset, int whence) {
+    z_off64_t ret;
+
+    ret = gzseek64(file, (z_off64_t)offset, whence);
+    return ret == (z_off_t)ret ? (z_off_t)ret : -1;
+}
+
+/* -- see zlib.h -- */
+z_off64_t ZEXPORT gztell64(gzFile file) {
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return -1;
+
+    /* return position */
+    return state->x.pos + (state->seek ? state->skip : 0);
+}
+
+/* -- see zlib.h -- */
+z_off_t ZEXPORT gztell(gzFile file) {
+    z_off64_t ret;
+
+    ret = gztell64(file);
+    return ret == (z_off_t)ret ? (z_off_t)ret : -1;
+}
+
+/* -- see zlib.h -- */
+z_off64_t ZEXPORT gzoffset64(gzFile file) {
+    z_off64_t offset;
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return -1;
+
+    /* compute and return effective offset in file */
+    offset = LSEEK(state->fd, 0, SEEK_CUR);
+    if (offset == -1)
+        return -1;
+    if (state->mode == GZ_READ)             /* reading */
+        offset -= state->strm.avail_in;     /* don't count buffered input */
+    return offset;
+}
+
+/* -- see zlib.h -- */
+z_off_t ZEXPORT gzoffset(gzFile file) {
+    z_off64_t ret;
+
+    ret = gzoffset64(file);
+    return ret == (z_off_t)ret ? (z_off_t)ret : -1;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzeof(gzFile file) {
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return 0;
+
+    /* return end-of-file state */
+    return state->mode == GZ_READ ? state->past : 0;
+}
+
+/* -- see zlib.h -- */
+const char * ZEXPORT gzerror(gzFile file, int *errnum) {
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return NULL;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return NULL;
+
+    /* return error information */
+    if (errnum != NULL)
+        *errnum = state->err;
+    return state->err == Z_MEM_ERROR ? "out of memory" :
+                                       (state->msg == NULL ? "" : state->msg);
+}
+
+/* -- see zlib.h -- */
+void ZEXPORT gzclearerr(gzFile file) {
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return;
+
+    /* clear error and end-of-file */
+    if (state->mode == GZ_READ) {
+        state->eof = 0;
+        state->past = 0;
+    }
+    gz_error(state, Z_OK, NULL);
+}
+
+/* Create an error message in allocated memory and set state->err and
+   state->msg accordingly.  Free any previous error message already there.  Do
+   not try to free or allocate space if the error is Z_MEM_ERROR (out of
+   memory).  Simply save the error message as a static string.  If there is an
+   allocation failure constructing the error message, then convert the error to
+   out of memory. */
+void ZLIB_INTERNAL gz_error(gz_statep state, int err, const char *msg) {
+    /* free previously allocated message and clear */
+    if (state->msg != NULL) {
+        if (state->err != Z_MEM_ERROR)
+            free(state->msg);
+        state->msg = NULL;
+    }
+
+    /* if fatal, set state->x.have to 0 so that the gzgetc() macro fails */
+    if (err != Z_OK && err != Z_BUF_ERROR)
+        state->x.have = 0;
+
+    /* set error code, and if no message, then done */
+    state->err = err;
+    if (msg == NULL)
+        return;
+
+    /* for an out of memory error, return literal string when requested */
+    if (err == Z_MEM_ERROR)
+        return;
+
+    /* construct error message with path */
+    if ((state->msg = (char *)malloc(strlen(state->path) + strlen(msg) + 3)) ==
+            NULL) {
+        state->err = Z_MEM_ERROR;
+        return;
+    }
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+    (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3,
+                   "%s%s%s", state->path, ": ", msg);
+#else
+    strcpy(state->msg, state->path);
+    strcat(state->msg, ": ");
+    strcat(state->msg, msg);
+#endif
+}
+
+/* portably return maximum value for an int (when limits.h presumed not
+   available) -- we need to do this to cover cases where 2's complement not
+   used, since C standard permits 1's complement and sign-bit representations,
+   otherwise we could just use ((unsigned)-1) >> 1 */
+unsigned ZLIB_INTERNAL gz_intmax(void) {
+#ifdef INT_MAX
+    return INT_MAX;
+#else
+    unsigned p = 1, q;
+    do {
+        q = p;
+        p <<= 1;
+        p++;
+    } while (p > q);
+    return q >> 1;
+#endif
+}
diff --git a/reg-io/zlib/gzread.c b/reg-io/zlib/gzread.c
new file mode 100644
index 00000000..4168cbc8
--- /dev/null
+++ b/reg-io/zlib/gzread.c
@@ -0,0 +1,602 @@
+/* gzread.c -- zlib functions for reading gzip files
+ * Copyright (C) 2004-2017 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "gzguts.h"
+
+/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
+   state->fd, and update state->eof, state->err, and state->msg as appropriate.
+   This function needs to loop on read(), since read() is not guaranteed to
+   read the number of bytes requested, depending on the type of descriptor. */
+local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
+                  unsigned *have) {
+    int ret;
+    unsigned get, max = ((unsigned)-1 >> 2) + 1;
+
+    *have = 0;
+    do {
+        get = len - *have;
+        if (get > max)
+            get = max;
+        ret = read(state->fd, buf + *have, get);
+        if (ret <= 0)
+            break;
+        *have += (unsigned)ret;
+    } while (*have < len);
+    if (ret < 0) {
+        gz_error(state, Z_ERRNO, zstrerror());
+        return -1;
+    }
+    if (ret == 0)
+        state->eof = 1;
+    return 0;
+}
+
+/* Load up input buffer and set eof flag if last data loaded -- return -1 on
+   error, 0 otherwise.  Note that the eof flag is set when the end of the input
+   file is reached, even though there may be unused data in the buffer.  Once
+   that data has been used, no more attempts will be made to read the file.
+   If strm->avail_in != 0, then the current data is moved to the beginning of
+   the input buffer, and then the remainder of the buffer is loaded with the
+   available data from the input file. */
+local int gz_avail(gz_statep state) {
+    unsigned got;
+    z_streamp strm = &(state->strm);
+
+    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
+        return -1;
+    if (state->eof == 0) {
+        if (strm->avail_in) {       /* copy what's there to the start */
+            unsigned char *p = state->in;
+            unsigned const char *q = strm->next_in;
+            unsigned n = strm->avail_in;
+            do {
+                *p++ = *q++;
+            } while (--n);
+        }
+        if (gz_load(state, state->in + strm->avail_in,
+                    state->size - strm->avail_in, &got) == -1)
+            return -1;
+        strm->avail_in += got;
+        strm->next_in = state->in;
+    }
+    return 0;
+}
+
+/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
+   If this is the first time in, allocate required memory.  state->how will be
+   left unchanged if there is no more input data available, will be set to COPY
+   if there is no gzip header and direct copying will be performed, or it will
+   be set to GZIP for decompression.  If direct copying, then leftover input
+   data from the input buffer will be copied to the output buffer.  In that
+   case, all further file reads will be directly to either the output buffer or
+   a user buffer.  If decompressing, the inflate state will be initialized.
+   gz_look() will return 0 on success or -1 on failure. */
+local int gz_look(gz_statep state) {
+    z_streamp strm = &(state->strm);
+
+    /* allocate read buffers and inflate memory */
+    if (state->size == 0) {
+        /* allocate buffers */
+        state->in = (unsigned char *)malloc(state->want);
+        state->out = (unsigned char *)malloc(state->want << 1);
+        if (state->in == NULL || state->out == NULL) {
+            free(state->out);
+            free(state->in);
+            gz_error(state, Z_MEM_ERROR, "out of memory");
+            return -1;
+        }
+        state->size = state->want;
+
+        /* allocate inflate memory */
+        state->strm.zalloc = Z_NULL;
+        state->strm.zfree = Z_NULL;
+        state->strm.opaque = Z_NULL;
+        state->strm.avail_in = 0;
+        state->strm.next_in = Z_NULL;
+        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
+            free(state->out);
+            free(state->in);
+            state->size = 0;
+            gz_error(state, Z_MEM_ERROR, "out of memory");
+            return -1;
+        }
+    }
+
+    /* get at least the magic bytes in the input buffer */
+    if (strm->avail_in < 2) {
+        if (gz_avail(state) == -1)
+            return -1;
+        if (strm->avail_in == 0)
+            return 0;
+    }
+
+    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
+       a logical dilemma here when considering the case of a partially written
+       gzip file, to wit, if a single 31 byte is written, then we cannot tell
+       whether this is a single-byte file, or just a partially written gzip
+       file -- for here we assume that if a gzip file is being written, then
+       the header will be written in a single operation, so that reading a
+       single byte is sufficient indication that it is not a gzip file) */
+    if (strm->avail_in > 1 &&
+            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
+        inflateReset(strm);
+        state->how = GZIP;
+        state->direct = 0;
+        return 0;
+    }
+
+    /* no gzip header -- if we were decoding gzip before, then this is trailing
+       garbage.  Ignore the trailing garbage and finish. */
+    if (state->direct == 0) {
+        strm->avail_in = 0;
+        state->eof = 1;
+        state->x.have = 0;
+        return 0;
+    }
+
+    /* doing raw i/o, copy any leftover input to output -- this assumes that
+       the output buffer is larger than the input buffer, which also assures
+       space for gzungetc() */
+    state->x.next = state->out;
+    memcpy(state->x.next, strm->next_in, strm->avail_in);
+    state->x.have = strm->avail_in;
+    strm->avail_in = 0;
+    state->how = COPY;
+    state->direct = 1;
+    return 0;
+}
+
+/* Decompress from input to the provided next_out and avail_out in the state.
+   On return, state->x.have and state->x.next point to the just decompressed
+   data.  If the gzip stream completes, state->how is reset to LOOK to look for
+   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
+   on success, -1 on failure. */
+local int gz_decomp(gz_statep state) {
+    int ret = Z_OK;
+    unsigned had;
+    z_streamp strm = &(state->strm);
+
+    /* fill output buffer up to end of deflate stream */
+    had = strm->avail_out;
+    do {
+        /* get more input for inflate() */
+        if (strm->avail_in == 0 && gz_avail(state) == -1)
+            return -1;
+        if (strm->avail_in == 0) {
+            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
+            break;
+        }
+
+        /* decompress and handle errors */
+        ret = inflate(strm, Z_NO_FLUSH);
+        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
+            gz_error(state, Z_STREAM_ERROR,
+                     "internal error: inflate stream corrupt");
+            return -1;
+        }
+        if (ret == Z_MEM_ERROR) {
+            gz_error(state, Z_MEM_ERROR, "out of memory");
+            return -1;
+        }
+        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
+            gz_error(state, Z_DATA_ERROR,
+                     strm->msg == NULL ? "compressed data error" : strm->msg);
+            return -1;
+        }
+    } while (strm->avail_out && ret != Z_STREAM_END);
+
+    /* update available output */
+    state->x.have = had - strm->avail_out;
+    state->x.next = strm->next_out - state->x.have;
+
+    /* if the gzip stream completed successfully, look for another */
+    if (ret == Z_STREAM_END)
+        state->how = LOOK;
+
+    /* good decompression */
+    return 0;
+}
+
+/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
+   Data is either copied from the input file or decompressed from the input
+   file depending on state->how.  If state->how is LOOK, then a gzip header is
+   looked for to determine whether to copy or decompress.  Returns -1 on error,
+   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
+   end of the input file has been reached and all data has been processed.  */
+local int gz_fetch(gz_statep state) {
+    z_streamp strm = &(state->strm);
+
+    do {
+        switch(state->how) {
+        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
+            if (gz_look(state) == -1)
+                return -1;
+            if (state->how == LOOK)
+                return 0;
+            break;
+        case COPY:      /* -> COPY */
+            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
+                    == -1)
+                return -1;
+            state->x.next = state->out;
+            return 0;
+        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
+            strm->avail_out = state->size << 1;
+            strm->next_out = state->out;
+            if (gz_decomp(state) == -1)
+                return -1;
+        }
+    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
+    return 0;
+}
+
+/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
+local int gz_skip(gz_statep state, z_off64_t len) {
+    unsigned n;
+
+    /* skip over len bytes or reach end-of-file, whichever comes first */
+    while (len)
+        /* skip over whatever is in output buffer */
+        if (state->x.have) {
+            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
+                (unsigned)len : state->x.have;
+            state->x.have -= n;
+            state->x.next += n;
+            state->x.pos += n;
+            len -= n;
+        }
+
+        /* output buffer empty -- return if we're at the end of the input */
+        else if (state->eof && state->strm.avail_in == 0)
+            break;
+
+        /* need more data to skip -- load up output buffer */
+        else {
+            /* get more output, looking for header if required */
+            if (gz_fetch(state) == -1)
+                return -1;
+        }
+    return 0;
+}
+
+/* Read len bytes into buf from file, or less than len up to the end of the
+   input.  Return the number of bytes read.  If zero is returned, either the
+   end of file was reached, or there was an error.  state->err must be
+   consulted in that case to determine which. */
+local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
+    z_size_t got;
+    unsigned n;
+
+    /* if len is zero, avoid unnecessary operations */
+    if (len == 0)
+        return 0;
+
+    /* process a skip request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_skip(state, state->skip) == -1)
+            return 0;
+    }
+
+    /* get len bytes to buf, or less than len if at the end */
+    got = 0;
+    do {
+        /* set n to the maximum amount of len that fits in an unsigned int */
+        n = (unsigned)-1;
+        if (n > len)
+            n = (unsigned)len;
+
+        /* first just try copying data from the output buffer */
+        if (state->x.have) {
+            if (state->x.have < n)
+                n = state->x.have;
+            memcpy(buf, state->x.next, n);
+            state->x.next += n;
+            state->x.have -= n;
+        }
+
+        /* output buffer empty -- return if we're at the end of the input */
+        else if (state->eof && state->strm.avail_in == 0) {
+            state->past = 1;        /* tried to read past end */
+            break;
+        }
+
+        /* need output data -- for small len or new stream load up our output
+           buffer */
+        else if (state->how == LOOK || n < (state->size << 1)) {
+            /* get more output, looking for header if required */
+            if (gz_fetch(state) == -1)
+                return 0;
+            continue;       /* no progress yet -- go back to copy above */
+            /* the copy above assures that we will leave with space in the
+               output buffer, allowing at least one gzungetc() to succeed */
+        }
+
+        /* large len -- read directly into user buffer */
+        else if (state->how == COPY) {      /* read directly */
+            if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
+                return 0;
+        }
+
+        /* large len -- decompress directly into user buffer */
+        else {  /* state->how == GZIP */
+            state->strm.avail_out = n;
+            state->strm.next_out = (unsigned char *)buf;
+            if (gz_decomp(state) == -1)
+                return 0;
+            n = state->x.have;
+            state->x.have = 0;
+        }
+
+        /* update progress */
+        len -= n;
+        buf = (char *)buf + n;
+        got += n;
+        state->x.pos += n;
+    } while (len);
+
+    /* return number of bytes read into user buffer */
+    return got;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state->mode != GZ_READ ||
+            (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return -1;
+
+    /* since an int is returned, make sure len fits in one, otherwise return
+       with an error (this avoids a flaw in the interface) */
+    if ((int)len < 0) {
+        gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
+        return -1;
+    }
+
+    /* read len or fewer bytes to buf */
+    len = (unsigned)gz_read(state, buf, len);
+
+    /* check for an error */
+    if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
+        return -1;
+
+    /* return the number of bytes read (this is assured to fit in an int) */
+    return (int)len;
+}
+
+/* -- see zlib.h -- */
+z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, gzFile file) {
+    z_size_t len;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state->mode != GZ_READ ||
+            (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return 0;
+
+    /* compute bytes to read -- error on overflow */
+    len = nitems * size;
+    if (size && len / size != nitems) {
+        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
+        return 0;
+    }
+
+    /* read len or fewer bytes to buf, return the number of full items read */
+    return len ? gz_read(state, buf, len) / size : 0;
+}
+
+/* -- see zlib.h -- */
+#ifdef Z_PREFIX_SET
+#  undef z_gzgetc
+#else
+#  undef gzgetc
+#endif
+int ZEXPORT gzgetc(gzFile file) {
+    unsigned char buf[1];
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state->mode != GZ_READ ||
+        (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return -1;
+
+    /* try output buffer (no need to check for skip request) */
+    if (state->x.have) {
+        state->x.have--;
+        state->x.pos++;
+        return *(state->x.next)++;
+    }
+
+    /* nothing there -- try gz_read() */
+    return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
+}
+
+int ZEXPORT gzgetc_(gzFile file) {
+    return gzgetc(file);
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzungetc(int c, gzFile file) {
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* in case this was just opened, set up the input buffer */
+    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
+        (void)gz_look(state);
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state->mode != GZ_READ ||
+        (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return -1;
+
+    /* process a skip request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_skip(state, state->skip) == -1)
+            return -1;
+    }
+
+    /* can't push EOF */
+    if (c < 0)
+        return -1;
+
+    /* if output buffer empty, put byte at end (allows more pushing) */
+    if (state->x.have == 0) {
+        state->x.have = 1;
+        state->x.next = state->out + (state->size << 1) - 1;
+        state->x.next[0] = (unsigned char)c;
+        state->x.pos--;
+        state->past = 0;
+        return c;
+    }
+
+    /* if no room, give up (must have already done a gzungetc()) */
+    if (state->x.have == (state->size << 1)) {
+        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
+        return -1;
+    }
+
+    /* slide output data if needed and insert byte before existing data */
+    if (state->x.next == state->out) {
+        unsigned char *src = state->out + state->x.have;
+        unsigned char *dest = state->out + (state->size << 1);
+        while (src > state->out)
+            *--dest = *--src;
+        state->x.next = dest;
+    }
+    state->x.have++;
+    state->x.next--;
+    state->x.next[0] = (unsigned char)c;
+    state->x.pos--;
+    state->past = 0;
+    return c;
+}
+
+/* -- see zlib.h -- */
+char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
+    unsigned left, n;
+    char *str;
+    unsigned char *eol;
+    gz_statep state;
+
+    /* check parameters and get internal structure */
+    if (file == NULL || buf == NULL || len < 1)
+        return NULL;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state->mode != GZ_READ ||
+        (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return NULL;
+
+    /* process a skip request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_skip(state, state->skip) == -1)
+            return NULL;
+    }
+
+    /* copy output bytes up to new line or len - 1, whichever comes first --
+       append a terminating zero to the string (we don't check for a zero in
+       the contents, let the user worry about that) */
+    str = buf;
+    left = (unsigned)len - 1;
+    if (left) do {
+        /* assure that something is in the output buffer */
+        if (state->x.have == 0 && gz_fetch(state) == -1)
+            return NULL;                /* error */
+        if (state->x.have == 0) {       /* end of file */
+            state->past = 1;            /* read past end */
+            break;                      /* return what we have */
+        }
+
+        /* look for end-of-line in current output buffer */
+        n = state->x.have > left ? left : state->x.have;
+        eol = (unsigned char *)memchr(state->x.next, '\n', n);
+        if (eol != NULL)
+            n = (unsigned)(eol - state->x.next) + 1;
+
+        /* copy through end-of-line, or remainder if not found */
+        memcpy(buf, state->x.next, n);
+        state->x.have -= n;
+        state->x.next += n;
+        state->x.pos += n;
+        left -= n;
+        buf += n;
+    } while (left && eol == NULL);
+
+    /* return terminated string, or if nothing, end of file */
+    if (buf == str)
+        return NULL;
+    buf[0] = 0;
+    return str;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzdirect(gzFile file) {
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* if the state is not known, but we can find out, then do so (this is
+       mainly for right after a gzopen() or gzdopen()) */
+    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
+        (void)gz_look(state);
+
+    /* return 1 if transparent, 0 if processing a gzip stream */
+    return state->direct;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzclose_r(gzFile file) {
+    int ret, err;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    /* check that we're reading */
+    if (state->mode != GZ_READ)
+        return Z_STREAM_ERROR;
+
+    /* free memory and close file */
+    if (state->size) {
+        inflateEnd(&(state->strm));
+        free(state->out);
+        free(state->in);
+    }
+    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
+    gz_error(state, Z_OK, NULL);
+    free(state->path);
+    ret = close(state->fd);
+    free(state);
+    return ret ? Z_ERRNO : err;
+}
diff --git a/reg-io/zlib/gzwrite.c b/reg-io/zlib/gzwrite.c
new file mode 100644
index 00000000..435b4621
--- /dev/null
+++ b/reg-io/zlib/gzwrite.c
@@ -0,0 +1,631 @@
+/* gzwrite.c -- zlib functions for writing gzip files
+ * Copyright (C) 2004-2019 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "gzguts.h"
+
+/* Initialize state for writing a gzip file.  Mark initialization by setting
+   state->size to non-zero.  Return -1 on a memory allocation failure, or 0 on
+   success. */
+local int gz_init(gz_statep state) {
+    int ret;
+    z_streamp strm = &(state->strm);
+
+    /* allocate input buffer (double size for gzprintf) */
+    state->in = (unsigned char *)malloc(state->want << 1);
+    if (state->in == NULL) {
+        gz_error(state, Z_MEM_ERROR, "out of memory");
+        return -1;
+    }
+
+    /* only need output buffer and deflate state if compressing */
+    if (!state->direct) {
+        /* allocate output buffer */
+        state->out = (unsigned char *)malloc(state->want);
+        if (state->out == NULL) {
+            free(state->in);
+            gz_error(state, Z_MEM_ERROR, "out of memory");
+            return -1;
+        }
+
+        /* allocate deflate memory, set up for gzip compression */
+        strm->zalloc = Z_NULL;
+        strm->zfree = Z_NULL;
+        strm->opaque = Z_NULL;
+        ret = deflateInit2(strm, state->level, Z_DEFLATED,
+                           MAX_WBITS + 16, DEF_MEM_LEVEL, state->strategy);
+        if (ret != Z_OK) {
+            free(state->out);
+            free(state->in);
+            gz_error(state, Z_MEM_ERROR, "out of memory");
+            return -1;
+        }
+        strm->next_in = NULL;
+    }
+
+    /* mark state as initialized */
+    state->size = state->want;
+
+    /* initialize write buffer if compressing */
+    if (!state->direct) {
+        strm->avail_out = state->size;
+        strm->next_out = state->out;
+        state->x.next = strm->next_out;
+    }
+    return 0;
+}
+
+/* Compress whatever is at avail_in and next_in and write to the output file.
+   Return -1 if there is an error writing to the output file or if gz_init()
+   fails to allocate memory, otherwise 0.  flush is assumed to be a valid
+   deflate() flush value.  If flush is Z_FINISH, then the deflate() state is
+   reset to start a new gzip stream.  If gz->direct is true, then simply write
+   to the output file without compressing, and ignore flush. */
+local int gz_comp(gz_statep state, int flush) {
+    int ret, writ;
+    unsigned have, put, max = ((unsigned)-1 >> 2) + 1;
+    z_streamp strm = &(state->strm);
+
+    /* allocate memory if this is the first time through */
+    if (state->size == 0 && gz_init(state) == -1)
+        return -1;
+
+    /* write directly if requested */
+    if (state->direct) {
+        while (strm->avail_in) {
+            put = strm->avail_in > max ? max : strm->avail_in;
+            writ = write(state->fd, strm->next_in, put);
+            if (writ < 0) {
+                gz_error(state, Z_ERRNO, zstrerror());
+                return -1;
+            }
+            strm->avail_in -= (unsigned)writ;
+            strm->next_in += writ;
+        }
+        return 0;
+    }
+
+    /* check for a pending reset */
+    if (state->reset) {
+        /* don't start a new gzip member unless there is data to write */
+        if (strm->avail_in == 0)
+            return 0;
+        deflateReset(strm);
+        state->reset = 0;
+    }
+
+    /* run deflate() on provided input until it produces no more output */
+    ret = Z_OK;
+    do {
+        /* write out current buffer contents if full, or if flushing, but if
+           doing Z_FINISH then don't write until we get to Z_STREAM_END */
+        if (strm->avail_out == 0 || (flush != Z_NO_FLUSH &&
+            (flush != Z_FINISH || ret == Z_STREAM_END))) {
+            while (strm->next_out > state->x.next) {
+                put = strm->next_out - state->x.next > (int)max ? max :
+                      (unsigned)(strm->next_out - state->x.next);
+                writ = write(state->fd, state->x.next, put);
+                if (writ < 0) {
+                    gz_error(state, Z_ERRNO, zstrerror());
+                    return -1;
+                }
+                state->x.next += writ;
+            }
+            if (strm->avail_out == 0) {
+                strm->avail_out = state->size;
+                strm->next_out = state->out;
+                state->x.next = state->out;
+            }
+        }
+
+        /* compress */
+        have = strm->avail_out;
+        ret = deflate(strm, flush);
+        if (ret == Z_STREAM_ERROR) {
+            gz_error(state, Z_STREAM_ERROR,
+                      "internal error: deflate stream corrupt");
+            return -1;
+        }
+        have -= strm->avail_out;
+    } while (have);
+
+    /* if that completed a deflate stream, allow another to start */
+    if (flush == Z_FINISH)
+        state->reset = 1;
+
+    /* all done, no errors */
+    return 0;
+}
+
+/* Compress len zeros to output.  Return -1 on a write error or memory
+   allocation failure by gz_comp(), or 0 on success. */
+local int gz_zero(gz_statep state, z_off64_t len) {
+    int first;
+    unsigned n;
+    z_streamp strm = &(state->strm);
+
+    /* consume whatever's left in the input buffer */
+    if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
+        return -1;
+
+    /* compress len zeros (len guaranteed > 0) */
+    first = 1;
+    while (len) {
+        n = GT_OFF(state->size) || (z_off64_t)state->size > len ?
+            (unsigned)len : state->size;
+        if (first) {
+            memset(state->in, 0, n);
+            first = 0;
+        }
+        strm->avail_in = n;
+        strm->next_in = state->in;
+        state->x.pos += n;
+        if (gz_comp(state, Z_NO_FLUSH) == -1)
+            return -1;
+        len -= n;
+    }
+    return 0;
+}
+
+/* Write len bytes from buf to file.  Return the number of bytes written.  If
+   the returned value is less than len, then there was an error. */
+local z_size_t gz_write(gz_statep state, voidpc buf, z_size_t len) {
+    z_size_t put = len;
+
+    /* if len is zero, avoid unnecessary operations */
+    if (len == 0)
+        return 0;
+
+    /* allocate memory if this is the first time through */
+    if (state->size == 0 && gz_init(state) == -1)
+        return 0;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return 0;
+    }
+
+    /* for small len, copy to input buffer, otherwise compress directly */
+    if (len < state->size) {
+        /* copy to input buffer, compress when full */
+        do {
+            unsigned have, copy;
+
+            if (state->strm.avail_in == 0)
+                state->strm.next_in = state->in;
+            have = (unsigned)((state->strm.next_in + state->strm.avail_in) -
+                              state->in);
+            copy = state->size - have;
+            if (copy > len)
+                copy = (unsigned)len;
+            memcpy(state->in + have, buf, copy);
+            state->strm.avail_in += copy;
+            state->x.pos += copy;
+            buf = (const char *)buf + copy;
+            len -= copy;
+            if (len && gz_comp(state, Z_NO_FLUSH) == -1)
+                return 0;
+        } while (len);
+    }
+    else {
+        /* consume whatever's left in the input buffer */
+        if (state->strm.avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
+            return 0;
+
+        /* directly compress user buffer to file */
+        state->strm.next_in = (z_const Bytef *)buf;
+        do {
+            unsigned n = (unsigned)-1;
+            if (n > len)
+                n = (unsigned)len;
+            state->strm.avail_in = n;
+            state->x.pos += n;
+            if (gz_comp(state, Z_NO_FLUSH) == -1)
+                return 0;
+            len -= n;
+        } while (len);
+    }
+
+    /* input was all buffered or compressed */
+    return put;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len) {
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return 0;
+
+    /* since an int is returned, make sure len fits in one, otherwise return
+       with an error (this avoids a flaw in the interface) */
+    if ((int)len < 0) {
+        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
+        return 0;
+    }
+
+    /* write len bytes from buf (the return value will fit in an int) */
+    return (int)gz_write(state, buf, len);
+}
+
+/* -- see zlib.h -- */
+z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, z_size_t nitems,
+                          gzFile file) {
+    z_size_t len;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return 0;
+
+    /* compute bytes to read -- error on overflow */
+    len = nitems * size;
+    if (size && len / size != nitems) {
+        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
+        return 0;
+    }
+
+    /* write len bytes to buf, return the number of full items written */
+    return len ? gz_write(state, buf, len) / size : 0;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzputc(gzFile file, int c) {
+    unsigned have;
+    unsigned char buf[1];
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return -1;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return -1;
+    }
+
+    /* try writing to input buffer for speed (state->size == 0 if buffer not
+       initialized) */
+    if (state->size) {
+        if (strm->avail_in == 0)
+            strm->next_in = state->in;
+        have = (unsigned)((strm->next_in + strm->avail_in) - state->in);
+        if (have < state->size) {
+            state->in[have] = (unsigned char)c;
+            strm->avail_in++;
+            state->x.pos++;
+            return c & 0xff;
+        }
+    }
+
+    /* no room in buffer or not initialized, use gz_write() */
+    buf[0] = (unsigned char)c;
+    if (gz_write(state, buf, 1) != 1)
+        return -1;
+    return c & 0xff;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzputs(gzFile file, const char *s) {
+    z_size_t len, put;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return -1;
+
+    /* write string */
+    len = strlen(s);
+    if ((int)len < 0 || (unsigned)len != len) {
+        gz_error(state, Z_STREAM_ERROR, "string length does not fit in int");
+        return -1;
+    }
+    put = gz_write(state, s, len);
+    return put < len ? -1 : (int)len;
+}
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#include <stdarg.h>
+
+/* -- see zlib.h -- */
+int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) {
+    int len;
+    unsigned left;
+    char *next;
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return Z_STREAM_ERROR;
+
+    /* make sure we have some buffer space */
+    if (state->size == 0 && gz_init(state) == -1)
+        return state->err;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->err;
+    }
+
+    /* do the printf() into the input buffer, put length in len -- the input
+       buffer is double-sized just for this function, so there is guaranteed to
+       be state->size bytes available after the current contents */
+    if (strm->avail_in == 0)
+        strm->next_in = state->in;
+    next = (char *)(state->in + (strm->next_in - state->in) + strm->avail_in);
+    next[state->size - 1] = 0;
+#ifdef NO_vsnprintf
+#  ifdef HAS_vsprintf_void
+    (void)vsprintf(next, format, va);
+    for (len = 0; len < state->size; len++)
+        if (next[len] == 0) break;
+#  else
+    len = vsprintf(next, format, va);
+#  endif
+#else
+#  ifdef HAS_vsnprintf_void
+    (void)vsnprintf(next, state->size, format, va);
+    len = strlen(next);
+#  else
+    len = vsnprintf(next, state->size, format, va);
+#  endif
+#endif
+
+    /* check that printf() results fit in buffer */
+    if (len == 0 || (unsigned)len >= state->size || next[state->size - 1] != 0)
+        return 0;
+
+    /* update buffer and position, compress first half if past that */
+    strm->avail_in += (unsigned)len;
+    state->x.pos += len;
+    if (strm->avail_in >= state->size) {
+        left = strm->avail_in - state->size;
+        strm->avail_in = state->size;
+        if (gz_comp(state, Z_NO_FLUSH) == -1)
+            return state->err;
+        memmove(state->in, state->in + state->size, left);
+        strm->next_in = state->in;
+        strm->avail_in = left;
+    }
+    return len;
+}
+
+int ZEXPORTVA gzprintf(gzFile file, const char *format, ...) {
+    va_list va;
+    int ret;
+
+    va_start(va, format);
+    ret = gzvprintf(file, format, va);
+    va_end(va);
+    return ret;
+}
+
+#else /* !STDC && !Z_HAVE_STDARG_H */
+
+/* -- see zlib.h -- */
+int ZEXPORTVA gzprintf(gzFile file, const char *format, int a1, int a2, int a3,
+                       int a4, int a5, int a6, int a7, int a8, int a9, int a10,
+                       int a11, int a12, int a13, int a14, int a15, int a16,
+                       int a17, int a18, int a19, int a20) {
+    unsigned len, left;
+    char *next;
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that can really pass pointer in ints */
+    if (sizeof(int) != sizeof(void *))
+        return Z_STREAM_ERROR;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return Z_STREAM_ERROR;
+
+    /* make sure we have some buffer space */
+    if (state->size == 0 && gz_init(state) == -1)
+        return state->error;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->error;
+    }
+
+    /* do the printf() into the input buffer, put length in len -- the input
+       buffer is double-sized just for this function, so there is guaranteed to
+       be state->size bytes available after the current contents */
+    if (strm->avail_in == 0)
+        strm->next_in = state->in;
+    next = (char *)(strm->next_in + strm->avail_in);
+    next[state->size - 1] = 0;
+#ifdef NO_snprintf
+#  ifdef HAS_sprintf_void
+    sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
+            a13, a14, a15, a16, a17, a18, a19, a20);
+    for (len = 0; len < size; len++)
+        if (next[len] == 0)
+            break;
+#  else
+    len = sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11,
+                  a12, a13, a14, a15, a16, a17, a18, a19, a20);
+#  endif
+#else
+#  ifdef HAS_snprintf_void
+    snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, a9,
+             a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+    len = strlen(next);
+#  else
+    len = snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8,
+                   a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+#  endif
+#endif
+
+    /* check that printf() results fit in buffer */
+    if (len == 0 || len >= state->size || next[state->size - 1] != 0)
+        return 0;
+
+    /* update buffer and position, compress first half if past that */
+    strm->avail_in += len;
+    state->x.pos += len;
+    if (strm->avail_in >= state->size) {
+        left = strm->avail_in - state->size;
+        strm->avail_in = state->size;
+        if (gz_comp(state, Z_NO_FLUSH) == -1)
+            return state->err;
+        memmove(state->in, state->in + state->size, left);
+        strm->next_in = state->in;
+        strm->avail_in = left;
+    }
+    return (int)len;
+}
+
+#endif
+
+/* -- see zlib.h -- */
+int ZEXPORT gzflush(gzFile file, int flush) {
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return Z_STREAM_ERROR;
+
+    /* check flush parameter */
+    if (flush < 0 || flush > Z_FINISH)
+        return Z_STREAM_ERROR;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->err;
+    }
+
+    /* compress remaining data with requested flush */
+    (void)gz_comp(state, flush);
+    return state->err;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzsetparams(gzFile file, int level, int strategy) {
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK || state->direct)
+        return Z_STREAM_ERROR;
+
+    /* if no change is requested, then do nothing */
+    if (level == state->level && strategy == state->strategy)
+        return Z_OK;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->err;
+    }
+
+    /* change compression parameters for subsequent input */
+    if (state->size) {
+        /* flush previous input with previous parameters before changing */
+        if (strm->avail_in && gz_comp(state, Z_BLOCK) == -1)
+            return state->err;
+        deflateParams(strm, level, strategy);
+    }
+    state->level = level;
+    state->strategy = strategy;
+    return Z_OK;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzclose_w(gzFile file) {
+    int ret = Z_OK;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    /* check that we're writing */
+    if (state->mode != GZ_WRITE)
+        return Z_STREAM_ERROR;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            ret = state->err;
+    }
+
+    /* flush, free memory, and close file */
+    if (gz_comp(state, Z_FINISH) == -1)
+        ret = state->err;
+    if (state->size) {
+        if (!state->direct) {
+            (void)deflateEnd(&(state->strm));
+            free(state->out);
+        }
+        free(state->in);
+    }
+    gz_error(state, Z_OK, NULL);
+    free(state->path);
+    if (close(state->fd) == -1)
+        ret = Z_ERRNO;
+    free(state);
+    return ret;
+}
diff --git a/reg-io/zlib/infback.c b/reg-io/zlib/infback.c
index 455dbc9e..e7b25b30 100644
--- a/reg-io/zlib/infback.c
+++ b/reg-io/zlib/infback.c
@@ -1,5 +1,5 @@
 /* infback.c -- inflate using a call-back interface
- * Copyright (C) 1995-2005 Mark Adler
+ * Copyright (C) 1995-2022 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -15,9 +15,6 @@
 #include "inflate.h"
 #include "inffast.h"
 
-/* function prototypes */
-local void fixedtables OF((struct inflate_state FAR *state));
-
 /*
    strm provides memory allocation functions in zalloc and zfree, or
    Z_NULL to use the library memory allocation functions.
@@ -25,13 +22,9 @@ local void fixedtables OF((struct inflate_state FAR *state));
    windowBits is in the range 8..15, and window is a user-supplied
    window and output buffer that is 2**windowBits bytes.
  */
-int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size)
-z_streamp strm;
-int windowBits;
-unsigned char FAR *window;
-const char *version;
-int stream_size;
-{
+int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits,
+                             unsigned char FAR *window, const char *version,
+                             int stream_size) {
     struct inflate_state FAR *state;
 
     if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
@@ -42,21 +35,31 @@ int stream_size;
         return Z_STREAM_ERROR;
     strm->msg = Z_NULL;                 /* in case we return an error */
     if (strm->zalloc == (alloc_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
         strm->zalloc = zcalloc;
         strm->opaque = (voidpf)0;
+#endif
     }
-    if (strm->zfree == (free_func)0) strm->zfree = zcfree;
+    if (strm->zfree == (free_func)0)
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+    strm->zfree = zcfree;
+#endif
     state = (struct inflate_state FAR *)ZALLOC(strm, 1,
                                                sizeof(struct inflate_state));
     if (state == Z_NULL) return Z_MEM_ERROR;
     Tracev((stderr, "inflate: allocated\n"));
     strm->state = (struct internal_state FAR *)state;
     state->dmax = 32768U;
-    state->wbits = windowBits;
+    state->wbits = (uInt)windowBits;
     state->wsize = 1U << windowBits;
     state->window = window;
-    state->write = 0;
+    state->wnext = 0;
     state->whave = 0;
+    state->sane = 1;
     return Z_OK;
 }
 
@@ -70,9 +73,7 @@ int stream_size;
    used for threaded applications, since the rewriting of the tables and virgin
    may not be thread-safe.
  */
-local void fixedtables(state)
-struct inflate_state FAR *state;
-{
+local void fixedtables(struct inflate_state FAR *state) {
 #ifdef BUILDFIXED
     static int virgin = 1;
     static code *lenfix, *distfix;
@@ -238,22 +239,17 @@ struct inflate_state FAR *state;
    inflateBack() can also return Z_STREAM_ERROR if the input parameters
    are not correct, i.e. strm is Z_NULL or the state was not initialized.
  */
-int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc)
-z_streamp strm;
-in_func in;
-void FAR *in_desc;
-out_func out;
-void FAR *out_desc;
-{
+int ZEXPORT inflateBack(z_streamp strm, in_func in, void FAR *in_desc,
+                        out_func out, void FAR *out_desc) {
     struct inflate_state FAR *state;
-    unsigned char FAR *next;    /* next input */
+    z_const unsigned char FAR *next;    /* next input */
     unsigned char FAR *put;     /* next output */
     unsigned have, left;        /* available input and output */
     unsigned long hold;         /* bit buffer */
     unsigned bits;              /* bits in bit buffer */
     unsigned copy;              /* number of stored or match bytes to copy */
     unsigned char FAR *from;    /* where to copy match bytes from */
-    code this;                  /* current decoding table entry */
+    code here;                  /* current decoding table entry */
     code last;                  /* parent table entry */
     unsigned len;               /* length to copy for repeats, bits to drop */
     int ret;                    /* return code */
@@ -389,19 +385,18 @@ void FAR *out_desc;
             state->have = 0;
             while (state->have < state->nlen + state->ndist) {
                 for (;;) {
-                    this = state->lencode[BITS(state->lenbits)];
-                    if ((unsigned)(this.bits) <= bits) break;
+                    here = state->lencode[BITS(state->lenbits)];
+                    if ((unsigned)(here.bits) <= bits) break;
                     PULLBYTE();
                 }
-                if (this.val < 16) {
-                    NEEDBITS(this.bits);
-                    DROPBITS(this.bits);
-                    state->lens[state->have++] = this.val;
+                if (here.val < 16) {
+                    DROPBITS(here.bits);
+                    state->lens[state->have++] = here.val;
                 }
                 else {
-                    if (this.val == 16) {
-                        NEEDBITS(this.bits + 2);
-                        DROPBITS(this.bits);
+                    if (here.val == 16) {
+                        NEEDBITS(here.bits + 2);
+                        DROPBITS(here.bits);
                         if (state->have == 0) {
                             strm->msg = (char *)"invalid bit length repeat";
                             state->mode = BAD;
@@ -411,16 +406,16 @@ void FAR *out_desc;
                         copy = 3 + BITS(2);
                         DROPBITS(2);
                     }
-                    else if (this.val == 17) {
-                        NEEDBITS(this.bits + 3);
-                        DROPBITS(this.bits);
+                    else if (here.val == 17) {
+                        NEEDBITS(here.bits + 3);
+                        DROPBITS(here.bits);
                         len = 0;
                         copy = 3 + BITS(3);
                         DROPBITS(3);
                     }
                     else {
-                        NEEDBITS(this.bits + 7);
-                        DROPBITS(this.bits);
+                        NEEDBITS(here.bits + 7);
+                        DROPBITS(here.bits);
                         len = 0;
                         copy = 11 + BITS(7);
                         DROPBITS(7);
@@ -438,7 +433,16 @@ void FAR *out_desc;
             /* handle error breaks in while */
             if (state->mode == BAD) break;
 
-            /* build code tables */
+            /* check for end-of-block code (better have one) */
+            if (state->lens[256] == 0) {
+                strm->msg = (char *)"invalid code -- missing end-of-block";
+                state->mode = BAD;
+                break;
+            }
+
+            /* build code tables -- note: do not change the lenbits or distbits
+               values here (9 and 6) without reading the comments in inftrees.h
+               concerning the ENOUGH constants, which depend on those values */
             state->next = state->codes;
             state->lencode = (code const FAR *)(state->next);
             state->lenbits = 9;
@@ -460,6 +464,7 @@ void FAR *out_desc;
             }
             Tracev((stderr, "inflate:       codes ok\n"));
             state->mode = LEN;
+                /* fallthrough */
 
         case LEN:
             /* use inflate_fast() if we have enough input and output */
@@ -474,28 +479,28 @@ void FAR *out_desc;
 
             /* get a literal, length, or end-of-block code */
             for (;;) {
-                this = state->lencode[BITS(state->lenbits)];
-                if ((unsigned)(this.bits) <= bits) break;
+                here = state->lencode[BITS(state->lenbits)];
+                if ((unsigned)(here.bits) <= bits) break;
                 PULLBYTE();
             }
-            if (this.op && (this.op & 0xf0) == 0) {
-                last = this;
+            if (here.op && (here.op & 0xf0) == 0) {
+                last = here;
                 for (;;) {
-                    this = state->lencode[last.val +
+                    here = state->lencode[last.val +
                             (BITS(last.bits + last.op) >> last.bits)];
-                    if ((unsigned)(last.bits + this.bits) <= bits) break;
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
                     PULLBYTE();
                 }
                 DROPBITS(last.bits);
             }
-            DROPBITS(this.bits);
-            state->length = (unsigned)this.val;
+            DROPBITS(here.bits);
+            state->length = (unsigned)here.val;
 
             /* process literal */
-            if (this.op == 0) {
-                Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
+            if (here.op == 0) {
+                Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
                         "inflate:         literal '%c'\n" :
-                        "inflate:         literal 0x%02x\n", this.val));
+                        "inflate:         literal 0x%02x\n", here.val));
                 ROOM();
                 *put++ = (unsigned char)(state->length);
                 left--;
@@ -504,21 +509,21 @@ void FAR *out_desc;
             }
 
             /* process end of block */
-            if (this.op & 32) {
+            if (here.op & 32) {
                 Tracevv((stderr, "inflate:         end of block\n"));
                 state->mode = TYPE;
                 break;
             }
 
             /* invalid code */
-            if (this.op & 64) {
+            if (here.op & 64) {
                 strm->msg = (char *)"invalid literal/length code";
                 state->mode = BAD;
                 break;
             }
 
             /* length code -- get extra bits, if any */
-            state->extra = (unsigned)(this.op) & 15;
+            state->extra = (unsigned)(here.op) & 15;
             if (state->extra != 0) {
                 NEEDBITS(state->extra);
                 state->length += BITS(state->extra);
@@ -528,30 +533,30 @@ void FAR *out_desc;
 
             /* get distance code */
             for (;;) {
-                this = state->distcode[BITS(state->distbits)];
-                if ((unsigned)(this.bits) <= bits) break;
+                here = state->distcode[BITS(state->distbits)];
+                if ((unsigned)(here.bits) <= bits) break;
                 PULLBYTE();
             }
-            if ((this.op & 0xf0) == 0) {
-                last = this;
+            if ((here.op & 0xf0) == 0) {
+                last = here;
                 for (;;) {
-                    this = state->distcode[last.val +
+                    here = state->distcode[last.val +
                             (BITS(last.bits + last.op) >> last.bits)];
-                    if ((unsigned)(last.bits + this.bits) <= bits) break;
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
                     PULLBYTE();
                 }
                 DROPBITS(last.bits);
             }
-            DROPBITS(this.bits);
-            if (this.op & 64) {
+            DROPBITS(here.bits);
+            if (here.op & 64) {
                 strm->msg = (char *)"invalid distance code";
                 state->mode = BAD;
                 break;
             }
-            state->offset = (unsigned)this.val;
+            state->offset = (unsigned)here.val;
 
             /* get distance extra bits, if any */
-            state->extra = (unsigned)(this.op) & 15;
+            state->extra = (unsigned)(here.op) & 15;
             if (state->extra != 0) {
                 NEEDBITS(state->extra);
                 state->offset += BITS(state->extra);
@@ -587,33 +592,33 @@ void FAR *out_desc;
             break;
 
         case DONE:
-            /* inflate stream terminated properly -- write leftover output */
+            /* inflate stream terminated properly */
             ret = Z_STREAM_END;
-            if (left < state->wsize) {
-                if (out(out_desc, state->window, state->wsize - left))
-                    ret = Z_BUF_ERROR;
-            }
             goto inf_leave;
 
         case BAD:
             ret = Z_DATA_ERROR;
             goto inf_leave;
 
-        default:                /* can't happen, but makes compilers happy */
+        default:
+            /* can't happen, but makes compilers happy */
             ret = Z_STREAM_ERROR;
             goto inf_leave;
         }
 
-    /* Return unused input */
+    /* Write leftover output and return unused input */
   inf_leave:
+    if (left < state->wsize) {
+        if (out(out_desc, state->window, state->wsize - left) &&
+            ret == Z_STREAM_END)
+            ret = Z_BUF_ERROR;
+    }
     strm->next_in = next;
     strm->avail_in = have;
     return ret;
 }
 
-int ZEXPORT inflateBackEnd(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateBackEnd(z_streamp strm) {
     if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
         return Z_STREAM_ERROR;
     ZFREE(strm, strm->state);
diff --git a/reg-io/zlib/inffast.c b/reg-io/zlib/inffast.c
index bbee92ed..9354676e 100644
--- a/reg-io/zlib/inffast.c
+++ b/reg-io/zlib/inffast.c
@@ -1,5 +1,5 @@
 /* inffast.c -- fast decoding
- * Copyright (C) 1995-2004 Mark Adler
+ * Copyright (C) 1995-2017 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -8,26 +8,9 @@
 #include "inflate.h"
 #include "inffast.h"
 
-#ifndef ASMINF
-
-/* Allow machine dependent optimization for post-increment or pre-increment.
-   Based on testing to date,
-   Pre-increment preferred for:
-   - PowerPC G3 (Adler)
-   - MIPS R5000 (Randers-Pehrson)
-   Post-increment preferred for:
-   - none
-   No measurable difference:
-   - Pentium III (Anderson)
-   - M68060 (Nikl)
- */
-#ifdef POSTINC
-#  define OFF 0
-#  define PUP(a) *(a)++
+#ifdef ASMINF
+#  pragma message("Assembler code may have bugs -- use at your own risk")
 #else
-#  define OFF 1
-#  define PUP(a) *++(a)
-#endif
 
 /*
    Decode literal, length, and distance codes and write out the resulting
@@ -64,13 +47,10 @@
       requires strm->avail_out >= 258 for each loop to avoid checking for
       output space.
  */
-void inflate_fast(strm, start)
-z_streamp strm;
-unsigned start;         /* inflate()'s starting value for strm->avail_out */
-{
+void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start) {
     struct inflate_state FAR *state;
-    unsigned char FAR *in;      /* local strm->next_in */
-    unsigned char FAR *last;    /* while in < last, enough input available */
+    z_const unsigned char FAR *in;      /* local strm->next_in */
+    z_const unsigned char FAR *last;    /* have enough input while in < last */
     unsigned char FAR *out;     /* local strm->next_out */
     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
     unsigned char FAR *end;     /* while out < end, enough space available */
@@ -79,7 +59,7 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
 #endif
     unsigned wsize;             /* window size or zero if not using window */
     unsigned whave;             /* valid bytes in the window */
-    unsigned write;             /* window write index */
+    unsigned wnext;             /* window write index */
     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
     unsigned long hold;         /* local strm->hold */
     unsigned bits;              /* local strm->bits */
@@ -87,7 +67,7 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
     code const FAR *dcode;      /* local strm->distcode */
     unsigned lmask;             /* mask for first level of length codes */
     unsigned dmask;             /* mask for first level of distance codes */
-    code this;                  /* retrieved table entry */
+    code const *here;           /* retrieved table entry */
     unsigned op;                /* code bits, operation, extra bits, or */
                                 /*  window position, window bytes to copy */
     unsigned len;               /* match length, unused bytes */
@@ -96,9 +76,9 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
 
     /* copy state to local variables */
     state = (struct inflate_state FAR *)strm->state;
-    in = strm->next_in - OFF;
+    in = strm->next_in;
     last = in + (strm->avail_in - 5);
-    out = strm->next_out - OFF;
+    out = strm->next_out;
     beg = out - (start - strm->avail_out);
     end = out + (strm->avail_out - 257);
 #ifdef INFLATE_STRICT
@@ -106,7 +86,7 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
 #endif
     wsize = state->wsize;
     whave = state->whave;
-    write = state->write;
+    wnext = state->wnext;
     window = state->window;
     hold = state->hold;
     bits = state->bits;
@@ -119,29 +99,29 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
        input data or output space */
     do {
         if (bits < 15) {
-            hold += (unsigned long)(PUP(in)) << bits;
+            hold += (unsigned long)(*in++) << bits;
             bits += 8;
-            hold += (unsigned long)(PUP(in)) << bits;
+            hold += (unsigned long)(*in++) << bits;
             bits += 8;
         }
-        this = lcode[hold & lmask];
+        here = lcode + (hold & lmask);
       dolen:
-        op = (unsigned)(this.bits);
+        op = (unsigned)(here->bits);
         hold >>= op;
         bits -= op;
-        op = (unsigned)(this.op);
+        op = (unsigned)(here->op);
         if (op == 0) {                          /* literal */
-            Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
+            Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
                     "inflate:         literal '%c'\n" :
-                    "inflate:         literal 0x%02x\n", this.val));
-            PUP(out) = (unsigned char)(this.val);
+                    "inflate:         literal 0x%02x\n", here->val));
+            *out++ = (unsigned char)(here->val);
         }
         else if (op & 16) {                     /* length base */
-            len = (unsigned)(this.val);
+            len = (unsigned)(here->val);
             op &= 15;                           /* number of extra bits */
             if (op) {
                 if (bits < op) {
-                    hold += (unsigned long)(PUP(in)) << bits;
+                    hold += (unsigned long)(*in++) << bits;
                     bits += 8;
                 }
                 len += (unsigned)hold & ((1U << op) - 1);
@@ -150,25 +130,25 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
             }
             Tracevv((stderr, "inflate:         length %u\n", len));
             if (bits < 15) {
-                hold += (unsigned long)(PUP(in)) << bits;
+                hold += (unsigned long)(*in++) << bits;
                 bits += 8;
-                hold += (unsigned long)(PUP(in)) << bits;
+                hold += (unsigned long)(*in++) << bits;
                 bits += 8;
             }
-            this = dcode[hold & dmask];
+            here = dcode + (hold & dmask);
           dodist:
-            op = (unsigned)(this.bits);
+            op = (unsigned)(here->bits);
             hold >>= op;
             bits -= op;
-            op = (unsigned)(this.op);
+            op = (unsigned)(here->op);
             if (op & 16) {                      /* distance base */
-                dist = (unsigned)(this.val);
+                dist = (unsigned)(here->val);
                 op &= 15;                       /* number of extra bits */
                 if (bits < op) {
-                    hold += (unsigned long)(PUP(in)) << bits;
+                    hold += (unsigned long)(*in++) << bits;
                     bits += 8;
                     if (bits < op) {
-                        hold += (unsigned long)(PUP(in)) << bits;
+                        hold += (unsigned long)(*in++) << bits;
                         bits += 8;
                     }
                 }
@@ -187,79 +167,101 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
                 if (dist > op) {                /* see if copy from window */
                     op = dist - op;             /* distance back in window */
                     if (op > whave) {
-                        strm->msg = (char *)"invalid distance too far back";
-                        state->mode = BAD;
-                        break;
+                        if (state->sane) {
+                            strm->msg =
+                                (char *)"invalid distance too far back";
+                            state->mode = BAD;
+                            break;
+                        }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+                        if (len <= op - whave) {
+                            do {
+                                *out++ = 0;
+                            } while (--len);
+                            continue;
+                        }
+                        len -= op - whave;
+                        do {
+                            *out++ = 0;
+                        } while (--op > whave);
+                        if (op == 0) {
+                            from = out - dist;
+                            do {
+                                *out++ = *from++;
+                            } while (--len);
+                            continue;
+                        }
+#endif
                     }
-                    from = window - OFF;
-                    if (write == 0) {           /* very common case */
+                    from = window;
+                    if (wnext == 0) {           /* very common case */
                         from += wsize - op;
                         if (op < len) {         /* some from window */
                             len -= op;
                             do {
-                                PUP(out) = PUP(from);
+                                *out++ = *from++;
                             } while (--op);
                             from = out - dist;  /* rest from output */
                         }
                     }
-                    else if (write < op) {      /* wrap around window */
-                        from += wsize + write - op;
-                        op -= write;
+                    else if (wnext < op) {      /* wrap around window */
+                        from += wsize + wnext - op;
+                        op -= wnext;
                         if (op < len) {         /* some from end of window */
                             len -= op;
                             do {
-                                PUP(out) = PUP(from);
+                                *out++ = *from++;
                             } while (--op);
-                            from = window - OFF;
-                            if (write < len) {  /* some from start of window */
-                                op = write;
+                            from = window;
+                            if (wnext < len) {  /* some from start of window */
+                                op = wnext;
                                 len -= op;
                                 do {
-                                    PUP(out) = PUP(from);
+                                    *out++ = *from++;
                                 } while (--op);
                                 from = out - dist;      /* rest from output */
                             }
                         }
                     }
                     else {                      /* contiguous in window */
-                        from += write - op;
+                        from += wnext - op;
                         if (op < len) {         /* some from window */
                             len -= op;
                             do {
-                                PUP(out) = PUP(from);
+                                *out++ = *from++;
                             } while (--op);
                             from = out - dist;  /* rest from output */
                         }
                     }
                     while (len > 2) {
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        *out++ = *from++;
                         len -= 3;
                     }
                     if (len) {
-                        PUP(out) = PUP(from);
+                        *out++ = *from++;
                         if (len > 1)
-                            PUP(out) = PUP(from);
+                            *out++ = *from++;
                     }
                 }
                 else {
                     from = out - dist;          /* copy direct from output */
                     do {                        /* minimum length is three */
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        *out++ = *from++;
                         len -= 3;
                     } while (len > 2);
                     if (len) {
-                        PUP(out) = PUP(from);
+                        *out++ = *from++;
                         if (len > 1)
-                            PUP(out) = PUP(from);
+                            *out++ = *from++;
                     }
                 }
             }
             else if ((op & 64) == 0) {          /* 2nd level distance code */
-                this = dcode[this.val + (hold & ((1U << op) - 1))];
+                here = dcode + here->val + (hold & ((1U << op) - 1));
                 goto dodist;
             }
             else {
@@ -269,7 +271,7 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
             }
         }
         else if ((op & 64) == 0) {              /* 2nd level length code */
-            this = lcode[this.val + (hold & ((1U << op) - 1))];
+            here = lcode + here->val + (hold & ((1U << op) - 1));
             goto dolen;
         }
         else if (op & 32) {                     /* end-of-block */
@@ -291,8 +293,8 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
     hold &= (1U << bits) - 1;
 
     /* update state and return */
-    strm->next_in = in + OFF;
-    strm->next_out = out + OFF;
+    strm->next_in = in;
+    strm->next_out = out;
     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
     strm->avail_out = (unsigned)(out < end ?
                                  257 + (end - out) : 257 - (out - end));
@@ -305,7 +307,7 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
    - Using bit fields for code structure
    - Different op definition to avoid & for extra bits (do & for table bits)
-   - Three separate decoding do-loops for direct, window, and write == 0
+   - Three separate decoding do-loops for direct, window, and wnext == 0
    - Special case for distance > 1 copies to do overlapped load and store copy
    - Explicit branch predictions (based on measured branch probabilities)
    - Deferring match copy and interspersed it with decoding subsequent codes
diff --git a/reg-io/zlib/inffast.h b/reg-io/zlib/inffast.h
index 1e88d2d9..49c6d156 100644
--- a/reg-io/zlib/inffast.h
+++ b/reg-io/zlib/inffast.h
@@ -1,5 +1,5 @@
 /* inffast.h -- header to use inffast.c
- * Copyright (C) 1995-2003 Mark Adler
+ * Copyright (C) 1995-2003, 2010 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -8,4 +8,4 @@
    subject to change. Applications should only use zlib.h.
  */
 
-void inflate_fast OF((z_streamp strm, unsigned start));
+void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start);
diff --git a/reg-io/zlib/inffixed.h b/reg-io/zlib/inffixed.h
index ea0a1246..d6283277 100644
--- a/reg-io/zlib/inffixed.h
+++ b/reg-io/zlib/inffixed.h
@@ -1,96 +1,94 @@
-/* inffixed.h -- table for decoding fixed codes
- * Generated automatically by makefixed().
- */
+    /* inffixed.h -- table for decoding fixed codes
+     * Generated automatically by makefixed().
+     */
 
-/* WARNING: this file should *not* be used by applications. It
-   is part of the implementation of the compression library and
-   is subject to change. Applications should only use zlib.h.
- */
+    /* WARNING: this file should *not* be used by applications.
+       It is part of the implementation of this library and is
+       subject to change. Applications should only use zlib.h.
+     */
 
-static const code lenfix[512] =
-{
-   {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
-   {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
-   {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
-   {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176},
-   {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20},
-   {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100},
-   {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8},
-   {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216},
-   {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76},
-   {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114},
-   {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},
-   {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148},
-   {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42},
-   {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86},
-   {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15},
-   {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236},
-   {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62},
-   {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142},
-   {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31},
-   {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162},
-   {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25},
-   {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105},
-   {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4},
-   {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202},
-   {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69},
-   {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125},
-   {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13},
-   {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195},
-   {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35},
-   {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91},
-   {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19},
-   {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246},
-   {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55},
-   {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135},
-   {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99},
-   {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190},
-   {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16},
-   {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96},
-   {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6},
-   {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209},
-   {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},
-   {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116},
-   {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4},
-   {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153},
-   {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44},
-   {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82},
-   {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11},
-   {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229},
-   {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58},
-   {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138},
-   {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51},
-   {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173},
-   {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30},
-   {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110},
-   {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0},
-   {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195},
-   {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65},
-   {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121},
-   {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},
-   {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258},
-   {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37},
-   {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93},
-   {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23},
-   {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251},
-   {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51},
-   {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131},
-   {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67},
-   {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183},
-   {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23},
-   {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103},
-   {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9},
-   {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223},
-   {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79},
-   {0,9,255}
-};
+    static const code lenfix[512] = {
+        {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
+        {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
+        {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
+        {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176},
+        {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20},
+        {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100},
+        {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8},
+        {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216},
+        {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76},
+        {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114},
+        {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},
+        {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148},
+        {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42},
+        {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86},
+        {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15},
+        {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236},
+        {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62},
+        {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142},
+        {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31},
+        {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162},
+        {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25},
+        {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105},
+        {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4},
+        {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202},
+        {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69},
+        {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125},
+        {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13},
+        {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195},
+        {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35},
+        {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91},
+        {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19},
+        {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246},
+        {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55},
+        {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135},
+        {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99},
+        {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190},
+        {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16},
+        {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96},
+        {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6},
+        {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209},
+        {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},
+        {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116},
+        {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4},
+        {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153},
+        {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44},
+        {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82},
+        {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11},
+        {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229},
+        {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58},
+        {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138},
+        {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51},
+        {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173},
+        {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30},
+        {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110},
+        {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0},
+        {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195},
+        {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65},
+        {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121},
+        {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},
+        {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258},
+        {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37},
+        {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93},
+        {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23},
+        {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251},
+        {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51},
+        {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131},
+        {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67},
+        {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183},
+        {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23},
+        {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103},
+        {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9},
+        {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223},
+        {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79},
+        {0,9,255}
+    };
 
-static const code distfix[32] =
-{
-   {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
-   {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
-   {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
-   {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577},
-   {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073},
-   {22,5,193},{64,5,0}
-};
+    static const code distfix[32] = {
+        {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
+        {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
+        {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
+        {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577},
+        {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073},
+        {22,5,193},{64,5,0}
+    };
diff --git a/reg-io/zlib/inflate.c b/reg-io/zlib/inflate.c
index 792fdee8..94ecff01 100644
--- a/reg-io/zlib/inflate.c
+++ b/reg-io/zlib/inflate.c
@@ -1,5 +1,5 @@
 /* inflate.c -- zlib decompression
- * Copyright (C) 1995-2005 Mark Adler
+ * Copyright (C) 1995-2022 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -45,7 +45,7 @@
  * - Rearrange window copies in inflate_fast() for speed and simplification
  * - Unroll last copy for window match in inflate_fast()
  * - Use local copies of window variables in inflate_fast() for speed
- * - Pull out common write == 0 case for speed in inflate_fast()
+ * - Pull out common wnext == 0 case for speed in inflate_fast()
  * - Make op and len in inflate_fast() unsigned for consistency
  * - Add FAR to lcode and dcode declarations in inflate_fast()
  * - Simplified bad distance check in inflate_fast()
@@ -91,62 +91,93 @@
 #  endif
 #endif
 
-/* function prototypes */
-local void fixedtables OF((struct inflate_state FAR *state));
-local int updatewindow OF((z_streamp strm, unsigned out));
-#ifdef BUILDFIXED
-   void makefixed OF((void));
-#endif
-local unsigned syncsearch OF((unsigned FAR *have, unsigned char FAR *buf,
-                              unsigned len));
+local int inflateStateCheck(z_streamp strm) {
+    struct inflate_state FAR *state;
+    if (strm == Z_NULL ||
+        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
+        return 1;
+    state = (struct inflate_state FAR *)strm->state;
+    if (state == Z_NULL || state->strm != strm ||
+        state->mode < HEAD || state->mode > SYNC)
+        return 1;
+    return 0;
+}
 
-int ZEXPORT inflateReset(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateResetKeep(z_streamp strm) {
     struct inflate_state FAR *state;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     strm->total_in = strm->total_out = state->total = 0;
     strm->msg = Z_NULL;
-    strm->adler = 1;        /* to support ill-conceived Java test suite */
+    if (state->wrap)        /* to support ill-conceived Java test suite */
+        strm->adler = state->wrap & 1;
     state->mode = HEAD;
     state->last = 0;
     state->havedict = 0;
+    state->flags = -1;
     state->dmax = 32768U;
     state->head = Z_NULL;
-    state->wsize = 0;
-    state->whave = 0;
-    state->write = 0;
     state->hold = 0;
     state->bits = 0;
     state->lencode = state->distcode = state->next = state->codes;
+    state->sane = 1;
+    state->back = -1;
     Tracev((stderr, "inflate: reset\n"));
     return Z_OK;
 }
 
-int ZEXPORT inflatePrime(strm, bits, value)
-z_streamp strm;
-int bits;
-int value;
-{
+int ZEXPORT inflateReset(z_streamp strm) {
     struct inflate_state FAR *state;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
-    if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
-    value &= (1L << bits) - 1;
-    state->hold += value << state->bits;
-    state->bits += bits;
-    return Z_OK;
+    state->wsize = 0;
+    state->whave = 0;
+    state->wnext = 0;
+    return inflateResetKeep(strm);
 }
 
-int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
-z_streamp strm;
-int windowBits;
-const char *version;
-int stream_size;
-{
+int ZEXPORT inflateReset2(z_streamp strm, int windowBits) {
+    int wrap;
+    struct inflate_state FAR *state;
+
+    /* get the state */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* extract wrap request from windowBits parameter */
+    if (windowBits < 0) {
+        if (windowBits < -15)
+            return Z_STREAM_ERROR;
+        wrap = 0;
+        windowBits = -windowBits;
+    }
+    else {
+        wrap = (windowBits >> 4) + 5;
+#ifdef GUNZIP
+        if (windowBits < 48)
+            windowBits &= 15;
+#endif
+    }
+
+    /* set number of window bits, free window if different */
+    if (windowBits && (windowBits < 8 || windowBits > 15))
+        return Z_STREAM_ERROR;
+    if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
+        ZFREE(strm, state->window);
+        state->window = Z_NULL;
+    }
+
+    /* update state and reset the rest of it */
+    state->wrap = wrap;
+    state->wbits = (unsigned)windowBits;
+    return inflateReset(strm);
+}
+
+int ZEXPORT inflateInit2_(z_streamp strm, int windowBits,
+                          const char *version, int stream_size) {
+    int ret;
     struct inflate_state FAR *state;
 
     if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
@@ -155,43 +186,59 @@ int stream_size;
     if (strm == Z_NULL) return Z_STREAM_ERROR;
     strm->msg = Z_NULL;                 /* in case we return an error */
     if (strm->zalloc == (alloc_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
         strm->zalloc = zcalloc;
         strm->opaque = (voidpf)0;
+#endif
     }
-    if (strm->zfree == (free_func)0) strm->zfree = zcfree;
+    if (strm->zfree == (free_func)0)
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zfree = zcfree;
+#endif
     state = (struct inflate_state FAR *)
             ZALLOC(strm, 1, sizeof(struct inflate_state));
     if (state == Z_NULL) return Z_MEM_ERROR;
     Tracev((stderr, "inflate: allocated\n"));
     strm->state = (struct internal_state FAR *)state;
-    if (windowBits < 0) {
-        state->wrap = 0;
-        windowBits = -windowBits;
-    }
-    else {
-        state->wrap = (windowBits >> 4) + 1;
-#ifdef GUNZIP
-        if (windowBits < 48) windowBits &= 15;
-#endif
-    }
-    if (windowBits < 8 || windowBits > 15) {
+    state->strm = strm;
+    state->window = Z_NULL;
+    state->mode = HEAD;     /* to pass state test in inflateReset2() */
+    ret = inflateReset2(strm, windowBits);
+    if (ret != Z_OK) {
         ZFREE(strm, state);
         strm->state = Z_NULL;
-        return Z_STREAM_ERROR;
     }
-    state->wbits = (unsigned)windowBits;
-    state->window = Z_NULL;
-    return inflateReset(strm);
+    return ret;
 }
 
-int ZEXPORT inflateInit_(strm, version, stream_size)
-z_streamp strm;
-const char *version;
-int stream_size;
-{
+int ZEXPORT inflateInit_(z_streamp strm, const char *version,
+                         int stream_size) {
     return inflateInit2_(strm, DEF_WBITS, version, stream_size);
 }
 
+int ZEXPORT inflatePrime(z_streamp strm, int bits, int value) {
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    if (bits == 0)
+        return Z_OK;
+    state = (struct inflate_state FAR *)strm->state;
+    if (bits < 0) {
+        state->hold = 0;
+        state->bits = 0;
+        return Z_OK;
+    }
+    if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
+    value &= (1L << bits) - 1;
+    state->hold += (unsigned)value << state->bits;
+    state->bits += (uInt)bits;
+    return Z_OK;
+}
+
 /*
    Return state with length and distance decoding tables and index sizes set to
    fixed code decoding.  Normally this returns fixed tables from inffixed.h.
@@ -202,9 +249,7 @@ int stream_size;
    used for threaded applications, since the rewriting of the tables and virgin
    may not be thread-safe.
  */
-local void fixedtables(state)
-struct inflate_state FAR *state;
-{
+local void fixedtables(struct inflate_state FAR *state) {
 #ifdef BUILDFIXED
     static int virgin = 1;
     static code *lenfix, *distfix;
@@ -266,7 +311,7 @@ struct inflate_state FAR *state;
 
     a.out > inffixed.h
  */
-void makefixed()
+void makefixed(void)
 {
     unsigned low, size;
     struct inflate_state state;
@@ -286,8 +331,8 @@ void makefixed()
     low = 0;
     for (;;) {
         if ((low % 7) == 0) printf("\n        ");
-        printf("{%u,%u,%d}", state.lencode[low].op, state.lencode[low].bits,
-               state.lencode[low].val);
+        printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
+               state.lencode[low].bits, state.lencode[low].val);
         if (++low == size) break;
         putchar(',');
     }
@@ -320,12 +365,9 @@ void makefixed()
    output will fall in the output data, making match copies simpler and faster.
    The advantage may be dependent on the size of the processor's data caches.
  */
-local int updatewindow(strm, out)
-z_streamp strm;
-unsigned out;
-{
+local int updatewindow(z_streamp strm, const Bytef *end, unsigned copy) {
     struct inflate_state FAR *state;
-    unsigned copy, dist;
+    unsigned dist;
 
     state = (struct inflate_state FAR *)strm->state;
 
@@ -340,30 +382,29 @@ unsigned out;
     /* if window not in use yet, initialize */
     if (state->wsize == 0) {
         state->wsize = 1U << state->wbits;
-        state->write = 0;
+        state->wnext = 0;
         state->whave = 0;
     }
 
     /* copy state->wsize or less output bytes into the circular window */
-    copy = out - strm->avail_out;
     if (copy >= state->wsize) {
-        zmemcpy(state->window, strm->next_out - state->wsize, state->wsize);
-        state->write = 0;
+        zmemcpy(state->window, end - state->wsize, state->wsize);
+        state->wnext = 0;
         state->whave = state->wsize;
     }
     else {
-        dist = state->wsize - state->write;
+        dist = state->wsize - state->wnext;
         if (dist > copy) dist = copy;
-        zmemcpy(state->window + state->write, strm->next_out - copy, dist);
+        zmemcpy(state->window + state->wnext, end - copy, dist);
         copy -= dist;
         if (copy) {
-            zmemcpy(state->window, strm->next_out - copy, copy);
-            state->write = copy;
+            zmemcpy(state->window, end - copy, copy);
+            state->wnext = copy;
             state->whave = state->wsize;
         }
         else {
-            state->write += dist;
-            if (state->write == state->wsize) state->write = 0;
+            state->wnext += dist;
+            if (state->wnext == state->wsize) state->wnext = 0;
             if (state->whave < state->wsize) state->whave += dist;
         }
     }
@@ -374,10 +415,10 @@ unsigned out;
 
 /* check function to use adler32() for zlib or crc32() for gzip */
 #ifdef GUNZIP
-#  define UPDATE(check, buf, len) \
+#  define UPDATE_CHECK(check, buf, len) \
     (state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
 #else
-#  define UPDATE(check, buf, len) adler32(check, buf, len)
+#  define UPDATE_CHECK(check, buf, len) adler32(check, buf, len)
 #endif
 
 /* check macros for header crc */
@@ -464,11 +505,6 @@ unsigned out;
         bits -= bits & 7; \
     } while (0)
 
-/* Reverse the bytes in a 32-bit value */
-#define REVERSE(q) \
-    ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
-     (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
-
 /*
    inflate() uses a state machine to process as much input data and generate as
    much output data as possible before returning.  The state machine is
@@ -551,12 +587,9 @@ unsigned out;
    will return Z_BUF_ERROR if it has not reached the end of the stream.
  */
 
-int ZEXPORT inflate(strm, flush)
-z_streamp strm;
-int flush;
-{
+int ZEXPORT inflate(z_streamp strm, int flush) {
     struct inflate_state FAR *state;
-    unsigned char FAR *next;    /* next input */
+    z_const unsigned char FAR *next;    /* next input */
     unsigned char FAR *put;     /* next output */
     unsigned have, left;        /* available input and output */
     unsigned long hold;         /* bit buffer */
@@ -564,7 +597,7 @@ int flush;
     unsigned in, out;           /* save starting available input and output */
     unsigned copy;              /* number of stored or match bytes to copy */
     unsigned char FAR *from;    /* where to copy match bytes from */
-    code this;                  /* current decoding table entry */
+    code here;                  /* current decoding table entry */
     code last;                  /* parent table entry */
     unsigned len;               /* length to copy for repeats, bits to drop */
     int ret;                    /* return code */
@@ -574,7 +607,7 @@ int flush;
     static const unsigned short order[19] = /* permutation of code lengths */
         {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
 
-    if (strm == Z_NULL || strm->state == Z_NULL || strm->next_out == Z_NULL ||
+    if (inflateStateCheck(strm) || strm->next_out == Z_NULL ||
         (strm->next_in == Z_NULL && strm->avail_in != 0))
         return Z_STREAM_ERROR;
 
@@ -594,13 +627,14 @@ int flush;
             NEEDBITS(16);
 #ifdef GUNZIP
             if ((state->wrap & 2) && hold == 0x8b1f) {  /* gzip header */
+                if (state->wbits == 0)
+                    state->wbits = 15;
                 state->check = crc32(0L, Z_NULL, 0);
                 CRC2(state->check, hold);
                 INITBITS();
                 state->mode = FLAGS;
                 break;
             }
-            state->flags = 0;           /* expect zlib header */
             if (state->head != Z_NULL)
                 state->head->done = -1;
             if (!(state->wrap & 1) ||   /* check if zlib header allowed */
@@ -619,12 +653,15 @@ int flush;
             }
             DROPBITS(4);
             len = BITS(4) + 8;
-            if (len > state->wbits) {
+            if (state->wbits == 0)
+                state->wbits = len;
+            if (len > 15 || len > state->wbits) {
                 strm->msg = (char *)"invalid window size";
                 state->mode = BAD;
                 break;
             }
             state->dmax = 1U << len;
+            state->flags = 0;               /* indicate zlib header */
             Tracev((stderr, "inflate:   zlib header ok\n"));
             strm->adler = state->check = adler32(0L, Z_NULL, 0);
             state->mode = hold & 0x200 ? DICTID : TYPE;
@@ -646,50 +683,59 @@ int flush;
             }
             if (state->head != Z_NULL)
                 state->head->text = (int)((hold >> 8) & 1);
-            if (state->flags & 0x0200) CRC2(state->check, hold);
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC2(state->check, hold);
             INITBITS();
             state->mode = TIME;
+                /* fallthrough */
         case TIME:
             NEEDBITS(32);
             if (state->head != Z_NULL)
                 state->head->time = hold;
-            if (state->flags & 0x0200) CRC4(state->check, hold);
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC4(state->check, hold);
             INITBITS();
             state->mode = OS;
+                /* fallthrough */
         case OS:
             NEEDBITS(16);
             if (state->head != Z_NULL) {
                 state->head->xflags = (int)(hold & 0xff);
                 state->head->os = (int)(hold >> 8);
             }
-            if (state->flags & 0x0200) CRC2(state->check, hold);
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC2(state->check, hold);
             INITBITS();
             state->mode = EXLEN;
+                /* fallthrough */
         case EXLEN:
             if (state->flags & 0x0400) {
                 NEEDBITS(16);
                 state->length = (unsigned)(hold);
                 if (state->head != Z_NULL)
                     state->head->extra_len = (unsigned)hold;
-                if (state->flags & 0x0200) CRC2(state->check, hold);
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    CRC2(state->check, hold);
                 INITBITS();
             }
             else if (state->head != Z_NULL)
                 state->head->extra = Z_NULL;
             state->mode = EXTRA;
+                /* fallthrough */
         case EXTRA:
             if (state->flags & 0x0400) {
                 copy = state->length;
                 if (copy > have) copy = have;
                 if (copy) {
                     if (state->head != Z_NULL &&
-                        state->head->extra != Z_NULL) {
-                        len = state->head->extra_len - state->length;
+                        state->head->extra != Z_NULL &&
+                        (len = state->head->extra_len - state->length) <
+                            state->head->extra_max) {
                         zmemcpy(state->head->extra + len, next,
                                 len + copy > state->head->extra_max ?
                                 state->head->extra_max - len : copy);
                     }
-                    if (state->flags & 0x0200)
+                    if ((state->flags & 0x0200) && (state->wrap & 4))
                         state->check = crc32(state->check, next, copy);
                     have -= copy;
                     next += copy;
@@ -699,6 +745,7 @@ int flush;
             }
             state->length = 0;
             state->mode = NAME;
+                /* fallthrough */
         case NAME:
             if (state->flags & 0x0800) {
                 if (have == 0) goto inf_leave;
@@ -708,9 +755,9 @@ int flush;
                     if (state->head != Z_NULL &&
                             state->head->name != Z_NULL &&
                             state->length < state->head->name_max)
-                        state->head->name[state->length++] = len;
+                        state->head->name[state->length++] = (Bytef)len;
                 } while (len && copy < have);
-                if (state->flags & 0x0200)
+                if ((state->flags & 0x0200) && (state->wrap & 4))
                     state->check = crc32(state->check, next, copy);
                 have -= copy;
                 next += copy;
@@ -720,6 +767,7 @@ int flush;
                 state->head->name = Z_NULL;
             state->length = 0;
             state->mode = COMMENT;
+                /* fallthrough */
         case COMMENT:
             if (state->flags & 0x1000) {
                 if (have == 0) goto inf_leave;
@@ -729,9 +777,9 @@ int flush;
                     if (state->head != Z_NULL &&
                             state->head->comment != Z_NULL &&
                             state->length < state->head->comm_max)
-                        state->head->comment[state->length++] = len;
+                        state->head->comment[state->length++] = (Bytef)len;
                 } while (len && copy < have);
-                if (state->flags & 0x0200)
+                if ((state->flags & 0x0200) && (state->wrap & 4))
                     state->check = crc32(state->check, next, copy);
                 have -= copy;
                 next += copy;
@@ -740,10 +788,11 @@ int flush;
             else if (state->head != Z_NULL)
                 state->head->comment = Z_NULL;
             state->mode = HCRC;
+                /* fallthrough */
         case HCRC:
             if (state->flags & 0x0200) {
                 NEEDBITS(16);
-                if (hold != (state->check & 0xffff)) {
+                if ((state->wrap & 4) && hold != (state->check & 0xffff)) {
                     strm->msg = (char *)"header crc mismatch";
                     state->mode = BAD;
                     break;
@@ -760,9 +809,10 @@ int flush;
 #endif
         case DICTID:
             NEEDBITS(32);
-            strm->adler = state->check = REVERSE(hold);
+            strm->adler = state->check = ZSWAP32(hold);
             INITBITS();
             state->mode = DICT;
+                /* fallthrough */
         case DICT:
             if (state->havedict == 0) {
                 RESTORE();
@@ -770,8 +820,10 @@ int flush;
             }
             strm->adler = state->check = adler32(0L, Z_NULL, 0);
             state->mode = TYPE;
+                /* fallthrough */
         case TYPE:
-            if (flush == Z_BLOCK) goto inf_leave;
+            if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
+                /* fallthrough */
         case TYPEDO:
             if (state->last) {
                 BYTEBITS();
@@ -791,7 +843,11 @@ int flush;
                 fixedtables(state);
                 Tracev((stderr, "inflate:     fixed codes block%s\n",
                         state->last ? " (last)" : ""));
-                state->mode = LEN;              /* decode codes */
+                state->mode = LEN_;             /* decode codes */
+                if (flush == Z_TREES) {
+                    DROPBITS(2);
+                    goto inf_leave;
+                }
                 break;
             case 2:                             /* dynamic block */
                 Tracev((stderr, "inflate:     dynamic codes block%s\n",
@@ -816,7 +872,12 @@ int flush;
             Tracev((stderr, "inflate:       stored length %u\n",
                     state->length));
             INITBITS();
+            state->mode = COPY_;
+            if (flush == Z_TREES) goto inf_leave;
+                /* fallthrough */
+        case COPY_:
             state->mode = COPY;
+                /* fallthrough */
         case COPY:
             copy = state->length;
             if (copy) {
@@ -852,6 +913,7 @@ int flush;
             Tracev((stderr, "inflate:       table sizes ok\n"));
             state->have = 0;
             state->mode = LENLENS;
+                /* fallthrough */
         case LENLENS:
             while (state->have < state->ncode) {
                 NEEDBITS(3);
@@ -861,7 +923,7 @@ int flush;
             while (state->have < 19)
                 state->lens[order[state->have++]] = 0;
             state->next = state->codes;
-            state->lencode = (code const FAR *)(state->next);
+            state->lencode = (const code FAR *)(state->next);
             state->lenbits = 7;
             ret = inflate_table(CODES, state->lens, 19, &(state->next),
                                 &(state->lenbits), state->work);
@@ -873,22 +935,22 @@ int flush;
             Tracev((stderr, "inflate:       code lengths ok\n"));
             state->have = 0;
             state->mode = CODELENS;
+                /* fallthrough */
         case CODELENS:
             while (state->have < state->nlen + state->ndist) {
                 for (;;) {
-                    this = state->lencode[BITS(state->lenbits)];
-                    if ((unsigned)(this.bits) <= bits) break;
+                    here = state->lencode[BITS(state->lenbits)];
+                    if ((unsigned)(here.bits) <= bits) break;
                     PULLBYTE();
                 }
-                if (this.val < 16) {
-                    NEEDBITS(this.bits);
-                    DROPBITS(this.bits);
-                    state->lens[state->have++] = this.val;
+                if (here.val < 16) {
+                    DROPBITS(here.bits);
+                    state->lens[state->have++] = here.val;
                 }
                 else {
-                    if (this.val == 16) {
-                        NEEDBITS(this.bits + 2);
-                        DROPBITS(this.bits);
+                    if (here.val == 16) {
+                        NEEDBITS(here.bits + 2);
+                        DROPBITS(here.bits);
                         if (state->have == 0) {
                             strm->msg = (char *)"invalid bit length repeat";
                             state->mode = BAD;
@@ -898,16 +960,16 @@ int flush;
                         copy = 3 + BITS(2);
                         DROPBITS(2);
                     }
-                    else if (this.val == 17) {
-                        NEEDBITS(this.bits + 3);
-                        DROPBITS(this.bits);
+                    else if (here.val == 17) {
+                        NEEDBITS(here.bits + 3);
+                        DROPBITS(here.bits);
                         len = 0;
                         copy = 3 + BITS(3);
                         DROPBITS(3);
                     }
                     else {
-                        NEEDBITS(this.bits + 7);
-                        DROPBITS(this.bits);
+                        NEEDBITS(here.bits + 7);
+                        DROPBITS(here.bits);
                         len = 0;
                         copy = 11 + BITS(7);
                         DROPBITS(7);
@@ -925,9 +987,18 @@ int flush;
             /* handle error breaks in while */
             if (state->mode == BAD) break;
 
-            /* build code tables */
+            /* check for end-of-block code (better have one) */
+            if (state->lens[256] == 0) {
+                strm->msg = (char *)"invalid code -- missing end-of-block";
+                state->mode = BAD;
+                break;
+            }
+
+            /* build code tables -- note: do not change the lenbits or distbits
+               values here (9 and 6) without reading the comments in inftrees.h
+               concerning the ENOUGH constants, which depend on those values */
             state->next = state->codes;
-            state->lencode = (code const FAR *)(state->next);
+            state->lencode = (const code FAR *)(state->next);
             state->lenbits = 9;
             ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
                                 &(state->lenbits), state->work);
@@ -936,7 +1007,7 @@ int flush;
                 state->mode = BAD;
                 break;
             }
-            state->distcode = (code const FAR *)(state->next);
+            state->distcode = (const code FAR *)(state->next);
             state->distbits = 6;
             ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
                             &(state->next), &(state->distbits), state->work);
@@ -946,88 +1017,107 @@ int flush;
                 break;
             }
             Tracev((stderr, "inflate:       codes ok\n"));
+            state->mode = LEN_;
+            if (flush == Z_TREES) goto inf_leave;
+                /* fallthrough */
+        case LEN_:
             state->mode = LEN;
+                /* fallthrough */
         case LEN:
             if (have >= 6 && left >= 258) {
                 RESTORE();
                 inflate_fast(strm, out);
                 LOAD();
+                if (state->mode == TYPE)
+                    state->back = -1;
                 break;
             }
+            state->back = 0;
             for (;;) {
-                this = state->lencode[BITS(state->lenbits)];
-                if ((unsigned)(this.bits) <= bits) break;
+                here = state->lencode[BITS(state->lenbits)];
+                if ((unsigned)(here.bits) <= bits) break;
                 PULLBYTE();
             }
-            if (this.op && (this.op & 0xf0) == 0) {
-                last = this;
+            if (here.op && (here.op & 0xf0) == 0) {
+                last = here;
                 for (;;) {
-                    this = state->lencode[last.val +
+                    here = state->lencode[last.val +
                             (BITS(last.bits + last.op) >> last.bits)];
-                    if ((unsigned)(last.bits + this.bits) <= bits) break;
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
                     PULLBYTE();
                 }
                 DROPBITS(last.bits);
+                state->back += last.bits;
             }
-            DROPBITS(this.bits);
-            state->length = (unsigned)this.val;
-            if ((int)(this.op) == 0) {
-                Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
+            DROPBITS(here.bits);
+            state->back += here.bits;
+            state->length = (unsigned)here.val;
+            if ((int)(here.op) == 0) {
+                Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
                         "inflate:         literal '%c'\n" :
-                        "inflate:         literal 0x%02x\n", this.val));
+                        "inflate:         literal 0x%02x\n", here.val));
                 state->mode = LIT;
                 break;
             }
-            if (this.op & 32) {
+            if (here.op & 32) {
                 Tracevv((stderr, "inflate:         end of block\n"));
+                state->back = -1;
                 state->mode = TYPE;
                 break;
             }
-            if (this.op & 64) {
+            if (here.op & 64) {
                 strm->msg = (char *)"invalid literal/length code";
                 state->mode = BAD;
                 break;
             }
-            state->extra = (unsigned)(this.op) & 15;
+            state->extra = (unsigned)(here.op) & 15;
             state->mode = LENEXT;
+                /* fallthrough */
         case LENEXT:
             if (state->extra) {
                 NEEDBITS(state->extra);
                 state->length += BITS(state->extra);
                 DROPBITS(state->extra);
+                state->back += state->extra;
             }
             Tracevv((stderr, "inflate:         length %u\n", state->length));
+            state->was = state->length;
             state->mode = DIST;
+                /* fallthrough */
         case DIST:
             for (;;) {
-                this = state->distcode[BITS(state->distbits)];
-                if ((unsigned)(this.bits) <= bits) break;
+                here = state->distcode[BITS(state->distbits)];
+                if ((unsigned)(here.bits) <= bits) break;
                 PULLBYTE();
             }
-            if ((this.op & 0xf0) == 0) {
-                last = this;
+            if ((here.op & 0xf0) == 0) {
+                last = here;
                 for (;;) {
-                    this = state->distcode[last.val +
+                    here = state->distcode[last.val +
                             (BITS(last.bits + last.op) >> last.bits)];
-                    if ((unsigned)(last.bits + this.bits) <= bits) break;
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
                     PULLBYTE();
                 }
                 DROPBITS(last.bits);
+                state->back += last.bits;
             }
-            DROPBITS(this.bits);
-            if (this.op & 64) {
+            DROPBITS(here.bits);
+            state->back += here.bits;
+            if (here.op & 64) {
                 strm->msg = (char *)"invalid distance code";
                 state->mode = BAD;
                 break;
             }
-            state->offset = (unsigned)this.val;
-            state->extra = (unsigned)(this.op) & 15;
+            state->offset = (unsigned)here.val;
+            state->extra = (unsigned)(here.op) & 15;
             state->mode = DISTEXT;
+                /* fallthrough */
         case DISTEXT:
             if (state->extra) {
                 NEEDBITS(state->extra);
                 state->offset += BITS(state->extra);
                 DROPBITS(state->extra);
+                state->back += state->extra;
             }
 #ifdef INFLATE_STRICT
             if (state->offset > state->dmax) {
@@ -1036,24 +1126,40 @@ int flush;
                 break;
             }
 #endif
-            if (state->offset > state->whave + out - left) {
-                strm->msg = (char *)"invalid distance too far back";
-                state->mode = BAD;
-                break;
-            }
             Tracevv((stderr, "inflate:         distance %u\n", state->offset));
             state->mode = MATCH;
+                /* fallthrough */
         case MATCH:
             if (left == 0) goto inf_leave;
             copy = out - left;
             if (state->offset > copy) {         /* copy from window */
                 copy = state->offset - copy;
-                if (copy > state->write) {
-                    copy -= state->write;
+                if (copy > state->whave) {
+                    if (state->sane) {
+                        strm->msg = (char *)"invalid distance too far back";
+                        state->mode = BAD;
+                        break;
+                    }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+                    Trace((stderr, "inflate.c too far\n"));
+                    copy -= state->whave;
+                    if (copy > state->length) copy = state->length;
+                    if (copy > left) copy = left;
+                    left -= copy;
+                    state->length -= copy;
+                    do {
+                        *put++ = 0;
+                    } while (--copy);
+                    if (state->length == 0) state->mode = LEN;
+                    break;
+#endif
+                }
+                if (copy > state->wnext) {
+                    copy -= state->wnext;
                     from = state->window + (state->wsize - copy);
                 }
                 else
-                    from = state->window + (state->write - copy);
+                    from = state->window + (state->wnext - copy);
                 if (copy > state->length) copy = state->length;
             }
             else {                              /* copy from output */
@@ -1080,15 +1186,15 @@ int flush;
                 out -= left;
                 strm->total_out += out;
                 state->total += out;
-                if (out)
+                if ((state->wrap & 4) && out)
                     strm->adler = state->check =
-                        UPDATE(state->check, put - out, out);
+                        UPDATE_CHECK(state->check, put - out, out);
                 out = left;
-                if ((
+                if ((state->wrap & 4) && (
 #ifdef GUNZIP
                      state->flags ? hold :
 #endif
-                     REVERSE(hold)) != state->check) {
+                     ZSWAP32(hold)) != state->check) {
                     strm->msg = (char *)"incorrect data check";
                     state->mode = BAD;
                     break;
@@ -1098,10 +1204,11 @@ int flush;
             }
 #ifdef GUNZIP
             state->mode = LENGTH;
+                /* fallthrough */
         case LENGTH:
             if (state->wrap && state->flags) {
                 NEEDBITS(32);
-                if (hold != (state->total & 0xffffffffUL)) {
+                if ((state->wrap & 4) && hold != (state->total & 0xffffffff)) {
                     strm->msg = (char *)"incorrect length check";
                     state->mode = BAD;
                     break;
@@ -1111,6 +1218,7 @@ int flush;
             }
 #endif
             state->mode = DONE;
+                /* fallthrough */
         case DONE:
             ret = Z_STREAM_END;
             goto inf_leave;
@@ -1120,6 +1228,7 @@ int flush;
         case MEM:
             return Z_MEM_ERROR;
         case SYNC:
+                /* fallthrough */
         default:
             return Z_STREAM_ERROR;
         }
@@ -1132,8 +1241,9 @@ int flush;
      */
   inf_leave:
     RESTORE();
-    if (state->wsize || (state->mode < CHECK && out != strm->avail_out))
-        if (updatewindow(strm, out)) {
+    if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
+            (state->mode < CHECK || flush != Z_FINISH)))
+        if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
             state->mode = MEM;
             return Z_MEM_ERROR;
         }
@@ -1142,21 +1252,20 @@ int flush;
     strm->total_in += in;
     strm->total_out += out;
     state->total += out;
-    if (state->wrap && out)
+    if ((state->wrap & 4) && out)
         strm->adler = state->check =
-            UPDATE(state->check, strm->next_out - out, out);
-    strm->data_type = state->bits + (state->last ? 64 : 0) +
-                      (state->mode == TYPE ? 128 : 0);
+            UPDATE_CHECK(state->check, strm->next_out - out, out);
+    strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
+                      (state->mode == TYPE ? 128 : 0) +
+                      (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
     if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
         ret = Z_BUF_ERROR;
     return ret;
 }
 
-int ZEXPORT inflateEnd(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateEnd(z_streamp strm) {
     struct inflate_state FAR *state;
-    if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
+    if (inflateStateCheck(strm))
         return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     if (state->window != Z_NULL) ZFREE(strm, state->window);
@@ -1166,56 +1275,63 @@ z_streamp strm;
     return Z_OK;
 }
 
-int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
-z_streamp strm;
-const Bytef *dictionary;
-uInt dictLength;
-{
+int ZEXPORT inflateGetDictionary(z_streamp strm, Bytef *dictionary,
+                                 uInt *dictLength) {
+    struct inflate_state FAR *state;
+
+    /* check state */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* copy dictionary */
+    if (state->whave && dictionary != Z_NULL) {
+        zmemcpy(dictionary, state->window + state->wnext,
+                state->whave - state->wnext);
+        zmemcpy(dictionary + state->whave - state->wnext,
+                state->window, state->wnext);
+    }
+    if (dictLength != Z_NULL)
+        *dictLength = state->whave;
+    return Z_OK;
+}
+
+int ZEXPORT inflateSetDictionary(z_streamp strm, const Bytef *dictionary,
+                                 uInt dictLength) {
     struct inflate_state FAR *state;
-    unsigned long id;
+    unsigned long dictid;
+    int ret;
 
     /* check state */
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     if (state->wrap != 0 && state->mode != DICT)
         return Z_STREAM_ERROR;
 
-    /* check for correct dictionary id */
+    /* check for correct dictionary identifier */
     if (state->mode == DICT) {
-        id = adler32(0L, Z_NULL, 0);
-        id = adler32(id, dictionary, dictLength);
-        if (id != state->check)
+        dictid = adler32(0L, Z_NULL, 0);
+        dictid = adler32(dictid, dictionary, dictLength);
+        if (dictid != state->check)
             return Z_DATA_ERROR;
     }
 
-    /* copy dictionary to window */
-    if (updatewindow(strm, strm->avail_out)) {
+    /* copy dictionary to window using updatewindow(), which will amend the
+       existing dictionary if appropriate */
+    ret = updatewindow(strm, dictionary + dictLength, dictLength);
+    if (ret) {
         state->mode = MEM;
         return Z_MEM_ERROR;
     }
-    if (dictLength > state->wsize) {
-        zmemcpy(state->window, dictionary + dictLength - state->wsize,
-                state->wsize);
-        state->whave = state->wsize;
-    }
-    else {
-        zmemcpy(state->window + state->wsize - dictLength, dictionary,
-                dictLength);
-        state->whave = dictLength;
-    }
     state->havedict = 1;
     Tracev((stderr, "inflate:   dictionary set\n"));
     return Z_OK;
 }
 
-int ZEXPORT inflateGetHeader(strm, head)
-z_streamp strm;
-gz_headerp head;
-{
+int ZEXPORT inflateGetHeader(z_streamp strm, gz_headerp head) {
     struct inflate_state FAR *state;
 
     /* check state */
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     if ((state->wrap & 2) == 0) return Z_STREAM_ERROR;
 
@@ -1236,11 +1352,8 @@ gz_headerp head;
    called again with more data and the *have state.  *have is initialized to
    zero for the first call.
  */
-local unsigned syncsearch(have, buf, len)
-unsigned FAR *have;
-unsigned char FAR *buf;
-unsigned len;
-{
+local unsigned syncsearch(unsigned FAR *have, const unsigned char FAR *buf,
+                          unsigned len) {
     unsigned got;
     unsigned next;
 
@@ -1259,23 +1372,22 @@ unsigned len;
     return next;
 }
 
-int ZEXPORT inflateSync(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateSync(z_streamp strm) {
     unsigned len;               /* number of bytes to look at or looked at */
+    int flags;                  /* temporary to save header status */
     unsigned long in, out;      /* temporary to save total_in and total_out */
     unsigned char buf[4];       /* to restore bit buffer to byte string */
     struct inflate_state FAR *state;
 
     /* check parameters */
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
 
     /* if first time, start search in bit buffer */
     if (state->mode != SYNC) {
         state->mode = SYNC;
-        state->hold <<= state->bits & 7;
+        state->hold >>= state->bits & 7;
         state->bits -= state->bits & 7;
         len = 0;
         while (state->bits >= 8) {
@@ -1295,9 +1407,15 @@ z_streamp strm;
 
     /* return no joy or set up to restart inflate() on a new block */
     if (state->have != 4) return Z_DATA_ERROR;
+    if (state->flags == -1)
+        state->wrap = 0;    /* if no header yet, treat as raw */
+    else
+        state->wrap &= ~4;  /* no point in computing a check value now */
+    flags = state->flags;
     in = strm->total_in;  out = strm->total_out;
     inflateReset(strm);
     strm->total_in = in;  strm->total_out = out;
+    state->flags = flags;
     state->mode = TYPE;
     return Z_OK;
 }
@@ -1310,28 +1428,22 @@ z_streamp strm;
    block. When decompressing, PPP checks that at the end of input packet,
    inflate is waiting for these length bytes.
  */
-int ZEXPORT inflateSyncPoint(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateSyncPoint(z_streamp strm) {
     struct inflate_state FAR *state;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     return state->mode == STORED && state->bits == 0;
 }
 
-int ZEXPORT inflateCopy(dest, source)
-z_streamp dest;
-z_streamp source;
-{
+int ZEXPORT inflateCopy(z_streamp dest, z_streamp source) {
     struct inflate_state FAR *state;
     struct inflate_state FAR *copy;
     unsigned char FAR *window;
     unsigned wsize;
 
     /* check input */
-    if (dest == Z_NULL || source == Z_NULL || source->state == Z_NULL ||
-        source->zalloc == (alloc_func)0 || source->zfree == (free_func)0)
+    if (inflateStateCheck(source) || dest == Z_NULL)
         return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)source->state;
 
@@ -1350,8 +1462,9 @@ z_streamp source;
     }
 
     /* copy state */
-    zmemcpy(dest, source, sizeof(z_stream));
-    zmemcpy(copy, state, sizeof(struct inflate_state));
+    zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
+    zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
+    copy->strm = dest;
     if (state->lencode >= state->codes &&
         state->lencode <= state->codes + ENOUGH - 1) {
         copy->lencode = copy->codes + (state->lencode - state->codes);
@@ -1366,3 +1479,48 @@ z_streamp source;
     dest->state = (struct internal_state FAR *)copy;
     return Z_OK;
 }
+
+int ZEXPORT inflateUndermine(z_streamp strm, int subvert) {
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+    state->sane = !subvert;
+    return Z_OK;
+#else
+    (void)subvert;
+    state->sane = 1;
+    return Z_DATA_ERROR;
+#endif
+}
+
+int ZEXPORT inflateValidate(z_streamp strm, int check) {
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (check && state->wrap)
+        state->wrap |= 4;
+    else
+        state->wrap &= ~4;
+    return Z_OK;
+}
+
+long ZEXPORT inflateMark(z_streamp strm) {
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm))
+        return -(1L << 16);
+    state = (struct inflate_state FAR *)strm->state;
+    return (long)(((unsigned long)((long)state->back)) << 16) +
+        (state->mode == COPY ? state->length :
+            (state->mode == MATCH ? state->was - state->length : 0));
+}
+
+unsigned long ZEXPORT inflateCodesUsed(z_streamp strm) {
+    struct inflate_state FAR *state;
+    if (inflateStateCheck(strm)) return (unsigned long)-1;
+    state = (struct inflate_state FAR *)strm->state;
+    return (unsigned long)(state->next - state->codes);
+}
diff --git a/reg-io/zlib/inflate.h b/reg-io/zlib/inflate.h
index a65b9d9b..f127b6b1 100644
--- a/reg-io/zlib/inflate.h
+++ b/reg-io/zlib/inflate.h
@@ -1,5 +1,5 @@
 /* inflate.h -- internal inflate state definition
- * Copyright (C) 1995-2004 Mark Adler
+ * Copyright (C) 1995-2019 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -17,56 +17,59 @@
 #endif
 
 /* Possible inflate modes between inflate() calls */
-typedef enum
-{
-   HEAD,       /* i: waiting for magic header */
-   FLAGS,      /* i: waiting for method and flags (gzip) */
-   TIME,       /* i: waiting for modification time (gzip) */
-   OS,         /* i: waiting for extra flags and operating system (gzip) */
-   EXLEN,      /* i: waiting for extra length (gzip) */
-   EXTRA,      /* i: waiting for extra bytes (gzip) */
-   NAME,       /* i: waiting for end of file name (gzip) */
-   COMMENT,    /* i: waiting for end of comment (gzip) */
-   HCRC,       /* i: waiting for header crc (gzip) */
-   DICTID,     /* i: waiting for dictionary check value */
-   DICT,       /* waiting for inflateSetDictionary() call */
-   TYPE,       /* i: waiting for type bits, including last-flag bit */
-   TYPEDO,     /* i: same, but skip check to exit inflate on new block */
-   STORED,     /* i: waiting for stored size (length and complement) */
-   COPY,       /* i/o: waiting for input or output to copy stored block */
-   TABLE,      /* i: waiting for dynamic block table lengths */
-   LENLENS,    /* i: waiting for code length code lengths */
-   CODELENS,   /* i: waiting for length/lit and distance code lengths */
-   LEN,        /* i: waiting for length/lit code */
-   LENEXT,     /* i: waiting for length extra bits */
-   DIST,       /* i: waiting for distance code */
-   DISTEXT,    /* i: waiting for distance extra bits */
-   MATCH,      /* o: waiting for output space to copy string */
-   LIT,        /* o: waiting for output space to write literal */
-   CHECK,      /* i: waiting for 32-bit check value */
-   LENGTH,     /* i: waiting for 32-bit length (gzip) */
-   DONE,       /* finished check, done -- remain here until reset */
-   BAD,        /* got a data error -- remain here until reset */
-   MEM,        /* got an inflate() memory error -- remain here until reset */
-   SYNC        /* looking for synchronization bytes to restart inflate() */
+typedef enum {
+    HEAD = 16180,   /* i: waiting for magic header */
+    FLAGS,      /* i: waiting for method and flags (gzip) */
+    TIME,       /* i: waiting for modification time (gzip) */
+    OS,         /* i: waiting for extra flags and operating system (gzip) */
+    EXLEN,      /* i: waiting for extra length (gzip) */
+    EXTRA,      /* i: waiting for extra bytes (gzip) */
+    NAME,       /* i: waiting for end of file name (gzip) */
+    COMMENT,    /* i: waiting for end of comment (gzip) */
+    HCRC,       /* i: waiting for header crc (gzip) */
+    DICTID,     /* i: waiting for dictionary check value */
+    DICT,       /* waiting for inflateSetDictionary() call */
+        TYPE,       /* i: waiting for type bits, including last-flag bit */
+        TYPEDO,     /* i: same, but skip check to exit inflate on new block */
+        STORED,     /* i: waiting for stored size (length and complement) */
+        COPY_,      /* i/o: same as COPY below, but only first time in */
+        COPY,       /* i/o: waiting for input or output to copy stored block */
+        TABLE,      /* i: waiting for dynamic block table lengths */
+        LENLENS,    /* i: waiting for code length code lengths */
+        CODELENS,   /* i: waiting for length/lit and distance code lengths */
+            LEN_,       /* i: same as LEN below, but only first time in */
+            LEN,        /* i: waiting for length/lit/eob code */
+            LENEXT,     /* i: waiting for length extra bits */
+            DIST,       /* i: waiting for distance code */
+            DISTEXT,    /* i: waiting for distance extra bits */
+            MATCH,      /* o: waiting for output space to copy string */
+            LIT,        /* o: waiting for output space to write literal */
+    CHECK,      /* i: waiting for 32-bit check value */
+    LENGTH,     /* i: waiting for 32-bit length (gzip) */
+    DONE,       /* finished check, done -- remain here until reset */
+    BAD,        /* got a data error -- remain here until reset */
+    MEM,        /* got an inflate() memory error -- remain here until reset */
+    SYNC        /* looking for synchronization bytes to restart inflate() */
 } inflate_mode;
 
 /*
     State transitions between above modes -
 
-    (most modes can go to the BAD or MEM mode -- not shown for clarity)
+    (most modes can go to BAD or MEM on error -- not shown for clarity)
 
     Process header:
-        HEAD -> (gzip) or (zlib)
-        (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME
-        NAME -> COMMENT -> HCRC -> TYPE
+        HEAD -> (gzip) or (zlib) or (raw)
+        (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT ->
+                  HCRC -> TYPE
         (zlib) -> DICTID or TYPE
         DICTID -> DICT -> TYPE
+        (raw) -> TYPEDO
     Read deflate blocks:
-            TYPE -> STORED or TABLE or LEN or CHECK
-            STORED -> COPY -> TYPE
-            TABLE -> LENLENS -> CODELENS -> LEN
-    Read deflate codes:
+            TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK
+            STORED -> COPY_ -> COPY -> TYPE
+            TABLE -> LENLENS -> CODELENS -> LEN_
+            LEN_ -> LEN
+    Read deflate codes in fixed or dynamic block:
                 LEN -> LENEXT or LIT or TYPE
                 LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
                 LIT -> LEN
@@ -74,44 +77,50 @@ typedef enum
         CHECK -> LENGTH -> DONE
  */
 
-/* state maintained between inflate() calls.  Approximately 7K bytes. */
-struct inflate_state
-{
-   inflate_mode mode;          /* current inflate mode */
-   int last;                   /* true if processing last block */
-   int wrap;                   /* bit 0 true for zlib, bit 1 true for gzip */
-   int havedict;               /* true if dictionary provided */
-   int flags;                  /* gzip header method and flags (0 if zlib) */
-   unsigned dmax;              /* zlib header max distance (INFLATE_STRICT) */
-   unsigned long check;        /* protected copy of check value */
-   unsigned long total;        /* protected copy of output count */
-   gz_headerp head;            /* where to save gzip header information */
-   /* sliding window */
-   unsigned wbits;             /* log base 2 of requested window size */
-   unsigned wsize;             /* window size or zero if not using window */
-   unsigned whave;             /* valid bytes in the window */
-   unsigned write;             /* window write index */
-   unsigned char FAR *window;  /* allocated sliding window, if needed */
-   /* bit accumulator */
-   unsigned long hold;         /* input bit accumulator */
-   unsigned bits;              /* number of bits in "in" */
-   /* for string and stored block copying */
-   unsigned length;            /* literal or length of data to copy */
-   unsigned offset;            /* distance back to copy string from */
-   /* for table and code decoding */
-   unsigned extra;             /* extra bits needed */
-   /* fixed and dynamic code tables */
-   code const FAR *lencode;    /* starting table for length/literal codes */
-   code const FAR *distcode;   /* starting table for distance codes */
-   unsigned lenbits;           /* index bits for lencode */
-   unsigned distbits;          /* index bits for distcode */
-   /* dynamic table building */
-   unsigned ncode;             /* number of code length code lengths */
-   unsigned nlen;              /* number of length code lengths */
-   unsigned ndist;             /* number of distance code lengths */
-   unsigned have;              /* number of code lengths in lens[] */
-   code FAR *next;             /* next available space in codes[] */
-   unsigned short lens[320];   /* temporary storage for code lengths */
-   unsigned short work[288];   /* work area for code table building */
-   code codes[ENOUGH];         /* space for code tables */
+/* State maintained between inflate() calls -- approximately 7K bytes, not
+   including the allocated sliding window, which is up to 32K bytes. */
+struct inflate_state {
+    z_streamp strm;             /* pointer back to this zlib stream */
+    inflate_mode mode;          /* current inflate mode */
+    int last;                   /* true if processing last block */
+    int wrap;                   /* bit 0 true for zlib, bit 1 true for gzip,
+                                   bit 2 true to validate check value */
+    int havedict;               /* true if dictionary provided */
+    int flags;                  /* gzip header method and flags, 0 if zlib, or
+                                   -1 if raw or no header yet */
+    unsigned dmax;              /* zlib header max distance (INFLATE_STRICT) */
+    unsigned long check;        /* protected copy of check value */
+    unsigned long total;        /* protected copy of output count */
+    gz_headerp head;            /* where to save gzip header information */
+        /* sliding window */
+    unsigned wbits;             /* log base 2 of requested window size */
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned wnext;             /* window write index */
+    unsigned char FAR *window;  /* allocated sliding window, if needed */
+        /* bit accumulator */
+    unsigned long hold;         /* input bit accumulator */
+    unsigned bits;              /* number of bits in "in" */
+        /* for string and stored block copying */
+    unsigned length;            /* literal or length of data to copy */
+    unsigned offset;            /* distance back to copy string from */
+        /* for table and code decoding */
+    unsigned extra;             /* extra bits needed */
+        /* fixed and dynamic code tables */
+    code const FAR *lencode;    /* starting table for length/literal codes */
+    code const FAR *distcode;   /* starting table for distance codes */
+    unsigned lenbits;           /* index bits for lencode */
+    unsigned distbits;          /* index bits for distcode */
+        /* dynamic table building */
+    unsigned ncode;             /* number of code length code lengths */
+    unsigned nlen;              /* number of length code lengths */
+    unsigned ndist;             /* number of distance code lengths */
+    unsigned have;              /* number of code lengths in lens[] */
+    code FAR *next;             /* next available space in codes[] */
+    unsigned short lens[320];   /* temporary storage for code lengths */
+    unsigned short work[288];   /* work area for code table building */
+    code codes[ENOUGH];         /* space for code tables */
+    int sane;                   /* if false, allow invalid distance too far */
+    int back;                   /* bits back of last unprocessed length/lit */
+    unsigned was;               /* initial length of match */
 };
diff --git a/reg-io/zlib/inftrees.c b/reg-io/zlib/inftrees.c
index 8a9c13ff..98cfe164 100644
--- a/reg-io/zlib/inftrees.c
+++ b/reg-io/zlib/inftrees.c
@@ -1,5 +1,5 @@
 /* inftrees.c -- generate Huffman trees for efficient decoding
- * Copyright (C) 1995-2005 Mark Adler
+ * Copyright (C) 1995-2024 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -9,7 +9,7 @@
 #define MAXBITS 15
 
 const char inflate_copyright[] =
-   " inflate 1.2.3 Copyright 1995-2005 Mark Adler ";
+   " inflate 1.3.1 Copyright 1995-2024 Mark Adler ";
 /*
   If you use the zlib library in a product, an acknowledgment is welcome
   in the documentation of your product. If for some reason you cannot
@@ -29,14 +29,9 @@ const char inflate_copyright[] =
    table index bits.  It will differ if the request is greater than the
    longest code or if it is less than the shortest code.
  */
-int inflate_table(type, lens, codes, table, bits, work)
-codetype type;
-unsigned short FAR *lens;
-unsigned codes;
-code FAR * FAR *table;
-unsigned FAR *bits;
-unsigned short FAR *work;
-{
+int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
+                                unsigned codes, code FAR * FAR *table,
+                                unsigned FAR *bits, unsigned short FAR *work) {
     unsigned len;               /* a code's length in bits */
     unsigned sym;               /* index of code symbols */
     unsigned min, max;          /* minimum and maximum code lengths */
@@ -50,11 +45,11 @@ unsigned short FAR *work;
     unsigned fill;              /* index for replicating entries */
     unsigned low;               /* low bits for current root entry */
     unsigned mask;              /* mask for low root bits */
-    code this;                  /* table entry for duplication */
+    code here;                  /* table entry for duplication */
     code FAR *next;             /* next available space in table */
     const unsigned short FAR *base;     /* base value table to use */
     const unsigned short FAR *extra;    /* extra bits table to use */
-    int end;                    /* use base and extra for symbol > end */
+    unsigned match;             /* use base and extra for symbol >= match */
     unsigned short count[MAXBITS+1];    /* number of codes of each length */
     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
@@ -62,7 +57,7 @@ unsigned short FAR *work;
         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
-        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196};
+        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77};
     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
@@ -115,15 +110,15 @@ unsigned short FAR *work;
         if (count[max] != 0) break;
     if (root > max) root = max;
     if (max == 0) {                     /* no symbols to code at all */
-        this.op = (unsigned char)64;    /* invalid code marker */
-        this.bits = (unsigned char)1;
-        this.val = (unsigned short)0;
-        *(*table)++ = this;             /* make a table to force an error */
-        *(*table)++ = this;
+        here.op = (unsigned char)64;    /* invalid code marker */
+        here.bits = (unsigned char)1;
+        here.val = (unsigned short)0;
+        *(*table)++ = here;             /* make a table to force an error */
+        *(*table)++ = here;
         *bits = 1;
         return 0;     /* no symbols, but wait for decoding to report error */
     }
-    for (min = 1; min <= MAXBITS; min++)
+    for (min = 1; min < max; min++)
         if (count[min] != 0) break;
     if (root < min) root = min;
 
@@ -166,11 +161,10 @@ unsigned short FAR *work;
        entered in the tables.
 
        used keeps track of how many table entries have been allocated from the
-       provided *table space.  It is checked when a LENS table is being made
-       against the space in *table, ENOUGH, minus the maximum space needed by
-       the worst case distance code, MAXD.  This should never happen, but the
-       sufficiency of ENOUGH has not been proven exhaustively, hence the check.
-       This assumes that when type == LENS, bits == 9.
+       provided *table space.  It is checked for LENS and DIST tables against
+       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
+       the initial root table size constants.  See the comments in inftrees.h
+       for more information.
 
        sym increments through all symbols, and the loop terminates when
        all codes of length max, i.e. all codes, have been processed.  This
@@ -182,19 +176,17 @@ unsigned short FAR *work;
     switch (type) {
     case CODES:
         base = extra = work;    /* dummy value--not used */
-        end = 19;
+        match = 20;
         break;
     case LENS:
         base = lbase;
-        base -= 257;
         extra = lext;
-        extra -= 257;
-        end = 256;
+        match = 257;
         break;
-    default:            /* DISTS */
+    default:    /* DISTS */
         base = dbase;
         extra = dext;
-        end = -1;
+        match = 0;
     }
 
     /* initialize state for loop */
@@ -209,24 +201,25 @@ unsigned short FAR *work;
     mask = used - 1;            /* mask for comparing low */
 
     /* check available table space */
-    if (type == LENS && used >= ENOUGH - MAXD)
+    if ((type == LENS && used > ENOUGH_LENS) ||
+        (type == DISTS && used > ENOUGH_DISTS))
         return 1;
 
     /* process all codes and make table entries */
     for (;;) {
         /* create table entry */
-        this.bits = (unsigned char)(len - drop);
-        if ((int)(work[sym]) < end) {
-            this.op = (unsigned char)0;
-            this.val = work[sym];
+        here.bits = (unsigned char)(len - drop);
+        if (work[sym] + 1U < match) {
+            here.op = (unsigned char)0;
+            here.val = work[sym];
         }
-        else if ((int)(work[sym]) > end) {
-            this.op = (unsigned char)(extra[work[sym]]);
-            this.val = base[work[sym]];
+        else if (work[sym] >= match) {
+            here.op = (unsigned char)(extra[work[sym] - match]);
+            here.val = base[work[sym] - match];
         }
         else {
-            this.op = (unsigned char)(32 + 64);         /* end of block */
-            this.val = 0;
+            here.op = (unsigned char)(32 + 64);         /* end of block */
+            here.val = 0;
         }
 
         /* replicate for those indices with low len bits equal to huff */
@@ -235,7 +228,7 @@ unsigned short FAR *work;
         min = fill;                 /* save offset to next table */
         do {
             fill -= incr;
-            next[(huff >> drop) + fill] = this;
+            next[(huff >> drop) + fill] = here;
         } while (fill != 0);
 
         /* backwards increment the len-bit code huff */
@@ -277,7 +270,8 @@ unsigned short FAR *work;
 
             /* check for enough space */
             used += 1U << curr;
-            if (type == LENS && used >= ENOUGH - MAXD)
+            if ((type == LENS && used > ENOUGH_LENS) ||
+                (type == DISTS && used > ENOUGH_DISTS))
                 return 1;
 
             /* point entry in root table to sub-table */
@@ -288,38 +282,14 @@ unsigned short FAR *work;
         }
     }
 
-    /*
-       Fill in rest of table for incomplete codes.  This loop is similar to the
-       loop above in incrementing huff for table indices.  It is assumed that
-       len is equal to curr + drop, so there is no loop needed to increment
-       through high index bits.  When the current sub-table is filled, the loop
-       drops back to the root table to fill in any remaining entries there.
-     */
-    this.op = (unsigned char)64;                /* invalid code marker */
-    this.bits = (unsigned char)(len - drop);
-    this.val = (unsigned short)0;
-    while (huff != 0) {
-        /* when done with sub-table, drop back to root table */
-        if (drop != 0 && (huff & mask) != low) {
-            drop = 0;
-            len = root;
-            next = *table;
-            this.bits = (unsigned char)len;
-        }
-
-        /* put invalid code marker in table */
-        next[huff >> drop] = this;
-
-        /* backwards increment the len-bit code huff */
-        incr = 1U << (len - 1);
-        while (huff & incr)
-            incr >>= 1;
-        if (incr != 0) {
-            huff &= incr - 1;
-            huff += incr;
-        }
-        else
-            huff = 0;
+    /* fill in remaining table entry if code is incomplete (guaranteed to have
+       at most one remaining entry, since if the code is incomplete, the
+       maximum code length that was allowed to get this far is one bit) */
+    if (huff != 0) {
+        here.op = (unsigned char)64;            /* invalid code marker */
+        here.bits = (unsigned char)(len - drop);
+        here.val = (unsigned short)0;
+        next[huff] = here;
     }
 
     /* set return parameters */
diff --git a/reg-io/zlib/inftrees.h b/reg-io/zlib/inftrees.h
index ad3c0772..396f74b5 100644
--- a/reg-io/zlib/inftrees.h
+++ b/reg-io/zlib/inftrees.h
@@ -1,5 +1,5 @@
 /* inftrees.h -- header to use inftrees.c
- * Copyright (C) 1995-2005 Mark Adler
+ * Copyright (C) 1995-2005, 2010 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -21,11 +21,10 @@
    of the bit buffer.  val is the actual byte to output in the case
    of a literal, the base length or distance, or the offset from
    the current table to the next table.  Each entry is four bytes. */
-typedef struct
-{
-   unsigned char op;           /* operation, extra bits, table bits */
-   unsigned char bits;         /* bits in this part of the code */
-   unsigned short val;         /* offset in table or code value */
+typedef struct {
+    unsigned char op;           /* operation, extra bits, table bits */
+    unsigned char bits;         /* bits in this part of the code */
+    unsigned short val;         /* offset in table or code value */
 } code;
 
 /* op values as set by inflate_table():
@@ -36,22 +35,28 @@ typedef struct
     01000000 - invalid code
  */
 
-/* Maximum size of dynamic tree.  The maximum found in a long but non-
-   exhaustive search was 1444 code structures (852 for length/literals
-   and 592 for distances, the latter actually the result of an
-   exhaustive search).  The true maximum is not known, but the value
-   below is more than safe. */
-#define ENOUGH 2048
-#define MAXD 592
+/* Maximum size of the dynamic table.  The maximum number of code structures is
+   1444, which is the sum of 852 for literal/length codes and 592 for distance
+   codes.  These values were found by exhaustive searches using the program
+   examples/enough.c found in the zlib distribution.  The arguments to that
+   program are the number of symbols, the initial root table size, and the
+   maximum bit length of a code.  "enough 286 9 15" for literal/length codes
+   returns 852, and "enough 30 6 15" for distance codes returns 592. The
+   initial root table size (9 or 6) is found in the fifth argument of the
+   inflate_table() calls in inflate.c and infback.c.  If the root table size is
+   changed, then these maximum sizes would be need to be recalculated and
+   updated. */
+#define ENOUGH_LENS 852
+#define ENOUGH_DISTS 592
+#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS)
 
-/* Type of code to build for inftable() */
-typedef enum
-{
-   CODES,
-   LENS,
-   DISTS
+/* Type of code to build for inflate_table() */
+typedef enum {
+    CODES,
+    LENS,
+    DISTS
 } codetype;
 
-extern int inflate_table OF((codetype type, unsigned short FAR *lens,
-                             unsigned codes, code FAR * FAR *table,
-                             unsigned FAR *bits, unsigned short FAR *work));
+int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
+                                unsigned codes, code FAR * FAR *table,
+                                unsigned FAR *bits, unsigned short FAR *work);
diff --git a/reg-io/zlib/minigzip.c b/reg-io/zlib/minigzip.c
deleted file mode 100644
index 4524b96a..00000000
--- a/reg-io/zlib/minigzip.c
+++ /dev/null
@@ -1,322 +0,0 @@
-/* minigzip.c -- simulate gzip using the zlib compression library
- * Copyright (C) 1995-2005 Jean-loup Gailly.
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/*
- * minigzip is a minimal implementation of the gzip utility. This is
- * only an example of using zlib and isn't meant to replace the
- * full-featured gzip. No attempt is made to deal with file systems
- * limiting names to 14 or 8+3 characters, etc... Error checking is
- * very limited. So use minigzip only for testing; use gzip for the
- * real thing. On MSDOS, use only on file names without extension
- * or in pipe mode.
- */
-
-/* @(#) $Id$ */
-
-#include <stdio.h>
-#include "zlib.h"
-
-#ifdef STDC
-#  include <string.h>
-#  include <stdlib.h>
-#endif
-
-#ifdef USE_MMAP
-#  include <sys/types.h>
-#  include <sys/mman.h>
-#  include <sys/stat.h>
-#endif
-
-#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
-#  include <fcntl.h>
-#  include <io.h>
-#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
-#else
-#  define SET_BINARY_MODE(file)
-#endif
-
-#ifdef VMS
-#  define unlink delete
-#  define GZ_SUFFIX "-gz"
-#endif
-#ifdef RISCOS
-#  define unlink remove
-#  define GZ_SUFFIX "-gz"
-#  define fileno(file) file->__file
-#endif
-#if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
-#  include <unix.h> /* for fileno */
-#endif
-
-#ifndef WIN32 /* unlink already in stdio.h for WIN32 */
-  extern int unlink OF((const char *));
-#endif
-
-#ifndef GZ_SUFFIX
-#  define GZ_SUFFIX ".gz"
-#endif
-#define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1)
-
-#define BUFLEN      16384
-#define MAX_NAME_LEN 1024
-
-#ifdef MAXSEG_64K
-#  define local static
-   /* Needed for systems with limitation on stack size. */
-#else
-#  define local
-#endif
-
-char *prog;
-
-void error            OF((const char *msg));
-void gz_compress      OF((FILE   *in, gzFile out));
-#ifdef USE_MMAP
-int  gz_compress_mmap OF((FILE   *in, gzFile out));
-#endif
-void gz_uncompress    OF((gzFile in, FILE   *out));
-void file_compress    OF((char  *file, char *mode));
-void file_uncompress  OF((char  *file));
-int  main             OF((int argc, char *argv[]));
-
-/* ===========================================================================
- * Display error message and exit
- */
-void error(msg)
-    const char *msg;
-{
-    fprintf(stderr, "%s: %s\n", prog, msg);
-    exit(1);
-}
-
-/* ===========================================================================
- * Compress input to output then close both files.
- */
-
-void gz_compress(in, out)
-    FILE   *in;
-    gzFile out;
-{
-    local char buf[BUFLEN];
-    int len;
-    int err;
-
-#ifdef USE_MMAP
-    /* Try first compressing with mmap. If mmap fails (minigzip used in a
-     * pipe), use the normal fread loop.
-     */
-    if (gz_compress_mmap(in, out) == Z_OK) return;
-#endif
-    for (;;) {
-        len = (int)fread(buf, 1, sizeof(buf), in);
-        if (ferror(in)) {
-            perror("fread");
-            exit(1);
-        }
-        if (len == 0) break;
-
-        if (gzwrite(out, buf, (unsigned)len) != len) error(gzerror(out, &err));
-    }
-    fclose(in);
-    if (gzclose(out) != Z_OK) error("failed gzclose");
-}
-
-#ifdef USE_MMAP /* MMAP version, Miguel Albrecht <malbrech@eso.org> */
-
-/* Try compressing the input file at once using mmap. Return Z_OK if
- * if success, Z_ERRNO otherwise.
- */
-int gz_compress_mmap(in, out)
-    FILE   *in;
-    gzFile out;
-{
-    int len;
-    int err;
-    int ifd = fileno(in);
-    caddr_t buf;    /* mmap'ed buffer for the entire input file */
-    off_t buf_len;  /* length of the input file */
-    struct stat sb;
-
-    /* Determine the size of the file, needed for mmap: */
-    if (fstat(ifd, &sb) < 0) return Z_ERRNO;
-    buf_len = sb.st_size;
-    if (buf_len <= 0) return Z_ERRNO;
-
-    /* Now do the actual mmap: */
-    buf = mmap((caddr_t) 0, buf_len, PROT_READ, MAP_SHARED, ifd, (off_t)0);
-    if (buf == (caddr_t)(-1)) return Z_ERRNO;
-
-    /* Compress the whole file at once: */
-    len = gzwrite(out, (char *)buf, (unsigned)buf_len);
-
-    if (len != (int)buf_len) error(gzerror(out, &err));
-
-    munmap(buf, buf_len);
-    fclose(in);
-    if (gzclose(out) != Z_OK) error("failed gzclose");
-    return Z_OK;
-}
-#endif /* USE_MMAP */
-
-/* ===========================================================================
- * Uncompress input to output then close both files.
- */
-void gz_uncompress(in, out)
-    gzFile in;
-    FILE   *out;
-{
-    local char buf[BUFLEN];
-    int len;
-    int err;
-
-    for (;;) {
-        len = gzread(in, buf, sizeof(buf));
-        if (len < 0) error (gzerror(in, &err));
-        if (len == 0) break;
-
-        if ((int)fwrite(buf, 1, (unsigned)len, out) != len) {
-            error("failed fwrite");
-        }
-    }
-    if (fclose(out)) error("failed fclose");
-
-    if (gzclose(in) != Z_OK) error("failed gzclose");
-}
-
-
-/* ===========================================================================
- * Compress the given file: create a corresponding .gz file and remove the
- * original.
- */
-void file_compress(file, mode)
-    char  *file;
-    char  *mode;
-{
-    local char outfile[MAX_NAME_LEN];
-    FILE  *in;
-    gzFile out;
-
-    strcpy(outfile, file);
-    strcat(outfile, GZ_SUFFIX);
-
-    in = fopen(file, "rb");
-    if (in == NULL) {
-        perror(file);
-        exit(1);
-    }
-    out = gzopen(outfile, mode);
-    if (out == NULL) {
-        fprintf(stderr, "%s: can't gzopen %s\n", prog, outfile);
-        exit(1);
-    }
-    gz_compress(in, out);
-
-    unlink(file);
-}
-
-
-/* ===========================================================================
- * Uncompress the given file and remove the original.
- */
-void file_uncompress(file)
-    char  *file;
-{
-    local char buf[MAX_NAME_LEN];
-    char *infile, *outfile;
-    FILE  *out;
-    gzFile in;
-    uInt len = (uInt)strlen(file);
-
-    strcpy(buf, file);
-
-    if (len > SUFFIX_LEN && strcmp(file+len-SUFFIX_LEN, GZ_SUFFIX) == 0) {
-        infile = file;
-        outfile = buf;
-        outfile[len-3] = '\0';
-    } else {
-        outfile = file;
-        infile = buf;
-        strcat(infile, GZ_SUFFIX);
-    }
-    in = gzopen(infile, "rb");
-    if (in == NULL) {
-        fprintf(stderr, "%s: can't gzopen %s\n", prog, infile);
-        exit(1);
-    }
-    out = fopen(outfile, "wb");
-    if (out == NULL) {
-        perror(file);
-        exit(1);
-    }
-
-    gz_uncompress(in, out);
-
-    unlink(infile);
-}
-
-
-/* ===========================================================================
- * Usage:  minigzip [-d] [-f] [-h] [-r] [-1 to -9] [files...]
- *   -d : decompress
- *   -f : compress with Z_FILTERED
- *   -h : compress with Z_HUFFMAN_ONLY
- *   -r : compress with Z_RLE
- *   -1 to -9 : compression level
- */
-
-int main(argc, argv)
-    int argc;
-    char *argv[];
-{
-    int uncompr = 0;
-    gzFile file;
-    char outmode[20];
-
-    strcpy(outmode, "wb6 ");
-
-    prog = argv[0];
-    argc--, argv++;
-
-    while (argc > 0) {
-      if (strcmp(*argv, "-d") == 0)
-        uncompr = 1;
-      else if (strcmp(*argv, "-f") == 0)
-        outmode[3] = 'f';
-      else if (strcmp(*argv, "-h") == 0)
-        outmode[3] = 'h';
-      else if (strcmp(*argv, "-r") == 0)
-        outmode[3] = 'R';
-      else if ((*argv)[0] == '-' && (*argv)[1] >= '1' && (*argv)[1] <= '9' &&
-               (*argv)[2] == 0)
-        outmode[2] = (*argv)[1];
-      else
-        break;
-      argc--, argv++;
-    }
-    if (outmode[3] == ' ')
-        outmode[3] = 0;
-    if (argc == 0) {
-        SET_BINARY_MODE(stdin);
-        SET_BINARY_MODE(stdout);
-        if (uncompr) {
-            file = gzdopen(fileno(stdin), "rb");
-            if (file == NULL) error("can't gzdopen stdin");
-            gz_uncompress(file, stdout);
-        } else {
-            file = gzdopen(fileno(stdout), outmode);
-            if (file == NULL) error("can't gzdopen stdout");
-            gz_compress(stdin, file);
-        }
-    } else {
-        do {
-            if (uncompr) {
-                file_uncompress(*argv);
-            } else {
-                file_compress(*argv, outmode);
-            }
-        } while (argv++, --argc);
-    }
-    return 0;
-}
diff --git a/reg-io/zlib/trees.c b/reg-io/zlib/trees.c
index 395e4e16..6a523ef3 100644
--- a/reg-io/zlib/trees.c
+++ b/reg-io/zlib/trees.c
@@ -1,5 +1,6 @@
 /* trees.c -- output deflated data using Huffman coding
- * Copyright (C) 1995-2005 Jean-loup Gailly
+ * Copyright (C) 1995-2024 Jean-loup Gailly
+ * detect_data_type() function provided freely by Cosmin Truta, 2006
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -35,7 +36,7 @@
 
 #include "deflate.h"
 
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
 #  include <ctype.h>
 #endif
 
@@ -73,11 +74,6 @@ local const uch bl_order[BL_CODES]
  * probability, to avoid transmitting the lengths for unused bit length codes.
  */
 
-#define Buf_size (8 * 2*sizeof(char))
-/* Number of bits used within bi_buf. (bi_buf might be implemented on
- * more than 16 bits on some systems.)
- */
-
 /* ===========================================================================
  * Local data. These are initialized only once.
  */
@@ -126,108 +122,168 @@ struct static_tree_desc_s {
     int     max_length;          /* max bit length for the codes */
 };
 
-local static_tree_desc  static_l_desc =
+#ifdef NO_INIT_GLOBAL_POINTERS
+#  define TCONST
+#else
+#  define TCONST const
+#endif
+
+local TCONST static_tree_desc static_l_desc =
 {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
 
-local static_tree_desc  static_d_desc =
+local TCONST static_tree_desc static_d_desc =
 {static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
 
-local static_tree_desc  static_bl_desc =
+local TCONST static_tree_desc static_bl_desc =
 {(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
 
 /* ===========================================================================
- * Local (static) routines in this file.
+ * Output a short LSB first on the stream.
+ * IN assertion: there is enough room in pendingBuf.
+ */
+#define put_short(s, w) { \
+    put_byte(s, (uch)((w) & 0xff)); \
+    put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Reverse the first len bits of a code, using straightforward code (a faster
+ * method would use a table)
+ * IN assertion: 1 <= len <= 15
+ */
+local unsigned bi_reverse(unsigned code, int len) {
+    register unsigned res = 0;
+    do {
+        res |= code & 1;
+        code >>= 1, res <<= 1;
+    } while (--len > 0);
+    return res >> 1;
+}
+
+/* ===========================================================================
+ * Flush the bit buffer, keeping at most 7 bits in it.
  */
+local void bi_flush(deflate_state *s) {
+    if (s->bi_valid == 16) {
+        put_short(s, s->bi_buf);
+        s->bi_buf = 0;
+        s->bi_valid = 0;
+    } else if (s->bi_valid >= 8) {
+        put_byte(s, (Byte)s->bi_buf);
+        s->bi_buf >>= 8;
+        s->bi_valid -= 8;
+    }
+}
+
+/* ===========================================================================
+ * Flush the bit buffer and align the output on a byte boundary
+ */
+local void bi_windup(deflate_state *s) {
+    if (s->bi_valid > 8) {
+        put_short(s, s->bi_buf);
+    } else if (s->bi_valid > 0) {
+        put_byte(s, (Byte)s->bi_buf);
+    }
+    s->bi_buf = 0;
+    s->bi_valid = 0;
+#ifdef ZLIB_DEBUG
+    s->bits_sent = (s->bits_sent + 7) & ~7;
+#endif
+}
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal).
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ *     zero code length.
+ */
+local void gen_codes(ct_data *tree, int max_code, ushf *bl_count) {
+    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
+    unsigned code = 0;         /* running code value */
+    int bits;                  /* bit index */
+    int n;                     /* code index */
+
+    /* The distribution counts are first used to generate the code values
+     * without bit reversal.
+     */
+    for (bits = 1; bits <= MAX_BITS; bits++) {
+        code = (code + bl_count[bits - 1]) << 1;
+        next_code[bits] = (ush)code;
+    }
+    /* Check that the bit counts in bl_count are consistent. The last code
+     * must be all ones.
+     */
+    Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1,
+            "inconsistent bit counts");
+    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+    for (n = 0;  n <= max_code; n++) {
+        int len = tree[n].Len;
+        if (len == 0) continue;
+        /* Now reverse the bits */
+        tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
 
-local void tr_static_init OF((void));
-local void init_block     OF((deflate_state *s));
-local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
-local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
-local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
-local void build_tree     OF((deflate_state *s, tree_desc *desc));
-local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
-local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
-local int  build_bl_tree  OF((deflate_state *s));
-local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
-                              int blcodes));
-local void compress_block OF((deflate_state *s, ct_data *ltree,
-                              ct_data *dtree));
-local void set_data_type  OF((deflate_state *s));
-local unsigned bi_reverse OF((unsigned value, int length));
-local void bi_windup      OF((deflate_state *s));
-local void bi_flush       OF((deflate_state *s));
-local void copy_block     OF((deflate_state *s, charf *buf, unsigned len,
-                              int header));
+        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
+            n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1));
+    }
+}
 
 #ifdef GEN_TREES_H
-local void gen_trees_header OF((void));
+local void gen_trees_header(void);
 #endif
 
-#ifndef DEBUG
+#ifndef ZLIB_DEBUG
 #  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
    /* Send a code of the given tree. c and tree must not have side effects */
 
-#else /* DEBUG */
+#else /* !ZLIB_DEBUG */
 #  define send_code(s, c, tree) \
      { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
        send_bits(s, tree[c].Code, tree[c].Len); }
 #endif
 
-/* ===========================================================================
- * Output a short LSB first on the stream.
- * IN assertion: there is enough room in pendingBuf.
- */
-#define put_short(s, w) { \
-    put_byte(s, (uch)((w) & 0xff)); \
-    put_byte(s, (uch)((ush)(w) >> 8)); \
-}
-
 /* ===========================================================================
  * Send a value on a given number of bits.
  * IN assertion: length <= 16 and value fits in length bits.
  */
-#ifdef DEBUG
-local void send_bits      OF((deflate_state *s, int value, int length));
-
-local void send_bits(s, value, length)
-    deflate_state *s;
-    int value;  /* value to send */
-    int length; /* number of bits */
-{
+#ifdef ZLIB_DEBUG
+local void send_bits(deflate_state *s, int value, int length) {
     Tracevv((stderr," l %2d v %4x ", length, value));
     Assert(length > 0 && length <= 15, "invalid length");
     s->bits_sent += (ulg)length;
 
     /* If not enough room in bi_buf, use (valid) bits from bi_buf and
-     * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
+     * (16 - bi_valid) bits from value, leaving (width - (16 - bi_valid))
      * unused bits in value.
      */
     if (s->bi_valid > (int)Buf_size - length) {
-        s->bi_buf |= (value << s->bi_valid);
+        s->bi_buf |= (ush)value << s->bi_valid;
         put_short(s, s->bi_buf);
         s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
         s->bi_valid += length - Buf_size;
     } else {
-        s->bi_buf |= value << s->bi_valid;
+        s->bi_buf |= (ush)value << s->bi_valid;
         s->bi_valid += length;
     }
 }
-#else /* !DEBUG */
+#else /* !ZLIB_DEBUG */
 
 #define send_bits(s, value, length) \
 { int len = length;\
   if (s->bi_valid > (int)Buf_size - len) {\
-    int val = value;\
-    s->bi_buf |= (val << s->bi_valid);\
+    int val = (int)value;\
+    s->bi_buf |= (ush)val << s->bi_valid;\
     put_short(s, s->bi_buf);\
     s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
     s->bi_valid += len - Buf_size;\
   } else {\
-    s->bi_buf |= (value) << s->bi_valid;\
+    s->bi_buf |= (ush)(value) << s->bi_valid;\
     s->bi_valid += len;\
   }\
 }
-#endif /* DEBUG */
+#endif /* ZLIB_DEBUG */
 
 
 /* the arguments must not have side effects */
@@ -235,8 +291,7 @@ local void send_bits(s, value, length)
 /* ===========================================================================
  * Initialize the various 'constant' tables.
  */
-local void tr_static_init()
-{
+local void tr_static_init(void) {
 #if defined(GEN_TREES_H) || !defined(STDC)
     static int static_init_done = 0;
     int n;        /* iterates over tree elements */
@@ -250,17 +305,19 @@ local void tr_static_init()
     if (static_init_done) return;
 
     /* For some embedded targets, global variables are not initialized: */
+#ifdef NO_INIT_GLOBAL_POINTERS
     static_l_desc.static_tree = static_ltree;
     static_l_desc.extra_bits = extra_lbits;
     static_d_desc.static_tree = static_dtree;
     static_d_desc.extra_bits = extra_dbits;
     static_bl_desc.extra_bits = extra_blbits;
+#endif
 
     /* Initialize the mapping length (0..255) -> length code (0..28) */
     length = 0;
     for (code = 0; code < LENGTH_CODES-1; code++) {
         base_length[code] = length;
-        for (n = 0; n < (1<<extra_lbits[code]); n++) {
+        for (n = 0; n < (1 << extra_lbits[code]); n++) {
             _length_code[length++] = (uch)code;
         }
     }
@@ -269,13 +326,13 @@ local void tr_static_init()
      * in two different ways: code 284 + 5 bits or code 285, so we
      * overwrite length_code[255] to use the best encoding:
      */
-    _length_code[length-1] = (uch)code;
+    _length_code[length - 1] = (uch)code;
 
     /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
     dist = 0;
     for (code = 0 ; code < 16; code++) {
         base_dist[code] = dist;
-        for (n = 0; n < (1<<extra_dbits[code]); n++) {
+        for (n = 0; n < (1 << extra_dbits[code]); n++) {
             _dist_code[dist++] = (uch)code;
         }
     }
@@ -283,11 +340,11 @@ local void tr_static_init()
     dist >>= 7; /* from now on, all distances are divided by 128 */
     for ( ; code < D_CODES; code++) {
         base_dist[code] = dist << 7;
-        for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
+        for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) {
             _dist_code[256 + dist++] = (uch)code;
         }
     }
-    Assert (dist == 256, "tr_static_init: 256+dist != 512");
+    Assert (dist == 256, "tr_static_init: 256 + dist != 512");
 
     /* Construct the codes of the static literal tree */
     for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
@@ -316,19 +373,18 @@ local void tr_static_init()
 }
 
 /* ===========================================================================
- * Genererate the file trees.h describing the static trees.
+ * Generate the file trees.h describing the static trees.
  */
 #ifdef GEN_TREES_H
-#  ifndef DEBUG
+#  ifndef ZLIB_DEBUG
 #    include <stdio.h>
 #  endif
 
 #  define SEPARATOR(i, last, width) \
       ((i) == (last)? "\n};\n\n" :    \
-       ((i) % (width) == (width)-1 ? ",\n" : ", "))
+       ((i) % (width) == (width) - 1 ? ",\n" : ", "))
 
-void gen_trees_header()
-{
+void gen_trees_header(void) {
     FILE *header = fopen("trees.h", "w");
     int i;
 
@@ -348,13 +404,14 @@ void gen_trees_header()
                 static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
     }
 
-    fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n");
+    fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");
     for (i = 0; i < DIST_CODE_LEN; i++) {
         fprintf(header, "%2u%s", _dist_code[i],
                 SEPARATOR(i, DIST_CODE_LEN-1, 20));
     }
 
-    fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
+    fprintf(header,
+        "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
     for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
         fprintf(header, "%2u%s", _length_code[i],
                 SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
@@ -376,12 +433,26 @@ void gen_trees_header()
 }
 #endif /* GEN_TREES_H */
 
+/* ===========================================================================
+ * Initialize a new block.
+ */
+local void init_block(deflate_state *s) {
+    int n; /* iterates over tree elements */
+
+    /* Initialize the trees. */
+    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
+    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
+    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
+
+    s->dyn_ltree[END_BLOCK].Freq = 1;
+    s->opt_len = s->static_len = 0L;
+    s->sym_next = s->matches = 0;
+}
+
 /* ===========================================================================
  * Initialize the tree data structures for a new zlib stream.
  */
-void _tr_init(s)
-    deflate_state *s;
-{
+void ZLIB_INTERNAL _tr_init(deflate_state *s) {
     tr_static_init();
 
     s->l_desc.dyn_tree = s->dyn_ltree;
@@ -395,8 +466,7 @@ void _tr_init(s)
 
     s->bi_buf = 0;
     s->bi_valid = 0;
-    s->last_eob_len = 8; /* enough lookahead for inflate */
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
     s->compressed_len = 0L;
     s->bits_sent = 0L;
 #endif
@@ -405,24 +475,6 @@ void _tr_init(s)
     init_block(s);
 }
 
-/* ===========================================================================
- * Initialize a new block.
- */
-local void init_block(s)
-    deflate_state *s;
-{
-    int n; /* iterates over tree elements */
-
-    /* Initialize the trees. */
-    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
-    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
-    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
-
-    s->dyn_ltree[END_BLOCK].Freq = 1;
-    s->opt_len = s->static_len = 0L;
-    s->last_lit = s->matches = 0;
-}
-
 #define SMALLEST 1
 /* Index within the heap array of least frequent node in the Huffman tree */
 
@@ -452,17 +504,13 @@ local void init_block(s)
  * when the heap property is re-established (each father smaller than its
  * two sons).
  */
-local void pqdownheap(s, tree, k)
-    deflate_state *s;
-    ct_data *tree;  /* the tree to restore */
-    int k;               /* node to move down */
-{
+local void pqdownheap(deflate_state *s, ct_data *tree, int k) {
     int v = s->heap[k];
     int j = k << 1;  /* left son of k */
     while (j <= s->heap_len) {
         /* Set j to the smallest of the two sons: */
         if (j < s->heap_len &&
-            smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
+            smaller(tree, s->heap[j + 1], s->heap[j], s->depth)) {
             j++;
         }
         /* Exit if v is smaller than both sons */
@@ -487,10 +535,7 @@ local void pqdownheap(s, tree, k)
  *     The length opt_len is updated; static_len is also updated if stree is
  *     not null.
  */
-local void gen_bitlen(s, desc)
-    deflate_state *s;
-    tree_desc *desc;    /* the tree descriptor */
-{
+local void gen_bitlen(deflate_state *s, tree_desc *desc) {
     ct_data *tree        = desc->dyn_tree;
     int max_code         = desc->max_code;
     const ct_data *stree = desc->stat_desc->static_tree;
@@ -511,7 +556,7 @@ local void gen_bitlen(s, desc)
      */
     tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
 
-    for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
+    for (h = s->heap_max + 1; h < HEAP_SIZE; h++) {
         n = s->heap[h];
         bits = tree[tree[n].Dad].Len + 1;
         if (bits > max_length) bits = max_length, overflow++;
@@ -522,22 +567,22 @@ local void gen_bitlen(s, desc)
 
         s->bl_count[bits]++;
         xbits = 0;
-        if (n >= base) xbits = extra[n-base];
+        if (n >= base) xbits = extra[n - base];
         f = tree[n].Freq;
-        s->opt_len += (ulg)f * (bits + xbits);
-        if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
+        s->opt_len += (ulg)f * (unsigned)(bits + xbits);
+        if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits);
     }
     if (overflow == 0) return;
 
-    Trace((stderr,"\nbit length overflow\n"));
+    Tracev((stderr,"\nbit length overflow\n"));
     /* This happens for example on obj2 and pic of the Calgary corpus */
 
     /* Find the first bit length which could increase: */
     do {
-        bits = max_length-1;
+        bits = max_length - 1;
         while (s->bl_count[bits] == 0) bits--;
-        s->bl_count[bits]--;      /* move one leaf down the tree */
-        s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
+        s->bl_count[bits]--;        /* move one leaf down the tree */
+        s->bl_count[bits + 1] += 2; /* move one overflow item as its brother */
         s->bl_count[max_length]--;
         /* The brother of the overflow item also moves one step up,
          * but this does not affect bl_count[max_length]
@@ -556,9 +601,8 @@ local void gen_bitlen(s, desc)
             m = s->heap[--h];
             if (m > max_code) continue;
             if ((unsigned) tree[m].Len != (unsigned) bits) {
-                Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
-                s->opt_len += ((long)bits - (long)tree[m].Len)
-                              *(long)tree[m].Freq;
+                Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+                s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq;
                 tree[m].Len = (ush)bits;
             }
             n--;
@@ -566,47 +610,9 @@ local void gen_bitlen(s, desc)
     }
 }
 
-/* ===========================================================================
- * Generate the codes for a given tree and bit counts (which need not be
- * optimal).
- * IN assertion: the array bl_count contains the bit length statistics for
- * the given tree and the field len is set for all tree elements.
- * OUT assertion: the field code is set for all tree elements of non
- *     zero code length.
- */
-local void gen_codes (tree, max_code, bl_count)
-    ct_data *tree;             /* the tree to decorate */
-    int max_code;              /* largest code with non zero frequency */
-    ushf *bl_count;            /* number of codes at each bit length */
-{
-    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
-    ush code = 0;              /* running code value */
-    int bits;                  /* bit index */
-    int n;                     /* code index */
-
-    /* The distribution counts are first used to generate the code values
-     * without bit reversal.
-     */
-    for (bits = 1; bits <= MAX_BITS; bits++) {
-        next_code[bits] = code = (code + bl_count[bits-1]) << 1;
-    }
-    /* Check that the bit counts in bl_count are consistent. The last code
-     * must be all ones.
-     */
-    Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
-            "inconsistent bit counts");
-    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
-
-    for (n = 0;  n <= max_code; n++) {
-        int len = tree[n].Len;
-        if (len == 0) continue;
-        /* Now reverse the bits */
-        tree[n].Code = bi_reverse(next_code[len]++, len);
-
-        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
-             n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
-    }
-}
+#ifdef DUMP_BL_TREE
+#  include <stdio.h>
+#endif
 
 /* ===========================================================================
  * Construct one Huffman tree and assigns the code bit strings and lengths.
@@ -616,10 +622,7 @@ local void gen_codes (tree, max_code, bl_count)
  *     and corresponding code. The length opt_len is updated; static_len is
  *     also updated if stree is not null. The field max_code is set.
  */
-local void build_tree(s, desc)
-    deflate_state *s;
-    tree_desc *desc; /* the tree descriptor */
-{
+local void build_tree(deflate_state *s, tree_desc *desc) {
     ct_data *tree         = desc->dyn_tree;
     const ct_data *stree  = desc->stat_desc->static_tree;
     int elems             = desc->stat_desc->elems;
@@ -628,7 +631,7 @@ local void build_tree(s, desc)
     int node;          /* new node being created */
 
     /* Construct the initial heap, with least frequent element in
-     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
+     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n + 1].
      * heap[0] is not used.
      */
     s->heap_len = 0, s->heap_max = HEAP_SIZE;
@@ -656,7 +659,7 @@ local void build_tree(s, desc)
     }
     desc->max_code = max_code;
 
-    /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
+    /* The elements heap[heap_len/2 + 1 .. heap_len] are leaves of the tree,
      * establish sub-heaps of increasing lengths:
      */
     for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
@@ -704,11 +707,7 @@ local void build_tree(s, desc)
  * Scan a literal or distance tree to determine the frequencies of the codes
  * in the bit length tree.
  */
-local void scan_tree (s, tree, max_code)
-    deflate_state *s;
-    ct_data *tree;   /* the tree to be scanned */
-    int max_code;    /* and its largest code of non zero frequency */
-{
+local void scan_tree(deflate_state *s, ct_data *tree, int max_code) {
     int n;                     /* iterates over all tree elements */
     int prevlen = -1;          /* last emitted length */
     int curlen;                /* length of current code */
@@ -718,10 +717,10 @@ local void scan_tree (s, tree, max_code)
     int min_count = 4;         /* min repeat count */
 
     if (nextlen == 0) max_count = 138, min_count = 3;
-    tree[max_code+1].Len = (ush)0xffff; /* guard */
+    tree[max_code + 1].Len = (ush)0xffff; /* guard */
 
     for (n = 0; n <= max_code; n++) {
-        curlen = nextlen; nextlen = tree[n+1].Len;
+        curlen = nextlen; nextlen = tree[n + 1].Len;
         if (++count < max_count && curlen == nextlen) {
             continue;
         } else if (count < min_count) {
@@ -749,11 +748,7 @@ local void scan_tree (s, tree, max_code)
  * Send a literal or distance tree in compressed form, using the codes in
  * bl_tree.
  */
-local void send_tree (s, tree, max_code)
-    deflate_state *s;
-    ct_data *tree; /* the tree to be scanned */
-    int max_code;       /* and its largest code of non zero frequency */
-{
+local void send_tree(deflate_state *s, ct_data *tree, int max_code) {
     int n;                     /* iterates over all tree elements */
     int prevlen = -1;          /* last emitted length */
     int curlen;                /* length of current code */
@@ -762,11 +757,11 @@ local void send_tree (s, tree, max_code)
     int max_count = 7;         /* max repeat count */
     int min_count = 4;         /* min repeat count */
 
-    /* tree[max_code+1].Len = -1; */  /* guard already set */
+    /* tree[max_code + 1].Len = -1; */  /* guard already set */
     if (nextlen == 0) max_count = 138, min_count = 3;
 
     for (n = 0; n <= max_code; n++) {
-        curlen = nextlen; nextlen = tree[n+1].Len;
+        curlen = nextlen; nextlen = tree[n + 1].Len;
         if (++count < max_count && curlen == nextlen) {
             continue;
         } else if (count < min_count) {
@@ -777,13 +772,13 @@ local void send_tree (s, tree, max_code)
                 send_code(s, curlen, s->bl_tree); count--;
             }
             Assert(count >= 3 && count <= 6, " 3_6?");
-            send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
+            send_code(s, REP_3_6, s->bl_tree); send_bits(s, count - 3, 2);
 
         } else if (count <= 10) {
-            send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
+            send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count - 3, 3);
 
         } else {
-            send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
+            send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count - 11, 7);
         }
         count = 0; prevlen = curlen;
         if (nextlen == 0) {
@@ -800,9 +795,7 @@ local void send_tree (s, tree, max_code)
  * Construct the Huffman tree for the bit lengths and return the index in
  * bl_order of the last bit length code to send.
  */
-local int build_bl_tree(s)
-    deflate_state *s;
-{
+local int build_bl_tree(deflate_state *s) {
     int max_blindex;  /* index of last bit length code of non zero freq */
 
     /* Determine the bit length frequencies for literal and distance trees */
@@ -811,8 +804,8 @@ local int build_bl_tree(s)
 
     /* Build the bit length tree: */
     build_tree(s, (tree_desc *)(&(s->bl_desc)));
-    /* opt_len now includes the length of the tree representations, except
-     * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
+    /* opt_len now includes the length of the tree representations, except the
+     * lengths of the bit lengths codes and the 5 + 5 + 4 bits for the counts.
      */
 
     /* Determine the number of bit length codes to send. The pkzip format
@@ -823,7 +816,7 @@ local int build_bl_tree(s)
         if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
     }
     /* Update opt_len to include the bit length tree and counts */
-    s->opt_len += 3*(max_blindex+1) + 5+5+4;
+    s->opt_len += 3*((ulg)max_blindex + 1) + 5 + 5 + 4;
     Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
             s->opt_len, s->static_len));
 
@@ -835,95 +828,172 @@ local int build_bl_tree(s)
  * lengths of the bit length codes, the literal tree and the distance tree.
  * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
  */
-local void send_all_trees(s, lcodes, dcodes, blcodes)
-    deflate_state *s;
-    int lcodes, dcodes, blcodes; /* number of codes for each tree */
-{
+local void send_all_trees(deflate_state *s, int lcodes, int dcodes,
+                          int blcodes) {
     int rank;                    /* index in bl_order */
 
     Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
     Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
             "too many codes");
     Tracev((stderr, "\nbl counts: "));
-    send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
-    send_bits(s, dcodes-1,   5);
-    send_bits(s, blcodes-4,  4); /* not -3 as stated in appnote.txt */
+    send_bits(s, lcodes - 257, 5);  /* not +255 as stated in appnote.txt */
+    send_bits(s, dcodes - 1,   5);
+    send_bits(s, blcodes - 4,  4);  /* not -3 as stated in appnote.txt */
     for (rank = 0; rank < blcodes; rank++) {
         Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
         send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
     }
     Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
 
-    send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
+    send_tree(s, (ct_data *)s->dyn_ltree, lcodes - 1);  /* literal tree */
     Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
 
-    send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
+    send_tree(s, (ct_data *)s->dyn_dtree, dcodes - 1);  /* distance tree */
     Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
 }
 
 /* ===========================================================================
  * Send a stored block
  */
-void _tr_stored_block(s, buf, stored_len, eof)
-    deflate_state *s;
-    charf *buf;       /* input block */
-    ulg stored_len;   /* length of input block */
-    int eof;          /* true if this is the last block for a file */
-{
-    send_bits(s, (STORED_BLOCK<<1)+eof, 3);  /* send block type */
-#ifdef DEBUG
+void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
+                                    ulg stored_len, int last) {
+    send_bits(s, (STORED_BLOCK<<1) + last, 3);  /* send block type */
+    bi_windup(s);        /* align on byte boundary */
+    put_short(s, (ush)stored_len);
+    put_short(s, (ush)~stored_len);
+    if (stored_len)
+        zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
+    s->pending += stored_len;
+#ifdef ZLIB_DEBUG
     s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
     s->compressed_len += (stored_len + 4) << 3;
+    s->bits_sent += 2*16;
+    s->bits_sent += stored_len << 3;
 #endif
-    copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
+}
+
+/* ===========================================================================
+ * Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
+ */
+void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s) {
+    bi_flush(s);
 }
 
 /* ===========================================================================
  * Send one empty static block to give enough lookahead for inflate.
  * This takes 10 bits, of which 7 may remain in the bit buffer.
- * The current inflate code requires 9 bits of lookahead. If the
- * last two codes for the previous block (real code plus EOB) were coded
- * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
- * the last real code. In this case we send two empty static blocks instead
- * of one. (There are no problems if the previous block is stored or fixed.)
- * To simplify the code, we assume the worst case of last real code encoded
- * on one bit only.
  */
-void _tr_align(s)
-    deflate_state *s;
-{
+void ZLIB_INTERNAL _tr_align(deflate_state *s) {
     send_bits(s, STATIC_TREES<<1, 3);
     send_code(s, END_BLOCK, static_ltree);
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
     s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
 #endif
     bi_flush(s);
-    /* Of the 10 bits for the empty block, we have already sent
-     * (10 - bi_valid) bits. The lookahead for the last real code (before
-     * the EOB of the previous block) was thus at least one plus the length
-     * of the EOB plus what we have just sent of the empty static block.
-     */
-    if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
-        send_bits(s, STATIC_TREES<<1, 3);
-        send_code(s, END_BLOCK, static_ltree);
-#ifdef DEBUG
-        s->compressed_len += 10L;
+}
+
+/* ===========================================================================
+ * Send the block data compressed using the given Huffman trees
+ */
+local void compress_block(deflate_state *s, const ct_data *ltree,
+                          const ct_data *dtree) {
+    unsigned dist;      /* distance of matched string */
+    int lc;             /* match length or unmatched char (if dist == 0) */
+    unsigned sx = 0;    /* running index in symbol buffers */
+    unsigned code;      /* the code to send */
+    int extra;          /* number of extra bits to send */
+
+    if (s->sym_next != 0) do {
+#ifdef LIT_MEM
+        dist = s->d_buf[sx];
+        lc = s->l_buf[sx++];
+#else
+        dist = s->sym_buf[sx++] & 0xff;
+        dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
+        lc = s->sym_buf[sx++];
 #endif
-        bi_flush(s);
-    }
-    s->last_eob_len = 7;
+        if (dist == 0) {
+            send_code(s, lc, ltree); /* send a literal byte */
+            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
+        } else {
+            /* Here, lc is the match length - MIN_MATCH */
+            code = _length_code[lc];
+            send_code(s, code + LITERALS + 1, ltree);   /* send length code */
+            extra = extra_lbits[code];
+            if (extra != 0) {
+                lc -= base_length[code];
+                send_bits(s, lc, extra);       /* send the extra length bits */
+            }
+            dist--; /* dist is now the match distance - 1 */
+            code = d_code(dist);
+            Assert (code < D_CODES, "bad d_code");
+
+            send_code(s, code, dtree);       /* send the distance code */
+            extra = extra_dbits[code];
+            if (extra != 0) {
+                dist -= (unsigned)base_dist[code];
+                send_bits(s, dist, extra);   /* send the extra distance bits */
+            }
+        } /* literal or match pair ? */
+
+        /* Check for no overlay of pending_buf on needed symbols */
+#ifdef LIT_MEM
+        Assert(s->pending < 2 * (s->lit_bufsize + sx), "pendingBuf overflow");
+#else
+        Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
+#endif
+
+    } while (sx < s->sym_next);
+
+    send_code(s, END_BLOCK, ltree);
+}
+
+/* ===========================================================================
+ * Check if the data type is TEXT or BINARY, using the following algorithm:
+ * - TEXT if the two conditions below are satisfied:
+ *    a) There are no non-portable control characters belonging to the
+ *       "block list" (0..6, 14..25, 28..31).
+ *    b) There is at least one printable character belonging to the
+ *       "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
+ * - BINARY otherwise.
+ * - The following partially-portable control characters form a
+ *   "gray list" that is ignored in this detection algorithm:
+ *   (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
+ * IN assertion: the fields Freq of dyn_ltree are set.
+ */
+local int detect_data_type(deflate_state *s) {
+    /* block_mask is the bit mask of block-listed bytes
+     * set bits 0..6, 14..25, and 28..31
+     * 0xf3ffc07f = binary 11110011111111111100000001111111
+     */
+    unsigned long block_mask = 0xf3ffc07fUL;
+    int n;
+
+    /* Check for non-textual ("block-listed") bytes. */
+    for (n = 0; n <= 31; n++, block_mask >>= 1)
+        if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0))
+            return Z_BINARY;
+
+    /* Check for textual ("allow-listed") bytes. */
+    if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
+            || s->dyn_ltree[13].Freq != 0)
+        return Z_TEXT;
+    for (n = 32; n < LITERALS; n++)
+        if (s->dyn_ltree[n].Freq != 0)
+            return Z_TEXT;
+
+    /* There are no "block-listed" or "allow-listed" bytes:
+     * this stream either is empty or has tolerated ("gray-listed") bytes only.
+     */
+    return Z_BINARY;
 }
 
 /* ===========================================================================
  * Determine the best encoding for the current block: dynamic trees, static
- * trees or store, and output the encoded block to the zip file.
+ * trees or store, and write out the encoded block.
  */
-void _tr_flush_block(s, buf, stored_len, eof)
-    deflate_state *s;
-    charf *buf;       /* input block, or NULL if too old */
-    ulg stored_len;   /* length of input block */
-    int eof;          /* true if this is the last block for a file */
-{
+void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
+                                   ulg stored_len, int last) {
     ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
     int max_blindex = 0;  /* index of last bit length code of non zero freq */
 
@@ -931,8 +1001,8 @@ void _tr_flush_block(s, buf, stored_len, eof)
     if (s->level > 0) {
 
         /* Check if the file is binary or text */
-        if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN)
-            set_data_type(s);
+        if (s->strm->data_type == Z_UNKNOWN)
+            s->strm->data_type = detect_data_type(s);
 
         /* Construct the literal and distance trees */
         build_tree(s, (tree_desc *)(&(s->l_desc)));
@@ -952,14 +1022,17 @@ void _tr_flush_block(s, buf, stored_len, eof)
         max_blindex = build_bl_tree(s);
 
         /* Determine the best encoding. Compute the block lengths in bytes. */
-        opt_lenb = (s->opt_len+3+7)>>3;
-        static_lenb = (s->static_len+3+7)>>3;
+        opt_lenb = (s->opt_len + 3 + 7) >> 3;
+        static_lenb = (s->static_len + 3 + 7) >> 3;
 
         Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
                 opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
-                s->last_lit));
+                s->sym_next / 3));
 
-        if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
+#ifndef FORCE_STATIC
+        if (static_lenb <= opt_lenb || s->strategy == Z_FIXED)
+#endif
+            opt_lenb = static_lenb;
 
     } else {
         Assert(buf != (char*)0, "lost buf");
@@ -969,7 +1042,7 @@ void _tr_flush_block(s, buf, stored_len, eof)
 #ifdef FORCE_STORED
     if (buf != (char*)0) { /* force stored block */
 #else
-    if (stored_len+4 <= opt_lenb && buf != (char*)0) {
+    if (stored_len + 4 <= opt_lenb && buf != (char*)0) {
                        /* 4: two words for the lengths */
 #endif
         /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
@@ -978,24 +1051,22 @@ void _tr_flush_block(s, buf, stored_len, eof)
          * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
          * transform a block into a stored block.
          */
-        _tr_stored_block(s, buf, stored_len, eof);
+        _tr_stored_block(s, buf, stored_len, last);
 
-#ifdef FORCE_STATIC
-    } else if (static_lenb >= 0) { /* force static trees */
-#else
-    } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
-#endif
-        send_bits(s, (STATIC_TREES<<1)+eof, 3);
-        compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
-#ifdef DEBUG
+    } else if (static_lenb == opt_lenb) {
+        send_bits(s, (STATIC_TREES<<1) + last, 3);
+        compress_block(s, (const ct_data *)static_ltree,
+                       (const ct_data *)static_dtree);
+#ifdef ZLIB_DEBUG
         s->compressed_len += 3 + s->static_len;
 #endif
     } else {
-        send_bits(s, (DYN_TREES<<1)+eof, 3);
-        send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
-                       max_blindex+1);
-        compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
-#ifdef DEBUG
+        send_bits(s, (DYN_TREES<<1) + last, 3);
+        send_all_trees(s, s->l_desc.max_code + 1, s->d_desc.max_code + 1,
+                       max_blindex + 1);
+        compress_block(s, (const ct_data *)s->dyn_ltree,
+                       (const ct_data *)s->dyn_dtree);
+#ifdef ZLIB_DEBUG
         s->compressed_len += 3 + s->opt_len;
 #endif
     }
@@ -1005,27 +1076,29 @@ void _tr_flush_block(s, buf, stored_len, eof)
      */
     init_block(s);
 
-    if (eof) {
+    if (last) {
         bi_windup(s);
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
         s->compressed_len += 7;  /* align on byte boundary */
 #endif
     }
-    Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
-           s->compressed_len-7*eof));
+    Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len >> 3,
+           s->compressed_len - 7*last));
 }
 
 /* ===========================================================================
  * Save the match info and tally the frequency counts. Return true if
  * the current block must be flushed.
  */
-int _tr_tally (s, dist, lc)
-    deflate_state *s;
-    unsigned dist;  /* distance of matched string */
-    unsigned lc;    /* match length-MIN_MATCH or unmatched char (if dist==0) */
-{
-    s->d_buf[s->last_lit] = (ush)dist;
-    s->l_buf[s->last_lit++] = (uch)lc;
+int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) {
+#ifdef LIT_MEM
+    s->d_buf[s->sym_next] = (ush)dist;
+    s->l_buf[s->sym_next++] = (uch)lc;
+#else
+    s->sym_buf[s->sym_next++] = (uch)dist;
+    s->sym_buf[s->sym_next++] = (uch)(dist >> 8);
+    s->sym_buf[s->sym_next++] = (uch)lc;
+#endif
     if (dist == 0) {
         /* lc is the unmatched char */
         s->dyn_ltree[lc].Freq++;
@@ -1037,183 +1110,8 @@ int _tr_tally (s, dist, lc)
                (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
                (ush)d_code(dist) < (ush)D_CODES,  "_tr_tally: bad match");
 
-        s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
+        s->dyn_ltree[_length_code[lc] + LITERALS + 1].Freq++;
         s->dyn_dtree[d_code(dist)].Freq++;
     }
-
-#ifdef TRUNCATE_BLOCK
-    /* Try to guess if it is profitable to stop the current block here */
-    if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
-        /* Compute an upper bound for the compressed length */
-        ulg out_length = (ulg)s->last_lit*8L;
-        ulg in_length = (ulg)((long)s->strstart - s->block_start);
-        int dcode;
-        for (dcode = 0; dcode < D_CODES; dcode++) {
-            out_length += (ulg)s->dyn_dtree[dcode].Freq *
-                (5L+extra_dbits[dcode]);
-        }
-        out_length >>= 3;
-        Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
-               s->last_lit, in_length, out_length,
-               100L - out_length*100L/in_length));
-        if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
-    }
-#endif
-    return (s->last_lit == s->lit_bufsize-1);
-    /* We avoid equality with lit_bufsize because of wraparound at 64K
-     * on 16 bit machines and because stored blocks are restricted to
-     * 64K-1 bytes.
-     */
-}
-
-/* ===========================================================================
- * Send the block data compressed using the given Huffman trees
- */
-local void compress_block(s, ltree, dtree)
-    deflate_state *s;
-    ct_data *ltree; /* literal tree */
-    ct_data *dtree; /* distance tree */
-{
-    unsigned dist;      /* distance of matched string */
-    int lc;             /* match length or unmatched char (if dist == 0) */
-    unsigned lx = 0;    /* running index in l_buf */
-    unsigned code;      /* the code to send */
-    int extra;          /* number of extra bits to send */
-
-    if (s->last_lit != 0) do {
-        dist = s->d_buf[lx];
-        lc = s->l_buf[lx++];
-        if (dist == 0) {
-            send_code(s, lc, ltree); /* send a literal byte */
-            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
-        } else {
-            /* Here, lc is the match length - MIN_MATCH */
-            code = _length_code[lc];
-            send_code(s, code+LITERALS+1, ltree); /* send the length code */
-            extra = extra_lbits[code];
-            if (extra != 0) {
-                lc -= base_length[code];
-                send_bits(s, lc, extra);       /* send the extra length bits */
-            }
-            dist--; /* dist is now the match distance - 1 */
-            code = d_code(dist);
-            Assert (code < D_CODES, "bad d_code");
-
-            send_code(s, code, dtree);       /* send the distance code */
-            extra = extra_dbits[code];
-            if (extra != 0) {
-                dist -= base_dist[code];
-                send_bits(s, dist, extra);   /* send the extra distance bits */
-            }
-        } /* literal or match pair ? */
-
-        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
-        Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
-               "pendingBuf overflow");
-
-    } while (lx < s->last_lit);
-
-    send_code(s, END_BLOCK, ltree);
-    s->last_eob_len = ltree[END_BLOCK].Len;
-}
-
-/* ===========================================================================
- * Set the data type to BINARY or TEXT, using a crude approximation:
- * set it to Z_TEXT if all symbols are either printable characters (33 to 255)
- * or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise.
- * IN assertion: the fields Freq of dyn_ltree are set.
- */
-local void set_data_type(s)
-    deflate_state *s;
-{
-    int n;
-
-    for (n = 0; n < 9; n++)
-        if (s->dyn_ltree[n].Freq != 0)
-            break;
-    if (n == 9)
-        for (n = 14; n < 32; n++)
-            if (s->dyn_ltree[n].Freq != 0)
-                break;
-    s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY;
-}
-
-/* ===========================================================================
- * Reverse the first len bits of a code, using straightforward code (a faster
- * method would use a table)
- * IN assertion: 1 <= len <= 15
- */
-local unsigned bi_reverse(code, len)
-    unsigned code; /* the value to invert */
-    int len;       /* its bit length */
-{
-    register unsigned res = 0;
-    do {
-        res |= code & 1;
-        code >>= 1, res <<= 1;
-    } while (--len > 0);
-    return res >> 1;
-}
-
-/* ===========================================================================
- * Flush the bit buffer, keeping at most 7 bits in it.
- */
-local void bi_flush(s)
-    deflate_state *s;
-{
-    if (s->bi_valid == 16) {
-        put_short(s, s->bi_buf);
-        s->bi_buf = 0;
-        s->bi_valid = 0;
-    } else if (s->bi_valid >= 8) {
-        put_byte(s, (Byte)s->bi_buf);
-        s->bi_buf >>= 8;
-        s->bi_valid -= 8;
-    }
-}
-
-/* ===========================================================================
- * Flush the bit buffer and align the output on a byte boundary
- */
-local void bi_windup(s)
-    deflate_state *s;
-{
-    if (s->bi_valid > 8) {
-        put_short(s, s->bi_buf);
-    } else if (s->bi_valid > 0) {
-        put_byte(s, (Byte)s->bi_buf);
-    }
-    s->bi_buf = 0;
-    s->bi_valid = 0;
-#ifdef DEBUG
-    s->bits_sent = (s->bits_sent+7) & ~7;
-#endif
-}
-
-/* ===========================================================================
- * Copy a stored block, storing first the length and its
- * one's complement if requested.
- */
-local void copy_block(s, buf, len, header)
-    deflate_state *s;
-    charf    *buf;    /* the input data */
-    unsigned len;     /* its length */
-    int      header;  /* true if block header must be written */
-{
-    bi_windup(s);        /* align on byte boundary */
-    s->last_eob_len = 8; /* enough lookahead for inflate */
-
-    if (header) {
-        put_short(s, (ush)len);
-        put_short(s, (ush)~len);
-#ifdef DEBUG
-        s->bits_sent += 2*16;
-#endif
-    }
-#ifdef DEBUG
-    s->bits_sent += (ulg)len<<3;
-#endif
-    while (len--) {
-        put_byte(s, *buf++);
-    }
+    return (s->sym_next == s->sym_end);
 }
diff --git a/reg-io/zlib/trees.h b/reg-io/zlib/trees.h
index 3e51006c..d35639d8 100644
--- a/reg-io/zlib/trees.h
+++ b/reg-io/zlib/trees.h
@@ -1,134 +1,128 @@
 /* header created automatically with -DGEN_TREES_H */
 
-local const ct_data static_ltree[L_CODES+2] =
-{
-   {{ 12},{  8}}, {{140},{  8}}, {{ 76},{  8}}, {{204},{  8}}, {{ 44},{  8}},
-   {{172},{  8}}, {{108},{  8}}, {{236},{  8}}, {{ 28},{  8}}, {{156},{  8}},
-   {{ 92},{  8}}, {{220},{  8}}, {{ 60},{  8}}, {{188},{  8}}, {{124},{  8}},
-   {{252},{  8}}, {{  2},{  8}}, {{130},{  8}}, {{ 66},{  8}}, {{194},{  8}},
-   {{ 34},{  8}}, {{162},{  8}}, {{ 98},{  8}}, {{226},{  8}}, {{ 18},{  8}},
-   {{146},{  8}}, {{ 82},{  8}}, {{210},{  8}}, {{ 50},{  8}}, {{178},{  8}},
-   {{114},{  8}}, {{242},{  8}}, {{ 10},{  8}}, {{138},{  8}}, {{ 74},{  8}},
-   {{202},{  8}}, {{ 42},{  8}}, {{170},{  8}}, {{106},{  8}}, {{234},{  8}},
-   {{ 26},{  8}}, {{154},{  8}}, {{ 90},{  8}}, {{218},{  8}}, {{ 58},{  8}},
-   {{186},{  8}}, {{122},{  8}}, {{250},{  8}}, {{  6},{  8}}, {{134},{  8}},
-   {{ 70},{  8}}, {{198},{  8}}, {{ 38},{  8}}, {{166},{  8}}, {{102},{  8}},
-   {{230},{  8}}, {{ 22},{  8}}, {{150},{  8}}, {{ 86},{  8}}, {{214},{  8}},
-   {{ 54},{  8}}, {{182},{  8}}, {{118},{  8}}, {{246},{  8}}, {{ 14},{  8}},
-   {{142},{  8}}, {{ 78},{  8}}, {{206},{  8}}, {{ 46},{  8}}, {{174},{  8}},
-   {{110},{  8}}, {{238},{  8}}, {{ 30},{  8}}, {{158},{  8}}, {{ 94},{  8}},
-   {{222},{  8}}, {{ 62},{  8}}, {{190},{  8}}, {{126},{  8}}, {{254},{  8}},
-   {{  1},{  8}}, {{129},{  8}}, {{ 65},{  8}}, {{193},{  8}}, {{ 33},{  8}},
-   {{161},{  8}}, {{ 97},{  8}}, {{225},{  8}}, {{ 17},{  8}}, {{145},{  8}},
-   {{ 81},{  8}}, {{209},{  8}}, {{ 49},{  8}}, {{177},{  8}}, {{113},{  8}},
-   {{241},{  8}}, {{  9},{  8}}, {{137},{  8}}, {{ 73},{  8}}, {{201},{  8}},
-   {{ 41},{  8}}, {{169},{  8}}, {{105},{  8}}, {{233},{  8}}, {{ 25},{  8}},
-   {{153},{  8}}, {{ 89},{  8}}, {{217},{  8}}, {{ 57},{  8}}, {{185},{  8}},
-   {{121},{  8}}, {{249},{  8}}, {{  5},{  8}}, {{133},{  8}}, {{ 69},{  8}},
-   {{197},{  8}}, {{ 37},{  8}}, {{165},{  8}}, {{101},{  8}}, {{229},{  8}},
-   {{ 21},{  8}}, {{149},{  8}}, {{ 85},{  8}}, {{213},{  8}}, {{ 53},{  8}},
-   {{181},{  8}}, {{117},{  8}}, {{245},{  8}}, {{ 13},{  8}}, {{141},{  8}},
-   {{ 77},{  8}}, {{205},{  8}}, {{ 45},{  8}}, {{173},{  8}}, {{109},{  8}},
-   {{237},{  8}}, {{ 29},{  8}}, {{157},{  8}}, {{ 93},{  8}}, {{221},{  8}},
-   {{ 61},{  8}}, {{189},{  8}}, {{125},{  8}}, {{253},{  8}}, {{ 19},{  9}},
-   {{275},{  9}}, {{147},{  9}}, {{403},{  9}}, {{ 83},{  9}}, {{339},{  9}},
-   {{211},{  9}}, {{467},{  9}}, {{ 51},{  9}}, {{307},{  9}}, {{179},{  9}},
-   {{435},{  9}}, {{115},{  9}}, {{371},{  9}}, {{243},{  9}}, {{499},{  9}},
-   {{ 11},{  9}}, {{267},{  9}}, {{139},{  9}}, {{395},{  9}}, {{ 75},{  9}},
-   {{331},{  9}}, {{203},{  9}}, {{459},{  9}}, {{ 43},{  9}}, {{299},{  9}},
-   {{171},{  9}}, {{427},{  9}}, {{107},{  9}}, {{363},{  9}}, {{235},{  9}},
-   {{491},{  9}}, {{ 27},{  9}}, {{283},{  9}}, {{155},{  9}}, {{411},{  9}},
-   {{ 91},{  9}}, {{347},{  9}}, {{219},{  9}}, {{475},{  9}}, {{ 59},{  9}},
-   {{315},{  9}}, {{187},{  9}}, {{443},{  9}}, {{123},{  9}}, {{379},{  9}},
-   {{251},{  9}}, {{507},{  9}}, {{  7},{  9}}, {{263},{  9}}, {{135},{  9}},
-   {{391},{  9}}, {{ 71},{  9}}, {{327},{  9}}, {{199},{  9}}, {{455},{  9}},
-   {{ 39},{  9}}, {{295},{  9}}, {{167},{  9}}, {{423},{  9}}, {{103},{  9}},
-   {{359},{  9}}, {{231},{  9}}, {{487},{  9}}, {{ 23},{  9}}, {{279},{  9}},
-   {{151},{  9}}, {{407},{  9}}, {{ 87},{  9}}, {{343},{  9}}, {{215},{  9}},
-   {{471},{  9}}, {{ 55},{  9}}, {{311},{  9}}, {{183},{  9}}, {{439},{  9}},
-   {{119},{  9}}, {{375},{  9}}, {{247},{  9}}, {{503},{  9}}, {{ 15},{  9}},
-   {{271},{  9}}, {{143},{  9}}, {{399},{  9}}, {{ 79},{  9}}, {{335},{  9}},
-   {{207},{  9}}, {{463},{  9}}, {{ 47},{  9}}, {{303},{  9}}, {{175},{  9}},
-   {{431},{  9}}, {{111},{  9}}, {{367},{  9}}, {{239},{  9}}, {{495},{  9}},
-   {{ 31},{  9}}, {{287},{  9}}, {{159},{  9}}, {{415},{  9}}, {{ 95},{  9}},
-   {{351},{  9}}, {{223},{  9}}, {{479},{  9}}, {{ 63},{  9}}, {{319},{  9}},
-   {{191},{  9}}, {{447},{  9}}, {{127},{  9}}, {{383},{  9}}, {{255},{  9}},
-   {{511},{  9}}, {{  0},{  7}}, {{ 64},{  7}}, {{ 32},{  7}}, {{ 96},{  7}},
-   {{ 16},{  7}}, {{ 80},{  7}}, {{ 48},{  7}}, {{112},{  7}}, {{  8},{  7}},
-   {{ 72},{  7}}, {{ 40},{  7}}, {{104},{  7}}, {{ 24},{  7}}, {{ 88},{  7}},
-   {{ 56},{  7}}, {{120},{  7}}, {{  4},{  7}}, {{ 68},{  7}}, {{ 36},{  7}},
-   {{100},{  7}}, {{ 20},{  7}}, {{ 84},{  7}}, {{ 52},{  7}}, {{116},{  7}},
-   {{  3},{  8}}, {{131},{  8}}, {{ 67},{  8}}, {{195},{  8}}, {{ 35},{  8}},
-   {{163},{  8}}, {{ 99},{  8}}, {{227},{  8}}
+local const ct_data static_ltree[L_CODES+2] = {
+{{ 12},{  8}}, {{140},{  8}}, {{ 76},{  8}}, {{204},{  8}}, {{ 44},{  8}},
+{{172},{  8}}, {{108},{  8}}, {{236},{  8}}, {{ 28},{  8}}, {{156},{  8}},
+{{ 92},{  8}}, {{220},{  8}}, {{ 60},{  8}}, {{188},{  8}}, {{124},{  8}},
+{{252},{  8}}, {{  2},{  8}}, {{130},{  8}}, {{ 66},{  8}}, {{194},{  8}},
+{{ 34},{  8}}, {{162},{  8}}, {{ 98},{  8}}, {{226},{  8}}, {{ 18},{  8}},
+{{146},{  8}}, {{ 82},{  8}}, {{210},{  8}}, {{ 50},{  8}}, {{178},{  8}},
+{{114},{  8}}, {{242},{  8}}, {{ 10},{  8}}, {{138},{  8}}, {{ 74},{  8}},
+{{202},{  8}}, {{ 42},{  8}}, {{170},{  8}}, {{106},{  8}}, {{234},{  8}},
+{{ 26},{  8}}, {{154},{  8}}, {{ 90},{  8}}, {{218},{  8}}, {{ 58},{  8}},
+{{186},{  8}}, {{122},{  8}}, {{250},{  8}}, {{  6},{  8}}, {{134},{  8}},
+{{ 70},{  8}}, {{198},{  8}}, {{ 38},{  8}}, {{166},{  8}}, {{102},{  8}},
+{{230},{  8}}, {{ 22},{  8}}, {{150},{  8}}, {{ 86},{  8}}, {{214},{  8}},
+{{ 54},{  8}}, {{182},{  8}}, {{118},{  8}}, {{246},{  8}}, {{ 14},{  8}},
+{{142},{  8}}, {{ 78},{  8}}, {{206},{  8}}, {{ 46},{  8}}, {{174},{  8}},
+{{110},{  8}}, {{238},{  8}}, {{ 30},{  8}}, {{158},{  8}}, {{ 94},{  8}},
+{{222},{  8}}, {{ 62},{  8}}, {{190},{  8}}, {{126},{  8}}, {{254},{  8}},
+{{  1},{  8}}, {{129},{  8}}, {{ 65},{  8}}, {{193},{  8}}, {{ 33},{  8}},
+{{161},{  8}}, {{ 97},{  8}}, {{225},{  8}}, {{ 17},{  8}}, {{145},{  8}},
+{{ 81},{  8}}, {{209},{  8}}, {{ 49},{  8}}, {{177},{  8}}, {{113},{  8}},
+{{241},{  8}}, {{  9},{  8}}, {{137},{  8}}, {{ 73},{  8}}, {{201},{  8}},
+{{ 41},{  8}}, {{169},{  8}}, {{105},{  8}}, {{233},{  8}}, {{ 25},{  8}},
+{{153},{  8}}, {{ 89},{  8}}, {{217},{  8}}, {{ 57},{  8}}, {{185},{  8}},
+{{121},{  8}}, {{249},{  8}}, {{  5},{  8}}, {{133},{  8}}, {{ 69},{  8}},
+{{197},{  8}}, {{ 37},{  8}}, {{165},{  8}}, {{101},{  8}}, {{229},{  8}},
+{{ 21},{  8}}, {{149},{  8}}, {{ 85},{  8}}, {{213},{  8}}, {{ 53},{  8}},
+{{181},{  8}}, {{117},{  8}}, {{245},{  8}}, {{ 13},{  8}}, {{141},{  8}},
+{{ 77},{  8}}, {{205},{  8}}, {{ 45},{  8}}, {{173},{  8}}, {{109},{  8}},
+{{237},{  8}}, {{ 29},{  8}}, {{157},{  8}}, {{ 93},{  8}}, {{221},{  8}},
+{{ 61},{  8}}, {{189},{  8}}, {{125},{  8}}, {{253},{  8}}, {{ 19},{  9}},
+{{275},{  9}}, {{147},{  9}}, {{403},{  9}}, {{ 83},{  9}}, {{339},{  9}},
+{{211},{  9}}, {{467},{  9}}, {{ 51},{  9}}, {{307},{  9}}, {{179},{  9}},
+{{435},{  9}}, {{115},{  9}}, {{371},{  9}}, {{243},{  9}}, {{499},{  9}},
+{{ 11},{  9}}, {{267},{  9}}, {{139},{  9}}, {{395},{  9}}, {{ 75},{  9}},
+{{331},{  9}}, {{203},{  9}}, {{459},{  9}}, {{ 43},{  9}}, {{299},{  9}},
+{{171},{  9}}, {{427},{  9}}, {{107},{  9}}, {{363},{  9}}, {{235},{  9}},
+{{491},{  9}}, {{ 27},{  9}}, {{283},{  9}}, {{155},{  9}}, {{411},{  9}},
+{{ 91},{  9}}, {{347},{  9}}, {{219},{  9}}, {{475},{  9}}, {{ 59},{  9}},
+{{315},{  9}}, {{187},{  9}}, {{443},{  9}}, {{123},{  9}}, {{379},{  9}},
+{{251},{  9}}, {{507},{  9}}, {{  7},{  9}}, {{263},{  9}}, {{135},{  9}},
+{{391},{  9}}, {{ 71},{  9}}, {{327},{  9}}, {{199},{  9}}, {{455},{  9}},
+{{ 39},{  9}}, {{295},{  9}}, {{167},{  9}}, {{423},{  9}}, {{103},{  9}},
+{{359},{  9}}, {{231},{  9}}, {{487},{  9}}, {{ 23},{  9}}, {{279},{  9}},
+{{151},{  9}}, {{407},{  9}}, {{ 87},{  9}}, {{343},{  9}}, {{215},{  9}},
+{{471},{  9}}, {{ 55},{  9}}, {{311},{  9}}, {{183},{  9}}, {{439},{  9}},
+{{119},{  9}}, {{375},{  9}}, {{247},{  9}}, {{503},{  9}}, {{ 15},{  9}},
+{{271},{  9}}, {{143},{  9}}, {{399},{  9}}, {{ 79},{  9}}, {{335},{  9}},
+{{207},{  9}}, {{463},{  9}}, {{ 47},{  9}}, {{303},{  9}}, {{175},{  9}},
+{{431},{  9}}, {{111},{  9}}, {{367},{  9}}, {{239},{  9}}, {{495},{  9}},
+{{ 31},{  9}}, {{287},{  9}}, {{159},{  9}}, {{415},{  9}}, {{ 95},{  9}},
+{{351},{  9}}, {{223},{  9}}, {{479},{  9}}, {{ 63},{  9}}, {{319},{  9}},
+{{191},{  9}}, {{447},{  9}}, {{127},{  9}}, {{383},{  9}}, {{255},{  9}},
+{{511},{  9}}, {{  0},{  7}}, {{ 64},{  7}}, {{ 32},{  7}}, {{ 96},{  7}},
+{{ 16},{  7}}, {{ 80},{  7}}, {{ 48},{  7}}, {{112},{  7}}, {{  8},{  7}},
+{{ 72},{  7}}, {{ 40},{  7}}, {{104},{  7}}, {{ 24},{  7}}, {{ 88},{  7}},
+{{ 56},{  7}}, {{120},{  7}}, {{  4},{  7}}, {{ 68},{  7}}, {{ 36},{  7}},
+{{100},{  7}}, {{ 20},{  7}}, {{ 84},{  7}}, {{ 52},{  7}}, {{116},{  7}},
+{{  3},{  8}}, {{131},{  8}}, {{ 67},{  8}}, {{195},{  8}}, {{ 35},{  8}},
+{{163},{  8}}, {{ 99},{  8}}, {{227},{  8}}
 };
 
-local const ct_data static_dtree[D_CODES] =
-{
-   {{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}},
-   {{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}},
-   {{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}},
-   {{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}},
-   {{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}},
-   {{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}}
+local const ct_data static_dtree[D_CODES] = {
+{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}},
+{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}},
+{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}},
+{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}},
+{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}},
+{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}}
 };
 
-const uch _dist_code[DIST_CODE_LEN] =
-{
-   0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
-   8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10,
-   10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-   11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-   12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
-   13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
-   13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-   14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-   14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-   14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
-   15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-   15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-   15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,  0,  0, 16, 17,
-   18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
-   23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-   24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-   26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-   26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
-   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
-   27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-   28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-   28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
-   28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-   29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-   29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
-   29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
+const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {
+ 0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
+ 8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10,
+10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,  0,  0, 16, 17,
+18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
 };
 
-const uch _length_code[MAX_MATCH-MIN_MATCH+1]=
-{
-   0,  1,  2,  3,  4,  5,  6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 12, 12,
-   13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
-   17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
-   19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
-   21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
-   22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
-   23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-   24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-   25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-   25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
-   26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
-   26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
-   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
+const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {
+ 0,  1,  2,  3,  4,  5,  6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 12, 12,
+13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
 };
 
-local const int base_length[LENGTH_CODES] =
-{
-   0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
-   64, 80, 96, 112, 128, 160, 192, 224, 0
+local const int base_length[LENGTH_CODES] = {
+0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
+64, 80, 96, 112, 128, 160, 192, 224, 0
 };
 
-local const int base_dist[D_CODES] =
-{
-   0,     1,     2,     3,     4,     6,     8,    12,    16,    24,
+local const int base_dist[D_CODES] = {
+    0,     1,     2,     3,     4,     6,     8,    12,    16,    24,
    32,    48,    64,    96,   128,   192,   256,   384,   512,   768,
-   1024,  1536,  2048,  3072,  4096,  6144,  8192, 12288, 16384, 24576
+ 1024,  1536,  2048,  3072,  4096,  6144,  8192, 12288, 16384, 24576
 };
 
diff --git a/reg-io/zlib/uncompr.c b/reg-io/zlib/uncompr.c
index b59e3d0d..5e256663 100644
--- a/reg-io/zlib/uncompr.c
+++ b/reg-io/zlib/uncompr.c
@@ -1,5 +1,5 @@
 /* uncompr.c -- decompress a memory buffer
- * Copyright (C) 1995-2003 Jean-loup Gailly.
+ * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -9,53 +9,77 @@
 #include "zlib.h"
 
 /* ===========================================================================
-     Decompresses the source buffer into the destination buffer.  sourceLen is
-   the byte length of the source buffer. Upon entry, destLen is the total
-   size of the destination buffer, which must be large enough to hold the
-   entire uncompressed data. (The size of the uncompressed data must have
-   been saved previously by the compressor and transmitted to the decompressor
-   by some mechanism outside the scope of this compression library.)
-   Upon exit, destLen is the actual size of the compressed buffer.
-     This function can be used to decompress a whole file at once if the
-   input file is mmap'ed.
-
-     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
-   enough memory, Z_BUF_ERROR if there was not enough room in the output
-   buffer, or Z_DATA_ERROR if the input data was corrupted.
+     Decompresses the source buffer into the destination buffer.  *sourceLen is
+   the byte length of the source buffer. Upon entry, *destLen is the total size
+   of the destination buffer, which must be large enough to hold the entire
+   uncompressed data. (The size of the uncompressed data must have been saved
+   previously by the compressor and transmitted to the decompressor by some
+   mechanism outside the scope of this compression library.) Upon exit,
+   *destLen is the size of the decompressed data and *sourceLen is the number
+   of source bytes consumed. Upon return, source + *sourceLen points to the
+   first unused input byte.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer, or
+   Z_DATA_ERROR if the input data was corrupted, including if the input data is
+   an incomplete zlib stream.
 */
-int ZEXPORT uncompress (dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-{
+int ZEXPORT uncompress2(Bytef *dest, uLongf *destLen, const Bytef *source,
+                        uLong *sourceLen) {
     z_stream stream;
     int err;
+    const uInt max = (uInt)-1;
+    uLong len, left;
+    Byte buf[1];    /* for detection of incomplete stream when *destLen == 0 */
 
-    stream.next_in = (Bytef*)source;
-    stream.avail_in = (uInt)sourceLen;
-    /* Check for source > 64K on 16-bit machine: */
-    if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;
-
-    stream.next_out = dest;
-    stream.avail_out = (uInt)*destLen;
-    if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
+    len = *sourceLen;
+    if (*destLen) {
+        left = *destLen;
+        *destLen = 0;
+    }
+    else {
+        left = 1;
+        dest = buf;
+    }
 
+    stream.next_in = (z_const Bytef *)source;
+    stream.avail_in = 0;
     stream.zalloc = (alloc_func)0;
     stream.zfree = (free_func)0;
+    stream.opaque = (voidpf)0;
 
     err = inflateInit(&stream);
     if (err != Z_OK) return err;
 
-    err = inflate(&stream, Z_FINISH);
-    if (err != Z_STREAM_END) {
-        inflateEnd(&stream);
-        if (err == Z_NEED_DICT || (err == Z_BUF_ERROR && stream.avail_in == 0))
-            return Z_DATA_ERROR;
-        return err;
-    }
-    *destLen = stream.total_out;
+    stream.next_out = dest;
+    stream.avail_out = 0;
+
+    do {
+        if (stream.avail_out == 0) {
+            stream.avail_out = left > (uLong)max ? max : (uInt)left;
+            left -= stream.avail_out;
+        }
+        if (stream.avail_in == 0) {
+            stream.avail_in = len > (uLong)max ? max : (uInt)len;
+            len -= stream.avail_in;
+        }
+        err = inflate(&stream, Z_NO_FLUSH);
+    } while (err == Z_OK);
+
+    *sourceLen -= len + stream.avail_in;
+    if (dest != buf)
+        *destLen = stream.total_out;
+    else if (stream.total_out && err == Z_BUF_ERROR)
+        left = 1;
+
+    inflateEnd(&stream);
+    return err == Z_STREAM_END ? Z_OK :
+           err == Z_NEED_DICT ? Z_DATA_ERROR  :
+           err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR :
+           err;
+}
 
-    err = inflateEnd(&stream);
-    return err;
+int ZEXPORT uncompress(Bytef *dest, uLongf *destLen, const Bytef *source,
+                       uLong sourceLen) {
+    return uncompress2(dest, destLen, source, &sourceLen);
 }
diff --git a/reg-io/zlib/zconf.h b/reg-io/zlib/zconf.h
index b891e9f6..62adc8d8 100644
--- a/reg-io/zlib/zconf.h
+++ b/reg-io/zlib/zconf.h
@@ -1,5 +1,5 @@
 /* zconf.h -- configuration of the zlib compression library
- * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -11,52 +11,161 @@
 /*
  * If you *really* need a unique prefix for all types and library functions,
  * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ * Even better than compiling with -DZ_PREFIX would be to use configure to set
+ * this permanently in zconf.h using "./configure --zprefix".
  */
-#ifdef Z_PREFIX
-#  define deflateInit_          z_deflateInit_
+#ifdef Z_PREFIX     /* may be set to #if 1 by ./configure */
+#  define Z_PREFIX_SET
+
+/* all linked symbols and init macros */
+#  define _dist_code            z__dist_code
+#  define _length_code          z__length_code
+#  define _tr_align             z__tr_align
+#  define _tr_flush_bits        z__tr_flush_bits
+#  define _tr_flush_block       z__tr_flush_block
+#  define _tr_init              z__tr_init
+#  define _tr_stored_block      z__tr_stored_block
+#  define _tr_tally             z__tr_tally
+#  define adler32               z_adler32
+#  define adler32_combine       z_adler32_combine
+#  define adler32_combine64     z_adler32_combine64
+#  define adler32_z             z_adler32_z
+#  ifndef Z_SOLO
+#    define compress              z_compress
+#    define compress2             z_compress2
+#    define compressBound         z_compressBound
+#  endif
+#  define crc32                 z_crc32
+#  define crc32_combine         z_crc32_combine
+#  define crc32_combine64       z_crc32_combine64
+#  define crc32_combine_gen     z_crc32_combine_gen
+#  define crc32_combine_gen64   z_crc32_combine_gen64
+#  define crc32_combine_op      z_crc32_combine_op
+#  define crc32_z               z_crc32_z
 #  define deflate               z_deflate
+#  define deflateBound          z_deflateBound
+#  define deflateCopy           z_deflateCopy
 #  define deflateEnd            z_deflateEnd
-#  define inflateInit_          z_inflateInit_
-#  define inflate               z_inflate
-#  define inflateEnd            z_inflateEnd
+#  define deflateGetDictionary  z_deflateGetDictionary
+#  define deflateInit           z_deflateInit
+#  define deflateInit2          z_deflateInit2
 #  define deflateInit2_         z_deflateInit2_
-#  define deflateSetDictionary  z_deflateSetDictionary
-#  define deflateCopy           z_deflateCopy
-#  define deflateReset          z_deflateReset
+#  define deflateInit_          z_deflateInit_
 #  define deflateParams         z_deflateParams
-#  define deflateBound          z_deflateBound
+#  define deflatePending        z_deflatePending
 #  define deflatePrime          z_deflatePrime
+#  define deflateReset          z_deflateReset
+#  define deflateResetKeep      z_deflateResetKeep
+#  define deflateSetDictionary  z_deflateSetDictionary
+#  define deflateSetHeader      z_deflateSetHeader
+#  define deflateTune           z_deflateTune
+#  define deflate_copyright     z_deflate_copyright
+#  define get_crc_table         z_get_crc_table
+#  ifndef Z_SOLO
+#    define gz_error              z_gz_error
+#    define gz_intmax             z_gz_intmax
+#    define gz_strwinerror        z_gz_strwinerror
+#    define gzbuffer              z_gzbuffer
+#    define gzclearerr            z_gzclearerr
+#    define gzclose               z_gzclose
+#    define gzclose_r             z_gzclose_r
+#    define gzclose_w             z_gzclose_w
+#    define gzdirect              z_gzdirect
+#    define gzdopen               z_gzdopen
+#    define gzeof                 z_gzeof
+#    define gzerror               z_gzerror
+#    define gzflush               z_gzflush
+#    define gzfread               z_gzfread
+#    define gzfwrite              z_gzfwrite
+#    define gzgetc                z_gzgetc
+#    define gzgetc_               z_gzgetc_
+#    define gzgets                z_gzgets
+#    define gzoffset              z_gzoffset
+#    define gzoffset64            z_gzoffset64
+#    define gzopen                z_gzopen
+#    define gzopen64              z_gzopen64
+#    ifdef _WIN32
+#      define gzopen_w              z_gzopen_w
+#    endif
+#    define gzprintf              z_gzprintf
+#    define gzputc                z_gzputc
+#    define gzputs                z_gzputs
+#    define gzread                z_gzread
+#    define gzrewind              z_gzrewind
+#    define gzseek                z_gzseek
+#    define gzseek64              z_gzseek64
+#    define gzsetparams           z_gzsetparams
+#    define gztell                z_gztell
+#    define gztell64              z_gztell64
+#    define gzungetc              z_gzungetc
+#    define gzvprintf             z_gzvprintf
+#    define gzwrite               z_gzwrite
+#  endif
+#  define inflate               z_inflate
+#  define inflateBack           z_inflateBack
+#  define inflateBackEnd        z_inflateBackEnd
+#  define inflateBackInit       z_inflateBackInit
+#  define inflateBackInit_      z_inflateBackInit_
+#  define inflateCodesUsed      z_inflateCodesUsed
+#  define inflateCopy           z_inflateCopy
+#  define inflateEnd            z_inflateEnd
+#  define inflateGetDictionary  z_inflateGetDictionary
+#  define inflateGetHeader      z_inflateGetHeader
+#  define inflateInit           z_inflateInit
+#  define inflateInit2          z_inflateInit2
 #  define inflateInit2_         z_inflateInit2_
+#  define inflateInit_          z_inflateInit_
+#  define inflateMark           z_inflateMark
+#  define inflatePrime          z_inflatePrime
+#  define inflateReset          z_inflateReset
+#  define inflateReset2         z_inflateReset2
+#  define inflateResetKeep      z_inflateResetKeep
 #  define inflateSetDictionary  z_inflateSetDictionary
 #  define inflateSync           z_inflateSync
 #  define inflateSyncPoint      z_inflateSyncPoint
-#  define inflateCopy           z_inflateCopy
-#  define inflateReset          z_inflateReset
-#  define inflateBack           z_inflateBack
-#  define inflateBackEnd        z_inflateBackEnd
-#  define compress              z_compress
-#  define compress2             z_compress2
-#  define compressBound         z_compressBound
-#  define uncompress            z_uncompress
-#  define adler32               z_adler32
-#  define crc32                 z_crc32
-#  define get_crc_table         z_get_crc_table
+#  define inflateUndermine      z_inflateUndermine
+#  define inflateValidate       z_inflateValidate
+#  define inflate_copyright     z_inflate_copyright
+#  define inflate_fast          z_inflate_fast
+#  define inflate_table         z_inflate_table
+#  ifndef Z_SOLO
+#    define uncompress            z_uncompress
+#    define uncompress2           z_uncompress2
+#  endif
 #  define zError                z_zError
+#  ifndef Z_SOLO
+#    define zcalloc               z_zcalloc
+#    define zcfree                z_zcfree
+#  endif
+#  define zlibCompileFlags      z_zlibCompileFlags
+#  define zlibVersion           z_zlibVersion
 
+/* all zlib typedefs in zlib.h and zconf.h */
+#  define Byte                  z_Byte
+#  define Bytef                 z_Bytef
 #  define alloc_func            z_alloc_func
+#  define charf                 z_charf
 #  define free_func             z_free_func
+#  ifndef Z_SOLO
+#    define gzFile                z_gzFile
+#  endif
+#  define gz_header             z_gz_header
+#  define gz_headerp            z_gz_headerp
 #  define in_func               z_in_func
+#  define intf                  z_intf
 #  define out_func              z_out_func
-#  define Byte                  z_Byte
 #  define uInt                  z_uInt
-#  define uLong                 z_uLong
-#  define Bytef                 z_Bytef
-#  define charf                 z_charf
-#  define intf                  z_intf
 #  define uIntf                 z_uIntf
+#  define uLong                 z_uLong
 #  define uLongf                z_uLongf
-#  define voidpf                z_voidpf
 #  define voidp                 z_voidp
+#  define voidpc                z_voidpc
+#  define voidpf                z_voidpf
+
+/* all zlib structs in zlib.h and zconf.h */
+#  define gz_header_s           z_gz_header_s
+#  define internal_state        z_internal_state
+
 #endif
 
 #if defined(__MSDOS__) && !defined(MSDOS)
@@ -125,9 +234,29 @@
 #  endif
 #endif
 
-/* Some Mac compilers merge all .h files incorrectly: */
-#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__)
-#  define NO_DUMMY_DECL
+#if defined(ZLIB_CONST) && !defined(z_const)
+#  define z_const const
+#else
+#  define z_const
+#endif
+
+#ifdef Z_SOLO
+#  ifdef _WIN64
+     typedef unsigned long long z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
+#else
+#  define z_longlong long long
+#  if defined(NO_SIZE_T)
+     typedef unsigned NO_SIZE_T z_size_t;
+#  elif defined(STDC)
+#    include <stddef.h>
+     typedef size_t z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
+#  undef z_longlong
 #endif
 
 /* Maximum value for memLevel in deflateInit2 */
@@ -157,11 +286,11 @@
  Of course this will generally degrade compression (there's no free lunch).
 
    The memory requirements for inflate are (in bytes) 1 << windowBits
- that is, 32K for windowBits=15 (default value) plus a few kilobytes
+ that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
  for small objects.
 */
 
-/* Type declarations */
+                        /* Type declarations */
 
 #ifndef OF /* function prototypes */
 #  ifdef STDC
@@ -179,7 +308,7 @@
  */
 #ifdef SYS16BIT
 #  if defined(M_I86SM) || defined(M_I86MM)
-/* MSC small or medium model */
+     /* MSC small or medium model */
 #    define SMALL_MEDIUM
 #    ifdef _MSC_VER
 #      define FAR _far
@@ -188,7 +317,7 @@
 #    endif
 #  endif
 #  if (defined(__SMALL__) || defined(__MEDIUM__))
-/* Turbo C small or medium model */
+     /* Turbo C small or medium model */
 #    define SMALL_MEDIUM
 #    ifdef __BORLANDC__
 #      define FAR _far
@@ -199,9 +328,9 @@
 #endif
 
 #if defined(WINDOWS) || defined(WIN32)
-/* If building or using zlib as a DLL, define ZLIB_DLL.
- * This is not mandatory, but it offers a little performance increase.
- */
+   /* If building or using zlib as a DLL, define ZLIB_DLL.
+    * This is not mandatory, but it offers a little performance increase.
+    */
 #  ifdef ZLIB_DLL
 #    if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
 #      ifdef ZLIB_INTERNAL
@@ -211,17 +340,20 @@
 #      endif
 #    endif
 #  endif  /* ZLIB_DLL */
-/* If building or using zlib with the WINAPI/WINAPIV calling convention,
- * define ZLIB_WINAPI.
- * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
- */
+   /* If building or using zlib with the WINAPI/WINAPIV calling convention,
+    * define ZLIB_WINAPI.
+    * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
+    */
 #  ifdef ZLIB_WINAPI
 #    ifdef FAR
 #      undef FAR
 #    endif
+#    ifndef WIN32_LEAN_AND_MEAN
+#      define WIN32_LEAN_AND_MEAN
+#    endif
 #    include <windows.h>
-/* No need for _export, use ZLIB.DEF instead. */
-/* For complete Windows compatibility, use WINAPI, not __stdcall. */
+     /* No need for _export, use ZLIB.DEF instead. */
+     /* For complete Windows compatibility, use WINAPI, not __stdcall. */
 #    define ZEXPORT WINAPI
 #    ifdef WIN32
 #      define ZEXPORTVA WINAPIV
@@ -264,10 +396,10 @@ typedef unsigned int   uInt;  /* 16 bits or more */
 typedef unsigned long  uLong; /* 32 bits or more */
 
 #ifdef SMALL_MEDIUM
-/* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+   /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
 #  define Bytef Byte FAR
 #else
-typedef Byte  FAR Bytef;
+   typedef Byte  FAR Bytef;
 #endif
 typedef char  FAR charf;
 typedef int   FAR intf;
@@ -275,60 +407,137 @@ typedef uInt  FAR uIntf;
 typedef uLong FAR uLongf;
 
 #ifdef STDC
-typedef void const *voidpc;
-typedef void FAR   *voidpf;
-typedef void       *voidp;
+   typedef void const *voidpc;
+   typedef void FAR   *voidpf;
+   typedef void       *voidp;
+#else
+   typedef Byte const *voidpc;
+   typedef Byte FAR   *voidpf;
+   typedef Byte       *voidp;
+#endif
+
+#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
+#  include <limits.h>
+#  if (UINT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned
+#  elif (ULONG_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned long
+#  elif (USHRT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned short
+#  endif
+#endif
+
+#ifdef Z_U4
+   typedef Z_U4 z_crc_t;
 #else
-typedef Byte const *voidpc;
-typedef Byte FAR   *voidpf;
-typedef Byte       *voidp;
+   typedef unsigned long z_crc_t;
 #endif
 
-#if 1           /* HAVE_UNISTD_H -- this line is updated by ./configure */
-#  include <sys/types.h> /* for off_t */
-#ifndef _WINDOWS
-#  include <unistd.h>    /* for SEEK_* and off_t */
+#ifdef HAVE_UNISTD_H    /* may be set to #if 1 by ./configure */
+#  define Z_HAVE_UNISTD_H
 #endif
-#  ifdef VMS
-#    include <unixio.h>   /* for off_t */
+
+#ifdef HAVE_STDARG_H    /* may be set to #if 1 by ./configure */
+#  define Z_HAVE_STDARG_H
+#endif
+
+#ifdef STDC
+#  ifndef Z_SOLO
+#    include <sys/types.h>      /* for off_t */
+#  endif
+#endif
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifndef Z_SOLO
+#    include <stdarg.h>         /* for va_list */
+#  endif
+#endif
+
+#ifdef _WIN32
+#  ifndef Z_SOLO
+#    include <stddef.h>         /* for wchar_t */
 #  endif
-#  define z_off_t off_t
 #endif
-#ifndef SEEK_SET
+
+/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
+ * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
+ * though the former does not conform to the LFS document), but considering
+ * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
+ * equivalently requesting no 64-bit operations
+ */
+#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
+#  undef _LARGEFILE64_SOURCE
+#endif
+
+#ifndef Z_HAVE_UNISTD_H
+#  ifdef __WATCOMC__
+#    define Z_HAVE_UNISTD_H
+#  endif
+#endif
+#ifndef Z_HAVE_UNISTD_H
+#  if defined(_LARGEFILE64_SOURCE) && !defined(_WIN32)
+#    define Z_HAVE_UNISTD_H
+#  endif
+#endif
+#ifndef Z_SOLO
+#  if defined(Z_HAVE_UNISTD_H)
+#    include <unistd.h>         /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
+#    ifdef VMS
+#      include <unixio.h>       /* for off_t */
+#    endif
+#    ifndef z_off_t
+#      define z_off_t off_t
+#    endif
+#  endif
+#endif
+
+#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
+#  define Z_LFS64
+#endif
+
+#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
+#  define Z_LARGE64
+#endif
+
+#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
+#  define Z_WANT64
+#endif
+
+#if !defined(SEEK_SET) && !defined(Z_SOLO)
 #  define SEEK_SET        0       /* Seek from beginning of file.  */
 #  define SEEK_CUR        1       /* Seek from current position.  */
 #  define SEEK_END        2       /* Set file pointer to EOF plus "offset" */
 #endif
+
 #ifndef z_off_t
 #  define z_off_t long
 #endif
 
-#if defined(__OS400__)
-#  define NO_vsnprintf
-#endif
-
-#if defined(__MVS__)
-#  define NO_vsnprintf
-#  ifdef FAR
-#    undef FAR
+#if !defined(_WIN32) && defined(Z_LARGE64)
+#  define z_off64_t off64_t
+#else
+#  if defined(_WIN32) && !defined(__GNUC__)
+#    define z_off64_t __int64
+#  else
+#    define z_off64_t z_off_t
 #  endif
 #endif
 
 /* MVS linker does not support external names larger than 8 bytes */
 #if defined(__MVS__)
-#   pragma map(deflateInit_,"DEIN")
-#   pragma map(deflateInit2_,"DEIN2")
-#   pragma map(deflateEnd,"DEEND")
-#   pragma map(deflateBound,"DEBND")
-#   pragma map(inflateInit_,"ININ")
-#   pragma map(inflateInit2_,"ININ2")
-#   pragma map(inflateEnd,"INEND")
-#   pragma map(inflateSync,"INSY")
-#   pragma map(inflateSetDictionary,"INSEDI")
-#   pragma map(compressBound,"CMBND")
-#   pragma map(inflate_table,"INTABL")
-#   pragma map(inflate_fast,"INFA")
-#   pragma map(inflate_copyright,"INCOPY")
+  #pragma map(deflateInit_,"DEIN")
+  #pragma map(deflateInit2_,"DEIN2")
+  #pragma map(deflateEnd,"DEEND")
+  #pragma map(deflateBound,"DEBND")
+  #pragma map(inflateInit_,"ININ")
+  #pragma map(inflateInit2_,"ININ2")
+  #pragma map(inflateEnd,"INEND")
+  #pragma map(inflateSync,"INSY")
+  #pragma map(inflateSetDictionary,"INSEDI")
+  #pragma map(compressBound,"CMBND")
+  #pragma map(inflate_table,"INTABL")
+  #pragma map(inflate_fast,"INFA")
+  #pragma map(inflate_copyright,"INCOPY")
 #endif
 
 #endif /* ZCONF_H */
diff --git a/reg-io/zlib/zlib.h b/reg-io/zlib/zlib.h
index 39d0ca63..8d4b932e 100644
--- a/reg-io/zlib/zlib.h
+++ b/reg-io/zlib/zlib.h
@@ -1,7 +1,7 @@
 /* zlib.h -- interface of the 'zlib' general purpose compression library
-  version 1.2.3, July 18th, 2005
+  version 1.3.1, January 22nd, 2024
 
-  Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
+  Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
 
   This software is provided 'as-is', without any express or implied
   warranty.  In no event will the authors be held liable for any damages
@@ -24,8 +24,8 @@
 
 
   The data format used by the zlib library is described by RFCs (Request for
-  Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
-  (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
+  Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
+  (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
 */
 
 #ifndef ZLIB_H
@@ -37,137 +37,142 @@
 extern "C" {
 #endif
 
-#define ZLIB_VERSION "1.2.3"
-#define ZLIB_VERNUM 0x1230
-
-   /*
-        The 'zlib' compression library provides in-memory compression and
-     decompression functions, including integrity checks of the uncompressed
-     data.  This version of the library supports only one compression method
-     (deflation) but other algorithms will be added later and will have the same
-     stream interface.
-
-        Compression can be done in a single step if the buffers are large
-     enough (for example if an input file is mmap'ed), or can be done by
-     repeated calls of the compression function.  In the latter case, the
-     application must provide more input and/or consume the output
-     (providing more output space) before each call.
-
-        The compressed data format used by default by the in-memory functions is
-     the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
-     around a deflate stream, which is itself documented in RFC 1951.
-
-        The library also supports reading and writing files in gzip (.gz) format
-     with an interface similar to that of stdio using the functions that start
-     with "gz".  The gzip format is different from the zlib format.  gzip is a
-     gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
-
-        This library can optionally read and write gzip streams in memory as well.
-
-        The zlib format was designed to be compact and fast for use in memory
-     and on communications channels.  The gzip format was designed for single-
-     file compression on file systems, has a larger header than zlib to maintain
-     directory information, and uses a different, slower check method than zlib.
-
-        The library does not install any signal handler. The decoder checks
-     the consistency of the compressed data, so the library should never
-     crash even in case of corrupted input.
-   */
-
-   typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
-   typedef void   (*free_func)  OF((voidpf opaque, voidpf address));
-
-   struct internal_state;
-
-   typedef struct z_stream_s
-   {
-      Bytef    *next_in;  /* next input byte */
-      uInt     avail_in;  /* number of bytes available at next_in */
-      uLong    total_in;  /* total nb of input bytes read so far */
-
-      Bytef    *next_out; /* next output byte should be put there */
-      uInt     avail_out; /* remaining free space at next_out */
-      uLong    total_out; /* total nb of bytes output so far */
-
-      char     *msg;      /* last error message, NULL if no error */
-      struct internal_state FAR *state; /* not visible by applications */
-
-      alloc_func zalloc;  /* used to allocate the internal state */
-      free_func  zfree;   /* used to free the internal state */
-      voidpf     opaque;  /* private data object passed to zalloc and zfree */
-
-      int     data_type;  /* best guess about the data type: binary or text */
-      uLong   adler;      /* adler32 value of the uncompressed data */
-      uLong   reserved;   /* reserved for future use */
-   } z_stream;
-
-   typedef z_stream FAR *z_streamp;
-
-   /*
-        gzip header information passed to and from zlib routines.  See RFC 1952
-     for more details on the meanings of these fields.
-   */
-   typedef struct gz_header_s
-   {
-      int     text;       /* true if compressed data believed to be text */
-      uLong   time;       /* modification time */
-      int     xflags;     /* extra flags (not used when writing a gzip file) */
-      int     os;         /* operating system */
-      Bytef   *extra;     /* pointer to extra field or Z_NULL if none */
-      uInt    extra_len;  /* extra field length (valid if extra != Z_NULL) */
-      uInt    extra_max;  /* space at extra (only when reading header) */
-      Bytef   *name;      /* pointer to zero-terminated file name or Z_NULL */
-      uInt    name_max;   /* space at name (only when reading header) */
-      Bytef   *comment;   /* pointer to zero-terminated comment or Z_NULL */
-      uInt    comm_max;   /* space at comment (only when reading header) */
-      int     hcrc;       /* true if there was or will be a header crc */
-      int     done;       /* true when done reading gzip header (not used
+#define ZLIB_VERSION "1.3.1"
+#define ZLIB_VERNUM 0x1310
+#define ZLIB_VER_MAJOR 1
+#define ZLIB_VER_MINOR 3
+#define ZLIB_VER_REVISION 1
+#define ZLIB_VER_SUBREVISION 0
+
+/*
+    The 'zlib' compression library provides in-memory compression and
+  decompression functions, including integrity checks of the uncompressed data.
+  This version of the library supports only one compression method (deflation)
+  but other algorithms will be added later and will have the same stream
+  interface.
+
+    Compression can be done in a single step if the buffers are large enough,
+  or can be done by repeated calls of the compression function.  In the latter
+  case, the application must provide more input and/or consume the output
+  (providing more output space) before each call.
+
+    The compressed data format used by default by the in-memory functions is
+  the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
+  around a deflate stream, which is itself documented in RFC 1951.
+
+    The library also supports reading and writing files in gzip (.gz) format
+  with an interface similar to that of stdio using the functions that start
+  with "gz".  The gzip format is different from the zlib format.  gzip is a
+  gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
+
+    This library can optionally read and write gzip and raw deflate streams in
+  memory as well.
+
+    The zlib format was designed to be compact and fast for use in memory
+  and on communications channels.  The gzip format was designed for single-
+  file compression on file systems, has a larger header than zlib to maintain
+  directory information, and uses a different, slower check method than zlib.
+
+    The library does not install any signal handler.  The decoder checks
+  the consistency of the compressed data, so the library should never crash
+  even in the case of corrupted input.
+*/
+
+typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size);
+typedef void   (*free_func)(voidpf opaque, voidpf address);
+
+struct internal_state;
+
+typedef struct z_stream_s {
+    z_const Bytef *next_in;     /* next input byte */
+    uInt     avail_in;  /* number of bytes available at next_in */
+    uLong    total_in;  /* total number of input bytes read so far */
+
+    Bytef    *next_out; /* next output byte will go here */
+    uInt     avail_out; /* remaining free space at next_out */
+    uLong    total_out; /* total number of bytes output so far */
+
+    z_const char *msg;  /* last error message, NULL if no error */
+    struct internal_state FAR *state; /* not visible by applications */
+
+    alloc_func zalloc;  /* used to allocate the internal state */
+    free_func  zfree;   /* used to free the internal state */
+    voidpf     opaque;  /* private data object passed to zalloc and zfree */
+
+    int     data_type;  /* best guess about the data type: binary or text
+                           for deflate, or the decoding state for inflate */
+    uLong   adler;      /* Adler-32 or CRC-32 value of the uncompressed data */
+    uLong   reserved;   /* reserved for future use */
+} z_stream;
+
+typedef z_stream FAR *z_streamp;
+
+/*
+     gzip header information passed to and from zlib routines.  See RFC 1952
+  for more details on the meanings of these fields.
+*/
+typedef struct gz_header_s {
+    int     text;       /* true if compressed data believed to be text */
+    uLong   time;       /* modification time */
+    int     xflags;     /* extra flags (not used when writing a gzip file) */
+    int     os;         /* operating system */
+    Bytef   *extra;     /* pointer to extra field or Z_NULL if none */
+    uInt    extra_len;  /* extra field length (valid if extra != Z_NULL) */
+    uInt    extra_max;  /* space at extra (only when reading header) */
+    Bytef   *name;      /* pointer to zero-terminated file name or Z_NULL */
+    uInt    name_max;   /* space at name (only when reading header) */
+    Bytef   *comment;   /* pointer to zero-terminated comment or Z_NULL */
+    uInt    comm_max;   /* space at comment (only when reading header) */
+    int     hcrc;       /* true if there was or will be a header crc */
+    int     done;       /* true when done reading gzip header (not used
                            when writing a gzip file) */
-   } gz_header;
-
-   typedef gz_header FAR *gz_headerp;
-
-   /*
-      The application must update next_in and avail_in when avail_in has
-      dropped to zero. It must update next_out and avail_out when avail_out
-      has dropped to zero. The application must initialize zalloc, zfree and
-      opaque before calling the init function. All other fields are set by the
-      compression library and must not be updated by the application.
-
-      The opaque value provided by the application will be passed as the first
-      parameter for calls of zalloc and zfree. This can be useful for custom
-      memory management. The compression library attaches no meaning to the
-      opaque value.
-
-      zalloc must return Z_NULL if there is not enough memory for the object.
-      If zlib is used in a multi-threaded application, zalloc and zfree must be
-      thread safe.
-
-      On 16-bit systems, the functions zalloc and zfree must be able to allocate
-      exactly 65536 bytes, but will not be required to allocate more than this
-      if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS,
-      pointers returned by zalloc for objects of exactly 65536 bytes *must*
-      have their offset normalized to zero. The default allocation function
-      provided by this library ensures this (see zutil.c). To reduce memory
-      requirements and avoid any allocation of 64K objects, at the expense of
-      compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h).
-
-      The fields total_in and total_out can be used for statistics or
-      progress reports. After compression, total_in holds the total size of
-      the uncompressed data and may be saved for use in the decompressor
-      (particularly if the decompressor wants to decompress everything in
-      a single step).
-   */
-
-   /* constants */
+} gz_header;
+
+typedef gz_header FAR *gz_headerp;
+
+/*
+     The application must update next_in and avail_in when avail_in has dropped
+   to zero.  It must update next_out and avail_out when avail_out has dropped
+   to zero.  The application must initialize zalloc, zfree and opaque before
+   calling the init function.  All other fields are set by the compression
+   library and must not be updated by the application.
+
+     The opaque value provided by the application will be passed as the first
+   parameter for calls of zalloc and zfree.  This can be useful for custom
+   memory management.  The compression library attaches no meaning to the
+   opaque value.
+
+     zalloc must return Z_NULL if there is not enough memory for the object.
+   If zlib is used in a multi-threaded application, zalloc and zfree must be
+   thread safe.  In that case, zlib is thread-safe.  When zalloc and zfree are
+   Z_NULL on entry to the initialization function, they are set to internal
+   routines that use the standard library functions malloc() and free().
+
+     On 16-bit systems, the functions zalloc and zfree must be able to allocate
+   exactly 65536 bytes, but will not be required to allocate more than this if
+   the symbol MAXSEG_64K is defined (see zconf.h).  WARNING: On MSDOS, pointers
+   returned by zalloc for objects of exactly 65536 bytes *must* have their
+   offset normalized to zero.  The default allocation function provided by this
+   library ensures this (see zutil.c).  To reduce memory requirements and avoid
+   any allocation of 64K objects, at the expense of compression ratio, compile
+   the library with -DMAX_WBITS=14 (see zconf.h).
+
+     The fields total_in and total_out can be used for statistics or progress
+   reports.  After compression, total_in holds the total size of the
+   uncompressed data and may be saved for use by the decompressor (particularly
+   if the decompressor wants to decompress everything in a single step).
+*/
+
+                        /* constants */
 
 #define Z_NO_FLUSH      0
-#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */
+#define Z_PARTIAL_FLUSH 1
 #define Z_SYNC_FLUSH    2
 #define Z_FULL_FLUSH    3
 #define Z_FINISH        4
 #define Z_BLOCK         5
-   /* Allowed flush values; see deflate() and inflate() below for details */
+#define Z_TREES         6
+/* Allowed flush values; see deflate() and inflate() below for details */
 
 #define Z_OK            0
 #define Z_STREAM_END    1
@@ -178,1182 +183,1753 @@ extern "C" {
 #define Z_MEM_ERROR    (-4)
 #define Z_BUF_ERROR    (-5)
 #define Z_VERSION_ERROR (-6)
-   /* Return codes for the compression/decompression functions. Negative
-    * values are errors, positive values are used for special but normal events.
-    */
+/* Return codes for the compression/decompression functions. Negative values
+ * are errors, positive values are used for special but normal events.
+ */
 
 #define Z_NO_COMPRESSION         0
 #define Z_BEST_SPEED             1
 #define Z_BEST_COMPRESSION       9
 #define Z_DEFAULT_COMPRESSION  (-1)
-   /* compression levels */
+/* compression levels */
 
 #define Z_FILTERED            1
 #define Z_HUFFMAN_ONLY        2
 #define Z_RLE                 3
 #define Z_FIXED               4
 #define Z_DEFAULT_STRATEGY    0
-   /* compression strategy; see deflateInit2() below for details */
+/* compression strategy; see deflateInit2() below for details */
 
 #define Z_BINARY   0
 #define Z_TEXT     1
 #define Z_ASCII    Z_TEXT   /* for compatibility with 1.2.2 and earlier */
 #define Z_UNKNOWN  2
-   /* Possible values of the data_type field (though see inflate()) */
+/* Possible values of the data_type field for deflate() */
 
 #define Z_DEFLATED   8
-   /* The deflate compression method (the only one supported in this version) */
+/* The deflate compression method (the only one supported in this version) */
 
 #define Z_NULL  0  /* for initializing zalloc, zfree, opaque */
 
 #define zlib_version zlibVersion()
-   /* for compatibility with versions < 1.0.2 */
-
-   /* basic functions */
-
-   ZEXTERN const char * ZEXPORT zlibVersion OF((void));
-   /* The application can compare zlibVersion and ZLIB_VERSION for consistency.
-      If the first character differs, the library code actually used is
-      not compatible with the zlib.h header file used by the application.
-      This check is automatically made by deflateInit and inflateInit.
-    */
-
-   /*
-   ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
-
-        Initializes the internal stream state for compression. The fields
-      zalloc, zfree and opaque must be initialized before by the caller.
-      If zalloc and zfree are set to Z_NULL, deflateInit updates them to
-      use default allocation functions.
-
-        The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
-      1 gives best speed, 9 gives best compression, 0 gives no compression at
-      all (the input data is simply copied a block at a time).
-      Z_DEFAULT_COMPRESSION requests a default compromise between speed and
-      compression (currently equivalent to level 6).
-
-        deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
-      enough memory, Z_STREAM_ERROR if level is not a valid compression level,
-      Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
-      with the version assumed by the caller (ZLIB_VERSION).
-      msg is set to null if there is no error message.  deflateInit does not
-      perform any compression: this will be done by deflate().
-   */
-
-
-   ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
-   /*
-       deflate compresses as much data as possible, and stops when the input
-     buffer becomes empty or the output buffer becomes full. It may introduce some
-     output latency (reading input without producing any output) except when
-     forced to flush.
-
-       The detailed semantics are as follows. deflate performs one or both of the
-     following actions:
-
-     - Compress more input starting at next_in and update next_in and avail_in
-       accordingly. If not all input can be processed (because there is not
-       enough room in the output buffer), next_in and avail_in are updated and
-       processing will resume at this point for the next call of deflate().
-
-     - Provide more output starting at next_out and update next_out and avail_out
-       accordingly. This action is forced if the parameter flush is non zero.
-       Forcing flush frequently degrades the compression ratio, so this parameter
-       should be set only when necessary (in interactive applications).
-       Some output may be provided even if flush is not set.
-
-     Before the call of deflate(), the application should ensure that at least
-     one of the actions is possible, by providing more input and/or consuming
-     more output, and updating avail_in or avail_out accordingly; avail_out
-     should never be zero before the call. The application can consume the
-     compressed output when it wants, for example when the output buffer is full
-     (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK
-     and with zero avail_out, it must be called again after making room in the
-     output buffer because there might be more output pending.
-
-       Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
-     decide how much data to accumualte before producing output, in order to
-     maximize compression.
-
-       If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
-     flushed to the output buffer and the output is aligned on a byte boundary, so
-     that the decompressor can get all input data available so far. (In particular
-     avail_in is zero after the call if enough output space has been provided
-     before the call.)  Flushing may degrade compression for some compression
-     algorithms and so it should be used only when necessary.
-
-       If flush is set to Z_FULL_FLUSH, all output is flushed as with
-     Z_SYNC_FLUSH, and the compression state is reset so that decompression can
-     restart from this point if previous compressed data has been damaged or if
-     random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
-     compression.
-
-       If deflate returns with avail_out == 0, this function must be called again
-     with the same value of the flush parameter and more output space (updated
-     avail_out), until the flush is complete (deflate returns with non-zero
-     avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
-     avail_out is greater than six to avoid repeated flush markers due to
-     avail_out == 0 on return.
-
-       If the parameter flush is set to Z_FINISH, pending input is processed,
-     pending output is flushed and deflate returns with Z_STREAM_END if there
-     was enough output space; if deflate returns with Z_OK, this function must be
-     called again with Z_FINISH and more output space (updated avail_out) but no
-     more input data, until it returns with Z_STREAM_END or an error. After
-     deflate has returned Z_STREAM_END, the only possible operations on the
-     stream are deflateReset or deflateEnd.
-
-       Z_FINISH can be used immediately after deflateInit if all the compression
-     is to be done in a single step. In this case, avail_out must be at least
-     the value returned by deflateBound (see below). If deflate does not return
-     Z_STREAM_END, then it must be called again as described above.
-
-       deflate() sets strm->adler to the adler32 checksum of all input read
-     so far (that is, total_in bytes).
-
-       deflate() may update strm->data_type if it can make a good guess about
-     the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered
-     binary. This field is only for information purposes and does not affect
-     the compression algorithm in any manner.
-
-       deflate() returns Z_OK if some progress has been made (more input
-     processed or more output produced), Z_STREAM_END if all input has been
-     consumed and all output has been produced (only when flush is set to
-     Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
-     if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible
-     (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not
-     fatal, and deflate() can be called again with more input and more output
-     space to continue compressing.
-   */
-
-
-   ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
-   /*
-        All dynamically allocated data structures for this stream are freed.
-      This function discards any unprocessed input and does not flush any
-      pending output.
-
-        deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
-      stream state was inconsistent, Z_DATA_ERROR if the stream was freed
-      prematurely (some input or output was discarded). In the error case,
-      msg may be set but then points to a static string (which must not be
-      deallocated).
-   */
-
-
-   /*
-   ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
-
-        Initializes the internal stream state for decompression. The fields
-      next_in, avail_in, zalloc, zfree and opaque must be initialized before by
-      the caller. If next_in is not Z_NULL and avail_in is large enough (the exact
-      value depends on the compression method), inflateInit determines the
-      compression method from the zlib header and allocates all data structures
-      accordingly; otherwise the allocation will be deferred to the first call of
-      inflate.  If zalloc and zfree are set to Z_NULL, inflateInit updates them to
-      use default allocation functions.
-
-        inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
-      memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
-      version assumed by the caller.  msg is set to null if there is no error
-      message. inflateInit does not perform any decompression apart from reading
-      the zlib header if present: this will be done by inflate().  (So next_in and
-      avail_in may be modified, but next_out and avail_out are unchanged.)
-   */
-
-
-   ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
-   /*
-       inflate decompresses as much data as possible, and stops when the input
-     buffer becomes empty or the output buffer becomes full. It may introduce
-     some output latency (reading input without producing any output) except when
-     forced to flush.
-
-     The detailed semantics are as follows. inflate performs one or both of the
-     following actions:
-
-     - Decompress more input starting at next_in and update next_in and avail_in
-       accordingly. If not all input can be processed (because there is not
-       enough room in the output buffer), next_in is updated and processing
-       will resume at this point for the next call of inflate().
-
-     - Provide more output starting at next_out and update next_out and avail_out
-       accordingly.  inflate() provides as much output as possible, until there
-       is no more input data or no more space in the output buffer (see below
-       about the flush parameter).
-
-     Before the call of inflate(), the application should ensure that at least
-     one of the actions is possible, by providing more input and/or consuming
-     more output, and updating the next_* and avail_* values accordingly.
-     The application can consume the uncompressed output when it wants, for
-     example when the output buffer is full (avail_out == 0), or after each
-     call of inflate(). If inflate returns Z_OK and with zero avail_out, it
-     must be called again after making room in the output buffer because there
-     might be more output pending.
-
-       The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH,
-     Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much
-     output as possible to the output buffer. Z_BLOCK requests that inflate() stop
-     if and when it gets to the next deflate block boundary. When decoding the
-     zlib or gzip format, this will cause inflate() to return immediately after
-     the header and before the first block. When doing a raw inflate, inflate()
-     will go ahead and process the first block, and will return when it gets to
-     the end of that block, or when it runs out of data.
-
-       The Z_BLOCK option assists in appending to or combining deflate streams.
-     Also to assist in this, on return inflate() will set strm->data_type to the
-     number of unused bits in the last byte taken from strm->next_in, plus 64
-     if inflate() is currently decoding the last block in the deflate stream,
-     plus 128 if inflate() returned immediately after decoding an end-of-block
-     code or decoding the complete header up to just before the first byte of the
-     deflate stream. The end-of-block will not be indicated until all of the
-     uncompressed data from that block has been written to strm->next_out.  The
-     number of unused bits may in general be greater than seven, except when
-     bit 7 of data_type is set, in which case the number of unused bits will be
-     less than eight.
-
-       inflate() should normally be called until it returns Z_STREAM_END or an
-     error. However if all decompression is to be performed in a single step
-     (a single call of inflate), the parameter flush should be set to
-     Z_FINISH. In this case all pending input is processed and all pending
-     output is flushed; avail_out must be large enough to hold all the
-     uncompressed data. (The size of the uncompressed data may have been saved
-     by the compressor for this purpose.) The next operation on this stream must
-     be inflateEnd to deallocate the decompression state. The use of Z_FINISH
-     is never required, but can be used to inform inflate that a faster approach
-     may be used for the single inflate() call.
-
-        In this implementation, inflate() always flushes as much output as
-     possible to the output buffer, and always uses the faster approach on the
-     first call. So the only effect of the flush parameter in this implementation
-     is on the return value of inflate(), as noted below, or when it returns early
-     because Z_BLOCK is used.
-
-        If a preset dictionary is needed after this call (see inflateSetDictionary
-     below), inflate sets strm->adler to the adler32 checksum of the dictionary
-     chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
-     strm->adler to the adler32 checksum of all output produced so far (that is,
-     total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
-     below. At the end of the stream, inflate() checks that its computed adler32
-     checksum is equal to that saved by the compressor and returns Z_STREAM_END
-     only if the checksum is correct.
-
-       inflate() will decompress and check either zlib-wrapped or gzip-wrapped
-     deflate data.  The header type is detected automatically.  Any information
-     contained in the gzip header is not retained, so applications that need that
-     information should instead use raw inflate, see inflateInit2() below, or
-     inflateBack() and perform their own processing of the gzip header and
-     trailer.
-
-       inflate() returns Z_OK if some progress has been made (more input processed
-     or more output produced), Z_STREAM_END if the end of the compressed data has
-     been reached and all uncompressed output has been produced, Z_NEED_DICT if a
-     preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
-     corrupted (input stream not conforming to the zlib format or incorrect check
-     value), Z_STREAM_ERROR if the stream structure was inconsistent (for example
-     if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory,
-     Z_BUF_ERROR if no progress is possible or if there was not enough room in the
-     output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and
-     inflate() can be called again with more input and more output space to
-     continue decompressing. If Z_DATA_ERROR is returned, the application may then
-     call inflateSync() to look for a good compression block if a partial recovery
-     of the data is desired.
-   */
-
-
-   ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
-   /*
-        All dynamically allocated data structures for this stream are freed.
-      This function discards any unprocessed input and does not flush any
-      pending output.
-
-        inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
-      was inconsistent. In the error case, msg may be set but then points to a
-      static string (which must not be deallocated).
-   */
-
-   /* Advanced functions */
-
-   /*
-       The following functions are needed only in some special applications.
-   */
-
-   /*
-   ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
-                                        int  level,
-                                        int  method,
-                                        int  windowBits,
-                                        int  memLevel,
-                                        int  strategy));
-
-        This is another version of deflateInit with more compression options. The
-      fields next_in, zalloc, zfree and opaque must be initialized before by
-      the caller.
-
-        The method parameter is the compression method. It must be Z_DEFLATED in
-      this version of the library.
-
-        The windowBits parameter is the base two logarithm of the window size
-      (the size of the history buffer). It should be in the range 8..15 for this
-      version of the library. Larger values of this parameter result in better
-      compression at the expense of memory usage. The default value is 15 if
-      deflateInit is used instead.
-
-        windowBits can also be -8..-15 for raw deflate. In this case, -windowBits
-      determines the window size. deflate() will then generate raw deflate data
-      with no zlib header or trailer, and will not compute an adler32 check value.
-
-        windowBits can also be greater than 15 for optional gzip encoding. Add
-      16 to windowBits to write a simple gzip header and trailer around the
-      compressed data instead of a zlib wrapper. The gzip header will have no
-      file name, no extra data, no comment, no modification time (set to zero),
-      no header crc, and the operating system will be set to 255 (unknown).  If a
-      gzip stream is being written, strm->adler is a crc32 instead of an adler32.
-
-        The memLevel parameter specifies how much memory should be allocated
-      for the internal compression state. memLevel=1 uses minimum memory but
-      is slow and reduces compression ratio; memLevel=9 uses maximum memory
-      for optimal speed. The default value is 8. See zconf.h for total memory
-      usage as a function of windowBits and memLevel.
-
-        The strategy parameter is used to tune the compression algorithm. Use the
-      value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
-      filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
-      string match), or Z_RLE to limit match distances to one (run-length
-      encoding). Filtered data consists mostly of small values with a somewhat
-      random distribution. In this case, the compression algorithm is tuned to
-      compress them better. The effect of Z_FILTERED is to force more Huffman
-      coding and less string matching; it is somewhat intermediate between
-      Z_DEFAULT and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as
-      Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy
-      parameter only affects the compression ratio but not the correctness of the
-      compressed output even if it is not set appropriately.  Z_FIXED prevents the
-      use of dynamic Huffman codes, allowing for a simpler decoder for special
-      applications.
-
-         deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
-      memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid
-      method). msg is set to null if there is no error message.  deflateInit2 does
-      not perform any compression: this will be done by deflate().
-   */
-
-   ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
-         const Bytef *dictionary,
-         uInt  dictLength));
-   /*
-        Initializes the compression dictionary from the given byte sequence
-      without producing any compressed output. This function must be called
-      immediately after deflateInit, deflateInit2 or deflateReset, before any
-      call of deflate. The compressor and decompressor must use exactly the same
-      dictionary (see inflateSetDictionary).
-
-        The dictionary should consist of strings (byte sequences) that are likely
-      to be encountered later in the data to be compressed, with the most commonly
-      used strings preferably put towards the end of the dictionary. Using a
-      dictionary is most useful when the data to be compressed is short and can be
-      predicted with good accuracy; the data can then be compressed better than
-      with the default empty dictionary.
-
-        Depending on the size of the compression data structures selected by
-      deflateInit or deflateInit2, a part of the dictionary may in effect be
-      discarded, for example if the dictionary is larger than the window size in
-      deflate or deflate2. Thus the strings most likely to be useful should be
-      put at the end of the dictionary, not at the front. In addition, the
-      current implementation of deflate will use at most the window size minus
-      262 bytes of the provided dictionary.
-
-        Upon return of this function, strm->adler is set to the adler32 value
-      of the dictionary; the decompressor may later use this value to determine
-      which dictionary has been used by the compressor. (The adler32 value
-      applies to the whole dictionary even if only a subset of the dictionary is
-      actually used by the compressor.) If a raw deflate was requested, then the
-      adler32 value is not computed and strm->adler is not set.
-
-        deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
-      parameter is invalid (such as NULL dictionary) or the stream state is
-      inconsistent (for example if deflate has already been called for this stream
-      or if the compression method is bsort). deflateSetDictionary does not
-      perform any compression: this will be done by deflate().
-   */
-
-   ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
-                                       z_streamp source));
-   /*
-        Sets the destination stream as a complete copy of the source stream.
-
-        This function can be useful when several compression strategies will be
-      tried, for example when there are several ways of pre-processing the input
-      data with a filter. The streams that will be discarded should then be freed
-      by calling deflateEnd.  Note that deflateCopy duplicates the internal
-      compression state which can be quite large, so this strategy is slow and
-      can consume lots of memory.
-
-        deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
-      enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
-      (such as zalloc being NULL). msg is left unchanged in both source and
-      destination.
-   */
-
-   ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
-   /*
-        This function is equivalent to deflateEnd followed by deflateInit,
-      but does not free and reallocate all the internal compression state.
-      The stream will keep the same compression level and any other attributes
-      that may have been set by deflateInit2.
-
-         deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
-      stream state was inconsistent (such as zalloc or state being NULL).
-   */
-
-   ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
-                                         int level,
-                                         int strategy));
-   /*
-        Dynamically update the compression level and compression strategy.  The
-      interpretation of level and strategy is as in deflateInit2.  This can be
-      used to switch between compression and straight copy of the input data, or
-      to switch to a different kind of input data requiring a different
-      strategy. If the compression level is changed, the input available so far
-      is compressed with the old level (and may be flushed); the new level will
-      take effect only at the next call of deflate().
-
-        Before the call of deflateParams, the stream state must be set as for
-      a call of deflate(), since the currently available input may have to
-      be compressed and flushed. In particular, strm->avail_out must be non-zero.
-
-        deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
-      stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
-      if strm->avail_out was zero.
-   */
-
-   ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
-                                       int good_length,
-                                       int max_lazy,
-                                       int nice_length,
-                                       int max_chain));
-   /*
-        Fine tune deflate's internal compression parameters.  This should only be
-      used by someone who understands the algorithm used by zlib's deflate for
-      searching for the best matching string, and even then only by the most
-      fanatic optimizer trying to squeeze out the last compressed bit for their
-      specific input data.  Read the deflate.c source code for the meaning of the
-      max_lazy, good_length, nice_length, and max_chain parameters.
-
-        deflateTune() can be called after deflateInit() or deflateInit2(), and
-      returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
-    */
-
-   ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
-                                          uLong sourceLen));
-   /*
-        deflateBound() returns an upper bound on the compressed size after
-      deflation of sourceLen bytes.  It must be called after deflateInit()
-      or deflateInit2().  This would be used to allocate an output buffer
-      for deflation in a single pass, and so would be called before deflate().
-   */
-
-   ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
-                                        int bits,
-                                        int value));
-   /*
-        deflatePrime() inserts bits in the deflate output stream.  The intent
-     is that this function is used to start off the deflate output with the
-     bits leftover from a previous deflate stream when appending to it.  As such,
-     this function can only be used for raw deflate, and must be used before the
-     first deflate() call after a deflateInit2() or deflateReset().  bits must be
-     less than or equal to 16, and that many of the least significant bits of
-     value will be inserted in the output.
-
-         deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
-      stream state was inconsistent.
-   */
-
-   ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
-                                           gz_headerp head));
-   /*
-         deflateSetHeader() provides gzip header information for when a gzip
-      stream is requested by deflateInit2().  deflateSetHeader() may be called
-      after deflateInit2() or deflateReset() and before the first call of
-      deflate().  The text, time, os, extra field, name, and comment information
-      in the provided gz_header structure are written to the gzip header (xflag is
-      ignored -- the extra flags are set according to the compression level).  The
-      caller must assure that, if not Z_NULL, name and comment are terminated with
-      a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
-      available there.  If hcrc is true, a gzip header crc is included.  Note that
-      the current versions of the command-line version of gzip (up through version
-      1.3.x) do not support header crc's, and will report that it is a "multi-part
-      gzip file" and give up.
-
-         If deflateSetHeader is not used, the default gzip header has text false,
-      the time set to zero, and os set to 255, with no extra, name, or comment
-      fields.  The gzip header is returned to the default state by deflateReset().
-
-         deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
-      stream state was inconsistent.
-   */
-
-   /*
-   ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
-                                        int  windowBits));
-
-        This is another version of inflateInit with an extra parameter. The
-      fields next_in, avail_in, zalloc, zfree and opaque must be initialized
-      before by the caller.
-
-        The windowBits parameter is the base two logarithm of the maximum window
-      size (the size of the history buffer).  It should be in the range 8..15 for
-      this version of the library. The default value is 15 if inflateInit is used
-      instead. windowBits must be greater than or equal to the windowBits value
-      provided to deflateInit2() while compressing, or it must be equal to 15 if
-      deflateInit2() was not used. If a compressed stream with a larger window
-      size is given as input, inflate() will return with the error code
-      Z_DATA_ERROR instead of trying to allocate a larger window.
-
-        windowBits can also be -8..-15 for raw inflate. In this case, -windowBits
-      determines the window size. inflate() will then process raw deflate data,
-      not looking for a zlib or gzip header, not generating a check value, and not
-      looking for any check values for comparison at the end of the stream. This
-      is for use with other formats that use the deflate compressed data format
-      such as zip.  Those formats provide their own check values. If a custom
-      format is developed using the raw deflate format for compressed data, it is
-      recommended that a check value such as an adler32 or a crc32 be applied to
-      the uncompressed data as is done in the zlib, gzip, and zip formats.  For
-      most applications, the zlib format should be used as is. Note that comments
-      above on the use in deflateInit2() applies to the magnitude of windowBits.
-
-        windowBits can also be greater than 15 for optional gzip decoding. Add
-      32 to windowBits to enable zlib and gzip decoding with automatic header
-      detection, or add 16 to decode only the gzip format (the zlib format will
-      return a Z_DATA_ERROR).  If a gzip stream is being decoded, strm->adler is
-      a crc32 instead of an adler32.
-
-        inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
-      memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg
-      is set to null if there is no error message.  inflateInit2 does not perform
-      any decompression apart from reading the zlib header if present: this will
-      be done by inflate(). (So next_in and avail_in may be modified, but next_out
-      and avail_out are unchanged.)
-   */
-
-   ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
-         const Bytef *dictionary,
-         uInt  dictLength));
-   /*
-        Initializes the decompression dictionary from the given uncompressed byte
-      sequence. This function must be called immediately after a call of inflate,
-      if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
-      can be determined from the adler32 value returned by that call of inflate.
-      The compressor and decompressor must use exactly the same dictionary (see
-      deflateSetDictionary).  For raw inflate, this function can be called
-      immediately after inflateInit2() or inflateReset() and before any call of
-      inflate() to set the dictionary.  The application must insure that the
-      dictionary that was used for compression is provided.
-
-        inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
-      parameter is invalid (such as NULL dictionary) or the stream state is
-      inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
-      expected one (incorrect adler32 value). inflateSetDictionary does not
-      perform any decompression: this will be done by subsequent calls of
-      inflate().
-   */
-
-   ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
-   /*
-       Skips invalid compressed data until a full flush point (see above the
-     description of deflate with Z_FULL_FLUSH) can be found, or until all
-     available input is skipped. No output is provided.
-
-       inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
-     if no more input was provided, Z_DATA_ERROR if no flush point has been found,
-     or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
-     case, the application may save the current current value of total_in which
-     indicates where valid compressed data was found. In the error case, the
-     application may repeatedly call inflateSync, providing more input each time,
-     until success or end of the input data.
-   */
-
-   ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
-                                       z_streamp source));
-   /*
-        Sets the destination stream as a complete copy of the source stream.
-
-        This function can be useful when randomly accessing a large stream.  The
-      first pass through the stream can periodically record the inflate state,
-      allowing restarting inflate at those points when randomly accessing the
-      stream.
-
-        inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
-      enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
-      (such as zalloc being NULL). msg is left unchanged in both source and
-      destination.
-   */
-
-   ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
-   /*
-        This function is equivalent to inflateEnd followed by inflateInit,
-      but does not free and reallocate all the internal decompression state.
-      The stream will keep attributes that may have been set by inflateInit2.
-
-         inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
-      stream state was inconsistent (such as zalloc or state being NULL).
-   */
-
-   ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
-                                        int bits,
-                                        int value));
-   /*
-        This function inserts bits in the inflate input stream.  The intent is
-     that this function is used to start inflating at a bit position in the
-     middle of a byte.  The provided bits will be used before any bytes are used
-     from next_in.  This function should only be used with raw inflate, and
-     should be used before the first inflate() call after inflateInit2() or
-     inflateReset().  bits must be less than or equal to 16, and that many of the
-     least significant bits of value will be inserted in the input.
-
-         inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
-      stream state was inconsistent.
-   */
-
-   ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
-                                           gz_headerp head));
-   /*
-         inflateGetHeader() requests that gzip header information be stored in the
-      provided gz_header structure.  inflateGetHeader() may be called after
-      inflateInit2() or inflateReset(), and before the first call of inflate().
-      As inflate() processes the gzip stream, head->done is zero until the header
-      is completed, at which time head->done is set to one.  If a zlib stream is
-      being decoded, then head->done is set to -1 to indicate that there will be
-      no gzip header information forthcoming.  Note that Z_BLOCK can be used to
-      force inflate() to return immediately after header processing is complete
-      and before any actual data is decompressed.
-
-         The text, time, xflags, and os fields are filled in with the gzip header
-      contents.  hcrc is set to true if there is a header CRC.  (The header CRC
-      was valid if done is set to one.)  If extra is not Z_NULL, then extra_max
-      contains the maximum number of bytes to write to extra.  Once done is true,
-      extra_len contains the actual extra field length, and extra contains the
-      extra field, or that field truncated if extra_max is less than extra_len.
-      If name is not Z_NULL, then up to name_max characters are written there,
-      terminated with a zero unless the length is greater than name_max.  If
-      comment is not Z_NULL, then up to comm_max characters are written there,
-      terminated with a zero unless the length is greater than comm_max.  When
-      any of extra, name, or comment are not Z_NULL and the respective field is
-      not present in the header, then that field is set to Z_NULL to signal its
-      absence.  This allows the use of deflateSetHeader() with the returned
-      structure to duplicate the header.  However if those fields are set to
-      allocated memory, then the application will need to save those pointers
-      elsewhere so that they can be eventually freed.
-
-         If inflateGetHeader is not used, then the header information is simply
-      discarded.  The header is always checked for validity, including the header
-      CRC if present.  inflateReset() will reset the process to discard the header
-      information.  The application would need to call inflateGetHeader() again to
-      retrieve the header from the next gzip stream.
-
-         inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
-      stream state was inconsistent.
-   */
-
-   /*
-   ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
-                                           unsigned char FAR *window));
-
-        Initialize the internal stream state for decompression using inflateBack()
-      calls.  The fields zalloc, zfree and opaque in strm must be initialized
-      before the call.  If zalloc and zfree are Z_NULL, then the default library-
-      derived memory allocation routines are used.  windowBits is the base two
-      logarithm of the window size, in the range 8..15.  window is a caller
-      supplied buffer of that size.  Except for special applications where it is
-      assured that deflate was used with small window sizes, windowBits must be 15
-      and a 32K byte window must be supplied to be able to decompress general
-      deflate streams.
-
-        See inflateBack() for the usage of these routines.
-
-        inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of
-      the paramaters are invalid, Z_MEM_ERROR if the internal state could not
-      be allocated, or Z_VERSION_ERROR if the version of the library does not
-      match the version of the header file.
-   */
-
-   typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *));
-   typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
-
-   ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
-                                       in_func in, void FAR *in_desc,
-                                       out_func out, void FAR *out_desc));
-   /*
-        inflateBack() does a raw inflate with a single call using a call-back
-      interface for input and output.  This is more efficient than inflate() for
-      file i/o applications in that it avoids copying between the output and the
-      sliding window by simply making the window itself the output buffer.  This
-      function trusts the application to not change the output buffer passed by
-      the output function, at least until inflateBack() returns.
-
-        inflateBackInit() must be called first to allocate the internal state
-      and to initialize the state with the user-provided window buffer.
-      inflateBack() may then be used multiple times to inflate a complete, raw
-      deflate stream with each call.  inflateBackEnd() is then called to free
-      the allocated state.
-
-        A raw deflate stream is one with no zlib or gzip header or trailer.
-      This routine would normally be used in a utility that reads zip or gzip
-      files and writes out uncompressed files.  The utility would decode the
-      header and process the trailer on its own, hence this routine expects
-      only the raw deflate stream to decompress.  This is different from the
-      normal behavior of inflate(), which expects either a zlib or gzip header and
-      trailer around the deflate stream.
-
-        inflateBack() uses two subroutines supplied by the caller that are then
-      called by inflateBack() for input and output.  inflateBack() calls those
-      routines until it reads a complete deflate stream and writes out all of the
-      uncompressed data, or until it encounters an error.  The function's
-      parameters and return types are defined above in the in_func and out_func
-      typedefs.  inflateBack() will call in(in_desc, &buf) which should return the
-      number of bytes of provided input, and a pointer to that input in buf.  If
-      there is no input available, in() must return zero--buf is ignored in that
-      case--and inflateBack() will return a buffer error.  inflateBack() will call
-      out(out_desc, buf, len) to write the uncompressed data buf[0..len-1].  out()
-      should return zero on success, or non-zero on failure.  If out() returns
-      non-zero, inflateBack() will return with an error.  Neither in() nor out()
-      are permitted to change the contents of the window provided to
-      inflateBackInit(), which is also the buffer that out() uses to write from.
-      The length written by out() will be at most the window size.  Any non-zero
-      amount of input may be provided by in().
-
-        For convenience, inflateBack() can be provided input on the first call by
-      setting strm->next_in and strm->avail_in.  If that input is exhausted, then
-      in() will be called.  Therefore strm->next_in must be initialized before
-      calling inflateBack().  If strm->next_in is Z_NULL, then in() will be called
-      immediately for input.  If strm->next_in is not Z_NULL, then strm->avail_in
-      must also be initialized, and then if strm->avail_in is not zero, input will
-      initially be taken from strm->next_in[0 .. strm->avail_in - 1].
-
-        The in_desc and out_desc parameters of inflateBack() is passed as the
-      first parameter of in() and out() respectively when they are called.  These
-      descriptors can be optionally used to pass any information that the caller-
-      supplied in() and out() functions need to do their job.
-
-        On return, inflateBack() will set strm->next_in and strm->avail_in to
-      pass back any unused input that was provided by the last in() call.  The
-      return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
-      if in() or out() returned an error, Z_DATA_ERROR if there was a format
-      error in the deflate stream (in which case strm->msg is set to indicate the
-      nature of the error), or Z_STREAM_ERROR if the stream was not properly
-      initialized.  In the case of Z_BUF_ERROR, an input or output error can be
-      distinguished using strm->next_in which will be Z_NULL only if in() returned
-      an error.  If strm->next is not Z_NULL, then the Z_BUF_ERROR was due to
-      out() returning non-zero.  (in() will always be called before out(), so
-      strm->next_in is assured to be defined if out() returns non-zero.)  Note
-      that inflateBack() cannot return Z_OK.
-   */
-
-   ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
-   /*
-        All memory allocated by inflateBackInit() is freed.
-
-        inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
-      state was inconsistent.
-   */
-
-   ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
-   /* Return flags indicating compile-time options.
-
-       Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
-        1.0: size of uInt
-        3.2: size of uLong
-        5.4: size of voidpf (pointer)
-        7.6: size of z_off_t
-
-       Compiler, assembler, and debug options:
-        8: DEBUG
-        9: ASMV or ASMINF -- use ASM code
-        10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
-        11: 0 (reserved)
-
-       One-time table building (smaller code, but not thread-safe if true):
-        12: BUILDFIXED -- build static block decoding tables when needed
-        13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed
-        14,15: 0 (reserved)
-
-       Library content (indicates missing functionality):
-        16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking
-                             deflate code when not needed)
-        17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect
-                       and decode gzip streams (to avoid linking crc code)
-        18-19: 0 (reserved)
-
-       Operation variations (changes in library functionality):
-        20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate
-        21: FASTEST -- deflate algorithm with only one, lowest compression level
-        22,23: 0 (reserved)
-
-       The sprintf variant used by gzprintf (zero is best):
-        24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format
-        25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure!
-        26: 0 = returns value, 1 = void -- 1 means inferred string length returned
-
-       Remainder:
-        27-31: 0 (reserved)
-    */
-
-
-   /* utility functions */
-
-   /*
-        The following utility functions are implemented on top of the
-      basic stream-oriented functions. To simplify the interface, some
-      default options are assumed (compression level and memory usage,
-      standard memory allocation functions). The source code of these
-      utility functions can easily be modified if you need special options.
-   */
-
-   ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
-                                    const Bytef *source, uLong sourceLen));
-   /*
-        Compresses the source buffer into the destination buffer.  sourceLen is
-      the byte length of the source buffer. Upon entry, destLen is the total
-      size of the destination buffer, which must be at least the value returned
-      by compressBound(sourceLen). Upon exit, destLen is the actual size of the
-      compressed buffer.
-        This function can be used to compress a whole file at once if the
-      input file is mmap'ed.
-        compress returns Z_OK if success, Z_MEM_ERROR if there was not
-      enough memory, Z_BUF_ERROR if there was not enough room in the output
-      buffer.
-   */
-
-   ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
-                                     const Bytef *source, uLong sourceLen,
-                                     int level));
-   /*
-        Compresses the source buffer into the destination buffer. The level
-      parameter has the same meaning as in deflateInit.  sourceLen is the byte
-      length of the source buffer. Upon entry, destLen is the total size of the
-      destination buffer, which must be at least the value returned by
-      compressBound(sourceLen). Upon exit, destLen is the actual size of the
-      compressed buffer.
-
-        compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
-      memory, Z_BUF_ERROR if there was not enough room in the output buffer,
-      Z_STREAM_ERROR if the level parameter is invalid.
-   */
-
-   ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
-   /*
-        compressBound() returns an upper bound on the compressed size after
-      compress() or compress2() on sourceLen bytes.  It would be used before
-      a compress() or compress2() call to allocate the destination buffer.
-   */
-
-   ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
-                                      const Bytef *source, uLong sourceLen));
-   /*
-        Decompresses the source buffer into the destination buffer.  sourceLen is
-      the byte length of the source buffer. Upon entry, destLen is the total
-      size of the destination buffer, which must be large enough to hold the
-      entire uncompressed data. (The size of the uncompressed data must have
-      been saved previously by the compressor and transmitted to the decompressor
-      by some mechanism outside the scope of this compression library.)
-      Upon exit, destLen is the actual size of the compressed buffer.
-        This function can be used to decompress a whole file at once if the
-      input file is mmap'ed.
-
-        uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
-      enough memory, Z_BUF_ERROR if there was not enough room in the output
-      buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete.
-   */
-
-
-   typedef voidp gzFile;
-
-   ZEXTERN gzFile ZEXPORT gzopen  OF((const char *path, const char *mode));
-   /*
-        Opens a gzip (.gz) file for reading or writing. The mode parameter
-      is as in fopen ("rb" or "wb") but can also include a compression level
-      ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for
-      Huffman only compression as in "wb1h", or 'R' for run-length encoding
-      as in "wb1R". (See the description of deflateInit2 for more information
-      about the strategy parameter.)
-
-        gzopen can be used to read a file which is not in gzip format; in this
-      case gzread will directly read from the file without decompression.
-
-        gzopen returns NULL if the file could not be opened or if there was
-      insufficient memory to allocate the (de)compression state; errno
-      can be checked to distinguish the two cases (if errno is zero, the
-      zlib error is Z_MEM_ERROR).  */
-
-   ZEXTERN gzFile ZEXPORT gzdopen  OF((int fd, const char *mode));
-   /*
-        gzdopen() associates a gzFile with the file descriptor fd.  File
-      descriptors are obtained from calls like open, dup, creat, pipe or
-      fileno (in the file has been previously opened with fopen).
-      The mode parameter is as in gzopen.
-        The next call of gzclose on the returned gzFile will also close the
-      file descriptor fd, just like fclose(fdopen(fd), mode) closes the file
-      descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode).
-        gzdopen returns NULL if there was insufficient memory to allocate
-      the (de)compression state.
-   */
-
-   ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
-   /*
-        Dynamically update the compression level or strategy. See the description
-      of deflateInit2 for the meaning of these parameters.
-        gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not
-      opened for writing.
-   */
-
-   ZEXTERN int ZEXPORT    gzread  OF((gzFile file, voidp buf, unsigned len));
-   /*
-        Reads the given number of uncompressed bytes from the compressed file.
-      If the input file was not in gzip format, gzread copies the given number
-      of bytes into the buffer.
-        gzread returns the number of uncompressed bytes actually read (0 for
-      end of file, -1 for error). */
-
-   ZEXTERN int ZEXPORT    gzwrite OF((gzFile file,
-                                      voidpc buf, unsigned len));
-   /*
-        Writes the given number of uncompressed bytes into the compressed file.
-      gzwrite returns the number of uncompressed bytes actually written
-      (0 in case of error).
-   */
-
-   ZEXTERN int ZEXPORTVA   gzprintf OF((gzFile file, const char *format, ...));
-   /*
-        Converts, formats, and writes the args to the compressed file under
-      control of the format string, as in fprintf. gzprintf returns the number of
-      uncompressed bytes actually written (0 in case of error).  The number of
-      uncompressed bytes written is limited to 4095. The caller should assure that
-      this limit is not exceeded. If it is exceeded, then gzprintf() will return
-      return an error (0) with nothing written. In this case, there may also be a
-      buffer overflow with unpredictable consequences, which is possible only if
-      zlib was compiled with the insecure functions sprintf() or vsprintf()
-      because the secure snprintf() or vsnprintf() functions were not available.
-   */
-
-   ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
-   /*
-         Writes the given null-terminated string to the compressed file, excluding
-      the terminating null character.
-         gzputs returns the number of characters written, or -1 in case of error.
-   */
-
-   ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
-   /*
-         Reads bytes from the compressed file until len-1 characters are read, or
-      a newline character is read and transferred to buf, or an end-of-file
-      condition is encountered.  The string is then terminated with a null
-      character.
-         gzgets returns buf, or Z_NULL in case of error.
-   */
-
-   ZEXTERN int ZEXPORT    gzputc OF((gzFile file, int c));
-   /*
-         Writes c, converted to an unsigned char, into the compressed file.
-      gzputc returns the value that was written, or -1 in case of error.
-   */
-
-   ZEXTERN int ZEXPORT    gzgetc OF((gzFile file));
-   /*
-         Reads one byte from the compressed file. gzgetc returns this byte
-      or -1 in case of end of file or error.
-   */
-
-   ZEXTERN int ZEXPORT    gzungetc OF((int c, gzFile file));
-   /*
-         Push one character back onto the stream to be read again later.
-      Only one character of push-back is allowed.  gzungetc() returns the
-      character pushed, or -1 on failure.  gzungetc() will fail if a
-      character has been pushed but not read yet, or if c is -1. The pushed
-      character will be discarded if the stream is repositioned with gzseek()
-      or gzrewind().
-   */
-
-   ZEXTERN int ZEXPORT    gzflush OF((gzFile file, int flush));
-   /*
-        Flushes all pending output into the compressed file. The parameter
-      flush is as in the deflate() function. The return value is the zlib
-      error number (see function gzerror below). gzflush returns Z_OK if
-      the flush parameter is Z_FINISH and all output could be flushed.
-        gzflush should be called only when strictly necessary because it can
-      degrade compression.
-   */
-
-   ZEXTERN z_off_t ZEXPORT    gzseek OF((gzFile file,
-                                         z_off_t offset, int whence));
-   /*
-         Sets the starting position for the next gzread or gzwrite on the
-      given compressed file. The offset represents a number of bytes in the
-      uncompressed data stream. The whence parameter is defined as in lseek(2);
-      the value SEEK_END is not supported.
-        If the file is opened for reading, this function is emulated but can be
-      extremely slow. If the file is opened for writing, only forward seeks are
-      supported; gzseek then compresses a sequence of zeroes up to the new
-      starting position.
-
-         gzseek returns the resulting offset location as measured in bytes from
-      the beginning of the uncompressed stream, or -1 in case of error, in
-      particular if the file is opened for writing and the new starting position
-      would be before the current position.
-   */
-
-   ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
-   /*
-        Rewinds the given file. This function is supported only for reading.
-
-      gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
-   */
-
-   ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
-   /*
-        Returns the starting position for the next gzread or gzwrite on the
-      given compressed file. This position represents a number of bytes in the
-      uncompressed data stream.
-
-      gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
-   */
-
-   ZEXTERN int ZEXPORT gzeof OF((gzFile file));
-   /*
-        Returns 1 when EOF has previously been detected reading the given
-      input stream, otherwise zero.
-   */
-
-   ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
-   /*
-        Returns 1 if file is being read directly without decompression, otherwise
-      zero.
-   */
-
-   ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
-   /*
-        Flushes all pending output if necessary, closes the compressed file
-      and deallocates all the (de)compression state. The return value is the zlib
-      error number (see function gzerror below).
-   */
-
-   ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
-   /*
-        Returns the error message for the last error which occurred on the
-      given compressed file. errnum is set to zlib error number. If an
-      error occurred in the file system and not in the compression library,
-      errnum is set to Z_ERRNO and the application may consult errno
-      to get the exact error code.
-   */
-
-   ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
-   /*
-        Clears the error and end-of-file flags for file. This is analogous to the
-      clearerr() function in stdio. This is useful for continuing to read a gzip
-      file that is being written concurrently.
-   */
-
-   /* checksum functions */
-
-   /*
-        These functions are not related to compression but are exported
-      anyway because they might be useful in applications using the
-      compression library.
-   */
-
-   ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
-   /*
-        Update a running Adler-32 checksum with the bytes buf[0..len-1] and
-      return the updated checksum. If buf is NULL, this function returns
-      the required initial value for the checksum.
-      An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
-      much faster. Usage example:
-
-        uLong adler = adler32(0L, Z_NULL, 0);
-
-        while (read_buffer(buffer, length) != EOF) {
-          adler = adler32(adler, buffer, length);
-        }
-        if (adler != original_adler) error();
-   */
-
-   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
-         z_off_t len2));
-   /*
-        Combine two Adler-32 checksums into one.  For two sequences of bytes, seq1
-      and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
-      each, adler1 and adler2.  adler32_combine() returns the Adler-32 checksum of
-      seq1 and seq2 concatenated, requiring only adler1, adler2, and len2.
-   */
-
-   ZEXTERN uLong ZEXPORT crc32   OF((uLong crc, const Bytef *buf, uInt len));
-   /*
-        Update a running CRC-32 with the bytes buf[0..len-1] and return the
-      updated CRC-32. If buf is NULL, this function returns the required initial
-      value for the for the crc. Pre- and post-conditioning (one's complement) is
-      performed within this function so it shouldn't be done by the application.
-      Usage example:
-
-        uLong crc = crc32(0L, Z_NULL, 0);
-
-        while (read_buffer(buffer, length) != EOF) {
-          crc = crc32(crc, buffer, length);
-        }
-        if (crc != original_crc) error();
-   */
-
-   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
-
-   /*
-        Combine two CRC-32 check values into one.  For two sequences of bytes,
-      seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
-      calculated for each, crc1 and crc2.  crc32_combine() returns the CRC-32
-      check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
-      len2.
-   */
-
-
-   /* various hacks, don't look :) */
-
-   /* deflateInit and inflateInit are macros to allow checking the zlib version
-    * and the compiler's view of z_stream:
-    */
-   ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
-                                        const char *version, int stream_size));
-   ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
-                                        const char *version, int stream_size));
-   ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int  level, int  method,
-                                         int windowBits, int memLevel,
-                                         int strategy, const char *version,
-                                         int stream_size));
-   ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int  windowBits,
-                                         const char *version, int stream_size));
-   ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
-                                           unsigned char FAR *window,
-                                           const char *version,
-                                           int stream_size));
-#define deflateInit(strm, level) \
-        deflateInit_((strm), (level),       ZLIB_VERSION, sizeof(z_stream))
-#define inflateInit(strm) \
-        inflateInit_((strm),                ZLIB_VERSION, sizeof(z_stream))
-#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
-        deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
-                      (strategy),           ZLIB_VERSION, sizeof(z_stream))
-#define inflateInit2(strm, windowBits) \
-        inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream))
-#define inflateBackInit(strm, windowBits, window) \
-        inflateBackInit_((strm), (windowBits), (window), \
-        ZLIB_VERSION, sizeof(z_stream))
-
-
-#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL)
-   struct internal_state
-   {
-      int dummy;
-   }; /* hack for buggy compilers */
+/* for compatibility with versions < 1.0.2 */
+
+
+                        /* basic functions */
+
+ZEXTERN const char * ZEXPORT zlibVersion(void);
+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
+   If the first character differs, the library code actually used is not
+   compatible with the zlib.h header file used by the application.  This check
+   is automatically made by deflateInit and inflateInit.
+ */
+
+/*
+ZEXTERN int ZEXPORT deflateInit(z_streamp strm, int level);
+
+     Initializes the internal stream state for compression.  The fields
+   zalloc, zfree and opaque must be initialized before by the caller.  If
+   zalloc and zfree are set to Z_NULL, deflateInit updates them to use default
+   allocation functions.  total_in, total_out, adler, and msg are initialized.
+
+     The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
+   1 gives best speed, 9 gives best compression, 0 gives no compression at all
+   (the input data is simply copied a block at a time).  Z_DEFAULT_COMPRESSION
+   requests a default compromise between speed and compression (currently
+   equivalent to level 6).
+
+     deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if level is not a valid compression level, or
+   Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
+   with the version assumed by the caller (ZLIB_VERSION).  msg is set to null
+   if there is no error message.  deflateInit does not perform any compression:
+   this will be done by deflate().
+*/
+
+
+ZEXTERN int ZEXPORT deflate(z_streamp strm, int flush);
+/*
+    deflate compresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full.  It may introduce
+  some output latency (reading input without producing any output) except when
+  forced to flush.
+
+    The detailed semantics are as follows.  deflate performs one or both of the
+  following actions:
+
+  - Compress more input starting at next_in and update next_in and avail_in
+    accordingly.  If not all input can be processed (because there is not
+    enough room in the output buffer), next_in and avail_in are updated and
+    processing will resume at this point for the next call of deflate().
+
+  - Generate more output starting at next_out and update next_out and avail_out
+    accordingly.  This action is forced if the parameter flush is non zero.
+    Forcing flush frequently degrades the compression ratio, so this parameter
+    should be set only when necessary.  Some output may be provided even if
+    flush is zero.
+
+    Before the call of deflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming more
+  output, and updating avail_in or avail_out accordingly; avail_out should
+  never be zero before the call.  The application can consume the compressed
+  output when it wants, for example when the output buffer is full (avail_out
+  == 0), or after each call of deflate().  If deflate returns Z_OK and with
+  zero avail_out, it must be called again after making room in the output
+  buffer because there might be more output pending. See deflatePending(),
+  which can be used if desired to determine whether or not there is more output
+  in that case.
+
+    Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
+  decide how much data to accumulate before producing output, in order to
+  maximize compression.
+
+    If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
+  flushed to the output buffer and the output is aligned on a byte boundary, so
+  that the decompressor can get all input data available so far.  (In
+  particular avail_in is zero after the call if enough output space has been
+  provided before the call.) Flushing may degrade compression for some
+  compression algorithms and so it should be used only when necessary.  This
+  completes the current deflate block and follows it with an empty stored block
+  that is three bits plus filler bits to the next byte, followed by four bytes
+  (00 00 ff ff).
+
+    If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the
+  output buffer, but the output is not aligned to a byte boundary.  All of the
+  input data so far will be available to the decompressor, as for Z_SYNC_FLUSH.
+  This completes the current deflate block and follows it with an empty fixed
+  codes block that is 10 bits long.  This assures that enough bytes are output
+  in order for the decompressor to finish the block before the empty fixed
+  codes block.
+
+    If flush is set to Z_BLOCK, a deflate block is completed and emitted, as
+  for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to
+  seven bits of the current block are held to be written as the next byte after
+  the next deflate block is completed.  In this case, the decompressor may not
+  be provided enough bits at this point in order to complete decompression of
+  the data provided so far to the compressor.  It may need to wait for the next
+  block to be emitted.  This is for advanced applications that need to control
+  the emission of deflate blocks.
+
+    If flush is set to Z_FULL_FLUSH, all output is flushed as with
+  Z_SYNC_FLUSH, and the compression state is reset so that decompression can
+  restart from this point if previous compressed data has been damaged or if
+  random access is desired.  Using Z_FULL_FLUSH too often can seriously degrade
+  compression.
+
+    If deflate returns with avail_out == 0, this function must be called again
+  with the same value of the flush parameter and more output space (updated
+  avail_out), until the flush is complete (deflate returns with non-zero
+  avail_out).  In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
+  avail_out is greater than six when the flush marker begins, in order to avoid
+  repeated flush markers upon calling deflate() again when avail_out == 0.
+
+    If the parameter flush is set to Z_FINISH, pending input is processed,
+  pending output is flushed and deflate returns with Z_STREAM_END if there was
+  enough output space.  If deflate returns with Z_OK or Z_BUF_ERROR, this
+  function must be called again with Z_FINISH and more output space (updated
+  avail_out) but no more input data, until it returns with Z_STREAM_END or an
+  error.  After deflate has returned Z_STREAM_END, the only possible operations
+  on the stream are deflateReset or deflateEnd.
+
+    Z_FINISH can be used in the first deflate call after deflateInit if all the
+  compression is to be done in a single step.  In order to complete in one
+  call, avail_out must be at least the value returned by deflateBound (see
+  below).  Then deflate is guaranteed to return Z_STREAM_END.  If not enough
+  output space is provided, deflate will not return Z_STREAM_END, and it must
+  be called again as described above.
+
+    deflate() sets strm->adler to the Adler-32 checksum of all input read
+  so far (that is, total_in bytes).  If a gzip stream is being generated, then
+  strm->adler will be the CRC-32 checksum of the input read so far.  (See
+  deflateInit2 below.)
+
+    deflate() may update strm->data_type if it can make a good guess about
+  the input data type (Z_BINARY or Z_TEXT).  If in doubt, the data is
+  considered binary.  This field is only for information purposes and does not
+  affect the compression algorithm in any manner.
+
+    deflate() returns Z_OK if some progress has been made (more input
+  processed or more output produced), Z_STREAM_END if all input has been
+  consumed and all output has been produced (only when flush is set to
+  Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
+  if next_in or next_out was Z_NULL or the state was inadvertently written over
+  by the application), or Z_BUF_ERROR if no progress is possible (for example
+  avail_in or avail_out was zero).  Note that Z_BUF_ERROR is not fatal, and
+  deflate() can be called again with more input and more output space to
+  continue compressing.
+*/
+
+
+ZEXTERN int ZEXPORT deflateEnd(z_streamp strm);
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any pending
+   output.
+
+     deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
+   stream state was inconsistent, Z_DATA_ERROR if the stream was freed
+   prematurely (some input or output was discarded).  In the error case, msg
+   may be set but then points to a static string (which must not be
+   deallocated).
+*/
+
+
+/*
+ZEXTERN int ZEXPORT inflateInit(z_streamp strm);
+
+     Initializes the internal stream state for decompression.  The fields
+   next_in, avail_in, zalloc, zfree and opaque must be initialized before by
+   the caller.  In the current version of inflate, the provided input is not
+   read or consumed.  The allocation of a sliding window will be deferred to
+   the first call of inflate (if the decompression does not complete on the
+   first call).  If zalloc and zfree are set to Z_NULL, inflateInit updates
+   them to use default allocation functions.  total_in, total_out, adler, and
+   msg are initialized.
+
+     inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+   version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+   invalid, such as a null pointer to the structure.  msg is set to null if
+   there is no error message.  inflateInit does not perform any decompression.
+   Actual decompression will be done by inflate().  So next_in, and avail_in,
+   next_out, and avail_out are unused and unchanged.  The current
+   implementation of inflateInit() does not process any header information --
+   that is deferred until inflate() is called.
+*/
+
+
+ZEXTERN int ZEXPORT inflate(z_streamp strm, int flush);
+/*
+    inflate decompresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full.  It may introduce
+  some output latency (reading input without producing any output) except when
+  forced to flush.
+
+  The detailed semantics are as follows.  inflate performs one or both of the
+  following actions:
+
+  - Decompress more input starting at next_in and update next_in and avail_in
+    accordingly.  If not all input can be processed (because there is not
+    enough room in the output buffer), then next_in and avail_in are updated
+    accordingly, and processing will resume at this point for the next call of
+    inflate().
+
+  - Generate more output starting at next_out and update next_out and avail_out
+    accordingly.  inflate() provides as much output as possible, until there is
+    no more input data or no more space in the output buffer (see below about
+    the flush parameter).
+
+    Before the call of inflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming more
+  output, and updating the next_* and avail_* values accordingly.  If the
+  caller of inflate() does not provide both available input and available
+  output space, it is possible that there will be no progress made.  The
+  application can consume the uncompressed output when it wants, for example
+  when the output buffer is full (avail_out == 0), or after each call of
+  inflate().  If inflate returns Z_OK and with zero avail_out, it must be
+  called again after making room in the output buffer because there might be
+  more output pending.
+
+    The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH,
+  Z_BLOCK, or Z_TREES.  Z_SYNC_FLUSH requests that inflate() flush as much
+  output as possible to the output buffer.  Z_BLOCK requests that inflate()
+  stop if and when it gets to the next deflate block boundary.  When decoding
+  the zlib or gzip format, this will cause inflate() to return immediately
+  after the header and before the first block.  When doing a raw inflate,
+  inflate() will go ahead and process the first block, and will return when it
+  gets to the end of that block, or when it runs out of data.
+
+    The Z_BLOCK option assists in appending to or combining deflate streams.
+  To assist in this, on return inflate() always sets strm->data_type to the
+  number of unused bits in the last byte taken from strm->next_in, plus 64 if
+  inflate() is currently decoding the last block in the deflate stream, plus
+  128 if inflate() returned immediately after decoding an end-of-block code or
+  decoding the complete header up to just before the first byte of the deflate
+  stream.  The end-of-block will not be indicated until all of the uncompressed
+  data from that block has been written to strm->next_out.  The number of
+  unused bits may in general be greater than seven, except when bit 7 of
+  data_type is set, in which case the number of unused bits will be less than
+  eight.  data_type is set as noted here every time inflate() returns for all
+  flush options, and so can be used to determine the amount of currently
+  consumed input in bits.
+
+    The Z_TREES option behaves as Z_BLOCK does, but it also returns when the
+  end of each deflate block header is reached, before any actual data in that
+  block is decoded.  This allows the caller to determine the length of the
+  deflate block header for later use in random access within a deflate block.
+  256 is added to the value of strm->data_type when inflate() returns
+  immediately after reaching the end of the deflate block header.
+
+    inflate() should normally be called until it returns Z_STREAM_END or an
+  error.  However if all decompression is to be performed in a single step (a
+  single call of inflate), the parameter flush should be set to Z_FINISH.  In
+  this case all pending input is processed and all pending output is flushed;
+  avail_out must be large enough to hold all of the uncompressed data for the
+  operation to complete.  (The size of the uncompressed data may have been
+  saved by the compressor for this purpose.)  The use of Z_FINISH is not
+  required to perform an inflation in one step.  However it may be used to
+  inform inflate that a faster approach can be used for the single inflate()
+  call.  Z_FINISH also informs inflate to not maintain a sliding window if the
+  stream completes, which reduces inflate's memory footprint.  If the stream
+  does not complete, either because not all of the stream is provided or not
+  enough output space is provided, then a sliding window will be allocated and
+  inflate() can be called again to continue the operation as if Z_NO_FLUSH had
+  been used.
+
+     In this implementation, inflate() always flushes as much output as
+  possible to the output buffer, and always uses the faster approach on the
+  first call.  So the effects of the flush parameter in this implementation are
+  on the return value of inflate() as noted below, when inflate() returns early
+  when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of
+  memory for a sliding window when Z_FINISH is used.
+
+     If a preset dictionary is needed after this call (see inflateSetDictionary
+  below), inflate sets strm->adler to the Adler-32 checksum of the dictionary
+  chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
+  strm->adler to the Adler-32 checksum of all output produced so far (that is,
+  total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
+  below.  At the end of the stream, inflate() checks that its computed Adler-32
+  checksum is equal to that saved by the compressor and returns Z_STREAM_END
+  only if the checksum is correct.
+
+    inflate() can decompress and check either zlib-wrapped or gzip-wrapped
+  deflate data.  The header type is detected automatically, if requested when
+  initializing with inflateInit2().  Any information contained in the gzip
+  header is not retained unless inflateGetHeader() is used.  When processing
+  gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output
+  produced so far.  The CRC-32 is checked against the gzip trailer, as is the
+  uncompressed length, modulo 2^32.
+
+    inflate() returns Z_OK if some progress has been made (more input processed
+  or more output produced), Z_STREAM_END if the end of the compressed data has
+  been reached and all uncompressed output has been produced, Z_NEED_DICT if a
+  preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
+  corrupted (input stream not conforming to the zlib format or incorrect check
+  value, in which case strm->msg points to a string with a more specific
+  error), Z_STREAM_ERROR if the stream structure was inconsistent (for example
+  next_in or next_out was Z_NULL, or the state was inadvertently written over
+  by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR
+  if no progress was possible or if there was not enough room in the output
+  buffer when Z_FINISH is used.  Note that Z_BUF_ERROR is not fatal, and
+  inflate() can be called again with more input and more output space to
+  continue decompressing.  If Z_DATA_ERROR is returned, the application may
+  then call inflateSync() to look for a good compression block if a partial
+  recovery of the data is to be attempted.
+*/
+
+
+ZEXTERN int ZEXPORT inflateEnd(z_streamp strm);
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any pending
+   output.
+
+     inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state
+   was inconsistent.
+*/
+
+
+                        /* Advanced functions */
+
+/*
+    The following functions are needed only in some special applications.
+*/
+
+/*
+ZEXTERN int ZEXPORT deflateInit2(z_streamp strm,
+                                 int level,
+                                 int method,
+                                 int windowBits,
+                                 int memLevel,
+                                 int strategy);
+
+     This is another version of deflateInit with more compression options.  The
+   fields zalloc, zfree and opaque must be initialized before by the caller.
+
+     The method parameter is the compression method.  It must be Z_DEFLATED in
+   this version of the library.
+
+     The windowBits parameter is the base two logarithm of the window size
+   (the size of the history buffer).  It should be in the range 8..15 for this
+   version of the library.  Larger values of this parameter result in better
+   compression at the expense of memory usage.  The default value is 15 if
+   deflateInit is used instead.
+
+     For the current implementation of deflate(), a windowBits value of 8 (a
+   window size of 256 bytes) is not supported.  As a result, a request for 8
+   will result in 9 (a 512-byte window).  In that case, providing 8 to
+   inflateInit2() will result in an error when the zlib header with 9 is
+   checked against the initialization of inflate().  The remedy is to not use 8
+   with deflateInit2() with this initialization, or at least in that case use 9
+   with inflateInit2().
+
+     windowBits can also be -8..-15 for raw deflate.  In this case, -windowBits
+   determines the window size.  deflate() will then generate raw deflate data
+   with no zlib header or trailer, and will not compute a check value.
+
+     windowBits can also be greater than 15 for optional gzip encoding.  Add
+   16 to windowBits to write a simple gzip header and trailer around the
+   compressed data instead of a zlib wrapper.  The gzip header will have no
+   file name, no extra data, no comment, no modification time (set to zero), no
+   header crc, and the operating system will be set to the appropriate value,
+   if the operating system was determined at compile time.  If a gzip stream is
+   being written, strm->adler is a CRC-32 instead of an Adler-32.
+
+     For raw deflate or gzip encoding, a request for a 256-byte window is
+   rejected as invalid, since only the zlib header provides a means of
+   transmitting the window size to the decompressor.
+
+     The memLevel parameter specifies how much memory should be allocated
+   for the internal compression state.  memLevel=1 uses minimum memory but is
+   slow and reduces compression ratio; memLevel=9 uses maximum memory for
+   optimal speed.  The default value is 8.  See zconf.h for total memory usage
+   as a function of windowBits and memLevel.
+
+     The strategy parameter is used to tune the compression algorithm.  Use the
+   value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+   filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
+   string match), or Z_RLE to limit match distances to one (run-length
+   encoding).  Filtered data consists mostly of small values with a somewhat
+   random distribution.  In this case, the compression algorithm is tuned to
+   compress them better.  The effect of Z_FILTERED is to force more Huffman
+   coding and less string matching; it is somewhat intermediate between
+   Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY.  Z_RLE is designed to be almost as
+   fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data.  The
+   strategy parameter only affects the compression ratio but not the
+   correctness of the compressed output even if it is not set appropriately.
+   Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler
+   decoder for special applications.
+
+     deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid
+   method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is
+   incompatible with the version assumed by the caller (ZLIB_VERSION).  msg is
+   set to null if there is no error message.  deflateInit2 does not perform any
+   compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateSetDictionary(z_streamp strm,
+                                         const Bytef *dictionary,
+                                         uInt  dictLength);
+/*
+     Initializes the compression dictionary from the given byte sequence
+   without producing any compressed output.  When using the zlib format, this
+   function must be called immediately after deflateInit, deflateInit2 or
+   deflateReset, and before any call of deflate.  When doing raw deflate, this
+   function must be called either before any call of deflate, or immediately
+   after the completion of a deflate block, i.e. after all input has been
+   consumed and all output has been delivered when using any of the flush
+   options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH.  The
+   compressor and decompressor must use exactly the same dictionary (see
+   inflateSetDictionary).
+
+     The dictionary should consist of strings (byte sequences) that are likely
+   to be encountered later in the data to be compressed, with the most commonly
+   used strings preferably put towards the end of the dictionary.  Using a
+   dictionary is most useful when the data to be compressed is short and can be
+   predicted with good accuracy; the data can then be compressed better than
+   with the default empty dictionary.
+
+     Depending on the size of the compression data structures selected by
+   deflateInit or deflateInit2, a part of the dictionary may in effect be
+   discarded, for example if the dictionary is larger than the window size
+   provided in deflateInit or deflateInit2.  Thus the strings most likely to be
+   useful should be put at the end of the dictionary, not at the front.  In
+   addition, the current implementation of deflate will use at most the window
+   size minus 262 bytes of the provided dictionary.
+
+     Upon return of this function, strm->adler is set to the Adler-32 value
+   of the dictionary; the decompressor may later use this value to determine
+   which dictionary has been used by the compressor.  (The Adler-32 value
+   applies to the whole dictionary even if only a subset of the dictionary is
+   actually used by the compressor.) If a raw deflate was requested, then the
+   Adler-32 value is not computed and strm->adler is not set.
+
+     deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
+   parameter is invalid (e.g.  dictionary being Z_NULL) or the stream state is
+   inconsistent (for example if deflate has already been called for this stream
+   or if not at a block boundary for raw deflate).  deflateSetDictionary does
+   not perform any compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateGetDictionary(z_streamp strm,
+                                         Bytef *dictionary,
+                                         uInt  *dictLength);
+/*
+     Returns the sliding dictionary being maintained by deflate.  dictLength is
+   set to the number of bytes in the dictionary, and that many bytes are copied
+   to dictionary.  dictionary must have enough space, where 32768 bytes is
+   always enough.  If deflateGetDictionary() is called with dictionary equal to
+   Z_NULL, then only the dictionary length is returned, and nothing is copied.
+   Similarly, if dictLength is Z_NULL, then it is not set.
+
+     deflateGetDictionary() may return a length less than the window size, even
+   when more than the window size in input has been provided. It may return up
+   to 258 bytes less in that case, due to how zlib's implementation of deflate
+   manages the sliding window and lookahead for matches, where matches can be
+   up to 258 bytes long. If the application needs the last window-size bytes of
+   input, then that would need to be saved by the application outside of zlib.
+
+     deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
+   stream state is inconsistent.
+*/
+
+ZEXTERN int ZEXPORT deflateCopy(z_streamp dest,
+                                z_streamp source);
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when several compression strategies will be
+   tried, for example when there are several ways of pre-processing the input
+   data with a filter.  The streams that will be discarded should then be freed
+   by calling deflateEnd.  Note that deflateCopy duplicates the internal
+   compression state which can be quite large, so this strategy is slow and can
+   consume lots of memory.
+
+     deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being Z_NULL).  msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT deflateReset(z_streamp strm);
+/*
+     This function is equivalent to deflateEnd followed by deflateInit, but
+   does not free and reallocate the internal compression state.  The stream
+   will leave the compression level and any other attributes that may have been
+   set unchanged.  total_in, total_out, adler, and msg are initialized.
+
+     deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being Z_NULL).
+*/
+
+ZEXTERN int ZEXPORT deflateParams(z_streamp strm,
+                                  int level,
+                                  int strategy);
+/*
+     Dynamically update the compression level and compression strategy.  The
+   interpretation of level and strategy is as in deflateInit2().  This can be
+   used to switch between compression and straight copy of the input data, or
+   to switch to a different kind of input data requiring a different strategy.
+   If the compression approach (which is a function of the level) or the
+   strategy is changed, and if there have been any deflate() calls since the
+   state was initialized or reset, then the input available so far is
+   compressed with the old level and strategy using deflate(strm, Z_BLOCK).
+   There are three approaches for the compression levels 0, 1..3, and 4..9
+   respectively.  The new level and strategy will take effect at the next call
+   of deflate().
+
+     If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does
+   not have enough output space to complete, then the parameter change will not
+   take effect.  In this case, deflateParams() can be called again with the
+   same parameters and more output space to try again.
+
+     In order to assure a change in the parameters on the first try, the
+   deflate stream should be flushed using deflate() with Z_BLOCK or other flush
+   request until strm.avail_out is not zero, before calling deflateParams().
+   Then no more input data should be provided before the deflateParams() call.
+   If this is done, the old level and strategy will be applied to the data
+   compressed before deflateParams(), and the new level and strategy will be
+   applied to the data compressed after deflateParams().
+
+     deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream
+   state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if
+   there was not enough output space to complete the compression of the
+   available input data before a change in the strategy or approach.  Note that
+   in the case of a Z_BUF_ERROR, the parameters are not changed.  A return
+   value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be
+   retried with more output space.
+*/
+
+ZEXTERN int ZEXPORT deflateTune(z_streamp strm,
+                                int good_length,
+                                int max_lazy,
+                                int nice_length,
+                                int max_chain);
+/*
+     Fine tune deflate's internal compression parameters.  This should only be
+   used by someone who understands the algorithm used by zlib's deflate for
+   searching for the best matching string, and even then only by the most
+   fanatic optimizer trying to squeeze out the last compressed bit for their
+   specific input data.  Read the deflate.c source code for the meaning of the
+   max_lazy, good_length, nice_length, and max_chain parameters.
+
+     deflateTune() can be called after deflateInit() or deflateInit2(), and
+   returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
+ */
+
+ZEXTERN uLong ZEXPORT deflateBound(z_streamp strm,
+                                   uLong sourceLen);
+/*
+     deflateBound() returns an upper bound on the compressed size after
+   deflation of sourceLen bytes.  It must be called after deflateInit() or
+   deflateInit2(), and after deflateSetHeader(), if used.  This would be used
+   to allocate an output buffer for deflation in a single pass, and so would be
+   called before deflate().  If that first deflate() call is provided the
+   sourceLen input bytes, an output buffer allocated to the size returned by
+   deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed
+   to return Z_STREAM_END.  Note that it is possible for the compressed size to
+   be larger than the value returned by deflateBound() if flush options other
+   than Z_FINISH or Z_NO_FLUSH are used.
+*/
+
+ZEXTERN int ZEXPORT deflatePending(z_streamp strm,
+                                   unsigned *pending,
+                                   int *bits);
+/*
+     deflatePending() returns the number of bytes and bits of output that have
+   been generated, but not yet provided in the available output.  The bytes not
+   provided would be due to the available output space having being consumed.
+   The number of bits of output not provided are between 0 and 7, where they
+   await more bits to join them in order to fill out a full byte.  If pending
+   or bits are Z_NULL, then those values are not set.
+
+     deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+ */
+
+ZEXTERN int ZEXPORT deflatePrime(z_streamp strm,
+                                 int bits,
+                                 int value);
+/*
+     deflatePrime() inserts bits in the deflate output stream.  The intent
+   is that this function is used to start off the deflate output with the bits
+   leftover from a previous deflate stream when appending to it.  As such, this
+   function can only be used for raw deflate, and must be used before the first
+   deflate() call after a deflateInit2() or deflateReset().  bits must be less
+   than or equal to 16, and that many of the least significant bits of value
+   will be inserted in the output.
+
+     deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough
+   room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the
+   source stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT deflateSetHeader(z_streamp strm,
+                                     gz_headerp head);
+/*
+     deflateSetHeader() provides gzip header information for when a gzip
+   stream is requested by deflateInit2().  deflateSetHeader() may be called
+   after deflateInit2() or deflateReset() and before the first call of
+   deflate().  The text, time, os, extra field, name, and comment information
+   in the provided gz_header structure are written to the gzip header (xflag is
+   ignored -- the extra flags are set according to the compression level).  The
+   caller must assure that, if not Z_NULL, name and comment are terminated with
+   a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
+   available there.  If hcrc is true, a gzip header crc is included.  Note that
+   the current versions of the command-line version of gzip (up through version
+   1.3.x) do not support header crc's, and will report that it is a "multi-part
+   gzip file" and give up.
+
+     If deflateSetHeader is not used, the default gzip header has text false,
+   the time set to zero, and os set to the current operating system, with no
+   extra, name, or comment fields.  The gzip header is returned to the default
+   state by deflateReset().
+
+     deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateInit2(z_streamp strm,
+                                 int windowBits);
+
+     This is another version of inflateInit with an extra parameter.  The
+   fields next_in, avail_in, zalloc, zfree and opaque must be initialized
+   before by the caller.
+
+     The windowBits parameter is the base two logarithm of the maximum window
+   size (the size of the history buffer).  It should be in the range 8..15 for
+   this version of the library.  The default value is 15 if inflateInit is used
+   instead.  windowBits must be greater than or equal to the windowBits value
+   provided to deflateInit2() while compressing, or it must be equal to 15 if
+   deflateInit2() was not used.  If a compressed stream with a larger window
+   size is given as input, inflate() will return with the error code
+   Z_DATA_ERROR instead of trying to allocate a larger window.
+
+     windowBits can also be zero to request that inflate use the window size in
+   the zlib header of the compressed stream.
+
+     windowBits can also be -8..-15 for raw inflate.  In this case, -windowBits
+   determines the window size.  inflate() will then process raw deflate data,
+   not looking for a zlib or gzip header, not generating a check value, and not
+   looking for any check values for comparison at the end of the stream.  This
+   is for use with other formats that use the deflate compressed data format
+   such as zip.  Those formats provide their own check values.  If a custom
+   format is developed using the raw deflate format for compressed data, it is
+   recommended that a check value such as an Adler-32 or a CRC-32 be applied to
+   the uncompressed data as is done in the zlib, gzip, and zip formats.  For
+   most applications, the zlib format should be used as is.  Note that comments
+   above on the use in deflateInit2() applies to the magnitude of windowBits.
+
+     windowBits can also be greater than 15 for optional gzip decoding.  Add
+   32 to windowBits to enable zlib and gzip decoding with automatic header
+   detection, or add 16 to decode only the gzip format (the zlib format will
+   return a Z_DATA_ERROR).  If a gzip stream is being decoded, strm->adler is a
+   CRC-32 instead of an Adler-32.  Unlike the gunzip utility and gzread() (see
+   below), inflate() will *not* automatically decode concatenated gzip members.
+   inflate() will return Z_STREAM_END at the end of the gzip member.  The state
+   would need to be reset to continue decoding a subsequent gzip member.  This
+   *must* be done if there is more data after a gzip member, in order for the
+   decompression to be compliant with the gzip standard (RFC 1952).
+
+     inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+   version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+   invalid, such as a null pointer to the structure.  msg is set to null if
+   there is no error message.  inflateInit2 does not perform any decompression
+   apart from possibly reading the zlib header if present: actual decompression
+   will be done by inflate().  (So next_in and avail_in may be modified, but
+   next_out and avail_out are unused and unchanged.) The current implementation
+   of inflateInit2() does not process any header information -- that is
+   deferred until inflate() is called.
+*/
+
+ZEXTERN int ZEXPORT inflateSetDictionary(z_streamp strm,
+                                         const Bytef *dictionary,
+                                         uInt  dictLength);
+/*
+     Initializes the decompression dictionary from the given uncompressed byte
+   sequence.  This function must be called immediately after a call of inflate,
+   if that call returned Z_NEED_DICT.  The dictionary chosen by the compressor
+   can be determined from the Adler-32 value returned by that call of inflate.
+   The compressor and decompressor must use exactly the same dictionary (see
+   deflateSetDictionary).  For raw inflate, this function can be called at any
+   time to set the dictionary.  If the provided dictionary is smaller than the
+   window and there is already data in the window, then the provided dictionary
+   will amend what's there.  The application must insure that the dictionary
+   that was used for compression is provided.
+
+     inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
+   parameter is invalid (e.g.  dictionary being Z_NULL) or the stream state is
+   inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
+   expected one (incorrect Adler-32 value).  inflateSetDictionary does not
+   perform any decompression: this will be done by subsequent calls of
+   inflate().
+*/
+
+ZEXTERN int ZEXPORT inflateGetDictionary(z_streamp strm,
+                                         Bytef *dictionary,
+                                         uInt  *dictLength);
+/*
+     Returns the sliding dictionary being maintained by inflate.  dictLength is
+   set to the number of bytes in the dictionary, and that many bytes are copied
+   to dictionary.  dictionary must have enough space, where 32768 bytes is
+   always enough.  If inflateGetDictionary() is called with dictionary equal to
+   Z_NULL, then only the dictionary length is returned, and nothing is copied.
+   Similarly, if dictLength is Z_NULL, then it is not set.
+
+     inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
+   stream state is inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateSync(z_streamp strm);
+/*
+     Skips invalid compressed data until a possible full flush point (see above
+   for the description of deflate with Z_FULL_FLUSH) can be found, or until all
+   available input is skipped.  No output is provided.
+
+     inflateSync searches for a 00 00 FF FF pattern in the compressed data.
+   All full flush points have this pattern, but not all occurrences of this
+   pattern are full flush points.
+
+     inflateSync returns Z_OK if a possible full flush point has been found,
+   Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point
+   has been found, or Z_STREAM_ERROR if the stream structure was inconsistent.
+   In the success case, the application may save the current value of total_in
+   which indicates where valid compressed data was found.  In the error case,
+   the application may repeatedly call inflateSync, providing more input each
+   time, until success or end of the input data.
+*/
+
+ZEXTERN int ZEXPORT inflateCopy(z_streamp dest,
+                                z_streamp source);
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when randomly accessing a large stream.  The
+   first pass through the stream can periodically record the inflate state,
+   allowing restarting inflate at those points when randomly accessing the
+   stream.
+
+     inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being Z_NULL).  msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT inflateReset(z_streamp strm);
+/*
+     This function is equivalent to inflateEnd followed by inflateInit,
+   but does not free and reallocate the internal decompression state.  The
+   stream will keep attributes that may have been set by inflateInit2.
+   total_in, total_out, adler, and msg are initialized.
+
+     inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being Z_NULL).
+*/
+
+ZEXTERN int ZEXPORT inflateReset2(z_streamp strm,
+                                  int windowBits);
+/*
+     This function is the same as inflateReset, but it also permits changing
+   the wrap and window size requests.  The windowBits parameter is interpreted
+   the same as it is for inflateInit2.  If the window size is changed, then the
+   memory allocated for the window is freed, and the window will be reallocated
+   by inflate() if needed.
+
+     inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being Z_NULL), or if
+   the windowBits parameter is invalid.
+*/
+
+ZEXTERN int ZEXPORT inflatePrime(z_streamp strm,
+                                 int bits,
+                                 int value);
+/*
+     This function inserts bits in the inflate input stream.  The intent is
+   that this function is used to start inflating at a bit position in the
+   middle of a byte.  The provided bits will be used before any bytes are used
+   from next_in.  This function should only be used with raw inflate, and
+   should be used before the first inflate() call after inflateInit2() or
+   inflateReset().  bits must be less than or equal to 16, and that many of the
+   least significant bits of value will be inserted in the input.
+
+     If bits is negative, then the input stream bit buffer is emptied.  Then
+   inflatePrime() can be called again to put bits in the buffer.  This is used
+   to clear out bits leftover after feeding inflate a block description prior
+   to feeding inflate codes.
+
+     inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+ZEXTERN long ZEXPORT inflateMark(z_streamp strm);
+/*
+     This function returns two values, one in the lower 16 bits of the return
+   value, and the other in the remaining upper bits, obtained by shifting the
+   return value down 16 bits.  If the upper value is -1 and the lower value is
+   zero, then inflate() is currently decoding information outside of a block.
+   If the upper value is -1 and the lower value is non-zero, then inflate is in
+   the middle of a stored block, with the lower value equaling the number of
+   bytes from the input remaining to copy.  If the upper value is not -1, then
+   it is the number of bits back from the current bit position in the input of
+   the code (literal or length/distance pair) currently being processed.  In
+   that case the lower value is the number of bytes already emitted for that
+   code.
+
+     A code is being processed if inflate is waiting for more input to complete
+   decoding of the code, or if it has completed decoding but is waiting for
+   more output space to write the literal or match data.
+
+     inflateMark() is used to mark locations in the input data for random
+   access, which may be at bit positions, and to note those cases where the
+   output of a code may span boundaries of random access blocks.  The current
+   location in the input stream can be determined from avail_in and data_type
+   as noted in the description for the Z_BLOCK flush parameter for inflate.
+
+     inflateMark returns the value noted above, or -65536 if the provided
+   source stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateGetHeader(z_streamp strm,
+                                     gz_headerp head);
+/*
+     inflateGetHeader() requests that gzip header information be stored in the
+   provided gz_header structure.  inflateGetHeader() may be called after
+   inflateInit2() or inflateReset(), and before the first call of inflate().
+   As inflate() processes the gzip stream, head->done is zero until the header
+   is completed, at which time head->done is set to one.  If a zlib stream is
+   being decoded, then head->done is set to -1 to indicate that there will be
+   no gzip header information forthcoming.  Note that Z_BLOCK or Z_TREES can be
+   used to force inflate() to return immediately after header processing is
+   complete and before any actual data is decompressed.
+
+     The text, time, xflags, and os fields are filled in with the gzip header
+   contents.  hcrc is set to true if there is a header CRC.  (The header CRC
+   was valid if done is set to one.) If extra is not Z_NULL, then extra_max
+   contains the maximum number of bytes to write to extra.  Once done is true,
+   extra_len contains the actual extra field length, and extra contains the
+   extra field, or that field truncated if extra_max is less than extra_len.
+   If name is not Z_NULL, then up to name_max characters are written there,
+   terminated with a zero unless the length is greater than name_max.  If
+   comment is not Z_NULL, then up to comm_max characters are written there,
+   terminated with a zero unless the length is greater than comm_max.  When any
+   of extra, name, or comment are not Z_NULL and the respective field is not
+   present in the header, then that field is set to Z_NULL to signal its
+   absence.  This allows the use of deflateSetHeader() with the returned
+   structure to duplicate the header.  However if those fields are set to
+   allocated memory, then the application will need to save those pointers
+   elsewhere so that they can be eventually freed.
+
+     If inflateGetHeader is not used, then the header information is simply
+   discarded.  The header is always checked for validity, including the header
+   CRC if present.  inflateReset() will reset the process to discard the header
+   information.  The application would need to call inflateGetHeader() again to
+   retrieve the header from the next gzip stream.
+
+     inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateBackInit(z_streamp strm, int windowBits,
+                                    unsigned char FAR *window);
+
+     Initialize the internal stream state for decompression using inflateBack()
+   calls.  The fields zalloc, zfree and opaque in strm must be initialized
+   before the call.  If zalloc and zfree are Z_NULL, then the default library-
+   derived memory allocation routines are used.  windowBits is the base two
+   logarithm of the window size, in the range 8..15.  window is a caller
+   supplied buffer of that size.  Except for special applications where it is
+   assured that deflate was used with small window sizes, windowBits must be 15
+   and a 32K byte window must be supplied to be able to decompress general
+   deflate streams.
+
+     See inflateBack() for the usage of these routines.
+
+     inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of
+   the parameters are invalid, Z_MEM_ERROR if the internal state could not be
+   allocated, or Z_VERSION_ERROR if the version of the library does not match
+   the version of the header file.
+*/
+
+typedef unsigned (*in_func)(void FAR *,
+                            z_const unsigned char FAR * FAR *);
+typedef int (*out_func)(void FAR *, unsigned char FAR *, unsigned);
+
+ZEXTERN int ZEXPORT inflateBack(z_streamp strm,
+                                in_func in, void FAR *in_desc,
+                                out_func out, void FAR *out_desc);
+/*
+     inflateBack() does a raw inflate with a single call using a call-back
+   interface for input and output.  This is potentially more efficient than
+   inflate() for file i/o applications, in that it avoids copying between the
+   output and the sliding window by simply making the window itself the output
+   buffer.  inflate() can be faster on modern CPUs when used with large
+   buffers.  inflateBack() trusts the application to not change the output
+   buffer passed by the output function, at least until inflateBack() returns.
+
+     inflateBackInit() must be called first to allocate the internal state
+   and to initialize the state with the user-provided window buffer.
+   inflateBack() may then be used multiple times to inflate a complete, raw
+   deflate stream with each call.  inflateBackEnd() is then called to free the
+   allocated state.
+
+     A raw deflate stream is one with no zlib or gzip header or trailer.
+   This routine would normally be used in a utility that reads zip or gzip
+   files and writes out uncompressed files.  The utility would decode the
+   header and process the trailer on its own, hence this routine expects only
+   the raw deflate stream to decompress.  This is different from the default
+   behavior of inflate(), which expects a zlib header and trailer around the
+   deflate stream.
+
+     inflateBack() uses two subroutines supplied by the caller that are then
+   called by inflateBack() for input and output.  inflateBack() calls those
+   routines until it reads a complete deflate stream and writes out all of the
+   uncompressed data, or until it encounters an error.  The function's
+   parameters and return types are defined above in the in_func and out_func
+   typedefs.  inflateBack() will call in(in_desc, &buf) which should return the
+   number of bytes of provided input, and a pointer to that input in buf.  If
+   there is no input available, in() must return zero -- buf is ignored in that
+   case -- and inflateBack() will return a buffer error.  inflateBack() will
+   call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1].
+   out() should return zero on success, or non-zero on failure.  If out()
+   returns non-zero, inflateBack() will return with an error.  Neither in() nor
+   out() are permitted to change the contents of the window provided to
+   inflateBackInit(), which is also the buffer that out() uses to write from.
+   The length written by out() will be at most the window size.  Any non-zero
+   amount of input may be provided by in().
+
+     For convenience, inflateBack() can be provided input on the first call by
+   setting strm->next_in and strm->avail_in.  If that input is exhausted, then
+   in() will be called.  Therefore strm->next_in must be initialized before
+   calling inflateBack().  If strm->next_in is Z_NULL, then in() will be called
+   immediately for input.  If strm->next_in is not Z_NULL, then strm->avail_in
+   must also be initialized, and then if strm->avail_in is not zero, input will
+   initially be taken from strm->next_in[0 ..  strm->avail_in - 1].
+
+     The in_desc and out_desc parameters of inflateBack() is passed as the
+   first parameter of in() and out() respectively when they are called.  These
+   descriptors can be optionally used to pass any information that the caller-
+   supplied in() and out() functions need to do their job.
+
+     On return, inflateBack() will set strm->next_in and strm->avail_in to
+   pass back any unused input that was provided by the last in() call.  The
+   return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
+   if in() or out() returned an error, Z_DATA_ERROR if there was a format error
+   in the deflate stream (in which case strm->msg is set to indicate the nature
+   of the error), or Z_STREAM_ERROR if the stream was not properly initialized.
+   In the case of Z_BUF_ERROR, an input or output error can be distinguished
+   using strm->next_in which will be Z_NULL only if in() returned an error.  If
+   strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning
+   non-zero.  (in() will always be called before out(), so strm->next_in is
+   assured to be defined if out() returns non-zero.)  Note that inflateBack()
+   cannot return Z_OK.
+*/
+
+ZEXTERN int ZEXPORT inflateBackEnd(z_streamp strm);
+/*
+     All memory allocated by inflateBackInit() is freed.
+
+     inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
+   state was inconsistent.
+*/
+
+ZEXTERN uLong ZEXPORT zlibCompileFlags(void);
+/* Return flags indicating compile-time options.
+
+    Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
+     1.0: size of uInt
+     3.2: size of uLong
+     5.4: size of voidpf (pointer)
+     7.6: size of z_off_t
+
+    Compiler, assembler, and debug options:
+     8: ZLIB_DEBUG
+     9: ASMV or ASMINF -- use ASM code
+     10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
+     11: 0 (reserved)
+
+    One-time table building (smaller code, but not thread-safe if true):
+     12: BUILDFIXED -- build static block decoding tables when needed
+     13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed
+     14,15: 0 (reserved)
+
+    Library content (indicates missing functionality):
+     16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking
+                          deflate code when not needed)
+     17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect
+                    and decode gzip streams (to avoid linking crc code)
+     18-19: 0 (reserved)
+
+    Operation variations (changes in library functionality):
+     20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate
+     21: FASTEST -- deflate algorithm with only one, lowest compression level
+     22,23: 0 (reserved)
+
+    The sprintf variant used by gzprintf (zero is best):
+     24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format
+     25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure!
+     26: 0 = returns value, 1 = void -- 1 means inferred string length returned
+
+    Remainder:
+     27-31: 0 (reserved)
+ */
+
+#ifndef Z_SOLO
+
+                        /* utility functions */
+
+/*
+     The following utility functions are implemented on top of the basic
+   stream-oriented functions.  To simplify the interface, some default options
+   are assumed (compression level and memory usage, standard memory allocation
+   functions).  The source code of these utility functions can be modified if
+   you need special options.
+*/
+
+ZEXTERN int ZEXPORT compress(Bytef *dest,   uLongf *destLen,
+                             const Bytef *source, uLong sourceLen);
+/*
+     Compresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer.  Upon entry, destLen is the total size
+   of the destination buffer, which must be at least the value returned by
+   compressBound(sourceLen).  Upon exit, destLen is the actual size of the
+   compressed data.  compress() is equivalent to compress2() with a level
+   parameter of Z_DEFAULT_COMPRESSION.
+
+     compress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer.
+*/
+
+ZEXTERN int ZEXPORT compress2(Bytef *dest,   uLongf *destLen,
+                              const Bytef *source, uLong sourceLen,
+                              int level);
+/*
+     Compresses the source buffer into the destination buffer.  The level
+   parameter has the same meaning as in deflateInit.  sourceLen is the byte
+   length of the source buffer.  Upon entry, destLen is the total size of the
+   destination buffer, which must be at least the value returned by
+   compressBound(sourceLen).  Upon exit, destLen is the actual size of the
+   compressed data.
+
+     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+   Z_STREAM_ERROR if the level parameter is invalid.
+*/
+
+ZEXTERN uLong ZEXPORT compressBound(uLong sourceLen);
+/*
+     compressBound() returns an upper bound on the compressed size after
+   compress() or compress2() on sourceLen bytes.  It would be used before a
+   compress() or compress2() call to allocate the destination buffer.
+*/
+
+ZEXTERN int ZEXPORT uncompress(Bytef *dest,   uLongf *destLen,
+                               const Bytef *source, uLong sourceLen);
+/*
+     Decompresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer.  Upon entry, destLen is the total size
+   of the destination buffer, which must be large enough to hold the entire
+   uncompressed data.  (The size of the uncompressed data must have been saved
+   previously by the compressor and transmitted to the decompressor by some
+   mechanism outside the scope of this compression library.) Upon exit, destLen
+   is the actual size of the uncompressed data.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete.  In
+   the case where there is not enough room, uncompress() will fill the output
+   buffer with the uncompressed data up to that point.
+*/
+
+ZEXTERN int ZEXPORT uncompress2(Bytef *dest,   uLongf *destLen,
+                                const Bytef *source, uLong *sourceLen);
+/*
+     Same as uncompress, except that sourceLen is a pointer, where the
+   length of the source is *sourceLen.  On return, *sourceLen is the number of
+   source bytes consumed.
+*/
+
+                        /* gzip file access functions */
+
+/*
+     This library supports reading and writing files in gzip (.gz) format with
+   an interface similar to that of stdio, using the functions that start with
+   "gz".  The gzip format is different from the zlib format.  gzip is a gzip
+   wrapper, documented in RFC 1952, wrapped around a deflate stream.
+*/
+
+typedef struct gzFile_s *gzFile;    /* semi-opaque gzip file descriptor */
+
+/*
+ZEXTERN gzFile ZEXPORT gzopen(const char *path, const char *mode);
+
+     Open the gzip (.gz) file at path for reading and decompressing, or
+   compressing and writing.  The mode parameter is as in fopen ("rb" or "wb")
+   but can also include a compression level ("wb9") or a strategy: 'f' for
+   filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h",
+   'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression
+   as in "wb9F".  (See the description of deflateInit2 for more information
+   about the strategy parameter.)  'T' will request transparent writing or
+   appending with no compression and not using the gzip format.
+
+     "a" can be used instead of "w" to request that the gzip stream that will
+   be written be appended to the file.  "+" will result in an error, since
+   reading and writing to the same gzip file is not supported.  The addition of
+   "x" when writing will create the file exclusively, which fails if the file
+   already exists.  On systems that support it, the addition of "e" when
+   reading or writing will set the flag to close the file on an execve() call.
+
+     These functions, as well as gzip, will read and decode a sequence of gzip
+   streams in a file.  The append function of gzopen() can be used to create
+   such a file.  (Also see gzflush() for another way to do this.)  When
+   appending, gzopen does not test whether the file begins with a gzip stream,
+   nor does it look for the end of the gzip streams to begin appending.  gzopen
+   will simply append a gzip stream to the existing file.
+
+     gzopen can be used to read a file which is not in gzip format; in this
+   case gzread will directly read from the file without decompression.  When
+   reading, this will be detected automatically by looking for the magic two-
+   byte gzip header.
+
+     gzopen returns NULL if the file could not be opened, if there was
+   insufficient memory to allocate the gzFile state, or if an invalid mode was
+   specified (an 'r', 'w', or 'a' was not provided, or '+' was provided).
+   errno can be checked to determine if the reason gzopen failed was that the
+   file could not be opened.
+*/
+
+ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char *mode);
+/*
+     Associate a gzFile with the file descriptor fd.  File descriptors are
+   obtained from calls like open, dup, creat, pipe or fileno (if the file has
+   been previously opened with fopen).  The mode parameter is as in gzopen.
+
+     The next call of gzclose on the returned gzFile will also close the file
+   descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor
+   fd.  If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd,
+   mode);.  The duplicated descriptor should be saved to avoid a leak, since
+   gzdopen does not close fd if it fails.  If you are using fileno() to get the
+   file descriptor from a FILE *, then you will have to use dup() to avoid
+   double-close()ing the file descriptor.  Both gzclose() and fclose() will
+   close the associated file descriptor, so they need to have different file
+   descriptors.
+
+     gzdopen returns NULL if there was insufficient memory to allocate the
+   gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not
+   provided, or '+' was provided), or if fd is -1.  The file descriptor is not
+   used until the next gz* read, write, seek, or close operation, so gzdopen
+   will not detect if fd is invalid (unless fd is -1).
+*/
+
+ZEXTERN int ZEXPORT gzbuffer(gzFile file, unsigned size);
+/*
+     Set the internal buffer size used by this library's functions for file to
+   size.  The default buffer size is 8192 bytes.  This function must be called
+   after gzopen() or gzdopen(), and before any other calls that read or write
+   the file.  The buffer memory allocation is always deferred to the first read
+   or write.  Three times that size in buffer space is allocated.  A larger
+   buffer size of, for example, 64K or 128K bytes will noticeably increase the
+   speed of decompression (reading).
+
+     The new buffer size also affects the maximum length for gzprintf().
+
+     gzbuffer() returns 0 on success, or -1 on failure, such as being called
+   too late.
+*/
+
+ZEXTERN int ZEXPORT gzsetparams(gzFile file, int level, int strategy);
+/*
+     Dynamically update the compression level and strategy for file.  See the
+   description of deflateInit2 for the meaning of these parameters. Previously
+   provided data is flushed before applying the parameter changes.
+
+     gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not
+   opened for writing, Z_ERRNO if there is an error writing the flushed data,
+   or Z_MEM_ERROR if there is a memory allocation error.
+*/
+
+ZEXTERN int ZEXPORT gzread(gzFile file, voidp buf, unsigned len);
+/*
+     Read and decompress up to len uncompressed bytes from file into buf.  If
+   the input file is not in gzip format, gzread copies the given number of
+   bytes into the buffer directly from the file.
+
+     After reaching the end of a gzip stream in the input, gzread will continue
+   to read, looking for another gzip stream.  Any number of gzip streams may be
+   concatenated in the input file, and will all be decompressed by gzread().
+   If something other than a gzip stream is encountered after a gzip stream,
+   that remaining trailing garbage is ignored (and no error is returned).
+
+     gzread can be used to read a gzip file that is being concurrently written.
+   Upon reaching the end of the input, gzread will return with the available
+   data.  If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then
+   gzclearerr can be used to clear the end of file indicator in order to permit
+   gzread to be tried again.  Z_OK indicates that a gzip stream was completed
+   on the last gzread.  Z_BUF_ERROR indicates that the input file ended in the
+   middle of a gzip stream.  Note that gzread does not return -1 in the event
+   of an incomplete gzip stream.  This error is deferred until gzclose(), which
+   will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip
+   stream.  Alternatively, gzerror can be used before gzclose to detect this
+   case.
+
+     gzread returns the number of uncompressed bytes actually read, less than
+   len for end of file, or -1 for error.  If len is too large to fit in an int,
+   then nothing is read, -1 is returned, and the error state is set to
+   Z_STREAM_ERROR.
+*/
+
+ZEXTERN z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems,
+                                 gzFile file);
+/*
+     Read and decompress up to nitems items of size size from file into buf,
+   otherwise operating as gzread() does.  This duplicates the interface of
+   stdio's fread(), with size_t request and return types.  If the library
+   defines size_t, then z_size_t is identical to size_t.  If not, then z_size_t
+   is an unsigned integer type that can contain a pointer.
+
+     gzfread() returns the number of full items read of size size, or zero if
+   the end of the file was reached and a full item could not be read, or if
+   there was an error.  gzerror() must be consulted if zero is returned in
+   order to determine if there was an error.  If the multiplication of size and
+   nitems overflows, i.e. the product does not fit in a z_size_t, then nothing
+   is read, zero is returned, and the error state is set to Z_STREAM_ERROR.
+
+     In the event that the end of file is reached and only a partial item is
+   available at the end, i.e. the remaining uncompressed data length is not a
+   multiple of size, then the final partial item is nevertheless read into buf
+   and the end-of-file flag is set.  The length of the partial item read is not
+   provided, but could be inferred from the result of gztell().  This behavior
+   is the same as the behavior of fread() implementations in common libraries,
+   but it prevents the direct use of gzfread() to read a concurrently written
+   file, resetting and retrying on end-of-file, when size is not 1.
+*/
+
+ZEXTERN int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len);
+/*
+     Compress and write the len uncompressed bytes at buf to file. gzwrite
+   returns the number of uncompressed bytes written or 0 in case of error.
+*/
+
+ZEXTERN z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size,
+                                  z_size_t nitems, gzFile file);
+/*
+     Compress and write nitems items of size size from buf to file, duplicating
+   the interface of stdio's fwrite(), with size_t request and return types.  If
+   the library defines size_t, then z_size_t is identical to size_t.  If not,
+   then z_size_t is an unsigned integer type that can contain a pointer.
+
+     gzfwrite() returns the number of full items written of size size, or zero
+   if there was an error.  If the multiplication of size and nitems overflows,
+   i.e. the product does not fit in a z_size_t, then nothing is written, zero
+   is returned, and the error state is set to Z_STREAM_ERROR.
+*/
+
+ZEXTERN int ZEXPORTVA gzprintf(gzFile file, const char *format, ...);
+/*
+     Convert, format, compress, and write the arguments (...) to file under
+   control of the string format, as in fprintf.  gzprintf returns the number of
+   uncompressed bytes actually written, or a negative zlib error code in case
+   of error.  The number of uncompressed bytes written is limited to 8191, or
+   one less than the buffer size given to gzbuffer().  The caller should assure
+   that this limit is not exceeded.  If it is exceeded, then gzprintf() will
+   return an error (0) with nothing written.  In this case, there may also be a
+   buffer overflow with unpredictable consequences, which is possible only if
+   zlib was compiled with the insecure functions sprintf() or vsprintf(),
+   because the secure snprintf() or vsnprintf() functions were not available.
+   This can be determined using zlibCompileFlags().
+*/
+
+ZEXTERN int ZEXPORT gzputs(gzFile file, const char *s);
+/*
+     Compress and write the given null-terminated string s to file, excluding
+   the terminating null character.
+
+     gzputs returns the number of characters written, or -1 in case of error.
+*/
+
+ZEXTERN char * ZEXPORT gzgets(gzFile file, char *buf, int len);
+/*
+     Read and decompress bytes from file into buf, until len-1 characters are
+   read, or until a newline character is read and transferred to buf, or an
+   end-of-file condition is encountered.  If any characters are read or if len
+   is one, the string is terminated with a null character.  If no characters
+   are read due to an end-of-file or len is less than one, then the buffer is
+   left untouched.
+
+     gzgets returns buf which is a null-terminated string, or it returns NULL
+   for end-of-file or in case of error.  If there was an error, the contents at
+   buf are indeterminate.
+*/
+
+ZEXTERN int ZEXPORT gzputc(gzFile file, int c);
+/*
+     Compress and write c, converted to an unsigned char, into file.  gzputc
+   returns the value that was written, or -1 in case of error.
+*/
+
+ZEXTERN int ZEXPORT gzgetc(gzFile file);
+/*
+     Read and decompress one byte from file.  gzgetc returns this byte or -1
+   in case of end of file or error.  This is implemented as a macro for speed.
+   As such, it does not do all of the checking the other functions do.  I.e.
+   it does not check to see if file is NULL, nor whether the structure file
+   points to has been clobbered or not.
+*/
+
+ZEXTERN int ZEXPORT gzungetc(int c, gzFile file);
+/*
+     Push c back onto the stream for file to be read as the first character on
+   the next read.  At least one character of push-back is always allowed.
+   gzungetc() returns the character pushed, or -1 on failure.  gzungetc() will
+   fail if c is -1, and may fail if a character has been pushed but not read
+   yet.  If gzungetc is used immediately after gzopen or gzdopen, at least the
+   output buffer size of pushed characters is allowed.  (See gzbuffer above.)
+   The pushed character will be discarded if the stream is repositioned with
+   gzseek() or gzrewind().
+*/
+
+ZEXTERN int ZEXPORT gzflush(gzFile file, int flush);
+/*
+     Flush all pending output to file.  The parameter flush is as in the
+   deflate() function.  The return value is the zlib error number (see function
+   gzerror below).  gzflush is only permitted when writing.
+
+     If the flush parameter is Z_FINISH, the remaining data is written and the
+   gzip stream is completed in the output.  If gzwrite() is called again, a new
+   gzip stream will be started in the output.  gzread() is able to read such
+   concatenated gzip streams.
+
+     gzflush should be called only when strictly necessary because it will
+   degrade compression if called too often.
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzseek(gzFile file,
+                               z_off_t offset, int whence);
+
+     Set the starting position to offset relative to whence for the next gzread
+   or gzwrite on file.  The offset represents a number of bytes in the
+   uncompressed data stream.  The whence parameter is defined as in lseek(2);
+   the value SEEK_END is not supported.
+
+     If the file is opened for reading, this function is emulated but can be
+   extremely slow.  If the file is opened for writing, only forward seeks are
+   supported; gzseek then compresses a sequence of zeroes up to the new
+   starting position.
+
+     gzseek returns the resulting offset location as measured in bytes from
+   the beginning of the uncompressed stream, or -1 in case of error, in
+   particular if the file is opened for writing and the new starting position
+   would be before the current position.
+*/
+
+ZEXTERN int ZEXPORT    gzrewind(gzFile file);
+/*
+     Rewind file. This function is supported only for reading.
+
+     gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET).
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT    gztell(gzFile file);
+
+     Return the starting position for the next gzread or gzwrite on file.
+   This position represents a number of bytes in the uncompressed data stream,
+   and is zero when starting, even if appending or reading a gzip stream from
+   the middle of a file using gzdopen().
+
+     gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzoffset(gzFile file);
+
+     Return the current compressed (actual) read or write offset of file.  This
+   offset includes the count of bytes that precede the gzip stream, for example
+   when appending or when using gzdopen() for reading.  When reading, the
+   offset does not include as yet unused buffered input.  This information can
+   be used for a progress indicator.  On error, gzoffset() returns -1.
+*/
+
+ZEXTERN int ZEXPORT gzeof(gzFile file);
+/*
+     Return true (1) if the end-of-file indicator for file has been set while
+   reading, false (0) otherwise.  Note that the end-of-file indicator is set
+   only if the read tried to go past the end of the input, but came up short.
+   Therefore, just like feof(), gzeof() may return false even if there is no
+   more data to read, in the event that the last read request was for the exact
+   number of bytes remaining in the input file.  This will happen if the input
+   file size is an exact multiple of the buffer size.
+
+     If gzeof() returns true, then the read functions will return no more data,
+   unless the end-of-file indicator is reset by gzclearerr() and the input file
+   has grown since the previous end of file was detected.
+*/
+
+ZEXTERN int ZEXPORT gzdirect(gzFile file);
+/*
+     Return true (1) if file is being copied directly while reading, or false
+   (0) if file is a gzip stream being decompressed.
+
+     If the input file is empty, gzdirect() will return true, since the input
+   does not contain a gzip stream.
+
+     If gzdirect() is used immediately after gzopen() or gzdopen() it will
+   cause buffers to be allocated to allow reading the file to determine if it
+   is a gzip file.  Therefore if gzbuffer() is used, it should be called before
+   gzdirect().
+
+     When writing, gzdirect() returns true (1) if transparent writing was
+   requested ("wT" for the gzopen() mode), or false (0) otherwise.  (Note:
+   gzdirect() is not needed when writing.  Transparent writing must be
+   explicitly requested, so the application already knows the answer.  When
+   linking statically, using gzdirect() will include all of the zlib code for
+   gzip file reading and decompression, which may not be desired.)
+*/
+
+ZEXTERN int ZEXPORT    gzclose(gzFile file);
+/*
+     Flush all pending output for file, if necessary, close file and
+   deallocate the (de)compression state.  Note that once file is closed, you
+   cannot call gzerror with file, since its structures have been deallocated.
+   gzclose must not be called more than once on the same file, just as free
+   must not be called more than once on the same allocation.
+
+     gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a
+   file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the
+   last read ended in the middle of a gzip stream, or Z_OK on success.
+*/
+
+ZEXTERN int ZEXPORT gzclose_r(gzFile file);
+ZEXTERN int ZEXPORT gzclose_w(gzFile file);
+/*
+     Same as gzclose(), but gzclose_r() is only for use when reading, and
+   gzclose_w() is only for use when writing or appending.  The advantage to
+   using these instead of gzclose() is that they avoid linking in zlib
+   compression or decompression code that is not used when only reading or only
+   writing respectively.  If gzclose() is used, then both compression and
+   decompression code will be included the application when linking to a static
+   zlib library.
+*/
+
+ZEXTERN const char * ZEXPORT gzerror(gzFile file, int *errnum);
+/*
+     Return the error message for the last error which occurred on file.
+   errnum is set to zlib error number.  If an error occurred in the file system
+   and not in the compression library, errnum is set to Z_ERRNO and the
+   application may consult errno to get the exact error code.
+
+     The application must not modify the returned string.  Future calls to
+   this function may invalidate the previously returned string.  If file is
+   closed, then the string previously returned by gzerror will no longer be
+   available.
+
+     gzerror() should be used to distinguish errors from end-of-file for those
+   functions above that do not distinguish those cases in their return values.
+*/
+
+ZEXTERN void ZEXPORT gzclearerr(gzFile file);
+/*
+     Clear the error and end-of-file flags for file.  This is analogous to the
+   clearerr() function in stdio.  This is useful for continuing to read a gzip
+   file that is being written concurrently.
+*/
+
+#endif /* !Z_SOLO */
+
+                        /* checksum functions */
+
+/*
+     These functions are not related to compression but are exported
+   anyway because they might be useful in applications using the compression
+   library.
+*/
+
+ZEXTERN uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len);
+/*
+     Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+   return the updated checksum. An Adler-32 value is in the range of a 32-bit
+   unsigned integer. If buf is Z_NULL, this function returns the required
+   initial value for the checksum.
+
+     An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed
+   much faster.
+
+   Usage example:
+
+     uLong adler = adler32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       adler = adler32(adler, buffer, length);
+     }
+     if (adler != original_adler) error();
+*/
+
+ZEXTERN uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf,
+                                z_size_t len);
+/*
+     Same as adler32(), but with a size_t length.
+*/
+
+/*
+ZEXTERN uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2,
+                                      z_off_t len2);
+
+     Combine two Adler-32 checksums into one.  For two sequences of bytes, seq1
+   and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
+   each, adler1 and adler2.  adler32_combine() returns the Adler-32 checksum of
+   seq1 and seq2 concatenated, requiring only adler1, adler2, and len2.  Note
+   that the z_off_t type (like off_t) is a signed integer.  If len2 is
+   negative, the result has no meaning or utility.
+*/
+
+ZEXTERN uLong ZEXPORT crc32(uLong crc, const Bytef *buf, uInt len);
+/*
+     Update a running CRC-32 with the bytes buf[0..len-1] and return the
+   updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer.
+   If buf is Z_NULL, this function returns the required initial value for the
+   crc. Pre- and post-conditioning (one's complement) is performed within this
+   function so it shouldn't be done by the application.
+
+   Usage example:
+
+     uLong crc = crc32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       crc = crc32(crc, buffer, length);
+     }
+     if (crc != original_crc) error();
+*/
+
+ZEXTERN uLong ZEXPORT crc32_z(uLong crc, const Bytef *buf,
+                              z_size_t len);
+/*
+     Same as crc32(), but with a size_t length.
+*/
+
+/*
+ZEXTERN uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2);
+
+     Combine two CRC-32 check values into one.  For two sequences of bytes,
+   seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
+   calculated for each, crc1 and crc2.  crc32_combine() returns the CRC-32
+   check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
+   len2. len2 must be non-negative.
+*/
+
+/*
+ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t len2);
+
+     Return the operator corresponding to length len2, to be used with
+   crc32_combine_op(). len2 must be non-negative.
+*/
+
+ZEXTERN uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op);
+/*
+     Give the same result as crc32_combine(), using op in place of len2. op is
+   is generated from len2 by crc32_combine_gen(). This will be faster than
+   crc32_combine() if the generated op is used more than once.
+*/
+
+
+                        /* various hacks, don't look :) */
+
+/* deflateInit and inflateInit are macros to allow checking the zlib version
+ * and the compiler's view of z_stream:
+ */
+ZEXTERN int ZEXPORT deflateInit_(z_streamp strm, int level,
+                                 const char *version, int stream_size);
+ZEXTERN int ZEXPORT inflateInit_(z_streamp strm,
+                                 const char *version, int stream_size);
+ZEXTERN int ZEXPORT deflateInit2_(z_streamp strm, int  level, int  method,
+                                  int windowBits, int memLevel,
+                                  int strategy, const char *version,
+                                  int stream_size);
+ZEXTERN int ZEXPORT inflateInit2_(z_streamp strm, int  windowBits,
+                                  const char *version, int stream_size);
+ZEXTERN int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits,
+                                     unsigned char FAR *window,
+                                     const char *version,
+                                     int stream_size);
+#ifdef Z_PREFIX_SET
+#  define z_deflateInit(strm, level) \
+          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_inflateInit(strm) \
+          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_inflateInit2(strm, windowBits) \
+          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+                        (int)sizeof(z_stream))
+#  define z_inflateBackInit(strm, windowBits, window) \
+          inflateBackInit_((strm), (windowBits), (window), \
+                           ZLIB_VERSION, (int)sizeof(z_stream))
+#else
+#  define deflateInit(strm, level) \
+          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define inflateInit(strm) \
+          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define inflateInit2(strm, windowBits) \
+          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+                        (int)sizeof(z_stream))
+#  define inflateBackInit(strm, windowBits, window) \
+          inflateBackInit_((strm), (windowBits), (window), \
+                           ZLIB_VERSION, (int)sizeof(z_stream))
+#endif
+
+#ifndef Z_SOLO
+
+/* gzgetc() macro and its supporting function and exposed data structure.  Note
+ * that the real internal state is much larger than the exposed structure.
+ * This abbreviated structure exposes just enough for the gzgetc() macro.  The
+ * user should not mess with these exposed elements, since their names or
+ * behavior could change in the future, perhaps even capriciously.  They can
+ * only be used by the gzgetc() macro.  You have been warned.
+ */
+struct gzFile_s {
+    unsigned have;
+    unsigned char *next;
+    z_off64_t pos;
+};
+ZEXTERN int ZEXPORT gzgetc_(gzFile file);       /* backward compatibility */
+#ifdef Z_PREFIX_SET
+#  undef z_gzgetc
+#  define z_gzgetc(g) \
+          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g))
+#else
+#  define gzgetc(g) \
+          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g))
 #endif
 
-   ZEXTERN const char   * ZEXPORT zError           OF((int));
-   ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp z));
-   ZEXTERN const uLongf * ZEXPORT get_crc_table    OF((void));
+/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or
+ * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if
+ * both are true, the application gets the *64 functions, and the regular
+ * functions are changed to 64 bits) -- in case these are set on systems
+ * without large file support, _LFS64_LARGEFILE must also be true
+ */
+#ifdef Z_LARGE64
+   ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *);
+   ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int);
+   ZEXTERN z_off64_t ZEXPORT gztell64(gzFile);
+   ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile);
+   ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off64_t);
+   ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off64_t);
+   ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off64_t);
+#endif
+
+#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
+#  ifdef Z_PREFIX_SET
+#    define z_gzopen z_gzopen64
+#    define z_gzseek z_gzseek64
+#    define z_gztell z_gztell64
+#    define z_gzoffset z_gzoffset64
+#    define z_adler32_combine z_adler32_combine64
+#    define z_crc32_combine z_crc32_combine64
+#    define z_crc32_combine_gen z_crc32_combine_gen64
+#  else
+#    define gzopen gzopen64
+#    define gzseek gzseek64
+#    define gztell gztell64
+#    define gzoffset gzoffset64
+#    define adler32_combine adler32_combine64
+#    define crc32_combine crc32_combine64
+#    define crc32_combine_gen crc32_combine_gen64
+#  endif
+#  ifndef Z_LARGE64
+     ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *);
+     ZEXTERN z_off_t ZEXPORT gzseek64(gzFile, z_off_t, int);
+     ZEXTERN z_off_t ZEXPORT gztell64(gzFile);
+     ZEXTERN z_off_t ZEXPORT gzoffset64(gzFile);
+     ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t);
+     ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t);
+     ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t);
+#  endif
+#else
+   ZEXTERN gzFile ZEXPORT gzopen(const char *, const char *);
+   ZEXTERN z_off_t ZEXPORT gzseek(gzFile, z_off_t, int);
+   ZEXTERN z_off_t ZEXPORT gztell(gzFile);
+   ZEXTERN z_off_t ZEXPORT gzoffset(gzFile);
+   ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t);
+#endif
+
+#else /* Z_SOLO */
+
+   ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t);
+
+#endif /* !Z_SOLO */
+
+/* undocumented functions */
+ZEXTERN const char   * ZEXPORT zError(int);
+ZEXTERN int            ZEXPORT inflateSyncPoint(z_streamp);
+ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table(void);
+ZEXTERN int            ZEXPORT inflateUndermine(z_streamp, int);
+ZEXTERN int            ZEXPORT inflateValidate(z_streamp, int);
+ZEXTERN unsigned long  ZEXPORT inflateCodesUsed(z_streamp);
+ZEXTERN int            ZEXPORT inflateResetKeep(z_streamp);
+ZEXTERN int            ZEXPORT deflateResetKeep(z_streamp);
+#if defined(_WIN32) && !defined(Z_SOLO)
+ZEXTERN gzFile         ZEXPORT gzopen_w(const wchar_t *path,
+                                        const char *mode);
+#endif
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifndef Z_SOLO
+ZEXTERN int            ZEXPORTVA gzvprintf(gzFile file,
+                                           const char *format,
+                                           va_list va);
+#  endif
+#endif
 
 #ifdef __cplusplus
 }
diff --git a/reg-io/zlib/zutil.c b/reg-io/zlib/zutil.c
index d55f5948..b1c5d2d3 100644
--- a/reg-io/zlib/zutil.c
+++ b/reg-io/zlib/zutil.c
@@ -1,69 +1,69 @@
 /* zutil.c -- target dependent utility functions for the compression library
- * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * Copyright (C) 1995-2017 Jean-loup Gailly
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
 /* @(#) $Id$ */
 
 #include "zutil.h"
-
-#ifndef NO_DUMMY_DECL
-struct internal_state      {int dummy;}; /* for buggy compilers */
+#ifndef Z_SOLO
+#  include "gzguts.h"
 #endif
 
-const char * const z_errmsg[10] = {
-"need dictionary",     /* Z_NEED_DICT       2  */
-"stream end",          /* Z_STREAM_END      1  */
-"",                    /* Z_OK              0  */
-"file error",          /* Z_ERRNO         (-1) */
-"stream error",        /* Z_STREAM_ERROR  (-2) */
-"data error",          /* Z_DATA_ERROR    (-3) */
-"insufficient memory", /* Z_MEM_ERROR     (-4) */
-"buffer error",        /* Z_BUF_ERROR     (-5) */
-"incompatible version",/* Z_VERSION_ERROR (-6) */
-""};
-
-
-const char * ZEXPORT zlibVersion()
-{
+z_const char * const z_errmsg[10] = {
+    (z_const char *)"need dictionary",     /* Z_NEED_DICT       2  */
+    (z_const char *)"stream end",          /* Z_STREAM_END      1  */
+    (z_const char *)"",                    /* Z_OK              0  */
+    (z_const char *)"file error",          /* Z_ERRNO         (-1) */
+    (z_const char *)"stream error",        /* Z_STREAM_ERROR  (-2) */
+    (z_const char *)"data error",          /* Z_DATA_ERROR    (-3) */
+    (z_const char *)"insufficient memory", /* Z_MEM_ERROR     (-4) */
+    (z_const char *)"buffer error",        /* Z_BUF_ERROR     (-5) */
+    (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */
+    (z_const char *)""
+};
+
+
+const char * ZEXPORT zlibVersion(void) {
     return ZLIB_VERSION;
 }
 
-uLong ZEXPORT zlibCompileFlags()
-{
+uLong ZEXPORT zlibCompileFlags(void) {
     uLong flags;
 
     flags = 0;
-    switch (sizeof(uInt)) {
+    switch ((int)(sizeof(uInt))) {
     case 2:     break;
     case 4:     flags += 1;     break;
     case 8:     flags += 2;     break;
     default:    flags += 3;
     }
-    switch (sizeof(uLong)) {
+    switch ((int)(sizeof(uLong))) {
     case 2:     break;
     case 4:     flags += 1 << 2;        break;
     case 8:     flags += 2 << 2;        break;
     default:    flags += 3 << 2;
     }
-    switch (sizeof(voidpf)) {
+    switch ((int)(sizeof(voidpf))) {
     case 2:     break;
     case 4:     flags += 1 << 4;        break;
     case 8:     flags += 2 << 4;        break;
     default:    flags += 3 << 4;
     }
-    switch (sizeof(z_off_t)) {
+    switch ((int)(sizeof(z_off_t))) {
     case 2:     break;
     case 4:     flags += 1 << 6;        break;
     case 8:     flags += 2 << 6;        break;
     default:    flags += 3 << 6;
     }
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
     flags += 1 << 8;
 #endif
+    /*
 #if defined(ASMV) || defined(ASMINF)
     flags += 1 << 9;
 #endif
+     */
 #ifdef ZLIB_WINAPI
     flags += 1 << 10;
 #endif
@@ -85,43 +85,41 @@ uLong ZEXPORT zlibCompileFlags()
 #ifdef FASTEST
     flags += 1L << 21;
 #endif
-#ifdef STDC
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
 #  ifdef NO_vsnprintf
-        flags += 1L << 25;
+    flags += 1L << 25;
 #    ifdef HAS_vsprintf_void
-        flags += 1L << 26;
+    flags += 1L << 26;
 #    endif
 #  else
 #    ifdef HAS_vsnprintf_void
-        flags += 1L << 26;
+    flags += 1L << 26;
 #    endif
 #  endif
 #else
-        flags += 1L << 24;
+    flags += 1L << 24;
 #  ifdef NO_snprintf
-        flags += 1L << 25;
+    flags += 1L << 25;
 #    ifdef HAS_sprintf_void
-        flags += 1L << 26;
+    flags += 1L << 26;
 #    endif
 #  else
 #    ifdef HAS_snprintf_void
-        flags += 1L << 26;
+    flags += 1L << 26;
 #    endif
 #  endif
 #endif
     return flags;
 }
 
-#ifdef DEBUG
-
+#ifdef ZLIB_DEBUG
+#include <stdlib.h>
 #  ifndef verbose
 #    define verbose 0
 #  endif
-int z_verbose = verbose;
+int ZLIB_INTERNAL z_verbose = verbose;
 
-void z_error (m)
-    char *m;
-{
+void ZLIB_INTERNAL z_error(char *m) {
     fprintf(stderr, "%s\n", m);
     exit(1);
 }
@@ -130,14 +128,12 @@ void z_error (m)
 /* exported to allow conversion of error code to string for compress() and
  * uncompress()
  */
-const char * ZEXPORT zError(err)
-    int err;
-{
+const char * ZEXPORT zError(int err) {
     return ERR_MSG(err);
 }
 
-#if defined(_WIN32_WCE)
-    /* The Microsoft C Run-Time Library for Windows CE doesn't have
+#if defined(_WIN32_WCE) && _WIN32_WCE < 0x800
+    /* The older Microsoft C Run-Time Library for Windows CE doesn't have
      * errno.  We define it as a global variable to simplify porting.
      * Its value is always 0 and should not be used.
      */
@@ -146,22 +142,14 @@ const char * ZEXPORT zError(err)
 
 #ifndef HAVE_MEMCPY
 
-void zmemcpy(dest, source, len)
-    Bytef* dest;
-    const Bytef* source;
-    uInt  len;
-{
+void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len) {
     if (len == 0) return;
     do {
         *dest++ = *source++; /* ??? to be unrolled */
     } while (--len != 0);
 }
 
-int zmemcmp(s1, s2, len)
-    const Bytef* s1;
-    const Bytef* s2;
-    uInt  len;
-{
+int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len) {
     uInt j;
 
     for (j = 0; j < len; j++) {
@@ -170,10 +158,7 @@ int zmemcmp(s1, s2, len)
     return 0;
 }
 
-void zmemzero(dest, len)
-    Bytef* dest;
-    uInt  len;
-{
+void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len) {
     if (len == 0) return;
     do {
         *dest++ = 0;  /* ??? to be unrolled */
@@ -181,6 +166,7 @@ void zmemzero(dest, len)
 }
 #endif
 
+#ifndef Z_SOLO
 
 #ifdef SYS16BIT
 
@@ -213,11 +199,12 @@ local ptr_table table[MAX_PTR];
  * a protected system like OS/2. Use Microsoft C instead.
  */
 
-voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
-{
-    voidpf buf = opaque; /* just to make some compilers happy */
+voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size) {
+    voidpf buf;
     ulg bsize = (ulg)items*size;
 
+    (void)opaque;
+
     /* If we allocate less than 65520 bytes, we assume that farmalloc
      * will return a usable pointer which doesn't have to be normalized.
      */
@@ -237,9 +224,11 @@ voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
     return buf;
 }
 
-void  zcfree (voidpf opaque, voidpf ptr)
-{
+void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) {
     int n;
+
+    (void)opaque;
+
     if (*(ush*)&ptr != 0) { /* object < 64K */
         farfree(ptr);
         return;
@@ -255,7 +244,6 @@ void  zcfree (voidpf opaque, voidpf ptr)
         next_ptr--;
         return;
     }
-    ptr = opaque; /* just to make some compilers happy */
     Assert(0, "zcfree: ptr not found");
 }
 
@@ -272,15 +260,13 @@ void  zcfree (voidpf opaque, voidpf ptr)
 #  define _hfree   hfree
 #endif
 
-voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
-{
-    if (opaque) opaque = 0; /* to make compiler happy */
+voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, uInt items, uInt size) {
+    (void)opaque;
     return _halloc((long)items, size);
 }
 
-void  zcfree (voidpf opaque, voidpf ptr)
-{
-    if (opaque) opaque = 0; /* to make compiler happy */
+void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) {
+    (void)opaque;
     _hfree(ptr);
 }
 
@@ -292,27 +278,22 @@ void  zcfree (voidpf opaque, voidpf ptr)
 #ifndef MY_ZCALLOC /* Any system without a special alloc function */
 
 #ifndef STDC
-extern voidp  malloc OF((uInt size));
-extern voidp  calloc OF((uInt items, uInt size));
-extern void   free   OF((voidpf ptr));
+extern voidp malloc(uInt size);
+extern voidp calloc(uInt items, uInt size);
+extern void free(voidpf ptr);
 #endif
 
-voidpf zcalloc (opaque, items, size)
-    voidpf opaque;
-    unsigned items;
-    unsigned size;
-{
-    if (opaque) items += size - size; /* make compiler happy */
+voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size) {
+    (void)opaque;
     return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
                               (voidpf)calloc(items, size);
 }
 
-void  zcfree (opaque, ptr)
-    voidpf opaque;
-    voidpf ptr;
-{
+void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) {
+    (void)opaque;
     free(ptr);
-    if (opaque) return; /* make compiler happy */
 }
 
 #endif /* MY_ZCALLOC */
+
+#endif /* !Z_SOLO */
diff --git a/reg-io/zlib/zutil.h b/reg-io/zlib/zutil.h
index dea52429..48dd7feb 100644
--- a/reg-io/zlib/zutil.h
+++ b/reg-io/zlib/zutil.h
@@ -1,5 +1,5 @@
 /* zutil.h -- internal interface and configuration of the compression library
- * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -13,36 +13,28 @@
 #ifndef ZUTIL_H
 #define ZUTIL_H
 
-#define ZLIB_INTERNAL
+#ifdef HAVE_HIDDEN
+#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+#  define ZLIB_INTERNAL
+#endif
+
 #include "zlib.h"
 
-#ifdef STDC
-#  ifndef _WIN32_WCE
+#if defined(STDC) && !defined(Z_SOLO)
+#  if !(defined(_WIN32_WCE) && defined(_MSC_VER))
 #    include <stddef.h>
 #  endif
 #  include <string.h>
 #  include <stdlib.h>
 #endif
-#ifdef NO_ERRNO_H
-#   ifdef _WIN32_WCE
-/* The Microsoft C Run-Time Library for Windows CE doesn't have
- * errno.  We define it as a global variable to simplify porting.
- * Its value is always 0 and should not be used.  We rename it to
- * avoid conflict with other libraries that use the same workaround.
- */
-#     define errno z_errno
-#   endif
-extern int errno;
-#else
-#  ifndef _WIN32_WCE
-#    include <errno.h>
-#  endif
-#endif
 
 #ifndef local
 #  define local static
 #endif
-/* compile with -Dlocal if your debugger can't find static symbols */
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
 
 typedef unsigned char  uch;
 typedef uch FAR uchf;
@@ -50,16 +42,27 @@ typedef unsigned short ush;
 typedef ush FAR ushf;
 typedef unsigned long  ulg;
 
-extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
+#if !defined(Z_U8) && !defined(Z_SOLO) && defined(STDC)
+#  include <limits.h>
+#  if (ULONG_MAX == 0xffffffffffffffff)
+#    define Z_U8 unsigned long
+#  elif (ULLONG_MAX == 0xffffffffffffffff)
+#    define Z_U8 unsigned long long
+#  elif (UINT_MAX == 0xffffffffffffffff)
+#    define Z_U8 unsigned
+#  endif
+#endif
+
+extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 /* (size given to avoid silly warnings with Visual C++) */
 
-#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
+#define ERR_MSG(err) z_errmsg[(err) < -6 || (err) > 2 ? 9 : 2 - (err)]
 
 #define ERR_RETURN(strm,err) \
-  return (strm->msg = (char*)ERR_MSG(err), (err))
+  return (strm->msg = ERR_MSG(err), (err))
 /* To be used only when the state is known to be valid */
 
-/* common constants */
+        /* common constants */
 
 #ifndef DEF_WBITS
 #  define DEF_WBITS MAX_WBITS
@@ -84,138 +87,114 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 
 #define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
 
-/* target dependencies */
+        /* target dependencies */
 
 #if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
 #  define OS_CODE  0x00
-#  if defined(__TURBOC__) || defined(__BORLANDC__)
-#    if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
-/* Allow compilation with ANSI keywords only enabled */
-void _Cdecl farfree( void *block );
-void *_Cdecl farmalloc( unsigned long nbytes );
-#    else
-#      include <alloc.h>
+#  ifndef Z_SOLO
+#    if defined(__TURBOC__) || defined(__BORLANDC__)
+#      if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
+         /* Allow compilation with ANSI keywords only enabled */
+         void _Cdecl farfree( void *block );
+         void *_Cdecl farmalloc( unsigned long nbytes );
+#      else
+#        include <alloc.h>
+#      endif
+#    else /* MSC or DJGPP */
+#      include <malloc.h>
 #    endif
-#  else /* MSC or DJGPP */
-#    include <malloc.h>
 #  endif
 #endif
 
 #ifdef AMIGA
-#  define OS_CODE  0x01
+#  define OS_CODE  1
 #endif
 
 #if defined(VAXC) || defined(VMS)
-#  define OS_CODE  0x02
+#  define OS_CODE  2
 #  define F_OPEN(name, mode) \
      fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
 #endif
 
+#ifdef __370__
+#  if __TARGET_LIB__ < 0x20000000
+#    define OS_CODE 4
+#  elif __TARGET_LIB__ < 0x40000000
+#    define OS_CODE 11
+#  else
+#    define OS_CODE 8
+#  endif
+#endif
+
 #if defined(ATARI) || defined(atarist)
-#  define OS_CODE  0x05
+#  define OS_CODE  5
 #endif
 
 #ifdef OS2
-#  define OS_CODE  0x06
-#  ifdef M_I86
-#include <malloc.h>
+#  define OS_CODE  6
+#  if defined(M_I86) && !defined(Z_SOLO)
+#    include <malloc.h>
 #  endif
 #endif
 
-#if defined(MACOS) || defined(TARGET_OS_MAC)
-#  define OS_CODE  0x07
-#  if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
-#    include <unix.h> /* for fdopen */
-#  else
-#    ifndef fdopen
-#      define fdopen(fd,mode) NULL /* No fdopen() */
-#    endif
-#  endif
+#if defined(MACOS)
+#  define OS_CODE  7
 #endif
 
-#ifdef TOPS20
-#  define OS_CODE  0x0a
+#ifdef __acorn
+#  define OS_CODE 13
 #endif
 
-#ifdef WIN32
-#  ifndef __CYGWIN__  /* Cygwin is Unix, not Win32 */
-#    define OS_CODE  0x0b
-#  endif
+#if defined(WIN32) && !defined(__CYGWIN__)
+#  define OS_CODE  10
 #endif
 
-#ifdef __50SERIES /* Prime/PRIMOS */
-#  define OS_CODE  0x0f
+#ifdef _BEOS_
+#  define OS_CODE  16
 #endif
 
-#if defined(_BEOS_) || defined(RISCOS)
-#  define fdopen(fd,mode) NULL /* No fdopen() */
+#ifdef __TOS_OS400__
+#  define OS_CODE 18
 #endif
 
-#if (defined(_MSC_VER) && (_MSC_VER > 600))
-#  if defined(_WIN32_WCE)
-#    define fdopen(fd,mode) NULL /* No fdopen() */
-#    ifndef _PTRDIFF_T_DEFINED
-typedef int ptrdiff_t;
-#      define _PTRDIFF_T_DEFINED
-#    endif
-#  else
-#    define fdopen(fd,type)  _fdopen(fd,type)
-#  endif
+#ifdef __APPLE__
+#  define OS_CODE 19
+#endif
+
+#if defined(__BORLANDC__) && !defined(MSDOS)
+  #pragma warn -8004
+  #pragma warn -8008
+  #pragma warn -8066
 #endif
 
-/* common defaults */
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_WIN32) && \
+    (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
+    ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t);
+    ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t);
+    ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t);
+#endif
+
+        /* common defaults */
 
 #ifndef OS_CODE
-#  define OS_CODE  0x03  /* assume Unix */
+#  define OS_CODE  3     /* assume Unix */
 #endif
 
 #ifndef F_OPEN
 #  define F_OPEN(name, mode) fopen((name), (mode))
 #endif
 
-/* functions */
+         /* functions */
 
-#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
-#  ifndef HAVE_VSNPRINTF
-#    define HAVE_VSNPRINTF
-#  endif
-#endif
-#if defined(__CYGWIN__)
-#  ifndef HAVE_VSNPRINTF
-#    define HAVE_VSNPRINTF
-#  endif
-#endif
-#ifndef HAVE_VSNPRINTF
-#  ifdef MSDOS
-/* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
-   but for now we just assume it doesn't. */
-#    define NO_vsnprintf
-#  endif
-#  ifdef __TURBOC__
-#    define NO_vsnprintf
-#  endif
-#  ifdef WIN32
-/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
-#    if !defined(vsnprintf) && !defined(NO_vsnprintf)
-#      define vsnprintf _vsnprintf
-#    endif
-#  endif
-#  ifdef __SASC
-#    define NO_vsnprintf
-#  endif
-#endif
-#ifdef VMS
-#  define NO_vsnprintf
-#endif
-
-#if defined(pyr)
+#if defined(pyr) || defined(Z_SOLO)
 #  define NO_MEMCPY
 #endif
 #if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
-/* Use our own functions for small and medium model with MSC <= 5.0.
- * You may have to use the same strategy for Borland C (untested).
- * The __SC__ check is for Symantec.
- */
+ /* Use our own functions for small and medium model with MSC <= 5.0.
+  * You may have to use the same strategy for Borland C (untested).
+  * The __SC__ check is for Symantec.
+  */
 #  define NO_MEMCPY
 #endif
 #if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
@@ -232,16 +211,16 @@ typedef int ptrdiff_t;
 #    define zmemzero(dest, len) memset(dest, 0, len)
 #  endif
 #else
-extern void zmemcpy  OF((Bytef* dest, const Bytef* source, uInt len));
-extern int  zmemcmp  OF((const Bytef* s1, const Bytef* s2, uInt len));
-extern void zmemzero OF((Bytef* dest, uInt len));
+   void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len);
+   int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len);
+   void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len);
 #endif
 
 /* Diagnostic functions */
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
 #  include <stdio.h>
-extern int z_verbose;
-extern void z_error    OF((char *m));
+   extern int ZLIB_INTERNAL z_verbose;
+   extern void ZLIB_INTERNAL z_error(char *m);
 #  define Assert(cond,msg) {if(!(cond)) z_error(msg);}
 #  define Trace(x) {if (z_verbose>=0) fprintf x ;}
 #  define Tracev(x) {if (z_verbose>0) fprintf x ;}
@@ -257,13 +236,19 @@ extern void z_error    OF((char *m));
 #  define Tracecv(c,x)
 #endif
 
-
-voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size));
-void   zcfree  OF((voidpf opaque, voidpf ptr));
+#ifndef Z_SOLO
+   voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items,
+                                unsigned size);
+   void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr);
+#endif
 
 #define ZALLOC(strm, items, size) \
            (*((strm)->zalloc))((strm)->opaque, (items), (size))
 #define ZFREE(strm, addr)  (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
 #define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
 
+/* Reverse the bytes in a 32-bit value */
+#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
+                    (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
+
 #endif /* ZUTIL_H */

From 193ef44e5be9fc6252f6bdfa934bad74e63ece21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 16 Feb 2024 17:39:59 +0000
Subject: [PATCH 288/314] Upgrade RNifti and dependencies

---
 niftyreg_build_version.txt          |   2 +-
 reg-io/RNifti.h                     |   2 +-
 reg-io/RNifti/NiftiImage.h          | 114 +++++--
 reg-io/RNifti/NiftiImage_impl.h     |  76 +++--
 reg-io/RNifti/NiftiImage_print.h    |   6 +-
 reg-io/niftilib/nifti1.h            |   2 +-
 reg-io/niftilib/nifti1_io.c         | 407 ++++++++++++++++---------
 reg-io/niftilib/nifti1_io.h         |   8 +-
 reg-io/niftilib/nifti1_io_version.h |  16 +
 reg-io/niftilib/nifti2_io.c         | 442 +++++++++++++++++-----------
 reg-io/niftilib/nifti2_io.h         |  17 +-
 reg-io/niftilib/nifti2_io_version.h |  16 +
 reg-io/znzlib/znzlib.c              |  12 +-
 reg-io/znzlib/znzlib.h              |  17 +-
 14 files changed, 750 insertions(+), 387 deletions(-)
 create mode 100644 reg-io/niftilib/nifti1_io_version.h
 create mode 100644 reg-io/niftilib/nifti2_io_version.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c8f0fcc6..74fa38c9 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-406
+407
diff --git a/reg-io/RNifti.h b/reg-io/RNifti.h
index 121053e5..16ca0b76 100644
--- a/reg-io/RNifti.h
+++ b/reg-io/RNifti.h
@@ -7,7 +7,7 @@
 
 // Defined since RNifti v0.10.0, and equal to 100 * (major version) + (minor version). May not
 // change if the API does not change, and in particular never changes with patch level
-#define RNIFTI_VERSION 104
+#define RNIFTI_VERSION 106
 
 // Versions 1 and 2 of the NIfTI reference library are mutually incompatible, but RNifti does some
 // work to get them to play nicely:
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index b03f5837..26cffe98 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -134,7 +134,7 @@ class NiftiImageData
         double getDouble (void *ptr) const { return static_cast<double>(getNative(ptr).real()); }
         int getInt (void *ptr) const { return static_cast<int>(getNative(ptr).real()); }
         void setComplex (void *ptr, const complex128_t value) const { setNative(ptr, std::complex<ElementType>(value)); }
-        void setDouble (void *ptr, const double value) const { setNative(ptr, std::complex<ElementType>(static_cast<ElementType>(value), 0.0)); }
+        void setDouble (void *ptr, const double value) const { setNative(ptr, std::complex<ElementType>(value, 0.0)); }
         void setInt (void *ptr, const int value) const { setNative(ptr, std::complex<ElementType>(static_cast<ElementType>(value), 0.0)); }
         void minmax (void *ptr, const size_t length, double *min, double *max) const;
     };
@@ -329,7 +329,9 @@ class NiftiImageData
         operator Rcomplex() const
         {
             const complex128_t value = parent.handler->getComplex(ptr);
-            Rcomplex rValue = { value.real(), value.imag() };
+            Rcomplex rValue;
+            rValue.r = value.real();
+            rValue.i = value.imag();
             if (parent.isScaled())
             {
                 rValue.r = rValue.r * parent.slope + parent.intercept;
@@ -351,7 +353,9 @@ class NiftiImageData
     class Iterator
     {
     private:
-        const NiftiImageData &parent;
+        // NB: "parent" cannot be a reference because reference members are immutable. That renders
+        // the class non-copy-assignable, which is a requirement for iterators (issue #31)
+        const NiftiImageData *parent;
         void *ptr;
         size_t step;
 
@@ -365,16 +369,17 @@ class NiftiImageData
 
         /**
          * Primary constructor
-         * @param parent A reference to the parent object
-         * @param ptr An opaque pointer to the memory underpinning the iterator
+         * @param parent A pointer to the parent object
+         * @param ptr An opaque pointer to the memory underpinning the iterator. The default,
+         *   \c nullptr, corresponds to the start of the parent object's data blob.
          * @param step The increment between elements within the blob, in bytes. If zero, the
          *   default, the width associated with the stored datatype will be used.
         **/
-        Iterator (const NiftiImageData &parent, void *ptr = nullptr, const size_t step = 0)
+        Iterator (const NiftiImageData *parent = nullptr, void *ptr = nullptr, const size_t step = 0)
             : parent(parent)
         {
-            this->ptr = (ptr == nullptr ? parent.dataPtr : ptr);
-            this->step = (step == 0 ? parent.handler->size() : step);
+            this->ptr = (ptr == nullptr ? parent->dataPtr : ptr);
+            this->step = (step == 0 ? parent->handler->size() : step);
         }
 
         /**
@@ -387,7 +392,7 @@ class NiftiImageData
         /**
          * Reset the iterator to point to the start of the data blob
         **/
-        void reset () { ptr = parent.dataPtr; }
+        void reset () { ptr = parent->dataPtr; }
 
         Iterator & operator++ () { ptr = static_cast<char*>(ptr) + step; return *this; }
         Iterator operator++ (int) { Iterator copy(*this); ptr = static_cast<char*>(ptr) + step; return copy; }
@@ -415,10 +420,10 @@ class NiftiImageData
         bool operator> (const Iterator &other) const { return (ptr > other.ptr); }
         bool operator< (const Iterator &other) const { return (ptr < other.ptr); }
 
-        const Element operator* () const { return Element(parent, ptr); }
-        Element operator* () { return Element(parent, ptr); }
-        const Element operator[] (const size_t i) const { return Element(parent, static_cast<char*>(ptr) + (i * step)); }
-        Element operator[] (const size_t i) { return Element(parent, static_cast<char*>(ptr) + (i * step)); }
+        const Element operator* () const { return Element(*parent, ptr); }
+        Element operator* () { return Element(*parent, ptr); }
+        const Element operator[] (const size_t i) const { return Element(*parent, static_cast<char*>(ptr) + (i * step)); }
+        Element operator[] (const size_t i) { return Element(*parent, static_cast<char*>(ptr) + (i * step)); }
     };
 
     /**
@@ -479,8 +484,7 @@ class NiftiImageData
         else
         {
             calibrateFrom(source);
-            for (size_t i = 0; i < source.length(); ++i)
-                (*this)[i] = source[i];
+            std::copy(source.begin(), source.end(), this->begin());
         }
     }
 
@@ -591,16 +595,16 @@ class NiftiImageData
     NiftiImageData & disown ()       { this->owner = false; return *this; }
 
     /** Obtain a constant iterator corresponding to the start of the blob */
-    const Iterator begin () const { return Iterator(*this); }
+    const Iterator begin () const { return Iterator(this); }
 
     /** Obtain a constant iterator corresponding to the end of the blob */
-    const Iterator end () const { return Iterator(*this, static_cast<char*>(dataPtr) + totalBytes()); }
+    const Iterator end () const { return Iterator(this, static_cast<char*>(dataPtr) + totalBytes()); }
 
     /** Obtain a mutable iterator corresponding to the start of the blob */
-    Iterator begin () { return Iterator(*this); }
+    Iterator begin () { return Iterator(this); }
 
     /** Obtain a mutable iterator corresponding to the end of the blob */
-    Iterator end () { return Iterator(*this, static_cast<char*>(dataPtr) + totalBytes()); }
+    Iterator end () { return Iterator(this, static_cast<char*>(dataPtr) + totalBytes()); }
 
     /**
      * Indexing operator, returning a constant element
@@ -1305,6 +1309,16 @@ class NiftiImage
     **/
     void acquire (nifti_image * const image);
 
+    /**
+     * Acquire the same pointer as another \c NiftiImage, incrementing the shared reference count
+     * @param source A reference to a \c NiftiImage
+    **/
+    void acquire (const NiftiImage &source)
+    {
+        refCount = source.refCount;
+        acquire(source.image);
+    }
+
     /**
      * Release the currently wrapped pointer, if it is not \c nullptr, decrementing the reference
      * count and releasing memory if there are no remaining references to the pointer
@@ -1318,6 +1332,12 @@ class NiftiImage
     **/
     void copy (const nifti_image *source, const Copy copy);
 
+    /**
+     * Copy the contents of another \c NiftiImage to create a new image, acquiring a new pointer
+     * @param source A reference to a \c NiftiImage
+    **/
+    void copy (const NiftiImage &source);
+
     /**
      * Copy the contents of a \ref Block to create a new image, acquiring a new pointer
      * @param source A reference to a \ref Block
@@ -1408,8 +1428,7 @@ class NiftiImage
         if (copy != Copy::None) {
             this->copy(source, copy);
         } else {
-            refCount = source.refCount;
-            acquire(source.image);
+            acquire(source);
         }
         RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from NiftiImage)", RNIFTI_NIFTILIB_VERSION, this->image);
     }
@@ -1451,6 +1470,34 @@ class NiftiImage
         RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from pointer)", RNIFTI_NIFTILIB_VERSION, this->image);
     }
 
+    /**
+     * Initialise using a NIfTI-1 header
+     * @param header A reference to a NIfTI-1 header struct
+    **/
+    NiftiImage (const nifti_1_header &header)
+        : NiftiImage()
+    {
+#if RNIFTI_NIFTILIB_VERSION == 1
+        acquire(nifti_convert_nhdr2nim(header, nullptr));
+#elif RNIFTI_NIFTILIB_VERSION == 2
+        acquire(nifti_convert_n1hdr2nim(header, nullptr));
+#endif
+        RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from header)", RNIFTI_NIFTILIB_VERSION, this->image);
+    }
+
+#if RNIFTI_NIFTILIB_VERSION == 2
+    /**
+     * Initialise using a NIfTI-2 header
+     * @param header A reference to a NIfTI-2 header struct
+    **/
+    NiftiImage (const nifti_2_header &header)
+        : NiftiImage()
+    {
+        acquire(nifti_convert_n2hdr2nim(header, nullptr));
+        RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from header)", RNIFTI_NIFTILIB_VERSION, this->image);
+    }
+#endif
+
     /**
      * Initialise from basic metadata, allocating and zeroing pixel data
      * @param dim A vector of image dimensions
@@ -2021,11 +2068,12 @@ class NiftiImage
      * @param dimCount Number of dimensions to consider
      * @return The number of voxels in the image
      */
-    static size_t calcVoxelNumber(const nifti_image *image, const int dimCount) {
+    static size_t calcVoxelNumber (const nifti_image *image, const int dimCount) {
         if (image == nullptr)
             return 0;
         size_t voxelNumber = 1;
-        for (int i = 1; i <= dimCount; i++) {
+        for (int i = 1; i <= dimCount; i++)
+        {
             const size_t dim = static_cast<size_t>(std::abs(image->dim[i]));
             voxelNumber *= dim > 0 ? dim : 1;
         }
@@ -2035,7 +2083,7 @@ class NiftiImage
     /**
      * Recalculate the number of voxels in the image and update the nvox field
     */
-    void recalcVoxelNumber() {
+    void recalcVoxelNumber () {
         if (image != nullptr)
             image->nvox = calcVoxelNumber(image, image->ndim);
     }
@@ -2061,7 +2109,7 @@ class NiftiImage
     /**
      * Return the total size of the image data in bytes
     */
-    size_t totalBytes() const
+    size_t totalBytes () const
     {
 #if RNIFTI_NIFTILIB_VERSION == 1
         return nifti_get_volsize(image);
@@ -2120,7 +2168,7 @@ class NiftiImage
      * @param A list of \ref Extension objects
      * @return Self, with the new extensions attached
     **/
-    NiftiImage & replaceExtensions (const std::list<Extension> extensions)
+    NiftiImage & replaceExtensions (const std::list<Extension> &extensions)
     {
         dropExtensions();
         for (std::list<Extension>::const_iterator it=extensions.begin(); it!=extensions.end(); ++it)
@@ -2147,7 +2195,7 @@ class NiftiImage
      * Set the intent name of the image
      * @param name A string giving the new intent name
     **/
-    void setIntentName(const std::string& name) {
+    void setIntentName (const std::string &name) {
         if (image != nullptr)
         {
             constexpr size_t intentNameLength = sizeof(image->intent_name) / sizeof(*image->intent_name);
@@ -2162,9 +2210,11 @@ class NiftiImage
      * @param datatype The datatype to use when writing the file
      * @param filetype The file type to create: a \c NIFTI_FTYPE constant or -1. In the latter case
      * the file name is used to determine the file type
+     * @param compression The \c zlib compression level to use, if appropriate. Valid values are
+      * between 0 and 9
      * @return A pair of strings, giving the final header and image paths in that order
     **/
-    std::pair<std::string,std::string> toFile (const std::string fileName, const int datatype = DT_NONE, const int filetype = -1) const;
+    std::pair<std::string,std::string> toFile (const std::string &fileName, const int datatype = DT_NONE, const int filetype = -1, const int compression = 6) const;
 
     /**
      * Write the image to a NIfTI-1 file
@@ -2172,9 +2222,11 @@ class NiftiImage
      * @param datatype The datatype to use when writing the file, or "auto"
      * @param filetype The file type to create: a \c NIFTI_FTYPE constant or -1. In the latter case
      * the file name is used to determine the file type
+     * @param compression The \c zlib compression level to use, if appropriate. Valid values are
+     * between 0 and 9
      * @return A pair of strings, giving the final header and image paths in that order
     **/
-    std::pair<std::string,std::string> toFile (const std::string fileName, const std::string &datatype, const int filetype = -1) const;
+    std::pair<std::string,std::string> toFile (const std::string &fileName, const std::string &datatype, const int filetype = -1, const int compression = 6) const;
 
 #ifdef USING_R
 
@@ -2189,7 +2241,7 @@ class NiftiImage
      * @param label A string labelling the image
      * @return An R character string with additional attributes
     **/
-    Rcpp::RObject toPointer (const std::string label) const;
+    Rcpp::RObject toPointer (const std::string &label) const;
 
     /**
      * A conditional method that calls either \ref toArray or \ref toPointer
@@ -2197,7 +2249,7 @@ class NiftiImage
      * @param label A string labelling the image
      * @return An R object
     **/
-    Rcpp::RObject toArrayOrPointer (const bool internal, const std::string label) const;
+    Rcpp::RObject toArrayOrPointer (const bool internal, const std::string &label) const;
 
 #endif
 
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index 6ae2866c..bf4b359b 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -75,7 +75,9 @@ inline int stringToDatatype (const std::string &datatype)
         datatypeCodes["uint32"] = DT_UINT32;
         datatypeCodes["int64"] = DT_INT64;
         datatypeCodes["uint64"] = DT_UINT64;
+        datatypeCodes["cfloat"] = DT_COMPLEX64;
         datatypeCodes["complex64"] = DT_COMPLEX64;
+        datatypeCodes["cdouble"] = DT_COMPLEX128;
         datatypeCodes["complex128"] = DT_COMPLEX128;
         datatypeCodes["complex"] = DT_COMPLEX128;
         datatypeCodes["rgb24"] = DT_RGB24;
@@ -91,9 +93,7 @@ inline int stringToDatatype (const std::string &datatype)
 
     if (datatypeCodes.count(lowerCaseDatatype) == 0)
     {
-        std::ostringstream message;
-        message << "Datatype \"" << datatype << "\" is not valid";
-        Rf_warning(message.str().c_str());
+        Rf_warning("Datatype \"%s\" is not valid", datatype.c_str());
         return DT_NONE;
     }
     else
@@ -233,16 +233,10 @@ inline void copyIfPresent (const Rcpp::List &list, const std::set<std::string> n
         const Rcpp::RObject object = list[name];
         const int length = Rf_length(object);
         if (length == 0)
-        {
-            std::ostringstream message;
-            message << "Field \"" << name << "\" is empty and will be ignored";
-            Rf_warning(message.str().c_str());
-        }
+            Rf_warning("Field \"%s\" is empty and will be ignored", name.c_str());
         else if (length > 1)
         {
-            std::ostringstream message;
-            message << "Field \"" << name << "\" has " << length << "elements, but only the first will be used";
-            Rf_warning(message.str().c_str());
+            Rf_warning("Field \"%s\" has %d elements, but only the first will be used", name.c_str(), length);
             target = Rcpp::as< std::vector<TargetType> >(object)[0];
         }
         else
@@ -624,7 +618,7 @@ inline NiftiImage::Xform::Vector4 NiftiImage::Xform::quaternion () const
 #elif RNIFTI_NIFTILIB_VERSION == 2
     nifti_dmat44_to_quatern(mat, &q[1], &q[2], &q[3], nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr);
 #endif
-    q[0] = 1 - (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]);
+    q[0] = 1.0 - (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]);
     return q;
 }
 
@@ -788,6 +782,13 @@ inline void NiftiImage::copy (const nifti_image *source, const Copy copy)
     }
 }
 
+inline void NiftiImage::copy (const NiftiImage &source)
+{
+    const nifti_image *sourceStruct = source;
+
+    copy(sourceStruct, Copy::Image);
+}
+
 inline void NiftiImage::copy (const Block &source)
 {
     const nifti_image *sourceStruct = source.image;
@@ -942,7 +943,12 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo
         data = call.eval();
     }
 
-    const int datatype = (Rf_isNull(data) ? DT_INT32 : sexpTypeToNiftiType(data.sexp_type()));
+    int datatype = (Rf_isNull(data) ? DT_INT32 : sexpTypeToNiftiType(data.sexp_type()));
+    if (data.inherits("rgbArray"))
+    {
+        const int channels = (data.hasAttribute("channels") ? data.attr("channels") : 3);
+        datatype = (channels == 4 ? DT_RGBA32 : DT_RGB24);
+    }
 
     dim_t dims[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
     const std::vector<dim_t> dimVector = mriImage.field("imageDims");
@@ -975,8 +981,15 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo
         // NB: nifti_get_volsize() will not be right here if there were tags
         const size_t dataSize = nVoxels * image->nbyper;
         this->image->data = calloc(1, dataSize);
-        if (datatype == DT_INT32)
+        if (datatype == DT_INT32 || datatype == DT_RGBA32)
             memcpy(this->image->data, INTEGER(data), dataSize);
+        else if (datatype == DT_RGB24)
+        {
+            NiftiImageData newData(image);
+            std::copy(INTEGER(data), INTEGER(data)+nVoxels, newData.begin());
+        }
+        else if (datatype == DT_COMPLEX128)
+            memcpy(this->image->data, COMPLEX(data), dataSize);
         else
             memcpy(this->image->data, REAL(data), dataSize);
     }
@@ -1273,7 +1286,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector<dim_t>
     nifti_brick_list brickList;
 
 #if RNIFTI_NIFTILIB_VERSION == 1
-    acquire(nifti_image_read_bricks(internal::stringToPath(path), static_cast<int>(volumes.size()), &volumes.front(), &brickList));
+    acquire(nifti_image_read_bricks(internal::stringToPath(path), volumes.size(), &volumes.front(), &brickList));
 
     if (image == nullptr)
         throw std::runtime_error("Failed to read image from path " + path);
@@ -1311,7 +1324,7 @@ inline void NiftiImage::updatePixDim (const std::vector<pixdim_t> &pixDims)
     for (int i=1; i<8; i++)
         image->pixdim[i] = 0.0;
 
-    const int pixdimLength = static_cast<int>(pixDims.size());
+    const int pixdimLength = pixDims.size();
     for (int i=0; i<std::min(pixdimLength,nDims); i++)
         image->pixdim[i+1] = pixDims[i];
 
@@ -1465,7 +1478,7 @@ inline NiftiImage & NiftiImage::reorient (const int icode, const int jcode, cons
         for (int j=0; j<3; j++)
             result(i,j) = nativeMat(i,0) * transform(0,j) + nativeMat(i,1) * transform(1,j) + nativeMat(i,2) * transform(2,j);
 
-        result(3,i) = (i == 3 ? 1.f : 0.f);
+        result(3,i) = (i == 3 ? 1.0 : 0.0);
     }
 
     // Extract the mapping between dimensions and the signs
@@ -1497,7 +1510,7 @@ inline NiftiImage & NiftiImage::reorient (const int icode, const int jcode, cons
 
         // Flip and/or permute the origin
         if (signs[j] < 0)
-            offset[j] = image->dim[locs[j]+1] - origin[locs[j]] - 1;
+            offset[j] = image->dim[locs[j]+1] - origin[locs[j]] - 1.0;
         else
             offset[j] = origin[locs[j]];
     }
@@ -1565,7 +1578,7 @@ inline NiftiImage & NiftiImage::reorient (const int icode, const int jcode, cons
             for (size_t i=0; i<supervolSize; i++, ++it)
             {
                 for (int j=0; j<3; j++)
-                    oldVec[j] = *(it + j*supervolSize);
+                    oldVec[j] = double(*(it + j*supervolSize));
                 const Xform::Vector3 newVec = transform * oldVec;
                 for (int j=0; j<3; j++)
                     *(it + j*supervolSize) = newVec[j];
@@ -1746,7 +1759,7 @@ inline const NiftiImage::Xform NiftiImage::xform (const bool preferQuaternion) c
         // No qform or sform so use pixdim (NB: other software may assume differently)
         Xform::Matrix matrix;
         for (int i=0; i<3; i++)
-            matrix(i,i) = (image->pixdim[i+1]==0 ? 1 : image->pixdim[i+1]);
+            matrix(i,i) = (image->pixdim[i+1]==0.0 ? 1.0 : image->pixdim[i+1]);
         matrix(3,3) = 1.0;
         return Xform(matrix);
     }
@@ -1874,36 +1887,43 @@ inline NiftiImage & NiftiImage::copyData (const nifti_image *other)
     return *this;
 }
 
-inline std::pair<std::string,std::string> NiftiImage::toFile (const std::string fileName, const int datatype, const int filetype) const
+inline std::pair<std::string,std::string> NiftiImage::toFile (const std::string &fileName, const int datatype, const int filetype, const int compression) const
 {
     const bool changingDatatype = (datatype != DT_NONE && !this->isNull() && datatype != image->datatype);
 
     // Copy the source image only if the datatype will be changed
-    NiftiImage imageToWrite(*this, Copy(changingDatatype));
+    NiftiImage imageToWrite(*this, changingDatatype ? Copy::Image : Copy::None);
 
     if (changingDatatype)
         imageToWrite.changeDatatype(datatype, true);
     if (filetype >= 0 && filetype <= NIFTI_MAX_FTYPE)
         imageToWrite->nifti_type = filetype;
 
+    const char *path = internal::stringToPath(fileName);
+
+    // If we're writing a gzipped file (only), append a compression level to the mode string
+    std::string mode = "wb";
+    if (nifti_is_gzfile(path) && compression >= 0 && compression <= 9)
+        mode += std::to_string(compression);
+
 #if RNIFTI_NIFTILIB_VERSION == 1
-    const int status = nifti_set_filenames(imageToWrite, internal::stringToPath(fileName), false, true);
+    const int status = nifti_set_filenames(imageToWrite, path, false, true);
     if (status != 0)
         throw std::runtime_error("Failed to set filenames for NIfTI object");
-    nifti_image_write(imageToWrite);
+    nifti_image_write_hdr_img(imageToWrite, 1, mode.c_str());
 #elif RNIFTI_NIFTILIB_VERSION == 2
-    const int status = nifti2_set_filenames(imageToWrite, internal::stringToPath(fileName), false, true);
+    const int status = nifti2_set_filenames(imageToWrite, path, false, true);
     if (status != 0)
         throw std::runtime_error("Failed to set filenames for NIfTI object");
-    nifti2_image_write(imageToWrite);
+    nifti2_image_write_hdr_img(imageToWrite, 1, mode.c_str());
 #endif
 
     return std::pair<std::string,std::string>(std::string(imageToWrite->fname), std::string(imageToWrite->iname));
 }
 
-inline std::pair<std::string,std::string> NiftiImage::toFile (const std::string fileName, const std::string &datatype, const int filetype) const
+inline std::pair<std::string,std::string> NiftiImage::toFile (const std::string &fileName, const std::string &datatype, const int filetype, const int compression) const
 {
-    return toFile(fileName, internal::stringToDatatype(datatype), filetype);
+    return toFile(fileName, internal::stringToDatatype(datatype), filetype, compression);
 }
 
 #ifdef USING_R
diff --git a/reg-io/RNifti/NiftiImage_print.h b/reg-io/RNifti/NiftiImage_print.h
index 2390a2ee..c8370249 100644
--- a/reg-io/RNifti/NiftiImage_print.h
+++ b/reg-io/RNifti/NiftiImage_print.h
@@ -11,8 +11,8 @@
 #define Rc_printf Rprintf
 #define Rc_fprintf_stdout(...) Rprintf(__VA_ARGS__)
 #define Rc_fprintf_stderr(...) REprintf(__VA_ARGS__)
-#define Rc_fputs_stdout(str) Rprintf(str)
-#define Rc_fputs_stderr(str) REprintf(str)
+#define Rc_fputs_stdout(str) Rprintf("%s", str)
+#define Rc_fputs_stderr(str) REprintf("%s", str)
 #define Rc_fputc_stdout(ch) Rprintf("%c", ch)
 #define Rc_fputc_stderr(ch) REprintf("%c", ch)
 
@@ -27,7 +27,7 @@
 #define Rc_fputs_stderr(str) fputs(str, stderr)
 #define Rc_fputc_stdout(ch) fputc(ch, stdout)
 #define Rc_fputc_stderr(ch) fputc(ch, stderr)
-#define Rf_warning(str) fprintf(stderr, "%s\n", str)
+#define Rf_warning(...) fprintf(stderr, __VA_ARGS__)
 #define Rprintf(...) fprintf(stderr, __VA_ARGS__)
 
 #endif // USING_R
diff --git a/reg-io/niftilib/nifti1.h b/reg-io/niftilib/nifti1.h
index 49e7602b..6a7498cf 100644
--- a/reg-io/niftilib/nifti1.h
+++ b/reg-io/niftilib/nifti1.h
@@ -872,7 +872,7 @@ typedef struct { unsigned char r,g,b; } rgb_byte ;
      as a displacement field or vector:
        - dataset must have a 5th dimension
        - intent_code must be NIFTI_INTENT_DISPVECT
-       - dim[5] must be the dimensionality of the displacment
+       - dim[5] must be the dimensionality of the displacement
          vector (e.g., 3 for spatial displacement, 2 for in-plane) */
 
 #define NIFTI_INTENT_DISPVECT  1006   /* specifically for displacements */
diff --git a/reg-io/niftilib/nifti1_io.c b/reg-io/niftilib/nifti1_io.c
index d8bee4da..5237bb76 100644
--- a/reg-io/niftilib/nifti1_io.c
+++ b/reg-io/niftilib/nifti1_io.c
@@ -1,6 +1,10 @@
 #define NIFTI1_IO_C
 
 #include "niftilib/nifti1_io.h"   /* typedefs, prototypes, macros, etc. */
+#include "niftilib/nifti1_io_version.h"
+
+#include <errno.h>
+#include <limits.h>
 
 /*****===================================================================*****/
 /*****     Sample functions to deal with NIFTI-1 and ANALYZE files       *****/
@@ -41,7 +45,7 @@ static char const * const gni_history[] =
   "     (FMRIB Centre, University of Oxford, UK)\n"
   "   - Mainly adding low-level IO and changing things to allow gzipped\n"
   "     files to be read and written\n"
-  "   - Full backwards compatability should have been maintained\n"
+  "   - Full backwards compatibility should have been maintained\n"
   "\n",
   "0.2  16 Nov 2004 [rickr]\n"
   "     (Rick Reynolds of the National Institutes of Health, SSCC/DIRP/NIMH)\n"
@@ -264,7 +268,7 @@ static char const * const gni_history[] =
   "1.12b 25 August 2005 [rickr] - changes by Hans Johnson\n",
   "1.13  25 August 2005 [rickr]\n",
   "   - finished changes by Hans for Insight\n"
-  "   - added const in all appropraite parameter locations (30-40)\n"
+  "   - added const in all appropriate parameter locations (30-40)\n"
   "     (any pointer referencing data that will not change)\n"
   "   - shortened all string constants below 509 character limit\n"
   "1.14  28 October 2005 [HJohnson]\n",
@@ -340,9 +344,13 @@ static char const * const gni_history[] =
   "1.45 10 May 2019 [rickr]: added NIFTI_ECODE_QUANTIPHYSE\n",
   "1.46 26 Sep 2019 [rickr]:\n"
   "   - nifti_read_ascii_image no longer closes fp or free's fname\n",
+  "2.1.0  18 Jun 2020 [leej3,hmjohnson,rickr]:\n"
+  "     - big version jump - changed to more formal library versioning\n",
+  "2.1.0.1 - non-release update - 16 Jun 2022 [rickr]:\n"
+  "        - add nifti_image_write_status\n",
   "----------------------------------------------------------------------\n"
 };
-static const char gni_version[] = "nifti library version 1.46 (26 Sep, 2019)";
+static const char gni_version[] = NIFTI1_IO_SOURCE_VERSION " (16 Jun, 2022)";
 
 /*! global nifti options structure - init with defaults */
 static nifti_global_options g_opts = {
@@ -443,12 +451,15 @@ static int   unescape_string   (char *str);  /* string utility functions */
 static char *escapize_string   (const char *str);
 
 /* internal I/O routines */
+static int nifti_image_write_engine(nifti_image *nim, int write_opts,
+           const char *opts, znzFile *imgfile, const nifti_brick_list *NBL);
 static znzFile nifti_image_load_prep( nifti_image *nim );
 static int     has_ascii_header(znzFile fp);
 /*---------------------------------------------------------------------------*/
 
 
 /* for calling from some main program */
+
 /*----------------------------------------------------------------------*/
 /*! display the nifti library module history (via stdout)
 *//*--------------------------------------------------------------------*/
@@ -540,7 +551,7 @@ nifti_image *nifti_image_read_bricks(const char * hname, int nbricks,
 
    if( !hname || !NBL ){
       Rc_fprintf_stderr("** nifti_image_read_bricks: bad params (%p,%p)\n",
-              hname, (void *)NBL);
+              (void *)hname, (void *)NBL);
       return NULL;
    }
 
@@ -769,7 +780,7 @@ int nifti_image_load_bricks( nifti_image * nim , int nbricks,
 
    if( rv != 0 ){
       nifti_free_NBL( NBL );  /* failure! */
-      NBL->nbricks = 0; /* repetative, but clear */
+      NBL->nbricks = 0; /* repetitive, but clear */
    }
 
    if( slist ){ free(slist); free(sindex); }
@@ -1409,8 +1420,6 @@ char const *nifti_orientation_string( int ii )
     \param nbyper   pointer to return value: number of bytes per voxel
     \param swapsize pointer to return value: size of swap blocks
 
-    \return appropriate values at nbyper and swapsize
-
     The swapsize is set to 0 if this datatype doesn't ever need swapping.
 
     \sa NIFTI1_DATATYPES in nifti1.h
@@ -1679,7 +1688,7 @@ mat44 nifti_mat44_inverse( mat44 R )
    v1  = R.m[0][3]; v2  = R.m[1][3]; v3  = R.m[2][3];  /* [  0   0   0   1 ] */
 
    deti = r11*r22*r33-r11*r32*r23-r21*r12*r33
-         +r21*r32*r13+r31*r12*r23-r31*r22*r13 ;
+         +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; /* determinant */
 
    if( deti != 0.0l ) deti = 1.0l / deti ;
 
@@ -1687,19 +1696,19 @@ mat44 nifti_mat44_inverse( mat44 R )
    Q.m[0][1] = (float)( deti*(-r12*r33+r32*r13) ) ;
    Q.m[0][2] = (float)( deti*( r12*r23-r22*r13) ) ;
    Q.m[0][3] = (float)( deti*(-r12*r23*v3+r12*v2*r33+r22*r13*v3
-                     -r22*v1*r33-r32*r13*v2+r32*v1*r23) ) ;
+                              -r22*v1*r33-r32*r13*v2+r32*v1*r23) ) ;
 
    Q.m[1][0] = (float)( deti*(-r21*r33+r31*r23) ) ;
    Q.m[1][1] = (float)( deti*( r11*r33-r31*r13) ) ;
    Q.m[1][2] = (float)( deti*(-r11*r23+r21*r13) ) ;
    Q.m[1][3] = (float)( deti*( r11*r23*v3-r11*v2*r33-r21*r13*v3
-                     +r21*v1*r33+r31*r13*v2-r31*v1*r23) ) ;
+                              +r21*v1*r33+r31*r13*v2-r31*v1*r23) ) ;
 
    Q.m[2][0] = (float)( deti*( r21*r32-r31*r22) ) ;
    Q.m[2][1] = (float)( deti*(-r11*r32+r31*r12) ) ;
    Q.m[2][2] = (float)( deti*( r11*r22-r21*r12) ) ;
    Q.m[2][3] = (float)( deti*(-r11*r22*v3+r11*r32*v2+r21*r12*v3
-                     -r21*r32*v1-r31*r12*v2+r31*r22*v1) ) ;
+                              -r21*r32*v1-r31*r12*v2+r31*r22*v1) ) ;
 
    Q.m[3][0] = Q.m[3][1] = Q.m[3][2] = 0.0l ;
    Q.m[3][3] = (deti == 0.0l) ? 0.0l : 1.0l ; /* failure flag if deti == 0 */
@@ -1943,7 +1952,7 @@ mat33 nifti_mat33_polar( mat33 A )
 }
 
 /*---------------------------------------------------------------------------*/
-/*! compute the (closest) orientation from a 4x4 ijk->xyz tranformation matrix
+/*! compute the (closest) orientation from a 4x4 ijk->xyz transformation matrix
 
    <pre>
    Input:  4x4 matrix that transforms (i,j,k) indexes to (x,y,z) coordinates,
@@ -2100,6 +2109,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
      case -2: i = NIFTI_A2P ; break ;
      case  3: i = NIFTI_I2S ; break ;
      case -3: i = NIFTI_S2I ; break ;
+     default: break ;
    }
 
    switch( jbest*qbest ){
@@ -2109,6 +2119,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
      case -2: j = NIFTI_A2P ; break ;
      case  3: j = NIFTI_I2S ; break ;
      case -3: j = NIFTI_S2I ; break ;
+     default: break ;
    }
 
    switch( kbest*rbest ){
@@ -2118,9 +2129,11 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
      case -2: k = NIFTI_A2P ; break ;
      case  3: k = NIFTI_I2S ; break ;
      case -3: k = NIFTI_S2I ; break ;
+     default: break ;
    }
 
-   *icod = i ; *jcod = j ; *kcod = k ; }
+   *icod = i ; *jcod = j ; *kcod = k ;
+}
 
 /*---------------------------------------------------------------------------*/
 /* Routines to swap byte arrays in various ways:
@@ -2134,8 +2147,8 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
 /*! swap each byte pair from the given list of n pairs
  *
  *  Due to alignment of structures at some architectures (e.g. on ARM),
- *  stick to char varaibles.
- *  Fixes http://bugs.debian.org/446893   Yaroslav <debian@onerussian.com>
+ *  stick to char variables.
+ *  Fixes <http://bugs.debian.org/446893> Yaroslav <debian @ onerussian.com>
  *
 *//*--------------------------------------------------------------------*/
 void nifti_swap_2bytes( size_t n , void *ar )    /* 2 bytes at a time */
@@ -2149,7 +2162,7 @@ void nifti_swap_2bytes( size_t n , void *ar )    /* 2 bytes at a time */
        tval = *cp1;  *cp1 = *cp2;  *cp2 = tval;
        cp1 += 2;
    }
-   }
+}
 
 /*----------------------------------------------------------------------*/
 /*! swap 4 bytes at a time from the given list of n sets of 4 bytes
@@ -2167,7 +2180,7 @@ void nifti_swap_4bytes( size_t n , void *ar )    /* 4 bytes at a time */
        tval = *cp1;  *cp1 = *cp2;  *cp2 = tval;
        cp0 += 4;
    }
-   }
+}
 
 /*----------------------------------------------------------------------*/
 /*! swap 8 bytes at a time from the given list of n sets of 8 bytes
@@ -2189,7 +2202,7 @@ void nifti_swap_8bytes( size_t n , void *ar )    /* 8 bytes at a time */
        }
        cp0 += 8;
    }
-   }
+}
 
 /*----------------------------------------------------------------------*/
 /*! swap 16 bytes at a time from the given list of n sets of 16 bytes
@@ -2209,7 +2222,7 @@ void nifti_swap_16bytes( size_t n , void *ar )    /* 16 bytes at a time */
        }
        cp0 += 16;
    }
-   }
+}
 
 #if 0  /* not important: save for version update     6 Jul 2010 [rickr] */
 
@@ -2251,7 +2264,7 @@ void nifti_swap_Nbytes( size_t n , int siz , void *ar )  /* subsuming case */
         Rc_fprintf_stderr("** NIfTI: cannot swap in %d byte blocks\n", siz);
         break ;
    }
-   }
+}
 
 
 /*-------------------------------------------------------------------------*/
@@ -2407,7 +2420,7 @@ void old_swap_nifti_header( struct nifti_1_header *h , int is_nifti )
      nifti_swap_4bytes(4,h->srow_y);
      nifti_swap_4bytes(4,h->srow_z);
    }
-   }
+}
 
 #endif /* RNIFTI_NIFTILIB_DEDUPLICATE */
 
@@ -2450,6 +2463,7 @@ int nifti_get_filesize( const char *pathname )
 
 #endif /* USE_STAT */
 
+
 /*----------------------------------------------------------------------*/
 /*! return the total volume size, in bytes
 
@@ -2574,7 +2588,7 @@ int nifti_validfilename(const char* fname)
 
     \return a pointer to the extension substring within the original
             function input parameter name, or NULL if not found.
-    \caution Note that if the input parameter is is immutabale
+    \warning Note that if the input parameter is is immutabale
              (i.e. a const char *) then this function performs an
              implicit casting away of the mutability constraint and
              the return parameter will appear as a mutable
@@ -2652,8 +2666,7 @@ int nifti_is_gzfile(const char* fname)
   if (fname == NULL) { return 0; }
 #ifdef HAVE_ZLIB
   { /* just so len doesn't generate compile warning */
-     int len;
-     len = (int)strlen(fname);
+     size_t len = strlen(fname);
      if (len < 3) return 0;  /* so we don't search before the name */
      if (fileext_compare(fname + strlen(fname) - 3,".gz")==0) { return 1; }
   }
@@ -2779,7 +2792,7 @@ char * nifti_findhdrname(const char* fname)
 
    /* note: efirst is 0 in the case of ".img" */
 
-   /* if the user passed an uppercase entension (.IMG), search for uppercase */
+   /* if the user passed an uppercase extension (.IMG), search for uppercase */
    if( eisupper ) {
       make_uppercase(elist[0]);
       make_uppercase(elist[1]);
@@ -2824,8 +2837,8 @@ char * nifti_findhdrname(const char* fname)
 /*! check current directory for existing image file
 
     \param fname filename to check for
-    \nifti_type  nifti_type for dataset - this determines whether to
-                 first check for ".nii" or ".img" (since both may exist)
+    \param nifti_type  nifti_type for dataset - this determines whether to
+                       first check for ".nii" or ".img" (since both may exist)
 
     \return filename of data/img file on success and NULL if no appropriate
             file could be found
@@ -3070,7 +3083,7 @@ int nifti_set_filenames( nifti_image * nim, const char * prefix, int check,
 
    if( !nim || !prefix ){
       Rc_fprintf_stderr("** nifti_set_filenames, bad params %p, %p\n",
-              (void *)nim,prefix);
+              (void *)nim,(void *)prefix);
       return -1;
    }
 
@@ -3105,11 +3118,11 @@ int nifti_set_filenames( nifti_image * nim, const char * prefix, int check,
     - if type 1, expect .nii (and names must match)
 
     \param nim       given nifti_image
-    \param show_warn if set, print a warning message for any mis-match
+    \param show_warn if set, print a warning message for any mismatch
 
     \return
         -   1 if the values seem to match
-        -   0 if there is a mis-match
+        -   0 if there is a mismatch
         -  -1 if there is not sufficient information to create file(s)
 
     \sa NIFTI_FTYPE_* codes in nifti1_io.h
@@ -3161,7 +3174,7 @@ int nifti_type_and_names_match( nifti_image * nim, int show_warn )
       errs++;
    }
 
-   if( errs ) return 0;   /* do not proceed, but this is just a mis-match */
+   if( errs ) return 0;   /* do not proceed, but this is just a mismatch */
 
    /* general tests */
    if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ){  /* .nii */
@@ -3396,7 +3409,7 @@ int nifti_set_type_from_names( nifti_image * nim )
 
    if( !nim->fname || !nim->iname ){
       Rc_fprintf_stderr("** NSTFN: missing filename(s) fname @ %p, iname @ %p\n",
-              nim->fname, nim->iname);
+              (void *)nim->fname, (void *)nim->iname);
       return -1;
    }
 
@@ -3677,7 +3690,7 @@ nifti_image* nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
      if( nhdr.dim[ii] <= 0 ) nhdr.dim[ii] = 1 ;
 
    /* fix any remaining bad dim[] values, so garbage does not propagate */
-   /* (only values 0 or 1 seem rational, otherwise set to arbirary 1)   */
+   /* (only values 0 or 1 seem rational, otherwise set to arbitrary 1)   */
    for( ii=nhdr.dim[0]+1 ; ii <= 7 ; ii++ )
      if( nhdr.dim[ii] != 1 && nhdr.dim[ii] != 0) nhdr.dim[ii] = 1 ;
 
@@ -4388,7 +4401,7 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain )
    nifti1_extender    extdr;      /* defines extension existence  */
    nifti1_extension   extn;       /* single extension to process  */
    nifti1_extension * Elist;      /* list of processed extensions */
-   int                posn, count;
+   int                count;
 
    if( !nim || znz_isnull(fp) ) {
       if( g_opts.debug > 0 )
@@ -4397,16 +4410,16 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain )
       return -1;
    }
 
-   posn = znztell(fp);
+   znz_off_t posn = znztell(fp);
 
    if( (posn != sizeof(nifti_1_header)) &&
        (nim->nifti_type != NIFTI_FTYPE_ASCII) )
       Rc_fprintf_stderr("** WARNING: posn not header size (%d, %d)\n",
-              posn, (int)sizeof(nifti_1_header));
+              (int)posn, (int)sizeof(nifti_1_header));
 
    if( g_opts.debug > 2 )
       Rc_fprintf_stderr("-d nre: posn = %d, offset = %d, type = %d, remain = %d\n",
-              posn, nim->iname_offset, nim->nifti_type, remain);
+              (int)posn, nim->iname_offset, nim->nifti_type, remain);
 
    if( remain < 16 ){
       if( g_opts.debug > 2 ){
@@ -4485,7 +4498,7 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain )
 
    \param nim    - nifti_image to add extension to
    \param data   - raw extension data
-   \param length - length of raw extension data
+   \param len    - length of raw extension data
    \param ecode  - extension code
 
    \sa extension codes NIFTI_ECODE_* in nifti1_io.h
@@ -4567,7 +4580,7 @@ static int nifti_fill_extension( nifti1_extension *ext, const char * data,
 
    if( !ext || !data || len < 0 ){
       Rc_fprintf_stderr("** fill_ext: bad params (%p,%p,%d)\n",
-              (void *)ext, data, len);
+              (void *)ext, (void *)data, len);
       return -1;
    } else if( ! nifti_is_valid_ecode(ecode) ){
       Rc_fprintf_stderr("** warning: writing unknown ecode %d\n", ecode);
@@ -4815,7 +4828,7 @@ static znzFile nifti_image_load_prep( nifti_image *nim )
       if ( g_opts.debug > 0 ){
          if( !nim ) Rc_fprintf_stderr("** ERROR: N_image_load: no nifti image\n");
          else Rc_fprintf_stderr("** ERROR: N_image_load: bad params (%p,%d,%u)\n",
-                      nim->iname, nim->nbyper, (unsigned)nim->nvox);
+                      (void *)nim->iname, nim->nbyper, (unsigned)nim->nvox);
       }
       return NULL;
    }
@@ -5375,7 +5388,7 @@ nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype)
 /*! basic creation of a nifti_image struct
 
    Create a nifti_image from the given dimensions and data type.
-   Optinally, allocate zero-filled data.
+   Optionally, allocate zero-filled data.
 
    \param dims      : optional dim[8]   (default {3,1,1,1,0,0,0,0})
    \param datatype  : optional datatype (default DT_FLOAT32)
@@ -5606,7 +5619,7 @@ int nifti_copy_extensions(nifti_image * nim_dest, const nifti_image * nim_src)
     and the bytes used for the data.  Each esize also needs to be a
     multiple of 16, so it may be greater than the sum of its 3 parts.
 *//*--------------------------------------------------------------------*/
-int nifti_extension_size(nifti_image *nim)
+static int nifti_extension_size(nifti_image *nim)
 {
    int c, size = 0;
 
@@ -5683,25 +5696,42 @@ znzFile nifti_image_write_hdr_img( nifti_image *nim , int write_data ,
   return nifti_image_write_hdr_img2(nim,write_data,opts,NULL,NULL);
 }
 
+/*----------------------------------------------------------------------*/
+/*! This writes the header (and optionally the image data) to file.
+ *
+ * This is now just a front-end for nifti_image_write_engine, but the
+ * engine will return a status (for success of write), which is promptly
+ * ignored by this function.
+ *
+ * \sa nifti_image_write_engine
+*//*--------------------------------------------------------------------*/
+znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
+               const char * opts, znzFile imgfile, const nifti_brick_list * NBL)
+{
+   znzFile loc_img = imgfile;   /* might be NULL, might point to open struct */
+   (void)nifti_image_write_engine(nim, write_opts, opts, &loc_img, NBL);
+   return loc_img;
+}
 
 #undef  ERREX
-#define ERREX(msg)                                                \
- do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_hdr_img: %s\n",(msg)) ;  \
-     return fp ; } while(0)
+#define ERREX(msg)                                                          \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_engine: %s\n",(msg)) ;  \
+     if( imgfile ) *imgfile = fp;                                           \
+     return 1 ; } while(0)
 
 
 /* ----------------------------------------------------------------------*/
 /*! This writes the header (and optionally the image data) to file
  *
- * If the image data file is left open it returns a valid znzFile handle.
- * It also uses imgfile as the open image file is not null, and modifies
- * it inside.
+ * If imgfile points to a NULL znzFile, it modifies it to a valid and open
+ * handle.  If it points to an non-NULL znzFile, it uses that as the open
+ * image and simply modifies that structure.  This also depends on write_opts.
  *
  * \param nim        nifti_image to write to disk
  * \param write_opts flags whether to write data and/or close file (see below)
  * \param opts       file-open options, probably "wb" from nifti_image_write()
- * \param imgfile    optional open znzFile struct, for writing image data
-                     (may be NULL)
+ * \param imgfile    pointer to optionally open znzFile, for writing image data
+                     (must not be NULL, contents might be NULL)
  * \param NBL        optional nifti_brick_list, containing the image data
                      (may be NULL)
  *
@@ -5715,19 +5745,19 @@ znzFile nifti_image_write_hdr_img( nifti_image *nim , int write_data ,
  * \sa nifti_image_write, nifti_image_write_hdr_img, nifti_image_free,
  *     nifti_set_filenames
 *//*---------------------------------------------------------------------*/
-znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
-               const char * opts, znzFile imgfile, const nifti_brick_list * NBL)
+static int nifti_image_write_engine(nifti_image *nim, int write_opts,
+             const char *opts, znzFile *imgfile, const nifti_brick_list *NBL)
 {
    struct nifti_1_header nhdr ;
    znzFile               fp=NULL;
    size_t                ss ;
    int                   write_data, leave_open;
-   char                  func[] = { "nifti_image_write_hdr_img2" };
+   char                  func[] = { "nifti_image_write_engine" };
 
    write_data = write_opts & 1;  /* just separate the bits now */
    leave_open = write_opts & 2;
 
-   if( ! nim                              ) ERREX("NULL input") ;
+   if( ! nim || ! imgfile                 ) ERREX("NULL input") ;
    if( ! nifti_validfilename(nim->fname)  ) ERREX("bad fname input") ;
    if( write_data && ! nim->data && ! NBL ) ERREX("no image data") ;
 
@@ -5736,6 +5766,7 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
 
    nifti_set_iname_offset(nim);
 
+   /* chit-chat */
    if( g_opts.debug > 1 ){
       Rc_fprintf_stderr("-d writing nifti file '%s'...\n", nim->fname);
       if( g_opts.debug > 2 )
@@ -5743,8 +5774,13 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
                  nim->nifti_type, nim->iname_offset);
    }
 
-   if( nim->nifti_type == NIFTI_FTYPE_ASCII )   /* non-standard case */
-      return nifti_write_ascii_image(nim,NBL,opts,write_data,leave_open);
+   /* get to work */
+
+   /* if non-standard ASCII, just write out and return */
+   if( nim->nifti_type == NIFTI_FTYPE_ASCII ) {
+      *imgfile = nifti_write_ascii_image(nim,NBL,opts,write_data,leave_open);
+      return 0; /* write_ascii has no status */
+   }
 
    nhdr = nifti_convert_nim2nhdr(nim);    /* create the nifti1_header struct */
 
@@ -5755,22 +5791,27 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
        }
        if( nim->iname == NULL ){ /* then make a new one */
          nim->iname = nifti_makeimgname(nim->fname,nim->nifti_type,0,0);
-         if( nim->iname == NULL ) return NULL;
+         if( nim->iname == NULL ) {
+            *imgfile = NULL;
+            return 1;
+         }
        }
    }
 
    /* if we have an imgfile and will write the header there, use it */
-   if( ! znz_isnull(imgfile) && nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ){
+   if( ! znz_isnull(*imgfile) && nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d using passed file for hdr\n");
-      fp = imgfile;
+      fp = *imgfile;
    }
    else {
+      /* we will write the header to a new file */
       if( g_opts.debug > 2 )
          Rc_fprintf_stderr("+d opening output file %s [%s]\n",nim->fname,opts);
       fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ;
       if( znz_isnull(fp) ){
          LNI_FERR(func,"cannot open output file",nim->fname);
-         return fp;
+         *imgfile = fp;
+         return 1;
       }
    }
 
@@ -5779,24 +5820,27 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
    ss = znzwrite(&nhdr , 1 , sizeof(nhdr) , fp); /* write header */
    if( ss < sizeof(nhdr) ){
       LNI_FERR(func,"bad header write to output file",nim->fname);
-      znzclose(fp); return fp;
+      znzclose(fp); *imgfile = fp; return 1;
    }
 
-   /* partial file exists, and errors have been printed, so ignore return */
+   /* write extensions; any errors will be printed */
    if( nim->nifti_type != NIFTI_FTYPE_ANALYZE )
-      (void)nifti_write_extensions(fp,nim);
+      if( nifti_write_extensions(fp,nim) < 0 ) {
+         znzclose(fp); *imgfile = fp; return 1;
+      }
 
    /* if the header is all we want, we are done */
    if( ! write_data && ! leave_open ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d header is all we want: done\n");
-      znzclose(fp); return(fp);
+      znzclose(fp); *imgfile = fp;  return 0;
    }
 
+   /* if multiple files (hdr/img), close fp and use (any) *imgfile for data */
    if( nim->nifti_type != NIFTI_FTYPE_NIFTI1_1 ){ /* get a new file pointer */
       znzclose(fp);         /* first, close header file */
-      if( ! znz_isnull(imgfile) ){
+      if( ! znz_isnull(*imgfile) ){
          if(g_opts.debug > 2) Rc_fprintf_stderr("+d using passed file for img\n");
-         fp = imgfile;
+         fp = *imgfile;
       }
       else {
          if( g_opts.debug > 2 )
@@ -5811,7 +5855,9 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
    if( write_data ) nifti_write_all_data(fp,nim,NBL);
    if( ! leave_open ) znzclose(fp);
 
-   return fp;
+   *imgfile = fp;
+
+   return 0;
 }
 
 
@@ -5871,28 +5917,74 @@ znzFile nifti_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL,
 *//*------------------------------------------------------------------------*/
 void nifti_image_write( nifti_image *nim )
 {
-   znzFile fp = nifti_image_write_hdr_img(nim,1,"wb");
+   znzFile fp=NULL;
+   int     rv;
+
+   rv = nifti_image_write_engine(nim, 1, "wb", &fp, NULL);
    if( fp ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niw: done with znzFile\n");
       free(fp);
    }
-   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d nifti_image_write: done\n");
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d nifti_image_write: done, status %d\n", rv);
+}
+
+
+/*--------------------------------------------------------------------------*/
+/*! Write a nifti_image to disk, returning 0 on success, else failure.
+
+    This simple write function takes a nifti_image as input and returns
+    the status of the operation.  It is akin to nifti_image_write, but
+    returns the status.  Changing nifti_image_write from void to int
+    would have backward compatibility ramifications.
+
+   \sa nifti_image_write_bricks, nifti_image_free, nifti_set_filenames,
+       nifti_image_write_engine, nifti_image_write
+*//*------------------------------------------------------------------------*/
+int nifti_image_write_status( nifti_image *nim )
+{
+   znzFile fp=NULL;   /* required for _engine, but promptly ignored */
+   int     rv;
+
+   rv = nifti_image_write_engine(nim, 1, "wb", &fp, NULL);
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d nifti_image_write_status: done, status %d\n", rv);
+   return rv;
 }
 
 
 /*----------------------------------------------------------------------*/
 /*! similar to nifti_image_write, but data is in NBL struct, not nim->data
 
+   \return 0 on success, 1 on error
+
    \sa nifti_image_write, nifti_image_free, nifti_set_filenames, nifti_free_NBL
 *//*--------------------------------------------------------------------*/
-void nifti_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL )
+int nifti_image_write_bricks_status( nifti_image *nim,
+                                     const nifti_brick_list * NBL )
 {
-   znzFile fp = nifti_image_write_hdr_img2(nim,1,"wb",NULL,NBL);
+   znzFile fp=NULL;
+   int     rv;
+
+   rv = nifti_image_write_engine(nim, 1, "wb", &fp, NBL);
    if( fp ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niwb: done with znzFile\n");
       free(fp);
    }
-   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d niwb: done writing bricks\n");
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d niwb: done writing bricks, status %d\n", rv);
+   return rv;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! similar to nifti_image_write, but data is in NBL struct, not nim->data
+
+   \sa nifti_image_write, nifti_image_free, nifti_set_filenames, nifti_free_NBL
+*//*--------------------------------------------------------------------*/
+void nifti_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL )
+{
+   (void)nifti_image_write_bricks_status(nim, NBL);
 }
 
 
@@ -6112,15 +6204,16 @@ char *nifti_image_to_ascii( const nifti_image *nim )
 
    if( nim == NULL ) return NULL ;   /* stupid caller */
 
-   buf = (char *)calloc(1,65534); /* longer than needed, to be safe */
+   const size_t bufLen = 65534; /* longer than needed, to be safe */
+   buf = (char *)calloc(1,bufLen);
    if( !buf ){
       Rc_fprintf_stderr("** NITA: failed to alloc %d bytes\n",65534);
       return NULL;
    }
 
-   sprintf( buf , "<nifti_image\n" ) ;   /* XML-ish opener */
+   snprintf( buf , bufLen , "<nifti_image\n" ) ;   /* XML-ish opener */
 
-   sprintf( buf+strlen(buf) , "  nifti_type = '%s'\n" ,
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  nifti_type = '%s'\n" ,
               (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1) ? "NIFTI-1+"
              :(nim->nifti_type == NIFTI_FTYPE_NIFTI1_2) ? "NIFTI-1"
              :(nim->nifti_type == NIFTI_FTYPE_ASCII   ) ? "NIFTI-1A"
@@ -6134,126 +6227,126 @@ char *nifti_image_to_ascii( const nifti_image *nim )
        - The result is that the NIFTI ASCII-format header is XML-compliant. */
 
    ebuf = escapize_string(nim->fname) ;
-   sprintf( buf+strlen(buf) , "  header_filename = %s\n",ebuf); free(ebuf);
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  header_filename = %s\n",ebuf); free(ebuf);
 
    ebuf = escapize_string(nim->iname) ;
-   sprintf( buf+strlen(buf) , "  image_filename = %s\n", ebuf); free(ebuf);
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  image_filename = %s\n", ebuf); free(ebuf);
 
-   sprintf( buf+strlen(buf) , "  image_offset = '%d'\n" , nim->iname_offset );
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  image_offset = '%d'\n" , nim->iname_offset );
 
-   sprintf(buf + strlen(buf), "  ndim = '%d'\n", nim->ndim);
-   sprintf(buf + strlen(buf), "  nx = '%d'\n", nim->nx);
+   snprintf(buf+strlen(buf), bufLen-strlen(buf), "  ndim = '%d'\n", nim->ndim);
+   snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nx = '%d'\n", nim->nx);
    if (nim->ndim > 1)
-     sprintf(buf + strlen(buf), "  ny = '%d'\n", nim->ny);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  ny = '%d'\n", nim->ny);
    if (nim->ndim > 2)
-     sprintf(buf + strlen(buf), "  nz = '%d'\n", nim->nz);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nz = '%d'\n", nim->nz);
    if (nim->ndim > 3)
-     sprintf(buf + strlen(buf), "  nt = '%d'\n", nim->nt);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nt = '%d'\n", nim->nt);
    if (nim->ndim > 4)
-     sprintf(buf + strlen(buf), "  nu = '%d'\n", nim->nu);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nu = '%d'\n", nim->nu);
    if (nim->ndim > 5)
-     sprintf(buf + strlen(buf), "  nv = '%d'\n", nim->nv);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nv = '%d'\n", nim->nv);
    if (nim->ndim > 6)
-     sprintf(buf + strlen(buf), "  nw = '%d'\n", nim->nw);
-   sprintf(buf + strlen(buf), "  dx = '%g'\n", nim->dx);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nw = '%d'\n", nim->nw);
+   snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dx = '%g'\n", nim->dx);
    if (nim->ndim > 1)
-     sprintf(buf + strlen(buf), "  dy = '%g'\n", nim->dy);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dy = '%g'\n", nim->dy);
    if (nim->ndim > 2)
-     sprintf(buf + strlen(buf), "  dz = '%g'\n", nim->dz);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dz = '%g'\n", nim->dz);
    if (nim->ndim > 3)
-     sprintf(buf + strlen(buf), "  dt = '%g'\n", nim->dt);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dt = '%g'\n", nim->dt);
    if (nim->ndim > 4)
-     sprintf(buf + strlen(buf), "  du = '%g'\n", nim->du);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  du = '%g'\n", nim->du);
    if (nim->ndim > 5)
-     sprintf(buf + strlen(buf), "  dv = '%g'\n", nim->dv);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dv = '%g'\n", nim->dv);
    if (nim->ndim > 6)
-     sprintf(buf + strlen(buf), "  dw = '%g'\n", nim->dw);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dw = '%g'\n", nim->dw);
 
-   sprintf( buf+strlen(buf) , "  datatype = '%d'\n" , nim->datatype ) ;
-   sprintf( buf+strlen(buf) , "  datatype_name = '%s'\n" ,
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  datatype = '%d'\n" , nim->datatype ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  datatype_name = '%s'\n" ,
                               nifti_datatype_string(nim->datatype) ) ;
 
-   sprintf( buf+strlen(buf) , "  nvox = '%u'\n" , (unsigned)nim->nvox ) ;
-   sprintf( buf+strlen(buf) , "  nbyper = '%d'\n" , nim->nbyper ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  nvox = '%u'\n" , (unsigned)nim->nvox ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  nbyper = '%d'\n" , nim->nbyper ) ;
 
-   sprintf( buf+strlen(buf) , "  byteorder = '%s'\n" ,
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  byteorder = '%s'\n" ,
             (nim->byteorder==MSB_FIRST) ? "MSB_FIRST" : "LSB_FIRST" ) ;
 
    if( nim->cal_min < nim->cal_max ){
-     sprintf( buf+strlen(buf) , "  cal_min = '%g'\n", nim->cal_min ) ;
-     sprintf( buf+strlen(buf) , "  cal_max = '%g'\n", nim->cal_max ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  cal_min = '%g'\n", nim->cal_min ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  cal_max = '%g'\n", nim->cal_max ) ;
    }
 
    if( nim->scl_slope != 0.0 ){
-     sprintf( buf+strlen(buf) , "  scl_slope = '%g'\n" , nim->scl_slope ) ;
-     sprintf( buf+strlen(buf) , "  scl_inter = '%g'\n" , nim->scl_inter ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  scl_slope = '%g'\n" , nim->scl_slope ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  scl_inter = '%g'\n" , nim->scl_inter ) ;
    }
 
    if( nim->intent_code > 0 ){
-     sprintf( buf+strlen(buf) , "  intent_code = '%d'\n", nim->intent_code ) ;
-     sprintf( buf+strlen(buf) , "  intent_code_name = '%s'\n" ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_code = '%d'\n", nim->intent_code ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_code_name = '%s'\n" ,
                                 nifti_intent_string(nim->intent_code) ) ;
-     sprintf( buf+strlen(buf) , "  intent_p1 = '%g'\n" , nim->intent_p1 ) ;
-     sprintf( buf+strlen(buf) , "  intent_p2 = '%g'\n" , nim->intent_p2 ) ;
-     sprintf( buf+strlen(buf) , "  intent_p3 = '%g'\n" , nim->intent_p3 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p1 = '%g'\n" , nim->intent_p1 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p2 = '%g'\n" , nim->intent_p2 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p3 = '%g'\n" , nim->intent_p3 ) ;
 
      if( nim->intent_name[0] != '\0' ){
        ebuf = escapize_string(nim->intent_name) ;
-       sprintf( buf+strlen(buf) , "  intent_name = %s\n",ebuf) ;
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_name = %s\n",ebuf) ;
        free(ebuf) ;
      }
    }
 
    if( nim->toffset != 0.0 )
-     sprintf( buf+strlen(buf) , "  toffset = '%g'\n",nim->toffset ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  toffset = '%g'\n",nim->toffset ) ;
 
    if( nim->xyz_units > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  xyz_units = '%d'\n"
               "  xyz_units_name = '%s'\n" ,
               nim->xyz_units , nifti_units_string(nim->xyz_units) ) ;
 
    if( nim->time_units > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  time_units = '%d'\n"
               "  time_units_name = '%s'\n" ,
               nim->time_units , nifti_units_string(nim->time_units) ) ;
 
    if( nim->freq_dim > 0 )
-     sprintf( buf+strlen(buf) , "  freq_dim = '%d'\n",nim->freq_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  freq_dim = '%d'\n",nim->freq_dim ) ;
    if( nim->phase_dim > 0 )
-     sprintf( buf+strlen(buf) , "  phase_dim = '%d'\n",nim->phase_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  phase_dim = '%d'\n",nim->phase_dim ) ;
    if( nim->slice_dim > 0 )
-     sprintf( buf+strlen(buf) , "  slice_dim = '%d'\n",nim->slice_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  slice_dim = '%d'\n",nim->slice_dim ) ;
    if( nim->slice_code > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  slice_code = '%d'\n"
               "  slice_code_name = '%s'\n" ,
               nim->slice_code , nifti_slice_string(nim->slice_code) ) ;
    if( nim->slice_start >= 0 && nim->slice_end > nim->slice_start )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  slice_start = '%d'\n"
               "  slice_end = '%d'\n"  , nim->slice_start , nim->slice_end ) ;
    if( nim->slice_duration != 0.0 )
-     sprintf( buf+strlen(buf) , "  slice_duration = '%g'\n",
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  slice_duration = '%g'\n",
               nim->slice_duration ) ;
 
    if( nim->descrip[0] != '\0' ){
      ebuf = escapize_string(nim->descrip) ;
-     sprintf( buf+strlen(buf) , "  descrip = %s\n",ebuf) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  descrip = %s\n",ebuf) ;
      free(ebuf) ;
    }
 
    if( nim->aux_file[0] != '\0' ){
      ebuf = escapize_string(nim->aux_file) ;
-     sprintf( buf+strlen(buf) , "  aux_file = %s\n",ebuf) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  aux_file = %s\n",ebuf) ;
      free(ebuf) ;
    }
 
    if( nim->qform_code > 0 ){
      int i,j,k ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  qform_code = '%d'\n"
               "  qform_code_name = '%s'\n"
      "  qto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
@@ -6267,7 +6360,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
          nim->qto_xyz.m[3][0] , nim->qto_xyz.m[3][1] ,
          nim->qto_xyz.m[3][2] , nim->qto_xyz.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
      "  qto_ijk_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
          nim->qto_ijk.m[0][0] , nim->qto_ijk.m[0][1] ,
          nim->qto_ijk.m[0][2] , nim->qto_ijk.m[0][3] ,
@@ -6278,7 +6371,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
          nim->qto_ijk.m[3][0] , nim->qto_ijk.m[3][1] ,
          nim->qto_ijk.m[3][2] , nim->qto_ijk.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  quatern_b = '%g'\n"
               "  quatern_c = '%g'\n"
               "  quatern_d = '%g'\n"
@@ -6291,7 +6384,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
 
      nifti_mat44_to_orientation( nim->qto_xyz , &i,&j,&k ) ;
      if( i > 0 && j > 0 && k > 0 )
-       sprintf( buf+strlen(buf) ,
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
                 "  qform_i_orientation = '%s'\n"
                 "  qform_j_orientation = '%s'\n"
                 "  qform_k_orientation = '%s'\n" ,
@@ -6303,7 +6396,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
    if( nim->sform_code > 0 ){
      int i,j,k ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  sform_code = '%d'\n"
               "  sform_code_name = '%s'\n"
      "  sto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
@@ -6317,7 +6410,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
          nim->sto_xyz.m[3][0] , nim->sto_xyz.m[3][1] ,
          nim->sto_xyz.m[3][2] , nim->sto_xyz.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
      "  sto_ijk matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
          nim->sto_ijk.m[0][0] , nim->sto_ijk.m[0][1] ,
          nim->sto_ijk.m[0][2] , nim->sto_ijk.m[0][3] ,
@@ -6330,7 +6423,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
 
      nifti_mat44_to_orientation( nim->sto_xyz , &i,&j,&k ) ;
      if( i > 0 && j > 0 && k > 0 )
-       sprintf( buf+strlen(buf) ,
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
                 "  sform_i_orientation = '%s'\n"
                 "  sform_j_orientation = '%s'\n"
                 "  sform_k_orientation = '%s'\n" ,
@@ -6339,9 +6432,9 @@ char *nifti_image_to_ascii( const nifti_image *nim )
                 nifti_orientation_string(k)  ) ;
    }
 
-   sprintf( buf+strlen(buf) , "  num_ext = '%d'\n", nim->num_ext ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  num_ext = '%d'\n", nim->num_ext ) ;
 
-   sprintf( buf+strlen(buf) , "/>\n" ) ;   /* XML-ish closer */
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "/>\n" ) ;   /* XML-ish closer */
 
    nbuf = (int)strlen(buf) ;
    buf  = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */
@@ -6379,7 +6472,7 @@ int nifti_short_order(void)   /* determine this CPU's byte order */
 /* macro to check lhs string against "n1"; if it matches,
    interpret rhs string as a number, and put it into nim->"n2" */
 
-#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)strtod(rhs,NULL)
+#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)(strtod(rhs,NULL))
 
 /* same, but where "n1" == "n2" */
 
@@ -6860,7 +6953,7 @@ compute_strides(int *strides,const int *size,int nbyper)
 /*---------------------------------------------------------------------------*/
 /*! read an arbitrary subregion from a nifti image
 
-    This function may be used to read a single arbitary subregion of any
+    This function may be used to read a single arbitrary subregion of any
     rectangular size from a nifti dataset, such as a small 5x5x5 subregion
     around the center of a 3D image.
 
@@ -6881,7 +6974,7 @@ compute_strides(int *strides,const int *size,int nbyper)
           speed and possibly repeated calls to this function.
     \return
         -  the total number of bytes read, or < 0 on failure
-        -  the read and byte-swapped data, in 'data'            </pre>
+        -  the read and byte-swapped data, in 'data'
 
     \sa nifti_image_read, nifti_image_free, nifti_image_read_bricks
         nifti_image_load, nifti_read_collapsed_image
@@ -6894,7 +6987,7 @@ int nifti_read_subregion_image( nifti_image * nim,
   znzFile fp;                   /* file to read */
   int i,j,k,l,m,n;              /* indices for dims */
   long int bytes = 0;           /* total # bytes read */
-  int total_alloc_size;         /* size of buffer allocation */
+  size_t total_alloc_size;      /* size of buffer allocation */
   char *readptr;                /* where in *data to read next */
   int strides[7];               /* strides between dimensions */
   int collapsed_dims[8];        /* for read_collapsed_image */
@@ -6965,6 +7058,13 @@ int nifti_read_subregion_image( nifti_image * nim,
 
   /* get the file open */
   fp = nifti_image_load_prep( nim );
+  if(znz_isnull(fp))
+    {
+    if(g_opts.debug > 0)
+      Rc_fprintf_stderr("** nifti_read_subregion_image, failed load_prep\n");
+    return -1;
+    }
+
   /* the current offset is just past the nifti header, save
    * location so that SEEK_SET can be used below
    */
@@ -6989,9 +7089,10 @@ int nifti_read_subregion_image( nifti_image * nim,
     {
     if(g_opts.debug > 1)
       {
-      Rc_fprintf_stderr("allocation of %d bytes failed\n",total_alloc_size);
-      return -1;
+      Rc_fprintf_stderr("allocation of %d bytes failed\n", (int)total_alloc_size);
       }
+    znzclose(fp);
+    return -1;
     }
 
   /* point to start of data buffer as char * */
@@ -7038,11 +7139,12 @@ int nifti_read_subregion_image( nifti_image * nim,
               nread = (int)nifti_read_buffer(fp, readptr, read_amount, nim);
               if(nread != read_amount)
                 {
-                if(g_opts.debug > 1)
+                if(g_opts.debug > 0)
                   {
                   Rc_fprintf_stderr("read of %d bytes failed\n",read_amount);
-                  return -1;
                   }
+                znzclose(fp);
+                return -1;
                 }
               bytes += nread;
               readptr += read_amount;
@@ -7053,6 +7155,7 @@ int nifti_read_subregion_image( nifti_image * nim,
       }
     }
   }
+  znzclose(fp);
   return bytes;
 }
 
@@ -7256,7 +7359,7 @@ int * nifti_get_intlist( int nvals , const char * str )
    int *subv = NULL ;
    int *subv_realloc = NULL;
    int ii , ipos , nout , slen ;
-   int ibot,itop,istep , nused ;
+   int ibot,itop,istep ;
    char *cpt ;
 
    /* Meaningless input? */
@@ -7292,7 +7395,13 @@ int * nifti_get_intlist( int nvals , const char * str )
       if( str[ipos] == '$' ){  /* special case */
          ibot = nvals-1 ; ipos++ ;
       } else {                 /* decode an integer */
-         ibot = strtol( str+ipos , &cpt , 10 ) ;
+         errno = 0;
+         long temp = strtol( str+ipos , &cpt , 10 ) ;
+         if( (temp == 0 && errno != 0) || temp <= INT_MIN || temp >= INT_MAX){
+            Rc_fprintf_stderr("** ERROR: list index does not fit in int\n") ;
+            free(subv) ; return NULL ;
+         }
+         ibot = (int)temp;
          if( ibot < 0 ){
            Rc_fprintf_stderr("** ERROR: list index %d is out of range 0..%d\n",
                    ibot,nvals-1) ;
@@ -7303,7 +7412,7 @@ int * nifti_get_intlist( int nvals , const char * str )
                    ibot,nvals-1) ;
            free(subv) ; return NULL ;
          }
-         nused = (cpt-(str+ipos)) ;
+         long nused = (cpt-(str+ipos)) ;
          if( ibot == 0 && nused == 0 ){
            Rc_fprintf_stderr("** ERROR: list syntax error '%s'\n",str+ipos) ;
            free(subv) ; return NULL ;
@@ -7349,7 +7458,13 @@ int * nifti_get_intlist( int nvals , const char * str )
       if( str[ipos] == '$' ){  /* special case */
          itop = nvals-1 ; ipos++ ;
       } else {                 /* decode an integer */
-         itop = strtol( str+ipos , &cpt , 10 ) ;
+         errno = 0;
+         long temp = strtol( str+ipos , &cpt , 10 ) ;
+         if( (temp == 0 && errno != 0) || temp <= INT_MIN || temp >= INT_MAX){
+            Rc_fprintf_stderr("** ERROR: list index does not fit in int\n") ;
+            free(subv) ; return NULL ;
+         }
+         itop = (int)temp;
          if( itop < 0 ){
            Rc_fprintf_stderr("** ERROR: index %d is out of range 0..%d\n",
                    itop,nvals-1) ;
@@ -7360,7 +7475,7 @@ int * nifti_get_intlist( int nvals , const char * str )
                    itop,nvals-1) ;
            free(subv) ; return NULL ;
          }
-         nused = (cpt-(str+ipos)) ;
+         long nused = (cpt-(str+ipos)) ;
          if( itop == 0 && nused == 0 ){
            Rc_fprintf_stderr("** ERROR: index list syntax error '%s'\n",str+ipos) ;
            free(subv) ; return NULL ;
@@ -7378,12 +7493,18 @@ int * nifti_get_intlist( int nvals , const char * str )
 
       if( str[ipos] == '(' ){  /* decode an integer */
          ipos++ ;
-         istep = strtol( str+ipos , &cpt , 10 ) ;
+         errno = 0;
+         long temp = strtol( str+ipos , &cpt , 10 ) ;
+         if( (temp == 0 && errno != 0) || temp <= INT_MIN || temp >= INT_MAX){
+            Rc_fprintf_stderr("** ERROR: list index does not fit in int\n") ;
+            free(subv) ; return NULL ;
+         }
+         istep = (int)temp;
          if( istep == 0 ){
            Rc_fprintf_stderr("** ERROR: index loop step is 0!\n") ;
            free(subv) ; return NULL ;
          }
-         nused = (cpt-(str+ipos)) ;
+         long nused = (cpt-(str+ipos)) ;
          ipos += nused ;
          if( str[ipos] == ')' ) ipos++ ;
          if( (ibot-itop)*istep > 0 ){
diff --git a/reg-io/niftilib/nifti1_io.h b/reg-io/niftilib/nifti1_io.h
index 0e95531c..5c67b585 100644
--- a/reg-io/niftilib/nifti1_io.h
+++ b/reg-io/niftilib/nifti1_io.h
@@ -49,7 +49,7 @@ extern "C" {
 
       Mainly adding low-level IO and changing things to allow gzipped files
       to be read and written
-      Full backwards compatability should have been maintained
+      Full backwards compatibility should have been maintained
 
    Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health)
    Date: December 2004
@@ -316,8 +316,12 @@ int          nifti_read_subregion_image( nifti_image * nim,
                                          void ** data );
 
 void         nifti_image_write   ( nifti_image * nim ) ;
+int          nifti_image_write_status( nifti_image *nim );
+
 void         nifti_image_write_bricks(nifti_image * nim,
                                       const nifti_brick_list * NBL);
+int          nifti_image_write_bricks_status(nifti_image * nim,
+                                             const nifti_brick_list * NBL);
 void         nifti_image_infodump( const nifti_image * nim ) ;
 
 void         nifti_disp_lib_hist( void ) ;     /* to display library history */
@@ -534,7 +538,7 @@ typedef struct {
     char const * const name;           /* text string to match #define */
 } nifti_type_ele;
 
-#undef  LNI_FERR /* local nifti file error, to be compact and repetative */
+#undef  LNI_FERR /* local nifti file error, to be compact and repetitive */
 #ifdef USING_R
 #define LNI_FERR(func,msg,file)                                      \
             Rf_warning("%s: %s '%s'\n",func,msg,file)
diff --git a/reg-io/niftilib/nifti1_io_version.h b/reg-io/niftilib/nifti1_io_version.h
new file mode 100644
index 00000000..ac5e8203
--- /dev/null
+++ b/reg-io/niftilib/nifti1_io_version.h
@@ -0,0 +1,16 @@
+/* NOTE:  When changing version consider the impact on versions in
+  nifti2_io_version.h nifti1_io_version.h nifticdf_version.h and znzlib.h
+*/
+#define NIFTI1_IO_VERSION_MAJOR 2
+#define NIFTI1_IO_VERSION_MINOR 1
+#define NIFTI1_IO_VERSION_PATCH 0
+
+/* main string macros: NIFTI1_IO_VERSION and NIFTI1_IO_SOURCE_VERSION */
+#define NIFTI1_IO_VERSION_TO_STRING(x) NIFTI1_IO_VERSION_TO_STRING0(x)
+#define NIFTI1_IO_VERSION_TO_STRING0(x) #x
+#define NIFTI1_IO_VERSION                                   \
+   NIFTI1_IO_VERSION_TO_STRING(NIFTI1_IO_VERSION_MAJOR)     \
+   "." NIFTI1_IO_VERSION_TO_STRING(NIFTI1_IO_VERSION_MINOR) \
+   "." NIFTI1_IO_VERSION_TO_STRING(NIFTI1_IO_VERSION_PATCH)
+
+#define NIFTI1_IO_SOURCE_VERSION "NIFTI1_IO version " NIFTI1_IO_VERSION
diff --git a/reg-io/niftilib/nifti2_io.c b/reg-io/niftilib/nifti2_io.c
index a87fa3fd..634bef72 100644
--- a/reg-io/niftilib/nifti2_io.c
+++ b/reg-io/niftilib/nifti2_io.c
@@ -1,6 +1,7 @@
 #define NIFTI2_IO_C
 
 #include "niftilib/nifti2_io.h"   /* typedefs, prototypes, macros, etc. */
+#include "niftilib/nifti2_io_version.h"
 
 /*****===================================================================*****/
 /*****     Sample functions to deal with NIFTI-1,2 and ANALYZE files     *****/
@@ -41,7 +42,7 @@ static char const * const gni1_history[] =
   "     (FMRIB Centre, University of Oxford, UK)\n"
   "   - Mainly adding low-level IO and changing things to allow gzipped\n"
   "     files to be read and written\n"
-  "   - Full backwards compatability should have been maintained\n"
+  "   - Full backwards compatibility should have been maintained\n"
   "\n",
   "0.2  16 Nov 2004 [rickr]\n"
   "     (Rick Reynolds of the National Institutes of Health, SSCC/DIRP/NIMH)\n"
@@ -192,7 +193,7 @@ static char const * const gni1_history[] =
   "\n",
   "1.3  09 Feb 2005 [rickr]\n"
   "   - nifti1.h: added doxygen comments for extension structs\n"
-  "   - nifti1_io.h: put most #defines in #ifdef NIFTI1_IO_C block\n"
+  "   - nifti1_io.h: put most #defines in #ifdef _NIFTI1_IO_C_ block\n"
   "   - added a doxygen-style description to every exported function\n"
   "   - added doxygen-style comments within some functions\n"
   "   - re-exported many znzFile functions that I had made static\n"
@@ -264,7 +265,7 @@ static char const * const gni1_history[] =
   "1.12b 25 August 2005 [rickr] - changes by Hans Johnson\n",
   "1.13  25 August 2005 [rickr]\n",
   "   - finished changes by Hans for Insight\n"
-  "   - added const in all appropraite parameter locations (30-40)\n"
+  "   - added const in all appropriate parameter locations (30-40)\n"
   "     (any pointer referencing data that will not change)\n"
   "   - shortened all string constants below 509 character limit\n"
   "1.14  28 October 2005 [HJohnson]\n",
@@ -397,11 +398,16 @@ static char const * const gni2_history[] =
   "2.09 10 May, 2019 [rickr]: added NIFTI_ECODE_QUANTIPHYSE\n"
   "2.10 26 Sep, 2019 [rickr]: nifti_read_ascii_image no longer closes fp\n",
   "2.11  3 Oct, 2019 [rickr]: added nifti_[d]mat33_mul\n",
+  "2.1.0  18 Jun, 2020 [leej3,hmjohnson,rickr]:\n"
+  "     - changed to more formal library versioning\n",
+  "2.1.0.1 - non-release update -  2 Mar, 2022 [rickr]\n"
+  "        - cast a few more pedantic void*'s\n"
+  "2.1.0.2 - non-release update - 16 Jun, 2022 [rickr]\n"
+  "        - add nifti_image_write_status\n",
   "----------------------------------------------------------------------\n"
 };
 
-static const char gni_version[]
-        = "nifti-2 library version 2.11 (3 Oct, 2019)";
+static const char gni_version[] = NIFTI2_IO_SOURCE_VERSION " (16 Jun, 2022)";
 
 /*! global nifti options structure - init with defaults */
 /*  see 'option accessor functions'                     */
@@ -489,12 +495,12 @@ static int  nifti_NBL_matches_nim(const nifti_image *nim,
                                   const nifti_brick_list *NBL);
 
 /* for nifti_read_collapsed_image: */
-static int  rci_read_data(nifti_image *nim, int *pivots, int64_t *prods,
+static int  rci_read_data(nifti_image *nim, int64_t *pivots, int64_t *prods,
                           int nprods, const int64_t dims[], char *data,
                           znzFile fp, int64_t base_offset);
 static int rci_alloc_mem(void **data, const int64_t prods[8], int nprods, int nbyper);
 static int  make_pivot_list(nifti_image * nim, const int64_t dims[],
-                            int pivots[], int64_t prods[], int * nprods );
+                            int64_t pivots[], int64_t prods[], int * nprods );
 
 /* misc */
 static int   compare_strlist   (const char * str, char ** strlist, int len);
@@ -514,6 +520,8 @@ static char *escapize_string   (const char *str);
 static int  nifti_ext_type_index(nifti_image * nim, int ecode);
 
 /* internal I/O routines */
+static int nifti_image_write_engine(nifti_image *nim, int write_opts,
+        const char * opts, znzFile * imgfile, const nifti_brick_list * NBL);
 static znzFile nifti_image_load_prep( nifti_image *nim );
 static int     has_ascii_header(znzFile fp);
 /*---------------------------------------------------------------------------*/
@@ -629,7 +637,7 @@ nifti_image *nifti2_image_read_bricks(const char * hname, int64_t nbricks,
 
    if( !hname || !NBL ){
       Rc_fprintf_stderr("** nifti_image_read_bricks: bad params (%p,%p)\n",
-              hname, (void *)NBL);
+              (void *)hname, (void *)NBL);
       return NULL;
    }
 
@@ -866,7 +874,7 @@ int nifti2_image_load_bricks( nifti_image * nim , int64_t nbricks,
 
    if( rv != 0 ){
       nifti_free_NBL( NBL );  /* failure! */
-      NBL->nbricks = 0; /* repetative, but clear */
+      NBL->nbricks = 0; /* repetitive, but clear */
    }
 
    if( slist ){ free(slist); free(sindex); }
@@ -1524,8 +1532,6 @@ char const *nifti_orientation_string( int ii )
     \param nbyper   pointer to return value: number of bytes per voxel
     \param swapsize pointer to return value: size of swap blocks
 
-    \return appropriate values at nbyper and swapsize
-
     The swapsize is set to 0 if this datatype doesn't ever need swapping.
 
     \sa NIFTI1_DATATYPES in nifti1.h
@@ -2613,7 +2619,7 @@ mat33 nifti_mat33_polar( mat33 A )
 }
 
 /*---------------------------------------------------------------------------*/
-/*! compute the (closest) orientation from a 4x4 ijk->xyz tranformation matrix
+/*! compute the (closest) orientation from a 4x4 ijk->xyz transformation matrix
 
    <pre>
    Input:  4x4 matrix that transforms (i,j,k) indexes to (x,y,z) coordinates,
@@ -2771,7 +2777,7 @@ void nifti_dmat44_to_orientation( nifti_dmat44 R ,
      case -2: i = NIFTI_A2P ; break ;
      case  3: i = NIFTI_I2S ; break ;
      case -3: i = NIFTI_S2I ; break ;
-     default: break;
+     default: break ;
    }
 
    switch( jbest*qbest ){
@@ -2781,7 +2787,7 @@ void nifti_dmat44_to_orientation( nifti_dmat44 R ,
      case -2: j = NIFTI_A2P ; break ;
      case  3: j = NIFTI_I2S ; break ;
      case -3: j = NIFTI_S2I ; break ;
-     default: break;
+     default: break ;
    }
 
    switch( kbest*rbest ){
@@ -2791,13 +2797,13 @@ void nifti_dmat44_to_orientation( nifti_dmat44 R ,
      case -2: k = NIFTI_A2P ; break ;
      case  3: k = NIFTI_I2S ; break ;
      case -3: k = NIFTI_S2I ; break ;
-     default: break;
+     default: break ;
    }
 
    *icod = i ; *jcod = j ; *kcod = k ; }
 
 /*---------------------------------------------------------------------------*/
-/*! compute the (closest) orientation from a 4x4 ijk->xyz tranformation matrix
+/*! compute the (closest) orientation from a 4x4 ijk->xyz transformation matrix
 
    <pre>
    Input:  4x4 matrix that transforms (i,j,k) indexes to (x,y,z) coordinates,
@@ -2991,8 +2997,8 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
 /*! swap each byte pair from the given list of n pairs
  *
  *  Due to alignment of structures at some architectures (e.g. on ARM),
- *  stick to char varaibles.
- *  Fixes http://bugs.debian.org/446893   Yaroslav <debian@onerussian.com>
+ *  stick to char variables.
+ *  Fixes <http://bugs.debian.org/446893> Yaroslav <debian @ onerussian.com>
  *
 *//*--------------------------------------------------------------------*/
 void nifti_swap_2bytes( int64_t n , void *ar )    /* 2 bytes at a time */
@@ -3494,7 +3500,7 @@ int nifti_validfilename(const char* fname)
 
     \return a pointer to the extension substring within the original
             function input parameter name, or NULL if not found.
-    \caution Note that if the input parameter is is immutabale
+    \warning Note that if the input parameter is is immutabale
              (i.e. a const char *) then this function performs an
              implicit casting away of the mutability constraint and
              the return parameter will appear as a mutable
@@ -3574,8 +3580,7 @@ int nifti_is_gzfile(const char* fname)
   if (fname == NULL) { return 0; }
 #ifdef HAVE_ZLIB
   { /* just so len doesn't generate compile warning */
-     int len;
-     len = (int)strlen(fname);
+     size_t len = strlen(fname);
      if (len < 3) return 0;  /* so we don't search before the name */
      if (fileext_compare(fname + strlen(fname) - 3,".gz")==0) { return 1; }
   }
@@ -3723,7 +3728,7 @@ char * nifti_findhdrname(const char* fname)
 
    /* note: efirst is 0 in the case of ".img" */
 
-   /* if the user passed an uppercase entension (.IMG), search for uppercase */
+   /* if the user passed an uppercase extension (.IMG), search for uppercase */
    if( eisupper ) {
       make_uppercase(elist[0]);
       make_uppercase(elist[1]);
@@ -3768,8 +3773,8 @@ char * nifti_findhdrname(const char* fname)
 /*! check current directory for existing image file
 
     \param fname filename to check for
-    \nifti_type  nifti_type for dataset - this determines whether to
-                 first check for ".nii" or ".img" (since both may exist)
+    \param nifti_type  nifti_type for dataset - this determines whether to
+                       first check for ".nii" or ".img" (since both may exist)
 
     \return filename of data/img file on success and NULL if no appropriate
             file could be found
@@ -4025,7 +4030,7 @@ int nifti2_set_filenames( nifti_image * nim, const char * prefix, int check,
 
    if( !nim || !prefix ){
       Rc_fprintf_stderr("** nifti_set_filenames, bad params %p, %p\n",
-              (void *)nim,prefix);
+              (void *)nim, (void *)prefix);
       return -1;
    }
 
@@ -4060,11 +4065,11 @@ int nifti2_set_filenames( nifti_image * nim, const char * prefix, int check,
     - if type 1, expect .nii (and names must match)
 
     \param nim       given nifti_image
-    \param show_warn if set, print a warning message for any mis-match
+    \param show_warn if set, print a warning message for any mismatch
 
     \return
         -   1 if the values seem to match
-        -   0 if there is a mis-match
+        -   0 if there is a mismatch
         -  -1 if there is not sufficient information to create file(s)
 
     \sa NIFTI_FTYPE_* codes in nifti1_io.h
@@ -4116,7 +4121,7 @@ int nifti2_type_and_names_match( nifti_image * nim, int show_warn )
       errs++;
    }
 
-   if( errs ) return 0;   /* do not proceed, but this is just a mis-match */
+   if( errs ) return 0;   /* do not proceed, but this is just a mismatch */
 
    /* general tests */
    if( (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1) ||
@@ -4353,7 +4358,7 @@ int nifti2_set_type_from_names( nifti_image * nim )
 
    if( !nim->fname || !nim->iname ){
       Rc_fprintf_stderr("** NIFTI_STFN: NULL filename(s) fname @ %p, iname @ %p\n",
-              nim->fname, nim->iname);
+              (void *)nim->fname, (void *)nim->iname);
       return -1;
    }
 
@@ -4376,8 +4381,10 @@ int nifti2_set_type_from_names( nifti_image * nim )
       nim->nifti_type = NIFTI_FTYPE_ASCII;
    } else {
       /* not too picky here, do what must be done, and then verify */
-      if( strcmp(nim->fname, nim->iname) == 0 )          /* one file, type 1 */
-         nim->nifti_type = (nim->nifti_type >= NIFTI_FTYPE_NIFTI2_1) ? NIFTI_FTYPE_NIFTI2_1 : NIFTI_FTYPE_NIFTI1_1;
+      if( strcmp(nim->fname, nim->iname) == 0 ) {        /* one file, type 1 */
+         nim->nifti_type = (nim->nifti_type >= NIFTI_FTYPE_NIFTI2_1) ?
+                              NIFTI_FTYPE_NIFTI2_1 : NIFTI_FTYPE_NIFTI1_1;
+      }
       else if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ) /* cannot be type 1 */
          nim->nifti_type = NIFTI_FTYPE_NIFTI1_2;
       else if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_1 )
@@ -4729,7 +4736,7 @@ nifti_image* nifti_convert_n1hdr2nim(nifti_1_header nhdr, const char * fname)
      if( nhdr.dim[ii] <= 0 ) nhdr.dim[ii] = 1 ;
 
    /* fix any remaining bad dim[] values, so garbage does not propagate */
-   /* (only values 0 or 1 seem rational, otherwise set to arbirary 1)   */
+   /* (only values 0 or 1 seem rational, otherwise set to arbitrary 1)   */
    for( ii=nhdr.dim[0]+1 ; ii <= 7 ; ii++ )
      if( nhdr.dim[ii] != 1 && nhdr.dim[ii] != 0) nhdr.dim[ii] = 1 ;
 
@@ -4960,7 +4967,8 @@ nifti_image* nifti_convert_n1hdr2nim(nifti_1_header nhdr, const char * fname)
 *//*--------------------------------------------------------------------*/
 nifti_image* nifti_convert_n2hdr2nim(nifti_2_header nhdr, const char * fname)
 {
-   int          ii, doswap, ni_ver, is_onefile;
+   int64_t      ii;
+   int          doswap, ni_ver, is_onefile;
    nifti_image *nim;
 
    nim = (nifti_image *)calloc( 1 , sizeof(nifti_image) ) ;
@@ -5008,7 +5016,7 @@ nifti_image* nifti_convert_n2hdr2nim(nifti_2_header nhdr, const char * fname)
      if( nhdr.dim[ii] <= 0 ) nhdr.dim[ii] = 1 ;
 
    /* fix any remaining bad dim[] values, so garbage does not propagate */
-   /* (only values 0 or 1 seem rational, otherwise set to arbirary 1)   */
+   /* (only values 0 or 1 seem rational, otherwise set to arbitrary 1)   */
    for( ii=nhdr.dim[0]+1 ; ii <= 7 ; ii++ )
      if( nhdr.dim[ii] != 1 && nhdr.dim[ii] != 0) nhdr.dim[ii] = 1 ;
 
@@ -5022,9 +5030,9 @@ nifti_image* nifti_convert_n2hdr2nim(nifti_2_header nhdr, const char * fname)
 
    nim->nifti_type = (is_onefile) ? NIFTI_FTYPE_NIFTI2_1 : NIFTI_FTYPE_NIFTI2_2;
 
-   ii = nifti_short_order() ;
-   if( doswap )   nim->byteorder = REVERSE_ORDER(ii) ;
-   else           nim->byteorder = ii ;
+   int byteOrder = nifti_short_order() ;
+   if( doswap )   nim->byteorder = REVERSE_ORDER(byteOrder) ;
+   else           nim->byteorder = byteOrder ;
 
 
   /**- set dimensions of data array */
@@ -6091,7 +6099,7 @@ nifti_image * nifti2_read_ascii_image(znzFile fp, const char *fname, int flen,
                                      int read_data)
 {
    nifti_image * nim;
-   int           slen, txt_size, remain, rv = 0;
+   int           txt_size, remain, rv = 0;
    char        * sbuf, lfunc[25] = { "nifti_read_ascii_image" };
 
    if( nifti_is_gzfile(fname) ){
@@ -6099,11 +6107,11 @@ nifti_image * nifti2_read_ascii_image(znzFile fp, const char *fname, int flen,
               fname);
      return NULL;
    }
-   slen = flen;  /* slen will be our buffer length */
+   int64_t slen = flen;  /* slen will be our buffer length */
    if( slen <= 0 ) slen = nifti_get_filesize(fname);
 
    if( g_opts.debug > 1 )
-      Rc_fprintf_stderr("-d %s: have ASCII NIFTI file of size %d\n",fname,slen);
+      Rc_fprintf_stderr("-d %s: have ASCII NIFTI file of size %d\n",fname,(int)slen);
 
    if( slen > 65530 ) slen = 65530 ;
    sbuf = (char *)calloc(sizeof(char),slen+1) ;
@@ -6259,7 +6267,7 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int64_t remain )
 
    \param nim    - nifti_image to add extension to
    \param data   - raw extension data
-   \param length - length of raw extension data
+   \param len    - length of raw extension data
    \param ecode  - extension code
 
    \sa extension codes NIFTI_ECODE_* in nifti1_io.h
@@ -6517,8 +6525,8 @@ int valid_nifti2_extensions(const nifti_image * nim)
        \return -1 on error, else NIFTI version
  *//*--------------------------------------------------------------------*/
 int nifti_header_version(const char * buf, size_t nbytes){
-   nifti_1_header *n1p = (nifti_1_header *)buf;
-   nifti_2_header *n2p = (nifti_2_header *)buf;
+   const nifti_1_header *n1p = (const nifti_1_header *)buf;
+   const nifti_2_header *n2p = (const nifti_2_header *)buf;
    char            fname[] = { "nifti_header_version" };
    int             sizeof_hdr, sver, nver;
 
@@ -6530,7 +6538,7 @@ int nifti_header_version(const char * buf, size_t nbytes){
 
    if( nbytes < sizeof(nifti_1_header) ) {
       if(g_opts.debug > 0)
-         Rc_fprintf_stderr("** %s: nbytes=%zu, too small for test", fname, nbytes);
+         Rc_fprintf_stderr("** %s: nbytes=%u, too small for test", fname, (unsigned)nbytes);
       return -1;
    }
 
@@ -6659,7 +6667,8 @@ static znzFile nifti_image_load_prep( nifti_image *nim )
       if ( g_opts.debug > 0 ){
          if( !nim ) Rc_fprintf_stderr("** ERROR: N_image_load: no nifti image\n");
          else Rc_fprintf_stderr("** ERROR: nifti_image_load: bad params (%p,%d,"
-                      "%" PRId64 ")\n", nim->iname, nim->nbyper, nim->nvox);
+                      "%" PRId64 ")\n",
+                      (void *)nim->iname, nim->nbyper, nim->nvox);
       }
       return NULL;
    }
@@ -7306,7 +7315,7 @@ nifti_1_header * nifti_make_new_n1_header(const int64_t arg_dims[],
 /*! basic creation of a nifti_image struct
 
    Create a nifti_image from the given dimensions and data type.
-   Optinally, allocate zero-filled data.
+   Optionally, allocate zero-filled data.
 
    \param dims      : optional dim[8]   (default {3,1,1,1,0,0,0,0})
    \param datatype  : optional datatype (default DT_FLOAT32)
@@ -7777,24 +7786,42 @@ znzFile nifti2_image_write_hdr_img( nifti_image *nim , int write_data ,
 }
 
 
+/*----------------------------------------------------------------------*/
+/*! This writes the header (and optionally the image data) to file.
+ *
+ * This is now just a front-end for nifti_image_write_engine, but the
+ * engine will return a status (for success of write), which is promptly
+ * ignored by this function.
+ *
+ * \sa nifti_image_write_engine
+*//*--------------------------------------------------------------------*/
+znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
+               const char * opts, znzFile imgfile, const nifti_brick_list * NBL)
+{
+   znzFile loc_img = imgfile;   /* might be NULL, might point to open struct */
+   (void)nifti_image_write_engine(nim, write_opts, opts, &loc_img, NBL);
+   return loc_img;
+}
+
 #undef  ERREX
-#define ERREX(msg)                                                \
- do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_hdr_img: %s\n",(msg)) ;  \
-     return fp ; } while(0)
+#define ERREX(msg)                                                         \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_engine: %s\n",(msg)) ; \
+     if( imgfile ) *imgfile = fp;                                          \
+     return 1 ; } while(0)
 
 
 /* ----------------------------------------------------------------------*/
 /*! This writes the header (and optionally the image data) to file
  *
- * If the image data file is left open it returns a valid znzFile handle.
- * It also uses imgfile as the open image file is not null, and modifies
- * it inside.
+ * If imgfile points to a NULL znzFile, it modifies it to a valid and open
+ * handle.  If it points to an non-NULL znzFile, it uses that as the open
+ * image and simply modifies that structure.  This also depends on write_opts.
  *
  * \param nim        nifti_image to write to disk
  * \param write_opts flags whether to write data and/or close file (see below)
  * \param opts       file-open options, probably "wb" from nifti_image_write()
- * \param imgfile    optional open znzFile struct, for writing image data
-                     (may be NULL)
+ * \param imgfile    pointer to optionaly open znzFile, for writing image data
+                     (must not be NULL, contents might be NULL)
  * \param NBL        optional nifti_brick_list, containing the image data
                      (may be NULL)
  *
@@ -7808,27 +7835,29 @@ znzFile nifti2_image_write_hdr_img( nifti_image *nim , int write_data ,
  * \sa nifti_image_write, nifti_image_write_hdr_img, nifti_image_free,
  *     nifti_set_filenames
 *//*---------------------------------------------------------------------*/
-znzFile nifti2_image_write_hdr_img2(nifti_image *nim, int write_opts,
-               const char * opts, znzFile imgfile, const nifti_brick_list * NBL)
+static int nifti_image_write_engine(nifti_image *nim, int write_opts,
+        const char * opts, znzFile * imgfile, const nifti_brick_list * NBL)
 {
    nifti_1_header n1hdr ;
    nifti_2_header n2hdr ;
    znzFile        fp=NULL;
    int64_t        ss ;
    int            write_data, leave_open;
-   int            nver=1, hsize=(int)sizeof(nifti_1_header);  /* 5 Aug 2015 */
-   char           func[] = { "nifti_image_write_hdr_img2" };
+   int            nver, hsize;
+   char           func[] = { "nifti_image_write_engine" };
 
    write_data = write_opts & 1;  /* just separate the bits now */
    leave_open = write_opts & 2;
 
-   if( ! nim                              ) ERREX("NULL input") ;
+   /* check for valid input */
+   if( ! nim || ! imgfile                 ) ERREX("NULL input") ;
    if( ! nifti_validfilename(nim->fname)  ) ERREX("bad fname input") ;
    if( write_data && ! nim->data && ! NBL ) ERREX("no image data") ;
 
    if( write_data && NBL && ! nifti_NBL_matches_nim(nim, NBL) )
       ERREX("NBL does not match nim");
 
+   /* chit-chat */
    if( g_opts.debug > 1 ){
       Rc_fprintf_stderr("-d writing nifti file '%s'...\n", nim->fname);
       if( g_opts.debug > 2 )
@@ -7836,42 +7865,63 @@ znzFile nifti2_image_write_hdr_img2(nifti_image *nim, int write_opts,
                  nim->nifti_type, nim->iname_offset);
    }
 
-   if( nim->nifti_type == NIFTI_FTYPE_ASCII )   /* non-standard case */
-      return nifti_write_ascii_image(nim,NBL,opts,write_data,leave_open);
-   else if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_1 || nim->nifti_type == NIFTI_FTYPE_NIFTI2_2 ) {
+   /* get to work */
+
+   /* if non-standard ASCII, just write output and return */
+   if( nim->nifti_type == NIFTI_FTYPE_ASCII ) {
+      *imgfile = nifti_write_ascii_image(nim,NBL,opts,write_data,leave_open);
+      return 0; /* write_ascii has no status, either */
+   }
+
+   /* create a header structure to write out */
+   if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_1 ||
+            nim->nifti_type == NIFTI_FTYPE_NIFTI2_2 ) {
       nifti_set_iname_offset(nim, 2);
-      if( nifti_convert_nim2n2hdr(nim, &n2hdr) ) return NULL;
-      nver = 2;
+      if( nifti_convert_nim2n2hdr(nim, &n2hdr) ) {
+         *imgfile = NULL;
+         return 1;
+      }
+      nver = 2; /* we will write NIFTI-2 */
       hsize = (int)sizeof(nifti_2_header);
-   }
-   else {
+   } else {
       nifti_set_iname_offset(nim, 1);
-      if( nifti_convert_nim2n1hdr(nim, &n1hdr) ) return NULL;
+      if( nifti_convert_nim2n1hdr(nim, &n1hdr) ) {
+         *imgfile = NULL;
+         return 1;
+      }
+      nver = 1;
+      hsize = (int)sizeof(nifti_1_header);  /* 5 Aug 2015 */
    }
 
    /* if writing to 2 files, make sure iname is set and different from fname */
-   if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) && (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){
+   if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) &&
+       (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){
        if( nim->iname && strcmp(nim->iname,nim->fname) == 0 ){
          free(nim->iname) ; nim->iname = NULL ;
        }
        if( nim->iname == NULL ){ /* then make a new one */
          nim->iname = nifti_makeimgname(nim->fname,nim->nifti_type,0,0);
-         if( nim->iname == NULL ) return NULL;
+         if( nim->iname == NULL ) {
+            *imgfile = NULL;
+            return 1;
+         }
        }
    }
 
-   /* if we have an imgfile and will write the header there, use it */
-   if( ! znz_isnull(imgfile) && (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 || nim->nifti_type == NIFTI_FTYPE_NIFTI2_1) ){
+   /* if we have an imgfile and will also write the header there, use it */
+   if( ! znz_isnull(*imgfile) && (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ||
+                                  nim->nifti_type == NIFTI_FTYPE_NIFTI2_1) ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d using passed file for hdr\n");
-      fp = imgfile;
-   }
-   else {
+      fp = *imgfile;
+   } else {
+      /* we will write the header to a new file */
       if( g_opts.debug > 2 )
          Rc_fprintf_stderr("+d opening output file %s [%s]\n",nim->fname,opts);
       fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ;
       if( znz_isnull(fp) ){
          LNI_FERR(func,"cannot open output file",nim->fname);
-         return fp;
+         *imgfile = fp;
+         return 1;
       }
    }
 
@@ -7882,26 +7932,31 @@ znzFile nifti2_image_write_hdr_img2(nifti_image *nim, int write_opts,
 
    if( ss < hsize ){
       LNI_FERR(func,"bad header write to output file",nim->fname);
-      znzclose(fp); return fp;
+      znzclose(fp); *imgfile = fp; return 1;
    }
 
-   /* partial file exists, and errors have been printed, so ignore return */
+   /* write extensions; any errors will be printed */
    if( nim->nifti_type != NIFTI_FTYPE_ANALYZE )
-      (void)nifti_write_extensions(fp,nim);
+      if( nifti_write_extensions(fp,nim) < 0 ) {
+         znzclose(fp); *imgfile = fp; return 1;
+      }
 
    /* if the header is all we want, we are done */
    if( ! write_data && ! leave_open ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d header is all we want: done\n");
-      znzclose(fp); return(fp);
+      znzclose(fp); *imgfile = fp; return 0;
    }
 
-   if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) && (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){ /* get a new file pointer */
+   /* if multiple files (hdr/img), close fp and use (any) *imgfile for data */
+   if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) &&
+       (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){ /* get a new file pointer */
       znzclose(fp);         /* first, close header file */
-      if( ! znz_isnull(imgfile) ){
+      /* use any valid *imgfile for img */
+      if( ! znz_isnull(*imgfile) ){
          if(g_opts.debug > 2) Rc_fprintf_stderr("+d using passed file for img\n");
-         fp = imgfile;
-      }
-      else {
+         fp = *imgfile;
+      } else {
+         /* else we need a new img file pointer */
          if( g_opts.debug > 2 )
             Rc_fprintf_stderr("+d opening img file '%s'\n", nim->iname);
          fp = znzopen( nim->iname , opts , nifti_is_gzfile(nim->iname) ) ;
@@ -7909,12 +7964,16 @@ znzFile nifti2_image_write_hdr_img2(nifti_image *nim, int write_opts,
       }
    }
 
+   /* have image pointer, ready to write */
+
    znzseek(fp, nim->iname_offset, SEEK_SET);  /* in any case, seek to offset */
 
    if( write_data ) nifti_write_all_data(fp,nim,NBL);
    if( ! leave_open ) znzclose(fp);
 
-   return fp;
+   *imgfile = fp;
+
+   return 0;
 }
 
 
@@ -7975,28 +8034,72 @@ znzFile nifti2_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL,
 *//*------------------------------------------------------------------------*/
 void nifti2_image_write( nifti_image *nim )
 {
-   znzFile fp = nifti_image_write_hdr_img(nim,1,"wb");
-   if( fp ){
+   (void)nifti_image_write_status(nim);
+}
+
+
+/*--------------------------------------------------------------------------*/
+/*! Write a nifti_image to disk, returning 0 on success, else failure.
+
+    This simple write function takes a nifti_image as input and returns
+    the status of the operation.  It is akin to nifti_image_write, but
+    returns the status.  Changing nifti_image_write from void to int
+    would have backward compatibility ramifications.
+
+   \sa nifti_image_write_bricks, nifti_image_free, nifti_set_filenames,
+       nifti_image_write_engine, nifti_image_write
+*//*------------------------------------------------------------------------*/
+int nifti2_image_write_status( nifti_image *nim )
+{
+   znzFile fp=NULL;   /* required for _engine, but promptly ignored */
+   int     rv;
+
+   rv = nifti_image_write_engine(nim, 1, "wb", &fp, NULL);
+
+   if( fp ){ /* this should not happen, as we requested file closure */
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niw: done with znzFile\n");
       free(fp);
    }
-   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d nifti_image_write: done\n");
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d nifti_image_write_status: done, status %d\n", rv);
+
+   return rv;
 }
 
 
 /*----------------------------------------------------------------------*/
-/*! similar to nifti_image_write, but data is in NBL struct, not nim->data
+/*! similar to nifti_image_write_status, but data is in NBL struct,
+    not nim->data
+
+   \return 0 on success, 1 on error
 
    \sa nifti_image_write, nifti_image_free, nifti_set_filenames, nifti_free_NBL
 *//*--------------------------------------------------------------------*/
-void nifti2_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL )
+int nifti2_image_write_bricks_status( nifti_image *nim,
+                                     const nifti_brick_list * NBL )
 {
-   znzFile fp = nifti_image_write_hdr_img2(nim,1,"wb",NULL,NBL);
+   znzFile fp=NULL;
+   int     rv;
+
+   rv = nifti_image_write_engine(nim, 1, "wb", &fp, NBL);
    if( fp ){
-      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niwb: done with znzFile\n");
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niw: done with znzFile\n");
       free(fp);
    }
-   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d niwb: done writing bricks\n");
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d niwb: done writing bricks, status %d\n", rv);
+   return rv;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! similar to nifti_image_write, but data is in NBL struct, not nim->data
+
+   \sa nifti_image_write, nifti_image_free, nifti_set_filenames, nifti_free_NBL
+*//*--------------------------------------------------------------------*/
+void nifti2_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL )
+{
+   (void)nifti_image_write_bricks_status(nim, NBL);
 }
 
 
@@ -8220,15 +8323,16 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
    if( g_opts.debug > 2 )
       Rc_fprintf_stderr("+d converting %s to ASCII\n",nim->fname);
 
-   buf = (char *)calloc(1,65534); /* longer than needed, to be safe */
+   const size_t bufLen = 65534; /* longer than needed, to be safe */
+   buf = (char *)calloc(1,bufLen);
    if( !buf ){
       Rc_fprintf_stderr("** NIFTI NITA: failed to alloc %d bytes\n",65534);
       return NULL;
    }
 
-   sprintf( buf , "<nifti_image\n" ) ;   /* XML-ish opener */
+   snprintf( buf , bufLen , "<nifti_image\n" ) ;   /* XML-ish opener */
 
-   sprintf( buf+strlen(buf) , "  nifti_type = '%s'\n" ,
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  nifti_type = '%s'\n" ,
               (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1) ? "NIFTI-1+"
              :(nim->nifti_type == NIFTI_FTYPE_NIFTI1_2) ? "NIFTI-1"
              :(nim->nifti_type == NIFTI_FTYPE_ASCII   ) ? "NIFTI-1A"
@@ -8244,123 +8348,123 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
        - The result is that the NIFTI ASCII-format header is XML-compliant. */
 
    ebuf = escapize_string(nim->fname) ;
-   sprintf( buf+strlen(buf) , "  header_filename = %s\n",ebuf); free(ebuf);
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  header_filename = %s\n",ebuf); free(ebuf);
 
    ebuf = escapize_string(nim->iname) ;
-   sprintf( buf+strlen(buf) , "  image_filename = %s\n", ebuf); free(ebuf);
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  image_filename = %s\n", ebuf); free(ebuf);
 
-   sprintf( buf+strlen(buf) , "  image_offset = '%" PRId64 "'\n" ,
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  image_offset = '%" PRId64 "'\n" ,
             nim->iname_offset );
 
-   sprintf( buf+strlen(buf), "  ndim = '%" PRId64 "'\n",nim->ndim);
-   sprintf( buf+strlen(buf), "  nx = '%" PRId64 "'\n",  nim->nx  );
+   snprintf( buf+strlen(buf), bufLen-strlen(buf), "  ndim = '%" PRId64 "'\n",nim->ndim);
+   snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nx = '%" PRId64 "'\n",  nim->nx  );
    if( nim->ndim > 1 )
-      sprintf( buf+strlen(buf), "  ny = '%" PRId64 "'\n",  nim->ny  );
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  ny = '%" PRId64 "'\n",  nim->ny  );
    if( nim->ndim > 2 )
-      sprintf( buf+strlen(buf), "  nz = '%" PRId64 "'\n",  nim->nz  );
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nz = '%" PRId64 "'\n",  nim->nz  );
    if( nim->ndim > 3 )
-      sprintf( buf+strlen(buf), "  nt = '%" PRId64 "'\n",  nim->nt  );
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nt = '%" PRId64 "'\n",  nim->nt  );
    if( nim->ndim > 4 )
-      sprintf( buf+strlen(buf), "  nu = '%" PRId64 "'\n",  nim->nu  );
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nu = '%" PRId64 "'\n",  nim->nu  );
    if( nim->ndim > 5 )
-      sprintf( buf+strlen(buf), "  nv = '%" PRId64 "'\n",  nim->nv  );
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nv = '%" PRId64 "'\n",  nim->nv  );
    if( nim->ndim > 6 )
-      sprintf( buf+strlen(buf), "  nw = '%" PRId64 "'\n",  nim->nw  );
-
-                       sprintf( buf+strlen(buf), "  dx = '%g'\n",   nim->dx  );
-   if( nim->ndim > 1 ) sprintf( buf+strlen(buf), "  dy = '%g'\n",   nim->dy  );
-   if( nim->ndim > 2 ) sprintf( buf+strlen(buf), "  dz = '%g'\n",   nim->dz  );
-   if( nim->ndim > 3 ) sprintf( buf+strlen(buf), "  dt = '%g'\n",   nim->dt  );
-   if( nim->ndim > 4 ) sprintf( buf+strlen(buf), "  du = '%g'\n",   nim->du  );
-   if( nim->ndim > 5 ) sprintf( buf+strlen(buf), "  dv = '%g'\n",   nim->dv  );
-   if( nim->ndim > 6 ) sprintf( buf+strlen(buf), "  dw = '%g'\n",   nim->dw  );
-
-   sprintf( buf+strlen(buf) , "  datatype = '%d'\n" , nim->datatype ) ;
-   sprintf( buf+strlen(buf) , "  datatype_name = '%s'\n" ,
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nw = '%" PRId64 "'\n",  nim->nw  );
+
+                       snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dx = '%g'\n",   nim->dx  );
+   if( nim->ndim > 1 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dy = '%g'\n",   nim->dy  );
+   if( nim->ndim > 2 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dz = '%g'\n",   nim->dz  );
+   if( nim->ndim > 3 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dt = '%g'\n",   nim->dt  );
+   if( nim->ndim > 4 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  du = '%g'\n",   nim->du  );
+   if( nim->ndim > 5 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dv = '%g'\n",   nim->dv  );
+   if( nim->ndim > 6 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dw = '%g'\n",   nim->dw  );
+
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  datatype = '%d'\n" , nim->datatype ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  datatype_name = '%s'\n" ,
                               nifti_datatype_string(nim->datatype) ) ;
 
-   sprintf( buf+strlen(buf) , "  nvox = '%" PRId64 "'\n" ,  nim->nvox ) ;
-   sprintf( buf+strlen(buf) , "  nbyper = '%d'\n" , nim->nbyper ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  nvox = '%" PRId64 "'\n" ,  nim->nvox ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  nbyper = '%d'\n" , nim->nbyper ) ;
 
-   sprintf( buf+strlen(buf) , "  byteorder = '%s'\n" ,
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  byteorder = '%s'\n" ,
             (nim->byteorder==MSB_FIRST) ? "MSB_FIRST" : "LSB_FIRST" ) ;
 
    if( nim->cal_min < nim->cal_max ){
-     sprintf( buf+strlen(buf) , "  cal_min = '%g'\n", nim->cal_min ) ;
-     sprintf( buf+strlen(buf) , "  cal_max = '%g'\n", nim->cal_max ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  cal_min = '%g'\n", nim->cal_min ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  cal_max = '%g'\n", nim->cal_max ) ;
    }
 
    if( nim->scl_slope != 0.0 ){
-     sprintf( buf+strlen(buf) , "  scl_slope = '%g'\n" , nim->scl_slope ) ;
-     sprintf( buf+strlen(buf) , "  scl_inter = '%g'\n" , nim->scl_inter ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  scl_slope = '%g'\n" , nim->scl_slope ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  scl_inter = '%g'\n" , nim->scl_inter ) ;
    }
 
    if( nim->intent_code > 0 ){
-     sprintf( buf+strlen(buf) , "  intent_code = '%d'\n", nim->intent_code ) ;
-     sprintf( buf+strlen(buf) , "  intent_code_name = '%s'\n" ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_code = '%d'\n", nim->intent_code ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_code_name = '%s'\n" ,
                                 nifti_intent_string(nim->intent_code) ) ;
-     sprintf( buf+strlen(buf) , "  intent_p1 = '%g'\n" , nim->intent_p1 ) ;
-     sprintf( buf+strlen(buf) , "  intent_p2 = '%g'\n" , nim->intent_p2 ) ;
-     sprintf( buf+strlen(buf) , "  intent_p3 = '%g'\n" , nim->intent_p3 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p1 = '%g'\n" , nim->intent_p1 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p2 = '%g'\n" , nim->intent_p2 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p3 = '%g'\n" , nim->intent_p3 ) ;
 
      if( nim->intent_name[0] != '\0' ){
        ebuf = escapize_string(nim->intent_name) ;
-       sprintf( buf+strlen(buf) , "  intent_name = %s\n",ebuf) ;
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_name = %s\n",ebuf) ;
        free(ebuf) ;
      }
    }
 
    if( nim->toffset != 0.0 )
-     sprintf( buf+strlen(buf) , "  toffset = '%g'\n",nim->toffset ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  toffset = '%g'\n",nim->toffset ) ;
 
    if( nim->xyz_units > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  xyz_units = '%d'\n"
               "  xyz_units_name = '%s'\n" ,
               nim->xyz_units , nifti_units_string(nim->xyz_units) ) ;
 
    if( nim->time_units > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  time_units = '%d'\n"
               "  time_units_name = '%s'\n" ,
               nim->time_units , nifti_units_string(nim->time_units) ) ;
 
    if( nim->freq_dim > 0 )
-     sprintf( buf+strlen(buf) , "  freq_dim = '%d'\n",nim->freq_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  freq_dim = '%d'\n",nim->freq_dim ) ;
    if( nim->phase_dim > 0 )
-     sprintf( buf+strlen(buf) , "  phase_dim = '%d'\n",nim->phase_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  phase_dim = '%d'\n",nim->phase_dim ) ;
    if( nim->slice_dim > 0 )
-     sprintf( buf+strlen(buf) , "  slice_dim = '%d'\n",nim->slice_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  slice_dim = '%d'\n",nim->slice_dim ) ;
    if( nim->slice_code > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  slice_code = '%d'\n"
               "  slice_code_name = '%s'\n" ,
               nim->slice_code , nifti_slice_string(nim->slice_code) ) ;
    if( nim->slice_start >= 0 && nim->slice_end > nim->slice_start )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  slice_start = '%" PRId64 "'\n"
               "  slice_end = '%" PRId64 "'\n",
               nim->slice_start , nim->slice_end ) ;
    if( nim->slice_duration != 0.0 )
-     sprintf( buf+strlen(buf) , "  slice_duration = '%g'\n",
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  slice_duration = '%g'\n",
               nim->slice_duration ) ;
 
    if( nim->descrip[0] != '\0' ){
      ebuf = escapize_string(nim->descrip) ;
-     sprintf( buf+strlen(buf) , "  descrip = %s\n",ebuf) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  descrip = %s\n",ebuf) ;
      free(ebuf) ;
    }
 
    if( nim->aux_file[0] != '\0' ){
      ebuf = escapize_string(nim->aux_file) ;
-     sprintf( buf+strlen(buf) , "  aux_file = %s\n",ebuf) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  aux_file = %s\n",ebuf) ;
      free(ebuf) ;
    }
 
    if( nim->qform_code > 0 ){
      int i,j,k ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  qform_code = '%d'\n"
               "  qform_code_name = '%s'\n"
      "  qto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
@@ -8374,7 +8478,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
          nim->qto_xyz.m[3][0] , nim->qto_xyz.m[3][1] ,
          nim->qto_xyz.m[3][2] , nim->qto_xyz.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
      "  qto_ijk_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
          nim->qto_ijk.m[0][0] , nim->qto_ijk.m[0][1] ,
          nim->qto_ijk.m[0][2] , nim->qto_ijk.m[0][3] ,
@@ -8385,7 +8489,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
          nim->qto_ijk.m[3][0] , nim->qto_ijk.m[3][1] ,
          nim->qto_ijk.m[3][2] , nim->qto_ijk.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  quatern_b = '%g'\n"
               "  quatern_c = '%g'\n"
               "  quatern_d = '%g'\n"
@@ -8398,7 +8502,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
 
      nifti_dmat44_to_orientation( nim->qto_xyz , &i,&j,&k ) ;
      if( i > 0 && j > 0 && k > 0 )
-       sprintf( buf+strlen(buf) ,
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
                 "  qform_i_orientation = '%s'\n"
                 "  qform_j_orientation = '%s'\n"
                 "  qform_k_orientation = '%s'\n" ,
@@ -8410,7 +8514,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
    if( nim->sform_code > 0 ){
      int i,j,k ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  sform_code = '%d'\n"
               "  sform_code_name = '%s'\n"
      "  sto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
@@ -8424,7 +8528,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
          nim->sto_xyz.m[3][0] , nim->sto_xyz.m[3][1] ,
          nim->sto_xyz.m[3][2] , nim->sto_xyz.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
      "  sto_ijk matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
          nim->sto_ijk.m[0][0] , nim->sto_ijk.m[0][1] ,
          nim->sto_ijk.m[0][2] , nim->sto_ijk.m[0][3] ,
@@ -8437,7 +8541,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
 
      nifti_dmat44_to_orientation( nim->sto_xyz , &i,&j,&k ) ;
      if( i > 0 && j > 0 && k > 0 )
-       sprintf( buf+strlen(buf) ,
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
                 "  sform_i_orientation = '%s'\n"
                 "  sform_j_orientation = '%s'\n"
                 "  sform_k_orientation = '%s'\n" ,
@@ -8446,9 +8550,9 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
                 nifti_orientation_string(k)  ) ;
    }
 
-   sprintf( buf+strlen(buf) , "  num_ext = '%d'\n", nim->num_ext ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  num_ext = '%d'\n", nim->num_ext ) ;
 
-   sprintf( buf+strlen(buf) , "/>\n" ) ;   /* XML-ish closer */
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "/>\n" ) ;   /* XML-ish closer */
 
    nbuf = (int)strlen(buf) ;
    buf  = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */
@@ -8485,7 +8589,7 @@ int nifti_short_order(void)   /* determine this CPU's byte order */
 /* macro to check lhs string against "n1"; if it matches,
    interpret rhs string as a number, and put it into nim->"n2" */
 
-#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)strtod(rhs,NULL)
+#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)(strtod(rhs,NULL))
 
 /* same, but where "n1" == "n2" */
 
@@ -8903,7 +9007,8 @@ int64_t nifti2_read_collapsed_image( nifti_image * nim, const int64_t dims [8],
 {
    znzFile fp;
    int64_t prods[8];          /* sizes are bounded by dims[], so 8 */
-   int     pivots[8], nprods; /* sizes are bounded by dims[], so 8 */
+   int64_t pivots[8];         /* sizes are bounded by dims[], so 8 */
+   int     nprods;
    int64_t c, bytes;
 
    /** - check pointers for sanity */
@@ -8979,7 +9084,7 @@ compute_strides(int64_t *strides,const int64_t *size,int nbyper)
 /*---------------------------------------------------------------------------*/
 /*! read an arbitrary subregion from a nifti image
 
-    This function may be used to read a single arbitary subregion of any
+    This function may be used to read a single arbitrary subregion of any
     rectangular size from a nifti dataset, such as a small 5x5x5 subregion
     around the center of a 3D image.
 
@@ -9000,7 +9105,7 @@ compute_strides(int64_t *strides,const int64_t *size,int nbyper)
           speed and possibly repeated calls to this function.
     \return
         -  the total number of bytes read, or < 0 on failure
-        -  the read and byte-swapped data, in 'data'            </pre>
+        -  the read and byte-swapped data, in 'data'
 
     \sa nifti_image_read, nifti_image_free, nifti_image_read_bricks
         nifti_image_load, nifti_read_collapsed_image
@@ -9062,6 +9167,12 @@ int64_t nifti2_read_subregion_image( nifti_image * nim,
 
   /* get the file open */
   fp = nifti_image_load_prep( nim );
+  if(znz_isnull(fp)) {
+    if(g_opts.debug > 0)
+      Rc_fprintf_stderr("** nifti_read_subregion_image, failed load_prep\n");
+    return -1;
+  }
+
   /* the current offset is just past the nifti header, save
    * location so that SEEK_SET can be used below
    */
@@ -9081,6 +9192,7 @@ int64_t nifti2_read_subregion_image( nifti_image * nim,
     if(g_opts.debug > 1)
       Rc_fprintf_stderr("allocation of %" PRId64 " bytes failed\n",
               total_alloc_size);
+    znzclose(fp);
     return -1;
   }
 
@@ -9120,11 +9232,11 @@ int64_t nifti2_read_subregion_image( nifti_image * nim,
               read_amount = rs[0] * nim->nbyper; /* read a row of subregion */
               nread = nifti_read_buffer(fp, readptr, read_amount, nim);
               if(nread != read_amount) {
-                if(g_opts.debug > 1) {
+                if(g_opts.debug > 0)
                   Rc_fprintf_stderr("read of %" PRId64 " bytes failed\n",
                           read_amount);
-                  return -1;
-                }
+                znzclose(fp);
+                return -1;
               }
             bytes += nread;
             readptr += read_amount;
@@ -9147,7 +9259,7 @@ int64_t nifti2_read_subregion_image( nifti_image * nim,
 
    return 0 on success, < 0 on failure
 */
-static int rci_read_data(nifti_image * nim, int * pivots, int64_t * prods,
+static int rci_read_data(nifti_image * nim, int64_t * pivots, int64_t * prods,
                          int nprods, const int64_t dims[], char * data,
                          znzFile fp, int64_t base_offset)
 {
@@ -9166,7 +9278,7 @@ static int rci_read_data(nifti_image * nim, int * pivots, int64_t * prods,
 
       /* make sure things look good here */
       if( *pivots != 0 ){
-         Rc_fprintf_stderr("** NIFTI rciRD: final pivot == %d!\n", *pivots);
+         Rc_fprintf_stderr("** NIFTI rciRD: final pivot == %d!\n", (int)*pivots);
          return -1;
       }
 
@@ -9269,13 +9381,11 @@ static int rci_alloc_mem(void **data, const int64_t prods[8], int nprods, int nb
    wants to collapse a dimension.  The last pivot should always be zero
    (note that we have space for that in the lists).
 */
-static int make_pivot_list(nifti_image *nim, const int64_t dims[], int pivots[],
+static int make_pivot_list(nifti_image *nim, const int64_t dims[], int64_t pivots[],
                                              int64_t prods[], int * nprods )
 {
-   int len, dind;
-
-   len = 0;
-   dind = nim->dim[0];
+   int len = 0;
+   int64_t dind = nim->dim[0];
    while( dind > 0 ){
       prods[len] = 1;
       while( dind > 0 && (nim->dim[dind] == 1 || dims[dind] == -1) ){
@@ -9299,7 +9409,7 @@ static int make_pivot_list(nifti_image *nim, const int64_t dims[], int pivots[],
    if( g_opts.debug > 2 ){
       Rc_fprintf_stderr("+d pivot list created, pivots :");
       for(dind = 0; dind < len; dind++)
-         Rc_fprintf_stderr(" %d", pivots[dind]);
+         Rc_fprintf_stderr(" %lld", (long long)pivots[dind]);
       Rc_fprintf_stderr(", prods :");
       for(dind = 0; dind < len; dind++)
          Rc_fprintf_stderr(" %" PRId64 "", prods[dind]);
diff --git a/reg-io/niftilib/nifti2_io.h b/reg-io/niftilib/nifti2_io.h
index 946e6d4e..c8829dad 100644
--- a/reg-io/niftilib/nifti2_io.h
+++ b/reg-io/niftilib/nifti2_io.h
@@ -54,7 +54,7 @@ extern "C" {
 
       Mainly adding low-level IO and changing things to allow gzipped files
       to be read and written
-      Full backwards compatability should have been maintained
+      Full backwards compatibility should have been maintained
 
    ......................................................................
    Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health)
@@ -69,7 +69,7 @@ extern "C" {
 
       Converted to be based on nifti_2_header.
 
-      ** NOT BACKWARD COMPATABLE **
+      ** NOT BACKWARD COMPATIBLE **
 
       These routines will read/write both NIFTI-1 and NIFTI-2 image files,
       but modification to the _calling_ routies is necessary, since:
@@ -79,6 +79,11 @@ extern "C" {
         c. some routines have been changed to apply to multiple NIFTI types
 */
 
+/********************** file identification magic ****************************/
+
+extern char nifti1_magic[4];
+extern char nifti2_magic[8];
+
 /********************** Some sample data structures **************************/
 
 #if RNIFTI_NIFTILIB_VERSION == 2
@@ -462,8 +467,12 @@ int64_t      nifti2_read_subregion_image(nifti_image *nim, const int64_t *start_
                                         const int64_t *region_size, void ** data);
 
 void         nifti2_image_write   ( nifti_image * nim ) ;
+int          nifti2_image_write_status( nifti_image *nim ) ;  /* 7 Jun 2022 */
+
 void         nifti2_image_write_bricks(nifti_image * nim,
                                       const nifti_brick_list * NBL);
+int          nifti2_image_write_bricks_status(nifti_image * nim,
+                                             const nifti_brick_list * NBL);
 void         nifti2_image_infodump( const nifti_image * nim ) ;
 
 void         nifti2_disp_lib_hist( int ver ) ;  /* to display library history */
@@ -635,7 +644,9 @@ int    nifti_valid_header_size(int ni_ver, int whine);
 #define nifti_read_subregion_image      nifti2_read_subregion_image
 
 #define nifti_image_write               nifti2_image_write
+#define nifti_image_write_status        nifti2_image_write_status
 #define nifti_image_write_bricks        nifti2_image_write_bricks
+#define nifti_image_write_bricks_status nifti2_image_write_bricks_status
 #define nifti_image_infodump            nifti2_image_infodump
 
 #define nifti_disp_lib_hist             nifti2_disp_lib_hist
@@ -774,7 +785,7 @@ typedef struct {
     char const * const name;           /* text string to match #define */
 } nifti_type_ele;
 
-#undef  LNI_FERR /* local nifti file error, to be compact and repetative */
+#undef  LNI_FERR /* local nifti file error, to be compact and repetitive */
 #ifdef USING_R
 #define LNI_FERR(func,msg,file)                                      \
             Rf_warning("%s: %s '%s'\n",func,msg,file)
diff --git a/reg-io/niftilib/nifti2_io_version.h b/reg-io/niftilib/nifti2_io_version.h
new file mode 100644
index 00000000..8d0f3966
--- /dev/null
+++ b/reg-io/niftilib/nifti2_io_version.h
@@ -0,0 +1,16 @@
+/* NOTE:  When changing version consider the impact on versions in
+  nifti2_io_version.h nifti1_io_version.h nifticdf_version.h and znzlib.h
+*/
+#define NIFTI2_IO_VERSION_MAJOR 2
+#define NIFTI2_IO_VERSION_MINOR 1
+#define NIFTI2_IO_VERSION_PATCH 0
+
+/* main string macros: NIFTI2_IO_VERSION and NIFTI2_IO_SOURCE_VERSION */
+#define NIFTI2_IO_VERSION_TO_STRING(x) NIFTI2_IO_VERSION_TO_STRING0(x)
+#define NIFTI2_IO_VERSION_TO_STRING0(x) #x
+#define NIFTI2_IO_VERSION                                   \
+   NIFTI2_IO_VERSION_TO_STRING(NIFTI2_IO_VERSION_MAJOR)     \
+   "." NIFTI2_IO_VERSION_TO_STRING(NIFTI2_IO_VERSION_MINOR) \
+   "." NIFTI2_IO_VERSION_TO_STRING(NIFTI2_IO_VERSION_PATCH)
+
+#define NIFTI2_IO_SOURCE_VERSION "NIFTI2_IO version " NIFTI2_IO_VERSION
diff --git a/reg-io/znzlib/znzlib.c b/reg-io/znzlib/znzlib.c
index 170a6065..d8beaa2d 100644
--- a/reg-io/znzlib/znzlib.c
+++ b/reg-io/znzlib/znzlib.c
@@ -143,7 +143,7 @@ size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file)
     /* gzread/write take unsigned int length, so maybe read in int pieces
        (noted by M Hanke, example given by M Adler)   6 July 2010 [rickr] */
     while( remain > 0 ) {
-       n2read = (remain < ZNZ_MAX_BLOCK_SIZE) ? remain : ZNZ_MAX_BLOCK_SIZE;
+       n2read = (remain < ZNZ_MAX_BLOCK_SIZE) ? (unsigned)remain : ZNZ_MAX_BLOCK_SIZE;
        nread = gzread(file->zfptr, (void *)cbuf, n2read);
        if( nread < 0 ) return nread; /* returns -1 on error */
 
@@ -175,7 +175,7 @@ size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file)
 #ifdef HAVE_ZLIB
   if (file->zfptr!=NULL) {
     while( remain > 0 ) {
-       n2write = (remain < ZNZ_MAX_BLOCK_SIZE) ? remain : ZNZ_MAX_BLOCK_SIZE;
+       n2write = (remain < ZNZ_MAX_BLOCK_SIZE) ? (unsigned)remain : ZNZ_MAX_BLOCK_SIZE;
        nwritten = gzwrite(file->zfptr, (const void *)cbuf, n2write);
 
        /* gzread returns 0 on error, but in case that ever changes... */
@@ -198,11 +198,11 @@ size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file)
   return fwrite(buf,size,nmemb,file->nzfptr);
 }
 
-long znzseek(znzFile file, long offset, int whence)
+znz_off_t znzseek(znzFile file, znz_off_t offset, int whence)
 {
   if (file==NULL) { return 0; }
 #ifdef HAVE_ZLIB
-  if (file->zfptr!=NULL) return (long) gzseek(file->zfptr,offset,whence);
+  if (file->zfptr!=NULL) return (znz_off_t) gzseek(file->zfptr,offset,whence);
 #endif
   return fseek(file->nzfptr,offset,whence);
 }
@@ -223,11 +223,11 @@ int znzrewind(znzFile stream)
   return 0;
 }
 
-long znztell(znzFile file)
+znz_off_t znztell(znzFile file)
 {
   if (file==NULL) { return 0; }
 #ifdef HAVE_ZLIB
-  if (file->zfptr!=NULL) return (long) gztell(file->zfptr);
+  if (file->zfptr!=NULL) return (znz_off_t) gztell(file->zfptr);
 #endif
   return ftell(file->nzfptr);
 }
diff --git a/reg-io/znzlib/znzlib.h b/reg-io/znzlib/znzlib.h
index d0e95aa1..78049a9a 100644
--- a/reg-io/znzlib/znzlib.h
+++ b/reg-io/znzlib/znzlib.h
@@ -46,6 +46,7 @@ extern "C" {
 #include <string.h>
 #include <stdarg.h>
 
+
 /* include optional check for HAVE_FDOPEN here, from deleted config.h:
 
    uncomment the following line if fdopen() exists for your compiler and
@@ -53,6 +54,18 @@ extern "C" {
 */
 /* #define HAVE_FDOPEN */
 
+#if defined(WIN32) || defined(WIN64) || defined(_WIN32) || defined(_WIN64) || defined(_MSVC) || defined(_MSC_VER)
+#include <io.h>
+#define fseek _fseeki64
+#define ftell _ftelli64
+#define znz_off_t long long
+#elif defined(__APPLE__) || defined(__FreeBSD__)
+#define znz_off_t off_t
+#else
+#include <unistd.h>
+#include <sys/types.h>
+#define znz_off_t off_t
+#endif
 
 #ifdef HAVE_ZLIB
 #if defined(ITKZLIB) && !defined(ITK_USE_SYSTEM_ZLIB)
@@ -96,11 +109,11 @@ size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file);
 
 size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file);
 
-long znzseek(znzFile file, long offset, int whence);
+znz_off_t znzseek(znzFile file, znz_off_t offset, int whence);
 
 int znzrewind(znzFile stream);
 
-long znztell(znzFile file);
+znz_off_t znztell(znzFile file);
 
 int znzputs(const char *str, znzFile file);
 

From 3abfaaa1f9c9ca726ac2cc67c90aa9e4bf763d9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 16 Feb 2024 20:22:14 +0000
Subject: [PATCH 289/314] Fix zlib compilation error

---
 niftyreg_build_version.txt | 2 +-
 reg-io/zlib/zconf.h        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 74fa38c9..92c732d0 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-407
+408
diff --git a/reg-io/zlib/zconf.h b/reg-io/zlib/zconf.h
index 62adc8d8..58b9fd7b 100644
--- a/reg-io/zlib/zconf.h
+++ b/reg-io/zlib/zconf.h
@@ -475,7 +475,7 @@ typedef uLong FAR uLongf;
 #  endif
 #endif
 #ifndef Z_HAVE_UNISTD_H
-#  if defined(_LARGEFILE64_SOURCE) && !defined(_WIN32)
+#  ifndef _WIN32
 #    define Z_HAVE_UNISTD_H
 #  endif
 #endif

From f21c5fb2094ca1cf0bc0bc312c2577251943fbbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 19 Feb 2024 12:51:24 +0000
Subject: [PATCH 290/314] Fix linting issues of reg_png

---
 niftyreg_build_version.txt |   2 +-
 reg-io/png/CMakeLists.txt  |   2 +-
 reg-io/png/readpng.cpp     | 317 ---------------------------------
 reg-io/png/readpng.h       |  91 ----------
 reg-io/png/reg_png.cpp     | 352 +++++++++++++++++--------------------
 reg-io/png/reg_png.h       |   6 +-
 6 files changed, 170 insertions(+), 600 deletions(-)
 delete mode 100644 reg-io/png/readpng.cpp
 delete mode 100644 reg-io/png/readpng.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 92c732d0..102c15d5 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-408
+409
diff --git a/reg-io/png/CMakeLists.txt b/reg-io/png/CMakeLists.txt
index 56f0424f..8804ac61 100644
--- a/reg-io/png/CMakeLists.txt
+++ b/reg-io/png/CMakeLists.txt
@@ -54,7 +54,7 @@ if(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP)
     install(FILES ${png_hdrs} ${CMAKE_BINARY_DIR}/pnglibconf.h DESTINATION include COMPONENT Development)
 endif(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP)
 
-add_library(reg_png reg_png.cpp readpng.cpp)
+add_library(reg_png reg_png.cpp)
 target_link_libraries(reg_png ${PNG_LIBRARY} _reg_tools)
 install(TARGETS reg_png
         RUNTIME DESTINATION bin COMPONENT Development
diff --git a/reg-io/png/readpng.cpp b/reg-io/png/readpng.cpp
deleted file mode 100644
index e5614a75..00000000
--- a/reg-io/png/readpng.cpp
+++ /dev/null
@@ -1,317 +0,0 @@
-/*---------------------------------------------------------------------------
-
-   rpng - simple PNG display program                              readpng.c
-
-  ---------------------------------------------------------------------------
-
-      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
-
-      This software is provided "as is," without warranty of any kind,
-      express or implied.  In no event shall the author or contributors
-      be held liable for any damages arising in any way from the use of
-      this software.
-
-      The contents of this file are DUAL-LICENSED.  You may modify and/or
-      redistribute this software according to the terms of one of the
-      following two licenses (at your option):
-
-
-      LICENSE 1 ("BSD-like with advertising clause"):
-
-      Permission is granted to anyone to use this software for any purpose,
-      including commercial applications, and to alter it and redistribute
-      it freely, subject to the following restrictions:
-
-      1. Redistributions of source code must retain the above copyright
-         notice, disclaimer, and this list of conditions.
-      2. Redistributions in binary form must reproduce the above copyright
-         notice, disclaimer, and this list of conditions in the documenta-
-         tion and/or other materials provided with the distribution.
-      3. All advertising materials mentioning features or use of this
-         software must display the following acknowledgment:
-
-            This product includes software developed by Greg Roelofs
-            and contributors for the book, "PNG: The Definitive Guide,"
-            published by O'Reilly and Associates.
-
-
-      LICENSE 2 (GNU GPL v2 or later):
-
-      This program is free software; you can redistribute it and/or modify
-      it under the terms of the GNU General Public License as published by
-      the Free Software Foundation; either version 2 of the License, or
-      (at your option) any later version.
-
-      This program is distributed in the hope that it will be useful,
-      but WITHOUT ANY WARRANTY; without even the implied warranty of
-      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-      GNU General Public License for more details.
-
-      You should have received a copy of the GNU General Public License
-      along with this program; if not, write to the Free Software Foundation,
-      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-  ---------------------------------------------------------------------------*/
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "readpng.h"    /* typedefs, common macros, public prototypes */
-
-/* future versions of libpng will provide this macro: */
-#ifndef png_jmpbuf
-#  define png_jmpbuf(png_ptr)   ((png_ptr)->jmpbuf)
-#endif
-
-
-static png_structp png_ptr = nullptr;
-static png_infop info_ptr = nullptr;
-
-png_uint_32  width, height;
-int  bit_depth, color_type;
-uch  *image_data = nullptr;
-
-
-void readpng_version_info(void)
-{
-   fprintf(stderr, "   Compiled with libpng %s; using libpng %s.\n",
-           PNG_LIBPNG_VER_STRING, png_libpng_ver);
-   fprintf(stderr, "   Compiled with zlib %s; using zlib %s.\n",
-           ZLIB_VERSION, zlib_version);
-}
-
-
-/* return value = 0 for success, 1 for bad sig, 2 for bad IHDR, 4 for no mem */
-
-int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight)
-{
-   uch sig[8];
-
-
-   /* first do a quick check that the file really is a PNG image; could
-    * have used slightly more general png_sig_cmp() function instead */
-
-   if(!fread(sig, 1, 8, infile))
-      return 1;
-   if (!png_check_sig(sig, 8))
-      return 1;   /* bad signature */
-
-
-   /* could pass pointers to user-defined error handlers instead of NULLs: */
-
-   png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
-   if (!png_ptr)
-      return 4;   /* out of memory */
-
-   info_ptr = png_create_info_struct(png_ptr);
-   if (!info_ptr)
-   {
-      png_destroy_read_struct(&png_ptr, nullptr, nullptr);
-      return 4;   /* out of memory */
-   }
-
-
-   /* we could create a second info struct here (end_info), but it's only
-    * useful if we want to keep pre- and post-IDAT chunk info separated
-    * (mainly for PNG-aware image editors and converters) */
-
-
-   /* setjmp() must be called in every function that calls a PNG-reading
-    * libpng function */
-
-   if (setjmp(png_jmpbuf(png_ptr)))
-   {
-      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
-      return 2;
-   }
-
-
-   png_init_io(png_ptr, infile);
-   png_set_sig_bytes(png_ptr, 8);  /* we already read the 8 signature bytes */
-
-   png_read_info(png_ptr, info_ptr);  /* read all PNG info up to image data */
-
-
-   /* alternatively, could make separate calls to png_get_image_width(),
-    * etc., but want bit_depth and color_type for later [don't care about
-    * compression_type and filter_type => NULLs] */
-
-   png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type,
-                nullptr, nullptr, nullptr);
-   *pWidth = width;
-   *pHeight = height;
-
-
-   /* OK, that's all we need for now; return happy */
-
-   return 0;
-}
-
-
-
-
-/* returns 0 if succeeds, 1 if fails due to no bKGD chunk, 2 if libpng error;
- * scales values to 8-bit if necessary */
-
-int readpng_get_bgcolor(uch *red, uch *green, uch *blue)
-{
-   png_color_16p pBackground;
-
-
-   /* setjmp() must be called in every function that calls a PNG-reading
-    * libpng function */
-
-   if (setjmp(png_jmpbuf(png_ptr)))
-   {
-      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
-      return 2;
-   }
-
-
-   if (!png_get_valid(png_ptr, info_ptr, PNG_INFO_bKGD))
-      return 1;
-
-   /* it is not obvious from the libpng documentation, but this function
-    * takes a pointer to a pointer, and it always returns valid red, green
-    * and blue values, regardless of color_type: */
-
-   png_get_bKGD(png_ptr, info_ptr, &pBackground);
-
-
-   /* however, it always returns the raw bKGD data, regardless of any
-    * bit-depth transformations, so check depth and adjust if necessary */
-
-   if (bit_depth == 16)
-   {
-      *red   = pBackground->red   >> 8;
-      *green = pBackground->green >> 8;
-      *blue  = pBackground->blue  >> 8;
-   }
-   else if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8)
-   {
-      if (bit_depth == 1)
-         *red = *green = *blue = pBackground->gray? 255 : 0;
-      else if (bit_depth == 2)
-         *red = *green = *blue = (255/3) * pBackground->gray;
-      else /* bit_depth == 4 */
-         *red = *green = *blue = (255/15) * pBackground->gray;
-   }
-   else
-   {
-      *red   = (uch)pBackground->red;
-      *green = (uch)pBackground->green;
-      *blue  = (uch)pBackground->blue;
-   }
-
-   return 0;
-}
-
-
-
-
-/* display_exponent == LUT_exponent * CRT_exponent */
-
-uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes)
-{
-   double  gamma;
-   png_uint_32  i, rowbytes;
-   png_bytepp  row_pointers = nullptr;
-
-
-   /* setjmp() must be called in every function that calls a PNG-reading
-    * libpng function */
-
-   if (setjmp(png_jmpbuf(png_ptr)))
-   {
-      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
-      return nullptr;
-   }
-
-
-   /* expand palette images to RGB, low-bit-depth grayscale images to 8 bits,
-    * transparency chunks to full alpha channel; strip 16-bit-per-sample
-    * images to 8 bits per sample; and convert grayscale to RGB[A] */
-
-   if (color_type == PNG_COLOR_TYPE_PALETTE)
-      png_set_expand(png_ptr);
-   if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8)
-      png_set_expand(png_ptr);
-   if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS))
-      png_set_expand(png_ptr);
-   if (bit_depth == 16)
-      png_set_strip_16(png_ptr);
-   if (color_type == PNG_COLOR_TYPE_GRAY ||
-         color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-      png_set_gray_to_rgb(png_ptr);
-
-
-   /* unlike the example in the libpng documentation, we have *no* idea where
-    * this file may have come from--so if it doesn't have a file gamma, don't
-    * do any correction ("do no harm") */
-
-   if (png_get_gAMA(png_ptr, info_ptr, &gamma))
-      png_set_gamma(png_ptr, display_exponent, gamma);
-
-
-   /* all transformations have been registered; now update info_ptr data,
-    * get rowbytes and channels, and allocate image memory */
-
-   png_read_update_info(png_ptr, info_ptr);
-
-   *pRowbytes = rowbytes = png_get_rowbytes(png_ptr, info_ptr);
-   *pChannels = (int)png_get_channels(png_ptr, info_ptr);
-
-   if ((image_data = (uch *)malloc(rowbytes*height)) == nullptr)
-   {
-      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
-      return nullptr;
-   }
-   if ((row_pointers = (png_bytepp)malloc(height*sizeof(png_bytep))) == nullptr)
-   {
-      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
-      free(image_data);
-      image_data = nullptr;
-      return nullptr;
-   }
-
-   Trace((stderr, "readpng_get_image:  channels = %d, rowbytes = %ld, height = %ld\n", *pChannels, rowbytes, height));
-
-
-   /* set the individual row_pointers to point at the correct offsets */
-
-   for (i = 0;  i < height;  ++i)
-      row_pointers[i] = image_data + i*rowbytes;
-
-
-   /* now we can go ahead and just read the whole image */
-
-   png_read_image(png_ptr, row_pointers);
-
-
-   /* and we're done!  (png_read_end() can be omitted if no processing of
-    * post-IDAT text/time/etc. is desired) */
-
-   free(row_pointers);
-   row_pointers = nullptr;
-
-   png_read_end(png_ptr, nullptr);
-
-   return image_data;
-}
-
-
-void readpng_cleanup(int free_image_data)
-{
-   if (free_image_data && image_data)
-   {
-      free(image_data);
-      image_data = nullptr;
-   }
-
-   if (png_ptr && info_ptr)
-   {
-      png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
-      png_ptr = nullptr;
-      info_ptr = nullptr;
-   }
-}
diff --git a/reg-io/png/readpng.h b/reg-io/png/readpng.h
deleted file mode 100644
index 3d6f4ee8..00000000
--- a/reg-io/png/readpng.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*---------------------------------------------------------------------------
-
-   rpng - simple PNG display program                              readpng.h
-
-  ---------------------------------------------------------------------------
-
-      Copyright (c) 1998-2007 Greg Roelofs.  All rights reserved.
-
-      This software is provided "as is," without warranty of any kind,
-      express or implied.  In no event shall the author or contributors
-      be held liable for any damages arising in any way from the use of
-      this software.
-
-      The contents of this file are DUAL-LICENSED.  You may modify and/or
-      redistribute this software according to the terms of one of the
-      following two licenses (at your option):
-
-
-      LICENSE 1 ("BSD-like with advertising clause"):
-
-      Permission is granted to anyone to use this software for any purpose,
-      including commercial applications, and to alter it and redistribute
-      it freely, subject to the following restrictions:
-
-      1. Redistributions of source code must retain the above copyright
-         notice, disclaimer, and this list of conditions.
-      2. Redistributions in binary form must reproduce the above copyright
-         notice, disclaimer, and this list of conditions in the documenta-
-         tion and/or other materials provided with the distribution.
-      3. All advertising materials mentioning features or use of this
-         software must display the following acknowledgment:
-
-            This product includes software developed by Greg Roelofs
-            and contributors for the book, "PNG: The Definitive Guide,"
-            published by O'Reilly and Associates.
-
-
-      LICENSE 2 (GNU GPL v2 or later):
-
-      This program is free software; you can redistribute it and/or modify
-      it under the terms of the GNU General Public License as published by
-      the Free Software Foundation; either version 2 of the License, or
-      (at your option) any later version.
-
-      This program is distributed in the hope that it will be useful,
-      but WITHOUT ANY WARRANTY; without even the implied warranty of
-      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-      GNU General Public License for more details.
-
-      You should have received a copy of the GNU General Public License
-      along with this program; if not, write to the Free Software Foundation,
-      Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-  ---------------------------------------------------------------------------*/
-
-#include "png.h"        /* libpng header; includes zlib.h */
-#include "zlib.h"
-
-#ifndef TRUE
-#  define TRUE 1
-#  define FALSE 0
-#endif
-
-#ifndef MAX
-#  define MAX(a,b)  ((a) > (b)? (a) : (b))
-#  define MIN(a,b)  ((a) < (b)? (a) : (b))
-#endif
-
-#ifdef DEBUG
-#  define Trace(x)  {fprintf x ; fflush(stderr); fflush(stdout);}
-#else
-#  define Trace(x)  ;
-#endif
-
-typedef unsigned char   uch;
-typedef unsigned short  ush;
-typedef unsigned long   ulg;
-
-
-/* prototypes for public functions in readpng.c */
-
-void readpng_version_info(void);
-
-int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight);
-
-int readpng_get_bgcolor(uch *bg_red, uch *bg_green, uch *bg_blue);
-
-uch *readpng_get_image(double display_exponent, int *pChannels,
-                       ulg *pRowbytes);
-
-void readpng_cleanup(int free_image_data);
diff --git a/reg-io/png/reg_png.cpp b/reg-io/png/reg_png.cpp
index 53c28b1b..0ef067d5 100644
--- a/reg-io/png/reg_png.cpp
+++ b/reg-io/png/reg_png.cpp
@@ -11,173 +11,154 @@
  */
 
 #include "reg_png.h"
-#include "readpng.h"
+#include "png.h"
+
+using uch = unsigned char;
+using ulg = unsigned long;
 
 /* *************************************************************** */
-nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData)
-{
-   // We first read the png file
-   FILE *pngFile=nullptr;
-   pngFile = fopen(pngFileName, "rb");
-   if(pngFile==nullptr)
-      NR_FATAL_ERROR("Can not open the png file: "s + pngFileName);
-
-   uch sig[8];
-   if (!fread(sig, 1, 8, pngFile))
-      NR_FATAL_ERROR("Error when reading the png file: "s + pngFileName);
-   if (!png_check_sig(sig, 8))
-      NR_FATAL_ERROR("The png file is corrupted: "s + pngFileName);
-   rewind(pngFile);
-
-   png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
-   if (!png_ptr)
-      NR_FATAL_ERROR("Error when reading the png file - out of memory");
-
-   png_infop info_ptr = png_create_info_struct(png_ptr);
-   if (!info_ptr)
-   {
-      png_destroy_read_struct(&png_ptr, nullptr, nullptr);
-      NR_FATAL_ERROR("Error when reading the png file - out of memory");
-   }
-
-   png_init_io(png_ptr, pngFile);
-   png_read_info(png_ptr, info_ptr);
-
-   png_uint_32 Width, Height;
-   int bit_depth, color_type;
-   png_get_IHDR(png_ptr, info_ptr, &Width, &Height, &bit_depth,
-                &color_type, nullptr, nullptr, nullptr);
-
-   int Channels;
-   ulg rowbytes;
-
-   if (color_type == PNG_COLOR_TYPE_PALETTE)
-      png_set_expand(png_ptr);
-   if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8)
-      png_set_expand(png_ptr);
-   if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS))
-      png_set_expand(png_ptr);
-
-   if (bit_depth == 16)
-      png_set_strip_16(png_ptr);
-   if (color_type == PNG_COLOR_TYPE_GRAY ||
-       color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
-      png_set_gray_to_rgb(png_ptr);
-
-   png_bytep *row_pointers= new png_bytep[Height];
-
-   png_read_update_info(png_ptr, info_ptr);
-
-   rowbytes = png_get_rowbytes(png_ptr, info_ptr);
-   Channels = (int)png_get_channels(png_ptr, info_ptr);
-
-   if(Channels > 3)
-      NR_WARN_WFCT("The PNG file has " << Channels << " channels. Only the first three are considered for RGB to gray conversion.");
-   else if(Channels == 2)
-      NR_WARN_WFCT("The PNG file has 2 channels. They will be average into one single channel");
-
-   int dim[8]= {2,static_cast<int>(Width),static_cast<int>(Height),1,1,1,1,1};
-   nifti_image *niiImage=nullptr;
-   if(readData)
-   {
-
-      uch *image_data;
-      if ((image_data = (uch *)malloc(Width*Height*Channels*sizeof(uch))) == nullptr)
-         NR_FATAL_ERROR("Error while allocating memory for the png file: "s + pngFileName);
-
-      for (png_uint_32 i=0; i<Height; ++i)
-      {
-         row_pointers[i] = image_data + i*rowbytes;
-      }
-
-      png_read_image(png_ptr, row_pointers);
-      png_read_end(png_ptr, nullptr);
-
-      niiImage=nifti_make_new_nim(dim,NIFTI_TYPE_UINT8,true);
-      uch *niiPtr=static_cast<uch *>(niiImage->data);
-      for(size_t i=0; i<niiImage->nvox; ++i) niiPtr[i]=0;
-      // Define some weight to create a gray scale image
-      float rgb2grayWeight[3];
-      if(Channels==1)
-      {
-         rgb2grayWeight[0]=1;
-      }
-      else if(Channels==2)
-      {
-         rgb2grayWeight[0]=0.5;
-         rgb2grayWeight[1]=0.5;
-      }
-      if(Channels>=3)  // rgb to y
-      {
-         rgb2grayWeight[0]=0.299;
-         rgb2grayWeight[1]=0.587;
-         rgb2grayWeight[2]=0.114;
-      }
-      for(int c=0; c<(Channels<3?Channels:3); ++c)
-      {
-         for(png_uint_32 h=0; h<Height; ++h)
-         {
-            for(png_uint_32 w=0; w<Width; ++w)
-            {
-               niiPtr[h*niiImage->nx+w] += (uch)((float)row_pointers[h][w*Channels+c]*rgb2grayWeight[c]);
-            }
-         }
-      }
-   }
-   else
-   {
-      niiImage=nifti_make_new_nim(dim,NIFTI_TYPE_UINT8,false);
-   }
-   delete []row_pointers;
-   png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
-   fclose (pngFile);
-
-   nifti_set_filenames(niiImage, pngFileName,0,0);
-   return niiImage;
+nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) {
+    // We first read the png file
+    FILE *pngFile = nullptr;
+    pngFile = fopen(pngFileName, "rb");
+    if (pngFile == nullptr)
+        NR_FATAL_ERROR("Can not open the png file: "s + pngFileName);
+
+    uch sig[8];
+    if (!fread(sig, 1, 8, pngFile))
+        NR_FATAL_ERROR("Error when reading the png file: "s + pngFileName);
+    if (!png_check_sig(sig, 8))
+        NR_FATAL_ERROR("The png file is corrupted: "s + pngFileName);
+    rewind(pngFile);
+
+    png_structp pngPtr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
+    if (!pngPtr)
+        NR_FATAL_ERROR("Error when reading the png file - out of memory");
+
+    png_infop infoPtr = png_create_info_struct(pngPtr);
+    if (!infoPtr) {
+        png_destroy_read_struct(&pngPtr, nullptr, nullptr);
+        NR_FATAL_ERROR("Error when reading the png file - out of memory");
+    }
+
+    png_init_io(pngPtr, pngFile);
+    png_read_info(pngPtr, infoPtr);
+
+    png_uint_32 width, height;
+    int bitDepth, colorType;
+    png_get_IHDR(pngPtr, infoPtr, &width, &height, &bitDepth, &colorType, nullptr, nullptr, nullptr);
+
+    int channels;
+    ulg rowBytes;
+
+    if (colorType == PNG_COLOR_TYPE_PALETTE)
+        png_set_expand(pngPtr);
+    if (colorType == PNG_COLOR_TYPE_GRAY && bitDepth < 8)
+        png_set_expand(pngPtr);
+    if (png_get_valid(pngPtr, infoPtr, PNG_INFO_tRNS))
+        png_set_expand(pngPtr);
+
+    if (bitDepth == 16)
+        png_set_strip_16(pngPtr);
+    if (colorType == PNG_COLOR_TYPE_GRAY ||
+        colorType == PNG_COLOR_TYPE_GRAY_ALPHA)
+        png_set_gray_to_rgb(pngPtr);
+
+    unique_ptr<png_bytep[]> rowPointers(new png_bytep[height]);
+
+    png_read_update_info(pngPtr, infoPtr);
+
+    rowBytes = png_get_rowbytes(pngPtr, infoPtr);
+    channels = (int)png_get_channels(pngPtr, infoPtr);
+
+    if (channels > 3)
+        NR_WARN_WFCT("The PNG file has " << channels << " channels. Only the first three are considered for RGB to gray conversion.");
+    else if (channels == 2)
+        NR_WARN_WFCT("The PNG file has 2 channels. They will be average into one single channel");
+
+    const int dim[8] = { 2, static_cast<int>(width), static_cast<int>(height), 1, 1, 1, 1, 1 };
+    nifti_image *niiImage = nullptr;
+    if (readData) {
+
+        uch *image_data = static_cast<uch*>(malloc(width * height * channels * sizeof(uch)));
+        if (image_data == nullptr)
+            NR_FATAL_ERROR("Error while allocating memory for the png file: "s + pngFileName);
+
+        for (png_uint_32 i = 0; i < height; i++)
+            rowPointers[i] = image_data + i * rowBytes;
+
+        png_read_image(pngPtr, rowPointers.get());
+        png_read_end(pngPtr, nullptr);
+
+        niiImage = nifti_make_new_nim(dim, NIFTI_TYPE_UINT8, true);
+        uch *niiPtr = static_cast<uch*>(niiImage->data);
+        for (size_t i = 0; i < niiImage->nvox; ++i) niiPtr[i] = 0;
+        // Define some weight to create a gray scale image
+        float rgb2grayWeight[3];
+        if (channels == 1) {
+            rgb2grayWeight[0] = 1;
+        } else if (channels == 2) {
+            rgb2grayWeight[0] = 0.5;
+            rgb2grayWeight[1] = 0.5;
+        }
+        if (channels >= 3) {  // rgb to y
+            rgb2grayWeight[0] = 0.299;
+            rgb2grayWeight[1] = 0.587;
+            rgb2grayWeight[2] = 0.114;
+        }
+        for (int c = 0; c < (channels < 3 ? channels : 3); c++)
+            for (png_uint_32 h = 0; h < height; h++)
+                for (png_uint_32 w = 0; w < width; w++)
+                    niiPtr[h * niiImage->nx + w] += static_cast<uch>((float)rowPointers[h][w * channels + c] * rgb2grayWeight[c]);
+    } else {
+        niiImage = nifti_make_new_nim(dim, NIFTI_TYPE_UINT8, false);
+    }
+    png_destroy_read_struct(&pngPtr, &infoPtr, nullptr);
+    fclose(pngFile);
+
+    nifti_set_filenames(niiImage, pngFileName, 0, 0);
+    return niiImage;
 }
-
 /* *************************************************************** */
-void reg_io_writePNGfile(nifti_image *image, const char *filename)
-{
-   // We first check the nifti image dimension
-   if(image->nz>1 || image->nt>1 || image->nu>1 || image->nv>1 || image->nw>1)
-      NR_FATAL_ERROR("Image with dimension larger than 2 can be saved as png");
-
-   // Check the min and max values of the nifti image
-   float minValue = reg_tools_getMinValue(image, -1);
-   float maxValue = reg_tools_getMaxValue(image, -1);
-
-   // Rescale the image intensities if they are outside of the range
-   if(minValue<0 || maxValue>255)
-   {
-      reg_intensityRescale(image, 0, 0, 255);
-      NR_WARN_WFCT("The image intensities have been rescaled from [" << minValue << " " << maxValue << "] to [0 255].");
-   }
-
-   // The nifti image is converted as unsigned char if required
-   if(image->datatype!=NIFTI_TYPE_UINT8)
-      reg_tools_changeDatatype<uch>(image);
-
-   // Create pointer the nifti image data
-   uch *niiImgPtr = static_cast<uch *>(image->data);
-
-   // Check first if the png file can be writen
-   FILE *fp=fopen(filename, "wb");
-   if(!fp)
-      NR_FATAL_ERROR("The png file can not be written: "s + filename);
-
-   // The png file structures are created
-   png_structp png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
-   if (png_ptr==nullptr)
-      NR_FATAL_ERROR("The png pointer could not be created");
-
-   png_infop info_ptr = png_create_info_struct (png_ptr);
-   if(info_ptr==nullptr)
-      NR_FATAL_ERROR("The png structure could not be created");
-
-   // Set the png header information
-   png_set_IHDR (png_ptr,
-                 info_ptr,
+void reg_io_writePNGfile(nifti_image *image, const char *filename) {
+    // We first check the nifti image dimension
+    if (image->nz > 1 || image->nt > 1 || image->nu > 1 || image->nv > 1 || image->nw > 1)
+        NR_FATAL_ERROR("Image with dimension larger than 2 can be saved as png");
+
+    // Check the min and max values of the nifti image
+    float minValue = reg_tools_getMinValue(image, -1);
+    float maxValue = reg_tools_getMaxValue(image, -1);
+
+    // Rescale the image intensities if they are outside of the range
+    if (minValue < 0 || maxValue > 255) {
+        reg_intensityRescale(image, 0, 0, 255);
+        NR_WARN_WFCT("The image intensities have been rescaled from [" << minValue << " " << maxValue << "] to [0 255].");
+    }
+
+    // The nifti image is converted as unsigned char if required
+    if (image->datatype != NIFTI_TYPE_UINT8)
+        reg_tools_changeDatatype<uch>(image);
+
+    // Create pointer the nifti image data
+    uch *niiImgPtr = static_cast<uch*>(image->data);
+
+    // Check first if the png file can be writen
+    FILE *fp = fopen(filename, "wb");
+    if (!fp)
+        NR_FATAL_ERROR("The png file can not be written: "s + filename);
+
+    // The png file structures are created
+    png_structp pngPtr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
+    if (pngPtr == nullptr)
+        NR_FATAL_ERROR("The png pointer could not be created");
+
+    png_infop infoPtr = png_create_info_struct(pngPtr);
+    if (infoPtr == nullptr)
+        NR_FATAL_ERROR("The png structure could not be created");
+
+    // Set the png header information
+    png_set_IHDR(pngPtr,
+                 infoPtr,
                  image->nx, // width
                  image->ny, // height
                  8, // depth
@@ -185,29 +166,26 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename)
                  PNG_INTERLACE_NONE,
                  PNG_COMPRESSION_TYPE_DEFAULT,
                  PNG_FILTER_TYPE_DEFAULT);
-   // The rows of the png are intialised
-   png_byte **row_pointers = (png_byte **)png_malloc(png_ptr, image->ny*sizeof(png_byte *));
-   // The data are copied over from the nifti structure to the png structure
-   size_t niiIndex=0;
-   for (int y = 0; y < image->ny; ++y)
-   {
-      png_byte *row = (png_byte *)png_malloc(png_ptr, sizeof(uch)*image->nx);
-      row_pointers[y] = row;
-      for (int x = 0; x < image->nx; ++x)
-      {
-         *row++ = niiImgPtr[niiIndex++];
-      }
-   }
-   // Write the image data to the file
-   png_init_io (png_ptr, fp);
-   png_set_rows (png_ptr, info_ptr, row_pointers);
-   png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, nullptr);
-   // Free the allocated png arrays
-   for(int y=0; y<image->ny; ++y)
-      png_free(png_ptr, row_pointers[y]);
-   png_free(png_ptr, row_pointers);
-   png_destroy_write_struct(&png_ptr, &info_ptr);
-   // Finally close the file on the hard-drive
-   fclose (fp);
+    // The rows of the png are intialised
+    png_byte **rowPointers = static_cast<png_byte**>(png_malloc(pngPtr, image->ny * sizeof(png_byte*)));
+    // The data are copied over from the nifti structure to the png structure
+    size_t niiIndex = 0;
+    for (int y = 0; y < image->ny; y++) {
+        png_byte *row = static_cast<png_byte*>(png_malloc(pngPtr, sizeof(uch) * image->nx));
+        rowPointers[y] = row;
+        for (int x = 0; x < image->nx; x++)
+            *row++ = niiImgPtr[niiIndex++];
+    }
+    // Write the image data to the file
+    png_init_io(pngPtr, fp);
+    png_set_rows(pngPtr, infoPtr, rowPointers);
+    png_write_png(pngPtr, infoPtr, PNG_TRANSFORM_IDENTITY, nullptr);
+    // Free the allocated png arrays
+    for (int y = 0; y < image->ny; y++)
+        png_free(pngPtr, rowPointers[y]);
+    png_free(pngPtr, rowPointers);
+    png_destroy_write_struct(&pngPtr, &infoPtr);
+    // Finally close the file on the hard-drive
+    fclose(fp);
 }
 /* *************************************************************** */
diff --git a/reg-io/png/reg_png.h b/reg-io/png/reg_png.h
index ad94cc21..cad9a485 100644
--- a/reg-io/png/reg_png.h
+++ b/reg-io/png/reg_png.h
@@ -17,8 +17,8 @@
 #include "_reg_tools.h"
 
 /* *************************************************************** */
-/** @brief This function read a png file from the hard-drive and convert
-  * it into a nifti_structure. using this function, you can either
+/** @brief This function reads a png file from the hard-drive and converts
+  * it into a nifti_structure. Using this function, you can either
   * read the full image or only the header information
   * @param filename Filename of the png file to read
   * @param readData The actual data is read if the flag is set to true
@@ -26,7 +26,7 @@
   */
 nifti_image *reg_io_readPNGfile(const char *filename, bool readData);
 /* *************************************************************** */
-/** @brief This function first convert a nifti image into a png and then
+/** @brief This function first converts a nifti image into a png and then
   * save the png file.
   * @param image Nifti image that will first be converted to a png file
   * and then will be saved on the disk

From 1fde5bb349e3259bba3eef6a68d01bc69d4de0e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 19 Feb 2024 14:35:31 +0000
Subject: [PATCH 291/314] Fix linting issues of nifti1_io

---
 niftyreg_build_version.txt  |  2 +-
 reg-io/niftilib/nifti1_io.c | 21 +++++++++++++--------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 102c15d5..17e344e7 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-409
+410
diff --git a/reg-io/niftilib/nifti1_io.c b/reg-io/niftilib/nifti1_io.c
index 5237bb76..b557b702 100644
--- a/reg-io/niftilib/nifti1_io.c
+++ b/reg-io/niftilib/nifti1_io.c
@@ -3229,13 +3229,14 @@ static int fileext_compare(const char * test_ext, const char * known_ext)
 {
    char caps[8] = "";
    size_t c,len;
+
+   /* if anything odd, use default */
+   if( !test_ext || !known_ext ) return -1;
+
    /* if equal, don't need to check case (store to avoid multiple calls) */
    const int cmp = strcmp(test_ext, known_ext);
    if( cmp == 0 ) return cmp;
 
-   /* if anything odd, use default */
-   if( !test_ext || !known_ext ) return cmp;
-
    len = strlen(known_ext);
    if( len > 7 ) return cmp;
 
@@ -3254,13 +3255,14 @@ static int fileext_n_compare(const char * test_ext,
 {
    char caps[8] = "";
    size_t c,len;
+
+   /* if anything odd, use default */
+   if( !test_ext || !known_ext ) return -1;
+
    /* if equal, don't need to check case (store to avoid multiple calls) */
    const int  cmp = strncmp(test_ext, known_ext, maxlen);
    if( cmp == 0 ) return cmp;
 
-   /* if anything odd, use default */
-   if( !test_ext || !known_ext ) return cmp;
-
    len = strlen(known_ext);
    if( len > maxlen ) len = maxlen;     /* ignore anything past maxlen */
    if( len > 7 ) return cmp;
@@ -6437,8 +6439,11 @@ char *nifti_image_to_ascii( const nifti_image *nim )
    snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "/>\n" ) ;   /* XML-ish closer */
 
    nbuf = (int)strlen(buf) ;
-   buf  = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */
-   if( !buf ) Rc_fprintf_stderr("** NITA: failed to realloc %d bytes\n",nbuf+1);
+   char *temp = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */
+   if (temp)
+      buf = temp; // cppcheck-suppress memleak // false negative
+   else
+      Rc_fprintf_stderr("** NITA: failed to realloc %d bytes\n", nbuf+1);
    return buf ;
 #endif
 }

From b90d0d5c7756a6cf1836aa7ca9d61d9bd22eedff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 19 Feb 2024 15:36:36 +0000
Subject: [PATCH 292/314] Enable inline suppressions for static code analysis

---
 .github/workflows/analysis.yml | 2 +-
 niftyreg_build_version.txt     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml
index 2cce5a89..ae2d6c3e 100644
--- a/.github/workflows/analysis.yml
+++ b/.github/workflows/analysis.yml
@@ -53,7 +53,7 @@ jobs:
             REPORT_PR_CHANGES_ONLY: false
         run: |
             analysis_file="analysis.txt"
-            cppcheck_params="--enable=warning --check-level=exhaustive --suppress=internalError --suppress=internalAstError"
+            cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError"
             cppcheck -j4 $cppcheck_params --project=$(pwd)/build/compile_commands.json --output-file=$analysis_file
             # Since cppcheck does not support OpenCL and CUDA, we need to check these files separately
             find $(pwd)/reg-lib/cl/. -name "*.cl" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 17e344e7..617de7ea 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-410
+411

From 7d1f3f869c26b6dfb4997258cd4e297b6476bd2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 19 Feb 2024 15:39:10 +0000
Subject: [PATCH 293/314] Exclude Eigen library from static code analysis

---
 .github/workflows/analysis.yml | 2 +-
 niftyreg_build_version.txt     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml
index ae2d6c3e..5c085d3d 100644
--- a/.github/workflows/analysis.yml
+++ b/.github/workflows/analysis.yml
@@ -53,7 +53,7 @@ jobs:
             REPORT_PR_CHANGES_ONLY: false
         run: |
             analysis_file="analysis.txt"
-            cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError"
+            cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError --suppress=*:*third-party/eigen3/*"
             cppcheck -j4 $cppcheck_params --project=$(pwd)/build/compile_commands.json --output-file=$analysis_file
             # Since cppcheck does not support OpenCL and CUDA, we need to check these files separately
             find $(pwd)/reg-lib/cl/. -name "*.cl" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 617de7ea..ddabef86 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-411
+412

From 6cbbccd4d1452ad7870126a3f455164888dcd703 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 19 Feb 2024 15:44:31 +0000
Subject: [PATCH 294/314] Enable CRT secure warnings

---
 CMakeLists.txt             | 2 --
 niftyreg_build_version.txt | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3601fb55..f1cfa291 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,8 +50,6 @@ if(GIT_FOUND)
 endif(GIT_FOUND)
 #-----------------------------------------------------------------------------
 if(MSVC)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
   set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj")
   set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj")
 endif(MSVC)
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index ddabef86..36352541 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-412
+413

From f5e227f72929b7d6dd19c0dbd53b604474e3a444 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 20 Feb 2024 16:33:58 +0000
Subject: [PATCH 295/314] Fix linting issues

---
 niftyreg_build_version.txt                    |    2 +-
 reg-apps/reg_aladin.cpp                       |    5 -
 reg-apps/reg_average.cpp                      |    8 +-
 reg-apps/reg_transform.cpp                    | 2504 ++++++++---------
 reg-io/_reg_ReadWriteImage.cpp                |   12 +-
 reg-io/niftilib/nifti1_io.c                   |    2 +-
 reg-lib/AffineDeformationFieldKernel.h        |    2 -
 reg-lib/AladinContent.h                       |    3 +
 reg-lib/BlockMatchingKernel.h                 |    2 -
 reg-lib/ConvolutionKernel.h                   |    9 +-
 reg-lib/Kernel.h                              |    6 +-
 reg-lib/LtsKernel.h                           |    2 -
 reg-lib/Optimiser.cpp                         |    9 -
 reg-lib/Optimiser.hpp                         |   12 +-
 reg-lib/Platform.cpp                          |    8 +-
 reg-lib/Platform.h                            |    4 +
 reg-lib/_reg_aladin.cpp                       |    8 +-
 reg-lib/_reg_aladin.h                         |    4 -
 reg-lib/_reg_aladin_sym.cpp                   |    4 +-
 reg-lib/_reg_base.cpp                         |    9 +-
 reg-lib/_reg_base.h                           |    2 +-
 reg-lib/_reg_f3d.cpp                          |    6 +
 reg-lib/cl/ClAffineDeformationFieldKernel.cpp |   13 +-
 reg-lib/cl/ClAffineDeformationFieldKernel.h   |    4 +-
 reg-lib/cl/ClAladinContent.cpp                |   22 +-
 reg-lib/cl/ClAladinContent.h                  |   11 +-
 reg-lib/cl/ClBlockMatchingKernel.h            |    4 +-
 reg-lib/cl/ClContextSingleton.cpp             |   99 +-
 reg-lib/cl/ClContextSingleton.h               |    4 +-
 reg-lib/cl/ClConvolutionKernel.h              |    9 +-
 reg-lib/cl/ClLtsKernel.h                      |    5 +-
 reg-lib/cl/InfoDevice.h                       |  200 +-
 reg-lib/cl/blockMatchingKernel.cl             |   10 +-
 reg-lib/cl/resampleKernel.cl                  |    8 +-
 reg-lib/cpu/CpuAffineDeformationFieldKernel.h |    2 +-
 reg-lib/cpu/CpuBlockMatchingKernel.h          |    2 +-
 reg-lib/cpu/CpuConvolutionKernel.h            |    8 +-
 reg-lib/cpu/CpuLtsKernel.h                    |    2 +-
 reg-lib/cpu/_reg_blockMatching.h              |   74 +-
 reg-lib/cpu/_reg_dti.h                        |    4 +-
 reg-lib/cpu/_reg_localTrans.cpp               |   50 +-
 reg-lib/cpu/_reg_measure.h                    |   26 +-
 reg-lib/cpu/_reg_mind.cpp                     |   10 -
 reg-lib/cpu/_reg_mind.h                       |   16 +-
 reg-lib/cpu/_reg_nmi.cpp                      |    7 -
 reg-lib/cpu/_reg_nmi.h                        |  155 +-
 reg-lib/cpu/_reg_tools.cpp                    |    2 +-
 .../cuda/CudaAffineDeformationFieldKernel.h   |    3 +-
 reg-lib/cuda/CudaAladinContent.cpp            |   98 -
 reg-lib/cuda/CudaAladinContent.h              |   25 +-
 reg-lib/cuda/CudaBlockMatchingKernel.h        |    2 +-
 reg-lib/cuda/CudaCompute.cu                   |    6 +-
 reg-lib/cuda/CudaContent.cpp                  |   10 +-
 reg-lib/cuda/CudaConvolutionKernel.h          |   13 +-
 reg-lib/cuda/CudaF3dContent.cpp               |    4 +-
 reg-lib/cuda/CudaLtsKernel.cpp                |   57 -
 reg-lib/cuda/CudaLtsKernel.h                  |    9 +-
 reg-lib/cuda/CudaTools.cu                     |    2 +-
 reg-lib/cuda/CudaToolsKernels.cu              |    2 +-
 reg-lib/cuda/_reg_measure_gpu.h               |   24 +-
 reg-lib/cuda/blockMatchingKernel.cu           |   10 +-
 reg-lib/cuda/optimizeKernel.cu                |  395 ---
 reg-lib/cuda/optimizeKernel.h                 |   23 -
 63 files changed, 1508 insertions(+), 2545 deletions(-)
 delete mode 100644 reg-lib/cuda/optimizeKernel.cu
 delete mode 100644 reg-lib/cuda/optimizeKernel.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 36352541..d1b9f6a9 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-413
+414
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 9619dcec..6cf515a4 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -93,7 +93,6 @@ void Usage(char *exec) {
         NR_INFO("\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids");
     }
 
-    //   NR_INFO("\t-crv\t\t\tChoose custom capture range for the block matching alg");
 #ifdef _OPENMP
     int defaultOpenMPValue = omp_get_num_procs();
     if (getenv("OMP_NUM_THREADS") != nullptr)
@@ -161,7 +160,6 @@ int main(int argc, char **argv) {
 
     bool iso = false;
     bool verbose = true;
-    int captureRangeVox = 3;
     PlatformType platformType(PlatformType::Cpu);
     unsigned gpuIdx = 999;
 
@@ -300,8 +298,6 @@ int main(int argc, char **argv) {
             platformType = value;
         } else if (strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) {
             gpuIdx = unsigned(atoi(argv[++i]));
-        } else if (strcmp(argv[i], "-crv") == 0 || strcmp(argv[i], "--crv") == 0) {
-            captureRangeVox = atoi(argv[++i]);
         } else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) {
 #ifdef _OPENMP
             omp_set_num_threads(atoi(argv[++i]));
@@ -406,7 +402,6 @@ int main(int argc, char **argv) {
     reg->SetBlockPercentage(blockPercentage);
     reg->SetInlierLts(inlierLts);
     reg->SetInterpolation(interpolation);
-    reg->SetCaptureRangeVox(captureRangeVox);
     reg->SetPlatformType(platformType);
     reg->SetGpuIdx(gpuIdx);
 
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index d4bea706..372763a4 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -118,7 +118,7 @@ mat44 compute_average_matrices(size_t matrixNumber,
    for(size_t m=0; m<matrixNumber; ++m)
       reg_tool_ReadAffineFile(&matrices[m],inputAffName[m]);
    // Matrix to store the final result is created
-   mat44 average_matrix;
+   mat44 average_matrix{};
    // An array to store the weight given to each matrix is generated
    float *matrixWeight = (float *)malloc(matrixNumber*sizeof(float));
    int *matrixIndexSorted = (int *)malloc(matrixNumber*sizeof(int));
@@ -133,11 +133,7 @@ mat44 compute_average_matrices(size_t matrixNumber,
    if(lts_inlier<1.f && lts_inlier>0)
       iterationNumber=10;
    for(size_t it=0; it<iterationNumber; ++it){
-      double tempValue[16]= {0,0,0,0,
-                             0,0,0,0,
-                             0,0,0,0,
-                             0,0,0,0
-                            };
+      double tempValue[16]{};
       double weightSum=0;
       // The (weighted) average matrix is computed
       for(size_t m=0; m<matrixNumber; ++m)
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 0bf20051..485765d9 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -25,1427 +25,1239 @@
 #include <vector>
 #include <iostream>
 
-typedef struct
-{
-   char *referenceImageName;
-   char *referenceImage2Name;
-   char *inputTransName;
-   char *input2TransName;
-   char *inputLandmarkName;
-   float affTransParam[12];
-   char *outputTransName;
+typedef struct {
+    char *referenceImageName;
+    char *referenceImage2Name;
+    char *inputTransName;
+    char *input2TransName;
+    char *inputLandmarkName;
+    float affTransParam[12];
+    char *outputTransName;
 } PARAM;
-typedef struct
-{
-   bool referenceImageFlag;
-   bool referenceImage2Flag;
-   bool outputDefFlag;
-   bool outputDispFlag;
-   bool outputFlowFlag;
-   bool outputCompFlag;
-   bool outputLandFlag;
-   bool updSFormFlag;
-   bool halfTransFlag;
-   bool invertAffFlag;
-   bool invertNRRFlag;
-   bool flirtAff2NRFlag;
-   bool makeAffFlag;
-   bool aff2rigFlag;
+typedef struct {
+    bool referenceImageFlag;
+    bool referenceImage2Flag;
+    bool outputDefFlag;
+    bool outputDispFlag;
+    bool outputFlowFlag;
+    bool outputCompFlag;
+    bool outputLandFlag;
+    bool updSFormFlag;
+    bool halfTransFlag;
+    bool invertAffFlag;
+    bool invertNRRFlag;
+    bool flirtAff2NRFlag;
+    bool makeAffFlag;
+    bool aff2rigFlag;
 } FLAG;
 
 
-void PetitUsage(char *exec)
-{
-   NR_INFO("Usage:\t" << exec << " [OPTIONS]");
-   NR_INFO("\tSee the help for more details (-h)");
+void PetitUsage(char *exec) {
+    NR_INFO("Usage:\t" << exec << " [OPTIONS]");
+    NR_INFO("\tSee the help for more details (-h)");
 }
 
-void Usage(char *exec)
-{
-   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
-   NR_INFO("Usage:\t" << exec << " [OPTIONS]");
-   NR_INFO("* * OPTIONS * *\n");
+void Usage(char *exec) {
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("Usage:\t" << exec << " [OPTIONS]");
+    NR_INFO("* * OPTIONS * *\n");
 
-   NR_INFO("\t-ref <filename>");
-   NR_INFO("\t\tFilename of the reference image");
-   NR_INFO("\t\tThe Reference image has to be specified when a cubic B-Spline parametrised control point grid is used*.");
-   NR_INFO("\t-ref2 <filename>");
-   NR_INFO("\t\tFilename of the second reference image to be used when dealing with composition\n");
+    NR_INFO("\t-ref <filename>");
+    NR_INFO("\t\tFilename of the reference image");
+    NR_INFO("\t\tThe Reference image has to be specified when a cubic B-Spline parametrised control point grid is used*.");
+    NR_INFO("\t-ref2 <filename>");
+    NR_INFO("\t\tFilename of the second reference image to be used when dealing with composition\n");
 
-   NR_INFO("\t-def <filename1> <filename2>");
-   NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding deformation field");
-   NR_INFO("\t\tfilename1 - Input transformation file name");
-   NR_INFO("\t\tfilename2 - Output deformation field file name\n");
+    NR_INFO("\t-def <filename1> <filename2>");
+    NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding deformation field");
+    NR_INFO("\t\tfilename1 - Input transformation file name");
+    NR_INFO("\t\tfilename2 - Output deformation field file name\n");
 
-   NR_INFO("\t-disp <filename1> <filename2>");
-   NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding displacement field");
-   NR_INFO("\t\tfilename1 - Input transformation file name");
-   NR_INFO("\t\tfilename2 - Output displacement field file name\n");
+    NR_INFO("\t-disp <filename1> <filename2>");
+    NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding displacement field");
+    NR_INFO("\t\tfilename1 - Input transformation file name");
+    NR_INFO("\t\tfilename2 - Output displacement field file name\n");
 
-   NR_INFO("\t-flow <filename1> <filename2>");
-   NR_INFO("\t\tTake a spline parametrised SVF and compute the corresponding flow field");
-   NR_INFO("\t\tfilename1 - Input transformation file name");
-   NR_INFO("\t\tfilename2 - Output flow field file name\n");
+    NR_INFO("\t-flow <filename1> <filename2>");
+    NR_INFO("\t\tTake a spline parametrised SVF and compute the corresponding flow field");
+    NR_INFO("\t\tfilename1 - Input transformation file name");
+    NR_INFO("\t\tfilename2 - Output flow field file name\n");
 
-   NR_INFO("\t-comp <filename1> <filename2> <filename3>");
-   NR_INFO("\t\tCompose two transformations of any recognised type* and returns a deformation field.");
-   NR_INFO("\t\tTrans3(x) = Trans2(Trans1(x)).");
-   NR_INFO("\t\tfilename1 - Input transformation 1 file name (associated with -ref if required)");
-   NR_INFO("\t\tfilename2 - Input transformation 2 file name (associated with -ref2 if required)");
-   NR_INFO("\t\tfilename3 - Output deformation field file name\n");
+    NR_INFO("\t-comp <filename1> <filename2> <filename3>");
+    NR_INFO("\t\tCompose two transformations of any recognised type* and returns a deformation field.");
+    NR_INFO("\t\tTrans3(x) = Trans2(Trans1(x)).");
+    NR_INFO("\t\tfilename1 - Input transformation 1 file name (associated with -ref if required)");
+    NR_INFO("\t\tfilename2 - Input transformation 2 file name (associated with -ref2 if required)");
+    NR_INFO("\t\tfilename3 - Output deformation field file name\n");
 
-   NR_INFO("\t-land <filename1> <filename2> <filename3>");
-   NR_INFO("\t\tApply a transformation to a set of landmark(s).");
-   NR_INFO("\t\tLandmarks are encoded in a text file with one landmark position (mm) per line:");
-   NR_INFO("\t\t\t<key1_x> <key1_y> <key1_z>");
-   NR_INFO("\t\t\t<key2_x> <key2_y> <key2_z>");
-   NR_INFO("\t\tfilename1 - Input transformation file name");
-   NR_INFO("\t\tfilename2 - Input landmark file name.");
-   NR_INFO("\t\tfilename3 - Output landmark file name\n");
+    NR_INFO("\t-land <filename1> <filename2> <filename3>");
+    NR_INFO("\t\tApply a transformation to a set of landmark(s).");
+    NR_INFO("\t\tLandmarks are encoded in a text file with one landmark position (mm) per line:");
+    NR_INFO("\t\t\t<key1_x> <key1_y> <key1_z>");
+    NR_INFO("\t\t\t<key2_x> <key2_y> <key2_z>");
+    NR_INFO("\t\tfilename1 - Input transformation file name");
+    NR_INFO("\t\tfilename2 - Input landmark file name.");
+    NR_INFO("\t\tfilename3 - Output landmark file name\n");
 
-   NR_INFO("\t-updSform <filename1> <filename2> <filename3>");
-   NR_INFO("\t\tUpdate the sform of an image using an affine transformation.");
-   NR_INFO("\t\tFilename1 - Image to be updated");
-   NR_INFO("\t\tFilename2 - Affine transformation defined as Affine x Reference = Floating");
-   NR_INFO("\t\tFilename3 - Updated image.\n");
+    NR_INFO("\t-updSform <filename1> <filename2> <filename3>");
+    NR_INFO("\t\tUpdate the sform of an image using an affine transformation.");
+    NR_INFO("\t\tFilename1 - Image to be updated");
+    NR_INFO("\t\tFilename2 - Affine transformation defined as Affine x Reference = Floating");
+    NR_INFO("\t\tFilename3 - Updated image.\n");
 
-   NR_INFO("\t-invAff <filename1> <filename2>");
-   NR_INFO("\t\tInvert an affine matrix.");
-   NR_INFO("\t\tfilename1 - Input affine transformation file name");
-   NR_INFO("\t\tfilename2 - Output inverted affine transformation file name\n");
+    NR_INFO("\t-invAff <filename1> <filename2>");
+    NR_INFO("\t\tInvert an affine matrix.");
+    NR_INFO("\t\tfilename1 - Input affine transformation file name");
+    NR_INFO("\t\tfilename2 - Output inverted affine transformation file name\n");
 
-   NR_INFO("\t-invNrr <filename1> <filename2> <filename3>");
-   NR_INFO("\t\tInvert a non-rigid transformation and save the result as a deformation field.");
-   NR_INFO("\t\tfilename1 - Input transformation file name");
-   NR_INFO("\t\tfilename2 - Input floating image where the inverted transformation is defined");
-   NR_INFO("\t\tfilename3 - Output inverted transformation file name");
-   NR_INFO("\t\tNote that the cubic b-spline grid parametrisations can not be inverted without approximation,");
-   NR_INFO("\t\tas a result, they are converted into deformation fields before inversion.\n");
+    NR_INFO("\t-invNrr <filename1> <filename2> <filename3>");
+    NR_INFO("\t\tInvert a non-rigid transformation and save the result as a deformation field.");
+    NR_INFO("\t\tfilename1 - Input transformation file name");
+    NR_INFO("\t\tfilename2 - Input floating image where the inverted transformation is defined");
+    NR_INFO("\t\tfilename3 - Output inverted transformation file name");
+    NR_INFO("\t\tNote that the cubic b-spline grid parametrisations can not be inverted without approximation,");
+    NR_INFO("\t\tas a result, they are converted into deformation fields before inversion.\n");
 
-   NR_INFO("\t-half <filename1> <filename2>");
-   NR_INFO("\t\tThe input transformation is halfed and stored using the same transformation type.");
-   NR_INFO("\t\tfilename1 - Input transformation file name");
-   NR_INFO("\t\tfilename2 - Output transformation file name\n");
+    NR_INFO("\t-half <filename1> <filename2>");
+    NR_INFO("\t\tThe input transformation is halfed and stored using the same transformation type.");
+    NR_INFO("\t\tfilename1 - Input transformation file name");
+    NR_INFO("\t\tfilename2 - Output transformation file name\n");
 
-   NR_INFO("\t-makeAff <rx> <ry> <rz> <tx> <ty> <tz> <sx> <sy> <sz> <shx> <shy> <shz> <outputFilename>");
-   NR_INFO("\t\tCreate an affine transformation matrix\n");
+    NR_INFO("\t-makeAff <rx> <ry> <rz> <tx> <ty> <tz> <sx> <sy> <sz> <shx> <shy> <shz> <outputFilename>");
+    NR_INFO("\t\tCreate an affine transformation matrix\n");
 
-   NR_INFO("\t-aff2rig <filename1> <filename2>");
-   NR_INFO("\t\tExtract the rigid component from an affine transformation matrix");
-   NR_INFO("\t\tfilename1 - Input transformation file name");
-   NR_INFO("\t\tfilename2 - Output transformation file name\n");
+    NR_INFO("\t-aff2rig <filename1> <filename2>");
+    NR_INFO("\t\tExtract the rigid component from an affine transformation matrix");
+    NR_INFO("\t\tfilename1 - Input transformation file name");
+    NR_INFO("\t\tfilename2 - Output transformation file name\n");
 
-   NR_INFO("\t-flirtAff2NR <filename1> <filename2> <filename3> <filename4>");
-   NR_INFO("\t\tConvert a flirt (FSL) affine transformation to a NiftyReg affine transformation");
-   NR_INFO("\t\tfilename1 - Input FLIRT (FSL) affine transformation file name");
-   NR_INFO("\t\tfilename2 - Image used as a reference (-ref arg in FLIRT)");
-   NR_INFO("\t\tfilename3 - Image used as a floating (-in arg in FLIRT)");
-   NR_INFO("\t\tfilename4 - Output affine transformation file name\n");
+    NR_INFO("\t-flirtAff2NR <filename1> <filename2> <filename3> <filename4>");
+    NR_INFO("\t\tConvert a flirt (FSL) affine transformation to a NiftyReg affine transformation");
+    NR_INFO("\t\tfilename1 - Input FLIRT (FSL) affine transformation file name");
+    NR_INFO("\t\tfilename2 - Image used as a reference (-ref arg in FLIRT)");
+    NR_INFO("\t\tfilename3 - Image used as a floating (-in arg in FLIRT)");
+    NR_INFO("\t\tfilename4 - Output affine transformation file name\n");
 #ifdef _OPENMP
-   int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=nullptr)
-      defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   NR_INFO("\t-omp <int>\n\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]");
+    int defaultOpenMPValue = omp_get_num_procs();
+    if (getenv("OMP_NUM_THREADS") != nullptr)
+        defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
+    NR_INFO("\t-omp <int>\n\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]");
 #endif
-   NR_INFO("\t--version\n\t\tPrint current version and exit (" << NR_VERSION << ")");
+    NR_INFO("\t--version\n\t\tPrint current version and exit (" << NR_VERSION << ")");
 
-   NR_INFO("\n\t* The supported transformation types are:");
-   NR_INFO("\t\t- cubic B-Spline parametrised grid (reference image is required)");
-   NR_INFO("\t\t- a dense deformation field");
-   NR_INFO("\t\t- a dense displacement field");
-   NR_INFO("\t\t- a cubic B-Spline parametrised stationary velocity field (reference image is required)");
-   NR_INFO("\t\t- a stationary velocity deformation field");
-   NR_INFO("\t\t- a stationary velocity displacement field");
-   NR_INFO("\t\t- an affine matrix\n");
-   NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
+    NR_INFO("\n\t* The supported transformation types are:");
+    NR_INFO("\t\t- cubic B-Spline parametrised grid (reference image is required)");
+    NR_INFO("\t\t- a dense deformation field");
+    NR_INFO("\t\t- a dense displacement field");
+    NR_INFO("\t\t- a cubic B-Spline parametrised stationary velocity field (reference image is required)");
+    NR_INFO("\t\t- a stationary velocity deformation field");
+    NR_INFO("\t\t- a stationary velocity displacement field");
+    NR_INFO("\t\t- an affine matrix\n");
+    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
-int main(int argc, char **argv)
-{
-   // Display the help if no arguments are provided
-   if(argc==1)
-   {
-      PetitUsage(argv[0]);
-      return EXIT_SUCCESS;
-   }
+int main(int argc, char **argv) {
+    // Display the help if no arguments are provided
+    if (argc == 1) {
+        PetitUsage(argv[0]);
+        return EXIT_SUCCESS;
+    }
 
-   // Set the variables used to store the parsed data
-   PARAM *param = (PARAM *)calloc(1,sizeof(PARAM));
-   FLAG *flag = (FLAG *)calloc(1,sizeof(FLAG));
+    // Set the variables used to store the parsed data
+    PARAM *param = (PARAM *)calloc(1, sizeof(PARAM));
+    FLAG *flag = (FLAG *)calloc(1, sizeof(FLAG));
 
 #ifdef _OPENMP
-   // Set the default number of threads
-   int defaultOpenMPValue=omp_get_num_procs();
-   if(getenv("OMP_NUM_THREADS")!=nullptr)
-      defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS"));
-   omp_set_num_threads(defaultOpenMPValue);
+    // Set the default number of threads
+    int defaultOpenMPValue = omp_get_num_procs();
+    if (getenv("OMP_NUM_THREADS") != nullptr)
+        defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS"));
+    omp_set_num_threads(defaultOpenMPValue);
 #endif
 
-   // Parse the input data
-   for(int i=1; i<argc; ++i)
-   {
-      if(strcmp(argv[i],"-h")==0 ||
-            strcmp(argv[i],"-H")==0 ||
-            strcmp(argv[i],"-help")==0 ||
-            strcmp(argv[i],"--help")==0 ||
-            strcmp(argv[i],"-HELP")==0 ||
-            strcmp(argv[i],"--HELP")==0 ||
-            strcmp(argv[i],"-Help")==0 ||
-            strcmp(argv[i],"--Help")==0
-        )
-      {
-         free(param);
-         free(flag);
-         Usage(argv[0]);
-         return EXIT_SUCCESS;
-      }
-      else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0)
-      {
+    // Parse the input data
+    for (int i = 1; i < argc; ++i) {
+        if (strcmp(argv[i], "-h") == 0 ||
+            strcmp(argv[i], "-H") == 0 ||
+            strcmp(argv[i], "-help") == 0 ||
+            strcmp(argv[i], "--help") == 0 ||
+            strcmp(argv[i], "-HELP") == 0 ||
+            strcmp(argv[i], "--HELP") == 0 ||
+            strcmp(argv[i], "-Help") == 0 ||
+            strcmp(argv[i], "--Help") == 0) {
+            free(param);
+            free(flag);
+            Usage(argv[0]);
+            return EXIT_SUCCESS;
+        } else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) {
 #ifdef _OPENMP
-         omp_set_num_threads(atoi(argv[++i]));
+            omp_set_num_threads(atoi(argv[++i]));
 #else
-         NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
-         ++i;
+            NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored");
+            ++i;
 #endif
-      }
-      else if(strcmp(argv[i], "-version")==0 || strcmp(argv[i], "-Version")==0 ||
-            strcmp(argv[i], "-V")==0 || strcmp(argv[i], "-v")==0 ||
-            strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0)
-      {
-         NR_COUT << NR_VERSION << std::endl;
-         return EXIT_SUCCESS;
-      }
-      else if(strcmp(argv[i],"-ref")==0 || strcmp(argv[i],"--ref")==0 || strcmp(argv[i],"-target")==0)
-      {
-         flag->referenceImageFlag=true;
-         param->referenceImageName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-ref2")==0 || strcmp(argv[i],"--ref2")==0 || strcmp(argv[i],"-target2")==0)
-      {
-         flag->referenceImage2Flag=true;
-         param->referenceImage2Name=argv[++i];
-      }
-      else if(strcmp(argv[i],"-def")==0 || strcmp(argv[i],"--def")==0)
-      {
-         flag->outputDefFlag=true;
-         param->inputTransName=argv[++i];
-         param->outputTransName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-disp")==0 || strcmp(argv[i],"--disp")==0)
-      {
-         flag->outputDispFlag=true;
-         param->inputTransName=argv[++i];
-         param->outputTransName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-flow")==0 || strcmp(argv[i],"--flow")==0)
-      {
-         flag->outputFlowFlag=true;
-         param->inputTransName=argv[++i];
-         param->outputTransName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-comp")==0 || strcmp(argv[i],"--comp")==0)
-      {
-         flag->outputCompFlag=true;
-         param->inputTransName=argv[++i];
-         param->input2TransName=argv[++i];
-         param->outputTransName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-land")==0 || strcmp(argv[i],"--land")==0)
-      {
-         flag->outputLandFlag=true;
-         param->inputTransName=argv[++i];
-         param->inputLandmarkName=argv[++i];
-         param->outputTransName=argv[++i];
-      }
+        } else if (strcmp(argv[i], "-version") == 0 || strcmp(argv[i], "-Version") == 0 ||
+                 strcmp(argv[i], "-V") == 0 || strcmp(argv[i], "-v") == 0 ||
+                 strcmp(argv[i], "--v") == 0 || strcmp(argv[i], "--version") == 0) {
+            NR_COUT << NR_VERSION << std::endl;
+            return EXIT_SUCCESS;
+        } else if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "--ref") == 0 || strcmp(argv[i], "-target") == 0) {
+            flag->referenceImageFlag = true;
+            param->referenceImageName = argv[++i];
+        } else if (strcmp(argv[i], "-ref2") == 0 || strcmp(argv[i], "--ref2") == 0 || strcmp(argv[i], "-target2") == 0) {
+            flag->referenceImage2Flag = true;
+            param->referenceImage2Name = argv[++i];
+        } else if (strcmp(argv[i], "-def") == 0 || strcmp(argv[i], "--def") == 0) {
+            flag->outputDefFlag = true;
+            param->inputTransName = argv[++i];
+            param->outputTransName = argv[++i];
+        } else if (strcmp(argv[i], "-disp") == 0 || strcmp(argv[i], "--disp") == 0) {
+            flag->outputDispFlag = true;
+            param->inputTransName = argv[++i];
+            param->outputTransName = argv[++i];
+        } else if (strcmp(argv[i], "-flow") == 0 || strcmp(argv[i], "--flow") == 0) {
+            flag->outputFlowFlag = true;
+            param->inputTransName = argv[++i];
+            param->outputTransName = argv[++i];
+        } else if (strcmp(argv[i], "-comp") == 0 || strcmp(argv[i], "--comp") == 0) {
+            flag->outputCompFlag = true;
+            param->inputTransName = argv[++i];
+            param->input2TransName = argv[++i];
+            param->outputTransName = argv[++i];
+        } else if (strcmp(argv[i], "-land") == 0 || strcmp(argv[i], "--land") == 0) {
+            flag->outputLandFlag = true;
+            param->inputTransName = argv[++i];
+            param->inputLandmarkName = argv[++i];
+            param->outputTransName = argv[++i];
+        }
 
-      else if(strcmp(argv[i],"-updSform")==0 || strcmp(argv[i],"--comp")==0)
-      {
-         flag->updSFormFlag=true;
-         param->inputTransName=argv[++i];
-         param->input2TransName=argv[++i];
-         param->outputTransName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-half")==0 || strcmp(argv[i],"--half")==0)
-      {
-         flag->halfTransFlag=true;
-         param->inputTransName=argv[++i];
-         param->outputTransName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-invAff")==0 || strcmp(argv[i],"--invAff")==0 ||
-              strcmp(argv[i],"-invAffine")==0 || strcmp(argv[i],"--invAffine")==0)
-      {
-         flag->invertAffFlag=true;
-         param->inputTransName=argv[++i];
-         param->outputTransName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-invNrr")==0 || strcmp(argv[i],"--invNrr")==0)
-      {
-         flag->invertNRRFlag=true;
-         param->inputTransName=argv[++i];
-         param->input2TransName=argv[++i];
-         param->outputTransName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-makeAff")==0 || strcmp(argv[i],"--makeAff")==0)
-      {
-         flag->makeAffFlag=true;
-         for(int j=0; j<12; ++j)
-            param->affTransParam[j]=static_cast<float>(atof(argv[++i]));
-         param->outputTransName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-aff2rig")==0 || strcmp(argv[i],"--aff2rig")==0)
-      {
-         flag->aff2rigFlag=true;
-         param->inputTransName=argv[++i];
-         param->outputTransName=argv[++i];
-      }
-      else if(strcmp(argv[i],"-flirtAff2NR")==0 || strcmp(argv[i],"--flirtAff2NR")==0)
-      {
-         flag->flirtAff2NRFlag=true;
-         param->inputTransName=argv[++i];
-         param->referenceImageName=argv[++i];
-         param->referenceImage2Name=argv[++i];
-         param->outputTransName=argv[++i];
-      }
-      else
-      {
-         NR_ERROR("Unrecognised argument: " << argv[i]);
-         return EXIT_FAILURE;
-      }
-   }
-
-   /* ********************************************** */
-   // Generate the deformation or displacement field //
-   /* ********************************************** */
-   if(flag->outputDefFlag || flag->outputDispFlag || flag->outputFlowFlag)
-   {
-      // Create some variables
-      mat44 *affineTransformation=nullptr;
-      nifti_image *referenceImage=nullptr;
-      nifti_image *inputTransformationImage=nullptr;
-      nifti_image *outputTransformationImage=nullptr;
-      // First check if the input filename is an image
-      if(reg_isAnImageFileName(param->inputTransName))
-      {
-         inputTransformationImage=reg_io_ReadImageFile(param->inputTransName);
-         if(inputTransformationImage==nullptr)
-         {
-            NR_ERROR("Error when reading the provided transformation: " << param->inputTransName);
+        else if (strcmp(argv[i], "-updSform") == 0 || strcmp(argv[i], "--comp") == 0) {
+            flag->updSFormFlag = true;
+            param->inputTransName = argv[++i];
+            param->input2TransName = argv[++i];
+            param->outputTransName = argv[++i];
+        } else if (strcmp(argv[i], "-half") == 0 || strcmp(argv[i], "--half") == 0) {
+            flag->halfTransFlag = true;
+            param->inputTransName = argv[++i];
+            param->outputTransName = argv[++i];
+        } else if (strcmp(argv[i], "-invAff") == 0 || strcmp(argv[i], "--invAff") == 0 ||
+                 strcmp(argv[i], "-invAffine") == 0 || strcmp(argv[i], "--invAffine") == 0) {
+            flag->invertAffFlag = true;
+            param->inputTransName = argv[++i];
+            param->outputTransName = argv[++i];
+        } else if (strcmp(argv[i], "-invNrr") == 0 || strcmp(argv[i], "--invNrr") == 0) {
+            flag->invertNRRFlag = true;
+            param->inputTransName = argv[++i];
+            param->input2TransName = argv[++i];
+            param->outputTransName = argv[++i];
+        } else if (strcmp(argv[i], "-makeAff") == 0 || strcmp(argv[i], "--makeAff") == 0) {
+            flag->makeAffFlag = true;
+            for (int j = 0; j < 12; ++j)
+                param->affTransParam[j] = static_cast<float>(atof(argv[++i]));
+            param->outputTransName = argv[++i];
+        } else if (strcmp(argv[i], "-aff2rig") == 0 || strcmp(argv[i], "--aff2rig") == 0) {
+            flag->aff2rigFlag = true;
+            param->inputTransName = argv[++i];
+            param->outputTransName = argv[++i];
+        } else if (strcmp(argv[i], "-flirtAff2NR") == 0 || strcmp(argv[i], "--flirtAff2NR") == 0) {
+            flag->flirtAff2NRFlag = true;
+            param->inputTransName = argv[++i];
+            param->referenceImageName = argv[++i];
+            param->referenceImage2Name = argv[++i];
+            param->outputTransName = argv[++i];
+        } else {
+            NR_ERROR("Unrecognised argument: " << argv[i]);
             return EXIT_FAILURE;
-         }
-         // If the input transformation is a grid, check that the reference image has been specified
-         if(inputTransformationImage->intent_p1==LIN_SPLINE_GRID ||
-               inputTransformationImage->intent_p1==CUB_SPLINE_GRID ||
-               inputTransformationImage->intent_p1==SPLINE_VEL_GRID)
-         {
-            if(!flag->referenceImageFlag)
-            {
-               NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," <<
-                        " a reference image should be specified (-ref flag)");
-               return EXIT_FAILURE;
+        }
+    }
+
+    /* ********************************************** */
+    // Generate the deformation or displacement field //
+    /* ********************************************** */
+    if (flag->outputDefFlag || flag->outputDispFlag || flag->outputFlowFlag) {
+        // Create some variables
+        mat44 *affineTransformation = nullptr;
+        nifti_image *referenceImage = nullptr;
+        nifti_image *inputTransformationImage = nullptr;
+        nifti_image *outputTransformationImage = nullptr;
+        // First check if the input filename is an image
+        if (reg_isAnImageFileName(param->inputTransName)) {
+            inputTransformationImage = reg_io_ReadImageFile(param->inputTransName);
+            if (inputTransformationImage == nullptr) {
+                NR_ERROR("Error when reading the provided transformation: " << param->inputTransName);
+                return EXIT_FAILURE;
             }
-            referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-            if(referenceImage==nullptr)
-            {
-               NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
-               return EXIT_FAILURE;
+            // If the input transformation is a grid, check that the reference image has been specified
+            if (inputTransformationImage->intent_p1 == LIN_SPLINE_GRID ||
+                inputTransformationImage->intent_p1 == CUB_SPLINE_GRID ||
+                inputTransformationImage->intent_p1 == SPLINE_VEL_GRID) {
+                if (!flag->referenceImageFlag) {
+                    NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," <<
+                             " a reference image should be specified (-ref flag)");
+                    return EXIT_FAILURE;
+                }
+                referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+                if (referenceImage == nullptr) {
+                    NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
+                    return EXIT_FAILURE;
+                }
             }
-         }
-      }
-      else
-      {
-         // Read the affine transformation
-         affineTransformation=(mat44 *)malloc(sizeof(mat44));
-         reg_tool_ReadAffineFile(affineTransformation,param->inputTransName);
-         if(!flag->referenceImageFlag)
-         {
-            NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," <<
-                     " a reference image should be specified (-ref flag)");
-            return EXIT_FAILURE;
-         }
-         referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-         if(referenceImage==nullptr)
-         {
-            NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
-            return EXIT_FAILURE;
-         }
-      }
-      // Create a dense field
-      if(affineTransformation!=nullptr ||
-            inputTransformationImage->intent_p1==LIN_SPLINE_GRID ||
-            inputTransformationImage->intent_p1==CUB_SPLINE_GRID ||
-            inputTransformationImage->intent_p1==SPLINE_VEL_GRID)
-      {
-         // Create a field image from the reference image
-         outputTransformationImage=nifti_copy_nim_info(referenceImage);
-         outputTransformationImage->ndim=outputTransformationImage->dim[0]=5;
-         outputTransformationImage->nt=outputTransformationImage->dim[4]=1;
-         outputTransformationImage->nu=outputTransformationImage->dim[5]=outputTransformationImage->nz>1?3:2;
-         outputTransformationImage->nvox=NiftiImage::calcVoxelNumber(outputTransformationImage, outputTransformationImage->ndim);
-         outputTransformationImage->nbyper=sizeof(float);
-         outputTransformationImage->datatype=NIFTI_TYPE_FLOAT32;
-         outputTransformationImage->intent_code=NIFTI_INTENT_VECTOR;
-         memset(outputTransformationImage->intent_name, 0, 16);
-         strcpy(outputTransformationImage->intent_name,"NREG_TRANS");
-         outputTransformationImage->scl_slope=1.f;
-         outputTransformationImage->scl_inter=0.f;
-      }
-      else
-      {
-         // Create a deformation field from in the input transformation
-         outputTransformationImage=nifti_copy_nim_info(inputTransformationImage);
-      }
-      // Allocate the output field data array
-      outputTransformationImage->data=malloc(outputTransformationImage->nvox*outputTransformationImage->nbyper);
-      // Create a flow field image
-      if(flag->outputFlowFlag)
-      {
-         if(affineTransformation!=nullptr)
-         {
-            NR_ERROR("A flow field transformation can not be generated from an affine transformation");
-            return EXIT_FAILURE;
-         }
-         if(inputTransformationImage->intent_p1==LIN_SPLINE_GRID)
-         {
-            NR_ERROR("A flow field transformation can not be generated from a linear spline grid");
-            return EXIT_FAILURE;
-         }
-         if(inputTransformationImage->intent_p1==CUB_SPLINE_GRID)
-         {
-            NR_ERROR("A flow field transformation can not be generated from a cubic spline grid");
-            return EXIT_FAILURE;
-         }
-         if(inputTransformationImage->intent_p1==DEF_FIELD)
-         {
-            NR_ERROR("A flow field transformation can not be generated from a deformation field");
-            return EXIT_FAILURE;
-         }
-         if(inputTransformationImage->intent_p1==DISP_FIELD)
-         {
-            NR_ERROR("A flow field transformation can not be generated from a displacement field");
-            return EXIT_FAILURE;
-         }
-         switch(static_cast<int>(inputTransformationImage->intent_p1))
-         {
-            break;
-         case DEF_VEL_FIELD:
-            NR_INFO("The specified transformation is a deformation velocity field:");
-            NR_INFO(inputTransformationImage->fname);
-            // The current input transformation is copied
-            memcpy(outputTransformationImage->data,inputTransformationImage->data,
-                   outputTransformationImage->nvox*outputTransformationImage->nbyper);
+        } else {
+            // Read the affine transformation
+            affineTransformation = (mat44 *)malloc(sizeof(mat44));
+            reg_tool_ReadAffineFile(affineTransformation, param->inputTransName);
+            if (!flag->referenceImageFlag) {
+                NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," <<
+                         " a reference image should be specified (-ref flag)");
+                return EXIT_FAILURE;
+            }
+            referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+            if (referenceImage == nullptr) {
+                NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
+                return EXIT_FAILURE;
+            }
+        }
+        // Create a dense field
+        if (affineTransformation != nullptr || (inputTransformationImage != nullptr &&
+                                                (inputTransformationImage->intent_p1 == LIN_SPLINE_GRID ||
+                                                 inputTransformationImage->intent_p1 == CUB_SPLINE_GRID ||
+                                                 inputTransformationImage->intent_p1 == SPLINE_VEL_GRID))) {
+            // Create a field image from the reference image
+            outputTransformationImage = nifti_copy_nim_info(referenceImage);
+            outputTransformationImage->ndim = outputTransformationImage->dim[0] = 5;
+            outputTransformationImage->nt = outputTransformationImage->dim[4] = 1;
+            outputTransformationImage->nu = outputTransformationImage->dim[5] = outputTransformationImage->nz > 1 ? 3 : 2;
+            outputTransformationImage->nvox = NiftiImage::calcVoxelNumber(outputTransformationImage, outputTransformationImage->ndim);
+            outputTransformationImage->nbyper = sizeof(float);
+            outputTransformationImage->datatype = NIFTI_TYPE_FLOAT32;
+            outputTransformationImage->intent_code = NIFTI_INTENT_VECTOR;
+            memset(outputTransformationImage->intent_name, 0, 16);
+            strcpy(outputTransformationImage->intent_name, "NREG_TRANS");
+            outputTransformationImage->scl_slope = 1.f;
+            outputTransformationImage->scl_inter = 0.f;
+        } else {
+            // Create a deformation field from in the input transformation
+            outputTransformationImage = nifti_copy_nim_info(inputTransformationImage);
+        }
+        // Allocate the output field data array
+        outputTransformationImage->data = malloc(outputTransformationImage->nvox * outputTransformationImage->nbyper);
+        // Create a flow field image
+        if (flag->outputFlowFlag) {
+            if (affineTransformation != nullptr) {
+                NR_ERROR("A flow field transformation can not be generated from an affine transformation");
+                return EXIT_FAILURE;
+            }
+            if (inputTransformationImage) {
+                if (inputTransformationImage->intent_p1 == LIN_SPLINE_GRID) {
+                    NR_ERROR("A flow field transformation can not be generated from a linear spline grid");
+                    return EXIT_FAILURE;
+                }
+                if (inputTransformationImage->intent_p1 == CUB_SPLINE_GRID) {
+                    NR_ERROR("A flow field transformation can not be generated from a cubic spline grid");
+                    return EXIT_FAILURE;
+                }
+                if (inputTransformationImage->intent_p1 == DEF_FIELD) {
+                    NR_ERROR("A flow field transformation can not be generated from a deformation field");
+                    return EXIT_FAILURE;
+                }
+                if (inputTransformationImage->intent_p1 == DISP_FIELD) {
+                    NR_ERROR("A flow field transformation can not be generated from a displacement field");
+                    return EXIT_FAILURE;
+                }
+                switch (static_cast<int>(inputTransformationImage->intent_p1)) {
+                    break;
+                case DEF_VEL_FIELD:
+                    NR_INFO("The specified transformation is a deformation velocity field:");
+                    NR_INFO(inputTransformationImage->fname);
+                    // The current input transformation is copied
+                    memcpy(outputTransformationImage->data, inputTransformationImage->data,
+                           outputTransformationImage->nvox * outputTransformationImage->nbyper);
+                    break;
+                case DISP_VEL_FIELD:
+                    NR_INFO("The specified transformation is a displacement velocity field:");
+                    NR_INFO(inputTransformationImage->fname);
+                    // The current input transformation is copied and converted
+                    memcpy(outputTransformationImage->data, inputTransformationImage->data,
+                           outputTransformationImage->nvox * outputTransformationImage->nbyper);
+                    reg_getDisplacementFromDeformation(outputTransformationImage);
+                    break;
+                case SPLINE_VEL_GRID:
+                    NR_INFO("The specified transformation is a spline velocity parametrisation:");
+                    NR_INFO(inputTransformationImage->fname);
+                    reg_spline_getFlowFieldFromVelocityGrid(inputTransformationImage,
+                                                            outputTransformationImage);
+                    break;
+                default:
+                    NR_ERROR("Unknown input transformation type");
+                    return EXIT_FAILURE;
+                }
+                outputTransformationImage->intent_p1 = DEF_VEL_FIELD;
+                outputTransformationImage->intent_p2 = inputTransformationImage->intent_p2;
+            }
+        }
+        // Create a deformation or displacement field
+        else if (flag->outputDefFlag || flag->outputDispFlag) {
+            if (affineTransformation != nullptr) {
+                reg_affine_getDeformationField(affineTransformation, outputTransformationImage);
+            } else {
+                switch (Round(inputTransformationImage->intent_p1)) {
+                case DEF_FIELD:
+                    NR_INFO("The specified transformation is a deformation field:");
+                    NR_INFO(inputTransformationImage->fname);
+                    // the current in transformation is copied
+                    memcpy(outputTransformationImage->data, inputTransformationImage->data,
+                           outputTransformationImage->nvox * outputTransformationImage->nbyper);
+                    break;
+                case DISP_FIELD:
+                    NR_INFO("The specified transformation is a displacement field:");
+                    NR_INFO(inputTransformationImage->fname);
+                    // the current in transformation is copied and converted
+                    memcpy(outputTransformationImage->data, inputTransformationImage->data,
+                           outputTransformationImage->nvox * outputTransformationImage->nbyper);
+                    reg_getDeformationFromDisplacement(outputTransformationImage);
+                    break;
+                case LIN_SPLINE_GRID:
+                case CUB_SPLINE_GRID:
+                    NR_INFO("The specified transformation is a spline parametrisation:");
+                    NR_INFO(inputTransformationImage->fname);
+                    // The output field is filled with an identity deformation field
+                    memset(outputTransformationImage->data,
+                           0,
+                           outputTransformationImage->nvox * outputTransformationImage->nbyper);
+                    reg_getDeformationFromDisplacement(outputTransformationImage);
+                    // The spline transformation is composed with the identity field
+                    reg_spline_getDeformationField(inputTransformationImage,
+                                                   outputTransformationImage,
+                                                   nullptr, // no mask
+                                                   true,  // composition is used,
+                                                   true); // b-spline are used
+                    break;
+                case DEF_VEL_FIELD:
+                    NR_INFO("The specified transformation is a deformation velocity field:");
+                    NR_INFO(inputTransformationImage->fname);
+                    // The flow field is exponentiated
+                    reg_defField_getDeformationFieldFromFlowField(inputTransformationImage,
+                                                                  outputTransformationImage,
+                                                                  false); // step number is not updated
+                    break;
+                case DISP_VEL_FIELD:
+                    NR_INFO("The specified transformation is a displacement velocity field:");
+                    NR_INFO(inputTransformationImage->fname);
+                    // The input transformation is converted into a def flow
+                    reg_getDeformationFromDisplacement(outputTransformationImage);
+                    // The flow field is exponentiated
+                    reg_defField_getDeformationFieldFromFlowField(inputTransformationImage,
+                                                                  outputTransformationImage,
+                                                                  false); // step number is not updated
+                    break;
+                case SPLINE_VEL_GRID:
+                    NR_INFO("The specified transformation is a spline velocity parametrisation:");
+                    NR_INFO(inputTransformationImage->fname);
+                    // The spline parametrisation is converted into a dense flow and exponentiated
+                    reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage,
+                                                           outputTransformationImage,
+                                                           false); // step number is not updated
+                    break;
+                default:
+                    NR_ERROR("Unknown input transformation type");
+                    return EXIT_FAILURE;
+                }
+            }
+            outputTransformationImage->intent_p1 = DEF_FIELD;
+            outputTransformationImage->intent_p2 = 0;
+            if (flag->outputDispFlag)
+                reg_getDisplacementFromDeformation(outputTransformationImage);
+        }
+        // Save the generated transformation
+        reg_io_WriteImageFile(outputTransformationImage, param->outputTransName);
+        switch (Round(outputTransformationImage->intent_p1)) {
+        case DEF_FIELD:
+            NR_INFO("The deformation field has been saved as:");
+            NR_INFO(param->outputTransName);
             break;
-         case DISP_VEL_FIELD:
-            NR_INFO("The specified transformation is a displacement velocity field:");
-            NR_INFO(inputTransformationImage->fname);
-            // The current input transformation is copied and converted
-            memcpy(outputTransformationImage->data,inputTransformationImage->data,
-                   outputTransformationImage->nvox*outputTransformationImage->nbyper);
-            reg_getDisplacementFromDeformation(outputTransformationImage);
+        case DISP_FIELD:
+            NR_INFO("The displacement field has been saved as:");
+            NR_INFO(param->outputTransName);
             break;
-         case SPLINE_VEL_GRID:
-            NR_INFO("The specified transformation is a spline velocity parametrisation:");
-            NR_INFO(inputTransformationImage->fname);
-            reg_spline_getFlowFieldFromVelocityGrid(inputTransformationImage,
-                                                    outputTransformationImage);
+        case DEF_VEL_FIELD:
+            NR_INFO("The flow field has been saved as:");
+            NR_INFO(param->outputTransName);
             break;
-         default:
-            NR_ERROR("Unknown input transformation type");
-            return EXIT_FAILURE;
-         }
-         outputTransformationImage->intent_p1=DEF_VEL_FIELD;
-         outputTransformationImage->intent_p2=inputTransformationImage->intent_p2;
-      }
-      // Create a deformation or displacement field
-      else if(flag->outputDefFlag || flag->outputDispFlag)
-      {
-         if(affineTransformation!=nullptr)
-         {
-            reg_affine_getDeformationField(affineTransformation,outputTransformationImage);
-         }
-         else
-         {
-            switch(Round(inputTransformationImage->intent_p1))
-            {
-            case DEF_FIELD:
-               NR_INFO("The specified transformation is a deformation field:");
-               NR_INFO(inputTransformationImage->fname);
-               // the current in transformation is copied
-               memcpy(outputTransformationImage->data,inputTransformationImage->data,
-                      outputTransformationImage->nvox*outputTransformationImage->nbyper);
-               break;
-            case DISP_FIELD:
-               NR_INFO("The specified transformation is a displacement field:");
-               NR_INFO(inputTransformationImage->fname);
-               // the current in transformation is copied and converted
-               memcpy(outputTransformationImage->data,inputTransformationImage->data,
-                      outputTransformationImage->nvox*outputTransformationImage->nbyper);
-               reg_getDeformationFromDisplacement(outputTransformationImage);
-               break;
+        }
+        // Free the allocated images and arrays
+        if (affineTransformation != nullptr) free(affineTransformation);
+        if (referenceImage != nullptr) nifti_image_free(referenceImage);
+        if (inputTransformationImage != nullptr) nifti_image_free(inputTransformationImage);
+        nifti_image_free(outputTransformationImage);
+    }
+
+    /* ************************************ */
+    // Start the transformation composition //
+    /* ************************************ */
+    if (flag->outputCompFlag) {
+        NR_INFO("Starting the composition of two transformations");
+        // Create some variables
+        mat44 *affine1Trans = nullptr;
+        mat44 *affine2Trans = nullptr;
+        nifti_image *referenceImage = nullptr;
+        nifti_image *referenceImage2 = nullptr;
+        nifti_image *input1TransImage = nullptr;
+        nifti_image *input2TransImage = nullptr;
+        nifti_image *output1TransImage = nullptr;
+        nifti_image *output2TransImage = nullptr;
+        // Read the first transformation
+        if (!reg_isAnImageFileName(param->inputTransName)) {
+            affine1Trans = (mat44 *)malloc(sizeof(mat44));
+            reg_tool_ReadAffineFile(affine1Trans, param->inputTransName);
+            NR_INFO("Transformation 1 is an affine parametrisation:");
+            NR_INFO(param->inputTransName);
+        } else {
+            input1TransImage = reg_io_ReadImageFile(param->inputTransName);
+            if (input1TransImage == nullptr) {
+                NR_ERROR("Error when reading the transformation image: " << param->inputTransName);
+                return EXIT_FAILURE;
+            }
+        }
+        // Read the second transformation
+        if (!reg_isAnImageFileName(param->input2TransName)) {
+            affine2Trans = (mat44 *)malloc(sizeof(mat44));
+            reg_tool_ReadAffineFile(affine2Trans, param->input2TransName);
+        } else {
+            input2TransImage = reg_io_ReadImageFile(param->input2TransName);
+            if (input2TransImage == nullptr) {
+                NR_ERROR("Error when reading the transformation image: " << param->input2TransName);
+                return EXIT_FAILURE;
+            }
+        }
+        // Check if the two input transformations are affine transformation
+        if (affine1Trans != nullptr && affine2Trans != nullptr) {
+            NR_INFO("Transformation 2 is an affine parametrisation:");
+            NR_INFO(param->input2TransName);
+            *affine1Trans = reg_mat44_mul(affine2Trans, affine1Trans);
+            reg_tool_WriteAffineFile(affine1Trans, param->outputTransName);
+        } else {
+            // Check if the reference image is required
+            if (affine1Trans != nullptr) {
+                if (!flag->referenceImageFlag) {
+                    NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," <<
+                             " a reference image should be specified (-res flag).");
+                    return EXIT_FAILURE;
+                }
+                referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+                if (referenceImage == nullptr) {
+                    NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
+                    return EXIT_FAILURE;
+                }
+            } else if (input1TransImage->intent_p1 == LIN_SPLINE_GRID ||
+                     input1TransImage->intent_p1 == CUB_SPLINE_GRID ||
+                     input1TransImage->intent_p1 == SPLINE_VEL_GRID) {
+                if (!flag->referenceImageFlag) {
+                    NR_ERROR("When using an cubic b-spline parametrisation (" << param->inputTransName << ")," <<
+                             " a reference image should be specified (-ref flag).");
+                    return EXIT_FAILURE;
+                }
+                referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+                if (referenceImage == nullptr) {
+                    NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
+                    return EXIT_FAILURE;
+                }
+            }
+            // Read the second reference image if specified
+            if (flag->referenceImage2Flag) {
+                referenceImage2 = reg_io_ReadImageHeader(param->referenceImage2Name);
+                if (referenceImage2 == nullptr) {
+                    NR_ERROR("Error when reading the second reference image: " << param->referenceImage2Name);
+                    return EXIT_FAILURE;
+                }
+            }
+            // Generate the first deformation field
+            if (referenceImage != nullptr) {
+                // The field is created using the reference image space
+                output1TransImage = nifti_copy_nim_info(referenceImage);
+                output1TransImage->ndim = output1TransImage->dim[0] = 5;
+                output1TransImage->nt = output1TransImage->dim[4] = 1;
+                output1TransImage->nu = output1TransImage->dim[5] = output1TransImage->nz > 1 ? 3 : 2;
+                output1TransImage->nvox = NiftiImage::calcVoxelNumber(output1TransImage, output1TransImage->ndim);
+                output1TransImage->scl_slope = 1.f;
+                output1TransImage->scl_inter = 0.f;
+                if (referenceImage->datatype != NIFTI_TYPE_FLOAT32) {
+                    output1TransImage->nbyper = sizeof(float);
+                    output1TransImage->datatype = NIFTI_TYPE_FLOAT32;
+                }
+                NR_INFO("Transformation 1 is defined in the space of image:");
+                NR_INFO(referenceImage->fname);
+            } else {
+                // The field is created using the input transformation image space
+                output1TransImage = nifti_copy_nim_info(input1TransImage);
+            }
+            output1TransImage->intent_code = NIFTI_INTENT_VECTOR;
+            memset(output1TransImage->intent_name, 0, 16);
+            strcpy(output1TransImage->intent_name, "NREG_TRANS");
+            output1TransImage->intent_p1 = DEF_FIELD;
+            output1TransImage->data = calloc(output1TransImage->nvox, output1TransImage->nbyper);
+            if (affine1Trans != nullptr) {
+                reg_affine_getDeformationField(affine1Trans, output1TransImage);
+            } else switch (Round(input1TransImage->intent_p1)) {
             case LIN_SPLINE_GRID:
             case CUB_SPLINE_GRID:
-               NR_INFO("The specified transformation is a spline parametrisation:");
-               NR_INFO(inputTransformationImage->fname);
-               // The output field is filled with an identity deformation field
-               memset(outputTransformationImage->data,
-                      0,
-                      outputTransformationImage->nvox*outputTransformationImage->nbyper);
-               reg_getDeformationFromDisplacement(outputTransformationImage);
-               // The spline transformation is composed with the identity field
-               reg_spline_getDeformationField(inputTransformationImage,
-                                              outputTransformationImage,
-                                              nullptr, // no mask
-                                              true, // composition is used,
-                                              true // b-spline are used
-                                             );
-               break;
+                NR_INFO("Transformation 1 is a spline parametrisation:");
+                NR_INFO(input1TransImage->fname);
+                reg_tools_multiplyValueToImage(output1TransImage, output1TransImage, 0.f);
+                output1TransImage->intent_p1 = DISP_FIELD;
+                reg_getDeformationFromDisplacement(output1TransImage);
+                reg_spline_getDeformationField(input1TransImage,
+                                               output1TransImage,
+                                               nullptr,
+                                               true,
+                                               true);
+                break;
+            case DEF_FIELD:
+                NR_INFO("Transformation 1 is a deformation field:");
+                NR_INFO(input1TransImage->fname);
+                memcpy(output1TransImage->data, input1TransImage->data,
+                       output1TransImage->nbyper * output1TransImage->nvox);
+                break;
+            case DISP_FIELD:
+                NR_INFO("Transformation 1 is a displacement field:");
+                NR_INFO(input1TransImage->fname);
+                memcpy(output1TransImage->data, input1TransImage->data,
+                       output1TransImage->nbyper * output1TransImage->nvox);
+                reg_getDeformationFromDisplacement(output1TransImage);
+                break;
+            case SPLINE_VEL_GRID:
+                NR_INFO("Transformation 1 is a spline velocity field parametrisation:");
+                NR_INFO(input1TransImage->fname);
+                reg_spline_getDefFieldFromVelocityGrid(input1TransImage,
+                                                       output1TransImage,
+                                                       false); // the number of step is not automatically updated
+                break;
             case DEF_VEL_FIELD:
-               NR_INFO("The specified transformation is a deformation velocity field:");
-               NR_INFO(inputTransformationImage->fname);
-               // The flow field is exponentiated
-               reg_defField_getDeformationFieldFromFlowField(inputTransformationImage,
-                     outputTransformationImage,
-                     false // step number is not updated
-                                                            );
-               break;
+                NR_INFO("Transformation 1 is a deformation field velocity:");
+                NR_INFO(input1TransImage->fname);
+                reg_defField_getDeformationFieldFromFlowField(input1TransImage,
+                                                              output1TransImage,
+                                                              false); // the number of step is not automatically updated
+                break;
             case DISP_VEL_FIELD:
-               NR_INFO("The specified transformation is a displacement velocity field:");
-               NR_INFO(inputTransformationImage->fname);
-               // The input transformation is converted into a def flow
-               reg_getDeformationFromDisplacement(outputTransformationImage);
-               // The flow field is exponentiated
-               reg_defField_getDeformationFieldFromFlowField(inputTransformationImage,
-                     outputTransformationImage,
-                     false // step number is not updated
-                                                            );
-               break;
-            case SPLINE_VEL_GRID:
-               NR_INFO("The specified transformation is a spline velocity parametrisation:");
-               NR_INFO(inputTransformationImage->fname);
-               // The spline parametrisation is converted into a dense flow and exponentiated
-               reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage,
-                     outputTransformationImage,
-                     false); // step number is not updated
-               break;
+                NR_INFO("Transformation 1 is a displacement field velocity:");
+                NR_INFO(input1TransImage->fname);
+                reg_getDeformationFromDisplacement(output1TransImage);
+                reg_defField_getDeformationFieldFromFlowField(input1TransImage,
+                                                              output1TransImage,
+                                                              false); // the number of step is not automatically updated
+                break;
             default:
-               NR_ERROR("Unknown input transformation type");
-               return EXIT_FAILURE;
+                NR_ERROR("The specified first input transformation type is not recognised: " << param->input2TransName);
+                return EXIT_FAILURE;
             }
-         }
-         outputTransformationImage->intent_p1=DEF_FIELD;
-         outputTransformationImage->intent_p2=0;
-         if(flag->outputDispFlag)
-            reg_getDisplacementFromDeformation(outputTransformationImage);
-      }
-      // Save the generated transformation
-      reg_io_WriteImageFile(outputTransformationImage,param->outputTransName);
-      switch(Round(outputTransformationImage->intent_p1))
-      {
-      case DEF_FIELD:
-         NR_INFO("The deformation field has been saved as:");
-         NR_INFO(param->outputTransName);
-         break;
-      case DISP_FIELD:
-         NR_INFO("The displacement field has been saved as:");
-         NR_INFO(param->outputTransName);
-         break;
-      case DEF_VEL_FIELD:
-         NR_INFO("The flow field has been saved as:");
-         NR_INFO(param->outputTransName);
-         break;
-      }
-      // Free the allocated images and arrays
-      if(affineTransformation!=nullptr) free(affineTransformation);
-      if(referenceImage!=nullptr) nifti_image_free(referenceImage);
-      if(inputTransformationImage!=nullptr) nifti_image_free(inputTransformationImage);
-      if(outputTransformationImage!=nullptr) nifti_image_free(outputTransformationImage);
-   }
-
-   /* ************************************ */
-   // Start the transformation composition //
-   /* ************************************ */
-   if(flag->outputCompFlag)
-   {
-      NR_INFO("Starting the composition of two transformations");
-      // Create some variables
-      mat44 *affine1Trans=nullptr;
-      mat44 *affine2Trans=nullptr;
-      nifti_image *referenceImage=nullptr;
-      nifti_image *referenceImage2=nullptr;
-      nifti_image *input1TransImage=nullptr;
-      nifti_image *input2TransImage=nullptr;
-      nifti_image *output1TransImage=nullptr;
-      nifti_image *output2TransImage=nullptr;
-      // Read the first transformation
-      if(!reg_isAnImageFileName(param->inputTransName))
-      {
-         affine1Trans=(mat44 *)malloc(sizeof(mat44));
-         reg_tool_ReadAffineFile(affine1Trans,param->inputTransName);
-         NR_INFO("Transformation 1 is an affine parametrisation:");
-         NR_INFO(param->inputTransName);
-      }
-      else
-      {
-         input1TransImage = reg_io_ReadImageFile(param->inputTransName);
-         if(input1TransImage==nullptr)
-         {
-            NR_ERROR("Error when reading the transformation image: " << param->inputTransName);
-            return EXIT_FAILURE;
-         }
-      }
-      // Read the second transformation
-      if(!reg_isAnImageFileName(param->input2TransName))
-      {
-         affine2Trans=(mat44 *)malloc(sizeof(mat44));
-         reg_tool_ReadAffineFile(affine2Trans,param->input2TransName);
-      }
-      else
-      {
-         input2TransImage = reg_io_ReadImageFile(param->input2TransName);
-         if(input2TransImage==nullptr)
-         {
-            NR_ERROR("Error when reading the transformation image: " << param->input2TransName);
-            return EXIT_FAILURE;
-         }
-      }
-      // Check if the two input transformations are affine transformation
-      if(affine1Trans!=nullptr && affine2Trans!=nullptr)
-      {
-         NR_INFO("Transformation 2 is an affine parametrisation:");
-         NR_INFO(param->input2TransName);
-         *affine1Trans=reg_mat44_mul(affine2Trans,affine1Trans);
-         reg_tool_WriteAffineFile(affine1Trans,param->outputTransName);
-      }
-      else
-      {
-         // Check if the reference image is required
-         if(affine1Trans!=nullptr)
-         {
-            if(!flag->referenceImageFlag)
-            {
-               NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," <<
-                        " a reference image should be specified (-res flag).");
-               return EXIT_FAILURE;
+            if (affine2Trans != nullptr) {
+                NR_INFO("Transformation 2 is an affine parametrisation:");
+                NR_INFO(param->input2TransName);
+                // The field is created using the previous image space
+                output2TransImage = nifti_copy_nim_info(output1TransImage);
+                output2TransImage->intent_code = NIFTI_INTENT_VECTOR;
+                memset(output2TransImage->intent_name, 0, 16);
+                strcpy(output2TransImage->intent_name, "NREG_TRANS");
+                output2TransImage->intent_p1 = DEF_FIELD;
+                output2TransImage->data = calloc(output2TransImage->nvox, output2TransImage->nbyper);
+                reg_affine_getDeformationField(affine2Trans, output2TransImage);
+                reg_defField_compose(output2TransImage, output1TransImage, nullptr);
+            } else {
+                switch (Round(input2TransImage->intent_p1)) {
+                case LIN_SPLINE_GRID:
+                case CUB_SPLINE_GRID:
+                    NR_INFO("Transformation 2 is a spline parametrisation:");
+                    NR_INFO(input2TransImage->fname);
+                    reg_spline_getDeformationField(input2TransImage,
+                                                   output1TransImage,
+                                                   nullptr,
+                                                   true,  // composition
+                                                   true); // b-spline
+                    break;
+                case DEF_FIELD:
+                    NR_INFO("Transformation 2 is a deformation field:");
+                    NR_INFO(input2TransImage->fname);
+                    reg_defField_compose(input2TransImage, output1TransImage, nullptr);
+                    break;
+                case DISP_FIELD:
+                    NR_INFO("Transformation 2 is a displacement field:");
+                    NR_INFO(input2TransImage->fname);
+                    reg_getDeformationFromDisplacement(input2TransImage);
+                    reg_defField_compose(input2TransImage, output1TransImage, nullptr);
+                    break;
+                case SPLINE_VEL_GRID:
+                    // The field is created using the second reference image space
+                    if (referenceImage2 != nullptr) {
+                        output2TransImage = nifti_copy_nim_info(referenceImage2);
+                        output2TransImage->scl_slope = 1.f;
+                        output2TransImage->scl_inter = 0.f;
+                        NR_INFO("Transformation 2 is defined in the space of image:");
+                        NR_INFO(referenceImage2->fname);
+                    } else {
+                        output2TransImage = nifti_copy_nim_info(output1TransImage);
+                    }
+                    output2TransImage->ndim = output2TransImage->dim[0] = 5;
+                    output2TransImage->nt = output2TransImage->dim[4] = 1;
+                    output2TransImage->nu = output2TransImage->dim[5] = output2TransImage->nz > 1 ? 3 : 2;
+                    output2TransImage->nvox = NiftiImage::calcVoxelNumber(output2TransImage, output2TransImage->ndim);
+                    output2TransImage->nbyper = output1TransImage->nbyper;
+                    output2TransImage->datatype = output1TransImage->datatype;
+                    output2TransImage->data = calloc(output2TransImage->nvox, output2TransImage->nbyper);
+                    NR_INFO("Transformation 2 is a spline velocity field parametrisation:");
+                    NR_INFO(input2TransImage->fname);
+                    reg_spline_getDefFieldFromVelocityGrid(input2TransImage,
+                                                           output2TransImage,
+                                                           false); // the number of step is not automatically updated
+                    reg_defField_compose(output2TransImage, output1TransImage, nullptr);
+                    break;
+                case DEF_VEL_FIELD:
+                    NR_INFO("Transformation 2 is a deformation field velocity:");
+                    NR_INFO(input2TransImage->fname);
+                    output2TransImage = nifti_dup(*input2TransImage, false);
+                    output2TransImage->intent_p1 = DEF_FIELD;
+                    reg_defField_getDeformationFieldFromFlowField(input2TransImage,
+                                                                  output2TransImage,
+                                                                  false); // the number of step is not automatically updated
+                    reg_defField_compose(output2TransImage, output1TransImage, nullptr);
+                    break;
+                case DISP_VEL_FIELD:
+                    NR_INFO("Transformation 2 is a displacement field velocity:");
+                    NR_INFO(input2TransImage->fname);
+                    output2TransImage = nifti_dup(*input2TransImage, false);
+                    output2TransImage->intent_p1 = DEF_FIELD;
+                    reg_getDeformationFromDisplacement(input2TransImage);
+                    reg_defField_getDeformationFieldFromFlowField(input2TransImage,
+                                                                  output2TransImage,
+                                                                  false); // the number of step is not automatically updated
+                    reg_defField_compose(output2TransImage, output1TransImage, nullptr);
+                    break;
+                default:
+                    NR_ERROR("The specified second input transformation type is not recognised: " << param->input2TransName);
+                    return EXIT_FAILURE;
+                }
             }
-            referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-            if(referenceImage==nullptr)
-            {
-               NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
-               return EXIT_FAILURE;
-            }
-         }
-         else if(input1TransImage->intent_p1==LIN_SPLINE_GRID ||
-                 input1TransImage->intent_p1==CUB_SPLINE_GRID ||
-                 input1TransImage->intent_p1==SPLINE_VEL_GRID)
-         {
-            if(!flag->referenceImageFlag)
-            {
-               NR_ERROR("When using an cubic b-spline parametrisation (" << param->inputTransName << ")," <<
-                        " a reference image should be specified (-ref flag).");
-               return EXIT_FAILURE;
+            // Save the composed transformation
+            memset(output1TransImage->descrip, 0, 80);
+            strcpy(output1TransImage->descrip, "Deformation field from NiftyReg (reg_transform -comp)");
+            reg_io_WriteImageFile(output1TransImage, param->outputTransName);
+            NR_INFO("The final deformation field has been saved as:");
+            NR_INFO(param->outputTransName);
+        }
+        // Free allocated object
+        if (affine1Trans != nullptr) free(affine1Trans);
+        if (affine2Trans != nullptr) free(affine2Trans);
+        if (referenceImage != nullptr) nifti_image_free(referenceImage);
+        if (referenceImage2 != nullptr) nifti_image_free(referenceImage2);
+        if (input1TransImage != nullptr) nifti_image_free(input1TransImage);
+        if (input2TransImage != nullptr) nifti_image_free(input2TransImage);
+        if (output1TransImage != nullptr) nifti_image_free(output1TransImage);
+        if (output2TransImage != nullptr) nifti_image_free(output2TransImage);
+    }
+
+
+    /* ********************************** */
+    // Update the landmark transformation //
+    /* ********************************** */
+    if (flag->outputLandFlag) {
+        // Create some variables
+        mat44 *affineTransformation = nullptr;
+        nifti_image *referenceImage = nullptr;
+        nifti_image *inputTransformationImage = nullptr;
+        nifti_image *deformationFieldImage = nullptr;
+        // First check if the input filename is an image
+        if (reg_isAnImageFileName(param->inputTransName)) {
+            inputTransformationImage = reg_io_ReadImageFile(param->inputTransName);
+            if (inputTransformationImage == nullptr) {
+                NR_ERROR("Error when reading the provided transformation: " << param->inputTransName);
+                return EXIT_FAILURE;
             }
-            referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-            if(referenceImage==nullptr)
-            {
-               NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
-               return EXIT_FAILURE;
+            // If the input transformation is a grid, check that the reference image has been specified
+            if (inputTransformationImage->intent_p1 == LIN_SPLINE_GRID ||
+                inputTransformationImage->intent_p1 == CUB_SPLINE_GRID ||
+                inputTransformationImage->intent_p1 == SPLINE_VEL_GRID) {
+                if (!flag->referenceImageFlag) {
+                    NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," <<
+                             " a reference image should be specified (-ref flag).");
+                    return EXIT_FAILURE;
+                }
+                referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+                if (referenceImage == nullptr) {
+                    NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
+                    return EXIT_FAILURE;
+                }
             }
-         }
-         // Read the second reference image if specified
-         if(flag->referenceImage2Flag)
-         {
-            referenceImage2=reg_io_ReadImageHeader(param->referenceImage2Name);
-            if(referenceImage2==nullptr)
-            {
-               NR_ERROR("Error when reading the second reference image: " << param->referenceImage2Name);
-               return EXIT_FAILURE;
+        } else {
+            // Read the affine transformation
+            affineTransformation = (mat44 *)malloc(sizeof(mat44));
+            reg_tool_ReadAffineFile(affineTransformation, param->inputTransName);
+            if (!flag->referenceImageFlag) {
+                NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," <<
+                         " a reference image should be specified (-ref flag).");
+                return EXIT_FAILURE;
             }
-         }
-         // Generate the first deformation field
-         if(referenceImage!=nullptr)
-         {
-            // The field is created using the reference image space
-            output1TransImage=nifti_copy_nim_info(referenceImage);
-            output1TransImage->ndim=output1TransImage->dim[0]=5;
-            output1TransImage->nt=output1TransImage->dim[4]=1;
-            output1TransImage->nu=output1TransImage->dim[5]=output1TransImage->nz>1?3:2;
-            output1TransImage->nvox=NiftiImage::calcVoxelNumber(output1TransImage, output1TransImage->ndim);
-            output1TransImage->scl_slope=1.f;
-            output1TransImage->scl_inter=0.f;
-            if(referenceImage->datatype!=NIFTI_TYPE_FLOAT32)
-            {
-               output1TransImage->nbyper=sizeof(float);
-               output1TransImage->datatype=NIFTI_TYPE_FLOAT32;
+            referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+            if (referenceImage == nullptr) {
+                NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
+                return EXIT_FAILURE;
             }
-            NR_INFO("Transformation 1 is defined in the space of image:");
-            NR_INFO(referenceImage->fname);
-         }
-         else
-         {
-            // The field is created using the input transformation image space
-            output1TransImage=nifti_copy_nim_info(input1TransImage);
-         }
-         output1TransImage->intent_code=NIFTI_INTENT_VECTOR;
-         memset(output1TransImage->intent_name, 0, 16);
-         strcpy(output1TransImage->intent_name,"NREG_TRANS");
-         output1TransImage->intent_p1=DEF_FIELD;
-         output1TransImage->data=calloc(output1TransImage->nvox,output1TransImage->nbyper);
-         if(affine1Trans!=nullptr)
-         {
-            reg_affine_getDeformationField(affine1Trans,output1TransImage);
-         }
-         else switch(Round(input1TransImage->intent_p1))
-         {
-         case LIN_SPLINE_GRID:
-         case CUB_SPLINE_GRID:
-               NR_INFO("Transformation 1 is a spline parametrisation:");
-               NR_INFO(input1TransImage->fname);
-               reg_tools_multiplyValueToImage(output1TransImage,output1TransImage,0.f);
-               output1TransImage->intent_p1=DISP_FIELD;
-               reg_getDeformationFromDisplacement(output1TransImage);
-               reg_spline_getDeformationField(input1TransImage,
-                                              output1TransImage,
-                                              nullptr,
-                                              true,
-                                              true);
-               break;
+        }
+        // Create a dense field
+        if (affineTransformation != nullptr || (inputTransformationImage != nullptr &&
+                                                (inputTransformationImage->intent_p1 == LIN_SPLINE_GRID ||
+                                                 inputTransformationImage->intent_p1 == CUB_SPLINE_GRID ||
+                                                 inputTransformationImage->intent_p1 == SPLINE_VEL_GRID))) {
+            // Create a field image from the reference image
+            deformationFieldImage = nifti_copy_nim_info(referenceImage);
+            deformationFieldImage->ndim = deformationFieldImage->dim[0] = 5;
+            deformationFieldImage->nt = deformationFieldImage->dim[4] = 1;
+            deformationFieldImage->nu = deformationFieldImage->dim[5] = deformationFieldImage->nz > 1 ? 3 : 2;
+            deformationFieldImage->nvox = NiftiImage::calcVoxelNumber(deformationFieldImage, deformationFieldImage->ndim);
+            deformationFieldImage->nbyper = sizeof(float);
+            deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32;
+            deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR;
+            memset(deformationFieldImage->intent_name, 0, 16);
+            strcpy(deformationFieldImage->intent_name, "NREG_TRANS");
+            deformationFieldImage->scl_slope = 1.f;
+            deformationFieldImage->scl_inter = 0.f;
+        } else {
+            // Create a deformation field from in the input transformation
+            deformationFieldImage = nifti_copy_nim_info(inputTransformationImage);
+        }
+        // Allocate the deformation field
+        deformationFieldImage->data = malloc(deformationFieldImage->nvox * deformationFieldImage->nbyper);
+        // Fill the deformation field
+        if (affineTransformation != nullptr) {
+            reg_affine_getDeformationField(affineTransformation, deformationFieldImage);
+        } else if (inputTransformationImage != nullptr) {
+            switch (Round(inputTransformationImage->intent_p1)) {
             case DEF_FIELD:
-               NR_INFO("Transformation 1 is a deformation field:");
-               NR_INFO(input1TransImage->fname);
-               memcpy(output1TransImage->data,input1TransImage->data,
-                      output1TransImage->nbyper*output1TransImage->nvox);
-               break;
+                NR_INFO("The specified transformation is a deformation field:");
+                NR_INFO(inputTransformationImage->fname);
+                // the current in transformation is copied
+                memcpy(deformationFieldImage->data, inputTransformationImage->data,
+                       deformationFieldImage->nvox * deformationFieldImage->nbyper);
+                break;
             case DISP_FIELD:
-               NR_INFO("Transformation 1 is a displacement field:");
-               NR_INFO(input1TransImage->fname);
-               memcpy(output1TransImage->data,input1TransImage->data,
-                      output1TransImage->nbyper*output1TransImage->nvox);
-               reg_getDeformationFromDisplacement(output1TransImage);
-               break;
-            case SPLINE_VEL_GRID:
-               NR_INFO("Transformation 1 is a spline velocity field parametrisation:");
-               NR_INFO(input1TransImage->fname);
-               reg_spline_getDefFieldFromVelocityGrid(input1TransImage,
-                     output1TransImage,
-                     false); // the number of step is not automatically updated
-               break;
+                NR_INFO("The specified transformation is a displacement field:");
+                NR_INFO(inputTransformationImage->fname);
+                // the current in transformation is copied and converted
+                memcpy(deformationFieldImage->data, inputTransformationImage->data,
+                       deformationFieldImage->nvox * deformationFieldImage->nbyper);
+                reg_getDeformationFromDisplacement(deformationFieldImage);
+                break;
+            case LIN_SPLINE_GRID:
+            case CUB_SPLINE_GRID:
+                NR_INFO("The specified transformation is a spline parametrisation:");
+                NR_INFO(inputTransformationImage->fname);
+                // The deformation field is filled with an identity deformation field
+                memset(deformationFieldImage->data,
+                       0,
+                       deformationFieldImage->nvox * deformationFieldImage->nbyper);
+                reg_getDeformationFromDisplacement(deformationFieldImage);
+                // The spline transformation is composed with the identity field
+                reg_spline_getDeformationField(inputTransformationImage,
+                                               deformationFieldImage,
+                                               nullptr, // no mask
+                                               true, // composition is used,
+                                               true); // b-spline are used
+                break;
             case DEF_VEL_FIELD:
-               NR_INFO("Transformation 1 is a deformation field velocity:");
-               NR_INFO(input1TransImage->fname);
-               reg_defField_getDeformationFieldFromFlowField(input1TransImage,
-                     output1TransImage,
-                     false); // the number of step is not automatically updated
-               break;
+                NR_INFO("The specified transformation is a deformation velocity field:");
+                NR_INFO(inputTransformationImage->fname);
+                // The flow field is exponentiated
+                reg_defField_getDeformationFieldFromFlowField(inputTransformationImage,
+                                                              deformationFieldImage,
+                                                              false); // step number is not updated
+                break;
             case DISP_VEL_FIELD:
-               NR_INFO("Transformation 1 is a displacement field velocity:");
-               NR_INFO(input1TransImage->fname);
-               reg_getDeformationFromDisplacement(output1TransImage);
-               reg_defField_getDeformationFieldFromFlowField(input1TransImage,
-                     output1TransImage,
-                     false); // the number of step is not automatically updated
-               break;
+                NR_INFO("The specified transformation is a displacement velocity field:");
+                NR_INFO(inputTransformationImage->fname);
+                // The input transformation is converted into a def flow
+                reg_getDeformationFromDisplacement(deformationFieldImage);
+                // The flow field is exponentiated
+                reg_defField_getDeformationFieldFromFlowField(inputTransformationImage,
+                                                              deformationFieldImage,
+                                                              false); // step number is not updated
+                break;
+            case SPLINE_VEL_GRID:
+                NR_INFO("The specified transformation is a spline velocity parametrisation:");
+                NR_INFO(inputTransformationImage->fname);
+                // The spline parametrisation is converted into a dense flow and exponentiated
+                reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage,
+                                                       deformationFieldImage,
+                                                       false); // step number is not updated
+                break;
             default:
-               NR_ERROR("The specified first input transformation type is not recognised: " << param->input2TransName);
-               return EXIT_FAILURE;
+                NR_ERROR("Unknown input transformation type");
+                return EXIT_FAILURE;
             }
-         if(affine2Trans!=nullptr)
-         {
-            NR_INFO("Transformation 2 is an affine parametrisation:");
-            NR_INFO(param->input2TransName);
-            // The field is created using the previous image space
-            output2TransImage=nifti_copy_nim_info(output1TransImage);
-            output2TransImage->intent_code=NIFTI_INTENT_VECTOR;
-            memset(output2TransImage->intent_name, 0, 16);
-            strcpy(output2TransImage->intent_name,"NREG_TRANS");
-            output2TransImage->intent_p1=DEF_FIELD;
-            output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper);
-            reg_affine_getDeformationField(affine2Trans,output2TransImage);
-            reg_defField_compose(output2TransImage,output1TransImage,nullptr);
-         }
-         else
-         {
-            switch(Round(input2TransImage->intent_p1))
-            {
+        }
+        deformationFieldImage->intent_p1 = DEF_FIELD;
+        deformationFieldImage->intent_p2 = 0;
+        // Free all allocated input
+        if (affineTransformation != nullptr) free(affineTransformation);
+        if (referenceImage != nullptr) nifti_image_free(referenceImage);
+        if (inputTransformationImage != nullptr) nifti_image_free(inputTransformationImage);
+        // Read the landmark file
+        std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(param->inputLandmarkName);
+        size_t landmarkNumber = inputMatrixSize.first;
+        size_t n = inputMatrixSize.second;
+        if (n == 2 && deformationFieldImage->nz > 1) {
+            NR_ERROR("2 values per line are expected for 2D images");
+            return EXIT_FAILURE;
+        } else if (n == 3 && deformationFieldImage->nz < 2) {
+            NR_ERROR("3 values per line are expected for 3D images");
+            return EXIT_FAILURE;
+        } else if (n != 2 && n != 3) {
+            NR_ERROR("2 or 3 values are expected per line");
+            return EXIT_FAILURE;
+        }
+        float **allLandmarks = reg_tool_ReadMatrixFile<float>(param->inputLandmarkName, landmarkNumber, n);
+        // Allocate a deformation field to store the landmark position
+        nifti_image *landmarkImage = nifti_copy_nim_info(deformationFieldImage);
+        landmarkImage->ndim = landmarkImage->dim[0] = 5;
+        landmarkImage->nx = landmarkImage->dim[1] = 1;
+        landmarkImage->ny = landmarkImage->dim[2] = 1;
+        landmarkImage->nz = landmarkImage->dim[3] = 1;
+        landmarkImage->nvox = NiftiImage::calcVoxelNumber(landmarkImage, landmarkImage->ndim);
+        landmarkImage->data = malloc(landmarkImage->nvox * landmarkImage->nbyper);
+        float *landmarkImagePtr = static_cast<float *>(landmarkImage->data);
+        for (size_t l = 0, index = 0; l < landmarkNumber; ++l) {
+            for (size_t i = 0; i < n; ++i)
+                landmarkImagePtr[i] = allLandmarks[l][i];
+            reg_defField_compose(deformationFieldImage, landmarkImage, nullptr);
+            for (size_t i = 0; i < n; ++i)
+                allLandmarks[l][i] = landmarkImagePtr[i];
+        }
+        // Save the update landmark positions
+        reg_tool_WriteMatrixFile(param->outputTransName, allLandmarks, landmarkNumber, n);
+        // Free all allocated array and image
+        for (size_t l = 0; l < landmarkNumber; ++l)
+            free(allLandmarks[l]);
+        free(allLandmarks);
+        if (deformationFieldImage != nullptr) nifti_image_free(deformationFieldImage);
+        if (landmarkImage != nullptr) nifti_image_free(landmarkImage);
+    }
+    /* **************************************** */
+    // Update the SForm matrix of a given image //
+    /* **************************************** */
+    if (flag->updSFormFlag) {
+        // Read the input image
+        nifti_image *image = reg_io_ReadImageFile(param->inputTransName);
+        if (image == nullptr) {
+            NR_ERROR("Error when reading the input image: " << param->inputTransName);
+            return EXIT_FAILURE;
+        }
+        // Read the affine transformation
+        mat44 *affineTransformation = (mat44 *)calloc(1, sizeof(mat44));
+        reg_tool_ReadAffineFile(affineTransformation, param->input2TransName);
+        //Invert the affine transformation since the flaoting is updated
+        *affineTransformation = nifti_mat44_inverse(*affineTransformation);
+
+        // Update the sform
+        if (image->sform_code > 0) {
+            image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->sto_xyz));
+        } else {
+            image->sform_code = 1;
+            image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->qto_xyz));
+        }
+        image->sto_ijk = nifti_mat44_inverse(image->sto_xyz);
+
+        // Write the output image
+        reg_io_WriteImageFile(image, param->outputTransName);
+        // Free the allocated image and array
+        nifti_image_free(image);
+        free(affineTransformation);
+    }
+    /* ******************************** */
+    // Half the provided transformation //
+    /* ******************************** */
+    if (flag->halfTransFlag) {
+        // Read the input transformation
+        mat44 *affineTrans = nullptr;
+        nifti_image *inputTransImage = nullptr;
+        if (!reg_isAnImageFileName(param->inputTransName)) {
+            // An affine transformation is considered
+            affineTrans = (mat44 *)malloc(sizeof(mat44));
+            reg_tool_ReadAffineFile(affineTrans, param->inputTransName);
+            // The affine transformation is halfed
+            *affineTrans = reg_mat44_logm(affineTrans);
+            *affineTrans = reg_mat44_mul(affineTrans, 0.5);
+            *affineTrans = reg_mat44_expm(affineTrans);
+            // The affine transformation is saved
+            reg_tool_WriteAffineFile(affineTrans, param->outputTransName);
+        } else {
+            // A non-rigid parametrisation is considered
+            inputTransImage = reg_io_ReadImageFile(param->inputTransName);
+            if (inputTransImage == nullptr) {
+                NR_ERROR("Error when reading the input image: " << param->inputTransName);
+                return EXIT_FAILURE;
+            }
+            switch (Round(inputTransImage->intent_p1)) {
             case LIN_SPLINE_GRID:
             case CUB_SPLINE_GRID:
-               NR_INFO("Transformation 2 is a spline parametrisation:");
-               NR_INFO(input2TransImage->fname);
-               reg_spline_getDeformationField(input2TransImage,
-                                              output1TransImage,
-                                              nullptr,
-                                              true, // composition
-                                              true // b-spline
-                                             );
-               break;
+                reg_getDisplacementFromDeformation(inputTransImage);
+                reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f);
+                reg_getDeformationFromDisplacement(inputTransImage);
+                break;
             case DEF_FIELD:
-               NR_INFO("Transformation 2 is a deformation field:");
-               NR_INFO(input2TransImage->fname);
-               reg_defField_compose(input2TransImage,output1TransImage,nullptr);
-               break;
+                reg_getDisplacementFromDeformation(inputTransImage);
+                reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f);
+                reg_getDeformationFromDisplacement(inputTransImage);
+                break;
             case DISP_FIELD:
-               NR_INFO("Transformation 2 is a displacement field:");
-               NR_INFO(input2TransImage->fname);
-               reg_getDeformationFromDisplacement(input2TransImage);
-               reg_defField_compose(input2TransImage,output1TransImage,nullptr);
-               break;
+                reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f);
+                break;
             case SPLINE_VEL_GRID:
-               // The field is created using the second reference image space
-               if(referenceImage2!=nullptr)
-               {
-                  output2TransImage=nifti_copy_nim_info(referenceImage2);
-                  output2TransImage->scl_slope=1.f;
-                  output2TransImage->scl_inter=0.f;
-                  NR_INFO("Transformation 2 is defined in the space of image:");
-                  NR_INFO(referenceImage2->fname);
-               }
-               else
-               {
-                  output2TransImage=nifti_copy_nim_info(output1TransImage);
-               }
-               output2TransImage->ndim=output2TransImage->dim[0]=5;
-               output2TransImage->nt=output2TransImage->dim[4]=1;
-               output2TransImage->nu=output2TransImage->dim[5]=output2TransImage->nz>1?3:2;
-               output2TransImage->nvox=NiftiImage::calcVoxelNumber(output2TransImage, output2TransImage->ndim);
-               output2TransImage->nbyper=output1TransImage->nbyper;
-               output2TransImage->datatype=output1TransImage->datatype;
-               output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper);
-               NR_INFO("Transformation 2 is a spline velocity field parametrisation:");
-               NR_INFO(input2TransImage->fname);
-               reg_spline_getDefFieldFromVelocityGrid(input2TransImage,
-                     output2TransImage,
-                     false // the number of step is not automatically updated
-                                                             );
-               reg_defField_compose(output2TransImage,output1TransImage,nullptr);
-               break;
+                reg_getDisplacementFromDeformation(inputTransImage);
+                reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f);
+                reg_getDeformationFromDisplacement(inputTransImage);
+                --inputTransImage->intent_p2;
+                if (inputTransImage->num_ext > 1)
+                    --inputTransImage->num_ext;
+                break;
             case DEF_VEL_FIELD:
-               NR_INFO("Transformation 2 is a deformation field velocity:");
-               NR_INFO(input2TransImage->fname);
-               output2TransImage = nifti_dup(*input2TransImage, false);
-               output2TransImage->intent_p1=DEF_FIELD;
-               reg_defField_getDeformationFieldFromFlowField(input2TransImage,
-                     output2TransImage,
-                     false // the number of step is not automatically updated
-                                                            );
-               reg_defField_compose(output2TransImage,output1TransImage,nullptr);
-               break;
+                reg_getDisplacementFromDeformation(inputTransImage);
+                reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f);
+                reg_getDeformationFromDisplacement(inputTransImage);
+                --inputTransImage->intent_p2;
+                break;
             case DISP_VEL_FIELD:
-               NR_INFO("Transformation 2 is a displacement field velocity:");
-               NR_INFO(input2TransImage->fname);
-               output2TransImage = nifti_dup(*input2TransImage, false);
-               output2TransImage->intent_p1=DEF_FIELD;
-               reg_getDeformationFromDisplacement(input2TransImage);
-               reg_defField_getDeformationFieldFromFlowField(input2TransImage,
-                     output2TransImage,
-                     false // the number of step is not automatically updated
-                                                            );
-               reg_defField_compose(output2TransImage,output1TransImage,nullptr);
-               break;
+                reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f);
+                --inputTransImage->intent_p2;
+                break;
             default:
-               NR_ERROR("The specified second input transformation type is not recognised: " << param->input2TransName);
-               return EXIT_FAILURE;
+                NR_ERROR("The specified input transformation type is not recognised: " << param->inputTransName);
+                return EXIT_FAILURE;
             }
-         }
-         // Save the composed transformation
-         memset(output1TransImage->descrip, 0, 80);
-         strcpy(output1TransImage->descrip, "Deformation field from NiftyReg (reg_transform -comp)");
-         reg_io_WriteImageFile(output1TransImage,param->outputTransName);
-         NR_INFO("The final deformation field has been saved as:");
-         NR_INFO(param->outputTransName);
-      }
-      // Free allocated object
-      if(affine1Trans!=nullptr) free(affine1Trans);
-      if(affine2Trans!=nullptr) free(affine2Trans);
-      if(referenceImage!=nullptr) nifti_image_free(referenceImage);
-      if(referenceImage2!=nullptr) nifti_image_free(referenceImage2);
-      if(input1TransImage!=nullptr) nifti_image_free(input1TransImage);
-      if(input2TransImage!=nullptr) nifti_image_free(input2TransImage);
-      if(output1TransImage!=nullptr) nifti_image_free(output1TransImage);
-      if(output2TransImage!=nullptr) nifti_image_free(output2TransImage);
-   }
-
-
-   /* ********************************** */
-   // Update the landmark transformation //
-   /* ********************************** */
-   if(flag->outputLandFlag)
-   {
-      // Create some variables
-      mat44 *affineTransformation=nullptr;
-      nifti_image *referenceImage=nullptr;
-      nifti_image *inputTransformationImage=nullptr;
-      nifti_image *deformationFieldImage=nullptr;
-      // First check if the input filename is an image
-      if(reg_isAnImageFileName(param->inputTransName))
-      {
-         inputTransformationImage=reg_io_ReadImageFile(param->inputTransName);
-         if(inputTransformationImage==nullptr)
-         {
-            NR_ERROR("Error when reading the provided transformation: " << param->inputTransName);
+            // Save the image
+            reg_io_WriteImageFile(inputTransImage, param->outputTransName);
+        }
+        // Deallocate the allocated arrays
+        if (affineTrans != nullptr) free(affineTrans);
+    }
+    /* ******************************************** */
+    // Invert the provided non-rigid transformation //
+    /* ******************************************** */
+    if (flag->invertNRRFlag) {
+        // Read the provided transformation
+        nifti_image *inputTransImage = reg_io_ReadImageFile(param->inputTransName);
+        if (inputTransImage == nullptr) {
+            NR_ERROR("Error when reading the input image: " << param->inputTransName);
+            return EXIT_FAILURE;
+        }
+        // Read the provided floating space image
+        nifti_image *floatingImage = reg_io_ReadImageFile(param->input2TransName);
+        if (floatingImage == nullptr) {
+            NR_ERROR("Error when reading the input image: " << param->input2TransName);
             return EXIT_FAILURE;
-         }
-         // If the input transformation is a grid, check that the reference image has been specified
-         if(inputTransformationImage->intent_p1==LIN_SPLINE_GRID ||
-               inputTransformationImage->intent_p1==CUB_SPLINE_GRID ||
-               inputTransformationImage->intent_p1==SPLINE_VEL_GRID)
-         {
-            if(!flag->referenceImageFlag)
-            {
-               NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," <<
-                        " a reference image should be specified (-ref flag).");
-               return EXIT_FAILURE;
+        }
+        // Convert the spline parametrisation into a dense deformation parametrisation
+        if (inputTransImage->intent_p1 == LIN_SPLINE_GRID ||
+            inputTransImage->intent_p1 == CUB_SPLINE_GRID ||
+            inputTransImage->intent_p1 == SPLINE_VEL_GRID) {
+            // Read the reference image
+            if (!flag->referenceImageFlag) {
+                NR_ERROR("When using an spline parametrisation transformation (" << param->inputTransName << ")," <<
+                         " a reference image should be specified (-ref flag).");
+                return EXIT_FAILURE;
             }
-            referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-            if(referenceImage==nullptr)
-            {
-               NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
-               return EXIT_FAILURE;
+            nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+            if (referenceImage == nullptr) {
+                NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
+                return EXIT_FAILURE;
             }
-         }
-      }
-      else
-      {
-         // Read the affine transformation
-         affineTransformation=(mat44 *)malloc(sizeof(mat44));
-         reg_tool_ReadAffineFile(affineTransformation,param->inputTransName);
-         if(!flag->referenceImageFlag)
-         {
-            NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," <<
-                     " a reference image should be specified (-ref flag).");
-            return EXIT_FAILURE;
-         }
-         referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-         if(referenceImage==nullptr)
-         {
-            NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
-            return EXIT_FAILURE;
-         }
-      }
-      // Create a dense field
-      if(affineTransformation!=nullptr ||
-         inputTransformationImage->intent_p1==LIN_SPLINE_GRID ||
-         inputTransformationImage->intent_p1==CUB_SPLINE_GRID ||
-         inputTransformationImage->intent_p1==SPLINE_VEL_GRID)
-      {
-         // Create a field image from the reference image
-         deformationFieldImage=nifti_copy_nim_info(referenceImage);
-         deformationFieldImage->ndim=deformationFieldImage->dim[0]=5;
-         deformationFieldImage->nt=deformationFieldImage->dim[4]=1;
-         deformationFieldImage->nu=deformationFieldImage->dim[5]=deformationFieldImage->nz>1?3:2;
-         deformationFieldImage->nvox=NiftiImage::calcVoxelNumber(deformationFieldImage, deformationFieldImage->ndim);
-         deformationFieldImage->nbyper=sizeof(float);
-         deformationFieldImage->datatype=NIFTI_TYPE_FLOAT32;
-         deformationFieldImage->intent_code=NIFTI_INTENT_VECTOR;
-         memset(deformationFieldImage->intent_name, 0, 16);
-         strcpy(deformationFieldImage->intent_name,"NREG_TRANS");
-         deformationFieldImage->scl_slope=1.f;
-         deformationFieldImage->scl_inter=0.f;
-      }
-      else
-      {
-         // Create a deformation field from in the input transformation
-         deformationFieldImage=nifti_copy_nim_info(inputTransformationImage);
-      }
-      // Allocate the deformation field
-      deformationFieldImage->data=malloc(deformationFieldImage->nvox*deformationFieldImage->nbyper);
-      // Fill the deformation field
-      if(affineTransformation!=nullptr)
-      {
-         reg_affine_getDeformationField(affineTransformation,deformationFieldImage);
-      }
-      else
-      {
-         switch(Round(inputTransformationImage->intent_p1))
-         {
-         case DEF_FIELD:
-            NR_INFO("The specified transformation is a deformation field:");
-            NR_INFO(inputTransformationImage->fname);
-            // the current in transformation is copied
-            memcpy(deformationFieldImage->data,inputTransformationImage->data,
-                   deformationFieldImage->nvox*deformationFieldImage->nbyper);
-            break;
-         case DISP_FIELD:
-            NR_INFO("The specified transformation is a displacement field:");
-            NR_INFO(inputTransformationImage->fname);
-            // the current in transformation is copied and converted
-            memcpy(deformationFieldImage->data,inputTransformationImage->data,
-                   deformationFieldImage->nvox*deformationFieldImage->nbyper);
-            reg_getDeformationFromDisplacement(deformationFieldImage);
-            break;
-         case LIN_SPLINE_GRID:
-         case CUB_SPLINE_GRID:
-            NR_INFO("The specified transformation is a spline parametrisation:");
-            NR_INFO(inputTransformationImage->fname);
-            // The deformation field is filled with an identity deformation field
-            memset(deformationFieldImage->data,
-                   0,
-                   deformationFieldImage->nvox*deformationFieldImage->nbyper);
-            reg_getDeformationFromDisplacement(deformationFieldImage);
-            // The spline transformation is composed with the identity field
-            reg_spline_getDeformationField(inputTransformationImage,
-                                           deformationFieldImage,
-                                           nullptr, // no mask
-                                           true, // composition is used,
-                                           true // b-spline are used
-                                           );
-            break;
-         case DEF_VEL_FIELD:
-            NR_INFO("The specified transformation is a deformation velocity field:");
-            NR_INFO(inputTransformationImage->fname);
-            // The flow field is exponentiated
-            reg_defField_getDeformationFieldFromFlowField(inputTransformationImage,
-                                                          deformationFieldImage,
-                                                          false // step number is not updated
-                                                          );
-            break;
-         case DISP_VEL_FIELD:
-            NR_INFO("The specified transformation is a displacement velocity field:");
-            NR_INFO(inputTransformationImage->fname);
-            // The input transformation is converted into a def flow
-            reg_getDeformationFromDisplacement(deformationFieldImage);
-            // The flow field is exponentiated
-            reg_defField_getDeformationFieldFromFlowField(inputTransformationImage,
-                                                          deformationFieldImage,
-                                                          false // step number is not updated
-                                                          );
-            break;
-         case SPLINE_VEL_GRID:
-            NR_INFO("The specified transformation is a spline velocity parametrisation:");
-            NR_INFO(inputTransformationImage->fname);
-            // The spline parametrisation is converted into a dense flow and exponentiated
-            reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage,
-                                                   deformationFieldImage,
-                                                   false // step number is not updated
-                                                   );
-            break;
-         default:
-            NR_ERROR("Unknown input transformation type");
-            return EXIT_FAILURE;
-         }
-      }
-      deformationFieldImage->intent_p1=DEF_FIELD;
-      deformationFieldImage->intent_p2=0;
-      // Free all allocated input
-      if(affineTransformation!=nullptr){
-         free(affineTransformation);
-      }
-      if(referenceImage!=nullptr){
-         nifti_image_free(referenceImage);
-      }
-      if(inputTransformationImage!=nullptr){
-         nifti_image_free(inputTransformationImage);
-      }
-      // Read the landmark file
-      std::pair<size_t, size_t> inputMatrixSize =
-            reg_tool_sizeInputMatrixFile(param->inputLandmarkName);
-      size_t landmarkNumber = inputMatrixSize.first;
-      size_t n = inputMatrixSize.second;
-      if(n==2 && deformationFieldImage->nz>1){
-         NR_ERROR("2 values per line are expected for 2D images");
-         return EXIT_FAILURE;
-      }
-      else if(n==3 && deformationFieldImage->nz<2){
-         NR_ERROR("3 values per line are expected for 3D images");
-         return EXIT_FAILURE;
-      }
-      else if(n!=2 && n!=3){
-         NR_ERROR("2 or 3 values are expected per line");
-         return EXIT_FAILURE;
-      }
-      float **allLandmarks = reg_tool_ReadMatrixFile<float>(param->inputLandmarkName,
-                                                            landmarkNumber,
-                                                            n);
-      // Allocate a deformation field to store the landmark position
-      nifti_image *landmarkImage=nifti_copy_nim_info(deformationFieldImage);
-      landmarkImage->ndim=landmarkImage->dim[0]=5;
-      landmarkImage->nx=landmarkImage->dim[1]=1;
-      landmarkImage->ny=landmarkImage->dim[2]=1;
-      landmarkImage->nz=landmarkImage->dim[3]=1;
-      landmarkImage->nvox=NiftiImage::calcVoxelNumber(landmarkImage, landmarkImage->ndim);
-      landmarkImage->data=malloc(landmarkImage->nvox*landmarkImage->nbyper);
-      float *landmarkImagePtr = static_cast<float *>(landmarkImage->data);
-      for(size_t l=0, index=0;l<landmarkNumber;++l){
-         for(size_t i=0;i<n;++i){
-            landmarkImagePtr[i]=allLandmarks[l][i];
-         }
-         reg_defField_compose(deformationFieldImage,
-                              landmarkImage,
-                              nullptr);
-         for(size_t i=0;i<n;++i){
-            allLandmarks[l][i]=landmarkImagePtr[i];
-         }
-      }
-      // Save the update landmark positions
-      reg_tool_WriteMatrixFile(param->outputTransName,
-                               allLandmarks,
-                               landmarkNumber,
-                               n);
-      // Free all allocated array and image
-      for(size_t l=0; l<landmarkNumber; ++l)
-         free(allLandmarks[l]);
-      free(allLandmarks);
-      if(deformationFieldImage!=nullptr){
-         nifti_image_free(deformationFieldImage);
-      }
-      if(landmarkImage!=nullptr){
-         nifti_image_free(landmarkImage);
-      }
-   }
-   /* **************************************** */
-   // Update the SForm matrix of a given image //
-   /* **************************************** */
-   if(flag->updSFormFlag)
-   {
-      // Read the input image
-      nifti_image *image = reg_io_ReadImageFile(param->inputTransName);
-      if(image==nullptr)
-      {
-         NR_ERROR("Error when reading the input image: " << param->inputTransName);
-         return EXIT_FAILURE;
-      }
-      // Read the affine transformation
-      mat44 *affineTransformation = (mat44 *)calloc(1,sizeof(mat44));
-      reg_tool_ReadAffineFile(affineTransformation,
-                              param->input2TransName);
-      //Invert the affine transformation since the flaoting is updated
-      *affineTransformation = nifti_mat44_inverse(*affineTransformation);
-
-      // Update the sform
-      if(image->sform_code>0)
-      {
-         image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->sto_xyz));
-      }
-      else
-      {
-         image->sform_code = 1;
-         image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->qto_xyz));
-      }
-      image->sto_ijk = nifti_mat44_inverse(image->sto_xyz);
-
-      // Write the output image
-      reg_io_WriteImageFile(image,param->outputTransName);
-      // Free the allocated image and array
-      nifti_image_free(image);
-      free(affineTransformation);
-   }
-   /* ******************************** */
-   // Half the provided transformation //
-   /* ******************************** */
-   if(flag->halfTransFlag)
-   {
-      // Read the input transformation
-      mat44 *affineTrans=nullptr;
-      nifti_image *inputTransImage=nullptr;
-      if(!reg_isAnImageFileName(param->inputTransName))
-      {
-         // An affine transformation is considered
-         affineTrans=(mat44 *)malloc(sizeof(mat44));
-         reg_tool_ReadAffineFile(affineTrans,param->inputTransName);
-         // The affine transformation is halfed
-         *affineTrans=reg_mat44_logm(affineTrans);
-         *affineTrans=reg_mat44_mul(affineTrans,0.5);
-         *affineTrans=reg_mat44_expm(affineTrans);
-         // The affine transformation is saved
-         reg_tool_WriteAffineFile(affineTrans,param->outputTransName);
-      }
-      else
-      {
-         // A non-rigid parametrisation is considered
-         inputTransImage = reg_io_ReadImageFile(param->inputTransName);
-         if(inputTransImage==nullptr)
-         {
-            NR_ERROR("Error when reading the input image: " << param->inputTransName);
-            return EXIT_FAILURE;
-         }
-         switch(Round(inputTransImage->intent_p1))
-         {
-         case LIN_SPLINE_GRID:
-         case CUB_SPLINE_GRID:
-            reg_getDisplacementFromDeformation(inputTransImage);
-            reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f);
-            reg_getDeformationFromDisplacement(inputTransImage);
+            // Create a deformation field or a flow field
+            nifti_image *tempField = nifti_copy_nim_info(referenceImage);
+            tempField->ndim = tempField->dim[0] = 5;
+            tempField->nt = tempField->dim[4] = 1;
+            tempField->nu = tempField->dim[5] = tempField->nz > 1 ? 3 : 2;
+            tempField->nvox = NiftiImage::calcVoxelNumber(tempField, tempField->ndim);
+            tempField->nbyper = inputTransImage->nbyper;
+            tempField->datatype = inputTransImage->datatype;
+            tempField->intent_code = NIFTI_INTENT_VECTOR;
+            memset(tempField->intent_name, 0, 16);
+            strcpy(tempField->intent_name, "NREG_TRANS");
+            tempField->intent_p1 = DEF_FIELD;
+            if (inputTransImage->intent_p1 == SPLINE_VEL_GRID) {
+                tempField->intent_p1 = DEF_VEL_FIELD;
+                tempField->intent_p2 = inputTransImage->intent_p2;
+            }
+            tempField->scl_slope = 1.f;
+            tempField->scl_inter = 0.f;
+            tempField->data = calloc(tempField->nvox, tempField->nbyper);
+            // Compute the dense field
+            if (inputTransImage->intent_p1 == LIN_SPLINE_GRID ||
+                inputTransImage->intent_p1 == CUB_SPLINE_GRID)
+                reg_spline_getDeformationField(inputTransImage, tempField, nullptr, false, true);
+            else
+                reg_spline_getFlowFieldFromVelocityGrid(inputTransImage, tempField);
+            // The provided transformation file is replaced by the compute dense field
+            nifti_image_free(referenceImage);
+            nifti_image_free(inputTransImage);
+            inputTransImage = tempField;
+            tempField = nullptr;
+        }
+        // Create a field to store the transformation
+        nifti_image *outputTransImage = nifti_copy_nim_info(floatingImage);
+        outputTransImage->ndim = outputTransImage->dim[0] = 5;
+        outputTransImage->nt = outputTransImage->dim[4] = 1;
+        outputTransImage->nu = outputTransImage->dim[5] = outputTransImage->nz > 1 ? 3 : 2;
+        outputTransImage->nvox = NiftiImage::calcVoxelNumber(outputTransImage, outputTransImage->ndim);
+        outputTransImage->nbyper = inputTransImage->nbyper;
+        outputTransImage->datatype = inputTransImage->datatype;
+        outputTransImage->intent_code = NIFTI_INTENT_VECTOR;
+        memset(outputTransImage->intent_name, 0, 16);
+        strcpy(outputTransImage->intent_name, "NREG_TRANS");
+        outputTransImage->intent_p1 = inputTransImage->intent_p1;
+        outputTransImage->intent_p2 = inputTransImage->intent_p2;
+        outputTransImage->scl_slope = 1.f;
+        outputTransImage->scl_inter = 0.f;
+        outputTransImage->data = malloc(outputTransImage->nvox * outputTransImage->nbyper);
+        // Invert the provided
+        switch (Round(inputTransImage->intent_p1)) {
+        case DEF_FIELD:
+            reg_defFieldInvert(inputTransImage, outputTransImage, 1.0e-6f);
+            memset(outputTransImage->descrip, 0, 80);
+            strcpy(outputTransImage->descrip, "Deformation field from NiftyReg (reg_transform -invNrr)");
             break;
-         case DEF_FIELD:
-            reg_getDisplacementFromDeformation(inputTransImage);
-            reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f);
+        case DISP_FIELD:
             reg_getDeformationFromDisplacement(inputTransImage);
+            reg_defFieldInvert(inputTransImage, outputTransImage, 1.0e-6f);
+            reg_getDisplacementFromDeformation(outputTransImage);
+            memset(outputTransImage->descrip, 0, 80);
+            strcpy(outputTransImage->descrip, "Displacement field from NiftyReg (reg_transform -invNrr)");
             break;
-         case DISP_FIELD:
-            reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f);
-            break;
-         case SPLINE_VEL_GRID:
+        case DEF_VEL_FIELD:
+        {
+            // create a temp deformation field containing an identity transformation
+            nifti_image *tempField = nifti_dup(*outputTransImage, false);
+            tempField->intent_p1 = DEF_FIELD;
+            reg_getDeformationFromDisplacement(tempField);
             reg_getDisplacementFromDeformation(inputTransImage);
-            reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f);
-            reg_getDeformationFromDisplacement(inputTransImage);
-            --inputTransImage->intent_p2;
-            if(inputTransImage->num_ext>1)
-               --inputTransImage->num_ext;
+            reg_resampleGradient(inputTransImage, outputTransImage, tempField, 1, 0);
+            nifti_image_free(tempField);
+            reg_getDeformationFromDisplacement(outputTransImage);
+            outputTransImage->intent_p2 *= -1.f;
+            memset(outputTransImage->descrip, 0, 80);
+            strcpy(outputTransImage->descrip, "Deformation velocity field from NiftyReg (reg_transform -invNrr)");
             break;
-         case DEF_VEL_FIELD:
-            reg_getDisplacementFromDeformation(inputTransImage);
-            reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f);
-            reg_getDeformationFromDisplacement(inputTransImage);
-            --inputTransImage->intent_p2;
-            break;
-         case DISP_VEL_FIELD:
-            reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f);
-            --inputTransImage->intent_p2;
+        }
+        case DISP_VEL_FIELD:
+        {
+            // create a temp deformation field containing an identity transformation
+            nifti_image *tempField = nifti_dup(*outputTransImage, false);
+            tempField->intent_p1 = DEF_FIELD;
+            reg_getDeformationFromDisplacement(tempField);
+            reg_resampleGradient(inputTransImage, outputTransImage, tempField, 1, 0);
+            nifti_image_free(tempField);
+            outputTransImage->intent_p2 *= -1.f;
+            memset(outputTransImage->descrip, 0, 80);
+            strcpy(outputTransImage->descrip, "Displacement velocity field from NiftyReg (reg_transform -invNrr)");
             break;
-         default:
+        }
+        default:
             NR_ERROR("The specified input transformation type is not recognised: " << param->inputTransName);
             return EXIT_FAILURE;
-         }
-         // Save the image
-         reg_io_WriteImageFile(inputTransImage,param->outputTransName);
-      }
-      // Deallocate the allocated arrays
-      if(affineTrans!=nullptr) free(affineTrans);
-   }
-   /* ******************************************** */
-   // Invert the provided non-rigid transformation //
-   /* ******************************************** */
-   if(flag->invertNRRFlag)
-   {
-      // Read the provided transformation
-      nifti_image *inputTransImage = reg_io_ReadImageFile(param->inputTransName);
-      if(inputTransImage==nullptr)
-      {
-         NR_ERROR("Error when reading the input image: " << param->inputTransName);
-         return EXIT_FAILURE;
-      }
-      // Read the provided floating space image
-      nifti_image *floatingImage = reg_io_ReadImageFile(param->input2TransName);
-      if(floatingImage==nullptr)
-      {
-         NR_ERROR("Error when reading the input image: " << param->input2TransName);
-         return EXIT_FAILURE;
-      }
-      // Convert the spline parametrisation into a dense deformation parametrisation
-      if(inputTransImage->intent_p1==LIN_SPLINE_GRID ||
-            inputTransImage->intent_p1==CUB_SPLINE_GRID ||
-            inputTransImage->intent_p1==SPLINE_VEL_GRID)
-      {
-         // Read the reference image
-         if(!flag->referenceImageFlag)
-         {
-            NR_ERROR("When using an spline parametrisation transformation (" << param->inputTransName << ")," <<
-                     " a reference image should be specified (-ref flag).");
-            return EXIT_FAILURE;
-         }
-         nifti_image *referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-         if(referenceImage==nullptr)
-         {
-            NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
-            return EXIT_FAILURE;
-         }
-         // Create a deformation field or a flow field
-         nifti_image *tempField=nifti_copy_nim_info(referenceImage);
-         tempField->ndim=tempField->dim[0]=5;
-         tempField->nt=tempField->dim[4]=1;
-         tempField->nu=tempField->dim[5]=tempField->nz>1?3:2;
-         tempField->nvox=NiftiImage::calcVoxelNumber(tempField, tempField->ndim);
-         tempField->nbyper=inputTransImage->nbyper;
-         tempField->datatype=inputTransImage->datatype;
-         tempField->intent_code=NIFTI_INTENT_VECTOR;
-         memset(tempField->intent_name, 0, 16);
-         strcpy(tempField->intent_name,"NREG_TRANS");
-         tempField->intent_p1=DEF_FIELD;
-         if(inputTransImage->intent_p1==SPLINE_VEL_GRID)
-         {
-            tempField->intent_p1=DEF_VEL_FIELD;
-            tempField->intent_p2=inputTransImage->intent_p2;
-         }
-         tempField->scl_slope=1.f;
-         tempField->scl_inter=0.f;
-         tempField->data=calloc(tempField->nvox,tempField->nbyper);
-         // Compute the dense field
-         if(inputTransImage->intent_p1==LIN_SPLINE_GRID ||
-               inputTransImage->intent_p1==CUB_SPLINE_GRID)
-            reg_spline_getDeformationField(inputTransImage,
-                                           tempField,
-                                           nullptr,
-                                           false,
-                                           true);
-         else
-            reg_spline_getFlowFieldFromVelocityGrid(inputTransImage,
-                                                    tempField);
-         // The provided transformation file is replaced by the compute dense field
-         nifti_image_free(referenceImage);
-         nifti_image_free(inputTransImage);
-         inputTransImage=tempField;
-         tempField=nullptr;
-      }
-     // Create a field to store the transformation
-     nifti_image *outputTransImage = nifti_copy_nim_info(floatingImage);
-     outputTransImage->ndim = outputTransImage->dim[0] = 5;
-     outputTransImage->nt = outputTransImage->dim[4] = 1;
-     outputTransImage->nu = outputTransImage->dim[5] = outputTransImage->nz>1 ? 3 : 2;
-     outputTransImage->nvox = NiftiImage::calcVoxelNumber(outputTransImage, outputTransImage->ndim);
-     outputTransImage->nbyper = inputTransImage->nbyper;
-     outputTransImage->datatype = inputTransImage->datatype;
-     outputTransImage->intent_code = NIFTI_INTENT_VECTOR;
-     memset(outputTransImage->intent_name, 0, 16);
-     strcpy(outputTransImage->intent_name, "NREG_TRANS");
-     outputTransImage->intent_p1 = inputTransImage->intent_p1;
-     outputTransImage->intent_p2 = inputTransImage->intent_p2;
-     outputTransImage->scl_slope = 1.f;
-     outputTransImage->scl_inter = 0.f;
-     outputTransImage->data = malloc(outputTransImage->nvox*outputTransImage->nbyper);
-      // Invert the provided
-      switch(Round(inputTransImage->intent_p1))
-      {
-      case DEF_FIELD:
-         reg_defFieldInvert(inputTransImage,outputTransImage,1.0e-6f);
-       memset(outputTransImage->descrip, 0, 80);
-       strcpy(outputTransImage->descrip, "Deformation field from NiftyReg (reg_transform -invNrr)");
-         break;
-      case DISP_FIELD:
-         reg_getDeformationFromDisplacement(inputTransImage);
-         reg_defFieldInvert(inputTransImage,outputTransImage,1.0e-6f);
-       reg_getDisplacementFromDeformation(outputTransImage);
-       memset(outputTransImage->descrip, 0, 80);
-       strcpy(outputTransImage->descrip, "Displacement field from NiftyReg (reg_transform -invNrr)");
-         break;
-      case DEF_VEL_FIELD:
-      {
-         // create a temp deformation field containing an identity transformation
-         nifti_image *tempField = nifti_dup(*outputTransImage, false);
-         tempField->intent_p1=DEF_FIELD;
-         reg_getDeformationFromDisplacement(tempField);
-         reg_getDisplacementFromDeformation(inputTransImage);
-         reg_resampleGradient(inputTransImage,
-                              outputTransImage,
-                              tempField,
-                              1,
-                              0);
-         nifti_image_free(tempField);
-         reg_getDeformationFromDisplacement(outputTransImage);
-       outputTransImage->intent_p2 *= -1.f;
-       memset(outputTransImage->descrip, 0, 80);
-       strcpy(outputTransImage->descrip, "Deformation velocity field from NiftyReg (reg_transform -invNrr)");
-         break;
-      }
-      case DISP_VEL_FIELD:
-      {
-         // create a temp deformation field containing an identity transformation
-         nifti_image *tempField = nifti_dup(*outputTransImage, false);
-         tempField->intent_p1=DEF_FIELD;
-         reg_getDeformationFromDisplacement(tempField);
-         reg_resampleGradient(inputTransImage,
-                              outputTransImage,
-                              tempField,
-                              1,
-                              0);
-         nifti_image_free(tempField);
-       outputTransImage->intent_p2 *= -1.f;
-       memset(outputTransImage->descrip, 0, 80);
-       strcpy(outputTransImage->descrip, "Displacement velocity field from NiftyReg (reg_transform -invNrr)");
-         break;
-      }
-      default:
-         NR_ERROR("The specified input transformation type is not recognised: " << param->inputTransName);
-         return EXIT_FAILURE;
-      }
-      // Save the inverted transformation
-      reg_io_WriteImageFile(outputTransImage,param->outputTransName);
-      // Free the allocated images
-      nifti_image_free(inputTransImage);
-      nifti_image_free(outputTransImage);
-   }
-   /* ***************************************** */
-   // Invert the provided affine transformation //
-   /* ***************************************** */
-   if(flag->invertAffFlag)
-   {
-      // Read the affine transformation
-      mat44 affineTrans;
-      reg_tool_ReadAffineFile(&affineTrans,param->inputTransName);
-      // Invert the transformation
-      affineTrans = nifti_mat44_inverse(affineTrans);
-      // Save the inverted transformation
-      reg_tool_WriteAffineFile(&affineTrans,param->outputTransName);
-   }
-   /* ******************************* */
-   // Create an affine transformation //
-   /* ******************************* */
-   if(flag->makeAffFlag)
-   {
-      // Create all the required matrices
-      mat44 rotationX;
-      reg_mat44_eye(&rotationX);
-      mat44 translation;
-      reg_mat44_eye(&translation);
-      mat44 rotationY;
-      reg_mat44_eye(&rotationY);
-      mat44 rotationZ;
-      reg_mat44_eye(&rotationZ);
-      mat44 scaling;
-      reg_mat44_eye(&scaling);
-      mat44 shearing;
-      reg_mat44_eye(&shearing);
-      // Set up the rotation matrix along the YZ plane
-      rotationX.m[1][1]=cosf(param->affTransParam[0]);
-      rotationX.m[1][2]=-sinf(param->affTransParam[0]);
-      rotationX.m[2][1]=sinf(param->affTransParam[0]);
-      rotationX.m[2][2]=cosf(param->affTransParam[0]);
-      // Set up the rotation matrix along the XZ plane
-      rotationY.m[0][0]=cosf(param->affTransParam[1]);
-      rotationY.m[0][2]=-sinf(param->affTransParam[1]);
-      rotationY.m[2][0]=sinf(param->affTransParam[1]);
-      rotationY.m[2][2]=cosf(param->affTransParam[1]);
-      // Set up the rotation matrix along the XY plane
-      rotationZ.m[0][0]=cosf(param->affTransParam[2]);
-      rotationZ.m[0][1]=-sinf(param->affTransParam[2]);
-      rotationZ.m[1][0]=sinf(param->affTransParam[2]);
-      rotationZ.m[1][1]=cosf(param->affTransParam[2]);
-      // Set up the translation matrix
-      translation.m[0][3]=param->affTransParam[3];
-      translation.m[1][3]=param->affTransParam[4];
-      translation.m[2][3]=param->affTransParam[5];
-      // Set up the scaling matrix
-      scaling.m[0][0]=param->affTransParam[6];
-      scaling.m[1][1]=param->affTransParam[7];
-      scaling.m[2][2]=param->affTransParam[8];
-      // Set up the shearing matrix
-      shearing.m[1][0]=param->affTransParam[9];
-      shearing.m[2][0]=param->affTransParam[10];
-      shearing.m[2][1]=param->affTransParam[11];
-      // Combine all the transformations
-      mat44 affine=reg_mat44_mul(&rotationY,&rotationZ);
-      affine=reg_mat44_mul(&rotationX,&affine);
-      affine=reg_mat44_mul(&scaling,&affine);
-      affine=reg_mat44_mul(&shearing,&affine);
-      affine=reg_mat44_mul(&translation,&affine);
-      // Save the new matrix
-      reg_tool_WriteAffineFile(&affine,param->outputTransName);
-   }
-   /* ************************************************* */
-   // Extract the rigid component from an affine matrix //
-   /* ************************************************* */
-   if(flag->aff2rigFlag)
-   {
-      mat44 affine;
-      reg_tool_ReadAffineFile(&affine,param->inputTransName);
-      // Compute the orthonormal matrix
-      float qb,qc,qd,qx,qy,qz,dx,dy,dz,qfac;
-      nifti_mat44_to_quatern(affine,&qb,&qc,&qd,&qx,&qy,&qz,&dx,&dy,&dz,&qfac);
-      affine = nifti_quatern_to_mat44(qb,qc,qd,qx,qy,qz,1.f,1.f,1.f,qfac);
-      reg_tool_WriteAffineFile(&affine, param->outputTransName);
-   }
-   /* ********************************************************** */
-   // Convert a flirt affine transformation to a NiftyReg affine //
-   /* ********************************************************** */
-   if(flag->flirtAff2NRFlag)
-   {
-      mat44 affine;
-      nifti_image *referenceImage=reg_io_ReadImageHeader(param->referenceImageName);
-      nifti_image *floatingImage=reg_io_ReadImageHeader(param->referenceImage2Name);
-      reg_tool_ReadAffineFile(&affine,referenceImage,floatingImage,param->inputTransName,true);
-      reg_tool_WriteAffineFile(&affine, param->outputTransName);
-      nifti_image_free(referenceImage);
-      nifti_image_free(floatingImage);
-   }
-   // Free allocated object
-   free(param);
-   free(flag);
+        }
+        // Save the inverted transformation
+        reg_io_WriteImageFile(outputTransImage, param->outputTransName);
+        // Free the allocated images
+        nifti_image_free(inputTransImage);
+        nifti_image_free(outputTransImage);
+    }
+    /* ***************************************** */
+    // Invert the provided affine transformation //
+    /* ***************************************** */
+    if (flag->invertAffFlag) {
+        // Read the affine transformation
+        mat44 affineTrans;
+        reg_tool_ReadAffineFile(&affineTrans, param->inputTransName);
+        // Invert the transformation
+        affineTrans = nifti_mat44_inverse(affineTrans);
+        // Save the inverted transformation
+        reg_tool_WriteAffineFile(&affineTrans, param->outputTransName);
+    }
+    /* ******************************* */
+    // Create an affine transformation //
+    /* ******************************* */
+    if (flag->makeAffFlag) {
+        // Create all the required matrices
+        mat44 rotationX;
+        reg_mat44_eye(&rotationX);
+        mat44 translation;
+        reg_mat44_eye(&translation);
+        mat44 rotationY;
+        reg_mat44_eye(&rotationY);
+        mat44 rotationZ;
+        reg_mat44_eye(&rotationZ);
+        mat44 scaling;
+        reg_mat44_eye(&scaling);
+        mat44 shearing;
+        reg_mat44_eye(&shearing);
+        // Set up the rotation matrix along the YZ plane
+        rotationX.m[1][1] = cosf(param->affTransParam[0]);
+        rotationX.m[1][2] = -sinf(param->affTransParam[0]);
+        rotationX.m[2][1] = sinf(param->affTransParam[0]);
+        rotationX.m[2][2] = cosf(param->affTransParam[0]);
+        // Set up the rotation matrix along the XZ plane
+        rotationY.m[0][0] = cosf(param->affTransParam[1]);
+        rotationY.m[0][2] = -sinf(param->affTransParam[1]);
+        rotationY.m[2][0] = sinf(param->affTransParam[1]);
+        rotationY.m[2][2] = cosf(param->affTransParam[1]);
+        // Set up the rotation matrix along the XY plane
+        rotationZ.m[0][0] = cosf(param->affTransParam[2]);
+        rotationZ.m[0][1] = -sinf(param->affTransParam[2]);
+        rotationZ.m[1][0] = sinf(param->affTransParam[2]);
+        rotationZ.m[1][1] = cosf(param->affTransParam[2]);
+        // Set up the translation matrix
+        translation.m[0][3] = param->affTransParam[3];
+        translation.m[1][3] = param->affTransParam[4];
+        translation.m[2][3] = param->affTransParam[5];
+        // Set up the scaling matrix
+        scaling.m[0][0] = param->affTransParam[6];
+        scaling.m[1][1] = param->affTransParam[7];
+        scaling.m[2][2] = param->affTransParam[8];
+        // Set up the shearing matrix
+        shearing.m[1][0] = param->affTransParam[9];
+        shearing.m[2][0] = param->affTransParam[10];
+        shearing.m[2][1] = param->affTransParam[11];
+        // Combine all the transformations
+        mat44 affine = reg_mat44_mul(&rotationY, &rotationZ);
+        affine = reg_mat44_mul(&rotationX, &affine);
+        affine = reg_mat44_mul(&scaling, &affine);
+        affine = reg_mat44_mul(&shearing, &affine);
+        affine = reg_mat44_mul(&translation, &affine);
+        // Save the new matrix
+        reg_tool_WriteAffineFile(&affine, param->outputTransName);
+    }
+    /* ************************************************* */
+    // Extract the rigid component from an affine matrix //
+    /* ************************************************* */
+    if (flag->aff2rigFlag) {
+        mat44 affine;
+        reg_tool_ReadAffineFile(&affine, param->inputTransName);
+        // Compute the orthonormal matrix
+        float qb, qc, qd, qx, qy, qz, dx, dy, dz, qfac;
+        nifti_mat44_to_quatern(affine, &qb, &qc, &qd, &qx, &qy, &qz, &dx, &dy, &dz, &qfac);
+        affine = nifti_quatern_to_mat44(qb, qc, qd, qx, qy, qz, 1.f, 1.f, 1.f, qfac);
+        reg_tool_WriteAffineFile(&affine, param->outputTransName);
+    }
+    /* ********************************************************** */
+    // Convert a flirt affine transformation to a NiftyReg affine //
+    /* ********************************************************** */
+    if (flag->flirtAff2NRFlag) {
+        mat44 affine;
+        nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+        nifti_image *floatingImage = reg_io_ReadImageHeader(param->referenceImage2Name);
+        reg_tool_ReadAffineFile(&affine, referenceImage, floatingImage, param->inputTransName, true);
+        reg_tool_WriteAffineFile(&affine, param->outputTransName);
+        nifti_image_free(referenceImage);
+        nifti_image_free(floatingImage);
+    }
+    // Free allocated object
+    free(param);
+    free(flag);
 
-   return EXIT_SUCCESS;
+    return EXIT_SUCCESS;
 }
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index b5413b21..8b2d928a 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -14,17 +14,11 @@
 #include <filesystem>
 
 /* *************************************************************** */
-void reg_hack_filename(nifti_image *image, std::string filename) {
-    filename.append("\0");
-    // Free the char arrays if already allocated
+void reg_hack_filename(nifti_image *image, const char *filename) {
     if (image->fname) free(image->fname);
     if (image->iname) free(image->iname);
-    // Allocate the char arrays
-    image->fname = (char *)malloc((filename.size() + 1) * sizeof(char));
-    image->iname = (char *)malloc((filename.size() + 1) * sizeof(char));
-    // Copy the new name in the char arrays
-    strcpy(image->fname, filename.c_str());
-    strcpy(image->iname, filename.c_str());
+    image->fname = strdup(filename);
+    image->iname = strdup(filename);
 }
 /* *************************************************************** */
 int reg_io_checkFileFormat(const std::string& filename) {
diff --git a/reg-io/niftilib/nifti1_io.c b/reg-io/niftilib/nifti1_io.c
index b557b702..23d75187 100644
--- a/reg-io/niftilib/nifti1_io.c
+++ b/reg-io/niftilib/nifti1_io.c
@@ -6441,7 +6441,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
    nbuf = (int)strlen(buf) ;
    char *temp = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */
    if (temp)
-      buf = temp; // cppcheck-suppress memleak // false negative
+      buf = temp; // cppcheck-suppress memleak // false positive
    else
       Rc_fprintf_stderr("** NITA: failed to realloc %d bytes\n", nbuf+1);
    return buf ;
diff --git a/reg-lib/AffineDeformationFieldKernel.h b/reg-lib/AffineDeformationFieldKernel.h
index 979fcc5c..94946ddf 100644
--- a/reg-lib/AffineDeformationFieldKernel.h
+++ b/reg-lib/AffineDeformationFieldKernel.h
@@ -7,7 +7,5 @@ class AffineDeformationFieldKernel: public Kernel {
     static std::string GetName() {
         return "AffineDeformationFieldKernel";
     }
-    AffineDeformationFieldKernel() : Kernel() {}
-    virtual ~AffineDeformationFieldKernel() {}
     virtual void Calculate(bool compose = false) = 0;
 };
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index 9757f5fe..19cf8c28 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -11,6 +11,7 @@
 
 class AladinContent: public Content {
 public:
+    AladinContent(const AladinContent&) = delete;
     AladinContent(nifti_image *referenceIn,
                   nifti_image *floatingIn,
                   int *referenceMaskIn = nullptr,
@@ -21,6 +22,8 @@ class AladinContent: public Content {
                   int blockStepSize = 0);
     virtual ~AladinContent();
 
+    AladinContent& operator=(const AladinContent&) = delete;
+
     // Getters
     virtual _reg_blockMatchingParam* GetBlockMatchingParams() { return blockMatchingParams; }
 
diff --git a/reg-lib/BlockMatchingKernel.h b/reg-lib/BlockMatchingKernel.h
index b78b05ab..747ad46a 100644
--- a/reg-lib/BlockMatchingKernel.h
+++ b/reg-lib/BlockMatchingKernel.h
@@ -7,7 +7,5 @@ class BlockMatchingKernel: public Kernel {
     static std::string GetName() {
         return "BlockMatchingKernel";
     }
-    BlockMatchingKernel() : Kernel() {}
-    virtual ~BlockMatchingKernel() {}
     virtual void Calculate() = 0;
 };
diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h
index 8d4fdd52..cfe93f0a 100644
--- a/reg-lib/ConvolutionKernel.h
+++ b/reg-lib/ConvolutionKernel.h
@@ -8,7 +8,10 @@ class ConvolutionKernel: public Kernel {
     static std::string GetName() {
         return "ConvolutionKernel";
     }
-    ConvolutionKernel() : Kernel() {}
-    virtual ~ConvolutionKernel() {}
-    virtual void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr) = 0;
+    virtual void Calculate(nifti_image *image,
+                           float *sigma,
+                           ConvKernelType kernelType,
+                           int *mask = nullptr,
+                           bool *timePoints = nullptr,
+                           bool *axis = nullptr) = 0;
 };
diff --git a/reg-lib/Kernel.h b/reg-lib/Kernel.h
index 4d3a16f1..2d06f52e 100755
--- a/reg-lib/Kernel.h
+++ b/reg-lib/Kernel.h
@@ -5,10 +5,8 @@
 
 class Kernel {
 public:
-    Kernel() {}
-    virtual ~Kernel() {}
-
-    std::string GetName() const;
+    Kernel() = default;
+    virtual ~Kernel() = default;
 
     template <class T>
     T* castTo() { return dynamic_cast<T*>(this); }
diff --git a/reg-lib/LtsKernel.h b/reg-lib/LtsKernel.h
index 139f6cf9..d12a1f60 100644
--- a/reg-lib/LtsKernel.h
+++ b/reg-lib/LtsKernel.h
@@ -7,7 +7,5 @@ class LtsKernel: public Kernel {
     static std::string GetName() {
         return "LtsKernel";
     }
-    LtsKernel() : Kernel() {}
-    virtual ~LtsKernel() {}
     virtual void Calculate(bool affine) = 0;
 };
diff --git a/reg-lib/Optimiser.cpp b/reg-lib/Optimiser.cpp
index cf696b95..4a92c7d8 100644
--- a/reg-lib/Optimiser.cpp
+++ b/reg-lib/Optimiser.cpp
@@ -176,15 +176,6 @@ template class Optimiser<float>;
 template class Optimiser<double>;
 /* *************************************************************** */
 template <class T>
-ConjugateGradient<T>::ConjugateGradient(): Optimiser<T>::Optimiser() {
-    this->array1 = nullptr;
-    this->array1Bw = nullptr;
-    this->array2 = nullptr;
-    this->array2Bw = nullptr;
-    NR_FUNC_CALLED();
-}
-/* *************************************************************** */
-template <class T>
 ConjugateGradient<T>::~ConjugateGradient() {
     if (this->array1) {
         free(this->array1);
diff --git a/reg-lib/Optimiser.hpp b/reg-lib/Optimiser.hpp
index 3f672b54..aa4da312 100644
--- a/reg-lib/Optimiser.hpp
+++ b/reg-lib/Optimiser.hpp
@@ -146,11 +146,11 @@ class Optimiser {
 template <class T>
 class ConjugateGradient: public Optimiser<T> {
 protected:
-    T *array1;
-    T *array1Bw;
-    T *array2;
-    T *array2Bw;
-    bool firstCall;
+    T *array1 = nullptr;
+    T *array1Bw = nullptr;
+    T *array2 = nullptr;
+    T *array2Bw = nullptr;
+    bool firstCall = true;
 
 #ifdef NR_TESTING
 public:
@@ -158,7 +158,7 @@ class ConjugateGradient: public Optimiser<T> {
     virtual void UpdateGradientValues() override;
 
 public:
-    ConjugateGradient();
+    ConjugateGradient() { NR_FUNC_CALLED(); }
     virtual ~ConjugateGradient();
     virtual void Initialise(size_t nvox,
                             int ndim,
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 77035b04..3701327c 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -88,11 +88,9 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) {
             clContext.SetClIdx(gpuIdxIn);
         }
 
-        std::size_t paramValueSize;
-        clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
-        cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize);
-        clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
-        if (CL_DEVICE_TYPE_CPU == *field)
+        cl_device_type field;
+        clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, sizeof(field), &field, nullptr), "Failed to find OpenCL device info");
+        if (CL_DEVICE_TYPE_CPU == field)
             NR_FATAL_ERROR("The OpenCL kernels only support GPU devices for now");
     }
 #endif
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index 71d2b3b7..f3d4d4d0 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -20,9 +20,13 @@ constexpr PlatformType PlatformTypes[] = {
 
 class Platform {
 public:
+    Platform() = delete;
+    Platform(const Platform&) = delete;
     Platform(const PlatformType platformTypeIn);
     ~Platform();
 
+    Platform& operator=(const Platform&) = delete;
+
     std::string GetName() const;
     PlatformType GetPlatformType() const;
     unsigned GetGpuIdx() const;
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index f8445e3f..3f184522 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -251,9 +251,9 @@ void reg_aladin<T>::InitialiseRegistration() {
             referenceCentre[0] /= referenceCount;
             referenceCentre[1] /= referenceCount;
             referenceCentre[2] /= referenceCount;
-            float refCOM[3];
+            float refCOM[3]{};
             if (this->inputReference->sform_code > 0)
-                reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOM);
+                reg_mat44_mul(&this->inputReference->sto_xyz, referenceCentre, refCOM);
 
             float floatingCentre[3] = { 0, 0, 0 };
             float floatingCount = 0;
@@ -275,9 +275,9 @@ void reg_aladin<T>::InitialiseRegistration() {
             floatingCentre[0] /= floatingCount;
             floatingCentre[1] /= floatingCount;
             floatingCentre[2] /= floatingCount;
-            float floCOM[3];
+            float floCOM[3]{};
             if (this->inputFloating->sform_code > 0)
-                reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOM);
+                reg_mat44_mul(&this->inputFloating->sto_xyz, floatingCentre, floCOM);
             reg_mat44_eye(this->affineTransformation.get());
             this->affineTransformation->m[0][3] = floCOM[0] - refCOM[0];
             this->affineTransformation->m[1][3] = floCOM[1] - refCOM[1];
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 59c99fa2..f204d66e 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -82,7 +82,6 @@ class reg_aladin {
 
     bool performRigid;
     bool performAffine;
-    int captureRangeVox;
 
     int blockPercentage;
     int inlierLts;
@@ -242,9 +241,6 @@ class reg_aladin {
     void SetInterpolationToCubic() {
         this->SetInterpolation(3);
     }
-    void SetCaptureRangeVox(int captureRangeIn) {
-        this->captureRangeVox = captureRangeIn;
-    }
 
     virtual int Check();
     virtual void Print();
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index 381ca144..610405bd 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -79,7 +79,7 @@ void reg_aladin_sym<T>::InitialiseRegistration() {
         referenceCentre[0] /= referenceCount;
         referenceCentre[1] /= referenceCount;
         referenceCentre[2] /= referenceCount;
-        float refCOG[3];
+        float refCOG[3]{};
         if (this->inputReference->sform_code > 0)
             reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOG);
 
@@ -104,7 +104,7 @@ void reg_aladin_sym<T>::InitialiseRegistration() {
         floatingCentre[0] /= floatingCount;
         floatingCentre[1] /= floatingCount;
         floatingCentre[2] /= floatingCount;
-        float floCOG[3];
+        float floCOG[3]{};
         if (this->inputFloating->sform_code > 0)
             reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOG);
         reg_mat44_eye(this->affineTransformation.get());
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 564276f6..2190241f 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -15,7 +15,7 @@
 /* *************************************************************** */
 template<class T>
 reg_base<T>::reg_base(int refTimePoints, int floTimePoints) {
-    SetPlatformType(PlatformType::Cpu);
+    reg_base::SetPlatformType(PlatformType::Cpu);
 
     maxIterationNumber = 150;
     optimiseX = true;
@@ -59,6 +59,13 @@ reg_base<T>::reg_base(int refTimePoints, int floTimePoints) {
     landmarkReference = nullptr;
     landmarkFloating = nullptr;
 
+    bestWMeasure = 0;
+    currentWMeasure = 0;
+    currentWLand = 0;
+    bestWLand = 0;
+    funcProgressCallback = nullptr;
+    paramsProgressCallback = nullptr;
+
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 4973fc99..3b4b91c3 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -105,7 +105,7 @@ class reg_base: public InterfaceOptimiser {
 
     // For the NiftyReg plugin in NiftyView
     void (*funcProgressCallback)(float pcntProgress, void *params);
-    void* paramsProgressCallback;
+    void *paramsProgressCallback;
 
     virtual void WarpFloatingImage(int);
     virtual double ComputeSimilarityMeasure();
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index afef536b..1f005525 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -29,6 +29,12 @@ reg_f3d<T>::reg_f3d(int refTimePoints, int floTimePoints):
     this->useConjGradient = true;
     this->useApproxGradient = false;
     gridRefinement = true;
+    currentWJac = 0;
+    currentWBE = 0;
+    currentWLE = 0;
+    bestWJac = 0;
+    bestWBE = 0;
+    bestWLE = 0;
 
     NR_FUNC_CALLED();
 }
diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
index a7c33a51..073fcaa6 100644
--- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
@@ -62,15 +62,9 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) :
 /* *************************************************************** */
 void ClAffineDeformationFieldKernel::Calculate(bool compose) {
     //localWorkSize[0]*localWorkSize[1]*localWorkSize[2]... should be lower than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE
-    cl_uint maxWG = 0;
-    cl_int errNum;
-    std::size_t paramValueSize;
-    errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, &paramValueSize);
-    sContext->CheckErrNum(errNum, "Failed to GetDeviceId() OpenCL device info ");
-    cl_uint * info = (cl_uint *)alloca(sizeof(cl_uint) * paramValueSize);
-    errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr);
-    sContext->CheckErrNum(errNum, "Failed to GetDeviceId() OpenCL device info ");
-    maxWG = *info;
+    size_t maxWG = 0;
+    auto errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(maxWG), &maxWG, nullptr);
+    sContext->CheckErrNum(errNum, "Failed to GetDeviceId() OpenCL device info");
 
     //8=default value
     unsigned xThreads = 8;
@@ -126,7 +120,6 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) {
 
     free(trans);
     clReleaseMemObject(cltransMat);
-    return;
 }
 /* *************************************************************** */
 ClAffineDeformationFieldKernel::~ClAffineDeformationFieldKernel() {
diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.h b/reg-lib/cl/ClAffineDeformationFieldKernel.h
index ad3a092b..c0203054 100644
--- a/reg-lib/cl/ClAffineDeformationFieldKernel.h
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.h
@@ -6,8 +6,8 @@
 class ClAffineDeformationFieldKernel: public AffineDeformationFieldKernel {
 public:
     ClAffineDeformationFieldKernel(Content *conIn);
-    ~ClAffineDeformationFieldKernel();
-    void Calculate(bool compose = false);
+    virtual ~ClAffineDeformationFieldKernel();
+    virtual void Calculate(bool compose = false) override;
 
 private:
     mat44 *affineTransformation, *referenceMatrix;
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index bff1e4c6..49a78646 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -150,15 +150,13 @@ void ClAladinContent::SetReferenceMask(int *referenceMaskIn) {
     sContext->CheckErrNum(errNum, "ClAladinContent::SetReferenceMask failed to allocate memory (maskClmem): ");
 }
 /* *************************************************************** */
-void ClAladinContent::SetWarped(nifti_image *warped) {
-    if (warped != nullptr) {
+void ClAladinContent::SetWarped(nifti_image *warpedIn) {
+    if (warpedIn->nbyper != NIFTI_TYPE_FLOAT32)
+        reg_tools_changeDatatype<float>(warpedIn);
+    if (warped != nullptr)
         clReleaseMemObject(warpedImageClmem);
-    }
-    if (warped->nbyper != NIFTI_TYPE_FLOAT32) {
-        reg_tools_changeDatatype<float>(warped);
-    }
-    AladinContent::SetWarped(warped);
-    warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum);
+    AladinContent::SetWarped(warpedIn);
+    warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, warpedIn->nvox * sizeof(float), warpedIn->data, &errNum);
     sContext->CheckErrNum(errNum, "ClAladinContent::SetWarped failed to allocate memory (warpedImageClmem): ");
 }
 /* *************************************************************** */
@@ -224,14 +222,6 @@ cl_mem ClAladinContent::GetFloMatClmem() {
     return floMatClmem;
 }
 /* *************************************************************** */
-int *ClAladinContent::GetReferenceDims() {
-    return referenceDims;
-}
-/* *************************************************************** */
-int *ClAladinContent::GetFloatingDims() {
-    return floatingDims;
-}
-/* *************************************************************** */
 template<class DataType>
 DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) {
     switch (datatype) {
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
index 5c11f081..3c184871 100644
--- a/reg-lib/cl/ClAladinContent.h
+++ b/reg-lib/cl/ClAladinContent.h
@@ -35,8 +35,6 @@ class ClAladinContent: public AladinContent {
     virtual cl_mem GetMaskClmem();
     virtual cl_mem GetRefMatClmem();
     virtual cl_mem GetFloMatClmem();
-    virtual int* GetReferenceDims();
-    virtual int* GetFloatingDims();
 
     // CPU getters with data downloaded from device
     virtual _reg_blockMatchingParam* GetBlockMatchingParams() override;
@@ -64,11 +62,6 @@ class ClAladinContent: public AladinContent {
     cl_mem refMatClmem;
     cl_mem floMatClmem;
 
-    int referenceDims[4];
-    int floatingDims[4];
-
-    unsigned nVoxels;
-
     void DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype);
     template<class T>
     void FillImageData(nifti_image *image, cl_mem memoryObject, int type);
@@ -82,8 +75,8 @@ class ClAladinContent: public AladinContent {
 #endif
     // Functions for testing
     virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedImageIn) override;
+    virtual void SetWarped(nifti_image *warpedIn) override;
     virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
     virtual void SetReferenceMask(int *referenceMaskIn) override;
-    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
+    virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) override;
 };
diff --git a/reg-lib/cl/ClBlockMatchingKernel.h b/reg-lib/cl/ClBlockMatchingKernel.h
index acecafe3..f97380c4 100644
--- a/reg-lib/cl/ClBlockMatchingKernel.h
+++ b/reg-lib/cl/ClBlockMatchingKernel.h
@@ -6,8 +6,8 @@
 class ClBlockMatchingKernel: public BlockMatchingKernel {
 public:
     ClBlockMatchingKernel(Content *conIn);
-    ~ClBlockMatchingKernel();
-    void Calculate();
+    virtual ~ClBlockMatchingKernel();
+    virtual void Calculate() override;
 
 private:
     ClContextSingleton *sContext;
diff --git a/reg-lib/cl/ClContextSingleton.cpp b/reg-lib/cl/ClContextSingleton.cpp
index c9deb205..17231274 100644
--- a/reg-lib/cl/ClContextSingleton.cpp
+++ b/reg-lib/cl/ClContextSingleton.cpp
@@ -13,15 +13,15 @@ void ClContextSingleton::Init() {
     cl_int errNum = clGetPlatformIDs(0, nullptr, &this->numPlatforms);
     CheckErrNum(errNum, "Failed to find CL platforms.");
 
-    this->platformIds = (cl_platform_id *)alloca(sizeof(cl_platform_id) * this->numPlatforms);
-    errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds, nullptr);
+    this->platformIds = std::make_unique<cl_platform_id[]>(this->numPlatforms);
+    errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds.get(), nullptr);
     CheckErrNum(errNum, "Failed to find any OpenCL platforms.");
 
     errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, 0, nullptr, &this->numDevices);
     CheckErrNum(errNum, "Failed to find OpenCL devices.");
 
-    this->devices = new cl_device_id[this->numDevices];
-    errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices, nullptr);
+    this->devices = std::make_unique<cl_device_id[]>(this->numDevices);
+    errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices.get(), nullptr);
 
     PickCard(this->clIdx);
 
@@ -50,56 +50,34 @@ void ClContextSingleton::SetClIdx(int clIdxIn) {
 }
 /* *************************************************************** */
 void ClContextSingleton::QueryGridDims() {
-    std::size_t paramValueSize;
-    cl_int errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, &paramValueSize);
-    CheckErrNum(errNum, "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_GROUP_SIZE");
-
-    size_t *info = (size_t*)alloca(sizeof(size_t) * paramValueSize);
-    errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr);
-    CheckErrNum(errNum, "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_GROUP_SIZE2");
-    this->maxThreads = *info;
+    size_t maxWorkGroupSize;
+    auto errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(maxWorkGroupSize), &maxWorkGroupSize, nullptr);
+    CheckErrNum(errNum, "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_GROUP_SIZE");
+    this->maxThreads = maxWorkGroupSize;
     this->maxBlocks = 65535;
 }
 /* *************************************************************** */
 void ClContextSingleton::PickCard(cl_uint deviceId) {
     cl_int errNum;
-    std::size_t paramValueSize;
+    size_t paramValueSize;
     cl_uint maxProcs = 0;
     this->clIdx = 0;
-    this->isCardDoubleCapable = 0;
-
-    std::size_t paramValueSizeDOUBE1;
-    std::size_t paramValueSizeDOUBE2;
+    this->isCardDoubleCapable = false;
 
     if (deviceId < this->numDevices) {
         this->clIdx = deviceId;
-        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, &paramValueSize);
-        CheckErrNum(errNum, "Failed to find OpenCL device info ");
-        cl_uint *info = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSize);
-        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr);
-        CheckErrNum(errNum, "Failed to find OpenCL device info ");
-        cl_uint numProcs = *info;
-        maxProcs = numProcs;
+        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(maxProcs), &maxProcs, nullptr);
+        CheckErrNum(errNum, "Failed to find OpenCL device info");
 
-        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE1);
-        CheckErrNum(errNum, "Failed to find OpenCL device info ");
-        cl_uint *infoD1 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE1);
-        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr);
-        CheckErrNum(errNum, "Failed to find OpenCL device info ");
-        cl_uint numD1 = *infoD1;
+        cl_uint numD1;
+        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(numD1), &numD1, nullptr);
+        CheckErrNum(errNum, "Failed to find OpenCL device info");
 
-        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE2);
-        CheckErrNum(errNum, "Failed to find OpenCL device info ");
-        cl_uint *infoD2 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE2);
-        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr);
-        CheckErrNum(errNum, "Failed to find OpenCL device info ");
-        cl_uint numD2 = *infoD2;
+        cl_uint numD2;
+        errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, sizeof(numD2), &numD2, nullptr);
+        CheckErrNum(errNum, "Failed to find OpenCL device info");
 
-        if (numD1 > 0 || numD2 > 0) {
-            this->isCardDoubleCapable = true;
-        } else {
-            this->isCardDoubleCapable = false;
-        }
+        this->isCardDoubleCapable = numD1 > 0 || numD2 > 0;
         return;
     } else if (deviceId != 999)
         NR_FATAL_ERROR("The specified OpenCL card ID is not defined! Run reg_gpuinfo to get the proper ID.");
@@ -108,36 +86,24 @@ void ClContextSingleton::PickCard(cl_uint deviceId) {
         cl_device_type dev_type;
         clGetDeviceInfo(this->devices[i], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, nullptr);
         if (dev_type == CL_DEVICE_TYPE_GPU) {
-            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, &paramValueSize);
-            CheckErrNum(errNum, "Failed to find OpenCL device info ");
-            cl_uint *info = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSize);
-            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr);
-            CheckErrNum(errNum, "Failed to find OpenCL device info ");
-            cl_uint numProcs = *info;
+            cl_uint numProcs;
+            errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numProcs), &numProcs, nullptr);
+            CheckErrNum(errNum, "Failed to find OpenCL device info");
+
             const bool found = numProcs > maxProcs;
             this->clIdx = found ? i : this->clIdx;
             maxProcs = found ? numProcs : maxProcs;
 
             if (found) {
-                errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE1);
-                CheckErrNum(errNum, "Failed to find OpenCL device info ");
-                cl_uint *infoD1 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE1);
-                errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr);
-                CheckErrNum(errNum, "Failed to find OpenCL device info ");
-                cl_uint numD1 = *infoD1;
+                cl_uint numD1;
+                errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(numD1), &numD1, nullptr);
+                CheckErrNum(errNum, "Failed to find OpenCL device info");
 
-                errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, &paramValueSizeDOUBE2);
-                CheckErrNum(errNum, "Failed to find OpenCL device info ");
-                cl_uint *infoD2 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE2);
-                errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr);
-                CheckErrNum(errNum, "Failed to find OpenCL device info ");
-                cl_uint numD2 = *infoD2;
+                cl_uint numD2;
+                errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, sizeof(numD2), &numD2, nullptr);
+                CheckErrNum(errNum, "Failed to find OpenCL device info");
 
-                if (numD1 > 0 || numD2 > 0) {
-                    this->isCardDoubleCapable = true;
-                } else {
-                    this->isCardDoubleCapable = false;
-                }
+                this->isCardDoubleCapable = numD1 > 0 || numD2 > 0;
             }
         }
     }
@@ -173,7 +139,6 @@ cl_program ClContextSingleton::CreateProgram(const char *fileName) {
 ClContextSingleton::~ClContextSingleton() {
     if (this->context != 0) clReleaseContext(this->context);
     if (this->commandQueue != 0) clReleaseCommandQueue(this->commandQueue);
-    delete[] this->devices;
 }
 /* *************************************************************** */
 void ClContextSingleton::CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, const char *message) {
@@ -259,7 +224,7 @@ cl_device_id ClContextSingleton::GetDeviceId() {
 }
 /* *************************************************************** */
 cl_device_id* ClContextSingleton::GetDevices() {
-    return this->devices;
+    return this->devices.get();
 }
 /* *************************************************************** */
 cl_command_queue ClContextSingleton::GetCommandQueue() {
@@ -271,7 +236,7 @@ cl_uint ClContextSingleton::GetNumPlatforms() {
 }
 /* *************************************************************** */
 cl_platform_id* ClContextSingleton::GetPlatformIds() {
-    return this->platformIds;
+    return this->platformIds.get();
 }
 /* *************************************************************** */
 cl_uint ClContextSingleton::GetNumDevices() {
diff --git a/reg-lib/cl/ClContextSingleton.h b/reg-lib/cl/ClContextSingleton.h
index 2da4247e..9f30a34f 100644
--- a/reg-lib/cl/ClContextSingleton.h
+++ b/reg-lib/cl/ClContextSingleton.h
@@ -53,10 +53,10 @@ class ClContextSingleton {
 
     cl_context context;
     cl_device_id deviceId;
-    cl_device_id *devices;
+    unique_ptr<cl_device_id[]> devices;
     cl_command_queue commandQueue;
     cl_uint numPlatforms;
-    cl_platform_id *platformIds;
+    unique_ptr<cl_platform_id[]> platformIds;
     cl_uint numDevices;
     size_t maxThreads;
 
diff --git a/reg-lib/cl/ClConvolutionKernel.h b/reg-lib/cl/ClConvolutionKernel.h
index 824578d5..caeef9c9 100644
--- a/reg-lib/cl/ClConvolutionKernel.h
+++ b/reg-lib/cl/ClConvolutionKernel.h
@@ -5,7 +5,10 @@
 
 class ClConvolutionKernel: public ConvolutionKernel {
 public:
-    ClConvolutionKernel() : ConvolutionKernel() {}
-    ~ClConvolutionKernel() {}
-    void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr);
+    virtual void Calculate(nifti_image *image,
+                           float *sigma,
+                           ConvKernelType kernelType,
+                           int *mask = nullptr,
+                           bool *timePoints = nullptr,
+                           bool *axis = nullptr) override;
 };
diff --git a/reg-lib/cl/ClLtsKernel.h b/reg-lib/cl/ClLtsKernel.h
index b0ce0b13..dd6fc317 100644
--- a/reg-lib/cl/ClLtsKernel.h
+++ b/reg-lib/cl/ClLtsKernel.h
@@ -6,10 +6,9 @@
 class ClLtsKernel: public LtsKernel {
 public:
     ClLtsKernel(Content *con);
-    ~ClLtsKernel() {}
-    void Calculate(bool affine);
+    virtual void Calculate(bool affine) override;
 
 private:
-    _reg_blockMatchingParam * blockMatchingParams;
+    _reg_blockMatchingParam *blockMatchingParams;
     mat44 *transformationMatrix;
 };
diff --git a/reg-lib/cl/InfoDevice.h b/reg-lib/cl/InfoDevice.h
index a4f7a70f..8f9b2a32 100644
--- a/reg-lib/cl/InfoDevice.h
+++ b/reg-lib/cl/InfoDevice.h
@@ -9,112 +9,96 @@
 template<typename T>
 class DeviceLog {
 public:
-
-	static void appendToString(bool flag, std::string name, std::string & str)
-	{
-		if (flag) {
-			if(str.length() > 0)  str.append(" / ") ;
-			str.append(name);
-		}
-	}
-
-	static void show(cl_device_id id, cl_device_info name, std::string str)
-	{
-		std::size_t paramValueSize;
-		std::string clInfo;
-		ClContextSingleton *sContext = &ClContextSingleton::GetInstance();
-
-		sContext->CheckErrNum(clGetDeviceInfo(id, name, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info ");
-
-		T * field = (T *) alloca(sizeof(T) * paramValueSize);
-		sContext->CheckErrNum(clGetDeviceInfo(id, name, paramValueSize, field, nullptr), "Failed to find OpenCL device info ");
-
-		switch (name) {
-		case CL_DEVICE_TYPE: {
-				const cl_device_type deviceType = *(reinterpret_cast<cl_device_type*>(field));
-				appendToString(deviceType & CL_DEVICE_TYPE_CPU, "CL_DEVICE_TYPE_CPU", clInfo);
-				appendToString(deviceType & CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU", clInfo);
-				appendToString(deviceType & CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR", clInfo);
-				appendToString(deviceType & CL_DEVICE_TYPE_DEFAULT, "CL_DEVICE_TYPE_DEFAULT", clInfo);
-				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
-			}
-			break;
-		case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: {
-				const cl_device_mem_cache_type cacheType = *(reinterpret_cast<cl_device_mem_cache_type*>(field));
-				appendToString(cacheType & CL_NONE, "CL_NONE", clInfo);
-				appendToString(cacheType & CL_READ_ONLY_CACHE, "CL_READ_ONLY_CACHE", clInfo);
-				appendToString(cacheType & CL_READ_WRITE_CACHE, "CL_READ_WRITE_CACHE", clInfo);
-
-				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
-			}
-			break;
-		case CL_DEVICE_LOCAL_MEM_TYPE: {
-				const cl_device_local_mem_type localMemType = *(reinterpret_cast<cl_device_local_mem_type*>(field));
-				appendToString(localMemType & CL_LOCAL, "CL_LOCAL", clInfo);
-				appendToString(localMemType & CL_GLOBAL, "CL_GLOBAL", clInfo);
-
-				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
-			}
-			break;
-		case CL_DEVICE_EXECUTION_CAPABILITIES: {
-
-				const cl_device_exec_capabilities execCapabilities = *(reinterpret_cast<cl_device_exec_capabilities*>(field));
-
-				appendToString(execCapabilities & CL_EXEC_KERNEL, "CL_EXEC_KERNEL", clInfo);
-				appendToString(execCapabilities & CL_EXEC_NATIVE_KERNEL, "CL_EXEC_NATIVE_KERNEL", clInfo);
-
-				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
-			}
-			break;
-		case CL_DEVICE_QUEUE_PROPERTIES: {
-
-				appendToString(*(reinterpret_cast<cl_device_exec_capabilities*>(field)) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", clInfo);
-				appendToString(*(reinterpret_cast<cl_device_exec_capabilities*>(field)) & CL_QUEUE_PROFILING_ENABLE, "CL_QUEUE_PROFILING_ENABLE", clInfo);
-
-				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
-			}
-			break;
-		case CL_DEVICE_MAX_WORK_ITEM_SIZES: {
-				cl_uint maxWorkItemDimensions;
-
-				sContext->CheckErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.");
-				NR_COUT << str << ":\t";
-				for (cl_uint i = 0; i < maxWorkItemDimensions; i++)
-					NR_COUT << field[i] << " ";
-				NR_COUT << std::endl;
-			}
-			break;
-
-		case CL_DEVICE_NAME:
-		case CL_DEVICE_VENDOR:
-		case CL_DRIVER_VERSION:
-		case CL_DEVICE_VERSION: {
-				NR_COUT << "[NiftyReg OPENCL] " << str << ": " << field << std::endl;
-			}
-			break;
-		default:
-			NR_COUT << "[NiftyReg OPENCL] " << str << ": " << *field << std::endl;
-			break;
-		}
-	}
-	static void showKernelInfo(cl_device_id id, cl_kernel_work_group_info name, std::string str)
-	{
-		cl_int errNum;
-		size_t local;
-		ClContextSingleton *sContext = &ClContextSingleton::GetInstance();
-
-		errNum = clGetKernelWorkGroupInfo(sContext->DummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr);
-
-		switch (name) {
-		case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
-				if (errNum != CL_SUCCESS)  local = 1;
-				NR_COUT << "[NiftyReg OPENCL] Warp / wavefront" << ": " << local << std::endl;
-			}
-			break;
-			break;
-		default:
-			NR_COUT << "[NiftyReg OPENCL] " << str << ": " << local << std::endl;
-			break;
-		}
-	}
+    static void appendToString(bool flag, const std::string& name, std::string& str) {
+        if (flag) {
+            if (str.length() > 0)  str.append(" / ");
+            str.append(name);
+        }
+    }
+
+    static void show(cl_device_id id, cl_device_info name, const std::string& str) {
+        size_t paramValueSize;
+        std::string clInfo;
+        ClContextSingleton& sContext = ClContextSingleton::GetInstance();
+
+        sContext.CheckErrNum(clGetDeviceInfo(id, name, 0, nullptr, &paramValueSize), "Failed to find OpenCL device info");
+
+        unique_ptr<T[]> field(new T[paramValueSize]);
+        sContext.CheckErrNum(clGetDeviceInfo(id, name, paramValueSize, field.get(), nullptr), "Failed to find OpenCL device info");
+
+        switch (name) {
+        case CL_DEVICE_TYPE: {
+            const cl_device_type deviceType = *(reinterpret_cast<cl_device_type*>(field.get()));
+            appendToString(deviceType & CL_DEVICE_TYPE_CPU, "CL_DEVICE_TYPE_CPU", clInfo);
+            appendToString(deviceType & CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU", clInfo);
+            appendToString(deviceType & CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR", clInfo);
+            appendToString(deviceType & CL_DEVICE_TYPE_DEFAULT, "CL_DEVICE_TYPE_DEFAULT", clInfo);
+            NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
+            break;
+        }
+        case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: {
+            const cl_device_mem_cache_type cacheType = *(reinterpret_cast<cl_device_mem_cache_type*>(field.get()));
+            appendToString(cacheType & CL_NONE, "CL_NONE", clInfo);
+            appendToString(cacheType & CL_READ_ONLY_CACHE, "CL_READ_ONLY_CACHE", clInfo);
+            appendToString(cacheType & CL_READ_WRITE_CACHE, "CL_READ_WRITE_CACHE", clInfo);
+            NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
+            break;
+        }
+        case CL_DEVICE_LOCAL_MEM_TYPE: {
+            const cl_device_local_mem_type localMemType = *(reinterpret_cast<cl_device_local_mem_type*>(field.get()));
+            appendToString(localMemType & CL_LOCAL, "CL_LOCAL", clInfo);
+            appendToString(localMemType & CL_GLOBAL, "CL_GLOBAL", clInfo);
+            NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
+            break;
+        }
+        case CL_DEVICE_EXECUTION_CAPABILITIES: {
+            const cl_device_exec_capabilities execCapabilities = *(reinterpret_cast<cl_device_exec_capabilities*>(field.get()));
+            appendToString(execCapabilities & CL_EXEC_KERNEL, "CL_EXEC_KERNEL", clInfo);
+            appendToString(execCapabilities & CL_EXEC_NATIVE_KERNEL, "CL_EXEC_NATIVE_KERNEL", clInfo);
+            NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
+            break;
+        }
+        case CL_DEVICE_QUEUE_PROPERTIES:
+            appendToString(*(reinterpret_cast<cl_device_exec_capabilities*>(field.get())) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", clInfo);
+            appendToString(*(reinterpret_cast<cl_device_exec_capabilities*>(field.get())) & CL_QUEUE_PROFILING_ENABLE, "CL_QUEUE_PROFILING_ENABLE", clInfo);
+            NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl;
+            break;
+        case CL_DEVICE_MAX_WORK_ITEM_SIZES: {
+            cl_uint maxWorkItemDimensions;
+
+            sContext.CheckErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info  CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.");
+            NR_COUT << str << ":\t";
+            for (cl_uint i = 0; i < maxWorkItemDimensions; i++)
+                NR_COUT << field[i] << " ";
+            NR_COUT << std::endl;
+            break;
+        }
+
+        case CL_DEVICE_NAME:
+        case CL_DEVICE_VENDOR:
+        case CL_DRIVER_VERSION:
+        case CL_DEVICE_VERSION:
+            NR_COUT << "[NiftyReg OPENCL] " << str << ": " << field.get() << std::endl;
+            break;
+        default:
+            NR_COUT << "[NiftyReg OPENCL] " << str << ": " << field[0] << std::endl;
+            break;
+        }
+    }
+
+    static void showKernelInfo(cl_device_id id, cl_kernel_work_group_info name, const std::string& str) {
+        ClContextSingleton& sContext = ClContextSingleton::GetInstance();
+        size_t local;
+        auto errNum = clGetKernelWorkGroupInfo(sContext.DummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr);
+
+        switch (name) {
+        case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
+            if (errNum != CL_SUCCESS)  local = 1;
+            NR_COUT << "[NiftyReg OPENCL] Warp / wavefront" << ": " << local << std::endl;
+            break;
+        default:
+            NR_COUT << "[NiftyReg OPENCL] " << str << ": " << local << std::endl;
+            break;
+        }
+    }
 };
diff --git a/reg-lib/cl/blockMatchingKernel.cl b/reg-lib/cl/blockMatchingKernel.cl
index d3f7b0d9..876bb7d2 100755
--- a/reg-lib/cl/blockMatchingKernel.cl
+++ b/reg-lib/cl/blockMatchingKernel.cl
@@ -136,9 +136,9 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues,
 
 		// Populate shared memory with the warped image values
 		for (int y=-1; y<2; ++y) {
-			const int yImageIn = yImage + y * 4;
+			const int yImageIn = yImage + y * 4;  // cppcheck-suppress integerOverflow
 			for (int x=-1; x<2; ++x) {
-				const int xImageIn = xImage + x * 4;
+				const int xImageIn = xImage + x * 4;  // cppcheck-suppress integerOverflow
 
 				// Compute the index in the local shared memory
 				const int sharedIndex = ((y+1)*4+idy)*12+(x+1)*4+idx;
@@ -292,11 +292,11 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues,
 
 		// Populate shared memory with the warped image values
 		for (int n=-1; n<2; ++n) {
-			const int zImageIn = zImage + n * 4;
+			const int zImageIn = zImage + n * 4;  // cppcheck-suppress integerOverflow
 			for (int m=-1; m<2; ++m) {
-				const int yImageIn = yImage + m * 4;
+				const int yImageIn = yImage + m * 4;  // cppcheck-suppress integerOverflow
 				for (int l=-1; l<2; ++l) {
-					const int xImageIn = xImage + l * 4;
+					const int xImageIn = xImage + l * 4;  // cppcheck-suppress integerOverflow
 
 					// Compute the index in the local shared memory
 					const int sharedIndex = (((n+1)*4+idz)*12+(m+1)*4+idy)*12+(l+1)*4+idx;
diff --git a/reg-lib/cl/resampleKernel.cl b/reg-lib/cl/resampleKernel.cl
index 3157c3cd..be154011 100755
--- a/reg-lib/cl/resampleKernel.cl
+++ b/reg-lib/cl/resampleKernel.cl
@@ -83,7 +83,7 @@ __inline void interpCubicSplineKernel(real_t relative, real_t *basis)
 __inline void interpLinearKernel(real_t relative, real_t *basis)
 {
     if (relative < (real_t) 0.0) relative = (real_t) 0.0; //reg_rounding error
-    basis[1] = relative;
+    basis[1] = relative;  // cppcheck-suppress ctuArrayIndex // false positive
     basis[0] = (real_t) 1.0 - relative;
 }
 /* *************************************************************** */
@@ -188,9 +188,9 @@ __inline void reg_mat44_mul_cl(__global float const* mat,
 }
 /* *************************************************************** */
 /* *************************************************************** */
-float cl_reg_round(float a)
-{
-    return (float)((a) > 0.0f ? (int)((a)+0.5) : (int)((a)-0.5));
+__inline int Floor(float x) {
+    const int i = (int)x;
+    return i - (x < i);
 }
 /* *************************************************************** */
 /* *************************************************************** */
diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.h b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h
index 47c16c17..7ec45f4a 100644
--- a/reg-lib/cpu/CpuAffineDeformationFieldKernel.h
+++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h
@@ -7,7 +7,7 @@
 class CpuAffineDeformationFieldKernel: public AffineDeformationFieldKernel {
 public:
     CpuAffineDeformationFieldKernel(Content *conIn);
-    void Calculate(bool compose = false);
+    virtual void Calculate(bool compose = false) override;
 
 private:
     mat44 *affineTransformation;
diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.h b/reg-lib/cpu/CpuBlockMatchingKernel.h
index 3626d908..6904917a 100644
--- a/reg-lib/cpu/CpuBlockMatchingKernel.h
+++ b/reg-lib/cpu/CpuBlockMatchingKernel.h
@@ -6,7 +6,7 @@
 class CpuBlockMatchingKernel: public BlockMatchingKernel {
 public:
     CpuBlockMatchingKernel(Content *con);
-    void Calculate();
+    virtual void Calculate() override;
 
 private:
     nifti_image *reference;
diff --git a/reg-lib/cpu/CpuConvolutionKernel.h b/reg-lib/cpu/CpuConvolutionKernel.h
index 3e960308..f113fd36 100644
--- a/reg-lib/cpu/CpuConvolutionKernel.h
+++ b/reg-lib/cpu/CpuConvolutionKernel.h
@@ -5,6 +5,10 @@
 
 class CpuConvolutionKernel: public ConvolutionKernel {
 public:
-    CpuConvolutionKernel() : ConvolutionKernel() {}
-    void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr);
+    virtual void Calculate(nifti_image *image,
+                           float *sigma,
+                           ConvKernelType kernelType,
+                           int *mask = nullptr,
+                           bool *timePoints = nullptr,
+                           bool *axis = nullptr) override;;
 };
diff --git a/reg-lib/cpu/CpuLtsKernel.h b/reg-lib/cpu/CpuLtsKernel.h
index 4f808dff..6b183934 100644
--- a/reg-lib/cpu/CpuLtsKernel.h
+++ b/reg-lib/cpu/CpuLtsKernel.h
@@ -6,7 +6,7 @@
 class CpuLtsKernel: public LtsKernel {
 public:
     CpuLtsKernel(Content *con);
-    void Calculate(bool affine);
+    virtual void Calculate(bool affine) override;
 
 private:
     _reg_blockMatchingParam *blockMatchingParams;
diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h
index 9639f43c..f370df90 100755
--- a/reg-lib/cpu/_reg_blockMatching.h
+++ b/reg-lib/cpu/_reg_blockMatching.h
@@ -15,7 +15,6 @@
 #pragma once
 
 #include "_reg_maths.h"
-#include <vector>
 
 #define TOLERANCE 0.001
 #define MAX_ITERATIONS 30
@@ -30,49 +29,36 @@
 #define NUM_BLOCKS_TO_COMPARE_1D 7
 
 /// @brief Structure which contains the block matching parameters
-struct _reg_blockMatchingParam
-{
-   int totalBlockNumber;
-   int *totalBlock;
-   unsigned blockNumber[3];
-   //Number of block we keep for LTS
-   int percent_to_keep;
-
-   unsigned dim;
-   float *referencePosition;
-   float *warpedPosition;
-
-   //Before:
-   //Min between Number of block we keep in total (totalBlockNumber*percent_to_keep) and Number of total block - unuseable blocks
-   //Now:
-   //Number of total block - unuseable blocks
-   int activeBlockNumber;
-   //int *activeBlock;
-
-   //Number of active block which has a displacement vector (not NaN)
-   int definedActiveBlockNumber;
-   //int *definedActiveBlock;
-
-   int voxelCaptureRange;
-
-   int stepSize;
-
-   _reg_blockMatchingParam()
-       : totalBlockNumber(0),
-        totalBlock(0),
-        percent_to_keep(0),
-        dim(0),
-        referencePosition(0),
-        warpedPosition(0),
-        activeBlockNumber(0),
-        voxelCaptureRange(0),
-        stepSize(0)
-   {}
-
-   // Perform a deep copy
-   _reg_blockMatchingParam(_reg_blockMatchingParam *);
-
-   ~_reg_blockMatchingParam();
+struct _reg_blockMatchingParam {
+    int totalBlockNumber = 0;
+    int *totalBlock = nullptr;
+    unsigned blockNumber[3]{};
+    // Number of block we keep for LTS
+    int percent_to_keep = 0;
+
+    unsigned dim = 0;
+    float *referencePosition = nullptr;
+    float *warpedPosition = nullptr;
+
+    // Before: Min between Number of block we keep in total (totalBlockNumber*percent_to_keep) and Number of total block - unusable blocks
+    // Now: Number of total block - unusable blocks
+    int activeBlockNumber = 0;
+    //int *activeBlock;
+
+    // Number of active block which has a displacement vector (not NaN)
+    int definedActiveBlockNumber = 0;
+    //int *definedActiveBlock;
+
+    int voxelCaptureRange = 0;
+
+    int stepSize = 0;
+
+    _reg_blockMatchingParam() = default;
+
+    // Perform a deep copy
+    _reg_blockMatchingParam(_reg_blockMatchingParam *);
+
+    ~_reg_blockMatchingParam();
 };
 /* *************************************************************** */
 /** @brief This function initialise a _reg_blockMatchingParam structure
diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h
index 83fd60fa..534e079f 100755
--- a/reg-lib/cpu/_reg_dti.h
+++ b/reg-lib/cpu/_reg_dti.h
@@ -48,8 +48,8 @@ class reg_dti: public reg_measure {
 
 protected:
     // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ
-    unsigned dtIndicies[6];
-    float currentValue;
+    unsigned dtIndicies[6]{};
+    float currentValue = 0;
 };
 /* *************************************************************** */
 /** @brief Computes and returns the SSD between two input image
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 90967d07..bb4b7a54 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -1109,7 +1109,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
 #endif // USE_SSE
 
         // Assess if lookup table can be used
-        if (gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && forceNoLut == false) {
+        if (gridVoxelSpacing[0] == 5.f && gridVoxelSpacing[1] == 5.f && gridVoxelSpacing[2] == 5.f && forceNoLut == false) {
             // Assign a single array that will contain all coefficients
             DataType *coefficients = (DataType*)malloc(125 * 64 * sizeof(DataType));
             // Compute and store all required coefficients
@@ -1706,14 +1706,14 @@ void reg_voxelCentricToNodeCentric(nifti_image *nodeImage,
 }
 /* *************************************************************** */
 template<class SplineTYPE>
-SplineTYPE GetValue(SplineTYPE *array, int *dim, int x, int y, int z) {
+SplineTYPE GetValue(const SplineTYPE *array, const int (&dim)[4], const int x, const int y, const int z) {
     if (x < 0 || x >= dim[1] || y < 0 || y >= dim[2] || z < 0 || z >= dim[3])
         return 0;
     return array[(z * dim[2] + y) * dim[1] + x];
 }
 /* *************************************************************** */
 template<class SplineTYPE>
-void SetValue(SplineTYPE *array, int *dim, int x, int y, int z, SplineTYPE value) {
+void SetValue(SplineTYPE *array, const int *dim, const int x, const int y, const int z, const SplineTYPE value) {
     if (x < 0 || x >= dim[1] || y < 0 || y >= dim[2] || z < 0 || z >= dim[3])
         return;
     array[(z * dim[2] + y) * dim[1] + x] = value;
@@ -1723,15 +1723,10 @@ template<class SplineTYPE>
 void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint,
                                          nifti_image *referenceImage) {
     // The input grid is first saved
-    SplineTYPE *oldGrid = (SplineTYPE*)malloc(splineControlPoint->nvox * splineControlPoint->nbyper);
-    SplineTYPE *gridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
-    memcpy(oldGrid, gridPtrX, splineControlPoint->nvox * splineControlPoint->nbyper);
-    if (splineControlPoint->data != nullptr) free(splineControlPoint->data);
-    int oldDim[4];
-    oldDim[0] = splineControlPoint->dim[0];
-    oldDim[1] = splineControlPoint->dim[1];
-    oldDim[2] = splineControlPoint->dim[2];
-    oldDim[3] = splineControlPoint->dim[3];
+    const int oldDim[4]{ splineControlPoint->dim[0], splineControlPoint->dim[1], splineControlPoint->dim[2], splineControlPoint->dim[3] };
+    SplineTYPE *oldGridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
+    SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2]];
+    splineControlPoint->data = nullptr;
 
     splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f;
     splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f;
@@ -1747,10 +1742,8 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint,
 
     splineControlPoint->nvox = NiftiImage::calcVoxelNumber(splineControlPoint, splineControlPoint->ndim);
     splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
-    gridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
+    SplineTYPE *gridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
     SplineTYPE *gridPtrY = &gridPtrX[NiftiImage::calcVoxelNumber(splineControlPoint, 2)];
-    SplineTYPE *oldGridPtrX = &oldGrid[0];
-    SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2]];
 
     for (int y = 0; y < oldDim[2]; y++) {
         int Y = 2 * y - 1;
@@ -1810,21 +1803,17 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint,
         }
     }
 
-    free(oldGrid);
+    free(oldGridPtrX);
 }
 /* *************************************************************** */
 template<class SplineTYPE>
 void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_image *referenceImage) {
     // The input grid is first saved
-    SplineTYPE *oldGrid = (SplineTYPE*)malloc(splineControlPoint->nvox * splineControlPoint->nbyper);
-    SplineTYPE *gridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
-    memcpy(oldGrid, gridPtrX, splineControlPoint->nvox * splineControlPoint->nbyper);
-    if (splineControlPoint->data != nullptr) free(splineControlPoint->data);
-    int oldDim[4];
-    oldDim[0] = splineControlPoint->dim[0];
-    oldDim[1] = splineControlPoint->dim[1];
-    oldDim[2] = splineControlPoint->dim[2];
-    oldDim[3] = splineControlPoint->dim[3];
+    const int oldDim[4]{ splineControlPoint->dim[0], splineControlPoint->dim[1], splineControlPoint->dim[2], splineControlPoint->dim[3] };
+    SplineTYPE *oldGridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
+    SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2] * oldDim[3]];
+    SplineTYPE *oldGridPtrZ = &oldGridPtrY[oldDim[1] * oldDim[2] * oldDim[3]];
+    splineControlPoint->data = nullptr;
 
     splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f;
     splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f;
@@ -1843,12 +1832,9 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_
     splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper);
 
     const size_t splineControlPointVoxelNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
-    gridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
+    SplineTYPE *gridPtrX = static_cast<SplineTYPE*>(splineControlPoint->data);
     SplineTYPE *gridPtrY = &gridPtrX[splineControlPointVoxelNumber];
     SplineTYPE *gridPtrZ = &gridPtrY[splineControlPointVoxelNumber];
-    SplineTYPE *oldGridPtrX = &oldGrid[0];
-    SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2] * oldDim[3]];
-    SplineTYPE *oldGridPtrZ = &oldGridPtrY[oldDim[1] * oldDim[2] * oldDim[3]];
 
     for (int z = 0; z < oldDim[3]; z++) {
         int Z = 2 * z - 1;
@@ -2130,7 +2116,7 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_
             }
         }
     }
-    free(oldGrid);
+    free(oldGridPtrX);
 }
 /* *************************************************************** */
 void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
@@ -3724,8 +3710,8 @@ void compute_lie_bracket(nifti_image *img1,
 #endif
     // Lie bracket using Jacobian for testing
     if (use_jac) {
-        mat33 *jacImg1 = (mat33*)malloc(voxNumber * sizeof(mat33));
-        mat33 *jacImg2 = (mat33*)malloc(voxNumber * sizeof(mat33));
+        mat33 *jacImg1 = (mat33*)calloc(voxNumber, sizeof(mat33));
+        mat33 *jacImg2 = (mat33*)calloc(voxNumber, sizeof(mat33));
 
         reg_getDeformationFromDisplacement(img1);
         reg_getDeformationFromDisplacement(img2);
diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h
index 7017548d..a9449b92 100755
--- a/reg-lib/cpu/_reg_measure.h
+++ b/reg-lib/cpu/_reg_measure.h
@@ -127,20 +127,20 @@ class reg_measure {
     }
 
 protected:
-    nifti_image *referenceImage;
-    int *referenceMask;
-    nifti_image *warpedImage;
-    nifti_image *warpedGradient;
-    nifti_image *voxelBasedGradient;
-    nifti_image *localWeightSim;
+    nifti_image *referenceImage = nullptr;
+    int *referenceMask = nullptr;
+    nifti_image *warpedImage = nullptr;
+    nifti_image *warpedGradient = nullptr;
+    nifti_image *voxelBasedGradient = nullptr;
+    nifti_image *localWeightSim = nullptr;
 
-    bool isSymmetric;
-    nifti_image *floatingImage;
-    int *floatingMask;
-    nifti_image *warpedImageBw;
-    nifti_image *warpedGradientBw;
-    nifti_image *voxelBasedGradientBw;
+    bool isSymmetric = false;
+    nifti_image *floatingImage = nullptr;
+    int *floatingMask = nullptr;
+    nifti_image *warpedImageBw = nullptr;
+    nifti_image *warpedGradientBw = nullptr;
+    nifti_image *voxelBasedGradientBw = nullptr;
 
     double timePointWeights[255]{};
-    int referenceTimePoints;
+    int referenceTimePoints = 0;
 };
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index ea4f1739..0877e2ed 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -282,16 +282,6 @@ void GetMindSscImageDescriptor(const nifti_image *inputImage,
     NR_FUNC_CALLED();
 }
 /* *************************************************************** */
-reg_mind::reg_mind(): reg_ssd() {
-    this->referenceImageDescriptor = nullptr;
-    this->floatingImageDescriptor = nullptr;
-    this->warpedFloatingImageDescriptor = nullptr;
-    this->warpedReferenceImageDescriptor = nullptr;
-    this->mindType = MIND_TYPE;
-    this->descriptorOffset = 1;
-    NR_FUNC_CALLED();
-}
-/* *************************************************************** */
 reg_mind::~reg_mind() {
     if (this->referenceImageDescriptor != nullptr) {
         nifti_image_free(this->referenceImageDescriptor);
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index 35c21203..7fb44cf7 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -24,7 +24,7 @@
 class reg_mind: public reg_ssd {
 public:
     /// @brief reg_mind class constructor
-    reg_mind();
+    reg_mind() { NR_FUNC_CALLED(); }
     /// @brief Measure class destructor
     virtual ~reg_mind();
 
@@ -52,14 +52,14 @@ class reg_mind: public reg_ssd {
     virtual int GetDescriptorOffset() { return this->descriptorOffset; }
 
 protected:
-    nifti_image *referenceImageDescriptor;
-    nifti_image *floatingImageDescriptor;
-    nifti_image *warpedReferenceImageDescriptor;
-    nifti_image *warpedFloatingImageDescriptor;
+    nifti_image *referenceImageDescriptor = nullptr;
+    nifti_image *floatingImageDescriptor = nullptr;
+    nifti_image *warpedReferenceImageDescriptor = nullptr;
+    nifti_image *warpedFloatingImageDescriptor = nullptr;
     double timePointWeightsDescriptor[255]{};
-    int descriptorOffset;
-    int mindType;
-    int descriptorNumber;
+    int mindType = MIND_TYPE;
+    int descriptorOffset = 1;
+    int descriptorNumber = 0;
 };
 /* *************************************************************** */
 /// @brief MIND-SSC measure of similarity class
diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp
index 97b1138b..21c5e7bf 100755
--- a/reg-lib/cpu/_reg_nmi.cpp
+++ b/reg-lib/cpu/_reg_nmi.cpp
@@ -14,13 +14,6 @@
 
 /* *************************************************************** */
 reg_nmi::reg_nmi(): reg_measure() {
-    this->jointHistogramPro = nullptr;
-    this->jointHistogramLog = nullptr;
-    this->entropyValues = nullptr;
-    this->jointHistogramProBw = nullptr;
-    this->jointHistogramLogBw = nullptr;
-    this->entropyValuesBw = nullptr;
-    this->approximatePw = true;
     for (int i = 0; i < 255; ++i) {
         this->referenceBinNumber[i] = 68;
         this->floatingBinNumber[i] = 68;
diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h
index 7daea41a..cf8471d7 100755
--- a/reg-lib/cpu/_reg_nmi.h
+++ b/reg-lib/cpu/_reg_nmi.h
@@ -13,10 +13,6 @@
 #pragma once
 
 #include "_reg_measure.h"
-#include <vector>
-#ifdef _OPENMP
-#include "omp.h"
-#endif
 
 /* *************************************************************** */
 /// @brief NMI measure of similarity class
@@ -73,137 +69,20 @@ class reg_nmi: public reg_measure {
     }
 
 protected:
-    bool approximatePw;
+    bool approximatePw = true;
     unsigned short referenceBinNumber[255];
     unsigned short floatingBinNumber[255];
-    unsigned short totalBinNumber[255];
-    double **jointHistogramPro;
-    double **jointHistogramLog;
-    double **entropyValues;
-    double **jointHistogramProBw;
-    double **jointHistogramLogBw;
-    double **entropyValuesBw;
+    unsigned short totalBinNumber[255]{};
+    double **jointHistogramPro = nullptr;
+    double **jointHistogramLog = nullptr;
+    double **entropyValues = nullptr;
+    double **jointHistogramProBw = nullptr;
+    double **jointHistogramLogBw = nullptr;
+    double **entropyValuesBw = nullptr;
 
     void DeallocateHistogram();
 };
 /* *************************************************************** */
-// Simple class to dynamically manage an array of pointers
-// Needed for multi channel NMI
-template<class DataTYPE>
-class SafeArray {
-public:
-    /// Constructor
-    SafeArray(int items) {
-        data = new DataTYPE[items];
-    }
-
-    /// Destructor
-    ~SafeArray() {
-        delete[] data;
-    }
-
-    /// Implicit conversion
-    operator DataTYPE *() {
-        return data;
-    }
-
-private:
-    void operator=(const SafeArray&) {};
-    SafeArray(const SafeArray&) {};
-
-    DataTYPE *data;
-};
-
-//-----------------------------------------------------------------------------
-// Template for emulating nested multiple loops, where the number of nested loops
-// is only known at runtime.
-// The index type may be any incrementable type, including pointers and iterators.
-// 'end' values are like the STL ranges, where they signify one past the last value.
-//-----------------------------------------------------------------------------
-template<typename T>
-class Multi_Loop {
-public:
-    /// Add a for loop to the list
-    void Add(T begin_value, T end_value) {
-        begin.push_back(begin_value);
-        end.push_back(end_value);
-    }
-
-    // Initialises the loops before use.
-    void Initialise() {
-        current.resize(Count());
-        std::copy(begin.begin(), begin.end(), current.begin());
-    }
-
-    /// Gets the index or iterator for the specified loop.
-    T Index(int index) const {
-        return (current[index]);
-    }
-
-    /// Gets the index or iterator for the specified loop.
-    const T& operator [](int index) const {
-        return (current[index]);
-    }
-
-    /// Tests to see if the loops continue.
-    bool Continue() const {
-        return (current[0] != end[0]);
-    }
-
-    /// Compute the next set of indexes or iterators in the sequence.
-    void Next() {
-        int position = begin.size() - 1;
-        bool finished = false;
-
-        while (!finished) {
-            ++current[position];
-            // Finished incrementing?
-            if ((current[position] != end[position]) || (position == 0)) {
-                finished = true;
-            } else {
-                // Reset this index, and move on to the previous one.
-                current[position] = begin[position];
-                --position;
-            }
-        }
-    }
-
-    /// Returns the number of 'for' loops added.
-    int Count() const {
-        return (static_cast<int>(begin.size()));
-    }
-
-private:
-    std::vector<T> begin;   // Start for each loop.
-    std::vector<T> end;     // End for each loop.
-    std::vector<T> current; // Current position of each loop
-};
-
-/// Some methods that will be needed for generating the multi-channel histogram
-/// Needed for multi channel NMI
-inline int calculate_product(int dim, int *dimensions) {
-    int product = 1;
-    for (int i = 0; i < dim; ++i)
-        product *= dimensions[i];
-
-    return product;
-}
-
-inline int calculate_index(int num_dims, int *dimensions, int *indices) {
-    int index = 0;
-    for (int i = 0; i < num_dims; ++i)
-        index += indices[i] * calculate_product(i, dimensions);
-
-    return index;
-}
-
-inline int previous(int current, int num_dims) {
-    if (current > 0)
-        return current - 1;
-
-    return num_dims - 1;
-}
-/* *************************************************************** */
 /// @brief NMI measure of similarity class
 class reg_multichannel_nmi: public reg_measure {
 public:
@@ -223,15 +102,15 @@ class reg_multichannel_nmi: public reg_measure {
     virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {}
 
 protected:
-    unsigned short referenceBinNumber[255];
-    unsigned short floatingBinNumber[255];
-    unsigned short totalBinNumber[255];
-    double *jointHistogramProp;
-    double *jointHistogramLog;
-    double *entropyValues;
-    double *jointHistogramPropBw;
-    double *jointHistogramLogBw;
-    double *entropyValuesBw;
+    unsigned short referenceBinNumber[255]{};
+    unsigned short floatingBinNumber[255]{};
+    unsigned short totalBinNumber[255]{};
+    double *jointHistogramProp = nullptr;
+    double *jointHistogramLog = nullptr;
+    double *entropyValues = nullptr;
+    double *jointHistogramPropBw = nullptr;
+    double *jointHistogramLogBw = nullptr;
+    double *entropyValuesBw = nullptr;
 };
 /* *************************************************************** */
 /// Multi channel NMI version - Entropy
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 1b63bcdb..b7f20f45 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -2566,7 +2566,7 @@ nifti_image* nifti_dup(const nifti_image& image, const bool copyData) {
 /* *************************************************************** */
 void PrintCmdLine(const int argc, const char *const *argv, const bool verbose) {
     // Print the version
-    NR_INFO(argv[0] << " v" << NR_VERSION);
+    NR_INFO("Version " << NR_VERSION);
     NR_INFO("");
 #ifdef NDEBUG
     if (!verbose) return;
diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.h b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h
index 327e7d71..46eebd18 100644
--- a/reg-lib/cuda/CudaAffineDeformationFieldKernel.h
+++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h
@@ -7,7 +7,8 @@
 class CudaAffineDeformationFieldKernel: public AffineDeformationFieldKernel {
 public:
     CudaAffineDeformationFieldKernel(Content *conIn);
-    void Calculate(bool compose = false);
+    virtual void Calculate(bool compose = false) override;
+
 private:
     mat44 *affineTransformation;
     nifti_image *deformationFieldImage;
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index d91d7cf2..84be113d 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -111,26 +111,6 @@ void CudaAladinContent::AllocateCuPtrs() {
             Cuda::Allocate<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
             Cuda::TransferNiftiToDevice(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
         }
-        /* // Removed until CUDA SVD is added back
-        if (blockMatchingParams->activeBlockNumber > 0 ) {
-           unsigned m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim;
-           unsigned n = 0;
-
-           if (blockMatchingParams->dim == 2) {
-              n = 6;
-           }
-           else {
-              n = 12;
-           }
-
-           Cuda::Allocate<float>(&AR_d, m * n);
-           Cuda::Allocate<float>(&U_d, m * m); //only the singular vectors output is needed
-           Cuda::Allocate<float>(&VT_d, n * n);
-           Cuda::Allocate<float>(&Sigma_d, std::min(m, n));
-           Cuda::Allocate<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
-           Cuda::Allocate<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-        }
-        */
     }
 }
 /* *************************************************************** */
@@ -210,26 +190,6 @@ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
         Cuda::Allocate<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
         Cuda::TransferFromHostToDevice<int>(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
     }
-    /* // Removed until CUDA SVD is added back
-     if (blockMatchingParams->activeBlockNumber > 0) {
-         unsigned m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim;
-         unsigned n = 0;
-
-         if (blockMatchingParams->dim == 2) {
-             n = 6;
-         }
-         else {
-             n = 12;
-         }
-
-         Cuda::Allocate<float>(&AR_d, m * n);
-         Cuda::Allocate<float>(&U_d, m * m); //only the singular vectors output is needed
-         Cuda::Allocate<float>(&VT_d, n * n);
-         Cuda::Allocate<float>(&Sigma_d, std::min(m, n));
-         Cuda::Allocate<float>(&lengths_d, blockMatchingParams->activeBlockNumber);
-         Cuda::Allocate<float>(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
-     }
-     */
 }
 /* *************************************************************** */
 template<class DataType>
@@ -343,48 +303,6 @@ float* CudaAladinContent::GetFloIJKMat_d() {
     return floIJKMat_d;
 }
 /* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::GetAR_d()
-{
-   return AR_d;
-}
-*/
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::GetU_d()
-{
-   return U_d;
-}
-*/
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::GetVT_d()
-{
-   return VT_d;
-}
-*/
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::GetSigma_d()
-{
-   return Sigma_d;
-}
-*/
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::GetLengths_d()
-{
-   return lengths_d;
-}
-*/
-/* *************************************************************** */
-/* // Removed until CUDA SVD is added back
-float* CudaAladinContent::GetNewWarpedPos_d()
-{
-   return newWarpedPos_d;
-}
-*/
-/* *************************************************************** */
 int* CudaAladinContent::GetTotalBlock_d() {
     return totalBlock_d;
 }
@@ -393,14 +311,6 @@ int* CudaAladinContent::GetMask_d() {
     return mask_d;
 }
 /* *************************************************************** */
-int* CudaAladinContent::GetReferenceDims() {
-    return referenceDims;
-}
-/* *************************************************************** */
-int* CudaAladinContent::GetFloatingDims() {
-    return floatingDims;
-}
-/* *************************************************************** */
 void CudaAladinContent::FreeCuPtrs() {
     if (transformationMatrix_d != nullptr)
         Cuda::Free(transformationMatrix_d);
@@ -430,14 +340,6 @@ void CudaAladinContent::FreeCuPtrs() {
         Cuda::Free(referencePosition_d);
     if (warpedPosition_d != nullptr)
         Cuda::Free(warpedPosition_d);
-        /*
-        Cuda::Free(AR_d);
-        Cuda::Free(U_d);
-        Cuda::Free(VT_d);
-        Cuda::Free(Sigma_d);
-        Cuda::Free(lengths_d);
-        Cuda::Free(newWarpedPos_d);
-        */
 }
 /* *************************************************************** */
 bool CudaAladinContent::IsCurrentComputationDoubleCapable() {
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
index e8eaad82..bae204bf 100644
--- a/reg-lib/cuda/CudaAladinContent.h
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -29,19 +29,9 @@ class CudaAladinContent: public AladinContent {
     virtual float* GetReferenceMat_d();
     virtual float* GetFloIJKMat_d();
 
-    //	float* GetAR_d(); // Removed until CUDA SVD is added back
-    //	float* GetU_d(); // Removed until CUDA SVD is added back
-    //	float* GetVT_d(); // Removed until CUDA SVD is added back
-    //	float* GetSigma_d(); // Removed until CUDA SVD is added back
-    //	float* GetLengths_d(); // Removed until CUDA SVD is added back
-    //	float* GetNewWarpedPos_d(); // Removed until CUDA SVD is added back
-
     virtual int* GetTotalBlock_d();
     virtual int* GetMask_d();
 
-    virtual int* GetReferenceDims();
-    virtual int* GetFloatingDims();
-
     // CPU getters with data downloaded from device
     virtual _reg_blockMatchingParam* GetBlockMatchingParams() override;
     virtual nifti_image* GetDeformationField() override;
@@ -64,17 +54,6 @@ class CudaAladinContent: public AladinContent {
     float *referenceMat_d;
     float *floIJKMat_d;
 
-    //svd
-    //	float *AR_d;//A and then pseudoinverse  // Removed until CUDA SVD is added back
-    //	float *U_d; // Removed until CUDA SVD is added back
-    //	float *VT_d; // Removed until CUDA SVD is added back
-    //	float *Sigma_d; // Removed until CUDA SVD is added back
-    //	float *lengths_d; // Removed until CUDA SVD is added back
-    //	float *newWarpedPos_d; // Removed until CUDA SVD is added back
-
-    int referenceDims[4];
-    int floatingDims[4];
-
     void DownloadImage(nifti_image *image, float* memoryObject, int datatype);
     template<class T>
     void FillImageData(nifti_image *image, float* memoryObject, int type);
@@ -89,8 +68,8 @@ class CudaAladinContent: public AladinContent {
 #endif
     // Functions for testing
     virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedImageIn) override;
+    virtual void SetWarped(nifti_image *warpedIn) override;
     virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
     virtual void SetReferenceMask(int *referenceMaskIn) override;
-    virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override;
+    virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) override;
 };
diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.h b/reg-lib/cuda/CudaBlockMatchingKernel.h
index f917f85e..821099d6 100644
--- a/reg-lib/cuda/CudaBlockMatchingKernel.h
+++ b/reg-lib/cuda/CudaBlockMatchingKernel.h
@@ -7,7 +7,7 @@
 class CudaBlockMatchingKernel: public BlockMatchingKernel {
 public:
     explicit CudaBlockMatchingKernel(Content *conIn);
-    void Calculate();
+    virtual void Calculate() override;
 
 private:
     nifti_image *reference;
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 569581b1..d4b5a277 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -122,8 +122,10 @@ inline void UpdateControlPointPosition(float4 *currentDofCuda,
                                        cudaTextureObject_t gradientTexture,
                                        const size_t nVoxels,
                                        const float scale) {
-    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const int index) {
-        float4 dofValue = currentDofCuda[index]; scale; // To capture scale
+    thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [
+        currentDofCuda, bestDofTexture, gradientTexture, scale
+    ]__device__(const int index) {
+        float4 dofValue = currentDofCuda[index];
         const float4 bestValue = tex1Dfetch<float4>(bestDofTexture, index);
         const float4 gradValue = tex1Dfetch<float4>(gradientTexture, index);
         if constexpr (optimiseX)
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index c25cff9d..08ed8e91 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -11,15 +11,15 @@ CudaContent::CudaContent(nifti_image *referenceIn,
     AllocateFloating();
     AllocateWarped();
     AllocateDeformationField();
-    SetReferenceMask(referenceMask);
-    SetTransformationMatrix(transformationMatrix);
+    CudaContent::SetReferenceMask(referenceMask);
+    CudaContent::SetTransformationMatrix(transformationMatrix);
 }
 /* *************************************************************** */
 CudaContent::~CudaContent() {
     DeallocateWarped();
     DeallocateDeformationField();
-    SetReferenceMask(nullptr);
-    SetTransformationMatrix(nullptr);
+    CudaContent::SetReferenceMask(nullptr);
+    CudaContent::SetTransformationMatrix(nullptr);
 }
 /* *************************************************************** */
 void CudaContent::AllocateReference() {
@@ -40,7 +40,7 @@ void CudaContent::AllocateFloating() {
 /* *************************************************************** */
 void CudaContent::AllocateDeformationField() {
     Cuda::Allocate(&deformationFieldCuda, deformationField->dim);
-    UpdateDeformationField();
+    CudaContent::UpdateDeformationField();
 }
 /* *************************************************************** */
 void CudaContent::DeallocateDeformationField() {
diff --git a/reg-lib/cuda/CudaConvolutionKernel.h b/reg-lib/cuda/CudaConvolutionKernel.h
index f0d9ca74..1e315302 100644
--- a/reg-lib/cuda/CudaConvolutionKernel.h
+++ b/reg-lib/cuda/CudaConvolutionKernel.h
@@ -6,11 +6,10 @@
 // A kernel function for convolution (gaussian smoothing?)
 class CudaConvolutionKernel: public ConvolutionKernel {
 public:
-    CudaConvolutionKernel() : ConvolutionKernel() {}
-    void Calculate(nifti_image *image,
-                   float *sigma,
-                   ConvKernelType kernelType,
-                   int *mask = nullptr,
-                   bool *timePoints = nullptr,
-                   bool *axis = nullptr);
+    virtual void Calculate(nifti_image *image,
+                           float *sigma,
+                           ConvKernelType kernelType,
+                           int *mask = nullptr,
+                           bool *timePoints = nullptr,
+                           bool *axis = nullptr) override;
 };
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index 6c73f9cd..c6722b9e 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -18,14 +18,14 @@ CudaF3dContent::CudaF3dContent(nifti_image *referenceIn,
 }
 /* *************************************************************** */
 CudaF3dContent::~CudaF3dContent() {
-    GetControlPointGrid();  // Transfer device data back to nifti
+    CudaF3dContent::GetControlPointGrid();  // Transfer device data back to nifti
     DeallocateControlPointGrid();
     DeallocateTransformationGradient();
 }
 /* *************************************************************** */
 void CudaF3dContent::AllocateControlPointGrid() {
     Cuda::Allocate(&controlPointGridCuda, controlPointGrid->dim);
-    UpdateControlPointGrid();
+    CudaF3dContent::UpdateControlPointGrid();
 }
 /* *************************************************************** */
 void CudaF3dContent::DeallocateControlPointGrid() {
diff --git a/reg-lib/cuda/CudaLtsKernel.cpp b/reg-lib/cuda/CudaLtsKernel.cpp
index a0993fe9..9c669a3c 100644
--- a/reg-lib/cuda/CudaLtsKernel.cpp
+++ b/reg-lib/cuda/CudaLtsKernel.cpp
@@ -1,7 +1,6 @@
 #include <cuda_runtime.h>
 #include <cuda.h>
 #include "CudaLtsKernel.h"
-#include "optimizeKernel.h"
 
 /* *************************************************************** */
 CudaLtsKernel::CudaLtsKernel(Content *conIn) : LtsKernel() {
@@ -11,65 +10,9 @@ CudaLtsKernel::CudaLtsKernel(Content *conIn) : LtsKernel() {
     //get cpu ptrs
     transformationMatrix = con->AladinContent::GetTransformationMatrix();
     blockMatchingParams = con->AladinContent::GetBlockMatchingParams();
-
-    //   transformationMatrix_d = con->GetTransformationMatrix_d();
-    //   AR_d = con->GetAR_d(); // Removed until CUDA SVD is added back
-    //   U_d = con->GetU_d(); // Removed until CUDA SVD is added back
-    //   Sigma_d = con->GetSigma_d(); // Removed until CUDA SVD is added back
-    //   VT_d = con->GetVT_d(); // Removed until CUDA SVD is added back
-    //   lengths_d = con->GetLengths_d(); // Removed until CUDA SVD is added back
-    //   referencePos_d = con->GetReferencePosition_d();
-    //   warpedPos_d = con->GetWarpedPosition_d();
-    //   newWarpedPos_d = con->GetNewWarpedPos_d(); // Removed until CUDA SVD is added back
-
 }
 /* *************************************************************** */
 void CudaLtsKernel::Calculate(bool affine) {
-    /* // Removed until CUDA SVD is added back
- #if _WIN64 || __x86_64__ || __ppc64__
-
-     //for now. Soon we will have a GPU version of it
-     int* cudaRunTimeVersion = (int*)malloc(sizeof(int));
-     int* cudaDriverVersion = (int*)malloc(sizeof(int));
-     cudaRuntimeGetVersion(cudaRunTimeVersion);
-     cudaDriverGetVersion(cudaDriverVersion);
-
-     NR_DEBUG("CUDA runtime version=" << *cudaRunTimeVersion);
-     NR_DEBUG("CUDA driver version=" << *cudaDriverVersion);
-
-     if (*cudaRunTimeVersion < 7050) {
-         blockMatchingParams = con->GetBlockMatchingParams();
-         optimize(blockMatchingParams, transformationMatrix, affine);
-     }
-     else {
-         //HAVE TO DO THE RIGID AND 2D VERSION
-         if(affine && blockMatchingParams->dim == 3) {
-             const unsigned long num_to_keep = (unsigned long)(blockMatchingParams->activeBlockNumber *(blockMatchingParams->percent_to_keep / 100.0f));
-             optimize_affine3D_cuda(transformationMatrix,
-                                    transformationMatrix_d,
-                                    AR_d,
-                                    U_d,
-                                    Sigma_d,
-                                    VT_d,
-                                    lengths_d,
-                                    referencePos_d,
-                                    warpedPos_d,
-                                    newWarpedPos_d,
-                                    blockMatchingParams->activeBlockNumber * 3,
-                                    12,
-                                    num_to_keep,
-                                    ils,
-                                    affine);
-         } else {
-             blockMatchingParams = con->GetBlockMatchingParams();
-             optimize(blockMatchingParams, transformationMatrix, affine);
-         }
-     }
- #else
-     blockMatchingParams = con->GetBlockMatchingParams();
-     optimize(blockMatchingParams, transformationMatrix, affine);
- #endif
- */
     blockMatchingParams = con->GetBlockMatchingParams();
     optimize(blockMatchingParams, transformationMatrix, affine);
 }
diff --git a/reg-lib/cuda/CudaLtsKernel.h b/reg-lib/cuda/CudaLtsKernel.h
index 605730bd..c0a95099 100644
--- a/reg-lib/cuda/CudaLtsKernel.h
+++ b/reg-lib/cuda/CudaLtsKernel.h
@@ -7,17 +7,10 @@
 class CudaLtsKernel: public LtsKernel {
 public:
     CudaLtsKernel(Content *conIn);
-    void Calculate(bool affine);
+    virtual void Calculate(bool affine) override;
 
 private:
     _reg_blockMatchingParam *blockMatchingParams;
     mat44 *transformationMatrix;
     CudaAladinContent *con;
-
-//    float *AR_d; // Removed until CUDA SVD is added back
-//    float *U_d; // Removed until CUDA SVD is added back
-//    float *Sigma_d; // Removed until CUDA SVD is added back
-//    float *VT_d; // Removed until CUDA SVD is added back
-//    float *lengths_d; // Removed until CUDA SVD is added back
-//    float *newWarpedPos_d; // Removed until CUDA SVD is added back
 };
diff --git a/reg-lib/cuda/CudaTools.cu b/reg-lib/cuda/CudaTools.cu
index 4a48d26b..a662ade1 100644
--- a/reg-lib/cuda/CudaTools.cu
+++ b/reg-lib/cuda/CudaTools.cu
@@ -394,7 +394,6 @@ void SetGradientToZero(float4 *gradCuda, const size_t voxelNumber) {
 }
 /* *************************************************************** */
 void SetGradientToZero(float4 *gradCuda, const size_t voxelNumber, const bool xAxis, const bool yAxis, const bool zAxis) {
-    if (!xAxis && !yAxis && !zAxis) return;
     decltype(SetGradientToZero<true, true, true>) *setGradientToZero;
     if (xAxis && yAxis && zAxis) setGradientToZero = SetGradientToZero<true, true, true>;
     else if (xAxis && yAxis) setGradientToZero = SetGradientToZero<true, true, false>;
@@ -403,6 +402,7 @@ void SetGradientToZero(float4 *gradCuda, const size_t voxelNumber, const bool xA
     else if (xAxis) setGradientToZero = SetGradientToZero<true, false, false>;
     else if (yAxis) setGradientToZero = SetGradientToZero<false, true, false>;
     else if (zAxis) setGradientToZero = SetGradientToZero<false, false, true>;
+    else return;
     setGradientToZero(gradCuda, voxelNumber);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaToolsKernels.cu b/reg-lib/cuda/CudaToolsKernels.cu
index fc38446e..5243f464 100644
--- a/reg-lib/cuda/CudaToolsKernels.cu
+++ b/reg-lib/cuda/CudaToolsKernels.cu
@@ -72,7 +72,7 @@ __global__ void ConvertNmiGradientFromVoxelToRealSpaceKernel(float4 *gradient, c
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < nodeNumber) {
         const float4 voxelGradient = gradient[tid];
-        float4 realGradient;
+        float4 realGradient{};
         realGradient.x = matrix.m[0][0] * voxelGradient.x + matrix.m[0][1] * voxelGradient.y + matrix.m[0][2] * voxelGradient.z;
         realGradient.y = matrix.m[1][0] * voxelGradient.x + matrix.m[1][1] * voxelGradient.y + matrix.m[1][2] * voxelGradient.z;
         realGradient.z = matrix.m[2][0] * voxelGradient.x + matrix.m[2][1] * voxelGradient.y + matrix.m[2][2] * voxelGradient.z;
diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h
index 8d753747..01a8e9c1 100755
--- a/reg-lib/cuda/_reg_measure_gpu.h
+++ b/reg-lib/cuda/_reg_measure_gpu.h
@@ -75,19 +75,19 @@ class reg_measure_gpu {
     }
 
 protected:
-    float *referenceImageCuda;
-    float *floatingImageCuda;
-    int *referenceMaskCuda;
-    size_t activeVoxelNumber;
-    float *warpedImageCuda;
-    float4 *warpedGradientCuda;
-    float4 *voxelBasedGradientCuda;
-    float *localWeightSimCuda;
+    float *referenceImageCuda = nullptr;
+    float *floatingImageCuda = nullptr;
+    int *referenceMaskCuda = nullptr;
+    size_t activeVoxelNumber = 0;
+    float *warpedImageCuda = nullptr;
+    float4 *warpedGradientCuda = nullptr;
+    float4 *voxelBasedGradientCuda = nullptr;
+    float *localWeightSimCuda = nullptr;
 
-    int *floatingMaskCuda;
-    float *warpedImageBwCuda;
-    float4 *warpedGradientBwCuda;
-    float4 *voxelBasedGradientBwCuda;
+    int *floatingMaskCuda = nullptr;
+    float *warpedImageBwCuda = nullptr;
+    float4 *warpedGradientBwCuda = nullptr;
+    float4 *voxelBasedGradientBwCuda = nullptr;
 };
 /* *************************************************************** */
 class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu {
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index 035e29c3..f70f277f 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -123,9 +123,9 @@ __global__ void blockMatchingKernel2D(float *warpedPosition,
 
         //populate shared memory with resultImageArray's values
         for (int y = -1; y < 2; ++y) {
-            const int yImageIn = yImage + y * 4;
+            const int yImageIn = yImage + y * 4;  // cppcheck-suppress integerOverflow
             for (int x = -1; x < 2; ++x) {
-                const int xImageIn = xImage + x * 4;
+                const int xImageIn = xImage + x * 4;  // cppcheck-suppress integerOverflow
                 const int sharedIndex = ((y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx;
                 const int indexXYIn = yImageIn * imageSize.x + xImageIn;
                 const bool valid =
@@ -233,11 +233,11 @@ __global__ void blockMatchingKernel3D(float *warpedPosition,
 
         //populate shared memory with resultImageArray's values
         for (int z = -1; z < 2; ++z) {
-            const int zImageIn = zImage + z * 4;
+            const int zImageIn = zImage + z * 4;  // cppcheck-suppress integerOverflow
             for (int y = -1; y < 2; ++y) {
-                const int yImageIn = yImage + y * 4;
+                const int yImageIn = yImage + y * 4;  // cppcheck-suppress integerOverflow
                 for (int x = -1; x < 2; ++x) {
-                    const int xImageIn = xImage + x * 4;
+                    const int xImageIn = xImage + x * 4;  // cppcheck-suppress integerOverflow
                     const int sharedIndex = (((z + 1) * 4 + idz) * 12 + (y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx;
                     const unsigned indexXYZIn = xImageIn + imageSize.x * (yImageIn + zImageIn * imageSize.y);
                     const bool valid =
diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu
deleted file mode 100644
index bc609b6b..00000000
--- a/reg-lib/cuda/optimizeKernel.cu
+++ /dev/null
@@ -1,395 +0,0 @@
-#include "optimizeKernel.h"
-
-#include "cublas_v2.h"
-#include "cusolverDn.h"
-
-#include <cmath>
-
-#include "_reg_maths.h"
-#include "_reg_tools.h"
-#include "_reg_blockMatching.h"
-
-#define IDX2C(i,j,ld) (((j)*(ld))+(i))
-/* *************************************************************** */
-template<class DataType>
-__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, DataType *out) {
-    out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]);
-    out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]);
-    out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]);
-    return;
-}
-/* *************************************************************** */
-__device__ double getSquareDistance3Dcu(float * first_point3D, float * second_point3D) {
-    return sqrt(((double)first_point3D[0] - (double)second_point3D[0]) *
-                ((double)first_point3D[0] - (double)second_point3D[0]) +
-                ((double)first_point3D[1] - (double)second_point3D[1]) *
-                ((double)first_point3D[1] - (double)second_point3D[1]) +
-                ((double)first_point3D[2] - (double)second_point3D[2]) *
-                ((double)first_point3D[2] - (double)second_point3D[2]));
-}
-/* *************************************************************** */
-void checkCublasStatus(cublasStatus_t status) {
-    if (status != CUBLAS_STATUS_SUCCESS)
-        NR_FATAL_ERROR("CUBLAS error");
-}
-/* *************************************************************** */
-void checkCUSOLVERStatus(cusolverStatus_t status, char* msg) {
-    if (status != CUSOLVER_STATUS_SUCCESS) {
-        if (status == CUSOLVER_STATUS_NOT_INITIALIZED)
-            NR_FATAL_ERROR("The library was not initialized");
-        else if (status == CUSOLVER_STATUS_INTERNAL_ERROR)
-            NR_FATAL_ERROR("An internal operation failed");
-        NR_FATAL_ERROR("CUSOLVER error");
-    }
-}
-/* *************************************************************** */
-void checkDevInfo(int *devInfo) {
-    int *hostDevInfo = (int*)malloc(sizeof(int));
-    cudaMemcpy(hostDevInfo, devInfo, sizeof(int), cudaMemcpyDeviceToHost);
-    if (hostDevInfo < 0)
-        NR_ERROR("Parameter " << hostDevInfo << " is wrong");
-    if (hostDevInfo > 0)
-        NR_ERROR(hostDevInfo << " superdiagonals of an intermediate bidiagonal form B did not converge to zero");
-    else
-        NR_INFO(hostDevInfo << ": operation successful");
-    free(hostDevInfo);
-}
-/* *************************************************************** */
-void downloadMat44(mat44 *lastTransformation, float* transform_d) {
-    float* tempMat = (float*)malloc(16 * sizeof(float));
-    cudaMemcpy(tempMat, transform_d, 16 * sizeof(float), cudaMemcpyDeviceToHost);
-    cPtrToMat44(lastTransformation, tempMat);
-    free(tempMat);
-}
-/* *************************************************************** */
-void uploadMat44(mat44 lastTransformation, float* transform_d) {
-    float* tempMat = (float*)malloc(16 * sizeof(float));
-    mat44ToCptr(lastTransformation, tempMat);
-    cudaMemcpy(transform_d, tempMat, 16 * sizeof(float), cudaMemcpyHostToDevice);
-    free(tempMat);
-}
-/* *************************************************************** */
-//threads: 512 | blocks:numEquations/512
-__global__ void transformWarpedPointsKernel(float* transform, float* in, float* out, unsigned definedBlockNum)
-{
-    const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid < definedBlockNum) {
-        const unsigned posIdx = 3 * tid;
-        in += posIdx;
-        out += posIdx;
-        reg_mat44_mul_cuda<float>(transform, in, out);
-    }
-}
-/* *************************************************************** */
-//blocks: 1 | threads: 12
-__global__ void trimAndInvertSingularValuesKernel(float* sigma)
-{
-    sigma[threadIdx.x] = (sigma[threadIdx.x] < 0.0001) ? 0.0f : (float) ((double) 1.0 / (double) sigma[threadIdx.x]);
-}
-/* *************************************************************** */
-//launched as ldm blocks n threads
-__global__ void scaleV(float* V, const unsigned ldm, const unsigned n, float*w)
-{
-    unsigned k = blockIdx.x;
-    unsigned j = threadIdx.x;
-    V[IDX2C(j, k, ldm)] = (float)((double)V[IDX2C(j, k, ldm)] * (double)w[j]);
-}
-/* *************************************************************** */
-//threads: 16 | blocks:1
-__global__ void permuteAffineMatrix(float* transform)
-{
-    __shared__ float buffer[16];
-    const unsigned i = threadIdx.x;
-
-    buffer[i] = transform[i];
-    __syncthreads();
-    const unsigned idx33 = (i / 3) * 4 + i % 3;
-    const unsigned idx34 = (i % 3) * 4 + 3;
-
-    if (i < 9) transform[idx33] = buffer[i];
-    else if (i < 12)transform[idx34] = buffer[i];
-    else transform[i] = buffer[i];
-
-}
-/* *************************************************************** */
-//threads: 512 | blocks:numEquations/512
-__global__ void populateMatrixA(float* A, float *reference, unsigned numBlocks)
-{
-    const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned c = tid * 3;
-    //	const unsigned n = 12;
-    const unsigned lda = numBlocks * 3;
-
-    if (tid < numBlocks) {
-        reference += c;
-        //IDX2C(i,j,ld)
-        A[IDX2C(c, 0, lda)] = reference[0];
-        A[IDX2C(c, 1, lda)] = reference[1];
-        A[IDX2C(c, 2, lda)] = reference[2];
-        A[IDX2C(c, 3, lda)] = A[IDX2C(c, 4, lda)] = A[IDX2C(c, 5, lda)] = A[IDX2C(c, 6, lda)] = A[IDX2C(c, 7, lda)] = A[IDX2C(c, 8, lda)] = A[IDX2C(c, 10, lda)] = A[IDX2C(c, 11, lda)] = 0.0f;
-        A[IDX2C(c, 9, lda)] = 1.0f;
-
-        A[IDX2C((c + 1), 3, lda)] = reference[0];
-        A[IDX2C((c + 1), 4, lda)] = reference[1];
-        A[IDX2C((c + 1), 5, lda)] = reference[2];
-        A[IDX2C((c + 1), 0, lda)] = A[IDX2C((c + 1), 1, lda)] = A[IDX2C((c + 1), 2, lda)] = A[IDX2C((c + 1), 6, lda)] = A[IDX2C((c + 1), 7, lda)] = A[IDX2C((c + 1), 8, lda)] = A[IDX2C((c + 1), 9, lda)] = A[IDX2C((c + 1), 11, lda)] = 0.0f;
-        A[IDX2C((c + 1), 10, lda)] = 1.0f;
-
-        A[IDX2C((c + 2), 6, lda)] = reference[0];
-        A[IDX2C((c + 2), 7, lda)] = reference[1];
-        A[IDX2C((c + 2), 8, lda)] = reference[2];
-        A[IDX2C((c + 2), 0, lda)] = A[IDX2C((c + 2), 1, lda)] = A[IDX2C((c + 2), 2, lda)] = A[IDX2C((c + 2), 3, lda)] = A[IDX2C((c + 2), 4, lda)] = A[IDX2C((c + 2), 5, lda)] = A[IDX2C((c + 2), 9, lda)] = A[IDX2C((c + 2), 10, lda)] = 0.0f;
-        A[IDX2C((c + 2), 11, lda)] = 1.0f;
-    }
-}
-/* *************************************************************** */
-//threads: 512 | blocks:numEquations/512
-__global__ void populateLengthsKernel(float* lengths, float* warped_d, float* newWarped_d, unsigned numEquations)
-{
-    unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
-    unsigned c = tid * 3;
-
-    if (tid < numEquations) {
-        newWarped_d += c;
-        warped_d += c;
-        lengths[tid] = getSquareDistance3Dcu(warped_d, newWarped_d);
-    }
-
-}
-/* *************************************************************** */
-//launched as 1 block 1 thread
-__global__ void outputMatFlat(float* mat, const unsigned ldm, const unsigned n, char* msg)
-{
-    for (int i = 0; i < ldm * n; ++i)
-        NR_COUT << mat[i] << " | ";
-    NR_COUT << std::endl;
-}
-/* *************************************************************** */
-//launched as 1 block 1 thread
-__global__ void outputMat(float* mat, const unsigned ldm, const unsigned n, char* msg)
-{
-    for (int i = 0; i < ldm; ++i) {
-        NR_COUT << i << " ";
-        for (int j = 0; j < n; ++j)
-            NR_COUT << mat[IDX2C(i, j, ldm)] << " ";
-        NR_COUT << "\n";
-    }
-    NR_COUT << std::endl;
-}
-/* *************************************************************** */
-/*
-* the function computes the SVD of a matrix A
-* A = V* x S x U, where V* is a (conjugate) transpose of V
-* */
-void cusolverSVD(float* A_d, unsigned m, unsigned n, float* S_d, float* VT_d, float* U_d) {
-
-    //CAST float* to double*
-    /*
-    double* Adouble_d;
-    double* Sdouble_d;
-    double* VTdouble_d;
-    double* Udouble_d;
-
-    cudaMalloc((void **) &Adouble_d, m*n*sizeof(double));
-    cudaMalloc((void **) &Sdouble_d, xx*sizeof(double));
-    cudaMalloc((void **) &VTdouble_d, xx*sizeof(double));
-    cudaMalloc((void **) &Udouble_d, xx*sizeof(double));
-
-    cudaMemcpy(b_d, a_d, nBytes, cudaMemcpyDeviceToDevice);
-    */
-
-    const int lda = m;
-    const int ldu = m;
-    const int ldvt = n;
-
-    /*
-    * 'A': all m columns of U are returned in array
-    * 'S': the first min(m,n) columns of U (the left singular vectors) are returned in the array
-    * 'O': the first min(m,n) columns of U (the left singular vectors) are overwritten on the array
-    * 'N': no columns of U (no left singular vectors) are computed
-    */
-    const char jobu = 'A';
-
-    /*
-    * 'A': all N rows of V**T are returned in the array
-    * 'S': the first min(m,n) rows of V**T (the right singular vectors) are returned in the array
-    * 'O': the first min(m,n) rows of V**T (the right singular vectors) are overwritten on the array
-    * 'N': no rows of V**T (no right singular vectors) are computed
-    */
-    const char jobvt = 'A';
-
-    cusolverDnHandle_t gH = nullptr;
-    int Lwork;
-    //device ptrs
-    float *Work;
-    float *rwork;
-    int *devInfo;
-
-    //init cusolver compute SVD and shut down
-    checkCUSOLVERStatus(cusolverDnCreate(&gH), "cusolverDnCreate");
-    checkCUSOLVERStatus(cusolverDnSgesvd_bufferSize(gH, m, n, &Lwork), "cusolverDnSgesvd_bufferSize");
-
-    cudaMalloc(&Work, Lwork * sizeof(float));
-    cudaMalloc(&rwork, Lwork * sizeof(float));
-    cudaMalloc(&devInfo, sizeof(int));
-
-    checkCUSOLVERStatus(cusolverDnSgesvd(gH, jobu, jobvt, m, n, A_d, lda, S_d, U_d, ldu, VT_d, ldvt, Work, Lwork, nullptr, devInfo), "cusolverDnSgesvd");
-    checkCUSOLVERStatus(cusolverDnDestroy(gH), "cusolverDnDestroy");
-
-    //free vars
-    cudaFree(devInfo);
-    cudaFree(rwork);
-    cudaFree(Work);
-
-}
-/* *************************************************************** */
-/*
-* the function computes the Pseudoinverse from the products of the SVD factorisation of A
-* R = V x inv(S) x U*
-* */
-void cublasPseudoInverse(float* transformation, float *R_d, float* warped_d, float *VT_d, float* Sigma_d, float *U_d, const unsigned m, const unsigned n) {
-    // First we make sure that the really small singular values
-    // are set to 0. and compute the inverse by taking the reciprocal of the entries
-
-    trimAndInvertSingularValuesKernel <<<1, n >>>(Sigma_d);	//test 3
-
-    cublasHandle_t handle;
-
-    const float alpha = 1.f;
-    const float beta = 0.f;
-
-    const int ldvt = n;//VT's lead dimension
-    const int ldu = m;//U's lead dimension
-    const int ldr = n;//Pseudoinverse's r lead dimension
-
-    const int rowsVTandR = n;//VT and r's num rows
-    const int colsUandR = m;//U and r's num cols
-    const int colsVtRowsU = n;//VT's cols and U's rows
-
-    // V x inv(S) in place | We scale eaach row with the corresponding singular value as V is transpose
-    scaleV <<<n, n >>>(VT_d, n, n, Sigma_d);
-
-    //Initialize CUBLAS perform ops and shut down
-    checkCublasStatus(cublasCreate(&handle));
-
-    //now R = V x inv(S) x U*
-    checkCublasStatus(cublasSgemm(handle, CUBLAS_OP_T, CUBLAS_OP_T, rowsVTandR, colsUandR, colsVtRowsU, &alpha, VT_d, ldvt, U_d, ldu, &beta, R_d, ldr));
-
-    //finally M=Rxb, where M is our affine matrix and b a vector containg the warped points
-    checkCublasStatus(cublasSgemv(handle, CUBLAS_OP_N, n, m, &alpha, R_d, ldr, warped_d, 1, &beta, transformation, 1));
-    checkCublasStatus(cublasDestroy(handle));
-    permuteAffineMatrix <<<1, 16 >>>(transformation);
-    cudaDeviceSynchronize();
-
-}
-/* *************************************************************** */
-double sortAndReduce(float* lengths_d,
-                        float* reference_d,
-                        float* warped_d,
-                        float* newWarped_d,
-                        const unsigned numBlocks,
-                        const unsigned numToKeep,
-                        const unsigned m) {
-    //populateLengthsKernel
-    populateLengthsKernel <<< numBlocks, 512 >>>(lengths_d, warped_d, newWarped_d, m / 3);
-
-    // The initial vector with all the input points
-    thrust::device_ptr<float> reference_d_ptr(reference_d);
-    thrust::device_vector<float> vecReference_d(reference_d_ptr, reference_d_ptr + m);
-
-    thrust::device_ptr<float> warped_d_ptr(warped_d);
-    thrust::device_vector<float> vecWarped_d(warped_d_ptr, warped_d_ptr + m);
-
-    thrust::device_ptr<float> lengths_d_ptr(lengths_d);
-    thrust::device_vector<float> vec_lengths_d(lengths_d_ptr, lengths_d_ptr + m / 3);
-
-    // initialize indices vector to [0,1,2,..m]
-    thrust::counting_iterator<int> iter(0);
-    thrust::device_vector<int> indices(m);
-    thrust::copy(iter, iter + indices.size(), indices.begin());
-
-    //sort an indices array by lengths as key. Then use it to sort reference and warped arrays
-
-    thrust::sort_by_key(vec_lengths_d.begin(), vec_lengths_d.end(), indices.begin());
-    thrust::gather(indices.begin(), indices.end(), vecReference_d.begin(), vecReference_d.begin());//end()?
-    thrust::gather(indices.begin(), indices.end(), vecWarped_d.begin(), vecWarped_d.begin());//end()?
-
-    return thrust::reduce(lengths_d_ptr, lengths_d_ptr + numToKeep, 0, thrust::plus<double>());
-
-}
-/* *************************************************************** */
-//OPTIMIZER-----------------------------------------------
-// estimate an affine transformation using least square
-void getAffineMat3D(float* AR_d, float* Sigma_d, float* VT_d, float* U_d, float* reference_d, float* warped_d, float *transformation, const unsigned numBlocks, unsigned m, unsigned n) {
-
-    //populate A
-    populateMatrixA <<< numBlocks, 512 >>>(AR_d, reference_d, m / 3); //test 2
-
-    //calculate SVD on the GPU
-    cusolverSVD(AR_d, m, n, Sigma_d, VT_d, U_d);
-    //calculate the pseudoinverse
-    cublasPseudoInverse(transformation, AR_d, warped_d, VT_d, Sigma_d, U_d, m, n);
-
-}
-/* *************************************************************** */
-void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *AR_d, float* Sigma_d, float* U_d, float* VT_d, float * newWarpedPos_d, float* referencePos_d, float* warpedPos_d, float* lengths_d, const unsigned numBlocks, const unsigned num_to_keep, const unsigned m, const unsigned n) {
-
-    double lastDistance = std::numeric_limits<double>::max();
-
-    float* lastTransformation_d;
-    cudaMalloc(&lastTransformation_d, 16 * sizeof(float));
-
-    //get initial affine matrix
-    getAffineMat3D(AR_d, Sigma_d, VT_d, U_d, referencePos_d, warpedPos_d, final_d, numBlocks, m, n);
-
-    for (unsigned count = 0; count < MAX_ITERATIONS; ++count) {
-
-        // Transform the points in the reference
-        transformWarpedPointsKernel <<< numBlocks, 512 >>>(final_d, referencePos_d, newWarpedPos_d, m / 3); //test 1
-        double distance = sortAndReduce(lengths_d, referencePos_d, warpedPos_d, newWarpedPos_d, numBlocks, num_to_keep, m);
-
-        // If the change is not substantial or we are getting worst, we return
-        if ((distance > lastDistance) || (lastDistance - distance) < TOLERANCE) break;
-
-        lastDistance = distance;
-
-        cudaMemcpy(lastTransformation_d, final_d, 16 * sizeof(float), cudaMemcpyDeviceToDevice);
-        getAffineMat3D(AR_d, Sigma_d, VT_d, U_d, referencePos_d, warpedPos_d, final_d, numBlocks, m, n);
-    }
-
-    //async cudamemcpy here
-    cudaMemcpy(final_d, lastTransformation_d, 16 * sizeof(float), cudaMemcpyDeviceToDevice);
-    cudaFree(lastTransformation_d);
-}
-/* *************************************************************** */
-void optimize_affine3D_cuda(mat44* cpuMat,
-                            float* final_d,
-                            float* A_d,
-                            float* U_d,
-                            float* Sigma_d,
-                            float* VT_d,
-                            float* lengths_d,
-                            float* reference_d,
-                            float* warped_d,
-                            float* newWarped_d,
-                            unsigned m,
-                            unsigned n,
-                            const unsigned numToKeep,
-                            bool ilsIn,
-                            bool isAffine) {
-
-    //m | blockMatchingParams->activeBlockNumber * 3
-    //n | 12
-    const unsigned numEquations = m;
-    const unsigned numBlocks = (numEquations % 512) ? (numEquations / 512) + 1 : numEquations / 512;
-
-    uploadMat44(*cpuMat, final_d);
-    transformWarpedPointsKernel <<< numBlocks, 512 >>>(final_d, warped_d, newWarped_d, m / 3); //test 1
-    cudaMemcpy(warped_d, newWarped_d, m * sizeof(float), cudaMemcpyDeviceToDevice);
-
-    // run the local search optimization routine
-    affineLocalSearch3DCuda(cpuMat, final_d, A_d, Sigma_d, U_d, VT_d, newWarped_d, reference_d, warped_d, lengths_d, numBlocks, numToKeep, m, n);
-
-    downloadMat44(cpuMat, final_d);
-
-}
diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h
deleted file mode 100644
index c2d95bbc..00000000
--- a/reg-lib/cuda/optimizeKernel.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#pragma once
-
-#include "RNifti.h"
-
-/*
-void optimize_gpu(_reg_blockMatchingParam *blockMatchingParams,
-                    mat44 *updateAffineMatrix,
-                    float **targetPosition_d,
-                    float **resultPosition_d,
-                    bool affine = true);
-
-void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *A_d, float* Sigma_d, float* U_d, float* VT_d, float * newResultPos_d, float* targetPos_d, float* resultPos_d, float* lengths_d, const unsigned numBlocks, const unsigned num_to_keep, const unsigned m, const unsigned n);
-*/
-void cusolverSVD(float* A_d, unsigned m, unsigned n, float* S_d, float* VT_d, float* U_d);
-
-void optimize_affine3D_cuda(mat44* cpuMat, float* final_d, float* A_d, float* U_d, float* Sigma_d, float* VT_d, float* lengths_d, float* reference_d, float* warped_d, float* newWarped_d, unsigned m, unsigned n, const unsigned numToKeep, bool ilsIn, bool isAffine);
-/*
-void getAffineMat3D(float* A_d, float* Sigma_d, float* VT_d, float* U_d, float* target_d, float* result_d, float* r_d, float *transformation, const unsigned numBlocks, unsigned m, unsigned n);
-
-void downloadMat44(mat44 *lastTransformation, float* transform_d);
-
-void uploadMat44(mat44 lastTransformation, float* transform_d);
-*/

From 1c99a7a724ce757976a6aa474ae6b7f0dba54198 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 22 Feb 2024 12:24:50 +0000
Subject: [PATCH 296/314] Eliminate dead code

---
 niftyreg_build_version.txt                    |    2 +-
 reg-apps/reg_average.cpp                      |   13 -
 reg-apps/reg_transform.cpp                    |    1 -
 reg-io/_reg_ReadWriteImage.cpp                |   53 -
 reg-io/_reg_ReadWriteImage.h                  |    6 -
 reg-io/_reg_ReadWriteMatrix.cpp               |   27 -
 reg-io/_reg_ReadWriteMatrix.h                 |    8 -
 reg-lib/CMakeLists.txt                        |   39 -
 reg-lib/Debug.hpp                             |    6 -
 reg-lib/Optimiser.cpp                         |   80 --
 reg-lib/Optimiser.hpp                         |   38 -
 reg-lib/Platform.cpp                          |    4 -
 reg-lib/Platform.h                            |    1 -
 reg-lib/_reg_aladin.cpp                       |   46 -
 reg-lib/_reg_aladin.h                         |    3 -
 reg-lib/_reg_polyAffine.cpp                   |  131 --
 reg-lib/_reg_polyAffine.h                     |   41 -
 reg-lib/cl/ClResampleImageKernel.cpp          |    6 -
 reg-lib/cpu/_reg_discrete_init.cpp            |  397 ------
 reg-lib/cpu/_reg_discrete_init.h              |   77 -
 reg-lib/cpu/_reg_femTrans.cpp                 |  253 ----
 reg-lib/cpu/_reg_femTrans.h                   |   68 -
 reg-lib/cpu/_reg_localTrans.cpp               |  117 --
 reg-lib/cpu/_reg_localTrans.h                 |    6 -
 reg-lib/cpu/_reg_localTrans_regul.cpp         | 1257 ++---------------
 reg-lib/cpu/_reg_localTrans_regul.h           |   49 -
 reg-lib/cpu/_reg_maths.cpp                    |  541 ++-----
 reg-lib/cpu/_reg_maths.h                      |   34 -
 reg-lib/cpu/_reg_maths_eigen.cpp              |  147 --
 reg-lib/cpu/_reg_maths_eigen.h                |   11 -
 reg-lib/cpu/_reg_mrf.cpp                      |  869 ------------
 reg-lib/cpu/_reg_mrf.h                        |  119 --
 reg-lib/cpu/_reg_polyAffine.cpp               |  131 --
 reg-lib/cpu/_reg_polyAffine.h                 |   41 -
 reg-lib/cpu/_reg_resampling.cpp               |    1 -
 reg-lib/cpu/_reg_ssd.cpp                      |  260 +---
 reg-lib/cpu/_reg_ssd.h                        |    4 +-
 reg-lib/cpu/_reg_thinPlateSpline.cpp          |  297 ----
 reg-lib/cpu/_reg_thinPlateSpline.h            |   49 -
 reg-lib/cpu/_reg_tools.cpp                    |  242 ----
 reg-lib/cpu/_reg_tools.h                      |   46 -
 reg-lib/cuda/BlockSize.hpp                    |   15 -
 reg-lib/cuda/CudaLocalTransformation.cu       |   17 -
 reg-lib/cuda/CudaLocalTransformation.hpp      |    4 -
 .../cuda/CudaLocalTransformationKernels.cu    |   62 -
 reg-lib/cuda/CudaTools.cu                     |  196 ---
 reg-lib/cuda/CudaTools.hpp                    |   23 -
 reg-lib/cuda/CudaToolsKernels.cu              |  143 --
 48 files changed, 236 insertions(+), 5745 deletions(-)
 delete mode 100644 reg-lib/_reg_polyAffine.cpp
 delete mode 100644 reg-lib/_reg_polyAffine.h
 delete mode 100644 reg-lib/cpu/_reg_discrete_init.cpp
 delete mode 100644 reg-lib/cpu/_reg_discrete_init.h
 delete mode 100644 reg-lib/cpu/_reg_femTrans.cpp
 delete mode 100644 reg-lib/cpu/_reg_femTrans.h
 delete mode 100644 reg-lib/cpu/_reg_mrf.cpp
 delete mode 100644 reg-lib/cpu/_reg_mrf.h
 delete mode 100644 reg-lib/cpu/_reg_polyAffine.cpp
 delete mode 100644 reg-lib/cpu/_reg_polyAffine.h
 delete mode 100644 reg-lib/cpu/_reg_thinPlateSpline.cpp
 delete mode 100644 reg-lib/cpu/_reg_thinPlateSpline.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index d1b9f6a9..21c8d99f 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-414
+415
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index 372763a4..0b57a922 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -70,19 +70,6 @@ void usage(char *exec)
    NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
 }
 
-void average_norm_intensity(nifti_image *image)
-{
-   PrecisionType *rankedIntensities = (PrecisionType *)malloc(image->nvox*sizeof(PrecisionType));
-   memcpy(rankedIntensities,image->data,image->nvox*sizeof(PrecisionType));
-   reg_heapSort(rankedIntensities,static_cast<int>(image->nvox));
-   PrecisionType lowerValue=rankedIntensities[static_cast<unsigned>(static_cast<float>(image->nvox)*0.03f)];
-   PrecisionType higherValue=rankedIntensities[static_cast<unsigned>(static_cast<float>(image->nvox)*0.97f)];
-   reg_tools_subtractValueFromImage(image,image,lowerValue);
-   reg_tools_multiplyValueToImage(image,image,255.f/(higherValue-lowerValue));
-   free(rankedIntensities);
-   return;
-}
-
 int remove_nan_and_add(nifti_image *averageImage,
                         nifti_image *toAddImage,
                         nifti_image *definedNumImage)
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 485765d9..4cf0bfe5 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -16,7 +16,6 @@
 #include "_reg_globalTrans.h"
 #include "_reg_localTrans.h"
 #include "_reg_tools.h"
-#include "_reg_thinPlateSpline.h"
 #include "_reg_maths_eigen.h"
 
 #include "reg_transform.h"
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index 8b2d928a..fa945192 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -158,56 +158,3 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) {
     }
 }
 /* *************************************************************** */
-template <class DataType>
-void reg_io_displayImageData1(nifti_image *image) {
-    NR_DEBUG("Image values:");
-    const DataType *data = static_cast<DataType*>(image->data);
-    const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(image, 3);
-
-    size_t voxelIndex = 0;
-    for (int z = 0; z < image->nz; z++) {
-        for (int y = 0; y < image->ny; y++) {
-            for (int x = 0; x < image->nx; x++) {
-                std::string text = "[" + std::to_string(x) + " - " + std::to_string(y) + " - " + std::to_string(z) + "] = [";
-                for (int tu = 0; tu < image->nt * image->nu; ++tu)
-                    text += std::to_string(static_cast<double>(data[voxelIndex + tu * nVoxelsPerVolume])) + " ";
-                if (text.back() == ' ')
-                    text.pop_back();
-                text += "]";
-                NR_DEBUG(text);
-            }
-        }
-    }
-}
-/* *************************************************************** */
-void reg_io_displayImageData(nifti_image *image) {
-    switch (image->datatype) {
-    case NIFTI_TYPE_UINT8:
-        reg_io_displayImageData1<unsigned char>(image);
-        break;
-    case NIFTI_TYPE_INT8:
-        reg_io_displayImageData1<char>(image);
-        break;
-    case NIFTI_TYPE_UINT16:
-        reg_io_displayImageData1<unsigned short>(image);
-        break;
-    case NIFTI_TYPE_INT16:
-        reg_io_displayImageData1<short>(image);
-        break;
-    case NIFTI_TYPE_UINT32:
-        reg_io_displayImageData1<unsigned>(image);
-        break;
-    case NIFTI_TYPE_INT32:
-        reg_io_displayImageData1<int>(image);
-        break;
-    case NIFTI_TYPE_FLOAT32:
-        reg_io_displayImageData1<float>(image);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_io_displayImageData1<double>(image);
-        break;
-    default:
-        NR_FATAL_ERROR("Unsupported datatype");
-    }
-}
-/* *************************************************************** */
diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h
index c1356f02..5eb0f372 100644
--- a/reg-io/_reg_ReadWriteImage.h
+++ b/reg-io/_reg_ReadWriteImage.h
@@ -65,9 +65,3 @@ nifti_image *reg_io_ReadImageHeader(const char *filename);
   */
 void reg_io_WriteImageFile(nifti_image *image, const char *filename);
 /* *************************************************************** */
-/** The function expects a nifti_image structure
-  * The image will be displayed on the standard output
-  * @param Nifti image to be displayed
-  */
-void reg_io_displayImageData(nifti_image *image);
-/* *************************************************************** */
diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp
index baf0a6f5..8b399680 100644
--- a/reg-io/_reg_ReadWriteMatrix.cpp
+++ b/reg-io/_reg_ReadWriteMatrix.cpp
@@ -201,30 +201,3 @@ T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) {
 template float** reg_tool_ReadMatrixFile<float>(char *filename, size_t nbLine, size_t nbColumn);
 template double** reg_tool_ReadMatrixFile<double>(char *filename, size_t nbLine, size_t nbColumn);
 /* *************************************************************** */
-mat44* reg_tool_ReadMat44File(char *fileName) {
-    mat44 *mat = (mat44 *)malloc(sizeof(mat44));
-    std::ifstream matrixFile;
-    matrixFile.open(fileName);
-    if (matrixFile.is_open()) {
-        int i = 0;
-        double value1, value2, value3, value4;
-        while (!matrixFile.eof()) {
-            matrixFile >> value1 >> value2 >> value3 >> value4;
-
-            mat->m[i][0] = (float)value1;
-            mat->m[i][1] = (float)value2;
-            mat->m[i][2] = (float)value3;
-            mat->m[i][3] = (float)value4;
-            i++;
-            if (i > 3) break;
-        }
-    } else {
-        NR_FATAL_ERROR("The mat44 file can not be read: "s + fileName);
-    }
-    matrixFile.close();
-
-    NR_MAT44_DEBUG(*mat, "mat44 matrix");
-
-    return mat;
-}
-/* *************************************************************** */
diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h
index 7ad758e8..d83c1b5e 100644
--- a/reg-io/_reg_ReadWriteMatrix.h
+++ b/reg-io/_reg_ReadWriteMatrix.h
@@ -46,14 +46,6 @@ void reg_tool_ReadAffineFile(mat44 *mat,
 void reg_tool_ReadAffineFile(mat44 *mat,
                              char *filename);
 
-/**
-* @brief Read a file that contains a 4-by-4 matrix and store it into
-* a mat44 structure
-* @param filename Filename of the text file that contains the matrix to read
-* @return mat44 structure that store the matrix
-**/
-mat44* reg_tool_ReadMat44File(char *fileName);
-
 /** @brief This function save a 4-by-4 matrix to the disk as a text file
  * @param mat Matrix to be saved on the disk
  * @param filename Name of the text file to save on the disk
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index c417e42e..e319f92a 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -95,15 +95,6 @@ install(TARGETS _reg_blockMatching
 )
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_blockMatching")
 #-----------------------------------------------------------------------------
-add_library(_reg_femTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_femTrans.cpp)
-target_link_libraries(_reg_femTrans _reg_globalTrans)
-install(TARGETS _reg_femTrans
-  RUNTIME DESTINATION bin
-  LIBRARY DESTINATION lib
-  ARCHIVE DESTINATION lib
-)
-set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_femTrans")
-#-----------------------------------------------------------------------------
 add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE}
   Compute.cpp
   AladinContent.cpp
@@ -191,34 +182,4 @@ install(TARGETS _reg_f3d
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_f3d")
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
-# BUILD THE TPS LIBRARY
-#set(NAME _reg_thinPlateSpline)
-#if(APPLE)
-#	add_library(${NAME} SHARED cpu/${NAME}.cpp)
-#else(APPLE)
-#	add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} cpu/${NAME}.cpp)
-#endif(APPLE)
-#target_link_libraries(${NAME} _reg_tools _reg_ReadWriteImage)
-#install(TARGETS ${NAME}
-#	RUNTIME DESTINATION bin
-#	LIBRARY DESTINATION lib
-#	ARCHIVE DESTINATION lib
-#	)
-#-----------------------------------------------------------------------------
-#-----------------------------------------------------------------------------
-## BUILD THE POLYAFFINE LIBRARY
-#set(NAME _reg_polyAffine)
-#if(APPLE)
-#	add_library(${NAME} SHARED _reg_base.cpp ${NAME}.cpp)
-#else(APPLE)
-#	add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_base.cpp ${NAME}.cpp)
-#endif(APPLE)
-#target_link_libraries(${NAME} _reg_tools _reg_ReadWriteImage)
-#install(TARGETS ${NAME}
-#	RUNTIME DESTINATION bin
-#	LIBRARY DESTINATION lib
-#	ARCHIVE DESTINATION lib
-#	)
-#-----------------------------------------------------------------------------
-#-----------------------------------------------------------------------------
 set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE)
\ No newline at end of file
diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp
index cbd29581..93e452eb 100644
--- a/reg-lib/Debug.hpp
+++ b/reg-lib/Debug.hpp
@@ -68,18 +68,12 @@ inline std::string StripFunctionName(const std::string& funcName) {
 #define NR_INFO(msg)        NR_COUT << "[NiftyReg INFO] " << msg << std::endl
 /* *************************************************************** */
 #ifndef NDEBUG
-#define NR_MAT33(mat, title)          reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title))
 #define NR_MAT44(mat, title)          reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title))
-#define NR_MAT33_DEBUG(mat, title)    NR_MAT33(mat, title)
 #define NR_MAT44_DEBUG(mat, title)    NR_MAT44(mat, title)
-#define NR_MAT33_VERBOSE(mat, title)  NR_MAT33(mat, title)
 #define NR_MAT44_VERBOSE(mat, title)  NR_MAT44(mat, title)
 #else
-#define NR_MAT33(mat, title)          reg_mat33_disp(mat, title)
 #define NR_MAT44(mat, title)          reg_mat44_disp(mat, title)
-#define NR_MAT33_DEBUG(mat, title)
 #define NR_MAT44_DEBUG(mat, title)
-#define NR_MAT33_VERBOSE(mat, title)  if (this->verbose) NR_MAT33(mat, "[NiftyReg INFO] "s + (title))
 #define NR_MAT44_VERBOSE(mat, title)  if (this->verbose) NR_MAT44(mat, "[NiftyReg INFO] "s + (title))
 #endif
 /* *************************************************************** */
diff --git a/reg-lib/Optimiser.cpp b/reg-lib/Optimiser.cpp
index 4a92c7d8..b508f98c 100644
--- a/reg-lib/Optimiser.cpp
+++ b/reg-lib/Optimiser.cpp
@@ -331,85 +331,5 @@ void ConjugateGradient<T>::Perturbation(float length) {
 template class ConjugateGradient<float>;
 template class ConjugateGradient<double>;
 /* *************************************************************** */
-template <class T>
-Lbfgs<T>::Lbfgs(): Optimiser<T>::Optimiser() {
-    this->stepToKeep = 5;
-    this->oldDof = nullptr;
-    this->oldGrad = nullptr;
-    this->diffDof = nullptr;
-    this->diffGrad = nullptr;
-}
-/* *************************************************************** */
-template <class T>
-Lbfgs<T>::~Lbfgs() {
-    if (this->oldDof) {
-        free(this->oldDof);
-        this->oldDof = nullptr;
-    }
-    if (this->oldGrad) {
-        free(this->oldGrad);
-        this->oldGrad = nullptr;
-    }
-    for (size_t i = 0; i < this->stepToKeep; ++i) {
-        if (this->diffDof[i]) {
-            free(this->diffDof[i]);
-            this->diffDof[i] = nullptr;
-        }
-        if (this->diffGrad[i]) {
-            free(this->diffGrad[i]);
-            this->diffGrad[i] = nullptr;
-        }
-    }
-    if (this->diffDof) {
-        free(this->diffDof);
-        this->diffDof = nullptr;
-    }
-    if (this->diffGrad) {
-        free(this->diffGrad);
-        this->diffGrad = nullptr;
-    }
-}
-/* *************************************************************** */
-template <class T>
-void Lbfgs<T>::Initialise(size_t nvox,
-                          int ndim,
-                          bool optX,
-                          bool optY,
-                          bool optZ,
-                          size_t maxIt,
-                          size_t startIt,
-                          InterfaceOptimiser *intOpt,
-                          T *cppData,
-                          T *gradData,
-                          size_t nvoxBw,
-                          T *cppDataBw,
-                          T *gradDataBw) {
-    Optimiser<T>::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw);
-    this->stepToKeep = 5;
-    this->diffDof = (T**)malloc(this->stepToKeep * sizeof(T*));
-    this->diffGrad = (T**)malloc(this->stepToKeep * sizeof(T*));
-    for (size_t i = 0; i < this->stepToKeep; ++i) {
-        this->diffDof[i] = (T*)malloc(this->dofNumber * sizeof(T));
-        this->diffGrad[i] = (T*)malloc(this->dofNumber * sizeof(T));
-        if (this->diffDof[i] == nullptr || this->diffGrad[i] == nullptr)
-            NR_FATAL_ERROR("Out of memory");
-    }
-    this->oldDof = (T*)malloc(this->dofNumber * sizeof(T));
-    this->oldGrad = (T*)malloc(this->dofNumber * sizeof(T));
-    if (this->oldDof == nullptr || this->oldGrad == nullptr)
-        NR_FATAL_ERROR("Out of memory");
-}
-/* *************************************************************** */
-template <class T>
-void Lbfgs<T>::UpdateGradientValues() {
-    NR_FATAL_ERROR("Not implemented");
-}
-/* *************************************************************** */
-template <class T>
-void Lbfgs<T>::Optimise(T maxLength, T smallLength, T& startLength) {
-    this->UpdateGradientValues();
-    Optimiser<T>::Optimise(maxLength, smallLength, startLength);
-}
-/* *************************************************************** */
 } // namespace NiftyReg
 /* *************************************************************** */
diff --git a/reg-lib/Optimiser.hpp b/reg-lib/Optimiser.hpp
index aa4da312..2b44e75e 100644
--- a/reg-lib/Optimiser.hpp
+++ b/reg-lib/Optimiser.hpp
@@ -179,43 +179,5 @@ class ConjugateGradient: public Optimiser<T> {
     virtual void Perturbation(float length) override;
 };
 /* *************************************************************** */
-/** @class Global optimisation class
- * @brief
- */
-template <class T>
-class Lbfgs: public Optimiser<T> {
-protected:
-    size_t stepToKeep;
-    T *oldDof;
-    T *oldGrad;
-    T **diffDof;
-    T **diffGrad;
-
-#ifdef NR_TESTING
-public:
-#endif
-    virtual void UpdateGradientValues() override;
-
-public:
-    Lbfgs();
-    virtual ~Lbfgs();
-    virtual void Initialise(size_t nvox,
-                            int ndim,
-                            bool optX,
-                            bool optY,
-                            bool optZ,
-                            size_t maxIt,
-                            size_t startIt,
-                            InterfaceOptimiser *intOpt,
-                            T *cppData,
-                            T *gradData,
-                            size_t nvoxBw,
-                            T *cppDataBw,
-                            T *gradDataBw) override;
-    virtual void Optimise(T maxLength,
-                          T smallLength,
-                          T& startLength) override;
-};
-/* *************************************************************** */
 } // namespace NiftyReg
 /* *************************************************************** */
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 3701327c..e9b6d4ed 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -63,10 +63,6 @@ PlatformType Platform::GetPlatformType() const {
     return platformType;
 }
 /* *************************************************************** */
-unsigned Platform::GetGpuIdx() const {
-    return gpuIdx;
-}
-/* *************************************************************** */
 void Platform::SetGpuIdx(unsigned gpuIdxIn) {
     if (platformType == PlatformType::Cpu) {
         gpuIdx = 999;
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index f3d4d4d0..ee82a04e 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -29,7 +29,6 @@ class Platform {
 
     std::string GetName() const;
     PlatformType GetPlatformType() const;
-    unsigned GetGpuIdx() const;
     void SetGpuIdx(unsigned gpuIdxIn);
 
     Compute* CreateCompute(Content& con) const;
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 3f184522..35b5a2dd 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -51,57 +51,11 @@ reg_aladin<T>::reg_aladin() {
 }
 /* *************************************************************** */
 template<class T>
-bool reg_aladin<T>::TestMatrixConvergence(mat44 *mat) {
-    bool convergence = true;
-    if ((fabsf(mat->m[0][0]) - 1.0f) > CONVERGENCE_EPS)
-        convergence = false;
-    if ((fabsf(mat->m[1][1]) - 1.0f) > CONVERGENCE_EPS)
-        convergence = false;
-    if ((fabsf(mat->m[2][2]) - 1.0f) > CONVERGENCE_EPS)
-        convergence = false;
-
-    if ((fabsf(mat->m[0][1]) - 0.0f) > CONVERGENCE_EPS)
-        convergence = false;
-    if ((fabsf(mat->m[0][2]) - 0.0f) > CONVERGENCE_EPS)
-        convergence = false;
-    if ((fabsf(mat->m[0][3]) - 0.0f) > CONVERGENCE_EPS)
-        convergence = false;
-
-    if ((fabsf(mat->m[1][0]) - 0.0f) > CONVERGENCE_EPS)
-        convergence = false;
-    if ((fabsf(mat->m[1][2]) - 0.0f) > CONVERGENCE_EPS)
-        convergence = false;
-    if ((fabsf(mat->m[1][3]) - 0.0f) > CONVERGENCE_EPS)
-        convergence = false;
-
-    if ((fabsf(mat->m[2][0]) - 0.0f) > CONVERGENCE_EPS)
-        convergence = false;
-    if ((fabsf(mat->m[2][1]) - 0.0f) > CONVERGENCE_EPS)
-        convergence = false;
-    if ((fabsf(mat->m[2][3]) - 0.0f) > CONVERGENCE_EPS)
-        convergence = false;
-
-    return convergence;
-}
-/* *************************************************************** */
-template<class T>
 void reg_aladin<T>::SetVerbose(bool _verbose) {
     this->verbose = _verbose;
 }
 /* *************************************************************** */
 template<class T>
-int reg_aladin<T>::Check() {
-    //This does all the initial checking
-    if (!this->inputReference)
-        NR_FATAL_ERROR("No reference image has been specified or it can not be read");
-
-    if (!this->inputFloating)
-        NR_FATAL_ERROR("No floating image has been specified or it can not be read");
-
-    return EXIT_SUCCESS;
-}
-/* *************************************************************** */
-template<class T>
 void reg_aladin<T>::Print() {
     if (!this->inputReference)
         NR_FATAL_ERROR("No reference image has been specified");
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index f204d66e..9096688d 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -106,8 +106,6 @@ class reg_aladin {
     PlatformType platformType;
     unsigned gpuIdx;
 
-    bool TestMatrixConvergence(mat44 *mat);
-
     virtual void InitialiseRegistration();
     virtual void DeallocateCurrentInputImage();
 
@@ -242,7 +240,6 @@ class reg_aladin {
         this->SetInterpolation(3);
     }
 
-    virtual int Check();
     virtual void Print();
     virtual void Run();
 
diff --git a/reg-lib/_reg_polyAffine.cpp b/reg-lib/_reg_polyAffine.cpp
deleted file mode 100644
index 73ed7b97..00000000
--- a/reg-lib/_reg_polyAffine.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-/**
- * @file _reg_polyAffine.cpp
- * @author Marc Modat
- * @date 16/11/2012
- *
- * Copyright (c) 2012-2018, University College London
- * Copyright (c) 2018, NiftyReg Developers.
- * All rights reserved.
- * See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#include "_reg_polyAffine.h"
-
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-reg_polyAffine<T>::reg_polyAffine(int refTimePoints,int floTimePoints)
-   : reg_base<T>::reg_base(refTimePoints,floTimePoints)
-{
-   this->executableName=(char *)"NiftyReg PolyAffine";
-   NR_FUNC_CALLED();
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-reg_polyAffine<T>::~reg_polyAffine()
-{
-   NR_FUNC_CALLED();
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::GetDeformationField()
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::SetGradientImageToZero()
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::GetApproximatedGradient()
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-double reg_polyAffine<T>::GetObjectiveFunctionValue()
-{
-
-   return EXIT_SUCCESS;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::UpdateParameters(float stepSize)
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-T reg_polyAffine<T>::NormaliseGradient()
-{
-   return EXIT_SUCCESS;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::GetSimilarityMeasureGradient()
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::GetObjectiveFunctionGradient()
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::DisplayCurrentLevelParameters()
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::UpdateBestObjFunctionValue()
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::PrintCurrentObjFunctionValue(T stepSize)
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::PrintInitialObjFunctionValue()
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::AllocateTransformationGradient()
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_polyAffine<T>::DeallocateTransformationGradient()
-{
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/_reg_polyAffine.h b/reg-lib/_reg_polyAffine.h
deleted file mode 100644
index 28a7f5ff..00000000
--- a/reg-lib/_reg_polyAffine.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * @file _reg_polyAffine.h
- * @author Marc Modat
- * @date 16/11/2012
- *
- * Copyright (c) 2012-2018, University College London
- * Copyright (c) 2018, NiftyReg Developers.
- * All rights reserved.
- * See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "_reg_base.h"
-
-template <class T>
-class reg_polyAffine : public reg_base<T>
-{
-protected:
-   void GetDeformationField();
-   void SetGradientImageToZero();
-   void GetApproximatedGradient();
-   double GetObjectiveFunctionValue();
-   void UpdateParameters(float);
-   T NormaliseGradient();
-   void GetSimilarityMeasureGradient();
-   void GetObjectiveFunctionGradient();
-   void DisplayCurrentLevelParameters();
-   void UpdateBestObjFunctionValue();
-   void PrintCurrentObjFunctionValue(T);
-   void PrintInitialObjFunctionValue();
-   void AllocateTransformationGradient();
-   void DeallocateTransformationGradient();
-
-public:
-   reg_polyAffine(int refTimePoints,int floTimePoints);
-   ~reg_polyAffine();
-};
-
-#include "_reg_polyAffine.cpp"
diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp
index 59e76be1..59211a3e 100644
--- a/reg-lib/cl/ClResampleImageKernel.cpp
+++ b/reg-lib/cl/ClResampleImageKernel.cpp
@@ -83,16 +83,10 @@ void ClResampleImageKernel::Calculate(int interp,
     const size_t globalWorkSize[dims] = {blocks * maxThreads};
     const size_t localWorkSize[dims] = {maxThreads};
 
-    //    int numMats = 0; //needs to be a parameter
-    //    float* jacMat_h = (float*) malloc(9 * numMats * sizeof(float));
-
     cl_long2 voxelNumber = {{(cl_long)NiftiImage::calcVoxelNumber(warpedImage, 3), (cl_long)NiftiImage::calcVoxelNumber(this->floatingImage, 3)}};
     cl_uint3 fi_xyz = {{(cl_uint)floatingImage->nx, (cl_uint)floatingImage->ny, (cl_uint)floatingImage->nz}};
     cl_uint2 wi_tu = {{(cl_uint)warpedImage->nt, (cl_uint)warpedImage->nu}};
 
-    //    if (numMats)
-    //        mat33ToCptr(jacMat, jacMat_h, numMats);
-
     int datatype = this->floatingImage->datatype;
 
     errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &this->clFloating);
diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp
deleted file mode 100644
index a35fa85a..00000000
--- a/reg-lib/cpu/_reg_discrete_init.cpp
+++ /dev/null
@@ -1,397 +0,0 @@
-#include "_reg_discrete_init.h"
-
-/*****************************************************/
-reg_discrete_init::reg_discrete_init(reg_measure *_measure,
-                                     nifti_image *_referenceImage,
-                                     nifti_image *_controlPointImage,
-                                     int _discrete_radius,
-                                     int _discrete_increment,
-                                     int _reg_max_it,
-                                     float _reg_weight)
-{
-   this->measure = _measure;
-   this->referenceImage = _referenceImage;
-   this->controlPointImage = _controlPointImage;
-   this->discrete_radius = _discrete_radius;
-   this->discrete_increment = _discrete_increment;
-   this->regularisation_weight = _reg_weight;
-   this->reg_max_it = _reg_max_it;
-
-   if (this->discrete_radius / this->discrete_increment !=
-       (float)this->discrete_radius / (float)this->discrete_increment)
-      NR_FATAL_ERROR("The discrete_radius is expected to be a multiple of discretise_increment");
-
-   this->image_dim = this->referenceImage->nz > 1 ? 3 :2;
-   this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1;
-   this->label_nD_num = static_cast<int>(std::pow((double) this->label_1D_num,this->image_dim));
-   this->node_number = NiftiImage::calcVoxelNumber(this->controlPointImage, 3);
-
-   this->input_transformation=nifti_copy_nim_info(this->controlPointImage);
-   this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float));
-
-   // Allocate the discretised values in voxel
-   int *discrete_values_vox = (int *)malloc(this->label_1D_num*sizeof(int));
-   int currentValue = -this->discrete_radius;
-   for(int i = 0;i<this->label_1D_num;i++) {
-      discrete_values_vox[i]=currentValue;
-      currentValue+=this->discrete_increment;
-   }
-
-   // Allocate the discretised values in millimetre
-   this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *));
-   for(int i=0;i<this->image_dim;++i){
-      this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float));
-   }
-   float disp_vox[3];
-   mat44 vox2mm = this->referenceImage->qto_xyz;
-   if(this->referenceImage->sform_code>0)
-      vox2mm = this->referenceImage->sto_xyz;
-   int i=0;
-   for(int z=0; z<this->label_1D_num; ++z){
-      disp_vox[2]=discrete_values_vox[z];
-      for(int y=0; y<this->label_1D_num; ++y){
-         disp_vox[1]=discrete_values_vox[y];
-         for(int x=0; x<this->label_1D_num; ++x){
-            disp_vox[0]=discrete_values_vox[x];
-            this->discrete_values_mm[0][i] =
-                  disp_vox[0] * vox2mm.m[0][0] +
-                  disp_vox[1] * vox2mm.m[0][1] +
-                  disp_vox[2] * vox2mm.m[0][2];
-            this->discrete_values_mm[1][i] =
-                  disp_vox[0] * vox2mm.m[1][0] +
-                  disp_vox[1] * vox2mm.m[1][1] +
-                  disp_vox[2] * vox2mm.m[1][2];
-            this->discrete_values_mm[2][i] =
-                  disp_vox[0] * vox2mm.m[2][0] +
-                  disp_vox[1] * vox2mm.m[2][1] +
-                  disp_vox[2] * vox2mm.m[2][2];
-            ++i;
-         }
-      }
-   }
-   free(discrete_values_vox);
-
-   //regularization - optimization
-   this->optimal_label_index=(int *)malloc(this->node_number*sizeof(int));
-   currentValue= (this->label_1D_num-1)/2;
-   currentValue = (currentValue*this->label_1D_num+currentValue)*this->label_1D_num+currentValue;
-   for(size_t n=0; n<this->node_number; ++n)
-      this->optimal_label_index[n]=currentValue;
-
-   //To store the cost data term
-   this->discretised_measures = (float *)calloc(this->node_number*this->label_nD_num, sizeof(float));
-
-   //Optimal transformation based on the data term
-   this->regularised_measures = (float *)malloc(this->node_number*this->label_nD_num*sizeof(float));
-
-   // Compute the l2 for each label
-   l2_weight = 1.e-10f;
-   this->l2_penalisation = (float *)malloc(this->label_nD_num*sizeof(float));
-   int label_index=0;
-   for(float z=-this->discrete_radius; z<=this->discrete_radius; z+=this->discrete_increment)
-      for(float y=-this->discrete_radius; y<=this->discrete_radius; y+=this->discrete_increment)
-         for(float x=-this->discrete_radius; x<=this->discrete_radius; x+=this->discrete_increment)
-            this->l2_penalisation[label_index++] = std::sqrt(x*x+y*y+z*z);
-}
-/*****************************************************/
-/*****************************************************/
-reg_discrete_init::~reg_discrete_init()
-{
-   if(this->discretised_measures!=nullptr)
-      free(this->discretised_measures);
-   this->discretised_measures=nullptr;
-
-   if(this->regularised_measures!=nullptr)
-      free(this->regularised_measures);
-   this->regularised_measures=nullptr;
-
-   if(this->l2_penalisation!=nullptr)
-      free(this->l2_penalisation);
-   this->l2_penalisation=nullptr;
-
-   if(this->optimal_label_index!=nullptr)
-      free(this->optimal_label_index);
-   this->optimal_label_index=nullptr;
-
-   for(int i=0; i<this->image_dim; ++i){
-      if(this->discrete_values_mm[i]!=nullptr)
-         free(this->discrete_values_mm[i]);
-      this->discrete_values_mm[i]=nullptr;
-   }
-   if(this->discrete_values_mm!=nullptr)
-      free(this->discrete_values_mm);
-   this->discrete_values_mm=nullptr;
-
-   if(this->input_transformation!=nullptr)
-      nifti_image_free(this->input_transformation);
-   this->input_transformation=nullptr;
-}
-/*****************************************************/
-/*****************************************************/
-void reg_discrete_init::GetDiscretisedMeasure()
-{
-   measure->GetDiscretisedValue(this->controlPointImage,
-                                this->discretised_measures,
-                                this->discrete_radius,
-                                this->discrete_increment);
-   NR_FUNC_CALLED();
-}
-/*****************************************************/
-/*****************************************************/
-void reg_discrete_init::GetOptimalLabel()
-{
-   this->regularisation_convergence=0;
-   size_t opt_label = 0;
-   for(size_t node=0; node<this->node_number; ++node){
-      size_t current_optimal = this->optimal_label_index[node];
-      opt_label =
-            std::max_element(this->regularised_measures+node*this->label_nD_num,
-                             this->regularised_measures+(node+1)*this->label_nD_num) -
-                            (this->regularised_measures+node*this->label_nD_num);
-      this->optimal_label_index[node] = opt_label;
-      if(current_optimal != opt_label)
-         ++this->regularisation_convergence;
-   }
-   NR_FUNC_CALLED();
-}
-/*****************************************************/
-/*****************************************************/
-void reg_discrete_init::UpdateTransformation()
-{
-   //Update the control point position
-   float *cpPtrX = static_cast<float *>(this->controlPointImage->data);
-   float *cpPtrY = &cpPtrX[this->node_number];
-   float *cpPtrZ = &cpPtrY[this->node_number];
-
-   float *inputCpPtrX = static_cast<float *>(this->input_transformation->data);
-   float *inputCpPtrY = &inputCpPtrX[this->node_number];
-   float *inputCpPtrZ = &inputCpPtrY[this->node_number];
-
-   memcpy(cpPtrX, inputCpPtrX, this->node_number*3*sizeof(float));
-   //float scaleFactor = 0.5;
-   float scaleFactor = 1;
-
-   for(int z=1; z<this->controlPointImage->nz-1; z++) {
-      for(int y=1; y<this->controlPointImage->ny-1; y++) {
-         size_t node = (z*this->controlPointImage->ny+y)*this->controlPointImage->nx+1;
-         for(int x=1; x<this->controlPointImage->nx-1; x++){
-            int optimal_id = this->optimal_label_index[node];
-            cpPtrX[node] = inputCpPtrX[node] + scaleFactor*this->discrete_values_mm[0][optimal_id];
-            cpPtrY[node] = inputCpPtrY[node] + scaleFactor*this->discrete_values_mm[1][optimal_id];
-            cpPtrZ[node] = inputCpPtrZ[node] + scaleFactor*this->discrete_values_mm[2][optimal_id];
-            ++node;
-         }
-      }
-   }
-
-   NR_FUNC_CALLED();
-}
-/*****************************************************/
-/*****************************************************/
-void reg_discrete_init::AddL2Penalisation(float weight)
-{
-   // Compute the l2 for each label
-   float *l2_penalisation = (float *)malloc(this->label_nD_num*sizeof(float));
-   int label_index=0;
-   for(float z=-this->discrete_radius; z<=this->discrete_radius; z+=this->discrete_increment)
-      for(float y=-this->discrete_radius; y<=this->discrete_radius; y+=this->discrete_increment)
-         for(float x=-this->discrete_radius; x<=this->discrete_radius; x+=this->discrete_increment)
-            l2_penalisation[label_index++] = weight * sqrt(x*x+y*y+z*z);
-
-   // Loop over all control points
-   int measure_index, n;
-   int _node_number = static_cast<int>(this->node_number);
-   int _label_nD_num = this->label_nD_num;
-   float *_discretised_measures = &this->discretised_measures[0];
-#ifdef _OPENMP
-   #pragma omp parallel for default(none) \
-   shared(_node_number, _label_nD_num, _discretised_measures, l2_penalisation) \
-   private(measure_index, n, label_index)
-#endif
-   for(n=0; n<_node_number; ++n){
-      measure_index = n * _label_nD_num;
-      // Loop over all label
-      for(label_index=0; label_index<_label_nD_num; ++label_index){
-         _discretised_measures[measure_index] -= l2_penalisation[label_index];
-         ++measure_index;
-      }
-   }
-
-   free(l2_penalisation);
-}
-/*****************************************************/
-/*****************************************************/
-void reg_discrete_init::GetRegularisedMeasure()
-{
-   reg_getDisplacementFromDeformation(this->controlPointImage);
-   reg_getDisplacementFromDeformation(this->input_transformation);
-
-   float *cpPtrX = static_cast<float *>(this->controlPointImage->data);
-   float *cpPtrY = &cpPtrX[this->node_number];
-   float *cpPtrZ = &cpPtrY[this->node_number];
-
-   float *inputCpPtrX = static_cast<float *>(this->input_transformation->data);
-   float *inputCpPtrY = &inputCpPtrX[this->node_number];
-   float *inputCpPtrZ = &inputCpPtrY[this->node_number];
-
-   float basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27];
-   float _basisXX, _basisYY, _basisZZ, _basisXY, _basisYZ, _basisXZ;
-   float basis[4], first[4], second[4];
-   get_BSplineBasisValues<float>(0.f, basis, first, second);
-   int i=0;
-   for(int c=0; c<3; ++c){
-      for(int b=0; b<3; ++b){
-         for(int a=0; a<3; ++a){
-            basisXX[i]=second[a]*basis[b]*basis[c];
-            basisYY[i]=basis[a]*second[b]*basis[c];
-            basisZZ[i]=basis[a]*basis[b]*second[c];
-            basisXY[i]=first[a]*first[b]*basis[c];
-            basisYZ[i]=basis[a]*first[b]*first[c];
-            basisXZ[i]=first[a]*basis[b]*first[c];
-            ++i;
-         }
-      }
-   }
-   _basisXX = basisXX[13]; _basisYY = basisYY[13]; _basisZZ = basisZZ[13];
-   _basisXY = basisXY[13]; _basisYZ = basisYZ[13]; _basisXZ = basisXZ[13];
-
-   float splineCoeffX[27], splineCoeffY[27], splineCoeffZ[27];
-
-   size_t node = 0;
-   for(int z=0; z<this->controlPointImage->nz; z++) {
-      for(int y=0; y<this->controlPointImage->ny; y++) {
-         for(int x=0; x<this->controlPointImage->nx; x++){
-            // Copy all 27 required control point displacement
-            i=0;
-            for(int c=z-1; c<z+2; c++){
-               for(int b=y-1; b<y+2; b++){
-                  for(int a=x-1; a<x+2; a++){
-                     if(a>-1 && a<this->controlPointImage->nx &&
-                        b>-1 && b<this->controlPointImage->ny &&
-                        c>-1 && c<this->controlPointImage->nz){
-                        int node_index = (c*this->controlPointImage->ny+b)*this->controlPointImage->nx+a;
-                        splineCoeffX[i] = cpPtrX[node_index];
-                        splineCoeffY[i] = cpPtrY[node_index];
-                        splineCoeffZ[i] = cpPtrZ[node_index];
-                     }
-                     else{
-                        splineCoeffX[i] = 0.f;
-                        splineCoeffY[i] = 0.f;
-                        splineCoeffZ[i] = 0.f;
-                     }
-                     ++i;
-                  } // a
-               } // b
-            } // c
-            // Set the central control point to no displacement
-            splineCoeffX[13] = 0.f;
-            splineCoeffY[13] = 0.f;
-            splineCoeffZ[13] = 0.f;
-            // Compute the second derivative without the central control point
-            float XX_x=0, YY_x=0, ZZ_x=0;
-            float XY_x=0, YZ_x=0, XZ_x=0;
-            float XX_y=0, YY_y=0, ZZ_y=0;
-            float XY_y=0, YZ_y=0, XZ_y=0;
-            float XX_z=0, YY_z=0, ZZ_z=0;
-            float XY_z=0, YZ_z=0, XZ_z=0;
-            for(i=0; i<27; i++){
-               XX_x += basisXX[i]*splineCoeffX[i];
-               YY_x += basisYY[i]*splineCoeffX[i];
-               ZZ_x += basisZZ[i]*splineCoeffX[i];
-               XY_x += basisXY[i]*splineCoeffX[i];
-               YZ_x += basisYZ[i]*splineCoeffX[i];
-               XZ_x += basisXZ[i]*splineCoeffX[i];
-
-               XX_y += basisXX[i]*splineCoeffY[i];
-               YY_y += basisYY[i]*splineCoeffY[i];
-               ZZ_y += basisZZ[i]*splineCoeffY[i];
-               XY_y += basisXY[i]*splineCoeffY[i];
-               YZ_y += basisYZ[i]*splineCoeffY[i];
-               XZ_y += basisXZ[i]*splineCoeffY[i];
-
-               XX_z += basisXX[i]*splineCoeffZ[i];
-               YY_z += basisYY[i]*splineCoeffZ[i];
-               ZZ_z += basisZZ[i]*splineCoeffZ[i];
-               XY_z += basisXY[i]*splineCoeffZ[i];
-               YZ_z += basisYZ[i]*splineCoeffZ[i];
-               XZ_z += basisXZ[i]*splineCoeffZ[i];
-            }
-            float *_discrete_values_mm_x = this->discrete_values_mm[0];
-            float *_discrete_values_mm_y = this->discrete_values_mm[1];
-            float *_discrete_values_mm_z = this->discrete_values_mm[2];
-            for(int label=0; label<this->label_nD_num; ++label){
-
-               float valX = inputCpPtrX[node] + *_discrete_values_mm_x++;
-               float valY = inputCpPtrY[node] + *_discrete_values_mm_y++;
-               float valZ = inputCpPtrZ[node] + *_discrete_values_mm_z++;
-
-               size_t measure_index = node * this->label_nD_num + label;
-               this->regularised_measures[measure_index] =
-                     (1.f-this->regularisation_weight-this->l2_weight) * this->discretised_measures[measure_index] -
-                     this->regularisation_weight * (
-                     Square(XX_x + valX * _basisXX) +
-                     Square(XX_y + valY * _basisXX) +
-                     Square(XX_z + valZ * _basisXX) +
-                     Square(YY_x + valX * _basisYY) +
-                     Square(YY_y + valY * _basisYY) +
-                     Square(YY_z + valZ * _basisYY) +
-                     Square(ZZ_x + valX * _basisZZ) +
-                     Square(ZZ_y + valY * _basisZZ) +
-                     Square(ZZ_z + valZ * _basisZZ) + 2.0 * (
-                     Square(XY_x + valX * _basisXY) +
-                     Square(XY_y + valY * _basisXY) +
-                     Square(XY_z + valZ * _basisXY) +
-                     Square(XZ_x + valX * _basisXZ) +
-                     Square(XZ_y + valY * _basisXZ) +
-                     Square(XZ_z + valZ * _basisXZ) +
-                     Square(YZ_x + valX * _basisYZ) +
-                     Square(YZ_y + valY * _basisYZ) +
-                     Square(YZ_z + valZ * _basisYZ)
-                     ) ) - this->l2_weight * this->l2_penalisation[label];
-            } // label
-            ++node;
-         } // x
-      } // y
-   } // z
-   reg_getDeformationFromDisplacement(this->controlPointImage);
-   reg_getDeformationFromDisplacement(this->input_transformation);
-   NR_FUNC_CALLED();
-}
-/*****************************************************/
-/*****************************************************/
-void reg_discrete_init::Run()
-{
-   NR_VERBOSE("Control point number = " << this->node_number);
-   NR_VERBOSE("Discretised radius (voxel) = " << this->discrete_radius);
-   NR_VERBOSE("Discretised step (voxel) = " << this->discrete_increment);
-   NR_VERBOSE("Discretised label number = " << this->label_nD_num);
-   // Store the initial transformation parametrisation
-   memcpy(this->input_transformation->data, this->controlPointImage->data,
-          this->node_number*this->image_dim*sizeof(float));
-   // Compute the discretised data term values
-   this->GetDiscretisedMeasure();
-   // Add the l2 regularisation
-   //this->AddL2Penalisation(1.e-10f);
-   // Initialise the regularise with the measure only
-   memcpy(this->regularised_measures,
-          this->discretised_measures,
-          this->label_nD_num*this->node_number*sizeof(float));
-   // Extract the best label
-   this->GetOptimalLabel();
-   // Update the control point positions
-   this->UpdateTransformation();
-   // Run the regularisation optimisation
-   for(int i=0; i< this->reg_max_it; ++i){
-      this->GetRegularisedMeasure();
-      this->GetOptimalLabel();
-      this->UpdateTransformation();
-      NR_VERBOSE("Regularisation " << i+1 << "/" << this->reg_max_it <<
-                 " - BE=" << reg_spline_approxBendingEnergy(this->controlPointImage) <<
-                 " - [" << 100.f*(float)this->regularisation_convergence/this->node_number << "%]");
-      //if(this->regularisation_convergence<this->node_number/100)
-      //   break;
-   }
-   NR_FUNC_CALLED();
-}
-/*****************************************************/
-/*****************************************************/
diff --git a/reg-lib/cpu/_reg_discrete_init.h b/reg-lib/cpu/_reg_discrete_init.h
deleted file mode 100644
index d4ae28cf..00000000
--- a/reg-lib/cpu/_reg_discrete_init.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * @file _reg_mrf.h
- * @author Benoit Presles
- * @author Mattias Heinrich
- * @date 01/01/2016
- * @brief reg_mrf class for discrete optimisation
- *
- * Copyright (c) 2016-2018, University College London
- * Copyright (c) 2018, NiftyReg Developers.
- * All rights reserved.
- * See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "_reg_measure.h"
-#include "Optimiser.hpp"
-#include "_reg_localTrans_regul.h"
-#include "_reg_localTrans.h"
-#include "_reg_ReadWriteImage.h"
-#include <cmath>
-#include <queue>
-#include <algorithm>
-
-/** @brief Given two input images a discretisation of the measure of similarity is performed.
- * The returned transformation is a balanced between the best discretised measure and a regularisation
- * term (bending energy).
- */
-class reg_discrete_init
-{
-public:
-   /// @brief Constructor
-   reg_discrete_init(reg_measure *_measure,
-                     nifti_image *_referenceImage,
-                     nifti_image *_controlPointImage,
-                     int discrete_radius,
-                     int _discrete_increment,
-                     int _reg_max_it,
-                     float _reg_weight);
-   /// @brief Destructor
-   ~reg_discrete_init();
-   void Run();
-
-private:
-   void GetDiscretisedMeasure();
-   void AddL2Penalisation(float);
-   void GetRegularisedMeasure();
-   void GetOptimalLabel();
-   void UpdateTransformation();
-
-   reg_measure *measure; ///< Measure of similarity object to use for the data term
-   nifti_image* referenceImage; ///< Reference image in which the transformation is parametrised
-   nifti_image* controlPointImage; ///< Control point image that contains the transformation to optimise
-   int discrete_radius; ///< Radius of the discretised grid
-   int discrete_increment; ///< Increment step size in the discretised grid
-   float regularisation_weight; ///< Weight given to the regularisation
-
-   int image_dim; ///< Dimension of the reference image
-   size_t node_number; ///< Number of nodes in the tree
-
-   float **discrete_values_mm; ///< All discretised values in millimetre
-
-   int label_1D_num; ///< Number of discretised values per axis
-   int label_nD_num; ///< Total number of discretised values
-
-   nifti_image *input_transformation;
-   float *discretised_measures; ///< All discretised measures of similarity
-   float *regularised_measures; ///< All combined measures
-   int* optimal_label_index; ///< Optimimal label index for each node
-   int regularisation_convergence;
-   int reg_max_it; ///< Maximal number of iteration in the regularisation strategy
-
-   float l2_weight;
-   float* l2_penalisation;
-};
-/********************************************************************************************************/
diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp
deleted file mode 100644
index 04cb40bd..00000000
--- a/reg-lib/cpu/_reg_femTrans.cpp
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- *  _reg_femTransformation_gpu.h
- *
- *
- *  Created by Marc Modat on 02/11/2011.
- *  Copyright (c) 2011-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#include "_reg_femTrans.h"
-
-float reg_getTetrahedronVolume(float *node1,float *node2,float *node3,float *node4)
-{
-   mat33 matrix;
-   matrix.m[0][0]=node2[0]-node1[0];
-   matrix.m[0][1]=node2[1]-node1[1];
-   matrix.m[0][2]=node2[2]-node1[2];
-
-   matrix.m[1][0]=node3[0]-node2[0];
-   matrix.m[1][1]=node3[1]-node2[1];
-   matrix.m[1][2]=node3[2]-node2[2];
-
-   matrix.m[2][0]=node4[0]-node3[0];
-   matrix.m[2][1]=node4[1]-node3[1];
-   matrix.m[2][2]=node4[2]-node3[2];
-   return fabs(nifti_mat33_determ(matrix))/6.f;
-}
-
-void reg_fem_InitialiseTransformation(int *elementNodes,
-                                      unsigned elementNumber,
-                                      float *nodePositions,
-                                      nifti_image *deformationFieldImage,
-                                      unsigned *closestNodes,
-                                      float *femInterpolationWeight
-                                     )
-{
-   // Set all the closest nodes and coefficients to zero
-   for (int i = 0; i < 4 * NiftiImage::calcVoxelNumber(deformationFieldImage, 3); ++i)
-   {
-      closestNodes[i]=0;
-      femInterpolationWeight[i]=0.f;
-   }
-
-   mat44 *realToVoxel;
-   if(deformationFieldImage->sform_code>0)
-   {
-      realToVoxel=&(deformationFieldImage->sto_ijk);
-   }
-   else realToVoxel=&(deformationFieldImage->qto_ijk);
-
-   int currentNodes[4];
-   float nodeRealPosition[3];
-   float nodeVoxelIndices[4][3];
-   float voxel[3];
-   float fullVolume;
-   float subVolume[4];
-
-   for(unsigned element=0; element<elementNumber; ++element)
-   {
-      // Compute the element bounding box in voxel coordinate
-      for(unsigned i=0; i<4; ++i)
-      {
-         currentNodes[i]=elementNodes[4*element+i];
-         nodeRealPosition[0]=nodePositions[3*currentNodes[i]];
-         nodeRealPosition[1]=nodePositions[3*currentNodes[i]+1];
-         nodeRealPosition[2]=nodePositions[3*currentNodes[i]+2];
-         reg_mat44_mul(realToVoxel, nodeRealPosition, nodeVoxelIndices[i]);
-      }
-
-      int xRange[2]= {Ceil(nodeVoxelIndices[0][0]), Floor(nodeVoxelIndices[0][0])};
-      int yRange[2]= {Ceil(nodeVoxelIndices[0][1]), Floor(nodeVoxelIndices[0][1])};
-      int zRange[2]= {Ceil(nodeVoxelIndices[0][2]), Floor(nodeVoxelIndices[0][2])};
-      for(unsigned i=1; i<4; ++i)
-      {
-         xRange[0]=xRange[0]<Ceil(nodeVoxelIndices[i][0])?xRange[0]:Ceil(nodeVoxelIndices[i][0]);
-         xRange[1]=xRange[1]>Floor(nodeVoxelIndices[i][0])?xRange[1]:Floor(nodeVoxelIndices[i][0]);
-         yRange[0]=yRange[0]<Ceil(nodeVoxelIndices[i][1])?yRange[0]:Ceil(nodeVoxelIndices[i][1]);
-         yRange[1]=yRange[1]>Floor(nodeVoxelIndices[i][1])?yRange[1]:Floor(nodeVoxelIndices[i][1]);
-         zRange[0]=zRange[0]<Ceil(nodeVoxelIndices[i][2])?zRange[0]:Ceil(nodeVoxelIndices[i][2]);
-         zRange[1]=zRange[1]>Floor(nodeVoxelIndices[i][2])?zRange[1]:Floor(nodeVoxelIndices[i][2]);
-      }
-
-      xRange[0]=xRange[0]<0?0:xRange[0];
-      yRange[0]=yRange[0]<0?0:yRange[0];
-      zRange[0]=zRange[0]<0?0:zRange[0];
-      xRange[1]=xRange[1]<deformationFieldImage->nx?xRange[1]:deformationFieldImage->nx-1;
-      yRange[1]=yRange[1]<deformationFieldImage->ny?yRange[1]:deformationFieldImage->ny-1;
-      zRange[1]=zRange[1]<deformationFieldImage->nz?zRange[1]:deformationFieldImage->nz-1;
-
-      fullVolume=reg_getTetrahedronVolume(nodeVoxelIndices[0],
-                                          nodeVoxelIndices[1],
-                                          nodeVoxelIndices[2],
-                                          nodeVoxelIndices[3]);
-      for(int z=zRange[0]; z<=zRange[1]; ++z)
-      {
-         voxel[2]=z;
-         for(int y=yRange[0]; y<=yRange[1]; ++y)
-         {
-            voxel[1]=y;
-            for(int x=xRange[0]; x<=xRange[1]; ++x)
-            {
-               voxel[0]=x;
-               subVolume[0]=reg_getTetrahedronVolume(voxel,
-                                                     nodeVoxelIndices[1],
-                                                     nodeVoxelIndices[2],
-                                                     nodeVoxelIndices[3]);
-
-               subVolume[1]=reg_getTetrahedronVolume(nodeVoxelIndices[0],
-                                                     voxel,
-                                                     nodeVoxelIndices[2],
-                                                     nodeVoxelIndices[3]);
-
-               subVolume[2]=reg_getTetrahedronVolume(nodeVoxelIndices[0],
-                                                     nodeVoxelIndices[1],
-                                                     voxel,
-                                                     nodeVoxelIndices[3]);
-
-               subVolume[3]=reg_getTetrahedronVolume(nodeVoxelIndices[0],
-                                                     nodeVoxelIndices[1],
-                                                     nodeVoxelIndices[2],
-                                                     voxel);
-
-               // Check if the voxel is in the element
-               if(fabs(fullVolume/(subVolume[0]+subVolume[1]+subVolume[2]+subVolume[3])-1.f)<.000001f)
-               {
-                  int index=(z*deformationFieldImage->ny+y)*deformationFieldImage->nx+x;
-                  for(unsigned i=0; i<4; ++i)
-                  {
-                     closestNodes[4*index+i]=currentNodes[i];
-                     femInterpolationWeight[4*index+i]=subVolume[i]/fullVolume;
-                  }
-               }// voxel in element check
-            }//x bounding box
-         }//y bounding box
-      }//z bounding box
-   }// element loop
-   return;
-}// reg_fem_InitialiseTransformation
-
-
-void reg_fem_getDeformationField(float *nodePositions,
-                                 nifti_image *deformationFieldImage,
-                                 unsigned *closestNodes,
-                                 float *femInterpolationWeight
-                                )
-{
-#ifdef _WIN32
-    long voxel;
-    const long voxelNumber = (long)NiftiImage::calcVoxelNumber(deformationFieldImage, 3);
-#else
-    size_t voxel;
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationFieldImage, 3);
-#endif
-
-   float *defPtrX = static_cast<float *>(deformationFieldImage->data);
-   float *defPtrY = &defPtrX[voxelNumber];
-   float *defPtrZ = &defPtrY[voxelNumber];
-
-   float coefficients[4];
-   float positionA[3], positionB[3], positionC[3], positionD[3];
-#ifdef _OPENMP
-   #pragma omp parallel for default(none) \
-   shared(defPtrX, defPtrY, defPtrZ, femInterpolationWeight, \
-          nodePositions, closestNodes, voxelNumber) \
-   private(coefficients, positionA, positionB, positionC, positionD)
-#endif
-   for(voxel=0; voxel<voxelNumber; ++voxel)
-   {
-      coefficients[0]=femInterpolationWeight[4*voxel];
-      coefficients[1]=femInterpolationWeight[4*voxel+1];
-      coefficients[2]= femInterpolationWeight[4*voxel+2];
-      coefficients[3]=femInterpolationWeight[4*voxel+3];
-
-      positionA[0]=nodePositions[3*closestNodes[4*voxel]];
-      positionA[1]=nodePositions[3*closestNodes[4*voxel]+1];
-      positionA[2]=nodePositions[3*closestNodes[4*voxel]+2];
-
-      positionB[0]=nodePositions[3*closestNodes[4*voxel+1]];
-      positionB[1]=nodePositions[3*closestNodes[4*voxel+1]+1];
-      positionB[2]=nodePositions[3*closestNodes[4*voxel+1]+2];
-
-      positionC[0]=nodePositions[3*closestNodes[4*voxel+2]];
-      positionC[1]=nodePositions[3*closestNodes[4*voxel+2]+1];
-      positionC[2]=nodePositions[3*closestNodes[4*voxel+2]+2];
-
-      positionD[0]=nodePositions[3*closestNodes[4*voxel+3]];
-      positionD[1]=nodePositions[3*closestNodes[4*voxel+3]+1];
-      positionD[2]=nodePositions[3*closestNodes[4*voxel+3]+2];
-
-      defPtrX[voxel]=positionA[0]*coefficients[0] +
-                     positionB[0]*coefficients[1] +
-                     positionC[0]*coefficients[2] +
-                     positionD[0]*coefficients[3];
-
-      defPtrY[voxel]=positionA[1]*coefficients[0] +
-                     positionB[1]*coefficients[1] +
-                     positionC[1]*coefficients[2] +
-                     positionD[1]*coefficients[3];
-
-      defPtrZ[voxel]=positionA[2]*coefficients[0] +
-                     positionB[2]*coefficients[1] +
-                     positionC[2]*coefficients[2] +
-                     positionD[2]*coefficients[3];
-   }
-   return;
-}// reg_fem_getDeformationField
-
-void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
-                                 unsigned *closestNodes,
-                                 float *femInterpolationWeight,
-                                 unsigned nodeNumber,
-                                 float *femBasedGradient)
-{
-   const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelBasedGradient, 3);
-   float *voxGradPtrX = static_cast<float *>(voxelBasedGradient->data);
-   float *voxGradPtrY = &voxGradPtrX[voxelNumber];
-   float *voxGradPtrZ = &voxGradPtrY[voxelNumber];
-
-   for(unsigned node=0; node<3*nodeNumber; ++node)
-      femBasedGradient[node]=0.f;
-
-   unsigned currentNodes[4];
-   float currentGradient[3];
-   float coefficients[4];
-   for(size_t voxel=0; voxel<voxelNumber; ++voxel)
-   {
-      currentNodes[0]=closestNodes[4*voxel];
-      currentNodes[1]=closestNodes[4*voxel+1];
-      currentNodes[2]=closestNodes[4*voxel+2];
-      currentNodes[3]=closestNodes[4*voxel+3];
-
-      coefficients[0]=femInterpolationWeight[4*voxel];
-      coefficients[1]=femInterpolationWeight[4*voxel+1];
-      coefficients[2]=femInterpolationWeight[4*voxel+2];
-      coefficients[3]=femInterpolationWeight[4*voxel+3];
-
-      currentGradient[0]=voxGradPtrX[voxel];
-      currentGradient[1]=voxGradPtrY[voxel];
-      currentGradient[2]=voxGradPtrZ[voxel];
-
-      for(unsigned i=0; i<4; ++i)
-      {
-         femBasedGradient[3*currentNodes[i]  ] += currentGradient[0]*coefficients[i];
-         femBasedGradient[3*currentNodes[i]+1] += currentGradient[1]*coefficients[i];
-         femBasedGradient[3*currentNodes[i]+2] += currentGradient[2]*coefficients[i];
-      }
-   }// voxel
-
-   return;
-}// reg_fem_voxelToNodeGradient
diff --git a/reg-lib/cpu/_reg_femTrans.h b/reg-lib/cpu/_reg_femTrans.h
deleted file mode 100644
index d9ee6861..00000000
--- a/reg-lib/cpu/_reg_femTrans.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * @file _reg_femTransformation_gpu.h
- * @author Marc Modat
- * @date 02/11/2011
- * @brief Functions built to interface between NiftyReg and NiftySim
- * It basically allows to populate a dense deformation
- *
- *  Created by Marc Modat on 02/11/2011.
- *  Copyright (c) 2011-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "_reg_tools.h"
-
-/** @brief Initialise multiples arrays to populate a dense deformation
- * field from a FEM parametrisation
- * @param elementNodes Arrays that contains the nodes use to define each element.
- * @param nodePositions Arrays that contains the position in mm of
- * every node
- * @param deformationFieldImage Deformation field image, at this stage it
- * is only used to define the space of the transformation
- * @param closestNodes This array will contain for every voxel the closest
- * nodes to be used for interpolation
- * @param femInterpolationWeight This arrayt will contain for every voxel
- * the weight associated with the closest node.
- */
-void reg_fem_InitialiseTransformation(int *elementNodes,
-                                      unsigned elementNumber,
-                                      float *nodePositions,
-                                      nifti_image *deformationFieldImage,
-                                      unsigned *closestNodes,
-                                      float *femInterpolationWeight);
-
-/** @brief A dense deformation field is filled using interpolation
- * from a coarse mesh
- * @param nodePositions Array that contains the position of every node
- * @param deformationFieldImage Deformation field image that will be
- * filled
- * @param closestNodes Array that contains for every voxel the closest
- * nodes from the mesh
- * @param femInterpolationWeight Array that contains for every voxel,
- * the weight associated with the closest nodes.
- */
-void reg_fem_getDeformationField(float *nodePositions,
-                                 nifti_image *deformationFieldImage,
-                                 unsigned *closestNodes,
-                                 float *femInterpolationWeight);
-
-/** @brief Convert a dense gradient image into a mesh based gradient image
- * @param voxelBasedGradient Image that contains the gradient image
- * @param closestNodes Array that contains the closest nodes associated
- * with every voxel
- * @param femInterpolationWeight Array that contains for every voxel the
- * weight associated with the closest nodes
- * @param nodeNumber Scalar that contains the total number of node in the mesh
- * @param femBasedGradient Array that contains the gradient values at
- * every node.
- */
-void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient,
-                                 unsigned *closestNodes,
-                                 float *femInterpolationWeight,
-                                 unsigned nodeNumber,
-                                 float *femBasedGradient);
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index bb4b7a54..c3e17149 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -3942,120 +3942,3 @@ void compute_BCH_update(nifti_image *img1, // current field
     }
 }
 /* *************************************************************** */
-template <class DataType>
-void extractLine(int start, int end, int increment, const DataType *image, DataType *values) {
-    size_t index = 0;
-    for (int i = start; i < end; i += increment) values[index++] = image[i];
-}
-/* *************************************************************** */
-template <class DataType>
-void restoreLine(int start, int end, int increment, DataType *image, const DataType *values) {
-    size_t index = 0;
-    for (int i = start; i < end; i += increment) image[i] = values[index++];
-}
-/* *************************************************************** */
-template <class DataType>
-void intensitiesToSplineCoefficients(DataType *values, int number) {
-    // Border are set to zero
-    DataType pole = sqrt(3.0) - 2.0;
-    DataType currentPole = pole;
-    DataType currentOpposite = pow(pole, (DataType)(2.0 * (DataType)number - 1.0));
-    DataType sum = 0;
-    for (int i = 1; i < number; i++) {
-        sum += (currentPole - currentOpposite) * values[i];
-        currentPole *= pole;
-        currentOpposite /= pole;
-    }
-    values[0] = (DataType)((values[0] - pole * pole * (values[0] + sum)) / (1.0 - pow(pole, (DataType)(2.0 * (double)number + 2.0))));
-
-    //other values forward
-    for (int i = 1; i < number; i++) {
-        values[i] += pole * values[i - 1];
-    }
-
-    DataType ipp = (DataType)(1.0 - pole);
-    ipp *= ipp;
-
-    //last value
-    values[number - 1] = ipp * values[number - 1];
-
-    //other values backward
-    for (int i = number - 2; 0 <= i; i--) {
-        values[i] = pole * values[i + 1] + ipp * values[i];
-    }
-}
-/* *************************************************************** */
-template <class DataType>
-void reg_spline_getDeconvolvedCoefficents(nifti_image *img) {
-    double *coeff = (double*)malloc(img->nvox * sizeof(double));
-    DataType *imgPtr = static_cast<DataType*>(img->data);
-    for (size_t i = 0; i < img->nvox; ++i)
-        coeff[i] = imgPtr[i];
-    for (int u = 0; u < img->nu; ++u) {
-        for (int t = 0; t < img->nt; ++t) {
-            double *coeffPtr = &coeff[(u * img->nt + t) * img->nx * img->ny * img->nz];
-
-            // Along the X axis
-            int number = img->nx;
-            double *values = new double[number];
-            int increment = 1;
-            for (int i = 0; i < img->ny * img->nz; i++) {
-                int start = i * img->nx;
-                int end = start + img->nx;
-                extractLine<double>(start, end, increment, coeffPtr, values);
-                intensitiesToSplineCoefficients<double>(values, number);
-                restoreLine<double>(start, end, increment, coeffPtr, values);
-            }
-            delete[] values;
-            values = nullptr;
-
-            // Along the Y axis
-            number = img->ny;
-            values = new double[number];
-            increment = img->nx;
-            for (int i = 0; i < img->nx * img->nz; i++) {
-                int start = i + i / img->nx * img->nx * (img->ny - 1);
-                int end = start + img->nx * img->ny;
-                extractLine<double>(start, end, increment, coeffPtr, values);
-                intensitiesToSplineCoefficients<double>(values, number);
-                restoreLine<double>(start, end, increment, coeffPtr, values);
-            }
-            delete[] values;
-            values = nullptr;
-
-            // Along the Z axis
-            if (img->nz > 1) {
-                number = img->nz;
-                values = new double[number];
-                increment = img->nx * img->ny;
-                for (int i = 0; i < img->nx * img->ny; i++) {
-                    int start = i;
-                    int end = start + img->nx * img->ny * img->nz;
-                    extractLine<double>(start, end, increment, coeffPtr, values);
-                    intensitiesToSplineCoefficients<double>(values, number);
-                    restoreLine<double>(start, end, increment, coeffPtr, values);
-                }
-                delete[] values;
-                values = nullptr;
-            }
-        }//t
-    }//u
-
-    for (size_t i = 0; i < img->nvox; ++i)
-        imgPtr[i] = static_cast<DataType>(coeff[i]);
-    free(coeff);
-}
-/* *************************************************************** */
-void reg_spline_getDeconvolvedCoefficents(nifti_image *img) {
-    switch (img->datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        reg_spline_getDeconvolvedCoefficents<float>(img);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_spline_getDeconvolvedCoefficents<double>(img);
-        break;
-    default:
-        NR_FATAL_ERROR("Only implemented for single or double precision images");
-    }
-}
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index 5263d9c4..c2a06195 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -193,9 +193,3 @@ void compute_BCH_update(nifti_image *img1,
                         nifti_image *img2,
                         int type);
 /* *************************************************************** */
-/** @brief This function deconvolve an image by a cubic B-Spline kernel
- * in order to get cubic B-Spline coefficient
- * @param img Image to be deconvolved
- */
-void reg_spline_getDeconvolvedCoefficents(nifti_image *img);
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 44feb651..722add4e 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -671,955 +671,140 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) {
 }
 /* *************************************************************** */
 template <class DataType>
-double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage,
-                                      const nifti_image *splineControlPoint) {
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 2);
-    int a, b, x, y, index, xPre, yPre;
-    DataType basis;
-
-    const DataType gridVoxelSpacing[2] = {
-        splineControlPoint->dx / referenceImage->dx,
-        splineControlPoint->dy / referenceImage->dy
-    };
-
-    double constraintValue = 0;
-    double currentValue;
-
-    // Create pointers to the spline coefficients
-    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
-    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
-    const DataType *splinePtrY = &splinePtrX[nodeNumber];
-    DataType splineCoeffX, splineCoeffY;
-
-    // Store the basis values since they are constant as the value is approximated
-    // at the control point positions only
-    DataType basisX[4], basisY[4];
-    DataType firstX[4], firstY[4];
-
-    mat33 matrix, r;
-
-    // Matrix to use to convert the gradient from mm to voxel
-    mat33 reorientation;
-    if (splineControlPoint->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-
-
-    for (y = 0; y < referenceImage->ny; ++y) {
-        yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
-        basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
-        if (basis < 0) basis = 0; //rounding error
-        get_BSplineBasisValues<DataType>(basis, basisY, firstY);
-
-        for (x = 0; x < referenceImage->nx; ++x) {
-            xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
-            basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
-            if (basis < 0) basis = 0; //rounding error
-            get_BSplineBasisValues<DataType>(basis, basisX, firstX);
-
-            memset(&matrix, 0, sizeof(mat33));
-
-            for (b = 0; b < 4; b++) {
-                for (a = 0; a < 4; a++) {
-                    index = (yPre + b) * splineControlPoint->nx + xPre + a;
-                    splineCoeffX = splinePtrX[index];
-                    splineCoeffY = splinePtrY[index];
-
-                    matrix.m[0][0] += static_cast<float>(firstX[a] * basisY[b] * splineCoeffX);
-                    matrix.m[1][0] += static_cast<float>(basisX[a] * firstY[b] * splineCoeffX);
-
-                    matrix.m[0][1] += static_cast<float>(firstX[a] * basisY[b] * splineCoeffY);
-                    matrix.m[1][1] += static_cast<float>(basisX[a] * firstY[b] * splineCoeffY);
-                }
-            }
-            // Convert from mm to voxel
-            matrix = nifti_mat33_mul(reorientation, matrix);
-            // Removing the rotation component
-            r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-            matrix = nifti_mat33_mul(r, matrix);
-            // Convert to displacement
-            --matrix.m[0][0];
-            --matrix.m[1][1];
-
-            currentValue = 0;
-            for (b = 0; b < 2; b++) {
-                for (a = 0; a < 2; a++) {
-                    currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
-                }
-            }
-            constraintValue += currentValue;
-        }
-    }
-    return constraintValue / static_cast<double>(voxelNumber * 2);
-}
-/* *************************************************************** */
-template <class DataType>
-double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage,
-                                      const nifti_image *splineControlPoint) {
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
-    int a, b, c, x, y, z, index, xPre, yPre, zPre;
-    DataType basis;
-
-    const DataType gridVoxelSpacing[3] = {
-        splineControlPoint->dx / referenceImage->dx,
-        splineControlPoint->dy / referenceImage->dy,
-        splineControlPoint->dz / referenceImage->dz
-    };
-
-    double constraintValue = 0;
-    double currentValue;
-
-    // Create pointers to the spline coefficients
-    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
-    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
-    const DataType *splinePtrY = &splinePtrX[nodeNumber];
-    const DataType *splinePtrZ = &splinePtrY[nodeNumber];
-    DataType splineCoeffX, splineCoeffY, splineCoeffZ;
-
-    // Store the basis values since they are constant as the value is approximated
-    // at the control point positions only
-    DataType basisX[4], basisY[4], basisZ[4];
-    DataType firstX[4], firstY[4], firstZ[4];
-
-    mat33 matrix, r;
-
-    // Matrix to use to convert the gradient from mm to voxel
-    mat33 reorientation;
-    if (splineControlPoint->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-
-    for (z = 0; z < referenceImage->nz; ++z) {
-        zPre = static_cast<int>(static_cast<DataType>(z) / gridVoxelSpacing[2]);
-        basis = static_cast<DataType>(z) / gridVoxelSpacing[2] - static_cast<DataType>(zPre);
-        if (basis < 0) basis = 0; //rounding error
-        get_BSplineBasisValues<DataType>(basis, basisZ, firstZ);
-
-        for (y = 0; y < referenceImage->ny; ++y) {
-            yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
-            basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
-            if (basis < 0) basis = 0; //rounding error
-            get_BSplineBasisValues<DataType>(basis, basisY, firstY);
-
-            for (x = 0; x < referenceImage->nx; ++x) {
-                xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
-                basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
-                if (basis < 0) basis = 0; //rounding error
-                get_BSplineBasisValues<DataType>(basis, basisX, firstX);
-
-                memset(&matrix, 0, sizeof(mat33));
-
-                for (c = 0; c < 4; c++) {
-                    for (b = 0; b < 4; b++) {
-                        for (a = 0; a < 4; a++) {
-                            index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a;
-                            splineCoeffX = splinePtrX[index];
-                            splineCoeffY = splinePtrY[index];
-                            splineCoeffZ = splinePtrZ[index];
-
-                            matrix.m[0][0] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffX);
-                            matrix.m[1][0] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffX);
-                            matrix.m[2][0] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffX);
-
-                            matrix.m[0][1] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffY);
-                            matrix.m[1][1] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffY);
-                            matrix.m[2][1] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffY);
-
-                            matrix.m[0][2] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ);
-                            matrix.m[1][2] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ);
-                            matrix.m[2][2] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ);
-                        }
-                    }
-                }
-                // Convert from mm to voxel
-                matrix = nifti_mat33_mul(reorientation, matrix);
-                // Removing the rotation component
-                r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-                matrix = nifti_mat33_mul(r, matrix);
-                // Convert to displacement
-                --matrix.m[0][0];
-                --matrix.m[1][1];
-                --matrix.m[2][2];
-
-                currentValue = 0;
-                for (b = 0; b < 3; b++) {
-                    for (a = 0; a < 3; a++) {
-                        currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
-                    }
-                }
-                constraintValue += currentValue;
-            }
-        }
-    }
-    return constraintValue / static_cast<double>(voxelNumber * 3);
-}
-/* *************************************************************** */
-double reg_spline_linearEnergy(const nifti_image *referenceImage,
-                               const nifti_image *splineControlPoint) {
-    if (splineControlPoint->nz > 1) {
-        switch (splineControlPoint->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            return reg_spline_linearEnergyValue3D<float>(referenceImage, splineControlPoint);
-        case NIFTI_TYPE_FLOAT64:
-            return reg_spline_linearEnergyValue3D<double>(referenceImage, splineControlPoint);
-        default:
-            NR_FATAL_ERROR("Only implemented for single or double precision images");
-            return 0;
-        }
-    } else {
-        switch (splineControlPoint->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            return reg_spline_linearEnergyValue2D<float>(referenceImage, splineControlPoint);
-        case NIFTI_TYPE_FLOAT64:
-            return reg_spline_linearEnergyValue2D<double>(referenceImage, splineControlPoint);
-        default:
-            NR_FATAL_ERROR("Only implemented for single or double precision images");
-            return 0;
-        }
-    }
-}
-/* *************************************************************** */
-template <class DataType>
-void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage,
-                                       const nifti_image *splineControlPoint,
-                                       nifti_image *gradientImage,
-                                       float weight) {
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 2);
-    int a, b, x, y, index, xPre, yPre;
-    DataType basis;
-
-    const DataType gridVoxelSpacing[2] = {
-        splineControlPoint->dx / referenceImage->dx,
-        splineControlPoint->dy / referenceImage->dy
-    };
-
-    // Create pointers to the spline coefficients
-    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
-    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
-    const DataType *splinePtrY = &splinePtrX[nodeNumber];
-    DataType splineCoeffX, splineCoeffY;
-
-    // Store the basis values since they are constant as the value is approximated
-    // at the control point positions only
-    DataType basisX[4], basisY[4];
-    DataType firstX[4], firstY[4];
-
-    mat33 matrix, r;
-
-    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
-    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
-
-    DataType approxRatio = weight / static_cast<DataType>(voxelNumber);
-    DataType gradValues[2];
-
-    // Matrix to use to convert the gradient from mm to voxel
-    mat33 reorientation;
-    if (splineControlPoint->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-    mat33 invReorientation = nifti_mat33_inverse(reorientation);
-
-    // Loop over all voxels
-    for (y = 0; y < referenceImage->ny; ++y) {
-        yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
-        basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
-        if (basis < 0) basis = 0; //rounding error
-        get_BSplineBasisValues<DataType>(basis, basisY, firstY);
-
-        for (x = 0; x < referenceImage->nx; ++x) {
-            xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
-            basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
-            if (basis < 0) basis = 0; //rounding error
-            get_BSplineBasisValues<DataType>(basis, basisX, firstX);
-
-            memset(&matrix, 0, sizeof(mat33));
-
-            for (b = 0; b < 4; b++) {
-                for (a = 0; a < 4; a++) {
-                    index = (yPre + b) * splineControlPoint->nx + xPre + a;
-                    splineCoeffX = splinePtrX[index];
-                    splineCoeffY = splinePtrY[index];
-
-                    matrix.m[0][0] += static_cast<float>(firstX[a] * basisY[b] * splineCoeffX);
-                    matrix.m[1][0] += static_cast<float>(basisX[a] * firstY[b] * splineCoeffX);
-
-                    matrix.m[0][1] += static_cast<float>(firstX[a] * basisY[b] * splineCoeffY);
-                    matrix.m[1][1] += static_cast<float>(basisX[a] * firstY[b] * splineCoeffY);
-                }
-            }
-            // Convert from mm to voxel
-            matrix = nifti_mat33_mul(reorientation, matrix);
-            // Removing the rotation component
-            r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-            matrix = nifti_mat33_mul(r, matrix);
-            // Convert to displacement
-            --matrix.m[0][0];
-            --matrix.m[1][1];
-            for (b = 0; b < 4; b++) {
-                for (a = 0; a < 4; a++) {
-                    index = (yPre + b) * splineControlPoint->nx + xPre + a;
-                    gradValues[0] = -2.f * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b];
-                    gradValues[1] = -2.f * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b];
-                    gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
-                                                          invReorientation.m[0][1] * gradValues[1]);
-                    gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
-                                                          invReorientation.m[1][1] * gradValues[1]);
-                } // a
-            } // b
-        }
-    }
-}
-/* *************************************************************** */
-template <class DataType>
-void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage,
-                                       const nifti_image *splineControlPoint,
-                                       nifti_image *gradientImage,
-                                       float weight) {
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
-    int a, b, c, x, y, z, index, xPre, yPre, zPre;
-    DataType basis;
-
-    const DataType gridVoxelSpacing[3] = {
-        splineControlPoint->dx / referenceImage->dx,
-        splineControlPoint->dy / referenceImage->dy,
-        splineControlPoint->dz / referenceImage->dz
-    };
-
-    // Create pointers to the spline coefficients
-    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
-    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
-    const DataType *splinePtrY = &splinePtrX[nodeNumber];
-    const DataType *splinePtrZ = &splinePtrY[nodeNumber];
-    DataType splineCoeffX, splineCoeffY, splineCoeffZ;
-
-    // Store the basis values since they are constant as the value is approximated
-    // at the control point positions only
-    DataType basisX[4], basisY[4], basisZ[4];
-    DataType firstX[4], firstY[4], firstZ[4];
-
-    mat33 matrix, r;
-
-    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
-    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
-    DataType *gradientZPtr = &gradientYPtr[nodeNumber];
-
-    DataType approxRatio = weight / static_cast<DataType>(voxelNumber);
-    DataType gradValues[3];
-
-    // Matrix to use to convert the gradient from mm to voxel
-    mat33 reorientation;
-    if (splineControlPoint->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
-    mat33 invReorientation = nifti_mat33_inverse(reorientation);
-
-    // Loop over all voxels
-    for (z = 0; z < referenceImage->nz; ++z) {
-        zPre = static_cast<int>(static_cast<DataType>(z) / gridVoxelSpacing[2]);
-        basis = static_cast<DataType>(z) / gridVoxelSpacing[2] - static_cast<DataType>(zPre);
-        if (basis < 0) basis = 0; //rounding error
-        get_BSplineBasisValues<DataType>(basis, basisZ, firstZ);
-
-        for (y = 0; y < referenceImage->ny; ++y) {
-            yPre = static_cast<int>(static_cast<DataType>(y) / gridVoxelSpacing[1]);
-            basis = static_cast<DataType>(y) / gridVoxelSpacing[1] - static_cast<DataType>(yPre);
-            if (basis < 0) basis = 0; //rounding error
-            get_BSplineBasisValues<DataType>(basis, basisY, firstY);
-
-            for (x = 0; x < referenceImage->nx; ++x) {
-                xPre = static_cast<int>(static_cast<DataType>(x) / gridVoxelSpacing[0]);
-                basis = static_cast<DataType>(x) / gridVoxelSpacing[0] - static_cast<DataType>(xPre);
-                if (basis < 0) basis = 0; //rounding error
-                get_BSplineBasisValues<DataType>(basis, basisX, firstX);
-
-                memset(&matrix, 0, sizeof(mat33));
-
-                for (c = 0; c < 4; c++) {
-                    for (b = 0; b < 4; b++) {
-                        for (a = 0; a < 4; a++) {
-                            index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a;
-                            splineCoeffX = splinePtrX[index];
-                            splineCoeffY = splinePtrY[index];
-                            splineCoeffZ = splinePtrZ[index];
-
-                            matrix.m[0][0] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffX);
-                            matrix.m[1][0] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffX);
-                            matrix.m[2][0] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffX);
-
-                            matrix.m[0][1] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffY);
-                            matrix.m[1][1] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffY);
-                            matrix.m[2][1] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffY);
-
-                            matrix.m[0][2] += static_cast<float>(firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ);
-                            matrix.m[1][2] += static_cast<float>(basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ);
-                            matrix.m[2][2] += static_cast<float>(basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ);
-                        }
-                    }
-                }
-                // Convert from mm to voxel
-                matrix = nifti_mat33_mul(reorientation, matrix);
-                // Removing the rotation component
-                r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-                matrix = nifti_mat33_mul(r, matrix);
-                // Convert to displacement
-                --matrix.m[0][0];
-                --matrix.m[1][1];
-                --matrix.m[2][2];
-                for (c = 0; c < 4; c++) {
-                    for (b = 0; b < 4; b++) {
-                        for (a = 0; a < 4; a++) {
-                            index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a;
-                            gradValues[0] = -2.f * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b] * basisZ[3 - c];
-                            gradValues[1] = -2.f * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b] * basisZ[3 - c];
-                            gradValues[2] = -2.f * matrix.m[2][2] * basisX[3 - a] * basisY[3 - b] * firstZ[3 - c];
-                            gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
-                                                                  invReorientation.m[0][1] * gradValues[1] +
-                                                                  invReorientation.m[0][2] * gradValues[2]);
-                            gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
-                                                                  invReorientation.m[1][1] * gradValues[1] +
-                                                                  invReorientation.m[1][2] * gradValues[2]);
-                            gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
-                                                                  invReorientation.m[2][1] * gradValues[1] +
-                                                                  invReorientation.m[2][2] * gradValues[2]);
-                        } // a
-                    } // b
-                } // c
-            } // x
-        } // y
-    } // z
-}
-/* *************************************************************** */
-void reg_spline_linearEnergyGradient(const nifti_image *referenceImage,
-                                     const nifti_image *splineControlPoint,
-                                     nifti_image *gradientImage,
-                                     float weight) {
-    if (splineControlPoint->datatype != gradientImage->datatype)
-        NR_FATAL_ERROR("Input images are expected to have the same datatype");
-
-    if (splineControlPoint->nz > 1) {
-        switch (splineControlPoint->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_spline_linearEnergyGradient3D<float>(referenceImage, splineControlPoint, gradientImage, weight);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_spline_linearEnergyGradient3D<double>(referenceImage, splineControlPoint, gradientImage, weight);
-            break;
-        default:
-            NR_FATAL_ERROR("Only implemented for single or double precision images");
-        }
-    } else {
-        switch (splineControlPoint->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_spline_linearEnergyGradient2D<float>(referenceImage, splineControlPoint, gradientImage, weight);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_spline_linearEnergyGradient2D<double>(referenceImage, splineControlPoint, gradientImage, weight);
-            break;
-        default:
-            NR_FATAL_ERROR("Only implemented for single or double precision images");
-        }
-    }
-}
-/* *************************************************************** */
-template <class DataType>
-void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint,
-                                             nifti_image *gradientImage,
-                                             float weight) {
-    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2);
-
-    // Create the pointers
-    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
-    const DataType *splinePtrY = &splinePtrX[nodeNumber];
-    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
-    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
-
-    // Store the basis values since they are constant as the value is approximated
-    // at the control point positions only
-    DataType basisX[9], basisY[9];
-    set_first_order_basis_values(basisX, basisY);
-
-    // Matrix to use to convert the gradient from mm to voxel
-    const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk);
-    const mat33 invReorientation = nifti_mat33_inverse(reorientation);
-
-    const DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
-
-    for (int y = 1; y < splineControlPoint->ny - 1; y++) {
-        for (int x = 1; x < splineControlPoint->nx - 1; x++) {
-            mat33 matrix{ 0, 0, 0, 0, 0, 0, 0, 0, 1 };
-
-            int i = 0;
-            for (int b = -1; b < 2; b++) {
-                for (int a = -1; a < 2; a++) {
-                    const int index = (y + b) * splineControlPoint->nx + x + a;
-                    const DataType splineCoeffX = splinePtrX[index];
-                    const DataType splineCoeffY = splinePtrY[index];
-
-                    matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
-                    matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
-
-                    matrix.m[0][1] += static_cast<float>(basisX[i] * splineCoeffY);
-                    matrix.m[1][1] += static_cast<float>(basisY[i] * splineCoeffY);
-                    ++i;
-                } // a
-            } // b
-            // Convert from mm to voxel
-            matrix = nifti_mat33_mul(reorientation, matrix);
-            // Removing the rotation component
-            const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-            matrix = nifti_mat33_mul(r, matrix);
-            // Convert to displacement
-            matrix.m[0][0]--; matrix.m[1][1]--;
-            i = 8;
-            for (int b = -1; b < 2; b++) {
-                for (int a = -1; a < 2; a++) {
-                    const DataType gradValues[2]{ -2.f * matrix.m[0][0] * basisX[i], -2.f * matrix.m[1][1] * basisY[i] };
-                    const int index = (y + b) * splineControlPoint->nx + x + a;
-
-                    gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
-                                                          invReorientation.m[0][1] * gradValues[1]);
-                    gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
-                                                          invReorientation.m[1][1] * gradValues[1]);
-                    --i;
-                } // a
-            } // b
-        } // x
-    } // y
-}
-/* *************************************************************** */
-template <class DataType>
-void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint,
-                                             nifti_image *gradientImage,
-                                             float weight) {
-    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
-
-    // Create the pointers
-    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
-    const DataType *splinePtrY = &splinePtrX[nodeNumber];
-    const DataType *splinePtrZ = &splinePtrY[nodeNumber];
-    DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
-    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
-    DataType *gradientZPtr = &gradientYPtr[nodeNumber];
-
-    // Store the basis values since they are constant as the value is approximated
-    // at the control point positions only
-    DataType basisX[27], basisY[27], basisZ[27];
-    set_first_order_basis_values(basisX, basisY, basisZ);
-
-    // Matrix to use to convert the gradient from mm to voxel
-    const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk);
-    const mat33 invReorientation = nifti_mat33_inverse(reorientation);
-
-    const DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
-
-    for (int z = 1; z < splineControlPoint->nz - 1; z++) {
-        for (int y = 1; y < splineControlPoint->ny - 1; y++) {
-            for (int x = 1; x < splineControlPoint->nx - 1; x++) {
-                mat33 matrix{};
-                int i = 0;
-                for (int c = -1; c < 2; c++) {
-                    for (int b = -1; b < 2; b++) {
-                        for (int a = -1; a < 2; a++) {
-                            const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
-                            const DataType splineCoeffX = splinePtrX[index];
-                            const DataType splineCoeffY = splinePtrY[index];
-                            const DataType splineCoeffZ = splinePtrZ[index];
-
-                            matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
-                            matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
-                            matrix.m[2][0] += static_cast<float>(basisZ[i] * splineCoeffX);
-
-                            matrix.m[0][1] += static_cast<float>(basisX[i] * splineCoeffY);
-                            matrix.m[1][1] += static_cast<float>(basisY[i] * splineCoeffY);
-                            matrix.m[2][1] += static_cast<float>(basisZ[i] * splineCoeffY);
-
-                            matrix.m[0][2] += static_cast<float>(basisX[i] * splineCoeffZ);
-                            matrix.m[1][2] += static_cast<float>(basisY[i] * splineCoeffZ);
-                            matrix.m[2][2] += static_cast<float>(basisZ[i] * splineCoeffZ);
-                            ++i;
-                        }
-                    }
-                }
-                // Convert from mm to voxel
-                matrix = nifti_mat33_mul(reorientation, matrix);
-                // Removing the rotation component
-                const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-                matrix = nifti_mat33_mul(r, matrix);
-                // Convert to displacement
-                matrix.m[0][0]--; matrix.m[1][1]--; matrix.m[2][2]--;
-                i = 26;
-                for (int c = -1; c < 2; c++) {
-                    for (int b = -1; b < 2; b++) {
-                        for (int a = -1; a < 2; a++) {
-                            const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
-                            const DataType gradValues[3]{ -2.f * matrix.m[0][0] * basisX[i],
-                                                          -2.f * matrix.m[1][1] * basisY[i],
-                                                          -2.f * matrix.m[2][2] * basisZ[i] };
-
-                            gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
-                                                                  invReorientation.m[0][1] * gradValues[1] +
-                                                                  invReorientation.m[0][2] * gradValues[2]);
-                            gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
-                                                                  invReorientation.m[1][1] * gradValues[1] +
-                                                                  invReorientation.m[1][2] * gradValues[2]);
-                            gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
-                                                                  invReorientation.m[2][1] * gradValues[1] +
-                                                                  invReorientation.m[2][2] * gradValues[2]);
-                            --i;
-                        } // a
-                    } // b
-                } // c
-            } // x
-        } // y
-    } // z
-}
-/* *************************************************************** */
-void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint,
-                                           nifti_image *gradientImage,
-                                           float weight) {
-    if (splineControlPoint->datatype != gradientImage->datatype)
-        NR_FATAL_ERROR("Input images are expected to have the same datatype");
-
-    if (splineControlPoint->nz > 1) {
-        switch (splineControlPoint->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_spline_approxLinearEnergyGradient3D<float>(splineControlPoint, gradientImage, weight);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_spline_approxLinearEnergyGradient3D<double>(splineControlPoint, gradientImage, weight);
-            break;
-        default:
-            NR_FATAL_ERROR("Only implemented for single or double precision images");
-        }
-    } else {
-        switch (splineControlPoint->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_spline_approxLinearEnergyGradient2D<float>(splineControlPoint, gradientImage, weight);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_spline_approxLinearEnergyGradient2D<double>(splineControlPoint, gradientImage, weight);
-            break;
-        default:
-            NR_FATAL_ERROR("Only implemented for single or double precision images");
-        }
-    }
-}
-/* *************************************************************** */
-template <class DataType>
-double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) {
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2);
-    int a, b, x, y, X, Y, index;
-    DataType basis[2] = {1, 0};
-    DataType first[2] = {-1, 1};
-
-    double constraintValue = 0;
-    double currentValue;
-
-    // Create pointers to the deformation field
-    const DataType *defPtrX = static_cast<DataType*>(deformationField->data);
-    const DataType *defPtrY = &defPtrX[voxelNumber];
-    DataType defX, defY;
-
-    mat33 matrix, r;
-
-    // Matrix to use to convert the gradient from mm to voxel
-    mat33 reorientation;
-    if (deformationField->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
-
-    for (y = 0; y < deformationField->ny; ++y) {
-        Y = (y != deformationField->ny - 1) ? y : y - 1;
-        for (x = 0; x < deformationField->nx; ++x) {
-            X = (x != deformationField->nx - 1) ? x : x - 1;
-
-            memset(&matrix, 0, sizeof(mat33));
-
-            for (b = 0; b < 2; b++) {
-                for (a = 0; a < 2; a++) {
-                    index = (Y + b) * deformationField->nx + X + a;
-                    defX = defPtrX[index];
-                    defY = defPtrY[index];
-
-                    matrix.m[0][0] += static_cast<float>(first[a] * basis[b] * defX);
-                    matrix.m[1][0] += static_cast<float>(basis[a] * first[b] * defX);
-                    matrix.m[0][1] += static_cast<float>(first[a] * basis[b] * defY);
-                    matrix.m[1][1] += static_cast<float>(basis[a] * first[b] * defY);
-                }
-            }
-            // Convert from mm to voxel
-            matrix = nifti_mat33_mul(reorientation, matrix);
-            // Removing the rotation component
-            r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-            matrix = nifti_mat33_mul(r, matrix);
-            // Convert to displacement
-            --matrix.m[0][0];
-            --matrix.m[1][1];
-
-            currentValue = 0;
-            for (b = 0; b < 2; b++) {
-                for (a = 0; a < 2; a++) {
-                    currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
-                }
-            }
-            constraintValue += currentValue;
-        }
-    }
-    return constraintValue / static_cast<double>(deformationField->nvox);
-}
-/* *************************************************************** */
-template <class DataType>
-double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) {
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
-    int a, b, c, x, y, z, X, Y, Z, index;
-    DataType basis[2] = {1, 0};
-    DataType first[2] = {-1, 1};
-
-    double constraintValue = 0;
-    double currentValue;
-
-    // Create pointers to the deformation field
-    const DataType *defPtrX = static_cast<DataType*>(deformationField->data);
-    const DataType *defPtrY = &defPtrX[voxelNumber];
-    const DataType *defPtrZ = &defPtrY[voxelNumber];
-    DataType defX, defY, defZ;
-
-    mat33 matrix, r;
-
-    // Matrix to use to convert the gradient from mm to voxel
-    mat33 reorientation;
-    if (deformationField->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
-
-    for (z = 0; z < deformationField->nz; ++z) {
-        Z = (z != deformationField->nz - 1) ? z : z - 1;
-        for (y = 0; y < deformationField->ny; ++y) {
-            Y = (y != deformationField->ny - 1) ? y : y - 1;
-            for (x = 0; x < deformationField->nx; ++x) {
-                X = (x != deformationField->nx - 1) ? x : x - 1;
-
-                memset(&matrix, 0, sizeof(mat33));
-
-                for (c = 0; c < 2; c++) {
-                    for (b = 0; b < 2; b++) {
-                        for (a = 0; a < 2; a++) {
-                            index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a;
-                            defX = defPtrX[index];
-                            defY = defPtrY[index];
-                            defZ = defPtrZ[index];
-
-                            matrix.m[0][0] += static_cast<float>(first[a] * basis[b] * basis[c] * defX);
-                            matrix.m[1][0] += static_cast<float>(basis[a] * first[b] * basis[c] * defX);
-                            matrix.m[2][0] += static_cast<float>(basis[a] * basis[b] * first[c] * defX);
-
-                            matrix.m[0][1] += static_cast<float>(first[a] * basis[b] * basis[c] * defY);
-                            matrix.m[1][1] += static_cast<float>(basis[a] * first[b] * basis[c] * defY);
-                            matrix.m[2][1] += static_cast<float>(basis[a] * basis[b] * first[c] * defY);
-
-                            matrix.m[0][2] += static_cast<float>(first[a] * basis[b] * basis[c] * defZ);
-                            matrix.m[1][2] += static_cast<float>(basis[a] * first[b] * basis[c] * defZ);
-                            matrix.m[2][2] += static_cast<float>(basis[a] * basis[b] * first[c] * defZ);
-                        }
-                    }
-                }
-                // Convert from mm to voxel
-                matrix = nifti_mat33_mul(reorientation, matrix);
-                // Removing the rotation component
-                r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
-                matrix = nifti_mat33_mul(r, matrix);
-                // Convert to displacement
-                --matrix.m[0][0];
-                --matrix.m[1][1];
-                --matrix.m[2][2];
-
-                currentValue = 0;
-                for (b = 0; b < 3; b++) {
-                    for (a = 0; a < 3; a++) {
-                        currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part
-                    }
-                }
-                constraintValue += currentValue;
-            }
-        }
-    }
-    return constraintValue / static_cast<double>(deformationField->nvox);
-}
-/* *************************************************************** */
-double reg_defField_linearEnergy(const nifti_image *deformationField) {
-    if (deformationField->nz > 1) {
-        switch (deformationField->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            return reg_defField_linearEnergyValue3D<float>(deformationField);
-        case NIFTI_TYPE_FLOAT64:
-            return reg_defField_linearEnergyValue3D<double>(deformationField);
-        default:
-            NR_FATAL_ERROR("Only implemented for single or double precision images");
-            return 0;
-        }
-    } else {
-        switch (deformationField->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            return reg_defField_linearEnergyValue2D<float>(deformationField);
-        case NIFTI_TYPE_FLOAT64:
-            return reg_defField_linearEnergyValue2D<double>(deformationField);
-        default:
-            NR_FATAL_ERROR("Only implemented for single or double precision images");
-            return 0;
-        }
-    }
-}
-/* *************************************************************** */
-template <class DataType>
-void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField,
-                                         nifti_image *gradientImage,
-                                         float weight) {
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2);
-    int a, b, x, y, X, Y, index;
-    DataType basis[2] = {1, 0};
-    DataType first[2] = {-1, 1};
-
-    // Create pointers to the deformation field
-    const DataType *defPtrX = static_cast<DataType*>(deformationField->data);
-    const DataType *defPtrY = &defPtrX[voxelNumber];
-    DataType defX, defY;
-
-    mat33 matrix, r;
+void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint,
+                                             nifti_image *gradientImage,
+                                             float weight) {
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2);
 
+    // Create the pointers
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
     DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
-    DataType *gradientYPtr = &gradientXPtr[voxelNumber];
+    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
 
-    DataType approxRatio = weight / static_cast<DataType>(voxelNumber);
-    DataType gradValues[2];
+    // Store the basis values since they are constant as the value is approximated
+    // at the control point positions only
+    DataType basisX[9], basisY[9];
+    set_first_order_basis_values(basisX, basisY);
 
     // Matrix to use to convert the gradient from mm to voxel
-    mat33 reorientation;
-    if (deformationField->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
-    mat33 invReorientation = nifti_mat33_inverse(reorientation);
+    const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk);
+    const mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
-    for (y = 0; y < deformationField->ny; ++y) {
-        Y = (y != deformationField->ny - 1) ? y : y - 1;
-        for (x = 0; x < deformationField->nx; ++x) {
-            X = (x != deformationField->nx - 1) ? x : x - 1;
+    const DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
 
-            memset(&matrix, 0, sizeof(mat33));
+    for (int y = 1; y < splineControlPoint->ny - 1; y++) {
+        for (int x = 1; x < splineControlPoint->nx - 1; x++) {
+            mat33 matrix{ 0, 0, 0, 0, 0, 0, 0, 0, 1 };
 
-            for (b = 0; b < 2; b++) {
-                for (a = 0; a < 2; a++) {
-                    index = (Y + b) * deformationField->nx + X + a;
-                    defX = defPtrX[index];
-                    defY = defPtrY[index];
-
-                    matrix.m[0][0] += static_cast<float>(first[a] * basis[b] * defX);
-                    matrix.m[1][0] += static_cast<float>(basis[a] * first[b] * defX);
-                    matrix.m[0][1] += static_cast<float>(first[a] * basis[b] * defY);
-                    matrix.m[1][1] += static_cast<float>(basis[a] * first[b] * defY);
-                }
-            }
+            int i = 0;
+            for (int b = -1; b < 2; b++) {
+                for (int a = -1; a < 2; a++) {
+                    const int index = (y + b) * splineControlPoint->nx + x + a;
+                    const DataType splineCoeffX = splinePtrX[index];
+                    const DataType splineCoeffY = splinePtrY[index];
+
+                    matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
+                    matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
+
+                    matrix.m[0][1] += static_cast<float>(basisX[i] * splineCoeffY);
+                    matrix.m[1][1] += static_cast<float>(basisY[i] * splineCoeffY);
+                    ++i;
+                } // a
+            } // b
             // Convert from mm to voxel
             matrix = nifti_mat33_mul(reorientation, matrix);
             // Removing the rotation component
-            r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+            const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
             matrix = nifti_mat33_mul(r, matrix);
             // Convert to displacement
-            --matrix.m[0][0];
-            --matrix.m[1][1];
+            matrix.m[0][0]--; matrix.m[1][1]--;
+            i = 8;
+            for (int b = -1; b < 2; b++) {
+                for (int a = -1; a < 2; a++) {
+                    const DataType gradValues[2]{ -2.f * matrix.m[0][0] * basisX[i], -2.f * matrix.m[1][1] * basisY[i] };
+                    const int index = (y + b) * splineControlPoint->nx + x + a;
 
-            for (b = 0; b < 2; b++) {
-                for (a = 0; a < 2; a++) {
-                    index = (Y + b) * deformationField->nx + X + a;
-                    gradValues[0] = -2.f * matrix.m[0][0] * first[1 - a] * basis[1 - b];
-                    gradValues[1] = -2.f * matrix.m[1][1] * basis[1 - a] * first[1 - b];
                     gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
                                                           invReorientation.m[0][1] * gradValues[1]);
                     gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] +
                                                           invReorientation.m[1][1] * gradValues[1]);
+                    --i;
                 } // a
             } // b
-        }
-    }
+        } // x
+    } // y
 }
 /* *************************************************************** */
 template <class DataType>
-void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField,
-                                         nifti_image *gradientImage,
-                                         float weight) {
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
-    int a, b, c, x, y, z, X, Y, Z, index;
-    DataType basis[2] = {1, 0};
-    DataType first[2] = {-1, 1};
-
-    // Create pointers to the deformation field
-    const DataType *defPtrX = static_cast<DataType*>(deformationField->data);
-    const DataType *defPtrY = &defPtrX[voxelNumber];
-    const DataType *defPtrZ = &defPtrY[voxelNumber];
-    DataType defX, defY, defZ;
-
-    mat33 matrix, r;
+void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint,
+                                             nifti_image *gradientImage,
+                                             float weight) {
+    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
 
+    // Create the pointers
+    const DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
+    const DataType *splinePtrY = &splinePtrX[nodeNumber];
+    const DataType *splinePtrZ = &splinePtrY[nodeNumber];
     DataType *gradientXPtr = static_cast<DataType*>(gradientImage->data);
-    DataType *gradientYPtr = &gradientXPtr[voxelNumber];
-    DataType *gradientZPtr = &gradientYPtr[voxelNumber];
+    DataType *gradientYPtr = &gradientXPtr[nodeNumber];
+    DataType *gradientZPtr = &gradientYPtr[nodeNumber];
 
-    DataType approxRatio = weight / static_cast<DataType>(voxelNumber);
-    DataType gradValues[3];
+    // Store the basis values since they are constant as the value is approximated
+    // at the control point positions only
+    DataType basisX[27], basisY[27], basisZ[27];
+    set_first_order_basis_values(basisX, basisY, basisZ);
 
     // Matrix to use to convert the gradient from mm to voxel
-    mat33 reorientation;
-    if (deformationField->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk);
-    mat33 invReorientation = nifti_mat33_inverse(reorientation);
-
-    for (z = 0; z < deformationField->nz; ++z) {
-        Z = (z != deformationField->nz - 1) ? z : z - 1;
-        for (y = 0; y < deformationField->ny; ++y) {
-            Y = (y != deformationField->ny - 1) ? y : y - 1;
-            for (x = 0; x < deformationField->nx; ++x) {
-                X = (x != deformationField->nx - 1) ? x : x - 1;
+    const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk);
+    const mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
-                memset(&matrix, 0, sizeof(mat33));
+    const DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
+
+    for (int z = 1; z < splineControlPoint->nz - 1; z++) {
+        for (int y = 1; y < splineControlPoint->ny - 1; y++) {
+            for (int x = 1; x < splineControlPoint->nx - 1; x++) {
+                mat33 matrix{};
+                int i = 0;
+                for (int c = -1; c < 2; c++) {
+                    for (int b = -1; b < 2; b++) {
+                        for (int a = -1; a < 2; a++) {
+                            const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
+                            const DataType splineCoeffX = splinePtrX[index];
+                            const DataType splineCoeffY = splinePtrY[index];
+                            const DataType splineCoeffZ = splinePtrZ[index];
+
+                            matrix.m[0][0] += static_cast<float>(basisX[i] * splineCoeffX);
+                            matrix.m[1][0] += static_cast<float>(basisY[i] * splineCoeffX);
+                            matrix.m[2][0] += static_cast<float>(basisZ[i] * splineCoeffX);
+
+                            matrix.m[0][1] += static_cast<float>(basisX[i] * splineCoeffY);
+                            matrix.m[1][1] += static_cast<float>(basisY[i] * splineCoeffY);
+                            matrix.m[2][1] += static_cast<float>(basisZ[i] * splineCoeffY);
 
-                for (c = 0; c < 2; c++) {
-                    for (b = 0; b < 2; b++) {
-                        for (a = 0; a < 2; a++) {
-                            index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a;
-                            defX = defPtrX[index];
-                            defY = defPtrY[index];
-                            defZ = defPtrZ[index];
-
-                            matrix.m[0][0] += static_cast<float>(first[a] * basis[b] * basis[c] * defX);
-                            matrix.m[1][0] += static_cast<float>(basis[a] * first[b] * basis[c] * defX);
-                            matrix.m[2][0] += static_cast<float>(basis[a] * basis[b] * first[c] * defX);
-
-                            matrix.m[0][1] += static_cast<float>(first[a] * basis[b] * basis[c] * defY);
-                            matrix.m[1][1] += static_cast<float>(basis[a] * first[b] * basis[c] * defY);
-                            matrix.m[2][1] += static_cast<float>(basis[a] * basis[b] * first[c] * defY);
-
-                            matrix.m[0][2] += static_cast<float>(first[a] * basis[b] * basis[c] * defZ);
-                            matrix.m[1][2] += static_cast<float>(basis[a] * first[b] * basis[c] * defZ);
-                            matrix.m[2][2] += static_cast<float>(basis[a] * basis[b] * first[c] * defZ);
+                            matrix.m[0][2] += static_cast<float>(basisX[i] * splineCoeffZ);
+                            matrix.m[1][2] += static_cast<float>(basisY[i] * splineCoeffZ);
+                            matrix.m[2][2] += static_cast<float>(basisZ[i] * splineCoeffZ);
+                            ++i;
                         }
                     }
                 }
                 // Convert from mm to voxel
                 matrix = nifti_mat33_mul(reorientation, matrix);
                 // Removing the rotation component
-                r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
+                const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix));
                 matrix = nifti_mat33_mul(r, matrix);
                 // Convert to displacement
-                --matrix.m[0][0];
-                --matrix.m[1][1];
-                --matrix.m[2][2];
-                for (c = 0; c < 2; c++) {
-                    for (b = 0; b < 2; b++) {
-                        for (a = 0; a < 2; a++) {
-                            index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a;
-                            gradValues[0] = -2.f * matrix.m[0][0] * first[1 - a] * basis[1 - b] * basis[1 - c];
-                            gradValues[1] = -2.f * matrix.m[1][1] * basis[1 - a] * first[1 - b] * basis[1 - c];
-                            gradValues[2] = -2.f * matrix.m[2][2] * basis[1 - a] * basis[1 - b] * first[1 - c];
+                matrix.m[0][0]--; matrix.m[1][1]--; matrix.m[2][2]--;
+                i = 26;
+                for (int c = -1; c < 2; c++) {
+                    for (int b = -1; b < 2; b++) {
+                        for (int a = -1; a < 2; a++) {
+                            const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a;
+                            const DataType gradValues[3]{ -2.f * matrix.m[0][0] * basisX[i],
+                                                          -2.f * matrix.m[1][1] * basisY[i],
+                                                          -2.f * matrix.m[2][2] * basisZ[i] };
+
                             gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] +
                                                                   invReorientation.m[0][1] * gradValues[1] +
                                                                   invReorientation.m[0][2] * gradValues[2]);
@@ -1629,35 +814,39 @@ void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField,
                             gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] +
                                                                   invReorientation.m[2][1] * gradValues[1] +
                                                                   invReorientation.m[2][2] * gradValues[2]);
+                            --i;
                         } // a
                     } // b
                 } // c
-            }
-        }
-    }
+            } // x
+        } // y
+    } // z
 }
 /* *************************************************************** */
-void reg_defField_linearEnergyGradient(const nifti_image *deformationField,
-                                       nifti_image *gradientImage,
-                                       float weight) {
-    if (deformationField->nz > 1) {
-        switch (deformationField->datatype) {
+void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint,
+                                           nifti_image *gradientImage,
+                                           float weight) {
+    if (splineControlPoint->datatype != gradientImage->datatype)
+        NR_FATAL_ERROR("Input images are expected to have the same datatype");
+
+    if (splineControlPoint->nz > 1) {
+        switch (splineControlPoint->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_defField_linearEnergyGradient3D<float>(deformationField, gradientImage, weight);
+            reg_spline_approxLinearEnergyGradient3D<float>(splineControlPoint, gradientImage, weight);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_defField_linearEnergyGradient3D<double>(deformationField, gradientImage, weight);
+            reg_spline_approxLinearEnergyGradient3D<double>(splineControlPoint, gradientImage, weight);
             break;
         default:
             NR_FATAL_ERROR("Only implemented for single or double precision images");
         }
     } else {
-        switch (deformationField->datatype) {
+        switch (splineControlPoint->datatype) {
         case NIFTI_TYPE_FLOAT32:
-            reg_defField_linearEnergyGradient2D<float>(deformationField, gradientImage, weight);
+            reg_spline_approxLinearEnergyGradient2D<float>(splineControlPoint, gradientImage, weight);
             break;
         case NIFTI_TYPE_FLOAT64:
-            reg_defField_linearEnergyGradient2D<double>(deformationField, gradientImage, weight);
+            reg_spline_approxLinearEnergyGradient2D<double>(splineControlPoint, gradientImage, weight);
             break;
         default:
             NR_FATAL_ERROR("Only implemented for single or double precision images");
@@ -1912,223 +1101,3 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage
     }
 }
 /* *************************************************************** */
-template <class DataType>
-double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) {
-    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
-    int x, y, z, index;
-
-    // Create pointers to the spline coefficients
-    reg_getDisplacementFromDeformation(splineControlPoint);
-    DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
-    DataType *splinePtrY = &splinePtrX[nodeNumber];
-    DataType *splinePtrZ = &splinePtrY[nodeNumber];
-
-    DataType centralCP[3], neigbCP[3];
-
-    double constraintValue = 0;
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-    private(index, x, y, centralCP, neigbCP) \
-    shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ) \
-    reduction(+:constraintValue)
-#endif // _OPENMP
-    for (z = 0; z < splineControlPoint->nz; ++z) {
-        index = z * splineControlPoint->nx * splineControlPoint->ny;
-        for (y = 0; y < splineControlPoint->ny; ++y) {
-            for (x = 0; x < splineControlPoint->nx; ++x) {
-                centralCP[0] = splinePtrX[index];
-                centralCP[1] = splinePtrY[index];
-                centralCP[2] = splinePtrZ[index];
-
-                if (x > 0) {
-                    neigbCP[0] = splinePtrX[index - 1];
-                    neigbCP[1] = splinePtrY[index - 1];
-                    neigbCP[2] = splinePtrZ[index - 1];
-                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
-                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dx;
-                }
-                if (x < splineControlPoint->nx - 1) {
-                    neigbCP[0] = splinePtrX[index + 1];
-                    neigbCP[1] = splinePtrY[index + 1];
-                    neigbCP[2] = splinePtrZ[index + 1];
-                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
-                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dx;
-                }
-
-                if (y > 0) {
-                    neigbCP[0] = splinePtrX[index - splineControlPoint->nx];
-                    neigbCP[1] = splinePtrY[index - splineControlPoint->nx];
-                    neigbCP[2] = splinePtrZ[index - splineControlPoint->nx];
-                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
-                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dy;
-                }
-                if (y < splineControlPoint->ny - 1) {
-                    neigbCP[0] = splinePtrX[index + splineControlPoint->nx];
-                    neigbCP[1] = splinePtrY[index + splineControlPoint->nx];
-                    neigbCP[2] = splinePtrZ[index + splineControlPoint->nx];
-                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
-                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dy;
-                }
-
-                if (z > 0) {
-                    neigbCP[0] = splinePtrX[index - splineControlPoint->nx * splineControlPoint->ny];
-                    neigbCP[1] = splinePtrY[index - splineControlPoint->nx * splineControlPoint->ny];
-                    neigbCP[2] = splinePtrZ[index - splineControlPoint->nx * splineControlPoint->ny];
-                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
-                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dz;
-                }
-                if (z < splineControlPoint->nz - 1) {
-                    neigbCP[0] = splinePtrX[index + splineControlPoint->nx * splineControlPoint->ny];
-                    neigbCP[1] = splinePtrY[index + splineControlPoint->nx * splineControlPoint->ny];
-                    neigbCP[2] = splinePtrZ[index + splineControlPoint->nx * splineControlPoint->ny];
-                    constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) +
-                                        Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dz;
-                }
-                index++;
-            } // x
-        } // y
-    } // z
-    reg_getDeformationFromDisplacement(splineControlPoint);
-    return constraintValue / nodeNumber;
-}
-/* *************************************************************** */
-double reg_spline_approxLinearPairwise(nifti_image *splineControlPoint) {
-    if (splineControlPoint->nz > 1) {
-        switch (splineControlPoint->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            return reg_spline_approxLinearPairwise3D<float>(splineControlPoint);
-        case NIFTI_TYPE_FLOAT64:
-            return reg_spline_approxLinearPairwise3D<double>(splineControlPoint);
-        default:
-            NR_FATAL_ERROR("Only implemented for single or double precision images");
-            return 0;
-        }
-    } else {
-        NR_FATAL_ERROR("Not implemented in 2D yet");
-        return 0;
-    }
-}
-/* *************************************************************** */
-template <class DataType>
-void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint,
-                                               nifti_image *gradientImage,
-                                               float weight) {
-    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
-    int x, y, z, index;
-
-    // Create pointers to the spline coefficients
-    reg_getDisplacementFromDeformation(splineControlPoint);
-    DataType *splinePtrX = static_cast<DataType*>(splineControlPoint->data);
-    DataType *splinePtrY = &splinePtrX[nodeNumber];
-    DataType *splinePtrZ = &splinePtrY[nodeNumber];
-
-    // Pointers to the gradient image
-    DataType *gradPtrX = static_cast<DataType*>(gradientImage->data);
-    DataType *gradPtrY = &gradPtrX[nodeNumber];
-    DataType *gradPtrZ = &gradPtrY[nodeNumber];
-
-    DataType centralCP[3], neigbCP[3];
-
-    double grad_values[3];
-
-    DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-    private(index, x, y, centralCP, neigbCP, grad_values) \
-    shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, approxRatio, \
-    gradPtrX, gradPtrY, gradPtrZ)
-#endif // _OPENMP
-    for (z = 0; z < splineControlPoint->nz; ++z) {
-        index = z * splineControlPoint->nx * splineControlPoint->ny;
-        for (y = 0; y < splineControlPoint->ny; ++y) {
-            for (x = 0; x < splineControlPoint->nx; ++x) {
-                centralCP[0] = splinePtrX[index];
-                centralCP[1] = splinePtrY[index];
-                centralCP[2] = splinePtrZ[index];
-                grad_values[0] = 0;
-                grad_values[1] = 0;
-                grad_values[2] = 0;
-
-                if (x > 0) {
-                    neigbCP[0] = splinePtrX[index - 1];
-                    neigbCP[1] = splinePtrY[index - 1];
-                    neigbCP[2] = splinePtrZ[index - 1];
-                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dx;
-                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dx;
-                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dx;
-                }
-                if (x < splineControlPoint->nx - 1) {
-                    neigbCP[0] = splinePtrX[index + 1];
-                    neigbCP[1] = splinePtrY[index + 1];
-                    neigbCP[2] = splinePtrZ[index + 1];
-                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dx;
-                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dx;
-                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dx;
-                }
-
-                if (y > 0) {
-                    neigbCP[0] = splinePtrX[index - splineControlPoint->nx];
-                    neigbCP[1] = splinePtrY[index - splineControlPoint->nx];
-                    neigbCP[2] = splinePtrZ[index - splineControlPoint->nx];
-                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dy;
-                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dy;
-                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dy;
-                }
-                if (y < splineControlPoint->ny - 1) {
-                    neigbCP[0] = splinePtrX[index + splineControlPoint->nx];
-                    neigbCP[1] = splinePtrY[index + splineControlPoint->nx];
-                    neigbCP[2] = splinePtrZ[index + splineControlPoint->nx];
-                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dy;
-                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dy;
-                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dy;
-                }
-
-                if (z > 0) {
-                    neigbCP[0] = splinePtrX[index - splineControlPoint->nx * splineControlPoint->ny];
-                    neigbCP[1] = splinePtrY[index - splineControlPoint->nx * splineControlPoint->ny];
-                    neigbCP[2] = splinePtrZ[index - splineControlPoint->nx * splineControlPoint->ny];
-                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dz;
-                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dz;
-                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dz;
-                }
-                if (z < splineControlPoint->nz - 1) {
-                    neigbCP[0] = splinePtrX[index + splineControlPoint->nx * splineControlPoint->ny];
-                    neigbCP[1] = splinePtrY[index + splineControlPoint->nx * splineControlPoint->ny];
-                    neigbCP[2] = splinePtrZ[index + splineControlPoint->nx * splineControlPoint->ny];
-                    grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dz;
-                    grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dz;
-                    grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dz;
-                }
-                gradPtrX[index] += approxRatio * static_cast<DataType>(grad_values[0]);
-                gradPtrY[index] += approxRatio * static_cast<DataType>(grad_values[1]);
-                gradPtrZ[index] += approxRatio * static_cast<DataType>(grad_values[2]);
-
-                index++;
-            } // x
-        } // y
-    } // z
-    reg_getDeformationFromDisplacement(splineControlPoint);
-}
-/* *************************************************************** */
-void reg_spline_approxLinearPairwiseGradient(nifti_image *splineControlPoint,
-                                             nifti_image *gradientImage,
-                                             float weight) {
-    if (splineControlPoint->datatype != gradientImage->datatype)
-        NR_FATAL_ERROR("Input images are expected to have the same datatype");
-
-    if (splineControlPoint->nz > 1) {
-        switch (splineControlPoint->datatype) {
-        case NIFTI_TYPE_FLOAT32:
-            reg_spline_approxLinearPairwiseGradient3D<float>(splineControlPoint, gradientImage, weight);
-            break;
-        case NIFTI_TYPE_FLOAT64:
-            reg_spline_approxLinearPairwiseGradient3D<double>(splineControlPoint, gradientImage, weight);
-            break;
-        default:
-            NR_FATAL_ERROR("Only implemented for single or double precision images");
-        }
-    } else {
-        NR_FATAL_ERROR("Not implemented for 2D images yet");
-    }
-}
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h
index 864bc9c7..f945f19d 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.h
+++ b/reg-lib/cpu/_reg_localTrans_regul.h
@@ -38,14 +38,6 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *controlPointGridImage,
                                             nifti_image *gradientImage,
                                             float weight);
 /* *************************************************************** */
-/** @brief Compute and return the linear elastic energy terms.
- * @param controlPointGridImage Image that contains the transformation
- * parametrisation
- * @return The normalised linear energy. Normalised by the number of voxel
- */
-double reg_spline_linearEnergy(const nifti_image *referenceImage,
-                               const nifti_image *controlPointGridImage);
-/* *************************************************************** */
 /** @brief Compute and return the linear elastic energy terms approximated
  * at the control point positions only.
  * @param controlPointGridImage Image that contains the transformation
@@ -54,22 +46,6 @@ double reg_spline_linearEnergy(const nifti_image *referenceImage,
  */
 double reg_spline_approxLinearEnergy(const nifti_image *controlPointGridImage);
 /* *************************************************************** */
-/** @brief Compute the gradient of the linear elastic energy terms
- * computed at all voxel position.
- * @param referenceImage Image that contains the dense space
- * @param controlPointGridImage Image that contains the transformation
- * parametrisation
- * @param gradientImage Image of similar size than the control point
- * grid and that contains the gradient of the objective function.
- * The gradient of the linear elasticity terms are added to the
- * current values
- * @param weight Weight to apply to the term of the penalty
- */
-void reg_spline_linearEnergyGradient(const nifti_image *referenceImage,
-                                     const nifti_image *controlPointGridImage,
-                                     nifti_image *gradientImage,
-                                     float weight);
-/* *************************************************************** */
 /** @brief Compute the gradient of the linear elastic energy terms
  * approximated at the control point positions only.
  * @param controlPointGridImage Image that contains the transformation
@@ -84,20 +60,6 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *controlPointGridIm
                                            nifti_image *gradientImage,
                                            float weight);
 /* *************************************************************** */
-/** @brief Compute and return the linear elastic energy terms.
- * @param deformationField Image that contains the transformation.
- * @return The normalised linear energy. Normalised by the number of voxel
- */
-double reg_defField_linearEnergy(const nifti_image *deformationField);
-/* *************************************************************** */
-/** @brief Compute and return the linear elastic energy terms.
- * @param deformationField Image that contains the transformation.
- * @param weight Weight to apply to the term of the penalty
- */
-void reg_defField_linearEnergyGradient(const nifti_image *deformationField,
-                                       nifti_image *gradientImage,
-                                       float weight);
-/* *************************************************************** */
 /** @Brief Compute the distance between two set of points given a
  * transformation
  * @param controlPointGridImage Image that contains the transformation
@@ -129,14 +91,3 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage
                                             float *landmarkFloating,
                                             float weight);
 /* *************************************************************** */
-/** @brief Compute and return a pairwise energy.
- * @param controlPointGridImage Image that contains the transformation
- * parametrisation
- * @return The normalised pairwise energy. Normalised by the number of voxel
- */
-void reg_spline_approxLinearPairwiseGradient(nifti_image *controlPointGridImage,
-                                             nifti_image *gradientImage,
-                                             float weight);
-/* *************************************************************** */
-double reg_spline_approxLinearPairwise(nifti_image *controlPointGridImage);
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp
index 45d6a8b7..19ed9210 100644
--- a/reg-lib/cpu/_reg_maths.cpp
+++ b/reg-lib/cpu/_reg_maths.cpp
@@ -2,147 +2,10 @@
 
 #define mat(i,j,dim) mat[i*dim+j]
 
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_LUdecomposition(T *mat,
-                         size_t dim,
-                         size_t *index)
-{
-    T *vv = (T *)malloc(dim * sizeof(T));
-    size_t i, j, k, imax = 0;
-
-    for (i = 0; i < dim; ++i)
-    {
-        T big = 0.f;
-        T temp;
-        for (j = 0; j < dim; ++j)
-            if ((temp = fabs(mat(i, j, dim)))>big)
-                big = temp;
-        if (big == 0.f)
-            NR_FATAL_ERROR("Singular matrix");
-        vv[i] = 1.0 / big;
-    }
-    for (j = 0; j < dim; ++j)
-    {
-        for (i = 0; i < j; ++i)
-        {
-            T sum = mat(i, j, dim);
-            for (k = 0; k < i; k++) sum -= mat(i, k, dim)*mat(k, j, dim);
-            mat(i, j, dim) = sum;
-        }
-        T big = 0.f;
-        T dum;
-        for (i = j; i < dim; ++i)
-        {
-            T sum = mat(i, j, dim);
-            for (k = 0; k < j; ++k) sum -= mat(i, k, dim)*mat(k, j, dim);
-            mat(i, j, dim) = sum;
-            if ((dum = vv[i] * fabs(sum)) >= big)
-            {
-                big = dum;
-                imax = i;
-            }
-        }
-        if (j != imax)
-        {
-            for (k = 0; k < dim; ++k)
-            {
-                dum = mat(imax, k, dim);
-                mat(imax, k, dim) = mat(j, k, dim);
-                mat(j, k, dim) = dum;
-            }
-            vv[imax] = vv[j];
-        }
-        index[j] = imax;
-        if (mat(j, j, dim) == 0) mat(j, j, dim) = 1.0e-20;
-        if (j != dim - 1)
-        {
-            dum = 1.0 / mat(j, j, dim);
-            for (i = j + 1; i < dim; ++i) mat(i, j, dim) *= dum;
-        }
-    }
-    free(vv);
-    return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_matrixInvertMultiply(T *mat,
-                              size_t dim,
-                              size_t *index,
-                              T *vec)
-{
-    // Perform the LU decomposition if necessary
-    if (index == nullptr)
-        reg_LUdecomposition(mat, dim, index);
-
-    int ii = 0;
-    for (size_t i = 0; i < dim; ++i)
-    {
-        int ip = index[i];
-        T sum = vec[ip];
-        vec[ip] = vec[i];
-        if (ii != 0)
-        {
-            for (int j = ii - 1; j < (int)i; ++j)
-                sum -= mat(i, j, dim)*vec[j];
-        }
-        else if (sum != 0)
-            ii = i + 1;
-        vec[i] = sum;
-    }
-    for (int i = (int)dim - 1; i > -1; --i)
-    {
-        T sum = vec[i];
-        for (int j = i + 1; j < (int)dim; ++j)
-            sum -= mat(i, j, dim)*vec[j];
-        vec[i] = sum / mat(i, i, dim);
-    }
-}
-template void reg_matrixInvertMultiply<float>(float *, size_t, size_t *, float *);
-template void reg_matrixInvertMultiply<double>(double *, size_t, size_t *, double *);
-/* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-void reg_matrixMultiply(T *mat1,
-                        T *mat2,
-                        size_t *dim1,
-                        size_t *dim2,
-                        T * &res)
-{
-    // First check that the dimension are appropriate
-    if (dim1[1] != dim2[0])
-        NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(dim1[0]) + " " +
-                       std::to_string(dim1[1]) + "] [" + std::to_string(dim2[0]) + " " + std::to_string(dim2[1]) + "]");
-    size_t resDim[2] = {dim1[0], dim2[1]};
-    // Allocate the result matrix
-    if (res != nullptr)
-        free(res);
-    res = (T *)calloc(resDim[0] * resDim[1], sizeof(T));
-    // Multiply both matrices
-    for (size_t j = 0; j < resDim[1]; ++j)
-    {
-        for (size_t i = 0; i < resDim[0]; ++i)
-        {
-            double sum = 0;
-            for (size_t k = 0; k < dim1[1]; ++k)
-            {
-                sum += mat1[k * dim1[0] + i] * mat2[j * dim2[0] + k];
-            }
-            res[j * resDim[0] + i] = sum;
-        } // i
-    } // j
-}
-template void reg_matrixMultiply<float>(float *, float *, size_t *, size_t *, float * &);
-template void reg_matrixMultiply<double>(double *, double *, size_t *, size_t *, double * &);
-/* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
 /* *************************************************************** */
 template<class T>
 T* reg_matrix1DAllocate(size_t arraySize) {
-    T* res = (T*)malloc(arraySize*sizeof(T));
+    T* res = (T*)malloc(arraySize * sizeof(T));
     return res;
 }
 template bool* reg_matrix1DAllocate<bool>(size_t arraySize);
@@ -150,15 +13,6 @@ template float* reg_matrix1DAllocate<float>(size_t arraySize);
 template double* reg_matrix1DAllocate<double>(size_t arraySize);
 /* *************************************************************** */
 template<class T>
-T* reg_matrix1DAllocateAndInitToZero(size_t arraySize) {
-    T* res = (T*)calloc(arraySize, sizeof(T));
-    return res;
-}
-template bool* reg_matrix1DAllocateAndInitToZero<bool>(size_t arraySize);
-template float* reg_matrix1DAllocateAndInitToZero<float>(size_t arraySize);
-template double* reg_matrix1DAllocateAndInitToZero<double>(size_t arraySize);
-/* *************************************************************** */
-template<class T>
 void reg_matrix1DDeallocate(T* mat) {
     free(mat);
 }
@@ -169,9 +23,9 @@ template void reg_matrix1DDeallocate<double>(double* mat);
 template<class T>
 T** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY) {
     T** res;
-    res = (T**)malloc(arraySizeX*sizeof(T*));
+    res = (T**)malloc(arraySizeX * sizeof(T*));
     for (size_t i = 0; i < arraySizeX; i++) {
-        res[i] = (T*)malloc(arraySizeY*sizeof(T));
+        res[i] = (T*)malloc(arraySizeY * sizeof(T));
     }
     return res;
 }
@@ -179,18 +33,6 @@ template float** reg_matrix2DAllocate<float>(size_t arraySizeX, size_t arraySize
 template double** reg_matrix2DAllocate<double>(size_t arraySizeX, size_t arraySizeY);
 /* *************************************************************** */
 template<class T>
-T** reg_matrix2DAllocateAndInitToZero(size_t arraySizeX, size_t arraySizeY) {
-    T** res;
-    res = (T**)calloc(arraySizeX, sizeof(T*));
-    for (size_t i = 0; i < arraySizeX; i++) {
-        res[i] = (T*)calloc(arraySizeY, sizeof(T));
-    }
-    return res;
-}
-template float** reg_matrix2DAllocateAndInitToZero<float>(size_t arraySizeX, size_t arraySizeY);
-template double** reg_matrix2DAllocateAndInitToZero<double>(size_t arraySizeX, size_t arraySizeY);
-/* *************************************************************** */
-template<class T>
 void reg_matrix2DDeallocate(size_t arraySizeX, T** mat) {
     for (size_t i = 0; i < arraySizeX; i++) {
         free(mat[i]);
@@ -203,9 +45,9 @@ template void reg_matrix2DDeallocate<double>(size_t arraySizeX, double** mat);
 template<class T>
 T** reg_matrix2DTranspose(T** mat, size_t arraySizeX, size_t arraySizeY) {
     T** res;
-    res = (T**)malloc(arraySizeY*sizeof(T*));
+    res = (T**)malloc(arraySizeY * sizeof(T*));
     for (size_t i = 0; i < arraySizeY; i++) {
-        res[i] = (T*)malloc(arraySizeX*sizeof(T));
+        res[i] = (T*)malloc(arraySizeX * sizeof(T));
     }
     for (size_t i = 0; i < arraySizeX; i++) {
         for (size_t j = 0; j < arraySizeY; j++) {
@@ -227,7 +69,7 @@ T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t
 
         size_t nbElement = mat1Y;
         double resTemp = 0;
-        T** res = reg_matrix2DAllocate<T>(mat1X,mat2Y);
+        T** res = reg_matrix2DAllocate<T>(mat1X, mat2Y);
 
         for (size_t i = 0; i < mat1X; i++) {
             for (size_t j = 0; j < mat2Y; j++) {
@@ -239,9 +81,8 @@ T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t
             }
         }
         //Output
-       return res;
-    }
-    else {
+        return res;
+    } else {
         // First check that the dimension are appropriate
         if (mat1Y != mat2Y)
             NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " +
@@ -249,7 +90,7 @@ T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t
 
         size_t nbElement = mat1Y;
         double resTemp = 0;
-        T** res = reg_matrix2DAllocate<T>(mat1X,mat2X);
+        T** res = reg_matrix2DAllocate<T>(mat1X, mat2X);
 
         for (size_t i = 0; i < mat1X; i++) {
             for (size_t j = 0; j < mat2X; j++) {
@@ -287,8 +128,7 @@ void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t
                 resT[i][j] = static_cast<T>(resTemp);
             }
         }
-    }
-    else {
+    } else {
         // First check that the dimension are appropriate
         if (mat1Y != mat2Y)
             NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " +
@@ -314,12 +154,9 @@ template void reg_matrix2DMultiply<double>(double** mat1, size_t mat1X, size_t m
 // Multiply a matrix with a vector - we assume correct dimension
 template<class T>
 T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect) {
-
     T* res = reg_matrix1DAllocate<T>(m);
-    double resTemp;
-
     for (size_t i = 0; i < m; i++) {
-        resTemp = 0;
+        double resTemp = 0;
         for (size_t k = 0; k < n; k++) {
             resTemp += static_cast<double>(mat[i][k]) * static_cast<double>(vect[k]);
         }
@@ -332,11 +169,8 @@ template double* reg_matrix2DVectorMultiply<double>(double** mat, size_t m, size
 /* *************************************************************** */
 template<class T>
 void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res) {
-
-    double resTemp = 0;
-
     for (size_t i = 0; i < m; i++) {
-        resTemp = 0;
+        double resTemp = 0;
         for (size_t k = 0; k < n; k++) {
             resTemp += static_cast<double>(mat[i][k]) * static_cast<double>(vect[k]);
         }
@@ -346,33 +180,24 @@ void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res) {
 template void reg_matrix2DVectorMultiply<float>(float** mat, size_t m, size_t n, float* vect, float* res);
 template void reg_matrix2DVectorMultiply<double>(double** mat, size_t m, size_t n, double* vect, double* res);
 /* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
 // Heap sort
-void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum)
-{
+void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum) {
     float *array = &array_tmp[-1];
     int *index = &index_tmp[-1];
     int l = (blockNum >> 1) + 1;
     int ir = blockNum;
     float val;
     int iVal;
-    for (;;)
-    {
-        if (l > 1)
-        {
+    for (;;) {
+        if (l > 1) {
             val = array[--l];
             iVal = index[l];
-        }
-        else
-        {
+        } else {
             val = array[ir];
             iVal = index[ir];
             array[ir] = array[1];
             index[ir] = index[1];
-            if (--ir == 1)
-            {
+            if (--ir == 1) {
                 array[1] = val;
                 index[1] = iVal;
                 break;
@@ -380,18 +205,15 @@ void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum)
         }
         int i = l;
         int j = l + l;
-        while (j <= ir)
-        {
+        while (j <= ir) {
             if (j < ir && array[j] < array[j + 1])
                 j++;
-            if (val < array[j])
-            {
+            if (val < array[j]) {
                 array[i] = array[j];
                 index[i] = index[j];
                 i = j;
                 j <<= 1;
-            }
-            else
+            } else
                 break;
         }
         array[i] = val;
@@ -401,41 +223,32 @@ void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum)
 /* *************************************************************** */
 // Heap sort
 template<class DataType>
-void reg_heapSort(DataType *array_tmp, int blockNum)
-{
+void reg_heapSort(DataType *array_tmp, int blockNum) {
     DataType *array = &array_tmp[-1];
     int l = (blockNum >> 1) + 1;
     int ir = blockNum;
     DataType val;
-    for (;;)
-    {
-        if (l > 1)
-        {
+    for (;;) {
+        if (l > 1) {
             val = array[--l];
-        }
-        else
-        {
+        } else {
             val = array[ir];
             array[ir] = array[1];
-            if (--ir == 1)
-            {
+            if (--ir == 1) {
                 array[1] = val;
                 break;
             }
         }
         int i = l;
         int j = l + l;
-        while (j <= ir)
-        {
+        while (j <= ir) {
             if (j < ir && array[j] < array[j + 1])
                 j++;
-            if (val < array[j])
-            {
+            if (val < array[j]) {
                 array[i] = array[j];
                 i = j;
                 j <<= 1;
-            }
-            else
+            } else
                 break;
         }
         array[i] = val;
@@ -444,13 +257,9 @@ void reg_heapSort(DataType *array_tmp, int blockNum)
 template void reg_heapSort<float>(float *array_tmp, int blockNum);
 template void reg_heapSort<double>(double *array_tmp, int blockNum);
 /* *************************************************************** */
-/* *************************************************************** */
-bool operator==(mat44 A, mat44 B)
-{
-    for (unsigned i = 0; i < 4; ++i)
-    {
-        for (unsigned j = 0; j < 4; ++j)
-        {
+bool operator==(mat44 A, mat44 B) {
+    for (unsigned i = 0; i < 4; ++i) {
+        for (unsigned j = 0; j < 4; ++j) {
             if (A.m[i][j] != B.m[i][j])
                 return false;
         }
@@ -458,12 +267,9 @@ bool operator==(mat44 A, mat44 B)
     return true;
 }
 /* *************************************************************** */
-bool operator!=(mat44 A, mat44 B)
-{
-    for (unsigned i = 0; i < 4; ++i)
-    {
-        for (unsigned j = 0; j < 4; ++j)
-        {
+bool operator!=(mat44 A, mat44 B) {
+    for (unsigned i = 0; i < 4; ++i) {
+        for (unsigned j = 0; j < 4; ++j) {
             if (A.m[i][j] != B.m[i][j])
                 return true;
         }
@@ -471,10 +277,8 @@ bool operator!=(mat44 A, mat44 B)
     return false;
 }
 /* *************************************************************** */
-/* *************************************************************** */
 template<class T>
-T reg_mat44_det(mat44 const* A)
-{
+T reg_mat44_det(mat44 const* A) {
     double D =
         static_cast<double>(A->m[0][0]) * static_cast<double>(A->m[1][1]) * static_cast<double>(A->m[2][2]) * static_cast<double>(A->m[3][3])
         - static_cast<double>(A->m[0][0]) * static_cast<double>(A->m[1][1]) * static_cast<double>(A->m[3][2]) * static_cast<double>(A->m[2][3])
@@ -505,29 +309,13 @@ T reg_mat44_det(mat44 const* A)
 template float reg_mat44_det<float>(mat44 const* A);
 template double reg_mat44_det<double>(mat44 const* A);
 /* *************************************************************** */
-/* *************************************************************** */
-template<class T>
-T reg_mat33_det(mat33 const* A)
-{
-    double D = static_cast<T>((static_cast<double>(A->m[0][0]) * (static_cast<double>(A->m[1][1]) * static_cast<double>(A->m[2][2]) - static_cast<double>(A->m[1][2]) * static_cast<double>(A->m[2][1]))) -
-        (static_cast<double>(A->m[0][1]) * (static_cast<double>(A->m[1][0]) * static_cast<double>(A->m[2][2]) - static_cast<double>(A->m[1][2]) * static_cast<double>(A->m[2][0]))) +
-        (static_cast<double>(A->m[0][2]) * (static_cast<double>(A->m[1][0]) * static_cast<double>(A->m[2][1]) - static_cast<double>(A->m[1][1]) * static_cast<double>(A->m[2][0]))));
-    return static_cast<T>(D);
-}
-template float reg_mat33_det<float>(mat33 const* A);
-template double reg_mat33_det<double>(mat33 const* A);
-/* *************************************************************** */
-/* *************************************************************** */
-void reg_mat33_to_nan(mat33 *A)
-{
-   for(int i=0;i<3;++i)
-      for(int j=0;j<3;++j)
-         A->m[i][j] = std::numeric_limits<float>::quiet_NaN();
+void reg_mat33_to_nan(mat33 *A) {
+    for (int i = 0; i < 3; ++i)
+        for (int j = 0; j < 3; ++j)
+            A->m[i][j] = std::numeric_limits<float>::quiet_NaN();
 }
 /* *************************************************************** */
-/* *************************************************************** */
-mat33 reg_mat44_to_mat33(mat44 const* A)
-{
+mat33 reg_mat44_to_mat33(mat44 const* A) {
     mat33 out;
     out.m[0][0] = A->m[0][0];
     out.m[0][1] = A->m[0][1];
@@ -541,14 +329,10 @@ mat33 reg_mat44_to_mat33(mat44 const* A)
     return out;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-mat44 reg_mat44_mul(mat44 const* A, mat44 const* B)
-{
+mat44 reg_mat44_mul(mat44 const* A, mat44 const* B) {
     mat44 R;
-    for (int i = 0; i < 4; i++)
-    {
-        for (int j = 0; j < 4; j++)
-        {
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
             R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][0]) * static_cast<double>(B->m[0][j]) +
                                            static_cast<double>(A->m[i][1]) * static_cast<double>(B->m[1][j]) +
                                            static_cast<double>(A->m[i][2]) * static_cast<double>(B->m[2][j]) +
@@ -558,48 +342,32 @@ mat44 reg_mat44_mul(mat44 const* A, mat44 const* B)
     return R;
 }
 /* *************************************************************** */
-mat44 operator*(mat44 A, mat44 B)
-{
+mat44 operator*(mat44 A, mat44 B) {
     return reg_mat44_mul(&A, &B);
 }
 /* *************************************************************** */
-void reg_mat33_mul(mat44 const* mat,
-    float const* in,
-    float *out)
-{
-    out[0] = static_cast<float>(
-        static_cast<double>(in[0])*static_cast<double>(mat->m[0][0]) +
-        static_cast<double>(in[1])*static_cast<double>(mat->m[0][1]) +
-        static_cast<double>(mat->m[0][3]));
-    out[1] = static_cast<float>(
-        static_cast<double>(in[0])*static_cast<double>(mat->m[1][0]) +
-        static_cast<double>(in[1])*static_cast<double>(mat->m[1][1]) +
-        static_cast<double>(mat->m[1][3]));
-    return;
+void reg_mat33_mul(mat44 const* mat, float const* in, float *out) {
+    out[0] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat->m[0][0]) +
+                                static_cast<double>(in[1]) * static_cast<double>(mat->m[0][1]) +
+                                static_cast<double>(mat->m[0][3]));
+    out[1] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat->m[1][0]) +
+                                static_cast<double>(in[1]) * static_cast<double>(mat->m[1][1]) +
+                                static_cast<double>(mat->m[1][3]));
 }
 /* *************************************************************** */
-void reg_mat33_mul(mat33 const* mat,
-    float const* in,
-    float *out)
-{
-    out[0] = static_cast<float>(
-        static_cast<double>(in[0])*static_cast<double>(mat->m[0][0]) +
-        static_cast<double>(in[1])*static_cast<double>(mat->m[0][1]) +
-        static_cast<double>(mat->m[0][2]));
-    out[1] = static_cast<float>(
-        static_cast<double>(in[0])*static_cast<double>(mat->m[1][0]) +
-        static_cast<double>(in[1])*static_cast<double>(mat->m[1][1]) +
-        static_cast<double>(mat->m[1][2]));
-    return;
+void reg_mat33_mul(mat33 const* mat, float const* in, float *out) {
+    out[0] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat->m[0][0]) +
+                                static_cast<double>(in[1]) * static_cast<double>(mat->m[0][1]) +
+                                static_cast<double>(mat->m[0][2]));
+    out[1] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat->m[1][0]) +
+                                static_cast<double>(in[1]) * static_cast<double>(mat->m[1][1]) +
+                                static_cast<double>(mat->m[1][2]));
 }
 /* *************************************************************** */
-mat33 reg_mat33_mul(mat33 const* A, mat33 const* B)
-{
+mat33 reg_mat33_mul(mat33 const* A, mat33 const* B) {
     mat33 R;
-    for (int i = 0; i < 3; i++)
-    {
-        for (int j = 0; j < 3; j++)
-        {
+    for (int i = 0; i < 3; i++) {
+        for (int j = 0; j < 3; j++) {
             R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][0]) * static_cast<double>(B->m[0][j]) +
                                            static_cast<double>(A->m[i][1]) * static_cast<double>(B->m[1][j]) +
                                            static_cast<double>(A->m[i][2]) * static_cast<double>(B->m[2][j]));
@@ -608,82 +376,59 @@ mat33 reg_mat33_mul(mat33 const* A, mat33 const* B)
     return R;
 }
 /* *************************************************************** */
-mat33 operator*(mat33 A, mat33 B)
-{
+mat33 operator*(mat33 A, mat33 B) {
     return reg_mat33_mul(&A, &B);
 }
 /* *************************************************************** */
-/* *************************************************************** */
-mat33 reg_mat33_add(mat33 const* A, mat33 const* B)
-{
+mat33 reg_mat33_add(mat33 const* A, mat33 const* B) {
     mat33 R;
-    for (int i = 0; i < 3; i++)
-    {
-        for (int j = 0; j < 3; j++)
-        {
+    for (int i = 0; i < 3; i++) {
+        for (int j = 0; j < 3; j++) {
             R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][j]) + static_cast<double>(B->m[i][j]));
         }
     }
     return R;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-mat33 reg_mat33_trans(mat33 A)
-{
+mat33 reg_mat33_trans(mat33 A) {
     mat33 R;
-    for (int i = 0; i < 3; i++)
-    {
-        for (int j = 0; j < 3; j++)
-        {
+    for (int i = 0; i < 3; i++) {
+        for (int j = 0; j < 3; j++) {
             R.m[j][i] = A.m[i][j];
         }
     }
     return R;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-mat33 operator+(mat33 A, mat33 B)
-{
+mat33 operator+(mat33 A, mat33 B) {
     return reg_mat33_add(&A, &B);
 }
 /* *************************************************************** */
-/* *************************************************************** */
-mat44 reg_mat44_add(mat44 const* A, mat44 const* B)
-{
+mat44 reg_mat44_add(mat44 const* A, mat44 const* B) {
     mat44 R;
-    for (int i = 0; i < 4; i++)
-    {
-        for (int j = 0; j < 4; j++)
-        {
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
             R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][j]) + static_cast<double>(B->m[i][j]));
         }
     }
     return R;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-mat44 operator+(mat44 A, mat44 B)
-{
+mat44 operator+(mat44 A, mat44 B) {
     return reg_mat44_add(&A, &B);
 }
 /* *************************************************************** */
-/* *************************************************************** */
-mat33 reg_mat33_minus(mat33 const* A, mat33 const* B)
-{
+mat33 reg_mat33_minus(mat33 const* A, mat33 const* B) {
     mat33 R;
-    for (int i = 0; i < 3; i++)
-    {
-        for (int j = 0; j < 3; j++)
-        {
+    for (int i = 0; i < 3; i++) {
+        for (int j = 0; j < 3; j++) {
             R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][j]) - static_cast<double>(B->m[i][j]));
         }
     }
     return R;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D)
-{
+void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) {
     // A must be a symmetric matrix.
     // returns Q and D such that
     // Diagonal matrix D = QT * A * Q;  and  A = Q*D*QT
@@ -696,8 +441,7 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D)
     float tmp1, tmp2, mq;
     mat33 AQ;
     float thet, sgn, t, c;
-    for (int i = 0; i < maxsteps; ++i)
-    {
+    for (int i = 0; i < maxsteps; ++i) {
         // quat to matrix
         sqx = q[0] * q[0];
         sqy = q[1] * q[1];
@@ -749,25 +493,22 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D)
         k0 = (m[0] > m[1] && m[0] > m[2]) ? 0 : (m[1] > m[2]) ? 1 : 2; // index of largest element of offdiag
         k1 = (k0 + 1) % 3;
         k2 = (k0 + 2) % 3;
-        if (o[k0] == 0)
-        {
+        if (o[k0] == 0) {
             break;                          // diagonal already
         }
-        thet = (D->m[k2][k2] - D->m[k1][k1]) / (2.0*o[k0]);
+        thet = (D->m[k2][k2] - D->m[k1][k1]) / (2.0 * o[k0]);
         sgn = (thet > 0) ? 1 : -1;
         thet *= sgn;                      // make it positive
-        t = sgn / (thet + ((thet < 1.E6) ? sqrt(thet*thet + 1.0) : thet)); // sign(T)/(|T|+sqrt(T^2+1))
-        c = 1.0 / sqrt(t*t + 1.0);        //  c= 1/(t^2+1) , t=s/c
-        if (c == 1.0)
-        {
+        t = sgn / (thet + ((thet < 1.E6) ? sqrt(thet * thet + 1.0) : thet)); // sign(T)/(|T|+sqrt(T^2+1))
+        c = 1.0 / sqrt(t * t + 1.0);        //  c= 1/(t^2+1) , t=s/c
+        if (c == 1.0) {
             break;                          // no room for improvement - reached machine precision.
         }
         jr[0] = jr[1] = jr[2] = jr[3] = 0;
-        jr[k0] = sgn*sqrt((1.0 - c) / 2.0);    // using 1/2 angle identity sin(a/2) = sqrt((1-cos(a))/2)
+        jr[k0] = sgn * sqrt((1.0 - c) / 2.0);    // using 1/2 angle identity sin(a/2) = sqrt((1-cos(a))/2)
         jr[k0] *= -1.0;                     // since our quat-to-matrix convention was for v*M instead of M*v
         jr[3] = sqrt(1.0f - jr[k0] * jr[k0]);
-        if (jr[3] == 1.0)
-        {
+        if (jr[3] == 1.0) {
             break;                          // reached limits of floating point precision
         }
         q[0] = (q[3] * jr[0] + q[0] * jr[3] + q[1] * jr[2] - q[2] * jr[1]);
@@ -783,15 +524,11 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D)
 }
 
 /* *************************************************************** */
-/* *************************************************************** */
-mat33 operator-(mat33 A, mat33 B)
-{
+mat33 operator-(mat33 A, mat33 B) {
     return reg_mat33_minus(&A, &B);
 }
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_mat33_eye(mat33 *mat)
-{
+void reg_mat33_eye(mat33 *mat) {
     mat->m[0][0] = 1.f;
     mat->m[0][1] = mat->m[0][2] = 0.f;
     mat->m[1][1] = 1.f;
@@ -800,31 +537,21 @@ void reg_mat33_eye(mat33 *mat)
     mat->m[2][0] = mat->m[2][1] = 0.f;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-mat44 reg_mat44_minus(mat44 const* A, mat44 const* B)
-{
+mat44 reg_mat44_minus(mat44 const* A, mat44 const* B) {
     mat44 R;
-    for (int i = 0; i < 4; i++)
-    {
-        for (int j = 0; j < 4; j++)
-        {
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
             R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][j]) - static_cast<double>(B->m[i][j]));
         }
     }
     return R;
 }
-
-/* *************************************************************** */
 /* *************************************************************** */
-mat44 operator-(mat44 A, mat44 B)
-{
+mat44 operator-(mat44 A, mat44 B) {
     return reg_mat44_minus(&A, &B);
 }
-
 /* *************************************************************** */
-/* *************************************************************** */
-void reg_mat44_eye(mat44 *mat)
-{
+void reg_mat44_eye(mat44 *mat) {
     mat->m[0][0] = 1.f;
     mat->m[0][1] = mat->m[0][2] = mat->m[0][3] = 0.f;
     mat->m[1][1] = 1.f;
@@ -835,46 +562,26 @@ void reg_mat44_eye(mat44 *mat)
     mat->m[3][0] = mat->m[3][1] = mat->m[3][2] = 0.f;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-float reg_mat44_norm_inf(mat44 const* mat)
-{
-    float maxval = 0;
-    float newval = 0;
-    for (int i = 0; i < 4; i++)
-    {
-        for (int j = 0; j < 4; j++)
-        {
-            newval = fabsf(mat->m[i][j]);
-            maxval = (newval > maxval) ? newval : maxval;
-        }
-    }
-    return maxval;
-}
-/* *************************************************************** */
-/* *************************************************************** */
 void reg_mat44_mul(mat44 const* mat,
-    float const* in,
-    float *out)
-{
+                   float const* in,
+                   float *out) {
     out[0] = static_cast<float>(static_cast<double>(mat->m[0][0]) * static_cast<double>(in[0]) +
-        static_cast<double>(mat->m[0][1]) * static_cast<double>(in[1]) +
-        static_cast<double>(mat->m[0][2]) * static_cast<double>(in[2]) +
-        static_cast<double>(mat->m[0][3]));
+                                static_cast<double>(mat->m[0][1]) * static_cast<double>(in[1]) +
+                                static_cast<double>(mat->m[0][2]) * static_cast<double>(in[2]) +
+                                static_cast<double>(mat->m[0][3]));
     out[1] = static_cast<float>(static_cast<double>(mat->m[1][0]) * static_cast<double>(in[0]) +
-        static_cast<double>(mat->m[1][1]) * static_cast<double>(in[1]) +
-        static_cast<double>(mat->m[1][2]) * static_cast<double>(in[2]) +
-        static_cast<double>(mat->m[1][3]));
+                                static_cast<double>(mat->m[1][1]) * static_cast<double>(in[1]) +
+                                static_cast<double>(mat->m[1][2]) * static_cast<double>(in[2]) +
+                                static_cast<double>(mat->m[1][3]));
     out[2] = static_cast<float>(static_cast<double>(mat->m[2][0]) * static_cast<double>(in[0]) +
-        static_cast<double>(mat->m[2][1]) * static_cast<double>(in[1]) +
-        static_cast<double>(mat->m[2][2]) * static_cast<double>(in[2]) +
-        static_cast<double>(mat->m[2][3]));
+                                static_cast<double>(mat->m[2][1]) * static_cast<double>(in[1]) +
+                                static_cast<double>(mat->m[2][2]) * static_cast<double>(in[2]) +
+                                static_cast<double>(mat->m[2][3]));
 }
 /* *************************************************************** */
-/* *************************************************************** */
 void reg_mat44_mul(mat44 const* mat,
-    double const* in,
-    double *out)
-{
+                   double const* in,
+                   double *out) {
     double matD[4][4];
     for (int i = 0; i < 4; ++i)
         for (int j = 0; j < 4; ++j)
@@ -895,9 +602,7 @@ void reg_mat44_mul(mat44 const* mat,
     return;
 }
 /* *************************************************************** */
-/* *************************************************************** */
-mat44 reg_mat44_mul(mat44 const* A, double scalar)
-{
+mat44 reg_mat44_mul(mat44 const* A, double scalar) {
     mat44 out;
     out.m[0][0] = A->m[0][0] * scalar;
     out.m[0][1] = A->m[0][1] * scalar;
@@ -920,43 +625,23 @@ mat44 reg_mat44_mul(mat44 const* A, double scalar)
 /* *************************************************************** */
 void reg_mat44_disp(const mat44& mat, const std::string& title) {
     NR_COUT << title << ":\n"
-            << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\t" << mat.m[0][3] << "\n"
-            << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\t" << mat.m[1][3] << "\n"
-            << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << "\t" << mat.m[2][3] << "\n"
-            << mat.m[3][0] << "\t" << mat.m[3][1] << "\t" << mat.m[3][2] << "\t" << mat.m[3][3] << std::endl;
-}
-/* *************************************************************** */
-void reg_mat33_disp(const mat33& mat, const std::string& title){
-    NR_COUT << title << ":\n"
-            << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\n"
-            << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\n"
-            << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << std::endl;
+        << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\t" << mat.m[0][3] << "\n"
+        << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\t" << mat.m[1][3] << "\n"
+        << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << "\t" << mat.m[2][3] << "\n"
+        << mat.m[3][0] << "\t" << mat.m[3][1] << "\t" << mat.m[3][2] << "\t" << mat.m[3][3] << std::endl;
 }
 /* *************************************************************** */
 //is it square distance or just distance?
 // Helper function: Get the square of the Euclidean distance
 double get_square_distance3D(float * first_point3D, float * second_point3D) {
     return sqrt(Square(first_point3D[0] - second_point3D[0]) +
-          Square(first_point3D[1] - second_point3D[1]) +
-          Square(first_point3D[2] - second_point3D[2]));
+                Square(first_point3D[1] - second_point3D[1]) +
+                Square(first_point3D[2] - second_point3D[2]));
 }
 /* *************************************************************** */
 //is it square distance or just distance?
 double get_square_distance2D(float * first_point2D, float * second_point2D) {
     return sqrt(Square(first_point2D[0] - second_point2D[0]) +
-          Square(first_point2D[1] - second_point2D[1]));
+                Square(first_point2D[1] - second_point2D[1]));
 }
 /* *************************************************************** */
-// Calculate pythagorean distance
-template<class T>
-T pythag(T a, T b)
-{
-    T absa, absb;
-    absa = fabs(a);
-    absb = fabs(b);
-
-    if (absa > absb)
-        return (T)(absa * sqrt(1.0f + Square(absb / absa)));
-    else
-        return (absb == 0.0f ? 0.0f : (T)(absb * sqrt(1.0f + Square(absa / absb))));
-}
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
index c77e18fd..42c0cddd 100644
--- a/reg-lib/cpu/_reg_maths.h
+++ b/reg-lib/cpu/_reg_maths.h
@@ -76,40 +76,16 @@ DEVICE inline int Round(const T& x) {
 /* *************************************************************** */
 } // namespace NiftyReg
 /* *************************************************************** */
-template <class T>
-void reg_LUdecomposition(T *inputMatrix,
-                         size_t dim,
-                         size_t *index);
-/* *************************************************************** */
-template <class T>
-void reg_matrixMultiply(T *mat1,
-                        T *mat2,
-                        size_t *dim1,
-                        size_t *dim2,
-                        T * &res);
-/* *************************************************************** */
-template <class T>
-void reg_matrixInvertMultiply(T *mat,
-                              size_t dim,
-                              size_t *index,
-                              T *vec);
-/* *************************************************************** */
 template<class T>
 T* reg_matrix1DAllocate(size_t arraySize);
 /* *************************************************************** */
 template<class T>
-T* reg_matrix1DAllocateAndInitToZero(size_t arraySize);
-/* *************************************************************** */
-template<class T>
 void reg_matrix1DDeallocate(T* mat);
 /* *************************************************************** */
 template<class T>
 T** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY);
 /* *************************************************************** */
 template<class T>
-T** reg_matrix2DAllocateAndInitToZero(size_t arraySizeX, size_t arraySizeY);
-/* *************************************************************** */
-template<class T>
 void reg_matrix2DDeallocate(size_t arraySizeX, T** mat);
 /* *************************************************************** */
 template<class T>
@@ -160,10 +136,6 @@ void reg_mat33_eye(mat33 *mat);
 /* *************************************************************** */
 /** @brief Compute the determinant of a 3-by-3 matrix
 */
-template<class T> T reg_mat33_det(mat33 const* A);
-/* *************************************************************** */
-/** @brief Compute the determinant of a 3-by-3 matrix
-*/
 void reg_mat33_to_nan(mat33 *A);
 /* *************************************************************** */
 /** @brief Transform a mat44 to a mat33 matrix
@@ -218,16 +190,10 @@ void reg_mat44_eye(mat44 *mat);
  */
 template<class T> T reg_mat44_det(mat44 const* A);
 /* *************************************************************** */
-float reg_mat44_norm_inf(mat44 const* mat);
-/* *************************************************************** */
 /** @brief Display a mat44 matrix
  */
 void reg_mat44_disp(const mat44& mat, const std::string& title);
 /* *************************************************************** */
-/** @brief Display a mat33 matrix
- */
-void reg_mat33_disp(const mat33& mat, const std::string& title);
-/* *************************************************************** */
 double get_square_distance3D(float * first_point3D, float * second_point3D);
 /* *************************************************************** */
 double get_square_distance2D(float * first_point2D, float * second_point2D);
diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp
index 0ad50020..444a1721 100644
--- a/reg-lib/cpu/_reg_maths_eigen.cpp
+++ b/reg-lib/cpu/_reg_maths_eigen.cpp
@@ -66,117 +66,6 @@ void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) {
 template void svd<float>(float **in, size_t m, size_t n, float * w, float **v);
 template void svd<double>(double **in, size_t m, size_t n, double * w, double **v);
 /* *************************************************************** */
-/**
-* @brief SVD
-* @param in input matrix to decompose
-* @param size_m row
-* @param size_n colomn
-* @param U unitary matrices
-* @param S diagonal matrix
-* @param V unitary matrices
-*  X = U*S*V'
-*/
-template<class T>
-void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) {
-   if (in == nullptr)
-      NR_FATAL_ERROR("The specified matrix is empty");
-
-#ifdef _WIN32
-   long sm, sn, min_dim, i, j;
-   long size__m = (long)size_m, size__n = (long)size_n;
-#else
-   size_t sm, sn, min_dim, i, j;
-   size_t size__m = size_m, size__n = size_n;
-#endif
-   Eigen::MatrixXd m(size__m, size__n);
-
-   //Convert to Eigen matrix
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   shared(in, m, size__m, size__n) \
-   private(sn)
-#endif
-   for (sm = 0; sm < size__m; sm++)
-   {
-      for (sn = 0; sn < size__n; sn++)
-      {
-         m(sm, sn) = static_cast<double>(in[sm][sn]);
-      }
-   }
-
-   Eigen::JacobiSVD<Eigen::MatrixXd> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
-
-   min_dim = std::min(size__m, size__n);
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   shared(svd, min_dim, S) \
-   private(j)
-#endif
-   //Convert to C matrix
-   for (i = 0; i < min_dim; i++) {
-      for (j = 0; j < min_dim; j++) {
-         if (i == j) {
-            (*S)[i][j] = static_cast<T>(svd.singularValues()(i));
-         }
-         else {
-            (*S)[i][j] = 0;
-         }
-      }
-   }
-
-   if (size__m > size__n) {
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   shared(svd, min_dim, V) \
-   private(j)
-#endif
-      //Convert to C matrix
-      for (i = 0; i < min_dim; i++) {
-         for (j = 0; j < min_dim; j++) {
-            (*V)[i][j] = static_cast<T>(svd.matrixV()(i, j));
-
-         }
-      }
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   shared(svd, size__m, size__n, U) \
-   private(j)
-#endif
-      for (i = 0; i < size__m; i++) {
-         for (j = 0; j < size__n; j++) {
-            (*U)[i][j] = static_cast<T>(svd.matrixU()(i, j));
-         }
-      }
-   }
-   else {
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   shared(svd, min_dim, U) \
-   private(j)
-#endif
-      //Convert to C matrix
-      for (i = 0; i < min_dim; i++) {
-         for (j = 0; j < min_dim; j++) {
-            (*U)[i][j] = static_cast<T>(svd.matrixU()(i, j));
-
-         }
-      }
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   shared(svd, size__m, size__n, V) \
-   private(j)
-#endif
-      for (i = 0; i < size__n; i++) {
-         for (j = 0; j < size__m; j++) {
-            (*V)[i][j] = static_cast<T>(svd.matrixV()(i, j));
-         }
-      }
-   }
-
-}
-template void svd<float>(float **in, size_t size_m, size_t size_n, float ***U, float ***S, float ***V);
-template void svd<double>(double **in, size_t size_m, size_t size_n, double ***U, double ***S, double ***V);
-/* *************************************************************** */
 template<class T>
 T reg_matrix2DDet(T** mat, size_t m, size_t n) {
    if (m != n)
@@ -206,24 +95,6 @@ T reg_matrix2DDet(T** mat, size_t m, size_t n) {
 template float reg_matrix2DDet<float>(float** mat, size_t m, size_t n);
 template double reg_matrix2DDet<double>(double** mat, size_t m, size_t n);
 /* *************************************************************** */
-mat44 reg_mat44_sqrt(mat44 const* mat)
-{
-   mat44 X;
-   Eigen::Matrix4d m;
-   for (size_t i = 0; i < 4; ++i)
-   {
-      for (size_t j = 0; j < 4; ++j)
-      {
-         m(i, j) = static_cast<double>(mat->m[i][j]);
-      }
-   }
-   m = m.sqrt();
-   for (size_t i = 0; i < 4; ++i)
-      for (size_t j = 0; j < 4; ++j)
-         X.m[i][j] = static_cast<float>(m(i, j));
-   return X;
-}
-/* *************************************************************** */
 void reg_mat33_expm(mat33 *in_tensor)
 {
    int sm, sn;
@@ -318,24 +189,6 @@ mat44 reg_mat44_logm(mat44 const* mat)
    return X;
 }
 /* *************************************************************** */
-mat44 reg_mat44_inv(mat44 const* mat)
-{
-   mat44 out;
-   Eigen::Matrix4d m, m_inv;
-   for (size_t i = 0; i < 4; ++i) {
-      for (size_t j = 0; j < 4; ++j) {
-         m(i, j) = static_cast<double>(mat->m[i][j]);
-      }
-   }
-   m_inv = m.inverse();
-   for (size_t i = 0; i < 4; ++i)
-      for (size_t j = 0; j < 4; ++j)
-         out.m[i][j] = static_cast<float>(m_inv(i, j));
-   //
-   return out;
-
-}
-/* *************************************************************** */
 mat44 reg_mat44_avg2(mat44 const* A, mat44 const* B)
 {
    mat44 out;
diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h
index ce326b47..20867b69 100644
--- a/reg-lib/cpu/_reg_maths_eigen.h
+++ b/reg-lib/cpu/_reg_maths_eigen.h
@@ -11,20 +11,9 @@
 template <class T>
 void svd(T **in, size_t m, size_t n, T * w, T **v);
 /* *************************************************************** */
-template <class T>
-void svd(T **in, size_t m, size_t n, T ***U, T ***S, T ***V);
-/* *************************************************************** */
 template<class T>
 T reg_matrix2DDet(T** mat, size_t m, size_t n);
 /* *************************************************************** */
-/** @brief Compute the inverse of a  4-by-4 matrix
-*/
-mat44 reg_mat44_inv(mat44 const* mat);
-/* *************************************************************** */
-/** @brief Compute the square root of a 4-by-4 matrix
-*/
-mat44 reg_mat44_sqrt(mat44 const* mat);
-/* *************************************************************** */
 /** @brief Compute the log of a 3-by-3 matrix
 */
 void reg_mat33_expm(mat33 *in_tensor);
diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp
deleted file mode 100644
index 2ed3463f..00000000
--- a/reg-lib/cpu/_reg_mrf.cpp
+++ /dev/null
@@ -1,869 +0,0 @@
-#include "_reg_mrf.h"
-
-//DEBUG
-#include <iostream>
-#include <fstream>
-//DEBUG
-/*****************************************************/
-reg_mrf::reg_mrf(int _discrete_radius,
-                 int _discrete_increment,
-                 float _reg_weight,
-                 int _img_dim,
-                 size_t _node_number)
-{
-    this->measure = nullptr;
-    this->referenceImage = nullptr;
-    this->controlPointImage = nullptr;
-    this->discrete_radius = _discrete_radius;
-    this->discrete_increment = _discrete_increment;
-    this->regularisation_weight = _reg_weight;
-    //
-    this->image_dim = _img_dim;
-    this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1;
-    this->label_nD_num = static_cast<int>(std::pow((double) this->label_1D_num,this->image_dim));
-    this->node_number = _node_number;
-
-    // Allocate the discretised values in millimetre
-    this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *));
-    for(int i=0;i<this->image_dim;++i){
-        this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float));
-    }
-    //To store the cost data term - originaly SAD between images.
-    this->discretised_measures = (float *)calloc(this->node_number*this->label_nD_num,sizeof(float));
-
-    // Allocate the arrays to store the tree
-    this->orderedList = (int *) malloc(this->node_number*sizeof(int));
-    this->parentsList = (int *) malloc(this->node_number*sizeof(int));
-    this->edgeWeight = (float *) malloc(this->node_number*sizeof(float));
-
-    //regulatization - optimization
-    this->regularised_cost= (float *)malloc(this->node_number*this->label_nD_num*sizeof(float));
-    this->optimal_label_index=(int *)malloc(this->node_number*sizeof(int));
-}
-/*****************************************************/
-reg_mrf::reg_mrf(reg_measure *_measure,
-                 nifti_image *_referenceImage,
-                 nifti_image *_controlPointImage,
-                 int _discrete_radius,
-                 int _discrete_increment,
-                 float _reg_weight)
-{
-   this->measure = _measure;
-   this->referenceImage = _referenceImage;
-   this->controlPointImage = _controlPointImage;
-   this->discrete_radius = _discrete_radius;
-   this->discrete_increment = _discrete_increment;
-   this->regularisation_weight = _reg_weight;
-
-   this->image_dim = this->referenceImage->nz > 1 ? 3 :2;
-   this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1;
-   this->label_nD_num = static_cast<int>(std::pow((double) this->label_1D_num,this->image_dim));
-   this->node_number = NiftiImage::calcVoxelNumber(this->controlPointImage, 3);
-
-   this->input_transformation=nifti_copy_nim_info(this->controlPointImage);
-   this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float));
-   // Allocate the discretised values in voxel
-   int *discrete_values_vox = (int *)malloc(this->label_1D_num*sizeof(int));
-   int currentValue = -this->discrete_radius;
-   for(int i = 0;i<this->label_1D_num;i++) {
-      discrete_values_vox[i]=currentValue;
-      currentValue+=this->discrete_increment;
-   }
-
-   // Allocate the discretised values in millimetre
-   this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *));
-   for(int i=0;i<this->image_dim;++i){
-       this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float));
-   }
-   float disp_vox[3];
-   mat44 vox2mm = this->referenceImage->qto_xyz;
-   if(this->referenceImage->sform_code>0)
-      vox2mm = this->referenceImage->sto_xyz;
-   int i=0;
-   for(int z=0; z<this->label_1D_num; ++z){
-      disp_vox[2]=discrete_values_vox[z];
-      for(int y=0; y<this->label_1D_num; ++y){
-         disp_vox[1]=discrete_values_vox[y];
-         for(int x=0; x<this->label_1D_num; ++x){
-            disp_vox[0]=discrete_values_vox[x];
-            this->discrete_values_mm[0][i] =
-                  disp_vox[0] * vox2mm.m[0][0] +
-                  disp_vox[1] * vox2mm.m[0][1] +
-                  disp_vox[2] * vox2mm.m[0][2];
-            this->discrete_values_mm[1][i] =
-                  disp_vox[0] * vox2mm.m[1][0] +
-                  disp_vox[1] * vox2mm.m[1][1] +
-                  disp_vox[2] * vox2mm.m[1][2];
-            this->discrete_values_mm[2][i] =
-                  disp_vox[0] * vox2mm.m[2][0] +
-                  disp_vox[1] * vox2mm.m[2][1] +
-                  disp_vox[2] * vox2mm.m[2][2];
-            ++i;
-         }
-      }
-   }
-   free(discrete_values_vox);
-
-
-   //To store the cost data term - originaly SAD between images.
-   this->discretised_measures = (float *)calloc(this->node_number*this->label_nD_num,sizeof(float));
-
-   // Allocate the arrays to store the tree
-   this->orderedList = (int *) malloc(this->node_number*sizeof(int));
-   this->parentsList = (int *) malloc(this->node_number*sizeof(int));
-   this->edgeWeight = (float *) malloc(this->node_number*sizeof(float));
-
-   //regulatization - optimization
-   this->regularised_cost= (float *)malloc(this->node_number*this->label_nD_num*sizeof(float));
-   this->optimal_label_index=(int *)malloc(this->node_number*sizeof(int));
-
-   this->initialised = false;
-}
-/*****************************************************/
-reg_mrf::~reg_mrf()
-{
-   if(this->discretised_measures!=nullptr)
-      free(this->discretised_measures);
-   this->discretised_measures=nullptr;
-
-   if(this->orderedList!=nullptr)
-      free(this->orderedList);
-   this->orderedList=nullptr;
-
-   if(this->parentsList!=nullptr)
-      free(this->parentsList);
-   this->parentsList=nullptr;
-
-   if(this->edgeWeight!=nullptr)
-      free(this->edgeWeight);
-   this->edgeWeight=nullptr;
-
-   if(this->regularised_cost!=nullptr)
-      free(this->regularised_cost);
-   this->regularised_cost=nullptr;
-
-   if(this->optimal_label_index!=nullptr)
-      free(this->optimal_label_index);
-   this->optimal_label_index=nullptr;
-
-   for(int i=0; i<this->image_dim; ++i){
-      if(this->discrete_values_mm[i]!=nullptr)
-         free(this->discrete_values_mm[i]);
-      this->discrete_values_mm[i]=nullptr;
-   }
-   if(this->discrete_values_mm!=nullptr)
-      free(this->discrete_values_mm);
-   this->discrete_values_mm=nullptr;
-
-   if(this->input_transformation!=nullptr)
-      nifti_image_free(this->input_transformation);
-   this->input_transformation=nullptr;
-}
-/*****************************************************/
-void reg_mrf::Initialise()
-{
-   // Create the minimum spamming tree
-   int edge_number = this->node_number*this->image_dim*2;
-   float *edgeWeightMatrix = (float *)calloc(edge_number,sizeof(float));
-   int *index_neighbours = (int *)malloc(edge_number*sizeof(int));
-   for(int i =0;i<edge_number;i++) {
-      index_neighbours[i]=-1;
-   }
-   const size_t num_vertices = NiftiImage::calcVoxelNumber(this->controlPointImage, 3);
-   const int num_neighbours=this->controlPointImage->nz > 1 ? 6 : 4;
-
-   this->GetGraph(edgeWeightMatrix, index_neighbours);
-   this->GetPrimsMST(edgeWeightMatrix, index_neighbours, num_vertices, num_neighbours, true);
-   free(edgeWeightMatrix);
-   free(index_neighbours);
-   this->initialised = true;
-   NR_FUNC_CALLED();
-}
-/*****************************************************/
-float* reg_mrf::GetDiscretisedMeasurePtr()
-{
-   return this->discretised_measures;
-}
-/*****************************************************/
-void reg_mrf::SetDiscretisedMeasure(float* dm)
-{
-   for(size_t i=0;i<this->node_number*this->label_nD_num;i++) {
-       this->discretised_measures[i]=dm[i];
-   }
-}
-/*****************************************************/
-int* reg_mrf::GetOptimalLabelPtr()
-{
-   return optimal_label_index;
-}
-/*****************************************************/
-int* reg_mrf::GetOrderedListPtr()
-{
-   return this->orderedList;
-}
-/*****************************************************/
-void reg_mrf::SetOrderedList(int* ol)
-{
-   for(size_t i=0;i<this->node_number;i++) {
-       this->orderedList[i]=ol[i];
-   }
-}
-/*****************************************************/
-int* reg_mrf::GetParentsListPtr()
-{
-   return this->parentsList;
-}
-/*****************************************************/
-void reg_mrf::SetParentsList(int* pl)
-{
-   for(size_t i=0;i<this->node_number;i++) {
-       this->parentsList[i]=pl[i];
-   }
-}
-/*****************************************************/
-float* reg_mrf::GetEdgeWeightPtr()
-{
-   return this->edgeWeight;
-}
-/*****************************************************/
-void reg_mrf::SetEdgeWeight(float* ew)
-{
-    for(size_t i=0;i<this->node_number;i++) {
-        this->edgeWeight[i]=ew[i];
-    }
-}
-/*****************************************************/
-void reg_mrf::GetDiscretisedMeasure()
-{
-   measure->GetDiscretisedValue(this->controlPointImage,
-                                this->discretised_measures,
-                                this->discrete_radius,
-                                this->discrete_increment);
-   //Let's put the values positive for the mrf
-   for(size_t i=0;i<this->node_number*this->label_nD_num;i++) {
-       this->discretised_measures[i]=-this->discretised_measures[i];
-   }
-//DEBUG
-/*
-   std::ifstream myfile;
-   std::string pathDataFile = "/media/windows/Users/bpresles/OneDrive - University College London/NiftyReg/Mattias/dataForDeedsForNifty/similarity2.dat";
-   myfile.open(pathDataFile.c_str(), std::ios::in | std::ios::binary);
-   char buffer[128];
-   //
-   if (myfile.is_open()) {
-       // ok, proceed with output
-       NR_COUT<<"OK - file opened"<<std::endl;
-       for(int i=0;i<32388174;i++){
-           myfile.read(buffer, sizeof(float));
-           this->discretised_measures[i]=atof(buffer);
-       }
-       myfile.close();
-   }
-/////
-float* expectedDataCost = new float[32388174];
-std::string expectedDataCostName = "/media/windows/Users/bpresles/OneDrive - University College London/NiftyReg/Mattias/dataForDeedsForNifty/similarity2.dat";
-readFloatBinaryArray(expectedDataCostName.c_str(), 32388174, expectedDataCost);
-for(int i=0;i<32388174;i++){
-    this->discretised_measures[i]=expectedDataCost[i];
-}
-/////
-for(int i=0;i<32388174;i++){
-    this->discretised_measures[i]=rand() % 10;
-}
-*/
-//DEBUG
-   NR_FUNC_CALLED();
-}
-/*****************************************************/
-void reg_mrf::GetOptimalLabel()
-{
-   for(size_t node=0; node<this->node_number; ++node) {
-      this->optimal_label_index[node]=
-         std::min_element(this->regularised_cost+node*this->label_nD_num,this->regularised_cost+(node+1)*this->label_nD_num) -
-         (this->regularised_cost+node*this->label_nD_num);
-   }
-}
-/*****************************************************/
-void reg_mrf::UpdateNodePositions()
-{
-   //Update the control point position
-   float *cpPtrX = static_cast<float *>(this->controlPointImage->data);
-   float *cpPtrY = &cpPtrX[this->node_number];
-   float *cpPtrZ = &cpPtrY[this->node_number];
-
-   float *inputCpPtrX = static_cast<float *>(this->input_transformation->data);
-   float *inputCpPtrY = &inputCpPtrX[this->node_number];
-   float *inputCpPtrZ = &inputCpPtrY[this->node_number];
-
-   memcpy(cpPtrX, inputCpPtrX, this->node_number*3*sizeof(float));
-
-   size_t voxel=0;
-   for(int z=0; z<this->controlPointImage->nz; z++) {
-      for(int y=0; y<this->controlPointImage->ny; y++) {
-         for(int x=0; x<this->controlPointImage->nx; x++) {
-            int optimal_id = this->optimal_label_index[voxel];
-            cpPtrX[voxel] = inputCpPtrX[voxel] + this->discrete_values_mm[0][optimal_id];
-            cpPtrY[voxel] = inputCpPtrY[voxel] + this->discrete_values_mm[1][optimal_id];
-            cpPtrZ[voxel] = inputCpPtrZ[voxel] + this->discrete_values_mm[2][optimal_id];
-            ++voxel;
-         }
-      }
-   }
-   NR_FUNC_CALLED();
-}
-/*****************************************************/
-void reg_mrf::Run()
-{
-   if(this->initialised==false)
-      this->Initialise();
-   // Store the intial transformation parametrisation
-   memcpy(this->input_transformation->data, this->controlPointImage->data,
-          this->node_number*this->image_dim*sizeof(float));
-   // Compute the discretised data term values
-   this->GetDiscretisedMeasure();
-   // Compute the regularisation term
-   //for(int i=0;i<100; ++i){
-       this->GetRegularisation();
-       // Extract the best label
-       //memcpy(this->regularised_cost, this->discretised_measures, this->node_number*this->label_nD_num*sizeof(float));
-       this->GetOptimalLabel();
-       // Update the control point positions
-       this->UpdateNodePositions();
-   //}
-}
-/*****************************************************/
-/*****************************************************/
-template <class DataType>
-void GetGraph_core3D(nifti_image* controlPointGridImage,
-                     float* edgeWeightMatrix,
-                     int* index_neighbours,
-                     nifti_image *refImage,
-                     int *mask)
-{
-   int cpx, cpy, cpz, t, x, y, z, blockIndex, voxIndex, voxIndex_t;
-   float gridVox[3], imageVox[3];
-   // Define the transformation matrices
-   mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz;
-   if(controlPointGridImage->sform_code>0)
-      grid_vox2mm = &controlPointGridImage->sto_xyz;
-   mat44 *image_mm2vox = &refImage->qto_ijk;
-   if(refImage->sform_code>0)
-      image_mm2vox = &refImage->sto_ijk;
-   mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm);
-
-   const size_t node_number = NiftiImage::calcVoxelNumber(controlPointGridImage, 3);
-
-   // Compute the block size
-   int blockSize[3]={
-      Ceil(controlPointGridImage->dx / refImage->dx),
-      Ceil(controlPointGridImage->dy / refImage->dy),
-      Ceil(controlPointGridImage->dz / refImage->dz),
-   };
-   int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt;
-   // Allocate some static memory
-   float* refBlockValue = (float*) malloc(voxelBlockNumber*sizeof(float));
-   float* neighbourBlockValue = (float*) malloc(voxelBlockNumber*sizeof(float));
-   float SADNeighbourValue = 0;
-
-   // Pointers to the input image
-   DataType *refImgPtr = static_cast<DataType *>(refImage->data);
-
-   // Loop over all control points
-   for(cpz=0; cpz<controlPointGridImage->nz; ++cpz){
-      for(cpy=0; cpy<controlPointGridImage->ny; ++cpy){
-         for(cpx=0; cpx<controlPointGridImage->nx; ++cpx){
-            //Because I reuse this variable after.
-            gridVox[2] = cpz;
-            gridVox[1] = cpy;
-            gridVox[0] = cpx;
-            // Compute the corresponding image voxel position
-            reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
-            imageVox[0]=Round(imageVox[0]);
-            imageVox[1]=Round(imageVox[1]);
-            imageVox[2]=Round(imageVox[2]);
-            //DEBUG
-            //imageVox[0]=gridVox[0]*controlPointGridImage->dx / refImage->dx;
-            //imageVox[1]=gridVox[1]*controlPointGridImage->dy / refImage->dy;
-            //imageVox[2]=gridVox[2]*controlPointGridImage->dz / refImage->dz;
-            //DEBUG
-            // Extract the block in the reference image
-            blockIndex = 0;
-            for(z=imageVox[2]-blockSize[2]/2; z<imageVox[2]+blockSize[2]/2; ++z){
-               for(y=imageVox[1]-blockSize[1]/2; y<imageVox[1]+blockSize[1]/2; ++y){
-                  for(x=imageVox[0]-blockSize[0]/2; x<imageVox[0]+blockSize[0]/2; ++x){
-                     //DEBUG
-                     //for(z=imageVox[2]; z<imageVox[2]+blockSize[2]; ++z){
-                     //    for(y=imageVox[1]; y<imageVox[1]+blockSize[1]; ++y){
-                     //        for(x=imageVox[0]; x<imageVox[0]+blockSize[0]; ++x){
-                     //DEBUG
-                     if(x>-1 && x<refImage->nx && y>-1 && y<refImage->ny && z>-1 && z<refImage->nz) {
-                        voxIndex = x+y*refImage->nx+z*refImage->nx*refImage->ny;
-                        if(mask[voxIndex]>-1){
-                           for(t=0; t<refImage->nt; ++t){
-                              voxIndex_t = voxIndex+t*refImage->nx*refImage->ny*refImage->nz;
-                              refBlockValue[blockIndex] = refImgPtr[voxIndex_t];
-                              blockIndex++;
-                           } //t
-                        }
-                     } else {
-                        for(t=0; t<refImage->nt; ++t){
-                           refBlockValue[blockIndex] = 0;
-                           blockIndex++;
-                        }
-                     }
-                  } // x
-               } // y
-            } // z
-            //Let look at the neighbours now -- 6 in 3D
-            //standard six-neighbourhood for grid graph
-            const int nb_neighbours = 6;
-            int dx[nb_neighbours]={-1,1,0,0,0,0};
-            int dy[nb_neighbours]={0,0,-1,1,0,0};
-            int dz[nb_neighbours]={0,0,0,0,-1,1};
-
-            for(int ngh_index=0;ngh_index<nb_neighbours;ngh_index++) {
-
-               gridVox[2] = cpz+dz[ngh_index];
-               gridVox[1] = cpy+dy[ngh_index];
-               gridVox[0] = cpx+dx[ngh_index];
-               if(gridVox[0]>=0 && gridVox[0]<controlPointGridImage->nx &&
-                     gridVox[1]>=0 && gridVox[1]<controlPointGridImage->ny &&
-                     gridVox[2]>=0 && gridVox[2]<controlPointGridImage->nz) {
-                  //DEBUG
-                  //if(gridVox[0]>=0 && gridVox[0]<m1 &&
-                  //   gridVox[1]>=0 && gridVox[1]<n1 &&
-                  //   gridVox[2]>=0 && gridVox[2]<o1) {
-                  //DEBUG
-                  // Compute the corresponding image voxel position
-                  reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
-                  imageVox[0]=Round(imageVox[0]);
-                  imageVox[1]=Round(imageVox[1]);
-                  imageVox[2]=Round(imageVox[2]);
-                  //DEBUG
-                  //imageVox[0]=gridVox[0]*controlPointGridImage->dx / refImage->dx;
-                  //imageVox[1]=gridVox[1]*controlPointGridImage->dy / refImage->dy;
-                  //imageVox[2]=gridVox[2]*controlPointGridImage->dz / refImage->dz;
-                  //DEBUG
-                  if(imageVox[0]>-1 && imageVox[0]<refImage->nx &&
-                        imageVox[1]>-1 && imageVox[1]<refImage->ny &&
-                        imageVox[2]>-1 && imageVox[2]<refImage->nz) {
-                     blockIndex = 0;
-                     for(z=imageVox[2]-blockSize[2]/2; z<imageVox[2]+blockSize[2]/2; ++z){
-                        for(y=imageVox[1]-blockSize[1]/2; y<imageVox[1]+blockSize[1]/2; ++y){
-                           for(x=imageVox[0]-blockSize[0]/2; x<imageVox[0]+blockSize[0]/2; ++x){
-                              //DEBUG
-                              //for(z=imageVox[2]; z<imageVox[2]+blockSize[2]; ++z){
-                              //    for(y=imageVox[1]; y<imageVox[1]+blockSize[1]; ++y){
-                              //        for(x=imageVox[0]; x<imageVox[0]+blockSize[0]; ++x){
-                              //DEBUG
-                              if(x>-1 && x<refImage->nx && y>-1 && y<refImage->ny && z>-1 && z<refImage->nz) {
-                                 voxIndex = x+y*refImage->nx+z*refImage->nx*refImage->ny;
-                                 if(mask[voxIndex]>-1){
-                                    for(t=0; t<refImage->nt; ++t){
-                                       voxIndex_t = voxIndex+t*refImage->nx*refImage->ny*refImage->nz;
-                                       neighbourBlockValue[blockIndex] = refImgPtr[voxIndex_t];
-                                       blockIndex++;
-                                    } //t
-                                 }
-                              }else {
-                                 for(t=0; t<refImage->nt; ++t){
-                                    neighbourBlockValue[blockIndex] = 0;
-                                    blockIndex++;
-                                 } //t
-                              }
-                           } // x
-                        } // y
-                     } // z
-
-                     SADNeighbourValue = 0;
-                     for(int sadIndex=0;sadIndex<voxelBlockNumber;sadIndex++) {
-                        SADNeighbourValue += std::abs(neighbourBlockValue[sadIndex]-refBlockValue[sadIndex]);
-                     }
-                     if(SADNeighbourValue == 0) {
-                         SADNeighbourValue = std::numeric_limits<float>::epsilon();
-                     }
-                     //store results:
-                     index_neighbours[cpx+cpy*controlPointGridImage->nx+
-                           cpz*controlPointGridImage->nx*controlPointGridImage->ny+
-                           ngh_index*node_number]=
-                           cpx+dx[ngh_index]+(cpy+dy[ngh_index])*controlPointGridImage->nx+
-                           (cpz+dz[ngh_index])*controlPointGridImage->nx*controlPointGridImage->ny;
-                     edgeWeightMatrix[cpx+cpy*controlPointGridImage->nx+
-                           cpz*controlPointGridImage->nx*controlPointGridImage->ny+
-                           ngh_index*node_number]=SADNeighbourValue;
-                     //DEBUG
-                     //index_neighbours[cpx+cpy*m1+
-                     //        cpz*m1*n1+
-                     //        ngh_index*num_vertices]=
-                     //        cpx+dx[ngh_index]+(cpy+dy[ngh_index])*m1+
-                     //        (cpz+dz[ngh_index])*m1*n1;
-                     //edgeWeightMatrix[cpx+cpy*m1+
-                     //        cpz*m1*n1+
-                     //        ngh_index*num_vertices]=SADNeighbourValue;
-                     //DEBUG
-                  } else {
-                     //store results:
-                     index_neighbours[cpx+cpy*controlPointGridImage->nx+
-                           cpz*controlPointGridImage->nx*controlPointGridImage->ny+
-                           ngh_index*node_number]=
-                           cpx+dx[ngh_index]+(cpy+dy[ngh_index])*controlPointGridImage->nx+
-                           (cpz+dz[ngh_index])*controlPointGridImage->nx*controlPointGridImage->ny;
-
-                     edgeWeightMatrix[cpx+cpy*controlPointGridImage->nx+
-                           cpz*controlPointGridImage->nx*controlPointGridImage->ny+
-                           ngh_index*node_number]=0;
-                     //DEBUG
-                     //index_neighbours[cpx+cpy*m1+
-                     //        cpz*m1*n1+
-                     //        ngh_index*num_vertices]=
-                     //        cpx+dx[ngh_index]+(cpy+dy[ngh_index])*m1+
-                     //        (cpz+dz[ngh_index])*m1*n1;
-                     //edgeWeightMatrix[cpx+cpy*m1+
-                     //        cpz*m1*n1+
-                     //        ngh_index*num_vertices]=0;
-                     //DEBUG
-                  }
-               }
-            }
-         } //cpx
-      } //cpy
-   } //cpz
-   //
-   //
-   //normalise edgeweights by stddev of image ???????
-   float stdim=reg_tools_getSTDValue(refImage);
-
-   for(size_t i=0;i<node_number*6;i++){
-      edgeWeightMatrix[i]/=voxelBlockNumber;
-   }
-   for(size_t i=0;i<node_number*6;i++){
-      edgeWeightMatrix[i]=-exp(-edgeWeightMatrix[i]/(2.0f*stdim));
-   }
-   //DEBUG
-   //for(int i=0;i<num_vertices*6;i++){
-   //    edgeWeightMatrix[i]/=voxelBlockNumber;
-   //    }
-   //for(int i=0;i<num_vertices*6;i++){
-   //    edgeWeightMatrix[i]=-exp(-edgeWeightMatrix[i]/(2.0f*stdim));
-   //    }
-   //DEBUG
-   free(neighbourBlockValue);
-   free(refBlockValue);
-}
-/* *************************************************************** */
-template <class DataType>
-void GetGraph_core2D(nifti_image* controlPointGridImage,
-                     float* edgeWeightMatrix,
-                     int* index_neighbours,
-                     nifti_image *refImage,
-                     int *mask)
-{
-   NR_ERROR("Not yet implemented");
-}
-/* *************************************************************** */
-void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours)
-{
-   if(this->referenceImage->nz > 1) {
-      switch(this->referenceImage->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         GetGraph_core3D<float>
-               (this->controlPointImage,
-                edgeWeightMatrix,
-                index_neighbours,
-                this->referenceImage,
-                this->measure->GetReferenceMask()
-                );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         GetGraph_core3D<double>
-               (this->controlPointImage,
-                edgeWeightMatrix,
-                index_neighbours,
-                this->referenceImage,
-                this->measure->GetReferenceMask()
-                );
-         break;
-      default:
-         NR_FATAL_ERROR("Unsupported datatype");
-      }
-   } else {
-      switch(this->referenceImage->datatype)
-      {
-      case NIFTI_TYPE_FLOAT32:
-         GetGraph_core2D<float>
-               (this->controlPointImage,
-                edgeWeightMatrix,
-                index_neighbours,
-                this->referenceImage,
-                this->measure->GetReferenceMask()
-                );
-         break;
-      case NIFTI_TYPE_FLOAT64:
-         GetGraph_core2D<double>
-               (this->controlPointImage,
-                edgeWeightMatrix,
-                index_neighbours,
-                this->referenceImage,
-                this->measure->GetReferenceMask()
-                );
-         break;
-      default:
-         NR_FATAL_ERROR("Unsupported datatype");
-      }
-   }
-}
-/* *************************************************************** */
-/*****************************************************/
-//CUT THE EDGES WITH HIGH COST = INTENSITY DIFFERENCES!
-/*****************************************************/
-void reg_mrf::GetPrimsMST(float *edgeWeightMatrix,
-                          int *index_neighbours, int num_vertices, int num_neighbours,bool norm)
-{
-   //size_t num_vertices = NiftiImage::calcVoxelNumber(controlPointGridImage, 3);
-
-   //DEBUG
-   //int blockSize[3]={
-   //    Ceil(controlPointImage->dx / referenceImage->dx),
-   //    Ceil(controlPointImage->dy / referenceImage->dy),
-   //    Ceil(controlPointImage->dz / referenceImage->dz),
-   //};
-   //size_t sz=NiftiImage::calcVoxelNumber(referenceImage, 3);
-   //int m=referenceImage->nx;
-   //int n=referenceImage->ny;
-   //int o=referenceImage->nz;
-   //int grid_step = blockSize[0];
-   //int m1=m/grid_step;
-   //int n1=n/grid_step;
-   //int o1=o/grid_step;
-   //num_vertices = m1*n1*o1;
-   //DEBUG
-   int currentNode=0; //arbritary root node
-   //list of nodes already in MST
-   bool* addedToMST=new bool[num_vertices];
-   for(int i=0;i<num_vertices;i++){
-      addedToMST[i]=false;
-   }
-   addedToMST[currentNode]=true;
-   std::pair<short,int>* treeLevel=new std::pair<short,int>[num_vertices];
-   treeLevel[currentNode]=std::pair<short,int>(0,currentNode);
-
-   //int num_neighbours=this->controlPointImage->nz > 1 ? 6 : 4;
-
-   this->parentsList[currentNode]=-1; //root has no parent
-   std::priority_queue<Edge> priority; //priority queue - ordered list - high --- low
-   //Edge comparison - a edge is inferior if weight is bigger (cf. edge struct) ==> ordered from low to high weights
-
-   float mincost=0.0f;
-   //run n-1 times so that all nodes added
-   for(int i=0;i<num_vertices-1;i++){
-      //add edges of new node to priority queue
-      for(int j=0;j<num_neighbours;j++){
-         int index_j=index_neighbours[currentNode+j*num_vertices];
-         float weight=edgeWeightMatrix[currentNode+j*num_vertices];
-         //index_neighbours is initialized at -1
-         if(index_j>=0){
-            Edge current_edge = {weight,currentNode,index_j};
-            priority.push(current_edge);//weight - start index - end index
-         }
-
-      }
-      currentNode=-1;
-      while(currentNode==-1){
-         Edge bestEdge=priority.top();
-         priority.pop();
-         //test whether endIndex of edge is already in MST
-         if(addedToMST[bestEdge.startIndex] && !addedToMST[bestEdge.endIndex]){
-            if(norm) {
-                mincost+=-bestEdge.weight; //if normalization by -exp
-            } else {
-                mincost+=bestEdge.weight;
-            }
-            //
-            if(norm) {
-                this->edgeWeight[bestEdge.endIndex]=-bestEdge.weight;//if normalization by -exp
-            } else {
-                this->edgeWeight[bestEdge.endIndex]=bestEdge.weight;
-            }
-
-            currentNode=bestEdge.endIndex;
-            addedToMST[bestEdge.endIndex]=true;
-            this->parentsList[bestEdge.endIndex]=bestEdge.startIndex;
-            treeLevel[bestEdge.endIndex]=std::pair<short,int>(treeLevel[bestEdge.startIndex].first+1,bestEdge.endIndex);
-         }
-      }
-   }
-   //generate list of nodes ordered by tree depth
-   std::sort(treeLevel,treeLevel+num_vertices);
-   for(int i=0;i<num_vertices;i++){
-      orderedList[i]=treeLevel[i].second;
-   }
-   //Free memory
-   delete []treeLevel;
-   delete []addedToMST;
-}
-/*****************************************************/
-void reg_mrf::GetRegularisation()
-{
-   /* Incremental diffusion regularisation of parametrised transformation
-     using (globally optimal) belief-propagation on minimum spanning tree.
-     Fast distance transform uses squared differences.
-     Similarity cost for each node and label has to be given as input.
-    */
-
-   //buffer variable
-   float *cost1=new float[this->label_nD_num];
-   float *vals=new float[this->label_nD_num];
-   int *inds=new int[this->label_nD_num];
-
-
-   float* message=new float[this->node_number*this->label_nD_num];
-   //initialize the energy term with the data cost value
-   for(size_t i=0;i<this->node_number*this->label_nD_num;i++){
-      //matrix = discretisedValue (first dimension displacement label, second dim. control point)
-      this->regularised_cost[i]=this->discretised_measures[i];
-      message[i]=0;
-   }
-
-   for(int i=0;i<this->label_nD_num;i++){
-      cost1[i]=0;
-   }
-
-   //weight of the regularisation - constant weight
-   //float edgew=this->regularisation_weight + std::numeric_limits<float>::epsilon();
-   //float edgew1=1.0f/edgew;
-
-   //calculate mst-cost
-   for(int i=(this->node_number-1);i>0;i--){ //do for each control point
-      //retreive the child of the current node - start with the leave
-      int ochild=this->orderedList[i];//ordered list of all the nodes from root to leaves
-      //retreive the parent node of the child
-      int oparent=this->parentsList[ochild];
-      //retreive the weight of the edge between oparent and ochild
-      float edgew=this->edgeWeight[ochild];
-      float edgew1=1.0f/edgew;
-
-      for(int l=0;l<this->label_nD_num;l++){
-         //matrix = discretisedValue (first dimension displacement label, second dim. control point)
-         //weighted by the  edge weight
-         cost1[l]=this->regularised_cost[ochild*this->label_nD_num+l]*edgew;
-      }
-
-      //fast distance transform
-      //It is were the regularisation is calculated
-      dt3x(cost1,inds,this->label_1D_num,0,0,0);
-
-      //add mincost to parent node
-      for(int l=0;l<this->label_nD_num;l++){
-         message[ochild*this->label_nD_num+l]=cost1[l]*edgew1;
-         this->regularised_cost[oparent*this->label_nD_num+l]+=cost1[l]*edgew1;
-      }
-   }
-
-   //backwards pass mst-cost
-   for(size_t i=1;i<this->node_number;i++){ //other direction
-      int ochild=this->orderedList[i];
-      int oparent=this->parentsList[ochild];
-      //retreive the weight of the edge between oparent and ochild
-      float edgew=this->edgeWeight[ochild];
-      float edgew1=1.0f/edgew;
-
-      for(int l=0;l<this->label_nD_num;l++){
-         cost1[l]=(this->regularised_cost[oparent*this->label_nD_num+l]-message[ochild*this->label_nD_num+l]+message[oparent*this->label_nD_num+l])*edgew;
-      }
-
-      dt3x(cost1,inds,this->label_1D_num,0,0,0);
-      for(int l=0;l<this->label_nD_num;l++){
-         message[ochild*this->label_nD_num+l]=cost1[l]*edgew1;
-      }
-
-   }
-
-   for(size_t i=0;i<this->node_number*this->label_nD_num;i++){
-      this->regularised_cost[i]+=message[i];
-   }
-
-   delete []message;
-   delete []cost1;
-   delete []vals;
-   delete []inds;
-}
-/*****************************************************/
-/*****************************************************/
-//fast distance transform for message computation following Pedro Felzenszwalb's implementation
-//see http://cs.brown.edu/~pff/dt/index.html for details
-void dt1sq(float *val,int* ind,int len,float offset,int k,int* v,float* z,float* f,int* ind1){
-   float INF=1e10;
-   int j=0;
-   z[0]=-INF;
-   z[1]=INF;
-   v[0]=0;
-   for(int q=1;q<len;q++){
-      float s=((val[q*k]+q*q)-(val[v[j]*k]+v[j]*v[j]))/(2.0*(q-v[j]));
-
-      while(s<=z[j]){
-         j--;
-         s=((val[q*k]+q*q)-(val[v[j]*k]+v[j]*v[j]))/(2.0*(q-v[j]));
-      }
-
-      j++;
-      v[j]=q;
-      z[j]=s;
-      z[j+1]=INF;
-
-   }
-   for(int q=0;q<len;q++){
-      f[q]=val[q*k]; //needs to be added to fastDT2 otherwise incorrect
-      ind1[q]=ind[q*k];
-   }
-
-   j=0;
-   for(int q=0;q<len;q++){
-      while(z[j+1]<(q-offset)){  //was wrong -offset is now correct
-         j++;
-      }
-      ind[q*k]=ind1[v[j]];
-      val[q*k]=(q-offset-v[j])*(q-offset-v[j])+f[v[j]];
-   }
-}
-
-void dt3x(float* r,int* indr,int rl,float dx,float dy,float dz){
-   //rl is length of one side
-   for(int i=0;i<rl*rl*rl;i++){
-      indr[i]=i;
-   }
-   //r contains D*(fp) = D(fp)+ Sum(Cc(fp))
-   int* v=new int[rl]; //slightly faster if not intitialised in each loop
-   float* z=new float[rl+1];
-   float* f=new float[rl];
-   int* i1=new int[rl];
-
-   //we calculate here the ||up-uq||^2 / ||xp - xq|| ->1st dim => up
-   for(int k=0;k<rl;k++){
-      for(int i=0;i<rl;i++){
-         dt1sq(r+i+k*rl*rl,indr+i+k*rl*rl,rl,-dx,rl,v,z,f,i1);
-      }
-   }
-   //we calculate here the ||up-uq||^2 / ||xp - xq|| ->2nd dim => vp
-   for(int k=0;k<rl;k++){
-      for(int j=0;j<rl;j++){
-         dt1sq(r+j*rl+k*rl*rl,indr+j*rl+k*rl*rl,rl,-dy,1,v,z,f,i1);//);
-      }
-   }
-   //we calculate here the ||up-uq||^2 / ||xp - xq|| ->3rd dim => wp
-   for(int j=0;j<rl;j++){
-      for(int i=0;i<rl;i++){
-         dt1sq(r+i+j*rl,indr+i+j*rl,rl,-dz,rl*rl,v,z,f,i1);//);
-      }
-   }
-   //calculate the min -- of r = Cp(fq) = D(fp)+ Sum(Cc(fp)) + \alpha R(fp,fq)
-   float min1=*std::min_element(r,r+rl*rl*rl);
-   for(int i=0;i<rl*rl*rl;i++){
-      r[i]-=min1;
-   }
-   delete []i1;
-   delete []f;
-
-   delete []v;
-   delete []z;
-}
diff --git a/reg-lib/cpu/_reg_mrf.h b/reg-lib/cpu/_reg_mrf.h
deleted file mode 100644
index 9471d41a..00000000
--- a/reg-lib/cpu/_reg_mrf.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/**
- * @file _reg_mrf.h
- * @author Benoit Presles
- * @author Mattias Heinrich
- * @date 01/01/2016
- * @brief reg_mrf class for discrete optimisation
- *
- * Copyright (c) 2016-2018, University College London
- * Copyright (c) 2018, NiftyReg Developers.
- * All rights reserved.
- * See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "_reg_measure.h"
-#include "_reg_localTrans_regul.h"
-#include <cmath>
-#include <queue>
-#include <algorithm>
-#include "_reg_maths.h"
-
-struct Edge{
-   float weight;
-   int startIndex;
-   int endIndex;
-   friend bool operator<(Edge a,Edge b){
-      return a.weight>b.weight;
-      //return a.weight<b.weight;
-   }
-};
-
-class reg_mrf
-{
-public:
-    /// @brief Constructor
-   reg_mrf(int _discrete_radius,
-           int _discrete_increment,
-           float _reg_weight,
-           int _img_dim,
-           size_t _node_number);
-   /// @brief Constructor
-   reg_mrf(reg_measure *_measure,
-           nifti_image *_referenceImage,
-           nifti_image *_controlPointImage,
-           int discrete_radius,
-           int _discrete_increment,
-           float _reg_weight);
-   /// @brief Destructor
-   ~reg_mrf();
-   void Run();
-   //4 the tests
-   void GetDiscretisedMeasure();
-   float* GetDiscretisedMeasurePtr();
-   void SetDiscretisedMeasure(float* dm);
-   //
-   void GetRegularisation();
-   //
-   void GetOptimalLabel();
-   int* GetOptimalLabelPtr();
-   //
-   int* GetOrderedListPtr();
-   int* GetParentsListPtr();
-   float* GetEdgeWeightPtr();
-   //
-   void SetOrderedList(int* ol);
-   void SetParentsList(int* pl);
-   void SetEdgeWeight(float* ew);
-   //
-   void GetPrimsMST(float *, int *, int, int, bool);
-
-private:
-   void Initialise();
-   void UpdateNodePositions();
-   void GetGraph(float *, int *);
-
-   reg_measure *measure; ///< Measure of similarity object to use for the data term
-   nifti_image* referenceImage; ///< Reference image in which the transformation is parametrised
-   nifti_image* controlPointImage; ///< Control point image that contains the transformation to optimise
-   int discrete_radius; ///< Radius of the discretised grid
-   int discrete_increment; ///< Increment step size in the discretised grid
-   float regularisation_weight; ///< Weight given to the regularisation
-
-   int image_dim; ///< Dimension of the reference image
-   size_t node_number; ///< Number of nodes in the tree
-
-   float **discrete_values_mm; ///< All discretised values in millimetre
-
-   int* orderedList; ///< Ordered list of nodes from the root to the leaves
-   int* parentsList; ///< List that gives parent's index for each node
-   float* edgeWeight; ///< Weight of edge between two nodes
-
-   int label_1D_num; ///< Number of discretised values per axis
-   int label_nD_num; ///< Total number of discretised values
-
-   nifti_image *input_transformation;
-   float *discretised_measures; ///< All discretised measures of similarity
-   float* regularised_cost; ///< Discretised cost that embeds data term and regularisation cost
-   int* optimal_label_index; ///< Optimimal label index for each node
-
-   bool initialised; ///< Variable to access if the object has been initialised
-};
-/********************************************************************************************************/
-template <class DataType>
-void GetGraph_core3D(nifti_image* controlPointGridImage,
-                     float* edgeWeightMatrix,
-                     float* index_neighbours,
-                     nifti_image *refImage,
-                     int *mask);
-template <class DataType>
-void GetGraph_core2D(nifti_image* controlPointGridImage,
-                     float* edgeWeightMatrix,
-                     float* index_neighbours,
-                     nifti_image *refImage,
-                     int *mask);
-void dt1sq(float *val,int* ind,int len,float offset,int k,int* v,float* z,float* f,int* ind1);
-void dt3x(float* r,int* indr,int rl,float dx,float dy,float dz);
-/********************************************************************************************************/
diff --git a/reg-lib/cpu/_reg_polyAffine.cpp b/reg-lib/cpu/_reg_polyAffine.cpp
deleted file mode 100644
index 231a6797..00000000
--- a/reg-lib/cpu/_reg_polyAffine.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-/**
- * @file _reg_polyAffine.cpp
- * @author Marc Modat
- * @date 16/11/2012
- *
- * Copyright (c) 2012-2018, University College London
- * Copyright (c) 2018, NiftyReg Developers.
- * All rights reserved.
- * See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#include "_reg_polyAffine.h"
-
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-reg_polyAffine<T>::reg_polyAffine(int refTimePoints,int floTimePoints)
-   : reg_base<T>::reg_base(refTimePoints,floTimePoints)
-{
-   this->executableName=(char *)"NiftyReg PolyAffine";
-   NR_FUNC_CALLED();
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-reg_polyAffine<T>::~reg_polyAffine()
-{
-   NR_FUNC_CALLED();
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::GetDeformationField()
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::SetGradientImageToZero()
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::GetApproximatedGradient()
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-double reg_polyAffine<T>::GetObjectiveFunctionValue()
-{
-
-   return 0;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::UpdateParameters(float stepSize)
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-T reg_polyAffine<T>::NormaliseGradient()
-{
-   return 0;
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::GetSimilarityMeasureGradient()
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::GetObjectiveFunctionGradient()
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::DisplayCurrentLevelParameters()
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::UpdateBestObjFunctionValue()
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::PrintCurrentObjFunctionValue(T stepSize)
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::PrintInitialObjFunctionValue()
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::AllocateTransformationGradient()
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-template <class T>
-void reg_polyAffine<T>::DeallocateTransformationGradient()
-{
-
-}
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
-/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */
diff --git a/reg-lib/cpu/_reg_polyAffine.h b/reg-lib/cpu/_reg_polyAffine.h
deleted file mode 100644
index 28a7f5ff..00000000
--- a/reg-lib/cpu/_reg_polyAffine.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * @file _reg_polyAffine.h
- * @author Marc Modat
- * @date 16/11/2012
- *
- * Copyright (c) 2012-2018, University College London
- * Copyright (c) 2018, NiftyReg Developers.
- * All rights reserved.
- * See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "_reg_base.h"
-
-template <class T>
-class reg_polyAffine : public reg_base<T>
-{
-protected:
-   void GetDeformationField();
-   void SetGradientImageToZero();
-   void GetApproximatedGradient();
-   double GetObjectiveFunctionValue();
-   void UpdateParameters(float);
-   T NormaliseGradient();
-   void GetSimilarityMeasureGradient();
-   void GetObjectiveFunctionGradient();
-   void DisplayCurrentLevelParameters();
-   void UpdateBestObjFunctionValue();
-   void PrintCurrentObjFunctionValue(T);
-   void PrintInitialObjFunctionValue();
-   void AllocateTransformationGradient();
-   void DeallocateTransformationGradient();
-
-public:
-   reg_polyAffine(int refTimePoints,int floTimePoints);
-   ~reg_polyAffine();
-};
-
-#include "_reg_polyAffine.cpp"
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 483d5911..0d9d1785 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -1131,7 +1131,6 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
                     ASAt = A * S * reg_mat33_trans(A);
 
                     TmS = T - ASAt;
-                    //reg_mat33_disp(&TmS, "matTmS");
 
                     reg_mat33_diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal);
 
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index b20f9581..b000fbd4 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -14,11 +14,6 @@
 
 // #define MRF_USE_SAD
 
-/* *************************************************************** */
-reg_ssd::reg_ssd(): reg_measure() {
-    memset(this->normaliseTimePoint, 0, 255 * sizeof(bool));
-    NR_FUNC_CALLED();
-}
 /* *************************************************************** */
 void reg_ssd::InitialiseMeasure(nifti_image *refImg,
                                 nifti_image *floImg,
@@ -338,247 +333,6 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                                    nifti_image *refImage,
                                    nifti_image *warImage,
                                    int *mask) {
-    int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, discretisedIndex;
-    size_t voxIndex, voxIndex_t;
-    int label_1D_number = (discretiseRadius / discretiseStep) * 2 + 1;
-    int label_2D_number = label_1D_number * label_1D_number;
-    int label_nD_number = label_2D_number * label_1D_number;
-    //output matrix = discretisedValue (first dimension displacement label, second dim. control point)
-    float gridVox[3], imageVox[3];
-    float currentValue;
-    // Define the transformation matrices
-    mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz;
-    if (controlPointGridImage->sform_code > 0)
-        grid_vox2mm = &controlPointGridImage->sto_xyz;
-    mat44 *image_mm2vox = &refImage->qto_ijk;
-    if (refImage->sform_code > 0)
-        image_mm2vox = &refImage->sto_ijk;
-    mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm);
-
-    // Compute the block size
-    int blockSize[3] = {
-        Ceil(controlPointGridImage->dx / refImage->dx),
-        Ceil(controlPointGridImage->dy / refImage->dy),
-        Ceil(controlPointGridImage->dz / refImage->dz),
-    };
-    int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt;
-    int currentControlPoint = 0;
-
-    // Allocate some static memory
-    float *refBlockValue = (float*)malloc(voxelBlockNumber * sizeof(float));
-
-    // Pointers to the input image
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3);
-    DataType *refImgPtr = static_cast<DataType*>(refImage->data);
-    DataType *warImgPtr = static_cast<DataType*>(warImage->data);
-
-    // Create a padded version of the warped image to avoid boundary condition check
-    int warPaddedOffset[3] = {
-        discretiseRadius + blockSize[0],
-        discretiseRadius + blockSize[1],
-        discretiseRadius + blockSize[2],
-    };
-    int warPaddedDim[4] = {
-        warImage->nx + 2 * warPaddedOffset[0] + blockSize[0],
-        warImage->ny + 2 * warPaddedOffset[1] + blockSize[1],
-        warImage->nz + 2 * warPaddedOffset[2] + blockSize[2],
-        warImage->nt
-    };
-
-    DataType padding_value = 0;
-
-    size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] * warPaddedDim[1] * warPaddedDim[2];
-    DataType *paddedWarImgPtr = (DataType*)calloc(warPaddedVoxelNumber * warPaddedDim[3], sizeof(DataType));
-    for (voxIndex = 0; voxIndex < warPaddedVoxelNumber * warPaddedDim[3]; ++voxIndex)
-        paddedWarImgPtr[voxIndex] = padding_value;
-    voxIndex = 0;
-    voxIndex_t = 0;
-    for (t = 0; t < warImage->nt; ++t) {
-        for (z = warPaddedOffset[2]; z < warPaddedDim[2] - warPaddedOffset[2] - blockSize[2]; ++z) {
-            for (y = warPaddedOffset[1]; y < warPaddedDim[1] - warPaddedOffset[1] - blockSize[1]; ++y) {
-                voxIndex = t * warPaddedVoxelNumber + (z * warPaddedDim[1] + y) * warPaddedDim[0] + warPaddedOffset[0];
-                for (x = warPaddedOffset[0]; x < warPaddedDim[0] - warPaddedOffset[0] - blockSize[0]; ++x) {
-                    paddedWarImgPtr[voxIndex] = warImgPtr[voxIndex_t];
-                    ++voxIndex;
-                    ++voxIndex_t;
-                }
-            }
-        }
-    }
-
-    int definedValueNumber;
-
-    // Loop over all control points
-    for (cpz = 1; cpz < controlPointGridImage->nz - 1; ++cpz) {
-        gridVox[2] = cpz;
-        for (cpy = 1; cpy < controlPointGridImage->ny - 1; ++cpy) {
-            gridVox[1] = cpy;
-            currentControlPoint = (cpz * controlPointGridImage->ny + cpy) * controlPointGridImage->nx + 1;
-            for (cpx = 1; cpx < controlPointGridImage->nx - 1; ++cpx) {
-                gridVox[0] = cpx;
-                // Compute the corresponding image voxel position
-                reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
-                imageVox[0] = Round(imageVox[0]);
-                imageVox[1] = Round(imageVox[1]);
-                imageVox[2] = Round(imageVox[2]);
-
-                // Extract the block in the reference image
-                blockIndex = 0;
-                definedValueNumber = 0;
-                for (z = imageVox[2] - blockSize[2] / 2; z < imageVox[2] + blockSize[2] / 2; ++z) {
-                    for (y = imageVox[1] - blockSize[1] / 2; y < imageVox[1] + blockSize[1] / 2; ++y) {
-                        for (x = imageVox[0] - blockSize[0] / 2; x < imageVox[0] + blockSize[0] / 2; ++x) {
-                            if (x > -1 && x<refImage->nx && y>-1 && y<refImage->ny && z>-1 && z < refImage->nz) {
-                                voxIndex = (z * refImage->ny + y) * refImage->nx + x;
-                                if (mask[voxIndex] > -1) {
-                                    for (t = 0; t < refImage->nt; ++t) {
-                                        voxIndex_t = t * voxelNumber + voxIndex;
-                                        refBlockValue[blockIndex] = refImgPtr[voxIndex_t];
-                                        if (refBlockValue[blockIndex] == refBlockValue[blockIndex])
-                                            ++definedValueNumber;
-                                        blockIndex++;
-                                    } //t
-                                } else {
-                                    for (t = 0; t < refImage->nt; ++t) {
-                                        refBlockValue[blockIndex] = padding_value;
-                                        blockIndex++;
-                                    } // t
-                                }
-                            } else {
-                                for (t = 0; t < refImage->nt; ++t) {
-                                    refBlockValue[blockIndex] = padding_value;
-                                    blockIndex++;
-                                } // t
-                            } // mask
-                        } // x
-                    } // y
-                } // z
-                // Loop over the discretised value
-                if (definedValueNumber > 0) {
-
-                    DataType warpedValue;
-                    int paddedImageVox[3] = {
-                        static_cast<int>(imageVox[0] + warPaddedOffset[0]),
-                        static_cast<int>(imageVox[1] + warPaddedOffset[1]),
-                        static_cast<int>(imageVox[2] + warPaddedOffset[2])
-                    };
-                    int cc;
-                    double currentSum;
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-    shared(label_1D_number, label_2D_number, label_nD_number, discretiseStep, discretiseRadius, \
-    paddedImageVox, blockSize, warPaddedDim, paddedWarImgPtr, refBlockValue, warPaddedVoxelNumber, \
-    discretisedValue, currentControlPoint, voxelBlockNumber) \
-    private(a, b, c, cc, x, y, z, t, discretisedIndex, blockIndex, \
-    currentValue, warpedValue, voxIndex, voxIndex_t, definedValueNumber, currentSum)
-#endif
-                    for (cc = 0; cc < label_1D_number; ++cc) {
-                        discretisedIndex = cc * label_2D_number;
-                        c = paddedImageVox[2] - discretiseRadius + cc * discretiseStep;
-                        for (b = paddedImageVox[1] - discretiseRadius; b <= paddedImageVox[1] + discretiseRadius; b += discretiseStep) {
-                            for (a = paddedImageVox[0] - discretiseRadius; a <= paddedImageVox[0] + discretiseRadius; a += discretiseStep) {
-
-                                blockIndex = 0;
-                                currentSum = 0.;
-                                definedValueNumber = 0;
-
-                                for (z = c - blockSize[2] / 2; z < c + blockSize[2] / 2; ++z) {
-                                    for (y = b - blockSize[1] / 2; y < b + blockSize[1] / 2; ++y) {
-                                        for (x = a - blockSize[0] / 2; x < a + blockSize[0] / 2; ++x) {
-                                            voxIndex = (z * warPaddedDim[1] + y) * warPaddedDim[0] + x;
-                                            for (t = 0; t < warPaddedDim[3]; ++t) {
-                                                voxIndex_t = t * warPaddedVoxelNumber + voxIndex;
-                                                warpedValue = paddedWarImgPtr[voxIndex_t];
-#ifdef MRF_USE_SAD
-                                                currentValue = fabs(warpedValue - refBlockValue[blockIndex]);
-#else
-                                                currentValue = Square(warpedValue - refBlockValue[blockIndex]);
-#endif
-                                                if (currentValue == currentValue) {
-                                                    currentSum -= currentValue;
-                                                    ++definedValueNumber;
-                                                }
-                                                blockIndex++;
-                                            }
-                                        } // x
-                                    } // y
-                                } // z
-                                discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] =
-                                    currentSum / static_cast<float>(definedValueNumber);
-                                ++discretisedIndex;
-                            } // a
-                        } // b
-                    } // cc
-                } // defined value in the reference block
-                ++currentControlPoint;
-            } // cpx
-        } // cpy
-    } // cpz
-    free(paddedWarImgPtr);
-    free(refBlockValue);
-    // Deal with the labels that contains NaN values
-    for (size_t node = 0; node < NiftiImage::calcVoxelNumber(controlPointGridImage, 3); ++node) {
-        int definedValueNumber = 0;
-        float *discretisedValuePtr = &discretisedValue[node * label_nD_number];
-        float meanValue = 0;
-        for (int label = 0; label < label_nD_number; ++label) {
-            if (discretisedValuePtr[label] == discretisedValuePtr[label]) {
-                ++definedValueNumber;
-                meanValue += discretisedValuePtr[label];
-            }
-        }
-        if (definedValueNumber == 0) {
-            for (int label = 0; label < label_nD_number; ++label) {
-                discretisedValuePtr[label] = 0;
-            }
-        } else if (definedValueNumber < label_nD_number) {
-            // Needs to be altered for efficiency
-            int label = 0;
-            // Loop over all labels
-            int label_x, label2_x, label_y, label2_y, label_z, label2_z, label2;
-            float min_distance, current_distance;
-            for (label_z = 0; label_z < label_1D_number; ++label_z) {
-                for (label_y = 0; label_y < label_1D_number; ++label_y) {
-                    for (label_x = 0; label_x < label_1D_number; ++label_x) {
-                        // check if the current label is defined
-                        if (discretisedValuePtr[label] != discretisedValuePtr[label]) {
-                            label2 = 0;
-                            min_distance = std::numeric_limits<float>::max();
-                            // Loop again over all label to detect the defined values
-                            for (label2_z = 0; label2_z < label_1D_number; ++label2_z) {
-                                for (label2_y = 0; label2_y < label_1D_number; ++label2_y) {
-                                    for (label2_x = 0; label2_x < label_1D_number; ++label2_x) {
-                                        // Check if the value is defined
-                                        if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) {
-                                            // compute the distance between label and label2
-                                            current_distance = Square(label_x - label2_x) + Square(label_y - label2_y) + Square(label_z - label2_z);
-                                            if (current_distance < min_distance) {
-                                                min_distance = current_distance;
-                                                discretisedValuePtr[label] = discretisedValuePtr[label2];
-                                            }
-                                        } // Check if label2 is defined
-                                        ++label2;
-                                    } // x
-                                } // y
-                            } // z
-                        } // check if undefined label
-                        ++label;
-                    } //x
-                } // y
-            } // z
-
-        } // node with undefined label
-    } // node
-}
-/* *************************************************************** */
-template <class DataType>
-void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage,
-                                     float *discretisedValue,
-                                     int discretiseRadius,
-                                     int discretiseStep,
-                                     nifti_image *refImage,
-                                     nifti_image *warImage,
-                                     int *mask) {
     int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, blockIndex_t, discretisedIndex;
     size_t voxIndex, voxIndex_t;
     const int label_1D_number = (discretiseRadius / discretiseStep) * 2 + 1;
@@ -817,13 +571,13 @@ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage,
     std::visit([&](auto&& refImgDataType) {
         using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
         if (referenceImage->nz > 1) {
-            GetDiscretisedValueSSD_core3D_2<RefImgDataType>(controlPointGridImage,
-                                                            discretisedValue,
-                                                            discretiseRadius,
-                                                            discretiseStep,
-                                                            this->referenceImage,
-                                                            this->warpedImage,
-                                                            this->referenceMask);
+            GetDiscretisedValueSSD_core3D<RefImgDataType>(controlPointGridImage,
+                                                          discretisedValue,
+                                                          discretiseRadius,
+                                                          discretiseStep,
+                                                          this->referenceImage,
+                                                          this->warpedImage,
+                                                          this->referenceMask);
         } else {
             NR_FATAL_ERROR("Not implemented in 2D yet");
         }
diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h
index b05eded2..6a4ae0c1 100755
--- a/reg-lib/cpu/_reg_ssd.h
+++ b/reg-lib/cpu/_reg_ssd.h
@@ -21,7 +21,7 @@
 class reg_ssd: public reg_measure {
 public:
     /// @brief reg_ssd class constructor
-    reg_ssd();
+    reg_ssd() { NR_FUNC_CALLED(); }
     /// @brief reg_ssd class destructor
     virtual ~reg_ssd() {}
 
@@ -54,7 +54,7 @@ class reg_ssd: public reg_measure {
                                      int discretiseStep) override;
 
 protected:
-    bool normaliseTimePoint[255];
+    bool normaliseTimePoint[255]{};
 };
 /* *************************************************************** */
 /** @brief Computes and returns the SSD between two input images
diff --git a/reg-lib/cpu/_reg_thinPlateSpline.cpp b/reg-lib/cpu/_reg_thinPlateSpline.cpp
deleted file mode 100644
index 186349a2..00000000
--- a/reg-lib/cpu/_reg_thinPlateSpline.cpp
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- *  _reg_thinPlateSpline.cpp
- *
- *
- *  Created by Marc Modat on 22/02/2011.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#include "_reg_thinPlateSpline.h"
-
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-reg_tps<T>::reg_tps(size_t d, size_t n)
-{
-   this->dim=d;
-   this->number=n;
-   this->positionX=(T*)calloc(this->number,sizeof(T));
-   this->positionY=(T*)calloc(this->number,sizeof(T));
-   this->coefficientX=(T*)calloc(this->number+this->dim+1,sizeof(T));
-   this->coefficientY=(T*)calloc(this->number+this->dim+1,sizeof(T));
-   if(this->dim==3)
-   {
-      this->positionZ=(T*)calloc(this->number,sizeof(T));
-      this->coefficientZ=(T*)calloc(this->number+this->dim+1,sizeof(T));
-   }
-   else
-   {
-      this->positionZ=nullptr;
-      this->coefficientZ=nullptr;
-   }
-   this->initialised=false;
-   this->approxInter=0.;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-reg_tps<T>::~reg_tps()
-{
-   if(this->positionX!=nullptr) free(this->positionX);
-   this->positionX=nullptr;
-   if(this->positionY!=nullptr) free(this->positionY);
-   this->positionY=nullptr;
-   if(this->positionZ!=nullptr) free(this->positionZ);
-   this->positionZ=nullptr;
-   if(this->coefficientX!=nullptr) free(this->coefficientX);
-   this->coefficientX=nullptr;
-   if(this->coefficientY!=nullptr) free(this->coefficientY);
-   this->coefficientY=nullptr;
-   if(this->coefficientZ!=nullptr) free(this->coefficientZ);
-   this->coefficientZ=nullptr;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_tps<T>::SetPosition(T *px, T *py, T *pz, T *cx,T *cy, T *cz)
-{
-   memcpy(this->positionX,px,this->number*sizeof(T));
-   memcpy(this->positionY,py,this->number*sizeof(T));
-   memcpy(this->positionZ,pz,this->number*sizeof(T));
-   memcpy(this->coefficientX,cx,this->number*sizeof(T));
-   memcpy(this->coefficientY,cy,this->number*sizeof(T));
-   memcpy(this->coefficientZ,cz,this->number*sizeof(T));
-   for(size_t i=this->number; i<this->number+this->dim+1; ++i)
-   {
-      this->coefficientX[i]=0;
-      this->coefficientY[i]=0;
-      this->coefficientZ[i]=0;
-   }
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_tps<T>::SetPosition(T *px, T *py, T *cx,T *cy)
-{
-   memcpy(this->positionX,px,this->number*sizeof(T));
-   memcpy(this->positionY,py,this->number*sizeof(T));
-   memcpy(this->coefficientX,cx,this->number*sizeof(T));
-   memcpy(this->coefficientY,cy,this->number*sizeof(T));
-   for(size_t i=this->number; i<this->number+this->dim+1; ++i)
-   {
-      this->coefficientX[i]=0;
-      this->coefficientY[i]=0;
-   }
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_tps<T>::SetAproxInter(T v)
-{
-   this->approxInter=v;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-T reg_tps<T>::GetTPSEuclideanDistance(size_t i, size_t j)
-{
-   T temp = this->positionX[i] - this->positionX[j];
-   T dist = temp*temp;
-   temp = this->positionY[i] - this->positionY[j];
-   dist += temp*temp;
-   if(this->dim==3)
-   {
-      temp = this->positionZ[i] - this->positionZ[j];
-      dist += temp*temp;
-   }
-   return sqrt(dist);
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-T reg_tps<T>::GetTPSEuclideanDistance(size_t i, T *p)
-{
-   T temp = this->positionX[i] - p[0];
-   T dist = temp*temp;
-   temp = this->positionY[i] - p[1];
-   dist += temp*temp;
-   if(this->dim==3)
-   {
-      temp = this->positionZ[i] - p[2];
-      dist += temp*temp;
-   }
-   return sqrt(dist);
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-T reg_tps<T>::GetTPSweight(T dist)
-{
-   if(dist==0)
-      return EXIT_SUCCESS;
-   return dist*dist*log(dist);
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_tps<T>::InitialiseTPS()
-{
-   const size_t matrixSide=this->number + this->dim + 1;
-   T *matrixL=(T*)calloc(matrixSide*matrixSide,sizeof(T));
-   if(matrixL==nullptr)
-      NR_FATAL_ERROR("Calloc failed, the TPS distance matrix is too large! Size should be " +
-                     std::to_string(matrixSide * matrixSide * sizeof(T) / 1000000000.f) + " GB (" +
-                     std::to_string(matrixSide) + " x " + std::to_string(matrixSide) + ")");
-
-   // Distance matrix is computed
-   double a=0.;
-   for(size_t i=0; i<this->number; ++i)
-   {
-      for(size_t j=i+1; j<this->number; ++j)
-      {
-         T distance = this->GetTPSEuclideanDistance(i,j);
-         a += distance * 2.;
-         distance = this->GetTPSweight(distance);
-         matrixL[i*matrixSide+j]=matrixL[j*matrixSide+i]=distance;
-      }
-   }
-   a/=(double)(this->number*this->number);
-   a=(double)this->approxInter*a*a;
-   for(size_t i=0; i<this->number; ++i)
-   {
-      matrixL[i*matrixSide+i]=a;
-   }
-   for(size_t i=0; i<this->number; ++i)
-   {
-      matrixL[i*matrixSide+this->number]=matrixL[(this->number)*matrixSide+i]=1;
-      matrixL[i*matrixSide+this->number+1]=matrixL[(this->number+1)*matrixSide+i]=this->positionX[i];
-      matrixL[i*matrixSide+this->number+2]=matrixL[(this->number+2)*matrixSide+i]=this->positionY[i];
-      if(this->dim==3)
-         matrixL[i*matrixSide+this->number+3]=matrixL[(this->number+3)*matrixSide+i]=this->positionZ[i];
-
-   }
-   for(size_t i=this->number; i<matrixSide; ++i)
-   {
-      for(size_t j=this->number; j<matrixSide; ++j)
-      {
-         matrixL[i*matrixSide+j]=0;
-      }
-   }
-
-   // Run the LU decomposition
-   size_t *index=(size_t *)calloc(matrixSide,sizeof(size_t));
-   reg_LUdecomposition<T>(matrixL, matrixSide, index);
-
-   // Perform the multiplications
-   reg_matrixInvertMultiply<T>(matrixL, matrixSide, index, this->coefficientX);
-   reg_matrixInvertMultiply<T>(matrixL, matrixSide, index, this->coefficientY);
-   if(this->dim==3)
-   {
-      reg_matrixInvertMultiply<T>(matrixL, matrixSide, index, this->coefficientZ);
-   }
-
-   free(index);
-   free(matrixL);
-   this->initialised=true;
-   return;
-}
-/* *************************************************************** */
-/* *************************************************************** */
-template <class T>
-void reg_tps<T>::FillDeformationField(nifti_image *deformationField)
-{
-   if(this->initialised==false)
-      this->InitialiseTPS();
-
-   const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
-   T *defX=static_cast<T *>(deformationField->data);
-   T *defY=&defX[voxelNumber];
-   T *defZ=nullptr;
-   if(this->dim==3)
-      defZ=&defY[voxelNumber];
-
-   mat44 *voxel2realDF=nullptr;
-   if(deformationField->sform_code>0)
-      voxel2realDF=&(deformationField->sto_xyz);
-   else voxel2realDF=&(deformationField->qto_xyz);
-
-   T position[3];
-
-   int index=0;
-   for(int z=0; z<deformationField->nz; ++z)
-   {
-      for(int y=0; y<deformationField->ny; ++y)
-      {
-         for(int x=0; x<deformationField->nx; ++x)
-         {
-
-            // Compute the voxel position in mm
-            position[0]=x * voxel2realDF->m[0][0] +
-                        y * voxel2realDF->m[0][1] +
-                        z * voxel2realDF->m[0][2] +
-                        voxel2realDF->m[0][3];
-            position[1]=x * voxel2realDF->m[1][0] +
-                        y * voxel2realDF->m[1][1] +
-                        z * voxel2realDF->m[1][2] +
-                        voxel2realDF->m[1][3];
-            position[2]=x * voxel2realDF->m[2][0] +
-                        y * voxel2realDF->m[2][1] +
-                        z * voxel2realDF->m[2][2] +
-                        voxel2realDF->m[2][3];
-
-            T finalPositionX=0;
-            T finalPositionY=0;
-            T finalPositionZ=0;
-            if(this->dim==3)
-            {
-               finalPositionX=this->coefficientX[this->number]+
-                              this->coefficientX[this->number+1]*position[0]+
-                              this->coefficientX[this->number+2]*position[1]+
-                              this->coefficientX[this->number+3]*position[2];
-
-               finalPositionY=this->coefficientY[this->number]+
-                              this->coefficientY[this->number+1]*position[0]+
-                              this->coefficientY[this->number+2]*position[1]+
-                              this->coefficientY[this->number+3]*position[2];
-
-               finalPositionZ=this->coefficientZ[this->number]+
-                              this->coefficientZ[this->number+1]*position[0]+
-                              this->coefficientZ[this->number+2]*position[1]+
-                              this->coefficientZ[this->number+3]*position[2];
-            }
-            else
-            {
-               finalPositionX=this->coefficientX[this->number] +
-                              this->coefficientX[this->number+1]*position[0]+
-                              this->coefficientX[this->number+2]*position[1];
-
-               finalPositionY=this->coefficientY[this->number] +
-                              this->coefficientY[this->number+1]*position[0]+
-                              this->coefficientY[this->number+2]*position[1];
-            }
-
-            // Compute the displacement
-            for(size_t i=0; i<this->number; ++i)
-            {
-               T distance=GetTPSweight(GetTPSEuclideanDistance(i,position));
-               finalPositionX += this->coefficientX[i]*distance;
-               finalPositionY += this->coefficientY[i]*distance;
-               if(this->dim==3)
-                  finalPositionZ += this->coefficientZ[i]*distance;
-            }
-            defX[index]=finalPositionX+position[0];
-            defY[index]=finalPositionY+position[1];
-            if(this->dim==3)
-               defZ[index]=finalPositionZ+position[2];
-            index++;
-         }
-      }
-   }
-
-}
-/* *************************************************************** */
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_thinPlateSpline.h b/reg-lib/cpu/_reg_thinPlateSpline.h
deleted file mode 100644
index e06a4dbb..00000000
--- a/reg-lib/cpu/_reg_thinPlateSpline.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- *  _reg_thinPlateSpline.h
- *
- *
- *  Created by Marc Modat on 22/02/2011.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "_reg_maths.h"
-
-/* *************************************************************** */
-template <class T>
-class reg_tps
-{
-protected:
-   T *positionX;
-   T *positionY;
-   T *positionZ;
-   T *coefficientX;
-   T *coefficientY;
-   T *coefficientZ;
-   size_t dim;
-   size_t number;
-   bool initialised;
-   T approxInter;
-
-   T GetTPSEuclideanDistance(size_t i, size_t j);
-   T GetTPSEuclideanDistance(size_t i, T *p);
-   T GetTPSweight(T dist);
-
-public:
-   reg_tps(size_t d,size_t n);
-   ~reg_tps();
-   void SetPosition(T*,T*,T*,T*,T*,T*);
-   void SetPosition(T*,T*,T*,T*);
-   void SetAproxInter(T);
-
-   void InitialiseTPS();
-   void FillDeformationField(nifti_image *deformationField);
-};
-
-
-#include "_reg_thinPlateSpline.cpp"
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index b7f20f45..017d6029 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -2027,85 +2027,6 @@ float reg_tools_getSTDValue(const nifti_image *image) {
     }
 }
 /* *************************************************************** */
-template <class DataType>
-void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd) {
-    // Allocate the outputArray if it is not allocated yet
-    if (*outputArray == nullptr)
-        *outputArray = malloc(NiftiImage::calcVoxelNumber(image, 7) * sizeof(DataType));
-
-    // Parse the cmd to check which axis have to be flipped
-    const char *axisName = "x\0y\0z\0t\0u\0v\0w\0";
-    int increment[7] = { 1, 1, 1, 1, 1, 1, 1 };
-    int start[7] = { 0, 0, 0, 0, 0, 0, 0 };
-    const int end[7] = { image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw };
-    for (int i = 0; i < 7; ++i) {
-        if (cmd.find(axisName[i * 2]) != std::string::npos) {
-            increment[i] = -1;
-            start[i] = end[i] - 1;
-        }
-    }
-
-    // Define the reading and writing pointers
-    const DataType *inputPtr = static_cast<const DataType*>(image->data);
-    DataType *outputPtr = static_cast<DataType*>(*outputArray);
-
-    // Copy the data and flip axis if required
-    for (int w = 0, w2 = start[6]; w < image->nw; ++w, w2 += increment[6]) {
-        size_t index_w = w2 * image->nx * image->ny * image->nz * image->nt * image->nu * image->nv;
-        for (int v = 0, v2 = start[5]; v < image->nv; ++v, v2 += increment[5]) {
-            size_t index_v = index_w + v2 * image->nx * image->ny * image->nz * image->nt * image->nu;
-            for (int u = 0, u2 = start[4]; u < image->nu; ++u, u2 += increment[4]) {
-                size_t index_u = index_v + u2 * image->nx * image->ny * image->nz * image->nt;
-                for (int t = 0, t2 = start[3]; t < image->nt; ++t, t2 += increment[3]) {
-                    size_t index_t = index_u + t2 * image->nx * image->ny * image->nz;
-                    for (int z = 0, z2 = start[2]; z < image->nz; ++z, z2 += increment[2]) {
-                        size_t index_z = index_t + z2 * image->nx * image->ny;
-                        for (int y = 0, y2 = start[1]; y < image->ny; ++y, y2 += increment[1]) {
-                            size_t index_y = index_z + y2 * image->nx;
-                            for (int x = 0, x2 = start[0]; x < image->nx; ++x, x2 += increment[0]) {
-                                size_t index = index_y + x2;
-                                *outputPtr++ = inputPtr[index];
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-/* *************************************************************** */
-void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd) {
-    // Check the image data type
-    switch (image->datatype) {
-    case NIFTI_TYPE_UINT8:
-        reg_flipAxis<unsigned char>(image, outputArray, cmd);
-        break;
-    case NIFTI_TYPE_INT8:
-        reg_flipAxis<char>(image, outputArray, cmd);
-        break;
-    case NIFTI_TYPE_UINT16:
-        reg_flipAxis<unsigned short>(image, outputArray, cmd);
-        break;
-    case NIFTI_TYPE_INT16:
-        reg_flipAxis<short>(image, outputArray, cmd);
-        break;
-    case NIFTI_TYPE_UINT32:
-        reg_flipAxis<unsigned>(image, outputArray, cmd);
-        break;
-    case NIFTI_TYPE_INT32:
-        reg_flipAxis<int>(image, outputArray, cmd);
-        break;
-    case NIFTI_TYPE_FLOAT32:
-        reg_flipAxis<float>(image, outputArray, cmd);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_flipAxis<double>(image, outputArray, cmd);
-        break;
-    default:
-        NR_FATAL_ERROR("The image data type is not supported");
-    }
-}
-/* *************************************************************** */
 template<class DataType>
 void reg_getDisplacementFromDeformation_2D(nifti_image *field) {
     DataType *ptrX = static_cast<DataType*>(field->data);
@@ -2385,113 +2306,6 @@ void reg_setGradientToZero(nifti_image *image,
     }
 }
 /* *************************************************************** */
-template <class DataType>
-double reg_test_compare_arrays(const DataType *ptrA,
-                               const DataType *ptrB,
-                               size_t nvox) {
-    double maxDifference = 0;
-
-    for (size_t i = 0; i < nvox; ++i) {
-        const double valA = (double)ptrA[i];
-        const double valB = (double)ptrB[i];
-        if (valA != valA || valB != valB) {
-            if (valA == valA || valB == valB) {
-                NR_WARN_WFCT("Unexpected NaN in only one of the array");
-                return std::numeric_limits<float>::max();
-            }
-        } else {
-            if (valA != 0 && valB != 0) {
-                double diffRatio = valA / valB;
-                if (diffRatio < 0) {
-                    diffRatio = std::abs(valA - valB);
-                    maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio;
-                }
-                diffRatio -= 1.0;
-                maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio;
-            } else {
-                double diffRatio = std::abs(valA - valB);
-                maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio;
-            }
-        }
-    }
-    return maxDifference;
-}
-template double reg_test_compare_arrays<float>(const float*, const float*, size_t);
-template double reg_test_compare_arrays<double>(const double*, const double*, size_t);
-/* *************************************************************** */
-template <class DataType>
-double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) {
-    const DataType *imgAPtr = static_cast<DataType*>(imgA->data);
-    const DataType *imgBPtr = static_cast<DataType*>(imgB->data);
-    return reg_test_compare_arrays<DataType>(imgAPtr, imgBPtr, imgA->nvox);
-}
-/* *************************************************************** */
-double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) {
-    if (imgA->datatype != imgB->datatype)
-        NR_FATAL_ERROR("Input images have different datatype");
-    if (imgA->nvox != imgB->nvox)
-        NR_FATAL_ERROR("Input images have different size");
-    switch (imgA->datatype) {
-    case NIFTI_TYPE_UINT8:
-        return reg_test_compare_images<unsigned char>(imgA, imgB);
-    case NIFTI_TYPE_UINT16:
-        return reg_test_compare_images<unsigned short>(imgA, imgB);
-    case NIFTI_TYPE_UINT32:
-        return reg_test_compare_images<unsigned>(imgA, imgB);
-    case NIFTI_TYPE_INT8:
-        return reg_test_compare_images<char>(imgA, imgB);
-    case NIFTI_TYPE_INT16:
-        return reg_test_compare_images<short>(imgA, imgB);
-    case NIFTI_TYPE_INT32:
-        return reg_test_compare_images<int>(imgA, imgB);
-    case NIFTI_TYPE_FLOAT32:
-        return reg_test_compare_images<float>(imgA, imgB);
-    case NIFTI_TYPE_FLOAT64:
-        return reg_test_compare_images<double>(imgA, imgB);
-    default:
-        NR_FATAL_ERROR("Unsupported data type");
-        return 0;
-    }
-}
-/* *************************************************************** */
-template <class DataType>
-void reg_tools_abs_image(nifti_image *img) {
-    DataType *ptr = static_cast<DataType*>(img->data);
-    for (size_t i = 0; i < img->nvox; ++i)
-        ptr[i] = static_cast<DataType>(fabs(static_cast<double>(ptr[i])));
-}
-/* *************************************************************** */
-void reg_tools_abs_image(nifti_image *img) {
-    switch (img->datatype) {
-    case NIFTI_TYPE_UINT8:
-        reg_tools_abs_image<unsigned char>(img);
-        break;
-    case NIFTI_TYPE_UINT16:
-        reg_tools_abs_image<unsigned short>(img);
-        break;
-    case NIFTI_TYPE_UINT32:
-        reg_tools_abs_image<unsigned>(img);
-        break;
-    case NIFTI_TYPE_INT8:
-        reg_tools_abs_image<char>(img);
-        break;
-    case NIFTI_TYPE_INT16:
-        reg_tools_abs_image<short>(img);
-        break;
-    case NIFTI_TYPE_INT32:
-        reg_tools_abs_image<int>(img);
-        break;
-    case NIFTI_TYPE_FLOAT32:
-        reg_tools_abs_image<float>(img);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        reg_tools_abs_image<double>(img);
-        break;
-    default:
-        NR_FATAL_ERROR("Unsupported data type");
-    }
-}
-/* *************************************************************** */
 void mat44ToCptr(const mat44& mat, float *cMat) {
     for (int i = 0; i < 4; i++) {
         for (int j = 0; j < 4; j++) {
@@ -2500,62 +2314,6 @@ void mat44ToCptr(const mat44& mat, float *cMat) {
     }
 }
 /* *************************************************************** */
-void cPtrToMat44(mat44 *mat, const float *cMat) {
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            mat->m[i][j] = cMat[i * 4 + j];
-        }
-    }
-}
-/* *************************************************************** */
-void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned numMats) {
-    for (size_t k = 0; k < numMats; k++) {
-        for (int i = 0; i < 3; i++) {
-            for (int j = 0; j < 3; j++) {
-                cMat[9 * k + i * 3 + j] = mat[k].m[i][j];
-            }
-        }
-    }
-}
-/* *************************************************************** */
-void cPtrToMat33(mat33 *mat, const float *cMat) {
-    for (int i = 0; i < 3; i++) {
-        for (int j = 0; j < 3; j++) {
-            mat->m[i][j] = cMat[i * 3 + j];
-        }
-    }
-}
-/* *************************************************************** */
-template<typename T>
-void matmnToCptr(const T **mat, T *cMat, unsigned m, unsigned n) {
-    for (unsigned i = 0; i < m; i++) {
-        for (unsigned j = 0; j < n; j++) {
-            cMat[i * n + j] = mat[i][j];
-        }
-    }
-}
-template void matmnToCptr<float>(const float**, float*, unsigned, unsigned);
-template void matmnToCptr<double>(const double**, double*, unsigned, unsigned);
-/* *************************************************************** */
-template<typename T>
-void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n) {
-    for (unsigned i = 0; i < m; i++) {
-        for (unsigned j = 0; j < n; j++) {
-            mat[i][j] = cMat[i * n + j];
-        }
-    }
-}
-template void cPtrToMatmn<float>(float**, const float*, unsigned, unsigned);
-template void cPtrToMatmn<double>(double**, const double*, unsigned, unsigned);
-/* *************************************************************** */
-void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z) {
-    x = index % (maxValue_x + 1);
-    index /= (maxValue_x + 1);
-    y = index % (maxValue_y + 1);
-    index /= (maxValue_y + 1);
-    z = index;
-}
-/* *************************************************************** */
 nifti_image* nifti_dup(const nifti_image& image, const bool copyData) {
     nifti_image *newImage = nifti_copy_nim_info(&image);
     newImage->data = calloc(image.nvox, image.nbyper);
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 81c9e633..650e6c71 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -343,17 +343,6 @@ void reg_thresholdImage(nifti_image *image,
                         T lowThr,
                         T upThr);
 /* *************************************************************** */
-/** @brief This function flip the specified axis
- * @param image Input image to be flipped
- * @param array Array that will contain the flipped
- * input image->data array
- * @param cmd String that contains the letter(s) of the axis
- * to flip (xyztuvw)
- */
-void reg_flipAxis(const nifti_image *image,
-                  void **outputArray,
-                  const std::string& cmd);
-/* *************************************************************** */
 /** @brief This function converts an image containing deformation
  * field into a displacement field
  * The conversion is done using the appropriate qform/sform
@@ -381,43 +370,8 @@ void reg_setGradientToZero(nifti_image *image,
                            bool yAxis,
                            bool zAxis);
 /* *************************************************************** */
-/* *************************************************************** */
-/** @brief The functions returns the largest ratio between two arrays
- * The returned value is the largest value computed as ((A/B)-1)
- * If A or B are zeros then the (A-B) value is returned.
- */
-template<class DataType>
-double reg_test_compare_arrays(const DataType *ptrA,
-                               const DataType *ptrB,
-                               size_t nvox);
-/* *************************************************************** */
-/** @brief The functions returns the largest ratio between input image intensities
- * The returned value is the largest value computed as ((A/B)-1)
- * If A or B are zeros then the (A-B) value is returned.
- */
-double reg_test_compare_images(const nifti_image *imgA,
-                               const nifti_image *imgB);
-/* *************************************************************** */
-/** @brief The absolute operator is applied to the input image
- */
-void reg_tools_abs_image(nifti_image *img);
-/* *************************************************************** */
 void mat44ToCptr(const mat44& mat, float *cMat);
 /* *************************************************************** */
-void cPtrToMat44(mat44 *mat, const float *cMat);
-/* *************************************************************** */
-void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned numMats);
-/* *************************************************************** */
-void cPtrToMat33(mat33 *mat, const float *cMat);
-/* *************************************************************** */
-template<typename T>
-void matmnToCptr(const T **mat, T *cMat, unsigned m, unsigned n);
-/* *************************************************************** */
-template<typename T>
-void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n);
-/* *************************************************************** */
-void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z);
-/* *************************************************************** */
 /** @brief Duplicates the nifti image
  * @param image Input image
  * @param copyData Boolean to specify if the image data should be copied
diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp
index 5f70e968..3685a6a1 100644
--- a/reg-lib/cuda/BlockSize.hpp
+++ b/reg-lib/cuda/BlockSize.hpp
@@ -25,11 +25,6 @@ struct BlockSize {
     unsigned ComputeJacGradient3d;
     unsigned ApproxCorrectFolding3d;
     unsigned CorrectFolding3d;
-    unsigned GetJacobianMatrix;
-    unsigned ConvertNmiGradientFromVoxelToRealSpace;
-    unsigned ApplyConvolutionWindowAlongX;
-    unsigned ApplyConvolutionWindowAlongY;
-    unsigned ApplyConvolutionWindowAlongZ;
 };
 /* *************************************************************** */
 struct BlockSize100: public BlockSize {
@@ -45,11 +40,6 @@ struct BlockSize100: public BlockSize {
         ComputeJacGradient3d = 256; // 32 reg - 24 smem - 64 cmem
         ApproxCorrectFolding3d = 256; // 32 reg - 24 smem - 24 cmem
         CorrectFolding3d = 256; // 31 reg - 24 smem - 32 cmem
-        GetJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem
-        ConvertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem
-        ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem
-        ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem
-        ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem
         NR_FUNC_CALLED();
     }
 };
@@ -67,11 +57,6 @@ struct BlockSize300: public BlockSize {
         ComputeJacGradient3d = 768; // 37 reg
         ApproxCorrectFolding3d = 768; // 34 reg
         CorrectFolding3d = 768; // 34 reg
-        GetJacobianMatrix = 768; // 34 reg
-        ConvertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg
-        ApplyConvolutionWindowAlongX = 1024; // 25 reg
-        ApplyConvolutionWindowAlongY = 1024; // 25 reg
-        ApplyConvolutionWindowAlongZ = 1024; // 25 reg
         NR_FUNC_CALLED();
     }
 };
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index 06972269..2c98a8ca 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -859,23 +859,6 @@ void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
     }
 }
 /* *************************************************************** */
-void GetJacobianMatrix(const nifti_image *deformationField,
-                       const float4 *deformationFieldCuda,
-                       float *jacobianMatricesCuda) {
-    const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
-    const mat33 reorientation = reg_mat44_to_mat33(deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz);
-    auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
-
-    const unsigned blocks = CudaContext::GetBlockSize()->GetJacobianMatrix;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-    const dim3 gridDims(grids, grids, 1);
-    const dim3 blockDims(blocks, 1, 1);
-    GetJacobianMatrix3d<<<gridDims, blockDims>>>(jacobianMatricesCuda, *deformationFieldTexture, referenceImageDim,
-                                                 (unsigned)voxelNumber, reorientation);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-}
-/* *************************************************************** */
 template<bool is3d>
 double ApproxLinearEnergy(const nifti_image *controlPointGrid,
                           const float4 *controlPointGridCuda) {
diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp
index 6be6b2d3..8279a0ac 100644
--- a/reg-lib/cuda/CudaLocalTransformation.hpp
+++ b/reg-lib/cuda/CudaLocalTransformation.hpp
@@ -72,10 +72,6 @@ void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
                                         vector<NiftiImage>& deformationFields,
                                         vector<thrust::device_vector<float4>>& deformationFieldCudaVecs);
 /* *************************************************************** */
-void GetJacobianMatrix(const nifti_image *deformationField,
-                       const float4 *deformationFieldCuda,
-                       float *jacobianMatricesCuda);
-/* *************************************************************** */
 template<bool is3d>
 double ApproxLinearEnergy(const nifti_image *controlPointGrid,
                           const float4 *controlPointGridCuda);
diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu
index 536f7719..b7639f76 100644
--- a/reg-lib/cuda/CudaLocalTransformationKernels.cu
+++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu
@@ -1129,68 +1129,6 @@ __device__ void DefFieldComposeKernel(float4 *deformationField,
     deformationField[index] = position;
 }
 /* *************************************************************** */
-__global__ void GetJacobianMatrix3d(float *jacobianMatrices,
-                                    cudaTextureObject_t deformationFieldTexture,
-                                    const int3 referenceImageDim,
-                                    const unsigned voxelNumber,
-                                    const mat33 reorientation) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < voxelNumber) {
-        int quot, rem;
-        reg_div_cuda(tid, referenceImageDim.x * referenceImageDim.y, quot, rem);
-        const int z = quot;
-        reg_div_cuda(rem, referenceImageDim.x, quot, rem);
-        const int y = quot, x = rem;
-
-        if (x == referenceImageDim.x - 1 || y == referenceImageDim.y - 1 || z == referenceImageDim.z - 1) {
-            int index = tid * 9;
-            jacobianMatrices[index++] = 1;
-            jacobianMatrices[index++] = 0;
-            jacobianMatrices[index++] = 0;
-            jacobianMatrices[index++] = 0;
-            jacobianMatrices[index++] = 1;
-            jacobianMatrices[index++] = 0;
-            jacobianMatrices[index++] = 0;
-            jacobianMatrices[index++] = 0;
-            jacobianMatrices[index] = 1;
-            return;
-        }
-
-        int index = (z * referenceImageDim.y + y) * referenceImageDim.x + x;
-        float4 deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
-        float matrix[9] = {
-            -deformation.x, -deformation.x, -deformation.x,
-            -deformation.y, -deformation.y, -deformation.y,
-            -deformation.z, -deformation.z, -deformation.z
-        };
-        deformation = tex1Dfetch<float4>(deformationFieldTexture, index + 1);
-        matrix[0] += deformation.x;
-        matrix[3] += deformation.y;
-        matrix[6] += deformation.z;
-        index = (z * referenceImageDim.y + y + 1) * referenceImageDim.x + x;
-        deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
-        matrix[1] += deformation.x;
-        matrix[4] += deformation.y;
-        matrix[7] += deformation.z;
-        index = ((z + 1) * referenceImageDim.y + y) * referenceImageDim.x + x;
-        deformation = tex1Dfetch<float4>(deformationFieldTexture, index);
-        matrix[2] += deformation.x;
-        matrix[5] += deformation.y;
-        matrix[8] += deformation.z;
-
-        index = tid * 9;
-        jacobianMatrices[index++] = reorientation.m[0][0] * matrix[0] + reorientation.m[0][1] * matrix[3] + reorientation.m[0][2] * matrix[6];
-        jacobianMatrices[index++] = reorientation.m[0][0] * matrix[1] + reorientation.m[0][1] * matrix[4] + reorientation.m[0][2] * matrix[7];
-        jacobianMatrices[index++] = reorientation.m[0][0] * matrix[2] + reorientation.m[0][1] * matrix[5] + reorientation.m[0][2] * matrix[8];
-        jacobianMatrices[index++] = reorientation.m[1][0] * matrix[0] + reorientation.m[1][1] * matrix[3] + reorientation.m[1][2] * matrix[6];
-        jacobianMatrices[index++] = reorientation.m[1][0] * matrix[1] + reorientation.m[1][1] * matrix[4] + reorientation.m[1][2] * matrix[7];
-        jacobianMatrices[index++] = reorientation.m[1][0] * matrix[2] + reorientation.m[1][1] * matrix[5] + reorientation.m[1][2] * matrix[8];
-        jacobianMatrices[index++] = reorientation.m[2][0] * matrix[0] + reorientation.m[2][1] * matrix[3] + reorientation.m[2][2] * matrix[6];
-        jacobianMatrices[index++] = reorientation.m[2][0] * matrix[1] + reorientation.m[2][1] * matrix[4] + reorientation.m[2][2] * matrix[7];
-        jacobianMatrices[index] = reorientation.m[2][0] * matrix[2] + reorientation.m[2][1] * matrix[5] + reorientation.m[2][2] * matrix[8];
-    }
-}
-/* *************************************************************** */
 template<bool is3d>
 struct Basis1st {
     float x[27], y[27], z[27];
diff --git a/reg-lib/cuda/CudaTools.cu b/reg-lib/cuda/CudaTools.cu
index a662ade1..91455a2c 100644
--- a/reg-lib/cuda/CudaTools.cu
+++ b/reg-lib/cuda/CudaTools.cu
@@ -77,184 +77,6 @@ void VoxelCentricToNodeCentric(const nifti_image *nodeImage,
 template void VoxelCentricToNodeCentric<false>(const nifti_image*, const nifti_image*, float4*, float4*, float, const mat44*);
 template void VoxelCentricToNodeCentric<true>(const nifti_image*, const nifti_image*, float4*, float4*, float, const mat44*);
 /* *************************************************************** */
-void ConvertNmiGradientFromVoxelToRealSpace(const mat44 *sourceMatrixXYZ,
-                                            const nifti_image *controlPointImage,
-                                            float4 *nmiGradientCuda) {
-    const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const unsigned blocks = CudaContext::GetBlockSize()->ConvertNmiGradientFromVoxelToRealSpace;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks));
-    const dim3 gridDims(grids, grids, 1);
-    const dim3 blockDims(blocks, 1, 1);
-    ConvertNmiGradientFromVoxelToRealSpaceKernel<<<gridDims, blockDims>>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber);
-    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-}
-/* *************************************************************** */
-void GaussianSmoothing(const nifti_image *image,
-                       float4 *imageCuda,
-                       const float sigma,
-                       const bool smoothXYZ[8]) {
-    auto blockSize = CudaContext::GetBlockSize();
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
-    const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
-
-    bool axisToSmooth[8];
-    if (smoothXYZ == nullptr) {
-        for (int i = 0; i < 8; i++) axisToSmooth[i] = true;
-    } else {
-        for (int i = 0; i < 8; i++) axisToSmooth[i] = smoothXYZ[i];
-    }
-
-    for (int n = 1; n < 4; n++) {
-        if (axisToSmooth[n] && image->dim[n] > 1) {
-            float currentSigma;
-            if (sigma > 0) currentSigma = sigma / image->pixdim[n];
-            else currentSigma = fabs(sigma); // voxel based if negative value
-            const int radius = (int)Ceil(currentSigma * 3.0f);
-            if (radius > 0) {
-                const int kernelSize = 1 + radius * 2;
-                float *kernel;
-                NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel, kernelSize * sizeof(float)));
-                float kernelSum = 0;
-                for (int i = -radius; i <= radius; i++) {
-                    kernel[radius + i] = (float)(exp(-((float)i * (float)i) / (2.0 * currentSigma * currentSigma)) /
-                                                 (currentSigma * 2.506628274631));
-                    // 2.506... = sqrt(2*pi)
-                    kernelSum += kernel[radius + i];
-                }
-                for (int i = 0; i < kernelSize; i++)
-                    kernel[i] /= kernelSum;
-
-                float *kernelCuda;
-                NR_CUDA_SAFE_CALL(cudaMalloc(&kernelCuda, kernelSize * sizeof(float)));
-                NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice));
-                NR_CUDA_SAFE_CALL(cudaFreeHost(kernel));
-
-                float4 *smoothedImage;
-                NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4)));
-
-                auto imageTexture = Cuda::CreateTextureObject(imageCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
-                auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, kernelSize, cudaChannelFormatKindFloat, 1);
-
-                unsigned blocks, grids;
-                dim3 blockDims, gridDims;
-                switch (n) {
-                case 1:
-                    blocks = blockSize->ApplyConvolutionWindowAlongX;
-                    grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-                    gridDims = dim3(grids, grids, 1);
-                    blockDims = dim3(blocks, 1, 1);
-                    ApplyConvolutionWindowAlongXKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
-                                                                                     kernelSize, imageDim, (unsigned)voxelNumber);
-                    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-                    break;
-                case 2:
-                    blocks = blockSize->ApplyConvolutionWindowAlongY;
-                    grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-                    gridDims = dim3(grids, grids, 1);
-                    blockDims = dim3(blocks, 1, 1);
-                    ApplyConvolutionWindowAlongYKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
-                                                                                     kernelSize, imageDim, (unsigned)voxelNumber);
-                    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-                    break;
-                case 3:
-                    blocks = blockSize->ApplyConvolutionWindowAlongZ;
-                    grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-                    gridDims = dim3(grids, grids, 1);
-                    blockDims = dim3(blocks, 1, 1);
-                    ApplyConvolutionWindowAlongZKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
-                                                                                     kernelSize, imageDim, (unsigned)voxelNumber);
-                    NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-                    break;
-                }
-                NR_CUDA_SAFE_CALL(cudaFree(kernelCuda));
-                NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
-                NR_CUDA_SAFE_CALL(cudaFree(smoothedImage));
-            }
-        }
-    }
-}
-/* *************************************************************** */
-void SmoothImageForCubicSpline(const nifti_image *image,
-                               float4 *imageCuda,
-                               const float *spacingVoxel) {
-    auto blockSize = CudaContext::GetBlockSize();
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
-    const int3 imageDim = make_int3(image->nx, image->ny, image->nz);
-
-    for (int n = 0; n < 3; n++) {
-        if (spacingVoxel[n] > 0 && image->dim[n + 1] > 1) {
-            int radius = Ceil(2.0 * spacingVoxel[n]);
-            int kernelSize = 1 + radius * 2;
-
-            float *kernel;
-            NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel, kernelSize * sizeof(float)));
-
-            float coeffSum = 0;
-            for (int it = -radius; it <= radius; it++) {
-                float coeff = (float)(fabs((float)(float)it / (float)spacingVoxel[0]));
-                if (coeff < 1.0) kernel[it + radius] = (float)(2.0 / 3.0 - coeff * coeff + 0.5 * coeff * coeff * coeff);
-                else if (coeff < 2.0) kernel[it + radius] = (float)(-(coeff - 2.0) * (coeff - 2.0) * (coeff - 2.0) / 6.0);
-                else kernel[it + radius] = 0;
-                coeffSum += kernel[it + radius];
-            }
-            for (int it = 0; it < kernelSize; it++)
-                kernel[it] /= coeffSum;
-
-            float *kernelCuda;
-            NR_CUDA_SAFE_CALL(cudaMalloc(&kernelCuda, kernelSize * sizeof(float)));
-            NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice));
-            NR_CUDA_SAFE_CALL(cudaFreeHost(kernel));
-
-            auto imageTexture = Cuda::CreateTextureObject(imageCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
-            auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, kernelSize, cudaChannelFormatKindFloat, 1);
-
-            float4 *smoothedImage;
-            NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4)));
-
-            unsigned grids, blocks;
-            dim3 blockDims, gridDims;
-            switch (n) {
-            case 0:
-                blocks = blockSize->ApplyConvolutionWindowAlongX;
-                grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-                gridDims = dim3(grids, grids, 1);
-                blockDims = dim3(blocks, 1, 1);
-                ApplyConvolutionWindowAlongXKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
-                                                                                 kernelSize, imageDim, (unsigned)voxelNumber);
-                NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-                break;
-            case 1:
-                blocks = blockSize->ApplyConvolutionWindowAlongY;
-                grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-                gridDims = dim3(grids, grids, 1);
-                blockDims = dim3(blocks, 1, 1);
-                ApplyConvolutionWindowAlongYKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
-                                                                                 kernelSize, imageDim, (unsigned)voxelNumber);
-                NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-                break;
-            case 2:
-                blocks = blockSize->ApplyConvolutionWindowAlongZ;
-                grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
-                gridDims = dim3(grids, grids, 1);
-                blockDims = dim3(blocks, 1, 1);
-                ApplyConvolutionWindowAlongZKernel<<<gridDims, blockDims>>>(smoothedImage, *imageTexture, *kernelTexture,
-                                                                                 kernelSize, imageDim, (unsigned)voxelNumber);
-                NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
-                break;
-            }
-            NR_CUDA_SAFE_CALL(cudaFree(kernelCuda));
-            NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice));
-            NR_CUDA_SAFE_CALL(cudaFree(smoothedImage));
-        }
-    }
-}
-/* *************************************************************** */
-void AddValue(const size_t count, float4 *arrayCuda, const float addition) {
-    thrust::for_each_n(thrust::device, arrayCuda, count, [=]__device__(float4& val) {
-        val = val + addition;
-    });
-}
-/* *************************************************************** */
 void MultiplyValue(const size_t count, float4 *arrayCuda, const float multiplier) {
     thrust::for_each_n(thrust::device, arrayCuda, count, [=]__device__(float4& val) {
         val = val * multiplier;
@@ -275,16 +97,6 @@ float SumReduction(float *arrayCuda, const size_t size) {
     return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus<float>());
 }
 /* *************************************************************** */
-float MaxReduction(float *arrayCuda, const size_t size) {
-    thrust::device_ptr<float> dptr(arrayCuda);
-    return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::maximum<float>());
-}
-/* *************************************************************** */
-float MinReduction(float *arrayCuda, const size_t size) {
-    thrust::device_ptr<float> dptr(arrayCuda);
-    return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum<float>());
-}
-/* *************************************************************** */
 template<typename Operation>
 void OperationOnImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda, Operation operation) {
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3);
@@ -299,14 +111,6 @@ void SubtractImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2
     OperationOnImages(img, img1Cuda, img2Cuda, thrust::minus<float4>());
 }
 /* *************************************************************** */
-void MultiplyImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
-    OperationOnImages(img, img1Cuda, img2Cuda, thrust::multiplies<float4>());
-}
-/* *************************************************************** */
-void DivideImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) {
-    OperationOnImages(img, img1Cuda, img2Cuda, thrust::divides<float4>());
-}
-/* *************************************************************** */
 template<bool isMin>
 DEVICE static inline float MinMax(const float lhs, const float rhs) {
     if constexpr (isMin) return lhs < rhs ? lhs : rhs;
diff --git a/reg-lib/cuda/CudaTools.hpp b/reg-lib/cuda/CudaTools.hpp
index 010e3017..6c67ba27 100644
--- a/reg-lib/cuda/CudaTools.hpp
+++ b/reg-lib/cuda/CudaTools.hpp
@@ -26,39 +26,16 @@ void VoxelCentricToNodeCentric(const nifti_image *nodeImage,
                                float weight,
                                const mat44 *voxelToMillimetre = nullptr);
 /* *************************************************************** */
-void ConvertNmiGradientFromVoxelToRealSpace(const mat44 *sourceMatrixXYZ,
-                                            const nifti_image *controlPointImage,
-                                            float4 *nmiGradientCuda);
-/* *************************************************************** */
-void GaussianSmoothing(const nifti_image *image,
-                       float4 *imageCuda,
-                       const float sigma,
-                       const bool axisToSmooth[8]);
-/* *************************************************************** */
-void SmoothImageForCubicSpline(const nifti_image *image,
-                               float4 *imageCuda,
-                               const float *smoothingRadius);
-/* *************************************************************** */
-void AddValue(const size_t count, float4 *arrayCuda, const float value);
-/* *************************************************************** */
 void MultiplyValue(const size_t count, float4 *arrayCuda, const float value);
 /* *************************************************************** */
 void MultiplyValue(const size_t count, const float4 *arrayCuda, float4 *arrayOutCuda, const float value);
 /* *************************************************************** */
 float SumReduction(float *arrayCuda, const size_t size);
 /* *************************************************************** */
-float MaxReduction(float *arrayCuda, const size_t size);
-/* *************************************************************** */
-float MinReduction(float *arrayCuda, const size_t size);
-/* *************************************************************** */
 void AddImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
 /* *************************************************************** */
 void SubtractImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
 /* *************************************************************** */
-void MultiplyImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
-/* *************************************************************** */
-void DivideImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda);
-/* *************************************************************** */
 float GetMinValue(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1);
 /* *************************************************************** */
 float GetMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1);
diff --git a/reg-lib/cuda/CudaToolsKernels.cu b/reg-lib/cuda/CudaToolsKernels.cu
index 5243f464..361bbdac 100644
--- a/reg-lib/cuda/CudaToolsKernels.cu
+++ b/reg-lib/cuda/CudaToolsKernels.cu
@@ -68,148 +68,5 @@ __device__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda,
     nodeImageCuda[index] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 };
 }
 /* *************************************************************** */
-__global__ void ConvertNmiGradientFromVoxelToRealSpaceKernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < nodeNumber) {
-        const float4 voxelGradient = gradient[tid];
-        float4 realGradient{};
-        realGradient.x = matrix.m[0][0] * voxelGradient.x + matrix.m[0][1] * voxelGradient.y + matrix.m[0][2] * voxelGradient.z;
-        realGradient.y = matrix.m[1][0] * voxelGradient.x + matrix.m[1][1] * voxelGradient.y + matrix.m[1][2] * voxelGradient.z;
-        realGradient.z = matrix.m[2][0] * voxelGradient.x + matrix.m[2][1] * voxelGradient.y + matrix.m[2][2] * voxelGradient.z;
-        gradient[tid] = realGradient;
-    }
-}
-/* *************************************************************** */
-__global__ void ApplyConvolutionWindowAlongXKernel(float4 *smoothedImage,
-                                                   cudaTextureObject_t imageTexture,
-                                                   cudaTextureObject_t kernelTexture,
-                                                   const int kernelSize,
-                                                   const int3 imageSize,
-                                                   const unsigned voxelNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < voxelNumber) {
-        int quot, rem;
-        reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem);
-        reg_div_cuda(rem, imageSize.x, quot, rem);
-        int x = rem;
-
-        const int radius = (kernelSize - 1) / 2;
-        int index = tid - radius;
-        x -= radius;
-
-        float4 finalValue{};
-
-        // Kahan summation used here
-        float3 c{}, Y, t;
-        float windowValue;
-        for (int i = 0; i < kernelSize; i++) {
-            if (-1 < x && x < imageSize.x) {
-                float4 gradientValue = tex1Dfetch<float4>(imageTexture, index);
-                windowValue = tex1Dfetch<float>(kernelTexture, i);
-
-                Y.x = gradientValue.x * windowValue - c.x;
-                Y.y = gradientValue.y * windowValue - c.y;
-                Y.z = gradientValue.z * windowValue - c.z;
-                t.x = finalValue.x + Y.x;
-                t.y = finalValue.y + Y.y;
-                t.z = finalValue.z + Y.z;
-                c.x = (t.x - finalValue.x) - Y.x;
-                c.y = (t.y - finalValue.y) - Y.y;
-                c.z = (t.z - finalValue.z) - Y.z;
-                finalValue = make_float4(t.x, t.y, t.z, 0.f);
-            }
-            index++;
-            x++;
-        }
-        smoothedImage[tid] = finalValue;
-    }
-}
-/* *************************************************************** */
-__global__ void ApplyConvolutionWindowAlongYKernel(float4 *smoothedImage,
-                                                   cudaTextureObject_t imageTexture,
-                                                   cudaTextureObject_t kernelTexture,
-                                                   const int kernelSize,
-                                                   const int3 imageSize,
-                                                   const unsigned voxelNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < voxelNumber) {
-        int quot, rem;
-        reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem);
-        int y = rem / imageSize.x;
-
-        const int radius = (kernelSize - 1) / 2;
-        int index = tid - imageSize.x * radius;
-        y -= radius;
-
-        float4 finalValue{};
-
-        // Kahan summation used here
-        float3 c{}, Y, t;
-        float windowValue;
-        for (int i = 0; i < kernelSize; i++) {
-            if (-1 < y && y < imageSize.y) {
-                float4 gradientValue = tex1Dfetch<float4>(imageTexture, index);
-                windowValue = tex1Dfetch<float>(kernelTexture, i);
-
-                Y.x = gradientValue.x * windowValue - c.x;
-                Y.y = gradientValue.y * windowValue - c.y;
-                Y.z = gradientValue.z * windowValue - c.z;
-                t.x = finalValue.x + Y.x;
-                t.y = finalValue.y + Y.y;
-                t.z = finalValue.z + Y.z;
-                c.x = (t.x - finalValue.x) - Y.x;
-                c.y = (t.y - finalValue.y) - Y.y;
-                c.z = (t.z - finalValue.z) - Y.z;
-                finalValue = make_float4(t.x, t.y, t.z, 0.f);
-            }
-            index += imageSize.x;
-            y++;
-        }
-        smoothedImage[tid] = finalValue;
-    }
-}
-/* *************************************************************** */
-__global__ void ApplyConvolutionWindowAlongZKernel(float4 *smoothedImage,
-                                                   cudaTextureObject_t imageTexture,
-                                                   cudaTextureObject_t kernelTexture,
-                                                   const int kernelSize,
-                                                   const int3 imageSize,
-                                                   const unsigned voxelNumber) {
-    const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
-    if (tid < voxelNumber) {
-        int z = (int)tid / (imageSize.x * imageSize.y);
-
-        const int radius = (kernelSize - 1) / 2;
-        int index = tid - imageSize.x * imageSize.y * radius;
-        z -= radius;
-
-        float4 finalValue{};
-
-        // Kahan summation used here
-        float3 c{}, Y, t;
-        float windowValue;
-        for (int i = 0; i < kernelSize; i++) {
-            if (-1 < z && z < imageSize.z) {
-                float4 gradientValue = tex1Dfetch<float4>(imageTexture, index);
-                windowValue = tex1Dfetch<float>(kernelTexture, i);
-
-                Y.x = gradientValue.x * windowValue - c.x;
-                Y.y = gradientValue.y * windowValue - c.y;
-                Y.z = gradientValue.z * windowValue - c.z;
-                t.x = finalValue.x + Y.x;
-                t.y = finalValue.y + Y.y;
-                t.z = finalValue.z + Y.z;
-                c.x = (t.x - finalValue.x) - Y.x;
-                c.y = (t.y - finalValue.y) - Y.y;
-                c.z = (t.z - finalValue.z) - Y.z;
-                finalValue = make_float4(t.x, t.y, t.z, 0.f);
-            }
-            index += imageSize.x * imageSize.y;
-            z++;
-        }
-        smoothedImage[tid] = finalValue;
-    }
-}
-/* *************************************************************** */
 } // namespace NiftyReg::Cuda
 /* *************************************************************** */

From a560060fc7df02580edc1f9c910bb219d6ce9fe5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Thu, 22 Feb 2024 13:38:46 +0000
Subject: [PATCH 297/314] Use git to download Eigen library

Also, upgrade Eigen library to 3.3.*
---
 .github/workflows/analysis.yml |   2 +-
 CMakeLists.txt                 |   2 +-
 niftyreg_build_version.txt     |   2 +-
 third-party/CMakeLists.txt     |  27 ++++++++++-----------------
 third-party/eigen_3.3.3.tar.gz | Bin 2071698 -> 0 bytes
 5 files changed, 13 insertions(+), 20 deletions(-)
 delete mode 100644 third-party/eigen_3.3.3.tar.gz

diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml
index 5c085d3d..a411c00d 100644
--- a/.github/workflows/analysis.yml
+++ b/.github/workflows/analysis.yml
@@ -53,7 +53,7 @@ jobs:
             REPORT_PR_CHANGES_ONLY: false
         run: |
             analysis_file="analysis.txt"
-            cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError --suppress=*:*third-party/eigen3/*"
+            cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError --suppress=*:*third-party/eigen/*"
             cppcheck -j4 $cppcheck_params --project=$(pwd)/build/compile_commands.json --output-file=$analysis_file
             # Since cppcheck does not support OpenCL and CUDA, we need to check these files separately
             find $(pwd)/reg-lib/cl/. -name "*.cl" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f1cfa291..57d91902 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -134,7 +134,7 @@ include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cpu)
 include_directories(${CMAKE_SOURCE_DIR}/reg-io)
 include_directories(${CMAKE_SOURCE_DIR}/third-party)
 include_directories(${CMAKE_BINARY_DIR})
-include_directories(${CMAKE_BINARY_DIR}/third-party/eigen3)
+include_directories(${CMAKE_BINARY_DIR}/third-party/eigen)
 #-----------------------------------------------------------------------------
 if(USE_OPENCL)
   # Find the OpenCL package
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 21c8d99f..1c105f1a 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-415
+416
diff --git a/third-party/CMakeLists.txt b/third-party/CMakeLists.txt
index 54a1ba3b..1357e060 100644
--- a/third-party/CMakeLists.txt
+++ b/third-party/CMakeLists.txt
@@ -1,24 +1,17 @@
 #-----------------------------------------------------------------------------
-# Eigen version 3.3.3 - 67e894c6cd8f
-if(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen3)
+# Eigen version 3.3.*
+if(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen)
+  message(STATUS "Cloning Eigen...")
   execute_process(
-    COMMAND ${CMAKE_COMMAND} -E tar x ${CMAKE_SOURCE_DIR}/third-party/eigen_3.3.3.tar.gz
+    COMMAND git clone -q -b 3.3 https://gitlab.com/libeigen/eigen.git
     WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/third-party
+    RESULT_VARIABLE result
   )
-  execute_process(
-    COMMAND ${CMAKE_COMMAND} -E rename
-    ${CMAKE_BINARY_DIR}/third-party/eigen-eigen-67e894c6cd8f
-    ${CMAKE_BINARY_DIR}/third-party/eigen3
-  )
-  message(STATUS "Eigen3 files are copied in ${CMAKE_BINARY_DIR}/third-party/eigen3")
-endif(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen3)
-#-----------------------------------------------------------------------------
-if(MSVC)
-	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj" )
-	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /bigobj" )
-	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /wd4127 /wd4505 /wd4714")
-	string(REGEX REPLACE "/W[0-9]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-endif()
+  if(result)
+    message(FATAL_ERROR "Failed to clone Eigen!")
+  endif(result)
+  message(STATUS "Eigen is cloned into ${CMAKE_BINARY_DIR}/third-party/eigen")
+endif(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen)
 #-----------------------------------------------------------------------------
 if(OPENMP_FOUND)
 	if(USE_OPENMP)
diff --git a/third-party/eigen_3.3.3.tar.gz b/third-party/eigen_3.3.3.tar.gz
deleted file mode 100644
index e2328b4cc323284b60b53feec13bc7a269f5ca1d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 2071698
zcmV((K;XY0iwFQ>LabN<|LnbKciY&ND4Ne_-5-I)^m-*#D4LTf$xiA%TB2=sWJ!_a
za=C}A2Lgu@ViI5oP%_h9@3+4_odF;y$*!bs_kGby$`T2jVV`~WJm}<Br+b?IJ2=pX
zNBw9J#fQ@ut-ITsa2eet_u;Hnd?|kO?LTe!8T9+~FZ@*hZFdIyzv=Ya`@MF*)9H7A
z10VF-z25}w-~0+ctGo!8LGX*msc!DS$<I<RvX{YO*zHW?!~OQ4+mDWV9o_FAMw4hd
z=^b_k@LTtwa}XUIwN;wM`eooF%Ko%79kl!KNZdQnasOZ%PurdD;GjR9s>w1;qq~<u
zuhmmUcuT*)ufO@n&wrI4{mA{-+3asJ$5Z#eJLp&Lf4|r1|KtAu8bANK2#Z25)BNAn
z_pSUc_(O2|27Y@#KD!tm{}KK?zJw3*XqhaE{KfaJB;`k9Jz3qpnCJHqd>Ds?d9w6*
z8W#6SZoXQj`D(GqmW7UA6gn?n%oj@?&fvN4%_HCAZ}3)fua){&q)mBJWXtuxEB=t@
z{>QETb?$$1n`TS>_rL$WL9cTE_uJj}Kkomp@sqF9BK-0)xSeGa^_OLI^L_N?3lM;8
zncTuZi**G5jT7MI^dF!RA>2*+4?HxB;lH=@4F3C<xrzW==s&IR;e$!|S$+Tis>v#u
z#rXF;i(2xpS|vB%tK;|KXMG$-cN*nOln-*%f(afLDvaQ^C&jw8NO2X}s=&3}E%5zW
zcI(!+)PGj^0Z_5%PK7FjDQ4j!j6M^IN#J$)bE&6#snbaRgM2*q{^N=K3jDv<IcQh;
zfB&HSkNf{?{B+~VQGakaI5><3{lj*2H0bpvgYH3hI&JIT{?Sp~(Vb4%3%afL-b5E+
zPxTJ=yWRd_=OB*sU~n`UOv7F$KI*pPesp-W9}arca5C+6?JFJCn;dq!?P>eq;Bc_7
zyJ7pFt2^!K{zUhB8s6=9Cx^YTJ)l>XQAc%-;%U^@2ZO<25^3EXOb^2Tk)HIs)80|o
z?jE+oUjJ}k+d1~tL3j`!hDZDGEa2318g~1=c08T5qi(d{+n?-*aolTn-A?-IAnx?T
zxP5pSM%~`Q;3(?#kM=u<2eAKkf7<TuA4~^f?`VRrv^r{X6vLc@&i=vCG(Ldcw)ZF9
zFzk1`z5O^kiZwv6Kbg9X50vg4fG!*M+x_+-tS6du!u<|RIn}-J=xCxR2m6zQ{(d`{
z=qV7+y}R4Jd2+i1VJAzKj>ExzALfa}Znx8p^kMs;a|GLnric6egM*$P#5&p!X?bm>
z^-*{b!C{6+`T)<M1Hd|jZA@T${Ym>^+C9?oLB}qytK$Aq`{*F*?#GAiKE4IO+V6+m
zDLmKR2MEHXu;s%boPv;8Ufp1SACBPg;P9Z=nM9NQ_-HcGZCDtb@}#}r)d%}}AHW&_
zF<*tVJ-lbd-gKGG_duK^dfn;51`m%0-M9~1nF0*MPSo26=p0N2N4huA2kpVZegtZx
z!KD4D-+{xA54#7G-t-XAuhW}!;Lj-_9^fNj<zNaJV_)e4VrbB6!vO%}s1xsp(f%QT
z@F1QHV4QxZ7ahg}4b$3JdMbiVv=1jofEw|j+ua`=Om!3iiXXxoaNvVMw-fgUefvsZ
zO$Pma__(h_4XSVdDAb38!(My{Tj?GicEfHIwtM|<NUtD@hlibZydO`(N&g^>rh2eH
z>A~K+u<L0YA018yfMS5r@InV(31^Etyh&}iWQS4M3#ZelGl=8%qzx<WblQD5h@-=S
zj^l%aaO%I+Rj~4D1gAUMKY}|EhPoXN;)(7a4dMYDaaZ>a0Y5wY@)m>ouy@$ghw%SN
z6b+`G{@`dIh{6Q!Z+kKs^g1Cfy*Hi6OSq@Wv>#0lqoZCQXhI0rq`yDy=mE?Fx4jL3
z)JHwtKkC_600qO&(f$$KLztx#!xq}zgJ`1lLA2imObWvZEW6hmz$;x@Sa_)0ac93b
zjbJUkBRxHwv<HAK1GGgP=|floAa-<UE|tCyXR8qOh+MUg;N@=TZ~zM$OyHq?xIvwG
z5byVU8t!#pA9nRLY@?yVfIQOsVSgV^2QGZr4gpj8`(dXK_h!-$JA<fqsN<uyKBRYe
zg#(}zox`X-I669NA50=RC0H2jV*&$oI$a&N0TmDT?JHdcYu#_d(|~nx*xrxZF<e-n
zfdHR2Y%>D-9QTfn2J{NTx(y)PpN9QOx8FH{OFNCGa3~%4{h-@D0DcRc`~bisFLV`P
z?ciXd`_qH=B%A_$gYoncT#Fu%6~HREP%!@y?H$O;bbru=vw<N2n$v!Jzth*<X&)h=
z2m0usf7As`ZPS8)IL6`8q_1I|?*4uo&!^KqI1GCc%mL6H=n2p}*w&OD@BTvw`>!tl
zk#v5V<VF4qt-nvnfBS=euT$0k0Du1>|NR<2i)Hp_9Thtx#DLzeQqdgF{brt|$$T}x
zS?a&6lBJG!ejHt1jnB_Ob~tS9!(+b<Zdc(l4#L|oN%JB|(!D%eEh8O}!j~(ROn3h2
zKaSrI|2Vq2I{$EaJi0j<U;Z3iU0;s=^kH}!m@i+C&xV(u=*wLd1aqC|;jP|zGrS(2
z-i$6U&o6_<xH5W>W<?OrX4!*|TfxOlhq(@L;XxRr`XPv6%@G=of}IE+)38o{(|xlW
zglQZsS7|_)9-v?n6nCH_wt`RD3PxMQQ1E7v1xr28?sZUBSX)73SLqbg;vIs=aF&F5
zKybYI9=C&UGFbWVmrCVYAnkz!03lC#VA}N_bpUdYmVmZ#7?UvO?d}!pg%0v!35qZs
zNw9NvejNzlKa5XLZmvIFj1cZPU-k2#aj6Nm{!V$co$K?{^N%Aq8+(rSwVxXSV-c<4
z2G?*?WO!K1T||ojesCGv9&$L+Aah~^mKO}qPM-VHQWpL7uRSEi9ZZ>|)2y)-89$_-
z)9fJ?gdzz4TOAbst<eIcARG-)0?~O8E_HB%TWSUZS^DK8ObZ{6VOm7kT<^g#>NHQ#
z=DFMQp|5`XxBLOjS!y^>4cL9JmpRDw0d9UyB7X37KfnDOuMHL#t_oTUUgCAI2$yip
z;f~t1rmOixFV!@e>7C2b@C32(;`02TN5|LxZF)hs=0(1YUL1o!_+nKgvls8fXqn}$
zyGC$B?C(Yfy}8q2jF>8jemQzO`YCunygq)1_&7~c{jb5l761Ob(fIA??B?V7B^<br
zNB&~xU)y^}|NgsOJMu%e1hg~b&r$cmmDbMtb3^X_=IHLO(Q0lw^z5@%`F{A%=a)~c
zavuIUTYl3jOQZjiRgTY|US*Pg>ni=x>sM)?m8WjbKD>WDBD%YI96z_r$EE#yANvzB
zodybw_&e9jXthkjSpfVO6zX}90CAYWoiCd30Q~Wg%w_>lh<iqO9`N1Uo8x!Gv$vzG
z(X~+ZK+hM&x(P#1Rz>iTrQa3sO}1cW)l&M4j#h=ffq5bzW;@5{@82U$yt@sGWD)FL
z26asO{KNIdhwGakhnM5w>(h~){(5|I!{6;bHZr??zi#l?m%}qS2Q#{X6@UW%hadoS
z30Jcs*a<&}@W;KLWs5lgY*(gh2Kq~}45Oj~;pi?*Z#5jPPz%fH4GgkXu~-$YRttGk
z&0-i|M^}WgxP&eE{J-o>FN5z1ZNGm>==&<%n*s{AzHii=<Bhy+kj4jc87E7pUx&WT
zB%Zm0-@=Ab0?O~Qhadsq0Q68|sUfd+P=5tRW+>ekw~g`z`Z?ICI}o>LH}227xRY6~
zfAO3Zh?EBcVJMAet61+G-|6V{@lQV;XY&Q{k>#6Nc)M%AeS7v{3?dcqpOa*n@3IId
zkUu~9a5}oV7+${%>e&tn9lL*fo(*&}4)EOIyk<%I*_<1!GmTLYoF#B4fUW1hI?u+X
z-Jrt-#ilD+&-<{rJEbM95%TT`uSZwcH^(2YuFv1!oQ}`_IJ&%fb2@x`6$Fh&a6AN@
zMfq%e_BLn?<2XT~1P}<fjQ1E03JB$$PU#aQhN~1X867`)t8{hzdEltcB*EkJM1m3i
zO#IpAT;60?*FXk`mnX0zxP_PF(bdiMIc*RAg$Z{_H$tJ*J_+~pojm^O=FRzsvlDoz
zR{T`7=;ILSeC^Q#N4%5sUt84(J?Tq(y-6A}BowYXupB+3J;Aqg;6p$WgEi<|!D1E`
z(`-4X9nz(wb=f=h=vH@Wt<Q#wxy6q@_A}hHTB!X@^13YXE9ZS|KqGhCU!K2rb9(UL
z^#`E6SDUxfyTAI&VwOZnF<ZY3o(Bp;Cx6*oX_l7MyKI5jZ@36)Gq0~fJ9<sp&O`&J
zorlZMLB2`~-3ZR#ydh?NlH}oJrh~=j+bBz?$!(COAhHAUNIXaI5hz3UkQ@7eT!D|{
zvtBm<waV$G@6O?}UVXZ{9=*Rg8@?Y|?j<AOerr~F@yEBv=Vx!mZ_Qd^DSNV()*=c}
zNuc!*`oqf4e5KY>_3_#9>4%dMP>ajU(b+Yr-S}u3Fjp6+<LjH#;pN*A5f=nGz!dIe
zxV+U&7C@5%Sm$#zKr90mmk>sIqh`R9(VO9i(`!86%kz&cQG%+8cQ<?3Bhfqvb1MYF
zEng;IP|{n%u?N`_q+K}-i=cHwBJ9FAz9HG0=v=<j*gM(&LZbpCkDk6C{&aIS{+ChE
z=ylswGWtIJlEBVop)c;q=n~|~UqRzFn*hNM--MCpsEh6B<ObIkefe@@Nbe1j-T=up
z@qS<Gx*T1c4v%5>y#`Y8D}1N}R|o(7dHd7<_HVy~=NIw<k`sE7-iS~X*qnby1`R;O
zy%?VYle+qFadCcmeIswv%K)QCzxPcnjt5ye&VKZ>*+Yo{!M~3K$#IqUS3e%(tG^9C
zYQUL%HAfvHEP@QpTb8S_H3D>MAOrLvTvNQkXafmA>^VMGG)Oc9aZQBeJjgQ`?IB#I
zXi=i;v=Va&GfA2V2Sy8B6s~UXieR$d23eW_wu8K9@1tHWVZ36sOktiCC=hlYx=HkP
z{>U*kWQfM*L473Vx$T(TxBelaU1)qcIx#0@M)!{ee%o>$u$qOA!?Z}E#uLwd1Pbxw
zGo;+mQ@?%mF_7KYpf;tqjopf<$0rE`JTcQMg$w%RdZI8d_QF|mn?C*MUAWxKR}+aM
zeDZl<NqhInlDx)oVIcnKidVUg_hwlHZ=gXAHt_T`iww|8FZX~w7I#lR{QyttJu#&|
z`7{bmVX>EM)Om{alh1<|wk*&E)88wyy<Ydp7iQTlTE+IV#nYGrY8)~`(dwQ)uHXHx
z)7b-jLklD@h$ml&vlS4Iy#?siS-~^>_K7LVt06LgKnP=0M)AJqVXA>H6<J1f9JG3(
zOSe(f1kGomrz^AwCIuTxCOSg8v|<^=JQ)mVU?Gb_X3gCnU)>;)`{^eXK0LB)A28~r
z(JJ3wj)vE0@ZQI0i^i@HlgEZ;it_lm<CZHV<;m-@w}6c%&$vf>_ki4CoJ4!XJf3s)
z;1;E~^q<DlYnj6HM2+`S{qWS_^l&zxKaGM@K5`^4VV=lJ9(n#DK054F>7Ai^u}G%l
z<Kyr0pn)0>$i~e5Br-t9fN{Xc@`a9)X##(WK(*8;>pnGkx7Y61&b~3ZFtp&e_zKD1
zdqx%q7M#MiS0pD92IJ1*RS+mwXy+o26mV1!36r_JL(T{9GNCy!UPZ70;JKTGl+Tkc
z#Wm=Gdm&tK;@iP|g>EB_yck}d0c~%xxbTqRd?c%dvpfrs(4rt3`dauS93H)LxMLV%
z8YZ)xCV!tTbp~q=9yHpYRv@gSxPT_13CtSone)f>p^=JydzABgZ`81VluD05nYtQ3
zfk}T7R$_pjyK{Ai$r6#A<x6)!{B97h$S})8lOyG2@Ext}JBEIp2vT0*^JT@-#w#lD
zBl~oZ8{wXo1?uMI`S|4K?BtJ64zWmd<H?7n7vs?;b=xjuHE1Oz&4_f3tE*8rXpB;X
z{|lpQ(uZ(yN!xm%zl3ltNV`Hc$g@(|5xx?%^E}sG*<)3~rlsI9Z&^eIwzWIz)$OSF
zH9P8k!;X5-*irA>c2vKmuiMi%ZR%OO`sQto`Zt}rqw2RhUvpS6$~PPrO!ABat6$3V
z_SUW6Tlees*8Qfvb)U7jZ@OT^AAj1|7=HHLUBR<oyQT2{%TxQQU%```dGmg_u^IgA
zxtqafO(=x-@!1G1QD3*o={$UDll3cm&WY`Jx^;(!AAiNU!Q<a>bnw<R*qef6y(jY2
z9|nQ3jg8LGS1E%(<B;oD{Pa0@2FIHRjUPVyq~YPOKGM!0dh$%`7xDC=o?efC9BpoD
zI4hDLb@cpW<#E3HTxV!}j_|ih@(20r$@AN~wkNmy<JBhB!Oxz%O?dWew>ap-f9~@q
zNl*PUp4<`Ah>b0sX^Iqj-kuzl*&s^%@mJyMV)chqqhd2%-1m(H>V)2J8Ow}fWz?@4
zWR`^>e*<>ih0FM<Lm+5=<0-&F|Kf8P^uKC<f3NNQ3tZ)E^l(%qkKR%Hr=7o!jyt{G
zz&!Na{mJuRO(K@j-5>ti>A|mmWk1D}Ldo-1S%JcK<#n1ZbUI(uD6Z!hqqFxH+w9iv
z+d3HYw;Lcnhq>V6?Le)9)t}`n;A%EyUfc@JtqZE&O6QEl>+W0~WK%fqce)3`9$G`J
z17CkxEMZoPYRuWd5~hVbe$WBB8K8ZKEgCmB(`*^(B!xw>`+|RsgPk$iE;trA)|8wY
z7GxI5^-SZRdbVbli~0GegD6a`$$(?J#QK9fz0+O*0h)RuV`hnFI$W|J1wP24Xtm5s
zLr*kYfgUv3u(D~P(;@-zVl3y1r^qH4)h+IrJB%X3m?>*tg8$9Kb!pV%Zkas*#3D_I
zlml#Fo#-A)qSXvmw7F_o0KG|}Xy~GVfz$HH-k1QthzvhT8tX6g4movNawhxjeVBKZ
z+GO20AftXTU8NC)q~5_vO|(uWCL0bXnJ+RpV}i0h9$h_|=c4V4(YrhfUOdEvPWUgb
z3I7FrX$Y<@^<oynfrYbK@Z#eaG=IJL*x%}MeDRT@!5a8mzY#o->^?>?Ui=?OFJAop
z$i?5=pj7q(oZc87OU_MTE_kcSzG=!|1c1oE%qK`d)_I}lc@yWx=QOP-E=NAMz801^
z>q}f<2`IIT{C30E1}xkxLYHgAnDd*_PZy`-<1vW3#5c$PGQ6fJK641xAkMrv`76hw
zZ#$OFIet1k2pVsYmIo`ep$GUuBF1$jL#<$p26T<5Ql6!?b<ke-_4wKhd#h88S+Fp6
zuR9^zeXPy9stg*Kyd1Xp1RPjY#@qX-Q|W7jTnq{OL+|ili&%quqJXv>nMv@a0<CR0
z;6&~X6e^zumk}YPr=T)J%&PGXmG~+L_P;Sedfg`gVU8CT4Djf6_skLhB2X{}{Ssho
zMVh*(9vjqie&(JY{h?DAEyh<5xBk?f+ZHIcvA*Hy`0d&I(b;v)#ipnM@|f)0A(Jo9
zy1VtNZ-<7R?MX<`lTSgxE$~|)a6G!eL$M<QojLy9?>sSR`*zH$&+w#r!#%zkPyG7I
z-N3EsSi^T9^;Fn5d5kdv<eKB(raA=C6Lz|zTeq#?TBkY6m|47<VFHs-Mn?pXKb#Ed
zDu1{h-3%{}-vzyPC4vk+l=v83kyW9Il_mwC3QQEtFnW~&S~1|Rs0QcCi?OC?TaM+v
zF}Z(0`E6>V3G?$46sq4<lkHxI(dP#Yt~60oFlVtg?44r_X_qFJaC`>Ga(ap*xKpqB
z(m%+L!^<;*!`7!!T!=MjI1zfVTLA_O);<QrT!(QfjqI^iSVQra72+w@y=J0TYrgwg
z1Q<SHikPSKb$OagJ>xJ@I8uiG#pUSD_$Lmpf55y<iCgB>9t!7VCw{{5`8xIkVz(?(
z)?pcg`AQz8sC~*L>NS3yCCe=CXe=MY`fJ0to2%bg`^oTn=q;SH4tU_d!Qy+z6hhmg
zQ6)CVUpueZ2H$?ga&4&a-(s!2cZ~C`TO!`tJeen`$6`#bE^e4^VD{2NgO00nN}viC
zpk2hI=*l;1bAM14K;0Ds!8oF!9JX2>aBT<^{>|y1q>0>aOugQLD}0}1s~m0+e}yCn
zzx{bv0+`ARdI)n2|2MDA*0`Z2y~CgU%o@i-&@i9u!F{H@f(F1V0hNT0lTxVjAi^v<
zOWQ%W1x6uClnC@@94BkpMK?UrrTS<spm*FhFD^k@yu2QduFQ@RU)(nS=|2c^lFIWK
zxQWcusqef2@$d>K2PRHj=_Z?UdSLH{!;x<!KLQ>)0Y(SpokZ_{`j0x&MN0Ph8HzF|
z$aEATluNWK2cI*57_V_^N=>!MiRBX=rvBgOXQ!V;h4k0{7z27?UsuMyQr<%CP5jwD
z?C}7EV!oQ#d&cSJbzj6;6o7C-pm}q7{vM>LQ^!~Ny<@ZGYnBs;Q%;jFbKMl>cPVBt
zvai9v@YrldIrch!saWy75h-J57x3N$B}odu&D&dMFrKSV(F8QZ?0>XY-D>5X&FN_P
zBgf|hFmTZ~*BG&HrNq)Eab}z~qTHG|+t_h{gr*rxmOA_lA|3A2@tfM=YJj%Mfxl{z
zlUbP8e!*4lb{|;~zFM=Otz$j5cnaDMu09=~)`*SxG38MOSL<kI#G+QM)Qiswp$KZy
z>MZlPJU_qw`NeyZCqaI05cx8Hzi4>rknU}EUKk{Olce!6OOnTn#bejNt5aHg8T34>
z1Eg{YOVzSSU?vgS8?Z^zp&<`Qaw#2#)5wx^b2f>YuAlGv{N(&4*|igtFDO8dZId{1
zHHBxg4#FUUCE_(==F}iAFezq@|5>GI@J2lvVFu6(cx5YoY>ZOM^?78_d_n(i9oQQ~
zRVb_}P#eWzekU}9MHVEvo1YrmtALxx7r;uAw19QZ=fD~eGpafj;8XP@OZ8$A^|t4t
z){IuZJa2~MQ@j*b<3gu{Bp6oij(M~enC7)-)a-z<Mwi3u^GjQ@Lu?<}vQ)8S%XcoX
z7{AAZsMSTUGRi=qOpbN7V#i2^0?NzU=l4oKzuWsoKeaOeA(-RM<e&dUCaM{QOi22#
zzb2NiSq{K8N!l^vRV#Q$Iz(>b-JOgJ0&P7Q%dyD+N&o$QSr%tG`iB?4uMvEHc;=uV
za>*P8(uU$BnUSd6$@7<0x2l&xRde9ae}49;HJf@FY?1PwIp{IvuG4PKQK5s%*b04h
zFK6rX^MSyGsTJ&<)NT6@Xf6cF7(}XPj(EQOmbq(yLsJ8N*^ag-Aj)WOnMg2<h8JV}
zj>4k}NX<YhO69>x_GOLG`oDtH@LK=0eQ<Jv@!*`;X$m(PcjO)u$>0*TG!ijU5Pa_D
z?}}pa@&(J)tzz|M8iJm<Ze`2c7v!bHy2JmoNQ#;M1B!h4_Jxv!<uCO0>F~<rVh1}Y
z1yibaXvkq;3P$a29~3soi1webKXiHKVn=WU02sSZJbo4WwKenjI%ARMg?~b9cErPf
z&Ix_PiC9io$7K~smqD2^i<xm-y^7u1mpCnVOF5Wr?_d=e_fEN1%$9d9s3u5IUr46%
z&IwS5v$0s58|ClJWk&N{V+%Qbv4tR3ra=i@%zU-O1SFA;F{5sZzHyg$E;l|13oK}p
zi|U4TP%8z=su@?H3SZ%4o*0ix0=#^|1twVfdZ%Yt#Xx-X<GJ`)MZ^`+TLdM0840<7
zLtKw8-wcmO^s<-~#M<ur5H_B-N8>@CtTi%5jmddq4ARN}>Gv!s+fQFUVwwIyUU61S
z_fp*%{=l{h1k30P*`|Tu(8UR`${5q0ZDty?Ue87|`ia5_C@J3<3#{!T*kQf(iY*-S
zuow}qX>>j#C%Q|WkQ@GlB>7;6_qZJhY~OsbX`Dp;sqS6hdb0Tzy@Ki@79_f=xi@9>
zS1ecvj7yf6ZB|=0zMP%kfT(=@N0;(q?Mhcb6pk;?@szkSN2WPo01o3GG&}M$WGSX2
z(ShQ5UYe4GR9$KOFQfA}H$P(P&IQ{^{Ehy|(afUxaIBG=iH5`3VokPBkYq}p7D<YW
z5K9V@;X#A7x4T8jTr`Gx5F-|vz@+Q!$D%?QYffmwAmp;5W-#H=D3uD8<zCl&B1a5<
z(1>j)1>7eQS1Ss4TgHlTBP9~+Kx4~M`Jd9g1s&a)AWC23x0L*QGYRt~vPf%6jWO14
zg|~W6={!UEws50oje+O{sQ7w(I==oCT!E1B9;-&kVk$<KDp(!?+Rs8ku5INQ<8l*C
zgalwtp|sUydO->c$N^Tm6`WJu0V3Qqk~vB|ZHn|ln>-vbM{Dr=FJH3WgQjIBT@~_z
zMCj@QNwIk$K;vnd-n66wlSct%d2)*EQb5dH>e~d=<TY0wFyVKMkO8o*gsPdWQWUp1
zT8&=Q_Zn^G_$-E|;XR`8G+#XfG`Hn>teWocOo&Eci?Dh`wT_I|ZT^|GNTMg7%XWVo
zyn$6r&>V>MWAtqpt?TYJIu5XE5Gh7w4s0?`$J1ad?-oxnWtrZG$&Ab&%<6+5{y;@%
z4|zEeh>++DMyC+s9X-iokzDMjt-A7g7@3XU;26%mxHDC;JnP{0m7SAEes~&Oj;^pc
ziY;4rd~y7a?2NlEtJDAJd5rFBS=}ieL7Cxw6U+N*ruS`Z?{CQXnDw#Z5hxC?L!xBn
z{yv~=_la?`a(zisj}QkQJq;O-YA6aSV{HL52p7hhhv|w6kxCt{3RYRym;(lej5T&Q
zzSx9!)pu^HA=ka<N#f-Xey(#qfdK!vz_7~CHz8c*hfJ`+=<0ZQF}nHFhx2QYFe`DJ
zPx0NI;J-TG4NUE@=TTf`>3&e}3fw%q54mO_Y}xlOJgVouz76=#Bo?ebqSMG4JPiM!
zC@6AUh>`-pu@5OZ0xCUf*UEqY9PC{@7x_P!0gcG;|NHv?=NF&GXK!1tuTK8X{hzuA
z`y2fq2ZQ~8)c^lAeqMa9Kzv%PmkCzt+KG09ZoAWIk|AdX-z-6v<IrfVWggIDXwu2`
z@?OU+AkvpQM!`7&-ih>%9G*Fa%1X(8{A7}(Xm-J}NO_aPOr*>SvRaV-NO2?!vEsU9
zdO1%Dlp;kIh*>5t`x$Jq4|DvvE?&ZlzOSq)N7KA4hQfHsPrKAbC@M=hL0vksU;q&W
z6icqej09!|gW(wdbP^@x)&es|voM+KWvgxpFqH=cvjnNv^4KaACFfAw#8<7t=wl`O
zI}Qdfy}%qG(#qx_O=DHw+`*Ynhu-uyKzlmV3B810qJD>~*+3Lnw|jy>pA?jQ#baeC
z?Nja%DpsWBV{NxI&ak*Uyvk*yw3M<bFvfA2+`IS>j2y|P#RDF=AP^2_%@m7ef~d7b
z>`57qC<P30_4?iTD!4j-bNz96ISSzSi_7yL$0wtc;Pof?5`=o7dY9w3@2-P)=cgyY
zo+%je`26hpa{T(k^*Mad7+%4f4g5mEe?Fn1>k=9G`DHMEe{nj7(O~$?;ThV@n&O25
zQrZk)1PZJ?9lsx6!(-RyO`26+Q{H>Q`J3SV=n^XyUk^pLz@@wyU!UP*Z(z0|h`yKC
z<Kqvf!^_~}!{x>K)hK|Cso-RMb$mJ;zaO2nV1Af082yL=OjqwPj-|ZO^N(kvOI&<u
zm#;^#O44)QV9{CJC$P-X@ilJ9{dNq%g9V;8gR6_t@fd#@{WOB@fQ-<TL9amWhlk+H
z;AHq782^<DcAkI=oC`4Q5e1L~B(6TZ1}WnD!}Tb5dwzaGu)Z2y{y07!UA<Dl>G>4_
z?8DWl8DM-NO$}oJw60#^@2@{xjR|gKGQIpjO5H9T@<#v+ta1pio)Cb~&v>^0r1Q&9
zI4Z({4xkx)d^dv6FA=5$tsz2zOZWJX!Gr+BYk!-;+34*l%9x<D!`J6H%*XN7XcrD_
ze1#9dm^kmp;U}z+juYcK!Mga@F+j~=Zj+8P7{3XICqIsH<?=LOz?G3i32evj1m-RE
z{flqn|8Fl&|E~PM)9&wgdK>)z;2-_pf0ds)jppsyhkzqPPJyhvJ{=zeavzW2B5zZO
zjIz^f2Hm6JpI4wUciZg)Rgx`^cj?36%~I>&%8>9kXj2p4@HmZH!SBWWH_fNy^7}yr
zBP?o}anzs_3@Dj&u_h*HSrwUsCotX|zf91)olOn@jC7hihS(%^pQ_PIg|ejFSV%pp
z|6MF~IG@ZkK80oof*P5(XTim40<#6DG9#L`fPHBYDR9LzJuU2NKpIe%pW#-RFMm$3
zMjb5uAzOaVTV@h@iz~0rGY|-B-d$i91hm8R%VG?{a9ABHN}0gW--mF`DM+}m7+Gr;
z<acJI$OSs^3WDJEnz=X@hf~FqJI3OgY0T$@?TJG8TUTdP8#AKfCn`SeKQt{e_V!>P
z_F$k}A}OdkA82hfoP!}k1_wc@1H_9ZF8w13bNUg~TXAO@^~jzq$T1Kcc420Gg=+~H
zoKZ+Ic(AW9UCU`8_yTE36q`iwHCm-`h2aV$7+T{eiHyRLsIn!D!bRA5|5&Cy%Lo%b
zVzSEvxn{!8xEzFlT~E^_W|M85f(!w20&@z+RUjjwVLmvA;p=x>#+sl6dJ@E8Y3k0{
zS3MN+ZX#d6jrFnX28+VpGWwHoEtyhm*h7-v?KbTcu!l%fI=3gjV+17t3M~P6Y3W7v
z5MsJk;k}B_cxY^=$C|}(oUpdY$fqbPr}{zBIvu=Oyq4jx%CR5524m(Xtc~wyhOeUA
zjIo?`B{~h$NDL#}F|CX+BWH;=*6AA02sgoFijqYYzdy@YbURneC0;2OulcDK(S;>7
zx@^*ecql5;OVVyYQCnbmD5WUjVF;LOPsxW;6Q@t6h=Hu0Axgg79FMxAq3m6h5CN|U
z?U5k!Cd2yYlycU5bOq#lu{XYnxg9u)qMow^5xU*MRLf>bjar0xj&4ga%L2+IFko8b
zHrz(Zy7CRgisk0fKot~+x7T#(n&xTcp&moaVkL|-1az^BqP{AG*@7s*w@`DTQ57K*
z{FWFrAvm4bmJGZAAxcbWFHY;wOSH8IER(Aj`AfCa*~OyYaQ>bunWdG(Ma<sm?gGrw
z{YLnOgh?*Hx=rp4-l5ND=BZJJ?Kb@r6qV8<iam91++%k29nC8w?>n;{qQA6F*dkTG
z0Q!f?qDb78If<oa_Ka0Dke6_80a|s5y0my=sv}C|pC(my(j3m%O=qLgXd1RdV7oTM
zfF{~i2vJN-;wGOWFDoJX3}V1sLNnO|7=(quCZ-Y>spdo-<P5wLM0cA*W9B2cQMfDs
zEs1KVFj5QNR}e<ncZqzbVlah$FITDB+(zYc;|u6sLG%P%n4uDV%Q`>$IHn<a{+CK9
zU7it@;Z{sj<e6kopaRTkwQ@XJG7_W!%A7CfmLu>zK2bxNnU+aO1gw3aU}U&AsUr;&
zN>ZnqD8{}4FNO?Eu@@j-fb;|?9Iw!@0We=-qdiW1LJ<-vM2_X9Id_aDKVt2x6rs8x
zw_=&JK4@ll?l2G_rc;st7*UGIe0G;m&;q4xku;_SUo*H~W~ng4SaxQSC|siJ604<S
zt(rujEV2Z^4lf0*hFr=NQMcxv9tCHvC1T1kmzAb8Z{kqX6~%QJ<WO$yO^mMKgdEvM
zQA5~SsG=L5q#W!)Rp<R)X2l)(X%SAQ?9Pm29*cqt{2J-rRRO={ejQ+vzs&=}VYmjk
z*Y?XA8UfK9YfmM@WVW|hEf=_P$sWcoHz@8C(qy?%_&6i>j6w%p&HF52gBn?rfbPYJ
zq*9H?EQm!taqRmN1Iz_mP#lX{1R7|oRG^{Uy=9^cYRbcs0!spTUXC1Lw+2Ivl$j51
zp%4{gE%T%RGc}fa#<f^YL6BVd4oO_jZpcX}3zR#WLDhV#d~>Xr7Psp{*Q3k#)=GB5
zk#P8My9Mmb2GG&_jcbp`G*~>P^EJ1qXYbKgDVztgT4jR;?zs-r-0{1;Spr8e3m;5B
z3KFSdqOxwKNV91QX#o_}^8|s!HIn8aeA#ttQ1=MX{<2ZVw{vpkd6e}<M?+5*pHh-5
zv-%(yh0&dPgf+uB#v)Z*gsK57w*gNz<ONK2!Xs%op{7BFz$pX3(KxE<bQ`8pL%|B9
z4Q9zO2(OI+Wi)4!KT)8H!v$SCvcHDQ!kh!Xqu|N_o`?Az9uYH4w!Ita!U+cweSuI<
zg`JQ9i+WtDKm?3fDiKN#`(Dksr~obwi(i>;y^SgpYsi`u$_I(jt4JE8-x?Frukb;f
z*nf(EF$@}1dI}BP@L5AZ;aRj-shv{}+Z&!nS0YEr*A~clyTjWM`yo}~921%pF1wbv
zLRd7d?b?Ul1JOVtAS@$eun?dww$9Orn!p|4&soCxL7+P&=@d^xO966914H^0aiwy5
zMsOLLQVK#^5sA$M@4PZZGO$7%MW>@JohqHlnO(>}xi$zz&g7Pks1hC7dCbQH3Z5W_
zi}qpRT`lOgvw%u2q&d#MLgxXh2y|Ff;fvJ|i3YSMLACI!=#KXk@4QF}b_+xsIPX-3
zR#fkd)fzell&C}lWi17u@SbD_40H~;CK+utB)&++&_I?_p=CV4Pzzj)xV=DvEkJ(F
zQSK)51`E?-dgAqCni}D3>gKuEJV9b3tO4F2Bd>vI3>VB_@Tg8nUOyw>QQlCZ6OnXK
z*swNxcp=Z2CWfP;_a(uI8$dIZA<)1j#Xb0K)GzGJY_70#Z+g%~&rsN4j@2=N(&IwC
zP|_ViSiuc{6Y@eLBC>H_8PkCwusL^4AgIz9G_E7ehOO{QrKRG1q95d7tntth@bOe}
z`j0sWtTD)ffpyW*CL>NVW8%yeSRAhj72McWla*bDI2;VxL72qQNOP@8iABPQkuXbf
zV+EZSXbGfx9;51Mcz(mvM;oFl`{anl0yGM7Z0)>6=gGds*V4bNtYH+$l+LNDpbYQs
z7RnwxvoeE*CBkdF(b@C>_qqp2zK=|h6Z#I+(*)%(<>Dt@P#4AkM%gX!Jowu@04iG+
zuhDwZG@ztVci{GRqC9YoJ{U+|nL`n%<_HGt^RNnU(HN5Q@)Wh^jZviIP8jnHovu!A
z$8FmhkBEpeggc`vQg4D&6G7cz?&vj$6K*IO4~BgRZ&jAJ(o@um<KZrwX~F!RaF?yG
z^tO%4O|c=-%pAJnU@9i^6s{B;n@~HJldZBu;doXgfC=bv<vmV!e6kx*NI=eg|H!3F
zdy%SQcEO?~0w_1;%{UMk%|tBW2b+jP2gZv7iNZcBJSpE`C;>ARFC2kPh}6^)a8orZ
zt7r(?aJ|q1NKVtBYQSCiP*x+w%ta@vozI}9>KT%FY*8pGEQ1EbQ!Hv_RB+>_ram;p
zg!y5$lOP(bcabCW2AOb|u>$n_yvcHjZBHs{f%1b1nB(AcsT2b>70dvTKm-c+9_7-B
z&4w`aI9sxvkNUDPp(&A*q&y}EG%%26GgUE+c?zY8{%;zKg&@zA*Xj03WO~;o`T3KP
zO*S*8Cbv~ac!H;-%mYNoUy;p3<#Tca9R0iO0kzg9vO^npYVMpF_PbnFuPXtys$>>f
z#zMG!0XM)QBDp7wQQ6?`QnWcw=u9YD0fEU0wdEz_tyJ#i^0nmEwknp^x^q;P^?QPZ
z)zeIgqCC%{go66H6A_;#e<>Dw0lSGnsa!7hB>C+}$xF9R!#O92C~6-RqoKos#s>Hp
zZNSecDOth9Y}ZtVouW4di{PRZ5{Cs{j`=DzN-Wo0AXJ#5ZD9f;8Aj=|ImTau=S2hP
z<|Xi&3sMJHHu3v7WW|1C%3bD)Gak$}C>Qw@ZESPy(sYWadwebBt6HUdY@Y!FOo^y=
zXWdrt8Ut;E3#*W@8tKJ--I5nCt`VRJDb3eLh(p^0a+?i9^o23-lRX%vK-k57#!8D9
z@I`%%VMMhV=w(cjW`5VxHV8p#K%)nl$yeztnI||#*@o@XQ>$8^D5u=<l1~L5rn1VY
zDLEaIi%Rp~6lCr#hq#~=ZsL)e!EHjxB1KMgiC7*P$C6@Ih%D{~ukH^RcA7naa(t_K
zXUe#VrdTL~eOflG3-8B$IAi|o#db~BWu1@?nEW3hClNOZFro>tQd?S<=Ru(o4uxXj
z+Um2unF&}QHj={?&{-Itw@c9w7cSZusQPxxtwx0Q%H)~#oe7TR8KMl+VKimlh4;3T
zHK}lASr#;k*(ztF91a2NYD&}@WgL(z5osP!M&sj}vRyrOWLXSH<<La(4FXR_yE2TR
z%nit^N(5d?tLTy0#YEwG(hWmciBd?U%bpzJ1{*O~xwWzQi>w@)lC839icI<?NSJD*
z6MYxXrcJru^by-2FmzCCh`5v{U1ZuDn?^l@_nfbcQSjMD#{NAeB~N7=I(8ce%rPzr
z^oeP1oR;4u3uZy^CgJL_g_{^Vte-tfmeFdCdH_oJrAQIP5tP{R4JElC7DtJ|z|PUi
z83b1(E@J0P6yi(qAg|B{L+qr}CL?c7No#3}e&if2ySTt!3+agQdwgKO2P@^5d|}@p
zc!t2O_Ks=EXa~koPvttEWu?PIR?3NXu?AT;wrn3ICSziT!-($EOuB-CZW)pr)4|9y
z3c<AxrmGon^BI?lpCz|)JLGv(hCm4GbPNmnIKDXd2uFd&eHb$awU%qycH6-TfMGs?
z*E>f?1H4npRNNy&tU(cjs$$Y7OJWHO;_ESc$X&z(UofHyWm?I;;D-<aftwYdC6%bb
z(1&a}Nq~n`rzycKF!KdvlL<)}rB_i0WWX~)1+jG*C4?|Sl55x_A+hz8XR3NdnH7p-
zoFaH>Clo|e2+JZyW)ugKYRaa1U(F)DkY#fCSf^92Z4A@gBEc<u5tn3>CSP~<sV*7I
z;MRU8z?7X?Xl@*us)r_>Uccqt+aFDQ)-fB@d=4b1TM6MZ8x#KgU0xDtn7Jrpro#{r
zv{(c1;IbsEc|Dm<^93mU?35+{urp(!y9TL1eutQ?(OxfcW{(=XUa8MoFYv_B${n&R
zn0&ZMVyqxo7Mf7}N!JQ3C`LTFw?3O#^!G4|vZWElg#;Zqrv{@^{Kx_Xe&Hmia?3<J
z*4F0sik5Lpmj~20FMOij1_n=M#+qHVhTDVW=pLMgVvP{5(oB9wX5_%?9|{T*%3K5-
zhvO4WWnE;B#!%>j2V6`DEn(kBw)aN16Uq2T-_c&dI^WX2A*hP9Q8kQNojWO9eM0+^
z*j|iL%*R?oBDAt1f;rlL9ma~AON*OkDp4hlk-cOOUARlaD_i}L*+&4dwP$3kMM@jx
z1EAu1lw#MBKoCl|)%O@;1=^JsZ{Xyc<|1!9y>El!>eLCo8Kq<gx)1!Uz>0=BFru<(
zh1@Kccauv*koy7^A_HhVK&nx@<>EACqtUL55i&9?u?Qf3OREWcmN?JzhwqzBvCj}~
ziSQiCidtIRV+RvyRAYStva8Yn6k$@BpadoXP5(YtSiPkdj2p4-j6+D*G}MDu?XwS2
z%p{7Ew9%#<*=@LtXBc;avNi`Xt=R<6`I&K5aY4d@aumH^*1>#O8g11JoDA3Evv(F6
zDV7SVO@fFhu@i1k2`-~Zja<XRnVy?qFJtBkf_N80FYLsayT9Gl0+1ql#nLl7KWLP#
zj+a*m8?i5ym{)Zs*^sqNOIQLI<lvdM&%!*LhM-+o8w4pnB#U$S4%^|qj>HDdJTa3U
zb_96?Iq|}AVGy))ir!N;LvemJNbOv0H*lTf4N8>+9is2$5ny2xvg#WTsWI!YxPf&-
zX2?&hb-sxTBPzS-n!e9wE6zp2It$qn%Q|Gsa#({A1w0pXsv6<#Eh04fZ4<Ln2O`=$
zC%cK4+Od3DxiThH7R`vsa#R-9Nbv=yY-9ZIBs^J7G>`-YiCDj!Utg47R%Foil#<F;
zcYNffhyR%!Ih!Uoiz*RUq(i>+;C!D>Rxkq*?hJgMdIrLZu+uk~&sWb&%Z8E~gR$j+
zOpKJ{fH_cR1pfY3M(8>8<{_4@)_WKTCB-0WEMk?(sv-^JQ>q<f1>)ne)MGne#h6Zl
z0jNE|ZAexlhu+;fCmC46yl9XeXP@?-sKL?QCdqsAMVKbGcmZcQ*P8m0FD$Ku*ms=m
zXJ&LBkU1FTPjmEut*qmP0|HbyAkVS(|E+rtnW#w;A4!4DfT%u8d#02a#+m70t-~do
zZoNmCb9(kCBTO!sTP-=nnjzx}nQT~MqlVoIh!g1EK_7-FnTCrC>lR7Fhm?3SDIix0
zGZ<m|F}p2uc>i!L2FQv;j@oK`Y}Vt0G^jy<KOY~O#zRVyyKwP&#?b&`HG->`XQ>37
zumjx8iMo{U(-3*aS--4UOjv~>$u6u_)Pu*cpNmLIgavyb2|LnER1hb@2|jz&wX9wG
zM<)>--hpl6qeMvFi`A-jfxZkE&M=gf8yO_IF)Fd4IE$h%C&`S}QFN)J2LlZYrkoZF
z=Ekn<hp^W#pLwXgJXVk7o$wPABOneYPA=MV@h75<(RJoC67aKUhV1l&rPLd5ry8@*
z=dsJ8B}33zsl8Kw^uR@<V$VgmHsLnzLut%c7*w6EmTVnlY+y!Z<s?&@&`%k74&GHX
zU=K_j(o2qKT8U^FX2>!r%{OqjxfPZ8;&J33U+-i}O%e-(_B(7DVtPRI4#DoMIcqKt
zL&QUfUG5)&1af(nZ;$`7LXIkul!W0nmDW0pal>B<D^Pg5%tIQLL#<JuOW0+&6}kYz
z%aWQwl96okF3UNuW9#cpxf{4zW85@J<}SCxX`!B@me_`68P0E=BPO%AIWK9Gx!fAP
z_117rvxU~ny&~Jge>wcihWr@_(>S>DBxQR6I|4NYl`!mTJmF>EAPylITCxSg%M;Di
zc1jFGj!C@LCPu>D8ZqC9uI-VDSX2qt5Qw_~8hOS?Z-J;Mpp(KkdnDq4!3|q`N&OYW
z%TaQ^0~NFvcVZpcaoL-)K?=PvBs9jO9)>)$K}+#CG{p|)t2rwf^oUV(Y^0MaFjtgz
z4aY)?v}?-Z0%kDM#^<Z%Vc?}y$+n4UM0Xi{4|;cOoL9NyfW~udUEai;XR)L<M6TF^
zBJ%+d1B`;iut8K3;60RblT_{<2TfY(A(61Z(q@|~&Q_CRx|&hERPMYEa5UNMp5Z(V
z@3SS>pn%I5-kNM1KZ43+H#qi0v07e;6{<bWpiu%;DXv-->jlY*8As6pdf6}{K+<Ry
z=DC;h(X5yyjNf==a~mp?2fPWoAR(n!xPY6=qY86j%qsba^p^$o*3FiT&I|Sgz``lb
zM56L>VHja##mHK)S5|Hg%Nv6X23|Uda&iGO^>~HSG=mDQ$aVr=?kc6BNfy9QsN@KR
zE0PC-qzS|^GHSBfOiKWxfsok?MZ!x_dRCCfM5*X^6CUJWF)u=J4%^D&yotCG=@=aY
zP00f@y#_z)HG`K4U*g6#1UL3loyds5@j^PN&3D?=KnZtB3q%T}!j1P$&62GWoh~g8
ztwJhaVW^_6vKetU7D>87Vjx|NMeub7WxV1<w3MNen2?%r=%S>GFd1M|5$}sbbI7$R
zJ)z6~AloZ>9!iktcv^Czr5mKuH^myFFB%_C$8J48DrYL$w5$gD&~>pvp0v-ITcDd+
zCnmGQ&`jmsM2Q%g`qojG^g}X0=`=B{MIx&`9%n^6iLYAdVujuMj9jW%gQifGom%r|
zd4)MKNB9dQ;lE12u~xM#jQ3r(ZyTW6RxwWRRZ^&M+a1Fc{FOM6e2ccV^pH&lYig&$
z*zl~=j2*z994Muaa&ZcA_d@S(Rd7<0u37RfL7W2jY`B?kOjVU4r~v!R^Y^<pY{_50
zr$=qu`euY)s47F4TjmdERC<&hDIDE+j|ibx3p62fY=roJq+PnXHWm)ca<%;uUNr@k
z)CPDgHYZOFij2Z4K%p`^fr$IDCbI~&xU4#%kWlH_)CN)*cV>*VOmi@lbb{Bu>zJvJ
znF!W$pJX#i#G}ovW*o9dIh<L9(T!7K#xAN3+p*=jKahlIJaQM9Dr`MxBMp;5&i8xP
zULMX&qL-~rgk@Q{2d2yi_J}8wGy6w^O3d)~fMHb)oD<zuN_Vm52t?ZC3St`i11=Z<
z1>B5qVwIv>gdFc^pp@VT(X<F!2Q936X#6tQ+89(D-lxvNf$3mN?T6kVj!V4shI!MZ
zok@p?N;g=JH%v7r{VcUtsw*~Gd-aQutUB>PiLc3oO|VbQxI0LU1Zu!BBb?;s(v3lE
z=42W_Slu^dm0~e$E;u`!*w(j^^Mp}t#OcD?wJWxKn<#ZZhSvIRCEasbm>Wo<yz?h1
zL(oyZ@y2F%RSgtOrIK3|NjQB$ZG?wl@EkKBv+epFYz>HEV^v*f3~nQox&fSu0_2kl
z$dkjc{*XmK3kyk6LNZ4NA9U(7*c7=XcD7)N0MA&>T->hsi?Rn<2E2z#oYbsf{o}!G
zuoCIr+I7wbGO?&F%SSpPM6oHpW3h{n##Hu6mE-QneP~KOkiEAy?xS2=4xMs%jFgFD
zLf=e59+Qt<^*eA;850DDaV*$aZ*zh66e&wU1ZTgtu~`#pl$xpw@NF?;?><T>%3(_M
zm2-ZaNI(*4lP3IvdH~EhoD0(tS%|PUDHG0Y3^mmthdk))6EW=!s%v<KGNy4tT-tQg
z1=pOk?8~L_c;;tzx!JH6c8xN)Y()m!3EKw`b7SsVRQ0xU`vw_Pe6kFBcF&>Y5wfzK
z6mIp=E?$2d^9Gn=b*~Z+hLgNaQmfoLoR#IfjN2_WDok+%yBAlqLTq&&gdQkuFZZm6
z)(pc732pf$x0QGo4o41Ka`@P=L97xF-q<j3F}Cg_yZ5Qqg?|!OM>h+06X{(HBb+fX
z(b<B?R*9-xf--~K<2Xi9Kgf+R7hJ<9coj|eI4hST2@-sAH}BNPG<0~+(^6QS+lg1e
z#2lGeW)1My<roj7&UtCO!H&tYtsJ97-t6)%<XT%~eW1Dk^M$Qt>MmSW$svNmuuj+h
zbHX_}WT<vrn{15r`zeB5%*FUeEdXqG08_b25L-3cgm|{;RS6q!h-qHgP*4(YaPM*x
z(SBfjL^kr)8Q2YTEe8O`6uTr!kKyhKhuX8&6AFQ-3rq4C^wyZSkd;LvaE3w6MjXKw
zKFbH7DeDYnlZ|S0c74hY7<dDcWWj*|sQtMZWln7=#YFp}pU3~}7>h&7^3FY{yq{0&
z)y}Ia%UiQ*YA_@1fA6V|7gDNi>-TLGv|JW*vl?U0Hr;^stGFx_N(VLw&jzsh$xKad
z%~3F06r;ih;+AVj@P&}qNp%!-LeFAE7^OCx9VJ28%J{>LfGqF+WYrthGy;``tczpV
zrQCV}Pe$gNS&#72ycXu367Wj7%EP{2L$)h+1YjDp-oq9tC`rop2tV#ax;9A0h^qxv
zIXOw_rZpfl%t4s8n6w!NNJX_!bcGXI6kgs4EBSUdWh+nNjl?Q0tg2DGTsh0CkPYZk
z6)lq*;N5sXdVxp@g@fAyjMY>q%@!6kY=!XM;;uxtI&AuQ=LLn4@Q(41T=J0A?^+SD
z5J`BcY|TjayLzVTssI>}jfm_EtN`ySD}=H`SuDfcpSVb7wQ^)Bt16AvlneeiAeIw*
zTrMzTZOCN=nl83V23ImTHmarTjTdMtK1+VqfrDxW&9yJXkX>1Eoo9G^&<<jfYPjn%
z&V>0G7Dcf^RYp)RB_TZvh{^+^x2KIQnKx)BiO#*fsHgYBmBu+Q4@m$_K(oKg5+hrZ
zxpwvZEOQfD14D1c7?V%S^69SA<5YFIF6kgziG$INYT?`S;j1L#1D3O}iZd^YjeCwh
z;I|uhtZc}!W<7T+5n}d|q48HLa-cLtdYJ`<3E!|2&_;1Lo42`T_=<g)#v+THojaD$
zi^4x@4+ENB+FS4!ke^5|&1{8`R6mppwA%86mdlo+mr<HQm>spq`nRCeCDb%T7!~*7
zJI-jUq)V9CN#8lbwaYeg&45vIxQbaQ5KPMQD@7!^?8jjm0o@Eal-pKI*$g=(Yd(q0
z;)Mu}iwo9dG7X+^fGclVvK}nnL`2?VhwUI~q2wiSqj*8y(o*RmKL8miHQ&f`_;DQv
z;h6JE8m5*^Y}OpP;Es3qO+`!2)~h*$awR!5#@QM>oO;MW-bgTQN<aX~6~eF_3^wjP
zmm89BUzC)GhV#iIB21rT4}j5Fz8DbA1isKqRGi9Eo-MgnWv@z!yBa!`djjsJerc77
zrX=Dvt+c=vXE_xv^g?syD|SVY{8a*M`7TIx(iw?GF71i)G82N@WL?g1@?=QA#@H|o
z8fe8s<;nSf8Z7bq9wF;<l7<wNs>ubf{E}QOf|bhpA>YSRog8%wDx-OZBGg#$*c@Z7
zm$!^Le!VZEZ`-b`k{ZMq$84gu(qMCU66VfMqJoQDDu1s?guNs7$p=T8U~sN|0ONM>
ziaL63@(tM6!p`>fcT|xvrYuS}!;q5`v<M*5F)IjISC1@lE3fjM6wkC07l_ZZ6|PIN
z6svg^TOD=&7A(4MSrdvgs`@<z^%s@G#FFx7S-qZ4G307PY!-bRDNl_Gl^f5Sq#0V@
zXC);cSw7{xZxf_Tr6EKqRQ~+##$t8dWws7y;wj6#5IWA@a%)jfmIiQAhTSn%o4^~6
zVP=f4suV0tj-@?LgXHt4h*bKUoIaRRx<XS4y3cNn60bZ#T^@D>J9b`>CUYeqD;zb`
zbdilzjy;@7{bTB6Xslj-Q8=8(1wKhuNvG2aE=*ZVQ*zF>J!zQ0n~KPYcgk9HDJP&t
zjju3iuP|MyTFiy3z)5Lr%y^XCw^i<H6S_nQ6QUr?gxlz^+Lqt5IV7bgT`?RV?Bb<D
z`d0c_0proJSB+y<YR_i|Zn~hQ8P_)rBN1}obdZs<sBb=P8cu=5^T;jeoj3{HAk3vG
z?%I@awHY!!ts)1R3pq!+(!s_6rK!*=BLboEO!HQ^E{PmZQ~<0ZD4Dxom(p7^H{WRG
z8(wKcu9!<x4l<iHT){UNSYgboQfCzTj-}Id#~5gAvdifa8}M2(6_Bf;!aYZv-sx8D
z31K1GHEj6-ta`Yd#aLC#Qk^}nnNw1^J<?xB!Yw$6!iD0J78I9-XDJt*quuG2^A&ex
zU$`p8BO(dUE*sy57;MN&E4`UGS;4X^vOYD+n`9!9u8ny_!J}HNQ|x|E3bwyVei;S_
zWTL(_S(VB}?$M;(lH+v8mYqw@$Q!b-R4F#^@O<|=4LOmMnl-G%?$1y7wI_GEtZjqN
zxnT>rmrsy_8(6xq1u~`PI>)MS+3m)@3~Knv>@;p&lv71~#WE_eYRU?nu?(JWES0m=
zwg#vA0)r_MQ4S*}7l5+0(y2@^#!Z^M==jN$BpcfvDihP-O;SmDuuPhw4Dd+`CrO-%
zK^4}aipEhd^wlJYZP_dA0d0<09a$!KfvV^X<R;3o##*7QqfkJKfM}vqAV}7>T0sa~
z!rimN*ve%)Jv&3wr%37<6lpVzz}Ck$rKi=*SDUOx<Wd2!;^7~w$YKf?TFZ=hQfzS^
zsqTu5!I%Ils1-;R6fb01ZJ4*E=qbxz#^%Bc<HW(b+)7n`lev#oSp86s1k*Wz&2R{}
z6q**NsFDF(&Yjp@P?BYIm?ipNyC4j?bxm{w=BtncvRG_}ZKk?ZCKZ|MtQ=Gad`u3S
zX`5FKz*mq-cf$zMmC?1}3DJa`8>%y<uap0<jt-Gn;714RT$I<k0y<Vev!#Vhw69&8
zVHnCfG;pt`+Mll}ZD`YJ4VF;j4PJhdS4`N9%3|`t;#@9!r91@-eq;zIPqi&=AOh>K
z3K*EGf%Qk039*!Hxo${v^Nj#J_S12(KiHiYJ;agWO<VCLuc|LBTj#E*m#eqHl7Uln
zD&a}4?}-gTDu<(P*FsulrK`#u7*t}(A>q}j!N`~#dRix-7f>m_KqbDW2}+O>MC#HV
z$xdb(aPR_M$O*B*MYx_*oKxl;3Nk~fDutA$G4@p{61bM;kW5x8HTwMds$;Wk)-)x}
zEva^fSEeM!PPcJ8jN!FOX-Pi%R0%#+C*7n9<xIR1NwYz;61=m+@pB24_KmH1iA(+C
z$Gglt&<O$SpcF?A@l4~oslC3D(m#Sl50j>y8=9EfcA@19ERGahn<g7U%rzYZK^<9B
zS*ekLg~>x`wdba@x_1wQ_u&#w3wx%S_@X;oTGliB*-Q;8CbwK!AD^hxUZ@T!$QS^b
zP;Ew0Esg8|s)IGtl<JC0U<%MSD<Wg)vL88V&F;RZ(`-}<TXniEEN*pW+Ze%lpHnrr
z@2E3ZoXw5=R;l;LCW=_<lm$CRKcsSYE2<2`?hH?Ubc@|JQEV9a5GRoh%QsWhx!l&K
zz8wG(*)7g%ZA07NZ8`Ci+cp?NQ)cmbCglxGYFVDlSF<8CePTFLZ=*6-$s}M(q?k-G
zv{ca6-D|>LHth1g`9~IrJ<O>1NvpnJ6M>ctc+Mwovg^2a70SAx(PIe}qsNg5WK%G$
z#N*vrcnb*3+++Ew5{fBVD$=fwZGjeS)aFM<vW+fEP_imGIU1%*yu=)xX=2pM$zy&s
zqfW1d5#3J8#J&_m)F89R+EaW}AfOc}t0hv>&q9%~M8<}H!f>Nf=Lv~Vt!=CE=<)+e
zCu12+xw;&#)~jE9q0+!Xa%1X4kjWMA2kDn8P*9+bO`+SY8DipUO|12%xc`jZHI;R|
z^(Y=F(n}88^NRUft;?!d97N!)R^TG><D6Z^_|k6$ml{qO*8O9t_fo|?a{WjzLXK>e
z0%S`WO&V;WhtUrhd!h>?-!pXQ;KJcwDsTueE{%ix!w79tt`93DEs|wovw0<I(OSJo
zxxrO(5Fx%0V@Ao0dgyRF6Pm&{HDZ|<!R`5(40-}=xH2jU4~R`G*gBqzc_Lk5VL==D
zrEGAy#DE%U(1sB63+0Ti3anHGRt+)hViJV2dfL8@R0H4LQd$N_Qj_h@v+-<cxg!d<
z38*qFRCa|+Q<tr@0)q+8P+Gzp<?v=RFwV;C3@4O!%Mdl9-gXpn;bYNVX1o+;5VAkn
z^{Kk`d3bGDyT8JX0|bouvgQtp@C~XNL=(7Wm8$kLdj$1y2npP8+HxLW*5Bn~Oeu$b
zm7S^%On%6RXQMaljzL1Bn>UQk@3WCpI#45S*-ao+Z^dTfM@v%eB8kpbe%!woKa^&r
zH#|Y1A}(RpvIIb9H4!c(888*M>I7}ebPXlM2I-}QluYdQTQ)SDQR$-uhck(LH{urS
zXXmD^`6udHPJOB`&);4S-#3HnbNYMq(=~Sbz8GD;A75WnJ9ag^xHugjQw#Fb;YaM8
z^waU^;u?E!pSk|o!PWH;Ul^YSA1}w(*hst7-duTo++Uc++uS#OWZS>{YqI^_KaQ{8
zVIyv{cy<1U8g&10e0I_dMq?UCdcR`_ZrlC+sh01`Yx({QI=(CE_}&T_gfIvI{BnHt
z#{hO9fc?{lp&baI3uC+=QVa4o{+<;a6K*&7bp8QZ1nm3tqy!WM)F?O^y%`-}kAECB
z@kyBG>cjhy!21eYmMd&8e>OUX^$ssTNsD%Z!{z8=IF>f^m)Jdie167gG-z>fxpjD)
zny6)_7+(AO4`-(ciObQSKEPfPOM)_%;P}J0mm>nShZE}K7#4=7=lb3=Vo<Yo{K6r|
zCqS|D;Qjf@_zfP5AkOjm*^i@7SIUP9z}cbf@bx(Y=Jkj=!NYoB1qdZPH|cxtVFu2r
zq{((OxVjh}kMS4y9`F$m;FLjib_ILIqlFJ-5Zf@`b%|HhB_7+-8^nRR@iTw1UN8Dh
zqzq2auMh*($?$p@(2C*T*CTxTa&!iOqB}M`KK^hC_YI%GH()haAK(U#&-gTOzjVdN
zmnX_xGQ!(8!|~~d%PQjG%;x|?9Fh>wI|+W`YPYFOvwLc1&&L*&uJWhg9UR5$5j-|L
z`Eg9wQRaa~js+&LI~qaY$k=z#;@%?I62oHP)kczv&%ELi(X$yb_~5LB$S(Ze7Wv>v
zTi4N#1)o`l#SS=mg)0k7e6vttPM6{kVTB^0eqaOW3QM@K{?7ucj1)eYWD%^&Im<W`
z6cby%P>+OMp-pR&XEV&7r^3lBexQt=+$S?{?KL(NPX=>=q@~;-msn5+giCj359mht
zSwKyTf!kNhy4L&vUeY;l=|Jdy-f>^2A%Td)xvouU`6py~XCSP~0&>rTBz=8I7kF@8
ziA^|zw44?XiR|N+@*H#E+?n`aR(U12v?-pEyx<x}7`S&w7Ihm2Dt@t~Q03OyEDlk-
zO*D;iKZ$Z9Uej6C`j>J)<osG<#F`jX9f~o~iLWMg%1ZtwCU8u4=scX_)^Nr4?YVgv
zBr49fpdcbI%%1zT<fV?7isUb`W_nc(%SDN4BpO26gOcoRs=tx?(}-Lz$Qf(M*gtTQ
zky6L@1yem|y0W!hsg*6trh;6*M{v^nrpBAM|L<~2Cz7Ej%S2DngA&@}L}DOs{UB9|
zjFh!=yc_%;tJD1eGtdy3N#FZ{=e?GGttM=vblfj(zwy!`Cq=p8vg8fbdlVm)lEb{D
zUr55wb{W2D)R+y^f{UZz^rM|}y6NtwX49$%fZM0-U3G`<1(T#h+Bi%0aGYF~7WGjh
zt|138BHJsQSBn)ixVWYmKOM2+w5zJ*1;E`Vcm+=Iy3*RFI6kI_8y5zv$);))`XhY2
zwvo-{$hz$VOI<my7MX)3tAl{-82O-s-`^F*;^m7M4-XHm+jP~+mbWiVxW|hhU_nC+
z-NU3CzqAP!5n<X%_HS-IPBrh*&bQ1`tQv*ALc#?`uE5@WM!E3yP>Hgh`No!}p>n3#
zN{H}S7E10YwQ;3QQ8q9ZhApUWBNsdM3-)5+8cA9Gz0CW=vv1-C9&%}Bg6Z(})%od%
z>(S{aUz2%7XDY`T6l*}x|A#uhKYZ76!&UFIV;w}x^$h1>!({1t(+GT<ZBn$gB)syc
zi@x(00>DSh(%pK2=0<WCx-NufeY6Jqw&0)XR_|vzmkYvfv3CZ+`IH0^>)3WQ(@d@A
zbOcz*!sxZHguB1}Fm~0CrHunEXhpV$paB9C;Kd~S(y(!pvSf-i#ZXI{Rl~i`))>(s
z_G;G!!F2S~%Uz1;Lj4v92e(@zuNPK8;tF5}U7U>9aBh5C)*UOX*q$G6tWUjsak#Uy
z*H)4xH2grcYJ0h{^EO;k_2eaG%i$xG#+rOM(#<ax21LR?AFdqc^`$mn<X*0XN0&nk
z5L;&G$JEl+Yc0ukT=tjpI`NJYFqk}3WD&0f4cVN>?Pe5LQ6XkX>6s&g8+#VtqX!d<
z@F3v@*t*-qtsp>7F$`?E&ANwCLOP6Vyfs)-hca$3dYIC&FodS|C>D1Kh|<~OZvEon
zZoLO!-kaSnX05woK7-Tw&40%qo!shlkN-P3(1%C;Xb{DR(-){HgZ9@t1?>EVbF|@S
z(C^c~@KgDB&>Qr-zv=Ya`@MF*(}CaHo&9!a@SC9hYXD${EF}nj@i^7Z{Wtm9)T-z*
zp-t;D5c>oOJg-m3#{v9*bapk`)(Kub9znO&X$EifWVymS-Z?rt0>V}1#PA_JgI_%&
zYL3$gc<Fuz9tqRWKtF=30v?8o`X-qIXMQuwFjn|AXcqYR`(Y5YyPZyZ4{)gy1Rt)3
zDi|3}snie3)Oe+ygpTo6+Ri9TEjF{CQQ{;$OS)*6J4FGd{@3e`j6U1$qT-Y=l<VS@
z^(?7yzDf&NLdYZJt4LA}JfTp^Vs4o1&G_ABaEI<*#hHC&(?6V2>)+M3v!tI342Z*A
z8sR&o+%{6^tX>+DaO)a07AV(+;<H_n5*ufvDN<udUr(hHGOyR-o<}-dFZ3EuqL{c-
z7Xx(*h=$=4k|FiTWMyr7Z_mY_iiYS)V2kKxN^)Zu6P4>1F6^+PSvtfmUKG!bM0F~}
zk7?UlhTNu1CYM$kIV_M1VN%a`>7=>0CtjPa4n)_((p0Lz7#?w`aNrt49i%C0Ny#5z
z7gK5eix9cJMFRzeJ#a$m9lD|;m<!eDb-<FDL}IAYJrRM43|NSu+nmk07(xn6EQM<b
zP`F*&6{fa?gUQbg5tNlksKqQyMS|cSZniqo1G$UC&*Y#hi{TH9vD^#WxH<%FxcVif
z6>`;clyO(vjyB&weIKF1Lirv;A2mI?<D#F+#-1HP+~qB&vq|bKS1>VRF6i#NuUx36
ziP7?Fm$|egmDp@WF<yDE;xlq5OIN|vT!ck(GbtJ+C7T#6*a(iVB>KX1YOpeKsr456
z_Nf0Eccn;gtzk-9I{9h3bW~LJ*Fd(E8bzZ@dp8TvjH}4TI+b!yT(w#HaF9tT%s-n-
zP`QmpqUR)?$)<glr^A@l8QHBetC`uPL$6&Vow`X2tvhH>&ZS_;sbA_!qBHBFaGn~>
zrOt5qb!E%1Q&N`=|F#w`zuT8|9`FM!0s6NXl!EOS1J?wpjN=q8)nxMFQjBdXoNEuP
zT!nO83CFfbY*6fl+4$-OPbda-MR}aMur;2!C2_S14xmk*xuuB(jiy6`rK(OOr30iy
z9^U~ws(mJ<2uO+y#85h-phJ_{hKn>x^xU_)@=9P47$I)jTQZIjrP6h1Z(~d!vOQ2-
z7E(a>CB^GM__@cNE-&Ds>I+?Zg7aJ${e51=24wEZL}x%1uF5e-oU%~1Dm!bV50vW7
zC3~%R5$XCGjm_A4P$ZM`@tj~v_F0mkoLSB&-9%uVMu=7_TTsHB44}C!rz9C)DDvr<
z1j%7xMq(MwD$0u~0gr~ur9D`jA(|~>U0rpo5{lVx#n=8F6<S`2D~L>%;_wN*#*sEW
zhJ7_+BtgZTo66Bc-0rre{s&ToM2dHKk^+bBO9m%JfD*Xx&<)|rCsMiuMK@3gs86<A
zgoMaPHo2E{nWvyL@>wa*gl}o1sR_1}!pr{Xw1tVOj~2QbN%wlQ5(h<#a%4g|W&VLQ
z_#`hj(mbumL32pOgs>`N8%&YHp|u$#<Qymq0bY?g6LdTCqPX6p2#kWN6D+(g^k~|f
zxvufmb}d>t@o1|(r&T9vI%|h|hPY|ilqY9;tHSIy82>M;q|o&>;T{#?7<aJnbV>fB
zQn50x>j3XpqNo^!xDUJkC{=|?_e%1G6YWCZG9{hlg=B6pLaOq`GBg#7qcG@lfBJRP
zH0PRlwM)p6@{rLKNF}jUI?9u#PD4315&*nR?zz27sj?pxJTUa@+Jotq)ARcKnR;K8
z9%o?Xbrx4rX4GH!6kni3jAcSaM~qyF(bc?w$4zBsQZnPs!Y^*1DMgaYD#~nIRhhy^
zP5H%T{h9oAJbPoCQXrMg(F#t+%u}g~Y*>t;HQ(h<n&y%po7k#0u+^y7B2VP9kFwk?
zZeOHlIJ@Fr%VCV=!>GC3z2+1FU9iuzil;_+1Wst*%2l{DA{Ql6J!oscd`eN`BBANp
z-4H((%u_!NpzZeD^j?KkqkWcJW^c-zSb859ZMqY2IQdeWI%J^@#}x@3@DW`Bp=c18
z(u=a2?+E9z6eZfQcC0i(>P*fTg%@gX8b&ChV<2_7j5)>B_mwv{fp2NNX|N>fWZAMj
zWb~dcDI&h$Li>pgL!|U$!!+1^Wf~0qIh&wS2KQ!qeAqSk$1`Szp;&3PG7jhn1$)FX
zbro5`%&)BJi#tXsRTeSv>JzX$+<DaSVDgaVwuaObqAcu@qs7;<tyGoTjVTLUO{FZ*
z8^Cc6OD~dip|izIyD_O{V=&2La<Z1LST)CBb4iBL9a}3G;O*vNoAu18%LLO*e-*B~
zDGa8h{yB1IYAi!dTd{LW5HiiYhtmsFXM%chnxv~QfqhT$ds6t))v<p36!6@MjG3Or
zmPK&u?e7dFYuZe^Y7YtSS~B$*pGi6MxgR^?8C-W_IW$Dl<TFz=1U?<kr239B;juI>
z*K4Y8is6lKoly%Er}HgA&r?_IyjgDihk{3`(gFL#05LAmoirnPw(y#tbL15ZC=nTr
z9<X><s2ZDHn+1g%60p&5wf$G7QJ;B4DrAtAV(Dsx5@REdjv}RzhJ4uqsW^X#%Up67
zkcqDJGA*eoekHC|$|C(YS>Nj-o+wN~oHotiWPEjfIez^CzoG{@obh;wAK=loTjegj
zLz_9}$&PrbEb;No71A$MFospi&GM>Le{*YzYMY_um$T-4MCllo*z)UsOvq-Ci&UR@
z^ak#*QJpKEG<+9eW4EO!9YJ^}w`Job2F|2wfSzp1pEN9LNpl;M^JJ=08)%tUFS${k
z<FE(;WLTn;Oudr-HIdc`X~I$FC1KXNM=ixuI}C2pPHYY{RV4Lir0I5T|6J~rL!{DP
z+=krbMw1PvO}_&Z9{h@;xHeCp(lU7!wyQ<$3E99ig51V6Ri|ZGN@$T!#3U!f6Rlm7
z8n2q+EPQxr>+w=j9U4RUS+6>Gb!1g4;p4|Pu^UL5-c*{t%XwN`U`&>4TnB*q0o%u{
zC5<X&{GNsB?TUM4?Yz@iPLRw^O*;f9!MKKICTPivCY4AiDm>iq*VAb2Qn!dJXYVz=
zn7F<#*V$k~EuC>p#aLJK3hUpoVpDB!&yE7_@ny-MwZi#UX0FQV+LxAkXOM9b#-_d^
zi7R$&N_Jt9*F4XngaB$ILcD4=rs6I=&spcoWWin;la@xM*Sy?a#W}xTuaUAnfG;&?
zPbXFxAa!CZ*DBZNF@2!7bYgwwx>dk|VuS>V8I&+D$`}S!VHK@ano6VG5+n9;HZj!@
z*@R-6sYvV;HM25xSDbciR%X;jQ?Z83Mr1Ea3bOG9?t&`eqh{hAuLrmRp;v;!NoZ)Y
z%-{?Vp|AuolX0vK^U-!TlUur6;pyU_IoGffN$1p)x}{aPd{dHqEF<@y=aBA}*Npl}
zCvRmB-{q<nxut`!v2G)^5dW0vEMBUItt6c_Uw}}}8NDQmxZtXp%kjfZDlV5qt5x;Q
zCWNWVZfXM1Uj?7FULffKl(P5G2J|A(hyqk=D4MiRE(D#Ma#W~pmV<G9gRmE%SYIUF
zqwnSZ;8Fkw{Shv*%Jhcj%TO+<$a8T)p88%UV(hlIV<ToW-PB*isU_{cFrFMGudc%r
zF>Fg+mc=@7H3ljuUatM7gmF-5HPhtSzeqv(A~i)mW~@{mK)HcKiS`Pot&8ftGHI-w
zPq4L3`dn*@NpNk=oow#<dtCc%=$L7s3N4MLSkJGB0j^89bqTpDLhO^VBO+;rlc8go
zbB4MgC5=%k!cW(RPAm3?GO9~VrCK>jved0Ss$B=`eLkOKdCS(o+vs;H^;F8`L7%A8
z<oYI4S$}DsyEYu#g27;rCuCWXjinI^zNWefST+RYH&c3Pwcwa#YAsPG6*`h=Be=~v
znm$wG0${LJ?l>TR5!+YHyu<P2Xfe9aK6C!0Upk6h1YvNSWigiUhRe>CEi1hbIW$|q
zKphOLt9SXSD+-OIOW?u8L8Mt)AK(mnRHWUp1X)U(aaVQJ_9)`$_}ug;vCWvIy~K1i
zlQY~Y*(AK<;gXZiDuXP7ZtY)I6h@F`R8U&_3EPPbue0R_(A2O;A`N*IsyvlncK|B;
zK(1UH-fnCX;Ely9T~}Y;vuNXE8RHj;Oh#0(Uy}w)*IEl<4QDTnukEpOOT=Kp;NXrk
z0Fv^^a%gp+LlO3RqqTCmKtP#5QCu(GZ}KLqQaLNzV#wrL+a}1k<_)FZ&}L#Vk~VCs
zwyyn-CH{%w!=~SYR9E6^NOPc?_d2(QovL+EsU8wHdyvAB<<1hyF5h;*^7sUKm$BqP
zX=({$H7}_03v9Qv!-jd_nr||Vw1pQQ5dxbg_svPehP2WcrYB#P)+y)C{ak68MqF9+
zBvHU~TdOVeog6fcX)3xR3QA%V<q45bnYLAJsG9{3C=MtJs<?(1n_j#gWrU=Ky@Fn8
zdc5XTLe>S2kl36T%6HUbei0RkkcJ!fu5-DfR{@;Tow3r(RHURTTjr`%lGA80b-?yL
zOsM{{;o)9at9zBrbmP>fiZ`)%G~DIeTZED=6D@iw?StIZH3O*)1LKZSj~@|ucDc6V
zeKuRo+4&4)AR`9>Wx0F%V>#N<mdVn#^LDE<h5fynV;j7<=j6I<QFxU`s(De5_+Ql%
zapTOsIeF7h)j4&JzKo-s&_`i8XE>v>wa*sK(bn%ZJ@lS=boF(SpvvFLMiKQLl3h~$
z;H7qv#9#+7Eb1B4l2r&`uES+XctF}Jl_*rHW`bFo7t)4Z{!W-YvbbSO=$NR{R6@uE
ziY(Kgqw+#~GmlA?Fyi3uybKLUme^Z)$?hrc;cD9*=Ff(6$z(d4DXj8t{7S4vkor_M
zVwKC6$*eB8R5Y6;H<=Wc8<<E9!aKixCCB=4Q5dI^6Q)+22F!L!!hHX6t+{&fLs$NK
zsmh*jlm=L4Hfl<jYQB}42`@d9Bxz1+yPA^+9a>IT`V`eDJCaqrR*4tTVHNqPRMvEk
z4#ZUNVVVca3r^3M_$XzV+2e-iZ3{%2r-YtGFjB@<=veHc(!(~&n97<vRc1WhAo@Po
z3^rp-jVV|j+P=YfZc1Wlyl4+;gGJZSzJ+queK;dYrZfOu@{;k+$Vntc&|p%1D>3Dy
zOO2Q}gbyFOl7=a6GhfSchP3%Yb6s+`ATvnWzTks~pts+`ik3dATc<>tRpK)$72G{@
z1&2UWC2bef>q>3;H(`R_3DaI8nQ?t`--1teB3o&_-V=4|JW*7eDO4O$f;|UvX~xrZ
zm$qsPFLNG_=y4GwQ<yoow#UaArwu4DF&>YvcLN+M;?#F8GHq|S>yx(`A!A937xiJ!
zOpd2&gV}A0OeH#P-6B=lz2`57gU$s!Tg<jCWk}b6>ja6&&bgMA3rbpR)xjcBN{_;i
zKCzvF<l^1|Rl<N2z$Rr|T6;vkB#ryD!PwIL#^IrQ99t^)v25~WjB6=pxs`+#wA`Xr
z>v9<Io{;4#EsVa%z8BJ_{2P(2it;u%_}MR}Eq14{95tmdqA^F;p(`5A<`J_Vj@Kt=
zG^JIrmW=@xQhstgwROh5=wUtvvW}-9q%O%0iOSU+SO>PKA;%ro)r_zP6^ypV>%{h4
zNy-ruL_Q}k2en1_K1fomE1;ZzZihSbrQE!VR22+{(dlh*x9e2|_Dt-&U_}c?nIQF@
z09BQT&x<7-6S?N!J|)Hac18|s%YHHxKs?+cpG)OB8gj|et*qQ5wW;v_0sE)UO)bu5
z^|&7)3$9H`g{N%A`$uf0gO(anUHvUvwsrzEoYrlmsSISPGo#mtT=3gAP~G4YjM6Nk
z4uDAsiIgUcMxwXUgcx<Lv77V3xc;Lsn6IVH5<!Pi#lm3x-EX0^%}ps#pk$;q17uS-
zEQxeoXHb5Q0}EAMe+%P<WVww*CW`RO9#Y8!M~fFq>SzwQ1whztEr33g)h*@WZ5q*?
z>%k^HRIShqD(8SH*^6ZZ%JaaH5SOG<gv0Ie)JqePWHIS@Ygmk`D@RS1F*c*sJ5^j+
zKLUKrwa*!i$r#5Csq+n2k*|1~qTO7)vf@%QaJM^{XHw<76dx6`{z(aijOt!HXsecP
z*#gJy+x5g5R`yobEx~y}&LuH40=+wnC&YSb!n|Fy-`J)L)wX@A#1%l<9}jw|?P1$h
zNc6>4{eXt3R||H@i?&JsO2QJ85*np?g_|`X*x0Zm3S2{?N*L%weex;M3?(VuOKdQs
za5j&Z0OF%NF)Wi`<Ix#wwB_F3ZkLsqjH-`>3=#$NIc#QMWLYeZ4L=-?`fHn9ol0L8
z7dMcL6G^(|;xHx7g_PV>b3i2x#U`m~ZL)oI)dJ3oaS?`g1KzaE_sB=~fx;oNUlk)|
zDfB#;YurSV&r9*u)`{vZnBr*cFz!9%)~QfuOc0_|N1M_r<?65OTk^@9O=^+`OQytY
zmihtKVys`9a7i}phZ|86b~q>FNC|Xb^tSm)g5J)BLq#F<(y-3$!G|^iC7D`J7vX{g
zdF0$h4{)3`MTyYc7%j=gHzi>vdqiytvI5yl=Gnmtj|_#PgX43~{kV-_h1M`ks&|w7
zWw0xnuP9ZrlGZ?2pN8x3+x$Gzx|zUwE#$>@>Sw2T?x8{0G%^H70*QMw%_v0HTqRS?
zJ})?WcWFG>+(*h&b4otWS{Hy2v!|>RILcNF1$*%ywM_7~Dx0@Jsk2qc()BKuCt%{`
z(-1OM${naLza&+b+$5%ozfJ5l@8?`9pJjxljAEneOLV^J8&1jrS#{d13zYMjIYq{y
zo0t-G3tMTBBcm}SpNCdU%_P~-Ti^7C5Tg(;tNjzJH-Y_DNTvQk9u!#j^7$X-0Qen3
zV13-Mz=SHNSRRLR-o4j$Fhqj`?}P~fuqJGBMY!y`8#xdEOn#3!@O&clJ3^mf)t|9>
z{Y>;QlrDCy>Qo8(hVPW?gW<sf$YpNJ8H)Kxrm{+@D_^L}Z?lHU5vzA-{I-OHI&nJ-
znd?Ge0+G3tSGncm)D?`k)pA^V6pbw2LjzP5mPk-_*Vek^Er8uFrkjx@_#sb|s`}bR
zjXO2sR<z5s&dgJGKC;4d;4I5*J+#b%-TAKi^HSIOlC1EJCak^5dXK~CQrAjoR$Uxn
zjBzTmZOWUBn;z5}el|3o+`x>3x?QU6rjlb_vRetH*_EmaPzF=|75m^xa@U+r*)k>)
z_po6Nk251xKV(lx);3suW<{dnu2O!EXesF)B+k@^H38c%g}pm>je$Q=e!C-+*Z533
zyOD#+8nO#Qdb?e*tKFsS46}=zTxqOqNm2EL*G6$s<q48uS)Ab_lNm^xa%Jp&#7pP_
z$S3v|wzQYSR;5J|x}jL+ZhGw**+5I%YnI%ox`bFvy15k$b^M#TI<)ouk%m|+958E+
zo!xDx7n>0N<Vw^QhY?J2pCy8?+)bg>&&oA_r(!Xi?c&`MEth(#Ej)GGF*PL0jp56l
zT&o0_A{~j1L;xD^TEhcVF)}3)1+xH-CPI2d3B<0^v}x`B70ue4NYSJ{3oQo;vN$2i
zOxpB3ryTdK4bDYh+kP@=eJ%10*W^aia}4V-?Yyi})%86Op4s<YmHM8SI%91(7=eOp
zWd*z6*chF?Bc`0s*JLd5itX}M1&H<S)vyJ7xubfi&g0i^`H{Zs!6SXw)kfd-x*8AO
zRSwFf9*lRV)Ph}a$@pqL)!JIswYGK@8NREwxehArrSB+#qNTR9eGk5_?R%wh?f;M_
z&e+Ouqi2#dRL5>pe_qA6rb{U`pti<LX)Rgmo@Gj~Se_^ez&p5h%JE==V7jX2`3k$3
zssCeoV3h)$|F<{3QU*D<T*a+ju)=ENP_9(!RonEt7yEa8Gkp??A;$sX{XiyWQVS$v
z#5xnGji&v}bSI)_d$>jQoCaCeR_*gW*B49G_r-c-`50DK<3snt=it{nP2ES?;(?_X
zQt98zF{s=uE?q0X!k3qjE>+UQnIq}iAKtG3M-g7GXRmq6g^AX_Kv5|iR8&cXAD1Eo
zO+0T+h#(xzN*K1+G*7lA*2tx&%W2GSWb3>12{+`JLJQeEB&{5A5E3=`$yepcO@42V
zyYp6Pk{_h8EN=RCDI?kpb64uXKV#94O*E*&TG>QNq+AULJ-HeK+%Xzy{(C|N&b%hU
z1r0&!7Ne!$TDuO}CaKI+2MD--u%9^TsRGJiJCQC~UBaejMvBz$PVR>&OX#oUYrhhq
zezmZPE|hcnBF<8VzZA!VTTUTgL4HRlgTf)P`%=9&yH?vP%n~D6B<CI5j8Y+k!VQ@a
zle3IMQp>l-$5IM?!-e83GgCC_LBay04KEd5nCMMzB@Nn&+F%dt#pwE3lrr32P??m?
z?5^C)Jj3`qSDG5z1tZZCJI-Vsc*s9lJJ%HV#$=-_(`YB1Y$OOHRV0f+Ink>|h9@s+
z94qc_4q%7lzSS9ZStoyfX^UQ3rE8LDI*7_vn#wpqO0Slxh;l|9CvT9;%mIQa$0}Dm
zfsVN#1m9*U2;r^YRcW@_>2`@&nP94n71=bH<)R8T$J%TClIC(79kw3Ns^07`r|R3w
z1z*!%PCe3IE_g<JxnI^?&dIyBH(7n#G@q9e#VoaoBTb{R*-)<BLN0i+g`C>nLhfri
z$Eim<vsL$A(>YF6+Ls9^zM?<eldE9yyWcXxqX~5NZbwimNnZ+bb%lIfwGz%YDBa+y
zP@UGeez5u{UN~Lm7N&3{3iD3S7D2Jnp@q6zJEHeQYrJMqnhqaRHIKcblBU^Q<5kO<
zJ6HpKZsVf3Ua3NsmfP}k*lTD-rXN`g8V7GPa%fEX3NG(URcJ8qZN<fp;C*KzVC(d%
zS~N-Qm}rH|tVWNnPHsDl@f*Ta>?^lSRc7nf<8_F=z6+-D-0RWt&@{L@9lyTBZe!ys
zQzPRfcyl?TUQ@^KhL>+gO}`tR+UQ0n13cD^uF_|$rM{^dT~KaDr?xkvGke<Bj4t42
zbm|v3qoej=|0SByDeoNXo6(sE9%)9Wyk>O4w>6{tMGfbaIrnd9I5+&nRa!nNX*mZo
znldhBtawh}Ikmm-T<}ESIrZGWbHO8h=j?GltM6Q}*>_HTW8b;J?>na|edo-gw)dU8
zI)8Hw_h7W!45aUz`o(?csB@k2`_6r3H?vZupl5Y6J2lS2t?kT8Rb&4@_TGFwjV#$0
zy?^t4^$uC?K5lR!G+M5fWbKUs+w4Yz0omn!>ozxml#~{bbY@~nUEBA4&U2g>yD_XW
z2SBpqc2%RV3Pa|ahgcD@B7X68;moc*wv`#9*PZ|p@#Dk+LGsu#k;{Y*k%6L7VTAGo
zbKQ8BB=jmNv>DW;mf6*6;xbZcIqy<w0q|Z);@c_@GFi!T;nR#rCUgPL;nSRl2%lyI
z;l6kLG#Y|Ojh{A>s2YzOKTQJx+iPxrLHsnf?2w?_^Wvvv%0a?5D>IbZTz+p~u<}&E
zXB{>pBbeG91F0D!@p}xUMkswWs`KT}ua2j-#C#^gs)>=^r|3@@RV~G`>!93NPBM~7
z`RgK?JYamb2Z*6X_p~CAmUHJoTBa4B4x|MKo509)@m_p7g6+!=v*aubo1e`XJ_fhJ
z@D|Pdcvz7-^>0;0jAO4GKxE7#vngRmAos}s`#KEze}N6AO)jv3*8<}AY7+FLlCs}}
zD*g-G;Iv^tgR%409b9=Kt-J%aWJB)|SYq8<AYj4du<UHY86El~jB~Sr=Lo+jV_cQU
zm$(n_VZEUi(2f71AozJ0NRRMi;a?p8>8N>pTCM%<@!~&i>}+rBCgVSCZ*Q&QKYfe8
zBY%R?W{flUn9uhjMrSHHXA_@}8NNWMp*r;<AcOvqyqaa*fj9i2@I#^PrSQ^3EEnm*
z2~P~`zNjp(oPQ|DBgXXLD3g{(>s(W2S3eXq_m$A?jJ<dd+q7A6>}Ex*g)pG&W)z-@
zwAsqa^oNwEL|L8$tW!6u6BkdaEk6`A;j3UHw$hD2_+LE|vJujH{^8ppxd0i($~6`K
ztUgoroj(-RNGmfc%OV*2By`Qd^skY{t6i`KXUJ1!v+nxjNcyOl?T5uV39d&T-?-C+
z!s+@%hU<i}Qdy%kgtuhHC{yNIrknl~vX*2;YBjvTaM#YU%Mdt}wFOf_DQ>CfRUuuF
zY(bM@H<NE92m(Qlx@_7H1-2y>y&p1rjha%r9UvQpHO4`;%Y->m2DDr>HD#xwl|uDq
zr`D=Afd#9O6e?v|RH<~%PFcw#R7yV-a&$@#>Xa&(iYw9<{ZKeNCh47&U7a#30?bEF
zy@5>L5o%DNOjI_!`B+Ao_6O9@G}}(&=(Kh6vC-&oL=)L=ljm$=(L$pIQuhyqv_dOO
zpMWK#)mc9j()z6OLcSEb*6g;R!|?w2<an)l{JIU*Hn3x53bax`q54_(M0K|sZ>lZQ
z#84%a7rJl?QoPg$NVOv?HEPh1+6lC{(QROB%GBAjPUH1ii&`f%3UV%?;%!xNyMg0_
z<J^5)?Xo)NH4U<!`wyf)!_>0<&S|^(zDgl&Pd=aqh2|J)F7A|mD2SrP;BmorfvDPg
zeRfQ0Gg=MD-A228cG|_+q4dozE;)F3(q3a#Y@1);5Ygif74t*kRYU6F8g*LF&nrBg
zgj9^3S7T|b=BO3cBK@IYtWxK0i}Z(rT%|lY_8Qf?wbw#<2V10xjV{BwRH`$aNJDX^
z)N5GgDOTP@wWZLeVV!cfVS?jo_Y7xu8Ex4Qh12#4tlo~)he-`uSv9cTsCHodKER-N
zPGH1fR6e|Ipyuia)@P|<#hI!i;560&S#YFYRXa^86gt#}HQGhpcpFC8Q05&s-n5!;
z8l=E&JE*6ub#<wwvQm|yW*o*zXwz8L_S)%Gn_5+;)M&84j+GV-x|vp^iT*0xCM}x(
zYxVbN{Qqfd>rvys?rc2YO7j0ZFLqb)U%w6iAN<oFjohS$MaOd8D7;+wS3n-Su6-X1
zYt|pWh6GP)(hMjFpRnqUalcVXi8gM)Z_eBiV<KFj^U)+mUXAT#SPxy+wnU_6RbGJf
zo^_z4HVdwy<eKpEkcycKwq(n0iR`M`qMTqrHOq`i`shTYE&)E4952~X6_Gj}vl8Vh
zZWy~6@Bb26g;P7X&2}alnCzGc4Ut(k6}Cf#*_q4&1$=l7rJ>g-jga0~I!V#I!i1PT
zBF{O}VtYe13sN^caT+&dEbdg<j87v?aPbm1#i&<k&B8I?NF3Q<xassFtuN7&UE!&x
z{kj#b0jX4ia~cWYFP=2bTmKIzo^(8vqEHr-aXpRE5$K?9Njv*28WD3eKrk6V8Zk@)
z6jhGTE2%+D&dxrX?{dX`qvF)eD~D9~v+Iux(@G14QPwP#&6}uf4q@cNk?(~my)){8
zIi7J8=;Bl<N<HqT2aX1L7X~>o<>qQ5d+;O(y@6AkoMkHW1(exBKi1Is0O_sr47dj2
zXOpo<46iUL=hE^KsIAI>i|$K;Py)`Im}!{=<!5Y|2qs|>5mdqpk(d(xRVBnxG1FeU
z%JxDbL6XILS>d0$H~x4wW@t<uQ>iSSk2sQsfZOoL>i$kHR4R=u8x=U+i0=LvikgN%
z0YyvA4qq~5;UghM-+=B|yEHk)(&AJK^Lu(D(F!0in@I-kVL-%aHj0H$ud6XEVL-}i
zAr8Hf$H4(YjGiMM7ZT_qCNO}QK~+NbcEjdTc-y&6)FDqMx48xLSmE(^%J%&=(o#R+
z?PJi7#NY{U0MgwQqmmg~FdZMFECPh`$V|fkmFi`QIgiU4t#GO0vJR|F73c*?u`_|l
ziaR(>*aYm0uoss`&O;^>QmHQ%*tr2(Ac@JXSix81R6}tM%COzYw6?5$#R(bDMPqy3
z4#Mvm5h>7eHjHV4B{n?Wl==ecC`z8q^f1+t_v*q7rUdzb6!o(B5*?3Z9>%7@DwUVN
z!!q-2>bQ<o9E?E4ucIjJIr+A+KH*nl-3LZ0CJX}%hy#s#ja)@?H78l06lu0Xhd5c0
zLb!0`GCl^gC*;nA-@+#)Ygs4QJlAIQgzFC$p7H@SmsEu;?u@Jy%93kVsz4kTgMkzX
z7zC~>qjA!Y5s^0rB9j%ODFfRI5*9BV93L?P@@uh8g}uG@e9^!kWxV~Od@`;LGY%fm
z>}xcbWM2*{#_;VZGfuul6cGSBY~49_C$97*l6Z5}Gn@L8p*6bq3}WSUz>r&bjSN%`
zcw$%~m;Mxsq5D%jZP_)~Kaa`EVt}<2pKtsx?LR+@DF7e&k|T5pTXn+<h1vJTBDIP(
zS_ta`_L-C_-ms%0lBJqbS|@~s^dA-h^pi6S^InVZ;tN3G1xb!!xNJwmHFPm3TV6S3
ziXkT~CUgi|IGJ#kc=qwekf4=umD)}lXk4P%6hT3mC=I=_8(x)+b!yU7o1PMKOj#@Y
z%L3$9a&TqCFrh3Fp2`xU!*<&vyG!M)i{|fvAE&7wJ4HzgVshS}O?1>`H}d6GjCG`!
zB@DlX8g8&*WM=}S*O#MbopNe5B)X5?E%PAYIUo)JUWw)@;3eq4-C1=M=yM?5Y3bvd
zeGY7)IM|SOHBKEGPgwMdYAVM=<N_p~EilhCR3Tv?#xi1-S#N>kSZbRc_Dr<auDHQh
zp)D*KlNiu2GM(q5#8}EMY~v1-tN=Mx(q3^o2=$rY9U@$+PmT;vN^T7auS<!$ZTk2f
z2Z|?iM+Qo|s!AG|NJ|mrkZL=Y$sFWkd!A<7GyK2{$3ct}py-k@GsM0z_{}6pMv3-y
zmrz767^5vnfb`U)dqEKmG%4hT-Z)Sp1X6OFpzORS&WXfpXV8dgJXXKw6#Wt#?AZ>D
z+D#S15h+Sw%EHM?nZ)~kDNA*cy)6uBNZpzhHD-WDLx@sLs@Ogy=9!Cw?5(f(L>%hW
ztg4$<8E{C2>9Cq99i-$5q8t;L-NZo?D^Es9lax3@(h`0~0i1s#AiyZN2qNh%VNR>j
zbAg?z3{zyl4}nfqFeG~`T!ld(wkta&+L^!*bu<{bR4o)>{aUpnIgHHRB+fE{x`%=g
zHu|GRS<C=h&e=GwA@SL8T82k$5xvw@RH2Q^R*4-pQ}%Ef0zaz1G$_eD2+r-wHV?~l
zybKp^KMbgrKGj2+ATpI7`n~XeBfQvxy|ih45iydkWkNX!augp2w2h<LIdU*KF1&^c
zQtCEkV6wtV$`^A4<1JKvymtK%Vc!y~S7;GT7eA8W%ys-xx(6}M6;6$enx~Aaf&nyU
z!u;ns)<s7teH(hNkgH^zPK~G)QTtw0Ozx|oKvlL;5ZSJ5;m0<gwXl!{h^6i8ojy<s
z_W1}EZQ~>h$9w_%Qpn`)wvoH5C!CFijDq3`+P;la-pE-LB}+Rfg=X}owRMWLtjN-s
zbj!jMm$jZBIewesvLsNA4nUC|3`&D~vYDO%ZJ?JiH?yF%2}-}5>=FzwKARYBJ7%>7
z3$}<X0nurHKnTZV&60;HYJfB-$QO!U5Hr55^z=-@GuYJDRNo3k-8OPm$=OWk0?B*)
zqaE2MYd(`&V{~{I=d|F25_v?FjfgDR6IxMweVE$jNULaVD!$1BJ7-P<jYF7h?ELA-
z)qdkdtMEf2s-}pn=#SAiAEdzv;XK4kBKui8N2n837YzPgu8Ac4RkC9_0nu!w4{I6~
z-zAN!Wd!F~n$f2Wx=fvgQZ6N%px6Lo?J^}LM2=X<5=$>$yJHqT-$wR2xt=W%h%IW#
z2a*X%Dp;bC*hwHK8)tgCU=PMe8OG1xK9T_!QWQu;F-h+y<?d}Z#*Y-ZnVTT21zT?}
zRO}`>3J8vx6$$D46B(%q>0Fwu;oL2A3sbiF6lOE{kc>_FW0YRD$!fDAjPNc@)=KJD
z0|eJV-I3u{j4>Qju;ExhN3oTDS;E~o*-Tm?CEXZ}QhG;YR)a9n)`D&;Ez+HeFx-sv
za%60rF@+=v4O?nt2o+EgsSiahGA?6LADbDd(n6+EzqN&7WO|@5!)?l?=kU`dUf{MQ
zmFL1fFmFqW@lMM-f?rn~g@MvzFU;x?a?9)~LoGK^xNsW{Z_(KMdP0D3sPuu|Fc{22
zci@xSbQ>Ags*NsD)yz6MUl~Mz9ih=BnIT_AeAQ&+)3P2ZLI6hvM*&03jVENG80yio
z0CD2L+IiMtlv-aE4H>4$d%yshh%GZ=`h$J%dzUU+C1PuF;gAXgA7zM32Ae~QE97AN
zQ81cOTw6w|P;m)q-KM+hsIHdt`bxNmbY772IMYK0_!XzJQ2i0)k$_{tGS6>CL^w;E
z!jgSK*95D4777%|l$I(Xdo4d#ypOSU)OO8lDO^;wbP73Jhx(VdbtLlbK&YJxpJt&S
z2|Q0&t>{2MQH?=ClTMHNj7F$~Ufs;>w!Mp)h?Bi`_dnv{L=>&Dat_2n7R1>+wT#=q
zOa?ZkPUFz_p?N2nI!1=nqe8?zDkCZvDWoM37O}Cii;M>)m+c-Y<&#o<CQHBI0+#*k
zfPE@R9x?&op^dVIN(qBTkW#S1h^(A%szY%01$6OHuzgUYyofX{s@;T86eR|#Fi(R9
z92L_~3DV7-vLOD%z!t1=Eb~rDHk#wqN_m>%ub^j^xG{1I<_W4pszq0*B4mG43A!#I
z-#Lr|EN39SU4g=wUa9zcldaIRG?p1ij~Z*z0?;e?ERdWFn30Uds~`gnnksU!uD2l3
z=3*m;%i?AUJZx>eQ?SSok$8ePC$wZ(w!|eyg6&a&#pG%+iJ|#)hijH0F>yo`@^gP;
z!)``=3b4|tEEhE25)^{TMSzTexFYZ~6#6)5bS~j>3$&MMAMt*8+hT+8(auf@IMdK)
z@q2f}8DcaG_Z-i1sSGKQF1?SADg^dnplmuva0&+Ugp<9v+o<w=h}F}qjuxnYz84ui
zbe8O4S;tR;VIj>v+i56$kMdR&%EjP3S+;gY6l;g0y(OEek4JF_q1L4y=52GB3p8+4
z|1{S9iW#8ld4-{(J`&<4T9d>`-(q=4yBBB!5t|pTcqHT9cg#`5vOp0zwrWuWFbzF;
z#4Z*ZUs2$XA{OEd{rJ|T4Fh?Lhd_hHlM>@FDztGo>%OOu1F9z*W*T1PZ(%rcKC+Xg
zFrFbwY*ZI1u{B2}D-;X`<?(A!gXq#VsT7U>HlFB47;Vg>Nuph7<UD87E|@lX4}vi%
zJ@&*>*E1R>GO}ht9w^uiMs2|hIvBr_2#rW<weP{$5LVsj*1S=neG&3Tw2Dg8y@Fbc
zW?#7b(JcH-F68<R48*es`YSwh9^{V-+9l<H^ss`N`5<|?I~J77Xx+7-2kTmv+&%|D
zT2Rp^2v`<9F$*$om(gs=@EE5pbme1M93k}ztZK9_68DD0$Z<i;kp$#AEG5)(ra6?T
zLt)F~=2_4+LJr{2F|d^)ij|Cj6e3{5xgU#Y8fHO;$*sa*42f<Fq5}U(FFA^04XpZ=
zKtPpkqJsVba;Gmie{s)m;$CJoSevDoE$Rr*?n%a_8_u35I*9vASb_<@n?*!b5W&+X
z1iuo}bqwWf>KEBqPTrRBr5jC*C@A!%uFw29g=}I$h4Bi8*4|Mo1Cb2jB$<h5lA<77
z7Zb8()DaOAH_`%PDzo>HR20Mr68=B4*RfD77;g+;&PUm64@KC%PZIgUG*7_UX(>_m
zfk@=qzlubj(HfZqHh`?)#l<VOQ-cjdbW0j*!Dfn$f((t@#Jd=wJI=r>m6btPIbqSo
z=Oixk6wx;LK-PGmwg|B_O+%8@=5V7#LHb5la1nivbPz11k*p+4C8PnHHf6I45}{;>
zWE1KEerLo+aV?EIdgzYb3s2q6qp}RT4FjbMT0rt;2^2*ONfS^zAlg94;TIJw3%Uq`
zAqs<K)@es^Fr7lyvW|C!5)HvQ;NAR4MzvDrmWqxD=1+hKBLGr3uzEvHAw4z$r%0iK
z{d<iuqUqGE6<;)YDl!Ob8-#vLlp*R6gj5tXvNoEbGolKJqBE3HV7%lgiY_C+>*<e3
zyXj9p&B9xu2_rLIC36T96=TJk$C@MnXF!<0+BYURFU7cXua%}jm^A5TS71qwy{x@v
zs<|Q+bO24ulPE}GVi*zy>2;X942Ak1qui=uakA}#j9R3+U5&1JE`KX}^kB!x&q%SQ
zfLBOpT&kpBSK?pjjX2Iw9Hc>cfoAjxyf{{9xg3IWCSW{WYg!GcDP&S6Lzi6XePL#g
z%LSwcn7*kyDsV6%D5H-S<x&MAmXm58FMLG5G;Xe71x5-Y+H8QPEGpKeGys8hat<pv
zUJK6G#0q9DOho2r1Sy*|s^iM1A&YIU+AxN8womw&rYOsc(iC?=_W5hYIRYjGL@`Fn
zFQnD6mZgbhJ!1rn)WHD+V6H;y<me8CG2W=MO>J;aazDw$va>09RpPvp60cxEybD#K
z&a38vB7%%1gtDcBELe6|@^--}wc^kMgc-2)yKJ{1<ddkNxPBP0i<x_&;x`BbY1I~D
zHNzR6b3&0p%P(Zjh^Cu;5e<~FmMDuo5=sw}biUiDNCiZcaI`AS)vrUw%?g3}$?S!K
zbk{Txoqa2g7@TGv9Oy%(;dKq6O&d30;oD>(Nq0%wra7Z11vRf+lXP4e2_<bIKBpLS
z3z1pD(K0M^VLmJ~4TG}L#iH#2$uL@&qj^bTyNTKg_PmJH+7w7yl8uZgftmM`g&ecF
z$!(SyUJ5Wy27{B6NWI^#2%Bt3r#mhqY_Q3JGT|iKNXup@8Qn}lJ;p&n8Y~DvNNsdM
zJ2z6dt+#0d2a0c8$PUzL(uZ>q_=eE?%+<x(CA^(3un;N;u9A%<-e1;|l=hmebQa)}
zM*L?A_9aY4$>c-@htzE=@$l}L<{fBDoayTY+2+_Q6trol(3I+1U}I%2UvwdD3Xx}b
zlZ=sS25>1PT~KY;)8u5yn;hxIieT!}kJnjCH(%ys%Y(w5ZVv@uVX91whG0b+vNx>l
zuU;@Ljy_k=1&fa?3-9mfv_)RBcFdQdR;L#nx?d|R*im7vg`^SY=Q56`d+Y0CuDKF~
z7wb^uI#jk^F>XOA4vf49C5}4sLn$wTSdnm$w(cgldVNJ{+!iNujRIkWP~_Nzcug%}
zuM5&$5ihPs-e=t8ERuofSrW{ZYgn@=pT3cQ<q4@E2(Ai>0=W7bKp9yKho+@68%o65
z)Pex1q|JzO3o=&HD_BX(d0^jTUEN`oE!PG#4=gzB-=*nvHfn-egM~j3{ztn}tsgZW
z5&vUjb7ylqiT}5?vAM$k_!fT+oO#yl`(uuPj)*Vv^aMT<p;=8Z7-`QCakTnq54MLO
zk=NJPl`Jy3IK}JgyUJxe9u*2C{&UE7VIpQ>qLC2=pM<YEb)>VcH$dM<%3ctt+(f|V
zvH81f`!i80Qo#z_l+$sE*{M`tx%|?zt;*&a$|q%22|Wqd$l11n_Rcc8FJASs53j6@
z(i1sWPU5OEy!Jz*j*4R91Uu${ph9<Pp1!o;`O)4;w<7M{pR~e|pa6Nc3>!x^RpLy#
zWpgnZxZ<;6!e2!4kHc9iJAYzI$LHddJQPtL!vo=V+z&=L+YN+$>R|=FljGLMf)yx@
zPE~@dmcf~$vJsaIj}IC(=q`cILIrf#*M9lsb*uWOgB7gRlT{EEC&__*WL;6Z2;&om
ze3jUjK_$q=jR*HnX1r`YkZ62ltej!x-xEYCpZ<5fC<aOP+`p&{P!4}Q`oFRLJVF0=
zw|93|^#5D@{pnB4A(<fYmH`?G{y|`HlE~tjTeni6LV!#eQA2A-uw+`mZAX<@+po<j
z4GWR9AQAM3g#gkLI72tO><1(pLk6nXz~5)P9&Oz_L-JO~%R*^{SHl8yDOd=GUO;fa
zUboS~*d4_ZKV_)9Mv-<xsStTFe1#;v)Aq@KHEP}7arLO-JRu6bUD<wOCOJkx!q&%L
zr(11zd)+1`+}PNo|D3aK4H6rv>+O@%-ch6b_N4AS!IRP|i{5scT_^&P0D_7?34OP6
z9;Q>ZPHK#s_+-6Shd!=@C^h!u%Jg#jA7XG{X0}(=(W}*~owrW6eby+gVD<j(=|51%
zV9fL%mfhpfe_;1F6ZHSZ#`cQ-f0I9h2Gq_v-IJr<QT3PR(b-Y&`0VIaquo1s-9u1L
zpo<;n$rkj?q8qt+1jwtS@~pPn?`!|lF|lqR<B!DuZSC%+_W#C<)&Bn$e?mAaw!I7Q
z<}CI{QK^8uR?mwD?$qm{x^!~UbE95}DS$U3Eo1Rb>*N&+r#;2W^;p1u@(j6~ocxVW
zt$Nz1_i9JgzX7vx)^0bByS>iIS-aNg)thbS$)A7Ad-ny)JqIb)9b6XAoj?DUS^pQr
z;PKCk2%UWH996ruw+$d&zy69V_z6YUO1y%C7PRWi`u|<|e=0xSJ9iWg1QHg4_7|(C
zrx@)kv-!O^=#gPM$hv?2?VNh!O@Ob4p@)B$>3%`vFTQTJ8Xd@=lhml{3#6(O`^)^}
zpMMkFZ?Aa_JZ!7Q{a|eb!j+#KBXD*dVfG=bQ77YKpR_+Z(CK@Z=}^;qG{^`xeOm2y
z8*SigpH(hbhsigj{}?<yy1IQ#{qI(q|9k#o=lP2Me~Z8MbwmF5HSP~hf0=@qUtGr5
z9$n|ue)`ElfBO-;fN-2-D1l7w{0X{!x}ysZo>$;;L6&{bJ3h>xfTh0c9Um5<y=Lxz
z$@eG!d@}UTh32#N_5_I2-#+$^PU>f^M(=GwYEARr7nBn6Ni9I+W#a*m$#s0VK7-SD
zz3vlkTqmAQ{ox0f40Ix(d4<CIvuDn)P|rme%%+aqn7yN5ID@H^zflJRzYAy(X8g|+
z!S9rj48;Kc?1sKO(Lt3}K|^#Nng$W0uF{pDH#&ERpD;3@Q@kYmP{>+BG~E#I{wks-
z3c76OW=I~wGH?f|PNplj&%T=opU5DlbfbXgzRnRAeDlhUyuH2AC@z*VUkuGR9cW{f
z+xXu1t`qsvZ&hYW^6P-1IG$;(@qtYU4>=v2#4_o6YBELA^;WBkGne<aE?=MPrFGV6
z^jcL6xYbDH8~f2<edJ=WCU0J;t*$J9QW5KrzuelMH_2IVoYjFHl((wapTQiC_h4PH
zf-k<EdM^jlsRO^G>z7+j9Kna{@8KY=j@sYr`43d1!#?kibN+8Xf4-BH|2Chm;y<j;
z|L^$x2O@|OwNBdId0aqNDuWhxV6-Fe-W&m1%!U(SDV{jL!AYc#tO4)Z%H?6<i(nie
z6Y&jVC-;~Yi1;+KxC*`9P2NI3O>h|5oCQHWL+j36?Gzx2QPDQpywX^6nCQTDE<^A9
z<rBkTAe#BH7tvT%fU9_NXgoMrcMlyr6x|Dt`HyC7FgUohfXOjf44iYsc|rd#zC9j{
z&{<h<9c%p4=ras)Fmy+7u<j9<qI}@M6U{AIMOLzks!NW@kAn#obs>PkTg7@Qw<rNY
zAh8M@!=2v=(cgdUAzvoJqInR>3Se|g-Wpb6%wVDEnCb=cb>wM@jw_{#n%39ChdDwM
zY%=O%6Y>cm%g4r~6JLio$$<c`wnu#K@T!_&Fc&1KixkJw-cuN%i1IZi7@5=yS8nB<
z=?>8x4(-6n=u6xWVr%;|e-y9z6Dr(l5M3JDEy!Lvl_eQSy@}FnB(L2GgO`ZAOLLgn
zzC`S7=hAZ#CPQ0nYPMR!>?#YwsT*HL%Jw&uK__KaG~&^tF-UAu#^FGOyeVQ<P8br2
z_q}I56+dK5V%1Wh%H-1|%t3|~wY-3YlYxKVjX1RDC-NSXF}ul^6WaUQpA5z5@lBLm
zc+liaZ#1P0()A*CqboHXnXI{<gv3wgdLM>S&LsT5NB$26;6YfUIq|<XUu>oHKO4KN
z_`iPy{!fP$F4juU*2c!q&KuVoId5j(Wa9PR+0Z$-fCrTe_256HdBb650Wa3JHa0h%
zSKb71?*Gdj1bye=6aG`_)3eIve_l|eb09c}e`q%Jq@BTa@<W;x2ALtR8dkF75xGbH
zuF5M64fOcPK=)NYCL<f*uh2#EK1!-}=Peu=u$GSB*J|svvwC&C^Rd>#rb{F0Vy}nK
zYqegl<owV77>_Y!uikjytTo8&4-x<whcfHz?HPv>ScXvUu@pX|A>WV00h{A<d;(>?
zI(vgJtT8fQ2>l4<)mbT`>2bBi>E1MtDeY{sC`EQ@jbCbwQ@jG{Bula1sNB$FG*6X&
z#|RofVh~$~oX39az}0Yu6M6)Bz0fl2^@biIpZ9vYDSFW99?Bk6SpJ_qW^BC7kxh<m
z{p7gYdrucV&Hrw20&9%35~%*A67!T}Oj&5>We~-^p4|y*@CQU?hdu%)eA!cjQ@NZ@
zu>-I(XQAQ@;X`WFU~YPyZXGX^-a_6%7{~(A{W}sNJ{S&>T~7%$KJo=Bujb^^6(7>P
zjYO!O++G>8D^usERZ2VT@}=UOaAVTEi1yI}py|e~R(7Vd5qfLW=om{!MO}c>E=EBg
zSblV@0_h3CvykJ&@kW+P_=lkZwX6dOhNxqJew1awlu}_)th~N}BTVn_UbS6&i(CBZ
zQ{xS`mAAD$0QNK$aUe<@D$3d0!(aP3BR3}_v{2`jdeC#Hs9IAyJv}@A&^)du=#bu)
zC2IbU`8`vwk{defEoyghN2R-O5CWsAd4xhO$ccA-Xf7A>Akv;g@?005KZsN?3waB$
zN?tS%PO|tfWXO&ljvQ=7n4}NU`&k?%eUFT<u_7PSbFtUBOsH@Q=m7k}gdB_`j}DW_
zDI&i>z}Z8P9|tn(JgmD(5K8}J#HRX-F!6>t;w2`%ZoJAx#iT3MR`bnq34QdTZAcO#
zVf;PBW3-Rn#j0JCTRs746-y!~r1ZS=?TAK59%$mUW0`8|t_`MZgAuvSPC@*ZqTu<+
z6_p(MlQ$on-O3F?0eRORhL<4Wf&7P{Q!avvQrfkX?Go03P8Q<qCtkb~&u-4$u|K*c
z9-#lv^$3KjUVnB`8C>}P;SXOvf4(b#8xUww>_Eu_9G&)#U|W2^HOSB29G}&C)o!oV
zsCK%=o$@ZN!D;9MujIU`)nsQpr}$*;d=jjgJ$+J2UnCr`Lg405EIb6a6R&8^anP#B
zdDW_RIH2d>S_~FI6dVT_Gm?BcQP2(N0!CRt5}}&{i@NN*kw0U9YxN>SjKd{*ZY)Ly
z@u4{oes<z#Vo8v8VNZHERDa>6)_e64BZdo4Yd$YP!1V@KWn6(Vh3`OMLh8|gy!V}c
zAmF;qrCrP{xtw0x6zNDn^Q!$rRYt~4C(3#Ga-;C|C$*SrlVqCfs`I{9un)!2C>T^O
z4;>*1^k~?<VUOQ9YioRLqsGd+aKj<tRU+Jzz{0VnfZT9+w{TzNn5o*G!<P<7h#MuW
zkVFa;KU@j=4iDSj*hBAe<^yJtaQ~OYxWqn^au`I|H_O3~07%KoTomatdS2+^5|aq+
zP|WVm8ag%d@ky;iCVTRi7<jgj{eQWZ-chgl%g?*L*QXW9PZ=kXdP&EI@f$n4Rig8W
zhk?$X8+f<7BVtY3)7j}X&}W85s)wV~J8Ab?Rp3@lCd`;+K2#rc)CzX@fR&?G@rhY<
zia#1NiiFAN9zAOUJyK7SlxwTxlnVL%K9+rdb=HL+-QG#7-U9-cxv=y1iRgmtL2<cC
zEIwk<e3S}`6Q>-JaIs<tBO#ovk%m_voV0C7Z%!B#19iPF%bG==UbWl=)vMmekB$sh
z$PQ9OW(b)@hKp6D(zPyUP;ss8A)iN8be%8&i{5V;BY_=tK*rl%kHn|9vnO9)3<i8+
zz`sK=XtcPtd%dz#si=-q#Ao)(27#Oe9E5mBEGzQF_DO&oN1q23C5h*^@0su=Q*wh~
zjtG=Q(s&hh)1Dgkhr^2Gwr6RK8=bj@t-81EZEKaVToLojXzcVhmuUwin_tS-a;0qL
zmQsEH%fccc&9Xzb%#bYA<FDz^)-pZHFX8o3bzuRJCL_Ng%|H#$H@99VL@^;(u;&ve
zDJ^a)q*}H=Uo(laPxfF!<@I-JJQlUH{!SKQ42+?IKo-$pSR?hc%eY_aZk4qJZitXO
zZ~zixy>`93W4WOEQ8ZID7&-X}Sow%FjzA?YyvoFj*AaT?$_v*qa=;BaWuct@l3Dnt
zE^zhR8=F6G|8?gnUB~v|!Pdss&p&TGg-V7m|N8Ux_VXQC2}te)h>77teFPkuROmKU
zp~UwQzkD(bu0fI!P<2rtaF&^0g*=fz0153A#Z7`<kH@MgF9zD!_Yhxg7?c!c)!64U
zAvxIr{WOM5f;khU7FK<Mn&V3hLXA=b`9mXXG2@sTo|z$1RF?D(0(J{UIJ~LCYhK6U
zke2jIgjiI4k?a=(S#gYNsS%3k0?<-@%H()D?8};?fWq;*p96m=ld_X>#Oe_}uI7L<
zX1TsNDNCJ{(L7`dje%mkvmj8zE1*8OxxVmZM~7!fSCiL%Wd)ceiebqFsuXbc#Bh6r
z0t=^K#HA<91@tmvY)TZEg#)O-YU9n}8*4%a2T|Vj&NJh!DXBRUUAv;IsuR3ABI||z
z#RXnp5>z^E9dS?s=cnNHodBAGC%yufiac*Df#w-4(NL)4^4il>Bgs#q(6G6=p^1kQ
zV2lnPDv}{pdI<$!Dy8|>oB#SI`!&(GdB>Pp#x%1GP2h|o=bd_Hk@43QX&jPfnbB94
zxs_4oWVtdE2b^^(WU%MSQ=_!p0!h?t{P=<^9q+;Qc4Hl}Ax-pp?>my$xerSXxf7ac
zcZ9l<fo!0mEQj49t{jAl4&lu%2<+#|n;62tB&(E{hs4_UdMu+8YYV#^i8nvtc5ns#
zAQ+jy(<!F@4f%^Chojo+Hlcf8F(jgdKf<smu^9)ZebhT{oE+22HRNgeoK@-f8#2tF
z9ItTPn|Og(=!xd^deQb@H*R`8gN?+c+^Bs`EB>3mCK>``WNx_(m6YahUvKW#vbFI<
z{A;c1@f&fE2(vFOOXvs_I?thVKHc0UiEb3Q3>LuO@DaYEfUgX*2Ao-pf@eyH|F+uc
zy*}OC?bWgxYG1(RjRqzuBnAQ&OAk$}BQX|PWxj8;d0rAv;M9Au@qA+=V+ydjben-~
zo;${Q8B`<b#!krNvXM$nJR7<_97BAObq(Y6PEH%gM?&-Uqg^LyWlpQ@YOB>~!2p#q
zN1Y4EzBw`9t40A!XEp~`2C43ll@zR<GY<O5^(m_|!EO*AI0=B;L9T;3@?%OtskH7V
zo!*D$ajn6N(?|yUw~+2fquo9}$?t}Et%sJHqPk(ch~gn+Jxn|r`F-mV-V|C7_-@^N
zB<~U3k@?gQqVwxvqIkT@F(0L$5MFX*WfPC2>bUl{+U~t-cBN4XoV)8|_hzqPlJtup
z1Xgj(7Z2o@g7<uW)Prfd41?<)VoLU)Fa8DTkqe2defB}@emsSbcD32<7^UMr>w%cY
z?bOQytATw#ow0$$(Cg1G2&LODoQwf_)lR4R=GY#uIP~1HRiwfxA)R?|FrE3svUQu4
z5G-iEvefZ<y^(U*-~M?Qm5i(;HddZojn@9uWLC8e?S=cH54}2;H^mahiYzNJLW|T?
zN@K(S#TF*QD5!$Acj}s1SfBicPNHZgL~%U)0psWr)VuSn<Bx|;tvBm<anzigMP3nQ
zuoC25z}gAc4J4o$GF|+n?7TjzmK{_{&}Z2J?vg&X=yMxC#XpT54EIa_ZatyP<^{Z@
zUE4#I%<QjO_(K-FxqQJ(6bPvom$yvA<`uI{{_OEuJd4aTS~Cf(-g|GbFe`B2W;Xb_
zFblD9W>xJY2cIp>C^#gmkHCoi;)oYdv}uHVe~g#5%Ie`up`fIa#HP$R=eK&&a<$W~
zzG>`i^j@E}T0MHV4bR%mTKBlo=@_z9X2xwhWA(JTD96@DuifaJwB9!`HPE-?*Da`Q
zVNIK6O<VJ8Qr3WU9V@}&NPR^ha$^J^zwqG1;ome$TWjKOnIe*+azuv}pH10%ikU--
zettUg+z1ip@y(Mt*ZNuy82j<aV`!$WCv+mn0Kyy!f|2+oh0dU?s4wGdhzJ<kUX0<4
z2y9ve{k!)5f<We^R|(ZcAKLA+)wVAT66h@D=&5w`M4Caq*cQsHcgMf1zpK?`9Mpng
zYr|FV7sO4?PGz^UL6;?wC>J|6jYEWF?pZn2!~KGCS<zl8LFg%86g2J79w<DQ;$ohB
zJA`exG+`CKAbe<ZSORn%r7;o0o9qY{SEO6}bCAb|9LD<=#u|<)>{&_2-6XeF&xOrK
zFqI*iD+H6F2!r6V(StOQ#d5K%F~d`b9^X?fbQpk?%XphA8;BYnr#RRd?xEUH!i(K`
z?_UQY0&Ld}eeAsC9XiRY#5c}M_scYV>y4&fIKOQ$I=6Xd_{Xzxml(60B94GNdaZ5q
zGSj@l{q7G=Q4JqshGoiXj;ABhsH8}gaW|4tni~!-*WpB3x9iPM3x(Y!EmVBh<mth(
zG&0#YHn6RMBSS`tDHKY`9Cc$D%zzd~bnZ%pyoVaz2O1DeTVs2p#R*e=d$57NYtc(J
z854Fv?5MefaJYCf_(J`mhV>3zy3;Jo>ryT!Hcvk*Sgzb*WX^5*=@jYjGNmGmF$VKz
zN@&>f%EQ-7x0e#%p|O;H>6U6OTwZzq{hm(CrQ0jh-=WDei`8l+FTcfD^sJSWCw5*z
z<aU7RbI7|+q<gbe^2DN^dm<!NdGo8EKQfd!gJ$P~J5a7VTbizSge7j?$x2F|AdJ?#
zUWl%Nuf^}2;ZVt6`FBZuc9+D*v(e}d$vg3A*`!s{3!zb{nS*R_IZz$zgT*^`EKugZ
z0u3AaJmnI5BSO`XIji)YVEg%K{%SJTc~-*aNlstt$jH_L@BBPM2$XrL=DgP!Dme*=
z4VCY+5}by>YY+1HKKy8Wc1JUJ&b07ssAO_e8FqQ6)pn;*@4foitDpSx5uQlL4QY{4
z4{mOe(MOG(xL9W62QIsE>4ZOT?)>~>r?4o58k%{=4hnA+9_!qv7*)jd%StvY&nwS~
z<-u>e*;`gtJa>my!_d8*Gy234zZ$s_g7VFq;tq@(T18|hIrgIX)$My2Ur(;{j8w1D
zkgdvQyH#=}&YSZ#X&8(Wv+4|>C^v7)PtpA2gxu~HuFkwwgJYMs>@Yy4AY>=gd5M^O
z%};Z-IEf%tOO%6<Lpg~Mm}r?s)q}k4)}c7X$3ZyGO%VFS{NbL?os7Kpw7tAT^VKLA
zT;;subADb5kn|=I0t)1fgYRD8t;F1AA`>lmJ)exVz2Pjcg7<#p3#yg1sb>HDXWlC9
zT;~ocoC;&NIYDeKJd474qU@r^cY@L9ydiATjixs*XE<j~oO<DS#?de5PJ*0Cf1X_4
zMGP_w<rmH^D*@k?lWNLC63HQ=b$04bywTq<EZS&Mwo?}}jl6k-FGhXg@s?}~_l}qU
zowCdy7Mv?Rg!d?T`OF=S0e6?qq_WNJP*%423q31G+nro(olS>s>@CWbTz|`TDw%7!
zKIvS%$L^PFZVO)lgEF^R{Ak9g`p#dwHB@Ngrfq<{Gd8%hbag=hOw(i@rR)$y&D`Sv
zPruxA59V?hjB`n}FeA-<KabK8Gw~sGr$RfK2INC+?p=2jWx5AdxO=|j4qmE+oc(ye
zymx4M_LjL<)_c`zZaJMb0pOzMJiQdq&+z?GD_@ks6bm^2H{^wFN%9W@JHyBi4~zdo
z62{}=zie!5zSv5|f7{w#;lF%~za%iN{@pq=e@S#Q^Vz$$`K$B3ZvK4Z1!FG`tqioL
zkZ1CuCo4eE-vj;=eLnX7-`GjVf81QffBeS#pGf_a6guT<qyF3Pf2P=vw*R-cQ~Q5+
zXL}X@WrhFyodU1X=lg2w3?r0ega2kGd59<g^Eont!}HB2Z-ZInLCT>Q<}-JiS4&u4
z8QWhPU>T2=i8X*WS)*Hx$cy0yph=FWig6;6$rP$+8d#QdlCPk?w35LB%8rVJn;ADi
zR}>XX<*XEl=7Q=C)SWLC3&0qHB@DC(xWDbWqt55y*3DL0RyB%4Np&dH(h`zpZh9(r
za5)R-C!_By`_4~jXjqrdBeW5CZ+f5XZ^?_f?}Zn2GiJp6OycTviLQ&3OE(8c^xc0i
z-Q%Up)ho_;`e5-NGS`F^2x6(BTrNlUEI(4%iA+kmiaha+_&<3vJf{4&v$2uV|Gap<
zivPLd|GrcH?@i<6sL^fb#(zq`e^|~hp=q;tIpt)R;N;{+mTa8-y;L*@>HPrntavQ4
zkzYf*c;_-02k1$Gd|iy!y~<-DlBwzONC1LF0~gUu%g$DvOlh|3P8h^?hF=lh!+AGD
zShop!R{TSKjfmgE_cAep2@;Mgsr#McuOZBp$>LFhvXlLMTZM;$K3)bFz<Wv80O22k
z^`{G+P5k(Tav-~=3L!pK>;~dzCf+2D_C$~Hav296d50D*nW^z$ZA6GxvU@Z*ob4?f
za3SnF$Z)B?rU!x(3qzh)!HwhIz;p^cKBtfNQfRx@4{mZZ&&J-ZJ9gO&YAu3cM}!h<
zR$=bh4bm)O0naSg+Y$`3Ou9~kd&F#hnj=xk8n9Nw>f~bNW!<HyWN-B<ol{s_EW~Wf
zhg7ny`kX|1_0CB|4X?s9%}LXh!@nraC`V*asXDy%=A_UoZBC-(DxTLrwHW6lI^#tr
zE!570e?EE>Xdiv)V}%Gj<i`Y}0)@P&I8}zE83zRRDFBKwx6GqJ83mXRq4=O9gMl{r
zr};MbCcZ$XgG5f>$lg<tUnuB-8RYb7b0_Y2#U8Kt|9t-6P#--S|G)YC`BqB*`+Rl(
zv*Q21Q~v+$$yulI_M}yBwDUxO%p?yV1=iS}lCd`)rTk=*e7%`A!MHEup=xAbf~_-B
zo8Qx1K~BOB`7;l^`5DOAGg16QoU)k{`Ug-$EujBwB<O$kTC}}c(-%D~{ohI5|8H&V
zZm;ZrR`mZnrT@)tqg_RZ)mHPkQEd}qMLYK@a8c@<c%b^}2}T42!XzZ&dmkH#SfANj
zTOK3XZ|7-?2D30^ya{!___7drS%=e2*L|1<TB_%q;v4)raH{xm<GjRw`ehMfgr1`!
z!(n~IP@p(qJenEdog!X^GjhWVkCY}NSf~+?TwF48Ao6)J$gOad&Kj)R$*0)`EciE}
zJM>Y!TqYE{x@c^n8l^+6j<SnFm{eh%YAapj5UH|W`L#ElZuJ#qlhH#(Vk-u_jL(Qz
z<Z2465s0E-;8Q3LM*TpY)f8GHZN4F)8iD2zn(D{5r`CITftyXljDhp^d^VCfv}a8s
z2i@o*LM>Fxch!kn%k0`m(|PS-z#5Fy!KHx^KlUdGy>17b$0kl>c+%Ec7k>Ndx6?Yy
z3aC1SWdjorlY(pl%)1)44x)6Z)ro|H8r>HII25skC{PqZ+=|6!N4yuCBHDAro)(w&
z<wP*uvU4^*M=xOYn<CD(&SXqV(Jn`^VNQw-%igMtRXLX<%g)H|Bp+au-pqs};8NMt
z<9V(_Ie8(mKd@KJ@)f7H)zUeQ&3XSq<gQ$Ppv=X3k6dysJrrxZ=zv-&N&bEHKTM<_
z*Zy~N_j$_xcYAlG|5@?>-zoq9U)9>lt7aa*pM3l9S|8q&l2aFa)FNGvNQT_;8#r7h
zk-l?XM%TmH^TU?ALq521LPszGz$gh$nnI+>U_I&Z@M()af>4`I5x7pa8rD~$(V4b>
zl<Q;yoNvoJkBg=GY1+7^iyZPk#T#+O;-7~)X_HIiJGiJ>K>u534^YPf`o9I#K0*I?
zo^P)7KP&qGoznl-Ssv{--#k3s$I_XqgHW48??n}n^S>aCEs`SXmjO62%nU_TL1`Dp
zO`u}>3gw<q{XCo&61mgH*(ODwN6j+{HP_>1Q)r{wrOdHgXUke)=ixul%jP%}MW=PP
z{(3eVo%)|!XPIwLVaNEcncxBJc@yPk*w&`f*NxfADo@~jC^XC8t(`>@a|!xJHWs3T
zk6>b9ZtFjo>G6W||HzB|Na&&-P5yha{UUk)k0Rmf{9m2_-{tv#)aW)l5`jF26Ua`J
zbJueZ&cF)hW&|k1{ffaHz7z!a06B<x5SzFs6w`^zAakEfCc}J|y!gpKjm=!?b2*cR
za3_Qp9G)YhxmXZ1e!#8-;_}PEgA$|n6~#<lj9iS-K8y#I*?3a%hO_mHt4lbT)<65+
zb+k?+QL6B*S2Lz5?-N4$(2?N|-N~&8W7nVgqj(MB!-;F5Sco?isE}hov2ASPCQc3%
z-ceO7V_0B65_4(}i}mB7WXtRY_qFst(c{PQ|JdD4>i=GBy;#xz75)EC>HkT)-e@<E
z-@ul8ds5H6_|HxDaHl}VwSEqbN<5x-kq_VWUA_c!5Agz)qFhf#9MV7%R!POBXRjiq
zt_p$(Srr|ZN<O+p=bKY?o$JVp>sz{|2G&}HGbO4GOC<y{FCuO;=q}E`H}X&%iJak#
zYM??W9KBKeO+pwhwp!gX$kAozU7J;a4&FcdkvF<6bEJC=jiK>=qu86`ewb?9Ls{}E
z4H*=UOHSMmi$<X7L8|K%i+4I5`P?avM>~R3Ozi+JSd7OR6?iVf47wsFtn8h<4Yd15
zN>>VFJcV6`^<Y3*H8PW6@=p(rPd#?fV;GTQ_1S5OL!-c1NdYUOF<#e)Sipo!kx9j{
z>5nsvoZ{>`3pj+tQp{iy=%eaHK3;gjfMT&|{(%-Civ9@4848hN4v0O^LJ2KfrJKfI
zdH=P}4g`uC2y<npcG9XI)nzj=Le`;kDC{Tpt=7&f^j-U&g<87{NwN^XvHxTf5wi`2
z3bBi;5|sr5vLQ(<H(EHiK!Kg$dg4rHA)>05Q*BPQO%}s#OQC%p)HyOr*->988*Yz%
zV-!Sc4iItZRQI{M_4U`jh@&DlkqC;yuVv-MYMDwb?IWX8!%jm(b4Ke|ApRa4He*k0
zBjZY;IP$MN%!^kE#444_A`qxtIevT#!x$4=&5hu_=uco@%T{t=w&-?D-Y<iS^8pnc
zMmO+iqabFpkSW(hB^HCuVQB=ph!k<f(4&YjE?NMU|9bxC>fgK1s_-vVdX9l_aN4Ey
z33rVL4bDTFV%-RL6ngIPRyWltGPX7CU)e+tM&VYRhaBx1cQ_T<zs;ykM8I(pK&(iq
zy-H#)uxd|E8-?4HD>IlR`-(qD!8O?-ORPTGn`&Z7R1D)Qto@*c84rskxmL2ZftCb!
z+S=-!9Jf9u&fElPTCa|W`KqL3V)=U>p!XdZ`ds$mTk3yK-A?TP@|6mpMfg8kn>$JU
z&x`G?75~5D|G!WE|FqiaHh)>50Ln@9=o+9Yh9|k{O;7<}vBJCZ8=)K$w-h3TML`6j
zRzJ;9pv!5HEJ`7z4(p+LWtDHIkfjVdaGlH0JAWx->4~0Pz`DEeD*gnHjLLbq{^Stq
z>-xVOth<LQ5-GDU^qftIrQz(aee`RGAq<A@sEj|S0{EN#uKP|fg+<3}SMZk1u2m#+
zrW&0#YoJ(kw&AH|S-i$Sp=H<=;HQB@b*%=e*0fr&=JKr~D_KR=TiyJk>US&aY)-3^
zcC|$#nl{h_*I+%wL0`&b5t8D8wPx}SPKY1C1;W9+)k-SUWQYmB1RjXY)=}Ztw=BO^
z<XTxhH1aYk)7%p;)u%`dDl4k?w7e$D7mJKC`6z)>Fhy@N@I8fR<ylX~4`hKZgKO1J
zAYFY=u!TUS1Y7r-#&Ixqfp_h@C<DWaL$ay4HWJC9wXgljkPEADda<vdy_eo-O6f^X
z!dQuJbfs*5Qa7d6_}jl8N>`af8&}qt-`)M+sT;y^8hjlCi2Trt=g)KPe>S(1{NIbs
z-Q5-cx8nc4SN^YEZ+1==GJx5s9*zSvuKlh?y(w>1<nQjrx0yR!Gj8V7dD`MWio6y?
z5KTxEc(_rqU2=~8)^fy9BphZ;+{cAHf!Un<G6mR_N8<>v@Wt#v#}n-$G@))V^#`Qh
zrQyONo=n{fk0U^1s9-bvGx7|OWQ`R_6k#<qSht+R+#I;X&xbqCvse1+bp73*|L@x0
z!2WkTh5xbhe1-qMI{&}d^Z#8g?8m$IcZBwwupY*?@sa(my$o`53bGlpS$AvE;{Y~a
z>OQ;+vn_-N0h-0WU7YZoEq60gE?_v`wU@9de%CgkN8Yv5ihoG)I^BfqgtcJVbnFJI
zp$9W?lOMnYIoZ2>Y~bw1KN24ByY>nO_#2-8vB2*;az~?J@HqCryU)|||H}Srb^d?1
z=YRL8+G?HDa=C!i`z&7I+$sJjZlgJMwbgD^>mT9Y=9}Y2y>uUjf$s31{eyRFF^Y$K
zdkE3n19}MmlnTGWK^9U7XP#kn#bNt{y*&XF+Z&6_m|-99im1?kybe9D$mLJtkc-MK
z;~W!XGF`y4e;_bDmaGaXJJ~J71kap_cYPX%&dX(6>Hvo^9!zhGoU!b14f5qOZB>KA
zMeOrU**(v!jI%4XF#hEy*aG_BIepjuTKA8I^552G3jc9?XNCX1qW|A1{ijVaU-_Gv
z<PkK#PdYO{_N2jK<#IZGawqMtvA?BsXq9;XJ9{iyt`WrX@&}jZ`pUZ2zrMb%L|D6k
z?*ggj$)WLp?)8Nq$TC`EqjUFF8B;GFO=oP5QKr(^DGIo#GTJUmBhqrEQYqc*h8Qap
z_rq8nJ`xpFYR0W`lzTaBqgN$<rX@pT2G*^0Woe-Kv|Pzz-=X~1d0&5Y{VyE*DgE#6
z*2e1mU!DKo>G}V@o_p?_uW~~r=LF@G`rNMuilA1A)BJt?PRd?PZ+lSCRiyCHaf<x_
zMiW6F=>QS-g5oghUI$6YiP1$6!uXFFc2d+bRC-_TG1f`09_Wff2ItqoT7(HD{*OSh
zM8pbc^JF&mLg5)iNRe<L&)_{w?ztNYfKHSRH5{h{%YoBIqu`nsETV;mIR6?)@>RW-
zYK)5rowG1P49%gB?RTC+$0z@oL2g)SQ18VpnY0H}T#k6;PN!bD)=!4=6I@W}p>bfO
z+se455_7U*pm*B(_B`;?5cnh5OV~i^?t+tZXH<?)JR6^f*~;|(aG>5T;=JD1A0jd$
z6tjMYWBF=vR76Y*|7XKa;!CyGvS0Jm3+=hYrzoaW=p_@LBK^A8>9(7-Zs|^%cB^%d
zq&Bw*(@Jl?`dgC!BYypH`M=HGtt9@(^PTM#|F`1*zFYlIwGAXcSM_7RP4Z^6qZE54
zT$H`oT}v3*9qDKK#@s=!MuK=rjA>!HPbOtru_UT6UuOG;XJi+~2Du3o5?T6{GBYq=
z&BdBu0^*vBUI1ymBzpd0Pe6MRG7z`qAUsJuv-$k&ZfBj&$u;wR(SJD<kEH*9zMaDV
z*n}ftMgM>Q^uGo)I;6bCTFKek*!X#EYh!cUdE<H`=grKUOuW838#)IU@St*`9{i_t
z>N@=6A0)N6C#^>3Zy)C$DCt!Hs=y(cXGG%Z9Rr1+?;M($xd=reX2LcW&xW~JDi((s
z24p#bIzJjn2UP?9z;{QjR$R__T5rYo2_=yZ4Z_y(`hsC@K$fL$L#zqP5zo&vPev~c
zVVu#GEpwL1eGN9uCG{qN2|Q<14lIHIV=$qYBhK1IuIsT(#5q{Te}7?FJzv0IKBQ=!
zvp@eh@*fV%<H>&@>un_O|F>51-%9@b&X4Vr_5vBuPVu1j;0GL_+;s0ByHoS$Ac}`D
zMTtkq4w#RWg8p#cQLm**<H=Hy<J|-Z7eZ=z5u9%#6g=vw`xyMk3`r%@j@^krosEpR
z@R&N7%Bui<=&@IoC!DHg=#=h`glH{zBPSM*pDce=LML<V25b~dyswAFJK1wEOLjZ~
zU8MMW1~Z+Z3Qi`|3iu@wSAmbYn9NOx*5fb(kvHtXcw{188>y?~zW<(9+7h+vqL*wg
z_id?!)48eO{S-QMw?dchSKB*ZruWxAOez|CoJ^NZ{>ooEJ%x8i=&mKwQVi53($#}_
zsqPPFOO2=RT`yCqiVX+bXAmz~tU&a`&m2{Z9`olic{9>uOtaH)Hp#?jN@ZQXk~*GE
zcW9Fhp0(8EOH8bV8FW&X5SHnv#&%mG_3PO`|9QJiFRdtW56EZRhwADE=Ns_<Ul|2v
zA^z9yX7c`LclY^<|NjH(|2DUr`v3Xgkstn_|Lq*l5P8&H``})=W9I<aMIX4Q2|w*!
z<5Nowm{r(HkEkhQeps6H`#e+RTp2*eo{_)vBG4#QcLO&n0Tl&fq{7!GAb=HU5<!#V
z7|qbk$HtbD1$%2#cLEDwHXXW%rOB~urygp2DDf2qd=aIdVsu>???^}s!(cEQOHoA5
zo9Z3oU@07ip$C&AN?NDjDN?(tO`q?edX?3vFTD$IJnmho05uEKcL>5W$~%dzq-q}K
zGsi|A86{>R%Rg$wq312riX#sO@^+b&m!3QA;U(uyFTz<~CeIy{PyNq9+*&FnuCU(l
ze5sM<`Rn!3pnB=DmrczW(aSbPq+Y)8>1{6#dNatkRMUZv_Ldqz6m*wq@f@Ddq+BbO
z?c4KzUx@;;5dULiH_iX8@V{32-|w2;%0~f7zrT-_%2%3z+r~IfmP6OC{m_R_ol0eG
z65t~%M#CacFb3_XJ2w>+PU3re=keMo2(FN65?a}0l{y4OG9-cp!ya7*v(XT5QbmLy
z_Nf{HkH`_~V5&zLgF=Laz(-VYqOw<Z*dO$_Vh<~Hw<;l4yb55UlQ4X|6;Oy)F@?Ar
zR&0&*ytqM$$1;GB98?LVhT(J-BK>mFHI@Qel}jQo07FWTy<m~vnLB*fUe}(BU!x;h
zF$Iry{>Q_*_m7$PnCJiI&i3=<`Tt^Lb^iY$?Z5tdH<e9R0dL*emFK+m{5}+PaEZSv
zm-OpDFM{A=gzhyXbRPa=>wLQPdgI+${uOlUed+>Q9|%L-<A;n&!r5ylN2k4`&ifkr
zyNG}ko!*D$@$L>jDN5v$U!bCDw^?J<lHRM2-A3o&<>qe5S%2nGes_lP+!1Rda)2>`
zADg>Q|CVE)I8a<4&TKr^;B2My#_C4B@w$18ZZ)kQcc*jOs5POo|Atymj*I9DjmN+!
z@=`jJc%*!f1sR)*&XFFm75#^SYr#r>T>p>lt(5!+e=GX`$E5!&@-2u>tJ&$!q2BiE
zIm8>~gYTVoQ)li=x{(4cqTKdqt@!$fqyKCt^Qip)PSXElcXMNf|Fq)&R|m-41LS?9
z);(#@IYN>N<{To@p7i@ZMns41dyr_8zj;SVa@78<=|5f_x6Zyk{>yy({|x`X75@9`
z{^xroy=SM5Hf)Lo5P&(U9uftR-L;NRyB`yk)3e@(YDb~UG8L^W!iSh2m>a$3;QwB{
zZ=43vXx6Ju+_4wgAq2_rMg*s}UqFT#S+pOLag6?L*aPW++IX`OTzL}*>lktmG)5r8
z3!!&lFdEGef~QO+2F^8J_l$$j9tF3?@OqyEiU969Vz9a2TsAI2TqvS&SI5K^=q<Q#
zCRpe&nEWMnI0B*{^K>T=U~<Q^RQQv~xp?45X0e#g$joyn4<#{|cCU(Sn{dq0tV|JT
zk>kMpCW84oozaZVd-k0#cf)|RXuO7jG@mZ#?6E}X#z8n-8+jM*;I=aA1KSsTUjP4Y
zd^foD<XN-+<WPJ|$8k)?T-1)58KD;KK#QVGJD#Bvp;mAb<85U!pkXDQy*V#G>;-|$
zpAj38<3`h6p)0n+Ab`onx9H`G!QG{Ur@X`~8D5>&yzhD3uS4{A2w^k)W9ElA@D$lO
zisg-K<m^;7aASy0SLi}@yK<uwpyw+ROA)t8L>#m$quACOdcp$7M0CkZ_p>LFGwr2{
zf$Cwa(LrMG?MdK_f(Zg|$z9m@1}>Js0qf%+&>Md#w^nPN-WTVSrgSuWJg84u!B<C&
zR4sjwkdbnMd}Yi?dlC{qAI$zg=RjR4aUPHVKO3K)x`V4n<^Q*y@9d`d|IHQt$BO^|
zPWk_{qt^)0w}Ai8OZDgxz-Qxg1R4~9zVE;Y+M8W6g7(vUGlHTRbpSpNBKX&Eh@yqe
z<Beyd*oPy35--6eE?`mY5xg&pBBu56SQJ^oSF<Rpr4PcQh|&967R8=~#LtIfQS$cu
z@AmwcnC}Fqf}!Ormk)jZKYy|HB6a>h-&w_f`a|CT5dPQZ*Ixqt&GSR&$Olmfp)cs)
z|GY3SezG#X@+JX@nS{0L91z@brBBZ)n+tM7o4fuvh%axQM#Xs>jBcHS*dJG5Z`|sv
z&L8^93HN`B%@GgNdMWdf47%iCbSJrj4p0S&X#~o27k-vWA9*%y1&30vMHO^I7mlL+
z0%XV0Ap`EuUdJ2zb!gt$b0@HL#Zx%0$Nm~M5*P(K#wNjJ?Vny4>^l+GhwJs&yKs#%
zpkQw@8#^8UV#3XFeoNL<PG%}Q{|$v=mNJ~*s>Uz-DaI=UQByDt#Cl*K^)oihZwGxm
zrgMPLNb4!A;c2GHqYbLl#l*0e-LcctqsvPUzlh}{3V>hCj=`$LN?-}mYiQQryE!ee
z>%bY~(iQ#ylalxn7TGndEv5gGX6?E(L{4c1*TDM}VGLd1yvX#PkiP{qyYYw22kb-S
zloEAalOC6x0$f9%bqb6q?mpKE7Z0hmh}4WiYbIjqtrt|j7{p)C|Lj!hGeX=q>Nr1p
z={To<@8V!SJH<yB4#x{U=M)J7RH^Y7L|J90MJ2=Q5siz{Q<`konL+iA1VyJcUgy%(
zGohw8IQ0bl9ZpB2)(y@*(3%;v3r@toKl0<-GHTtaBQ2go)c8?hxS1m6ynu-d;uvTj
zlgrSZTotP&;HoD`>_jV^D#mn^t{BljW~hG*gCJ(Vh6%Kuu<=CI{}kLiFUAC;-S)=b
zxR0ZPEz6!vt>DD=tcBiZ&mEyg(Wyc;WoH=BOXM?MRYclD1P5j4A(2Lc2eU@&&UK>O
z2&fpY7?2D^V1i3h>_!2qF3slBQNY_=C|e`LHKaKKS~&2m5U%L!f)5jI(f%Rc;L@G=
z(b#nJh+GS-e>5D$#ZuWxfQ{%Eg77fiSudoCaxY1hdrX!MiiOmCN+;DGLG#KxC6nan
zultmSsO)bA-plRaI;zGs77yTj^<b~)410U2_hpf>7L4X*Oud&GkKCI@m1ZR|vM;JS
zD~ZfbWb!4CBt81FZw*|=iMY4d8MwId`JbHh$FngXkq2{%ID`Sk$Q=}NA)_=AArViq
z8#>Q!_TjW$KR^`$URV$~R|Ha$Jma@3+e<W`ai+Lb^M_qIXF1?@mD4Mu>}-~ujdJD$
z!n;GKJD!?r%_>}?Coyd}$S5GI*h4#?Uc<6j135=Df9aBqtQqaqK`IUI=fK#IycG?u
z2>7CBigl>-DT=6~%tQrrj^f`l`L8$+IN?Ai#8U*w;`t13w18oZ*hok42bXk+htxm>
zJ&OZX-c%g32|A3Js@MsP+D)8Akt#Em$kUQ)+&X>1<P>HE|Mr}5j}H-`S_R1jjC-*)
zrS>~SK`ZJpluVkH#*dm41d1h~h~hb%J|0*3+m~g=lh5HXVH)6V9z`N`1!H6nbj!aI
zfD;*`jd5w1e>2O_PLG0b9=frs3Umj9ARLM_o6b{&yg2t+lY(fo&aW;7K2-xt=Yut_
zVywLz^44TFyU3fA1F@!tA0LZ&s&ghH{T@!1042rrtB0Pa)chnl*5nSrli3{&5M)qr
z9BBtw?R@&HYcN%IQEGV8BPa$seZV%cP}~5A88g9~gvcx&=m$FSQ_+Zw84|4vf!yE`
zFMbZ-G%A+n&6$-iXY%sXeBCUHM3Oj&YS7X2Go&n)nBvZvYvj6c$Px-((uKOt1r#IM
z>g)v&eD=ao;9|6?1R0d!s%RP9pIy7gYxG|S2E}7eT^zRvFPOic66@gU*R87ittUMh
zf0Q*;8<DPRatq_*M<o7akP4m#6d=wBV5oRqhIWo!F;Zd*D4KQT#n|Se-JnF9f%BtQ
zr<XKGoyJA+8JKiQ{V9x?Df<aY^GsIlPf+Wo*)HY>kOjqbXCnPsvBWFeY8$D1dx&lx
z#>HeqpFVY-(4O*HwdCt}l_z32Rjh1T>xCDe73JC~E!)YIfGaxLTCS5Vk-qiqI@wyL
zli0PiU=JccNr~3p5!XutFTn<hjxz;FaiUM{LZZ`09)RSApWpRC;GLfX-SI3ye@im~
zBEQ{Hi41y>WDpM>wob1<dKV?-5G^Uro8WK;blg&-`GyX%XsU@yiJta@0J!T&suv%<
zTS#4<moE*mBB??Bpa02BCh729he0XJmKg@AR4pPU{q{qV(COZPD75m&A(bjC|89Wl
z_vyQi_8=l)r1g*0N;Z#jKAQ|gKuuJVNi~u`S!2$Ff<;6|4bkl+c5j@6I6lM%SQVYn
z4Mc_V6mLog>yXIFU_oFsy`I|-W^r$D=?xIJr6+qBrIH^76yOsCk;og}mc{)wSu)Dz
z<qt@La}rw9)4zA&7^pj~zdv(GajGio-%^>nx2TTe3NX`cZ&OxMriU$fv;~jwg?d<r
zpWF0siyrEpV-BeEwzosCx9JJ$Sg8W)0lvduK59An1|9)rn$}LxDq+0&@%ezG+x&@D
zS)D#f!>S^Lh}2FIBYwq7=dB{KqJGn#0Ph;()qkmi1_Y!K9T3PNBvw_BZRna%x~erQ
zR%l`ru%&{J8diAm4Fiw27GAS(9kID6RXrkKB+!25RBEmQKgu4bY^L}QI?hF$a{Dqm
zOdqs_TYP{NrL0r%UNO%lVxsZ=Q>Xt(G%Hbr#XP@!*%#jmPU24%zPHD%F74qufzpQ<
zI2ZNiFbSAfp3AJazr^*8L3ici?Wd?`a%hUra2=GLejclx9`iaU<WVL1PmOF&PLYlk
zjj@#}ou~ckWcZ61<Nd;NVh=X*sbg!L<_bUiSp<FJcpw~%@wQuz70@^IVE`^h*>4-e
zYTc<xAwuZRKmw%kA}eRUT<a$As4TfcYiXSj>w07^L7#L~TTk}9hjunz2!)R}dy^m>
zm$@6+GwtL|HP443N~QLaO#allE{m<GN8l;Y#p$K{ax-(DEKON@SaYTSllsY?LV@Lj
zgw@l|$i0PQZw@<dXfLGzLdvyo!k=0rZ)g?Br+^h0-tb81Os##B8a5d>3+b|0pJgWT
z_{9~y4ZU+OL<vPI9PRN1!3ZcYN;V|5PJkMWZV8P{F@%_4O;nb{;|N|MsdvGOfIs64
zN!IEaC*e&(-HeT&Ar70{$Ll_Z8TJQ^CQSFTZnUQl6ndOukSd~~#gbr>fWjO0dd8aI
z;o-%tGKr_Z$!Vie-N&mQa~-jwkZE`_M}pX+Lr*FvYxQz-^6UK!#aY{9=26~e0?+o@
z4`z{h*-ADgg0DHBjWUj6xrkuZbT}PK0og@B37WZ5e~|N726AskQBsVkppT0i;uQn(
zLsFth$jCKaVRLdGz!04WkG>1B5^E3IW-^5SSns|1*sGuX@)4fo^B;V<s!3*K9AVP=
z`F*q2=)FEWu4!#kkCr9HG82SUl;u1P(cV*(<2)Tku_6G}P~l+a;2w)Tq!WAGg$M_6
zz{L}{-;)jQk%<Rhf=3DUmh_lr=kHFZ(eCz+PmX)d<8I?kqg`a$ogqU@Vyza%nE+9g
z#wWy!#3)fvaJ+1L4HOZic<|CWgU`BiR<Z84&>(6M3q=~)i16*!&;*9GN)_%voR4%<
z`hpAd4+0R~qA8q^k`^Xa9WAFwR<lyufv}Lk1&_$>9ebt<6swIoIxMl7uB9k}B11H<
zwJWVB_JDB$*e8U+L8#N4-=wrjIqDtjPSsT9?hAq>c?QF5iiPKW&EX|>+kWQ8mAeY5
zWTxGiwoph}h#|Y*6H54fBVlHO1F|$aEG)J}Gc}F?N<g*0_GlUJJ_O+vwG}t5*hJLp
z*`$wQcz`KR@CxO{y1cS3_9->8Y}lneb#Q!fC-OdIDLRUWdwXs_dLVlO%Lqd+qdd!j
z{Gm$R|I?>o3}R7bH&)UOei!#v`^HJDZ`WYQ>-5HAI}d>W=%INMW4zlscS-_u9G;@$
zN}kB)4&6;U2QQsXo5b1kQi;5{IYdu@__nCd1<Vd2fOiuE?+gP}<i-P4C~lNDN+st|
zEQ}tA=JA0Z!^7f6$tgLkYf2g$Dk*Rjd5fz!m{WzSA))EAY7j|mEtfj;c(4pIj+~je
z>%4+&8~gCj0Zq)l^V3gP_8_jWlOaEF)!_w*6$X79dB}Q;WL%t&pS|F`*Y__lHSF6S
z&_7_~WSg+>T0>jGxC}IXYgeVp0K;lk`>-Y|W3<D;5qg?mq@KgvB+gGSohz|WQtN@H
zpzWO(uSzfqYH=5rsSz?cKfat(vlQ)rQzAYLBydHeTVZKw-XVaDiEa@tfh!MC=R(Kn
z1SR^AVz8Q}WL~`B`KA(yod&(4q0MjQ+U7o$7{W9VA;OL1p2r^FoQw2{sdnV%g@LP}
zLks>JUAt3B1m=yP%%FynO9OI&rSNhvO2g;#N)s83(ukHC`Q;p!uS#X>Nq({AIW-zJ
z!8Q`R$bWhD)8@W^FlYJy^pmf(J--=xA3>bs@l;Sn-9TvazL7HZ5;91-0*mESap~@r
z=9D`xTe3oEv{BL*^$oU%wrNx5Yt5)eKK3`IdsIHBxAU{927W1MiiW#I*oZ2bWlz86
zzv{+@^FJ6Nnm+a0(aeFiR5$T$b#pWER^;8Hw_BU^xn&W_P;N{p`U?Kvlr5ytf2N^W
z{T9w;e^A~$#G<grWmstCt8y5XH%qzMHf6>`Y$azaZ@}ZL(!#2A&aDkq;RaTCRZ@f$
z$=~5dn#3WuBR9BM%?7QfvrkhDH>#_5z)wR<wdkHC4#p;S)?PYA9Q<cib*0K@cQo_N
ztkB9Z(#_;)*{n<gU}aqtzo8_YC%d4m<<guuQ84zjw5^aBq_d4v^ePvXva?;;s%(~o
z95NuQWs;?Xu^*!WrjU|}yV2#7v^{o5Besj4h<m0uI6rr&Tj;cM?Ln<&VZvz&yta|F
zQK~1Q_Bo0-df<_XIGgo*(vBKNoWdV^PV@LRTFW21$6Rex(WSM$u^V0qgYqgm%JFpv
z*`aSp{gKsZX(w#xLg8!m>dM&4HbW#QS<M3Rd^B1hhxAwx4)YJZa#56p9Gb2HIVNkF
zzI?fX8iF6~GBQtRlsS<-3_VN!Lw1xVH1x!XQ>b(bEQJ>4fmw!kGw`M{MKF*OYEn8t
zK~Y}GDLsyT1B+Ew^of?u-I)i_CuZY59Y`kbQ}$t##{qq^IiOG0`M@kUej;$3Je5D0
z%-g56P1w+3=-wj#ucx8{Yr2?uL<rs#{z+{(m^R+HSYP(M(N1>OU&;k#PI*d*NS7kI
z#IglWTf+LxMSoYDg)b&oP?oS_h$T`m;hX4$cu+DX1(V5bv6LX7eU;S6X*okyn2;9o
zDm;9!3YF2<*VL3FMHzC`okl80Wcp&tLiRfRB7D@^N@p&ONL8%uJ*Vz|MiW5T#&1D1
z29o^b2>s20vvJx$in)ZNVCbLw1L0o6FjpKkMJdEyn~UxV9BQ%(mSd43h5l#sTZdl5
z-kMwx7cQ^<{Ls|cMoux|;wZ{MyX0$;UUdQq*L48o9J@#7$EJ$I=b_CdpHo}{WV!GS
z(;so{Kr1l3A7!s_%C|@(>99$ZN%!5z`(X#XVHxWWJvwt#H3ATcu5$=d3DrkqZ-`;V
z)!h!cwU6<Rfqds@p*y%;UKjMixYn}NJ=9G59@9|y7#Ce1GH$vS=v_$VGsf$$gw{tM
zfqrD}u5Y3JSym~N((s6Ti<g3b1<HC|!EZ?Q$!uDbZPZJ5e^B<Oyv15~s!9yzfln$W
z<Y}gq5}$thDW#OC;StK)lRUhbbPs2AD5IyWT(v1X9oP?RA#YSkrG6~RNhlSm!6kjt
z^kMhu0|sj5OHf<cORm6aerNb8m(?kLDu2>^j!1uv>91u1_0}jcy8g69;sY$HXR>5q
zhqj8>ndvXw@fe4H_~{wX#7|U&>W{^GVUtj0f+^N1xhzrS6|aV$Ab;k7D3|C~w#b8=
zf-Liz8Hg|<P8_=a%t<mAVPvXYxGz7#;wYt*2SyHTX3-uX5*T5C)~Sfnv+G3hUkVxe
zGCB+%Qp;>UlUGG)GL)u2OB#{dif7b2bH)#x_$%m@wP#JkSaFNO9v0V3k4TYWw+v?X
zdfz&4GnD#!do~%yu46CQYMUqI%i<dXay)+28v=QA);>9Y(`z2Ln#YZAd}|;}^A1cq
zTd0Vrv{=(~M_RQMGnG?q;RaBp-+CwHk8aNHxW~Tu`W^B0Gr+yc8GdXe9s`q4vkOdC
zDm%H`AfxeF@C&B0$Gk-o?1Q8+QlX5tcqv=%vU3J!B<B{7Tl>ouk~ze}?ypVbq$}Vp
zEK;8B>$M?lD5UcMne%6u^pvzog-BW)3p<Rgg3cUrepsfUv*K7Qsi?@WoeGm&J?tpn
zYnqaC{TNe`7{P~_6g3rJNC|_WA*pL<2KA^3BZYM`xU$ePy33d-kg=7kt<o~R@3Won
z3v&%0u>t17;5<*A?B_{`+-&nmw?qqTj!6ie$`+gNs%u==yu{6r7F;DO6dYx|K+ly$
zsl<Gdqb}WpHXSr)Ac^C~0q#}A3w5Fy!6GPw2S{P*_V$Hbd{RGg2uqr5GRY7OSdoM&
zalsNQpavEz@Qc3fgkMr+VG1q%2xD;fpgsHQmYH!<K=z@XC0S4-DfY)O;VNR;T1kdb
zb@V`CFinr4iwB#BAW{8fnT>HyQb^}!5fU1PO<1lW^NY)D!BTD417x<~P)}k>In?P{
zO$_iCUP1}=WayD>DFn;89(jg!OGY-K56bcf;bPH<#eAROC~3nPJX231y(F7lHXBkF
zHJ6<KP-b>}39~ubfJDEf2x1u_N?amC+)7ieJx6%WYU<T3JIcrjczBcQrF~dXK15E#
zxpuDq-i6Kh%=wjJg09i`k6gKgRUcwzhuBrR*qCHlmzw#xSeLFv(2?S&P1>GHMFd29
zjUPQ32G<jMT7DwzuWchL0+L4+ioB2+-LAPqb^<0k!tIPI&dC&;;g0r||2|z!pbqP&
zO>SM}S`MIJN6?jm>8|Abj@H$x!o2cQ+-u3=G45yclW_=K8ww+&0g1J&Ip+Be5o)s{
zKOl%2=hI2`wl1sW#(Oo1^7#U4Vx0~?pAEh`8GNa7$|y<SS0@Zdbk(`U$RI~FEr%~R
zfIQ^;_U+|6yUF76hzAkX1vE`aA<Gi9Yb}RlQKv<wv`ga@74BqkOLnic5qcRr;U4z3
zv{kGH3n|4~MFv3lzzc2!^5NZu(}$n@JvoX%W}pDc3~`#)-+|f{Ar~$)<py1`JZi8-
zcCI?*qP!w2yc#FG_YfFs8hNu}utrX15qZGW(Vx?E`m+mXYllSHwDDnrxa2$aGJ(+|
z2U6U6QF;2xp~<~C#~Xl=2ZIC*I^BMI3RMwVLTK*h`lWwyiPlHbvKC(WV^rvx7PhxA
zw%#=IN94+e_Z<kl3%i6)rT(qBtNu&m+`MVDI-Nav!J~LR-4Ly+#j3D3b$O{-ac#3S
zv{=w4F2?=S-kxX;$m|B|x>*8=BPWCCryezR*Pp!QchoIEUn55WIxfr@1A)}9dQd|(
zprU-Sj1uO1viNInG;*jYf}+9FN5C+W1^+~{ERYy@7J?L_X(fQ6ioEa&y?|ToB7iWC
zc;{oc-GSYL@gT^<)pIt~jhbpJ7LYR^cO606EZJn8W|QwKkni{P$>g7tJprv?pDaFA
z6XX#9n~x3KN_gokQl>a?oytCpz9ct(*D2CQHutfDdssvQP9+KGI(8vwl!V{&=}<-f
z^k@MO(X-%-3S47Z_tRvdG)KoqV0#_!+xU3GC+=5oteu+0M#r5jJQ)qJZDt6`HPFGZ
zLS97on*AWY)J~0SX{Sb7mXI3xI0GEf_GfoT*Y0ifBYN{9!1Tlu-U;fyh)_f5a+e!7
z6DJ+v2~OBQ$!3g42m;kQD?9JnWi*iKyssx90%dnyJc}ZU^Ge_`^Sm_0?xS+Bp|taE
zk)9j9izEh@7|3#lAMFy1>vDW6sao@x%DcIHrccqkyY9^68u28i`YP%?h8(=AUXo^D
z?NGRYQL9Xzg?S_i^lCcQiK$V`l24Jes|m^ue#NVlG4_oG974ua@t*AgPqKy$2at4@
z8+v#)PiG-+(8`?z5yQ_TSHQ^kFMNoJ%DPQXIKmYY{*IBw%L{N|nxYI_eT<a!G6OZ`
z8NLzcCJnSotMPNpH1#3^xt3Ha#j;5A#DrO*QF^g%XVus>W-^@hVk?m})$Rl(rtAg7
z8bwR{zYZk&unyi}uh<+9EAqA^3k`)5i;v(iHX&S1=g~P>q{|4#vk3||tP5ZtpMvFf
zODj|5OepIPEX^}7S)en%0K5>-3QOeT1{*6|3@TM`7D?$%IG}&wi07jqWDdI`*K-Cq
zgFahBDyc>y#j?Y9STu11zGSN2Nh_TWv>;H&SMi&^Y5a%#g@0HO|6ylq_xV;b{=?4J
zD*n?SGycQfLO=X*V<}{XQn*7Th2IKy4WnR0QMhY#f^=|#P7H#G8xHf%v0|(B_io9G
zQh<^Rlokwp(yN?%E*uIRgX-S#3a&k}3+4L=sljOd5rP&NsPO_=nQ%_?yT)T3-Hyi|
zI<J>C6kS%Zf~paufUbGe4fUCfPSDB<KcE}<fgKL>Ss6i0d_>m`N4FAG8a69QR0Ihk
z;YS2)al~S>Q~&$3IwM4tsV*LUXsVEG${Y$9Y8!h(vp0vIN-|F>@)#*SG2(z=&~Sy4
z>GHV?(tTKQj!}mOr9;V)Ks1XMEd0<L14S?)w8scDN^?(@Ksp&QfI3SEB_IdkDx+gq
z4+c@4OEgUdNok^>zaA<Q0xhvt%4Pu}anb&o2L@v^q*jt@3QrGeJy^68VPEBLv0`YT
zrf+Weg?ooypho0{UufL8Xf?iv)Q-b@Atm(qX=N~-QmT_#3={+s!G!m-5;7dnw~6SG
zEkxyEomt@=>N1{#v;NF^+xX~o-!?l={p1%1|7muruUd_|^Quv+o^=}Vve9n*B?1cY
z>g)|1LipRkXU<96X&&>WR$rZ*bs=dLgi?6?>ZJRYgbL?4h=n6qK9~lrq)aSZ$!Js=
zagH688FLQ)g%#xS;!47Ca7KkmCu0yZS_s|o4r0Ug>^`9_ez%b=?h~3~6^5dao;?f=
z=*1mYLh1X@DLqDwxaE~hIw+B|3jFc}Z$*yW=!!EJb!nxw&3U0f@?t>TAq0d*X!=e=
zLEJ481YxRKAs}*q!2if$8|FoAutN?k4A)@(Ef~l<K+8XRyn=uC@Bs#5-QB|oJZKbw
z2M!|ekQ4h|g6RJ{VCX+0g#Ke7YIlfrh^6$yOdtF|8<P#;?X`O=QI0(=FXJV(f?F<I
zjZ-DVL*PPVv~w7V&=PEwhzIG!|Hlla)(emxQJ}=u_0z4(mRCq*7Sal*%avF-sR3VV
zQ3LD#HF*y_PsI2tH04JJQ+`Y=<wt~4ekdemjzRJ`NXp*}ka9kfa<&2?!Ad?5obnwP
z8k>c`RL0w~4916W`XJ4q5=5#<Ga*kO7^*o%V_W|sz~p=5C4UpR<P~i015ZW@HFpI@
z{U~76-#r#}`p9`t7BtqGlg5hIiJqd#O%_Nz`-dQi=l%+q`H_K{TZ@5~g^GWuFIJLd
z;;Vs-9|d4o8yE>cqcq}hHsJ7lw{~-ti&^FEgQeke@3AjMkHon=42PU;u$=!nd+-=W
z`_Vtbc)xI0&F}r-g6HO{1*2c4p~L@VrZ%`9^xg7$AY>hI`oq0Fx+Ugc7~F5d%OF*V
zl>&O^T_7uCI3aKX@!TxJj!$gBDt2M^^`vChYA4C6R-{n%79{M+L_vCvgaKqOX7U6N
zWLOdkPG`bo(5{{69X<EY8RCTT4HZm>WsD48`z~%_`2GfUVG$e?0xqf|b2MqU^QEZ<
zALN4(O(Xgqtmrc7OzA!dVWT3`bFVB*Vry7@!=(Js*-$hZ-JM{ya+}fO5xTms(1;a`
zD%$EFOq6}TCW`;%4uZbl7Oj`sd*?zjDi;-mH{?#@xEB4g57O`&)?LJJPvt`m4`@Ks
zOxApjOOwEj)Ocpr@^Wr8K&{4}F|h9JRpHzg31u=PA)x~I<jf#=DGYuOh|<I>-jto&
zvU5>NVwYt&%86m7flbiez84D!rUY!@O9Bm?7>I$tr6Gh`pO!^2%0oUPd?F|&6YhwH
z<#7;?sNV^|BO@iTYoA1dbxdrcg5Mmxv~eYq*oB`mDxdIR(OSPL?dO6ly`+>1<&xTI
zf-Bt>TWMkfE71bZL1#4~mtX+baaQI7FXcinWrHt4vDmda1;c<-60)b5htgy<IC}?(
zsldK}$!7wxj(!!aJ_8IT<4Sbi?dM$l4~=iZ)0GPnlo=F&kNAg&+t)N;F#>!N!s|+)
zhO@PdgbgqJ-aw2Kgdm5$S!lFK77+LtGOQ5-bcIcmh6sjW#}rsdJ+3l|!xaNv-=t}X
zs2kY?21m`oGS0+NCg@C=3tiZ5GH}nBfGos%O0DS3^_=JUtyitRX6@B#tyN-m{^=cT
zZ<TDdgn`H)r`2eYFaW@;>pu&D!+2g)Kh`&QOV2iUGm_929Q%_}QG9i8N;+SeDl7h6
z0``KxZf^ao_zN@00v@wh0-N{hr<baP6sY096qmdo*?ra>75iQ-r)(T^8NXzyjkyVw
z05&uvrjLG9?nigp*p&Y#$Vd6ZJLlh0`SByf|5B`>1}7#=VlRHFmmKt<R62aQQBoWF
zCp*2^Z=0Kj5=hx!$2Ry9!z`Hz%=>_YlL?6FZM>d1EJDvq=~}k7jI7!=lM3ep4n3T-
zRt8EbXPk1DS8dCW(YUd=wuwMZu!Q<S9FM$-H@Lbt@yL;=6BNa0=-mYA44GN(FVRkM
z;cmbpWXkVU6&1V~cWntCBLn5@PCUl)BsmK{Z)wKkPN;UdtW*+SP~m!fbr-%_2kzYb
zf8Dx%s4g3AV^Bg=Mmzul;R6E@J}&s+?+AF%hK?1p+OuvSGB~oVl`uzbn6V>(bwmFD
ztCzsz03Uum=;7}O@bDWg0UnaPC9o{wVq`)tMhinjYL}K2;-c-3>)LmU%B?oXf1o=<
zJ3)8U@+Z~Ng=ky%+8-=%Czz8@co#ev5M&N;*!K?unFH447)X38kjNEC<X3`3=E0p2
z5e!hBC2bY6DWYY(sIVU);{ue%Of1Gz?cPTKO?YF%f`n5<NRQwezn7#{RHmgPI~+F8
zHR0=%3k{oE*`+GCnu!S0=Tjsna-<P7Q$sjDb3Qwt;TgLG0uBA_jAs$~D@rIwDh^?A
zkO&R^u{Viii`KsiuFd3vTbamgPAdnnZNBlq9F`b&Xx8od`$nyM((aucw>}niS?T+@
z&+76mVf~N=MPtJpGW=oD7Au!LU6Y+wXndaq_C0DDB-S6V*5vXu_P9zT7V~2u^?e;s
z-vi_HG1L{|>p4yacGg0GtM4A)>H!d@=A%pHgG()o{jwaml$!r<g)MboXsMSk6Tnir
z5RDRADi^>_CU{(|sTP7CeLIY)$AXxeH($~)%|Kn!b7H7igE}Hb$m;Ka>XSC5T@I<|
z9zZ=0fzy)%)3X4j=U+DzcY=!gHb_xR4Phou)E&T%9ue#4j`%-4#zJk}2?uCC{?EpX
z-Hlxv|7UCC`OXUe=iA`_z!`3U^b8QBXLA#8BV%s_Zy22&8G5IRk@_%Mgbaokentb%
z0&vj^EQgp2SkcHA5x^^9_TZDgKXJobhXkFnFq{q*I>*?EbO<LE0#-~3_9ABASU!A|
zPu6*W0SPJ4xpW<2l#8(!qt)!QM4KXJv8XX5!;=UDTNSZ;$nd<6GX6jf1DqZ{MHpgI
z<&2;(EW@l$;Ew3FK#c>~@?(O3NjCwiG6q35VJgDr&S?cUfYc9nY=t8_C;c^)mosEq
zhrw5d>mDklj6+_49eIw~0qWDS=c`b&L|36TsfB_n7nOpB9wcdOG?x{G5mZXi=q0#L
z93UMG2xJ-^D}pg1O9;j!a>Vh&F1ZZC5}e1^IN@ReVPV3VdI0%+Tv{Qno(Zp`h$Yej
zqhWMTUUxrK+YN-oIBlQ2Z`KhK1EDe+PVMCMW4rn0ZP$5w(yBMw9jAI+ho{HgcJtL4
zh=3jENwotxpWurEe0+2oznr!koep7U9G$kBP#P59t{!)rjSlRj<67&i-aLL&cAy03
z_@wK!nn%qpB<r4(saBD#VB~X7UOPvPcI_?vtiEctn%$4ol-JGfF;?~(YO6XRb%Bt2
z)~dFh)3f&JNvGjJ#|nt_QEOG3M~!*~aXgMsoW}dcao6d*t+rZ;j-GrtZnUxSRxe*Q
zpp}Rgf;DkJ>&-Su!Pp`Ftp<Y!4Q!R2&S|68#4n9s8ql3;`(s%Y+G)HygGBJssaKDx
zZyKF~Q(Q1AFfX;UcH;;e4I|Mxd)4VSyJy{o^XBBFPGb#nIa=U%fU#?xbZEfNIt>u=
zs@*D84Q0S+b@uW5tFum%#tlv2+wHT{Zu8{01e5#$1_oM*a42;e;FDwSEez60`y-Z$
z<3JNoc0Rmq5Jt-X&)%1|w{0W|&u9ILPW+5Q$`mEpPO_`ZOG~uP8C|a^CC+Si`G6!O
zAtnh9L0Zxz-rxS#(I?OVK^=CIv7W>vfj+CNtE;N(0GJZA8VH3pKyB~LE(RR}5YOys
zilgS613b<5D7eZoHuGbv-K@fbwc1z!+Qfc;Y(S^yG;3TZ4AVkiTL3kMxiwl&(RwW!
z`#-dB<gzqiK)WTeg}}D=R>B;yf8~MymdGz@FU9~6Q?V(daR8OWAP3}u#N0x-BpsLz
z4$>aF+PGNq6`hykcsjYz(ntLfMh&c`Cm5R=fG;SUd6x~&CKh!kFt_{^<UH&E54p9J
z?p5|k{`RdmXFr|5qQ5$5wE3HQ4TR@)3k2GqeJKr1^V_nCT`2e~{6FU&ol3YDOsw0c
zwQ=u*YNB@)pdt4lu2Ma2U&!t|$elCS+lePY$+xnvS;If*2Q~6TtEuQL$98!+gB1~|
zCv%O$WVy8!VE@rIMVRXf5@aQ+t*ps{@-hNWDs=OPp}~~5%{X^;C%S2Sps7$huLr?p
zvV0m}9|pg~lULF7rTUJp4)jH}VuJ^jZdRi^JF1p=DHpwSdi-POu<<t_BAe>b-thrH
zQca@q@^WQpi~`qd>t@&h>N)d-Gc}yFVOaf8PWNZ&^+kF?g?>+-xQO`V31f&er5Yhe
zO)hgqA@NEzphXvYShDd7e(s=E)Jx3fvq{f)ImO9EOi%by<>w{GC-x%dM<3zek6ZpN
zVQEzLu9Ek?3-62~%SC4=sLN8%e%#z-fWbEU=HQ#EnZR94MY4mtv$04?Zz4I(d)h@U
zykhc#{j;yw`5$)0elBJAlY&P6=2^h>$J(MxOWCuoAI~VfhW@O23SA6hgope{d`0@m
zWDLTanmTCY?~ZEtU%pyNUM<;GuR6zAVKJSKVJm+3@t;9LZ*!=_)Xy@Cpkn>cx6fa^
zc%IV#Jm2{Ci~i@&8UK0p+4{!%mmttY1oqw@9~^=>dwy~P;$lYh$h>|?>7&Qn7)adc
zBD$PSLhW(wIfnBJZ!<aF75-f5l2oJ&S?LOIG8<K_*KCBV^7RGN`VBes?g$Bk&S|r8
zu%+MjQ*ZZ=&tDxhixRx&&VZa1B%RmO`2-W%I|j69zAHZ`z`$eDz9llr6%;qYeQ!a;
zp<uMr310!qv0$|Ypg{*6&Ys*o<^X`ol!1Zt9m!u;t3VnIs2f2rP?3-L@o`JiguAY?
zoaCpRre5tuXUZ%^o?+yVlVHr(ZA=fK%*2EfVv|SCs~V^|rOHUNg;YRa(*6e1fY2+s
zLjSaUgC09dLWi@ZCKkWjv4gGdE~zDw!@w|hjYN*E0;1}6n_ftFd)2sZ*KJMgvhhCK
zCOQqvbUK&`X!I{M{}a-OaTJd*uUev$0@X(#)(idKm?F|UG~?6nj2Aw>8%vKUen&Dh
z4eMk&F@++hPz2|jr_ha4h@uxe@BgLWud7E1Fki6Kl}~nbK^})uf+mpgKB4<s%XI~Y
zi^7blp<tsL&3J>mb=FgL^lXk8o_f3=m*fZ;zuiIWN9^n)Grk&;F-kgUN4D)OGa3tl
zj6q;=rxc~OQ0wT-o=U042l42#T<ZtNM{m54a#T?i#FA_gSfryykVMN^;Q&q`)?Yhm
z{Spwd#*7JA+)28cOVc{q*#`*{RIW;p8c0jfXVXI6VH>OcOX6nXE|#JHiuW-$w_LP;
zF%-d~<U!E|>3<6Y>GT5l$Jj8~mLQ)T94x*6-ob%K%c(cgXu&Z9OtbfhsW0ge)9OUQ
z-2hBCMn1-%Ao75@<MG0~*k=z?dR5Yf9t$9_v;=wFg;T+=3s#Vi036HC4(Z3L4jIsg
z1eajTa;F3m<i#=J=dS3JK$VLN_Un71g0_TGAm+AR*m{$m>&5Zd_ExMDLPmBsCQ=zS
zesyPOy@t1Oox|qYTRd{wWEI?beSC^~`18XfR>i}=crVy&SG^&DWEYv>N)`g~>})=U
z`IG`S#mA*c?q|m|v7)P}GJVBf=wY=ICRJ`aftSp<D%6w4&KT|4dF)&tq<EYiQ9&kp
zrCM?8AVyTGX^w{uuyV(t=QLadpZ)AH;B#+?=p2lEwC|n>ZId>_G>fjeT=OA@b?iGd
zLssT`E?3Mug2r2M$<Z=Qr)pcOE!41y+fwNb372XDzW^rHq=ZDoY1^)~vWtbs)&G_y
z|HqC-Kd;ifW}|A&DL_N#S2tC`hIW+MK`!qhs*Z5_@^@o4jce*h8x6;!9=d29OwH37
zP}5P|cb^{YA52Ls&t}px%26wymdd3i3m|>6^JyUp;a#|lMy=CLMU>7&d%3wW>H%|=
zl&|b5olfkLpVd<D%v|@2UaEXDWgR(c!b7gpGY5jf$yL<(i`LE)hh}pI78486&Ww>;
zN#L(7D*z6gpi3f?ec=flc(<f4PBxDR%cfE)s$M!m9Y?Xd!aHVnq<77Zl1Yy8xiZvP
ztHTi<4MJ7yyaSTry{5fVS?81?7~odAv(N))C$*B}_cHkaz6;=>xlI5y%;S0CH7g!5
zQ(iSW0Wnz0tQnmT>vr2x+lTk)d#!fs_=uPa6aA}`Ttyet<BJQB6#skuf5`F9ezbv)
z*6Zu4f~NY@07TMd4HP%#r%vl&gEB8(^A`{=1mbk+3K#PKmqp%RO+=OyfKA{h)#t?|
z3?+Q6p1CvzD$I^WRg6Yaa^*t^{MU}`>re!LfM1I#b8JZ*^$K5X@22Opx;sk85_Wp@
z2P1x{9=`<lE*0E+Kb;uxu3^6SY@dC}LFcEj)UNt!M)G`5=OM_WQEGlfWd0SN1XIKs
zcOMGPONHqdaw?W{d1{BGymph@VK+u*CN+_fnJq>OAM-Gr3Fbr}cYLorIW);-4_(BN
zm7!><n5Z%snADM1!3UKZh5T~)eq=VWQcF2$K@2cbi9YDUBL&EE(9fr1zuupoiS!|4
zG}LOTGMbo1Mh_zwAvf$5yX{o1Ga}um>Ow-OvfS_vSAO8-@#=2A|G85p<$|k5V4s|+
zs-7bkO3>*#4~Iugbqe;PH)))m9*bQXI3J@%yEO6#j@M#rCzH$BMwr*L8?q%oj@F7A
z?qF~VM}25)kh~lQr|6>tc_fc$06>$zh$g564hDnFFuu+l!4HOwdh@7hb;g8$4%<=_
z>-1morZO@9T72H5lA%$y5@;kEacVOPM8Kgm0-aD74JP_^aXeC|q%6O)BXlyB6n1CN
zXk8_HdKW}js1|Plvq@4r;M5-h#?l9UL#`{NX<}GtN{R(9!zt7Sl00As7YrZqK|082
zd=)W#wL@m(iQf)GN9{zZ6fbA!cg;?=7!xC@>)Z}n>opw4qh<BGQnk}9$cGM-Qzip~
z>(pM9;i+Z`_sMj(DDgomvy)iv)?7bTs4tfLq4C5o*m~JT#r=1dP5z-+6jbg}o+i`|
zF-YZkSrfOOZ7Us{6n{afycI#JRM#YVbhfi6LT(0Sw;XZi{0O~FIzKc{$+M=SgW6<f
zjBiX@s`N2}c2yE>&?jF-wOJjKp?ETlgUO}#jS_T8yPA+TwkG!~<;hLPG3V`cvRrmU
z<Q-zl!er$X#48JXVI8#4p(s*8GHc4z9WUy;9oS<PH^WtmlCj3S+v)W)#F1)eNFP;L
z5jR{zQ&@^apya=XkN9C9-jf=*manimPwZ=#bY<gb*xX~96;Z*2;_);ZBDTnYLgQPs
z)k&^_u}6N57(2k`1e45(N*A+cPnhLH<Fh~|m#v9#+N-NE;wI916rG^}9{SP@^Ye_7
zWfyfX<H{r?Iso#|G^J5RpseoRRBH+=;foV~H^t4-(o~jj{D4l2VWp^#6ayR;V=~Pq
zLdy~(TfePHk`)!AmMvhn{Dn-FB~u35VSeGJ=p{Haq%t@)MYI^$7GZI&A3Q6r>5HN#
zHGiR$X^^5g-docgwhRbEQj+*^4Wd4YO?}4Y=uc8af092M{V`-+LyzvCwtaiGU_ISZ
z_JUSc2rgb?!%IQbCds8XX%l=uGvpGcb~u$v3uLK{2N|2mT}QAdEiY!^)^Wq~%}sC_
zk0`rgcyST+A`0KrpG}llK2q!F^4eM9W`Ay-8s1LE#O{;*Gvx*D_jydYgW3GqNh{p%
zL=h|<$a}J-r|M~8x^U=9SJ_!FP>Q8kY!wM4rMuFa9%!zOAg^RYb#Z5|sOHh$vTnE{
zOE09{aRQn2alJt-2{<#IKdYSdO*i4$TmeeXP9-)mEqe8GR`Q1MW}F&1JgE%g%YD0z
zISC%2_#@0BJ&59oK<O>#shuog>kn|8W>Y>e0BpsRe6d2g7kt;!nWNWD&X9SH_Z)bM
zn>`JHika)()H+duwmw-c2x-8nVn;BC?Z^x050}*Jh69Ybkn|=|*F=*t5hk<%yt!mo
zqH+HB`~N}Nos{wl%_O{8TOgnPvI?gO)qs`Lc?^Yn1+c;ec8W{@G7w{MZP_CmM{-z-
zL@I#?r#M!ME;Nim&-GrI<P2vVEjX5Olp4s5cOWQ<g*{Z{%jF>t#k@h>EINoAgS<Ma
zSeMbNgTPO`EFrx&qtB$G5ZkF%tgJ6Crlw|qCLomcf$;^7nT`Jus~UZ_a_do^<sDj0
zRf$&CnjKy-kENK}m`s9OB@CHNFr*A?p-9a3N!!RMChsdZ&?c=<jYpU&RH5#lZrDk;
zgrWeLcIkbcvc6R7@C*&3W>zv6!-lC(E1askrM2Nj?v^HgI+*muUVATfi;%&UMN!2X
zaY>0GKS_y$4K}C{Km;ps^WS3t`2UD%HO;Q(5yQd2v?*I$*+EaP)FH|5wk*p5pGXOd
z8wNv5aO}^9<HK+mPi`yxZW#{KWMxhA8k$Q;*`*9;(*XHpS4n3n<d(AP+Lbg=xezlg
zDR$<W+!d;yywsL_mpb(+LG72e9-H%F-pPb1V<e<5QyYLb8jaHyOcf|xO(8c$oFr==
z6A#2PY%&2Xx&Hu9BGOw?CBnc){T<-xD|NRhoe}kQ*oP><i#ju`gM<G)%Q35>?wCih
z6|~abv2I3VHTNOFpzy>EhyR>Kz=TkO?3ir^zez-Q77eDWz@a!$;|bM~p`K8@UW(3$
zHy(os^Q6E8U}VJblY#{0ENyY<8Y(t)Twd$ifFR3k5*;ph`#%k-KN-9?L6(P_;B#+9
zyrW95$g>ezlPnS+OvoNluU7ESDMu=`ZKTK#6TisQJ~dLBq=_Qz2>-ioqhwA+&FP@~
z?4g<!zm&<Gb=?k-ZB5lg$mDs+xoxLEs7ChEb&x%@NJE9KN#dgMEErGWi0OsnsSi7)
zmj|J#cn=%}LcyKmCNUe7PMVmaAx8tD<heCLQ4(tS8iT!V(S+Ec2cL~#Z<r_65U+y?
zB~m-a44aaSK&f6tAzzW;(!xp~xU3VQ@lG+$gKmshkSQ*75PRDOZ;zo-D`9<j56eQa
zqw?(KlObbRXxY~|p@Nl_qOZ26AaDmuzdGh~cWF4!FfnyPq|6UX+aTK3a8AEun$0Fg
z;#V@<u+ngQE972`>W17qes0pa+$&@DlpZA=t*KL$PGs&NCF)C{)q$J^0}vOZ#1_KL
z`@{%HwwEOaTf<OgTR9C52F6A1<2`JAT@%{{PRRY=#}T>zJ2)fvzZ{ZZ4#|JxLvsJ&
zPs#oJ9h3Wyc24epu7h%a;gfRzvmBNC4|P^<-0`gZcn@jOM)cHKXuAG=h>2!A4<aU-
z=F|~5R)%+7yt$7@^5h6d@$B+S52q%=`enT)p6`p-%|jw~F!x(=6<uQVWQ-R0j9z^?
z?!Fv%ztiLHVGb{wb}W8&*|i_^=&~FBlqZ+nZ>fXJsqm@Jt<)$#<*{{s(mXvn-hY5&
z%khA4k1Y^d$mTxBIrCGUR&+!`;d_W{zoVmr^Awq5a)`0x0VusIAoXGT+FH+c7*4NX
zLO}l~v(Ypfa*Jq+j5RpYJtj7yW2PU6d|@mj-iv^Zip_N#QvBFs2yMB0n{xrG4MOcg
zCjuab!+_*bAi993G2ZBf(`&S0cLZ$0keoZ*FNf)u!}M_uQ`e_!(ZkfK{g8*L)9@!e
zOr3tq9HwrCPjr~3NBOH|utbMw5>7F+LB$Sp0G}zZ+T=Q#_O2@Ch2x7AB%vJ^Vbi+3
zop&IYIFuXbXUBH&F6@~1Ti)KCI1D9*vA)1Cvg+S?D&YCUwF5*vFx1kl%-q3XT#APt
zgBN6}F~|dPGq1!)ZD;psctC@0xjCkDA~R>AuKwlE{8uLab36&x$ed=OtY2m0KR^5a
z*|+O%{O67DH@^MuOZ?|QWBlj;{1W<^s0=20`Y99fSv}1Ldd`XSJTG1z`v1tUdt}w-
zM$w~Rhrx|iOh#bYU6LVnWk@tOr_+tghGnCsLDQISNbCYHBwI;^yIZUoVk{MZN8@=n
zo(TB=?;r$hrQZ?5d9P7J%gly;w9;M@D^1Nx!9^()fJj<u1Y1?`h7|>?GG!_V15i6*
zoC<^FRy0mpw1q6|{MG9S1!V0wY6(K2_$&1<#Uak?dy9f)L;?qI28Oh{J8fxUm!<N{
z)Q|vyS2>y7b-b!`c%wu^V`UK^PDy3JujE<iiQ1DRQt9aaYWL0wWTPPn^&mLOq;xRl
zP4z`dmUc^AQ;nIyWW@9WK#_;zAk^(y?Wcy7*EkchRY}M=ySd}Q%L{=HyiRI8O3g<t
z%WjMxFGZz9jelTAhR~GM<Rzk(6E@QngWxuvP3t}xSZPykrog<|Df_O%!C29=@nCid
z;2wskJc*K_Wz;Q4Fotea=2nnA<8jp6gYz&RM*jl5?wn8q3CgRi?2-DTG36~pcxiii
z^yjci*vY~9o7PcEyzLG!6TKtl(rWMFoND$U@WI;%fIjJ6-IfbHeGf|KZfY$OTM1n6
zO-wkPnH{}K5i4&mwgAjv-ESVXo1K&MSHOvPD2d(c#$K~RrK?MJC^9(^j7Fxu<^?2U
zsoENWJsJ+Um5RWoP&N~k({jqBDE0brHwNrb(InZ{_5#Qv>c04Zabo&lT2YE=)0u>5
zN8A}i@7a@6TJgH-0J&ANvBB(OU9T>IXh_cE)9_+T`_edWPqPGfW_)3V?y+k#p@=6t
zTe<+oPDpUW9kzeitBZCtlIcDv6KOXbpx*(U2Jz%QdEntrezUhnv=plVK?_ClKEg1K
zy3wgM6!8s>;G4R<ZoG<TgFaG+KC&e-YO9bE>7k`OuCn{eUcX%j1u@@Z=k@u~-jW@o
z>-6~O4WQ&f>!`__XxVfo6(q#O5PCa}i)B*Hx`LlH;Z6ax!X-0~PMF#a6@dtm5L1kX
zM8tn4PoS9<?gl9krS;~9lW@9+LnGIvn*9biyg@pA$eN_S7~EkH2Yqrm3PU8l-LMy6
zzT4@a5<Bj_^3o-0S%+6Q3!PmEE|jw6CpSS)M5%Ek<4I91b?AjRq`Ado7X1=Q^G^v7
zl*6I)WK~*vL39QnlyzO?;zN>Nx*->2pGv6NQz>#KuawAOIn*Nw7qYj2!|MQwO7(mN
z<%?i234{Kv9Zaig;(<j*^=?Po%8bw?Y(O~%5hOs(_bOk;sKgKQSB16H7&a{l2e(!7
z62)65t7W2b3L-Uh!w5#VH5u_`#5kjJCXeB40KN`SgKOZ}l9)$g`cHJLlW>S)lJ$xZ
zSbdkT<<HMSpY-;3>C>{9N1r-?zImjHHe5sz!Z0^6y5FLrP9mzBSKr!uz^4OWDtU6c
zTGAlIh;(Q#8R)AgU5(u~ucE%<&wNck@U~Wu7YC2yXsd37s8Qqw;z<_ELEHzi`zGwK
zk~@;RUaaYhyesea#cF-_`bt;q!REtgbihResHV3%9@PbVJgwKozPcpDEz(0Ihn>LU
zP#wb%0SQS3#=-@|OFwkevNyb>yw(&|1f|(1`sa*y4+=FjD61w6byIIf-HVQm2UXUU
z>#M$7^2f$v(#2j{HTt)DFMO!vTyMxl?_Gtx_Z@|W9TIO6bM3*+wvJku61w$|W@qoX
z`TBLc(|X-Gd)qkc98+>;46NQf!T=QeYGaj(Wwa(<x85B#5i&qVlPS_sc~rlAnICY(
z4*{O7%c06e?8x{!o!N-OB82@8VTW{IHqlZEPIqa7^S68|=&~A|fGpd1+h})Ae`p+>
zH#?`z*UeMRUEQ7wET#;KG{~Yy$IW+p&66`2MTOkKqojiy7fyuE5SXfth6)Cr*iOZA
zbwNq>v63V#cL5Um7iL1^e6=H;MA6MW6Cg*wH6DnD#=&G@1%t#x>k43HlhwO{4G;zJ
zCdh-C0tz!F80DivTuC8|r4XTP^swv!eYWVStm6<f3h@zhwk&S$vbJ1GOR62@pf<M8
zb}Om7RnNHBEh+YF<4(MV8vOb)@}=u4J?q&9{oc6Cg!r1+ozSyw7%U92!ktjlZmbjK
zVQfXY-BX(kc@lG?_W4+|Sxsh=ozNu|=9pN_De}3L?;wwM`WIA1N5HCbu*}I@qmsNT
z{Acm8G#Cywsrl`?_!)(l4hS!u6uaqRkEfCqBY8&#QN2j>2a5kpGaT}{!=6XTwf1MD
ze!xB~W7Hry0)aqxl^U|eXp9rQ2X<l@qZtTYLO>NIG)jI>w{9~E>a>mE5y%U7=HgQ8
z!S32HRBIV})-sG7T-dnT;Ce>Q$ctg&hZ<6Ly_bb8)n$7ECdle5F#Y^YvP{DNptyj!
zI52hq8}%ePv2)?SyaiUu?9}@5<5J6;*veX3T$G5UP@P&(dm(k(=>>J+W<p=fAl5YD
zQj7F#Uu1WSvw@3ONK-27nIb9niBLR>=VeE6H>x^syq;&z?#@Fw(dRoqH22PqPdn|C
z=3eVHJZ=MT#iw>nJg=HCM68jN>1;R>5e|1Nuf&Edv5{NinJn?lCh*1U@Z$Hk4xVSv
zKgHU-_>^noDe+vEc<xhS-8$6%UTcz-$km_Zn{8R$2^9qbq6vIxt6U3px69Q>)X*%T
zHg@M{MHh8vn&nYs)&ZuokqP#a%{s8LmB-7wjW}tq(v2_|$Nf;8b!}J(Om^BXfUnQw
z+fi>Y>xa4THE|xp%wZWe^VnP4__FA-rVVn!NlX%JQhZ^<nurbx7V?fp8`{H+_+p$+
zS9$Z4h&O|{tFD#ReN;04CY_BS*i<6X(eBlK7M#D6X>T^=D3imO!c9>KmW_Sl9N=&?
zJtlSMGF_SR%LwIctLsF(+BCPAJw{Iy9Z^6e>(Xmv^zhZGW>k74KB`+<`1z~`|2)Tk
zUeFkc`(RyD^3NIes5Cyv`a-dHHupjWYcv9EUUx{{n@%FCgR66i_Ra9*f;+=>$Ml<x
zxMk85e9*g)h6il3Y)bYxrIh`Ws~O-S7Zu(F__C%U^Za?ARh|}q_59|!1%8S`MolF(
z1uZt_cz3~7vr0SC%+cW7#hZk*TEQTRbu+lR<ho#_W3s#s2N$cz0tX``Y`1m)*kAN;
z01Re({2K~;U=S}{SS9Wacgi|sZ7F5mbJ+NMQ|{oipH7<l&DZS;???Q%J%i)<rY3L(
z{4YPLVqy^2eZ}2WD{G>}-_J{qeatp@Y&zTJ=TDxPp55<ej9ZUBpp9M4tL6@(0J^VR
z@0$DbU|h9E@~D$hvB&KBJbCi&Q$bV(<91j1of=0uh$>ii8V3i&UuB_Mh5i@8b6^Rd
zEn4R}GTRH*d4Wvef^{^^-g#@z#jj^~tM>eE)quxcDzjn{M()l0Mb%Q3<>8R}yC_##
zojLAbzg%TfeM-8!O~crQmU<U=sLkVm>KNgD>B-E}ZWgvOw`hsR4jQL#n&~Ze-fjW!
zn<|O9WbsF*akS6EOqzC?9mfU+#2{e^cRVfg$#$wZ8|9QP4h+tm8Vk*?;v*e*fXB0Y
z)qZ}j+Ar=^n}|yZkd^DB5|tcG<)UoCe%XS((oNFFEANe=ETaGN<?r+Rf1sRw2Op4P
z|BsFDzx(!i+W+JGZ@>TI|MA8D<4^AY@v6~o`h7s;o2(y*Sn^mxH?`3lo-VtN8TyYC
zOd2`a$~rKZ9MmP87+$%_snO&zrCecYeiE|fk+H)wz6*_!x@k;ocZEQP12A`g%5dgK
zGUeB*5veD+3Z;KN+vXDvNVC4{5XQyO?58kN-?fw4!3=~lHqgz|AnfU231|t#*Ohd8
zl~DQ7B^mNVX|kQ1ptpp%@Eeg2e{w~alMR*UE*g;{F#P8X9Ves`2L@?e3eb{n5Xl4*
zYG623lR(VAXhVO8x_CXCpyCd16lI(2R3Z(5)Zh_XI-8{48$H;@I*hd3#-hT(hKO4R
zrmu2St=5D#8608XOR4KgX+KD2XEC}_po7QtB$`UC8f-c;V^!9x7t-b41+CU=$?G|v
zzk@WGq73PDp^F!NBLF}o{pl(79zo7B08MH8RL}#d?Np6QjWLy0A(!+zAgYhjZT=x$
zGKix1+%jxpfxCQhM+~i(&k`5yOkpf6-3;&2XuoWlx-*3Z7Aca~Tgq*QR!HAXq+P;P
zh;;nU5dm__5xwo;R{{c@Ic>J$lo2|PFIblifV+APJFI1V6f%AmMq@7h;1mpWiW_uc
z6V~^w#TzP-P~KFhv=GydY(?7+k%IPw-W4`<G#j3!s$yacCpEoOGrRmbAs#YTS=k~P
z)2wU}6i??AkzP9UOO@yjN|oqQIl;^+_IeOpCRU&d`<<j!KDU*0imPlHv@7t{9C)5m
z4FAP_X3}p4!M|=BWTTdSm-2@2y?R>kZnfauO2NCeoxEMDu!|;xcXo7*kovu4i>Syc
z=25ItB<&6!S!w&#(-6-ly3^OD!7^D_D)|)fAb4rUHY>r1?n_f-au2zSqZx3oliTC*
zwxJh%VzZfMFq(GavD*1HYk<2#xpLS5D+@1&3*I|FJ#8MHb=qggr;Rtw4jo^62aR@H
zX`{?B<JR0i-s_wnHBNu(H1_w8kJ^=YwV$d>Pbm5d-=uzBudne*oPcnP7lkzM^AXKD
zsb77aTbz%{(#6@mv21B;EU%))22-lM8BMnMg-<k**UV33?3&Cybh;2<MLUYn{B?B8
zrB}`J@G5YgFb1^f(<y^uqP>>%mQeAkfY=%vEOM9NYhN~K^te=1BFw5MuE-MyEogvc
zeo^0;XEyJ%XmInIc^xSbi@nYimf1LLXKC(>9>*1J=TH-A!-s`ZojsQS+yjEh=R%h6
zJeHkyz6nU_p*-D5Sw1m6tZv=6;nm!R?`~RihiRFJ310uFtxqwS)zX9c6r)-BU5#dK
zk<r*O(F(i|9BzD><y||UI6DOtdNHKOlx;0#1f|=|CMpE+`R742dhcFkktinPDf|q_
z3ze|MaXJqoR?plzw8}PVQ==&~uyfR9MoeLr$_9+TV8tT9C4lBOqP!5(eWt)yR4LUz
zYAyYU$(h^bYiYkmLAXMopl5Lf-!F^4PIwI6_bI=$8kJnXRA_)XnC67m`{<afQS1W8
zK%L65pW0Tc8QX`g6j|~$UA&j5-PG&=T*K&wxCK_Up~6ZlCZ!kNAiN}{<oq$^Q(`Lx
zb!vyplh0%S%2(Xu=>#LRbT}<@d{OcCtrP{->dwyUM(wBL^Uhx5=;-*YbJ%*vTwR+k
zD-RoIr?8Z5bE5L*{rE}XvHAyshq8H`gfK^(zIHY5-kR^L#yi}o-{vI8ou_bHns@B@
z3~ZJ9b9pICxNY&ec$a7!Tx3<tGp99+rA=mCS;>f#wZcW(mE3KXv;A0R37-D3mY^(m
zc8?_p`u$J41S=*gL*`QBkFX{ykH03v+2GTz$=V-$P1YWNP5RM?hg_2d&0pkY@aMPx
zkeuhDCLnX{KVD?)Kc20B`|Q~l`;R}R{m1IF^^Nr}Mj&Q)|9ILsIcc8y?Lf>+uPI2{
zq(g4d(pDW!N0JU4>xzs$1|^i&7vlu=g1dU<_9R_CJmNT;7m&M-QtL)TzWi4H+LXNk
zeYP=13j5{GoKwj!=+}G}Fw!HVWEj<yNsTWWs*=Og);GKMy|=8O+nt0QxM48J>uYoK
z2qbVi;Rty1<DLR;9)#0x64|)G`mb(J!;4oDt`NrsBx#B67S}<Re}G}ho64GL7}}K`
zEQ|k34dSB;qdFhdS$sC&OoevMUxHs#im;s6u(E;0;w{Qb)g?RL%G>5?>kI|Z)|;b4
zyzw}OecL!YK5cs{72M2QE0LO&1UxT;c{kta0Jx(*Wex}i+e<mFBc?Q_Stnh3wtsqD
z%WFciMR~7nJ-5G%njx3@5y&km_gfc7@$`WDo<CP$PCH3BAkvkcz$hN^9cIT1O3NUm
zASR?=)1}o+xlXMT11fHRcE*Td$EY?{J_xBT%j?#0K*byWtk%$Owa!I~HIz=9Xdf`A
zO)j!Pn}v?DPSZ}w1$rp-=XH8;O6K1c*<Ix0=Zavj@0`B)WvOY{yG6HJG!N7EVvR2g
zh2AFoM5}MbpJ?@E$G$LcDLjx3$tJk6<M)2|VMV0!U34Uor6|sg`@<UXxx*``B)5j4
z7lVrw6MG*mk_2&C6bx>Zs;ALp9*{Ee2!Nx$4kmrYzH!_+`0~#gW~r3?HFsxS!Q@>K
z+cD>=b3)b-WbF#j%{9Ad{+c3RTu8m%vZ7%;-tgsh!-7-DgmKB43zg$IFW!q4A7Z2y
z@}q0rond?LA`56s<Yb;)P+kbo%$Eus@F+rnY1%ItJ(LjO-o|;PfJYMoxaWTdDZqiT
z0jPB=G;bC11cCBRl$8yfW=abOo?2g|gFmlyAbAOA)xSstUnGJ*r$nHyJ^moFL6ODI
zecAUfjQSTweX&f0sT+XLd;dqbpLa_DSbYDt{_V3Dnf!mxH@^6P|0(bPz88m4?+S1K
zjt3xj_O8VCkRQ}ze(+zH!<+gjWE;gy{qOkx@Krd9ft&hU(2Kib`xpGEcj;Mu0~!dT
z0nquOzls0&a{I{Sg~XBUy`y^UOG5ZK1|G{k_fR2h|FM#ZI)0UsE+40D7zI(`9an-$
z;CW^S%5r0lU5Au&SeRY%$0I$UsQDFHwVG3^-8gKrS>Zbl4Wq&^+WqEKuBfSz#o{3a
zh}v(xrYQYqoul(Z5IFZJ;7YsG!6HXZ3?X&gIS0x0>}|8N*Jz)04x49hkM}#rujMev
z&;$z6OSh5cJIVliK!m>me0J~?WgDXdL**%uLdMRHG@+5dmzR~Opx^Ls;<<8NTRNk-
z!@=~8DUQ#@CC|9#-Y9Bw3=1M{HhO9qguSzu&!Y2x%=!HxW@IUS@~319eV#dalI-0J
zO-oJscZH{)1p;|-jz22|J|zIyXZO|Z52W792SRFob$KOLV!{iEK8gwX5~1sN690=(
zy@|XJ;><#VV4?iCvGMGAI{*8#jc4o6zsP@oLiulF<36I>#s*4%FBX&Ea$0#N{uWI^
z7CVYCdIAgtfBhF;bSCvd81|ET(5uhh|2s&4O%zw~HY!gRv<vv8LIod;0Uw%#*yv{f
zM-Ig{Qu?EuyH{Z6*H#|xZ)ruGAR=!F&N4oTF9G=du*XRf0}N#kIDE;;PvJWz#l*Pu
za5l|GlXx(|Vk*DP0}gDBfQ1X*p*y5bs3@w8#p3`?x*YJEB_CU~pG-APwxasCCq3U|
zUIdI*r0=o|hT>c~dwbm6d^etufDKQ#a!TvaMDX1N6>@K+;gz~OcH6fumtjt-j6FSB
zhUIZ+*WC5G?KcLJRm=3D-{^kqvTk3gve3}mAlZ*5Kmi(@L4L<f1|%r!&8tXO`Wxm}
zu)B~UQ4Sg{*lHklgFJs!v4>o<$5fT72g`rfnrTZVm|&epg%-dYd8jI0ni84tMb*?L
z?4~M9Q>!kY#@C0zFYyFsWw(<FmXzC$C)AV7fJ;-5A(a-Jg)a`%9Vg-o{8TU(Uo<H>
zonv!hDQ81hn4N?kb%JR}Mkwi^<;ylSw~MzLQ(4_NCq^>Qs%fnhQsm?PV>&`$Z_lFS
zN_0aw$AMtWliH<vbm_UOpC!J{3<pEM(+!hphr@?$>m6P54@X_uA5f1RSB!7b7i(Q3
zwQ7(a*zLKS!0u0C(QM>~JWCI0mt$U87+6?6H2_&#?MxpD;to&4f6jm=^zH3mVrYR&
zcBuC<O=X9vTPma?0Qy#<c7{dL?A@ZNrBhN-X-`RxZg-3^dXmQFBt*DY<PI`zkhTz?
zIxSb#Za_Yc$p9}5W~`}Xo346nNS2p%cf9w3K+BHw??!Jr3kD1^O}29yqpm5qram&a
ztrK>0({4uZ=3815GX<D~JE=SinPncW-V0SHU#a`*ullFK;_A-M`kb~cQk8ApetC!X
zD>69vQR4tu%I3aI+SU3;Q;6SwqaNIYvAVmXd&%H>)xeXtOEET-T6=EDta_@44A!~J
zMtwNLf!|sF;BF8C0IA`p;gdsGs#UDyKRk9+dJCO$er*LM5srq62ZrL`N8ey{C`wIS
z6W5`*g5v=toqjAPvk{(Gg3`5d{0At|kFg4~(3Y(iWx@PL=HI-GV@^Z_r2P`3PQ|m!
zE6Q<{=(wE#$TrZ>(Ag(>P^afvP*;m!tx-#Y2(VY-yTnH4?e0$g>by*Cgw4wHmGs`I
zRt=;RoS3wtmBB<+^w?w&UsGDiYi;rhbREsccy*{M2SV;fmo=H=D6+5$4$-I=$f&pu
zCp8Jk#d~@MU1>^DLxFPiUPlI&G1hbvCea<lz4vzWZf+ze1v>n_jju5|SB-_$e1vuc
zt(q4kC|gtZ?(290ZACK2Xzl@frui8@Ke8K_$v~Pp*iEmMWCK%``?`HU8<?T&m|p0m
z!K6hR%8dKYLvlBf8It?F^pNbVN%oL(>lge<?wLdHnFg3@n4!tlGzE};<v}#nyWGic
z-CQ7+shkHl&3I?nTHCzVZEt5McPO<C%Uly#Ue1cvE4g4lH)k_7L<)q+K($~nu+r&0
zvy{K6*2=w1GtvdsHhA676-_vZhOpxW4^`2T)8m5!%q?=zI&7U`K>5_f^oh;Q>`~I6
zX$W+5r%2QlU+EKZ4uh1}z=}SeEnR_(92@la;$&KVD%p+P0#&iiqjX_VO-+hGssQ71
zedXt2)p^XJ7m;gSR7nBuMk}P7{=MlXCs~5@QdE|5Y}_jz)KSNQdZNeeBXyf@D>Q+s
zO)lw4VH0yH=22W&-%r7_kScwa5X2<!<pU3%sp4T-*CMzqup0SrTM$ZjhE3jJ#bOKZ
zpTU#pA|LK0=U0{w@J&==_dOBYKQ)ddTmt=(zBmbV_nWWI-^hy#c%R^1M31e0`(bx>
zS;1ONf;cl=IGKp;ZSgfA=)3V`Q+$oT@Mw+tm13=TwT|y9Rjfd5VSvglPt!m<^P6@F
z>jF*JlW=;$vF@Tc+1$JeZ)E-s`iZv)7g0D+R`!|MlHf~2`Wk33ebK6GdRU=n{csqJ
zE(c-f0^e8?G?l4-U1s*4uCzKozbhl0m*}cK8)e~<3eby~@k8U(rA6KnX@z^XL>}?-
z7S2&(c*~}=Ao=al`JMk-iLM2sw}hVjcIFjIGhsOZ%~8>%%caD7e;HdG$t#f(#)=!7
zUoIs`dC*+bP8ZJ&8|_1Mt#G$#GZyqBy9Y~ic`|Q>EPv*mY~~;<F31Og2PC>X7bJ%D
z^MORG1uE8&kP8y0L+9N`b&5#u=m)=Ceun;e;WSv~5yRp=^i6U82v$~vWOc239Qh`%
zxvWju-^0l$98jP;_J858uvf>$7sP2cyOUvY=<;}NO}uWsJ8W)hUv4yOfGW45Hwc4K
z4Rc&w1>;1%A7Q>L*8U_{@odmXo!EF1e~9|5&_W&6dm8jhpRCRE+1#{GYjr*CJlL9d
zv(3KF*6p_Zn!UQF%RI3o{fE}9vYVTlOgrLYP5QNsta^VfQ7>h3ZU+*VT#nBMko}!z
zqY48@EHnHJ7sS8oY->O?<kc<d`e_t=Sy|)Z%H#BoD6y^3ji&r`qsEA~xrw8bqvDN=
z(=5i;hd@z19&AhwhS2st@Lfjlu`)9tAaw3L8<*d{_plXO)NFs;MRA0uv=EZG({M4Q
z<3*909NQyeO3t7j6iwtD4YsTm(fWh*m3;xJe^^SN5`c3E{k=dT=2Ijm82sJF`Dv+7
z`Qbbg@mVY4UjU<6jp*LE6!8xajaPJ@K==PvoI`H6Mu{(6E1A#5Yj{oJ)My*p?SYd3
zZf@EIYd(v+{Hk`r$K`Oe`6Lq-2DE*SZ96u1&g^T}y6pORB3>nN6y8i3$_m;~Ge$WX
zvmv%z6pL;YO+s|1k4w`^jO2I0Ny%@{>fsZ6bxY3F!(}bLhaaCWdvMHlIJrF3?mQfB
z=Wtvp#7+;~S<w6Ph>wOfp3PA>kq6-AOEZ`EQGYx~g}iI<kCr0?&eWGYywyBq3AY`E
z2XQ?1*~{ao3YOfqW|=x>Bl)M4xtVus+qancwRI_tK;F4*eKM6#@i;WbrX?fTk+C(G
zLPRB`^kHk!Nex_F#UgY7)b%a-bz2cT4giG#S*ly&=~Le6I)096K`}kh0)N#lM2h9%
z7Wf#~E!;&TqS7jUoJ(--4EfBVyZc~5zkz>gzBrG=y}xp3z2>l|KAZA|xSKlz+@<2b
zLugyEsq7$XY@zDeEc17mE^Ri}k_yjv55-ApN&OPXoCADqWsX2ARV0f@v`8V@cP$i`
zF;G)Dzh)z8@7l>dZW26SZqW?(B2d<;C|+I#l%Mbto0RsHBN&4oQFjo6RC)<?gM83N
zeae>$U(x5%aqq%=cF{^2JQ7%=i(rtL_@i^is08Tf!f6BQlDTI%Li1fr|BPcb@sLG!
z=RAy#%Wxy^P8Hi_XS6NnFDG``{$WoHXMp#p6p#Vp*fA2V@nPW;@~n(t)5l>_!!H*>
zG@#73@pu{y<&tr8%)yF){kO)!^r~)YAlSfJTG}F>BVL6!S5X(e^`v$pnT3`)#@_Ma
zNe2fx54sL4_m7Xx=1Bpi%qn<$qg(TIkqUr}Xm|mDeoUlTY$|t{OlmsPjzqH;!VVb-
zyMkKYl0Ubx7PAD1uuFODfE7CMK@Oh+JJ9kB*PV_a`}jzllOwSXjogDudUhJO&b!FR
zj*3gDK!C1eQ$BB<V~3%TWnARNmE-;;FqXSbI??W^oCZr;wq@r~wVJjk<_<{5n38=T
zk0gZI$jZ~~f`(*1i>i_{QxizaDg|9b7Uqw)GFT@Ge;fB9H0MD6rC8@N%AhM3(IlC6
zWR2}CG<GHXP={9bgK2OwonSzggd$Mp_1qlc5<A0qDk_vTbk%-ZT{E~;T`}KPNZ>g~
zm_4ztcGgqu4zNg0U$RkKnqhinmoj9_Ohr9=x?`b!ijsNNMcaKxMU>>Kh!pKBHS5$g
z%{Z-9d2AB;N0FTFLOE>(ouuD|(vx$B+O)AWX{0}eh1?_Um7Lun75aBqZd*Hp9e`8P
z-~A;{t9i!CJQ4Gg-l>(u`#c+Y59hErep%=8=eY`cOm-)-A|-efLwAy{?edFp9`};Y
z3zc4aQu5K$o$SHzDelM~gv{ro`yX0AR*nvS4!u}Dz<>ApvBD<O)JO2g)@N6$&AUY^
zrA({cMJ(`AaN#4)chuEAq$$%vY~k-sp0?<<q}6~YiA0g8Nbm5_4D+9bfR?wnx+@)>
z$NaTDF6{Hw&-Yx!Jol|f$Ma9B5xScW$E!yw)_3H%>8KV-P#`saX}yhfR4uJM!U@C|
z6|P<V^y(-x#XD=97E&r@wL%hbD}4+EHJum&YU^~kLUd87d__KqHh`+^l6u}6O|h(O
zrz-v`T;NuB*Q-wIm9;gxvl+*va8fph2$68qUq#D8jz)#?Idm8$Rcsu?M7x8~P2*Cy
zdox##_MY{~xgT>x3_0(^AbBrrt&dn08(W`T?9mFUaGb)YiZe8)EtQt(ueQ=kEPE!n
z3CHD<(ekIXhky^dNAeQ)VaMWHMTtW-1H7z)ZUN5k*CjjWSs7)d?$^~wVurm8h_p!p
zn#{!N8gkXShE!2r@uk7exzgmeGcaR2aYxIE%sB5e#<XJ88m?4}-e2~0Rix)59U7@X
z@O|*b$V_V6Z0UzulGL_>!#KQ|Zf*{vQB<keFP~c5z4})JTz9LurZL5S>yO3}{jufM
z9A0UT{<nm5cy}eRjl_#+g70kPl_H*Z6Q0wfribgf679W>U$XsWRLpLFdZWn8>&LO)
zd;v$`n-E4!agi}3wb@Z}5gy)x(@GG0iG2g{khU{c=P`%G20|6}B79JZClX7m;$jjH
zDbh8MSuY2+J_WXHM9B%{+v`a*B?_yHVOVm}n#w@uu)n4psYp7AOs`{8HsRF7G&*u|
zw+AW1KGyZDf-Z)XCgT$^2tNRM7>AP~nTW&yPhf_yA=x+;0;-i@c4aar3aPI^!Ufg@
zzABcIIyuH1h`~toX2~=jdcejhNy#4N(q31aQu4ZTY&Jlxi*SMm4F+)M>A;i$)&Pv2
ziJTMzdJp1&43#1{^}4sKV{DSbXlpQdQdy*E-dRp~>h!zU+$0-c{~+ycc0_v|_9DWU
zzRC?Or`gGwCJ=ZhLEl`jowL$zJDKsDnjGqbYnr%6RwJ2S3tfydJ!PJhbV$m`<*H6W
z0Pt<k8hVyauGp?f%X;3SWd*__=O^zOJ!j!787Hn~VBBg-zD!L(EGLtmdMT$OmJzXS
zMbR<}%9z@b4<Ss<3lC5*sMg>+G*N;7>ATz$6d9Ht$eI<a1wO=z;?@Wcpk(|SJ|?tj
z)5kn887-tvH7ftCe6ey+N<B%f<uzr=iMl;cQ;vCcQG3=QU(}w%3lz2IBs<QZs-5sS
zuPG<Taeh;Fp5x+K<|ZC?XX)fM_BNVEZpKXY#7#5|Z}P_G-wexW_hC1d87{ongIj}>
zgRvB=VH!A0k6NVwvB}i>eKnyFuc;GDA0EE(<uc$IuV7quD}AfkUV1A!Ds+O1la81M
zDR(iy&`+iQSI-u6&t?W7v(Dak8rtS?fzfjvckKM^lZ@PvBrR;O*#yH(NiP?i=a>c>
ze-;R%!m0#|0hKUmbg4@AObI=3F)fH#7OvaU0c;W;#{H1ljsxI#_?JGmEK%q#;Nt3~
z5!P>^rAN72S)PN&x&awWU(d+aRQRy`GlC^|11t;;cUzQ#MRwPG4oLhCH<Y!X(txk!
zVpV#-Eu?2LN#`>|n-7u_tgw}#JA3sn4>}nwk=U+mVh@a-xPf})rBrl9Dxb|8qRP`)
zmr}_?Qls;xC7~Hx6`>i1?%iPPUG~oc+k9C0VVl1ecZRK7k>+0jTl<n~Mc8*z{7M5W
z!{GIyV8v_-M>fqeb<DgiiCjjFSutHeOD>vhZdqwyFt-Yb>(>FfEd@k(NEeNX&Xn#s
zo<x@srsFah>v?Ee2GWz+dNi-%Z($9Zqv_<9@8QU0gS9yr*gqIu&QOfC)Z`DVr1I)9
z<os*4{I8y66c?V{oW7m7#M<N#<*rV2(Fq2F_&V%YOm}IiRj-0iUR_(kRbN@FT59d%
zql2F?-}8Rw_3<gomn5)m6_!9Tb}OCX8BbS}<rxoEljE6B-huT_XL<g3FMytH*gu|q
zQox$qLFrQ-kGEkwFp4y+nNmWFNScHVoGtq;5;|dnhjL(FR4Nv1zuJ+_(A8zd0G3{0
z3o0}+)uAa?WR;rgWZvSbk!DuT)StI(;vqKzGeMM2*L$;5Nhx8<?g&!9ICjrky|rzc
z+%43;nHr|IU5^HCNo3VZjv&pOiV5=3@mW)x#TcDv98Lze>^B;U2Hu}ERvPu`Bp4;*
zI0<17ttG6jZP?_m#tmIF{YHw;xR6QU5?o9v;|Gj}Ggs;fL8po6Vm7eeB*b(fb;{YB
z-ZTVnf-ZrRrjXIDIr;}hU>u$29EDv*&sa-Cv!->7e64~#$Xht6^Xof_5ECc=AuID3
z+W|*9y#W5P(-lK&MYoD+V~o4(k59WB81W05F$?RXbe>5m4YH4Bt9QGK=syNy@SSyG
z^=QXG?5bBPm<Vng*b!sccYr7&xeCUi=9QdX2D}yTuiBEdA>s=Pqrv?<@?ZTBlf$yF
zawy*8BfRS}3bA3>16mLzL%Q)v)h2&unVdBc^hj>6oZFW+4%o#-dbBI#MY`L;){^No
z<Xb=<H1>%30Zr=JO?4GdN#mc<zv5Tm2<2m)N&T7rMz6Eu)Yz0WDDkVNA-+aw+<337
z8_sEGo5sixF<*DB=IyEIW{HgH<hBW|+Yoc@T8l<YZ3ZCjeqh>{>*oGHB8wo;^$E?(
zyzpJ#DB1%X6z3~y1;s5=rHaEB?8G~+CfQiEE?{g(vb1%E8Q+!(u0W}*#<K|?YP^H+
z9vG>U0VFUX%6p$c9RunG@~(0e2ZJa;K4ndO4OM1CGK2~e3N1}Hr?Y81B&k=Oo|JQk
z?|Xp<LnED*s9@sgs!Vw`y$YssB=M*hj_Kw(+d3v=4#Y45q56HuSHd7R4&euR|Bo)k
zKWEW2<SSE7y+ysYV-=bZcYWB8Py?lBI0=XG2WSD4W!-W!lW;(JyI|(oQ6#h86_3~e
zm|`NuA=@9~c@8zZeE%JEL5@Xq#e4nHD89uGfc{_*gnG{D<zneV+;(~?p|$ksls=ZM
z(&f)0?;(^veI9#qJf+V%MX%j0qQk!4G8q&q1D6y{dK4V68TDn!ni;A@R0TNDZHqwh
zOgd^NNU4A_;cM8B*!POcg|#Er4a-jr!i=L)hqKgm@Tsg##izru7I#y_?~G#Zke3ch
zY8Lq8(jN?zU)>9o#elJ8gO5b-rG1D80paD37<~pQRLFMILcOKzk`uPPXFB%{wEv9}
zQ?#_iLP|e(4KjgCb|p6^Gf*Jo@OVpg6gO~2c=*<=4tGng-+YkQjdG4c8@TZQ#F%MD
zRvtxpik5%J5gg8E@tG2OQ3}S#m4ZCstn*#gDO_{)$ob9+R$8ijEwtT3&M5MFsLo3n
z_Z3vm&l+fs2|Isa3RL8r$6qOJI2o*{4a{j--~qC9dl)l0>~B46SIgeMuoLHjG@sj-
z_A*=QQkqrMD!fuCvy&Q5dxEK8kb?Ip=PYc04Du|w8csXAsa2Wnv&LC#5AA1~r)Qmm
z9~uYeO@w_lr$oDP*z6oO&Q4qJI_=g!niU_sP!C}m*n8XD`+MhA>!@-16UBIf_FlCP
zT0d0~yUl^^YpL*9S0FL0-FDt+YBIGn>LWB*(`4Y0IiUHwzVylQAl%UBB6|46=;{`w
zfWJ5VrxWWg=ss_mSiffBw(QA9<^JbG;&Q84jL~iz4~^bvpBqoEg9&P5fR~w&+Nl(P
zq&g9CrE`J&pJca;5R<+PA#=zoD}$7}jkO|xg8L~$aV&HcPeUb3hXOdQso}wb$>ofc
zBwzm+2JgQno)69y7IIO9Mp0UsIa@eRd?ZM+<Qfx!NRue*Uz+;U_G`e(CJ8zy=LW-W
zbUBM>Nvhn#Xl2Y3oP<Zwv2C|$H>F#!luI?)ir4!j9TG6d|77k}R=!OuU*<oFE=I=k
z^yrwc{$#2E%?)I+Mqze!%g)fqE`Vip&2n>Vm+Cc-Mps)4qSSI0vxPkn%1u&pBr}i?
ztVHzEvL3K69sz8-Gs>2J6%NK|KM%+=x)i<HWI~#{xKDLP;k9)U;woLgVDo%z_Gn^1
zJyt*DwS|49>zY>OUF*)f*yXF;#cp<uLw3Q{F8g~uz}38r<X7iirWx~(&Lt&mfr}Wg
z0?D=HIyUiDVKan!XY?JTl4t^p0}DO^++}U<1#Mm4E8INC8&R2EcoTKQUNB4WsuHyq
zTFQlOFdO4mP9#NOZE`T`uhBIo(1CBT>u<=HhP@ncvFo-8pK}R9WpWMm9vfTx{Z2WI
zb%*-zjH3XvG>ZNXS2me-zpU(nD+@4lRu;K}k4PX8bjcVc9|d6f0@x1gB^^LZk{Q9*
zyt3>?(`lj<)Hpjn?Yv$3%a=dzAFbvp|NGtd;eY(+i{5v={(oGo5op(7k=FL&Nw~%g
zLg(=B2lcB(`&fs+@4x+)KH)F*`Rx1ezW??w8_(CjeZKx;W8=lMzpOu7Uw`rKU&Q*S
z0Kg0*Knrp2cG8pkgZ!<nEQ!5%e9Ov)O0O!Ot#53g0q`^&z?%siy>yJ%MPo2vokW64
zc=WSIt0AlM9##HU;4Q0oGYdzfup7+!V*3&v)Gzge|B6PFD5zgdfJ`rm)36@_7a9TY
zuB<7L4Twg{XbGQmqfs!qMLBo~6p3!Q;Ph84^Du_(k9V)oKy8V56ZXDE-b81C(`>!r
zX|%?Z8Jb}@-2@`Hk{T0PeaOL(5J9#W6TD7kGm)She+bEwk|6=?PSm4d`%!|lZUBv8
z8+Lcnwb+aWdIaG<8iwc*l^p_fWdT7A0Vda<^}?c2-~dRiwBRU&oLoQd&2)+%Y)rCV
zeA7IHLl_nlP9Y6vS{<rsO@QWf6h>0Z6%rB0(PR(}#%)Rv=+TZG2F-27lLR`w#esA~
z#1!H_;Q)XaXv2)-7@`!%pn<B=(u;k^2@*+sF}=nGmk5N7k+20w1oa|BtqEe!i1COR
z(K=wp+1plIw2xn({n$8d3iy6<di+Cczqv17{RA(YV(<9mr_<J(w`bz*@xgxcv@IG(
z`|$MW?6mdj9FC&4_`1=CnqT9KCHUtj(R_Du+HAMQ@u_GXo*cBGHE8~{adg&dwqYk8
z?H!!&w~pS_1hjyhxj1MYw$7l~*>R0}mDQH4dgA!CIBcHoy@j6*RNI~XL_>MqIy=J7
zUPEsUand+FYwevMG)~3I`RU1VyD4B|OJcv(-aBZt4x9UR=pXtN%^#XaXQKVKad42D
z=<$z7%~Kq_Gs{;^7-i$tK@)r8dG5DPn|o(CA@gkyfCmFS08#U#x!1xk&38?hPUG~a
znryV){D0?A2wsZ)#$n@4v%MrLbD#q2vUh&kJj6i*B--b%+Gnk^a}a9Y93Sr!tlP~~
zWDVQEc^n+K31H{#W)1o{Yf#tF20*L5h2LMDw_60a*3nt>^z{7XtaW@;g+=}m00X0J
zpcOR%`1put3qU$P{Rvw|IM4!sl>D{{&rcDi1g!=_0r=3?-kDttIs_n|+0zt9%{K?g
z-0U^+^)WW{W2@b)!h*HhSOD6@et&F0r{^?lTqhV8e{BKO6z0}wIYsNWXzc&c!ja3;
zfC256#1;bE-dhQC#Qv2B{zH-=VCw?~7){acA6X8hau{rY%vp#yR8Fe`_zo@&U&IfG
z2TCX;{{Smk#OiefRA5GQlAE(7$wBlU$mCA9o<yTc^${k7HiWKnO|qgeWMD#{Nc%m{
ztECiED}ELBGsOjid{9m^&UfA)hDu*8&AdoM0~GE9)ZffT+JlDP2FZSe#>NfKhqA+p
zYI2<s%ff#5D$;2dpwDbYQfAMq$d}~D6QI&b<8l(x1+cz2Z6(TChlB`y8*4q+q3l{#
zWHYM)4hEFEeSKMUH@oByX=mV&bs@^VNG@tCd!_75<uJIR*(^34S!>QH7Mc|_Sz=<T
zCZSUP{CtB{nC1yrq-r5s!-j`H4AFhjgFGO2dXN~NdN7oqswhe1dhc|+v$G-FD=bs9
zhsS|P83|M4#i)cShzrZWKtJIo^iI)0gErb$!=|T~et2#=Xrx*>2qu?db3`ik9o6Q}
z&i*aX4&Z)%`%OLD-C;5X6qjh!*<0XJkgG8<8%=<d6dkb_DGLQ_2x?Sz7RJo%Qo&te
zV(H*ZYDg4b%ptNTCmrCg;9#yaPT$~R`G(?;eWmQ0D_rb#qkV?gp>J75l6pwS8r4+G
z0F%Sx{ibZbbI=eqLA1OfvnZ7-e$dc?o(d~;L|y^l9$&ZKVT&Jz#YH+od?+kYkw2^I
z4!Chv@wQY`1=E94*s|XQ2J*CtA>Q!%7}!Q`==g-DjUV*;?5C3^J<(sd|MGu7+v>nl
zS0hIh-*Hzw)ZfpfSuDCo*+aJE9J!L?@ir@|J5QcSfINAE3PTNz-I_bhnlM^L12Vff
zk0A5s+&X&f0yE5kJ!EGHo11$7N|PTnozK9)7#H1VzIF(uMOQ&M*+&=G<`zTR($uD>
zZ;jwVG^F>yM5+H>WOX$i-PuXeTK_~#j6m=%r|ENSp_-6Z2XeEoRB^&18bi}EYLt%B
zHE~ZEsK1Kt568ldXATE1$$j!flcR-JM2+C1z;j?x2tE`=%Ep+rL-`^soWM$tHiAV%
z<USO<Y&>8n=$$N90QF>aX+%OJO#rvCT*<*p9bSZEB&=7fz=<q^iDm!hk6>MejGVJ@
z_6V%mo7mcj2C3OQkyPxdre2jBgFOPK2-K$)Bg{(|INb7s<AH}F5Fi#L2McZ&GlGxe
z^ZXf#BtGj1K41Slf{$C`vyPxVx7_ZWHXB)<`40SX77FD#^<pVNOAEDx_QvrEZ03tW
z98C3xXS|Oyg1#9+KL)WhG@px$D(%kR$-%on3h;Yo>InyKy{Vy|R3<^W`44O4k28XQ
zS-|&=VB<S$6@)0i4iCQsrPtxHHsB&7D8xe4iY?8DU=oe+AZ_!<L8DrtPQaBK{86qB
zl)b7^KUbye^x~vOHvq{3W8+E<{wUoK4r-4-%GH?$1hBJU5x8-s27i?92M2W)Xt-%J
zmcp*O1hWQzh)$WV!kaM`Zt_RD;SJ(TDoju)UuOfj!vpzI4t@SPnIih|2TW(F<OWUt
zISVFes`^i27|Q^H_f;@~nKdStlFcktk$RlOGt6Im${!0<gfkKCXJ7Nj0u_5<G(i02
z4}wlH2|YRf@pq|nv*RVAXYTGj$Y%+S+pDOY3aXZ;jH)dp*|8WyJs-^m1Mb=~5adBn
zsbDlsDZwN>N~!xkyS~q(?Rzv>e`5HTQa^rfUE}B0G^+k34Z%zHc*GgRCLuw5JPA3_
z(zYYqFYl6WV{!$3*HxtSYJJ!tYogMlc?3P@o#y}RXOv|#I+Y?d>wo~RG^=X4{;)dM
z&#hto-1^netzEsJZk3U7VRh<*X;xGHmsO_zj`XVc(yHEj_j}?0NtLRfTci59^{Jm*
zo65S>d<BG-KgJjg{KGIgiLY<8QV_|O{w9j$%5+P8Zb|9(g!OH~mU`S$p2}>2mi{Hj
zl&AW_mVDkT_i$}tqrp$;|M4mq^$#hG(%t+&zI*W^$N%H`^NnZUrTstP=NJEvPvZX}
zqZ?veN0}CloCIk3K^7GQw8pv)$)jNJ=~LqlJ&3v!5V2%<0{BUJ-Gjkp2+z@(8sDV7
zL)QOu5$_OggT~ip_lrGrzj%!f5NO6f>;4k<rf4Wk+1tX&PzUHdjQ<sZ2#EDh$VyEd
zL_G>$@S!fA)z|By#d+q&x4O#70Y&>mXI1UG5caVN>CzzKR1-hNGs?ymcgY6?6Ve4!
zaW$QeH`msN+;u&kT&_WzYtY+TJ#82K3JyORej{xc?7X_4T`+q-C~IJ|hbemIrnKKP
z4&B?j!mpNfU)H>jZF+Ys=SL-a$@<coQCauxF>e#}_u^z)T+b=0>Sqi|i`&Q)H{%#A
z(oUwhWxl%LkbE_)))pL?uZA|Lo{d4GEw0aHOpR_L#Jn0^UH*8?&O?P2WFb4hcR}Q-
zY=8M_<Qaoy%Jo_nsu{@avSy6y6{K2Cm4ZRAc`=Ju_K(Hqvu9is64C^vAGl3scO=Cn
zCJ$UEaYWjFA0%%dW>zY^<X<T}U(S(^t7;w<XLf%nHpEkfT~7&VmWtEB%XT%Ic?`a#
zBX;W_&CX%#i0&1er=9(ujv9xpy-JC-)d}QN*2we^r^H0SVeUMF<5uPc>d3NBQ9CC6
zo!3@Y#Lt|~gX1y*z`Gz`qfa101I~)z5@V(%Vr5OnP{G4c<=-QSWmBo?_~Ug|VvAkd
z=k(Z}+h2E1Z(W_TPQuCQFR)!^)X_MDrYD12c9}GpSvjs|Y8X-ro5Js9u2?uURpJo`
zS1_zO1M&<;=Jxdr>E}R1VikA>z@yo4q_)5c$F86-(qS-y5ns#(%yLDE?4CpMbLpS2
zgUN`~x>`ql!jy<zK)a#r1LpfnJgKMV`)UyP-fyR8T~k|gV;5_y{9A5<Wlz+wN=1@&
z?I}{-*;%ilb6MxGdG_{rzXRVWs|Y3(**iWsKLqiq@i+KE9L-*{t+Ogibd@Q_*WJg1
za75{;82(P|1JwG8O0>PB_Kl)jziN7<8dW7$qb>c8TdpM2KIJp7R=DjNKL~ENX#l&G
z^;)WTu?n?xgVm}Yj*7=isz@5!?haR3<;_-2bzZ8g2#D+ke34I{%8hNhmDoCWg)6G@
z8pEX~q8bjQCN^}&>4lJ+BA_cGR1V)ZY8mRktDC4uyqhy4UmIp_YGrEcP%yi?^0s5~
zo$O9l>eN1PTH>mg^6~KfRB#?3bPlNuI2LH{?m%%;-Z65jYu0@^#@1+hO5-9fMr}EV
z2?2hcN*s?V3_I~3if**S!9}D03#N8r7No+&r@FgN?pQQnpegKzBj0VycU^i@HSy^8
z;?Xa&%YeE_^$8V@X2UZX+n(b*q5GUv%a}Ge(k4@#Oe-(ND!<ueaF}We9xCyDhF5W1
zsxo|wew)K|y;R)OQVw%fvBtf<p4Uh*f0dOXJd)N^8$#8*;VjC?c|@^Qrz6$VM@8-w
zYn)3RoFxH%c@_=&hO;gm_*UaoEjw4ot;Kh-%r)^&SE5`^4@2yF8q{x9BwaJ$k6R0~
z97n`*Ad3hkT;5bsDrbiu?JCM;WFX%*-#)^-Q%d!dBH8XyRH79<Vf$24hw4#Hoy);Z
z6;8z?cnRyOIz0$Sm(#1tQT1-e*1Z74<aco}*#!L$2CQg4u;v~5l7?9)oR`ydtQdf^
zBXYk=93HVe9IShHho0MvUrAob48#Y>sODkFA}=(fPXWzRTtq%(Js9`E@SQuThl8?~
z0)4v8WU99(nc&D!hWLUEUVNai;+wfl(3Vvo38()ECvnAjXzl48YlEvIr~+4Ou(Ppc
zgK_KW)5sj0PFs=FSXA6vx45lUx3Sg8#nqH{OH5<yiI-5NYXIDGfF>ai1=1J;kE~Hx
zs;jKxB83G$eAXFc0fk@MG8~Vm#-jrWA%m2|?jl@N{8<b^f?{@BJ<d--@Seo8F~%ke
zJBM+9HVE+rzwAyT;3ZB&-0HXmVEfw%N~Q9F?hOLbg_gs<65LHAQxN@wq3B3$CorrO
z40lQvj8Q$Tt9w#)$q9#&Ta)4g9%=Q^gaY;f^8DPj7)Yx;#S1j$S#?lcMXLOca<pFQ
z!d<s$H>aRdA~!H`Qdgx?gDNsa6pZGHHlN_pJ%Pc~Z_dI5W$~dUQBV(=KaOHL!H8BA
zL<%7QMEgRV#p9^wgpd$lE8R+rzrW^=`W$Qk1Jd|=U?{Fi1<V-*4xUc!{!DoHP@%@e
zMx6vE5fn{ARsvb{`YP&Oi5_0yNKnZI0(cPgRcPcc>J0~jx<R)jkf~74CXredgYe$$
zw-Plu>v@OcLDE-5B>7d|Ca51>^;Ishjxj8i;%1v|tJb+;Zc{bKE$K$Yu9M5Nler*p
z61gdDJtwwH>D4iqhfkw#39;!5D~~AY-ck9K?mVldyo)EE?oz>hlGQ-Y9g)?Bx|2Jg
zY(6@|j=m1+*q1(pC_8esmfTAn1#*6VzZ(VLt%122P(~#Fn;mb@*&BdbS|#~Xec4p>
zV%Kxq;!Q{BqPxL%gX}i08&ubro<2y-9jkly>e~d=9rYwTq4rbw@7&4Gzko8`6yK?p
zZQ;HbVoqx%?!@?p>jo3+MN=(q=td-yfT!LLua?elpX~6W*_SXoYW2)b$qoX!s<Xcj
zJCb$Xc*<3tVj<!jEVa|HLmUPE0}Sl2BN;vZkWae!TQ&V_uF&A#?#ScVd#@w)z^Tg<
z9!j0|e7PGWEbr|=nuljPV2G&j!lmx0?tUw#fVs$w?s_N2%arNqj@MWUQ0nq(feR<Q
zs)tL?>=Ru+m9gPNFo~qIN2kvumSrv0>+NdPU7#XwnO(gw3svpPJ`v8jBsU{SF2riM
zoJN}d{x}K7P~-7C&SP=p0JgK8Eza}gkK9AsOI0#VQXa!pqWZ$jROVF3wC%S8`od#9
z0FTuvaq51)jaq_VdWao$XD&*1et%}lG_;^)(1N4P-=cF^E7?Y+_!z8N%*jqC!-91@
zJXpuWgmrSbF#fnd=cSAp&W|umZ|8@`DTO8`wUO3Xt*tpZWl&e@us%CGl0(8hsQ8zs
z%vwC%px0EI8MOjM@fV)&&&cz67`MFU?!dow?##e-?!m!zd@Ni_YwFb{Ev`JBnr2eU
zQ}n6`fVY~XM7^^Pm2U7|6)LIydPKYO6{G4j-wQ^lYg?B^Q~Jo1=FQbHuX&DHTLVFH
zTYVaa4A6ZCr&6R)EA)@Y3ghMa=0<Hpz4=Y)PWYU(ZrvZIRcXcQJ<!<jvXH5C;?1g~
zD+3Fu6-d}J1BZzB0oZ&6pVgAWL4#c85{1+Nwf}?v-gZOX0n=v7m^QghsBuFrWA*62
zalTXMUUQ=ILs+cyRjc=I*@>?{(9BbvFEVj!2o?cHN~fecME>?wfc8!$)Q0!elAC1h
z_tlpRLln}DcDgxA$i<~n104GIlSmKqL3Lj#VFB1Zq=;V(%-w9v6O`At>B@sD)N}(F
zup{G5U=lV867+@6w{?HVM8|9COJ1!%TmCOH{aD=fvxNWWi|5att-Jo88{cm{`;!0b
zPwD^pAB*{aW*hi!_1XHyx_A|iVqpIM7WCq-*!~4S>RozP-}tY~VGs>KdL90`T}Tg(
z_x_&s9+gkaJC0@#YaK-kk6qgN=A5RVd{%ekc)&R(PJ&T5$Y{aIT1>ZPwPUqmRTsN(
z=*qgXR*nKZRj7>mJ6mM|<#(nse4~o;(_3j0UUZUhKsJF^RWkFHKRGZ_PVHg*A?!dS
z;UpaO!fmIDYNwNF2=(s2N_94JCo+L(W(GOls9Z^A70p!2aXR(suI=7;1e$BDqY$QX
zAXjB?k04cXt}<UYzq3OXmkPWSv-Q8)QF6Xaev{2#Q`8{V$Uv+h3r@~;gPv)LIb`c+
z=1?+m&zw1ATaih5WGO4ujm+MZX(gLtsEVUqXJEJu_@K74v$4GF8_HKZ8*~S<l=YGD
z6<6(KI*{$GXWPk4lE?fuNZPPfZ40tSq9VP)EbKY$WE+uaU}vpzA_n<aPcBj(Qst#7
zg2s!6TO3Y_$3nq{ItjJi38tMMx89jT^VT|pv|a2-EN4J5j><JBZ_g;0)uT*pfw7dC
z+5(W0+X(oB*R6MlO->mq2`~AC24vz#$ti;YU`d~ttv<0KG4n2{tc*CndLj+~dV?^)
z>_!)}3D!54m-^-s?_rb9fPPwIqTiTuNtfGHTa@bC8NyQdYQJ(T;b!pD1F<YBsl;&N
zH?(|KOu<WBXtbse`?|#DdKGQf(-ZnFGs2n!+jIsxkzB>IL7#Xv0NE9Qae!&)Z|#(U
zss;7g7)Zg}PF#Nrw0w+G^*4#&n<KmGk0J4S37q7ETlkpbV!<+91n4gcs}t5K&mnPR
zfo?yXOfc2yHSmEryne`<DeMispvk0PF>%tiUuA5C!2`ZbqYt4`UeIXF=y!<DeazF=
z_!&TT=bT~$&Eph92))oGEypJ+k<AonZX>ROq|>LDoq*oD8)jGO(XsOu47)>H!MOK|
zVtvL#@w#?=Tf;nNM#tAB+Q1wim%bipI&LHZhz&Pkf0eY_NC6n;{JX0fb}lH?QAC0b
zevv|o<)5PMiLXOSfIJP}qq})vTGXv+(D?2aWy@46-PTk{`X`Y7iAp?XPrGWidE)x~
z*Q59E!NN--u`^tAyVYSZCi)rBCbgy(;5YWKS9epip^Tk2)RJ|q@dl1(zN%?P$EM;m
zxDc(y1I)opBf!l*8G=mbbdsK@GAbvU0hEFF3kmTtEN>Ue#UpEeX5EcVI0?p#owh-G
zI?RH!G@p-zK+1?TG8sa>J9VSbk2IC6h^OLwRa49STctxST^fDzP!qgcNf)I~hB4>2
zCVWQ#(OD`7LxC5rgIgrN@Cfrp8z{Vuufq@Fgz(`O<nrraL?oIC5mX!oqi8&vqS_-3
zOOi-d=v0m<uy8L-H-ovPLj)eiyG4jHWDs|g8EifW+%>F9O%mzHGX~qhl;~5vSk}<t
z{J;rO8v*(@3#c{&<|CM@$Psdx$Z23x(VyO6)(^S04lDP1<1xJiMhoV;DSif!bbSRm
zW40tVBh0nk{176hQBp}7rG-8~(e(%td7(+eA|HlpJOI%_&ai7MRE022R!BD)ji6RU
z_r_do7~Did42^+^$P^6&Y$TlXT^@n}4r(1RpdW{<r0E3+*gy|(2x9l-mbN0NlV1$d
z*7nE+_G>It8sb)I2kc2uelyhyi^6Lr0GwG?A5v=%rx+6M`DgB`)j3&SOH?bcfbuGL
zGB??uqEEqxzk6f7wwBTIV6n|jx{%4<jN?n|F{}6B|K@3HuXA$#>Ht0{9r^3VUb8|Y
zuUaKrM{l9znT!~Tnb14OC(Y9arf0WH&oeKRnkd*3X)V@KDnl6s1XYGRdwli*Sp>-P
zeJm;mB_}GB%?^)szlo{XJFm}=_A-ve71`^ufR2>e7`gZ%fGYT~$e)-!TiI&M65c8m
zOIftRuPxP!?9-RgnVn>zTV?5`Tm9`fr&ZlXR#;DWw_&xaqH<&{$X;92<$>Wzkzr0T
zoT>F2#%7H2G_b9*ZmFWcK^|?3(o@02JvtpFQ{=@K0+Ft{PXQ;YzExLa6uYQHgf0yG
zs-EZ~Ppy~RJHo82E9<K&Fj%)$;#!?nvB*>FrEJylnzhz--)5s8Y-O#{7q4-)WY-Wx
z>j6%s96r?S7E*QnPU*$rQ#@~V2lguAuF4O!k`su<jL-Np7ot)(AC+AE5v!)`QZXWR
z^AX8G9@XTYv0NozW+VR6ZC5B@0%Qj#omWMK3=4|>>}YG0NX*YB_<n|uuY*ZnU5A-N
zASG{OFT`4@7-Sg*6>-kQ#gF3As`^P1O1)lxz&odu#J$L=PF#^=>ybO*1+KAD{Ki8x
zso4q7L9I`j(w1|D{Kb5(0jpL06v?4RL?FtX-Dq%kkLGfZu6c)6Zoe7Ax^UmM$0ejV
za#Y;;;{cP!Nk7o4#S{eqL752v%`j0y8PN?Wjp9rFA%%dNSNE5r&-3p|des3?NfWGv
z@0Xcx^$7%vG((R|d1mBX0Ru(kEJPjuWpG*d6dycn{ye?6_nSxUW{%{5^{NZg0bZ@D
zzQo|gBw)2*l&E;#G79)$JPsy+fP-5xLVXKqVU`|VVlX4ip+A<V2|UNmXZe`Ga9M}N
zYQN}$qfMaxD7vt(m^bJ}DWSn>UEbGF4^l&z-$}j@k?wuN?PM?zlv9azazgsjX!HS&
zXdPda%AF}iOdvLHHk6Vw3Bw`O8Fr(~Sv<qdJe#Vk1>z=Hsg?vZjN9wrwr=_8C^}XV
zcNel=iU@C!9N!;B72S3h6SI8nY8vYdm;CBBEp{$j2U~U8x9Uh{pLyN~7vkoi_<V8B
z!hCzh)EDy<;jOFJyw{&z2vQ3kC}kn?P~uM>OdeSfdMrqGb0FFELo%0pDt0B)z1Tic
zCECmL9MhxA_*fM#AX{!pPb<uVq(|NATP}sI{#;c<=irCN!Ff}mK-G;$MT=!}Em8X*
zs+)CF!>)06<{ivQy>lh_!g<LIfTd@bA1{zco{CNVxN}g@Jv}~p0}R1I>!@k_v{`Xr
zX)t%66<2@s#mHHP$V4`MD7?x}0mfeOn5^xH4Qb)Vqo@_ymt<#O*lZ+2*RDSp#_}Lb
zE$5#C3`Kap04yE_5Dx?eC*bKp9P|(4J}E^sC{BXj`*12vY{&HD?ps}(k8?xY=OcN2
zz55{hfBtMJR3zQVo)k|-<!QC2zbrlkDQ$OZT2{;VpIeQ(d$8pyhq3u9lur8ja9I7x
za5-o7BldTGxCLK&U?|-gKpqr&5?vT4f|JH6hD3PvQ)mD9-B0kuc-|+0vTNJilqp^F
zcyZt1u_9z8dbm}Fsf>*E>O%EmV;bt#V<)vDa-VWGExPt}Oz)S)D0Tap@;dcwtt~PC
z_|cM!r}<w~t*OKl1<g51VJ({!k2H<0df0d94P~AZy;*o|11TL)!C43i1#1<HV;b0l
ztBFd1Vud!5eP<{&2kR!R&aMTg`wzk62Vf7uQeOCm0I%;lgv{J`ZISS<t$H$WS%RUC
zB5`*mwR4@ss_KU{S6TAvq_v9JG~~ZmLeoO$R!LIPJ1DGw_p0ht>W}-ogP4SIUM;u-
zN^0tpYq>v<rkcW6kOqJc)u$p7E246LQ<lnCS<jTJn44dl(zD#h*rg(T{k|mKsmlkj
zKGiwn(;Q@>-qu$(S6^%Ec(Zo{e+=EW!>&f7Xj*wJTcUfbp;Ku6G_(3El?<A^RLyK^
z9V={y436a1gkPI_B>n|oK#^Mp*ne;h_h(<EoNMsMUb;WI#E=p4T(VmRD|5v*pL!u1
zvCz7(F7LwkQcB=XuS+R6JTBs(S~eh`J$F}xEI)UT=;>FjSOrTUSqZsquGn^$UNZ99
zTyf3xqM{RYI_>kLlhfnQ9`>Sd|C1RsI_pO<xz5E?aWRQNJV&<-4vi3spbHyFK1VAt
zs(sizYpik;(<^C%KZ?aST<jb1KD@n-Cw&o2;@POL9O@Ew8i>0|JP4=Z%XH#gTQlH>
zRVdJiXn6$<7swM2U3MC*^C^io#ph;Lfl^zPP;(_e!L$fv0r7J+C#ihH{>hPEjax}6
z%PhS2#s0k;4M>p!XA>^@9dkWI!`UHUbPI*L1;)7)8GFd^Q8H}FmxxTi)A&zuj6UV3
zVSjeBXc(yC_)i<_&z^6%@t>ZpzxejMFY%wg#DDtp25)K{95jyh8%IB70yx=k$_H^;
z@@2>)TM<@wo{uK+V6X=)(p1N30`8?VqvzqoA81K7vAJoa3eJNh_#+kRzZav=f5qx+
z6;75lSvWW75Sm$LtNk^<+LB6-nxLMzr&ue}<4$HH8MIsuUMvp-irc|sl$@+R_iYU+
zX9yT^02;&EcwEA#F;t;jTVfzS=7dm?ZF+%iI$UQJZ@GD_t-u&o)-16ORn1%X9D+LX
z7Z`{&0t)ajcL)VIIJ*5K;~<IB_rOAH_9d|Jx8LrKh29Pq!a|Wn!*WP4XAlpFhlM<;
z8&w`Q5LsUj25j6T<F|5r33#bJuAA)hy=mOJO?nE{`4l^Ir^=>Xxd9?*?LqmaH#g%k
zo6c{OFyp=1(jc`Vq`kZqPyu(KH!aZH+sg<OIz)`FC7S9{YH#tP$)MVKfyZ1=w`|E3
zQPJ(E^1O(rzD#?KKE14>C!EUTaC&@j&^mh4IcOcW&eT@a&6=<`6>Ts{+a$WE@Eod3
zUd5C{A$4;#HF1s>m6=l25}bIq>i((Ma6ld{3`5|Rq|wPQ7=>pGKDLEQwD9!lFIAHQ
zRvD*oxd_Vfg$9<hvcLlRR=(0bO6m~R#4pvVyF2VT)5*bvBX3Jg1#tWrQeza_dqYJ}
zaS?bhcnD<0ZSVzI%Oa~yM*m32T8yg-Gerh@J-C{R6=V@`jshaPqJ*e^q^&XWEsMKO
z22mdl_wd+uuvkvRm~Lwkst4O^8+XHQ=Bdn)U9BgxA!@4pyEG9$_rwavg^5QfizM#8
zJpx&#p|V?4j)J3VJq?1_sJ06qVE0}H33Omh9V8yP2lkE+PdbO~ANJ_pn8lsG=yYB;
z+Gm}^#@X9Wr|N@LFqs6fIQ58IuG3O&Ej^J)D$9RWsbs3`>|`Fn7hG_vPs+_a%9n*H
zIc8lDpd`K}oLr=as%RLS^mG<a*H&9cuLUW`6BpD9JP&cCt2KKRP^-onT!qkHH599^
z=4Zn<FCbfv63`ZXzo_PZSNcCVL54vK5QDw^CyVF+=ji{QuYaG`|7|>f{^E=N?~DE~
zC;mIM@+JN|Qu@P2>!@|sY8<rw(LBv)5<LaH3PmeKWqO;Jj#l|L9AE}GZbe;>Qj|Ar
zk*)@1lR}BNz957cuun3$aN<%sur%5#G^fRjFo2^el)(o%;-C&dsDlq08Qc=Fw`+8C
z&y5|(5&3fB2jcPmb9WGo-Xk+%_oU%@3Xi|`kKpTC2JD-g$ZRB6c(Trk#iq3?nWgh;
z(@eM!H)Q^j|7KTbF?QQ7xV2su@<KZ884Kb-Wu3l(08!cCZ?g#o1kjhE@L0<pDPOYn
z+Ikgai-ZM%CCk}E&+QoP12h|T{|Oo9%V~L;9qY@?k_u|mLf?eBgoz|(&z(c{t=9zn
zXO~mIWV-+r`mG_ewnAic?GUXgEORp{6HO<<D7nC7vGMf?KCYs1o%bMaLns=dA@+!j
zgjPh*O)&oe#Y5%(~VjOj4?7u#uL)KoG7FyW6ye>TCei2abe$5@JNAfYWnpbt1v
zHa1}Q^;L9B3d!8Xy$g%mlU5dJgMifK^atwNwV+#Mm6cbY`xq9qx@49zuG6PpPW(NQ
zMr5?8b^6R%&wBP@Rjw);vjmg(B8CGk!fP89z&jCYyJIM58|0^}3~ikUBEjIdDkvyh
zW~02gY3u~(_jYEEyOp@g69If-{E`BC*{ZBrSKD&1P}LMfNJ(I!E67-YHW&~iVY0-t
zmgfGzpk0Pj6`O)^$hYX8WtB9xR`F0MUK7+u(A0Kz?3tylSO8Q{ce3-?(ghee>(+zp
zlsCNb#i;RZdqtKkmDJZ~ag5<-Z#f$A7%R!OPoIJnbEH(ic8qya{lQqb?a3~AdUJFs
zi}srCF9iy;H`(&hcmjpoW>24La<4^ClFI#fBay?6ZJmQWcRM-equ2cNq7diEqT8nI
zU+t`;5O0(IuRKWjILK=Envb_nvvs$3J{>MI7{ycPKt$DRb}xTY{B}-t8`ZV+!~v~s
zJn>yk3b#PkvC3=g=Io==tb;;~r#nuWf(=-3^8R1u=&h2DO6jA1Xr8uS|I|5cwA#(K
zjIhAoD5?ZEesnBfwvIubdE4Ckd+X>8?G(3M^W9$a1PitGxb&k=`=q&7k&0V;={n&}
zFQjPfRi&~r+sH8B7QsoWp1ZSXph-)NM>gIg;?=Z+URMF|Vch^<K%l=c#3T??4wM8O
z8WctTNcDrv*T(w#UxkVfkJ1dF=4>*K6B5mJB=K~9iWLyP-!lWXDIfgY1C~mD|0gQI
zCnIFQE++BtG-l^InJl-$pU*FF<7|uICQ5+a#E%XBcvh_&SV(k$4gg-JkF{cK(9E@I
zC?^hRE>)?b*`iHY=rY`<o|rAlG5%a$R;13Q2dOed6E4E*`;ml{-M&H@J#%S*Z~icX
zCLc{^IhTx`B}{V5-PIUgje){($@*4lm{@*k1c6&ReK6RiIUeX0DSV8lW-#hqfdJtb
zY)J;*i$@>A5nT`}@Tjy1EVNs|rQ`^_ZR}J8sZuJoC@}vYB22UbC%T=AAw~mXlHS9c
zK!)j`_O7(}&shpsrk9;DJ;hsg4CAha70$F1IWm3WeLwEwviJR1R^ImuD`bA7*MkYD
z!12AOPtoRK)DI^88r#B8E@zww#_V>U4T(u<oJ4yCb=k#`g27BtLC0`%l@&$$1{Dt8
zrwS@_Z;`HNZfbp(8D=YqER$~Cp5?Y$Sz%id7C_|FmE8)$3FeYmcV{qh)1BK}b<W)8
zv5fQPzEnK-k__aX;UNq_xG4fk`HLBkCh^PF$-mkC4`2k+%4RZ+hIDy<^+5MO-@f?%
zS^ECx`)3<p?tlJ-_dgroiPIo~lU*FnM(@QzFiC(du1tml`u1O!aeO%l@#?1f=f1i*
z+Haw3(rll#4l&$-=kmr=Bzt``&m3Hs6N~e^SjkLI!(gBuatw?FWy?W$qc0UEa4_{L
zfgu2+gIVWA);yEjunaW$!Ls;J)keAa*z_kpbb>Bw89vl|$c;6+X-Ts}C6rYHvhen?
zmYNfY&d{{9!K$oRT_3PI$F-=avd5z^seGs-dq`S&Ifnl?i>JtVa?e%EtQN5dwgsxi
zb)5T@-aF*`DdH^q(T6BeLG~vwF!6zUPU`uqQULI6wM6QZ?4fq=v<@1$8nD}UhN}YW
zY93tu&uzG_NRCv=)R<a{j~8OxfBP*OeW)<&Uj6O2&W5;*Fws;1!aqVNP9hm4hMUn^
zKuS%ZFAWnIYCQOW<}xTO)Pw#nF`C9z_2XeUy^8xuDlM8$*JZWT7+l66gI@uuc~R#8
z(BHx*%GEMgn<N9**VlCz%~b;bg6xD|g_C;FtIyuA{ofT3#9+8K0-k-$|86}0wm$Cj
zH29JR);LB48cIz=kl=_=rXn#(9>&V}EmVgxUY}Ac2aS`)-rwt@9g$jM$~T-F>p-{$
zgG3Qc0)U)82ixhPBtb0zbBg_kQXZMcQw$4_t-wf*EB$Dg{O>a~p988pMjZu7%ksr@
z&b=oyWU>^E7%kIEfT)y&md+L^bOZvND-6<zQPP2V&XV9V<lA92SuHxHfI;<t`;8kX
ztXauo66fB#h$iUD4=0CP0f~E;p&-ZIH@V<o1jm&TOkD~SD43anw`TwgR7q1huo(S=
z`4-GV*kn_@o(%>k(T9Wcnj$Uo*TMe5Sxu+3J2*IV1-Ci%RQWX@rP;T~03wa|xxM8j
zF#^yrY&r)Uoe`i!M{QCqwpb0IX0BJwbLDV)Vj-CVYnrcHB`VHO_nto|^VM6?nAZaM
zA`E^spwLI#W`rudKGV0mwo<c3qFOWuB(}=HrGq(2=tzK*8J$!-X2<&ErXWgHy3cns
zYp^IvaRz58{<5+WaZjus8VP=FH2-*AF|yQa@(@|4PT*|Zj}ml7mh~~S?I5OHm9))q
zV_pZ8?Qls(9_&z9rUNonY#lUD(H(z02`-0$Dk;XZ<jV87f~*;Jwke)H`}RMDrMOj`
zg&aTAcIONlp&t-~48EhCV)^mJ<0az~50i#{9Zou33OPWL{ON(<2c*e^Lc()|;icAW
z-1*?X5+CU08a$<MOD0o*v_T<e7gPFDu0u++BQ0zyUqgf8WcBN+xSRxiQo#iYu)9;)
ziaN7^rokwYKv?_1o(S?|hQA*l!Cv3q*iB)<hJyuGb%hUa+(B6VGd%?I_hTsDkK$`S
zsA+m=O)w+AKImOVA?hpwIez4?C6i8RiXyxP$7K#XQ4NkTIuoFlh^CSr;~hEoB?4}P
zA*8_rNQ$vq;1kmwC1wbCDL)ERAc_b5jteGd)*AJYTk9~d2l!>Zzy_?BTUr<Up#!Ul
z>}H1}cJ7GP4f7VOWlN-<U~%LNFwB~G&(`<vfvtOihWGEyrK8oq6NjOI;k4xgLxH5%
zzz4~%BVN20yPUM@D~nKJ4lBFQoirHC9RMuQD`1r0t9-n$cW1PYz?)u<P@}2ROLVcA
zeH{o@vmgp4uy=37Z^(RCgtg87_YBTadsNlmIDCa98fn(h9xeC!p-zofEx83ayc_Zi
zsVt|4Vu2_#$Q|GEJ`_j}8M)a%BE05(1HFcwUurVt>MvPTsync73FtY-_88UDRe*}}
z8W2tya<+n9aeHM648zGTy}gs(din6tTN@bdF`N>d*BzK&D3mbUX|<CJJMyJ6NNoB`
z;Vx2fcSEZdN^e>*pRRmd(m7fTD+x5IH$woDyoAjG)6u7!0!MbT_}T6)H#D3LrqOtC
zi_J*po6M`^oJh|B4NanuG${%6NDMClk<;g5D}aE}6mL23d<q6cNr)XskZh`>??VTD
zfhwyT)&Ko9`Y0+J7_m5lBMv15c#0w-c?;F*^?Hq-)zy5F(I4WykF@%X#arP2C*LI2
zVZy<JA~ZB3Tz^Ya71@AZ2Z=oT`cnKD2JaaOO#NXDyI>M`XB1jR^ngo3ZB8$qjC9r<
z+A8wE$ICh;=j%wM5{g4oamD&_DNK3+@N<~Qn?*<-Bez7iRVZWh!oh&gt4b&p3m6UQ
z3Pjv^%n5<U9Z8aKToWh-#G14Y@>mQ1Ia8)$-JthAnRNjNrZcv{!xrG!8_xK`7q$N^
zT(XrN<v!+ive1P-J_HHMJAjJ-1ch2QUg2`dBhtbG+9nlqd{#D?$)rOn<0Lj>|1e`A
zhsbWJBq-VwOOXT^*`G|F#8eIEX&u*=xNeyPDYC;?<{$n$`X4+fQp3j6!BU6V+7D+!
z6nxt}h>)?gAB@DR*f2R<Gu^;BpMIwHyGwpQSp2y)MO8UW(T~bAn1P?f(h#4+YkQbH
zK*`F1_#EC$H#d>NwCQlo-<@@_#wOsWfWfe5d!#1FQ&1Rjc8^#NBpX?M4m}zf6~Pp?
zZ$e`Cb6e~td}dp8*3U{!hQikW&)%D_w{a_pqx0`iPl5P+qmT|wZH_aeEhEcHqB&Z8
zB{|C(pC7*#*^)XY**x8(WySHl(S3^hVi#3~z0n&<Eq3HDvDw`~0VouJ+EA7HwI%<W
zMBR6svVm@run(b>46z=e`9%9XH5Iv<=Z+ogHh{NplxZdSByo83q9v4FRvoDq02dPr
zXYKlm2LzO(e7I-D!vZm-63Lw_yj;+zAl(Z^1!yU>7x#49iCmpQb-5b23A?~@({|Gk
zOV8R|WKCl%P}L~2tZ&O&Z~bkqRBSV|%LqMP$LdJAQV1|?Y%2{f7-u}}d;7UG{%|I~
zo3lYL`Fanp%|~GchhxVdz0Ce%8OIm?YWCkGRCmnNdJ(`1?Y}E8zWesuZ(aK@e|WV2
zeu?(ql^QqqJhh+kzn;%{g^-}Evj66%XvmJZu)~kp&EMhazsO|M=L}~`)|xNu24Thy
zeX_R)dCf55y~hUaeD>I%8OuvUykuVNGu>uY{0H`qi?3)rqxYIyPbr!u764k}hco3d
z=dd`Nnu-iJkoUGH@<8xy$>XLa!eaiNmsd7c0lWjiJL9;c`h#zn$sC}(@)BDC5F64S
zTspG!3IxDPL)#w<gA@6Gwb<zhoFQ85j1%$jlFRx@i@l11OYjQjU&H>z4he-V_A&te
zJREo1C1YnfW5<4)QJacOZc@(l>;e2yp$to$;_&fr(=nA}uWCc5Apay{2;07fM2VSO
zdaXrdt2y10S;B3uMnh+C@lNsLx-H$1Q^IOXP@X=7$*-L6JDhTqUjoR<uumkYQ5cgP
zpG5HluJ(a~JPP=h1qSAH9l&uxRdjZV`aa=@J$6@qC0-B{XVV*`Hj>nnkZ=8*QstdA
z$324YX?jJF8!yUxJra=kKA}z%>F!EKZdB*vVl+Xd*z7N%^i}3k8*Gsr<RnV39Kk1B
z8^`;MO@{o`(raFXud&>0xwr`+FzN?xtEzJi4AJ!lKpc-mz%G3HISQ{^YW_G%$kO*q
z5{$U$(qcQ{Z=s$}LgY>12iP+>0-Dsh^C;6hyK}N{<Lfxnt(Ot?OTfadxEiplB+QQT
zjOr_Rd;)u%KumbqgI|{7Kwi_Y=7uB-N;D0sDtDCUAu6Nba&;1N@jL<Lra>jaCD8?+
z?h*KS^G>&^Q-fdxhk)Cu!(b$m;sA~>1ajdyJM&nKHDo7^Hc->%X@dXfP*Ky^LA6AA
zX69Q^Z+DW*71)y**aFpCeF%xj*c!_EI#k(SUx!+GAXn>yTrB~)Rv+YA3CI;8S{fkp
zhf8I12V51@YAL9-s-V^sD7DpRR@p`kQaHFuYvP#KC_?c`ZGb1m0C{_rkVE|{K=VmO
zKqm$ug=BZbsDtC0_&9R1XcQqQJD{^n*C%ezH5}NLGJQGQ)?ugU6>Zuo@EtGU3$l5p
z*jpE52X-%1dGYqnZ{X$}u+lAtmyc{-9CUvM>nd0SpSG-+iQaUl_zWi1;719of~l?m
zo;av-s3y-OnRO{U=&@4uT3FQS+`RbZUEwJ9$3HzYd~T_KMZ|cfM&aoTi9a3AVxd9K
zbOJTph3NMDmW-Sy(D2fjdd{#BD1N>ng&JJJdNsUggg7KlsL*qo{j74fW%GJUXFww=
zA`R>eLvu8ZbVN<|k^D%J89jb=ybfa7@EuqNCu73LGc=oZ$3s4}YQx$T%~1xw%%FU?
z94qwMgk$v`7!&me<yj$C!lH-Uq?&&<3i_zb2#;)rh0(Nwz}2J;<00rBz?n+!4S+**
z$@!Y}51dE6VZ*44LWUGJAQVS%i7Y$s9p|}Ixf;=@@&<3BX$Bp6Xzd1i#W$FKNLh1Q
zn2ZA?dGP3Io4Ww51ZPCd4FD~|FB>sGF%=7%j474ir;RL<4RjNaj+-9x=iX960i<W}
zHMsO0qt*vVaa$Q9!BN$jL5`mV4Ey0U<j6EXfk(!5sGUr;Jg1B5AT2OC;D!P=tjzKy
z0}h5MYKiS0FABmJG^(|r#WaR+g($Q^KhYST#-l5cr@8?-yc)cAg{aQC2+|h-oi#-p
zr-Ebd8Mz>g54ql=PMIqHqM0gg=6C0r94s~ASs5=g!1ULaC{1oPPF@yYMNdJ3;Ylr%
zt)CO@+>;Z=hk?GYt^i;L!rk%(!j)gC7N+}zV~<MdJCI79fhAg*Rh+4yJ{keoJ;#XJ
ziFKGJ%wDInE?T*48vZH4tT+p`>`>r7W~8*_5VF{=2Qi!bQ91R=ET(6BCr3L66d<9q
zdwhD<0j=#`!<wh2bTO!R@H&LC$OaBd2O=*Z21F{yDJ2>jI0-%3Ok?tnh~Au@og5$i
z$l)FAAMGtLcOlc2bK#eg=!}UF;7R2{(_veTnRT!I0}}oRZl^!B(O@KO=Rp4{B8X5(
zRX+eMm<Q&ca~-kpI<yCB_XvyBZ&hC7R>6I|D6WQFBf^wOjkevO-Zf{ycp20=PeTqu
zXa-!|7xm4V5Op$xm)JoZ^bX@*Xh<BdgYLU<OgtgW>6<RYM)-5~1u^RLu8cP|<d*83
z916#9>FhQ0X0JN@!g*}o2_B1<S+#*^f8uOsK>`l3x3xNp7Z)+2%<KF)mtHg)AeU(>
zF53|Z@u8&P7Bh#a`n<jHf2`O=1(aWfURvx!KFL(;k2j=JdAAD5Om?ei^4L6PFQWW^
zg_Bkb9VH_d?WxgO*&@vl8|*1nlD9;D;nGfwl8lUIC~kxZ=@9g1kTVjb6LEhQM(xRb
zC{iBIiG#aER<4Mr+xF#F^q6$TTmj`!c2D>P-J)N|BjFec4~M}AUFHxjDfCm1xJJUM
zA_J;IAQd<TN`i4^xs1>^B~)h1fzv?|QJ1Pn2cU<k#}Dg}PLdDKM}mye%lMKZ7B@FZ
z(rwHv>Kf)fvC{!{=5;C{YFevIYF#mBF4LN)Xi|1zzlO;}4m-)ErN+xBQ6<DpS+Zp~
zt9TVH*(%PYcna7n$xwK>8UT~%F2yZbt4vEN52P-@SB1X)UnV!*m(e(@6Yy1Z)B?qG
z?;z8qfN>Lr+aS4wJk5=9JDXWu?i-ckYZn!*qHDT+@m5KFV%#D*5AswH{`d~%PP+#V
z>w8NAsK0TjH$4XOsG_0}=;=$57;vYuU#5%7lZSw8s@@oV@SyU(q^_DBuO@!6JD0ya
z!!RIj>aO`_;Xw%{Q;noh`p6y=x{+eL5V}7dP5OOwf6B88u>**@>Y8?1Ds@DOl_rhL
zEk3dp=nsZrNkR!(N=?O<8wOR{1^lLZR>L(s9nhG36{MXZedK4a@zhcA3sv21$W6}$
zULOp?0246i2(AZ#yg(!b;S-V%Lh(+rzt`k`M?6pVniA5gP2VM3ycBI^9~w=;N~pd-
zknom<%p&vQcd<9=ET-|O5w(7AZlId}zPjk`752&BDbu|{cd%mj&cwERJenDG?oNI@
z0%Ln2KJTjQ)qC6AD3cJgD>jeNrF^BAHe|SKp;rj~y?u{sVQblX@KN1;Ph5Ey{j+$x
zlW5EzPU!Z-+0gvc_#l>Gc|VKy55X_|BD1ep&gPEayVhAgqs);INe(=Cf)uWL)4?4;
zpgznsMMYckCRH?qNYGt=;7`!UtIuEH?HUZXfDq``G#8Rd<@u`sNW2JtLWjXnhReqL
z)bbl$IT?yL(gE-l(bm>70{>m6wo;Mig6?<%*??qZo%U^#wBDVzAK)3C&HR9P`}0pu
zPFu@%^hzN5Z+=$>_Xl@q)*DOC!cYTzki1F_tXJ;eXL#b9hlLCr3XMTB4h-4kWR2Zk
zUNAzs;U+P;gs3EO@-9d)c8=kOb}bzGFyg#$)j0_-wjqcF7eH`JJD-dR&L|2gQVHaZ
z5Ft~z%+l|}^^cej4`JPaO)-T>45z|nq$ufOsE>$(rJyrjlEf(lWn+^O3oe6*Z%pWC
z2YGM94_DE7gjs<ym9b`mp+ulOec`()-GyQ@y|dUXFW3$jHsr(l`U&q~JZP-7);Q%;
z1wLBgF)E8~vlCuvwSDn)2MfW*igC_BXnAUhEOfW3gY|>qn19&B*m>LA<ZcOa9_9_9
zBVin&cy|LyL`0;NCtTv?6E%23apD3~`;T4Nrl6Tdb%+|5nJH0N_7|hF!Cw(hi`(zB
zZD!?6PQuF|;bf5F&DS&8yP->-(3kT7=h*BKTz7ToxZP^gaiIR%R%7O9?!Ke(PB4;b
z50azdaV7G^Y3Sk>n%fTyB}3Z9W=jrQ=#HA`!72oIx1TDu3?LGs>8<PQpy&olk+zs@
zh?a~FjTjF3$=VG?%#depM*&s_-PBw1p>2Den3;b!=;%e;8Km#KxbK&gQJ%}Vx2rCk
zHJbQg<CbL;J>3tL15m+oN``}li?t=KlDXJ47z0}`Qlvqd4Qj4dWg4)Mzuv$)M>2BL
z`CQT6@(hpjIs1GTvVeETZaps8xlOQ_OD$iz_rO{|PVYRYR|n?T*?*9q)_{E{zwS`(
zZ3>TPifv&rM&(xZ-x;Y>-Slv%Ue9oRD2VQzgb+r!!&${m7lH`s?Y!RYhruxHmHSiZ
zHiYs>UJo?Q9QuBR<yg`1dafAFOGgXCd2Nxg2E%(uy=ctgzl8gdw%6L9tRedXlu7sg
zI~07y>K0#;;CFX`1$OW|2gheI6W7ZC=@}kb`Un7&{+^j{BuFbOMKQqr^duaG!5C#*
zx`zPwPhMga3%K~pB|#3hnRPkA-(l}C_)x{aB-gWKsIN|lZZ?K(`HL=4h~?a9&UztY
zHh+`3FlWV1Ao#4U-Qsh;r-8(b!ksW;OE2+HciHb^--2vcr*_;ZK2|HbLQIKj&tYL&
z-z!bTU5>EQq*Nc2HD?~jX6+%MD(N_idINV^6Nj`kkFu>hS;0R+1fi+yQK*8}RQ546
zo7q&=eg1|8r*v+8-_e|3cz%wE>>K7B5u&yH12xxE(K<jI_UXybBKHK$o=&=H>#)l?
z>E`DO>0et_9VFvX_8f~x)<LE&SxPTT{@v$BS^1PJoHJpuxR;ai4m~p^f?W?uwfo+s
zlV)CLWL|&=<fACM+EyZF1@AwzUp@ripX1`pqp@embFj&y#X8nzbf;6%#BvTS$cL@8
z=pv|+nHt-p_m&XtBW=ZgT(uw9@`O<ZZPS2D;0Ov<76+Y(p^=IGR1;L@{Hs{gC!EgK
z6%~<XO2H;CG|Sv)ZKiRWEKBKii94Ul8hp))hpDj&td<p6D`$&Tzq5=HbivCUtw_$O
zCCjaP%dKY0t$E9>+2u|-J>Yh^1qq2#{$$;lSy2(lELu`Axr7=F%-+?juesGlS19b%
zF(g+{cP=4-;ua=hYAED17maHy&8;o@b=C6~`c$p<n&+z#WMeto4bFGkE$bHoSQFS+
z#mwetf&R?EU3B3tI&j&dsqksAh6iiqilQvrY?7wMICCs|nt->Ga{R2d*_?h5znXRo
zlZL?aBKu!!26;F2x>#h{`7s%H+GJlJ?Cc+P_Kw~hb`JNC^bB~vW)_-)*UBy~^{uHr
zsWU4}7pkJw%51&9Lm(p;@Rz1wV9Et6C>THic*uwitLEp`%;z=p^BTw!QwtOBPWM+7
zCsus&M9Y|ooNsFnN)B1c2qK;#SRd9tN6}@a`pqRhUnN)74K&>X;I%0LKU`sJrS8qK
zc_Y6D)U~=$3j{+g#N~^IE8E8Ip82}EZHzVZb#2?|a|NMNM!c>fQ;OR|g(9Vd*RlfO
zwyY?&c><)YyQzU-U8rJ<jczpCO7$}WMqzo6gg8yBcPAFs!hHRq)R+8jYdWe*c@$7N
zUxqvJes>rf*_W&Y$H8X>E^5<|w@!SePTW#OQW*!Cm8wqk%c??^?5|fUI`Mw=2}KHw
z5P?+93KGztE{H`N#BB*X#w0!@Sw~UORHBYcbXt~i39X)x;%*Hy$teA+R0$dUw=IGs
z#>WIdPA`Bs>+~-te+WeTM2y7$>3$>(yPsN)NIdT!x)>qHGH+%-nF{U$<@IYJsVY;R
z)CXF?lPexLhb4#oRqti?w-fr7$Zyr04j|Xfowyx9L&c5<C4L8f0J)wAImPeQ_n?s<
zCaIds!7Gd~q29>zEgLT#96o>8|MhT>f`{u6@ZCNn__@BiVSy@-X_*0JkVO3(*5{pX
zx7%|FzGV6qOb9t&rf<RL8z4$f%Y$%3$m@)I2;X+tsGNQPgc<NsGhS3dCu$h|1(ey0
z{)a(<R))5jQJ7o>Bg&lUI4X-2_2;2T8%OsRMHF~02kFx6dwt0?7>jGAm>C|U;b@0Y
z`;LBoLW2r^?3s*U8w>M=S20V1c*zRA)zvQYI!2#FM6dCC<IZ*|_xQAuCMYm9auyg(
z_x*J$rzxm%a%H9|6FM>&IkEAL;JF6Mw;Q~qjGAxAo0t$qgv5su=5sSAaRHRGL+Lxm
z-s#!S+5T>4=k#>%<g6i%vBTE~`+KJbZ=n+`AaAkgC=ObNKP&dCL*aQl0UsTqy#tqp
z4LE#L+S*zvV25At?EZc4th2MbyLWopIobPv-t3?3op#QSJ3sCGyvO&zqrDT@5cgm4
ziP-;j?-hJnne8~f!&4H2=Tk^%{<oYR6~mhUE#KWwNPi4fZBW?UBiolAnYW3dng?ec
zwA`OKZ|>luG3=U7AbkU6DjvCkS}U%HUeJXR=y^N$FPm5Fs~s&rZzdmL7{}Y1*lH2p
zd5aG!6lCoIRGW7_MvMFPWps;%aJ!AEnN+xUg1RUM#9(E<&q+_P?(Y@FswwS+xj|O!
zgr_m=gr{>NxSvUeGFpH3K1)Fie%%$_b|%cd(XVO2AL}7Z6Gh+Q!(|xtk%~QTq)-Pq
zB>4Cu=Z`isuuM2+b;|^tVI^z9YGow}J7|85ZdDQ;GE#ubxR_p{pw_(#yYDb+SUL#$
z{V)-GJHkp?JnUkAJIHe{QUDGn{c-dx#f;@<SzkZsyHw98-kmPk;y2whN5sgXL%zdm
zB;=DT+oC*z1%JLrBVaajTMoqLLWQIgRk|h^>%5iV3^2dgv;)k8SbKoEfM|d}1?f?2
zAb}68Xtq|2S_b4g=rACt%9&m)E@s-0x&2<zrgaLTd_8&f;Jwx8)|RMYC}vIZ>o7>)
zaX=6Tx8e2!l9K!{V00PwcKqc?ZUT*_UN{3#!us=1dRZ+Gp{H#}`|KE#p#;mln=VC9
zw8XpJU33wWr^SXg^j6ngM1N#?d7D?8>H4-Q-)A;(Q7XGPgNR-!O9U5FDw|cjM;HfC
z_XvadWf@o|oEk@19atqtSgmg5+Gn!9E8e3qcewT)1@E=Xx&UkCsmLY7B)pi05TRNX
zLVVee`DtD=RSA(GR@J#_@R1U`U>I-Qx?Tsvu<r)=<{Ob|sj@;~K)4AClrF<fXj2$x
zf_0jBXw92zeQw?MtsyMEso)u~3^*nTT1QTAYXGp8O6}IaY>uyfVj6vr_;TusF36NW
zwe-`Ox;c7kk?N?ly04sqP^kmHa`wG0C}=6>(djMM2<_Zr`bzOnHVj|Sx3*S*jq-Kr
ztF0B_E>;#Y++)L6Ttmt<A|NeiA!Wv@_&)HTGd=5eQc)I0VvcJo3q<fG#G(-gXo=;3
z%uN5Af-TNw`fZg-F-6_2!q*on^Q~RB5@V0TcQtUO^2@A4tI?M#L#u%)xhB(huKy(v
zP?7Y~^!seOej%=x2M6ofKsY>L={*~R_|nkF0^UE&UFC}|W8ZB0U0IzhU&h{Gn(GK=
zCdZd@)h%>+$vC}ad{gV>WOTe^>Q(m4up{z_A^qu}wm&mi%8Gyn*Et#5$D(-<attZw
z9EV?BR#c~FI&ay1C1p{%klM&C$e!Y&fYUs_Xiy)3D*l}rGN|(0ZK7a}MC1FcNxX*N
zuqWV2%NTT1=7(4JAEsuv+^U_iV4tR8-4RwZ%l;wI27R3=^p{}lhq1^l^ehkz^|ne3
zMA-sFN+Q{T`LW0ges;V%##?OZsUr_OU1B0L0PM%E%it6o_<UIC5@&Q90e(^ta-2IP
z$+HQPK(Q_F^306$G%Ges9Wo#2^ptoaTGOMQ4e_<l-_USOO!?t7=3Nwp)nm30@4Q(E
z9-B%=XLsp9Xvk1LVI<!_M{KmW?Onl&aztvB8|lCil*Y50Se}VswzuSWVfB{^=J03~
ziBecbbIXuz6E5)FqJsu2$v=5VcrNwc$RZa?%@x1wK3qLm!6a0=E-?KB-JyF&c9ca!
zsxYfnnfB_F&mMq#2&<5^v^+;<eTqOw33fMlDDVf#48MG=b0n3j1XB~>2erG@$W<zx
zBnn(38ls-(e5NjQaZWK#TS6(u`!B@3ZG&m4*<Ox%LAD2<3)<tKl^;3jmob-$_Jr=m
zAzekGRAl`dNKl3@<K3&M-_s>I#X=&NejiFgIu#D)eH36(a<zp7E<{Qq4G#Z8rVbqx
zpuKPlrh=ISp^RmOi$atk#c%$p&AsOku)dmK6QJdue-E~b`N?Djwl$A?y)kH0XhT?v
zx@BU$we}jP#eNu2VqZ?H!me`8iJK@CgmmrA$0$)7rVfVN4*hWab;{m_;Rvp41sLQ)
z0zVJO0qaK>;h5NSC)Zn}p-pRV_J`xw<3z|H!X=&kyGQx5^J`<Jm6cD;3)(`>18e^Y
zdgO^)WC0oApBNg7T-)O*M+#puc{!Jlj?eZus$iL4V<i40?oA*!Vm3V0_i9t&vJHlO
zr$mcpmPTsLv*dT<VK0JWl-A3cbVzRH{jk?hL;b-MDuf!`^n!PxI>7uvmS+QKF3wpX
z8SAu?EzZ-3YnStEOW*j(QiHX->9_P%`lCTQBXvMFTMPDW&B;(?_~fM7Ea;e2XBRXi
zi`eS!9KV+mWof2;Z|t~C7wW}c$lX*^q%5sU>B`whg$jLbcWsieF1+wf%_=)q!?B8m
zy#HpgP>m;hEvBAQm3;>HGouUEaB!Qsm?==D<Ozvbw;;Jr0RG$>634z8w|j>re|+L@
zW69ujIR8o`Go%0Ka_hc*%q4L99;LGqjc(fI+#Oc;Z=?7CJqhL2$Eh56I6L-3AoCxL
z!FWiInwxdi#i~Kg*cm_!0N(SfMs3?ctYWoF9At&7i>6x{f)++sdQd)`!r<z>1?fi?
zSl1nWs!F=XDo?H<(l{gD)tJQ~g=36(dxQn6XD+g;d0|z~T9grStor`**yEXM+-UIl
zo#t4cOQI&YJ;@27@Tc3IrZ4tU#;+zn5>k1}2DfLmTm(yO7>QgamK~#^UVH5rnfMC$
z%XrT>{j?B98%vm=_z_W31xwH|{pt3t2~sK>CpRc)p?Zq)tq-_F(T_Kb(1D{#60&&2
zr70;p@U4&!<MIZ<5YDL;(oz71EDy$VFEa39`o7o~-`F{*G;gK2JapsV#e}K5K6!nH
zTV;iP_WE6w_Sx7Cp)_x$xIA>@-v!0keJ(NpX}q_L@tE4&TjNV^kBUgiSeuS2h&kW0
zhBGfpqly3iyN&;51(563ap(nUl412ZojP^ayszIruJhYp=QEhT%C~?!zrOs}Thp`8
zUxD03=(N}-YQ!6=#^$SP+Een)DcfS5)dQ1RQ`>^{OQgBiK{Ael{z=%Id;oT-OsHd7
zsZPB)3<u|70-?=-{>i4mk0Fni(rr}>SsI3}?-%T9!!X6Wnw6_%YxPM`D|ab8Wk4$O
z5GIWM;b=0h;?R@v{V{~Hvm|&QCTY+Y{^tr$TU+3K`7>A%cs=QinNR89rDzw)damaR
zu5z9@O61}PnQzk=Vhdnfy=`t|q2o~nWwKRK%irI#pC*@KjskhzLC(`kA^XT@Uv^6b
zEN=Q@_4(z(MOS>x_p^4S(Zz-(dN!Nc2!G|#{E!2^j{1Ez49To?&WU*dfwlM}Uew9#
zGGIFu#*(l%gYMUkw+o^tgv<3h&*g>Vt*o~#B?#fjpPkCjA>J;bRjhkUCXpQL?>>d8
zvG~=(cNL#j1`UwI8^1RT^rUuieiSyMDI(kDk0(#}JKa!>xObI@lOHP2a~K4@UWdYb
z#lsHL6S{KyPX@)U(D3CZX~u3XWCuti{0k+CWx=q=kB}s|kuP$rrT|W#&N5*<p3-OA
zB%F5R!GKGo@Q!u+L8^?&P`9L21v>S5PsrUc!)91L@v#u9=11Q$Gfv^`(mQgt7&9lk
zn3;{aTpTRT5j(%6t=Y}i!eGN<j~5x}>@3e^eDTr0XStk60TW_y0}m=JX-+WGI}EPo
zi$Hp3(bV$8j^1|gw47L@4=Mh6EkSgaWajgZNnWetMR>o@@Mcy0RyFD$R6Mfk-51dO
zN#o07>3XuImIX_<j^@oXBgDCVo?3@~ekD7Ml)uz`Zr6woN#USwPaC3`PnIwuTugyc
zleMXe(A+vEK647)B|7u^SrMBBJ_jIqh4}%W`S>I9ijrm9SL?p8Hb#Iybf*=i>Aec{
zk<i<YxDN~N7UdV?vI?)PCj)}j%BC_MS8)Z~Um?HbbOAZ$cH7GzhS`mau%GV6eOc-&
z_p2D*37SK+qz3+=THnO*&bjsRWjwx8FVdz3Y!~emcKM$`BM}eB0In7=BA^UBr;1l4
z4(;R*X>rj&O;rHqFc5$fKEnMob7T{~1j%7oc*2@WBo+_*H|#nZUpWr4=V3PhXZ!d9
zy^2TZYCC3J=t{156B00KnFL_VQypOr!%GhCeaI+uLi&BhyZ%}45STqn4dBe4%Aria
zn{E*6CiBe?&W=I997uj#alFX@H4GXxEBH(|K1)lE3ri+kniz>U)rT7=!d4AdN%Jve
zX$*pNkU*fGG)#>P^kxM9FbxxoWZ#e8fvYt+<vhP=&&5Lp5t+SbD?Su^sGy&TF?rv!
z{A$7}g*UR|I;I%2SqOoAR1ku3MfcuhTx;OADULluqwq7fLgwK@-)C$1wv~`W07EUo
zxmR)88Q;@vN69qFyLyj8_q?X$wr6El5r3-LxIaaJ!R|;9!aC*;sv-^|2yY^%>G<Lz
z4dH$q<F)Mb!r+a*H=(b~aHO&}%%!d^4i76nlwBJue<Izr<o8T7WQGei1EV{vvt8PX
z7;b;!fG5L7P1QpMXM!qALnFD;5~;dOH1^l;JKG#lVoe&9G$3zFiwSGgz8qe4(dzEa
z8CUaACn*NzTILD52!ySY7@98c7crk=KBCs|&BuKpe;+8(aOn&SpVcz20@k<neYhK%
zucCUKX}734s{&0^Z4WKI87Z{Yy4y~nMfWdYG52D;E-C;5Wk4X~t8i$1ls*jahMamp
zEaezF4-*hl(JPBV{%JH`^1e?7!;~60iiaUD-RG=4XoJWp0%RVM>@XUP;$$4~N#?NA
z5dT?W=>&phL0&PuaBi4&*noqJ`LxE#yOe=)3)>_CI&D|+4YL+X!S#K<{%iT1Su3UB
zn!bQr!3phE6kNt|2YIR+MNk{N=5aKWkWIaqEbUjhmDI4(qZJjMUs6w>Ban3sv}OuV
zH({se0!l?6pqVah_5}8_lDYg+ESlpw(Pine*}RX?R`V6ms~eyx&<dSOkbj0=@dYPe
z-LrAp?Vo%(Qx8i;>58`NTR`4i?AkSuHbsb+!aO+GR3;gYv8jnww*gH@@A;OzUKKk!
z*O45An`TJBR@}fgTNW{0o;ja#QW_OBYv8V0v+mn<w!EQ@RL|z}hwN5-Z-X0aJrpYD
z2Rrwy-Qz<#i_iFxnr}_h;c5(DEkR#!_lWjovls^agmXGh_(4-}WN!<Ix*J7?HFcs3
zR#ey8bPT5=X@n@Js()mkis8GxF@g2MA?MurAc-^ZHcl>Iz!p%Yb^pne3rhNoL2yeQ
z@|FL;ce20RdHv?)0sjYGMt|7Z-D~KIO>9UVv-*S2P0R9-6))r|nJwNZ+T+!xs1|hE
zFvG58DhRc>Q%`8Ri|SDkS<l#ToF0pAVT{OfN-}Q1ml2aLn)OY~4MF6Cn*CE`lJ&Kr
zlbY&xW@T^In{7o^Q?h85b2VS;ll~Z;r;$eVfd)4pu2~};^WC9K$!MYBr`rvXf|^b2
zMngi-lmO9lP)veWVRB-044<TYZ@C7k4E7n=Z-Svfs}Z-0%UA)I*RD+eQ2#KU(fJpK
z7@b@jGmOhRl+7uW1xXnC1jC2<PPw$L{e8uUaM&|G?m={=g@wQIx6uWBrcv%5pX}{{
z>-f&u@yTiDr-grecz-Z3)HC|~#dqO<{O9xTi*D~fE|zdiOKH+w+KrPC|Jvs$AUEHu
zT5T-zKQEp?$A9@B=iim(7vFySZ!2rd->xk`Us-v+`ftmt%P(HM_&2tE7YLX@7&^vg
zhT{(IGx;Oe_(jwYVR4UwWDL3#oc<PrFk+Wi<LeO9s_%aDjn1dukIs>o;1GcQ=%Yx=
zg8pU5-?xR=`EESANw98Xx5-wQm;dA0>hj8S_G1wC*^iTOI1JB&Nsnz_@)zw(_2R#x
zA?JSE7s<8&gq=K%CrKAXyk5wDh?4<yIGxby<s1&^XgG-uQt7d982=IV`vKH{O-IH-
z)P<9ty>GMC_HrBaPT>1Kx=~eLAF%6y!j|<Sa9KE?@Vx>+ph9^pk=0_q#S=_#6Q7Uy
zCc@%j$bvDu8jnZoOG^W4x*aE%OC07BZ*8f~w}|Lsh^LU+Aoe-yAoqvAk(*S`*9B+8
zfe@+-OHYmY>w-&3G)!?Md@~z?XT<xEBI?Q2OTJhbokk8?Gm5F{sYP1=to#M`bjfUA
z1*Q8~;P7vTMU#8{-#ObZnL0|S5s7YfC}tDffLQnh<9f>8QaP*le9z_|&XWin>W0q9
zf_|U#GC&IbI3A&+3XQKHqDq7hHyGZq0Xh|0K_3AsfxZrsA)IMvcuk7VI+*w_I7vv4
z7(VO9NoSmpz;*LhZx}@0gJO0EBlW|oW3*3E0)t);0`#ZBB}`7h5e)Yu_#716uJ9r2
zWA^rnZ92%A15cboUB^gS;9hsb>1DEcmxJ<vP<1tlRJh2QY1h#gDOR5W8SP3$V>m@Q
z1By7_1`IRB?DyC+KF^?LP|9PV82%k@#Bt;^8@tX;w?$s6W!?B*6mT^c1LFn`4X?XF
zR~q!LCA-jqw%U=*9;D-;FyzOU*jr-3X^ErsD;IHhk|gA}6JA_!zLEcK`%Mz^)|C{V
zsn6<<DSGox*-||qbbr`QXhA`{3Zrkk>?v-KVk;hoAI5wi9X|UbOk%fNK<j&w!G!`T
zg9QfLZu%}7jo@Y?pP@FaLwKRNVJzHYX(%r$o$@m)4S$Y83X*^m<ySQrlefs?5&in+
zn?@5yErx5p-;k)lfe8$_vmi+&LE0g8Q2jYER8iR}$;^o;m(!!!`vKSBvc1Um1IrcT
zWwwsM6}~DR!E&CCyzOGJT)05!Csr=O^!t!dG2|WVhHw{-6MWRG(nU!slr`1urqf00
zoRfRp-1Nl$0DGUggUafrN{=jNH=F^v3^~ieyDscx{MFZD8BrJ-BnM=`?2~AWyL;Ha
zYzufytRwQc-|WSF4-H$=?y${vHVVpZi#)}~LD$%6Q80Cn3;wouBSm)OC0$BfY|-@N
zw%8<z%+G%CF8vsCV4S(bZPPeo;n;+2N{4$zH4kUlfMZO{57W}q(ad|2bJk9E)Ri4@
z$}nta*~?F`)!f=Lb8(@8*f=JA#GNyEIawCvF47_9JN%rt>S%KgMfi(OMJUi%#@`5{
zAx}|w!zJv71>&P&04P7?Tt$|V&98}CW$1?TH^hcjpYr3Sth*Jlyc&ROr>KNx=Y0Ao
zd-2UT*)yreiDIeMhjLSjh9JFpwtiLI@=X!rl0jhPzH31-_iaBE*|BvAuV5(tjn*9u
z+awABXBzQpdkgIXzA!Qt1uvSLFAhK=Xu*!+Bm0_p9l{k95y<`rgVlcq3S}cqeT!!N
znJEec{Ui)}H+sk76@t{dwY}+XvGdry#3z}!$)i}+J5VLTLUpASf>M3f*(dL1$69)-
zHk_wRa@Ru5qQ$w>X2kobl!vyCa9RbTsR6i8{oi$zhA$)V<v$+HSO53o#kVUN{om@!
zqyFzv|3~`2ER+||02F%}4r9*E{CCie&)Meh@K5_3-?dl%>v9l8a3l}DY!%`!`=@)I
zm-|OMC%<)$UuTtwneSOOVh(GaJz(VeH`GE0SE=!Ia+SI!VQb^q)DW;Dlow80fE~yY
zo{kZ5(U40ud~Pq2cpyO?Kxr=3bkS+=DiE%HcsGq7Mnec;1d~WQI(|TL$z&jIp&g$7
zyjv1P1i;G(As`8g4Dxx*xh%u$@DmF(UrhYFQJzgN3!{Eoaij87dBu(T(<T<TL1YRl
z|1;!dLh!nbCzn?+(MkBPNt7V{U_cp82{3h6!Fx*XgI=5|vG6!04N&-4#AMnf(eI2%
zhy?m;;mRp;&!g@EygkuwDb(YWK4qYXJ%FFPr~yD(-$Nx(xD#J&`h@`5rCk4L;wa2j
zBW1e9R>e9eMjd|@uY@8=y1<vN3s~*&^)leAsFPB@Z3<%K5<RUEk&+u|y=MBW%qlvW
z6`({trQT)+B=1<|C{g;TFvk5lNy7>UM=n}&HjH=Pej<g45SRRSL0G~QqvT0pNwL={
zUDu`pEW6b?s0ZLA1HkHm88csa#`<Y3hUD%Qh^?G@^a#8`*;~4U-tXHYBpmHr@Lv%p
zKivswJQ+QgQk%8K2^V?87-oGYi&ZqdOybFi0iWJEgs{_SEHW)+QbT5Vgk6Ky5IGm6
z=6nXQ**OB&A~(pVqftL1xBKyRY?=#oLu1Z41_Olv%_oY$W?HzFYDw9GRi9%E<uhc&
z;D=zU`r*a+8Eod90qBKRm6Mz*$T8Y=$i#*|Er`t|#LLaYaf&8Ajh#86hYWlBc*B4W
z9n@FIUOKQKh1?BqIqKpOIS7F>guD6t*vz(^QXjE}^BDO`7fpDwdQgF;7GnM~odw;6
zFgdn-eJ11S$~W}yDn*a8r9*}3L-3#%vjGOtmWqas^N(Q%f*ZPELDw+A-r8bmjJj>!
zBt%)@rF!x3>#<-CVqA&{-)0A(hpDm?^>1`P-ll<wW&?8j`Z{3}{MLnTK!4mk2A8E*
ze6_rXpuvkRDzMA>(s-bnS`;d%7Bg1m8@cq2cSjS>!ONw(RC&X2Y_EdCy;}rZ2h*TX
z++;QnlV7oTZ;>)Z{8;duVcOhUg+Is`XGI&c9qxtQ(r3Ut8?yNc%#LV85RM^-p15Jr
zQ5)DtH3RGVCPuaR!A5hq!hVopAghDWm_`Wvu}<w1yY>mx_E#?;t}3MUAPK9Gtme~%
zLs0AMu+1p-XQ@ZAh4tn_`OIEuO>dSmG;dZNN|r(@L#fAa)&o|^dm5NEd`*qFw&vJE
zvN#iX*A`iT+#9_(!51g`g{-iMJPv1W6$BOX1&ka0tF!@d(14Y|!-;s1p=aS=sv)l?
z3*`+)AQsA--Q&a8J7@bZ_Yd}eYp4b16DeChnNdW*MJZVd-zaEDe}pJ{^8>wmWi@|#
z#@pNN?3|wNot$+}cMkVDhdXB{`@eQh_y4iikQy90Rx8IS3|IzS+1f(1$;U-Ps|=<h
zzl!`n)Xh(IXVByA{?QyT@i2V67tx2Xhocs<{{;))2T>oR7jZO*I7VI4Vvrw~nY1Mp
zP$435X45F$s4a^1r+Sbn!xr=vWntt>6b|KF8u5&ca($=Vr>Ssh-2nI;aOnxwRE$e~
zfuR_eW;sEaD3O`YF2Hy*&0uG5;GkPl-YgK3^=`PxVAJr}Ir-}iF`t<(kcNELKAh-2
zB>p}m1$~gho)WC2uxAM`75ifEuNVzJ+Z!1-n`XwcN-}-o3Dl83)_9r7mv)P8JZ{6u
zWasrhX!S?1TZYNqr~u4MFM2gxQjhelr3$Kcbsb8xs|9E7^eqVf=95rfK;pLmoU4@E
z6&G~X?`P<P338VqvPr56v~Lz{DY8<YJS8PJ&nHW=Tkvgs;v-vJFa$zbV|`un`_HyY
z#YP-W69og1U=69E(W63eb(F?zywIpmC+DIP7c(~9bKPuB9`M`Z@DwXULEVt<)WwtV
z&1W_V-;_<lBAa;<AhiD+C!vPnKWnwS@B2TNHlv;cK(YP*yS49LEIao9mF3m%9_{~M
zto?ua@&1po%#Yq2928pbtpYQc@73}(;}_`*kiHAu_OjnJAh;FTHs?`fL?RdHN53&B
zr?}TLqbxG{H#1!`iKTp<)=mO<hUASQVHD{1O|t-9T#(nM_>me&b~R%hQ5~ArI^tjo
z3r;veCH)(yH-gWivr7TQ`o@W_W%<DI3dWBOsl%!>CH4$>b<D*MI7R;vCNcbrw{L<W
zhRWFQ!2riMoR3GFKmRKpG-NC#jQIN$zYHA1f!}VRO$M4}`6<4J3s^0XDZz#y{QxlV
zm7E$FCv_dLhC!YzB($I^g0s?Qfmg{`p|EYBRnMcIVsvHUFu`;KdxIFxMGTkJFheYk
zngo^2F-gh*c<ad^7*2xzlWp-SvF!Pq<K8xBz~q!PnjC?fOZ+8#h*YTNZrGb7d?UcA
zB$2BZhhk$3N5F=}Lmk$NXqhikPCL9*18xSiSb{SoLTsf?YG!Gi=$?rl2mSFAPtSx4
za<f|h2EjYtbc!3a5lo9FoJ?6{D5CzSz5&o}6W)tRj)5OcxKT(fS=}#2tW2;a^J`T^
zv)wzfcXZ=8>2ZN+ObmF`g1>*4o#fOdAfVClD(t=slk+%8#O=dn9REsY29}U+wvA;O
z^wXGeqDv#Ggdq|{B}D)ZCD*ZGLlh$^m<8-Vhz<N8#PjZ4DRKC+aDtxEf;bI92H*h5
z+$Pop+S|yV!ZfO>6|>kxXt)y5{s>9~j<QR#^?mcr%8E^pve#DIJ8IK1qoi=4h!}aL
zljBA0xU7r)U}-$!i+dbVxEC>Pd?`kwNnfd}o)QsqfUuCxf%x7-05911=_+kRF%ck!
z9AEN7jD61OH=T?i!dwpIw!#~3FkJAuU}y0t>JpBkKRC#afk*aa6kLW+5MdbdQlN~S
zskq#N_j@^Af;lu88r>IEc^+N`@1q#w<~h8*Wi~CbZ)z2Qf4=i1#7eA)?<n^vLY&|&
ztPayc>WWr98(*2N!s(FRA2*_m)`1;yUe9m;QI(rTdU3m`j=Pl_p;eXfLN7aMz0fr+
z^95>>;V(2<pe_{tDluSK#sH|zwzr%N(h#lR<DR(1Wah>{|770p*clfT4$j|*$AQsm
z#QPn`I8SSYp+GgWbk9XQuVGu+v~#u9xoTRQ;j5^pbx3I%MQw%PXDYm9Q9|ZgkP31#
zdj^-y(P(1T!$gaH-jWQ&Q6usHmKaitlSEEG<lRr<GcFz2)87U};eoK**<nwy2>&f&
zp7NjKtdcysI(ORa5E#>QGRyEe1~x&9wpyEp8(Z6!9T1sA<TJoKX)Kb-;*pB?D+$5R
z&?F?rk8U|_^eaMp(KBT)5)41F@%#+sl8k^kf{ouhdO*PC1h(cB{e(!&VCj|2Rsnq>
zuzdEypl7q`Mwo1YWdQ9kB2v&W#TqeVxrbw^qfdcO)M?(Y78NX$eY80>jl)zO$A;?E
ziizbbkYUtvPY;)MV{`(1)T+<2o|>I$P&3p3LQ+cW+gxDq6^T*IH~yB+gL2gnb4&c$
zbSvjA$S)nfJUW6=o?%T9gqhY97B$w<{VpSjev4v@=K2w(7Q3(c{B=;{bo-NDD86;N
zqtVJroBxgAjwj>MWXwx(ru^JnGM<c$XfJ<>MU2tw%NMTC>Eq|U-LvD9&hgQ~Zw=7|
za9+`cpL1JjPPP13TB46)?UONnDo_Mx<<_?md*Pyn;=*2yYHrNcYAt43uS(ZstC>Ox
zpD&`|9ZHnAaOLUuc&<&iByQ#W%3hwzGM9BovMZFPk~aBEiuLn0VDf&#L*W>0I)7vs
z=f1G=S54unkcxL5EPSD;SP8=?LuVuPSqr6X!I`|B;opGpI+D<}eSNz@P2B22nJ?#;
zw*np+zt4U-n5@(EOMsx<EAc<_EB8c4xwPYBw}06Gb?+5<R~y`RMJlVtgiJ?;MjU)2
zD{Rp1PLiN|!}@W`31(k}3}tKWwU*e4(C`uk6r%nh8lHkFu4kEn<UMHXscB8HTT!@T
z3D0&21~{$@`!;BU4BC{IMfInIP;|df!+%XEh(Tv8k6_>a(}zqN_N%Mt@~XBFH|6Ff
z?WsM(Cr;#p64V17gdVk0kw5OXsAvlMu_@<sAcWhQ{JHNw+7XgZdhVW6gLv`*YLYU2
zcTPj_Wf6X8U~t8DOfJrL)_`MF+T2t2HPGtUjAI@zoXX0u52lna47@Yqlu|l{3ID?W
zNA~rf#3cR#u_Yv1DRYQ{51nA%IIZ`?;U#AqS7fMWI48db5MLA9MyzKQ7IKfYWrjjb
z%}_^?Qn6mlS3#)9)Ky>@>?J>)jZhq%1;F^nFj!s*h5(L((&YdyfuM5<M`I)}Z{w&X
zgB=VnL&2@K!!|lX01BS<A#5qkp=fL{>5n7O2tpLzE@Ji>$0zhky~wS=TOuw+Nxfn+
zjn=+dVGUrtVF+Q+y%Ow`aB@h(QHXn$ibtYhfm2dbctc$n?mfT?UB|dUqaMc7j>zRE
z<x)d>5y9<d4Ad!l0iStC(1SpDF!F(1hM3_^aB!k7dU&apQ~+sCMB}zrrGl_;IRvcN
zqnLdTtuE0(9g}Wg$E|rHx9*60n{8wM^3G(MFK~eE#a+{f!<_(dK##vXdvMNUle||(
zVHYAWLH0ozaMigXmWt>G$`j4KiKyj)_QNEB$J03fG&Xz;M9AfgtE|ktDI`|dt%A`g
zi9baA4Y*0+0gQJ{P=-a%IMB32nDw|DA!YGz@K8b@5G&M+s3_gRHqT-yr7dK|G<EAY
z4EpjlG%6#>%A7)u>knPJ!&suS#ny^+`ANh6g}!=%h7P_+#J2J~WNh5v|7;59jt%zB
zHzIAZE)`L!h?f#qJb9_8sV-7BzA}RQi*YufU77c<#eV0zEe;Telerst5%kG1NjmrP
zxl}WJCj4-J2Yr~um1mLKXnJlE&5bf9;Vsd|j5Hyz=0S(XxkBfTLLcTWmUvr-$21>D
zc49ZeN83GqO~-5V_uP~vnAX^b!;7fj&mxaDMYCh@Qla9s#)wTVmpyK5niK-ICNINp
zWJ~0hldkOir}S<wL#V&rRA?3-7rm%9N0EbZ_bM9SKvWglLRw<O!EHg$$VsST1E7m=
z1RTH6Be#&puS2C8M9%EhkaWn^qGJrc6P59;IdxpV)eIAqi`*LI<0lTYm7FGKqLl^1
zI$1z5g*t9YJ@6SBPV-`8?lSfg%`H(3-HAbyALSdlc5IwxF|-C|i|Hicsw9Z^I=c#-
zG03xeul?}x)TYyEX?4!@`WaGc1>>Yewc7Mpg?1m6NnX`}Dalt=4On%msg!y#3WS3w
zO=V6pb#XynTF{xx=qkend&_6&T`jwUt+;v{4toHiU1)B&b<IWD8Sknf_#Sq8`&tN-
zWl-{;DP^i|OeIo^+Vj)q0`q4?H$VgZ=d%XC#HHcU0v0Qh?szrb>hiH#!t;m^58FVf
zMD<IU&uqQi*pErnghSTjeSy1h|H|X%@PvcusKd-<E4iGUlos({Tr16c_F>RH$#7n_
ztE4GTN>?vhL5<~#zAQ-IzTN%NTYn~}tx7~(n}912Z|k9vFp98VvL*4fCczprX>83*
zU59Sha=voN@fN+WN(rJCG586Arv8CagP#V9W?$;8pUNl{If`kR27?x8ZL2>*Q%!LL
zq_&#$8xV(!)YcW}DN6rQXQo7Pcdq;rzd#b3UW1Mhz5wgV;iP+YI^g0%n4Db&Lys9H
zh1`<ePB%!$%6D$1IhAFlj{X~0UNuS)YUs+OBFoQ}R;FRnvuoAV+jy4ieAU%9il<Xf
z|D(q7YL$19Z`kE+vqENr%~RtU5auvv#Y#Kl%#|X(sg#rNHfEdIxlHSvHOQ4IVj8NL
zmNFh1P2`CZ6*aaleJp`bRJvaInqN)~FFM3Is7K*?df}R^oU_X%Fr&4FYpasGPNdxy
z|CP%kO66->An<h09IEc$^dk34MF~bor&BHnpaQX#KT70tr~KMfUxDIVp9kGLJcCW{
z%HmLy){R+q1@&4B;IonBfi7whk)MDh$$UmsR4CuC%qrg(auK$`56pq8yV9Do0^qpL
z1dHO;gu05pN(Bl4B!$K-zH(~1T#varVzRY%%cSI5_>|@6iqL7K<SfN|c;%G}QnK1u
z>q=A>lBbiCk82!yR{0WV7`Z>mpH&296T}?mtyn6`KlN%z$}6`cDckdU*MjnKO$i`V
zXByIYa6~a5QF%p+$}#|RskjWY7m331%xi!X!Z3t;6P!Z<p9+4sC1LYXVxC@NHX>rr
zEIY{gsx3KZD!PkD_Yzl(`52}8X&&64H|K|J!eIXj%T<w}X9`=QFlOADuOx{~yRPm~
z?wCpJkYlS&d-*hHpW2@V`Ar$%!3%T_5gw{iYaKbrA!1wd$)o67wTSN5COuuH8Xs>G
zUKR4JCPH~<UH5I_;aE~X*0832)Hc-!A_M*|t)U;4RkZSDwTf2OtesUSKeW>O<#(Z;
z#aB;B?f$^upFEaeZzh*~5^6Cdp_kDmpNfJ@pW9n2FPbX1i$SCvh=OnIR(61A8Z0Xm
zOq0p5p&kaBOk~_=(|1@cjX_)9JQ|pJKR;n$<$?v1GXO>5h{)|*8PA}t3~Xk2Xzg3X
z2g;e+@WXh0eGm<!yfNl1Ab>#22a9uxI|+g4B5=NT5h1gUajM&}kgbvE$u(vt%J~Hn
zqNMg-ZtyC3TR1?8>4d1RTdBP@n|0>4H_yx~D6;Tw+4k0Ns<-rcoI|&e8k&=xxg*Q7
znY`gk<ofG5&gGqp`ukIeE#7)1BELx?<zoJwkxG%Mq6ng0CiCm=Ruw{?aglj4<icy?
zJV>LonQ>dp*X6m$eZ#>h4T}5W*M{fiZ@!oR@V)%y7Ww*@FmTP(`wQv8X&tFdYc%D>
z0H@u?DK1B99bOpJC!3V2K)#FJ#SFUwrz301>o2@|y8>0T{sOdd7sSwPRi6A`S?~dJ
zqrvox*=nqOY%PEKzfYSpt?=Br%-sM!v-$iL4sWd_68sj9aG5fylw<!l5B8T^kNB^G
z|KIZQzfu+d|M24faR#c*z(4Y5V72ND{E{;Rd=4J5Ux@u*^0B{0*sq&L+DacgQ~u{O
zkYnbZ{~7WDXY)TVuRP{|{$le#Kd+bnISb{*GXTY2hQpX|F#jEN<8!w8JN(l=$9L_O
z|GFFmQNP`d2VM{>FWBA$A~vv>N%+1OvrXP@_+i)k!c$M5?VTJIrk=J6)J{Fk^aiT7
z4kMSxPI9?oKd#!3Yu<F!3e=_nlwc7EtjV@faR<l?DHku`H5y*=Ep^N;l6WA2Le*`S
zMnf0`$6}CDJ+w>2V~WHJlS~ndM@f9%4+kQ?z~Sl7y9=IVkV!a*--n%`-zWG&^S;9y
zNil_-r;hB8D8uXgI!gW?@tt1$yJ4r%_N3tPz{nbnC|P7S>7l1`*1Uw34}%ZY8ut}c
zAgXo2SCEo{(==hwg7H;6xx9j@mJuaQf&d*E5Cu%)Rq#I45nYWy3k0o^h9R@bOaxk;
z0E>bqETXA&47s6Ye(5XTMxOw8!3KMM6%K{}LOSUacK~|^Kc6qcB&3+F9;$)Do%lkN
zc0<?=93}&XC)L0zih{M2^w(SnYpP6B5-2?*ZVM8enk$FH;D`w>L!?yW_r?@3NSHPs
z{9U2pAEA*8AL!>sPT5tn>}qk@HM8tmL0PfHlMpAg8*b`;NNjdWy5<A1ngy|16U15;
zgfS4>LD&!d;4<Ya&gkR%I=x(kt>rNIJx*To4e=A^yZpq8EDCem@mq%ZOnoqj%E}6?
zn%L*9ZwR@>_nm}_Xn2{#lMw@kwsRQwCVibDLnzvR6p8N*iAK2knm%*S05}^Hbd^k-
zFYg7&Uq2f4BN5K|IyN8~;?0<}=X#4#_>&pORCa2anN=ksHUR)WQJl!6$%qmN&?&q(
z+&Zh?I%&vvh+b}`HE$)(lq7Z?tw5xiP&sB@QLzlMfJ_6o7GkfGFf>uyHZq`VuQA<I
zXR6W`co<MzH((D!;HAK+hMyx~CVrZ&LEXh>z=M){pxk57uZ@_ZOs~KXM?~Xp`8szV
z0u$G5kS7ISAM|5Y{#a|`=%H7R6sFL97^h=3Bj^MoT{w_*U}V?94W%w8e>LziLl3kY
z&Pwv0L|q7z1a0)<;n!nvOvbp_DgU~CCQ>wKh(yF^Zd{}X7ML`WuNkuMjzHk*Hj}3q
z^ubeNO_vaPVsvK~g)@XdH8}t*Nn@T9Z>Yj<c81x*tsM<t`L;2+i2Qq)&TO9D+cvG7
z*3KV?3#J~aj;K~!MKFrbXY90f&Tr>JtCC1v=20)?4;&6!^nti&{=8AA_Nr5RwN~vl
zr}mnkC&{0-nln^;GElvwGs=?y$N%3u+275{D6hKITqr}uMJEmWsG##sL;-J-4<$1p
zQ$fj0OhqU&^KHezYI1PJz>0Xe95)xAj?$iVu@S>47g?|(_f_a9e_k!zD)b*!X^mG}
zt5r$$gfee(HdOabxlpaML5H8PD~$lWtMK<~BUqKc*BZeZ{+3DSF~6pejXAIC<`zKr
z34Ww@uZ)cLwyExP=WwrcxN~;0|0_9ZYALcgMWr=dT-X_KjgL^cp_5a1dlz=6(J+0}
zIv}tX90YZDjn<T)R=pb#Wem%Qd3ttyvh(9!=kNPRuR4csPR{^iyFWEQud%PqckEaL
zb?<OehRK*yy6_qUpUGfVJ}km26<%cGi+EUE%r<KSS<3~oX8YdN*iE$4N<*+-=CUL1
z7Nrm?`v=DTe;-8s04{Ct#mEs2zq+K@LC#e|iAVe?`h$X8E|9M#Yh>o6oNutUPySB^
zUEW02ugGuSL~s4JS7&Nl?t;>{54Suqm*n+_NwRMXEwyknC4$yw?}ZObs~IeL3auE0
zKGi0Dv5G!(()TD}?~L?|+V+#a@J7EQ(yxk}+a>+FSX#|s$xHgW5N`YSEs%`*h|y&N
zQ+LCKWdD+(6=Cqtiws#fwN~t&Y~b3vmG)PS_E&S-U(IRX)@n`H{+iMLT2A|GdE*ya
zK&3mAT0mPZRiP&iqYZ4brz_$Ux)*?fYs^Kgum?1$CsNKBy<9b4u8Nmy=F7E>Y$uGS
z&`X`~c2eWI^Lihwo+Dt0!{i|-lov<_5lR`okT0pn9Zn6Mb5KzkEz~kl3X!g=LL{%<
zY1$-J0bLX*b0=~ANi3W2zdio>tpC*V|9LZw_fMJs&+>QApMUH4|13ZM?)jtt&!hj(
z-28vOa6g_m#a=vbihOuji4%`LfZt?}-(N=w88kd@I->NZswE4v?FD|UilPd_$6|j7
zKYRSfQfIo|(%uce+OEfgr+EcO@HOJOjy2w@qLjE9K!#V3vddeA{HA&GgZYG)0&2`x
ztDF;hQ)pO!bBkv6iEC6h4ZoHJIb%<MGn+MCU?*eawgclRtFcH?*}suM?Ve9%3$H%-
z5{oz3sFjeQLdk_6g6_C~!&dl0A5F#vxE2R+86>?ve>Br)Q-8Y%Gv+T7stKDW<U~t_
z2bBmnVVSw*D4xo6JH~j|o*HojL@w*7hiR}81ub?SlDnNS%ku%k0r2b!;~4Rxh?}7`
z0{9e(C~PJ=C`CABCl-1Y7>>o#%-xh;Xh(lj+loGMZ0KYHe^bc-UMuBi!6u*G034gO
zv1w$s{Z@fyUAEwum@C<mmF%=2CA_Rrwy<AyyX6>LtC?HhNMozFlQ|Yn-9D0W=Fq5W
z!gk;gfIYQvHWrLw7&-)>`jga3f?h4;3c3_+Eiw*XnibzTroJ~mLm!Yf|KDfO`&74s
zKJ$cDcJNlD?pH)(s0-QF{@Rdb%nZ@#>Z+ZIPq9;{Zysb<QTO;dnWONk-<wK=X)#Bn
zSsgkwnuG0I9l;#??(MauXJhvA76r5OT3>@UcYbi8npUh#2q2X4EqprVS2ggKqO0Du
zsW*QKC0Bj@&(BeKJ<t1JE32!|S2Op&R=#`G|9px1pZ^5?&vHHe&tF1$^Yi}RFNNwG
zQK0rEEWQ83%C)wC9c3V2g9DW|Wlf`krNuD;O{K`823$Q(7W&L89W$_kIBNB)a>Wju
z{BC)r)$svx6*~v$ECjlnEl$0iK0ewbq`c$bGhK9@Ew?P)kU&hw(-o;EYM6SWKi$Uv
zWUr!xFPoh%L<Qn+IN!}l{D=Kt5BJ<p(pL?Z#Ca%gI`zPT-oD$w3{39xb)1~?rygAD
z;$+DmJ1IVQf^kQ<J#;|8wg=Doq?UAqkO3+o*qZX<i!Z)T>G~c#b#UQxsyBq_7AY!#
zg4ZReCt;Gr$vPhc<=9Q&MjuQXEMUcolh#BieIS?@X<;a7u;BmIxZIm-IfGZ``eOIU
z@1b7}$FiUEXC2FW+iPOfSW0OaUo<MLq$XPucop%2o5GXrnD2WzLyh56)#<z?6x8d(
z9I#TXYc1GYt8&)>qDbEg-)n1JX@kk+!zk0bni-S$qf8c)(YVsED=1vwtIms>8b29*
ztIV-5vx+ru^g@^r(4cFjBQH_6CSlO$avo|R2f-yomw?{`N15o?C2BQdiT)3;T~7yS
zDC(E$as)p5^C|o!-Fmo$BBh_b>db{vN2^!V_!MnVuiS(7>#6swP7LE=2dA$y*5N^>
zQ}<bLi!^>pj~5Mrdv~yE959QpAfz^r5UrGXf^)NUd9-}sLR+aiZ~y2gj@dKt`riNX
z=y31ojEmfRCp%}yC#M<gTF&BS{>WP8A|l88bbjT&of$AwDfn`fds&RmU1L{850}Hy
zR#(LlYNqI2l}j*nd*_=nSXjVDc2r13zY@-EOOI_qiz&0{;4(XIO%<Hn?`kXIEIIBL
zv~Y#+4K>^x4k2^DH@16>=)dhpOb@>4ZnG-#twK%CX;cA*h~HGYdr#jzfmutneRR5)
zJ*kYygH!CifO4vuqeK@+2r<h~SuYp|=2Bd20O#{;TSYd8{GGYh7Xgg683&}Z_gH`>
z)MBtAV51hABvdPKxqz=jux59fwhiMcbLEQ?z9UD{*bouItBfMnLfX`$uwl%56w|BT
zg~IF6NAn@*|5dt_y3U`a`u`W-u08+G*8i`3_Z|P|QUCu1>i@qZ{r?P++sK9V(S%@}
z_Aht%rs>)a%rBLV2K;=_TvlcT7B#*S^bb344$cItv48Yx@7K=RZ?E?#&e}ORvVpG-
zXm2qImqdPVG8o*Dm;*9E3R<EY7DOD~eAt|0?!F^M4}XFe#^t#7f=J#Vj=%SD)T0_5
z+?wz>?7&T7Y`8)9z<n2sk0ygNI>&THNZym4V$IgsPv(&5zV6Gt&Q1rIq`g=8mU9^k
zpNf+#04B8-Pi>!-6?mwJpvthkKr@Y8BI%~OkjP0yO(j)RkR-uP>eGk`b<;VfhC;Ws
zRK~%hN!74pTnH%>v1nuyEdibK_dsh&2z!7f(V;I}vyH^VXKuh>5v_qtgxS@sCdlaz
z$ex3dDG0H{?TnUD@UC(sf}6n>TP$o}wp&_fM;f`r<1Fdo1s2{pC<n)U3OP8jI74<7
z_D6CtkNT5K-n&8Ay$Xg=I^d0`UB04u`B<bb;<6g`mW!<HEFMK&Qp`nv@M-S6MgS?`
z8P}22WR5^T5QB+@cQ(BloX34mESa_Y`ryrv`$x83@a=g&7``*xgYo<ng?vBmUfq<%
zuw^bu+B?eYos&~e<}ZKiygL5%H~z+ysIby5@Y<;ryOuL@bl0$~WQ&YTn0<sSOq_zZ
z4L%Td&Ag6eL@`)lX#HRu9O-Xy{=p`MFE}Z@=U*`_pPBO#wY*7u4U|I3D%yf$Hj|7X
z=>4%$HskjtOPE6v=evhlQenjgu(41k29D*vBm)BHM%I&~*PW(a<4mg3F{dF%0|ZgT
z%^bE2UAhM`r}`-^2iYS`W7rShLrB95nI)aJ3<5Kl^vBVtui!~1U<FtGdCkwz$ryEA
zhf0^ux7}eniqi;vk_|H*Xux>~0uCO7bSR|qt=XEE4CTKzRuNGa#~cT3LTIs5zKx`z
zR;P=(cBZn%gye)NB!WDn^p>a(^1G3LZ4o1FVeFzGgOarua{{L<Zh*6N+*@CVgRmcd
z*u)~+y!^P^o~rj0?NXb^g*bnAy_U17W|b@|d?yb2Q9wr>w0f%Ja=HT*pU|>{%5q~M
z8D!|gUL@*TUx%z+Mm9BDH<P!&nQE3v3VS%2WqPK9$NZ30IcCd%LfTie?|F;)p?naK
z`d;3kwHCJ<FT_fv_^5MU7uS@wGkNXY_w9fFnf>-5`|2k9>W}QJYY%=<f02D}sAxQ$
z)0z)2Zu35Igqoi+R7a(DN!O)9zB|v-2ty7c3%Q{#iWYQEY+bRYH|3(XGVLn9`Wf=o
zbOiAKeh1#&mSo6!$~V~ee5^ggBL5WF87T2KP|O*!`Xmiey@*7hcf<I4=r-H3fRO<-
z4V4H6U`42fuE=KGwraTMQJHGZd`E^FZ;H%Jf;`w{7IH%_6iq=+gyftlkO_|edb8lk
zUFx3&(L(m)mKp5>oqt@nkLgxQjX!nhfILr{8FHX`wvwru^kJT`QJc@DRlq_&<!Iwg
zr5PwlWi2?0c@xGwMi<Hjfm4MA%}XI|oZ~8aXN?t<6CJz&dHG?&ZNw>(w$L{SK12h)
zi*nX7#eCq-mWLdylj7bQ`T*5Sa8(PJj#Je5jEqWh9`ONSBE2jA#c9o6Q)8>wVr}X9
zu$Gfm(H<`6<}W5VUYI2cE)J<(CjnZQoqKSgNlJPl&V3J3u@AVAZ9(S%=q?{Fd4)H)
zV~C2CUW{O6`e-ix4&nqA1#uUd`uvXP(2FmMJHE}Va}%0LQZ@HwZ-et7DY=E;8CA*E
z{76{LPeOijt4vsPK52^gvbmi%KKd>=a;6}2LtS!*v~HY#j|89Pf(i)Umn9=h@cZe{
zsMDSCbOx{9l_IMC^Uu1}iy(8@CCQbF4E14zNFQg#n>Z<8nB>;Y0eZO|XaY3Ca*3?8
z(aa9lyHP_8#W_=J6Q*!e?S(-frW&=#foSTP%zcJ@7hLh@O{3pujQ$N!rU`w9S{78)
zP8L_^1)6PRX65C80<>chABs`GXDwOOv0WfY`kUe&41yczFZ?6!16Za6NCWKN3eeaW
zX;gSo%D;1k!{J-TX-HncMx#&q5&6#I<CAs{G0R&HH0}ltuxUrhXo+zO+Hv=$s~|;O
z4-iaBIz3wUCU(im-g8tyv))D36dvuFN|oIE_0S}o%z|l(qM60XPXMdeUMGGdzHZU&
z3?oIgyqZyXH$a0e^w77!lB-CO9#2EPh=#q+d6<qnqoDgP91F!>E1UCPI7q$5Vn$vY
zl>ucxeccbD;W1jI(B8JimaTqB#pnpiSr(>loNxjf#ls#NbYhpRY;%Iqj;qcjS$VU%
z`Zmi;%qI3ur`wQ!b|&L^rypGo*`J<ZoX*a(Xx>TsUae71#=Iq>^;-R_#%k3(hK%~#
zG;xQ92dIZE&^9_}s@MUTk3{i+i2`THu=*GVIBT~6Q}h%BvS6{VLWwsF!O}&Hvkc|q
zxr_q*Y@-Rhi_S;JUmM*5_kjh?S?55Yz&Ye_sob%VwPn&j0D2{#z7ocqtN1zuqwD1)
zNCJ*#rL%l0<%ojz1ZQV14K&e0AB>laZNE9zvtl5r5n3FB{$@6&e3d08vhN9BWB$bC
zEzJsYow0+o49ZcblOEGXp7XroaEI~mk1&aC*^Kxy^yy%ugcC*74UKvV_q;xGc|EIo
zJ2x%{MV>aDO?mVG;LUq3M}Ul6bSDB?0W6gz@VA_X&=*VHwGtthBmh*2ue3!Gv8)=r
zGlD&C+98^VkL&S8J2b@9r^J}U;xk;mv>{p8a*u)S<IJbU20kH8Y|F1@CHI^KTgDLF
zq+BN8GUE|eu%^L$it7r+Q(`*)BHHuae*LoE_qO2XV>ijCTHL2uXVS%5N0!c;Xk;bp
zPP8<8$Sc0pr#c$m%~Tt+Rd=E-Dw%GsBPn`JGuJeCv_Pv(ySV?fZc#yI+K)jDwU3k~
z;U$Krf*}RAP+UC6C;+@4y$up(FJh6L1tnUcn4uzU5RR|n9;Q`O2&YXH1O^(7NbAX)
z)bGgsOkU?EC<LDpY^3@K0#Oy)LUCX0Q+<lrPZQaT4VM`{rYj7g*H1&|@bS+bs((bk
z#Qqsx!z?zf>7&ZWM+d)k-hjK)5634Q)XO(NHe~S)QPN5jB0hr7ByWcC&*CC`qCC9}
z+`-uB^zM!)7_SagQrECrs7(8bn)AmgpL)vCB4pevtUC8lPfdJ&F$C$Q58Sa7ap(*%
zP(#0`rG6_!Q^BiHoFB^l_{6-rEiZwiI<4Qgc^;*qLyzepI+8h@pdIbd;QT0HWk*`H
zJMl>R2DJ=kY@16TRKWDu$oBbd4?5jNXIQKMLUo7zC*OFEF7y%Zd35WDM^JPAYa;SZ
z2k0D$gq-p9${ibTh?>x&YMJ^EXNae&xr1nUiWQ4TJNpQ^M9_-|Vsy7QuoG;B{7Wki
z%_EhVZVb4%C=du-k}B5WXs@&C61?zCYt<9I*~&KgI#Smx@1klxwF(uxoHydj`)#1&
zyz=$+6!wX>=J5hs5EP#!Ix;TD#{fzB2-ge0C#o%~&<tB5?19>uHZAar)bFMtMx9d3
zS0w?{?GAB&MS-==Je<Ja!$CqW!-#1uL!sIlFTo0Pk#t+t&{!i%(@Ds#qA@Lv%WuA6
z&)X}_k^^K0JUAntY8r~0LK4fOoGso~!{6Ln%g^quW!W$-;+Ho+V`wOc#xaJ1sVVsX
z{$G9LgoreMAtOP?5cgSIJh4pK3O60c)p0k=s<$!F5B|8>!SKGdmbf1(-yR3S{xBM2
zwCB+9#+QiR+++>&Zt)^ZEtDE#<ck>0#1c7gy8@B!*e64A;Rzg|cY3|B*Ro!nU{C`G
z8?R6Hc6ZM9UKw#36iUGAb&2<w42dsuomXjGrk{j5=&Cj?@>G=W{)QK|dgY$Q{=RvN
zL(C?6CuPzC)nCx-5udlI`(oi)U!zBEuV(l1>X?0`>Dwq18dQz_A!lT|IJlg1LfnHB
zA%HSWs_t0;^1W9IX)&ApDS_q2{2C;I>ed?Z1FizOkmE%F>U~3(?TjV%d;{Zrt@{eR
zTB;4Tgj038bW5F>9xL~TiK-3f?2dFpPe~fvIcw9VN?SN_K(tI%K5e+-8%FFZUWg7X
zb<97g&YNWSz&l*r$?3Kr2DXnpwa&)uFjKATn-b2{wU38fs-5%5445;G4B_NY8lIjP
zQNQ1)bJ{32D-Fjbr%E}1+t{A{@aG4!m|o116945TX#;Oml#Furfb%%+vnbsejgt67
zCAQONgO!4myyUkCVm(Bw^V|o-padQG9#KG?VxEne-8`IhuTBR+zaJ)NSHZB6Z8x`z
zx6s#_Fzz2pebmo+Tsf0qk;z$Hw1!)^N)4hT_uc_q(09-dZU^&g$8<8l7OsNyC^)j^
zCi6Qd&>tc$ts0`$topR4Th6RWX?8}qOL=fBi>;k_*wOc#ead7|<QNL#u@UrhkAL#2
zyiI$(c2W}Be2PRP!h35iQK_5KQ7rsUAijw0cNAnCqNH0hnvzV9`=$tvBO}D1TY%{A
zf_9B|T#{@BF8SRDU|qyyW^k2KE3mXC>pDm<7{^QKX#$qWJ}GR4Gm<pr2NMnyA|JgA
zZ?1VOEy#LC@e_>5C2u*2z7sJCqVWYM)3gIMJE9hb31M%;VGjycvL^Z>l-iRjyP9<h
zfPiZ6L+D#Pxx8YBr$6s%>)lm6L{H}==EKLug^uf2;n1~$XlUpPrY<oEZ-;Aedry(v
z!~PA$Yj?qd-@`@pK{<gfQJ^>xT!POr^sR0CGE^h<hEjuBmTE=l>2)wl4aKLMgH9?)
zGsHZrUK0rrTCb)y^O%I~h(%wS)gV%l<F?#7`)U8Qv-j)klfBc^{o|t!e>yro>+Ez6
ze%?8Fv)AeDpTWD6y`5KRM$R`GH=0{pgcRFktl!g<f&jVhjMQv;PDFSQZ|qgm$OU*3
z01Q*CF{j1z>3Uh-!K6CauP~{1faV!+TM~a00^v$ygkmL!`eliZ(+2iKoODBU4Gw#x
z>d`PSGT$nKt_#%RmXWX68wTh7aJ$$IGB=j8>0?TeS_PeINmfA~)!9z&Uo`xzef7@X
zRX1}PFcGLFoeaw6QIs5n!LVY{ae_+n5*APt<}yOBv}}#|IfH13u{_H9S_dTfoLeAs
zwkuRIPy}HA>`yZe1r%cnw3zYC_n{IQbFMv-Xm~h1^56(VBi=Js>^|glLS}$1I@d%S
zN!Xi6eT%Fn6s9GO{LJM#&dryeo$sMsLzMP`wuF;_{lbrUh|tLe3J~&dMC3qnBoenL
zMhO^Kb?iUR5RlIWN1%2Gz91-^HMkgzv!V-JJ{0lZjzxpOw5IVOlnISfk2kZ95+`cO
z#2dQfHt%5_BD#=uR`F*s2@>iebh^Or*8|Ap4xn<e%l2d#{cB={$0MO_<-<yq^qImk
zXNNj^Z!!jD{B7rS{~voD-kcqy++mw6M@G0Q3_O$NunaF1!;}epEdqqMmTh!MrGmYe
z)7#;V{Z?wlC!7IuR)rAe$psrM7sn$N@sRx#C($1e>$1NM*PGz8E0{|FA|Zqc#xhvj
zz)<T{0@2^4pE*h)Pc^{k&(<Lm6#w9X=Ud$-M0TA7qY=5w6MIj=j?lxZCpi`jO$^>#
zuvL+IKqoj~(2Mga+oMIh4&Hs;{-hTgo_w8z(ADmaMxyxfWIURTdFl6Ia$Zm)o{X(#
za0j5#!b+pXZ;q=aE*(txn+W7~(eG4jg6Yd74tgLO@sTbVJH^rx#wSLX5r+4^dlPZ-
zYWK~n9p#tC&L@|}tg)X$m>812$YZY>f2|yOJKJ_adk>5l#Pro523f;L3~H2fC5Ga;
z%`7)fk_B^{sa1Jy3)<UQsO5l7p1`^8*JMp{w<V9qfYB9Ufhx3Q)_`ExEi2W^w|t#l
zg+sYvs+W8HG_({C(isEtL#eIWteUAoxuh~`CZo<a9Fn2DfVhyc&*(}f@I}1yK$Xh)
zKeMqcpHnaz<^usv&aMr3y0N;_y48qO$-^*Z28-fhW^i9LOy;ZR8(PaOz&r!>1Crf`
z;R<i~S?3cN>Cv`f)M5S65$7u#1?Blh|3g3*`QVZ74fI$f3rO1$q#gq}5@i~e1_L>?
zsF=-&$-u|z5V5TY($!nm7O$l9W({6JHs9gHga7Mr4Vk*WBf~r);G&HhZi`tLa3r=k
zoEHCoTc0rslk;UD3g=1&T~a^HnNqv2Bn`C%(~AOGrb<huaAP#0!CUOTJd94@M!FG&
zoV|?4S0Z3E@6EO>P|OBPC=Lhg!Xoy`575IcxdmB5Z=|A<qD|;quhcpe5rnPEx97+}
z`Uj!g%AZAl&B`2xxI~B5O>bByGxnR46G&KfdUkxW^W$CzgXQlYaEVgJ{x;&<+j%jL
zcRMc+j(7jw*?INq_=rp;`$tE6C!N<jM|%fl#f_q!{T~{;$49T43xC4{fi{6B?e-F-
z4M>+>_T%omL%P}6zIsCCE_~@bD9}F^(4Yzm;pLu%zfH_^;<-4n%avoGu(XYU1({;G
z7o`D)KD)x`^D?Z%S?o#vAcABK!DLLdNV_y<Y0f!-Uxn#-yF63@k`Fc9fGUfuq8g&n
z$VP_%04;zh13ok{J;>hAI|umJ*)g_%21d8jpN`)gyy_h7{milRk|TcS;6Pp0gghz7
zFaLXQ_w3Z%pq%W3u+m?N3q>kgR-Clit5{w}&A%|N4~&LweOh|T{u}J*m@S1aqH`AG
z;u*9MrQ>keRiTD4)|LvjC2suoAejQ0hkGI4FfKt9llg&CGJl`Pbeo>fA*AdW#!=8k
z3wjq*ho^8$)ST#Ee60cq@&WzLToahH?ucU;@3?Y$|FLM&M4iLe2m5=c4PWq=tu53Q
z`NFz<wYAc6@4@Fs=i)#ztz|&!)-oMYt$k%S*DUg)+CSn{w*N{k>En}EoVtLf(B}Iy
zCn)|~n|_<()}KjY(R;p*)^mtP<$%@H!+TiZ?`M>1>5{cw&wLFRTg#gpHFL?C36LHW
zz0D*4n@)^SD+^q4{<m+xTU~u_=YLyWS$?thnE&ld%>VYHZvM9~J9FFa@!k(VoaX0m
zv%Xi(;^tA(Y%}5}SdMdHC%Wha2_NJj97kQuw3P8sv?6JxM)A!v6%dP^8Oo15N5(V9
zRgto|@ZFuCSVk;0BL$k)0ViS5dxqgXOpBF>mcZ#iMkX<mqO}W?u4S@}of+A0WU4Xj
zg;n}*C%hxTAE_)>>h=PqM<T~b-CgRUFA*6{I|+AhyG*B3fhRf?l}99u>Vjiu*h^a?
z^q0tECNn-kiwr^^MI1TU6h}}3H-gcu=QvCeZd7WqysTZ!N!ZoOFG-THQFK9OhMG2E
z*@BU<%+q3KBDQrFGJT0#0W*_`%`G3-95OK#RHsO%ow4IuofqB|I3Uhl1w-PsDW_7D
zW~{WL#kQ>U%u+Kr!7x+`HBOckWXGx(vIAV;5X2Gucv2%kA>SCUL)tAbF2V#iU6BS_
z!jiO{U-dFHJbqKz^jHMLOUnUZ^l>=?sW!|+H*E0jKOVtNT(Ov-9EExy0niyW?t|lq
z_CXS{D398|CK1HiOCx~#gqq<q`lKZvC}q|Yk<IHmf*3ae2+qPW8HFiyLP&48lr~Uf
z<^vUCb}!^BBT))Y0r>0*AOn;>XXLg!gndJ8)*`2;a0YI(o&Gi7BaM+hvAouwut>EG
z=b{X8-V#G27XrRd_HQs&l8yl-qP{~VIqw{W;G5m+CAeE7l}XH#q<Ah~CFJ4Jt`pr6
ztoG&MMireF30@Wo3d+@qH)))gY6*)&)7Ifz3*6tfzP9emDOLY&fqndM)*5iZ?o`})
z+PZRkpWVAf7Pk54QBx_}YP#bqU3VtGTEhs_*fQhz6(~+YHK0s9*`VI!_3}MiUT~xM
z83v|)P9<zRzaqs=cS7-E=bbt#0o<qQaOKNIH#4sChh2cHd<0K}t85({PR8}>^eRr2
z@_wCf!^5w~s4lfGzO>+#b~VGhC7LZHe6{us7eIBhgm;7=7C|rTuyz2~GE~F%JsS|E
zqCte7weeK#wip+cRD2n{H-mm#R?zD=YMdxA4x+Kjh8rg6{YD1?1^(B{#xBTwq=1a2
zcLXND3P&M^X~~KLZPHyT%wlay(xKxLtH^)~BJ%c-=mX>DK!R9cxZk3q52ktGvL3H|
z=?QPTpRd1!Rz(u+7A8QEsG(#39Ii3+LWr68PF}Xs&Wm!6fTQ3WHpZrkYt4<{d9ko=
zOS5R^NAbF07M=Dglgo`7kPV+-!`;_&r9vJ;#@!0G4MF@D7jg17895B*pJ#OOLREs_
zo6Na<p|>34GSbA#^lMnRgr^}40s21WZt2C&ted9lS#}E+a0r5z;qmU=R<#mGQ)`iq
z$wwDI&@Z{zoOH}pXtu?4no&WU^P)<(#mb~vog!<Hpe8Mrp{@CxE((EncCO^L_@$jF
zaEe5<Ow$K?jO0y4@nUppxkrb6XtexC$84)qIIHOMBXS!lvilb8>w?>3ix7G!3=+~Y
zK~kXtqaB*&P;<M#yAKBWo!#BN(^D|o?fjk3W9Rs2PrQR`nHbpj2g{6i+SxfcIQ|97
z!gzO1MDb?f=||Kh_Yr#96tVedJIn~5YP+eizhljutQI0xGw3MbPDd!hHecu);#Ndy
zRDG(*>6WJ9DtIr~y)#c0Nw-zT4t;2-pXyeCPNDIK{(9ZG4dQk?T#v$ytHYik@x`RP
z*Liuev-|hGv(EABy^|d<H=8s|7U<IsM5gXQ>DN2pgYwhfDU?z*j!w_^c3ycecGRI-
zFE!4G7~!b88OFn#K|Dc2LeBQ~NXzr0WyMnE((>)liG2bZ=v!{K=QYL;*&HbibUtnV
z&+<PvD)nY%e!aPA)ZQsF>D0Eio~=Mq$NUo8TdOU0{DyB7KXguZj((J9oI5!Z5i9?!
zJ~gZJL&hI>gp9RX$XKm`jJ5e8<NA(}@w^r?*0RV@f!g)nXKweTgU;lw1>X(|)IH9Z
zG)3B~n|P8iO3sx==TScz-{`zmF$;(3BoP5_0@+>?Vip8!CZ!Y?ZTCDN<L~=`OF`fs
z84q)Id({}J#S3<Qusd;K;vf!shaksj5d)HLpq&c_OF3_O!M}++`(X5i$2S~}_yUHW
zGLA3sp@{I*qss?YZm<A?Y>ko#oIdC*i~b;+QzTpzK_p|c5n+l4nJh?AI{<Q=M1;za
z3#T7pQ41;Nbb)*$>_jTVRlo+5{x}--l|@p_gOzY-$#YXd;%Ym0b-2a|0lKo(Kc)Fr
zxO-CT+D*K!g6lT)Sjy8!B-)fXY-Reqoz-@EGAy_uSZ11+Syf@qn6{_Yip{DZo8z`d
z-0cJ3%~V*eQr`A)m%H;_?n_sj+|3?+8)}$3dvQkWac>zcq4TwH4_em_*Qj5_eDzPr
zINolzpTH8wKwBa`7p6dQy@4g%mwXMN%ZicA%iS_WbU@Qh3_}GfAg3|rRPXf|#*S35
z(4|EN{^Gqw*T)ubkG3)DGw(8cg5E8l`n~{1QVDl|{583D3YU0Q6ZHjGQD3<E83+Nt
zA?aFp6%I&lj}#pAB2ze}%%$WNgJpQ-2rd^w#4d5|0mBAjpCRoyrJGmN)7HUKw1d<}
z_N8^em<6uENsmY&?|p7v@Pid0h<tAg>v@{guXAfsW^n6Oq|=pK?_-^B+<MOsZO>bN
z5bL*aXP}Bt=@KI8zMV>KH_Kd)QqkFJE~St@Q|R+8=h7NJrPN$?r&6;k(U30-zNzMW
zjCve6u@~5mITCU6G2o~4kb?98G5{&EYqDEa-C8rws<-8&>N?PB7u6bQv3yr8U#U8{
zB`2yc&-c{}$P<wmuDBcb+yyFZ*D;RBTlF*ZfX>Jc-li&)(wd_DZ*sphzkSHQy2-x!
zBm3%l<2L=9?%uQM(WmJl`ZP`D&1Bo2S)rYJT9*+SjKc=!s*FA>?<E+;j1Ei!E7~;g
zGOoEwn_SN&(W@!dG&A`xIa%@M?9SxzTABgdWiCs1>Z~LwiTSmI^K@9cOIM}GBjREC
zE7kN=^7*mmxV+9~NjgVEq#-{#Ce3<Im3f0y-MT*$PbEpLxeU(-h>UatfBgY}T{q@?
zsyH4<e=*;qyCS%9Z=^>`t<3*;DD~E;VBXF}=D@(I=&s#~%=T{@&A*xhkvaT7ZhiO<
z&Sl6P{`IX7e_AJ@Dk}aSDtaGI^aw}s?A7#(K9_6xC9{k}xQ>%|WF{j6OX^qYi!n<z
zKDE`iRQK52^y+(m_ndb1grG)|kYt%D+G^x*%Dg@A2V4TUEiJuk54d+V{cR}Mr_zbf
z`;v9WV|x_iupstff+kZ=t>fzulq%OT@)BWpGUgCiNG4SZBN#yxhqNDcm1P-~If9_Q
zx$2NxF)fNM(~#IQ?T1x#f+Fysp~8~2^WBehUILHF^V7^2A`~E2Qr5UYt`FnfwsNbG
zo=OYg>Q6*rGT`g80bF<%%wp4zSZh3UM65MoRS@g38GlmoQGVF}^>B}+R{<pbgOC#t
zDistZ-)CD+OB*egapdiA)%kT+(6{L)9ad<iIdN!zmRT{Tw-S2frFj`$z`)6^ECfN-
z)1bu|T5Yr#D6NZ@N~BVg<9Qgq4Rp3v@NoD{_#W4yUytb#bCSdL+ym3&3}=B=;C<XW
z^%)-*iH}^*BiHlD^*nMtk6h0q*W=@Qyzx#KEH8L;(Fgn(kx12fgPGkR96I^H|07J|
z?FA7U^&(2raYx)5=z!}k9%v%>k4*uSVZ+J-u?Xl=%Gu=&(cP*YE9k?fQB4iwBIu{#
zc8%__0d{b~YiY9@)$MIJ2JK!j4uCEOZ9^Y!FraD{EK(&Mx(q50(B<(ZUsAT_l<yjp
zYyNG(1|oYrGO;5FD9A*Yh^>R$EF#!1FRglM*fzwC4TE_Syr(V<ff>UTjOT%R$M^b|
zH)G*6DtJxsHAf>#8jEWt!s*mRRl|(e_Y{0A#^EJgI}%oJfPBa|+2p2u)y6!N5p|cp
zB38N=cPGj{yzMY@6=)_q^}Y`3km@=i5T^&AhHy9;aJtp2fo(A`JU4tMXVsY|SWAqj
zKmTN3IRVnxqNPlOc#G^45Rag~iiB<~Q_hWXvL3X}L?WLSium4q{PgJBM0Gr6*hwcB
zlfiptiq^hcQnEl{dL}~~YW5z;`nsa<W=mPn2PH@oJLFG6YF*ISg8K>`;x0RZohKUd
z6OGjdhC6wOxQ9+K?ub+~oiXPLOp4=mIMfU$zpoDho1OTyVQ;9pJ7`e{HhmkY)uH9m
zhGxbPo^8`(=vW_(Q(~y712r(@$;6;;GuBPhV8uCq%*yaU^FIi;|60i(%JV<0ez*Lc
zlmB7)`FG2Y`5(T-{15-Zehk7s`*9NTZTdWz^w{Plf6=~FFa9eUa#pK-k!<HCkXZiD
z8VMvaAXb(k2gMrS!6)xR_EQ+0^A2rZ!BhJRpZ+2lC=PedPWFG5sfFD96Yf_#NfV{C
zl3y_kkw{L)m<`A93(cw0__fJ?-6#XR!B37px&coCU#3N>oI*pMl|~w_j&SqsOyte-
z4oWe?jfj2ppT1|5925K<wimKeG~HN)xbYogS3BUiMOiC5By-^z%qbwc&`uYSj_u&!
zcy|YLj~(nE?w=X%wm^={Eldx}Nr`-p0dXY|7W`fv&~>=C&i{!o8fQ(mxm5_J#VFO8
z{IK_`b1cj5pLWiUk2^2-e?&=AOv@)nnnc;?l(=_sqi|m=5Y)mQCp+-=ctjd@5pACZ
zlX3hkFeD}=XiA4;C@}~yU%3`9*B3-ogq0Uu6y<^-=kO-O9^c#m<+!ZlF{cVZK=y*Z
ztmH^<%u50E%Qpzq6Vej=Kp^u~SbP-^$5^WA1iH5WtYhLezKhsG4}ji0Yav;RIP^?$
zh$G0vOw^6UcSdOZA1M7#RAu|rK}M|xsYA&5xq!H34;n(RPyurF6**-eU>U<o5o<|g
zn0vK<y7Tg2uk(g)HTypvahl#aIobJ5tdVABTRPnNdtTuNv^ogVcd#w>)tdbA=nY?T
zXPtw+ozt_%^VWAwM(h5^-Cg#){T;=wh*@xXiB6iR@n&G6iaA9Lgz|&zctW*{3ji^L
zl*FlY%+7-py+TPh6LiPMGQk}La!Sw^axQD*T?)9GABuYg=M%VyJc^U?x;%0&yWRHX
zaDqn;|BUC)QhF6%clcAgdl`Kn^|roU`_JWNg7p)bDWUP0AJ5UyRt-sQ=XrPqH%<fN
zx^i-fIun+L;Q-BRJ#qLZQTH7S(YIEpo{ZZaqy@mmp53Hhj}w_`nkHlC;Fq1>PF2_p
z`YhZ={nXY017wiE&e7qxc>Y~O*1}ST>n1*LU;@-BXokIA$at_CR|Vee568UWM!l|5
z7n^L6>4q>K{<3J!x&ibAX+U4+h*F9ufkzLh&eqwJOuZ+9POxL`@#MorFo^m$NH4wk
z!?PZrxc(AaoI4$~a;gX}xb5v=bM33~pzi_Bp7KBJWN+sc-g02S?7d{KIUm5j`syqE
z_N1A|K+F^CVyeG%G!id0?NR4DX80y7YIzSw_8+8)kc*Q~73U*5FOT0Gz1lhXt??ht
zY0fC7RO027@SQM2+}~-&ra#W^a;8>Z6i?O4i|OZzRh+DqZy#r?+H9?@7SGn&>aDM=
zwUx)osy10Ko)=Hni|4n$v|g-znX~1`xFw0@P1$nZd>J)ODT?l`WK$yBlk)LXX`wne
z`}nC`X`hk7g=3m_9}^njLjOSeNS=vMUIgy}OR^a)$miIXW{Y1$AHv?V6jw#>hW9$^
z;u8cL8o)Q@z$o1Icmvm5eh5;OdqKY8KV?8Z+pq3Wb!4enaYMg4kTOdAgp^a_r(Do1
zizl#>FR{@i3E>W&8i~Zft+cuDQFvyk2s#$EB4}NI&;dkmI5{zM5`T*pfH<v7KowMQ
z!l?)bzlFnV3a5^I-x51RxNO@>v{yqBK&lWL?o>b;O|d3@=)u=kKMjkrc(ur4h!EG*
z-N5HIJ={7@vwd7t4yRiM-6ba;COXFj>jsJ~Uv05PF&-KK?}IrW=hhZp+S*`+9klSL
z2iMYGSE&)lBAo(bA3qgeREOrawxFS>(1M7Ak#23_G86$X;1QOhx|l}Izd3qy$`8w*
zcTV;}QG~EciU91AB3<239@;3F-gPi)c&Y6p-&K3->sWxMre)FHMlN9CKB2MSfTlP^
zF{g->jY_pKLWolvaJW%8DD-Jd-Ur~+b`qj8+_*t30|R|5gpt&*C(@|0r{w(P2J$Do
zwGAzf4~VXwTk#Ve55q~nUwR_cKL-S7A}WeXszN3Wa`~2@H_|!o)A@~*?X4B7mTY;(
zWQh%{`d(FsC_SsSpvvPq^sd9@T8nSlSaINfM<F9Q6-u=^7_s$I<ox7@h%*cEZ-yZ_
z^ngVOlR~F)|2_XOIFBwT(KzC~Xa2%1U!t#wtu=n=WoqBSu<Ox(x`B_Z&;n_0A(i)y
zTkcY=txWgD-OTEDiBo|kO^5&(Xk<4S^;GcGRXFvM0Z_@Jxt@}7XJTy+I{LPXd?RnF
z)Y|FS1*pa{IQ}9G{_{=_fH^Vht(qg4jdNG6$+yVYsfOiS!L!ccn3CNoWzEHxk4nv7
z$f`3j4&$??DEf?CvHq+tLP_>?MutD1*H=VoBew*p#k{e=Um1UFe#eek^K5^I{<N$a
z&p)M%16>_%id9UC&UE82&(`ctf1c+{TADPlW11AOHCFxTpL-K`%&pfrH7L3g>Zq=x
z-uSB0DN_AJbPg0Rhd-C&WZJ{yGd(fx_L#VZ1L2O&gWKBh-MxO=-F*%_dxf2wgsEwF
zZ(r?M1Tr5sGaKtHn{^FvpT)*}g`UlIHSv{p=6nfi=AC(Jw*E8a<?n!jzg<>+{u^^$
zPJFt3Im?63&UM$^m#5^Yy*um44BN8BSXbGFv+S*hpUvBID^<StJy%#Z#nvl!-S+g#
zu&j0@clYF-yk!*#;$XS9E@A#V=q$adYf~%Sw0jXu0hr4~Z-?P^2VCm;+%-B`zx1uG
z<!!Qq>3(~}cTZT!+q30uZ^^**DdWGJH8#eQ?R~CTGa1pR*6t@8^NSP4$LxtV9ilBW
zv*?it(Bx#&L6E?O($S4|!nUkq?3>)dwU|5tD$mL-oxdg!HXe*<I8A*51Au4Sct-Kh
z%_?WFzJBx5&S~f5=beK$d!3WLANEf6j&}D>3tS;AGPK604ppjnbiDWL?%wM&mmu*#
zg)0Fz<CCGKY&oQ=3|VE%8MG;}Xg0-32aGGDakAlj6|%&#`(B)P%NwPRB6;Lc8Bwys
z6t_t+AF@X#qrz-X#V$d+sVzg=C5XsvZl;K0$kW=t3jWtM(6Zufrx*6aaag+Z$bs4`
zeS>}OD%SZ6>eV^I3TLjQLA6|pClhn^#@$M1iJVdDgfp)Tj;6=d@Yqaexf4n<-H@XT
zJgi|m_8u>KSH4XG*Nt4z?F}>SO~qNM%n#|-#FdmxL(96z6fDuFIZbONd14Al!ZgMk
zsv0Rspt?MxrQy>eGPa63kV92gLA18#V*S=KM~Ma}M+TXP#NjaZ^YpPPZfOo~dAA}w
z+>^NAxc(U;Lj|Y42PakFSn|b*Tap+6(o<}DT7iN3{TGlC;I9WRC(N7w{|x#3&xngR
zSI*Q-{4Hv1XY*~W!=TqRrI}~kJ#y>z{MBdRd|6!zdG$(cy49=O<<h+f_|lm4ht7`+
z{@d|qnT;Q>?&#t%2KaqlN^AmiumIcvKVHuZ<=*&lt<1kS_5Icst8x*i!WEi_q2{N-
zRM79wMTJkT%|nAPR^uaE{;<{ev#8}iclG;&W6NR5JT3Jz`%E3&>RdURN^gEYDs%g=
z@u*hk#)RjOHLEMP_VL?d!Yy+Bt7yvaiwUnDJDFm1&Jr@Ya{S*YxP%F;8~e9B{_pDQ
z^XF@J{NI)DUaWlk82|T6jQ{)H%;9^N{}aOZF4v1A{KdxbeYL;y<1y@YZunl~LpJCw
z6R}=|ye-yt<)|)uDor>Abm<8IOB|ENZ_WwICyLAB1b%r-$vG$r3r0#r^c&gaVwxCU
znUbqr({0@G|IgmLwzq90iNfz^{R$*`R{>>;7G=jybjw3avct2wJrb3%XFQ7sBta1|
zNiYCuNpZZt{nVwexR9Vk*&ZKfcPt5LR99D5S65fp#eavIqK`B87?@?mohdv_=r}mu
zQ!|B0@N4@+tOAEM>?ltvvc?A4Jb+V&fiG&CV1N396{~6#{@1`{Ga=RC;jF6sDI#kS
z1eew*xbkz`wP%2rEJKw^roe~7C?a%-$QY)me3t}NbUvPrFu}i7<U|^7LgrSHM_1<l
zO#Yk#wQG5adg1^%fXb0UCxOKe86s6NSC2os%^^6!B_86=qzkFjB=RFDu|-O@U~Un$
z)dUm(WI&t0*h+Dla$kw41$;k@5?OzSGyY=L1X_)vE2=|`$%8`b9ZLr0B@9Y;UY*r5
zXBWy(GMzP!>9xS7F~z+q_mr#$A$r*{zR-=Gy%jh-il(~7UVbT)IOur^D1>OOGFyuh
zCHlz;RnFPYhk?gV(6M_#9E>`_PWmee%_{{?H`>|~r=I`DG_c0++o~`H=~-wdw(DVt
zLN<yjl!((>Er0)wM0Tyt(q+lbuQ1WF_;sZy*|JC%e0J1mo_L2Rm&e{Q=$mF2SxncR
z>_?5)?z%#~>TFc1g&Cgn|1-Np^}SS=j@4yFbejEq*QxWXWB(?P@@cX{vze5gOIV_<
zmgVW0{8gT@n*TgRf_rCVnDJgVUmqT%G07mjC}|fVJ@l6$UuD)H=QB_)%^!q^N{nSO
zPRfN~FjMmGh&IC)A?7^~6K_C&+ER(P%U>Pcbj#Ub2wZv!R{nI9aOp7M#7aiy=o(e7
zEJBB1{!U@=ceaGps`J0b7gb$VG1byZ`q6aIrEL|q(mw1z1Fm$YH_K$jCa_W22P1oC
zRq3(l0Xt+>TzC`|PueZ&C1(KwF>@O~5|dO|e`uGI&~tI1+!9mi&CN45tc)hAtIujf
zLKhWp59sG%67wA=Zxb|uiajS=mR1xrGUSPY43{aJS%5Z$h{Qwgak_}DS<~X|^2Jee
zpRE%w8~cZ@{H(H4CJUaZVkSQ_B*LoDnxyf__d+o!WA8&-GB<NYB~!F(QA1aJKzB|n
zrXA(C!}DefGk-Q;og5#Yw7k=^!}CV#^!%b6^69FX$*jtd0-G8wKew{80Xzxw6x>X<
zwua#-+~HYu<v41b|DdY=<_;dE;8f<xysg>2Fw|<Q*Y$KSR_)Gm#WItu?Wn%J0Em)R
zcNOM#?a@cj*@8HitVX+G!JV;;48!0~(JblKvvz~Rjg71A6}~nLIOeyJYnt2q0}k+1
zvb@<li6*>LY?q*4@z-lk+8DTMa(K||od-R<Xg%%q;3OwX*UCRC#WsAid#g^RlW(?M
zyxX$FUUd{eB?)qk+d0K&yd%12Go)RmfV)kkA6baw0~7i3Y;V$S)+kCWzK1|>pJ8H0
zWImxj?vC9uQ7fQX*nN;G2e3Bc_}~Gi9MNB7u9t|{thrtUt>x1yN3aWPfIk@Q<R0lQ
zlfWwH3>ncgZCX?kYSx$*-N9JjDX)YW@W*j90S%uk$T<0<F0!MHqLD{kdbF3=*z+hg
z7%DZ~22DTgdBU5DaM}^4DMP^(*lsg^;d@ZGw`8&RBpL>eI5flwsED^t&kqM^+Y|MI
z0=^y|`ZrE6K|R~)fZ~c4GV$3uS;z=Ke>6#Iwb~3i=Us9NeYBk@ob#^BEQ;@;+Lw9;
zDq33Kn!W86<6>}r8ALXg*tN07?nVQZ@DgSh*yrjhi0SIVn5!U;qZsdHarW^#VZtdZ
zvEG@eL(JT3)|ftu{O)no4fgWwZ(LK@5IqdXH8+>-<wC96uvN8jSz~;@%~?Mr1NYkw
z`+gWn?fF4T)A!+x4?_#K@Oq>(n05oh<m)MGeP=wT)^J1~Pbbj$eGs>2eE<rzWnZ>W
z!TD+FRW)BB*izKB%2#h~Nm!cDE4l5oHJeV$f-kdQyQ%8iI4v#1gwBi&<CZg{0oRr>
z9#WqdACwUI#?T4?<&AGP+r9O*L|&it@sb1YJM4RoC9`9Tt|`SuEZuVjSS!s$MPJd#
zt0#>N$Oo(G9$9YI=#qr)pA5XwU+6kaf|~sa)wIHbRe&c*D5@?O72J=NUqR>vy6`HZ
zkJ2lt=%(i3R+wpzF3p&RR+78$@_}k5t%`3KT#GDTm%kwIO@le>bYXIQTe_cY>B>Rf
zZ4FyrKgeRXloO(aSCYlmbjhAl{I?^ZZEY>OziKSYK)&<di$mU<zX30SLe^@%7n!n>
zK%r2+lQhN3d)685GymV${NJMfe>`i8c>&J$|9i6W)ALQ+|8KLtvGMeq|KHc>|M$yc
zetS!BApGiG|CAGOz5}3oo$Cf@gFI`TW9Zb2H{QYN>o@RZ)^=g<z9`7`oh@}>$@Waa
znHl#&(#I*IWB*RJH8K}u9~Io%W*cs8v3XZmc^CNZJn#qNTm9?y!o!*2(R3$3tk;=;
zY@7Go!or#7r;|&Zw4;u^J<IdlSO`FYvrrz4Jcl6zOj%TBfoUe(GULS{8%Sr8ljw~6
z+CZrnG(L?%v3L=(E$!pBebLEV<c_#`%#yvDC8Ve!nd6z1JJ>yrxzgq|n{!ex`R|-&
z)|M{uEjOWMX}#y1VQ%A7lCs;SuM`A_uR2P9OIu9WM9t5RpG<bFkJk1mLa^M$Xii7@
zUE~0t7ZPXsHAlvi>|j*q66ex%%1pzmlUAW~x~b_l<)}2fo4Fm_#Y4n^sx62VEmaQZ
zSzZ>o5N!)~>b#?NvEqKYkFz-HPCJTUs@CV#Bmb{k(SkwAXvsAZ0BanI?2esP;k8qI
zb(QYgM!o8oof{?-aXDz7lybZ`Bn$i__Q!y;SaSPlUbq~A8hAS1F=B{Gl)zCQ&!T}B
zjfHU8s}}O@p0fv2M(y=sZoBL{$b2c9(9>DheVGqkgEu-jx~2qT1vW3bKy9+|hlj7v
z&b=4Slg9a*(=+d4zj4$!_s$Pnm**#5>&=<=57Sht3L!n_X{<Q;TqJL=c^+mA|H08S
z$my&}eCCG~_u>#w>7_|2uv6F)?k28<o7-ZfO!F@;)Z94r67*dP&MxDHK@=3&tI4gX
zjOvL9b<PX=(KV1uh5noyfgjq`Xyyz74ONv*OGWy4LU%U9e8?#w?1Tu?Y@1OfxGFUK
zGO{oI*z*xipxS}l2ras~Gn^Wq8nuZQ{)ooxtw!cn5hj$FF*<OL%yd|TlcWkfp`eqh
za|@@xZbawAQNXrhsz=zv*pu3fJ3+xoi$#@qQWupKEmArr*FVqE-U@<FgvKbrW+CRQ
zg4v*y86dJxHgdrh_GN~;%<ZjS1iijjaMDa~j2C?$y#x#6Q{-X7^e&%yu#k?_%bd=F
zrLW5;8glLavz<Qi&$Bg@JpEc|-OV^)OL2y@>gaeR-O?*#DwvPseNX%a@bV7+L}?-K
zEw3d$nPiDIRJSnfM`Sw6<AyQsO8-I5Dh5*0>BYLvb5x|6wB6yJ1CsPTfj7~#wwQ7l
zmp$;<SSmCRL;$Ig1<DawprD6vqRY-MgpE2relYslmwgm&$XB4(kBl8}cArJ@2BLH3
zH~=QnN=c&76BDn^19SMbyA0IUz;3E3-^|+-3OChT8-*tik3#yvUs;^0f2AjEe4C3`
zR?wfKPFLdSISY47q+0;(qDT_sj2@|wFC)HLUZe*ID&u>;&F-p|VYdN|KN>pyu#0is
z6pD*zD3Wxowt`+|MlD5q>5<yflLvGajDTdFEWTNff?Sz%{|TP6C^NQ$vhyqsEottQ
zhoB{}W@aMN8NcSTXFAo7r0{O$$TKV1(?T39FV%fnBwRVhP7Lpjt{x1=t0!0uX&fbV
zjW&q{E^9f=uRhCgeI95D=<yP$Y%ho9P&RMHa1JLVgV=rW&vF7dKQ9uOz*4R$;yot_
zGW3Is0Jp(GYWV>ce+6;0K>tZF`6qngrn*&+%|7K=@kp2~x&J}}$Oicople2<0<aen
zC57k$HsRz}z7g!)nZo%4_;bN20t}i0U=Ep1M?vD=6R|sSj)~SR>Jfs9ddk7L@UVTa
zyeI{UPrMvuJe>h$jyju{n<LEecFW*Wb=sheqWuB&W|Ti@w~4Mt^LngOcOgiX!0$TO
zNFmC_f<1=G1b-J0GzKNpdHBhJ_L7oLZNrr9BtKyXWuYy~+L3qaO*@Yv2GyC^EMRWQ
zF>e=GQlw$f%e9DGHVm)yIrnZ?;N`xWb?z)>63KLvqKH9&t*zJNn2yZiXNi;_3*1NF
z^E!Jem)T3X%9f@j$DKt>nejjwpeki8P-R)XNO;aRFU?*8Rl^uEiZD5Fcr}8P?8K8g
z&b`b`-W`lbDa0+aQCbn&pM7M=KQ6$|h;p@dw}ZMs%gwW4_-YSKod(Po=5fF<xUeRf
zF}u(X019h~fz27v{mn!(qV)1s6i=LG|H?)hIpZXlcBAz<`zf1R*b&@MN^je5aR>8y
zQS9*-Njax$y*HKiv^GAO9d#u5r&_RWn*P^eYKtrtPB7~>Pqoc;4mNk>tb6cmh%@7Q
zV1)+gS+D%~0YV(zgfO*O-ZuDi>QmfgR*SOpvmp<JBU%=~ES-?Jh1J;hEjr=j(~^Gm
z;o?&q7Ef_jnVB(B?I;@L`*|A^&g9izaH}*pb6&!pE8H)>+ZC=I6**Zwf50hH43D~@
zf756ur=tUX4mCY+bjv+=y-r6`ST{n+|AX?t!jB!{^0Dnadh`blRj3NO8$Z>DO#T7S
zU5=3cqxy%uK7}#Z?e{WhR`L+HK0;6*OQLJG3b|DXt^U9UVI~#ucV>2s<7uaVF@)1+
z5V!jNNDL;dI)7lQmlTdwp04bKl!yEW2!O>Pe>9x=V@#a!u|TKR+_~on#aUb_d+$IG
z^d^y)pmMomshWGRFBIsY`0qA(>Irc2{U$kP!JfCB*J(qk8>oKna1w5WRh`?h08DDD
zBT8!=r1lIDDcnk{H5v(H?@A`10CV0X_3$F@WFNkqj&K)Irwxt&jtRv4R(w)TA<kzH
zk;t2EGSiW(fjFz(WTLF!3ZAo?#GPs;q@BlyjzBqsl4p@>&e-I;J!kdtS`ScNd#o3=
zxQj?8=bX72x)#}bw(a-(TwVp=i{&O4w+nB@H{b0FNEOTrNH4fCy|79%&_`^765mv1
zHv;sEY^F^{>XD(%Fiy6e4HwBWN#E^yODBJN9FH;sS@tlJY5U=av7R>!m61CusqPX`
zbpMpypEdqRI1nxr05m)PXQTeSo{9gd|Fron{^x6q|M~e_{LixEf6mTN4=(pxg#keN
zeQ_dibrLe~Fol*PybW=TD%QDuo7?oQZ-)<;3pdRi(({ju8UN-~bKAo?{|K3O@65GG
zX`gG;ERs2JE{2ccajl`&?j4`1C-;Hne>AyE>4!4)AD+R2PSt;S2G739|F2R0KSS~T
zbJd{|4&Q{hcW{~`xK9g(b^d7?FzW{cv_+$GOMYcICEaL(E_y98M@8*HFywnPyejKq
zErs!PBK~BfRXgeu73jk%J{mdGaWtazQFyZlFBBPuVFFUW(?{=R5a3Bw<RIXLV@@!{
z3q{IDl{kJIUEeVM7<oEGbbsiNCLuBsg;@wO>AX06d3t_`oeZLgt2|7EdmkMnxH=#v
zPsV{w6LlTvV@j;fJ90S)Fz50FeYOQU0*``nl<N1(Dt3&$odiL*6}4i2l#ok@%2W!3
zj@iS*e}}izc;kZXO~!pbZrA!clPH<e(!h3|*49>T^kF6i>FTO;xj<W&GTygxa?De*
z^*Xk}7MD3G;kiXow%orRo$C5;b^Qyx0%v6)`FIeZhs>+}edlTIX>9`tGabW`@7j+?
zIEmjU)2pihHe8S|!3ZdvpvOcUO<}KFUH!gRoTC?dMF*OgmN#?;3p0se9>fAg?|I-r
ze{?&HIL%7-KS`8=Asa*~AR8R~l}QDbfH#VO?K|(R>sb-=1dQ^BCp0GOq$;lFT6@`p
zs(5v|r_ZShOzxGbdW$&+lj%UD6+90TeVv<k$0F8{(5h5(I+$mC$Xa`=jxP`5#`8;f
z-quDKp$z#owK23q3<yJCh#iIKp4+a}lIc*II4pBp{#wkVI97wbtdj9??;G_k>PzVY
z5`W>D$Wc4BiG$7uy^)=SCz|lpf^}|y<!GE0_@F>r1-p}#xb%7pOFF<EnIygl7DD2p
zb4!GmB{yiVZwGNAwkO_<<*sL<16>Z$)p|sbT@W~bPD9pGDkiQ(<UgiYK(KY8#E9*%
z7slxLF4A$5H;UDO04D+N4XTyxdBVw`x+kEh;D$P)pLaMI+1{2}=F&DVqS=d(Lo!>;
z+NClHq*g_ysfB`CDwq;sPMR;!B})3I>JwI8R^Z<cses?<$<Z5Ed|D~d)a<IL{YB&W
zP=16JL?ryG{V^7R{mx}ot?SA{rZ%h!?k1Tg2Zt{%Uy=D(N|Q{mY+7`sC&WZWODg1i
zEpPUJJKX<o5mD&vpB|q9VlSFU%{MN#tCj%?QbE;4U6Hk!jgrhR?TEaf`R@uvjR-+x
zlt?h7b{;vgZUqQFk`kh5tGJncysrLpLR|BBf5FibAXbhxX&KRKj01L-mFquo^ryCA
zqe=>~5I3x;inZyy1+{n}fEp%_Dpxy?VzzB`0+uQ-L6Htd;WVKSxIR~N1bQBbIdl@n
zlI9%v0Vjt<N)nK_s^}Qkxk4X;!N5!`2ZM~mAYqU%ZES^#izUD4GCGlGos`T~_8A$8
z84P;1L=vj~NJyl?_@tan%1djjTAfUX3yYdP&Zd%o&uq!ftmQ>s*<wXkmsv<eYd6}-
zrYrWh>_vYL7krZ0=P8e)XrfEcJrG0W^tCXLNT#StZn36gE~}(7#x0#YMswIA`4fee
ziF4V6kbh322|DPcZJp9LR?RxoN}g_n!V-?~KrLK<C<q{O61+Bt6G$znRYl=(Mt^sg
z%P|@5BQq2l+d@GdmC~nRHb;i?fpKR+`6*20ch8*+BfrZ^;cQXAU2D!}1U+&lcrSSG
z57gGNsp`jRcRTT`Gji?Z6^66NU+Cabvv>hpfVz8f+B)2F{2p2ffNL-|?mcMC!Kl+8
z`Y~D_#5yyIl)!oj_}B^ik`K#i9K<~w-l!8)(NKY>U|=4cw+^z=&Ck!A{j*DUhWTqx
zBj#?1%Vf#A&SRT>{~|n9$z$0}@$reIYGSyFgY*GDbGqUCunR9h6czJsG0)tatC^9_
zyr_eqH*tWc`~K)E?VX+B&u)1JrEBHcTqhE4rg-8D<_MjM=<#lrCzdA0u%ap+YLJ9D
z+tU$r+#5vKwDU#p`HwOTLw4C=FzH8Kjy>=P(bZHrh+6BPn{n_#h~A{ID+v~k!{i+x
zOd#`-0hEES@Ekau$cU*gR3hWqZ-VDRxs&R@1XtdT;g=$jQ@GqqE<d@OT)rl`ELAqp
z*oNo`k6vYY65Jg<zOH1YcnO*6rWVlJ^<CtuxmF5zbBXKZKVg|HCgNu<h3*}dlVaJQ
zS!?MX0J*O;?<pQ$hzDm$@wMwR2VIVUM#9*~+BpuBX?k%wkg>9+vU0EL)a0-E9F={J
zqHd?dEMxvuRL-@3YU9vQTRR!-<wX<DpckzJ@3_(W%^gSA*0P7c-JvO3{5bY+;U~Tr
zi?d<XK)|h69*<|*r4{St(@=W=)b|YY)vV#)gVN>6C1WH^V$8L{mmcfpmpRsvTAKGB
zVRO{OY#q0I|51wsB$Br2r31!5c$0VQtQN8k>?(j2Ha?X3sUmEa%-b&HH(R!ry>2k=
z_;1^0=*&8TcHZ{>f9Gv582rzee$85M_*NT!F(Hky#zTP>hTegMz*1S_k>5_D!E^#U
z4F=35wq_`Ur_H!60|Rm^z<>`KsVWl*aP4AvLwEXHTaw4BGj1}}Xy1mT-sBelwvoZ{
zt1$}`(oHzjqe~5CF7|5oZjLydUD&pI)|L+|ixCgl&c$P++7gVur3cW}&P&6hmL9S?
zO0nv~vCuod(~pDR?)T_ak!(GF9E@t$;kyvlV#ux)_}Al8;n(FMDZby6ZeHv7=gwn)
z51j=BN2CVftAJ=42Og0uvY<7MxDyUpsMPxSN&P@*OEpj!N1&{M>_`$$>xTXyaOevM
zx7I-F>&k2m^T#s@zwlET_%6^EyZ$sm9qlcMf_F}9iecJVsR-?OXzTtxOhVKzU&N8$
z#RwT_tly$YBWE6Dx>!91J{olqVBQ(L{Kb?|Ddt65F3}RW@&oDYx%!?KMe^<<x0y6+
zW(|iLiNd{LY~ELtx~K?~(-^dP_K$oFe_p~m_v72bJBoQ>Xpsg7M+Oifq#-_N2$~+j
z0zqiTM<NXEUbmgIqS16$E9EX3<&FBRxV1$slKe7t)|~^y#7#pY6#4KhvlCfBQ+942
zFFP1&4K4M?=aq`Hvr8l63^e#dw6yP5*33@R$D0M?Bth^^1&`k&JQDXD+MLj#=$hQE
zM6D)OrrEU62=4&A3ev6tehLwh6T?_RQt0DZxx+o+bfxzzPOAZ*wgXBmLA4a|P7^<*
z1DfyjKwBD+B}kn3@!n{tJLI#i(}q1kT;_=U^B_A>w7_mOtSRx}R-81s>2QykE<(9&
z52izS@9GLp1Kx)z%(8;03<(KGcs~f8B^R4AazNm=fDn*hl#u|lR=A|f(w1#qE;hXP
zN-f&l&`TYl8CpuV6(QN6h2YX%8hqe}DN{kUFqE}kkpYvoer<$J=EjYNM#$*=h)okQ
z6cy_4KPoBQ3h~NfmKV;sfCu|&>uWEr^cB^AQUzV^5Ct&X|G&P8?`;47=g&8veDnYR
zI_p1`cly6~^{B5lYJmQI(YQFw#15EmvJnJUmNT`aX-lVn_M_ntbh0+8Ufc+$mMj4H
z7o5*qK(|hxZ4G+McKSZ+J1XyBt0GELnn^SD2k_o9GO)4xI*PmGdZTdzBGD`f1x`#E
z_AazCn1e9DQUR2oL-(M;zX%e#4%hG2#kMa;B`gu-P9wCLL|JTv1}O<Z!-vt@p>Jf)
z2qwnK>Lh|KHDFXHR&q9I{yxM!37oHtG&`wV0_XU{c6c?#6^0i&132899&Yxz{cU^T
zkKUnz-g8<&h|WHC@>>YA5O?~w^E5A+=0J182jth8mw;ouT@>}}$?@4N1BrT-Rq%(L
z2#M!7Ltd*dVD*qIWfHg$G=mOV*l1wg0M^9-cnx)JyiRi1s0WJjTgmO>5CK{je3)R-
z*4tql)JSL}?W!*umq#u5c>;@}d2(?0n#wBFbT6phkpJLCI8tlmN$p9MtU%Mmzao*S
z8`hl5B+!;SE?Y~H5DF?xikTLzYRt`<f$l*FASXb!M11+!urOO4=yF~v{vssT4vu{I
zb!ADV$5!TN*p{nCtU|&$&xn1WqDp1cPL;)Fx%H$9I!PCkqVEZmm+FwR4$LNn{&+{N
zONoOZ(F`0?<7HrFk(f2^j9s1&+qiu3&{^Y5Qgl|zsd-jBL1hX+sj^ZcyGNE!Y=p!C
znQ`K8a*D2%L#d#GltM`!59gv}q|iiv0@y{+Y(dDaF%VOTNUSAAbE=Az!(TXUr|=hq
zUUM}15`T58d=QO(oZ!v`(uQ3Ty1e$Z*<lOhE$UQVfW0fy0+WfEp0ZHO*?khKel-Ji
zy4%NNdn<CW`6~8ca<8VpXoc^WF^We^ABNxU7K~y=#J8C>oMnz^+n!vNO)bTRRG>Ie
zO-Z2`;f+Rw4#ne1I7I&nOpsbjWy1q(EJq>Cxa<m^W0nwidV;+0T4%69BEbVT0o<41
z>q{p%DE40)!Pl2Z@R~m!4{lFKpoyL1(MsNE+@1V3w<C0<uhGL7F%>ML@ZUAz&ZMg@
zE}5=JLGNM7N*#PbR87$nC=Aa9RYgY4rTw=8=f%ujn>eG<xs`qAl9eU)`B<Er`3$p<
zCr+lVtPEke;L3YBK9Q!b)Ai8?*+_phoF){bfqvUSZCBL+sOVi#YOv;nuj$)!Rw?KX
zoo@U)6HHq9N7SYZm2nIYLi9-%C*1+2O!G%{@{Bp-Y&c;{fb<eh9e`y4o4)^E1o01}
zE*31)=W0F9f>VSh$Gxru6)`APT|tT$S^GejqPBoItD*=3E*i{lVB{7ya<w=!?%@mp
z@^R(C$Mfi622?G~6;~<(@pDpS{oyHMr=7buMatifn0CA`!T$d;iaSAL0E7s-jSd<y
z%3lP`vH#aMpVrg)-+p@X(>MG7S84xWr_4X!%=oy-U!I=tA2yDfuTBmR8vFZ)7Z(|O
zer}UFhW+eXEnmT8)WeTq8tH3bht9iT;-D2;RJg$^d+2U)<;PugdPBlp3(vzCG-JAQ
zPggq8g;8>goCqTxK|(F&Iv}?@M1?W{*~s{6NI8{H=9C!eWk|Ddsga^8BW1cNUqi&E
zSh{>jc7}ISA2@ru?1K0x4e~=#afdXh8C<3MFarN^fq4MJLv?(|DT6Ct9axnd=2gZ_
zb(8Hy-bdh!*oDOLLg4K3#ZhzLYo4?Y&tEq758W()N<rQFOyz~_r&2Gm#<}Xq67E=f
zL2Li6d@Ktu6dna<8A(lumYsAiynG9%1<y2c&V>hugdo0vA$5Z}V6B!Vp@*<Tl!I=y
zRP{Lv!HbIaXs6XVP1tVhD#BMa&1RHUPtgt`(6{)NRgkU_8R_KIVRQ;GDA9NJBBG{X
zE{M~70)_bRVszn8?nM*^aZg(0;9+??AHfa?TV}AAEo^&6+meXW;63VI45KtGhP5Lk
z%U;2J%}0|gNOZnGE5@#xGn!TBCS4?O9zhlIMt|86RGp>MdLiB`B=Mctpn$$1UjTms
zvWlMOt|Jv+kh?w27N`03bqe-5^E>`3d=J?3H`(nnTd*>xO(m~7PdfVD$ZSG7SXU{J
zzM%+%wwBe<nmshd&rqPt$7koLJhLm?<mrpqFohXlXO_2FI+dLJl4&Uf`YfcO@F5@0
zNt6;UWGC&~41(Q@T<R6MNOPyz1Ut3?kL5=+@+cgOU37Aq8c8g67;tq45v$T5C$ws*
zv6q}I<eDr$YulLN+(O5>q#sQOT{?tcNAWwleDTMeI~4uk;e3ucLFp9swa@AOCTfux
zdYHK>Z}ZHxb$oNt{v!OpUrzt}>-LTr;OE$XpVaI1wEy>J{n<DB@7J6E{a#MsKYz0x
zv$(wc=Re<M4aVYW$@K9nvvY^!UUgMDt$CPu2;~Z7{cc`pZ~9^k^O!gJ17**YkyCa)
zk{Ov1sAruFLERIogO-Wts&iXSr%X~<Wn#&c1H~N!K;`l4-l3njc0L6W;_0lLjdJo`
z=0tpoK+$113T2>Z=Ci650h*UZb8ae)0zN68WgS;-^Oa8Q{i#_XlS$;J#%5Vgczdh<
zx;r_Rx@=MbejZ<;d{N6t5i*xktsrKO<y-?YXWA}>{pL{X6SCtnm{EN~l?88V(pUIK
z*y$^aG3x_zrO0i)G_IV@exHI%kx3WIE;N@YJ4<G_nS=ja>^C#zk9~Z9+fjk<R-Tsp
zlFTX%Ms9WiI>UDZRO>Jydu|F08R#E4Jv`%3(iT4sIP*&v^|Ik~FbVO>&&P<J!EMcH
zT}LX|AG7nkr+-Cm7w1}K=s^&a4H}Nq9N^Usaz`SN*J%+D4ktGXIoIFY-oKo;zhfPg
z_n-+}i{IZ4f<KdikLg(6R^P%PUD1YqR7I<6`cbI?kbkRe(q<Z-K#V#}plb|y!eMjz
z8GQL$IA{Za^@~n_7<MO|Z^`xF`Kg511Y0C%ucRsbBIx+&Oe<7no2Vn<Ng(p8N?fX>
z7lWEC9C59LK?FOKkD&z5VE2Ck@2a#rGPSl$=&kZHsK9<~t`jp*1iUjHk^h74kWy(=
zVx$|tgSL=VM+9AWhQ|j?7<7%(hiGAaoV;Nmh$=fG&n(I;eL}7EuK%c*!D`xXVJ^&K
zc&f|$CwR&8^HbC};<#k1+~h8*U0U3)%uM=IQau$JfMz)k($<o-G8-aICgZeH^I~35
z3@c5T^cXZcN+tNo&7JZ9lC2@rfNHT8@_j%?j^&^Xt#4QaE+o(LWEquwx}hIRZY@i8
z)d&cYJ`VhZz2Vo49A-jBY2Rg0i$*zj3nM7>9B}EcqON$&_bf+fDRz2Jf~%n$4l&ns
zCb(CzKYFAVEX|??ahG*EV;!bDR|_pCwU?E)>)tD~r1Dg-HkZ31t(O0JtJ60>zFyKY
zHS~+6suzndh0nK36|Rnl+OF<&oLy-9THw+~*c-oHrF_-*Kd&&->2BrU=iLANRIfjO
zp7#IW_?G|Y+x<_`{SSr^)W2N<k@$uG9iBIiyrLrT+4p(D1HouIB$LmPA75dC+D+V?
zXp4bAo{$M=PTqO+j>EJMoNpeJE%0v#!FbYFnPy~xJKSW|ezlzfgSig)<758@|Hyof
zNew(q$#N9|2O{D;0yJX|#1O={WU&$%t^fWzw>~U2+uM~z^cwrW9lkto9-bT=y-{>)
z{dRtM@!RRqK@kuap;*bYCGxwa;8v;{EFVI4^=O=5d1lf*Z#i;^b!bvAuA)m#aXQf=
zw_RYmJnAQgDgC^k<W5_DGW0kYM(+a;de{LlswfEjzCEY;xzYTbn=5v7kch}t7-Y=H
z3y;Bz=T=G#rXV$k|I%L+G(W}!<pZ;WbJ;;AV+emi&8-gji-P6=h}M8384N2-ddJNZ
z;DN)Jhv(kQ=Ig@)H#58fa3%lC3?^IJnBgss+dUAovC5*v+{w9)IBWVKoKO)Hilzp|
zlg`i++HDfftJ0-XuPiW#P35sd$QR-!4qlRRN63opU=UqbxhpDp!rn}xCbh2>Zlk0*
z77=F|e%0wXPiu98`n0xLTQNLb4emwLxI|+&UwZA3&=eB{M4`1`70$JTxnGl@72INf
z-NlKW_c_geRkElAz|)|YZLZYbF$PyPajjZi?>UB;m%%+2k)*Vf?Xa%nq$=+p(k~b{
z*aY<+NYz!PMAbJX<d)qH+S4mn-)K12H@`Q|U8K9XaL>L(vKGE1niak^$Wz$Hpju%Y
zlbD5#h=$_6MdUzdXK6MtL>TYSp>!1Z58N&o1DC`*Oa>OIX+{-9@H~aVt-v81W2`U<
zF)yKh9GGS+RYO4>4pwATUdRc&G$?22kH?4y*~~uFtF$LoX>a@ZKTn+XJ?DeO0+}uO
zh^Kbtf7twSNLp1g6-l8x)0o781S?|o!Y*cm25qWlD;CGBRh;ZOGxd?3<LXJZzGsWB
z4^qWoo;=0SC2<-8hD4sUyV9S}>B^~N-Iy6&9r=IVQq&Dv61t*K3_{53_oQUM(CNuC
z`xR<lnSS9v{#SmJ>bHq;TLq*|O-kvm<Uk*&D`sQvihj?IsuU#`A0GGosroc}`^0^U
zfr^Q|E2rL}Uy@CqvR~2RCm%2K6ZigUd6@g8f3tnQ3Wo2OCJc=2<`ayL$MtEYg4yT8
zQq+`rpNgWU{3#yYa0#O`*1-e!Wc2Y##`NiW`W*Yjr<GqOMGN}OQgm+pvt{K;%Sn_*
z3Xk(U-vf#t0=tzZCQFkF`xa>Cw{6zdzo&S7NyzWvT!1&|;&w|Fcfr#*d#UiPdHtp?
z@5_Jvm8`IFw8iP@Mm~HBdVY`Y@~J*c%m(98vL!u<un+XP!hYL847~B>jCR2UtHi``
zG@^hoI1a<XeyHk!tqY$W04Pr+FlC7|9Xph19FBME;yx8mHrUrdK9C2YS`BkpPyEii
zf$&`d91$Ycwy9#86GpJ_zef~B!nN=(o-Q2Mi$T<Rx5F`97+VYS7Bm7hL2MZ@vmzcE
z9nc?`f9Tlr@ci?jV-QzaB4Nuzj>2&oolX>YMZP?;X!ZSZ@VGbduhg+w=GP0D4>$`v
z8ohvF`py%>>P2PvwpOU1Pu*EJzYduJhyEW}2L8oS<AU$pCn3f(qM7QLs`RpPe2>zh
zIX$dRj&0Wm;XA4;MBf8%W98t0JKD^2gseZKqm#KE0f3y2;P_9a2H3-f#!9{reE^DU
zlpO|OnNWI4XAnhWXdek{ALSBRPdKGH(2Vk{M<X&upb;bvdMGrm14^8Z7h72D(HR8q
zg8>TzDb@Ltm(v*WKuJ@TZX%il<Q>#cjN*q<NMI!d_Q06L{wNtooG1twqQnKtMc5TL
zA0jvVY1~CG7IHp#G4PXpMQY_>d<E*9$JuZUOWVvnrfdmek?&63$rx8MRYD0wt*#<K
zh=_FJEC{`@x4j@-dst~`G6NrdB{3myOw(`rqIG_H^2%$T95qi49nssG)Ac7lrJ|?X
zZch#!^ZNzy#AT<>)m7(v8R1G{tXN_KJuSN5SGMJDwXt33{+$}y0>cqEN7b|!xILQ8
zU=~K{p48Up875pEkW5))@#;z!jG$x{-X!H6#$p%tIt*wgAR<KuUT8UdzcLpH8ndOb
z`iD^3gO3}pfohFbbKj$AV(-P9*5So>yBm}!+Ad^4#i8XuXWfCAM5t6ZMBB5Y1~fc8
zxje=%ToF<Y&8m7dl|5(Z;e2f`IH&v-*RSU6NI!_e^{}=t(De|je1w>fV~UPr9e;$@
zQ3`ICw%P=>4{^-_(s&R|eoR1)ji$iUw=ji9x;L7-_AS|n(d7&TE9qH?1POMTR}It4
zcDu21*$$ju7$*}IRUhz%TU#2rAGtPz{i^5LnBc7qn3qLqZH776omiJzzhYKoSq0WC
zl(x3#x_Xe8&C%p+61(QiRh(7lf8Au#Wp?!@ZZ7ug^-87UJaTehR#xn*T#GYAB{tLS
z|Cr4FLK~$TGc-+upf^D-kUJBl6-ezs_t|%LRCemB1t$&RLvAMQ%=L5C$z;hkuXc10
z41DRV4)6ovPltcg_AHWQ!@oI^FPM^Dy50GMao^Y1L@l{@?@-SKq2=PgGEF<5@yNl>
z94kmI`_EmxkUl1D=U2Qs=3lq;(~k_Wkerl5y_)GLLsY>V<m3lquPOBdJ^Kdf2Q;9n
z7<NuPlfA1K<0VIA><jmrSu>^5Lr!qUBLTXQjnSahPjsR7oYv=7>+?CSO-!J5P^Ps}
z`#n`Ro@y%T@3i!314qa$60hoU6v!p)n7v&%Zrx^UiwhGSkuw)(2B9o(4N4~tZSk+|
z68)3+$a#tW&ll+bT=#DQJNC!ojJ%_WtMsY*Ue%emSr;i93YyS0U@M2dpi`WqZBDag
zkw)ZQM}FN+lQy`SU>0^4_*e)LYgFRXbNci=|I?U0Wj7azn)t4P{0(0cpJNzSE(HGM
zrH{*)5Gmohu~vYM1<Sw;cAudSr#!l)MM7Ek!0U7mEG;KEZnr(Xa_aSG&no#GVkW@h
zwb4vid$he!sIO{z9f5#)J&vV^5mVflJwu5#65aGds$GZaUeVZtm*(sef)=cVQcB7_
z71SWAuF!L4=%~B9b=?Jqwb6-{&bfHH6**<EqNuB^SslOACu0DD<<JzSnLoM(ypme2
zCVc^@LI94*F1fVpxO%u)PE4i4wAuk~%J&Q(4}zN)w<vKCfDY?W5|KiGMZ*f7m)!4k
ztMlAe68tf48T+KAUM2yt2K4b=!2l}G-+$W}adx|HJ2x!nyCT_xN|A@Y070zrt#TnY
zxlxf+q(z*PzXfp4Q*h+IP14PTqZ%6a5V<I?fG>KqPz(WO(h9daV;#$-gk^oRM6;6z
z0!P7tHJ)^)#DyYvGP_7{yeS%H;8zSF#EnuIM+Uc54S|f4fu95_T7x{E;43O|f&RXo
zLEt0%bEafj4&*PY2SQ$0$hnk4xx-Z?$$}%gU?>AwH&7fgh0Vl}K|4%jJ$Y)fw5E}g
zY$rj`ZAJUh=#S|Y;Hk-4RTES_pZSTPzk!C5;wrdVKM@tVjMfTASGVHxh54P8?AH4J
zJoWu~=6if%rL(oZ$4ONm)Xg_3PE`3JN%HYgQZrJ%4M6j&DNHiu$gH>G+-5h(^6{Q5
zJl)muScb&V$%FQ?rlIAMxUfV52?<w*>4!uA%34ReyIOa0H!8}1%IG9e(g(dD3uNi)
zY8DxVaS*}?I?w|;w31HY(Xrc8wNK&6dp{0wCAylRz9Jc%8LKplT&-;(PfzuqF_2$P
zo`A!KL1*!>qOeC9qd%1j!3Tvx%GC~&XJ52V&l|4}y?yuzTGi#N=80;qtIlD$B|WBX
z)uJpUaiEodBA%H!(E7^<gJxPsb8Q-%2vurHFrgF%25wG{@KyrFts5;rs^8Q?TBLJ9
z90~8^$~Qsz@PaZmA9F;dS|85r1u};*g?v^SG#>3*8v9yFhS@ISw=PSr#h`pt8^ImV
z{Zb8D*17-xp#%T7)BEho4n}N<^`U)+6A$-EEA1L*0)nOz;qF*tmmn58r56G{&jb3O
ze}4Z8j>HGzgae8u&qp55%sdiXN6iAwxNmF5Y3M~da=Klvw)~wv1f?p+7$LQC2d%Sj
zYn|{nZVr0aOlzJg68>rwwbmnF+so|4hPoN+hsJr<25g^Y7t%E?O*1`^bfCm+P?oBR
z)<m`D^QX)x<Z4iWcH1-}6%#qTu)tyl3~hM0p8-P~LQ)2da;oPH#Z&1753YDFqITX-
z>C90%S$8zrSm$s018{)TrmcxqLC~F?whl?7><Y6e-PbBHHnEJP!nuS1+Es6c6b{|S
zY7o)rs+P8(Y6>-$cD-XbS$x@gp1kzs>;H?3!}FHwerG0NMQrQ9QFzv$!Z<48BARMJ
zoivWT{nO*K<`DomJ@F3D&p}wg|GzoC^o|<`hh76C&0d{1j*pusue{?XboJkdLf+uT
z>*>KM&M*u@`#}YxXnNJhJxxtr8&(?@gy)oG+F(AxT56I#5-g}TnuK&&Po>wo=)E`u
z4Jp1A2|O@uA=&nPni*SVF~@x_#%(AoD5AyIudO1MT#eNl<0h~di}EJ#b~m=2cRSc_
z+j;cpo#Z{j9S%7zP0X6^NL2iJSHVo5174}{*Y*+)A>X*O40jfC1s#Pl(YU2qCFTHa
z5>)xT5+bjJ6fugt5}LeX)L`Pnu~2|fy+v^Fm~(Rq4^7wLDC0`IlF1kQN!+YIcS&lQ
z(8_R6l%9xYJ}XL1DN<IYQ&2T~rpt8>Af2SJFY0}P+=8*LljsqxsvxWaXCEKw3cYJ?
z#ZKfj@c+7%^N1OUWII{+M^~#@h1XeVoW@C}!0^2NORN;(dG{(dY*(0K><V%y<(JdY
z2&PAoUy#_y?gT8_aA+F1q^1mBS9QHv$0xSh!b;x97r*aU?L?T)bvPI}Nd#<$p6Cn}
zHH&s2{3*I1&N1^GC8QEwBy=k~a;BqeIN8x1ktoB7$E_}3p09}8Tu$XhTx5mkvVU$g
z=(1-n>`QIFHKM)BI1JWSL|ytR=E>wT<%nTF7>u2Xe|5#<wg)xo=ecmzY}Z3EU(Muq
zI0bb<4C(x|nM4IDJxAWD`R~6|^_(*Tao*=3UEJc%E4#bcU``)Gt!%`>?;^jfk#INn
z-W%^GY@qqq;{^8txE~LjiOTqJc7Ax!+;6ntc+@yPgiWM%-h53?d2%dO6M99uTQZ9U
z(yx7h7k?OmvBmGm?tq-+!(^-Q8EmoTysy{bnfrdTl4(oQ#gZ%P6KfCPZ}WE36hFYU
zeCHa}1Y#)~2wFE!es3H#54_(G>0l^+$OJ83^Cj)B|7gGld;t6F!D$P3-2KzTmoL44
zG_jVbXiA&#<weW8gomx)4!y(IXGhKbW{Xld<A!~)2-bS80O{}a7*c4987sv1PKcJr
zQ(;*xz^F1iS%cwvV}w}k<ni7fR(+*(4%;%xfzFRNTW-%td&ddybXq%eQ<+^?YJokM
zq*;VF^g~fQHemgZLljggV_Cd7(1MnPdpn@m5Q?7gN4El~Mu!5Ptj#%{l|jN0*&>Hy
z%t_@$xGVU01|_!~->BhS;~Wm^zyj;AHyz;d19ut+jfkM}z_}!*EY}pSag7I7tdz{z
zuQ~rD5n>cUr+p4%>7n(}pdoJ&KPEQ%G8m>ED0B^7;@Xc#xMvWK?}K>8oM3`2nGln+
zVp8Zw&ADKo2$&2i?i_MD=69p$nv;6sNerKGLQdM^D2a@CJPdq1v7uFza)~Bm3@m{O
zj-&U=0a~j*#Y)0z<=)vT^K}=in!%A)y$aMAQ#o5$k@>bIf7_D3ZOQ*BTXONfStv8+
zZlUFK#_D8*-x9F?C*^+)I~>&n+d_YV6tHvhzdoryd6v%q`gC*STmILtG5_m=#ru|!
zK=$iS{R;cQS!4gd4_heWf6Gt*nrV}l0yaM;m<~Y7YRU=WkyCQSfl_u4f}T(J|F%uE
zC;a^rk-}{}j^YXD#@AQ*BKLtT!~z-z9u+!dN=md<j15nyXop?ScuyB7ppb_F`wC#4
zbkNvk`GqUSSM?%%mTRndrwGeOR8ff5J34I~q^O$LK%7VOAYGo!g_UkH1Db4zx7mj?
z5fd#+VO9(+(?-A(S2SIoZO?M`NS_FP^ON8FL66x5^xTc=ihSPab~)Hrc;msNi)lOk
z=on6(sYfRP-V9rh8to+Asu5rDqYbmwW*R)K!1N>kCjAJnqtmDc!rRGu`eJ};kUh?W
z@O>JQi$6iu+K<i?dS*U4ycwrIJc_Pz9@WR$M;rE|v*=oVsIZV@-!KN6q#w2XOsfWe
zX+7dEtw;Q&HSTcK&-U2Q_LzQj^vWL&ee2Odn0@w7%;sPZdvJS|X?7U*(tmfX2hA*K
zeq=X$0o^r69h<7>lt$KS_i_+{2(*VCgoEsSTnU2B%GKP`xtYy{=WNYbkkV14MZaxZ
zPx`PtQ>mBD2z`yUxEB`8E@J3TN5YrQ9Gt!4<ssFU3TpLlWLN)2cJ*&$SN}$K_2(@0
zjqFn2$S(Db>{8#zUFvt4njJQ&z}v0V3wZRwF=07I@`ku$TvxZQCJH!ZqC$5p*yiNy
zf5@M~pu6uU6FnLLN~y5UakJHv3DE7wdX%;zplzbU(~G>q;B#_^bp49sYO6uTG;P@<
zWA3oW$ItdwG9?gYXs~LtGRAI&rAk3tb$kx%7w6j)h623n)0y>?q9r{AjAxJ6+G~%?
zqx)sxoBO;|@VvXw_zJkpcOE(ILY~sJ5YU{$LCX>Q-L4q09_@;b1TbH;jf5Qjqc-N@
zC`NOAxoE=a<W6X=&qQ<GUnY(t62xgV4Q$r^S;)Y5f8+^_`=CTHixHB+VxsHWGrG3i
z868dscbd_)IgIysxoCF7_jf|`@k})7ZYCUw@@c-jAcO6GnG_C3cR}4Ml@6k>U70%)
z|K@>5;@{kBB>qjwk@)Q-8cZif@k1KaJw<Fg$y<Y*#Er7yKns;o3tRhh5gqr@o5~+u
z4VH#wd{vC4D*hZ_X++_J`G~f|30;`@oLJxHk$Yl1%5Gh=mZSrJmqizjCridQiWfun
z@AAmrEFasOMbZ7A^5_DJX!lw^HvrT{8N&UpJ!jkF1B!XE4#=)y)rKn9ZWvAz@(QRw
zbK18P^aK;hzwS4lLF7yJRv%AC_Hju7WAD^8LdO^%A$j+~xzo|Q910>+S;By(pK3&8
zW2U~)ThsEUK&GoYLG7xx<-o2$|95ZoG?>&E8k((#`O6s7zc&7@hlPLG|14wr78|4E
zteJx(BFPr+nqMhDXpu%QF&Wh9PPrhgaE75$t057m6I8{Gy#NBA{j7jgou{IRaUwl|
zu=a76SF-{S@`S+ZZqT?6V24}I2W)|awSLx9KQ~i9pQxW73GzOdY#H39omeSN-QX6T
z3pnIY)Ldq7r9q$^dXid^yUaq<3Pb#)>oRvb|D5{W7-+rtbZ#y(opDfhI_FKV{<+9>
zV$6!=l$Ge=^6=Klt|;pj!{Y!*L!mKGb0EXZ5R_GPCc|)0TBXiXxB@5SE^M))S3F^(
zmOQ=9;Sy^Ie6xl(QtUXlfd#u*_V|xBD5e3P*3GAN`E=8Kx+$MNF`qsWPu~WkuIdDR
z(nva0MMLou)&>05j^S6d(rhkF))g~{p9b`FF)_O47iU3Hiq1w=|6VtK-!y)IQX%8F
zK~pt1lC}nAjdx68gTBT)rn5n1<K2@yTh_9s`z}-XbXill`Os5%pQ*~q;dUGbJ?FeY
zqc9W+REQqctF>AUeu@#HJX)_;YO^VqmLM=0OV)_Uy?_qCj6KM9673-OP|Q!FD*nV9
z@P#yT5rFHImy*m26xiX)EV|gcebkV`&;`se^toV<EHNrm6Q^D)4=|ExiJ@f2;w%FT
zaWyu+^3j=2+TR5>d>G;2xyNgqUSaKVWyP_#nQ6{8<1CRxpfbRO&7n~No`O~Io1#4s
zng^6EzZ;3<4&ylJ1(Qx6%`f7p-0jA*CVuW91jH195iCj@5u3nH|6b#K|2NOWhYou}
zbY0IoI(zoi^MqsUv#0BDqIbp-4T7DzM_r~0tnU$rPsyk&KN+ITN`~(D3X$4T-T9aE
zUv=l-+uu7|&iA{6?_I(M|B$~^goDcU{bY0LoyJC|vDqm(cR=5i_DEDNK0QA3Fz$@g
zJsJ;Rom}pt$EopX|EO{DD)p?nzmMeey!JF4Ou~_;CWZP{-Oje8aFTJM0dsX2AwBM<
z_Lbbl#?)`dO1QG<ITnK-XdH3`Ax<DVfAR2==}?7iD2@LuB?tId`0snFwsFv%-i)n5
ztRdvY*nRL%(EH~Zv<y5`M;iZO=Yhs>*Lj$OIeMYBvDOYgc&c9OQ;M3$52&%AvWZHj
z{{;RA2NeG&_}?@5-}8@Ej3?2vkA4-#r;n(0{Skez`G~%G@(4a#_&C&#!a)Lp^LoVf
zh(=K~zVpF&B{$iwTR0(z-GBXD+1=f!&YwLbLDiZ|!H*tdKZ%~+XHJd4tIx!dUe2A~
zA|nttVxJBB?0wln*z+yeXT#pVmqduM#7m+}R@@VJsnQ9scg2hZ5}i_FOv6msu!jCs
zIP&9LcF^t(Ld^E(xVZ02^BB<uZ7%mSdOG9IcyNO~<lw)eGlFfw3}L3<KqLmxMW#c}
z{fnuHyKyw;lFV|HI1*7!1Z%fRhhB6Thhw2}J%|mrGf(lWZ6kVxaC15DYo&^B;iozc
z&dAts!Nu0lF4fm)tdvN(P_Vfm1?6tC_$@G~P3MOy*2ilc9i9GzGI(4bwVG!~htT9@
z^Q1`;ODl4slAN_`-?74P>gp0(&%e6tM4w|M(CeI$KnpbzY(0M@6MyunhXGyA83wda
z!@$<_hvCPa&%#{(eD3m1_VUZ|7Y^t1@R&cJ$$XQ`{BmrD1A>{n5{HscI->df=9>)X
zm*coTAQ`tKjClD>QmsE<to2Q~)?aK?pH0T~=L@;MDdiR%*XNOV{rMuVZ_2zfg9>iO
zpL!(0&3Mj0poJO-ww^zXL3H)0hXGyA83wda!@$<1hEX4XhJn<_CEPf<UAaMQe2zhE
z%%6|?hB+hf#r)wUe@^{4SdXLVlW#j1qcl%ILlZ5avHkoJ#h{SGalQl3KjYxyg3;+S
z2YJWZejW8nRglFvd=!ooLxbS(W_w$Hs^MYh^IaaPnVLZUX#NcLD8lIzkBB;*KO$<T
zM#LYLI1OQd)~D7JpwIc^!B$#Tz)$pOmYLbGWgYEx050bRq8x5IQBU+fh%xeZ5^%h)
z5pGMNzYmcC(Y{lh;Wx&&y<#fSWvuG_OL)TlyP|I%YbEYeA3t_5mG4%{Glf1u8^qw2
z3P20~=*q<C0yVCB2A_`JIer_nLg^CUMw3J(K2MOhggX?zsT8x`O7@Pk?w0jr(f51h
zZ?DNFWZd=Hf^m*G<oJq5PG`A(^O@)#({ny`gXy%3Uv*5k9)wEq8{|`8?cCXewx)7@
z$qPMs`z={ca$1D$y701Ll;=fGY@<B(<J+0e30y~9eVP{v#!nhY|7g6qkOgwJ=HhoJ
zck_f=p+xT*>Al_J4^`#iidl)5dQaVLuNak2X5E3WEbeUZ;4@Ni0e{CSGS>EFzW>5K
zW+QWjZ_a^qpfKLvXFXhwD7hW;o$zCtcyR<L#&;-uyYE4cW7<-cG|um?lZPQ^!$xaJ
zUkF+P*!Sb^HKvIOdyfC!4+pXi9_4A9j>VzC?+RZ7DsxTA-L9k2k2LGU2n2H&un(ok
zQ_*3EN0F0GXRDaE5Ragz<ER@b32iMj80ekXnz<MzJF-vJU*WRo=I7~%@V*lHz^!nZ
z52O#?I-s>YVXL`F(_xJo2YK4-f%vbc2tZN6D=H=$VSCuAyiNehN?f?sc&rv&)k!Eb
zOB{%7`Q#bG0AxU$zsg1zbEa%nifae!-dF9lL61PC;ylqdRi@)E2H2tiAb$hay4W&4
zLKoVRZ{{vEyv6V)Z%Vf}Oa^D*Yk!4AW+G95MFfawYdJT_Yp5i^>sVeNLPVt;VLQ_O
z)^N53>QHG$LW#U?*F}K=>3oaJWI7TeO=kBMIaelP!EK76CA=o$08>pO<NM^c5ME6q
z7_3-(REWH46p1%hJmyzYsaSlzE0VRxH34cluFnzy>q@?g*Gi~zZV^DIIf7*<eqR@4
z+;u+mHmbdPwYORAJ^3hKGCA>WvQbUy)nv1ph_|QH34&oNV_Og!jM(n(2G?Q80cp3Z
z%XXqsHQC5*w<+6+Mu19gyC<@pXe5zcV9i;|ATMfT1u(U8mWH5|(Czes-4T5mU2tIz
z1|+_kclmagQ^-r7>xx=&0FL`=e_s_SMV3=bql0-zulr%A?{I$5Tgv)bS@r?sF(*)Z
zgx+I1tF=gOBoW57H8W+o@*@>SVESqY&Zs`<yGG1#&TAvuEB9^NP+@Z#xZ=iijefQ(
z$bo#9x<)@i(Nzy*lShvHNHk$hc+P0L(G<(n7Y;|fFB*?}T{s}oVu>S?nU0pr%&^dT
z{k%YbqbiG6Q6YPPH7>;Na-!s)b{&JmKPFg*x7ShJO%PUS9W4nCP<RrKIY#!Rwp^Ca
z$v`1A4wOYFyE9#eRPGXk0AyLyS9I{>Unl=AO081LZ*^s?%R(3(NzJIfhj}U9!EPT6
zWO52T&uiVH8%e5)GdFrO0N2!l8c?@>Qy1H6ZRvsz=JP_-IT8JdotyXp%hd+{hz*B{
z+U&V9isMrIHm07cARyupKE<fL{K1$3XV-)IS2HY!LFA=6yNu?2CRZNtX^hgJSC)mD
z&3LP@u}aO@J|*EEoBsw+I{dq3{*{1gVn7@Jc78RWI9tYtvyMdC%SSu68c_)pv*mYm
zf;_>C_l+0^6)T7*xjwDcvd&j+<q1@f>kq4*&|#V9N>qOU-BO;X)j2}M`t`#rOlUq1
zVhm106%fBEIk~VLlXrMF#h=Es>{4FtZCQFEWP{ouipE$meLCop$x^7wUB}1ThVqmr
zDxf14W*M70ma&<MjZxpmL?=Xas7#UJy(Bmp+ocU>xJ}JYievN7!2gu&Gw{Dc?pYd>
zY|Len^?E5bG2hIAXL9>O+%>aGQu&n5`H4oq<yZb{@;?uvwm*2O+%@|PrbaH#|GfF+
z`A?br&zl?H@;`r#`Jd|>>+sLBdnSRdZ+y%D3?ldF^hM)H7c$5uf6g|rYw?^l&MyuR
zycci0gVWb<;K@n~LgW0r@kZTa3wW0&jq^9|N#ppiN}x~As|Tn1-su_rI5=!InnxEE
z=PhojbL;11%E>V(VF111tf&TDet?0`JHFG8gWm4<S8+5Qdmg2(L=c#c+7MKe;ot`<
zR58H|0Plw%5cBW%jxE3MoyYzj=>MSCBwTgw5}oZ&xtiwMV;W3xc@}zYRgt6Rv$bV{
z+7Zum!6QMv3_Mvqa!;N}tbP!E*XpH#)>o^R%c^sOC6_sbaLIG}g^+=et&|~z^MO84
zdeOrm<=&X}FsR3mIh8N&wBq<xc8+*LN=%A4l;gKQ{NMo{u<~b?S`N3C^s)*|Kh#`O
zDq<$r_Tob!tMuG~*!M#U?tF5H6N|10JDbtCH`%LVxP?2+;UyrCAW|lKKb@xLu5Gk%
zR{0R<;qP8Frf?bB@vA6M6+RqgQz-)#?WGbb{*F-XSfw;*d|L`MrstHPJF*Zs!k5<o
z>S4MI^`hJmMii{2M!|p5K*XE$2>5X+;7K@=fZ<mJik|0bi14f=NfJ$wglUln51y8S
zH}MsA@GHU-fedMY@VFE}-+<@~kbc}*F97S8hHKE#7lcH(d4u=MQqwoc=?i)yP+aMG
zaLZB^;}MKW?<3q)NJS0<^viNU-(Z0>>_T?vm*t1P!4PTa#mrF-{D5-s%MSQ2>|*4D
zm;^~Qh_3t?)MOQ<%{lv}l9uk50hH>w8(Lk9Ke2C;v#>k7r`r;NVX7n1yi`v?+>^b)
zuh>Hn$8bC<8U!>d3%6qc?r1?sPCauw>6Gi`W~@57858z5V>p^n3Hl7wn{l~mkJJ_c
zzvfOjffjo!`{{#vw10Yh+^{A*x+>dYeQb1D9~W|2-5C$2?CMLmDj6QbuS<8jVRpKK
zol^O47*5eh?=?=pt>ssn#g~=IHzUE?iSAFAb_4-|I$fikl<jrX=ysDfHS*W*I&=Hq
zoQ=R{ISgXOQ1Gj%62PC+Vctd)m!l}U@GH>`zw5C%7>ke*+gPlnctBX+rBt`E*>5a6
z&cs9H;y}$y4i16n;o)D#0NpWGMRhhSCX3orCLTh=T)t&M<@T{@TVaB~(<H)AiNzBO
zi=qzki*f`;_e@1#{DUMy0O*2JkH?p*AU1pCNQU&lDH<RT5dv8X69OzTp+9mkA^Xu4
zDB%;TX65r`^i|jnM#jpThZI3CHyZ##%?A8lgwh~LlDYV#NM}wLfG($!CnK##(jSCT
zG+Fo8g$u+4a5H-iLGkHjCM-<gXdO<1TyzL<Nqhjf#s~i3<ADIqfjn7^0Q{mD0nCyZ
zF&K>$|BZOf!786l7Ooab46dZ+du1GVuEcrgYUH@U5gV1P)_X<tznh_u1%<E^N(1f{
zC~_km_5rV?0#jpC9{=mSoxs`D9}36pL(v8;!p2=GC)DU!P|Ryc+;u>8M7Lsf51aZ_
z<7AA;ecMUfMJg%uY(l|M-HP2H4hH1&rdkP0<vauL4Esto<*qKNvX{2%kqPKUC~QF5
z_#<W>u!V`=NsX|Y0$P>5?u*IVNi}e=UmW>-%ZsW!V?kI|vu0#dyU*xV3q54`8;#0g
zZwILVovUAjiHA`;g@&*+IkvWFy;6oXQRR2P0V~XV(Zr?YU0j}>ou0Q24{(F5mV{a{
z^KS|=670j*XA<)1ne6HOuyuKU;<etK={nOkyejlvkw&WJ&5o#6C9GwJ?W=|z7?VgQ
z)maw8jQx4B(H4>w$Gz})i=f?g<kB5++0v4nw^MR1vE`BNcg~>?K{BH!`}4yaANBJs
zbYtlZrrm%=h(|qp^a1}~>x{*fi}J{R`#y-<^FE8F6Y<&o1!yT5A-TAa@eE_Rl9U;1
zxh*gYxz8!;w#ce3|IER1F&56WNki1UoK%qZABrwoz8oUBLM9rkO^Qj){puCH5ex9%
znu?(ye41qlTFT@)K;W`p4AJn7&I)?6^`m0F=;rPU5u>n>T2v%QGz%>zNOHbgrX;z8
zDkhkH?#5P51ryLR3nis;DP7DK5OTM=sg}BJF4nLzD>yr<2T-jRRH+ImuQ;(tm9k)h
zdO^yDsa)8TP^>~}ayxVBt%Ytz=H3}8*UHtNGa)dm;mDI`D-T=j*m|P8zSKe`j;<4s
zuZEcEL<4$qV?9;!*15ZyH+Rs53}cePnv-G`?vu<?DTL*`EjVL^H}uP!Llr;c<T;m0
zl9cp&$hp6+Eb@KMP4z<G=a{`KG|G<@2G%+~INfqO7@sk?Mc*%uHtt7O)q0BcL1U{C
zCC0AK#6OO!@~_|s<DgeHW)pLMW43iu5;n%;IJ&9TYLy(9+^qZ8-TgnCrwh4!F5v%J
z{|Vn&{-5<{PwP*<`G0<e{+}Dq*5RL@7jWOqg7|qc@6oT;|Fe0T@%W@iY5ySSgR`>2
zRl%KuXf&Zz5c?p)vBWnz6p42i6OcN?GrV>peSFthuPd$R<#aHxUTvmakyL=nhm{!{
zH$`kl3H9WxI^!5V^EjH$O}$G}=ebqK8_Gw`Q=C8_lUxiZMwARYaLTt_BPSe@gQ$aI
z%sY;{(*fm&;Fnw?<^nTWVB}=NS@-Ge6To<=KJg@u1_PM{0K-3I#5_rI8Um+?Adp8e
z?NppVrJT^vRG9kPHYSM{#MD8Z-S7&lxZ`r7@LbBgfEh_d$X}HzbdJ<PO2ok^dv^j9
z$V?;U8R*y(SQGK)_J5cFMJ0tutRDC>AqP6gSPx;_qe33jBv1)B(*N<y{s`;qdSnbA
zXpRE~S(6a`dqy4rk$vGUdbUg_HT9AEq*5bvBg_q9W0~7`ng9~xzDP)rnFCo=O&qc*
z&=TgoBck;HmN5i8NBi(fPTD$6S}r2EBx(j5^alPF*%409*8?o+F@!U!$}mB1tJI~O
z7@i53o(+CZc^`Bd2+el*Ew;l`wYBPL$}-4Nst+#4qLderhC|ld0JU#A905MCSTMwx
zQ+@aR1g|NvD3v)dGDutm$ov=vin0*7nOBRlOH8gKyA+e;Y?K<Hcp3oVuTtPpU9~@h
zS?`56>2Ji$w}Y#2G!i*rXcn{GPJ6}Q2VCMe=SyIh@;!4deUyVXgfe~G->&3aN~BN}
zmXQLxh-$@lkTq3i>;N}Daj#4*Q2%Ff`w!UJw7`8V6TK)Y{6SdFWV9cbt(kI=a}orO
zEwxqFGhCtubqt|QR$AE}Ed!1>Huk2JjN<$|9<Q`rST`&B4A03biMX)8q<D7M`QG`y
zxV6QQ(Dv=<`)W}`W`y1Ei(BwSV~fHL6+;!mQ49ku@N}}Jnb6gr!_29MuKordD@<LD
zG(VQD)vw~QB!Fv?Ls(1j5h@3<3+Pl+BEe0O8&ha1`L5>^9@EUZJc0TsEe>5yryq3Q
zVcrylv4JpdnASY0k~_XL3a+X4i2&B6tWSQotI<ZNLg=Ye8|NMp>51suu;;qWT&uRE
ztyC;O@NI5Qm49~er>jU`8w8`PN#Cth*6Z91gfxg?H{9FZK>L+y`LFQb-|hbRcJw2|
z{vdl$C_mbDesq4^7SF@=b^i6EuwWbfqItvMhwW6GJfhk&U)7zbKei`$dxb}@rdB>6
z;%)0(OnG9Zlpc4M1k-M0+LYmR{66f8!D9W<h@>8C<7|iSsXkUr%lDxIf_ZNEbYR%9
zBR)4M4XAl4^c$|u#m9Zp4>>5=&=Bb`!edlfmC{!Ugr5a4GL%t{>h{F|l+2b1oI;sT
zS4AHn);SWKQ!>FOGK>;cLT3^V13st-IHX~q-Ww8VOgm9igSp6;-$^vYB;K?MCAG3U
ziK)2fMXB3!vbU096v|gbXSFR36w~iqbsJ=WSQTuMbFQGDash)Jm8TT`fg;os=$lPH
zeB^$6z>lM@lRDDKnRIq`oWkRdMtRk_?xmKwNI{`3eAWS~3Ln%YrnLZ6MWih~6G%#7
zfOLwZ39xX`-CD8Chc<<_woZZsRSpT3A*jigka_v$9tU-{RO0-8Fc=4PrSvWsGq<$;
z&bw<r?y3u<a1c&zc@bbSoe)!vp@OL+|1qH$)rKM5iCgt02*}?MG)lC2cA@>yA0!kh
zB!Y*gaR6uck<jGvPs}MMqW!xeh$qmbHFU})#m9pBhFcK}z7NNhHtL)quF3AS!)1>D
z&lDC5g}A~WPijwU#f1x{qEv`3v9le!gyIsTkunzLlj*R<i2;-uSEJ14*i5cFT9U*;
zh6;(ddfrePM1;5$T#6~_jD&dNB{Kjewi_1I;(OThzszyz4QrST0j6jd)I3e11lJoh
ztZ9v0KhWErqGe)S_h!xHay`Hv?hUtBbeLR`MP1DIHZD3&5pf-MfwKWedM0;F7&fI5
zglzx>SUB<Qjk4z=#)h7oQFQf)vmm%P9u{8I<-)U$$Ql}!g3RHsX3}KeP+LY7_szW<
zDMV*}Qn)NM2DB1v{YJo^K?ZBo>ar<43DKaN9RScQ3$l`Fn;3?Qoc1*7loE+E>vh-^
z_Z`hgz9K0)*X=DeSlB2;9+>zF-FAt^cq>wwN&cX5z>ghgDDH4D4apyJmosvhaLbTd
z^8JsIutZOsrS6H9o50U){T}|Rp@|d*=Oz{V1ujFx1hhmqto~uVl`Dk65B-}R!fVf^
z=c3@OOR(X8Zbj?Q3Wid6GmC$FrR-2O`>T;FaF)_*KiA#RtP#!?;0pRh{AqlpDd22~
z=k}b{3+L=Z8mp|vEk`f|i!2_0RLu2ijoA>FFgA;B&bm|2rIU$IdQLRJg4JcNCTpct
zj9fR#{9HD|aff|1loF~`Q#OYr#!gtZ{d1VTwJzK2Ik(XiI5p{G!^}QP+KVrDN1fqa
z`OgcBUX_fdIa;yN6({x^BoSs#O+t~#xYNggJj*&LdgF|`A(>BPN~f2N%cB<F5WGA+
zKW?-@d4a}AAhi5YL%J;J2Yxq*Wj5FzUuAGDWKnsPOr!pE5)Dx^k;(2w8^lXoi7Nw5
zMJG8#TvM-^P;vm;$}{)WP;^QtQZ@|jdnK!EtSD_mbflxQ;gn!yS}s<<hf3)#pn74>
zrGm6dbfNPl<o{`M(DR)xfoI?UZajPT{CVpB_xVqoKYhFZ{TlQCJpFcg%M1JD_rvpx
z!|V;NdY*fYo0WF2$1$GMjqn8!asE~C-{A;Em$0kw;%&mWg@^P)$m*Ob63+=>JMuNf
zX>7^2>94o8&d>?zlxAv=q-f^6l-JZTezy);BKw^I)In*@hZ8DLK)2bXhP)Ief8Ahf
z>!nbz&!g*O|Bons5l%R*1|E;+D-aez)Aq;1j4!ar6+sumBfhNER=$kQQ4k_yY%~pn
z0a+C%{yTXJ4^y}lniu^xOr#H_wOy7QH34B6_Q1!p_{gFiunb%3OzzytaI@ZP#mq}+
zaJn`a9T-d6{bn(k_=Ht!^N;P)e3}1W5w~VBV3WX%|D?4b2a4ef7WBMC=C{-&A*P}K
z#4|Ql%mp@#-Ul9Nay!;%3ZV6LAAQ_Pz%&pPGc4iV5h?p~z1FtbQX|g}IXO&4|H1?;
zSJ^5+;NuFKci|6LJYXYk9vEaBan@m2Dit{aQ0K>wGrs3TT>kg+fLDNkg}*f$tIjAz
zm4Yq^wq^RYz==j|D}3y{!rc0>L>p&K+!Dv=01?D9b|EKs8(PV1L$qax^{?mrin~J;
zJj|Ssv)2g6>6y#@4ghJ3D7A9s@*Gp%kbv$P#4A2ND=w+(2^Vv*o!>T8uV>iMGlpBS
z=g3=iJa|m$c!S!Q6tY^>r>lr`OMd4hXmh)j?E-6C1B4>bqL3dVQU*n#9G{KmlgOA0
zeBuEc2!|4H6v5&S{X`XCis?XF)xc$T6TI@LO-ELm)A@g$h61>AkW8v_ervVbUe&>W
zaahy?SCK*ABVH<L+Nh&uW{c0;)Ee=Xr5sH$6vI=*RE#Fu2`x#myl$rerazdWIAvgq
z2lz)W4;$}GDOno5jGiThExQbw3YJu@{&o)G^A|LkC{#dC0Ue9H1)VgRghV#*TgB=3
zEXmB?My`0S+|lLVEHkyaZfk24T;uf`S|_}YpG<ZTy|q28xR+EagsdyUR@ANh95z*7
zsGfCHm?ck5`pR!;WkbUyC0o{qgim(<KlaCWb^xE_|NZoN=KTNc+2-?a=l`$K|NHs+
zg6Vs|`F|Ig5RMyX8S?=@%QYR8bk#P*7i)BA)`r!8IwqeMOT&x9ZjfgpfaW_HI3^6^
zQ+s2~9`70chnXe-2E-?Pxk(vjfF`g-%w0An`<%4Dg{RtG1Cgz*;%2+f#>%WCoAWEV
zbEPmUhFF;mZHu6lK~4DCodT1U7+`U;yAH4j;=%(g1jGZ>)hpR?PV-V_75{FqJKnLM
zyc1uLF0uj5HCJfckNdx<n7p_>4|<@Qm|r0L#2b*4b%w)4K&c=G_fUbsxaOa$&i@`!
zGla~RC3RF#dXqpNQ{LjHt`yNkcp;LaD;lBdV8=JW=u5lAHI(C<tj28W_CtR>MmrKY
zpM|8W>CkYj#aPSOs8U2>`_!LP7~uftNAzY|udowPw-KBLNlzjx^u18Tvdbc@+W8y=
zB-L4^XmgaBZFFt!I6sJquW$Zbb*ko*XHTs^pKMxx)}L8_{*2VYzb{9^u`@S7A{8Hz
z^MMMRSFBB?L!uGglIc+<^d5%u;#OY+0zDj<-sw<~LDW_p__x9tJ7*=4T@kSFQYd)U
zEX~OA?7|#KB%|`&&~Z0Fatd^Aan2<^1--PSJ#q~A!Bo;oN|nY<7X=m5&NNW^0=s%k
zmq7S%5M9^i87-H<yqaTF!K^_b!ghHl?BSk>+cQ?;_D8owCcFmaJ$n?5*10tmCsi(F
zcx4FSlWC`qp(_+?>+CZIiNi4pq9s@1V)HMnO<{WSK^f9>&+h#Mp85pZ0Q4?8Tp2SA
z92JfPm(-L#(T}c!_ZZ5;b1h7_?OSCZ8<;aseT2BXkli;zF>G-IHTVtY`$yrTO7n*l
z7IpzO3mM)+s`z;2-g8>fIP3_E&}oeK%g*=gSiL?(BmDPbqPL`6<7fm}5gv6D%#K<x
z+L>_+;)Z?{$nc#pO=cQT&_`|s@wyS#o6TJtTeGQQFrzW}_o9@W0nQ%@dNW*gHC{DE
zpO}_T%-LP-Xc^EzJ5$P7Yevr02rsOT`ePiJ9kRVf_sE!u+C}SV-KXAq;SG?9cCK1>
z76^dH0Orqy)!Lkqa-?J5cbjG=XUd@_A%=+YfCUh~!4Sn16?-X%jvq}hKdK-wz2%gl
zImH9i)OZR=ZuZ0&#nTbpr6?DN8WtT4m;p$n!j57nLRd(XBbj`3PL1v2*>=-|TTscv
z&K{uM@kJ1x+N;!&P4$crjL(_f5(<+Okg_a511ik|R%??5u*~le9Z$Le9DD`|$sIgE
zrlgCeW;;Shnvo%<Qv6^>5%GWwwWaN}!tqwhUB=obq=G_rOWcInw_i&INl!Tbvdb(M
zRwE`sOH*<?n)o+cGeWPB9_kwpzL`%xySYg}W2emb^EVVU)_MV+TXEV`-UrExNN$GG
z+1WOL=0m6!iSokg=XW_tI;M)Nj)m|ix#TQ(u*`%5$RnfyPn$LMjmyc!p?7xq;;6ar
zHBVZH=Pw)khj`|w*iaPyOr<lHA*A&gzi(UL<TMgyTpI;-=_AaOT!hc~7!8}=B=Ush
zqU0W~4bd4-;)=fifxpDf(wuI@t%N>v%DbezScFn1=<ncw3CE_P6bJ;{#j{i0h)5`f
zR;0yNS+okzy=k-vwrd?;x1473JMH53r@xY3wZ6v==PQ}REgc6dtA>|>`h*SE_9y&C
zTpyYg!DHXP4w#gMX=%4!Cr@Q8PFoYu*5ps;EgwWszMV~CwZ92F7t24^iPE2lElZwT
zq#iGYA|3(c9%m?uDtTVfB&zFKF`Fn;QtT>_Rrw;-6s}67OnZh~(a||%($8HH0%nB*
zb{LAwKWq=$jA?_{v`5i}o)m5#k*MSxb$KV{SrW&WrB4>Zrl}yObv}7zj!K_sic8Ea
zuVF;?L1wv1pg;Fqi$m+O=Q^hNd@ET2J+dOeXUsaq8Q9;;;MTGrJ~!jwc#UiI<bjxo
zyP-IX2V^CAm-pM@d9&p;E-sp{PL2;xTHfi|;d!HVdVZmFj#R9kVP2BhvG2eFcZhsb
zu*m%%4($Q_e>b20^!%r^|L@cKxBI_u_kZ8+|K{BPy=dgP5bNiu7Z<HY3yw4(X%5d@
zAb1WAUwhc{z-#R9A6{I@JlGXgz%J)Imu@?ZG@Z&<M8->qdltM3IU=TLbULt|;C+bm
z_uo1?GFP##s=o97dvki}VSc{W`J3j+E3b9xUBW;cCpm*TJ@-ye@D*lNguk2cSL?S%
z%R4wdyzov=TQdJAJ+0*5nF_C~xwkAR8pX*K*Fu-LgzWhsieC((&bv%-KOemvpJ-#B
z?%Ns~=Pyt>esQlGMAz$s;61DW{uUDhxIyiz#^ykgX5&2QRmi}Bs1Rry9p18&99)Dy
z*6kQ$d(cb}<E<32wDiA{W&?l@O*Cf{&bdZ?VSx{BCa?|3qVz!3tG-Y?I&p-hn66+c
zyzQhI`RwTORr92#@(sKNS@Vt!5&Iy%qalTZduQ}H^rfGPseB8a(l<OgWOp*rB+HxN
zOMx(kcT-ds(Vx#;5vE8Y{pK1Jv|!SYy68ujbYPiFwa2=&5&6E}Qs{c&sd}66!#3IG
zIj3E_dCF@ICRrv^pn0AbBc(qxL!PS6dEj?XM}ymjWU;+`k4<7P?Ov%xfbh~Ru)pNE
zYR-gY*l3J<eFvTRlk|;u#$#NA$~2eh?J&r80XUiF63zvv|7kd^5oX@7Cb&J9E}6~^
z$6C|Lc6)I^wnOWj&p|T23qWa31%cD-oYA|PfO6u;&rx{bKP52$?(c?t@YpHdBQUVq
zP8Kb0^Yy?vEp&BdjtjoCHlvA|v)NhG`JD4&158^WHQ^KrqPQTVwJq{Zn6EPIn%5}9
zi+PPQ+_#;<<JiB+=`W{MPKP<IO7xh!|7V-M<m^D%ucd7*hp2df%YK3fy6jUtvgfsh
zO|;-K?5){11OKmEc1F&=LmepxOJgs{eq%`f?Y!|7y@El?S2909dwS(;)}K4;^43q3
ziv(4qxxeq!>$Q5edl4+VyFJt9z0dSOlPeuZQ-5!3qsd#;le<yJjN}!e6h*cH+wfZy
zcY)7qw4`O^MRA8B_M%*eMS&Xl-0V>GcrL3#q^&x=Fis}sZEecnqTCyD%ta%-BK$`m
z3np~Cq(PFVfgb$+#K+8C?*cf+#H5(@qmf9SC(JJbYu31I-9z2?%O^RVgI-~MRJFgD
zW81Nlw6m;akA^(5;&Y3fY63I&QAY4YmHw6B%2=7~Z2PI0jb_&{FEzU}E*Kc$e^E0a
zm0ET%v_}jHo~m`~R{otaBbFN9az+*!psc|xcreT*0Rfk8BzGPz2-PLdsDifGr1CY)
zgHNr(ZfT=@m<hJXQFfY@cd;0hI(*>u?@{D;;Rqe<S=X9|`slEpd6&sE>mEQilN}Ro
zHPyB8pUVS;xphlVd8P8O1AmzLEK4Cxk=#k2HFooEgz{LETamsN!L-?GyGs_nG9>H0
zc?~X&_7o{ZTZ=a1vu@yK)LME)yL(2Lt;{9%%jRn!^F{NYhiM{{&Hut5g_B!#*l-u2
zmAFJ=Us0e~w;Y+~Q%9~K0Ec?H?VX(^Txx+7krCa-g}0FNtAvdg=9HW%OZ6PU`I9gk
zaCXk-PulN?`z<mE9=%D=8eM&0z*i7YT-n#&u1dv}Y9a6K&}AO2nK>tZjeOC2IS<<p
z$2n@TEs1r~H*fQJNOBGSqt~1M@<SeR9gDo_AOfJK%c*V?njp-JJDV%vi`@5wRL)_2
zb(Jqk74(^`3D9%j(;zF0Os-WZ%(i-p_rf)-xP__>X<QZByO27$wf1Ik#QP}hnkc(7
zD|5LS^HOVXYfD9I>}^!42vFKbFx8s9slmwApk8~8qvop<@5P(e;l<uY9)sZgyGEy%
z&CvwdMeJ7<=f7szyA)cLT=vpTCQpg-$eG&y(CHi2Skx0$v?>*6r&PZVl_L9Ix^$yO
zT|^IjPxaGi3Jtb+(5ag}bGy46vu94AXi5xmgfr5Y>ptH5aVUEBgs|e2e%0&>-4n87
zVMv+PxF_V!wwe2M(aRj+GM-Ex>RLy4Fz0HAx;1ZiI6@!?t9iFOm{4xi2M(Q-vi*yc
zw|tps-bK-5j5vvJ44;v|JV&{79AG1gb9lgOihsZeg8Vy*GFKWOa-BkF5kfu^ZMWw+
zn9hR50A^X&%2?=y@2Po`C}EBl@MopNS=`UH*tY!{?6aT8+IpYsixQU>`4<x6{I)Xh
zo+77#6z|*sQ;(Bs+3cM}6AokCURlgEQDopVZ3)s$I}>OzN~((@;Ar$7aoEOW9euqx
zydsITtc8DL5{q=%N+~Td&8LoxVH^3%eele)MVU!q%2=TCj1jY}Ps>7;4G^Etlw=uC
zs6JQ5dP4fnHSPdsDVvc&Wa%Mj%EVO+bF#-~SVj`;OqRy2I<|=<g^6iJGDj$EN%JQU
zjz);fl7=3#9#m??cbm=o(<wu!TxQ52OZIwULHiKBQ-6Q>IQ!CK0dq<Uc2&PvLMNGn
zccwSPTUF`h`C>q2(pR^8A_0%UW0hl6ZAM9{JkrHggWzwr?e(sT!P-`g$D-~6`p8`4
z#(^alL?me(^!Un*I!N_Wr2wYmi{JO1ZWQq0X%N1XItl(;m0e&oSbKVvI6rUJNh$ti
zbE9T#-SbAo$AurJhOgbcb+Vx6t?<wL0{lPA`~Ob*FJ&Fu1-*c0`v2DJ&z?QmwEcfK
zH=aHFmjCywt^c}^|L<4qzT5iEtj(Nu;<eu*4hd0UOZIYoeO<c>qG4?sPCK=rJAM4W
z{b@UhC%2D>uv&k5^5pqrGIj)yxfD#TKN${Wv1%TSD0<TlCg`rv4jfFnNzwAkVJn5$
zS<`t^t3R#(^vmY6pPu~m^DobTd9wNB=cms$pFMr@<mV@w&wu%8WAo>qpVohQUVrw>
zlV5&$^8Baz#!o+QZv686=`T;8!iP^bp8s5b_H^Uf(MpcrEsgbb<ou&~vVU0R5)giy
z!d+#Od9D#Up{#&a)awVq<VXD)=tPPBFb4u&bR}Dgi^3$C2D*3;Y=6BFt8?)L)BZv(
z<_sD49EzW(uZG0Plxu=8pfF}QM4)L;gl41DXjC)Inx_{YjRP%z{AB<1_{^h@vX6Fl
z>pyL5tPCR9rDR>scH)|U-x$XpcYtqiDwS>AVY|^33j?UH%)hO#dgfPDv()Iz{5$hi
zk3aj!7>W)QhT`wcR|EN#v{IKyaVc}%Tn(bOKfo0GeNWX<_h1b|Gj#fGn>8H%L8%lb
zq#>~H?SU{^`RZi;BmVLuM&z?UuV}cAMKb6p+#>Gq(UTXWq~v$RoE~Zl`-m7wpbIPQ
z$!YHup(-~@%yLRO9Y!LlN|}+65tgtBO`M6{MSZt**3qs*P<ZYOvt^|CfRjmxYY5}Q
znIx@<i^7t}x2*1f<uIsIykY7arR`nP#z4QqV)Pgm*8xd{q7lR3KtH9|_&Ha)*LG83
zk|Y-=jJU@f9O1~NNVxhV`mtH5nnm}lS7NsC)DAM<w-bBPXsb93=Nfr~3U;8u5{C3{
zB&xNBBIQbG7)1$53V4Ej2Q^idj1;}QV2{Ld!`ZwyfH)T#L+UCR%8Dag*a;w|&#|1h
zFwKAju#^4R*&|&R_a@a!zN>1cAmNvfA@zPu38q8>fEvv^DbxUyNJ6yu6@;aa+z{OO
z<mgY-E5mKD2M-2`<W`%tbV66bVHYo(CyhfHLzzr&IroFQqyY3l<l>#?H0cE62^Btp
z2JJZD{O7_}6DKt)o9DdR-=|bML{b{obrip2IUaI0m^h%^3Nj_Ew$yc}!{IF@fnZ08
zD_s1U%o$ZFUyzCj95V*tM(`cmuyc&qt!lcHT{^(4ta+tmoS6ZyMZfVeCGV0*q#gpk
zzxHnvkE{hB)YtSU0dR@{EQFck?6m0Yr2rWH8c?^k?jl3AYjr{OfIGr9NSLpAc}-B=
zTpTrM7<|zuOV?;C2)_Q|2VRW2f?~Q=DKVXxF7<mwNSz<Hes84FD(+d}2fOimf5{k%
zdBJUadH=1fIZY`glvIr|P+-yj0Mruy%U#&g*hRJ6nh9P!4uTt4-MPi`#A~&-a8#a1
z(WA>f-Z5|@&)#EKj@0d`fTZnBc$29od82Da&Mjb>8HqFs!s}q~%NQ10e&{TL=vwt2
z@d2o)B}VL+Bc_dht8nyt&Ib}M?Azga`<Dj|uW`Qro9E4xxeE3UVcTK00WB^i09y;w
zs%4NU1v{4oYD5@K7pcCNV+fWxi_N~4$qz48!l8fl^+^>R)fXaFG7O~~yBn1S<V$8u
zU$%TfbS?RU4;GUzG-4rNibj7|`J!Nde)&R#xr2P6A$;lbMF_#(mnmP;vby(4g~cjy
zO0o742c(+9F@C}o9Q3Dad+zE@<zE~Bmem*L_sbla?tPvHl))hlCWT3PRSBtm7w%pC
z<on~7%fIRUF)bT<pJfxgKVR2BJzp2%P=0ex(_483^!!00w&?leG4|Oa6(XfRl&4Z@
zCFRZvij<*p%PyqJOg)Y__3}M_`Emrv{m0j*XXB`}XjzE2mwEJ7Xk=C5<=|xKwb+-X
z*W!b6daZ?S4$BHmJ|$cF?PT){X3K)z4Y8*AmZIg$G8(Nd!n>KVC<#eSOwK}*0AJKF
zg}rD^{6rr%KH;sEKj~$N@I{TVaB61C#;q-cz4-9I^po5uiih?8m>o>fmeBqJEzAt*
z@&YrsU9=A9Zo>sesBEWL&=m^|R~Y^&R@Wl@m8HSGRnsMm>t^vG2+eZFc$T47GWaij
zJ@#+M(d3I=kBXRt=DpY5B5SOgN)mBfnmKb{Cx6DWKBm#HhdQ$6V*A~#cy}w_$=n82
z=g+aIwApjLn?3K=b}}BrcixTkA^wy8ymcNsW9dFjp`e2#$bNNee|3}o3OXF`?pn6z
zN})wsEz>bZdBXTzqe&eF(VyeBKgWx#UuJ_ZeN9o6%on((P6ypoVd<0L+NwE7xttd`
ziHYxx^&|SoQ-sVava(}Ke1nqOry?*tN~qOQ<zT2D6nV8HAUs5g@3+wVhs(T?D-uy2
zU98Y8D2&m^3R)+%1*wr8cxt@ZPtJgGxbi$FDY>Kik>Isz_wke1&ZFt5>o}jEA+;mt
zZ;cCY|Mly7on+%mUUarx_>Q_J#$oW*&0MvJQZX}B$92D(wcl4@<}Qy9&!K(m&Do)M
ze0kCGUL1Pohm9i{%46P$Bc7=M4tl0!{70^2_=6C^3yoc^sV`~txf@<@5JftYCAUc@
z2nXp{7x%{5bBxX9SBC(><45Xo?WYPx?%?O}eBFAUB5~PEb|UZ>VoZsz9OplOy}~e-
zwOZ4fR(zG_8hEtm@><wfX^qS}2}*qLoTxQF!feqKVJ7|+7RQdgmc$=2`T$pvR@o%w
z%Fl1HAHf$#l6!E{EP7<PeXEEtd~7T&kr2w4uICP-tNQp$kW@Mb0=8hU=MM&Lzw?gf
z9wVyc-;8f)k3)FXqyKUoa4H#YC^OP1zkF<}dy0BSt^nwTa)olSQ3WTNh7)wZX3c=R
zSJ3?TemKClbJkm7z^h<=MmEj-)=|}txI8&Mp;BzG4$tKnm$6n;YYWh_&#gCgJX5BR
zfH&4j(dAaVj&6ct>jst;hBdo5SQna9;9!bBVe_{DLUVDPD6zzdaC(ortC6Z4ac69%
zmikCYZJT+htOkm?t(Jm%%n9a+sw@${49#a8T{~ayHPY*9>cJbOLNrB)Lso@&O*({l
zzq*=!{l<K)<^<@V58;m=r)GrbYCCUJ;1(~ae)Z|tMx-i{){g4zgYIE>(Ky|1&!Oe6
zvQfuDf;-woR96(MrBFB-%`=PWb;i8*X1;w5uuNH%qH7nx+?KE(Wv4Df>kOHktV5(|
zvM4FkXUZ4jHI83&WDeZu0EC&S(O~9m`!1h>E-3LC=3Bcp(=CQQ_tsf;>S&@%oV8sk
zY@rdf64fH9K(L!KA>J6R8O~}3N(#<L`&KV8%sLNm;_ye^X!ylmDy>)r74(rK`ywp~
zcwm+~S@QNld@23vvv5k?brignXfB3JnN6lx(H4s-KZJ*SkZY<UdYo_4-(GTul?BLA
ze353|r<GMpX53duj{?l3bGv?nNf(92#1<>S$E~1BC7zx)PEhE+R*qqXIApK$m5g(9
zIY&sUU!C<ju;oUjsx>`Bj;p#f!(ifjWB~F&QQ7n`DFN(m6UtALeu?4s-gI2}svBL8
z@?N&1t7&3$-*l8Pq8J!I;%du-=$b+n;n(SS9O$!O8eS3%DAh1c9}mSDhTZM}Ge5cn
z24;d_AkjAsF+lmFuob;^c6J@vypDP>Rtqa6NhSy9mgA0`9mlOZJ39h_N)Z%Apj;@s
zyBkFx67h0AD(qxBuJEyX7Qt7{L~eoxu&<?_Gm&;>22m3R^7#I5jdSlsvjvBQdg@C#
zMeuRa?Crp`iz(;n7HQ+uGERM%l7*!>r~p*D>uNDS@0m;{xu+Q10)u6Sk)xhBVGl9B
z>A39Z79Lh=PCxPJ)RN5|A1qqXs$83$@gxkZn5D-f5SF+e#UHTRc!hl0QP9fY6K52(
zc5qfjQ4hzT{9pFoy|0ZUSs2dWe2Pi(9<SsGfxuU8IT*_h&%wY0a^mcIpZN{afF_nS
z%8X<nUVrzuE`6Qp8I6Q+E+?F`F-YCj)z#J2Rn^s1D*4|8D|CHc(#fc`^1B_3p4;VN
zCYCHYz#4RLjwcBi=$HP4|3oq5PwXs^@3bbqbCXHqC-i6_o?Q_SVTMUK>dGuBn0n<B
zlSiP}$lKiTE-<Y^dYb}48@ezN<uZUyw|l!c1d{C(oD%?;a4MQ$<SDkGL?f8EW`Zdj
zMH0uRaI~rk;i_Cw4Pc%T*b1xiSLrD=o5z=zX*fY~t>%}6m}>%+^)$fL5~HhJuU_;q
zN=YEM@*syWjR^XM#`i!{LTpEAD+Y|3(OauCuy5A5(|5|c0;bBGUG`v@Xb^j>6@p|y
zy;vDfiU3v@OVxU4jDk9Vz+6~TY?FC40cejgu&rOAHtB@`M7h1tqh_mZnM~>bgXZ?j
zlY{dG^||$oLbqP8>suHl<&XkC@o!#|XGESsWM|G+;l2>S>@?!W@o)F(N0+!X;s)es
z|M1vvAHRKr<J8tkF4h!^b<AV~9~c8=3NNUoE+-DFV72_9hsS!fhB?e?i>^4=Wo5(s
z%9{)Yb~w)rF;-ll{D+_KE`3q#-ji7(<|8a95vJJSz~I}uv$UXSD`FTM95Q@cug_z!
zEbQ%ep>4eT*~5gJu1_n)Io*quYsRIx@^WP0iue?Et$kiLwu@mSf@Rh$=MO)vOCRdY
zNu1h^9uC9DqS7v9mZ6knmq9vNrCdvx{pYINZRo~966z}%JZVM#veqW23JOUV3R3Sn
zgu@d4#fzf>2xiVuR&zP`F)P{EH6t(C>*Kcv{(0x@@Sua=`TncJ<HJsy8dF6T->*|)
z=UOJ2+1_sJ+4_3!tAnHc<Ja{}eiJO?jBe9cr||Xp`QfXdy_OKfGaIg#+)#F0fl(Ff
zB1EZAd4Lkot2#=Wqt{F3K8ey%z?ql?Lw6n+xz9-QP(`~hk5@Fh{6pdeD;Z6~Lex-X
zFAcNW*E4XdKX_nzr#p5G(QL$J_?+_zzIj84<1>J|P(OjS)#X~ArE?htL~lE*fcQ{A
z!HVYluqhADPCPw`P-LX`zO!pjw-35{m`-X`nB-<J^DWv^_$9-lE%rxTnjEjswJy+m
z(M5r-nH7Og=2?!tqSXMAd9(AUT#TGRoNF<l_o9mdT{DXTpUkru-q+%qNK=;%$*+rW
zsGH$t>eMU9$d<opqw5pq-eLN&>-WRKI817tOi-?WIi^gQfcNgM9Nj-YJbr~-;{Ns&
zz7@&`XUtYvFnuP=#WWhAE+Ul>&X7ly#5jj1raxUNyd4y%XV%)t0s6p70?LB|ieK-p
zqjaAWt^tyMmIaqGoocYfaAyL}NT5PO<X8KqUIp<C{6c~(34S4Air}yCX06FAp)MV<
zNO#O+!9{J;+E}E1?sxp7_WpUN_P9ZHyKJ4X1O1LqPEKKycR){lQGBiK`@uwHb@u&Q
zEgnyzA=u!xZ$|MdpQfa0uOj<@1Gbs+1%QeULZr>ffEm`W(*P!oE*DPADLz&Mkam6%
zgW^64!D{Pm371(igf5tKGzQ%rQ!zp}N@mGPhHJg4NM!1zx5JBgP#3{kp1S$wY?&eY
z{k(A!ehew0DS*4WqP(XcVgK^X0?y5fJ6)L(!?+i2EsutB;*BcA%&6&mMT5^PV0<Ca
z)Xqh&Ty&UXd9&1Eip9^}hN&tn%weF<9fzqjo4XHFi3Rj9b`gi{7%Pe+<-e3A-1KY9
z4C1tzrFi3rNh<b0iQ`35_&E>$yhY*2nYBG9&<u|)0b*tWoDkh0JKj||3X`boqww)3
z!Km*On`qrI@Tt3_c7{|*mmiArnvu%pul{4j1dpb}@MeNp(Phzq3JKowdOv@%fArJ-
z&*%Q(@zLRN8<u4VoM-6g()rd=em7?@8}uU0Sr&hf+}d@c&;vIQhQZAu7zQ^MhQUo4
zhJ3W&FChgcx84re7vICbTWDgK;;-UZoR!}gPs&?4ruf?)_i{+0P<lo0cS)2TwFC29
z!d&%u73Uh<7;}{py~Gn-_+qlaHyj27!!wMo{#=yxEH|+U!g^A<0G1A$KIrv)!pV<E
zKAG+m@VQemBTOQtXC*I!<u|1>)Z<z?1oc(K-R4&nXNBv0>tF%7A6)T@N8?EsKJ#*n
z1>%0N3=#uLd`dJgjwXQLpQ&YLv^-PGZz?jimDKHinL3ZN!gW4Y`2tLhDZDhElmcvC
zhOt206H{NL8wOXyU-PrI3F3aRq;xA9P5N_gJZ&KNgDB;&(1_BXcfZ56p;o`bofCO_
zE~Jrnzdf6Us}01WvgO%6I54$ymtEyhc|;K=6EptgfobB3=TpkIoqH7F+ck#~AGyX6
z!_0d9>3XL!k9aS7%z~oUc#nfGXR5r%af<S?F=p^wv1wf6>k6!BDpX;cjOL|^GRrgG
zk7~o<6;-8Sl&!wdF!F|S<}jw`BFl?+*6%Q@fuY<{7K*v`YzxQ?k?6F6I-W=dk-Nv5
zkvKwHBb)$Y$pV6*mIw%HaAyHw4&xROYI>ytqKLATL!%mDRk{tCqK5KkESanXw1leo
z{hf(w4C1D#oSH#WDv5U#UwKqnYlAN6WGL-8PNNCB`bDiOQAX_m17}B3IGU*3L#ek;
zJ#Th**PB?QN^C$5t-ji*;V)tlte1N?d%OHB&kD-^C~i<iyc$mlzo2L1|JvO6x3>%P
zT5ovI8y+YoE%>dq+3+^tcTlJ{@KgNw5&Z87{O=ijhMzxfc+hGSTEizT`~-g8z~AAU
z)>HT$dcZ#|0I-37LTl*10sn&r@ZDyMqYgiM|JjBA?Co*bt^RYv{_~Oj=d(=TCkET`
z)M_7r!0R{bPY)jLAH00|mSPw;p8i`VMe^QDg3|@fuTyjyCvi*#Ro@5RjjoAyIKeBR
z5a>L@LT#{C;jhyOtQ)?|@!nHedgFo{bCGSUtW=no3|ZwpFbcM85Ks!Irh2Ua4}Gp-
zLJLn!y(X(Qj(T1iQ`oquo#xfT=2f$Sv1L?eFBpBm+bIHR7m!Q6Ypl|De%>bRDT1J9
zPKxRW23M3z;FoDS!TT}f8gCpXc=Huttamp7#s$%=ov)%gAZoH}_#YfFY;r-hYGTE?
z^Eyl@<2)-CG%D9!hQ!s+i~<z!^}ltZuFOb2pdZHHhk`jny<CQJIZL%%=-hCQ>4<~v
z$Pie}LRg9JG6`ZPKj$*(u5odn+M=@Rod~bTC^)jk#LO|gh38>i1JRdzDuag_lI%N}
zn5ZMwP!(V7ZRIUKRSOONwg`_RG?u`I9jrK7joF}~$!yqf+03SKaWWfxeP>?NL%4ZO
zOv_smb-QuJtqct<Wiej<R@S(<RmJ%3J6R7=peV-aGO3Hjb}LagwJ!j%-?FKzadA@D
zyxCurx_SsVb;Y!nMcqkIk-E^ZZfTs$r>@4uNnQB*&eYXIxT!k{mP*~AvS<i0vLUz+
z;SkJ_4#6_SLoiD|1WOhWL1hUMWJE-8A2K4CAtZujNQq#Um<X0ECxXg?BFIRJ;66k}
zFhf=Z%Mcd9ENKzs#l@X9iGF2OqMy+v`uCws^k--j{bi^V{aN}%f5{3(zp_Tr&!`mr
z`_L)+Gn9({GPH{REVZJ)WWAzaS+VG6Gz*i+UhQiU*I3iEuGe)~_N)E#4yXN_Q`hL%
z_*eHeLQ!m>O`~P%KK&WWPk$NOPk)yBQ^W<{LTqq3jC)2nr4f~NQNj|~%et*VDA4(0
z*4FcO_U7UO13eM66=Z_AE;6<{_P8`~nSW$?iJ^y<@xp3s`GIS2J+@pSM|3KOXPN<<
zlCd#lv*N~{Cl1)W@v8CY=A%OSi7J=_ySJp7E~N8$UTkLv7#E;QPrGt>;@jYYHg`aB
zcFeMcq=5mw2AqMV)kb2;B>y@)8HxY>=M%h~{`T7~IrW1iN~=dS{~1yWiYyZ}#YGYe
z7WOa6VYgte1xL*e%&sgG^vy0$B3Ss%wA())27^JEboxPIiV6ex1MjxsC<!}G!fq~q
zh2&E931iMS^OIXlMS}gR7g-fp?~lhxd^0mLXfZs!RqxiqmBOLzsWdVZx#LVulA|z9
z%NAh%Io;qCZc(NvdSL)cgImv0Sg0~r=d;58v6)(Y;Gn33m^$(HJVk-;%&%@tZhsd(
zD`2suvQjRY&i@wqxRDo-8(KpC0R`no1xdM)6_p!?thDAdKl_OEy|Y>B-jetl<*{Zk
z*D`o&rCh(_KC^3bBgsLx1ZB>dnX5M)4sU(cs8+XB*xUl>j>>?_&2Psfj&TOj05f86
zXVS7NgRlG{iRL(&mq?X)r2P!;`omDln3I!cYx9^l4<CB;4p&4Uaj_G6Y8wh0fntJ1
z+XYM$l?T~KIle=QOdN^gXh3)7Xik|1w*Ovjz0s=g?yh6;FFYhc0#Y)gybx7m=c%eO
zkf$q+ZJ(g`eu(0i1~1Z|)*m@Ke0?Jizy;1g`s+s2rMTsPOSP*ds8_Bj>eJRe$=cU9
z<OxtZ-U`=QDu2D~__B>Lmh#Oxer<;^cMeaE&;1`(zWO3R6!5vq|9$c_eD?fN_er<+
z>~f8;TuYPg+CiLzYj1){|CNl_L+?YgUwMpm_<8c^5&a84*?%8zZmxgT+Jt}CAGKPK
zHojWlXgzxR^eb=uOAufRe3N+Io#A9B_b2(Gj0Ylt0Y+_(gM`9WY55yo;88s3bx28l
z55D_O$sYLPA_<aP%$a~ernu{Xkip<8gwLDsF@AFpk8cy|TRW&@X2M6_Y1m6*?{z#H
zh4ASZ|7u>*U;lkI45C4^8xQNwzE8Z@K{)VUPeW*Z5lnmD&J}#nyiy<hcQi_(pm_<Z
zDUVNcJCCOc?7IUDseKhEL&_gKy}<mw7;B5C3Z}PC<q`8H{w*2|0_=Y}y%>N{If}aB
z2!#5_rnk{tZ+eH9K$$M!r@EXTdDlToxf^>?D)OWB5Cmm$R7oNl-p}zA4z!UMUrYi_
z2T3_&y#8b|-dbB5^3csVxmp95YcSecQzrp%ND6%L0Mf$F=HhCxl952@sdoFXDU?}#
zU<Ebz;lN2QVx-9=7>T&%0H3CWmzS&8Q7TgUzl_5aE8O-ld-E+-w1snV)CC!falz1~
zwYm0WwHIC`VF>3ar>|x##_<5_;$HWoZlBX8r(V$QP7~z1>6jAd0P&~e0VxI3!5AZq
zDS0$yeg4<#^M?2Q-wiLGPSU7HjWjS?ZN7#9Rp`65)mC$bz93e~6k5YHoGB*Oq&z-+
zV5by94Mh78PSWN|QHXNI>=!Z@`N1F<jzxN*G2ks3)0&b<maQl%Cb%LJas&R{9DbpR
ztzRN#Fo=8*t}2BL1FPlD;dn4j{pUc4aT-oqJDt7S^X4V_QDh4+9kWeVWgD3l>Gi<@
z=z9^ClZ(A4%}tR59Eh33z%PSanU5%BEQi6(8o)L38l{tP412QM=Yn)_E&_3=B3%Gd
z=ms{?VBlSZUKpXG<2}dHbwCpX0xk*?nVX6;yG89<Km`~epmZ$&uxWJTRY!R+9>*!?
z8;5Z&qZ<xfzv^~jLgqke7;e#aO<}*n*UhWZl=kbz^olm`TH24VefX={y^6k%db>|I
zo;`VLkQD27j)4X>LEFX{eyQ<NG>|DIaZv{z#-n&hN-%D*sEbcKl%zrn9<~Kc5;o1P
zXjqGTy}G%`!C>4E{Fb?dkDAY(Z$935^!V|!=dGvDo`kFGoA$bMr%kgH5aQ{^lh*T%
zXV0EKezb|59=SR_HaoSNkDfh#y0NkO_-X6u^QX_D(_>esr)H<e&DO@o#`CA^&(}Af
zf=&RPp1C^NZ27GDWB@)u!N2{{lc%l68yl_7r<;$%)mF>Zt7VY_MC|%{>)F%i>yMso
zfZhSUoTO-3v{>C}K7F$O^vN^m@dze|gFV4+6<4ko;+Lb`+F>>?Yw<2H;rO~^?H;$D
zJ%&|y_Bg+G)Mvw<<OZO+`RK{!#-q*W>+1!59@#4gBWyl;y8igdvnNj;KgusR^?8!%
z^ZePy<7b=CA3b^UbbSN*7*shY@rBdZQ%)(h60qzVP93YKQ@%9s=KpP&WVoT}45tG$
zFK-mYHYNnXbD?lIj1dY5I^J`=UhQV1p;lD=#wqdbMq~Ny^Rh3WmVNoS?90uvFHMAa
z(qs3e3m8dE`&SD(ub24u5shFs_*HzjQS#lo!sBK15sz1!^cS~Tw3^zHFz!VkYcR)O
z*(q=zmXpwSzWV!T+4f%&-OOS1Rkr^gfsEO(?Z4JjkW_!$e}6&yZ~e(C7;@|03lI;{
z#QQ%%H@@(8e!+j57xY=PrOm)SCx~YNV*UBwwxE=KXNT`x1|WZ2+3L$1F}}P_!N<M+
zS$v&}Uk~De{Pkok9&plMZ-N`68T^GU)h*G4ea<XNb`*30SwPhgH8cguL+2<M8Xp1A
zEnS5Q%c#InWfY0U@-@N47f)P$tmd@C<IpgaTVlemg>(0qK*gSa5rWOeHlD&1p~_rB
zr}UL}%Bt#r<S+#Fe;E7{Q(Y85OwQ>6VDJV%_rCY!uP>m*maY;PrGD=g&;YZMCXvGV
z8Bir7Nsis!m)wBTKMTsJ4<#J1E__Lr0+8Q(63~_c{!jvxaOA~Z0AgT-b};}%s$%lj
z+;@sna`bEooS-|I1_KLNt){5+G9FF#@vFM1-IpKSpj3hnrjxK|L7R?v&3wUY<}v{?
zs;LAbS_j>3n5JU2YBmU8_5P9SvqisAu?>v&60w!X`$vbbkJ~T(AKM3=le5ErwkcpN
zJK4S6Jjz@Xjj`M(Bm_U0HFQ?SBzU$AW)(<z#=W}u<|WCdb6(^B0aiA@Bnhy!bqMm~
zMz*0p%tpLF-;EOw{wE<A{Ed9FKRMv7)q$Ob%t?ETzDDlJeXUtaFj}sK-0ZTE6O6^2
zuLi+YYOv#w@b)k9<AtOP2?d5Nni+eSKR<-CB{_$2dSMnhQLga>P$_6p%ZdS9`}{n&
z@)Ty!vNGAL!*_4mjZ6VHoVG_t$l%tS3P*-G5{|*1N;@&_Eo^f-fWzj5&b$|a$bv6y
zia3#EJ)-kmGv$9k-N#yM<|p~>^B-IIG2;8i4$bZgRaaA{P@r`7f(a?Bz<Zcp5>Lk-
z>Y@IdxHlbyD&@fYizEV`C3Bm)c+?-Qs5E9epmOq&wk~__0Krx9w*rIx|LctbfD&}W
zhv)!Y?L$+_+zn6%-urNf)(w18d!NAA{QEa>+K1oN8H;YnL&;(B*vE9KxcxXy$Bz=B
zB-C9<J3t+v8xN*K)V^$V3tB^qX30##5e0>&DfB_y$(2d|tyMZ9a1e1o@CPw*Mk=O9
zE$MSHMSHBvD8c*ZKDu$0Y6wasoTQUbpe*Iy7BAU^zE(2q7)?j?CxiTHc$kWUW5XLF
zOq;a{Aim?xITJJc^EM3=m8?0{<(e5A^7K(@L0K57mICrLSzwsN7d?IH?)|18CUV{y
zd)%am`vr`J<-AFj0G97B<hg3M$Bk~Z=OuP^QKU^m(1S;~xr+!JkA>fS6^p)k!x)wV
z@5)o}78J`~j2`%Y@G(SL0GyoMdcj3JrR>$i%ygv0+edkNfrh(?D;6GC!0}t9zA(<;
z!j$N5W>nSIxhB|te>xFDNB~eLzIz=zI$wSlS|&(ATBc4sj=JLfI!Ss+j&Gz|6XW0C
zFiJv!AZI!ge3e8~lRyJSCnjjE9q@Gl$^gl_PW-fF5&2P?nAgP|ltAJNs$Co2bi*;1
z+F&0uZmpEP-ms*&Vu38QsA3*i&QjCNvWZg+Pt0ftNA$>I$R?-*8kzTj;9B0;prm0N
zYg=1@iDUDC1?T2rQ~P4r1fOk?aT25F2n&*ExI+I(VStYy5RmQrm;z@i0zhEIAHsyo
zDo_HO@CFNWhUu0*4F(Y$AmZ@Y=+VWIw{f!}bMc5H0tREzg6PWTJ5t(U=Apg136F*K
zqf3UzD~>=Ov7oY`2+@Vfjcg!5d4{tYF&r(}5!NP3ISW#RjM!>;n+zvE!FK{7OL*%k
z7UCvN1^J6>vQF3<+u5*NMObf0(CBI4)JMi)R5J&xSDDR5!8`<wHhbC8tSQy>*!3KH
zxVS@2&|H>tJU_6EhXA2inN0mLiY3g{L@h{-@c0-bFG@=OmT?lw6AKS%d4!`B!@qe|
z)E?0rN_r4j2lGO{y(IUAsq`rM!zraW$r3s~(ryxg=pS7fy9hH%>D?*~pLd|oV1?9e
z`s(Aq)-li5db1_}Zt>q6;_upeo&MgGf4BH=%DVN?1TR)9@`g3a6()xmVsTUI)iOTV
zzz;Tz4>s`wF^bHGWt^k10IUZgmGB1fW&(|X%)%EFdRWRvN!r2m3Qy9o+Yd%jikco$
z=mOfuBO!~gm^k_*I+()YGmL%%D*C4cAVoJgt*4Y@u)ZMmaE(xtp~5c6rIxqm^yuyD
z!(+X+JJH_G#ojr}I+Ua7*3R0+-p*RIr(5&}d*YV~wtXLrx_zwTL^>(kf*?%OY3il0
z1JY1{fYzd0Z;wc1tBf%B9!f_7(;!C{cXz6{O?0G{HE7_SDN*5EG$JKlPm~dLs?I33
z$7rvJlORZv;5OxRNZ+b8!NwTS2-aO29Tfm3?4c5gb)-Q%0Np0JrBgq;n#R+VK{hqJ
z!2eH2l#uMacutrJEeXqpMD;RI`+F3RehZV>#u^r)XD+6-hFbA{rRAeoOjCwAmelAl
z41<wSFT_2~1*%IzL7*9l1D#O+Fp957^qcUVt)lM?^qLXNGCYVI;uHsI8e^Rzwv>p9
zgh6I)m=31Ffh`;SG4tCnM68-j{$cO|K&HeJtYN`&r5L5C7J>r2F@+r3x)c!d=mt()
z)Cv$?Sido00$;SA+_auFy`vzx0yRUeWZI9XgC5F*UI;jd<+6?jypF^`1T?Yw!FU{w
zNYg<I;e?}v{f@e5Jd;W@wRJ3)FGHMItAHDB6r!*Qk_hG~R%KGMC<ykJt*z;Zs@Id$
zh5qQSN?9LP>y%p~JR`+&Dr=C)E2LgO490qD$1z<#0z%pt)@ySikTJLr=4X<l;q8RO
zJuY>-vj)FnMk&J%rIAHU4>LlHPIfW?hXN7wyQ7tJjj=*RhKmqvoe0elx40?YZBk=Z
z8a1LLwP42<J2cI$1~CZ*HVs0sw`fPCmboVG7KCx&ky|vCLMPJ}nT1*ph4RfbbZlA0
zP(pPX!qTF_M+OeF1DLdz(*affMO0}-%Hl{RjBbMNWTsF+c618{t{WIpGD|0uDhzM&
z`~^TXJ{m!|<GgN|(8ULlL+pRK*BHrwN~2jD^8)zv0~R%-;4VY3NEX~;-jO@Qxuk?g
zAS8GXFde02{|Y?-OgcEm10iy58lv!;ui=ojg8&3$YxeTsx4~ZK6yJYw$f-AlnK}r7
zc_+C8y+hGysC!<70}wRnbU1`P{F@N62n8@C^Jg5VX@o7s(Ty=qm|rv1)+<|>6X5T7
zdd14lAi5+OsqXl^P3FQK9+oBY?l;3=1jp?+VnUN-Z)eiq6C+Xig*EsaKWpo}r3WIi
zeVT}`*Px#Pptr|qO@&I`X-H*SD)35wgPuLzfiscvCDJEhiFdq*y<ie-=U+s~YqlEQ
zc1&v6to~n8(R$~Tx88s8bEkcd7YzcQ<r-&a4s3yx)k^lnKg^(K&Q6jo7)-v&AfB6T
zYtR0?^@p23_N+Q{*SeBpop0>snGs_8FS>@pzW?4XpV59zHGPO*#Fw>By{@OU;F=Y3
zXJfq~?+ny^<lM89_iO?J&ar5AnWoM;-i>Y7Xv^<qj&vzISsw5m3E!TWa#Z?<s02Nz
zX8Cl~-hS=8j#5b4wc-dIpehC@rIKIc_&Q8h<IB~_wMaV5Pg1ahX4~`5e&_JO<X)|h
zyvGBd*9|W;NNwa(@fQ%}6LmAce|iXqE6BrMN^P{xVi<GC6hA~^5+vRJZ6jmqSx!ez
zP<)>^=b6ipEsw7Pvo|<*zEk%+dB@0?Ob^w~3el6%5YC6DxI0C%2hO`BA`8V@iz`Jp
zx_bnCn}-xv`T2`C@3Uwj-~1e(xBb(%FOGm24v#zSvse2E?HWU@uUNyX^}unqwhSyx
zj0AXgjBv{f!YlhJDa>c#<+dV{rUVmfKgB%hsWvl!gTnyM;u!tnig3Ccn)S-)44m)X
zBzN^Z45h{!x?OZLr?6rPMl3^51rLUk^<V>~YN~GDWfBj)e$<mb2^Z)*j;M`aCBe9F
zudV9|mi+X*b9Qq4TGn&pwRxZi5fSj}b0iKw8&DAx94^Ro_tS7v<3_wFpA9Zm@q(G9
zIDp+MlqOOxB{{*+pc=Vg*SrsSXxwv{aou}Ja*u>@FrCChdQS$e$+MHE3^(Uc_luAl
zvY3OO-M&M!lp|8mkQgmsFSxXBZ@e0++rc0SgB}_|(S-7^hrN13jSLF642U-x<T2Dj
z9pjU5*)6v-&09INYpnK<(8UlAZtKz}yfz?<dnu+!gTnMjV9N7l=?HI}LTR@J`0{QH
z<|m+V4Q5FiGbZ-+D?{a*F8iv}aJ7i9rMoF3W87~cUxo>sMx>B0gICEzMY79+ei!DY
z+YG$qytzd+{4xLoqsKQ@V~(L^PL4*DIiKI7AeC7ry~k#~xJyS+fjlxcfq0Bk%>TrW
zhHNMBAy{W{$gA0JZDk2V3P_$X-UEX$UwEympyXUD4al)zUk3Lx!so^9k0`UUyoU7>
zDxpse^RkkeLR{N%WIgJrvEyWVB^{8qWsU3_Qe<P_DH5?;{yK^}c0r&`$cO3yB|#_C
zw=L9iy-AP8JeGd!ibClk+_T;}J`M^*FD(ht#vcsGnP3P};Yu*3C<8jCqv+QuOC;QC
zgp|>fya!UisZr#x*kzqwC80-L#t}HqOi+yYIH-|k-k|^1?{<LJ>Y)GOO4n>25OSv5
z54#_H$)!HIlhT^t0txr<_^@*bD)c|w{=rH6)vI&=@Ri^BVZY;_oMQC6e|YY<kN003
zwO?A?BZ7`_>JjEs!^4NG+LufPL=C8vkv!;YIzH1_7axz?cslW?Bf2cd=r~+(QG-U(
zOw4={5SYt=E0WKk)ciE^L<&vr_{09WfA-`4(c89v)_&DKYabu9&r9ed$Rb36JW5o_
z;`Y0P_Gt$OuaVy~N`2gR6kZj741Glaw4ro922yr1RAav>bP{rH5&pqMp}DxUuT?B6
zpr4CmQ{UEA5(|AM!en5vf;1y)XHm<Pm<tjvASFF0qM9TND{AR=Fs{+js3;+1E`2D`
zmZ>7~FFQW_bpPzU{SpjT|K-WMpWzd=qg)aSjZ%uwYTdN#YbPO9JyUgZzB`>yv}E_F
zS(IyjCgD~pTFFP{l%9p%%sNXu>qgR^PHVB_RARqw)~9Y!S-|DYWn%~M1ngwRK+UsU
zOgwYa9>~SDFTi^8G%<8{wmPgU8GM&AA)S4Ksv5e5%8w5BT8bz;@)p*&S4we|0Un8I
zB@kv;5#EL=Yo_VF$3Rmb_|?xS^M<J)?o#;z;xelmShvTQ<`pMj8k^P*MPl`)qh7$}
zvBu<&$lROY1t49-h|9fc8d`TQ#$Hpf)zK<m2x|pcALiFVLf81N4cm)SDuSg0x_A;`
zO`2PnCn-JjdTOY<AZR<tq}2cxYt`At!zSWgxZoBx1q?*CTr2*d-YnAQGRO%6qr3}Y
z8ff5k<Kd8_Bqni|jv9G`6j?epyihnZ2Dhfwg3BY{F2f{{9=#yx8D>ZR6sG71I7(57
ze)?zo?4$v$j`xp0!wkAfoEq_)<fA(^LM=FE8JIV%H(lyixin<ADsa8n>jnE}WbI@{
z+8Z5W!rRB#-VzJf!g)8dh65$m7|<N-eZW_E>K^ZXKn`IQwKw-RwOQC@6a4u%>IRdl
z>utc=S#oLCmfg6lZ8Yw@Rb9oC%XjYvh0t0E|8L&L1*)}qEyBO!UJ$#h`i|as>E+Kt
z>#Q!_&H1(67CM7g8Y|(kSrv~36{p#GDCGygEpCF!dz|z0_E|?fnexw1+XsiQ;Nx@P
z6rtE-WS@M^CLgUeQH!b7Iri{oyXb2QMZ7Kinktn3W~y(M$p7e%L;mas*I#5k9({rJ
zaD2UqUtcQ&BEMkcTT6)DZ58ZW328!`u0w}UEr$c1GI^J4YTupd8_UXm)dyd|qdJ#o
z*Q8Ig$8EI7<@1Ug1DU0dg>s}{)xpujqpPABt?eSWfDI(p5AX6o&{kjF<8ja{$7Md)
z5LZ9m>ktD7lUormW`1{9AF$gtJBvBbRF2AxoHJ$bfhWBri(^sfb)Fj!>BOK5BD&_J
zuxc5LBr|Vu!VcjT$vs-<cxUnTs5_YU!r}(%umq)G0%Dk-lWA%g^;0DBo|8ZgS!IC3
z+L>OLu#G7?i-I<6H)EK53{dy**`@1aXfWmLble;(;YDv6`9&kln;$S{9tXT0#1|^U
zOojIApoX${j~u$ikrfy$`w<p&pf0{auTCai%pyin4iu*dsvu};L;bAaqkdA5+Hd+u
zaA6J^dLSi&0Ilby=3@b|=D4pq_%xmaqv3P>L`L1Cs*3vFCjRG9=J}KLJO>R@)@|RJ
z#t1d!XbePN4MIceK_coSVo)0R$)zrVO@FfmAfI!<2VHI<9u^ucyZq|Q{F1wAYN2W_
zo*^|fDdd|ILuuc<*)$j0c!JcBw#OWgdaUIVPon6-t9rkqRzm<+uaAsn!XQBkR}Tl5
zt0<rXO4oDS%uP!?fa+dyu)#xnZsvn1i;Afkp(A6x^~+ums@bF!|I_w6KcBW=wqKps
zI`keC|9d_K6$|u_a|#{C|MDkwd{-#56*{-r!?h|UP~NZDZ{K%#Xg%DX_~P4d_2A!r
zt5Mtj0(i9cunDik?}JB65`&Wf%s|DtxUUZ1wO`I!;<|yzJrh+0UYhIs?YHl~98uMK
zGMmrdNmOQ0RMOJlKRP09Jx|p-j9qPmZP4?os;xKa?N!y*kI;Uo+**<4U8c`W4%(Qv
z!{)plzzkX`Ujcf$nfr<AGrEbRC;Q5HRyTQ;*+0Em-5k&90Y>+7u70BCI@qHy-paRA
zD>IR^e@P?9SN3AenG}vQApM==ZF%|MP|2M?2V8mnx5t~0*Eciyzt>xvf9L=H%jSPu
ze}1?8Z-1wLGbG%L{quG~`Zw{FD+OFG)s6_>u9j}c2nv^3ak7s9)ze=zKnxF<>t(hG
z&AQ-ci~(I^L?f_lB8*MGxb@WA&l?F?<i-g(d|AWiXP;!!!us*kTZGLM$M}D~8bX?L
zg6E0CgJDKsHD9RtA)FHqk{n{RS1{riY~oAx(m=yUi%L375gx0_m>K=V`GS((a>+*t
z*jYZ8K&GIP*&CH|dhCE{Mb{)pZ^in+Z=#j5SdnCxvR;EC6vrlW>;cC4N}uFIEBS|T
zl*n}vUOh8GPhc9el*0zY`qU~wt4UYG{RuCQu3<Gf0$HLZANMisWY!^!hfc1hoQEYc
z-gv?<iRvq#_X3E}>_QQPSymxyR^uk#Y0|qz?r*%uBG#7Bn%+JKEJ|CLtZlaK<s`-l
zXnU_P3li8wY*jb-ZeYa>9W1~G?`JW$Z$jBWCMuy)0<bt>{HB>p%;0hXTHe2pan!RB
zGkv-;;{tj6baZJZ;-`y=f_8Rbe82^hIl%9j!$M!1{K?ZTkaWzZTtGj?`-?#^`cU{J
zh+P-OE8scAs(QGi<H*&5nXhkV#g3V;TMT{`uafY0G*q_TuVA!SQ=~SOC~R#R`2w@K
z0~Z41&M-J9sBDVE((ZQBMqT~Pm8^5Y;)Pbv{6dP?Oj(a`E@(2}B<Ez$H$i8>j3%zk
ziq2LQhJ#iWMwBk}8O<o8pu<-#&K1`~E09%w7t3Z8w!uq%SI>dtQ~8|L@tmvdl~?wg
zLGarxzwpX`7k&%|`A_9N%XhoQ->sH>_ps!<wcYK~*KP+q54`>&e{bgR?ee~`0I(lM
zDZVhXfVjUXg$7)S*+luD8RHJHCI7Q+nDnSm7sk`BR1%x0@k-Y_Jp|M5*6r+InYW9=
zTPI#tid$2TXL*;8h|XV_)yqowEdXxMJ12A*FD8CPLa9q@v!aj8Ey3?)BMkBkt}v;f
zkp&OSli-G{)f>3OYW2E(kuARS_)GR>T39#BFXgR^Tsb~p)MjsTYi3E)dIl?Xt2_4#
z(5f7y%OahHsKarB8!H44HE$~_1UL=v<~Zan7?Dz+VX_0X500l`@+G$?;~gWDHP;9v
zB7hR?$76XwHH-=D@y3J(*)3_~Oa+C+jkwqfW(-2m`z409K=cfHj`J2U>P4J=a;LPA
zYa#d3oZpU$XZ*|b3d2^6DHqD0y^>)Hy0|+?m0_PrQBKY?o0Q~4<HRm`5(+V5IZw56
zvDYZmgmOZF?qX#2%{|uYdEmL%KLW#jf=}e<<Ve}CkRuO8gSj!9j=(OHF^w6lUSR5%
z9To|TV3?DgPR1!&ih|V4mHX%8DNr;;*1A=c=C;S@;O*I2`?%wucTUdsU$=d-ZVrz2
z&(HHlU#9EJlLJg*e)hA!|MKO@@p<iC<LCOyw=@wB%J`<)Tw_x$T|1ya0(0}G&Hgu4
zn!kuDv}Y_UgEyMz!CyLkWg@8LTd`F6u#;>njy$pMCW@(Tx66`NpTXxPPyKa#4yCQ5
zjJD?6eoi_^ETjXZ>2W_tZU{e1J^?^2O4BJ=;PieRLm0pN&U@5s&3c&ViWTE3f6UiK
zPy3PAWFVam8Tu&{si35fkG(1%qagc^Rg}BqxaMv^TCjvNNjOC(&Ew7aj|a__qHv%J
z(NXiyaK?l1?Ym%8K5|iHpnK#c4qW;+`R;>PUxV-RY*vKM7r<t<IyNgJ3eE=s@^b&+
z!1wRCRy^=&68EOvi9BW+R<zp@SgoP~LsG`&476pSCl4Y5I<0uhfd(y~anf5zd<s{2
zVzZ}k7qqumthbq8meMAD=0v(GvN>*ixO_1|-sE6B9^9UcI{ok{yqw(k7-x9jcgI0q
z3OjP)#mz!!cr*8s?7@z~3<BN*Y1+=e&ThO_2i9+;1|*EJ>Q2^1aZ?Kw*`B<kmLb)P
zCl#fjHTAsM)i#lE%k+-pWLQ((X7(E8nwuagso8N^lM6W9N*nZV$MK{JQi7`tp981e
z#>XV{_tmIZgFrh<4JcoI6tv3#uhk3JhY`z}$DZ~b%*INzJsiROx_U6>S{J)Sd)DPA
zSzEF|fyuR2KBe4P4#p39J@&V#4ZI^&K7o#O5|C?QKHLhr7znyW1Ez%|_t>6{QI|mI
zQv=ecLOM1C0K!ogmns>-!nJr9Tk_eo$_-gGW)&e<fXWU-*xD*s>BR;sn|#rT_R<xt
zf(UIClu?I-@###zdUa;ajOOIrqM<8nF?Ux@68XLdv)@$S7V`jg@0RH(r93c{P4P|K
z3l&dyk|-EmO+k+T7=_nPezu!vZGhppH&5|^!u>dNRFSR19@}2ZEC#bssKVw&nC%b*
z!Qm6DQLVncjFC$TQ#yQwHGexkiq#UFhl5LTU=@--(?+j4L#nXq){r#0D&vZ0oSD%x
z>z=`377~>d#v<Hy2!x<Kz6=R;tMZkvJa##8$`P>EUJ#Fxyql}GCyG%}7yF^?&`-k4
zFbUDGv@1TXt(V@>WqugmtiS;rDIBNKARfUujz+!daCqyFldv0M98j%3Lvczcl#P-{
z&->QZN5zU>WeBCO=4ZgoIJ3RCqi9moNu?#g=+4mY0O4J2@xDLbq3=KM(D$cemE710
z@&x@U!5sQYR&Nc`mpNU<Fn_~jIOTVsh5d~-wHcP2?Vd=($xIF{?or5*)P<gNMyYBo
zy_M2pdw`Df?jqYzKvq;+4rCMbTcoo-1(%Qe5hmAO`T&*v76OZtWYUjK8qV+%6v^YI
zlLz-!klZelEOvxNG7T|`m9juvQ2`j;DN_!ML_HktD>JH7ZH~b$hYvLKS#EyP=~(FX
z4*oL6lTwzv<QriMpdBai1z7mEGUHAX&`_8XCQy*|i=*jpzunReQ;<X{=64a1$5FbE
zS$%J6bu0Wsf80<Wv9EXMNl_pIKxk3awZraQ9wpmB-NhThHd0P_rkg-g^Vz>^kpe2;
zf<faW3_OjJT$!BGMZ+sCYtahQfB#vQaQDtj`1PHa@Hn2>t2nwfR`D3ii3ZA_*~r~=
zdwZVDGilsFc1~WNV32$_+~<6uT+^81-9rrX=c-FGWt8lOWt4GAu|Pbgn~l0ae=HCj
zB=K}4GExaLSSRNL$6)!$CY3xxCcF@!%7GMjSBpkV(x!};dzJA!ysUs>=@7GkyTKMH
zNW%9zNyuSX>?0fRm3xN=W7~6EFq|_$ekhY;eX?_a7Un0(@xB9AhQtbDUP=WcQDahY
z`99{jB_^lebcIRdbpZtyL}@l_GVXZQ9ZPBnWabF~U;t86)H))5YSPHrtfn)?Y?~>V
z)LaE!{!kMZu5WWbSp`{4KC`GLT{47NG1JkgM?LvOG}bPDH6092qmM^#cdP)cOY!m4
zR`#u~r(6})(R-}FnP#?2MF@k^eurFVKr;?MNGjai>f|{K)9GMBv0Elox>&&TZaN7C
z?EO*iWjOgMn)DAa5mrrbg9h+G4l316#~|{`>j8)<d>t5#g3&~_N3kb>P#WgON|Blx
zT%<2`|4twHwI9PNUQNLyu+}Qxg)Z|@<oBDQ5ZDIQ4c0KX$pH)#rXOyrZI+{>j?Jdt
zU@$4!AxAH>9R|HY5i0-ptQrzO#Gtkzb;9KTp4rg*##c!RUI#Jmxe`F#T!S)jLWY#U
zyUe#J$BzpfYs>E?rlaICQSm5?YiZ*GY8zp%v&im@kj(__=4WE)4N{}cXb&bj=Rdxj
zg$6x$j-a7E{a^UwkG-1ZP~}}q&K6x3Y2AhBi<bvEptyN^-Fbno%#{zVQq(*E((%6d
z_%BSPn;eyCe2DFc0;U+pBnGW~%mEiw3=s*JI{Koc&yanB{K=5?)|J}t89`yaaVvUx
zVg~Bxqma#S9Wlleb(i?r%sr~JWfVL$TqrW73ReimRd5SeEK5H8Y(-~V7jO*Dm+@Y7
zN+Hxm;!+k5$;l(cSZppOJf>&}jF~rlY<TF;0m){x%|>g_3t_(}`=cv3S%vpCSeL%n
zprwA>$hf}NJP<Cdfd`nD4&RecLN4ycWw>C=qsi>C&8&+6zCJkc9yK?;i|LhD`@U}0
zg!>^L#u(TPCy(AUA|o(eujCkhcenN6L2i@4r{4EotyZ#`>8T?2q?Z*hqDg&s*Z2#%
z{}3n9Z<s$|;B9$7qE-Y{t5?51f?h6k0kQ-JEsQ}v+aUl3E%Y5_If3@v-*3<63uiyj
zP}HXxWu4bc&;p-ty@^KC^khU=ZP_PbyQ+4`_cGfP%skg@wPlJe*E<kNhC4pQ;6<>o
z-tt<@f!Y!?laR%Gt#hpKAV0Gw$m`O6cY5Z(IK&48C#U}T!T!<ynSa*qygfUnoJh4i
zn1<NJzfR)``Yh|-4?1RSuGJT+s;jrxcSW)px`V0V7HSV<nqakeqp^$$lUBtfm^Lgk
z<07Oo7H%QkPVqq=nxjCGy~06iR=WpYDE}<E1};aa+%zjfi{f1kymH~S^P`!)DlyTn
z(u><aBpFR9W9mzploii=msu@1E$6&6zY4`X$El^tfKcQLL!|kL9LuxvSDCY$$C4>`
zcT89hykE-SER>ANBwlz>gBTo8f$4wdBZxygayihn%1z@GDMUu?9eUm+;;!l4jS(%P
zo2yvluF<?^#)!=G(!nnF#1}XgV8d}IGm~o)@<^{#{+$9k7RU9aFnM3*n8~EGIEU{f
zX!8P}2W{Vs;}P#DCKEr`QF9G%#;)6c8)IkBq1B{jqTyXG2C^Hc{ROc}qtT*WVbsM3
zE$+#0OV8qX;_mxvGAZtS6ki2NH0ckWeFyPXao1D4;&*l%tHeLn&2}Lq@n*XUz@mrD
zCir}^?y{Q}qrRc{=`(kZKEnPl>YB-^E(-(}2HDun8oKE$uR^t<XV`Z%R)!oPDTdJv
zz1GAl0=`q$WtljS+#tLX<&9_IS^Q8=-F;+@PLcD;Q3`_de2ymx7}J;jq$bMGJBOBD
z4XjMR+G_lK^434tKR!O`_-_v1`TJ*Q`#+!i`^PU;4Ve12ec|=_i;!dW&qEFj^TZ^5
zOZ!=Q=pEu_#k6Kzl_<WHX)7q*ok&5qvcxghIL}}bj!~yib?C&!t^WO?u8OU*aOrzM
z9j<BA=+<fQIWgN7dKmd3x88q#MmK79$7AYrGM$X46X^3XOfD+)i>H%WIloqBsYIDO
z@fF9Vu$lI!VhIk^QZkh{nuI;(VKcNsTUW_lyRUqnbD_LLdXjCV(1FEqRWpH^)SG|W
z)glesxl(hzRJ_x2l`f5pK6<D=o}1Wcsw_Zj>h^_6J^SJ59F!(t8A8vSTw3H;{a=y)
z`8Z4`;_3g~382gKKd(P|+*;4(f8Kcd`0xDBf5rUIb0%*tNFe=ejs((w+5FGP?eory
zpSj?8HUYH#oht*h{zzTUesW0bwu-Id&=Prb%bssTD(qxhdV4zSsc4;r_35)Z-$X8I
z17781)EcTtSU1G{z89f40$`j!`lAGBr8&0I;q^CepE)5h{>5Ogxz0|4T2F#T3c`nf
zS1)c??E&(stxJ-wjr5c$42J6)4UOE@lV}(+=Ll$6dKuX4R%FJRreV*{U|R}*ZT2dA
zuGLVhsl8N`^xKENKAI3aUMV|>yIV8e?JO@?2EGAa`oeV1RcWLjWAdiAFOI-+J3Q{R
z&tB~xv};Ds`pPVKf>23}K2qzjrX+(n(QPsGbqr!ne*CFg^*c^sn2i}UAvU>5#X@+T
z3$YX)H(5quJqxVWvLf`-+J6>wtL660(t<J!$<BgG(fs;7k@1|U#QU3}9YUbK*vqPG
zUx<u!y~*UVZVW1nolT4jdaZ&vCr+~oWInO3NEh>|_ku)W5O<`70}m@s{3s53`l)gz
zqE%;$_BgHdoft+bOblyB4PCHcv+*Qj3G?ZPr^V<s9Mj1qd7n*Dg}pJJV+sv#>C~Q!
z`-+LY5GNE+m|%Sw!iu40?j`AI@tj9gxkwd4G~<+Z0kXU4(w4^CglrIgsNT|m{>7yx
z@C)swD0LfHJYuR9szp8ldq&q0obyS0yj*Y=_W{@{_Fvxq=ciWl7k{c?|37{D#J2xi
zPai#6|J(lmE873h|1RFd(&J?R=y3l$Z@8<^XOwLc8?v&Qe42*SUTlhKkozB_9$mMM
z^f|KY0rcsR4BXhGx09T9bs^(f+T^fAJzgbDiikr*S0lWOav9Buky_n4SZEnikvUhx
zP{@nov=00+OzS!fxuy(k_^WVY0!EXYlHoI#8D}?_2EteveytJ?$n3_t*I0@v_AYM6
zh%*&|`WOsegs>SxE)#(fqnCr=N)n|2I$x&DaudjBtoPv}Fk#Pj4>TNUZKcb=s6?~3
zVl%2~_&e=>dlY8@7`v9bfLzPk<(j}b+uZ!>(`OBH@qlkvpi>s1Y$jcUO719dyAl&H
zo3FXuAH_bempQXW_P)c+D9@dBBJ@V-we_BgX>#h?x&YJ{rQLVG8}WKQ|F1y6)!&PN
za7^D10m}$eC=zNFO-f^sY!Fk{4LHL#^2WKN*UTfqMMFkLns#}Bk+60hVpa}{;*-U3
zb~SW91Ukd6tDa$eR2@u{1egHfU@Cg(5l^q+NQOU>U|`mH^oGGLBi`V{NI&CxXtnDH
zAH~r{!*E4=9N$D0z9k@Vj^R5*P%1=uJ?G?Qfk&e<ix^##<p<0;kN$kBzbFd)r3jcZ
zdC<V(<iruTtO9%wy_Zdm9H>nD@X?Fy+&ACBH_eHxJ%aDeU9DDky$$$?U^G=8)opi$
zy%Sbp_};%S*Z(rcwL}&XGXbswG7Z9jGFD(sJMTJ);roQ%`evuIZ=hq%#vC8m50mhR
zLRCRHlDzv=6mgKoije^^RaD4Y0b_yX3UW<FVsw%jlZaRXbw9jV;k+n>6qZ8>v}2*6
z%7t>BN~(7tPzAn<K%GTS;Tn?YX35RFc(zJY%BkC>j*Dzc60K+Lx#JtO@@0A|GEh86
zW|KR3E|iTihy5=faWCKjD7XJNT2J%$|M2PG_WxhC{&Q>n@9TTmg>T<<&h`&G4j+K=
zP35b5L{e6KiE@#iz32+|ZfhMiGrUWs`l?t;Es47RAR1!TV-OL-w#Ii|4FtGOON*mi
zVF?<ntISo62k{jE6B#Zs0~#Gj_&%bkrK6E%)_kQy$%5b<@iXJ^a!|Sb=}re^WR&_A
zW*J!UwM5a(HqoIvWQY8Xid+yh@+C!lBo@hDk%SR11(NB=%wbc6mxO_ibkzIu=R&7^
z+?`J25{}J~|GS4G`jwRktN<~iDkR}X%dwsPv>Qf)ne|RPW~ozdEoO~s2!g1s@@nfo
z*c7_y<!5#Mu0+5*Y(xXGW;0erk==3f$&F#V%;W?rJI#KG2R*Dsj#&zLF9jFzlq)cO
z43a3Grg9f~wWt|3MdG$P?-f?DIR=GxZ@$)chhRw<<oQ840(;Em8B4)F0I>Mb7B-?-
zRW<_zon$5PtLP*0K_2zM8mwuI5(<wX;M)r(oPr62APQ8WHw_}tmLxBM5rTZskVD}-
zzDbai@C^Azd9mIHUWa5Y{=#}9wCde@*p_ubV$nVuHpGk2imG%6nsehkAZTqpLwUPr
zK~bQoV7CqEZPPuY<9H;P&zjeSeVkaXowM&<gyd5r@yLc788fWpvhQ)UN<s<kBF-_Q
z=PHx&$w#`dmZAn}636%#mQS}xrt$->a(+OryH2yELrG47sD{<YQ#BaD2(I9iYI526
zKCt9R6z4dfAr?&MlSP+{SIGjk4gV{s1u5=UT(?WI0%E;(SVdik;IxTh#3`^5E<PY0
zUQ>~?R4B1tFAmeuKPKo84yKcMNNh_42I>uC!~l(iDKfY@wMb8iY(X54DvM+JcbQn^
zl5RqKVzEYH*dqg1x(wM49U>KIgBEbX5ts_NZaEH!Dc>RvA~(^1n`)YUv+)g<c>iX-
z`S_bADk^{|mgeljdSS;r(>Bz2Fk4mxXj0HpuqpnWU@FQfre@M)QK4>VqKb5nl%|Q*
z6|rHE<kO3ZD146-AeKP8%Q^pmE=mE1g6YFQc+m<(=addGKe5_WS67?ilOzm@jnV$<
z#klAnM)5V+WWmJP_n59N1RHq<f+I7H1T;`3MvdnPoO82<s*K(`8H($XX%CSJ9fPK0
zXfP#X3h9s=69%z|9a(CVVvqN9lxa~QhKG-x7_`;!Day3oL7c)u?)T^y>ilLppx@P{
z2^arM$T5QvibJX>cqz}Bi;&Wq9-O}Qy0=~QdEhRgESG{T=nwLSG1%a1UeN2oe}~fn
zaWS6ZBp_g<F=m(eNG>IH;X+n89vz01ry~n<57CfGcm+oaN(nXI`6R#y%_}TSh${|4
zick`FMnTf;N9cZ=CgCb+ftL|3==AckbfL~E=h5tS@<ehZK6)iH7%gP_q>|CjC?+&K
z97Pk-Q3DoY1w(WBMnTRvG1f-w05mx6ZbhbxkX*6o3^1|e`<*nn<ovVnF}8w_o8C)l
z4W#yjfHnE4X(%NxgKQOT0jcJx6M`@G8bF#s1d@LT#4j8QyWa@&2lSloc&uJ-p(VF=
zy#^nxk<eYUHR&}0|C>FBnr$eVJ8Qw7-Z#<2(+*Zzf`EAxU0!tXkn}U;TQ29Eo(@Ce
zjdL9aALNvIh2=tGb!_%QiZjP_IMIuI;s@9~<dVx~;8mBD8mT^5#;`@v7fUwPHUFUx
zVvQJr6wQssI)9Fk5$49!<7n&8HD_Be^ngA?pe6U=a}*8Ti%^RgAOIdPs7`i4RlN$E
ztfU%8Gy)Akb%=3a1K2zAh@63wNi16o*+gNXDwxYfGyH^EE7>-GMTnBqnvrS7USqY@
zsH@Eiv{#HIIO?=0H*Li^(GY!DaLurWsg3L*&OuD%))3Xo9P$Xh*7_X4&@zCaGV^{9
zqC%y;kp~X+6rae0M|~hqANXL!6U9kXAsuB>R;(lL&g7w}Sdi&s22k-(w^QHQ-Dt2H
z-5`Z%AcM#Y;C5C@>Vs=LVcVK9YfkoOLc1f{xNfVIg{zVdYLUAl@2G^$i|lYSak7Q#
z!=`wp!O)b@!XZ1D8Hn$++hJ6)$>$i6rsqok`AIziM`I#|(;g2q&_WW7_!h1=I4vE9
zP--HL1Ju?fN|kWx7=qX$K&^z_!{DYq*UURwz}ur}G4Xx*h|#D{NfK(Svk{vQvuFfX
z2i4}N%x=2NTqA*C3Mc+?a6HFQMxSywlN8KKA_K9qLK+~_!y^qK-))DOY>ytj<ob*6
zK2@49HFl^T%8AtqS07*I{D%&dgvfGXB)+PM61Sd#Q){CNP6NDAl%qR|WwrQ}!^QY|
z7DA5)#a>pQ9FlqbEJIr~DnQiT^phdTealN);!*X5dsJHWO8#e*1AT0_%8%_lXlKjZ
zp^dl7uD-&I%$(?UYtdMIojvRyx!TIFzbgf$)P#ADLd_1vj%a>#wCE{RGD5Cx)sgHI
zwrm8|sF=@1><j@tqiRU_8zvp(-$X`V{^Lz>quanA*i1R%_3)GnMMmWas0>j-Q;;N-
z%+6Q{1a*?B2upU8RaVW&W3-(^Kz~;mi3+<}jjvTcL;EYjCEKDhMsCGY5zBkvP%+D}
z&IzGnDdgy_zHa0U8fWW6rzum^`rNQ$iez8j6j8L;+A2*d`or`pq%K>fT`W;$GMht4
zeyP!&Quj*@F2<WvPK#jOnbKMfvyjY|MFFZx?&5+2Rn!~O2`~y^bXBG`$QBvvzdA?D
z<;pRe{e{5@@(Z;{)earHF<mjzs@dI)*?5*US==m7*V5+F0xEB%tg-TDwgpn$&N3e;
zJaf%!566>R?eF2WLGzMc{$a=*-rVY7mrP3)LgBbar>mFF?k?x&9QC~OSBJ;@?Pe*f
zVdFjTQ^9R^Q5=^MCvxBKzl_R{9E3JU99Vh$*W-<iR_6WJM(gp@M}NnE{WafzRS*4g
z^v3A1$A3Q|<H`!{vo~)sN8!ma=cCF6lDS&s1IoOjh%!3NaO2beL~VwN_bKzyJRvxS
z7{Msk%7Hi(^2#8P*~S`7nhU8BL9@cw&%aaGk_0UmHhiwXse{xrm#fC`BD|R|rsR|d
z>e`d;i9Uw%o=?R{sF0g(Jo<=m>G%=RLNX$ngPSkC_m}^l_dbnaZ_uxFYiXd+5I2em
zUjCrx8-vGk7&3{V`>AikqMK=Rs9NZ^8t(@^j6@0jaa}f~QG!uCqACR!Sf#juS#dCe
zp96%T<-Zhs025&}dFX!<A4K)fO=$9OV-57|VPj1(wuvyIH5lD`lk3=8Yl9RGSnptp
zAGnbagTC)ZNf%pw22|A4;TQNwAw&I6vn0cGd-OtpLv!#b*M+cj;M8#pdWQ%&!SM7R
zUp+{XG7RZvP2cHJXuP}&1hGk>7P#aM5ev_8^z;m2w94ISAeB@RK@X>cNrcfjNTv@F
zhufR0E?!`Y*Co1*2*)?VG-^7koL?8>YDDzF00TRkj%fItHqgj_Aqw8jjF@){2Ayq4
zr}|SpK3Or1h2<DL)~zyy6>{KI4k0TmQI#S#Is2Oj1k)BaA#e|uRoB07=c{B;!4cq0
z*^~a37gD(jCPu@22_VraSAxRml>1J05`KuHey*>Lu4O~IzIIjg$k9-|QscxM<{x%H
zS-Zw)(8%MOpZI#iIVl&qd)7rD<(=SB^_og-a{FS(Sbm2E-mt!g!SgN5nc0|70U%>U
z2Re=xX5An&Vwj%-i}TV3ldIJzKFF36k?qOyF`w;<l1!@3C+Kxim>Lh6Ou+^FIhHAB
ztCt_nC3}{y9W#QZ!i&d<*My!z@v}ztN#;?akwVvak%%+g*N@XLlb9=<b;LukNE~H>
zH%XQ<MH{edmXpjB({OUw!-E&B=SkAUJ96X6C7iw@;mwC#{%W<g?S0tWUElUrS3l@#
zN1RgC9Noxb`=J4lIV;qMdNs;bswemGp$s;`;4P6103%Gzyfa8kea!A+2oKmc(*)n)
zkuDRf+OTjAq7I_td5o8sw03<?l28O%0$Q7k50}k(u5G3ZoLdn-m`zTiTOku2eOFV6
zC}we&QnyCx#)Ii_<Q~kOrf%-RumQ(=nL(&J3`%NmLQVHDNWYOmQw--slM$E~%}UyX
zLnVTv#oF>DN%0Y6C$3qq2aSfV8tQ^sn1^7q|1(VDnz_F{nb8XObhINA41V`rq<6O{
zdSawh6SEbm-s%m~@15w&m)x~hH4c>tDWuvONv|o!*s}md=S0q`Y1-=dMD1!65F<b{
z>P8|zxlFEsfb79!hENXImYuq{4ev7e2%k(v1{8)&c0Ttvqc|tQ2b2;@zZAgJoP4m<
z2SCjDr0i{a@ma&XY2?__D<wyjHJ6pL@xYKZMOEGn;p48+ZRO^t%_xjsD$i}%%=oRb
zdJ3@ulx88LusI8H>rsq9zIqtZe$9~F%;}a;d%^sp@EY~40vX&CZDp$*-OocfQ&>^K
zm}!0&9f@Pr!K)G<Lc~0QZ3HaE7cUIV4{DC1+UgKt6d-JAZ^s*D_H$+f)1Jl+9Nm3p
zz;uP=&rP?4_)e1B>Pmhc-moc*8l_5&PlvUsn4qJe7X4C32SUtm=m(g?UgD}t#TW3m
zN?ey|ejRWWt<*_|=uSz|k(2Nn_EIH@4Ho)ueajFr5hg}RrBhYJr6BRqd%!7((L3P{
zV)E8r0Qhbn&~P9KMtfxx3Urx*{zjRzNM@e=#a>XlSitNbVKS*jph0>M9(ccqf1@37
zy5eU$#Y!l%^ao&RZi9TSw9rqQzw>9*2jFjD@Vbll@jH8O{h|g3#0RJ-7D4=_BE(2R
z{9IL<FUrm2C%-d0Yt8yHNGJMfTLS~`>DiTf6&S&8zXr}A@qQgkg3QH%nu8W%K(prE
z@;Q+25Mhcnr{rw7Sm2j>bsN#rV<R@HT`EqVpd)<1_WoiJj6U!m{v^5eC$W#O;QZuc
z>^uN-R=XV3Ry(wgn`v$)oNy@wl1s_P`l9nNQ>-j76FC)_jBu+)UB;YW71Al#hrPhh
z>@auS*^eilC~@R@6)Ik5QV3Ns_xb*q{0JR2zeRl-g(^R42rfp>n4e1Lw-g5VRF3+o
zPVaQqPjl0;YcoRg2i^es2u1V^|4#a8$u@@8B8NE&V`wWfq|df+il$475Q5FlIB|Ry
z>6(w)ps~EixqTtB-`vZ#C}5_&T(R)1Cs17|l(Ci5zLY!51=gsqow+D%ct(D(Ov11d
z6JOGftbCJz;%<Xohz<ez2O_NyL0E!~$=Wb_2$f-}l&14tQW-r6l}S8uo<$a|zX*-N
z?guNpw**pq#k5w*ytCdpoMQcMOJLHz!2q|-w12r(=!F=65RHP7O6=*lY1WV0#3fw|
z-<z+tp!+!F+h3s);>*Lytpikn-lyIM!tW=w_g!yPN6zSmy(wKw4&rVgZon_5mzV53
z!)BC*1)p$dOh1;u#<ZLc4&i(Z@gA(TZGDL5uh99`brW5y;DC;Mo>sxV4i>LXKh#DF
z4F1`?j0S@mr5>r<!zN^Lh>w8WgRqvML7lfb*4Dr{#V5*i<d9o2yqUmfVNbOY0Cq;G
zW#Yj9O8#B<lJCCTHcgjgZ+BETGutF#@L}5==sNVS(A*n^4X|A&|42o|0IKFwFpdYZ
zM9oFe{h(YjCMLq3T8HUb!xJ^M8M97qy<{5d7Uq>e&Lmm~zW(s_?guo|^553=tUej@
z%Z!;H(i1GP3jgR&KfCQI)h3&;kplk8YaL;<Wd7bwhRr*^(eF;|%Q?F?QvNQ`0m?mq
z4HuuRY)PmBl+~i)xHZguLK`FhvFpQZe1BeiN-Ot&V71LibSHK$;<g&zdeQ7#d)Z5K
z^#rH%QrLO@m@6@rTbAk>w4E_ER_IYa#5#ZVveu<H03a?zz_ta!MG}!l@g^EhhqHi*
zm)P-`pTBwp6$P-o-6CTqH$>BWlSwDtpsXDt-3`XGaiY$cuiP#%vvkwK++G~EX^jLJ
z0<;hhv(kP$55D=rtBONb@Psc8SdrhnjDjm9q2`jcdyp9gOsV5vvP$_f#mc9m;05<c
zdq%mf!sr`EFCB;DEc}(zz__1&>2l8jIg5s%n3q{nLCsD=`u0DY!4Klnh%#|ZN9I-6
zoUZk3$+G<L`Vx~<q07@b&AHy$z?RD?Ep579-Lc;9m^`$#rDFB-{w<8&HLu8;^8`B-
z&S+i_h-b|^<JQnc+Ny>goKJ#LFGzYE_G``VrAXaGZc-4G`M`o~AEkb0;|$o$($NOy
zMWgKut3V*RorG~YiyW?dTJiA3!h>+mSN)N0V2W<l(w^tZH-mkVhy$}e@r;8>)<j7`
za~<`-#tG2Y5eG?xWEC*~75%&9Hik2{zzLRk92J{J7tsK%A<U4|4^csuqsr@lMvgwl
zqYqPJ<EP!p6ptJ=MlVdeNpunBqHsC1fDjg$U_`p^b-J|~fr%D(yVE4aB+XsSMHW+5
zFp+awgxK1&0wv$E?@QehvxNe4h!Ia99X)~)Hjl^;U4jgwVT)s%DD!mjJ!rLA6ro=f
z^khUk*;`-7IazX#FImv|a0##_gsJg-T>G&~q?+5Et7bJbP>3+Rukf_WQ>(C8nQ#ZY
zj{GjJo?XTMuM{*kA9L99d<I{rPs;ehCGwXXRig3u-C{{B6z+KGvsi#v*``^<QCz6t
z=&%_sg4{OBUpS?pTl-UBMW4Ae!Av9zJ`T#<j%UH!*Ee=p3*X`ZkZmibMT5xPT$NH_
ziPu{Bbu9D<`xEJHF2C}!7u3F#a;+VUO8aUJfg?c2J(b}b%j_CshmhOs%lOT>htCx-
z_q#B9nsueU3C7E_dmHvpe=6_Vc*oS;?(Ao4WXQgBzuOn#6?UQACy$ub&$C+EJ@aO{
ziv|X-<dmUy1#Xxx=vc8I!-(>3Rs1S|zg@hFnm?4M{Ha^x&weY7m&!mJMi6-U01d;T
za!(cnpu3pOP^40q-oP)_LU5v9#U`j`4g7^H0Vk#S!@srtJ81iJ={=4Fb4F`f>eA+4
zae1&kLuYbBEWSAuV0UZ;7F8fJ5A$;J*{>p?l~Pb5`^_{sj(4U0j8~B>MGsjkbu4(y
zI{T^RO38cHioNA~-gVB*Q+E4kypOJQ{dhnCV848f!fWTX7hi{sXJ1oVOP4L(n}h4s
zME+-dMsCy?Flwo~uJd$xK}L9VD?!^%<5}1*cA4kAwJX?tMN3oZ60U5AR=Qgg0Oq}1
zQ{BF}T33GWCGe^h9~UK>R9=Isd@woEKqnzqG4O#Acibu{UoeWC&JR}XCzxeNn&Uhv
z8zE;{=R&`PXIvzSvE#ll_&wr!2%PAH9#uArc$+z2RSs028*8L)+VWY~gj&gNgq;km
zJcx!zg$iFKVJQ0Stz-l8jEe>f-k$eBCr^<-Znf5d`va3kR`J9;A#XmguL%KUM6H^d
zNT&YcUDgysISLolg}8gQXD_G=R)$Wq7qr6acn{30vE2F=vcv;HHRpmZguJz&&gE-(
z*>&axEnf=r=rEnOnI_KQQ`4WI6Oza@3)w+(he;7S0AxU$zgt_y2_H)HM3kK~i~@dO
z95nW824ng)afR$E^j<hW@rA~VGg4HcjFGWo@l!8H#_S_qXU-2RkBdv;)HItjU5nXq
z-jpq7-v!dPWV)B8ZZY+MKih+MeLY}23@Lp1ZaiG5ex0y{ckx_M>(^-n1VJA|@m;!e
z<8$bF;rnjoH>va{y$rz2qtErCpjfS$9qFKs8RLzBebAZB8m5FPIwMN7mP~Mw3`evs
z>{!E{B&Yh_fbD`=1TlvK!Xp&jg)d_vq%Kl@6&&QZ=4DTLIpi<Nl&}a6wQ_BMN$mCw
z=|pB(<w%-_{DtCKXBqGyH9D$i5{43Fyd(Wwl&~?MVOG0w(HF#W^t>McMNG8>KiGo}
zo=cLpq0?EAFUgyk>#=(FT32$)um1k|!atSs|LUw^$PRN=fiKPf+uC^g^hvh<du#pg
z{J($A{J+muDGBgg8GPMU<kz45T}2*E{p0iYdFSM8|8@Jt{&_o>1K803LC8Y0HEwly
zeAqeMKRWzp+b3as&^|}?{h#f#llscHo(0ipdvMZz_3C_P1OM=q-}zy`<DZ<Oxc3jw
z{r2(xi=*~SdRT^c`ly#jJD7%7MY-@}ZR3gduF##ay4Sbipz0)!_mBTTb8uDlQ)LRQ
zRQ2Ct4idm?YAp9M@m4`Wxs1>=^^2mo4W)Nbh_#QZq>nF@d?1mJwcIZ8y&_#+<@6|(
z;j*L@1b?-E^wa*&FtOvK!{fHd>ogsSJR+V-B=RX=E36WJJK`zT-n}A9dUnG|hG!MQ
z6C$90)ikAm_<a=w&?E^4YbZq4o^+oD&)1(nfBLkyxpp(gLYlpF?Eohj_6|prX8+s4
z#-oiD{fd<8^~wZyDF3Jqi}7N27~G)4nySLoI1LJ{v_Ao>)!W(eYR|lfo~WzH)b>`r
zR-OL4jboGyfDd=QFiB#3V_TzCguA=my7!;|5E1B)y7#?D&vLUB7`H@^Jp;ovCLZhP
zAQEZ>T-x%JK6LP53jRgV^XUn*tB9PJQ|C>erPJyG^c_uUdWY$ce9^T_f7g`lFTb$%
z3?-kTb|(0u#_FyZ4TLcH7+{h^pYvw<SdEuTGdc|wZQ<|d9LV_Tiw2{AejSWYW7_wr
z+AwSs`Irtzj!<d`z!6jC57J$F@y)FeI`WLB(2VOi`4A-WbR-Hvb1OVkFG6~Qj#rRa
zksP=n!2;_&Vc-G{BdWLV4dSb)OUG^>x^kuDe$?y1A0nwz4^MLcW}Ntx0A3W4=P9EK
z1IK?7qFiVw3@$;dNckN~cQEbA1m7ZYIj8N$;`ZI?kSpazBQaDLWV|RJ68BJBpaGOI
zh|GJfCXnWR<U78sCFOI`D4(lI`Kp|huj)zpx>Q#KhnWbvUiWWtkQA5j)-@L9#-!5W
z)T0-NtjBOsPNY_oZ#wK0`c$~xS~FA$UkZ{@=JbgMhX0e#YP|3URHSBkj&R9@_Hy~4
zT#bVXi1K9n%i-jkMc8~@Va?ZN*nF!Fn{O3j^Rt!M3g>jURbOFwDJ~fdF2fo#e?ek#
z*|lZRiXbUcv6Rb+D?6yewv9_8I%G7*e~~MO;+`;ICsThy*B=;(peW-_v$1j_FeAK~
zfE*T8xw%w0NHUf9V=%n}Av6rJR(F&R)eE+sP84z_kV~*)aXgN?2fz^VF!~Lq;hz!!
z%0x%HrV7i#K&==fRRQJnc6b4Y$9t*!oF45T9-qAUKkb7KnH{G`Z(koCHw&wEM5$G<
zgUix~&8ud^NDiCu>R1sW2F^&6AEGe9gdDe33lG?fr};pvG~O^3F`WX%s`}z%nU$8!
z!3+l>!ju~Or_2o-Uq#8Xf>xpLOihHXE!n6j|9(YOM6}8*7`fq%$h2&f+MIwybefT7
zein=|#a8>1Ou;*v4#!=a&=#yosXI!}rk&3BhPizEx%;_%O8(p(T)s)p4$C)j-32;Z
zRT$1?`Q=+V_1AneikOp(l)rG-JBI!EY<Z5AK-aZVCs-$(uZ8(mNXSUCX%Xo$4=%pU
zeY>M>2i^j=kj1j+s2x(+mV$Tn^>E=Z)@?xXVBD-qKh7wi21n-(i{Ll=@4y5-e0>a>
zcc*>6w=Qi!JYt+pc6Zn5as=VRD(~*9Q5tj<4P^)*EGMYESNJR(%_s5fT2yGKnXhu9
zD4zc08cwrjMe^(M_vRy|)lrDbS?g!?(Z7%LAriBe!-PZY;RKei=xcDT&~f-9*)e-7
zQ^bA^3lw(QRw90mQB78I4_(BDOG3aV+l<+3qAzp$RZu8g{Q^YtK{I?6$0wkwom?zm
z8$}03qv)7WWrp{>p)d5z?G?A?a2$rcv=h@b&@YouLG<;m%rHd$idxCZOi<AH-CM8A
zMUc|wf7&c=+AsZ+)AreZ=j6;kZvW&TzN(ox)K`pNwu6n|J3jE(&8njk9<t}n?*{2)
z2b(<HtDz06a_+f-@j_Z4GvN*vVBOpVUjm;$J@1^I9KV*1z_Mx^YN=#kN-6fm(B0)V
zG<5#nir`Sbmyw4qk3k%uNnwQE63ZT-FFQ4FuNzp(_GC1;<yUKQk^&QOB#fEb*|@Oh
zcQf%v+;Ws*{&sat#{|ZKB%vr^M!6_4hGiOvfOHCnQIR+3s+gu;8hpf~hEFB#3PZ=q
z#F2{nJPa3|g`_2(aJ_JXULX~LTzzrGdo$YZ4?F`rvcqA|@CP6ETp<q8WsRR0Jiw5%
zH^DD)@*<k($fevm_ku}KtN)7tdf)LVK!_|O{9A@fQQq^lH4%=pI>ZrXLT9MoG1?Y%
zX5xS6f>iE{*Z$_RaxROqM9Rd@<d&8?EOGsQN}@@an>^l){h4INJhQmtT3V~9MsSIX
zzXin{OY~=Fv3q5%NY`SxqkQ1V&dd`Y_hwOPjhT$Mqj+>XRL!lnijpGFms&ie2G^9u
z0#4y_L}%%y`(JQIS6IVptM|LD+p<o*_bM178yJgjsUwh0Z{nV)7wSUNbTWGEjhu_K
z1iFSt6XoG#$AsI+;9<RI%FGkk!NscCey*SIy;<y91<IB;i!EIN&iA$ov-qSc;a!Ag
z*mF(>{R3h7Y$&Sa8V`%pa)wf=?hTwDV5Y)s{WCR3dXzs=&Ix<68PxvdT-G2$e6iJu
zI8^DL7Nn5<ie{-3GcZPDj{cAVkqAVjW+1HxtDIS{PjX^<rik_)3d|g(F0$Bf7vOq7
zSlal_ftf5t_`a)4=LMJ~g+i8Y@HdKZDk}Rz$L%nj^y3~^uM~H>q~WpaZI#mT%FE$~
z7YszO2FJDx!YklZOmG?u23)WUpWUkw1Uay-T#U|nj6%JE5qi-Olu#~CpKx6~Z#u%;
z;;XPn0j`)2mD1W?2V6{(zcjL&t3hOe*<jRLi_I#qoKPHZtfy%xEBzhE2LuUoqp^<A
zuxd0mJl&$6r`aJZ;oWJH;QJ`E@~o=;nTjeEyeKmD>X5E*?To9k3W{8_hjPuRU@Qh7
zVX#k}+^Sj_Tq|Q7Ca_3D%wIC<S#?4sVw4ZIfVv}uCej089vX^$z|bPI=xEwcq7fDt
zZD?MPFlD;HgvTaGn2;ulK?ySljZ;T$xb{tebp^Lj%yBFe-?PhbGGIacU$Z+Nt3D^w
z$#^<}E}#ouly{4#6K5?rbA9W?-kcXAZ7jqU_EV+m8e_g5s4F6~paPYGdHCk^=&*fW
z%lYNM-WC4x27=w)%{6cyT@M~);O5-=8ZM%^yPIoJ4Q>vdT?rT4x^9c@3GM8<+AKJy
z?AS9VO=DM{H)1Qw{b6VE>o<oZ*dJJ#l!P`#2of{!wdGI+oY0BV{6@Vc)yHvZ46!ph
zdTR^Af0@mUs8xDCTNdMO*KeUiV3+3g5TWSeC=|X)dr*iMQ9cK6Z&3QJ?%Bn3JW~<k
zoq+#ocn%cFt~FuT7l=3)vklGI-W&>QVop4ad)=9buTEt%2uD|wKFV`!mlsQTxX}U%
zQPi}KV-@&@@o^}z^pO!Hn7khj6mdlB&@Eyq&TT~@;p&uSch>EsPo<hr?i*(Y9FHwg
zhnl4`p5xaM$aOV71$l|!z+ODPz=tFo4S7Acw~(A)K*m>+?~RQ3Ub<v2Ebg4Ia3BwX
z{m$Wmzkhz-KI`~Dwh!<M^5ppF=UOH@tL~EfHOJNHuDTkfn^D)!*8=W8{~^psgyovm
z>-fEM1TNtK@bMRrVe^WPc}0V<&1mO4at-Wyt@<oM(rPf~rt4}}AJKE(87@&gl}Q-=
zs=C2iFc(}K+-6Gs$oK5Dmqj%;QHdw!pfb<g3>1K?yv{N&A=eQAp(dN^>Hw(>ESGZP
zB7}2i{>`tDSU?I`ediDd3vTS3@Nj5p#u<Wdxs9Xa4r@tI;{WIiiE6imzKF`M`riLS
zn(B5|-a8huA)*<I7t%cTxlR?*9%Fq>+m=$6a&8Z8bq60pjH6B{|6MO8^$$*4vffa^
zlLbot0UAIUf<c~NnPV(?NzXs<87M1|fcL6UKPb<uQbuZMr;)EaNyEWyowNZ=m1jVv
z+9^^SZQ#QY&=(rQ7e4|lN1<ezU<_94IEgq4@77CW#zc>=tFI*C5MP-ZUy;!$e-~d%
z4QT!p5P1hn731qD`XK8X>x@5{oDaSu`zd!Z8;Zm*m?tum+je=%I|G*DMJ9?Fn8k-x
z<~!hVY9JJLv+F~%=XTX&R@YColoAs#wMQ+&9Dspr8+Sysw^|L2U#?I+T`~bijx0g<
zhmk_ZJpV0>%!XylP%7<FOynJNRHqXx1-`(I2R-RQnZgF10>kuNPP?$kXQvDSlq*8}
zqgp=q<4`l-1D+Kr+n!;%I672uxxG<OQxdrvxRVD2M?ImB$E)P%3I`!Z0+O4Hk}M07
zq9~=S_R!NO;dso{6F}rz^}}1XfS|EeF0FYd*H6*>XSjh35mNR7(};&eu_4Ma=j_=p
z#HRaL5fYPE6LG80yup5SdGCvZ8r8g<3a{bY%)4s^6Bttj=vOc-opYn`nkW0^SG!0H
zSzDvieGBX8ii_|eB}IpWvTy@gx{;$*HJ#?q$_Iqz-IzeK(wE_bDrUvK@j*2sMd*&q
z2c%B$>L(caGqTse`M=D&^%G)Z+;zxfr19lXj=az5PN#*Tmn|S4_|{8Qx&K?Y`Ge~=
zU91$+<BAb-*_L5~&CAJ#HAaz%GpegD$mb0UqQ&$Kyf_Z@k5{v!I?c76nl0mEyPlmS
z7BL8Z8VdW$5y+7S##8^TtwHeHEoT4(i|byvNX^I?YJTpKd@_lBx|g@+mnjKp43sNi
zW76ZE=9yi&Gs5k~?T@(kvs1kUHOT4J!Y`EdN`GX02nBU-2%x>}Yu>5gW{{MkD(@sA
zFBByy!PF=dat~!+pU3oTD#2V}P{ar6|H7;wX0>1SdaX%O%NNrt50msY9hvZ}Q#FH8
zjSJ6;O92^tgXv2Io3PVHuB=avSn{8!CX4y}KUXX02&23@eD|hJ$KeDtA5vV%Dd>G)
z95d#ohc0lNA%ZPR1^s<gi{k0TpUU@^K3z-;hl^QKL6UC~IJ&gpY@#nrYngUTKUDr<
z|J*<OasTLT+dpf+YM-@_58CG?w9Aq#D?U|*XzqS|(tdZ)KJ7S&<WY7jEE7?a6_mtR
zb<f<nrEmQ5(pzoVc`%bQf?vmT+O2E(YTfhjcOBW+@Ur-pm3+$Pm(G@EpZ_S!F2U?`
z3<=M>ykRUrj_RN0e(;z@MeE`Xugs%kb3!ig;Y&&%^8n9G#}xnNUh;c7;@vA<aJ+jD
za+PVb;(Qi0&4ZG(QCk^6{l_200cLP6(}VsTWGq19{>doIo;xo}jGAqt_Ec70kd+>d
zR$<g`C|^aP_5THFSE_osw9FZ_QpdZU`Yf1E;vxDt$PqFTRo_B&Z3d^<D8y%4m<%_6
zn^AsO#2<=+2ONjt)Mu^IPWHmc0+W=C1MG04x|Y1|G)hhCJI42TSt*{5>Fr#3pJZ6{
zey#}%)$S@tRkSW<mc_Ks)yyL0Wsk_JI5zz#z%J-gFrGM(*3X%k7%-ms&`WCNO>)0r
ziPE}sv1NOBN{2H$Hq^N6^Fh|o87%->4vYhc$LSRDB^PLmgC6=akpC%V7<zVa`?sE~
zUffQ?RncEFR>1MeB0k^ckVG%4D%)#w(AwI6^TLT)i|_#}F^N&Rm7?_@(tw#@eX~c6
zzL&WaY{(|i5^xnDRlqS{QMMRg=>VJy>yiN~!=7Uv1$Sj7b26{M7Mj_NAxlRqnrtTJ
z3;ZiSsmNw_|1usa9;sxE>fq;|%`Ap=3|-1Q)#g_+>bwIkJmV^?>qKTI2hT@?8F*Ad
zA^$Y(WI72&+Rek!NY-Jt(ahvSvT{G|@Mk*$4#V(dvb-XhAggKTyFS1~OYmuCatos%
z`GSGUWw<j5D$jn&^Zp&DeD_s1?d>nm0UR>?$KZpdQsQ?f!m>CcE8u<yF46Mx55_C^
zK%2XhWr^%#pI$R*RCa1FhiT?O=$-`bf+SfoF@AqKESo=;e+(@^qk9p=OW~NMlIM>?
znfu|P`#P}Zr`mm<TTA4sdmz|SX|=ZY-=Fijx6^P!%My$x+XaqE<$pBV)*L&@$lb93
zs6z5tU|slQncFG@QaN11<esxn&O~)%b219kDv+_=yx16hDq?dMjC%2KTO`}nkofcb
z_=RnX#<NS@ILjS~cH+&|?3U`9Hj^yajw_H6fP9A&ji?ZVtSy7#c>%@8AT`tt0LFq$
z{(koKM|k#>yN{Uh_{njlVX@xviNF80bMl7XHJ`Q55C3_1{Mz|=*?j1`yGup4YR)wM
z-uIr>7@zBi+S>HDBiDkc&1JR~e~ZKm<~QON%z%R_8O|xYO_UN<n%=&=c+-g)@Ln!T
zvGfyONpu4Y4lN-9wdWFnI)MYX#~0uJTMY7fb#exa_u}pA%)OAArJ(Q{)%jLf-*e<H
z>G&OJE_Y<+rcOO)q<fYiM~D1g<mN-Fk$ItGR{<6glat2ir;r#MET2MR$gUVI!1&71
z6y$)2Pp#1f=pYG$9#)G|@zZsE%OnH1FG9~1VEw;DT38QdvzL{HwXEw6JDj1O2d65k
zWiH7)>LK?;7+?(Rh$^tDSX_+!g8%w~ssSzg!u(<OwC13PT!mN?jGxWMvUZ;Y-)uH%
z{zzyVt{<c#xS1%7!chb&-`CwQMZDfAB-y4y_MW}r-Sx^>oy;g97~i^gF@>$u>qU4f
zQ|Q2u&RpV0q%kGuXFeRodO5?(ORPnUq3?Z)8ux0HTT^d1k&zQaIO!Dzr>Lb3g^y=s
zeRNo4Tp$D#jzBBGf_TAiw??qWa=5qlg+c(VoiaZ=0NWoT%l?BHfAj%)o+I`9lt_zS
z)llLzScWd&o9kF(mT@3pKzzv78Zd5<Y+HR}E@}-j1A=xrAlcDW-DkunQ1!mh`v+CT
z{0C<T@P=V9qJ3DjwBAplm@S@>Rv@@II5l__;dslBtx3xA<FN6qQ$&lbvmdQl6mB+H
zF`t)g)~;(9K)Amjk7ceyCojn&P<!)j**t@G){R0!oz_B6DSSNrHVQ2|M>#6^8x$Eo
zy4)$K;`H_IMx(PXtlWnlRz{tb(56O(M0KSq>VB9W!k||ecKYB!Ny5x*-<H+4QPT6(
z(Fq^{6gfRPJns1WXRqJBX~Qq4h}BCU2-Ru7ZlC!-?VtPYcc)<bv|rX8i5{$%aJr<|
zf(zzVS}_(SoprLa*QM><Y0=-U#a3>2{*{xdh|YmJhp*scP+{BI_~C}PA<34@S-k#l
z7)Mmd#+j%sk!#tqTef~5iqr0}iLO6tHL4_bEngc-M*Fq?CQ5xw39`d4!Bh=5Qpqdj
zk)82lm&K8z0pYk{^!<<2x|fm4SiJS8V{xehqwx?VS5tn(n+juN5{at9c;d47CjTcM
z@Srb*5!q;@Duc2G>RaokB^0CvB@47Nwf4ALWn)Dj<H8;>2F8<oA~b4xPm6kpFc3K!
zuSmP@EPhV4EGOpdT~v5LQDP5!j~SHQ=`_6M@3ZVqaP9Fb6(rw{6D&R1phbc|B#t<x
zLZ+8Ru<DIYG4JDR@sm#5dvpHdfw!@~{*1~&AfKnA%5yJDLdrus1}h<Z?%GFo!1}&p
zKPf(HTA61k)f*5ZVhDg<0c_hN^7feAAwTY)9pc-Bnud^Zo!AP8Zg$TKg7q3&om4>T
zpF+7KaW}I0v&M;_clUDpjTmAKY9gl0bvR|Fqv+RZXeJQ$YSPw9V0&_UXp#z|`Yt|f
z#g|;+;8u!}P*h`C@}|vQ3#>1yp0sf1RNuB`%)YpwZ<PyroZla6y|pVXQb)r*uCu8i
z{Z+If9<B4GRFu|o;Lo#B9{vtHC9e``MdwgCWnsf`zIUBP{)XQRTe<X;;F>OX?^}S0
zNM}hLw%9EvPd<Xbbao@y_ZJZy^T};yc2vpQaJLst&5)*dPOV_gc!r~@ga)n`ZWX6J
z68fV{w|!}pS#RFoO52I~?rEOt<rCeY{hrG_;|kB7zH&~<S9r6WRQHDVybtM?|GaIU
z`Er;`E-m2_$PiD+cbZ$kMt-mdUWO+5HBDT)3#L9IA<U&b$n=mt&StZ@_l2~^<){;d
zR-rJG?)7m0`pX~g_b}d_jny*EJQwC<YX~#>l9jdlWy`;n+&k}szjxag+-(o<zTNWs
zh&_m?=I$GZp8oVjgPl6(*TMKyysG7`cbS02+LKl6^^90S@-b%!CEm&vey^7B=Ak13
zK}NNlz|vuqrTKQ8a+K7?4QjS<RZpdjGxNVumToeyxrkLbqKa0ary?+2?I`z}_^yct
zNEQ*og}E$T%1=0&4!uv-QUELD*-U9GnN(-&d?+(oWvPhI`K;C1Wp$6j+)^`9tcxUT
z6+<7)!=a1^?6RP+;MYMSvfO33;>)cX<7BU$XRv2(#^!7bZnQ)sb@7o_kXmBqb#2YA
zjO41RB%F2bvPi9lS399dlJtSBtyrgGQ~c{niD=9X?WoJtDy{}qF!Xxqm^F8XbLmy5
zh{Sx0J(RUG;5ze^TX6F&s=#$kJ_p*_s3P>)h&vlqfL?@p{zDk0xFFcfL7X$le+YvV
zSN<v=MDQ8x>e|bA#1AR{qnM5YOGOhSOyThO$Ni(jm;Rgm&e`ERztaXz-|w{XtlNLn
z?zGR&>!t-N1p8r{&69`e>p>QaP*3=61tnHG4!T;0fK9$4cfY5lni9wbmMop>G6HvR
zMI7U$DKuA5oCtZuRTMz1YguU2OyGQ)#gEKcE=8AKjhY(i`Y9*t0|COw*iSu0h|Ts&
zkxTRC&&T_3q$^Y4Q(sYo!T+iFox@ZvV)1amHejG&91<0}mUpotscb>D7R^nyuS-X>
zCtdgIUbeFK>;Xm2+M?*gvPu&QxB2SDG4ARNYnoMYooS}Io|-$xM#VWB*Owj2>`WlN
z7>@MB$cID`VpEDf<4T^o8m3gvvKwG)%h*)jWH`=+``BeOFuQg<BBH_e14@TC6VQIZ
za1_)q7q1KgWkSwC;D9WX*%So8ta$;G`A}PLtTzlS8U{Rc?r`tfO^X(cG!YohSO^kB
zb8PIYA&|w}JR7B--G7!#Pi^}ZD<Ap~cu9tF#3T+;<p&{^0Dp68WWbCQXEkJv_p(&!
zDpF!3A}#VK6#&VfEJLihbP>u^lS0~QpJD(k-}6bxMi4v(PK&CN94Skgj*z9r=+$cm
z$lTK;I;tj^R~4mFnP)qt0A9fZ{AeTfQ1VOQW#abi<fn6zRb`D1PL9qw*c0h>6~`i;
zzPcQ*)Y|05g+#y2dSB`YFgL{VjsjCG!TL(6X6?RJ3zzMJo<f>gg>+Ue+AVZIm@j)8
zmRH~D)W?;S^Vl=geIXO$$s``0;Y*Z*I2nbBYMZ$uP!kb7a75YEsu!pRr1Gs52DC3G
zWKTU{!fgksg8KWPhjz?&#j_BPD>==(oQMdASLF|RE~3f}Yk)^h!>p<o{idHhs%(rS
zpd%))=0w;sFJhpcG+}H9Lc>^gS~)zP@byHI>cm<K)>2oGe&*KntEu+R?D!4nNS9k%
zi*%+(1AAmM@blP(VURA0tFnmsA@h9kjw%p{YHv7QAm4)(wnc!pS4w2#EFg5fvE{WG
z$XsHQf+dwOvqZ<AN_Vp;Hy!#o6H&`TpDyCvr#xL`aoD@ApNaOJ*3mix7h6y3*mJF`
zB7Tlad!9Omcu35mnro&{9QW7TO1drUN<c`NtecqjzssL$XNXXoYYhIt3mWFI54;P_
zm_hS`5_+@PzPQ_j3bEg@x>_V)*?3R0<L*8j7!&6-wzeq8`CctB8I4FWqw5RR^`f51
z;Lhaxj5-Hi?BqR7&MztOWJS2Bw4v~_eSF?N@0^_Nziv~mfge`B%KXp+@m2oslc(Xc
z=a0Hiy1i$YYXovFO}cBKRfTJ(N!)|QY4*RG|7RV3o;-R)|H4n^-`3-&j~{>4+FXCU
zx&EltdbIJ?dh6-qji+CE>tB6=A21z*#PjY9Cp)=6$q(taU<qGhu3-2VOMamtL(AXr
z0x&;8S-uYGW%<E(->It`_~HU%u~3&n1q}BX5Hc8Ch46V3KE`h_aX6{lUhSYxq4294
z>#dgeItT~e>uER|g%`oJ=j~j<2hA(>!GA}iBnq0B$({g2NS?=2eCv1+_d>Lthvaye
z3ME=pX9|;HDr0-!#J@#@L4f^F>F(1zin<gF__672G}p;d5b#q++2!<z-oc}0--|Gh
z%*AvP_7DU;#-B)HHN2lig{t@hi?ERkA((jm$z;5>wl?IUn{jgW|Fie)eQg|BqVqSO
zqU^cDNVpM?B%WD7crhjkyBPcvIFs!4UHuxV1s&@(YRQ;&yr2D^N4>kc)e>MQ9+McP
z?y6H&r%qL!I?oM&xdE+hG;(Q1T4yg_A6~$+{4<9s$UR^2*0?QaHapA7we4Zp!fzB6
zh(zL%9b1Ps8dPoX%lON3T_3ZKxLmMZu|d6&OSQUeVvj0OSy@i-){SrwCmP-)!Gs)b
z;LEX&ia`V)5Bo_xOuD;sDz!|dR=5&(%s|{n=<Fg9DA%&ewd`_MQQq8t9#4d{9q`_p
zIUpm4=jjmE&e>SHW$3s0o+&3+3*Y%}X-`<-mx$G)z_a`;h^;ZtNgMtSM$<$cL2WGH
zOfz>7;2YA37Pic=Ddk=O8tJ+k2z4AOj3*x$UjA*<d=VxH17y-acl<1^T)FeGm7^XJ
zsr^in4J)IloxBVbewudQawWKfc{wRnv^tY3!4<=LL%YRL30*g!4zwE$m55wpG|a-V
z!{RlWUg`2y{J{5kUTBh+f-_=xH3K52suULNoRe)!5hW&Jd3lUGD&`ex1LUI@wUaUo
z*j>$yvF>&>FBQTAX*5V~Fsg$wS4K|~4(Q(*P0;a+4ht0~pqt>Kg+XfIbeXXA%e79b
zp?vJW@5fhoF<Qb#pbqyXCH8!tPFI|uJ&Wu85!|K=1cj_NTB)<Er=wSsq?f>myOPAu
z5J}Jly0X?<eg&(OzG@rw;Q0#kS~w|fL6jk;7&LO}yYty}GMsS^*%cYl<q9TwERd~k
zzl0Zkls2xdSY|axCNPj>OXN03UvEafB|K%~)G{ywZAx}1q77(Z<_LEi2@z9Ru3w`-
zJ3}Kb$7<fH+6q)9hOV&g3diO_pIRFHB*C|IZH{g0{K`XXSWpuc&P;Kk)Xjvfu;NLw
z<8+?c3>Jj4tKJJv_bBHh!r<z0_WdMD^b<hiN&G*z){L!?)l!}ZYP(EOJI4h%a}N~7
z=65#7wMQ_hrsUODma~AZcAV!79^1@b-RUS^uEVMWtxwW{0(L=T-)0z?Cv{9E)fkX(
zptS`5cHO7AOj_!x^GXlio_9S+pC>lMzrR_&$4M@3<Q^xoI&-(raul-ev+Ysf-%;!M
zu>I=w^Aq@k&c&_o4-SuNat@?QGB-u0W!U=LDpHSk2{}!&DPJe5LvU{-l_A*IAg+|_
zhjqKD3asS=Yvq6y*g{J5Nz;Ak!>Z_CYjJi9zfZ0+&f*C#YVePe{lUW;zHkj`?a+i!
z(h^=nSt%oS;sd5bV2RSM(&R|$c<h$p{L`3v8XbPwuXp_>%@P@(0q*tc#F@z~^l%`J
zVV*F@v(Cz?vm`xs{gm=Zk>W*T831Eogy`V5gbgM2F3NA{0+Ptc0h(w;?G|@Q&yO>$
zG+70mEAEw^TMyRcx~PA$V}4p#-5uE?Uv8|2Shd)^4z|^W1t0iX((FZi<(O%gY%TKO
zCa2N1Du;sZ6?2FiKwJ#C8Y<!lJIuMRV->G_AY~?{;AGzii~%=NZsue^gSVJ<*=`o*
zJ?6+?iUyDq!N^d`veWzbGC=E+T~t;+|9Ko1PRwhY9WrR^;+@}|)^-eFsRjx*A&u`D
z>qbrj|EutKP7G#6=~_1+>D}@{Qnxj`lQm;y-k^;Lb>hC5KX`Z8;Oj0-#KEg$6hw|O
z_CPYZC*j{pPWTs^dFfX>ckUx2{feAsERwJps*(x6rzRy`gf&r>7LU!Xxot<Jwq=cc
z!D7j+IjiR@9~>MM3rAlV@+HD7WJwFxP3h9#&v8wpEz0vumgSchmD&<g32~gA4U^}$
z@C$_$LA&#<pq;s6?hZ$Ih;IyA_iqJF%58Xx2|XeY!9!ev&^xQE0D#faY`WJzq1<mA
z%om)Iav$L(t{DN#*w1Vq<?bL>KO=GR<MDa><<Z5DXD={iFhwlJV8zF$z}>dt!^y#a
z{i1XQ=f-5p=Bt$HyfZxZDqvS2V^KgNAAV>!v8}q$fZYg-p{-9@Fy!|=L<izw4lflz
zWcn|UyVQ5eDv~`FXn~D3DWLybQ2%?R`kO7F2`<(DZhiCg>9?N#cYEu*zkSvJev$g$
zr|T3C?RhdB!7b}Q;?C$&?EQ*=8kh8}vGwm)gE;L2@elTkRmhu*sgQF}wzkB7q*J&6
zo~9_Agg)T6e}}6jT(J5{(#;xir!jl`h3TKe3h1Ms4o+SlT%5Jei&W80iGWfXHl?uc
z*moaY6M755A;<bMZUn<5?-l3dL#(esljNVXbfQf^#T0{HvoRK69%a$}gfdu+M*svP
z-Jqc-WkX&=lkE&GiEc3)Krflt`W3u@^+#7QHx%7i@l1MSW-0cHOsL$shF<qj+B|?=
zkyZ=##xqFeKBWD0no9AtSW)4%nZ3+4QzT_BWmb*FSMK_e<M%SSq6zDQm+L`k&BAM)
zR}lDdR6W~wu6#qXkl{c>_FG02qbo|eldQfffMQVAL@D9v^9Q<C30m(ItL?G8SFeeh
zIDp^JMg1}7w#GE>JzB!FbRX9a*0^jPi#?_#fA->xNQYBk$Kx@X`(xgKSzlWJr#+P=
zh0MND)z=^QX8me2=@|uV<(_O?Psn0?D?j6Hj@aK;5#;v{QQD;p4E7AK!Q<1n0v%j?
zO9sZ+-d|W|%M(Hkp3Y#q3Afmh@er&g;Q}Yevt*lj8gz9#NbUQ!ucQ?llU?weWIafi
z#7{R_4>>uXVloy%zS(Q?dyHh@4-ZZbTJ7h@rw6TH&R${U&sMv2bn&`%+P?VZ)lp61
z02`91sTZ~AY*GI*Xbx<+p9I@-@4my%y~28SwR<;6INcz*J5)fsSXT9}Vd(~mo4Jhh
zgnpCFE{oKvAE7VO_=?SN@~afzVb+eliW3o8(A;Bo0hrb;(*x*`_MQ4xN-lTk;e8zC
zHbE@im6~2!<#mYUhNOBh))tDwB2n;`sm&~3EJlW7K$Q*^lDkrWoJjT&Sq%6>3`gwg
zuM}U8hCtaAwk%>Hk&_2T1@BQXp<}$#53j^%sAGidOGYmys}J&wnqcf(j4q8lDu%MP
zm|P%oSQb~nks{2PLY_}2BbA~gtQs-CPJ5zcL9GN-#8B$|Iv&$OtDYIA#nnw?xgd~M
z6(FrJ5L^3mOCaW|e~2^C`+*WhtHLsnQiUC+0+&{0bFaw0==-Tm4b){orf~{UO-?7q
zVNTtwcChyGl(PsWm;emNHzErh%EE=0st_g?zO4BAxF}C6QGVt%Di^u1L{2jXx&bOe
zTeNSh5rFJ?Zr<fuq}!73T$fPmbHUMW^+IpHmOx&a3$mh%zx_sc@8=0je{xED&6PH1
z3+~&2ct_ZZ+Rbw_QnITL(Af}(lJ;HYvx=Xki<;J~?d+fryyHQGV_4fKp=-U)=Ys+#
zMWZXqY7{PONUxO7j6?#wiBlKJ4D(rpW#QVEjUrCSa3kXfOyjo+XT-!C9-2#V4osO9
z3U+WCmdq^YUi>prOBz>=h;ni!lfjJFTU&ZGjg^rZ-9E>oEDgR*rkzF|dpSqRV|7Pu
zVF82IfaS_!pplYKu0z(a0h5;bpck`hSqNSQ!xjlo{q+nb$Us8#qV-+3n6Xuu$YK+_
zm?|9FEBE}7MY+E1vohdfq-i^1;~F%akrp{cpcQ|U+vn_kFw}q^Yb5OPu&!ansQZYo
zv-c+j!a9F2`Ug5x^QxI7BsiH&JF_X7W(tnd*|p>*PIUcDhSyKtcj9UPwxJ|2I}~t%
zBrpsCD-Jlgpk{L%I5#TIiej)*7N@d#EN7==wrWVxos`lq!0EimrgAfjMCDV?6AS1A
z>_8_)+q2<@HQacHmE=Q{pd%i{HvNuPEXK*iL{-7FnoXC%FuG58<r~7*<r3PjV9x>P
z+xetp9dX6c@>Yc99<6a8R=e3$C%x97#A-I_sK@hhAy&;H!m2}n+G5va0OU9w>-UH3
zZH({j02*npR)e>)&ZDM;#L7Q>!G-Asg3#9LREgb-Wt~VhBSBX`spc<vv!n@uI10I%
zX~5QMmaCLW{>dMM=DXeYSGuo;51aNK9kwB2)JU6Ufck1f$LgIE_2Zri6}rDDov!~O
z=ro&Mex=ij`1k|RDRjRgof<-Y))ZRNq$qa20;y1UBsJ<ONsv1f^M)_?$i-mHE@LOE
zukm+TA^IDyg70U;PHv?{c@Ngi*LTcYO^XWUu?fA9mKD1Lko}0ew~(qSFL*>;<zzIG
zgEVWk-%TH34}ImnH4&9iQgQsBU~4f5UL~`9E09o8u3-XN)9W{=RnSXX-uBkag9$Hs
zYRo>D)+_#43TVAS!2L=*(fHhdY<t3?0XSwfLlX`yeKRr*eE|Y8xrAfm*18E{z%kfN
zGTIX2;jPeOgtU<~BHA+O#>E{$iJm)$A$cZ+BO=q<JBc{w5Iqz|n#{GTJ>G-jxBwMu
z?{`Hpy8n0AEf*|$!X`_mtBYy#hz{=10qk31Z0jj5i7oH@ww@PjU0^IUGN~So)mFp1
zQ?5g130iPv7A6G0|3GWpYGPGvtl`RA+o-zGt#g`CqFICa)h4FewsW|<R#1lr7l%Kh
zq3Q7tr)^Z5p1o3*Q8oGW>{ayq_!NHY@7s0p#zwMOc-qU(YJibHnJ1mxGXfi0P>Lmc
zLsPoUxlQBUvm3DB=GG!L^Z-lI^haZU&mc#m#O1~JWp8%oEE5M0W+-#IHLp_^%ED#0
zt-|(PakoEwTv?e<mzoEUD43PK=SI$55~#icG`mK0S7^>1!Gq?!L&@ioLsinjP7;E;
z<cRMIG&pb<sE^G>_>P5ilZ7`Srf7A**&khRvG5TBl)E1ez9wZ=P=GFVk(lF0s<(f>
z0Vcg(D`6KZbTL7fN~K7_7<3)nrId#%R<AdJqH0E*y;x^iUU46Neey7!<xD7aTV1G^
zfgY|e(nEdwUAv#3l{F!bUH$Z^Cq^?&N6Lvi*W_Oo;42Py<(}H#SYi8c4BRX@%romY
z!u(pr<C&Qynj;#DUS-NBye*j|f8xZdJ8@!lV!oS8ka|MFS;>rHwN4AtsEBc`<E*37
zx5io|O8^zr9)?m7kR~neOlNUlJ9Q%@eLYDj>@^7}U^lx;WC|y#UQMTBKz8?+2|8T&
zW~53Tpt3caO%j%e08Fx_PyRUrB1lOZg3%3<uH6tTaVEDc1A%5ITn)u1bhAM)lxaPA
zkH*?$5k9_-b(!gCwBiRvCi%Y+G2zJYDkZ&-er;~<ieLBe-L81@<X4vS;{+%*K<(Fh
zq_Etu9G7aF_2^f$RWqmq;2(+DTBDmk9;M+uCQN8(#DlmeLhF|in${)HBQzB;z$Y{c
zHG2m_n=D6YeUH#43lo}*k!$3f9-*a3Xz3olLqbbELWAeEv>queE6=b<k(xzl6Gdnb
z%nO_$vCZKHF7|b5x(vhXwwda8?DQz3VVCS6fIKjlV(mhjtJIxfO_39FL{g&t&}@W?
zls<e6+S_=%`+nFBT#0Xis&u-9VcMYt%J4Ckxs=fs1|6A3vx!(UY@3h*Cl<>P<&9?(
zJXy_UD@&$$8qZt$aDmz9vtD_o*Xk;}vbUz8=qlc7O;`rrqu?_c(zH!_TGn~OS<Y^*
zN@UROe!ea}33dq>oX^*#^&%`~lG&4#GiW0xFP*8ic@&oOlJCmcC{((Zk;6(j9p3$o
z&QeSlp<Q-}WujK#{oTd6=A~JDvye*;F@mU!Vj^5YD7>+^7Q8p_+cqqphUs6X_t4fo
z_kM}X!1B1CUEX)^kp>Tj-GhbZAQJ8hqjUJq6O+{#&YS@%Eg%`+Vw45r-S;l-CYiRx
zb?tBYRN$o&G0jChd1N`LT}T~_Ea;@<C!Po$@Ep|U!}5pZyy-2>m$}R~L0cFs`Tt5L
zBfN&X^y1B-4BN6*S7DNt?&=a-LZE98(X(al{#B5+zNM0)-l=!@yqrx&h%k1sGM6U2
ze%yU<2Ka|$@Xka|f`h$qjcGu4Y^5k*wttu33WlpIt=6)uwMwg1*c()mWQ-0<frByg
z7H7G1W3@aJXTyu@<Rs}~giQKOnViJOIgz0(v){m5`%-~-3hYR|*);GYjhV=|8dgX!
z=^AzmmT9uoN=WYKWHUy~0ojZlSS@g)7EZ*a!Mirc3)0~V5nbiFQ@W`c3<a-jR&ZUI
z+rv?xK9<9!#MP#AmvEdaFCWty%+QZ<_Bz8$*oze7DIat?NtV^iK+9yB23e7XnTRJu
z?jI#tOzw39p;$HO&d`-h>gA}L?2A9liLA`8&as_1+o2PBIARqm%IHz1+&4~l7X3l3
z%h^;Uah77@Jhs>F@WwEqsKON#xFPUit7#h>+Ppw2)fVGe?~_l^IVl~qJt@Q69TDP~
zkbVNBEPEr#A!YsHkmA}aRz(UqdlcY>-aYgUhZ^~iryUkVu7%NMENtn@;)N|qhHx*z
zSmL-CF*xd|ry{7PofI&;j?7Fq;8c41&|4+C?uNI^Le6Ws&G&=E&mrFCws%ytnuO7S
zj<WQ@jtZ1wFqmwLww>|YWXPEZbKo1-%1vf~(Rz%|m%TRQDVlkq0FHvB16oBqxtbw8
ztI$FT-dEKK*JzA<rqy+{T4g$l#o0bo=eza7tw-WA>BN-m*2Lps-=6M>1mJInqZ_P|
zPRQT5VI7BPQhhOr-zAeQ?z4sU>mj-lr$ctbuBf_bB~#B1+ZiKaI_GtZt=SRz@6i6M
zxbrrdGJum&++~6ab!AK?7w`b~on04lV|AU|ZC6%{fUVE_(w4=T;^ksz<^j(Q>f;oy
z<iVQbNQ#RdvI1J?s`0*m3WrqJ*-!DM9Ie`T!<pw$4)xSE`c#jEV|-5h3%XESOU++d
znX&|DtslyIVpwYHXt#pg5M_#|z2K4E8QJK>Zj^ssjwT7Fk|zz=EqU3It*2Cfshxm{
z7yeAIBaw}aJt?cOrU?edN;BhJM;>&z2XOY4a+Y29237OP1n3dvUY&kCk<cV}*Q0#g
zc856Wb)(S`|3IUC4!;fixZ3m>A2)0xe+jplhDHl*ZZFb-6*<IrBxYKnIHsouC+))@
zj}HH{eRy_$(Z*P2N43-0KxSXkxszVb`iHQS*LRl%04Mtb03^-Fs?ML&o=mKgO}lW3
zO(qkuw<jLMk|br@Ib49c$@AN3k~M{RjPC-ksLGq+<J|zHRxza3VvwNWgT7?Anwl0=
zpY*Faohbs*stDpR9{!}fI-%L71;Tq(6-uiZN~<~)vXg2Cq0p-`I7f67Q#a?PjTZM+
z8IS;C6_84ZrR#jtU+Pq!V+rWG9BowPz{Q}Ws5zfrqZ{yLf(e!>ED16><=oNA!#KM?
zjGucrx7+f$8}2i6Lq1r9-y)^EJAyaGv4C>!>DZR3GA~70GdIcw(v&rG<0?#5wm{0S
zxc~#L79{{H&XDE-SSC?g9z?F7j@gl;n_fEXw(+c#EUR+BD<o>@Gwe;w2^VsBMNkVD
zU@Hf8#fulNgqY4D$T(DFrny9Z87aZ0U!^stieFw6O|!Wvo{7LYt_lYjpB<a#bfj?r
z^W*F)sZeK4lyDy6NggFnazhTwIiH{@k!ocfqff7iViqOhh!T8l0-U&Q?y1%E`m}X+
z0&MmV?UUn|#}`)mJLsp1G2=`bJ=9aBinI!fC{X%LYZ}0-c5XHe(JI+0D5-#I3X~hf
zY$&gBCis*?r0D&LIo?%M^Xj*X>MI%6k8tuu$ZmH>!@o{taGA>yu==q+gvpYPY=nkV
z?m~RHWRdyiEyrU{ZZ$oxP7YwFJ34*+61TjXGZDIU<fCE@h4%<~)T`MySa0KDlBugJ
zP^&@*THy|~s&t?=uLF`=P};2Gd1dVkzU|E&L^QV*75qz1p3nC+Su<hLpH5+@NRWre
znh2eMFqn=p$~AF9P1GKp`0!8Wzz^nUlw<I?#p2YFtE(l6Be#N^O>yE#0#+q$qysF7
z-A5jZ)WCEq%-U>-a@(Yf>{8tz>o>hM032F{-E<mHZUJqG$^W&rR926E6EzQKPI^e_
z=cZj;nFvZ}Och?ngjgG-?=hz*2BmRZ(IiYH;?V)-v~Z~pG3U*`-xmO?MJq*`))R`*
z?t-wd_G+8ST>~j_UVw$N3c5@ZoIHzdF?xq7XuEaiwp%4_Q{(pDM%3sNA7HQ+z|J1<
z5EvCkqR4+;7k>$Cl?Ol9D9oyEv|fKiyvQcZit~KMUn;r2Ndl?0%KO$@sBf);zO_pG
zwyBQ3wZN6Pn`Fu4U81!PB({q=++_nJ5Ozlihd79PJ&LGrg>dTOVy03u48jZ*5pa@f
zRUyp#NJ}v(us+sTu{1Eaf4`?H$MSexk7+SjuGlJyCMrj!n4dQmoofujz#@7rS65B;
zN29Ty$W&XQTLGt>_>2RfK#Jzs%cH3w1K+2N4`dq{9jZkHzzh_Y!G6ZQL|eL3qzLvr
z?o!lH6uBLcp4;0&A|8?32thx1ek=uTYa6+fis!OvAH;>yLD=1_1FfuB?UB9rmzK2I
zL-R2>#BxA}S^kS~`8v%JWRl6Jt@dMKc-xTrQ^scSq})|91UU)DYC3fwrqHPuduW-|
zfHi{VI+U%@nmoYpY=Yv|N=@EyG+dXQCNBlG#L|JzqcFYZl++_kui(|Ex?Ne9HgT0D
zA;masw}$scYj-3r?+|Km9S-zH#G56_ATwDUbTBt1Gsg&}ys%ajg%%|(EW<vH!biI2
zuu-1Ks(_iQ%bhY++1zsuM@xexkhsn6J`9OCDmJAL%c;xgrlY-E%rm)&Dp9vp3+B~L
zURtDg+f%kdwR^HP(m(PN{EQd}y)B@rzP#O6gkxR5BKyW0S>uh2qES9QCY_?e%CXk#
z5)J`-kx@QHMkN$kYLE!gSa--m;`QDq_RCo~Qk13fVPV0^R|Y6_65j=6v}jnv-W8)^
zn~q-{S=trY>eQzT0BPlcOc=<DLvw~o@LkJ!F;Op!YtxSS0runxI+gS+QbKn(RQ*#3
za%qtzrYv*hnJ{H{7q*hCqW#VVQz$h4DAR}UETL&%u#s~0xgV+`ph2IbbH|>@FXah!
ztb&UU;yz+WmxlabL?=(7ovpzb?QUcg@>ZDrjp{z;lyR3Vc$jff*m`Aj<G@XD$*OMS
z&bIpL3{dveNf+fBHH4%f8>k;D*z4h`6FR@=+o-9-L7hhuud8@2nUH^rj%a<WbWHv+
zX<Qk?2O|=-7!T0%Y77kN)*eILm+8k<qDmf2&F=~dA}KYwPJtIbLH26}^{JP2(<RTy
z_!Co+W|F=bDk>V$!%<Pc3N@6HR3uzVoM=icC8A;~`Xu|#*5@aZjWsnpYa0cG!c#RK
z%=%T9`zEgM1(x@wnu<?535Ll3aFej;YT2B>JoF@dwxzInpR=&B2E#{Gs;@9M{A$g{
z#u}i}JZtf@2|rtORvg@Fl=ym1S%7aJI4hrv;=khh)6umOef;jYfmLzO*rhluO1c5C
zFc6E_)9qT}SQg<S)5_dpI)^X(Bune-bkCjc>c>}Fa&Dys4BWlNQ<E{(E+%O_yqcjs
zzK#)ok1cGt_ZF5hP~2OX;N#4+ZpltvLN<k2xfCkc(gj<lpeF07Ze)oRZG{dRrM^UF
zTsem#8`M2`gI8;*$Rm7dZ_y+WWAH)zmkTCuk4$ihoG{E&WKSU4LMk8Nh~hmFrbO5l
zxY^v=*CfSvJnavdPwl8;e=P0+yc{7})9KH&4eCCF3Y4_-<&jZKlMxfjNuhN&(1|;t
z4w-R)#eJ84_+SqakxxJOy{n%m{oX<M*AbH3!*1)sBY9#ht}}n5&+XlTwb62_g?4f@
z&WL$fpKL3)Q2Ee%dBvvwxT~;e{`LyD?-Or*BAPbYt0QQ?w*PzH!E^KbP+NFeLth~E
zoDfkIhmVO0w{36x|K050DzEheZQd}I`H8eoGc&ugBmQF!vgE7?^0L-zqAM%5bBa9<
zCl>HS*0IQFx^o1olG1Q~0qL(0=)E@P4v)gJ<hE>n6p?eT-yK)c={ts$I%XJks}qUF
zch<<KQx6riydM~v(zzD2V!2Ep*nDZPF{^yrc9w@ac6*NFM41gun-RixTW3RT4KK!z
zYGjb(G(lOqRBg1h!xA<rQ2`zHsWfQMZ@(45*Bvc|PobWab#YtSD`p|jfn~@{It(~|
z;GNk7Gm`en^bOso1-8RR;OylsGY9u(Q;tfE#oo)5VA4PQlAn);i3HwO_LpR7-qc}|
zYSv`dSA!k`wMT7!&a=iTEG=9>84XR7Rvwnd;?g~F+pL8s3cI#YXKZW2mvQ!%ogB<e
zhM0Y}6*6T)7sq<oTt4#L4_wcJ-*VkS?*q?A!A4UZ&Gub{h7J9roP~P9Hld4H_NsF>
zY-x^Rz=+jBtWR{C^2I8GGVv4e8^tD|$7qfQ6**WzJpF`z_`Vyx4qLQ{rLN?O923#*
z`vR+6Gj=7^!=h^j1J>g}HswN#yaI!~P_J|5dBialNM6-=f<d$k#%M~C?#>KtjtJD)
z&#_dyWP7)ou!N-yAb{d}I40F+!=Q;c?xWCgbhTZS^C9!xBN+Fc(!B{5Ug`d((5==j
z4^pA0&~H$ouV&0&|FZS8&bIH;FqZa~q>GO!etWHu`LH}k%)xjaPr4mwqbs(aKH~T$
zA9p`7!C6G*0}Uh0X#y$GBEW*-TaD~GrL2vbx*~_CGpMV5TBSQ#_~!bb>9BjJutOFc
zKkSg~TY;tJoFit>RaMb_PC?|;FQ08UWS*&cM)#)s$y?VplQT?nD9t3@X?Ne~sdit`
z#uoIq=l8*TdZFp4J$u!{HBk!CGY<M?eU>d*o0xZlknUtE=x3E%gbrA6pO&K9CNvTz
z43MG*6AI{lxPeOUI=NE|Ju(7}f!?k3e)8wEH^f<Lw+~;xI5<3Pw;$w;t27w3XYYv9
zpAHY%)egfkVYDcWUB@o$ctg><=-cG>W;E&Y4d+HibFxA>C6shk<uPX0ItrNu^VraF
zL=()Tl8r=`4pPE@-0x=`MW$h+8R4w&dt0~^8I@S`M>OvFz8qBo3fmXbX49BWESjaH
z_P|WdTwb>9Pz{V!E$C+eac46sE3vs__+J?%NF8-5t(RP@zW{_ld%vrr&bE`ckG*O~
zU9R7EIBZCKvDjI6|IPm^9d@CE?qsop?tzZEzB%S5zlUS4;5ehB&N~8sx5wOF4-@?L
z5Qm8|%KCbk%sou*S3M)Ql}y~E(`$@oIhe_mlmooIb&`A5*I@s}8#SXV=nDNtrjRjh
zx2f&-I&(LcI+j($b<&5TGVhE|GlSc%9g^TOncgG`h8xX|={+U1i{+OFL8o8^5~LL+
z8rVP#YhvX-=Ky~Q0AHMs<B56$Y$AU>98Ho9{`ktY-MC&M=k5H#Z}f2&irguWgtQ*G
zP~Od5>zp%n<VmeKfkGq(6~mRAuF$}q4~zlX-S=Gy=Va*c0d9p7RRp0}5d<A+u~*xU
zAy}jkwk=N0*wrQyjCxMlCw)z^S9z5%sb0DxwGKojQnQ5BfOR||rKs3-86LtBj%b&O
zKPbt_Gpr#~FrpMYwiACknjEFSiHAyC#A%y!=Qx0d3@MUfl<jFMZ#~$FA%#=;9;&49
z4P%Iy79>DSHoo_~KkB^Qv!(FBfbm25RVZDPtsjB|p`ih$y_(W!<>oUYMZSWty2{bT
zfdT2p(->9aB<6D-qWoUz<qT-f62eM!yTpeTD<V=n9%CSM+<UB@CLIDTbxfoX?j(8i
zM;<AJgqmB?X1-X+-+LcAmc6ZE(+v#XQLF2D74)jngPUiK4o>Th<QB*nSU;q^p0$bC
z6E-;g`hyz@Dof%?=URtcCY4^6P+CzB;GP8zrvtn7e<%O1E+$W&>{`VE@ss^#ux-ke
zGD+gMyKX0dWUqi0M?=^i`Uyr&?u|HR$5hPVROnNT`3daXF$S9Eu<~9n85eIi0U2J0
z<eOKM<Q=?xyJ^28-0Gy#Zz)4JlBiwAm|ef+VFB}wM=Hf`Motl`?TWXLngF`6h=YX>
z0rrbm4)0a@#lM9?9TH6Kkf$=cI42>p%jZK~@=AE?PD>-ZiO021*NGwqlsZ?dd5zb_
ze_KZoO7`Dk$OW2HTq+JUZ%#u~DhxT;k3X*XpVZ%^pC!3%BWRC5bPVy`x&D0U<vFi_
z*C6$#q&lhFXl}0L#^ozy)jl2`X|7yo*af2w)UAL5QV%;QU^|P2aIjlIB1;?XS#Nn*
zE-P4QVJGDY=5&#=9}7t2NSQuVww6;lm6C<?-<0r`)Ll&k(mvVgZphf}ZU&Ke!>x<S
z9HM_mxINLh<s@-)kV}@0{m+5i<00l=A#vaS=b&J_{re+ja`-5r<RYWr&qUk`2`!q0
z&r0QXSMEUWr4vwOGV3mYhn2{v*k&oG(S0bPj5g+@+tv*_7N)&heY(lkQS9-`x#VDs
zJb^Orej*-ch``6so|OoBoZ)<OG0#IwUrI+eH;-vWamSs79VOQ9))<|sSK^MU!j4?!
zRlv<0VaJD+Qb9*<*ts@#cBCkkIcxh%KDZhOx`bZItLCn)_U9tM^6hv<UQHk6g#xxr
zpiB1D@I=<I?|wByMbEIg^{pMaFYa2z_3Z6_b93kI9(BukvA+Ix=gE_|mT-dOK>XT-
zzUwO@Tp{zq2`HQ6+J){d#ZP=&=j)sL{;MrMz^jV~MDo2<AHyoz_3^t^|KsQ5zuax>
z_rTtk?F@f(1~-W9-xY%I&gFh86zhd=GrFIeoZU{`CuRS^_+5jD0LS|o$lmSGomRj-
zFuosA0E+>)aa`Fr?ih`4@a0hom|l^(Ov;<ZmbtC77q1U5+DB;HesFQtI&c5D@~4OU
zNw6F1{QK$OlE44kx1Fb*?%#VG)W3~v(%B#lUJKaA(V%g?OdFf<^Yq(q=`Z|vf49E<
z=DTnHwDryAci(J&yS4T0_MbMl{`SrG=AXpopFY9Q3`lPx#GT>zgZqR0s3f*M4D$+q
z$MIx}0byzSBe*d1afRF@BsDyI^2C_@_0!8qJh@dVXZz&)BI5p40?!*VLGIybd^@4K
zwZppD-rW4V_#saE;)hu>9443XtSk1e;6dX`Klt}_I7#D1Z?bPE**zc4CLK%@+D%ZW
zHXz0HjLmtZA_ErNAXCBkUylAK?e}A>|B77y#0lIK(d_J9Lu@xTDMJ7zHy+>WDz8q&
zP0Z@NZVKCBdO3qdk0AK+sqhquUq&<0iHBlzIYooW5wQR9R9sJ|<DHF-0XN+kO|CWo
z<_5I3(OA)@FTBoM2d6L2Uc%b`Gd#oVgZFsF+bP5cVLww_3+#4*GeA1h2<2T*U)vK}
z!tK$Rwn%R8=;f=6U(oCW_nbr6ZfbsE+*0KY$1xfj!4;FNm+GP>twErVD)=<j-qU%r
zcMFdzFaIwkG9FD-03W|I3L^SCE1NCpyZK!>-Ia7DiX6WxC@y)P<E)j8lXxn<+32bG
zxGU!F=ya#&(PBh;?xDJB8yzv;v*{)eUAs&s8EM~8ngAr+zZDsji@QKs6OOS@*_Lmv
zM}17+0AxX@7XrUx<ukTV*`mmukO1SgUb+<iU${wa{XeUCuRn@ONHC^=5sXc)%!VG)
z+5_!-CNRh;_V7)Nc2g|gc!MjX&%GwKCXE6Yj4lG7p~!47#HF{QGE^{TlCF|Bh?lU>
zqTU*fh@npu+Fvg!K6ugCjXKxRSbGC&u{}wWAs**byUUd30%L!W1|BnN&^~aPu(>jh
zzRMx4*69{0XNJd4TKOJk=i!$YIytAtB<aqET?uoHGM6A$GM%G4qr|#=NWyY3dV1^)
z1BL|xb8YZWACBI~D9qVG`B0}n>n0X16xpL&Jp5~P#$&2;Hk*!TQ>gMTnOx4VHkwU!
zbA&g`g5f&mj3kc}mFE1CB0Q5M1_<XdC0@ayv8}@tCavGcaj6RA);MBHq_bg!hk`;k
zW^YKKn-Z<eMRDF^nfJ_eM&?L?ukR3{uu3o+FgnZ;^vmQrewV_$5!sk(8Gw5?>c2xW
zw^w1up>Z@VCwm(<Ha(re20gh&ey%2i-pZ5_K&GRCe0?@vtrI0tN_LtkY0?S@;ntRF
zjyPuA3Nwpvv)FDgOQwo8BKx^~NL>iNsTT?Av`@XWH(ReIB<*~!KpNlpmEuCieloKR
zD1p7klSA(DHV(NU7P0LUi*|3a%J^8chjMa{Kg=GQ-$HSJplwB_`=QU0$I$yCkCG{L
z$-NO%eYc^DZHAQ<?3hb#m~tBKvSCtDYm;1Pk6dJXTw-hdJlmo~>hEA%Ty~d=ey@8K
z5?kT^L}@vf-<d4RcNsmv`yXt(3iD66m2h=4Qg7ptn}D-N$fy>#qZvUZb4ghLvy8b#
z*#Kq!#xC)Gpg5+5R{4S}{43K_au(e0s^>SKY3Ua(_<-eCAmR678^U=46BGj_ke(^+
zq%>nGdyqJG!6VJfpv|pC>A&MGq3Ehrx;QoMR9>H^hP_0QBw4q8h#AW?{~ZZKor(m#
zC(&XAD8H`;NzYmeHes#N1UD7y!p7sw?%MRxN^pbAlsM`NfDVxj8YVaAbpGHZWnVTC
zEy9x)QI?^Rs=S?7!>6pG%`QwyQ2`|`!|;T>suTsTSYC*QbSNDen1U2hl8#<I+ck7*
z4X&IhvoD?;OSiN)bX8+{4jb#=c#<u<7>RoF#iY<{L26!(qVGc0zejp5k}D8l8(Bgb
zZCBT*X_5M$zW&@n{|6WR_sgmO=IH;v-G2Jrrmz3o`rB9i-&g%#2*A^Ig7Q2Wj^KXv
zA8}`NDfWKFKaERz*4X;@t3jNi`}yF@Rv{j}c>Qx;gD9Uapg@!fLGdpN;gof$7*Ws`
zxVn>;M`bf7qf%M67t6HZ0z-S<zws{;BRZ-eZu_Gvn1XK7NxOLP$Ee)|I=o`Rh0{A?
z9cC(X+C5tb=a0wlXTwfDhOX9WqHOfN)M&y%XCl!C%s62pyY0&)Q)yaqXRwY=YQ6*d
z{ZQFt>}s6vz5Jg1LWZOZ=S-1Cv&1N_=qPqC<M*-_1?W^gI4(=w{>=0os^>1@`3gHu
zh1^A6N0PDz^b$7+ouVG&>^c3DQA8`}h{y1xo+28j81P|NmXYRO#<v{y({7)Fn7X~<
zXsn@rd4Wa^+;gCl#DFb<s}ont8LIC$zgaWWEO8c$fqXHE(RU#3%UR8jM+E@%9(Q$3
zABQ^%kUN07kbvCB#c+y$b|e8(hFzzA;W(w4K=HCcFKC$s3Htft>?*-3D<Qhf*IhF&
z{eaN5;%e(#7%?SGY9>mwP62t64$`TP;rRNrb#?+A_z&%q<Cn)50Eno|U;_sfM3eOX
zqqxbMM(U;5(|r(4#^hep@7FVdBkL#A#1!5Urz8DM$E1a0%$#B`j-J2%p?z?Ee*D9!
zX9(C$E@xLYD^C0rkl0Bk6S21^9{)U^?1;zs5qG<^Tgz{iGNewufsbo-EK4xph9Fa}
zYmg1NVDxJAH63L;JJ+ZaHeWHgQZG&VT_+|j_x-1XmcOPw*t20CdI2D36$eXLQx<&h
zZ4cox`9)q-&RfI5V%`e&U%P}dr%XLR)y|wfX!sZ8H@1OlnRkF8-G8{3HDC@sZe=mP
zLOSKZtwp7KONLj|YmeO+j-ZXU8VB94t>ye<-0yKUSkL@nV_ToBIj+RxJ#>(+P`Ih`
z6mb#&+SbYd(+t`PI*sFArERh92N=SKwa^In7+D%e5<h03(xMleA;|gAO_jkuF;F9k
zz4hXTO0Bi$PI2k977Ti8-=FW%Y)S#msDRio6RN}g2BHX4Mm$~VFY_w>zQ`v>IUMN)
zj^^jLXNV0NM2!$56FplKrLK(de@t9s{}xW8xJ%a@x?PY;kkx!>xdA1TE?fwuF~mP-
z>4dL+Xb!>3J!xEc6HkV)&0(plctTg19^6(*LkRmHyY!*VR9&n$MW0tHj=COf7;ld7
zq4B8*Y_lNMOwmM#qS7rl(W~wx8;et}U@T2(r+xTswrrDiu37da(8a30R-Eh0);(>{
zLGQ@9-MBD!M{A*$%#~gCZaaH}ucBw;+Dd+7zQ^6ZBEZG=d2z2B{ZiX}N)mOoW3Men
zbn1*?ARFElKe*-B-GhQ_{$+uhv6a$7*YJ{9d;t_6JdC`EllHN+r|}ZmP3;#RR}a7j
ze}gwfB?{yXbkt*2A>4(s&m>f{)-kReTX<1A(Yl}qz`oa(0x-8&Z=n{ZDXb(FB=0(J
zdHeVE$i771{p5Wh2Axz$?KsPl$+Tu-?Co!Ij58HxkH0^@9nDx_Oy<;RUp*L4l?Byf
z<$o^wl4QG9?&mNgw+UQHFun5@hm*ousoIK=O~_?=mRE}`DsCaI&ufAfXrKvGf&J*=
zPq0tF8W~u%UZs)HUgpaAhVix7NmNLeC1jbC_i%`879f@IlN#h^R3cLYO8<Dj3#(9o
zdAAjI??=wkYcLSf0jb80>5wZoUV=-t14vLNS{932hk015puMv6`L>(Sm!scd2trN=
z=1SMZs(1m{tB#jnj7qw#>h@&rnzJE4sVkAT*Kw8E=Jhb<3Yu@U8I^nror9lirq@w`
zld0SJZfxw!c9>yn+OeI}Nj7bFdCj$DG1^!2uE_ObxG-7Ul&XvZZP=~VH)`8!V@v$I
z=6)}QSqRbvSOwhCu!}u(5b?z6e_2&gS~Yx(q`jI|#*HlNaCVZIpyH-nNq*L#B0kN*
z%B~4kVzs+(AxFmUq@)>emG37p8p+;_WHT8%RZK9eAn&A7Avzl8qMQ^AVCxP8c#@k&
z?+BE)4sRZ8YTA{B_Uu_#51%xhvf+@A?p(N+MeAL(&?u>WJI0IfP*&8=))MX5Xmr9K
zt}Hr6s~;91LtcQ~eGAX^heo5_mJ{&CWn&s+I$rE{OvuqZIc?^MhA4~JDrNE;<dig{
zWaf8e4$VT>O=}qu>=Lb*VN<LilKzcEHo!~M)Cm1ifOtE-23Di7;)D=Rhn+;{$Z;Gk
zu9G3zY-k%z1aT9y=|eW_T+3F-)-w{_(eSSrf9n>T9gU~y0G>CTjn3nxYnyuF@<`gc
zH{2AmsWAN0vx}o0jw4^7AI3d0L&ok_T;7tCnX&O$OyXiOdvm$QqTWQ}v$2e9(-9P_
z-j#)XfNPw6a;H`@MF0AeE37Y5vao%*%JYrOFtSB#8N*7tPVUB<?reR-+RN=@u87~r
z!3%L6b}CA<JpD~r^QKO6jm;>ufPZb`O><A<&AVY*E4^9!wb|T!W`?qZvAEN{T*u+z
zlP1Sp8@sjx`6H&}go);gz{2DFHA=1hyzFeb8#+Sc-b>9FCp2}{ySnWBAMco0R#s&s
z<h;wwJqA9_Wj`Rd_^_bf@}BPe_oOe9b&6#vT+ARtZE5s(!)h7rk$k(cl4IugE<sFt
zqUMcyA2Zr2@-|zww~x$M!fZw&hrOWczn~wFn!>&9czZ|{b*u}LJGAaTEbNlI{G1)>
zgI{ERPb4<{)(mLP=IOL&BJRdx_#0UCZE3i*zrwC7+yh<HF``u~QPr`t!IflT^FX>e
z*u*METBRiCrUX91B<JW}O0PeBbhFgubI#4@`*cxhHK?3kh4+0<Zae$Z@9!}4s#%Tt
z5AvB{Ll(I}wHjpTy|QP`8~=JW^B%e?Gboy;6Zg+CBhb8_<NW%PK(fu4#ALXRw$!vS
z^ph*}bH{@a4!VENk_l(1F+#mjua_lLl)X$aPYN1hPeubOWT+1N!UD*qmDX92su529
zuh%DOK&_3W?NOVjW+yL1QNgwz*04IQlvw$==BnY5Ic2z-E$j_d@szj)sR@lcSU!dr
ztu%QC?9v`mRG-g<aDAEFskFY##S^SAW#^^!txHT8h@<2wgJj)s!jK&O(#IkTDt|Mn
zl;F`cq~|l2t$4X6SqM<2B`ztO3Fj{q1qlon&H)+f#bT-E@3o&*kW{IA28ZEAS2S79
zRmCF5$foj2vuJH~&!kz&4p)za^NtNicDRh`@*NR+9|Ng`>AQ~8<2giLvlH9|Wc~4E
zG^I!(I~1W&DUR}+u|2Tee6tL5rAoxZdp9!q)~JgQULD`FO^F-Dhof(0a9Jfg7GSjD
zB1~MM3}s0cF6?g3e(JdxKYZ1A6BCo}#Qw6##R>xUJROp+wF(4AURlypP&$1mXqX%h
zuIB@e1SZ|z1@OM3P8w5MK^1SaA)b$>*P=6<Oz`wQeDdVq{!UyE$D~tmv*~R=VW<3S
zc#GDlh2G;cKlIBW9nu!zKSmZ!J~kmeFrRH7earpFuut;ixo+j*Yd<{C=gyTkVsjT}
zMmHEFhOctD3#VN9WIJbHIXMxqed**Ui(NZI9JlGdckR%fC3E8=dzCs5s3-WX*5wLP
z>1+Js{U$7thyzL9&=fo^nE4o3E<gt909b!z_hpBOfM;(y92te<f4q@~&Gte|Q9{q&
zsa?DSs~R{1==GY3)A+QpRMdexl7^vhiy9;<HHsTdK;%BVvy%%J1Tajbpr|Z@7$Gx$
z|HUFx?+bT8X8e8uWM=absdu|$3so4?1lMbXP=+!{8Ky<uWV?M1&gkxxm=~4yHjEyR
ze=&CAlCd)Gh-liWEgVxe67N>mY;KC*eiNY=%DWNvF}~J7St+`At>`!H$QxB>5IVQW
za!BG~(G>FhA(IfqKVf0z(^EN3yT-nBVr&ESi=&hF;n~Yq$0tWEz}uh4llW>7D^Cw#
zaI)*b;~GW?pt#dJVtf0$e-m}(+2&8jU1y*!HWVm6(R6Cu1qLx1Ysb65bgB$gr88P7
z-XB7$98HW0`+;#JKg07CDf;;w>y;_?6D-)RWfrW;@G&R+C2_8ZLwk3PMA}Fi_Fjx1
z-Dw}nb1#K1rSa7W+O~F9eR{z@a7nr@$wWmY-Ww(;J&PwP#*+Hy3|KCAS5^?shVa1i
zYpBk8Cuytf*fSk%P(K%qUL?bLeE#@9y(Odn%!~i?&F0g;ZF%v3o<80B?rZ#?FEIYk
z*7n!@A97uP9|+y^{arJEw5=d)CWB-9Y)omHEE!t+9LCp8Scqm4HsaO?OMNd7$H1_w
z3diJM0(cpZ7Xe`F-u-IA`)(_=EHJ`+^pxtdQPDK*JBpm8x~z0PzlqZ%n?An<jA=cO
ztr%Gnc)$U!@K{qhUf}HPTq{MoZ5H)ooZ0#)$H@FT&e~mi*^cS0BRrWZ<9C?{@@&$@
z&=lBC)1MdAWU=DeRYCz{V}%JSC{e5Bpb>sol?WA0{F_bB4ZwM;4fR>oMfg-*rI0fk
z!%7=Q4_u$bW|KS@ndvGDMg#p_6rk%087NoWRBeH_HdQ2``3*LkTN+~|EO#+hK4sMr
zV}UNi^fFad4-3nw;YrKVNLow{nck&4V_pzvtv(Lk4O>mufP$muW;A&lPewCJr~3|d
z$ry+X1Cw6Pt}tNf%k!TOfe-9cpm2FMVP=DPI88Ztng$15z%{rA=V)6AcoBr@mm9Tn
zzhUaqu}CqE=_s6xCn=>KzLlMV!!AY1F~@D^4s>AOkruRZmtqgVc@IOqPBOZmNziZz
zK9&fvw=*KfQk20ue9RFF2Jzd3F@ezy8V%L)0ND9Z)m-W)icqOX%EidtLmU81G{&um
zNUoOa%RYC;9*)q-ph;?vUOC9^cR7A-%G?4WyweB%C&HlbCbPvPfo;13WN>*aMR^<<
z!k8X7hDg|NYuA#yw9K??_Ws;%DR2CRmTQ0u5_2g$QX9UtfPYZHW|H6d;bCrvg=e`v
z1fF@5h2Y#l+QzpfB6E%Hm@+zh9gpmr+bn4#0P7jW+@OalD&AD`2yB!PHahgJ3=L!C
z-2cRWj`h*6$anE$8E_evR{~uzpnzR)nh6^xmfTnu;GrsAfWvH-w!yN%`CS+I&9=lk
zJwIx{dj0(5_>e?St?v&Gk7^PH^{_>|4BT#;T2L=)mI~JqiBvc((fV#vJQIU<Mlaby
zqlPA5TWpOwzbhj{^HsW1bWr8GLU3z!JMdhXb9yFhY2Jz#98U%pw3sAaffZVI1zYu1
zGJZKxvhEJeZf!HlS$t{9)w9i=yg69T%*!sGdubo!jJ~~n(H$kc^Tz%D=tf94NkN)V
zyo@Tr8Kabwv-OkdUj+pohs_tWGfnwg*PRUpw=$~vj%-U!MkSWS?tYc>xyVyhD7L7P
zGM@5-V*+w~`r}dS_(F!-dwFzv(LQ^1)S?`-))KVL3ao`KF<)tTRl2t?A@cqZyx|x-
zi2WcjQiwoWcDP5^Oz~BUTW{=&(_a355g^$u*C)eK4JXDF4GJRjkQn1Ktg5VPcXlvJ
ze2vYc+nqR@?%@#E_DS*%d<QKVs@#pIF-kADA_k^~@}4zrsQZB>^trlk^&Q5N>;4@9
zt6Upae{Dy0z>S&rq8ld0U42yTzRE#b9hmvM=A&kn`>OVdkwafqdrI9ReIBuMs6`dG
zgH6~76uR?>4!2fqH$%vYF*a2FsuG;Fde`#<ukXrfrNhVcT5ra?D%Op7qgf}`2f#%J
zE@WeYlIr%$QFqo)jI!WzlEMadP?oX_-i^n~p5uK=S)f80z1~bYC4|plk)1Gb_Hc;c
zHNwg@s$`4U0I>Z?+YTfpp325aC+($))TWJy9kUFS;ZAsYJH;ei$5W{?*+Cu3pJil&
zt?j=@BC?-6{nq*V&9?J(>#6hg?}!%suCGXJhv5I9t&=)z;qDWtp$29#jiWJh@01lp
z4O5=X4e|U|L5y%hDdrFsh{>)Yy_$_?Seh;(fRK?iieQtvF3{7EW{{opBv>WhSPPKo
z<qWt6xZ#X3@d>9WLme!e?{RfegR30fjy3#xqX686rNbJ*JrL@56PzqL)>}$^CI?#x
zL)wO+Pb*N#buvkmDHz5%M?lE4=qOBx07kGrH!<%=qOz1`P!af|hMNM77Y&^6@31o>
zRtE7mlXRK@H#J<>vVF25Bf^xa+gK7kB9f=Xw^qX7n`_h^^f9)pGFh?Hq>NKF@&lY;
z;NBPvVGS<Gu*i_Yla!41V`-jfElzY55&d=I&~~Kq5TniGrwxo#wm!l+m3ig31ThD_
z(P#@fn?2s?f9~xI4!gUi>SB(XbxMB6t%iYYOB*r-#h*05RRvMJNrqk3Ew(!uN<yXX
zVbH8dJ{Viw<$2CvYO~(0RH7nF0~^d#D>FC-rNkcHa5*+V%7zGIxVgqsB9YA!ov;pQ
z<mw8jNiI%%t;NE{49`Xn3ZoAps$;vXF3D|{&?!p4gYhaj62G-l2gzVGxfT8R7Dmmj
zo}_OR?5S##HL2k?Ijhe4fDC831G${qrUN(Eqb!ki{+gkq2W1u{13l9x+;?CINtgJ2
zouYa?0%(_MKb_Jl;NTmKee@Pl+LrapWO|bT@h5TTTDE0rzwK>DuUR+?V)Eq#-VXO0
zEa+}9s4&azG1N7=ms|m9Vuk5-JPf4XZun%zbjehf_AElP@TPA~tSlhPx<n@$DVE|K
ztz;R+sSDkFhfc0TnKJP*VId;zTT<4kHV>ZtA8|9o3CTtSvqAamSkZ(y;MJt)QpEh7
zmU7&<2AnBQ!S;zg`%hEq0QtmRp?oPoW3berwVGf*A7Px;PK-G%X<w!Z=V^S@+)@Cy
zmjWp3P?%Y<_5M`tjJ%^FSixDiP-0Ch?ezVYER<`xfl}xUs9N$;BqC0bS_Byh!al6$
z(OR&2N)5p6wb&nqmN_RjL6wq--9eg>MwVP5iPg5FZ?T6cnIC!ObGs-Ffu!p|`9#qf
zr{$FEL~kFwx^b&DL@LW9L(03^l}5W1J4Z#>$y;t`7Tw~+nB3pYHb_3vrGrJ!rEi>b
zDcX!IZg~#{i|SaRvc<F|N!msmGijq6*i*WSW+l$-)k%=WfOPYh7+2$J2%H&U45JKX
zz&)tV2bYVcSQMI&*En(}$Tz{9qfaH~;&GzwMkY!!rz(Zr5jG6!AQCk64)@z`7io!3
zuo*|0M3TY{$lk-Gk*mL*15?b$O+#Fai2Lf`9wRM1;T()eGtXlLz6aL^xw=D1I6t~t
z=!xfl%wZszJ<l;ha06{zHG;ED{cK#j^J{So`nX#a2XazNN&zVtD$q6j);i<yj?+;i
zHbIvAtaq@l8zED0*^+#Ta7j|Y+-RfGK;WH8{6DuG;ra?s3M3sW^Rd#<#0S&bfpl_N
zY^Y%f{+6Ikz`T5O=t4Ti`|}&v6M!JbX~))IWJLyIL5qpJ!Gx`&x38aCY_GHfIK&)G
z4KHu{FrL7@ahkXZfpX$_fVz+2PT{lEm=tR*?EDHj;WYq~b5WG9kL70-?)w4d0epvD
zp`G^jC>$Sm7zUx%6LB7vZXvJGsYjN19W=0pp`d6LSo#KoC=2UG+c00>P+ws%u2Igb
zgVy=ci}v$h+Aq$2{so@g8Pk6x96RzuWi5Gd0jN7TKR;?+)Ru2foL`)^4t_XlpB(&h
z_WA<QdvxADJ-cYXJh(XgQHvjyJCTy<vOLi7cu}&&yOV$3VE1=BDSy@iK9qhP4`d7j
zjVRQ5puf7gwOLFWY<d+CW}&ADF{&@pM!f>9I6CY;UEnPg(08DXiyx2A+ebgYY8{=Q
zAD^AJkC`+M+9y99oV-42w~sIIS?lQFMf>dZ<QGjYpM^$4jXtsrX1o<2Ej_y6VxX@i
zKX2#Pg+=-Els&Nj$#@*fFUDLV@%Y40IVkxI@E3PEFQxe8qSE<Tbhs`8L!PnK57v7=
zWWI#0$}`(2Jv=Ziq{-ie_n*Bz2H3t%`s2j5@9_;zFsWDcxTHeP%4BYhoUIDhcr$#w
z8}4XjBQ$~|);axHHG8xLn=aJr7B^j9g_`Duiw;<8_0A;<EE(Q<N+}sWt+MR0uIvZo
zw+}|mPe$4H;g(&aQxQt8=oKkc+AIc%@In{UEn_xRP9x|_((jKtbO*&Mb}#eG@eF%_
zYkPrF&}!5b^_V{7P)sit!LbG!Lb9u!pRYErpg89t`RenKU6sorhR!FK9v-f9B4pv^
zcF!^(16n{$xYVV!ha-%q9cML7L*9V7Y=JXlc<>5T!CaA+$JZLFI)$+p3SEM}zdwhd
z>i$2-bi!mV;RRIg|FgCIw{O02{eQOM>DT<<Uu6F8e_P*%x&7({Bv<k)U>XlDjsmVg
z=1p~np!|l*vbGCp6f0Vzo9`)QH+?%C_4!-g&{Zz>J*&Q{YwDr<&yI0(QDqBER!xB_
zq4Dxi(G-;gdjE-$-K@`*>;F8SP+j@AAQW@2oh5w|X1ldXM{k~-Qn;Q17#N(ct(6md
zek}#99Rg?h?}UQw>4`87!qS1lMsTiXT^BF7H9QmL?{`egT4PtxmI^JGkN2l(r?m5V
zE6}@lUbpOZJ{)y$V}q^!ZxR_M7xf466>fYong>VG6^|Z0DvOM`STw|f2#8MXz)(YI
z)wjADg!%|xJri3iIedf)8#uWp7|v!Mjx%v82BK{VC(})DM#gSWJ5}&q)QRHA`|L2e
z;<!iBcIKJmwdX|FvQ3<KsD~ekrD8{Rsj`t02)VCSHXA!~tPW`AXZ>l4+Q5kfrZS`D
z49s$7V+K*Va8OF|%c^n>nv!N40A~GJ)&R*knWn5cL$xT+n4ln*#$4)MGMNIzzR>PZ
zk<yOJb>o_EnZ>ebOV_9Z#E3NW+VTyPv?-9(xh{`*9t!7TWv?5u##i{ZB3da>Xu*$n
z#R0~^l<iH~)osm@kddtr+D);6#4Ia`t7QQ%(mho?o<*K~wPeg(TS)SY!Q9%It$a@`
zaEkM3<cZ3PeM2;47Nt4|Snz;c6lm^P3?Gz1VO^jF4>NX4L==@pda_=9UW4-dhl^)`
zgE;lV{OuOXC4h5I0iMlR1#iu^1Ybuu*1eUN=sdJlxpK6nGF(adNlm`Mc*he(F*Qv)
z^t_|!r=}@sz6z;U;?|Z!AN4zrz$kzhtxYJAh+h17gtGkB@z3qEmMxOM`0?PP{ma?w
z_ABh=3nTge@!+SUHdceeM~4^i>f)DINA-}2y~fQWJ8Fx@jD9*#LbBAHSD)(0gxr<e
zfQgN2&*6Mxm=q90=U<T+iUgqcB$dZHW*kd@2RKVCR5xfeDs*tAR*E7b%U0y;HGAUh
zR!W*-kh$HsICv>jfpA00tDfwNLUCE{1kiQd<b#q*lp%LJ?X_i2j6K3dWKPw68><V0
zvCc~iZQ3b;IRH7nAOqv^ExAB-rD3r&Gf_D<mZ{+f$YQfIG|4?1J8Vi6W?vWq^c#u<
zSm|oPJzWk*coGODSyw!ZN-Yp#SPG(j8eT3JkEt45C%Wdx<)+m#6DBiXAh>$P>3*U3
z<$t91{Ivm7<+EN^@vf6!ea6k&H+o^+Gd01wwB3(Aud1oFv!k0e@v2v<*7ZnO^!Q-P
zlgGXE${a4Wc4F;=red@Cle~vL<F4#%*)M<8S10{?@o-^AZg|h~e;nuY+n?~1SWpKr
zNB+OL_1&f~|9|?;)35UXukwGE{})1eo($3O_&?&#=u+(cihmlH^sKS<?^gp1Jkl8r
zzHk-7#p~AT^IzDQD6d9v-{w^b)kBLF2F0*81}ij1ATF3Mnm1>+*+p4(U`fCMYyU|8
zI!R=-RAs>LNDP&AI@)MkK2&TxFzP8d_~v<k)Oov(^RVuKu+DYq4U;)uL6k6|CkDy%
zdem(whzI>_6d4)u4LS~~TJRSwnaQh$17))BPd1(PdJRXMSz81_QpoDwZGICN&HFhy
z<siQdXyUxsxok9AJPO<mxe7QFs05MdDug2<TYMV{z+7n@tift>rDEh|Fy!`0MCS8M
zNbDFuDZ+Jkd8^z<ba8Sa8Ka_HURYH#hDGO#25dCYU?#2rLDFfKYN-o`alu6h2!sUA
zJK|W*6~@w@psQC`++5${9Ac~|UYxGS<qq_f66N8mY5X?9VN6JrD8u@Ma4()c!&VQ5
z*-bLp2^TSeg1RXD?2sP~=l4_Tpq&&%FzIEyHc1VKQOL4guUf_)MW8QOx)FeIt9~d^
zuZ2id#@MNnQNl>(t**?K&Tmu+tK$N!Brgr`WKz&zli<j<CsBdB*qUD_CR;4Xj)grt
z|JK5iO=}1XOE;~$C8mqnw1Py6%y^K<E=D!1vMRdS)RhR>u^^m8fhs7(`o08!RSSh5
zRm3Y5nzHju&21$tUa8QE$wsv@&+|YX<gbS_juMVCse{&0`=a&B@#zoki?jB@;o;Hw
zdHbM!4i8R_+J|RH-+zC6c#PJ*$EW!G)zRVc!HF&2`p1t)2m^&&$`bkH<m?~V_R-<N
z>vQ?;A7`&m@Dae#Fj-(it4noCN%z!uDSuR7S7nksyU41}WYP?jmZ+qg^6tlcdR_y?
zN-&k1iM2^WO9b}RsXZU6wwe$%-6gQ2`)R?0ji!VWa0$pofkn%K{4=GM&`@=l_wvwa
z1!k1`z5K8$Lb4j41l+CH0i;YTX?JB~O0_LG2Jn91ZMryzd0V_K7do-pF08vI*oETc
zw4CQa9{|kCNFV5IL`)Dd`{+m=d;FE_J<F#ECeU=wc@<Ysv$3dUlxYxm#P=xf#t2r>
zNn$rK;>#qtN(afV*P4|tk>Z*t2STeCz>W1Wz-0xF-vnu_!t|Tci=BTizfsVVmE?2D
zQ`X{eY$&GJHVdTB(DJ0B1?farh4!97tqSLJsoCL;tmsBMY*n?8NeZ0QD<L4cJe)Tl
zSKNGI*5GuHDsXIz2%_AZ=byg*{4Rg0%m2_NC|N28z#RGCcTd0d<NyEdyRC1(%KyGZ
z`QOuZ`1SV%BzFPfpJ=6U0Y^)d$@^p?_7W`r?-P1Bpl2nOzuF($!o#1A;NB4s8>;7b
zN=9Ir9r?;2UxrAVi*2W575-{u{27dlmo)F4Z&+JoC~MbTIE%Pfxu9hwZq4Dt*sa!V
zW76Nm2^mG8B-Myb0}&*2PS{AGOQn$X%+Ga-h6z7yb4pn>Xz3COGl!=n3|2Z@Ow=et
zHRy*JaLior#u8?Y6aLQYURhW6tWl6p)&5;O_g;0WgryXo6p*@g2kTrpFt!Voq08WZ
z76&h3kPF{9Gpn@BmErKLrW}BJ(-OR&vSivxIpFkCC7#s=vPwT338M^-a0Hn2RYT8~
zv7Zf~W{#n(lgw2T8r#YGMg;Pm%v+ncqg~8~HrQ@d*67AUr;qTdt&(uK{D`tFXqUE1
zr4|;J+ET6iwn#;294(8d!5G}{25hyQgfqMyU^Y7^i-Fu~<bkR45}*@>&fSv1#;x2O
z9+@W9dpTBkj5;&3)|ySdZt5~^iu2N4J9Q5nw3!2E7GJ$GxU*g_OQzU4bo($}#gqwV
zV9#v)?1(M5HulBT#&1;Ha&`#Ub`G4}XuJ-u6y=&JMyWG~ssL;xI>V@kGC$a)T68!`
zcU<pyb<5}M!b{{e+I*I`5<boX_R#@jSUBbu3AiB_>GCQJ&qDN0iVDvTba$@*ep&7X
zW8y2JuWRs5g*lM|JnOLF#ueo89aeXhpck2<icoi5v(k_yYDu$JZ5^9h<;|RT2%B`z
zRjfZ*`cmEYlgnGif2B5>a-OXhd6~W1(u~1d*a}k}%Z~M>SJgMPgzK%s49}Df(T9`Y
zHT`K|z6P?bh{fUS7YE7<X@bd@cmgggR^u_;izx%oV-?Yc9hs=99b`b{=gdvOts<)u
zAA><vGs&y~X<WvaeT;uPrSPmHl}`!nPpld4dhx?;Po)NIjW<{<VjTfjQGBH|UBFP`
z&?sCIdz_n^!W42osz?Q~9WrAD!(8`<Y4?@7MM99y^(gHmb~Hy74qAuB?Ts*{28BZG
zs=PZ;VF>)}REBL8>!MFKGZf4eAPfPQ-H{pq8uA;~mc<qg!#`^C8fj%g#uPg$7$F5$
z6azTJ2Br>)v~i<y;#@*EX!HlKj-?-AmglT96BhJZnt1dmptKuz6kw$?n@A>lGoVwB
zTRxCBAepFj;trw*E*`@!u9hr|L^AC(>ZRtj)SctutG-NPZSYWOMUo!9WSwcqD!njo
z=~A6-lnK0pU&la9O?fJ8Z1a}aPkP9AWHo$SZ)7*|xJJ}sJ=DFH+c-2NTl&j(J(qjJ
zG`0<`aaUApsBG;^s1(hYipgfpt7)9})hoa8k1PC721N@D&~0)$8lyD99^d6?IvowH
zr=bBdl=%a6(J8R?q5+y$^9NX@zO+?!VX4|XO@rznXs1KkA1Jn_EhMN~XHRj1QtCFx
z_<P?-4v2zLSm2frt4@v<XZnn0Od%DY1f~%m=cW>-1BX%b@#=|3Dv#W-v7c(}E<xdw
z&>AHy`RjCaQ!@6s##r-@&fxau9;CJd1p7bO?hjv&*MGs|HS8U|;L$k8y@b|Ub;pp<
z>1n*c=4=TLC|KT(yGK!s#s&R2lm_N^qW?W7WyNwS;OFRnzj?a(t*`(6=4<}<FH`^f
z_pka_qkla=IypML$f;lT<DmA{(wFd}TvA;k4f@kgiZ*PNO2#_q5f?n&uq94zwb7<{
zwqwAmSRpb&Yl%ALO){|3=Gx|Gq+Kz|j!=J=#6wGCqnf#Z^LS1RbYy#sfvkN5mxVdC
zDcfEk>sLh`%hLF)9<GEn#s#rTmX1>*q57d_5#QPfGZOx5M5umk#DQ>hSB*^;pc^|n
zrZe5v3wI`cQvzNRdqLPfNDq=@XW*7C_kr-rme?H-`l=cg<iJ)jEC>u-3oE^MC8h;#
z(;9$zTvFG?s;+AuSGGY2j%A@(V}bgMStSHPENq@o1Z9q$LTxr=^I%*@b$?Ej$%2pX
zKnN~7B-VqiL;FaoqEWrmN|io2z9woR7o7$5v)tbW)w5hlDt@F)FYGvTZtubYq^yd#
z-Eki~!<SLf9mHXsseTeD?&1}TC;EvU2u3$t%XNanzhEOZyDfT(uxfdb!4$Pt8!8A&
zvne5~2dk^1s2H{(ALtmph47_T%)plqJot7og74&?{a8MybCh}hD3m^L4!9zHp5N)+
zdEK+zUgTkA`Mii!MFsjbr00Wss)tUjnMS|Xjb>Q)OyXCS&~sxl)>?%nmfga48_5C#
znAiOUI$x1SN*24f%E}b3)=(B{*54}3;w^6s%PrE5zsH#Epu8O{S3xOT#_;4YGmpWs
zM>mh5Q9VS-s;Bu?LdF})z9t2t=59d5O@2ozPvo1}MnAqLMm;rhQ+mir)u6D2LxQEV
zfmYNfGs|CDoh*y{p=A6^=*D9HV%7yCa**vS!Q|>@enHi+Yde)f+OjHF98qvm=6*_V
zLe|s-;>=VWL(NVXpnGK}5iqSZ@-d6hbVuDg=ic-3S39`PIf+sOcVn6NGZq<dxnw5$
z2`-q?J<?ot@)Duyc$45%v9{~lLld88Cgb#nk3XhB{=66kRvu-IdN`LxgJ7gqHDW#`
zvikav<m(BB{;=d*d3O|{Dxt}Y>;j?!0fzxA+CxN3E4`IZvV~S2r?bEM1RSjFk*XP5
z>Hb??$D_r(e+2(OKzr|?`|Aj<$3LaX%|eQxa{bRY-+cS@Z(jT_`195O@5_w;^;Pww
z7XSALFaG1~`1C@Cp$%w(awRG#gzQy1S2I+m9YWKHm|>~=3@4cs@Hrk{&G6(@<Pyug
zlfI+PTt6GT8s&9XW)9YcDRb07y75=owo37x;tdFIOMgD0)YT*Y@_IZ@CLv8u8r0xq
zNw2@am@Oq0Q3wX38p<JMEaX??w1?BE=5k|A08Zw@?llyQt4RODSTM+>T$S^i-&iK|
zjIia{;`R7s6wun(bnAH8P<gSxfD34`bLDU}<UrExh21P&&l%Uj(tm!4DohugwFF$d
zl-+qwtahk|?`M50(6y&Rd5L>tlkg>ev$q0EtDqek(KkouWX;FU7SD|DqsiT&6ca@Q
zw)`l6OeRS;g`@QrAY{L}jXNGBQ22jq?8~8p)ItBfuyyE+NhgecQm>mdB0%@B<a*S&
z393fA+6nMxw*DhPs_k-G6-9fiR&uz@C`Zlh067<HBCOKFooiF3yg@rh>HO#@e&u^i
z=`bI(B%HDp6k0=W<${C?bT0DQc43`5_D;<4aA1R!o4v}Fyu`>{ov#SPah%infM7Yb
z{bhFnU%nt@xWN26yK;gPyE|2&fW(>O$tO3ID$@!j)rC)O5*4aZQC06^5)ZTS2<}zf
zY^_EgRt25mm_>+BcG!*T;y3e}p5kjNN3TQvWpnpdCm>Ksggf7R%o|p%n*SKfyze=0
zz<p;3SODJ0<h?Bc>y%mmdcgH80NwRBQ|bxY20X7~8Ol^r9=yMaCqu(|jwao72+Jd5
z!=#=HP09?L47bqrc|rwkWB#ZDnktw<D;GydGa|cwopj!|tv<HZLaPx7th^g`ag*99
zuh={^VR*UEjVV9UosX?~sYYb7^_sU<{dBH(MJ@KP1iPAFE*-e<h3cGjtHQB0E?Nqe
zl6Vw0QRqJzY^S)HMH(m`0;29n>&`4nQZatv0+dNFjt%*2Jb`O2?CI(}pA)Na9;RL~
zIR6BT@=%na$(rL#Y?4JAA?M^eD^Y(|YOzL|!3aS$++c_zj64@fjJP~P7ek)(h^vf4
zyK>~S1PkJ@f1O1}{%u2I{FY(1v>&y;jnW8V8P_bC!;r^aZjj(z1f)Wo_ou^ylY>_K
z`SIyN>zA`v6r)u}&7;IZH7Q27?hbNd>JeX|L%34gWiei0l7o1uWoz)Nkzbnih8FpG
zB7r^XM=eiruaZd*WpuH~ZU=(|2G9|}-MtmtVm9s~Bq_WI$e5JEd&e9(z5l~ZdXxV=
z5pUxD_&OFd@$SFZ;46&u*BU>oi+5u7-&DucH+p$}B$3kHWMrX&y21q|OvI&<p|X9(
zTK5isesif!m8FN1EEkDu_Bp~~-AbvH1{e!9NZayRI?lwyTXJ_sXTgRz8%mF&0qm7<
z&h}<~Yl&)zN(N<lG@+P>kSn8!LOUe;LEkaB)po7{Mj}Jk+oQ!pA1CKERnD(Dxkg^2
zl1u78-1ECogY`NR{-dADHrxCfuGB3ti$u}upWyYoNVtoGsi!Hw*S2uaoKL&|d9Sov
z1F}Lwty2*;6d_te!DKFSQDD{<CF;`=rLK)hTt1eAEbAV0ZYH~TE5+UHi>E;X`ub8T
z4)kTb(Pj;ams<U9<W@4P3)A9*5?&?BDvk`{h?n0%58ypqn$g;c<<G=y&n6Q*#BI8x
zvIKxd&joz8v$K&HJwfkEptp-jO4n$dU7c^h_$ZRVF55Ap^oTy<9ji0#-&&7e57Q}s
zH8ZV*@opvS5mutFc``XB(<jwEP2!nxf%GNvK-)u2{*mRr$@wajBdJQ|CBaJ&KS@Ng
z7b6M1%oLd+X*;VZTvCvRXp)g&o241F8_zP?E}I?Xn%>jG4_g+yFQbr!7m<BYn>x@;
z&Mvo=f~Dd}JSE1<R!cx4rOoW({$5RjSgjE~{KoQ`db3$WcENjEZfT`*HM+R6I3m^C
zWxxalLUI?d>;_a5I>Z;Dz{7Wdk-t|g*+GdSQ6%)srkafMlE3L-yj4?|D}9aq)V<8%
z*XrJO4g2Wdu>ZBG4N<b~<&2dA&G6--P{128+@R>USpFjmYY9LVk&U)$_8Ss8crq1;
zoa#W8nk8V-b&YJV92b>wZV8_EMXo?Gel(l2Fm`f{2oIKvnOt#$7hP{YR)Y8lY#BaA
z0!8E^4IxFJeC)_S7a;-Y!h%s%v}eXt_xH9r!^OQPzcWhA2g>7`Tx3Zuv|anSFDh3|
zls5jCShT^`L=I?3giuu55UT|yARjeX8tzR#eA)o5ZMtcEMPW1C`)|p0zJLi-(OF$^
zdG?PPxXash%<-XQ-`-q{ZqgZ%C}uvlrdl+7+6&c((y1yYBvs#<G1Ex#ge?NlUm<9#
zYktJx^{Ag@Z*Rqb42*>JZpD&_ot#|Qi8Yiuyx-RwpPlXJ<V8X8u3g^g2bo2&2}4m&
zkk(oWAX^Dh{w&ooMcUuf+2FzpTT1oum+|pcfE|0DM0Y1M+o7w^-ZNmLywcP$&B*(U
zimiQX$XQOG8z!V+4X>}(WQ2j3Zfhyz>1Z;r=4Fwb-t0xQF-O`Q<SK*qMF7g+(z-Is
z2HR4W+UAvjP)jAkHXEocIG~C95LM?uOW8Gm|A4>rYBl4LW3Y#H_wfW}-q1cRbVuqi
z9l6$i#c7r>VUZGCz_#le6<XW-b(~4b!(@=dPa|sVGa~4?;({Ea&jPoO5&4{qifBvO
zRVf;+#&~6Eh}o=j&4Mv=2@iEa?Bneo)<@cHh?*IcYm4KazfeI%vP3QD2r%}R;zD5#
z%=#i$b9^cH6EU^t+t11CMA%YJ8Y#Blro(P~I%*rh_++olZUrL{z?MV-3H)dTR(H;?
zW7t~gw--KNtyPZXbe>Gl#(=eSYWtYK$fifCOW68__3nJqDSY=m$|F^hdFf7~Jk-tI
zz0DrpmY?`rAl{@;dJTKUM0W4;eaiPlwXd7DjKyG+g+<mWY&<aLz)<xFbrT=l3{Pq<
z9;i8OVwR&RUIhSFX1@~VV@Y{vQ<XO#vAi9He9-RlndCmqCY}7l9`|^6#bo3x?X5H7
z9O0Gs+^y?BD62a+K?#>L$>x|55&i?2n=F`~Y(Bx)a^qj^lD86{SN;H30paSUBn+{N
z_>Qc(ri`CQR^l^CS4Lo|I+wS~P^QS>04TH@clxl{Vn)YPWWRrP5~%<HQ2J5R*kwq}
z!&MGr;UNiG)-k4-rlW@4KGC6T@_|(%VQz+%Dng|QDnrHLY>>R4vc!e%mojYQe2geg
z-NsDXHQ11n3ab-oH-Z9H;j4Ymf-kX7Z4_XeGKgS6g~uudAt^^uZyNXeN%wJmw_;Gr
zd;3{g6UVtMj)q~AH^gk^9-G|Aht=d&V+DpPHMy_qAQ)hPy|deM>C-BGy6t)FgSiv1
zhJR;=!hHt;<-45o*t1>rE4Eb&I5n54&$Oy`1N*b?sK)}Tv>927qSF#tlm>h|Y=xWW
z(~{Gnk->f559PILMC-vS`{vsiUBpS>saNQ&?|TS6G>P3Obp`2aDUw>qPc!F;^)HwI
z5pjHon$PDc#;-dYFW>=M?*F;@&9~ou=lg$t`_0x@|IaVd|MP47PuB_b@E^zLN7@Q7
z<OJ%M2s?pFVGCXNI6X93N=P<WiX;C@1`K~KP3*07EUmNCAKJ&KC&#BpqEa&NwWeo;
zXeI8>-D|YBeW4469nB4yM(f%_8IafaEeC$9W}yQ=cgL2o4rU5!do-59-AKxK%hHN3
zi@k%%B)%=c-jhEZ_Tp~$cZ0q4KOFX8-}<E3D_gl6e>m)Q(|1c@Z!xaV-+BLq^M9X@
z`tO#@{yjhc_m<E9ZT$`Y`^x`)f&Aarw_jO5-d@g5e#$4_R!{RRUU=ij>}M#QV=D#q
zymg&<v3hNtjS>Xlf<C5nKteb%MDRuBnDVc(WY!(o0vR16U53~KaQI2549R8Tb%v1(
z5F)KRfWo?}97=s?<{dCF>S-v#2$A3VHTrO5X*XFktouO&N08K>-$QoWjbcqHfuNS8
zPEU3Xur|PNq;o|Ir#yYA&}5N3p$tb$Bntf=%N3#GkDww?)Hv|?JD6QS7G|PcoSMSK
zBsG$D&v%!gRjt~z0qwb!R7Yqmu0m5<Qut7nf+b+E>3}Yb3hw-9pbHgt_?xpaCC!=N
zpW@));Z{QB_Z_Yb7Wv0J&5Tkt;{<s#qst1`bjqSe%{+fni$g6a*uLi38eEZf`+=MF
zx!zm{#*@)tG{xjW-Q+!moGC!N8x0~q3GRDkGwlbB(CzaGYy^=S$fydfFb^tRgDDUU
zA+MtBljbV!9^w|*6hF?c68xwGtQ168nM-5V(PK2_`iQ!x$wL*nRt8sy@qx`RIOByt
zN~3+i|5Q%Ln6wYXRXF#QwpETxtZMI8j4S~KYVeO@uEUF(d|9keEe|OHpq%&*iv}Y{
zcVs6Fc~N9&XPm2ez87?-s2>heR#iam>!My!cN`;$AilXt{Z`>zMk``S+FtFPv#iN9
zp^7VDLbe!*9`P@D2E&tCf12HKk(2;IK)%1M3VA$Q7AKZ!CA526o437<mKK#l03zO?
zfkovuwINAq9tGK69p^872WXScRSl%E8!nsb*Ob0_L;S$jGL-ZV?f~O>+PO|9_r`ns
zQy9vVp$v9R;)xRcDW994$CF672NgLh*-@tEL>0QvcIBiZQVTg=ZBv4ah8N8GKC!hN
zLd4D?xIEXQE!yUhzO`OV9W#j~l}lKW(l+U3p@EQoTj>BYUCy2QWQfZLzH`{CWBxaq
ziRTGyQDl}2mo<#H_Hno9^w0s2bfZdosJgt$td}{Vc+;!VQyHr$;xa0!hPGGP*%>A`
zqQ)6iDU0GB_I7QbM2Gb{+Yu>{(%ObW(E-wEGuwxT{;I)iYR{=dXQMd_Xs9-xRbjm|
zcpE)#=#RU^1mStfcSJmRA7kOuvx_5fBNJlcWlU8j<4>$hyYx0+%aGXF;%0D}Ud=|c
z%v9jkI29<~J!5%;+vY!hL5fb0yiYT{NevS6D-W$6ZKmb8=y$v)fz6CXtZH=`GqgP$
zq$Yd%YW56EcTga^jcmjetR)AiXD#R_-nGmSJ(D*5*fM6D-#@YH=j{ckm<$T&0By)*
z94_}g-pu5C$rtGy(`XLHbsjY(q`LU+H&J6dvA%kNAas5bs+43vO*L63Qq3rTb8IWH
z9I%JH7@)kB8F(&zTSm<et$!xcGM3t!gJTlAS6Z9f%2(o>LwxyYGA5XeMVM{lEx$*W
zgZp<NRTF#ScR;9QGV_`(X7()QAAx9VpH`e}zYj`ywXX=Ih#ZAQ_5si2dsqmccobik
zmA|ZX(5cd;wZv)z5-wY8j=FT5xyHE*=u)Lx<4<3I{?Phg<LY#e`5(5ozT3>@f7ssM
z{Hp)`QuBX*O#)%>ix;iqgVP^gV=%y=I@oz1wEZg-u>g*4;z<|TB3QG;EoD25O*_id
z^kyU`qZ!=%lT5@D0HgdQ$lROoBK=FV4YLQ(mM~dsToXP*63vtAOgT2&{8a9l)OwH~
zVA@S0?eQ2<K5!ElM3!NB)7hk(Of2=3_HyE*j$^1<Dlf6;v2e@x0FYpJf*+E<ZMD8B
zuV?bC9bg!>>!gqF0!nJdJ_|TGc#W4D9U;}@GwAH3f6H%i4lLz*a1vbfn_(K4NLy9L
zLPH~F^`e3UgyZZdLav|ZxQ0NR&9foU*9nFle1@e13RAJeFLG&=>&@ntiQa8XE}cHS
z9!`LI5di|5E$q_jDz|eEe3kp=QB%CQ1&qL+P+VxQDgIs$u++~4mg*<d#B^jwoR0Lj
z$l&TH8AuZPS=h~IVhifa{;1)HwYB-#WOF-7wm?tP6GczL27PMbCM=6GU7Mohsj|V*
z)qfKhlntHAWMMPHrbj1x*ca>pk<!kNGkUCQjecHTQxxhJuA7NY>roLmfdyRwxYIdu
zppNXh-6~KwpAZqTl0ol$LD_E0D%lE^yo6<GVQL@Y$-uQni5u#GQ)p-Bi1!h?6diP?
zv$%i4cPuwG$#_qNqR^YOBt{r)Bo6xI`LmBDp-SsI3th`($d2nw20LisC#8xcqIP;9
zV)YG(Bj0nm17c7D9YaxN`{<=42&Ei1Bj7mT7(=@q3jdBfD9jqBP90ND5$z(cLU-UD
z3wIfUNt^pDOQ5ryaHN0B3w3Ab*hG_eRx8xS+Z<{+oq+T3TqRLR+kE0;js`+q8<0hz
z#fMCM@Z?<#^mo}$Y6j<w5hOeWPItj-A<Mj<nm3?l93eWM#<e=-N6<lz7Wq;4+ea>R
zwLfawsD*}62dt9b)C-^IoYi|4zhJQm@@(d1%zIa!UQUywdoeP{bm;(VC?BHbN-B`L
zYwCcxB-B?x7aU&nabM*_-$82E)GKfFk)daaY5R$j_dva98PYCB>x9-v2)V+-t|J~J
zqql2LaiyFWKig(0_mP4v)7}UlZWm;n+UbKr8{$%1T0oDAZV{YgYBX$9I32e9ahg+l
z(uFrc?h=ckaPpxZ#%-}>RCW=voR{T1=8Sg*X~eNZUlbHaRcvdB2ZQ!yf7E#kGux&c
z$6l?am(l9{H556LH-C{5sF(5k99J0WSAhsfCxx}X-51YgZ>^9>_9LN2Q!~SlEWAX7
z<49~pg5=Wmq796fnG!7Wj%60a{OxL~9ck{_u-N3d8ypW@(@!K;?JZ9RFxZSX%l5g|
zNxtwTUrbGcy^Y<k<NqL;#_Kc9$)-*mfP41%ue{arQHGyRMx(KF1N|TS03#j+?v)s+
zLYHhdu~f<a%%e>f;*?8!RG}?>7BJOo7&IVO?9jex-Q|umzIUj6PpcfNwb|sqj0U`p
z@O}p6RE=k*lR(7Sgjly8t;3?_NFUR9GUaD><xP#>9RvT#){4A(E`AgM$63?O?{8WM
zxLE?ap}JVt8@Kh;PPK4FDkTH4W(HDk_yID@ei|KA-^dM!HoA(f)En7sP_uayosvu5
z5$p3txxu4kn=Ac1G9?N&)VVbEWQ(Y3pEs*MGn>{1^8289Qm+tw6w?#aQXS1T7w-5X
z{Fy%Zxu-b^23+;#^fW;El@YTUg=H-M1d30A1rmnbCmL3q1a^D6>^^h}{zwMSE!5)I
zEorc8Dfvop8fqhQVb0?U<UY$;^_oo%Q(}_q$!vSS30X|7^m$<COfcagqTv?l7autI
zj~DF!VZ!~m^8Hr_t@EQ7?dQL=U!48?3p`mo;-3>Pk1uBhNvw!c-9eI-uonwZ+6V9K
z5NRC`U-bd&bHi7**#*}M?qe5_4#RN1a=8ZIf6#cZZTa7-&|ES`lyS{fw~%JjdM3@&
zS66{zeYx`LYOpdl&J|hZZdIg3UtUw%BGyH|v*zskuVgafvk!qGbEe+7!RG6>;U%e<
znk0B|{R1342R$$90^706?d%jvyWL{B#rUYX^>L2hf*o8RdmR<sbH`{cslut^`!=UU
zf=AGCe_&H0aEEBr9he@XqHtG{)gdY5)TbKL9--65U*TSFFR9}6sXC{~o^Dh1llew1
zZ<1S>p_{y+MP#}wI||$&qK_FQwLC^<sAaDF%k2dFE55}`?wUGDrte^0skWD(KsC)7
z4kW<9c4C)v_mYB5WM737$VUcSs;4xs47p~-5#SZ<26<vTlp+ObDMn43d3!M5C9bsd
zIY@Nw7s$gZuPy{4myaFuITv7ckx_{vZ6yz9#TdwQlh1E|qU_=KH~dgyW8x~@bnEb_
zFwc6Wu33ih1tn_ASk3PIDweifv8*J6Xyk4{@e%-@Ck^;ytyIkHYYF7YTD?;G-kk4M
zXphOJ`wAK{4QR)IF{<GUirQ%+#(&T^zcg!O)+LT0N?|;yG24EP0%=ZIIV1x3u&$jJ
z>p^6)Uo&=R`LN;PxyU!L>mF4W<p|4bi+Iiuzqz12n`4*puVMd15v~^x0x~E5$L6<N
ze*Cx1t#7~De|?ehKfe2F`en9|*7Jk&f*>I3RVWfj0XsAIHtj_CptWpA@;DIa^(Vtl
z^mOV;6Ly1)?J0}~)z1QfJc#)sWU@}c3B=(5+F&3$G=?}B-iBLDhrrmSQ_i~rGhp=+
z&WU%Z?01tE9=tl<F@5WdIOd0S(G~}hxQqmy7i+F3A*W7BI<<p3zkTmGcwNG=YX6}5
z7I`in2vzvfsh0PfK@g_ee_sTv`92KXuBrA{x3xDg>1v!SPL}pVm8Kx#JhOBUnPnIl
zWYlxA`jB&(&OwD%fVd8_*?2sf;0cQFZ-B0)B_KvJ#%rbJpO!TO@n&Daev-!hSLwTx
z*DBl1_p?4fm>Yf8r&q&*(qeGJnK{0$js`PdRy!S6ap5<Y*qo-?a+Kg#GC+ctg~hd>
z^YxT4jU{d7)tVAc-l(n=PGLBayxE&0r$FUjJD#?Ax|22ryxCi(zsvkAG9J98CsQq_
z3~7YRF%;Ax8)ysSyDWkWyt`+tm-YSraUYB}<D$RFM0CM1ploidY#hJ+wrEEU$d6WX
z-WN$Pp7p0fn`n)6J}9&OsBCmHY1#i4zOhqLbXBN?3W<OzWGmeBX;XcVPcZ%R0dMv6
zd8QMPsqp0PX-N^Q=H)~t8AdMon{u#~A}C#`K)8eNSH)#0y_h38^(DH^=A|Xueaz77
zJJ1)(t%Io$E)xn0fTf%$UP4FlQmn$|V?SYEU=+5P=+Y4p`5EXmKg9{uom5_dyoM!b
zhqeL=c#+C8NQpBlr1&c(7~{^Rp7dg*U%d7fWDh^UE~jv0NnRkO0AtKtAnWIWg(Ls6
zpA3jyAyTvwoFm^chuyK*lYC-4&2z|r05vb8w<OUA%1pQ{F&Iv1J}KA|^*}1Q<BWBX
z?~C{A1^h3yw`E~I;F4wvs)Q>j@-54kT1vMnOGB7wm!eq(QWNZ7%jq@5XBSc{Tswgb
zpnWT-HYFOzYau3kkqN+)f6h|a$`a0$W-p200<a2{FM*0VV_nM6XO1=daX~gtOZrwE
zz@>y~mty2xJQ=+w+o5CV1eH!R0oUV+%eGIMr=F`iUQm>$F56%IKa1smQ#4k`1dsPF
z|9kqk?ajRW@99_h-xrzx;j8>l<*<H!a&&sudYPC1*{>FG_{?v~Mnhc?!ot=9WwgTE
zLqM)d3&#wr3k6vIexw7%XW2ikFYq@pUxA^=j|s<-mjaRj-pG<(4^QPjTP7Y{!d;-D
zUa+AEY0zux*?z`w)aeGTV(bO@y{0p##h2N3jmmAPYN1qb{H3;4*QIpe1}v~2O7?pV
z1e|2kTHQ@XuEqyY%POh+NhZsh9w-3n?4_#{=1sGQUfLf#$8HjF1r!^L<J0ep`ls9U
zJL4k3bin}>F6=Y_xHBN|pFfUAO+714r%&TkGp&JkWDHA{-Xv8)p(PbzW^c^jO25dH
z7V~+*HPN{uGhs3qcs|7Darq9Dt3GWY*Swnu_CsRgqe}Y{bDNB7Xr3N7o0}0sy6j^g
zHif3kCrwei?5}C!Z)E?R#lY5bZsd6>9OQxwq@LD~3Z#-UDfP-_e@4eNM|_eiI41yI
z73gy{FT;bC;@Qe|O%uX$h-?a_#-p2E_eu88$<%uU%X8NXxGBxXpbs_dh<7QMwwGl1
zo(|I~hfeDPX^;`QRmAGy<rduTw}FM;8o;k@_%*pqui$qAG(pl@(qkKNmgzRoNP3w}
z<4B}a_?HaQA^nNb78q_WyMD&&WGvO(>U;vG`3iL)@VYa@$;~98cCKMU*SiVewu`jb
zktr@RRUNle>UUrFn|e%{gL{2dOee2<49s-ec^m-Ek`oPhNW(827h<Pj#$anu<Z*<p
zQDbGp&>--7gt~(GGVQ0+TUsCD)miHTSC&LiCLX8#L7ZGaCVj%=Ngp9URv=L8KmpSU
z7%D6%yj>4^fHE%h8i_;#HS`ry$6it(kS@rzBU9eZlVpr`23?+M#0H)nNRxPU;0>jp
z4k&1_o?et?#@+P&Ztl{H^M!f{<am1hQH*M-^)cN!ZS1F0L7wSzo9U)k=*Jdl#)!eN
z!FB34m&T9`13-P6Wihvg^ID}fDAopQ26kmQ-E~@nH|bzD5bCXh8^5mvH-29PH-4Xo
zi<C!x`&9tfYxUhccHs3D9D(6Ekp2qLCbqI(t8J|lis~EN@&LTt;`uR+)nFH^Szn1z
z>XH2f;GtEg>H*nHUyf$OE^xs(PlL1z7wW6TX}!isd?L2QI*<i)LH1z#&OP8Pmcbsl
zUD(#cm(en}26t^6PeI%%SZKo+076I}1M8y3NO(fSR}nk6AEFP85(@{gPm3U;yQ3L8
zU&}@WP{>pXLFD#}gb(Lf4M5a)7g3xblQ9Ai@vy@ig=8}*A4WFl%Ol4=*3eHxaXW0_
zh3gp)Mt1PUch>0>1NAhKgf{G94RghyF?ZyfxgrB<zqAOh)<6*$SuM{pkk|b2l=Dgf
z!Pnkl^m3>HpLfL*qyc^G+Ku2o13W@4oi@m4R*R(I(1w@$IOaN`m-+)-mc#hR@7I8$
zWEI7GLcKj$>UBm$sLGa8#r`40<g4h|u?QZDJZj?a`idoS!w0(}z~o{T?EAipOM?%I
zX`(v(Y+-=w;l@dPD6<9_=o-~j#J<9TjCautuK|Kkn7NWbgu(ri57d1@4kgqta|e)p
z!#^8<X>|pV^ut|7Oaw0yP9Ict`<6fxREQU}rLNpVzbjrJp&`H^Zh_@JHKjLXS=HZ_
zTq)I<TyKhG@Jywdi-ak{ozz9(c=U@XbnG#<*LXW}Nerhl&>e!l#=WvI1uNi*N665?
z29a1{S~SmcNJ;XM>w1$6M1NbLq<~bE9Z7-|#29xu3Q*MJupC`YtPPWPN9Df;W)GGJ
z`TwFMfFe<iQE!-`AZav7r`H3r<q>p#Qs58b`z}dKnrJ?N91PP=imWO0C*wNXP)9P3
zq&&^Uo1c;il;}<3&IjuB#}C;@{q+AJYZW68iAQP|BaCR17UD=Jzy_mid^SloY%PGY
zlfl9B9a07-mFfXGUIK(dq&S<Zd#bvx)#?p4<7Y%5>#g7DYc5xG?QDirB(jP};`(+x
zn(~*qHZ~QmoO4ZL)a5gr#951uo$TNX+!y&sBr-YYSO(v2{@YXUocjOly?b99N3t+{
z{(borljM245*9)dSDQ-?#@OL37azdc?6cXU-+%^~SkfpO$w0C`pZ%>%-=}9Z5+Zi8
z@osF8x~sdZtE;Q3>q3fo_WF{FxG;X09nzn%If)WuqGVRp?Lr0~Byjjg8{MYWqee?U
zhlvklJWBx>!`t2H<bgwO?g-5Ij4YSV^Q9eGy&v`|aSuv>GUtg4RHhqv#wjv5ImRo#
zqceUM1VK+<r<`}8!V+j?RKu#fjjGPvV}1<MD%yq7+KPqK`^os`)*Y6^ztqMrqBN9T
zS9k-fi>+;CG2Q=bd++Gzbbs&S@bsj8ba;GtAuKj*>%n0#Q_vNfu0<X-O3EYbINfOU
z7dHJ~vXfj0g0-*=C4&*@Ccnt)Wz$4Z$xWyM;azW=J`~$-qNQ4+rWQ(Rm!?Ja?o19p
zE<-SAgWiMBIYiueK$g{q_=~>{F*gm_SG44+rB=Ss`a3&K=LSnfDNqg#Zzu{DXFaxj
zg|6cvg%XmFj!de=@l{(Q*y=KtG55v;xzD6Mc&Oh-VFx<kdn?+nh22`2?1Xe1RJLFq
z+kO1|`HtF~5`);N;W0(1Jq%m31O&b3fVm|Y3N)O8c}c;8Or_$ghsJ!3m%gdB7tt%Y
zHU47UwtPwN1@Fj`)1}82d?yax2?N(C3Es$S_+Wh1jXOXq!HF0l4A||f#$fi3gNxJS
z^IGtI(hXK?!C{(?Bh04Ke6g`vFL}4(;|rd}5*Mp+N%HM#-aYR9YTZlfR`y@6X4^09
zdNss^f&e?)EKF1|CCymUU3uXS=7c=Auj(Ta@^G@ij?)?e;Zljv?;%Jftimn${8%?g
zAd7av+4SSqFzSr&D;dun0)k#UhS951ow`!7`Cj;R@Bf&sXSM>s#qa-(jn$Q=TmN@u
z<<I)Rf5`ZsM(_$mJvi|H6<#Hm!S+x1PyLcUt2h7irWeNDI_PYF@W=nN!`Gg`ANjHO
z(ccW<p~G0ZM(=k?x08y*nD}}C+tt&hGmgW7uO{v-b>e%{-?0bM4cRn@!|;KxY1Ni~
zVL$+1GxWoe1Pq!!Td7{sAXf6uP{<dQCro@;Hzw&c7nOJE2_Qm^?`lX-K%iFu<pTO}
zFl5rv)p!(RVZJ_e6tg6a{{}woqrWl9Ff<DMvP7}gMg?N0Jx64-*uy_tJ?t5qYngt=
z(iJ-N&5Op!>&X*K%gs`lV0dHX0~#4*Izbe(UNjPsKhYA^K87=+SOSHCVU^;P43p4w
zIPQy9Ts?VsGv>!Racm8DdxJ>esGy(VNK@k|{17qGcm@*|A{mC(KGo^vY|np}rf8Nj
z4K!S-4{);YLiXF_mq#k&tk+R@M#{?(n<44hMxxQai-trn<U<>xr|?U8@~^t%4rY)U
zw8_~r?6$>Oxf;-!4geE_@#tmnBgj^l1?>STp4cs8bux6zKy3}h#_QlB8N^p+*7MX6
z&hT9>cS|^lQ!XA=1Q;{CylRHTB=PM5RY$I>3gXf7cJN~dD|@5-iD3hM#ZOo2E7`!T
z+|As{pP*E^b4hL|f9+*rP-^_F1?Tb(trU{I;zRj$e2-yy>Ns*Ka^W(ZsoEz};c?1E
zndmX-wdpui@}@}X3y6*@c^IhXM}FiY)K0#hi#y#c>z%)E#OZ{pkO5ndty{}rLS=9x
z(x#eZE=q;!#bohkv^P+e>Egwb(Q0?6fe106!6&}8ll?Cfs8qNx5#vL@$c!x6bx;x&
z{<&(bgg@t>t9GbUMt`&}y26=R87;)1*_0Oag?tp;5)U?+Q7wU>{HS6{*-_~WbJT2F
z3a!q`T;A>Yy4_)l-4>Yy^N4#_u57D3tXwWB=es3yr%K5WP6-GqF=jb(D@9__RmPTa
z1ttCNgPTg7E`{?b(&8NPB&tgxE~GTW@G1R@<le@-jpE+)`*x!y+x}|D(>h<Ph+UKa
z`zli#8QEblj?Lpz*pu2bMSN7<VC;|BRhzGS;Rh9luBxD3CF5=<xEw~|hkzs4D0*a0
zJ7J!t$M1wejhUUGhP9xPvlHs;k~?u~=J+kK6J~pNCqC91kK2f8j446~`!oK-w*TCL
z8h1OpX{5mZv)0^LaqT~iX6sM;&mS@VV;$>%ZT@Kl0#5U{y?0p?5PvvbnXgmO?z%~c
zb-q}EV7J(JRR07t_}lPfEF9$0J7Khe<@WqVQB_@beC)e0gGPo|_IL7Dk&%9s8?3E;
za>aAlwg+eiR*k0C)-^iwr(~2ARh3}T=C5zG&o`l0<G4FI?BgA3NsIC_{rodi*SDFY
zpx5rCqwI&k{j(oZsB1!U>w*>mGVkHsJh!%o)6S^rs(&=wJ4=?OTi+Oy@ktdE@LkDZ
z%q=PFerr^g6=Mg_5F<5X-jqZSqo!nTKoVjIE_LVjMw?$nl#89Z5u>5a?BrpGe^aRA
z15R)9(3va98z;Y_@(b2<)<Er5r+bUgDBAD3VPC`Vx7E1DXcdHsz>I<;l<jvrgRy73
z6@qJ|w!a(3qv)LtU4cZM6KmnR)6m%P&7&%F4q9E!fmDAz60Evvgcv;u@?%PJKNp<G
zB47U8Fdnm3ISsyf4pU*9M{#g1!?{EyuapXMP(d%edf_&QD~r@2yq%W^_%QFp*;6>o
zgwtIME*~W9!=Q(L3jHM5Kf8DrR4@v)I`>_F^+0OOp|F@KyM|>%a;r__v%`+ygYYwb
zXaN!+xbs$EbOkb2UE$|RUD?ann6I(Tx#v8P2mRCB(r2=e^Jnqp-;w^O(t^yO1QzRm
zE34}pO*j6(vA*)B{`aT;=T9N9g!v)<%p*X&^x|yq<ow;KCyl82O?iE?s6d6A_grVT
zGTH>YS`QQLOb9u7zps7S`=zI;{)DqRZx!SY*IE7DUU|^oVW#yh+jrWRIZ*e@O{VBe
z5omVHNzs>+s?hA2pG`0)GojNtXJqpJKB8BSS1~5=j?;FRezmuOKEfg~s_Dz1VSs2*
z44bpr26~Ok7XMq(K>ihAn^zBK(KR}q8vvUCP(c2dGfShDY}5??$E=u9TbS^iOX84t
z`_5!pv=%;$1hj$&UXroOLR*lvoxOPgyp=1&<N9AgR^An+e|7S(B*T{F8>GG}?H+WH
zpHYszMpQyBJKe95P?tIxseTh?sPNm}ZG3f$k#P8AJ(6a7V+fz`(GXwlQ}Poj&Uk0e
zuPz;ogmaCPu}yn%lEbH!_tyMQ<<w$_75KK7q$)|-h@Cs)RE|RLG^n_+4=Y8lTdov2
z<JR&AT`p$AL3W=N!?ozbtb>y8^=c8QNox7|biJ`Gc^tf-^hG(;9A54RJ<pZAd`Y>3
zb<SVDAmP7*v%~%NyZ5h-;Ge^ji-WT_d;13!HFkCKvQ-FA_S!mes>Fz;`d_8hE0%bk
zR&%Imz|RDwSLwx`f3C=n<a<1WKRo#k)^^d}J3l{sdvbhma?w70cW}0Mae8)ct}o{?
zu2g{vaqM)ti4w5#>Q)F?y55EVm7gQ8OpZ|a)X&3|-8V~2ct!xSg?{p;KDRlSDOfb^
zoF~jyiGN~ZT=OoXARZCZgBv4_7=7Hts&Z!0o5wxq>l*dQ$tsw;%#u7rtzxSDgy!8!
znPt8UQ#0%hZt~kOZFlISc1WLPu5Tp*Tl|Ug8~R0CHchD<h&dv&;25K95uYM4JUa)g
z^c5Feo$IuU<Aj%kw``EZ6#S_A48qaXZ8Y?(Mp(*oq?`qhlw)3VW=U!%$)sM17(y)r
zpJei%D`o;Iu9S%z=9AjYt>Q9T{uLH(vR+#GtGWz8<E}Ixe`JN%EsKAhfQp%ahnm2v
zQ{PZH$8xt+Y<d1TT)L=qTixne@*cA)SWh*Y)l>1|lHsHa6LYl}DR8;*q5g90>W_E~
zqejKKfVs`z;?XVnXC7XkBqLVbwkB+1e$U0A6OKaj7AwRrH=ekS=9<(9)9j-jM<u{Q
z*QA$0dJAF&YCIpJXrP_kRahKo67n2<JK0UqoaZyuQ9tb+c8tfp!%jga;KJ1?zyzhn
zkZsS?*{8{`MvtAD`Tq4uyVsRVjgB8s7^7Cs4umtk=}0d%vbO3t$(7#I!*ZsVg;)-4
zpMQ~*OewGUQiZ9#lDod1bS>y4pri*Urx*AWPXDD-)TW1Th-M;ZN@s96zWM5_uY#)(
z)lmc;cH@xvIN%bbn@rIdAR$0}h=nIbaI69;E5ues#oWvc>e;4S*#X!NfN?XaFhSlV
zNhe4sZN?arAw^tu$PBjUD3UJmHVkF~Rr^6$OhL9u04WpkQODSM-;5SA`M1^dYj%3~
z$zi7YvQyvCmF~ohnnl}<XwgY|=|zz4g<*Bba1<xY_@bzI^dSn;@h~E+4Pk$&vm{AM
znr>J^zM3~$j4!gt$W1ms0dv`C+fvE>Kb2t@GJSj{u1EDi<<-}?s7}jA1jN!@M7Dn{
z4-s%ma}c$&^Wq-@;AHOMHvHVYLze`Y$T{4p*X&&Wihk%9TLX|E6c%$Rcgh?ihc{Y1
zQw6{u)b7?3Fb-L^jsOR_+cg9}&An>-*Kt1}GXL7R%*mw#9X&WD%X~|?&tOjgG?7K&
zFg>RXLYQ)=@ZAS%5EZ^BQR16I5MS-c%rSEGW7M|UdxNtiDOsH|_Y^{-BItI0A9e(4
zkDRm#CK#0`0S!MQE(qowc(Vjq`UpMSJmbmgtV4k^ZNz=rbS+gW%TQV4NInG1nmh!x
zhz%tm_hTj~-az%_A@U5fNo?G=t+CPw)~gF+tl@Jtvde3u#&<R9K2VAHT`V|q7Y-_B
zN2eHv5OLX2>ZMA4g4KA!(JcxBSF*NWK&ex)@M01iaY8hM4DLOjU4@up4c{*6bH^tB
zC6H;&LjO`0tr?{_OMBj0e7<tpQxG$0vOHVE+fHa%=K0EnZ=H)Q43)9T0+Dl4RO2nf
zgYXq54cg|PcY{6tic%H{jccc|6~x<mvXpvL#{2EndAk3I|H<OA?fYjskZtSiQkA$S
zV-byYYqH&|PC1R9-*?a$u~+x{!{J0~(B13t#$OssG)_|?6Zi4ZDe0dWNzH}9XQ7|;
zXJamy+^VN)r}e~pi$>!7S&x+uvq+=*RKh6iXfDQm?pI%4;!CO!ZBw0n$fqC6oI~Rj
z<y;U?XH!jYLlH5I1i=C>W{B%yY6CCW(mEz5H&Xc?sl+PEo@scEnVTaLurcQ(4i8(0
zjYy^=8xcC5!eP3JMkD!Z&I94w%(y$^lnRuO0t9wLTGw&;ds1%yT1xO=YsKXx0pg2-
z+vG0#$oaBFzB?AxapLs44PdXwT{mk%DGP>pljoj4je!I9BT*pO`-U&{2cI(gV?P;1
zy8Z(V8MBiWA1jf_Pf<({hr7fH(!&%bGZQaWfp$2E?nwtrg9;qbC5BYx*!m^p-rVI1
z3w=?hwk54KrtBRM4xGZbmJ(o;x67zRrNnz;4uv|g0!Z?@6<b~rZ?_=aiI-Q*<LDJf
zjevY_$xIoEpGgggMI1fWB24dv6$z0%HEQWccR#p>qhZ6&XSRCz$S<=B{u8Ml(lGF(
zibpwCsR(MWjIx9lJ{y~ytWJL>CMg->Z@~|xRVSYt8nqB@Ua0Z+B%JWGFhUQvD9;Lu
z3>P*hRNMjcmS@QDlj}))a@u}<@ap|rR=TWuC|qi?6QKixwuOlzlU$D@p(ND39-IhL
zuIdvF2_$d62EFVsRc4`%@gPpgD&3-rSGUpC2ZJ%n$yiQ@ie2)BKyIyQ$HAHbFlgnn
z8>UiKu|~mSq<St@O2FbwP<StvU&gDre>$jT(#-B<KYBIeDz`JEQ@vCem<y)z4Ai)4
zv|u-jpCHJhNcJp)o`r1M<UDw<O2Ye;=U^aSZG-s|65OjEA~Gv7;mzwjpGG=n(j4s>
z#yje8Ct*sgJemkOEi{h$6=%%BA9r@_SyiKZ0Dm=V(@QN*q)-zqi_N*xa|YQeEA*pC
zy8LdH1DmstWlqWbwy)m3MZMP$Kv9-0bF$_*hoP8*WuY5cmxbKQ8gIKAQtD!qfwx9g
zok%=B#@Lqc!jDm=WetI&NGD_<N&zOG|JWsjs(uV%Xes-!+>f0;^hxrt(}UH5!~Ojr
z!a_?R4TyK8x(bfLEOS?*F-YzYnB)*2SwtHw&mNdJK1oNNr2m%@$7*7+E=k}xb!VP&
zii3FPgjefj#NKM04p?TV4uMQXr9MhN%<-O~67;B)v#tb&3lDF?xQ|1T7>U1-JqkTC
zMwH$#XALBX#UP>Oiy=82)$p2O3sUZOW?g4?roGR-&NthhBjB8Xn!{l^5JC9vSX|0a
zt|BFnYp-&u`E~Mc0VCVkuWDRdQT=L)3(4=Lw4E(RB$(;6_{kZ7bxw;Vz=}bh<>i@*
zG3vxhPGq?e=&Lgx66wlg!q1eJ#Y0QPU)!_z8mL%Ui1~pPI1$Q~2^>@;*#u+p#K{pd
zE~?drx6HJJU4|}nO>gT+n69UjsK!e%CavVo9;wYtPO`f|Logx1saGT<M}+^J><*OA
zfwcw0;jU=C*d50b1_+VUp+prOHwap*8|4G1&5*v&6#q*^n@$%5JSqOSv9apJ|H40i
z=Kua<=Ksdv-R7UcyViL=#G*Clxe>hTYi}5Df!)M*I^m=HN`@-v_tM--2m#kj;5VU(
zF6P{r((t04hynwRpL5=5jTZV*mtz4qlkej*{SpK%r6{wIa`au~D~c1Uh=z;16)ut?
zJc(UdM?rC%hAmiDCljKX%QY6Yf`1HIlCT8?amy=Xo`})O?s`^)(el@T>oH38CQA{v
z3htHIV%{h+r^$S<;*+f$e_pf19{pvL|GT}j^MlvzSATE6KK<eE@QK2T(ne&iOfsq|
zuU@hCj$Rm?WKd}a<01P8p}kJbX)yr0_=u0~p~{b@x^WtrTdr%82U-GJo))Q3fkqYy
z@<Pp2&0vRw#N7GVaB(J=Zq+Pg$<e$_yLu3rl%9<4KaXROIuXx+m)@9PNThk%8s8sJ
zIK%ixYWpdt;15IeO+hE;rns<x<HF=^Lxm2z4`d_eoTZVOklA*QH<+4mk(X`~1vBAx
zv4xIDOvN(^!%x4OI|FjSpVuUO_>iZmQk{&Hr$UEs4o^9H5gz{W&(Dtsa}K$qtKl;~
z>6(P(Xvzsn-uTZ9c9<J{qxqwcCE%l$h>MkLPBE!fhwGB)Tmo+90inqljKMdnN%K(?
zI-}@U!aG+7!C&hRo;{JZ(VZ?Tm$@TUOs!~@Nd<o%jf}TneugwsC&V3G_(+)MbomxD
z&^L`nhoV`<o(5KUHYp>(qI~Gl6>NKWrQz8X1P0SUCpUZEDpabIpnZ>CP*bej#t*G&
z@a=*sOOL9<WhCBQyV?XgLZ8NwWbTG5Kb1npXZrhE5Y=z$FC{F_Nuj--=oPb4uh-=N
ztDM9HugemRx5OUgNMoiCr9yQ$7<3;%ak&eJor?tBFx=>VUz8MX1LOb0P9xX~K5UB{
z9C7|Z<2=5uU><nhmk-sic0QOUDFBX6WblRX{}R8OU8**LO|z<3;nj!Pm&FTOYU1j4
zX^~4yPqZ>2%H>rNK>iiw9G6VAPgdmHl^`nCk~5(`zOJS_OKvs+oEgcukC<XMn&vT+
zGy?l`!}0Abi?G?AA~46{z2_T6dTo;v<z=GciEd}%c!Ap>1Ii#Q0KU@#q%7AuBTO^=
zUVyUN)O`48)`CVY2P4g0SF4ubZ7hgE%)ok?c*v73i2Ibz8FXe`GjN0aJf{0Mt9YOT
z+OnvVMf7lW8}@JLy0eoBWfoef(3s_voSa1Wen?npy*KWaf>>w$6QhYR>RMNtk_sf<
zw3%`0!LgGms79VX=`9r|K#P)0YpP2=)hFdYvUC~e(1$Mev36(QSsCN!`v3gnqWx=s
zsrf1RvD--iGW;5wzcfDm6I#_p1|@lTyAMj7!oh^}n6T041~?r`AA--aH_<@|H@P(h
zs;n-CgP-uei23c4T)Lub%kL?u=;p`pW7Q-aUJbo}pz?`h(^H?G1Uj`RmTwe%cPz`=
z+VgT|A`F!%HPmRP$2<+1#6uim-8xxA&773pL!^dq$dMTILP^`1VJ0;ecM4Aq?7<zr
zPsY4tQN~_9BB3z@Ka)!#<K!Uzn2hN4RaaFNZgnI$(M>JoV3V%m{t|hw8Pu3?>*7h$
z^OJN}qfEOGDgsby5$NHUO8-6mOm5dQQ_g@F&H%$L5z2~V{BVtf@h9*D$!q-MmM<hx
z)(4@1=qXvaa0%BwMIl(GIUJ&UA65T`m2|{Ybeec{22si`QuHIyr%DymYq)xq!>2oZ
zH6%lG-BV~SLm-?6><O+-AJfmK4>Rn7bGlh47@X}<OuMt!<UDbnyG8<&-^@<!6To;n
zm8w-P%dOv4pT?-`A7BQ%F4z5%BC=zO)DtdivOOQcc>x80UsJ5ry{1Qv4tvjHjkb(I
z*6pKgx2}Cw7|4~C)>8$oJc1-TCL+RTc$KM+%o)&Z^RrM0R~T_wPT2I_!nAPKstlJ{
z?y3wJ_R{2snfh%{`3R9g`8?htgr>q-i8MrRY3Z<kfES19FzCgXq5<{jpgwUP9cX`x
z9voERMKUmr`!UAfa|sro*&PsQn8WOa`Dt1|QrOg+YBSl>SsgW6KBukGnuV=myZw!}
zq<;T4`nM?CYQ}H7D*16Zn2YdGvq$+&wDiHY$ygnIJqxW_Yxsw0?$#V6<*Dx05SjPy
zsTKUE*>8KhRT0AZY_ko#P$qqvF(*r@Fzzg(DRui?@~A67z+@`fcc(?kh2D+4(xLmU
z<iMqY){M93;sI)YKL3;=3QYe`xnz2|#EUlm@JS_*r@r!km-l|jVh(KsM6nV5Z$q?-
z47aTwdADy~<6j?jdF9O7XTuN^SeU|=Q8ee$rV0V%@fHgO=u5dMywj`rD7b*1e@<0X
zO!-gF{#cE(BqDXQ-!1q5ayh^MG;{X%j0(7rc{Tn~J+25RX8OoEchZ_I-!9m@u3lV{
zA{#?uQ-wO9+4Qlz6Yqve2QHy)2b>p5Eud(OH^V3r-F6owQ7&eUVHU{U;F)-+Sa35N
zPh`%TGuSAz(s(|0A!f9XsOnHxkL@*=u!?e9&bs6Hq?WtgIqUF~!{qJG0p}oZ@^+Wr
zyWpAg;F}?XT*^^(H+<VY>q4yU&fDBt;1Yt^?&aV@C+!-d0Z!f`BK@VE-L<o9dA8`6
ztkT&I-Jq~c2{6w_DAe0>qXPBo#OzTe$uLteV7ok7xB{QInf+;ju$5_7D6Ltdq7uM#
z=@$=)^9?d$)2O|fm63k6Df0Ao`#BER#YJCAbj_>rlH1kq>Ez*Hej+ZoxD1(Qf&d9p
zQVdjR$Pe;>+mVe%23P-?F1U{zDO0|i+q;arI~;k1&*^96y+=kCH*p+F=d%U!CMl19
z58{o5*(;t8;tLYePd|eS$*o>??S#POp6N2RhHR{Fc`NJW4v^`dm%VR7^r;F*`?GrP
zzf=8RnNyO_)ie>nh4H`5W@Bx=<;MT6Y^?qn|NBR*|NFSoznfS+_{E>m!-5|l?w!1S
zf3$aYyazx3;E5m3G$>yP7_KYl6|oapTE655P93ka+jKWjN`h?EPLnQ`up500yLcyN
z+>m8Uicc=<hD>!G83OA&5(?8XrTyy}9(DP8P2ih{wk8v@ETtw}bIYh7902ij><rPF
z{3F;xDxtpezXe@$s`>g05EsD7DJh&=k+^6zxiyA!OB5GF%blC}C+p_DW9FSl?IKf*
zcNUaR^=BGfa~hti90v}=wz3IFKab%M3Z`TRyhhj8pd+GZcrBnJB~}At%8TqEjmypn
zn<bBTw6upR_bcU;<QV`5v!J=Do0dl8@s2*MS=4#xP-p>a`@D|&kJ`Z^3Y^y_`sh;Z
zqZ)|?{)rjsGq9&iXrJw_AJXhcwHX2VJyE6u^=91d8dahxi?ROQhUa|Azx$f8L58%P
zXMt5jxUfQ*&DqKJZe^hQs!~CY{Oo-{9=(pk8%_{MAN{{*n9xT?U{>K+MJ5y-N2-h6
zmIUZGtZ0A}D)D9kmaJl!DOp$&e94lG-ujC&YO^!$^&UjXY;Zm9Us2*^f0bIZNG;W<
zwRC%)@cWbZ_#XHD9>_+ojt(kDncBQ21WGosXa+m)>~J9*vD?p{(a`c!k*vRbAw9tb
z%=7>N*;tiCq@Mt=7q-WSrFJU6vY<6)-eB0n98cSk(kQY@oTHe$Z_TfieQ38}3H<5#
z#o6h}+xFqf(c#GfQ-vPOw;Ey0@jg`MERT^O-!3(p>GP>KH<M?;yTjJJIH}oHk_<a>
zAF!0V!z<gBLj#8SsT3Vdn%iS$Wf9A?Pv#}cPtGej&&1&#n@dPr)=*K*TXoK{po`Hm
zoBHQUCjm17UL+MOX|~ZlcqSqs2p3!P8}TUo<)QSb1t49iV3Zi2RD37J?J;DkOt~VP
z!HolRs?f?M*<qCW%a`l5X6e7|dFIugo*ezXO_$A^(=$#7h;w!BBVQ}?h*?(>e6h<3
zZQ-YFAu+&j09JesKW#HY6a+pAsO9&R+Df7ibPfLZJnCMH@^6zTh5-CCqpx;$s0xjk
z-|h~sHY1~BUSDo}l-#Mt+xd+}Ypc)=ej!E#iH<&!xASgUKs>w!h(3~l5RcUiE7P2e
z%3+`E9Wid+9-h1{pq;i9q~~`jNNyvm`IH-2j;gq)?TDi)@46NfEH+fya&ursNrjV4
zH{4amcx!fW%T-3D{f*OhU}vrwc^6Tf<|zE2{d25wEVxMf*yaQ3P!#N{+Gg{Nfaa`1
zzv{t$*vB*_-w>VE2&(z#8zP$;d@pHH{477WNS8G8Z!99LXFy6*0cJQ3G3`%q8IOW%
zj8fjh&|fP0K=I~LOkQ?8Hv%n2&A_Cj$sJj3y!$|K!UqQPjns??gw%=fmp+d$Ih5Vq
zWUJZ*UWNS#UGc+VjA7?gf}$QsJfMRE?IRWZ2r$=?04C#ssXPT)Qbou;^}tcEK+Pcf
z1<3zv8ab_-a}@Pf1m8z`B8bVgCBWKMwScQ4w$1^Xyl(u>IF7jIWCl=n7NS~=i@XaT
zM8q~$D6yl2i{P_iv!wg4QL8K%_tW^MPtnVx08gpMA=s)TW_ITG!LIh|FjK>rZog{m
zVRY>)nzsA$Wk2b+=`6MBEbH&2cJGDOOUR%syz{o*Ub2v%`}s-I2RXC|CQyE9X#3qL
z?B`6;EH3!28^--p=1#U%WGj;&ggBe#kFt?rUKYmXwl*PCJ<TJ`4;{UVsjgt!?$WO|
zs_CQP+dwyc88i$KcI8rQjiP8LNCcJd0O!CZ!r{X!%q0IWXLytvD*sz?s(V5+&QNB4
zazw92x&kO=(f(B630_RWW<qFf1Irrsk)96$_SOtz_@ONXrt7&o&UQO<cxnX}w6?Wp
zre)9b?4TW&o^I44uXK-xyg89#%ce1AV4jJrTf+?Fx`gcIEOUX(OeL4sOd7^uJYMdx
zFN;F!^2;_u@-!%!w710WJ<hN^5qE;U2ve@+dt3RhYHjoFO!BSCBU4M+P5Jf+n56Fj
z<6<cZI3&sbz#XgJH-vww4&3lt(Xm8_iZU@zY^g2Ja@vRK_jH504=0pyUp_q)DAd_7
z4@;bI^0twi6nYwpM0nB<Rz!6uLoL@GaDpSYL@a?hfG>!^{9c*{?2KeNQ5TlI5p8~*
z#CWNNW5p&}VPKruVlP+k80_;#6*_nMQS<yI#i-h<?+Z`hUYQ^Tb|y3RmBRTj6!!#{
z^GV}rmJXkoa%QFG<eiNoPX7gIg?M}TXN~XrT+Yfy?$EJui^d91WvD<@V#*E_WZ>Dy
z!qcT<kd5lAiKh@1-m6C#ehj==jB_{0G{{GfD#AB4ax#Mi4lElnndTZTs^3JUc+sI2
zpN|kv6JZbY;l#|vSOy;+=w?1gkCE~GV(;Q`zrA;UesFftKKg#|=>37g=j9z#cDcc*
zH&EzwZKj^~6=<LJf}NfaDep)L^yjt9C(BGa%;`?8ceK15n>M=gdTVmcmgdgQ%{uY9
z%+5NE{#cHPBp$|gd5g$nD?h`KX~MTpUlC~OC(RFOb}mst9Ox^T3_pa!WDFAL@%i`r
z0W4#IOsisL`=`h6+W3u)3XhxGbVxBpNU!JGHi7pq?QuvGq$r&$2@Z|6Ye+k$E!Pye
zsP)!DL3IGk;@7>FRc1e;q$R9T>VocM%yVo*mDOW~8S%#}!;*-<DElNm=BkKPBfRxC
z8IG>TBQo1jroVt6$Fnzi-IygY1g4iNd&}*@Su#@m@i^{jb|6e*tH$GoW8^YbI3WyV
zU{7MV!ly(@{QuGd{mxFK_U`QT?b+V(@!`qa_QBcN>6yz<r<tA0lnr^ztSX&}pPdq6
zf_*#+ZMhF(UlQ{CAu*Uia@dReVPD&G-DiSa+B7;(q)WD$*gdjn1D2>1SE*GhrBdLb
zsh;DyX=L_L_;h<T%pUHS>Tc>lS9~XcV>=ioPMs1qJvfi18H0?H*Y&H?^j^O?k4HaV
z0`|~b+j&K@4gXkM@-n58epHfEe1mS19xERk^ZAvD{lxLE&syHlvm4(tF3}*8j$rp<
z47e8sz(<%46!=)`9fLi^^)rmhiqGsg{20bvD&dW5=M|boE%7nImt)jj$mBPLIax+C
zmPoeV&^!IE-)t)4H7(Jw?R$PGYYp}Gwwq*K4GZVP*+OPFi%Ki2Z5l0Q8}$yC_t-F%
z&!y3kO|Sy9uDqcgC$=)*37Kwd;ZbPGvpLSyBq#=>)|Tlk!}ZAnaw#PrMalM!$|??@
z(afI^OrB_|uwhvqm*s6lB|SAg5|!6^(^L4MKc5BKKA+_mSkhP2v%rk|Xs)6s4T#To
z^weh1qq0QwOh=zS-%<5V0W&_~Ul(~%1xLef7rkVFo%W*ssNGMvbZNy1+jSJCn$aDu
z3emfr3}gXudpVNP>>5r#MODU%u($Mt*jxJTv6qf7=Z(FIC|ul`bT$gHv?#HZ&sFWG
zJn7phP-x}a0XA85ywDE<yKOm?p*BdbYWZ=iTK+9pwX9c_yIa?Bw>xP?<WWTp&BF1|
z0(uI^Nu|+<tUM^d^M_jnXUuFp11`<(zWLZD2Z^YRl?}VQVG6ZOoIx!pwH{P5W?YNt
zswQkMj~7;HR40vp+K<v1#}{3T#&1@OuN?NquIfAUE*$oC{d(nt$5xhWdY6+q6mktS
zw&VO<!U-&;QcOMHPcYBRyYQYa@|w>@9lDHvZtP78jwow$&r+Gf-xFfo3Sr0~eUENF
z$2U(nJ92G#pUa*p4o;Kbqx|X1e<jY(*JlXt=Z49VEsm1eu_VBM+iGvVt?{>6Vt&37
zwY=HmB&Nf$RNL(k8Ta0BgengO+N{y2(EMnG(!JXS1(XSRHY)iIW;A(IU6cKRjPUiH
zNQ&m=8cfULvjJzIdHe$0<U20G)Utrer`#Fdrls=j6c1{O^Z>CakC8^BjV=)ook8t%
zXf(4$mBFT!J;-B(m=c@a+f=8G${t!ocIo_4p4n8oCT?pJFm2fpBpQVYL1v4>(quBB
zE15TC?$;Xr#GhYAbhE4J<UA?K7*JUe%PQQsBzDu;R9TZz&H=R=m3=n8*78M2?}s}(
zr~QlD=m-mwRoLiHIaRA1GA}EIEvZ4_yBJGfX`qXqF>?nxLulFMo+P74m#P{W`Tj&T
zIslDOD2>P^Ai39zM<Yy6rQ#nbqpWCVtjdE~vsxPOhkW-l&I(l(!*z{fG(cA{13@hS
z{aXlRIy-?N05yPoD^O7Tu!FyVd|#!4bNJw!a)r5TE{_^uNY<bpT#CA)RZP{Aj^fdn
zi|A;AQe&_+e@t(aqIZ|0bu!02i|hkYjtn0luDaunY1QW^;QG~IpqihKM?ieg8jx|B
z+ano|CJ?cDpY40<24(0A@sFF$=u%b302v%Pz;=wNqXv^HYmtG;f-FN>#7`1y78!5g
z+i{A;{os({siYLbua5T4Ya)e>oPULb8-|f6X&7H|$v|BGVgDUY4{InT*VmCQ>Ldw<
z86Le_q_AxuvcW}s`M{~?V0SOh&cBz{{qUD#ngWR5Km+|K&CVND2%DdYw{bO<oS5U!
z<2k^u5wm#@7-G!Vk`TT&pLM->0HO8{U_?SiN-<G1vg9E$4lGTjh$4$yse2)>&^##S
z7bl>u;Ro2gztH6Vf`r!#Kkzu1HA*D$7*cie0Y)q7@^}W}<J{^wN98UCvf(fhjI9Nd
zuVHyU`lKJxYhM2^qu@5_Q+{R()aT%-=aKs|sP^DmTqB)m6vo}$Oo@3^SymdgNWYvs
zY7y5Xh2fcpx77_3%(g=JuFnbh{4N}%d3O=gV2^63S{f~XeV6C}vPVhMR{W=5Lu2G%
zrRpMmMR`Iy;hU2?GOCsCCKQa4MhX*@jt{m?>{-jYK9+62ksT(Tv9j-arAe)m>64Uo
zAQHMd7yK@z&CgSv*Bg;X(C#Mu4zQm^$JuS_BzJqAj<miqWl|1t9b%DK!3_lb;MoxM
z838+Z)=5X|1yH%URKC*diPJVpG;K0NRiHVlC`K>^d=l)S79=sq?q(K0!a*i1U#Ls7
zEZApm5j!BDRxAaPTcElk^*U*K47-Eduohe<Nf+*;QCL;3GIRU0uo$4;7$I?S?r=Ny
z8DIOy^u@BK!i70bV0p4-Yk4_%bNIvYfsE|Ei-HePG~lazkifJq<8C~9m?q|xLtZCw
z0dJg)6$?vno-KI1UffnSem2`RV_C{wjO6GtzZc`;_f9dm`|O#0^5jhPpJ`)=sZ53R
zsjYJ}V_gZB&ZQ*pB0}B5@0D1~m{|#RR{jrm)ZyGBHknbT5hAf%To({_K5c%9B=;F`
zSB5(f!lWts$uAct%c^j2ob#2E`@980r0EQAzIK@pTf$lM+AI_wo2KKMFk(<b^+<T0
zrua&Xz|go=FU~o;Fe%lNLvL38S?PsWGRqRvlumf$+yCEk<H^(Y$u|CJSD`WfbSq%*
z-CXmaMbH$^5#5s$Gw?Gq){(+H9u6Z#;f4dWW$*~ug%S-Ol;;I|UMJmda);I)^6^Mt
z>adR*{pjSYp>I`c8=m`>$}?kjFg{ZGZ?wB|)(Wd}-L~&iVUEo)#pZWon{2Un^rU3b
z`YYchVqZ4Sp;eNBJ)(~NVj1D(OZ<7k55o1<HmVYN@^j~duXu_`6^4owro(x1jEQKu
zd7;hVi!Sm?k5nlv!!;a}ZGKVa`UwZ1;oZk@9HZ`o&he}sEi%s(P9iCM0A)a$zd4Kh
z;+-wJifLQ8y87h<9%TH~_Wh4jI$?S(`Of`nt4P=MqA*VBfE&<+A|Hiasj%W4Ox)`4
zU{Uvbm+5JrfV{)&awjmeCF-WTm8B*LHP^{dINkgNjT>9=&vqaq;-5eN30L#0QF{o6
zh@oMB+yj32@?|gX$CaOIBCub;G@-@}H4e6Ss4oJIZF7yWfbcT?o)YNj5#XZ=dp5<j
zpR{{ud|gqEce4wxdT}9pXkXG#HUdYta0T2Zpl0oO08kaktFl;gvGCLLCgCBFSq>B;
zXf?j|2C!gQotjlu9wbUeVTk2;5*~3r_!;kMj)zOok2@ajVVmiA5RmyDkIY8rZ1WS)
zyIM)rySOa9PoQ<(Igi$xPskx$2Jwh<@yT;G<}A0)R|q~}8(=a_=|Of+DVMITP)DLI
ze4)!(&MSHt1XL>K^!2Hfobk!szd8_=Qz0o2d#=5uUaM328WE_#?Z&X%rafqh-YkXH
z?2>#;;=X?WHZx6<^Sc6=j<0Unyk?d=%uE0X5LhZ~TimG!6}=!^owTzb;A1X`d)*}L
za4k1_!;RTLqdL7kMWhpqB=NK-CKfc~`ZBR&DMx~}F+XQ}l(aQm{KTKI!%PGB@+Ae7
z$?R1b_&30d+UadLh*AwKGkH4*0bAD$q|yg6IT!uH2K!lYH|~eS2fflss1tt4xV0Us
zxv6^==N+?FHOgdhoU~&<42Etp!`VkxVU^C}#dRvi3ZB6c!R%C2&jEh+IG|ENOnNU_
z4)%0Jt+tI(uGjRX#I{~$OaI8puC_dF$ljvl52Foz;eb$We8liqz)RHi5CWO)gZKR*
zXyp6JxIYqaL6|jdcq(j9zJg1J{jZxO1ANk#0oH=$o!bRt#Va|Rz)TF$)ZF??^0mCa
zp#8^y)pg=)neT~|Na<){7|o<Bn24n|&1WfP>jfbY(?KE!x*aCtn_Id`BL#b;d_u(N
zKaMzo@B6_~;-;(M>})W167;O*WcF~HH)<-5zNX^i$?ea`Cg6yOv_UX*&Dpf`Xe2kt
zenM53DU$JpnX__umHXWrTyrvH(BBK4d`EAhu7Yb20d1y93Su)=%=Jyw2l4ePui>_5
zww_6<xruop@37K0N}ljU!zqT5Vd;Fw0a(d2^p6GYVNBUx|40nU2z2hXJDf7#{xBV0
zWk0;;8E-A<i0Z=GS}V?wO!3U^zU~J_ecd#jIqkIYcR@E|;C`^h%Ukfgn3o{VN@o4&
z5uo11-32}agv&1lVFb?*-!)^epP^3xUJNX`i#vnBQg?1|w4vmN3zL(%XBS8|dtOw2
z2@<W8@H3C($wpzBuXc36j!2(o05!{Kd{Qn#CeazKx3mw?q$$4^%-NiJ+|kpOYR|Ds
zh)+qtpFK0V9cX)!;_K$5>BQ)$q_|T9(6zt_rCTu4l!!4@qVai*(eRJasOFRc%Oaiu
zN*!(zI^}F9P(o0i6kyIK7R08M!iA(67aN~!0aagOBxvN`V&Z7o*H`vKaVB>cvWW6{
z&hV2LJz(a`G8N}w8@xe%oW~WamRPH0R1I>skl*SadbC{+mxV4FR+SnbRNxnmMPgEb
zWnlF>H2Dm#WTsK`TdX@Lz7^Ju1)>JzVxs+5I|53TidpdHcjXrdKsZTJ7p|Noder0)
z1UAEgJH`iw>y&v%Oyuj^0jF~!iR@ookfWB9;ptz7v{kK{BsgV3KX)ZHcT;MTK7|O5
z3*(=J;9D`*UbFVlxHr5$cvW!`TKP)GB+A21YKnuaH&Y7ns~uUD(ONMwL1zURv`r^G
z?CfKef$iYC@lAw3x;=|WTnsWduLJt|grPC4zJ-=_Sx4>c^ymoFa~&NXA71FXh)6cM
zhkIM9Ld=5%y1pgXhr$J*DkxA^Gr1PlXB_t{yqk_=$UJ!6XW(qi#3sO^cPjJj>>2`(
z&lk+IU?~*VJnOdI!%VCybyN$6Xmi<itRywpY62Hw*4|)h(PX(n8k#s1Dd0Jyup;go
z%FyS_59O{(51kpxZx&EP87O?zO`DnLwh6@St28MuzS#tnrEY$d5Cs_FXc_9&GcVER
z3+b*PEQB8SuyZXc;Q(clpmu)}X;&s43KwdPjq<yWGEm(jc#dfF8FxjbOZyEsWD+Lb
zZKwsAoq1+n;~-{#nOJsD<rjtJS?F~h_C@pFSeR&CB#cQ{;Fo3*X>=cX2(GZkLoEQ2
z@0KHUsBjg&ov`guKZ6X0$;Y^(-$D4={?=!W;5E%^FUiU}eiM-`zkxiBwIU3BEj3Z>
zF}EqF3N^^+p05A5xa)!;GUcs5n?ukSs353UcnB*Z7P9U$As#8ZEG?i}>nRkx6pxNc
z7v0%0iokNCy3Yuq3nEITJrJ2;buB)uZb->&sE8leaaI-ou;3wm=^bp}Q!iBx<k7UP
zzP5UA+hoM6!(bSFM3eR9_~zz;f@%YbK_Vx;tB@@=A%<RF$9=Se52LFwSv99S#WNgQ
zXCoW-=J}c6WI|F;E(`KC^(8)Lf=)!Q37p(iuJWVx(m;m)&X|IUyYXnGGX3!5rFsGN
z<a-?@RQJs&3P-OC7}w{<`_-pqkdH^LS1s|sapoNAxtwXQw}>F<jMIqJR|~x+L}#a*
zsOdOi<SEXaTFadGFH!bJ99((cwAm47pW}yN7$xPo_ndKZ%-Axz&@tD43XZnb^^-3-
zavTT;GM)Wc%qrftv4r;|P&FL+ot>W+yo1{eFn<2LngOfOsrj5Aos8}xbXwEP#jDu1
zC&%k2#XUbErXo1SS$B<p8}I%p>L?RGadR>WE1y}Sz7}Lk(>tLfMdGL#cT%fZzq-<X
zu)Ju}Bw%Z|k_k#HN*~2$1+TM?@YT`Vz9T~^OY0Fw7<6jJx|%u$TQ(+W6EdF;Lc;#9
zIZcfCc(+`{-2@=Y%95w2$Qxt=9OohPak@;Ib$JX$ARIW(czIUuNp|m<E#H%D-^x7h
zMC;PxLA-6)(w#wo&%bmEEX)%8JEy|JwN0SJg57TB?)K6bzT3rFjep;}UA(p#cYDd%
z?n2?i^CcAkTo4cK_%+Mx7iWiiCvV>$?VTO(!OuUmzgzf^FX|7OU6=Up^^Iur#p>1i
zRcG^hnHIDRxLL+Z9LrpF@>Msv`cS`}a*PK2SzleHf8me&@A`UU{Xd#3jkT4=YO}f8
z`j1AdvC`c5kD&2eAYcq+IShhF!*Lh)JNbibjnXW|U2@rzNnUCDYZ!Zh$?5JQ{F^d4
zsc@`re2M9skTIffV3#U62VwUng3s&lF@Ce33?3+1UQpSu2CYV8bE(y6HiEYyD5!78
zQNJHuhT~4KeFGoVZ`23>8TWzb)USuTUMTBJ2r76L^^<rM{8xCDTn5`e;Xn0D`mEmk
z&zoKtqh8zF6?36=o+sns6-Ib<B207EBbk28wK7DuG_1Izk_H_ozr@{ci2dKO%iK|X
zMS8@?deEvj>Zn6t8P36j>hkU=xC`0(*@>~H!sQs#*<fKaw0?{vzH7nXlQ9KeCYK}h
zo<tKyI0|k@qruDN<sJ`RPlh+k0CO2eTdupQGQL09zc@X6b#%J_SKuw*uot=ay9I|?
zq1#E>y77Ga`4-}=RKH}Z#Ty)A+iXv51FO=D=#gpbf84j=n>9OvX937>$Kg*fEReys
zK@*0+;T{bLfdH%~yP|*6u}kb>X)KjJN=p<iI&k85ujBiuvxIUes;nKh5b^Iok4v1R
zPXiG?t9tHCs`!H-JFZHk^OW61<V`yYKah#hfvO<VQh6E-N4Il9jAR-&%Ay)nk^%cu
zSADgWhM31GrTkl&=w;z-TM`MXYLW;`HKH{Um{Ex!jczDe1St<S`nyVYz-$|Q`robS
zCNxG5{U>BqFI?ith=!#!cX#1~$hA05MLj3#`CW1seZ;_KxcndBP~TyFFyJR0s(4TN
z@%r&#Jfhlu8o@@m3<t9zC*_J!oY&EWg<1L{M4FvE61Rq8LM$}buq%P2ym|eI13F;V
zyeQfvWB!;FFcw=yG1vig8)IQCWrh{oLkaL+O1kH8Yl7A`WrV`$>}+!@D!Y$K`VKH|
zZ*(jCzhP=l@Qs`e5=%`+*-&mi&T#l+XdD)Ti@k{8L?0Jw+=q_n&noDW?0G%lh(wQ~
z7-AImg<rc&i`2g%=s*>nBx0_Qc75C_ImceqONI}a_cFPn2e0W6Z=8}`+`qv#Q*J<}
zk3Tk^zN<|*XXfA4NsV)ZEWP(k;A6V#E-875WNL{#{yLcjq`FuZezodj{Z3b56hO0_
z+z!+Iq|1PUo#3m=)Pq#Dx0qRkOg@5~oFBB`y?=FdxX*6cZ}#>NDh7d67mCW#t7L<(
z%?pRrOcZF6;S?nzj6o}UQO`t<UFqnHI>t!-0;5T5l<-6st?eB$k@zixfcA}=^F;$b
zWcY>Maf!HRZg(UPJ)@!=g)C0CPfw2iZs9tU!|{#5ubvvIend`<yT`Ado;=GN25i~<
z_oDyb#c6apnAQMLtp9H`R~t=N|6gmZ|Ed4~ssI0}|4-8Y-|d}U9PS;p&kkO{{~==m
zaKFvk04%*0Q!B(__blp+?@?bZt=zulxV}u1ssRH8{bz_UNv5`PQK?u=U4+!tYsKTN
z&_%<+fJ;)cCI~=v8hIOH)Vqv^rvn=neHe&f8LRzk($rWF=)_l{tSr${pR0oDIB4aY
z@39QR6{rV5{NH?+4C7yt{%8heSYgRp4gjU^tk>#16*x_l?93cmN64~T$yxemRc^H^
zV2;5uyzlwLjBM8*iv3{0USV4Xt-7SQRoXT+-DtMW${p3t<fvfi{s9HmXEOo#op~e#
zjCaRe`)$q&kRM>_O)-#H3%6wd9|l9}EB^KNvoa>I=h0yw)J8p3L%h#8DPk~-#L&);
zIv0VEE=}M$zLEI)S^!d?E)U2|-<qg3e*c(>`noH?kD0EoI|G(gi#^lc0WjZMN*D^W
zqOw<r8QQ`wDP{fzep|c;0IV3J?1lQtj`EoAdDjZP))hIB48Ck*W%li0hSSO!B?_X(
z^k|qRReQTgE}UOs+>_P9nXo%wnHEGV&4v1`+g&KNe$5H)6qq6qhTF?Zb<AoM+$r)^
zRbuTK-56L5RD4jWYR#(G=3A%&|5R%_*t9~P=jiWQFfkSMnr7=dSBg%f994N23J0;G
zVc6-TXqe2|RKrMrsGx8;UY2=6u1kTZ&a5*NIoO0je0ziV_P~>w1sI@}bfI{%YAT))
z4Y;J&+Ct^=!SSnuGny@^uoeEbwtsqlQJoNt`Qw7lvmY3B2Iz9+NZOKrek6Yay2@Fm
zYk%_%PXv%N)t?`YeK<INcky=&+k3x%(Wb>Yz^juXN%o)@6&RQQ))aHfiaNsA!7z;q
z98lnAKBQm8_08E_;gsWM0ZYgKPlu=MoKy_-Oc<XKG+7FgfoaLLD^)vJ5b->DdW!HQ
z&s8|8^Nv#yOZqy`6>-6PD$<|FpssXIK*i)IP%J()?s0+)S%L$kfpM4i?s-tbMoZ1=
zqNmkfhvRFv{A2ZL5CZUgt@?xmrK9k&8=d@C!(9Tg{$_*SJ`9qPF&YNvEK1HRbAP1-
ztW9l=QYt8C&<t`BH#We|Fu5LuPn7@wILJp(F7%i^xgR%0lxrUgB<yw{*T4VpsQy<R
z0rv<9T*IXZfZmJ+eHS7}ADHfe`bu1|@44;(k9%P1n>km_!qivDg6FE3<I&?A#~WK|
zEv{1K{5$!VjTeS^Jecx2Sa8_Bwr>uQ8*C4DxsLHCYrk|#q!@G5oeuJyImOc0xZ^bw
zh)qkbxBYsG9HU~D2C7-VRefEh!ZDdF+OyGxmr2U$N~V~<>q==caU@nLp>TLB3F;ZO
z!?zNnd{-`snAVsKji{&VZC#fmpIms#YHmVHah70<(>`A!ZBi>`Z+(|W<4$545;<-b
z4Wp(tQ5Y^&oz0BhRVyn+K1GT2DMF8yBs)U-c`T{HAcF=nuxBxF9fg>FH7)N-1ll0(
zNhRYy1llNnF$cqHVYve_gVJ>lWP8_%!?&oKcSW@z{~5sNtzi?7Yv}Qrj3*mYY?z)n
z$P#^wEYkW%yDz<dynM-41NwU_SYBS<wrawsabG=@tkhQ~S$~Gnbvuo^wBBudPP=Gc
z>bE&b5K)``J?rFKlSfclQ+@p<yZ3#E`9k|Rgrmmg-2`&`WR`>+3z~E3wRdE1aeL^p
z3DyI<3LtPTdJaiV%Jw8?CnSyb?!*p?JSQ~V4P?{;nLcxLUv<;z<-$Uqc+Hv|%v)34
zx-}WQ@sy~roPFZdu-)x=je1pKY-`k1AHxv0lVhP%y^OI93)iI60;f%8{N2`boQRiP
zwz8jeL4KUQ|5<YPgEj)6k1v@uZt2>mD#An+T~@UcupIaj+->RjQuvOBS69i9ZhY}>
z{o|E!^%lC>sId_o|0Hug+Kw>depKx%%37*JIAc!{*W!f$@!%^gK3vMxq|$>^3kl_y
zBiw=HB$Wl&w&`dR_|~!czI05@TlqCX?IN$@)F0fBBA7ylL@4*TpG7F+A`r80J{4J~
ziRs@%NWS+YKeUX&+e6kiG~a>u@2XlG3S)vSDQo<QQq*g|axCCZQqiMJ?;58|Bi$~e
zCyD^^tB7cnWQ6;P^{4ZF!73sl?D4y!!-MmR!CRdAnZCEaot<X2h==_B^nLpsj`O;`
z*FHP_n{+&T_5K1r*grjbe=NPn+JD<SZy)^d4t_a!U7cdSnkGCm&QRA>GqxwllQvf2
zo3UGcsnpJ9|72pz7s{V7hXE9Ho&0`OTvXW0KxBygoEgc)qdyfRnaGyr%t*wApfWJ%
zo8h-&F}`gn!D?i;CuKQ>NBE0mIt5Jfv$LG3cKT7Q$2gNy^B&^_PG&wnxA`2G0n6)}
zHT8;pxSt6>w5sC`Kp*|;ll)5gdGi5rhiXUDyr5ep#X`!mmSB#p?zq<vGGSI0r;t90
zTpbKebdn({#&kjcpu*~F70pz}Jz?lE&1TxakfDf=ug2*p={atG_22}<ohYP;d@y81
zim;Elv68De9ATa;)jl&vzk6P73d4`>nqQ#jPV`wY9xyCT*fa{OgXA;!Dn}G$^saiP
z`LY95ENZHi)t=eLY-j?mnrV!f0V&_p#1w*K@w@J`H3LxbzNUbR-ya3ke5E$Z%ZVY*
z^|y~2rSAv-Xt?TqI1Brg`08=k(a+r;<ftznHHLn6&JOXN{O)n9828ki$BzSr&(Mng
z;O9pH3%gwjYp>gV9H{<-0@Z)W12xI;mgNof9K22Dg<Z?9kkVZ~YRdRdEa!}gNb-Q4
zVOTN9j=rnXnG!S*ttSC10ONsRUkdb6tu&XTflZ%NTRXEz=QrT(fq5Wm#td_G3R?uI
zC>fyY7XnCi=m8BzDTcKJBLz_?>W|DjI|@O^5RwD}#7w06?Ml?S>KK4Mck^Ay5!&1b
zpB?ELs4vU?#fCg`g>qpGYd8r{vY@h$MJ8_5r6b;pxR3YE$lr~hjk1!3Auq_bT<m$G
z`(ozS3k(3BD?Qu6zPzhNGGD0;2U5EE!|b=X|EJw=*{#fei?${AJ0p+;1_0lFXFAM#
zzlC@cAVZnYMyvYh%1l8o&O=~T0Z`L0j?!9y8Gmu3$$T-4Z(`JkL>Ln$A;2se-RNF+
zn0o6Cscp*LH+z;K0Hb+18t_Vtfepq(3Epn=CWb#45%j0u{eH=9@0Z?hXQTVMJN)O?
zeu-P_2zBMPdspn+mg=_fnyVD+?77{2gz<SPqsB=j6bw`>|B_;uGwl56w)Y!Tj<cOo
z11@)3vr2Jb0Pt*Bw!@dWK_O<8H1`Olt>JYSra%>DKk_f1cc-*1q2x{KF)sub5Z^uO
z>GV6^F_FIoqwD+lIMi2#ipgq%H~<AM{k$8i3^?WX)nn5MOc0)3^>mrLpgiu)SrM8f
z!j7jFUB~4_)^XVbHnvXIIPXY}pAhY6Pg{5_))yE2${WBgskm8|yg+pHDQs55xgkTj
z^qL&u?lz(vfj?ei+B8#RPZh49*8r_TAEOM~Fjd5lm}qhwIqbn07&C8N$_gZ$AJ66-
zf(OthAZh?276X~c6JW;WP;VJN1McQQyw&Wt!}|#T?nWWHH`8mK+JiD%0J#2)JJ5e9
zxdW!XIN)x+9Wa|c?GD%;asq|Z9)KB9ew(C`!uf6Zks|_9N-aV7Nu?7z^ixB`<bP*w
zRrYS(q9j2^1&%AcjHRbMtTG*SUcMZRhMoBOx;+Y}zEQ|lKjodmeM;Oa)UX^uIZp$D
z?CK#Zr|G<Jmw`x7|C?UyIOvqSKwU~~0=6r+2lj(XAUb+>z;0MpR2I^rZftT-xOlu+
z#v5tKk1L%rSk-U&ZP9HqT0MPA;8ip&OWwX`ra;5+(z&ls#c}k=C3wtxi+asd;2lxn
z%)J;31xvRVJ;i#n9)8Xvnc+S4t;kvVvRSQJZeYe2yBd=F5?Hjgo=$i}@n^z6{5qs4
zH}0XI-&{ZMY?dqLFbqs)fzDw@@_nSlAyLnQe`A^#lbw80@;c@n7t^x_PmF~X$E!wt
zsRF7LW^G5C<>lby^x`0haaEXg23Ri<@@2#sgvmo55Iye1$zM{DeVg$Y4Ts4vm0&b|
zEWV)~dTx1|VMggF>>xaol(;Dn3<?3apn1a?+kgLh4~rFwRCV|?JA6>iM_H>O!`O96
z$fe-o3D(SCbOYI?$S~<A&|pVQ%;#RI4rqzfrcmS@Hud?g;jZWS8W~`&MRRMR>Qn8u
zx2*pJd(d}u+l#Pfio|~!7%eoj+)oCv2(lE?z>gujm*URSIn!|?bM=*VefLzB-Ez*a
zQ)OK>Arv^Cw$JyD586MxJ8S>-@Z@#H^*8WASzflPPwOa`I6WDoQbx6>g_o!L3>Y}o
zyS2%RWBjf_X>#c5*{*^M<Y`1EIL@QhY&^|gr}IVa?(^T*_D-`i&!^m)^YJC-YQGwN
zQ5&@M$K9Z%DL3eN+#SVmzotbSA&O1AyZ2=Kv-mmpXYcH6@9(fb&eqI#OVxfY7Vg*9
z^c0ZY;@2^j4F94wc=_LFgO{IvgU^iMQSal0{P8K+uQxB5pBna2L6ENGhR>%+8wC^f
zRjjkaVG6>xeLkR8bbo%Q(!q61?CPSQXAIMCdzt7PeKL8LCN(rpvXKmQw9#D@^^53>
zDv&<~*O@?PdX~R4a$EWXklWHc$jz{4Y0?sbM#8L{!}65XLfm*)$q-)&FckvTiTV=w
zpjd&CK=1RCpMMcbR{QKk`J3G5E*&pEH68yvPDb)AF+-_}jDK5kZUW)TDSt7PtHjNJ
zPa0jGibnO5LO+K_C%=q|f$>oR{h-2b{EP9dK;fd<*7(4AN##I#3Ea)G*#nKsI*AuM
zXfjkZAFJQo7|k!8e7ynHj|djDsNTd;UVL!<3UTq@V#O6tbUreYrCoN2bszgq<Oo6B
zUjkloB?!cdE>3glqr~=FyW5$#y<9ch=&cU0VN=vAl2ENPked#XD{}LD)nWe~E*HC6
z*66oU(u+pJ2PENfH_F<f<s;U2NiVsH`cX1YUBkK&#AUQ9_eACG5ZvldEB2$*#3XUB
z%(#Eb`3?7nNt&6*?6-(`DYbzHSNOY?jeioO_9ff;PLXD(U0CrUQ*tkpkuH;UVNzx*
z`_Ib~L*!Jt$S&hJ?vK;cJ}$@oYG+5hY{H`CPcJjAmYTX^pj%ea8#}pBkP*$PMgZ%q
z<W0D`FcrQkGi*zB!d$I1FTd?vGy7}3huPaqd+`+8)BKH4+k6tV@JnK9BW8+=+>`h&
zZ(6x`zHm);H!H89emV6lWSX7J<pNlza*CZPg2Sc9y|~W=ITt!^`}zg9Sy^PuQ;5SS
zOc@n8KG9{%<A_up0cQFn?>r^b*KdQ)IKq$UkzUl|9^z~Lf=DUsahkEpg53?n!GIkL
zS&`CdgTszYmRK@_a7wOdcIH?+0SQd!I=C3aC&Jf@lLdbtgZu@Q@@klb9W(`t63V_5
zv+A}LqKff_o28kK9S&{e-}JRoxlkw@t#VOvx>ig@;*;JUvE<*y_83`TzJ%>LW7JL9
z9-WlN*q%&RcYDegUUf?%{>Q)fpVIYTnJT6&{aT{_Yjb10*|O`uHaFH*8-Lb+{X^D&
zov!rjAGU7m_lM_)Xk>FMw(8HbwOWI`3Pf4PERJg}a9K5HX@B1zCfzRipQ}0*AGu2#
zy%CA`8hR((B$%by=!TUamIL*Sl#@vk?IqcM|F@vuf=uh;lf~euj;KPQVYV3emF<&Y
z2G6Lxoj|}dxh&LucrE5Sw-ufgdaSwPeFOs#L<wligo0nyDK?`5YsIR1UAS%4k6R0Y
zQ#z!?7I}BXGt(wl9epxHiMv;PUXT0n$XITp7Bpa~^v{znQ|SDUTdXL0IFTx{tF#sw
zAghz9vVpRjTky{|7XlHfqn|&IrJkLok(iMd`{4b=@O~2TIHM!N`$@vH$8HpkoiqAR
zJo-<@mZ9K;MO~=oaM%e4=;vEddWS_xxoN5^Y>!Sk>DwliZZgx=F3uA?!&;Y=Xi~Sp
zkFfG27PO?Vsrrkrn2vzrsT`5t)XiJtNvt=y1g?UXTjQ31hFl71$I#c+GPfm?)s(pP
zf-r1(YuXeQ*d&A*S<UnN`zvR1e@Ti({ZqC|d;9ZuBHSShz{Nfjt6Ek~&(K$cM-z$5
zhk3#amMUI^bQ9K=D&nK4lGefne6q@h%4kfA-rfL6T{>hlwc{hD{87j&cehD58`x)*
zJcp+I!dW*e;pF8b6|Bvdj#Q3|{wS<bin(3GO_$uEdP(!YhLyZz?ePlaEc!|C=;H?o
z|5y5$q7E?n9_|s8{s>w;%;eU>mc)^XuUZwWxRR9I-B|v>tSLJJMgj9HuZb~p2wll3
zm};a?{LhcDqjK5&K_3&dq8?94q+r)j&cnstCD6J@)53qdNis;4fPlMr4bu!kv+m)|
zw7?k-P^>^yV=u})2MD{TDl@-CW|?=TzN>6$G~LA-|ATB$rqy!4u%b{fw`z5w;+}h9
z?=EDSIHB36wA0cnncGzyS8vfXeG)VGfdWjT|ABcXtq({d$KNXUtFO(g8S}t!sjivL
zwZGbt;)Q7=eD=)KbkQE$;n1=Gp4Eq|qv?e@b<!lFMVYki9l;ylpPZc@9UY#$Z66&T
zA6`i2FUweD@Mg<R)+}ps_53i9nIT@YQodqQSEAIEcXKPBy2;#CeKI5uVRM3GqvD&8
zq`72Pc(0C&O`^eeuAE5czAd`>iOO+nGcJGQI)S?ZKM_raQM#2Qv6vF&6#UB&ps}1g
zHQO@Op?J&CseUp9u{>-_){CrYJd)W35tn92KIy1HSE526qWLPJ3Ne^hKd&U1%VYw<
zOiJ$x0BBn~10-eru1@o`Tjtx685zCWlK8s9=-C#p<*-M#6Sn%;-a7S#xrOpWV=Ds^
zI`Whur!=I)5HhIL^<@J`Ar~@aeyvUB8ncj9cs~!aa;~W9$tt{m3bG2Gnyz-gM7nZu
zITyN`C<Xs`-S-L$Hp@ZbdJ#F}VMpMVKY_I$cC^6nl#l`#agY7}ed%Ge|20rH)s|Wu
zisVv@h*YHKZo;9+Todw9iCfEXR$DwPBf_yz<@8e-bZ3z46n1^GUH&_=-Z>_Qg*k2N
zua-WRWqwKugs%jzA~l|l1|vs*!?HY$2=uEzN97m4g6*q_(HH4oyOod$q*7^gNAcj3
z^L{rQ1#+5{d6HgGj`j%mHT`1|DHgZ=-IFBkfk}Xq{9g?Z)H%~Clf#ZiL8aoYg9Ea=
zO{i!Zv*^28f^_qEadz{k=f&aum%@wZav;1!t}<DFX+-Ydz;~meyMypi4(H8?2b!A0
z`#fq^-zd%t10YjXUpTCn#$V(g>~;?GH2~x?->FG%{>-<W=sSV;PD_?3oc?|Zb57iM
z9`q`?Z$8hp=o5KvhOqMqw!Axve((LV{XfFt)$Q`$_dhJ}bAHrm96^fxKUUY))|$5e
zN2}RbU;oqp;}7Wn(OeB)Mg1hk1jiBd7zUL~`n{g=@AaD`x%tnVUKn@b-1Vw|WFL_I
zQ~2uOhc+Cmj30>oUDg*Q*Ce~Vr2Yig-2|qjKalo;cj48CXta8DZ82dpu2Z$Bh^JOx
zBlV)ITeI1u-R`y4)}I9-+Lu38LIHz64uX$cwi!cJ6B-a4X||w@kRl!PHZ27W761_t
z6~aB;wwhaJAT=TSWGnMc3%<DxyVqN`^@IH*<6p=#b4U6h?{z6t=4oG;lGGO|$6u`S
zZ{LOKcL0(H;70QEy-p{f+UQNy^n85jesnzUx*xrcKe`{CL^pW)%#ZdiQ*PBTKWaH2
z9RrcNABFesN9WP#h)?nk)m{ITw^!r7y;}<9yTPvt+rBNN9T8YZ4DPG~n?V%g3&D;+
z_Dbv1R^FZk&V&l-7ig-}F(xM{RDA$MS_?9tT~>`8k&3w6J*3|@0BjG^N?89`yR3iY
z(6RE<0fsf-=>Wh-Os7xe<e7%$f2#+T9YPvvVVa>}jGuA`G8qR*ROttxKe0gcK~7`*
z36?1r9gT+VXb`WoDvf(_<u+=K`%V4hBR{R0T6G5L_sLKq9VaWikjvwuH}1|r1%xSq
zqfu|r)Hnio26oUnz1tZN#1uheK+4#%I=6~C-&&o;7X!Fk;WEU}0UUgMeB3_&?){rL
zM+cQwZKc+zHAMm@F^W*9)HrZ(1hL}eYcvZ#mbvG&NJWJ&2<Cj)`;R&7AQ#PjMw;?w
zn9x1VXucD=f7FZs!60nSebWGDW>Qnk3a+Y-2Th*)Y%#bVf}9Gokt+17_ix*KM~81u
z4qms9PWN7Wm?(l5&^H12Gq?<+`w@C=gx&Tqx~?n|&}ngrV8@S`_4|_&^B$L;_%sLy
zhTbeS^JhGB|LtVl7=w<%(=KH!OFQjU1nf=%IzhBp6~E#O;9~Weoc_Yp<N_>n8r0JQ
zK&_w@lH}K=y|H?vF<oNO=TY3l;4)DP#bD9Ip*sLWOGhEFT4yRPoC=M%Wi`DlX!^Wq
zGUY6G<347|yl%EO4Ga^?Q~nHo*Vj%hXqH1KJ;4rA5Wu6rFZ)-#*c>Gu_E2;t&o55T
z4#Y@43LYgea|D2Q=H&Bd%zOMK^VX-*ocXxS)U$K<g#R0K45Hm_H1w^a7L=!L`96;7
zb0;c3s&MX+iU?NR3vY-5|FN>jeg6r|p|5a>nXe{?Gdo~g`Cwa*0Ncz5+k6DrMm|{m
zRo>akOY<;Dufox!yav%W0{DG>NU&?V0XvE4wgm@HK?4<=RV4L?f$!TlE-|Aa1{
z86efPuRe~_w0#YtKmL~9j<2t~B$am2yNmt;>AhAi*u5k`DcR~NHXk{~*1V^<LQ~Xu
zzlBR=BI29Qs#-Ix3Wur%{P&ztZ-UGsf928BY|Vd~Vgn}MbiPX>7?AsPu^eo%QtnVu
zm$!Wrv$_ns&6@hO0*x$c;F&|<ob6m13-!q^Lz1DUM{eFh@ErkPEDJ;)Bzdb9LhWQ)
z;iuG0&(W3(^)?d7PL2L*+&7un^j!11(a1IaTXoD)e&M3~B^o9OMTl4S_qJ5U-zw%)
zzJol3k5#LEG&&%s@@Ytvz0lDFm!(tB_UH4CGQE*aOi$-9!9l^27BK`P<yA?&Cm1!1
zebvkWTmGZ*Po!hxN7GEQ0-FiEC#VVYl7`sKhuG8*n-d|LQ@a(G%2A^>YC7Y!^2clG
z@mf=?U?m^oiiWr{c?B)c3R<<%iuCO_aV2b0ElCHs)E3Y``s25@w^s$om5Du?*keWX
zD3Dj>gqk>^N1mkiL@S`@G-0O-+7C;es0r6`DG;7>g)zjZ*Vn+)l+WpaZ1^0Ez+ct$
z(r6hXPBT}B!teuAti<chBW|V<eAF>rZ-WXCxU<u!3S_a%Y<5nIt(vl4aKHl%gUe*N
z!cO`r=hXvk*zMb>3sj_c&XAiy`wF{hRKuEWyOUqnf<X^?V`h!_o^?NXKD+Wn#1Xe8
z0=cg}RBKmu;9ZeLy0qm@jcLXSYX=R21zp+Ve_4GbMiG}aLhHXbMj4$UnpNh3(SJh{
zWejEsb!rmb^qu)fkm%+-5PDS$6i#QvXzn~bW9qAsToPWsbnpsr9;D@c214DO2VQ-Y
zy7?LKnmdnK@QRn!gBPz8X4Q#5&id%6q2RmyPT*zn`f)Mywn&Cx-y>uYc*REeu7>|L
z?q4;{KAota^sE;}*}C90Q8b3-V`EqVPKF>&tuI^+y7$-Jcu<JP`w1pby-eE6`|M+C
zu!1tIcu)Q0c=hnZMXp;<>#8&1tU!w?+<LIJ`8wap@$2c#=GeolnbNZd=9J^v(}Zsx
z;n~x$JbPMydiE&K9zc1nXOG;&jB8J;oNJHTwDPV!NCAdx56;dyi10y%q0wjAhhrpg
zlb>{Ec*>^fXxE-OO^3WCW}c46wTGwS9nyB}nQEYnV~-dn<Ji;5_w4DI4?@zh$Mn!K
z{_}n8an~^rNiGVVncQ_~g<4UcW@im|osQ$LV;B)}Qe^zPZx}aSCx|{qediwS2*(-w
zBJDWtJYLvu+zu<=4wp=7&|%H$kaRkMZ!#^qcUKCLFWr7R6<kscVNJaOmTS+v4txJG
zhuw1RnbV9kmCi6B%e7}t6S{xYj4aol`E44x_H<@(?V(wju06)2d>5K>)hOxPLwwZm
z?O6rrj<*T>mrRF8)9YrY85jBWs5y_zOnREzP<!-DnDjH-BL?$YQ2r3kJ&UgWp?vIF
zM4x|r2k$<RkA50nM>X`^?+14gx#L40OsxYgE@5nr3xGcOoE0bimqDdB#%eU(<PPh@
z-6rFkTMRM?^7$7br^VyDm?3|mn(sqdfFuv9c1ap2sB)2BpQI@{rt92OhHMPQ*WEq#
z>ncM6`F=b~)US9y#zao8g8?yw(#|{;xdRAh9v(dNoN=BTPu`@b@ZF&e_xbLKiPnNC
zs9>V^jwzr<M8326!~B2n=QH}e_;BOLO70ftd%biD++I3UxV?xuD8GhI`+9wqUchUC
z{aN-!+i_hnpUWrCtU1x!#XmE8b)PV`tZT=Vk7)AvSuPVf@<sVgG29h+26SAT>(bsb
zjPN~_Fp2BI!;9VEV?wkk0(%jK{Tu(R;XloPPRq@G{uoM`GvsvFNx**X6pJ)GEiwmt
zRu=)aC#H`;;hyCB2sAZ+@odo~AA#cDQ~C%9+#5auz9nUS1XerNL%h2S+B?8_)py>k
zHK68{fC`_ov&Ihg4tVU2w<oH|-d)%4*uU|vPxFF}3!3r;o3DuM3pRZ|pBL=a&YWJb
zVNS*+IL!+-+eX|+7h2=JXWb8;&*t+yD2+6w70jo7!FDlP{ssGwjnUa(u)P@lHzd)y
zy<o5U&io@tbafsGo$jU8i_zS9<i51xjX1v-Y=cl&=YdxrrLKMkyynhh7QD{#f=#p1
zFW6Jt;Wd<rFW4Q=6L!b<gxzu9qi>~5B_5;;?c($v-I@74nx9_m_h>&>?k6bmS~b(}
z(f9zslPT1#7X~0~_0vHDx(Ng3nts}yV|Djg@Bwx|>e`yhyN3M^#9rInvd#~XB-vW%
ze%GAbshR22n%t?C>9jJr(+YPodR;WUuPW~vz!RDvncI&l7tO-f_H)pM|ApsW3mUIv
zL9Z0HZt%3Th;tTg#!$Mt#|2FD0$mkae0H+9nE}}93m>q?l)wOa7b|%)Yt06XXZE=B
zfR{rn=R8;}$DKqL9W~G9!M;8>G{~aeoppfbbER4O_Xw6TrAv*7W1wIbpG%U@fBLf6
z!v^$H9k!ndExxRxO8BqV)XX+-rt<3Xs>heb%vVgclbI^9@>!;8AhjF*rZ}xMx3@Q{
z|H6OPt21IZGvy~nE>3oZ=iI917ZcQ(jmxKYvr{<z&g=iF{BOtM==Kffm&RI=<+8v{
z$o~dA)o}8^wOT8G=70Nx=6`z^b%sgsHtF{x_;i53)o<vxf0MLt$9oswk>GG~czSZ~
z&Hm<Xvarncf6};*YED{1Pt6*oIqfDlob(+JB`-?uX*i6qKusET9|BP(h2E|fg2mv!
z#@#pqzM1rTaWwo3pf#mZG`DhdU&@8u49EQuU}_L0{rdGVir}whXpN1R7clW?`4n~5
z)8%Fy4AR^9den`sWiTsz_L1up_L08>_KdoTlpXZ&{~aJwTAKGuS-0Dbh5HzQ=mS6h
zCIa1CJpsjl3lY_6H1$}H$t|t&k?XbfjR&+ixQ(ZS&{PoYxf%hS+}84n(;s&+sYB&9
zuJWS|Uo&`5*@(Orh@|6IQ6J8V$*YeeTAG=3Yv`_5XhveGw9Z154<{om4|35yJOXUq
z=6IGr(|%bS2JGY<cT65t=8#x@BTDgDSMIC9<pY<2M$eaf$=~Sse1zlkkEM6<@@j2m
z5&!uoo_(W!pDN@vNV;L46CaBX+#J~SKg|Ah;wNSogoYfW0>8vd)%2m)i-4l8h|7#_
z$7ygIevE+Y0XIoHon>-Tv<h<R+8#Qo@EZ{I6gzA09UYzi4a*xHzdyP-1QCf_91^cn
zHIUw)AG}tiy)Xdm7<q@>IEwZ?D^rT<F|}{|o|IffdeV!6aT;BZyIgxC#q7;(IlcX(
zy|V)l*G|qa+L8lRfU&D^^n!-hqG7#pU83#iE~()abX^W^JNXz5+t+aa0sYroYg2U_
z#GnD!o0}_*7n_^w@bAWp#zwT%XqJJ}k8S`!TfAT`0i4ywdTUMQ$!kQ<8;vR8@XHhc
zdQo5BXtdTg*48&y)?Tb^tigm<rvL=^+7RKa*IOH#E3Fr68_kW))fbx$fU{8s4kA<I
zrwL$IS}QNsR#!Jx)|%@pa$QU}$Ke1d32#gH;K8~4g^yRWwnFC+MG|5<bYO`S!%Yi%
zcARwTCTad6jgR-WV6b`JHa5(Ts3sU%Wd}8l6RnX+_ZO!#u8Y<FdxDQ`2mrUU_w#;Y
zL%Cs-TZ$dwhVI%-Y#Uhp0AdNdg3H>0z&eIpF%g8ReuOegHzp<s*H=BN2Y0s-`9Nsw
z3TMF-`H+F&n7;*XJxUET&qrkYd`HmtCaP7J6y2O%v!BK5MLIxVW=cJ*PMK&m7;MJ4
zU)8EJHa_s;YTLp&-jIikdBI_BDirPyBV@4#67O!~t6P(mBZ&gb6X=VP?shTIo_r-a
zhkp7&;Sh9VQQ$BYdjk7_oAO}MZVcSB?fCHbC{v`SM_By+@w@ihi}vY%s|kyWbP<gV
z6MX0=y%_E@x&Qitqw@qO>zDe(v&>20N#1EUYQc;83;MIsT5YYsscw~Jb|#z_al6Lm
zDgaqqSzB+uc+uLN1<)#Tmo?x`FPdw>@EVPk)+)ehO$%q8+3)7&W@8PewZ76^S#4>q
zso6@q7x%|$8;eu!P%Mhv6Zj;zX-ghh<FlIC!0ySaoMZ+F+?lEgnn$;A!h)6sgnpaU
zu{CSQ#tak)EWJ4$3F298!`a%XZ?3Jp*u)KAZEe8qG#%j(m=)OX&3a>Fb!}yRqqWg^
z(OOw+O-DEcXAK37)y)^18_gH%8=C;BiFCX&C72B&zqQScjTR7dv(<!S(F8baF8OIn
zf<3XnLqHy)k??=9_KhdcnS0EB_=~sg$`oyzc3YUOXsXC$50zSU_tW)2Qo%z(g^tAa
zP7v_YXOh(9M1rM&pGiilxMz);STt?lFGfph;GDw9xY<}H;AP0RBC{miDSTbR4Fjy5
ze#RS4k~)`25qghaZ+#)KhY$%~$<xt$^I12T6^W%PxGV=JA$$VotDD?}!+3Ptv$P-h
z6t$m_e`@^?7F_LBVGjF?QSh8T?7_WCVgSvRQ~<o^_JbYjLSFn-Tof;~Sga`Q-$cPb
z(w~Q;N^`kYt<g`dYPB95_Ss58O=`TYa%yPdL2#EKnS^N!$_{8`!{k2hg(%g5jux6c
z<SIgp!WV$#-cFNC{EouQxEqfi>SV||eSKPCD_8Yp@Mb)uO2+5}cNN*}CzMG*$*6`=
z3QL{nW*9{rq+tFrW?-p(OS?h6UdMl6&e!qitEW=R0AI5xWs}%Xv$Q^iL8GV|mC}rw
zt#xP&f7hjmU)a9==Ir#ieRy(FiTfXcvil=(BLBB>zj4hmi0Z77T@H&nM}UGpkO4mD
z58(q!-pX|t`s3bZG_02&7t4$xl=c9PeVC<OjXDhsxE$j?qUrwHo%TC}h7BdY#Jv{{
zZlkn){@-U8-@U;d*l27t8*tAwo15!vs~c;rQd*q=19a3LH1PgxtZaa0xZZlvI#_B=
z3jye6sabEXHCNZ0jg@BOV5vDR2+RakZ?12wzkrK#bruLKGzpL%8=x|Syfy;}5FMX4
z>mY%?SZRTpyD<ZZHJZix`Z`FB8|&+9Gk^eT?s=<@<g(FR-Ixgk3J)NJkW{_`|DOTG
zCd^``zCwSR&5hORLHyjLS%B#BVs&-35iLzinLlGfvHJRp6}DIi-l27(KCtb$<5!FB
zYrgz{LPa`xd(eIlIuf2le0jJbt!@}ZKaY{6r{v{`s@pyOauS|c!=?mo`cV)5+f>Xh
zhU5Mf-VAXcE@ELFlV(R*;}ZB!Jc>U?0rRxFHVNEIP2c2I4uYayuc6dhwoc#IS}V3p
z6B+(0Tq6LxF>GYqYQpCci(t|H0F+I<?qISYva^hKPu5HO@`vl8)~w1&+k%#uy~W{`
z$-|PcL`%GJ)Lihfto1TDCqoV2;#6>sEJm3b;6y!=2!kRE62SrC0*)@-C}KB47J2$w
z&IpIe7&<g+Qt$7<P5+U~F#{};`K0(EpSTQRL7I!X1|EqO@pT23yd5w@5BTC10!I`q
z?F7v&{Q=_U9)z;{U<og3_06XM4gbNfq5*$Nb&J(D=aDPP>GqvM#CjyG2xg)|(@{om
zqqTFS{tB=x^GEX2sOI#gHln`<fNI3O^x?CFn3iGVy0@oxzoNct@t4KArxGgr^ybQP
zUa-mZi$5ZP!PO6~F2l?2gV^3g?l&}KM^xC#rEc^w>S7O+Lb`F<vn)3cwcr6(`Ov}%
zKiJxseF@vU!51qn^P!pBZN=M-)%}N7Znt%Bw~eYj#|Lk#mOCe0o~PZKJBf#ux80^^
z)D=Wp&gU)TbJ%eDSZk4;j-ybBQz*Gspo5cD*-6nviSqJ*iZ#L@QD)jdi$Fa(6N>T-
zMkS^wW-eA^9ueuK`p_yErIk4t57|#zTJwy%c#kD;p@`d2a7MwUDb)zxC*klWxWjmD
zP_$0=PA~{c{<?H{@}{17mSbMLpfWQ%3bwuzYZ>ls-qSjo$kDq^0ODF>@mVEe3J$~;
zlJQ79<kf?}CBqLXKi<Xk5cf6WrnktdDKN|7*Q~rEh_BIQnC$tL`(<wN9Pfm$=^FSN
zn#QT1j9!S9Du;X@==!919S|fSDAb#Gg4X}^7V(#1RSXpHZ&_s+?OSjiK9NossZhBh
zo$xa$fE%d)ZIsjhDzK9FZCndBH|wC4wBUd1s~a!YO3e%Vb`vz^S)Es3lp0way`$5&
zt%Em+i`CWEI*beo#pdRV^_A7;G+=Np$k>~8&}r7zUo_Vmpx8_!1rr3?6tr%FFu&FS
zrDSDV2*8;(QIua@Yrbe~tjqub{cu4&TVI2H*nF`%osLKl$Sl83f7VvlUQDM>5Cl-I
z*6REU+5{y}KoIXT-4HnOeciQskfb~SW>RU3tA~lJs(<MwT3omvz)4R7akfML33y3g
zuo4Q6M@KE-H^S8cSPpl#Sc~WY=c8)PsporE><LD0Fn>vb>O#Re!XR%!2p<X|Pt1$F
zs`^Rr=6KH(OIUghKZbD^bFbB8b6`sdNIsgP?Tl`V0N*c!*wb6wZ$5{=TFgnYTaB`T
zP_Siqbv1_TirgL$)QB}17&C~5!(^D&gOh}0u5g6OK{`qQFC%;sBVPD5E%$51!2>od
zCge`hPsc;iF~m^Ss~B^KEa9$1!;i?_omEoF=rroo^IUA8{p2U^ZXaKtNYw<5y9uiM
z+KU&fYb%baeQ0c6Ph>#si8omSac}zIl%?zaW=6}MrgzJ)WLuL~QaMT)J&VwI2EueK
zn-24vwhYG_YZxWPK<A;>axL@g`2Z9nl(Q1;!(y25+1F+GfTV`G5R-VMbszm_3fUXy
zWG`UQ$VipL%qj;S+0)aiRjSoWsaC6{TCI6nwHV*FyeaiB#^0-P-0etn0LUxWtG)PG
zzt`&rdxF-Jwtx+$63ePUkL7GIXlW_@j73KnDOeQJkReeDp3Jhl?^y%kRncm4uVlaP
z9tP3yGC`lx3lye6kXIt$Qwda~uzy=1Q3-)66L1TYi-z%4`x;XWA&fRzp)2>cU;Z2?
zyV^0Ozf~s^4TA;k7lGA|>84e{YF0Z!BAmpNpka&rt4cIfl4G^R@KO}qXuPPeu56S5
zuLMBuvff;IvC>#6GuA}m4@4)p7p4HE1;EyNb7P|kGuc?(Y&O<jG&VOjfmfGUk0S}L
z)Eh0hn_w;*0Bg1R0v6Z+w%?i(R7<;Un17l8syXEfe{RsU8qF7*YmK$$$|gwMt80L_
zR<yJ}-L%kT+^Vm&R#!Ix-7Bjv);Cw14Ok_BTA30Q5oBwlwZ68ty4h-MHepJw&5iYS
zUg0#LR*1H<e_lk5DRHGP`{wG}21w&;E6vv0bhFu~r;^py=6Yit*0s`jvD(~dt+bk@
z&>dHrjTVM2HYC5+`A#r)_FDOFUa?4Rnw&|HMmb`&W606smoB@k<ciTZAP@J2T29x&
zwf1g97|!t%!&qkDx6w#hYHp%o&`Ubw?wI`U8LlvMkeu27I_N~{)iAz9yX|H2F;aut
z5_qR_eV-XZ831^Y#52H6c7Whqh}8^CF4q*X(@``~<`|H(2`jj#Aad`w@I}3a^3RQ!
z2Zm)auugmvk5YCusN1HeYk@3Vrn8dS{8H4mm1~?bKl{a@myAKyV{^qyiNkP|3{C5+
zu(u6KXRzPU>*fFh8r_~N%WngNba3!iW=%qbE77oJHpE;R`KzH&@wh+hdVcn-=Vw~a
z&w83a`==@FjU^hk$~M&U+M&?VM1Du6J<zN!W6Gj4h(+iME%W^RpcM>(qT@dLrKCx{
zUiS(6Z{izN=8*L7P@w~OHwmh6Lg%ZYkM3<az;GFm`@`Te!d!6-%7@Eu?bjg@#gE%X
z6pG6n+`F#0xagMRCK-`<r}aO5ROU%wBBcWJURtkQaBjn(+1gwJY+$}A+<bKZCom)g
zi#|O(x5`iE0oKz^C@?@IGFws#t?3IdVk$CH_@E<%@aub1?8dMQR6Y4S@l>}Ow*^19
z+R6BHv#qB96xAx)DvH!uUlqOATEzi5GUwdT>9>NMQ+I|X#^%FgUVw*uMCA{wnC?wR
z=NI*kFv%Uq5D4p#++EY^k2}xW$qkxu(eLS=t-Wy7NxK4W=~Xzox)lr^R8FAQc%+^P
zg@u=_>|Pz`zUEE39XSe)itJ<`rk{r+c3--ck!f%C;1b91G?`Hq=f}osMEMZAfbwMc
zOJN(-s4-FwZykgOtm-7X8^LK$gI+YcO**xJj4fA5N)I8;r4}B0x>Gg1!WEtjFt)xU
zo=(VGbB|8<Y0@3DCH@c-u|^!rcMAh|qu@FDzrcbluN?NrNny+(j4<jxsHNha8-$&Y
zaN5J0NX669nnHYTW#~VRF_GpVieUDaz+b{{$`3l%90m}M5Fi{^j2FC&M4o^SVPg;v
zBL0m63={GPyCQEmLK(2MYtu`NVtY6~C%V3d>EiPVDscU)hdReeq8oSIi?NUaVCTQb
z;fH7i@QVr{K7L^SK-keS+T(^v!W;XBkTMG6ZjeS1F+>&fcXxMJzl<J|erKhFL74z|
zK#0G~z@1>c(Q-H(#aG=ZUEW+qdzVWYPZk0Mq&1DZ8Om((HYKyG5de+=@X^3FQc3b~
zyeh=wko{sL-mZOsIgKdE9H4-AVNXV>h_ZkA5-#p)B`=$u@#bC29WLLs<Lv?VH24iU
zSDL<y0iGrJ?F#NRKHaW=9s`%>xYE9t;`D1w!;DqPyL^tVLkz5Fs2(iu%_tDO6I}!~
z5%t2j$9lMEeqR;Q5#+u)PH&|PF0muIJ>>F%YtOz9>bF2^FKdbl?)f2)&7%gP29G7w
z1tSczm{4ttjoeLv?|U&lvTQ!V{7{}X=I6806wI$9=jS=~zI?oMK<2RIu`lh*NuH2~
z183>3{jkqnuS#~MSiAuUo1^wMuxknkDPvdXM`u15OXc1+^(?QuBYd@X{6)=Uj)7Yl
z-l*QXElpRryi>R*%NOz*;Vc1;W0>@Rs_2snw+(~3#^cNvHy&NO1g}DTj)5&n@oK+|
zx?TI33WUadCpS55wctF5p?jY%z6rYK<9enu11=2x@Azrp5a%nyMx-mlGA&AkvDIC4
z=Pn?hmwOiC!G&Is9HwSr%?5y&rrQiiITJ!)Uv?TCy(@+NvgNWfJ{68fh47_#)SW^q
zl_cYuq5SKhNPX<|Xon=BI~$3T9&9j4<Q&Y=4X=0$eXb|uNMc}-CFdu-PKM>~lWC%q
z?UjkbXgYq+y^_p3>8UQIVYyqW*gh#&nYTz*#IuX+$VFm)pW^I4lhXg_{6F3#9>4qi
zKa6Y40#q*f&&2#c>uc-lEhqm^bFH!VXa1i*X8xa7ARB;Ad_Ibx$1tc|((m<@f3M#p
z$<4n>BA|DB`+q%P>DikFDEoc+Bs=y<`+NJ}9lSX^JUDrM^!N6=v(wk__b=KP-<=(t
ze|LKH+8K93>tIve1@<U=XZznFtKS2*-#&SN{OaJWefp+-cJTJ_{Nmv3+!<z4^PtjN
z#43Wxb%hJzRLya5*f#prwA;=!zS}!*pTqS7_s${Q4rlNARyy8$?Jl&iB@d-;JyEbw
z)FD;Z@0RmTtUMarZ#wES^QIDxL8c&A;b;_JZF84hOsdP}6xq;+2^KLX<P%j&EC(ds
zodb|gF%X5<Vj<*Y@A#nhLpAtO)dkd24hSFLe1i?>smHb#{7@}50tf(kBcOx<H@|lT
zlr~C^U@BT=7voI@VJ<Mn;Y*Gm=MNhPm|((>!<bhHT<pE@f3r$_20Su)X$Qb))|xHr
znkQGMDC}S;(p~hIVZ<*e3ml7pzE49mlX6eX`LFIAO)KXVG@e*0R^+CfqBJZ<h<Gc*
zu3kT;Fl<v7hTYkmSQeJ5yIMtIu|(^RYco+_?<c8Uto10lu>l<14;+Z7f}0<lrS2ye
zVgI&h)K{1u(}qD|Mdl}O@NwGuWIu|#_<<@RJ82JPvd;2*xSChpx7C@~9<ENmhqG+K
zoKWoQUK-UYEsnGsd^_O-!<7-T92=ysJf`o$;Z4g0v@RL{@?!9Am<&LWOUHvj0?S8X
zP<Tern*<q;FkC6_aOgayn-k&pQP7Jp(^wDWDc~&KE@A*}Lg(iPju$b$nb0ng^|up&
zm_rho9I8H%4x!~PB6td35-U1<EXz<^iK!CH)?9|W&nUUfQRJe?vY)q^wU;;ZYA<{D
z-#>v>tLC#PgyoxnJecMLR_8E*&Dfj(CNd0iFc43NfpUq7$(nD=B7Eo4UOWrjvWeQ9
z*TidH)l4jxxVe)NOU$~G&k6_YOgR|k-LxeH9t?2&43S(gm~aRvTM$yC5^GZC;Bql<
zL|rE{odu!hkS<0SClA?~X_<hfJc<dk!wDCDhjzlMX&uH-HtLPqI{dc=|E<D*EB`-x
z@4DVLt|W@CzuQlN$$ZmGa%7n|U7Xt*x2+_eaqKKh>D7IDdh}5uWplM|5hX`nJ=x!W
zkaJ#cKgp>AKoTIqtCVy)EAEahkwBrUPz4l#LKXajb!9_`e_B(kmt;(YWKz=7uykrM
zp^Ibb)Rb5i=UtT2UA5I2%kP1YMyU+78>4b8iP~0eK-$UVC!<t01#OoYX2Rt0)d}Y}
z=SveFwOyU?AXj~9!WC)iDaBG~cbUMLs-T!PY`TA2m}Et(mXp<LLiB}JixW$e%_K>U
zxM`Q_xCA)MsR|A!gd}#J0$g!wqnv=%WtwK+sUXu)+tp>7XZ{Igx;W)gqN3`Q^JAIP
zlt*n>r<|XblxiC#NTAz$kr1WhL;LVJMmv2xIy}al<@6Y0X#03fSEEU@$amk7N6at`
z-L0Vw?hdwcSrTxz@(5QKkHq7jxRs;v6FYeavA9Ub6Lh8ZXY$b)Pq5H)q9!DW7{0oC
zcZRK9oDInT{12%Kug)${uU;Vs5)Yr5TDM*&_0UCrehYT=X6ZT9Z+4Ccrx-Yd+|wI^
zhHfr>>AvC;{ndR8i+n#@+@tsF0)w^Pdj1;LQjI7k$?bTY1z9%@88WZx<G#@P&pw#r
zupY#{={0jFX8<D1Og)B~@6CBgv>EyWju*2Tb24N};XS(i3-5lqQN$BDiUKS+9e~(O
z#?2cu?@!yG4TrTyA{zhpv;9j0evjzy@h^{HsR)0_qsQAv1jFCe@qrBw@H?75P?UWc
zC8OR2H!x+z3U6$JYGK;IC^~rYkxi9*Po>t$N1sdt(Rq;GJrEtuv-$0OPJTN{ek=A=
za#TMv1bM;P%$rx6&BC^Kn9Z>5Di+$z-tJDgy{$%g*!B*DW7|~-r%{%&NKdv!t1wEb
zll#k?>vvb@!?Slc!}E)q9z9?x#{m%uB@a@OaB`6-VYs^9v>Yd}23yFMGijrS3R24&
za(F|<Nuu|z%Fz@CODIW{*i=EP!dqcw$+G8rSV7M#uAt|8SV7M#uAs9!vk=|9bIX#Q
z-M59P=6zk7=<Ob;-c}@gy9c_rRS9RFkNc2x?m$^Z*dq*fl6Q(h6(->BX6k-wxVssg
zCx8F$)$sK4;?-5}e0X_%dLBarfmPeJ7SKrUu~tK)%6_)U4RJsFkdTVpU>CVoF1!8*
z1?VyUGX|NvR~)`zs8;B@O<LcnaD8``gYBkz=WTL#$jf3<wf{Ei4y=eq`Tm2o%q3Ly
ziN*SY2lJe{v&9OdezR75!H0tUW@`;%YlPT(gZ(r3KNJ6bEGHArW|=7!2fszPT!Sbz
z{O0Nh`ymRC#GKPHt`hpVAqoq0*9#Z<ES$I$RWqLYgyx3z`9;W>h`~yV0+6C{3RfVZ
z9KFboHD|Ccyqm`Ircl5`+6b1cH!|Mw%xAPgxtrOk;dBwS7hD);JW`>BI|%nKFv6cM
zKH#p$Yum@02MwCu>Pc#((dbYJnyQ5o1V9h#{44WPV{*@fad|VEJBvK>=G3qJ5n-8k
z*|kW%S3#q^<K13*<8g!j@OTdyT?Y<g91gRvBC#2r#q;}ONs`5M=_0P)&>HZsPWjym
zXrPg2Ke*J_5x`E+@zSOf8_LmqEmOo!^>3$(kv$D%Er=w5o<a}nJC>)6_;>cweG;wM
zW4VbV5t8l-n+>DE^WlcNYVhHzyt0-YCnM!d87U8bi-odK3XmfUkdr1sX^i0s=1t65
z8{FItI8lDoqJJCF=v9U>!Kn3qlWvH_857qmmHZ}8eX|oNio;E!?qi(~Ao=q%9Xgl(
zda-i3?^!K0r`C(4MVVz(^as~Bz3>bPK~gjUO3|rN#D;rO)dF#gV}sUZS!|rj*q|>>
zEi|nZyz$CnlR8;P)ASk>D;7SF7tq~Q?#sx=P+?rXJ2#d4GY2E5I!0CdG#jI`y^5J)
zt_Qs*kzZ+`?WS>S<Arz{UH}Z9<a6qwitc3@5h$Yuf-8J0cf43JgK4q|vak5AJEKPp
zBbQnV?&?W=^bzo(!%RcrB!f2y92W@07+Z=9vOip$v5QkAk<66==MH@&V{C1Zi@D=&
z32#evfXy68u<*~FLssq=#`K@tvG1+ecPsp!cK?qSGh@|^*mphlT?@XekL^=AgEKMb
z40ll^Src+W-mulmjzkoqf@|E^#vn7v&gI>jHaw}wVKxLBk<pYY=D!W4liiA2FMejV
z54B%-*p%`8+V_VO5LW;aE2Jk2i7{R9Chg(_Mnw6*v)zi7jS`iOcxAIhWs_g><h;1^
zZtrrKp~qUYlC2?>SgBSnPFSfy*zW{C+NG)|IMOLq-K-Z@Em<g&dX+avrHiGLA(u`O
zxpan;wv{KBtdPVDCn=W!Mj6uKq)3O6Y)v5@W-d+%=_qB!h^~`~fg<!Tlr8P0t+-EP
zvshykD76=sg}kQ}sHmkQawC-2LkVQC`u>o?JtJzrM3_)-R+%TBf+t=&8YOP9q@%fV
zC+>QD7nq^3eOg#ouF$BW3wK^LSRsz@>=i5zX%V&*etmH@7!I`J=3UTuU~$T`<Y#J}
z8keyh*<C0vczVx$=9e__an4zvWE#k0tZM(nt``wg?lUhTkahx?L?9eQLIfNZxT`n<
z+-F|qfWkLRjQ~$;FLQxiW25f^B+4AByEKMKn!_i*U@7eo2s51Bup8P|A1Ss2`1Xh)
zZ3xg`dO6=v6^4RyXn3DfjiW<xY~>>8#1K4)0Q(>8CC0Y67qf82Yhx>Ho=Ry43ki@Y
zN^F6(v`0Si?-rY>gX^2cuO*J%Mt)k)#V$d{VxEZCA0yLy#n(yNw)2wxH1_#XwQCff
zZS(jjWlsEJ#0x?ZyKI_DyrU%krILyrpQaM;h{RKQzz}oWr7RK{`sU)tH?UCtaQDD+
z&fsmXU;o0tW28=&T^6|afGMe_F5a|Z8^IqIt6%9pHjTRLVupvmlo=xspVaaC5CdTP
zTpNQC<IdmA^oZwo+|Le6{;a`@jegL7;Tj`UGEyoTK_ydB$xNwa1(jNgN>)mxc2KGF
z3mXWeTCHR)N^66WoNT78u3)eVMzTUjUBP4(%w&bGx`M?jSTQ1SJi?wy8>lIftZED6
zRvEDtvAmH}zs>5m)9RZ!^*gM7C#}AfQ@_jVcf<N309i6Q!zfoc)pC2XObW$$54ID<
zO=WT3f1N~eOIe)vUN?q3J#x|&M^H=#nP6$#@mOw#7Bke<Oj^wftBLg{9!9W8R%2*1
zI9!^|R+DExTC?OtFAOtDt4tVX#3LPzPTD#}8^6YN!BLMna#3EAflId!SX&xjEix#2
zN`vw7N-i2TsAn~(GvcBKV^`x?cL=y)B>C-Gp`|P{#u?W$ZE<m%juo#3_grg1<l*$w
zWIH@dj_v%Qy;Lt{wTq~#t@cz@4JSg?NjKpe>TL)C)q>(2%$h@I@nODRScMt9D?`in
z{4nQMQ1YnoY5WvCyc3i*A~V~#o>nn9DvYfyxC22WekN|>fwOd{gf6CPrGV-cByX5=
zi79#9tftsWWtAhcDxEq<O`VgVuACrbJJe^L8HaxKr6qt>+_MA}0$NQ1vK{}k91cQ%
z|H@s@0AzH1uzIc1`S!MQ=LdHQN_Rf1xaZC<1hgoRV!dx~EBAhI>7jD(GXTYVKOznH
zoUSJmSmM4o2t#pT{DmQ|5}&bGjj;DA6+SK;$LTwXtIfP79EKsL$ulzFg(nW)Nn%Sw
zyy?VRd;VOn(?`GYV@wJ`VP|s43{r3sKw_?PdSY7!52b^;q5x6x3HTfbW2Eiw04&)S
zdzyoFSejbK-7c^2k^ZJxxkXkOl~`f!$cj*xJ|`=}Wl0Jv%u1~2?8pkEL7DM+S>d|b
ztgxynNNY!4Q2l6VpNJQ-Gog{$bP8Wu)wCrKrEs?575N|afKz?E$!T2PgU%JF5Y7BS
zrd^3`ZS!|wTVibbWNgbDkUG`0(CGXwd`k>upNwzP==JeH3gJ}7-T*_Yq`g7C9Fq11
zKHk~haMJA!<tdYJw8Ja5im~1)!Fngg`qMpF6HdA{k%PBdUU!+!o%HGaOJRB|A?%sX
zI|s}4%AWg}8i@<-_tVAr*KmQ==)U-l{OYde?v#g7XPd19VMkKDO2LXYnX5S6$&{b)
zEzJOm9*1)0Re)j?!No{GF=Z%b5nOViSVeHL5>Q$)lvWX3a-p<~;L=V&>BvwzMR3W3
z($|!S@&m0gFdE9MBw}T~U}dym!pS(OY^W>ije(ipvRSaQvSmwM*=h{h`7O5!R#vv$
zR#$E{2A%wtI|VCCEyq<heJ1Z~4PXsD?$Q)h^{lF3u=iW3RgJ8wV8Qp>sa4IaswPzJ
zq*k>G(aNp*xV{#lRav!Nh*o9QP9a)Ne(z<s>BIUqeH4K5PdQGOA2jr8g9d7YS#8h=
zTU9EOS*X=LD3ax*D3T*-lCaB~uL9u)ObAJ@3`vs#83h2TG$DzSSs9Y1lCX3!aZpPD
zbDHY4OixD@t&>s{p@Z516sELj6sSmBEE76_t{C`fK%Zu4Tdba;h41k|=E;q5yu_RK
z4i{1E8<R=#c09|~_FR>x>}k9xPZsw)cIzlWs>$BNywxD_XKlr^mg1SEcxEb|8H#7R
z;+ZB5{xssCI#(Mj-=OT^3EFyp9lrak!o`mE4vpL>GCg8)LujX*`=PSmt<-+U4T`jW
z$9Gx%?xc0Rlk9j$-SbZEll7F7-c?SOu5yZYRdF5Vca!OXnj$g{I|?1CE~JI~Uv?vY
zZ(~$%=w*CDoOC>Rd>Ve-n1zTFLj+GEOCl06WNPo3A2t2ivrfGdQxdrB%o9Y0U%NW^
zn4Ly0CyQK|L@t>F>UYlnHh5UNXTUjcep@sFTz399%`jSt{BIq2{Vo67SD62;70>_n
zEHnSxi?2EV+u*0Sz2Vtue}K6%RSDn}1&U{Y<JsNkiw_vRB}l3Qe7dve&eRPPr!+8F
zFN^Q;=QS+{Ey)+?p%VH8(yqq1B6!|{VTW+gma@I`!xv1=LA?lJkdQJtVe-`V3u<jB
z5~{ck4lGd~r2cArws0qt8@y5wB~%&QuI0yJZk3|A01|x4`GmL}l!&`-3zyTv>)>92
zYiiW)xnD1RewCzjJbIyJFBxSZ<9ZlxnKsKKV(xL;Q%L@wzWsaw`Cl<N9A<@kxta46
zZ+AP+|7N>mn6dfausX)K`TuL2|J&rP>%hwL)nYz(;pq~;!K9DhzWTJUgVz@~=U~#`
z41Qus9!;L>v43+mJiET>v7D$(&8W>ErH_6+42^>w!IUx~&OM*zIYTauu?MiP7gGl;
zH@qZE%>!T}rU_Wz*?6rSt%x8O+{~VbJYKo)H{Qy1h<(2e^F+cgZ_Ls}@{ykgqdy%C
zFMBZh*IMW(P$)H~-sbs5i6S{Byd+EbrB06tFA8S>MIdqt&|>Y?hQk?9U6<SSif0~m
z(p&4olD3<Y+_YOFxv3LU*0zeb2DSi-8N1_(pV)O5{1gO20&q0LDTZFM!GHkinl5fx
zl36xN%$6HWD*;by+d5tv#ZSh6yX26}>d?s9p-UdqhqHQve7JL0fk*;kHkhMl<T<T^
zBL|HRMTZHGg(F<q^IMnvjM*7$`ms@O&`(Ca&eIS=Nm``A!!dHyP|p$E$R)1rd(#JE
zFPE#u)|=Tp*@#V+i|NCBG4p^V7-K+nFyiK%%%1C1QvkV<H}%#JOvHJf)tWk%#Q9(A
zk_8)d)l0D1!bI}J)62{2zoA+5=H2Ds;_YRRmAIh#&6Vig)w_Q0JQBu(B@iV_drVlr
zBf~8-@FMx$V&87*rdzhwq5EZ1o74*w#T0!FqBUEEO6x{@g05|C20gbzq?q3Peh6dF
zn(o{+$p6;1ok^F@%M1>kT3q7K?CI16M$Dnh99W00-~rnHd_M}71tVCJmS`BSOdy!y
za^d5tk*0;smIAWzhu(ZbZ#hmXK$zQetlOE4Wuo5-(u&I#Z|s54`uP;r#_saY^@shx
z-oR{vUD(k&y4LP=b-mkeS)G<)73xzC2gGrR7YI8|&Fr+hX4^P1dPhcS5FmR;dQ)%d
zR$JFhUF#j`rC}H-zk0jVJ~`=jtV$qEiiTlff_trNwX49e8sxFwv<&OSG>lHSQw2tg
z;?ZumyIQN$ZnvtyXagQ#x4=(Fw>s5ebN~;dX`SGY3IA7t(FHurrb&Nvy<=5?@m{BR
z=!Sk`S(fG=m6p->=sDkPpO|Lr#O$;tp(eoy14D(wjv&w=b(t}WInh60#M~`Dy|S;^
zp#>HL+gGafJn%W>6DU1tqOWS(28pK7@WhRuW?a4M1;-fzWS<t>32<;s!UxC|Eb(wA
z^5!r(v(&$SU~t0_<;^xTnErfk?cHM@E*v{?Ai;S9)3#t+)CLmA#1MLq-6UlZB<{Qk
zPN?p73Fb5(kD`gloy|1C&Jgw-0#a5}kMDnQGs~+(;|6z279d4ncqT!F&C$f!CPc6A
zGSotpknkfAC$H%?fkX@`DH|6ssWtR^zyRI~k2Ml-LYkH+ZbDg>gdr%3Gi8VkF3OB2
zq>p@}ZPG5`L4FfBAgT&mC(7wX`Hkiaj1_bq<b7*1qiP;mGB(X5Lo_tGyzw0d`B*LP
zT|dgY6&g5U;;m?kaS<*J2hwB}&_TKeG(RJ39wE%;pl=BD&e3zCKaCzBbGBgiZOIW@
z7~z|b%<uO%UVy!%uCkFnL`Q=P6`&lB&1h5uE(gQwzs7_nbOlC02q8L=V%CLPr^4>V
z?3!Vt%>Tp<9=nY!z^sNEdITP`fo-XiF_Ra;f-|TpZl?Ht8zXCLhA$2FmMbRfG&kOv
zq3JuP`vZ=9yK-I9Wm<qAQAok&fl?z_F+Ok+Sp+g8?kFE&Y}YB)K>KUz`7>GL9~$HV
z=zcH|<p(yN0F`351qMW{H<?jDNqx}ME1K$x7OQBaS8S^*G6(Vq(+71iBaRqSsVdlt
zBl2LV3w9L^o5;PC=Z5ecx}H7`yZ+R1VhbRy)4?c6M$kpU6d>o@W8r-l(@@<xmC%Tr
z#QVK2T+R)JP#pL;k{owVkEl*Tr2hvas~sbuClARdM#zDe-W0>tFE;C7s^&v2ANXn3
zSe)+c)h+qpO{cgvygI!CBO&~HbaC~vDQ%S}^;?*&qe+t6{Apo!i8QIl;0bmG2S}Fy
zTXq;71k0oIF6&BE=?UM%ZIZt&R=@f<BYc2<$FNP0kU_IUwxot3c=d=ax)H>q8N8$E
zscPF}R^l<vG>_=y@(7B0K9`W0?adunLD5-;3k-Wn7uJtJuyDQqoEZN-JH#)mI&X-u
zZ^=bGOu{(LeyVk{8^(jlUMK&4YvK;Q?Nx9|TmyQBcV2^ZyG>YV8Sr1*>YTKSp@x;G
z3$UT(+SW<2wz(o+UcWMWFOeaZWwfDfSR!@1CvDTxOTj^GXegW}tkYWU6TPLuVy%=(
zPeIVWF;BWWEKXV)th3D0AYicS;?l%w=_gvptO5cZ|6tYGZb5H!Ppp!QUkbuPLfZ7F
zWwlO9E|4e)STMGl?8HKc#VZHF2E((d4de5+nb;Mg);*@AtPO)XiHTWV_%>Y#w*C@y
zp9X^CyL<QvMwdAsT%aDF!cd!(hnRsuh^J5y2OC}il%+1CHH%ENFh?Jo`C_wXAdJym
z6QcPFZ2lFuC^>RXa<!nQki8C4pg0S*VTk7fI8RI$G+)bXL5*jdo(M2_o9PncIE-1c
zVh0w6eCB-l=2WzX3hchjr*jawBf1iWMY#^k`_{>c)#73o@nI$KA;>g43uXigByI<;
z!ufO6fSw3ff7j>NHFhjhhn2??OnjW<7i8f$$6&q91>2B?<L9}EplhcKZyj0j(N7r*
z5RnqPjp=^Erdc(Tr4*)*s+du%m|3itRjgP`QOsa$mkpk9vB3EF{l=R*!9p13N|9M!
za)BKA15kLgaE04p>jt@kKMj}9Y&mR1CgBY;VxUQK$fsn%w&4Q-GbEGe0Bm`U*@T8$
z9&7UO6U^IPN1V31u|O$ZC%(Deb;7;vY|ZOBd>+pmRiPmCqS4gq1waNfaz{I9T4tvJ
z>~I=pRoc3FVrpiQwsL1+n7m*hDFG^)gpIb|>F9t-$Li`@>qP5zy6%xxfS%Z7Y&JCm
z1~b5=1F$Up1PIhL_sA#-3cBE=d6M|i0hC^n!uJ|QOVdxfEv==SU9fws7Vyn*kJ{zX
z!aYc%*)lAv3+y(nlXlnAHJ}nenI%C{Au~EgyVbI~hSt>qC8OJEw;6?HK$%otlRqb}
zR+3j|I^QDItyTx@p_Zu|t#a6$(Jgq(=(e>s&}C{TmfkT;L+9)c)7+yyB5pBKBLrvo
zt|OQr{7bF{ato%!oUd`{uogS!oX2-kF&0{wTN#^)>DN|Dw>-EWUiE-*Ao}8JAekA`
z>IiHHXER-}BhNZtX<~^-w~vV9`s0;1!o4_5uihm6&4?OE3|N3ZU$T<*hXt5Y?lN4k
z9-irxN@Vew{`kzmV^SkX$VOND=-EesIo>TMprig8Dt4TOOZqwEvLpq<y#S$#kx|7+
zEb8ZHy5bpMpT}S)cDEG!SM$xxUD@l!O5Di`{1NC@1$+;;F6Qf|I7uui7BbGv23c;S
z9U^SpNYKWukOBn_u^<q$Os57<C_&(>((_(H&wEPG`v}eVYBc$VO@V?&(SnhYalySt
zRxTKW4_q+8#2$I{*2>bu1KD7!yWj5_1e6d~2rl(qZ8n=Kll7%{y8)9N_2L7X?*Q+1
zftEVh;<&-kz_7k|_R<fsXO3LVe!@Vhc=;E3yWa8W#S!c-Dyc~3ax3i1vR((G`PLnL
zN*Hv{jsKo2G|kO%tKbG(+p5Xq6B_*Nd1MPNgGckaxRV_CW#}3nlfWv_h!#R;i+SKY
z8*SSFrrZR)#SI(u%-FJh9T<p_yBi<6iFyjJ$9H^m0z)6T>#f5h2ELa=H`SDeByh?N
z3`Rme9v(AC=D^#KIaR!0t(yn@44=(0FHigIl0@K@gy^tcD2#8q!YVXxSm?lkWpLSo
z4qP~5B0SKfEBC`1gwZE67uI)91D(6jSInn7o%)eMXp+vwL|EFvbh23Doll3a@DP@}
z#p5l1G2O5&HpVHS-*)annlP~$Ra}oFW#jP*^FaIi23qr%t_#hNHq`AJ7FlfVIAMoo
zD9~%V9v!*-0;)sFS$a#CeWQ+R3(S-S=%arPrP*&#hrP{;zQ8DhW15Z=XmTeLcT9IT
z(LkS%A4IpTkinm9=MB4PA6swGp~EF;`g%A4TF|g4wzI}|VA*cmQNiuF$m3!%UE>~Z
z%;!T|)6U&n3O*R=hg)gyS32?IH|iF17!R4k(3BKZ^r`U4=dQtlU@dTBp@(@4)MUEx
z@0e)PJAEi6V;lYaz4^pP;l=c~)c%Q%&p>mB!hy2t94$mYB%L0iz3L^8(6(s@Xi_ub
z@I<v$x9AD)ww|{kF7J!Sj^G#z&Vg!z+cZ<A>yo0w=(Gw?Ew+M9!A9_)*BXMY!3mco
zEo$x!jF;Y$_uK-^DH_szxW(-)Zw*c2$N?%phNu4f4IaC$7K=68JqeG+xgbRErSX!>
z&sO&L-}9<kP|2)ib+(<a(8mI+gSa;4R}q~TozXPXMqOIqTwaQLi8iWAEdkt@rj^}H
zrE~<P1mr^oD0MQ}0ZFQ)lc$nSag~f~q!ZUj76@xk3JFA~=_6Br8tdybwD#Gx|6kD?
zLNNrW?)*^4<lLEd8^7#B`;nPYwXJk%J-azvMdb`48ErYWZT$FBHUw9pC3rfWp^4@>
zc(jR}<y-AkGx=(v!3?XZYu~eVL9qHIGuCBL5n%@lNw1%vI%zB;~`Px!gy9LzG;
z5f28~<p7Qt8ywuN2WC5JG4mkiLI&*y<|(2dBrS)TX-rpmE0>vo;!Zg$=>dPH^sWW7
z<<Ms1xocIj1X!Q`@Z%A%lG<?5mBUYO#;x{b$BE9o6Xwji7f0A{_T%TUAE$0b^HJr2
zm1;hUt*9LvOWBUH-6=YS1mgfZ>AFC=gY40POUN;04h%ml&-G)*G;~}xBhq~`Rs2A{
zQ`bU}Za<ATobsS;!7Jn-!;wVhD0f<%q`IK}F6Q+GjouLJ4qRA6H_<VX-Zz86ZOl3%
z%MnMWS7Pj^Hm+Cuu&za;HykG?NrSfpm~hCP=w-otv-S68>t_t0nWlbxyvQ>3vxiTo
zR?g0smk4_rJ-caxr;|Q-I;n$a*C{e~I*PHg>*Nfbac*J_<;dwIM$WHu8MMDq+9@(h
zmtF*|s;z=(H5MCCTAT!Hl#e7k076~_1bUFM+x^~8e*d5G2%`WLivW~)|6jLE(=rqB
ze~k9G`~P3({r_0}pIT=8pZeDv|0fpP3zaz@0Z3V-_yvBsy|dG^*S(iF7rm?V%b$jC
zZ?4bZoehSA*EhZX>+8$&SkpPBNjDtx<W}g|1orPK*uulBcW=J$-3+f^4sUv|F8YJs
zO+VI5UU5=04t?(*?qX8I2&;$ny4Wr@I~=}zb2=Qx(0F~?ANIi}2V?vKOvjsd%v67K
zdLAb<v!pz_V=y7sBu)m=Wbrii#*4i3;;)?07E#A5@?dVGmoMD4H-5(Ih*65zNh(ax
z^r|6?HB=Ccw<kDG9vm_LP?(PciI##vy&&WX-iWt1)Adk#PXq4-Y!hx`!IP(nH{A(A
z%jBs#juQ=D5r%6z#iSsvb3N~m?Ww(rUZ8~Gw>JCw+V)=qOx6G^l^vl%+GI=K>koil
zE^awACQ$rwZ@Y|8f$x1>V~7uK{P2lf^W3|SqD~Q^3)48y_8ec7;5-=3YQ|g6{utk_
z$oD*k#dbY2*@uFAd`R6$$VHINoUksWQ(O=+EtknO$Qdao-d@P%-I)2j8&ZSL6AG1P
zam^!aBO92Ah_Mg|1$9nPAVM?mAn%@YMB7&S_@YLp>&Uq9kemhOFcPYL;DS+FBxGpx
zJ2hLpN^B{0b`uL*!&WV{ufC&BQp)2L^!LHkPx!v%>5UvphBe2!Bd<NDY<s8#iTF6I
zMc{c(bIgG~Y_SYafq70EQx8*H9j%}mb<gtoUGyOW!kozrjgp)ev52#}2o^z}X^w>>
zVo{14z|K?&vsF0odnO0*V_%)c1#Bc<;B&pz)G)e>#r~M=kHP-v?2ktOoY3E0`nyAa
zx9M*RK)FN3U^0*#BK?i<N_R`dj-&#lvqO%7gaL!ochf}1I5i-VQb-BJOJd9yol-&x
zp>&k*S3oOsyTdMcSQ7puAdE7E!*0Oqa-5G4R&9@$L^oc2l+kk`uB8U%&FJ@FFOOqU
zfxQvJpNqW|*NW_=8^x}Itr<*-jYOz5Fb$VvlcHhDp9}(HnlH9nhGb~+?|B5<<%mgu
zOp;=koUavFdP<mONRRT~e^zCpx$Y||Q-tv6QYMOPMP+il``?4T)MdUBdn1HD7kjyT
zf5hI{W>Xnq#sd@uj}=@iMo6OGSVcj2{}*Y1#>z@`9BanuB=-v89Iusyu_Iv|ZoQRY
zxHtb@Mh|aZb@T`k{%oTM;#zU^nAzLkg}wH+DtkkOKO1`y*NW`5=byz+&)>0(*~W9x
zj%UZMQXBzSI8NWpd8E@YQxxB-nUJYHceSA%mE}&a6VupQyai2FWt3iJ6jYIr;Hhu0
zP;G^G#c*-V$>3Nt0@(F1)tJ<(o`a~KL{!f~R4<RH9w4eyM0EwCMh>Dz5>X=uQKLMf
zMu4b65j7NunmLG?Nkq*YM9uPungOCFMU-Df<N^;Bo(9uSsrBmoQ?FMm>8zvHS-?0=
zWgSUnophCLTU(1apsA{i(yL^uEEWp;4HHdpLUOg%N!MBdS3QZVo`b7i30Jk^?jUA#
zf>^BfIzQ}ZdSD@U{^+v<qvFe-93niw?71Pr^UIzZB0NjX3?-(9P-S<Y8TK+UAiZD1
zyuj%ALre=1swHNHGSk9#&-22bCWh_TF*9ucC{x4sk1{uG|0t8gw$kiSd~(?Cadz0x
z^sxO}=7;ScXoA>&@)-hFCiBI5zmvp1_m=><uVjmfQ}Tz{WfE}x;y0QET)+6eCIQz^
zwcQkH^B!i7J#IQt(!Y{@Cr-;BW$Q^m_Y=(`=w4R9SuVJ~`Te_B!_&))S699B;pO$|
zc}!Zk%Ul)LL;;a3f4NeKRP$MsSR(o?R>gxxd<z`vRxXP|2jyruxMvJJ_vt@;QPs~6
z8rqcX$ahycm~VFI%T3A7EP_n+{@j%6&;qzr?$hIAatRQ8(q)~)^Zv}-*<yt$2w7t+
z*d23V#wx$rT7%dcA-3LN{|x@m<o_)Guf_kh`M(bS`&d?#oXxU;Foo{pbQonL8jf6{
z8!O6BEo6pnbPwzNE4%%}yOV{6_h#Oh*mLs8o70P=k1(d=j7Pc)u5?WoAKVoINHlQY
zhDK|*3B6kiVB&bh!Y9zz;rNOOE`6O||Ag<<+m^NH3Uy@9&Zr)KqP5v%QfI+p`>)@<
ze0kZ!NYm)viPK6wbQRTG0j)GI6AwWTO-}Ion@uNd&tKy`1&MMNW$|{p7}-;hk6SFW
z;z3^-JonyRJ*@9&?7unj@9d@fB*_kexPTJ^${<7-Mi`AR>7G3|Q7iN=6?^iSSoMAP
zv7X(>`X}jQ{fq5mJ++VZ%s$RH1`99O<S{a`qPxtb;qs;xEvrYCl4#(RTtnts_Q<io
zNMk}$3o$LcL`(;w*9))RQh?|T6Ju8(dOce0C@jWsQnJD@R(Z2h=9EN(Zkc6GhG^2s
zG8*L7fpS=K<YCE4n|0F0M<?h&lRSGCTZ}px!(mRl!FXuY!`AiG)^%0u`OB^Bl|{Z9
z%4NA4ie`&d&1|868<DKUWhQRU(0`@PmiL=NtC^H^fXM+wdM#=kykJ`c@+F~wwI%C;
zOhiLP`MO3pLX@v-#s>x{`BWScjKU*AfetS_A_)DNKP2GQ)|Bp)_96^gj_ZBwepS4b
zt|~>l>uHgph+X5t))YGhS>@A3fhs8N&VtGumW`A-uM=>%B1?b0Sh@Uica7`NdZBQ1
z&R!Mw!K&zmcPbDjsuuz(yqFX5;a8q(ffU3s!kw=22szae!t22`x|mEd$19IZu(V99
zDWlcodQmUGv0wZyr9j-KdiO_bXcdh+NWf+X5y(Nx*#W6aVvvnb_8x;Q51IXY-uvO=
zj77NP;pN4N^VY^-rL3nLBy0ApaGWImT)=?5#XI(%<H{4U$Da=G=@oe)n^klfnAdx%
zsuqNNg(cJT<o#4uUvJ_4RJ@#uH&gLiD&9$Lkdp>pE$=i)yFs9c6)cf-c{>_g^T1pl
ziv||7J6%ZlaS9qq%%lz5V4OA$#(kFdlR{@*35cY~U}|ULq{;h|X-0!&O`W@SW?El%
zm0gLGTvk_hl|3oc<P~#!sT=DgtiH!ia^juTEunIJxER}*nM9#((HL93XOx5U4)Ffz
zRh9@+z`HJ(ysGP~ED1?v6(+X*h(a(UegaAO<14(fiyc)LcS{>kQXq<i5haIV5Pdsn
z#;jOy5S!(eqHrm8OS`O@B1WW0T2HY!2*TlX3>~b#M{MY!Ow4qmAwul<kfUIXmyU`U
z*(V*9l{<0Q<GaApm5d%($2!gmu%L^;plGnmGrqG|hxHmQ!uDBTUtA4_1CUShv0USU
zNh?p&pQ&+bx(bJ4?5BF;xiY|l5iMz`)NBW)_;ozYf#v(d321hfH&7u-<Y9;;G?bnf
zN+g`&gk|PRY`QwNEC+|e=wV=@7>m3Yd3W;>JKiiG0A*Y}$|m;4j&`~8bw!M|>eATB
zs@^(CUY_LJn%jGOB}Z}KSg<1$gAVT&9qQY3YVXo%9%p+x>l@q(Kx?3t48V|yb=OfN
z%{n&^XGe*%79Rdk<TZ-FMoG{p6&fW3es@Y@%O-%4pUU6k<Ip*0gu#kXQdZvF_r^5P
z<pfh&Vk$jy2UUqpQ(O|7!<kcAV@t=naIcIlHTes#ZZ{Ox9E~DQf67MPiBltJ)xL;v
zGN~zI_9V0EMFdYI!gJ5fFog;I{1OHkm&YGvK`N7@79P`x>0*XBqOi?YihnG=W$(wq
z&FR@-*yZUaFj>XL)!TQ22DyGWfWP$&8vl#M;>HG8;eY+tp{8_^qlZTvu2xS}!p+P7
zlFjy%%6grj+wDq52JScn^DR$)?3GG=;*3{18&*vyArnB4(@C}@lIc!s;U3UG9~DM%
zDolPN5sS~7D)e&yUR?EiH?(`fFu11O%jj(o3jdV<{k~4oVeeFD7u`fXEn;n{s^16n
zB&D$gwkbW5j)fJ1QlHR9WHHkA11*rjJwW?DJcHg7wBRtBOI8`n!`W<?85ZN=0W9=<
zHch6FGg#(l?Ux3@4{R}+Q&{JP^&*80Uf3v7*yM%HB84qp*eX)E#S6EJ6mIjv?IMLc
zyl|&T;Vv)SEmHV|7d|P}DM1(N#riPp#6rEO2i>R_>PJ23NWD-`>OoiPh5AwtI+MB=
zMN`O~znKZM#~qImMCE#@@hU@}C@ZRBsH+%36*F1IR9CTrDy?J{OI@WMROt-01Tn2-
z9ZF+^ks5EJtuAh`;zqo9M_t@x#m#u}uDZCzidz!$xMWB=a~Q$UM`>gAaBZ+Pq{gtc
zkzKjXD!0{@&Fsn@R=K0DY-LyOvdZ1Ca#UNEKF%V_2~Mimlq`2bVcuKqSYcCHnD<pD
zR@hP&<~`MwkfsiUDhEO)XH0yw?RYFUK?@n`N+zvjg_WXO6PheCS&5<0;83yRgsVcP
zLhFG-Gd;-EWIfR2sBEeV>wyYKWm9!m4|G?L5H6y$MYXl4nikg2YKmD+Bde)mMI9`r
ze8se`n7S3zvvLYnQnLb;!g#VZ`lV4!y>j)d8_Y{5d@9S(FFl%Fb)j^MhUwwNs+&UT
z6dlvUIaO~BwQpMXP0K#NmWlJ%P~X=K)(vKlll&P>kET04e-6{b2~RikXE8mT^7PiX
zdF-2p{ciIZRjp_y8|tOzGM=`GRb6b>%gvfTPu<cIOEw6fJbBBa6R8Zb+$oE&L6ba7
zJ0#KeOy~>C7pv*FX2b28m=i5GiVIZKV;=l`S)5Z-S2wBWHmRpHY2-F(sGBr$n>13I
zG;^CY)lHhYO`0i9TDeVH>L#t+CashvTe(fP)J?W>n{1^t+0JdUt!}cN+hjYX$xd#Q
z9d(nP+$KAUCedrXL4Dxq^T=gBnx`D~PLo`emt^3Sb3^cw{nq*L7J1*t8%*p|$wi|E
z^{fVUMqJb&GkP38{3WlE3TWcF@Aj;o%b3`T{nIlhw(Q>zV<LtKbCuS}WltV|5}Svg
znm@6v$4?_T`tE|0*pm75jFI+r_A5AfMoyaG1Q`}Zb3}a|Mq$_twJ@Yd`qda!z2diI
zxEB+(d5U;39Utc&p?VXdrQEAm9kItErpQ;dQ!40k1+Ytl?&0@|*i~n#kV;aiVcSSE
z_Msu0%K=HL9!aQZdoquwT6)<;2pTJkO&I~A^s-T!jIyDU%rfzcgpNq|MkDGWgU24K
z2TK`#Lrv-x_?7iKb_<?9<9#J}1A|jIoKfIWC#pn-N1f{WT=Bp_dYRBX`C@K4DFbr4
zm^&e}Q`i@SBujDekh8(b_t05<n6DRB{$Nw|V*jTcVggnuc>c92u%?hPBT-5hOX-0+
z%a=HlzCbDNKG<rV%AhW}>lAg*vmCW>eHm)YZnLY^Rt#iNa<(4kW~4$&zsUKTa|8%b
zaU^^vMudpZ&Ga4p6%Ja7dgAoJo(WMzw7}ohsH;Z3vVfR{`qfVC0(X<$)`+@Np4h*j
zKB+SK4|7te{azzUb-i&^dXvgxdW-39Gnq`?MAdqQ@hUF|mc-AmNpDbGc`7d9V@!u)
zZ&aEj6xoM9RRb4kRcUp~&MEdgsZ(&@j-?+<(JQ&o4hohIXrG#9Z~vJ!jb&vMKB_b=
zdXuVYtjOLptu(kDG|k?A!J5W0(dB3wE3^kqO9CpYX?S_<dNT3d^@{_4z1oaH!R)Db
zJ0E&b<QaP<-hQWS^)VXWIQ_=rYBR5iciR!!qF0&|n(Vvq$YK3S2w3tph86jj=Xm4g
zx8RZlyF|nq8+y)Gy6MiZMhQ4@Add-NUXQ+5;ull=Vu)Y#h7yz}lsOSTcZJU#;d5IF
zMnp)&M?^=$Ciu+uQAKVr<}r30D081P#&B3i5iIy@2%k;ivz6JGVX>A_r7e8!2%o#f
zK%EFx1STKRJBQI1qZl+@Bu41*BT*LMOFQD{X6}PhxDN5}7OORx+5WE}U)uI1{5<7l
z=)-5vo|Bs1hU)Pyp@JC+W$eCXD7Fn&upU57iGdcY9`$o0P?)Fcb`i_SZ2`aW7U_-`
zul4Z&|9yN~b)<kHx#U1WBOfp{;$!(I8UQp|CpAeerLQgdE4@0Z(pyt5XUJc5;cK>{
z7DW3<0?G7MlT}uev{s4skxbr^O#G3|nj_Lx=mg#}7gZvu5Z#jc;N<B;JceAXb~SZy
za@FA=ypQ(w^z1)}>>8DP1r7h4_kL^;5ciAo9=kS|b9?P)TIClGLG=yfhr#;=8JH8W
z1Wc8=zg+k(*RAiTi}A0KwG4(p`^9(US9djcr%Zpii_$<7qw-<2!07@<30Abp!Us}M
z9r=m=OW=b~q3CfacU}c3MiE?$1Qb(-Viv(A7m8H`7b^j!B|~Wy!6g?;y9h4r1eA^p
zrBei#TqxZlxO5XxPGl%2MR3W5q8AcgT_JaI@hv33MF15NVLd5*dQANELQ-4=P$5Cq
zlQO8sgitRe%lUx%LZ^jm7QZ%tDQ94$hw&C6=!GGeLHr6shG3{6KvM&=7!GD(2x&N2
zY6viC58B0WXcdN#hC^Eo0qlc8rx*^M!VuDM=&B*W0&1XZ#gI5D4k8T;Jw+@}5Qbh*
zF7@ItQt?O=O#^{g1raez07=CpML=;aHNa(4Axv5&fXJ9+-o5QZYyG^-#eKl1pLeCW
z4^8&-E)w?vseaz|;XW|7pLc1v4>b4lE`IkxefxPAzWbnG{k)6beb9k^-UaVI2x$K#
zmzl_NJ)h^4>A74-QS9el;O?WG^m8w6_fe+%xfizkC~*DYqIS|!?t-Cg8ZTI&ZRQ;n
z*{<o=C;HUQwoiQ~wXG5QFGf~wMqQoOZKc&Uvg%TA+kQK(u9;QWq;)%Kb*;kmntHr$
zt1!LEy6wXBChK+z(`)MWcn5d03DzmWx|&%h*#zs9;FP+$mQArvDNd`bXOpZ`lG7?1
zg-K5GS~m;Rtf*`irdd(BRhZ_Wn~#r4pKb{d+|gJ}cpxX{UFyfcXG$QwiP~qkMwL9x
zl))2L+f1#UDTM51YhO<vXR_sxX`Y@w)M&*iPj-u;7pGiN+bBV~qPkgva)t66RyOH6
zBRxftj8-=3IwL);b~}f3osphay^}+_&PY$I-YrhKg6}8ADNoimwGxyos_P{v7ls0y
z>oGUR3<KINXqF5%5}-{W?D0vf*oM$DGj*AHR<MFzwt`W%g4qa9FACPM$~Ms|TcKUH
zLZ>tl-Lg%bl<vCnovoK9LoeOmx>>qL=?>R}9{;G?1RokX+u&jlwsPCy200|blmE&(
z;ENUC@1D8!ZOgm>ly5tGU-EuM>N>Yub(yPITIRB)n^|d@E48bz0}8jRu>7q2HISZW
zJGJsxF{xHxyKuWD*SS)=0m1U9onzR<XNc??dg&TQ=^AEu@lQ1k_5V6NFE|q=qJr{y
zM}2oO^Q2ae2U-zMG+*&c-YWV^61NvBYvGhy&!DyXPb|)D8GI5v+hLmTI%%`KBTe&y
zZ(aWU95@z+fb{Bswv&kr%P0U#HBl#!GOGh(NGWF)%PBvzqz2_5uw<{r6QiWTF?a~y
zQj=CXTKb;QGSksAz65TSnQA{Hw@S_8ne5W{#4f#F)<DQ)m%b-<rJ>dS61Y{ytoe-G
zQdvyrr7foO(iYQsX^ZK+n8mc??AR?ZJ4Opnr`+<@Nm&J#2V)e$CTAJ!q^yI>gRzQW
zljHT}q&Sb22h%QsO^)A}lj1&B5KOAAptrvn3*U@|Z^ptmW8sf%EEIAhNXN_ClhC)*
zhlrC3Xtnl)RwrXX)jk8e^7*B3GSEK*!}2+%5HQ`S#xi|hEK3_Gbh8@E^nI}`6*23J
z;8-b7me0vC)slzqsb;Twx``D@UM0OsC8J6uvk}}{DpSj<(p0NTrFNA{ogm9?vG74O
zEfc-w#IH*C-6>7A`;6WNIzIF!tl;5sL2*jl(>*?1-?{VMp<ry8018`X+CFXw*CsZ+
zA>VyR9t{>`<PyAh=+4~v+I1d@m!m^ywUX%%mHg>eVo&@IV-@BkVeE-DU59bz;3Im#
z0L^4>&s-lw%SA635ITFReA)AE-T46zHG{7iKT&RbgW;Rg!D}9KY54l!Pv3she_Ukb
z5&PZlxZM+L+#WmK$uZ^dvA-H0pDkAIvAr7K9iL9u-VZK5!Cu`q?~1jd!B5+==wJAW
z{k06MWBf@swU((_x^5YN(hR-RYX6C7pMwD#e{HV_*%?lpxIf4bsLtTd^U1`Ug39@1
zX|Jd&D((ITvB}iCy<2~9@i#ep{FsD;*wh=X?9~INE}bk^1UK)1k@ob~h38FpjBm~s
z%ZC-MTRW>0L(?_#%66yZ)yAFA-I2X<$g^8`(7X*F{JS?_d3JNMdZ7kz1i#wkKXFL1
z=WB1i@W~r{b!)G@HQ@hjX0Lu-`2Rjxx$eC?ZZ1~0FE}og&i-Pvg7M>Q;ke}GVl_jW
z{mtm_FvJq>bqB-F%ts#Jmp6-lc+;tk_1|trQ*TTzy|FuoG48%ejHcEk7Zc#p7|;x>
zyuBnJY@Wl_!~F3hu%H|S@xfc)K^e~P2Ki~RA!B<^7NfQ8!JoNH>@~Stua{4bk7ulD
zB;gof9z$!#O`2769>thDyM|Z2A5CL;dO5iGp?3xYo}8&tRpKBvlEMgnkQ{mODZe+*
zx5hReAxroTK(GE|_~Z2r7>dB<;mlp#x_7l&SggLZyy5@TySZ)<>EUGf2A|fS9v}Y(
zHXc_N>IKmiKC7OSk57XrmBZl={U3)z8cmPU3Wn+T7q5oBtMiM~tGbM}v6%wa@H@mB
zcXw;y%Q_{&pE@G`P{CoiN)*83DW|I&5D_4#DG1O6<a3ZR3LzD=PEv3!jTMWaw-<)(
z9k;!3Y~S6;!`lT(ZjX<T!XF+ODtZvar{GAACqkZ4+4gg{OBh@{p=hm6o{;+*>I_vG
z^4rJabw!KKVnL`7n9ZStft<+wQyNV`$k!2u)W9M`Se<!m|Czsbo;(?|Y5y6e@&$Q8
z04AuPy_v3uQgKS<`eEs!vW7^}Cr_1c?(FFV-?;P5jEB+qff~KuKWvP&^qlZ8H>Xn?
z=;3<aAKO!yZo(J7_YW+_<F356{nr4JHGr`o8VAAiQ^&ziVMTb<-;Cmq-k`9_kIudO
z_@gWL7H2~7(do!%r8M!85qtCo2K)FUdmBEA4RgH2_qZtF2yp2O2yoYzZ2Empql<n_
z>r{3^5_N0X124#LVYjIInsy!U9%S`%SpU+PD`?t3=e?r0KR!+CToOYlef@(~mY7pr
zz>dqa0<-T3O?)n9h(Q1}#w&(ssnuwu<$4ADnhe*q?`U!IZ|Ffxo@3k=%0;NqyvLZ1
z0M!Xjf}mmEGE?lsI;%}Zl?5J+)!Y=R8q~n?3inrP^g)C0KTEy684us~dxN$$e0`A>
zx;CGY0y)`?JPa0lj6c803pE^5a#+JE)}g0MqPHo9Up9Ja{NDjRXR|DUU<|#4ARjU*
zAo-oh4#IsH1YM#Ye`~6zh{k#$$1Z}FzY8&AAZs<w7c7gfn)gL`BOtZeKT?}CtdeLc
zJO_k+gJJp3U8xXkkYas_jW@^zSlU}0JUz!{LyU?#hSBNe#j7h=nb?z`jbBh_;R8KK
zKu|wxhYU*aMi1ZU;+sHh;%aM8q#dLzHt;SQ+v{}!7AT2N?W!F)!g@JHZq$NPhuTaO
z*f#Yd;v_JTvY0>D<>&e@B}Yoo^rURPQ+F^-v~C&Owrx9aY}>YN+qP}nwrxB4Vms;d
z-aY!_jB{0^en5??wQ8<s=0ZPhLFGm#6DPnHeHt-Ql`0$-w_#o*1{FbJOjigLnQQPK
zJ{-<#ZUmrezIh#(wnn8eRBu^V7Dp|UqLc#OguE_(H0#o{tU{%W7?e*r4+fwdKI1tZ
zVH}M7a}5@>uHOat*wm86KNlUC3}I!nZJ*x(%R<8t$nnx)furbYwZ`Z-{IOd*J5wgj
z4^h)UCgQg<-Sc$vdM4l(aTi5&@4@#}1MEkT@q%cYBg8~Rticpr6T=cik*w#JSba8a
zrdxEGQW%cildr<G#-yMu9NWjDYHyZSEendCuQUM(I+&7c9}nN{)8^-+QG<3DN24Co
zAzcrP+2<SWHg`HHeFYA|*gGsm!ik{`hd_=~2U!LJM$!B=mLmocjG|sC@5ZB7HICyT
zeeMR5)QVi{mEXJpCD(uPZrC+i6PZsmM{PP#y&2YtyHzARQl5f9!3y>esdM{6%kJ#v
z#qRC7=(&QO)m=N^A~G-cf3zRmh^Df1?FS&<z};>iF2R5mj9yAjzk!~zGN}no@w%<`
zW=EAFG=BM>HzmXK9><C=VH^?1%PfGACkO#UK+P+5mdenETfkq4Zo#qHHvSVBiH#EH
zx{?y6PM9wk4k1buEdm!NbE}Gk5_!?c2O_5-UXzM_jym7rT>vHU;L|J)JT^mtVBZIN
zD$7GdgJDW0+NX@D*rz}QbMq^{BhKcAf>5neMte)#h5Tc1O|CGgeJ@U=ZjeKl%`G;-
z%=Hn{Bw`hpPH#Mh3m7)1MN{Jlbdaa3WzZ5P1%_Ta<6)rzh#o`p{n0k#R_>!DQH>_S
zwxuMTdrKlXwbsU1NhxeJaFJLXzH6H35ox{(b-O_sZ4O@$%mYw<f;H#2coq5YU%Pok
z5oh}Tvg$w&_(+pO4zEzEWX6d3YKqNK!7fIVM<j|m5=w5}5Zn7Mfekx{yw^kUU|^*+
zAd8`>xl1e&J$7=<?>3IfRrBS5a}%-HFQiDa0|RAwiO90z#TOir<#kYjQ7$XAhh{=i
zg*(M&^eJIR!5|AvJ|0j|Phhp?r+$s3dd*%yONG0#F|DjNyyWUB!KwSJ%B$~u-|@#+
zCQYi9U8q^YZf56*eOkra0m^jee?Kw*Ae&5TcB7vZWZ@{W1(*Aw(SjyJfEO{c*+*!E
z`Vea*_M7T<kIK{7md2(oT$!FzGpcu(*vRf+84G(lWi&`vMx;V2S8P{(1dUq}cgT~k
zmC&~MhKYOu5$+@O0Hf`3wzGAg_jF&q76fz~?|l39bgBETV0d~aUhWBetF^nqQ*O$~
z8~ZU;3eXeMDh^GK|52HW9d<Tk)V@<vdKZEi+x(}n9hmE^yG)d<XlYOY!8xBaorqY<
zp*c2dNLh-%QcJ)imb?#*WYl+zRT!6(O^7489Qo<5ZxL}R8wnqNpzH`}u$=M)2w1G>
z&3`uo_m3d+7`V_6^&i2f!!^KYX{RfQPx{^UeAK&ZaE|JB8*tTplMK;**{1bzJ8os%
zr$T25u_8IowHYe>C8^w}&@kJlfOQl2^dc^R;B^%xewFMNU{f(PzyD&dH^58K8X0N>
zk`LvZ&atz}v4C(a{6r4)_D}!pX(>Fx-YXp*l#Mq!%d}}He|?zGL$N4aTnVz5gnRGQ
z-IMsUZhm`JH#$cntfC-~liX!c?0YjD49eO!2Sa&#ekq^ItgA`nWEf%M=iGiZzX(a=
zHmC#wMGNY3T=I|#s2OOfenHdVKL(CU|A0Y_fR<{!RF8XoZBt!n(k$tAT4UC5u|<tp
zC=T}VpXrirR0=^s%@3p=0xl`8r|3Qbxl8OWPda(G035v_bK2zF4t^bohXWnmGA=rj
zFEosn>7jo6my3>mj`ZN{lP-TI1na)C8b>y~WX8obN`(MhUQ32KVj@BxPaOveVNi{`
zXva!-`g%mEC*5_Iaei}UjLDo4C1H}ZkQ~GVE?q$Q2{{8Zp4Jt_gl))|1!jPnVkyCC
zh&XLqr|azTGu3NnDZpfxPL&T);UsUae|h8FME-PnMy1Xw21#T$_z9wPPO0k?DuFN6
zlPW}vew!&7#GI_f_o4_$sqLF|6*P%+0hOpXP6+NxVb}LbU$_(4r?PJrB&ytJqoR_z
zPD(7X@D^KPW03(zZ}GS+_;_MxI}D$_-Ea3Y@O4I>`C8bmP+3Le!BGs6B<k3N)qNl?
zUavPeWEUf+FL1lm?`~$FAi=&*^`Z`;Y&@<)PB*bSdu?iXv!C8RZT+f&gT~G4)3^*W
znKPypNeuk*>gCCtKc=NRU+tkxc2)Z4L5CZ@vuMEBO?T0E7IA$?IJCZB-!4wgANOsE
zMZdvBn#XTl^N2N)CvNfbM)+SeZo*Z@=2UnUV$J3-RB`4GunH$nS_mpe_|+;CmG|8B
zY^ZUO)isVEGKf*KeNwliD7@dJu8XpL4Sq#(lS_8@cU$&{VSC~}9|wQ&5asZF4ThTs
zuQIn`yRf<Sq2PNSTEbt=pKtQ|rw0v%Q>f&-C=>m%aBh{YU*`=EknBv#K7%$KdP|UQ
zC~y=U#yd*dgeovGeur;-c300kQ#0P(FZ|_?MF1vazl@riRj5(FM*oSPX?^9@G5o}>
z68<)1c-Curb*0bnxRua2H(JH#`<=e=kA?9^NO{}EKC((1-?(n%b8jq-&}M*GPSrzt
z;;By2?TB#`gf-xOFf#y*X9UcG`}~mbAE2dvTjg6^(4EQ{Q+{!UE~>v_@Z*jRp)QPD
zyRWmixA*>6HcBH&tIB1+z`$Y3)5X(sabW7^#{jb(X31rN&{J`+VO;QVelt&?*`!h*
zs<zaMTMgJ3waUQYfIgF;0z#Uqb$MtBfOhb04~7Ka_oat@>}QlkQwlo>RKdUTX!hd#
zUmziq-gLy#tia$kC5AN|{1L<6wCD{Kcdhmjv)w;o{UK6GoS1CnRvoOxQG8FQeXJjS
z)ieO4*)cIsC&y}5@|679YY&i=?|Sa6{+te2SB!8t3P?CqH|FICQYtdsT^e}`9{k3$
z8^a@JTaneR6dw^On+ay;CCc&M&2^pKj-UG9wXA{5GzvA`nj))fS+%vU>(mtsOjU!0
z<9dMa14XNNStVxf>)H)A+34K_1<2@K0<nBNPpP)ePSgK1Z^i3CAb?HTpIys*92c+E
zQwGvu@P`5scNaTVyQm1N1n__frs`#-ht8e3RRCKsM6X@Ds#>_NU8|__*$hE|Ili2o
ztCVo%!5Nt{S0R7W@BCEwb%w{SGoQrP{w{C+TZPEzQrmO#R1=FE1=0lHIKc0$Yvd*C
zoOE2SFSH=Qq`kXY;F??4tb7EpueDY#@|r`Ve}d_B{t%dfE{Zsivg0Su)J3mIE(49x
zZ2JI$+t<0xGF2>Qh!xDd14)YnFe+!sU_rpKyC6HZEpkt(U{JOy17P4dJr=pnF7Z35
zV44=HKtZqg3idUrzJ)H+mHPVa4f@NLZT#B$I?4tpCHf4;?6C-UOO2Z3sLSDj0R^Qi
z##Yd{qkM2=7s}b-tM))uWe8~U+x~~5_lE^B%F>DXqUA-n*TohYIfb+#nG^rDxI7Lo
zPY=$VT9S^-ULF_Ibc+*4{|~IK^#8-!`Gz7BQ5K;a%hAa4N;SZ$fhHH^{B>YdR>}e3
zRE~>05?|*4a&>}tUNJ=F7vwtXvzMmmqvBPd-7MQhuAm?es|=a?zv?>Y;7eQ<ZnQeU
zg@Cylpt%8e(Xk7*E!DLJE-w7D=Yz>J5OlC})^`_NFHVP+DsSkqC!H(*_1CX3xfH1Y
z)-hXJYKXSgv;;^iaW_208ennVhM9M}dwsX<%D6#HRSrDMusGW1D`62Tt-(n1K4TBS
z!+vOCrY3zLw!mmMgJSfLQOKLVsU3X2KO9z~*FNfJxOsfHMOCQ{vETm9ZoV{*NovoF
z5Y#>1{HN3B=8Ld9rxEBvpbZ|OM~H1}^y<-?zNt@L?Ayq5IBSaK<iQ<$^219Lj?)=F
z*wPoO)-X^V2EW`JCZ>PI?hqmzT;8tOa<-AHdE?HJOKYdY0}2_m9lq8{(e2k>B?4)G
z^U1;C-&TTs{KDo=TDz#v8S)g%-oC?}uon2^2^>ceF`l&_`9`%O0(-Yk)yQXj5Ca%!
zkf8;S@mqCCH(@x73qzicGN?7(kwrKZk{Fb$myKgFB84d~MWZSJm>sDOoDK7&S#x0K
zeGB{EsEq)h1}?2{tt01dAng?h@Qk1{Jn242@j}K*MP!XD_TQH|qOZ2&Q}(%BP+j;g
z+Vp?4I;`ej?_!>~P|9IFrgi5kUJB!6Bw`q31vJuYfWXC|raELP*Pq~D$r|C-o1!Gh
zqv(^}U{!*8_m&F0$J?o;VF*oC1)G>()nK}ICOoaCZ<G1A!)fra4*)Yf80seh3?8oP
zj}54G(o9<Yi>Y`|%a)~$nIn~$6)T&QmN}j<6X1n3X2T7FP^c%G4lH;uCLMHk>@;8{
z(#KkpPD1W*+GTN=Tb#0Y^D-=t|E7_Nozu!qa7gST+UO*alOp4ZT&P6_)A17JqWg`~
zKAgQ?=^E79j)T~93@0P1mZ;~~?=BZcR;1wkV-0VVG2nt~$PBt+v!Er#L@s_7S_xxb
z4keCMuMykQJZ52gW~pYKl#j5D8+|HsKVv7{Ero7vM<R?MgTb#1O+9%(m~@tdD(_rQ
zKK}iLWcaUNEzw51JPMt!Wh#j2)lIOWh-$<FV{79zYuryb0}_82W0z(_6`K(a>6<3=
zw0;p+RVPpt95(sYyZ#c!=`CV})ETc!(N=guf}~P`%A_(ur7%IKG(x3Vl1hC8Ly<Xa
zXCuZ<m><dD8h*WMw_=T2<vHe^^dadIAdD!}VBskJy}NK7q;=r(AM$+Cvs#b2nTDK_
zO1c`?qEh63dIz%Ht40#vc9(PN7SbQHj<05Me<k^lDP{pY9#2fKmx7RMclFb!$4&of
z`Ey=&$TFKYH1s?=vTY%70C!Tu5e?J_)wRPlDsUMcim|O+q9hX)#RY5fwnO1b5itgr
zjI312vAEZKy;ix5-jRL^%A?Iwwb<x%>*q-S-gSgC;AB7$pAr~xghaft;Ly$K1GdNw
z*FStAIRK#s+mij5LZW?VZf{Y;=Q!*<gRkWvN1xu*$_L%2gE@PQD6jOO>k%P1mqx(x
z=p1e_vsW*ZxKB#ijveLywVATjXOhU&CGoXA-#YJgz{226-Jz~z$#&O)-|961EAvk^
z@5(fSqs;w%W7IhRq&VqZII%Kc7l%UY+aI&KN$^oCb$_069cf%QRKyh$^|PJDfL#y4
zE7EFi^)8$oqh9%ernwaM*PaGBxx&rWg>h&)+|TBn6lkNTYuacMchAeC`)l{@?i{;w
zNT%+>v5B#tox>r7Ue>X;T|J&NkG}z#tbt5DxE5Ygv*?cjBKN{`6SR(K=!vQ-H?yIo
zfxAYxP5fm|Si_Zxzf`7odM?YAVkXwiA_S)h*o~sLvY@85w7(iWZHUkn88(_MI+J6i
zJ|na%q)2WPnW;308-x9E%l~qQ0SOC!34odJuhHRTc61^Xvnja;$kL72m%i`j@C{Nd
zci{?t1dSb625k=L*VBK<XVq5@b`%ZuLGQB74r!4Y>P3+!EkazSSW?ia0(pY0O8SVO
zR3o`BqfoVI4^oxr2U#;YM5IcpMLY&0<&VNnbD+RE#1xpoKofB8XPiQ7qAuFaswgde
zMXsq}8@iD`75+d9P-4}n&Kc?%5E!a%&rDja50ml=rM#=8yemfAt=87R77q%AB?6r}
zD2wjt1~i#GAn)6O>Pm)jAirVWkNuWq(M}e|N=9bURYtPXe9~23QA)eVg=)yG=tmaD
zWwZe@X78I60(aTG12;_#j8zUW6}^UyRoQRqg81QjDThm+iR3@rVt*1MS5Ye@3O_71
zhi~xnm<1#27x0+pcP6u1yvh_ST@<@sudzD7u)$!#IsB_nzcCyFZLZ0=)kC@|S;{kU
z9|8!6x!}IbG>*s^Gki+>*EeqGwZ#MH<%f>yG4`;ABzjg-k9<Wzo74$4m)?KVrZhv@
ziMA+q$Yy~{hmMTJENb(N`mtTw!%cOL{8_!Q3;F?z1x*=k&KL6QdSZX3U+jE(h5WWz
z+9M`3(xfq`NE>dh8dySC*sh^S&>E+9*G30v1z||ofiVY>{zvn+v+C%ct6)zHE<#;%
z(z=0ICRuv|6zfB>(L3ugJDbqvRQbBo=3KG&&{z1&j;#6wM4t?l>dD%oY58lXvo@{D
zefn#Yj(eT;`?_}iyYrpetFqmkh@@J^AyhSm@){g#la!O&bJNq^0id=2P<F7Db@}LK
z7Id)xEK~RU@$tZu%fr^i(lFe^S&*_;_HBTVR#Xqw0}yp{imO%B%gGzp7O&9=bJg@q
z0mn;$4Q5NCwicwTZ#Um{SPGc*MEk^-J_2jW&&;+&Z0QAKg}_2F-ov}oPQj(~1V6vU
zs4ZtVt9!)tg|d45f5YfQZFL<2SMAyp#*6M@!(LfriQEq0`~r9S$|e#qfc!}$JtGsb
z)%oVR@2e%v@2Mi;!iq)=u?9~?#3~DaaBVL_)bEV%GOpup?bBncFUp#?eohB9jepKq
z&9Aa!0r2&497cTpVNmFM42y!<oi^4M7l3Mas<k;XwqnEI35}7oYJsI9?eIDIfXodI
z9Q?yob=<y3d0i?;_2AZtzOn#TRfI@;J)6cGa)b~E0c^|(s?STg%t<RNT-#f@$kII;
z<Jv)vVso#VxYPSZ)jR0|c;n!Q_gO0cR<BDl-&A6~RxskV=7Qq`_l7JSE~gcjP*@kK
zPu2GdF~E)mVzV8?h{a;tycIE!Q>o9`OTl_+=8y}{f;S|7sNgrqkaU#eKf=-E_44r)
zaeiOeKWz;4DszXlVAf*~^!F-OEt<PL-e5M|zo8$1$dC{IH&9)D7mOb2y1sn-5$L;E
zV~qoH{hr_hjRsQP%FKIW{A~y==bCrSUdZauLHqNej**HUwgT}>4}APlxi-%U;T&N4
zn-*|)dibA?o*jFvzYokT;thp(-{g=L&IMs}=-sUm4K+^^Nu~++<hYGYVDR>R^BoR{
z!aq0;%95bD&o9Po7bD+!q`>~uJ84hBSJwD@gR={n|C*`4B3D<b?9JA0Yr)Re{JILy
z05sJX%$fAogUd+plMSEE&9LQzL>E%1>Sc)Uu{tNbX<Uj5vS4>C_fHkt92VzWLgDnQ
zMXMHU_sJB6>@Z@An@R!hzjn5!qMDWf`*C`E_uJ<+W6p&j;{EbD0tI^qd=eN$>+5Mh
zUaD;lCaQTvdpNIu$)UNQOWj!C^cJqE|MYP^#epl$;|$5v^M8R42U`O&Hd?=W_uF$W
zJi>3XusyODp0G<lEErm+?oa)WfQQR|VbmnivZph4CRzvCgacOq7O(J9JPIe*5|=4y
zYwUj9J)CEbTs`2|fRK=lhlgaPn+JR{`kQFi8Umsu8xd~J`>k1E>f(G7LMq@3&{hf)
zp2no&o%D+1-c(X*h6-?bjRS0SH9#o`V>GA;%zBZ<<tW8KwWh&V+Hr<E1l|*aug@1J
zQu6dR5YXnVM@2p>Ab-Wt%Xpm#ux%Hf;bgSKsMd?#6Cl~2g5!Q_Odp>^uh58iL+JrB
zxGjA7hM>vQhn;i1Y5FI2^OcgcGx=&1GD2A&Jp%419Ky)eG;jWmJmDGHfX7L#?M5fD
zB2aj;G@gjEj$g#vkgf`ZC0>#$cZv5PqqOnAGYyU@K+{1562teSVhUbT5x->~5yo?t
zCtD%DfupfdK+xz=12o~J2lm<foCRs43Io<?)gg0snEvf^w&<kL3RY>}smx@@GZM01
z`(WGn;UM`kD8rm&r4}Cfp~m1Gk~49cc!XH>hB}k^xr44v=gCY+Cwn*%F2wB*=KUI+
zTcSGJZPD+aosqxLg-SM@!lB{KEchUfj^!XJ`$1u|8el=gu609<?WM=ZEG`K_L;Av^
z6K#Z6zN!Q<{zhiJnu)*E5N5FT?~}COOg<vHT!fbx9!*}N?l0sAa~@yeVBoXvW_AJ7
z!$S+(`6b<q6ENMY_+!zy#BH9oWlGy8^~fyYz${_UEMZ6N3J}xs1U!A;OyN@O3RG3n
zNj<05yA$xVD6Mw^yRKsfL#Zr)<Ifc?Z@C}RgR7~ioi~LJhN+4>Z7UERx}KXbJEbXb
zdIp$k;VUU;f~4H+j$Iad?)lgBdZ@hM4etlx`K%&<R3rg<8IT^{2m<e>$P<l9l3-1+
zu1GV!Ml!<(Vqcy_akoD}(GA6%Ug1R@3Me-mfbX_E+Mm2w>oB(6Imif*qx~h?8=Y$`
z-fYgYLCu%=BK`@t!wfDri?fr6*#Kb|c^Wo!?pP`Pea<uMW73!8l|^NXcaAtZ@16#u
zND1k-<0aX>y$IS5kIz&h+GqP{WX+|V`FG<Jw2#j+UMz4aT5hM$jTE7iI*7*|8)=Lf
z|J%A`QMjQL!isS4fSC7vT>KxC-v(cZE?J1KFrn)hetfSCTL88kVSXQcg}eXs9CF8q
zpd6S5j581V&;B9{(a$2Tm;2b}XxGeeZuJf-FOjeu#LKi!4H|Wy`m(C)_l_&l>%VnZ
zgZNb{7>Ped`)8-hKI!S<<B8`l;XT&s^~V)rwF1qQ1&U0zK>Et3vZ)ivM^1DPdOqwQ
z3KF+>1PBD>e0#unn>i}x?A?b98{zj~!AUP6!5`RH+c2!+f9^>bFR%tbk%nr{4_$=T
zs3Q>cYO+YM?Ux822g(ca(1m#6O9Kbd*PwdTH$si^Dkl};bd}G7a&{ZcLP~_EOUVP}
zd_LgV{KrxPYfH_*Ihs>%OOl2q+=kb%4%>A;&BXU+NRwH6M)9#>Z7id&t_t@BWZHnG
z3E1%72kIcy#|%Te68g6i^)Xy>*Na{id5^jy8Sz9g<_=@fM=|c68B0fO2bGKRJQJiO
zmuI0qMpDJ#CDjS<@>Dr831m9cyBiJfrXOG{xfhM|iIX<Ce+-CcaWZ#%3`2RuN*aMk
zDn-NU_Jh%<!0J+AcPsxDt?T}O(M!{V=?D;XP!>r#7WeVwOSp>^i)W!<+Yq@~4^EG1
zmfPVHASofulugCn;mAxnqPVLz^g)=qP|V#ZrtXx}cO1-;>bVRXB{S`cXoc&`Ft>P9
za3~?n3<^;S=Bl}d*IW`3K%=4Bo7Lp(TG`taI{yFpw&W-SGxBA|P_juSP_mqjWIiU&
zcu>xy)aMO4=cbHvQ%U5`Yok8IP9~O(qe@;Nog|OV0`0p}YAV<Fa9GO)>fqc9k1lX*
zp4iI-K2ja)Ma@Sw+9@MsQ8<|zEr)xL|5rkR#86Ho5oSV4Ihm-Kei*cO_+#<Zh$_ks
zVP{9D7UhKzwNQeinV$vJUUyD=stA$?=7C}PCKWfrU64EZG&=}ngwMc$r%N0|un-O=
zz{Khq;&c+rYwlq!$JpF&+3;-2W{13aKz1@&6Eir~cn0k`b1?Z`uR9%Qdv%i2myT=~
zlf}i7JwPO#9*MVF8`S%dmBQ=WFG2o<kvVslNLLc#_Wi692bzv4d7){6DB7AFARmug
zO%xGlnG@$0WRr?tHOLk+e*s0v{jd2b;%5B_v9$d&W`0XWXzO(cwc44n0TztaeV8r!
zLM2A)obiet3lXSC3wh1?eWgn5@OtnKr7R?X*&aDn_v(A!lRB*08s0Oaq-UrLc3y%w
zjpk+tbu)}-kW<Wz5t4zb0XjvzHn|jfLhPs+&_LS}M?62i@Ml7d2<qhkf^5!VLSa7Z
zmIKPui7ux^JT{Y-?};>%c4=!%2mIRx)v?HwI+2=9Yw#h9yDLE|2{nyO&Ab(>L$PUO
zLZ{lb2n`uAjf~B*8LP&yXhdS?M)I;1sbNc}Vn>x&P4cNbi)M)vryipLC4%%sv)_<>
zLDo<q8<U!6cJ5H5T0*l|x>Mh>r%QM7->f;vRGLjtXal7H1hm&s5)id^tt2k5jVp&&
zVr=H#)9)rh+IQc-6nvv(U~iB*;5>IveWfJ1V)#p*p|*V>5mV^tMc9(=2=>v-fLr#~
zAOR_W-4Y1+ox%qIpzaE*GT`F`BH_Ak4oL@mS|;5Nw!Z3c0|qv$w7>|M?sXL`EJ0#@
zWrRjGun9#HV&O5}EpTxBDclj8+>#roQ&+OxrA9xOvu0;|$W4da9ZI}+HUl3<m#m|a
zD~z{GZvf&o4G#>b)3;q-xE=gVx66&EEMt^Z7<w_LM56nIy4dv-4fSZ6b(M8cUB{EK
zH6^<qxVnL0H(j&@;z<V%C~I$TP^k?$3iI1q!@^QiAikR#1Af*`iDPa-ZU)Qz+D=Mg
zY+A?2=B4>GvEgC_#4jRwHq43yvZiqdwkejt;h?v2B&t02h`k0w^LW1^bk00Ja?PCh
zl0$Qb?fKG%IVV3s1Z-3o16L!)C@kX*`CTXKg&EyIdJQ-zg%aGi!|MQgU6)?vdN2_3
zK$&gi!ggZeV(jS{{kbGaIF}boIN%lJ&W6U`po0~{US5Y^5#BYH@XPH^U+c*Mvt{16
z&&!4H^6htU-oFO)`wm$l%eV)#qPUo~rsRlnFwTsa6c&_A*QhdVv>s0DFHbVZF)0(P
zHIvgOwEuxIDnPS|uo~+rso(?eVbb_4PhLO~bHTbB4HqWc27=w_JTQ+|0Q-v$08P3k
z<ZQo7#Tmsq2>+_ALwy<yja!PrLk1CoWvf01pRxiJazl}VZ@F70KpIq^Do{1dD{Pq7
zG&V5~i(#$U2ya|3ESBD5{y=*sx3xEAP^foaY|bY+O80;H0|)o1P=Vh=uvmR_jU-ky
zl~-UDcV5O|ybK{(j}=vj^JRs2HWs03Cg{sR?q)RnK-184d>Ei#hid$R%{Tp9TIWjw
zzDha(MRw3v$rIP}bg>%upj#7G;d}HNil`G?@k*RRHhGRMjt)*rLvKg(_UT$=dCI;6
z!lV2xLy;KugC>j=4FmrjZdX2ZAPoJS1veHN8AxK{U@z2>vu0W<6S&5;=-OhI7Y|cQ
zM!ZH)Qo4aKJUFq3H$>|=%JIRiP6262C-{TUl0D`#wh}X1BKR>2Jz9~7kO4}xUZ84r
z|1=ChOi%MsDIn-7BBUo1P<*{{Qe)zDD6I@8K0EF@CZ|8a$cBXn;}f6%m|dbrB0R$y
zvt5k*g+l{!&j{hibqJBMiBc3d(5t;{>(Qw*baBF2Vleo1r`wu3<&B|^&5z3gO}!0;
z!OIZNp57ZXm)R1HF?;nqv<g{S!3yOIZBo*-1$TjFo&^*^I6)QMKL-xN`aX`B{eU!$
z@U~3eWL#DupaMpOhCPvt{cgO4G=As44?qb(Dn?>bnS07l!$f9Knco}ok5J#VH9CI7
z?n;hYBf8Tsnia+i=N6#s2Z<nslSh%no0fyP>nDH=8X|^Xt{eh;-pzA_WnnsPdJO97
z1Xdg)UpLSd&hYV^s{ccc!itkY^ocz-iiM(&RO2K*eaom1*;05?L6jNT5MOma=_b8x
zad0t{lxcBiepJ{4_TwhhO+@+CE=N4~L17n0FS?&1EfPFnK-5LrwFtgDBo+?z9Dz9S
z!6xjDLC~9n0Aw(4CDFcP3==_u$o>V{n`K*KDji}N<7D;CZ_wme2FCvZ%TRBR3osX1
zPFe+^ja(^Qyu$vpS-E_66xwGPt5DG%11;rVZDrTGz!HGP1K1>*R0ruijQ5=nf)~9T
zfax+kota|f-U6qysH$04-Jpm!K8;EpwAG_vjg*Wj|12;052aiw^K#?IxuM~eInjT|
zz;4PVtY@c&JH1XDk5>M_rT^Xi<Ae;qdNu}1T8|21a0I^@g5{;SkM6My-r+|)T?kNg
zqf*WCkXdgTNKMM(?1h%Y+1~q<vDxS6#_xW9i(SS*I^JVg!0P%3e<_)E&f~`VCoe$`
zYa7?hCxgSR(f(X3Q_v~W->EIE)u^Y#GB7-e6I@TNEC#LjP`W2;w1`rn6Va-!O(BWd
zipRI`PO^MBNlB%bqt)AGfB)z_p?*v~8en2s?8x@!(~0(jB~J={KIDF=>dNd|T2J|4
z&22cpoAdW0>oG2$NTsP-HD-q`KcMD25WUk(tJx}CiLPFK5@xlPtDC7_Rje9GNZcyz
zW<UWwdhxRyc%$-nTvbMit^*wm#CN;2psy+GvE%(dd3HXq2}K-TI$TxW%UOKBWWKV9
z3%iq?1#8!ee}My+d67v>hyf?21u8@I2A-FBiFURA%BXcWrN{+D{`W5hiz>yQy9&js
zAUE$E{<f0xv0G%@TwFT}$qs#&fo@AKDcU#F`?9R@8^o%%=#G3G35PdAtW)B+=71H&
zKp2LcdPXK4Nqt<boPr)DDk$$CDv(>G#6pR_WBMrr>tY0BCKz|Vg@$i&Yl&uLG$dGa
zh=&XVFtC(9z^Ve#e|GYbX9?ru<doL8lL$D1PEEqschxX|F$Cx{7SjWxWEnA_9`wG%
zVl0p#KXfRwspN@>j{?T|i0hsj9-17yn3)4o1%`Jm^+cxE<O0#G8Ze)>I;i<6V5r0D
zSoB0ZMb5&r=9*DVYDc+h4GnB?#~CkzflB+~?E65xhbCJv4e`CtNI`x;gg&<rI=bYD
zilTTXMe9*W31W<d?Mx38-RnwsT1QndzFFa}S<zOX6Tk~-u~tw(WW%f{PC7)j6$&)9
z76h1H5mr!1;-b^y*@nUS>G@C?w2S3fw2rrg^Z}?E>)ePh*=L`^Kd+F65lJ!j;6Pz?
zqllsh2l&JgB={6ofQu6NJUHEb+X#{C&<#AUhV&GM6o)LNV{|mQ?2M#TdfoTi{oc;H
zFu;!s|NCYK4S0WAf5x^}LrtfzhqBf|%%)E-2udr3IyVwR?4bv-q9S?=(U%gtAId!r
zxWxD*+EmFbfsz~D$PHgMRfat-3l|l8XR5%6p_5SOSIk^ZA!jaK=N2xeE(#KKZYqW=
z6;Uw?j5?SRuIgzwB4EQA!H(KQbtaow!99#w7$AcIP_5`GYP_ZlbY^tY)VHX|p+PN%
z4j?bI91%zhM#ueC(~m_FCfCkKdp#2dDRdV@Mbtk?b@z{)!-)@fWWa-^fW18~O7<6S
zHt#X!3xxw!`E2m^#}oZ_ReRr1wP1(RP-@tLBDAXCVfYalF`Wui8c=QPV(Y3|twrHC
zVHZ_{%?~sW2UJd0zrx}ViLF)uNGXqr7M%o>A7C2>pb~*d04h(`JhFDxI36Yj!;mcq
z2kZ{Xjt4wB55E~Er<Bl*9MfE(A2EXN(v3{h4;KS0#O<x-qis9iB2n{ELHAjtqK|}7
zc!yH$;8p;{bQEjUQs~l&fcgty!<MMvDrFg8;Jmlk#xm25IT#^f>6?ntiNxkzaaDWu
zE-JH0Y0nZS4_cqWUh!A?I)%LjoIupu(P{J@tJ}SD)z&w|M_GTKkD+(X5qnre-lRE;
zo&yoKAC+??xQLG1B7DM0ek6P_e$BXiw52()Z^T4KJ-mweI$p@};S)a>iJvDXX{6J?
z3YYyS1R{J4joa0lJq22T9~eZlaE4prfd6xoI0)yo8nd`h_EEbPql3|S->Aao4nJMK
zCe@xcW<I4WRbgQ?RW>K9)m!Hp&E}<*%hoFVS>3GSC=IGC$yKUYMR}1P87DHP>>17s
zxx+HxyL@S}TbVF-91BzQcce!XqG5Twgo&*4AxaN1n!(}ahb%B8`}ld@OMC+o`3P7O
zAGSZ{%fwS|G+q$50hdo)GRgNerS7+dxF*$MB>iZKSM;DmIA`>r_bR?yqAfh!Xcu6o
zFl0vDgEMMXm+dF-n+ty!ryI&L^HSnH((&DWiWzlI;WD<E*TV1|E?)L5ZROyV>Af(t
zzxe@%*Qm?PemAgT_Oju0hZGI%?}cuy{}<pk<YQs`{yzZj%l`qm2PQAqbz^R0=XP`Q
z2G!;v+_t)@w?o1BY^DnR4#;2B)?kD_lWZ+%m71J)n0|^eJ8hePgq!CE{9&i1s0Cd^
zk;!l1AA%U0Zg}ub$r!};>0uw5(ivI;RoDVFzHPq;M)r=c?v0RO!tV^58`a<kZvM&Q
z_ltXZUJgEz@r<&YMV$;quGZwNL=;@0z3=AG^I#CKF%<jXe_9LovHQIAcyy_6|9oCg
zzm#HBUV?@RIb6}RQP00Mg&ZZOMXkWxKeHG09l5ukqDLFV-XZ#7X`A1}+i|rt{Ae+>
z^}n@0joxgBS=9c2Kz6=83`eNSu5Kbcn^f_EyL&SGX0;BufcM?1n|$bFrGT%d_4s;j
z(ulc@ms2L{kS;iT*&mGUX@&JaF!oFk1FZOQw#UY&L%1bqP*V04&kGVGhTQf0H6nwT
z=@JV*ICz5rf@x&Xaeai`0&#MHo23xc<CQf>wGm$AKBLu(P9MAX^NUUn+uLX^AA?yc
zqmn0+)0!JJ_{P89p{b()tEnT=w|j~Y_i*%iMESdY4ryC~Gj6voZPtnRQInms0(O{1
z3{qwRz6{n!p@3DKS3ItGN$q<1LbdHRH|#to|6cWK?{q`o5$H|qbDPz+w}2wRKPU?{
zMzI#iV!lA;VN<7rMPzb1x2c$>pekw#FnC_!5I8rVJxT6g|5NBbLkVw;RES9Yf_Rn;
z?e@E&FUlfreWL{D&fYYU5-^}4d!oR3=!udTN3<P-HccL*k-$~DSrB>vETWh6yqDea
zjDSkPfA+?owi_RXX?o#W-56ajyvDPB2c~AXGg||4=FC0Z*f4WoYwWBMiJkS-SQ~{@
zLe#t3^C>N!^$WPvgg8MYyV3uB!D<PEOgVO_+w^91Y&|ED69g)`_R{fUt}Jycp3BMq
zCBwEk+I$!fUzMzf^lj(LJ!Mq3xgn+QnTNW-Z0s8l5xT)asR8hj)94h;&?^d(M*heL
zdNwEJ<6V=LHxeW#tmOz(plr2Of!>s&3b;%}{^$Z_-GI?{ZI4>^xjzJRGBCI8d_(&V
zr_u}kw{Zs85JoZEpPSyz7n35`fGX+Gkt!9FTrV1-NTn`NfnsxPjt6Fg14@DG=$^4j
zv0R#h7u9mF!fAsd+#OA&x)v=D|J$ii&FiKsj=1;O*q3C}lth&4^_nM%2-@nsG)yA{
zBRS)s@Rki@>6wF*v`JAK5eLi=;A%qcD42vl#@YdwS3e^iQ#*o^4hNB#2(CX_(OS-8
z;_L-(QZ$K+V|Hy(MAtg|-Aepga{GHwQiYV|*YWA6I>k`4{*z~FvG?-Z8~WW>XS7F`
zKQfYl$QyQ7?c1_mZQ;zby@>R}Ip*@Ph-VP|pq6_{j0`Jd#|{jkue0shPR)M8B7?Sc
zaR|(NA9tNzNr*dN+@~Z6{cf(*Ugfn`0K45zc>iRJ2GidT2xk0I4Sxs8oL;|V>KoJm
zZs#_jmmi+1M~{;U;C})m`fztXqgCfY{TGYuv)Jd$`L=Z-6W4#%#=iFFqj;U2c;y?I
z!sG1n{al(EsagYm`1}3pc3UDZQ#-qRvb1-)|K_f@|A09C{mJ`%ZH8a@z5RXvJ^x&)
z$`ijGYh&;6&C}kChu;4M?GF~Um8}Cq@9_=N*3QxC<?d_uTp<7m)7F7Um_HfGiULf=
z(PlS)oX9->AoW=4;`D;mlk4ki@5cBN&hqwPY#%(;)!mbCO>8S;4~lAXODkhPzV5>2
z;Uy72dKdY<dU!gNxy3QQh1w+je_3tdML!=;56YxydyEzFU~%U6Uhwi<U%!o^7mAKE
zboQPmz(rIaE?`-p)HK*m`nOg$SHQdgMtw3MO+HqRb>G{%+kBjElCv^9Sv|mn7Vj5+
zx$oaAA0NLXw%EJx9JzuQU%9nej#$zu`ibtVk__prE@`GYlBJ!Vo?O31+AT|)DdLR?
z<}ROqpTPdb!jj1}8?#R)<kuu@A^`*tfdmjhXZX8%-6F>UWNrApJw09BpZ0DB-fm8h
zndiP<_js(+TW6yyGqegfL)V~$j~`bz(V5^B{G8>0d41pZUJ@+b-MwAzzdu7=U4KR%
zs`!2S^99rFyn(H{h;{b0b+b`^boSfd(EJ2E797?i`aM1Vcduf4g=?4zz|QNF_Z)55
zOb|oGvBMv%HS|550xk#N090Fqc|AHi`bq}Rv^MpFL@e-Be^WmBJ%?ogcE=%)q6g=g
z(6x4NqwJoa1El-2=Kw-mdd^>j_u0O)<+>)FxoafTbYvLVCZFY3cM`8ngk=i5u_r|G
zV{=y0jQ5=`c!M2x88G+hFJ3o%1PE0#ptmO!yHUed{|Ib%6o+IUZwJC3CG#iGr}T>u
zmCV7$7T!+zf$Qff$bBI3XB}xEJgj=XK#kZ?Tkz|1hQAN2X&q1C08sl+r2fF>i>}eJ
z6mUa65Cc%bB@XcI7|C;De)tgt^GN3T>vd2?b!7_^fC*+z7Ftu7Fm}t(F1+?A(k_I+
zSt2?*&U}(X+C}wF!I7yWY{4P1fFI_z$D_u_CTvwJuy772o2YgKT=>K>Y6kYb^EuUH
za%YysOIWlemGEEw8CE%2u2PQ_cSzx-m6LZ;>xEg{TdFC>tuYnK)f#pHDq&;vIVzQ#
zF9n$RAD>d|OIgo1SO1;LG%9_-_k-}Dyt-xL+}k-@jkCRO=B>m0Gc)xJ)qX~Jrva6x
z#IeLq<fC8?eZ1YVFh&C$b#(DNx1WNY(#EiQ!0$2o_ARuh$!ZKxAxQFn=EMPy@*&q#
zor3rfqJ^WGJG$t@i#AbBSVHGLsMgxku67qzTY5vi)hObbPe7XV>a#khO*YY@Ap#BP
z5C8y$cBpW^fep091LR%c2<8LMCEEqz|3xi~R}HoXEWFy~o0x~fFBUTZyae9Z;IM2`
zTBfJHJ$U~YTreEr(y`}|R+N_BZ44p+WPR!@`4=p~hJ+z2`~u!~(tupnI~1L}5yx6Q
zZ_pD^y%=u|mdLjTffMp@q1N$E1g)uEo<ZYtG$(4bgTsP#=UX(n5O;+Rj5W}u1{!~$
znr#>DWu}QkI%8~<8#*l)ah4(wzcK?9ez))BLXIN3V(mL$1H^$)Sp(btiz$jU=n^Lg
z_3RK5avjJ#a(Ce#aDeE_nvXbNg7Lz_qVL)6vQep`fxXuc;37)k9bKo31#4<d6`7vA
zOnF2@RobHbszNH!EFOt_s4I;_g(@O6LDzzaSn|@VP#|K!Ok2O2q)>nXXZh?y3~Z=R
z43=zj5n(Q0?9=hki&&tgk%iDNDJL2rh(Xbk7YHV5Ac!F+8$&BjMBIW+_Egz=56erp
z2Ghk=-;i`zsm*9<W{KnQRG1d|aBwt(iiqZ6(`^j;+~Qa$C^H;}#i^CdgDc~zXw+-f
z)AV|_=k&^2GVK1mXwHgV%&j|{764AntG}z69&?PBIv27Lx^rg;h}2-K{h4gCQ=oIK
zj|y^w!#&;@lsFUg?!#EuLXk2#e6Y3YK4z_q_l+v00nt^2I@KWf-HZ^uzIr&sI7j5@
z`V7cBEzV^y_&SgK;6odTHAsf#dZC`=Q6Um4)^8Ay^$NI!B6OCDy#D>-In4`K&fjU(
zoa1duL5!aB(opBa7VbSPFMu1#{RZA=49rUdp1jAj3Gh^oGjG2_5Af6uAz%79b7702
zv`EV)weCRb#p8VDTPdjb;&t25VUmCsF(11jb<&x0b+&CXR!WJWXx?B(A0w#?jyuCE
zL5_cHKo}op!jFdv<e?09>_8Yl)<Ix_jUwRv{^u6##Bb3q2l}ogneWg7IFE8<D4QZ;
zkRsxu4Lq-MWEc}c@KHfpXb7fgnpz;c1QJHZGrMOESdCndL}pSO9|u4bO_fgU`EfpH
zqTd_u{@e#YcJIyx`LX0l0AX)3#Y+N2SBvvhkXm^;<k_8LLRThi42Rfa@ojfG3hQF`
zZFV^l`&+tQj>bN3Ijv$g=qJj;&zb||T$D#~-oE?lFK^kfV$K}-fmPdJkVx0kablH5
zZdEaGQb+gg=P+<mM)%GCmbB4*cQqJSb$nuy{Tg+H&Dw*yR?D}l9c<qQnJ#))JJ+h8
z-u0fox>k$Vm75vcl|3~}zmOn}gO;7HS97fuzhP#1C<fgz?-cjizdmN^5=Yafze<el
z69@1*3m35KGG-_>`>@-3VxO_5|CB4hZZ_`qdoaI`oD>4IWrc<}3z<C<NalktPWwL3
z^^W4u%!TuaEeq!or7d>`lJKWiUI<w1Gl3&QMB9qfD<EFK>UPcp^3iS1-2iCjVJZ&3
zDlAX*X#`7;+zrDn^=FYg_{~g_4`;_OPrG#!?_-7+)J|j;y!Q4uGTWGO{C?Q%-8~}v
zJ;Yo_Xm7*a(0oz;itlza<CA{4KtvVa045P^0>^+5BTqq!C#}RWWDIcB=hvyu64MVM
zzKh{>h)OpmE^nIc!|j27qRGc*8DZ&$x$#x>1rCafy-m=%>TP!=shXXAAIy+02OtJ(
zk%CGE`iLgMe=)h(MIR7H3$f!{nKQUA<rW@~3l9mk3tBa3p(CxC(@|QrrmbePnN%v$
zRlxtjC<^GIn|Ux?&SW>R|FJvNVxb#?yyZZ3{byjlY^WHM07qkZeFbyN>|X^2XTx{*
zO>)EIP0#GBF{o3ZS%GmV6?F`6?dH+#P?0~cq$whl#&$P!`km4im5&(N&X)_2>G?*w
zE?-n+oGZ@)>s-WcAL9gfT`9}0cHu&pfP_!>gNDE)ho?fPQfqEyKfqpmWPs8in3MM4
zN&L~{f-nAg6QCr*;6Bb7T|GVZ06+64b!zI+6cF2d1y1_X(QapUdp>Tjotk_c`U=AJ
zzwOEM@oQ=rg3tmCDVRC<bZ5jUnrpKy3If?X&x3hIJ@vP=Lej?5a4;M54EBh5ggiWC
z*X>v7&O4a?hO#WKwQ;&W!ujoa!ECX*1N4#SvRcZaA}1|cA}@tq<wDI$&ju6#S#q}f
zVt<Bs8F>}yrir(zN>?Bej#=6p--&%n{&J>GI@~`)QV+99niFL^v`2V4Y&U*a@XD+K
zui2}U0i_<Dfzf50p}4wqQ*SJssIhfepWUv3Ip?W*V5|@Z+JpH8e!*usHeKAdSDJ$=
z%y}6q43lm3P$xt&5HfZv@k4P{lg6xG!OlC6sn$6I!-PI1j_4;|A;_cvznZTF?BGqz
zID1n4Po&Tm&qn;67!?z>ZyP@haaf78XtB{8J{+;Hhe`SjRp`4{ZLFrSeLnhfT4#qQ
zjKTn7x4O~suv>Z1@r;!yi`D{~EU#cyIxUK{Y8W&rihRtoIg2M=TOpuj1;2<|aa!uR
z(y(|B%+x|6qM$MT@fq^le|(R7q!*#|Z6_fEX+;OO1D~f8YLXM3Jd~~Sk!lVu*-wM@
zKvgCxn(!qqRJ!>@oP>2I|5-BLi%Pkz1HTkLLzJNP8nQ(Q5JQSXqe+1X)P<w2`^|#c
zhTz0PyI)207mV{E4I7CJlb|z(#zt?mua7%Ps1xi#>f?YKf4hs6#`4MSt|V}uz#t;&
zL6H;4l`@JWV){QghI)e%HluWl|1i}PEXN5<{$|u60_D&ZE~PFIqVOv#&~w5iU?gN(
zK<r=T{DNWner=iv>R5~{KdbCP^)d@0har?txB;<ih8$XS3|e5q($lS5yKfcpvCtY2
zlu$+ta^)t=gEv)&40^D|-uPFnPF|5x*9&CDRdSwDqKX|1r3KbW*=iOPbTF^6Jb{#D
zG@9p|Ua(LkjFBwVsVTG(F{NGNYIN`LNcxV1&lvbOu5(}Z@N)b40k<nxu<J2WnJl~O
zYU3px#|Y)5)(0Yq>~?!J6``V>Nx)H>t2uc>r5HA8Sy<gJ<KGU;ilVHaghjpf^CEJK
zkoL<J%iRtJCF*fKxd)1Kudr}GoZYFHh)dX_1T(TL>S@E6q_|;%G&Oh>S8T!=#Ypt9
zwFS1gnj0VRCG#|K@P@b@qFFT@v^#_Mcm(_eysh_o0rPEdgD7tnK&r>Hu>+>KS{W*q
z$4tP*)W<(#YZX{_MMH@0?eUPvQ*Vf8s8TW0cidTGd`7JOd1;c!INED&hhld<a_3t*
zbY`TlT#2-<9*t$B^pqUv10?AYN|#MNEHpDXFZY1n&l!Fm6X%2<G_W3U!_y!AI6t>)
zkR$@i{!>bP!%P`2k|*8gJ@U*$y1U3;NZFy3>MiSFsM8I(8~-Au?@Ls}Z>np^5<v6^
zW;)$(<yH9fP)%;<Sg&X|3sB|sTSeaBqwJ&)EQjQC{wM-@J4rCk&}>yaInyLvHo_TK
z3w{t&x;R?Eg10RZk}hOFRIC^s%ebAZ)eGW!Ut5=B1`blDRu`X-oBn`M<@|`ocR|D`
zY=_oekNW-&he`)Y0);<FZA?4%i`xh-s-o3EL|VM7Q)}WzGZ)4>Rs+9+8+5zbhV2#*
z#r-zP%MPD<xE6Fn^i=ZGtuW=Pv7BYcx&$jv<gBd%#m28SZ5q>cZdOfDL?6#Ay};8T
zmNcjr(n7Z5aTqe$h;{vN)@?8nEBhHjvx$3IWGmNb&+?+#c^vo``gSzwl-6T)<{5P=
zV<BofhB8nzMcj>nm4#rM4&)H}R=}wigNC^;aNvDDn?}$bi8nIIi#4FCOktzwQG7e@
zlILhvIeqv{1W$T>V{`dkIk|px6-vr_$lk)MQgz)al`;|F)w&4W@GUB5i}hmrkvdXY
zmHgs#w=qN~pF!VVM<J7p>q_i+)V@Qp)9M<BG=)1aHar9Ry>d?>DOoKklWkvEy@{#j
z%pBxtAbY*52z8#aP$9#<WT;*@Z;m0go;@ymhXif!U$L^#{_jyxk#=apJTl@+zR6&X
zmZc$-9nV)>wl_;PNmq_VBO&TGCtoxSSjp_B*JUtoqYd1yJ-gf6K5&HgNgdNZgY2PR
zxfrdPT>>~A(SdqdPzB0V&NvfpEQPA1by%F42L`xfrv3=mb?#`uWS8Q2BCsq)U0|?8
zH#4<n6pgufO~_ek+S!~YRbUloLbvT*0v|YY5<8y5q%f#!NDb9mg49rUjdcLw5Xlr=
z`pUG?G^6o=dmiH@rGr%g9*KnM&W&t>Mu`+VHI@#FUHbNFv)=NGe!JB@hCQ!+axS?^
z52htKMvi5A!rtD|JeGc8!#C=FZ#U5NTa;os1Zd!7%&!Q*KH|+zb%~{K*zYs(_aH9k
zHz08--+DA&qwPap<b!JjnJ;|*-vO09=F7Pd%UHVHE5A{sQ&a^sOggAhT6YXibeVJH
z<ivd5uf@dIm3l3R@&f}C1Lz<&ph#N5f?ELZxC!*Pb4qxJ;&Cbi)ILZsR7CDgUa3-Q
zo&g>=au^<r?a9kap=tK_-QTc7-CC8Fah-UH&1Sv<e!Q!h(dIhi(vx*Uc#Mx!sX-Q?
zLWWViPQf*?<sxg4pLaB$ZzSKp;-n_D;aHv1Rt(RtsLZ;RNCaatz0Ku+a{E(Ns^)4m
zccjkDi$<yWnH}q)wx@!HIG+y`e(xlH8}Q!%3$Q5D1Q{Li3OnE%+b$l32{WMw1APBR
z@%cvZ@modc3Q-N!;@9#m(dr70goAJXg@?8#E-D*=p@oLMeNp^P;Q8$4I0hREEE??4
zY$l}fM?d{5N4FN~<uBDEKxT-i#JGwJ7C^-M&m2u$7#c}=%k9eY`|t~V?N<YiZyOXP
z1}LZQSIf1`i0rYiMKg!~;}`w=H?HP)ij%5kvafUyeWtJdw2rs!Oe!zI%efCSTs`e!
z?srLk*_&v(&^jAyut(6tn}EFKaEg5jCy@$GfT{^OBb`QMWFvXHmi<v{5)_fI|It5<
z?enV#*)ZCV>}=t-&y`?O&+Aq63khW#=&XsVdw&~#C6L;$y4SHmmqfpx=+EK`pG2y*
z$Jvxogwpe}X!3haw-V^~^W>0S>53SgxCbsdV1mU$tytv)J)<-0p`je^;SUPx>)|UO
zjObonE0|5dQcOY}{_qEtspO*fMn47Ema93<RGR^bXN+1?<UIx)At&l|XLUTDNd40_
z{eJ+fKvcgQq>FC;>*ZFmbxCrmoRe<d+SIzja!H9->|it6)VWJjw=4O^pn&Jf8HBts
z!G|>?`J1%;(zwLiH$+Ce_4p<O8?pIFps)i$rhtT=AWkMR92l@?CjP>qd)C~M41_`k
z8JuhyFf6v)|3%$*hQq8)YRym!`q}iTub=hnYrv=e2p=Bt!zcLg2|ql*hld?0gji;7
zQ3ak+Nk4~X2~2mS9w^0oq27_e=}1uTNKo%+cj_UQ`=XwL>5kN!yJL7QWgE$&(~*Ns
z!|zA>{fU0>N*e_S$?TCygBXh)dk;<ir0IJcHOSbdUlMesR|@#;k)N%mdtv-T!A^&Q
z@ec*#AMVchh~>T*U%_<8p&xqpd!gR3!0A{}?^sapcz5a{miwZfg6WRbn|brysW+P8
zl_;9aeKKK+f=4WPlnOp!!6&KU0Sma{bqC&rUhIXsh*q*AaX)sYwX@y_t(|p()~rCH
zH7k&4%?kEJYv{$EXzi?%9oZYL(Q{2l?8CG!e3;?x#oj)ARf#p@9n@09M@zO681eUa
zLrJF4svwDyo0q>geq<YlznkKNB|bEN+*N}o#EQ}?wUd!?%OFuUL58b^o+L&YqNJ%~
zW)LNZ1c*uo5lT^^@#B_^Lb2i&;nELW;YJz}W9cU)R*X`bd!w1O^s5-h399ank_Z(q
zh!Ud|37<Ti;<>a(q*h5n<<V*B!`SeaSqOS4F{6@4USdThpLj_#DtXug#--@E8Rsn=
zI+Yo?>;*n&n3}NmWr(LO+>E8$oapGQEW?LYE>#n^E=mi(=pIICLMQ@&D8Wloq)5uc
zz0pF<i#%fFh}-yb>i)Rk!`yi?K9C2O(E*`8Yk)o?eKpmumio0>{jD-higNp88-*3}
zXty6O(#SS`WmYiG7;n!7jBWV!ADw0+0+8Z4>CoM<q#v?c5q631R^oe8d@r!(a_pnw
z<LI;RH6up>+SCJ;vRPdyjv`+#2F`MsrZ1<gex=i43M&TtdYE2H_=EH+$RDH^L;fJK
zCYJA2itHQK!+rFu41XKOV&ny3wilGl1r9d*O9}Q9z_VSKG~i1iAGpKSx+wC-Gc<3|
z?@#pmp=@_pbA`LZ8r-f^y_|i$8vL2|;nD`)N$%I3Lfdm)S9`hJ@)R0z``h+8+isL-
zjH2bl1|lbE589>Ad1jv!8oT#7+fJL}r$b`kLt@~=tr@tNYjo8&`n=?_X)K(6Sg2ph
z>Ig3h*GlpEQTqHz`doT^ErwIEj;kvkhFgoVlDsH$mw|%?Uat=qe&|-1*TA<^fAF)=
zK<!uG7(ctg+?}$P#o6-=BRaANb#`D4dJJ2#(xs`Y=8tS~NSC5V&TK(pC^Oplq;2C<
zs;Z@J!z$FqgSL%_RMn=ojppVyOl?2RjD9?7`|-HBjR$QT+I~E0`?0AFOWTH((Z(lj
z8=piQ+p?<A`K@1xSAW^rPdVkNtZ^jVga0pj1{LuE&h`H_8!fBRPTl{rn_v8Y|A_v-
z?JxenTK`}9-gCASuw3gP_ZocC6POZpus)9b>C}Hfe_s|#f_!uX_v6Y7Fw^B=LiUMz
zV~ZGYDd*gR@gE9B+uGILR*Ah)#MP1-O*vY!(kFMb<i!|o!%)2_->#;~BJ#!5_*xno
zZ_BicrpApkRb(<(&OD#iq^n^xW`c5Z%!%^eh7#SA!@VJV2z!&c^)aM$feun9K|ByU
z{n8&37HgLdEVJ4zab`&%vQ8|Eqp~W43(?@-4J9g+J{6SO#*?`0)=M_8IgZ;v1^LiR
z3HhR+5u2%~)%?RRF(}zbU&V1+ZHJUNN$T}Ltn&mrj78!zC5#+Co+uC1gM~Djs%cyr
zL^6!i_taU6mYG<TB(42hdv3x=nsI89;E2Q6P=2Tw$u(yu+x0MI_}{*d3g+d^{M&cC
zpk&}CbE$JIEM~Tx$T6L3r)^>5d9ew@jm<iQH&2`bCg+UVBpaVMO)8^ASSxNdR!Ghf
zPUeG-@N%|X1<)dt(-5n*F>YaI?81YC0wb8U`DU{&x%K9`R~%CvnB;c5D&@&?ti%ZA
zRqp2kHr2ZMfO+L)Ya(rUJS!Zt2JKdLwg63KkVJiJ+f^SO`Lh)tCsTkWqc$rT)ud$c
zN_IE1-WXM78OB32r&FtAE;Ok8Jiie+aomnl4juNjoS%>p{o*7U4lR*aTJ5jItm#zp
zGIyq*_LHyn<%g+P<);C@3Qa!)AW|v518qX7l}=aBsoe|al>nGBuy!Ld`t|RZ$N=#-
zN#r9fk&iwHk(CPqvivSYP<Y!?lE~~p&CG5dp@chfcI>EhV?gR==6!);K=CkWzbpz6
zf0HbFsAbW^&%vUz!qPVJeXxP*64^&HhmjpX{6;80NWMCdAEsU%WDFX}>OscH;iIP?
zKD=j9c#ixwnEhDG?8k+feJd{a=D&S1i9E%ePA?G3pVh5|aVmdTU*m*)f$;yV=)U~D
zxS(In*5-Djv$Ou)(fQSObbj@#&{^4yGN+%+O=3~ixT18umgv%6^$$6IcYc6kc}IcM
zpR{FAQ?}h(qzTu{WY!JD6M4GP7NG7(9hp%=7oZLk+t7qIHW@D~1gFNUW3C+r_>`CI
zll;W(j-};x6~}#%I7{w+MiNgyH;G?-UJ_?%$z`<S--`%1+(iULwquMgnPXbpc~zp7
zMFhk&s|jUCJg};T^_^W4bR>8nD?f)4vooKV2Qe`Vm_sKPOYu=|u@h}zh|3~vQ0U4n
z+n_*(MH*qj3b$+}DBMgSx5FmiR-FD>_&kHl%kg;W-fduu{q&B-X-)Nx6+dO~HgU>+
zy2nF-ruxUrpR$9&=jmaP)y1HEALDcgQmS{}#~&`v?{3~-{`W=i?1#U$ObaN&T_U*S
z1v?>`S*^Bb;B^9n9jKV?i9RBg>RJ<3F6dq(rL;-=dt^G+7_wtc)MVsbLoQx!KrU!=
zpN8uQ-9mU1<Y5-QRo8;rk4FxEYZrFPiHmMm{bW((rxO=llhsy*)en=kpvX$X!Z>@0
zB4?c}I``*4CGm2XSIjd{E=7MKr4|dr)<^|rYbc&V$+&*<_b3`#sbFXd^A++Fg3(Dk
z3N<asQ7FL@`oLtW;D}#gUP3Wi`JO`Yd)njD-V7&Y4?*_ag=e-a>>^WDe{Q+~NzHL_
z3eTuO<Ij|@63eNM)bu>BD!d==8b)ofRJu|#8r5ncHz0||<Pnue<HOQ4Mv%SGgO6D`
zhq#0k4piHXL823@C+NlMGjv0Cgvxa`((;pFYkZv}TM=kg@!g2aOJTCw5tY@hxI`@+
z(BOVtIm7aUs?Ht5;a*{Q8DOn>-WShJ{quo%Zt0&7#dC3YL@R#ylpWTL=>LKAe_#6F
ziT=-Etvt$cFHL^3ksLN*H;6r;(VrMyiF@agqBNU<lG7X9FBg;EBIGIRgq1*eZe4zN
z-DZI=4QI9fyV?E`v1N=@ff9i~_dRG}nQ;0L$)%~xRLU$Wvy`$Xl{J+zo62mZtVLxl
zrL0Y5ZKbS3WgVrgOJ!Z9?1airlro3mtahZ&3e}zrSfSdLAuCk-;<7@uGb2_=y-AG?
zaukdV&-6CJY|xkr8n|i%+%)Rrl`;5t0{^=3uLJ*J)gHCrpAG+-@XvyOp#P2<)pQ!}
zERicYf~dST+Q?g*#joJGu(z0y-^tszy*KY~K3u=<-QD!AFV5ay+};6BcmI^0T;KFA
z{(7#PD0+V7dy71VWtoZ<eq}`pyRssMTUn99tgJ}kRaT_1Dl1Yrl@%$B%8C>|Wkm{`
zvLcmBS((bDtW4!mR;IEjD^od?l_eOouqMVuA?Mx62vwMgOTUNodqBS_Fx-UhxlT^#
zcNcz(w>nhPrr$04ZPV{2{kG`0NxvI<Z7(M*L#D!k$LV<rpQBkR`|3gk!V=xE6eq0b
z%JY8Wc|Y--0xB=a?F*Awj^xfs^D`Kg)d|tq{2S>VTCMldJNYQBn5G_Mf@RENpkf^P
zqDo4Ag|$(M-zlw;>)d}gtF{R@+Jk>=KM|P@)Dlj?ICof)oi1voi!7-~foP`dHc~|=
zsgiC6yiTfSJ5`eIgq;B`-4QE|ZmLAWe<$sqdr+qadxPSw8}#B@nZ}p8!S24Y1uq@t
zBYJ429$N83EA_A$KWwHR+VMj>^{^E`Y^5Hy<A?3k!%qCLlX}>VA9hm@PvVCsX(C7@
zGBq@kiIk?2DUr!c)5(+wWu_@*N~AK=v@#`PnT%Q)dR(5@cf?{fvz>DNW`<2icj<cl
zT&mH}ylX9L3^H%^iyFhsJNcr9o2p?aYK&4fT8SFtRE>6`#w1mvlc?dPYIGAdeoECi
zN!0i`O|c|Vr|Bw5s?O3?l4PByt0d{_r>i6htBFa#C@Zr`zt`x*>kP9uPJO=Ea0g9^
zBZhg^VKXySs=K4S>bM6|s*fjm)yJS7D%HK8@~V3^<0qy1&(pl>Ki7<@Qhhehs}5p)
zu2lCIdDUTYSX7gQ?~ao7CteQWbE@kIpHp2&_?+rG!sk@i5k9B7j_^6vb%f8Ut|L4$
zaZ^83P1qvhjHoeAR~n)h?T)^wycgXOGIn;0G)roX`_nJxw3zv#_Y*REc8fG~YHUWN
zU(9JS^TpX5CqQ<KG@EK{mZV?IX)*C)ay+{Iyn*bFYHa4D2XZjV8%X_&G#)uE(yXho
z*_3`Ur^U<{X*_aTq?uS_Gc5gLPK%i@(s<;wNVBuXW?lNloE8%=CdPx!5mVo6+ph3g
zTN9HhHK0^(HTBJJ0)W(F9Gr~-pHnr#>Usb<7~}$ox?s2RX<&D1LN4e5v^)7U$N~^`
zAqN949%^FN>H*|nkP9H{LiZ$}2Hk#5%tbu_r=L%QEC5j#axmaivL<GV9zYHTxd5Ur
z42JnM7>sH{tmpx_qkI}<0f@Sgg8`Q=H8JV+0CF(U0AOAX*Ee?;r|8lkT0EWrMA%jb
z@SBB`88{&*Frm*=0x#TPwpzNZQeCD*4`x}60k3DRO{>dKz9hj@lVw27XsoS<N>%`7
zZVjwyrhr++fLYC&EL<`$Yi<opgPC0nnBA($G9v@C*4Dr@n6--mYj<k0Fv`HJvo$ac
zX5C`Ix+gVR#$;f2vNbRbW==6+PQNA#uMEukTLaTzHYf&cFs#WkDFd_N*1$BFxy69F
zqna%2GB6u$4U90;Mad%r5=ytsz<4CCGQ%&D{K*WQNCGAO)Q+Fp>8GvuX)FD-9Y1ZS
zpLXJ>o%GXg{Ir{XdJ;c9(NIGomZNzrQLC?cERk!Vc`VUusCg_A%+)-WC^p*B2-r>7
z05#@iZw`c14b0qq1fF1sAZ@sZWJNPo(bQM8QWY(I#b&BvQ(w_eRkZaLTd9gIeZ_XF
zVq0Iald9O^r>Gi2`>SO{X><?p0|Q_z$^O`Q4u4)4-RH)$XI_;%T}Cy2Vl{peHGUE`
z3<B!?`1hYM(gh&&(~p0*4m}!5!}z5%lxJN9&NEYj^vi)h7)Qzn4S_X&%nHY$t*#CA
zVP#ggbk&=g)tkENc4l>3SG|>4y``((&aB?nRqte0@BCQZQ3Tk9M1cJ}5&_xM%g&!(
z_QvUDZ=7Csf$5b6<(aL(QKnZ(NL>|@wLK)$Qb9HUICK7&g97&evMvFDCZ=2h=$5zx
z(B1h8K#>renwtRKZ7u?IHTM9zd%OnF{c;Q7^WFmJ7QY10-4rZTWryNd0J`c8fNrr1
z0Nre7mnV1r?<PEox_fv1-;3XWS9SaE7W0?SBP{p$2Z5L4Dv#oDA0GeRgv0-y@Lzp~
z4wMags$(N!WbW>-#W%)u=~uVE0I)NDtNkM=+KVp*?7#!N-VR6$7d|WZ<ig+0^x*H7
zao|Ve&}Ikzrlp8A<{0&RbmLF;ojb_y#q+(J>G<9)>i6EwcgB2Lw|5xj*zG-Qd}TgR
zo!^xw%2+E;lo3~+DC4d?QAS^RqVRz7MBxPGiOLU!6azkYKf60Qu4YnPJ#~U-hStl6
z7kj7cG!iE*y%p!}Zocn!H{W%;`!s&(UBxZ^SM1il9Ma=%W&3+~w{iC_#^-zTp#BZ{
zPG``skmj<%kGjy}_%G&7U2>Zw-@MSy-a=EA2X+8h0~fcpDp=4{ol#a0BK}f!JIpDH
z*wbJygMQK;c=IRgK@9pydyp^xcj`e5x_A$^v#58|XKKP$nCn+hHFEvxsYb3}J=Mtd
ztEU>de)Uu%*RP&x<oeZ9jV!-<nIMg3UpRi|`H!=nH=4t&%CZwP>tdrh%Bn28Ftffi
znv<-`vI8^2+3xg(&uO0PHN)BN46`byake|7tjcMe?am~tGRIjVwApW%gN8X2UWzG?
zR~m{Mr?T2(tJ$}j1FJc-nwkdlUv?Yjs9}y9<|M0SHdHk}3hK*Nb7VEgR&$coa?Z<+
z-Rj$|f!!Jkuf$ZJX((%al+>5ooqoGBXm^I~PDY=zUv}-*$Zn18)+DQCHq<pfit5Yl
z&ZylPw>y)pmQyb)1Kn>78pDR$7&XR?Nkh{-I`K*DP*a9TC4$*E2j<Xp&5=1aC#D9S
z62Pp!HL!-3YmKb2HL)@<lX$JBjK2!3*>4V-!=~FDHOI|KGXt}N!0f&~u!pv5kL<BM
zu`@7}*r}!ru?no!Zw*?*mfIS&#;r*!1G9p_+Wq#RJ#4$}QG48;v@<Z1c(tbRj|!~Q
z?+iM_j@ub^#+^wg1G79}nGQt>uFLi<N^n%Rn^A&WvOSIxoRIB&lo;h~KcvJ6X1gRM
zMkm`lDM9(&w2PeM`ou<to!ZM=8P)1r;OstGf720uvi_!%{AB$?2l|O_@^k&}(;n+v
zdpg$N#%rcq>Hxi4>ioQ0>iE2yJUu@?HeP!n9rvmUF9(=9H1y_}AanVr(C7#L&o1Wf
zgk7g^%$)h7f%^j><193eq55p$P2C{Gbf(IYd?k4#e|vjD=`AZ?ah|~Nr|WLJ*S3}N
z^P9KtdT(!kIIr?E{zleGe%qGM@$C}l`1Y@Lj&E;sj&Ezu@$EgH<J(`(@jvf5zFqtr
z-!68JZ<jvDx7BleyVyCtz3CkP>5lR3gmuB*|1tjHei?!`)eBKy=bb2Xgm3Fk4eaet
z4Q%DqKu3UtdEJi9>%AWI9s05ag-7Vd(7zdV4>d+UKykgkwqBp=C697o<xUqm+_baZ
z005fR^qEnACzyQ!bHY|<K=B#CWHFL<#nyQB$}rm;{?2$R(3p0xcWj&1KCz9UG3{jU
z*xKEFVjDqY+L459A<4!B+l2NnHR>|^Yx2L1t<Ov1sUDj<zfARgO!dRx)l@(HT}}1F
z-_=w<{H;xOX>Vp1bCSwKTIek>lPiaro^pKh@59;8PB{DhzWT78zn`ja#<DcfNK#1w
zKGb>I#obu?RZNQ>JfS1tuaCIEBjq5tk#G3eCU+`_+GNuC{F?31VQ=HmQAmjVru{JN
zNkPWZ5kb;9^4TcmcaggsRJGn1xv%imRO_j+@x`W9)p~Jkd=b`|9qhq&+8)g8S$C<U
z9~LnwWxlUzEcN2t0O6Pg;8+?s%>*1PdET%kKFtDfa`4GI!bo%xw(WO~8uY%Jd6tnt
zBJT|=vkeR0XjU^%H#BXCH>6A^zr<hA7|1y3-P!qnUEKBFp51-l`~K|e4NDdE{lUL|
z`T3Q8TojQ<?0384c2Dd<dob*dj!AYNhr!_Z99HCGCm2kQakP%_9vAL8NDgm4u1^ZJ
zfhoh<woQNGC-K*`+Ktw~naxJ4*|1I1w*IYQnVq)zZ${&D5MTwuFffdr;UqivJNY3y
zy%!oIZ%VfMg%d0dpPOxKu>4KE@nren;&0>p*)t<LJD7U?zzH7lumz3zQ)ZWTrehb%
z@xTP1ocoK%fZnZ~R}HISwvBJyx$iBF+a>5<ZeYCV)AxGFzSqaTKmL#L%<-o6fj@i2
z;Si3u{wf%_0CVUXZ~S0}_=c<gPwrp|YaAW*fHE)(5r6#h*8k*9rw+dVZq=W91LMjY
zxbx68?(2qCZ`6&;5%h2XI7Y9$yD}aeJhE6CLl0NY{%YwC5yXSHoUnjBNUNIhH-BXe
zoVnrmmyU;LNUq^5jmdJkI6Xd|v8L;OFg^yD$I#kwowDuCV^96NzrDNYouA#_UBc?h
zCE>rb1E!oB2XklUh9G`jHb}6(N=2m8&YdZi#wwIJ<q7GCgckmuzj#58X*EMR?TJrs
zFW$bp`x`F)AI|T3c+OQ(U$08XHnORMDRDh<62ySLzWCwt{G#{f!}a+_!??e20|)j2
zm8!utS3;Bw&Fnin5Bm4ic^uw-ATr?vh^myPu%cuotZMTVHX99oM5?Cjl5>ATqx^=0
zR8Mx+i>uKxh)1dFp@kTV(Q1yTS>ouVz{tEhhz|A6eM(+>y_(&z*Qx%i)6=uz5T6Mp
z_-rt6!w@eKIe@h|{@ctLz(v0Ywg5DLU8ax~X*L8br)dF3$$xbx=1Gs<XJj2^E5=}N
z$vnbsb_=#)ucjGBIsN<WjFWN<)-s8fG4{e2x}n)0XB<2M0d<}V$O+UXP(U^Q3JTL{
zoCn+?DzCAZz-=%%_ubLxJy0EEP+P3{ZoE{A&G;ZZWtk|%jkd9%Atff@NZKWG*;_2>
zB<CW=MwaYgZn5N(i3&=d_?h#w%fy#m3Ftx@{C>nNl=RuEKdM%z4OGukAGV;nsi(TN
z1J!vl(0Cr%HABe>$_gVVeo*ml(Judpir>HqhNKYkJm72rVmhKXU#ymSi9IQ6j3bCy
zD5YmXitmHL&GHPMmF$ZVHxAkpKYEps{Y!CWkYr(Lrh5u*D^G$+f&{v(Daj;Ny8C-h
zL00)S1md2E)m7Q08*xi_lzFdaPrUMk0;)6>RF3Itu3aYXcIU9m@6FBe_sY7q%DQ&M
zy7uUm))}gV*Mj0BfG18{`WlE}2np*xS0LGKq3z(y=lLSM!qc=O&f!JChNo#ooS%yj
z5P7Y{Q5~X?4oVn@-?=ohRcT~v&J4wqh_NM?ce5=;cCdfd$POjF9|{LnkPkFaLqxCG
zw6RV5kWft@f#^(nzZd)eYvu=jeCsZevBSOC{~OJwWhLzYmf2~3vH$-8?f;GLk<~C8
z#&?c8HNIQ9^SRr1Rzu^(7z*m+sNg@mdEhzqQSeF&rG12;#8JY3Is?CNy!Z+K)cJ9O
z`G-A2I5<8u&Zbjp3Pua$4Imswy9SX}fCdy{_dyk0xC76b;=00~y9dUhfm7OBLdRDD
zuQGz};d>zr{Cs!}quPI@c2=Q_sR#~%98eeN1Y~vV^}2C*oIKQceeve(!_{5y_Tmmj
z+->SiLsJ2|bZES$N+Gp3bJpI>nHp2yU$A=|(55>lRTui}7^`^z+Aiw4Ky<7X;w6$p
z<Jw=MG7LmSFzoJ&xQ61c7a-O{Mg!t+P>ubEIS!Nux-mfd8kXX|8{o$`S2yo&zpfEf
z;IX0avJoBl0YF)xQGoMH{0>kvz@>N&aD(KtIa3YrDFQuC(Cx$Z`<tsPTokS@-(KD&
zh?aP2n2koXFeQn%oB)Coe>#MrhnWkCzhitI0y(E&<FMklsS}LduYt_KoNGoonjP`0
z@CD}2eENuEPhAb&DNPb&y8unA_-9CWkU%21c!h)dUf1>R{OtVui#P8tFRou-{jK-z
z{mpAM0N;K8{^Iugo2%D}-fpOEbTj)d80Wzo!c5~6vX8zgA?pZZe*^mM%*QLdFC=<_
zqzV1$y&DicP^R6JIu;t0se2Fd$^u9*uh)0)-~K?=;nVF8)jHmE5_hDKa9u44?mk>!
z^zPnYo?U<U;p*&t@7>w;#Z~Vwm#^<qr+(QrrLL#?E-;9BE^x;ne^5F&_aF>u`izJ(
zQE8K=KVyOO6|nYv31bcnKZAu4Wk(lT*=*O18+0&ZQz``df&?0&fYo)Cc7<Y|Uw?S}
z?Zx}v&70o)i|;OP?=IfoCi=Uny42~UHl-0JUwk^AO#i#SxVeU}INjF;b-&H}pKV!o
zQvWl}Fa95YO#hEvRX+gmU$`-K=CA|tL*p&1WOxA<P=7IVf}j2HKSqHI;(CC3(;wP>
z1Z`|r7k};HtW0THiHF6VMqtfHC}>w{T5B|5E$o#C@UH)}_t%^E6|@52QHcxd>S}D5
zO{-Ni22)r~X*~!@I|N<BxStNqkG)mfuG7h&WEVWYf|U(}oyKhhbSL*8Mq|zAPXiOi
zbfwR_ax`{GTXre_B(v!7(=^sj4|TWsPuPjx>$AJFzyG*vi@b+TtD-_Sl*pbcK@p9G
zl_coy2&WLsz>CbrN6`;A;KT*OfW@pS)?g&XrPG;^pa%BfmyAlFbHl{(BzQWz(y4J@
zaX6I7<Z9ffELqyS>fv&DdOBc>(hG5_ZDieAKS<wN2c)8F>**(ZyN7ON-a}7aL^snH
z(arQlbW^zio+yg$rkhW4H(j}iuC+SmUJ(bv%6G(dLzucNZWX&L9*tS?%i{Qod<O%u
z<7}$I>>GpK;5dkm=J~V^+7!PfAbUSe!4XWs3Y%ev)o=K%j(D2JbK)MF=mS0J4FFI>
zC}r+uQy1dP)KU+kg47*LD(g#kw_|+dd)XM9u6|$Z*Y5!#<CiidUG?{cPYh}YeC9MK
zG<fLSWYyJX%Ql(1FV7p4uiC6wh`oF1LK3Yi#0=Qxb1lm#)uAd0{Cn3A*pk$*B^T~$
zX@-ig?3Nioxo;!m-mDq-mPm*Boy#l;feYFO-%I+aMg7xo%wFKie=X%Nynm0UTgp_q
zAFdWX&^`FKG02&z{}2%llN3Pf=}HsOD3w~(=PG{Q%o(Tg^Jem2OZkiVS*7ta3*l!*
z_<`_Wdg!AP5z%3qWj~@14BAHq2`Jy^?2~+z`X|3?sjphfs~Cx7Me7OFi)wFRKs`U-
zTU8BX@^)??oCWpjnDBF%RVhTH&AnBZ8oqDa0#WfUE}m$U;S+_?2M=+ylLzEmOr(3O
zyr`YyQoZfuC_0&;l{{J!bLbzDbY#6M-YM9{eTGi?LGU>{wWeM@b*DCP{k}T|i~(EB
zr|wkW8|>bx61|eHjpwUi(H+{z3i$1Z@7S62#p~YH&Dm?saxPy=2jH?3;9-^Dw=T7|
z%U8s=!S};;0gP7#P%eoxEH@>w*4$I3SNTO{qtxYYL%(CI=t>U_BXw_=I5}_pZ|`p2
zUkFcx3a1uAp?kacMMhb<i~dCD#=|coyvo8j-vO2z8KjZs7s54Y){Nq#pL0W;ZggNI
zKKb@KKy1`SBx`(nAEI1{v>R#&SNMv04IGq$0FNCT8(u}dAmdfDaI0qWRjY8TsaKnN
z?HrzENI%V}sdw2ojhJ?<GPzDSfbykv@_tGIBrQ~;0^>ujPU|0W9aX;FUJkaE4ZB$_
zkxD9XNA7Yk$(Xj8{7#b>gkuoT)3f()d+**~yt%kL|32QwQ=K9*PuGYV^etoMW}~Eg
zU3I=fuPCJXeYi9~s>TL#Kv#B-=yp|dJqA774MH~)L)rM@z{UpJgq_QCSwuL!z_hxw
z`@<SIzml7v=k#piiD$~*hdo3X$*mQ>QX`-lHWl_-Ssi4sn65`tZ&74~bb3^@GhxPp
zgYDQv$uW_OgE9uNmvJa6u?}X)M#0P+11oz3=n*hJE2U<tg7Xqqu8=>B=@XV_JLr4f
z0371Me3)2_!qp7$g5MVX76u307Ls!`PCH}y;2UlGsP8Qm!odz7A;Jtn+5w={K3*mm
zuSz@f^aJxpHt+<<(gKR;n}Vc1Q9$t#V9XVFHiP-$(T>d5+i3jBV!#@=`P9g@;tAMs
zPlhDL?1HE_l2#x+@IqFg6mYYg2`4g6I953WPGrA{U)*TEQTDN^9bdwBW9@Fc(U@)+
z%b*7_@Qf^NxPgYaMK5mfjXZ&a@vv`3Bhc%lXvcVdKw;Qkut&9+)iY1L_!%9k8Nq5^
zkyR}eFB%n$T_3|+t?cN?8yOYW;LDe0m5M}MZURxqNWS;(nr*`?ysXCIFf~aI)2%;r
zp;Nz!J6(V}86hJnku`R8vxYIDUu{0%GZr&Utd{EBTGQc2P1%57v_&dJdr5>yiOtrv
z$7*fBG%BNjMY`}gLe%Dgbh}m@b)TNHtZFrGqwh0D-kzdJ%NmjOUz?(goK96_o!OD1
z|AH*aD41r{EEe5>&(OQy)=-F^GdHhq41cloW_XJ2vB2%<R1)c4*a+J?>xVS{Bnh2L
zwa&XGCp<(o4)@jfy-?_H<S~P)!)nRh9Or&8L!Cl7u;`;`>_<Bu0FXE+2s0EVBoFS^
z@qJ0Zsu2uf!pz;d=f^_Bep`uPXi;aEvqipwn2HLP))Z4=z_`koel=IA&2C9wBdZ~k
zH3U6N`<e{>6?#la&L**A5cz<@hP!R!H9~Q_@ZGzsi{9&-58uKc@?`q<!*>V!azEnB
z`HC*;3CEmiq}!mgO=GP%*@&NCVn?IcVTUgJF%TJUJ33IzZ)Y<qQFkhuCM1U=)qlqv
z3^&M5s8Pay2Wu<BVwmB-V-2@*-(eh5(tih#%JAR8Cp-A>G!*|G^NasZ<i7)i&Gg?9
zW0-Q_F^f6yM1xk`fd^la&vy_#ou!YXR`&P^xn4P(8b)U^3}Wf?qX^;EE*R$>AVoN@
ziXdGAYsuQb0qNLL$~Zn`w{RZVju-Z@6X%{T^{he{c+6qkU?9Swaau7Bk0Wg;MW$~G
zpE^ls#|}*r)=@!93!<{LFA`Bza<dw4<BP<|7&$cUjF04IwLLx>-zbKMzD>n9iZ@%f
z$&_zYPJ@XPYFKM=DdflQ_(?s6MiT>FvcG2*i+CxmmS*_a4pwHQB;%wtEb0U$o2JAu
z2zD+~!=gSlHaRTGhNvVcraPu#6?QH{KwCN&;j87HiwcgnFi^?EUCKNqjyxpB-onF(
z7?$lNIHk&Y7%_n*dl;4GyrL6`c^KtSApN7Vek;@|#lz^yI>kJUK3k_052GjRR1s0P
z$4)68Mo-o$=3%sBr%I|^vHMG5rAy=gLhtUw|0UY;RJ6r9mlC3MaSs@w?(NG3W}g^m
zI>BThy@LxtjCS^rZ*&6SCX8G!fK0y}VnLl>4sX3?l;F#3&m3#0@yx-ctWnQ$uYn1*
zt!IvSwSZ?%LMH6XM@C0IJ>4?dvaFX3hp4!+h{O;YHyP=X$z@ilgIU3ys`p5SRMvQp
zi1$_Z5um@6^fqIzd_yDXDArSYv6S4^d6J~GeW$cjjX+azm&hWEB*%%07)8w}*$c@r
zLDvj-Oo&_J2mCvBthaFsvbl3*4neVz1L;y?rl5@vK?#lHLu&i*SToCx57`ms;c<Kz
z3Oe<h%fG(8z>ByZ$hsA?5X_NTA_D;-3A>31)}O_@7xyk|;r?nof#+KKO`OmGPVYZ1
zk&C@0%SO9|CNPAp6L_#=5w-^(TM?CwzQ39eo#3%vua`Q+0t6*NROX<SZ8npqSVX+F
zhM<p{vi+Q5iCu4Dqr}r5v?eK9{Gj!ZP0{TSTD27Y7i7`>9kdcY@j+|p{ZgTWR%|5e
zpcTh4{<QI+6;wo05G5Zkz*sp%m2^pVnyRsl=nhrMJSZKiCd{SUlhte!A@{~SK9neX
zc&uT+jD2UMc?b+DYuY&GX16kOPjr#M(uuCBcqdPEQzo}fC%UOBg->*ohQbXex`i83
zPjr(3c;a*0y+ue&gs%7!qvy93|K}}iVBcV#0gPl;C<@Sq_>V2S*-plPY}$=4@qhjR
z@qhjxfp?A%DaKpq&2bHft%SQ?P>e)l>W^904HorpBnMygXD(S6FmU33uBIMrSKj%v
zndb(70s@2>tqxG*bUYO(jA2)`ggz}?e_qE}M)2zxYGdW&15Lz5%BF|5lDo^B>sx&|
zAbp)}Qc1;u*I>DfK=#hAu5SK<@d@94xVpO}w;il;N$o{J3_sjnygrD7YP->HIu%KK
z^o`M5CtUm~j_>+;!h;I-SZh*4dIGcatF!kPcm#BN*Aq1>rcpD@dV}3f%dg5pk2mVA
z&6T}MW8J~Z@VfwH=rCPdF}mG)v(vWVzqZ{uX*G(zjF)VV)R*m(A}<fz#l#JJS2y2T
z7jKXfwr#bcVY|`oc2C+(+bjVEF*!$v`Fi`L*=(IOJFT*BThI@yZd&GvZQBj^$Sw(B
z(ZEiebUUr~iP>s2F3Lkdr~G>Jq-&m>03kXTN6oSjpc60@&8FQlPa2)(77&^>!>*g{
zb_<vQ*4~RFt1JW?d(oz!mfbqJI4V1ki&o8O)kQAGhRGoSh&3JCdUNIzxyHB+qaI^n
zYJxp?)~V;>P8Ntw9&{D|R@PO+VV}mvGh==Tva@1Ut0>Y8n8XBa>M&>sip@g~S7G1?
zJnVolivg?#qp9x)BL+)o;rIMDdx+kS^eyC98Aj`B1Os%>ao~b*3-C%!9~KV>7ABK{
ziet3QobYFYI_A##T=ozt#KS0J0~XXWV!B{=Ox`~0HZ&{2)L+L(i2NnqzsUP_q|%5@
zPt~vX)5aI9cuXU7pc=HLfAC2OyFM}=l@=1vak?@}sfesa@uPCT=`n@g!i=j$Z$B1(
z+uG^v=1y<hJH6dvWKO;QN&k8dR5RlE{>qyU4eNjBjK||Je6txb^DLS@(tL@Z5MZ2B
zvl&A6deQ2~o`(}aa~{>Onkp+U{0D2U4Isnb$mlS=CZ8ld(k6aSn@+ug^kx5--v6<2
z#9d&~5U~0CzjoWSn#uUDtxo65{_l?(|G#na*@M6u-@DE|p7!*eke&4)2Cxmgy}dBd
zLP&wj|G>dw<;?5a9ipZ}@$KQ~3HGkGg2^RsVaEwu&C6?+Q4}|#Z_i#QTga_h2KM=X
z7KbkCYT<7VkgmBHQ#NXP)8e;OO1@{d4S}}6UP;NyFMD&F_vm@*sojgW@$u7G%ZPLV
zFZtAxo?6mVDN=B(k%O8bdX~T0xjuV)QTuDv_@C&$POOW7Ui9z(jukYKZ2np;*h25k
z`<u5g`get!(yRCk0UrDp+b@WKm))*M3Yxx)9_u2=8z-2xZI<JLox5X_2h0KyC#_%t
zOr#O08B-TC%fZu-IUx6Xs8xbI?g{rRQZtuFAg_dhj6~bSgGX^b0uaSlS3kTZW7>D;
z=SBqt+thW!WmTF4ILE&IaCvoid0k02R5kwjPx*q^ch?`zfeuNwm;AV3X1=d#P-YLF
zClWFCdO(X}0HGrL%t3sWn#6^`IN_|<gTMDaEVZx#z?X{gHB1d0l)wL<zoYlrk3as;
z{A=U%)L`F?uY<1*4&c=IdNBB!-3nvZ)PV5)$2dcTA7AVeUH9hW0=UHzVZ*g95$~q>
zH7+|V3bQQ&`r_Kkg$~M#0Jzf|u=O)aK?1||=q~pWui4hz_j(E2*iu*+$`!AuiOBP1
zFmVSz)BJ%thtcgUyu&45*E$C!M=7}S$7;dFdXcz!_Ayv0m%BENm=HVoeu+`@vPR8I
zjhgq=qxMoiX5~=G8y$X4E*4BEYI<j&Hr?FbRqPr|f+^>~MA=4S&kuff0(2@e#)E-j
z*DbXCHA(E*b(`nlgvmY!O*34%2bqba`8vohgyP;<jtU6-|JbkM^fay{*TP{wtggw;
zBCi>XH-i9-P0g@MJ2itbQwlW|RV>pSo$T|UIARuWgxriJ>Iz?FXfP36i!}rFBj4{L
zjWKjJjajM2c+WB#%a*_To0_%~Xq|;kG17N4?P7N`Z7H^TVGKMO1CiNaYYe=ymqYDQ
zO0ko;MqE;qd^YLE)I176SNv8RDq9KtinYW${SAeAq1KFad!gK3@vg!8zRc<x_XSQ&
z5@W=!q~i%n%>X&n{6(?P4M0jQhnN`cr6grWvgs|hEhb5e{%8?^;&)DvbaGG9q*MAL
zxX8=viKpoa64^`lBuw_i)AgkIps3;VQyDL6MzTo0+8Gm<oVLnvpVY3GXxB@(>*?AJ
zrFKh9FeN^J*$zj?3Z*+NwYx=!som{5thq01CsGOALml3&a-Gt__H?kr2zH4?*2!D9
zNwl3(xTe8*TX^-y=*1q1+b~jeUQn8mq;|JpBx-j%MvA>g3wIa<q1HYW3pD^L(IMIe
zMy-1Dd%BymbQd)PDvO)RX!wg$UHYSu8yF5&@P@8z45*=>zVOf=`cNlu2i_v^2Y6|C
z0Ut08BW?K!?)Ux!IrS5KP;sG8pd*B2O7$tP;O*tx*PsrJDVt&C40h$9{~u$z`5MC`
zWRR4Wpaa;9Yn)xbR-a(cj3Z`i2m#L!Pl(-zWEM^Ucn@zaTt=-bCm__G$G`zc^SEE*
zmJ8@OOMVELE?0AxY_*(v?!(fbAH8=LlMrtcfND?BLk^ve+U}Y4Cu*}_6~rgIbA$j;
z9lH(MkmpQmxO-RI7u27Lo7V^$zK`7{yxe#3o)jmUJRWF#hThcKT`r{3O9e^jCjg=q
zlH(f9DZwx7IfMPin`1bKT8xH+s6q?W{V~6t{@~CCi(Qu%_HSgXkS7d<&q`a4)C^~8
zV$(RSR^gc~O)$;OK0b7k_f=SNA8)Gs!=RI~o)d;{u&j_@HSX}fW{NJ^z{kM7qAP|I
zxHNUb)nefX*k_;ya{bq`(veg(8X_^5agLFUj$t|*4}m`)BRz*Oah6^PEZif5%$tj|
zyASU#dT-vI87^Ldd6>X253O<7a%`eIpzvAvOzd%yt-`7QV1)i?`GB~FY!?k3oAv=6
z3Y~;mbw@L2RH=S-Ktnu*Zjg<xzZ#!n5&q!eLwI_8Je}Up=;#3+hR5N=f9S!d`e5w+
z*&DvJI+occX;q1>X^CF2J#!@)p$SV>5m%KXKb<t&bwUrw;DxF|@rO6>?rp=XyLCn!
zwz<ZB(ZR~mY_wRtoVzer??;P!TY*EjA&e0vixo_L1ey^8=VL}()1Zw!!sCgCfh%ih
zRU@13%pH0tH$)`@|Mu!kTs~s32^@)9E2<e%)yp|vLG@_1;)Dj?f|l5fHk`p=6*vQ!
zafmSRBUQi)OYAv!h;ilvp7@xqHT@TE$c)OhyIzHcT|MO(of?>r@y*m<9EZkpga{=T
zD}k}BmsqxwmqM}@uXNEsUPyuOxKX1fmQuEOeJ)J2!NSt4$U|4evlT^h-~x(3_aAVu
zVc!4n_QTb)AI@ICHiS)^PD(@c3yni{9L)xcRgH^kQI85=4p4-wkDM+EQ?;y9H*P%~
zWr}VLLk@%QsfmEa>U%W9)BBfBFuU&@0Vyc`FY!8vmPec&GvJbe6H*>vKF8epxXz9s
zF)6e&JY|o>)W@0ch0CxW&Hsf5yoWj0NhBN%-TPy!VK$Fmy8{;!5+66(-EN~@pDbrn
zv3RJGMQNazY&lW#5#ZGriX9xz3}HI2kV)@r^en5RW##{T{Cn_Y4gWaoPyffS517sK
z?`TDT_VBe)396#Vfg&t{-5`(C3NNstzNjRN_8``_BIR(Z@^rk2MOmytckr16iuDuX
z6%Z&jU>FLMgj}_n<0VRx4Y=;sODMRhd6A*lri+vrO1%k{8Cqg;dh=bhd*O;$Ub~Om
z-A22|cDK{+XU=+;4h!GP@*R%b-KN9XuF_%P&?|rJp=y1zh?3l=wH@SHGBgp_N}B}f
zyb~qO64SBcvc`7=!X{tB#4;alv2TiJ<#rjjG7}z^39v$zn)yzan)%k28b5dfvTe%g
zwKuZG*l$5K!g4#Rt)GN!o3e)Ojcn_k=tgL6M>i#m&^}4GDL2_ZC#uwgEvQGBZb!XD
z$};6x%rFW^v6PJXrOz{D6p}{y!?GxBd&H5*gO;E}@z9vN5@MB2iLyxSK#DkkRf-t;
z#Kd@Mh~?#B8F-{4RE`5YVhMPXkDe*9dzA3e3~I?E9u@rzGExU(hp;U4!))}!eDp&V
z{Su+DBEObOJPngkh3rE4>Ed%`5#p+3RIb=hES`=jRJ`J-gh^CNMF>mNr6^gFA;5Mf
z%t}L<qJ+^SQ-Cy?LBH%c7av!QbLQ^8)9tD8xHjk5Q*+#HPOL}utweomL;Yr=zLG+Z
z$CfHVUoWCBAMJ?3dhwD&V^2q8ZlBfkC@e|guq1`Uk`xZhP<V@|YbpMf{8@A}y+Bko
zO~OAxzC<QLZA~*#)5@%AC2BS^Yc{Jz8BnE;hWj>(pK=rm3sWd8Orfwag~9}d%7tQ#
z)`z4Ln-gs@im_O#rAYWEb=4v*9Ynr=Wa(!CJJF_U@eFcN;NU-R!b<pBDKY!BTOdSA
z)J|r2w10dqt%z$1#iyin+)N(fETbZ#+sPmzqINbSVJ0HQ^;lg%KtYSbnI+pcPb>+a
z66szXo;gyJauRFjB%O>N>vUcv>0}ALSKD1L>Tay-YCG%YbT-WFY?$BKFt@W|y0axV
z{rxcCF$OAZ<rtF!?CAg(_ljYYPKh3=pP)JEPUCEoT5ikXG&!^5B1*0q0oFc-@6@0&
zXw+bouz1DShjP<VifoxCft+wN@{(nKyUSpX@y+3nMaqUrzpdv5Qgf2&_mj5}XO<y5
zKAqxCz4=&p37~Cx>bXNi0i<!rkEs6QMtA=)sb}=2OCjAIBk&(6dgS2a%HtwD3hE_b
zVbpiTA1MrYe3Wx?)>!OL>qnO76#JL1W{PUfAB9Gi0EXb}_s*0-L@4~5sBdM|H<kM4
zkCDg4Yj*iCIw9s{Vy+cPH%=!hlWF8ni1<a=L|@Dwm2{<YZA-3gC2AMRNJ_)KnG>TW
zN|}n<CF&qcU}jImFnc0~+KCwEPsDKJL=3l@h+c9crU)QUM1Cc%=rr_xKhrQQHVwl<
z(=aSK4a29MhGD5`7?zoa;pS-wh~JRA9r77JWfsA!$SGvDnwD8KdmG#}^A}E*9yrcp
z!sqQj9rK@#^-uD7;1l+rPV=9R{m*R|St(BnwuyHYFh>f4I%Mx1y~1V>u^H=-wImYJ
zik!L$fgmS6p{nFs<iXA$d~g;E<Zg8FFgQR4Tmu%r*W|pol+IQeJ#~R+FEolMpsHBy
zZSZl;6RrCgszPFT=b;LpW~jpCP=y<YDohL&%`R_VZg%CnOXSZkczJuZ6kev~4*u$x
zKlyluf$`v14%<?dV#{HYZS$Qs^4K<u28CEr?qrxxgHWXbUHjB>giHlw2xnc6A)Fod
zDe&?R4B@O<Z=JQ4Arg<_FEyCQ5Ljh9hH$o45wmDej3J~>hWRuIH`5?Y&_E=+eMdGI
zios#@(Mblwr+Jjv6fcn){1&N0Ua%g&Jx`v9Mzh{91`czq@#X_R2wVyP&aaljYVQUC
zC?(_?9CAn6rrja=O^rv+c#)G|t_z-NHac-!3JG(b<h*wGxNvLd{%i&<V040bp#LW)
z4FSFd75?!*<X~4M6+6C>$imKwcmj3?i33y<v48clQH{%^Ozbd}9PbJT%Rtq58ind}
z_R=r^bMOm%JdiuzsEJ8gi*mVFl^W*88rH@d&5bo8JUm__%)}$JhPAOqQ(NQU-&Yjy
zM5?({B4y~MoyeoCnj)67g}V7p2EeYR0$bCxO+m48pjas=O}-~cww9Z18F3XmkVKkg
zL_j>!M&r)U7_EUUKg$Dc6~x)h#@W;YZWhJa%)yy~-Ypq7aA=Z;<XyDFL1A$YO2Dvk
zU}W%5KKq37P=wj+6P9J4P#KCahke4b?30bNnT>N1_Q}E7%)vQgpA8wwqf0ojn6_|v
zrG%C@x3K*B)4>FzZ(&jyXKn!d8Oy*MkKG`?=EGcBDY^u@!ZP@E7Q<`~>&OhMMo#{;
z4e9k@3o>;n6wR>5Ly`4*(B(D@j!yZ@QIclVB;Q;L#xwC3E_3ehvqYRrZ{}t;Fu@HF
z)@nD*Ljz{b*salbR0`-mL*Ip^aP1BWf8moq_QGX-44AL_br_T5M`z|dI>*+DWy3!s
zTmeBsg3Ry6H2Hcu^v2ktLw8bOC$?G)9dsF{`*WlV{{$3^$;_UI^<?-eT~2tTlGV!>
z*gYg<RUZsX3#}oC?b<Fu{kM2UsnI!=1AolCkW{mwJD57chngFXM*bA013pDNS{zZF
zbKu$Q|3av*5qGLIaYI+C1}rI5tI%-gm`aHcfj8onrNAdJ1{C#aiWfT`qc}%su?uEB
zyckuV)Y#oIe-+4Gzza+BKP^eibSa+Qd^Gh2O9D3^(%gW4;6<ds^8;^*_Yw^s#sgiy
z(0t`jR}@YvQU#3{7~uiq${BaB4lvSJn;SK?i68HTg3lG|O8iFEaFpGI3Ype<EohOB
zG6h<CC4wzS(%{fU_K)YOr)Koj96sM1G%3pSRy_U=<{Z(+-?4ZW^?igwvh<;leuel(
zjqLbFg#(gUO742T1f&C!Sj7U8Sf#VoTf<alMmaBgPDB!(N3ev?D7GcV)kwFcgw^1*
zh@xKf0_TB8*r+#ZX1zh-HRKI9z}JKe0X!8Ln$RIoB}s%WLx)(yWR4*f!IX!#HPpu2
zz<jf%JhUw(jr%qYv$z*qHLQsdVGXtMClGu|;HjVv)^O{f4%RT4W~wZeqQv3lsTAM+
z-lky|ML}*j(!>&1n3ETi>5J2cb&qaWf&{^Dt0mp>i|$C(OHeR*doK(BbdlzMEzZwv
z?$_=U6Lu>yfk`U1)dpN5%kWlwMb`<p;;YyKxmCk-txs<Fw{^YF8p`+63imMQ)=trM
z^O@PcOQrg@hSEi`eWJf9vHbqfUq1={n{vqS4}ay>)IPhgDP#Qpx**+rN~%qVWv^as
zsklj3j*6j>d+lj^B;qSOX*3M!&W_R^T_cLxJScUwMda6x8%cUlTf-8UYygYxZ?>dx
zDT+$Q#g@&;WZreK;{5Rpo0^fw9s5oZU+OlAsGoJ$q3mrB)!9DXebMo((J6TufumpG
zGD1|-EY9V^S6%RyI(DDgQJwGl6!&ng$d+{&l>iRWsgzvNxkRU0$<CwdB$Jss)shG#
zorzP8GcV<M`;-+KhMUi_Opw(?_+Mb`T6Fk&(&!W&9@#Z=)9_Rhq%ocuN%B>>&we5G
ze;XX6)Ba>?S}j6?g-Y)N6rag7Rw?IYZna}Bljz&`Tk?dI93lB)p?X5{BPYob(#ot>
z+EX!Q5Q%9-vaOK&;MUWm3=j4;m=yBn!r>n0LJ2(b=fmN~`B2P%EP-AA202@xlUtM6
zq#7|Y1)Y`>SUBsgv52?I2`s+&%KWi>Wvrp_VrkoY9~HI6p503<C8@iiTP#$uT*kX3
z29glkg@EY20}!K(^L@6a<>hKO_LetKlQ!y0QuRM-{=eJX3;2f7MAt=g|836y*J?D|
zjb#46M$7(^|L+f&{}1DhmP!5hht0=_y7AS;Uwbg6QmOdVXT`JdDXvY-Z(Q<h;wl7R
z?gk^;5asNw2BQkBgTG`{it<OrwV5i>>&;B7OKRPzagVAk!C=LPanH``fu<iqo@Y!x
zqL+P9&VKl-l5CWMW4g`|^^phnPq2v^`<S%~l|kQ_qCLSGyZD+C=DVJt{0|rBcQ@}Z
z|NEi`H8;9sgv;US>43>R5yYUJgT^2$uN6LdQWp*8FCog$hLn*;$q}{cCyU;$rpY35
z?NE!Z>GWACI_rn5Rzohb5=C#(^CMB@tdm8z?(&L>6S?%;DQYCpX5{iwHW?03Tz37?
zjqTQMAcIk9KahcvDFl&IH%EhXd^{?X4-@0**PG3Hcsq3wR-F2MtzW;d7QbCjxJb?n
zd%y*t?SPBUzhN*+_^WWC!jrFKqjdJ^K_NCxSh&le=PtaaRcWldd~vRA`dF2X)M~fZ
zejvIfGP0NHSK75==Uz3mU1{vzs}kLk%pfILhdRxDDw4t(7Qbc|w!r3`yYL%c&l4a0
zdv}TlPpmK1nt`lWQ+BM{9}S#|3}-V0>F3@Z*rQ(f;C=cub<GN(K>3LpHKU1PcIYIM
zB3&2nTmmGH6T)jy)*~zGCy$?9w5vL(CjH;UC_lyEmFzeSIFFCL^Bm4+{2XrQVzNAa
zH&d2Rq%uS{w)ojZ>6*c&F(-Jbq#0~onxg=TKz6^ju+ODwzRS`C8;t#;6U)0Tuvrs2
zd&FkF6FRE1#eT*L0H!c1ff9Lpr!mrKT*ZAUVjPo!alZj|&0Q}~PXpJP_5ycQIV7NE
z38^?-Y=O1j{UU+)sxabZR5$4uKNs<DWPgnq03F0<rZu&y{ERC;#dPL#{}FBHU(cLJ
zyeUFSO;&Sn?qR|r(42?t?gH+a`5DpIu)PlEQ5qJW2bFmb)bF|UM-qYD^n~O$kqsO5
z#;978z1(P(co8ijZTk?Y$!I6iH>+K_x;Uf0w0U1{R||j>hWqNttC!H3HcHzCY(r-b
zC9@uS&Uo&JXntR;!b$d1*jD&>jp*y6zbGw#okblgtQi%V0@ZJ#aJX4W_zNjLMOm<X
z9m!Z!N%xxIXfYu4yR;&ia5pMA2{n0@=+(n*Tz(AwroW;q)w(7P3IzaWjp;<d(yzu*
zHp#i9;F_TLEj>{ul3q=`xpMs|<@7X&Bj{?duNlQzMdt&g-@CfB`Vk;+=8O}rOwcWQ
zgk4QN+Mv%RU^cR0#*eaQ(MC6^P4v=IDw7ooxKL>T5<v&|%P^!1y+q#0YBZX<NMgnn
z%pH!D?rKquxD;5EtHKFCuAR}fo(A1!L2p&nn@m6BGv@@&d*N$_2nj$Ut@dEz1cz1T
zb5=8YZ{PO5zr4Qd-4%27(zTdyGSjQ7ky${mj5O2A2-hU&O70O`sd=t7IUU-AhqoYy
zE~QgkEsckXZwwq<rs{Pm1Eb`sMd{T>-Wqce!g`E%orbP4cIVthU)E!q2g%^AIs!mz
zf46=_P9zRM(TU^FBPmzOLlO-Rs*fbPr$`&;48RIhTFonB0ndQD?dDMHyEjNg`BWt;
zs%s0{0Y-0&gZKD(*jo(A-cl{70qZECI;rKcn9v%CCo7qjR3owoO+^vM@p)OY$&P9o
z%w$)!jfRUz-Qu2KfEIWCBEy%8bo8Va)h>vCHi>&1jea^Zqp1+RdD6<`2J(XH&)rF0
z<+#Jd_)y#2_a#$yqCbqh%E?YKQaSgG0f6j`CiaMV`O;)*;QnzSImASYb=)srUK15)
zD(YWg!y3K78js$<)3jeQF1(_3xi@jAAok*sk!W2rD1~Vab6mdKd;qAs?S<r6Ye@8{
zDH~K-*@2Yq9#1?N6Rc~G)!Kl~bEANPxv*6uYLn=dZ(oksQyiq8o(c|Sw)%dR(_Kj4
z&t#_GECGs{8q-!xW*XeAwmgvyvzU=MT9|fWc4YG!ar_r%&trDR>F8hC|E)6nKJE0M
z)ly&5R=+>K|03g4Y|r>SE$M$_GG03V`y%6$yl(6CnH^-jy$@a=)QpJTw`BHAJTz^_
z>_Q%4r>81QO9u=9@lhDvwvZ<FJ}9gcC-$#E;Y>VsqcHOJqn5%O@RNd5l=YcFTgA=m
z8^d2Ly%{=yc`UU!3Ou@?U?)#^Fg3Pzq~lN0)}T_YbJ@rg0xkzbu}NVPqt6cqGtb$f
zhw*uUw4jGIy(r}dF(yqVs{qxign0jB-8ZYz-f)wz&0@M9O})kOp?nl+93E@fB4g8?
z5M=A5AR{z3#gtP#FU`Sl!z_XtB|I;&w!BykGh8o+zjNP95gV(N^Cb``!}$`Q?BIOa
zP@FH#KdkfR)~=R7-%M9aF_bA^OS71-Wi)oheJ$}7$=4FGQ-lB%qy#RNrJW>j!bP%d
zxP{LK;S&2=?gJOn7`Mhn<7>%q(6%S4N9J2*tt;1L$_<h?m~w+0W+>mo=$uaH7$*K{
ze82?BNV6Vp<?%>8h_!$+16bDMQDQbe#?ksaj0~sbs5sox;c<v-oHf)a{^h+MWiXM>
zpXc^he00T-%A=e!x_okO=q2u(%KG+g1iA(3Hj{7{e?BJrWPiTBh`;$R;!pAC+Xp)F
zY~BJJ#h-5<*sOO#NAc&|(^!!|Ujm~zh6wB(1}?1R4qI~#^unn{2dNHS`ttF%7oLWe
z=!xtGD0M`q4Me%W3;F@lQk00%q{dOHa(A#&w3j{fw!%2i2Qb2UwO^zqx2Fv_$L@ay
z?20BXDDn>&3&NRBAERJ$7}{6mh0r*yz`iNkk*BEiP07eo5v^liuVU2JJA2W)lCxJP
zW7FCA)|{Q~Bo^&XD%xV5hZW7)!fs+BzEKsyM5@<be8cvMdzN?DX7qP(b%{HW%|4&g
z*(D0&ta!U*2=c0A4G^9q+=QTY78zD_&K$b-E6j5Q9WhxTfl`xG=i`yu6*ybdM)A_f
zC5`Gn;FRx&<aD8HNaJ+DeI!b|N2s0aZQLRh>0lYR2W+OK+XEqTd`N8{9&2X#@geg+
zI6RKsA0qLJQWWEpvSnnBEEBCFgd|AV2-cs)yBGH^`cwO>@dTc0DTs08VIuS&m&okU
z2KVQhy8|~2oxp?TO2~8{TM?CwK6-9}JZDZ_#jW0eV9k&|7I&0j+rgyOn~1m85cE+~
zwx2^Zu`eyGE`Cl&iG6J3H*fZeZPDa?U+I&weeL=-?U%PY9ZPa?aSw=pZ7weA0r7db
zct0kK&t$YHY_`}RLw}LV`*Lk~LVlL)mH+Be+0v;NJ4j_~A52{CL|x0pnRsNLs3UXa
zov5QhWE-CaNlaM#ps-F(Sib^=Gx6As!rS;P5Pq@GLg`bF5<UyqRF=;Ij&%HKqtC)n
zby^r^J1q#gMNSJtaT8KnnVHu@bZZ9-3=Dw1HVZf;uv4#;LjoO>CL9vjm}?vo$b6W(
zJ+Y2m`FDwbI2fe@APNdTn9>5lE@>nK5^{n@Z&(}NFxeZiYsqr9kY9=XKH2H=#6u7d
zAc!BVsy1I@<Up@`ed_1GZnD<_hvb%jlMhcHV-oSvyLG4JJ@5kQ=y5jyjSmmzRm*&c
zt2FznjsXkTixJzYNog|2EvW>Ln~CmG?U*d1MC)2$uWe(5VO-aK@JC9`bk9GrBw
zOZqt%V4{o4w7hS+4_+?m15ME^gmF^V^tK8W-;`Opza7n{&K^lNd3uti@ZFhWdx}u1
zM$`Ke@pH%cpXd+vMg;vX76mjv{-<qs%w+se%W9fm;(z`D<9{|f#(%D+o(n?CpUpfs
z&{jJ6mkkmMn&BPh81Uw(8-RA{t`{}Zj;H=u%J)jxh+zZq>e6Z4fpM51@1KAG(484f
zS}|ZzxbktZnlGVG3(WUA3S1X{9Ybxbd<>wDlQByv$TO;ycb7NUx7yI9+A75(m}(*%
zyn)pwg4jE|y1MzxB`ia4KV02iQu1A_aY@bbcRpNyxV?CNAY8QZoOvucY$x5nduuru
z4%A<xCb=ecRv>YHb@u)OR+8)6yIvH~!Zd1ziGgW0S4D3{*eTRon=5;j#=3)*;WyWi
z^Fa6ByuW!1i}GE?o8R|l&V1QJ(@D+ve+YmXZFnk(Lzcc`=1iw9P+;!QjiEco+wm;O
z4T~5%U(Nb%fTLfy<(W6{A)MX<Io7}nFHyr~fULrvH;1m{Ph&lbus=RNGV7z`Mtv0Z
zWbT0K06YUj>jW_GW)!)9C3581FoR&fw{+&NzY1B;WJ}exb4~Tu#tY`;y1BcRfxDQv
zVej_;zQ6na4Gw0f0Si~V(>2X*yJdG;mR*DuI2b^@w`kOiPQB6Ww7bo=bz)r{S!E%>
zmgWeU&}^A@+iWz=#>J6Y7KVjvY_>b?lap@8-UdRGVPV-;!?YV+yS)VryJkEC7`A1f
zG%c&s?Q8*~#qemi+ucU1({8u6fYC-gKpMkO$Fw_J!RR0!R^2|qpC<gj1&l7@(X2P=
z$22>3c^Ds&JFL2CnJ2bwH{7GL4EoVRK5w5io2`>(hlJNbEGa$1oy{804`kWN17-AN
ztMY6yA5)ZzHQR-Py8p=yd@Ml|6s(-Mn2rM%1b<+-A6GE1F^5O6a`C#s8o$7VRhkJ2
zg4&AvF|!fG5yd*@6Tun_kUX-~0yI#n3|6T7j8w50E>}=Q0s{NX+Y5uKmvvEP4cl$2
z8Of@jU3bB<bWx)Bz@ar9I!U?~0bX;cJV+wPP?6{~a-tr)709dM;=&q}@muvddRT}R
zy&hjuBs$_P0y0%bqtC<C(5u)6zK!QN*pL}@2{E@$4|sNv3d&J~2}YwDMM41RM=uRi
z#5o}3*TB?knxqY0{tKBrjDPUQ>=6YyAj(9js$)G}N(4|Ov)VysRzi<TtgO%xUGN12
zmXyvFrJ%ggEA&X5VMXsDz2H3nT~(QXZ4rx+#dodZv2I3>EP5tCe5@Id(9ENSuRI!z
zPlv4e#B!nbm@NU+(xaLAT2uR)&0e!IUu$b$>r@j+J{sD3RuU`hp{8CdiN>R)t=Hx4
zM%AEJ6X}*wZb{`pbt-0L=Qu&3n`-7G1IM|B50^a2I7r?SZ(Zgl#uZVI^sQJaW_nDQ
zuu3Q$@xFz354(o|;|)%y7^Lv}>>6Zq^!4cS`b}(6iA97O%~KMaHC2wtayuiWl_Z}O
zI9!%8<t;l>*&g(GH}d299qRELVMFhF6A!fCZnq99p#}f7?aoQ75Muc9)Q9C>ec3)K
zvWE~${?*NQ*2NnnhHYDIXc=_5Zug|!w99B7h|0N{QXI;>ZRzxuG7wNynsrcTTkR9G
z)c~E=E{Owb)@HqV(ltSaYc)U{ZI*=qz38I;X1C0fMyI(21Q294Xy)w}utE34F1J4*
z2zHI>!|>CxTPNjoNP+-5b*s*7S|+IX971A^tr?^BbFF3w@@p`4K%TB;6%4b>iM5yU
zeKr0v^&>q4zEX@<_zPN<;jm9*P$K4sAW17$waV0l(K=cI)Wt*aw5%v)yHwacQvgG8
z2m3}QAxCQ#RhB#`BrOXxB~<&EqBUAai%0xeRivJ?Zh=<I)!iD}r@|%bgoc@`@r+4=
zb6OK1={c9nu|Q@E(<oY*=sm9fh^w5+IyX|u#wM|<!+grCYh|v4a@i*~vY%8vNzsMy
zkqUd!NH6gz1Qt3)AYIEURI6F2mR+b;D_!doR7~j-vVwhVP;yMe`rr9xev=;KL3O6m
zFK-T}(dLC!w3_l6AfKX4ZBlmz@|AtIHNd0Abu}Z4^Nx!?W_NXSQoAM>eW))M{d^f`
z4aKUzNg6^;3u;=0YDVfowEzOqdf#cB)a_=c0Q_i`XRox)=1H^BEYjG<6&@C-*2zf;
zQ1LQrware)1WY=1*KD*-8r@D8*6afPNvyZcdc!(_CBM<>04&=)fetoc`L{}ff`#{_
zev<kz0hC#?haVe+R>M5$wi+$7*#))AZUNma_o!VCEwsj1^_FGZT_AVUK52JtvjM#X
zP|cE{Fw{@o>R9bo%kEl@t_dhv-A=pBdRPWjv&OAmiJud<QIb}X(r4PO4(L3srfIdx
zVRKHlNZabR8*S)UvvFdZ9jj@X5xM!z2HA(vBROX_tY$9~aB)y3EC-<UyTQ`A%kp9u
z<;5CG$_xKO%FEWe3qnauRo>9Hy2~(McNrGYU4~_K7i*}|U52^3%TVYp(ADj9mzZ)X
z-39RZlvlTu?h>ywR)!XXs}{YeHpA4Ig*4azl*|i>dJOdf6)m~qYHF~G2~<96Q29ux
ze2iQ8sBeLrLkd*1idJk&#@L~Ek0=v?q5fhm8QG%gVAbFW=Zej`(Xf<m#AL>1?I>ME
ziB!aD;g!ML??1eGb9GTMYgVmMYgYAI)mpj&new|%YE^h7?A|-ml_RpZ<Kfoa4e>D9
zpAYbU6w6`_GQh&q)WDmE-XN?~@EJT!rl{3uQHRg&8mDXEJ_O!UN@~xB2mz0xQ)awB
zWxNaAfrlB|5rDfMxYMaSU!pXd`tUmZMcXgzk}achX$-v~8T0!-rHNnq2x95{OsBlQ
z;S2`;3f`u3$OQm)rp5&3{}eHSLVg7qPv*pN0QTI)-1NrL5qgXceU}}+bE=M=ppTJ<
z@Es=@uP}5>X!v*$4Uwwxdu9~zn~)(IfY8EAdU4NYUWhcTGNN7kON@^M1ZFqS!qs5H
zz9WHnwC?KL-n+{xFhsZBvE8F?&3Kr2P>&HB!Gx<|5qLB<7;jwU7-z0@^#eR$e;Pws
z7^QvxO&H2HTFS^xf3raih8=4DzamvB7UJe=cFnL$9kPdb@scf7QZUB?#wlhY<&yCA
zJF|AwsCT+8v(>f=na2|EGYgq*qJ_)u+Mt~_8LWbbd<k%PfZMEhK!HE8PvDQ;0>!HV
zT70(%FkrWVKcd~hky$?hL@cydnFXzG;&l^kpY<kadu`Z4G}{FYX5w{=TIZ&&wrLi%
zb@`z{9*=D8X47nSOtag7ok;iMsL^PZfnpL8%`V_?!f4nhO*%ntG@50gKv$UI4|W?Z
zP=rsqU8{po%Gm$j+`PM_?OCU3HBVZ$4LdxuJz^)WwsCBr{8G=aW-~mbnawQv4NO7o
z-uIR@<MDaw<hO>u>C8%0!xKENCX&!cVe9lmf4TyBN|HQ2?p(lw$~tyU5%p*Oa5eSG
z`-rR2Ov9u#18)>#JC)U90TSMyyIhBsFp%tFG5Tm-iRNz9XgF}DuF-!q?BjPH1N0ax
zb+Rc3i5b5}0U!_|^j#VQBEoC#5o70}njnsgEuG=!QRvb?y{!E7%&Z#U7!{CeMkRDz
zWi=*uwy>Ilrsy&j*JXr`Vo5Y$=h1N`l%SF2Nn@e31ZHEGQ~beMFtU%P&Ny`nu4}{I
ztWTXJn_2kN$GJZ<LUN(UH5tc=okA@sN(rp0=s+hb;&R_W=N<CkdFLJ5tg=I?$(Z(m
z^|2m4eX)M=68>5&AfNFq)G+#iGan$G#UYw-A*zeR=MB`fk|bwN2=d6tt_bY)YQo@&
z6z(P1H*IE946#UrURF%vXcQALc_iU@N+=CSyp8(e%o$^@FeeO8jju1(OW1R{LrGtv
zDT=<H<ay=7e*Ei$=!Cyg!9)B<<IsRrt{Q!`;*ZVeTnd1=k<Mr1bHsR+WcvX|;GBAv
zsMpM|*G!$VA`ZGjOIH}XbX+inoXu9cED&*~QBnD_D#;a;Ct|rtnbxJis?mlrX*QN2
zm|DZKTqAu3{wbp*=`Vo~Kj{v>ajW(yopNUwuoDzw5hNrMAz>maHL6_N$A<tGr&gp{
zXq*H-)s!k`b#omnaWqd=G-Ye4Eb>W+#XkutCa4vao_uhbm+^xydK>cbx74L6n!}2(
zf_hvp*HNmlsr*=bjO6udF^q_f(wf?oyaCqY6+@cwY;);MXMV_~y)$!hONSc>1o1Sx
z6JXCTyOTZcPQL6dzU(c2hkJ|1=UE#H{SIMEWnHM4X;uAqtfQ+?6kktCH!Z8m<FfN~
z+Wco!srRl;f6<M9UENqcW|4K?PqloVSbe^=Ml31kpII;dEvUtkU+!m6gNx|FIz@Ol
zO*qas`sNH)JiN%p;)q6f?$DMHehij%cHhuVRyQy(1ImC^=(58WY%jo9phd%3ECPS+
z&Dc2(8Y<905rS+-c(5W}yAMl${-@9Yu`%(7H9X+K%ZDMYSLTt0rZ2ks7mFKE*8K&B
zbQtmrjtGmL@sQaKk8zAk>DYvhTU_HAU9^G@DOVlX-LFuL=kzh^>I`o%I>UQszQofO
zas80`<gh~zg#ObC!*4EJcgRw#QUX<y1I&cyED<0`X1o{KcX@b<AyH%DEnN1EE>bcZ
z4l~IR$-<g3S_M>!;S~pXtb$n|u*0JfE;RS9fxDIY;IYo`gW}PRH}g;)19krI%K6!C
z0)0my;nCw6<7mK$uAIO%0^er?`-X_JbiAp7OFbH&q8pPB4-fUe`v`NkIjjR}$1r`N
zz2!0J6y9L!hR5Bf+B6{-mDx3+coOruhw71Z$sPgV-bWR5f)jdazrq74h4E~`F7xzs
z7j0^4O?FE|*E|a~iz&kcZ_t~<AT2{4%;5ACR;Fr&#Z+M@zURy^Cru#?jymw*8z_Z*
zQYN>I(%h@`*20<owT~eufp?~C?kN+`Tk^!d_2?LyE0-wWlYKCehCIX%+fhc#6b}}K
z>@;146D}B;2oZD+ktdOKbyYyca!(nNS~Qkx?qtt=IwXGkeC)QCV!#p;a5}G9m?KTH
zkUe??O1SA<0`<5JBiwXy{%PiHuzAw57%xRH`10!n<uBLHojH4TQ1I0XtWCXz51Xdm
z2;?g6^-7o$*lai|bWcLUas={X<`JgS4Ptp>&)sn^CD|ffHogc46OXGW(Y?D^-n0E5
zESerI8ElTha}Y71QHiG~EDH?O2<I?cVR63FYBg{$N;(Wlh->hJJDpY)5j1ssnk$Oa
zNOU6ByuXQ?Vkz}U231SRwWz~y@D#(NyUih?c!|iN#KG>1OGL-i0~ftS+_0Xct?rUt
zPk*CQ*3c6jjZqgcYYIKow~>al&;XN$ie4wJKrtXB9Mmit)lY(vxU<A>EA6x_kY+5H
zGLrwsn%`znCsEwmB}L3hj1($ZFoHN(EEonzqyZbF)U__Uu_ci$V?p?w!i>u@OfL5S
z*?Zgewryl#c)uS13bcD|Dmk((Qi_u}wtF1MX>!(aa@)>s_jG%7Q4(cwLx~D0+48n|
ze)}^s07!rYDN>T1G#j_EB@!=#nZaN%7|e`;>87(WfB<vv#zkfChbd(hpaWiXra=`9
z6glMOj}h08aW_Ii;b_Xj(thpr(aUM9YtAWrh!k_#wYfPEL9ddvNn+_P7pJhq>a+ry
z`m3O+ub~N{LkIXqYLyfR13T>1?Zgec0ft4nbjDZc_LaDX#bIFf4>wjtU(7`tlc_Tv
zE@rY!3||0_6iLKa2ixe7E?JwGh$NLfLv?gM)#H#DV(Q6Q{eM6D$At)owZUU&AQA=4
z{J{e~jGq#akw+Ly9%$Drh!|0vUlu>k>)F6uC1DfU1Qg)J6AoRX5MiXO&QE9Wk1^KQ
z>*Griv=MhwH$nITS40f14wCYx<73$0T`|JDLym-#APBjqAuc%zOYv4Na4&`SRoF(s
zHnuyX*mD^DhGI{N-#v_4GmUnv_MpA{q~CSg{nn#q|8dWH+I!l?D+ab}Jsmvmw)%rd
zyRB{o)S|Ct2VF-2+K%V6Y=SRJ0^LS^iDc<4U8QMd3Yi*|37ZU@NL*JXpqVko&b`I|
zQnNx<2#xjl(Ia#GFRR&ZwI8SAe_`N_Z}Gpr#`s@Pw*I5zd+7nS-W?tP^@Q(1jnTBS
z@5_hM(tA2SI(~KX^8EG5*^ASEbl#o6eewSIvUB<B{N&=*+tU}ReixKxPhurX@8#(H
z_!X}5N3h~|&fdRyesbP<`?7O>^3&^!%aikqR5y!?vzon$c|n6g4FlM2*6V7&*lnlt
z^374FlLF(_5lBVQJV51m4Li;A_go=)bMzuT(1Mc8lAS4i;`Wf|$Tj4%t+NLYSk1hY
zvfqDK7uh$^=ihnxPUokef9@PTe+`YDUqJ6{pIOajyN)dPcVszOY=4Mk%3B5{rJ)(8
zrwE^mOKxXmKib7zYvRj+c~kiogS_Yl0Z-HdTnqe-9gx30ddu@HUpSG`cVARQjt^#2
zoZBC8jd#N^2+2zN7ERZ}z)T+R0We=LbRC#m3WVZj!q&j)dRyYzL7UYQ2md;64#B(e
zBA8VG9x59`)!>P`#-O%byJOEq)wc`EatOLEHVi|@b3P6&T`?eIbKv{I4Zi1*qp=4b
zIeZqndx-*t)o}8#%Q&?TErc?yFm(gf)dWq7V#R_9DnBrm%Lg^Z=rrnFJ3bF#Q&@}Z
zVpNxZEZc$!NIhe8iSnX%_TU<`MN^Np>xzv$hzE0^wZ|g>QJb(^zSfciY$8dYQTTku
z;Qvc8S`0)k)BKwH+UJ~pm{<-<mG=%~AA?fm`-kynO)7%2mY8aW{^dQyY)>+Lv)X1}
zB4rQu^YN*1GTh%phGgMDeZT`_eWK*Y<r?u0*U(76xRF?}rZT2qHztjbjj$2b**}5W
zwGxTnStg(s#;>l`qWb>+?(ek_{<MFueS|;u@3jeKgV($Fin~wsc3s@I*0h=$f|@pu
zs$V#&zBa0loAyJC_YNO<{}b`drjLAtFj<T!wlUy+!6*U6j1t?_y7dYpO^}$^fkvTh
zpBX|sN)Yn+;Ubt+sERGmXGd>N8b5RRbh81O;{N<+EI>!~x~1snay`HvFRuqQ`v7AY
zdq5-0&t=DuSi;K=CWB9HbRPCyvU4xLPkz7Jj6jbM4_Dat!f>T!gJ)7-0tFdVByINy
z&!Va8^zlmZjr#+rCRYJ0+SnE><UZy(z_VtkMkP@)QOxO-wx{V}24SXWrxyX!VaXE;
zRY!YIYrURr9(_PCQN8H}WuZrhC6bG@%>9jW0C@j)(86-SEN-V3$?V^bFM19hN5?NH
z?|qwpe&s~3ph4a`6lpd#<ntHaHA^bNg9|_?JV>2n#jlTpsOa@+aHThQGM}W{$72%p
z!FiN^a_NkRMV&myL<Lx5@vU{nl72|<Q~P;|H`%o($FAp7!$~e4u0JdwzKlO#p7<Cc
zz({;}u_of%)rr3hweFu9;_D)!fZBeDEmfG2UfzuK);J@xh?4^|42kC%Nd^_KFQ6?+
z7`aK9gD&6`eeqP5cy;+efMxB1T4feDW|hqhfZbOC5N6*D0CqV5B+FL;pzWyuU@lX(
z0tUPUfQpMiL`0@z5Ok>G%K*<ClvQgV=PJhOW;jOk+Ib?{Q4I*H5#Zu)#|H-(o{qNq
z$)*=yu&0SN2c4iiA7kUj`)|yow1<vem?*XDx){oP3VbfTr(pNH+vbrHB2fQE{oOWb
zS`jX*(;p(%8@jy@>}KeaJC!FKwT8G68q-N+%SK<bhaeKrqt#8iG}Y%f^kNq?ckV^#
zxz=^3cz8AN<_+e1A6!hojGJ<YREQ?*SmWa)Rzsh#(4k}YK%@kzHS6{5a(PI|WkuKP
zlw%l~^gQJk;?l7ozG>*c%4c7<`x$LdJz;r}+i$<NL~p2xrIat;8*s`4oLD)%iI5mM
z0~MRxyG65AyBEY-60>)XR7=6CKqH^zFjMe!Oq`6N^^M`SXcDPPY;s0MFh@HwQ4fBJ
z^<rU?9--CF9Spplhi9-8Y_6}iMiSvwaDy7N3<iSsa|#1OUC{{H$oT;9T=jaWUrPXS
z+D#_6B1p&>WW)#!gy_ObcOCGU&u+HrT*c>2Q%*4lQ$=+BTQvWX_`tc3=I{xPu<LRc
z8B9Y(eGVLt`@Ds25ccsVMu^s<*gZ$p@VVD1H&~KM5W|`T{aMfL^F0RPO38--&30Wp
zJExt-gvw4umM@M19-KprCJ|OJcH!oFB*q{je~=i3#e`cVoiW7%QSs)m>uv~O(Ef|>
zN^;Io$<F(o+zEAXiRyIf<)<7C);#CfreIGkdAVwUk(`Bx$W*q`<p0rWa;&mdfChLe
zPXIWni9(t4vfdfe6d*)!t6?{+MzdZ`JScEv#e)%$tp5W5qKM>bFreU2aiI$i698Od
zq@|4l123SVqdZ-phXT)uRANSa)HM@}@+VdyMAEUz;l;@&uk4zMQ>kObzA|DjHDWJ2
zV$a+$$GPmg!a6wXu7+;PkGllAla7}}w@A!e8Qsc=mqT}QPpvw-$$!z!;+gx`q-<n)
zx4BZG0kx#Y#UHguUdlBRry(2~D-h5y6rx;qDE7ki%*jwvfG-BVUm1900uvp)82C7&
z->h4U4U1_|#qtAdfKwsWAip_2{+EFT5-xKp&7@3ooD9n}hp5Ry=NXzSKiVW`?~)Qm
zTv)3tVf19dOGp@zYPYh4QAS)=!f*$+LUWc5m>;MzDE9S2qcfg0%Y$q5EFsqy&`*UI
zCa8sD48F(hMMB4nbj3?KdfG^(6<1GGHq2Sf-N{S<)+j8I>d8TMp#f^c0;lLywQgK3
zQ?E(!3)Ag2Nik_@4sE5mImsNoBsU9B-^$#abi5=t3*Xsz%@Phzv{AZoy|B+h$BXEK
zn#T<?d^aN9Dnw2)ZXUx;ow$k~RhSs*{N#D_A^WITPh};KlVn3#zH`-mY0Ar|3{W|Q
z%s%R+P&^nV%4efjQCg+J$Rlg^Q4Sb=1&my8@m#DF#$GHVwY~9jQakQ=S*eX3FC(>+
z5#N#22EcM;wlhG<OjEJzj(fv~F2y&!Xj2p4juhV>UdBOtGkMFakti~EVAQVDcQN?s
zw;s)au9%2jofcJrUUcir$<L89Uy-kz`Eq;}cf2fLVaLnxRWjl`;wu2`bMe*H9rIN&
z8cK5=9Y_@MU5eIJp0gQ-TCc)qYF9>1Gk4Wbx~i~|a6P}msIWM9VX3yiNO~zVAl^$~
z^iqB>6zRRfXyU9k6^EntQ)WDI1bSz=mDCxpYS0=lY0w(4YS0=lY0w(4YS4;>MRf~H
zF%K=qwls)TM0PPtEylGpWmH6UGTP-Z9n0wIh@P*C>G_hVp0A4Q`I5+<uZr#Y3h3rb
z<;s#o+-P1#m?YgVFHflZ<wS}y`lY2xqHI(r0Pca!2S%I9@v8JqeE$BY&e7@XpUzHR
zbWYzMy-)$6b1ZRYU-7UgfgycZw7Con%bYT+cu)_EHW^z651bsKOKu_)cuNN2eJwN2
z<{^hO9r_~nKS39&Ds-IX5;8f5WZD?spQ)pu*L0C4>j<;kY}V;L=-+2=FHhJ7s^3!O
z4hG~GV>Okd^{&(B=hioP$jlGEC4OeWD6+Mhx9x5>wDb|lF)mabJKX1pVa4yhqiVw2
zKXheG0(=MhS!~b7qxxEt-!repSl?}L)+NrxwFk9<AvO?V&JIJBhjvnlXblDVceyiR
zX)c&0(?JxJE_h-$BXGGVZ?B1LAYIoFZW@G9bmx%{M%)D_`mW;a&gBY%Ou0Xk!x9C9
zs;D>0xjxIC>`Q~xPe7_P<!9zOzbqWpraf*2tI@JQn=BC48^z8B;*NtjZsVe=p9ip@
ziiEe$b96PBh`C!)!Y8oj3Hgo07MgMez}LStf2}Uk6@=wTbu}`VhcN<^9Mg$laVOqx
zyF6X<P8}#&RtkrU#)FqD0UwyvK={YkVv0|LtaeniR9#%YJwM^6z1SY*EQKb@u9hk&
z!dts0DVPQYHKj_R^p^$&eUxi4Q4Lt}s)3R-i4!tUgSrA~1yE;8qb(6VDQIUaU~aZF
zLluD&Ef3DJlq>*eWn!uUJcM&yB9pHibgvZ-%dB9XZLrtlzB@-(Wv}m&&@p?v{?gjy
zWq##Q{bHxSTdD6&`8_YqYYzA*mIe62QJNL4$|VNC*Wk4Qh$3772*iuAn_Z|EgO+%M
zu7uvQ{2_R{=b(#qmN^7l<jpn1aG>lj{Oe}(*Cg<C6m<r%m#bFF%9cu90pvaL-&Q?U
z-L6#KPFHVLs@|%XW(*y%S+-pyRV$#j-x<J)zX-Lwls3427PZR^F2(Z1?0Mp_!m!9g
zDe_8iO;R#@UA7F{EUsV7H;e1Hl4w}mFIsGrFkv7DHR-yveV+2*fD<<yU{sRP%%6G_
zA3e9PUCo`FZ&4jo+NR_ZFdc4JSiV#r+RPM>U2ye4uoEZAiHknK4o@0N?a?wC-=IzD
z@)IZa6y{j?!jNE|jfn%Wo)=t4Lq87<cE6Uo=FQO{gKw3*b(BAZ&>gtb-cYJVsr?n~
zCPLIa5KvjPCJOb2PPkdGQ4yYP^wsOL%g$x9ybdHgmnx#Z2H#WXq=BWjNvn*!;C7Yb
zf6w2%z3BY?_1SJ)hP6h(1B_zCuAR`M3xWf0&co@Yu3z!6-E;#BuNYiM7@{2m8u6TC
z>?RDxS6d<iI$ks2h3uw9*SlVL6d_$)=@8e+MlO%HjeYj1(w}$jg>ymNBjmxzN}l^U
z+o+7<IthE+5+oB>1+_jFo%!)TXuT@eiMS4^55#1m#!j&C+$g;9cu{~b7Z;jxS>c`u
zzfMZ>!t;#YlOFy$DJu%kGx|uyPVbJ=i?6ONGhMP6zLOb~#p!x6nd(MI<Ps<Hp-VB8
zjb8ZLhA2_Pod&F*bL)9UUl2n_(nsFdQD1gc&3<PbkeA<&blHhVRN)x18?F2H!sD0x
zr?Kc`<^n+M8BzWALV6r)xVrGt2>i>k<5=z@Lxvv7Erwj*sf&|Vs{gLc7sd5Sc}^2|
z<IJQai$ceX`KM*2N0Qu)NuPH^6LtE<JCF`uG9V#cQAC|W28h!=(D0Bf5|^s^rAZ7B
z$6eQAZB|@KEThM5OyWzA?Yt|APM#aO!uidH`pwe5+1j@jCyGP)i&AIF9P19|rb_Kz
zoEF-^<wZv*W0}iX4f>ClvCU=d2K~p&w9I8%4f?MP59XjTZ4_IGN1A!fhWgFYzS-Kh
zR<U<vOCnMpN%UtEnp+PQ1ZLhTh|9cF5SDq@Qrs=6<@5=?PFZzBhJ#E~HT^!OXv@^J
zgv>TnZYhno%<p;Q@l2GfX#UQJhsK!58&QKzY&GI+jh}u=tRlsc(XxTjuy*rTJT{v@
zdc?&eJ+s%*;?a@PVrv~ON)axet)WGJyRFdSwN*vI>CfhqTqRK@oFco2;_`y}S-{wk
z1m<qL+FM(`ZT(uWa1oB{M+%%-Ewd2Leik+*d%#_wO3kI#q$Ft;{kFw9sFXT<p7?<g
zf1<3Vl^uFTW$nDOD&|u#2m&RLnmbm2G&E*q8WRIfLx#4op|6!`rXj*I{?Gta_Ud+A
zz`hrL66~a7#lT*T6cr3b6i*KxE*__Cxt$#czHno=AyO{9@FLpkW9#aS;1M6Zh`IA=
z2)dJh%OXFxL8S>(a!^1Uv{}9$PhI~9jDz1BdKhkJ2HSq#G#QN9nLBhq`9eGBD<`6?
zFsP^%$tHMh%r+Qbh{|Xipo?IL0J2R}ZRydF-r=eehx`ICJt41@QdG?y>l_N2sHT4#
zyTi(n=q<Gq#Ao!}s>eW-yaglo)+(jVd+RTr(kA5LP*nNs;LT{APDRxS=Y2^qb(#^$
z(zq_%-Bj2)tKvUnfGApll2u*#TCiR!1u{!!32TWiAB?7wjDwDpy+buZgp<$x-eGz?
zB#KF#3(;0$aa|VLOvd3fD{$W;^8`jn>+GR<K)NC(XK#f|`_(u_BS9HOA%}PJbcN#L
zc0ut48iPXfVx^(DNz2q^j_RN`3B=9z-82j*ThJH{C8b9Q*&*Y!^YoiMjFk+)v_}Ss
z(af*1W_K1uILK;`dM4iGmME-TZi)I-X<h0p2%Dfs$S?_Vf~-)GR}gre1;G^JrBb=-
z=*}l@=%MY1-(|-j2(CMdL>T@^q;id>F0^&y+)~(oX>NiFB3)h(QV6t~d$rww0rT@-
zymrp`6-u$@epeR>hTgt-%cdc<*27g;3wFTz9?wOq#HD}z)0wqPV|8ZpU)sMaN4fBT
z#}$cD&F~p+HaDp>%y;-i&usY&XP`XiA9>?yJY0O=JD-Q;HSeW(7%JZl4;Nn|FTukI
z+4^|6_-c8X`@V*SQoT=ZH1E?H^Tn(8FJGRXP`nxnlCg04PufKS<gcH(SB>vt{;8K9
zC<-T^j+B3QggFNX{1US(W)S<1=4G|1Cf?(c!zbH5pK9z!@ty67$)}XIMQNLtZpqTN
zEZzE5!8kU*PqlvJT~QhPStdPJh6lvFcIMUQwKHFCUdwCtOU-Mjd^hvjnXfpn5wi8o
zYiE8J^IEyjzt(vjxBDMIuMaED>%)@s`f%BKZHT!^=>wAFl*`1hG2I9?MX_N$+VlUG
zvf=O>UtM9GaiUS}d(dVk5&O$84(R-U`K9|S`{($dQ|Ou=_Qb>QVd(ho`-k)I8D0{@
z#5_=ieUINUW+oKnUwUcV58V`l2jrz!xE~`;AFsR7`RkL+<TG6eO$Lu7_6(!;#Hby0
z)Dzwh49=q=#-lGWQV+g(@Wsm?X(|_(*`PtTL^J}4dxT2RB^=A!!60&{ar!3?aR=w#
zK7ZAS1vH7s2v1de`Ce|}Y+O@(tx=d{`6hDVNgrn;zb=W_A$f1#9f&KCjMvROAW;dJ
zIUW(sH?nzjs7-jmJKJn?(f3J@X<rE~US+g+X0&)qphd+3uiOw##{*n(e6ade93T0C
zXUB(+^T&5^U@ahfKcDRVT(b95WPhU2JH8zXlf(XFOpX{!zZr0aiWY@9rdCBG&&tfD
z1&56SdCht2sl1*0iGCvjHR$YO=(rll;p2@y%PGlEdkiDV>A1B9w5T^Hl(I1Hr!A%T
zFJD#^a*0M+6+aoy5M<9h7ai9NktnVZ5a`4>%kqs8Ud~9RA?DBPxsxH&K4-sWF;bmJ
z@0ci=<`q`2+5?{s>zNPty|xsfUMw$J=HmCbQ%F%C7NT5fR_CdLW)0l*Nl7NOQFym$
zV=@h$aWn}cSD@^<AM`%No9ssW@PSAQz(ZBwe(>`A=<MR%TijE;yQuNE+dNn&of7Cz
ztjrTT^Q0v_Dpr8i?l<f-tRj_UvfovA;fvuWjHZiaoEnyFi9DF-X8UU6ZhKi8iR7A>
zIj27*;f<TI$~2QhRHd1m)1s=-w8}J-15F8f@-v^yW&_5ZM>fMDQEml0uLm0{v<wwm
zc|FJqRgMz7n2Jm6ST9_k?vL#wzjz{VT2@iO^douiaFOEQIHzf`7TCYtFi7r<;Wx2{
z&?xn?5dovzFymB|U~TjFfQ$x9KeG+bEW@*=&Nu@HfbZxrCEbsrUtXm_LqMZc`H+}|
z;cLFbP*Inls-bkP7`zI?S0=Hel9~;LFk;J+#I93|DKt`@E=#+qRb`n$0m}@ES;oXM
zu%czyCCe>P>dsgtk5g!^U6D=DEWJFFNLxl6TWukGq{^(q8|d|B<PAHQH&D;Y;|+XQ
z#2bAhbM&*!(Km2MzgC&*{amv5*Fg6EN@VZX^q6*w$=)wR_VO~p8%J(9<=!ubV1WD>
zsA@1_!VN#@NvAvN)`iD-1|4#zBhD*io~8%O1c^qD&KsGCGbql4mK<6?H8eaLS~Rr2
zKD7RFLraL}JNHMknzg^*QV(^Oy{9Gfu{4!iO?5-i=Zrw){O;%ykUz}AyAsP}@OqaV
zgNU{A5L-3pO;f)rR*2uU<T2p4Z@@1HTgNY2PDLURXFB8w9`G=UU&?y<<_OR5+;KDu
zT^gPjP2F+tmcPCh8EM_Vh<;El1%o)JR?e<u?1xk=h;Hh8PfHZHw>KC$1DWff{E4#C
zZyaqQ9e$ykeqiWY=~?dXKjCp7rrRAmBR7H#gB#z*HoiRZuH5lP=BV)9(eYnTcs6Qe
z&(5ojKYjZ#|G0?3E&hA=vHRp{ySLlxKN;*0YImZrw{skX?hZOa?*J}$E^jC9u|r<c
z+rx5wU=FI?cKc5v9aZ{o%YMB3C#%(b)M~aZt8M?OX<Lt*tv|8meIQ^KO<@wVmD@=V
z?ho=q@en)=K><@6{!W~bqErz1K{hf<f^_3jq?hA|50kTX-|J!k1(^;94LpEI$G>vn
zIcC(vH^;%`Hl(_><2uGfc*LH&<G=%jd+I`sknMNr`*y^?Z(jw$)qh`&9M9kG1*1dJ
z4#9X4%tDy-#~8r$CB~RRKZ;rRU#KpFw4G91hj0|hC>w8re|x^~VEuO>g}fd+^?D$a
zU3R_A?Cs_@dp&>=_5jAX%DYo`<8ZHvzK21By0fX<k8j&fC9WFmAHj_EFyCo+>Ui)6
zQ=(4Ua5|mr?d*(r*V{pOwF7PLKyN$S>F_a^|9E%OIX=3$d<{aD%c(!}ETQ=YHd4Y5
zr0PaiSmIN~2}0VUF`YIosW5qEo^BC4jIyx*W=@>$_Hn`|8PYGp-f`d#2IqLgIC*?|
zHPs&*6Q(c7q2?$)QQ3fOdb9?8f$v>U(de99$5un1E<gf>!r~!$J~t8uIug8W%s)NI
zLk0<i0FmCI3Jzqfv(?aG-w7J*ejXYMBB-Y>*r!f?uEl|a)c)cJm>8>OvHg8!*V*sl
z<8JEKL%B&M;w-J1rR<8hq*NrPd6jkrROS_`w|HcAi&C)XggossKb&Dh#~(<6>5V6|
zX@&>+;g4j%HEDGQY4}ZlyI1}1e>vyE6Le0TuSfrDJ=$%xQu?3WwA<hGzpqjMYqFml
z*JnS?-0|4$I<r38zk&zbSMh`Y#?<xB_8>eoHn7Y7iXzJygS-tQ_QnaXoY0#B==&ol
z{18O{JqTSFcG#$UebwqCy1bsA{0xiz(dp&uznvUs^hZ;P@(QGIOb~0u%x7!@zoF5y
z|LOev_8iYqYw^oE8Z|nR->*sMotamM3jNOio}9n!G?@O_r^o1$_H1Y8H<siomOnhU
zgZ_i7-U6WPQb5VxSq`j~p^~})Z_euVmB9X{B^gT8V4?k#Q^=i9fZaT$JyRI5J$7Al
zL@V)1f6WyG4IaBUSO5={r~nU^Hs}p1JhL%g59I1jtkGRRQ@+EXSg-$!O}^k+btd+n
z!Lm}VBIouVUwlLaO?JqjV-fqPqZQ{tYC${0DHB6ZPo5dypkDLLtg6vv@>nA~;^{)(
zj)e&@ei4CEmngxnoahxa$vc3@&Blg&e$?;dr??_Kx|ns-kKQ1E>yKV|*Xc)Rl#*L}
zbkvP_siykKPCa@9awq-BnWrB;M_X<RRCjvH#XS^E$XAViQaP?fK;#-T0rAb+2xxsx
z1k9mUmRQ_HBOzBjGhhc3W7{+bIY|WC5rqdfs1Ar@yuflGux;&rdX_iq5U8>M0i|;R
zl1!BW$SMa=%ta*CcQ#Q`xWVmD-|YdT@K~UmI|ad>3cb2|1J$jo^aidca<ednBtilk
z$RTpeuoB6n5_z7EtfoaKt|v*PN&$>m)Br}3nE)2#+AN*@Dp?TIQN8qF##JGXQ9N4(
z#d`Lh93;=yj^)`ZST2G&#`A0qh*m;)GxJJbGd<ljrs)7|mdUsxZnTMD>2;YkWczwG
zfa9gBW_)Z)a{{QW0<gsH(h$$?4#nwID5AsFu{^swEa$?UgXjt~P&p`GFaaMVNvL3D
z8M?q%H!jRL$M<hx)wy<Y8#;sirJj&3VtX}MbK)zinJNFEbfO?d*s15+g|hBJHj5QD
zAac{epKVa%$@4T38o2H9`}Uxw2L`P_P}5gYMe;z68&S^esr^dXW_n4s9K$dgth``1
zWxFs<9Q`t_GZ4EO_(1s3v%82lR{X|N->Ae@$>Izu=CmQ6G@(H-VS`mzEObPOuHT=%
zzc_iJUl~`xriSxV<t<E0vWRv@s^to=6-g=bf0<>eS(w(LwoLcLbC1L<fEQ#3g3aN%
zr9a0@)GH&*L}fF5b7kj3qMMMC2<s#@*G46Q<n;Sld8AO&F;G~|RI8|2NmCRgc>B7+
zx**sqs#ZxW%3M)!<rwgn1qj6<K)@an2-OtQXx3d9N|L5lK}pieI#9Az4J8D@nD)ws
zo{hc8;&E~p2gh8I3WhE@_RW~qx@-@sc1fdzzW!BqjPc;=xa!V>P<cXhPQL1<(yDmK
z041=HuNYi!U$@0t$ZIE1XVT><@tNa#fVb^yy)kRa#w@ilRl%r0l_a8874Fyb5ILRD
zZNP62#O$>N|7^O)gJ%qGIGRyH;=W9tbW4W{s36k044q6kRG^E>1T@7XRCgv&mrrW?
zHL-SQz0SLflgr(9=hbUkhoFa_+$le^9XVIt7?YnQtx|j5s6>0GZ)JNilmy8p0d*}R
za(Bo}kdL-YZ3y$ap)zy}m1$g80z@S6jI3fju?n_q>n$S&vn^k%Cjfop0l>Abokrl-
zC%JX?`J`0E0{VFfzmb)hpMa5v&~$|?3pm-7U~&~if4Qj$ZC9U(ibhdXrMr0YR_@-a
z-u*hJJMtrAYg+G+m!LtKRMi$<t{kbbtgau8)B4d8x<C-J^`Zx6+LGAOKv@|(LAW|{
z<^<9jkuzT&I~pu2V`sh&^l%saJD6G|v&Vp|t^}<5L`xuB$3#n7%iyb`nKwx^zHsjz
zryd8WObO4afz7fBlv5^hCCmB(TgpN>sDaIjd9Gz5LGz|O7aU>cl!*p;`~S=Dv2@;@
zpS(P|JbuNK3|7u*n6nkmClTAM0Lmg*j2-Bq>~iLq!5$*(>jn{_Nfht=A3!M{Ic~8T
zuV4i7P*IOVXr78?4LSfu8H<j20A#VLG+j-3#WGtsr`psr=0sbx1Cyp%hud1yhF2%g
z?79@oX|c--IZls2?$noL5j`P5RSpqoO32nl?KEZe5g2Mn49fI1ooH$gXq92|+ymLa
zzLLhNg-SOe&=6K%L^H3pyU_D}7!a3@%AP&PX7N!s*)!!AEf_@%ouPmg7vPMk9G%CX
zw5gaj5gNK|n7TpsVym!pXW`EO4%oW0a0^(KwNs;5Cu>&;e1x4>jC@2$-q|F)ZtfT~
z!<|j;o4&b|#6a5FVJ~0*{N@DxJUv*3XZ*ZvjB+$SZX+O>leG1ttb20pj@fh=%&vy;
z-pCz8Ct|R;bZ@6_9M~0IKgVv*jUp%X(7Yi|@-V|lT$pu(*|_h7x7*v>D@Kv(RSv$#
z&qfS!q<V$n_t?P!v+8;nsd#;fzwxQ86`*;}+f9}mWB9Dt@dwR6ouTVb+%Vx-#RMP%
z4K$#%d$`!w(h%pBxMptPq+7$YUY$Qftf<1MMz)N3<F=0XSW#1BC2WYL_d0Q(gfG#r
z^w8bgqqd+bw}y6&!8|Kq6mi)gwEkyfl%o@`1$Ebf(f>de<rvH|YGoEhJ%cG9%TIc4
zLrr6pB;4E}^==K%)`L-L%q?w3xZ6pe#%TFT|8-+@`AKiY=zk!KR-N<<giznRUiPd_
zrj!XLQ*VU++@P4@4W%Q0>ix~_X?ih_T5cBQ$tOkb)au(}WSmjrgS|*So$8A7_2h7>
zqpHu!0X2-XMh7GfXyIXU_y+o9BoE%sPyTjtesQwfuGgzNRU~Mve2i2UjX4&FGzH_`
zMwfrcIDr`}Sh9lkt7-qS=oo$C&j-FY+1XSY-rO;)vfBPlA&)e=#0~^=Zc{GqoFA8`
zJ4XtnI_I;!g#9LN4FJeNpI)Loz81T!;}lTb*7XBz9*#8T+jKxmWbAaz$>c~mj=h?k
zQ)dgWr+IN~ooyf!xO7FYeg!>rTZQnG={DXfncrlpSidHFC-WO+9OdOh)ev~Sg7}E^
zG93g@<La3|P2$}t(HP_2+AxUU43X^`rJh;FXLyr?iQb9Ye9+3FOx(K~e-61Q84UU<
zY14^@@@Z{%+f6q$zuf!ZZ=C7yrA+;|_!fBa{co%JsA;F}f48jNZ}ES=#{1vR$LxP*
zzURUs8;nMt8yX8e{dP%w`AgzEn=lfWMcx?2GboYne9|E2DnGbNGInxkB;|XUrV7tt
zH`r#1LO%ckU^pWTb0=uNCqcLhXX7c1Y2pUsZA2;j+JVwocxMAI%BAkCQ%E*+Wxjm<
z_Ut0};(Bhe%D2}$(uw-`^yvHqowY75JFzxru?DlYG39!x;x-hw%NNIsgbmi*epG)}
z5*{E`XXrH;d)$1w-EKWD+Y{FFg9h8;^>(e+(^j)py4#*R8M+auNI%&pFA*c{w!QoG
zQM28A^5n_W-B!E#xb^7i)2i(uRG3KC+O~JC$B!+5>2doB$or?wCy$@FTkW#LQ`@r|
zthL>=pFV8^RF9#-w)GUo+ibd9_A>3+4fb^VY39d*_N-+F{IN*@Hm#>m9yK3XttY#%
z^gRMx+V0lw5&(X*kmnxR?e-I(L#zFC_etA=h4Lx1*IK4M8{4xV+q;h*wV&9{Cl&x_
zKY6^n%LiP!y%x=h?9WrTxeWe}Eu1pDPg_>|(c?$f(?>1KezXK!$2Kvt{baYf3&U(R
zpSG>XcFVQ|9ZVN3UV?TUiye?Ur*Hp`v4-Bf2l)f)887jgWRi9*Vj~H(<R-_ehD3{<
zd207G$2g~RI^<M$R1G$mE7k>{xua22IturE2lf<m_%f$cJ!f8<jJ$De&aDkxhSZZU
ze!zWhH*qBZ)iuJuwRxR6{L>YC$i|znIMnQV9c4Cy-g(PtN`h8W@<DWj;*ForQgPg)
z4g4SoYlAs|%(p&dE*XeA0G2>$zZ#)+1IoXP%JTHr1Qo2tI8;nqo__Z4x{gUoMJQ}x
z^VGWuRn)tgH0c~#89X#Pw=>{~+}R}x+G@d>&v?NUC?5@=d?ZjlCOv#K_wbQ}Qlg+;
zwqT2cHy1V;1ep*F<tOucg7la@>;}Anzrq6=ShPFy{64e)Z;ZhhfOQ3X^rjIfZ<Ci4
zb6T4vKE-FTU=KihLHy95-+UiusxtGrC-x(osyu=*!N*V3h3K9BJdB%umU^?l&+?p#
z{ll7QJRTidz+gC_7{T0Q4=`mk3np&pz}_fg|C-_L=rEXo3g%<}&NJsM+GfW?xA%cQ
zgPOv4LlitMBHc{vEJ2qVvDtVO!0ym<``ZbdQ1<NcY}9o_8Pz=x<Qc$B=*EcewGQCB
zGuPrF(;>g>(<>oAXpXP~&uYXIx?w|84ROM&{Ai%%7%5kluNV|Ck*}t-yz^9AUey6U
zc7NAJ^jx~Ds|Bsxf@Ztg-K-_i$8cJ(nCM1J*6HA^n_~X$*3)L^dJui=gH*grVkiJE
zO^wtb3&>$Cd7tzM7ZOCqPJBOxE%9_V1`zyPbUJ}e+z(T_+l@QG(X2P*`F=w-8g;^t
zQHb%#anIHxJ9{vOYLhUy_WCZ$2j)!R+uR#b0FvzuvokG#gqs<*lf~6tazVI|4cKBf
z2+Q+9Sb-5fv10q&A_HAwq08Np{s+c?<hVyGV5tz0xEpx<=uwXS4>obx_>Zuw`)2?9
z8sk5H3*R^}Bq@--|7kmzxePQk?)kGm4@(2$Y<qZE!7`SLB6)QD>g475>yxt=r~l}@
zJAeD){qbez^40mt#jCfcFVYbui%YY%nw4AW<<-$e=K>ZMSW#ZXj{W>SUvJ(Vy-4>^
zSh7mn$Ny8lby?Qf{?YmID_jSUU`_0ty?^uk<h=9tW#|0lr`H#kC+8QbE*2GM%-W56
zlF`RH9i9_}60E0xWWeud+2XTt#NA7kdhyj(LwQgVgTU8&5#_t4^3f3Vm=sXF{F6d(
zdP67FisN~#p@PUMVi>O7FKXWp##j7TvOhAESBj;Z_n#%$7tTGuZTwfP>@n0S@W#le
zSaDG*B;hRqH7&b?=<$AO)9LI?KAOoV@L;!1Wqff7WhNiZ<dXyuGA3pm6dyqGlm{~p
z9?U$=JjAYXJnCNYnh>HG8Q@7j8O<}wVDonu`bEX`2IMgc(HjuOTt$sQbbxFG8RPgq
z7p~q2?{d$k+=&{902M-4>a;x})+1~fceEbH2y-KBUW7Qh3BnIpiO14&`DXXZ54w)u
zU{QeWikUQWdSQSYWl)iw{zg2*jMl%GN3jX8?bXhX-kdakuCsrtD4evt&Y%B`ukyFe
zm}P)A#<;w}9?Gu$T(6h!+U^^=#$AKmU)VKju}asw?TTIdrmndmWntHT^{y-6T1!&}
z2>!WT+oh11-uskoy9~Cpwte0<DI#v9usTC7MoI3Bx11}`XQo~ciWQlvb!O#Zh@16i
zjN%P9;{QKwlyBti*-3+0q7ADd|Npd6whh2`3`prZ{!(=$tm){|oV4(k8~wKF)CfZ>
zGh;#WsH37=O)QJQMI2_OuwcgHx9`s`U%dVM8C7T!cI*agWx`40X0|;@mlv?mDF+?x
zWW!K%pwxPcCc2*+#Q+z|!&)5EBG)FDlmo2QF@!}1t5_r*3q;_+YS@idgS8uv8oQ0h
zjm(qG7?rkOynlE4`uOPb1Xj6pgV~mq25|<xLDOX%i8Vc6-yB{1mEYNXb8>e1U(NYI
z|7m7sxD4y}?()1gum6{b$u*gVpk56+(>k7WJTN!Ur}QgWt9&_&$9Ot23DKB~>(vl9
zMR?`e0}Vo3;9hVb_~NA*rLn|bJ|SMdJ^%aB`3t}rGZNxS<7vZ!$2f!ShSh3V?S}QJ
zh#bN@w8f3zN+qqaG^I<lxXZn<%n+GbrjY-DUH<;|#S4l~oC?c=WkjqNT!++XEgITT
z+{6)B%2%@MNlf<)rz+!%KNev}4h4{OrVhn4IhKI6mB5;wLn#8J_+w`?58_uxr!Qgh
zH#@NVHlrTJBp2ry5EICqZdAj9Z(g6hzp!@eF|1s#=Gf)`B_FKaM*P3}<x@#vm4Ra9
zhF9(|i$I0LwD3Mzq6vUCZYUCjn>&jjJ$;)CslO&j^JZbbyvXMV-RoAc1Rdt-=x=2C
zM-sclj~Ty6{KpOJNyB;y^Rn5np@7}6TLlwN_u|b1_w?=R;N~IOZa3^l4STm?!wBpr
z4f|=M)oiq^Lf*(=`L58h4BQ)unwse#@kND&AS2-7<)*<h51v%ScXrm9d7D)L*v>#4
zi`LZFD$p)q5aU{iW+cVz^!)wn)63Uqwd9;mh7YN;-+x#9fIB~(y+4MXiEdWPga<i#
z@#4JBvaRtKNpjs%%{bAh#=d75bfeFH`O7aDd*s(&|2h7i?d>uCjeQ?}&qVP(_I<DS
zJxem;E=>JE`2|^(R1cuNn=?JYi?t<sXqQX$kV*59N%D|M@t{87DKzr3;2H5HF7j7E
zS6*h5_PP?A9NZ7GD5-_^08?6Il4EGS@G42li{l{DADjkP`h%1CB-O^paH>5xk1|j2
z(!TcOCHgyQPmW#BUzFTq33iYX2P&kx$xD=CuPaZ=K~9<*>l;7Z0Y8{?p|m|U6^gLa
zXOf?^%OyXNg0)IAmbR80hR1nd9)KmX!Pr_d7-2?VIv-7y>@T^2xr;7~wFiy1ngNDw
zh^l0_2w<qLGhz>7e=@eeUrW7;^lMi@wFm54b+uvFbj|8|x_u2V4|v|K2%X&Ek%tZ!
z@bUWwtoQPfm3?GqAGOp#QQW^`W#DIp(9bg9&kXR-3<H=MMlidRA=rMw2y8z+06z=A
zA4ld|Cs?yWxMmr^W(LS+2pU^IZ2cn8`f14eS-|=xl);33MVGkaTP}Fo6nUc=IV?Dd
zaYL-0?E&3U(E{(dzKcR)L2}}3OBuJPY}yQ{M|EgyHy-HftR}kSE>b-wa#?eaudrA$
zaga*vGCWHOJ?GYUy)=)uqg3Vvx<H8!&txA~l|HQH`moFPVJ$z7R+T=gkK>cVVVrQw
z#=L@cx?+?j%<pXo(u9xGF`6VVi8xJvVQm?vNhP+3$fRtKMf1El*t^1hI3BuA#D#UA
z80xl4*0pTCauOJduCihDt_UXYH1v8OZqd`GfhWZ>fEO*cG{3p>5ij%jQn&$#Nk*<g
zs15{DQOHjJ(gdv>cB^fUy44=6NYE7DOZ>*uH~EH>;7R>irm|(OjKxLRF4<O6yO>cD
znAGNSde-{LYrj=`%m{Wq+&QC8l~=x6dBaZFbjiqVm80OHkFhCkTYqubw$w~q{&*4(
z@u`*klSwQh;1S1?mBPQ^sjZrf(32j;(3uk1SjH%!5Py5@EWr1nH;w?8x9&8Kc$3W~
zm?#~a*Vq&(9{3?K;$s#c`AXiI!0%g!<cm`_#6o+((s>otgnw%s3aTX9So9e}xwszi
zd{iRNqp~z+3)z}8wP?9?V2M6UCuh`B27X@pkr&iPg{*XBx~#>_mrXiHEA{e2qJ(tn
zn5F?&g#&#&@nd2_kwmQOSh!iS7V}?bEQ7U-C5|GhDUuH`7gj3FsHUM|0b;#R8<j>S
ztYb2npDCGP|K4dt`<QAo2Ss1XVJa4V4|K_=-sHlDqk@!cd6P8dKc!C6IE&&Mx@zm$
z22)ZnHLk>%9q=q^>Qx~fUcPYhl}CeJq@SxR4UmwF8&kvrc2jN&ZA`i$NHy%PZ6Xlc
zbPN~cGzZ=X%xN~%Z<hAW*1okkLL9sYnO+bb8k^J9HakvB?xP$l^MY@F96R%ZUkjyP
z^cSuADwxrUycjnF4L1V~Hv<hfgCg7v%(#hn>HAG6&@vz?DPbigY;y@aDbX^QXo(U_
z<1alQ7a|Y^T!KLQW@+DS?VCiPl|mq;-7zwkmB(CG%v>!~V-M{-&THmzo|SuBEhpzC
z%1x2-5+-6Yi!{wKFB>FI-Bk8R>A3B9xrsczyhNT386L$OJN2d&ziBCN3`sO;l6#f$
zkYqrmX_w5Il;W9(3qD!DJZub>!G@hALgkYvnE@t^)PxT)`PP@`)s*J7A7^=8BKc>E
z--*-n6x52`ud2T_THxxASCHA%%gJo&iLE~>clvW%K5yyITfZ)ejSSZde$P{LE6)L4
zjjqYO#$P&KvivVOKTlDvJ{wZH_)1A|%d5*us#h;{_1MdVH^oP4rly*MX<5ssKx%7g
zYE5^Yoh+YqQ;Kbmtk;QJ>&TCkAvV@edx_ra$d9swcJ|S$r1L8&Gz>S`oh&ot;H$!h
zCdoXz6os~`Qb<pAyG{yiEu_%a>J(Cv{w_tKt;!Ti@`5Z!q0OokQaW5Sg*F#bXmfQ6
zDT!H@qR?h#3MHYDm!r^5RSGE`u9-qR3n{d-I)#+5*Go}or!s{MNur4~YP(xbH6obW
zOy~PDpnA(OBY4YM6|JUB*nRUL%VRE+RyvJ0rxMb#%?*}LyJpJ7ZJV;VRgksmfd6?D
zIQytq*Q5L^5+NnRDeOKpP*EypK+Box*+;z+k$#fY&9Ona8-MEMhI>g>v*K!2T+JBz
zrMxuHq+rMq5!pv7MG`n0RJ7tO@FRC>Hv0mzF_usHl)V^JSZJ3XtMPyeM%qq`fEd9T
zq`(+RFb2zjLB~f47+K4-W1a_CFl{1sL>NRJj0&^W8!x}^5=VGrJ{v0yKk0Uf<!iZa
zNdP3HU2f4gMs^}tGE?3V*C{EePV%KQDspK%Uy**!d>Q)1-7ZVN%H47SKEDh4AuOfn
z7Z+R`{gg!I%h8YWLRY3=((RJ;tKMyUz*1@1*F`^(8hbJQl7efaUy>_)Ir3rB^UBnV
zJ6)1^l{;+@n4UELYarcxRocy$BHnye>dluT-~6kipPqWYItf#<*Wp*g$AH}~PwG`s
zpeDeqCR3M$LdaJICF`X#G+dh0N!j&K8)I<dRivVkmE53FJMlowyD_!QYFjTAY9m*q
zSU~h#jp{wu%UT<kKn=FD05eLF)exh=oZzJZF}&kyc%fMBYhW23^Su}`YMIqAqe5MV
zWkk<acjJN6pXEECGU*+acXZDWzAA0J)gH5cqQyN9s`e;dG-tU!n#Db0`MAf5c<s+7
zWn6ocN(TR@_+U|?H06hzz<$4Bj8jyv?zXEyYM_Y0XKi9|M~Xq6q`c0_-DQ%xWiv6|
zVwt3_`$&RRcgamgWTJR_L|Dt<(o+igaS)MuMHkgs#@{yI;TD)vbyC1`W#@s`W7;b=
z-z_%jQPf$w&y3G%uFGoDr>vuOTCP_S9D*Y%BhE0u1NM^>BZ6L+9-!<>U???j?xZV;
z7@ecbcD1N4ajkkn_a2HgYV1rD)}18g=sqH-h<O%pJrc8Ljt4c1$184D>o6EsZGb@(
zPQzeBN3r8|qO3lc)4E*|AmI$60hGHS8$CxjK!r#tzp}_gty!9=?$QfFf07g{jJM}3
z3dlzmXFV=XrKG)m^H4Y@OAJL`;kk>UU;^Tb+Y%F1jJoADUr?^qlmNW4YR=ac$ZItz
zKu+OiO541yxQp&aizd9fMeRmav~b!PccTj0Y{d>})7y<!RPra(Yr&2-e!bKt76m4d
zuRkj^)u)s~*!@Q{y57^(;t9eI-jVn9pa6l_@28JOv-O}XwMe&M^Xk(3uBL*K^urgW
zCWe%m>S)bm*R`<pXji+w=M-PvbCO$o87$DeD#k&nbxf!CBEH1z>U{WSfZ~-*<3fRK
zdF3&KbPjNG!CSdaBK1MK=U7c(P%o2QPToY0i&D&d4cY-wn4C=c{iN5^A0p;Dk>`dK
zO<QD(kr$pZx(FpJMkNOmx%C2nHX28~GUb|zlXTwP+-xgJ6?SBBgq@Ky4ZV4%J#Tm7
zMmse2h*CPZa~SiOqMq<}Ux+jdvaq;Z%<!6NFkqwL+C>Pil>>hstq})J6rvB}cHy;h
zz|TV^<`lC{ty!{IOBQRHT1yv$QTznZ;xKp?XAJGiuZc&T!Lq<}kR&`<Bm;Q1NCs#Q
zii87;WB^u+a9GcR8(?G}k$pDe>3{b4t5a|6My&bJQo+tjEPkJvV3r8rHSLEjbu@;W
zwy2q)4&yl`sEexftjsieRaGZard3vCW~UWYx|RcYmhfasbi@p|f<&EpaXSTqRWAkd
zRh<GRsH`<XWtD=;QlN_E?5f?Dz)0M2ib1NXnX1suEGJp+$V6mHWE#1p%P-sfMprBn
zBI&oDE?~t4EOP-nE?}DrwBiChMv)RlD*mMAjl^Iw@oHEzK-@4vnOfGlN@D=FT)CE}
z?@6+OoOp}3ZWxk<hvA8{PVeE{(abNe6m}MPCv&puJ4>wX%;wsijT$_lJ!S$h79pQu
z{9wrsw*1igwX#mvS>PJ1?cjvsM7bB8#=R(Y<@Rr?j@DV=s~umcJ{N;B;6bKWie_E0
z(&Et%@<yThCGiutC{UGE_vE;cx4dx`-YWL*%5FNu{)nsLDfcjOIH_(!<ZdZjxSJI2
zlEYxTZcwVBYJCYNEOs9=Goe=J3ueb+AGPepDUV_WJBJLxuxxi#lyr+x(pA)gg>wS=
zB4K%FUWMhI`I0PukoKor#PC|-iVR<FTq(YH=GFOLtMdi(y)$2l@AY98^SxH~3+4O5
zi;o+{AvirX1Siw@ecIG_ZM=}GROwHIrj*pT^etWG`gT$OR;GR{UB6Y-e{TIWm^u*C
zCg^^)p<sF&oLexx8J=2Duw^Ki-V(<JS0lOIZ`ekXW0Yh`ax$4OA{e636yIOZWgoM_
z3T>Xg%_GgUnL~=HCF5XTOni@9IpW=df)!j30@hC8Ro?M*<cX2Vb$UY#y0@_~@a;y0
zz_-l27b7~s_!>a>soj{igx!IXbWVfGmhTRx%%0ndsKe^Gp=xS4N~2A=5|xy+vL&sg
zWU6bP#?;)mwYYDwFSHi-ZRve?gXuIF6^-03=-t*vZWr|4GLBsIUNrdD;_k)ZTZ_9-
z27ei|AZ8%MuBpc;wKwK25$Y2w^Tf_PX~h<{V;UvEknp@1Tg0;+y7S)98DF_LscCB1
ztUq&AOROg>q&YlCGd6kpEUzS=!gf(f%h;xsUozFEsidv8X%+Qut8KQ5O16w`TKOeY
zZB~r}(8UPqLMys(ecEL1qqy-zSo*932^!Dy-eKkp{!LR4W*&>EaZ0)(o8IQnNUo=!
zS%zn};TbG@pY#-;Bg~T~yb4DX_hXHhEH<a%#V^bTm-fzqy0v6X4Q=e$QN>f;s@NVB
zV0(~<ZH!RO$J`lu6%ei>Qfb2vjBP9?2DX7X;!vdN0Z;Sw<C%{M`uv0>sTK*QOpAU{
zxkY|(JM*VPi-%7sKQfI0V(mYnO8iI+wVrCpDrm{dwq(hc{2+BAOh8t#{2Q6g&Sf^-
z>E$sSzFWj>D{xxXDVwEc<th6KS1?Q-k<IjNd`X$XH$>7Nd|i}g{7>J0zWyH<Ibw_d
z-hJ#odD`yn_WDl-JH#qGQP|r#4nmh78V6yta}-4)!1hb=XnVMzk0$)=w%ha<eo}v}
zwr#clWVM=)TFtfv-~QCJTaO;M{=}O1fq+>w#Z=`hx04>+ALNIU8ha5NV5(F2J8?of
z%O~<Tn8SS3{BK;+|BoL&WKwX$i*D$Iw`i$DX93&>0wNv%%7y3K@OVRL*L?3#JFYXk
z*?h8PH?1c7$#H%5)65-@-L5n1v;8Z0uzeLj_-}6<dd~JBJQNKPkQc!$1SximX~|y(
z;RpeaX5D|G+@1>G1lVv#^1$Iu@Ndue9jyOu*7dy}JN0_*7`7DG+nCOGo4p>u5NV?$
ztGqj9H%`RH!Ibqq3{3@l0Jo1#+<4O=lo7~kuzv(I)^o-z=uRDPjC*9}Or_Jsh<8m6
z7trPo^tQ82DdEPLZbqC}U0jG<w6vK9v3mh>S5MexF0+x6>9Ce<WY+*%;Wl<QH`zbq
zWq~pH)(t%X;!=`QOm3oMM46s;xFXJ+c(!9j{HMgr$J{yCDSHu3d4i}${OBU=@kep`
z9^O<Zm>{h3#O~zHyUTyz+W-FevV()G#l`AMEZ)T4kn0P~>m3K~U{H(UQLc2w0MX4d
z{_Hm({sHSgqgd{^K~>|pAIU@l4Hl1zufT^fWVvwgf(am->APhK6d1ETrr6#dKdqeZ
zCpZ<$ehd$qsrmIrkyH6(G?@YeU%MfC>Jt?S6a<^qX0)NbW$$2S%%<c)@`fCL>P3{y
zl1+gKTR&vKX(8RqF@ubXFE~!7ax!(IaL^$!vXZUrI3S;{v)Sm9H;}-+w|DM3eJsEO
zrgH2Z^?I1v`2ZwFCjyX+{p{^sJ3egfA3R89UsGGx5~XEIs*R~7lgwW0nT=c+IsLx0
zJ*+r4`~6PY`KcDYr#brYk2y(|^5{dMe8qVxw*gw6dP7`lRWqQiHnmiGt#f*@hdBj~
z`gb_}cv8W;nNFZeK##q(%qVE9?2OWi-qVcQ`oqsCz}jb;Q5wCziWzk=>)y?b8li*j
z1Qc(l{G?)8N*pBcN}52+W#^Yx^q%I|<{y53p{vz8=NBtK!8C&1%LIEf^Y3(mLHl<z
z!TR1c{5QT@$JCNb7qnS+YH3C9X=?5K;ind0?Q=~njbLBJ)Oz7v-;p+W<4h*X)MBsU
zMG&Hc1YVEv{orOBtx&*{<6yjXEp~gb*>(|fo-Ei8f=QgNaF3c{*mupV6DYN#?H<m6
zb9Vrb8tgg$3ZLmfV3XYp-Oy#vc?!e}0O{``faTKVoDi@Goous9x|HLgV@HIBu#xk@
zRSWO!QLC)$A{Y_#y$`Mcn{&8?VVn9q>P=^k@834G4o76hL%W!J?T(=>Hg?@U3^($|
zJ(oZ91`<@jwl`LQY=~ms6i`7xLIXFUHx;|%O*wSt_#yDrtvC#c$UcWK;$831#WsYg
zZ#M3`p?`}PcsL~)aSR^={X)D@_P}7kv%g9TAz2X45lmc>SW+WS<6oRS+p28XKkCrz
zJghxXsRm_{Zk&tupUX1%T+r|pzzsz}XMtgK-4r2jhUnuUm@rOU>j)YsXqbe-{FVjW
z+&K*VK8_sP;^L}7T_Ik0RV5K7dM$_?RVXR8ix!LYHU<DKy^qn#uCN+pmx9zfKxq#D
z><!S8+UXp>e{pnt-06^nk*U3CCeYl80vOf@4{!in(3|z0y*-5VM=CDz+D{K)Nuj3s
z0iEa&y%P_dXUsX6KHK;+9>-CoSwuXCDUPuvkyRExftWWpQhz4S++-@mT*T+?IW*kv
zFOt<QV1+~+m$Qte3|YliEIM1vJ#<`n;Al2#e>;u)1DyXo_r^|mTl(y8@%dl7Ww))=
z`JesRvc8@FeLMgA%Fq9vzdk!U|407WU-o_Jb3XuhEB?XNHRySul3u&4OA}ZwFcC|N
znND+#!YfPJV8B;9UZZ$RupV`~pk&bv1T2W-!)p9;c*hB^W~hdA@ZB@Lj4cuurwdin
z<$?6QU^D?b$&1oN&6_~zg;A`+|1$z=`+yC0eB(tfmt}83L+2-A&tVvQdjV)=4z&BQ
zA<FV45G!EO7sx=oB=l6P=V?->!zgW9JeQAR6mkGN&=9IudnuIeCv7NIRXnHE2RR_I
z@EfW(v>r((K>hL(j|#bamLP%7sTapbr$^_KA>Y31ygPmW)9bS$ZBXr}9&1n9<C;us
zzNnK=iTYGdr<7ZUctA{275Wt3WHELWP4hf?rWhUMeNoJQ#7z%ba;FUq<scMlIoG}3
zPHMWrq1XZ;ph}%G$+O}0!Kh-9-$qJ<Sqv4g;^bR0O3jqCLag$Uah~KV$F)Q=j_V~x
z_J_&a^X%LDj@>{tHVkH0L*_<PZv;d6uE@Ayf-Tp$p#sE@y0T@?GiBt?=ko1~x5+ja
zcI<PPLZ?(QkVF#~Rtg^!e%gnEZ0);1!k&{zS007}Wo$PHeAiL;`O&u6dBZXZ|A+3!
znFkDj?L>hO%g^)K6>HnO+p#=W<Y_OMji<G`CS#rPEj~F4L+4hMg3T2^Qe`%OmJ-Fu
zE>>d}1iFF04ng4RuP~b;M#y8oDRboA`3Xjmd|{Ga+y`U%wMyKBuDo8+*JP>MM0}VQ
zD-}n49<F7*5@6$+b)Sn?((T<ZTuEP<#VdLBhg-llzw8B!heG%w7cdGya5sxs!cS{i
z$b`jRv&f%k-{1362E8s^%J?x^{!*4ijrg?U9s$#O%`lnSA_*)PGKWRO0Jg@x*_5`s
zloxDXhZ48!X6W^XSV$Pe@qI7>e9A?1G7BlR3@{bnw!yw<Hkn|1BiPF}_^z@GTOruV
zu{ktK&EKv?R%2633G6h_jsK;c0;%+U9UTL5eRs}D!;j`xmG&vX3q(Hkw?Lk!TC0B!
zNvz5jf@kRZ(3TMii0{V0fraO=R^LuZ#xF~Nt3wgTnBq^tAl5BJQ}nGSNecz3`RFoU
z9<subEhQb-Ak{|RcqPk64`03U#*1}H=!u`ztVKjYYxiva;5`<Klm+sHw^0AQ3LP-A
zJD(Ccyd>+*7pvV-R=O`#yF=LSL@;qixnC2-eMl^Cybg)w%-0~XoOua}<;>S9v7GsR
zZ+>}~D~T*>;Y$%&&is!cvM3Sb7b>!xyRxcsM)~?gmNQ>MWJO}XH<~!17X>B_i~VyN
z^aJ)9w?O1^!@<}}Dn*Nh4$G3M!H(k=&hSldG)WsZ;StcvcoVC7+Y3}|>dGjiIAr(9
z8?>uP$Qh45HLBy$L>YY{M&i+n=@|@i2d<$JMqV@Eq|UJ+<vtpmS#r8rm0X!!eK9RM
zC;#toiN%S4thT6dK*}#F)J3w6R9!}-CG+5*`)r}$TLT_XFTZX)?%xv{Prh<A-V;0i
z^p&&Y;XR@8D`&^Udt%4;XK&A6eATG@u7-1){r)@a?jL}*eIHmo``Q=p?<9M!U^y7x
zPJ+U7b9M6{(KUm!ARHwJ<>$DGTHpqsA{_OZ_+K~Vo+v{r{k;;QK9>@*={kBE8mt9o
zZ|)p=m)nI3-}mgEUJ|Y|PdnguZdDKb<ly`L#cOE&;_?N>kg7!=!)aahuLm9cqwtgW
z2DQ%ouyf1b^F|oune5D;iEkh2>n+=1qS7b&9XsG_wQluV^K>m4@EO<w;S#qCYF3>+
zgesdKCv;0=Ij{U;wn(-^F}jNV^Tn2jJ=|c)k8Lg+pYo>53ZU?&S1*XKA&65Lu~ja!
zqgvjlCAcxn%luiUh|H7uBpBo6x3s@WaAP*Ni58N2<xOp=stv8Hd+;xzezLwL^f>k_
zkwX#lD3-L(w6}5$iPU|zvR4W`6R~}Z4S*TRFM%mC{=p~{+h4)ND`JftUkbushA%YJ
zgq($s3TIZsI6vg<NI6)K@yb?7C+npr7EnrFfm?@v^VT{H;R#Ile1fNSeD~8;>n<~E
zwYPsbr?9Or+bIkqecYATfJV44z~H?GN2`ljccG)zk^w(b^629%n0mPEtmwVEvz{|K
z3C?{V%qjiey&O_H%Sa*R-5*W{?R?oX2tzj9l?+nIRwRKkd|I+PI({d_O6?aFv33Wd
z=!JK^P!vI#hALLm+ZP4kh6<ttB9UV&NHT}t4o4v-t~8X8V#<{SIRzd(;DSsSOrpsS
z^ujOEFP!Cijiv1~az5f?h3ENK(<}sHb<->!A2^!1R|j>mgB@MGC=O}0u8k<V;jV$G
zzyDrDR6s~Ino@=PBPm)gmn11cdJhyWP0+cCqMI8P>G^*f`P7_OCg=Q~>G^+4lXHG&
z^h|Ar)0^LJFdNeiFcHedp$JXGS;Q~`iW5>C6LhhksrB?d^9}`b;bC{MJn{e$+>IC}
zSpvZ63OFHYe4B!SCp?lJ;7x71%uQFe#m!?Alro?U@BJ3a;!@%G!PU&A?kENbbk%o5
zE1K@`Pc}4ziDSflaTV%hyTLP|!q#gl8zyIwsfS@@sixFi(&g~l00p0QdSQSJ|L`HZ
zc0;;+$7*y%FM(>4s>C<=`gj8YDp-~&_^+HbF+X7&FfP8cmv4Cxn2a_eU+?W1u6?sb
zcfk$Km~MvWTo*5FN7a7fA59IyG<oqz#e6gk;sLNj9WXKK5MR{j>u;Yn)~Y+3Mre`*
z$pDq}7y+43RPRljHYv{ud~qdpPw!4GaaZ6_LS2^z!3`t3NVGpQE8rRCO$oz<!uv^4
zJi1AhOJ4q#<ELYsCN|gY{sX27A87%%-p@QSU@GyOTrf?XhGe;WohDAXX`+X@HBA%=
zwh{ZQPZLhzYSUy*nP{>RPrij{`m*9boy~l|VgRV(_)i!QC>{T)g^#|)fBIVEKka^t
z|3uR6?EUF!VE`yYf$|Zc%6hfP-HyKTU9{k3D@&Ci^Wp&uFcc$K3Fu>ues7C!pd66t
zcg2pSdD+}qwN7$9WHJhd=%m<qjmH7Y=@N915UU26-p!f<byt(v<)CRJl0dKE^rGND
z2}il{Tk43t$#ynzZ!hlLE4ymEPe1ef7!MT#AoHzh=w88`a{=XJANT1!mt+gpz8MA}
zp^`u|Y;&iPxV}`6W!Ye_qjPuSI(##Iz+kX=y3fp6ZPIa3ITE}+s9oOlwxrESDPA{n
z%WDu0^JSRDz^QRtEDUZMNz+in&+rixIXHk9>_?XPNA1ba%|(713wT8tcLA^DJ4BgX
zF-lR86}Xhhe2Qd}dmX)oxpd_wG=>$8geoe-9S@qT)NG8BOf&m>l<=L+xa-Xw?xjWN
zl9_rbG3O&y^+!WkX$7Ux)usbeX%Yb)X`YNaP@@L_V4pqeV9^@>qHmG+Z`Z2DRSpg;
zU<3;oAbzxE@q+L<T~06w@(Z=Ng?;RZr)mTBh5&@5mI$~$fdNnB9ZoYTH1Iv@iX4WV
zZp5M8SD#=<IFWwK*~Hz=bn@caBOMw!u=NeOX{ISv^9~N0^&dIj5jPy^YzsdT!neHo
zCdW?wneO1IH6lt=Q`7>rg^fOBp4z~=v5?-)W>FCPHKK+UdgGoq@!dW7<QHPm?d|Ob
zGV8AZZjJqd(29rn@mIEqPd)L-!&1LAWdRuXudL4MCPYgoPzj|+a=+iN>3e3uhu)S%
zW4)m<VEvii4lFO`hB)$J7El<A^gAK~yc2=j1-!%p8;Ugrn}&JqhEuX%)ZkHrb4Xk*
z$(38^`&x3?D-4&5ifW^koIM-sOR%WAw}-DES~)oJodhL>>>`{Hx2Lok0Qp(=DK?cn
z=HgKRYAB6kOCAd}X*f*Y(#pY^Qon|HSL*{Vad*Bvi0?9a4`|SeT!$vFfm9tlUYN_3
zIuY~x#-$@}6Z4fsAW+S%P&bDM(9e%??Y*r8)Q1Ti^_^N~aiAB#TrlZ=wDxjeC(z6y
zH-;$g<<P=o@Z8$_2{>zAdD4tny!OO`mxQ7+j(V~$;VXn0(_VciCPuRMFzu&gLz^n6
zJgYP@^?Iq73Uu0JQwIA?X29g<j;CTu7)}tE`=Pd92H}kp_F3QU0bo3!SM3H5)dmj0
zkA{cP8TW$`bGUlS!-4e!ms=4+m-0AVdDkvBH=|f$6Bjl-S7<i48Nwc(XB3c$NO&`N
zgy_LHJUgVrMi@9hPK?hqvAwYy;klj?fwR6{K@1U(nl#>L%QlkQ0%(ouqK#MM;>>hQ
z!ZMycGgkY~uvLhKWZjUJQ+0iGr>ERhRaopfrk!tWu*}cSCQ64zApHJ&LEA69>w05z
z2UVo~tvJXX=d1xgtj1Q*y49P7A?%%ifBIg8+Z-P2)v)o2Pvh4>`^*Bf?Ak)W?lD@P
z(%S~1%F>EMweLu<R#WL?!wQ1zEjk}++=r5_1GSwTr%LFgc~nmj12V~XBQ+yT5{wsy
z6NFCa!FzOlQ!f`xd4ryeWkGBiv6^Jc>u8$|`~V)K#0ubXeLEJ>3v?MAR-<U*95TMo
z7GhmYR3Q*C3gc`VVD#Z0iPXM})?O&nXM+&rFZp~W^aXTG10)dCyLLRECW|yDp*S8(
zSw$WmuLme@L5>Cyp+&vt$dmj`QB;HEccKWC18T-2{QZk3Q1W`{hZ&K}MT~1)E`dk-
zG>fqxtl*~yh6@FB9HINiHZT*4eQwC41V*3{XQ}|-p*$ei--GrXNsAh`g?XNM>}H^Q
zVz}mWeHOag22+kW(#Z(&p%Zm@?;X*5Y`Fox1o877mWbW0InwrJYtmO#*k$;F%4Xxx
z^*y2p`n`BQzz<_|r$0nV1Zr{76B^%Co5nVHv_U9~R)1%ThIij}P)V41{&dU3t44HA
zm3Sdx?k<M5^=-7mv+ruG#%Ja6+c)nzZ!Z3Jd}!@9n<Tr%o1f0!A9s!}JEtc{7nikm
z<4Ik8OZ8qKA0Hl2y?XvFCX<v9NXF3#P!rtcn^+r33kelCf?>VnI9?Wv*?1j5G*<&6
z%|@$1v38OeESYzS4KqpZ8HgN`RE5G?L=uyFQkhN*w9S?9v<!eL3a$d4bqC<3X}bpW
zt$oh%-PLCmg4IB{oB6gB4B~Rny4-UN=h5F>X#TVOe=eqerQ~l5{eP^c)oP~mzuDIA
zH~*h+`QN^B|DTJ?(|q5b)XOTF-(uTje2<1U+(aj6Hr^BMnHKBI+{1jlRuG$Pqy5X`
zL;TAUp%ilTg<=rU_}l-C{ZIT^sm=?X7nbdO<c&XL?_(EpW_-5p{oo2sKOIntj38Kd
z(;X;Y_u3!Go_p!<Ds-XrJ>e4CxX(d#+>bNIzqg^?V|Ub3XByl~XTXY`l{!RITdmXS
zq0q#CY#+1$%WAex>6|8Qf@Ay8;?ri6!k`u1W8>N6dw7+NEUGuK+M+LE=#GMG?o5a;
zA6Fi6e233G+*B15b?cXN@r}au!5}w+RYBqV;Uc+jv4{7Dl{<q)B$Jn#*Nc;%FFP0S
z-@SW#erW`>nQY=OtzvB<3{XEMVm2F4*yJT!8Bvw{Cx-qU{YS1Zy650HUKj)NSwKNb
ze0EUyw8Ep0Aae5wljh~7U}giGTg%Ql4QS=FU1l<BBWOUK^$f!B$}TsLQwFQ_RL1i+
zbf}<v-u*rsyEn-R5w0V8`cv9T>X7iG#$Yy$U=zZ(Vsr#Y8^aOa$>w=a>4Z`wgzLhh
zh29&fmPRu?uu@X4LQ!Xc?(g0d8{r-&F27CQbz%4BiZ{?$e|vjd7hCaWQ3Ut-Gc1w*
ztncQ%Uax5|wO##YS^G-uf0xJ^>%9L5+F<K(_Ws}FZ}z{h(EexbvXdFK?6T*fd)*J%
zK9t4}|9v%bJbxS3ldsz1cX@JtR(RvjP@szCPl>{!h9jNwNo$Try0#_20Q)xq`?bmc
z_f@6;BKg0&+kTWe|9SM-{wDvw-T(f|&wt(*uLbW{SP7PnnKyy`oHwNi(~h|2nin<i
z{Zi4T%{OUfza?+dD#h#TPJ56IBD3Y(Yi!cuP^8@cRh0R!)SX-fgN8e~=E!iFd$}9s
z?_R%hqBDYg<*<%UHwt|G!hKz;FscYV3@udSPN-0lF?q@Q#{H!5C>feRHx55uZgT2h
z5OXI0EzjAU1S(T62za<6TOScOt;*F=56wFYGdHNjFo4y{VH58<m@Z)NYAj9tb;xg>
z2QD*WPm7R!9e90c4eNOk7F;EMgx-Hbt!${N9J2DjQ#+gNh~Nrcj!g<C*^T(UsvDPm
z7^Af>MzFZ<z(Xn0V}N*@oxyVN@BjjQWtBGrN@gxOKp0{aBdFLfz{1B_m;oqLdKKwm
z0u`D(yc@xi+ya6=9+VI>Mf{SBz5R@XYatKq?WsI^_<3>f`0GdggGRQfNy<9{<0}ZI
zk25kruVu-#E3dhB`9;@W_o^E|yQ6j2{<0QcyyfpS*0A*2m4m$GIcXKHxORM@Xj$F{
zZ2W6R3TePMfq3I>&SX&e@;jcmq5L!!W)6=lmSl6ae7!QyR@wLP`^b`8vEOHsXYk8s
zVqGKSD)6in%qqt0GK%Bt=vc7Nk41Ui_E@u`Zg~uVts^Dd&_OkFeC6xhURrvGR~3bK
zcvV(*Q^_xO!=%py05+Zo@c9edJH-I4Ai#+lt4@L9#A+}N*0=W}#^lqZ+U&r~qEd+=
zZ-6nsZj1P;CjK#KO}%uA!!!3ryYV9>kP}VIE(5%%I%PzotIQ19L=|-gA&Hf9Ik3e%
zSb&Wj@ytgoyONa*>V?c&lLkuOLjI98HTFy1mvUIH#9|;ZLI*9=LznzPV%vIo@vHI~
zHnC8{NO2X`Dalo$=zZwWt7^}LiB%<?7;jPR$~)o(Z{%F@>=dgp1h!L^A+S!_V3oY+
zAAunV6RR?W81H9b2(xhDDN#aKBn-J9SpnTukr%x9J+sJzrG*B<Tic`p9cAs>Jghx%
z>g$jv93NknOj(7GmKHJP`A8}Lhu|X#?_Kecf{!)wQI*Z2E0QLAfaN7DeNersAq1+F
z<Q87ieB>+ARWE(YD=CZvNN?;|xt*N=VFhzW(s@xRuLV5Bv9zC?!M61F>SR7?$Y`#5
z{w-*%ivf7>7zOjI_E|6{?m7C~&#B;1DL3W6oghJ9|1Y5I&5>*z`nqv_pZqDjC<0C0
zM*r-c(ahf&p`-Q$BO%@Pe0N*fhtkIy2J#FBBI0SC9UQPWkK2ZxDHy40k3C@%Y^A~4
z6wkB)tz*d39n3A<V7olh60h_lt35GqU!@i2Ayg>Z-`3;a`@yX1yL;@h?EaDHzNL0=
zvA%cRV9aW%aO|;~)m{Kd9G<QiAf;eByQbJD6~?NQo2Pn`aJDi5VhrpG474>2G*c9?
ztwkgNWZer5>{eMCSoXaUK_u+WvkF!b_I_~Xgx+*GswC=_tcuV;RdEkhN{V}4@@~aF
z4&bwi`=s*~#68E-8pQo+aMj4nZnbtitL|<wz}BCnrNt9%t_peGD7{*cNNFw-DUTOO
zltlO~hS5$-wASuIiPm1&znJ$NfH$ra&G2v*WeCn*d53&k3NKwQ9~`s+kT%-Co;<4G
zh4`=--SCaAs^yN-bXj6emXIdaq`W+_CifvxtjTAUDb{2yQpK9AO|DoIEz5T~K_)H(
zqP%(g;w>9-j*l6hAY2n8quAO4J!HA{P;7k=pyo3C+pNI9&GqxIa5uhq4g2Ja%NLzD
zN0+Z^)K?r%oDc1@@^o`$6Q4w1!N$7y=&DhT=bI}#DR2iI76!pI&5(<jG5Q!ztMVe&
zDaneW=-o<E8pCH}I;Hm&<fuTDS&U+M3~@d8=;D$l-WvAXil8-Y&Ypl8U_UX0HctgQ
z--&n2meh5$-BH00Y@AB(n+kr4CnM%@NOZDt7#n$G_P2}wcYX=D3xnC!5M~-a7xss8
zsDt5IZR$K$$V!Wa3*d$|ICO+=&zpn+G`xz42$zu&VFe)(-l>#`GnMUp8S*031;)}R
z2^ooDyq2+GrK-l3=h1rj5>zupVp%>^bbqK>-G44B?Ku}M;=&bHDFpqqF<sL6D(-Kk
zZcMC|<DkkqZcQx7ZBVbWZsd*2szy}ujx{3~z~@tp;?7sN|3OH)U!J@0#tF|Y&va9~
z^jV$fpjuU)gDNF?j+eY!p2GorHl9m5U!LbUlJ1x1E}iij<eU@sR#;n|B^Fnw{OanI
zSYB6VKIgNpvd%h|S?6=Cvq$S$XpsSzX1^s?Tc`YT>)gkB>wMM)*ICDk>wJzS_h?OP
zuCwH#>nySAI^~yL=RVe5=d&)n&N^0J=W{H*N9$R80rcUL`ra_jdTf#R99Al?@(q{d
zy(%i-@J?B8p6Pl*ea<sh6joi?8?NX03fO3<onFPM!(yW?#xhRbJs{7Q*llBD6^m|T
z;i0*1x2@_;m8$+!ntSco{!895c8>KIPuG{Q_Lg>9Qhru)ol#Hlb?_}~3Hb9W-RS@l
z7XY1R6d#GRl*mtr2k~PEqrd6QM3Ihwqc;pj``snb&{_fu$S$j3!rW>(XGngiT(7Uk
z`ZQe9`ZQd^{4^|Ye;VG00c!YJEl|UCn4pHA!v-~6kD-0Iq?vuVgq?j@-q3#ctVd(`
zoCf#dIxOzP&tY<32kUv52R?GE9yW?<m9LZVYGrSe#HH7L*pnPKE)@(;<NcMcdBcKP
zn?&x^+86!RV$kX<2k=Pnch@mIU^QQtcT8o+7_3#54|%Q9&M~6!y1CHV{LI|9o;Wcv
ze(svfpvLNfVrJdSJcf0uav0VryUg;Ucfeg=27kqDw(2s=@l?QN_U0GEXJIfKW3~um
zjIA=d9W_wdod+wGb>|Uf?;+%Bd~KJ;^!Z$Rl<rr!8!Yg)Ok9;E{dus*HRyT+KY)iz
z@FG>L%8OK~BrnRc_sENUOrMPx)$W()MTxij<HeWQ<2tsyuIDec<%N2cx4clN>{`r=
z-XkB<fIi>;H|c$ag_$Gm{+RF>I$V(lZ-y>Kh>Gtwc@aBv&dRc(7mceQMN*}*dtXsj
z4WrNvf+gdnEgO@i76(c|aO$gvWf7xVH>*&*gk?Em==MGs`QhQtmNy=hX9*}=oh6`B
zX_nw+zZjMPaMr;Rd{ln`mKaj-m4P?*WboYbyuoW#XAfSlG>62+zZfRr5Uqnx;<0^E
ztfJqa#gxyKZiG}DM7&FG3#D((N-Td@jZs8Tm~FZwujn<(D?G$ZSzf@$CQkUyS&i&E
znEB`b8OmeD1(7J%Mfb?NLO`loE}_=KP?LmhFX~O^2eqO&o!=ca>nwZ9c!i7Yv+`z9
zULkgRK>Qq*JEXi?x91d^l*GBPZnS4L=TLlX*6VBwzV9pF>nPn&m)_aId&cykP_5qC
zxtlZ1t16CGW&&UegU^ot3PHRuv$$AaSn`!Q4otRG`T#0|>BJr9;2<uF6f_dODQZwt
zNpU>Dab$ODc@b4|({UP11QdkNe^BPgJMg|5FTPk7yH*rQj}66phCL}TqB2%T{}j4x
z>ECWB&L*t2neS^-uY9b2rTF@-DWTziKqAAk`Tr(i&<Dnfc7AflAP4;Ce$e~y&KbM@
zUjePIUz7>3F#n(3+HKi(D*s=*ZS8)`|M#`#|7$+|mj93B3;a7dKRWF^KYe@r*Ur16
zvy;<I5<qjA3aJ3IYenaD7-b@Hcg~ZCq(XKu_W+bUAe~NqcrG6vZg6E4NInrlIpgHd
z{>c}!h;?!CcJ>dN4a&kKA3NNu6gAv!RBXjsNzsfZ(+~vmpfhD7XY5?Lou1PhQm!8F
z-)__yx;`i+wNz19zf(`Tb=vjGQ%(Q@maQ(VL7)hH+E9Wp9GxG(>UgauyPdb^ojLs3
zZEx^3In^SN$boI{sdWqhxfcKptWGU^(*oJ`thkD;R<R%LmZ{Uy>QD_)jk~m#htb-t
z4C5oqt_)*RN2ZzAnF8o6G?lnkL|RDlwN~F_qmFOYwyb&sKG^a@3qJ^z1sY_2GQbA&
zZS_k66W91Y&fM@;>7rCsp+_PtFtQ>>)SAnpKWWHb)R1jx$Tl?8TGUX>&`@e<Faua-
zy1`(d86Vr`$1T$ZA7wgIjy4mdwWA(+B|G#dJ#{yMx`t=A;aQ7M@(8`0dn#Lvs6+nc
z8IIK()Oc?P2N$SZ;>UWtc>zfuxjd?JZ-NrONBi}E^W1U#tK<8uiy9EUfCV88Mj*(p
z{GjXjY86-)y0=sAb52TDN8+Xfg3}GBz%#HB$Lj;WjXQEl*#zWm!FZdTBH5QVy|m3Q
zTlBIeH!&zJ)ban69e)KdH~EZ611n$JhL^T{*)qJ;z+3<jEkJ^QKT#gQSH$w9j0jW9
zJFc{nt_lpt``JLt+9vzSyM|TMVIwwYAK27Ghxrfic??{N0kE0Scqy`;$Pxl;WyJmo
zT7+CGi6!7cvUXzATt~ni7&5kiCE{Vm1Sp(4K-q$&cS^TF`~aTv&@Ryg6dH8g?*mlh
z2pWm7Ssa)STO<}zT;BVrdIc1^EZU~4CR$$+%zjwu`w`wUxpqP?m{Fq%2z&T-i$FNF
zsNqvvVdbciybs+gP>lfBzI*NZ*^W?s=>y}dih)vHTvLb<i>Lw3Y2Zy!o(Gujl5do2
zl*IAc#jULOZ+?3LPzwr6*LVAMNlLVu!IV9o%#>LVqQ{{6m^2?n@xkTfuf3NJcBqti
z4LaNWaD!x+i&lKc%v-PASqO;ndNr!?fPH8%@o5x)8jDYehR$>tx=tUrwpLxswW(lk
zKLI)h^^jdkb&Dj{$s7hcCJZ4V@zJR}{UO3YK`5*U!ITL~aCO2g$X6pa>ft=-0kXp>
zS)?bpw~N@(5As@)KO}bJjeo%6;~pn{#Gc2c!Xa=Ue8qzgAZ1L`Q+wyM#o1{a^zzUR
zt}%iqG>@B(Fu)DID;mdHJPJ8HL!^8#o?@V*5spN_1)LGOSjsa^tYkKgl1aoHccumw
z-sbxY-h<L5btcCGE6Xiy0zjz44B=zRE{3>Z;R3~_nQ_UBrEY^gz~T?E`~z6Q#PURk
zAu(?pvRK{87}hu!pfycOr1%pTw2FC$u@8Ok8dyzoI82?IY+!`nM8K<<-|FmP+{0KY
z5VK)jm}#JC*oQ&WT64y#61=2l2=$)GZ=+CaMLr&@kH`E9+NWo7y?#CJ0jeld*9Ue5
zk~3m6SobmCD++cb-UJgMOnXC_`fuuvr*hBt{qgs*CCL`R#IJ%IU=0vc<DNLHfmuK9
z-6F>gy?);%h7W1iJcbSe6drWhnOgD^n6Pr})CVFV3fPVN1KrP?Fqj~~k}bQ*YP?Jz
zrSS+QsBFre!;J9g0+;XK*7==1u8VU?@`10Tcr#JjOrZg9pUbU!4OZ_hBv5_l16|~U
z-V}4Hti8`Z)Du>L#^j5wq|9G{T%u~;!u$akRyI$nr4H*jfz`LSd|>SzfZE7h@a%(V
zNN)kqA!dhSC8mpH=$itwN|`>&KxNna2U;7+paIt}g7FViz<AGf`<z!CSd6@BY#{LM
zWaj51L3VX`AQV!bCxR0MhFg!3IL09)h#tp?9xp=lSVc6~?0!=kHM0UBr=}tBA<nHW
z6fw|y-YbGerzs*7l(<7ch=xFThLUthd1Jfo^u|Rs3z;3JW27)Y&Qs<(G$pA!nA<G4
z0flKe8-Jkqt}TgVmHZ=xfm&PG`%WCH?^NR*_?KmbbYsQ5G-?=|01bK+&l;@_(2z%k
zv>yQ?V|;<UOVnT_z$hmFC?S6y1B?^~$glu!+ROwns@h1W^hjZXoL2->(%pvq9q%iC
zOXM+|jX|)1)(bSf0htz%LG8vFlc8=FxjcIf$kg!4<yvizR7|k}LnZ-X462Io{Y$n~
zv9pD~>_cl)_v)5QV^Ai2=LRIGc5nK{$@BL=buQi<ot}2yoxgp7M`C1FyP$q)GxJoK
zE$D%*8g=MN%V^sWP@g!Ve=Ds_sI3c)B@94a1x2e1x)=_fFG08~;3{p`^@AIpkQ`Sl
z<j>g%2bFDOTlV$N4TsJo!sX*8nEAN3{^0T@FCq&k9XNyD(}yMO!{&H%^y81R?pZ*m
z(5P}l2e8K7LdWA<*-0a{fr*CHK^L%nJ3-cit;YEF#<`X4$#DV6lHs|7f!Fh3&D!FZ
z+f$d2v1cea+7`m3mT;;w_v*C|jnsotV_dKAx1Z>Tea|-5q89<fyFOxE(L_FU?oeCm
zR92Uy$|I4!1wYIJLB{uBATHy%5TJhAV7H#@gZc<cG?tAkK39s7#eC?6{X|P<HE-M9
zZlEpzOwbz!z)m<HiC>&?Mmv_x;%*M7r&QeKatXTz5fxtFT97V8yNFPhg}npo2Gb$#
z8X}XXO&$Fm)q4O46fA$%cPY0OSK79e?P%O4xg&wNn^RZyxEa`@;c#Gn07N%I=wti^
zn7%R41SFqZr1=vpby+>x!r&%_ZxFf&%m}*kCOpsX6Y&%$JFq=WE`EsM2f>61mV=D~
zh*?q-ap#N7J_6R%_cOs@FQW!Q$qw|LELvDqt}<NZ>u{Y98M)3Lu-gAui#dG@Zwp!m
z?JDs59a8?J-c<W=$h+cWSC)DJbwG;0q(sQm*8$?X#eB7w)Z03gqFo;)Ab_M81$pNh
zH@XQCIe`}e$$Yth50fB@5T8k-@N+Z7L#z+3JCRy9#^X@U4WNvG%tX2=%(1PgytNO1
z5raLTLjwL~BR|fupu(0SLpE^<rN>*=R?=X7C&B(^?ZXa#{1E6+7qdEVov+o1C_5i0
z2C?`eTSblI`;=9M4^wbu=#KDsN}}U?U5%5H#2)v}k_7sxo!X7lRy~#|<CH|9;QRv*
zTEMyJdk!%=Od06?N|;hXgw9~1Iiq04H(q>2r$8jMPvcqvE(-}L5{?F!K3=r9kx-k7
zHl;OvUgNv&opHB=O?FTR#XUD#hLs4>0#AI=1-AM)L*GoC0&aNipxtc`S5*(!>nU0Y
z5Hy$?z$~ylJJXCp0*VJE4j$rJ;B{OM@#jtBu3y5)JeXz<Y$qGo0l<i+z5s1<(8cbX
z=ZKcmmeidARnS74>&=`;t7tRooK_LV%@WX791A6{r1j{r{baZO450_Uv$h|xH_t^~
zP49UHV{RZ<C=14n^m<$}5_v3*MvAyc`;ZXyL&qNw2JCH>T#*a#_4!+9AJquZE2gAq
zA;oZ;27;4&h$s>t4VWm$6-^}3p8k$@#Si;@4k35JR{#u8bDgm3fi4+((8E<2+)Rht
zsLxI?)uJaz7rCg;aoIiyfqw?GcpZeNzBfYU7nLa9!XBzdUA&Toz2K~%$7msS*nn#n
zbHHy5pRheNaF<0X`NmT(<m(d0rh+$I>S+KsTpXkJdliTUMc`i8=7O6DjSV2KV@(^;
z1Z{x|7mVm2gO3L5q9ElqL$t>B`SQ%|eSBT!z;O$RJ4Oc|A#6yZ<3ThZP_kZ)d)WmC
z2Gzkc5;TI2#I`M?T4k%nJ@L~KSnh;YwUx9OFJW3?*q2cgwM60C%&KaeA2RD53|UGf
zX|19$k$;)e0Vo$A3@tv}NcK(f<WrlsbDOWVHshHI>xjK+Z*tyU&<(Qs(fQfyv!B>=
zly?nc5(*E*XQ!fb;tC7v>J48*rZ<5~ys@TeBzwtP3>HFV8g)aALMR|fhp2(X1R5WX
z%j*|%`R)b&Y&4F(M9o4>8Q{citsxYZL<5WhMlK_K%ZAnYd_tiiEtn3jj|22>f%T*}
z#Cbl8<n|HwQqaZsTh{lQ)#oo`sl1HMipw4pLXAb+Z@~Vmx3jrrZSHLGCV<V9`B5A1
zjJd9oVPhm8sU!yn*(q}ofP4cvHVzVs2noc*KBEWqxJSqsK8_W9y&iLX^c?g*@##DR
z5rXFxj)5OYz9lP<P6yU^rVbMOgF8VxtysZNt({Xl=@M0$z1Rtgde1R2+$1|%=s?I~
z?D7No!=W|H^#={Ub;5qcK6oh4h!}~^WOE&KqX5Pj8y5M*6MJ$xI6$o&cJevGk)@V!
zH2=i2Q!j!ua)dK_q!_f+p7syWz#+HViAf0$XLvCdAD0NpFBc428q^B>918S;|DU}#
zZ*Sw)6^HNN`V`FRb6iQ5ZOzD8YFqA4c9cY?-mWBfxouxPBWY|;tnF%vlhfvY_7?#6
z*))=!r9E~VTbdz35(GgI009sTfWhnefl2w++|Qt<vr{1vKm#yGBQ494`ZWCwD#AK#
z52E5efA%7F6tDzuymvl7ZUW;ab7zH<Vaf=*z9Jt*HER#jjUS||)qZN8;?;OhpFb<{
zB84zw#5^b0RZZ@|-f|nIDZ#f5(5UbQfQN?CqJS-IRq^*fjmP`;>|nKORi%=WZ%I8l
z{O3>|wGz42(BxJ_&aH-uTM-C@TMdC*4R?kSZbfbaF`nDPxGVfr$jc2*BCUzv3qWqP
z?7OLEU9{g?0(!gNYIj-}_2b5?#xWPoDZy6bf9fq^=38D+>WXg@_Fw4AQXrilq+3Y>
zkYN@ervk_*0mvu|kh6m2dXKjixC@6oj)+ubXP2$|%g&FrW}|j=QqSKO5?>9IUyY1_
ziBc<zUL*V5VMJZlIga(vFq|{+RJz<#MBmw>s=o%K(s(id9m+OF0Dmu5+a}IJ0>m~u
zU(8k6B%3zL9ty;ET$@9)O)aL-4k<Nr{?#0qATT^s5<ghWqxhDEtftO><P&`V)~N@d
z%$4U;`SPsh%%jHHPHS}NeZcKWx~KH_s0stl=?A2rpu?t?K2*qA!^LM`)q&nE$aZ}d
zy!UZS^k=<k(3R0?W-Lz%d{lMdQdvwvZ1!h`ZR_6RXY<KParau^69@V8lf<0>;Gu0$
z+43^E1sR?^<_PuZG*d_5QQM=nhh@fK!dCa`P?rKU&Y7v?l+p*zeYEw?`li<EV6w`S
zdZ%&5@3uA0&pOBdI<38Y3GGV{dj24P{I622bW}PnwF`_LLpP8AwM`L@^YpkNy{Nf^
z(5uGJr*)Qx0|tm|c(%9=Kn8^61*_}(LM}Eo7PO$TTr)l&8J~}h&uw+_k{rFTGaWcD
zkf)oVDB+QQ@>A`Xmb^(ToLgz;ri+odKz97E4c2>GVzmwQeLvb@-4Z*7M<v$YU^~Tm
zamPNFp;Xv=D^TE-x}}~eChcb!Vp{hPpk$l>cewUY+BB3l4W%uIGPj3;ch$Q4^oK@6
zA;ac{eHtI;V$S#Q>uMv*HhD_qurE5(U=&8O%+6f<-Ns{55bojL9wF&%iRMd;i*)#D
z#IE~dJl@ZdE+Va78!&jZPk(5w0BUPklU5I*mIKnx{p?4}OOx)kqi-f@Z;JS9k1a-c
z949@Zuujj>A%Hu3&Pr^18$CK1RqOSOy*>-q$#*-`68wYH349FTV}J)0uCGK>L*DDa
zP6vg&PJn+4@<||`1O;+L)$zEo)3Ak+BMR(t()E;_7Uy?k**lhZ-@Bo@LdSqt34!Bv
z5t(Sx-ea>nk84ha@)|dGKpJW%v49xiRk3%4S-O#=x(lw)W$yc3p=9NSCWH5cJ(d}L
z9OGCK>I}vn*M4{~0<T=c{~h?hz>m6!IS*A$3%tG_d#il4%R-@RLNOPFV1t;@U93IB
z+f_HB_p*KSy=qVIYv%jVHQ1woUz~4;{bzM;x`B*Gf;L_^Hk<>(F#>2<K=o>b>UmQ?
zZ_4Lt6`%<$d|&m%Xxu7@iVLe`God7J1mN&{vEs6g1?&5iMoDJeV-5$?Tm?)s8H~RQ
z7(WS2PTV+#^TPR6Q@UXSJmY58%lfO@<w?79c6oYKZ+6aKb(;0pjaIwfY?X3h7f$1X
zh9QxkfgnEhximjqpc2?;4vM?GIpt@%n-$op0W9BJOsD>6KKLN~XHdD@^G0Zn>7tq!
z7-iu1`4%Q_;&jN{RcHY&PHK&_P94V9Ic=QfooA&8HVQlL&S+XFS_DAJA*9oNK2<<M
zMo$LgCw6h2P@;CyZos$nLO-^CR%T6YPjH@T?^*e_<s)mmX5W41DqEIu0+Y^K?@W?G
zR!FL|r9;8$qL0d+MV2srbg9^QVsO~HzDAlFjYnIi@^<3QZ^))K`!K!b>Q|bW5G9A<
z?HSW(W+JA#_A8aOL9O5E_<G9kw1*~Bu1sCP(XvaJ-6bz|LND=E4t#%k^=7nsbCv3T
zw7RdV$JNKZF~G&+4Gt_Wr@LK>j46FY$spBGN|A9k*9GY-km0e%k6b-vwO&s*nI=tQ
z03#@zR~2BZ&@$r$_ZW%=u0S7ZpD_%LA#YTrwb7k|lz={}fCwS9DjXG2TvZ4H;k9gk
zb?mRM{k39rRkgC42apH{D`SmrMR$`#(@)dW0tfy_PWX@L9=W<lQaTnmi^@X^Bnl3x
z2LY75mi7~&Yyh{I&BH-d@R^=otJQq5n`vT25B_`M^_O8Qtp^C^e*uQ>#bLgy4<NDn
zh!t>jqHuJgdk06Qlk7M`Fb8(4B4W%#_3#pwe{hYv;RAGav3_4|4NvTI#VVGL%{Zlo
zMgU_{1Y<r2qOri!IX3iUKAeOH58vL3E6MiIOZFh7j4h83aB$X_e59u{xmgZCZ0KQ_
zFYcg4m)uC9!T5DI?n}MRE=O16>3lH)j1E4a2y*93fCJ#e*S#Lu;(OAsO9^}+4&2=Y
zy&Hb>YdrN$=OU@KA8Nm!yfFWwv0B_tGp`4|I;|C`wJR?eh^uke=#4N2apz`yM@~#Q
zDar+Y5)8<lQ1)m$lMYO1z>-WNI5)zVCQb>Yg%~I()GPGCP_Q5bOT|JUm5f6~oJesV
zY-OCVj18@)h|O30$Qw*<fC!Bz(=bt?n8mx_EAiq}lPkusZVBv-H@G^?)=jHUTvz$?
zaCiCB6}oJtx-+}%4Pp=4F(>N-x`^20@tF>E&BKxfMjzEh43D@2c@n7j<GC*I)g8}y
z@RG-~`IkMLn@8)f^t4K+wo2q^ZJ+Aye^fU-XCIG&kH<iL4E*$mz<ZxnRyn79?lVea
z>-Ro=ta9?OB>4wAQmk~G$ku_!1Ap9sf7O#x+$qUEA(6ajyo)&*k--m@ksw-QUA&Dj
zoRl)~9rT@|kfuE7E`1byC!b}>>y{s4T(XAh)rL9JhdsG$zf)qgxBU)}F5B<Y4ln%V
z0wu-Sh1aJUl@GI?Ut*A^qTm`xlh3*(ExSsj$!|MFXt}r}Yw1bch@76;W)+USK=1pX
z{eY#8h>-)!WICP$J#tSY^MG&dhvJiz&P9A^Jl4k_cw`)1;a>1^3`Y~6KP1-@v2k7n
z(;4m4;yX9AE(2;pli3pmArN|E-*2dijejkI3&y^ja-S-^z(qY~lJGqK9hN-*P>{uh
z#aI>N-%DNbL6jq3MgsmZZM@0dMo($Kzj)v2kMBnF@oqW-?C^={jY`K5ZmrgGVJzrF
z_J@*+VhiB4FG{P1rk%ubQrwQXsU6;bZUMH~K$2&eUT2zQRdV?}i=uj};lan1bOg(d
z@hK{U<To0;gv?e{BCx26Ah~R;#v!?QbH(CjSb$Gx`!^c3)dmr#_lUwFMcaT-jRXGj
z6TZ=jG))0pUsv$;C_vQBTXg8#lQ(U67(8@*jhxvnHHA9!TbL=u9o<7E5|8d&ExGLY
zY)p|jL&`bMaQY4%y_sBUQ2-V<(`_7DFWMBEf2HHF233-c-}B5K?ojDLj_oF1N{OfT
z2khs{CSNHi!kOU@I^^rb21|rM2_f7gTWuhBD?@#ts35QDP%03xhS;?-*7ztqYK3YT
z-7MDDLHH|Z0sL4k0CYr7m+<*mw=+?+CiTe>2qO5Rd=t<?fm|xCrGiLtN=YG$-&?7Y
zj!rrFZi(K?7T0br8^Yp)UxejIbKMGde^A{ONKEMX7uSU-w{rA$5|@xYyfH}kNRW6%
zxD4f%#De(~dq|t8t|*5Gy6FfdhW!+kEN<j!#a6wnx+a#QDmu@Iv6!dMXGV=4Zbn{|
zg(}n`zifZ~{Db2|gEnq><ORPMcmuj${F$|%0+!k<!;4oJ<(py=le~Rtjp?WA^h!Fz
z=pwOIY(iTUQ4N`!IJPIO^zy`6k|mVx%-F)M^5WcC>Y18`2OOZW<zyRK{3VrT>|E==
z46R!zT@}}S$*)r*?Gh>4@HH#4-u?=W#ZKQkIyVf-0vq2Dk2yX2WQN*I6)+?JM4U+r
z8yoyHptKOWAj7qvcopDk5KQtD)Y9b@yu6xf9o11(L7bcA1TkqHtR4Qr>P&iHum`$F
zpQ10T<0Y;aF`bB_k9Q?oUmmP0vqg7>u7uo9P*a~(j~jPWFV;b871xynJ(O8ES@R?^
z(LA8q$?#l>S2W3&;!wY&z;Zto!yn}<(jz^3tt5G^++TmRy3feNX`CLNMIN0+{({aT
z|H)1YYaSUz?QU5faV+%0AyQhDpp27HCZjLJ1DzZDi%Fc{$|v;}%BEs!8V9}%`lZi#
za7{qkqwC6F&~*h4-S1LeS43gyc8?`f9Per2JbJIhd#~{Fy*0g8xX>Os^XwGacrp)$
zq&$6Hr@Xi_;vfY*q5N9(l``e3I7B=|pvYcz8oZ~Vk|+XD*n~ZHj3}$YyCb^L|0jbl
z`1=Tdi_nieKER(88{kj6?f)m<GqICpeLyQ5zcn-U|927aK+6<HpvL1307HS}6P4xw
zIVcX6`QGe<4Kg80z)BHljPEbKxp(SKCcX&tGZcSE^3aG#UtB|5+Mhu^qO+tKheY6Y
zg*;j%3%&(eW!PFhy+C4l#i*8uqzb+&-^^BC35)4AOj%y7vXF2Yhdqt6lPLC@Cid`p
z3TxfrwDwbwXq7N=*-A%dsGi1YD;=rf_H@1GO7(nQ&tIXQ#&s(m*)fX(Mn`Ft`8Q^M
z39A^aL$Z-tBL1*1Pl{$q<9YN*X%=io3Wg5s5me!Gbs`nv%g@i+g7-^_e+<L&ptJdc
z)T$~(Y%oe56p@}NEJH1_cZnBArI?VCB7F4tZYE<`!I6G`Bfs><gPF1KiJc}94rYk%
z`?czpQ?i~M8BfMnS8(2`Vd7)+!4c)rj8~q0Ier?Q9Uaw(^Ve8D$~$5-4=s<<*ehUk
zZ|^hA_=8NhHP@ku4$X3Cj`dNKFA6qS43XI!g5nFSS5nU}>8px4kHr@XF9BoNw+kJa
zg^uh(S@gOI+n~IHE8-FJlM<qB7XyJ#539TEdzQxsJG+>6YnSb@ax`8$X1?(Ionem>
zze2T2F^2IVD4Z*dN^3kW6pEn-OB!)SBW^+?x;LA7YH&+Gp#bZl#@$sKcSPe(`UstH
zBThmioXfKb#(;eROq6msoww_(g<*ze5-pj2MC23Txe~@xiV=mu8tqhDEdPIO^XWes
zWmr9Q%H=YuG~al$JAW`JB#rI1$0S{`4fcQe$?aBL4jGwju>%9?@P$B`w00s{jrNJw
zg<`@4L2bD7*l5sE`H!~L54+&mLs<og&qTCLcR&dob$Oj!0z-@2NjNMT<GG3K2tYc8
z#3jbe!eBo<usr45q69)DR=+x}$;FvS>ycpTJdXzId9Hnan855rl;L#Da|Xc_QJ^xK
zOTg7I=uOAbgr!FArF{G{69_Ko#yzgojsYLd+a$Ah<oU!Dbz&tu!xYaOECRiMONDu6
z*e)go6j37kD!UZrYh5}b5k>pNF<Lb9W$-=`(#AZ1H)TD*+#de>IqL2cW8%UThFdn{
z_Y=j4L^h%NL`q^f9tI4?`2Kq_2?r@zMKm@1K2I6SjxcRSiNesAu&fRbFEX!TR5~*x
zt<^oB(%?803a-FZ^Z_5l%f6YVO~i`jVUp~C$08GgNN010(=uL6+3g53GY%XhTC8q(
zwz$5Qnf`Q#3HqLjJ}X*J07mR;1jqjc#@J_hfBSkH$+0%-PeJ@pGe|4!dV#B_q&qO4
znxmscdLlCIOayNY<*1I!D=8wP)2w%BQYr6-#gdEMxCmI5QLmj+p5<XEThWYd#o@F_
z2{ef!i?LZoFjt6NL~%JRL+xv0FmQAppOpnpwY(3%5A+U(UpK3f0YxL>Cyw7IFT^|_
zZ7?e20bm4P$=F7XWR<#WZ(Z!#Tlc_}8t}npbu=xg3V#^lmNBks;J`o*!u^70n^hYx
zMtu{O@sVc};ej=AiLxZN2ouIxkjh$wK_Pi05K41{iFgJdY|s=Rq|akUR_tv<ky@xj
zF>)=+bsJo~{WT~GS#MK_m0|hk_k9DH+;8(xz4mMQcb!m7bnqG@SmHqVsJxl#pHWU9
zP><oHatv(b*ex-3Tq(r{=0Dzr0#D)JGc5Exq0kE`@Et-UkdE1FF79qid(|=eHAfyg
zaSz?Nhb9R|-|og$c^dcdS=__tslZ>vRrxM<I5^12L&u&l>&!nbQ+lD<Hq4><A|I4s
zA+b#(18lUev!t$*QrBHl*G;KgSyH!R4|rPMRwQf>XX+%raTDKE>^JsO5D8>v>>t4h
zgsgw4@avJqX`L;P)Z#q)^{p#b=aE>QN5I?@7vvFNkOr!h;;9CUqUKmV)co%jdFSnc
zlDy%|W_5C##Ozd*<<@EE`26&=e%8+aqYJBiL}z!w)kJcd%H-$We^{L|oHy7}2l~}6
z+pmpYyU=49J(xH8x_)%gIX|nn+VzY44+Zv{Rkc5c)5~um{*c&P;7SwZyNSKd0t!>y
z?g^33n+=uH95a0a|C>@SHi}GHdHN&{rnC|6?Wws0-nt1~_=ipBS;6APF|}205Argp
zHBa#lM=Z>v&HS!_*|cDOWIIK6gk5|JC!3u}8-j~SYb>$;e0dW(X!Y)QATi>snd&>S
z^_|T1-Pro>lKNy)g(bGhU;w)aAWqeYbQyzd0f)zPFPAv=9uoyGEA7bAyID%Rvh<53
zT?q}zd7vRVD-G%CN<;c?wIR6=G$fQSRv6Q>OvTlpobqa;syxuBDr}{hdcNYQoTT2w
z@IjR0S7oRldAhqiSv?E6+)}o96xvB*_Js2CBF^$6?(!m)<wbUv7kRq8$g|}|p4-9)
zBQavL$|kqPU6a}GhYHn1q1ZD@2xP*XXW8;Uh6E#_M$;)gx#&TfrvwSb6iyDfx*4RD
z&14y6B_U%)niAMuj@ksgnwZIbgDHQnVwyG4Z<t%rtqZ-*tZwV~WL9_==X*zQY(i<0
z$X_P*BpkfUMt*q9s%GJ!=|n0z*($n`if*Qgf<<|Bek+M#0vgJRrFM-{PGYGSMk#yM
zLoeyLy$mstPJ}8tDHWZxif&3pH*O-GbtlqUcOsqpO{9})BA;fO$nTOT(ph&RopmSD
zx!**(>rSM*?nJuxn@Bg)L_W(jkxn^zCf#*s(p`5Z-TTdCW!;&qtUHsH`^{t}(@Z|k
zHj_^BOjg#N$;!GjSy^Q!V^yhIj`^4~bF#ifA2zbaX^|$JhVW(<ueJ^=tPk)LX|6Xv
zz1|x7em;`vttnkH1+%`V_BhT0`RTRDs3d-|MC+TbwoTW&uBkT-Z}6X6VfcHRLa-E)
ze!qlz&`DJf?nm{1uRo6h(D_^nz`vP#jTF(){~s(h<L^?Hz_P2=GH60fy;_Fi%N1+u
zXf~XsnvIzC#bOCMQG4+^gZkoi2K~kB3<`|b88jHLGpI0LXV781&ZNZn0xAqAtit%x
zI*k35iAUh?0hAPB9ffZ6QAhEpqqvV4ZT|h3;&SJ%;BqHi;oRk}aF6~{U#q`Vil>zO
zg*~O*sHaq%tCZ~|rG2K{%sx{O;4S5TW^XC?zTQ&qr}CC^Kb5zX`-RTC!fPsv)70_v
zW!_M6&QNi_P^-H_#d$)-IYPzxK|OjZak7c=a!>SKT3Xg!p=@PY*+-A3$l1YN?(uYl
z$5uRxR=8m@XCz6_uI9Ms&jV;l#86j&_^Fg7Zj!QO`M49*CvKAZBr`-SSclY#kl#l{
zFJh8Jii#z{^JtyfUZO?0FGMjS?Mn0@eI$ra@xwzvEOU(0F8XkNB#6)O!$bA4!d;^u
zu#Xk)M=gl$VR%KlmMp|6ST=Q8L}ixf*Z5+yTqx{+)#|mBzB|tfe;r+1CWoQ~sd!y|
zoclGp6oq&3HPv&=@#)@6LAOLh2mdO6VJ+P{E-#feU0yzwdN0{Zp5z5n`O-?j>>f4l
zx*j!^ufs8>vMzyD?nhuKM`UxU$wFckvzVLa){#hLpWpSSvM!NTRwA-A#q`(i<>7uQ
zFOM&!_1e9khsOi^aC|YP$nRgD(!*J*^w6(!pIzJWQ2Guh<rMR^YCJxL&g1@WAZwk4
zm1{^=4Zx?+d_2r~*rp`*=s@y?9Z1}VbRcoRLI)C?1wXJ8iTjD1NdA&X(}z4Le*VK<
zvU^6NcZRdNcZQQGP!TSM*$TmkC=iiTObI|_3f7iQUR&rL60My$(fSYbK#<nCXegs_
z7^8==%bNytkn5dKWj=R4mHFKH(&qE^4-`)7TQ~J><<Y$TXx_e;dHV~T@|;yn+xua+
zl-ys@#O-`FCT{26ChpH+kbZdc@xvQ?AKo<k`3$MfqapP_%8+VFq3fPTov+(``Uy>^
zA@tqfWcuaLqia|^S3i$B$!4YX52V?RhR%c9{wnvwSVl`+B?V>f!(4VQXFS7y!Q087
z!q)Ufj)=>Sg=<@qzNB^N%bXdO-SGXZ9}t|s%K_mYwwq6XKv+jqb~1~~v7*(ZY2{%|
zE6zijR-FH&ySdIMFsrO9!ajfmn@td1Q^L$_t<ZGAZ_7C;rV4EoIMA2$LIw(D0tlxq
z$gQQ|kJuO#eCUCdaUs>vkznc|Um_d|H*&;FajVK`%Ve>8>qqScj`wMK;saXTfe%v$
zc<GJ%K5U|+Ip0Y!70FY_W!+$|?G`qmQ=qc3Oy^+WvRZw?pq4kAEr$L~<&5CBc0G3E
z4X#9f8aW>`%!y=Nz7-9CoH?4ZyfU=(7Sk#0^@9&=w?G{vO7Z85>4;J*nP!zpwytpY
zP@NU*aBq+Lq@TQjuO{9xPH;XZpP3|{e`1HKRaRyj8=;Lit=d7Ay(p|y<q4qn&p%Vf
zf;|`MP^-8ql38#oi%EycY)45}otOg7vI6e10+sN7Y2GxSO>P;}*Lc16g}hTr#@J4H
zqc>r}9(hZ!XzlL+=t2>GrB~S174kFGL}5-K{vp(DeLk=rBWJ;ll7P`^VHaDq6^?&|
zh@xfrsnwjYfuLn8h>ki=s52VRkz}{104x`iamYpCRiHWP(40bQ=Vaz!jO1NL)Cj>h
zV--hMaT2RY$r@30On;UXbr7SaWBOBA4YTfpVz#mzv)QoZu8!EsiimZyBeoKcSf#2=
z9`t^sN{v^EEaZ>Yx8t%*(M!L83V?H!_-7Y76xM~CtCkk+YD+eKy{<%2ONqvqv-Am$
zT?X+}u<`^fK|e@`$sj?3NJlLQ2Q3VN;SE^N!|)g+UflU0T)d;)xmUq-hJs6f(ep`6
zz!)6E#enDARmsAR%i)uESZpwpV<&g`u==8uUD7C7Pbi~svh4B6dUbLW?v?q3_ewqa
zg1u@+de!{wy;8E0>XrF~_ewqCy^5@NbAnk@=s9IdQFiU2h|Eib<+&I?`WTR*GiVq_
zj4eg1Ly~2-kgpw<3!&G4M8acmpDDS{ZoE-{z*pG7!xg6wS?IFGMfXtlK7It);wX;5
zw`q=f5zfg%1i-p6Mbi1F7iEbN4$8S_H?E{T)vQN53G$lvo&NZ4G#~Fq#pDRv^~$Ag
zxzxkoDMcyMx_Q`-l>Hd`u2R&eO`-`t+VfSKLn5B_y!l)bFDKS$Rl8!OmwzM@cjmRS
ze4Qk(kvK(77*WTm^RN~o%+H7tavbCtZYMCv#KRl6$)L+3d(U>`O*1oF<EB#Q9+&et
z#m5f+<MKb1d}QyBm^k7!d(pX5s*f1CM=39a9XsZnQp5Fx+K=1cBk!ORy+r4({m45<
zymKP&j{kL9d-*codt1`t4vUqILM2J)XR$JGV%OZ1u2nL0&CT4kO17>^F*<EU0u321
zl31K*ofSjoJiDZ0mvoX#x^_u7xn#vIS;^vFO-mwTq0Oyll>J-l5T!Yn^A2Y@?+9^@
z{RgwHX5=q_<dS%c!6p(}j97NzGI}YosDUkkK`R<fTpdv*`p2BgQNo+dVHRbiL6`vL
zPA=#vC$zJ$=e&bCz{J8cou-!Mhtq0xx3I_aFqauHe)fVnE`~+q&m6{{GWH#FD*Bnr
z*fZvonX{{(RTz8D9EUki#WQ;BJY#N|xx37Lp^7@snd>n3DRaNm&t5RsW$rUpDeGt7
zF}K3p=d9wGAS*7bR9Iz~Ri3J%m1p$-3s&JE<+gsH#BY)8T#N8LYrXz%<gZ7EHijFn
zqdx-w?2g9+rV^x!?-yucI34%>!yM4%qBn<?FdTMpHadX*1KF-<&TK`rAfO+Q2dCcK
z@w8+%VL(TcjhsU?I5>EiP7mf6q$O^QdU<}<?ljI$8fSI(ZXER47+!mGSTlvZUT(+@
zN@5-A1~!nIN@j{~`GwnTgeH~NX0<H8^EFz<1@I9z-Y+sQ31k06^TgQR-X}t<jnqOs
zNrX<B)Vw@Tav>7wLFAR!raMwx#nz;lqaR6XiZKb(BX-d|e|dS_W*4>NAK1y87B>g;
z;+JQat@_K(kF{o_c63tD^Rc89FfIo8!L>i?U^v(5JfF`$QFulZha|Qtnn)z20jZ%i
zBd<#8f`?U+OkNf~sEWQs+hX}VP5VC$yQyf1o*z_qi^eO_B!5w01P^BVZ6~-oI9m+c
zA(S>|$75IsFqHsLk<lURy~T=rl6&I?ka(4GJ)UVW&~Z%RH8qM^=8dWRp5>8vcJk&^
zv?y5L*&ZwJCxLb~Xx9d<Ti?u6Gh0R`=$*V9YSPWzq@$a3Vn^eIKs%X1mj!4y0$R78
zOZ0o$6LlTysq7ZLD-cwdcS22JQ03oXhr-veMe-$vl2CA!kC{4@QMhcvRl|Xu*?Tim
zSc@_QP^}J5J)4xq+{98*kbX>r+(dy%3rkY-3^EO{KP{u67zu35bUz3-%SF>M;MNd<
zj^c_tiN%lOio1!$C*-P-=L~m~5CLrW?e+v0WWENAonvRSsv%^peRQwd$EmeHt~il`
zk#6w`-QvfDeM3srgy+T*o|}#EToKn~Rl;*W4dHz*s{3+e^l?SRu0RM*EFn1A2tnS$
zU6l}=Pe2H3Q^E3S&FPcqniJ%#KS5kkXH1WKVC+{y`hcw{Wtz$~O?+F*GCzFhT4!3!
zLU&l`jyekr<IX;A3k?HD=;rS*W4*U|m^7HFJDeV^C0x?S449PoklK(<jj9CTa^M+9
zoje#Zlr+*v>i@^w$J}4>)Y=^1J)T<s0;kqx^YnOX{fnMjAMlh(YP?T&&`gH@DbJe8
ztv<wYGe>t{x$0Fw)2IzNTA8@jfQ7g%oNq>Axk&SZOr}Dk4EBv8s>kp)*v#8Cb`Wf#
zh`;ya#Q~BAs8kkVfQAy01!53*t34JCv=pPlQp8~qeYg=9)vz3C%6ZZ=O14JcQPB#K
z5RFXi-a|q@{5hZ@;U^(RgaL$&0ldr+GCm=C9g2$%5#KGAr`hLCO`s`b^lWo;QV<ol
zIgk2v@_U@DI@uiBA}Wcgz|Tgr$#~}HIgG*@t;fi^o5Rh#AQ?mK4R^DUXs@s&GTxX$
zd~r53Ig@Epy%r%u1aL=w`CAo~D66~ivo7p1US7+Tn`IKk3NeXTx@Ivau~^S6<|Y>F
zo5d=;*nX<u>!rW$roZl`zwSq;L9Mrc?iNCJvVD%-{msq0Lg=c?QgJ<W&nKOmq)P4d
zhYD_)RHejde8rCyxQ)@*O=BHDF;6<oWW_F*tN+?Z0@%NL<H-l+;iW{_%jbUdP79Qw
zJZEU%Lr+BbIG)UdA)u#Ej6&S~9ijjTzoEUJA7dCCPx~N)o94kimLD1R!7^C`U9$C|
zjnJu}1<-kl7QtQSKBq#vWG{S%)<81<InU9!=X{5So-+3Z6?#Dy#WLCp85sy2GXEi5
z_Zf4)qe9!p|8d_QLM|Uo*Ek_f(<SfGbWLksHX(TZ<J21B^U7ziqqxJuG~uv-ZW5pG
z43=fCm!7&FVD8h-TK7a>+*vEz@u&Hw#g5NUPHX=<Z~h0KgA5Oz7qBWu)5GcuJPP20
zofoLYdm$}MkqWz_!mg^Yixqb53dTumU4fboGq8<N?+X0uAN<d<JB=^+okm&9sK)~f
z-Ubt%!BYCWcYOfp>p}^;*9+sYpvv;mDY{1M+Xm3^uX*S~xln|UEt<LClj}HU9Vc5I
z*R11at5Y%SRI=6CHS6qVtMk;X^E6wXXJ(ye+3Gwu>pV}WV;srs`IV%GcO~dr=lRQX
z*fqM|zz6A~H^=y%m{?VuO~?bDBn-3at_Erl+43WuT{`_BW);kI7777paYwTWp34@p
zvAn|L#K7a`ExNv0us+&f<Bo0*&|iL-TkzA|QN(f)cZ3T;h_PoRkHh$4e2hEH#m}~I
z*W+!Lg-F~j{~AW0RnC#+ob+<8Ea#?|tH^Sd^m4nh+-`cgr?TAB^m5N+xo7F+p38F2
zBjq%Q-H)A#Sf=e=VfdUZJUijkA*zaY=<p==n_b)D6zBT-BH<$aZsd-B;&zkT?_FcB
zu{>H$nYTZrPzX2Ca6jcptk&4-&XVd*dUbb6bvM0wWl4488grat&EVm1-Y0QNvvIGx
z3LAIX_usFqma`NyQ3hU$^zZxKJ^d(7(Pr~CVk9udrUuPco!RA0|1DnC$y(KoRCTjf
ztwgF;lB=d``r^2L$hw`d1(ZWtyjVz17DyHnGXQn70CiUcTFC;mk_=RAe!ImuZ;f?y
zma!WgkY@hYY&dp%11d}@<*HI{N~ww}Rk2HHcDWzD&5*QZEakgP4e#8Cfpk?of<`2;
z-N@I`=eS;pycn`vq}bei>#n4;TYY0FWK{P3k&8}7{N$^SIr~QZn{~B-=^8$~LlRQ&
zeZ|VLwVVbXm1BPk<=A~35x%H$ta<*^qjKysD98RXTCUWsBK}Z}F`q(OmRO|+IW@*<
zBW|@K0uBRj-H0w?iI$)iQVV?KH-y9e+I{eT$*?3D>%%WolDpp^6C}!5dGaV8K_P4;
zDL5l&2=yaLDN>FMF<vcF{?$n@>FSbhddZ3|SxGOst4r>tmwc*AK20zAOqYC?Uh=sv
z`8-te<JYGz`MT95D|uSS@`PB1Ftt5eZX*E`vaHus7Zc;8Cf8%+T5~p8Kiav(b6mNQ
zT+T7dIVt5_qnw*ku40s{B%ot=Kkc6Sr#snwRR_EKoX>zB#Qd*!MvFm|I-z|;Bw=`5
z6s~75T3=<w0EwnpWWVv4k~2c04mt^t*uZ~>QFK{vvS2;0cf-S0qKSZYKg|7Efh}gJ
zt{XFd1~|rOB?tui;tlvXaXP5Z#G69<G94JSI`goJ<H_O}0n@RjxRVP;1B~iT(GDl`
zX+OBS>OemnRVdG;<4A!L8@TX~!YAmgA46~CUHcvEJNhMJPiFbo{=C*h&0>jdV8sor
zxiR2zltqBe0igY3M*O&69zX79$B&yGKb42X&u=-v5c|wOxmD+*^4aI3qUNLJ&-qjU
z(oY=b!O}l*4fM|p{Bv4sQ+)Y%-e3VQy{V70LZPerZx=HTjOIXicG1MlsHO}{>kY2Q
z(_nrx<dNxl9POznNsTC?MsKmj+`r(t3y#Djx2R(pp>%u?;d_>k!D3`3L?2c76fD7V
zD;dl2T(pkI<Xr9>cnT5;yq00L&+5(EN$2R~{P>5?MeVG9(s`5nug9Mien<nc#eYA0
z?!WkMxA&~qe{r=#INzB~dppM<x9!LT7(1{1kw5hYr>J6l&wp><q_<IqpJ%(f^cQ|Y
ze|LAwPoMplQz<{Kly@Cx*Zr@uTY2{U>3^~E=Rg1~J8#O^z2Srh_ZRsg2hd=~u7UxD
z(FW#E(X(m#OCZn<g6o_4osYlS@#ZE|!O{jncj`?);Hd|B(|{ywWKTXkmw{T2L1dg#
z-TZNZx#jYUE%@~vd+ot)^?KotMt;{@^x45RJlMWg5B^^;ng-tX)$~vRqE5EPiz%>!
zV^n*+8c&DV;n|}57PG7Z6Giq2jAy74=$wxK9SjB@*1uSE2SE=`2t9v<9C(|#+vRQ6
zxFWAwkZWa?ixYN-@xjM))(=qH>jDeyBM3^?GndFJv0uix)sEP>I|pf4ioiGX`DAZr
zXULo09#5}#0Ok&~wzCZ^EV#1r0&oU)T5C5OKg&OFA{kfWOXWfg62#tIRPw~9QPprq
zg*!6T!5tOg=#cdW-fYIDib_Yy2dJ1U$^faszddC$_R#pS#f=Yr6fj2o9T_6tv+5!2
zYN%ignVSX91#iJUN%d5y@eQ@L`NZfw>aFoBoGxLfIY;;9uo4?yTeX{Z?Phvycx_lX
zi1~H+xa1$v{{!An-M4VJtPbf;G?R)ov6xVm+XfNLzacFWO{+aIeO$v!ZUPZ&2rqL4
zb2R;MJ`uWL1EZP>FtY>7JZ-eEsBm5TBxJz^_XL5q+RgK`*Oob8x)|ln8&1ZBKvO5r
zmG!yUz+}ed!huw%nMIB0V21jCY8O?VE@G4|RmhJo#A9#qhau9BMe~M|oOzws{&08!
z((_>84;rJZF{g;bB9PhD7&Y7an2XjtY4f)h!EDjHQK=2Er;EXyaugW;`vf5iE#^Xj
z%SvHluT~|2vG0}7GTV!@&@}lfvSOu`!b@)jcqba3EO<!Y_x6x_h)jrXIFaBS>&Zpp
z&{Tm_tLKzYqM8}Ry|kio1{Tl3put2AmIH|zlLH(wDMf2J+{k(HdORP0i+d<a<d57e
z%FXmd<E|9L%uXygVP^ITSeamCifLu!R}pU2Nn%$KX4OgJRtB%?Br&TPRuyJcaVWGI
zRR%U?ZZ<RmfbY#ZeQ)joouC%gM6)pShA89^A86b95^&83xMmn!GY(t?kRlEvJ5dO-
zB16Qd2C2G>GnnsKJcc-k>Htn$zGJZ@Ucgrcd@O*osOGO=M1{KTQT#|gC)>BZd~EhL
z)VY!}fd_OX&44!;1E;~g;Rg^<r#>B|6<oR?EHan`qXOMxR;+M)2<>1)>2ObD;EqCY
zX;CP+qfBtm47fQK3StmTP10vT&g9VeY^A4&P@aUlZ#9Ij39)n*ZfG=(AwLO%!vvni
z{mx?fjRx+gj$#zv5p7OQ?d;RBL7y7l)LNbMi~8B=1vfR(24@Ktct#1hmT)50xpW(P
zk`u|A*&H}tm5qm!&NaHX+)RD1-x)22d3jY4%h1!AQ4R`-va`H&L$#vG;w4Cvw)qnY
zlsi1gvx)S4;ANgvRitPlG$fzSoTvy*#<#uVs3)Ox`aYacrhZ3+U+(}PhuI<Cf_oc0
zdFR0ivODne#-vCO5`tM`sy#p&oF{oG-W&9c9gpU4bPC_HERl*&N1ew#AMgqrNVqUw
z@ZR2DVy&p&_H28Fc_YZ{%@=^7+t9+h-MsB_iKwu3yN?|Qp@hN)K+8wp${r5X8MyXs
zP8*0CBXPkKPRm9Pi{6blqL18!L(|J{JPN9R2ZXAWk7JCFUNZ8m`E!tWP=hjO-4C=v
z5GP09ZW~u|an0l<=7l}3wdg8i9L0r~qb&ie(eBMMB2uk$i+!nm3sea`R78E{&2E6G
z_jGZZLNNktoUek}qAPe3Kq>_sRwoeYJp#E{M7^NDZFJzx_|B(fWZd3L+5#M;fKf8D
zgABb}Y-}oUIT~|}DZq%UU@%~d8A1r=1}cMXf16;<eNqHNbzIoL!ZQ7RdjZR2#{B;U
z9L-pHJ7?xx8&NH^0ffs7l?W`^F7$!ufiGw3>GW1iIP}NtCD%(|kHr{KK^@FxlRS(?
zs3Baq><<+XE7Xh4ssya2RdQz;l|Wa4haDoPqCpXZFwMl2FWuTyWEUCuqieEgYRsLD
z=fTwnP1PDX^6W*bFd~xx16X_g4~UQJ@pfdOvk$aC-kR`_x$=F+jsXDR0BE-e3xkyP
z2-GmgBc}yAzM3O{pbrNUV{CJi9>kdDp%O+H$wLoBnSX}yOdYAs`GznVW0hoLrIF&I
z@B03TEhZpS`F&i~=E^9Ukne=~0tmJ?>bLwkZIs-~Gz^9yR(l{=V``iM<;j7qdfTk8
zRu-(2k<aT{>;}<=lfbimB=t=g!bBgRSk4QCn8UK|wqOu(z$foX_0}%(F)nL2ZpL|$
zL^?!_fk}!CIhsk@K>}e*rf3%8q#ELgB0oel=7L}W?AG@uV$A}U7PEy+E^eFakG_pY
zuCKzb%HGOo^_%cDN^CJ3Q&=K=y9wPyAcd{BHUUyNh9ls+Z*hWE7qV>*Z|!BR8ZX#w
z9CzLti)_h^p|7`^Aw3KH&==QO8v2|5z{gBc$VRQLX2w?(uwCetq-dK~<JdnbN<Y9!
zvj@}0i<&H)^HiXGCl^fxfKmTU))DBC9^Xtrk{x;|$1!g{hQ(=c5>9;_^p@oZf&VC_
zO4#;h389=&;=Nsk`Xx>_oxz3%BgAA|mUJ4H{%TzM8w%+!ML1kTIy?zUhczx8O1dyb
z*uuj~pEmrXyo!LxR~zYV<PzynBHSvR@19gi+S}NGF;}(6?+Cuo627-UyIhV9$m=$6
zWh@nUvpQTZ$%c=nP;J;F4PRj^aN|NwQZu)%5|~{rF)v|G5vkcV#b(#$9p^ukn3d*j
z21^<zkxV*q*uxA|h&X&7mH!cECVUXkNr;8#q#VRzuoYvzDq{uoZWbfT*)a5n<LL*M
zKXG?XDoj|;3noBJRWHSqKtjAjxw7k%pn`kA&>MXqZ!8MsVlxrJpFoxtbnxxWgdZ8Q
zOq7c|6$3^BXZW6v#z7eT2ZkvmIP7hkwQC74PKI+!w{9$RABLH`qGXs>Qbkz#EvWmE
zK1-@i@E2FPS&Z#2E>TMQmggFKo*Pbw{1d;fV)T)BJJZxh=RI!bIY4{n`JCJY*Ego%
zk91H{^T<yUk@JDw7EkY<#5{nZUCVLt7kPjCdYkW}JC4hnENolJj`R5^7DJ<}48TpQ
zq4aPW1OvD9*3BRq&`9ctlKBlCxn}%cLZ9|pwBC+@dV6y7<|7em3~`}z&j6Mn0lMT+
z7?mg#L1z;y;`6+qOAOg99{ML2wS<hXxKSYJNi#qP#Ss!;{y|gz5B9o#dRpT03PK>e
zE($81Y0@S7<34G3Q?3!kvs0SEA@|?|EbgpzD+<-)CCfTfvaBuU1u8iD7+<Av2#pN(
zt+=bj2(Mg|Ufq0<SB3-;O;wo-#HA)|VS_AECF{n)h9ILFnE|YpwX&bDsy#L&Rt*1u
zMu)Vprs6XK&f_ooW`N8apXr^1nKyy|L6R^TPcX^0G%B><qzA+xn69BREbuWXkx_%)
zHtXl9!`sM8^0Ld@$Y=Stf$_}*z8{7Oz{Wk8e4&2Soz<7in6~(o;*Hh%!X36*-EvCl
z1{73=*rNo-M^&M2LCHr0)T){z6I=^na|<Z%2RA=igARot+>kH*tE;Aeh4o0M#vhLt
zbAQSW5V2LKX4Nw8W(wn}RpqU`!bYBxk=+4=qbMvLl0|nWsqurcp1O$nkK>B#(oL5K
z;3#vF;P!0Sf(?$7ecOv*gAbQK<?D@in?DiuHmjHXjlJlGhMSCa9~0Fswn;Y5@|$GO
zJ@wu<i4eq>W1<J~<t4xDh~Iwbz4DNwRGm_ejt;MIMftTkDx!jc-$uv$ss#6>`tk#`
z7WDA!AfSey5pedT%EV*o6h!<>j<rMHM*Hjd80BJpjB<R<$hp=)G_v;X5;`qgQ<sQR
zfnlW(-4_W2nvoYrVl4`rT1TEd<@ZtO`z=3uPRPe>FBj)Ol9wxgPqm&eYsPvcz6^Wf
z!$^L(#Si>!CDabWZ&w=p@`By4Kct5;by!|wFbj`j3y<Op-^LccrNU<Ei1i1m85IPk
z?G~D$I!7^ej`oGeMwW%Ngx1zuN|Hij2>!t8htayxrfN<#rJIpWKyRlDpY0S`wHmu(
zi#v23TF_R+*q_t6siQfmWBqc>9_oIJOZ266;KdDIz&jMHncu4yz}w32smc;yijp<r
zxJ@1uTUYyDVifQqk!;mC=DTaC2Eo~rb-9FXvy#|hD=4X$OX5z;K&BwiWM?MG%>FVF
z%(sLx&A@9>6Qxn;NEISf>OvB0Xpt5Qzth^!omRWnZX9=NCym!<ougmc^%gs<%D@Qq
zSGCKNw)xUf7xJe7!$`n6>~OD~+OO1Mkzq4jz8NWhMCCCT=r<}2bhuXUCfWdu4^?vG
zE3FJHu+8M|Z#fBFvVe=;XyDE4z`3?HvZur_A|bx+6j53%?nqH}r--tm`JI$s<^w}=
z#>XTx>=dsSgMkb~f8_xdi#xVv4;2qvheNK;(}c7idOhk{Bouy43OY$t`wu`(8nkjj
zP_w)rSK`Phab!|V45X<~9t)Ex-d^-(bG__}hxxp|?Q+=|%`fKDylNR|I-fGtg#z1P
z|7R$)(+ejKB+|iVwhkUWcn1aL8dj3X=`b%2+%bP*1swul9Oa~`IqvmQ`s><Yzzh8S
z<qBH7|ADf@f55(Ooju4{wI6wOw7<gck?!uwB5EA5TNuQYvPYOwSdR;B)1(c<EIZ}m
zS}$q$5O=Tj<@QK-Y$H+PUC20Om(*I#>qbk)z)D8P<Frs(mrJn<Pz^mA1ICnYE+PNL
zh*b^9`=7A?qoi)<q*_MWUXAH28gsC36+AKu-kJsFDAKosm1*#cQ(U|&qb)m#*iH6+
zU9IZqqdHNI9)XTa@DNizTu!N<a;A-aJDhqbf+}qUib!D6&_43-^w<ycqUVL=c8`{{
zdz9SnQK;Q!xZR_ac1hB(S`I0r%|}Oh8`A&9vFb21-m$)q`9MOrEHN7~>BWy^aXlUs
zk5Z#qWW8q?F`n3$PeOL4196q4^b#*=wr)I-DvXb$cIU;pU=s(U)AeS)0(8$-SV9~^
z@m@lLja?i?^}Zgd17%e)OQ?vHY#JqPm8LAJC3?F4poIpyb8!UEv<@JyBf7-i-b-@I
z=K*qR<fKP;Wcb+)%Hp?JIJy&4e>i@J-Y|m$tC|#etcS+~-vd(2Fdp)NZ!R5AbA|CV
z*BehWZamHCcnn!p%H&}Mytp#i_@8>`v~kwCXx3lVo1IsUpX)F4L?S|}x9XLe;tdM^
z)*IvsA1z$2LGs6FxjBM$(-(wIVHnxK`}YR{g|`q{-Y;jg63t6g{yT5b9_L?@*-~3A
zlPsK8;2$(g3aj5H)HOeH(^uiZr43b=J5=1;=<VK~=mm9vTE(V>Mob>`B`ZbnGk^X+
z{&cKaGv?s+YomUx-w(qniB+zHKB?%pnK0xL$S;f_FFLL;e&UdjgoEs(pDnsye)PN(
zD~~=(0$WSQ@5dm(0xRtMAxwCkFfKfPZ7}2lK-PJMA}gAxMo_it8Rm4vkH#3j(<W%0
zq|7ty%_zkF`6n}hs7F(?lteYR;|M`HhP?vqabL;Kc+{a-I2|k%zjr{Rz%@gAmj8Z^
zUTD0Vh5hv!3NiI5HPlRMh?9tE5jO0>Bq`$GVnR=O6)vBPUgwrXLMkToB?QXoS9CXx
z(=ZXq?d`o9c-ONHsWmwY<^?zbsmt4YI)PQ(QN!vxhZnawmDH=~juyK=9$Gf_^kE4P
z8mY#z@7<{@8oEl_U1$3cu7_u6|MGD{%nE2El}SEC9cR7|Oh8@vGh8E@k(QhHYq^=V
z<rV4U&^Eo<_$kGTN+sbpWw}d{Gsi1Fr()vG_eyVKFt(Dx$he28{OoYMs=mo6s6bvE
zLsd2skz5RM#CuqAk;f$$s7il{!l6c<F^at2Er`EcUM@;gRk9ds$>OQ<o&1(GGdj9K
z>Y;7*$Xel$Gmr7I9^-#S<A1fMfj7Ed0JDmZ1ePBEtFrriw;YQ9_4K*>82{^U8UO3M
zEw}8HALD<euvz$3J9~Y3Qfp=m1!h%<Mg?Q5ctJ;Pq4?x-axyj|$&gl0eKWOsGk<X9
z_21%E)6Ucz-8vn9S#v*0Ww`TTzU&4WOPBN+Os)|87<zdesVGZy1r#cwo?roIv(Yuo
z-{1pH;#{#awHk~7P1~|5%#qB7cq3dkAv3c;1;hgrh)|8@kLivgtNnlN0zeVha%AMz
z)-TN;7>a0YI$l7J{O#O>dBO8Fdgo31j9*<NXZ;!G8b#{rGjZh^g##L{KxH9brJLrn
z8b?sWxcnj!BGRO$$O)HUx1$q+Vf3bRmGp)S@mnIuoW$~yDZTMvW{lWO>>wJMNtpxg
z6`?5tW#vdsr6p%Xg5uAj>s2gR{P?3F%BsHW;rEw>fYRvdX#F41kh)qNT!o^O>D)ur
zLkeOL7uzfu=MRMG@XtLkpMb`d5%)=h0+y(Ms%w9I$!LEf$qr@GFfF$$#PwVLlP&UJ
zwy1xZTa&<qzOA%kXh5x>TajksbO8?%zVG1!_%TD<fHYaHo{aDO>GzkEbBjL9Gcv#1
zLUeM!z<$Q5_mQtWY<RxEBwy(~C}Zg?<t&}g!CKOT`Dxa&o)6grM*xUGcfa_1@ECue
zB+6fgIDZ);{bh>vmlEyIbl3}BD<@hH#q3j29iBXSf)$3CEuib;2?Qjt07EN-+&-G_
zNdpd)IW(foNoxadYuMW{UKRD{gz#(G0&vVINzdOS1jlC!PWWlU6MpUhg|t+Svs!wS
ztGC0YibFR0(CvyD$`&VNmB@MtF+#|5Q-@vfMSZdt%D8dzw6i0R{20u^o99*tXAp{l
zhXh)P35ud(uh43<Vy`TTi4q@wh1xOWuVjqf!LkPL;DLTbKo#AUqRcfTL#p_M&DmrS
z%z20m^iuR0W)|@&WD@FP_DsmkVOugNAGXpDylZsB#v>?5N$B|lO_J6Qsg!{niwUw|
zL$RQ^07XwL_RU1u?89*AV>Vq86Cw)RgHF@ITEvU6fL8!G<=|SkhZw}nkf;lpf>D&S
z#-a#IE8$%zph@PU30{21e`OIjQ`N;qN&DZX+Wv|GW^cr<XvrwlBY3a+QN)r7!7@aI
zj2MK_;UYeGi;VUH6c1#sAn&gm%41V^^{v&_FanL93tW2d3w-I)eO|=s*4t#PrUnAx
zN*JyG;*m_anG_b3^;K%aI8DYX#jb;Q{wS>NRJwLi5u?m3f&p+eC_0`<WhjxWjE@3U
zwyfx8W&m{&^k!xdJjzUJ;hW2jx{eOcJe4Ak&Z!;}fAE#2AE-1KqSh7tI>?olVz5)s
zpa=>_GYk+q$G^awX936ps;_9<5ZbH~4=yAEGsW;A#LLVGWGt6)iFY#;_08*!G4bB0
z4_&~}NY=u~(3%<%st0nssJ-SJikcJ=>*$?79RP_2p)OD`A3nE!UxXIoz-B`VY6B<Z
zF`8*$7Lk$a5I~3n49d6w0a8~sw0b;7seop52sS%ZbbEvCuShwZm>na|dqp-NjTbD%
zxioUTFi)_y$EPM#yjtkPSp_2lV!p%Sa6IZrLH&RqcP~zAjk8Yu?D7=vJmvulAc~iw
z?;2;~-68s=&U4~&Q#i%=(t~L<WW8i@k+ZMdGLnwo38&ZSX*7BO%zj8)cD!0w$Q_Ct
z<ci7^6&TROe#6b%Mz7Tr8=v$D_!2YkTdL#8c-p~je!Q4>79)9N@upMngY?%cn5R4*
zS6MI$oe3@j=PXGO-nAEu1ZzuVYVdyEQ{nxX3JHoq(u9QhcC)I0+cLE&U258|GATod
zs5p7uHxl`_*jpp@W{C8}n7Z=3e6;zt7`f;byZsxpw>09XYz3Z{Bu$6LQ8IJEgnz=K
zkWRp=&7UHE8a0`4*9*h<zSVm^C!eS*GC@XJ7sfK+A#cUzJRS*~x6R~ZaiW$$UbKVj
z5}k_l?5d!({H}OlA}WHO`=iYm9m(K&i&5JWPeBlg;^z>7ItQV~DV3}fFA<t10m{@3
zOa%t`7E^RPR<2#+tmRY<(VKk_C1u8X^T#d|M7jla;9>QxLm^om+wpVyYvBBjW?{iR
z0;gp4=VSO<o}z#2JP=D-n_>K9TP%UOSmJ1bgjjeIJIU#E5l;hQDK|Zl?v5E<u6EIg
zb8iriNq8+ea%+b_4Y@TmX6G2AN?k7sspI8kiuu^o#q)D5`77S54=XlzU_dtE(Qwf3
z@lYf}wNu;ZS{taGklVNcVS|nh@<q%*<h3NFuO{4eLDq?G{wdX&dHp%QQtV}%I4qK~
z?F<eae&4vVy`SJYy!^V<{Vz=IuP`E2N%p-H17=nCc?rvt50V||alSF^xYDfNdp4Tk
z*=S~)jrhx6_rLD7`fM~Kv(a38Hk#qtXr|pVpxKy;*?5qv26}$C`~DOT{C$9(?P`JU
zj8Ro|3pSxAa@CVr!JBU@Bdq3FKL-BY0WmlLV#qZyk%<gdHmWH#eg%1GCUUrf>$`w{
zIYX5)rxcK&s>UkY+iQ$QGB#nzz1uPBH{<Fzm((|7QRKq+5r*$X5SGq6;_98?>Ok9a
zfmNaj#JVyeuLnloJh4Z3D0sg%GOT7Wk*6ocKrZIyx9u5a+m_hLhtT`IF#k~?8Wn{`
z*XDv}0p>~fQa;o-kU~Q$z&0{5Vhb7*crk#J&N+F38qLIZxqz$3X0WTwOEZ36nyK^B
zjG33|L(EIl8e}teUYZ%_r5QIb)1-N6E}NHT!n`DPS<g$;nip<vOz<g{v%=5#mRAEw
zLQ72X=~r>^n%vo9DEu0fCqrX~(>t);1mk|1_kw<CK<5rKk;O+mSDxYUvmIkKcJ12e
zuc_eEp?FANSb`Oii@qii_E8#rltv$=Q5KKm{}dUuF;gPudJ-yWkBl737C=*?iPR2P
zZpK$`rdC$5JGGi<#x=Hx8X1IKzvQ`P41viGRG#g&vC++V5j`$GN+~WBin-&O>^$~%
zafT8m&f-eM-bIeTlFCgaGgB|>hzNX&8kgKeUgiZHi>43}C0zea7jz;Gm?#esT0#TO
zObslD!v%?mo3NQBUInf<iBbixFPm~tSRmTaLT7B1W_rZ?OyXAfa&K1QYB8FT@NdXd
z(Fz9BJ<PRj24(=G>Kn{1(7+%tp+Y<Y3@|G+K|%8ZetwBel`K9XqqJjCNS?esqD{E|
zeliuQAqp}aMYYN}7uV4#R4vYLR1qUcMUJf@Y1*Qz^1-&*xQS(XxCbhP)BGW)$<#sV
z6IMr+D+%;TgdwiP7(~Q#Rdk=Sz1;Jfwsjrw^WOjM9G%o!kxRho_kW$I<>%r1zt5h!
z&g1>x&vO5_m@_Zw_VALf<DlVu?xXX?6fKKrhudbg!GKXQ%!Uf<8z>g@<TibEPZs@s
z(D8+G#*WyGU)Lp97`i8bwzVPVp+b*0VE)1+&W2+k>!3&N0M{ia8Gkwq=IECKl=lvp
zY+pNIUX7)*j|#rZlhzLXImS9IhWbSJfkkI}cz2k*r(x}RA_E$C+36`XjK(~{8Qz1$
zO<(}P5Qf>@s2}PJwAdSXAUI$ev}hNgDFX?z3w&AmX}!paxP+_rLZ)}?k9!N1p9z6M
zr5!*%&w2}6889DAE3ELW@`joNpfR2Kfk>6>6LZ4eL<j}?txFK<F*^%%n!_4TF*n@@
z>`0gL$5Pr?U~KThN1}j!4AI|KavaGW*%iFtn3#>P=65*Zf`AYhIbi}3LA?M;Yl_q}
z;&epZ8ELisrqN=p^H=SkYRx)>&lk<}9~&?0FWJ#A@UqU1&o6#yHeSDJvp44_FYC<~
ztDU`sr)R)ak1pHi@ZcY{7S#L)zR1C^Us(O;i)OvmV&_fPIK4P&05kyKtev$R^%kt8
zv*VM?myNU6B?b`K*?F6tG)^0BDAqnNQLCa_&Zx)EU$N7A^Y{&XtsON^8tq@GE3X>u
zGi>Y?v{qvmwPw3<jI5qrTsAMxTXhBl%dwY@*6~TLaaw=54edjlto~#Dtj$_)Y9}Y5
zfu8>an+SH^8s$+PdWi`<u_ZpvmyKrqxQzqSpN;`}(7}@uYhBcj8~CCAa~+0LYyMIa
zKwI_yzJx;XlD({*)?Q=8_WTm8z`Pt^HtVO@X~0D5@~G8rv@hFr_WJz%CE>bNZ~oXg
zuD4(-Jvna?!Y*6&6135-QPTheu+`ef&qtT72H~x7)~+|3mly5E`B?!b`6oaO^s<IY
zO9{c}XMD7Pr1R!42o>=_6HsD5y{W_VCgPN^RYNSa0Bgr>vlui8NNk(KWM}o)C%9=J
z*YWi^g88Y@suy6w8Z9gUFtOdAYS8E<jT+|(`o%vs0Be$ROEjIV@ru=6{@B2ti_$;=
zt%jf$Lfi2hfpet(2TvdeECFx+0S5rFbx*__04|4lL@CQSiUaJ`^XI(eiMZ)bPw~3@
z4j|!&PzJf}PtZ@i0>5J91?=PM;TJ@$XbgsB5mQrcE}9n%YiZXDYo#R@!k1h!MBO^q
ztTW|jpZTzwvwg2iqSv+f%Z9b3@57@YY5fR2ksG5!_6HuR&=X`n6-n6@wau)IhM@_h
z`2i05!{r6AiF!vct;YY<^D;HZZz*$<@#ZMNSaN57m3yLx0B*g@p;>N*M*F(@MNu=b
z0Q-X=-v4H2>UHr{4BPeN?{TJH4_8+A_&b?$K}N4*Qn&sPox_U?(Bo<qBlM}KZ0X`#
zVe+hLI8-e}u2_dIRa+OX)2~<ym>+o*`(=Yjq(o!B67HRs5|>cI)%WlPvELG|qJw-P
zxyGl&5B|m9$e5P$n*Mmv#o#JS3tkPz-n>9}GR|L~vl|fI2R;RcA7Ge#Z|2W(N*j>Y
zpr}&FRC=aLV{wX0gWkc^nH}EuC49AsX=2*HT!5s4uk@!^muJVzd2E(#E*o!*EGyZs
z#EmDkrZ~xSJAjz%#*F)VR{2Gw4ep4ES}OV^NfEwWPN0x1{yl#Ua?&ODME^~up?r(?
zYxDV<9fv`7gmhxMgUXP=NpAmv*sZ|J`utV_*eSx|F%wOxwS*5<v3SlaUMI0$MOESh
zFyEb`d|TWx8SqX~Stx`Xzp5<4HX0uymuYzww6qm>02~b;b=(3meXk0m#?<ef0G}MF
zcZYd>l^put?<E)1w;4)olw54aU9)eKOYl3Zf)`N^vc+UDuIBTC^pIq}J^2=h=G)6}
z3nhMCwHhHfsgtMT?Qi$KJ^NMymD7T{%e*6>$_1U@!1hXPg8;+d5&XNwzl)*&evU>>
zSeR4)D({JO!bD^nfX28&q#!ME0)a9;(VXP2z>?_LfGkOJLR8EzVM)hvEUEd(nX+)E
z%&h6pKUoS{LE$Gx=|@hMjgzgw$7*q0jLet+kxcB^Vq#4_A02Ln>B#2ndG|V#`BY(y
zSCB3Z+sLN!5+nhESI%}BpTM4jx^%8V?FmXpl_)90F<BT&Y_y|TMr`H@cz9-@WoH~1
z(7%p%&@^S0OYB@*AP)}NAsc#=8pRwQm2OMcOJh2f$jUr2{7-7zvZH=pzXamI5!+Ty
zT@?{T@3}=(ybz}ZdRs-;(~R4MoRYj`%*6u1P+J6{*?OO}IpmV$Q7*QYnEE^WFum;*
z63dgYTVhQ9or&_MEI#OGmc@^i2yS#i{QdWhPoPsO&s615zj~tXCyx9aRdGE=PJWF4
zEbYJXR^QC_U&ndwhU~w)yU(6K+J7JIzmN9cNBi%i{rA!S`)L3DJJ^4P=#UV%oAUWk
z9_3*EfngTMJ_b1W{&oHI^h+9rSFi*pnt$0B3V(kci*XT&Mr^nu&=2+<z@QJenhN$V
z@F-9SMmMt!tH9T(bV^nwZf4ouF5td=Gwy36f;#&E2#OXWjyDJN_K>zU>p3reNSct@
zgNDVY@rG>1YOay=v|*%%JR@%^ZZLUc<L9uAgFiKOi3fB>4eJx}*(w(0Y?kAtibvZr
zCx?@2XOx99*@UqJsJw`a#v}nrrI(DnM7oynWv@(mp%F15^-U*P@fR)`Zyd&wOSF4N
z5vk$BM=NUicqK|WD-u&RMpt7IHwX2P0eoduR^|#pt`g$QRV<IuTdAial^JPPJfR%x
za@B~|_zfxP_bqDlP$Svzl5TH5iWydf_eD`ZR`<^jem{TnjamiF>&%A}AV~bfJGE0}
zEsWQuasiAdTx=B@Fy33#Fee5fM>SqPZxW+W?4cg8{ViTr{r2qJeGLvH2j!a?!8Mb?
zsoo2u$v$JM?qp>s&?+E^3XakWA|cCyw`m2nR#bJ!nmpMpuCL3W&fOQqyQ$-d6z`=L
z_s!y}qv-2iVycU%+l)@MWlFFoTI)c=Do~T|VpX9kuJbU9DwHS}2FO#bMU<hW3B@9|
zwsRTIc@;~6s+5&nPl^|}gql<#sF@K#&HEupMYX;yjY0^TMiJB%2%5$th@y4HA}E#0
zy2}uRwdtz7K#<;Czxjsln&men!{Z--+%|?;?2qD(i=l6py5`86F1_id7%CIh(jQSH
z9VbIb%`anp_@(s0h1kMU<IC8UsgI5?thLO&zM)}`FRZo7y(_I*?iHf?;@LwglOHw6
ze;fVpk95Z>MfICO|NCs$c^1C^Q+f9M@&3=>GXLL;EldLVnEx-1KmJ%hZl5nr`lm`|
z_cFJ#rh)9a#Zz?&bwH8vKsQnovxnLm!Z%FjcnYH)s+h$ywu=B3LsULvP-u9{74sB;
znUp@=G2m(Hqh1>!kt%pG?D|v0F>EsZof)*|t6389hPxkNDz4~WKQuy`?QsZ9AY$U6
z{sAOo*lqk#@546C^VG)($t6rZb~~b=%e+gh3xH<q#(PIPb_6uTBRB=y8S=35b3_zs
znDFgs@Sb8UQ|j^Yb<o>ml(kRBGoUnrq|W-2!592}1i$I^pJ)vx#S1Thf&%-qHyON_
zzk#d-vZbyeQjmWZ?FBO_K5;cU&?*+X!`RI|7@iCV4aa4rv?(PtVuBaF2}bV#5&aSm
z-i-%P%#TNt*oWmbob0hGAMw6{g(AF&f)xG6BOXTOMtgY4?uH7&O==}9zeNCsi-ABs
zkC@@lRETd*pmbe3=mn(G;--#})q~HJfU<>>G|s&wR5|<MsmBo5Tr;^PGukbyB62ng
zlI6oI@mSivH~a8}<+l#eP*&r?oCpmC*#=o$?t&Rvx5x^lVgb{-nDN-`2;vO5Nd$%g
z!RWS+QQWf+mEx_A`+dEPF@xp81NnLGb1F*&G9F79x@EK^8PGBzu4MIxdW2gD62&#}
zT%bY<Oac7CWixXBo^fLwE(q0E5#^oJE+94UZ(nbhSgTd%tXg_mt7f(o-YE&BL_M+^
zdJvusoQOJ%F=jF!Vmd7tC7n~zL_vTISD+v=K<*uG;c|<%A>@U`s9~$?E#|>HP1q&^
z`!Y_x=$SJ7+2pMeD91S?6*IF3dqrkH;#k-eq17%i87$Cryrrq)N|*`>4cmmOSHW~P
zkHxr1ClZ8U!1q25y30c&Pw0(SVJmI8&lS*!J=m>7;;lFB-R!`toeNB&FbC=OX8R^O
zNyZX_>AJ%_2(aV(Q(-G+Lmy6|@zZz>43Fml0g+)>&SmY#pN+Yk70edyR6+%yqAR|c
z^=>6^43m^#er>t*pt;$2L?QesY_vqR(#G*>+eW<sCQIN;iK77n9~|x$U`7`MGQ@Yi
zE`aKijbS=Qqby)usF{O*%iSn4g(U_;6kr0R1soOZEht{i7L&;sq)o)Z2Y<f3O*ZE9
z5vF!PiP0O(#_Vo9>QmZ+57M3;UXchfX#t|Z;CqzQma9$4&d!T$B@iXa2!N%zfBga3
zmS{FIsBOX-M>9{T03rwUW!&Yy5&S`vB><ay0cZmWx;tKs`sfjWEEq8Y43^DcOT#YU
z*&LqJ8jS_AYXcDzxf2sOBqbN3`AkO0k1;=_=5lBzRaN)cmLxe8aTZfD8w&W!13Edm
zp(<(g;Zr9v%&9O@KT3HArJO3y$TE+lq5wcv!Htbj$!0>ykn1wk1MKFLTOqI8YBgG@
z87_ogCKxMFEqmy_)yMBydNXW~QMlCRDnOb^K>SsJ_)#D{<W6{|lSe?}KI<q9>@Ta_
zj8<-Duk1%F`>JxZwsQFK1_vf8O1i_P$Y|2Wk_=D{q7)fnb3K5*0vH~(Ic9#+eR+P?
zmfrqhYhX@>C5spYmGw$7iVgDp&l&k8<nyVS*flXtaAVY)3Su^|f?{}{*icSi4}*>3
zkr4QW7%uk!_!?%8SU*S3<r(_=bbhQg$-gNtYEeig9=%GcPGlG>QKDMqi5b2Lu8{aU
z99-@A`117f1pTxxYR5m++bt~OPp2SnB^IGmwl@Ibp^xH}kLnE6`1GkRD$((S&;lhw
z$=9UJQZA{FPDOuq3&IjJA&pBOvtIoA_IG^=X5#hx`3VXY2WmAQ%3=lcvJ90B)$6_8
zp1|Q9O=@KwT9Ke>#>8|d5A&E;0;|u5ld=Kcgydv^<WwXmgya<HV`AUj3=mzbZ*D^0
zDl7M`QYf%Op^(eP2Xf`aEjM|Q??^2~o~9QKZk-!{U}xmDvsoE8!c9+*9%Yva0nq$i
zd`bZ@;Sho=+wjQEq9W3#G>c&~c_Uq)xS3`sL#k+rG{pCkB~+KJBhAPCoSnr|9AInn
zb38><*>1SN#P<gn&D0IQP-7ObPrcbKk3p-2j4dZzORWdHmL-~CEliJQ*1Ldiq*N(V
zH>1soNSw$b<*ciB%498U)H8V>2MgkL$nhN;dZQ0qQ5M##!f6XmC0s8vru>rjv42|t
z!CZTHm^5z&6G}HK>0AP57)oQN&hjGZ5IX1y%><G3wqo<5U~7)1gsKr??+RzT18cy8
ze#v;}$X!b!+i4gA$BBi%Lk9#vtA$OLr|V@~=F<Y(VE-34g$0?xCrmo3IkWLCTb|{U
z7k&yoa3XW2j+3pqKM7MC4I*#L+grvo|I;pxgNY?#F_ch?7hIz)hax*st(Kuxy&RM7
zLGFdC*&W?+(PhS1#mR8Y)h%fil^Wg%54(cUNRf*Lna}k!j0_K)*1CSr!^;HdzrW~m
z|HYm^i>?pFe$Qb6_;ayPu8a=kbw*uOHwLKm)9Z#S$CfRXh=wd5Tq&(~LCpkKJQg6U
ze2Tcjh@9$iVF?r%kn?n@47M(DQIN->W%B@0<j>|auJo6Ul^E)ev_~cJWMI=2awLq5
z16Jl&bK?+I<*A10{bd8FXFkOxS@<VD9$)Fig1D2Z2<i3;6)CYo3*UTd)1|@ad4yEP
z9X?<H_M$jsyShzr^KOs9cH@hS4|JUa=St|3Y&{90%tu|QvOEwHBW-L;_{UP;24eM`
z&3u126BN+(=cK(GC+eDjknZ@shSi1~Vg!ed95E%{Ob|(V_VqjwI8@(^&a9e11-*bA
z0HztAAJ_~=1S#eTNb?5$+U3~BhOyeM>3)Lu#AE1d)8-~c19st`b|++@?#)f3(7uYL
zV?3-@?V46vLY}YthOP&BBV&z`VQeJS-n6*&+Rqt7N}V`LND*D%GKU$?snr|1teIa~
zzbJ8cz@&WRSkkxcH9CbX3Zv8QoRK1Kr<hDOjOD{atX7Q?77~~_G6!=cn;~)Ruu2EI
zl<MI%heVjxSctw&TH90`rA<u{dPjmU>=$=JEXGt&L^)av^HO|nrjqEz5Q^Se`2r7Z
zQe#>V4ibT-O5WHA@3&6j`^sKKK;-o++3Guc6<X4!=zN5VVhZPgQ?+HHZpWI<LeZYw
zO%_^@Myk-DYQAH^fzcLx-K^S>6}FWC#m|DMD@E)asON|LD1jjtFd<>NPzYySp`+iD
zikkxKi2&SX0Nli?m27QS2!KxQN_HYVjm^-xW-OKAl$u*?GUA>f?rk|8mO>v3QMM@y
z5$D>Un}<itbQ)}y>HLX@>9kS;H-&^$Hh6MQtM1B1D$5(G=%frRvi=O#U4*?IFQx?Z
zkm~>~Vu3-K7B?6LbTLJ5#Qgk|Q+Yf?32n_KPPsy(*)zH31HD%9)Q_43#5Eo0C&Y~M
zQg<PAB>U-Aop4nrb5%E7)y-VB60TZ_Ry7ieicNvDxkKw16+zGpMBv^{WEY7~x=Q9b
z%@_A9LNn*bICk;L(UT>U5L>6f@J6#lMFx9+=x@PJ;@X}1-tB(4=f)xDOLqNI()UW8
zH#Z5h_kUP*69!?Gfi08Hy@#RHwSVLy3AF@Iua#l#*UAq5a`6{1!J325@M?L@!Dmh-
zoJUuoiM63<+fj*?<qtsQtY(>x(C;7S&GBqGh5ZOD!Q9+cVYxA|1PiRJ3ae6x43cOI
zsYoj&h=^zkim9aZM}(0BzO!TO?Uu6{Iu<bDPI2HI=5am&US#yx$_luAyX7Xf;$kZ<
zwBp9LvI3UhZdDRnsbDJ=Xe9>wIGJK=EoW&awx6p22=_7qu3cs*XejnhYl}7^_>-i$
zkAvfc;5bX+Y+BUzdm#>8(e61MEc|Jg<Iq!HktE$>8c_m60f?FK=JJlYi5**!ijzBr
zbPP-mAZF@Vg?Fry(6Ny5CZY2yE{^2R<MKcwQn`|eij3)YGF339AzK9lJ<c-pgh;j&
zQ{!VU`I}<CpKJ?PpO$UnX2*oe{7VQSF+I;ZhL3i3cX?|lUcy(p3n|@C{h=3(c#s#e
zYUI&FbTANl)G0aDjm8P!3i8BYodRr}9MExIp9AFfx%L@g?GwP-^8cFB|3|5yFiuk%
z-&`Ay?wi~pQ0RQ}9tP^s5BQOO%h7sopY#i;UC>sEnb4=0dVw6?c~hzYV}@?4Gfuv@
z2R`Z#HaBnWsaZ*Fa4SyUW^BjIhv-|q*~On(O>ry3xz)lo$FM%Pa%;l3Gy4F$(h=66
zhD|{Z!ZIHvTw~c9om$oz5zpL^YD5ja$Jo+s!R(u<GYWI7z+zTGDz7rReeiq9*eEhe
zTtQ)p*c1F-FfpVwx?jgQ6z^SiB<j2$yXKXPxn{D-yfvy|32fFHDApPjEKNlarOqIM
zIzPrK{%vkBvvsvhFrbL_Ae+YC*uOq<ONhPd$@OP3N_YMF7`kjV9pByWvK2Z6{HNTs
zBH#Bdef7y3+<6~nbU}{?+00YOXZt>s3{}tT#$0{+TYE&yz!vL!L_hjNKl(#I`a?hZ
zLuc};_5Z{Dq3_9VKY>5=%5!F9>5cd~8@`3y+4-}1J3o}W^B=#3%*$O7wkCYUdy9v|
zkeJ<!IWClCm?ox=oU0r;49DlL5Z6_F9A&|_^aU&U7z>Dw1<?&dT*3cY0CX*Yl`z1{
z>Rqe=#E)NvH}gF_u_GSMRokDr>ewF5RoC)qu2!O6&By}QaA|JFyEHe`U7FW(gr*Q=
zhF<3{=b3D{UwWw5pqrNR(l}k+w{<pnp*Vp0(?0r)&3ttJgA-SxgYz`(X)1!YCAl--
zEOlol05Sf|H;?|ztNSxQq)+qL=+~^N&0;KZbt9C@0~W^mM3dqnn?p2!P4R?oBB0Oi
z2Cba|h4=E4s&r_+(GJaj{wZd94Yz5IcLT&U1;jKK1fl<L?p6I2`$FT)&@3+XhDI7r
zbA?{t7aAd_J3=?<N|31x#V22Z3$(egQ~jSs!I1MaJ}nWO;B4q29H5u`Kc~7sXLMhN
zv(XYiX&UjSDcv24)t=o$nke&OT%<XXeR}_B>hfREK^kU8dq?9mZ7Hnn=s7GDjrrVU
z=Gu&SMAHiv97xFI3vGZh+3%~y&!_c0U^m{J#}NVYUQBtw<SXtkERMt{jn`+Lm-Sb*
z%ab<y{(AyozB;Y_Y@GMSaaBFYOUDtbh!ZMOV&V)Ld02@6i}7jZ+XwC1Q`5EidOYsm
z%dt7wEaQ>A#L_*REtl`rJ)4EYw``@dq!n`Rekj*w?f)1uZfmoiiGE@IbC#Ejy3E*~
z@2T)BOUv!&R`$)7R4)?SjK>p-q$Cl;;|f`%^yEHX-8ve8PzQ1Kk2dlWzGLj>-NHU!
zBlbH-yff>&p)#&fzY=;^vFrckT(NKdx~|wae{olAmdzI%*o{=1;F6>@DU8W<WaAfE
z@5Vw^z=FBYvp11l`6lQ`E@yGOFxq}^WJE3d0;Dz$N3TXseUZDo9|bG&8SoSwY<3e|
z&3A^F32Qu2ed_x2JJcIf05j;d1kdeAvnWvP#=x7%kY9a&7GNwb-7#Kso15O@|Kf1b
zyD73E41uX=lW{medD@!=y~yw*g?Cc?<3q0TDPHl9PVdFkMH_W`e^BNBqf7n$UFz2|
zoN)Jg(`ph6*NJ+cYpci?b3TvSVQ95$^U4?VK93r7hy`gg&1Z8hk2I9HS~o2QaF4aS
zJ|TuEe^s({(#bN0T%2@riJ3+xZ5r2edcXP0JH6k0DW`XRrkJ2--TQo)TI8|9c<scT
zD2&&x{kjr*T`|rR*&Xh8GCABY@daJ?IG5&}&vi*r@BAfax%-{u?sJagSSK&nqVT`?
z-6Rw<e7ew{VPUPYC>Yhu!j4tgEkvN3h20p)l_bcO{Z%dk(CH!TEr6-ejM=vUlsYW&
z3P2#Q00i<1Kw!`R8v3qGsugQ_uB4bU$!}#R|IeZcNnRFu0onH*vTGsYZV=pjy2}F*
zu5UB_be?}NLG*qHiKw`ce6irOj?uVK@F_-_x(jBylkl($Wyvm+d94_C8Z>i_Veua$
zmb=V%7_{LpgS~C}?S+ij7h<w|JwWO&m9*wD<JR9V{}*L+OGyHjp8w0KJllO5%Kx>y
z`+WB?|JP^9|5ePHNxFKNqzg~04OsK&Sv{SMxe|68tp|*XA(Q4g4lt{D(^q-2(42+h
z;)3!8vKdd;g-?X#4+H1TN?iL4Tp9n47bs8y?-1#zOF3@f_ou^Pj@NuJ(L~Uf`4sHD
zKg!RZ>zIc=92fB>kliQpI~1KE*OuV|#aW-yc2P!%?wC@fNHmPb^PnekdLVZl05F7M
zHYZ$VbYIYCaNq?)@<xbuAsCrRkX_)*%1`SBc7X4BtMx)AuIP_@3p5Xj`%nxk?O@g(
zyhV-I()?-Q4Q3k8D#)cd02<SoA5bN_Xoiln(3=RMK)-bfLVYl&G*%qe7^LYTkCWK-
zky6MP1$Ga3;WK_QZiwz5lH;&GbP)4tT)_*DiP`vSeuon-2nd0Z0Re~z>IFzzQ>2~|
zr=!`7Mv!CeH;oo+oxf`TRBP55e7<O&|JZn0f60!1ftPi5e17puv+?>(o4q+dd0B6^
zSncd3JUxS5?C27<uonAAtpzpzfiH6K>lar4`J!2Gwb*%+HBK*18UPKzH*06@M!f|q
z>FoIA@@3=fb%_B4c6Q!oCymoa8;Z5hOVp~UmNV+H^H=P&-aLK-Uu#E=lSca&>dLD|
z`wSa<1+CTCMXlLx9K#;oWEYpsi}O~U!N79tWutX`Qfr*nUv5MD&?c+@SU+pC)|=YN
zNob(wKfz9oowr7LREJ(-d~|GykMm`t3A;EBNPjv8<Ut2dN=)PyEde^}Fq~TRmy!V5
zs{i*T6oQxRW$m=~y57oxJeH0Xn3v<rX8ja94VY+M9<|zy_GP=yUZ0=8BwV-Z%^w@b
z^%jUcC+96f*k!9;f;QSUY8qewwp#o6`RKCMAiOos+Vy7h@&Xf}7GRQp0>nTsYZw!r
z5PW{dM+-<gZ~lT%5f3y0CHB*sIy`S8P6=Bz#6k<OcHB0LL4$zAwmD38R)2khlF@M;
zU!NnGpBk-t0Vb@`!U6yj+x@8qjb75Iah{-G{9^;KCMmZ>)5#jISncJH4eYrn4HVF7
z2x=j;9lsGcN9r#=@DI;bbPOvS-COrW7DC{1ec(i7NJPPay?XwfmlUFdxC*;vUxZZ|
zn?ORJe;_Ar^638lb^Y|m?N8A8;!d`7j3QlamYj^DXf!b+OU}&rK!Y)}c*=}dGkMP`
zgmF03%mdWsaemDWu-JD1bL|h`>Ev0Ms1GxOl4X`#Xr<fA_=-xW+@s>Rowm0NxG&#~
z``S%cWhTxQWg>gJ#d|<+4{2Mop7Y{|<jz0KJq*rZ`r))f<F$vj;*B=EZ)T?K&U~LO
za(6A;ZQu@e8IE%{K@#cVQqf7C>v`el%zjHAQA}T>{Q>|h+*20gBOO|&JMd;5boPcS
zs7yEzZ+TIfU*s?$)5r>-<kh@G`|4dwPOaZRq;%=wdz5t1Dd9dIz2myTl}5uh@Yddn
zwbcOF##Gi$x!T99TYXTP=YmH~gNZ_&BmOTpbw`U}FmK3PlPVzxpCvv&gl57PVMyUd
zTu(KV$!9_Xtt}fDsiU)2NqRrGarpT{$+{=S^=<Oa)P9pPqTh1#Z~yCE*?Lan@64`A
zX<uk%8*6iom2L2ad#vo$NjDYqUxIWM(dJS%Et$48r3g=anhHe41xYD1Q-zjqrZqe;
z<l%Mumy3D{3#bop`g_&*Idzvs839FxWn#kG3)Gx_*~xH&R#UQ2xG|(Pc|Vrx_zq;z
zmmgqAL9gT(1dT}IO2GZ!a%(R^(L#~)^U&4O*n(4C5I%A``;Djg5(Q$`m3SFmSVypU
zdC6f_M3?CV`29e=IpkMVN^F!|f&$>1DYM9jyzeE~P(v&rh4kL1w9d!K%Y8^GCv%xa
zafUvWUfzj)Reax>%%|eb4VqFhNi1EGG%2WBl@pKGCBHrU7S4PjVFMl?<Ku7J-|j1U
zBmzb<CM}!{>4I?@X623aQCdlUpOJ5p_)C)~d*7K4C;L$uscFhcr{3g%qej;(Vb-d~
z2o@iotb;>#2nVW3=RHmf<!*o_VHL>ZY!+0Psp%?J+3v}9^TumHew7Ldek$J(Q8<<u
z!wW;h{Hf+Y-+Y5Cpp)OAxht`aA^aQRKarmc6Rvlr{*}nlRbqKQQpwAT1zLR@K(aCK
zs9kuAU5W#W)P*p04O)R%{sZUzzfrJ|B@9-a3(@^YeCijL)e%QZwDgHjv!!+9{-92x
z?B7f4%v2rOPOlI(iOn9;UM3U@Uz7dk!h=m{;G^6%O|ki`VE=J;pE@D?k6U^6^wIwF
zS?oVMrsily+8)%|)AW}xMFrX-?tHRH9B*zi?FJBZr{44fDp63eGXU|4gw|^xo^Qir
zd}ElFj>)uCX0JVez+NwWV3=KR(PszO@L>B|J@|jYXc~CiSJOiQgZkIv=9^<Q-@HNt
z3Yq~H-M7G^Wkz&ZbVC(l;&lA)U@-8o{sk;>5K2yho<D-s`EHxJ+vRQ6xT5fJlMkxO
z#R<Ffczg(D`tbV*LYPDZo=WVO@q%vFj=N+-!u0droZZallf9jtA#ZwnJiXolm^;wg
z&URSWcu|AZeo{v{v>DMZhF(VXi7Dzt_M3W8+{pzaoEI*T4ZRU;!5z~6b>=r1jwcvh
z=WATm=g}V$Rg3H$4-+KBIWuo6iLyfwLf^Lu0(vBD@o*6VY%K(&T5b7r4pv`ujK7fk
z?nza;gM@$<--R!+OW?2904|~!@sYd%rh0x+KRdk;&KY^L?ZdKk1u4FReweTJ!#)>O
z0(J!uzvoy^pmmm)&8o6g(veR^hF3YdCF8jApiF_I13fhOJfETkv5R3R1_R-2M)&aX
zJ1TTJsgTi3D16b($k7|347{x;`FfsDJx=qeOV^Kd0>=ajQWJuLL3d6)CG{<c3^35E
z&YTS#I6b=XuM!Kf=k@TmdIzs4&n$RYaRX~^4A8+<jQzJM1s>$rO#$yl*h@J?X`&|P
z#5Y~M!1LS8V>5q#)@?^n!H3+IGpNuo^;bs&)FCwResU0^@31!3?Dr_9(=>nPfb!Vo
zcdGa4U2Yj&e#g-p&sY*pwuej{PG9@Op`@V3=xR*OhacuKSk3IisF&ZuAlM5S=!~4k
zK=AC4Bb53!@JHA48~Dop_^9t`zZ*jzaHB#XVovl!f@j?7R#5k?M;vr0be%kSbHlo!
zuYZN6kg*wkjt@LXh@WU-2v6WcAvTLj8)Qf+B_4o{_)6mFPqI7S{O0o~Vsr3yEx2&2
zPEsL7DdJJoN-6@)>}I?epy>qm9}qh!JmmT51(6ar_Zwl6X1F7<CwT#d<-^R8;-j$8
zYuSDwXSwA0y6fzn2m5!IixcC~-~$Dm;AVc1A}9V79?d3$V4f3~$9)+bSDFH8yhN*t
zR1p{-QD0x$^GF&QlNcgG*t|F6mgwlaeIJlA;+88JZAZYlyBQBej3{F|r)=oMQFs*0
zh7`VzhrOAiQwMi)<>?-`6Vr<8usrea^;A;;0U(roxx{ItEE59q?SuDCXkVazAK;Ac
z_ihnOG^cvv#gyZCgBhQFAllOl5lE|j<A*$E_V|mf6n?}58?hdIo70qD)2AuF3h77y
z7@uqElk6lT{efWQDU`}f|Ee`&^;8E4^4f>Kj~CZBVz7A9TFiaftzZaTDB3mQ5W-<e
zDNe^#W?Htz&GE)xOhKvxd1mh2`sk2@0fc+K@nSUBzD1G*Z-Mneud((5Q9f27oryT_
z#?xD*N)il5QyrUxM{3rw!bDzYy*j?<e)M~xIQf&VDL<eGC<ROL-X93nhC@k`*^Xo`
zdwV=I@7~^MszNxeh`0H84h6g+p#TctsP=O5>`h;(CnyEN9zB~&Lo8RcV2J0KcSO7x
z<5@bmMoQ?lAzHdU4MuHmb}QnDJjD~#zQJIEA$lkIGSGyWNW^jmyfGPfMn1CZl=wJz
ztWFeSQ5w5Ckc6+tbklf@dytyaaG!Js3@M`&8i8sTcr$UpEt>_3TR1+Dq*&t=)Mt(@
z1ErR5@k3SYmsp0_KYJ<8d~%2?DgHY`-!tHsp$1JBFNIt{ffjNP9nRnpfiu3#0KXYd
zN+OsEUy-;k!$kWt*-ugGdwyofKT-QGkCi3BW1JTHp3t<oAs{5Icm|5x7e%POJzK<;
zEO*;{+~R1-TQdP2G&W^U&@h>^J8wdZz-Y-2I(@$f+{~A`%~Aa5-F8SQKrsoP$jvby
z7LEjMlmhr%lB6kui+nSgde=h_Cqd7mi4NIib&${*3MBS;5k#UqoWR_kS#g%&fVGHa
z7(+V2chU9#PI7*GJxu-oUHIspIP%3!coD>pKpM+4b~44)6JDh|824_E#}xRIcnK^`
z8w>?Q<dLJw@8z{Oqq2-QMT|C5nHRSTsxLbc<~p1fiZKtZ9#8SUMNm{w_9F;5GFuBa
zooY2aBEBtb0jND@05Tx;j8=}t-VxJ^#9Trvl>4=(TH%0%viWNKYw-KF5}Wb9mbh$S
zzT@&UUQw}!ro!MD3x+StUbz(RAt}}QVnx^meX=%g#PB68t&wp+RNaq4q56O~7(z;z
zNB@7W|84n$E3f|+HoQ5v5Axq>s^E0}@9s1ASvl<g|MZ#jsQ>*N>VM1MJ?ejnyR_;j
zuWB#<b>28@OPi1Q6IKSNm0ZQsbcbTzFjRl370fpQ7_E1Wj<+<CD4?k;05F-84nGUU
z=zt=T`3-jmydCK1k8Y>`|Fie*ZEYOO-st(8PtjR<HkLqKBwvyPh`a*Jw$8@DD}<Bm
zm9_K;X@CLUwnnnea{S#-UHUrHGa7NR9S5Gol4kne)z#Hi)xVkz1~*A}N$vC)-3$h8
zY+@UB8B%}f3Neq~XLbdvz950r!E~<+%AJP#Q!6dtoa3}*+4exNl{BIJvMlJl=-~Ga
zfgC%1A|-X3S;sTm#8_q_{Ij}hg-w#LKj7;RJ7xzJA51bSqSXFL6|S!Si?1KUzW$+f
zjhGt3x`E40Pz}P73{b~m+Qb!Jn(KphBSUKBs?`qN+)rtZzMR`wWg<q4H@Z4tJT_=N
zn#J+dp=5-fx&xYC0vefR3D-sHIuw8pML|(+ammxr%hmGj##cB4?2a$OvR>L(FrCi$
zslXlE9Xk2n6G`MAWo|C57zWmYI*TdmMm=r&{c)1ZLqVHLD+c4_J0WY_h%at{n$Oz?
z4RVpyO7lf+G@dlJRLrU!P4XKXHadEH(0!^72^++$iYxTNi7k9ePcBNhTYb7>_3;;3
zR>6K+EUvh}6~=i=;T2PO#Vy>_zbY|G&6DCrp%SaKvOuL}Q)$`TqjIe9vcGU8|9eI!
zN$DX{dbndcK@h3FJc5X4$9}b2Py|a9%A^>UBy;w*rZT5BPD4zyD%7k>db28bYgRdL
zvno!r%G9iKdb7$4G)u9u9?MY_wv8l!!9+nFdz)022)BrIo4#RqH^Efy8{@qmCY*n}
zfWpF}E-lZVfvmdhWzo5Luc|Q4eO1&|f5fCLNoqLBB{fZ?)O0KEo4GISCH4i^{nFhR
z((GK<mAHBe4%}^2Fke?BR(1ySUVL*vv<7PE#Jf4jA;A{@jB`2Jt^SN04<Bs&jJoZg
z4QESac_y<w(^;NjmS>zrEhsiT;E`%eONkxn5MYSzszUHBm7VJfeymo9lk~NOX(&2w
z``V$;PcEJANjly9mjp?=E<wpQFo%<@>?E^ssL)rEI_ig&bxNr?rMR$VPQs^7LL05@
zbE4v(b5#7yDeqZ&#m}9D&(jmWa1y>qPxzCQ@F#Cqn3PwMVqy?ono^<w4RexI^l^-8
zau<w4s|Ha2y0S{^&ZcVInXfA<bbvjWNx}jeWa>lG`kBe_XD`@K=K@c>(ke)1T!Fs0
z(lSYR&L;uh9X~%0CLH#2GL3{$3YN_Iq$F2G(XK!zw8J3B=>Mt`W6r6hv6*1hgyI5g
zfzA?X*~)3z%9+_JY1t~7*`B6ldzzW;Sz5Mdnc1GFWqY2P?L}I)7n#|9O3U_BJeyu7
zKl>JueXd<{F_SNC{2Z5Dt|Ymv{+vND@dI8AGziXQLLPZn;bi5a|Jhyr;P8SEF2<#k
z@-iW>O!V<~$!%G3Im8+Vr(9~@&$*iCn~4v3X2Lr6CMG5}dhP_!(0g{Qd5hM&Yvtaz
z?tg3gE9^Phi|Dh1NM&#9-VH(R1UuRL40N5gi)WA9#W%NIXli)&04(*{ch6FvJpfC6
zb{{PD+4sUyvj{XUBTp~Gi4u`xN9$bT?JBD(fT7t^a;mJG=hXEmcz*AK=Yo4?LAjq5
z3pSCJB=r^J<o7If?|dY`+wuvr>2jD^gJ{j}nbJ_(;ee8b>TXQoIYfnLF{z)4&zRH?
zuC88H5|Q`fyNldVJN<Jv1|Cf(a75kD<cwpc99o`~QFO;pk6_socBp_gRbgkrXleL;
zM-go9iuINY6+@@y)a-%4#cJ2q>|j#P9%zpaCS^UX=IXfyNo_(o8fTQX8!O^SJOA3Y
zbLq6cqP`T`C%?aaThuPZXNX@yL5W9WgiDekbX-iPnCm+NU2SOnu$ZeLww6db#sEHr
zcGtMwKHz~^F->nV&CFt&o??Q?tFu;^1=e6nR0B&l{uk?ej?THt>UeWw4RvuL;*Dyi
zB*sb5SS3rJmg0B#PFl^Q!`JP`;X&iDZeSX0U&grh-ZC31^}p9D>`CXf5>Pq^{lsRI
zlOVOcWyWnNiGJ`t=s#`K;s)lfhE-ZMmW6s@%nkGB#3d;ntb?Q1js4m|`?sTJAvWO6
zbVP$+gaSzrTS6nSSiuWyGCG_MS}~hUBiiR{=E#cYHpGE3NN;|OxDIaF{DV6;KZDA3
z^EJAN{LSDrY)WGCl#oxx^28)*xhQA|8vQ^Lk0y@ogf;G-IL_kC<zD#+<PPR=54un4
zG7U54Nv84g=&*HHmeJ13GujI=jfdbG+utHlq|C%uA`)JaUy%gb4@?6wL$$obH8Ec9
z3L=U8BOSB4DorqFjHS;uV=JqFY`S*w0mow4zC~$q{2h!uKZoH{jV*PMxqa2mct@K4
zjwH&_YfN3a&{17<*#?9P-r0rf!AkcJOZJYg2J54nKbLNNx^IDQ3esbt?e0SEJ(ue)
zkd&gh_3LgS&F=*-#|P3=mngB*AIZ3u|1EmqB<Lz+Z&~u7>z*`eb?TZ1P*ZVxUdeA`
zB|~ax@IYb?L<78oByR!@Sb2OMtM$5D8g%8b@SQ0NSWXqy(CeeSGU)nW@A2!dzU8vm
z)hZ3JD!rblMzi&=^Yel_qu!zLJuZANUie-?Yn?3*YP~w*t3(k^Nthc(<~#cTxV+j0
zZL?~S!)bd`$Csq91`G!?8C?LOMjqy)O4^qFu^f&(>pk9+;iS~`mN-e4=xJ_pr7qqD
zBj5g_n8ubCZ3XUyrjv#T^yCCuiU)GkxvuO)6g}p;vRb|Fy80j6zfxYlO00{@Dl82w
zqPT|c8Ajjn0i)!;4^YVvAkmS!3KlOk4(}r4HM>j3{Kprp(=zYSYu-fDL05lB{UnkJ
z__i|?{lBwdNO;cO%h~W6GY*2mXmVrnb~t7neE@^tP)>rfQ};S#rK4zCgy=k*&^7c)
zFw@!`HNge7N@Q4}li8V!DRf;aA=ZN)BX&!4Vu_P#j)nsJrh8&;<Fh$2&4##TN)lyx
z>D+SE=E6AU0tY*=oo)oG2JOVqUCX(fORdOKlUY?XV}HJ<G9NeLEa<}&`^zC+5ymt~
zlw&z$5><xmJ{AH^4!TKAn1?Roy98t%bW7Kj;#yeQ_F7nd;~;!1$M!chk6i)?D<yOZ
zbVxwA%r@Cs><#e^lcgXXsR|Xn>5>v8rtwH)x=2ZI?uD4!jAML#`>d^*1F1)EnS4qs
zB^oB{aC@f_yS|4J^U)x6o9K|a<DDsyN6@Clc^k!T_Xj@f>=tZ8uT8?=GU2kG8gF-L
zb+oOcmIuV|Ove#Bn|jZ$?h*G<$MLt*aomwe{-!#Pt^ox2Hgp_cS~cJ~!G9MA_{Sso
zmpp<$l&BIvjXT$W(vJGsNANFs$W5FJ+i^=yLg=}!IETV2V$L5Qr=No4eYCqupXB`D
zNS_Ju{I4MyW}=~N+-pEB@#};+CnNo(?AlBBSm#Cx+JO+BJ9e~grTFr8gv4e?WhyB-
z1z(x#byo9LmoUB-Mo=!Pa#+$J+q?Z_mq85VKBntBmLXLRI6Vj&GTsM&;3Qhv41_0b
zgmN1`K&Au#AoRW3gFRI_DYmv=(uQzAKpnL%Mq6${N>L+fLPZ<=+MZiCF}&v7x|<8s
z-86OU3yf;NZbYX<Q&qBK;OH(F=*gc42u4*bR;%>0WHMM5W;>hpxQGzN#tf~r`&M1e
z`E@ltbv5VJg^N;6ae_i$2GMla98L5=1fPrLUNkM4$n;{l)0WqH@kx`POw1EUE{sDZ
zOJrdvaveEc0UAlg$zN!^$9KqG_#G<n_*c;q_o&r|Ogg<D@7V3p5Vq_745SH2wc}72
z5IjCa_6Z{vBhla^P+50cpvDxpZ&y*%t!P3OHPb4ZTxzwW3cN$5)NtH{jOY!=YJW>P
z%c~9ogvnMfiR{0Xl(xi*vf7ngkR)Fyfu~LUZ->ZaYwK0Na}g~o{q|lsMH`>`DYmq5
zTykXjQmoa`+pVp)osS4YQJd~_O!Zbc2<Q++uMmPO_Ua(@Re6^YT%ftU)TuuEk(S>I
zh7MVA%yh>auKHds2?dKS5z5ri+}+v7aGjDV*5bT!j76S7PH-eu?X7%gRI~8(T!<0m
zVG#6MBQ2K}i<#a`sU2)C+QH_%b+8%hU^8b26MY~)>RAcG-v`&xNH2smGW4AA2+USz
zTxLQI&d-~{d1138L9N$Ev#E*^L*+n5WP%7{XYx*xc_;q7lSE!-*-l=3<jWgGv;Nf0
zZm~UcnDU=!i6_d8mM7IcA@5}&Uj09hf8QkjAAy3WfB<IW{~_4lix~dj^QSL<dc^<x
zBKUtROEySfmx8pEOUbbA>!Q~03rc2!6kt?oH_$y<k7pDFNkoj<hfmJJVP|rK=k);s
zF5_Jb4ARcw+r?ni!_e(i4={B^ET*{aQ#69>xbzyQ)<nB^(abT<@D|3oj<tzE0cB$l
zhMe74CIMmh5%4WpfxCJbh9eNa@qBn05?Aks5rHDw)eU>Gwm^;Deg|Zo$-1uz;m{rg
z)dXHvK~^iU0R%m`U@L^|TyNB!VKnHDQUyY$4IL0*fLQJ!NK$5;HTa+10qWCX5K<<>
z{zZu#T9Y6YytbiJ(9pw{jE&~9Mw19CC9CHdVM9}30N4TG1wq$CI|c|#tT>M7K?5lu
z2E&#k@Z;!w`T-|g5)c+fCiinRtxiHDtqD@kkm-nUosm{CjJl4ZF5vrd^XOgUW&Net
zqloWf|LFMlX5;mnmUwe?@Uq@Kp_uR#>%Fme+B$*<|EQfn&VS$wjuc=2?YN1d<&T=8
z@%H$j0i{9l&DvqBQ9pr|lpX0FYn9oStb7>0{cXLu{|0`_xboDLSB)0Mv3~`%)x>eF
z*=p>c9@LuR__TR^bW#`4u_f`cak76<YrL($T*tWbhes4Jz9mlH)D8|}9X<N>u-?SR
zJH6biQ$YKJI@ZMfeA#H$G2VUS(0tp6!Gi`Kl!OeHR)XQELw9P;-%GO4lluQXg+%aD
z#-cx25`{UV0`sze+N{6DMpGdBy^~g>b=s<n*GES$X{=A`&3BFc`U!|12S+C~V5cYb
z64cSEQPofejMm9Ee&0JiY0$WVOi^z(PmeL8TM;JtR~Q&*WsPDR;Q${Ua&KXfj+(z?
zsW=Wa0VVP4n>svi;+WEC)o>I}VAS?ob~30C2C;0NQ;;Y@mu^qnwr$(CZQHhO+qP|;
zwr$(CyYKnuM$E&^MC7ij*LumwjJ@{yOx%>PSM>1motyMVSH;w)>SB^S5lE{Y0wMCH
zt6x_!l|3bUMmz#t?^i*iGYhv6Wy;LE5nIx3G1nY3Fv<6d2`WNDZGL00odAtds9wLq
zqycbV0>Gfl@D-TADegFe<H*=>B2^wP%JSGrAmIqFf08@dN(yJ)KK7qPvt0-uf)V@n
zt}A2oc9wQ~b^QEu^Jy9AgsY$@)6mLPNcH|yy97v?t^7M$nk{7*i)%@5@Xje%ySR%%
z#!MIxOAX*Hs1h4){4}&HuqTj(X{-VNon~LF!aaU=)f-yJIUF*y-YnLXbtR+fwIQ^(
z;o=#xVXVP3jalqWuvMBGvfW@tYA6MJH*wRjt}bt*!V;kr2@x-Ot4BcA^>Mr_CN@3%
z0q?7bFHxo6PDD=y7D2jxow>`qX|2mM1W~Ncwi(^H2(Ch5M7_7;`zfbT$*u^m_1kmg
zfGAZ>MuA=p1YDR1owMr@SpBlLozw8p;gVa@z{p#%jaal%BI2H*9BUN0gf`2Fc{2WI
z3;@4^?(A^iql2g_a2j}dsLs347}ZTsP`s1e2h4mt{MjIIOHz_n0E~@(2^74w-2F7L
zkayJ894r_WqR+TH-gOygg?zDMKJYTUox$;%IXY8yHpQq$du$_nsb9PI9Cy`Hk2No#
zJ&fO>w6r5*F}=m;;m{)1ri+(TXh|(<q4RJ<+_J@WOVqk1j{6*I>x@f(Ev2G7>v>PS
z8$0uk@FD+Yeag8O45=o6nw%Vn;fuR3Y-FV!<G(MJPBmOHZ3f42XQR3{Wfiwp_1QID
zPRK&hf=4Vk@F=@3wpPn%+Q{hBS?_ua6yzT%t*b-pG>Su8xh`m9ot%9jV3{aR*XmSi
z)p^1Rjs^(F;0T6Ro4T9H#y0L^gPY&WR-D)FICJuT>vJ!fvXL&+fTM1ICvKVCt0Sl&
ziTl9TYao6i`%-%<=DLmJTuhY5BSGM(C~jZ9Y<yoyUJU{KJ(R*cFK(QoMc$wpQNh}g
zf@)K>Vg7XmtQZ~l0l<3|TNcd}-}qWl#@g+DFl;PeTk{idXz0j)Fho?-#pVuw5S$Ex
zqNMxKlkk}TwM{{b^BT5!i5$&2(AuVWprFfqOhMc2^gP(yFoQf7TQGc(mb{2C)qNIz
zr2Y5CeGYcvpLChJp5i*<ZTB=hs3gmbsw9JT{8!x<5kJ_^LjttVL;X2&pQ)M_>0&iU
zkcd!KWH$}{gWjI?9f`9(<zzQ2<zmmmxEj<t9P?8Z+2$vTvdoWEWSQp^5nyimskQjK
zNx!#Uf9R^Wcz@iEEsozGg}-i3>2SaO`bOV&`RaH;0cIcQz0`K?a9AGM)6SElfC2I@
zRbO&dn>dYa3pR0Kc&0AR0fFfMs=50xg$s)uUbw-ZV&^u-xq{^+U=&*c3$a4X4Qur0
z4Z=F2N`u#1FP+;6%WLJ6N$W~+bOmT|lttOgLvV1!84d>{qW!^%^#Z^i_v)|r8oWD)
z8d-IY+j%>3_NITp-Aal-P5#lL7l1pwfEe`MKl8MMe<W}7h+g(NxUO5-`Lfa0{Af;w
zq8rtZ$lvXZ@<@!t!j)n*ctgj6G#lTVPG;NY^P4u1!U*aZOvkNl7PfKF++V7zTv1!<
zo#X$kt1k;qD%otsc3bfx*>WPPk3jIa$8&n$4mGzATr)Hi!)E75M`b&bn;Bc>ugN-$
z5!ZXFg8W5L_#&J@uWKP@n!~XZk5LLh@A%r3c!C7#Y}Gfz)Sa@AuaxCcQ32O`IO)}H
zp$qB_@gw3x?ZY`wELU-0h!tJv+1T=(oA4W}?#8a$nyRz>Q$>ix;T0ipI8Hkr7lz&3
zV77GA_JY@FMW=Lnd~vB;ae3;>i7!ev168MhFZU6+VLOaIBg(YK!cq${LR5$rGEG>}
zSpBVyJZboN$nlHt1c>`?1U%!mu3`{O-#o%s(V#llTxIEP^F{b*qY-da>k2t;tbB!9
zMr*5Aqgum1Pdj>mV7qc06SYB#H#}U~t~noV%Oy6_IHTUKNrQGh#R~rynMz(s;GX)&
z<C%^&&q6SIzdANzM1h`0%5$AYTE&}1MV3Qj#*#xr@jok6hQ0<_d0%xzlF^7{CAh5D
zTQ2E6w-aB@oij5uo|?lhi{3)5D#eD4-vOM@yP^)CMF3J`oMdGYVx@m5G6lszb1KC)
zvS2&Zrx+-)KroDwdeHP)PWGn`QKgHN$!nqx>6tU7e+Z2{?Yp^MfgVe?q22H|F(`0Z
z_Zy1nbwBuJWi&{mZ!eg?(nb(aeK!-7913(ra#pbvI9+0R6KeKbN`U~Q<9h8#up!&8
z0K?dF;6)~{Qa&iX9$RV*nFh&E*U^xXL*@LXs*?3w!@`mfLYB%FhcTqoRcBI>ddE=x
zxMCA^Kb8jvU5`rIl}oxASmF;RV3H*q2zSdU!F;EDE6rCLk9vcFB=JY}+?-$8B5ZDS
z=+QBR8D;NJI8WD^>u}R^nX|J;>rXUcjLr-#d?UMm(yFz#^056x<XMcehpa))x-;|H
zn0>|NBZrn*6T|Q`ST#cEAjULy#w4TDJ8FhOGNuwMj62h#p5_U^DePAUj-`Wz@FH}z
z<6Dbn3u=uJSK{?DYeH7c{T;`mHe47?4!--FKd3apX2X?<-b>?@Zf=?n9rE}UvvYi2
z*xYn1^F!tR=4NL&tyq&U9NqK;>><c*Ke%b}W(4y`aP=vhQXKs=uyWi0!60*Tq?%0G
zYLE#+s<{*uhuq{>v-SrKbHMgf@8G3~3EJE!VxMy&Zwv{|ri6_A90LV#dxiRTYcb4h
zzOO6^3@He7dbqTRNa$y3y0{^e0=W=KwNSLe`hCOSqO)}|>5DSki!u?2Q3f^kO=ZH>
zRyA^^Ml6B10Qpu=u?*{6_NTgQWC}6Zg|BZ_nIs{23q>i6^P+g#5@fWZtPt@dr)oV*
z#xj(M;zWxucs<OPI2Cj~z!p1$sou?jC!55gNj==WS)gcE2+*Qe@WPGPpjr;N*G~bn
za0gn)r!xbEU(|Dj#nmFyYB5cWV52EWoc$2E<jBn7`nOaOx^^`BUg18B0U5+>P}ZFn
zGE#ijl$V7pRGWhZwV}VnRCoywwi1oEhe4W$^*VgqKAXOq?9@x{SU#KY=KDb%l6*QF
zw1c2%ZXo+<FZEzspm1CI@gZ#J+<>sYRQ^G!geu{jPi-uO(30#$Od+hHLdUs#dtU}`
z#vqffm<cnG4{s~xt{B}4n$Ac3c)fOymC6^-cUz^lx`N>B*ZA-cPWk6YRd~{9ZpVRk
zugo~JHJUU%iG6wFA7AHRGKUIF(~R-cQ3j@2_XdW8y|{8@B$_Vm{!qgU+?)?z8qN1m
zb^0%^0t^$;Mc!w^PcxrxS-rZQJ|VknDo5n^xssUQB7rM|5|)-HP9d?TmG`AmD?(X<
zvrwow9fgN+`yCA*z14WROPgdhMW)v-Ju;$Ohe3<0F+ybe?L&ucCuW=dPT6XZpT+2-
z0^^xbH&2_#XeKVL$(GMCVHdq0CZvb9+l*MPehN6RT>E$7+Q6<*upH_#<c>kyICA!R
zTdce|J`4e}=SlspXn$>V+z|>cvyp=$t+%qna440g0rrfoimhYu*qAQm4opo4Oh4?I
zeCji9rc75j?oXGg2$}3ytxvXY04;zQ8u~?Nr}ppzQ;FxydP%aQEwQ<zwh<n(<z!zY
zTxf@%N>LYk#x@TwC(gwk`=2C$fuWvv)zQ8vVzZnR|0Z*)69mmE0%;PzRm!JZ3|cu}
zj9f23qa3Y~KRM__S!eDbLZ?oOk2$>wcE15p`YMg^8TSENT1(X5*}S(M6!{(`#B$ut
z?bU58o+H#gXFdOR^ZZzKd|j=}L42*A#=X`3@Ac~c`(cyc*9m)SRomr(`gGY9!}i>E
z+l%>7l`QZ`*w;CZ_Au8+*Ocp01EhoO02R*7`)dgaYF`gC>eB%QWHho3yxHe$C(0M=
z|K)qgh~BM6&{$G5ndW>lbNjM0asmlW{NrD{?bTGzX9MjvOvWwpUSPA+LgHofC8y)T
ztyK=p^~x&@YP$#EWviE#pOvj^4KTULla|M2@*tw+VtC1^d-H)wK}EDABJ*@}{pmCI
z4CO($*qdZ+RyAVxUD??8N8mnVr?Y3mNIb|CCJnkDJ)J&qpvNtzZ@O96cKgoDyuQN@
z@|W7AnxL0Zb4pTBO^8XU9IGrNf-uMYXQQ@2xzIs^nHs{^!^N}xcbeKG*9^_ZI3Yrm
z8sRF6Rgp^{bc5R_Aa0Mnq0j)!j^7JiBvHoPK3<u3KLpc(K?Hj#zR<dTk{q?evT#nZ
zkuK}5$Kr$nRLWRgriV!u)qMb?;>5q7f8orxI`#aqRP!XVJDrFjJ~8h4-kES09~jbr
z8oAmFW-ci3VvFuz{2b|QCMk%_oVzIxZ|-rCtPz!|>!7HgNkUAVneAI<1`&Fbc8vM(
zMJ%)#Uy*)|LfJlL=kpJIx!H1D1k`6D*n?FNZ$YZDfKHp<RV5FC@q~$2CFITlHjU#U
z?r9?C_qy9IZ@<YBiHAY$?qQ`Z+Nnvh+R6N)tZ&qViL~zAFPvoHBNJ)uCD6UYNSGR@
zpA*_kAHjEB^Nq32_iWLx&&T4TqxA3R(|glt+&}ff{m<{=@-Oiyhb?Xo_qLtM1&VZL
zm@0y^_|Kw(Co-b{nwZ{kX8Cs)$R@9KLOC8f%qN@tj0qUyn8H&&<W-$e<_I#V7ogU%
zJM)!@<qUX;40ni6_xo<~^a&V>H-BMVdbFv$3VaP6kN;!C{0McqU-<ZPt$0qUQMz;J
z>M<*f?p|(Uvt1Z!cFL5riO<7Yn~jHBjx|_2N2Qk+^-(970sNugBlo9I?E7YW{oM81
zowpc^jB{NgY#FSu#=t3ZaWhdfA^!w&pK2K=tBdmom!+~ZqsqieY64==xizD3UnU}(
zV2OrmZ$=~Xu{(AbZGFTl;908S!%fDq<mI^%z=&@Om=B3I%^tUEmE;bLLb+JRw2w5J
z!@UlHHVs--D;^njOW*_0wJwT%>>Z#0g0ChIdHIb&;I5+ObnW6Bw@)uf=>QKMkW2KN
zXj(tbN+b50-k5?>RBzQ!`Hw^km<xawZI<+PLFfn$O`_6}v4k<*)^Y!=p^}IlW&*c^
zii~9U+>9I!*^=}szWd1OLl-l(8Vr}JMJe<}TE`4R8C;TC#`(Yj)K~IV{zXUG@r0R0
z@1rkc+}-=pC!P8ks>I8gZu8v|suas<V+gG;?W~M;J3UPjcXN*Bk)ZBlgd^8UL!o3I
z7>~?aXES8hB|t{+lSK#z?HvUO*UVk3FPP3I(oRk&xb6O;2PVjpV(!EbjG^oZDAl6f
zj%!xlX)A3;s*3%>=ay&2Ma~Vg`e((W`p$%s&NUk<OMDj5Z0zM<9dYRN!||H!_L|1T
zn=PWx23N7x@7x^9NZ!-KBWe%qI2paG7j!h^%Zg)^OuJQyO1;x|2r^aSsV=p>oNVf`
zq$@~BzNxBw_PRiwD!?5g97O(qxt}q8qg%mDBU>LjU2W6PwO;ARVxR31Jhp~#9T6O;
zQJdBVsKlxA2gNGJiCCnP3L)h<Rum3Gz-i2Sf0_w&Jp0`}&mSanC`0(^;BnOmI@-?n
z`ixjs`v|(+RTxyw>JZk@2$YU)qBs<ZN5|4^)vm@vnPMc}Js!o&WA6b3bA!#3B@3x_
z2BV;1jwgWeg`Gbpddc`oKWMvIg&toTjR)P55tVN620yn<7OYlAUTC(%7ksSwN{$RA
z&Pkp8yowsl`U`SQ(rk0OXkV(<bX~iwWv4q_CoxAw<(W;cqp+|J7`xk(KRPxXjCNEx
zutXtCgs<azC$WY~W!=)|s^t%59(B;Y>t7`%x~We})B_8^?$45givnwcHZJE}pf#2o
z(@U}+`Dh}xvf3zPFHbb=!0<+s{Ig}ChFyrlT3Oga4K&((%agtl2L$p@B&+iiJ-9#s
z3B&Fu$|L&xzsn#7b@uOPngL8U{aF1z&NWD4f3-byJ(_>HH~fB3U-C?^@A|09Mr$@E
zJyRjZWL)F&XHkKtufCCh`5D&|scy~W<~&yIdN}@0=MxrYNrdS5lr)$sAe(-FGP_QB
zrX7ZdNcR!W4y|vDZK~w;DZUENggC&7-(T`=POE6<JwV9WY2LoQesWA$Vu`9rE-{^s
zJO`ok4kR|yBL`F~q5y;7MV?1S*>D<hl)<BpU)=lRsA~4%L&;HiEThY0H|}(qOlLRR
z+5vdDcM>%9>&4ij7NV<fRUHpUn9#OpgXZ97wBJZlcH+myif43su57i*gGfHgwkje+
zYyzv-lUQ4wz-7mXb+uF*KYzm+AZH)2G!)&-3LCAi>{wnWlsF`tJ*W06^=MiCldCRn
zb+x~ex1|Yx&3DxiPmt;LZOV|n$T{8DI^!HT!yYYL*gh|x+w9fK@<>vZ<%cY(lu4*8
zQ+O79C@VL$qf)Gf_iUovo<y&8Z4tfPuoAt@Fd5$3@)+JG61~8RUgg*g?Tm^+s~Q|`
zY3h=<$cp0mTO=L)F)lB7<8Q;~_yUQNqn1nR-yt_%a+$#}dd@fv|B0w&(%$xpcO3rI
zN9<?FwGDHv?XW%<qF`ndXg~v%f$xLrBH;SvNA(~e{|l9M=4Ql?c_U^w23jdCb9&BA
zQznxo<@C1>`{&12cl-q#6@|L<c)uHY^s9V?ejTSt(01XOtMBZ4P*weB6DK!aw`xhY
z)>)(%iZMTA<B3)*YsX`wSgjP#h)hXeF)%&3Pu;W-f|6a63wU1N4w}wiG@%zeI<k6b
zvT91<f|;$PO06hWB9on_QJKuImns#p*^y5Z|FzhC_5%+nZDKO#w<qZ5bo~)6rTzD|
z$t*?eD@^Qr)~)*2f(5H(aMj(350!wv7GOfWJsj3ts5s9`0*(3K2JK`Q;Q$F5HPF@V
zJw@<{85l$;9B(2pK%94nhu{Q)Gw1j9Z$g1+uODa2E9yg83N<{_C24XbePxyN6LN5(
zb2)^s<Q=L{(c7?Aid8h!J>c6)mmh80!PSYS!vGC`9F5KY#eXj8IrS)1ZG}Lz27jm$
z-&)s=8H=!UwN#6{2Merz`*{N>T1DC+l?^oMU_!qzX`oSvKpJPz#WS!!teG<BrhVKo
zMx)lvJbw>LQ0J1FbQw(g*#N8(8P8B6IJ?kK5l&U@S0sMNeH<Eg5>N7=)132zV_m<i
zhHH&2qRnR$wTx|K6Zr$bDYXWxCD;-NPR^I&FQ@k>D&?{~h?)B&YClA25raKEN}BA3
ziVE*L0h5C}ryTV=GK><nG&hj`VkUQcG-kqjq*N4GfgU@s51|-5$!*pIyNNr(0uq!Q
z#u7DClaXJmGnHRU|NY2v)~cS`y%^7!`R7)j`{+U^^J`s(D*XNr1yR>cO-XKYqj{Bw
z^V<d*o_vSHUZR?%l=Ck6`V9_=TzWuryz9L)Qj+aKsZ;ImW_U5@mRBQp1wCcM`e(%p
zp~GRtH7!<D$4b#jj#pYH6yk?Hx~yJkP88oSS$g!HR0BD^Gj~slS?U{sDB8VHbm&vo
zlKwe`F0v%qxd9-U8}&q?xX<|JeZGF7K4Zcrhu$X}mk&0YKk15eX(neMQ;^F(M5QKC
zNVE+ERt%KsN#qy*!^;7n^hAAWA0iTA^sHUnqj?-w*rWOdG`nZ?c6qX82xL)gGxxqM
zXpZl1I}`LrUno!f9RsGUNVPlvy;UDdvMrFY+OtdfN@~aKtdw$KYNgkAA^=EpsY_v?
z=9XXOu`cz*9}*KB0Y<eYmErLXRxO)N@0Oei^-T{BD=&hT5BToon2!$|y53}nPsFHu
z1Jx6p)CyS5&zd=l&j5(!5g1XP0rfAiR8Qlsh5^g6`c_g=Gj?iJcYSww>^3#;ndxN#
z(*PBDM}4%)=Heo2?yy=nDcF`A5Hdj(!6w6PL3wJ|nR7MTZuDgZYHYyzSdWBLsu~Qs
znsYiDg;<6O$xxd@{PISaA?WTVsAZa$y!~U!cnQ*U;B#9<Im9z!08$5GWVVG9mfS}a
zh#EaNgn*$a!kuepx{_TwtuwCg`MW^byk8ik=o99MCkiQ3n7{%?LPnj%{HT>m50bu^
z6lu>6e&P0@NCw-2>>zs?g&<c{aP(6sL8xnJmr?TNPRax3OapzPh>LN6o6!0R()65a
zRr4)Qs7@T_gyGgpKQ(y>@Cdthj7s@tXBX0FDo^=@9K*P+rgddN521I3Q|`6KPz!5d
z0JC;>d)w*<!*NN6Z5o))jpf{2BcE#u6|JCy+AgY~o7UH7+`f}%*RW4TNfS{O%m%ld
zyi0WyH*)VdsNE>EUb9+^_RQY3bP`{(wcvHsSJTte{$hE1X}PW5`C=Lez0IbA3n4Sc
z^R04rp$|;8?(S#u^ui?-dIMDr)Wy?kOBFvAgzW<pd^i!jSV0Ee?ARPmw$*uhdNovW
z>z+_sP@f{v=m!2_G0A~Mp%4UNDF$ykG`q=411(hx3LBm+t|~@%A8rht{PD@bbCrU7
zPttME%yG|rA{n~~(i<2`V3N^l<(izB7cQ=~=9X(6HCKh|-r6hmEM4w#O%!!{Qq2&$
zTBvMjGU=1f7nkUq4U;|7XqIXnHOczQXB)vb|HvjXq3KfNYCW+tU{lej=}Q{Mu_<$e
zWK}3B3yD^&=Hb5x=Gb3gN7J(Z6uTOjh@Wtc8wU@L0HQ(7hfgjlYzAq#lo!9qQBM6m
zqr_ILC`_?SBwfiZm$jas8ZKfo#sUsV&`kCS(sHI|(qJGw{43L-<kiB367#rd5G;uJ
zT?4$l5l6Jn>jbaLN4JN*>?Umy68`2Z?XU<HsG}4cj3yC{>WBQW?EZMPgEu5M)Z|SO
z7q#KF0{&2CsvnWkbt}zVj0`kZJRo%}RFg_6A=dI+OHKKFANY`0=K)+g2={dupxu9c
zale-IRxdy99jt}lawIhccSC0GOef*}3p@FKnTN)d<J0Z_N&ECalQ+2!{`GVWZFw^`
zpj|j(16EBIR?^&_kPqsXE7uKUR!7J#kqzmRjJ&v1t8Byq*EO6~X*IaokPIbpQ3Lmf
zpBh6N$CfnQTrsR7I1LC=lR^}VEj#^06dQ2|RrCx=AB!Fz!!1XMrDBQ2lG@#@SDx{&
z0H-yiG9&GoTHNP_pact@wN<G39c)Vh@(rO`kX}9ulJ`Up@5v#{Bw__x76D9nR2Rc1
zMQk4d0*)9~Sl*}Df5glMLtLGSQ}*7ygHsLSTkVH6Og4}KfgO90@Sd-im#sGQPLrGj
zMdYw6gd@1ViCeXNBrZCgb&lejL6|(MR!Kg^sOd8f_bu<wRUGv;TE)q??&oCMovz&n
zXY7p5Y>d)(MrK#sTJ}+s|CG1g3Mzspp#4Rp!>&V__M}muxi3D0Zz9pxzcNj$%Cb!c
z2Nh#cwgsZl41bp)mskYAn?<7zZB3a@Y!Z*U2eDK=B}R81AgCqLH5qbD)!u04n7%RP
z#L=o2ZX}`86D)1KD4_e^v8V}y|48eN*rTM`c#v(|PBW}7d{3geRM&3}Q97hK_1s^7
zsVSe8x+&Yzj8NIJ9aZ&m$wsB-42KA2;7DB{Ky!r<vGk)>SQ?XSF0<S8YC14ap<Hf<
z*`*ic^sQ9|)A<dx-UW>h8Et`x_nD&+kQi>MZ@j(>4w*&wh($*vFW75bbuNUX3I(UE
zRj%-A{=+nQz-??ZF<~mKnYsK@(ZcxK7jbM`7EhojH*V3c=vo<!aoNSDeGC!odOLBi
zwN`zzsQ+wU?a8d_!!%L}g0@%Ga-r3zx}NpTAmX*#l>v(0zZvs=1MI8U?<aanNjd&<
zeWlA>>Gbab_b8)nQv&g%F>H;LwoCb9{_5H6RkGG|W4^6n!V0zN)a5$o?z6V@*`va@
z9$uH%mj+S(ZVu?l3_zm~*qMh|k}X)@(0Rvt?%Y@-oi*|Ikd5GajK(60TK0$8Tn91R
z__{Te)WoPz5tW|Vp+=O#FE6w-!p2-Ng_aJgnJM8t)EE6Ym4SVJ(>e%=R01<wnoX@{
z$!fsJt8njHGQ=z-C(Tgw2s)YNgg71j8DRSW48c$)ePoRNh2rpNiD}sLs(+s3R@Eaq
zeJGKsI9Nb^#IkEkD@T+HQbS>k?d`VGCGCMb+rOAV<0zJ7`5G19V%!scGdx;SIP02T
zD0O;9ht$Dja)@P5qDdsN_K04P=dNp2=_G31p7G)}C|9UZw41mxOH}mwW85sQvoc@j
zqSA1)q*h;KB!}x}lHCO)ib?myRWjkaid{laCD9^6%pxff<Gw0VPNh3T@h`x+rc$+m
z8cH6^lnu0We&@L%LWc~^luSv5+jC{3p*X=eewwUymFlRJ=};s_hz8H|S<>R82D6n)
zW<88PkjdLIJ47EGS7Fj3yekN@$1zi|&UO>QRi$%?D5>_C=bLJs<jSiv(^^Xlh%U*r
z{wgoi+Vc!bW`8Xj!?K5_d(OtdtkkQB>vWga)JcL;DGn7H4Pe7(^PrSO770T~ec{bA
z$AyoOIEV6Q5bZj`ieT5pyJ@Ki%Z<4K1J#&Aj(EL`8^u;g%V&+_>%<P)e#ej@68DxG
zW=xkGlzwd9uIi~YvJE>%@D(PvBQ1>)N(T+sjLBV-1ymm;h%#Mx<7W{o@}-yq7GvC*
zRk8*C6hpg2ggJ$kxy0Yb(5ZynFAb*9Q7d)FX^B@%QLQgdgA_Q<k|_lox^1IrET_BL
zZnlGGn>P+Ov6#%<nWc|3oGsFb%q>x>?bugAGStDc+0qYE8$sOU8uQMqKfZ%ylZ9v&
z=<ehS)NbmBQ*xPK#)i4<CMRG)ZAx0B@NvFNaS$Q<HuXME4tqJwPW%PgcQR3bgvO&G
zJw`m*cj5doo*(?Tk~m8;ij<~7IG4L^$})RpS$J|fae95Mr~JqCXCtOl1N#Uusdo^?
z`Bvf}RF<Tx%pEa~1E*fwmH@mDRA-rJYl@pmRFju$z4@O&bPYRzhK&yvgzO+haT!PU
zPqH3Tz#@L|pO=~FzPkS@bD4>%=$Sz?5kre4PO!%kx{IS}D<qi|sUgpJCsN3LQ6p=U
zqC8?#AtHYCo|anWMsTZX6fl-wf+}v%I6WN|Y;8_BFgc8edpC3^#(*0-<#UWJ6Sp5!
zjC0eZ;i87WROQHJ;PT^n9%Ax5bQyCNKXQ}z08M`+U+z+~ce)V3w)}zGy5MHHoFK^s
z5d&sM7tkOEkx$Vau$wqg*zEKi9#c_>KIw_;`BQFOyktx~$TX{XVxqgJkgtHr{)#n-
z17)MICEC05GIo%SZzVNvB?;TchaGg>C-Aw!Pk$)2{jvK3dS4NQZ7*|Q#K@Q3_53p9
z`@tF>mjw!#9P#tg^u0e2f&RVrI6iL@`)Z>L{B3PC4r+EmD6Y{KrR9d1GcymQly3H|
zi_+|)Bi9I~B2Vvz^8|<8fM!<5B>cq>B-dSm4kI+2V)b@JB`jsQumb%nDeeZhCB9)l
ztPfm8bfw7i_kuHt|1XClyzIRrocxZLdT+VKwY0>xC-23p7~ULxK0*r71T%Q4mQ?uA
zQEP;4<=BWb0)H%Rg)HEJ?I7TKKwDLq0ec`48lb0M=+MrGJbJ2u8ktI8DUG~Q+b%$Z
zMdOVXpt1=}pd}z(6@C@U+MmwQ27M}Ied~W1Q9+-%n1>jDDGa(dByz|f6@Y4U_~a*c
ztHb{$iM%BSc$!HC$meX60z-L;^_B_JSTcp>&tX`PqT}t(b7}9y+!62<__;ufEb^7X
z6XN}|4zP8{vIdGC&%v_oCHU3Rx4Que1HB0nSAZ+b#+-%Mvc;=v*kX_z9G@zmt!gyT
z?!MbSo@(zKe-y92Vd*aa7f*!HGMIMv1mFI+V{r+N`&Hx$$nkG3J#{Z{SLD0x(DeIA
z7@I}p@cLc=NHXXkt7TBN$QO|6-2Qj($>i|PMS>sV;I5nm7Sj%kMtp^nQlW-%GciRD
zy9d$ka{8O?uBmmE-MrbithC#*8MdKOO<qd|do{CWg!hK%|E7(?J_=+N{M7iYBa+x!
zf`Zh=;(U3k*&6O;>0U49CN_2iaI6k|d##x-KWVPw-&^5=z+kw^Ba#k3e1ZCw{Hp#Z
zanxPiQ`1wMM;%a{$3sa^_gOamp=He+Jv4s4BT+TaX|p_hSyYt9yh8ST(lz4y4a|BP
zRD^_CznJ6+abdn(Zb=R#dTET+cbk*TY`%pVR^!>^;kjjrje}~C=ob<Ypt^#8DTTRL
zBy>Xqy`b>uMvXEWK$=hN>!TEQDub<rS%VoHC`{6vC=fy)PScfjQ2+Ds=k@2CvzK?k
z(AOq=DT>}821)vliD6IQO8$b<1<MRlb5tLz{u1(WsyU~~VM=+A_v+u&$t8XOG8hRO
zX6sM8qjaL@x~)gNM&JSG%!8T<8VErMZP=LBe`!I1f!!bHWU}j$qpM2go~KO)X$TL4
z=P~S;_+EcfAD{|vYxwx#<UP`n(vdzB@p6<(b^jDTat(t_UC+SLfdUoWRPKztyN}NY
z54F^GU`HWsyY!SIrH6Ti1FoY_)I$L`aTaXGqY=scCI|{1;jEG17a|qkv>NBf{8m_{
zbp>x6z~LnfeUB0>4>aYDG5PJd5zN-uS{7qp<d7PB1)e^gwGxmU?0*~Bk_QU~Q^krU
z<oyfom>mC%+tS^+Q3-B0Yg4tRJ)5EN$~1$SPkf{%wxUGU6j$x#fReBxQ2*99NlwBK
z(}VHmf_+(yEdrPk`7Pq)9qemYWJ1-mW9AZyHgkrhK9LxMteZ+^d=tzSbXj;G&V3a_
z|1z`9VBG!!Ec&{RKDh2?DmTQ`<0k)`wDK?W>3L7+2Q-fV%y;=M2JhjukJ|6ZrMwek
z)VJw!M%%Zeoscz(BY5fXH{nt-UAlFwr9P2Xvj>SYPqroq;&P}`I7z+BXcS3kQFb&I
zE*L1HGLc}%O=cZHdrgsrQUvT4D>1{IWQaB1>uWz^phPpr>9_tBlw;dsQ(RK>S9HL_
zrLr1I$abT7TV%fQ%lQS3nm=(GP~X)xG~3)&7Kda1>WW)}n~NiAgO%)-(j2sCAUT5$
z;?#8pFwR8fJ<9SU%2Id2)uuXpaNeHgV;7^675v>==1Ek5*&7h7$i5Opk{2dIEx45-
zuz=$>pLg>`FnP2BVMi=XIs4k>`+744#+J1ce!=}${<y)UL!c4U9P)0l&Bb9I<K-g&
zUHf&j2cc8$-_}fFPouAqmXyC36-)`wL|}wH02RHHlUbzC@RlZKcfUdDOJ^%2p$c$p
zvyEwORp38cwanO+fJU6-!aHHbvlYf70zVAX!Gp#UaTPxOe2Tt&on%384PM{*vF|e1
zhae;jg9++bY_VmZ?*Q*`r2Z>RWbu)Gn9Z11Lq~d9Dz11!od{ont6f>xPz-Ti$mwP#
z_7&82L!%&+j5!2k-1sBVT^fXd7i!_&GRHq!g!hB~W3(B4Yt|`$D2pN590@KeA%i3n
z$3ubcKNkxn1_ZG=lE46VtzX~G%i;S#!zLt3LZ;EL?Z>o@QmQ|H-dq;skTR*c3K&iR
z9_tWhzy#?EVSbw=8>Y>>?SxJ^p~y9K%EdWs8$cD|k)s*%?hVCkG1T4>JA(eX<szj3
z{SSVY0M$)|$*J@yINE-@uRM`fREu~RXK-LNgERyDC;1C1^>*iqO6*|5J`yo&8Vdo>
zeFL4hh~63Iqvv95%gONzXMdig!$y;g?eN~{0T#t$Sm)+&4}4s}FkEX}Z|9?&UkSH^
z%^S0}9M$7Ug2SE{tad&!5=sq9L<jUbEAXD<TFgQo^M8`8YZpM>w~VEZ!ZRf?O=s&O
zyhyH%=n6L!rTpWQfK7W)xRvxxhEK$8!6Tpt&OzE4%XE=fOFYA!Niij)=WAj~G~vWC
zO5D%UKD6umrGTeDgt1UgO1*Vy?Me6G<r<~8+W#4!0B_d3GeLRg8!z(u!0<wEbTq5D
z7KKY(OvxksG>AIn|HnaJ4yD0>jj>exgzk=Po^E=9BE^xvWXv<-jk6*tsec{K^b%D{
zZb~8m7J*NhZCybkH>mP%6Oo*rpoGd1BKrI|gXQwNHD^8BH!`$Ag7l=zu@X_CK--@~
z!vq!IE^mVQh%v1gLqI66g{s6(@;(P^(qi*FN2%H6e9OQq!+Ukm$%ir2po!m@Sl}y=
z%wEJuwCL_I?yXzrA0eovZo1N0&3?@oOX2;8O&j;%ml2+LtmFHIm_2PiQc7^7UgaYy
zh0v6AM3a7Y=4?Gf!?gu>WMn{qRWd1s2{}=$U1ME}a9r(+nkw9WWKJUoEi}#o<lcqA
zN#*#o@<jdoqZz0MXEE<iZ$3?yDp7fYmiRl4u42JxJT5WZyMlTene0k%!uI}VPr?GS
z@?=+CU^w?G4ztp0QJ&?|qCE58=jWU}M?I4TCgn&@MqRWd;A}BP5o5d=E{k$&Xe8)#
zRY4<*2tE>Zt*z#IJ**S)ehFoIxTGUh`c17~XO$<#OmZLCw_@bp>X$0BG)9>40{!qz
zgyD|Be0h4N=rFT~GJ}s5Whrn(*?IPj)bP@mCS~afTa}?<^(OWOi5MD-+VIj;g`wq+
z&am^ZW^^}YDUjCi(yTJWa~}_i(xX&)I_msygg4~hlb(xo(hSbh@m=Xr=J7hCOzJ`-
zOWuq0UK$)Frn1<I4875%sS+d0(#v$*58rfWs-MsszaFT&3#X0cI_}rL++K3KksCiA
zFV%JL?%!vT8$Sbnj6KU!bXwAAW?(ECKJf~B_n!7_U+^>0g`bPc&<cMvGjQ7$;zQtP
zh}Azs8&Q99>;9vA(4pBJtuo2_t#jXo*yCn_3LV_5X1a{XfpV|+Ex`NAvj-<`_2pep
zF_Hh2efwD9)!p?BK`W0(nw_Z)q|GUX2%$=cCIrEIb$!aK-@VoUF;agpP{R(^;aup^
zLrZy{c9~}n+flf~OgVnz_dcsPuBqb%;h`4PjYvFHM(m3-{ks@u=q&0sCMvXMTXx{c
ziDPGD4c&h6Jbz>eG_mEs!Md_ohjBOI`(Jz_(+%2CA_*O8@ivwCK_v0hG%IvisF%vc
z0q~6kVF)O)+cP-oko&Uh@XO!QF`#X)1Ck5vwr`P5+&EtPIv1|gfnntuN*hkO6EMK=
z?YFg}tU$5D)KCQlm{pKl!b&FG^|QiWQ1@66==$W7)|GC}W4YdsyHca4UBx1r)DYB%
zN|k37vQ<p`q%EMlNe2MN0l*`NzpLnM9Qn(qKNjxU|9UMW^;{Gg{mN^XnnKgJMkVCv
zIB563Qi;}(zQ2P>^?`8hedk_t7z_MjiG(_Ty`Uas7<U&3J82?uR$(u}uJy3!wCu3I
z;FNzpxb;lt2GBEmW{+9KbQMF&{FgR7phJ6t1d;fbG`UG?D;F>5>fC3?<~CE$!}5ll
zQE&@(RaM&21>D_j{?j#db?+@Yi;Gfm=hyL8JtNz9LBz2l<y1Y%W#<1|@3kQP6r7hH
z#N#qBt{P9w-UlV4D=3S$=Vowl8))erZ{01A{>|Fl+rONakoz!%M(;F3wX3d;7Ns;7
zol?|HD;peGg3f=)F6=+z(2kT~VU1m@dRQOLi+pvMTt3B`Pnz1!3d=I4Lp_rZmbWB8
z8Y6ZD$#5V-t~%_(4XS1hf?A#PILlY#gTNv~uwNVssCL92oyL6sM<Da0Mu$jyuM$Nm
z@kxlUSZ$25sHH_>K88E2K3Ks%z%v4HP7<L92XbONfIQJa3DTfJTn-i;5!{#SeJfRc
zI$X^(5EIpA7P77KT0NR&Hyz$edoV&B3D;eNsrYE(2FUki8Y5nZRn~E8X>Xfhnv;?J
z-P1|>CQpV6feCaq`DE;$Dr_&Ew7UqZ9Vta0M7=Z|eWkd+-6g@5Jn6vFHr2$8x5#kZ
zx?b`iB1m$i5W0v0CF6{?R^BbfFTz1Kwyt5XT0!`&<o&Qyqj01w#(JZDLb^9$d0#%!
zh$4WaFB<-xB$mh0%ANbgy|BP1Lus}ra;aa9`^h_oNiNa1@19BSPGp9A<NVn6G<OMJ
zXMMiI)~j*e@#58~ar_@J5eSC*Yhg8mK<YaK`NVO_N`~b`_?}3Hb@=9%SB9l>p~k+?
zj~R$E?Jz@OnrF}M?)i}I#7CB$EF=~X$HcFDe>mm&DufO)+D(T1W`EF4_IT-zT+o8i
zI0G6|XAc~>zj`0qeVVU@6To`@R4hdL9tg9g%D&oli~}byo{o{^9hUE)9%<_U9_g4H
z@K&FE32zMIA^6f-<r8;vjNClN?<JRkz9PgT(b;^Ash?x^h&tGv;gtTYZjzVSYyJk9
zo9Pv+6(21K&sL-X*3|#$iu_c8vDpUUu@~iGzj_AhoU{#N!7uQ9Lr^Hdo!dV!4?R{P
zFdQUIuMgT&t4gDPZ&ImdsIOLqgeJ!gYH%$L@QMeJk(56L)_evY(UA@onSo}m_aU0*
zP7dAg=Q^wY%I3vw0p_@H!-hYsmd8t3T+(`vW6M=+YhXb6W;U@^x#%MhMc9f?m;!mH
z8`QaIgPX3X)ffMAT9#o7I!!JZ0NKocq1+D~yxn$$8ko@Lr*xYX>;felsEZ*+7uP!E
z0shW;AQZ-bJ3|2th`T(dW{3^C?T=vP6}=pJGh>=Af(%RYV&mr=P;Z?8&MS!Vjpd0h
zuzlGMjZ_>boPHE3y#%_qB{vk&qUZ?Wxat&PiK{b70-w%eKj}|}9MvD$X4njV&LFx4
zJ<WlENT7p_qiGpxI(I%PeR9c$4t8P{Vy$uIJ=K|d2-F>;{NMZ`yZGtm%xEGCYkx#{
ztyEI~YLvT(m2~;n{1PX{F3FIT7>_jElcS%3F6V`TXFZzKZ+@5Gr#YR6Vb2lFt6|I3
z0*_KSfT)~fhY=V<r-&K25r|-4v|Q*}EAox<2r)a_O*Ge{PED!|N|8Z?BV#NZAWMXO
zB=wUa^M!-+&Ln>*t5|~Su$GAc>V^Z*b>zzGs!4)NdesUz#5e>X(BLHY--TIrh<pXh
zzD={bwH_>Egz`D|Y?y;i?=wlFM2PpwZ8%bu#VkoMAOW#~?-_gc%v@0bkzRk!My*#M
z-Lua}xi$;$@VSCE2PlHjx*5Iwjvm_#14D5aGIm(Lu;one6zlTD%nhC<d5Jce;z^y5
zS2Lf5@}IrtR2004TP|#8T1eFQpfw|O#_al|f#Lxmo|5uinT*}zV`HKvPo+hI&W{vL
zC->>ZS1+sTtM@^lMcdOF!p*G04sz?CduPP~1MN`~1@hDfv9Wo-mA_wto2}hRe*jZi
z<Q@uQO`LS!`i-4u@A*PcN7^1uzVC{6cr9>Ptm-ap9M`yc*?qRzot1eOY{o7k7Ko3i
zLVQMK?hf$~o6?oRGNudnV#~Ipg{8ezsYfCBXI%=i<xTfnqCu!qgbHBaAnv(0tBI)o
zHB1GxUP`^^eXKu~dh59<1YDkED+g#Yz!>L}#Lh$Pj*kT%|H2p@jxR@_4uBQxNBn48
z5>Aw?Jz*xIoipc2I0c#}E#<AK_9P~uxB56Q8S%D?L*y;W&1#5&JB@jA3vL~N-@D_x
zhoi^VcGr|#eIg$mhWy|JyX4OQz<^tBG3}n2N%naZOY{`INy$aV|2XSfJc6}BX)X;#
z#kVDP(CTDB`4V}td`fEHM8fRTEwpc3{~^Gr14Uuqv{p%NXJLS&v6!TJ>m{yr7rViC
z-51cS!~ISE{~RbSr3MlIr%E@0NSA}3i~xCU?iXetycF{cmsI9y(V+Hxd24v$6Dt~*
z(Q8NV%6Fev=dVZDn_o&<iWwit!LN!g_s*|(^zSSB?=8d6uF==OAqT&>``_NlH$STd
z)C8iB-L5*6Z#hYGj-ua=3bgQ_HU~RfY1Hf>fv%iVm~Z`+ywJnmE$;xlL(;%$y-qeV
zGRR0a_dW**u|s$I?z#}!_|IM_!rk7o^59pWroRAE_J`mf38A|rLckmT0~kbSv8g<a
zsGxU*-%&2K*SYSDfAqAeIByX}USJ^6f*f~X(x*02GSsN9#Jyd;d$fm|-rA>OBGVwN
z#;2cm>6+@3dnGF8X=~z?o8XttQznI!WUvD+br{hN8K@uxOiW7U*>(N{Ns`0N`#^am
zV7SS1K9=6Ef48*-I{TfqB;!Lbi>uUsZ;B}YX6;pAkw1lai1OAu!3Bv6@t&F(_dM5w
zgB@niA@f0i6(P~E|MSgSPl-;8qZ;&hP?RKtK6u-869ceV85GY7C`$Zk{;Wk7)!lR9
zGD!|;o4Jq%<4M6|VW}<-Z3;o@b!@T$NcWW%48NG#k#cyC7j2i;NqjS}M_?&{2frSb
z=AVm$_*ZwAM$m~#T!Y}Zz@TCTf9{p*8wo}u^#jHm)<gkk23}yobPU6`v)0fb@GbQ9
zpWmqcPm-g~-T!9L_a&g%TF#R*ttm$Gmiz4(6NXP%MM7X}MWje$a$@6v>8=6D<U%=9
z8KlybRvK8-BpZaH4_{M(7(V%Q?deBhn$H)2aVM*}1M(r8I$(s2LE&oxPe|g2ZPc83
zWAE})HR?+v!6>kBPV4^!&e}yHj<Naa2fBFxBCRQa$LG2p0FXtH4BLCy^}zp|AV`yf
z!OzB%ibiG&c8yAeZD>Z3h(fjQjO-v*Qnx!fu&7&)5~prFpq6$(%R%FU5Y(t9sYE-U
z)&(G(+I$MptTEu{mT4PLwf+F<vAhW!%97n4KZu7w#cAwou+VK|mEi=VQ0?D_#3E#i
zNSL&E3^-EMjPQ&?OE_+GdgFU2mC-*g1vUn(>>|7GdCv*IJ$ZO9diz;L+Qrj0U4={%
z%lK9>^-EsI(=}BCbEH}ktAw@-%WzNL$1{2ROx#C92<>Cw_|Zz-50i}j9Wi-30c7-c
z0>J3)@Q2==O58uJjhpuY>9(gi|D8SDWjp=#V7wU2G^U&58vZTk_Q?$Yy>I?H`+Z0M
zJ-mhgE%I@6zN@W$*A0zR;NB;^5y2u4o)d9Oz7^jj1jSx$!jsMu76b~Br*z5si&Ue@
zvG&$XLgrlmbSCqb`lkl~lyMe`OmL<UFq_leTR9;3UA|-lGJ1O_iVD&2?>EWikcOsY
z=68bz<uo!C<6O?hn_O-)CBk=o(l8L(<j<Jbg6f)~Qwg03aWQ`}DSs>%tVD|^v$YN=
zSpWMI+Q-TeOIrzEB^IoMPHbSK2I;dD!@r+nBbtKwK6>^hqivah&q6{<pN%tBlI#$`
z!f<BH1!n^BT)PHn1021`!m_Vr$CMic`1qG%1B|KpBA}*g5AfN>lnyxDqYS$O@yiji
zITtps6G(JZNOc^r5|(k#5j7+-o&l$l-DvH8$CBteWpXlEgA+*L+fI=V$LgJ@WVrwA
z(?wL<ky6yKSThN>iD_hV?=T9+d$;0ojNX?<WF4myOp7pDHs_iV*$ruKasZ3M-AxKB
z4P`jH24bTSM7jiWcyzp&Bv58&^cON_(Ao%S*_W!^%PmMk!UqTQg?kirPg_1Ezd;9j
zpYqi4yEAGc@(52HCK^wOLIYy+JT4S%=;H^gn7BKSqAq1D5llx7OS-wr3e%aw^E5kg
zSCd5Q>H?Q&uemI_71TxBO$~J0=Jt}u*Cs%O&J{@Y6W-|15?im>lr_!8w^jl4M*a;K
ztrv~XdsaJ`B;Pb*UOL(+4cBJ!a)jSy75@aprw80Hs=x3gvHnaPU667eIv(NAb)t$&
zkN}oKuzbm(Mu&z9wzlYK()=)SIN>a%TFA&{2Jz*+t0{ZK;1W|gK?r=BNLJL`StRU}
z3j<&UI%FXdTsH0={yfxfnBt3w%QyNeaV~vln)mIM5PoftKD6Q>e;{QAU>6ZJM8FlV
zb|CZ)fwI56BBSJHK(jrjB90tc&)pYxd5a$Cdl%p@-|-9_rx1D?t17?1N&su&z@@%s
z(Xc;dIpdE~zh5pV`sEzhco4<j2r0Pc_k`$vhB^TxR*qHLJxak+K9^#I=`CQf6jf$A
zdFhsWZJ>9HfkQEhmu&SZD6>|9s#+y2p+m93o=xlv6!BKVNZ#edO)1~7CCVEHsd^CI
z4T}TTO|IQbYn>7HL<?h$j#_y+qCNbwb>^uaP(0KwSYUx5f2#0${NGOa!%OOJ__S~z
zc1+oT_HbX}rf4%MW&Efheae@+5v-hA_B0Of(8L4DQcmg~jf6FQAa)Z%iHb!IlyyP(
zx_MyKWp`n<e|8A*!P;6}#FzJGw3yFu7}o4qX^<924P?VcnpC~wN~;vdYFpzm*G?BD
zWhe+dCB4A6DMDaMKz;VoG<U^aMv37}{u^kb!}qEiN`B)Gx;bzZPxs!NX;MoCn4q}7
zr)B-^XUR||stsztx>Q0q4UPp#+m~3D?MW7_Hw8oOWwvOB=Xm8F@evTAbL0l_bX-H!
zGiQ#Jx(CG+*s_EB*341(Da*pWlMyz~S?UN@Dwvxo#z=aradBzc0xeoSBJk2o=2e@s
zaB1Zo1-5g`I5*-=SE#^LT}A0Q^MEtF86HpbWP%#RqJjx5<1Tv=iS_c&0E7DFkf?vm
z;^q_0Q|cN*;+cxEO2@E{QIxvhxDJ9cI^V1od!@X!IngxkQODWR>{kznaIO%N=Fu~m
zmIXl>+Q;zEq*R?%1c1M*CSbAB>il!WXHrAUXfYy=qEM*_tcFxdiXX6CNm`5nU<=)f
zhxrVuf3_(Net)(<R{OLZ-!BlY$9@?Ne$w2S#Rm1NmebiS>XmpkcHk=Z!IasM{Z>Qj
zbOuY2=)Anf6OhUt_-%M}=tTv(bELceT`JCK?eZm}O%RO^N?SDKi9H|`(>n*aBX+v(
z7%1NYurl%+A&uOoKSy0Wu03nIug6&;m^s5(EUgYOf9ULtv}i0te1?>SgHg;ZL?Ihg
z4YWg+9UUC}yJ)=pk^wMRNE3EutVIwr&JpM1@bg67g0?<BK__z0VpYJU3>wa2lUWYc
zma8DEOK{X>RxJA=`W!zWDOR|eQ%YkMyvcb<LPcNBtLrhWv>xiC!BZx;lE#wMNd}FU
z8S#LM7E;IxmM`xp5Ix_VPcgkPd7ovrF)-yUkUxK+^M0U?8z6imWMF?BY6zAhm5jeN
zk<5luWV<QyjQVh%nmvF}s|-NGziERK8J83&rWgY0{rRgcfW(VJKF=^fm+65rc(7mU
zW~Cr}*Wg0Y7AIBB91*0S1<iKaQgO+I>aHlRq`jIa-P5B?X&#{t26yM*J%p$0AqNz%
zL^+QDx5OYB5v5$C%go~W$NC{z5h+mTqBY)^?#ur^*IwAx{Usf#SS_gikAdY>pHs85
z5}Bs}^17$NIJDvDsT$qx>+)mgK5-K0s<#tBkM~`js#R(xH>}G`kar+DeS1;I^MYGb
z{?spDdzRkbg@lQC=f3_^AzH;arvb_$h{md5UiJEZCRJyCD<a{`yCxeTFH;j(_SDo+
z)JN*t2^wuF+`X2#x_CGFj)fgILgTvAfs2EJ6AL`=6rc9*O)HHwMW|AR;``rLDz11O
zKDDZ#T0Qp%Y_dbhI)OIe;pz|+GwJwZmr&||@=$LA=~f=(N;eU+KxY#uM4Z&#jC__V
z?y^*1mWcnNV}_3kAaSNl0GMDjb)r*9Gwn(Vx@Ou;%Ls_JiwaJ%7?-G7$^1m5Vx9(C
zsabT}730R!<hQTS;M$lcs8JK`a%0NRQ%j-#MI!1B#vm0%;&dW~=8ME|L2SYj`l`dA
z<PPec-x`4u`fb)3fksAiJlI)-GBzA4#_%sV8ZO5h06(CcWsXyZGP*h~>@ba++>Wai
zLmMAwdjDgOCk?tXnHXcQDJQ3v*j!98xQh8R4JX#Ni5+aowy_&aqj9R-m|Wxc$Z2pF
z*7oNe1S!?-Sj$K)gXb1XNUD&iq=vG)YpXTMYJ+X3iCI<t9{^}Tm%noZb>K^UWWMV{
z)P@Pvs@R!*I7%M#C|qU)Qn)z)LteTI<qE^P+LGy)D`rn3$4KPpziA<h!UqiU(!Vhg
zSfs*tbbc;j7CRS+d1fiVhrtCJnUpnwx&#?O5bi_6)MJ$~acr{na7wvtsftm<{ZOi6
zJc(whiXGy*s#vO)idcn~CDYPqc{Czgv*;Y_H&Y5YpwqFJ%&^gGo}#cZu@jtClzWDA
znT}GYTvZJ5WrQh7C%aXv^CdHCSusznCVI30>G>?0wkoye&YaqoJZI=lIzQtsA3s2U
zT3YPzOt)6(^t|NQ7vd*E-j=*{n>4ntd@*ttoG4Ux6J{8;t`_}vj4O*9rVQmT!mI{h
zRt59xzvAlx(o(S*Np2C_CTR=cO5#Nq(+ZC*qSyv>RoSg>((2erFk)}=wK%V#%o!ZW
zui?8_E~E=R17ey@gL$0j9(Yl1ZUjVgRq@1nPI4rl>Xd-<Q`uC~0AOo_`f)xKR!Y_g
zWky<xsU~xBK3l7FTgH`3mnl4|G5iVx=$eI7S*l{f4s)b9Z1g>f85`o$G#k>$cywE7
z7PP!J)8w2=UgeUPna9;+6!tX=&Tb{SPFSNH%MTu{fjE+eAG`fz3%i!Ku<NJAMNCm-
z)<L<<T4SqaC?(#`r_hKDJ5|SO?cAc~Sf*%?+tCzD7QPhw6q%<w__-p^=9%eW!6wYF
z)#_&M@d}6U+mkDtZYvO+vee=jUiru9)_q&4sygSH+~TW1i@;Y1e=Z$bdfKfCy_KdT
zQS&OqZPBV`o!X?1XVsd^6-(W-8dhBOPiDNhRGY`3n<F2wl&ear+Iz{CCFuaNl%Ep|
z=Ln*P0naT<b9t+CV98_s@oMoMef_-$#7JCkdc2(Z*SY_Bhxi}mr!UHz$@m{XJ>LI(
zjQ{Z%|Kl<K$7B4D$M_$Q@jt%H_#g08e#sF5<n0%a0OAUy^CEzFQzx(JXWh^@V2F5M
z^n}90JJ^Tl^1q_)Nciy&Pu{i<eDH^U%tNtrH-`%^`k~`up_+2r%dk+x?a>b(4#^M?
z*Jyq&B%PI2(~Hi{cwK@8a+m-o(4a?I-XuCv9Kg`)Qle9vbg6zN37--*dGd32S!}GR
zmn$2Dc&Mht!jPK_?o`UgbkbcdN+`O1?@Xd45sV$(%&#K4nvG>weRB9jBDg7pEQ#$#
zm`-htL+7A2O2j%sojaS~DCr%dK=teikcfD)PR5LL5ah_uKHaq)Ee8dMTr|pD(cr{c
zaCN%$xd_tPKJ_Ypfj?<ri*XD)HIy_XreD%O=B>Nd#mgIDDPh<8l3*O11s-q5;TQEA
zQa>D-h@S+%sIP6);-G(=Bn9KdQ`AMv0!2rmFzR8Ub!<VhbtJAEdR;oGZ|XhT349oD
zb3h9@j}yMsAb`2}Y6P1$)dMrPXv=HKh7^+^q=wz`A~{vl$zX%iNi^_-4Mu}@VL2kU
z;9w4R4CvrpkdQ4PK*^8b#Tj?Oi-txUn+FW7=u4KsLHmy{SrP&*@+I@2pB0zlJ2p8E
z(w42RFs>vw@6;{3`HgSci@8NU?%Chfo^9{P!yKD?)}Gyb+_N9|?8iO(``fc|<&r~2
z7d5&&JK`U3W<O~k#3DZa@W=2U+uZoHUA~!bmv83V<(v6-`DWTK|Il`>uMBW{XZsIB
zoZkWev;4ID{CN`pbMw*u^F88!{=8P%EN?!7fTl17iF!O=&8QRR*DxmIoLB*MeIaxg
zJ7?s(49DRvcC4DM3KhtcIdRDPtg75<d<dVm9L{^0t}Qh@NYkW{-k&K@oQd|E=EpUh
z+O@*L*sNAL)JYs_6R|kmvyLgibl)!y{-TaATk46-x@*H|CIWz9Pc58GTE`zc6Gk>V
z>vXT_2Q08?8ct^nK&7MfloXO|M}^~gF#5U&YN82A5|axRYhB;6&Z_odwOZv*It@NF
z>1jFL6|$|~DoI^^>h5@@aQ2wmwk{*`6TT>ijY|{HO~j32K3vCx*rG6zj26557hhaP
z!(iR|(*+SNDUOGNsI-%J`a2oETM6vi^jN_AAHpa&MI-*vc*j#?L~U*BGe~<hHbb#n
z;=b?BuZI&Z@1`c)+QQmJ7sHX@>=e}nI3h>e&)#vEmYd1xVcp(Qj%$bYgZ8hDm#sJ3
zv5QONFeggeI8-QY<Is=NHV#*-jx+N(T&u?2zdtR_arsV#Z4+xyAeCAYj?=e$(a1S+
zQwE(m9m9vb^<V*QCa*L=7KYE}n~MD#KB5sa@aThtLW>(W#aYnp%%Wh+E~X5PKr}6o
zE;`W^oh(M=APJ&|ovfmhMJSo|r#C7+$R5Cgrr{v)!6(29O!gh7ARUs#$_&dcahnSU
z%=j}(m1ky5E5j4ag*%P^Y!+eD-dSZbhNQGETc<f|PVBX1JVen0c)dFc&d-~0&{gA6
zQhh60yQ1R}sat?<v>3%iW+(~QbJBW?<%)8w5aVxY`BEls(d2RXjzeRFu}LKyE0|UX
z#7d!0#pYiyywtYim3r+ur9#i7H*<lQ%{uJ=g|N6D1sACHE<~m4;<|>8FBI3i$Rdw<
z;tPqbtC`rk%CIxx=&oW(MJRYSDA-qj!t=rp3GZl63pc8g8-^CbknOOVU&QH-`m@3C
zJCbr-2#LSEl%u=zkW$Vqgq+&~9OM#ma>zKj1eq@`(fCE192s^WPRRNGq?{BmvPE`$
z4WQ)Bx+?q^+`il{w&+g48@}8RViitqiRI|qNj1EpyOB=ZjTHG?o=tF4#LZTAWkvU-
z?{2!2*cRuPZzaX(Dc!P2<2#ggZ!3gqOov+up+I-@t}CVqqH1DX2sytf8WToWEg(J-
zzs7^_b=er!H%$a}rK1P4bZA}W<8_<A?EN2*jr$vDz&wR7nbsb^Y2>Hp&(q?+y?Fk-
z60`qozSw;BX#e>x<G(%He~kSHZNI0j#zDf~W4%~%W(cp*KxKG^W_aVw&1KL>vqBe^
zXcR>1VvXaZMu%Ryhy&+N+(+e(U7;8J{V5s#*ZSc#g2%$D9|qC-Vs7l}h|OSFWc82k
zer(piU&7Hz;Wgn|dLTa$7s@?RLk(34)8xKrsu%jq?EqbQ8oO|-hLfaKEZz(d*6iky
z_<#a#qHPvH*8Q~Z7)mRfua(=kRXz8#w(qufNyOV{Ke4C*n{p5X_)5;Kle_8WZf56h
za&Af)L|(gPOqP@%QkL}2f$oz0%cOG)ax8yu;{`YU3rH25>@(H%E|}nLxax^=%L#gE
zy>=yN=(Xu~Om!t>0TQIn9-#QU9Y##ULq&2BTe~zbxR;2Q7(qWe(1Joy`XsfM#mAyN
z`G3L-oU_SLC`TrOJHVjkpK}yFrqmgw8WqUcD)@6|&t&AJ`CZfX=Ii0dJblvn=XUMm
zwo!+^#u<lwQST9ag%daQE=WeBfskHNL}IWLEX>C>r63iXUywqR9gQFFU!<W-V3BfJ
zdm-zoBrK`N1Wy>H*vA$sjx3Z?-c?Up&7;HD3K&+RXN(7(Uat^SW5Td0)n?N?yD2#!
zuFIk;$Cf;5<>-#7xDu2*>lZ$jZi+-W^V$Gu^EJU!qm2v+*>{1MK4Me>=S7oW+$703
zHp(tl^Q^d-rxI;Zvi(oGYcK0~AG7`H^l(4#fG4^-cc{H9luCrLE_LPdU$8-X^wZmu
zS-qU;^r1DtEZ*Ujev`D&cC54|k^aVlymS^#S`NJ&#KTRFLzQAxI3tsL3Cx`YyBew2
zu)SG<`d6IakZ)CQ6XBq9VW*(qH|W`(l0X%)CekXAMSR)$$C<a+;N$$?)BLB*t~LKD
z6!I13|8e=}oBp-=)4%qG2%w}zpan_6DuS|q;l*!CyOSVvYbtQ+pCbja2()r@5A$J<
z`n=7U3VERUu)BY|6?wP$$gba0-N%*qjARK&uO_2G$((COdM+-4Y5Tl`=TPC7G@XbT
zZZBFRa#!HCVhwZ}(k*O_e5blJao99ONyGC(CG7fJ9FOARws~&+1=Axv9mV8fSn@E{
zyWBdX-6c{W(#=4=*&%}@U{}VZL*2@duWJXt)_y-xeTf$$G4I6+dOzU(xB>Y=6!_;S
zYmb;$vMk6tK1)Ng7Sl;zA_s-5`Dgeqc6K%=Y3nL2s=*N66ICQNMDNGLFdid!b2o~M
z5dBf7m)V>#rkL)i8r|9o+*sH>L+P8H)n0$&h@;661eq-<oHmBj<LN|Jx*8V$OU4T3
z;){qwsar9gjHVR%7AZhE^Kj1X%r)fqfr^c?p}PaJBzWmAp&6h^x>GkahjJX@K5Q?L
z?XuH97jNJnm31Lfx7&Sxq)uK}<4JHHOuLr_9JttAS_e^;YS~&CGDzdXi);CotLrw)
zg}>hOqRH0^GuzY>^D|FtOzL}V5lwgm;~uyTQJb!8?ef@d*_g6Sb#RNh?E*=ln6<XW
zioLcsHpD@SwFymc9Q9nGIgy7$*bNaFVSPci;3~osj6zh!=}_`E#Wdm`bzh%psJwBu
z5Dz`#PL8mXZpVtTH&VBlU17glfN90Yt)Q4XJ}YT&GV(=<v9J>S;x`4AAdRWzsyaR}
zE)^s;WCt5puMdfP?Om~VsnF%8C8)u4KWlY+d+zLR&<|(V$ki1mN1|y9H^>q5ph1qr
zma9Qdj?@OF$ij>5K-mqETTyC!<tyZSD{AV{YY&nB^@Drc!LnPKy_GFAp|{%6a<wjF
zYs+Y%ht<!{vP9MN4Y1ppf?Si*a(w_*rF*tRF2IDt(hcz~=do`^-%D>y6E8Z5nXu4Y
zaF(2X$itFsuW8A@6P68CPMRsj4P?lW8p$$9bVzFkH{d*+MAMpcP9X#tSG+xsA5-)p
zg_}?;Y7bRex$+wRY>%Z8ld0M^?5mwwf2u;Y$+9^PTQU30!~JR2CGe*bliUC|dK8uT
z)W4PZq~JK-v)R6Q2Wd))Oa5yJ=yA<HQi-k7o^bDmx$9LAxa+`QtE0-`(^69KxJ82s
zaxy)#NuFlyWOjCO>Ejl)6qXC<YqqwV;I@oQy|x=$@1*D1SP^gHv;{<e^B;zoW(5}c
z$906fr6q!oV+?MmK%N^zasmvy1k@TL6jL8VhC)%}(O4k5i)zwZN$>>`9hny$RaTUr
zne-g8g6<lk{cSv9nOCaUXlz)t_WJ-xv0LTa+LAP5G^x%t8&$!wsDFg4UD~_sRPw!O
z$KE^FqxPlqL)E8_)2~Z1bxYlY>uGN(dUF^Az1Aq{N<}Z<bfVJ;BSO9;pZXeef~qJG
zasaI1f^6y5)~kN!A}TK13HQS3ycki-%TyLBg-lTmd7h|E_u1>d6%K+t(Wd_V6Zs0X
zYZy!<{k1#Ow)EX^qeH$mM?n4r@K&BvG(uM~(we$+mY9G0rW5tWJ8;qy-kQ^rdv(g5
z?yarkekU9rkpt<HedmaaWI>w?R^%a7x6<jxDh!qpeJ{kw&(_XLM;^BfhXcw&_}431
zt+4`&3FB9}XgT!Y64gsD&t`ELM6kU<>DnE~T8cZ){V?BUFl_=c<p>atwwzL|I_$OW
zPa^g@k+V{7)do3SUQ)sewzllKb9lgAm_Tx=b7k@8e9ug0j@`7p9d&U{D&fcVA&o{|
zbN7O>__(#?raY%Iax!yjbaHxYkJD*v?^uXht!K1|)2UavjVaQR?u-uMSeGFWXZ?Ph
z)L_7dP6n>>!HND0HT@22<(^3vqgCCVN*y!gI^O#&uG4Ka6r$D#j9mv}N*4K=mRRxX
zzdT`D#olhB#n+?E`)K>OLz?}2C(SJK<Jy2mB_$^5H$Q38V3y+#Dqn$Pmh;^d&vR!z
zf29@g8d$jHP{b!|?;4Fk&<`I<#8RbyMG0$O6TY35`2cc`y-e4{f?|#<<!p>+(WTH3
zDIR*sCJ%k6+_}a4C+o;KH|Hc4Z-YG4VAApM#;C(Si0wKjHoBTF^Ndj0BH)zCDhqkc
z6v1RN0uI<2-hAlXP{XBufWm-LwDp+`9fN9*=9`n-HmseZK1e^8aw)evo!;?q>Kt1f
zZ3H*UdB{hQ^1U2AtmK6p%kv)3A-7zc-n%rO)hDmfNF+9Q&9&r?;3-R9bny~F)h#21
zRCH@(*H~{D1^qz#!Z|=Frf#*uS1ebt*0LBF%pKb#YPCa6*`TO!vbA0@Yq-1(+-a4p
z+fcHl^V(=l>t@zOWpvqYjMH~)hAca!!4leG0NuX7@TS&9-tXZ4|GpHDqQw9Jvhkmv
zSIXt5vHSn!&C2t~`~Tl9{`2$2?*AuC`DqQI1jJr29EDTy|8=^fGqH1pAM0oIY`y${
zE(V>jzYY?^cMTzc@fBOO!&Wjho&Cmle}7|Tg@@#Qr$58ksTll%Vyt!qiT>gHK>Y6v
zmOF;xD+KEo>qrl5p$)lv*Td0=AvReGW^1VbFUhc^@Hhm15p+04DOW=QRae-*t;a!P
zfrCpZA4mhU4m;sGCZdG<m!07SA{3b-V3;Lvk~o_Lo$H9+)%F@taim{`IE5Gilmx*J
zBKT?21tN#>Ko}vUT!v+$DmRvR&p^|uDyT~K<jmoU$b{@2EP6rV9~K@o6lbY28i>md
znc&U>73mavr9qI=TF*wws@VK^PJd|~S#1n~bfE3q5^t$Hn@ob?)Ckh>t-~Wdw#E-y
z#KXsOSbuy7Yib7?uv;ino1iK+Z>kC`WMW1z5C?0adNqXXiN5HJbEg7J>yG%~5a3&!
z?at6LaoWap?5B)q`#zZ5U@BPINcIGvjo~(iIAJj6@i`NE&}AoeYs@_9MFc+!<suLR
zkY9Ly!!5*uW_;b6=e|%WvZ)o*Bi{h3pM`LCJPUyT0wH(et;AgPDE8d+HsU#G;K<A(
z4mu2C88r{&r3*+q9L+8+vC2W`8fa?<Gcy}bM*Ti6x(}n_6W~W^tsWWz1%7GQ0bH%$
z#}d_u_dxueQT$>kcO)x9OEh`*jG(pW(9Og&znNVG`#3aJQN}G)&h#I^)h;HZ*;t&T
z{Zo(?l)Ci?G?8AQgwlRH>dgo}Oif7uOd5}aM6)AkD=-#uhO6qsW!SwW#tW@JYCho@
z#MBZ*0hP9Ls*R4F(l0|&g<~(5wKa&CV1h>z6p*mE;fM-is*F^5*dXAqFwBUaW-$N?
z_l#k-q2V_&ne%=;L9_c5w;8I^C2JIhOXeCsvphTxK8dU%aY_v99R!0B5RTy@iU~pL
z9(4dx0VBY3HdU$EG*00quqf02KcbGq2=pXv$VpUFb*^4eHe|nQc-gHXzM3QUiGZ>1
zVMy!90MJX)KId#_%(ke5=(Fa{Ya_dI_E*>ddmv>iddS@d2!7)!NlpO1P#!z558;@f
z-Ka%?b3*b!XBsCM8mvFy^fkN|;*SYxBu3+~YkPl0W@XHVdJ~yyy!yRxN=#jG!{qQ<
z?A4-@=yEjc_nftgZ6>za;U#aS_P3!9IrjpXxd8P)kVQ47lHM3t!%%7$>szpKR1p#&
z&~qf+5V2v(r|h^{-><dmFSA=(qr6sIpOKR3RpYm}bz!wFi&jrAVQnivtSx^R1|+0H
z4;VTH!BuqrgAt5wBD>zSESE3Im_YLB^?8EW^*oa6zQ;I@f@BZ5i2I{16Wxc)5%N9d
zvI*}YBh@hp{&yBmI9wr>B;{3XJ*M10J_RPxy$sRg4fg?Doue_Vc<hI4gLG9}M>iqj
zlHtdow<h5uamPf6Al~3ZuC~)t`Z1#`lY-7vZj;QKjUSu@mE;obkugId$UBM{UKk|$
zXYmijz&t}{s?5yT?eYsKYD(Tpazt5@8=?e)f$HCf&|G$J0cJqEAjz8CBQ>~1Jq$e-
zw`d@E5EP5xff$Zp*Tg+XkAPH7PDfL$3A6FUQk5kTi*b0Eo}>(t94Kymzt4kY50Ax}
zX&-7S9w})nLal0W(U|~+MYPL(xD2Eu#FWd_gvi%~Z3(+oFhtRunZ7j0v|!h~47%6M
z$SI*I;4H)--q4jG5Px(;O=4tX`?T|tBLp~$H<plA?a$kE{4u8=MwV>+Pd#hl!WNO}
zl<bQr#L5xG`|%XjEAmdDOhQx?9u@<AP-KA*y5{RtYSlQKS=H0Aaj_<BrVpcc)L}4V
zPqvqS#DI2-l(6}6b6e`W?#wqeRic;~iSkQ1Z0qy6C!U!xyJQ`4W@;E6&bq1b>_$j@
zU|7Z_>9HtrS@99dN4zaY0SkeB9&2keXBm@c1<zS)9IswE^TK)>^;qJ&n8Aq&q%yhv
zv2+Lwo(~7T5pf<RU0EjsuDLM5paLMg6xYU`5Du-A(SRBkvfpSlQv6g=h^dd?z~UYT
z?}Ldv+;!e}!hVM$=6M>92@o0(0RlS=&<mebG_`~pV&_;Fb?Y>Mtbhe<Zd8QKQ}eE#
zJW;%7A=|cM#8=MQXnL7o12|E!Of<qu=o{*dLv0kE$(Q8Dt+#hriyA&mU>`g-PZI<|
zl_ZJ696)YBs6)nCA{NaA?VOv^MuCq{dhb9A4?cE-F;c`9m7R3mjqH)7A{ccUXZ(Yb
ztELl*lSnEToF<1il|bwYhD0iQ3{^c=0EE~An898pU-%lu2U?rR?DMxIIOr|a#s?0Q
z)R8zOnbXl^E(v*AVVrD8fZp8BWL|H5-0G6FNt#_0<x+>mv>vSESRw7;^1|hV=S}Do
zNEE$Ur|*h0NVpnG8{{OcK&!ua&z``UHX^5M5I=frq=$*0?<w`6K5oYRgL%miovOt$
zn8__2IP>M`0%fz3)b339%p82|;8BuRkJIz_ct${qlqdW^ABD&(WlIVNfAm8$3N}Gd
z3#}QHN)6JD1X`Hw++dGsm{}qMRx_D&U@1{1(;KB{i1pcO#oNw!Z8}<Gdaznjq7@+f
znFv3zD{G36rK(T2K#zajUD!BG)}W)#MNuqpaNc?p;Yh<2X6Z=V7T)BZ1Vgequ{IDU
z!cH~}KJezBLGQYHI-TXDbSXE_U>#2SH&hvpJl5|!q!{v5%#>zrjJ9ic?r6JoXV^Z4
z_-i4J)fv$v)q*L99T)~!N=Sv#?Bar~yvW00bBYQ@+9~mqOShx<;&wN*e@b<SeXd@Q
zuREK1Nrp!ybJ&rqhL2lzo0a5GdNnnVyxI1weV>a$A)M}6IOAeg^+^y&k!C>W8d!s7
zTF<reqTd;6dEpWmu0?s!!vsU{vw(C;iWfj9XiGn*&d+JIWn@0Od*$o_lF7$}7Sd44
zt69tqYs1(M&Lb(6|C|!rh9;H~Z=VU_0b3>2MnmRTus2S6k)wM0z#LGkV#mSc91TS%
zV{$d_#-v1X?ph$Bmd)YHCUUC>*RUxtxBR0!>IHGlB3H2^JGNWcBx^h*27z0pPijGg
zPkuLJLuV+QH%~HD=Iq_TerC4DIUM6WMR%5^!Bc8=TGOn0*9tv>prZ*5U5hmo-4K`A
z<tPM}sPxS}c-mt#h^2L74dsLc8L2@SoZ)GTt#3T-2^D!lCugyQ(t=vL!_hIV`$^EV
zlN(NaYgX*zz?$(+FDJxv#v}4n(1aO{I};eZ!faT?qs4?%t7$>n?o1WaB7waWr`7Rf
z^d8j`F1-rh4VD$&mGHKvya~6ROiEU%6=uftJy&vK<@u9S>4&4?{{)j!B7?h(Jaf3O
zM=H+tHtC)n-sg<SQqFpP)*qq3J_Py-<&GpTnffYt1N%Y`1f<E$9*T%no8a=?!}983
z6DAY107RmWuuuw#eXix?U1t)GW^6rBx!4K_Tl#;I{dgV3q@gl+OEZhIvnH*q(YntP
zKuu%O3EkEtvOQ9{J}eR`@re>C@{caXHp9gYUr)yeBg+ygmGZU`A`1HFo!%8Lhj(G{
z0Vn`>Lpy~DwZ;ipBCzraiogav!1j}S9ac+jap+Nm9m1T-4nVR`pTh5$qUB2}LU7dI
zDzMj*e_~Tg9f}#<(oHLtf(S#gmF!~wi+fKS=LoGEQVrRY0?oGEdew9@@06NlWS>8B
za3Pq2AE`=Jy$r|%EqGS)RrViPh4)q)E@@fY&!q*YLGA*czFS@4Nm8BqlYW*Ko|;@$
z@x<w>=;!VF<Q-yJ6N<bjKy$~`_}#5zdFtcd>H3`Boyu9?gEc291y|n*tiGCAwMZg(
zentV)?1{1+qzvt!ugTS_DmNwILfdpIoT4UvjCNWP-GESy4K1P8f6ZF<B@KH{6~P9e
zidI>0sco{GcLq~K&Lta9=5(oJ@4z|o{G4r)N~VOAAki8=<DwpYB5ILotDAa}L*+XZ
z-9hd8wAj$vz)m7vW2}%pkmBIa)t?QgNS0zZ3_TBiW_yV^8-X+edzM>R`9-z-qLjNZ
zWqnC0?P;a@w35HHr<KgoUOcP5c$UAk7tgYLTKTyO|CjRiw30n=Z#&U-wetLV$vm<~
zQr2J}uug7(Qg4{B^x#vOKZeRE^0xL{yLQlcec0any;VP{VuS4}(I3ck2NnP6SzkO$
z$q6qqGd``PWP}%)8Otxy+WI0pV_F~Q^l&qtO)oQP68VuX8-P>^(n#2o78e|UZ-L4p
zlExV%W0*;`^GkFkWLcy;nt;GsAVo6?SruYJz&Y5ECI%q;&{S*7b?2OXut+~3GuMtA
za7+Sr9mvy|@<bfMxf%sDpdOI<N>ZbYZBPfq(MR%<JbHPw1&3MS#CVMUKpdBB6w-OI
z1DvGR>x`$)$$RvXs{VzU*FH6eITxOrI4~`IX`(^1B=$zr>1e=D`8&N3^Z02VA#p6#
z6?AZki-%3Lf}n}|7$Gh?{btaceMHqAZ9R;MfR~|;#)cE?;$(m1Zi22Ox;Qq%;@Oq8
zx)XlU+ZH6t^I>^zofJq=kdqn}C7H#$0O2@L+mu!{YQFBHUMxpe`UVXHINT}Kl68jb
zCEHm>-;w-?;Bz%b)(>QHsYgB%_JWeprqYOHs&>B{(uLtEc+#OJ@wpc5`+?)xW#+oY
zDLZ{f#5Z-m<O7{55eEl0+n60?IP#g*s8=$-uBMcz;V@AgS#gr(O{GSeOaiD2B`@sJ
zXl$#DD}pb<&<oiHwu59eq5i+K5LQ*derz3#z1a<7hHR|N_<m?HNvXasMx&k)JtQ6A
zOw$X>LTzc_A^n|pmdR+;JEwYJi<dAktP97Fe!zVn_u=F!fd#nFk}=3O%oKN5J2H}j
zH7vQ>qN3)!PiqrShj9UtEn{8R1}_!nrf7+i4RhK|bBo?TQUR3K<`bNmTuqkYp4fbe
zT21mM1j>>1ghRF2zz^9L3i*SADeKI7!8xu*R6{`_k(@u;6|;y*yWs+#QfrUw_T50q
zLjBtG2!85PZnxN#bPbQ#gH;YDTQs?Bnw<A?)6C=qr^*R7jT5jg->yk=uEUYen;Z^)
z#fz3r_&`dO-Ex{mYZ@xM==r*}Qsx&-Y}pj73><cG<T9yk%aVC;3nVQn+p)aUp_v`#
zKt?Vt?XvH5D6?A1m?qOeH}>w+DOtBvr-{PZEbLF$!eKEUxJn8EkqU0sVV_zxsaDW4
zJ+0O=9MwH6x-ne3A5o-zH0J%wbN9f}Sd~PL3}856=0I$<Tq=@faf%*K{4TD773u|Y
zz4Gg>i+Z*=`Y~!ZOKGJ7-&&wZjkrl>gc3z^X2%o0MgWZjb`$o<zA2CNA^DvI|NH;5
z_wIddBgw+>{>`UQGVdlfF~&9o2)9{dOyV=fu(rwNnaQJ{um$MEwmgy@V6xfI{??_t
z)w)^+GJDQG%Wozisk^$my1Kf$y1EL_$YI2FN7uDAabQe7)53XBU`py|$RaSqZ9GN|
z<Q4;Ow2|(tC?wB(<}w7W^T(nsawgOqDdnh!8!DupU6v3D;TK3JZFr(IVE6bj6!&N1
z7z3N)MLKlsRRkOZC)G8e0;bc%nZbUIGd}M?r2<${b>ps<%PJvenQoQ9wSGFKBSAKM
zO54r|^mKelSFkjegb@r0$2!%ftL}WNp!qo>48a~}lm3m3x<5Mlmy~HTt9H<`sLtDk
z1JG}zX2@r7E}q(9r$qi!q$m0z%*e59cLhz{=oujHu6A6yUu2sz1F!PZr{Jp8w-w>&
z$UD7@&PA_ztk;o$ikjaAc!4z1Uft;d{;^>@X$-Nvhvs#@<zbC4ooiYjaaqFHtEeZ7
z)ahhhLt3w+;JmiQl3Yfbt%8Pri<-9B=Z!$)BcRXaB`T4C71ah@SjhiN`4tSG!_JVL
z38;}4z#oz1c6})Jv8m$t-P-cl8t=eJjas{v4_zIjM7}mQUsv$b4_f+(K{QHXJO%lP
zyF6wfWc&&429W;RqbecPWdZ;tr1qU_J(1?cI#8HZB+a;{-qC>qGH}bMy|^;H2#L`0
zdvtu3HK{IAPd>1H0^2~rfct!L1Tj4ldYCe85+wGUV+9}MGzKp5;s|c2^+A+sW8U6i
zMd&IwA+b;R>R9Y1eu?+p1ZPP}4TaC3tc2yOy&is9Cm%Q+P6Ue7cs^jcI1JvtX^u<b
z1M^Unc(Faw91T(+g^^kWDva4e$;bwKxnu*Ii|boo#gQ1yD4HTp(0jqB+WY7lt?%u`
zM!0}y<h-P`kK6s9<;r>Pe052~)_%y-wyH?61hkGDheyzA3YwfGbcNRGy+2MixBIXz
zdur*sAlH79t?a8R*pgL0cs!YpMi-$LH%rSk2qJBgySY9T4mY<`dQAV`=jq>jy}`&C
z!*cz5pQnGJ=4!F#v?C9)9rozhl7!7$PU?&LZAz&?=@$CGaI6+v6bcR?R9m`F7ebZV
zkh>NxN`h6+!)8z$C5`IC=AY3AwHpfcLF|ciWD`0T9)qS{nEhztdobBySX$7s59Ol0
z|3?zMC?Rt7Hq29?U+`KXtqG*RkromF;~v9G0?NVI`0eWdGwNvY{%r(G$j~y5j5~ZO
zPblup(a~M_5Lvhd{R{Cu(h&}!dj$9<pj-P_qI*<NL-(kjjP7P7jjaG{KvA%j7@U8|
zZvR67%v#{i(|4(V@1DbADX%6W0g`sBflLb)zG%&0)f4uSU^h>$MPiN((d6K;PaMif
zvcS+8Y)BB}ahm^y4aaIti#DkZF97*_H@rwIf553+3N76e5w=}ZW$=c@^u`^*PWAy!
z$c^{HW?;V!tMVfoyMhI1aZ|WVWw%BAB5qXm=6t72oOcPoJ78y4p76B%(j6HuMY)&S
zRjHCRf!xd6q#vtQ18)eUvWVB^KbCKj?+PVsO`%cOo5`3D7&~b02jRcO&6t1%8^O!g
z<jPeHsJ*8{FrV=T(#f;@xs0_PoTrPr+<DfxDZ>G5)p(Wj0iZAT;6NTS6JVG;W&A^*
zDctb4e1C594p#te&B&YTD}ja`*qpro87zzm2%c+Oad|_$KobdUb4wax&IJ193uiMj
zzm}xk%Nkv~pz+>}$qnEVuTwc=W$HyU1iRBBy+7R0p4i93C088Xh<j-z_D&{7z{qOO
z^))TTqPVh1QsXT|2XNe2s9noFxqdoh>A~hBn}77~P?yz}g;u$1_GRgk@ea1d9(qBO
zPnURID2C2;_$9}0R!q{5wP@kLE;@P0v-*~J)YiaKR(0!&qOe3dBYHw^JcVOt{Hj{~
zzcuAgLtf+~ylI7|Hh?3KOn{5V%98PG#VD7{M#ZRpmdhTur7w6-fLjsPDuH9z%Z!`D
znS}?2tPbERM6WX%mdhk^K8D(%ZB>5y$lXy;S*`9%*p?;x4Q^bdwhXHgu^<oaDat82
z#F4J|Q?j1xccTfTz*1D#K2fMKW;$scGt`L@?M^u8U$55;m<dloW@wTK#?Ua{_de3&
zhKMcRJnOKMP$F*+A$+4(VNC!`<@*?S14jv8un$C9m3q71f~Rmbfzg7hfMmEDzR0eE
z9SPT=H|%Z^dN0T(mktV5_<gDIPBqKO+iJKQn97@Ld{*V^{+^;-3t-@Qt|%*%_<+t>
zZR?MF@krT{EdRu_ZS_VOe}>`}8lAXS%BM4AI8~pHe$>-jd;C>@7|Wh4ITIXm-3Rg#
z?~jN&us)0;6GAsQ(FFPS7OzzB(R5G#`CWzOvB^fMCL3D$XDIAp2_^fQ@H;24!V1fV
zw{Ll1NI4>W+7p1Wxo$~lto9c;JqnuMlK@j8c{Gv>K2UcoRgeUT=fUTv1S?~FvXN>j
z$YMDP#{dyv`9$jD)XN2GA`V3oLD5lH0BM~r_sF@&cfLd@GhD0v8*2(3JW}rTB_jxA
zfEx*MxkefPcSDTT!|I`p);AfDlG1bIB{h)tL;G@m-2wSf(85qY4<J0>Grr>#3v)vh
z_7b2q2oc!t+_8(S33|KUz!I>d5F!o8>uVA8m!XReAp#wbFXKdC2D*X}f!tS)$RYSk
zK#o9&4r*Wycp%}U5F!oeGyY)>L?Of|6q@_8jio3QgAk<>N!UlRegHwjUWvz9#UO=r
zb_^H|-LVQ!FyrJ?u)ncHjAt_cuj2g<Y3VOZWa%;xEwD=11d>Jp2w(<5Xu@X-3Z5vb
zgVfhtJT_BdQfWI&el;HKDJF&p;_t77Hn02<iA2icU_3ROf0yS^_|)#xK2kn4<%STS
zE;?u3W88ho7agj*DIecr-Ph^~>u#aW{6z6>^&pdf`OnY)zMrMy{~kLOo}f3IPo3ch
zaeTZu7BB|$+kX8zCH`+^Yy0(PB>wLP{QM{W@1OA#jwbt|-tDwHZ+qr(t5-j2H;*GR
zW#iwa_0T-+HDHU^Wr@96tyGLPbP!Yi27J+Uq;0|q?tr3<4h;;KN;V$$d@+TmjO*?0
z4s8&;?tMYtu$UH$s)=f}39L0zL1bmQ+BTHa!u0DpX*jwP72iRy=!b+cRy}fwaag+z
zf-;cO3%KHDLDZ{NvEGD%4J~hoamFzqrz=9x?m~UJBgRj#5TY%N)=UXwa}!{^gx&8P
z_g^oe4jTpu3Wfc7iA`5uVpaOH_OcZCpN(9O<%JeY337V_K>mHEt~elROXThexcM{m
zY&veY(bew@Y!gJSjT$1tOCFjj(SAC$hGY$$bIc-pFhV<CfyS($M&)${+e4GJM;BHo
zK>Ns^T+eRk$^l+NDDm4{%Gv-+q6iB})vGnAu7&!OsV0FC=m{Z0q4CB|t5(BmFDd6G
zQccBrOMv7D0dz&}ZNaP<WL9_rMp(&sd3XQEOBzR6r-~Gkju5bM^1iUO!8C!<2e!D8
z%pq;?&!8f*<4aldCGiE!1CBd85jP1Y!1N>XD8@h~vK6}c&aRm<SQPNj;ERf>D%#kg
z4iSGKd_)M9F}}KjrbJ9m@DgYbpMkV5DOCSUqaZ>ZSjghSI|#=$EF;dF-P3xf+3B}G
zNiksd`WNS|V<jHQi1X7F_MKx(0xB4k2!-HDYfh}lkQW%hnIs6!ahkw1EoqL?Bq6pY
zE+vK|wjm~<v82HvLL()LD-m;)X|_83W*c;u)ALqaimY1o%@z>Nb77^MkZ&v<9zW;N
zdro$u=Mz^x7PP;GkycCr@zZKF48TfqsxNQjZ~zw$wIq(F@G7<7moN4`Mx-dQ(=uU9
z4%d{-RYnwh$%S8vctbFchqMXiQkU-VK7@%%S%OD!=y^!t5W~W(EQqlE#$aJNlOce!
zgk`R-y7Jhdpt36kFimA{HQ_;8>P%WBe7>)J1?vA7az$VtQ47S$HB8hon)Glgpf|d}
zzq+-v$@qtard&Kw-h$*I4^X;=Z(pPNhom{IFf-wOMnQbVr7LlqE5^1~d;Nx`{F;Or
zZR<HHPP@Cb@i_Q4oq53PSLW>C+sK_<fB*iReDt91a~g{gnxZK<DeN`Siw6S~Kh>P0
zDVH(f)O;i6$H6B8VlU&92Dl4G$z(#TeUO8>Hgd1=ylai*D~#8gguY9o1AHk;U?y5|
zRLWHiQDx8Y;r)Q3P(%0e8j$gV98T>qNVWu~lf2;B>{$z95LHsuFN(WGPd@JtwK&Rp
zJRfKB)g94=6=@-T9NmdVbQgN!`nVTP8-Q3rp-dRh(<%ErJ|=HF7Y)-y!pgr754Khw
z-Ho>}p>uR1QcUZD_;bygiR=gd^A&ae`%|b3o%fA~<SMbhAZy-p)QmWm4&FKdHP0H{
z;8&P87$%K$6^RLAVdAxJk9Y1v+jM+WtCRqLC=&(9RbL!eP4vbH*5Ve!FicqW&8()y
zu`M648o+s%wx=2q5gTptv1NxuJi3>`FG<0hd>sr@k*U^dwS-1xfM*Mcegh*0e4`W#
zl3dyc6Xfz&XJG4PL}*fqi-hjO0$txI+}nOhiyb!(LW7Khfa1PwkNNbCe1NHAwv1ly
zt8m%2oUC3kwS%?Swe75rZ_+<*y{=|`+{pO2wUhNRtIh37*2fHlwl`j9ecTSFlrqcU
z5T)(a9=-oRaXpZ!!(;L*=fE&|G67}h|Jm4hUD=B6|0~u1?En8Q^Z#Jxo@!-@%st5|
zgeq^AOCj`coB!wJtP2zU__*0<*Sqz8>#P&c1e9DRAtz9>4L&gin=prS`Z%_$(*gkN
zmXCL#aAQH*I|hl9mjok8^^Ai9Bgmx(5XdEozD_8DGNmiIy0TS6bn+KuSP6=vuSPuX
zltJ<E@k&Ks-q+&&hF_*23PAwSl0+m31lq^EvCj_l5BQ3%X|4pDAF;>MdnSu&{gbb9
z5Ho0gH=vG@l4<bR_METwP=Ns2m>iPR6d~f3HwqoU7zai{0F_lmvJTL=BVitk#?M9-
zkf@N?MXu)6`HMNiTPb?uG0Yx>ik3eZ0n%NNJn&RN%p^!`7dc2k@n@qDYv$*~t0e$3
za(ShgC)M$8VgnJ-vS{qKPEmNo>}5|<b>L;H>8gqwS>^W@<U!18v@;jJcDWdbBoJqs
z%_z(U4iz&cM_Db0D%Z$BM&k$ypNkv}Tp_!=yBpzdjm?;Y4p%e;$+O27jmFzFWtM1c
z%IvzgzWB4@j{FEF>JssRc4PG)TErjP`qle|jnXD_9WFJl_r>BHh!}3_n(XT`to!v~
zU~H5(%UiOvx-mY+1WZ5g8q{Y{&XxTY1A98`b~7v+ANX1&g%|fpMHt;nSZMFCMe;CK
z|FN$11-1&RUCJqS&sW}^*-O~qU61a|5(4^_@ZN|+m(x`gAKgXgQ(i-5;M0@(MZ0hI
z&&=-G2lKT4UuRv-Em?vqkTES><+R@Kwm!lx_^exh+ceL*$IY&Jc8-f9LXh{5vnFxD
zDA2$i98_rsjwAQ%po$Vk*nlLIl`H}9tUxs-6oe?C<VQKP0fCIV=v^#nu694PYpFep
zpp=b%?s21z2^)nzeNL@AY#XKzPqg@FL=!*b?-eOK*x@N#eCZ`~iX?J=N|g&i7;#Du
z9SP+-q|=jdN+`0CDd@Q1UE9OG9H5oLq>4x&!e@-`sLFR8mmC`x16CC%pzwQ?jQJ~_
z;Ms{dtKwxYf8E}#HT~dWt~xJ~v?6~r%h{umJNQB@E;1!*uZd~ZYdS1p7ffqHD}u;u
z%&k(5#P*mgf+l;;AGQQ5Yx5;!0V4~9te_Hj=PTZDC;vUahC)fjJ9F4xc=s6pDG`MH
z5ung6tEwcyyO;=QMs9ebN)j{-{>Z?VgNF5z;2k*%J@u}Q$7|}Td*dI5;K?dU;QG8k
zhKN`9uB+k4TKI7z3}3-W78}~NpD>m)odAOQ(VK8Do$C)5X`&3KMTZ&5qA5l!W@}bX
zU|NLzqLl{5k9owXbSg{}q^2yuvqtB0`9;nD1sWI5#rmwlrp7*{KE&6s6vVz9#NI?3
zl?z3-2nc#Dss)lk(1@a_M1d>%Efz2;*=rPrR(Z+J<L!ZXX@vo`hzJ~D%2zq!fs0~{
zGJitzVF$)MdxQYroJ)APiBxuN;T=-j*ZEXyD~nYVYKGg)TIU|WomspkP1uZTdebyQ
z5@t@7c_U%)&w?}^iBpX~ML@n6`}WA1s<o7_g4TGRX$Jih8?plxD^z1j21Iv%4#dNL
zrBpdIH2UP=G0en}!dTj-Hoz*_BkHD&WUSQNfEhIZsQn21-QAYY5;r_>8lCs6hb4`Q
z`_S3lT{>3Rx|3%K2>i3tI<JpLND;=HGjT9!+CVNakx<n6R4#UIPAi|Rx_*E??Fn6l
zThxZIm%uoTt(Njv@^Ud0I$tlXk<)f1Lsli%Vw5wgMUdmm5NTtsbQo=dOGoT9*YO1k
zRfw8jpprc@J$v@m8kv`=i$JdW%T;y8u+e&W%X;+%PTe`F6jGDo8>H~wh?es?3~}t;
zW#o;Nm`XPZg51%SzMR-4DC~j~;trpQA9YEC+qb~XNcWptS8=a>%4<T1NkGa9s%OIU
zDUPzp;)N;UgUbYDm%`Y}!NyuTa*8~7c=~=NlZabBdY4W)8g5V}9Jx?t&PX!XYEoWT
zBKy{LXqDILqU~OGcafC(GZJ%m7g$LLtP`GZd}ymj)UF%_58$XyBEasl0J{qSB8D8d
zRD+m~7;||VW_bX0H9!hZj2|^)`uBXhs+GiH<&`^SToDMPLzd`6b>D~!vFPvf%phy8
zbfwd*#1s{9J~0}NrRJS>_chc^@>wDtql#ogBFgMKXS!M=udivsrCgd<poh-Ku%1)I
zLwS(Ofx0-aG+8+`FA_EN%E`TvQZ7E~!s3IL6DW5;#vbimSH4pez}C$hdz7Y+2d%{$
zJ5V1LI3|nf&&Q^ls3<-Rp0vsj>?Z|FotM1$lz_+1<Z0nqcPa3a%ezxraP6fkNk&|*
z7J4x*RRLH1r7BpL{<7t3OI7fH_h!rWdFdMV-E^t)BlmjAv0IKteky~e&~(`btYur6
zS<97o*x0w^Kx2TW<p?{!%&wqG;f{S9(r+aQ%U_bc=*sA_bC<o-mm(ouc%R*8_m372
z#~)1Z;!Jd%J!YlIGi<lqiH+8-FrcY>`=sLX=6>ovtw8?~i^T79cZL^q2`K6XBAEAy
z3ZOgY+;+Kz#<c|->!ny==qz8y_O6yKKUj`0m&b{#EZ58*_H4OEE^Tyl<3_T#tMb0}
znY4>goCu03kvgXTajN)_Q^kLrD*iQ26^XtSAtPdvlxRq^bK*%Eux#>&CQv%T@@kLl
z#V2J62f{&)YrsSu)kG2%<#DM<1f(Z7T7aCext&lPqLQ$;@l#;NosK9nX`Z8sAQRR>
zB%x9OF8#naPGJ#IvVD=y6fQ_8YUDKWC0M(qqKc8J(2SC&?NHRqyv`D_aG86{kmW&5
za;}s!m4!WI>&-L9vEZ!-i(`z+=2b-3eApfxHx`y<q$oM>0VlhawcG9fgR34V*X60@
zM<zDAUh0lFL;FBwok(C1&O9)%v-Ft2M}mpgb@>>V6_dBNh!M%U$!^B6EDOqGXRM~6
zn|=74V0#nH<GFsG1Ma_LehF`q(S+<|7@^wn;<9y2y;Z++{ERIOOQLb;Bzt6~@pp2i
zAkSddJ~Dg|s`R=zAr?1Wl81pMU0Xbe!Bo*M1CU0VMv7@CK!ro-p+k`v&~v`RKuNtY
zFWFPV4q~M)9^H$HgI&Xy&g44B;uTx`v`A!fheq-F7S5CKajNcNiBR}C$2KqOXwDVo
zE}GVNG+L(~iY9t`%61=8w?_t)uItVoQoHS5TNL2;A(hWjq&}n~uTly1v)6PZ##VAy
z=R*-w-YlhCA~Hw;1rHO+mvv7?ahYIW00Ldvwnm3y{k-G1dm5b32YOowy`o7{q`MQ}
zmkD^4e3$C~k@!!izqCz}JUh?5&z$j}wzn#^$o<c1ZF}oK_doxX`=6^T`VGqg-mt7z
zODZ9hH+4M`2eyuhX0aH?UGQ!1E5;IEfgOap<cJ#$G){2FQp4xFmiXq9_F5DJ3X~Z>
zz4KH66p-E&{(?MRSLhNs#yG{aUBe&_yGouCWcgAmEr&0MC3AOWh0^@$IMHQJfehZ~
z=Zl3NnWFfx@RGw!NHR>^nKKXxlyTcT0$`lpEoe?85eaaC7C~k@W7{hyp#V*3M35-3
zQPj@DVnxQgSP~0EbZ+Pl<|>*m0)tBHDyb(%QSo3I^aDDJORBb^cYylTu^rA72Omi;
z12!>60lb3}M0yP3N+D$s7J3hkEwXD}27zNi%DSi)KvlqF?9S9#FdCj{N#Y;{L<!%$
zn%&}n3kE`9D&hUqbC9(}(l_EG+My$__TRO7M(^yT4>G)Iz~ATHv-hpz=CN_~2|hNB
z#@YF&ZtLy4zVYs?ecbH!jC$u7-gf%k*3m`(3|>61_n_u;{ICN5d@`CJ&%4cD&p7KE
zt<&>%3!nk`ZoSiQHG43VI*s<lajWyTWB>%CbJjQ7t<zQ?iuKP*)T*eqqSZ6bPK?uL
zxA6`h>qo72tN)2mIcfDf*chxsM%_5Ccl)iz1uR9z`9=5qtk*Q4V=KmSs|V{->$G`X
zhW4RNqxrts=^MRw^>#bb(X$VoW*3nU_423*SYp@!Y>E4M-0C(Pee6*1R|AL#2)0W`
z@4VS);UCS9P3TU&`>7;=_L_gcfI{%mIIf@8-!^+IMqvR}U|bp(-R3DG4V36z9QFFG
z{zboOygfTRCR+EJ-S@3Vvj?kw`>aO<yXZAb&_=&bO#=*|R&NjgKDy|&h;A7AsoT9E
z+g1?<`2!FJu&h&RKO`{P{jj${q_gfPgo<>a0Vo+C-ZkNU7imh=sv{M8K($6+F9r<)
z5&L>KjZX7z8#RbV6F;9Jm=CR9vj_v$>R|zZiS2%<L!%eeYaA!Qi~no^)g<SZXgH15
ziBUg(-$KkqY2bifOK=O3t?^FK9Jzn>iGMijVFPA1Mrjc?QIv8+N-GG{nuJ54TQop(
z;L@$hU^E}v9B)8G0~(J(YCs8*u#i2UegL|dm@s^M9u=D2#rZi5R_xY%_?wJ#C{jW8
zn)DrJQ!LmHemiQ{MMRwtNbOS)Oea>kT|cikercxE558uAK-}J+wo^a^Uj+y)GEX8Z
zCbRfT7Cs5leG0KAiz_MkBnAhHlpxD9bI<<x%cy*FXsoQSHg4>}7m%R9J}M5EjJi8>
zIEvS<v2Ro><qa9bX3k-8_|OBH7C3{zGaWfR9H9&=0G<T=BrM<aIGv64)p!Nj5QTY@
z-9hK1a3mkmECqFn!$6!Xpcd1C%c?j{g88%tonNa~Hr6~F51#BbXbIh^ta}-FP3FAD
zysH-ZSCwyggLY*NIPf9`a}@2T^o((11__80F2cCAMBoc;3@Fu;iybZr*c9vs^WI&@
znl%!pReA}<ahWiX!56}ijGeEs*knoJ)C55S*qHrypeCSNVN`27l(H7T9G{)mTOAS$
zCBvQ#NRbU#W`v;unR(~aA#SR-k!?ifa4iZrB5Go#YPm)%#<N;$jOZJ~G58oOOD9_k
zYjDw!D2eni9R_}lm(Ffi)e(i$T0S}HrvrhHv9j6C${JiWtx@8?G!XDHRyMm)S%ZtF
zH46G5sD5YZU_^n$I@t*5HG?Xl84(XCvyy1m8M<>2zlX+9yx2=Fb~SRX*&%55s#yBs
zQdA%iKai!y(a7@Ye%PMqnJmE9e6^2^e1-`qu~Fo?B774o((`h89sYy8#$f#tpqFo+
zCq(B;(k%Wr*?+8o=laW-eiq07s#dlu8`X&Yr&8N~{U7_!zhnF_4Bl0Jy<G6FIFv06
z0k*S56tREX_+O{>M)xcp@=Lvr25>*?wvUyDxe=@NUuWI)udPn#Yvi-|VAlI)yIJq`
z3wNda(jP^mkkInso$-%<B!4-$L%){a@&r#ppPB^35AQYL;V%dG%K*>#NubE#!5>BV
zSz(6d7?v_2lC*8AtQn{0<|>w4*RpkK-_ZBW@NHtrs*0f|BV+n%cOy43ZyOC2h+hPP
zjqS7oz*Xg{lvbh=B<oB=vC*z~-bx(D=y3FE?4apS?f?oH)xTtHRJM6TMiFl}%G*Vm
z_;sL&<n!0iN~;167nAYi_Ej;$z%*B3%k)aULwpcGMt2!Z(BJe9x7+VegE0VCK&Zco
z_TdtDS)e_jop$tf9O(+aO6>^&&Kwgekz-6S5VycE8st2Q=xn-`Z%RgmZNliO6v00X
zE2SUUdMvhjMz64Zj!wZOd;1}s@Z>@{xxS5HOtm&@sYGX}nN;T@IWX*9bQ+CJ414Yf
zv-}vn*?j1@5mcf2dH2k0G*VDy@R_J`kvzogy$@JKr0hsS_|!p7&%K%%?;U>*FktBq
zF-SSw2>~jN*x0Jd5Ryf5EU0&bVl*Oh=)Qz;@hQk8U0s`z4ac94$3WUYWV+yKLYC`D
z1G^Nxk<KqrSb;Q`nBjX;Ea2x23;OEoN=3Y{R`9tZ-fso(YxrCh@3({Z8~9uk?{|Xt
zXwj)|iuZ5C`{&iy&#67_efBsC<3%Rpi2&(I!%2rnsH2ovP~&uza(Znr(mbJqp@GHo
zSs^loVXcjds_-iku^F5p`;nJHWcfS-@#r*@Jox=%8>tZ$ct@v=^K+0M*q7k--Dr?1
zQ4m(<B*g+ba6W(A%EbBH^=2a+ySJ3uQ>q$~4gi!CJQ02-o>(9Up7m}vZSi{wa*YPo
zj?fm~G#XNwO2HjrX5x+oek|@y;L>jEybqw#I3XyKjjUE~fdWD*UH<8gf%)a^wsd7t
zl`W__U*Z{G;#HK4iREH+@{>{+o&2mih39%)An+<0euo?usdp5W8aM>uHvzed>8h)w
z(n1|KE=7_$9jh;Hg#=q1*x(eT&V9g}7xBou0~0nK#OW$S!(A~I>gkyTx(Moi1nPb|
zQ~^-jLCA!902`z<bvdEul{_7(<iRVv=C?kcHg`qFCrrQsD?Mec;d~&JX6Y6k8;HS$
zLz1`#bBEb<FbYE&3oEi3`v=BGF|vt;nX_R6CFt;9F;P~T(98`b0#Die)w_*%xcLi<
zmAjoSb89oY;GCKtw>Bek(i#u9Hsga$U>Qm(^Xp|Qi6?fZ>1@FF7#gjQJ6qv*@FKc8
zV$~GH@KZKgR3eO29BCyGRF!mKI7E5q5h<I!dQgp|)x!!6Jgp!R!dhP97EnRVL~`-E
zCW}EAa^SYL8O06@5d)?ou!xXcBy|Ww0zIgZ3Jb?1U#mgM93(nzo%a?}2n$6BG<K$b
zOdrITDFmZ-GBKwTIWX>>!>@%XQ>h5fc(ZU2Lz>>EV9jl1qRmBe;N7a{;6=7Uv}V@C
znMbWAj+McX1U(Z`Dv<-v^Yg{jCD-TkD1E2+b{ePfH2&@sn@Q;kB9Pe=ETFA3xX_%@
z6;j;w`uFHfowC@R_0sH&@a3Y@`WWl^S&z5Z^NDlE`41<vNVoA*tk3u|-apw&TKDAl
z>|ugZ?N;X^4Ox}Ox$R8mcOn{B)B=Z}l9)-s&Onh1EX1+iIqsgdj#IF#PllfB&@liy
zZ0s7ccyPCt-8SYv4(nw2#k`z5=s#gf$e^?&7sdo=n00i7CkQjw9Sv?QXA-f|1W-al
zC*CGfMxvNO8(Cx_bwKON8btUk#!v2C=xL1y7s`k&3+-u`l*LH@t|xOf&iZa8z%sFu
z3bBk{sxk|^dDJ^jom2QNc}kslwhdonv#Mjyl3v5?pCrA%^oLID<*|oRU{6N(DeR9}
zWpo`sFYNVcy^+%EQ)_S*T}$id=j~<+HV7#L8~nTwo4<F`xD38TaDg`|m>`r4Oz`tU
zOg^+aDVTh4CPTbGP5d3k2`X*W5>Y~M87SfBJSZis%VDIpHYp8d6sx3thk}@m*TSWk
zi#3`HFl#guXk~tAG}ABxh}oDyfrXegKD}KylIxB8>sx0MHFZ(V6b=($Gw>57@{lgg
zKEy~0%aA0al(D`$wxvk&HH}>`6A95MwUiPc;Fm`HMb?;bsfE4lU8Icxd?D-$lJfq1
z5><QQjaU}6vzPbX=>FRO>Y>(|*d;_Dqc8Y*aVO5&=<=P`5k4u(-I+)9Mc|RuA5lct
zJ!1h<*w(XI+F;^F4TPMD(zk7jm$LIF)=sZ}(!^jR=v39~!$Mu}9-H-Eui5Rh8v`|;
zgyG_vRo$%A=a4A_&n2G=?e@vkLr2{i-!S14_TAK?<n1NOPS0mX?4LHa?gRpzv%Xp9
zrV3W+GHOiHK1Y13lVYqyD(8vK5e*xXWUX2Pj*1+j?cgVIq>q~&Ab6{D-27<vKb<#I
z8jKcW;;DSd`pD2l_P{uO%VC2J5orgHlwM+Dbqq_6D)Ee^EPFeIUK&OSm3MVeUoKkh
zeydZsD~1ch3rVl+S|QV9-Dr>>!<B8(*3iD2*~BKuMng<GH%36JE213doBP1|s-0Q5
z@XNv7o$1kerKz%V`!5ys8EpXxuocLLUdIr<Knh+G{ap40$0I-Kn*r_5@eCJT67Qz4
z&4qa&4ouOJxUx|)@DgBk6aL@kGXX4nbdQZ=l82HpvMIulkN5ka66gya2rgJ+Kz?0^
zS}Vs~mOh$zxJE~P5&t9g#dz~3!H}dCI6SCUBLZ+R9nF3GzbFj7>V<K5K=&Fe8|`BB
zd;-rk;WHk%Mkg?GLgV8{Xeikd9DEO3fsb9vgD4Es-WsO|3&!^62t^Q1_kF#6(KNfw
zlV%rh3H7qO%|1+pFcTy)5_MP<Poq@^sYJ~rifsYK;!8_nXHr+0<kFR>YcUa(GRtd|
zOA@&8=1mes;WbLVcex0uucQ5Abq*<ZlCihUF>l^fszrH(fq>BOEC9sk3<N;EeA{jv
z0n5;u1qedzSUoJkIMPWdt*S9cixk4kK4;Qf6<UtWAjV+ko9<N-v4K1=fISraXD170
zYy#Jgtnua0VqRUAON&X<HiS>c^bMC=jv%Br51oPXiA|i???HvCAGaE2pYBEW7tQRj
z+(Itfu2$PwQj1s!YifKV@~RU-pQV}0HDUvC8yClQK7p4h<wI|vwH@?qE*dHv-v9l-
zp%mk|7`RsnWMNYk(+|y6<g*yfXD%*B&2C^l*(q0W*}@|OPDJkS$~NU~XrZ(DT$ePr
zfoJ(Q1V7vy-$np<ef!$8jE~g>K8TM}2Gb-I&c``dbKHbU{^3Ngwu&L<|7;X2x+y+*
z^{TpAOp<A5on9X=9Ipf#kQgPC9Zw@5X+0MW#xB!iGgaJm*c2CkNZZ0&`=w_KyP~-7
z&AywMsvc>*U4D&!({=S!R^H5DWnCVIc#LIjl8Zr{7q=^~Q<on_rC<Z_6J4^)!Zfy!
z+9T`E9&%h*r0BB3X|qxbme4}dA^K)%r<hKsWWkouw@g0Cf>4QEQCYOk61$A8+0D;b
zuuU4ENW*M{-Z68&a<m6#CypV&`l6_d0o-gAj6hs(yFz7&+0-NL*uHaownjvIg}N56
z>2&KgdyUaas%tc<ieZHLbT5$vkG3R<9CL;#8p_)|fSz#dtT(Qkn{``@W+~BM!~U{Z
zOt|SACKzxFUiXy|Dl>~#;x~gc`2vIrZr6B-G0=krpuNP!kXymK^YvB-6dW;*fk%Cs
zg2vt}X`?F3;OPFC;f1k8S%D%)Z!L8rkwxU7`#DENtFY|Y@Ssdw`)*)QXX!)IIct7w
zG|&4>4oF;V%BI4D@^3M2l14d4O%+cXv(;6Mr-P<gv;_zyimb7i)nY93zlikpW4iog
zU0u;NB|M)V4CbCSP<M`4H>YedMsGg22|*#@Dwp_6Z4ZyK8qJU*v=uB#EC2wdijEhw
zFlN~O8~fgXrbQ%Tar<$Xz8S`vfm*`|W4G7?hb#>$+wJZyd@B^GP3`5t_2DIYY4>_f
z{`meQ=!O{AQJ^n!;Dk9>AsboXf=+15W5hmnbnzBdIJBAYg;U$}jQxG%r#t-jxp4sh
z!yoiS1pv;7<IaI}!`Q16yZ|@5b%97<?1>*~p;7PG$R21|#!o*Pm+{a1HBvf?C~)U#
z-6+X+L0?91PHGVc2hV&x2XD<pY$(4padvdx0@R}Mla=}%KPT0p28D&S-#zQRHCr86
zjhcLh35~jw>mZ7O<UnVFHvmgQaWu^<%;d{2QT4`IRzkoWoEpG5qmeNLxixt?qn*Rs
zM&l(WO1ib~eUmGe*=FmIoe6YjRE-N9F9e3pna$NC!CL_|kug0JdHu`AWt->|i+BA*
zyOfIwaihJ+e`7Qz(PpCKdrYWAs2V0zsP~DxhzMb2K_$?IqR{?O|J2jSkAxNj0Mi%9
z2-25c+AJDiEXLLsc^5(k*c0$+c3HzADfSqQL0y)>7Ok1MsL1!Z?ofJ&QH2U(B>bBo
zbhxyi(`HK?FHLMr25EWsyEs&o+-fkVJWJ4mL*tBwT-BCo77C#e2PqBszD2iWA^BVm
zdPNsAF(W85!w9odVt>$*?_+OlX#U3X#&QXy(rkAnf6^eVRj9$B--M%#rOsXmo8>CT
z=z?|Bj|CpXMp(L1P<3%Y(EMtR$~U`js=FY}J?|caUnr&59_}iF8dsJx;y4WhyxRvv
zf64nnVT?yeDkZ0U8HB;ymthQpteu!?3S=okdCBM)y8#niy2E?+p7;t)TVmJ^qh}*Y
ze3#B=svBF|2}e9~yHbnpNWvVtU}Q7qLl{oN97!++2Ts}%0T0?z3PuxGA`{vWs(GFX
z38NEXl(7Ix@_|cGX$hpyo88k^uh)9tG-+#Qn(Al)cJ@i_L~7{LV-}orSxU(Okzshw
z5GNBLPeOt?*T<ba2H^vhz?qKjm4A`s9G*nDxZ}2MAY<E*1i5xyP$}$zB@Q-}Rfd3w
zD0O6<Q3(tTmv39+4QN8_5(D>n*BDKZnO6MtT-hRl_ccWak;hH|miC=JnDbpQhINTy
zBV-sm(J2b6=h*62w~v><WdCIMgg_A8gB=ZtRAB-^y9~&$$*qO+7-fw!nMHK&nR{z_
z!(fW*GPoSmLNI`6(>%H8H2QRHy4^m59o}2>^rGEwp?}PA^Q6^jQHas_5gTw<&bo5i
zNrO>`6rh!#!U26cXQq5_L=G|4huF4*!bX@=Oc!ffzK;ruBUfYC(<{M8l!&C7BS}nk
zKxT<l8n{O9AAs21DCsB0p`%lUPoQ|m(FhLrbUVSq^I}rx<55h)Z?!<ToD6}uhEn`s
z6aW+dD$paSc;7J0<lF=4yJvi-^7_A-9Wa8f=(`P(g+v~XszY_`sEm#^0IhZ8Tpn`q
zD>l}7vq&iimR4A|$A_wv(uP+aLJ1T+uohuDOSR}AMc6?RDN;60QPH|g5S1|CZkT5r
z-0;jOjwK=-7;<lHYY4ihN^BBvN;!fc;}w_XOAeM8;;*hV6oX>o>RsUVdH`C7v8sY{
zmJI#1Bt+yb1%JRmG`x&pBj3d4G_)gm&dpI!Gtw7!d26Ct;|&W;9Yt~auAb<#H4|@9
zEy6du^hliwZ*dp}VwOU2FUGHVxK?hmhc3`FFcmmgg=a#E(UVTgA-L1F=QL)~qB`AU
z8=1QzI?0}QoBjT$Kq{K}@}z~HhWHnbylEEqR6E~M#T?lM%H~*1w5xCzg=iB_BQ7j_
zu#OJxiQ1Ct#a6=P2?&!Wr4mB#QIN_`z0H#DEp#ygRnjE}J}UkeRZDI^kojScpicV}
z3eRJVg3TqcWf2ppLtp4<<fz?@qmTt7#ga{Uz_7v=87h3JcRMX|C`!Jl7q!F$RikR)
z&_)9VY6(RlToY8m1pt*cr~%6`3k7v`MoT)O)k@}9ADQ~me$FSjLn|ha1Xt0yy*HoI
zxe@3|FS#n8<$-SX(g4*4l^PJcNy|d&q7aWT-#=6x#rUl>Us<*~*Av&XO>WwhO$#yG
ze*pp-1<?1E?}<GLSPyCz;}RqVIu;g@#h!(ueARoWWP@pT`o(0X%eLvqZ#$`z*Cacm
z)?DFZ(ZI3v$1sY|e|m0w%wnx@#kd@Up<Vl7Kg2hzrQ?oy$T`1Z+X3XJWt;*(yCW5Y
z36y;dbu^uOQ<u++Ii$15Q{=EgH0l*pZ@1od%%e|u)Qar{UqF%Tw;Fl@vSzSqPIe67
zMRJ6~*xML|`yNlEp(!>n2#m>mLH1DU3_rr&i6*u&v}P7Ytw#ADbPeVT`cA?28`G?b
z-U&A<_v(Sz%VkEyvu2B}I9>NB+7#?g$W~#g!!`CVLi5~w=8hHqxG}-VQ^5rp{aRB=
zi~NE}o%PCkpe#8cWE$Y{5PM2r?b#7&(5H{iE>M^LROl2{L~{QIbO_j(g$jS$DOI;h
z8?_Sl{X6YcUEL3p<Nw;>F`&FJpy1*5k+z~gkeMkeXIk$jMYb#Y%npqDWDWUAq8Tz+
zQkQ(_iGOu(^jfFKz%akh9nThdsf|_RoqLP6tCG0ROo#Fm+8Ja`$ObOO%=>1ef7WgN
zpJtJ60<nF5SVCC7s2{E;Ev(|~K!l(VMaM-<5I~C#V#lCdYdj9Co6SCiu&tw=FuDP_
zGb4hJb}@+zlbmDlk`#Iwk_E_tn|5qVA}M?hChlZSdxb&z5v%o(8?@%?kZ_SMX}Cay
zVn;xTP7(}0B*7mPm>_rSVQUz%2z*JkKIH_fR1X4)0ORl|A`tstv-y*+%6qo3_K?tZ
zCv<f4_`a|j!`UUzDJU^4{G*Wp%l$3#q%d70`-7|^BhXbAOxlg!yK_&;C|yqx;f7Hl
zqN~fu88~<tB~PnD69AJqfR1u2Y@D);V(^6Mp0ra8CR&U@t>Y%B=25v`5n~XBrNEf=
z_a9$Z=~fT)0F(n>hJ=AjphMeUa%}=x=c)L=SJJ#L<i{{4N_fdTgp-L_@azF3Hfnzc
z5*xJ?JvX^{bt~jatK#g13d!Y&mUw4EOFXZE$Tz&MQw9omCQhg%n`n}alc)L^%P}74
zFp3i$vI{YUkd;DFkQzlXP$|go1(ygujN*kttq>h|X0wsjLW3fmWN0%_zu_rw=>Wwo
zIwGi)0^q2Q;Kxfoz7gxBHKS00o_%e3LxL?#o=hL9Zix<aILW}NNt*yn0M<|H?e?e8
z&5~$2bb{5qy0Y{eI?vB4aELRBBuS>46zEX_8oz4zex_6JOLuk?T$l}#+$IBkc2FrB
z7rvcY=%9kyrDgc{uxj0p15GcHY~}c=UShe#{)kxMxWAlPB7`WNz@wGyegT$gl5M$x
z<&96+c|Sqrrd`Q=lA~OZwIHVCCya6>#WIiro&{SU<|Q7Ghm_&#IZg(BHM+F@Sww}t
zTEKOpf$EkKXnkWLFiA{`bLuOKgQ@#47(Hh)u-T}Ini!){KWdTghk}CB;Ly2TN+YC=
zj||*TQz9i{X%8!W>?(4p96n&nK=(gfAj+*Rg6i<e9JeE!^;9!5f3)0)SbY3~e2%er
zM)sI?PvonjP?6?aI!Kp`tb=Fj+{nQ8D8j8s_95%hX+Ev*uR;+Q;4NyZbbM5#u_OZl
zbi=n57G(x~MtQ75OBfG`Sg%BJ`d;v0J>=cr1g#2qXWknICphZN%`cW#ghiWp>5FYo
z(eOOgnxNzkw?ezPvA)_NSDpw$<#Y?evj>?FBA+lsMEDJczetD)g|XK<$6Vz2tl5K^
z{-F;40JArKVP2=KJJpC`ERKMZ<1x1^^(Ase(AW~H#~JXT?*EMkKkV;a3i7FMmN#*7
zc<4U@Q^&X2?}aFRIlsP67<HMPj$7$Sm6w-gl^(>o!-IEV^c#0-vr9IxYGkLhS>BdD
z2+C~)d7dT6gzryLGn?fdUa9R{biA|eu{`}mpM?2@0pjuIV+SKCDpy_*niHD1l9@1N
z1|l7CpiK`|fMH{!Wc20}5yO;i27hlf*6~U`^Y*hPQ4-e{o+X9V3V+jroQ6N}J^Uo2
z7&eZr_A-gNM71yhBf*Ult0cfrS>`e{;tXU{*USLX@UAJ@BBk%#X|v=AUI0lrv^|cu
z25Z5bE@fV-6Wl~-30g;_L_`79zU*f%y?NA&C5RovJMpq%f?@^wpME1tXiTjhnx4&z
z4i#-8_jl``#OZQe3}=@Q-8dC{b(pT#c4>j;ELEGCH^&_~2vKRZxy7_U6S{dyGagh+
z4^#koITWA|z6jF}nKD~Ylr~7t1gi`{Z-CmQPOH85;Kp?ZwCZWQa8@VQV;gOwaCwi;
zrZaNh$lM{&en!|4BWl6q%DE1i+!H5Wk-G2>uRj8#&TdMqoMm!_aBUOT$TDv~d&-Il
zolG&0tR$FEc8pYG%1$w<0w{ZF6{=f_HDLFiR%4@<SOb>qv>IER#f6NI`?3T>5fn}Z
zn?1_OeqaJ-_&#mwsY2!^r<lAa%_WPHcA<;5p9^=K*;~!!_L$}5quZcB(#~z+mM!;Q
zEy?utESuH~;bg6Xaq*mNKRQek?};%A^0%HISRwz?64}EBvR_#VV+WhD5}}Y5Pj>1N
zXCd^F>NZ)QH-Bk<`f%1gjt!tJpf1^k`lr68RWnS}v(brtFiY9yOS#@YYf!FjlX8Tj
zNT_a9E88{W-~f}U8NVFG{Hv2HidqZeTeQy5H{rD2Uy010U(m(w_^i<rId8_+jL&&s
zJ3z<HFa1ciU5xbxTYGuBMF^tTHg0NQg4_PUnvy&5jpbReNAT$OXX6xBUi8#9120N+
zjrXqSyvwn156&Lk!REG{!k?-;@<_aOc0#xpK%uXcjOWI4_~*9?{!9NCGub^ITruGa
zEZGA<ceKaT*}ZT{eHMY-JU#D!T1j%d*Y=aK00jGVVF&lTX_rZh(yfgz7E>*`=`3*!
zVc65bZvq5uU~OYZUSS{;5uRkY6l&4+I91Tq6(~BS&kU}NqxrRuSsH<i)s0%hcDvPi
z*X*|XsMiVI#BBck0wm1Yd9z!GDP0i$+tU4s+vdA5s^NpZ_yG@Jb`suz!Woe7>7p-O
z0iEJR9w8vXd|duAv?-I&SI54cxl`bm>CN=#tAp*$7qf$ErMC5Y<Hw;dzQ+ANl5Rr?
zkS`SB4}=Ry@Bvj?^w$eFbWq?Ad(w|@6*cEQ<GW~HrLGE`QO>Y}^0I#-EGNEAF8U$h
zi?A+>G}nb<9RIWvl>gDA!$1Qcz_hWF-@|?WO*k9|YT)W>5KRk@kbk@M9L)DeGKKD}
zOy*?pb0z>74XwVfET^JXK6RI8^y;=sNx87oO{Q^hq$m$!;-Ju<9yiWtE)kbtEMoxT
z&K(D&(e~EjJbTdq%AxB8W4G*J6RlQOjsGIqYv3lHB94|Zr2+OqhG1|e>Ld+?h%jN}
z1wpZh;q9_jpbeDd-I#FQFMM?@<K?scmqi8BM{_YBN3qlWn!0i{yP5j{Mj1)Th-=@?
zJj)ol?8tBXW#fY|&EqDI8FlF1-BaWS%+ewfzpSkC!PzZ`n|S%#OKJlIwXpM38x{z`
z3EqOdGDO-(U)}$4Jb)2erg6=+<kdyK1GY-JqQjQ9W(NkKVV++cwc!`}Xr9y?&G00X
zStgZ)iTR;Hznz)CAKcsFn>+^p+B4QypY2QL?f{Y=M$!Oi9Ei-Ite7Q(b7-;?vGUvw
zCC)4Zzh$BX!265~Hw~zCZEIxUKS{`-cN6Xxyb-x{Pc+<A%4HYTA)h;hA7EU68`?9=
z8DXer;1S=J#}=a77<~f!73x9%uw?Yv_B(Jr^s{m)sW{H_0KN1cvaD&i_SIFF#7ZBL
zj6BDCCq$6eBU>%^)&PZt)}3SWj6tL{=NN60h?_S<2T$)3Kyg4@_Rcr>V%NuEvBcIM
zLIFv`w@<{?Fz)t?mo(yh2KbWSy<|m)TK0K{i1Y}AMmiP?xg=m*&%QG8<{h3kY3xg=
zc0iA}hkFm60szvwwtqo6qy7C@c$DJR&>tA04ZI60Qy0dkJ`;If`gpnUAP4MNt&--s
z-f%|`02!;LgFG2opk(F&LnZa9J?9TQiVaVV*cZLpsdblLGwGvVx#hnn4oR+^{PjBz
zi>{ssZ7BYMW`YTVWM;}TD?!!}!2SM`mvO1iU=+fr%ofL>;gllispiq*c#g^ZW`Th6
zEV^FGTe;Z2D5-()qVT8%gu5L=Nq@lJM=l1oF<*1Db8(8EEPU0xP$&T76fL}7MSsD&
zpAGSDSH8*sRR0(ObeIW9yF!l6Pv}u|PVwE$<4>LXX{%veLzk5Y4~)d@)>c8A>@~XT
z>jR&GfU3|fXY0~;M|eGX>Nzf`cc8TKbzJt4t(JIG9=s7o!*DTDIt@OY6Rx^tA^I&J
z&jwMkbG{-INTVZVl@o1}DnbqE5ZLMtA{Tmsn`%+BsM(YiGCsKhaP%el(VE=PC~lRc
zLWOjijp&X(g_MzjIXEzATg*d2k2pk??6jvabV2=UxPBGuSC^<SNf&~msPeP=o)hB!
z16d6uLi3oT7KjkXfgv@GEy*($!zxHADELEOUme(Lv_m|=Vf}Ae{S2{-Hui2ZoA6ZF
zKu?XKji+#U#*BW!BGkuhuKe2^YN!xllwwJo+2MkZ$&WaS+Cl~*0x|pikK`Y;`#b^v
zgewb(x$0gf)wQ0EZ_!GV$&ctR$lgJ$C;B#Ur!`YCyXm#$RN5mYk9kadKm9CSupor7
zYM;GD?=$lw>{}&b7Dwm0lu^3z1#-Y8WmS&soKMf&t!7WueWukF7vOCz2p((G?v+ZD
zK`uXmCXD$^rDr^E-O+X%3C8lF1<U+Vt5ffOGP}+GMYlsyjSKB}y;2wcKRbs3Yf7_6
z17RK!$g}CbB4H2!Cip8*=y=~`N>C-!ofh{)<X1{TObY}w)K;>Tvrbp5qpBPRwRzKl
z9gMt!isH@TN<w&<FtN_g3#ZNgyR+jEiQ@Od;-<KK1}PCWBYTtt+>+i%2p@rCu@C}x
z(d{<LafO3sAJs8xYrEd-g>8Gr3t{u($_P)|y~LLC5$F|^VX$PxErq|)jkxHXhpAQX
z%7%Iu=jUhLK1h0oSi&aj^zV`Ad~9!lQN4nl^1k@atRmT37!NdPr6m52!A@jz4ay;~
zl>v-ND_(5oW2_6&v$J#AQ=OJrpv3P(BOIQR515JR6A7CPW*@hGD6f1IJ_$mUza{|X
zE5ZZzPn?-E8i1fvV#|G)Ip{zdB9u-TK@$S?q=XTGbJFMV2M|N2^&zbQAHvZl=<tV9
zIMd0<X&aUZQ2B?(^Qko(+^}&_Mh?OXp-Rt-8K+bG2b2psA_n1mwPJoa>;6*jqB}$|
zF8YvsEGG4tU1&__Y9IZ0N*_fh6OZNcH}o+j9vVsPu*87MXPrLUo8P|cXGw2k!L?oy
zIUx|{InmXH@WKf%<z*80c<$Jp*`{Dy%exk~&QOT8e&{Ke2o+PkUz7|es8dW>OZqQ_
zMO3a2a&3rh^>ZLz>X#mbpmEL+hyA^zl_@uo6_}*uDKxKhPUaY3fp7q~?76o-f)Cyd
z+!4Kr2_^;Go$dz^3xfWs3ueX~Xg5!uVhUV&4&I}D)J#|?;s4AT&=)MGW<W%3NmQ%U
zRFDE&*k;kjg^I=tj4tdscf~(tjzqh#V2&&#e=s+qD%{h~45m>zK0#t19y(9rpq83B
zN%Q5w6YW8(RMe-5Be8NQtw?~Y{yR*;^O4r#8=xdElu1SSYy4aH6J8RDy$8Li@eY9w
zNWs9!3@Y7>9=V;}SWSGLj+o_-3(rBBn(m=0l3X%YNVb*^md@Nnw4YS4(L%z<CxHxj
zlMpqQVmdJxr+C8kgh0eUMbT+>8tseYCe1ZyuionS;)PVuT^WZsy@=U5!SFS*Acdic
zmlae{rBnxv$hJf<nAtmTHn6LzhKMhuMn4&wMR;c5MvPg=g%lV4Sd0XIH-f9S7zeFq
zzFYare}4X7_@Tg1Yy9ulw!QOabFeiS?p&?Ywte0A2I~#iv)AV{XS9B5k6rJ+e6wU5
zm^EZ;bCZ6<PxyDW`g(J-@|WsH<@H8ovs&G({iRZ?Y}7XYVpRSN0?c6@^bF(CaH5_2
zU-Bcvy-;j8y!LN-d`g1DUotFXg!f5rZMs#8NxhWbJ8~{P3l=`%WQkfX5Yif5+wfk7
zl4-cpdyncC8b#PkS9aE5L;c!#YuO{??cAPB>`QAtH1@CIMfqC2_?t8F9IJfg9i~F5
zyfKdKiR;Xa|FQ<|rLq6t_^W(L@5<G`U5_ni1RBsd8N?go7i<i+{at4`q{w(*t~~zp
zZ}--X>!#MNR*ZMn{EKb8vz<%m$NmjIm2c>2LAy}5f8&lV-)LA<&l%XGkz?$;-XBo&
z%CqeoJlgeqbk$6*zhRtPU!l==?#P8j*&Tjyjr}RU`rA$LO17`U`SjemH$WGdp>3eQ
zE<3Zbr^?LbOG7r^DHh=ZPTfD8(a6I3=kv>vGcekK?8LW?uVtfFu23W*i-IVsD(7wE
z*5X5`q2r6t9&~A4_+AM<D;b~MIbAPyFX>jWOL3T`zv`HqCT~@Mxel$Zm)QgG34d1L
zf8$)<Lmu4NgD*ZGvWQ3uV+#KiM8VBC1QFn#?q@xA-@SQy1}bkf+P!)myB?38?`FN-
zZPt%Jnc@}!XTjtP%|ZIX@HKhoN_HI2O-E=WQ$d)@oOym?Y;CSxVhnf6Jnxr`Ymfzl
zm%gmYyC&bLZc%WgHA&H_OZyO?g1d@<*<#FZZ*R-jlR1sKzmDM6*`IU$W;Py~6JXo*
zI)ho`RcP%9n)>3G@w?)3JetL;Z|V*)AGZ^Jz_mMNRT4f+|AXJ>woh(=H=z6r(!i|X
zPL_@OVBmVv`HXm2IuDjYUAP_$%2)Sy<1z@9b$>n{JF|6=GaKvY>{asDT4m<KKU<sd
z4?>;}uMof#_P<6IQ!PIKwSQxOHrnS~o5mP=JTiVQm&>13IH^ktNhYa((P@1&C^q?X
zJXv-y8IMB;$O!fKIi5}X)|IW@s1{D29JCmQ;*LZ;QKv-L_<o<GWETz4q(x++dE_aF
zoy^u0JRcs^I1v&*pSC(@GH+Osb2!O@HQL#Q(m-K5GHtth4OMz|3}cM(bB>R@&0Y@x
z>KQ?jF5%#b^H##mcg8AlW~8Re%d8YixG6HY8(ZRfhP<gRCP7531fkaj_xN8YWt9Jy
z{!qE;e;usj1%!2<!dtIz;k@9v!SQ|98CH4lGnPIWTi%!XRL5mI7cK_xQ)I$5^e8xO
zo%cdlFa3ugyqyo>FmnmqB`*}h(FY<i4iL`=vcpNY*~E61WQYaTLh(wpgie^YJ`b8j
znMx8*vm^P_^Va9uIrum9qu0vrWWqhZrAWA#bmDhB(CA8HoY9rI)pbRW*-V>W6Ijr^
zw1y_JAncaEMK4HdHX_b4h{hnYerVP9Lih3ME4rzfpuNnm(a({?Dv*QXzV`XBuvQfj
zl_&NsV*e{v_`DZfv;6);RGd~M<=Pd9g2^z5@}~#-|C3dm^;Lr|Ac8hM8FJvkaDq6}
zM#I8t9c2#5C<^Za(u+YbJnf1I=&N6BQHev^%#??Bpo`$zdOf#1jOx#~^e$s!k~1Hj
zz<@Fw&~Y(pT6K$(sUV&hVU(kag7(CFWKPM2sxb4BHMtjZHppnC@#v}=6;rfoGU^4L
z#qbw1F7frM3p)b`TR)JQ$+-#?dzC6Fhzn6T1+~e=OW3+AdkVay`~iF;YE7XGb2j08
zSSZ9kgjim-03zD>r{0rvIR>)?gxD)18~xQnGDd)sK)PtGq2JkoA#uQJqKl%)o|thO
z&*0+Yiy_p)zmg`DGC+}mj2A<OEigf+GmnQ4$&U{_Y*ZqnOH>lOQ;efbyeY0QbZv>U
zAIutNM@|ez%J8jid_hk|az(@ti%K5I068!xRK!?~uLuT)kg_QH4S$le5N1vgx<7EO
z@+3-mxn5K#@Mq|G$%xK?@Ehb^ou*-KPMuXallGD(Ql4?Zis5Ybl=#GP=f3G)U13VK
z@It_0{~5ws(g!tD5Oj1}OPa{{rGNhkF6N6QYJg}a@m@tnfe9f+!n1=YykfA3!#u%@
z0J3b!h?XrGif5i$0R8wSAGe7?I2Jn`d`nwc*X4BnfmV%Bcz@OfS5lA7i4Vv)W1_qg
z)7Nx-(#LM`WRfO-DEk2&xS+cHTtsY>5(p*RmFrzn_6Dg;D=f6jn#JcNYD3XeW$=QF
zPQP{9%!54T6#-NQ(gI@><s|1EnNNoxtV>)F)IyV;ybla<Wb}>X429{UBJwlZIOHT}
z80=<E>?{i@cpBr0=kLqnT+ouEq;!Z;#>>jtuwh!7arj{hEN+b)qqGW{?LLT!l4y{Z
zrD8l14eELwCjkK>8Y<ocCVph6zK0Ty26|3i=?TACB-dDfEo-+Rvvg!(Ou%c1Du{42
zWVpB($x3%9>^W!=XA_--6lbTDs-I8hqmjr}8rqEUMGR1`rDl;yt5qYHVx3LVy%n`K
zaU!xk5ehDSy*oIl6oZ|0>W<nFnavVZd9iH_4$Bi;scuoJ5D!!JeHbXTCg`g(w^jUW
zb*o7DA!GN*nj5v^62oE;!>|pP?tPQ9WGKW@Qs)jRl`OFK=n`X$5&P&H&|yU1cD?d*
z*<}@fNbR^4l@@AtL99N^JtXQuM|rH(4nJxapdl8*M~?AQ`-Ct)<%NuCWNj3lh_`}m
z20{+oFp@VcK{?Gf!JCbRTaQVQwE=DP+OC+Z5Z(RgY<5F30TVsVJv(8`khD`EiiP(D
zxHiOh3+`xW29rHGeT*uTFx#JGJi;?w_5*t$kuL5j+gUopCsizI$m<ip&}V>Z<sq}+
z$1AHUxmhRy$O0qPkFihXS~FW$1|3bw(<c!{GrXM5Xeo=Fh&^gCv=%Lg>WU31SA=4Q
zgDOuhjrRErQQ;j;L<j@NC?p=y^giiDhflL&r!BBcE)tJhBhvf&uz^bin6#cG$oM4&
zDYY3nLXp;@%Z}cZ-nWH(u)r#mN7hFeRTrLGeNB9Z>5>;kof3I54Ivm?6AW!p_%R~5
zfzH6*oW$$;&{ltw^&P{+8a9jQtOJ5SLj)fS+(b4Ri4>wK0@|`f#a)aP(s*BXY*T1G
zOi4o#Z}Fgij23wnFvS&Fxbdd9XawV_Tp9hSFwN!dPxC+mG9{v>du0-qg?@s<60j<z
z^S}W83WVw(ngcjS1tnD`GE8V40r^Gq4o8CXS`t8H!C%+b@bAy%fA$LrCgfoJA4J!j
zWF5#x9z)muY5-?xvg&W0J*EEjyPsBq^sDFrRB+I$yk|JZK2_f{UcGXP#_lej+>wp>
zl>yKFy>R3*_^`LaNm6yM=W@ARR1IYC1Yz!oosZ^#En$jQIv4HspQnpS?F)Lo&auwc
zMtH84sg3;zL6F2JD`^*m!`b9DO9_oW46%uU8LmJ@NYN=^G7*y>>P&+txW+n_a<2;g
zVy1rZeX;r|XNd?>22)~5^Gn3=@&Ij;!vAvQcRbl#pi*)m43RJ;I-e<Y8`Kh7?E_}E
zTuFh~GF83UAnxPM6{*MdHAOFGqwfdyd0kbd;anvU#bm)CCSR8D;a0U{f=odWQpAKg
zX3^BwRLun93N!K|b+MnUzWiXKXDIiK@V{ytQZ-tE*78FWRasDI`f9Q(%%kQa7=Zsl
zx{Jh$W&VPNHGNS-CgF;5Fp@VQOVTWbf&WDM<(zD(o2DnH>mojmZ@`{Pa)`YONsvBn
zdHm%!XBjd^+x=vg({^sOe#?5G!v$?ERKfgL@x9<$2!#iUI=MMiLM8dM9ojFJ0tI7L
zBb|DuIuq;YVW^|ubcsb0G*EIEi_cdQRIvSA3sIP*U!q_QwUb}C#>6W(kt}3?$O34h
zD}1&_h!$RbA%a~3hH=6)A!~y<dW&P0QOq#qV{mX|XJ_+YyMzMXL7dk<g?OdIxE6+$
z2QZv9?(qzxiK2foYb@l9+~SP+;eU`!%Q2GVC7dMt{yl}?(iAxiQ{>-UO8zj#Ma+y$
z?IpkD`9Ye_`VX~5cEu$piWpCEhQ&3wE64L^(ugKsvhi*vLY#Br31M0xjTS>(Ygh5m
zV}c?oqbgeuZSchrDyg=?C#{B-&~IWK(GeBb;VKdnN-*MS34+P87{XVtMw5q%k?A5*
zic4aUc{~l9xXIm*_T2R%oekw8q~O&YOMW_%;rlqg2Q`<74Cno1Lq{1eIvVz!g;p&l
zbFbGdWwi<OMBIZDCv|kZN6k$s<}SB*B{0XpofAFWIi2x|^FIvWDdM_@v*{rXLMMP4
z^(X)w#^WmCp)I9|)$xONt|=_2#qR8)X%<1Z*eyU^c->gOKOaxoNJkZf7!@&3=zbsf
z%*DKwK*d@atw$8XoWkVdi8Fzfp~MlQ9kk9Amuy6FR`uZuw?`RQIBqJ1PX`)O>*4XC
zvIiX89y=oQ#%|Q;N}8T;)}}0=#!uUC0wVTBmH<xn&S|ejoJ^dRQeKE?G#qptW6!)O
zD)%gCj52souy`LomT$xa@+|>I*uWIHVq~9)LHX-JK3ye!Wq3yQf?|>%ru&C*0P`f=
z8PqC|K}i(Uln*K-;-6Ddj1Y=vpP@n7ixgEzi_!t}t!rRXYn@vD7rho1UaRg2ADqzZ
z{ex<-{$lhAN5#)ioSVQ-sO&QHPXbYZLIfBCxM+Y0JH`}j#WJemy96&o@8U*ZKi>%0
zSmvfMd>ojNQPl{!PUBKbzIo6kqj1rh%+6<Cfq<YJbRkS(bay8AE7kkI281j=<L!i^
zLSL6-9KD}KrC1lFyi?gKG%745N+ZIfED^w?AcQdiVwUm>@(_)D!61@Ms(TtQ1CNBc
z!B{g2^t})v=z5r%7#cRrZt*vJIyMDvkI!*1FfBjtYG{7{lU8;pOX3?qh_yuXizE{2
zB1X=3G0#gxogz7YB+H;6?U0O0T1>WGJxT~gTg@emVllk9|K==`eS}ufV{U;%+*6v4
zK(!5#W`8~j!->@dkZKwn76fvklk}8PXHnzN=#n8Wxcbxqjf~QQksmMYZSW`s^^D&!
zN;k|?VSJ<|!APJ#lUv<T)Tt|*DFhUh)Wjx3egf>=`3m2IjG=qC#Z~;5`?f9<s!VCR
zj^`j2{SmG7BtXaZcrd-EnB<_))79~r=MLusOr8V%_h55|btwZjg2Nj>T!|D6U@-^#
zi#c9ES1H@&k|823tNbJ40-RuTJWF=MJ2N!2mNRxK7ruQ;P2xEmor4FMnK$-mYGe9?
zoaSxX(ggKoM7mX0d}Tm7c(zgNf!{Nnt-?7Y*6x14^eo5r2Htz~NkKG0kOkPZ4O)6v
zp-FTvJ2bu}#zqzw1@sLZHl4ehft*J9%S+9Hcq%~Q;+puI0Ez9WW)>5Sxu>}y%gH7E
zk*Vf0PM{1B#b1m7E`?l|xvapiOA>GGIOqG`@|0t@oF7NJV7es^w!j+Z5z=HdLh{TE
zap|Z`-qCpJj{T$c3rV!sI6pg<Gi{Cul!B@4%KbvwSU8UmT!coVkp*)}n>q5D)B`3@
zxUD~H{=`}hrqI95gb7_>d+fA{5lovoPccsf6#~s!d;}fsB1EsSkt}<9X{I((%@_T|
zK?gTEIn()pxG<sK^N2>?2~2gbki96vCQj>H%Nr(OZoJ~=KtO1wyzhG}f0BC-j2>s&
zq4e*X-w0mk8c|9bA(Qx|q4>l6{ML<o858O&NHh<l*QlJj!sX=BhE19S^3b!dEe^@U
zC{xZtN|mcp%X8{q$_2q&Cu=(O+^Of_Ww}^)r8`q3jFJGN-@Q5OneA5R;$z_EFQR+k
z9#38IBFN0Z(fsBJQ54{781ca3>Ge#lVl>q*g*D<?i-}-;?cAIRpj?pICz-v-XVw=R
z9d|s^ZI$yUk`}ibo^8uqxM&R;LG7V6qp)D8;%UC3$VZsL#+YIzV~3I)MG-<p)N(M%
zBrIcBb5fTr5+lM*dSqK)&;m1{yt`LH#00;bEV>x3Gy~JPnheWid_CCfP234iM7#h?
z>(P_~n(gwQchzW+67O?%ka&I0KIl7_qrmWYImc{3sOqCPps;mG&7Zf!u$$|kMUQl0
zQ4rbt(l#oBfk&G4l`U?8!-4`r!>T)5g-1+%fni!#>a`rRsHsEX4J;>=atbP?83Wi6
zjf3RpvEC~*TN55{9J!L-mO|myp{=n`AvPrE$keIKD}v%wGE_)4bK=mg5VDQ}QHOTm
zrxw0>A)VX=<?(VQ@Uj8~GDr8qN*Z`R_)CmGKuVQpySO3~;fzfvPR9==_!~>|=`KDE
z)%~bKsGeMd@*ibUD@7GfI+k4{>(q+sCY;p%Gm{QK3e)UEAA##wKTM<6bz){f4>=k!
zDnXZFDZs#1_K<}Dj5L+liv{)J=J`F1y59O;x=O`z$p^6mD6okrE{B3<1Bqncaq{6%
z-H0k;gnc{+24DC#`oEH=w=}^0GXzC<L&j6QUq1)k#?t*Dsq?R&qis6crEIG8tMhq}
zs;SWKnU!VggG3NOi5EC|qHTm4py^mhhQtL`Zf02{qxARe-OjBwE$GApmNZ^irP!-W
z-M1VoS!KeqbjiPIpF%3<ceJl!4uE(JtE8AN$2Rbghg&}VX5~;@)b)FoENc<+(7*ub
zDDW}!S9k>=;r{8oDUXO~x>IY8D2`D<bvwIExKwZaB6JW6C*5wHw)(6<pk+R)PEl;p
zDF&Sg78rSSL(LTibI%h-=9viG*mfrKI|BLM@zIvno6U!gOG&AV<=|wDb4IehKqoy?
ztr4BV3;Kb(a`0KMeSRe3WhjNWedo;N;z?u8D<@sw$MFaP+2BFv(Z$J0v&$CtiMX1l
zRv@l`u{Q7s>XO6jimbgnbQT93WhhL|iL23(J?=?W9{|C`Iqo}o#5Iv;XS;knHE@Sy
zb)-4IN_$FEAYq0Dyyj48O6&@pSQ@oEiZ4^*-Q_Bu3!wE6KmA(yOd&GyPjYvtex^d-
z83kZ(g+(M?bgIrxS`mh$^j!2FS9!X_n(x%JL1<&fVpM_LFSfkp7>X#O6!w(`_vH^O
z^2FnVEic=s1oED^pec@2$lt{cH=zzpFbXd3LE`aJOXA`SU6l8<Y@oE$23g?FuWyi?
zF6g*p)F7=}aW*ee2#%nl`vJO4EU+oy+%2XD@d;K%xy9D|LbYFI2JFd-*MqkrmW%!q
zx_+LcUom8hq3it;!gB16a<IgyasXn9C!tK=7uTxNoU{PSA>$--hTyAJ7QzQcLBK`J
z&Clv7o&ak%!JP`>92Qd3FDiFGC|6uX6!s6Y#5JCNB@#IXr<l^_Zn~BgxlEyf0=lFT
zHB0Mc%4!J{X{f?Q18?nOL6Y`>2u;-AH7Y!|F{}<V>9iv5-v6&+EwL1(^U?A{ha>kd
zA3ILBK?;;g8}g@^#<a9Ea=x6MH@o%zS=a0|KN!E7t&`7xQXp`J%tu|8!oVlQR!zaz
z?3|IyV4(xMV88Fuk$EuD`MR>wpObC_NxLq+pSXPQZT6xV%|`M~lO92C7^Q$GZCKO%
znsr>ItF-Yw2A}*Y4RWju+NtOzzb`?k)=379jMYpU6+^v$svfIeWS}DVT>mO$6fWXs
z3u9<d$ix_f<U)dcI>RW?CVoN7K!|@?+gi(Ev(fsh;r{{J>c5f}e>z6cgfm%#eIM*}
z(Kdnm8^)_F;_??%WP0n$a=u`CBC)x_eGun8CpQWWWIUh-M<m5i_an3*&&2MF&kVu=
zg@uVR0+RN3nE1pQ%+UynHq5eKMx1-)5CA132!RpEqTStaB`g@FR%#mwGrMG{@Di!5
zV6~)nviMdW3}}6baYwu{0Yg+?jxQh6B0x!L6UJ05P-}E$UnZpZ!}eU@Nq>f!U(sqG
zy6s%lhy3x&X$p~PCO%*#ebl6k3vKC>MM#ODaRQ3Oo36lMjDFjrgoJyk@!az+rCAwh
z6P2tS4TyYfxiK=|HyiNO`ac-!%RFg)gd(VE7G(BjS~(+$2h$oylW?I>5WbvZjE?Uo
z*4P<{^cZ0W##Js`{A;2{hh71c!Tv|wTnHbPsbG66WO%&x-1*eNOkd`yJDiW~mDLp$
z+^yGdlNAF56vvPiRnc^Th9R=p&_5z5LEyHEPAdeeDNR+<P_>HI6ZXK~RL4<%LRbT=
zA|f_T_t9ipQq{zghUaMLbA4DcR#r7_Jl0nZUOcrNZ=Yvi2-+d09XH1J!1Gi1f3~^D
zNWQmmo3e2g_h^eFOFpv`5vKuZBhRoE4hh{+4XD<?+Jg;jT^ZD9G<*w2WMoag7{7Ti
zEPZ$C40^Ls!=0dyJRKV99-lnm{lk@%k%vtdpF4;YkBUlA<(qaSAw`A03ek#1e+m!H
zP@%x^Z{TlO(_yub745HvH&5coPA{S?|9YBmGofJrMHm*)hg(g6_96@|v1mWCv7;!n
zq6b>?UqR!GLplN=tGs7lI+LL~X^{Zqym)cQhrV?BLQrF63Pto(jt&l<l6+4t&pnL2
zf59y!Lld7EWKzd+Hv?VD-_k#mddr{W*{wP<Rh(u_=3}{fvI?}mdIN=F;ifq7lIl6E
zD~APuRSY?c>#@ZJ4u=IJ)nWqmL4ygx!5G5NN(P+M&U|H$rlnxk0O|k%H1Gth!%-46
z|E=})HT7dOtbw=_;2XhfwirB<8FiFS(Kx?o9Tz$MtnP90wsS$bX|%LD&c@D`39H;4
z{My>&%&?&(J?AZ%|0z;S>MMAa0DU;{d;_5H>HxJERx~>-Vpgy&2g4E`3Y45-$O37a
zmOnO4`2E!d-gHJbJQSYa4Q{SS#$W$xU*46BzkaFi4W@HvxM%3^;q~)f<N1O0+#vYB
zRz4R?@chD)FRJ`vAfIdeJe1EHpOKfJBLKt=KVn%x5hHA}!`Wie2tp<iXKPatr_5dy
zHtuiasTiJq85SN`!_qsJR|*E`J$gMD(4LCkjqRZW{D_Gd(X-6JRfCg=m1xOQrz&z|
zM8|wuv9gBUJNFiEua(5!iW2&9QZ#Z0*J{b!WWt#U;3N6ThwBEFY~0G(*TBuJIZW<v
zF_zf!Opw6?-+ur8+w9w4|N7k{M)<wDXK0)d`}#EeP|^6I7Uc(VvJsyKzypSy5bN#}
z&j;#sV(%^=-#J6|PG)~jbX=2?q7eB2noad}rBcixXL8WOG^ww6TL@);G4wTxX2?x0
zIek><2$33GS<yq*Q)?Ji^ecdYbqcTd5xq7)%f$#^@mC~bDBvU${@_4-u6#x!2y&~p
z)!=O{cw75SUe3C|?eBBjyC=bq8wsC0hDYWQw9&x~5qn{bs+8{?{;0`6HpGqW0B>AT
zMpaleM>YIsg96@(A_IhOI>Y!oTDHX7p(`U<F{i<M(N+0ZP5rg;S=KV*!&<r9EXgLt
zdaSjE2OYA77e?i-a#A!74<#_?st}YtIH>;PANu1)5hoy?RpYFiPn;_U6mu#Oc4<-_
z9NCy!HPU3WLQTHjqxoF_b$j>qXRRik9gCj>HPNW%$w`Bf0aN{2jbeoy{kl=zt<`i0
zWO@?O^db-{m5q}V385+>)T+DH5Cr58j&G0v0e((Sm~0|m$<eRX>TX2`;M=oUCqO#v
zB^?8zP{U3DfEWRV6MO-Q@~T=ySC61~;)Oz1JY(ChsvN18_7$*J4Q>kBh)6fDZ%PP~
zgJ^Y^c{pf#&$#q#>&sraTy2-xd{McLU7iUU<;3c_8-D*?^QDul{OT2HZ6X$Dz?O&P
z!Y@nhSGS5e=Qu99{*qICskxaX4-#gpNN`;f4kGi3&wG8&#v?Yw3E@E^1rG^c@l_3h
zu$U!P9kujn4SMU@HJJ$oi(Co8>l|QY&i(2pN99z?v)~$P{#WO7B*X>|FQ8r*9J3<_
zmf5*dzrnFZLD8=A*p!SN<Cmk7QKR`?l6fv@ruh-54FR>f6@W#kjRdIGEdsll18h@(
zt!;+EHWR?sHVN#O01J9X8rW(j8QW@QNo<WmtG>3iS&V>xodW*#4ehu=aeNH;nxyqc
zEd;)nL~DSD;CB`iWuuY|cB7Jjb$+m0DYV|opmicx40nJwB6}EVU@MuSx3*FldUGlG
z+bJE`Ud$}{8rVrD{MJtTcrMjIwJwDJo2@X3Xi(`$iW;fOM&1B=K!v|-rO%`ko~(&=
z<8`%`*~W%!0}5(AF$&L$vEB^l)@UucOEtMpRV%t^$Z8NvW^q?mYp+FXwaVsBzSgkh
zLtA?zTHCBF7%<fumV97q8*c=2RBJo=xC06<&x0G12di&tIh%P?OXtGWW{w`(o7z&%
zG(<DC*Ey#Y6np^Djzu#YJDd5Mfr1YpT2nN$X!6#QCvVPXHY=i;O1P1U%P|H#I?9%f
z7=`mK12v!`Zd@d*!`n@M8-ZPYEEi{JA)B?wj&>ICjmHu{3;3oa%OWvZi;2lR;9pC^
zERu`0m|V;Qe(T|aaC5O{vbin5?<|r@3jB@~N;wpt%^f*;E896o7E2QNYApwR5Zti$
zAJy;@v`#0w{rfGxlQ^K1iCmHpqvGEMW_gHOP0<L>H+hoQ$jC>HMR2VPQg2l^b1v6d
zGCwVj9zK-S92<tshEz9dwVj+A02R&OgJapk)*@jaw6G<`eXc%8DZg+GYOyh><(N*}
zJCBz&S>PY6r!21%ZPy<v@E_3D(L?3`1KMgle25;<+VR6zhaB>CyZP8bLI#PdE#>6>
z&GyMdM=8hL->Ez)3q*idA3JX{R_LADW5*y1_{L+&k_CM8@lzoS{OiZ_eirzxC)L;^
zz_*`RN<_f#JbW{~ncpI@^X7?_$CPI357#`FX-29C3zf-@bY(Iu?^Hu8>1KTyRvFKj
z;+E+bFNjYL$nuWG1DpA%YOD$d!lzp3X(M>bWA#tB_tPK7*t(<KLwNe>tGLUdw}%IU
zpqWR9&;$U7&g`vdN7z{>5n7D}J+a2aA9-$(a7MAiXB4l+83kDw<)Idx#ckrr6VGvZ
zoK@RIi0vxJ@^B`=KTn6rmc*nM!KAts!(^|z^_jOR1LfeO@F6xU$m4o(%1FnId_=f|
z;85UcK#vLN&SyT^2?K?q05>I1_zFPzI}D~H?w)&EjKqQFd?W)+%P=Et80U74eL`ST
z(&VJ_z6^)yd_`ydaBANu-z?&pgk$>{BL%1PZ|%t(T52H&|B9se2Aa+5Eqm+r=5~p;
zlw0`p2LIj~Y`xyz+_JxiLDc7XxqIde>ZivN#MaLC=JxB&Dph-fm9}0JxXsPsCTvoo
zAWn}R|I(fTeN6@84Hw300O0jjZF>X%u2r{dh{tyt#OGSoXU|tSO`tPGe6IM;Fpxm_
zAfz1J2_B5ejexmDRQ;&K0|C$pSj><pN-&hd!f8*7o155xPF{+cn_NnxU<w*2Z<6I1
zkc>k`kqZ<DzC<rw>J<E4KW{yxV>}r0?g~YEb$q%r&S8BRe}5OlCK=GP#Ew0G->Mt(
z|L4W*vW<Jsxoh!FhJ|JE_RHLR2YvPzl@){BKLs^wQQ5v{P5iMvvw+JMmF+sdE&pGP
z1Fq^YPMuz3QQ4l;Gvxn^%3ho{4Eg_}vK{*lNO6pTbSKaC3+s_Xa*S>rNs=BGpTFrc
zQwrUPu+#*kCK7R)5+Am!iW3XLYB{Q6KEe`-#nA?05fz`IDImO&M%BHCqFD>sq8$+y
zV`Pa`f>V&}=#`kzfxbI!qZ>zZH?Rw$D;xMbZ&wP!i~g=y(DabZC@Tpc$v=NZEPoV;
zoG)fHPtUra%y%n)`8WTet3PY}@7A`x^Ja6fH5l$(trKC^eQ&Uiw?fwEGiS7Z3X{Ej
zvveD%0Byg1{TJam68XKkS=p-orMgjhy;0e$RyS*ZsZ_VOH(&q7sQeiOm}68%!+12D
zXy^Wy{K&w8R}KfLnTjMJH2k2!!Af?0GrP6%U!(ErmAWASA1*!1yGJ>SmT#Q$7W(X8
z+wi^&uU9CZ{1jFytXpUlVJ}<RS*um5uZ_2sJu=?T?a9QxwB|!&{~BJDuhomcITO#Z
z%2(cDB9yH)1Z5oA6W5s;|78u_OJo1P@mKkh-j%C=yB=H4s624ThXNPsXV0B`Fzp+-
z#GJU^m}3=DDnJ=25~RggiM><z4`(!@fQRRNS*Gm_Fx0y7wQSVNl`=(uF*q2nsB+$h
zO()+J8iF3+IhXU9Jwy<~3tP~;WPFk_FE1&}g$q*Cni)5<*>rb(eaualUGI7wV6H=J
z>*bY5h|1GuzYbz5a{rBA(nWy4Gre(fTxY*qvk0FigV7v_us@){KZn|zu{FEl$QIUQ
z=9g~{gL`{npWjxaad=SOs*p<?x#f=LKK^eUqKTtgt!%XQmOx_THLV19Tw*u$#g+GE
z!VzSSo1I?MY;}&CA598iMbYjlGWs5^X0(R!Z8eNL#4*#!fz)CLbrrf1RJ<~k9)d8#
zL?nF|J<=xE9I&!v{5CLtljNkpReIzh|9*Vwj<8HNg4{&Xg{^Z9+%fIFLC1h*do$Ow
zuDMTGdp$QTBpH&Oo8ytiexte0vV28_n535jzDnjBFdDG!8yKkG0lL?oH6CLWC%P5{
zN&(*kz!;Rs<+!DDu0%X7&mPfXHq0g()v}%(NQCypSg*G(28Svc_Xy5@=~46}PXx?7
za$UM%#PO5>gRijFp<8q~Cg@RbTc8C1;RyPzjd?xs&Vn&?Coj<{cS7^o8V(H%6FCKr
zvnPXld`GZF^eCK^XA{Dj?o<E-pB!;vE(3SdwZ{=oP3zK~&w!JV2*LF>OvdbvR7z#Q
z1_Pslf&ni{?-$TS0v*v0V--pR5Cni1B+Cz)SD_ns3ZRHTy%+vGSKlw>cUEXquI(iP
z5m)~-2t12rj9E|IkTbra+x>hDzv<c>T2l?!#KKls6eXulSvFF%B8mq07|A)bA+I}Y
zi7Q~H>yJ&0Q!Sg&u15oKb*+zrv*9g1BPIS{PLTW5g_0=W<OtcS8GO!eMr-3j1>lBF
z2qm9MrAb!wZc%x^U+N#mAyi{jO#tE96!}|Sh|~!ebY~d|`~4S($x!luk+<d^iiW6<
zi3y(o1}%t-dSj?tw-Gop3}pru*>%N@<^JJQ0QvQE9L%qulL3_i7jN9K;f5?5HkL?&
zPw#UNK8pH>4jR-A$Yp3?R28jYUuK?JwuL_1^?<{FSPKYSTz&YVeOUd0eOP^P9~Q|w
z%L8>9G!)QN1pJ89BOx7?oyidPSqnA-TqCe>dCLik)7Kq71*nk=`bGfnF`#f64f^*e
zP<Vsmi9l(0LLS?}&;^sjr@)HxIv+j}Rt)S3u)-~r@l>>8U{8P*a_W<fAOp(+wMaGy
zui8;6k1muA5<+z1JfMpTRAk~j22^C?JO)%`;yeLXa^gG*Ru7(qQLN;|c@nH1JPV^(
z$%*qMSml|8nu?b&cVNdHQ&xp8m85NEL5OPqFA^2z<oPR6Sg;eytS+^ihwg_|M@)BO
z1`%Mw^Ea7Bixi|3(8(H~R5bJHUa6p0(?PF37PKUE5@@E(;~EIT%GFHL(4KhiShpXz
zYH!ah^VRk!#FdFr12G1Y=X?bKyFqjxYjQ6NqG2b9@{`Dp;lc$OwuWOiz;qC|-E&nI
zrOBN@;DUy;Z{+WQC-?f7iEGF&HTn|d?!rnl+n>p#UG59=swnOSMX)v%38PRlFu*Fn
z3L-;^3P@IC?3-8cdrwAFFwIt{-)xgXtJQ9H@tAb#S=VFBxV1dW)HI#@Hwmu`Xf>yR
z`MXA~_WF&X;>N}7N+S3UVJi?Q8I@x3;6QwV?iP_~OCaS=Wpl~$Aukir5oZM71UB1G
z+6!SDVmnk?G4lK*?x<iO1r+$jIDF@%CE?ts_0h<^_0gIMFaWW2IRGLZ7i^7&WM{Po
z1D6wTp>0)$wx`@rfqASP$%dV7zdJy;A2s>8ZPU40AeL3ReaiWR(aI8SGMT3zT7)Fi
zJZbmM=6SExKI_m|Vf~MMFtjJF%<)lME2BLn9CHlqub3I@TXqM5XVCa=j-0VG^H&mc
zRtB@LZ`f1cfrY;iiGHjXDbP7N77&>QS^a(S!g&79xxT?%w`9mySv&Z+1ie`Rzvsmm
zIA@XquI=6~3(c8e>bBW^GQgJjx)jLY=Z-xyJ60zdruw`XXrk^F9wA{nSTs;nDrvAH
ze8yc_2q+j*0R}m|+ozb4owM#~y=|W41|^46fI@suTb-QML*o}lrqwygjYW9O!f-nE
z4#UYa9Kv3*Xgs)B@_>XU^+Nmwq}Yfc6c~>D^&`VT^>Yk^sGmI!d7&&AhP;D72fYhM
z09BN9;kBS*jM}iY=b|=Oj*yxvw{rI`v<+O;&5-^4LH!Luh;{rTZ_guOA|$Bf?L2KV
z7j>E}mk;VJ)r@r#X5!miofOq0o#W~c7~ou;4V8Z65GQy4B6t6zfF3i-M*g9G<annH
z`f{V5GW3rc`-d>VQ*l8WD?E@FvKa!sCsS?#faYH}F41v=&jg3A+M-EcYI1<0{G^<u
z4xXSSHjZJDRV;L|H()T)e2DSH>{<!2Vb$L7jyPT^UQjiEkPIc14l)}Hicy<X=%2At
zv3(X1vVr@*iGxIujuWnNEwUDfWMgDya?!7BKC!%ebiBQE=r2l|CdkNH=#={E8<(z#
z@hP(FQQ+>ejowX;KQ;>Ny%My;)~vBTzQnM+7P{eJidLTh&!@1v!}1gRjv<jxkVp^B
zpe$v(7)sOyG?gN6f<A5R80X8jeD)V6UznkmF;8|I|GsiObO8GQ+56J=wv8m=`TFWt
zAkMBs$`B<gp3JPSEJe{4Cz2?Ol#i@8d>|5%U{i!bkdhV0``h0-`UD!_A=#73*iT}E
zKzDU_b#--hbseLN3DHlK2yzYk74KQ|^<l$jLm`zD8q=?#t=1S$3THvzy9|2ob&5DK
zVf(dqx82$6o;2EMO4WKc<rb5$1U>p_goaiH$tXsZvF|Udo30wh*_;YlH=al@!YF69
zKG7jpyuJ*QE}_~|2pJ#IUBR4nOFd+i8;`1@oGkJ^Ymgj@5RAB~2Ui<ibWmTRtOQ!;
zQVSz{S5~SLqU)|2Og9M!Xo2}LgEAcx*+8CX%0k~feVo#bEv}^oWQ-z(#`2D%%UD|x
z#+UA(p1*>=DUBnSByc4Gt$cs1>NQ~lPAN-G@?J7hpt#7@W0mAHW`8B2^!1iaZIX?1
zQGnutZ_eDVLH<g!Hoe_6K06s?W_x2aN((oo&NCVnYFAH243bI3iA~0+x<g(Z4cU`Z
z<X^=c1VW*4(d(^zU-^EC<Km(|Qb6u-I9Ef7Dw{MTq2s&dof2lI__S%ov{xvYjY^GH
zGmLL$B3sM8s?Nr8cn7^N&ske|dANE$t#dXC*L-8hMNDbwvoNL4Hvp4qEiJ;d)_e)K
z{CVcb&C<+f|NWSo1L=QmHa9<we@2#<eziUu7bMR1M`MGI(M;`d0oFA}{uoU1OAPM;
zS*UziFMqK~p2l8{F8w?6+3d{xxf$-g7Do~9O}Vb1ms6L_DL6Peou3VV75Yv_74rMK
zFl0yPL<|n&-Cl;|>JJlnLW&AP0%Z&fKSascZLwsYymd8FhB?aDcjWA<4x7l;<*Ysk
z5+U-K4irY|q>+5X3sDp^D#2W0NF4D|cCsvu4xD-omEw!_JKgK?s8r4|5Hl7GIlbZd
zw&}^j<Zv<=AlXQ*--7t8t97bsXww6k*H8C)tG-hzuk6T1OXX6zytAfOs$QcHf^KUm
zb;LTF1^El$laF`*Y;39?(op{#m=zM`9|^Vo9~RHH%~Es7hQT2Aj>!;dGNwpG<5KeL
zXk2<5cy9n4Ib7p2nlh2OY{<%U?=%T608b(<B`2li5n=tU>e~AEJ17i{brMF~tO=#<
zH->c<{%#J>Un7=MuiKOB*A%wt&GvS+?EUCfy$$cp>U!BIM^;jMxMO2Mtg(X7%3Ir2
zs6ghs8{)$<!Q0w~5ps@E{Qabg$wm3W+rh!ZKT~bTVE@ASH$QHeX>ou5-Fr7l=wc$d
zM6cItFMv6pTt|IMI`*{ky)>5-H#rJ#OCJq!K(u}s^=b4toF~}B^6I)Acqe!t!h86P
zrIx8EbY7rH;qfTE<x)7mq7LGb@V!BAv1-{U4-I9@tAbDod%}U<kfgOyqat0L`)z|H
z-uT=NpP>utNbhY-7K$2tMF2Z#Tme8)1V8&U$7rf7x{~TgX6Aonvf5xI2y^DtBB{%D
zK{1Ia(>5%XD-h+bK%k0%^69?iHukRLBtlPgxk$VkUF7Yu+22^2`mLa;Uj=WSRAOzs
zcNqp)H5gpP<jg@Ffi8#!1Jx4wRM#r<)@pn?Nxa*5^gi)8niGbUsqJj<RvhcB#%SE-
ze}38qLC;<<wdJ46|G8V<-d@|Wgf<#;m&e?dJuxjajY)Qv-KzP!{!dS;)@&E`ZHD$4
zrcd*o?F$UW&G9e=-{^|M7fNZ9;}gVvY)?$2xXWhGa=H~mNghOYN);&OdvW(md>zV|
z_@3gVoKPa;u4om`I)SuN6r<Q<vr1vCFS1thogLa1p{xwE9*p7%bV&RoFD&T-p060=
zbA2jA0NMVDgv~Q5@&H8Uu$dZ5e7(gmcXAYb*p19BpbH;Pqu!M2#P<pcRkhu&A<Fal
z1$sf(^+l16^gE=74`X7+MHaD6**ts8EmE5%x4|`>V;={;2Y;WTH9^N4Um?dm+w{&b
zpxh>~g>8I?<^=`!n%qXabAmPv&7*_nQ6skv4nu(N1OKedfOTM!oRRTWwN_@a4erK<
zZS*1UUb!@9X3rE5#u2L&FAkYxKe~y~HeTfAL|Vq`lj{lvoT25HBmjrPVVs4A6vAtd
z>IFO7YaA@`Y-i1hrKJuw)$N7RKpO57V$}1Qm2<EZJULqdF<=#DBPS>plf`@?u_&m_
zU%vkNw~haG6b}!N57y@m0Y(8(pFYir|FyPOUH_{c|7#tD@Aa?ozrMsDD~KviZnt%G
z@CFwDXcUit&td2$vI2^SEBKj1E=R+34N3<rS>)Gc_2*pwCn4(<+H@9t#YezoDm$36
z{;N;_nu`Co{{7P*{`PhKfARJIs&;bJJbK>l?ls%B-Gj#7(zlag(dSe!y5q*lVeP1K
z)H!$~%YbBedfePob#>h4?qR#$eMvre885sNy5V^u?y45;T>g7*558<gID8K;x?QUV
zy#vWAxERI>YP8%{pC6spyIoYZ$mgByA1px6>vb0xmKSNn2Dqb_bpXNfXYmC49Ue(|
zVR43b4G2UsR8u8BT4zRZbfXxp7>P{UQ}TDZmJtwH&=mH$eNP<tD2|adu2d?<h-3Uk
z!m2Z>a1SO)I3h+bqU3TI<J%~q&v)O!sUDgbkD?0@WTB0-J8yW?>s3T)i4`13R~rcT
zLkzP!8IHoBcS%OS1;x>kB?<=7Z`8H!Fdlbjs<-~bU#nBTe{al<{jU~wv!wo&{|{E%
zq_M4$H{BdUU@`w+f4cV9rz!qlefq=K_^*G?_^<a3{rWZPE2&Froo2mTYquLGolN-G
z^w*`;Wx&7{#p*;g?2u>oyF^~(uzxIW0m@roW67hSs@{qg_AR)jdh2Mf*=e?p{KNM1
zGH)`$D7Zo`rKdi6S9p_vnh)Ds9M!jBFr+3tFZ_Vv!#T<T>OlY^s!B+sh@te!Wle?m
zP^uhpgQU8@#VS?Ra|<g+Eqees@zRS0I)CMru;nDU2}MtNQ^ekuMK_}%TS#CGQ=dbn
zrx6H~EIj;K=VKdFj3z@GJ(u4QU}5|?A7UIDvE-@@sK5a9a!5i`I=FMq2~!n@Gd&AA
z>Mit`Tt={K36N{ZkXF+Kl}3(~TEr=@_PSd?K1OM6XS-UZOQ&uZFEB+I)>7{SW<)S%
zgAzEDv`S8eFfFqj<qB!ab&NQuhwYbj{Enee%VlM%0{t2WbXj3FiMVGms_z@Q9ilOd
zYWYi~^qNW7f(9?Y#kX{sNxDH{CF9j!zg~OIopA&(wqKi(crEiG&Ui0_B1jV=*086j
z+WYuvGgZ5=yj%sIr>@|v`#8^g(>m>TPMVFqZoPJJ0CG;Z26J2Qv`$dVx8K@>BR#x2
ztU;mIzE$n8`5LNm;dZz63<RWS&l)En^LN|LpBrsgX@XO46c_L8H`}sxctV{*_Zf7^
zXsXt2A2;gFXE1SCd3s&5;khffcK-oQ7uRi_q7zZ~q;~YY(XG{?sWbv=d+?!qeA0S;
zQae0sk|QDz-%kb*rbs;4oE+5-x{Z?)X#M!0QENB4&)}2;^><I7cTXC}t&>h}H9iv6
zkZ%YO@ZYRmz;Z|mFj+Nd6DJJ^YEi@9o1@yHz<?l)u9}}5P<9;ynAt`ys8bkM^Z5}J
zJZRMb+!54(0*!794w^L5cK7tC3A<$D5Gbm_un#&-0F;_vyu2XP!1Gb7Llcasd)_$d
z9-SUGfJ`WuwlRIrpwSLu>$rvf)wPr7r`SsO7@5XiS5<#iYj+#3fj*!Ey0!_ldwAOJ
zbaxvtg|!1+q~2;g1CnYY&{rqTj-;S&2bgi=;8|_&AJj7Tw|#nyvv0P5Rj_+d8W0E+
zczRF+x<NPnSIy2oVx|Mjt@gZy<RQlGfRji6dWv+}=Fv--l07+RKvR3C^-lJ?U?$=p
zQ4CP_j41L|jV24o19}AC2@%j~pm$l62p6$-3NJhR4aD#P;Hsl$vOTR=+2$5%$HmYz
z6SHWiOW=+gPy!}JLaf&SyrlhSqLaJ0xS<ezw1LPT9K4x6NA=dh=^^gV|1h}S^lB%q
zSGjdk$m}#=dDVb6fv{=-s&SHAmd8|7ezyg5shBe1eZTgS(bR_0w7hG;N?Rv;jg#r)
zP}H5?sSu#%D}d1xSXa3GJNq@Bbg_!}S`A(psvmMG(`-!df`;AZWw+Nnf%-Cmj9Ag_
z)A~LL8_=iYE;097^T>gF>{#spxWwKYx#M6*u<16G-E*u&3uDwHDlS%@?wyUB-IH4V
zr$$FGQsjk>9am8>OeN6zexr@0RE?u{2Rf?Z9y3TW;*-XUQ@{i<Gs122=m<K<!w|d$
zK2@h<SZ%!>*k;?$FtSZ*{qvS{gwqJ?6FBur?M<7u3pa{k2nYsw0&bv;NE<q=tRs+i
z)0k_vO=1!}wbnfV5T^}bI30Y4^GDrJO)P9&os&0$Wg~8>bxh-cD`!t&4ma*2{FJ6z
zMXG{_)7$CjtA41OL^{0!PZT^H)P8;=8D)FEl>mHhx79kBUP;UqO`UtLc%9aWX9=p_
zJ?#)TC*CLCxp!d;f?a`u7*O31kp6Y94YiYlM97j!noFd37-It1LL10lusj%OeczBF
zNzR0If6%Jegb)MbS^XzORHxPI?lzwr>;6#ePntkUUI3A+M$1@Fq1@waQk300sJ%hy
zRZ&%{S91xaUA2npEf^rJhSc1f!j2+z5JGV|w2qYMlSfd9omW^E?L%OoX<1t^`)4PO
z24dzPr_XT@0)cmrTBHsf9>dbcD+v~lj)4uJkOT+>*zkPV0?uS3i@Ht|pD#a2gu+nv
zoD(byFd4Z@)c0El(7J@)hFLkqj>7osO`2l5l;y(0Swfx(3(bIX3J41iahz^r_7+sN
zqrN~|^|A5;6?=3D$8djSmGj8Rf*}ED?Tt`>(51p7c_EWjJl4XcfTMH7@Vl{u5?o)S
z0ufq5`!EkLo<?}F;(M2ri~=<&%4*RDl~nkMZ)Z`#j>`nUMW_FiBI~W1mXCxdO0JTp
zaKdA}27sp00AuqLH~X#1J4pjXi)GZpq4p>dcTWTwqgEtd3KZV>DAi3rgyI}~<30ld
z{FJ7T$+XL+*w>i>Qg{<FM0LldZcq#ziPD=CXqjdoh@~YnQT6z-Cv`RHi=1qeYzdfB
z+noo|fXu|sbtICvfk*k3B39}K-c3jWZt!EF##L-6xMoWTfv3PVK>WI~(M(<v<l4B-
zD$-7L75c`!*?;EJ-jo*}?|Jkg?606Pe&JyCF>$7`qJOG2&TqB_Mef}JgNk~^<Gz=Y
zVsTvbxo^|4VYa^*KDHOb$0C}R^24kJc>{8R$x^+kC=#S<Y19ZAzK4$sEf(Yex%ea%
zMlKfT*E{}m&?Y9&g*Fgk+r9_lZF?H>PU71ng&nM{(N~WL=>iCMe)JWnUT4JLZ7i{6
z$QNwYF7wXeJxxHNl)VyV&eA*!V`hm}OSml;BA1j8k}xG=*L?iDnvYY3loGj_U=;@e
zmeVU*RQvsRPgbigRxPKsLa1e*!OWt|hl#S02J+}$6AvkjFiDb5gdtiCsv#6*We-d#
z7oT@wg1Z6vNnl62ct^HH5ZUbl{W_3F7lKjMFv=Q-C6l1d`O7Skyot!p;i6S@S}laE
z#X3w8XbvE=gl8YXqE&NREreWFkkn8z^ky&8qt=mBx*J=odue`iG`Z@qD+U(b!F@v*
z?fY}y2$ZLbj1k>EUQ9qlhc?*g%5<30e)Y)Ju7dF>`jF*nmw5Gz2^d52g+aPbrgN>r
zMOehO=33Gju2FEOH)R@06sdlwT65^`YTmAeUZPbbz=H3BVCiu1w!javl(X@r+HiiC
zZTN*P%&3{0f;QGMmi9u-N#8HVTZTtQNq0b=xLcyW%m*+-&*xBhP8k9z`vBT(gK&<5
zQe5ivoaqw6`$`Y;_4VkdXxPWN5ToD@e#wC^IA;!h<ur1~Wd(yK{g51a)%V2H`sYSM
z)x^Tv*^)_Ql1fZzV_ml~XA?6{$8-&U*@(qK+!pPPFx?9IpQB-$aByV<BPtS+0n{DY
zXJLw8ZxjYe%!arl%m_{PW39aw8*lXpcrTs|`e<JxOif5fX835HO)f5QTR=0M6k+JQ
z`u#@zCqq|?rfr2(P%rPW1O!i8+xwG?unvP;mJj~@)wM3&N!pI=tGC3zx&1G!@%?ac
z9ggPn04ucrt*)*Au>M2Z|LbpG{Xf3i|2hHuu=3RinAtN375j|?yv0fz0GnU1ZHnd*
zn11(XVJ|@ANOW9D!tjccKVh$73mgpYU^d^w4tI4uz7tl4iE`rKP%%J6KyWy^V8Cs1
z47|ly-Z<{wcqJG_0sCGnyG%Mr#fA~<Z3yQaAgDXT6c>ue*KlkSz8^By7&f_-y5d1^
z9NZSa;dkVmQ|pwT`rnr`R|}29<IWp2K)^duRC$zM+b2lV&)wyf{+hzN&sl{%v|m5=
z=*~pm2u<&qBYBiEF?E}lX8pz~lFwd<@`3%XJ?iZ&Wzz;kLtMnVnzUr@M)*byMFM#2
zMg0$DGAb1uK*a4))j?)4UqPL+JQa@;4BjIIDW3LNPP!n0;5N9!a4dZ3BL^yPf1qs)
zgQ8FQs#TCEFFB)CsVwqG!51M1qHk1m!D*g?5k=kSL{HjxUID^3%H%$eRC_&&ui|mo
z72zJBGCB7Ty-V5<m9;fTRYI~o3xRFIgrOa~KI)_J8Hk_}J}t8i;jUy9S{4N;)WA6L
zMlmyIv{;uxK)lQNRx6r2k^^x??#8fhU|9^HFZ9huSpv|2*4v`4sh8Hh;*bXLp_#N*
z!Ys8LXw!db@Gk@Wqjwl3B>=_(pMzw>u;8}?--+-WyA&0xbuFe=cg^qk@%4^6G<Z6p
z0W^lQ5RdTE7^l}558|OCd(R^u#21(dD4|PMvd&L<8djwGS<fTt?#bF5n3T>G1{I}8
zAB@jHiILC1#ILvkkh8$2kdBqtKy*O)+W7`~>*2uQ4<bMxEPA6dcAKCfGmUx?@>V0$
zAX3g0U~rg{YN{yVz=hahX3l{~(FI^+_zImE($R4P`}1|usCfSlHHQ>{i2B91?!F`z
zUJB;Sh}o3KLkh!rjd3q;Qp7-ki=rV=V8lUn=w{~zBo1O^rS`MtQLRy~I2f`)$!3%t
z@&zzRNQDr}G970W#kTJVDVeRi)rR$>gAS!qCdxoPCn4<436ndYc$45fG_Z>}KNc9|
z!0rNLLu=Y;8QsxUjnZk(juZ80wX`I{q}4UibYe-s+n%K)F$im`<C|cV?n{CL)Z-A%
z?WALBj8O|{D3nvMRIviocPB%|IvCCEF$&rvMz#zfQ@lj)IrEhLaL3!2(mkdU303!~
ztvj;A9w{)2SLobCahy=A-Mjo|N*@hD#KPiin4_*)1VITvvm|Ynv8nqg=8nRmn^2xy
zly_Ig@rvXkQIbr^-x>y$+CyZJDhs+r-@cG<+4*QZy2G_1#$<E1*_p$=5N2(i7UqZ=
zeB~*$F&89*I+r9VoDPXkr0^*nv669oU0IAc{QNb)Fx|L-$ps<@)R8+wh;mTwJ~ht>
z=jm_ToFdsNiXcc~E@4ZH#vGWX%Zawo8cszLh5Zua3dj>0tWW6xuTo)ER2D8509sE*
z{%bF%oGZ7Mvs~E~(oTk<yTqdmis?Ei7Kv#nqbHP~?JOk0^bA;$J6c`S3^3kY&!C#%
zQ&jIIXErCpu2IEIZC1-$@^QUf_5m*ZpH1784o#YhvY=(-mr@(8jC)wKOcMbQ@}NFK
zW5|(=ju!we-UOpaoR5Ps#ZW}$0iK?EJ&?~qm|<a<b~ZTqknkUlU-D0my4cbJn%yiO
zQH-<xWE4_;35$BgQ<W5sEFHShT-z9af(q@3cIAk=(Ro)`YB13mkvQ|t$Bw$?IgaIU
zs3G~&)rS(`8e`3IJ-YZgMf-8`H)-siJ}=#rH*HhI8@vnblxTEik%`#V6>){^>QP4_
zTjk}G0Q2GHht87|-%*L^+_AUwCITcTNuoO<q-)2!>QWy!b9&>#B@V|z74bANr0hiB
zcisqJu$PIRmc)yX^B_>q1hX?WA9DEjnGV^-j}jUPoeFA}k&qV7F;)^@@RTd&h;$bB
zTzkYH@3R02z(cJ<);N7#iUJeHdK5v#7hjZ&+Ap{=)m+ZIJUec*{u7sHQD#Pwnll{{
zcS(BT<3%mNtBRo3$3gFXIIhv|4X;_)qc6?mB)krTF>Uwa%aT`^L_x|r1XJ{2O4-TK
zD(zGWm~SqF!TDx-Ok`;ZW8#hn10nU{D@TuIej-!Qr#{HTy%`F<4e~U=!SdVN()tVy
z_wdbsrh31n;q0(QAejuJiJCB%Fd8z_))fH3@k&jB5+=`qmb+&ll6FN+*8ZA<^a@8g
z4bm3XG&`F;+^UbRTv89)Iv<JL0&^jZxo|u+FIGqAw#psCeL87(v7=)zr+Q{bM3LqZ
z7^9%O`=--qo6&nIG&mX^?cR-1p+LXo0^xTlia$if1pVCRH>HvW@Sk>FvSv*K=#1|6
z0iy;0(b?va*~_gIC{N*3q*0K;(z<Fv3$&SGz9oNLb1w5KBe(iHbMsp!)HFm=1u2kt
zCq?*lb=j`EK>r`6$IS9Y*nd<wjP6!&MJ$OsHK}i6>h#8O%m-qhG)o*gdPsbEMJ$2L
zNbk|V)pd2v^2x|ej5ku}${1ELeALj3h=V?=)i53*-L{ePy`t{mVaURh5Yq;z@Y!c~
z-f!Uub@|>SV=c_M4jz4$xho=ec{y9ms=R3}c^?;;G6cOWv=W`NU&xQ_87!0sa@z&j
zQyv9f{nhOo4d*x5m0B~zQWq_E#UmQnrqZT#Ti4{>iA#1ej%gmf--~Q{y%K@k-liB;
z1oIotxB{;agLm<$p!NpT#u~CVLK3y%_4L|O<a}1c+~U%b+V%!Nrnp|^^gs`T57-(E
zlu?rXj2*+z`0tWDE?fog!!BQacG;?2E4WfYpPC8OWX~xHCN)l9m4p|%qywL_q@ErV
z`j3D{`Q>+oxw7}z`@ft?*s$8P>&q%sKW7>?6s5THxk#ztkWjRxNKlX;lb#%nuV%wn
zh~FucPE3l{1kFJ4Ndd+m3byz`;DshuE<rJs=Y!xPvF6en#kcSkFwi}VP|6mdA4@D5
zguV*}Hke`A*igwhY%82`G#Ic80<i7NVAzMl#~x+=<1-4v6HW|<rNSV*2?u5kw2!LC
z;rV&88zeGN8G3iiJ3=Ja;360cGmmI^9*?d>s695q3X!#-&iK2;BOFvDkp1`;+5=QK
zy%L&{VbsBBC=7nnofW1J6b=s>KqaN@KQ}-z@y5YLSjlGnNdx=cJ0OO~&qp9o?%th*
z=ls*26_+@XW^_863OX;Pyq2;lVI)^GjzMAtD=&v1rG5Zc1M!k&E|brSUte=_M5`}u
zN2+X>TpiyM0<;9|@XYWtMKX>Rc3!2`We>U?4Nd+AL*zKgeC^f)F@F?a(X{dfJ<M~q
z5=kGo!5s=7^i?|IjQRArjK*td0j>ZALdP%)WjoPOn6f0~z8MG*98s}2gASZ$M?h<2
zBfo8=xYTD3ql7-ur-!jQr5Rjxb>t>|b%qr84A)RTMA-3R&2APe7SfeaD8OntXTzLE
zwv3x)W8lg?!Fb1=PM^T$?qd{O;EhiEGPus`YiDEQ7^}8~CFt~X=u8NO)_K2y(fM4%
zYF}(dPD0=fB_nJYMY0R4sB~tHWlI|ZmRF+gJ~np}y|2LHQ+s-0qhhC<y_aG)v?>_o
zcOG(Tq&tvnX6W13iCJi3fL#;!+{k1Z19pjDO63X~Uo+$Eu|-y7-hHN6F7jnGt0cod
z6@?~1d&@ycGSl_l=VQOv<0<DREHW=QR4OK&(Q8)5WEJh;+)QO7Y+gDC!4_9=(5$0^
zMYd_aJ`)8O_rguYQ|n~u*s3yx$7CjbhV5nQ<O_3?b8Ar;Dik5D!e&k@1g-Y7f=S-W
znD+o~ox6SOaOzuUQKd#9R1}}Ax<yAT-*YS-9^O5mJ}#Wd&w>NqHXaxSivu+=fNVL5
zuA%|Q*UFG2Tf=E<YV!m}3wC;f$oq)5F)_Gq0kd7oEB|@qP&NJnQPh1jf%DK{ay8`R
zAn-GaN&-iU>~RD!ZbS^=tSwx7<%*RYZlo7TH)iAJkxl59W)xF|s%b_sMXRo67csOx
zYr2&2Y4d(Ub4s6Pj4}JRB>KI*U48sm^e3f1H1?=$eYpx<A4qYRD>P;2QYjVYry{7j
zFbSz<ztYcQ^dmH@L_dZG`Sa3`X*AR!!CMLjC=8HVm3*G0-JaIqB^9a=^}|5vPHsas
zA0?wfDTbijp}-~t&VT~ji-^(4CjaC$>It{e>rwpSj({=e#{9)m?^0^$l#wb3Jt(x0
z_xw#bx(tJU#iU@*wIx8Gx^j@yEjyRwWobqx^&VSlJ#mEU@IL&Qe~EOfOBA=j*bY4J
z;oB8WnvD(PVVAnl&71pLzt0w$+|yqt!5})1&~%m+SER#Sk>|0_I=X3~W|ows=$%tF
z^<GrY%|T3K+m1H2g~1IIpb_ro5Qt-*DM|ZpB;&14RY!<evO-em!a=_;&;t0OvO>;Y
zmzkCslk1sz<89o~D=S4fQGKA(;Yimz7^BR+ZTs~xMMsGe*ZVqH=~$dIE`vmHXk7?~
z0irvt%oyFN#Z_rQ4g~0{G#OpTi3k>l2I=7~{Er!NNd6J-hZ`w6*wd-P(M>SOIP)2f
zfeej3cXK+Qu>J5eTJr&hUiG>BT|5C+0q~c-(%BOXgu_m%w}`{6tD@09l-7M9=qa3F
z`7=r|r!t%yzKF!MdY5t3BgL-twH%7P^Nz-XWF-Rmm@bc0V_Y^ErwgZNMVns>2Qh5O
z>!Yv7Sf^CSwBiWSiib<h1<Sz68oEtkyL|?n?>xfbyMmt*ZgC+ZIiho1c#hxks+jG(
zr3TvB7`b$FgIB@ORXtPy#H^bvd}W2Q3?^E|O*RHXOD-qD=qOPIHTQN*mf&bOCOuMx
z7paNZko*;>;fhsFbV*tH&((46Q2?tiA6<fwOlMZJeM`-f8i1w&OgjW@L3F;r)OKhZ
zUm2jc(+3mlAS11gq-<uaazhp)R>rc*3G9!;s8G9U<%>3yN}g-@!XOdQWdN~HJ=RyB
zuHTMYord>{BIleF`;!&a+V!BxaByy~pSvIDi#jCZC37GqWHWY8iMhb)Nv_blvlLb?
zD!vCo7Rthzn@p`eA$QJ*q8s2$?o=!H>J|o%h{hA;78(pj(7`)z9K1(&Aryjy<1?=N
z-4LiWJYx2QS}(lV^!2VG9AsNZ_vjO@1H_1)mL7D8n1s2(h<G2+zF6zVovd=!hIN6f
zNP$Z3gRI(+U2qdPo)WrL45cwje3Rigwf?cQ2~*8cO?hE%1E0`A*FdJ0;g3M39{S40
z7ST;6a3x*$36RHfdf7a&=}JR5iV4QH+4!=na6^*)Hb-D|=nc?r!9@U^>kKQ&K9@bR
z9WCh|l*>yp&U>K*)lf8Ua2taMnIdw5b)!@9k{czWv&>Q=#R>0Tk4M7&3*N(zGO;Y7
z|JUWVI?FpH{7i8bvJHxOVn*L(Sh1{5DHG&PNui|Up=<0j*u~hKZaMYVG7rGrR;A3N
zaO)W5#n>Bt_WC}D<wbjLMD(oW6m`+(#l~BC$0P#2!;ay`vK)xJ`&$YpsukBo;;vD&
zSc^Stb1Gt9_Hu0Q#ul5<nbnKBrm19<1Elk1)Kf#8ucNtOG^SK{i&T+0XqK(auo{KU
zgwe%1EGf%`gVrLix&l{60Z0l4UY!kr_*W-o`EPKMD>#*RVF_%hc15qd@`baa`l=c{
z3U8-0X0kVF!k_7;Z^1F$`iuUEh&+sg$9{S&*%>g@4mH!#LTk~aH!l;PIGsD|7uMOT
z`L)z)Ll$aTKFkA5-XT03Oaai(hnYHvQf`rRmMI<yP~LkW_(x<vbA3IyWBJ68-B-{>
zmE`j?3IwCWZ|H*_#?dv;E&vRnCeRZ+p6wdZI|g?w?04LbtIybN$GgU}>X;7biAso)
zI%}B&@NP$FlTDy0$t%OrZApjg(PSV(mYcz5==gylv*@YDq#0k!r<EPAmioe*5`Lj4
z`YPVR*IiXle@$SNs5&s~^1D=JQPBSUHXQb42Un*q2IE;^R5K-<&+Z(H#04w+XDW1D
zPJRIZ$DfY|MbjT7YtOw@W`sPwo3v~2HP(#X)!p3VIV|Yr17dTYF3#J{-G}NXcXIbL
zb@Ra{y-f>k9+c{Ca@{XTo1(M#>!z!p2bW1o&^wWDP>}+kTWI(rY^%kuDU5|pg&dv=
z5?b0K*43Y)*1Z|p-;$67>J%&!ObTzQ?w;uG=FIZRgY{PQQUY9{w+DH>B@1{bj0I@Y
z>aF;Pds^5;rnhES=M!0kUYw;COQSAC5ZF0LZA#C(g`&qXt5iF|#l55tPAX=m8(^1^
zaTKaprb2tW;XMzpu0*>&{<u*;S`+A#`qdl`)vv{58t*6g>nC#>K)(2iKe6axe758k
zswt!s*XMo&7&BpOz*G}IXFnYSiruxa5`E2nnpX&lI6jn^R#TY9SMv!$3&295KWmET
zcxS>*3q8oNGfUX4;8?DOo#IWz?ttr^yHmU=XFBycFttdh^5vODdgbcD?#%ON_v&*a
z%QSxUi^=LUAj;AD?1<WR^ysrt`OG-VBdB}#%F%;G$^0`D6tdt6C-f9*JpT(4l&keW
z9zo5Hqt8W9t{yB*P`2cSMhe2}!Sv6<Yc13|^R+BWNYE}CL673wRW^BC?F%wDy|Sph
zebFl;9AC3)(U8RglwrsmAcnUId#9^ZOhzEz@nyhw${4X-hJB~@I<mRdoal}$D5i9{
zqDzKS+b*iEZ*3b`wA0IIc!hbJv=*-V8CWwooMh@4mr5HOS<hT&J1%3q9nn9inAZEU
zne!jE-_$+T?U@Xkq|!QbS7EnkUUukPBwLkjVU1sz9rB!JO`o3vVdYJwz%-z&QKwg3
zbS4dda_2m{+L9y)b6iY<f4q)U|5U&KIW#DmH)v42`Df7}vVBd2;*d`-MPw=w^oz3J
zHYwZPv&M-`mRp)(mZvfz)7S1Q_|To+I%VW7%{K9~@TYQ)m9wN})gvdzn@*zEsEkE~
zqGj<adia(JzHa!RvkYYW#yXI%=2-~-6zb3AyTfE0496L|*Woxy#sFPDcjX`DkBzmI
zfr&kVo(;%Bpy?trY}CqY$*$>K0GT3=-I;_#sY=OR1)pUo4?Y&l)880O9o3;nlfi%z
zV{D1I(TWt}Ycb9p->}`zy-9bjgemmzS^i=g=aSlY=3xAaqE%i>SFokhH*6IyZNW3k
zmF>bY%S(1*Ok4X*MQM1iVtSNkiI4;xk;jask9IT+3gspB$uq)GM{}~Kfg(F;s2CY+
zQJ}9lg!CeCMFgut@M3Xw{Px`N+otb8dtEhifrk4Dswom9(1YtUpNv_f(HTX-@L~d_
z&rZ0;Su^#C>C?*_b=sqW!jOpt!(M~Zz^&>%hP6<YE3LCmzt@fWob9R`oo``&n?A*o
z>9R#Kdo%!^9zS+`Tjy`S*HJ;nmoe;^WlmSZ)+_yB9F)r2+t{@DLHUmOYlQ0iHq22!
zRE4Z}M%fq@|EaRpyF6>X`EX|9%ZaORN{eBuT#PPSn=}H9T_<%7B5f08eUTmmlrWSO
z2;#u6;?t4yl*+ZFG9k(9kjro~iV}>7Ck*@b9Zp}Hr<+;?N&_|OT}J3qG8u&x=RzcW
za#$<`wJkCND5eSn37h8_q&AMUp_<qnpz~cCk%P7Vec4WRayS`~No+nBO!p<PwXI~_
z-`K$0r~!s8hW*H><Lb)#avDNMvi3Arf8zb%?svA}_o;%leqR(6L)2rOd0^26nrIK=
z;f1e@`_7G4gy+Muu%iC%<NR<iRddhW$zDvZKTTbr&fLAp@XK_ts;*7Hjm~v4LW;+Y
zsbZVYkCuoAFbg0otbTX}To@xnjRMRd6p){nH|8}OotMVj+k3o@I<iDruH8rJFON1=
zr2|Sh>8O+XU{s+?O^q5y-MkW+b_=287BRrI2T?C9gpya{k&&(L(T|UiwK)+!8IEGI
zSI#2*C_a@Rb$iCkG|n8Q?e|87I<5V2=%|zVH0w|(3>*#PoQGkS8lk{It)E+moQzBe
z1-&QR-XlW^_(;_><8fzd8#TF{uE%pdy&jh{>S@{2E9xni)Adr3>2llSa=M=7WLnUk
zDrZ!UkID0Q6!wM6pX?_XkXbdi+#}&5E2f%e?30S3`UyQnLiDQ3B({{+85|(dl^&4w
z`#=YycyjGwuHx=t+@B0W?VNTtis<S=`9w+=PMzj3ZD~rH{o5FQQqbs|Hf>Iog7*X=
zCw{w?u76@@H^Uu8iS~}`=wvH~od>YvF)V{T04RqH@3I|(2aK4=jtbaC=QRahqX0rK
zF1-?F^o@|Os5>Afov=}@?&vdeU%(?}FQmLifOzi|q&)m*oyLq$-;?Xk<tQYNiIGZ#
zXtj@G54$&Akyw?r;gv0skqR}WT3A=<W<Z{zCixymCe6jJ{U^Jcz8_-r0P?~-yHoNU
zhq{!;D4v&<Xe1SJgi5igI`dBg0#4U`Ro*yZ048+u%1xn#yJutTKD0MSW2S|Q!Vh_M
z$(#&Ea$PbNB7*2mRGLtp6`)AWhkAuqYbu)qtYr*K1gr)YbCQeHS#hX+wuSV93cmQ7
zzrsP1(p+T*)f#!dNivSFv`Lg+Cg()5D5&=*c>N~_^zeO6CRcV}^~_YQnQ@GP#<6=R
z4-A}!lI^B&2NSL(XvBr<ohJmXt5ixb9L^SJgdj$;{RnnO81SPV`Y_tr8ei`4Xlh%l
z@T`CDs^4;)>V=VcRlYik$KuB-KxN?OP637bJFp@<9u(-ie=X;I;|Bw<7)vl1Ka$05
zx&0)BNnN0&mI!E&0f>=`>1^U<Ix}^YQ@ZwWomr)s-qAZ)60inP&S2gxv!H;?#foWN
zEn#eDC|utAD{W%U7u9!kjsWK#Y$ql^uAN|-p4~Uyz1HhD@J8o|J_G6LHW>BEUJ(fW
zEE+`PJD;M9;F`FEP9<_rW*f{>KfDG}2E9|uUT&XR)yD*dd&S1IqM0EIaRhssH;j+D
z8Z!r{FJdtrzoU74D)oxNO7hH<7irLQ<n(j|)6nXe98&cS2U1)DzBl7Q9%LM!Z4jNA
z$AQWp6qHy6DojU(Ma%;c2c5>}?2ge3=o5GDLApbo>CDrebGtjs#R!_C7X^5s;D?UB
zyh3r|*9jbk&_taC%#+K6BbMKU-XqC}2H_)1eywosb#jr%+=#+7NJfyxo!h&LKF}#u
z3E(XGVjO}|comX2yUt)8p`1HLx%cWiP7=x_D#BCTs6##oXX8t5yd7(!q@0%6V2VP8
zOokw|0T>L8fkIm%0=nQzpuCWx37Q2Fy#<>l0Ul@tPYf|O8}u>U(}jp}BhqXOgDOs5
z-;XEQcYaq{`j*9i!8GgpjrvdB-R4p4<W0BUIy|m*n!C+|=9?0{YaROs`)&VZzg_m;
zE_o^?7w4^Lw;Ly&Qi;U+jg9$o#3NV_;QG{xP7z=BsUgpM(>m=Q;>CM+ulej*<D_xa
z=^mXPHcp`B&YR;#yW7PgM-3>^X<?qZZfC#It=HO}?qQ>|-`eZ8o{6LmyEXjU;*U0v
z#_QvQX1&=tc*B|STxVMv;8JG0d3}7c-#9pKoOJh>{_@5CP!6~i{`-f&g@6CAr@bF~
z{lA~DQZHANQIDSpo5`x+pY*C74}kP4ms8tVgFin!eM<krpVYtKuRX2)@R#cMYk$S3
z>eKbVtgU~)zW%qrcxzt*0dTT_HSgUUPI_?vB7bCJp7Urxk(BUoi05&dVINjIMRF5?
z-0DxBXf47ZI>UG(lHm_XzXdGoB82zI!SG8xzP=k#-BP{mt*@>9mscMI$c+z!3C6n3
zDTLV_yyuf}I1JB%N#EPLfESeu_2R#y;V23!=O8aT094m}_-oz!2Zp2Zj)1uVx7&J$
zzyAA*80;Vn`$;9}RVMFuL{|w7?RYYRC0d7b8}6!Ch>~O?A_K)DAkgSa`PCf8zeR&V
zfc1|l$A)(RgF+kEn~JwyS*u{Sq=3TOsVc_@-fh6CrQuu-OcXYZu#X@_;$tx#zB<mw
zXJfR7MT=8l#+T#q^~UPz6*papM;EIAa}`=!tt_QPapb@Ujn`elX<G*`iNB<Hw!093
z8ukVgz{I19T=?zY<ph@eqot)3KZE12P}PN+U39NV6Q|X41}jNv>4ftTDao=-Caue>
zbEcHUGa94aiGR~xM{~rn1qYtEkWvwrGm)#h+J-k10!rE;p9_W7Ev0?uB+S$i6><z)
z?}zv@PUxy-Q|<j~d*%pm!I0{+4s0B0rK2(@78*mj-b{63ICb302Kw#!e|R<eoAe2^
zcM6>;yn8zcuFm=af5(i^0s{WNp}&(!?~;WWf^_3;t4cQ#OvOVg$M1Xecf{7y!XjIA
zL?!h$yB-gf|GX^E@zoH_OAX8oLv%La@zC*)tidT<3y>s_{UvnyCHIWO#u=EAO>mI5
z0ck3Qh!?4R?`RJpW}c&>g)BQVcn1FRmyavePZ}W|G3$WuLD<hB4tq+kaEVhC0g5O!
zOy%en$$?5`88xaW*8l+Ln08ZXg3r~m4Z;RT0*HGPO!dW7$d}#0Sa$|h)zCXm6fT0)
zC6~fpUclh)hXLMZEB2_4<64iw12WdOFJ867!FjF!E(XCbwenq>4r9H_Y*-GqVWu<Y
zBtpxIhqUj8@$l|S-|c4|G~ZsE?NBUIGoO^nGxX!;Qa1Zm#uyC9M%O3^r<*YmOI6IZ
zQPakNW^rk*wq(VdEoz`B60ggIEnD54I^xr2c0vsM2s0W!5fi@rwqPz4VVJE8i2To{
zyS&yAm|^k;9{%O3D2^wThS{UPH$jG9-SXaQJ(Y-t%NyhOmG3h{m1@5vHRNw?PZIpI
zEdNzJ<!#Tij4VXIQN#GH7%`L@;UG9To`L{1`8)?r!80G^%ET-vw*b7808Ch_zGzW(
z&7K0QQyP6co{}9Hn`vZp<mj1KQKk@+l_f&q&(Z0OPo;7`k^-?8NT9TJ0E-(%CP7&}
zs~A8)ybXsJP(?5@0YwDIB=c_g;+n2iSIRl79Y4FO$v#>pb1Nwe=(6uzV`K@fMJa>n
zxUj0Fm(;H5Kr4SM#%uEH1VssROP&X>ggc2x5!d`7aOyDVbs$dJ>$=VD-A1_$jT<+v
zg_Q~$xS<X!9=R~64FG9CmcPb~f{2Sr5}r>6xaV`II3;f=4Gn5TFuW1D{4+dp5$sta
zMdIB=ftD3g7|iaFrSg)UN%5*1f{@TFWl{*y2eIt3-x53*xIRQla~O?<Eu2U)?*gr<
zuk}8+7|`5n(c_Pwa%Z7#AeST9hF9_E&bvmIIEZ_!h)hq<LG+#_f7cvQXi)@Z!H~>B
zIWVFbrUDrBdhw_)^oB&?6j&lcCEq9p#(U+x4T$XZ0-=kZ{7DkisIF%nMR{MM4CpyC
z%f1v;qF|15zVkfie0=!1Hu{=3>&9$vYMD4IY}09w<X@?{OjE#H68!!r8vd^*xtrIv
zncXFZ;&#A}5|zM%I?%5;p^C2!lMmz3iE~$Du4KpQh7)HhcsT#&?l{toZsN8rvPx52
z%k*)JPpt}0rn5G;Hx~e&V~jHh+po}%g0qduyE6iOa|pHB8DJVsv)wSgAzOJ}^|6>4
zkx<-)Up7b2TMU90Mz9qC_Y%KAa)4h&t7BS0UM3dsFS1^WhQkYJNd`gWnG{9(9E}nW
zmku13Spv?ky%S-ilOfvaYfakszzN!s%gWHY@R}0`dm{IAYp#_%Lg`!Cco~$e%H-0S
zmOpIjDtqFf$TvF*ka0?8oK6FSX#mbb3KC7qfinsXtl&*uTBd%7v?a#4ol5|<jY-G~
zOmxjC8QWZ*sHaAy46){z5R|;&tM1qNe|YbeHfLxS20$Z>)rhvqyj5z3B_l#pbqiSG
znZPqNWRi)FFnh-6N|@vuQWR-&N{vZxfGJRhWh&-{m^lMG6Awd6X=JvF`k5>h2+=mH
zWE$p8r=dqu;8>r8pL}HMXo~eAO2!UaZ?HaQ1C^|-t<6%l#X~B+>uXiJH5miBKxHU(
zHY<R5GPb$9!JxI6{5W-{;8GSBq5|C@0SR|pO7TPVPkEH>3E~$u=aDX1c_bKA2B6J4
z31WschyV~GFG;Z_>MKx}-qL7M*rJL3WOha=^q({H$y_#RA9IDK&QeH=KK~nY$=P{a
z5jIt26RYNx2C<n@cpZ`sC@ud-8X4k}Eb7T{awz%S`J7H99ECEn8fEdYbs-7{?(veT
zh*4&521ZQU3E2>0X#}Y4KNT>Ft9LKn!q`}xwQ&7H&LVXkk8OVcCHRWD%Bk7Bg<k)9
z0J+Zr#E=U;V44Q2L;Ff|!_8UA=A}rQ(=v}U?g2lbz?7Pd^cn*C(!kldkFo?nm!PXK
z=QqTFuILVInjnU=M3^2+^P7y60?1RGO$&B72cmV^Dv2OHm5^YX_D+!yT`rGkC+Sam
zJQBLq^+=nfl7ObS04~E9Is}hjWXTYRCQxK4jxAQ5b4}&$x%A7F6DZBxptXK{g$C0)
zafwk@!Ep4{NWFsMSF1Gq<@+){-Vy`GCz8w*kd*4zV7rU4O@WeySf;F-VV6^n*;!zZ
zQ#<ngj@g?I(FZNqwa*O*9XC-tNvxTq6Uu~qn$9PK0x<}0C?+Dc(|HaEfIuEAM{zyI
zOWX_MA(Vm!ZKxeOQ30blJda@mIFG;Zrb60+3OAg$v9!anJXm~)t|nJTww!3JoH&Gc
zQYByE`xsAxhrx%Ysw7l~SNtxq^#rnWnM$g7YBa>8XAy&o#-aMpUAss(Zuc>T$82e%
zJJ+LlOlBDDA0m7~0(9Lb3)0?Y*n8iV%c!fag41m}EEl^+t)p)9sMC1fI4SY1glbu}
zl{zEY=LTEM$w(U}Bi=Hvzs1NmRVKx>Zpr#$ABr|}x`pbbO~x2>VjMEQ2uJ<6o@D?f
zec>mV1s<dY0;n1ap@y(r%3~$vw1c+eYIkR;kke(!-o)2qZ%P5a#k?;|WJlEkf)YNF
ztE<A&18vUml5Ln=M<WazFp96df527}^ica@^B+^Shgq>~T_epL0N#teK*>DLZ4ea?
zr(VZ9{biY}xXutFd#D*?v4d<>F1dNNiR@TvGd>)IVC}fnc%kVf0Ns@3xs?vcIkH1?
zi33pQMDUt*(`^<i5-#D8m_v$c^VFlnUDCS(Z;G2sSv4+vazHY?p#>lPG<6}++L}hs
z^DJ2=TOxdBzeB>koQMiaKqa0;T?>=5O|HTZ<Bbhu>?QrhFOgk<aHQ=HBJ_VX%>$rD
zG%OoLE4u2vHjK|Y(7<bN&wKq#X{AagP$;GCNvSMDnSzjo%=?-ySz%(((p|)aVfsZj
zhk@E`{<pF5b2y4iusnt|xgZ&fUGn|*aHZ;Pdhd6(*YKZ}6_Z#ITBXN%;?7I&{r5oC
zw)I1)T3PegDr<te%+Z!2EU)BmkALIUx-uO_wIm5UuL&W5jwO-!PjoNZeox(txO)+I
z&)m-KE=JtNmFyy*BJ^Eu@UXbvuS6@=oi%-#;)J)d?KxHzaHdjJ20`KiEzL^Nld4bP
zFfpG3Gbp`x6)9JU_DZTR7EZn2oXkOvAc7v{etvHc@HZL0Ku*_MSq}E&l9AT1DK36j
z;J}P$nD_W#D6Wxdk`7rM^?u`DiMLiU!JH--#+LI~az(TU4FgPw5tF2hz8)$H4IFWP
z1G>~8<dVZn>Fx;d|6AEg25POc@H&YG@vuY)SIbil{!=l9n6LTWs`s0gZa$@!?n~5O
zy`-(>^(KhGx8^>>uhn7}uh@zbUkG*y%Mn;HaMFObxYQJ$mNemPa^sZ>r{<M<{>%Y8
zwUTK8Zw>M9Cr?s)4zw+(QGqK7KL0>0d1i6Q$&XzOnKOVYPOt{+e`^STnS&E!-%RbU
z6-u{JJ45BcIEV(N;R>u(;LDWsvh?`9+F-rXd+!?xv1Bfa_ZR{qlC;FZIc|gNl5XLD
z-v6&}{EGh9Tz1Z3|37mgkaH-2YI4dZ1%6tZ!`UXs<oz%Ic7GQ0{}FBgbGXLj`~N(x
zR{!?bl>g8A8oc`I|MS)V=d1tE!}$NS>-(qqjz99N<?*wRY1fBm;pk$|GVIB7{>cXK
zf$gDaqZCJH<&>@Sdhu`G@4Kw0?vCT`rRouf^LtQlBVuW3Kci!|u}q3j(R|fN2G?|H
zC~UjUo14$$Z{+C9=WsZBPPbN5Mo5{Ew8u)uJH^r04Q2WuBtxx#KV@5EJhLH>F^tbm
z!xyL(nR7G2kf+8OPhE^^PZ#5$XrQ`dn4u(cp=Ym;grf@&SL3IS^1I3bYF=_@4CUO^
z!|q4+?Vwj1&?@2-roMM4c!7+Rv6Q}>$=;~Tt8~+l&-NWs-dC=E1%?eQ{>Ic|6|Y4X
zauS{C+aGyzmF=$<+i~L5Ic&)^4oK41YT5_nl&ZayH{LmZRBwT)hSGYmSe=&kNaC{+
zV8PFKQF1#b&gY)Qe8zN4n#HB)t4Gmi@hF<&;PvNlDoPLX-_Nng;v|BDI5^(JdcAma
zxxJ(&=?~)hB3jBgH_q$(!Zs)#8KKaKy<@`p4GW7G$YQfM>%!c{wKv^@=<76hc@TFS
zed({<Zf0}0VbfEXOB{^$)KLzl7y}0FV#q!sK)S+!R}~Rx22TWN9ZNy?FdiS`M>~|<
zj$)Ae1aO~xQX&k^a)%LR;7ldEh%pr%Q;--PrB$Jwc`L>KB`jWO&k{^do2H=c?Y%o-
zeQ*H!(W`nJ*#c6Jy1`1j4*PbbJW1%&Y0f7U|4W&-#YXfDFiK9Z$8jIHsIvgyWDC{f
z98M-VM*&-mQxi~V=H)i@$}_Q<-O}qcZQj)(rhHm#w$(93x%+#E%*1MuK9y8x$cU$c
zS_P>l_jN2WW#!o~c284&B#Me>b086PJ!@g2^gQp8&qDIEx`mw1<O?E>ZOY^<f5-I}
zuy__wPqQ?{Y!rm;$rS}%0D&Z&$-X(~0OckHY2vJah)`d1nmB8ChBJc8=6SX=q_I($
zCu9!YajogN$QeXI;MiGnShi80x?;u7Y}01f$}#@U$RLjm&ak+d7GrT9ZvuN+DQ^Nv
zjx>8&M}q!jB-J%m%l0~OScPG8vn;lZy5lpgx6aq~R=D1>=)qXJMgTyEy8-7pwayl#
zF-GOdZ7>|OK19riw$_)X)fs&kYVRqS>V|PNLq$mX_FLC^8HKl7rwC{V){_ic>@BwR
zI}0uS&fH7C^SPJ4xe7Z|jr|``5c~s+f%h`f&mDzglxKIQVS3sMU8eMnJ$)~cvRG$5
zo2rp_e?grwfM~mTGSv`f@us7ZvB0QB=&3krVaqR0ObcAn44_*-=B-dblFR6PT(OMV
zr)u)y*EJqKjb%?oT?Vt8e3;~j{}+>}k3p;WY>1)ygZJU^p}{cqy*>Yr^}h~(`5ra*
zQVV}Rh_4f$R6*zeQC)L=)d%cz41%Dj6uBf4;M|hOyw>z~Erb0y&&xTQb)Qvr<;869
zsSAr+-=Auedu3gm<xNh)B2BowZpf^JCF!4I@Dcv9FE5IsVP+NX=U00Hq%gjWt!QvB
z#J)HRh6&YI?!`4)<8Z*jd!ZNb^x~IJ3bsZ;-E{XM0Iz|m2_+H_UEBVcysL<BNe;d2
z@ktk!!x-;OS<Xr1j&6F?1cOl+^zW2UjRQdfg$gvYEmK^gTY?;}MM=<y<ih~sPoM-j
z0Ms1YUQa9(z&4UZD_PmFV`C??XXy^Uy3}Iqsp-08prn_<KMKOyY*PW#mwafr=#^gl
zT!vrx54{lcEH3|QQc)`Q<`iMde~((72Gk^#w3y7k_6pH`-t>}~yr@l(8IoMmem<m!
zy9uH}K+*qH5T4{Ro(%dzEeD%dKOByuu{!3KUZ9&qQ@oWC7mN<iz*1zLJXV6_Vcwvt
z4=osvX@i!_Xr961*F+}zVS|+nXJ4l5vw_RcFMQADNZVgsfaVu4v9Sko`pKTp&e{D}
z1s)Pka!AeobignGr7PlB$KmKCzNMRBeSM)N3#VJ$s)H8i^g)vE(F#;N{(;<%>@H=S
zKP<vA^6F81TgjU{MYDPqXPY_V)8eB&UupM9b?&6od~Y(s2=gHR3oX(2YpJOe{!+{u
zFejUvNVcKoro-ndbEp%iEZ@?3<m`hfzb90bZaUYnxv-|XMA@31O@ju^*)^P@guF%7
zlgXL1Drumxels}=LH57E(6Q1Ox193eqWe$5HrseMkOe0~rliR<y{A~R=)^jyEt;p5
zY{dqbKL@IV!=23DtK#TSH>=t*w-(18wl>ug`4yIlPK7EQ?e(KFu|0(Vl!WMJ7=@i{
zjO_|XPLO%7LLeb!))S1_Oi2PlFmwaJzu)=q%EFKyX)+?RL`!%P{AO8Ygdzzg#9Yi~
z^4E&nM#J1LqaoTa@`UF3*==mhG}UC=VYt@x4c{0vCa<z8v12$Wl)Wc~MG&!1s%-9T
zKu)HCrU@x?9zU<7DIrP@K}2h#-$6L)Tn58Z0$io<PjwwAr@UP{hNA$_e%jwfdb$`h
z#Afu4{Jca@bwl$;$7ZR{6pYC_Z_hBZ>Sg}4+RHM(-1G%YbI+1J87%~7iNZeK|88%4
z)iv+;-#z(jeXZ;%l%nzahhBKiIa+!2J|DaJ%qA>9Z*Ola!*D!^&dB&PjL_`z{VH_t
z*HPu%L^rfwGMz+H^fB?H3A)!H{Y@*nQpLjS<+65ngZ}n1vlS#{rz$;`QZ{tFcddU|
z=jzHF?`#nF-b>ZQSv(%YXa$I%M|J|R_LQLRw1Zgp*LS?J^i}Cpl%X=g-onpn7}fMr
z!qr2m%S}%RIPS4u#n@Zj<|3QYb%#_a!Zn1YPmryWbD@_Dh-XiLFh;8}$e+LhZxLfQ
zrJZqugy1!wJfY;zs?Q52wPRQn*3hv@SExEF^rHmpB)@{Z+FS0yH0v>!o|c!-Ty?^6
zx|wCG8EEw^TMdr#y)a$z$&~()KS&j7mALv7>M=SO8U0+(tGKQ!uH;o@S5uowa;s%;
z$8)@27w0zB0^(y;^b|XS+lwwywM(o^C&u1o94G037jVXhmx(eZ7ldy4_{k-a-|-a_
zr)6);ISRW_*<=b#>&vboxk{|h;)ObkJ9dVnKAz5N=U0m{56;}yZOeUT0Y_@|RZbVH
zrguV5RaLj<ng#WOWNfpmovhFDX<7DL@1myRq0%hKwd4sY*HfO;>TQl2vJ)|~d26Fq
z2P)$9pGSy<jb<_|QA@rz#pJ-Py|7OXsVW&y>Ul|cW@XDg2>e0SF?kC?6X(82>TKVI
zDykEZ7<<kGM<IrtE+8P)0aHd`)`ghsDrbr?mvI<bn%QB5a3LctNbMq4N~gUDImd_2
z%*l&q$qo~ObelivU8d+cuA`ZeP`wJDYEv;4#Cn${#GG0c#00zfic#^@UGatTZ{3`>
zQf9`w#R`c3?uhK|pBt4Ki|o0jf77IRm9FwPgu;QWI5E19Va+YqnL4$es!|rjhn(jH
zwiio*C4JuTdaoCtpGGkBjOgZU6PtwLmAq>vu@;r$%3NRRa9-q7mDEe7Xz;~M{r{wB
zvM@{h>Z;^1W9RLzK&AUK*E&(o;oI#XrkT_<OCM<_Y@Me#xM)=(9vbz>ZBN?jxhhzS
zw`F+(Vi`tCq=v1Mzy`)o!^GKqVZ{mE(SjtYh`2Op#Otm3l}g2r1}jyv7Qh>$$@M@w
z)NnrT1g)N!hNKQ>KvhT`L_rHoknoXUP9!TPuTyq&^KRqOdptk`1(sarzjDQdG<i}b
zSV$NgKaX$X&gc@Gl?V2WR}|`@{cWwsAjxjUBY{|a9~jScVE5M0FT<^K&32!oc;F$s
z3?epdH45H19tVTwRG{bNvK6azn>d%SE(c*rTB9n}!FdEIKz)R9vH%JwI7UlQfQOy5
zM?dh5Zwl32tT_NYhrxU0$}h(wey^-V6I(03O3{obk}6oKCotEKhv6pm)f_S3;>j6A
zc~<hZXtKS%rg~t&za?ckSx?0rLsetc$FFpQH(^Tw{Pg6BaL198B40o&dZKpp{Y1J^
z#yv;-04Uc}5g!czPl&=za~=jb4|tI@1@*YF?CAnb4^sCJ%3}dL0W2;Vhu1<kMGl>K
zi#wJVAIKbp^^JHb&4qg-3}=gGA!n1pg_t1d!jpAa;EJws`Nw$4FpP)1+&kJ1h-=d2
z!exM-5B>Odm^uUJAd3`y4&6EHnzn5nIugI3H~&Y_i_fAHF`m@^g5wkio_GwK)%%c<
zl2UsOXpk#X7nk~oD$hZ=ylE;#$qyY}X-XDnf%z#{g6r$SU26!)J_xbDKrjS^`VesL
zvr-BkP!n>x5{FTuP)UsZN%=|D1bKoD+b~@+0Pnw3>tzB9;YSQ9v<0hI1r;%F!}77n
zVPqnT+vL<5)0fBYD(b>CuV${sdHRfGCj@YiHywo&QZOGHq$A=<!F*)usW>|pQ`*{2
z-I}^o0!qK5K3W&!o6};K6{rjrin=1>4gBTnpMT-_FWrOMajpJSV`?0j;`lGswd(qM
zCjawV^=tf>FB1P{dC3f)(xdPxRlL+1hj<k_x{l>lTWv7#s2CbtCE@6Xje_CaC8L;7
z)Bs=vcn(0jb`GY3Z_X%PJIUWDf}xSD$j%i{#@<y7Y$D=gr@utPAP6s4(Rj?sviOXm
z?C#EEw0J>5riW=mgcS^0O^0L5^SPYr6J0%t&X95Y)r8E9DX0ob9cOS-rywnihG9I8
zdcxKX`Q{*!HoLkxj*4#d1uq~5L3Bld^s`-nrVJ#=F2LZ_sWqn;*a1>o&DjeTD#1+d
ziol@KDyCUvV?to)AYBg<jc1kd!W;mN>Bulwq?0l9CPOHAd!R}re4Zh9L579FqOSrB
z>0!oFhm8V0gh&+7k1KSXVAMcWp%RwoO^nDBpO0_pu*C#~z=U~B4{bD%v_?ohL#Cr7
zp%E;3o&9FpYqy?tUe!(-9y}kPv|cv%8hhUE8~E7p>aF89C(Y;k9dEyNu-7<gd$prI
zcze`2Y3`nOTJYjgtqnCF;fE#o>y6iVeSFerx4qVh*M$A83D5xiN$sf9Y_wq|9n}v`
z_nJq~eGed@)uMOMJZyHLSf}MvtD@SHQO|2V^9~y)^?mqR+ie~+J8!5f&zhYhZ0s4d
zR`ZT)C!J>f^q_X)9iN^Yx7rO42DaqwHQV)rTJx~6SAq7SO|S8?an$kJ`?Z6E)IeLW
zjv6P}d25ur4d`WU_n?6-@i_OICyjas2c)0sfIR5nf$z1C8}%kWG+sAgIJJ{Ez5v>8
zyf}qI@X_0=9oC*V+Dl$(3RYlV>Zd1-L+mtQqJ6sC?le26ord?k)!HLmw;LxfoApK;
zM3jS8n-F%|Zurnfr$$W!48T@<6Q6fa+fBk7253GxIX&()TSsM><X3<g=w%J6?h%4p
zM?6|UQtRXmLPb2#1o+;o{RX^0L7Wn{YKVn4V6EOUi$Q~c#Ev;k@2K(o0EM7>13$MA
z%&TU*QHBX?wy^-f#CBiRpwUwrHO>?CiyxbSHA%TXO{dp<=GFFIHnHcTG*CdhDX4|e
zR^JymN9tdG;6F>0#Q_A#F`fW7L}&`&a+vjmbcQS;gj`Sss9^KRVHoy#@vHDX!{qMj
z3d6sEByXAytfq1VJorHHkXaNsOGr9X0%++{&H!0t5~wal+Q_f8^?<y6z!Z|CwMJ9c
z>uXNja&{*%NylNk53*J6gXmESzIEOl1Et8%_AlUA&zi5(zq_YC0<E_WPFk<@NB&**
z-fG?GPNAEFC(Nndx^Bj{RM$kr0OLV5zT@pYzF+nGTp>ftAL0K@;EmauHlk%XfQaxs
z+VsS{MKFuc81)3|yq)(_Thz(DB&UA3Z@uXgBj+AB-=hAaci*DA<FIRfaUExk@A-#X
zRmi&Eq_>~w??HeDS6#Gb#9*xE?KmE)$MB^OlctO9q%%f(Nk6*s$<ll?4U&s?DJK&^
zw^t#T?p*@myo=9n5}zWJZ-0069e?4{-EY55uk3HX?Y0{Sux(epBphHoLsC3r9Rh^5
zG<q_A^uBxf9k6r-SEwmK@W*Jg92q;^@j)NJ%!&_&2f|x8YQuQ@p&-l*RXLgo=4qtk
zY-HMs-tlg^;H@!cAU#?cOmh;$X<UE%?L`<SC2JmibGCdj1As)|E13*>xh%Sj+W^`?
zz*v3nF%wIw<gF^?0&!}9fLST4HO-yJ2)=3&yVejH(Y`HG8Yw1{ztE;Gvh)Ibi~6LZ
zZLKdDqAv+TUP5!oDc}3e_kO1MN0dNa?qRrix`(xYv`)%};FCP?WW(&k!UNcggP*bY
zGX1*(59|H`dtU^8VP&3{eZK0{{>1t}BlO|S;h(4I|Ni>)ho@^P{a^KOYfr!G|Ne~e
zpa1?fwDSXncBa2o<a+x|Xn(W9I;%LRxq+M?D6VoUN~#%F+0K%glM9d{XoSOXBwciS
z9Iy7CVU+E_3ri}W>#R|fiy|nqLz5jv*{e&3IuTWa;%BO8l-MJ6(AR-%J5v%Rn%Qkd
z6lKbbjnL?n#i4!i-P=KMbp|3ceFrcS0{*_CzqP+MLAn7kO;(};GPfne+f#|Wwflw{
z7Bl1aVJDCBmD84y8{14T9yybcCPIr$S;vJl7?1m<w@u()a?h-E-F6d$D~!Dn3~qxv
z)W_qYAtJF$1%M>m&s(MnN(^n}<~DM2S5#n75je=5f_TB{6yY|MFo#)S(06*64nZw~
zu~xkO_%@_4#kV0b72$kfQ<}yFwoWstrXc}hPR{ReQ7EKJg!&x9br8MJOSLSW>&?Dd
zl!E+l6#hDiMrt8xa-cYtl;S*6zRa9ERLlYS?<ls6w<n_C{{#FayoiP$g|WydZ{j^h
zl6~u~+`!X>!WrqWZ_odiYL;}4&xwVQ8?}fCb!?H9l+z^4VLlTcujrkQA8u+i4c8Bh
z(3!|KuVMEQOY2ogL?j|{0}5^ew00U>9#nRi2C|q-!bv|~DI&T#!jey+&4uHW7v>G0
z9-iFh(0TnUj#F->WfGhxlXX{zEr%L+*;KnO#>P=9Gw_Ur!NRVJr%G>{i~^&&SwI1~
z3dfgmU#ti_o|@iwjOr`X_>XlDNFv*;vx5>!A`=LRIGmC^VZ$YDJx)8lk{JN9LD@4F
z_qg8s!Sz_&?NQ;az`GoU=i85@C<HQHWeBoV<s!aWeHYvWB9mMKKdyq+M?3$h9oDv1
zgB`EL0o5@lFm#yAL}}#FNCksGBl9z8y3uUod9`8%g6Gkrj(B6kn&|xa@A6AK9GutK
zVra4G@MbMDID94~#N99+-d(Bt0ws>?Aj4CFn&*X>{~BZd^Te3{_D>&R{@Z`==<>|7
zalWl>DiPXEzQ42hNdcAuA>&G@vqYM=!t4#g;RRaPFnbnIQpwA|T`<zTBdmN?MV1GF
z>;JTB(Y1u2^iC?taRFNO@hyE4$l_J*nuUn{+#&qG{o}>&H)V=@gzvvkEPW$}z7B|=
zA4}iZ2h%sWS>fbfQSMFYr%VC8DYamUtdwP<LjL6pxDD?|QkPOvrp^2`zPbjOjzIDb
zbTcJhg;(+D4nt#U53J1S4&)ikH5qu0B2X3#$q1C)0bUC!fI$z=Eq&63um((rin<=f
zz-6y6JR=f##X_-#yPV_!W*MD3zzm?y_~vW|nYjLH2Ih|VZ)aL|6wuS=N<YxCdgK`&
zaaQ#hW=<2}IKe$&a99WQ`C`MGJR%(?HmiqWgF^*R21PTHh3AHo9cMF(n{ZTTy?ZAd
zFj~>1o;o(p5@OUK;jZ~?F#4$hM`wUBE&OPC(Ub?-1Kwh(>>bGuc<tH=4ZQq}25L3u
z7kR*CU6_WUGo{z*6>oo@FI_?=P5iX*bQuM>lS?=vG5_bG$rOa1l-48pYXNUaqBB^8
zLeMQwSqpDr69Hmz#d6mys#t%(>I0d5$qw0WiRMK+d`^gVsYI5YbrNk+rUQrQP-nRt
zHFWRkZ?SVKY10VzIyBK|=>A|QH3K9Yswd&ysi7C24@AO3hLllPshrfLY$PRVsWIsd
zqJ9_*%k-HOj^pQe7*;HBVjO1cc6|ECisx$R(1#Z9ckDcFb-6F}yWxi@8PCmlEkd1(
z-C8yCm~2YX<I=@B<VHiLPHX>&ls5}kxv8;RWU<$?i5jkomcOIDuS;H>YxE_FN$UBF
zBP{`u{(mKX6Q-{}Oh|c}U>zr(?_qcy(H#umY0bH0vP3-J!g18kTRwS1@>9#gmEOUO
z=i$_WFA#y(7{*-5U2N$>C3mrwC^6b>hL>@C!EudUw*;<`za%@&A`Q4{E1Ka3D35d0
z+%W&;xPf~u7cRcc9!D$RWGXgl`0_?xKH4{c#94(iRCVR-Zl#LjT*s$%8(jL{QeYSF
z<>$P+{0~ddm#D6>KrklDQjX+-5f9LgC+bnq3o<~h6eCJHk`*yRt-f)s)tOmWnrrB5
zGfP|J+dH$(Ka4Ak+}pJo_Tf}?%31f{+)PQf>Q>yG$rK+ZQ}}Z%xAe(>6kdmPk&sp^
zManKL>UmeN7e{<Rz0*4WgbI$>L?=i%Y)4w3P7z;*W{~krKlFKjzGX3&Fv^p-fGo-t
zi$zhn9{l{h)l{40-&(r1TwDA8ICY=>Irm$0_oMcvW>xyCH(O9|worm5b91!PdZa`6
zre)^qv<#76lZHivw5*VBzh<uYD>5_lriu7QA`-TU${vpacS;}Fw-kUG!;Z5%VsxB}
z#kxr<5-`Y$Q>ChsLqVrL@@!0{rBW7`nhTSGL{U~O8l1$^{Z*T0;wj!<bk+=v0v2CI
z!(glvL>Ofi3`b9Wao2-ZOX29w3$Cw6@rMX~$_Fe7P;w}IQeX_@tD&?HOJybyXf9KB
zC^r@81X7Y5qzxA(oMOP|1vHeCW{Lyik?9T0=p@lJFRoNDdJ(#4ifzbbe=*q-&%2|{
zoVm(XDU&yEs&3eTvUu((mu}dUv?TjEVpE-9&`c6IjjI$q07KgqHIJU5)N&LYd6LVF
zJQ+0Gi+jDv2$`Fd+hpz+oy_Io)40sPsXbs8m+bZA-1$<JCcKiVsH*&d5~%3zW@{K-
zlF2u8QjC6F&l04Fc$msMVrDL(y1mP=_r5FFOIMjsrF+Q+_3cir)2w%E?RMj&(>-b(
zbuo~@^TtVuZ}Am69mPxE6h%Syy`$5^#tD>0@4fEfX}i<iZFEl>wF4vYlNf-NG{sOO
z*kWBq$eNk}#nygI&#0odv{XJ8HItj!?s6tgmp#NonwPK^|0NBDEgpnHn)Q~ib~a~-
zb!!a4Sdn(n&*CE}ne#mi%V9ok_>QCLR)oP#fD2oJ@0X`#^3ahjhZF=;5{wEOYR_IU
z-b>lY{Tz<s($F7LQ9Qb<NO5ZzC2qLsJ$Vx80F~1ckjNPJAUs@@jP8g~dw4IG%1+@b
zqVOS;uTjsfm~v$!XG$(4rWzGC?V}0InO6egZf$w9^l|z~)dl2~KPwM`G|9MNw#+zl
znmxmqXNjIJU*@io%)Lo&N!NwS1FoVktNH!BYc}kOi)s9p!7$9-OV=8Ghimj5uhDl8
zwMO4fC%1R@8pT97@19h*w}<6)VCvhDaUSrwiqOj9p<aqFap(Zk#s$2XDX_E!qM~k8
zOV*~F;Q&_G(4|AB@>Sj*o^?1YVvHtS)f>gAU|e1%wxu2?r;ftBSYGDk6C-S<1Mm|^
z&`cNLl|sOuX2GKQ8KYx53IGM>@tv0oWo<AHJUJZZvA2}VdmiFQ#F-Zg1WX*TohS-3
z^lUnFNAVlA^pbOAXp7}BF@mIL^c9sQ``|FYY0HukDZ+%r$1ssd#Vg4&D&B-Ek&01W
zC4AphJj#i3!_p}1IbobC#1krHqA^pyMIq*}6HB()l8gaj+X9)10wW#wnu0k4C-t^c
zrbS*V!Zw!CGo>StBVWuUa`PgZRu$y7>}6R<ir><DFPkyWc^#{Kgfh@~Ns?8$<c6Wd
zDYLoaL^E!)Ei(^~0Y$_YvD5`-k?bMWWI=}TqF*Z3ZFX61!JbBB<*%6Es+2_$%!FeS
zY~Cv1{ifd-g!P-uyoIUNuJPW?d*Ytg<?IY$J7o*343??#iMeJ(dZVmmW}Tbbbd;^S
zDC9}iRTd8aKpchyjoJWtsO52%{oHTkwX#qD+GF#uyQ|v*xb?p3elMpHM|r)P{r;OL
zYvIk3nu0Z)gEdS_hEHQ_CN(WhTTZTB*+nMM&b9CTX70o;fbQKmAK=Pw>BT#J^`cNa
ztrQX5AAFfd-ji)_-CpU2b|k~pO?CK`jU!i;B$Q7@;juGgceF$MUnV2fEXhW4MU|W`
z=j5PdfkqqRJ5LZ?NaonPPI$J`x+~*A!cs*v1S*PpQ8*lbqar8BOdRU86b-Gb5F_)V
zh}mwQP&hi-JJ>_9v#Em$88|-KHu9XH^qGVg=vyiSvj-o_rFSb;p8(>9$tL|Pd7sP9
z)Q_K7Vt_&J2EF&m<SZFSW4erqIqQRAzc;zYY|g24BSP$<E?fJz8Vys0qBH{wc+8_d
zS2y8#hfp$HS<zLE{Va3X8kQ?`3KQ%?6#X@)D4prbI-`Xs&ZAQc=-12;G^Y?X25{uU
zk3K!!8sYePOtgE5cCu0Z@vy2lXT1lR=O_e#7>NW%BF2bwr@zhaZM3srXZG-sXVFun
z>1p5(R|a@`>BnOug#cnr7xB1ES&d9eyBUp(Ks8Wy3Xy5HOZbCc-EO8{Vpm5l{%FWN
zDNq<N&jd52Z`oPS!ek(~-ZwdAok<&bdRIo?sy(b!->26uA$M9PPGE@=pO$QpM9auL
zg(BhC3EwQ_4oZ;22M6J#WFl4e&LwO`?y~)G2uF-<H}-kGB6$j#w7AF<uV4z73qCq~
zh5;c<AD|wS*ovUPWp9LORjJ>0y`1S8Z)S)DrLPZ|ZxtU1>~u;26cV`73ibv2F~)v;
zn4fAiBssD>361(<laKKONT((5At`=&JXms~4TqH?mZ2@-A{GS6jwzomAVV>^vKy_$
zVa7o;Ag8kx;9IK}c0W1fAR>9@)CPY+`EN)$o}`R~>MDb`yLfn@n0OLQ<L8*iNNdOr
zprAH_ySk2t7#|u*j|1Q*u`{>Rl1(rcLSbK^bW=N7T*C*%wnL+q-dnIBXN>t3=H3B+
zv$!rXpG?2RVk`t$cqmQfc<yD618N@q71zkU)uQ8r9QDez|Le2?GIA^(Zm%~BRxHi1
zQRP$qHqUM4<?NYXAXuMGAQ%M&`@EO4(L-OITYU<4vIy*%AQy-+UqIZzVU2?HYZ`45
zd~P|!xFRW%MW!yIl~LfYBtdqy+*<1*XJebTG79#?ytUzyR&s}@q!@MA;1Fa!yYO-|
z`e#cnQ2kdZ@T<Ff(QMsQH*Q%-v`^zOEln$`Pi`XnWm<qC+-RW`v4C6EEe;kRew7a&
z<inkO5E9Ym(tH&CVVPGai8Z&&qu|M}%Sqt#>qNeWhhO1amhNvx{!R2xqx_U$Skib<
z_)4Mp<1BI>_ZAns5E|2B*$U~=esz2)JH*REX)?)cLsF@uZATlQiUr#AuwAT8(81lZ
zqeXv3kVE#XLv)}i$S}l0WEYc00S1f7<I}yR@?1T%YhHY}41Z4{o=ikgpN^6et8(DS
zx|HN|M0uryhM~T~+vZ^(T`{s3&M^FUlZ(K<#KIW!lw1W>RLT43nvAQ-E?s~fuB;SZ
za$6URl#ldLQ-%*Yn_QqG&os0s85W$EBKj@aN%TXuz$-s0Z>r?$$z?nl^r_HgaD5#P
z?c|0+(FVtYV3?r(jE$6GC>P;4;~pQ6B0N(_WAv;JQEJcL3DNMKsXZeju*xtF22nt_
zrb;6^2+uiVBwdy-n`bgZ(YWTL`GQK<X__DqEHW{p^wsmkRkvgJ{=wpZKj(nFr~;oo
z6nJs`@2AzLPyd#R|NV6BuYda*|NBoE|GT=r0)PGRrGtQHA^hNV;xkzG-m5r<=EwBo
zzmw}|81*pPWIVd~vj-4|{~IT@gXYhTy`os+xy3SJ#b5mVxnqjE*-vhGaYX5bt};Ui
zX1T-OKk&E+0Xa+P@s&Sb?D6@qCbW5kJR=yn@W|QyJfX>V|Ak_c?|$Cs<h!%PC(lgj
zN<pyT#pi#<=;b}-GmM`0hw&(kDV%QplJ%~Lf1jZP5nD1yha%DZF30>_#yF;=PzE%_
z-fcX39{`IX2Tb&Ji}0#?G8o_aF3(Dx1~o_=5;sw8Fu1-9gi{-P-_g1fPW|J+rrZsa
z)VrvnU;yyA%H`pSCJnX&1%uz=6qayb9o&_@-@;Msi#XvSb9@T+9uJeKFZ{-D!@<B~
z_TBeTiZu~Pz^^9Q1JT5)>WkfFy+W@hP^{+NP{()aUyA4hJ3h^<E(HJo(t)B-jvsm5
z8?n~an3z{-(lk06$_SKm$1tCRWF7I$X{M(IHK%&+J;>!z|L+*_T+Q4*PCLiSnk2zR
zXa^3LQNo$hvL2E-=L~j|%yE#+slz^!e~c*KIr7WbfZtyOet!-4{Vy2sn>sJBBbaqT
z3~=0Zb3zNu6a4$LNBw?Ce@xR8a|VR=Y>JzXQxabu0IC1N5!D6AB1|_vz?x6U=}ax+
zjPP3&kkUP-IvPywl2J$NdP}>G@$7Yfoy23b3GK$$sd0-H=V!X^>&LI5;=hK9|AT~z
z&k^~v@sO=p!;&?b@5B~hzT!33dLP_5Vj3%}a_83(DcwrB@yP!q(dMV1KOJ~Jr}zRf
z=*0|X-t#PGUf+KvGawfXIqy6V7lzW}^KWrDt=dCJ(%StKhUUulaN#iaIU0>Q;yx8b
zk)KtK;nf_93~zUVVDU8-alPgW%`O(3^BXTX=coHGIwx3Mc(uZy?|_%xnF77LqUb26
znCu+q&gJ5<-udRs3Eqn;aE8k)3R1ZzWN}uip)L6GZW*o0)>wf}u@jMz!@BKVW&_17
z7~pbd)mpA*ZSnAzSsMS+(Wo=g$QOt^o)_cFSO_{$hU_QGK5o=|x1k8538Tlbnd97;
zj?*I=lX#`hAyy<h=R(M%Bo&6$4R6s9tj)2I{V(IuxCi3RIcXp8wior4D?93_Nq%B4
z#NnpIABA97-lVv_SW5vu0`$>(qB7zdL-dT4`bkAJP7zR%yhO%SK+YBJgmeSQI%p$I
zqt=(8Fg6NkZS^ClHipIDN7aPV;&R_<$KYPvFAV%W7V16MX&%6T`?$5pJ=BRqs+x+U
zu&1Ji%^3?;Z@k1&au~=g;q&Z%l%y!&i+$gbIAv-iX35%841jO$Y%$6f&jjOuW9JoK
z>QbN#wFC&pyA{+1*fT5;AC*@b@$Yijdum!Fu8jbOmNrC9=$D)giT;PfI^Bu>^N_Ar
zg%j%ry2bQ-S!`3IQ-dMTcbfZpkoy{A`Og|-*<1;^@s?lw><YG=t@^JJczH^%?;pfC
zhkd2u85gMy9_LGJ3+oSr0(#M?z9|+xKVN3;fC^ug;?H(aUm(lEpuS5pU~qvLztFvf
z1N{Ce<Kt!{Aeu+htbAq5I@SkcTNXibHAVThYDve$eets+AnuEw^P8VB3ay)FteAE)
zlqt9+nR}H!XpB2+>DG)Oc2Z@COL%vN_q#gYooC0osTomjEaqTDDzeoi#s@2Kp2?0=
zLG(7bEShA#@Y>$bv_b6Bua5WglCf-OQzEi=ti_0&f%B+;G|&X<1{dLysZy1}>*yXq
zI;#+QJH8Z#N&09<*)yyA1`O$0lZF0C#h0m}&rO(Yh&a*LTjs^@O=%R;71A{@uPz1g
zWX7<(Q)1A#eU)TLpK{*%dfdA)9dWXG$Gpc^KdV@QfMb?DewO^+QM{ck-YzKK$rkUZ
zFgrz&1C2|cbi6>CXc;E3T@)fP;{;EHBIBu=;ZzN3Nret<SEO|`YQBU+?PgjxZ%0Uj
zYMtV0Q=)O%ZjU#(%h5$wFdVCB2$eP4JMSjdbLp8^S+j}(3ZB7PQ`!hyqIW+=!Qhe<
z+wmy0T>pgUU}m&s^ep3;QbP`G)*VDwnIOI(=aowZ{_D`Kqj<RD=$2K?3ed&cjBQT@
z+s)2j%B9k(Oo#Gq*ZkR)QVw;I!<(+aS=9_WhCZ3^o>VuB%KS!6yi1osa(RaVOpxI#
znt$g*531$D2t*$<!L$L>>mSxRdUxhy$^vSg3~WZ1D3q26zso?`nF!!}cd%ney&VcD
zmW=&%Klu%vfDyIJ(B0W8l)cIz`0eg^6vI|CE)m(Yqal<)fEm|XSAW-cyaDx>CV1I%
zHNVac6Y^FB87+IRkr^ekjo;|<qNiJ29+w|<qC#5BU~nAW9855%8cHN3z@`J%JL!{}
z=NRpb_4CouCq>ERg}IW(Vud_Q;qVknO}<39oGo_ReTVZ>6%(nZKK)HZio<|Zvbt^`
zV>laitq<km(SFJa5@C9os<;NW3+vD32w_@pHfN}x3Crv#%n=wYUc6SmPc&9`Z<emg
z;RooqQ1|88@{41v^vVd7pO#wDAa<e96?l9+zA<t(>M|X(3mwvN1n-gS;~Z{xx@&98
zoo*F2)LVA32<SwFdiZ{HL$S*_-w>~y$f?6YA+U7%%SsgkPQd@CI2YKL5`o4>${R~o
zN1%rHKY_RQzvf%|^FM3enSN^olMizDT4xr4v%KO~bdXHQ`ux4Z`IbYP2Fqug*7$z;
z#XD+s8XMvOpgVzZ(6cQJ`fLNLBHU=_9lV)4Y`?6d1A1jC#r||xePmf)dz~eh+*A$A
zDaN_(S9eNheo(Fi$)4($(f$0u!R>W{!!>^mm%I91^_`)DT|t43I%^c5M8mX0<?_tf
zkwK&skct^)c7r3w5{Kd_U_U}XHtbNcPWjKHV5Y-&y!REC7PF48Q&EXrf}X{?-GN(C
zJ*8B03}2{tt@mYIfMgaft}8htf&soq0wg6YvG`l>Ce!_Vp%E|n*kkMwt4Ziw@`y>4
z+{=v1XhnDthPX{Cd5v;JF$128Q<2$CIE0UTLlNH!@`kK>Gq+O4Y0qiJ-nQlt4L>f;
zCl+Rf!Y9?VSh%QAm=+oe1VXnJ?mty3>`fO7b9?>8WW(OWiG~@$@Dm3){FrUhmi)4q
z`YfW~lw>`t28E<0!<kkj35$iXadHGU&bOfj!%{{v?A_#xdpX%UvSMK>`wCT5Ot@5@
zJ&gBK+MdsfGoIGXIYVq)2}E?1?b&0m<i<Mq%b)en;_+YH&XBH9Fvam-*4O{`!}qE9
zFF&lWt$&UG@+XY{vi5h3|FU-P_%E}Ee))F@-_mXzJge>fqlI^Fvqf^rtziXq>5L+I
zN{fD@`^7I9;3ePL+8OI*T9k^kS|I`O0fQHbyMzzQ{Gnbx%o^zBbH#ZvLU%~VdN$rJ
z3V~yeH$S>cb11y^gG3yEjNm04=S4(xJL7?xcooMrv&ybr#Zc3P9MnS(rX>w?-ky1Q
zjXuH$BfA3plH5wC71q`x$~vvTP&kSCLrBaUJ;F`kGX|1?N@XU6!dMau#E}rVcgB)<
zs5lZ!F6$Y090_tLjC&K5<9Rux%+((Zjhv2JI{`|<g7uEsYG@j*;1v}su0&CY2pK@a
z@hTf_VwQLkOEa=dZ$ku@$m!UY1@xokf;vMD55s6|rlVKP6nJ8an<7r~dGsOduh4a;
z46^V~bvRYLqLOjt($;3Urz7K%7I>V6-b4j?8KlSdVH1nH4D560n{*@Wx7ugDK@O%e
zR!2Di%RnnQo$yfXXGkeLSEMOfBArR6_atTM>9MINmGzG0K7dnlfy+bsN*fypS(Pd%
zg{<us1SqGOX}&j}rkLEJ_$rbW(T{ebQgk#k{e(ys2wYR13g{9gOhZIK6o={(T995M
zC|x3#8jaA4BBnhEirkLk;e~10$xf~48xoBvELU_z7bl{J0kHV_yrQT3+ZNHP<$CK$
zVq3tubza9-kO87W9uYDPLQ%AdHbfUGN92KVsCtyV!orFnshDC)o`B-))UI9_D^fuE
zBXlhm0V--<JX#@Pnt+@Sf{Voa_^l%^ng6YE0Ko2i@_vVswGZ9F?g#(d+Aci6zq|P2
z!>6LLcxu0Zj=GxqT|k}N0e3Eufgw&28cPwzi1`(MVi@GCC3S(NFO^en@hEJ6FvW9^
z#c2|+-Udv-Ca5G!%c;nW0TUSAr>2E)9Zy-zy_DQ5UL)vTTDLU#je#d1pC#v!kUXL9
zz-h*J7LX6WEK}dNy{Zm}m@8Dm=BaB3it3cC=9qkkU*FIR`hDUJe1I9>#=zLnG8YeM
zHvujNjNizjE1jGA#IJ#eiY7M{v=y}PXKLlZe(-F}%Lfq_&?w9#&4qVsxrJ4wY%nx8
zSMBD!*2KCl;-xWMXUw$4L33EO{LN$j#Wbs+lC@Js>UY_%B7yr;6!c0d4GSe!(a2nE
zg}I9MYp4ASn=W({=e|)-B>eINd;5Id;=XQii@PP&(CJn9ylRf;+rGwH`YPVy^v3Ae
z>QKok=0Wq{@#QECb9cTB=cQq9<Zg(cpHFAD+<R`d3T|3SR&noO;k^5=bKjlj7d?YN
znPaNiv!3GEUeB~!YY(&3sV>jcY?_Ce6HK0`S%mwn)+>>t)WJa~E!5R7<3X6bzq5~Y
zDcNWnYW}jUrrgrOk*<bRNLgO4z=(yBLre!8+qveIpp~);^enUs8C3H!vHaBHa|vh}
z;T+Jf7=73AB3x)Rw9<I0Oo~3{y#RG$Xcq?+C#J62sAW&dREZXt(I`x=V{x1g6le?K
zE_~pl_n~aSly7LL=UFn7ojDiFC<n_GuN_@Ql;I6Xg(xPm6h3FTvGjFVJ(MsBwMx3I
zEH35=QhE@%&IKz^kb7Vi>pUV}kvri-$a?b&eVZwlIr7<>-DxXFm1_ELd2<!t*jQQ+
zMdp~<iOY7J)bi~1HvQ0^R#;#BbbmAIS)N!{3nkI`<Nqtc;9iEk!ytM8JB%KF;bR#9
ztQixD$=75RU7`HVw*HYSt%@5!0Vev(xIbC!lK$iuC6OZkJ4+E|yGM^A+#935z`NG;
zJz~O2IoZLe$h6EgY#2wGT6ce}J3YykEE7xGlAL=QGD(tDU^nv)zJq^GzdMD>uqj%8
zjaYjZNoALTBT3P%=(ZCp?8;Wc64-rpM}Tg5gn0%cOXzc!@@|vcaUCrIhc~5zGB9a+
z_^!O9P2Sk4G8khqqEEvJO>L_$qwsd?01pW}%5e+FBiyqNKw8k%aN#wF$Ah33ZncIW
zG#rHI;~gA&-BI_nxb6v>FsRB@o5PSBXT`doCWIedd<l-9sl$e6ByU&Av%E9Pr6mP4
z8K7eWWA_t{Ar8qhon8nJz>?hqNzCbyngcBHK!hb&S(;*rbi{IEcjek}PAm`{$R!Uj
zow`aLH)wluts<!*u@qi^@VWWpPmN|<U#DjJ)Evw<D;DBYLpq=#StT}Q4!nmkV9Yh~
zxsUzdeh8cR@GKl%>{&4=E*H=L_WjefY9{~N_wf0v{okL|{%`HSzS{rY-~MmE(QY@6
zb{i+p_Zs!q;c=_o>@-_P8LPkia!x}&{Z0jO#x0@vtmBDLT{-N5OYU<ZNVWkfK03+B
zUmO%<R;ZSY%GNA)ICa{!%wFIr?$d)AFXr?!ZL-LrB(h=Vfe1AO4r+u64uCjD<I9+R
z7!y`RMB}?uhSUeQS~UC8cMiueHWVfWWIdHv&q@`h6r^ZjD05nII1O!Sm<x$gUL|AB
zK4#))DWyn#p%ij~3C$?6?@BXA5j*w?*o$8}+|8Ob5BeS<utT0v;x-I@22(qr#Be-1
z>3HFls9&sG+LqwU0N>F;3~EUMLcOyP(;Z7ALSS1uWnB`ZeTU8<H@UvH?mzgBwP-eZ
zW@PFOJv0Ls(9vb%!danqGK^sRyu;A80d~Sy)}+P|mwAYbSYZ|t69Jps0yAfoX7wmD
z<>|`p;FKI)>A+)4M%dGSrsHCK`<#JG{CZ&;Ofup`dul<c+(i3S?}d!`Rgn2FYip_7
zp4muJbfCykQhBFcPLwTV*52I53$1hS#ff0M-lG>M9zQ;sBkxuoXa1uk-fDA@qM34S
zVcElIIDz8}*5Rt#o^T$7bB}!5;BPBlb>3`Sj_yoJu$AhRg5mC${0v?DLuA={kYrho
zY)1PrEC)FBVRlAx=!<$7GCIjmRy0Tcr)+K_1x@EDLb-AfCKS+pn3saa$7>|9hbY5T
zBY^dI=8RD$85CwRN)^N;Mu4886h<tzDKSQ~z*LV6od19J-n_4kE7>2u|6aX{GtP5u
z30eXPPm*DqD+9JQ7lVyN7LOBs8q}B$lA5R`Z0zyxwLi6-wfl5S!Z@=ed~A@q&%V^D
zs#Eo?E?!H+q*;7npqGY;Ry2u5$xYOUL&?yPOmi@Topfn5?Bnj{=9FlRr%LXF#-&q@
zGnL+nr}gmB3)oU`c@CVcd8|OnHYvB%WkcPbwT7~$xy7g9BIdaN3_EVy5I1jts|#D+
z5Z7mbYs`f|(-5~&V2NAA+Bisfxb=2+3|Vf;gTfBj!INqMyE&mTA{CUzej2A8gQqEj
zZDjDvDAR7UX63r1jphK+X?OBP?Sad9L@y|Ektq_cV=s<zVGYG-#&-_-V(QXG%z=Bw
zm120b(v^~DmOAJ)#X|Q&ONqH;It5XAxoz@4l#TGLBRT#UGvo#qM=igcWD0p_xa_)e
za&NctxObUi9M5utk(vc9$}LZ)o;o6{s4RW0i<W&3OzuE-(n3cpZcT|%;J@TLzY`92
zc}@CE!jPG0nWi)nD$-4c+mT&J85NTxKPBg&VOl(aqAZALn<)d?Vi>UirHiYk5m0Rk
zNDAWQ+hhVv*hqZSc-&}VvCp6pXX4F6@g3~4!z2U|Wjo&epBJvdw1rxnZm-3=(&dBT
zouy>mTp$0uv9>{<FNVoAJD12<*<;7#Y!Jaw6S*M-$;zg5)|Z8WE>>Sj3ndp1y2O&D
zpd__P=9{m~E5Nc@S&*<5l$f<NnNQdZ2C7_lJ0eTow(gxW+33yxT>4aqC+!XSL(6Jm
zAxM*wuspy-nP|0CJe$Q;L1q(p1J@J)LO{L0+biWF)0m(=9O-8ng;&W|FCJ$GA(!OP
zQAs%*yklO+%Do(2$TaC<l8huC-}#j!CVuvW6Y#KLAP!RMSPj<N9lH}Ft>&>dx<Wv6
z#IuLu(&XE-`o0KIyt~%XTBl%HEQlK~chw?WL*KAWP|c8Pt(%+}uc4^=``_ieC3J$<
zmQ<$8(*{ev+8e*$eWgPP%K_C&P}hzf;ko5qjImi4b(T>{B~MqYsD@<^g>-gS*VvSL
zPO^=al|IiF38}SH1D8j|JB_n9lO5QPY$f2VmG1erBSIk&u>&a=ZnnTKipY*Pn2+q!
zE0Cx&P7q9ccEt1k$0IVn9S#zx{CU3#|JPlscskT4HYf$E2kB0yAM?GBjEHD=%Dpx7
z)sMM%M70A0eF-0L`2Wwnn!}P2TDRiXE@g!h36wtKZ`gSS1L}kiloj8&rQ~OieFkU_
zJng=rcK=<h2Xfc~{MZb{i)y^u3gVi1{-O$_i#V7+@%59_E2I}M?0K@M-f;@a@0V^A
ziNp5G_TK-o1OIHoLFpn`lta|1Z~^kXkYM$n!%^A?%iY;Zr<5Xl=)zU+JD_Z{Tlet$
z?GGf0FHm?M>CMUOE`Q7Ezcwmn-&jyEG)>8Gs<_|lSz`NBnZuh5Z0gd&Mh+bkc6HrE
zH(0t9kFL3)jY%RN!tnYUMc}Ck25B<{7FdP4WrPudZ9-~FC#C1AWhw2Gy4UkG+TAn-
z8gxs`{~Pa`f(H4-CmZcBg9eGPvxjs;qcbX#`MPL&5_d1-i)nQM1!!8tCoH*D?EtFf
zyY~6-mjl!zQ*aK7^SRSvya&<WP2RP^xW65<Q=PRtjZ>D0(Bz0>?%I*jo{QLWKT=hr
zk}D&R$@HPiy}8HHCYEgOISiv(<wH}g0?Xv2W?<OuY830~_ywva)0GKgkfg~dLQ@Ew
zIfS9b+ZO1*rc}z{fy>V7_#BjQCCO&i>F2N$l91h<lTZZ|F%g=e1(|tp>0$1o&o#pM
zEh%BHDOJ+^)SOUa@eQof4Ej^%YPiX_!{$o(AHv)SH}$3PuCOnGy_fo)Xkir8#-PA&
z<)WEs{a4;kDz8gWYBBqvRFLHZr`h)K4y%<ZH>(+N!+=F9LXnI$=FB16Dbg{Zgm^Nh
zd8w`L(A-PQk~!+8oRtwz9X$dpa~!_j)vnBpp_lhTwG+!;7@AU|iZf<KaHdFq_IHyQ
zcKui#RE6&Pvel-s9p$d8NFAx-AEn}f)V@1f_Xh#mB1`T4w~dWP)3rj%Z9lIqk!_Sp
z3#ni$xxRV#v)T%r^Be~}i9BJ#osE@41}DQ8SeVifkLig8+|JRbM7%vl^!C-&*zo}w
zM$<=pdl?VpDKACQ8v=ZvUMh<fw4x_c98L{5f4thX0$oj_5eEg9=0)Q=BF0xT5iJk^
zn;*&tm)b3)ts;@2G+A|ut2IX9Rb;J1<@|EVv9N3TLbA~Rreed`**N`|##5wC40fR{
zF6%sG>(s4PpfbmesG+@8$iDJ98h<`ArSl`x;Im{pO-AHBwwJ(`o49=s+>teGWrG9(
zYf!WYTeW;N>rFVHVB{Xs($T{}0k`Z{QnPh4C)_|-kW00P`Zd2nHk%z_w9V-K6kUME
zzf>=4{-)yb^r^t2{qt?UnIk{2f<7n%FnMw^Y9`)Q_-V%Grashj*4wu6wqx-$PbqKE
zQ;BI!HDV<dGf&N!E4-4kKfnT!iE|>^OAZ-l6;Gq=y-k_jtrQVwAaJlzaE3XC@ziWE
zjxHNOV*}X3eiDT##sOiasH#^2%<>`<?0h?zO%$&<j4vXx$gODTg;?n=G`b_TODf|g
zj`dktf2>(0!>b9q)l8DJ@C=B?SRn%-qsykH%J^4X%C6n7vqeQ?fbz2^x8LE){2aFm
zWiwlBpHpgtaETU)WlPvr4J_D7DIhLzK0l=TYh<8;pQ`-zqetHM?vS|`Bh*ij0De+j
z&jRJ3y%-G(S+<p81EGh}HF5+4buH=vB6Y9Dg;_`^)Fg8;9*@y;i@9XnXuYIrl01|>
z@JSuAZ#!}n8!khoqzJ>6aC(a`Q88S`yRDja-%Y9b$it&bo_OPW3#fj*WjR<Ip3!kM
z&BprqWCz=n+816cAFWHfd=FGh<#jvfm!W1C$}a=Xdc!R*u~@lq>8q4?1l1D35?-Rk
zE@A)m$ItKUXG#BGUYTehV?8bR{+;Lli=Mw(|KE*o*1!J4|Mx%A|Mw5q-_LjbebMfn
zbbff!?jF$h@7jl_2i=_GZ&9kn-F`iM=sp{{$-wZPRJw6#mt9+;N~0ku3y5VzfCcH~
zGR<SJDJLs5@HeGO%d?8s<w<5G!xfXgo^06e=B7oNECs4!U?wy77Q)F4r>)U{<P?uG
z)#m2QAxs<#Si<_u{D%E<)JK-8I+Co2WEb*~|Jzn33~`RR0**glYdElcQmWl5nVZtN
z*wzzBr87JKA2qt4Ijp5Dmd`BP6Z=))=2V=^wmrwWOWv0#k<>;|H0pr7f})bYOD+uf
zp>4k|qv5rpRGCN<QqHZi;zC{epGP1O$vv50u67>jHf%hvF>6CO%3lXFbV6!~h~)VY
zu}23Id=%=6^#KCQaRh=u%<c%cffW0tL`}GmDbzQ<bqi04!&iC?bE#+AVqvSH*q|KP
zC&^7bAg6qJDP@$$*h@<#?)21@oxfx<JD_h*F5fK`+$$C2;p-%$ytd|IbJJPKfoHn1
zNh(UR%#1}QU6Cq6lbO5Ii;6d?i$BUF#RhTk`0h!B2vbJR%TrPXlck(0dM=mDA-B1S
z^v$@jY-#X_#c9I7AW5jO>6SNs3~Qyt;@epHo)>TBWwx>F$^O1<yJZ&h;(M+vV|L^D
zM=7@l&GKGwPub46v$0ft;{Ds{^<lCra$7CaX4}J+vlMN-1u`zY4Hqb@<Se&eA!pc5
zrRK!kFU!4K0Eg)u?sf@9M?7rS;_Z6a5{Q=^;y%Q{r|v#IwqLo1^Pjafv$fikB9LCL
z1nrUX#=06O@5Z=4$R-2B#!cYJ#+<B(&z4~!QA07xew2K<BvQxU#hZV$-~6MGA=BTC
z!uNg5GYQW>!sGY&{1bfQ5AforiZo)GC|rv7@n}W@RdPe8=1`j&tLSgsCh3qxqFik7
zGY)3wLpXRtO#?H1kvJ-(v=1k5(drq!xag*sM%s*$rWF!aXObkwL|W#x!kH-}76r(i
zMx(fHp>jJwGVPyN+RFUc@Dk^<$pkM`d_dBkBfu^wz)4h|ZdBme?VR&C$XYbzSB}+e
zNwe?7?o_-HL>6avRKt#NE*Ir@oJ=Kj*E=$~_jYOb#mbmlYCr$t)@e~rc05d{Al#(C
zPKf%Ii#<}Zu{jNrpr3YrcyVCR8jxQf-io)d+(Y1RaAzY7&(w2PheJp_K>Mo+0?a6Z
z9Hjqui1{*pG!^5BNrPHqNJHM!RXB9!l1%OKRj6;#nyK_>2+<c|9C0wqaWZ>**)Y@_
zdt8;m*9!k>$aPiq#jI~o*K3vk{%<iKRgzM<CoDcO4acwLeA>Sf;m2;)UDKNeYOLhW
z_Ph=}lpszEXx?X0FVk@^?OKvl-HoSPoUoMU%u<&U?rkwTQqI2Jg?qDT%>t<lDUaNP
zP6r$3Xk6Mp!;fa8ECe+A$NkXn{Qd`*<*V28E_{}{|M~OR>yKNR`=9l%n}6K@{3qW3
zG+UxQjNV7L;{TFKbRCY98?k*A#{BPpZ$rFgZ%k(Y(bqR82kpcDtJk>;8}mGWMMD_=
zcQRRH(-i)tkV<(Q+s~OXvMh>87yRnA&sT!Li_i?rf)D}Wz%K6oLZ?G%-li-YKAnq_
zhQj&y>8j<=2vS3KKhr@Fq><fy5?IiNE>|a8TFJxqY+7iVUGbVlKovUZmFV#!DOtaw
zdS2srSR;^g%)@xJJp#x=w~e}GL#_EVD5k4Za9Pl&G|nep6ydZY1Ku*sDGonM=s5N<
zy1yu#Q|{cU0s%eMFsixwD~^K)iKN5i{3_o<2~wl&+-(%tX8r88-sGYvJF6dB__(E>
z@Zc(^)Ct1yDmRE2Cn}B4U^fkBk;D;O(U_=Ccq?nQ^!#ixN2`-FUKbx^)O#5!hzW-@
zV&`(2@0;A-_qwrCVWN6VWp^o2&t9)fV5t`GXfBG;WYp%?BL5srRUQsWQ9QwDp7<is
zJPDkbC$C=DDq?|P_WmAZkXE^*IdH3Up;o)nn_aEU9tsA=9nqk$zRsess<32^Upg1n
zh5Z`n-R;F<#iL1&%FYfcG6S}EmG-K*`{4WwW#BVBu?)@Bq)Jv|FO|G_#gmqW<*_8H
zf`rn$NfCPlC-MbUgJzzEXr?IFve7X?*B|(NHsl7rrC><tHO6R5*p<D@a599BJ#ARX
zON{J%9irFT1E617EtW5S8EgMke8C4;+g{^I<3%z=H5e55tnq!LJB{7}Cu9$wfv#oQ
z%)+hpVcA-m>P#~C0u}|ziVwiZ17oX?ZN5Uct#Id~&Prb~8+KNR9H#4{D75I`G%U@U
zdvCHXghNZXvh&(k21`o?%xk4DiGo?$%8cxyu(Boxsr=+2vOUj%ZP%<LUs-ntsm=N1
zIrRkjnJk5F^`+jmSWkPk75D<)M}>TInnR482ZpNbwY9;v7vxk$q@`=)x444~Ay0G1
zQ4!RsUE}?No!&3n>3zeUmg1c{)35;IsWNQ0kSZnT;b5HRXo3xR+7z4l1U0c~VX}1@
z`c&jlPJ2Z!UKPD~UG(BzGPxoVj}ol3oR_2UeFvr{tv+5~UknbDO!Jko`vRz`h1{S8
zDkvEjo}TP}VK~XQQQXS|z1{Gqe3DD~q#&i{e*|(vk_BoTnaJXKiniBsn5)@g$gj3S
z_O`tcdn*kYvx`i5X$IPn(_L%j?_bS%|9Z}QvjA1P&sl^PQ(=Oi!{|H-EH2;R8u}Kh
zf=0{2&g7;?_^5}vv1u292dZN8v+E6&X#<Z~otx`y&r7A;w?0>lSt(dV&qlOT;=(0x
z{8S`EUMP@M%d0InPIiUbRppg|k(@gPufNO`yk2|?UjHXf!RsZap!;Q}pu6}Kbidpb
z=n?7uTa8HXHyDv#*@);1>^TScduvGk^$+lcv81!-a#_6S<qqgA@AA7&CR0)$lHw4A
zt(!QRDSm==9e8;l5vYNEZ<H-W=uDSWq9upF5GW(Sgh$C9VROBukuc%fa|2<iQ99#`
zgy1M0uo&?L*D6-F$OazI2-^wPs1yaDx>qOlVSI&d-*|<MQcDK2Ub$YH1ix9o-VT$q
zaM;J1z1{0Z67h-5;Ls3HchIz!fiRV64NoS1#)~7PkTUCpa30T-83tD(Wio53a-*V_
zei}_*Fry|%hlfisZ$8Lr^^UMY_~SgpX;wHnKtCDmn=WFa_3+oLz&_(#*<Xrb0Z8x`
z2f9_Tiru9pJJ!#(*^imyywe+9J(p^mQ6CLbjxVU){IQ#@&RGSeij58I7ONyS(sCV0
z%Oq{hP1;hSU-C)+7M>?(@kzpirW8uzZzrj4E4g-GYFFc)zE?4B*px($5!#TXSqJWm
zcs%IB3LkbE2kc-xo!lx2nORZd{Y=V>K&BYFHK!oTPmeDydhZex%vHH=qU%|Dxu3in
zlW#Yt_@K%wEO;7S=jskDg2Dv<6Kc|wc2!-pYCkv?uTkE<;NE4NG|Q{MZX+0xCWiKD
zIiQekCfDp@2>B;Wdf+?!3HN3YkH{CltNhM2p<2`0p;sq)ZUI(4TJ9n$Qv{u(+wPNq
z?iVytrv=&a>coMA_;u0)Lff;T<9pBwcq+pjzdEVf?+TswyU1xzhqpye{3P$nk}V+C
zSLQu*)AXau{dff=T4(Pv{EUTx(|=_dB3k9(xqysHDmnuMLY&@OXayE6u>;w>&ghj@
z&MM4H0RQds>E;m|>03$EaJq#?Cy1#7Q*7Zj{j#&ZC4Sl7fuidh@M(3`mcVU*CrZ-X
z67lZNFRLy1Td%vqI?xL7Y|DB8O}s)SX15v#zt|R<7|?(g7+Q?sJMpI-v2LgM=-6{U
zBUC2tjGeRIguH=p%L>Fp{eVE1AeH!~R@-ulbdz8cb|fvYt~b^_i47_dj>9jAB$QbP
zSWz52^Q;B4ZrxH4F>2XTU{Gh*16=yAoacpWN-uk>CdNvVRXG)W^2}ck1_;vTX0WG6
z9pC<GM~z=?;hD5bpm}slq?zp5(-iy0)P7%Y&Yj5IC}6J4C`clcT+=ZU*A~oV=H~Ts
zbMrd43b_1&Rq&~j{q)H|l(>hEmhhxo#9@<6XKgb8i6v@B+^zRAr;sxHUhp)s!C-uc
zlFK4Ea4_~+B(?DNVWdV(@>u@K*4&u?wN3sQw(b|PO_d>X-7h{)S5>#FTT}f750|%i
zOI=+LM3t`SM)A1DdU_y{HOE3WosFv2B$_5tkv-I$+C}~%uZrqGx(bWYv!0vw!z8(4
zhtqic76gD4*(f(uK*U$<K%FOO00h5q^Wch*oI5-bRVZS;w&b+)(E8?m4*TaU1k2eW
zJWE~95Sz9GIn>j4muPl~9TB_cATbjtxpd3~Wm!iE^*Z3b*Tic6ExxliA8zFo<B?kH
z%HGc;3uPq$&gLzf>fQET<wn<OJWR$InNp;76=$#&uWUIht=F?5easYY6;THN63H!(
zo?ad`2}*~MyVG~5xx8Svjlj2RE^sy7|H3H|1(sYrL*L@r5f?U7W2UW_T@qq@XWp#o
zJ^lq|PmA_w>ZK=*Y`&0`v)gNc$eGxQyvHphz9(8<By=q>9Ugxsk2tcEr~mow1RX{0
zV8_RVxKa5G;rA>mICoc@PBp*m>{z^ySsHNG^Jv3CTJ4G+St!;ju4c0Z(q1nXJFd(^
z`L<CUCzhsAm^nqB9)%cd`4~?oH2m2%EXghnMo}FW1(US!+CCfqTwAF^L*XD>uhsDb
z#{A0X`4#e9uUrG2<TUHG70%VnP{?0*ch+6uK$h?UN+3tBUuzjj0o7PJ9Uf7f_LCTn
z!C^Q$8-$|c=F*O0km8;6LPOtHyzh6AOK?+!gB!FN2;3shEyXBVaE>C_Pt&MOvcW!H
zV;_r|TO6eifgUKQyU{R>%5v5VbLJ(HxaQ#<SBrT^j})?G34RHrl*jc-A6$u1FqFv$
zxVu-Iu8hDtK07*^X$J-@U*~FG;&C|rMKk$1{W)Wh=kizcvR<V~My2i+3pcJLRGRox
z=D4wzquH{Kn(UXrC5(mQ1{%M9fE>8pdeslX2eOf%C*A5W7?M<n>RJwUB}wQ-qj7J?
z(Sqtd=X7%RceyewCqaDpu=8OHes2%)_v)(mq`*drbuoZ;>BmaPIZ1IVq9pU-!|sLn
ztEhhXfV|Ttd^dAeurWSB-=J1^XW#?**pWE0q(j%8BLGk=72+oWc@pyB!ySsytCiAG
zbBk82kK~kWHXpgs?Qx5h3Mw^Zww41cz$vlpqAGKBQmP<FEhRfxZx)j)Na7(qJiR$h
zgH5t%+!oK+%?*30%H)3N9F2XUJ|jzx&=D*ilSjIo9VQ%=(t(FG&X{u^OC^xk{7@2W
zgm75rr?Ezm>)7#(U4GokbiUc~l9lF0%R_K8EED2SOrvWeQ1?$;=U;;?!1OPA+3-FP
zx3Gxadp7ydX3(+Tot@^om+*cc58<dmVDE4sE|YVSVq~O>u`k26NdIWFgu`ZY$E>7_
z)%5`W!|Do~t=#iw&E_Y=mYcJwa<*Ez=SZ}}4flB~SK=Af*(<5@tl5m!8f?NET+1e3
zcayhT#mSrb9)0_b)k0jC!+Oh2@t1F%6zhD<+f{z^O;MFlSgydcZ@(>0-lWoVqDiWB
zJJ8-0tWRlkpmi%)pi=|=46lcV@HJ5eO4EEIOp~T7sdclBc#`mZ71zlM{>K@hUnr+s
z5A`!j(prH<A(Oz+TNTu~0D9Xpg-|k6+bwoXr^{5-G8MHjNz0TV+?rbRYeM0S(rvWz
zuWe@0-gjSn_gy$<bo%K1c|_*;=mqI)z4v<K)5;4joP`Kq>n?4#7Xf^Q9%pI&N@khV
z#F{k()%+q_af__EW1iWr+2*;!wq80+yR8j08~GD``JZoV-XV+LPbbj`gU*)^nn9qO
zgyTUnQo5&V9rjAlbPn*_CDVUcDt)Q^Ku+7nG@AO6Zi?#r;5PUWtT!4BWIL+uCqLJl
zxSE@yQ?tId@N3kxi(|LTA$vGN0u`3k<2Z>*F=>vshhC%NJ1mw8uuD9pqQ0Y&$Yqky
z;w89LIy*C#n^3NF+C@yMS-NQ#gAq)y-Q4(9Y$CQFx=|fASBI?{cdJ&GXAn)RFKTrE
z4}U!wy5x}~otx3kf+s3Urvwp#Hl=vl$^(+Gf6)(OGNG7rs7?tmJKCOZE^q+mwr3#U
zw6bsVN1$i-1oWnrf0M~w`~y@elUp5@&S~|nlSplKIBCEAOa4vv9CK61ocQK1**9*r
zXL7uHx$!!a<JBu1ui~GxRwl0=FZZx?5}6*K$sYF#dt6fWUov@vp{<)hI#d;K)_%K&
z_0vJN27^#rf%En+Yna<S!+ijwk5B16<v{|jKd)5J%zKp!HRx6Nv<3^xmN4i{UBj`#
za&^6D{4hiOs<7tpaouT5E1QX%NR4q*a%aplZ=*9O=mwRUGxU>6hP(#hYUg50Ty5_F
z8w7s~Gzd50Bo5EeaTRmgq0%PN1%i)S<{d&CgbBLQMO(I3j$@jJvp@!`%H-Qf#pQ$v
zwLR(3idX$BiZVGK$CokbnY-01Sd~}m>JY!S=&R0lIVp;%rCVSd)9VIfsH};l(B|M(
zAo6avcI}%~DUY1*$Sc_JeXR-&2<SNlaFQbBBt>K4AzRA~Yz?oYIUmJ?!7!qVHg>C5
zwXD+QyjBtq%QQdW0k(`28&GGM(}Mgn>YbxH$utR@Ae&SYcx(9<T-mCXjAX4=kxh2_
zc~d=iq;7kWzV3g#YJSSB(;gSOD%Yv}YOhkUy=@UZP7QKN=27m!sx6Fs>9uYvprrUc
zK`LjA53W`pvzZu$-9tdVN%GFK<ne~;db`l3c2`VC*VX(K2E$2KKh(8vr$*Ou#(Zn2
zMQcbYuAn5K@IC^?<yfW`I1!D@C>&HX)q4|&`VOwZm8pt_S-8myS_S#5vH-V<Mj?|K
zgPvy$diOO3y?JBMTVf1)Ut$bC&8I9M-2r=Dx)7UcDV8im+ZNIcyRV%qRaCM0=qz=+
z<{k9cWRQbn>W-gzUO2=hbzvTB*RPl{_6??^BxmYlGiCbe>U?_G^3lVlO#lln7${Zd
zet~mlt`@S2Q%dBhpMC1pd@7&Dx%9lsX-{RD^Y0zFA7w))TSBZ*f#!k>DN-HqtYywU
zu`N*LW%c4iCewzMiLexhNpj7|$XAw%^i}`kxSl)eum!9$Z+Ug*(3=9aI4hq~Oz;k$
zJArc&m*~6mmou`MVl8{CXqtF^dq$yGdTU_IHG%W>OI#Y<hweYKcDbd#i<?iCg>?OS
z{r<x;Q%#>oTt6Oro#N=Z2bZbL;Q$NEUIN%|SfWZPRM29~^zfmbi=SD0>LIrzo8?J)
zmIp_OUYrGNlWL_D2NLqY8Uk_afoW;hM~uAy&BWRMfjJi<SY>hmwxr(!w$Z#bN1qJ7
z3@un`S<Aw5EMirp*&A@uOwi^<T;pQewA^=WM-yMz%yLH`vYTP;-eJ>*Tu-Gh2s+nJ
z6JrW#{Gxgl$G2a*2|4CjsC6fQgFra?ii^zmVRG*4xisk;?ylpUnnc^i=_@Z~U)UyL
zj)CgXJ)h9Nhz#sdl8|AU`JS!HKv(hjZOvHdQH*hh9ZNNky!6I%4u*@}^_oliE`p1a
z>siW!wy3D;UK7V-iB*4*%*F#R=QT98hQ86uvmjcH&GleokllTkP)%t`RavvZ3x|;!
zA}7?kLtCKsqr|b<x{anqTxNX10YV<7Y=V-Ejj1Fr({~YJw#8{ehel{a9GAM#n5Fu`
zZ%KF&3~O1Fu@AdD7sb92&|qq%JEl2XoDV^^=ugF_Ww{!@X9aRPBk$>V4D{Y@&|NVu
zw*V#?^^EkpMnFQ)%gO8<s2amVT0TbE!WmVtq%4AaoDw|Z%!Qg%;TOos)y5>!WmBy5
z&|@QrSb4tP*l>!VyK5M^z*=b;_YL!6bv5e^V$oRl<)<<`hV#n^?Qt(?oyj#j5NFXv
z!bZ5~mt+TJe0+!TD4v=_1$|dR5c3l-@mKBtz3Do5+a~~`zGFo&+hd#Nd+dhMB_uF8
z$#RxF%p~zbcvQ47@Mk55MjZu1zbE$*wgkX@-S74rrV0YsA%g|!N<D%}L;&(zQggm~
zt#P0R2^_-|Ye?a4;(b|ouG|l;OQ1FW%+W)v{^0tcHHeT^ytxlUjJ1o=NQdDWRM)6j
z?oBxQJ3DfSJ;y~d(XMgpTO!_;{&B0Tv0v<Uz24AFC??0Z1>B|zymNo)4}oP2;4kS4
z-8F07;{jXOhr#dvLdEpcn?aRatl2<35D!&B4`oTN9U_fhoAw`vRmREWCVA<iSFB)j
zPp)V&uPjQgnv?a-TBh{gvZWjQN%}&jkJcfa9UYoj!|G<{uFU$C8{E)nw>fXWBIhvE
z`jX6_GR>S%xGF`i`x?++c5|U9(GD8*ACUil;Qzb${{(ZoObCE-{Qt+_JpS|J4E}#}
z<Imsxf&c$U{AXYM=YXOJ{1F72=yP}P`RR#2kbrsx*(8edgT|T>0`?u`tjh=fu+Vo4
zIj~})>tqm<%?p3P0ZQbHIbdsc-kVbHFV4V9VFT_EX`m#~05`la%)omDtSSvJpvSQ&
zz`!Z6^b&7h+FqS9uD(Bt0P@!y6Us{U`9T8iJ4V3$g$O8%5P-YX3;ORz`nOBV0QR$J
zrEJ|u0~yYVt3{=?0~t7RmYwrFh&|!-#!kp<3))o2L!=5ldny}$YuQ)=MVqx^q6z3_
zQyxP=(}%penl;k3%A(-`e2vOS2>AQ}0jC@VpyGrGSRfLBG+yla;s2cgZG+@|X3+GL
ziv@;Bg%D0H>|`oWE;JyF@wxQ(qDP9*Si$T{;QYfucs(_dD^|pIC@v?_#m)nDa={VY
z7{iupyiIP_ehF{FF?0bsp2C+=xb|T8Z|xWD?X_@MR2jY=dO3myld~*!JH|N3gUE><
zaM0<#Z14SF!-?AV`Plp3SZ)Ql`%HFGM6_Y|7XjUOAouSKxPNE3{e{5xKND*IbAa|G
z%>J?<`}YU2|M>v>{~UPz-yU3lF=%~-dHI>Z`aVKlgspF?ykBJfdj;0NXI%X<Sbb{>
z768?kkdh9fJ|8om3##wvu{H_1aasVOZ?Gw+r{yX1y(+KIYC$r*2rYW&12FeDB~DMT
zrwLk<lq8cd>B?$B!|*Q#!e78fd0~84M8yZVD>n`r2|Kbx@lJ|UDa7Qqk`KxElz@Ek
zZXEdx;E{X8#GG1^R5aMa3lj<UOO6gbn~(Bs)#Xt7J>AF`>JM^6S^MUbIa^fFBT*Ph
zc20g73(0JwJV((PL#f7s!?Vwrog*SoF{Mq8hdP#<ZJ2SY@QVE#OfDJ;{x84fA^<o$
zZn-SkE$HMf+KclNec?^%nlGr<=n&gFOu?ybkK=_grhvwIA$VN4N=2S5FN#Wj>N`~@
z$bryh8m6?j&;k-dGEw0WIlNg!ki|KXuQ2~*m`eP6*sl6kRF^k@)7ru(3%hX0>4i-g
z*xEFJ%EOf|s#EJ^Cr+s5KkXJ60{0knw{H>V`>BIE{jstH;xvyzDd=>j9Cwks0SRDD
z&>WP;Au!bzg(J`)*I3sgE51M_^o7=a?>~Fp|E5c~_eGbky^wlpAqi0_3R&<wV|Nw~
zYT(K+1)b1){p?nHMv<w9+V*lBEcwN<`mroJX#0?Aqv%3_8ev|VZKIX2Q-tkeLz4kh
zX%~7FS|}wGtnn$DGeWLcCq{fR<};*sMRgPLMT2C9mPX`y%ndnD*aFW{+G|a?$cNj7
zZ`UNsNv*68n172!u?fMDy`9Aan8<i)_4F1O%rv};#&Yqzx%mHDe;)t5fokiEVRB6Y
z9NvUx+2F9?VHIjauocYIT%VJPmeF#|Y$j;uBK&J6d}pDYSU`LvU`TCAc30P*jnZ)k
zc6Y0?ELjQd9UvF)b13dLhFtzx+O)0+n+kBM=jsZ_0odiG{k%BaD-UP*immnN-h)wj
z^<57xtmnYEb71J_;#ICmW(@XhT{ji*1CS)CI^ph1<Ee^EddkR-qOdE1<uTX|Q@lt7
z+AteYUD$F%T2KirF@@FM_Y9C6LR~adoya=7%ZQJlGme2MMp3Bw2q>sCbyfZhpI*e8
z^xV<OEe_Mb?=KA_rMI!Xh*Lvyo*RE;#nKsRkYJQ_VfH25ju2I}E$Y!9dLZXn-p#co
zEmfzhxeIb;=jgb1fDzGB!!Zp^_!!gXEs-XHG@#W*=9Z)?RcW7Y!gz>Y%7M{^rI*QU
zIFQOHIC~_Xz3O=N6+-lDlzU|ZXBxZ!K~Z^%2nj9^lPNH&;bZ8J&o9CyGfyYU$gD8g
z2)9f{Zk{WD2)AS%2QCmUJ(A4N{R9c}aL4x;B1n$z-1tCWj4d<~T=MdT8$f3^Dh;6H
z5hs94!xo^0YkUziTclQSFu!2vfYuc7qnFX-1g)#%k=*B~K-ASaYU<1YTkCJ3h&&%F
z3q<4b81$9OXl1=^sFreT-ew!=Hp}Ab?Iz``e^EBTNSr6hyGGH(>D6kcaItA4!Yp69
zlVxt^ZH86p+RtYbpq*3T)?t$HG<>|Cna312$+;v4@pxLWLUtf0aU(h=5@xRCq9Pfg
zDEn9n-Px;_ympP#6`Yk$G!|C0ZB^!lQYsFZB;UCDhpmb~`ch@#NJte6zryk#I=<*B
z*Q6!F43&ipnq?Trz(LA+2y!z?6?RE7X&0Yc#TanBAe%eTJ$uMP3J$PcBvM2v@Vq+d
z8t}c?fbPvy%REKNL=QnCva2F_&w?o6H+H>J-kMYO$ZmvpwC2^RMr1|kr3%gX1oK=e
zcVH|P7BmITJ2&PYXZ;(OkR@t#iEl3pHWPy0wNv!e(ZCGSbfHhq9o9`U0XDEK;+~ea
z)*St=T#YwQlzS48p}I|A$>CJL+!0E7?aSbhqZPL<YMVmXgFh6FBct%Pqf?CVA{gBp
zYTSw{W!iSf1)IXh1Cuo!qp9rffLAA!Xola`<U35{{BpEwjpUB>(?T!KoGw*6a!UG}
zJz^QFb4}ph5zU5-{zY<{E96wv#YUs(N@0ygktaiy*s@EsuK!4E5*1Ovyvmv+w^}8)
ze(TAr%>6}0&5>By;RUe8Dmk~epO)UP3$x^R+RR^)W2ar8MK(3#r1r;ixdGhe%Bx5x
zdL>8mOYi3AdOLeA-IQ0c+>F*f$q{F$=wUv;_EufjOVqk#kmCY8(;&V9A~0Y@|0E&<
zhiOXOI7ZDw7rynN(P#wmu-+sUsdAwmWPE7PqVo_f&*U{FoJldcrpD~&=@bbssmz&@
zkI6D!8t*D3auA5<UMOS-w|BrkB=6{6NEb6(ZBjw#@`72F_Tr56epBj|tGR6GnT5)C
zgfqUPE}DjpYp+-1xvISzVFbogQrAnK(+&2?1x%A^IPB<B3JQ1!XRl==D(SjpMsW7H
z?iBcC^r*|CP^KmidU6{IXt7|t=<)TRI649odg+f1<FKm(y?u0}vJMNquZdwZZhU*>
zQ%M72<W(CFc!+xhG8OtnyCF=cHg5hbz>GW23-xqW4VCMGjh%9X*Lg~X_SCaJWV(=<
z?UkMNfX)Mo%Knx5Vw)~z8C>QV6`$tvvD})BYj$aa%G|LX{5qrvekM?%2jIOJtH&)x
z06s~RQ8Xn-Su|<_accrBDjW012onBmZ%@1nCu2-etsgf<{d+V(uJ0f1A3QmICTe+j
zJAX?s{7Hal&6<PWvVbxz?w%%;IC{;=XZd$)<mk^T?{|qZWNlaT*-3&kZC>pfp+6fy
zNHOD9)|}LUOgpyOk_?c0FBJsH@7yN`P+_A$xC^O>!M7QEH+BU72WnxuxxJH7Zus~#
z?cuIp=6#=wIk`D}T$Zn?6hzZA0k5)=_1IqDYY}YoH4<vQ%u2PK$I(RB5<@ak^Q`k`
zi;uFWogZEt$Xy1+DL#+!Kp|881eIgBh2D|Ab`t$Mi!mftXdi2+s(;Ui${>>5sB<F7
zN=cnw%?GZN2ebRLs&X;x9lE&gC6jT)7J2n%z<+8D<q}c#N5r#QBI)Zwf&QrFtM+)^
zIkcZ#Jy@b=@hlOO&t6{W>Jn2Q7F|>(jGAZp%nQNiV{d(7_wph4K32S;_N;@{A8pS~
zU<ceYVZQQdIB?$1!jbbVvZ-UPeX6*z)^byJ%J^SBX;nbiGaOoNR%aY)I4=D_FO7w#
zONU$V%+_bBr?RuDqO9d7cs>C!oP9aq_TNKswk6KR;3wjfIU7PXL}GQV&e6XHae6*k
zdt8%uSM-&L^v{Uu`*>v#!I3(}M<HTk2MCXiUk32@?7UJG#0&xTGMpQRxFuGp?&Ilf
zx<SFs&{EqFBr7Qvu_AnhE4KMp)qyqf7;57d%Z^qGSxq237FU^1q}y*NlrrJ3$W0sc
z2+a+mHo%&r?AFvV*l=t8s`gp|#Ka_-n!R}KR;X0`$<!10EIWkEvC7IUl$u=bdTDN3
zZ)5;+>4ZQLEg#AZN02rQR+I%*lLhJUUo&4U9j6F4;~W-W=#eYXTFtNOlbUQ2GnzF~
zP(9~!aq?;&ibfvGw2V+<wHQJqW)kjDt~0EbOvEyBAh~xlXNZ?+_d(|FOLx9Fz0-MY
z^@U&XD@fk-Bv-q&AWO?%Lm6$6WLL+rc?2XE);-2Y%7(O2(I!wlY+5I>nogh~nKo%L
z)Q<Dcj8cqeE1dK;oK0@kAx28QxYROMKy}>92B?8-seM{BTblNNL&QHVOx;{6wY`h#
z>y5`7GK0#bbh|Iwy~Fksn38eyu530o{QOZhJ8*PN<wp}JyJ}5K1HfSc)F2qZY?hYA
zKOuXTA=yeWPt`$?I!KkCR&D!QRk33p*Y#a9H*EFudMAs?CH9oi!UA;d_G0%$>nmB^
zCF@(aNmrdlQ}V<K;V{w5>MvhEHj%pUf06ts`T2N+9>wfCX6zan4-3&mrm1w{mZ}p6
z_MfHB`ys!-={;kCgkkdCkS5TVmo%Fo(zB*04R6>HdHlAbggwF%Mar>-qYK<hJlqoX
zdOR{4(7CbejYc_=JPI_lw3BvHv?8GpPjcS^ZaT?tGSsX(q5bSz;<&~$twe~bQ7y-;
zV(T>~{Y~j{#YL>DM#1k1R-Vh+@?~w=^$@WekzR6W@UJY9>r;MwhL@>#==n4x=JZAn
zn>r?L%!qiWh|0V%le6LxAPH8K>QSXa=l$o99-*v63XNYNY&$e3Cw_XUf&?!CStm;c
z;_*!~q_{pTbq+Uy4YY9%;}JZ$A0qJ2y!0oU;M{?$-Am)1eY4GboVlZgBv(7!3Q~yC
z7W~~-q+)gT%FqYu6g6Gnr{UhbqSZZ8B8XNc-<XBrW>Pa>Le8?X(2Z$(CXhGU!Bnp(
z^o45$V?}W>#zESLyG35u`hxo=8=V59b_60Dn?iR8pScKy5>@p^+3ewUo5NG&#{rG~
zu7C-Nhs7uDl?v)nclMv^aOMW&PyT@}Db@*d!&(d|%RWxEQMI%UqYa5C?{Rhf^F~eM
zS<x-w@GbD*)60>|7c=hZRUqKs274XXujT}8k%OPtk~?LtQ<;_?5;ot}>ITv*5c@Xf
z#bqh1#3z|*+0?F@{iyF+(I>?+dsk=iqHp1%w?tb@S>r7GOtDRS(J!#%D@!i+j|+(4
zW=A>`sS`D+_gS^YlB*=UMpWqt>DjvDT$Ly*GWGD9xLZ9+fI1&W2#9knu9M+##tt_#
zHfh1j%zAQh!FUE~CCA1i50dBP3nnLsbE*-|w;*kTvPz`f`bI7<9pejNVET~M{=k1U
z_>cX=HX`yIEP?}Bj{ms+O>@1K!GCN${<`@G{^M_g|F}}IF&57WW3h?M;xt0JcXFNZ
zX^j|OLk_WtJ}AJ_-Jq9a1<lzM)FEMQp*|81{IN(GaS@-K#p95{p+`Vj$lWhtEazl4
z6{7?;U3^aU1Qoh|0IqfvPtnuydXljHk#<Egcb>pQ4kRKAxq(L0O=!_drcHF)Asa)`
zQll9;P7?AWF$QPJO+-kJJPdGX#OKoe2dT?2R_=$UIwj!iw&0b)FpNhL0^#JF099Fo
zpqc=k(@5O~t-uD5x%-l>&;VmLiWn9Knbu(NrGFJ1+mkpPre>TaRAnUW4p5(tEDtUc
zRiHJwg@V_pIz`H+`-JnsWx<)CKy1z;M#O<*1ITWISb+(8&Lks5AmgEdtkRxRUDF|K
zXL2!pM|NPmfGT7!Lk<MzXpoOfYl5q1%<CvksRtF&d*11a?(x&!_wADd0iR!<9Dmo@
zKiC&feuS3?V(<9nk0+gH&wJwe@!|f#NmsOw_TlMK@1*nOw08^-9<;lV^8vo7z^@<0
z!4EG_4!T`&d?Gq8ULJO!G${V0ebnn5bYUkQ?H!)(caEL~0!lzfE^*j-(dj|5-f=*+
z%4`)YpE!OhUL2h4J%_LDC!NDi??-CN(@yUQD|-sHwZ+T!Nw2eade}Y@FHcWi9(NA}
zbgUxwJKep*cIU;xego=<I>o_v2S+{8ecnDi%yjhl`=f&sY`oLUCkN2V_LIW{tcm-%
z-#Izh>tTn?r#%=vXy73Thc6HII{4w>hXd$N`{c(!7TP^{bqa~#rPy!3Xg@pXR)8NU
z9~GFFz0;F}7uaYRiSFr>Zm-ij?H!0`$H)6L*4=}X?>c)2T{zVbkGnKrr`>}9>gctp
zYA6FntGk7tpPY6(G;W=v-oeSq>C0Z{_^1Yx{5=c|w6YCZ_i2ESkGQumNXI8XVyQR|
zGy#G5{`mnsKfy7j(Q4x;bYax?dUi6X5C*YlcT*f4JUc{|V($Q7A7e4!ce)2Pn6OS4
z6F`|*@Aqw}^ptvy^91eUj~y5_wQ>WRPSJTP+WX&iu;nr}tblGut`-`$z2|bwarLj<
z^AC~)bTmQKtf|<PsAxdtFpvtV`j|<GI3X!8q`C7riUz#%>DF^LPH#segiAbk??9cH
z0F5uug_X4uTwTfIl;P`|B*pl-T4B^h8il4!OeilTp~T2|WKo2!eb?wm$ewI+^aQoh
zX@IJg+knlf!i(OIFJWn@ckVYJQBONRWWV=M11xdx`0(WTd-IaN*Tfs6o!TvR^cGT2
z^(JTcy{+4&qEwm>y5?Q6`|vaJevUH;RQnPBFX_nvIWW(V!cg?lOKj{!U}wY1X`k*3
zwoS6#Y6e+Jqyw_aLz_jBDUy@+lgWS#4&BM&)aRGLyZ(}#-2hXS!n)acbo7YdaO(b7
zUu7r!tFQXqgF_(SEs;hTfS7MqFb@@kIx{mme-)3udjwlsmn&sTX?s2n#JHAg_;3Kl
zEIJ-s9vtG)qznBWL~?v{mieA)!euJyc`KQ>(sqH9K!%l5znfS+Z$c68;>-g~CCwYv
z$Y<x^tFPWhQCfB8H?U_hkTU~=8;GjCpI2(KD>9fSjzJ(E@*>Klgtn?w**9^~8#x=M
zbeKrhDUhhCUuXzUPdsLxypvGh_omF$Q*NIEYfo#P`RzR^fn)JW1%hB-NF5nc1Vg*4
zPXm}@0?U+Gm~z9w?&`m2|LypsR$SP{FbSs=CSHAliR<36u%eb3lulU~Z2CL5tgNuR
zX+zAp_eTtk-;w^O8x1eoY>Z_=DbG;?mFj<*>&>s5S^dxB_2wV(KmIf0f7CIC$7crt
z`6K=Za&b@F`+qw|i54Gpj}JkV&B-c-$(*<#Jz$EYF}svb3PXaFBc@jv+2>g_jwXDp
z`$@)@UAPvIK6*;$^yF@eTwfQjFNlZlq!MU*S1=ABjQIp+vk%<Ug|ry$Te9R<>0P_!
z%ZkAuGmpBnGxc=0l7&{VyjQJp>?598Rpoj}fgu9Bpoh5p_^Evt?l^?H;>v*?AkP8%
zi(+_e*^PdF$QJ*!W#hMK#~RzV9JP|R+)nCOGbcI}!j}o-l8<(=YycNIvK(@vAb?9-
zHHf3G;BTZSRxe{zd~)Oyb_S6z8Wh?Q)gC5x1Pq1*n?k%RFY1<7kZiieaxe=OQl%kL
zmGiM<=1mYs?Z{2j`-Z%9*O5%<HZ7$dzZr(3GmygYGnQ%K#|ZXL+|X}>kVRvK_r@yY
z4F`Pjc_$RLM>_Go3vU@do5(O0yssu9G%;8dE+i0NHyVnn$Bo${(P*KV?@VD<8SWZW
zgBPY!!%e0=0RtndU_9SJyT0QHH~6SZenh5Ixg`+U>&mo(`)k6m*5pLvTr(VL@$z?r
zifAJU!l6!L3BYs_;xXu1cBZWXJig+%r&QWSn4+C54%rCfqK4<^vq?z!ry-B)TRsKE
z^=yKR(~i-ro58C^hZ@NTw88T8(K}41juCqkYkPAh5GQJaA1vQD_+}e{#HbRoI%we3
z?i2`T&bFfB!li=YP!>P+^#Q3tidsov9PhHvH8c|-CmS5c@myc=WVc{ZX%XL2ht47$
z0>`>lvKmsYdCRlo=#)jo!M(KHK&dD~*$_EPgvaTjWAji3^?4JG)FLMhhPA+Dpzu7+
zdvc@brJfrMjEuUUD6+s2=dw_4G#u_LYCShMogw~GL2(Kg1Uo{`-9qGa*%<I&BP!14
zhPBBd-bp(s90gpzIxJMep>Gfi-3~9H(Gl+$9_LO$ZFIx^;9kUn(=V;@@<x*JJyfA=
zc&Gbg%g{Yhni;6fxcH+<z`tMcVjb}lIX`e=U#X8W-bOWQyloJ4A}Ps4Vlaf?oJ%cS
zEzo!CHWNzu)oD^ti7reACCcE-<tDNqRFO3IH8aW%KQKVhE-CLA-FLa<v+dujJcpk8
zjbxRdoSvEx=|1nq@J42cHM4w$<xryHNed<Sqza|OB0`G+YiE3AMx^0jWe=nATQn%)
zIg&-hE+k;V@+p`TGq6Ah<8dEa*T<LJw!CPX14BkF#3y#pB)jaqgzZ50<zMHE(9QEF
z20FmPB|fKV%6wlaiWAN4Mf^S*)M@M#f%;bowUmt-W40S=Q;-)_W+icU)_J96`vwGY
zmKwas0)~R2uW4`wd^0p}c=F)g{-XN@r~6!B@8yci#i?QS(Eb4$QS_gl9__Jty1)Cz
z${2&pkhY=fL|vhWisNveOC?A4u`k-oBGHoqgx0MiBrKj?C@_QYal6dK*ip%r4ojv>
zDdndZiBpydeq?q{VOIilyC3U4WPtSbURrn$>1btU_hqA&-62|;HN?57WuG0bY?0Ng
zLo1c-VvZ~3S<nSh$E>C8d8d{)&D5@Fiof#OsZ&U_=EAFf(U4*OV6KjUS+L^W*csit
zW`)CNcq)h;I?a}QdTAR0oTYC|^tR>hB8fZS!bGy>dn`hJwU<|ZimE(#D6Gp@mPQjB
zvvjDjBC*<)^}So)?H#OpOKVmvB*WQc?-kW!snzrK_}<T#rDCAnx#T;9nvin(G9f1J
zMhq4)7Q)?;Xi?EKFRH1Ainu7Iq9~M_MOBKEDB|LjDXEP6EKhFAh|SzsXtK|Rq0sjJ
zAV<rSqdSEH!W+Yk7SbC=+#)?$b-L!>R;;49H_HCn4plvBQ23+8=u%(;U@6`tNA)0Y
zVn`pk!Dh9CXSWs~KOAzj5L+WyBs;nxs%I%7pXs|2bja13=7tS#8cQ%iU7Q=qNZ%l2
zf+po9FR35IvdvIf1!MvyDcDh{#;wB%fuu`vdV;RSGFFnt6;V-=_R*UGk2D@x!juH+
zWr;OB@832yaJ4)Gar;cXc__Yv^oL0}4#al6`#&<|jdj79Zm-3=e%Vh2W#U+DCPW2F
zS!d|hfojgbjDOzXTa1g`;?SkxB9Xv#1APVa7;>)Jd2cBl@}DYg!J<Ie$pnGeXgSbs
zXCXm_s`H))B9XQLm!TPdyv<j##=x-OGAX{*Tm`DWWhaZ{5Qsq%F%Nxy31lhyjALDH
zsQWm_Jw+8A+)L}*KmQ!}SGim?srl!)UtPPzIVY(E#-)dIYfxusfQ-wrPxW;Z;t^%V
zH!EU`^r_6s6chQ3lz$l00!Ky$MuNsphPT+F>tuL4PDU6A2no3pH`m11+tZOTNcclw
zINXUF?>CH&`a)kXsa4Jd{%V}O8}lU+DSGMth~5}`cByscK5YzYo+j9vm&9Q>b+A|Q
z+cCL^QS0QbqrPJVx+pP|Z}J}AD_Q_0>mfRb&QM{Pruw>43AXg|9fnFG&nqJM7A3&G
zJA)BVCCmek_#_5Wo=8%iT${jlG+Ln*CldGmI6`Y#n0Dxoj=jX<?$FVEr=C%ID!m4<
zF{oV&&=ev$V~4yPCMd$sU@D-7LmnfR+$Yi7a57NGn#C+^T8vd)YQurCjOyPyFGzL1
zDe%R{9XNcwl<%&S(ga|imVdP<`sHGqUu}HVp6veDqm+pvMowv=bK;vb+YYwn=YKI1
z9i~!iE$=4Pyy!Chk%#P(q^LlJ$<GVs{1OcAUAWt#kaIflY6*+hxQmp@7jS60fL7`f
zLc(2;bCzDl<V#lMUIQr0F9~?#s^ql|m~fWqPWdABg?`G~^UM*!v?MYw5HCw%3b#gC
z0Y#;%35wL4Q-E9|?vop_m}BVX3*ewg#QR;*Z+-tJua>N6gk!rFC>oBj^g1CQG&?x2
zbu};JMw!w}FRg^b+UJBD1PwNr=hVmQ!R&k(52A2fW0Yc&pYU}u#^dI~l9^E1YM4nS
zju{IBs!!PxD2FlEfXLx5wK-Z~1Ty)`?&8$hc`Tgxijo#cmSE?SKfjsNXK=O7!n!Lz
z>l9MN$$ACkvSY~NGp+0Ehgs8=JA~9L5J;AZ7;ZP>_)13%&u_qd*zjrcmSYzBOmut^
z{YcNXNI1HTief>MR7jEBJL1eyX}5VELE;uIL65RoSzx`6NGb$IGLftlwKLXM@W=wd
zp&7GKe`|ClDJ7gRj~t>y{P;F^jZVm6_6}}j+u;b%Df8Je+O(ceqL8E!axVMq?Z~70
zdApIzDBp}~4+X{6Op9;S>?!ON^IPpwd(TKC^f?Mu3Uwo|BkN?z58{nTInFtSMDcTU
z`F*rxNsDb7uj#O-8n?`4_gbcr#TB^b*Y+V+n1dM_S%cJU>QWjmPpV7Pnxuu#z@p0|
zyKAAW`R9+#wbmytLVmuN!;}TlWYYb0GQIQmbqXokzba7&EtJy;)8aPX4#g#q@tp^=
zvDO5vkrzT=Dx01vL(jhSMQB`3N5coZnYV&pZm)&Ai%<Z|=EH9O@8@(TTxvygIOYWp
zKqE}|bvO={Dmpg^>kAR6n5X&9WJ%@!w<J$C=X@u+;8+MMLvr7cO8P)lp`IlRN`q)h
zyNtqXr5a=4VaGehDViae+`SR%FjVq3heMcfyC{QwEXx<ZtC^(<ltnf^n@-tf%_CQe
zPI>$FFGao_!umU=VjaEe?t`fPt4D43FOU0+<^EYLZCPOVvZ&yGme2Slp%-pECgVQ#
zp)+@8a>!^E#p7)BOw_Jh^#n3Xjp5=35tNdT6>f=F&~w%i`C^4{tRpnab*!++7z3fU
zWB_r2aRp@PaA*x)9B$efIVd#-OYEx)v%WUY%Z6!}KEAznud~;0ce@8Cz5dbhQ6Ig1
zpB<b?jQg4m=FJN$qs4HgdRKJHNiR#=A!|dHdk9VL5+RN{?e!#fpqGrR%-uX=71lT2
z*C~*1R9TlQ{t4d?9n`1Nkg~#tmN9LJ>AOTZOo}(`v&eVfq0$Lk4nAzMN@)OgK#9Mc
zdx5-Xo9!`~w?nW?Q<~=~-x$>Ir%;T4z`w~=kPI$mSx|`_m?_<~T?vw~bYApq7>4O|
zwYgkA8I%{;JeqD-T~r^TgNnXONQRk9FXc@w4JX#^g2xV(ussL$%sfpd^460ny^79v
zev8f`X);!hy|CHOqrnWLW+`$uQ|3)*ss;-sjb*=3&r2S|legsVX1SkrEJsNqLS)JD
zC>0Mpwi^!ui>!Xz*l4sisgMWC&LnSN;x#XMkZ}tmKYSRr@NINV=tY_$7@r8+c|QAq
z%y4%r`P0oNfz~?f|2pYo<Ep0k6?v;-Khf^=(##hZq51-A%K+Hx7}5N0%#QP)5xPF#
z6aUHT2E(48rm`5cS=jenb;<yf#RrKDVy&<TWO3plmsM4T8nZx^BOe50De$yx`l@X3
z!~a)>?!me9B22IT@hlp{H~d(u?d+^uR@A__=qx5f?N2-Ai<|mEj6@xS&r&X9>ds6}
z5yAo@p{aU&_i;PETRIbTajcQctXQ5FJF9WSnkN48&Q8+?!)7K}1E5u0yHP6&NkDN(
zfZ2-PPd7KwORcJSU>Q;aQfTb#>UpsptI7r}&+#})p?PDCTAq*TS{AWY<PK|t;gub+
zYyek-G!7#5khR-rQGy|4NbT*rVkJC!n^VbO)xbciRLJXa6{GOlz9ypq;|QzdQt|hC
zbF=vqehP^$C9p^940DV&HW$O>9g=5RG*7q&$uZWa-^J0p?L#z4-Q}fVC>L3LM4a1>
zPdY;XNu0f!3`$Swc6LloLc3SFt;ZaE0gId060x?!1@P)roQGPRGK_=qhnwbDK3En|
z!#P<I2a+o5f&$-+*Ho-uUQafHUontMuHXz?9r!^~<XkJ#g<;ZZxX!}!E6b=u-=~-r
zBv$mu%!y3A=qk?zDK0;8?)wP&QpO5=(5$WXj`xqnrf3h)hEvg6yv(KnC)Z3KSXQlF
zvbyP>sjoYQJ3>hvdf{MnCnJnlVH2{{&>_M|4?7?}PUGn<EyV`yj~Xv{`K6{}(=NHR
zO@lvcfq$u0S|nXDr)*1D8C`LaV$@?ZmEo$jbyASbrqW7kQyh<(2M<bYqXHDm5>gvx
z)l9yU)uPzU*-d$z1**y;nXu*wvmVA6czSKFWp}ey$=F%BT7XSia<QHyo8PxdHnr+%
z>ea+C&XHEbB^YLVY8OnSz<kE0Z6A;OcA9vseT-_$Y01tGuC#`Df$sMRt)z{n6FF;*
zN|CY*z56p7u_7Fb|17*NZ%Dn0U%`i4a~Ti_hYz)glwnX&+y<tx-ZYO`dPhKD{fJ{>
zB1ABTgHEQfH3}B1uYqdp7}G#D(M2{e$B=xD4`Se~+Ue_Pl2E)&9313sK~U`v;}L=t
zINtXZT&d<Q4M8sAl!Jj;98#;cVt@~vvUDIg$uh`*=`dQmRyfd1P#YS!EfH@+GABAE
zMXJ%KK%BmqonKNI_h{0C6Oe+^$z}ET_@`Px57f6+^JTU9Q>_;0&eX&|{vk4XF`|w;
zc2=lQos4I<A_Hn+RC#um3Sr~H$j!#e%96qcQ964BP@YnS!Gn79rz}6}YM>!W-PS(h
zc)-K$o%KwFJ=G#e@Gin=4ybBM5;l!-l)4f79MAZg2w$zPYSD(4>#oS@g=<bdYMtX2
zsEg*7-A#Juc9b4LSN~*M&5!D`JTHH&v=-a;E`2%--=+^09`BQQD*md`d4U2eK$Tr)
zG87QAV$1b+R&9_{!*?wFl?jYY59LZl@Jel+5W6LxqGR1|?|4Q`_AD#&>9}`ZXlE%?
zbrTrh?8U5On<*^oQH1D3<`F}FvBnRxCn|dNU#%plGYc_H7dD%c84cH;{-pXOfvRPz
z3wJF?lqC{q_N-dEGU}kZ_Fxhky+z$yH4eu07zDI<%mUiOD<?J^hRxF}<)hhz6x#Y-
zc&#=#bB}+&T7})arIVEl=zOoklKrp>ji3U}6DX{v3;Sfnanqa~pC<U=OQLPk+2*^y
zk~%&+vgKqpu9}L;R7;()o`xalk~vA+Ie&tWn7^U)X6-=xvSRSBtx9R97wp`&HB1`;
zdDiYyhjRy@zu6f8tkPVW%NQ45rZi^`7OFIL?n)>R)C4Q0BCq)@wx8^^E#nZhFVK$h
z7@;6M#~4E?&bsA`gg&dGV9R~hqstQ>Z&Ie=Sv&+;3*JvBs9dOt^fH+Z(Jn&}&@cw|
zV~i$OF$3jjy^I*yxwN4+SGl(M`jl(w1Uu&QGYNz(K~w(?&EYyQ{>-y}x3}Ma(e6F3
zV)*V_@oOO4<5tQ0^CT^K3k+q=6W17o(whwv53gBUlb<$xA$ik2wsAv+8rMl{=BV!F
z=T|wH;r;3RpEuxeJ#VeN|G6br#d&k(J$_$D!fRR8BlvvAwAQR5y2^=DU2Z@gQg3Ha
zDJ(z*<f_U+#b=$$8DNyi^H4%6`e0NWb~ILM)QGgCE2C;NXyTt%(85TahGE1v>-<Lk
zp%BbEUrXhak99dZ6FtUqO<DM7%V|q9+ZI`vPL8poO`R=Os;OmFxSinlx^ZrGEnS_?
zM;|b!nQIM3!e*uV8bLxWNjK>D;#SGR4i!+-r%DUSIhouX^lhA4Im*d_3!swGpVD&k
z(Jk-emha;lg;mDu+v!_q5>z3D$pFg`M#IiwB~@y4l+CN^d0@pCbtCy+hQo{gnH6WU
z@#oq~6;{-$Y2ixMtRnL;)1FMZze16!_LbZR$ifw`@E`8Qui57R>S;I4rXRud>{ic-
zI}LIk4f%FvVse=%I-%O<E;7hF#b7N&*NFI4f2{@Fay<{Moh2wjnF86UfHUFVg|+Z^
z_<jvSj0ER{Lha{k8$US<BuV?KX82sC&u+t@I-LYQJ4ukBQ0E!L0Eo-rR$AEyvYk*W
zHei&DlW8)>qicAJS|DwHuUpKQ9CU!tU5!2WFnlXj)7Vu-dslSpY9xIFN84J##JV%9
zE$np7D!e(T@cI&ki)`U7w{V*_%A8!HzQCD(z7lT<>lF`?{uQtk(Xg8F;lxYci*0kl
z?qc{!-$Q9^L2j<*{LSB1YSqTNp6I722al6+o!AQLXcDiBD(naw88?uA8x+1VdXEtq
zL(C^+K7DnRg_f?djSpVQXa#)Dd)f0aUN;MnZiJA}heboq;%}3L;4=m>Ma7X2S?IN#
zu-;O7$0!>Jwqde0c!4TAO~f#oK7un5HHGnXQ(nZ`!>BH$xCzeI8KWpEMQ4jYJ%`8V
z^6_|`C@=nYi*1}wl9YgNsAwpAwORWgU@kzMAtTkn5m0+&w>NH5G0jDaX%!cP7p&{M
zW7r`Rx*9lNUt@_3iOpE`j-nSyLpMs*v{UIY?oyUg>h%RzyI0yt6(+CGfjKYTRPvU6
z+<$r7zJZzx_Yr=xZr?xi{(adS_(ksEzrZbgfu)<&y<)jM@D0cG&Gi_%LFW)Rp}t*K
z+o+2cGB%f~{AVNWn#zdhh?9OVSIy@1qN;g*m6)_9GF-jVoV3}hhLO;&ZyOfkL(Is2
z5|!YVl>f79GD(7Yc{@wh%^d-t3?jDBM6#-=7B%uh+CmwNbLZj!W$sDVzL_ou2QNb1
z4Rv2_N#41ub78XC)HIw-!dt|`+ToFLPkMPvZgLjhw41wj(&Jcd)R*PfxDuj2U(Bzg
za`K!6PNJ#ovOJPriUt-!^I-)@@)Zl5i-k_zgn$>N#mnW(u+VkNbV(nz#;RK7QsE%4
z%TK^AZtqC&Xb(cLR+8tnT+pO8!RJq+ywju>@g+uA8jhlhqt;5DtJaJr%hEpDRoY=g
zM%PA@24CzMmt$7Q18YEI+M<YVgh70O(VMym_!6d^*VO<|H$8m7U&+r+OQWZfHcir|
zm2}-oDz%RAED*zL%T)_PUAs_<Q3Zaq$gJY~NgsF>46Uq!TRCW&N1qlQ3%10(@>U7s
zo7;baH=yj=mD38S=}DHKdIW_$BFxbX0Yd$iXfgCQ+CMTsXz7hpMO^VaB3j)z?QPgP
zQkSQMS^~AeNKIOEz7A`(d1K=pm!Ug7+dHk51vbq}(Huf`oeP{Sg17@Jj5kG=CqLQ@
zbKLGlHdf%4%rX-YbHs~5j30GFOx$B#>&G~FH|p5(Y$aaJgorak!O9z1owOQExdO~!
zHwMYH>W=q{^L4AJsD^G=)tMrl*bs!=;&eCvGV69&vxtSm-wtM@(e2ja3vGxtBA93H
z0z=bhXO+o-=h8g{C%@tuB9i#5@2#3-i>+rs&CNIts=50Cb(Wk&wsjP5wPX^%jmf+n
z@-`&<NQ}{nWjdxF>O*+dIfVJpR&12dkkGs=v?sLKu*FPSSl<u*LNqJZ=sO$7vDZKb
zd=T-VKM!YDk$4`(XVGxZeL-dE3xn40>cZf+QL<Y?J_>3cefD(SRnd_vHb_7E+zijs
zR+SZ@bykB~+TX{ul}tKR#MXR@^Jz0zQ8lAqmDwzH5M-)D6SuOO9C*GepMP3vH19*m
zEWWweD!IBibFlPE1l86bg6bcFDrpb5=17HEna?tOr(H5q&#c>&-%dizn7#;~yf{zu
z{&^Cg_zaP+>56=4Q>5S2R_-JL-dT<-o->y_^T<|V%ewj?60HlfogcCnUj;cVB5mb3
zE6w550RtSicF$XWHs9smwzoyyEhrFf<F^X)uBmI6*6LYnIsUl35lyEN&4n6qo@<1U
zUtOoloDv!{;P{TUu<;x7r~gdebgmP?m-j4N20oK5R=<4Msm)E~U&sVKkj=0W=@&Cm
z|Ic+-^x6$O9z|1JomI<96R}+7x1VY&w#6o}sF}=>@sW_Zx7Y&jDB<Xy1d=_zN|kp9
zGz4VcpIc~rGabO+^Kgs~uE-W3FA#{!<Xv>bR_YuZGM&c5A(v{*bx1ogj6wyb277Vs
zC_Hks&jyR#z}&)0mM*a<!fm3&=XNMhxSj670A#a_X3}h$MhtazE#O$HFs`fkdv{&@
zl_D<Gn_7~X4p!)O)Z&vs1s=yZ6rlui(`^pOqwO7zB_WVfu(+5k<^zEc7qHkjD<w^m
zE@9Jw*ZvmL!f{E5dCfA~{HWdg7ay5Sj!P@U<?VHX{SG3-6-BL_gBmxc8aV^#q(LKC
z+j|WL*7^=(3_mDGVG)<Pi#q#<>&i@4wysN@xJ%=&<Bs!tHO#alaqdU6%ntcr?GR3d
z2`uZ<D`OLWCANjM3coV4)!)MN<Saf(*vj9wKnL+}s^q|F15IL|A#P$?1ycOM;sOVW
z=s_*d9>Jey*;z^l*a9pdB}5dqGyMt11D~DKGu45spK_BtxVmmS`Au4<2x-2G1frsh
zWHHT-m8P0ajw9+Tx#m2t`Wot_(3Q&tp})H=hmmla3|*5)wkg(bFJ^G6(HSZg+rXEm
zSF`-8EHo$2l&4-X6*dKbkc0N#T1;1JB)OJtWufk}@3{Qxo*`Uq{<M{A&}w0WbaD$A
z1ogz%*HVd0fSrg$DPe3R$A#RQk_=x#(XXJ;XS&t$v~pXFCKjC^UL0Tw`zS`CUz@)(
z8m_t}Qz$e*3~x28!c<3QRc7@nDCs^&Ow)huT6ST&epL+%j7zf+zc#41$K&2*bb=W1
z)vEx;C4eqlpedh_$M{5+ifAU1V>Y}miQ}nMzJjOFX?QKMS`H4Mw)g*bj0i3GpnH7y
z-N8x!dF8+UDL=%n)%o|=-$dX3<?;E~=YwxA)~NYw>EwLPLPc3Cz}{jcoP)-tw~lrA
z`TFr=`U^jqzs>c>8?FCpZmj=#WBqZn`MC98>#aXGH@^NavHn{Ozzp}X5T9L6wsZd`
zKWi%$v6o!mPAEV^^}Hrp>&+&zn$u_qZzk6XZ^s7R>QXX9%jR$c4Iox05#Ewc;<Fi}
zxT6v%jo7?%HaTZ~(iuo-9HnuDE<6N2$6#O>D29D0&)LGaLibtMXksu$mhO6z+{6P4
zZ6I;sZN?8a7kjLbPi8d5pwlavHqqUpYz*o6$$D#o5p$R;qsuI^ir`_u_z&k11GgX%
z7{;j#&ta+?XJaiujpxAUjR<cy-vp@28U)n@=p1@mm9+vJK$LOGRw(RXCuTYp2AS6I
z4wEjGMrf1~qqMBYSwpnh9iToPMX~lZCCLa{lUpcgXs=VG>?KHHa=0wil#W6Q_hiMN
z0wS3p%3y*Hd&vmp3?7;(-qH~Mh`J6#Bkts4`VJ>tE+8xn?}Kp>LB1H5)&y72nAZ^@
z>)=}LJ@0fy_xNe=`}WC!fX^>aj=$^dAMA@KKf=ocv3LCP$CJ*p=RNWK_;CN=q$}D-
z`|$LrchY%s+B=2^587SG`2b&3;Mb4h;D?td2i>kXJ`tT4FAqCV8WexhKI(N2y0DXu
z_6|??J4ep~0VRl|<DNL|yy)~GS?@TYT4lD1l}{W$6)z4>_MXGn_LI(Gr}rZ@<!PsP
zgq1yo+S=k}`=r;|J3VZlh?l1)FORzi0y<U^`<?FIVY~C<V7~$NL!IK_yMv>i=ss^B
z9%edv{Qc3v2{zv8<&y(wW&6qD0oKI*-0z$m?Deoi=F=Vw9yIU}gw>Y^dma36@WTOg
zr+xBcAPemtygG$M@KWrzU$mbcbSuCnmyZg}%iih9!3%6Oj70bJNw?SOo%RmIv*Y7^
z8td-C2}<H!IHnJeyEI^@-Gcz?=(VY8C<8{TyM>>h;4KA?8y>qSC#Nrao#Ue#O!D_I
zFwn|2WZkC$K0e~!!XO=={D`IEIM4(H;``?Z@cabFlt!zKqtJy>+w0lMph6hLp50Ax
zbnxsD&-=Xte0_|?eBbFF)L_Coc!PpvV!hwDq0&?8HO>>Xi$8W?)YQriXgWpbsc7$i
z*TI&{)UX1&9l2U)*!G^wF~`-va?d{%iUqTmVEEZ7h*TUv9;h73@1z`Hz98a+q?M{G
z7%@vvw3dk`aqsP1Spz;Sz8K?GrsTrfHuELf7huA^B81pzPO3_V%M%|#HgMdcL>I}C
z2@<EQ>CqvRBl}pPk<M<_KnZH8(*{*5d{ew*6+ZWVd<n}yy>!2wxL=RjFL1?lp6wkU
zo*aK~UhsEO6K{;JY_~K@m%3tnQ?7;F)(uqLxcQ*l0ZjSB`zmNLm@J<{6I$>Q{x1n(
zmCv!z-6UmO;%+jVzU*$2(%$dgBo#RfVVz&~am>aUL=*J-o%YEPYTG2+&Biko!$GLx
zp!O*sLBKw!RDJ;>mVD>KBQn`+a4dNG#zN7eL?7h<ehy}qETZ66&5xI*tIb>{nCHHG
z1go(NTp`HRjw+m<eUc*qqrT{T$vyxl6-~OR$d2?Px2J8#B^;ap9RS5I$=QcM%&v#Y
z&ZEOeYbybo^1Y3A9<?8>tZ_O_;_r(1t9bO?BRD@E9X-<1$OES;{OT*4m{l{2$<!W*
zaUiNTRa~j5-YMUCT8&kQR?0-YLZr{UUT3l?<WvrO81;t=xMVZq{H}5o&h9dEo1NKD
zl}e@fNZ>$i0jk0DC+9~!9;CO?O`w4W)M<co+JDjh+wn=Qq!x<$Mn#)X&X0PWeoife
zbG`-&6!DSY`??SE?}y9i?zN;vwWvO*zvuI7xyOl7+Av)W?6muzn7h@o?8MxymSrcV
ztd?aIbM1cxecZpe{x6UCT%hw?T>tmyjj#XwIHUjD*l0EX(Et4=`agL|d4bqB2vaO!
z&uG#E5sD$|qTWTMk=k2b)#3rXIGco%Tf8|%=?3M3P)ELMz~c&;J89M89;z1CzZK8I
zXege|qVYI73ugnd{T3cH-s%Vc8;>V(*tnSN`U+^Si@(LwG!jQK`T>gVU+~-i0z+__
zHil6&NE_jKV|KMG8$g}wvhHsWb$?G$`-VECnGE;LcN*x@q}`lfBp>48FvR>XfuP0b
z;xIms#!%}`L$n&}7{d<({X*MxmY0Ww@{czbT5lE&un4J7lY<<HACnnffhT9AfJ{K1
z3a8?7I=$XpTN`oJjb!q64a!`D+SVFb>9u##+3&QU9Urw1JFnZQ%E*bX-XvbR)oVN9
z#fGuWM-)s-o*u|V90rc9fXgjh6s!OngDg;>ZN?%np2nuQ=2?<uc;V4r;vM`k&3S?4
zbwO%jb;aCcrI!dq5i{1EcoL2WoE@=>A`mKIXbI{G!7jw7tpeZ~FPt6O8Cssrm8^Y9
z7I-ydw9^i<MLZOj^dx1VYtmHQ4F{hw{0Qs8GxsJ1<3L&|L@suA=%C7H0U)Q0z}~lA
zyKtLQOcV!|5x#Mh<#!xT&+$N{!lp&d*y3%;HX0O(K>{X*h>`>c!x*@D)m~Ma#98Mt
zP*Z9E+KJ%s%WD%eV7lx`Sb@#b83o)9rwrb%!V0|-J@M*i{?36e@Y5?53k@%6@wS0N
z3c#ZtM>-{p6btq}55j4>()PS%FLWJB4afbG$ljPd;3Ed7PuanUec&q`Qxr&@E=%8A
zP?RG5_$)#zAbiPpA@P($9zF+QD;l4Hm{s909*Ybh14CSd=Tmq5^F=i5NsA4nNkMd!
zTwu4A1wv5e8)(LPFC>4-$b^+r+N@fpvN`LBN^_!YiuRCbI(o_tZv$~F&mIMgli$O6
zLqG;H)n0o#2K>1V`eZk(dww~aWSSRF-BQS$ARa{xaXgmyUv7pt)v%?xW>-v<+E*O6
zN`C9Xw+kI~3C3=Mt30}~P=GLW`zy-QlQ(3Z1!3*x%mo^b^Fo8B;*;Wn;5EWQu@v<J
zN=8su*d=<TR-hI214CHTqBI*|KjX4Qul)<P2t&1RpPz-K9FIsoRyT9eeY&@>-)~Jk
zV$Bn3g`{|MZP(LK!Ew}~+b?i!auO{9bLvg+u`Na8rVQ|?$tbPd*>sD3<4M1^E#3?e
zZ-Q}b8S#brrj5o1H<gcisij4`Rv^rFYe6<m+tWP?5EAub<CBSLU&P~CdOXHFoOMTO
zx5Z!eC&X^Vm%E~gnkW2NC(FZTO_g=5W82$R%cp_1n0fDMA3QB=qRe5nQkPNCQ1zbM
zs=le_vN=s=j$I=jXIKK!40PWfivMTi{7lOr;ck}YDOeds9ZEKt{qtnlpSnDyZKMke
zhd2%!1%1oce%p}v?tRGMt<UYlBpSgn*@xr9&+}rd4EL%!V^WZ(U>v=3D>CIT$Be7e
zc7;a<nAa6;)DRB0%S^+AimTW16cr8zeWJ&G;F0N2<|%eSiK(J2)!r=_vTUWCuecZ|
zd3Fu`+l@*f>dku^87j&<I@Z$94Nxw+xE2Ep50E)uo+ogwVz8%Z(v5zdp?2Qh;tTGj
z3{BbG+)Kv4%-+IrkQ<0yUzPqo>_->jY&f+!DqqBMl79&Joug2RmokbeHTo@HeB@);
z$>_!|yQ_lV+3eN3csR77;r&7R2KlB+2Kiz(By2v6$ATZ2B_!+6lx&Zt5yLf@fXpln
zx$Q{BO&l6_mti51d7!Qtb|Pkp!j8wxA=ShsRMiljsWKNuU&hIV5wHj+Iu!3F2}pyo
z&_JBcrh*JuO>yyvqO!>*hA?#vZ`@GBmrLL+$<$>+0HVb|h1c?MiqhGGk77Ilc6Q8J
z+Jb`;pDDBAO$q`*;h3oH_^W32+^8?*dU`95UHn*g8HC)WRFyo?rHQUD)~}xKEsM=B
zlRX@%qhK}jVwMY)nAVKQmRr1TJ!ZPnOkjh8FunG&MQDul4uaJp(QWh7-1j`P*x|V`
zZkI06igYU}VOhMgd5d(u6@_Po=E8;f7OHDm!7J8zw$##9?yL)iC(=REbZfL3c$sHk
zSt#s(gB@WaZjg(|MKqy^vyl4DIbSY&DSvU-EWsQtm(DQyiE_ze;6zk;*ny-~XJimv
zQbj21)CnAw@kGMAO6M-?B-O=CqKjd~ay{#W&Jmv}<^b7`$3JhZZP3SyVREe_zv2Uq
z!!0AfI<2x#&+^G4Vgd)zWio(<P|R<c&xAQ4%P!d|uV$2sN=T<1pdW-k3au6G_7ZI<
z?nr;+6y1r^sJzS5lzd?i#p0G(?xt4z)kIa(l*MdYxsjE9kYlD*2hq!TihofWk&j>J
zk2cL45hvSP;|{t)NqFf{a@P}k6H{0%4N6s1s)zhcP?w>tx$Tp3G4)n5akqFxF=28V
zOixbK7jtxQ41+R|;lNC3%V<1Hwruej)2@9d$&aIvbatW{ETdN+&uNo}Y(m2YNfqw6
zgD*|%_v{67mmOVHmbJqjK9^h1vxYWPum+vQC2xhq1AB=17U_XYGNi$1Mv@W*4Z2$t
z^L&RFj9@Oe_#*pN-;LS`v}VP4Pi6J@zst9<xlU1z7X_8+^0f5qtG)62-0VEW(ZE2~
zNcFqxAA?qWAmL4=do|}r>?J&m@p@n?X4ls^P&!^F+eFA91Vcr1?P$=V-N=^ZTy+f*
zyVQ-bt!Gw}AuQM<w4-B7HmZ0pSJ@;=b*wtBi<;~VfxT1J%@kfL26kOA;y^}}4^Scf
z)MI^iYv(T9)B#gmc_VY~gc8txq+x281wTx7cf|Al$D?}llXx>6AR7DgeiQz$<Lic<
z<2mf8z`f_e5}x;CzWJ1KCG1X3C*e5N>%mt)=H3z24#Mrj#~c3tbFYSP?evD>yp$Ej
zJnr-ne-{s^;7FLdEX;D_mXe=6!R8*>Ik2+th`%FzJ`Y=fAAuoyQH@tyL0mJ>UsPdq
z;feU^=U%Q?xC~y{^JGoEv}ut<AipcSQ6w$$NLz9-n-B#Wpx@mv<;!8RIHHp>Wq;i}
z9+vijb*B^3DWK?BEL`B;GcS**;vTEN=}~8gkS;&zl;MEhn97f^?1(d5DhoJ<zA4dA
z7595Qovdvl3@bpEb#-Z4Cx;D*xVlWGt65!&N4?z8rexADlRfui1x7i%b=V7QnACl`
z^a;Q$PD;Lv5MNmeChvw=Y<^r_sdM0%dwX@4?dhkxy9)H|)?L7Fx33D?;*%pA=j!gj
zK&HtbbRId3#&4&WI-?X}4fKPPn2`K53R>}Yra>ndcZ*f^*!1+4+~ank<=egY``hjs
zRzGkLNZN3=%QNE!JEO(&qa9m~uWnGc#1Ry)Z}ny!kVUH6vNR73(HS#xIbF;I@}5J5
z$P$G9q1g^7MH32T5F}4yO)_i+gPHc!(#sisz<~BqV^edb2T#zRE(!<azz*y#EtF7<
zqj%N?750a^=n6!oT*l{<Ax$`foZM@A6u8}SNmA~<+cHP`?TAsX6&5u?@3vZPk>fub
zR(oM`(O8IH<Oqdb-|vuGEX3Wa?|0&Cb><?j);fYq)_DGQ-rnytd|Rap%MPyo8?CKO
zmA}K~h4X7oQ-%bZsdwkvYmAYh=Vr#QC&`qIr=_8wW!mS;9a$w{Mgd38%Ncq&-*A4n
zq95jGY}>Kz$@i%;FH3fblUd}w67Y<KW+UG+)!v#961wmRcqex5Ab2MJ%a4N=gH#FI
z_2#ZSC|=K%N;W+0ig>x89X^o^k0m~BuQpUtr&}V+Kv!Mj20&YX;#9|DsT%Z`hW5@9
zUG2@8RMvS^jaorW7r86K_X9O0bOr=@?WHPb7+qjYcfLv-1lsW4TXA7cny@k@?@=+Y
zjGiqXE54DHX++yt&qIVw!h!1nqtQZzA#Z=fL>6ist7#N*K{Y-4nf}{T>zTzKIeh`?
zVcUM2^`O!U^tA5XtXv8WZ5y6fmZWC1xF)t5LQel3aN=aMB%zJ?n%oiX)jc<AXKn3e
z#s_2>1!?{+%&G&mqDqlS3fN_opDP)y=Z>1tzsEn_Ndvla7uDxNYEPWCoaU1ry2?+p
zw>x-3N>o{r%AO^|+1aC1Jdh4RYz4xa;|GBihW^{eMx*Ik@#hKQ=B6ua6dCLP<{n}e
zwy?s+K+VNgWm&uY($p5#sB4rjA&&Xdzb(}uTiL^Ayt6~ECB%sDy;8f7T|jNMDgN;f
zLC7F7B!UIjifd+lOT^pbdJ|z<V*}I533LoMX+b}($Mt3oH1$kIJ~fenw^Yo}Hb>B`
zHIO7$o#JW)@oFvd&iEzG*=0Z_T8OW0QtzelD2CE8IJ$d(!M7_a+MG1S(a}ERM$`z%
zaqKz#e*Q@A-RJOtHn_MEH;;%4-l)O1+e}@q<@%%HRN_^N#L$$jRikg^ZHyCGm>~h`
z5K3sfah6P{$%t?__Yyb&ChpJ#nSq)c8?Ykg{;UY(#eb<@)>!f{n;9K3+0z@uk5?tx
zaN2ba|Lo+*bOS?S&8^wnJg!<cYM~JncO1e7hGILt#u&5n+8T(G&g$^fbravZZ2g!$
zWYnfA1-`%BQgE#7cK$A$43Lq7Dr9{4SkdiJ`kir@jPXEwS)mtpIi)r0h@j)qNY^G_
zdq8!(9y3aa(RTSe#%RBUXgbjM#vi(fnj_pk5$-L??6RzFak&?>;m}h6@o88>5d(`;
zBQysa6fwj;8*@5^bB2N$n?%F=`Wasj*V|Lq+c{>ok!1j_Xm!?Q(+W?c&%^KnZNjA*
zmN+*w=<+bC*iCN95(*6On0N}8Itng-aUF>&-m5Bc%}aF;5w!rjSU6l|O?T`*NxiFi
zX&6O}z+?PbiyT82mbj=uXSo^<m>9hgg$DWz7icbXAvfq=;Vj_gbt={QG^Df|joU(s
zJ)zm9CS0reMf?akO}X`FMSJwlkX@%7cyWvhER0oT_7)Ydz&vaTMh-2?XFzF^Te;VC
zPnB(mzvUcjGQq8ylJHJ>l)8%rsWhF9qEy56-cp`$2%|pGa2XWR6Q~@;Y@srSbOB~N
z+Dm3%H6*>Hz`D{1=aNERqOJ|&hN5kWUWi)CrBa-7tuj@IWU*%v0dXLS$@ubT4iw9+
zmtgotr0uv4p$=pzH|l4%^$jf=ODPF`W6*#oWmHJMZ$TFt%mQnCjIQug)LU9|16nNG
zw@L9-SxcuB2o#U71R+?AJx?N#hcq^1qX_yqjdD_^EnCasDzVJbMvbmv_KGD)nr2Id
zwy4rsOSaM~9;qrZ<VVUO+7L6Jn6!`@wx-46QZ>3HKapgyr8<2HReH7nR;g#bwf(>t
z3rlyl3#c8I#&r%|TIdU$f*;C&G?`K#ny56y$tdbn^C<ju<USI{)Q$B+E?LT5*;k4H
z=HJ#?GFyP{ag~!8D2RH;t=7%gEjN%Sj#BVh1l%%Ek^)|7kr`9WVUXeamf_+2P&D&2
zvRO^K$N72bGK=gg3jG>={s4=vT^+Mh2dli_)vDz8oZYc;L(=|6@>;aaEdbi%x&zSB
z!GQ-o3VSO6M>ec|fi_zO&#^inMI{v2i>yp=nC6<7*w!UxFXjwOvUVjhc5ZIpcz}&J
z-fyr4t2*_V2R&Huv|C#gAZi)P5Nwo2*}Zqo)xX7VD2bA5#}0$MJEBU-WLWY35cnJr
zVjX0JQ9Q0y?bti(Kk;g}VisD(-ZU`+buQi%rf-?xiLeP&!tEVwit7|a^)yvI7^ZN`
ze$!HKT7d~fsN&tMn{4a3mraw?@)^1ixh+YX(ihwEQ^g6ls9Ky+5DFP9F*gPjKvJx#
zVbX1|^2g(wCtQRRENjguO|<H9wBEEjiw=UEH<zrXyIqp(SGUSmZ3W6-QKN-njn`W%
zRW<GyFe@8FGe2)K*RRF%hvnEQoO!hS?43i(-GnavS~i;>?ODaRlud+AT_^n1^(?pn
zK8G^lCA7=`vK5FrB+16FEGDOLD&NF-W0fbX&G$`5j+n=e+?@%r>1H#qiddF*{wq+y
zLh6Uf{jlF|j9C9Uta9pYS9VC=5nWHI@(zqm3)$h~YLNHi9HQ9^LIo7rGUsq{^lTVC
ziJd`V7l*eLG{&+dT;N1r0{pR}u^IW<liTmG19yOVwDG8l;R;F}t#OO<rEO8qEgP5x
zTIi3NTFdyv*ThF<a;Le#Z7Tw4IvZLZv~~>l2qwFiR85kHsCI$RKC420g9p>=Xp&Sd
z<3M_zXF!qvM0z>T;KE`oGi6%FF*`m3mbp!+FW3ghZc$#zF&oP>@G)mSBJT|)0p=_c
zbD$^-zuw|%ESn|=PsgTxM^TEpP-Ejn0e$Z$N}LzI^`C_Q{47dPOM1K39>#CS(cnq)
zegV+uh4G&^9)Gj(O$PtDx!(Hb5B%pp@Sp#4A)H}fKRY_ue{%do4!YTSRUB&Qt=yJ5
z{zC2766jvDle4Txdo05Di1ErPfS2-s@+v_w1IU~5;|%bCm9ITI!>9Z4NZMbGU`DK~
zppf4S>TjSNCfI-q*CzsoDh_C(fCs(MyOzaTnD3Fa!a~R=bvqJ|<B{~%9YJ#Zd`=hY
zQkn7QT{t1T3V9)ks8|R<hQ1bz48qW{V`ZB}!K%?AE9SczG`k&LPj9h)<G6Kx8J%Cz
z{?RX^RLbdbdVtU12-f;#*#}4^x$=x)U@;bb1xJWlN^9hIE4d}`v7#6&UVty!|97BP
zVgLB$!AZM!e9}KU_`ctH+W+og55B>n^yKiM|FrW1B<XfuA5>L+R#i2Fg>4=>7w2&H
z+l%6pt$k6eZ2nJPbCbi$o0FFl+a)-Vprt>H;)Qx7DZ6qBQR@D<wo!BPA0^`sBg5(Z
z=8>J>#oR2InxF}T=2t22;Z@@k3TAnHy={P1c>nj)*{GjftC0^R!)1!fKiRcDcV{D#
zBaz1KGg+kMAz7a$9CuP)A>#IA`}gL4AX@uK6*l&>+kHmy=xE;)Z%<2-tz}3=Q9Lx(
z*xZ+4EC2VScJ7JCJL`nkxzh}E!vFn5vD(yERt8<fX!{S1fTnQsta6@^sm@_^F~#DZ
z7{eV^Y%jT{x2*s_@~e$SYuwN^daHaoOp-~aUVi3Q&d;nC@B`TlQ{G+_4|g6r#nGcb
zJEhU1uT5d}{hLgW7!%gacph+2*@{b-F}$!LB1)N=WbGedEXe-T)1y5lLKy-_^6*E}
z{}e3tKPe2png9Vxl3ghZV6(J@nOY5CuHa25`h$fIw0RBIfNWOFnf_n}Qml-}Rr(f?
z-0_~jQ1|9HT(=q38_@_x6)_T^{iB%U_R{B&-AJq>4>%?1U`*GaW9$)hP~}9+oN)4%
zTxi0vx$Vs-4&Mi^n~gk<@oF5KE=MS@d>{h{LnE4&8Y%OtwY%V&U>cMy_#t&kQK)R$
z52+8<OrQ!}&jh{tSUmL7a)4Jcp76lYT$9-g49<07-}f2yYIoVCV=E^Ja7z{RP+XfX
zT<iAJcV0@=hkRxK&)!m2Pqi<Wy9XRpP#gff!b<0C0$%V?NM2Ge*t<4<CiOvHE<+co
zu%bP0Zxj36rr_0z^_tUK(lo~D5m1DDdvzft6`4&trzTPeL26l5!FD$@F##S~V$3-^
ztfJk*Q~`^1QMEGDM_x6Ja54#R3D1$%lKNh2gk!gXFTyLNITCr$Wr|JHWOWM;iENra
z$K@i3rhAfasAd?YOE`<7bV@<~R1Ro#Ne(orG8y~}Ux^U%7%p#nmIHbSBT3h39`#f_
zW<8cuV~8O)iU8r3YKpHBbE{|jp!c8GVW5|Oecbz{JG@L&e2vIg@9Xh7x`Sw$Q8Sct
zI?b;!N_hWC=cs-1<MGRWcdvceKIxwv^iEHXC{Sh9VS#NB`at|TOQuM|aE)^tEXVT9
zUR<Rf)YhE)x}pxTahz6~r18K&TiT2>tJPOPo*;KrGVHUV4K!xu+i0p8fQmv0AYu6;
zVN~e1Xmo>cKp4c2uA`+g0fQoM!hV+d78qDp4iPy)WDsA-bz<a^U@5T#Pm<I()-=d?
z#i;g&Z**_a1tao}=bY}T8*|F<t6sLNfsSPU1Hgs;P=Pv}HfV!ZHXoGJB@YePY*Hsh
z_ss}|z`>^FR@}fFYw0?$!pP##up3xR7{O<O>HP8&ne*e#4`$&kMw4mu-aCuZxj<5#
z3?SEv^{yTd<R^kEf0s<IWQZj4$ack7-^P22_rdO~DAgh`8Rf!`BNUDn3%FWn-jqcc
z;f;8GOLDU-X=N)3{IxuuTZ<zc21+C}!k}I2M!r@TwvUB!G9(@h)UC`)Mj*|Jqk%1m
zx?<=e%hIwN<1wfjt$}=EEhj2ah7#?yd!4<0yW2fD=~ZnF*Y=LsfQCHpbo(z3de4vd
z`|#=b=<vt>)8iAVTZ0Er`|W=B<-uO(DHMb1HRU^FH3|0imVF~OLMlrhSQPIynMmPr
zu(Q4;2HV6^qj%dNgPna*<*X0YI<8^Se;@oLFXrCEM|G2j)-8GGV3Rl=TZ#XF_P(^e
zjpImoKl4{i>|F++K#%}sJK^Qf5@qp?L@Ffh*om`vhQN><3zsnf582w^{&rRMQGE>t
zB;+`WXqg0N`lzn1uCA`G16y6&OWNE%My@#oJ|sz{kU6?H8pxATK7>8VUwH+Z4pW>$
zk{eSn_ooB{N_P*Ouks92i#2ybIl^m*!}{U~{+%sN9u-n4=UbsnY*Z#sa977USXU1n
zH@7vO!~$`rRBtu8|8E*MEM}7Pp3SM-zuRI>_1(}UQ4h^(L^@`N5p&?B1Z`}3@ASFg
zQD<R1uqZ42Ij3NIt^-*?hIq%@ftz?$v(Z*LOU4~>erECRcQ`KWlhN*;D9nyUc!*mS
z@+W(|yXPLB`tlNe{`pQI^a2lg!)ZRD@akolT?l#R)W4csMCDF1irPh!juD~p!b=pn
z=Zt!RG2?6%5FB2HeX$12PL;Zx%$znDS33KuYLCH(HDnhb7P98z<M57BtLQ_t@r=ry
zn6GTH!IsZ>Y$y)^Lsf=-_~WfVW?^!?Ssg@H=aSbY&Whr#%`S+XuR#IU8FDRv+Vo_B
z^o|v9W4u3PN;KgvHX(y-g7G^dcw4PkR)()eCsz=&twZsVC5RQZgrx|>LW1DuAnh3}
zK!&r6`v)TkJ*AE`XAqKNZ#4Y<)GXbG3X?2E5+y<eRAQV%KsYS6^cO)evX;g1t58>p
zUo{Ey@ymih8XzQ5vY6&_xKc+?Xy=(hJ_LFT8SZm1ZkWxco>WG$JyGRcSt*Z=>N<k+
zMhwrK`OZhGkU}Haw5av9SkQnfypQ2(P>k0obtV`+2A*BySNeX%Y~_{tbrqBnhpD*5
zMX<>@L?gx(I5y8Aeg)bsxLiQ_X{O1v5+hjFzBE-3m2FLFXKGe5m0NU+n$|=kkuB=U
zUNYqjz<iIcbxUDU{4`6!GaJr9kje~f<!>|TQ|yQYuDH~Y2WxnT37zxHh_J4X?~42;
z`DpT7_88co8Van#a9K{Y=4k$~36<)z@(xRn1Uri(mKdYA=tBrK#Wf$RfK85}M&-eA
zY(5TQA)c@kxdQyP2(LO88dcExSi)l#IN1ZJ4mm6}{>|CZ@z<TR^XHw{2j?$q;4)O#
z`bzL0GF{bTNEp}aQKjYwCgJL9y-|gER3PE`sE_Vx%ft9Z1mk*l!`+@;ctDuusf^{2
zCdb*WR1}d}l5%6<_b#+7tdd@8k}kyzusQ;Pgy(x(Q(-3fHBK=|ClCy}G+m7)&cs%W
z1z_o#31z(%Na+v^l%v*6o#l099`9QH#Z21vb+%dP!Bh23Om5i+0`z#uYKU4}=>s;@
zmO+$HMHt4v(E84Z0Zqjji5ru!VL1&(2ez7wprn8Dso`^_{4hxm1gf?E(m4mIDx^#)
zsS;d)N`0gkBiJ8|$CD9U9Zx}LrWcV|Vc1mzQ7wZ9f!#)4sJQqLt{e=`<&6*SRiZ%C
zTDHIn4IkUy&Wi)=Fn*ep))umu&$uL5uFvi&cCNkCl%lye8w~C{8gL<J25&Nj{I0u{
zC?+c?sEKMSlI8tEV9<qykn7ojfUQ8k*c{!hqnk`9qOH8w12KV{E0=7=^8^}3ub#0h
z#I;>qKukMh9s#$i*uQ4=OG+wn0&ta75^UI<+qF&pD4k&pWfrvXE!;Z1HMb6L%@V^a
z9aPxA{YSE&jNy-cCn1{Cw+!OtI+ih>xT1SFb%;`HvSaSvjO~x9ZMqGJv_Gb{Rhjwi
zzc+D!u}~!RafDDlC^ZD7ho3*#oXOL2g_$!Y7n(nLPOmVFtmLETQkuYBlB0_6&5(TE
zA6<YO2J_Y6o(|*)F~`wG+T7E1e32B{4F-Bq*6YRfXfo~2rUlLnCCeBM1`sRI>9w#U
zV_nIJG2?U!i@ne(#q>4xHuJM#6az70kPpfjWnGLKqug7>*H#05w;STu7W{fj3A6G$
zbR&4DL~T%rbd&G!Wl!2XVp?z=z*u)cSJb+L+?e0M_932JQWs!^gWr49>S%V+X92EX
zVAc;AEZ!Q^RuA`20&*RJgfJj33PJ9qMrE8StC8<6sS!CCYKAu<Y9ok6GZ1FUc4FFC
zaoniTiv=vJo*%wAtC3*{Syx?NLS`O{+ylSqNgcySDQE5|g|2PP^+)+(MbR2#IRpT7
zw{HoEpMGjU`1Df);7;jyItQ;_p<#eOvbyNM8U`>wZspQ#@vnO6E%C2<>8Ijf_0rPb
zw~>5TUs)+fjv)DlKS6nYPBpnMoS(~i)rivFdOZQ<r!(gJ7pY!SXc5<8%YAUU{=Z<d
zesCv_e%JrCJpbSJ=9cIGyZQNNpMCWI{pkPuZ|whj{PnBDgYS;ceD`1WW_jMf+VfYf
zVb}5ZlzGg8J)Str$q34L>!QD*+e$hCV=Q#7om}Ko3_>X#@0{4ema{O^Su|uoa@U90
z&4s>Zd*A*QlvYKC!NU^RpoeBeG6x#9i9Uep7U?1xh-D6cFEH%}*sIFqBbJ0`><ux#
zrrPpB&XWeMBqpK0Wtf9Ti~>*o(9cn_QJ|1?1!5L#B*7v*RfIMT9z2(&;N{wbZ-)8w
z=kFTdH~#j%!A9+-l}n4fPk9jrIY@|yf(j<<IB!K0v*aey^FpScQbIL_z0?hNuzDwh
z>gAC#!&R)<n0V*O{Ig{mm^2>{)NBA4SD2HW9p#>cU9mEgNlJN}MhQgF$APOfPMBbh
zu3=GEVH&IbWtM{Ujy6F0DWSQp<!C3cJ`q+>2<6SG=%2D~!<)VDRGx=$>Kgx~+fnYz
zI+hbUHEV?L>H>DGrj8Sr>3!rRt*Wm$%poyhdnAfb590UR*s?pGUp#?VLbz2f=Q`U2
zp=mT0XDmShYk0LJy~n|Fb}!i?|8QV`4l@Zc52*=_ASC7ty#)}`qph-1E#<Ool`_Pj
z{m#uJ@}dqZ$+pC~8W<K+9*75kJsVLm4_ROpV!0VlS)7BYt)HG3^5XN|6^A{Rpo!Qi
zR9Q!+suvB01qOqJRPG95C!D0cJRPzNBl#XGk~82OX3UhQ%>5M8$fOoimmI{@W`VOA
zw6t_rcz$d<p-~EsiZ`N(o*vo5tO-_h@Fs&_(j6%sErQiZ%G8UZ68UDrry723C|aoD
z?<GuG@{=Y?!0J!x=FU!33~{gF8lZn~kW80{;p$<O0sX7-CDm>n9PF7DgvjCzt|mW$
z>lUf=_mZEC+t`CqgRxB2GnKMyDW18`u!8eH;KLb2JyKK*IJCO5B{RJSV~bxYCy}B8
z-gm@yLjHFUz<vF8l20?=0Z%3aSO|ty4MeE>(7?@DVd^4{bLXEeT2jGr0>6FWFfBMD
zT<hF}7ug=AM-kl!&IkDkBs(j0M{&mz7=<Bw@GTyuQyC*(K`_tl+TNxTy$dHw{jI21
z-RJoiNp=(RB2!?*c}~@T4Vdlsivy137Xb89oTCJ<SsJ{l+A0|G$%Irvp8Dv$h?;!l
zct>oAQk}rkA?Js9>KKhNNr%T==E)PPkC2z0;eYFLl{X~)Du^|MHIj8HhvhUWak*D*
z&)8X<otJbL;pDMss1<vG{YXjEGD+N=?m`VpQC9FTr!8=vj~a1(v{!kgy^5N~hB*S+
zJwz>07P3OA+>J6T`7IcgIC78y-EA&lI)W@}kDfPaH=JiWLYzy-?UhwY@G!<AKHM@E
zvE^`X7@Z0J9pZ4z?@sRB`urcW5Bt}(PH`lIhHDY+#bTpKXWC>X@V=Ve3JPDIO$t0^
zmMdoY=#pyt(KTo@(snE?q;piIs^j-482KKG%Lk(Jsarrz;;0VXXL`b&`8|<-aM7Nq
z7VD}fM|GA>4Ux%1kCH{r!Oqr;8qW^w6_4KL*saWYS$421x=fI?`S;DnIR6JY_Fl>K
zYAcUCZU!S%D_vIkA#|l=*Hsj_>OV4?sfscNs>On)rRg*sw(HTi7e(EufxuGkr=~a~
z=qL-ovhvH?a5u1SvT>33alPOY=amIJ6ZCK_n^>V}_`~FPnlWzHEk<i|&31hap{~J|
z&_z-Esd-zgvv}$FJ=sk*m2Wb1y*}LGY}){nY$U@SX(A%Mk0bqV)~zOXca0x&Bk{eP
zH|smL<%#r^cpoSIz<FP0d42$$EuF@~vR*nR>e|>y&M?Sin)Gr^r^fc|TTE*PDzb|V
zv$H_336S0<4XWDDroYDoUGivsF|&x;$$Pf#AQiIj*(VPGOQCv2Un>cm7gdnnTv_|I
zk-eaVt*~Al3$7N*3eNJ=EWb(nAod2aF^~4tgTT0?%wGjBLOLut7{pmvL6PEu@46#o
zVCv){BRwcZx2UPvf%vR8V@!AJ>cBxd5UuGlydCkprNqISW(w<F-jHc&IvC@bZ!o4N
zscI8N9VJP~L@okelXG^nfmvyx-ns_8`7ThhMcE;s<P)dlypG=mJ8lP>3xU`+9j`kj
z10CPb>$qh)X3gCNJ8lI)yptV6Q7b<{%>aQufo~>4#OY3JQU{>@-$i3x*@B4_@_r!$
z7<PgFUmt&#Xa9pY$p>_ie7!XLpUp2eKlkGQw_6|We?Hp({2SZ<9Deia@Dw?fZvkY#
zTXqHjt`p|AA>M_Ol7^M=M%VN*e(w$FKSo0q^h{-Y7E+^_ZMd!7Y{2gp{%)I`M2Bft
z2bP;&PqNHPnC#Z$ceOIgz!)6=Z`|vZRNm#16QG!tpT+LoXuof2OXhq?CjAiDL0Y|Z
zfu5vHahYVPOxKthPAB#`NbLnN6n_vK4OGCYpR_-5L3<|w5u4N}o1fsLyM*)+*|gh~
zM`^&mIwX^Ak&YHZ*@Cm=I=yi$%Sro_PwyGrWa9%FBKMd^p_Zk8-~5MGJYACJ>2Gu<
zTvH*V?D8$I{CB10+rjo71d+85<asW}BZ!V7mnpJ?bqEe7Nadc<Qjyefoe{sd#vJ3|
z_j?J%QPeHXK^T!z-++pSBKiB~_Bj8?-QU;8x%GiH#Ei+gUEIcloQ|Z8h?6VGI!363
zX=p*R5-h+;kGw|2dXORB#F|i_+H+;}ay3H*34UWOIZ|gpFnoiv+t-tHe4Td-!D0lj
zJ^Gew3azMj(9-GAcXkxJnsSKRYHZb4^dZ4AtC8mg$MD9T293zJF~w=4*hoh>!qTYM
z1Ie?NZa!(%|7QypN-BibFO$t=n*!zW(zz&`Zn{r@gcj0GA(pnp|Dr|FkXz=hF4GoL
zvneChEa~Rn9x*`WXj)JbL(-|BL|yx_kwZ`v{QOpYHp-P63}bawR@*iDn&g*X#Nd7h
z{h9as=Din(PIkP{bUr-x9KBuF+83JEH%Gycael=o^DC-A1CVu6PFigEOx>mW!2FKu
zgtVfqO6LYykrbRu=yh(Vl+<7&`76eA*@Kf~12j{Lo9E#A_2XV^CwU9$70@*5aw<dW
z&vfXr%X|X5^V^YRYYo-zExGeZ9V?X0C!xJiHa!N`nosqOEHK~KO{p$2!FtdpL3MFq
zSzSsK-#a}7nRahO6;HbDBq>@=fz~em_J3N5SZVOTt^;4YE%>%xv~&PN$g%;y_ONi$
z-4YFN={oD8ZP8B``{{=I*%W2Apq>lj`Sq;WPLe@ORBVC!T3bL0nNgvORZuLp+UiZ4
z-pGzuIN`LUvVsU*&bv}lw4|28dI?|JY>Btl6v4f%i;=G>l6wo}XI}mYX-f;pTB-{C
zC@-lJS63QgB5xTzzpXo<`U@au3m}KitOf8W=JT6kJkCO1S0e^=t#{jD{!v~{*IF8t
zFuKU=>x?Y&s#GF39Gn%^0{~V)slV_{xv4%zt70EYJ}OTB1NA>IM}rYK!H#Cj)c<_8
zx%tJGr~mn){Zaq(@2LOz<9d6uwfRvAggo`-$?FsG_weK!UkPNtTfQS0Tg)Ztr@?_n
zfK<aM^+g7xQBST-RcXFjE9$P_7tc8D_;`^`zR3@Co8jqwkiG>m_&NYsG_zy&5Ku`0
zaHK;vGLtK4h1<li`n5V`hlGFuN_&+LNY|k(c5rkM*8esI@f$Nj7Tn9swYjm{FY^s5
z&;1UeFrMQTu)qY%{Ep@UY^HglcT+yuUlHeFS(;_t4yS+j%ZbCdKx}%I{W%v1VxNOa
z3n$rSDDk{9h%-+pzjCCF!RRLIXgj}Mx0Y(H6KamUicmf}9o-gh`m^F>2v!F*p57T=
z*~uz@x0mQNabE`FK8aZ<Z=sG_gl+A>j%_>}^<yBwtH*)hwGafsAL^bVr@g&zv&l5?
zLUPJ4ZH#RvrNcGjD{1`YXp+B!7_fcOI>&|GxYr6b{yM!20UrTHlny}?Y5ROIe$h{_
zK!EnhXnUJpt)fF^FBdYz{a0dHP)$|MYj|(3B}Q4(Ew3j3CqR9j{y3V5+B~rTwm$~9
z@-IoWt|~*x6~Umblzr%DudWO2Ysgqrr=ML;J7c=B?SNs(PQ?aKBMqWV;IvqtJAE$y
z>oA5E+TyfJBU8IZSR-$0zrlk$jXI*?{Avg~(T=j#K|aac^jz&FZh}NNW^1JrpmuPS
z{Q<3+FYk9k&AE^(mr9d=<oV(8*<t6+H($Lvde%8QK0iEtaq#T0#_H5ptl~zOt?hKB
zQk&JWtYgSUVW9nHiS8gg5L%J&7nCS)ZqC1np!aY(Ftmt~ub(6k)1#(wROjqTEDzvq
zoAn)el%LaOSFF#HF6HY$mlo5qyNvWpeYixhnhc#Mad4#JaL5oFR$2LYsZD8f!HK7#
z>w5kujM|{kYBISlj3KU3uj1hm)`ggd5==3rDvm1#4B4tTL}y-n<R}7=Z;}7aB6twh
z8hjzm#ltJaL;t!^R?e(8;io!g6xqh`;sRIebBq9f=3#|r0Dqnj=z};7Wc2A|jJIwN
zL7et+S64UCJhrSFZE(~i5zNkS=GQ{@f&;?Is4@jFX2Wi5X{uYZ)05+`1($wxbbJ_c
z`yfqyXLPv>jn%B|XLTv(N?l)Xrc4;2=qeglhgPf{JiM+M28-3Pik~hB2gqF4t+X6m
zP9qLyn3l1oU;LnI+<K(XH#_aAko$1t>yL(3_F-8%om|aepKhp=yYifECB^;VV$=tM
z5NEY?EXDz$JyMwzF~yK@I+Q@uIfmNtHpt|5nxvO9SNUkt%cYOt$U1+rSL{Ft$Hk7I
zybeKa&b)^*JckL*VHi<|wObyWBT~l-&LPgEbSnUMc_(A%RN4qiQxCi&FTWgTT~MM0
zbSw)NwGZa*Rk(c>O=;CI&6>J_mgbDO41&GZ_}=sW@g!`iDkw}JCrD6amC>Z2Elh8!
ziIPISbgw~%nGJ@h_7EacU8yFR9r1@EU7tWX6;&tEXf2mWql%LNiG~T($XOIs&e5Ah
z$(ElJoHkf>K9G2^IPI3wV<%%W;-hGilze=8mBS6*{8r<?lFW|RTk_7@ioL?TH<MmY
zfs@ceOfc<-vDXMSLJ^;QO_QYL6EK5`5v?p802I{LwmDUTioEsx>n!cn=p*G4UvJen
z=sVrGIqztX<kn9WI5>-YQkjFZI&(O<DM{{p4`J4hBVU+hAM1|hvTO^cN3m>ltT>yd
zlj-Yxi0UH4uN#}<zp&<MZ?E;GnzZd!Nuk!Zp4((Y-m<dUe(c(lJeyedI?UNZ%W)W3
zh_nb{Il2omwo8N;s4xD++V~aLt!Sj1ji)nQxXfb)+858@w!qpGECMUpAh}C;Y^iF7
z{nvbv)f%r>x|_{kt`FV1oJCx9j|#8r$~#<SsjD5%G~W5DKkB}<cQ><wldu{(`7*I8
zNafHvXktPy0*zT39J>yweFBZqi#qwQ{6*$uugSJgGf<qtl=0EMNKvTwvqZ$k#VbQ#
zV8!3SV6n6nYd~eK?CVx)0Z5&hSeXZLbSkztBb6DTQPf8(+da>l@6w*5^UG>4Ont3z
z<!aQ{l5*W*7l0=dt-PRHJ9Yb)zRlz*9M6|h*=}9x1zW(kJ7qx;3{P22CNq_R4=>zS
z?^r~HJLOKlh;E9jrsj;rSKClqq~p^1E&C#~S{(}`!8=H!p-Fqa4x+#e&#Df$SK{pf
z?ssp5(4w=z%|kSn5*YRrW~6?%aNVT+v{qjb>5s(qo5|=$vO!%G<H4Hq@$R6O+ZilK
zx0SFeFEmQ98oNkyuHj4X(_%MKitFq*P{kd)l8;Oo)RCknKAL9B?$m``ReCG`te>UB
z@@si$!s;&wL|H=+&=p28&L(PH81crx@s67%$o7rTg<m-YPxKz!`UzBv+#=7b_NJfa
z!xOZK+1{4RpfaeT>+fpahOWZZG#P<!=Yb@@1X|}ewgk{u_uqoH?1L_(3QJXB$(oGC
zme%9e1#5`oJuttv8H`oN7{d6BvUKE<U5du6YZ-natZ!xDE?AVQm9r)g9z-c4a}7G8
zg_>;T=SXO#O4r74TSo8bX7+(s!Eg;p@7ySLOp#Jd_XF)iB!vX>K%I|jP-Zph6;n6q
z+{Q-o;^@2Ahv=wLO!F%+t-K!H;sF_)|HOIx-90ul(=n~_*e)>%%H&}(9tnAp!iXo(
zO|qBZ!D?4enLGb3n+O!CzzT!40xA`0ZuF)6yIrl`Iy<c3fd!`(kB5AM<rgJMQ-lLC
zaXT6tPxcaBrskA}M&LkKt6P1UxMCB1vMV)6+OKhHF`>mOEmQ(;XJy{48jh+uPd+Ud
zLwqZo82dB_n0>sYk1^2L#$9eb7~7vKSeIEp0<uN-;mR8@uTpW_MR1nC(Vj*&+K10(
zwY_%v)Dqgx5NY=AB_37XvwRo>R!eN}I=wCen-*+IjgNcL^7bM`fndvgPtvm*PN6Cn
zP;@8q_mr~I>#a(?_$$@je<phWndnIsjeD%0?K`qy&8~u)p9kza-q7}IoWKJ6PH-b}
zcN@0s%D0(=$yJHTxugvt=mm_=RaMLKJjfUdvLWxh&4<0tbks4;!V?b|%K%+)4YWgd
z4bT}RXV<A<EBJdSN$i1YbJYWF=m{<DJ_o-MtkQ5>+82vd^@7FP_?sX=BhH2KI2)#u
zdo%1gXp~U_;4Yy@%-qz}9hNs??Ssy=%$dXQ#*^sDN4~RLXP<c`12Ze2^M#Zet2*eC
zJYRMW5t|i%hsJ8i+hIm(^n$n_q;9=UYNRUaU@%!Iy|$0Hz4bcPrHX2mSr$=zh*V5k
zKW!vEq=!Arw?&hL!PfI)Dq(IU9-$;&5IB!ecX<P$Su>|#uD-<jozYmS=K{-1rf=co
zJ)cH&Z^_xD%cvx(pTk`W&V*(h$*SO^YPUAcB&)@ws~$gOD9NgsdD?YFKfqhTiFeXf
z+qL~3wg>rM?!($mTD+BYezzxEA{S|RL51-JG*MR^A1h^eu`KH}R5x{L`VB?&jxZgK
zq0p`#Okgtr;C!QEwL3`Y{Q`Q=!mY}T1~#f9o1TaoD!2A;jHbD;`D;)$>+d67FNdx{
z(*!RKVFymv5yF--R|Ud`v%i5lLfApuasgJu5|!xYL>dpDr&pS8F-Nf2KBA{2*_uVP
z*q|#^NUD^dc~?KjCI7iZ=pF$Arquxuqemv9vjq-`k>QhZyFYmi?5HWBh9k?InOgLe
zJjpKc-a8*&b*{7iI3s5s<{=2V#{_Y`Dp4)S<%LB&j!Z6C0nrF0ZoLHXhVU3_;~7b?
zgI!8Xj$Fa$+tAAzi#(&;4e?_&T}iq%)S=qDU@+z4;9?5`NC93_<p;~BLs;Ci!H~CW
z%q57b?2--S8Lj<!?RwyH=Lt5V#w&4fmGAcB;8G1)jyL-Z4r*WJAZ4D6cMV^rd<mUm
zHUQqt2~lgcYy|2pN9tvHhw7m<I25mUN(jChUsCkWWNd~hs>*T1IZiomLf>JjI|`gS
z7Fl`@0$pb*R3&Y13w+p{bood^$CG6Pl~B>(x}jHc>8M$a6R=kgmfER=D(fLJgpN#B
z5#te9er)on!aX>NVOl4r1fDcOMOF`R>!jH?h?iw2*H(@WCBdrE0gtPdwW;@aTTAl9
z#$e5K1>&pqjrRJ6=|o-{K<;y?*{is^Pz+dP<A3FB?7OlNb`0a2yuZvo^tSI_<(0u&
zvgw!K!uo@LC3k$>-UE{!+3_I^$jS4QZ6`D)#6UAyFEI?mC`o%gXb$taT`5~x3}Lv{
zY(s&q<`y(~L@q;@v*~OC=MmIqrjQEf8UyHGK%%Z`KA`-R|0E(gpXBNAY9@A$B~Z!-
zrVEg;ha)i^X@POSN%0Hw44u^;FTh#ER!x4}qs_M@K!GaaNqAB5?Nva0gWA7Q@$Hos
z-vp2!TzqRa02PlA-<aqIJNifQ?J?q8*&efi_%>(Z?YgVi7nYVUy)6Nz{~+OwNpHUH
z#S_UUF_BE^5^Zs>tV#vxjlcpOjrC1RZ>@%Do=b#3Kgz!NkvsQ-PUDZ4{$lpDUcUh|
z+9gYlj?L_^FGJc!?+=hBtGU>m<zN<$iSMw-@d)qO6kpCJgBb<PRpu_rz|C!p8ZvDJ
zS+U{8k7}8p&VPPCFS1Vc7@&4cx{tv#|I^}s3UZr>H8ac>3jaAL{%8A(_7^QT{%7lB
z{+EyWUm^{Bl?_Ki{`r4tcXW~L{s@1X7x=8%`qR}Q&HF+`82oz&4*mYk;pv-K2giqg
z+))1N1B4CLJiv8oK1i<pcl;ryv>ht9Ux9uCOyI944%$Q1&<xjG5EnE}Lhno_myFzX
zfj)WrP|<XSB)Q`i*;<zyV4+I0Xe?6-UfNN4<HW?WoX9F=x*O1xyIkAt91JetDJUVJ
zpc#y;nbLb2ZbLayrx@oSRwP6il|e$pu%)G2(`06mKgS~>7Rij;!G7#nU|_Acn)9Qq
zvf45p$-8Val0jA}xxCEYig{|3+j{cD<$otbK{-9KChmmP57{_2@hL?>k?+NCoD9rC
zOP<3j;)HCT`mRYH13Zed(_mIbQN5=cp}|v{pQJXryc96j)e;vc!>fGg<W@|K{MX@!
z28JW~Ks(BLE8*uA75z_%)2|QyeAqd7b@cV|;q%UkSdj-1&fxg)ubrb8GPuFf-wwWd
zB_!^n??e&Ij-vW!U27}H<V!XC;I?WCJZaT!eV2LVhPmV;!lH6O@JfDkwOwi*(4XK@
zH<I2tfnR<#<U-rqWO;zUcZ7Jkv77wR1BwHU5@h`uYK8@BS}z-n#8YIbCL@fiIk2l^
z_q9kPb&dI`-Kd_CXo>BlypJa>aj?~Awr7lsaHz>>mcK0<Ik|Lm%W~5a9niQDZq-&v
zzv(qC6PZ8HE>p-R;^vbd&A^BypEB)yB(u1Rv!mA(BvyY<IuLzO!;?EC?ba(X)D&w&
z9!T|rd4yv%&eG4LVK_4RLgpElU+Ts*+|Boz^oTgt!3>6TP?QFKNRv*_-4tYA_R!Wa
z2X9&_#0F1m40@?^_~C(I8X>6l?uqv#kUd5+V*%}u^2#LR5zK;k0D-P0U)7r^^8DL$
zGF0hoBo~*>V3M|CHlPS0VxMEtF{pM`;-HB&i+nX1D)j?@m>IKvIr?Ff!nhFIB#uc?
z;AMr3%1ZkNK3N|&#qL$hj9s-i(Q|O7w<Bv8g7-a%GM)d8=A9&H(8vD7&FnKH+C=x&
z-^wJ=I$KS8kDq{E9Pw;Xjln_=$18-`D+=}pS(K1m3l!Jq%U=?C0z+4m46-^?NWnG=
ze0Wa?Ip&Jt@4?_W|4E!!vZ81t#R$dP$wfYe15whw9_1uo4$@v$id3m-^KdIfF&sw#
z$K%ZqrH>gpSu+$nvNqn7ST}iR#N#Hzdu<F!kj9(!CJ13C6T!P>>Rzh`Ul^iMSQ$=R
zy1Ys=BVImy1}QtFoWt`4CU64Lf+m{A#d)DS?cWP&;#ZoD;8T_Yt|aD_oiWce&`7rG
z73bGfN^Ul5<2FrgBe`kmd5#gn&1SPZ85KqCMujyqf*dYXQO2p|ri{W;yVw-H5mR#0
zGM|maXqtw^hvOpeqrTRPRcgy@?Z(!AGT2E(2fNV0RB1hHI6VGcMS4X&L4Nk_^W8vG
z`$=)rs|gAJ86{SGG3)og<>}uX^oY_jBE+}ywxTR|lvR0S5ApZUTu(C3>I9fQo{nJ`
z;t$pSQHfnH$|%)tSCw6AcQjhVu%PlO^sd9X3Uo|KvC>t>8ac~Y2^nG4qLxb-W8UJP
z`sJ79*K6KJ_lDsKE`D_|lZZ4a1W1nB;C5%>zNcJjYB4TskqfU%GKHuI6l9T3#BK5a
zTuEC}la(FVOutUmV#JW^QNLHr5f-F@c|P#H86!yJ=UpW;G*#rix#y`Jvn;e!Q<y+^
z5n%D2OP8;~S2I}(2(^42h8IR080%+D^1H~Uw;3%Ai9HRIBUO*J1XrX<dQm)=O-Luf
za&)UiblAJD8Q6%t+0Y{c`WZvWdqrwHks%7$Mt#Uc`Pe}^&@M1ej-E<`6hiOabbDLW
zt2w=n>(POZWt1Xff_xOt0mzCfIBy7Shc+El8pRlL|GCg7YkCSp<4CEIQ~<71=BVSy
z9Hm=BE_{~ASx%m;&vbH8WndMI1T_qXS2|b-maeJAiXOVib{ee+LWZN^`fnd8aeQb@
zJRAv@uGBi5s`0_n?HBas6OApnDfy&K@%v#<p!Nw<Z;76_yxcq}cq`)JL8MR}L%I!^
zc3YR@VW|>_W25uNRXU>hXsqC{-A-<+78G>bY7#I1TNK({yX!!1>t?eles?=R_tsnQ
z#WVbiPqhA_^7L`N<7%R`X|YLs$h`gYM;aMprO_eLfQ3e3_8!_xmHLqMkOYRi=FUA)
z9r8TW&5~qbaFIossg0D^MqLIfqO7f3^@dscU}%NY?{Fjp0dZ!~1)<_tY;1SAI4ou2
z;0@Ahzn}GCb|}9K$?Zady&m<%p!$7<mUMV8H(6LFb2n7BlX8T4x?iQvbz)vT=pf#%
z^W46@Y>LT!X?`Wc*@QjW))mV)Ey&*MKZ}%BCv|ZcDs=L0()mgf7O1{^a&f=c#pyA;
zlqe<r-}aM>|84KJ8jjWcRGgg(on8Do!+BF`Gn`~>FFcJ+b=Ehww0ZG**1bL(!28L$
zI1bhXUU(uAd90UXA)EcAn-&>Mhft~S<W1gvYm<qIfDN^q`a%Y@5lrgyuwLIUdA(V$
z+h&WBKn*4M4MH|)`&qlb;gwqVd)m=GF%?3r+xdNLdVRE1p6@`nj_yLv5!yV;Inn@0
zn$L$fMD}`q6YvP1?ySV2raEgr($<2M->$Dm*3X((vL27_k|e6%$oleI^QR{UAxEHQ
z;s;}~*=NMVr^-r^c70ME0iPVCXnA7sS+liSyP0#YP)wIB3<mPn04xmd@OQ9FyAs7_
zp+$HJpz`%^!)O}<e0o%nb%<=w{=v+vgOOOd(p)zAlw#gGFiBOeyfhSpm&F*L_l+~9
zS@V9i%ZXLfrJu!1HHG=>xFso}MPg?!A*hTl08*>d%e_7G&ZsJxVn;OGgeXcA&_r^r
zBtNO?n1`h*2EIHk5Y$}MgWY=_ndKcyn6;U#t|p%dkj3WFQz#(AlYGb!yS;4?TmQrY
zP_B5S)t!n9LQvUq*1<H?x@A5j*2V&>AOz<>VJ$eV;stdAF_2K4wOT6V{x3wn*?2sf
zOs(UTb%}mneuG|NHF+lJu`6{NP2-$ht53_AHT_gxk_N76lMdZ*5kSRVrRn;ft>>E3
zy6mS@l~bpbinVO6X%z)5W*!DR<&;jff?kfZTf)t`O3<cA_87!vN?>;BmBf5CLbU7a
zgUoC)K|!8%#Bx|D#RDnc!NyQ&xQl@}hYG>TITw~Lsxa@h04$}MKy-9KN{3*eSdfOU
zEJ9#r8diUCYYiQ3T>8X8Pc>J-XUV-E2-xx<q|}zKDZ%*OGn}!h3R9^RT5I6X53&UY
zPc=iC65A2aaK`OnQ45-%%`Ug+9(1%N9>&~~_km$Wrl3}VCfo+~D)i2pdiAK|C?x>Q
zrD}R2*gdnh5u#x>8RSDsfCGA6QNY<^oNO(5aey6Cs+NTfl+c+(x3LTxrG}g3C1RAf
zih~m~5P0944F>lejU8~X(hJE`s%hwHDz(ZH+ozdhq0v?nOBZ4kA1e{xl$iSoH*Nmt
zy78;~f8d4WYt+~ji+F&{@&9OTeg2v2|M9fF_0j+1-_rkMUHtmvW1T=gU;pR{LOk*0
z^!(+?>qGJD`I+Yn;=Ej@Hwaf$qrs4l0nAg&aqY3L`;x3fFs<a|KDio=dMJdXLw#z)
z7T0Bq+X^9fZ@WF1(2dp~|G0ro;jg~}Qo&)OpWVSF?}ZSS8UesSR<Nu?w2wmV2O&CK
zUtIp2zJHRd&lm6O$q(RvQJD8#_P+NX4d+Z%RoWCz))Uj3`x@%6CwWN&z48XS@9T6C
zA@SgfDLDEqyiqSt02vV|^?RG*-W;5s9X{`T^?m30$#>t2ClAf`YveuvlzH(RiQIxi
zrn>$V6&aO!U=dlRFcHp7NfvLOp2+Imxj&ebxYE8mkZm;3fUj~b-aS+$lPGg|RAyF!
z(Imr6P&BY%J2@nlF!u)1t7t+kskk;|f1JX3vEN7jK0?DRT%EU>cvj191h<evbgI{y
z>8Q>_Nt+WE`C?;)0)_2HTb<Ux#h)ixp4)<j58VvJ1RyEYZp=OREktCL)KA?Fh#CK}
zMmZz&Wz9r?ZNoC|^RN7EOX;Q~jXA|P?Z!6UuF4zRbo;g%Yq?#)tX0Z5hyG6olkRoH
z$s=in*Purb!eb?npGjUE>?M8^p$rgsNTL$?yY!+D4ovW@R%6v(^wX<iRrf0&)K}l6
z-M88FpxYJH`BgrxYkz%Wi5OlW`MH7(aDXDmz*DT;78)SMp%Ru9PrtNcBxsPGfCzDA
z#r$qnBFd#w?ULzsX_Tv)+r+;ts>AzVQKCs^Xkj}V_9=7jT_d?~V2O90OQ>|P1aAB;
zi?^um=`V$t+LV&H(&u3!Yq~cT9nOmEa@NOYG3x@s^7Oehwh^+9Ahaw+Me){i5G^q-
z+Eq49(%wxvoTgU@huDBN4*Jtvkdv$`UA3CgY_+sqK3=YPpxnTB_4sza^(*Y>PanRY
zKdrBnCzUMSJLLnD6{TWB;VqOuHBDL%0=u<eE3y}x&6w$44I|!#jU@+N(QG1!>pYzZ
z8jW8O1V+fx$XM{m@jbLHe}J+<lenw_+<RvUOea;>u6|~3j~DT*)|AwGg_)XZ?S7B9
zrA_8Fr=hjpw0IM%E5Sj2gl0l;HZ_-NTF1K8WVOLLLDjsLp5g`IS63{^Q@{aqWclES
zxy^&JVlU2tRvwry`x!v_fK1sR`sm>vB7#+GmK{$Arc8S`1Tm*0;cai@9kJYxzd!k=
z^BVoxpC7$=ad>)oeBL?!=JnyJ=;{3XH-~4P4iq^)6eZ42I^UcfcFtcOcAg!aop(r0
z(K&h1d3|txdi32_2k`cUUfPWue)s0p(X*rTSKrsAhj2O9bI`B(o}g4a-SxvyA!0R!
zIiEuput;jbl-0ph)BsO@Sqr40HZr4ijqh|9%$u}@SN+V?!z^;`Cx@x5J~3Wm7*=-^
z6&_&P0N+q36XWIrrFz%4-?boVx)AtgkWs4UwA!*-b!u+%RyW-qOImGPt!}#TuvS}c
zk6x=_-XN_fs-QfO{;(3tiKSau2M05Qb@&#;I=P}%!@6vTwHm{^?7tG$Wjn0Z7}f*h
zh+y5gy)Bd&T`f-;!R;L!9u4Ob-<=#^I_VO3y6O~JADMVKMU{UbvsDO@5EO1AL{n+m
zp*CD^M;#TT9n4i?A_My_D&{yV!Ci%98-@jOZ<cH_7T7jVwzvP5O-7i#!}3uj->^Y?
zmxD<)e^v~8>=`B{fqnGg0xpXOkWLXOW)-Sxr&A!hyL@kNQyD|aMp~W^Yd*XgVnks5
zNYLVj)GD-voUywr6QMva&`iByzGJ8L=4C$2i)-NWmZ265o(i63vMh!W;AdW^ZCC~f
zQm6$iAzDZxEf6%O1+GJYmaxrOv_mZfl~cbyd~EIii!Fo?N#fEddo6h-&eO93Rid+L
zW(0O2)iRNt*$bso@+LZ6lXgIKTQnVwl5T%gWaJnF2YSIP>0(V8&P(HFAq^`KX2U*2
zI!6rzY7@YoWHjlKLD<CzZbww35Vxt3+=8veP(e_@DeNAT$S1XA{k{Q;jLoEuZE1V*
zB%hkDRd%4?`BvXRop)e_1s~qklnphc42qP#w@TY4m2(#^)YBIU*IC-zZSOm)(p^=X
z;YBIHsIRf5=vKoW)2nqirxxyro7_xN%$VR;I4x?rV4j>QQjXNhcM2nOjg%p_b~Jmp
zE?5A?pvb4cFOo}vD2znH=>{nS3*r?)J|vGOHJ!=X2q*&QKSaMswzU@clg*<E&&G7I
z`J1MBzvBFDmQ0dAx9_UV(p-vo|6v(ydpQhtS!^%lcMwETBfKi=P2-C~)?9NV(LP#<
zf_|HF#b^h;k;XYUcq~`SYJaI|($7-yxvgW!U|G<`O2NI|SYZa3nM9jdKW62^mDvnN
z32V6fHUV))oUo?)zFYOH0u0s!7rwI*Xe&sns$hKu$0K#CZeW6HI*F6xSKlkGX+5M5
zoU=Bh#*Lq?-!Ec0<`_bJH2Zpx{g)d7qpA(q9Q&`$*5^+h`>%F;>x+-}UmxTDevJPc
zw)%Q=aC#uv+3C^W4xhg|3Pb@86j**NV3!~G5rMscLo6_`AB;#Khz5bC*nl7kl_x`P
zT>iH<eHy*TuM}4V_6}@Q2hdSlFR#+0Dt4?r>5;CZI0#};n>j|eg&O_Z6r@AewO9mB
zvkfdc1<dI;r|(rnzJIyc!b^yb>(R!Ci;?>=0InCX)p(xpv%#{XwYqcs@R+S$OXVTA
z9u&V-#>k>u8Wk`LQ^A|STk;CW1mdb1-xcxkK_k0*D01Vw$|2zcdsI}$4C6>ma)#kc
zJwA+XlZur>oKpP-7_Nz5YT{|fBEFcK4-NVfp$r})NkwGw-{|g57*a(A_{YJMDK3#^
zxL`Fhj!%~7<)gW&#Yr6kHsSp0JK@l?-rQY{U<yzL1inn0(hq9fYF3MS=|cX{NTlkB
z=V1bB)yA!Lh&9U3x60ou5UVcCXBLQ9ha-Drz&b`K@**P&aEwGljnbPP)D4yeH(_lA
zYgYt>v9^){Rsn~%6l)_G8}FxCI^n_b+^A~P5r$Fbao2_{td{Gsit3PFrSNe~87ZNt
z5I-(-1JKJaFCltkvaV;vavrW>fpv}}^f-pcbuOqvaw-YRF^ef!7N~0(=yFG26>Ggp
z;r6JBZ_bX6zwVr!KL>k`m$l-ji4dbDe&9>Z1>%u0`0T+}h!#{S?jC1ldI3K1CU(58
z1qyq5qf3(m0;4&Y*6hG$QwhP2El^e6inP+*ghY~TG@CqrH`1Uj2eKss^bm+5A*9~G
zyetp2k6{Fs8%DsD3M?~E@8s}L3Mo(u<g%gtZHE`Add7Qnr~w(bSDsT;xB(p^ITmg}
zRx?KyRg*LOlFI)o(F&>sBygwYSC2Fx50_q~0d?BLNCRM2#|3k*w0;#2O~AOnX`KtQ
zlzV>u(F+e<Y0`ckN`z*ejsvk7-B65<qLOFe>P~0q0n*Eg?j*m+RI0N_YwpVS&B{uk
z@cyVf+1`HO**LzzE@lwz#c|{ZnLXC7$!`z}#;%3Iy<|O5XN^76p|rK1fF;|aESpX#
z#8lcJA&m$&v6LyDzr!v(e+Gdo3)i(p-Ht2k;!OI|c8yKZ+ElFJFd~(Bt906zeiT&|
zqug@7f>KvGc)0$MI!pK0UrtKlyfA7%sHD#)X1x}m?I)pi11BSWF5;myH<DO!elo`O
zLi5aY<Fg`*oNDU6AKyosSo#@@lR|hNEUjgn<vn07{@D}p&taM$>`cn1QgP<FQ@W_>
zP2eW_qt(s7rv1-fW{~S=2!i$!^M8NwdHd;>m;Za~^N;pF|E}?$TAPnG0{SDIaIf=h
zCK{SebFshg4(LJiiXQx_o2lymwpKuY`R3sK@bviT<k++Sv0jA?e_&T$+Rq9a6`<_^
zspt)-u*^qS$l6v%VM_JYFr9KU8P^u0x%ab`C4#sS@g9q4yO^F%%Lm{}xrd(uZk3@5
z;0UNHcus-ZEc_>ps|$fE@G8u&hUgjpFvoIbon_Afx66X>GS4@xTmt~}7`JVW<OC+t
z0GAYpwP&a2Z|cS*+3D4>O{UkH5<)O0+X>kSC8yFhnp00BArB;DMx=~lVx@7QsCcK~
z7^Dvy2~j2$!r0o5^Fj`8XHH;-5AL{ErB5rKrkgmL8AD+}oMca#9hJ0*CCU701*A6*
z8r$^fDP@JWI|f?i_G>9b9+9RXq+1S}F>M*X+o<nR>dHqex5V|nR6i#?P!DW;zu
z#hOfqSN)8<V*J();N^jMr)JDNk+xL9i%M~Bx#Jdl)e@QA`IqsqPpt22d-p(AknlCJ
z0N{Rc0PYtCpj86EyTt)`w=e+h5&&)&2jF&L0Jb8zm%Kzo52A@xB#Mf#kW*9uA7N<j
z9{|n$V$igzL-Xzd(7am=ns#+)ZXW>6?PAcx2`BBXV=O7jG5t_5`5?W@hY*_&#q`=;
z<G%5(UMb-gvDdG_{b<)u!3EqBLRff<zz@=mK9*biu|$K*g`GnkhWFu+R=~HFLyRD9
z+*ZT)qMLX;37St!3u693qj&`T^C9<pRo6GCC&yoRj*dZ56gxz$CQ5?alZvo-LA(>*
z;g{MZcUs|fDuSaOB<FM=4BZ1ah#PPsgiwY(m9rSmjj}OFR-B_F_*H%bGATyiEFeS0
zl?vQR5>PgoKzKI{`bN)1acCVfdUc9vPx(K>dGqUM&yuIjr_HtuCf6U0-on+=TW|z`
zmlrd*ZUPHWbRxdY#JT(uvaSoBILX!p4M6P09BoX%!x%cE&~rP<_2@PeV8UTH?@CjY
zThVYImexI8Y0{yZkl!<?OVW#xcnj*?ta;T0b4_z+ji+U}4edb&X=-Maq&*Di0e|V7
z30&K6xuL?DLV7^+-a<BLCz7fYs!a=^jxh#Pbu_uv<V?p%JldTlS*|8&`VfkZwjK{E
zHK^|d*a$lf>U=jqkEuAQ_u%vNUQ6Q<i-RLsuH3HBwmUXB*AlmB#-QCVR@1)ZI@#Q2
z<fFwn1<dw5bGRGUc#3RVZAji&h<>d4Vl0|d?(WV5nI+KMu~XB`F|UPNHMIelDgAxn
z`V*=7b8$fq_zoVprRoCtsCl)hfMJ`#Mn?D8QCXeXgBvb5s$uzXhVD3yO+s=Tde;OG
z;O2($KA4oJeBKW3q&4S+*JO`_qA|-NskEDAP)335&4n_s?ycGmego^@9~AQ@2w75(
zrd49q!yu)04dSf{F|inD>07}H8=+<(b0tO}FcYg#W&0OOk*tu8HrHuiIt9TlU?ow9
zCKac(%(q3+nHYM=e0#xN!zO9CfUhpnL`fg7{jC%~YBm;xPUU%bhPx<;fn9FM6+v5f
zTb_O?LbOt&B&Jd69O+3d+Lg1`rB8~=S_TA{f{2age;1-OI@`eML;)1B$&^EFu>g(o
zQG35!{1Cg*i^*tkemkPmRUrDLYFr>PRTh8#>$Z4(K{CV=#OnYf(3;w$(wzUqtiV5n
zyuY?r6^^-NAWip_J)q1K=}fv0W&lSz$z&M;>d1OM4n*}%SY$rd&4M@%v|IGE$=LOK
zPz3jw1^ye)6Ba;NWy9PGXi*q6*9(s|$JM;uF|bw7xpgf~g;s~@xWMvQIznHC8_nvl
zgrxfjMTS<Vye>#OnwQcO%a><sHM#ldfAN9*FVsaFTi136OLL;A+>;Zb{5@WnhwjTU
zxbT}--+X;^yz;5&y4#=ivhq@tK1P~?f0&!|4d-Iqx=2?GcFC9Gj|u={U-t4V=5u?$
zqvErIR+ZK(U}Azvt_Cc#|0<F9!$r<eaUTRlPA8*2ZIXj*Fq+&a;skLuggFQ`(@dUG
z6pbG~O%7($(E${O=o#YsbI}-nZ|VpOVj1gFJkmM_^8<|VjKbED@bbTt+b4JVllvc^
zyn9Q*MP!`FlpJ=3ctN4-D}td&@KKb)m_}&u0{J0;2WLDuyBOYRFh>1>m?nx7Z+Ah6
zME&8RAczSaTOh_7iqR-NceA3wk3o=uS7ued9QFI7TT~~h92CVi%}qZSw1W!GRGg>^
z!M!0tMeV7a>7veynJXFrC)9!1iM940lFR;RT+|!9tT;7*3bPnrXLyn$PBR|$l?uy*
zfivL2*-PF9Qy5LL>D}Z#nPf@NKsRI1ciDQl#B$=104SY?%k)&4-e`8w54eh)px}up
zB$x$LR3alImb5T}BPvU0LmfsZ6Ay9-Lg(sCRLD^~41)<UCka&RCL8jB&df~a9aW8L
zl!2;kgexsz<i!Hxbvnw6adER^BbF9rG3=^^a*?J39b?+SS_>Ih$UrliQ|$S-B)po~
zopjlTWtQ;@&C<WZ_(kv0(dhk33HE}Kb__0z{fr|d&c&>6Q?pPD($Gd(o(MrB>lLNw
zG3{~W>~E6!Np#M%%yM-6^6>QN96WuFzCL~pi3U^zp|hH4ydJat*{f*fW5HQ!bmO%}
zJzC0r&xj-*kf)!eH-WW28|nx>V#QY0tyvK>xEq^bLOF{D<7y`TZ3vv)q^t^I2Ov+8
zeZeRM;oAoMzQf=5O0V|LIV&k9C#7hJK_#R&9U^CPH3Nz$w&f8*@D*s;x}LwxCxu*4
zkOT=WSmgxDj7%0}z{X);$$E)A;~9RBoo~yg{thM^4K$U&-|z7E?LQV${X56nTXiVY
z_8snnd-uURit8iKSd?5!fO1=}FqO7Afu4zk)=7Gw^wMb>8;5q+Eg#StHevxt4}Ok&
z9*54r4^|RYyZRNphzv69<-cd&)y-KH5mXxUF;L<?9HIX8pMaW9rhDSsLt)HvG(43g
zovd@6qL*u^!Hek}V%(O%x(nozJ(rHl4xgopj=4w*#G++$tgwX-7{U%!v)wW;_=Ozc
z8_9y9pkgCpJF3-u)U92wH0_w~o4hz)>9MP5dG*R9hi?QeoeH8FVGNq(Xq^3Cmacb>
z^5J==DLMuH`}w9k)y#BQi<KfA)-ao;Eh(XsWK3r<Uti(qD!89}VvWK8EBh-=?~Ca)
zXf8uM8Sfc8-RrFTwxg(~g9q)RW(V^RWu8iE`ZT_3j?ndF&%|Ujru9e1-wJF#ml>iu
z=ZCM~yb^;~K4yofXLT8<-B#C>+slJ|*k=k~&fb(|aNyv9C%b)q=G&Ry7ajE$?u)AN
z&U(vNM4eG?NfrBX1P7z>uCIaG|26zFLL4Y^3<B!p=ST-?S!=(M@Ds~+$*D5|FGx+7
z^;IOvtXJRLlb@0=nf7PfkS!dh6uFzj<#^XWU|P_)M~V0GWJixgi?{Xs29U$qd}S1|
z^VSD}G@)A(b*{C1D$McS6q5cEf`|PdKhw)-vhR&2PpRV$=}!L=^2ZS}+jxwMGun7k
zG1pRKc`*b1TB){_t40437T&@d(jrryLSm|H4JPU$3y){$(x}~HE~8xUnZmgXts!pm
zjM7&Mxl;*b*0?~ul@7^)<@c7hgA|!gUIf*9!}CY-pD4emjYU;16_*kcd`u@(eL14$
z%V=n#I+_I*xOR*xQ;${AkBC0!DMl)(Y(zV>b(d&s0`X5pH36Yd6y}(Qe(i#aen_R{
zCYSJ<`iqKRuM`F4VOMO|SwPoeUC@*&73k6g98cGj5WkoFY|ixWlOKe{0tGzyWZDre
z<C~Zkn$EzFHA7war;@d?1VU<Td|(g<oC-Eo>YkD#Oi1bzn0FKc)V`$>P>+6=wpbrP
z(6(gUB+w9(bt_4Ok_jJLX7v8?I{lC2zP}yz`#8qCOqTnR*?wfUzXG#e2Ai#{6_Kgx
zmdme`5W{{9s|Gfg!kvgjuT*afLh+9D@)8-@nPUUT&m!LOzL9M1YsY@`R+O!B+K&V1
z=>Zl8Q0fFYgcg9kssJXf&(|;Rr<zJX2pZ*5*>|Co&o3Vj>@rbmpbY{00dy}%C6+%J
z8bM&=dekeHZT+($Fg*mj2+mvA_!nv8u7jv3n|GN`MvEZQG$NIuy3$GaI){Zmn`A=F
zNRc}pOu+bA@m6)m*PLTS03ra?R1$<yL|fn6Ofeml9ugLCqoaoPbD8S8qMfoyq8Pr8
zvh>;EBBax{EaL*EU+}gj_o}oBe#dLc1^!(Z-|yu2JNf-ye!rLB-^uUdZ^LWk4kIm^
zMsDTzdsE43<UZ8My;Dgw^3H+mt_+fQPNiE1vU_KEcT&qNP5$18W6lZWL;S#@USF1X
zh07z>fASW99dFKwX5zlEO?wwMfUh2PWPc@c1Ih%tWMPkqSVYxY%>a}aokD=<-47|q
z*lOfGX~4I)QBOt$@cqd*o!6M;>G{!%7l)@HuO5H%T8OaEI_KZNIXvrhpvdu|C~<z$
z`R44fbN=$M^X%a4yhCXnJ0~yXwE$XeibD9(ZshR0H?NMK9i6}WKA41(Glty*#=y3>
z&jqu}rVW!>o{uRU)H_i^eYKe^X+&PkygE2K?i`8{2~-`|N*XlAMt--}th1tZnPO9>
zAyr<>YAK1cou(&lU-Q9Nh4#XjPv?7!5IT~$HWyCz*ylAB_gg*LF~q{2Nz|OOG_KkS
zAJW$`cG6UlNeAiVZJ`bswlSVIj)nFh``nEfR-X<OMN->u)lHgNaugqcpR+itXSO*c
zTE)mq?I{!m-)XeYBpo{eLx!GtVZsEAl(ivvr%iD)9~w_3;*(_U2xbBX$L(#P(XGv)
ze)nyjCdw0HYddK_75uyP+0c#I=}=2Gde93hN)OWXKh7Adx(uw)b}R%RW9d(xg8i$}
zB%fYGmTe)pnDg&QNE!pBjky31_+d}Ap{<Fvu>sBtVM(52W#U8maDyOH8&FfVbTkD=
zmJCE&N~MK_a0yNepl?ctvZKKOj6*f7;zVtrk;mu0;BaEnK}M@>Z9>d}Vpb$uf0TB#
zP3)^*fXVhxvwQ*;Vz(pJhH}9WdnZilXd>t-lSUxw-RwpzEi%EucpXhjTzn(L?Tgn>
z_QY>1d(I6cSi~wM63om4-p1^vZktO!h@PSbM05E~#pn}AMfz;)nr=i{OKeo7K19oA
zse6ku)WydMEc6UAZ7OdZ(yk7D`zkbK4+rnzIlVU?Nf3);$2Z_&@{^MWNwC&>?43Am
z!#k-OyiNrYRUClkXZ5u-Z6Z#<u1Bms_6kr?%*Lp&X6N6&SS5)xvjo(bHMIvUzIH7G
zO_-G>UI-xI1*n8Ye_~Jz&mD$YGum^o1RYPz#zDSP;t{E^pWe0PrYW^(?4^}2kng9l
z#<UB<-JHf8OE}qHqHI-V9l3{6Ew}%T%4?Lkc?lUxrP|_@$84kQPF7(E^(Bk(Ru{h^
zIo=lF!=W@C{8+NCKvmOr9E(!>GbvR$;+vMR9J*?{eo%R#cG8|oHEgBAl!EtdUS!;L
z62wF0)6ra%$+<3lQR<QcTU>DRyxzg|iwjF2DxI7kVyqzSRjJ0Wy4~byvyJQPY4d3!
z&Qi%F*TG(u_;8=pz$b-DZzaF`U7Z>g?ZI=|rqQ;epeoytb0*{`3iU5=c0IewlK)Gd
zY<>1W4Qfb6ENYnMa+PjF`&(it2xE;tNUw(Z6wK`kkkBR&CP_jCv5AxnrpF6IEJiVq
zpkIijBe)SLHB~$TG0%AG!f#ZQ>0M&BfOdV281)@7WV_^jsN{X9<hxMGccGHEcObgm
zt?6weP~;wp+?N)42SwhM6}huI3Xbp2>L^^q>L`pQtCb*n?yXkB7_^GKllHP=23au1
z#%A51xm91UiN2qRnv%O%MO$?o{{6bZRp51-UcY-Hu<gBWQAeWolThszHO}F<y9>k7
zk{#W@6L8#luiL64czqvu-IBGRglo4$aNOT3+~GRGMPS4V=ArFN08VTw!+2!?6`jQ$
z)O_jWBA+6AAxVQIO10Tvh*Qo`Iy>HqWnYjd>BTJXljELl+xV@r4hyg}m5w^W?8zy?
zGK!bDXe1Y+e|!lM*o%T1G!F76)AE{~<Rp!|fm2C-bR}%$!|`mYG0_`=k0h`q6=%)a
zkmv{tmaF71n2|W7gS&zBfEDI!ObZ&6KSD}ca)6V#FpoJ+SIlAeW)nO%tT;qF{B=vb
zzVKd4z5>x1Zdw>T=Vntz5W$@f0uAl$CF=rDYSO@y38L-yA*9`SRESf2LFM=MleF8N
zO(?<zc*f48dzB_wJW^e#my&d;c9PiXSk(3`s0NdN6isb>(t~*QqLpIQ2S2rK-GKPH
z``>fI#isYPWjzI<W}?(05bIF%ZngBwFL%HEBC53XLkggCpJ$NFImH`F2o5B7*zsg^
z(a*p^O(3q;O>}&<^;Z2!rPvqRL>1;^D2@zw)9vknz*()^Fl|Zog&lLc4N*x4JIMgz
zy4`FxQOyLN$q*kyOfCCP)&F1G9bM#S-#*_BG_;=-H@(`Rftoroi7#gT{<pHC*1X)I
z+lwgJB!Tl=jJ4T^h__@4pu!usMKKd8?@715#?Ft@{)8P5F+uNb+j^r;h}h{G711q<
zvzRXKOV5<k*nwJLNGgL`d>s_3`yjaOIxm|KZwkJzY!v>1R)Pyfcbr2oJh#wLCi-)h
zi;gmGxG(P94(~9&lg<Hw2lDhb5r583YFl-~%7?(A!)X!V6w`w$GAjfuLXcRS-J&_m
zXWeGjn{5n7y{z@eKQynWgMQ4M%JN-`r#bSj98HSR_^a)0?mRS~TbAiQ%5)}{6gF#F
zUd1#c+Vg3%k+kaGh03fwr%roK-n1MADsNS<?8%I&qUn}cC$j)(7plOl#SftD8orj2
zr%G+su0vXuHB{l?mkae~OMbyOTRef<>HP4X|FI$zow04V#@~SWRK9i}EloXesV`ly
zvP?>00%#9`PfcHt6K!7Dy9`y)i30hR{{W^Htm*`n-IyS2cF-j&jMFd~fJUJ!KrK{M
z86imAk7i<l354G#x8Qd{S3M+mX)30eBVxKYMw4Ky4MP*`4lf%LfsQb*p%tsmwybp{
znX-^<3*W2xp*I&WIEc&LPAEF15qfd--Rr}oxE{^=JxA4Y0UoP;2#9UZ)sdVkU@tuO
zoGEYmf{<czF2uLw%P)<1Fl_6~Cp)^Vln=%GeHI<K-1w%(0MXYP$%g0z+spcB%&e~r
zWjmnj84B4n?<rj#+|c$@Vs;qHdrr69KyrKgZ`ov|qg%bV47fCWbQ8^DGd;+>Y9VVX
z==)bOrNZ!vbIhb_KY`6qWBn&E4T3-S^yi&H9B_=Nxy4F6exf=Nnb>i79C2g4g*gwd
z<wzhND+mutgU)n@_U$6fiDHbS&6en|%QNCCWiPU%mtNFZ=0MpwW;q|&5T}aG4ZmBE
zTUD6KLRM-N1h^(PqDk7z(_!5N1m@;ydU8QqFmEmwwZd5-0R$l|KD&>N+#5ekq@qF0
z-W}i}Jvi$U{<SR+qcbL7>q8~%+|Xhz={3p5K*c{-DuDE&sP%xa)Dx1cla(;9Fmc9Y
zMno&zm>1yL4i~)^4o9u>;~8%ROcpN4<nNn1t$z^HFup-Ns@%}&pKNrE3+X7HcUOl+
zB`^6bBU2E07R`pu+x%@l&IFxennL)taRw1xn@gt1Wiy>6g`F*-&I;H3)&UWeX-tnH
zPKg0!RW>g2J}9Y}2BfDY936F=zZO*coawQUh)K_(v@POKwz=axXkn53vIh0{_XKoC
zFGe*SXr<Inl>sEB^Kr!@0pK)+EiWAMyzPZgH*R!OI7I_y5h}@A71a6gT$X^7FqT#-
zFby!gNoqFSNu3V45V~kXH{*;(R-BrmZsGTYuHLSZCQbEqhSRvvBEeiUHZ)IE6KjbZ
zPCXhQ00cttPLe)~uAM_9v`vLvl(Y~qubvf&&u%D`46$;+Cp#esh82OGG%g6LC5v5)
z17}B!k|}Uwd6<S7g!k=aLM)e~YO6@KqJbJU!=33OiYz4P^Qb{!@(DZHK1=)ES${^1
z^+k$SLpd2$e$r5F{aLJnv+GedNZ)3Sr1i%?e36_Tef{<6!JC(&;TfEWKv`%n3_)O`
z9Z)WJwPG0#H&g#?qrw+Vm9}XrXiP1K=4M@sD=0ua=10>VR-3)pn3`I5SgQu`$Si#g
zTxRA0Tr=X5GnMY$N_gkVE5YTXQ|X;s32#DqCAe5+l}O%~?8u<vXxPi{k{oU&RO5g=
z-mVzIzSXWmCl-dwiEvH4^yD@3M()&X?^s~`2t&%BwCew}Wxsd}FMgCS+)1=MST|pP
zZ2ZW-zHPj%N9Ws5Q<pmg!0}%~0=MqOytSeDQ9!|nDb(|iP76PZVLjoa{Bd3jZ#@{C
z78nfg)rrtyohDql9Yip0l=f)m3PDePtISoP!?0F^bGXOJy82YT@*emYq;d9lr{GI;
zEMJk~n&5RHef58R{P}P{8BD-B{r>!m><@o@+WoxS`@`i1&df$J>24@r%nfDz)qdW*
zUZjsr@$>o9r}$U=IDcE8ZEbz_U#+dp&$c$7wpvfy|FzlP{Njt&e<hp01_EYcWlWOf
z(e1c{`xp69BFrU5Pc0}pr(k+G{czFL&#$h9fDXTtXHT9O_sxEO0T)8RHsMeyPrE`C
z%EWWXum^7-2<Qas)}Gaq_U7gvlCRUOpL{*bhQsV4o%NF4EAgOtr62q$A5O%P@=`EP
zY6Iar6Y@_NJlK0#@&ayF!3}>#@du>V4f1moN<;U0^e*rBQ>g!j!a%&ryV+2PP&dt_
z-P~+~7f!0PTVa(quaeu8Z1nWKZPvqVgb*%rs_@iEz8}rd0d{mTO~ol3j&I_KE>Fe-
z>KYHlqRkD_+eUN63A1;40v5R^$L;5W!+lD&ouOAN4zEmpP97Pw->hOcX#XjXMz4+z
z8!VbUre73?G#xQsoH-D-=CWh;yMgja!>Teip}nniPM3{4mzhFbrZ^0cIyOOjT9&kM
zjr=`j3v>+CIyZEBk>TD2jR{=pl#^n(ml?Xt6f(=CJjl+bBM|1hE0pg`yZC@Cv4POG
z@7lg~@axee8{MD@j0w(?576LELV#cna!|9O`WQxanUb}a9S(ymR^fUf?dJU)y~qH#
zTX8bM02!CbmAop@=M~a9^|BOlh!T}s8OPknE`B(dO7Y<10bcf<Aa#Z~bubkt^t&56
z1$6mkLPxyo6?Z+DkTL2R3Pk~!#EH=YszP}c21Z4diA5$y0bj;Zf1VHElt4k`R0?}1
zfs7aC+R9@;H|V`KIcn`o1D$qRDKQRu)ZVe;iy8&epo8nl9;p>e23EuKvK8tCSyoBX
z^!ltAY1zz!qwAA#tI3;&Vpb<fNi6o;{CYZ?O-;aMx@N&^8qIeylV-!V*N=hAzzara
zCp6hO3GxxZ4;*g1TvSlS%~SXI5X~N1gRz+h=Clw1W6<<6^#szE8IwgMgGy;RrHhmw
zgodaxo(UY-Waj6De*g@t8i-K$n$pUKikkgKdz}BHVv5xPT#FaeH*gau@Oofl!BQ9v
z#!Mr8DTIfbgihHvGD{}g)<RS@3eMz9Y)UH>SS^SjIv6XxXvN{KkMqXD<6ZJN^UIp3
zwlCkCx9Sd2G`YSP81Rl(F3Bc#*ZnpSTk+Zk$MFSGHBX?&0Npf!hBDb|UYGcB!s5%Y
z8RYxRLS|I%08T)$zuN>3kZI}kE9X8X34$e|5|l<-@DBqePxBasQ-)!e@a)+DMe{Qs
zj0NY+9Kt7|P)FL2fGKxmKNF^WL3$Y<eS!R>YpAGED{?{R&ruOz=cW6=PDv)!_WVq$
z0q5ZPn_$2kk=q>!mN0=VQvp__MGse4w{}-NnNUb3=`SY4g0#ScaD3POP+N0|iWL!;
z?aqo?=55fiRO8`{TIH9?B=ky5*!S6Z@X9eU*(J=5d*BP*r#CC)WMQnR^yasWFY%oP
zbOT~*VJynx1DcICon6Y}^Fam&Ew4=@64A$em5<9;X)xYkC$x$wD;G&uxkyLKPAG||
z&GDyuk$fd{Q5To%8Srj{d_%+;R5YyR8(I=l5H=J&?_z?j)uSG0bEfxPj%F}!2Kmx&
zGx2Y=%)TD~X}EXT0izJe@%z~9zC0R$gh+|X7H4Bwe!+h2LAf>fk=9Lk!3a(!*Dy~W
zlw6}?XDYjfc_bq|K0x?*xafhE%fyco?|>4AfRI5I8@sH(8h9p7O^b<oV0|&)VlNbs
zLI;&N9o-fOQ*e39``LLu$oBTy$?B?!O7U#e_X;#H?8L$I!_GnH&FRV4rw6ZJL*K8D
zL@$3ngiZ~AB0E3ZaG>Gd?=u@glIB?e2%dt=C=rHSTnQ<A=T4w{5Sj=nEmxY#3UL}9
z>Vjpgtq!DFls=3xN7hksAS2B1w-TK-ITXPfcXzRDBUy(A8wve}NA)J8L@uti177u;
zW9#zd<Mr+bx&MWAzsL>nocrG|KKtSe=l-|#w6*#1{`cek@4x^3FGe|v-2ls%;X7bN
z`)xYu2?@0;7%yFJR?Nq&ma^~cL;U$1?`ly>Jt|1qv}fLIZ>xvaR;(?Z)ZW)YLnnFb
zs3NO&imZ=yjcc`i5vTG(@T=)(6wMIYlNXPahnup6`|hQY4Dxcg$^cpFIINrL)D{`-
zOKrH2!;0zxT+t4qJhsJaB%=eBXv4jF#1XqGC#*X_?D_qUPRvmGBCtwc3D<O<r(34(
zx}%A}>v%+dx^k)8u|bt6tmJGv%@@B#6hndkjl69LYiCY1eMzbFCv%L=NmrmFhnP^W
zjxv0W4x=4RqiI!I+pp<^dTCdIPQuLx8rJBJh?VZCBbrRYIZPY;M%AqbNDX(>FZ&Q6
zS?Uir%2uzZ4Jic>7%xFG+RWG$&~pDhNPa+jMfW-zWKGrT;ush#ZQSu2nfde)zmPF-
z=pym%{ix>r=xA#UQ(CZ6NsA6d^MKWpz}MhWLE_-Pcd>JB+GiES$#80v4ovM(-n#MN
znl5YnDkujqPM{@$gW@41<c|9rYWIwqHAm_qIPBovOwk0D_h_=b$yT;Lb|}uwB*+n8
zWD*CHH3#!Qi)pxn?WT;)himlm^a`z{<l3)c@D-J*L!!NI*EM$51>`Lk&Dd)@qTOT{
zB1y7f=x7$kqmrPacy+@H`ff+H4+}(~pMwe1x_Fm#P@W4pnq@)SSqkLRLsc`HMw0or
zJyqy0jgW{St-j&phC+pCRdH66OE5a@J$hwE(c5phLcyeNqq^VZ5#c8b?Fs=*F`hFI
zH~xSL8>L+N;1bRcK2Lrwm!v04l_No}!P2e-Td7U74+#YPBXSU1S2`Z|?@z!#?p1a<
zt@T+c6Y7t=FP65kS@bvc2n}TK*W&c(1X5Xi|AR&%dtaz%!{l<GR^JcIo)=juMhu=a
z22(JJ1ip9LV*&8z!+EnK9Y3Pph`E>92&Pi)PAa*Ik)PtOidD|63m!pt-8R|j_cHAl
z8JT%nM=__$_O?EnIp>xb;5yKNISA=ZVyDU-nJ&%3WjK9^rEG`ap_#Ss{X<L*CP2;U
zB2&O)guuA)!|U`8PrHlFiQh|<i8W*ojRa@QF%>T5ULO?aa{;ytBzfbEr*e*(tnG<;
zS@BtD)tQQOkH;+-r@02)zJBmU)2cjuAvtitxeTyJJ#%^?d93xOUePHo<jdh}zgQYh
zz}y}Fc8}Wr<spa_)l#g&#hd&HS8Vd*nHWlXCHKsZpj}o>e1nqvEAu4unDLiM)Gm9M
zJ)2%lkES<u8RpEJC*(D}o5FpRZz3`AWzhM~*_*>>M=!+Vv!lNq%FC(-<dL_<eS!XZ
z<T1u$LFr^KEH-Mj=bTxG%jf^}>0VuV9Id$KaKBA*;aAONR1*=h$Sy)M6h%ZYv%VO+
zQMf^BDIJr{gmcuiGtze{Y4GmaQiL<-F0il$Nsg<bC4oj0Tu0UD+UQKF)5)S@%8bd(
zdbrH*K&t>+0~b!^#bGDNrohS4u?o{wnzSV8@ZM6W*V@nP4ePBOl+mZyud$X6*vjcF
zsb$TpX2Wux(m7)q>;~Tig?&bN=-+!%Hz-a=L-7Lhg<aemAt&UorC%y{EoPe*b?MTu
zRf$N6^4&b?Dd0uqQMhfCS;Ui<h#srie0VlvCe}(lw!-EPHj>tGQ5<N|KZ1uipCai|
za;6eN{sRxkvjcJzERk`=D>&9e_MF>nGhieH{9tv}$ut?T6@nq7<W`w{1~R_rk|G;8
zSG)gG_8(Hgy_oaAG>!P;v(IAoA5Yt#KXv{8w?6;kqy5K6`;UKL`;W6{2d{wPxTYWG
z`SLyg<@zmk`&WuN_pZ!Vu`biyzZvrP=W)WzK~Y{KCgBnV3zxmgG0S8Eb;{2)V0oja
zVqZv<ojAr*9b%m4`y{8><uW%bhB$}Sk#}d57tM-JfQg-Q6B$UQb1kNjpf!yEPa>os
zvqYRyuE~8LqJJe>TI5!0j9FJoumb$qxQCu@Bc|e#@;k@oh+|Ra{c!H$8anK>t+AqU
z*%GC~WL*aewyHEN3;489Z%ui2JsgQ1FxOY#zG)TWf-(PCihF#_rJXKkeV_KQxA^or
z<SmXJ+XJXc-}8WFv96uXT>;dx>;3NZ<cinm0Wa^I3*~=q$#$x&x|sG|swGw^`l0Os
ztt$nK$J{pEapJ&&<MBNr?e}rdQU@;zEFEM~K_2xl^``^o%eJnt{c`j<DOv0aFR5x*
zv9k!Ef)ijzK`L@#mU{fqy;<2%Ai-n}YSs7l;G?qYsDDX=+{SmYp}MWf{(~23q=$VJ
z5D?8}PX)IFr8_W9h`d=dv=G$1FG3xqHgkx2kaSL-pCsI@O6wZtIfcgk;6$B#*rATd
zKaC}HwC>E4GUtxiaw1okHFtT(jUfHS5IGsg+{QUz`ZVkV7Q5W?*-+4+s{#vh^Bu&Q
zj7d<Wx{o=p{il0)1~KjWPR5)wOKa7vubS`Ie+A!5*V8h%H9NqUd0c8DcHJ#Qd)IH=
zH8SJ~8QjYMsm_pu*oGq_Fn>tn4Vm_O(^2p^1_R#M_|wnYlZ@{grTYVIA<o94vKaNT
z1xGSYs545(i&L%GDJJg|Cr&abVsnGN;4uc^ktRoK%|ly)H#i&+f{h_~=02i3NsDVJ
zdL*9oe_td=&z_Z-#CvXVbP7KF?#*fEtE1zC)9+8-;F<W~v~zlR{>|wzrhrxsgEEv$
zLtazYSKLh_=BS7VmU-5jD?c+OAlxr-i!jGx(-q-8F#vDtP^^%`u{J*q^E~MRtC$3S
z1-U)O?WlZ>EXkJ+jdGDy;7C{75_n{d#{(3wZ4k>`(MuTIV%me;B4B!Sx66trW&||(
zv4ZyWS8iR7)*o*BdGP#Q?h>j@#)HNrZFea8Y^Nz(Ke=aPiM3XPdZJ`-wzifI{?;Pk
zdr#iY9nh8<Py<Q_Cnc;B;Re%4l+r-iwxg!_$3|=A27#gXO|Ax+;~k{&Wm^#abs{$C
zY+`$(xVQ^)IRb$pDgcwl%MU$J6~Tto<4*DpZjr|{5<z+ggft08TY2YKYK1sM!*8Xx
zvp6hUZYx_4fyHkHVA=Rn$ralEihsVZaJpX!Z#OIeWaU#o-j>a?&{es(mD>v>)uSY0
z>Wwzq?P;=TUgrJ2efouxOv8bZzqOJ_oD!O}Omwp~d5*L4e((6nzN_Dy8O``dukC-%
z{hu*4xL){bEb9N=`uuaa^K;{Wf8PFh|M&6!FVMgrldrPjNSvZEdv>z>Bm8Mz;In4y
zPgjF9?>B{T7OdUcOkSq5w^{Ns%P#~NyVvlod5z!fy8pH}kLRZc$7e52PG5U>kNi#W
z_OWVkXU!<Ib+JCzcF5;gTL^0vF4+m|nrIwHrW!B3S1<KDP~qC|P*;raj*2&v5lj>0
zV1&#nwmR6nL>s*KJGLct;*SOmoL2%l1)0~tij^%ec?JjTJ#YBCR-60aKE`1EGT98}
z4cty}1k4NHGlUVlEoP~%&F%0d?#*8&t#D6mcOa+N1(?%_v2Xx<qe*|las6pSa&QL;
zqxnSTn&aJ?*)-4OJ%IaI(}a2SGVlz$0mSXeq$(Z~gEe5Jsz#Mj_p{3?gMM{g1S#s&
zevVqQgW<hJ(ALa(6E0eI>FSqX%B!4uRl@66e)><FQmq*7z`0OfsqB>UKhjtGI85JW
z9lC|<z=pfCGS^^R-^F#zWt`5%yJ82?H)jIX23Dt<CV_8xTz&Hf`M;nGOhqN_t?>&l
z^c;6M`S#EGu(zXKKoom#$v75!d(ZE|o4+gA55~3Fraw=T7R{44eNYjA-`lgeB(_PF
zsiHTFq9PQMdj%PYy9FY|B8|x1(%v&F{i2^<0dw38>?Dfl1B5UzgzykQk;;}omYqe+
z_Gg$}j|L+!02<8-R}+nyKaTy(AInbsu}G{HQx?Slw`tc-=$UqQU$eKn_2g4|BOIuQ
z;vN|B^hhxd2iqsD`v10-8rg4;{oD%fLXh<#>EC2@i>e|kiP+yalV-D-Y$hs`YZ$m^
z6zrxdjP*MSDL0Pynl|g{A!9EcIu2jj!hnH14tlH;RGZ0@3@N#m*;ESPeuFF>;_W%c
z112hdEz_nPi3a<9v24-63QYVj?~~`F;S_Z^aF4Me4kTh-zfOM~O`2gS&#wjDtBr>o
z14JKUq`iD1FoLG{2o7X&nDp-f7#8A&TdifZa3<;a`kn$9)1t*IQj;D6?pGu>F9bSN
zYnVb@S(>Fc;s}z0YJGYw7WUOO^QWlQWyt9!*mfaUki@r#MJ}@I^d`qZ3S|Fgtkdqr
zkwLtaDGl=BThX^`H->ng8ql!~37{QRiPBzluSenuU55@8snAJFCO^O5W=sf{=gsdy
zPzRsk<Ojjz;2Fo^Bop9djktY^gyuC8<#j5y9dYJ-OSXHgugm{VegNUCCRki{l@EVT
zCzJI4{m<R*dn_Q&sjJ*x=Z7Egm0E|ZC>5_En6i5E0}ieZ?d!hGbvtEM9-Ga{f!$&N
z6m$dD1kA=2F%UVALurHnA+9Hx@tN-Bmw2~6#RGbg3=q|#l>$BpLem8dVZI#QW;bwg
zIvXOQ1b|=*qOZRKVRlU3qX8^gmF!4{auxfTSZS~jE=J;1no~N&Y;Ym^N4vyA?ERjd
zq&TX}5yO><Ae$aGN3Q##6S5$&5mqEM<GRA_2DuSP!`9{2H6CSmdLb)YOx*`MrQ=mR
z-|<L;Su~S_ejkuSQOM-Qspb;bqKCd{?<P;lTvX|8o1h(#dm4;0*Z%JI&}hB^dE-ew
z?24sAI)8EQFX%>9ENf|KAmhZs9B%+oWA9=ndYM)?NGY5a{RkBcVxO4iz(ZVqZKzBD
zjffC5AtqS8mTeKoJh_xfXB&*6HJG`OsFG2tD%lvasvPp#_Q5UeBEzsQ27fd`-R@}8
zlefp{K!!91C9|}9O}dn3@=8!FBn4`~7CKBYpafRpo0J;#cqAY!8giLUMH4hvIxiRU
zVlleGNSe_E=ABG&kqafgk?PCA3^6~=^$NGGKma=0TL?w8kzS5q_hLNjf-C!zsoK9=
z_3dgr?FiUeqPW-(4Z+5KXjJE8qJOonl2ax;RPFv$^tAbq-gkeuzOEp6MSNL&k|xNn
z9stR!heD#DAW@4!abOEm^WY#b4qOm^Xv~FReSKd6q0B49N5?Gt>;X7Adn``Gk!Lm-
z!Y5p3V*jp8p9H~;?7ec27<T&tjnOW|YO}g8l5bmL4BrkF6vO$5P;hiwV%)IDuZquV
z*Ei3q!FL`G@Z(8TdV_l_PQ%Cu0~;MNi2iMQUodkzw=J`$9|-wnI_-loSvMOi$6jDm
zp9sbh;N_nrlNo9Dac2T`ByAkTJ8dyVycv#$A?An@YeX`G9bd|v>6E(A@{STAjo2e3
zbP$0XYA-S&VC3+J5-XvtS>xeM?3`k60J$q4f)otjxO4D8d&_EJN}XbYrdDcKR=yl!
z121q773ei50zEI#g+h&#>G6c@0?^bXQwTCvf^wECXAf|{(6yviM1oE>2PmfCLE@)b
zCJ>quC6i5pGkq7}r2JOxT(S&9<F=8;NxoMS`JgEl8SJ*TdXtr>%iJ|fTxUY>6wkI|
zIPbE_D1khd*IC*#bM(yEO47Nd(D^X`GW~s#{N=R~O8#O<9ye}%49X3$j$v~hb+e*?
zp^x4Qu|Q^vKJT1;^XART>A5m)gS>kow<1Y{*_~Ls-~z4#ab0Kqv6iU%vnvpjv#yY~
z@?s!bEV`5Yf;_vigphNU`Q4>BjE(c|vzZ`+LH<r)zw-tgz%_%ws2Q;aP}~nLMtv~F
zbT41ty!z(rqvMb}xAL@cJ-4gHd?;5Tx{4z1TSE}OKKS!t=it@R*T;v?J11`rPY=#d
zPCLhkf9)K-=zM$lOnf`~+rd|_4m&T7z7s_-Bm&n!a>bwew4KyCw6(6O4|Q8}D6e=r
zMsKVR?Ry&h2D?I{abi@ljg4K>7BMA~m{~k=htwU{qkga8cTNORLp257ILPcF7_L0&
z`D{w-FcZ3qk#|ltON~<}{iPWHOA2}TA#!|H(%f2w=q(MGThJ81Z{<zkR&z^%>c@Og
zXD#D;7QnLwe!yn|)uLjl2&vifT#&ScVLtF8KvA!BWv~MYQ{ZB;JLH$S0E&ePwEB0W
zWRZ=S-&Ezn%Kb&%q|Y0F(EUM=Ov=S^a3Fc*MkIOVf{=U#saSW}Wu^kAgo;;JlbVEe
zRcYKru}uZ|8@%4|L4Gu_G;IX3Gh>f~8yhPJK89~89#tJeD47(5*rrs|yUu;JJ%Gka
zsp6GKeVbP_IPEXd2S+U|>bv?}{)Q0Y1;MLUfso=>4xk3fC61re%J5h@vbozx8P##e
zLymDfCUxl|Z#8~b^TNWjsEXU$biuW%wd!Bx)A~MFTZ9K6CXdA+p10B;3Ou$I8G#+}
zp2Iu!Hjs#+CV#FbmE8Qu^#&jwWW5}0sZiX3lSh`xnCL0w?13ZK(X&GmQ?I&RvGqPh
zartR;(?r-ax6MMx(Fy~Snrbz=H?{m9wF9JHSal<QoNGUNYsXWkG?m;j^@2S!FIbf0
zeH^)Fs!_oA!(-wx{~S+k9^l>F%s<#$TkrF93w1f`dm#%@?{=9GdI_HmqrvC5eMN?b
zV(gl4PQpq(_?n3{*uBoWZ#zoF@8H#0p>j%E)bC#{?{d>80u{thsq5NKEQTthLn|zn
zrS9{<b$oceEP<tiP(3CludbTNc@SvNx*#ZK34-l(HDFXIwyDZuy?Eaxenks`P$&=<
z%nCaXzMwyCqQgrq1mk1@D)DpF1_`Ew9zuzh6{86)1=rlh7|84=0aKhg@fZp};IzyP
zC~)_`?F6p^-J6$Ysk`W-<}n)M+*y2~xLP&t?bv~fcg3HB0IDFYf=Aq>{aL1kFN~&z
zhyLeBFJ2s;9v+`}j=y<*cq+OAf4#F#2Z|gYiW281oo~(#JLfMCJI@Zz&O5IU&tINA
z@0`4lF%>Z~mMDZT?M4p2d-Lk(+0pr{@9Q!-&BAOkXvAelSmRN<jlf525hzZ%fwGwS
zJRCqMXQZZ#<<mvUy84f%j9+!~-E5JGmuFpD9kK`-i|?p@aCzS3V*4$4^g^g3y2K`p
z8fZKrnCdO12GNRdG*;J&k8V~(c5EGcFmb{_=;0&O%SVQ3`*9c<PcpdLWq$r(<FKt>
zOOfdpTVxX7m253gVN9x`i;JEey%sckcvjP}dv@uKB>1@G%nNJj&EYBNk;Q^K5S<)<
zd+_S$xlu1;^uf-vljCm>PtU~1`bx#Ivmu7j@UY=ZdGwcGg3t9SA2IPe=N$6^@F34r
z(A!aA&+?vBMUz&yL<^xu;<un*g`1(A7>a{k)`E@fMLUq$J0vV(&i<O=k&-OL-~i3t
zSTC|}s=|Mvg%(_skYQ_&ESma#)0B8~kq@OIjBnL~LegyxmNS$5>MEN^OB8S`!VqiO
zWFl4L*j#>zzAM^>Bo#1S4J{s{*^jKH*iBQrAmNa9rMB-$Yaf&MdfaGEd6k+34orki
z#SbXeTLy^*#9UNEx~I}wgk7oF=6H^!GEQo|h4(e4E2+A*DZF`DaH$LL|HM}Db|?<I
zcqOPP+oo6`4MX_9b6z(sqA=xdp>@Oc$ht=cp#`(EiD>v@bR!CmX8oQZR?s!wlcl2|
zY_N~ZdE;jz6W)(*ck_Ney{D`3bZRe6f>2i0Nf;DmsRwFKT*p`{Wj!^xjKoKpFYtK>
zI21X)y8jjgt~nxJ>ApwcW<qc~AIY?9J4*r77fBa^IRaKl))K!y%`c6Qo4FAV1S*rD
z$m?g-?M|Si4}ZLpy3L-B{^Dod-YEgYJN;IKq^ushP8aZ)E*+4_#hN5le;VmVa5B0r
z{NR6aU%KQ=!>GHXKGfY#YR;qemdvCX>s8kDLJnH3uqa2x^=LBf&d3s16{`u@8=|OE
zR(QdP6i#c{X#;<&PRmtv^2${=P)}+aT@5pa=m-pG$@>p5get(VR)D@7Za2adsr%QF
z!!kJ|d;!Em-_1$fYGp8rr0+D!q4z#maB+gxB+G#+2u~1_#V}Oy1~dwnb8~6-EeBUl
zIlVX=C=M-%z{dg|f>%RY2IOM6B@-(&>!^4+S&n9N<0sqm(d?=gUB&u>h+g6dLclKq
z_5&8aygf77J|Bj5=eo|3yh3#D)Zlak_M;Et7mpuI`8ezpx#hsG1hO%EP`kHx7Q9AV
zNmqDYq7xkq&<^c)u3Qt=!7E`Myb^RtZE1&=j@xSU>gTSTp`ZTsiFDVL#qs(~ybc+q
zTD8RDIJRqsdAnN)|JoyaJ(^@Abe-YB5+|0gWY|fmMqZAjtgtjWcr|FDC+!y_oqboq
zOeWS0MhFKq7|*6zu`L~`0GRcClNnAn`9!|3NJ*J))`K`bRwmV{$*gm%llQv)*1{dv
zjqzwT7}Vt{V4#oo!qRnvUpoUeGD}!SCU6ER=pLpp?lA!xwCD8b5*^`g(;?o*YO7!~
zC)_v~v>%}fmh)))=;ShOk~!+{lh3Q{S~S)hLGPm7TX32ifsw8a-iuuN*W@;w==h=(
zZm!Nnyq8Ns!o>`RsiL<-56Ll)8v>(UId-shnH(rVldMt1lbco}xoNA%H*L`WvAl%h
zy_QXlov<F5jBGp{P0_>DNdYfybxD(;^iiPRQ@W!G+U$Zs_GK>Z#hLA8FY5|HRvFSi
z<ft0T59q`h4Jlmwm=?P%)smBJ`7|qL{izJrtHW^WIA4)lnq)N=a{18!!G4Y`t>N8e
zM4}?A{lnf_VjNbis1%ic)>SDyzVJ2>+pDe0{!tz(m2=KJ6F@@mJ*B>-US)SL!t59j
zAa~)NrNS+rNqRl!JXA`WUf_|E`z;C!LAp?fP8XuoR0U`>YKPT}`aGIl1X4#`nHrjH
zTm-51yb9IYl<FEZ1?hEiU3dipP9~brVo6KRuNCVx{x5+3g8WFB2hgQ)il@wU1K3N|
z=GVWL`QY+P{arRZkUIUL*zM$%2|DL$Hyys~!X<%mk2NF6r}w24tZcKYTva<X5;pCM
z(4Hzt)+$b?Ky9&%0O4LRM4_!yRFM!5@1~S*_o6iFe<ex_kW_Wf_>s~Ur!*P1+Et$}
zlhoqOt}5aA>7ARy5yp<i+k<rN!ntAaf^T}|19uH#nri0K)V{K+%Zm#uGU#M<Taytx
zFZ`#`#jsr4m9>h-D`sm^HfZ!$9!kox%(zl?6%`~~B_6$aig<?=m283GU|PFQ>;tq^
z^Tf6~Gc)-#S2rKePF}z1K+N^Az?T^EcFg3ME(~Ynn?A@X_Q36E@;04}W~c|am_d#U
z2;iQyTC$yl?s2zG7)WxzoaQ765o;lD$k|Y^Lp0H4S!kt6@h8&j$Q+quB;I68{A)Lo
z&3)CO`Qelisfsm|RXGKp$Tn)smX{ba(&->o*M(^IqP(0aqN5xhJ7Og3>wb(OjfO&E
zNR4cP>7I{^hCxPIVLMMwBiS|k(Y__%RY9fQn89hCU|GPrN5#F$yWLK!)iFe-mq@Ag
zX5}?f;f6ighbv`h#oVcTSN5!gg*5YVM>*^{3CKKNW8$_1xy}OiQC@~07n!?Jqk_4D
z{X<Nu946<x@>GY9;oF}b$vv$(iE+YDrIo@5o{H#RQF<_2G9CXq)4G(stKxX#vx>LW
zO73<pK#Od8)Pra&)B9TV;uD=|?2^X7ylK5KlEX@31<KRv^<0*9C2d)LI_ix`_|3$=
zhdZp3KIs|_RxXr4jA720gkhLAH5nd~w1jcR6EhHZ4viH6kKk&`KhEYOXtO|B)XFnI
zD?QiF1A7j3tTVVR(03Inl{ac~DehFP?LfsFSmM~#QQ)72_$BqP+*4E%t!n+F2Vv%T
zxX?|#axPqMhho5$`z*DTDz4$cS0R<l&b*d@%&Ir`7@r;6vuY<|NK|Uhv{up}yOY*5
zWfD!utJ_NOd&vYty_r;+71^7MlbFZXV^I_LBr_Ny#!}cR;3{_-(N(xuD*<F-?VCEW
z8x4m)gqX_Vfp`pO3L=j;#HFJEEW(~<4|2mRvxzKAY`L?{9@i>$$#Q5CXA7m$l=SsT
z&>oWHGqOUdz8ft=bKn{?dyMmbW-Xmq`F;esGmr8J38IuKZ_QPUDp8Z)L$an|YxrFx
zYf_zC3bt2`LG5=~1#H=(s$7c$)=0{R#OKO~`;Cl_sl85()B*Zivx7Rbkj;@t7|DYn
zyy}KEdKQ)n9e{*vq5^o*{U23ufD`M=JZL0dqZY);Dzdl8h8y5tI=!snr5M>j26+eH
ze&<ac4RW1~&_B3ITG8Tzf{&5o2S(Q$Kv)Em)uY@Dl1mY5NiUtIC8oOcI%sA!91Yi(
z29yI%86Y3??s|E57g1>dQaS_!g=GpI`K=z9zal&DYYc~S`F`Mj)!p9*&;9$rm<2c%
zr#XK#j}5TkIY<GDl~`A+hwnRon|XwNJclw1yH_>8R#IWjgfu*MNCTcpSbLEdb|euy
zgos()5p5TDXVdgCFHnef>xf1HK~i8*;$;*>V=81KU~;>&{4$bWfI3!IHu^^e2YqRp
z!*;+oA{u6$^StP~^Wq=HU0J?_-nVr^dUiSp&aLBSShd}nV|D+pj9eb7>5(WdSwxRT
zbQMf{A*vpVWL&>ep!p|JZ)=|qXQ>2?KD3S<{l6euA3?VP<Sv(VEfjm6j&9gkd_U~=
zXT7YXg!cPmQM9mOmM8Gd!RguI^Uhb_cb=bo_q}++{QCnJ5g|)jI$4Hg63dx57=c8E
zd0kCvI=Ra)I#wg5x>M$XiE{6!Kp~y;!`E+K9h@J+AP!z1o*$l`)wKhzx=#W>dr7dt
zBuYGOuo22A?{K@Jq7dxszz8}E@&nr23beQNp!QUD<q&|qM>H6Ou=gkwsx-y)_i^a6
z+j=DG9KarjHV3k0piG5ovmNP|fxvLfOF?G@=w%=`0`=oi&9We}Fa(c7a=6V$qBjEQ
z;}9ExRH8KYvm*YNOj#WN{&Joav+;N&wt4w3^0R?CYJ=|kFoYu(d(DlkrGv;rb}<!b
zokjaDlC~2Gp8KDeIP=`HwOVZXjc984hE85-5?-<_m-{Ov@tm|#%$#rqn7gs$xUZ}t
z$r=(et;b@M{hMNNHK~H-Iq@A3%OT+equI9`6yEIM`C;dv^XByAYawjBh8*>;k3{i5
zAA-!G%y7d2f9keoW_MSqW?HRUT?b@ovoF27S|IL`s@3MT*q7ctk%bQ9^6PZz*V=5r
zKXe(h!+1yvW$Ix?m`JC_N3{x>{&1U4WZDG`YGe{oVBWO;s3=fT{Quee^1VimB;oy=
zr$~Eu83`nT==L#yn8n@L%?^DG(DwLq+xnHH0t$3lC2@3{eV=`feX%c&JR+;IbO7CM
zds;JYq^iuwD>5=N&a4m%9nB&W`sH`Wc;%tInT~nTIv9jx7Rk6Dp6k^#?TSzx@yQ8Y
zy(=p7u#OM&A-r-`>!GUkxvFl2s&1faS>xN(vqdB&`O_aS`5>$hcf$N}=23l^Q)c1G
z{&WUC;2C6UJ(9KLk4KNVr!!9|huC*Gq&}XL#SLT5!zCC|>H{%L=AdzXxW`#@-vejS
zQ%7L`J;srJxbYB!%LBdthyy_N;cJ9&YsL4-SC}iSGeLjr-B<g?SI9Nqm%GDZ7lTRa
z99^7@m=cT*&=-j;*Vs%hD{U4@nmGyRjZC0sUDAbeoy5>G`b~iXp7oa2?zep3ojS`*
zcZywT@ewu`zp#iobrB)ZNy3^(ulAnoe+!Q(Lzw6Ejhh~2-Q@yO5{Lp@QOD2rk4&&=
z`1bO}!H>;vWjlV*R}uvaO_S$LS@LWE^??wj#&l3yB?b?MAOc1i)FbaRws1tuYD<i<
z35*xVEMB;K<JWA9y?b&(Vf**OljD6XH(Pejxrh{=;-erUe|QSApijmhfryykX+8zX
z(+u3!x@dVJ%zMAo<Bvr;3cY9EKlR9~Z^wfnW+B`ryMo0?iepTqGTbD}byTE)f|#7=
zcV<PLFU232m@MjK^gz@kr|Dm9r~t#T@agh9V8WjQ4esD>_CT1BoRo@<H~6AzLu;;T
zAsI$WF_c7x)v#AWKMLe(V=-)UgvI933m}nw2WVuz-x-r^c56!*Noa-UoCse!!<iK1
zGJ1mT3uWZxoYa#P!&FjCdCF)lOcpavDBC-(4=*B|`0kBSIHJ)uG=woP$QDNDzE-MA
zDcQAIZ)xLcx#C{r(}z1(Tv^N|URI??K7`%=X*b@zhOy!sU)19gN~1-1lqM7{6*Eys
zI?=e%mQK=mr2C&v+GiMnXK&KQ5QPXn<pwFw)G$gYDx&BTT0=d4YUeif6G>~A9)0NN
zQ%RdAb|4ku;wabiJx~qb1M~e+jd>h4iA-c}4O382_>z@`)8~T6z=z$pM{#V>p-fnF
zG8qw>>crzH>4p*K54`Miy6V!(Ps=s(%o>xb#c^Ur@>%g&nX?m%9}nam{6X)pMjgBH
zltkOU0E2v$-^2am!9-v4I#U;3L=59Frxg6X-ep{tVKwdzRl`PI$1uL7_3}!3qgMTg
zb;2<l@^yPVTPI&(1t$pLFH8bIBHaAw5pxko|33t7&P{c*ZoZ{@g@&EpR8+luZ_;^}
z52-AUjBl)nFF>ehjt26xg>O3d&~&r7X`i*Fk6BhTN3;mRU;)+eL+~u+Q!2T*5BN5l
zt1GvNzsuY#G&M6LQP2cA&1@!Qf(!}iN5xq3Lnhq*p%7(C(qxY<1AO^^z|EDsld0CR
z64%;E((PtrNxVm_nGy%#gU{jA7z*CDTY!JfZ%Tg+{%~`9r_DM?c#7HE)<)IvbI4{k
z?b`H%W*E1VsB6tf(y2$uTE=q4yH6Hpkumo_%@JL8nmsZd_wI!S@h(ub5DqC}g$_sG
zKYe)KD&x;P&mw1;iTPy+BCZH6dp`OIh3Lc8EtsZdE>TgXY~7XedIl{B61srlT6SWD
zI7s7k&MZun-NHtIHj96)Y&Z|%zI6=gRt*2r5~D?IKW@iq8jTXpoS2}f%?StgPjr-i
z8^r<G@3%l0Dms?K2s1Ve`W*z!(_o$a(u-s`)D}#OU|RsEM(+c^?jI?pz&m>$6nj6B
zWC&6JJ8BPF2~5y-7VUNv%zVil5Eb==JMrzL+kLHmlRv-bSSC6X9!3=?K~)`8;58{j
z-0$z<yofsIh#)}jYR2I|!k=3hPBt9?h%-deRyycH6N>VPtGa(>O)sgD6vi`wW+{zi
zstIU%$wo$p@d@5ZR~qViizW4T2K1NePB(gcvr8#wOI|0b(^=hZ(J(Z0DcfO(#vC2!
z`BP&8GMMZ0n(nHySnTz%H=_@?=2Cq>qF^@vwQ}h6M<NP=Bj^JWg+=2q-Jd8taGa)3
zM4^x<yq-xEUVq#~;R$-b^f6GC>c!);0fjS417`X*<duwTMbZnhyI`dg8lXISss=68
zb=Dq!E8Rad*!z>WJ!DL=hl?n7#~Yn!$rtaUEeitn5I!W5F~gbb`iBdZ4x_dU=mgth
z$qTxTv%RyVW5oyK)6h`!I5Q)}orzFDG!~=Uh&{Lr+sAEDFQ&4_T^Oj0m;7YVO?Ssn
zI6E0ex2)>)X8<TGY^?K4BPOk9uFG8BQYIaugalprQEhJW!f8SLEk!P{(=dI?wtWxj
z*)zD{{9&Xu>JW};32AoRWwCrejyoJXEuxy^=)!nJ=+JhWnp{8D>h=IRa3St!%NsOl
zc+WRCJ=AfbKtzmk-J}qM2S%uc2X>&@ghjYTE1BPfJ((p5cYm%9Zf8a;{IKuech$@G
z9`BVhQn*V81E|<@5yDI`LV<v{ctT^co&473JF4T+q};)*eU=k#15ZfYAf8T`0q_*Y
z+$TGbZrnN5$4@_?$9_XLC+ZB5BkFbsSCq<R0A#y|+1Om8B<(<_0^kF$&a!Dxokksw
zL^eMeV}4F{3N1?gRvKt;_i~LhK@H$|Msp70RDg3C#oF|ws{Y+AW^vq`EN`vj;3!Tk
z%GI>NiHi-U>zz(uCSVPC+d<d;fMMJTolDYWaWWuu!aed_np)rJc#4NW-b)mmP-kEu
z0Ttdr+zFDJF_5CpvsF<vk)8M?nsmq3Y{-VFBQ{=`8MTQb1blxd{VuNbo5+OBM73b|
zD!NID{ecm_7#yAHY@hJe2||Ql7J(eazfO{ozEu$-(1v3~)=U))&z;mZv84`pcko*A
zy`=^>BLayzK4H>MW1WqZBlb|!TvBKBq+Fm$x2v$^Su~=R>BN|u2YO2sl``^VtTnt)
zd1mka9-7R#7^|XCr?{mH#j0D{UySyBqai3|DBj(CG}p^QLA22%q0lyZ?`&!<)2vl_
zdI62R#9Gp7_#i$3?qLKdv~A;(m8BM*dWUy-!zkR+zt}$BE3nyR-og*nqDR|hdmj84
zQw23)@5Q5W=_JDLEHA6i_xnKT5}DG?Y&MZ`Vsmq!+xXmT1M^a4^rWM>7bUcNQEHJc
zO&!@GTC|9v)ptt9zNP_`YQY&<QGq0C3`6j?*y*bn^ZE9P%o3uQ%(Z&Ax8l>JPlG_3
z0XurqPpq4HM<Y~5+#lwT*Y2T;W(`5|R*+v9h?hHFS{vO}6TLQ@?@>7hHJf9euq>BC
zD9hwIw$pvxnxmpfckjN=Q;`zLi`tprBwz5jFHzQKl!5i~WA0_|3YnG>FEfvxWr9We
zEoZyxSitw#i7?X}JWlL+FcTNHgC~Y}jS*?dc4nYXF${P76e5;C#d0F^t1G58v_AQ@
zC|aQmpxdp7CIdFeF$;dQsq_0F11~woZjhbG;0GaA9yx4Zo7h*a{omGbgBgM=%FuLT
z847Mk4nO?PnyYp|U%WDQ3AMHK25aPEFxz6)9U+ymmi%6UDGGJ3TvohNwDo?!34iu+
z%((TrD4Ow)dnTX#j#KH+k`)5jbgDDw>EUK4NT}b>G=<NE`_9)4v%`Clbw}G(h`L*o
z6%JsJ`O0l7-TM&u9F@n#Jvd8k_@P7o9$0IWACPSrXvyOKq^D>WX@hdi56!!mG*rA@
zp?PXqh)v-0+i#|+f&yjrH*pzMn?+!?AVhxxVEqCJRzH~mQe#swNK?RpG*1d4Td7M?
zXZ->Q**@76)4_>Ly}hi8q^$w*Fz;z-8|C1Er6wl>KOtlmssy%Paj=4XoD_n;>cZ9V
zB`Kb~RIH8ygklhj{H=vz(c?;hKg%34CX1pIkP2td+mB{O>zNs)jiz@QYcm2&KEHkz
zd{4BN-V+5xujh<P1Rr@S5m6jFGE0NPDEV=(e#q?I&PSuc#}Zl3@Bed8hoOG5chr3O
zZS(2=x8Lp^?!7oBkJ2Mnkh{Th&~evKW~bcj5q^}W&Ud-dpy>I>EZP9lq1%w7k5e6(
zU<B1mWzMmRy5`iXo?ZPC6tC!5jno~lv={E+xMq2-70Gk4rQ?;GrE1+BkJ%SmeH2Rf
z_OWcNjZYivci33(b6;T=8-LRT_RL|7X?lu<TUtmYtoTv<LUh<RwNpB-c{jaE-QRw9
z-+!yP0iEjJY`}-zN$Np%Pkjevr^t_@9>zFt0mqu6mJUX_w4e~szTszEc2iE@lka0o
z{xBF_kU17r(D@S%cK2U2_g=hy4#@kWl<92<6gFS}m)x}4KE)Z$*^b)F8ZPK+ozQ4x
z_9}dFH?6Zc>TFx>hQ78!iJ#!v&(g3W54>bfpn3Ws)|z_*RKnv5q0L<C(qK2;w)_w`
z!R|xf1YtKDU8v3^^h|_KSW(BSA!ElE3J>lfV?no3p9nF?`NS*v@b`1eQ@l;H^$^O7
zxjNxy({)8|f4lpGJ6(z}D#6V^9(f7^2c={Jn2cT8I48%Lfe8K;cRr+eI9ymzn6%J(
z6iAQ$+}IIdleY4TT6sqZ%EX>|dAWSl3cy3FSTd0_VR7t}twL@G5L2r=9Z~uS$j~2w
z3Q?I4jXc44dI8E4Ko1j72@m>F{_#RM;_d2>fI|JD2-NR_IP+7Txrk0U4%d7HM?2ko
zbOh&5OmEaoyz!f{sFR$C*cG8z<98!6MdnH_V5GxSYp)a_b49hM;NLy(5wmX*QJ*{N
zOz|F0qcq)u#ORvL4{yU9%9n@sWystlr<S7q6EqkxO}f^0D#3!LdC_u(ZR9**A)_*@
zZ3^{c+XoI9Db(1!UEPNH!Rg&qTCO#<UZD3lhqLS1;do8Xzl)*PGTuwg4nD-+czvEp
z@P%as)Rz*lk@#6e(SwyTM4yVxV-Yx8*7oPZYfAoIbd>y#*eEmBJtPX|@4|!{zQD)*
zEZ+qIvTOb@%j`3MGZ$N2AMdi>g{{rC>fy$J*M*<SY!*d>ZcIav>30`da*oT(?~P$%
z)n=lXj~{ozuyAX0p;uw!9}$L?B-S|O>XNm3*Tow5kYtvq#Dh8e0yEXAEAWy8%1b(%
z3%<_goPM_SZ=XIN@rT?xs{HSlU&UYl^>O>lcIWGpHCoxVbktrmiDTD<lCyTUKpX4u
z`SS5&`U{`T-}+beM_>F~{n7dtkJca8>yJ17ZG8iNZTy>B{}2-}fh{~z>fUg&gZqPg
zV9RnIvZPBPx=pYUTK<ZPR2LP!S1|>TeX_dh9PzqIYZQ%cQ09TbgSFfv>Ym2%yatc)
z&6C0Kh6B%*o|M(b`uf-EyD09e?<R4-AGe}OM{S?NgW9Qm@Lx%Plti_Y(T;{e{W}^=
zMs0L+fSb^_c<sbMArn@BjtAOBaMVjppp)l=w@J4fVf|N=Ru^dWLDG)<a3i~{sg2q?
zI^vO!;_${+d3B(!BKD;0BycV!tqDdPMUNl6`i{*!Rn(7z2{}v-TH`2zKmAxmV|6wj
z4>#A=dfaqvFgjfWm}}75T8%Os_g#u(qL2p^`Vwg5zq7w!@ZC~|+M>}3hx0l$8J*c<
zurSrL^9*F?NwJUJtJWxi&K;UatEi%v&ROBeVCv17oo|~u;)jVB6LI`<<F=X&{7#PV
zNgE>&m@upc5NeLFh)A2KE72TOtfMN0eL<`pLqQ!F1n}_b4a`^4c0GjUj2VY`^d`+~
z;@n&(DSu7JgHb$n;2e4#@Is9u6LS`CgA~s_9_hhp(`FK(pjX>u*nu5Gxas6396}@G
zx2c2F7*|xWURbBdfwL*Oz9vcf@+<vrX-FGk(|VCV-~IbubN68XyBB*;n=fDO9qwWR
zgco~1H21&N=@9n+-|jaDd(Cf2kxv;BOj}KDtUz&xf=h*cwW1HUx1iM}K?YXKAWH~2
z7=~fa43_a91I7;~r0q%wpjSB;1EW&k5m98q&@@%c_&96f?mTUfVO}=$OSmnqhRC#)
zZ)6ICyQ6g%IYDi{wS=vMfiT{!L1SIKW9^JHjV{6UVGPq7cQ%*&cs*$S4@-<ziUuWh
z7oTdUZ@YzHC6V?HCK!)HyHGL%!d)zINw#8~cefgHQE?eBQrDU%nzG^GJ+D~Kb7|sx
z=t0)lEg-$lJig(_H^#yE!8@6I@^)IcAKjtb;_+L4{8o<UiCIJ@g3bjtm+~fqfjS<M
zvk;SI^VA_fU-2~>?Iq*liF4SvtlKH8O*@>Dc?9UA2V>7i7?F--ab1=DYTfV|1c*6(
z6-{g`6FE}&`-!>VveOu7`Q~0N-%M%wW=6|z@740#DJ{R{mgk-B$P4Q}X9t#P*6%yt
zcWnA5*z{r(e#fTY2Ag&>UKB<(ze|c2kVlb+6oe71J!eOJIsH9);{z({lBQ%jk_pBD
zxf*x`X>OV=RB8Fx-^RvLN4pS4Sdw^|zNuA{s#-rN7xgyEg;?-&3L+~oeFOoHB0rL%
zxZQG4TXT0X%yltOG9%a6eFtJKB>n7R%eqes55-2>E$IC`9S<^C>MpUV1zK_Fzm+g>
zjQ1^wv5ep3MfKqbUg70>124?Q*+1?vA`j08T&)wjYW7h^(2GVfhqoa{II`2#0hS;^
z!K2NV4h&(MbYiOAiQ%#{GWXIM2%;g~8BgXz6VD{UY;X~1F4RO=$B^*}&+0zus|zGP
zMd6t#iIxU6$A}l|Bmy0f(dshx(^--7yt`!TPA`Z?G>hnlVh-&T(BK(S_{U=Z`1U0E
z1~}mR*iEUIqtMaoY4c^0<9Iysz%azKDT$s2`^tqug5Rxf+)Pdkh1lHOPx(;veG%M2
ziP<(WD!R(god`|ob^!D;KyJjz1qB4g7LXY)OkCH-TBDegahc~po~<^BQI;c+$7eNr
zT>1xaanh2U2ZS9?BXbaY>uSUP-wMZ!9B}0_pM<;Z)?#y$_hN9`Y&=&Yx>B&xj?ytp
zxPbJ&h_4uFk9k#D{6t9<QBpQOi^dq>DqE707XCWH2zV@RZBIrc3@V$E;D~g@l%0;_
z?1_kKDg`<}?%s&>OgtrpNt#p<%6lE0N)ZBE0Vk!&eQ4tAZ87B)@qOECg#|i#Pte4c
z0@(h3n&Y@+KC3sp$q7sRr61VPhtv3{Ij7NHtn)W&IfM*I&4q~23AycKZU#>6iSuRn
z$Wjr$ljb;aNJp+^!{KOfoxo9o`9Lkonk-=znj;;l3utpYNttm#Y&9XMIql*b%1_s>
zd7C5`T`%3G22154KDsp+bd@buL|ZD^)D<r#It-qfh0V>wIO;A_pJ3tw?>m!T@1|)7
z1$3gA5Z7#vd;&)KKHEIu+N2OBqSF}<g1QA9^xE2gy<75xg&Ge|x`Sw3Q5!q*>Bh-c
zuyAKEfupICDcsqb2KkX!_|bI8nZf`WIzCTknkji^e8X4Wo(QvSf{%Hu-cn)VJKd|u
zaAFEaS<FvLZZelViP6GKpcG;(<-s?NIb1X+)m_p`f1pK~?~eE|%TmDP&#@A-c=-hu
zYeB;gtGfJTUh${0|9IVtZ=znbknP7D_8*VezxXO+|FQn~ll{k^)BfYH>RB|oh}E+=
zX~kW&eTKhkXY}j8PJ2<(t+fZeKeg${>*sqv?mpkmnSHqDOWvw@(Wf~zyo(r(MOS#W
z)}9PTvHX4T+Ix+uZ^X~t(I~nJTbKC#eYJadwD+|6&5zBeFaP-?JkkA&Q}kx<!s$q1
z&P_i%DoyYbm&j=B7#+8lT;nB_(%MHNgm-FC2*+n=3<pym4S;d$@fvV-SBoQqM>=!5
zyLTZpSRh<VSjWo*I&p(W9-X)|X#)g`{7(EM37?obG~F8MZq|wd@~){T((FXE%MGvs
z69xmvB3);tl!RNsy#XAKdEJZQ7>9|3rd=Ii_fGpg`quXbjix7LH-Y@k1XgL(y}`6h
z7+M2v2lDP$Tbh>WL1tD#E=DHN&^EynUW=woEP;jS{wn-hICMhN&qcJbu`tkb1fyUy
zUmfm!yLZ@p`s0h;=lf5lhc&8XI~EFkR9P~iq<_G5wc+=#;Qvvd2t@mAFo3zIqylLV
z&;hp9HKFNSV>iSY%ZNlV>NHG1v2JpK_eg+q0vN5+Rl}LlzNi6ZqIIiGAEmK4x<X{t
z{o*_2vq5iwp7VoA`qY}xG&tZ>U^>ef>`mJ@slVB8o5dAY>$z|u*$@{Tu6tW7zjXjC
z?BlFAyOZ%?7cp4<rb?j(zU@Y*sajUh<LA-&VDt@4?ceM#7}1kKm%f(o(g}F6L*wN@
zcejR3<*XEo7|u~?tCY3E$&9$V6~h6EVy>$Jr#r*^0<g`oHj*(o69*$8VRUk}A~ZG|
z^f%$<=Ld0=j;raflL+a1oQ{*8)vIJ0ku)IJ-Y^;?jY>bS0Mw(iq<@|mN?K8`00~w7
zuYavq)YF&yo2tHEtJl~6y7uUC?dz|<`05e$bAN1_k0ohVZ%+P0wJYikyf|x}ygR(D
z&?BTu_}4ri-_p-?(mo@ijPrK#j;Pqm?Ggqkd2=#~+VASO@A^<Qo%EXTl1BaZm%e&)
z8IQ*FkGg3lstUI(*V;8aSrx66Me*jwFB{YjHh_5(l0Iep=**aTTEYkj`c0ao&gkqi
z8YK~u0iBFyg$H<)09Vp{XS#5GYrfsw-Xh}Y&b6Skzr2G<!DAs+9h#4&6$3u$4}owy
z<nSi%`ZhSHH~jdDjt1^d2Opp%qi4#cnILC{SGqb&U?K2;ycs5KIsu9Dm1|~V%<%F5
z{mVi3(&Xj=E<v3|^pj02yYV%qDo4@o$$z{ob$)qN=Aofk)+uZEqV^cYY$A~#{DQma
z02Y-r8Cu@vW*O>hD>|9|kNZs--_Y3MrZ*f9P-YHn1h74=0UUU)BfYYop1xXI3C}VN
zM9vG&pjn|F{9r_R!h&oh@@F=k2a3@-&=03G!6TiR2Fgu9El~g4vEJx!#)HlP_TFI9
z?POUJ?6W>h;ilfgERCq%v|@q3F$A`Ng)5g~d8wTot~Ase^a&}c>n3dJH)ox}IIX}B
z7$33PWyM1?06XbeJ;lwfb+!ShHv;O7gnCm56?w9zu^>1yDBW(<m0gwXHfqGk4|aQv
zzVi(uO=*uBNiHGenWN)b%9p1l$JkrSvhc3OSahod(SU<R(xo@{*E+s}4#H_*D_RPo
z$%oko$Ov>2#OkOuxQuJekmr-#=!`Nv6reARyGxqOHZJfmTxYnea!R`_GiGI60Y$=8
zuFNX_eK=Z$XK-+e_71Qwka#X5=K_qXuvpu}f(FUs%i%a~4>16@V=@UXx2CtKS&*XI
zmg5nI#W`pN;&_Gsm}*Pm&}g3p>h7mcn6_5b3%K|lLh0ilUjh60`sld%&0h0xZ}*^F
z0I%dmmZVL*2^fLpl)-1F({_XyV9jQ^m~Adjo9x|Dvx!As>_Lg+m(ABld(Go#d(9`i
z(C6oS$Io6q#TaiC@9m%8?Bd&({L%-q_s>@c`%m_d4}L5!c^2Z91D<|su&V2#D?@RC
za+n(yG3GRLsdC2Qv#Z#zJ8Ev!9AY3pxdED^!4>=l-gi5L{DwW2ux%#EeYgr<E3~nP
z9js%#QnH$k;EH9@zAnl!C8;Q^Ln|hOTey>^lTnNp&~%CIbnNY(F1In>T5-?UYBdYk
zZ3PsIG=XZ4n%IncUaFVXnpT82NyF#f?Uec)4iN9}f+yhaxbDluucLMVTR^106W_T}
zUnOV*Py_yw!(rNPY=!Cr9W(Xe>x}x`GdFUKTnR@t$q-V_b);@Ca*IX1<?3$gxjkTd
z%}}<cLBRf62-sr@!3@Uqbnn-iDn0tLTrEBNs%)X^^}*(V72}R*m~#AdBk8;VS~+z#
zxQ(wJcsX2ToKeB+P+>)mU`00VWxeup5ofay6WBRY5JT_weT#Fx+SB>&pW-^OiAuB`
zhUQEewTli7bvTv*L^E%Jz#F3uj9Ll*mHj#UYuWdSI09hO&f{f2PD?$ml$<D4$|M~2
zk|pO-0sC*a)w<$9Ge@N!hhfI7rJ_5J*UN;+rN@u3;NiGN(ULGoQl(ev`Eix9xsi{i
z=S$BnEhbNsY=bkZvAx;g-g+3@mO836gTLHZHuG~0nAegGE^5Kn8SEkL8<^Z0#^NlI
z_RDRaU<on)YD2YGpn#W0tb~)aBKL4$5;3!GJD5-I-aVS#A2OdXp)AG=YW19C`jf)!
zUs(SajoN4SR}*;AIW>VZ^ndGLeYKI%|H0?WPx`+<r~dD7aF&eKchTrnZI6bhwMX^G
zzTz)Xsk#AZ^2u|Yi1BxOFX66r_#<w>oDMPbGN|2~k&2x?hwU|-fBI4htsep=(MBQM
z-UZ^2puZ>F8{m3*v{zBVt9(Ui+bvQM#GU$=p(IZK0MO7iYwEQl;;OiJ8QDcMGNOx2
zk&O7F*>XHbpdn+2&!C#kUVY<hU<uO;bYSlFnkkUK<}fX-*G?+tzbgErZMt;@WPO`a
zO*<3{!$z3-ix}q14!^YS*KA?U)?I3*b=}3@q>DYgjN6Y-n(0;Y_H8$=m!e9&vH{G<
zM!AxC)1v3_wwwXDF$3Tu`d>W&kifwk4RA=l^`&JrOu8;<d4v048&Ri&ZO{}?TIQ>6
zIm)-dZ9gjKfQ3KPhQXs>pA(Y!<T}MOyk4H#E9}bkfRG6Bsv<_8)+;HjLV50#D4Few
zg6UnOJoa}g;Pd&^Z7Me=SAU?5`Rjx1x2oA_{EyplS=`FauljYyZlcka*30E7p^*aG
zbiS}1oTZh_qr)>7S*tx7r0F9LqLrV02g25@@H$X>lE4bsrgEaAZ41o#Y`E32FTJ2t
zw$)5%DnZ4%GRuOVH7!(>4SExqBWR;pKn`aBfvsC_%%*LR3TfM;at`1|V>VF;0W?jS
zM4j`mh{N_Q(h9PNL#<%<rlR8_NA^^@u=okC4XWp+6kPs2o#9;i6`|BdM-mPcFs2Et
zb#vG$X-d+JhsmRj()u+JY5MPL&9LLoGK&mCJ&#i%Y~`mbxih{KjUzU$wcTkouW#l#
z&n3=_%5Sgcue{+`j+)okP<aT0u5X*CJEiES^`El^Z*GDG>$!q&-v$eA<O*J01q(j<
z*^0r)jT#0sfJ;OYr<=0=xL#4?4eeyR#C}A`+FV_&z-+Fno12xlSNb_}C~x25^QyYO
zw$Eu>;jp0M>RMH$SJALEX2B9W!XUsQoVukUY_$@0xk9T2=ZAiBb5o%u;B81P4PlF#
z3UA+LD_mc@3c$2Gp4|k~*+ahkkyY!we(@SD>%QMT+{Z9GbXZ{zg9jRPmFPpKgwrz|
z#q97l-k?+K-?cy~SCxiHHQ+?#<It~5mB~DY*p2Lj0|r)c!mn@q35VyaIN|0wHb~eC
z6{~BjcEV9$h+p-CdYP6ZzY=0%c_m;PGb;g8EmlG-9j}C}z$?KO9%?1t(sY`+^OwX;
zDe%H-%9=0`9PE5+k~7ACU(DZor0HSy&m*l_{*USSy2k-_kOM^CuUu4qBT(KJFXj?E
zv;^ymvx&Kd&4ApP7v#oN&hvqSa%N-$bIbpp4JK)PHt2TZ(Hdpd6A$K+0M3&CeYsxG
z%KyHqfALBF_a~J9)z{VcXlsi>iR1Hl(vJ&DeUkOzmi^}7<&(cRpS^^`>DkMJr+bGv
zfnTVE?>+!z5}Qxi0GbW<oijFt+lX|a4$@btGq=6bC%a2mm~;VIBJdI9HEJJ`k7&U4
z_!w;gNptsYWK^Hot90r=d_vTe&C~%VC40vJj7{o$Q}n$#w$Pi}i!sz5#sNzBr++6-
zWy~UbUv}*;t26#l#*Vzhe*%^s0#?q-h-!E?87(M|PSLtiW1PVyy`XKj)7sVpO~ZBd
zWiccT!GCL@bIqLwzu{)MDcH~@?otB1S>J*`+Zq6TUtKM8F+FIdyFZd$*WIeBWXqI7
zBmHR9r^uFl8*4z%BCO(#B9$iNqJbkS?~mE%18}b&x8pPg@<RsxxIL4xYQ-lQzq2=h
zWADbrf=o+EB`MdGffT?cyEJDo0^OlfQkFfZ4Rr=5w;G~qeN1jGXv+wbO?I&Tn1U>D
z-q{^erym>DH~fyTF|sNe;<z#CMpwR1lIC!X))<5hP2u~#vQsiM!M95epg|7?KzK$+
zu-2WpR9;EMn*^w&c;g15TZ9IoG1IPnD>@?FLCv&DoPx<=O?2Yh22{`BDqwoS>hFZI
z*y0X?vQg(8?PN-219gtiirfy^Ads_eX_S^j{f2LH)xO>T=kq-Ue7X|d92jR0M<J9J
zf)Kq%!(X%|?gCw#HF`37G_Rb#pD@nl2e9pYpTt+&uZMs_J9EszN@274RUsq~Vec*e
zxm^m9WI}2-$sVhSi7D<L%y>-EYvI_^lfU%f<on~G-S3v>q`@D8054@pFTbhQkpk~D
z*437(Rugxfno4)!zxtLeHg*6LZ2&|$utwvZbvBK8r*#(I;M|rQXE(dWwc<@Gw!FII
zM^Kc(cs%GGBCDtkU2t`=Uxj<EpNtRCd%)(+G9UfpL5ReRS@VcQ_MP11f$@ziQ-`NX
zi5VLGxm}t`8R%-6p$Qa(8ZL~0fmhL}!v`&Zr&xgsjp6{vvvnx+td9R|DDvT;Y7{I2
zjR!>q{^)8Ow%WR@wl>{EaY0fN4Ts&E04*8xo6tMTnOBlYt$V2Kx)asV#_xKf$}+{*
zG0*uC8Xfwn!!d`4?;S~juxC=8tizs!X^fCXtC=6higZUEm$;FNr8a~vu>Z|C79hqI
zH%o?+ZfUDFxA1odRzq>s<P;;OVXP}<Y4ov={<+nwD7WF&_l9+{Lg!sHmit~EtM7kV
zQM>a^uE)XW7O^sQiG>ch<iU9!UFQln8g)DzB*0Ly&l>ge-^{XZ8b?Xd(#(b`vzpjZ
z<K7UNDM!3{({3$27{Qcy$E_XDq;fuM)Ej0|ZeBRvurwSPoo`e5y?J;0U=p~{W&Pu~
z|C92+MMZz}$p1DTJ<iJi>R&znB>(%9%KyHJ`vbVFlZC(9KF2?`7Co!gXBq;2k_!?c
znI;j;$O7k<+IjI-`#VZ&k2-Oi9f6t{FCiQCVHDG)qS0_+3HoYN^{YDY!0nb{Gqr~X
zJJ82n0*Ue)D`8a6ocs7DCqy~<4|vFd{Tk{g^m~ugI=URG^`Cz^CJ$>Y#~#uF$NXoo
z&Jt8Z?Zc&L98J`vy8LCu)JIVb*4FV=FTGlqzo572PjwZim=`CBy1yL%Y?`K0=)P)S
z+6^QCzG)LoPA`Lr5L{@>6-EENq=?2aKe}CZvBi#>94RQyVt&a07o<3loyWkSIdk4G
zSos-c0<uv$UGf<j))2Wj(E=*B<ZI%!#rv0im<9AdVAH-8I*w;npo<P&41-qzYi~;I
zeZXBc*UkO@D`YQ9Ch$GXU`4o}Ez@)`zs_6|OY5WpRr}@nXur*aK$hBPlr}eE2kf}z
ze+8>wvXR;<s$QWmBc*bc1FL1H)vH+H{!$<R1@=uC>D^8m&$F*S(*0r(@ps)e`B!@q
z-4@taB{Y<fb(h`BV8`WtL;tbtjuR0PC+MsNq_DHOnf^MW8wEdtLnXc*Z*Jn%Pv1=g
z&d=I_qxtg1!H*@cOXbX_=DVTY+FsevR#qgDnX-4ifl=wBU(rJnEnKJ^Uf}*Fy<OKa
zsSFnM9V)l%jH&iI3H{;Dt8Kq>p0wjZ7mAc<!f=X4NxRuiPyr!*XJN&Xe%X51l|=8h
z8w3MxmS%>dR=QC#KayT_T57MC%i9eo=0uCQnHv6JSja>>q+x+!)G4gbFD+L1u_kjK
zZyE?<K*{SCKvJt>ay49Z8qBu1)9`v{<ck_~75`>Bg>pE6tkx9N2a2XCts5@lUa3S7
zstUWZ=E_)s8AfNnnMRIJ0fR6(&TI;boZ);FlanH0qbSEhrqv}#8X1RGNm?lu%Ev@m
z@&3<DXT;9Z9LUZ@S>adgiaUluM(Nm6MDK%SxVodo>MIiCXcGc=I!1%SyK*+gg#4o-
z(8VW-B_h)Zwx1eIlWnbWbSuT25^4ynilgo*gw7x$bcIy_B)2<LqRD3sZ<FeHanW$Q
zFdhQNKp^|+SNQWKUDUJ%HOZz*wu==;1igzh?A$DxZ_VEVqPrc%!uZUIfT~QIjA|VP
z#G-BJmZu$Yh$*E_4Aj=Z6wQp#s1`6(R6apb6mCB&s8Sk+FRkJtZd2Rh9}C&yd;BN5
z^)b^e=0ra@*<y0}KIoQHzJ3_G1?|mCx0s5A>DIIWH?#Rz+G&z(trO+q1`W}w)(glM
z?^B7NLKhRDTd3~!=@vf;&@BQTB3lShw&;b7&?IbrDf>qyU;h*M{};&rkK$h^l!fr@
ze)9h>*6UwA%E<p8J%0S?ll=eB>Hoiw1pj^tgntS7|Iyw*UhlnlvNu%`APN=dlQ&Ut
z#U>XS<kcIg#_x&e?1d5_4KlO5c}5boyoE96N^g%j;3h5(bx|9Gk~B7A$q{NHWX5=*
z?zX`;ZD>(4eKsK-WwXYrO+s>(9?C+}jO7$aG9j@C?g$eU>Q9_dws^~BsS-tO<#g9&
zWQ83kBoqraY5gch>ru*i99gY;I>(ec8ShCl0@f*9j>Wl{sac1|y@Db-jeCVT=oqG;
zQ&1h*-b*c_25hd{m~$AAT(2Yo>vLkIC7l>dcWKgx-K^(1>1%65ZWT7oXr4yLSA$@q
z)RJphG>YzrqTy7wjB@RCfKvgRL6Zkqg|Td$Bh~6IU`%n??Q99v7ZGO#Sk83FQw;Yo
zJ<kNz7O$xzl;oRrO{Xj@uh@tZ0+326MgK93R%${XlY#Qvcr-vMyK2WHA~s0hQ%>8d
zFJEMVL?K9R^jn7hj2N<3JUV@f5o;;+9yE<t4XpSNWG0VCiOCzJ6SI4-Md$x<xVb3^
z$Ult48>a;>->;I(sfB5dwR2RY{xigf*nRUHt-v`8R@}Yef$=s9)${fi#Ryh?6C{{_
zYHGq)V*Hm$KHqQgG%=UuH@Gt~u=V=wFYo%*`mK7_Ohgc8icwqzdRBnZ2?x=|N__J^
zgKwA3gj#~XztD}Fb0g&72s%Vi&UDilqt>JaFdCa&WpjVlxAsd}SL9*G?E?eWcmJQ1
zE!<E)y`gLbaQ3(3cW~o))8z=l>$fl=)%u%WYjFLJ@G-r;r3;CyuwpB$K>2<((dDC2
zD;Xo3VXnuwx9aDb(jEBO>R)Le(b849=eoF#7HtQFI|k!n$DJv{g#5%f+&bof+OX}S
zQ=KrPn~>puEA9@iDte`#@ze#-O^Y@#95Q{|t<jxWT!f#Y#hdt$-C6_wgDsh>LRa=|
z4O0a*SGz^6II$GfmaY7<QqWGOj%^1J)3n6b{(pAVKk&s?z?_%jd)lmoBoXQmZ>0mT
z)BM1l&Q8&OA8pMN<F7;k$GIOBb)$Kys0$Q3vLY^%KD5-(CCe43=){|4#qI@ry|oEI
zQ@7klG4I;Wpgkd(na%GJSlUwFm^5HSTBhpI<@h?n2*9;MiBD$sL%PJgxOrB1r=U@7
z$R~!~iIIu1p2Rv}M_}mN5}JTP)zwZoB=j`jsF}s#E9^kFo|Ql5uKM=`5I}t2@-16J
z$xLuGjfP|5{-(STCaDf4ovS*x<@;tBV1Zp}-lDjX?{-B?t*-Aepiu2atob(!!4`}x
z^oGo-uM5V<2}K%e-I^U4oDCrB(OCq{{X5T-{yy6x#u(a2IGT6gAUH`wzG`N{H8F9;
zeK%d<G|<0WOAj^~XTZHSF*kd&6GM57f=jWFyxvF+z1M~|Bct%!R?-=c!#!(4ylz?)
z-wD+-p#5M68ELc#)*JiJG~F)%ysXYD+RFbT>$!unCp4hDSgo&~)q2hJEIAo_fdGnT
z%1l&!r-T))0X;#isEdj^)2={O{it3JL=T&LrDv}1jJc;X%124(9a)GDb&C;z&kv6f
z+^IhaM$`>XWH92wB1Jum6y<=VJ%}CIgw879Fkvl5WiWfXMm72N;nIfX&SwVwP0|NW
z|K;#lWGtA`s7PeM37TwD^Fy-V%Ci&N<3ql#s2O?4=JoL~pY=-Ci(>}RuoK3yOJOz5
z|KMI^TTK(**(1qiyuITwG&KjpTqGq6cylwf4+}^H2H~NRY_dU+WAnm{UM_n^VAvr>
z&)`Ccd6^StA$}FGaC3q!W}OPTt@$D6YO@>S)~tn}3O92h@L+|o&^Te{n?cEC!bz#N
zr>0`DJ2?Pk6!!*~aWm?6x5L+pC*~gM2$5F(I=DXnE8l&{Zp*j_^YmJ90sbr8i1c+w
z1>l(Q`zuxl@B|MnJR?}5!hT$AX7N+f|5>R^e)ylygYtV{T9X!wYtkU_Gr0x}Nc0Jk
zg=oi+=IU8_Jn1YX6l+@p9a3i`dN6Wl`R0;Dd$y@XM0{QY=#Xv18(|R_g0}611%Xls
z=ykGmf&RoaM)-LFiw}Pi)@Y=YmXQa`_WU8W2i-}pUnV6Q0;Gg*C-D`H5MNPZ*FIS+
zRM-u<aH?%|n+PL_^lyTL@^nnJZljn*f!tgzx5{rQJ_$+|gCgSrs}9MbBjqtr_C8$P
z7Fktc8dsJ{`+)aY=(CC1HqPK1fZq&>x7)*^h4PZ^<|!FsYf(Uh$v8jWG{TH|2XX&&
ze1?)H_;G}+r*+$>P<?7nv6s3GdQTfzo67R1rAuhcWUeor(GOxuOO!W`es{T7mo<Z0
zM`ST2A}nh@x#Wd0xDC*s;SKjo6QS9o3cOinZ}-q(=!Ijxy&mcznj-wuE5YM71?ZRe
zUeALmz&``CU<%N$=`-@erhLN97DzjKnZUl}taid$Pb1Ap=S0mwaRoXt+FPfr$%N^H
zCC#L&)bxmXTAR)}(+RCMtNEy?qgt?ODzDZ%b4khzw(O#q2pMACY!a+#6J$u!z2`^+
z`k=F<4dF7gWYUD?hrO3OHKGEwNdBC?FpU;!FY~h#JeT`}epRRNminBFu<|NL)7Y-X
z1Uegu{Jrz4edIBy6F~YJG20gJMNb2jVKAm?sGJ3>n;F)w>F_7Ni-yf~<YzIX$Xepk
z5?{c>V0<M`U{x^zA}hl{hT02v`kP@J-Rh7GcexqP5Nay5159J~+&VJNrlB)yezA=a
z%4mNj84||}{sT-;L#?Av&4t>wl-WjM(cA0ZH{5LZ$a1v;{o5)a=l^MCY9XXHOw+Vu
zPYH=oU>5YqLgyuQu8e*^)QQq(<_Oy=JdNr4A{B6R^f_?RP{KmBxX*!#C3yl^5RM=5
z!jSl49yd-i%=~-JO<DBFmted#Cw0+8?Xn6}mE26(RVv?k(wtX9;gpHtFR-rrTK77o
zW2z*MIisPS+FPyA&7Wa*82&LP1D(;;IAG46j#+N0rMqOcj>?zgLCL(NT(_Ya%4Px8
z<%8nGdPYRJI~fls`0sBvdW!%MmKN4`oOyf6vmIBa&y<zd*}&(QXTCWR62l)1upKI&
zogkeE*p^xrtp}#ezO2E?fE;{g7)*RGIS!K9H0EV;lFxeEB(nh;w_~}@E_=5m><yBB
zRR-!(lsvOza~2|UQ^G|NA`D9u#gW0titTat#>SKJoE`b&;<#OKSSO0&#s>#Ral3Hj
z;GIAZ!>(cQFC9e6Ms&M?|2&O=othvG!V%;HiaF2~(M&ElTWyM`M)p{`@JS{*v*IJS
z-W<8oi@&D5LEmd{$!g;6on7)xR=Q~3OcstOC&~&rJhsh@13e%vOo%tuw_H@~K{UBt
z3brwddzyDN*N)^w_2ak`cM32JkP5Xz<l8}eXiKfGUX<_JGt)1zPVc<E0X1xjQ{!5a
z+0dUhK}9V)#G;3XF?xD`)LUU@mv22BS^R|YPh9FR-mS#s+S6tB(Oa)z;n2BjVx-w3
zp_5e}t2zg?MBz~9-upQ__w3x`><x@HPfx_%#-k=~uX6dQJqB*Bc|qqkZ9)>ba7`pI
zYExbKN9%|&8_1O=LwZkF4J;3_7ukpwKn}UZo5Hi{ec&{0zB{0FUKJQZ>44(cjxLPG
z)SurGi~aeSABkm{Ub2GFXT#&Kw_GsSgl`>2x<UrJ(dZPPV3=;LPjHr^F2X%Z?kQg7
zk~mTrhj$vTz2#+dl49)MdVWs?gQvTFl&m^n(~dt>_sjjnAwVe1hXGbeRvjo3XzDGO
zY~{fDn=^sZ8enu?=I#QKuRx<!2Qm+(g}gZyy2!t?rYmYSbV7@-z8Tnbtz|%EYHXjZ
zY0%N6C5p*L{Wqwd!iz#$_iz3><a$&1%nAWAkr|k?3z##uq`7k>WAO<zz!M0h@_jV`
ztGAgGgje6|n6fhVsu^S}_0VgE{yPh~BCN;!HQuRY7Jb>I-ZP6|vRL5t!1!PTMam%<
z)__e<MAMy|RsOT8k$v@|;-W*C<Lsj1h=teOJkg4EvlZKQaipd3DH^1gE-E;c6_pXo
z<oUD$UOvbf<oY{5ZxAA7LH^)E60D1AeQPPm0poPe!y+ug_}-62n8pA*$)=6jX1w)s
zIuzerAfP%DIu%msIm3unj90_nq&rTA-6ZbRf*O@Qn{vV^tgWk)(Mssf)n+J9+GarH
zp7dO}G&!fJWQswAZI|8f-VALtHExZR=K$lmgyd8MtkCp@`UOUcHb_MQQJHF-XvKLR
zsLf3}JB)Y8iq;H^_!gPml&l^OFfb3<AB&7Iv0CG6#Z{C)<gSfn+LDHnJB2qR+Gfy`
ztO~>?chj3BziQ2}v0^i8t=MF9!V}nF=$^@@rn5CHc_XBwSex!>o!vC~d1fyv-i*4U
zdJ$hKwACl4H~})ho4&a*zP=UNwDvW$9);n}W4sHWHP_+4x>n=CS9USfJ0^{-Rfo}>
z^@F-=U+Gq?Y^x|esJ^e=Ck8umjbYn8bg3KF)b0sI$MI+f`Y@Bnoi}u+HY}PmRPAYt
zhuJnL%ffnv&m5_9G5gOPH<;AelP+s)C8AWM*06X(htM6+hU*2ehF(lZnrNyWlJH=i
z16iaKsBNrJD_e{pLdTn$*IAP_3*RaK=faX{1AF%dx_36}y`y=%y>%vE*;lJL;^`A0
z22P_5jH$%jT~aQxnG<6RE2!r*q*`{WY<;838t_;fAB>$SFIa^LEH^`5dFD8{Tf+5i
zgY1XGT4D}5=^LBq@#Y)jE@NB8t<BzD<mJd9U^AIKV7Qq?q%~Y)slLs!4Jv5GyAd>(
z-*EQw8}<Y$w40|B2ly(?vl@3ci|L5f;i{8vbXq_(m|GS^L(fb><-!xkFOS;|1C;>2
zkrigR1iZE8m+&*cCxwv+Y)*;;r}r5~88#cQ4nKe#ao}N?$#fh?9pjiiomPeS6Whtn
zxt~~RP5D}cdcbWbp5O+*oVlGGoDhC9494$51kbFT#pbY8`3ue}XRpzjjPufz+ZN>;
z&6ex1>6zH0zJAp_(=-KLPUgDK=yO6}V2aMdg|9L}yY1isSWx@Ummm>%C4BjL{w#C(
znRaEd_ZoRaTFM?DQ*KGb76pUxNiMyj?&Pq!@C9L-<96i%u1v;ecu^lCvMe@!W$$%F
zOAmVCTBP;RHSDYw(hiwhn4dO$R)nYG=uXyk<1+3igr|0AoUpwnGlaOC3TS}xcja!-
z@XP`N?6*-v(_!3BP7<`P9i37HhO}@?%ywa&AF+017TmiSy1!eRW-M2P&dyD_1c1-V
zf5Mf^m^<4Zb&`{lc$6}L(Bzjf%8|2uHe1g?M{-HiM`5?fG<U_nvBaHV@0rm-W?zbz
z1)CxwbvZxs4}ygi>7`5>(`UaMR&p-{THY-nrwBy%Dfs3eBmU?97*BCHhYl#j<I$jt
zrkHDRj3@0^qu8ox;9)ox=#2bdkLzE3nTh}T#aCa|KgIw2Gv@!QSK*(>pVD}7KEU0h
z{U^;=hkH+6zIeKSy#MmW-r-R$j%QJ+*?~O6JJ*ZpsV&SYT%Oy=3Pa9xOpwA(!nTgt
z2<0L^Wz_AWY<NsDxJHBP1UTQgd!tfplJH}<b`7`e7GAbm-MEL2nCYa4tj8`p)mW2C
z5fmOnfALM~YB1`MGbrMy-`9S8isCm8!{`}jS|gA?<#ys}`b(<1Xv`a(!WvTlQtTi{
z1~75i6!cfv2S@2s`$N?fjfBgKIAvb&&!$P=a^|9vnn?7Kh$r_3l@zduqco<zjM|t-
zbC?MNni(;neKL+<B57`0Fh{_|bWgPWi|(B;R0;@bCcI{Y<>lvx<0V$f$R7D5ihyad
zm5B&#*=Ts|CcA4>h!cB;-n=)=QMG`kpp!?BFSS>LNf&^Rk%rKFy6=uIImTKacQi~>
z9JP&D){l+GsCIxM_Kzr^j#(7{6sqWg){Wr1QPe@7X`f{i5<tCJJ4;+K3^o(TqL~X=
zmWwf>Z*neFk7m-gGT|r+!j;Vka+V2W5sKhoTAm)~L6<Ty8|ZI%3bfZ`lJ55tRHr9z
z<Bs;zRqqNld@vi#_)?#*hcNDBT-1UU6xbZiy3PJ2qHPJ&v})^wjBOYRY4CkHAOH`0
ziCVBa2g8=u=uh*REUGil;KO5HhV2%0Z{EhQaA%DA**PeOTys3p<bB)cv=}&?!1QP2
zrDpWnT12dev?E0#blzCsQs>)PVgO4sI@0sCx_X{Bn#*>0b8{b9ff1iz7#MhrO;yf?
z6~@&W;BX00X?Yo5*T^xkRQ{}SUWO(TVS9$YBbic-#<_k#Qgy7BOC;ruw{(ul5p2eY
zHCNNMlQHBjs+%>j&ul1?MNi_b_qg6>v&>p;-L^xI-0qswQuOdF6_;VMmvtURO*;5U
zTd#Wr#g|o!ouZc2b;y+Bnrw*-GDf3Obi?^|?DrXV2hhfjAJ>c6qwS(I1<MmC>TO=2
ziSMD)GhIiss|{HAdALoZq)A-q+UL&cD+AOcN{vQK+o<Kt`ik4z+~lE^Hz{g$7nY`<
z4-a5d`J54xfWj6n^=^Hhwf>krqsFDRRkOC2<s6o?{(+p__xmRU=-tTx7T7uaPui#j
z)DD0`^(NfP?F@Np5pOAk-eX|~nvxMDSw0PUGr_K;L5}1Otn^=i<+@fFt@kfsj`}Yj
zDN|kA^^#%jmJ6Bel@H8hKM+@}ika2?>KjJ2fZv>xTb;=dyD)Cy_uCfW{2Z?BOdv27
zW@QwKBmIgqx}cDjat+Q3s*_vE-D_B9Dm8QT#Iqul(&9YoY~W#bm7AN7Kb_H+nO*pB
z+~XWwnV*p)*$_>M7q46ANv#I%f;z9qf{ZA$@(Zz|BKa!NQ{*!u<Qrjt+I;xDoX_WK
zIZddtvp$7|6gf+QjfB$k@{eA{EDve7$$R1?=UDp(VJh!`=HLAc-&Oqxie*ni&h#P9
zVm$h84AaBm#8jePVMPr4`A!~pR7^VsxP<Yy4ag-tniwuOi~gA~u09DYfF;>qO=l4L
zbae7s2m>yrX0jn4mDPi^8z5a%o4jXpx!rQt@!5NccbB0Xl)S0%l`1FAXl>nb(-lOe
zEzkfKiLgKE-~Pb*zvRhxN5{M0+&BMU{mc5s7uo!O>tAhr(*J$Z{}m+x+)z*d?|;)|
z^nd>g7Z5ZpiK;(D7jPro?f^G9ifes-+PuQ2|02r7ecq3G{h@miR2ha7#`u!GgTYm`
z8(+erwGt7q>r~%l(RH<zFw<!hkjW&<z!uRZTXRjj^4;hxrFU7el8&<1l4Y-LifFox
zYta~<bQ3(aDyXs?P;gXj`$MN}#}<&*rH0W9Xg|rSl+<g02{z`gCxnUmp)}8qP1Io<
zcdr{QRT{;o(WujfvlE}S%CZ;97|ovSu-J_BT8Q2froZ{oxgx`OgmEBIFkuR(-cZMb
zZZjT@D9@m12p&Umw1T3SvH?s`l#azn*jc_yCjl4tjBOR^f!Ici3?fMy16_<qlrSqv
z(+S`?PMuDC1e>KbilYnF8C+2d_C!%d##N3_65`~%a9RBH`L3NM68yE2_9<d)fcruv
zgXEz`2wcmdvp&*XEX&zxT+$gHmU(@%RlV-A$iybKHcQ3kS}X=3SN-(Y5otBVqo|bz
z6CP#3X5=vi<qLB#%}{6>uY1uRi~_HpW-Rzt8<|Jf`-Jfgo*7a&@~ghSYwKvNOG<!O
zBb0X{si+b$Sy9*70_#{nNBb8&8Lc<dUrC+-uh;W8cUkl-6=l!Td||lY%R&vc?h<%h
zXQL?TjbZNKw>DKdz#d?Mh9a+L{V?hd&!X(ZE5;Z7&>!(7Ja#`fH>vG@xuS;Q7nupW
zp|a|2{>{ahUR|X0%72&AtFd|GFZl+q`3nB$<|#AF@i4`50m&h}NxFl6sk}1nrNbqT
zuVH_VVhS#h^3x@)Ayg$b@)uI=a9>dYxNY&#Eu>qd=AhdlR2XQzI4&HaQD-nNjr2Oh
zO{WAK5iny=R@*eb6^nH_u@@e7{rC!dn{*~Ngcuw;ZQP0D!9Wd0<Ff$^qfNd$9i=sa
zGpKzwNMkgZ9qF`@qjd-UARhlEH8WuxGpM=sO(TE_?j(9+w~!pG*b-s67j6nY^}ryS
zO`+eJB{I?-J-MxT9KkD`vo$@@DD*G!CIJ3OLq}8|sZ}ly?}yOqD-4ON-_t)gP)L*A
z8ee*c^7W;c;GV~PqZgFhqa}t`^}-t9DK%czg`iPg>20n(<M=!dH1$^3l3S&R;~Iyi
z6DAYh?g(k0(@0-YCG25ZhTcLsZ3yB7hgVM1ysjZ$tJQ`b!VS#j6?ckpp`lVerCMXY
zx|$ej<+uhY1&2>5g8*x8k27^NgkAU_5cN}JFf3Bl2uE*mribEzG&9PKMv)331kt7k
zQbt>1c#N%?i4wzIEbLe|Nup_JTh`9j$s}nLMFmqL58~`>_&;^Bpt}n|XEOIJ#3U|`
zrJ7n|Z<UbKHJW*IKn+FT9<3^qS)3@`A<pU?xN&f1T$5Sjnyv^fx>mJflY*O7cDDbu
zyi!ptLQb*By*0O(ih&(Du#sP==EKe`UlzW952*oL#%>46H1Y8KDo!}-fT8b(?iML~
zMcexTJDcj^VJP#_fU@RZCXvnuK7P(8Cs*h6qpTv0$HxW^kU^bNn%74AQLu8A;9y8C
zJ9=7u>lZ(*ySBAyoPvd9$0d}s$KL%X;P-7WJNBd|TDYlx+r6%~ep~yk^_x)nw$v-W
zgdw<1bP((tMqS`TJ4Ry%I4sSTEz&M1QTq&iW$0#!#un8c#(B#i9LM^v*(BG4OSGd%
zw0|z$xyTi=IzEGo6-8b*$bUWT4idZx>-cDsl(MaKsPQ)219q5ScnJjV`6~rf4pu{Z
zRAL@nxaggz<H0a#KgULnZ5!BW?bPGVuO$Gw6O^hvGPw;#ltG2*2^EV0N1mZuC0Q<v
zWuj$sWz@-&O~w;LW1A^ind%fNx_##X%^Dhq>4G=9Zmd(@hQ1XgoKEyDov_mr8@%lE
zj?mkIOAstxXkFtn{sBOLhj1+Vi+M4fc4>=SE{nTysjZ%jV%~T0HO7C^X^2k-NJ_D-
zH$X8j^dW&|nX2pTGWLdzmJ?VoWJi01bC$GW%umi>8>AOE6;))aQ=obBZ|MsknEr|E
z%oOZ_#e*XAs=FMg)8IHu1JBNYVCSB?J8Z@%!QxclUH@bdc<LJ*IV<4#S%4{75T|8N
z_EH?qk_*gYGnRzD5qS-BPUUm!nEB3|VPHKs8E*58cxM_{>Im2pBrqqF5uM*}+ALGU
zvM{q8Az(!<iLg~NMhVf9^bBrP?aA1l2c3L>_1@C)l~>RKJED*zcpMmQT{+X-o2zJ~
zGkjQLr;|cOE(?@7Pz>9E6z3`i$nZBE=xli_eE4g5B^|#h+tYJEBNWXgV>(3vFf>s?
zQclx@O`k>mQ`>jZn-;<ZNZO*Wrk>!C?0B$ilVJzBt7L41(sV!7pd71%Ms$Nta&jYt
z+`D={aHWf+bs{2{)hb)8(5X3WdP%>^vf8z1vnwnFH3t2UKrV+<8FSRIF>1~hRa&Cr
zG&JcPn9C^joxNz1aPcIfqqM;SQNB~<EKyoE<&ZT&5hm8|sA^ck<ndZtwye3H83JQD
z8dGE`C~$l-=>Bs!QrG9DYhhCjchAq9^SYIiG7ex=OzL*&9KKzjS8SWZ%xNJ<P%(WJ
zX4Q6voOKGz2AFEn)KQ7QA$P~}<9!q#FL^n|&EU5Kqlc<mS?$+(F(7y7(P?a_sMu(x
z2&E)R7skt5cf&S;VtI4o5V~n$qqcj(y<XkX5!hS42~<uZS%BcJ%C%1ayCehGEU4pm
zYNk(W^?w2Vf6kn>&=6p{{(qzX)#LT7|L?{Z>!0-hpY;EKa+Up)moNVN^>@3+d(H0-
zcc1R>y*SP(?}J4iK#d;|^L<j>|4S+E-;aKsvG(apz|;K~2m3GfmWbD}=+-poc2C{=
zgJ|LZ?E3gStK_L_SSg=Tx*O4GR-*`fq1of6%u-I-j#{LR>8f^J1a*q4SkrD4Pg>I`
z^;YXr7jG0>a#qfue`l=c6{>4$H)|fC2V%Ree~0>xq(he8b`3d2^OSD0y3GTzW~Zzi
z8k4^;L#@>x=2VpiDBL|=dNfUq;RJ7=In8orvEQ=lzh_{JsR6vl5S0xw__-cq28XC?
zn<ACvu#7E2Yb5Dmt&yfqP#ej7>_S|Q$cKyL;3fUb=XsUC_Ws2%^csELyh0ZPdf4Gs
zXy@w=VYK;f+>hHAwo~>vlzMBUFV+KIQ$rdYv{>HC!i;)@37o4}6!Q@eT?%tG$}45V
zJ(J+HjK-XXCJr6%y{dzRD&grM>~6Hef%(zesQQcXnxr6T)FssKZ)c*Q*Uuua$Io{O
zlBxT7PN@z{@{+bOuMPKL4#a$0*8OmkRDhJ(7PS$B#RcxcmY+8+>b56c0~GE`z3Nf>
zte14g8t_C%S8hYkbSbpg$n8aA4e{1lLrldRfi)pe$?9scntvMI5la5Kl=>XK{q^u*
z&;a%`9(@Qcf9`I2XLY~r{QR2#T;smZf`EPOh5#WefSl^TG|=kwG6kQhn&>lkwFM|J
z0_Ypw_%fb)ZK(-8w-&Vb;C7GdH4z)9=9e(Bz~7_Mz}>9{5LI-{V3U88FX*HfxPl9&
z)g2nmM%%gYq;S0*n>Ov%%iet6v=yZ>B>_MDpn|{ABUDJT_Z#URCtc%4Ze65J7mx>s
zo8QQ&i~WzVVlWMamEeB!pG+tIHBBe>E-B(TW^415b@m(vG~un`Ayk^2_7k}9M|9sm
zOHOg)`Uu7&M@x%KG<I+%8WHWAZalWTl_EE{yxpKJ7v4l>V;V^qAt=-Ztjk$tP?P^6
zmKieA1D*qTb*!T>bW23y_-995(><c4ZxO&+)85l}+Rk7CgWFNNrGCW}KoKKS8Yb5*
zKS{VfWkX;pas_u(TZjK>pVo{Dbk|x*Ht;bss4Zgvig)_<U@}(Q+v@XwcYA1)2;1lQ
z7%C-VFXJPshNc%GC|WoJ6R!7!fkHn40DbaM96#5pFtMNmG&+rve(?xo?2M1wE+)62
zzqf(RT(LVoloicfd}smlJKeJ80oc{Kz<#p0xtGO_7w(VFp9Uku5=ZTu+af2oFrQ3w
zKAGk?)0{kd+|NcQgl@m5u}(OQ5mO5u*$<<L_&x8}*0g%Nlr>V&k?ey&yiS|Hbf1(u
zu$z@Cede`kI`l0ZU*!VT#qu&#BjHM;Bv_sD)bgK6(zKtNtkTo#1a1ra=@Vj`ugfNQ
zXmFCkDP8>usJMKtyn@*&1XeCL%~aCncQwQCR*dygN5{Lz`%jv?M@M^y$0dHY|NPa#
z{@zikaLOy8X{|ipJwDw3XY<>`y}gifcK-;TK7R$xeRHta{9*t2S@ZR)S9^!e*Dv;8
zzGxo2`~ki{fBAH;`SQiVkL87CEYP2Y()MO1Xra0p%3-I=UV@(BS{%jQlU<gxe4oTu
zoc0Kpiu#_-Tsiw=mR7*^R(F0|xv7?Nzn_iXBG!4C<wGxlCY%A*JkOc&N$kn0UGq~}
z(tz%1-{(a&n-+=6TKUaqz!$RG)B3KD);4cWvpaWOK7;{Y&{X98*?;Z#jm*T{e2NUf
zrrUo#`r^?>F8{;hFF)mf_*2?{-6L{K9@rP3?8umvd4W8~!5(b=C(mAX_m2MlW6qE)
zTm}GXU#e}^fb57T-2^FOz}QTYUP)?<p5yfc9oQUIP=+k<fJq8$b;VZPa%gU1Navqa
z|GNfL+b8iBeEn|{sQ%yoR)S!^)e+nDF^sL`l@A9=kA$nexHlNxRMgS_?sJ5D)J|$T
z-dRnlje4n$aTA8HoR6!Izo@ACU;p~ooR&v~n@#msp@CrQEaT=E(BvFHoF+$K0sRkD
zWW|Y8eo*m@D&^$}51#%hG11tofqtN+a&+}f0X4PyYxnt6Q)PPxn|TodV;6PZL$66$
z98Tw*klM+7CVeulV;lwi+#tRfp)mGxjA7P7kr-Xd2%uE9C)nNsfq(a{4l2#a4-<!R
zClb*%DV5`qdIh)pSAT&oZ`fc#9jJr9;5&lL`5_Nj#(}bnkz9EO2*{WT(u_z&t3bR4
zOL<>W83Kwj@M?l7{W_C2mV>VMG>V|W!ip$bH=IP-UsW+JEk$XDNlx0rWa>cBPj5Cc
zdISyc8;l|#h6s-k^$g<Ueq!Onz|#=QdE$)DqY)B?0o?Db)(L^VAp-{drlIzde$s>6
zv6ti4%nOlMS8YeF(=W_STM0MJI*jg(v#+G)bv_&2_it*PMe`~_R1d;&=9#)GB}jF{
z=eb%{r8jAE+KX0yqaOaI_Fa<DC6gbQi^y!D*6fN>9`n$nRGW2~c+;>Vu$=Hmv(ra9
zxw2kUsT$NEl2o>bDwtp1RUg@Wx+^B(uRmIJxk+uhUz3L!u#Gh-9oVVkq>G}c6!~(T
zk`YPcuW8|M$)Kjr?UK@xVc9W~HVs>jt#xA)^yw&*JRseJ8+|5f0tB0J6r0`eO&bZn
zVA3(8IFKxuGY)Mo?<^z*jN51Z<kyMQ-3Mc9Yl})JZio5P`b8&p_sC}>a~4|~wv;7<
zz_4SSju6Gty!?_8;pcg4C&OnomyopQ3md~wExecgHKO}WlzbzBxHs_P0N%bFentkI
zIl_u7KUlf9DDp>vh(#9J+<fIIDFMj*E`X6KH7#L}3QV(a#S52yy53i0y&cWv<U29L
z1o-GWSlrEDzRF>^itLW9q-?GIOfAw{GB3GJ-;s;l_Qz!E9>6O-Gb|hR6dOjK_<qtH
zWnv~5=uA{MjxLQ3Q|FH+EhI8;aK7{h{i?{%fojjwIG?o~S$wiZYWpI7&FT%KWRwkj
zTwZe#iAfjGi!MkLz{-MPV)XoG&0E=L(xwNhn1oB>xT(k0N99Vd=^YvS>B|d|{mI&U
z;#58nQ^><LlUT%8S-$r)LHiG<V{(>JqT6!>UY=tvH0(PJxG1(iLd(T6Coz>tp&L%A
z!tFT5!miG=<W&bc$II3njd2Y9`PwT=e05+2qPjiTph<H-9ANx4V0_FWe3=X;sl3b3
ztvF@T-pQn!N927pq)bjv(aIDJ04T!^@ndB9Lou-_#~fa*TXEc%33Bj;X$^<aDhVxK
zFJrl-E4C$KU~+mydYsvdf$e6d7`kKxmC4DJfx2d;>7DQ2Y)pWR4H8`D;GEEj^!o8B
zN#Qa(p&(}W9kLJwn2xC#h(tm(R=nqtp3&kc9tZecYc-YOjx*yUacq|Tl_?tV0mdlz
zcQBeV<KL}<KI!BW`qqa`YVcO2UD<>k3!dB<=zqw#LH1~oNK7#|fue!UjCP+(pWyL1
zaKqa&9{yPtW|;IZ%~C-*N|0eWUSSZ6iW*kWpoc%!DqEPzNeLh5tWrDcz@;~FdeEXD
zpX;<8PT7GRu7APj3dUP~U8qG4Wa<v>z^(#mJbF!<r4%q02yiB;#q^XHR@Go4*H6LH
zS5gk(;@0|yR9@6`omqan&B<z%#VJ{GqU@<iR-VWhoJByTh?*fm!V3~v5};G1g{Y%B
z>SoUzW0E3%X@m@O2DY_It}}PS+iBSnTs{xM4(Q!BKrgClTW-E2Alke0r0<UdW+jo%
zXHUe((0g+6$gT_CxFy?V3~+;Unyp=tRqr&b#YrJ@WFXVHn{zcQvkLipvC8a&W$VkF
zJvpw5&L27Puzr{-77eI*JDen=bo{x9RH90TvM_hlXeg8Ou58i+5~*5Itzzorf5;7C
z%gZoG;;vk3GQZ4$qpMEHM`V>m2S8V~QRa0yThV4iD+IxotA!w@_iy?%Ey;1~VwR#i
zFf!Gn;d(?c!~*H2Ixxu3SmN%nhV4EOJf?>rp9e4mmR>NTTft66=C~fklvucLFC9j(
zs=IdNcg<XXmrc2&JNJy1W2<n=rb=*Msgkh??+t;&VA9oRagi0~mi@kGPW@#G@zs=J
z$RpJo0oD&^QgkcGOOW+8!vl|zfixR7Q=WQq^h6kdA6~n8$^+0VNM1&I(F8}Fb-?p^
zzAXo)fYp?_ALR^eDfuS1#9QI?IFj?ta$ja5c^;EX*x8#hO%KshHt$0z2ynC~+Y%f1
zuXpes_G*lYGU1(j3Y&Mob1k19!PCnKC9DiVySzj7%#-triUq&SBz3Plt_W$&vO;mq
zlL5MBF5P1HVM4!=$>5!aWEi0Z5Ef7mpt&P3_tMr81g$nb84qi{c3O-2-P-Bka&0(i
zt)U3GIpQnon)&<c%da<T!_G+{7(z}KTnS4LcLH^r@_eVYo+U{2L2hnlIjm-m9Vm!K
zkU48T-WiYC9Me;H)+)m^z|1}!4TeK*i<L!cQ~f=ThkzDc);0|l4CWNQx?)cFPU;ga
zc&KQN*wE-coMgh5HmM_Qwm;TkC8vo<V9F{R(+lZ?7IiwL=HdSC4u`s5M00}#*Xqj%
zZ*yF6@bGkZ6;&o;Ge8F$G8~{$L0?1@)+9)iB}9|~T20W+I*vQ+`b(m{bV7bz(b&^{
zk*ggQ2WkwE$t4=Ik!DFUy5NO8>ZskgwUz6RFDS3m{g&NnxD0<00;L2+fn>);iv
zH=JG0*m3FL<?j}1VXk2aOED44LWr^4i~d{vsz2zX<(9VJdmarlG`56=NY4hO8Rr?Q
zdY=Dm9`4n3RGVs3DoJYB2eGzxIJg2_>}c}=9{?W)CF&&Qu=F6T*2@@jL%kXhZ*ML*
zE!ZM`(tnv;n$Qm$)M5YHo&lwPRYRZzEm-m#C@ey=PJR9OdTh|KPm>X_UeKG^J3?)P
zc*2+i2ED&%Uzp$LWvwkw`WFBQA4|cdvxe6M;@pBgB9Ud2mWQAsSkyS6t1XJ?-@=L-
zyM5@ONGdAl0%0UDKMsai4@zdEAxVlg1(v>$Yd1IXq=qD^ROKw5p2`Rd0XFCWEK4#?
zY8qf<Q!HZc09u&Bt(k|OAtElVF>$3Z8t`XZ_2JLzYT4Mpni42zVVwKv{O2wBxov;0
z*71R<<7{H7NnFQO7j7YHr5hz$nTxWQFoO2OPZvLb);M<;5d*`%odrv8Z(UE^h}x*t
zKf+)JW%->fZVTtZ+WA5U-EekFbiQhlP_v6XN-8vIa9H<}e%W29#7t!Ky}DPq%ne?x
zI&~CJjh9bfq5&Djv@)xooRC<S4-jBbYH^KRP7;!SkN_OciAg^}f!5y?#fao>$|nt{
zc?To{B2QNT+}07zA#!kSk(!rTXy;gL7;(vl<0gqty`(PQ{E#<pI-U;=-`yD%6;UNX
z8@19<Vg*<7cxC3Fm?a|*F<g@_X50L^(Ks&<k#9N%D0dO2Ymf~=K-VE7!EiHXkQ*&v
zN?wo>8jI@#p6OhxY=e`E^Z%fkq6m>rv3%^ORLjKIbz~*e{<epp>otwTrg%oCUNt=+
z?6ksFUx{Hc7tRSF+S-I>y6(@(*V~iPh@wqkgG=m3jMnC+@123<sT`NbJ=Q0cW1bA9
z_Chga@vDFsYHV^VfV7B}3QMK~=NE}Wfr$`PhJ~0yy@u`ut_ykM^G;^-IT=^GgTaNP
z`5B?;d37w#A6@NRJk=|JkWnXFh%?ul<5E}ANTM+^GipAiwO|z%xJOzxIQKJ%PuQMl
zC%cYSjnaYUVqAF>pU91^R)OlNB)hp2w;V1-w|8h#@G;)8$<HpEwq*%)AIfHE*+O*Y
zi$Y!hwK>MSW$vvxpuGWdFddnZ+jZ7n3Jc63HaFRw0_Nqy0mgeb^70snsw*|AnjNy7
zSDq_!+0T)3YW@mlxJK+CGQJn;Gw(&*@6cm6*Kz<BaS@#57*{TsVI*WTJ55`T`gb`2
zXezxuzd%AO(y3d_ylkE~sdYmj(XoXIBvdL}KN|c%xmBGmO-zMEx$_GT&sHw7h?X7x
zd<dr*2Z1v`b${Mo_r2=g<#GrXVkst^qH%8p>LAkor0e|bBkjDXTY$hm8V57e=Q{4j
zTgGT)(*_^`3;CCpb!p)iOS4ukDkQRUf+{PVTFQdEU2^1)`51Yccz6hF<ejs0q~b}q
zo+mU=c6AlE=T4*Ut)z*)C-<whQZ$Dbf6QTOPx|OODfuUBMI6E#<rUUxd$a%(tl@<Q
zPWOsjTQ(Bb^I|K|<*Y$BNGVi=OKgexg!~oO*5xG#)ZTZs!&g{I6yf=F5aBy)i1K>Z
zB}d!zenwxtWSh><L@tn_ZpqKJb~lE3l~h86s1v9oLp|!VkiwzmvBh>J%ONzlX5g&5
zU?$^Y+w4Q=@I@Zsyn~T<`Nhu?69~EUhulS3tz{a=(yv`SyHVVW5|RZN_J}2~S!Tc=
zR04ylX0BkO(Q@|K!|exWftH6EiQv$3Vy>7K1zHi`OqpN_`#YHU5;8M~Jaa?AQUF;8
zmenyBRaIBknbMP<xAlfGBdeXoQOA<xd>s<V;NpmN3fFPInE#k0?Te#fHvwWt^o;;i
z)X<hiHmi>ZV!_;**$`Z;HQ5@D=i#wd=jN~s32z8OKOE=Cge$^a)UbeTiS4`@<gPpr
zcwAM*2@cQl3>HBDP8$}{0in_!im&0zh{ZNBM{z=<0k{}PCu1EoEPFdx%JDc_)^=WA
zwVjxLRGnAgoU+H1e@bcc0w>9NqtVaGO!BJmc`aCObqG{0@mWI`C~tW&8SkLYkQ%B*
zwH6<<MvD|CDAYf95(HdKICn(y*DM(^C92BsF*;~+(jN{|ts~(P)YAAA5egcp49hhr
z@njI6oRpTith@Td59Cn|K#CTu<-=SO*>nBm{Gyz_79L$iL;Zs_Kk&QcidTGCc0;<d
zw4JdeOSHXmyuB=!52MH3=(&``g?HSf4dxq%^K}LRGbnQeyRQ9Q8&1-*W-Dr6lokPr
zz0gvR+hUh!WqhJPB_R0#`9B<KICy<rnEPWM|L-r?H!}X;j~_k$<p2H2|9h7I_oqA}
zIp1#Bi_=`c`Sk(`M92cbi?gAF^|68=QokZwTSc9;N|Cw639yD(Iy|6GO;Hh%c9}^_
zwXz&NXC~xD8X>cmI+iyD%9Eb3vmoVI2Np2_263lhfA-rGWEoNav5ntzWw(Q6cksLN
z@|i%Fgbfl5#42-L(hoRxjwvDgD5Aw?M$@QAB&+^kL#=JKm8nHe0B^54Iim?cz>7Gh
zM5ZfiD&(Zd%+2?LfzvF|&Jyz`;;Ny`s%5%s=yGZ~v8gerQ)MpzP(ZK0oL{z~DdFp+
z{Rwau;cOU&Vd`vons~f3v0?d^T3b=wxPJ;8L+>BG24w5TRZg@}JA%p53J&agwMw;N
z)=m<>4WN#;+KN~k&B|K>!?y&wg~Km`0~cGkXDH1V((-KUa=-n?%eBKL%f>=(UANsC
z7?XvhW5l9cDiD+YBEC@$0ikr+Sk`1z5Zo?F!OszAv0YY2Sl$nNX+fRiHqb4U#no+f
zRZq2M`!sh*O;?nznRyQrrQhSX^X$F;R+}@zQUDR)h;|m|hsou@34pbAVxUzTpMe}`
z)q8IlW1QX{MpCn8_^4_mykygB3U2e7mF-&NLz+={Vd9d&U~^HuoOX3_lvn_0Q~WE%
ztQc@f+>FS4=6Fv_7#EJVo^|#G>MVn52-CTP*BSMXjj2&}2~3P>*8JfQo=!$a@mApR
z07T|$pgP9-YXp7g2-IZl!TFd~9WCc?EDBtm4Je}dX+IfHIF2y|c>wC(r>Ke|cr-fe
zBXAKpoQ`O(1fPr?*nq+)54$(cpOA7Q+2TNT5g;bJF#vTqga{@8BNn@k;ZfD?`a-Cj
z=t+x^ky<gpjk4Oc!h{&j0s4NfC8V@gIR^b9_o5t~2l%FgSnhyXFB6Y`S~YzCS{H|v
zxPTS4ph{@V)?vT*B6X-L*VK!_SX-N!mJ$&_;$$@FdA;LVb}n1GlD4Pj5D-bKb6MaV
z;Eb?aor?*KHJW1BCQ5ks#`M&f?YiSJYjcy0LJr6oUyn!dlEQ;9Zc=1#hX{>zRnn)b
zU2i>rm5tmCyJ+kN3>Iwk*EV?52?XPGV{qZ57&gQ^VrOg0b<&G(1Dsxp{%}!*r!VaG
zO|_c|j3agA*-c+j`9LaG_pX@Cs9k2jxv!`nhnG51TfOv{%x2Z2ufM83Uf&?Q*~el=
zU%WitgKi%Cfl#nFI5RG~21hMpEWyddp=8PtJdLYIuXYcQ{=R<<_|v|C9azb6^6OtP
zZ+IA=N_G9~enQIiF>zYYQR|Qn#*v-|K%baS`3W4MqdeFC-HX@i;Ja4`n|9hXPknWD
zRmCXlX|0{sCcS<w?o8JHCxum6O9v<8D?p(&)91Crz1^qJ_XxtX!BtGL-NibpQ?$C6
zFe96!sor+bH+@4uVn*{rA>H8>28eK>+p(j}g7>0?jC;uN4HLG&*ei<T#cvXbibCPq
zruy#*?3k}Bbe+-^*bEIT10MybP#aIKPoiGZy`j4UzDP6A=+?i6P}bJJTI;2!bWg9H
zjeE@GM_oGDc#Besx$mCuzSzq&3Fyi@^RyR1vj~{KAs*M@mRRF<9z9y$D2%(&G~!9|
zM5<g#0+o-Bf%7^~U<@d?ZJTIjf)U&?9tOa?XS9oEE{3#xR~Sm-&Ji;fi61e4C<qq`
zVryoM3lqX}N{oy581f;r44E(iu%6a!590x7B3}wjY-iHz-B4J}1UG!CjJ8=#cOaJP
z>mo9%Z-cJ`2vpRp-~z&BZ?^lO4d3jKXL^oBk>t>vl=BRhbqM14#}GrInbUtMj)I#T
z_jTqR%^82e^LP;b`V3Y2TlpXj=Kggn(O_Djh3+I2X<>S>1gPVpCA^nF2@8cFxKjjz
z+V2*DfEbFq2OoGS@B1j@%lrV|(NW$#+OCV0s`aO_fuVScnb2m!H5xd5zM&H@2h1DF
z7j<$PW4dU|nlE>VvH~l%JjH=In&rV9iH#|Jm?P=htXG|4fOy(f6D8uQCOw4|&NN{v
zeCr64Pt}C`(}(eqW+&WNN&iy1Eb~(!CWZdNusvqD8BeP{-Mjb|=W;bOEn^cKiiWo;
zWVuhOp371ar&~1VsmeaLA|lS3hX9&nZeyiftbitOO!Rp|(?}pFB~SB9x9lSAuwHaI
z&C<9*qY;4ktO2z$vaYTCKt0#j?qn<PhpN$Q%O*dPXWmw!-|2L>(?z^aHKS!1BlFj1
zv|vTIoRM&WU))_GW4sY<#6mvg9!#87B6TYi4#tQDoJ2uSAQmv<a&ygA<b625h-vH#
zkd}Nwdfp_$J&;4nx^3beLf6Qc@6JVcSi%lHtL|8AfjwsM1}V+$j45&mwd8aPg5P1u
znH9qfJU0#_Q_=Dal*3HXKHzy`{KaY?WCn`~u{U!b+bMRKp1&y}9Si!nnD1`!5Q4l!
zGk*lW^SrufX{PN&w5LlDi%Evi8N|#K8LtZ56Han#&iM*gWOpF+y}?MRl=Ft%h|C&$
zvOyC#E4_j1Jcclq9Z742Q7)DCf~8Ftqgn~(=`X2;qw@!ua7kp5r8~d-{Haczv!ten
zL{u4K6OHSrQ-Qx*i&x#yRcm9Da)kYokS6Z`^ZA|*yr5YrVzyAShMu=>ZjALU!ds>O
zXlq7_20VmQ7H#s+1_>ulB;Rz3#Ke3W2Pr+Hcr(-*+Lr2gf(5OtP%F+^n->(A;@YCq
zw>#ct7upTAN^^;$m(fc7;wJJ0mNMT<00XXPue7rvaV^X-h?r2E{<wW+!nWp5g@4zE
z<#=eE5I$(8Ke~*PE+T71l@QmeC{Hiz;INEf6CDmVW%rXOFAuOjAn_QbU-!c(2DZq2
zAb<)_gqah|&&e=Z+CXV(N?5C~Q6SxW`E@wr)$dCErba7>riCk*OM}c$3M|GrIhwp%
z(xpK-?MK2Ho-Uflx$wtsU(p2#18SQj^-Ku(>glxC0_K(`({JOPpBc*zv}dl)XNl1i
z;&vB7|1cPlYnRDBjP&3#0^$LfpIU!nH7WJWYRlQw0=d@S%ueN`s#P0|nKHxTPJ)Nd
zD_NaSoNKeTs0QknNZplllAh!M5=04Z<iPEtqTN`UXMu+NriG55TG}PeiRup8R?U!T
zn56cr)|ciE>@Y_V3>%P@6~z=3EE@uXSA41%Gfzcmhel(2#E3u&nl@;1XLhjZ@%hN}
zb2y*yPZnT_A6$L5{fKo=2i({cMik8YH5Bg=7w%w1@>b=wKB>*MVT7r*qxxk5qF>yb
z+>(o4v9{AFI$v9pvy8TNQ$}Yv(A*w0ytc6=-@L|-K!Gi>+u{-2W&6M*m#FG0mxBS7
z?F6272QAn}-2q%IFk!FRMlQvv%**Y2Ehh<Ekt7LwZYGIzTn6BEayuAG??^M;38ik5
zLncEg{DEkrr{~I}3u>|?1dg~DC*&gTPkqrY%eN>dIRm%L1Dgx3@cJx(3-l&QD4#7F
zQy1Cz?qDIULowx@F9Q&n#b8J$p21arK*1{4wO+xVa_&OExw%iyOvVNCyqQo+t%^dB
z?D~qYVlT9;7%G(Wym_HMIrG9_U*P5Zfbd<7Mp5!2+M!)~lH{E+h+Yq~&bIb7%rh-s
zdBz-F2&*4ag?dATza$F4#9zz;$RM_a<q<u}dTLk`oeMmSLj&Z64|hdEnL0BL(KFsu
z=V)(_4Fd_F-Loa9?wkzn7nwOz<L(o8?%V;~yiKwrHu#$jfg#)VolIalHh}i%h(ypx
zn&5?x_OIEM9DD--HyZ1?w02>Se&6R&ScJjV4<kz8^lwzDvw2!TiKJahcq^D$l4I1a
zcKT=8dDuUK@faf;&0ZK~iJ?qLwxxglxg#IiVK?r(N7@MN9Nq32fwNxs`+er?@z8Qj
zTUWFZ<wvIy6<6shW`-hV?>FWv=pZp}N!9_MJd=6@pznl#=bNG%mP~AUKcNroZ3cXS
z^)flHP{r!`xpX$sZrK+U3dBu1_l2u!(Bvq6e(nW}<aR!5aL>v?T7-N$n_FBtjxEh@
zI%QyFObz*A1z%p2{e8o&&)hucey#IGb}w~7_h3@sz<3y^<XDD6MLoKuKp(61E$MVj
z3D1Q&ST?m|@sV?p(3u0wOGPiV?UhDWDuIkEeKAJ|Ah7Ap>MFh2TAJo&pMip@9?OkJ
zrbT^rA|D82q?uA{x`w57ZrCWl1ne3xnrGcSf!3h4Dgq-jY%BT9<JI}ObU{bqXVQ11
zN4j@wPXoA=wlbD?{;6zvF8s-(Tr%|5n=O5E^E2}@5OjiPW;hy<!7onGN_Jwhdw>}i
zZ`HO!rxR|LQAr{;IjYgYg_uUM=1lZZ-)}J#si7ZVp>x`j3`_vv&}tqJT3ol7LkZcu
z(t*=68?At$7M!HCx$sdol0X<2spxj2UoMIbKuklZqX7n}u%jM3;`-Q~zmMJRhGi}D
zox~e8wKbDwc_3u1D&muTIS)nC)$?Hxa-kXww^D5=*^r=i?4X-7<Tie<B{)3GN+Z0f
zey$%+?`4Lvtgb>GOFP_tx9)s;7kehPeRoGnEKe}f1UhuN8R`gl=Obf|naS>J9P!mQ
zV=zTMp;<>RboHX_(ROI<F>oN*LP8Q+@^nGUk~mlGoP#!ya&vPuP^cbVeXE(<-;za3
zjN=h&28($X4n0QU#i#^DS~w7CqlKTw=H?L}?=J1KnsiH!VOXzdh9_KqKLp?$Ae<M2
z2)egVx_G*GQ4`P?oSZ{6BPC#wLoQ0wf!p15qel<DG4#=0@<OXgQzt{%P+eZ7Z=&|a
zH#aXi<OCeOulvNl#T@`yVMc@wyNh6VgAU_RA|g>#!~s5xbv$BSL~>K&O=(Z1s&o~T
zWpZbL<$U`NPY_wQjt}}y)KDfkmu1GxZHYK$ccj8;q}k`KrA5ZC7}|uVH;1pUgZOHP
zPZK2F1DN#{IJ_QOvpTPt*n%Dxt{DfrBX=C@;D)L4XSAj<@?Hr?l_6pJr{o9EWs!#c
zc@cDgF`^e7*Qyr4tk#LSrsPZ*S}SU|2b2D|7;%CddqNxg^#U7vA~rTmQk^&Eq*`BW
zcTeQ*4#Ej<@d<A6$?Pqj-P^C--u4lKrd?x&S&@g>&b)S1`2-z2*x}4P!gz?NiiHy;
z#XQ_Pu-&w%4Ecx*w~J)y&N*ZLVg0|&V`m**-~~S2|9j)h$Ln8Z^M9jD_$UAGKcWA3
zz52;-+xcxD?;Y+Q?|;A7JbHQX{oY~oo86<my!&=Rsrj9_m!=zXYD2svZI0rTCh{e)
zuUlO<U|f2)B=Q#oG|b2AX!)pNME#p4UkOqf{ybD|CsSB!jxPr&vI1m0UN#5WJe756
zO6;inaucWO9oiB_Kh=NUl5hG0W!`Mio28J6=P{29debIY<Cuc~uz{C}ZF9V{O!n~9
z`g9oBD#7r(zJ0t?QMFnP&mQNVpzFA0QoXc^4p$W7pOa;Bx?jHK4o*r4dt0n)N!O?-
zU>zF`T?$50#%aP)-wiEDlb`8A7<U}pQQR9`#!a|E$sTTQj`<IRnWI+~HkRZE-;QEz
z`!mO(<htRvM$H&6BG@q$&zbLO&8ra_yN|Xg<HZj#C*;Bm#KyD2B;^~E*-k~dMwl`n
zPVCH>(rGP=u2#s{109nbLKwFihk63A+HH>~QP(~3un{qxLgE|zr|g_Zarb1`Ma>sn
z;k&ShQML_OJ+m{X1LW*-o12_&UiXf7opAK=)|4i|mI_ap_SPYf6ZBDm%jr7#)o`(r
zeoUI#+wd0ON4HjE&0f%ou`$*q)v~o@-xO`}Gj4}#WT(rdJrb@FJaFsE!0&Bv=OR>k
z%I=rgWx8jcj-r<CC7lE0r{)*6oi<q(w13C{1hL7;C^t9z@s%nqo8sj#@=@9e?Gsum
zyWrH>&V@18?-|_tmNW2B8R5E+(O1nij)g`d!eIFp@5&>_j9YXAG}g8z3%hez(LSv}
z7j*7mw6n{gg;@7nCO+bwwuTFm8dcrV7*##NO{4MkfhZ8(cESWmbnhXlj^h<{JGqIP
zk8)tg=O!FOvwzp!?wv{oM0Y$oU;!;Z03}$l;0pIrpdWf`o2sfoZ?*z>9PX9ZEKc9;
z;sGt#yA27DVnh=+gTz$l`~#!Zg{i;<l~nu)xdu3Gqpk@zSWzak-K$Xy9p}gf=!CuB
z!31yP8~?fQVZ1F}bTrk}KE(Ok>l9C7Tlt+olZ9@=mhc(};Ti71fqiCs?%10#yJi~T
zo10?wGP`gAApQg@|7Q@G<N<O%%+pIx^J<=2vmgYKPYFU!z!FIAePwBi`>?DTDx%4Z
z$RjUTS)X8Ra2C!kU(@l8^0-(MM<QTK$(^bQYTrL$ct7#c;L@Y+(&jsvjL6jdO<V0=
zH(FE{`p7Pv+Tm}7Z`U&IeLjgQ$usjq(19X!!2cCA)ePj+&NTPtDjM}UfD5)u8<Iqa
zY$&25W|%9z9?~w_kzB=SsF_j}84V0QlTF+bZN5rzO*-+AluG1afH5sblu?%wrO@<X
z(-g_b^j-9(g)qqw7C<JQntFmmayfflHlk$gRFlADXi%ol70h_s8+4MB8zE@e)$_47
zAD8Exy90<ao$*^%yIEfK2YsOJ3LYzfY}vH-lC%4tAjY!GdJaW(mfsBZw>T9_s4Z4K
zhsYX%EkXl52EM4LbM&t0`G3Ne02sD{B{q|On-q$@K?+X^L(`1KuTeG*Jpx!mb09J|
zp1|R3kPfYVP^?v<z9p~(Xwg)0K~Rjf&nPx70z3*zxnl;W%{x&s3!5oj3@h~4<**m?
z420m-CWc}+!@E<;Zhs2$$%Y+5Oi#f*C3m-#qvx7q2l6e+64@?_oNjZWncJJzOUgH$
z{}6a)y(0=x%mev}INog=WL;3^+5r#bC!iMEPF{j;e<w{32Y#O|v+#y_zZ=6{lG8OW
z9sKW2eLwBOd+kC$0>p!!LP37y9>>pn-)lBxt_)*Z5MxG<NyM3%2BaY9i~-^Wp2;O4
zcb@Ssd<e;~beu#$bGGJakB~Yfk>TF)o}R|zV^d$tQK(<ydNy|N^I>u9%Po0~AI0PQ
z^-bPmSeanl;L-TJukFNIgGs+*_N=vyM)y35rV@`vgVCn?ZTGs``fcsE)^9Zn*4=$9
zbl&&vGgQ>s=t$}mAoj7hfk;a6FipCHzMM9@Z@hWUM-i~|B89kavKp?yJa}oaf!vNe
z)hnFKJW=z}o(O0gj>16?SM?-0olua+zP`lzVuR&OZM+?b9P_@2L1IF-Zxly!M%Q!?
zc-Ch2BZrdt-h_5dFS-^^QTo^<&v?cWPLLNTbVgcNN?cd%!3f`;N7wtdqC)>cc&-tv
zVkrDo(vCAJvQ5%Ak7X4ETKfn{=tZ){5?%vEx$X6``kNx>a!EV%eNhz5Fy3~@&YBk^
z_P`x;hoQ+0;!R)48+7U`H{>)oY?(#2hTqHo&)%D^HF6}4!tZacqTD_QOLR#ff$e$O
z<_JNz@z8)-yu5CYo|05RHQHJwHDJ5H`#INfZtROC_sS|sH0|+tn#XQQRk=n+Mn*)&
zqNgQ;7r>f!b?{sYOiT=vxgbgYYRn9U)f^7qOt0b4eL0T039>W=;jnZtEN8ij$v@FM
z+-9Adyy@83jcQtqyFxJz?vuV{e2+(G9#PHGROif~QhyYbsIz<k!NjLDv3s1_>#P3-
zj2NoZ+w?t~s<cbYo#FG`5I{pKVPNC#*Tf+tqua_39F7xDSI*2EYBsKj3&R*x>c>mq
zD%0I16+KA=Lc#`r0HQ>%rP4R#@KvfxYL&9C7>Id;(z_JCir-MFWti8(>&5D#qD8Wr
zP<vFcMLRof>D7;|321Ff7TDOZrYA4kA5p9gz)8idw;jBg15|oIK)I{NkNdS8GGfH#
zGo&q~BWwV2jXmJVj)zV00Cy&Z2f?h>;%J)@VsB39`|dpEqADWZlzB|sT<JXV(cEk6
z{2*zj%e}Zw(ba?H2!9q%-o+L(-k#vXZ&Fm}<F1z6vf~O(uLL(vuZ%$~!=6Vw9(<IM
zTQav8Mn!d9g3rvTtRyJO`<!k4){L?`X_KjNd4U@DB@y*vOIqmufHvsDx()ppfx99b
ze4G%9lo5ti5x(#7J=w()x{^5zhk>NqW(u92L>!j`okXj$hZjKZ2N+R<qi0}@=1E2T
zkh%icK7{quD~a+nB<=)BFr~}IRk%Z;I3mLQAWBld6x*Oe`=n}{J6qsB?M2DpL|6#+
zM*<w7SpKXx!49Qp6t9gGjnd*^(v~P=cok0YbkMYl$<m~)-}gtN1~TgRRR%9!f0x#*
ztXl|<DtRbEm9p>ZmUa#+=?#X*6@XQ&7>@*Oy(<hf<71^{Rz6$!D3W!wIg8jA`X+Lw
z6~eAKgnp2=S-E$FqQvHA=2kmvqwv8NI-;|xEN#%n874;VHNIh0)X19W$bMKsVs>Lq
z+AY78k;bw@&XSfIvTVK;*w7Lz(fl3GAv8p>3Mr0CzLjm;45@sZAJR~k@We_C{UH*`
zQel4U`y5-^Px;>#w*Nmu!hTM4J;R3EjsV5>|Fy4cbvOUp*BkXu`QQGM_WxggGW{n)
zbac=-zi6F*ciA{QXf;oczrB3fxHxFNJZtP99vo+LxXmuVfIEQA!5<CpD!uq!jF$&u
zALALt@-(a`5ZuX4pBLH3%$XU^Fow~EaUo)*SG!5+ohy%tdziLa_8zmwanc#~wY4Q@
zN=Cs+<wQ^pal2xQ)~Yda_JJ7hs5Ci$i;RgGUtJ}ggf6XwE{8%h4zfvCf~*Fl@daIm
zmB#UPH11L?T6o$nTe;&<38g1ipgnWvr;BLyldWn~oxpj`$Fbd!qxCU6KqL2b;%IY}
zB8Q4s9kV*GWqyI|XZ*A|liejo@BI_N;ltyj!{Y-EI!Kt-VoYl(ylhFloRQf81<5gv
zLd4q+mx6iz3N^@~j5AHkIA@C%6ig$wWxY`Ighmu`5WTWxL3+NN^0_v9Gm?Tx>%z=^
z4_`oNHqh41qJN0gzv`D9f>emuA;nZQLMkq)K!d~{D|Mpy+aYRFA$o%$%I(tV931sp
zsuH&xvDtBn9Hqr8>c<Qjc9R}MWegEpvg)kB8xZXXAG=tD*?tVBq^e-Li9m7jm=1b^
z!S$p9ArY_;{?K}(sz*A&(t8KQ>N1P~y@LPCejSco&P-0jfrW1hNVUvL+qyr)GAw`e
z9J$GAVpT!1?3<dsT%!_)0l}o(VzUV$+8KO#D|e$|xMO)p#F#xv=r_;{&F<}2=q)5=
zK)>w34tCjbj8T?kxGfB|=?<dW9ZpK4vWWUg+6q1o1?7ZB@DP$0K&XriQfQxVfs(<y
zSNeiL!jm((V>rR=eFka|218PMU^G*K@+0ANEl1uP-9(&b5@)bVwd|!<aH2u>$w~bV
z)%Fn{{XqEvV2rpJ7K$6herD1*bb(+)19|$26&Jg&qFaMeZOv{*)}&?@*vmQ1=rE+%
z5@mH%gS19E_m<&(U$kc^tUqASt)Y4RbwTvz5DS9Mh%Kh%8L%7Fp3}m!B~Jp<6CxXo
z89bc%@%&eb=K`iV2=4aUjwcp2@+h`43l60_)1GV?q{n{cymQk}y7Hb~MY4wxic;vw
zliA|f9+Hn=gXi4&9GGk1t*^O+<I^CyT8!99N||tEfF++df)O9t<RtWaM}e~w7~;0y
zBf=ZPS)TQn=8VmTjCRtD+`cu_f!IHV+<_}EQsH$_*!#BGq}!0l)28yH&2)We&G2jZ
zAQzX0PgXK4n}r6w1Yf%-b(Zr*rzLHGPYV>GJv{}?)%#suKnfvSRu>#1UM(s!z9Jc}
zs`6$kgEf?5+c7KbRor<Lxo|6pOfg~5R^kd*`vPhjk?85}^Icpai4|>0VDQ&4(P%Jv
zKu_j&(n}@}-1~{V+Tx6leAjbyiAHGr{kt;;M4SqJzlwE_f|_RvGR&I{qJ2+K+Hz{b
zB!iYAP)k$Q-bWJ_oJ@=&i3z{hM#7CyHX7;nB0E%Y)o#>5LbqYpkj1c;n7QHSO7j~S
z@WO+n=eKGg7%CBwX|I^7$U}ns%ey2JVI&<}44pI8whs#cp>BrVv}&4AagJ7M#|W7&
za9a5V6b)DPR&?>8T2k|}a>uIuCZ;Kk#W-#ch4>WvSzu!|_$D65Yz6^KNT%e|6cY-^
z_YoSNa#q3DnCDYx0otjtK9B_*`oU)~bEH3ZgL~pf+kzW)0*-fwQ-I2D&?xo8fxg9~
zD?7&gHa#n*tKKGegI?OX7Qx2go3gKLh<=dd&<kc(<b4*&qn#n9O5YBieXdt!la8=w
z_?W6$<}dJ&D#n9u&)1I7K)@OZD?Htfv<;!ZY&-fGZ}U|;lg;QlN(M#qcnwX6wGv3N
zr;M@O%X*4H##+Gp9oL-WdMdO%rtyd%b_3o<W)@^_OPi56#uswW7|5#qBQr}IDgqiR
z@|o)bIp>Kz2cMu_B|SC;Pj|>K+<1<^rShe<y)c!!!g0>D?yQ`5;-0F|oippp*_z)`
z^YOlwGqpgoI3m-NvDjm=a1qFQP`iYE@>%?k_E{`OmDz3r5|>N|0fAQk-z+rzEzgR6
zhAo{;=V;m~A;4Nh>gUSqs7y0V>j_DYEY?XHR7e3+5*6d(-LX=6gH|eUww2`)W*f)^
za-Xx`d3w&Mv75u*kE3yL|E6>w>d3a*uw$0;UK&n>!juMsao@iQ)c{70tG_v&P!8CR
zd|&JZqFgz6s@tBq27gz^dYiU2Ua{$$c@h{VZCj7SG6S`I_Ln(mmQgk5;H+u0crPt;
z7BirjeGmCxpZ@+M>_2RK*__ai^Vol^t$qEKWB;-7)!J8|>_7gB_8*@@KQbL9^Rpig
zFTQzsd2oK7G55&4&RTc)0_E+St_b#xyd(9|rezyT5qihk&U95vr=9Pu&^pblG&L2c
zr)fOx4h_>{;GOX+#_~Z`rgAUl#LujZw-t5PB=b~tq<9tO(5gX(-_p$hOeQ{EAE9eS
znWy+vv}(wyh!7~}LVPl-8TPbJ3|*d;mkfrMnTR~SlnE3a-0q?Wk*yWgP)awsnxl<>
zqLp`a^BD&St3}n2fjP*%Y+BWwTA~uk^+MnNRAcwyrB+L1jf`QDT5V*e3a2T%)E$L_
z5d(%dEC{D0XR1OR{UaH4MSx3h7`6PS>;dl1qXwAWhG__Ie4o+Up8$ADqf;<UjQPoV
zmxkl))Tx!@81pa+3zT!8zmrW?mVtU0=0t3xO(sQ#4-`Ko2HS{58FxzXKSiR13TPif
z->Fzh6_&wrg#FXO_lM1c){D#IX0e|*HgEWM=hx415yv6f{^8jMJn%q|mk#M*eFMrH
zi3Pr8amR@;@iqi6iW^ce=ccZOzBJPiz1YeLd*>`Ok6*o&OU-}FJSg>6FS>Ypt<Fj#
zMzYwh9ZJWg7BkfAB8I%>FPo^>?{lwXQ5-|P5v7lUlf@x}mE9nQ*CHVSv1~)))t>g;
zh~*IGBMaG+LlA4-j?PlN7@e``o2^4$uq7^^tDRME=c1^3V&?D!MMEW-d|xuWH5tK)
ziO@s^7>sFeQevN6X)?#Zgol?>28dXv96-9&iPFh7cDS-zLbjt^W_6luQmLQ#CmJ|V
zt73DBs~UPl6rh@;S`Z_<p@CVAzvt~CHzE-|6Lzw&ihp`OiaR<rF5A0w(+T-lb*j#F
z2~yW*SnK%Y;sEWL1~FTVq_P!KcVZ*^$o|*qU!9~S>bH~YDP|K)?QU69gzQ@+ZdU62
z;TELuK=FM-F4O&(9M*grEuFwA=<F%L#;omZ-o7)Fc=K(i+=va}KCcHD_-5PeCX4#?
z+;>dtvoxZ%TEmgb^v!*RCj4e?v7CN_^KnOvZcE2Fr*#-l2PHjrIDS)Q&8sT~%jsC1
z%Cm<YEbCptv=v%CL*oj-@^Q!V?o{uETPVIa9Yd3=QVHIQI-l)`mde5JzX$Y!3)2g7
zEtFPZREZOSi#nm~MkY>S<i;NDT?Y?}_Z-fFcN{dax)PdgtgMRNm(lNyQu8=jMERGI
zN<Zn({sZ)X-$tEbI}sm6dcMc$|JFC^^>thSSNr<QuQopE|NesdzqS7k_Ts@1PGmx-
zVEYYzRNM5dTKhlO{V3_7CCYA2?N8c6B*fn~nkRdQ8Lgpu?DYk+EFF0_Tb7M)_)*_@
z#7Y(}dp4XXeVo+h$!9w36#^xN5mi!(ixnl~>MRYKXdhK}FR>YTE1Iq9YIM0@-&e0>
zszty>=;w7O_?AKv46Xr<F-*&AcsYsaD}xBF@2R<Dl8P~6H8ysXzf!!XXX}7Xo+s4*
z77pjvz4!{S5au0UCxhRj@i=;T|68Z?9vWB<UJu{JV~V<x2CrWSl{By(cF5L9R0>`*
zMwhW|1saJGI;S3hXN1<sWYRgs!@APx4Li4JI;o*R@|_5FDE#PP8t>#>sW}SKfPiM3
zc^QKgXS7KZd<PT!_Ma>8iNDAYVKR;eDQ@#r?hl3vw#68Qrc(k&%8Ke;l{!2bT-?M*
z(0u7zZKzM&$cklkrHq^ORVOfWb_?Y&=j|Cnkqr-hj%ktw^kOiwgjyr#5%WBDg=kXa
zMjp>Klc6m?B;PtC)TJ3NEpW!9tQEM;rfdtA7f~D?d*qs9P9M)c3reryVMu6P&g(^R
z)WQW#@zU}^%7MQ+C01XhrBn|~RkCBb3FVJgW%x8QP9f;XZX|UY@v3#wt-yv*2UdK*
zIC?kCfXNF0ajh^MuLw}sua`OUj%`fAH~i970?iqu#e*1{17nyj;5COP!S5Ba$%e;i
z*?2y~gHTk#mguXhRt}z9?OAU-<+5!Sfsi^ggMZ<^9u3K>0>7d!X%V<od1FC5@W_Wm
z7*y0~%UgNFlB8*_F=^kQ#YZH;3g}(IA*m7hG-Jw`DG>1}qT86;%bI8?lO?I=8_6$k
z#ftcwFg-%=-R=0GG|)z=U!~Ycax*r_r>3*FM{Z+pL-BPb_;qKE8?p4GAJk%KY4xXZ
z3*jhc{zT1EU>e!3Ni7J?kUp>m32^Hbbe19|Tp_kAGPn#yAAnkZ3rt|=hEF_oRMK;J
z-yRAJIX<tEO&}%!F9o%^Z``|*$Xma!%cReeb=WIp9qxfkctD0hvkDLJ=vObhfXzK5
zaj%PWQHf=x=_CcI<5PSOhHtVK?g6_97K@MEL-;9<=q1`1_CD3e?vreo4{)%ahZC?|
z-?|{mz)I*RrFwY<a~VHKhhZwQEiwR$u=Fj=d5$+>s@kr|4qP_l0(AnF4!Wi^w`pMH
zh_*tvsZdUMxQfK^zq!78lL%89m+mXSmRE6TTW*0n>GdWFEX<}}f@*ja8S_*bgU!a3
z_6*AJ8ii#xYZX|_hFPr~tOi&Iy127qWU2i2Giz3yn#Qn5Ux*ba!ERuUD)_t8*rxfr
zQp-+p;OXB3uJUsqsCe}%azHCV$vnjXXV`fXzTjDp-zNBFuw@{`z*=)@_V`{px6MTh
zJ5N9hk^Y(Va3NawdIGcl4M+IC_1kFl9t9}t(cAY(DNF9dGEh9H=vypWNI_XPT8N4s
z2p=DmpgX;3H<l8#CQ#MRXvAJ06juX^0#$G47YF&kBGNVqv`C*u9eFl}2F`t4@?5|f
ziBO}95pdse6ZAc76H#py+FZ3_`7HOVe04esM`aXQ)X%pVQo>jAExf|rSFk%AEd;)O
z>D?WGFE9Ve*e#@F!f&x3SRPmS|9w9GzYj);+;3-~tyW23uZ8W6vAwmt_Dq$G;l>TS
z{Z~}9Fw8N;jDCh{t--X{1JY_ZRp1MFSY|~{qnA=>$(Z5v5?{Bc36#^o<7ho3v?92B
zchLiO81<_2Z8ct-xwo1qZf>;;D;<SO8Ig*ip&c6C2fA25wlG2hkq))g;1xxcPX<8)
z5O<uy`%-;v4O5ZsC#likDXFUD4oQ{Y>bdf}CCv&i$0@r*GGLDyr;X-6f**$CTg1C#
z1aa-lwJ^9GB$P#XLTbnhRDz~y9K(x?O7pz3wpQ8raszMfIx%@s!gm~6^$*g%a44T5
z%CNXZz+}Ow3#H-xuU`8i<wX$nwmqEOVTzFmd&uW{q!+*JAq<L2f`*`eH_cvHcevd+
z{X>B>_Df;&v%h{-2eYkjcXlk^&E!cfN;@iVuc0@EVwr%4@-nS!I+g}7HDhwn=x}Tk
z&9dU_Q9;*h79;g`6?vq$<xI=^aS<;(su`2Pco?<PD9iD^&6YBH9MyJr-qvjHRyL1g
z905x=s!0`+*?HY)mYBv-s+?<N*Vw|_ycXVOT6n82nXQ6T$<o`5>vL#fbyU|LJy#`n
zWj!oB>BtcHQ`MCtsjN=5vk@;-x<dRdOmGF&F;1s)(MEs_(?aIo1?<}o2JG7p1}vb*
z3PTq<Mv#6uJP9umg-5C_p7N^31BUsimI1beG)fn5SEpV;r#7Tnc<@p-9QePGsu@b4
z`N`T0>Lu+)Sp1t!w;W;Hp)#ZLBIL_yi#vGh8<063C5IMLIDE?^0x6eY>+Yq1j@_+}
zw#;XWI+&Efk!B5gx1GUXiM<7+K;R~dym&pZl(+LU)Y=&LX!Y#}jQj1Zank@j1Mld8
zr<sAj)dLqI#>e?v!jP1Ql6h638#T8kLvJj7tP!l{7^l)TbM`K|0lZ@(a=g(+W<WQY
z=9MnU8gHZ^mBu7T!D|`Tg_6aLy9wX;U|tQGVB<jxMn%e2kcb}qH?gp8DDFS1Zjq|>
ziMVMID#YM$`N>bl6AG(i(O_!6C4t*Y1}!MkqWkg^8S#+_F4_hW9BusXg_WQ2>L>9^
zsa74@Zg#vJywQ^N3>?V0$Tv`ca(KXlkzD7uB%}&a9S((RTn*8<Q{@jt71OsE$Rnig
z$nIiZ)e|*}=6&yxUZPUoef+kZivd+nE^`}BGnU@6z${p}TpUq4nd~bIHH6BBFvO#Y
z$>6N3m6;O@=xjb4-jSO)86H=50f-bgQNMXG-n`{E^ML{I6!6hRwtTym#gI<sqgoj6
z_Moe8AB%R-E1}(T_UT8#I!TWHd`QRp;X|-a1}6}>h4BRh)`>R{#+%1rUDdc0XAJb7
zgHJ`rS64D2l2=y$Rn*<G`*tVh@l=a}{R(~yPNU9kJUQpIIwShgnmEcB3I_JxdkL3Q
zgB-iKY3j>N0YvkylG$|`BlFTE#qY8XQ7kh3n5Fg0WY8JktG7DAOEMS$Pymg)nc@$D
z>?*go^7>g3R(od$=ND&(%?k=twd{6ATI;j;N{9$KRUc&4hy1GWpRt?)vaxTf9iD;t
zBBd9pjt<<}*)iTDTfMv-r1nF}M9Vn894uoc=yD9{3IUo~If~CQiY7N8bP^j$47A1}
zc;A?yVlL^$7w9ev%O^rnL&wyMF8!hxU8g`d1v={oj28`x=&_ef%BpIKmUU;RhO?kG
zgYRU=Z(`|AP&0%j5QXUqO!A@_z%Gs7PSI;Q>S1VBj08kV;C|d6A{y+f+3IMsayP_r
z4@K0SppFP{_#c3%eLaY~=M*Ar#}s=p{d7yH-CLqq33jcK5z1=fU_e1%&Gw$d+_8AY
zd|F=Bvo5db?_yXYB04<|MkFi)XI|UpC%dJN=-V!rX3C|y+?&=-+`~LS{4Nw9h3Z+7
z`gdUfZ!*Df5)6iuY@2h!fH;X)0e6oPm+ya0esQ!v5BU7Sc#dUIDnNB4RC;ddL`$Sg
zc}Qg82QFMmy@fkwVat{-vx`SWZJ+^bHib2C<3D6-NU3xd0^JJ)+S##i!UCNMY8%Q5
z>VRzU>-mm>N}}>_N~0xkgH~YF8%AA)8>4%rl2XMUJjEW2Vk6pcP&EWf((RT;{V5#t
zo$x*>hkzwj{+)&oiL!_@tHH(@rPFpv7wurtP*%Hls!>R6b91OS5B@jri<FjvyJ0FY
z|3)ndx?$|na(Tt~bZd^K)7|sZWm_p&!a%Ohv~U&glbW~WbI(}KrB{^#|LUufHE&6O
z^St)cN8-%X9(JaQn%hRr(@3y5=Bavi=h-G|W*aph`es%*gT}cf-fNYy-twRAXdIej
zziR}ze{NJP*!p@+AN)1s(wGfCW^n_ngGE;8y-C$Q+l{x^Htyy>^{J$Dm#zd?$$i}Q
zGHlGRY2|VO&-Ph{XXC#Q;M_z5d;*>xH4cwk2gjGMTCWa|OWM;~iaeW}jh=9d4FmV}
zPv-0LE-<f@x$^MGR|iI1Yn1OX@W0L&|8|<-??_fv4C0@VPnyYR%)wY}`I#+7XU~Bz
z5qqw~ey*ebQ3w5_j`_Sp-haf~TV^mER0{TrLX#Xw77~V)dmmX?d5}+6?&l^6D-UxL
z1d#-UBqc>p<dX;Ip3vhz{21qA<-GREDfiFi|9f<~x*wx^WHN}{)Q(T!|GW0(*I(6~
z_<#8HlmG8u(f{{v3;6&3W!-o84=xVQfNg1b5<S}Q7D(zi+fJ965BIgw+P23qV_3>g
z-0H@Yb-{it6`JkRL%K%@BK~OeN5?VWrPLT~fx8w)+Rz1!QmnVNuw7QcsKv8-2&Ype
zaLRQ&<=R3cUjU4Sps=wh6yEU1Er0a+V_*Tpp6{D*Ei;eXu$Gzct*6|Zu%>N>TtN22
zdZxl4T+cK-@RaLAxfy^o+!t|*#tA`@T6Q#GG3JRS94zey!nw7pYf(rO0+R>}Bal}0
zE42Um(nuz}n+{Y2H+lsNVx+CRDye~B%oS@OJ3CrN3Hl<o%g)YP+4k?8H3VfIB(c4U
zR5Xctr^&mcX{lT_rdcwgvg9La9(-iiVNlzBdRVF9%YC}`M1!sWF@r@cR@8xakosx8
zT?=bZG_&=OH#0vuhOG&shb?P}#FhNAFmb!=^Zc^)@af>PVZr4C0fIRT$4TcF<4e54
zoizYf0MkmvX2ynK<9-82#OF1IRb5{TK+;R|p}OHf706xzT&>b|D8F6=S*!HAyJiKY
zYqS#cjs!+AYmGO|HC}gCxlno|(~92Iu;F2IgR$YeZ6E<NQ@|)`*RRz%_>%m<pczE*
zS`S#{RfNX>&Akv7@icdKL}bkJjF_SW3K-f#&5=TDHlxU#XW1B|0@-wHnVsT|V%i_R
zi(65zx6LhycHsl}hfaU7l_PcM2-Y@7`6&7C#k5C<+2v!xKNZV=m=bFv6aQ%)K0e8R
ze?j^0tIC2wp#HK#;L&A9_M=C>NbhqR7#V4%)EO-(soKHYR-xS`9l+asVcGep7|sFw
zkD-~j+~AL65w|%o;x`-SKdjl`3D(fuVZK90+VcTO+(7bd=%yy*Mwmak)REt%S#%^_
zd}$fkISWvDl)M{a*!fUoyzEf|x~n(>>K&(#$q)&@?rxIK4MrkGBN66JWU4oJXfq>Y
zdFL{CP!gwqI-7P1s)YTWWka0iQ2IKb{`0p=?R{_*Tn69qSA0W3OyMC0q#q=c3TDe1
zSD-zW5%f|vI~|RjR|v~f`u9760le=|h213$suNQ}NEy{@91U(O<M>@1kyQ+@s6B7a
z9~mq6$TTP^=WSx?lMW^pO3MhKOE%9Ljwfu9L(ZoW#n@;6-2n&3`ZXR8*_224r2;}a
z!OCplkH*7y<nWG#MQC(R(Hx<B>rmw`NFU(%W=bT;qD(qX@xuj5EOGv>m;iu^0@z2B
zgma~epO_`}7Pdv)W3U=*gfr3%roc03fYYx6X~)`>h-^Zv1!E2@*L7^H$v3df?>I~8
znDKX-Vgf_V#mXo_{;J>aXIBXOO9ohGFagfHkGHD2>s3s;){E~G%;1XDTjtCRvE6~Q
zO?bxUz7_V|!v9s}E`yPn>2fIoJbMrH;c<)+c@xG;nPtt|tsw>M1)@&<MiaFl8DMsY
zaXR<{nu}xGce)!*BH3uBmdGs1*+c;-5nXBGec}z!wrie)aMKV&I=U>Cd9uc?IYm=l
z<M(%*oQ_A)5IHDf2Iq~)QF=4H<Ma}6`X)w<12LvCIvmIEIPlMk_OvD%)Y9x;p`W!|
zIOAn4vg84YfX&F;<~3}H+u(2jCs!x7W3eav&;cSEKU1B4jG6C}wC`lbG%cL3#8{@T
zuZ1Nd*ssJ!%eQj24$SP~qr#XQNh)(s+q}qg`3WN3lh$tE2qrG$=EMI7XNS$!>E+%L
zd>kGF_y3~NJSeG3Whqtjg||4fMFeCKED`8tadXB!c|sTA^0XI%=&R^HBPhXnV<xQh
z6$y3qjJhscJyV2lCb#@i{j{hx!uaDY5DCeXaNEp_YC=N_c83ED+{~r}*>%Zn9|X<0
z+lQItwSTI2zDVJlnRe)f*REOhyLlSUE{As=t%gqvAYv|8ZoGUM5l&uGOGZw{RcspR
zM`amR_97?6C#TW@dVrYBCm4KT-VEswyd2Yl4Z3)EsP=%&V*ELq2&I(y`sfm`$~l{+
zc^g0h`^#!*9J5Yswo=&$6`4OXkqMQJhezmS+Z%SY<*+&BqvSTuO>K&q+Nmk44S7T<
z=>g9z)gYWp<XCsDobS->_bNe429cHfbwq}uRlP<)d5t;dO<w0RG4fxeCP)fb_cb%#
z;_UMA*@phlcLwVL+kb}>4s;?6dY~1cV}s!Uc|vC;vj+m4UegJSNksN!s#85}H?66z
zA#aO3SfL;xIRC52upgIEKRM8@(yzIj0VYMU<c3ifrsZr1!M}D%UVr>SOdQ+mj)DEm
z$J2B%Jld6KAuV)qz_VQbaEQ+f>!$SC@^Wlqo0vfUvmvD(hE&2j>4(X8Y(ao3I4%b^
zg)0QBk#4q!uG0cwN>M2-heVTygHuE+{ak2CX~3DNrIwC4vrya>3%afZ$~r87<YkEO
z6~MfbtSWpIlt2um%(b*XID4%Uj<%%BAewsdJJm2AFQJT;0Kb?6$3!G}6#_B?Q|n=8
zYWZqAtYq20YlK`tX;tWJhT2-U`^J%Eh!iSctTr*Tl;dvp+~wuq+0mtp`;_Q6)o1SN
z;o2839&m*idkAJMaG4bqsf1|$_V@;c8CeOh6=J+(5Y<3}?4U$8K;ZK*VJlosfBp4=
zpXzqbhb;L+DHc^xmZS3#qca1L25Hifzo5^Ql|p!yQdU0I5WD~pS}+zw2UYwi%3dhr
zLZ$w-L_t4i_4sfUb3G;aeB@;leYJQoikC2Ir^6nSGZiZgpSF|hYZS7n1@?<IpJ%C@
zPiI3K8|TP10;=?*dxC~9AT+$AuD5c>ol!mA2dCbZv88K!G}&}y){b*o;blj3|EP;N
zDqc6w{J)zu|I<SH;z|_D4(LAWY~L2oR_l#S9Z!T&>2P<WR6F5-brsS%vsO`he9QEo
zGWea%V*yhQJD)^@C}>A%qAGB&o()ft@D9yF!A+FHCIE6l*+Oyq(GfN9^~_8ak4_jD
z<K06~D(^^rnb$3Gf~;DiPJu6wDv($(DS`-_=I#TG=ZUO`%XEnSi;6oWq~!fRO$8S<
zxdDoKGlWK%IuNOS5ceY(|86|&;nh8^0joXDHq&@=Asfhs`1Kr$+3<ks2yKbAV67Dw
z_iC;(HCT0(-4{t8Wc9)gp@n{Kss+o4cgBW_hF^Tty)k<fC1SJ6#XE?mJ8J_8lzhT1
z#NC$6hReBpx7299()_oh<P0?@zG9}@otF=ne-|{D2qQY)-N*#J%+}$C8X@}z4^ibB
zavyY&KyfWmDLte4Na){(qe;>iVqS=MGZf9AqT@`AZG1=jRHnBZ$4a<^G0f<RWXD1X
z_^B4J%O|4safuc|c@dh$74k@yq&9QWENUuCETp?1QNOHUVQ;}dhOXs|v?PMEI77-K
zbDUs&N(|AxRCmeCxu9{s(+<T=S3^<_N4Qt8o{zM+)T`~}7_T7qN;;?pBJL7_yf*p1
zCX(5GFejyPDdy~Yl~7zL5l}Eexo9|sEka_g+YLtKV{u7RuxzVsWf|L0xkrnEv@meA
zE<-L)4v}a@Z9x21=O*spU0_QVPYa7ju#F~YtpXK?AkMdjSE{DB0>uy%%t321Y^9TF
z_o3CF_9h8z<APp`HoC~yI{!xJ3V9p-LMbu6PvX1HP0l-C4iyDZm&h(LP**<_bqSAV
zFcmn_CR)0S(zwDpiatjV#$;1nj#+GPXx`^)(X#5Mvzv4`ONKaOyqa#Pi}6%4CinP$
zIcR@Og5w>NBr@%)X@r-@Na1yC3P*aRVF{;oG8zv%B>IYw#D-qPw}Vtja;1I<iZ_!*
z=M4#Zj6FV%`8?HT?2D3X>a``|pbSu3y;=(+=7zyM>}bZc^s&?!RjbHR5N#AD86re;
zGI>yql!yz!l|CsA%7VrXWUHD7lF45T72$nZU#H~gEO7u)4;aes1CVrO^$>Rp`JqIN
z+M|=qtM`JKF6G26i{r5A!E}UMd86Ql9Ex56;c$)u!KljUJ)Z9GqVa$fVb;<Kn*%aO
zX(!ishLM2=+F#JT<Oo$mXgEW>aXcVH2?hF4*ru@IP?FwaQM~cE0)~5aiUHX1n_vBw
zJ6%Uxo@<V5+~q5aWRLN&aox~R)+uP!)fp#rmTTsdb^Ozm(h~+9*oYCu5F(q9P?#>I
zLUE`M{JWc>k_1c^8IVeF$cQK0xQ9bln1vgfIv1>gzGKXo6HPF+M;5L>kP>u4qoH|T
z#}gle9m=$Uu<ogTSt?h9y$5-d83yLv1vBdrNsMsH(eX2#3^6wtD{J-4BO#}ey^(5M
zO9Cf&H-g;#O@#RrS(YUQ8Qa=PsbdCTuNMH?5;jHRrWq|J;P8yPxtyN{2+Eb1qg_ZI
zL$b1p9JwUhPJ`E$#c1!sVze&G^k}PV(NGpQ7}_9R1E3~k;uUGmheM~L`I+c~zrXIq
zZ_^&`U_JKNKmsuQDix3lSXrJzp-K#miBgFIf`nQc<LnGU?8HU7qb}(V4cU;+tz^{Y
zg?BrKbxelike1S3HF3Azn&jO{FV9$IJz3i)zYk1dd@6TwSG9myghb9AE;jO)k>BUK
z{nB(}4j}a6$wVjSL}WrFkmcm|xuRSo3~0G?q{UqU)}f)Z5VuL;q3i>Iml?7(hq(Ba
zRt*tZkplrmJVP`vtOhXkXV9BgttGJQm$%F$q4r?T^X}5-708Qm!NHw5R$gV;nz^D@
z7Di3~k>QXVI_DU0PJ!yjf{K~8OVj5?`NrRW&xKc3p`QcSzx#KQsEzJmWQZ>i8+S;`
zI<EL0xXeUe$CdEv6&e2#^54X{O{R$R&uD^CSEa&DQbjJHbxO=ai-jp%`4k00JD_t1
ze}4nwd({6z4}n%r@^-LtNBc*Lyv_RC^yQd~M`_ZdtgtI|!&;SBXXh$LWj@=Cc#<bp
zEGn4)MHO?NyI0;_9(2#<6L=DV(@an?j)iI=iMy?dL0>RxzrN@!lYO$<np;*pIe3iJ
zS7k)8qRr4__}I=~{Oag4yfvwn=Q}}(`v%-&r`aS-%9uzXXRz+(=7wg-cdkU`yTSW}
zyR&gu0rKSPA0Pj+i5)0|ir6v*Ydl8o^AU?_dBLDs;SHxk&*~<HFyD_x!T^ERc$~aL
z{)EwKi%)5Nt@0m=DwL)V7hM4JqvD95rpn#+V<QPLN#i+?<P$DfxNxq3_6H#YUmtu7
zWFY?|_Mqzh$M0|nhmUJ>V3882$lQW<v=_{S^0qIqJK%eYCq4KNtylzlMiMrm_x>8O
z0@aA@1!MXax~qPf#xAS@ohZ(-t&>Ai;|`?|RUK7j&Lxg@m2${sOsf-B?PH*T(?H&`
zQ(MAS`U?=HBH_7UtaoHs)CvH7mO-`b^nB(lhC#Yv9uar>xuc!Uwd&I@3qG4eHPbB-
z`*lV;0uH`*#K9;>Z+nNIHqOou_FH>Dwf0Yb{0W}q->k7;k&dpB8iw7O(a}@NE5Wym
z6pca>7;Y<A9^a&)Y$sUmf^cksPK|=-O>;PSGrfkN#u%PtR4tWfqdWfq36c~51IIcV
z10_Hd*yNFmcr?@#T_LyRTv|$Hm6mMn4-fYIqH%H9Y&Fi$56&)H$0x_+3HS2gOe7n#
zF_AmSn7zALs%@uTBT{YJ=^9*WGf3BK4BjAWUI?hM=aMRFIj~_CGr&H7QAAt6d9Z1h
zD{t~VZ=|>y`qemGBc=GrunJTK2xe;}=0#ZL5T!6zS+tcCtH=yypxrU88ywqzY18eq
zM66;tziUxo={J9%w1q~H<0vJ@)ti?10B%5$zZE?UMIyQpJybv6maMl~T(FC05x}nR
zg~d{lK6C+i6ndo01lAnBDH?FXD@7Wvs5p1TmS|8~#SEDG%X@(9Z@>d(e~TXiZslTA
za$Rupg7`h%S}e8-dSHJs$Qx9CCcx=*$N{X|9QTobRB@)ARoB{Iq^RMnG*L<+8WT7b
zJ8??4?r)~4)NF&`9B(e=vvk`#T0afVz!-0m>zg>8FnedPG6f}hoa)dBB$tRHc%z+C
zvZ5aTfN$xv5C8Vjp&9j#Tvf8UM$)Dny|`JDQV8x?Y@1U8M;=%_tAr@s%5ZuB66Y*b
z3ITFg_zL5^bUIVK1z>N#z7tIZT4LtcC<Y8Kl&D<w&=8~(P1D%E2<l=up#&8if=#x}
z_i!TBCI4Qi0)`?aj2Y9EecQn*YlvyW6^X2q&`{2vIct((R@!_CxS@-)!|x9pM;R!Q
zO3pXXpOarc+esKHY*qu4xKdPffz>k53VPWoD{JM;jrYHX$%DKIFKGdbDwe4ubsfDE
zSb3Le$+fopmgjgIYzHzv`>hp?@C6EaA+EhN!T@nm7!>f$w#phMF1$^Lr$ky#_anZ6
zZ4W1tVZXw0vN}T$N5?@4!;-O6*#uV{Bg(=W1-E5`OTz#Lut1`d7KI$Z+Emp0#`e?K
z1}-UqqLo6axJ83)4P|2A-QXM^!%%XZ{KZ=gV3*SEDKeP&FqRnxaHHf5CG%KrMg|I{
z18mX@;$A8Y2KX%JUawWBgJ}G4a6cNC#L!CRg3^l6Ea!uBIS-yP@M}&@yhphuOorcM
z8J&h>(R_x%X*xtR9sre8uqY0`!MP}@caadHH=_rMifE!KjRpEul?p0T6O&S7mIRr(
z?Kgb^Drg%T$B`o>8wpR@q)00S*cO0YA_s<^z@Y-VUGmUOG36!-VllY{;1z2Z1~wi(
zP;4KfgR#0yMT4EHx`)qDid+9(*Dc?5IoTD=^fX@LsS@TTWz4>|Wt8@i&z#aSS56xy
zaBE40hL9As)0QMfM)FB`dOH~2kzq3~j@*Hza!CpS3vPJec=M2!vxAFa>;~pFqeUuJ
zp)Ez$jw!kjt<)FqVgUZsB!3oVq(qhS+*(Bvmfl4p6>|fz4Yw$&PKAMs3R+RR4R6Z=
zgNxVe7FQq-I9p4zmyjALCYk`QTTE+{*NyECkB2X`TE)q#(>QnibQY55&+`u6EhD9v
z982!1+hf5@r`A~TL2lgz#*)DYg#%=vInmAzYE=n2rlYQ4m-I1cBwEazhV6&oC9p|p
zg0|JY1kEj3^{b$<O{YVx?3wl?N_yh(fC9m);`TVK`mNp$x0H{?X{C+H%1gJ;Ya#wG
z+X?v;g0j`Y5XfFcH=;X59j{o3K`&J!TPmH*s?dM~t?Uhlqd-LTAh0p5B^yTESr*^v
z5M7fg;btxAK*Z-`{%eHhSchcbGI?Mn!B&PLV#pv@v`gGkMIOOepEkL*w~S=Iw_(BV
z1yD%Y;H@<b5hrFIzKko+9zz}hFvh@oDCnn@hx-4!UJm{qRB9mD)<h*+eJ;%bXUp9S
z*%{}^i*v<3UR1;Xf!ck?Y(HPaFMTeU50}u#ouVEYA|42Zqw!EFH8Ph-;&w&d3bQdC
z56HML0)A>m=kT_?<)QUy2i<Y5{+B;6W_3wM*}0kyI`ZDpimTI&+tce{G##VjI<5Lc
z%9&_j=!0|CxZpD5!`IpptkEjJo1l1JB6}xT#IlJ1VdhQ`2n$8Qv+iC}tB$9N^gfD%
z>SRbcsnxW^#VZcB9Sd*UYPCw(q9k=ps*d5#N^-<De_vHWiYJzj)b=Gz{|F<%07vEa
ztnS~ahskh+Vb2!s0Ata38kb51#;Ov>;FA*?RrOun3O<#kOI#dEGYYCY`0ol)JynzH
zXxl5fMZ{llDC3kU<51<5zOwM&>Z2kOF9~<$91rzOU}B)KR?${_W@tset9jM<$3g2K
z2WQ6zN3CxTj!qBG&P)Dk<s}`Y_*Ha&j*GpY^xdd~;tJ^Se-1F&VC(ek;KjjN3y}C=
zzhvRXtnh?kQRz&WJ;V{t5ABPgpH6o9o(V(LUkpq5WI51xVM<})vtRV6z<@}S+D*m`
z5?eMz<FZ?l%c-Dk^#x3md<|!FMvz2L2R2AU0z^v3-`3uf1`=qmjH*fujSL9@#1ys=
zrqmvR{G{UW^y0}Eh9nvB4cj<MF?#E&a6m+DiBJ%A;}M1S!^mm9VMOmnL&{7P^*YEk
z(fJ=7+3nI0wW~1H>dEj1!7O)n06lUr{{zq57*~$<1NI8b60qVA<tH@v$05QV?MIv0
zkbm@8y5+0L&Nq3kW~YtsV7-nmwZjTI?88z}(C6<;_{|q_>U2CDUTsPPx=n`1Z5)p%
zwK?;X0Nv9t_%4LAkwRT%O&M0zL@#cpX-G;sLnQ1{Fef8O8jXe{9Md>iX8bNYJUYiH
z-dA8ooTjxfqeF|}&*@M_e3sFkp)QKwmWoye&%>dzS*i$AM8Tv9jkpJlIMvhcf`Iur
za%}kvMkT$dk?>%VF&ac!wnQ`90FV-<qGC{Hmrr=5E?szvAf2>pLz!MR3~FJp7Uq&K
zOcf+Fq$S?D<;<0BzWuQz3MvTGig=VULl#M`wf(Wz`j{&_VXiC{4QnMz9%?j?5wcF~
zOKocbfAsvhp`~GtNuFGibsCX{nuL**NAF@|86zUa!*MiuK!Ix%|B7@-Y{-5{3@1@7
zLZnOi1~6MT$6}Z)K^GK=_e5LqLS`&q0gwjqbuYOlCAQFb$wUVr7!I#gavUizO2n-O
zKgiuC5E0!D$u~|aWYB-jh=NB}7h^%n6oW<%g?BOg^jVBf1y;9$3Qh5>B3(Gyg-^PC
zrfA1Okwi@d_J)ULR5Z|9eKAX&jQC<<$vQ^!Fn`u@EaP1Cv*^ZB{~B#A;eE-M@8tsQ
z7AIpipKC%&Ldq}(U#d58)Wy%tX`xDJ?EonkGsDATCW=EHyOOCb>U(En5re`ax!mr#
z@G4X+wKJjlfD}+(t{wq|G*tA<ot?2Ghqt5+A+~|>*xrcr7BNuk9y6$}Fdbt8c4Do)
z>SONSc6`HU9pxAy1z9Q;o>D@QjVE$fsuttx2#q48HRIVg=gh;PcqrlvN2vLSw&;8v
zO}T4T5p82Aaqu0-7D7L!cC7rG`zW0pkqE>WBldDF%_*V~Aml}}R&|r;V_v(6)|t<f
zOJ*#`V0ed9>El$UgYHn=T@cS?2B4GhyNMWhKovgopb7Vfs-{G#njVWOGWMnoSsBSQ
zzMs&69MaIfNY&*7-NsC!9uaa12!j90i-?(6O`IU^P~Nsq<bH7tuaC4c?(mAe^#!oE
za^b_h;%0wjpZR;$P_ywcSkXp3sPlBLZg{9rHw;L@Zlz1)^kb`e;;D$d_6X^e1=RUu
zQGZk+pD1dUTuc%@eARe)@JSH$3!;g{l6(R3kfM-%k{@j|=TBn5A1DUQ%g8W^?D37T
z9g>U{D)Z4z&xtmZUW({XkEWM}DY<6Zrq3X@3z@Au)^ROM75)gLbt1>BGE;<4OQMtF
z+>MjT1QigeKRFhso;e>PlYsG@Vz#bh{Y}cbbj~O7D5z~J<wge^LcK0W%n=3Zn|M({
zVfH)nn!=7ucXW>`w`y>3pCI=s4aJVK?jxef^_fjxiY8x04*`YUc_2M|j6E0)ZER|1
z;_ooN`PPzDLU@epkOQvl3SGZrW+POKTtw&-ogx{N4$+=cGTK=@0f_o8N?ldXzQi!h
zr)f_EeQ|ph=MSGg9%8IXF%e=Hm+cHE9b>2=t-ZPdVckBX3632G>O_ntWU&g?mjZP!
zVQ~I(uAzHPG1m=<ENck@o$S>I%^weHc)Bai(qZ7zMhgqmH8;Iex8VM_fKI9hy&Q6$
z>;8G#OBFguGkelP<e4#Hr&W7o8%sIIK(0Hz<jCjZ|2u;H-^z63w$D!dhX2qbIy6!H
z{nFgz`!W7L^OEyNc>6phWhb(<LaI16%gY#3W4-`-k9j1b@q^MbVAHXRTt)HUf}ruw
z3!<Kv@OcFPpNbBO&W@bJ<2$G>4v+ay5A1gawV(goI8u!n)yz}k6bi_fhgTX!NY6jU
z=wGsjXaBr-hMhe7=gKofsGje1$v;nKo+3rZh%z<#3mqf8@r_$$aiAnuK}ju7S(`Mg
zQ|1Hc@B9=PN=Dr`C{CG=<|fPLnAh{htj@+DdxyWs@wm`f?KAT|De?Y=Kvp#M-vZ2w
zjMf3Pds1ioDPb>+GK`kqAyw2}lsu=eIOJt_p3@dvUieVg?U5u2Gg_c!=k%8Du3nRj
zA`P1+508@>J<6bT^5baiumT>nTO&n9ERJT|tdeo_-nJBLp=l~jEufQvr*n0iq~7IN
zRtfWJ;4aWur%wOcxv%|0Ihx9(hq9a;I=|Ts&c_`w7%XM_9gnAjk{&AYGocKKtSSY|
z>9`YS!nH3r>+Jp7Zm?_}PKb)*PIW*@fzf3mPBDy-yd5Z+Ehy7WtRasi{4S0pt)-Zi
z-kUK}$2d@t?p8kZZ&la@_NJ}AlQvvv<;dOwQ^3e<+1@O0GWxR4vx`v~JekM6yMUb>
zcbBXX4H}c2^&#g;S;sp5rE#EB!pL%KFv4Pfx*M~h64N&(7@p-JFHT8H@U@YU{=Yu`
z{dNC{Ew1ptUws|_?Y}oVUv;{FyILiTScUDqdUUxes2iRk)tiO;ScAW>Ha6%J{+!SC
zFW1*M{;RgW_T~E8My<9{|F5;$*I$42^?wCxe*^%g=x7%NAKZ>Rxqp*Cx?@3m=A=i`
z8l^)70!YgrVvZWT3cicU8>9LBxw-*@7j4X_ummmO#kPpC74@!TcwU9a_@+4=J+LQ7
zsaX!{Yiob2)Yocj!CpKVCX?XXs55K_+i&or+NNjK+W)!kM@bJ(mHw`;iP}c+GKzb_
z%V|6q#O-L>4Ysf0LG@Za_&>=2_|59ocvlRArg%P_j)BxQF$C=kbV8=feU`JNT@A2@
zDl5^e;jc-r7h(NV&hUK%oET=xcvlVT)wL>$x|}&dRXIHh?jkm|?j{&?xs48eOXN;W
zN$Ulu!$8>02g5eSdPR@lXcF8^CZo;O)joG!9geS8q0Lq3ZMEvE;SZ0$KR7$b`^D2E
zAk&|7&?5iKC5O}pzXf>=<Lc~Fb_tK~m*|&E`7Y5f*XAxonaAAHnz^#IY)py<eu#H)
z=j;J)awpj&)sop1x|EjSca7^=I|7Yh(!`;yc(uH|nxQeOWuXeuyp>Xoh?oN-ol0S9
z3Lg4q=U)JdRy636(*+_Y=Trg2SY1K<#e31u*!fy6GGcW)9gWDi5<MFNeIr$bDKx~|
zqB!yx?8uv_m4V%aU~Tu0L5IT3=;Hd7Kg6}TV)>DAn?{r2*b+dv&jV!Y`ew!>?pNP;
zu7zus;YF}&R232>miYiQTmZSAa2X6McF$TEZ@myV!@WoCbYD9xsTd!jxE*)bG_-R>
z=mK&bmrehtP(?IaHZJYpVY$#M!t|ip(IIQh)e4OEM4(`!7G93$OibBb==$Y6yeXI_
zBa_jkdq+K!BGmsFijWP^wqzoci?!%kSeN-~im}YaFdr^JvjC$!WGV~E%?o^c-D@Ga
zkaBxlo?11oRdH&q71vtBX5CtAv-_%LN4RG9_5P2b-}QeL{m!nnMZbkUhhH#6%<Yt=
z{w8g%T27s^!2&duGZ4roNB=J->h*sIqAuq3hfwuJk;r=rcyQxG4}#C^lRz7s`LVcw
z{x689qELC^qhtNaj}8iu#UF2q&Ws`kt~0!f(8RB`e!uRcGfeH-%9CMV!`sE=Ie_wi
z2kVSgNu8h7jc0GD+#9Rhn>s(^a%`ZfGi%7pf?5cbDfEDRQimsXb-z-vi`FwmYvspM
z^6b8!b?mOsckKG;z^tR!Y;3!rR=6J4c9+#krWi9|ZLZTBFKpI0iilZvo+H~r9Fv^F
zIlL?R`?=2JyoPG?H&icds6KB)^_f$v&)d+fsm<Nctf_e$`a{^5^*<>aGl#<#ZWssW
z<w;;ZGdRu}c(Jx?i?n&|%tDKK+nI$G*>;RWkx#^u+crirJmqK-M$kzNwI#wEE^xl)
zpR4P6=c+@#{v`b1`X_$y!}vk58~;-`p3M$FcF+CUndP}R^`n{RjVEEA)g^a6uk7qi
zQA44&&@ChRkdt1@=6sg@=mU;e^_uvsi_dkzyNl;<#Pc`nW%F{}(pNwmZ)!$dmzAJo
zzpsaaA-0~bhtMc8&TiRtZ`pcz9uE3R#gC2qja>eqSO(ef$RP6BIzL-Co>_9qhDR=u
z&(`@F*L7qS4;pCP$K}MOTn}k!aanOm#fm*It}H6D5VUCJb=|vB*<MOquEw}?6Pm$L
z*^T{!4_2fF8?z3~C%L4`DjrQtDn29fl#ux?)Mm6$pE-v5j26~sj$wUw)~!jsa18Z1
z;7~6dLwyc7%o@XNILsQuY&g^kTd2(ehg#t*YIDG0));2PVb&OC!(qK}4C`~iVZCq+
z>vO<i));2PVb&P@aKMO}Hb-SMTb2UEG~zWGuP2r}a)peI-KAjDPfWLwR414Z1|!~Y
zUSNXxNmz8GhJ`6y{X;+@=U*mbZF~~3=D0}x2r6__D3}jbF|H%P-{A7+=H{33t!I}W
z?c$qb473`YbF%N`R~8n~*6Uf9%%IRO96nq3qp>z<c{Z<ExOg(JO}H%n0Y>1?iz2gc
zx7R<1J-0gn)0kT%f=w~eQ!325&1{S6eUp8twBfcw;*xMYfM=iWh?dHj(7Po3+U1KT
z1wt}>-bE$}Q74q$h^7rHVpuE*U-F~I;c@HW`0^D-v?!?=W+PUZfXTcSJ#m{S$45W4
zF40B(#mO0v!M)3uCG50pdi@Jeuu?#;ws&egQ~a^J!QbP1ILtbeRyUeNCBK<Gz{4tq
z&WV|10~ms}fgz$Ph{SjZRH9FmsUw2@tIZQqf=Dw?p5verU_c;>-0IgLY1HDewy>JS
z;xHe6K#eGEL(qWJG3ptpUf9Vv?hoI=3DxUuTNPDv{y(d1vlqh&7k3R@vEBy}{X!~}
zw+I%R*~Hh|RcP!oL5FoGguMnGhK6VPGZgq6SfPonG?ywLr1mkuM55#w6kH|u7{3sZ
z5#t>qG61tCVylV#rIct?DTUa5|8SsFrU<J&UnO#5IMo)3mxh~zktbC|wZo7m?bynk
zsnX|)5bne#CPwB=vW-=^91WH@)*-i$^peQ~Cp8KK5id%Gz~mqbuX_odG&yj<KnN;R
z62Y`F=)wX01F%R<Y`0XdTJ2<^LQUBI92yD-_YtbdHiOs6pwpXnO;-(xTGh^IM1@bL
z6Br1Tg~#oJ^26!G_9iKwz+&Dd^1zaB{k|n`PEgG(TKNr2pXo$S;;7l&JWLM(=da^&
zc=65QIkA-&hnNTPymfqXL8MlsL~H>yKRAQdE`B;aI4@^nI9QIiV*5IYKIM>0PA0RQ
z&DJDPD$h1&yP?yz$x5;<wg($E0$hlMv%^8}K_g;?GEbS2%sVlei_>$3pT=&YR9$Ky
zAcX<>rlc=Jn=XiKM(|i?oRWBBwq)|NnbzJ#<AkC-`r9+=Rx_hx-Z#@EdHff&tJB-Y
zx!G-1M>$M%z(E~hXR5?zGCy)p2E)u-tYF69g&vnmt`)<!AJf6B<}ET#!vN9mU1aDX
zAIV8NPdDbW!2GYraWo;QL<12aP)>GE8dFFc=B7Nwt+@2a;6m(*ix4GGs2@YZHKk84
zfhwknA5)YxW!*W%T9iD^C={zf@<li34LLOjMs!Sv{a6YLBEHy=*#Qjch%w2|UmPAc
z4&Z2x2B``_$tjP5!7zxgu7E@2*!QyusBpun-=o%>f*B6SK;^EJcNEqP6RnDU9jRb#
zQ;KU=)wO%56B+Y)8yZC@(zFqcwlhp8L{>2u%LLhwZcMeRONIO*0IZwvX5Z$;bX^s2
zR&{4Rz5M13mRFz@S|KtoDn7Vr!!;6qOKw#Q>J;$C81*9+GR#|2kNO}Bb2TLjo$b@g
z&@DXr4E%RK^FEzVfA;g~Lr<nhoJotb%TIbT&3h(2?I~l#YIJM~{`Kci@$yKZn1s!M
zO3w5bbys(s9zS_^Kb8Ntr7O63TL14aYhQ1CW&3~E;pzG(|L?z||MyoF_~*Yr#Qz&=
zfAaqpVOTE?8%GqQ!*>2Q-()?%AC<f*5VtfUL!C(wGSRX(WhpC)pn1K*&wm_^@k-+#
z$)Ib7(mAI9{pY~HVcci&=@#<z5a5Z;TB|MF#_2^by2d*XU0`#QpDm*V@+x{W9PcGU
zFl_RDdDLwVDL#$}u*_CV{+r#4Fw1TTc)?RR;9AL5YaCx~AtL;S|LBQ`3iO-ae^?jW
zz#?y7?1oAYXoB7D3p<*v*#-`GJOnl$km;49;m+!j^}F^sfewA4bt291!j2Gr@qvuS
z!&!jFG0wglTmhdt1!h3EJ7HGcSa;sI_+@CNM}l803}7~4m0ejf?C!{HDTrsLG6|+I
zX#Z4Rx20$V-wcLbC8F@nans7;#H#UBwa#H7DM%&sNf{tG=r6{dtn@HyTE$F$q;n!!
zj}eTgvhB!9#Nb{#02CA8DtixMW~kX|O92tN(gcKB7%pry%+QW4Aj66C#}h#S#=5Ov
zQ6glO5-tTtjnhW+A78-qujBL`t7`EZ=5c&GO@vq|V8w}SM7vnj5adXd66?k5N8MqJ
zc#dcZ2o4l`!g3aKvR2j)Asce*%pk)FbvMKa^Wu64=9f%3+hv)BJk^4pn<HUvVikDu
zK$%LSlu~mWb@7sf0Lpoz3`7uUHpt)=LeIe0IJkx(1K*5SEopRx$+Kw-W5$aR^+1%z
z0+$4@mUO0`BvsL{@)tZ*=`E?d_Cx-yiBurKR=@)=q6q1gar_R%7GKjCnso4>Ho3?q
zHo+)kC~gBxKrX=Lyc!StBCink9a3-U6!tdk6zoK7ZcYkpT|L1B0j{!(8P`5E0UySv
z(_|v^C-G`{7X{HP5otuU5NTe!;QgDp3(MR|vBKblU(gL3h+>0oC=f_M)f|eHrnH_T
zLWN&E$k%l!oWbjANij217GZO{38r|$b?CtKQUWI&2G=*dXN1{_j?N#t@fCekRYVYB
z+<Og(aT^>CC^+oC4R(I$jO;ZWR#c}Sqee4H`wTXvF)-n72VDuRca{-Q=wB}Tu`S_}
zN@Biva2Au|Jv28#VkZuhc^^_e6tP%%sB?`?+YrD!7j1Q>%U@;sVE)Opqk#YE8t?RS
z?+8BdcJJ&(qj^v=tCj7PVS-%~)+T%zjL2TxqRgp8*US^P($MT1rTFq_if86633c_1
z#Y47wrU)n9uz_a;78UL|*<QWF-8HLb7sbv?J%6KCy#kvJbFOB9VdJ|@o3x_yEebYn
zKfa3ahQBHE;0?#w+>9KBn+tYyLE=u*7!Dak1Eh1Qp{Ip9@N&##zl)i)RC^**mc9Se
z;9U|~yGQNItPOnGNT5hsf1_HdtfyNrO=jTwaep{|K<)xgC6UKIo1-&P>94auKyHG0
zL9-1>f0Y!5L|HWmM0Ysu62r>UX^NJ3m`qO@5xtJot&3Q&G;*eCgCM>i0nva1#VRBs
zUbBgmf=c0P3zfNK%sgzjk>AsI#(0{%i^c<-uAvkcbb(Zo#HY|{Uy7^ORaA=8MVI8*
zU{)p>QI{qfIu>Jq$0vZCRs|E3tLD8-0yLOTKqz!jDogC)ESn0Qaz4NrRY@KvI;<-x
zKOO%<5Eu98L-W%rKItmXvwkz1Hd|Z{Y+6>>fjS&9D7R_cEbbX)1}WL3lTwP~+d7Gf
z9kaCzU&wv1{OXFFtU3g0e;KN5h`3x{u~ij50@(_)b4BUI6dEN<y`_@%!P@dW)iB#}
zrEH!Izs97%<c#s5O8wUzcBWR?YX>DfNsy6<3Z|A)-qY6%DYeY6vbOU`dsY53ozu*^
zNa?qd^bo*9=GB1BXeesp1I=fMvW6-{wx8BI;_??SV^;PLPOAlzaTt|lH$BZCGpkW%
zQWu6Hqd7l2Sp_9_emmvqHtu)Yp-`eUq`>RYIJNGyBV?gDC>;~eHrqg=hPs0CS_Rc2
z@?=*7!5eAhyQ@KyWvfW$gF~_0^0%<=b4eCN^?*=^AS&aY0L~yDE7L}^n|=g*NJ@I@
zgs=6ot+s`k_s&j3Me;5N(!#*;El{>t_LAAe6iyE#E|s~0_hQ*ohcA-t&dw>Z3(dpA
zN2UnyKM)Gc#{6&>J!Div4O^HFt%;dw)`u{2p$v;Tbr)%>NPqWsiq^8%(YV_Kc^ZM+
z_LH9rD__FqrpWFV=3j`r<Vi_na^3i*qA~(XtI!kkSXOxYz~i>jBw!ds-7a&b)-G08
z$km*4?d&Gq&C(JQHZ3_b)FpXV8JBN-zZ|sx7($abz!D*R2n%GIpyrWT*wb-3N&1n7
z?HJA_%=3e1lQermRVhL_M^i1M&Id`lWD<bf?Pd{g!#ZG$J^n$I+OW>TURx3l%A>l~
ztF;Co$%Q<*a`9nh*~ou58S-$W?HC^z>UJW0iKGBsp=LI9%MQ!Q>RTYbS;}e|0?o&R
zS{pw{4~24W;@{|AGjLn6<C=YcT!iqBp)}9hn`TP>^`G;sMnx=|EYis-Zl>{l4=D2F
zK{Zk$B9FWLq%<gt6L26~?Iv*8riPeq@}~D?EvM%&%Q%3j2l9UR0q`jvNP_KKdGfk=
zVcXp08QT@;F`?)cz}`X2;Rtplt>-A3+@uA&B(j!3Xn|ohVFJl@AiYy4|Lzr~6yp)9
zhG3**LK?&aQWGl>hHQ7l=g>GwZ?WhQTD}6wU=`g)=-cc)VNKWG2wBT0iSS!y;#|^=
z0&$i?b=;K+TCKujTj^wE@_q1|$$3j3JHY8oEw+yG){?pV#7>x%Up4o{&C9x{mAJMS
zaKILBk4U2`2}Vx7y?N8^X)(Sq6JBmIywi5oem{32RlC~U$x=0r`3R<+NX|eADgWC^
z+H}^P+>D>uRADmh)*QLC>}*-=$L6%TK3AI}lbEkPgMfcfV#xVX^bx5cKVj>f*lMN|
zWH(?>en1}@OGKUzk5mRdkyi>vqS%HkzAu9DPRm(bb<*&C_*{$#7f(K$Oxxc1<ZNjl
zWieYsbBJ^zcQBnhB6VDVn9csk;zH5%a3^-RCCA@VR`97ffhAx)o`kJq^blmXz!D$O
zHVH9<!KynM<Dk~sEk`S*HAI;!FnEPnZeOWovfV4Wgx{beevEUaSLog$nkU-kFveRm
zLwxKmS@*YA*160D%_dD#<}!te>95&|=yziLr8V`0hHd<|23@*}U>jfnu=h_icH`bi
z&jNpj{7}!w`staI_5S0{t56$1=cMOQ&1+x?rM%Fw=^#2tb2W6VQbl0$7ko6l^yPqi
zc`!R>-O~VbF)-@k{h46Lu*yxQgsM&P;KGnu+r44u76=Z?%0kA(yQ@+1t~YH>2jVpn
zZJ$Yr3^PaZG}z?g1U142qXht_{i*Ckg*6PolejABx5!#TRQg3|<crE{;3r;1BNFqJ
z8dN#x!|IGi_}NCL3dGYmN|TskB|&S^fGwAX<TDlA$(uaXnvmBKsmfIF2BD0R22Ig)
zGVDh}eaa1jXgWfjERX1wu&YFs>K=l~XbTlMFbAP#C1*dncTa`dH^OVi_&!35CNWrS
zG<YtijVf?Xkqy9LY1n}W_6pnoT|7w0$`AuDuyL9cQWX@$@T!*4^Jv=+;VNy5jW6b6
z?gr?_KRUWJ4WFsmZF-!ozv~$NxRsQXqO7RuQ3<!&jF?ZxY0BxSEo2w=8#&66WKdy#
z*4Fd9j)LBFNn9LKyc9qewHwg8U<kbWO`jRv6;bkVKub0eiUzc?pAI_IkEvdQtshJ*
zq9AS;MKj_;2F3s-TTV$<KT<FZy2obA3%Ysf$!jOp?k*`^2oU;de~|j-$jB72`V@U`
z;pw35bShfnuFaXSIjX5mw(8l?Q7X0zQm^8XEe3RzjA1%t`YAd|)D8(S88nImu=hf~
zq#DH6WK*j8l~F)Mf2%q&5_1M>;gE)#SEM+9NuO3c(AIC4vrx%~GfxQzTWM!FM%Tjl
z>Iy?D$skGR@E9;D$1qB~Mc-od;cJm4IX;Zdu)bDKdad=zln@E(wT35CBIW2uZch1*
z4wsafFr}e3f;iE#wREy>3=3G}0y@j|Ehn$O#pKn{C8n3Ywb^wsCow7fsNBkS)LStj
z{NT6tYgoDARjZv`U&k<{WPq`WWeWtvz<P%n!<AGd!Bn~hDJ4pJB_2Y#Ivqsg2Mq92
z5{m=pb+wNMdKgr9+1sgBLi^qZ&v$}bHTEpN{e1fiO6vmWN_jmf8M&01Rq)vkPZ$GE
zi*j1vgc`S4!r!ne_L~tnijfRqlio$6lKAOwzz{wISmz7^>DTX<Ofi*@{`3aT!taQ2
zp^%P5+|Z8v?Q}Sylg>D>*@!Qwt<vJ*g4~9;WwTYO)}_N2k5hwU$rk^<WX#jTfiVRg
zTNTJ5+jm7@66gV{?pD~@snoLIhe44Y1Z@_F#DzF>R0og6t>@eXaxFAF5WK-88BE#m
zgab3luz6&yeuoaZ=zc&ah;Z>xjd-^8t)92Wz1wZtC`RaN09AW90oLgZtq_`okPOL?
zHgLH*7)BU?4A{xaP7cUd43y*N22FauWL)sL1Las*Ps0ExpBwTw5FQ6+aGO+|&u~V(
zj)*RWe4<hSHa=GQw_x4`8zEYQm7W5_jgAqYn2LV~reevwAtM2b?xT>t^x}7MkHxJk
z#3Wwn(U6{C{E|thO5)aVb4lKZ)q)C4nuweRE=5v+BRU#%l1RA83zJ*C;F<(*PAC0o
zU$s()Ryd4GGPuIa&{!gtY1C^qNEV9BhnxRat5sz4cXK!>gMZR2NN6w{p%4XE^bX*c
zz3I7-Y97sK$)@1wrNLrs#GU$=Uug!6#1QDJR+7D&^2^SSbguk64trA#DW{NpziAL2
zLlp=xfSxc2d1$gs3vyV}I3~Gr9KR;Rp}gK43>oku934^Jl%qs`W%E#iB3g_t?n57>
zvy7~L8@_6bzd(K7AYri4G@Z7)$-5-AkQ9a{o3N>ZA;9zHtGJhiR(OWu{=z^uT;wfl
zj_NT+4TX=@zioNml+=$^0MbhBD{mpaCgN*!U)D84X21ScCaX)0bT*(q5`TRAd`At$
z$0)Z`djeRiavSP8TeO5sL<t9_krkG(nwl6qH&BvW_`jV}b#!YcUpahb*GcsX<kTTN
z(02o*?_sMzg%O+jb?#qdZ7uj6{;ko!bzP%3MW(&cqSq~Y-NI}SL8C^GYV@d9pQn#H
z)u>aAx~!p;5ZDRsBemBx2WXb5bD7)c?Uaj6QjINE(Hc^y5ID&Yindd4k-8}IoQmYP
zRp+*_*lkElnbTsut_$Kn6Kjm3yrjQbv_|fJ6b)h}Pys*{`+^?0$hMoRL?FeAmqEYL
zpJCenyubnf^Wv9(Ug%cnEQ<Ib<#?l#+z}3K+A<#Hu`%cyO#|l>;Qz~#Psb4R$f}3>
z=<$V5&r-(cw_(BL!vM(1mgat8IJ-M*Xqi;R`}?`+=i_*y0G}lzASBMU2-8s)6`96G
z8{2qBqfw8T)jm>YKw(iu6P;w|b}LA>5gSk-PqLy4mxB=_vGEA1$uBreE20`2oVADr
z+g3VXVB!s-5QajWeIkTO`5U?^mwh4zEwNXQ;yGGVr@89OPJ6bV$?}Nm&WP$tzRk?o
zxSpmlz*a>%1jt>f8T1C}Q0)<B63~L#0c=}aTtX_fg8kS+=o})E2;)AtOyrFgpd_Vg
zo`0z~u|Vclba#muR`V8sorN-*xlGk%FF%vt)#j$@2I^Je{pJRhQYtevO~`gvr2ylM
zt#$zKWlv!vS6QX-{aTs&t1$w&Hzc^gg~<=CXB1In*@!3dcljOTZPS<efyM&C=BCA0
zDqDR8EQkJL0v~!ioK7%=3Q}PzR15<@Z~A^lc#z`TK|;3tTR?Sr##d?i76yE?u&QKj
z)0qW5^D)dH8RSq3twEPV8vx_W%ri%fz=R)58CzEc&}|*=7+QhR-6YpHaXOKPG3uI7
z+K5p8lSUh=;yU1ulE`(U^^{ySuS>@O$7^Y;0x=51(-IyseV8w5*i7FzWr+DqgksAG
zL#C-B&nSCCDx>&o{$yTc4ZIpheMos|4h|^v4kJ%c5;wDWE@le+?|M1-J23gdW>6FJ
z8_}$Nb|6ccTU|`eKAy(&+<B|p)m+RLcVqblJO-*+a`<>lZ0Xm)i3bJLC^a48KsSl5
zF}Ma`>S#RdmW+_1o`Cg+Hcsl{?BF0v7|Nm=SIKYF@;T$=K%2HPo$%pOiZl#3CZdZF
zqIcu=^g0+#$7rxropsvnE*aW^e0Uew`pyUUB4`6;A{DM>Tb$0cC9`b4hLnIrjx5VG
zugqtdCMhLDyu_gy<X}cRLknp^c^?Q|p`Y(%%9-HMP7etP1Og_dms$L8wZJmM(-$e?
z)Hom4;hgzrOgx$|1x+4`1s{@>W)>kS%2Ja%prfgKl`rSOZ~i3tY<Mu<bT(|u=+E~d
ztS(L7>#&PagC-9po%SX)QrwhvK95-u&-gDVy^_G_;*=ju|9ELC`GK?m)uq2z)y@my
z4XZYPDhLZJ9CUYr+8WV6Br$bL*dcTbsww7bb!IkK;)FGYA*;_H2E)NaA3<UFQ~gWB
zwD$p!oIe<@83ghv<G;2zWZS{~abSz%zpj1t^~Smr|Fu^8dhJvE*FPlwE9?;?!fFR2
ztk!CH=1pRB=>Se_EW)ZaFtP>~!^nuhEWJZdwx!@KR*_C|3!;pIUXRHDgfI%Yici`J
z+H?e@`w&qPMQ|OER6(Wz%uP5Z4zQFc>!6s2P8nUzAbLy@m18Xjj{{1L=IblYnAn4k
z2Sb?(=)!dzQ{YvOtB!HAI|>ZI>VY)Hx4w89^`J4ndez+xy3SajN1!rEKOR?oLx8Rf
zAjlz%guz9@D3-_x7#Pq6M<F9;SOGAwF{rdU6c???=mTAfdIlpdBGtx$IRTo}ah#}V
z`a}SSS6M`XaqAMP2vW}h^RX>7ne0d8gW0CwPvlVxv};ISzhfLnAMbjF1cC7oIclkn
z0+<f3CU>~t0)enG5ge%l^%6ua;nG8RM0^9{DrPPU&QD%k{LnZ%2;lqa*~#~Z`v?2M
z-cRuIAZVVP{&aTu@|%m`o0FscgR}FXal8*tk1x&+_bx9^;K8%TIn;cHFP7k+pMrxQ
zPtOj{&x4b*00WO7LTk|cS>yQP@ZcPF(sA?Xa{uu7Wf(vU!STsOaCG?U@B)fmoP^Y?
zsJ3L(3r=1HuMW<d-@wnt-r>>V#ZNSp7l#+e*x3u{tq~B7Z(bfX&Vtj+v(uCFg8(MB
z6zm_KH;)>JuMYOB&_DDU9DILpd=Z>~0|$aL(UTvJ56*D#)-3l9V3du$qXX=T=ed7)
zcF?@Q3F&W503HnRC=AX|51NPg<>1Eym`>yDr%*I{e(>ET6oQw*e&bc+<-z$<P?`Z1
zSeNGI*}*FuG(h6~a_{`&@Z$2~Ab5FlvQMx+KR83Tv2)<;j!w=AV3+3yz%4f}8q_tk
z0nj?%!tZ;R=Z6G03ORRnd5Wp4%CN{k0AOI04NMnE06sb9*#eMG&VIsH5e~EfVerE@
z2k`t1VM@?yAQa94YRwC?7<33gyfCL393Q+q!gINKfUi%mnI8_%56ZA$hv!%T+Qfc;
zXh5fzG;3TZ7#Dv%1gJ^O4QV-p!xuqg|NBE6xhM@7aDFJTg}~PQM!+1gf8~k)mdF;O
zIi!%C!KR2W-9{zc;F|6Qm{o|Ege&r#3U7=`(Zvfa!D+PqejQK7S4wSA{RmS*wmvVs
zHV>m9%C;(y4YnQ3=NaHhvO1ALI+5K}VKZ3~Bj8GYe)gaw$Y4}BzSG0+OYi~{v4skp
z_Aek_FAje!2e0kC_O>DV>l`P7ZPmE#h0u#&!nZBdrb6A{;D7SPd^+PWm^w8oBv;+v
zJSH<@&;>R#x;?+ujDGL6n=yuyF`l7eGeo4z+~qN4EBziw0g~L3ZTr9_ttJr~ts#Rt
zbM+KtD}=!S2Gxa$t|U+h)fHF6Ej<Q?MubmbmCoY{#v0ICc)~}q2?g8(YZM@=`)zuH
zzY9ETot^y9de!(AXy2xM)I2%jM{3C3h-{6`D@_BFN^*V5vO+{g#I%O;QNnkp*+r8D
z`h{K4#iuMfC@jhbMY5>ZC<R`MQ31Ts=~(Q!&p)?!-{+qba7)h8^17AGeY;YQ#7fdc
z%eo7^ccnX!owcoGyT6q@e_jSW!(u-tzf>w)<l8HLezud8vtxe#d@Fdb8npDHG=W{1
zQ&}Jhrk4R`3{3`9xV|oCG2l>q8E_EZh>>Qm=1qLeyHcgsI6S;o&82qT;grHNsn2qL
z*{j|#n!pa?kF=m<S40m{TU-2Y$!>lvsGR0i799tj@{Fst$4LF^P<n#=FVVPjvno`G
zt@HDP#d5$D%m3^3FE_q)^S^xg^_QRI|G#4Xms(a}^p&fvEtcfPmnCNY95a8R6frcD
zIDkFMst(CXZrpD$o#PY>M77*JxyeBHjt#MUrx3eMB?B$t7pX*nO)M;iB}MZp5-$Sq
zDo~bbx{7>brx&NIDjHFRyRV2=Dlk+i7*`Ms73i=+O;kpx*`M~um5mC8-$>KpIIX~`
zRIeoBS`QDHRk4oW2uoLym>T$MvM$$Z6;4~ptAyMm-9BxK+CfGc=EGQt-KV6a5*L}*
zl!Ae@8B+{QiamzL5>v;(zm8CGK{^e#d&m3?>5??vO%zl)VZ-1-UWZbBsWH2XW1W~G
z)T%MkA@SPTY9yg5B8v$zXSC(QEep$zCV)^Q!Nd|!nJ0{aBoR+F+bwO~+BgKVJf>s>
zob*r*UnyrWJ{md3s41|2g4EQIk+alSr8EYphftK`IFCU#IC$AOs$Tqfp%PeN$G!ME
z>Jis4?u#XH#5b7>4t7FDa<ktpAhgYCdu1jT6mpZBLlliHHntmZt|l0F0}WI*&ry9L
zc!HdSqI_YkL*sL?Mo(N@3cos$9Lc&(RAoXVV_{B3D}$+1(d6?}=%I6i6-$MNP-N{=
zGSl*Ai!6yy`xysKGiR))n<F}R8kC>{+X;=Ec)q(?4rfKdmkD4SWV~&=P<w7iy&-=?
zD+we!lj`eCw`D<kF(^UVOu5iFI(&It`$~*_IuKDwkgso@<JIcv@$u=;8|bb0Td;O7
zn2og%zW*)fyDUSArGy3L(J<=j+%M`FnBZQ&28tzP$hBJiTK#W9>!_v!Oe+QeS=o59
z?V#Xp31tMkc72iB8*c6OMQZ=et-Y~8ZEXlDj!|_orf6;5nQ~St_A`36pCI)mlGYp!
z-cIB3GvE$7Hxz$>@)0Sg44}jbk`V1}(&fqo^D9gZRKKqyt5MO(WH?-H9EqrhWc|&H
z6JT=8l<SEmW459YVQJB(JJ#{}0vR@&+R!=_ikF~Pyb5P0_IQVBX)t|HJDy-?n5{rl
zc-4>u2>XVaKyQS>UKljPV1G+m?$J=q=}EVm&$9HycD_I_$yUGLnhtQ)dLRs@CBVlJ
z(MlWLdkd{&p$%22X%)Ijye;i}3t26(;V*iidIyRbs(Uwu!^9#tJuXtmBDKsQ%~qUw
z?0Z_-z#{9mpNv?I1ePO#^+;erlKn7aT_in4=sdMzuV&Uu{_R6kjXi6lb)lCo^wNb_
zU08QdFP`Y0nI2?oF<>(AHC+H;w{CSb?rs&PI%X#c8F^RqV|1<dH_QO7$UhHOf<30u
zYI&v%Rrs3TOs}qbG5&0XjS#5mi!xxh0JN$B^PnPo;jH?NFQLBkW-r`>Hya0KX^fAf
z+eb{;$I)?6y!~e3==Q^XIl883Mb!|agH@YV-#5Bu*wmv_!|Nit|K9kWjddC`AcrJj
z?fy#`T|*2`Rl&ifX$kSiMrwG{ZI}@0BD8<s6I~WB)*g`{?ozH+chBi^FVp2-*xj?I
zi7@=VDf%p4f+o>C?o+Nox9RlR%=Fm|yG?7FxW9kj7hUepSoL6EcDXOR+;_U%&vdyT
zcK7XRGVh1Z5PK{8AV901pRd4!&sUmEl)J^FX@>iVEBk+gb>1`EPjF1l6>N<O|Naa}
zVl$1ka_G=P*kFx42`tq})a~L+Aa1joeDP)31%_zSSbzjklV{ESG-jaMo`0|e4mH?@
zGuTErt4YsbMUx9IUGTFEOz?|XMSCpW41%EARHBnvI8}v>(RPVc*Hnd?0WRLzU4@%<
z=iMG6zl$jQD?nI+iWgx7Ei+)pt8R`52}ihwPNd1c)i|y}V+HmWG|sSb7AM<!Bhxx=
z9?^QkS_tx!gOO7Q&yiBC0Of$L{9B2N8~fCRaV_lD%VAeQZLHP+xJFqtjVJiER$aS-
zp+IXm7U6OUk}!%;|8sD5(!#rCG!R%a+%d;;9|P|y=iBLz2Ju_!9S&d718d(YUK&&?
zZ>bJc+(-dgB(3`xQM>W^Uiouc!@bW}8Vp^BRPSdni1r|0{QA1uF3!oCNLpQRc<t57
zxh;S7mA%E0EH`kx`=2*yyp5IS=PUc%w?pO|nN}sCGsf$jwMjFa$x8UfD_a)hU9wio
zC^zng@ZXw75H4{)9E-Q~vj<`x{^pC0m9iEgEfPY&Zms~r0<!uRF1sz7nQehbr`Q%Y
z>2DG><(<G#_Jho5eJ|5G9+IMUn_m2gmq$Ma#9ex+HNYZixL9Sp+jCdYc-M4K8snX!
zEYeFhl>tX~g!FGaVF~|pS_om4FN)#qrF=gkf2PT{&;p&3pt^Aty84>k5>$S^W^wZl
z%;1Dc5cR+Qy8Nid^^C&&#_RRRHC|san|B*;0(d`Q<m>Y^E=P_wwOfCPW=~!fep2oI
zhpG0sQI=o3LXUTRDo9EvN+)RoZ(5_SI@se;vR*HNJQwo+N}mTm%LT2T9?#RN*_!;n
z{BNT9vJHY*4>q|QO8r+lTLDIi%h9ET+PWJ)r2?!_+a;H(Q3H9n4gxBC?!jjhKAZ5l
z51;!&*&)?qGqq!jDrTYAMQUi>Rbx>lF3fF_8al{Bv$mkZ7a+HYl6Ot_R9{dz4C}K<
zjTst4i4c?iXzS7I+5E%Sv-!yNgy~VZ25XMCwzgY>P0?@$XRBemC1}Si!_78p7TUPY
z!rp4u?e+FGYm||y+CK~l_x>Oxln~of5Zm)X45-px-5#6|VtejF?5k$?ea!-K+c&iA
zdCjUa?==~e+ay84nIPp%kaQ+UI};?H2~y7l$!D@(4}D7Qxpdk{ezBznw%*vY)XrV6
zf_ovQc2{V%Nu`bA^h|B4;#y&Q)cMS)x$0ArNND@OGp}y7t8}YId#oc_pRHas>L9%L
z%rWfQV?eNV{mRy^Qns37Adzw15=FB*`C7J<ENtLEHJ8R--KkQ~Rsm{L3)gg&nkkFI
zd)dvbCf3W@ZdDugaITTBmFG}78?_Keo8~$+?fD`icl`?Ju2Mjo>p;Tw`fMGvQ3v<g
zzBz_{dknZ4yMB#yS81fpF_2cx#)a$+NG&_j?6~#~T-a|qxX{clBvK{=7j!RT9rv3y
zF61_$aY1*h+OTmUcjOK(jOSL&JD_QJuuW4qHQ8?6tyV9nrogK8hp0gAmYAmbUczdR
zyO}2O5&Um2clM58jyd7^ZY*B1U!S8+9nO2RPR*VTog_1W!u{$M*Bz@y>a!c6fvnq0
zp1pl=z_9N@9z3*7Ff{6O)TzTEZq{+Z(6GU<??HU-*8oGq1w*4gyAc|Q3x=GF0N>##
z75?seip}Ser3PIL)G{mQDPC8_>vLTs_(-L^FmS<~?XJN@6PcV^2B4nebyZxA&c1x%
zqd({Nh0kQ?`lVQpU&|7C_e6u0#64HjihLIp>+@7{fRs4rv6ud4)aknAdO15D)pWhE
zX(G!roBr@yAVDoHQtI3hb?It`FJk7s%BW@e#0-6hDK<^$yN70@K2Ifg$(n^r#_W>^
zuCirg$=uU4X%;RSb5%2&wht@1S<|XA_*$#3sf)Eo-K%x2d$n%%UagyVujaq?=^D2_
zGw*JE47|Lf$=PHs3%N&kIkmiv<FG`cc$N~;os?h$gp$&wN0}nWr*mB)`M6_wRg{C4
zuDsHuE2UZ<syQ-%C|-Zm-4E;k=eYcl7X^IHQSZ)m`%|zXSZ2npzm6DPD?hG3`$A~0
z{%8l)AK71h?)mLWoxl^#kGh_xzYm+Abxl>&UsGy2K4^aHi}&|r^8<($^tUhdH%~Ud
zIr}r`$KhI^$PLf-F!&Suf6d|l_bQ$w={dWfTR-OY0xb6btF3+g<%Z+`S6|=w<p1|q
z^#7~XgZ=;af74|A|Nd`qJjDyUsPaQ}8})<jZVZp(>VTiN?(nJQ<M-F`(vzM_uMRE_
z&s*o0r>7@p7lxOT`#kHRgyADFs>aJ$ghYZ~DwNj*FKEeq9KR^vJ!+Be2%>8W$ASOD
ztQ*o*&b_F`yWPe}zU831aaX(zsd~0wCpIt7#>|!-Y*J{X;ayPTG&bcRL6ECqK>OH8
zDJfI1UsVdrjH=Y_4~?^9;Jr74Mk;MDC?*C?QDo}r49DpEHym_Tk_?fDC&A`}5_%5C
z5nTb}0}#fHU~opvXFDV_N0B2Ecp^Z&^xM*iaB9<x%c$WX5<WWl7DKCDH?{GWl{AtQ
zMjGW+Beh*-K)<J{Z3S%7;F*OP>Qba!zD6PEq2G_;&98B{bv=$oHy-nchBkB%$zV{z
zl-ccqY@mQr!-#M!3vztpljEbGC~(pFcbAQ`gZ<!D14HK=oIlg5XNk}%ZM9CPGn&fK
z7lE4^rZJ&W8M&^-(2N#!4wb=hfC5;mj0VsJ60Hpo=T<RY9HQIgdBd3Y#c*=`E5~%u
zPZ{t5regslEcIot;z1Q8z2=b808L7jntY4YvSfu%e!~sF3ATeid^~?{U-A<j<)rdB
zk?DXEijnovbW8xk@s{3%n`Pd&6Z$S{%hA0-<?$Qx$k`u?Xttf+v>S_9UmO=$!bEqi
z;3uI5z!GqRtqFt1&UL1PpyV~lIUEe)@gb1qh<4!wtt#IDM3M>R2AJe!LP)xp*glz5
zDgH)@ldQ-gA&KDJQzh(KjHr)xN9902Fp6LeesARF3rZ43&|4Yv^t}nnfj29kRgl1G
zW!@t!F`m88WNe=cnv+L>bMjbtnn?;Xa40`L6yD2?dB#Bm1CI2Ab8Q6A<>dPuaOHF|
zmSM^J&!v=(?{NAY>|MU6UU5FM{bN|`j<IjXF`#$UeFMD>Iu9zG9&LOE|0<%#96Q+B
zyv^y@q6`afZ#`Asqtu`R-xKZPMz<?ssG>;|MM}+}9cA)({wVAVi*e5a$G_8-wo$D_
zOM|7LuvLy;VECMZtv}BaOg>-@W>SC|tMJr>U|!fge{N&EpI{ifq;KpJ-XZT7VSR=p
z1{$y^K87cB*|BBGm1<VV24z3U&&XpjdL_jV5zbQ#(6-HFhL~18q5!HBmdDbR`P-=5
z#R%Axj2v0rrNHGwh;#0j!$f@1JkSe_r4$C5;WLdWiS?@r===G?hC&bYL?2EkKvY}t
zcnn<Ul0l`AQIQcEY{_5@bx^V)frNz-u41+YU}VYX7$m#n{nBzjG|vH6m4+_)myeGx
zkB;QO2mtjWF@BZJ@dBP(-G3(gKlz8C6`BL%RJj)<DGA|9C!Q1$u!Qs`qfMi1F-%j;
z&=F7vshBh6kn6ysV0$}wc5rqEEb$ND93CA6&BoCY#)8CHjtnJ?;na9}5WF5dlS{zy
zvBWRKPRgL7`C^$Ud3<tluo;j9m?z}nGmf81OeG6WEJbm&U5|JGIzYw0r;hflglBal
z<U32e<KQ6UeAzI`t2xpzkXuzT4RXPWkRtNt9L7dfkwm6cE^&`<%1qmqELt-f57E3o
z?rvt}0gbb?%}+TYsT4#d2ja3I`YiA^s4;$v&vT29Vclg$J4upRAheT9kJSH3QTG+c
z!C!6kyH6SaYh!J#=IZ~}zW(~_Px`;VqW*88xL!-ED@#jEsz|LPPmvoSIqf*c1kKTC
zqC-Q+LGv6Ykj6SP)lw#&(^p||k@Sa?n}?uL4fdmVm@7c2>+bIEsu<G<w+}Rf;j5qp
z(I!G^9LFla(N^#vRDa{x2=kOoM1<`n3Tys=52k~z2%BvwCSM+321haKO@MO41_#ES
z6_iU-K?xHj^-_AHGw{5?L7dAdOD~4dIMMd4fWvbyq2~tD`g*k{dlKzY&gD`x!I6&x
zjKnn@lqo_JF6lTBHLKR74gIH#DSiVy6bGGx$=zW_bB^VGHSL8<nD+gL!;5b)iZMqt
zrl8h8ZK>#0@w=GY!4R?uh8*X(PpE!fa-&|uh-U@8t_Bn;N`Q393sR-?MP$V!S~9N<
zOcqtR;tZ!W+~c?tQ^;{!DLlt=^@ecXY7Ox#%6CDLH$hYjDJpp^4+3i)m-ISfG}=xz
z48E*Ek!WxWI}2FE5SX|tX!k{Lh<W7phUo;0zG?&k=1W+s00pfD!R2{lNdz+%a^bUT
zb+rleU4^~VTRk2QUY#D*t2fWwfLZ4N*Ykr`<JEp9UY7N0{%~1vPG;TeE_MTvS>qyz
z`rX!5FBu6o{9-vsFl4C+09r{Z^({DVXSXpO<hPP+uLXbCJ;AHZAjxTV&CJhNQ=PPj
z!ydKJkH)tk?TCKXR)Ra30_UQ(3^J1L_3r04zih89)(r_KvYUB30nS9t4S$!hPmIsn
z=^;73^Cvm6)!M=d&XQ?5Y3q)guyq+Yw$Luxi~G^N^K9kLc`)Eaf7Y{G$`W@~!Zv{&
zs|UaT9`KLn+$cO;D{H9>BitHkn+KS)S+Q@>&fU+qzaSI8riE2{v=a#51ZsOHsBZ~2
zPQa1+EDMwVm4HI&;ZcZu8|>Mi*Q(V)rS=PQjMxA>odH}K(A{`6xv6m0`!vQ#sT}DK
zHz>tM9^&QF1ULJ5IHa&k7?clo=9_sI$6b|4_bPSNOAe^r^;x-zqpnKfWW5>0_Y=1?
z4%~eP7}B}*tY8A779jI*BvWu>L?`SHm+~`goxWg`^o83|a3_r#1D!HJvXY-*ORkxn
z+n?8dVZA~o%+HF@8@>rdf@><zRqt+)n<?SAC}gpxvM9opM0Ph=iOEBmn_!H?dGR=)
ztzWjB{?Qm4y1?Wjx^I91JVzM(Y}epQ_!17^!F3EE>A)cP4ocO%DkXSvA~P9F0TlYd
z2+lqz5H~a!0*^uH2KYtIj)5}w;;V^lIUtPu8FmapbhKqOLCC1&vs(<wFOe5oqqm}w
z97i@WTWA-T{O5$`y^5Aj7*ho1O#GP|mN{X$&d;ACgi(>A_8~InlK{{iSC~OXV@b@>
z7~A-k5+vDy^*I3)2rs}ojsi!LY4PzKP@%>MB)a?L|7Y*p*V;OgefPiSDLUTYI1)%Q
zU?(%N46`xdgdGeF;9PdX*0+!rus}$PmSjv4pZ7V>abE1yt*@(>E;b~d8T(6Ykow-$
z)m7D1zruVd>N@LX*W+tA+UIy|jmB@XYdguM2qpcetaqX|op-7{?Vf?6*SB!(I#VfJ
z=4g~gQx0yGW8YK^X4!Dt1!?Job#SYpvm$?>jDBx4{;U^$E&Dm42nrtXUVoHYywE~9
zW0Ycy>QO4-qCgy{eaQ<m#)N~6ql!5UNoUAea*}+jG}RhPuQw?Ohe;k-r!f~y$|<_k
zeov)uGak;Hq&w!EF`Hsy0MWC5OK|@&PAOS<OIRmesIr$W-piKW%L>c4`gxTbGHH6f
zb{-+D)0|JbkK|byPY4H1lhG&tWjK<^bB~WAgw7Q@n<6{rs}J~AO~LA?-L!Xw|A4Ru
z>=u|k{C2fc%&mU43ZFlL65RwDIF|3Q%-{Q!F6e8tN}O}Uy*C;Db%}pLdwi?k;HQOl
z`<R{`R5q#y+<veE|3hB`=xgv4{)a{eSNK1Efuk5e*T2B7)YL_6vOSwEfIg^f8ms7n
z;xQB3h4q4Q`xaDHS^3hxoGPGF0<VCD73lX$uZsS?%tJtk?MRcO3pjIlcQgCY{ZzQ4
ztXHk{>Z`J5)!4Y(y<dwX@AZ5CgYS4Ude`>UlLsq&o5s3J`1j*4;NR87UX+(7oepgD
zY5xW#1xg~n$%EaLHurRIBRxXUu`yPiK!j2LrhKMA>}{auw3T!TKeR<Ww&kQjZM{iA
z$;Q?4jL+ZK{<cWt76)4Q%S9l}o`6YwU;MK7M(HnMhDw>~2gOh(FvV6EONx;UVTwH|
zE0)TTxY*OOV#3Fz*cWBRZj?KPFH6TJgX3a<Eh~n{kX!SuirVE@O@=XCq%3L&#6$Jw
zvY`drC92?@D)o9@*^IU7mvMiTA?ycP?(y51{&0)_3^pminq8T9^y5oprkfd+(>6+{
z+ysdoc8+E9%>f7{AYIq#rxUD(-}O$kOiAnuWi26jN_4@EX6pGGwwiIPERT>hlAX}f
z*H@ZlD3L-GWIoOCek1WuQm5o2rv*;%!IF*l*@fw9VoxBTU4;Fxxw`j;n^dHulmc3c
z*aLMbsX4ls;YSOs;YbU!HUVVvCKWjj)>)>sTwZniik)V319qKw39*Sa#ceewth8xF
z#5U2!oR>4La|W+w2xDABHMr-T8ehdNISkGo=QAG#eHeT?E07sk1z>AiDAqu+vjoKA
zrcqP}Oxn04gXj1I&^SKK4jhGbGXtr4Z7nN4)jI8RW^(<-=`c;Mn(q6k0cJG2(bTF$
z2UUp`^u5yURecr{Y^oNl^*A(v0ClfMFD!90;3hiA7L8aBBE!;>0bUqLGf@g9wJ0Mj
zVo;DtUk2g$`m}q?YO`tchFe(<Q8jq3>vE@~qUp@nn<ke4E-Ffs_M!rzH1wSa7lY4g
z*-63M`!w$lPq80_w$ZKh?C_oCT!}8HJI<+Vsm(rc(;8d_7-R257cEiNV6BL4b;wo%
ziYBu=`HWRC(=YQp(Puja4R|~<WA1qj)fv3*%(VN?KBA?D)5i$RT^XhiS{84AU^RLt
z6WDLq!UPJt(Ra?Yi@9_#*7Se^Gq8;6q-L}U0-9)3-aATjlvh=!@7f@Qs~u4>UTzq_
zR#6j2ZIulu=V<mq#^n-y0EUSif<C6b;?Ga2Y=_Y0D4Wzf%~^;tu6zoT&raM6(rHgy
z2d-QyvPOa{HWIi*Bx5EG2x_M+GBi~QC!#{XZ(secD}ri+oxX`zl8XPS@1-1uV8GFH
z(*|th5;k(xoJ0}WhqOXZ)@e6GP!LX3=RDE#UVfbnPvLq)?Vt7@)cN|nw-apx&w5=g
z;|{WpA-lT1#&VNyM#AlP9>9Cv1Nd*Q1CYMIE`bgl8yZaDlBpl12%*Md$TCAWA!-NP
z%Y2&{8hOxaz{XRw3ea0ipI!N##uQiexZb6)9lF)Yly_ro;+IvoozN)@*W(w_Y<XD*
zuq?2UO;x4Q;p-lHwBU&YIvw8H*8JtV^u~sxU0OP+nzh4G;scMK+hyreWk3CLqL;qu
zo)R*u%(PUp+P|Pbnn$JS4I8!D86h?l;Sf(mW`6>!fv4{B1R25A6J!E7LN+4GESsgv
z%Np}9&GLf-O+cYWqd~-qMt0o2Gw0TSS2#VJT8*U%Y0{hb^mMMQB*DiQ*jDK=$NlSq
zICh2l7~FNc#@Z|A<Vu!daD@S<axm9u^ZyQIo-tABwt&M!BW%FV&NOGE@}}J@3`K?J
ztVl^s<36mQ+QVHe@du$#DUc+TTIWK=1CEA|@|nsj2D>REOM|gqxl}T^^i0S>b?w!{
zb;*jG<C_PP%jJpFOt&X`!xd_OG*r7p@Y^|N2|o+k^T+beh*l!ygtOE%I@2W6!X#m5
zQ-{FKh|@@m6GogCM@$>KDjT|r8`?$ncs0t8Wg?f}X+yTtdz0}MmjR!CPUNPszMGCj
z*`XI8VAMH!O*oT;Q$V*{7L`Xuqu7Q0fl_FIax_7w6)hUq3k?z1IY;4y%K+N}5gbj>
z86P936`Uc?-+2WHAA2(Y?76~777<1=5=KOu_ZCiT_)?`Pj%20cNXW6NCuz@p0x57s
zk4pVESVq?R+r(Dh*%O}P@FS<2h}OH0^y{XhrU(M3x0W|tZ#k~(nY%S2`G(%QMmG73
zX_77i*xhD1^FJ{THkw28dJt#sT$Lz}uuSZ3V2un{z)XJMz4|#bf*|o{;ijcmk)97d
z2LhQ9Ij%GsFlxM)q$y*zLjGL0{MoSZQcVWBxH3!F3zyHm;9jE9I!`h3@a!xdA|~k^
z)(hR@%;ps~{>v-Awz4?p6?9%n5ra3e;tyFkuJn=)(6~ptJ3#xM=CCL{tvq8J{O`Nz
z&%0JMH}{lqwkt#F$kkHfv!X5+_!>5S1yrpsPB_M%^#b`(#@r}sk#dA~s7g_u#G)LX
z^@dxZC9D&Bbhx%_*}<`~LcWAgG*B3qgVsQ=z;+gs8B-$;yKIRL;5v64_qeAL^^j!~
zuWk>SqJpI^vLMsbXYS)T9)Vtt0cEKp;ZKq5g=5H3O>r4+XlT&d3h>NPc8%@PwF760
zNupdLFA95Ge5}WV4qhVlIseQ{uB*>9ACV5r$u&+>1TKtDAJ#15URQ$^Lp%BtlJZuF
ze3+K&${=YDdi14Bd>5>(bga@cSP?oLtZLHx0OI?s;4h(BKeRUP@#T?rrqKnyWI`uQ
ztwhdEO{Vlkghtt7iC?d~9z1@#Xz5&9qgK4DBp+X6%5w4p2^go>1xpz{7BW)fYH*dN
z1G*8t1(UlOJ4~B}jkbYhWZ#bpM()WAuK#deC6_i{*yrcyEr6$ym%pLX`ubHk&iLhR
zcCX~p_Q&|HAXaOO<p6mb(dkMvFuQPp-?uYoI|omwjv?fyn#$#wAYi9(3aDY!z6Vc*
zj8W<vj(fmAI_E4#Z$zp2GD^)L8eg(#d^tfhzT{=;4~v3jhb807xV?$l1iBMKsoyU>
zN_I%>vRUS}1pUznw65UM4bj|XnS!NhkLV7c1Vh4eWW<grp=C{x+ZDOQV#ta%TcJfN
zveHG<b40MrB7&uNA-l>)y<GYD9r*)%|9kek-&Co}f|a1ok|;!8Ep5ijngy+8x#`@F
zWfuAg*7=D#sQW~wJq<#a22%#pE(xEM`^S=5NMG6j^W?;1grvN%k%-2NFA=>?So?{t
zy%MR7_jcNC=&BS95H_%K60;zP3j-NRxD|OG>J?il=PY8W>awR;(z$GS3Uzo={z5On
zB{my`v_Q30vuIy!wa*)*fL)w4WBMl5Jsl(Vkk_-+aUV?V(=oK!^o^W$2+*c37Aj9_
z`X4Q-5Fz4I@I300S&-kI+gCdr#a(VHq8W6qs?GToizTTYs29ceTVL!u8V{gx+-(Hd
zp4UV^lh`+14`Bwe9$X#nK8P9;VG0+H;bG!FdgK)$@ysR9g#zi7yn8|8Zo4`}BhZs@
zeeiLctRVP=;8{|d)VF-ZK4LNBl(C7qtliV#?8lz^w6qc^P~Bc#Yfm>n(T#p0N|<ed
zyxaUEryn(p@9Cu;e^uX>=u4P!Tn1Y6ORevHB$f(Wyz%1f_6WIzY#H(q%&giB?B(XI
zgqaw$0+saCE_rHJH7peYNd}jBe<n&G4{5pST>U&5biceT6*V!m^{VvNm)hiF3EB!!
zHs(t88Y>8=m;S|{Y2lPF{Y$>|3&JxWp(T;Sn&Ees!ToFQ;LDJ<t-w||YE}v)3b;H8
z6=QT`h40CrSHjHAbwKoWV&XmLmEtvd7UjN6j-q9^R#8tzlO{2Pfc`FKI%59rJdM03
z-(<FlE}i9ivwNdh>3Ono$CU*q*>+-n5F)WfMaJ-@ERX>`4;eU-9c|f(N!Ec?(UZSA
zlO#E~-mqDaHmUJy^NyV^9LI}Hol@*VFK(o^f=JyM5DKms$~py?O^FL=$y7ORwMb%t
zr@UPxN9^qw&lP@)DVX=C8~WdQ&vfOF@${b0uP(+?;cFvEf!2mw2fD5X8j|;t1Jb9B
z($)szfXuaZxzxGo%6C0scq`wp>1u-KiDx*Ds_~O-?t#k=8Q90YF$N##fIYL&?>s-c
z104i{zA5$h`7_CnE}~4*C)Fm=*V~wfj_<y(ru`R~t}iXE8;JnRt#kQ$y;?$RHgLwg
z*RjK(<AlS~Aat=F7Y1Ev@KUU0!6pe`*XNq9)ZnUeYy-wDFrt`jryZxZzNX&EoKv22
zDsxWD=j<Y%l}t80DOgRy$&PznL?lV$6GjKef!GwocqUqm)Lo49q@aDJlNUm{b;+FP
zxtAH=<C=<i3p)#90`&-;p-6SVIkU-5BsxnKR3Hq@3lyM?f;Bk&jzjIDuHc{+Rhtg7
zgkJ_t%IWY;GRoehhZ0#(rBEk=qx~?PS}(HqKR*5WxBK%R_#gC5YqQ@?W&{G6i2t$j
z*OjM#T@CSnzg%7Ug#Yme;eY%I5WocWs{w%3zaIebLb&Dx6_U9~CT%)jkMrAW_(!ia
z<yNI}@`_?qX4>uFNalC^G8w}u9h#}P$(%I8cvMn7k4EVkjCH4>c9UU7I2fpxT0|6X
ziC|GcpTU3rtDWU(V|><aq@6KbR<C;*;e>EP)R*1<FzX}@>0L+2;^J{ro%9V1aiBwA
zVKGkOjwFj7t>l0AFM8_htb3Xc;ez}j9roWM#K$J%L2c*P{a%M#+5<IYKe<weS?3(6
zC*jP&FcE6756gq)bS{l8pwMUbgQIi+bc+bEf2$!3%o7;#cLpfsiC1-_eFegH7vG&o
zN^+W^UBiGZqK=$_&@DEL8Pribqg=8stOudMa5Lu!cap2ES4Z`)zF2<x)nA{|Z0YyE
zEkF6<tFOEP?e#}Mu+A`iRGXErzWQ4o=cI=b-y*KM(e1(tLe*R&aCV%&j=$MCLR{YM
z!<~&?g#cuS``_(sZEvZKBZc_Ni0q;^UL5W3zCPaG{ZZ}h8-N#XVf%-J!|kIZsIh;j
zc7W!0wzpud5eeq?(Ka@@ajbsae|@O-zu!}bJ4fF--D8+-mgfQ>eF0KA%!n2X))<cV
z7~@%y{%d?>ZfgRz;~Y|Mi6I<@eW-k0!yho~aK}bIn-vtuXoHZ0`+!+~d9mIk84zsd
zr2n;0*&1gDk%<sX2Eow!>@N&M%uosKA(~o88cQgXHQ~VtmjY?5KF$RrqIb)u6_NZH
zs9Qp>8OSA9oBO*UsBv5UAyqJZLyrYRr>R&&c@*u;!=vUBMne)k*XbfVbK6pC5?T_7
zus*H~52t`BV?K^!$ADnI&E6=q2A%YDe11--d%41Be@fqc_6#L--JP_!x3&G^_1C7i
z;8T%*7Zrbi_2HMbJNsiBRNwacr$hmSuad(F7D#kTd7i6z8@$62VvE|HlQ17(sJZ${
zohSrif@?<Y_do}dkCz*bl@q!>+T$T=VDJ?h5E``l{yKlVxBub@_$zf`A-5T4(OC)I
z1%AR^d^u|uN5>n-*oC4#^cYXfQqynGB-9J>1h4c)grkVHzO}sv3en;I_iEW~f5mHm
z#k4N~4)4(g{GwKPC%Dm7XjFm>pggFNc=zx{*@jzWRA@We^vlDIuc7VNd&g?kZT$(f
z&PSa-8RyTGLIi5H|80#TNN$p0kCO+W&;#7ChVVO#Tv@UUuaDFdw~wc|i_fG=);sIh
zl>C5v@e-(^YcMa+;-JDtclKWHuc)W7k5|<fv5%jqFKGnaXPXQ-j0eI3hco-)9QMBz
zhE`dw7Qbt~czxXZ@7J$h?803}rB({12)whk(Sj9gN$3!jtd|uX6pOBw6zu`+w#YRR
zU4N+Pld__)ra{rCWkq|xx+N?1FUm%`+!By-o_zhw(%Fyt{Z@XB_Z}7eYe_*IB%4;9
z5mf4bD=CiCCm_uh5PqwF);dL5D#t2cl~j>}a*B8(mP^X&%SOOosoAff;d{Few&v#j
zf82ebS7?84`v|xPNNwA%w)c)J!zwHb{EhUyz;@v~`r~!0H|EvB@sE6Xu(Lz*lr|ta
z%(mQPys^9U-L`jlx3)L7lx<<NtR|Lg9USiLZSEX^oEsil`g(7F&n-d+6&rn(v=O*p
zV~-a{D84)N1^QP3SM{y|xLjI0M=k6G7nuIYXRkI6zun%VuT%x*|I_fKkzc^Q*ja6`
zQ*h9TQ=x(fh98ZU`5AaPtx};PeX6_1u2k3;T9Jdguuo>>Z5eq57Fqq&hPs5;^t_}F
z_a~KtIukpxdqA~=s(7xvLbi=>cXoI2BG;4rO+{zP4JgFT^J<zF@uud1|E7Y@Lpmxn
z;?a<9sH7R6-ZYNiy|5xS-eI^aXs>wbU#IPhq?hH_>Q84ypk=^M`s|w%EZI(wb;c6)
zkj0&0-X%DYg&n+f2!+WM5L_|wv*SbpoO}Z0RQ8<XBm_E@5DtI@8KM(<62OXQCKrMG
zOl`Z7M?`j$u#zw)#|awenv~YXPqsttK*9D^58qjueoICV)dH1?Ue@-A%y>W3+jh5x
zqZZ?mjGWLwbSc}ZytMG(<rj=t<e%tmBu5vmdOO%3>cV#FcV?IAo#HdNqM8$?p109H
z)zl5~uk>^Y`tvm?++=XONRvTL_G7}7_>G4eqd<EL>qbhDv(>h#;(%8t4j7+$$isve
z?m|*bC9d0vH&nJo=yG=5lGvlZ(~r{!{z$lKZo`}yWLh&oE^qW6!;e{ioZAE&ZdJEw
zYSxa_?Vxf>fDof_UfwXO{(?T4xS)dCMyOFt{`gaIqo&Ul*GaddrnO`dd%UqerLiK`
zx|)p#F`sP#Gn<<S(QtYi_xff=?lJ&^Zf66BDHPh6ywnp~e1u7h>d_ud;~sVC`O%3@
z*agBypl3*A*$Bn_9JzF^D%E+I16|%t39TQB>kc991py#zgd3t5#s?aEER2>`y(QG&
zG^hghI8bH+nhN}KUO;fD#`px2%Dvy>F!$@UhvrSue8{%*XWrsC=>@hgy2BHy-4Ney
zdT>XB6A->6IswKmRtlim^9+H)PZ4a2Ot!2g9BzYx0@?{0r7NJ$)>IFP2>*eqRoH8g
z&l|9}Be4YJ=-rx;g%w&#hUa7UN|(St=mbiyE<#h<B?j-(U&a~wgdlC{#6(uqN2ptV
zq1W?Z49$iSJ78!$oF)Cy5I=%us-_phE)DuY(GdF~0SMP7uc1rZ0IBRv+P$qIZVX}!
z4;_2ys1HqXH)v`{VEj%dFA+2hJ24t7dTn7ctZTL_ovDWwcJLuWK;j5`gjPsTM9#9<
z%_IeA+>;o;v$KqzJZ>B8#Xt`wv=#b3>ZK`GyTP;~f{95sXQ{CLcF=_;Mug_sMmI{J
z1$#bWMo<}FaT;p%2|eMg)`TRKMZ8}{IAWC`^e41=-fpY<AnhdR;%Y2h*BHxsj`*YT
zq>m0C7iexf486S-`;qMPg2ClBUsY7aQo%}9vC$QlyJCgY=eD=1dTa`qPphiRy1<+R
zHB{$~6v;0((26F_V4U^>n>tQG9)GM3hW$|=$Cyj9MSkLFCtZF`kSq@Q)GL%w$?fRF
z+aj1(O$A$0Q_<107I;!q(LJDh7r(WQZ3Mp$g5Sx+_>*g#K2em1Zl?JWNG(kb#akrL
zdz#qh)QOR}h95DIDx1E4at-@l@x?)l#<yDd5PA+jyiVRC1aGyuFz>dbuc?9Uon>*H
z33kze;hq;O=-->T%-W+K^uU~X88~4XPQGQ}vnz!e&pHRB-1)lZ=$jVldUEXUaKX?>
zMUrs%p~wlq`Q|1Uk<3~Sw$`6^pcZ+vXP$*Hare5x5Ggj~6=yMN(7bjcDDL&02h_VU
z6r1;^7q7owt`Jyxj$OvSUzzg#>Xh%FO!@w)^L<!g<LGGn@VJt`CB$`-(<>Du?J5SD
zFX%klKc5b>^h_O!yf_NvR?@Et*$pLEoj+TbE-#5962kbN-dgF*t!d;^0b(zaO|c4!
z(qo+t*~x`Op}QhwaYCP@D^+}`cP<4@<m5}MyUH;LpwI8@=)8pQepoptB=<ImnP=lJ
zO$31rP|kx>m6OJd>5m2!Hf&ZmCq2(JjR@8Sp<PC|U{LN+()1GIZn)netw-!<ei^*A
zZ5c;1<zgc5%&}2r3BhNO4c%TUE~`ORWg07D$aqX(imFYOo?tO8iil_SSCT59Yor7>
zqsD?0qohAyg;n4NmuAvXmYgqTB#+e>DC2R~6Zg=NzMY;W<L*e&e!!hE%vCAHx`e6;
zfmzuz-d%DFvou+eC;0Gz{sijDnyE#dQ-Jg$#&GJO?V`_6cOpJClo*u5POyb&Kky6p
zL(4L$6Kq2F!(K$>#ol4SZb_UaPxxY0u)|tVw4wzc1Tuk#UIRh$#bqjm837gpO_3e$
z$W0e!z9oBPlPti95E%ca&!QeSfNfKvhvmi>4a#fF-WWAt4UR_$dqX(B5yKQ~qj{vJ
z&<c1qrrhMBn4u;}4jsJWRIfqlC-Xk~U!}JcqKZwNqFEACw4!rJs{J{$p(e3h`?&c%
zVCFEjs0kS=wsiR#vNFR3$-|0%LwzLI#s=NH)>Fw20d#SW4q<sdzUDj|oH<SMtl@l0
z`y?l>9Bp2bmOw35DLJXJe{oU_Y=D=M4I$S4fgD%s#~!!j;^w5`Dxuvg7c-dQUV&6W
z@?2!Bm2zpHnUM$)yL}82)p-Nn=Ly?@<YojJ-CPh(_Zf9<k+p?sHJ!*$nKXu&T6Av{
zv&6|gF~~!4jFIPz6ySCX(K7Ud(s381JZ23~g4h|e^B$5fF)29vL-ob-U&IfH#!|{a
zB8})fs@M)QSmkh7U|JNHn3I#T$2!E4;=HpY(-YZPu3Oq{<sY0|RpnzsNd(G2sBHYB
zy0-fo3K6@6-#*{?$J*}h=bZb6Vu5rX8p`F$wk>s*VwRY*Za-m|V>|%FEh!N*u?J%G
zX9;FI&3@Gr8u!AKv>SLTHa_1d@H%rw?pku-VysBI9&=*0bj#@6<wdJtjdDa`?Ty{E
z=hPYKbr+_POD0@hoQAiwGWX%vrP=#QGnCK=k2cA1oSH%A5tc&O_uTAzDCCZUwBf5u
zvzi_vCYSr-RgV2h8GMp%zdw*?o8VmrMOh1D;>xsP$wO3rag%jyza}rOb<zXLC_P8A
z2<n0m6R)dv(g}6xdb7M$vj}5|w!yD5<hYu-h{J5^5IX&&gW2oN958Pll7x7ba%*v2
zdJzX++8WJ@T7zm{YoYPQEQmeQXn@p-kl;s#z_31uK_h6-nza#PC}9iwWvJLj@y#g5
zSU=4Al@`1Q#B!w91b*dRf^~Ict>w7LUEcC9AU-$zZmI=zSJu!ZZqLmo!9m%?U{;fx
z|BlCtozAJfdOf27=oJs!BK9+%Lg7n17=G?OtX5Ui(_tvVMpZSZT>9)D99;V11{z%Y
z@~P+1O2|mYJ+xUUMJ}owYFzT|s`IU0hz&tY*wBw5+5^;yFr6}*n5$`X^0IEsxuklu
zgR0Ig_<^Gx%rn}-`;C^b`R5vDoqyF??A8sU_8g=39dDbFiiQZdw<#0rksPAi`%4Gr
zCU2HAPZ6aQXNvgihfUFQ7@jgk>}}c<NsKB?5tZ}lQ|vKPf%-}Kds9yvKDU#GUp{Qg
zuH*CM8RH#3Wxj^1Ym-1>^1WYY+}#1`(>ibMJg3WgXiuCEcQEbrVHrGt)SgU&YR)hX
zxjuHzjH*`i%tCzY%zb?XdSOI)Mesc5>eC~Y{(^h4IN*(L7bHc2@pN1c<>j5DF|~)n
z<N+IzY^`Wu!;X!(w^p>aQI<0l%htpVVM$Kil5KX$5f~lLfVI|1uCBTnV<??%uRlJ&
zuqYpT9w1eT*dgI;qVkDL%#7EN_4%KCmPpzxDM2?S*<Ys+=N`5~1>xxbrBf5n5Hd;8
z$7|-;LCG2|M|B#}QpNgO=@KIytd+fYEmqZ|4fPYYaiaGZOP2`CwzRXSCu{;S-aI<3
zMbX_vAy9<x1UvQcQ26$<XBML(d_Q-+W@<QodQqyO{t1d0i*6oKdC6kQRqjpF%?v(M
z>F&Lf8eDv~n_Y7hYwaQLPv9$8h-GJjyYk0cl%Mo!rBr-1%#-`%wP236@ZiBC{owsb
zzx6)S58L`w%E0#xq6od6P8sCH-83r2T5%S^<G4Q!*0zJmz$_jR0*g$*JW&(pDW68%
z_iv7A6viGtYul$-7-u*Telc^67bduC-u3kPkZ0;+ajtIkz>?Bp(_<c8H>zi(w2j82
z3Pw3xm~dPW)UzR{L^)aNF%cjx%RR;g?CtN>UmtBB);BkHcQ;<_ZmYxn*T*}1+egpT
zpB@)LZ=i@*fZ%TQk~ypwoQgm^yzNs2H->*x{%v`BA*cmx=-yT@V6Y!mfET-CCRb_h
z+KNdC@Jy9#s-l}`m?k4Bj;F#YOUXOg-4U1!KH&t{RWg!Te10mksm}usM>;WhsmW`Q
z9U500_v$(0E0B$n{rx**bQDCX%`v#v7L@<WNRX#+t@IVPAogg&H;&dfz#D9$iZ2cf
zQor2OjitjW3RfbHW{B_0_)v6%_!$J^;1lf({)f|?;27{woEES{7!d~aVm5gip&0<P
zzly%gkuDDGW9^5KmhVR`SCA^0zxdU}SQS;#PF$>G9!Z>PlU;SV1#3TtUYAV5iOGIE
zVO27{7YDnc3)9;BCV$6tPqJRbC*G-SjQ;%SXC5AE(+5!$(m_U-C?6f-E%zjbS!3mU
zFpzr|%eGi>C4R*?&PR;oLoppoQCY-dxyAM%(xkQz7Ynx#^Q@_hJE`sjYlCA^+@LF`
z`fw1AsjQ>nj2*?oHj;eQGSheCZlrKj^cCW$*^tu0yU~O#B137Llo>RYvMEWdI{2Gw
zC*{Dze%FR34)}Mip_%XHnLrod4Bob|VCI2b6INlb6+)<cI~pbuQ-+=hbPy0*r=AQc
zSt7b`pN`Mf%JS;+vWFkYl<uv7HV(0=9&fMQ(Q0SZYhxkm&1eHg!?o72QWdwbY6cOt
zOa{b8|6<-^;3rr!(r>kh>@R86v>4H-P^Stvm*Z<1HntG+L^q?QRvR35rw!-D5>FZw
zNVevOamUL(ZMY#_kt*-x2n^g89}xGt{oXm|)uAAQnu#fTh;F41Nf{WQ=IJj89%h9>
z??4?PNS8TE8tT`+sCehgW$fXBD(><Vc~=OoFDq%Fds$CJ8K-IIfDXxifNiijTP(}?
zn|nYxgt>p?96-G-_NySiS>7Vv&C3i7vyvwG4n<&SG-k+I*PKDPulpVF1j7m>*>Ril
zt5Q)P5R|(C9=^1stH{IG6{^rfUsyxuEey#*2OS40|BRPWoaFGw^)(&zpwYS?%)|%E
zuqT>f{eTTTDB%G7O7NhBa`Y?0gHnEg-~Xkf*#e9S$&1HCY=LAhZEb%LWdeo`I19CA
z_ZSwejDToz&I($&2d-=u)2);~Ob00?Q>(x)=t)*Z2QLmR#jV2HASeqMOhW>N2q0O*
z&QJtt&v+*EIFyMX6dp`{vhz?|K5)A#vHhwlB2jOzR?7EEugzn_3~<A6zi7LndsDIz
zpM@I{G6NdegrqbND3Dq&x7jJloZNHNubSB^n><20JmNRu4OFiesf<qV@XDGqbMzC!
zJNXpT!RefEg+Ta;yo~qF!8jKNWuM{RIhP3ARV|~RC@xJCb%pFBI<w@s?PSBWjmTBW
z*+{Hr?z}iqmFl{>Hhstb5}wrMJcgNX<=RFT?AIuk3c0^?$*jzN$>{OuPwFfBvnB5{
zPp%0!&l#}O=p2`p-bu1Z*iCqEr3KGe2_q@tB)%P-6jQ^)YU(B#wK)@)IaBU!WwR>G
zaP`_NvFxUubk_==e92`?JixzcAI!jS9zZRHdT%ANvsFg)7k)bR<US~7uy|)3)(cCF
zSAkvT6m6rQYY*KEi2<)@mUag0ARSgevh_CWMoV6{a>Ye)=$`T<X*P;;X4gG@VRgh;
zHI?q)PtXkZak=!Ai(}GSQAiC>@=;2bs+-g}TMyGZ8yshRTDXIgTrO;-ZT%L9x0PcK
ztZB|IG^yXR7F@HMzHo43=f7}_NsU$93D-?B!ZQgB-$CQRkRLiI=x<M6xrsVY25{q!
zZt%X9JBeErq*E8YLElkq*aXq;2LG~pjCeax_z&8B9r3`npaq5Tps!8u2&GW;e{)QF
z-tDJJj2#!&g72H*>0X)Tbn~OQ6A#6t$sn^N#bS$;aI#~y#H^7vtW?6T=yCfoEkpVi
zF+(JjKTmx(!?=A#f8yS(?q`9;KwDVW3Qc*czIiosPSxC(rXj`I_=cm*X-*p4`Ici2
zHAjV_OL2bTFxDY5)?tjX!g4GQk*x|dwPIiyGu^`1HD%tU`HHC!=%v^gss^cY;<7}Z
z<?i=oNk3M9WYB^1vh;0+!8H6T4{PZ=+0xrf{|1dNNw+=j(z8<1)1e%K>vV=;1u9KI
zlF~Ye38KmcRM~;FNnN;~{KKAC3NiSfUP|u#rNn(y6M`^zTV$dpT6|((ZnQw)_FQA&
zGQDku4jlA?Ii0;#9PVI#zjGCZNKlhKzW;!SoSC0-YRY}exT}X8?e^}%i<$Qnj&g~;
z-+bWnPcOY`wFed)>}JX+?Ms1K02?{si4kE<jcTV428UQ03{m>_;|y+~LS!3dK<cKI
z_QJ?r?G{UcT^5NjYztSh*cPdNbF_7x^;&jSF&qXR6VrV1qRtp%;YQB99kt9eCscVY
zh*g;Mc+__<qy{;fm{YoqFJ0|9{9wyYX}AMxxQ#F@%G;aSSggu&wPwG9tIhdpYB;s;
zxgBoQU3KY2R{Q?&rKVLH3k~BL+E&HqGgy>bIAm#6@3+=3<WRXD?s7z|Mc!`Vw<O<t
z=j;8p5ZXH!vkDT3Nh8CkSi;FbC`~}f1O`vOayF8A_q8=CO3<VL#ls014EybrU&qc8
ziZdScIqgKlq|pP_Nl&~^>J$6a!w>bk)sT6$G+3=Co1@}QU$1Eh0!PuZHfDE*;<`yF
z`Q%mz$|J?b2ilRfIB>BwRj)fPcKD|V&`Wtx`~+~NRMZ<g!X%C<?L6~h7K~3HEB8JP
zxVZm!Km&ermIi7iL@?(T>QsG*F+$*nfdqp4=zuNbG6q&L$^Pdo?o5QSUO0v2RN}6|
z9a0I251{`NqKY<jXk(da&Y1l?Q09p7uRSoTd6}Xbw*bbhe>@ED+2k%rdy7_*(EILy
z$SFS9N&|BeIa>|1&A_!7xOPS+B_EmFm=e9P0EY4r>cUnZJ)+vr*OU`yQKUe7TmJsL
zD&<*Bb)gFDn8uU=6jz8SuJ;Y8&NvI}Z8WcLZU<>PP$&jm@e9yfN45nTO)|nJbkz*^
z%JS}9<FjoX&*vu%>Zm{$uKRO97uu|;wRTsunvFIr^V=A?4!YsdL}Ogi2DupYcFrpX
z8<C!MQ;Cfb$yMKg2w#bi&kt3_NU~MeSU@%f=zxp_E<uyIu3;KF>R+e2DP0}ysM_(A
z!c7c<Y)xJCZ_+pEkcXJF5mC1#8i09J?WCNktzj1d^Z8ODWsbMyRlQC|-Q?7oL|lW&
zd$N9q?SB~85B-29b52pHh;IfiBITTPnwtpB={y~|zI8?CkwQXVKZ!U!c-agLWZ0}a
z+&Y-L<;;>FJZX;#PrLX>G)NeNZ-zu=v_mBGc^)Pfcf8YRZI{-{pH=F{LyMPToD4z6
zI|srfcp(b*Cp3|>uDdpR(>%$XKCJm{^PNTX#^j0UL+z`vvQgGT)G{teE?j9vHohJq
z4VOi`4a697f)v!ftz-T)_cl|NM6)l)dw>q#Lpu8pJ%c?mJbno24uKAC+4+DOpOe+y
z!=ppCxrd!|zX6wQJMMOOC$Yva4SeGBmtQ~pIVx@Bj^uCGDSvZwQjzfTRN=W?^DG@D
z)7C^LO86WXQjM0FO})I|69kt93|(TjR}fviE2T#EVu#b)*o0i*vl2o|hEkL*z>|s}
zGYr;41YhhuL`o(8kJmFk|L6;#%6pMr6K*k_X}Es;XfNH`Z91wp=*85(P+WnWVY3oF
zAR4tUmyAqPFeJ=}-AmnZH%dK+T^sqF5j@gh+dMuyISlgPDx$l`&=UUQig;|xZJ)&5
zJ(Xj=admN%1{@vz@Se?6M-4_EXKj=80!Ly4^X9W*zc+FP1bXn3hXU<zHUe3o2ZA9c
z>^3^qC`;?7z$mVa@9;eLElCK-?Te9ezI`aBe@Ql@@GAC@F7f>Es{ExRrwx991vEeR
z*ze377vj~m_=&CArFU&Umi<tSbjKbY?ggf@wTlm(v%Y!QTPV&rOqT(RvV<{_li6Ys
zZgzKk(J#hCO@&XeL6@@qMMOD|EQF4u8&U%;VKNf9J6R%%M|Ymam9;vqDX-5sFP`L!
z9hMD!v8{d68NKlY(D&y};^2izF630@__8LHgx6Cx%DrpY#5s6od4f5;DTq=W*)D_m
zELdABsnKk7xxxap3j&<Cd|bf2X(gO1(_{`m5TqHIowb!7Xw6a2T*l?fSI)7bf2@y)
zJVzG-Tb<WGqYHXrAE+=ors?7_Gfz>PRk844-nq0@(|l}|OJfPY*GFtWR{0}*#J0iC
zMfNRsh;zM1Jah2bIFtb}KQPA4qX6(aC#*dUx#c4DNt^N&ncY3eK6RarF8ZCj3M2IQ
z!UPez>U9ZX)xSy@VF*eW0I6-%0w@m5Z=Vd$wcX+0;dQ-;Y%k>xLl$K)c8gVwt%|F@
zDt*=tAo9)qL+{BZEfRf|chP5u_Xo}w`L9?1sORyk;?r%APb?p$gYfN;ikQd4Lfz6g
z?K$EfXAWTGu?#JKpFfsJ*kbTHZnBO~84=bR7HiEiw3g10<p2T#k@2Va!d^C1+d@e~
zE5|T6Ln9@5wmcR;y(Ltkon&_WqnSl~^EuK@=dzVGWb!9OA4#V}UW=vs<~v2q9T7WA
z<w&=)#`n$<GK_4*d*av6_4;iy{>BfK9e4)lr4QULBsE%|owM0?WOD5JN%j~CA9ufp
zHeJq-VdQn6!<uYj%F71m=+R*<lZ4FdcaVf^Lfuhv-qJbIOz;itmmM~J8wJla*<y9w
z67TQOlLe8uOr|wCB!2oebD<&;khV}ttncT=fk(?tkx4ZlnosCapb?z=#GK!1jLw2p
zWI8kX0?ay>p7hd;oh6An4Hqah37SmN;RI@IQEozupWK+uRb{4{9{^w)vq7;oOZGRi
zBuPl&%onfO5NLDnxb<q|2mNp<^rk=6i1@I-&%4<*D4>mtb)KEsvIv5bn_)$S&O?pK
z=IVFiGh(rrmM6Qa?3+{oEIh5bZ?ECon$<ks)33mLbCoC!b)`MxSD6q$zxtdpO5{x(
zDI(>^0=8ngQvFd%?``f4Rr6ovXN_X8A-rya&Me1B9yZbX0AiD^KQr5YqV^h7yxe9Z
zPfXs`OyDq@e+QuIZn{x<zAE&!7T9ARJ#uL4xyfSTSpCe`=1GgK7*>wd<{+n==)`Re
z{b+7K3D?ZAHIX)AO2$n_O~<_J&epXg&1TQ}%{5S!nsg#IEe&np&)K)Anpsq<5um{*
zcxn&yZktygzmgN$k`g<#546B{hRM0W1_&L5QeW9}1_n^cbL^3xq-?wzT{vQZE#+xl
zWu53(sVk==!v&-X_K-NeH5N*9i52GLqDI8Kbde<lkrUrGqgUianso}UGY@ci)YpYv
zUlf?sOWT;?5v1nXtPZ}U9hmZht+dWFSMFmbtE@{wQ|oI6nbVRu7meIRvCL%?a0w|E
z5!*VkHfnu6`S0KyjvNWUx)GQxdLI?I(6SfB_za)(1igppVm~0YLPvv?kqZQC^`}6#
zbOclU^L}@BBnC?RtysBpkc_Y;HJ52ht_4y+QOP%kdXDxIOZIg-JQoeuJ2qCMT@wOu
zPojCQ!zPCGvNe|US%k)0Xv#!dF1xG)xRA#>f`I!sS)SpYpZD?h=KB}8KzH0pJ5KZ-
zuosy{AgQYv4*Qy_#}Bz~-L@%=jnX@lCh5KE2V78ZnKqz#o+s{zTo06pzll@yp8QS<
z&voW%F_W;K%i@P3_Y^xqyRhJ%8;dO39eK0-F)?~(-K?7ZfaMDc+rxlx@sDw_Com@-
zJ#VAD2`3>rKNsAI==D>2@jTFeCf+TAB2e(;inqLkxywPacdd5{9!x_~s)vOCpoEiJ
z&iTN3>n`tBbNe9NzQ7Zf)>L5#Va&aWFXp#>m~Oupu|>^x;L9)dY2J(B*b9-=-!g`y
z@P)C-53Yfd?HWyE-2?<lE}o`NP2rnKf0zuj?(Lt+SkNQnG^NoYuTFaAsa3n0%+7I0
zn5AW7qnq7TBiQ=<#qnv?T5ouPiYDmV@kRu{Dk;q9)+*f!vkL{As=f*9MXRrstHMg_
z5l^u}CP_;kQt^k3HZwfUMzXmTHlxXsShJ5eEQBJTvhQVzL(<jE*;5|el`ePj;@5Qw
zM0SZY&(L~C7d76FBnS8&c>dVq@BJ~c+T8+2v3?g-q~f%_&Intj?YZQ$;k<GRk7DdK
z7N6#jzrMkRI!({Bo(8eC&FHjTh0fSJH{ff=5G+DSJ~l78mXIIvq;NaCV=3ok0CSr1
zES|Jo+U3|sRYI*^TGFyWp)p558s%yC%pZTnv20eQ1jSGwf3uCGZ$+%rsJ6ZdwQ`d(
zCKnxIKV_WTi07+XZMb`ZV;W`GXm-8=*;40Z1ew2o!w-WUWP2ie2P?#eB-@kR|JQ0f
zsN!tUBc?y(6ruXP!CiVE4c}?#G^O{qcG5#jV4Z`dvb+{<GB#%S$zHm)C65$n5603=
za*ad#74IUNf8Rg}CJ{q5i`3Jml$eXRazpF@zp5GkR@)MabFeQpO}J9al4^e#yx~WH
zh*-#XdaVkP#cPql+*%`SBiH?2f7I_~=!kF|;BM9|>DjPYPB)Rk0ZzK!q&WAWcuLqD
zm>$Ksx?FhT=5#YCaPdW*3z5htXnX+bD~8T38nl;~F4NYTQu)@d`^af;vNx0oc7)*q
zrNcPT4L(ygM*>5#gp^v)&5S%BT&DW&@}>FW1LwjdS>A>PxT1UXjwwpkC~u45&l=Ur
zZ`4gkRw;TU8j*FfJQ(PUN`;C;eX`~dom$i$Rls$Lh{#c8*zo|<LC?n2?3$KmbFj1z
zool5Ct_DL!KP#=3LAf@IzK4^6T+3F>jB29Gr7bqq?b2&8wBK8m*iNz3dr-WRb$b!B
zbs35x3q{hy49=8(TX*8zXkP{+sEY>d*0~|x!x^g(;1iLDV<9~eoa2e%k)drRM5r^5
z(`#kV(riTt!+jn&wvksGhu>~*SrZ90XmozZNCLX@gMj34IEHtLmh>Qa`d+h7=ym5M
zp2o#51ClBL8$)xbg}}V!r+gZ1&?=>O=OG4TDvc6)ZoE0gv``pmX;q#)F68)>;R*^*
z0dl8PngtAJ3f}q&pw;iM9x+FR4%dDS%_+z*bei{vr=~}}h(WifDM(;&QEOXiY$-X!
z$XiQ$<s|2@0C`RpC+u;+c(aFi5eHiHgNVwE%SIf+SdelwsB6_&I}1z#M+`>0<k3*U
ze6tX^=Z^;kp(PX<-bW;YrstA}J{mD4Vs*SX`Gq`kY)qto6FZ+yWu;TX3ya~DGjQKB
zn{=?VOfDjLHDZ$22QhGOg)0KfO=egJNv`^luzK!)-5fy8isF0opcwV33!|Pw#Bnz6
z85gk!^vRN27M0%!AGKt(H0g2)13w#$Mo>L?&op1uLP^UNArJzSr(oy%d0sF;Mbbe`
z3k9{5Uko*RIUb@l3^*oS6e&9K^ky;=c|r?CR29Z$B1~Dh3J=6tNzV-xQ%4>!nB)b$
z1Eyyq3f=RKM@3-k2hk8v-hc-MGk$&Q+eWEvPg-#AQRB1YV|T~=10Kxl8}NXh_GeM2
zN-@bPieQ)yGzzC1Wnn!mOZUCFo!l6)IBCjhr<~V4&{sE~oV-r}I`K8rhs*IWT`=d_
zMtRYBAEEPkZNk(HXZXvS$^1zf7m8C;-|&TlSulH|o9T&Q6K)`1NF&sRt75Shr2SO2
zM-dh~(a&dYf%QOHG4hV9uRg@;580qCU}BW6gLD><%UF@AqCJR(qKKQT+aA7R$317E
zu639d5_E=Zl_va<-FeLe-sEx)T6G}V#b@fwlEz9e?~}$__ZN0at%`8z?f?~IVp!F3
zW>F2tvRdhM{D&QC_HQ9r-lEn|MPh_5U-^7pujN$HsagPIb?wKl+1VJI&Do%JCMvqo
z_bG-y1s1`hnOVi$;%~g^xb$0aQQ-<LI_{EhRXgtB7T3ZzuUbLY4c}4dRnJ^9cBST=
zY3vsA#9GhZI-kpKS)WE`fr~;v|00KTJ*Z5X^)=)3TOJ5a5DjDtL?*B6x~XN~-|IJg
z|9pw&YBbds-a2b;&chNQoQWi6nwHA<s#*m?Y@xnH>yYYo&o<)-G)?aNwD{;W<exF!
zr$qveNEmmGV{$9|@6;ygp6$3Oq{6fd!sOCY^+c_?QB<Dbj98VS2!UjRBj`KZ_IYkz
zDBVD5e4D#TKvV;fE9){kxp6VW*K42<XaC%)Zle^Y4g7oDPCKeIrgy4PxUD)p&tgAb
ziUe1=rg;tSED^D9!`K|{E?{1n+`$JIK<>PiqTf1)U)aywswN0AbXiqS*pN-vi(1rV
zaWzOkMJ6XkKDbB=5I*v*MSg7c{#o5e@UrVVRick2z49Z`kqP`V#s|a;vF_q32S_lp
zXntl{s<T_}ouiBCT-A8cp-d+s5Y23ovZwbQxV}%Ph>#Y@fV1d*_l_NL_m3HcCy3HF
zjRFXSQ>p9?YU+!`JbC=?3X~!8AQE|q2|+0C3!K4pTH-r=nC95D#+{qw=QN2peNQH>
zpGQ2I_)IyS$W2QLQWyXafjmi`r1yJD&%q!`9!u<ut@c~@)n5ja2n;NK;w>{)eG9tR
zo#&%ftWa(U8nPj%Af8l9+bSfMiQGSvY8G<V6rb&FW=YqtHcT3UI8jAR2y&Z8^@JXC
z$W$_G(g^LSf8U8CXi%*-h-e)+As=H(@{16RLMQOXS2De(=mV<#vkKc#;jG;zsFJd_
z#t=Jl<L3f5b?J7Fpe=lpb2n9^M(MH9$9M$$4tDP;V&ilPIBGnQRN|c6R*jHWE3Ov}
z{EVBTN2%d-KpeP`S1$}@9Lb0=vX7i*U@q6%<?T(1$j{;l)ME!?3V+13ISLKIf>?D@
zlgL|4q61B_my}J+<E7h>#B)%~DTe^On4x{7I{lOa2fF?K6*Se&uE<FdSdk?8=_WnQ
z;fNN|Qxy47jOpmCJI)f0I2S#-P<7$=1e1ScApnUiEg`kRVq<&?rfbQruhUKjoU(hX
z`lo~|#J*YB9L0MYpERdO_;DY#J%%_pu3_AJp>LB1{sVK1WIgOaagGuHI4)Ec?jA(t
zz2H#Z8<51$Y;IX>NP&UBnnT9a%za~aWEN%s2zS5+ZCDIctlZ2pVQjer^)UgPKn{5`
zL@Y7SCQ=}%#<umKhtG2x(ig@DlK~*cyVQK~!I{JdXHLef%mwTti!7?FJy2`vnh8gH
zj)(4g+Iv=Y?L413-&s2pmZG=}B)P2cGBGJ+R2Fv)H`!kDeSAkd!vwiCo(lq!+0nZ@
z!ii+81Zx^(KOg$dN!urzt%X=^|0tUmm#-Ee;cBwaNPbfm8Qpt9MnR{x3s1_^7;4-{
zgU$e3K%~EMHyNOT052Q?LnLT6VzgFA43`VvZ+Se3N)jxNejbXfhu$GGId>srzU1v`
zd#f$ieF=&8-_XkNR_R|8PbAC0t5B?FHgU}y-39ZCZSEHRUH_~wigp3X(4h+%ftaY+
zigN?vOh!qK^*(FhPQ2ZR20HqI0Q(LTU{6F1<PD1#Y44VDp^STe3%RbAoc1YTD(J!P
zqM_8;_rbAh)%ymbz}U2nbkk>OtX$KOu~(hL-8%?u191Xc)9f%Re}A3!5U@fFUycm)
zUib(OVrd+(bl~6?yQ4fYu!8UIRcbFj9(-g{`xXr__hFDg-a<}GV0D-fH_c7j$slu^
zizM4X5DoH>ORj;*8FADUM&F1Has?616Xgo~+mf;tY0Q_n;Kobja9;QUyO(Dj!91bE
zX5F<H;gBuHa>~`9sW^JH6}5}Ak^bH1zcZ7xDu*+ODnsxhmus_LUtQpDWMJ!H_ql{r
z<j*AAsk{%8K!s+g8VmpCV}T6jtQux62$@SQxYa`iz%jbXz%OWX^m8fJJF#!1=cHxD
z8gb`-K`YWn=Zc#Q;huw4c3w|mt!a$)iWwz6d}g!u@)D&vU82o#3r?w894Zx>-`hWi
zq7OAjsS*7jYS61j5oVPd&cq8HG1%qZ*H8q@+ro34U<=Vf4W3Yl$!gzoGaX*{R@b#U
z5?$)f<Y)mlK-;=zm}-gyd3cuIJmgY%C1yE7_E=4+R8Jzs&ee(vp)(c9o>~oqXqdj)
z?gkx0OPAwZlxh8nt6NPbMus8neP~X{0wcY9l}o9+(VsjxM>qV&YH6kTjoLrL1DIyk
z6+(9Vm|ybrTmX1Op6e)z_*7=$P5dUr-dCn$x>fI6%QJ{=QP8#tFn3l7bJie=Y&Uam
zL?JnYu;>=`Z9do)yE#MG{Ro2JiZiDGaM9i@ro&?W)tTxSYL+H9oZKNtCYWlv!(j?H
zCItpgfw3!7)!zr4&`WMT8|<L4CjCGhqxuN6-=w>VAc6v<sP{+wbMx`AOcVMA=;6<N
zfQ;Wj;4$F8{s-^=@mK%f7UD1d6JGR7GtRiEE6#;#l?z|N5jUk0|D`x2Siv36#TH6O
zQL%%%b~k16)6XRTI=wQJT<qK>n%miJLg4vyXS5iSPC3O*U6CN0xaJ<7&zj*WpB3I9
zA1GUc<}SZR<ff?CP~1mN8mX4P(8jvbE^%u#X<OC2eEF3^XM6#}hnWO6QOhD72{v3?
z8~e{~tl66;cvQ!LR@8L#_w)qd8ao*lDgxx^JgOUgdCHoz^P-CEoGKbi3VRi^7Ex1r
zf2KPdM*Ur*GP|>H+L2oURRi}yfdtN@h_=YM#Vw06#)x~q4b}=0<-l;|r{904#VAR3
z@mYK^<!kL!Yqqz|OY6ri*oXviN*UesJ%bQR4Nkm??Mt)w2PhYmWbaydP%sl4VSwz!
z7AeQkw7rx$ex4iw&Iv-;JnSQsb6^jnfH{SdtEOZiD^tKVsYccibqdO-F;sx|W1Nm*
zjOPh?{pW287gp$qlMrwm7s6e}fx$iY8O5`#((H*`af2yQ3=ia;*_LoSeBzM^HW*l)
z#G8kw!~PYgtRkYyEFH~33Upy19gjX!h0ZN;PjjFzv($<k#bm-|clAgC*UM?CXgKwQ
z8p;f{mdkM*#oqp2G@X_;9k)5$o=_D0cehG)qAfYRd7Qk|0t6bkc02LFakuPp6~D|q
znvzBnHtKBh#uVq1StShODnnp-gFYMb${hOx<klIT>ZACbWw<!A3?vwY?>t}9+x?nu
zBOs0+o9PzeN-?-cXGJ5I;M}qdP9I_HuQ&bS6(i1Xa0-(S2>&UXqEMJfK*1&rUI`+k
z32q*9NmLUABwF&D61*V%j2Y3$9R{sLoCDL*j-7N5gMOar(1nd!DV(Kom6&MTGM23(
zq7NfbLJ#bQL3S{K*eP)mz~?%QIyQ^N04AIb<^v5_R!5A`lug-UA-If1kzuv&D^UwJ
zI9IZk`SY~Qkzs{z|Fix)sa1^TVyl(d;Wo4%17h`|8^cAv+sU1bKgIfsHhT%SfCAR?
z3AMpruopK*3jIO(N)`wC&@o%({qeA!mZt0&bnqFkX;OjaBkdwsQsdI~V2GM(hO^~@
znANgYYM0{1twqbTmRPp;zA)?39xo6|&bY75ncozrC;Fp(Dig`AZ+KsDF((C!hq0kF
z=I3kFMI?Et0MmLNn_Q<Z1YD-&7BivKYFx@{T)L{mB`=4ldnjoUY4WSR6Tq#}P)*D>
z6D5aO7x54x8B0qi63<2(zBcPnR7%<Ck<F}W(C5m49-q2P8B{_tq{joAN$=1g86jXG
zI))^@9^pVT&1Cc<$bgtSDmV;J;uAwHVmTd=^dOoAy)?9!0uTcmn%G(WobRg*V+hi2
z7S4e%WsUwivG$tgdIi;{u+SpmpLG*<bu<}<v>nx?IPCY*&vRPBt^`M)8N`D{8SDPu
z_7P|!uMT#%Uv2Ll%NhkjzE5pi??+-0&cs6y4sLC<^!anXej+|zAUm9s%dEbL@vib#
z!b3eKomkH8^U#GIKd;wA$0EIw4m+plve<ep1}rMTXRyc-E^dV5r^2x*5rVUMx@Dav
z5>QG1G3uGZnmzc|+4!<Y>~r(wfKklLm)k|YT)egle7POt%OT4upNN0FonQ?9AH&0X
z6c2SHPJN_%nBzk08?0?7JsqF7<TRNGCO_$BiR3)TnbZFKleXx-ef*tR);nqN&Q(r|
zBF;7Lzls9?v-f$Z(&!LgRHbCkc(1ZZnLBaQb_7zptYLyoLo)Oh<>iB_1d(m`n;WyJ
zY@v!gi~eMid}%*zsEzJZ-p*u<@D-#XxNn6;yh-m$cnbM^5E7($j6TUL`nbMRj~^?f
z7B+t9*m1S+pJ}g?oh|&wzr>#uBjfA*_m_W7|Mu0>_LuF>-_9OW$oylt2OjH8R<9@n
zZEk*VT+G<VGW_}S=~Ma_{sjN7tgd|V#eb|kS^nb5^3#=-r>p<5yt=yl#nb;#%YVcI
zjPcG<>Vw+}C-+bCM=b!AQ*6^23XAMaPEsEc0I+4%&CV}IH!1$DHsQ86{Ir{$4wK<6
zwt$CKY-CCIJcZ92@G*X~*&o~vscvPns#amUmD>7$|2NNu|L^~*y)oJ$llu3`RdTJK
z!@Zde(?*X!ZQbCf|CRNCw=~X%>(U?%_K2+*n}{;~67$?5s~VqPQm7{TfWUFRHc?=&
z`oDq>lVJUW@o5(r{%+O=nIcth8fvw%+)z852y<|2svPW65IyQmoh%;>vr}|l!zM%}
zkVUJhANym}uvGsPeJ@c_K;Qd|(P;4O@#Aamy3rq=KZZ6RLvN283k(05odL&#W#0LE
zd#|;BxV3$_v-frDn}z@6M0Uk57A!NOy&(<<?QjYg^^iVnG1G5cJao$&=EY1AuH{Ab
zv;GjXf#YxNtR0;8)~kMJ+)cU60B^IOvfc^YG1l6NAq)BK^)(_Nv~4DAQ@%e2jSUOv
zcmz{cqx5e4@A1-x1&nr$sOslLZ8WI4mpeba+J0u8Ct=4g`r|Hp$)Bdi$v5o~YQh3x
z#b{!+cmFGDm+!&_8<ho9Ze65Zd}T^TEkt~8B^81ec4|CA>#+MPPm6Cv`?=br)PR^#
zpfMUIy?oHm(@M38H_G1rF-&5U1`dOUQRk!XtqIM+CqlfP_}O){ftNbg#CKP<;P`J~
z>ByX(J=^K^(&5esot-f{a8%i>sSH~fHPzBmW}e*9w%wqGJWS6jRqV8}+?3l;wrdS0
zv7|QeLL|C*7t$>d^lEE=Z}-OqO;gjUsNKcrW02K#A^ezk7(JrNfTyvzksN+b{Z#4r
zyUxLfKL?r8sihq^ExUfQ(unZ{S8k}VW`ZWLL-e*JYRq!uugb$75j87y1S3q&Q<7I0
z2A_ht&RmC0r*E#DC!(LYbB>CpZu9F4#kE2L7!V74#@ISg5%=T3Mny0+Jm0#7262Ao
z2d{0n22xJbTJ|3sJoIA3g+!gwVNEa<@i0&Ap-27N50!-<ugC2Rb(ZAhZgN&U;>MS&
zew|!G+h!bE;*2blbBd`5ALId3eWe;XxzJfHobw}^TJQ$?*qeKSTS>7anz<}>^dKIA
zWSk7i>--hYD_nnU*?=n;WTJ9rgcHCzaF*>>@p!d141{79-h6!x>dB}v7>;{s<tj}F
ztxlGlSE|)w-*T!XxZZ?hSFqO_FO~ahJC#=0<w)TRTGF}1_k-ZsBu4+kHZEVyPsR+N
zz8>uM*VTm0<{deL&NjHS@3QpfxoJ-~F#{#2iF+(9PT%0oCfxzhonGo$w@j7p8GI>t
z@f1h_pHfDNlueSknt4w*gAIKLN*8N3*#C!e_q~Mp?2H_~$E7V7uCUL_=(b9=xMg7B
zseUm=p3RT{F1@+3#|N<MnfY=!%a4mUdpy-+)OX5PA1^|qi;rjGt@ue}nBt>*Wm)qv
zEn76_rn2Id`r%jg6hTDn!*j|c-0HFIN+j3QR1#9zjY2cn&!^NgVv=JHAJA_=7ONl#
zj?0lSo!V8pX$k^+Xp{-PG3pIg@BWBadF@FfZWX7cm36iReMh7`AKinhfS=NW@!jna
zJ$wy6=~n>t^G!1B;ock@c_daIq!)@`;*Bz!a!vV-X55kw5@pl}$TDQtX~AZv!x734
zjVyNuhYu+aeKJtP^p|l)POrgo6zAXITdWbz8CL5IUttE&F*cczPVV>fuo~Qn-dVkd
zYt#LJ#?GpcspnPb#VVk`XK97rudyni%l;4g^ElPuHq@~Igo+LKEN0d+ZwBZZB0<{Z
z^29L7po7P{rNV2@^(_7>PRkV-T1|nx)zb`;#&9HKfFqQQyCXF)mSq!pKSnpKt6u*!
zS3k*X$hUj@FOE){(M3kfw=H>eym1UA3-W^ukP;Md*Lj8d2sh9<wrWu6Ev-~*YNJ9)
zE=~VH78P#{*&1uQYpN$YE-)d)f*`S}0df)OvRLw91V`bg?4&uZCEQuv)P`*-Bds{r
zk`~6;aANUkY+f%;TV*+UVI|3vsJN>Y+{7B~IsDzgtYsBgl!5t)*0IJLVHb&Iuh)C8
zkG8j3-)$W3Y`oaru3#T#$-Q&U-P@xKs#b=0U=LNeMeg<6t9mbk{(P(jLT?LIm1`5k
zfB~U;z?5C_ku2sIG87o97XGzN=KoTEX0ZPsBuAs{hokWT6WIB`&GrN^+5Z2<lP4=r
zL;L?1tE->v|9`}vKi>NP$@m`{Q4cnbj(2`IdVO%Pe|TJY0SLb>J_9IfP;sqJWsCi3
zen3%BH^q&Jr_B%OEI~gMOX$i<{{Xz3eY(n;%10f%@X#yj?env4KN+o0Xx1yuBFnnI
z9(?uBV>F)fcRI{q?RN0CJ;P;?J*NsF*7Un?2zOyoYh7qxB#kGtNdu`AW7{#|=C?VJ
zb`JOS4{)!&P;autZB?kfQPAXY>p}EOGm0b{rewwtFELPTtoaZ=1!`(oEj3Tn@ffb%
zEa|civ6Ugz=n;)SFVo~&Ru8DD(|*6JcJiaq@aPt0=CcI$YF%b~kmNA(-Pg@&t)PKz
zpK@O1LEYWs52(C_wIh#E3%_Zt&NS`TTUR=pBmLSM4U=q?lS{zFp&-@8YCbm$@!Tz~
z*3D~*)=ZtR9c`=Wm5KGNErE*vnWe%Z(1}#CKhD&XE2p`?(96yARq{7%+LG-ayK7c6
z`}}F`rmx-Bzt`f?fhN-hD<`}t6<Hh9cv>U1xaj=O%PAkA`pX~m_@BkWXed8r9blw%
zv#3Q-yK`Lt?s)kT{(dtsW!_3dr^5?fHUkC=g+)hR?BK7y6H!mx1b<V|F|g9T*Sou*
z`*hx#pRsH6%R5^UyU~?xY5I6=rO5^J$a>WLxx>79$~aq|rk)|r4L;NplDW_vNNap|
z@2jsDZsPLmLLcG!cmB!OKO$sJ*^Fq7k8o$&bMDMAB{^a+Oh>IYlzQ&Vql@bm98J~V
zyy=zRRzIHo6|s1p4K`Ii!X5tgj&^wKy|#F3<~?4JxBni9-y`>32=xWfGtrZ$o{w%}
z5%<XHmfxJFTJ(u*+O1K)b;gafdb+x*1Xg{2U({!nm=zS={;SG=swT%F*Enizcx(XG
zjzmjKne_l#TAFTUb&P?&g*@6;nVt2WJx1;#_toZmFR5NlSlISVONt#6pcC$`+n=m<
ze65kZSL}MnN8j^Wd*AJ-*Y9pSxUI?C0W_~|D&kOoirIx&8-v+pM#Uzh2W#X8w=!>#
z^XTgwz1tmEJXW!-*4-hswu`#6-B|u=3)bvS+R-nPEIGfxF+7vENbS`@N-=)(+|;25
z%V8f8{?d;BW?5QE;cvVIV|tsy-!WSoeO>(Gx|QWSy=;_Gnl38fzLB~`TVzsbWiUhM
z*|H#1+eZRlZO|ip_RLWRT4mb6TpGi?3d8&A3p1+=#m8Xwq|vXyE9=UA|Mhp(uVS2U
zD9HP0z~6Gc6i%hZf$DejjS)UmX5I7{5dzk{0T;fnNyS&m+i7)UMfjK*N^q%${<5@Q
zsRFeFp&^;&`&Sh*;=omP-AaH?ZgZeO|6@3jXg2zlo_>Sxn5gK1X_-C{Ukg8;6uPPu
zUAg{`HzpsQ4RL)URf_uZAzZKKm#s7(@uN0bs+=R@J!;btg^KIy(P_>7jOwdLZ>!|o
zI||$Zj4u!-$C1>@Vhhx1k;OA$=V0UT720-%g%uu82}}LD-^tEyOI}$U$`n23;7Hi5
zQP##RYh6l&C0$#rAIj!OR%ma>$i;*0kA%;ayb+IOgpa*RyIt#|kSB_0;MeqkoeyDO
zbX%}TzH7k-#!>lx6*TYG(a!&D!<l--QcuOF1&MnB`ok}MmmGhg!*x#Ru^_{oZ$k;P
zG|Q#!=)CZ<3k%M)+@W}=60mc6pmd3L-cqBJ23;|Q3iLURPDCh{0dAF&$v?^F^K@!k
z{<e1}ZF?ua?RT*ETV;FSiR?X(T#t2zo$pN9c?sy_3y3{In2CT4XBt7~X!IGk0*hyp
zD_8=P7m33ZpDGaJJEg|}jVJA*8q=#%K5n<uJU<(EjdzgUb{zxxGsr$%4yDt06VO7<
z)HHvfCP=p+Prwzqljq}9dtD1z4`rMQichfW*U=_+dI;^)*&)xU61VM)%M>SJPq=N<
zC)iCpp&j&JKqm&h;uCzBe0hw#9=0e~y}=0LN4v}xYFJ+*%Qv96fMgs|QfmvgCs{tc
z9u;1$CYAt`$Qhl&af2rfMebSmMETC;#;XLmf_R&s_EGHhn1*dJU<i)L!@5qhw5y(`
zh_a?FYnjsOOUf-G?#r|o@Jqi<Z1D6}R>`(2qQk^J714*b^VqJmS!Bmw()L9G`wy+(
zjJ^%z(mUQbJZ>Ft9DR#^DjNqo^mA)-cY9;68LhFsw>71_mEAon&?8su0{I2%ZtlN&
z)%t$_aCfVQ82$WTg=N;o>T+#CP5J}vp}UQC6T$_0W4(sBF&Vp4sN?)z<cGW$Ob`=y
zn7s7OmNeZDU3s+Wd|nVp`AhUp#M?DI>T>*)S12&0xl2lMQ(NAtBbS4Lr8M0Nj|YuP
zROy(W@^^6F*W(^gKZQh*Z~0k9kHoer-=NTdFEv@o2-*w8N`@lVY%vfj{dQwFv%P)M
zzmJuyd0LKn(mE;>Q&E%7>_Nd>cD|O70Gq-S@pU@pe;e5dzAkZg4$PYNwtfc`ut4``
zfzQ%4<?~sVzZW+&wg)jF$$w-rX`E%<Zq7Fku~b@Ki{PTA`luG*-&&50;>805b_+UH
zR&8>$c;@|Nq~LX6yUl{(I*mLXq3Nb#$&HSoKFIp(E&uC<kZrx%-`aWkV+(%Xc(r}J
zeW=;j-ac6?W?@m58!cOQ75Q*wCbnZo_>+uq<M?prht_x7hdVEKHaCuU_V-S_LHfm^
z+>8CA?GukBa&h?zsvQ^W=}_5|Vm<n*b+oyEy!lPB;?5r1h>zVq%g49BzuDT|d9`zV
z;`Ot-yu4g2eE7`~%>P?|Sll*#^`cqa&*EzNTe>jgRkyRZb6jjbddumef5Zjnkl2jW
z<k@XmgoEop9S(`-WM`FNhKGmyhpoNWuU^1msEW+NcMi<i6>FR3E0I6MgGNWeqUVz~
zv6}`&D38<yR+XCMQ18JqN^fW4A_ZT5(NWgepe!nmmY_UI-AYgSFJnZ(c&L{)kS9qL
zpf9X6sKtYM2^BFk-eVD_Nw^EYl~UHibivZ71$=R_s(*||ZSt9Z=#U)At&R={7f!Cp
z4f*-H7}Xrpn9J*3XK;EhXpx$X0>*k6&LHxhcxH1G@&!qL#V=;%0h%Yy$Oks|Hg<nJ
z+J1jlz(cIXo>FQtKl}opz%mqCq3hg@DEy&Z#JnLHHsEX6nj5$ha*UfSsA>EpXyHS7
z$^@+rxC*QZO&xZ%J-CH05q<m-eyPkSCV(|KTi!tR`O+V<`O9HEczjSl<j5cb`1Q<U
zRQ_SLIV)kq#=P9vob`V5O25Ye{d6AQ^E?zZL-$ZnZO@!ja$+VPm{=G3)Wmx5PSoQ?
zb!0q&*#$W6c8kYqePO2O4tnZv4kK0d-Ur|7@gpJxM2un47*oRptkGma#~ESV*UqNE
z=2a@#>9c1qyUBU}=&*l7Pirr-QFU#tOr|^jW(P#v?c;Cuw-AL3!^K-KfmAm4cVEBS
z11aIZ;TMt>Hn)$ea<wS(8&NAli|9IX+T^^wLW?y;7RElD%$s2b!hnEzr#6ioEa;e+
zhfc#5gHSmY$%x~a3@JGxy5%qh5uKRF(f;mt+s#Nt&1xbF2UR@Wez~)^EmM474l<EV
zz12)Arnb#%S9<ievW!qMHAOBmGjNf2vv3k~ZYS^)pf2@yWA1(yVI~WON1u2zO+M8#
z#S!>!@@{Mazch>Ig4%s8K?UR<(Lov&3xbY;`Oshk4U~aNE_(e=(#1ZhLI0K%lzG4R
zh8vEsQ55#c64FC(q~1Bc*HF9WDL|rqDM97PZ3lt;6DNMbqUUHNl!h~B>nef)5xSNb
zbUbCm<N|eKDcFESDR-z8#^(P(<9-nYSac|E#{e2M9DE3g`gdI;Mo!FsF(7=ShM~zi
zzB}>#3)a0xO^n43hCsp%kVl_H!ZnkHsu{}@8mkh%)fNc~3W>c>Ci=v@vKS!>7a$@g
zaROTwsB(m=hVgW7YrG7FF;PyO)706M4hAT34lih=rY`7JFQd^Voi~|KEj1VHq>Tkn
zFw(P*Xq)(Qa*m5cPn+pT{XDSig(f<<aBb8J-haFee5Tr%seLf=!eV|N!Wr~qW7wBx
zlMC16i1Al)bmtG4@!gK##2Kd}=k_j*9x201n8h{6Y2P&OweI*S72Ist-N+H=i=HnZ
z;+hjoOOKDdRM<6S7GIL|?Esp{Mz($w#Q=V;KAY02GS3!L!R;)Bg+N|yxu}15fC&I;
zN^%bSq92-zh2d!CJsS5svvkxY1c4x&GI%UL&!MZ{IC<X}efPmem9L(+Y@lvvd-+cO
zdN6`T7^S|*tz2p${~i4~oZ(acSQffQUz=TEy3>2%d{w>^(br6GAH;<&!(`%knlps-
zajq9$xuX;gzY7p_&6qIWUeN_c*|XC@1FDqoNZ`u@_aE|Rx+75J9Oy*hZtHYK%fmf_
zrJH-|B6*_$Vkq;N1~r7Mz|7Jkfyp}_5XeWf@4>jd?f;;YXx8TYjl;d2y{}zq#*&Fh
zHI$3pEOk*Nu^TI?xT&1v==LvHx=I@p52x846?-P1ZEHSk)opLVc?%q0L!X2BsUxtu
z{P;>6P~b;-cqkS_qsso_&i{DEU;MCzVW+VdXlL4G>sM2rsj5F7h1oDF`Ubb|&QOXi
zRpt&1l2I7Y%d^U7(FL(>{pRRUpxcZGv|c1jYB0}^eVANu^($N8$UIf+VR=xX3kAQJ
z0O!RZy7{lB8jIVhL|QN3=9dV9a``+u4z@oX5#K>y@yn|6bVYP7H}ZnB0}({gX!|vF
z8ATbLfHmqZU7SX(CCAMy;4GSKe01fP0)lu~M7N2CWmJ&n3iKQJh11J@<cp5|Vj*Mz
z(P1{NgKTsannh@|8Fkv-V^^u&(CUw<wGMaQyy$n+{OVR!piZ^SW6{xni(2TN1l%WT
zclUpfqazEwmKHJfT-Ft5gSpgVqn_c+*m3Ml%J2!<h}TiyMMn;FrNmm<n;+FM!j<KH
zuy;&*JR~1b^pvpAOfa?&S9pv1?tQ4c2$V_V95~5vEC(7>fW~hB2Aw^JP#U(2m)p^E
z`RgRtHU@DH)5~xV+8{OtI!DqK!hbvqc0r3kK^KB^Gf@=`0=uP&J68#zn^-R&s2T=C
z<QNIw@HOs3M=h9Pe4y$F4cxu%7Z*8&OK6G>naKn-<PTKR?m_>uuE^eU(DXk@2m2S(
zlkQIiQWJK-5@3lFZj=<KC?)=Ce4JWIsl+rzU74VX{J4EIw<@xGd~do4d|<pYSjguG
znVOdQ0}M)Z#T4IW78>9f^}4sJi<f21M}jpc$LY&>063W11anQd?Rur^Xd7ik=^m*>
z;J;@OhTm5b-TG$~MYlf5qMu~Z50*u@K2R9lnj(#E{XydB)<k(!{c!^67D}Yg-d`Tw
z`XdEWJVsl>FpMpSIm^GcG+J1StxwYE+aSNh{feU{JMeze=%QNqNTtq?R_xrmN4fJu
z<;Zzt){jN*`~)xkXW>5&J6Ybh0MPEQvmiiE#(#eL*A@8L$A5mZ3LkyKfBqxzpFy^D
zK$_bG(!5e5bX~;6AHpWnt<X>#-L9fyC`RY$@J-rj0QWsi4M4J1(NHD_^&@ATX8hzd
z>wy|XnK|+r-^#$5@LzntA>a*ktAYk<3#7}Sqau2Jc96Nh$vOlnBra-a{Vwo1<nh2h
zI~4ZK3J|DL7`(k0jEUqf8B7~Nqe<@=ri9l_Z^EaTP^oQ}0Td}<m^BhCkO6g}F>J%_
zP6D>=SfEFc+cUUQ8?hlkSI#2nA;9E1<90e}6gU8qaONC^RscKw_84ChO}IBydW<$w
zQt2>z4eBzvU1u9E=}ds;v?qEX=WMzr-L60=Fm79dmL5ep7&NzqV(@i>I=bs*0}Q-B
zWZe?Rag82vyfjc%z?pP@Nc8aNpN(#C!-asbv3!uWk%*vPhNLAv!$e1UPBT~_UuAW)
z|MD1g>TLx-9~|y~x3jgqrC$69Uv8_-{evG5cfS7SSbej<yS06Iq&D`p;M2Y1!<`qe
z;if-Q4>yjW=0p5q0si_?ZU1m^xP5e__7Bz0tApJgXbqY_+}Jzb**=1kw70qYdJ7cz
znt~S4=}%GgAQU^^uTig3ZNaIh_Ft-3+lQOqz^@xGc6N7;f25(j+&SLE&R#-q8wx~_
z<DJddyBmk<;Pv6b{?WFAi7lwDoukd&jh$E9TMg(R`c&KBZSNhcqi;5LcY}%Uf4{eV
zh=ccL`C=PJiHRq$C!Xil&f)gvF;2+-v<Zs`1Kh2tql4|u9sFVYhi#b7#^H}OY4m9O
zAFrVhe5tlJUTu86eYBt|Q&t7`W%Kpn_A4ATti;jl7e~iC$FGmK)z|y`TeQ|k+lSww
zOF4)LyZcA9V6Tt1YtYB>26YW>z-k>e@#hz>k9KI?cJ_|94-a1-u(<Zk{`asjFv<-~
zwm}QLzsIwMMcP075nIJ|pbe<0@4wlG&ku1;X|*<R6^_ug{n#xA9l|0WyVF#A+h6aZ
z!~Nzqe!Y*)e7|$FU4;$XIl=<aCieUN26XzGW{vv<<KiE8VAV9`)@VD`&P%ni_1zAR
zTuK849PJ3T(6ViQBWsS-zj()g76>t96HYc7n4Sq7{V8S%>75gR95IK=VO72Qb{B~i
z=yLxa#(^2cLr8t<s~l55_&N%zKN)V^fSr>#G#||R8kwoWCgww1kGC-qbiLTP%?t0s
zSyNN3;kefV8D7Wsv+6<6M{PG-`?yzuF@C$-+B!OF0lV7T-EKi6qncCbI(d86_CEnR
z-~D|=)3d*mcb5A>ug1#LVp103V9PFMiCcB?(*KoVL!G86-odTT=%CJM^YiYTt{%au
z{vl9rZF!1)pcoYS3eo;(RV%nKkMw#x;$?WmvwtL$euRlPV6WkS961m7QO^Y~eWXWp
zO31{IsKXHEhCX!g9-A%+Oxf_~-9AVY^cO~&6d=pJukFm6pNQlRh@|!9(?6F^V!0V;
zWa}Z#yjOMxx9W4tK|keZn3Q86zG$KG|5J*Fxt4G1-)8;91X*H8Gh)I*aPA8Ty|ivS
zQ060s=nb%UUC3RJ3{02D!o@xq-Ip8kFnp(yimo#YmFTkb5Qu4iHb~wW%C3$CuK9^x
z-J-<_PQmsqui#~dU8zlm>BW4pBN*RYje2LTe-i-zauQnjs9V-(1Zwrmr?r*Z7U)77
z-R!)#jwgk~glugOkC>Nbhw`!FOFl)=^H#}{tv`EazOGF`G1e<P8LwDMX;@-;P&KD{
z5<p)C-wOI?UXIABT3=K0c?*^5ueT3dBf_7FmVUmb{>JDud;MPB^-@4;#C5eOjUhmm
z^974D9Ni5}Y_E^66NB-{)-1ylFb|Xer+n7J9u7eUPOO){)Vps$(S?6DG*NjVOh63<
zHXtc*d+B+CE-a0Y746e5v(EpF@e@hsP10+pXu1ZPrMI+3aS<Z`!WQ*5L7u>N&VILx
z4U3hb;e@eCOd5Y7(P-GzRj+@eF5rLhXvszmy>uQUd8BF7d`hfnGNyA3()l1Ak};mc
zmHAD+<1MYYR^c0b8{gpZRXEX)d<8-~_Hg(lktE{-cW-e@KwL=5J@UTN%brVIpWx6I
zpXDw5U*R2k@~gaD?T==DnQxY^k35ry%i}CZWHnNC4*mD^At7`Bg&UJ*)^&7|^xX4=
zNAe<TU-)>ie&^WIY0Bnp945ZR#4vbE1KRvGn&BN^ZVXdJJAO>7Ff%|rDa-GRk<x*G
z3`TrW=A^bm<{rkB1<a;OP`936#1J+r87ZtkQt0nh3@$53nx0L?lb)VEL#xPj3l;qp
z!B%AbzP?fp`{6aA5y5H>pq_{DF%4*T3JPrq0HTcvnB|B6b^_TZ-o2Xt2>*3A1nUrJ
z3r;?C$siBHHTp_7M1fM)Z*q+jAtk4Ig^Po%p;ndyS%UJosFfzWPVDuy7peFN28CC!
z@TwF>cKj+GA$yaDEggj%)LK1ZxrAyR>K9TD=;NrphN}Gi@0@ss9>#!7@kZ_s^P0^k
zzyIn0@l0CB-yCjlY#sgm@7186CvHD5m5d_u;8@nhn>Oj51JRByu7iqCgNg*+fRL&6
z3{iJF<2LeM2iP^J|Akxsd*~sx5InGfeY7y0BJb~}XCno70dThSv=g-Qr9TS1ym2H!
zslU2g03xkq*pc@{nJ`|*AY$R`CPjVcK_h>gsjIJuG@clsT)1vd`EJkNKphi^N$;X7
z+x3+vA-NE7S(5y_S_b8Kg(TG1c}mzT781n4%_Vnc5UX(6UmYBU&93O-Pa6HoYDwp^
zaM?8|9h|=z4ka+>_5}fsYZ{^C$uPC{McS$N&(2gu`_0k?t=p|OX67V_yF9!N2DpNw
zZzFgKZeS0D#$kUj%+OCLX}8D2q<tGs?1?k|eUkE@N!BJhJG@coAb{HxZvD#BnHKMh
znM!>*Qz;_ZI&XV%yg&rV0&@K<FOf<FUg9<Lw@5Ru&bs{pGoanq5de3m`d6i^MrUX9
zYsHbgXysb)VrdN@TXvdQdAq_39N~BVvY3R(2n8kB+D>6w9j!@Pn+4&QKCJ@Tt^nB<
z*!GjBnr|b>6l;sckvwrnvUzkE@-25wmQ@{8vic^%5H_LHy4o0z`q%Ve!RG)ZtI-V#
zSGpJ;Q&>tk9sK)sALI?Fyb6__&x6$jaW);v{SG&?%p8BOPViks;L!xnBEw$;Qzgcn
zQf6sFzDFM!m5$ntz^f3H2@njn(Uy;y(Iq0B4gL{Di|1P5?+bzqoz&%LUVwHCeowAs
z7~ECUuUvonEcoEB(dJ^ffzeilEkzqCg8BvmhlV8&EmkPhIKJpp5bNDQZ5RHsR*r%1
z8QrvirzI)BQ+_T&0{Q97rzie*o)YLQcDhJtvC{bO3+B~Hn;V#L&<Ip!>@{M(V=IXO
z6&Hp7BtSe*bfUWuyD)L!ixcZ{x)3+Qh*+x)LJknFn1w6V#(6ptr`kkATslX91|Sb^
zcO#{*tj;T?e}F8m450>Spz>i6TU38D4PbQ6K?Lf>>`Fq&p6p9xDdz2DY|nCJPceTO
z(rCKaxpR{GRP({i6t7GC)6)COCwiZQ-tUs+jUxTW(EKEGP!a>9FJeLe*dMda%gNRe
z8lxUgJ|l8JKx@&HLn~Z%8t(fntbXGCcX$>iarwFEIs)4)-&u>}4<TrJ>;z1-L%uN2
z#(^=?H2p|<pv@LbTha+HM~sX341-S@8r`^vdtDTAd2yOHdo>e%j{uRp*q)|25j{p;
zre7+j?s<xw#I?0k7Y5yb6|BV8M6jdPBjut;)1=k!zf13+(eYa#l#w7Mc6q-bi%8aq
z1vEdLGK3#KbO<I+3e@Cr4_{<@vkt>)6M8rms4Zx&ld_0wmJyi~!S0K$S+Rgnn5u*T
z`B`5&db8I*#xKb=EVK%N`YHO6SLIl@R;;pGQe{Iz$|Jq3gjI|V#=j!TaT<C!&g%Jd
z^^lxrpBbDqeyvb_a;i~3KYWOV91u0E^7F$Wa9v=&^&{D7(!NT2ou7LTo2J{)L`GlG
z8ITM#2w~mgWcG9pIVHS&xej!1Zw?2~I_|Nc|FxNhM;yAiAaQuaK``u;=0V^BS+5S9
zh+YWb^qaw~skg<Rt)tQPfN;?HY?}$d$5Pb6+`iZA?a7mW`|EAC>EpStPPhm^XhcE*
zwHVEih*Nf_V&<Iyq{9u_i=uRh3+5z{j?nU0j>aJ;*MZ{*(KyhSb(32#4zz4)^bP4%
zusO`rFoSZGjD^}~e=SNoOhHLb(18!c5nxdm910gOJ)>REpzp>V!IV=oFK#sxB?JsO
zfn*T|e$m#ZMHnLnM6un6hyQtwelBQeq*7;T!ibA`O7<|^M~rLZ&1jUk2#6hYrKx@f
zeu@<=w8cGOr{6E!cMML$^)?RZ+C|W?0-xi09KrUE`z^ZlP4yW-*=F(X<B}76`&^Gi
z=lL||6zQ5Z&)_`wI;!BpS-Jphm=IUULgON$7JH>F_I=9`I9$88$1vpFM__lMtYh5Y
z)yflm(OU7{u^dzn-O*$af;$_)1U&MX-+|RkXT@@sBNoEhntSg$xk_<x!xZGtPGu4}
z(~Nw7GCI@yK{DOrB|xxh6VRAG{31Z;0v3y-xJ>^>SHitPI?3uJy<2sfjW8hyo-2%z
z*8Wu#;8tVsrUImse0iNF<itnl7dWUfehxZDmT<NchK(ZEtG<pn=IzsesHNW-A}YlG
zPce5!ALF6@1_R{zoKtQz{zPEYDNs`SoKLw3eT*j`Nox9iIHj3?_DeE8tKwZ*Ho3|E
z_Y;7xqL;6C%OWJ!hu(5eq3Y_COLh`;)?M~LG3Uo5`1^TpTU6lXX7G`8#>`)a1@n+p
zbTVoSj|=Sueyqfx)f;!ag}ZznARbd`0>sW>PBfkda$Z=+7q=B2Vf7CXH@0S$H?}?!
zfn)2wBo0iul;`7XzHeq0Jzm1dLk!3oG>5lR(#`umwj>gC&E%;0InhyW#yi)#v44_m
zK1j0B3p<~1Q=?@)())FfvX1G)lV3bfzZ$x#)$<8pqcui{k@20TMEZT9V+<&8>e_gL
ze@=mGYj%mtM4c9x-4vlKK-P3ZQ0JAqN?ZD87rja+c309@seQv2%cSE+6^DkaVLTlp
zlzyl<R3T5Zitf>zT=bRv5}K_)Rfx-9pC+<x-KWep?;xkib)TfTKSPR}hOCHRmjLH{
zLQwpd$AA3C;p5GI_aJ-oO&=~_kbyht@E?b>06)%z|M=zdlfQ=ek1J39`s5S-;~#?m
z_*g<WGsLdX4ahoeKggw^y_{c+IDSNJE-hJ=LF-x20?Ln~I3?Zl6h3dj$M}r{ExC!%
zlFNUq!(U&iuM<#`z8<H&UV560JL>s4e9$;IAN*I=0~xJxHe8Rk@|Aj#_WE$i{ddyt
zpQ`7V_)p`MK5MM}*ZFmlb%F0*ugfrKibsq9iBF}dKXK&C`1BH0PKo9P{o&f6bieBV
znsvJg)<0nBZ5M71RCV4o)M{h7p?1z-fe^6KR5{o+jywiEG3{Uzf~qJ>T~p{GhazqN
zlwc_Py%b{$FGi!mv&WCGx$8!Mc>WmLd<?xkZiE<ToBO*5JKyae@9cg3&Hn47ZAL8o
z$6*2OY(l{z?pX{x3_hKLg(fg7f~yf>7X@seXS%Ql5=6d*EOD_D54yU%Yc5QME%6Ky
zFg0s@a19WVf!QK#jBvihfX}P?FnyD9h}J(2i!s27O|uO9E4a%}z|8FLt2+GmOu~O}
zB+oO+OWL{+4k2dc7}(I|c_#4lDVifNjL)e1@X<jH|NaLlsPMxfCxsqUcyco0$({Yg
z>fO&!@#6ICw~cq|=bHKL;2mtD{^cLuSBLMUffM@o^j5@hcMr&02<O8gOAYl+ipBsO
z8W~T#fI1DH%|ljW5M2>*RU;M19N-5w+Jx$}qpaFR^;gU+E^oDojY7Ky#f6IM9jrWg
z6kH?3DjUQs^g3KWr&%`}-2yuyWj`kjZw?FhD77Z}!1Iy*`Xxwug?l+h5=JN{`wxuh
zO6~N>K`DS!;17d2qXmWPT&Eb)ndR4R+I7&Ytrs3DPvMY;q*qK!fT4j5V@LG2G97Oc
zB$J1>QUG}Ynw$&*GXZdAA|8zc!!oVYSdl3xE<yD|5e@G$Jqh>>>i&#~ROrgO^TVrc
zC#?;zkq^#8Ua0H%wsqmkY<05aoE+9%--Tz-h^KzD*-y{T@<?8jg2Gqa!RX?zyHzKk
zIfbJHWiTA1fClQZPR1{qr(xaV9W_^Tw%RlepG;`pIk!jYFJnYXaT|>EK1{EHDYlYs
z*Rlm?_wa=EF30D<HV(<#*_r2hth)(Qc8Tu*C%w<YpYyYzM`UlHzcPDLKnDRK<F^hR
zX2`i7FET=hkvF!)jJk=24iuvLLyqj!<y>2E+ZK|9RtT#ik=0~dx6mOJ-H+?s>!3YY
zT9f<B=kWZ@5#hWOLj5El+~E5Z>N*y%>((!xf97@Tf_K>e5Dg{H;ugO=cHN4D%6PdH
zYV6@v{hIPOcj8}e(UX(^ofl(Mq2L?udzj?GFwNT_-KU-73-}S|ECtnXF2+#l9<#i7
z4a5)f$-oWnEdaUfW>=|aad)Wz^b2mvYeUPFMu11RQJWgQ-3C!btoHoeLD=aGwvX>#
z_(p^lEk8Z_q)nbc2NnRU=AdT<)WKiV7cn}ikem5+)~j+!DmamNymMHImk;Qv&^yso
z;nR?=N(%^W6~4c7`Z9Ed@-PVb{(~vu<|iH=Q5?R>r7Jo2xz;)g8G;;@#o!>J1dpL!
zHbQ@q%cY}>zR65gscKO-L6Kny1U)9SiDyL&BYC*v-(qT|D?kDn&18XHgp!S@9h&;I
z?}bG`B{Kz|>|?;%P)-BiI#&7E0b$-s`Zq<g*t$hKJsp;bvz&5#{JMVCIS$m6iowY$
zA0ZvF@~^|QHoc@Ay7Gb_bo#tk2cLwjlYTJ~+TEU0ke7zteuB9=0)fEf@IZ^)`|Ejx
z)An=QeIcjQ%F#Wb3!KH_#%3VyM|XO1=07ublRll3Klq&N^bU~dD#hbLR4tdpS%&gz
zVv(iZYAFQ-d_E`}Z`V$1NTkD*J!m54Geeh)WOL2(sQIjhSeEFdk-p9HQ68acw%ait
z6q%sw7^7In7)5;xnsI5rq6sjZc3zP}z5WOkq0|0&)FKcaxJFyLcs{XsGCa*jXpM)8
z{j8m~`e&wQq2l)21fOr8p}n--9e2{$(c%c$Mtd+Y^)#B$&%l08r&sM8RMyg?j);CP
zR4Vfda><C<kZZOAbrFXMe56j_3s^ML;Y{$NQ-)h10<5XiviqS#hgBC&TNMLtIH?97
zfqz7CRb{Y#qs4n_0@A6rbJMZI4_qs4!Lp2oOt_Q``kT-|*>gT>5`W~T;g#@VvzPRv
z!6F3Hc2H)?`48>tBTmxjq%%h5C)ip29=-<UR&k?{NX?kw3{H0avKBZqfev}tou{xM
zqnd>+(_VDk@gcep=!ZSqsPB#73&!k&NM>$)Y-(qz_VywVv9Kfki_>3!u$!KZHv7X~
z3i=;3gt9FEJAE_@QE7b&mHpX7HdWY(^b|s^P84~XQDWwb1(Mb;7=`x}e;EUF*CSOh
z)BLe(8gyTNiaDnqU4yBG*JMKqKJ;e2q&F&geVa{uAUzh{EjjlpzkrLkWH!1rjZ`Rg
zA>IP}m0netNKd!g$*$90?x@uqh8l>miz<l^f(DRuhiTHeWl&%ZAyFb?u(*}UsLA)!
z>9C!~#30SZ5^wFqB@^TUbaOo?7vO8q|7&nRQ^SF`)gsOiPVzYjO}+m3{KC$id`gF+
zXAHCa3X7tma|Yb<@fhT3`pFuk_Ca2_Np6vtZ~DV4l??mio+OFPa*d<i?VoR)=392m
zK&`-EN3d9N6(&-l=Dlxq+^+%wXUfA#5{|?I7Dq_8hi=bs1HgzhVSJ+IyP;-VPm~U8
zOD~X9I)NIA+CInI8rPsVvPQ4Ie@-l%U<pq5eUV&@{wGo$-oIrY36rU3hGiDdbUmL`
z+GPM&M|i;zf`Wyi_yU{S2=WrVBq-*D;|j*P*eOR^3zY*xgnBEEHiFC_vr!5nW;(KJ
zEZ=>0B_CCTj0S#o2a;MM_aOT-XoTwKz1@z^mJf#MHAtq}pqr(gmR*uH#TePw$y-|R
zDj8?>d`(L-A(n2P<YaKN#FC#em};h;!)+}4)8ajBs->l@YK#QZ%F>#uaNlHiP0!9M
zSuLwp)q2@r!&{#lntat47|#_*&}sJ*G0#iPW+C^_zNIDnV<kLEtx}gmHDS|-g<K@F
z?JnhAPIChFPS6;NFTdHFw3z!uF2U<$0DIF&8@%bD#>41^zM72A$LW+Y(C6Qjb_f<1
zsX!L<k0lymz-wzlKKD&EDjyGLpqfv_;StA?S+qZkIxHIAT~h~s<v|AhJM)15&)%E&
zwQ*!^!}s6%6cx|=I1)$#VJDMe%gBIjWo*C*aCZHKXrMt0(PB&--=F<GOYL3VY6);M
zxlLk_x@%ugojT{bR6BgyZgy86NXZ>>+2R9r-YB|PaiQUA_jDap^OU6~5lg}33>XXt
z1|Bicf*Wu6Kwe_?+Q{#CB$x1-8%7jEuG-^@3B<gj1V-ypQW@XiW(n!HH&vT`+Fs5o
z5i8p>c-zuw*g!CqOGAjfEsgDs#M!QlHG}4jU8`|$+3I7vr?o?@-gIozh@U&+O>)dV
zcX`aBkxOG{>+~!GyT?M?VkIjmUpdvQYbW|^b^Ql5HLHzrJ*L$Kx*z=;c+ne`*>)U8
zxK#M+(%)PRL_gw6J7c%6mP9<%bK-#|QL6!P{Aj0UXfvHba>X8_H(jzF-(sGyl45D7
zo~S$ztf*z_GJy-|$UaI{9mjVZkveVamNu3Rjy**0&{pgdkjkx9bG(CSj|S`+uM@ZR
zUof6<IlpMXi>M3{t1Hxak=x9ah1_%iq&1XVMJ&f(x~7b;MNNa??M6SkQCkfw-`d_r
zluV~CId*Phbo&l<EN;dzcD}3nqI(v+Oo28#hlWxn@n(Vi`RxCro651wf<OP<jeZ+-
z+cce2*?%6xfoCKGsME0ufA;@bBQzqlk_;gCf|L0CJQ~Sdk~M@a06Xc1vK*i$C?Z5t
zwG<dHdZfM^49kp_?ev;7A8d`M8{0je(%+`qKImBZI^RNKmLQqoFzU0nR?~8v*mune
zlA}4g(U|-zc&;%w;z(E=MKqD%uWoWRhm_Au#`r@gEGcPSqw|3_hJ@qYfQ^Ht&5JVO
zY>zC;M;s7U*H{fr%bYH7<CoUb?aLH9<msR@LB=U<YB!rKHholh<0;1&aG2aO`}V*R
zsjNK#WluH9Q?>?gXQ)0@`&g<nYt5cjg8Ac*)b>hVS*5h=x8eC@eA?MvP!4gND<d`%
zi{4*HL)6mH%qSwT%cECExbk;m(twR8IQkz^1=|`SryGy-5(25CHxYN%U^13FD=fZ!
z+Kce}%lQ1F-TTrMfw4EFavpl_)%L4#US6=(5WjJBJ&L<@g}7k#3o8oIy#uztZj;zO
zr%3O8pJz)64U%muRbnT1GPBu?2xMifqDAjP;DYhhA4O#;nd0s}`Th4>erj>M4tSJ9
zC_WDMzZ^4Z6E+p>Rj{*l)Hld;gnd^g`|TJv6P*yefO%pwrXm_iq<UKuw~fMA2|Rl;
z)O)h7`_w+4`tsIO7te8b>+H8uD`!1VYGc$mvgK<`>>zzzfIXg?CnOT9{eLZB-|15}
z2W_VvzU53sfu_@KcW>IaBa!5CR)*A$-6sxn$cnuRjr4*}0;GrX$}=3Gd#QC#79x%Q
z>##XH^kcFMT)*i3`ayGNyY=jc*7n}_Kfsf8>ofLpGHkRX%NBzZ+uk{DZ0)q3AMI{s
z(bUSc#N*)woN;72;_<0POG^bMI1P{XEeV0c_$uRQZwNah4PK|6Y@>I1k2e|<fgP2O
zYzJGazAY`Tj68f7u3+=9e%RP*)tk+ogTvPD-mYSEFG_;GwFGf%(w?)xE{hhcnzDSc
zPMX$vvfyE_$k##B4&Oa4XgSM4{czX@YkG1AQn77jrw3${S%(GA8PVm`t$r=B!h|;d
zvc0@Gv~8hfQ@sOmw{`v7n#<=F#KfL<Sv<wRg03(FSIOj_o4n|}-(AE@P3vn=scR`>
zH2F}cjr;GotzD-a8;Le!oY|HMUvNHhmC@IgYWDzv6s9a2Z+?96hEBb<50NkZceSkF
zsvW!*fy%<iHbeJSq4w~5H0epYABT@U%OA<1TsLI(q@9p=(Sx&s@=9`((3YEwfdE^d
zG~7io*)s@LFDa>bq&2uz2W>9ZpS_&nC}De8=E1qoG%1`i&K^%6h%?w8N*cW1h6c9E
zVr>e00k}uWsq>0<pS)$(8BxJ9HpMmOPD-<hGy{WB2XP9WvW@F#ATALUQUZ5QhS6$p
zKJf%v-!KOtNCpae0p}|aSrwg!zl3U*7o3k`!_iXfcbK+tCaQTS83_ZYDW(rm+p&T#
zlO%%$<!_>%lLMoCp4@sh_?an!O*cgcGxSGrIDtlNT2cu3u6Vg7eO3|yQ0SGmDOYTp
zawXQOWzZhwsw5>Iq}FwvU*Hvy?Me@l0qN>Hcs^hxS~#pJ>oZu^hCN<AR7yBduBwHQ
zTMb*e97JcBh3OsmR>gGGT~xB}e!TQ#HS1SMpV7LAx@aH1AqZ*Y{g{T-iaK}~lNt9A
z@&GM!@F<>jQZI=m`>2lNk;-Bf1FP6N9o+OWX~zOd6=w7s%#F-k<k)ZpA0#?Q0$Vj(
zVWLHhWl9%e()d$3!J{74x;jv2{Gz$|qKKt}u;ebt!m8aem&4dTS>uaKg5ygQI3W}k
zUcQwPyr(-DT#3mw0mDq7Z%C1!Xn0vxIh-!qu2LCW(h~OMV@!kTh!S$89b)ubiw5n6
zGX?^EKW@ienoSiLpL!rXPUNPPFo_pv;dP}f3X%PtG`fyjMq<~ZPDAzyYCcW}%>6YU
zTuWiXJ`NHT(N(09#RZnTnD1*zkg+Z_kx@Px!kooBvjLK3pH@j}crrN`|9V>EsY+yg
z?y%a}pp_B?oKW+vC#ThdKLeoQN?MDW7_(N>w6CwF1BXkI=9uJ<3TY04-ujR-9-y3^
zLv$#>mPKRRw)JA$PF`%=HePJowr$(CZQDP;2R-i9gPPZ<R;_#Y*(Y3eKce>{zUYF5
zdj7Q7yGr%Oz@i3vJKg9sGN>Z6sd_5a%VCS!v@#qs><?Y>m2fqZMHo8jJ$IX3X6<2>
zby^Xi(t4J#6)=uDuA%{_=s~l_xlF)<SHeKB9S5zxs06Kvqsw_TWU}DE84)U@lx`Ks
zqn^v05}h%ghLS_l4E?zXbHW<)zV~LvBhY)BJQ^_z$(^{!5PC*`v!L^`4z5`<XSzow
z%<zH04FC^Ik|`n!v!RqyH5H3twnkB5)$q>6fCOMxd^qJ6Xx#)+ktLo~-Wa!5Wm2Vw
zv>BC}Cm>}7ZKr_ey(ZBVX*GcXV&3E*+PdR<qk^4%$P=r7FBX9!#VEt~q7e@TOE4`*
z5Bk`53nOzZ{wOYF#N8JxWfCa;85tQCd|0jr$UsDw%{BVkA$)A~p0YE)ip(5c)x8GK
zGJ>1vp^A4O?+ZSSK8o$0RriLUO#j&ta%E)rl{2~^jkgoFq$J-jWaLpAaL(WX;YzJY
z&#j_`>G`v@g`zq4$BifT3+zW9k=hZe?!>e~QzFYP|L0HtRCT`7JAEk?M0=)ol}T>x
zLDDhdG>MusW@AASOW?93kO7PQ)1$7^+X!K=_XfJ;p_W@)WegG3+)Zk#PeC31S)0Tn
zcMh0^l(UI=Wx`7|x}$f|^g}b(aJ*||O8&W8(Y`a1{ENdo0Mz*_SyRD^R|sn}Duu|F
z<FpKd8Cc544OI>GrdrYuDa)*XIvxJLEUK$=_Dgakso6OaCk3Iim5)N#f#}?88`#SD
z(9b(D=Vo!Uqx(xP0o4#AEdqhK?l_J+Zwe3d{(nelY0M=!6RE2fqDh3QS*q^Yfq6!Q
zl=>i-&AsdOnkZ$5nEQ5rstCf}i-I?8)ekT5z4ew4L4{fG_4SK8!jwqGC+7So?Zx7h
z=mf>3uuIc&p#?R;HQ2&Mcb}>;D1pQQeAu;Yp4K44{R%F`AaYG2#cZ$GqM`Sf`}!@k
zr?2&_vQQ5(_h3=3j4j_*Kwq^#obxoQ_RkSw#MWPtWf5(6Szw?bLh?XBsJ_Bc5ODE=
zM1@<$J@yv1^&laNcGts}yilKJM$mO43PD!9#_qpGqx96=weT(2n(84+vKN}L!oz^Y
z$9@|dpy#N*7(d93Cb%dLv4y{2fXh-dj}&TxIrX0rsrLL=^JeRG)TQZs?&}hxXK@^#
zD=+(A>T>DM{nbYO-k=)JJ}&aujISu^0OKzT{(5bT%~@?l$LlYgBRQ@Q4q8iAu(ZOV
z3K4o?jSiK0C{n`r&i_k>rGCsnOhX0PZ`|*i3yq};F30FuLz&VSLqiZ+Ye^mzu@0A;
zP*4Z-KNa7<Gh6)|b7B+5HvJ%Tx2hf_KgN9duQx0Zit8qf-fwf$T@dAp3%3j100wj<
z=+}h+|3~L-w!(S?yvv|zY)!cOkDEg4Xs;+8MFuPLUC5MA=rT1-uSw${5H7#&dMKMU
zGG%|3p&*DlG-|gK+DkBGgLfX0YyH$<8%9zhB?D)cz_fb+Rg?R5qN2iYI)DHHqSW#Y
zp^6ULFr|TDQ{swV4EYy1j+>16>$v7O50i%82g=1>9o_k@nDFDvo{S*9Py(#Rr9btE
zg6bP#=moBB>Z4UDIaMUUuis1$Be-vT8eYH2iay}@sh__Uw7y?WBfkxi%2m&4KL-i7
zzP<1IbCyoAIt41!QdmL=G&SHt$!W7z8HBU9{n%*pu5NqU2;gxh?edS2Mnzlbl>vF=
zqh4$zzYA%IVylQ@j8-$xb?n~8D9rX|t=tW)PQR!T!{9>W4OZG|Y!P%Cl9GGgxJ%er
z!dm`w_YUbo{e&R>*#gL|$G9g3O)R+C5J5KJbvqZLgLc2*hScD!ERYa-V*zC>8g-t1
zd1*Aj`g$w@Q~@0&qa+|`{@`AwWWlvs&g1!13}Fqy8{&G<%e)JaOPgfaJR_>GKyASC
zg=!KnBe!_GG2$>k|8^8y|8zV#wL`m5bHXB{8X1=`v>SYy%>cp#aX}6bH;;p2LQUUw
znbpTM4FhI&>>a(33BbTG377-8eAk0nto_ryjL%dMcASckKiD#Jo1Q+e%|5lnyAR)e
z+xCW@rkdOzfI8QqRb5~Ba9DqI@SwJ-)APGKVC7ul;jw0-;a+z83OtYvc>B>oU!MY2
z-+X@bO4d+3!SnAtew2X~0uJHNJ3^we$R-Q7eyb=qYC)4g3kh6a;D#!OFNO(0xHhR3
zs~I)=!pWbrBUZS;HUI2ceyJZQAKZddjXw<U(V0BsI8+dyt?3(HoGO3uU*h9l-mtiW
zC{+?PV^3A7J`G=FT~QsvyN72TWNit}d8zw8jaQs*Z){}&({w6Mm~zRFzc;9Ir6R95
zcG&;^twT{6bDDGSa{twg=ePAVEcL>3?cDG?pM&!y8s|@Cv56r-=!9RrU%W?TnFyei
zG(tU2EYI~#ThcH$3O}5xKh)a0Ikgh&TwYl#YO=Y)%`!JY0<Beak+EP{-Pw7R;V)BK
ziUVs@v<2tn;Id#Z+L&#++1WMvuC8v-3Xt~%1^*%wgy*a*x7&0pXjtYqyWrBO8-AR*
zG~3;qekb6o#fuuwRzYNq>&tn#FpPns+>I5KF0MMAf_X-k;JP>ZjJDWQSx8Q51z~^$
zGLtXXxQsmc8isrVZLfyF8ZuFvjLs!y+~`kN2Vlus!NG{LR-<!*Q_W8b%^cC+L7v}~
zv1w1s01U??2FmTHnY(-KzOvsuG{nL<cI9f=`|}JS)Zv$^WMo4z%Ppn%zWLTg74Y&h
z2-yz`f65e&iuD)~<CUZZ7qBFjgc7t9nDuJ4%n&!TWH-}KgK({kE}*G;6mgQMQT|D(
z1fGQOT@H*Y)NI<;_Vd_!=?<SF?CIT9xex8^QJBeMWTc6^j^pl~z<e}1YvT<KnDMF^
z-|E_gPmRDQR@Vguz-Ym0!yT}PJ~-|H4?RwHo~9|JY}Uo1fyxrB(Ga0SU29~ly^dJ^
z4*I99Hg{PnvrIotMaF7{J~+1EWQx*k?1#bhFf7aoLz>q5<K@lCn3pFufiPY8`M__4
zGUG$-Fbb@gsYKP0y2q{Zl=WX4VcoH^&|s*T6%Dw8osnxNHyONOA?xLstppu&;%x-n
z%pK$!Q&TJkGU{+Ncs@q>Hp{+ad}rG=oaQNJ<arrkGDv|INJz+1eiR$gOM{T$snh`e
zKK%No&_oiLyJc@+QK{qdv7&MKUo3B*N3dD3F^{K)2W5v8FeOA)4Y8zdZU7f1@DZRX
z;3_nSCr$QBll#35-D!2mGg{p2>qvM<qa`Pc+<Uv}%VPRq#@7892jhQGeiX_^g8=^^
znsyExR2+0&bt%vBuktEg=OyrI6YW%nF(L4ft+)kyuS9pmC&R}4?Zm%$8mQ9<HpPby
zYP5cGq9@FRC!8dImLw*=T^W_AY*vU|@X-FTxyemT-&S}1$NtBRFrTmPZqe&3OVVqk
z9*gweQwsIv<!v6fsHh@*YfRR)8jJ^HvlX(R$~6Tni@b{)mQ$|X9gCTI>ti+@OZ}-~
z`|&6Jn{9#6J{*o+^XCKg`{c|whtBud_gnLxZ>1Xz{OYeQBT^0d7xi5^4^H27JMX3Q
z*+S0@c))|rqwl|VOr5LWicDOb?oH3S7R?Y;w45sz?9x=IQuyMaoU~E(!d0R->FKrX
zQQitBKpv3LED_ea2XA^VQ?x}>ca^>&{*af)ze81BSMgOz^>mt;U#Hbt)3E~1tl!7e
ziwcJB(7~nxMh%<`Hv{!VqThGbgVUhkZhh}>ZzP;CANaDZO8Cx|12Ow`5<ZwZpPA{R
zL^8-1bqOFaB-G@J+xYmQ)*8{&Lh)b3&dox+QCD<3ctpomP*05xa->m^&fjC!pg$^Y
z#GY50B6PIO7sEL-ubCWAUe70uLF6mk!_LBYWyg7b&EmNyh5xb>0N!OEXC4yxd>f-`
z?h*NB#6GzB8XmmwId`&_)GacAHm6-pU6w=7C45dija|FRZA_shd08S5dKye<*vw5I
zxJ%4lO^SV!&x~UEXQTkDFmoSf`ATkiGmIr2c;$+`lJl<PZaE@u)X~bAtjC`C0np+}
zW&@!bS*(Av7Q@pl3<3isJ1oGBzlBnZ&8~#9$ue#^iDOI;q||hiXB`IJ$r;l*(p45r
z6q;8TOytlKL6@`G<eJ-gR9SSc>zhTO5vLQlE9y7{ZAO}5C+)FA+?oCCR#WBvBxEWY
zk4zFqCbiNSr4#+slLj;@37USKbd&x&fnXAND`o<RsD8Mv6(mb{Kx8_w0(|Q?Lc*hT
zHS2<elfKdgYy`_3g(hgeme2G#QJ`tTUN(4#J0*7=;V{|D08i!3=re@?-6I^v4}eT6
zo4QfZYDrWx>*&WIF2M`hVhfM<hcWpiOt-khGSz<%`1gACC`s1rvPh8_tUVKSf-+HA
z_^w|9iD=vZVy`FqrHDG*sAepz$vJ`HJ@d*V)&A}4dyU}~t_n5f`}sS}R2Rr*;_W9^
zrWyM>LSas^;fQK>&feVe4~0|PFLy@6y(Ii<_N*L4A=C-Iewla-3!5Zu3P)semuk!e
zy17n_>ZI~ge`W_@M4jc#B^D(RUxw?)grCI2tC7PZd00jYT9r;m!)dDy0j`8$L8H%F
zTE+q|CVg3h2RNFc)R>{ZB#K)c-<PI+OCR)gj!WDU9Zwx5yTW+SD~VPb?;qdtTeFTQ
zP`UYM!3I2Ex#d@W3+7}$BX+7bD5OKyc(asDykw*2t06-bI1K8|IyqH!d`g3agM__k
z@CF`RVzPo8?O#UNK8i`<Do@+NcDkzTy~@V+H*sfN^#@)^XP&x0mN>Y__#z7=gOpNB
z(jcv1q2|<kcQeI>wYOxuz!wJiDnV>eNNRFlg?gF&#y3LwAtp0kG@T9hx8vs>xbYC!
zbbH#6qUa_`&_GTF7GTuVa~+OR7Ws2$_b#jtrAYkuj?r`|*HK(A*`ba%5iBd52-DLk
z6AJ5}N^LI^i5C>e{aSKuBXyT01g_VS2dN1T;18b%tJ3A37?Up|Srtz7aK6{f+gg^J
zP564V#tBE$a!DC-RO2?sYis-Lq*F0Y<ninrfrb9G@H=2EZd776f@ZN=Ig}goxTV@x
zxTH6PPqwSCWtr7s*${y+E>b%AgUoO;Z5*196%P$n-?UtGQSvNvhO((4m(&rDPE;dj
zJ8e?&%*B4Z_NnAI^OSEhi+BYGJDw`9C<T4y`anh2v?~Q>4xFWv{oiGr!cbmmT;a*z
zsza=XTT~?NIb4O=rB)7*SmBb6`fd#K<#P2sg<-7ipm!Z5ji_FjOF}pb1Lgr1NFeUm
zlG%iiAEC^a5J8npM$TmU#-ti6C!dqtskTrxr|AI096`r2Q~TG2)#F1OWBma`n`sd(
z3-gy%Qw}OKV;hvl8|H^u&_U*nS!2~>Qk!E~%1__f)avp|$!Rb}eA5bJ{8v75HCs1u
zv}a#&@eb-q$CuHOg_*0CAW3|4AX(&dn6es8?`a?A6;;<kzDm}JXpm=_hXvc#?ks;E
zO1TGR^YF7oxz2TPri9q36kFlT|H_V?V0YQp&wWZ4)LT5*M}obNkOog0W-<crly0IP
zt{)0xdoG5RoiBbE!X2$^KISQ!2^lNypzEJ%S>@HkxbF957AQ`Yj-CpR)RJ@o-=yF@
zZex)|AT!rh{U&(y%N($9jb)=|aSjt5v@~Io92pfI1k<w}L^UL(6ip#06D-0A(2eu^
zyxG^%=>9m|iX4+gG*ZVV&?)GIl>t~n7W`^U51nvtqL5Uy$i@!iY<b}%3RoONNgnFO
zixtGrm&l`d9LOHRFi(jS4&ylFGZrEx=GBRjGvgFriSVJZBi`z2--uXL3q?@VGEV<C
zAHbl*Fw-rZ;o1*g`z89fYFr8ho+x3#k$jrrXXv8WejKSnJRt6<I;Nh;Z?B=DA;fD;
zs!YfY6duh94O7CQ9^C|oGfHEdKTw9ZxqU-1t@;(mQHR5W{_w8ApCXiahsPIhCxAd<
zLtr02PFVh2Bz&SqR@F^BurZC`7o!xuJy3i@yo`fsO*J=CNwYz~Is61^7XvxDF+FbT
zMqte-3=q5xpm=a&+h<pAvvy7p|A{miZfN51;S5)wStdrMnUDj$O)wDYN$yJu8AMZ(
z6(wzvR@@f#ataO>il?&JB5JV&IU(U_JZd|Z{`*frn9zr_bZJn)`Uw3ZPA!<!YKVJ(
zff#JiUJ5Tz9;uWp`FF%+M`h>{{tu_khhDT!7c<#Shkrg5VbR`;8l{mqVXw^f>$DR)
z$MRPyXXtB3thF6w&V*Cwvkt(Yt8`|>Zc4>3ZvIt-CB0PRjr#?MEF8}$pir;f6OueT
z6&ferDvF_C1zuVAcp+PfPeq7~t-u*LGttwF^HWEo`8{j?7RZBnn2RK+Ifx%I5xV|B
zT;D}N+!r&X56YX*A*TDt5Y@e<m#i6NWl~%)&u*uI8=~+Q<3=XG#2Bz%g3%ry04yOD
zTWpvBP)C(SE!62>kS0E)#Ga7=3U%+VVoW3A-6LSW!2>>Fk>axYR~?UB+*Kx4-aEeT
zfQhRKyL4G5gJZA#Z-$rcES&A&Yl+pat4NdoGLpz;KNKKHa;?}?X55%M)z^)t+FDNc
zAG#WyB+z@-v6z7+pELA{bKUXGV6bzb@xLTQWD;pAx2rap5bbG~Zt3<%1%$lwvVePF
z0e5!F4x*Gzkn0`;f`L<jEL0hXr(O$f7%-xUVoloZORrzur5Du}vO=+`JwAfpRrP9g
z19pLYJxb7!DuhCo$0|5uXuox3&_c8n)~scimjGptrjlQlb=jnhh4UA8t$T?P;FrY{
zryxl+$XTft%IOE<YY+uttiU$2yX^cxFcGTK3gJacG*Q}-9F|ak^&~Z@$TF(+Vf<F`
zHPjQDB46T+jB1)lhBoWcHv5AI5`fl)N>-(uhW;XX+{fiPF3t4Rf8qYJ9R)IUG$wju
z=$AOW2n*6;A_%_j)6xVy-VrqUg%v_l(p=hMi7v-QPh-k%qX`>D5ppEqEcoc==P4I&
zN@3Epr{)agXE1n?XvQ8VD(yIr&TI5N{@MzLN=mju&5V_DBh{1oR#e5?`{Rl{-;L&n
z-;5m>5>q8(wj`Y7oPXG%jT2M*F-c7bqsHNn_mRX=BTpujrh-!YiAYE6p;DB_^;Y^H
zgNNqWvKYtffp2ny*eJ<!*<DG(_D{{F|Gv4E14Sx0P=JONO~%q?c<N?kk=BWv4q$VD
zgbCeo)l;HcR$$cAsMzc1LsRLH^Ws`ze0;+YO#iC^yct}JyGN7@;%FE(%R5MB<?vEi
z-gs<C#lmZ$2w%8;g~S@?)Oq7`+k6*+=aXOyCv{Lkw^cK_Az^pDB=lV5!(~6QatHs!
z8*lx&2$8%ZMoFNXNvozfgQ@*z`>ljm@d-EUdeE4z&owh(o7A(a8L+@yPyKfOr{9HC
z>LJ%j)q&-w(Mgqq+2{c_5#`VDT4Es~Rrx=k#Rm_V2BD+bNE1atnITc=W+Q)ms*ZQp
zlkoIlf_Sq*bCL1g*mSTEi-Wvhh~;DBz?V123Au%aBQ(ch&(}x$!(;8lx7_>b$@Fuk
zhxq&Co(=W&>Cat0uJaJIn#RYRgr@`5^30yVlNR5r>n1p6od@eCO!57nJDPL&{kqrc
zEzHc&C#g?Go;=f~^`03|yUmY{fKXmOk0eRkll~BsAei~+LsWF!P&srC=0+ouUBLjg
zydav&S?Od6<v{uEnj+15QN)a7HGL0nB=dIvDq>C9Ac6*|WI6wIsBkQ*hM2%}vn=h_
zxicId(^x-~QwGYIHH`*m)j*{lfQGyx6Er?(bmyOm`hXL6`PgLEN^x|3yG3tstHlt>
zulW(lr5(6y2q`M)wx6QoDaX&zGtOko1Ht(26OU3(liYX@fC8HWjVrnC$&~@fY^2RR
zzZLmO!G8399l5zD&l93xa05G@Xiq2e7QIf)(G<VQ*w=Xc^#kT|nCa<N*5<0HMGO`o
zTF0P?qi}T)X_0TM%f*I67~J=NWt0=v!zd%pVtw#vD2)cJ^5wuuoNQ|6n|5WcAktmX
zM&%$fS*WbeI=f1oYTUM7KOQ6Zjs8ItVu<zf$}(C76sg`5P*(AM23vsjECMdZD}^)`
zN3m$~>vb$Xo910Mmq;RW(*l+sAzSYk!zv6M#Yh8%Mh%9O@!*e((=H6mPUCrdYU{$S
zE7oRkB&`k%N;kmOtRPCBGiLQ1@fR@d=3@eJC1fecfy&YWK+5Q`(C0Vaf+ey7a<Fur
zcuE#Xl*;M?;d+nxuy*3?Wy`P?D(@Tce{YT%@Y(?7S-5$6o2s~F`SCgBaIiOt+`l<*
z@;E=nfiD&@b{zR3nLOA+RZ9kLkWZcQDpjqIve$Cdy?JrBmIhp4BO!M<Wdu8AF)0=d
zgGA+~)t+M|6E^^6cu!{?Zds9Y<;-u>uRY>H)icEBc{T!j&7H^fW7nNx)d@w!_x643
z9F0#)t!*zBaursb6wGT?d0A6caquyhBi|_9veEKQ?OZ0optzt(E{C)U^h=~mJ|Sh&
z^qI(v<k}z8!DA(Etg#eY;jyTgfN@lHTzLlrzb5qd*GZ+eNc#0!Us(K1ayTV$HfI)&
z6lKxjHD=RES%XK^BlF7z`cT*PxZkab{=)9qW>RDvk`&hfUMq3~z>Nmga6`kK(kI)3
zc9k#5mQL*Jy|s_6zVu$shqge5nrOxG8?{Fx(FM_P<YtZ|V)q-0YA*o|f6Vs-Ri#8l
zhMX67o59QNGwA52!f3%@w`86wu2|&2kR718OZVDrD<F!c^*M_o*dw?S6;diQAS1eN
zx7^7(qAqo0U4z_E0X?KgHI!#vSmN5-G6wH8NOm8RbL<O_6<0COGf_ii;YhYw8Hjsk
zS=AbuzIsj*N|^S@N2~Mt;t@G`_$}zgZqZ6cDH;;zad+b;cy46&S?>fokb*>XqKz`Z
z-&a@B%2IUWy6GwADn;%F?3Y{-Z2Dw^<+Ps#)u%&Jux?I2=r>M>&%cjpEbSc4gFC}9
zW1MXy;anN9-P~1iT_G)o@)Wy;WLxYbv&r+*OV}2403DJsI3Q<O0U)giA})n(Vxq2q
z4a-!bt+BU`Y185VE5*LSqqo0-M|;7GvqvWSxqNM14(8?)zy9h!^ue22D|qYXxA*4O
zDxe)79+HV{e-?smvkKkp_;+~BZnp^>n-?%`Ea1BdIY*zjnE-RoF>5M4QDSh!nL(HV
zHXDZi?sm1;EM76A-*F?;e(>6kEmlIiK!(jeNx&7Ps9dk#AI?dtPV6!v)<g7$Q=F{i
zGjeHz65+fWZw%hliahs3!5R(7m^1|Y_pnn!7c(mh^}s=kDr~B;|6P-hJh5wK2hjQT
z!E73o_w&~Q8pA?Tu<~F~Pv(LdkaWS1<!KGU=V*xYvxCiT4ltTywm8b-W+;Tgmmh+|
ziq`J1T>Be!FV<u5z2DQ<6T0?vBVow;fv*AL-F+ZGH+2Q{FXT46kw!E?Q#<P<5*j#a
zns=7JQZ#AP+(#|bV`se0^i$a@flII1c@R6?KW}6cv_KH|2M(a#JI~Aq3H~K>0xEHi
z1heYPu&$6_98J<_tSq+H+HT8uoX#^}7vVTrhSR0=Q^Nl8;AXFP?au(%J)@4hGnD_q
zK{@#tv{ylxZhv$QbXevD#1Tn+!vrQggNc`VaYJziUg>=Sy1v}!yl6ZbaDDz~Q^ZJv
zKKP*IWSX^$l6W#Zw9l<JBg^^X_nHad=>yPtl17k|g*6TSnHlt!X<f+*yx!1YQL^P<
zP{Io#2DWy>i6iR$oNNXAho)a@RGT5++Rbe1c#VEe_GJ@Y?XlBsPFDXKB%XVr_EuCW
zT7aKCT`SA8=XDN#j$d7V{!Ju}?_1ou=8#rJvcm^ETOJe|4YmvM2xDZ7i0~=@Wo3?D
zWpsK7k$LjJJjwGfPZILWlYIV{Cn+)L^*P^iD?~>W$E~7iZGdQsd|plk0O7PMzhmU>
zF?;D|7Rk+4Z1*^AQ{AW;Uh*G(YOv@NlgtfRS~~SWaU~@PK0{Z*rYbb;Oq%SxnF%<_
zqUxCO+r9ednlZ<L9@^4(vkW;Ee2~?uPH(nNIo?fB_+rJ#9ogiy(vn^67M-zybGrKm
z&>u6O;z#3V*R;pw|Lvr8j$)YQq<0R$%)&DjT!I#39ZN1qSvT69mkX7@K2(~#GzzJF
z6&ZyIP!Y6cMA^Ia-Zh0#9VMg1e+mr@LEBFTtqulkj`*9j!0BT`lW1Co2v9u0o{w~U
zD9LQctt1ULHglxdFs#?Bw(WPEl8b5qXTuwns=C;5V7HYUD9yGw&IZeqxsDx+Xz)}a
zbsIYi;In|JLgp!=@&3^V#cyf)33oi(9(Hcr*K+pldmAkh()BF^g%rk04J?7$J<QAC
zd?ISS-RtSL{&Km>O~<u>#@`sw|3Lc|d_p8NpO*??l6!!?o1GY#Jpw%<WQ6u|$LPq6
zcetBCe&zpV*kBBu(M!26D{%U$VA3CCuq|U8BZo0ze$2cK;4MqJ44=kEGsa{FdE%wB
zv>P#x=pL=~F2|fFg&1A!CV5Dlz`?V2^2o9CF@ihWDF*zFziRlW@6dZ>fcaXQf$JCo
z%xp?<RxP`L3R^))1SQ521KpUrpCeJnDEf>6<L`Ct)lrAHz7d#Vk#rSz=jpc*O8;p|
z!IJph(_2%MfT%E)FYwP%Oi&txG0s!v!ggpIar9$UX`u|7E%n-X$m=;98*3c4Mf{68
zl*I5#IF{tiH)s?^pt6WNjd*p0`dbYdqpw!n`Z3;8QPJ5QcS6q-SvZH_jh5JqRg9!M
z=dU?DCm)wOk))J4c3!xHx1As6^4A0!+dyL#&qJ-?ZuH<W*dudU0m_b<lx)3Jt4gBE
z%~d1zaJ0eSdUe&8ykJ?`*G^~}d{wU;KD;RZ9wn=80<j-naUP>(s3I!rD4rL~I<%MK
zFF)A5hGOz!9#F1lDKAh*EK(bG99ZR+sS!dj%uv(?S^mZV)9SMrI*|o@nkhWA!8vY>
zjvObt3^Q0%a7e5qgS^mNsZ|XxsV-zv>Zq!IKp{1&_<kGeAG-3tkAxdFVN-E%L=@cq
zWy6+h+fwR%OZgXIb!Z^yD<4J}O*NTJw~0VV>z3<o>ePo6OhvI>iMw`0gNKP;k}4}8
zwKLREa_T)YGNWOInx^&))@v;@vE_+nj)SyCgY0ae1K?%$rW8_ysAfCUDOBSZ`joi*
zd<<UPB;JPHVt*!7!31;_{e-(hQFOiBzj&`@JOD)`#mN~JuEN1l$Ca(1HdJQU&YW_a
z{3$mYd2-|7vABzl2wc<)8{>s%TJY$WCBgF9+?B+NI>E$}6k@w_3$2&?8cV674FiX8
z?3Aa|6Gei;DyO$7){9`<e_9lfcETlPD#NiR6s|dBw5KNlRCqxvxcQXclpfj9bGS>E
z%jrSWwF2Tha_6^_g-KPP13;k%!INr$5AP(*c<mhzut~ee^^wp}9K^MRjjh`V(##Jf
zoQ2J(d7Ko<qAHc>nNHCXXv&J+MX<zD^|UxQ>#fyxD0@%eemwyb|3FZylbl}xSYh4A
zvG4+5Q~>E5)l8y1tlPCM@5cf^3iFL9!HsfyjIA8m@g=ED^mytXs5DVvEcDaN)$5Q4
zmW<b@f?8g0#}BHxCZ@{=a7fUuIoS-=Sxe}gWHX(?+*GiG{ZvxQ&XC3CB{!V#LrK_I
zTd|R-)0{AcP@%BRQ^fUhck5b&4eqJ7)b<+-04H(WR}FokKsYmP@nJx`sc1&DV`oY#
z(2EDM<Y;#@^sxpxdfp-ke&e;l0Mkv^Ki&M(sL}rbIdX#g$4b$X2k3FV?IjYiNNELw
z<_Z&i?I;1O%!w@}m1ORTh_0DeX%LE3gp8yYW~M+gG<rK!z|a$0?q+Oa>aZM%3aK*t
z|LyC&W5DmU7ZTNOkw<5R7^FxHC{oJ-%4Gt{vcZDbD|X_-)Lgcs33qQxQ#lzY_rOkY
z*lq`y_(~h~%bCbfv~<wh8X<KZw&ZQw!)k<mT6RIQ<AxxcaF-!Wz?s)A4pCJ(htl}6
zQ>3%S6w&Qy`TE*@<}I)^_g_ff;}u#*j8hJb!f+(iLmCzaaiMiyq8`*Gg{YD*ixVqd
zd@xWoeWYL=&ztyml2Kv?kEM*JJdh$E?}qq@z`4t4ztuYz!R%I39Yny8Xw4y@+5`6O
z-1Yo{PQDxKh%_opx|C~Td>Ex&9#?h3EBr(K2s-{Eb*%%<7{mGQ!X_QQt$jLBKu=r!
zxV8+itK$%zg|h>75VlQ>Bn8`bpj@<nK5Uduz9n!H4gti`6#kv@+un5^hYc>yK>413
zX-Q?3HB4IpPn9iHZ;TgY;<68O`*$ijEwZ^}-6EqGbOes5mHaXuCcg^73B&TO5lCGW
za(PQ#&%1G-1l!+s=dPOQVH*laTmoPvNCG=P+fq?%n}n7)weXsT)@~lZ*uTJZML~ml
ziolq}PqTPr?)Q1w3ZEcM-CVG@Qg5$vP(#Ea%Mdv75*ctY%^#Q6&yoPnfrl)>Tad1G
zzmfcd9K7D#F4ina{{TajM#;^XE=m>=xK}g%!$^K7MfaUTE|*R6s++9Z{eI5$GqIEO
zQ`G!P`KF6hNBD8jx+5?!r8;NKd?w~=nw$!NE6sT$)hswrj5an!89&s2SHcuLDS*gG
zOfCL(0F3Ah^2&X$;J!L+(kzpvhoajkAx1*-K6y;ZOryK>a-sRdpxKotcLh&V48PS&
zP--^9L4g+i3?e4gZ?WxY(Z4uDBuwY8abPSdFYJFr-g~9*nW`S3ho!Ksj%@(D=9C?Y
z#tbd-lI$j)%Trr)mPPYB+rlc0S?#9@`g8&z5%`XaXbThLWs3GP!MO|NE^KD1+<wI8
z;B?J@^QcVBo!MvDmaeF=OAb^?JfG8x39VI7%8tZKA34lcX9Kz{PyssPw;3p9b3<>^
z^V5K@KyC*6ni%7lG;<VB1YJX+G%5j`nP~m_WC>$GHr~B0#ez8t7HZZPr&p-W{u$0h
zOEW_A1&a{z*$(}ga6<Sw;zsbr1PX<DedVCvGc^)@wJjE{6*w|25_co83FHey+1m@5
z%^5(8h160Kt+;jhLDS&FixPvM!O=T^QuV*efv))h{P)gBDEQMk`}tsSzagXrAIv-L
z5j8tgFs7K;DWE8c!J@GJ)EDnnBqiUz1fZ#6Q(DFq6@o}OLO}sF$QVCgF{z^y6k`@J
z!cN}-iRX8f%`U5{ao3rw7|R@sSItjTAteM~1o8-thz>y#c`A#mb@fygO)#3T#)aQa
zX-K^(d<*tHE>20BlwJndIb6{hw3If|Q)QOwjKyw`+(38)Kzyk-B*&boIKIC#s?2rH
zpB%<*|I-Oyd1xr2GqiluG$60j)<$^U!g9k8!@x%zc(5P0kCWbgC)-!`CQWpkCcMiO
z)_XGdnI=pC9@cAWH1#30{Q0|}W-6Tx0DzhTJ8;Ce<H&&_KlvfzpB^AQ__QXl_y1NI
z?-%ABy_FUa`RAKKnk7_~fQwQ=$`W5Ckt3ZVprT3{Yo)7F_Y|LYd*1I!e7y>)0o<~2
z!0OSFgyUMaD87tqExNk+p}tbv$s;>t>!->z(87KojLSEco*5k8rwOThoW4(9GiRBx
zkANk$rB^<)=-;_qRToljp)ex06nH;txG1#-slwj5NQH$Sa(2BW&@&HgV`gy^F6evP
zZ)G2$I3R@as;W8RFny9O(e7`cQ%3i$i2F#nMDycS-;4f7eLq&+0-Bd|czy`L&r#IW
zFv}vC{1VU;aTPXhW`ef@a`cu@O?zoB;#B_+?!|Shcw$2Gi!Gc{iglr!L8PdreE~e|
zd|*n+U&BD~cG$nnepM~mM<c<ddluAE45UU9Tye!TCJvMZpyFv{`J5qp0p=~ehC>%W
zo#o<|gl_qErg_m$HP_{H1XfQ(yG0z%O^Nb8S1&WQ@~5t~cSa)GDu+6fPlGx794vX1
z7tUK38!ZSd*QGrfpWBRDqiRY_Davs(eB=Y)BB)g=%PQ2x!GsY6zJxN`l0p$>aSlxA
zi)T{h$zE;W<mDAxqP5@xYgTwv%SnW<L^lP4{a!)2Vw~13K#T`*8N_Y0+DS5*EIRL;
zhDKh+HVSIh7=sd)n-Y}=1PkrFA*ZNJfvC_avJ&W5$);w{5#vi5v$lwN3iOGJ2myhS
z)QlI1kyu7coymMpN5*&HpVf0Ws1JBV5ET4fqBoY0-OcflJ%Tr?r(fgY(1TvqhRFTh
z|Ll<D*K-Kkbm19(kMG;lY!p))uL##o4uJ$rL%;qZWFB@`MEibNCsJ*V#Ox{zh?H=P
z-dC(33YmXuYYb;TZ<4&|tRgz~ivRsXrjn1@><jag?4;W95P#K+N56GBX$59hfhsRa
zAt{~@&{)>#8FevqyjE^~O7&W^v;sSmbpm<;MoW1MP+8UMk|sCi7*=!^B(<C8Nmg$+
zusQ5It=TG?&6$X70}b?fO2SPPWO1~5f!>V(|7XVY?V?}Id#U$26Hf&%2i&9(Krj1s
zCd}nN?Zc3Qx*8dSJA$D}pOPOKYx4rDE_>qP|MGx<Uxs49L?rUuf&_q_zazWdlv9xj
z*+$azR$R2{NMV<_&Il?jyCS;0P<~X%ivle>Knc41XUZNxds&aDI*Ay6bV<E9dZaP$
z{a7^$lF*+$ZZZtSld*IdGGk4zX>GN<6@BU0AB$C$o8?d5e;UA!*U_;>g>;^Jq4ONK
z5>a<n&7anq+w12%X3!{E$l1wqJ%7;(6dDzV#DkXaap`aN9XMiYj=nh6b)+AvJ6&7G
z`PO{zd%jHJerC`%;4crKfv28-w7$<TYAmyVOs^<Azb}8@C_nl1Ve4>6xDE-_2bg{^
z4n0>lYQSlBRO8gbs-EmLfz(fNZKOc$5zO&Lwep$IwebVd9f}6Y$r<+VkrtpU<BfXh
ztoP$^irM~A;x<rng%XtpiiXy>aho7Jvq~@R&#wVcX214=MF4gH#U68F6@1M3tCDf1
zj`br%DA)?VcpfT@sjdPSax6^!BcaY3t*szzfGyTHCBM;^Q6~^n3)<t3xkeZx;#W~2
zE+x{Ey6lZq&;vXT_8L~8h{p_>fj|xrmvW1d_>AH)VhgYyV+2U%Esd+C=67C?GLU%3
znCp)$l7K;>Xn_=vi9w=?3oy_KAiNiuAL=qD1dn+C7s?oC<25UErnZqS0gY2MA#879
z$J*TomjVodoWKi<8;brv=-b%?JJALA{~3LIp0*8tzh>p$;^8*q=HlE80FODB34Mdm
zMcG|}H#qtD3dk~1yu=M1zW7pw6^D)IE4{&EbxEgBaB0!OwPthCP5ghbZ_w<1xU9|5
zI!o|_-*bK6I*wxeLr?}CbDJK@ww>E+)q>x#C953UYdk1ZDNm|3o#EGATU#8x{aj66
z&A>&Ke4b4K9QHgdzY1c_5H-RFb7(AhsMm6u^bamg*w5OBFF2(VpIaeyvh_`OM-ac=
zqSfl2>Yi0|G+}$m3jQ7|T-?jfVL>4&=9p|2c<d0d5U%~9?QPa3XU7wOnyd^0itcqw
zg#cU@*WFcBzDmXASg=MFM_4{SE(_Mut=s9B-CeVv+Fy3ZU)Bd2_V+$Qi`EvqUE0;O
z%=6oQ@TpYIz|Q>Yo!)K0Gw`)CWliU+P%>ssRYF`CMi5bMW~z#p*PU)bKoiSw-P;33
z+pOtaq^Gr_$iV#RsW%&(MjpaV!#@5FcOzhp=~!*X=dyEdbmto*urzHju*A7*u?0ct
z))z%qjyPY?FQ1A828C6C?UqqP<&VUC0>Z=J9O_(kg$VY&n>Ks<CSwj1yygd3ep2Y9
zL_xm*+{v8n0FyoInd{=a)6Np`9mkn;%{@H(QiiI)$vZcCzD&_hiVv;><xoSQ(gn8l
z&sdd(t^lcXc)>W((E8SfEe`tL-5&f0z0)H03pS1Z4E9dS;7o(|sNk#C3x=p;O^taV
z9b$G6eG&Yv?e#?%?441OG^BJ}Qtt!=)Qdpyb#tC#b?zWR=q|^C*Pk^u@Uhu1c{5|3
zM2IaO>yZ9|=c4K`|2a3wMz8_Je4yHze9>_sv@YW|Voh*QTg{q_oj#<7ay;v+u6;2o
z=cSXM*N}~mpI7jyvit<{Ow);cT{IETO`62oQ?Gtn!zrYjr7*25f8*tI`;SKlfF`D7
zmd9`6Pf=W>aRgpm=}=YvAKIcI?h81(%2B?8MZ1#u_}WShEg#!58XvhRPRR%=rmMu_
zQw3Ab{q&-vg`A)C4_mnO&T|?T@;i15ofZL=1^AL|;D#ArYTcT}?)zz9xo(YWZ{D9T
z*gIbfY6C8;UtQ;o?5sgO?XujNXVc7%Cfiko%OC9Tl4mE@&)go`X1NClkYh6KfnF=_
z^&Rj$Cc@N05Xo?g%8DsbC=(vyY6#1{-q&$J)M0SQ&~^?YYT*G2Qq(x<;cn%4EJ?1t
z<qU6LgZy$1P+V>%@2qvTc+H^lb{j$4_LnEfY!Xa)K)Cs%BxOx-4x?%X(DmB{^gjiP
zl|UEW>TGvTx0Iksdz%+6dIILq+mJUi^q3f*{epw_E`rclQUV5hn%JNbEn|mC@tm^|
zrc6s0<^b>%c%)j8dzH**(S{~^qU%P7$-U&JLhLb39lWy&5Ev1Q23q1*DZBbYh(QVC
z8P{ivf;&;`1$U*S)}mr9#HWtki3pn72KH1T+)Gtlw9s4?-L>%bR~bRTXRUSRn)<qJ
zcD2^QwRt~7bD`NWG;~V_kWwCagjaQKzY<pkzw8bSdi5x7OAAGBD`9k83ydu1F=^4|
z3-N&R9)`8n@8U&Rs!(s|a{mRq0zAih3;f(>lZ0-!NxIxi2fcIq+;hT?ss5LJlj`*#
z?pkbM>Jh`MrXY~!4%b#-a?ndBb4Fss6^as<mtt?O`VsRAuG1V<=Q!i}#)iFHu?jYE
zQskAZ*#YMUv0cx<e;O=My7Y~6R6PQLjL?0&7mWN~elo$LX7o*GZ-iG2u^Vl$3YGh$
z^?<b|-r@r8V{O`Qn+*L4Qf2!b73DAfX1@aIR;tslUuq`Yg3<44^r6<UP}?X`liMX$
zN3a3m&{Nc*m9~UH8Utirre;8j(;F(dcG*l>q@~wCBEAP?tQXIe<Md)e>${X4*2i@j
zQ8ak3di!TVFH8Z&<SsBFA<X+UyViC&&aQgTt#7;Q@Oe0ky9q+<Ixb+rx8sO*Ti{-I
z8SLf>uwmuv#!{Ftm?e{(H3;L*YfXzRR)`~WDc8bvp2Es%ZlU3M4iAlVsgxuXE-bfV
zD$#aR*pO!V!2<l~me+Hfp9|HJWbFF3iDjXb?V1jZ-oLY?(fjG=iby0Fi^x!tbX(mC
z28VT$U@>;|niFz?EBYhF0u;HgX20r>mHR_2TV8?WbZ^Fd+{||s6vM{A4+~Cfu1MMh
zpn26pOAm+OD~JXY7^@uuK~Y*(1_vCD`#0(|$&DMJ{4Q;gNGn>&n>TJ|K~DZ^Ym$Fg
zJS(rEj;Rb;YUX=fL1=+f=;Bhmjg#0EcAqrwDxy7At@wl7QjPn_*GEv{h|dfw-yLQU
zVU0cvUGUPF?q@8K3zD}LA|Oq1%8c9dJdfFl&g8IkbOAGo{81Pdo;$X;?a^Hm0dy3i
zQB8@WB}#k9=e>`Y@%S^;UU1#1rA4zAhl&qU4~Dk%Q2(6RN~8R7+uNVH4>a))8yoJW
zN`_nSNI$4d{fq?5)EE;tFp5f*Mngml$RO+3UqsnTp=AX>$5~N^g>q<dMd@Oh@W9pH
zNfF=L9)adx^mOPYm~n5jY>QRCG{A;nR(``?K9dd$dZl{IgpOsbHZct#fr)WLeX6r@
zh|hzAuoR%LLoyQ2q3|7aBnFXG)Q2hXvA+rP(y$qG*z!C<3;~IH<$ElG&&P#RMgm->
zu4r*kKbq?CRo~pxo*p)QnXC$albD^r^eMW6l_{PGil(*U4MrS^6^ITau@ismA<VR$
z<+6^;EBju0rVfI6EgqGWa-nJVG42*esnzpdk)nU@T(x63eiU27rw@-E;oWQDWqOK$
z*WD{KR@8~0c^<IHex`Y>Y*Ixxdql{fK(Ny$R?+moXm-r3QOK4$0a>0H?jA2K*f*=;
z?e&;R95oFE?1c<nc*Nzkp1A(^_I5~i%AM*`gUxe}-%mUttuMNk0YFh)-I_Q7FS2aY
z5n@0E7!@IPM);J%aYXQ0%X<c6TB1tI)eEV)tY)!Oc#Fdrkl*EU2z)0?a+^`v#G1W5
zT&+U&9(_7&!Nw|zrM|?HONT*;O-o1V!Wu^<O>H_&8AB~XpS5;1YL``h3|KxkVXr@L
z`OEqj<U3a-SXd#c>Aw5}sc8D7-ATzSx>c@b1R_IPFa2ofY0>_JcmBI%Tkz((=v?^$
zmCy=>MhLI*{EZPcts3sdliRUDCRk%^(Fg)K+IvwI0fmtt3M9|)_hb;rVd|eu*y6*>
z0B{X@g02ouq73AFC5q#zeIV=l>tpj9(`TKRx?KS(Ty|{S$B&3WtO*e-_X4?A{j2BV
z+cg_44N6fDshDXs+ame8PV>=q!A{-2G4uwF`E`&Blr=>lkl=b{$v9UxtB+X?#kt;2
z2Na29#ty47WcEtyY;y21H(AwF8C>!Z%ruB(aS8^}J26W}C?gfV{o$ZT-qdQQn8-hq
zUQDLX#R6@0k|jMDS3Eu*?-4s6GHV~E49V=ASxyjsvg1t_fAHWFQS;*X_5Kbdz9dQl
z$<ZRfo8%Rh{*wU0Ux|Oi<N@)@*A!sCjd~&|h;k|N4?$OpXQPLS6g~8IfvUX`-x}wU
z$f3Y9+`qnKcT;&N($kcw_cJ%DskOHM6@tJzcSvB#$}|+G{B;lZlf4x9p&h-^d}N^{
zkDt8>4dqq45*U00WkW|?i|B8^r^AyLWj#ii+DE)Wrl(@s<?+2P{Q?<rWQP?0b$6ZP
zho0fFEe2wo32|D~**9ujbVPJE7~fm&9tU%6mSm8oW}F$yEzranu7A0_xPZJxJy+w1
zW112(+1kqp&ho)>7r4T`bm=;FFdL;6k|T3qT$7fNMVifb&9;6xkbsg>16CmyqMUEh
z70Bm*y_(H7kRO~Gm70;2acyb0pBpl*z9YEY$z%+YBO00@a%YTBWzUy$bSCgx_gP+M
z)PN@<;X!O=4?u$<ImbKn1|Wq|wBaJ}RlM;dMFZiD*B2<(wM-n+aTH1Z8SB7GXcMnB
zj%I}BF9+0yh-Jv~Coof?w*=$R)(i5F&IYSJA|cd=Br$CrXRk8_Ye<s9tuaD)s?tIJ
z)jxF9t}Rg3^463I0dBXGCdMvrQ^`G3f?Eik*HTXFfd&x;UZGeim3p~e1j!2;EEU+u
zwmG!eYzFYN50mI|B1EIIMkpJZn`cC+zG9dXj30><AJDGhWhYnyXAGkSYD%?Tlt<zZ
zd<xp~_)t|mr*099Z;re-PFW#fbcgmx8mnE<YsL8-s5;WO5P83jvNE>;Tsm>zRZU8j
zkt=V*DQVMz!lBND1y4}rL2aOq3mjwwm?ieZR)+Z@I}J~hbXz;it^m!TZy!MGCwp(>
zvv#uXc5{8=mCxY!q}U_vON8t@K)2;XnZ|%hSm-(*@WX0kv3}mi740~3i<PXu!&Xs6
znk0?usH`REEP9b;mA+9W6{+E1xZnzHRhauP?#Zz_gdU0aNNPzSF;T|v#)pD4qNz$1
zx7Df-zALfrq>TIXJ?VR$%^U+Db<=v-ro0!pWUw`aT}XHz%22K?K1x<+wHKTbRvTh2
z>ju#*Fqjvkd7)?CeQO=7r|+R(&=M0ulfVXMDcP6PI~X&xvMqD48<x>J{GH%ddxD^q
zA^~4RZ?kSM%yOQ~X&V!A|8;yQar$mrsY?sVl=cDCJ;hp>448bHBol2=&3Mu}jRVN~
zX-r~8`C0KvSM3ACPxIMZ#kW(cGWklcZq$u+3>v;(Mt`Y3{@CEz{^8l!_UB__`Z=!s
zso`;Nudlyc|5@q&_IP9Zx$*7z;@jfyROaojCFd@WxWe#`q<;D~H~!VBSOcAK8T#dQ
zZe<TanJe|?dIpZ=hM9P-w(&d49QG?*cagX3vz&wKNT2R5<FtHiovK(VyY3^7GT6P-
zj0O;L`1Dx98*+lz+d7!i2L{%Fe<>Cb9Gu*qB}Yv0F5b!85z^l<&jge2i+D12hvrGn
z1SfQ>@%im6Ilft8k>4TLVF!pW<AqhRqDWT0yjeB{=>A0gMoFLD%f$wf@3a@MBdSUl
z{F-ztb^BW^LVAuuJK;$KrG0U<bOSCou7(Z1((#miS7PoH3GoJkd$0?5XcGZ@zOct^
z#FlJOvZBkcWSt$7_UHXNXLV<;SMFRL&vt`Y_XhLug^K&wHn?V@bsD2x;j*t-Tor^^
zE;^zH@HI65xHf+;z_k6-Ge-)tU$D94A$Lw>=eUa8&dPoo9<m)bXrsZq`{8@JYeC@K
z%h8oDp>%bP-Ed@>aHzI4hig!rr)Zr9$y6J)a>Ww58eyTZD~86$71o-zvZ6b}tnK}w
zegU8h^B4|ycpI*C?c|-;C|(w$p7*XZ5{mC;12EQW!<oe`$@N5+usy|S-|dppnj0cZ
z0cqAdK6Ns;JuiyfrAIa=UTCr1l5^MON2-mkY({UQN*CYnTMdtV))z#u>mJJ#-!n{9
zg-UkkA(YRJ2WP3Gx936+bc)_pcwh8a6?-fngBUw+tx@uv<!-8>TsjF?5pI(W-S@b%
zj14k8V`C5yhE-CT^9k654#$9sktq&MM4F3%vG2l2I^yqBqSFhy+%|9#KVH1(V^6Hc
zQ_K7NNGC`K=K?D_xby7Jhezy-<fx{))cRMLvQYt(ThxjA%=Mt6R4S4b2>|3wkX&eU
zuLv7L;e8IkpHwzKK>EGl856b{n%y??Ap5c?&IbFCNkEx1pcH~}d}IR0ANtLqa2`9{
zz|gWnDY*bOWYBIFyUsE+i}Cug96fh-&Mc6F@NcX1nA83Y?q80M>PJ1P(|z>QVA)9U
zIZHh8(VFnvBV%N&jf#BBf%kKCO-%?fAR#D?YcLg?Ndy*i5aorDZ0@M(kSoQoZNaML
zwxy`*zim#s=nk=#=$AwN4x;Sa#mJkFN0YGTo){>RUF}y^xhM<IOp63$Ide~|f5#sT
z!=6~PUOictFNDnu^U~sa$vRlv;9!%9doP_1^6Bw6*he;khoI0=`Kll1tUg;8JhoOz
z#XhqFmY?A}o>ro&402JzU?h!)#2N6j+M;>jGupy<G(mINx_8#-isRi)m5?SHT&LE_
zTD<Pf{W_Thi9<%H#W?E0j{lyIHX7n+GDMdQnhnqYgSAz~NML>&&7jX7k)B@>QHP+c
zNxNBzubXn4N$RJ!MQW1SyHBbRcQ*1Bj}o%o(qaeFHcYex>&Xxrb{P#gwbCXd*cnK$
z`{#!IM|9RN&L5??&VIq0Aq{JjTG^dUj^od$BINkk<-dXP3WCjAF}AA<qA~O?G#Ud~
z{}m&7XENw+=#N&b(2qmh84=EE%n|n%rKD#~!bw^kQNqAAM#Ge;@k+RfVF7bVVg+{f
zno)m`Kj28LFerpRCc^Xw0=b1(Ak%TMWR@|)V-Xd?R>V`tj_Bk_90I^iCV*h=g1QEZ
zT{cE?c7HbW31Oh**u()2Y6I~0N({$RvBZeY4UQ19X^q(7xECBWFeQw|%L<7t@d8+J
z@Q3q(Mj|1ykVYqM>9G(ok0AGl^8Wq6n@fD*jJql=>k>wNBi?|kgVq`1chtpfy}>uJ
zE)7M7HC268<GM<?oixvbEjeuxiLy5>gVNkblujy4Rys8CP{(;b(LQt;h<^2{O`~O{
zp&3TZddv#eGe`mb2T(w-zw5CdzjOJ4P_|WYfYC6qfTXc_KEUL0{pqsj&Hw84O-7gZ
z=GJ_A&6mWpOeJ*&oQ%eUo}34x&-3f^{CcqY<@3wVG0{rCv~rh$KSaLAyN!I^XWHF`
zC<)0LN2dRCJ5UcIlU5X&*JuT)b?UYl!G=7kt>!jYY4-L`Txt5tlJrGsWi3zIirA=v
zu=7TGx$=xXN1IQ`z*8RDN!YP?NpSU)8YddoJmJsLB9X9E+kNpwnSPqJ7FOVqyizS(
z594<bcvY263o3JpMwA()#ELhW4(a^1&_9sBciYZadJX5bx6T6ypdXAb;<K?yT(&*5
z$>8jDN*?{PK8rjSu<Dpo#$$n_8jedNJIHY|c^OKM33X7wvC>^u=3eG0Y7NVV145n|
z9+S0VTJ|b7SI~aYn^=trNx+ypf!ZzKOJVqVxJZ6Vs$~9iD4<o`-!IzZa05o2WCLR8
zKdsEps0j8ytlV81)lZ+K3@*w*eTmB!pFk*;r(P-(;Scm){;8mIyPT43zPWcb&m>H$
z8m-Ok$VT%tkB_a#yKUHHj{FD5n`Zo6-yR(o6FdS(;_-!(Q6j{#6XIBf;0(p%zRcKq
zdMuy5Gfl{w%48UwwwS7@Xp@9}KTMK@)ys~{*!M&6_?cda*o9o8k6NbWqo{*=o1Nc^
z*2DM7Q9H{F7mw}P;)ZTw|3x6Q*=3M@T5vo7bS*$4mn|kgkuuhLtpr3vssRBHq7HtS
zDi(O69Yf6H*nZqtD;UFl-lf(G7J;ocdMe8-kJJaH9Ckr&Mvh8sE%E+TsuYuNI+oy?
z`Sv*VElIhVZv$~zia1Jp5r~lh@^N2-42q5hO|K=Cs$N<XkF7HoT}&=5;gzIwuUNoK
z-`CppLWu};TV$L}Y)AF>SYBn1^030ln+KOi_^xsY-CQ_B2O)0Bb#0T9lWZ+#qR1_l
zS4C4QxR3#qjdtMN&~syTmHc1LSQCq)hND)0r&U1?XL5_>+m533{MbA{;^$akr&^_g
zMpdDMv_C%-Qw~R@cO^ZoXDTmz=Q2mg_hlSJ(~?>V$9BqctjltoRTev?l5Taqd}x<)
z6Kb(mTJXL;kqv8B9To4%f|LA5c`>hs#579h<N$sg9MpF|(g_&W)9onaaOfZb#`-$i
zP2hhUf65Q%{m3%a&=bu8C@H&eB&k{+pG91og<5+WK7g~w5C@T_car{Ht!~^)V6YJN
zlNwh`O9;^pSwz>OQiF4RL;CDX-7>jz4fu+_Z0_FA;OmGe@NkjTH?y=J@@i3#Kr~i0
z5$Z<oc;<wOM2C&<U+*Y(4%Ef;-!UEo4#mNcG#(UI=VXsHaXET-s!paUElH&@iwjOf
zxd}m=Vf?E6T1*5#wu2wL!H<jj!0GP7DwzA!gbr?4DpK&JiJOkdA`k222fk+M1FcS8
zhCW8Cu`uWgV=;_640cI;$i%BMcoz+qCa@z7D`#C8!Ue}m5W`0n#*QI1@8KO}batke
z^CAWHVTp7C2_z!9Mv{me-VoTtOy{i)^_M8k!~(8B%d9viq}Nn99TK)Tg;@ryp+^fF
z@+cX5Q=G);-F$tkm<>%SR-hEpoawPLW-6GS$OWuW+l%`&tH5nt<XhwDXZCoFRAKEI
z$e$gbR!?G!$bLN7M(aPWj|;f6<|U{MUk#YXdW~>N>>^8r%CfVoR&bAA=Kqro^+b(q
zHRr@To>e%ojq9aG;EFd8Qe@qdk43z=Xx1fbfFt7oEWkyp8stt{gyU&kN?8I7LwAf#
z)<&}zHPUz_x7Y2(+xKHnTOwlP#qMQDjNwju#n@&6P?M8k)V{Kv-f5CxnPQ~lblW_4
z!BFf;$>C!jlSsl&ad&ey%wbo>?^^y+JN=`zj9ABRtYbTt-k~L)p0}dFc^t>MeY(bd
z+Ix0XESa;S{&Z9dFFWCMj{@l)Tj^S-(QFb#fB9E}ogA`-X@cE@sX0>5J%5Jc<G7@_
z;E6jndf&Nd_s>bmd_*h7@v0>opcQm9X38u*ifBDSQ$-|`ZOdKBk}dQK!bi9xP0DE>
znaNvK&$3n9PyEP1sj~$Yxm~QTHcq4dI3C{?1JTNyqaWRai%L0eAmkwLAbOp)oFO+8
zs8boI7U9;3L<_tSG1R)cY)q}3YmYjaoSo4I_|a|n<7C4hhZ_Dkqv4N(4SzhX;g5C0
z9|s!#c(#T=y61*J&T9DM`5OMCN4LZX<H`&f5gY69;2FW+4q|OCJz`QOqj2<$t?O0M
zyza(hiclBr5y6);1msWk+Ajrtk@{18g}+(Q=QVk@+SusV%4`{pM%$y&q$eq&;eSF~
z7Z>gBnabNnJ*v^vuCLIYCvI^Pu>5T#fyL1nna(^Ha5Q3qJmnl=sv!yYs<2+&plPu*
z-HjRI4q;>D%_&F+=!2607a}WW$~fu|Cg&H`+?2ziMfTp+yR?4FZ`joq=qBu?{mt6)
zvU9j=SoQ13n`^3~tX^5-z{$~K1Isugr7lixRhlC`PS8%g&pleaf!6FJ1T2G3bgtCM
z1Bs=A{i%ApYONA|1nk2b?7oy;5HnlV92%omv~@4c!^ESoEO4atOTlO>foO{mVWAIE
zR??z9s|BNpK1C~wv}nQHRu&S`<Sj^T14QVnl?@3@YjE-|Q6|1z?;|?iWde6t6zd9g
zmYB_<t*dR+Zx>5nY%Ig)K70~M&{CM2811T7qVeJ-Vc^%E(HxfHp3&z^>-FN|Rn13Z
zb4x`6aDOFdsaCjDD^96Y5>}~JxKt~trCM=IB~RUugFZcBOZ5jJu(d}#dN#08G<iJM
zcmb=g!e6OosoR)?STmE#2G+nNu|i20RL83o?MjTIsGgDb$SK#mq=DNM<KA^(2$lO~
zHF6+;oA<&qSwa**d)I4vXJ9|UQ|W<Ydr6$4oy{YduEc5gsI0tdBtHV?PT*vHbUhm(
ze4FAnz}3#?ikGRgX$73R+gG@-j4UJ*2!utUoOCji6UB@u-Fes)gI8slcNMGfZ#MK$
zmx2zpgG`J)dn7_;A_H_IF^Y0rkMP{7MEW(8g}k5)<$$l|&44Pe(q1H0hOUg_zOZa7
zbh=fq@_0p>UVDTma`*7J5?YYk7j0BXysnBO;#UzGpGKa!ZeK@qFG$8XHLk#57!fuY
zy2g=>PG7t&g$vchQPCb*&$Lkzx0+QMy7@zH%{B@QXi|@3k;e_SHOwQ^3T1jr1RCwr
zQz)9Uy~R|j;_b!lx8hmUX@ivY1xg2P*rX7-l<m|~`tHc;J?tH_U+EA$mp`0}3#2<J
zv6I(8mL9#Nh<t^>;FRDZi$-3cG&zRc_SBN6NY30v$s^SE)p=zKIsNX}-bJ0uhS6`6
z7|~rMv^PodQnO$w#IFZ8-jmU9!*NJ!F3i2Llv&iouqK7fzz&HJzx}0^wR_gO+j^z$
z^+o$X-oq2<v;Ppji}i50>_6$YdnZU3d3_OsyaxOEh<0;hG9sV{&ZPQ}-N&ubW7cK1
z#78?d955OqjAkQy=5Z35+DM+e`VXMaM_J~?*0PbCV$lpR^Qq4_cKuRW*yfkw4}IAN
z2R+-g5+?t0sbrLI>Uyx?-t6Vb691H{XunT8;fa5BK$;(4XU2)6@UV>fxO3d4ieB2`
zL}jG4P+GNl(&{^isln?>yA^oqT9X!K%0TkgT}Vp*LAj(UQSyoIPhVMSrDo}y{aRZI
zahd#!7b$L**1Q|-kBJF#YJ7*46Rn;tbfbrJ7V@x4W*6hisofQ2NK@L@M7S}y>WNsc
zB&Fz_NQjChgfIK7qKiCzfW6?N{Vo#cgTbkEI~`Tk?SWKuX&&U5RxoG#@6f!2d=&t{
z%v<hYj5J^Z3p7(5(O&6pb<S=I(j?TQjYqtji(3>8`jND1B{WGA)p+H1(ti)NJ;OCg
zrwADla<9A`pmu|QX5{Og$9csA*bfbljER>_(WG>Rif<?F&|Kh-<J}qOoVK#C7MBl_
zi5ORF8Uj3hrwti_3wv64&esy}SkxB<v-qTCTg#Kzav*WkFE67tE$p(zBHo6`nXn*p
z!ShGxG4h1qbDjhXn3oD_Ke7xy{I;`-MhlozfxrT&YIT+skr&CpI&!6!`3jGM)VB3g
zugz4J<ry+hvm!AI^rAI}-r@>aQ_VSCPTVp{Pm^U8On~e_2>YS)A_epMR4k!DVq#kT
zs9(`3ICSUZf@cV!p?+oKfXMM2Ev+I6DCoXFPJgfW2{(j+%}5zvTFv!aO_*Y!K49uV
zqd`<!GvNIQW+KlCZ^fIMBb^sC0gxdjaJnA)8z~Al&13jlsg#7j^<&xM@BmhYvWFW5
zPY=50mmC{HF3hy<(y^+oFnK%6g+yUgk|q-oC{Pj7U4Zx|NBfwxA?S4lhEy^GdpXLg
zGiOnxR7xfv=X3=bt)T8WS3OeZW}<yF3z&$Exv%LFgT>hhw(E?kVj?N4m34A}CUV#M
zz=mLthKCx)n04$AR(g2HA!*lX{&DB;f_EBS_P$&4_G2fLg{cE08AWyb^uqxUm#ct~
zJ^Rk&rtz0t?!)qS${7T^g|{xp%%hu#ntu9F&Fk!#wk?c2HQhCnL|<5Y@L4dwT!xg&
zgnit*#w8;CAin4|q54iI!&3E<kM-|P@t~G4V$&T&Y!T=dNN9Pha2&X0N4hh*{`Adw
zV)8;eiP2(ITPhj<kI*R~EVUcKf5t{xdALRiS+QPXCy)UPc8JrhtRF3E=&+KE^eCCq
z<%*TcO0HCb=|scfDzD#^Sh0wOs$iO>D$4Su&018ctt?P3^uVRvatcV!A#C<L>5rA{
z>1Oy3&S-Iw!#G8gi-28*o-#dSLPW7Huslz>Jl;#i(<GCj4A#H?rsh3Q)^Q!veB#s*
zR-eSW!)TC0HL&G?A-pj1z4b@2mOzgNXAr0?j_I$b&r=>?tdO*Q0s~r*vQEo5j&Z7N
z`a8o?%?YI~#K)nx88WvBI2_HYd^43R_7_)5ODo#qDsatZqpGavTkfe>Un}MQVlCzJ
zOP9A7pPqIjx`4Bax=7b~N4gKiv<OPR(D!Ib$AR=#%b~1OT~F;*Q|<K^-mYg+943{9
zcSy&m7*W-sC!DgSV?9xUp*NjLJ)6>cjkV(xlFdRc2pN;j6mYMj8zTQ+2-R}(Do0}m
z;xmj)*4gK%i_?2z8A<kZz?insxZeSp0BeRhu-Y(sk4*0*(@S&%HV+30c)0}ihK;!@
z!C&QlqXgC)!O{fZj?5y%CUj+)Bv+zMHZDplk0752!Y*xejunHC0bgkP_Pax*u^EmT
zNA@&Q72wiWC4%YU-u9l@yGCePL`gu51O`<%;CHJiB%!nTZ~z&e#eKrnLFH~)y@48C
zUlR(z5Ju>b8j)XN9v4tMvWzG4QfTqY;OtDm&Nvv3$_zizAH}0F-k&I?tZZoy%x@rw
zhEbv=l4m5*kI|hcUtm3d{hC>SB<|_W%}o_|&g!Id5qF?dtI_FX>3_S?6}02|5^Q3_
zQ8Ze5vb?fH9&K@Fv~)g<;9YxoS{V+mE}-bu>(jFk^+~R4zlU)|r+dHqykJY-|7pi<
zbS`!D@Ka9x%1+1LALK=XdjpU)>Ai);DG)xU0wvOCwRHO&j$<qzNfr|2kjNmabW4|M
z@2(nMxqorWNBZkxjd!dyf5$o2LAVuiNy}`J*<G%}lC<02g=O@QKd2h|f#t&INV_lV
zO|e_w_06-n*00<Bq+NfHt_$eG;{=AeIygw27P9w3Bv30vZ}86GX+RETP8~tH=vtK5
z8Zv9_M(uafu;NBuzJR<SuOoV_i=Y*-eNNz8Oyxk&FkxxarX;_%!{Ri$ig}U2q*k>x
z9a|B!P=!EOQ#TiJH!7;9o5Y|?BSJl@)Jhv0Fgz+1^~7>aX3A8wm9y+TR1+!xu7Yj+
zb+xg)cD1gPT3oy;DOV_zb+KEESEbTs(rLkUG%WUz#C$_su?Nq!0hhx@C^|lgQrPM#
ze>4+(xG8}RLZ)qOMps(%V?aNpSi%K^BQirM{F8-Q#=JY0okfz@qSfGmN{HWA`G`Xk
zCiSf3#yOR7Xif#=cvDBoDQ59ico8_Zp~0-vgn<A-AAI(?XAk>iYz5gdM-vHgxvI?P
z_%NA&A<aHWd%6!a&^K+9(xR9~1(p=quzJ1B*&)5js$CUmmb@{kC0BJ!ZrHTQo^89o
zR)kjzJ#DQOqvT!(d5cGAWoF{H(W+BN+ND&?&`DpGJ6UaHN{TvHHBpS-U&A)FjPQ(7
zr=S~O8gN#M+$xJW+Dc+c_yJ0HFUX20ZZ1oc-t>atWO$$gJP8$m=C=|@YGtbfE4m0y
z&J|;f>y$G!P_U%a%K9|3v1Tl9b{RfeE7=SuRntK!C(%jjD>(zx52)61idzR#i8@$C
zxmL}xf)A3Fnr-F8xU%8#<FqYchK8f$((CS{95Jd<j@sfOz}YEgsqoRShvG;Lv?Hgs
z3*tyr^hu&<;1slvj`oOBA8^De^<dh%d<;1rs~nHL9FM2us9k}MGjg1(cTzPksVbNI
zZ7$ZPLMnYHk9%#Pa2}sT^#n6p8uX&5Y-v!~QaW_-u;`9K*hlbxR#M2*z;jYWwDCn*
z3?ogLO~(g<mfXWrWJnN~p2=pVM0bqX7%ZS4y%N>p(x&+g$2iw%!?uo<FhjJsBX)|F
zMA=P31v8cd7+!v}1CZ~mJ}%mUkNv<OZ8!or=#ywB^-rS#Yl#9kK2lu;Cl(4it*2Of
zrqZcc3M7T8C@+zr>+Q3#w}(r~vC-d0_kp53F>7B7E8yS;!48VyDBf{;=J>fp>yQmC
z17hTssYNLyo-X)OdJxbeUEWEiH<P3taO)<QIR6?dSSl@Z@U7I71)Z0?0Lt7qKNhB`
z$P7+>Kuu<8v4ReHAt1Eg<j+7tvt0R5Uw}zeN#)}ksx#Rt8H**Bsh+nwNl!AFUNN@#
zS{FBndHnsDG;=F_Hn}8ul_g$TRCosaNN<8W3O&aM=K3367@I`)c^?wo*Yu8?s@uE|
z6inF}1(Ek>9=J^EyQHc!vEn<<nxttkJ)B?f<ewrFdZ&V86LiJerz_ag$(QiT-n@iT
zB_Y(rGAamFG8-En*aR#BF>7sAaCvP&mlEVtrj>rMi?|yIyWHvv5GP41N10+sJx2Sq
z+&FBSB(;sI!9o@vSI{QnTP`e}X=&hI8N>aF47jYfVb*WuijA~OabE%bd=v#`BSeZz
z=HbCTF>#qN9?(sP*EYUQ7#US<xS87XA0-~rCf>A1tEi8^5?^cxYd}ERIWk^;uF6?!
zzhC6Rm%NFWf&?^x6<pSuEy6cuO-|kfJL9N%6<?bU_CyUCO!_ZjcPwgGQ=SggF<l<_
zA7Dgx*Ccqca)MVV#$7G9N$POB9#oC47KvN~veT%8#`ZTC5xMBZq(JTtMv?Y1r(VK)
zNXhWX$+9ft^j9xNTo4MrD2f!Ug5PkZT}kxr*=q`JHYSK(7Rlo*e_qaMIOFZ!rV#eo
z%irQ<QD|?c&$REexL_BL5=twhe909~OMuFgnu2^we^A|#QDnjXyj~W|)!IT4OR&gk
zWVNTaQOFYOFLt6U50U7The-74gCzQ|he-9S2TAln`v2>PNc5i%k?1!Mk!b$z52#y2
zLICECY{TBjP#mF!XA=|ome<f#xd9ezJO3V>OA(p^4{o<(VnG~6co=;bX`fwfilncc
zY$-Qz^Yql*R0%t*+&j?n7$3MLk+`(td_sJF-c~T#hqEQ>;<hG3<Nhe?&zppY@I^7O
zZFqmF$dz=##^;0e%~As-nz6+Bp-HWkhKdk?<P$nVcCKHQeAu#Z*(-Squd=P=%{Q@o
z${eljMK*bObn4@naX)j@<MP<a!IPO%BXGHsQIU#<iZ%KIsd(xkQq`PsVd=G_AaX`d
zLS`AZC4mbGK-Yt=G_g60$7q;8>33{qD`N9PzoFg)B$5+&Hbm?f#?HAx(#+^xJeZ91
zkcKotP-9~TlxvY}g|74s?*&7<z-dy2rpN@N4Kbd@3Rz%-yJ!JxxVdcImI-Q;FF^2?
zcV#>lDsmK+U=8>N#(`l<bPP_S>rvbt^hxchPPM)E``8Xu)TtsW4SlARODe}R&X7B5
zb81cy>kGSXZg(y&+W6K+^nH%<f18@}#DYwseo_5&R>b4>hTTUXTX!xz(h@?}k$r`*
z9M|1QqUj_66WzMu5x=&i8W=y(<F#bm6YMY!#)Z-A78}4I)J05J*H&Gq_#m9GCe!Op
zqzzG}y>t@D?@Ye7CmcSSX~qfRFF;PY)f#bTJyQ!0=cYsQ!!!Gt`zNt_3mg`5?5^{H
zn!jv1i!Tg1Z(11OST9wMW~b_}j#eJa(-YRq&!$<GG9p<ABjPG<laZu2(SaqU4kR63
zg`l5KNk5PKt6mX96DQY|R8r=5ijI1ZD|G_6qW5K6sY1{7_mEtYIEw2rMy)Qtt)&fs
zB({;2#*D^udY(1V)BMXZ+m)i-;wiar(_+wu1(_5UH-ky{6mLNkwi}*Yqg6W{EgXP2
zW!J_R4J_CMePXt%;Hn2eFyqLpqL{sJe_djXJ1E#-7>(l&<FrUbxAs*eU~eyz2^gDf
zVP)E9k%u#-1&#Dpu1qV)T3Y2Qvv$x&cSW!lvsbLhF^icqoN}&cI4uMd-=E5<(bzrQ
zIoPeg;>q!%vHOB7`<yTt)5k92fmDW=GTQBtyO47J<e$RvS@DY^H_(DC#rkq7Ie9#d
z1V)fI7IufzF{NURvE*~UN~$fpHw<#ti!~09G75Ff#)fn~Gg=CIUa=l)wL_w8yw)f?
z4B7YeXhCXFxT)#UhSI7y4InibYr<5}soBs3s%*-960KlDI*+A-bHi9S`StneE`ni&
znqOeC?C-f!m~%={af-PqR_;NNvr?4eoSkJUFqB-CoVA?Ug<Evm&Qj5lJP^>$%{o(5
zTDTPJ)+FVcefs`jox{Ba%H(Mh`=~!zYonWryFP{z5qiPG@+1@gSn!0Ud=2NUk4j7b
z)73B7kopcui(;&Xw_Z!1or!!VgDROsn*8{e4WllWrAw5Ra0EjAdv|2KFZjX&8D%tK
z+z5q-lUqL4pjvvUvMB0nZSu01T;nowYGRX_F@X|aw6CwDK9r=S;#T<W=^#SC3Unao
zNvgbl)QLtT#By!-Zwc+G!ndqaq_Epb3KmY|Eo=}Z%CuRumXWB`rzTRZ<VaQG3u5ZO
zlTXP=b%o|oCi8w2#~9Sg^blLa)1GgUgrfB}`nnJ{T~kVPPpY-j+_#99IT-dw@&2xK
zgI9!-F?MbWJO2(*A*fBcFbv!kqa_I5jp%DWvL^Gq%w5za0~V*QihGrs4SX~>J45ho
ze1)ff)TudlI5?%y&4-tN?o_ObG|ijuI<kbht1RR)LT2Lj)m`X*3CEDWbSpjqc`qsY
zoL>2NY|d*$4kmG3-78|_;8oWtK}MkIqJAWe73KE|1}W(?zqzCDa?QJZOW#fIPzJ++
zx}fW97sdFCjjQkuVQ%^3^(zp^V-Q0)g^CU7D!5WG$&Hz4@v^jtHBy)U#hAXAsE$-g
z2%zeBS*qYWJ1{-SeF^2%1<$nVw!D}q(Lze*r6s4J3r<1U{~nIP)qe?Bl^wVy@(gD0
zl&_NNikzt5y0#pcl75<j0!F14=g3wO+*GaNaGd11*51nV_LtjkH2JKfH91}p2o;xt
z#uvZPVr+N@*=4t<k~5PWvHoiMX?@(gR-jl|5<T?InxR$~7$PuFF#M{hNKap77st4{
z9m`e2-nZ_>s3q!*f?Luo?4%E}C8aWydO+j6qfFn5W?K4nH)*2w503F8^`NQf>grKH
z9<$Tjm2>~$*+HhnGqF<cFI28c$`)%)-&kJ51%1}_{HPe0sa#a)7*`iHh1I<h8l$l3
z6@P*-dYa=*F>+fep^P|!>d8&gIi2Y$p@|0B&!|cTnlDkG<J1Omo22G9NoczYHVfpT
z8b;N0K{XL=JNMB@_Ic^JxXjsAYHP`QCR)jc75`XJ?b%Nsn0S3C1?B7Zc_eBpt=bd(
zMwm=BJS2*Cw>N;P*q?OSgZ%dxz0c`8I#eq>%(fT~#@ddET`FN+hS10=5gTVp#%DYe
zujgK??nFbJ!x%0J4lEIbWisRTaO@&k>9e5imh7T<QmisZ_H8CdfuM_VO5mZ|L|pqw
z98b=Zk-<qE>P72=2bAjNwHRBXt1E*is=|ds-Rzo=pyX9RC7CB%ZKix6N0WWX6W<j(
zOT%(yv@}d$Kqing2-@BZj1jY4GRFz=n~;(yVCsy;`Yu4sWV{P<C!xV0^dbG}6MMSx
zTq|G7*#>gwCC#xvPLYo5^zskbi{!Z$HBK<!SBkGoa@H{eA6|*7nK#m~uD@S$gDSKG
zi*uXN7-GCF84j)@hKj%&5D-dW#ls|{5uRx7K0QALzxQ188KjHL@Dxv<z`DrTx%$hL
zsca%$W<;84O3&dMWevaTQ>g=v`Syv;^<A}X$F_vC^|t(ZO*|?{**{cMD0Ez@*|k-M
zI@4m!6KB!QDQBdWM5K@>Lay5JgL&$RtKK}dTtr`)d1?v~S3=fKv_&{7nH;6Zuceg1
z4(~4ApqD|tPG@O@6ssfoH`l-jZ8(`Zrx!9&pS66+8kmjjy$W`!8TYBl^o?rb7JR~G
zn`^_G*h|@DQ77ZVNolV+zr4I$(<+>r#ap+uZ%t_fd*f5y8{3^Rnd39&5h<^yK<AZk
z<5Li*Dv0y7Bz}tfoh~f+@1kGszP07!v$&dz*kWd$if+_DA75Bx6)ua56suCWylR`Q
zb}+*}iA(5z=M2jd5O`Z&Tv%RZXK*;6V~$C+WFYFeB*|!oSB&DSMAGtCNB9R~+~~F6
zZ!_c$185-I6DI|4N<09HwNjZ2gFfo%bjDV)Qk-jz5bqw35vlixb~Va5E+l1v(=zM~
zh$wC2&W38F=nq!4?3CfJ1R9q^YND~^Lt04n{;B~|p-X#oT}k)_3h{@q5LXn!0fbd!
z<f>9D<w0UWq|;c+Yc;tZD}bkUuS03m7I{q@(&fS$qLQ(*tgb^*8b^zDe&!9OkjB&C
zP`QD5%TLs9#Z|$@8@07~om*>7x(XV*Fj}YLlHCS8i|%NmWjDi0nP^;pFIqx!NozEm
z7IKReXeK$ud2Db8+No}sc8*iE;xYqsDi1W`lZ-}GMAg<Hio=8Go*M8;8ShRzQP*q3
z4UJ!<YO7l_N6XAG0xhK9r%!AJ0Ph{Kvwft}P5(*j)a(dJd|}Uc0@;91ZbNAbrY!_(
zq46^YF}7=3&tm1giCC@B_QM$ZRC-Sq*Su>f|HO?AwIm?vo^J|qIw7P3EIy(iYO8F@
zre(&}hQx}aU0Cn93ZTZtDPo*m*YBZ;^r;RQro0j+ERv2KgSAy$p-CaaSFfvbs5rO)
z+|~-W?R0>(V$4a9Fc{)Z+ET;ND?29W!w9V($?wBLd9yi27i{!52klQGpshK#09Npy
zmVdEbMmfD}!`z{@=!lXxcxU;r`i){iA5;@}3b?!ov%ct6+QNU6Gq<!J!hiEieZ9E&
zi_KN9i>b3Ki^~nm%Myzs)}UtV!W7~<c^{x}3}Uf~NbB~GQvl5Va&d9ZF;k+Lq?l7<
zASGrTjA>R{>6Bi|lV{3LR-PZNJEHk!Y~07CN$=dU@7bJ>NHLY)tv4y%ORzix#J_vG
zR<|swa`7)~!LfAh4<>~b2?bu)AyNidT&ew%kx!~kS|E*O`!47NmV>L1AzHFSwxmx)
ztcWoqB$w>qwnXG%^0lGHqW2WqUK<diA3lcHg+<xmegr%ah9j&j*ikgUFM_SZ#=+Nf
z@WD(o_@bl<)lf2rpWqcW@?f-Py@6sR-h6a(GuLh2@cMRnmB>L%_OYM2eF#pBC3z~Y
z2!Gt73Cdzktctkw%UU3%?@-00s%1t>Hck$j^}tsbY}r9%mTT*(?KdS?2{y(oe&@jD
z#M0a7JESJ&;8J>GYfcVsvX`W7lI$~aVR}>w^Vci}>3(G6F_{c`cm?BrHzjT<=*Bm-
z9Lr^~Qt}tUoaLan;rn(ioY`?nLFHA0H8pbGck)ff{Vpr6-9Ze+P9qB^Ltx<1jFE#6
zXO0<rYB<sxoK4^PZ(yL>o2W|SyI)j#{u>H^9owcQvkRY@V&^Q>6AnyZ3dpH1=qJuW
zPd#9nSg6fgU7s^RL%c~7D_v9}pb_hI=gKjNW)d4ZbXb=*YisaG??1#eZY%-#6Do*c
zEt{rzSx{;xvuEZvUdy=kVH%`Z`549S3CuCl@``MozNQx5KC*=@yuxPtRzEm-uY0FG
z=(9%wSrtTc=lYTd<s-|W!oHDAUe<b0{|R0My6^FijTTH<p5vq0=`J@`)-H{T<C6PX
zwgk(HQ1|r>g=Hj%qLL!rp*SZ(;6tBcKQ1jfnteKAqb^OfT3bik^{uT|tFW}NAby60
zq179lPP!<9E#N=#v&3mi!s#LG3FJE%#mb8N=Z;t)8+L7{eR8WvOdpG_04J-=W7HOe
zd@r^9_;4vealIW4p&d>)3-U6+yDfOu9!2Xa%<AfXw;lKQPA=huw^;~=fTa-8h$qm6
z$S5LtZLsLnPJ7pMi(lehfi44NW61pN9Ew!)awt9;^(e*XM<MT7u-uFUzk{=|phEo5
z=bumdLu+7#e}DbY=$n5%?R?!i{pM_mx@BoJ>?}c7FKMv8>c#ANEW^*&PoL6X_;LT%
zmX}w){_1~fPnN%Wvi!7Gd%E&J%e8+#UH<xi#PX+TfC<|C2yy3h{L1~8{4nFw2zHn*
z;f^8k6Ru^{{cs)><Q;Sq(fNC8aZ%{Y%5HozY!7eoiWJp#NSIE{@aGXcSL7yJgX>#9
zyBD`gVr6;xUzHW;a`7zc58^RjSc`Q+Mz5aGvuf?%=e>5^1<|&*8A_t|1e16wUbLgG
zcrl6k{ph4UITh>Y@Su9GAN)J+594<AY`Ce2R<NGU!DI-F&ej0d<mZU(Kz>TJhAERk
z5C?iAh5zt+@O#|twlV(x<fI#S#H+Xy^<n>iR~0MO<tnN-VEKkZ>L~lK5N(XafYTV&
z?kBhzU=k$uj8%J=#SeoC`SuJ>#%<(X=|`yWki029&bm4no-aX~OOV@A)ofn8wSHIM
z+I!Z(-SuVRAMDi{cvJ9}TfC#Oo+%Ls#5?ZIoAwYlOxTo$(k}$5uX#gjY;Bbk$q$T$
z@vS%+oD!tnF#2s04>{E9=J8fw3p4lEW!i4u9PAGxEj33&3ww|>uZ__b!PLG7QPIeI
zf#g<KwE(E&Y!;Y%O5nbJZROKidB4)4`~22;&^n1)bPW3N5za>-2P{CT#I;kbktiNi
zkb<lr;6439PoEeWd3fHy*nyf)I;>e$Fnmm!Cf$4R1@aHcVMfmA0~|b~-zF$FG9+A8
zrH&AO*N{-Vb5Ryo{lQHib=FjjGU5C69gZNHgiz>lTVlw<Y(k(C#gERq9m4OG{e-7P
zjKvx16<+$FK_5a>N8K|<BLqpd4OPOzqv{3?BJ5&!2u%ahSt=Bw{-g&3%<kC6+3UfO
z*uxGl+I_UyC<or-b9|5C1a`(8<A<JZLDTBa|83ZEJF_*qK=1$7>7do_GXN3^H?rIa
z0b-9UELDAsLkgzsVGM=%VnayV05}A=3SK)RR^y%`u1_hFJ#GH<;wN+Til0c?Dn3Y<
zP=PPt$m(}3S`F_G{HJ&%3Ah6laR^|H>`{n^IPz&nw)a^Ra)zr)=mhQR@hk>X?;S_5
zzZPx}>T<XuS8t9{y>91~%!g*l>MFE&VN;||mKZ9)OXJ;UBOh?W>E|eDnaS5SVWwWw
zK8%`6vW#j=Vkzk$<ikpb6Wx4Vf=U%+%MK^~V%~Vl8OVfsY%CS!`G|@t9e>rH;#ONx
zrA@ls{rKI>!DJL&47zZThOLKJ(X7^<p*2G}`0blLw95o=<2F2&31yIsjacxXHMy`F
zKJUcpkZWyw-F`nUW+({HL{nhNtSMPF(aJrUA3l2Bz8vuTP^z2at-8LztVyk!e=JV#
zlFi2`vbYXkh!#FXW5*J%!@bsTL-qBKKQOeUCD1P=OMweAhcEv3%>kCfc<HRJ_M;n7
ze2i~PLcTTGm95krA23xat$)Fakz(JV14<?~lUhle-b|T!d0QzkAEhMa@D51fN;cW1
z61GxXNmfb5Gx%~;U#uVJCuD1Lur~+AlEkcEE-#mzJlt=ksy;`-Qz^0?$I;FD(e*V{
zt!Ur9gd5r93YkXl+TFu}49Sm=%63oq>B-cNk#xt7bu6s$vu`f2Oo5a_%tE~iFMZd;
z_#IlSS<^4nskTk49DNv{KT%omt25#3icQ~VAAGfR<=Vf(YMGSXa!FuwF0wlEv}{8`
zIP&38Y6~GoBK?hxn)p@}?UYpL$KrpBYPnAMg|8;tAtR|jxVbs0L`s1eQ}&i*O+$N)
zu6Q}vfGPWrRlGxWGlgY)8;>uVFv6PS!LWTE?IHEZC)=I84anV{yshgVb|<;43U6;w
z>rt=OKD``({Auq&sd0jP*>UP#X6=T5-d-l#TkmDELSCE7R={PQgUJV`h28y5MYz+J
zCnX2BU{kleWYI#K-*KMJ?>NuqcjRw=NAqreWKb~w4*37G_vUSJ97*Ev{QL4L(w^Ug
zyP;`l$;S?tw*|;%7Xiy<X)R4ZMRx&Z`f^t{7;Chj{f#5<%BrpgbXX&v84IeiG9x1+
zBO@Z?n7W4x4#uh98X8c{iq0Tq^IKgV;x$eS7O90V-4K&;@U+cWXn=veUwtMpz{Ay<
z^4xLi@SnTrZaEb?9h_Vx{nN+dmnE_M?=1RXS;(15{+G)aTD@#SNxM5tWTDI9xqRM3
zsd04(fl9G?Xx^syJj|k32JCkqTzv^~?*L(zQ(tn~F2Gru?S4!a`bEe2H`Z}JJ0<7d
z$M~nE<J@0sDLTf_%E<w<C|Dou(0}hb(kHGYO&1f+W)u_7ip7MpkeHyT!ky}a#Due1
z#Dw>*=#`ZfJ|9KzJ*s#gO}YD`*1cb~?u$bAMWLHnp(}Bocu+p%g6c1>+I>;#KD1J2
zRJ!-3J$+)j(-*DjQ_`Bg=u2Ppr4OJlm5>!YC?BpseFm!2lRuu2%Qqg><ZmrgnP?|+
z<8@7ubm=&SK{0R~qg7GQibNVyVBz3G_m)o1rGt4oS3>m)`)1DO96!ZFQ~PF`)hp>w
zi4$6zCTdvu!_2Q7whIVt8y=~@hF(`!f6?rP*j^2v&dU^c`XAFFx`aApE8RCgGH#Vv
z_I^;Ck>WnULQn*O<Se`>d{D{M3F?>MoyAef+;*@YkvX~o=?Oy#3Wc1Z_>=$gGG(eP
znWMotQ`WX0OowYt0k|gSn;X`K)l81uY_s37nK}q_lLa=@d!!2t^oPX9^_uMXnNXD?
zB?$Co>Q$#kOb_b~QqgE9iA2JsZB;chf38O{V+F<zzWbfusY-)JI*`d$6eg2ibQwhW
z?|{io7iHPg7Q?QB->RxgluHd{y6{@zBr%eUGp<cwPxr94r;M%{54g_cMR#y3ZZVaO
ziTCMi5dOsav@oHZIfy(6&53Wnhf}Bma4l>oVlH)nW2OlH3e!zj{tSr#C6NAmIi%;$
z`1jly|6Vxb-|sc!--~7plrg}%pO`$71H6fU0vEWYKq;AF>rbkwn+|lAA|MBf!hpr^
z4C+NyF@T@c+Z8UR0wClujb+hav%W|x!7an&XtlLln2fh1`T_OSpsUi3PnnFlvI8SM
z+g&~0=6u|pVOBhsP67s|z*qmSd%_ejmib|TF<-{%*eeIfnyhsm7bT9h+tmuoROW1t
zS`pS1U1FkU%w0w41A!6azNS5mLNljswzjmIj(^sVuRtqDm7`21Twdpzb;d5ZLW-Sa
zyoky-&9E}RMvmA3dbw_D!i%!<X=|#0DwKpeW{JRvl=ipT&s;&&YHNv!tp5ZuJt%c*
zj<an53mpw6Llgj8FPy9!oE3wuo*FXq;v5vB3vqHgSi*Ja$R?y3WpZ*vc1)~PRZUOh
zVKj<*&O9gr9Dk9~-24gt#c{E42w>D}Hmuhf+UY(M$w(34gQ2VdPL1L*r3UF@P$Cu#
z#In>EcMdfFbL35r3drA5?F_1V)6p32-Vp#fKa^b)I;L1+5R>B}J?anoOMXnRIF2zJ
zW!j#ksl+G@pC%XzOj%H{?lb^h*Er3tg00!=H>le7??iv;{BCKeko-ClO-{^-CCPw1
z<t$!b{0c8N#HlzIXa5AmUc8|<w^+0rzdGf>X6mgE5?L~ifZ5=iGk!DVar7JH74Y`h
z`fjL`VI`CjQ^LfnE+t(oB7`)Dc)-$*J&f8{%w&yV3Z?bL=Ep=fI*uEgy^eb(rwslf
zF^Z{V2owDdM~ZV2HTnEh0e_b55vv&y6xf85CS(VZHAU4ZCJSiy&RRw%*G(!@j^0Lh
zsY)-(s|h%x#T=j+#e7cT>&Cd@69gSkGORqCStrHdbKo!0q&uek3U`BvxQ&31(e!~-
zarQNOjCl#DzHEA}=(;E3KZf0;os8`XsZ<N9A&rgrKv~Y|SXP}i3Fv!q)K@$GVt{)T
z{sO@IRt&$*WgMcW%&f#Hpzj`b4BZe!q)5ann^4pnaGSnb-(iib(i83(T<pOwvbQF4
zxgss%FuByy8IU5=9*l6wQP4%aQEhsL$LqK*n&~z6i0BGq3Ne!+N>wwpqREJXwbjK`
zA{I?saR*q-KyO`20xI`mPa#edvf9S7)zEHL7yE;8LTMK$Rj7i{*(0d1X=0(=)ZG}%
zw=%a&K}Z^OZ*WaDwfqct|Aulb#D7F+yaRgdp>mD+Fkr6dK=p0lxzQz=Ea^0BcPGFZ
zG|{)HO}#xFQiFrZ7|3Z=yuOJ?=Xs5Rz2t2nJ`IUojs2648M(8`YQTF^PG?M6B7hl<
z;%*Y1C*5RxN9c*#?a3%=-_@yTZ3oy0u{J_9mMO-3!YL3t9as?`Y3GUJZONb5H;$_l
zTBV!_21Q0cImDW{f{tM7#5UWUp92BW<)2<({6BF}>HqTy=s5Vh=IwpC^m@N^k{-+r
z841jSkq<<nT3rH18r!Yx{Y%p_W6C2wMVW~hm`d0ofs<VI0%!%^`EzHio7offAXmT~
zIti6Mhs~c{$8^%E#2rAS(KRyE&JA!hth)tcnxnTLr*u$CSbbd4{*tM7l74u?hKhH2
zen}rFPn~C15%zz9Teu$~F9zuncqN=vX-#E?2EMCsW99S0@zL0UIc9(>qAeuQ;u2;7
z6Jxf_CnDBC?xQvV5MNMwHc_Di3N|626J&|101}Pu<0H<<8Aw|hNeVC~_g^=e?m?(%
z#GEa#Sg-|%fe^g|h28-@T*UBhj51X!nb*=Vxf#&b|DU6}9<fZksG?rz6j%<<n>JNC
zHbY=-k?a6`9}W^k#0A*NRsnBQq9Ux5-v4IIGNU7oCQ1TI;?#gz0xd-6m?Rv5>>?o|
z&=#+tyx|d(od)+N!i%?YYNmtK9GnfYv_xS!=_OqzV;sg~N-9f^z3G_7Ks3a1@GS6x
zpfH6cRXC%1rXLLyg49Q5(+7FMfxTJ>tTERz8$%7DnFtl6XWrVC_z8fN>g<--74z*S
zVEQOX!3K#iC!YpMO%cWj^o||K%s+?$Jr;w7osqwQS^8|&6s2jPJb)%ls+gI{dW5;C
zpkZCF8?*ybMO!JJrYLzbpCb8;7%H--b4xqYQ_7JANleKo9s5~LL?tZ>!C4olsU&@+
zRxvn|l_h4eB`a@}s|3>%?MQjF#DBzX;AQ7jmc7GF;C6u93{xZ~z$hc45|Eey^`N_2
z%unU+H!~%J1BuLJmax&0T=>(#381N2=GlrbaN)@+M{kgG+(BP;Cu^ih>$tl58;^U_
zsZ{e4NS3e*WR&hz!Tvx<U6em^HdBEy7G%EP6!<r4P0HiQX`g3Yu-lZo(h9UMn@bGs
zKHgQAN))9^YTwV%>E5nRT;b{%_P+lyr+qu#{okF|i{1U!;nB|Xouk(FtNrGS-K|O%
zj?4g6HNu13sUGN1MW4?$#pk;(ceX1*h(hC7(?o!#d2lHot^gJB#^cXUEkZplpj7*t
zGwYkPsoyA-{EXxw`ZW8<%ATL;W(Ic#z+?eDGk7`z_Ol&%Zg9cX&U+ll(d8~=Eti1u
z-)uU)y4Tfm@yTS#md9-tA5l1CE9zjR3X?3Te_5xJ4cn%neT=gGfW&>aGU3S%FGqyn
zekWxKn){PJA^!1slSGDFW4dj2xyt6K(hx4sIp=XjUiM<)?dkL643<66g+YQFqQQ(j
z2336?8v%Q$`HXY0fXV}P<@BMH4xNt@OGD@mK{Ny<O<uf_NKHmkEat89Euu%Ao+zD=
z{yYN+km>Hp2bO<tQz*fdyvRZpu{Kvc+I^Dy;$DIYk?p${gKrzpmJ1N$(6eh_g1p!_
zDI+b^Lw9U;mI|L`#KsFt*-`8@4$LplKrL55nWz)XK8K2oAS)cuG?|Icj4tBY)fChs
z|L2)D1J+P5C)<SnTuY@9$iU+zpo4CQHCU}Vo}&VdQsm#aEQkA={*%1qYt!MdKR<In
zKl71);^<$-b{E}5AaNp8jIfbj6l9ZEWc60r6+yl@SH=jooK;Z!Ri-FeqVl3jmK*50
zn+{asc?E_UPUf5iof~x<iO*^{^_(YH&)FMB8C9*Fljm7_kG<o@Xw0;2Uc}?8L1(5N
zBRz!LlXN`jNkUJ*Y@;vR=>2aay?f?G?qOYa8&%gHbu)0;r$^Wg)*_}LED%Iw86tb3
z9Orcum{E<c|Ku(G5DPqXbiQQkbfoft`S^1YSH1TF_{F8@BH!m!%G*1iS22EBh*z!|
zYk6<ZvTsnF2(|A9o{pmaHBp7-Q0+%|+*9DG%6<pqxQ%soGGl8uleOGeXQ5ZAqs4?;
zB<U`&f&}epaVOlMDQ6da4a9diiqrNeIgdLhwhM*d(vHk4cuy>E%J*kezAt!hqdn9L
zSP))EA5TSGm~?QMZt!_WYDb}W!#oVS`jW8YI+W&|HAQ7?QLU`$oZN|9Rz7z!;Q^mR
zlO6l77*({Cul3qPz@f*SjF=%a8JUYg3rFrLpW7+TrnrZj&x_;xHYplH4bVcnvLS-^
z2!kD-9e}|Qs6#<%JL<+AMML-X!Y8&@(p!FY^}+m+v&d6XDOs8b`@o`|RN)Idp+A_B
zaxplIahas*^w|c6M2$E}xhcOapSL;ZxFbBf0`^AuVDeH`>o7Ap)P_(CBFESeib^~#
z3~zM&jysJF9rP1|fO-t^IG6Wwjor-0SVqYXo&HKz!E_n(w2x&}d*)-}+GR#Q%NhBs
z%*fWq4zYYy@lXIX;|Jnn(J^E0A&sFO(KVoSYdIf)m`eHsdk>)k!Ntdl#yZQ4b#}k8
z&H`hx(RTO6;ok1fam8DyfBnlFw^nQf-`J4SeKk{qz=2k(P6*gpNP8e(eOrHE)uEH8
zI@Eb$xM@R$38r5Jo1K@x6geoFGF4s_Q`lMQjJzO5aVev!h_C&DPUV$lD6^w?U6_Cr
z4T8Hw)jQf8N}$l8EEKN7+@>a(OqKVMD+a3gSkr}EfJz{DmZAw6VynEJbr)tdYkU1D
znciH1$|_cU*7Bn@sVQHsD2NNF*n#-WK+GqOw@H7L2%81sv)LiG1-R@o)P%PrJV%CX
zlTligL8=*XqplBGsu!ma#9wVnatM8l?+ZWvYD0hRiVtcAL<5rC-rY~0C|e_9uC&bY
za1#It9oc|4jjE!=9<^L84=ebxo(^2|HioVTL*|*cR5Z4IWx2gSS&(sL+6@(*>&_E$
zA~Ma;`%>THqjAUh|5*IiI55;04r1IukZG))FioHEfoAe4`DVA<liq{^!r65~hWO#0
zDU6OYk1J2AVqto;9DqjQcyttY;y#v_?aGN5BOQkd!18gFSzr2@q30z@r%7LWM0E#)
zA$qICu{fVxiYNc^eRb|}0|Xgnd`#Y&5m{19-KxP&JX)q;wk3>aLxfr5(vC6#|80z!
zYMuiP)q8lwiXjSWKT^|;4GW<w7>6P|upT!Q`=D59@<VU1GotZKU&Te${wbdSyQ#TV
zSW*iuKS5TZk7So)=xK^&23w_W2r)E_c9pd+-Cn9WUlAuO{tAyy8B3<%b5kYoB3Ky`
z40pDevkSiD(Qe|QV0b?tmy(lbKh`;+3O{y^i0tTRRgsedfsZ~*GRTfT?PLFhA`Qn7
z*%Kx*a^AOMM>OZDb5OlQ|6a6{b3|I3LHS~N`$FaMz`dScxyln)f&NOpGDK-LW`#-x
zI&}s};kPmF4rD&)-kfmkbd+GNI*cl+=^JC5oA%~ZzTe0KFf=NioG;0!7+phJ#$m&_
zjWzgZ-9qP@SX>;sW>NFS?|7}V2LG&!AvXLy7>t`-u%nhcx(c&373<juT(8&dkNd8(
zH{WAKqa;->Uzo9DLXL_9Mlp_u{<r(lKD+we47xYyzpDMCsWW)}O$wL=QQ98w8juX^
zN6`j*xuZv5T^zIjV)w2lLxMAB0LKYACxDsEdyYD@q8D9SEebmew9LevnY6KQEnzZ5
z*cKXXQq0^%c!c26=ok;p4tW3q(D*0Cq9<cFhEI>AJd&|wB%Kei9$1N+{R`OWk<xx*
zLX2gkW5*2?L{ba}3u?^HXn33^2;zZYqrWEP@i44mA_QlJ?aaMz9f{scYhclj_^pM(
zK{fc7;)2$k@62QnR+<326SiP-9$XC2yy|aWc4(sa^;%f6nDL#J4A+*+%#TBWfske%
zwCVwWQLzJFi({I(hx%A$gsD`0z;j@*yHFDjLvdz9aW<t3A7{jH06HK;$+I&dxR%Sl
z3;3f4<OVs1j>rvi3o9ho4W%}}y_+)y>z#A?DWA(!U(`MoZuQ(L44)Xc^w-zXdhi@?
zsWXvx$b&->W}G(1KD`n?y^_p%mzX0N7U(%1xSGW%EiE1N;@}FUqDNtPz))oNfb%%+
ztIoz0yFqU3rMO5<#{^<1deX<p0E{+K{h>3p0||v|8xT+@>4WrDS7KE_%P}Q)=jTNx
z`aQ7dNiE3>f7ed3=CF%o6ntJsCWDw~57KEvlB(<?vU3uISpdenClv3VGv0EvR+PX2
zbDxAiw8P1HY5D`eJFYu$y1Uiz%oQ&2i}#2ET)IlM?y6t`yljXi|Jj<gJ4re7p+ICx
zwZhrG-(p1HXq?$;6epUtV?NJW{iRTSZLqY}e|fW+yI*teK1n3EFE6L=>tu1!>}x6U
z4$@5aAK;5eDzwrWl1B8_XB;J6%Tl(KSX#h5^lqs$`Q17y@=7N6Uf%Q*MF{6WTi0&B
z!wQb=^aBsXqV*9cP`ZtVl_H|66_6wjQEoqC%eoxPrM<xS-eDU4{>=UUow?NVudLL~
zl=S5w8H>z;<U)fyxaVXd5U*82u*?~f6=YwxxkK$-g?wEHYf^Vo`XTWHC1NX7M+bET
z@*aeOIksbr8S2<pNzar?;TEdd>gpl-Y9COk`^~vv3ll;z&FaX8DY89E1Q{soag>oG
zXlHyE%o0Hb>gVSEjtt`hUuVnN|Lx1qWBt%o)Dr*q`yb-}_{X>H@7taKxLBrTSWZXn
zWx)Be2~^%|9yYiBd#8R?evB3P`TpB)>0kKq{%thAf719{<LSzGPglNeG`@ZEx0NR=
z-#vNyH?i^|5HLx{u%pF;`|&6D7x`HRzOywL-f^;sO1mnaz>1*X!#M83o6&HfjABiU
zsKaI$jF-lv8+1OMgVU;$q`*whCpw%C?X?)zIhc&v96x%_vMRDX;0AOhDT4_H7yw;B
zqQ9Cv=um7Oh0)EC%s#}R;qjP#`)?@NT^fC?91KEB%Pi>k;xPu1EqE|-kRbzuVSS<p
zi;jFNk|EU5`QQd$DKxN^sYVtiJW;yP8Q%q);iMdHV8JXY!1;RQ@R<c6>Dx$9AaI_O
zxCjaafKHVep-}PRodIa!CiSSmkCxGglER657&{K<V`}iMFO2O8(46+;MCVnaa|WQv
z5DLI;TBIsdN*e>2u`f(L(u*jCl*(E&MxCyMH#|rb07nm%C7A*SbUHqt#09)yOr(R0
z@hvX6Bp~ce27|QG@exUDgw)e#I!ZY#eombHyn8H;51ya=(mdJ`@cZG>!P)Ni&bD~=
z3SRDrt%Ji?N4r1$d?J26*xTMYIu_0SZFstWa<u#G^yB~@eBC^TFTchYbMT*6V&~=I
z(a!O)I5-mQ<ghKE`=jRm$?ncE?4<pzz0>X8{hw+AdJy{uCt`2+#qJ3-J2|M)sPeNp
z>pOAqT)fyh+WHy(Y(CrF+dX+jpgi9_*~h`2!)Q%$*gQJf-8$WC9*M)#qr-#a9RU-Y
z6WhDTTYJsj7dzW^7$3$IJ7+ulC*t_$=H8w+(Su+1ca9KvXO_=)0LtdGy&W8h=efOm
zw6k@B6EeST0rCLAy_z^a+}Ya2Uv^&Zz;v2NuWGW><DLIGg+}mFY&Tytf7&^o6O}1g
zfpytBJ=%GJKm#U@PoEv1?4F#S?1-NZ4z>x`$2&)7yIVWQAkOU_923G$k9TS?#z~Wg
zhCTpW$7}fev(w{U!rSit$<EQy>EX%l!G0AM`4>P8K-q*(w+X=q`#f7f(!tRy>=p4q
z3s4ik{JaCtj}WJXttMjO7_hc=VmE^U0f{H}G{yeTPkSiHZ0+Fd1MKFP-Q%4qEZFWb
zHh?~H++Uh7=qb$_*9qX_Uv~j(igIhToMQL6Xl|eFBIL3)P{8r7q!vQk*3S~>Nc{_s
z{4+=1{+OZw{kd0JazieMnjT$&vak?yLz7d8HJxooeqKW3aQ!_1iF)JTlM75-kr5@O
zFasjx-#M>EL5n$0L=|s^PGly-)sB$|nM?&L&lX0pQG5b$uE}u7>61V;kl3N?Nq2lT
zDhAoWlap76u;%Ka``3~CCx*oCzN}$CTL*hb2fvtC{C8EnHp;{%91@sK1fKaW3+sAZ
z>8fJ=Vs-An+$>kf>%)#0_OrO}h~<4VV482?|Czwodk$=e!Be-(3vS1u`1gh7f~Mho
z&hnE7xRAj2Jm}-OTu7iw*?;8l-3WH;bqg1#+d=D_x#j@4ZsO6nMc1b5rs1ZVEJzjc
z=oo(O;1xj)I+);pZpMsxt84zC#fGg7Z?|#l_7%+hx4}7d0smEf*!x*5z`KQ;aZPk4
zz1|%+z%K;4c=-4BKZ<Xf-vGnS+{wd>V|?+={x`adn{lR#v%D_O0$rfXqixJ#iWZ0w
zQ5L7#&f}tWO?EZG{{WCSMi#2(D`cP*+4>!q5O|B@Z#34#WMf{U>>a+gyvUJyyJvE}
zI(cX)2S0n31<_Tm1LKsjinr<>V$U4J65Psd4Iwv<lClX*CV(*cVGg!##)vNXZvbEG
z-f5C$J#cWvqw_WSZ^$dKA%}GQ`0=D!lmBYR6_@<~{&)B6`upE6W4v{AHdf8vKDClK
zC;ye&EGt~g(jY=EQHl&8vYH&JHl9sXwVp_}fe`aDO6+liYM2b{QpR#cO$ihHVtIkA
z+sJ#8jg_@z-Qs*`)s~S31$PJQh7BXx<?z&{rM1E<2-m}F?VaY(L8B|ZbIh*r<lWp{
zR^aIjCg)usC?U*m&X0QBp|hWg*(xaq*>3Aa^S=&`s)3#_x`Sv;-I?Fq%&7QSz&g#^
zb4OkX*3`)$|88km<-dQ<+H;!Mvzmf}xn11z0}-vN2J5cjaC;m?&(`xSh@P$ISrA>+
z^Sx}^M$Py%?Y}a4(=1lsvi9GnE8l(h#I^r6epvb8$rt<Y2eJPybCmW)(xtor=uLn&
zJ6islh(s4{k+(5fB)1k9b?hX(ILFL{bI=2&{p6&SQTH;2=XH3DZ!FXD7Mhk<{;~9A
zrLiKO#r=UwB_Y=7-l=|0&+3i;yzE6u7n9g;hPr5cD}IXNuJ~yZ_xtgAH0g--OL$Pf
z)DQkM>5r1Aelgn20i*F0`k25e1S0#h(WEcd&*8uIOZjXX1W(0&0`fwXimhlgip6@L
z|E#z1&;P_UoW1B$jvxB`zv8Gbw&FIZR|A+n{8exBUnM?11PEc!{~Zj6$vEzI6R|#|
zNA*ATBd?n|wamwC1>Zs|_;a*FqcwFxcXi5%8q;;A%K7NU;LoJnjqv+JP6M}>w4v`<
z+|<RB`U<6Ij>u@OKRMhJw~_MZlosT;gI!3=vLv^f&dEGDCsRD8rT`9mH69OFmzR4y
zbbT<oT!ucEVYKDCZ^}Ww)I81_aLn_p*(N{x`~v?^rJk2ePd(%W3;0A{(GD)WWRt3;
z%=<1cRx>H(Ip=aHX*_-Y$Qjgg6R3ZJ`O`m%^yv@Dp3Vz+&)n(s@!Y4EtNas7RsJbv
zD*y0_$`=HY$|sQGdA1DCwvWrR?JY`*o$+`nc6f2Q^V_+iip56xmz7S8w@Yq*(FYdH
zdq8f{;*q&vCHtz`f7oBr8MnI1jrc9-8_VnBbtguppWID3Q7s>bPwP+ToRp>?wsQ0P
zPG<V?DnoyuG^XW}m=>0y4%D0uJjy273^ocBqxQ3+mam2#EO+gL5w3}ydAjK*TAiBK
z6y*><uc=*=Qsp=eXJLi^NnSIrTo@`=h0z5rnc0G4d90Yy!e)g7o5_Wd?@j`J@QZLy
z&PR5Cb23es4uQFnlAo1Bk`i|<kc)e;su7D1(?tdWr$N#Kj=~4KB_{9y*yPl9@n^YH
zroe+kTAqHdL8-h}EX_{*;bZ}U6TjcVmMoU4{(9y5<EVb;57nQIlM_alFDGcpMTQxm
z1ep;}yi!u<F*)j}oM|0*7?UxL^FyR2#kw^S95avykfIjtJI+%(nEX`Hj`n+Y5>fbu
zTg?9<)0m?$mY7#E!-My|>wf+<>zf%8>LeXIUBzh~j)0Du9!)M8|M<0AJIzTfmP^WK
zAB&XFEK-(=QfX2?bJEIWNqIOUDW5r+W@ablGfm2@0DTZzJ}X7bXErULeGpncn~9ds
zW})S?d|GBylhdNBNq!*8mKCaC5QYlyCqY8Z0x$lkpSrkzl`wmCv&ZMWaBrO+eXaFc
zWK$1@m&639_<<=z(K2f&rWS2u#*7u7A4}oE%eD76rQF<r?{u;ScW#(6GOya94r!1j
zV%SGZ=lF!4D6q}m>PRD$6O^^WSoY-OExj5{x*ab`hJ;-XA@7Qq((wu=8JjesCU}{R
z5}LELN#*U2M}sapS%IY1<;okT$21=66owVwMMb4brxld$Q8FVOAS$zNp<3fp*V3sG
zlDDFi^}xG+rCawZmgrUszLE)90yVFP!M4M05_ejqg3H6#yp@$Iop1_Mcs*V++lZWk
ziNMOcV{ULmPJ)nZZme)7vLHC><~lUHs#sb|oSl#?Hax0t>hhX8l_geF(+J{2NYxc;
z!{VWNlF2&7%X=VWAZ|$pR5u252~?yewje28l_a!x04UmNfLCU+#CRJ(<z_|_A-FDb
zxjrmJ+`Vu&=Px)eY#M}!=6xBDLz|~2Z2`DfQhUJ-943uFfCF5i$XCl{5Nr!KBVu3u
z_*QLOS$)Xc)b2c*e(04ddYZ)?N=j&1=5|!12`CkLvZ|9NYuOPuz(R9akk5}=ln-Bb
z<KNRRX5=>DzeYP&DjUE?%K7r*|G87Jt5|aC;#izxl()QfQP@3?ucDg-Z<cW}TuvqH
zF|@{2!=SW!Sj^Mh{0%CpOgJsB6()y`hT#@&3Rt=4w2tbWq-Nihn6{Esm?9~Xgkk`=
z9a>}o+4!$StlDMD=$%7CXi}F)4yKhh^&>Dp1mAy2VOmwah*B$KI~upuW_V38Bv@D5
z!MXc6y6S-JjZGamu%2!vDud*T(q~e5$sOXDy1-VFj>4rovg8DdQDzDcXZfYaF6)nt
zg;4&K*@P;}vlzyZFH_~<@yGD?C`__vlI_WSh|wSGbWCnmD-Anu(n4_tvY@0Y)0&WZ
zpkh}M;Ba-Vb5UqeP^Gb@9!gncY4x7n$xEl`r>#6?D^?zWXO9fv*%ScJvUxS7S<IF3
z$jLd|6Q#wa2|3N#2|6@zS2p)i5QXk^COU)|4y2~Q9AD@3`y^F-O{L8b>U+(4=*&Gg
zdi)X`dYvV8^@@*U7+z&jBQYi<*B|qBlS<o{a>`eganDoQxn_f%b2aV{CYM+0Zjj^{
zbG1dQ&&4f*Lt_FsIXntMJyp_K5c*_&z!XQFq#UZJG!TkSl&}sbBQ#8q-Ga0;-6<Ej
zw7RmZ0Lx0Q93yuNYOB6ElM1n3H2*$~)oRp{vHi}Z-36pa6cyKFdE*QP1YG1LT;wxX
zTtMp1UVR;1rJIKIq}q&&rYy%b)r2Kd+iXHH-uQN2oPR+3pJzd#>Qqu(_Q=Up`3Mu1
zsJJyv;bg)4%(&|7z5$F>^s!bG=XL!@)zS=HhmynQ(eci9>)ET;_QA_n@FZ_N_%gs;
zPNrijdj`$9qwO7xPHR0s-QSv%1sGa1e@dvX&Nl8x%#W^sz+@j{zLSpSN+F!=H8?lt
zR9?WPCbK_xO$8=f3ec7oT+)gW2js2ON%LfPtJOR{-Z?sH?H}y7FeL7$oudk$GB#?K
zmlbLi<5n3@RBT8`Wwv{8@^nErRCX;r$Lebr%yUd=fvR{Rvkn32QFzTwTwW7Jg(74J
zZKncGwjM;b*;!3w%RV^R$gBFm=*!WZYr;*UT=Np3*f~tXr5ol-6}6M*#_d26x50&R
zk&>)S{w;ODRUhLOk~?()MhE?wcj2|jG|S?yvSPia1AVDcRd`arD_m7p%nm8oMN;6R
zQUudDMuF?XOss@<wkn%y{1)!PMgo-yu2LD$g&Ak~H=QHaaut(FV+W%7&$mmBcktp&
z9P_WI@YY0-=wgm4bVLFby7?C?wdptTri1Wp?zlHJo21(!E9QpOHt2e{H>~#uKz9*M
z(#{Kxw{+NJg+HsB01LEvU#0T%P>P=`V1MSbblc?nOfa?tNgr)y3>z!}^`WxV_~WXU
z@8%SbFUm^2IY-Pz5rk7~^WU*{g|<?BDPZx}DqtZg_yd%$m~EsFT)g7{QzaHOl<%cz
z#b2ciLvg8v_gPM%xMYGH*+j2Nu7TOB{6L><h>vkZ|4Zk8Ivoz<(X&K(X3Go*CBuQH
z#Q%K%!_%i;{--BT8{d72|M?8#e;ye6^K|Kc@jsts?8~#=?cL^22m8&v-T!T3KATJw
zOsGXLjfr`tbD6wFpu^jqP5)SP8&0J=G}Vk^pouo$14JVG0eNiKIK8^Q15=5-VFTjf
z);!!rsu4}TqD)cv9g`wJnSN!_boh=^BqPaTW?*!6$Jf#MKsu{S$9|pJ9X%J~bgb;C
z&){^Gg%k*?dPwNDssV0F@ANP#NNiT_=T43LR<3)w<86I!f9rdDTn9dUk@P3&K|glV
zIgr~9Ue2`e3|-G^V3@_0GY`L8bEaO-Z+=SqaB33gyL<Y&=Q<!a0uINUxISm({bbg!
zydQOA6T8g&-q51uLs8Zdx<P8RibUSDcHLA!L%nU1D}KqD=28%Kbc|UJih8qh^&H2)
z1KV!L>)xfAUl6Gld>?S#2AUn*#4S*WE%S>+%zCERl}+3y{of{+KrDRODK%>R*co%v
z2K!XO{ea1p1m4}!d7%3E$)9p#({F2%)8|k4EIVOUdRkrE6x$K?^^WaJVp|gv&Epde
zR-<r7cB!E{4JqX`9D{(JQrr>srYcJ~S7NESNdm`ELAfpaW?AI$`bw1)g)4&;%7)ZB
zPfXR9T8_n2yp^?}<3IQwD{qYAQq&>?K~Zj|s#$0UN-u#^{x_kSIBBHaTG&-P>vRDk
zOBCK-EA#njpYr3!ooW5dg*eK*a8@#VhikoFlat~$E~lLrHUVV2F>lvw)Ns2zmSV$Z
zyY1)HkPKbj<;ZRYw|QE_&4a4qAf@Kw$gX|VR(z^bbxWaVW_^}7YrHtbDs=P#-5+Dk
zdH_C3$9EzgjnEmLoaDdhiEi}g-N(SrSw41b9Qc@`NwxHCWL@i23Y_)EX2mJ<0zc?&
zv={d;$5)wwaN{mD0_~5ll8dpdvu15fuZFvmvB{=-dh1fVOVHFHbhXAcoAKsg3~LmP
zI%0wdx3-GWB{&E0MbvMD9yJ`}<+&^kqck@(tYcf_GkgJ50NtS$z{QuOK@3IJDMoa<
zRUv3Hz_gM+=`pQ|<}4mhsg?5WF+bBp+XY?O<gq)Z8MYp^d+vIHOaR-M(-Q9kNCTQh
z^4E7Z(q0c(mB*L4&XJaTe8tpV5Wc7`oEkNq{%Gpm5(ud$FbO5gsNZR3EVN2fqnavu
zT<hxx)5@+bS~a!hC338GGAEn5tf0_tp6WH1r&wO#!VM`}0j@VTaN0`^AoVUNxeLnz
z3(;Kr1uJwU{gJu)5KH2EE!1Yq+=gHq_<zHcl+%J3N3dB;z|<-LuDC?o1Ld+NTUvR6
zD%Vx6ec=G4`B5D;eT!@E_7Aa}3V$v0iCx#Ofln%W{q_2d#3$B~!U{?`Fj0*!l}|t!
zubN`ew-P!?K+gx`@t{X??$%(`heO{1x8|bCKu^P4;G2?Eh12JIW;I1QKSdp?@)MBQ
zHQsmea#vOHj_k6q8EpExz<_$fKn_{Z+_{xpnMIt7%TXS-LNLI6)@Z3Q+bVIztwLM?
z*6nIw-jdP8twqEA`_4=IONBdrj=Olijo>Il`<wF9=Yn!nrBpfF`3E8SG@A+_dok6u
z0zAngrzR?=xk@eObRf&WL~~^Y{@)P);)i%~|1ZtN|K+r7K+A?~Ij4gQE++l9ERBbW
z=sEL+3!4dSecfgjLwROsYIH$@9`8vTbH7CWxGR1Ipq62{H#CaM!s2}K2X;lA=xQ)Z
z{v7nj&eu4lH(p2*W?SL<U`!)RydxOFFfkeobx2vxk3A>(V^vyErIcTlh-p4~B9_pj
zVL`n5=LvAcm+;%^KTqa_PL6X%6F8-aA|M)*C&o$=V^N*SNMc{(rSOPj;Q%ch+6gn9
z;|M#GKgLw|;z=;{xF|2_%_~_^Qo6-;RQbx-5LjJP$npckASwK`u861|8K^mvB=LU4
zpZ6*I@S%C0&h!y*NtMOAKmrP+d!A7$(+FU<aK{7Wc{jSmdwOLMe3pzi#in@qxlH8M
z?2@K3w5wz(2Qo!-$x+#?tp?of_f_WNb#xW7`y?MKe5fQ)%nRHQt0sLeys$cWP`|QI
z%p*+3>70J8rOd2aW0P#hE>N44qJMI}MZN=ml8iui4Lb=to1z2V!P63bWfP%0YlZ{>
zeZfaob`7~zr5e%9C=usUi)h+9gbEpnCap=0|3UA_8}u2ZKX2CDvXD|fzZUDj6-r6r
z=Viq3^MD*~W++~^t{WBcQkM9>uC3JEVL+G^Qv9;MI-{?e)4oztTLZ?HCk-AV*GuCo
z={Z6|^jwA@#7*m3Av8zkw@ZIwYdl^7Dm-D83`Q<=w$q&scLng|H!t}cJ(B#!Hass2
zIJ|wu<>x{Hc}_x=C-&0%hm;UCD8jhdSU+!T(Nh_mtW|ggQxFxHuUv-3hO-b`<cfll
zrO>@wU!UU4%_6R#3<}L_uGninowG_GgZL9iDnJ?qVp&rOmotQ!rd5C!Z(d1B;oDsj
zS$IPT6e32lnQXHmHxk*FOb<_CMWmycxNBX5Otm5VSim{`^JIb67g4m-SUAJ~>Sw3)
z?4|WATRz;SMi>Fn93(~$OBzoEW;d2M$}FSDm#my<$0<Sa@UJ+LB8pK1^mLH2T@Eng
z-p~R@(yI*sKx_G<(l%22l$O}tsqRD^2US1~-gA;1+>QHj`&y6T`n(F>d)CMD>l~P1
z-@(EdFV$<yl{KgTmoxT%nrZL!5ACtMsESiZ6Dn^?jmLKuwS8;&7x6COFRcAn5*?mM
z5vP;Zb=B-Nc_QJ{3LhI&$!9`*5B_Ugb=NZFIbr673}C_Z%(MXe-RF3Q>&Xv2>@2_m
zP|P!3e0=Bd!>n$S!w?DXB9=G_>;4bnJUyrQx8gm>6Fu%T$#NfTmBW9&xA@N=v}W7~
zai0akvLFt#a4Pu>KZ9+u@Cx}A{XLEEV9$6s7nyz@#(G|cSx;*BL{V~_OtLpzVb!rW
zY={A<<K1YO=JTl)8+y&D@??1GH1oltJ4O3PKm5bxcs9!ehL>e}OAY^&TOiW3%rgay
zZ^h+Irz}DE-{%1*We#P4LQyiaq{OUN9B$Zf2SxU2@820lZ{~;#5;25u0-)ja5@eSX
zYb4$PQyLZWH_c*@S8c9TNh(veK?JR;_8Cdi>NE31y$)8|7E`;M^Bb8aH&(D3KzE+n
zqM7x{Iim194fG7;B#)5hlP9tl>|*mAV-#4Hhu@9NZ;5Kn220x+n<ag<be`6ztEKZa
zB&;_ZD@M$Nwy@~tyYLq)4O=jTb|N^2RB<SP1;3%$ugJI719&Zo31;G+(`YS3k$pTE
z?vXga^CCBD7|PxzdVK$I%5?hD?~eSz{q7_VklF#(5YSR&2wP&w0Je*j8=S-f()ze$
z*)~0+*)F*Xo=2F@K-yhRS<PVt=bthgq7iaC#&{a;`7jz`dgLxVMDNf&_JnP+C#*PD
z#VYsMVRjWugBo-2&vBZ@{quNqSraEC4ISG8&D|?`tkxv}=Xc`2qWEqw0m!zl21yOQ
zX|Kh9)y4Lpe;JKN5h&8pP0|s+)Wt7R_c|WcMWykNe|%S!J)|)Mz#ef^jD=Qg8Us}R
zA~kf4+N@dK*xnPfS&?LPWc0@~WmevJ`ee2)3U4`=7nI$9VflVHotC2?f@o(ERb~!t
zAfgs*-0&|*n|arx#;~DYq?r&84!Mxw9n9}+WAg1_bS*e^;Zdw*u35({Mk$9@bX&{V
z6K!a6It(YBeaUmT38Y#U#9Y_e2Y?uGEqzrXhMECU+OPC=i5s+IwXUW7iT;MIRnmZE
znXXbQi+3#+H%zy_9qT-G2f}Zbu4|_K8sGJC^#UR_c|}7_?4Cy6eRN>&VQe|R&g-J>
zURURIVI{9Ly(s%o{v{f{R{wfeY0Z@<bV+8vvw!rvrl8w4b+OiLl>APO_XNIs(x6Ky
z^-eY6yC>|G>MNY%l^gs=9|knfn!bIdOcvu+e}D{7&obA8E<g$x_WMyBi9WN86*YNo
z>zFZPk)tccbPSt8IcBm(n05O>Jss<e!=N>UlFvI5#iTw2pwv~r&lC6*FUUx}RP{FK
zQZDIFVs&1tyFcqw)x*z&+xP~3))L^x6caEQu<K9_Q;^`if!mLHh`yz!MVNU)9%GnH
z3<kKoGseL4lzlZY7LYKq{_TH|<61iC#kZUr@;n)n5;t}d1S##Zv@6oHnQvL<E2iIo
zliA89x#}sgx>|kg0U8e;;HC7OnejWVyRbh6uE#I&HzQ0q)sI73$dtu!FGSb%*4V6w
zXZOAP`T)!G;nqq2%y@I^Eq0E6aQ`lIHJE$$Xn9hS`cWJ@G#B<VUANoJixQ2r=eqCm
z+zNS$zYVz987J|Om!0wdUegI|4)*t6wXiID>-oV^YkTL}=}#3SM+Q>`$jJU8?^=q^
zLhqO+dp4x-CcU_$BvYtr6Eih&%eG6AE+2}ooTwTylqn~?xfxih>(4wl(~hzoVW;li
zl^OTc)b1M(>fS19sxTca+Ub|<{1_zvaVTATbbj)iL6p_OGtT?w6VH-68$tXXM5>iJ
zE6E+nVz$-)e~z*#>r`fG<N0RQ1<Hh3ifJW*h(>bA%G1@obUKGv>*IjJfH&uYEzSdx
zKqjq@oO90DcJGz=2BRaYcEXoHu~2QznxT?tDAM=Q;{QR9O5-h9%Q(HhTRxVl_@b_Q
zG%XE4as2;x-~Z6?>VH0Y`U5oo694}h*8ltujNpIrC4yg8fWo{oTf5D@*4EDldppPf
z{VH3@GT0^<#ZUIT?Dl+t=z2@3^88nOdndK<i|xG=3gHQ61_-4@#4m#;<6SIGXG;i}
zxsYzCQ$1{}EyrzLj-vAxSF?NTgw?wJ%eb%w4l_0H4#zTT$xe<qny9!+50-hZn^z0;
zBIbt}?&A+TKkf}(#~;3Cp?=gMieJLBdF|TpZcfI?2QhaK+}NgMJ5|BkK+tv;U8PW!
zhouaLZ0syAz?>JBT?{F7SU8dhDLnk#Dy_~Lqzks1u;V7(+(`KW`B5jqGM8xEh7X1M
zQd~r+H0TI3#}}S$ZEiPvd;dHk(-iCo_${X&N?pw(X`gLX7=-%A-2C!-jC#{mQfaRz
z<Ct9A)7bA9^Sa$~n43&n+pNC?WIkgPA@$An{Aa|A&SMZpDQRto_#A#na2dyN%4<uu
zeTvLLuYbPm0)DlW5YJEVdc7E@fmXbps{D9J2pUr11u+~YgOOso4$wT`2}Ny3W9L15
zXDTHf!kQiabE#nofK%$`%796gO+;t+FeP;zK<HcDdzp%o88%48eph9J4@rcSWp?C<
zh;#0~%WX%+gV(IGA(ZxOH4{6j!ZdYR%SS3~Ia@b$p8-%(NA@3f5)+Vn_XbYhkW4?+
z;v=O9nZ1yvzl`p6kq$AmKil+9iM59uTvLJ^H^sCR9Ts5O&vvCdg_D8i-ic)HSszp7
zjPXc3voGX+R}RYy;ViT=XG3`NR_we9N&jL{Ii9qEJEs(ronm<O!No-ykIDaAK0%zv
z6&X*RN!*Y#P?^LH_X9w7V8X?7vM$g+|5u2Iw|3KqRi}LH`jp1Ux=en3*ODVYFO|sO
zwGi0@Tgi}*;^6?x)xjZ4hbg)XC4))oR6<5IA?3xqm~`uMnHJQ)CguNYI6Al;-adu9
zfF5f4OZ~%zcCgk`)ZI-}&cSUiGco89lrB}Sx*uOgC{$=rmStsLJ>V`7pdc!|S4xR(
zx8hKVTnS9`nY<vG8f1=~VKA6R_M5|`e}isy%NF?^n%9y6dae~Nr=EBEbFpAxK3ppV
zR%ZGo=|#yV<tFf`&@(o|6M%PXYM&$g$MIO!yg~kA3frWCS}s^>z4O~7W!X*NIooB#
z?byS<B2ZWQ%I=5lso&HJMx=6C%6phI&ABdG(g4u|ZMVDnx^LE?Ejq0PYmmK;V6~9f
zibecAMJ{U^WZ?CRUeU^fR6S0{cb*?Jn+A0E8#2aIjj_=vU=9(i+69R6n57IbZ<|G}
zJ#5$f;iMkf5>GsMO=REjOgDsgqB!Rdyygzp+_%T_Tk6Bx-A>IQI#;F_Q;_>fTNC62
zv%>lDcoavyO^(??-Ly054PV5)!RW5S?_gr?gc3?Dh_S>QLi_2aVtrkFEyUic;;LWc
zlS+kJE^bu3(}W#u(Y<AGQGp(-KrOmSf6)Kmcr<`!3l*!$qWoQ!P^ebL=B9YEQl+6^
zoYR0`zwUqS!-S;18i0wj&&q0l1{&3U$=(4;akzKb|3Ju@{&6a0JC{WX($!4o3P-@)
zK=gaiAjs~*GdO&3zreg#1N{Q$d{EaX2PaJ?HSU^vF8b8s(j|7q(N$Wr^w)H3ZIEnX
z0Ivd!m_M(%mmu)HnD4}<P(Xn^;IQi5^x#^S#*O_-gi0;|-gX0mL5D0%v2DeuQ9|w>
zo-wirG3M}*mJ21`R>^vC*E=78JX@(Q%R{45HByELp(EREY@FBax(2$WCQ2k<Z74BZ
zUwko7bDJtE+gvg25L=Yh{}2e|Tti~~*>#h1x3dfjL0|giWiB#wo1o|L#>TGIiIVsE
z!#((o7FdPm(6YHhSX?6rGQH2OT7b>0{zNI5Pm5qaEdg^ejsZRmnI}X(baUAHy}Ly+
zZ>DS%WBlMj*yM)ANwNM_i%fAXN^g-m1&L}x;zOOD-mPBt{LCir(#%7~J*m=$uGuqt
z{TeA#oX=_~o!9mNZvZUxP0iR>z&YGq>82Z0`^WKL&fL?J8spkgFy1n?Tws+cBV))%
z^Rs$AkXEdUs*BAgY&7btCK0UT-JkJcG{!Z>KoR-~R&0SAGir2uZQHb(u1oasC)Iq)
z4dYsO8NhZ_fapl$I4VSM^d2x9+R+<a&9{OEz<IhKhe!NxCc+MAe)qyD6sOh-mIr+&
z2hyV7z0=Wid`%?DSmlw6`ge4pXCf@k<}AKb@oumjUlhC-i9+P^F3bPlI^nYVzSOz`
zI7{N&bwb1@m8OY`TI~(c)n^RM7WVwJJH)T$u93NGG9b-PpfeGty)~FlT?8VnN={E7
zx*ID*+BHrk*x?QKY0|moAklr8N5|$J@A7a{0<$ivEGKO0P6HW4a@osHXgsW1##hX{
zjyGKwOIkw*B?2q*t%=TPFtq&B<#Tnm$U9B!fJ6}M;52nS2e2I?AT8Jt`nDDpa~>^u
zN~x7kU|H1@RT<VgmF*oU6;<fsD|DuEx5zs=Ikc!?272F6W*e_$lFN(??7XFAOq0~S
zm<!OIlB&3c<;BSI+1fPcJqozaGm})1Ajk+(wYuLGO6Qx}J6*|QVfVhgL<1$#>&aE{
zcp5WWve?LG&Biip-M2rod8z)Cjn3|Oj#C_2b@43w$6C(YW43!BtrU7b*2wzMlGkB-
zgIn~mGY!@K8HQ54*j9f9dvY(RF!hapJ-N4cvi1o<v;9edqh#JZ=atfsh9KNjyiDG5
zI%{J4uhzH=>%&baelWj0gl?g+;@`cxH?J*ozgp&YHSZ@Iv^f3lG|-E8n_6_2D)eQl
zaMx6Xj%&NOlTi}MB2_3Oh0e&Dst!TKwyC1=_NI1|<IuFa+GMHBWZu_9uq3A={-_S9
zbgGdDoE%(S#^b$^3T55A$)=T_yqpSkbc!9Fmg?xV(lc?{)a7(ecHbSa{TJT?kGBmT
zdn-KNb~x=P;}5YVU>LbAK9fyx-*Z77M<01--~`{5cd83W9Im&W>s|-1%emNbzr)es
zCh5d@`L*}{z3M!#FmkC&|J{7*Y?AW*>TDeUf$15NG27;0=8?;<Hr-B%x~MzNH`s$b
zd#kK6B3tecX&sYpNE8!OnsdA#q{7Xi6?N_W1M{TDbr%-BBXG%;1}p&QR|C4!1}JK3
zA#Evh6NlK3fIUQA^UxX2X>L%9yMu8@X5SI8o_0JO2Lf8$L$W5HTHUHg9vpd&i!(4L
z_BP0FXUnZI=rJ^_Xd7}?oL&PhaDrPWWeLD+pUTh1xv+#+;mq!V83#=djVJUpXM+4v
zd6w=uqQlhYr<P!BQ%PxuK7fyEFR>#VdgPfq5BV9Ff^l*VAdS16KXmO8aO=b2rg`%T
z1&(~I-2@$HA>eG#|0qsVIlN)qVoyOJg&2FpJ)y7slmnX{*Rg~u|Kae}teFxa121LG
zsqwXCM3%ylS~e2MRM#OV!_t#H6%|nT|G7{tnCz+0cZDBmvZr~Iea!nchrZ@#yia5J
z^d*7(c%3S8ZMuG-et@R8Y=>%#MEYvo2fnXo*hW_;douNalayUVKn9Q{ZIlG~rUnbT
zQQc}KY?0uFxi})OA=-@rnyctW`G!&9Pu|s&Q5^7`f>YJpkX2njsggGhA?$izMKvz;
zTrEPObV(>!{-9SGR<i0V`XEUQ3wfK#E7XG&<>amz*!A#omJfS6kD+_X4C781h2C^S
zc%0!L(j!p)r6SR&fPrvm2L|pXeJl+tS;zzqL7*X1#X($*23(AyJsF`b+hgx)qw%3_
z^%!(^HO=-=_IDV7UCIn=MpXR0rIIN|=joFQcA=+7Wv}k5WNlB;vbOWpEPel({ysrh
z$<bgiu20jw3PYL#9ez>Bz9?j$okI3W=wsWnDP-H9tVXu|nW|*v<GmhLGu!^uRkQ7k
zZuXv)v+c*z&SX~Yfcm2E*0+O-i*;wuU2H#|?y_A*c~RYwzksEze5%^#_P>?-x&2uB
z=X=mSw;xUU#C$-dbWcZ%{-S@D*FO=%U)0YBsh>=wpPu#^;Ny;_X(^JO7`R(o)-;TL
zp%I8V_pa_9DyV8y`VJ#A?E4UpHLEKxBA35*{TO1hdi+IHel$_ppfn^dPm!_hd^+TD
zEjH;GpS7z{PuP{+lTB|~@GW5)(*cmUOXonDL<A+05QMsRI8#w_f$WGBkctVWDmi(l
zj3=rZAiYh-?W>A;VT~O*49?0@wW`G@1{`)CSr(00B`YTvceEYsNo6(+bR^?{SHo|0
zWt3c#`-f5+G>5#m>fk`?c`T2jTmI`K!fX3XzptAaL)EiM$Q1Qrf)-{Knw{9eFIPPu
zL)4bb_eIz)A#6{l_B{E!nAA&M8hyeM(#6d0(}|Pz?z#%2rFDX`vN6-@gMz4D-DIZt
zX%FghQPgkkI*UHh3QNQbTb+Wj%lhE6Ar?l)o4KoZfms#@ayzZchjW)l;qHA=c=~uK
zJbfSv%TQoJboqPXHXkSCoWz4{xVfHcNt;Dx{ofYJmfppAVVtac7q7@U=*YgAp$d1L
zsaNzm_)lk9bmQq|C?5wkh26q++DBUD9A)3+KYYY!O!5*h6ANpFOPXj{lNDb@4M!9-
z3|oI+7Zdd^YE{a!I5(<}2;(b5k4f4>$!6VlOt1OZxhi^Vbrnv&iB*c~Z8%nUONZx{
z3YYex(w$6}YJ!=F-mMyQ1Xt?dh<1||26-i(T`D_;VYzF%q>LX5g3M?X)AuWOux>O|
zkNr60=sgCGBjw9&$k>(>@gNKUolq(qisuJ{!(Zv%zlz0=Y%9`kstUuok|QS3j!^-i
zR9Ct%Dv?$fdy>8i1Lbo;l?{Zd*;9nDM?_gpnbOD4BM@=m_&cD6ce5n>52pmbU;g*U
z$p23#TtN|l;{5-O@1B0|=l}oqhwmF-^8bIy|Bw0q=av`d=JpRxc2<!k7z;!SO4Ciu
z`ft>UhIm1u?`YuoRsgQf&Ea1bByED^J4_*XlJr38y%Wtk6--U*LNr0cqGG<OIEvwn
zy+N&-x{$>KR{3CfHJFU(Muw7&%Sr*#B!I2qG3Fxa4Pfu0GZ~%|{CZ9b&VqK5@}Wab
zFS;DX%o}k{<al)+kCsW!30LX@o2Ih3>>7QJo?D~Xlr2ENv8dl!9&r710_6@L((P8Z
zUcyiNr(zH5(T>DVai7dKTsMIsmbDY2lx^7Ql}W?+9D&i5Zl4ce0Fn;Yq_WCFvBbcW
zx=x!Z`=lCt6-EN^`yqZ>CG=7CclBisCagLQs7+(PSA(IHXA;NlO{ZCf*5a4llb;Vx
zPegP7mH4H3bky8Gd9|kf9%HoIkkHd(Z@^nDtlK9u8Fleu=V<F^_@?=6cW?LP6;9##
z?#cen@v(S*aD<M3&7+gut<$~ckvKd(Iy^Yu0rEPIV})zRY6#yN*SNEKgArX(MoBlV
zYuXbjK-ul^X*!DAF{MU#i&>Oh&+Y)kJyYF6Vs;I6#{K}#soPO9j@6OutTgpwR=Wlg
zXS-e#-!-6F)V~IvAdaDf=g9?h{=7TD0M}=Obc{`3G=*4s(rBzK0q<{6Pp8MtIi39>
zVUG>~2%SKI&Mp!Av2q=xn@^l8!3;wDB|05y!j|fqPuxEp?$wkFY{&R+YUWULb{>-o
zIxk_P<_)Ysbe?pjsdP+kjmag}rqr|zACGXU`dmdC%{2feY;v~Do#I+_2JMMkoSJeL
zbtJ{Y<WdHMo_T!4*+~S2YDi|Hb|OXfyeVnqIMac3jyY-R*QY6O>!DtQ`q%b?0Q#Ac
zFp9~8v=cLy2G(9QoR8iE2yy|P1&C#e;v8UuxJO_qAFRT(S*HkJChgcPEe(!&mNht7
z>7n25PC8^R1<ii$@A7Z)b-lj41>0>IIC*z@f6xa(>q-6U>oSE~@KLGiEwW*@gv%r7
znlmR78hS~W-fHF1xeAs#r2VXd!nm>+3M;d=C7=r}q3fKZ)@%8~%NOc4Abq#hiQ8TH
zycK~o0_)uZnXP@@(w>R%<R$`n;k+AHoc#A<99^aiMNO=5cUT$OnOmkz^5WvPQAn2;
z#9rmPwps<zh>*HUo4P+hANo-owXZ055OBH}Q{}J-(*F}5G%G8s*GrAswe~+#Sorn&
z4Gg-}aI^8iQ2ZMA^|i7O-^F8b-5=a)7ZaQlcMz(I44w3%(KP`GodDs3Sd$>%)G)XR
z=dM=P^yh8h=LCXE@rrZdLd(!0N;j%2L}hMrfL##fM!RE9>IBCy&<kJ!PL(lYI2pUD
zp|c{GtT`EjW{qT~6+F~FVU4Z>$>SDe9%x*nD_(+xJpX0T0xb>D!FVvl8ci47=<*UU
zLO7ChQS(`2{3dV2SA^h%G<~^j$s1!!d^_hL9X5T(TE0Y{p0s?*vSt>+&atWc78^(t
zZfR2IwYawH^ko(@d9%2<W`VRPfN+y46A~?@JG`1`F(D-Z8nP{V@2rZbbCQ7Mz7%^y
zL5nC*bh_h(@xfN4MPw&ahS}7xfW2%O{sOWY&=tNp9FLef_J+T%ys;~vm)hU--SuJv
zzr^3(P@?DT=2#RJ=8ob=u_9JQL)Bw+7~a0JrbAcw_nUBGP5dz)2Yj*O;(bFE3*ZKJ
zPBD@BJbTxq9V1{hiq4a8n!j?V%q%$orO}e>dQfrs^!L#?TQ<|1DPEbq3fz79SDAdr
z<(fo?#wI$njiTOoP7-WtYaM4^+h9&>_nZ43oY{{`P`uYXv70Bvc-`h?V26f(*bT|_
zD%;F{&dSU+clPo6u)|uNu#ebd;QZ%ggV)YBq6-Ttcx<b^PD{6XU$J3X8_pH)m9wM6
z9Zo4A0Xj@;q_Yl&gYMvxN+QsPqE7DQ*<4>)%S+Yw3S|t*;%`YZ!LjBtM=G9t8xqh=
zCxlPZH5H7|AMl6qT?v7MdI*agCFBi>sc90&M-w+TLh{BPo|=mr`i==G@wW=}KpToO
z3c2TK+9tGuK$xH~nDjw3c_k|8q$fHPWVOJD)2e;AGGN`fzvc)h@PKi54;e)+?XS?s
z;vz0VvL-A6_a1UWSqVX)CkyONgnm=3R0BXQEdhbz2<8E_e&b)+&1n%;ZY^C$gixP4
z8%tCo<qc*<L(+9A;o495h((Uoih^L@t>E^<i*%<F#-f)tQj+i`h<V$9#_MX0!bk6{
zYAecO$?9yoi+0ji6-igt9-p&hP}r`9ufSg}`5L-a_(Jrp*?NS2yxLy^5r<5>n*Z!o
zRxPbWU%=jBk4W4Zy#%Lc1D{wQ-!W~_4CeF~ROUre6Q+Z`Q&i>;+HGAY)}GrQxGo0x
z8lg(WvSic<U&w2Optqav{qZXZW67I08ZO4$@Ci$NNl@Z5z01}*(>JWl(E8Gx6jCY_
zxMssAxMIVq#Vq!cUx#nrh$W9<0C+>9uRA^ej@^i8kmj@Cet4IdgvZo(E@w6zXpBZg
zwa+zkMW-=GqZ5Lll897;EG~wylo=%Hw?{D=wsILdkgOc&-CSV3O<@{ZXWAGYi#9r_
zA(<VEW$!b*d0<4~B7|H-Np~^=sem^v?hl{wd9J>JOF?%#4vX~&(9Qfc6KjP_b&dCx
zC@q5mQgx0*4J9FE-A76pbl5mB@#?6tW`g#oAjQxO!iu1-xhkR5ayAf{A9X3TRj;&J
zzCy!t+Jmo1rQPjsUw-a&|BGhPX74;<FZO9Q-^<_sKKT}Yy!+qA)5g;;_rIUz{qL93
z@5G>*FP`n5?QESK9A)kdoL9j@@8P`Gj>jSe@BMstK0>!*+Q7@4EJen=bBJ9BR9@7m
zX$A4qewxsAV$@mUTR+aqPq{&KOdwuiK8w<%&71*a`B66HzYa%(4x0Y3$4Lr0mWcm|
zfMoavEF2!FYzfvAJBCb_XEw}-EK0$+y@EKoR>{~l4FnudCo;l|y4&QiIwzpcjmR<(
zR*dz>E&zKzxwwc&veYZe8>;Fnds*SHm<@-bJ&yy=cDf{ArSdt(P*s+^s($_ji`JXS
zwuc9Lr8Ah|okg6ElOA@vjq5(Xlg&*#4ArDKEjdx1l<^y8+m%%!;&>Rh@sf>VPA2C|
zlGtE_-VQ)fG-0wd2B=TZ@a{NO*aA*GuE|m0K*zkjy%s$MDFDX0iAUoWtLgL<MDAV-
z<pKXKe!S+)8^%JQD^5ugKoxeQbbeJXkqRmt#TOE+B=p*PXN9aP`-6P=e-=UDWIAy-
z9>>4F(UjoAC|AxWowrU!E{8s(j%YjNtM?VhIgUvzi^U9)WQ>=g8oI2Pz6(^Naz1y*
zk_hYkQ4K{x8}B;jE2o_GW(B{_(zG?{Jlp{fmz8q~4+Fc3u9ADlgMokrz6Aab%k)hu
zRIfnH=a_*B<#Gzs>BlkNUSs~JIjbfIW>|{pT!2QMZY&&*Oa3HqSg7Pqu&?Sm%Fw3^
zYsnGd^#+I(SzU)`OJE-J@=|M4xNr}Sv7IhfuyiX(iY3rjm)&P;R$yd?;bbVMqDNiu
zic>DTV>|uw?9k2<-^%^;tM2GcDPQqYz@?7>R~iCo`aTjC%UUsym`lm6Q$WFdh*m=8
z9+XwADF+Z?h6~}SLboryq+fLm*_A-+g+~3mYK@`wYAe~)78=K=J?)-rQ>po57Rpq&
z7_dmsN)b{o?zM+^72UpO*19HKeEMKl*QV`Fmuj9_??Q}`s_L+D#Ln~vQu8%%l5Qen
zE!4d!0g-R9?#F-Vid3#C+sHH8z(%IgHK*bcK%+`Mv$>%YjU!}E=K~OQku}ZP2zRHh
z>*JsRvzBH1Hj(=;aAO4D!2Sq6caFQ_7Va+aTH&0`<Sn#Yvvt4dc8_zR?RK{~2}sUc
z23Du=uyc*{C%tp@X(!v5(8e)4u?R>`>d+{Zm-EDBhQr7n>*@oy-Ri0@$&p+w3x5Ju
zHa2!s=10tq^!cwQXt8Mh4jJ8&S?YIl_m}|j#1ilq3bT4L{+rE}x)hn|``r|uGn>=X
zk#FSeKg2Y&tW9w90w9Yey3Jn|6~VzvV$K{*zwBp3rZ`#u;f2g3iTCN~)_N89F-99@
zmmaC=GMX5~9@YUPGT4#oM~ZONmtGxd54#wd*yZ6UzDWj?6g6U_FK!R|-;7ZM7~M(#
zK0AsWSQ%Q4yLX~}6-UFVTH-^ot%ZJxXuX%|K-0TkBI_70KDu6!HQ2n?UNuUNvLL@d
z;I$eP*s4s%nkG;IS3>uO+K(bxXI_hUqE+uC<9i%!Mh?ab`4yL4f?S)88fV8-ODSO`
zaFtQxig0FeFpZ2gfAV}RFGxGh(a!P7(eBoX=yokBYZQ|z13O{dYDejKU4FD#Q9Vm6
zp=Ik}`@n>~bLKkoV?$pVb_mwlYw6ER^5WJ-^2#g*&zBcuV`C+w7SVo8QkfkW{cNP1
zqKU&Soh8rOyJp3{Co*RP-QH=`{JQdn@5g--%Qj=?;>*}cPHU8fG+9{-85r7+>>}hO
z>sCuWTM0!jx-JKl#A1Mql#X?fLml)P2~U@$WJ;Ka7I?qr4Qk7+aW@}T@NOD@pov6^
zC&jp{>919G^>Ue`gJ=cqTG16*Mtlt+L5{_18q8ggY;NRz!&pJTF`YAkJjOJ?ho!ne
zSvTopSGatbJ5sI|9c&zfh`Rdklc>vwoJpcUP7I<Vf!_qW$I&cs_>F{QeIuy`lvf*}
z+}`M+A+)GIZH&CLgZc~6u@M47R>-tYG&teV<a-BaF_0*yYrREYG=X1|vG~dluFf0O
zqJt@Fyv>$2Hv&?8*oh^pDBB*f_hZDas|u5wGDjKa%Al@?R9>VxZHSLYqrqrZ3}9zo
zphvQ6@v##ep&mkZCMHOXT>vIxEsJhhnsF4fj#A9SW*=p?rzmU{U^5{`lfz3~=i(0|
zFnprYLfNC<IqNYld&>D1G{)MheWA-kj9F^}Cxc{_^g6&O@wsA?N!21e>Mhfd$t}$D
ztgb9(HyfjJyohp8ptUFMVu(B3@p5uhgnMZL{ykSNr79rqbrClE1y@cwaFb(-Gya1j
zL<o@zk<MUsi-D*JaO{5(57VS8vyk;b4elvVK5d>FjwbzBWbmu=Tw$Sy;Rr<Nq>bTX
zzNRLVjbVnaOVF}0#>-J!Uq=cC-5~~+rPs-DXl2v#(vd6lOP_bMVDmV4RhrzOab8b!
z!?-`<eIR410{^)C(DIg#&MmX9=|=u$UM0+(Xg6}Vb&qkaIrm73&COgHMe4RYUFD~1
zs|f=UappC0!c4jx#CRKx`dlADcM^d*-#L?K5p;9^PIl#yWlfAkIwSJqnZjvq$|an6
zC~MYmWCRHpdsa)2LuwvreXzMfj1pGdmFjc@$+xQO8=4fVPC$@7c3v0m;myRLU$3u(
z<*r9r&+LlYEBG#)7V2%m=Xq0ftl%N=3(>ol8{#w<wzr3_NG(9u<oC8YKK~N}QQr%*
z`vovlj9Qp74v>%jPI0Jt-2-fpGCXDqbLb451wl#9o~gl{Mc(H9y0stBPArK%v!8x-
zzIx-(dxms#yRQ}yz_cNAbCfyFHVplKBzEJi$9*`!yT{^0_hu@8Np6sveAT=7%ZdnU
zJ=c^bK1Qd5u@sv^7fKqYHA{V$W&~xx(0b-FUv4eAms?hxgI=*UqlhZOVv5)m1kUDk
z7|bI9YYzsaPSRIWpB~&{b0!2xenmL96R}BFw3dlDqTC1<mcYq1uCp$}+9hrV|6#`m
z?~DJh^8dw5%UEh9?noji<pw+@|I-gYWc+`h{LuLRi~sLuoB!$CFHXS3Wws7p9D+1(
zytCap23FJjX(!_h9BlF+r(oQ<?4ZeJ$uy<~mjQH9N^71v&nU5qdI-lU7Jk&%yZ1MA
zWVnfA&Uh5ZUiUnHkQu~*D+7APQR@kB(n(k1Yr3Z|OXq%-hwMmU*W!naIn8r83xf>y
z_n*&m4&g_JizF-zAHu$i0W<o}MkyG*snZ)7-ERj1+s`)xj+j<%$R04z7IGB%k-|Vt
z2={^DP*Ms_SI=x2x~NNu$C~gSI9<wT8H-46(ExUozKw>Ja!BKB4%psI(w(Co+isc%
ze6TT3MzwVO5$$FU{h0R7W0$7?FK-LK%{u<^WC9+667rBkCuI)I^+#6sptnuOom+W`
zO1DT`DbKXs^tVmW3*7%K;69!HK8fQ${pkn1%{S!&gvN2!rqHRqCZxxf#1Z!Voa^;z
z>u3QG>oy3!E3^biIq7PTSDA0PTCsS&Yf--fAtg@7SQ<yj6=3h!43rBDKC{WHqxDwo
z@z-5k**hAubl<iBxYl{p;pw><tTkHg4y(D&$YH1AXb1<K%FCh*(Kjl-A_+#6`%cQ5
zGxEd5Mvc<nsPTDY{F-#oap=Y}M)SUqFG~9MS3{L^$^{~1K5);Iz?ZIHsoMo%k0RYG
zG6Y3AGb!L~oGu8HM<qLsWF3wP(D@>_Z745HqO-bWM^d~<4VCv{_!E2nnpVhCsmKLB
z%)1+Bz`xp9sjm66p+NCKg`u1=)U575k~0wt2lK)SLn)e$M2k0yrs<ps6^%7*T4hEI
zmoB7$73lkQTU=WO$x$Q2%P@HS8=8iGUE#zd0MD=Z?9J=-*O9o6@4U*Wa=QWPx34I-
z_pUV&XkJjuF!Rw$O$#IVWpN2cXC4SJ2ZK=*Si!Wo)x)Rehn}flbEeGsOrYH+$D!pM
zM?K=F*lv;AZEiMvE2&y!X!~XzhqF!$6-NT^78|)HX&G9QbaDhe%jrH3`xFGwkyo#n
zG?&0<G@6@MX>6lLok7e>GPw*<R?)uo8a|b!)7Z$$X;EQ3bU2m?b}_4|^u-$V>)vWf
zeJ>gt+N*i2!x_MGcL~?75b_AqK9ClM?4E3=0Qw9@-q^6#iF#RTys2hg1|%Z@S3s!0
z(oNQk-{3$0DJZXTPo_3lmA*eRwb@nNzQ)m#vGT<P4S5`J27<0CaD-yM*j{fy0c9w<
zKyNp37p*UY5umrH-KXqA5JZiEZK~j{nQf{kRjDMMbt&B8l|aQg+&4DxldAa9l?7G<
z7_~@mFS$l4Ls<*cFcSe1RAhisrL0s`#3<@t#tuKop_JYJ5jLH^qg_J68Y{CA5_P?~
z;aoA5p(M>O5Ro2P!Mj45;`e_Eg3>EhKvA@gk3>#ZLHAL75s%`2TW70&uN;k;mrQep
z{h5Obzk`ud_9hu%t1yR~2jG4I1n(+Fcn^M;C&=e+b!S<~2fBkkm?Y-@>tC+?{?&$c
zDEa=Iu4blmguL6n$kTP7ENHdDapSa1AML>~1`P?bhbrqFYRzYCR0=Ej#hp0CX`!oK
zX_CtJHRs$nk&#$!u1WtoJd`ojSau(6@mWlO4QcS}LzKt!fVO+m78Cue>PbScy9&Te
zJTy+nPTpCG_|3$*C4cLro-Ldf&&8UW67{_4VLNYmr%hA!tp2omcBY%{`JE#>WsFlr
zm=77K0{9JIPv;=?ZNbX+V&Jwnv4_trI_>xQ+r;bAMr0O`_Iszxca|3uZOcX5ujxKI
zz0GGb_JYt^`ED+J{J|Gl0|#AFd-e@|0u+1U89R`9zqz{Fne=*hnAEnNV6OX0khfmj
zAurO9Cyg-1isM9Itzg_$--)?$IjK`V>jj)5$LyqdauxL}tRg2MEm@OOP8VTn<4-7m
znEKkK96Sx^b=LhGy?U;TydG$x*R_>u*y$l-Nhy(@;qS?Xm0bU@d33xJy7@ZVY3}VE
zY&B1IT8De5Kke>oDDV3v-}}!Mx&9-xUv1<aHKot0w9Hp|hf^R9A9?1nXXdO{)DCw?
z0}4~kv3%K-dkb@NsoxgW*F~pS`LoOEEzDux&CSfkXX4;lVZ^hc`gu)sB)okW;M&-{
z>ws)9d`?XJWBRQGuZthk{{zL;n+Veb@?$6IQ;s)CvH!=@mG8d$!Snz4?)xV{eDVMI
zZ2dpJ`{Ms0ckBM~&e2)(WcO@`&o2J&;nBhN>DEcW8Dv`f89YO>UK@K?sg;B6$o(r$
zK~|$GhI3e^8gR+iBo;?dMQG(eLuLwg{aYAeUUaczDV*WoBmCR+k1|{;tJ1LMtaW3R
z%9LZXGL9Uh80BANuj?=nh;w7g4DPv(%*ll1*PxhBMr0|VI8jQQj~scK^6k^HkS1tL
zz)(nRt}LaP)Q*z0qYsR{cpTiKhXPEkuKQtOP@UFYa`J`uy7x-9&_LIHkLhklZkP1U
z%vtdfnKeB2qV&4PVjKW`&kXxAS)W#KAJCUd)31dDr>{AAsDomY#^VYBzfzNVdT>hB
z1#B*>7a%<7_(5xrE3tEf`IE2MufV*flFK9cdL&;nQK(EIIf+^*k988K-1suVgkBML
zWlEUzkrkj{{VS8+rG^9q3q0vrWvPJ1hrqTl+u|$vx$IZPjbCA5J9PJ^{4f#dSVVUh
z#AvL}$5)g5%rY0`P%K@B<%Xn+V>Z~5vGJI|-d--g!mJ>$H(n~^p&C9r26sQCdi2zr
zfxpi0POvMCysqi$ul@d?xH1cfehh@>KQT`k{)$ia&e7RhTJVZ^3hy&qK3&3mM>axx
zgIe3bOCl7&eBH_h$PXNUjY@V*-gjKTRwbXu+E3o3N5sWyd=^}cWq{y7RzS3L!rxFQ
z{B0fC0;+|-aoP?USqku>3H2;)SAMJ7q9Z_KbWiF{^FEip{c;F^lHGAdBBmyO!&^pt
zgk#e$UUdYS4xTyIt}bk_meGUxwO2VI53;=gWL<)+t02RQ4P6CU*FcuNTT7S>vZHJ8
z|IjTw@VR0&vXk6m3G$^?B*amzWjTaDFnPPDSd)ze7K$q@I9fG4of_a^53uNs@XpPg
z{*`w+W^z?b`m#2Uxn{Fc*~>*lLcCr_&`e#VFt*j?uHV?MDtJ0!=g)x2Kf62NQZ=x`
zr)u!ZZ<up_!_3Pv2x%<KnnfjYt;4*cE=e)OJTo(-L@BF*%Zf2bpGdKAVk|cr6spi@
z;0asl4lbWm;B9rGQGfc6&_xGc!dCFhg$6WRZan>mDRr<cp6|YVv7_9XXp?FN>^DML
zh~qUbq7?Z9t|3LJ>WdcIK6E2Pk*e5VCW$5U84`cli8t;KCYM)YW0Oi_M&kEL(!Qq7
z&9}scZWD|Vh4XPR7z*@5!q2cvfGHWpb`T1EwA*JCn;1Wg4p9lB`sk-BlLjLeDVdb6
z>PQ_JP|2gFdnf0{aNoufV5vTwQ_6|S5J!O(sT&u}3gBTKmvDX_QL>K9NdfMNZ&M(F
z))Fg|!{cgItc!8T;)ZWG8&zpD7+_)%DS;>(1%B?<UMbu!F83(jGex_mV6)_Im#mF0
z7=#9E!TCtBMqXfM(oBg|?pP{4JN88-!#MqmgIqWtF!y(bb?3O-3#`N{1#7H+R%las
zT%`yrkYWZmXf2IG2efN|ZH(E@%!l9eZgiQ>9}R9_M86G2&yq1-cB((cDyX(y`c?IJ
zl-u9Y{rZb@`!fn&`EF4cq=c0v)W9_Ea@gC$RL@z5L&VpxLu;`#Rx-bA&Y)Q$&|ARM
zK*w1NePmvTbWmG~9!>hQD##|!n;Mrbaa<B>J%Bs~L`|(@E!@blL94dh?b`hY)L1St
zplZwAHYa(>+2v}ZaJioOre^mKQ%IZREs9HzPm*5jyoGPQx0&ALc2KMCzGv`MLbqN2
zcph`jLM&noN?b*{w8)f)q8-uMCv^xudaH9LM^j(AZ%A1Iv$eum<!5g~{SdYjaWZHZ
zbx0c-Mg0^jxnP#ETR4O2bG9H-^ODV7#gkEzB0+L7?Q>Aisn9Z?)|Z2UOojpcA2mC!
zU$}|J30Yv#5z<^7AwJ-V^<mBW(DG?fas>;v8L8E|b1Y+Z(tF>9XR|^(jJ6enRwFUr
z@>Jo7Y<yM65@*fy=*-q3a%9sC>6D&11|rbX4)n^!&Uzcu!((CsKnx*+jcf`$mj_@Y
zcv9iES(L%MT$H_|tB#M&{Wbr~;r@@tnLj*h&aBlHt_SPsIW2fKEdmu;3Z0$Y+)-+@
z3eKa=3d2@ooH7_{Je!!-J{h{v<dPwvN2Z$(PBtD*bQRTf71i8>XzoEX86pYMOihC4
z(afhwGJ0V?canL36p_O^LVP1k1D~L_q2`mLZAgu*So_%gw3Leb%lK<^r6k73pr^9p
zpifWmXzddgUtDtfY-j+Wg(vng^g)Qk9VCxlNKXE%kW5x``AkV>q)2A;K}lvWLnFB?
zhUw-n#AQA?lKBgnuS$waA6`;gUN)8meOI+wuwlY-mP4_H*7o=6gXH36JZRB9$AkQj
z9^_&4KAe6Y=;2bv`{0XH!NZ&i@Cr)T97$@wRSPcg-+j5D%DG9JvY8+A7OCV=rF6PS
zJ{2uz`svIq0HCMSR3A~WcmUQuIV;F}(O5J`o^=@mnuoWdj@I#sEb8a3hO*GEJw@-c
zH~obTY*VEdd}=%OJuT{qy5zF`4#ao_e^o~D{V=U$;BeozEKK8An0~zIn`!>}fARdU
zFEFQ6$KHu$62KP6|Ezrbq~XW^JY9M6-Iw^EFZo|T^Z1_^&BH^C)FUSGqIq((`!W;r
z6K<1@^O31uJ26Htqwl&ng?Yk;U3U&nV1$$f_BE$q&2DI2we$7)DAC8RER}9va{U&S
zmX#u7<ga*qh@>9Zc|ELR$6h9J?&&y=70zyNZ|nVd41`5!>!Lf~<?_|I*KH}POPBkw
z`2T3s@hQ<=CkE_yvErx`pOdz);$CbesST=A`I&1;xoMI1VjZ$x8e4hJN`)tjOI^Q5
z_vCgUHbDcDU(fd4+*>%e3Qw&EPOX4ER%I&OEOC_p+Y#wz??60@(%4DItKF|M#u{wu
z%6ndN;G(ZGS8F<^J&Tiyi2<uwrk|RL+NgP<!UJ}C<(z!AOs>AZ^U}*17;L`Ju`cl1
zEir_%sW4y+5&`24?+s`M^16026eGLY=?%wsumx2`C@;m${{?VW#ST#DDr?_x$@XA~
z4q?;xq%jZ`Y)jo+hE5T8rVAwjsf4kJ)qC|T+#BC}bn$XHLV4D9IGqBg{K9M=jvrJX
z!GcIKp_Q>RPn#wz*2|8>Je&pkBl=yg=(|~D<?y-ow=X}Rvi#4#Hq$h@?3XJ2P^$jJ
z_fJ+l`M>e~_bXrI|1a|YXD<IU^J^X-@BXx(5&XTE!N~sXX}KMblAE}*Nn%m<^#R6!
zsWx-+mwCswMu&!KvbyT$)2G3yjnkKymkHwE&N=s&QI`_w0KdSfw^o0EVc}6)>F{@W
z0nf?W^#d7`E7K{;hil>LuX-l$W2Q$0!?QbbV|Hh&)8W6JGlkqSkd0im)wwBpar-Lj
zCuwg^CI82FCxgoSovf6;?lb31)52+=->D>CIu4QNj&Ycs0c|2$?wUvk6gmytC5<lP
z?j3wPL=6KGMpf-$b9%%A0`+7J6LAQtsb8yE)NBxIrX_dO8VuL{VkGtlO4MhePyMW}
zqIEN$Mr-2U<J@!2AF!tSoA<v%r^xiiGR-n=>pJOo(3-2a3_X!@?540~Wp%lWlkJHA
zhGuK7l{~le^eP(0!9HaQBYZFDzucwlhu=Pt1`t%Z-4|SMm0nm-2-(OG>VKOl(I7Q@
z+)XPq@~sD$8k#06V3_-iasuaQ|8fGPC&zpEC^T_7Na?vqY&~d{>2vW%3GS(MEL9mx
zi##5+m7+Aq)f0GIn;QrItzy0w^Bv3+D;cN$Ud)5goZ~-YJ`OmIsMcn08qLmpxQU(Y
zs29F282Z+l8iMJ43m^LrMQS~$;B2TFPn7cVm>8uEQSsrCm;Pd2IM@DPK~sOEaMQIp
zH~OB;6+8N5QCaVDI`f?k0DkFc1v)GK!LC+ovbuZaD?J&?NQ^BK9lK<1tWd9o(RB85
zq^ki5z*G|^9oJR)Bt2fu8cyD5VLK=Ji2jt)kH}1Z(H~X28>^RvCsix^hvUr?z>;?E
zv8vi20Dg86-@7J@S=YZN8CPr)Vc(yG+-s76Z;dK^i}COQzZnE6KO$Va;#BPtBX)`i
z{N4`z=I1gv&s{u~@F4P*&+*5qV2Wx}tA)+8)&_*FI~fisM6ua!$7y;Pjj=V7F~NQm
z^*e)}^RBwFk^4&g>t9ZUybzVIDv!Ub;0Ng28|iD(ne=XE(E+75(yWH~kTO1;W8$8E
zr^{*CZQ;X`H@;R0h~xgG2fX=tHyOSc20@%%8-6X0jH{Zz0omXSO+Mg$qv1ZW4M#BL
z>^NMrAI+v){jEk%ep@ZE5vgB$C#vm<X>{;iHZGL>;9z(DBvk+@ivY#R-;88xPI6K9
zD7PLypY+i{t%XEekFYwizm=cVTsbN12vJTdP>(~(LxGG?3rx-EE%VC@7XazPvN@~r
zWEbR=mROH;?h5s6#|^FrOIDxIau7g|u^O^tF^_1pT<|U`c2?$A3UIG0ARcuE;Y8tT
z+V&ak3JIP&Ojx*+EJ8|bCRuYUxuBf0x3JEH4w^j8b}hGKTjK061ILeTcuL4<Q$U_w
zWSgB6`@D84NFM7|oVP|i*Gr9qNUSpnY~k&8MWV`b1jd=bZcA_yu+8vpTykW0%N-Bn
zc7m*v4`2lPYv!GuxLr>NT{y4M3GHiaynk@Ag9ep;%;$5eK8`VE1b*u(cRG7h)H_cu
zCxc09KH>4=(P%I#t1WHEZSTp^RqF41rtWDw9Ot#J`jfqE7TQ;2$3ecV)xlu5*6v$-
zFHGEpP|O={!UPMw9&x5nL*i@h`z;O{@Tnd%V3DRk-+Z+pd!h_j^nx4H3+$q5GZU$q
z$Zu+D7<EGPInb_D>n$QPj+h__uSkkg`~ID69@$Q!%K^;aHjvC_p-??!lzG2vqt1V{
zL&pPYvxI(M<alsD%Rn=eCBC+O2cIYHT4R&nzT6Fbz*L!OdQ43fM)7)GUOI0EHivUd
zEqhdV)b17aCm;ZQ%Dc_fK#&7>Bx%9^vuj9TSD2;PT<3ch186IKGn@he=u{&~VY9-z
zwdLMHavwHE31ctmAEULlQ(1A=kh@!I<n`$IkEk9O8zuV8^mxoYnu3@TybNxXUT>4E
zO)}b2UQD`dD>uzQ1!^U+Tu-!U=8tw_bvms_pNl__#mqz=dm8EF{QXa3DKw2~-go8w
zV7zPIzk>hAv+kgMUEcj;ivP#=jURr<_<wx+!_zPRA7A`G{w@4JTF>?lw*EWE|3kkF
zdVGV{I5~$y7onF)<ON3Ng4_{M<+N0bq8@Ex7Ett`D5faaUwW{MaW(2Q^M!6)!?bxd
zB@$b%!I>kMKAGB8)<rm=wp&5i{?t}i_v3U-x8US>-eL&w6~AzueqEq!lo+rC!$l8R
zakM>;rKNi})gi5RX*79vpua`qf@uk7h*xDu_I9A_Pi)vd|7ZY2?@I+C_LCPUXHd`j
zuW|qtVnUfA+1c5t;O5THA;rVfXM4L_t=;{ToulW?t(}VQx$1aGYtpGm)Xs}Ph9j@=
zJQ7Q<#bE^%cwNN`Q5*)3*7Ax2R3)ie?@>tBIC7nmOF=;9cuFB={Y?RQ{P($`1M30F
z@b>`SSwS^4+O_1U{>%!#KDXB2z~o{;uGq=Z!TwLMRC~MoJ64U7Y~grk01FOfq_XC!
zR48fg&mW1kqbVcFK4sh0(Ac`{xpbsg+D@o2Z_y37He_b*&1oLa-8X<mwoV10m!H5X
z!5~a|VXVMy(STSC&L@+8|F;eMD>P=tXzH+xm)u%2x@p%$68!KDQKk%mTp9hu-9=87
zWHM-)NCzv>p`a$bwaWN4d<TfB1WAstwklSt6seGDwgT^*w$<uPAk%It4`~sI2^Xz#
zns9kxAH;kJHCyV_v9$9CsrXcK(%&`;GPUhwKSe#{e(UnTlFUG@OrR4Cj4tkE#oF0S
z;&vyRp*<#F@_6<mG9tgqWl%V|FoD(PP61dKS3qi!T%y;-sS9Xf$+}-LeXFubc%Rc@
zW8S)~prnt(O&k2RKqW(v8Ml)ZyRk0wShl4C4SW$GK-Ns7OOrKve=wHSi&C@>-*GA&
zlp*x>zqm`{ZYQPsI7!#KRY<w}-L0*~7sqE?`mWfOPEXhYQy<;)!gpjmnRoBj)dAEz
z0jUHu=$)gJimVQGQr*~Csfm-HcaL2``uOzO&PythxO=eQg11LI&F$8~{@yFUPWb|g
z`f3N0E7|hG<NeGn_!ltd(ndzwJ>qD~<-@IP5Xx%{@~WAsx<z}BZV|FNfft>`B6w!d
zM^_O15$>faWN}kcWys3s4@Sf6vit*em6_B5kLlz@SrYR)BVH-2-`5_d45>NWr)3^}
zR(zCya8>1gkbTxF%(VChU(sXi+%uFRq(eLwPvJjJ19ie=Ud+q*deS0n(Lff^OH%u=
zMyppB7J!OD!5>^WLZBTeVarvFak*B(tbS*i)C2WCQbh$i%I*aM3Reu2e@|fPM%_DP
zEK)wQ3SKqDlJut>*shdTjVM)pf=)I$JcSithe?`FbSZ<Q_+mY~VCsg^-SVM4WWo=z
z9ruQnP{2Zdg>U%6PQbUZ5s(0@cEPVFF#m2m{sv^7tGIoQRj+VBMBIq1sInZFF7yoI
zRBtf4%V?73X9MA`8x7Z0FlnP%ZSB1Zf{vE6<x+X~bHCcn-%#Z<>JZmLFOtg9Fd2=I
z#%OP5lw9G!P-3*j04f;*yat(Rd}fVwZ%X4MJWD*0H8JB4?unZSgj5uRM}t96$jU7F
z!()~SJ$mu<?H5c^Z*aq4#ER49#VC%0T_&l0={uJ{j~}_Giu|q<jiXAn?$j4Yf6YY;
zc?G?#TFpFAfjC=CgiNUZq*vbP4=6xjG6Y!S777)>r!QN>0d2a}uL&kgv1v?HLuJcG
zG0g}VQGWuz5uX{wok<(D>7l8LVku-<)-2fsmYgRvX=I0y1`B2p9MvT?EFiboz3kGy
z%~tj1Gq0di!l-N_;j$Xo^(qU8%Utk_yjNySHC&?KO`RUDy92%^3OR-R28=+3jO<hu
zT+^mJCdXxeN){Yb8fZAO)3-pFf_8r)7hz}Xbq>8m2o(3oUF}60-K#rT1b{b92}S-(
z7m6$b8~`)~s-1LdnVIVqZ6bFArnk|sB8RhE6|fS!Hfv$+L#$dDX0G8Xhro3D1MOTX
z`A<!mKIp!XE*k+Pl%+s%vI{!ff{!d>g(T=ya<pfz0*4HR+G}OwB1;h<SfF59NR=Vh
z18*07qReGc9#u|-B1oFK9LWQvm~;8`S(-ky9rpe6w_H4aF!olA!>ZXcR<U40Bl~en
zbGF7L)3M0~l$z6fH)WmOsi7Avx%$=A9q7Vs@(%eDEUc{KS>{q_85!8`!ADC0D{<U9
zhs7NQ3daVG%ejSQox!jc`L$!SarA7*FLcw;60c9?x@u_`?q)8xHYHbQ@%jx{bZ=d+
zWmmRPSD-J4_E$r6BJ=us=Myi$pT`5L#mhIixIm@3EJ~3v+b3Fhoy*+>0x4`{Ps{>R
z$U)#Wl?msG6yPWut)IP_DzK&4ud-$n*}4A6G9t1jKJ}{3WH)o~b~CqgrkQsgmhGoh
z4*`JkbFoc<&1>reSk!O%9;X59(MKV`e%r<1O<fRv@6&q^PzwJxuL(`a<yq1g;;Bb=
zAB*lK?bLO$zkTeH=d{Y~^_6oAob7XCW5vfLZ-#lHgOGyP_S3Jy@zalZXaB{`;2v%U
zHfZyr8)5JP4iy3_aDm!7+u1rfIHDWTO5WXsXHS)8RKC+OLdNDw)iU!;l`!%d^{?ej
zjg2=;4P7(dlHDHO^_#`~?dLYs5PZ(wSnejsTvOfKrKMC?Uwm6W4c``ju|B4gi!MKP
zpG3%0GRn^CEuE9kSdZf7bDhblI#UiI!<Zjobo8$<UIXezv{fpaakkMj`#qe<sMK8Z
zyd{jAM8-3?%+0?{a!W7h5noZy*4IPt>TMuQU#1u9d*@4r)pici8roV;fsyyRGXl4~
zAAEm277P#BCHH~h%U=12du2FEZeW|NhC@$PAgP&Qj{k{qJ#mn?_L$vP*JL`0CH*4{
zL$G|n2{&Iy($2etUY`^<B(Utxu#RGBRo%r)pOn{a_Ju@$6ZJclZb;DAv%AzIX~TTa
z52|&pyHJ$JG;~|*C9~W$CQQ!9N>2I(AJ{feD>X@V&WK2%E}e9j^a0659|?8hD2_Ty
zgMRmJIu@BM%HVPfTRs{>r9P?An&11Uh1<s_%hXJCt{fUu8;{3)vtvIYKZv&`$0|FA
zstmU(jf47^{6Q@g&&}is<LC>nPE^ZkI~K$b9tN+3SA|N{<}}diiopk$7x=8V<NF~b
zwM~6;a&iMdGEsd<%6e?_Qkl<CY>Ls{GE(koGQ~Ba536KIn*@;rQb@O$LP|y7*4P$I
zRB;&f<L+$XxaPY@3d*$r5l&VnOcQOAMk`^-^UW_6UuzFh=raFEA*~<M4gURxGV3xt
zTg><XK=jL|oz3-l<W_R4^4v2#*Icl-w2@6z^qOvLG$?;=MKA5fhMo%k>3RszHgw(Q
z*(~o5gJw8q7~LuU>2>Oj-zYVVG1S?UfiD}-6s9gsaM^Fj0%s3reIxUSW-2C02D?Na
zQ4d?G_Ywo3m)1p_dR#S)aA5+CbU1)PtJ_LHx0(?q1I+@#0TMMuMs#az+sK*uI&2uX
zK+3axKmPriLA)AM-A@^8J81(y&?VY~;mSc3-;<TYP!<Nfw#)2(v9Tc<QkvlIr%Dpp
zITZG+Zr@hk@1@{-n(zCV>?az=oW)?-`<*P3N?+;}aGj~QHu*fqx|Ug9W>=$8g5JC=
z78+4P%B@z0CMP$*45H(v%(Kel$XtnLph-U(-I<CVvk8&52<cSxP<0N)=2}t82ZJ(s
zkZRWaNU_*6Tu0>#BKFplaBf;edH<0za#Ybs(0JyNGE>>K>3ZhwGVpHNr2Zv5{jtK+
zXDX=K99`Z^5tW=Ey#H0FQ}@4H#|MzA)l*Xh=-eo$CHvK3;mj5-tBS(~*Q_>lo1D|e
z))!r6wk$kWn&JPSy|>?MBS{iP_ivse?YWmJfsn!Op4kONAB=G~zqP>^a8LJkd-W@%
z0?L+D8kJ<sxV`VQ&v9PtMnrx^W@VL1AZ(8Z?>W<`Dl0NGGBPqUJ_9$vdP!m{KMWM8
zl{ed^f=Z-qKeJDp+BKBn0b08EC%jOp8S`($A!&c$Mtpwo%gcTEGld<I#1vlo#~&C(
zF@oKjC*sGwJ@H-ZyVg1!EYnfi6Ibzgi2L+mKE1dA82~fT!Ir;F@+24WIGe&5w7mSV
zwNx6(H%{JDghhru#kt5c484$nt{B1rWEKyv2iZ6&0pi~KV9F0db2bRulY(j7+si0D
zx7`ZrGt%v`Hd-!|r7YNL&;z-Jw`S@JfXVbItR(Jlvo&j$(Q&hE6KIWsNA;3~bn#bj
zmq(twmyP%xG9F*hEj1gpiej9-<wg3d;9X81_51lGg%dfkM}8mylY$rsn3WNQ+v@?x
zGBwOrpr;iBQKsM(k8Br(898ZsOOhzfN`Md|8aJ5}rg8FDr|^-T6+fG8rCvFQGIRe;
zPXft67kLo4CKU$}?1bPbA$}jy<$1#@>hC;KHaN0AN))Qg(ny4T>%zlTJH;EtvWR6`
zF2^J(_Ya>{M9o@eRh7Z4;HRpaDVnu6q0P`My6RX3mfns|OZFC~KeIOix$3^)7o5$~
zA%;GZH!7PE`RWqn#YjXPHP$3g1Q|7?y3cK*O4Nw$g8Q#Oc4A!q*Z^jOWrZtty}GLN
zF)sx@#)Te$cUuXMa!pi)o7`vu^2+HjR-zziUp*$>YOM-$n#Kau73i1>Us$eG(y?x}
z7U(Jfl&WaV1gcxD1%|rite!_yGmzvp1_CUjKgmT@>Zq@r^_Gbcx8<<~CODuUoYfN9
zy~=XG=HH-WgI@})+1;Jkb~2M+m(6j0ek~^<xhLbAdM1&m$2==oByb@M*W%&!N5EIW
z<JrU}kwhgw?kpbdX&{&L_2KK2{b!w@caINtpT5|SlqX5CejmMn`?U@emWK)Uhx--(
z+v3~Q;<q7IS0yd!8W0cgr>y$GzkASjKGR1A91e6VztIU%brM*l1lV&@N?xIN>X@73
zm{^~Kvlq<zYO-_i@nXCS&1K!?+PcD>Qe+p@sjTcB?^;_U=pEV?8ljKqh1*{l_pn+Z
zlni*i$vlb2-AlNDewg6ZW(~6|0WY1XEI^VdS6A)QZquf8RY(BmIFX^<spfVncM}07
zB3zUCpElXXYW<HRRGO>G=H@WD64A0GYo|@+R%FSj)VwfTAr9kRa_#IPKamcBP$N`8
zZnz&+6jAZ3w!0g}**J;cZLzw9zQ$~h<Dc!knMe9Uou@06Z42_DE=U68EW)ay)yq_P
zn-K`Dxhqe!cq{Ns4X1mUF+Y<l+Paq1Q;LXXw4pN(<Ob?FzBH*U)am{f1hd$2l#tDW
zJNYVzr?Sk<+;wJ5If>iUe*qNjPsT}7w**Xe{g;jJzJL6^SO4Yl#*@F^*MIp2>%VL~
z7SI0g|K{oV|NgHyoPvOm$7_F!-^By5-Amwc(i+OAohy9$t(JjVdbA>z;Qv;rkmshm
z1mv)s!RAeqo`5?+Zv0O>AT%em^SY|_P@GIbf_d>;fm>TM55^$5po9}?5Kmeg4QPPP
zUJp~4I>6kE`zfs6Fpb62Wc)7aC)Z8!3y7*8#H+ZUiQPUR6hu%d^z9|cd720TQv$t+
zuRz9wmC&Dx!&n+9;MG|Ro9#|7V1tQ`Ce{9dC-{&NMYka~{^LKs6KB_|S70-kOccSi
z1a#jQ)ROXRVae`ZO94UKsd%}2x)0aE7Xto%b$s;m!L$8mV)q1oHpSl#PJa@+Pfw0s
zyguE3@hf0j{B8I6c=z!1SNvX?S)u*Wu{eOWIoN-OJs$ol;GT&9cTdHyN3V~?(cccm
z@xjS|>YiT#ZKY%iz}3Q(pD_B1aXg&JbYW90q(0VJ!f;QGGk86qZ4wW!5yogb#@gyl
zeD^*@6BVv{e3re(RIC#qPc(idD24)2!D}>W0WxI1!<IJ;=#&UXW+m(5zQFZ?$Ch?2
zNzr?Bd|%O{u04z<!Hv~BGPolc(<^Qn(+~RD54-&-f>oPF!|oaikdSafk4$kSbyV#g
zz1TlJ-rr}Hk&16ZWs$;$aP+=>o(|;{$5}SfdmT5_Bg_K?Co)bGt&#W~uj6>3fm8W4
zEa|Hlrf?be$*W-HkqC%&N?db)%Uk2llFFiry>-rU9NIgq$=fZ9(MSe=*bGynQIs|)
zY3eJojSBPjLcJcz*FTPNc}AuY-LymmtsX$LhA{V2f(V~pj5t`8H3?~r1~|eQV`#)>
zJiL&!4J15(78k4|sVq^zBy=-o`pE{VDjBNF6}DGi_b5kA$%P})VH`<69i&6)r-4Yr
z<%+cvzI53k&->ru32ZKS0&LWTdZa@IM+J4@z~f0v@J|$?@GHMF-t0UbQz_5=zHF<;
zm#P#}G-hf8P!gG-8AcxizZ_>*PJggt2?G8xRe=SVGkG+UNln@80q*Z-I2kvbKv-hq
zY3RcghxY`1VkmLbOF8Y69wEeg$fnOa3Zzz*B)Ni<Yp*xL1#(M2^L-!O%x7Joph^W<
ztV^=Wjc_;}oFO5bs%SpMK@EV)?Yl4<tOTj+o9C0><|a^1BN}#_gND&mD}M@PGZB_q
zCqD(nb3jzl^h`^GG>Pxrn(rJ8Y#t2+82Ve>rkL$J$1g`C_8qo37^V|d3cT2u7T6^7
zWLR55<&NHklA2XYa7*uos1Xz+3j>e9@E6c8MLEGK6RB$R?&}NOFVT9_Al0iOs~<N7
zYoiL&l5ON19Yi|#hJa)Ema|ystc>-V=#OonB>HQ0)#~cpgnqJpSG&%o=MmC#L%u*h
z2ETrTzu&N8^g-$lRc2}um_lKukHYbAKm@m;i7Xs%YDQVSMHJfRP@n&nytEru35Ngw
zk!7=z?m%pLkxu`dqtKp>;e?hVN`5^UK!da^c`3*M+_5<M&`m}Y5^{L2zm4BCL6`#~
z0Y250%o+%$cLB}D_y!*AO8=YF%V};&soPKgJ|#(Z_s=W4c-)K=0@1_q#pJ6iT^mUF
zL}wnEa8(kiJfxd=)OGQAmf~}cs{s;nza=9ooHd5?153=(|Bi9v%}F`-7`4PFX=bNA
z*wW`KIonH0lFQqbsRr!>Vh;G!Z727r{vNwk!6S5Z=rXkStjY1fq9pT_eLi^)LJ(a1
zQ@FTfi7E&1AtjGZ77Hm?bOpW{cIC0#&$1EauK$p9@vZ}6Ko_M(fQi!1m)k0T{PBS8
za%bP5OY|;qH=v-QBs!8{`bi8o1u4X!pfVjQVI77<5=_tCaVeh%%>>ossoaIygkfHa
zC`GqhS|Eo4c)Ho8K)Ok}vH4a_9kBR*r)|PC;PwXaG^DYgsnSZ45yg0nln`iST*9-G
zPdxEf3MK@WPNlXaM(o2K5xsq&##BhAsYsPtxkRitfue2%%-*Q2;5{wb4_b@=$rT=9
zV;bp^gsf-4mM+&MSI|a^aW>FmJYFXcF_&|9I<{e(bn(_VWxTy0lGJ4%`On_}{ofvP
z!e`mEi~M45|JgCz3x9-9MosZDx$eiqrg#qGFkWK9f|?_fQF)BvxRXqJ=Q-OCtdVi<
z9M%@kI`7h+_Z;m*fyX2CM--LaVw2%?P$I#FzNi%5v?ZvG^P$l;X>eQif}(mvLK`yn
zN`Bn}=3uN%8rBdln@i?Y)SC*Y8>uFWjF)US?@BmG><U&}fP5i*Gn|wt<<*disK9Oa
zEa}F04g#yaBwZ2pp}xU6nZY18lyh8N^*98;2G08qA?wkRX^(-T-)*y^c8Y>q)I=oP
zvGIF#6b>`V&H)CM--)~L^sPf(g$zDumzsssN0Q&KL@mRN1Bh{gS-21m#2=GM5Dzsl
z@rfBKC)sF(!|IOFc8Vte<7}Gb0A0gXz?bAI295>u8dD1s7)CStJ)>=;Xu*t*n1Ilw
zBHNfKjkW%Sj0`!Mye*!MQ`m8@(7QMr_uw8A3w=*8cS(7c56wLXN0kRIw+BNI_zG8A
zoIU*y!N;q;YEYAbVe?n|^93+59Lg>D4ZkZfeyGWWjzzdryn~7MQ4+#?A#9l*j70eC
zN)ZOF4t%z&%7P#{QT-6yq*vLPHf%f|$Ja7FI$VLKeR;_;BhdFEBrt-f2V`z+^kzwJ
zkitVbfD_mZ(~%0pn!@gdbq0z)Ba&2;a6098t~J$hQM!GPG`qI31%K?2l>2^djj>+$
z!W;K26L_yUtJ#C^8xH@4#(F!t524k2dhyB3t?bC|O$UQ(d0$qi_n(FeOSETkyh}WX
zzCKVRpf*il0YYf1<4>P(Te$Bk4QzGnYel8xzy0(4yP5ru8$D-R0juqQPuBnPmvztn
z_vFcw$M^QXZ_)nuz4$Rs`r=2punm*5c-j-&7x18UVIF*|Ha~6MaLn<0M=zBbs`GUB
zgqu2MG%MOY#8O3CA{Fx%+P`$v)Quf9X^xcBiAt9ebcQf}?|`tW8k+Av&h%}dTu*Sy
z;Z;jm9@Kf6^`?C#&6LlIOH%M=0MZ{u6)HTCnoC*|e(Rd=-mHCui}4>)-}z~co%pCJ
zMTqv!DB;Mp%9Y}tdZ7uN<5bAp*YE413-S|aY%&XJnc0p&56bPj36u^;W1MeN|6<Lp
z6Re(LA;m(PN9FXgqZ_@9N5#y&dS=ymt*TI6o?^%@cu81g=@%@sPj$YX(3l#9j6f`(
zqyNgfoU%(J3nUFij~-*tVV(ru*9iWqQT>)S_2wq77Z<<XGCbUUwd8}Skg&YzRT}tV
zebeOm;<ZRj_5x`+;vFzY{jzcSVH8y#FJO!s@r=Ms${i<R9KSIQg|crX=jUlRMZXg-
z0Z%vyfpe=%&m%>#NJxQVNa(KWC6`C%bU!@N&aW8LNOwupN>-=N_fyyhQTApnRJ+mb
zC-`A#(Cby{_C1nl<<mjb;C{Kq6^a$zXiKy@+3B8zYew$bxenu1toVF7>`s(|H%<r2
zLV(}Xo<B?b>Ev1-;Z%)ANiHb|^m`tQLK#KBe2GgN)%9yE>04wLi`RIQM?dj@9s~rY
zK6BFZFF6egjqPZNx}%BndY!cveRhqY5GqOv1kFtZUl&TMkx>r~^OFS`HT)vRM*5c+
zl2HeDf+3x?Ae{*JbOCu!4|$y=Kq;O4%mW~b81}I?o9Kddo<i968s3=b{*@43jp}+x
zz8Ogu#gnfM#pGCJ?kfYlkJn_8#WG%{+<`Z}6~%r7_?(5wd2m=nq%K`=QzPfzknj%L
z^2Su7U|D6IGH)%%I=!-ZpC(t9+hk*MS7qXPjLkQmsYP~^@DkdK=%-l?V3rxVq#}#>
z{fti#e(C!*{E5SmZmucjor1N3o9V>`T46A&3`UreKQ?0RD$SEX&pxkl@uV=R{qgjL
zXcuR+t)f+Q8s-gDo)7}~oyv)J_3i{)8viBm=nfXK;qjuNRm*dQR<s+h&sVfOKcSZl
z?&~3pFGgL=rX7`XSa!4$9k~Fl?r3Bh#>&S67(d$A(!3@NXn#X`K}xAXBX>?6e!e!$
zX`7tK)Bc2~vu%lI87fjQVTld%2q|hJ3UorSrp87n``Qt%r_l_Vn5rN|9tGy9Bpooz
zjk3HrHI!a{EdVEF0#`%0$p(1VBzc#M)+s@flIGPf8H=sn^wb|1vcK*oPzq%-@Z*zh
zy;i4Ndfw7MoiXTF>Ngvaej{V=)#yy~o*AZoWCNN*11_e}D<Atu6WuHaEZV}I!4^xH
zK*MG&)DzuACV%Y4`9vntB6nyLWltWcKU46yGhHvuE%nsno%FVNb_MoE<*X=8-dDDg
zA-dyk+dM6Wv$m+Nwwu^lTYTjSzkGcyz`68W2(?_t^^qmcCX}tSYTXJ~2%)I>s;tDV
zfCc*gjeft`;^oI_;^l?M*)W2@RyP<pN880d#sRGggJP#lq&uWfeC-W8n~Mk*x$Xf@
z<1R>JuDYR$VBEyPm9w;_d^=Mm`07*2anN5Dy?7D_P=Jria6s?Un)9c|GSS-tCU5k~
zrMV;E<i-)Ku5P(Zq9V!ka(Vs>iH+Y7;+tTl>~IbY#u-&JWQ!bFF&BD`+$LrTf&+5X
zkAxF2ROWFX*Dd$S>T5cQNrI_K>g6Q~jE@7HkzOJqmZ36Blt@^vp4mO_LU(QNxGNTk
zE6>+k&FFtv-!v(P&AG|PVIFF%xRpQk0qLc3awI>DUpW`o*;HI46LM&8qMSX%M}Hnu
zP@*#3Q4}9}7P#)?6AFk*S{E$?QMf4qdKrjkNmDQ9j@aF*Y`GKgP~UCp;>Mv&!4-_0
z89GmQmfeGVG2&>*GFgyHLyf*+sY9mF@dS_b#&U-Sw;<6Yy@li<zf~i;jqGs~ideh|
zLi4c4@!bg_71RAHWDyS$cOs31aA1X!wKb9q*4Bt~FIXtSI_V1(O90#g1e4;R{^dmz
zQ%vAn7fy0pk(|pb^W@d{edA{23bO8TZWbk({oe7LDmV}sTzF~VtD3qpOu=z!$;lt@
z=Qux{4o;P;A9Y1?2G4G;bDNv)bZ+xc8n{V(#LL?&4u^&rsoUZiAIZh*aQ8)L@2CB}
z|Lp7?od9p!eerrfa)xKp3K-_@Cs|U163cjI@!sjd;s&rZUrKYYy3N{-D~KqFW}m8A
zEQc!NtnFAH*)fJ7$wCYO2N}!7Ex+Q{*$GqA5D6KT&76pvh>D|O|9ZCzdppm@xy^xZ
zYRr1SZN^upM%0Y5RUg?F9Z}$cihq>x&L$GI-B#?N;n!NirQTZdgsEESWM}zrqc<j#
zVC`yPGsa#>WIB4FCxO!LL$tt5k|8&^cp<(I6_lILhOu{(WQ2xUXI13jHIvk{fNyr%
z>o^1WYuk=V)oxo}mXXuz>aT4+YL%OSv2k`Hb8h1tffz@-X#;xshtlX<_8|r?Q_WDP
z29c5W&A_T;ewF4$DFpQwo=xY6S-?Q(XMC!xM=HZ+x~LTU9m0aq?X=~zkg)K&jah!=
z#hOg(xagEaqUUB@!Nf-O>5yX{37+y5^s}o34mk{*j=a&^39ja{nr(Rlm_w7EH7Ln<
zNi(&OTBeCc+o))v#POxyz;0-_C1_l!ku9xcw!C|+tnd<2P8B%KZ9d8RvU6waEBgI^
z74bi4SbdseKEtC?jU<58@jrig{G=HFv+@0t`}m)4F#c!#-`5_mZ+v$j1mx`Uza3y^
z&V$3<<6k>RuS$b}3eAE+Krm8rakxsvW&A!t#9YS19xU$@7;c{AvXbBiuua9<vytc_
zR_OU-cuv3Yv1~xkpE&L9i@(9F`*}u{mas1q-2#aH4)cfU8^<}Os76TN@g7Xa&+))P
z-;OH}Ag%(OOVc#J6lV#LVj^RoK~(ACH49xaq!X4DbjdBzR7+(4fHR+`7gO1Ui+#%P
zsthd@!LB-^e+(-n|1MM!ar#$@+r<G7Ezm82v*yJ`6NRDMvCK1a+TBObj&QB1M3^kl
z2kZGZJ0o)FX45{(AxT%|wNkx4`ujASq^3S#EOI(^%?OY(&Vi_OqJu7;TWgeu*Ip5o
z)N=+x?naK4%;_`y`FPlapRLeUUmt!flg5inp1}F|GS}*Q=d`3Uoc3$_=otT}PPujm
zQUHohiW~~Fn*FVkX0{d_ouw6!Qmx?QBAQ(?F)9>oiyl~gQJQx!+0%BQV44a!gc<Ib
zWOM%s>pHV6uqde~ssJAQ0C!Mr1$Qa8EZ!e~z(^yOug9o^<cLT?vaD|(Pch?AyDfqr
z9G861@Jjxk{<IIsJ38L|aldnP{A~ZYbM(CP^yu`b&dI^E{gcki*C(f)m%FEXKN)>+
zxq?fGCCbGix9mg3P>x<HuJAChElm1rr2#vmmF~z7Z0hT+n{O!>u*J|!^7b;9S0^%;
zD!6*v%~6O}lzEej?keFB6xYF3e107I{5bsi?12X|o0~ormD_r`vvrJh%2~4VJb}<k
zr9&vh@0C_w^K3RgL6q0;krtZ1#GEOChQG1sg>`Hjdo9L=7UU$(O17!rE=NOo5w7oE
z`7mRNnK{2gfe~&a#Y?Q3i7bjSwm<AEQ(dFowoUZIMyZol;d^d7C4NVZG!|@gQz+hc
z=DW@bs_daYa37A{ho9WgL~q7ephwSH7U}Wlyd9eI9nM+ql4yO#Tm#b@yVL4dPHSh*
zX^rQZmYKusC9GJjz+iM&>y(j4hZ|r9T*@jGJGGgWgSbDJwGF?sQlHAu=9*PFh+b?}
zLa1j~>}mU5c#Bus74>SX2D2dr&5XeAJ^iMIJXV#$JX>RV4?)9Yu?KE_)O~eX(Y<gN
zGmG$~y&V%h;#nZs5h}cGcI>=zNLBWrkH%$7{PML?-$`;PrbN=-Oxn6tZ9Vg)0QdcB
z;h6W|tFd*|(X59}Eqw!Ap}4Ryz-jB4e^&aneX;t8DRrlL0z7Q#zj+Zib#T&q_?tW#
z%*!!*Ii{CF)HbnIPr9-71?+TcvY`UPt9H79*G21!Pl7H5x&&i;cHbS#cVqnyWqC{y
z?P+hmgshEd{X&BFzlNk7uFHwH$LJWjGVT6`b6ae;*)Pc!SXXx{Yww$anO4@mrIg*&
z+}S!wt!*e5tD(wSp}$EC18Uiqjn5{iQx&_OTg9%o%?4T&+!Q4Ns(z09S}>*AC?mO9
z^&hN=ro@F!1Yb|dK?X|>D(!6+;5A%lMlPrp1WT&L*RzU{kB+lqQPh+Ztd=u_lBQsZ
zUkXl<<^2@JbG5#U5hKGE@WOI$eP#0Icp>tpCu({?i51it<!l!>x(RHC$9)?aldn;g
zyPJ19FPqKz*~HG`V>50k@hyFMffQ}oAF#-g3E!2;Y<2Xo)F*l&!di-rTrpNGi~n&n
zrGHc(bqWceQRuCZd0EkAco?akzOWE{QxTYsQt62Nd4yUG(-caSV=&ULZ^QfwG|NW#
z{mRtJ-=wtr;_H)kXKR)nd3em^;e|=N{eMDfcWkBIak;eHU$C^>ua<Tl{d)g)(r!Rs
zEazO1cKf%IcC7-}1Ai~<E=svtqF22`VYmOeh209xsxJp0D>-<~<=}f^cd^3mi+f@B
zUs~7=L>jYmkIS~`lp?SMS5aWp$h9ywH|4hQTw}O&JSV*4N_fzl+WPH+a9JFc3b+i9
zs}`JN2FG}&D3=t#_FI+e&2dkG?W`SMS}PR}FQ*&q8gcq~5jPC!!`4wQZk=qj4V|1m
z-kH;vrD}rn_bA*rwE4deTr4U_XgrCuo7r5v4dzdopy=<9<m7}aDh(rF=Ym!nOzu~+
zxSDo<(z<@l+tX7B+^?p)n$B`Q`Vz~j9f?2JdRF#VXF;n0d{rw77w2+*d)tM>`%MxL
zJ2Clc-}xC?+FhMBEC2W_Ivu~*YAyNz{Fax>?yOQ?`58CXt-0a1x>S6v{=0_T{8Ej}
zi5}$*{Mk)6v#bkN9U<?t=89Xb<Oj?n_IJk@)$L$$aDmF&IX7o$bW5-KAcW%?5b78%
zZeSw0TOar;viWNK;Z>>Hx%}22)bm?+yu&LUz-KAjJr#SYqw|8SC0_r5?!GCkg{=>j
z<UY%5d%hrq+Z!PKD1k3+jqn`1Z)kT|pv!g%sWv{F5BCimv*k%qX&neRS*&|@6&t^I
z)vl+yk0{f~ARRWMDzEKkrPKE9bpA*x6>5L=+V|a7x3D{q7U%NF{<=lQBA#DJyW{j5
z6$z`Iy<s}_JYc?{aCt`#;Kls03lhmZ!R6NO60(C5fu+pTh32(c5X726=Wq3LlNV>W
z1IcH+Fv(}UAjxOE5Xon}xL`F+J65O-dJ%J;wLl+Yo^y+3y8PPYq0i(UT*e#k1+Tk`
zk-MWHxEqR7M*CK(C|!*5t=i@HThY9gDBg-IaXd>N`%0y$7ylg6)Ezk(-<dSk|D4j4
zqy8!L3$FAq)^b<BO77~H$X)m9rO%*VvLsb1WX<QjN?dDB2iAK@i@T=YOIlw~HiFMB
zX+d9)Ru=bfX+<39ERr%8OjgW9M``+4K3Y?=H-Ds=N4~VcK|QKW`9Gs#<&sXiUY<~I
zbJHB5x*8(o^eon`6w9hEBGLn#jud*4UJMakkxB2}ROJWR1Djed?)B71=s=`=Y=c8L
zp0i3H>em|H!o~kgh*|p#i1{jz5;_r04TGh5FjZ(<jxc``!2RvtYd+Gx_!L*`P0<t)
zojJwTMNjcYd;VD(Ut~avGiKsZcW0P&;(i+E{~G)M^B^goo-GhbB&9BR!$@qH4=PY-
zvUby1`AWJ2Q!x))ki*5~<|aO?+KF8TD$?c5s~L^DzOlV%20!P(D%P-^CIPB0SmI-?
z2A*B_Ge#%U5iOq%kWPZ#nK+hkuZDh_rm_GZD(7=%oQ880QO~~y)W<Fa*3bnhmd_kE
z`Blv~iuoozo4wl3=S>kD<^oqcIKO5ka(03<4WXpC+B28ZEP6Rz+-kM&cD{43_8sr-
zUV46>jFVwE`4U@v?MrR(FEZ_~ZKszY;WKUcwJ){dzsR&_to{7;a|$Ih{_XzXV(S0A
zu3i3TM*W|Sjg9aA>ev5SfBg8q{?9jC|L6Pr`aeZ3^}1I1pVu?0{{(9}1aF?N3t6#t
z#}|$%d>RTAQlD+>&-F(^w?b9PB$NTG49=_?CjNJ3)HdEnT4##(-z{sXWtN#L+l+=i
z)AI5<V0fu;`c*a&Js52lnO27xWut~;1<6L-2m&wS@da5p;KeY>fo%7URVQ3Gy0*RJ
zb;#Ut6N882>OYU!gBd>5G)oL0BIsgX;p|`+*>ee1bA*Ng3X)@c9Wtkg0B}3(vSc0=
z`%%*Jt+t~#Ib~5(`}3BNIfj-#hS6;uKK!h4fyN6Qj|t|xQS!H`pZjg97b)nRK>>iO
zNy{vFz?apwaf768<8MK|GmSFlbPHE)zfFXV?8I{@4f;0*x5fzMd7%1DmH~>Fd@tJ@
zKc~sn4Ua1PbX$FYtFtNusMcY{Km0mQD>;~N_ONoMcgwTtvz;Vz(2F=o$~VibbEM8*
zx#2-l5A<g|N#-BW{ZVq)M@cnAeo)MSM{O!kr{Z=aN0y?3NnfKXt(+Cs>{`a`6=Td+
zGi6rH0!@c11X_?T=kZxi)wguFCC5>&b`x~FSM}fDVilgZ+eC#jt?ZUqU41LbhT1tB
zCXF=6=38i)V&hcPc-msixzd(XzKMzXw#a|<ua5jjvygv7z3c0Hwd*&j|0R{ZGS%N+
z|N8{~uc-fhvVO1sz1RQ#OXz=}?H``(zdAm8_ImHMMEUDAsjvAp=cd>;xleU6K=|(f
zB%_XU3-90=r_72?+Tu-dglCP~={wMiTNWA*i!Hny&fAHEuDYt*NvY-c#bqvovRo_V
zamnLEJ`QA*-R+MqWA7#4k}}rH;UAIbd|O13({-pevlB1W5I+bLYqSXj+^0WTKN#nL
z&tD~D_+RRR&m!EzdIQUYy6xcWe0Q=ZYp9>5gQVTw@PpeX@VG+}aV9flN*Bt-;<#qF
z+in*QU}wp~y%;03!7wx&Pm`Tl0^B$mWbczs-1kO+?sAxX8GdfnhQLRy0UXZ{AaMaC
z`PTg9KQQveS0n{fCz!km*dj+)`>voutmEm>aXV0mS;l%~j@|xPzZl~SJ_UG`n6@?#
zU6SkNT85(nwwuikuErvCwgC8VgK0^WkpOlEyl8PD7@4Z&sXy(WfXKOj`0|y!P9lHY
z+)Rh*<W)QdL??Z1CXf&(m*XVvq2fz59iE-aM+BPWTRRFk>6A18YCx60-bHD=yA2b^
zaHd=e<6W({su~i7<j{Hc7$pT7L4KL_d*Tufp0fn_`FS=4X*>A<H<Xm3Ao_hcp@!n|
zGYWfn@=UzS(maPJu#wLC$sli8&<7K77UyY~J{l*eR09w>9d83b!~l>S51<D+=&!Qz
zJ3Ktk64ApWO#TT^;jl|5EnCJzA!=s>Gy3SP1p%YYj+|C8)6}0vz(NPH0M>+J2CW5&
z4te)7=}r4d)Vl^MNxRKxxT<<v`EG5a@hCFC8Vxa=4m#?$er{7!+4V_rebVirbM3)@
zfU$0pV<=ywW`R{Qnn<ohk<GN(Z3GJ%Gh*QLx|=>f+2|m?H5nu)jeuE_1@BL-o=TfG
zw9oF@(1&6&7)9%9Hi#Ra9|2hzETnidoro>K*F+->z89-)BtES|S`l)aVWcKJ*72A)
zlr0q#rC#HOHl-H`f@T0;14I*Sr6R+hAN=xie^Xptg2<N2y)l}ODNIgDg6CPkpIxQH
z3pl=#ZW{N~e^4;e58@!lF(c(-yQlrE`!4AzX$79bKKKE<Slbc(Wb)@6<;eI9#cTP|
zFuM{lfq;%-0nR}%onSvCU;+<<P2UyL4QpjUnhV<X^3N6dLf9ZDETugwSMsA3@@yBK
zqY=eOv6r0_0NYEDC{`ZrP)RL$YRl!+9%83bw~lZD$=ed71?NH$1m`3EkFj3R-vWpb
zrCY4zy&)gToTh<O&awI_ue8CmKS@XZ<grS8%;(7b%}yV~sraf7V)>E09av{_>TU7j
zWI&2sxcFn`Q!7E<i#oojbC-gQS%HG|;t5toA*Qb6;GZc<>*otSR`Vz9-YVnB&{gNH
zu>8<+)3@y0VIK##J|jP#SlDd4%}u+@WS3F6L)PQ0^Wkmad?+$}b|f@HM@Q#v=xqJ;
z?c8_HV~|hoxxi=Q0=2F)b1;ONTijIbb2^6Q|7A4Bfx%A04S~rS-rC)Dv%N*&a1+VR
zyJGi}mcy_cyXBZy*Eg`G?74csrT)dX6rN;zI2gUJ=mgUQzleS1@0_Y#rjM*U+E>kx
zxT@SSX@N`k+a1eOxGoEqmw~*IU}=yvAR2<~vCP0}?f%t@^@@@U^2((&+YUEVmppe~
zx^h8NA@}4m##zvu)ZY#)u^EIT{nii|!Yb_vU-EufH@p{7!U9zr4JuR)7(WYG?eocw
z$4u~Um+TIEyS?6N7GaBJ(aR@In<&vCuw(o^w!cfbMPbO&%ZyxbXxI@eA6`_f{~$I1
z!HO$*Ds{%)4+-ej>>iJ)#}_kud?s~ZjQ{g4Opk~V`4{7VrZJ*Ny&apkEX1pHa~r5l
z3!*6(LvNRIezfQfa5_%o;l&gdcji_o*$Z>;sgHL(Gv2c^q+0WtV{fyLct3Z#w`Xqc
zuViPdJqAeLxPz_i+5K=+Q(Jf*W+MyS<2vKlWE>B3(q<;_lkxTBG96yfyHovQ8&?Y#
z<233Sf7d!J!gSj*onWwLKl~Uw_`JNMxWfv3WM1A;Z_}3Zj#GDP@RQ8Fhe-mv!mNmY
z*O6!^@zo5>h`=*biNX%*rKp?*g7rFT(im`DjQTgW=7mBNJ7!RJyOU|$_iikVnaI|_
zna<;Be_|~D_y?=iv_u4)N+o<uAVohppR8S?5<E|P<fjtLjIQE+g5H}=`4N0E6lc@4
z--GuOi)-)G<Vxh|91>5&RYD#cc@hu$NuKwwK~P5lJ`r$*8Y_EOB@)uSlf$#^z(q`X
zpCc}JOkz{2f-7leM>(guNhz_prYiDDrtgJ|)3R&iosUd1;^m3~+Pt&&U(n!eX5yHg
z%i2)|uj697kk`EdTr*#-bI_H$lP}q@4i<o+!z8@jz#Kd<KkTMhZwB)|{xrQv@K2T&
zF%bJ<J?hyM0`;LPa?4h753&4VG_J@p%X*DeOPOgFutk38Vx8qybu-8E=fDgn*Ab~w
zoag5;d2C-K69#_Fz!RvZD5qshB@RJH4bd<T@H5i#RwhoB={#$uOBMe2MdDBpUEHEy
zLd^`XGNWpZjzZum5Xa2uhK~CVV5?r&i&-^=8QEMN$bMTaZuLM>QlmWGs~S5}l#wOL
zon)EhNw(VRIETGR|3!@pODgQfnp(Ssr0qfk>pa~m3vpZh;RiW!2?0U)%q%6*OKlCK
zjKcC=z;6IcR<Q(9u6m%B=|NkLGg@zOg%nNEw%S^9wk?)bA5Rl6X|n-TKt&-Du5xNq
z;I=rv4AC$)Sch*}pN9>x%q|Xw8xY>r996PxlAW7^voDz_TIb5v57n0+Z<or|5x%=I
zl|-9kYVJ(fMM+Nh%S{c>!(u*JdHTx{xT#{<anHvWYIH0cjaeh*rJ<4&s}kRm^tEg0
zTt!a{=-%d+TrK50a;fW>TB<;Bik?=ku(_Jc>!8gh?q0)h2g(Ggg+=i4(x}DwI_^Kg
z{-%gk69rmE$D#bqx<0B*&D<0=5%ZEduj^h87rCw_m%E#+>+#~&_4wOc*GeV(ydg%O
zNBn&t&*zN)QR!Ffh5*gT|McYXU)KHjpYPZIdLRGuEyn-+$9?=yA^+2}gWVsGP!y{U
z0t&SYMgkGK63xPKW--h~?pftUDkr0rWcne_<}49OoTq(S*i@(CPkLgSqmQbs6JaO;
z*IN875ts3M!2eZ7qG5`GGN^hMI+5Unq~UxMTW^Rp8WGyMv#YfM#l^_6XkWJW4veFF
z2}i_`AW=D_ZFs8uUsv?l)hjI(+RyV$>-)yZG<W?;{VWFX)nhv*xNOfiCOXvBZ+~Ev
zCOI&?_yVY==?>BLG{%*4vNQ%pZT;w^h7<R1+C^QrynHBHz-6HrU%3b(9z$o>gFyo8
z-fflHxQ*KD<k_pvZZ0oxT5vLtdFJLNd7CTqRWS9ltj$Q#C0#n?JJkCl<kR?~pb}TD
zi;~k#j0z0?jpRrRr)lO<CP*rE-pqNBaY^tQY;J1UKNCSoB!vxJKLi$Pv$#sGU^;WA
zT&z^3v&z((O`{(T4-lO)R62HOvksSkp`GzPn9D-vp9#p&Ul@Hx7@tpE=A!c0ipnH4
zxAdG)ANBEuR<cDBw0qs<)IBFbQ-9X%==*zxS7=AwY|EC2FUMwJ)jkJ>zxXl~{$=pF
z0Fw7_hm7R8&A5X6Yab%@&y8&lX5c(3A+KulYe@$M6P0;e8nPLleYL%+(xepba!%_~
zDMDwNmcqR=Gf-|CSWA#&Jdz4|`vH!HmWz@?a&>(@OVF%*MFbJVy70)2EkxQT0?ER{
zBx)8=br4d8mm=A5!c-I?mJQ~NE2iJ6ST@MAMb#XHN9GslkR9Ac&c`yt)U4c{7L86f
zq1>H8HFMcM@&USf<sw7y=OR_ibMlx4@s|Q`DVNP^Z~mDdRMlBsn&I%|4%&mvs;K_p
z_gZjyDyzpi@4rpIr&1qHGj{7@HKRK00(iNyfZaJW)ti6{!UkBV{M06m>$%7XF;V%Z
z$T^jB_$+2=$J63AVh_BI<<%i1YU}Gh%8R6ybmf`RoxH-XiN6GZS0B{@z>Asky0caL
zAT&B}y(;_UWms?2r=Im@C8BPUA`k|HHGM76VroD(ov`UYyb2Z8eZI5YZ|J?Z$%_Pc
zSEp;ula=CBN={a_6&G{1mLz4k`Pr)duWDd;;WnO-!t>r@^A*^CvRY~1YTAF+|FXW}
z+kZATzTddF|9q4A-|kI64u?A`xAq*B8hCC|MzY+a(q=K0GG0~c)Lu5L;-k)@$|^o;
zIb-+qV6U@#a<YGX+WC2Z@AT-nb9DIP*GPTZsL_;nRo4Dw_vOC+=;YwP_BH%wjgO<q
zqDT8x>k#4U3ZP0IpcEFr9=-0oJosh*860KLpYI><AD(s&U%%WxhMrD;eYJnm>0pz?
zeQ0rd)Oiide)`jXXK(l9wDWTR^rxd|oulWSm%FFO2fsYs#kWWDr3+;Lmsc+i_6|;8
z{MsNV4kQ#TU7Cz1QBIMsqOEegw&ZVe>vG!hw>#uN`0yI291TU%@)x5dA~2B_S*Sn;
zkOr=Y4e}hhNpjm8Wz}VQZH070)LAZ)T=^wlT_QJ&QtR^u?=aeSI2|M(CYzgBc|J;c
z$gP~dx|Vpc?W?P)_JY%Xp%_&C>8sj^^~xaMZ^y57JAT<MRMNX!#8(cP22m`zJ7Xwb
z314?SZzBU)4ybBVBWrqVcQ!ddThHgWX776b+%xw7Ti**BH$A2c;C{}S1<vho+w0FT
z3zafXKYKer15p=f^WtziP>FczvkuD116QvK4wC<U3Ov4d2r^X^sL!l1HU=<JL^u3K
zi(DFZ&+^Anqm@qwRb^Q}?_`GlEacIZPkK1f@q}eK$Dffi`LAUXXF!K#%VFW`RurqK
z76vCU|0VnA{$725|DWgn|9tw7e_YR@0hn?Bf4uSdiGTn9{`)8Q_y2D;|ND3MN&wD*
zpTGXE|N6B|4WM5Z6#-#KyV@5J3eKuHhSGd@G#Y0g=DA;(L=JGoDFJSIG)}twB=a0r
z?1|=B9M$FEag#+TXp^1{3g4YA3G?u1vOidR35bgH#Jx4J_f1wTXvSbUjNphltkU^B
z4G7it;E&X##hPvoqL(AOZMOxY8f01ZA1%6J&S(9(`%P2BY6S$LxdjPLsrQHKH-7%V
zj7JO9|HG;O{r5%v|9$=MZ}I%U5B+d?=F8pEdf)O@;MlL7kPZ%OttCS*Vfb0h{tXYw
z%q*J}-`lj_#^*`hY=H}GbD?0{cE@_s@G<OC17|RW<4mj|&jHm-VKvW}>>w`0Ov-V1
zEp~*w?Hf<nVp0&>K)KqAWfA=k8pxksA1CKeWkSeLWtBVce!+^e-2JkZEt`n=!ihh`
z3sMo{BC=JyS;<Jh!cg))!J<D_zKpwA<{=yBoj$%}a`|PPcdo|iB<aXD;s^0J{JE>Y
zz_f))+&dcfug$Zi9p&<7PH>5irAI3(;`elTG0vtVVNd$b%M2@Wz$jNjAJX&R&&DYh
zI?e`zY=|XC64exs187J^3jldAD>!3^U8=0$2Ap9FX+^@sODm76?lXa5m}^2|z7Bor
zX$>fJE$KNIozeo%m0uZm_<*~hq@z03X;sTA*Gv<Sz+AoB>O=NORcMy#PPck%d#mVA
z6&_Cb;#BUkNf#)`GSjCW!ibeyUxNP>npWJDe_}=7jg;GcM%`?EMdRj+W=HQl)pMYq
zW(q7>ib<V>>4~?z7ZhY@U+j*@@%1+E0Ir%FJJ|9c$vEo_hyP#@RBxsOMo<2G>IWWz
zc$D)bExO5|L&(T0zCKhAXEZsdIx?{r6ta3*x1d7U+#F|DIeLgb3VJ~1MOQb^;n5(E
z-0dvJl)Yv~@3Q(ruy^&Ux_8r`&95{^@8;{0A@KJFhrfBJ%93CQJLB)exv*w*=JH9j
z#7L3|_NHHkI@84b+r}HmOSMLyX>OnBeo4#4UzB1~6LA|;hpQ}Ej~=bn{HGId_R=w4
zNz~Cy+vFT%g6cI={N~kU?4gVE#1)|%g{GkP=qzebWG8oVS$B-btO%!lDe^S`nBBi+
zNA>!5+LuzN<Zm@+uU2}F`P(mMl8+fgebSjVXcd6;DBy=L9aap-<VCsyS<U3g*^UQM
z!r^HE0(cpGy%qREM!kDqKwJ0%zEcOAKEY6{f&q|xeSV`q-)uPzQFLzmJM@RZcdHw3
zFd+}YUG=f<fkqoWwQRQmej(?Xm&A-2_Mx?Oej@1YwoBf>!Rxh+HwAcl`>FR^wie%i
z|F;ks++hYPq<>SD1Upa1fY<J2(tU^hrltrP2)Prvs%bY(h7$`aqHGFXDQ%}iahYCR
z3c4Ji(j|i{z~vxsEIAA3k}@tHnZo<oRWk0JVUfXT*_$Gv?M-tzIP)ewYXqqSmvOx2
zf4Ba|OCT3TD}YxChm!lHy=`10x#$XyX%E-BkZaD_0Q@3gcR<dzfcYDalcp=VI9$bh
z=Wx8{jEE&yR?-|g1W}VJvYobA=RrL7^tF5I2b>IvC=@PWr2*Ci-;xYPP4vkbtv4Ns
z80cowAukn2eB6nkZG*nF*oYj&IANE3StAEW1VgS|q%mLzt>+#2STHe0`s~Deqr45~
zvsN12rpi~>UBQwy3NOHhhi&X=aoDwBTD^D@M~%f|c-=!O94Y&h%CVP7RZ!-ic>m)f
zx1qlEu8YBiFmc*<r9wa@2h~mdyt)KenG#SW)KEUSfrS1CMfJ>%d1N#*IuNQ<Gf@ZX
z*QuqbQdM|Uz5^+ocdYkPQG4QSzkqUuv&oXLt<bo2L-9W#_&fiM3@UqF&@G@^{txuJ
zz#Bld>>ucRfztmhxj#JbyO#Tr>4m6bS(y5VLGk}f3;x9gsu2H+_jkGkl)-SX0>S0*
zI&*xmQOIx0uXUIMmeGc@2IOljOWiZL#iH9X@Zmfis;~!n-aKrS!nU9dm~K2ov*KBz
zJ0Q3y3~*uJ!mJ~>hqj*)--9n<aI-~tu_nS_uZOW&BG2D${%WVu=sI1v6DfcHm7U1t
zbq%~L6mn7D^o~%$ewtN$ai)TMg<TEG`AqR{Km(UUmV>2XdIR&sUE9;0O59cGD9qZM
zkpxCyN(TRxO0ryPJt-vXlVT-`tLG*igk<Jx{q$V|dzetxvXCawCLTCVDViKr0m<K|
zDWC%rNx70P85NY%!Z@Miie1=Jc%x{dDq%3qX{*K7S%ZtsuJDTx*?{uc&Y~G;s-IyE
zaDUWIWv$(j^0({oAhY&%p8nc-cJ#}y@Z>Ytzs*c;@2F6I4Oo;P#Gfc&q%0wg#d6uS
zW(>3JdLy0rW>`VlT=}KK@mG2E`ss^<J(*Yj`R?9+1dSVW-3>y<BdW-W8y-vkjNyi}
zMz(L-WRt9r!4z+WXps3mCPptl;9rt+!dZgOIw+*WGvGaVtgrb;P_9||3hh?dR&j27
zfo%;P;iH}^V}Eu{+i)b63ps-JxHCNQcpTnT;i+bIEz{$H+|$`enlJ@1hXlnn4TK*Q
zN@F;j_WQd3rYwOJqii(n#bXp?j73BZ9*ZaC`E-!<Ovh#b<Z9K`D9@;m_<fxA=^ma9
zv0&0Uh^s}0mF9#_`nCx7C14Uz2Kevic=Qu{`!FKLthu7dyO+s8<tDuVS|u$fw)`EK
zoH(6AGhnx>ipIl1Je(4WE|UDA>Crh7fJSb6=r2{6eP<qeU&T>BiwoC1UJ;oUsrl}X
zbuY+eshWW=&M)dKW2B0`>s6)=t{8`$^LyZ0N_?S$YiHT}gtLGJGYR^xDU#MjYtul4
z`)@BPTkKv2Q$f@U@jFZgo8D%(&$KZX_WVq%XtTsl+ni}o{LBLXyxaT>JDS?);YIqI
z`dN=W53dDv<#{-Mu1*b4VY=r9K-TGT>3VJRWSR7?4Vfy=<c2ND2`iRC05KoY-=FP3
z3DVSJ-C=7i^1?ctpS~8@b&$#y1^>=z?Iy8J(i#6g{)a0KE*BF2v+><uHvU?O|M~02
z`hEP*ef;mgL=ce7CsPszWL}0Vz0_1#7WH_(GIN(M0`Vk88Q#^Yj1xRclHr8(oaRG$
z4Isd#Obn_@26)$3zyQ|!7^`nj%$g_%rr(nR*G+)L`v~OS>0oHo0a9H-s!6erRafBs
zU^@JBE+Y77)WUBXWSaR0Ihs;Q6Q;5m#pxJ3CFxkzyUFvco5mB_p@xHS)9J7wQ_ac_
zcl&**TnT|sNy7qY@1wug7{+=w#Tqv)@p?#lUQ$}P6O^+d{R{va(3ZebJ0B+Fn0nN5
z_E`eEEg9<GW#@>ys|;wJ^mZxT&=46M7kLBUU&=Bt=@4x*>RvZYu8girx?{*XKRqN_
zLQFp$q?FY&fz`W4JzKw@b){Y#wN<b~(j>QP;$1T1=skBn9d@bdAbyuPCOkCa$jhdp
zC6LqPH%37f#FHE)JXHiqgB--eYt41+P>QN0WC}*5l%P)OrYa>@N#9Y<5;T1Erz}|0
zN?MZnFvUFADY{^k)C0t#*5?eQm~#p{LLKEe>-SMfDY3*=V=b{a9b+Y>zEnEWReO{H
zL7b)ibaGAI`2PSKds$zq2H^i`H9?EY0`+7W3~aQH36_9-21z``f#6=0MUYw^7Pupb
zKkHQ53W2c#CxmSXt4)T8-_KyfT%f{~T1R;QZ;Jdf8&61hZ%X^-XfnofQJqPnrAu_+
z*naO~8@3`>@t7332Sf4yN^bb1^_Vu7m=1eccdBYuQD?vVc6B*)<nq9{#^`*~zcvcp
zICn6g%4xDe!$`<$i=p}oH#LHrNImG4@;=2Oo{zJE9psY+bcFHw#Ds8b#r>=JI_KR}
zTQ$m-+Yz38Ic_tAj0K~z^OET`6_1%*PIG}w38p9ubkT}*FXQ0_A}Av;1sGlm?tGHO
zy;g|z>ylv~CLlNiKy^qa;xg%vG*yiH(+k*mg9N)x^8xA~x^S{eJ216?&g9b4#sqfL
z-2=%i8>Ih$sdiowfE=~$gebi~MtC-5(w2estj@AN&LwbwzIyTc$Ad%V85UM-6z`xN
zego|k@0ZSA@WU;gn~;HIp|~G=_qk4Kg8xpXBbKXe6s$73dtJjp!9@u#OZp{?i!7w=
z$70#nFy*wXtJ?Rg8@!A^%me~$X2Q`CnuclqA<BX>;ct8PNCOv^<B<_f)6oc6jeh<r
zp5Uq@U`j-H3>z^UIPa>$bmt-q*$UHV2dP9?s+f~pnhau9Fzsx*v+CJk841{qojmE2
zkH9uFG^)@x`^4s155O2??~{%(Rd4%WX=t4yxLSKc7IZHm_Iev8uq76-CpK=q8#dg1
zV5t4G(`P`vGcT1EBE}a-{DGkE^&7`%&&l?Km#gaF;~Sj~GeVO&Om17^eyNVXx_o^O
z*FTi-*(7AyFf*?8raK(JvWesP%3S!0-49Ki=xn1GpH8FHV|cY)9{9Z0R<4OmO9_8C
zxK~ponn7MTa($qK1iE7$ACs4w=AD7hikoIfdNj;};5lEr=~4@&!b-n8PVge0xSxj}
zD#z>sF0EbB?Z<{d*!5OKqH*eyu=>TF#g)xVpX~OKm)?Bd;uSCgU?rgn_IC|ucl8nf
zu@x@dRX9!6%pzGu?|)bsy~ZCrE{DC?TyN~yYJCpzIicVsf#8KOaMzYtjPCZ|dp^p_
z?@xP}G|^H^4kY9^f+96~kx4_XURMqm>wZ9k5<mSFlHcv6AC~znAteks<EYz!QU%;;
z(06zg@>W}CAh(VGu7wY)f5K5UykVv1&P6mEV?|}9Ba_96t?oxYSXdl+HTE2p)7^}D
zzHs#7z+?55a*NW6su*)97@c6=a$OpX8VmgyK>F$lQl!E6bfQQdi@e~<Qp!>nm2yDn
zKj}qGGz(!2H;wVeo!7Drf=d=#H>`h(*$3ubJ>&ToTZS)a&D7n@+K#d2a?clIPk2a8
zgB!2>*M*8|Bvem-elk#6XQ`Eg!#I^<hf!SGlJ1`aGyZJvE)l{!^zqMxqiXDw;^k(9
zV^2YA5&B$M(aP?0BHV-7`u&P^G;A<4$^cEIcbMGbCWQ43;IMXxU;u9xaYpAH1ima}
zXt>V+9o(0IaMsuW3LYorQmQnVx`JIiG3<bR(aEj-ZOKAuU?zWqmJ1VUC8m`eS`QvH
z0g@m70#ckfp=r>3r%mZR;ID0Uxmd=?$-a6He{D-y--MohmXOI}2&6|b!4xQON)w;h
z8;Bbw>BZBjqZB=`j?n-V1^Q^>fLzygnT(*}=HV#i0Y=ZOxxoAc<wrzl!s%s+bj?UU
zq?nf*FRjw(CJo+Llq!uCOCb`FI!^lEhM`acUYb19NuKo2ON`Yr`qQ&eOM-?uXjc)k
z_{Mos;w_B?FOH&qc7bUjI&T{dRTvegADyMccziALBp!D!8)z-l4%+!dN^OOy81R-m
zmS8IGW7N#G#8@~^;syc5|0LV&HYthjc0=A}KyrGH5Vs}7Lb4)7O}M(atRb|HX!^t|
zBvHP=xMRr*2xybi0X55c1Pvk@nMmM>3-QsYp(BO^sg&%U58CkrO|>QxW=;-ELiAzQ
zBrSz-R}CpgbhB*S1N2UkTt$+ZKr$|zBw-K~e);=Z#qRUjo8mXWcV!7>$<KUIM8$UT
zMmRn4M?y3H(9_r<d=g<~;Vg9F8ox&wXq;sOa@l(<K9IZ2+78zdS>X&=iu_8&05`~`
zL&kB#NEBrPf%MaaN3kg<9OQ>h#*2|lk+r@z9h0x2N{l6WDmsb4<s<p~6mXb=WMe;4
zvp~nG#Q2%P4;98HT^daH<|H_~hQGV$n1QpV4Y*dg`npRIV{R_a&zt;=$tUA%c+p4y
zDD+~J4iC_Il9JqsjudeAjMGs+nXL8cfXZ-?$pC$r>_Ruu5eHsT5->9W@~G1IR)QQL
zCzsC54g5g8m7+Bk=NOIAo6VK1Wk{nz)JW$;V##q_BHTqZB8rsv7cMd_FzbRQO`Z3e
z$_G*(JkS9ii4-MCHg>jhK*dMVhCQv^LyE-z1YP_10BLE>BBn#Wh(>>brv)k(WiTam
zBM(+ZN@KF<+mRGz{fK2cU3N}b|J)15r5=+_8z$ZFvmJ9borrfyG7`#v&bZWt#R7vv
zb6TH|6S=%y<LTvtYKc>%gnokAl3=L@==6vGm8%DTNtdvG(sXG{Mp>axrMwql(T^ng
z2bBlckyRo-$1@n#njCL{@B>=~@9EScab%LFx^Dz!jQ3gEgWWyBGcs~z3B~feFr%@p
zzX#KS=8k~c6zfe9F$M7IMq>qsk)22UC~^)V?|8Ncj_JU8U+AN<2X@-LGES4~(2(1O
z%mao%ikt5h@)DJ_c#|InI*7k`5VS(5br?18yh3jC@~xogQ9_pJ*a@qLUKTJOPavi-
zS?NP9rydX=N+D0jpVIR>v8fff{SD5;w{O@M#QmQrcVRR;aA#p`+nTWz1GfNmw{a(2
z+!m+W7ZSbc4CkhGgsX0r%Z+`W!A!0sGgE1)HVsYw&Y~Nw4U&YR&cqmrt+;02EDLS+
z{_Rbas++NG?nh1%n$WFMJH-CB-jt#MGrjzVZ1jRn6(*DdD-qqx3{O$y%>8LEk-nRf
zdvC+6cADCUQJPnONed6qZcHgDFcx&u_3kCcB|~{7Xk_k`lD&6cE^`ve72W;G2$2hh
z-}K8wnRY6(ms%ls30xV@z_uoS5j~K!zjyTVRp;f&&wCB=#~&i{9`3X^zF%K&pyd{S
zVjRWy;+#C5CmC(UR69x2*{lcRP?CATLW1cG3=P(=pDG9R`tUXG(4TjY4|bov*pKvt
zSlS-K3b_`@rOu<kN_=ADu%T9={qI}l=Uq5wB-KRX%oh$0mpkfn<D<cwa)8bz=*3}q
zeOs*CZ3lCstBS8w2rv{s52U$wV(O=Ol6F-ZB(8zniD>5yh&l2@3*!kwr&hxbjfl@b
z9C`iHOJJEDfZsWbdmY&W``no#tV)I|SCCdv2xewwBGAjM-;?Ur<cgaYpfhtK`ko8W
z^vE3i#oKsbAdZ_~H=7P8?pgrHQ>Wuw%<%no+p#3CuD-RR!ABMX?hNENIBMDw_|_7S
z5XatD+YUit+g0Ie*xs&gyjgRfylHSq1#&6^waq^<nsLrt9}4Tj!4lBl`!t*89;Vsw
z7>?eAQOR1YZ2($#5V>n>CD`=*%5g;x|4Xz(BUuw3qBx%H2-3C&gWad9`?SFte{>@H
z9ZiJ4x~p(>w%sP)D4@kw2%gkfRKk-@s^NWNf6rbX7b)H=XzUNiZSCr<=mU+*Fp-rI
zMg`HVZG=>0g$53ZyKe(K-BLS>DW$_Qc>PpVB7LmMc51TTT9*=^`8s{wg!O(wj^}i(
zlw6A8pzgE=*Q5RIQ($QqlT1O^nGgrDpKsY?lTCkEjUdOifLANWW4D2e9&aCbh$<mj
zYSmZ-KUIqJ^K-b$2KIY369pkhez~(=&MwP1cJ<=T$)WvD7tf^^&8J;X{V6~v<_}qY
z{wf}M$_#h^Z@KMJ5fp`6eBu_APX*-^=ARXC#Z<KcM$<8B@46Z4Pl4voQyE_9CZIL$
z@J7c|qCB6JX}vvz9x2hJ%3SJcWyn@Mu~~oCc#La_!OCw80wnL07fqn}>u#4c_ZPy1
znKDnr!p;9_a*olX8YQub5oOv)G{(>!UPio12bl?6DI(9P7*MQ|E-fB1g=mi9yV<Zw
zVDrmt3X-t_tSvfG6;SD8hwz%}zfa@tyQkNRP==@zB^mo?ECL*~^V!d8So$1ei{F)3
zuK>#xG=*EB0P7P3Y%@iIGxNeu=9q&!9&1Q!avNcV7VMzNKeVw9c!D*A$%R2B*})Jv
z1l%!+s2qKFl<HU0iB?oMN-_nDaGz@dD`UI8j%opq;jv+@Z8WxR3f?*cWSVp~KyLo4
zFkY^%R=(C=2BQ(&@FU{|q8z#!VV?xo;#W;7+M5S2o9V=)9K${J4x?|c&@_*~(3H^2
zu<1Nh^m?ceSCgFnNwDT^u<+c2YJ0U6IXWsfRFHrVibN}E^;d_#nq6<yxDGS6=E44T
z$+imOxGmif7SUNa8+nCSLFJ;nR9b#zOO@6awO#2%Lb;B4%n29}y0)nx9XkBXQO`6=
zyRQyf<#rf*(9Em!a-_jr>bvTdLvfdW%TuGJ7K1j{MJK5SaQ2wT!<zgdP`kfmBk)lJ
z8l}qL#953Km!C_UAbi>mR#({%4Q88*;bF<!>jLj3&cHp$pB&+hmzP^|HJe6>d@vkM
zCptbWml{rOMSDKZ2B*@{tks%s^JO5ai*(o&;g{%^u~o0*rmdbb9X&^}BpWY$Fii$r
z^roW@%0GI9vY+W-I)X+Z=%N*f+#`Vo`tc}F*kenJGDAlPzUyQo6O%^&*+Bx-YY?as
z6PjnBufUKosMkCtGH2`ThN=`;L5_XLdC+k!`Or;96X|ls(rmE2CLrX;lk|NO6nw+x
z?ttL*)|_$`LSW>0TfG&V=)LDWOX1nu;+b?>nhhqH1jb`vv<!ZRnR@(7Pr&m%ljnwB
zh|mG@z+orI40hIi4>olu_IoEG|1y5QJC6fbXj@=`D+D&GG&jCEVwilG9F+{^?JC!%
zd71+={tAsjP+#ydsFi((m;86z0VM&9{#{MwtbYT4SGe`-QNybWrEC=(*`fr_r&(Ii
zw}G?&?VA~@Bt|NHx%}(BRRj1|8cR{WrD(lqgs-O})u*%Z2&un!x#OARGfx}VzAbK6
zfBX+vCS0>+#nNXqi=iMGS&0N_I2GG&$YHv=THJj8a;F8VNRd+3e5t|A?vw8sfjGyk
zC4^VwW+Yu4q}MX?{@>4}`wym9!s%kB_%l2vqz=X?Zl*t?il)>qG<K{CqIK1h~)
z!A9fh(Ac}xd0_zQBm6jt`}(B@(iVqPkOL^OgMGiVx!Icz2G<=-dX%E>!#6F<4pk<q
z4*tTs9UZ5U?O65FDEg3;Q)eLk8hVT6m1yl{q=mxlb@}<V&N9@aGaqq=?p>}E=(!RC
zr0+*8{u5fD2q147CO4kv{h>8HUKVWgiwYNOpAHarBIcmGEHyUH6H>jn%O;DMyRINB
zMOP5K7|KOZQj%T0Mu}gBk(KmZcftguUoT6qxVCxV69Hy;JbHFyTtZ7Y3;I(^IUplf
zR+&Icrm9odA&Ot0T%}z-L0@DrUg6GN<{jsrwGAi!@vAL%;q#ZB0Yy{YJO60bUg)JK
zIM=W{9Rrg>$7>vE8a}9j`Qn0b_1)V|=VVPAjv~){xB0f|9-wKXG4oKDEsKY5i9uEN
zRXp?k<{(d0pV^<5X4@~{g%8em&Gq6@cuemcc0*p}g_<M!Z9j>LTG0(c#$E$yPcmTK
zp&A^@CtUX71<g>ZL-K}|ez=M2T4FrHVLWgS@cpwk;9EXcH)?ZBP=!aPq->!2V!4yS
z5MjT=D+qq6J%KG7+(SlM%>I~(QQUneU4E6`f*7!rGGy{g?bRAobZdtOAV%Xm+6-R;
z+<OADANZ24M90ikwNM+NA$&e`n5pb|td_KNjZ={=?Y3NJxERRiDp^Fktsn+38vyy>
zoQxE`6pn*jMKodrqFu?cDx?B%r48eZP9KSGD>z~IN+*mxqIyGL(G=m*G-)(m5DdvF
zdo;g5+)Pe(7B;6%IaU2sV!+iF(>@AA416nNdHjAtW$)0~b6_%TAa0p*DJqU!4%N;V
z5dI7fvRwMbNzI;d6J={#;voEytDqza`C?vJE4HG?J%^~O{9B98$d2^7%5mMO>*M7Q
zXAVz;qRj0B3z)k3vE)k5{1{zjQ;|ZoW#JW0DkNB&bNZ9;>$h|A-iF(R2(6}6bkm9&
zZn|mw>@hwlxhh@j8{b2slG(3rH16JAk1fjVd(EwGG$A83VO^~XomU<X(Z@ZO*-T@z
zBsO0K;--AVA|Ljby|C)D=8v9wa<S53BVwzg&WdOqRfZ0ZO5NI)hXLJATJGBtj7xOe
zE4}nk><FchVgeC;zdBq4I@)PFT(cq8*6a<sC=t0=YsF_d0M1O0lQpwNn0Zske1R0w
zg{h`7hh}U7UU0pf10r<AUn*?<0z;k4Da&$G?ru@eO5s<HJ$MVBEj|$6OJ}PDQ#&Ct
zcJM;x5XWpn=!Nre-H$$<<;Gc;Q?G7R@@n5Af+O@)9KzjoZN3h{tNq;4whRbMCsdj<
zp`^_40iOE7n2mq>Fsy=r91w@cl_GF;sc6L(ih`Y#<hFuLq)a^9s*L%P+}-y2pv{Ir
z4iM%iGwFQ{cusLzY=p1fbDZ3qZ_IOAXSZ_ibT+mq*hW9ijByJiCag=Y)FyvgTuff2
zHC&0=+$~Jm;_*ToOOdTe*LujTQgJZCQ0;=8hoY6I14=&71(Vgr>#3xj&du0F<H@f|
zvU{3ui|JTOzKAHwyHS<nj+H~LJJ_NR&4Jh4T-4z}N7xfA10$=70`t<%3>@8@G|{=|
zz1Nf(d9G4qkwYO3Y&gcN6kx&Kb{xa2o@eN*#-71jCCUuCbQJ+!ukboe`aP*W6baSC
z`@IWiCj8F_`ByM8Bv0KL*@AtH^dk_#nw$}m`wrhN>VdP-qIHh}Unbpm9j$8Zm>RI`
znyd&Jc3wa|<y6eor=*;lIw<9wXHT-~xbUaG{#j>k3nb2%zgx+MYB7-BOFm}12J7}x
z6o09z`v_mFD#|;hI4frSrR2=c$IG&-yF;YPtbDqAda&2oJvrGwK8=`bM4ETd*+4r*
zh~N};y4)QV7TB<LF@HUJ-FbQN%l@;@vxDc)_mB4vPdkUNU+y16Z>PV$+CS-Zu*u;*
zv^YKLygu3Qoc^@m+1oui?Y!JS{psjg=jeGy7G`+5i*JwQOBcxgFRxx4>>ZrG__a}=
zKqUUjcrr|q-f3oX?_yYj%niOQc;;S|G8l4fB|T@&HaA1xa>2#x&faolx{kJ7^j1#-
zj6$hR?1AkBMynLrd&m2`r+}>em#>bFcaMKnH7%F~K38)u43v3s0i#D}i1cEJcB{@X
z10f}ibID$t{yaRtbnm#DV>e<FrnvVO!ve+oG`X5{Pd3GLsA|k0gx8}NS-ov~0{!mF
zet(8;w_TtF=gxO*4a#<to3|r5q5%6FT|1{;1Q#Hdqoaq0IIMWqsABy^Hda7^EXLUU
z5~!%$QCy~zwap4BQ5xYj=%JhF0+OjUgx6vI0dz@8<mvU#$S-HG{4a<-yd3YE?d{N!
zP$)%|oMu{{3hnE=M6F2-7{IZX`f8l2{{#ejauttEG42UE3Fu01j8k^49gutk5p%6T
zez|jsf-!03$XPH)_k(MZn|{a>q<?J!uwmGwF|o4<rWc{`rBZfat!MHCdiL~3bqdLQ
zbQcYa+uP!yTV#r${ywz6)n$=V5{<z-J^#cktNp?}D(}Pnz<T|b_F%0uPA>uXEa_8)
zpX!$QBtQlU9FLM_lv7pOL(g*~rI)2djr)04lDDvsJq%a5H_k=^%jjvFqETjrv-KRP
zVMNv0|Csetg~%KozW7x<KROmC|KID~<NarXq`kfU6T8^b{^wrJB`niKB@Myq!mP%_
zPUL)=vRvcp`xXrSMLImeN-sUUCOL`fWFxq%h5F%TA-s-wYGT&W(J)~<f7XyWSxX?R
z8@0yUa0&8ej)9-b4t$$FrakNI|Hu{AMAm73Oh;ZQV1Q9S!6ju|r%ZS>mOR4OgZ8q+
z(!Z>DTybbh*I0#%umiH=ePf-}2k{tC0*7c+<Z&UsN3NxgI?_GA8g7I?IYYGt=RTp3
zW+%HMA8D1$88Rp-)eNyR!{ywXx8W>LH@t2a$(Hut4B^12kb=|WHyarK>?}34C!h|;
zBe*Uf9KJX>+@E6$mQP99=u{miVh$OIV@W&^79^g&k@wEzEpkUO2<Z~oiEg(x9qETk
zQL7Yzn{b{x+hR~_OK`_qbh%5n2Q6JK3JoKUC!6g&dWioSv^;+@de?N{Y3DI|*ANfq
z+6STGu|!#g@wxtQEIzPRVdOOplTgdmL95!PtUrI(9u*?-MTFk?qeOb&7lQ38kom3x
znePI~^dsY0_x_a7nXz&_oo1&JcE~-SJ|#kj-O%5v_xvH5V~s%U(r~S{cczeQT)Fm8
z>?llkauDMGc~~+?*hUZP?IQ{4z;$+V=W~-8u{jmoLi}mDYcy+WvsTVZJ@U0R*p=`|
zr`X!^PBic%XY+#i{VXxpS62-FpMcK)<5DeK<18VKb$|38Ze&^9Q+aTet|mqweJ5@U
zW8fmkoqOp|L>wgr<&{A5Cna;pRDIR$q|CF#vaVGfZp`W16OZKDP@|yisKkAF-b(pa
zOFLeg@Xu!5tQkA)#^tH?2^{QBq#fJ0<tb*%`rg$x(CqAO(p5o#a0yKQj>XF1rnfGz
zpx?Djkc=5<-wPEcXcBUy#z?91=3(vnNj8+XlcfqLlL4<48F)*lgnr8_!Z}6s_&1?!
zmfnCgbJ93^&(GFPJ*LR@2u&NLx{=40`|3UCmY<fwb7A>2bqu6$s!iK%)i-5>mvs#`
z(Vgs*Q?u|+i0C!*<}M{8z5=%x#mVXM(czEbabU)P*<-2pDY=Uz|Bh1Pfp=JzqzVXS
z?%!^Qa|rmpj<R*bcd!npfhaE&u)6B$i*$<og3EI-4@Bf8f<VS*kC+i$6@YIeJ;<X|
zdB>M)FqibIg=~m6z{`p4$1<TOWO-?#o$uiA+5RtN!|S|!eRA4)y5HIDoE-dkh~_|e
z+Sn@XAW{`Tm!rL`PfPuWUh_ty-PZMS4-S9ceR1$inGop?{{k=baH{RT+&|qvKA}En
zr>k<t!T2&Y)o|=tAq1`@_>G+8b(|VTtN|NR(P|OB{^)33D&xA`n69dLu26XP18(Ad
zIqhDa$e6&>%Xk>k$4wr}r$g$vYDh^Mrj>kWXRJTkvc;8)HI-ppOEc06Frzb<vCxZy
zz2cGhS|!;*vtegak#C?X-GItA;P<&KC^|5dgn*To<{YS9fWeF9B5nd@Xp5hemNc*W
zrmCKKs9r6L<I>w|Q$O*_^iIa`X@t&+N-D!YjYzgc@)%+FK^t>O7;f?Ckr<(5v9YOA
z>yZ!F@E^APz}kB&Yg9~**Y&+hBN`9CoG5C&N?XR?QPr^~v4JhMNGEcr=(s(h@zTv^
zQk4;XLUAflL*)@Mzqc()^mD+9(~f>`JNQ!0+o>C^Vj5%S1l>VHp93Z}h(ze)O?B)F
zoh8f^PmmGVh#-knLX~z1y-IJ07Y(T7ur~v4nrYyfgFoaEWOH7~2|U?kqYGrwVA(#@
z<n@=|ZDO5xRXCYU)?#gk;%Uh^h{Yy=SubIPC;CBC55SU^@c%8L@bf%XDD&&9s7%?3
zpduaFIpv?wECr8)bSUeGkSRvvjgq0GQvzWR!~u+;9+LMOMy;Ww&frL!+yo|y31ymZ
zn~HlAbeY@7FV;bjD5N;W1v8_wBh&uLm$A(Bah9l>8`8YfjfZ4NjA8MRgGn6s49Un2
z4u)g`vgKJ|Y0*WF28@y9vWf=+gkQ{#<hpOj`v&<An$?s8qv9cOS;g8c(~+K)Hd7*w
zj-nx}fYp=>f-i<%r9h2LcLowg<l?Ojr(`1KwQ+iQsn4#ZccJ{mCj(t#VMLV#Wr-6i
zM{`Db0WT2)HqTPgHBO7p%>Lb^&a$z_t6w8SfSh%i^;$F@>DZ2rma?Q7P&;0_tz(Ry
zeP0z1S}{`r(sR@3uxpi^7fkAX+N-QNU_;JV^ue$8uu?Kma&KM0jW;6>pc@I@45fH^
zJ4$(D$?N>asyu^i|8~iXOh=n&Lh6u|$^>-fM3i++Wg9Yj33;ux82U*CQpR}FP{o32
zj$y9JhZv*r-MQN{SApH`WpIm1cEqXVpuD=l>5g8M7pd05=<R8EFHl}OAj<A!=N+XO
zXY9DuY89g2;hAB-27YTa%`ZD=n5gko@{msY)=h!npP*vk#D3DfqU9%P;$09kL-h-P
zh#BZ~KQOofB2)VH0@Jz39u$^q$?+*ZWvtaOF9iaqvp>N{0+0mc9|7~mAc~0$+HcU-
z7}Ex9s=7<KJG_!hl1DGeSY{EQ<+OJ!ug>0M=1pQ=eV{MABCXj#C0V+Ul5d`o7bwQ#
zN{*^LRuZ{G&EQq4P5^#Mc|=##hjbKSepQU5)}^>&PBIP=6Nx?w2*=tL6Ft{D`kG_q
zq>dw{EzM3AM$&Rf7Bs*+-W6^^S;a_PW(H1QVBhw#i_J4gu@kBONr+eOi1kx`p<qDS
zLbNAMocsC9NPl*P-tO#BLk>MiR00omKL7@7l71nl89{4VrGj%+sUd2crqNNq_p`i+
z9VS=wSHsy$!sB0itE(Funr~4O4U_f{hc+Rz+cjmt^!r9hv}EJwg<YS^E#DNsy%Fxs
zx5<Br<um4if3_t)5tDFv!QIn$v;2nm1!%tF-grkFf%h2P6HKU9H1AXlUi|vpZXN0+
z*Hc&VS!zC&N(ScwLJ*t^aH@!_M6+<<+cA~n%XqC4V;49l*$wsQasF;eDfR4qlod*(
z&fp%32`Mr1gKpoY8Tm1frITe;-G@>3{f!<-E#Dg%8)HhQ7!^JorBpA_7E#s-xahk_
zlGU6|&(D)FMynTc*mxGcxAyWLh(;D}E_;*XZXZPAVqTlovUF!WA)KEJN4(pX09i^n
zH=}Oqr+#_I%|f^ujUBhhsT1Z8Yp$b_1_`lmw3-mVZ0}&I@F|EQC_Sc=rC_QW>Y>5e
zci?r?Pj6GN?1b}?tOwxd#d+tLuNWHq=00*r<&ZXt<p@?+od?`uznG9H*GcKHK~?S?
z1#_vq+>w`vYiX9ruEbbZKdIfPr90qpu0v0?GtUf`w>%$zyqRMVvCD5S6liIF_rIDA
z9hbkQFA+;r5=ZoLsm$r|^mzC1<kb;+r0pHO@|+m!wq*0h9ekCM{*BV(U1Z~Qayi)4
zFU5vfqwNg4%~RD-oT;ogi@Fylm$v!*F?`;W#}=H-Q_|mzt{)+ybeoeDZ~Ll*BY@Pt
z@8ipyO4CZUm%gX<<h8L0cSx+=s~^+N@tl@TB~B_R*6lJ4VYo@JgWtNrKaZP%*1#3g
zVMnqz(zp1FOF9_+UwVg85?ixIF{X;L7!ST}KGIM+C166NX4~%A-uL0P{M0;>Tw>4q
zp88I?&d0Vj{))^mt?uP4iPqnB?g;pHlz2*u2)}k6Vd)7s0&wNOYk$zco*(@3a$nNf
z8K70FkGom|93<llARVO##?mG5@sj5orR63))mBJ?GT*qIQzHru8n6wx?%hFT6Z0g)
zQ&$6zCv^b}zoip((l_3)S&x%*z&PA_mu8iO;-ah;EAyy;+pQ8j_zf8fK8Dh^Oa^EQ
zFHXazK@7d~g%&f=z&l&SX)xkCu_9c5>>en<Bc);BUzgQuGKLw8CTH3;spjKd(M};H
zCCN_~jf{${>w!CK<$-_+rrP3dxQ+5RkZk~_G5!=qOBwn=fdB|WweelWv8|Gn<rJ7p
zy&GQeu_BJGz{Eb8W6fsQ6Zt6VqU}Is8zogOAJk00a2jt=$n4B1Wri%F+nregq4o%v
z@2jdR=f%4<G*H$}_-otRKB2XG4u5SkmcBHvqt84}J_J6P;N4J*^%pnEVVHtlDd&Z9
zRXBHQP4%*2!W|A}B^I6WM{ZuHh=f;g^YKb3)NJXdMPNTmu4{5CzF|VSrNxy{=V+pA
zp=jsk8WIx|hv5xM76hEF*@R5Aq!tQG3Y_TNqbZj8g6rm*Tz&;nDpU&Y3En{_6Y;vf
zSYZ-rl;-bmmzQ2F9)M_(XUxm!4?cdo1T{yg%3$5nne`?uLO)4f0!Sn`IMnu2l7Xv)
z?B~5qhtfVo>zhF9+e2GHOREIQq}0(lnnT2}W%_weWQO+kO;SoZCFd~Lq*HcCrXh!e
z3U>qA;U}8<TN2#g5?N7C*|BiHz}+F1UIp&X!LBGqRHl59fxZOQ?y1(2VtRq9O?r}+
z&z9B92)Q;Q$J}CJ<7$KJT-9n&g}u)9`4lr3+wDEq=2ofO9QduGkUT2e+fJn-MVmSg
zmvTbFtc=lHQ*+3tOLk(o)VQ{WG&}Neqp8hQI#in8C0#r$gkxpmty4QpcO=Ok1y7YI
z(aJwS2ZpX6lo58}XjO5BcsVE64+GZZEXJ$<+74!IPfBT3PQAeD?ZPrxXqC@wIh-O5
zq&Z92H<KXB$lBmyTg+hie&XWJtqzTMZRK=~N^<|`VAWInZTrsKvcV(fohl67@s64D
z!g7{lo+L1%o?>N^Oh32?WsJ+N<&xRDW$QpI4rf{S(%bi?+=;5XnFaD|ektGTGQQ+)
z@fplbb=T$#A+r$%fXiZY-j0iVN=Q-Wr5kQ<qxfcnU){Lh-S@kD_U`8Ar*bGwCt9``
zrx%y3d~c*Y0FBVoU}qqig${aE6%Q)|Axa2+ZG)YuA`;O^?C8Ge+8MdZ3Ceb?jyDm^
z_Q>KW3ZHr*7c6+feZrcqt&Ix!oF`&qR=mb1bH#NF&Q@+z<9v-J5kx*!4w<$d1YQS#
z0Vsp_7*;<Uh^cc;Fsr;-&TGO%t@8TIJ22_0q;@fncE{uR+PPOWpIr~*LE3F@>}+mQ
zQm}Y9aZ|87Zg506CFdL$WYk>9a3{4Cx7NZGZFyfnFH<9?s)ql=4V5s(D(^EZE=bLc
z>{1UGh^wRsZa8|X;4@KCVBUU#Y9Q5ar;VLDTR}Pi^BWaq%)v4nR|#2!Xe%VnE#m4j
z>uc``u^FiyE0sHbaH1>+tZvDEnbXHjJ+wx{y0q71otAOxrmGcbxG}5zxHK1=tj#19
zi~QtMX*Aky>k0W~tnupyZC1M)%QnM39JO;g@L_0tUetw`rI^i|UWInD86KmFEz=$!
zy@!%Y%LDM1Lg=8zTT8Q8_afB5Uj+l&o16VCiu=@#^K`3h9VzhUO3w0!7wSJdJObUp
z$j_;$f8kPy(^06ju+!j8LG4`8wwBg`wiN$ttIT6(b|GpVT0>?Kn7<Ao!6=7*qQeck
z?_^*Ii|$~_hhjX|B^W}VjHKBVzlnA8e;eq^{)XgCrR$Q^MUqBSB&~~9bIC*jJRbHO
z>`OOza=A$J7jaJIx!WRo;I2ixy|%&XU`V7r0Q5J!Od^8#p-kZhjXEXu!6|*2zXIA#
zNq#t+dmF2>bVaZtnNpkfGZ}zEovCN6gND~%Wj*=QTJrEthA-$7?ilZX0BXZ-kdR2X
zU3MpiS*7QW^=!m$fGU5ScE8X@mRJnn5;&L+@SgYNu?gQ<vRVFFXMug=SXZ7rZp_k$
zm!IqisVSfLrrpGbAMopO?m$An&{$AzE)~OLJz?M?i$ZfYbS|A*olEgd7FIgMtADCu
zoFrhiI;N5#-bybp3nPXsocFUUJr>n)IQ+-Hj3*&Sa(Wn2);3n$cVu{Zq@F4IV8H-t
zK$XAa4sArNlQjc|lY@a2W>e_hIvpdC05uSMp&D{33X1xYxPf_Z@|*rfqp&VOIaYZg
zeevBpP23-3rVu;__PC*NipVuIkY%zqhbqib4v^H*B?z*$fwXz#LnGpinj5X}8_j=j
zHC#hOrRJu|0qF2rfkM6FeB8V#iX5<@s>!kuu;elFnTu(Xn{@y1O772Y2K3sUSiY_b
zK<CB!x2l43+tQA!ohWJ@*4?^rR?V@*iWn7D8tY{+S6AO?zQxXDS7lN-k>9;}Pzllc
zJdF5|*Var0$ag;9eP@&tb`U%3jWU<svbvS;m@;8zO-#4(yS25DV?_xg)B%O+VlNM!
zOa+Z_$2|MhbkuXoI$_S)O=mTY>0|Y=hl{t>3-Y#NLEbtGvbOe?@ss>AJ)g*Gu5gCX
zz+uSVsNNNQ(F1GMK2Spa7hf|H!oJ>n+byp_bu&TCc{x%UJgkS~JeM1RNvbvrAKQCN
zA!*obhye2=OVzMo*yMf0Q$frHz>fz-KxTlHazLiVmRU(mrfu7#=wVJ_rOR+!ny=|^
z0D$Q~Fy0>bE2_jT0g!X&QXzS!lXvW=rGLBs_oe^QeSS^;?_d6!{QG}=*ZoVk_wVPA
zXayhT<L)C()#K!mA9&EZtkcIj{QJvy-_d{JpZDMIo~*Aw`L~TH>)$_F|88UByT|{w
z{`m2CfBF93#QNtzz!W*K5VwZoPwt=OA4bbjN^v}s$aXuXux(oYCR~I1XdAjp$U?NY
zx@u$Y`{~&j3q7jHHcV~=h=hw$0?%9U7~i0;s<bXedkyh;ef{6p9<OhFCw`2RzW8yP
zz}a#ZPkUnf0v@z3%!B`v4##QSIv?*SKs3mcY&r(!vWJ1P&$BUwrRP(X7EekzaPACD
zy8M^fKj4xO<M*#9F`9S*k`0JO;(bdzZmqYl`c*7_!}TYxUWlt$#+>(3IC|2vDOx&F
zw36Q93Rz9@tJGAA><klbP-*9QA}%MB(dMH^1KD*e8(%yEn2(^hM=d`WGG~Vr{qob&
zpJZfvd4na7$%>D{M0j3|v*}3K{n~k%^`?Cmps&*ZdAr!Ymqi`$naUK16<LSZ#c~AW
zVhN?(f|*c8F`+^ts4h9)#MRFUC3!+cP@yD@d<D=^=W8HC<4Kq2zd*wl4G+;zf%p@d
z8NeiVxlH;a<1E;pV!oC^f@Qtae1Lw;U07FpjUUAqGG_YsF{XjYMrn5s_(C?I>WiIM
z1R%$RFeZD8@<JSqGc0(D__!XNWmw&@Z0TgcO=$)I9X}#U*JxAaS7bS^O*9(HBRcor
ztU!~AOyOn&Rtia|Se=hk8RG+Oy97%k)vnH;oeFbmVbA*+Ce)Hj11absj$|<i%E_=z
zO?J%My2alQWI7tGaFE8iahp&hrGe;zB3epf2~1da$yvxCUzJHm;l#wbQ4g=^9Y@Pt
zSe0_we}X72*xxKgq)cN&>K{2JbbFyvhi5XMguaRqU86F|Aq!g+;yLaZB)w2np-GyD
z0wT(CJ7oFSs8#C;@W(yN+)YYAglfKQ`ADAw5218rvtY%&UPo39$c7!evYqt26O%SB
znWWwAg34F${G;{PI}FDq23u^<8@<0@^-S^aKy$rBVGB7A)!7PuCzqCe!RuCV%CC`@
zUj3+@OJ6KIiRWc*Je>sdIJ?Ss$&>_V<Y_uc=muFz9EIjKv=ny8wq`#n#E?oE#uM^t
zfc9&cL8fI=OTDG~lg?=_gW^>xq#j$~6`kGd_t+d$R98nz1oAtTdgk3QyBhl4HXUFx
zPfeZ}F$BTN#gQhicxWp4+BB?|S~fRz5@h~N##S^z-syWt5~yl%(F{9Nbx>$ty#z(*
zRZAe#HvKEELXlKURzW+B6<#{A!DQu$vG@ly=-_m=Z?;qB_%{z7>|jjlq){LTxy~m2
zbRY`Y$O35#*3xO<pr7`$OanCuQ5lJXvh=TBN3uo95dc*<(1H}wqBRbzU2&J#9rQB(
z5aMH{O91Udn=Y`k><2d?kI=YoL<S{eHY=``thqc;W+DPDnj7fb5kLdHDIp=fQ^!*Z
z$VyU*!<ZCVem1TkBF&KiV2}cIz__N@jTRlRm@_d|RnjOGq61W^(R)cuc0IaR#HuUp
zvK(={C}v=yE1+!C5ULiQ1C@&LR)3BzRV_w}GE+b$1E(e-6q5{@J5_G+esfMB<^RfH
zlR<oq<Hi3Vt35-32}YJqWEM<vR(9@r9Q!Kwq0L)J(}9hL8T^O~+fE<qtZ?-6p~H+H
zS*l}Mc4>0UEKh>?0Vr)#tT)~0u>&1_7dZK%ARRX_6)U_feAKAkQJV^+NldNa6iC0J
znih78-C%V}BMH&@DbAmzXt>+$;%!rL3YmeTvCLml5(<SOG6!Yjck!5PvH5hkx3^R%
zZWYW~uMD>`%1qt*_?f86v__2@7XpkJAQO1p-)0HKwd8$3K16UKEN5hUSU~*sFdknY
zjkXKv-T?mF=>_I7H%Z^%R%tRtsx%s^T!@ijcgnjf`=<JSpeo4n3xb<lQUf(ZFptW<
zsk#wum(NTMh@5PLeYiOia(M><2GGOQ9T~9z8H!=70&b(yi;qbo6WX<AQ*R2Lh1xB2
zw<BI*YK<e(=WW%i8z!&O@;E`7eTXJ?At~b_jd4r4ub>dY)dytvmyU$=jXYd(bEl3-
zj{%r(N@WHzNU6?fAULWR&hnyb$#=qX3z{BLp0x!S%s5hN;wVKhzbFECAw#RvqK*Uw
z8>uWZ+iJ=1(jXlLwjXxHdaKI}NX&~GGx`fN0iT_DY3`fC)xzvahfbe-oe>@<csfxU
z;Mn9RrZZc6gesBLiUFjq9nwwK8$Pj{!)d=?&T!}|!TC-2UatO&Mm_EF9-N4ySo`}l
zg>8pkMLkkgtLb=ybUQ3cg+<v8IK<Gm3|4lZ1FFXL`d2E?ga6v^yxjdIsxh2KmE3v*
za696JhRf&2%Pu%fa#@&M4t|N3m12o0_0ftf;@Q}8t7ry5hJn=t6RMKQOFqniZ9m`k
zRZ_ZV>e^Y%qC#02p_u^}PleJ7h_2OQ@AWYy0y{Z9I^O+pzeDQpy%)PDC;C_}Xt$l*
zR>cln_|J~^I<F5gb=U5*XGe!8k<294JbKk&M1+*RJ=&quL1x{y`P0!7aFN*J^H^z@
z<qwy!jYqFoW>*SKR@QQx?$=>0D8tL^X2=f!c_1dpg4Nu9G}^)aRGih*7e{;lsb<wV
zJVI~q!~J7G$l?AAXI4DdvYnqK4dj7%u&vRhf=U19WLPO!bL5z%>0=7>O5re|7M+vn
z*-LHmQMH@JSwHK(v$@;zhjpjJrqVfh9_<|+K5NW1oh!VM`13(9Me`J`1Su_%mp9o7
zhS~5f1eKF|%`V*n&_HfkGbr|#=C6%8*Z(H?Rr^=giBlEe9=2MKq)9A)1SC4hhFreR
zTbB>(G=G}bXfHQ-ZSbslE$~lq4PFwh*mma?=GB@N@%*+5hf^kvma{mwoxkwKx7ABt
z+1g!kCKu_AQ&go$B3FgF-C~C(<|8kjismFz`P}s3RnePL%g7ED)KG_NETtXF_q96Y
znn#mG<9FTriiFg1<Y%q$Rqui$RDqb@2=$ai)d1(#Y;kQC(^y%%=ujr&!;5_nede5U
z@Ty-sLrN(wN7`<af=-SWf3TF@W%y`@HGn>vRJeu;k1N5H-+$?flE1x@M)kFGKo`dz
z^gSep05fJ(FgHt3NXMP|B|%7GwrO$^_{*V>3_U{@$8-`T1Fnd1GJ*b}!(%0#Y^t9#
z$eB-G`stcUW5!}wbaFn(9PH4gyc#DoyZI1;BP~M+OM^W(Lez%ebns(DQbkKT<C_nC
z(9!QS3XR}Ah{x}C?S~&(TdEF(pZMIxbn6SGG(Za=I*;K{N1%^ZD7&~$vp`zSl}8%P
z?QO2|e)#m7x~(kuJx9s((%$B(Fg^>IePO?<SxL5X&@tus<27YNJ<SE1qC_OzACn<E
zx6A_XIM*zw);eOmP;b4UL@`O24=GKpkpI~#YaUD}E5Fei75QtgpTxtYXUbP%h)F?j
zSM-=yqZkgNK_S_qYhkp0(N2duwDBp6>~PQs*4YN0fYB$IcW_uJO^p_kVC`!oPw>Ew
zVvfz(0}LeI)I)B{nD;=&R=?;wbt<+fNutfUxkWkH`;B11^L4-2IX$+?H3Ox#Wlr~g
zS@mr`h=CEM0z(K6AfHjQyE4K3ozV$}530G4b+Hkk0t_;~ODb5Oiw2O8T^ewR*ja+I
zLg^*+P|}imm_-#htbvi}1B03|Bny*KfQy5mb%5~~-vU4d|1eS|K!<acey`IRmWQVf
z8|AbeEE<2K6Ml8xL-)o#t=*)j1x)N&8egD>|86H$r<VmxW~TGo?PpTYzpn88O53YV
z%tusc{iv*UoeAjb$)uBcHO_kQgUkrlj8PC=9Q{qvdNPtw(0Jli=Fs0L6V(m<T1xyr
zOPrpYchZYj5I}Zy<DSS$r_sGrJz_@^uhWzdwW9fsuqLBVbL_8BIeFQ!`KW@nd_<mV
zFWoNtMv-?Jm(#mR6`(Z6Z>2L{feW_eC$;A!`%}GZifR_ynf*GGak*o3?L3wJL`Je|
zib9fdR!UgU5p(#`ylZWKdCdvT*(s87<2m;)D{e*O%e)arM6mM+fVCJh#{8#Iw-naC
zJ~C>qd~+9_>YtAjO`T!1R3FSf$p(i`#^F1s*?L1<AjR^zO8fWcCfV{Se<6Z(wS-P&
z6zb_-iJ!!)|6bcrDSLj>t}x1OKh~}h(wEN^KUVrYIgyj=!5~2k*Q;bam`=!~<uBvw
z5g_W17ui)Z-oc_fcvpIQt&qmPQ}Vax1JU&AUjeLvLp`p%eY0V4;-U_2FSW1SNL6OK
z!oa7OSufcs`XZc;(|C9>#i)%mxpI9Cn(`ye$i&)2MUG%PnhsH!B`eU1*P|Dik=;&`
z{<(5aLm*so^PIT6mIlQN9!UUr`q%83Twdz*&Jpa7!9$BTl9@wM7+-Sawd#Rw)0a}%
z5IMApZ7X*LW_RrNd&4ekit5T;te4G_h%~JRr&1Sqz3sreU@mRav#58NjR#R#o1uJJ
z^7gDhP2A6y9M?@oXA}L5d*%chMn(23r&{Jn=|J}E6lLF(c2aO371qbrgI#DEv4J>Z
zU!k&v-f?6(H~ZWAGXJo#srfZ~s2*)@G|`H(BbdjvwE2}eu*z=SEj({$gvpA&g0JMv
zl|ho(@YI38)^)zm3zvw5yxE;p*A<BN&qO&xo;i0OOopNPv&t(FmWQXD^Esbjf|{mp
z2(3iqEx<Ym_8b<)B%>!$h1}qM5*5oq)fNSmC31H(8fPEo;d-HKw>Iu+2DXgB<Cxhn
z;F8;$4hGj9e4nC>2V-}za?`PYp;5asv01{F8J+AMKV-biw~H%bHZwVWWzq#H(<%+V
z49ib>z)R6-`Y@w<&=mHQsw93Gu8WBd1eJCy_7zd7C@yz5JjV-i{r0^tVuinPat?y|
zuuF09%FW)S|BEr`yN^K~7%|y$9jYC9vBY$-?G>KHXJL=gdH|w;20<U&(?LK4bp|On
z2$vouL5{{#SS(KyX%GVx#cGxmsV*7iX&=x$|90{1&$Ka^6M=jxZ3}uPUJXT_q}54R
z+xyV)*^lG!Zol(#MB|q>P2dR71lxJDWOF}D^2v@gSmSD)W)VIS%Vw68gy}{Zm{dlQ
z`y@#B`Jd*>|HN+NwGu#8=YQJx%j54h{QOUUg-7@KpT52PPaBW#Q$O*l+^_bJU%tlT
z>_`7UdvCrM$B`up-@kc^u;%{gER>W$-lo}n5#Tnr5$I#F<!gKDmsBN?P+M1431M6P
zKJy&&V&7R}&&aH-0?D$BX4+(B#&RO!#EBDUJuC_S<P}+P7Yx0|D6+B<3=hrj6jIri
zhoeT-jn}xR`JPjX&xBHrldJ<zbe!sIZb!#zv|vz~vLz@Req-E_X(LvGpsQq<a$uu?
zm6M&a+i!7Iu|mGDQ=t{hLduja)^;tW3au0h1|urfsx~E5jtsRTO)ExiS>%XV%FR%V
zbsaHPTmqO56;<DBe{<7{%8!e-xp~w-y-E+#i!rohy*bAHt~7;pVldgxff@pH9|@Ut
zc)+>R<?5>B^X*n|n8AIcresaMi9Mqci5|sBKta~cWRSN`O+vE(=Sf^1aLUBuEs73Q
z7X|q`R%ZFR`si+eMFBrun?maj4ZAVI?^e=IJ>^AXznP~VQ*mQ>P?h%``*YL28+b~i
zAE6O+POTL4l82LpCgalN>>(S$YmEJjGuV^?w#d%_JzB3Dj^Pmk@>3Lf=-v?jWf3g2
zY=#Ec1SKN1$3#Q0MM+Lev`xyIYBWtY<3`Bh3g%hI$0soVi20GFfl0)W1hoejXaZ2(
zZ=!0ZJ-fcBil`Q>_+U0-gt^3okLQ+yXTP{R;@HyAM1(-b#*8nV_}pG29+XZ_KgD43
zJ<zI$+l)9R;mt_jn|V4$Y_;cMMwKApTdO_1vn@TRA%wMTs=CD7^{{PXp<L6FooiF*
znYYlqSXhdJOnDTng>7hIp%p;=L8-!n52<8V)fpqSTC&+nwwMl0UCrVO($7kb@@aCZ
zRhP<04gU59<Fq9`Z7BMnG&7AINvVbG602jUU0>CT0wdC3-Vc;ew?XjgfI?S`qunF+
z9l98FyMtSjsooAKIZ1brbU39nITmBS750mkw-%F05(hQF%OQsrz|7y1%PWB$XoQlc
za`sJuuQ2|F;rJ?#58!|1T#3mWXUjE=qiY&kK)?+S*xdYTFzI$Ml#$GN0T6SlBN7?Q
z-jy){wPi&g3EhGa1_M4qCA=kD@RrP2maBfK4XRqO6SDCocJK)=Lf);&3I=OB(XdtA
zzK+#IA~EH8jO~^kQ$r`peT^ZA*Ta#zZ57!42Lj5C5fci+ZxzsC44kWz3aQ0j`TXm5
zfp+~bQlNP$x&Z**!)0kf89VAY`GuvGB4Igli}yzNF(Ad0F7ey?2*zi51nZ0rYik%`
zE3z%zop=p0=)P=DQol>~ThjKlC<?F^rxJl2+QeFb(}C`Qf$Uysct=-?V*rY3Nz{eC
z7<#xYk^Y1Ck2_dzF2p@HP>2%!@jGZHs2;aay)9IULmyxZ1(Z69iIEZC0~%5t0o@h@
zS}{yW;mIDrWs%5v+}h;I5nyPgo}=|snC;cHDg<zSVX9HA)7;jg_Ej#3M5we4p0qDq
zDA;1o1YyJ-Ha)9C-^ThrOEKbnZx6f<&ak{@s@9?Q%$AwKmModI5c9zpx$_<ie`R<j
zrY}W?TG_nL?r6@lERKAy&KB8ccsBgX?_s~luJ)10dELc<`wP?`%S~i*Kiz*6fju?$
zH`T(shgnGqc=CRhv3$fcY`5v_Qoby+@4(_J&OeA~JbgpXlNqXYvUl+9_U`{Z+CMz~
zet++WN9%--Qvc6bP%rt@bp8LAU#%AO|0`cUf7JhfviYCBd^7^Eh~BH?qusNe(-Qr^
z`C6X&snj=!vlP*8rJuKhLhwJzd-Q@b8zpe{kXDQ7=nb3)Z%pkWi<MRCBhGdN6WWK4
zenmYMAx}C2ROqFhrcBfXGcp+udZgn-6F{u4LOb;^HY*8qbsndreU+teSVGrMIHfWV
zfvg7`+TfC!hdJjcUu3|U00xYq4vZwbAiF|+)wHrVqjf0Y@8FIvUou1%J?)?ZYJ2HL
zqp7-EcVwt&?meN&RSA)jt{i%lzD?RB?hZ6FN-yZ*7ycAW+eTT$3d*%49lCyQqoE{)
zq7=O}!8yE`boInrR+KcEJoS|+rWU4|5C>VFn(mGThQyec-p30v#J|*osM6!GTt^Ua
z6P;P6Mf{@_YRw{S2#V^AJSl7=x}dEId}C3^k^s6~6qYCBG5(g$i!4i#HK`62966)Z
za+{X+sfXUz)(Bi9<gh$ApKb;`Zl{?`Y`4dgqzlmc14}xc(71H@;OK1l`^kA1b4YDu
zc?&~zZ`2!FOqT0PoJM;M^|MRTAY_U1;Oa@Nv{=%x>Il)&s03cw;hicQIWAT|cB|{w
za7OqE-A<+SOBcrZkzYsps};9_wKDS0c+jFn)p(57>upDRYFSlj2qc8_hFb`=eeAa_
z<Z7V-jUI5U2Nwgf1dwyNK63h)2&~5}JM4qwAnz1{Y*}sOfDCfMb}XNNHfAL_4w<XD
zasDJ*#^#_}?*93p+vYuQoezmC2JsE5Kt<p1#6o!w<E4VzgLv6^Sbyl_-S~fp4tk@X
zdLYKZ8NW5`56RvXhFlKs`x?KCxy$N?5Xl<|zh26v4Tb#40yj)l+Ay<ZMQF^&Qh8;~
zR$=b+RBcn-NwnJDuxr}tK^$2?!)fJ#p*EZ#N~SFx+{g$ur?suBXxZO$+HmCx%wQ{M
zGi+%v?Hx~6tr|kHXl;5^Ni620wt7AHU`=~^RASLk4Yq-T{xPitV#27cV&J*oG0BAd
zE+Z!{D62H2JY46qP1#ZdqZ`u)E=(V=iH0(RqDm=6la_U2)r}p4trFjjsu3p;L1QH&
z4Z_kfI@td7&yVh%PV29Q^0N<k3ffOYD|2$z#H!^#Jyrq#{@22)IR<=Btl}%ubgZh)
zd&q3o8V@EIGtjaWvK)|$_BN`rfYnikt0;Kt*!)Jo9;n#TE=x<zs@o<r&RW+5sM7LB
zuYheVGOrbnvcvIa?jfvjCOJxMDN<~5E<>iyWtsEwT>^2q+=}tGi5iU26}Q8ZY6|@;
zMJ;Miq28e-rRN{pz0t<8^6R5Ei@FCq^Zo-wWV}^ka+vrfd?5wz4{Me+tHRr8cm72B
z-{Z71d0SiiJ5B$)^7->GKKJy$t6#2u{iy%_Wc9yaJnDZf8gRU~d-l3S`>VbMb-z^_
zzS<Fxs}y!tz!&0ah)f^{nU2O0GXh&HVsTMs_2K6ycQ7yLCBoc5LYm5~pN+Gm3wPg+
zG!=S6WLG*$1AUEF=tU?lzt#xqgaB?>Co6M=bZrW|HBHX|V218HH>D#J{hxiD(h++(
z8NXRfw*7RIKG~j1Dh*Pale)3EYAqG39vR%0<Owb2x5XZE6^rWl4yxvKOD*HN99Z+x
z@*iUTvx|t+0A;yJml#QriJUmsC34kb-d-j{;2b;2IN|wzH6D>I8o%PaU5zG<0z1>5
zfU;q>UYDQW>&meIZ*itY+0v9%_<&3Eu(LmhF`%M-k4AbQ9shSR1NS$L|NH!_=PO_M
z{NL&qkNn>!8UN?e^v|((KHmGbEbb3~Dl&cx3)&IuWSsOn$*5zFPMn$qU80|348;u$
zE=bmNuw@-4VGAcMAExc>;tuW1bbiXLKb(wB-bIXSc#dyT21$L1w(bnqy@#e|6z8RO
zI3dl>aq@aiQ>2KhSOGw`&tdpkJc;2Yhb|~Rs!w3tozziU$}`%ZoDI{V)tBmFE4G@q
zTXo$EL!4Q4yCNhwtXI>LRwyB&<*g^n16F9j9WOrR4DB|uWgPnqer>ng@E79<{Z1La
zUfkirS#Ew6%Pl7z@UwlTOwF8tMpl%ImsDue9~kiEp@gKUdedq0N=rmhp|xVZyUoo=
zw^aP9v(^)QZV#KnuNj!O2C`v=f!e{6>c&Yb01(@}=Y#_USntRulwK;S=U8G2@7{NR
z*w!l=gFfU2e7Md1Ti6FSnrElavbO+d#J>-S#nwCR-5*RS<^ANr31zKzgM_kP%};_*
z-i!ED)5$-a5`83e=;87E;r31~I#hEby&pQH#gOhKJDi@8>TMO~N`LyTy8RFbX-~Z(
z$wQ|@m_RF=ug`9|Wu@L!cp*8@UL5T2wDu2A_m02a-r0*B2pZU|^CnW7RQiFIKhA#H
z+>}C<Z&ua{A7iu+DlK047>gDA|4BECF_Aoj->M*=h`nTLKpO%VFftz($-~H~e6x!P
zUvMBP!YUF2ZEmh8sc1a~Fg3z5qnNs|0^rFP6iLr?A>SI&lT^c6@xEs0TB)^Dqt`Bj
z!y9T%=<H&L+@yi}ozX0Yw2I{A#r{EA|8g{#422QOv|bK6lP-A-agp;;2Kc<~czV6H
z9mA9=lw^SDGL})a4pqUFG;NOIQ_^#oG)~C?SH|>TlMKd*4p(lMMBC-%TB~0>0>y(d
z#%NNv%ftv3vOeB3+f>co+}vf;0Y;tHe2vZ0+_}gmr8icL{GS6r0QLx8{oK|q!E5~a
zA7%II7?T1(ffWYC`-kps%8L$K(&ZTFJdpzen!rBku0Vh&%Wmt%kFDLK*FVCW2RQ|c
z-|aI{12h|ltQ$3LxksKk>s^xnFg-zE|3RC)%&Fhtr)M)19xoTvg{GY!@eiYHobn+#
z80A~IY41{;;;Ur*H$cKP8&L>?Nv=;Sfu4M^IG>C;^Th37bd!H3<=$S5<9puwO+y7;
z1PpvjaQcMxVwy7YqO)c}XLf*UQRN-Yu4#aiC&hd{!JNY&=Izfq-b2$-KuUAWPdn(L
zFdXkxaN72rZP@b+)rs+|GOcc(0>IlRCws@I5r*fEc)<Y)6pf{O#l}n>3R3s@d_J6M
zC#Ofp+yB^W9c=%2bao13-#cj?9-X#cZlCUar{gp_$|(^=iry{?3UFFNZ5zZzkmQDs
zPFF=Jbn~poGu#T5E@u8ICe53=D)Xb^<J#PFZHkwP3X)_xcer<ZE&fCEg~&`{v=y>~
z)R4i+elZ&KfFZgoGD0GHzH8+LTHTeYff_1PPkpdw0GJMkT6kn=Tb8%F#B^G=4D=fi
z;`F=ylh)qrSI2uNCzwrOpYdV4b@2W6!P#D`wSS85j`y~ATStcnKk7xC6@N|Fe=@|R
zpL9`Z6>(HrzmpnB=2+DUo&-kHL5}%DGHr7RP6G^8ZO-k1x)WRXV0cdN#qqpvzF>X=
z7BIh(w1DiduaX=!sS%0J+_R;Ov|89Rb(d1!FE2%20Z@3-X+5PRgMY;Z05x#1T}Z=f
zQ`!6q1Am~Bl|E5yuhxK8G&XtBCU8$i4jY3AW2BHl8z^&!s?<p2hig|l3KMDqi&<kT
z9fp}>FM!1KN!I;@XZha<ndvCIh%^ulD>l53_DcdLOl$(>A?yYE##C#gP;e_!liNDK
z8#@8P;q~&?zx^5}y7oH*sGmk{n)s+7K=Kn+Fmi_k#V&TEvt3=nkJ9bvvk=uUCboDl
z6Dq|`@U>CsAWH458^YiksTC%v`Qjf;OW0nm3<9AeEwq{sw|*id)pu9$J<N3dc`lpf
ziaGZ2o%a7#G7>H@KcTF0a1r_aH-1^S6AjAlSx`=Lym`fVG8biiF<zlUi8PNj1r^df
z+0X+@^SH}DnQ*M8+U5a-WB2K4s&LFKci9jNOkpiCSd}nqZr$wr7iJYe`@@Ax?@P8c
z+ejZkeo?c-d&)0r3K%{f;lk=}IZ1z%CcH0c!Y6j!pZ|!M>$-oB?n<uv^^Szc>;B_)
z|M9y2*SYRj++ZJXu7BXowKPQPxBIVO?#VrVOJ6By0p0>BGzb)w-j&&4)WVaK_1`JE
zPmtvXxt@^=iKTjBfW-hVk1<h#@zo@kp$Pa$M$)i6gy~Q4cmJ~4Y%Y9&<5*g>8JE$A
zuxU^hgMJtjkFR^6t~~SE57ntYTvf*p)3S=2Li$4!%|sts4Yq&NVNXf|o`^-1A*xFB
z{M)++Ge(^Qryfq*QWLWtti##y>jAqCV%bBr|Ec2ILooR>(XV@TH81_TXQvhP>pm@8
zai2a>>h(eJ>cKj^AN6`4IQCHOzh_LFC*2SGxITK}|GE7CPtxwicISEkv;pJVO^pRm
z?f?JP>epXB_x%4q|KjVf9{vA6#rRKue~kZB2m(+{dKL@_P%IXT3I)6=<piqKFpHmU
zb*#6aeHHYpJe_m~wweOwx|j5`;e-$YF5o7bxa$FuVA7f#noqcSl_t(~!J25#qr_G<
zbFf+29>yh5Nr5O!JRDVNf1EpQA$G_}v}s2EAn4O^Ft~*$_TTuAv*9ouX&?x<ctWy3
z5+xkLMb{mHROi?uS34(>ML2~I;>>0C2G)iLq9QBE(b<ClY#b|1gABf-Cc?;z*Ve4*
z#MO#8?GVaHdV_qd8XlupsI9x8ob9*C9SdMG>ItJmxLSB$JYCY=v#>-6a4*PjYA1_H
zAA`@P9p*e?p~5bL9zt$dO;9Cr8I(25w=ICcVR^BWlTjP1py(0f+|qLhH2R+AV{*#h
zS|A}Jr<P1x`CYGviBIsa_D@hXDZ^syUM>S2=*YUdlQ>+8@k8liTb<Oz_nlf{i9n_#
zApShuYv31?Zud_%_;(Alpkj;Bs+uTjtr^i;E7@iRy2^(6PNDQs+SuLo>PEV?$)Kn(
z69sQjC{Pk&%mV^5eivLxWvd0J(OFt8R8}tIj+Sl86xo`#f}42}Z)BLjI^`{d5cUtZ
zt_o^q6;OfA;?*s|x2*lCZt$b@!UW{C2+`VFf6#BSc9}@H`BoXG*lt+|-Sz7#r}<X6
zQAR9vi<1hm;37ocKE^b$q!#1nNM(+-bs~W<bDNv&E=vLCye{yNp@RoG#Qa~AWTb-3
zwbT4bPy@+J$4$$vuaieB&{Vz#sk3rwN?^)r%1jQbQ^_jH6U-!1w6f|+#*(8rtK(l9
zYH%-wl~B`J4u=B0eCag+We8w(5^Ua#uQENP9FU6Pf1U!f58O(kI5|B3mp$eOSg0l9
zEs*O-Jf8381mj;fCBiJJzv6H(PMP@_Q(nUinQW7q5$I~td6V?Vk~@;QB*~z0(!PRj
z6o}}~ONfNpW36VAxd>cXWEBkz;RvOBzOpxk#oq`@QV7E$Hh+|p+B)N7VjyiB2YBys
z4n3GQ05&DiCS#Mf((V0Ol@8l68*uY%dz&f_RG2NbA}9Ql(If5c9Zd6G-F-xuK$(`x
z%&zW_ga);`S02^{@$(S{?}LJHxq%Bw$H7D=$7c9EnGX}KY+u1vb#ls5EUg0?hNZXd
zI84?y%l8oRbWdH!s_+XhSGl~<u@T=h4}%|UuTS6V?`M<Gv&YTcg{FoltzuSZo4ct<
zIs=Y=FzlQiIrZeeVq%1Z4{l;9vQPr3#Etl;m<&58hf#@@3*r`iUax5)^z+64X6e8t
z|9J{Dvpc*>L?k9+@?T4$K}BQ+;sjn&{$Xjc3jc2eIWCDUW1$oXg$kzwMQ1C40+1#W
zs5>cl%Q4x^oo)ydMoaOW%o5Ggmq}C3M%0)}2IV@PF`AUEIL!JPP5Bg?zB53eiAEo~
z!Ym3N(Wd+ztv2l3U)3k&EAgFp^<T@Y3Vsxe9_=0poQD)sA&K#AmXC7`*O`bOo(5Wr
zBex5(;z@qR-XMB}3ZY)ny&r9L`U-suysebLP)}*+#hsi3tE#+Qsr!`r>s{{gvR*-*
z=7ua%JaEiiOtBtA`H(9NkrJay6i@_Y)4_l$Xx)BPAB#%V(s>!UY+3Qv0Jml4CS2!M
ze*jm*EXnZ!?(6-0ILK2)rM)KC-HD|_&ZNvXM!#4-YP(R+hzVlYpLDw+AZ6}7gK1Q9
zwgq*hJoi{xPr_5Snt#}2XbQqP!Z^#Ixk|^@<v8Tpf%f^WGjuZf_UO2^yZ7SkA9TC-
zZ1zLN+9rKrKs+EenU*ha$s4_lNN?Wrg}jg$h=d6t?ieq~94Ab41}Q6Q63V9yv<)qX
zZ}GU{GR-Ic1o@V&J`FK9Xh~sl_0GGg5(&Zg_F&Q<!@3?1Zl@R88PIAD;XD8=T`N6y
z^6^$7Gp<;a^pR~a;cR%Ye+%rSLWc$+Cmt{UF(!bRT@5<<!UJ~vtU7Cn`s8fev*E$O
zO0iIpuSDx6cW~OgO?ync372|<T3lNreN$EX6JPaV<y&x%DUUob>Cjga>dwho^6tY-
z!^6+O^s!fu`U4@cFNXY3m^?+-RmE`D(|j$~EC*Eb{#+`rwxisUdYs6XxfjD^f1`e&
zwtyahVeAdz44VL>ZZYIC1q}7JxhYxV73r_9NHJaqdc&s1YNK=Ip%d5VCV0_RpKeNq
zd#PR<3qGIpZlP4wso3xKWCYioF&;n8<;!B;uM$?m(ly6lyJp-(D+k5%alr`y;oRkx
z`_PuiuLhHD2UXi_Awa?X$!*)x&XWR1w@9KZ5_+8~PH`gyg4n`49vnYerzs*cD3i3C
z6~cz!Y>;^WO*-lhk`4>QddW?S%4H02*O*%wtZyx?8P&p$>7MRX>fBp5S-*qoOw&BR
z*{D(^{SyknX2L%L|Fd3*7;6DVdB+9)myc{YoI=6p{KgA!s{=oq+_u=|hMm+r+~=0~
ztsE-V8G(<Ay1E^Dox~O##SRO-)Xwp1GR#^pzr`y&kc}zne(k!lN?bYvCD`F)zpYXq
z7Ci_bAeSx=>}bfAnG0pyp=^q7F;@0~tw3^MhhAHC?Rws=qec>88_vaNODJMldS-<I
z)^k#3JFsDop6M-KXp|0%QlYC%oFv*H*P=BdZdXaa(?yXs9PmidVKXjoQc@C2)ZR{Q
z^4RXCNq;gF`G9F(KYfehk%6XH=s_gQj#6}4Li)8f>jId?<q1M0{LqVneJ+yF%dE#7
z{lWQ2D<UHyo3!`x4u!*jGO-{4Y<$o~>WL|6T+|Z5tk?Ww!~8(9+V_*DeNjAHqPY<E
zmil9s5at877?k=mugk+EI{(<#W!YL6l4ew`OlTpb%&BYyidpwXwf8<96sO$DMrj*)
zO38txtkUe{<Mqm^4Fg8g3U$@A;-fRI4{#_!7uEt;^FK8c^n1VQhBRqp^=VtoKJblq
zN{TOSXec}}#lcnXF?hQO<Z;RC)PKE=cQ|%8lEyxS(<)!Oxh1u523j-&Du23j{gr??
ziai??(&xnv7>g)O)QGMCajwYa(ort7F1KsBy_cP!-kmZVI*fqwwqSs#48ojx#w4ts
zm06&2MY}u5UAfKsj#8oWTLRu<3f&-F@|&M~7hbF9U^S;ZZ7~SHjnSZ+Fa;aTm4pHl
zr|6DD>A<XCu~If;q>69RGUL0Xdm%Qx11XAd4=hGxWd1wZonus5_qSCCW7P_-Lq$kP
z{+R=1NpWrqi@T*AKu=Q4*29pB&wS{0{x1<)XWS+&#!XaG6C%sm!cUZo*%V><33^Jv
z?OzPoB?2h&q}NB!2z2Imvo+sfw&t4+G%HYcc~76-m~*3;*cmnD;pEOrp<$tm$RY`3
zY$7NEcyyfrI+-Pxn8ApR@O4NI5fvt`7(ep%3jJlG4Cg*;W(Z=`*tl+BMradqBfIcQ
zY;IngrTooWR7`+<9p5x;fmNoGNW(Ks5q)2`>Ns7!(@{LkIES#mC*5&2?4tD$G{I(=
z?PLrSq%`qnAJKMsk@b_&9h~GWc+vE(gmi0OX>H9&m$9qXHPgN&1-ZQdNxMDiq4Nmp
zr3dUfk)Z(<?_Ei_eE@ButoJ!g9CWp!TZiVNb-jdUlYS%Il@J@^TC9nzAt;M?afLMj
zp*@gc;E&AC>aADU+o`5FP#m~)Qm~Heh~HW@32<C)S)SKe!{@9uVu$?&tA$$UMq1Z?
zw9pWCQUB`;Gw%vBr8u1wbJaob#=?9!{f^nHg^*YsW|%Dx^_ycnrIFo``gO+@ne6eh
zwqgKUXdkhm+n}l?A}+=;`Rp{k7HqY3j)HWz>G#mP5^RPQhaRJkn<(AxceXnnsYrA<
zcnJw{Kn#+MlysF#=Ew85z!ubY(_D|*DWOB0wX_BE4E$7L_il4PP|C_NP7DaVsx+Io
zs~;DuAG_6cYd8ak`fMXlR>k;BoYXaOVq{TcoaNmyYB8f5dVct}p+SgE7cnoFVI9jn
z`Ejb8Ey3$K>$ZqZRT}}zkaU)9VKx08dVk<;;am!i)ixQq+hopdaa_JFjz8*cQN0sF
zn?~LEwZIRq2I=UIf;7=LqlRL;2UvyENJ)QZ-%-czj?(ZoEk#vq2IaoSzKdH8pg%l1
zW#Kcx?~09+5l(F?6<%7BL8h1HO0I6?Z^#RV4YyJ1;99gR9pW1NzB^B)ODILqld@?(
zl}XJP+Cf^r&5#->Z>a$^jyc!hCjC1!3Y_zVO)fPhQ4p9r-z%5A$#ZU(v!Th4Td~zv
z`3|?$Ot#mZ=}g0@dbNRGTeISth2e0DxLToF<FF%X6n32!Cs}>ip^d|LW*Z>s8^_j4
z?mWA(I2MHt?}~0)fZ`Fxgv7rM3eyN27=Rtw`3+5aLqo0VrAbuH$!!{>=hGI#R%N<B
zEG%1FlSWXg$CwAlz>ltSoix!+Ybiu&_uK$ha$j=$>+_$lbvK1`%1z%CRcBz`P2oa+
zP*{+;Hbwb3KGIDwlAB`m$8Cyf3o-Ae(0e`RaZcM52gOY>nzkuQQ<A=a@wWDIAnzyk
z`WhN@+6Ok~sw>(43%JlX!gdn1-K=MG=niIqDvZ@t{NQ`LZhd#|B9XD<N5FHbbUIkW
zmsMj3E=yLuAXJmM$RsVrh*|haUO?c#5v`HuS7nK-ZM`A|Kxc3Lh%X?v<Y|*X*wgKb
zkd>sD*9|SCp)G(&`BTFuWx`Ab%=m&Hn$qSP|I;vU^K^`PjJYO_!YUW*!YGrIQd|tr
zfO1^?riHgWPFVu21SyciH}vPH>tBG5ONkaXWKFTi#*v&?6!5}NlI4wEfiSn5;jE37
zplaRuHsWtE3I3Nhe}S{X|C0G|W(cPi$YvKLp)ZyC)BOEZT-tAN0-&AYFHfJYTe*I)
z!O`Ys+ixLQFcRE1hrC&YJM31QQ{u@F1QUJOXa|xb1)5ebukDxbEWw?cmwx|Om>7v3
zlU}5BUvP{7Zlt3(bW0*dfQ2xJD#tcdH^}5A+Wu+f7eXHwqc)59R8sx==_)o5{`QM_
zs)qPWogP%#@&?Vo=|+cna%8&>^{m<wbK$$wdgg@ofMWnIh`^TMH-}tqMP_r{Yj=cz
zsNB;&z}5Sz*7h=(EY(a3BWrFri9cRO_n&TXu|q9n*uwSDt8P-ibIok*s}J5)Z1D0i
z)CM)XKSZvgF-TEt#pP7Wg6fjO4BE=geB4=E%WsomlnvwSp``=yr{VfNPJ@_dx~C$5
zqp>laHbUg|oyU8&>^N4fse!CML>#M6iwzC3pF!@kf@}M@`B9Ip9JOh1fS6&15eS=p
zToc*^tCA;GjcQ(0O`$YNkcbPV)dyo>0sTb^vlSuvMli)$6Vftb%^<%Tg=|oD?KLP1
zR5d7zET||}Wi0y)jPpGh<=V_a{Ui2)*oXr6j0c2Vb(}yT|M<%orDSLjfrmt~kTG=0
zwyqt%xIfPNb~;S+Zw_`rl5j@EB8=ald<PDkUe<5O0|#o$cgb7zP9Fe@)&0Y{(-7+%
ztcfLlX*?)HI}?(A>i9PoYg@(H7P!5K!re+1X=*2?LDPxpxX7IcxV%!{4vGt2$RG^3
zM#By4SuLeo%v#Ff78J8qb1pW_O08RyVbm<!yM=}O#pIYKh79`AO^Kyi5tL%KxT@1a
zY8p74IGW}CSH{$w3G706%?}w=(uv|ZOT^CdAwE>}&dMH2BY76GNNGjTjL|Hx{CdgC
z+$#j9y*kn^;eCYI<X<Ltm}THFb%M(-SSDPKjDe$4gE@gWNvDIKBU9$LcedHlhYgnL
zoPk{2pPc$ZFa0S9)xh`b!MGhY!gkml$fg#`^OyyrRtH^GLsU2REoi6utj<i|;-Jf^
z@4!@~a9gqKoa<_J=b*F3TV{PGPg?k3uH{bJVw17B(rHX35SD38RC~3YB<GwvvFP|O
z)$k?q!cVcxgV7H;S9DN~8cmb~Yi=VLTS1Oe`yn)(E3LYS1Hn*lV__ylIw*<y06Pv(
z=!J$acw3f%?fF7g_@WhqFyUJjg(<w|#fT!Z9bL7KOWFf0d-)XV=`%37^N7A^`p2{h
zt>H~Jb7q}lvWN%F|1#*lNhun{)YLCE^1rOEe7U;f<$rnp#n&s3`CmT4{4bBGUz}}v
za&++h-tmiV*s$TGFHV6_x|fn0qZiqn@_T%jc2NYXqGRSbZ#iyd9-fY<OzX9bhNsJU
z2Gfm(H*6Esl<88)!v|NOlVZ5xL9T)m=a{=0lcNJ!2SA2NA8ym=EQQgNF`f-}hsj(=
zgURKU^i);xjI)$NHZMI3#!2FiGKAc>QuXY<jflLDQQ2DA#Rk~}7VbNrZLXr&fDI`^
zU*$bufxvD_b7Tp;vO=aIM#_`af)aR>v|aSB;K**s8`!?1C#Tz|`#Y`elasyU(@5HO
zgxnEV;?wW;Pg*bcPQN?aZNU?Houb3(&e6fy%fr^o?f(luUT&Wr@9*rLG{TIUWxH;<
zjIT@YJqjC*zCN~5Gp;zI&v|#yzQHYXc+OLHgb`lZ%P%);*}bE%J0@zi?Umd#XFpF5
zvx!cNo8pUgEpdm=cPMJ0uzUr5c|3kH@_4jBze=rO**Zu_XFyvVjiOl~qk#vUcYs*a
zT>!5m0kDK$5~|p#1J@;7RotKGuIsz9dlzOI8vwNcvI_!!rs}VY^^LICxLD#xczn9)
zK<k;`a3n=JR5j8KX(AvSS-$Z{%vbH+u-Q#7#`qyJ?KI@d^s-K;o6^Ud*!rw1zb>l{
z+o(x9Fe>Xpv85=iE9-|D)^I0H#&HgDx2~jlfI&yno}NxP8T=Sd2pG?YqEVYbqkjLr
zj7DvK7#cMT0jb!MQ?Yt=i~JV}YVBxMrxt=Lt7&DLdFSAe?m!ok8V`Q!p~AcAQ69Wp
zYMM9ZoLW=TuyKa9ZGjF`GW`(w=t@TtYAiPyy2hO1Oh)b_0>^0Vq)g*#I<eQYs#ud@
z*ikWIViOGd$R;74I&xMWv07S!3!Rl7Mj%5}`wd67xGT%|``MTiz;~jHq?@N&(nWv&
zT~b|rm!m+9yi~&*5z>-vxh)GS<84<E6==m1M>%admHZZK=DUCun$Z4xi*GQf`L@6~
zhv)P>$#sr9j>4z;tV~%)ZwDf;=ZQ@IU}m{A&^he&En6HYuhKZtCW5Cf#5<=DD<8iv
zit|tuZzmA2x6&Njm)4XsaNuQKf7LO2y+-unG*#N6^QMU@{DeVlAl_04LsaveW5T}+
z5|65h=er2<wt{$D87A`li616AFcpy%qW?UQ2jQ=={)I52k5~}K0{6a0x4GTciyvFN
zN3VZ`H-Yq19Z5&CqB8`*fjHq^)PN<gTH(0h5G&(@x0qSLeyUbgES|@qu(9a0$=pF;
z%`NxMsT@goDq|5atx{9#i@S>RTGSSBNLU%8`+P9y`e)C2nE<X*#^&73|5Ev%>AzA-
z@B^UatI?zn=vgKWtd{>hUtRg)OHcmy<rmMte3buvius@a^D+N(K?q3yeRX`ad$x1>
z!~W@aua3_S_YeP3CK4<!K7)V|2Q<_vg@&BsIqNUO+P1aKXGuM%)J$*+PbeQ6iutra
zC1Du~ytp2W>iE&xzeupC;#64S8l>+(|55&wxCHQnsEvJm1)?{|tE_3?iQ^XNA0rqg
zT@}z_Jm_K)Hk6S3rrcMEC8!D5a}1skBM8td2gWMgqAd#q7=XOUSGdSctypfS!b70w
z+&KmohuuQ?lE@qkV`)9pQMG_2LiyWmDlV`%v8yU+HCl5ujZ}^jeBit{bvw3Pu5Cni
zcFZn<#Ss&9izOVc&-;$X#Ur;<9eNBifV5>+axMBhpIlr}zVGecdB&C{8*bE2r9Fiz
z{9UC#krS8ovNq?mLXcnFk(u=og0#jV>}@>q<F-L1W$)W2d7)0NqHWErBAl;9r&YmH
z+SD6Y+YY?Ojp)zBw2kCvB|qU207sSeRGbRK6~o<L{F;si!1s(%0MCUvVL2sHRPo!;
zFcPR7?)@KiEr1iygouL4tB7M6!W=>yP*loRxm`(PF^41YQ?Mj#76(@VnQ;+msMYxR
zYLb@#OmY$<=2ZlW@~>N9OPbAn0_A@F^hcT0?6pdauDUPw!Of%4TefE4Z}PWOYzosZ
z(nMTAQ|JJPE3-t&_cqnfhImSkGFu#Dzgm+{3qBDk;k|&%*X&EX-aT0;!X`$gA7$zS
z!Xosti;9i2z-ur;E_j4ji=9Ci&=$pC^fFZCStP%8wIr{xBwra(d_xJHy=Z-H5;;2t
z!8)+F{1t>JvU;FRF8AbhLysIfh54|6vpzhtUMm2f7002n_lRyf{u`+Uq=6=eryJp3
z>7>X`WPN=PBV6qN%HYy~?OVFUl54tdrn$bDG4xHm-bU6ymFU})aNGpsga#Zwj)~^G
zo*p9%dl*Ulhy~hJ(<_{QdT_0))iPKS({D%FcnsUVn-SH|@-@oY@pO|LvI4isd5!qJ
z@)}g(`pj@(4uH2F8-s0mvb_~vn^y3?;D8ST1Mbp*ReVR`!m`evaqwL;c-s@r0+RyW
z9j6yj*J(q>=5t2=w&<(HyN(75?M2tYwVQJ8z}PI?cmrB&xDek=p1aNZcs{;J+BYxm
zj@V!y<|w+3EpN_(!SXvvDS)%2>u*$+kf<KthIXlF*n<HREu(MLx3DcLuU<i!sp=h<
z)jOV2FD<KQSjs?kZJt+ykyE+d*B9=^)y~=-MGV0Ptx4AYSA~a?v=(wQs(Hq(I^1TM
zMHc3f&K}>EYfi?zga@|+tNFum)|>Po7{Vl?!%zlz!RM0D;5GUAz?x9>zL-bTq9It@
z)ldd>cxa8%e6~=29~ha`1}Q<N8Yu9gFn|pdNO_Mp_wF*?I(25|fw&rUJBkm5_8l|u
zuy?CHIzQrkxR7&c;6_i?Z9|#IiqMDmikgA`YT+o5PW<-q;HNfhEj;`w@RRvT6aUb@
zpa6nirej83WES752RrETA4lbIAr3q$N0ZV0^HI4X7EL)aKO!u80D?u~zFbPB3UPad
zc)ahkGaco$00V9S_+tedE!4y!@W+b1J3D3<`i%?SI9Sh6#j@*I6j1*`cDqc{H|q+N
z96GYK%5#Sa1QQE=5tbEar4qpd$0Zl0adXhvAy_{`rsz+2=*+x>^%$#muqxz{i^6M^
zXSzFhmZc~(H<gZWMcknhugC6*^pTxDe?lJnL!i@f7@dw8om^@^^HTUt)?_YOXG6Vz
zw0Kn{Q;%Ft4MyXExQOX|L${J?_;{()BNhdD;~MPhL-3+y+~=qgiyqWvu)?n3e?9*D
zRR7Im|D_W+)N}uwV*j=B)#?{t7yLh0zj(C&`eg0DzI?R*vIy_#@%G`#tD{mMQ2Tp*
z3$Kt(uotWc9f{B|J?(6_(cN#;j~>m+Grt%*BBC*|?K+}K0tRxI!S`ikpMn9g$^T-<
z7{V5RQvs^0K(FlDj#&|adn*K*DK`zg0Q*s|vdmuKM#5n;fBdqxCXJm-g4S@EI$?`s
z$x1^oF9wV;obcUVjIn*+i}e!Oh8T|Ti|bE7pe_j$Kx?KhOf@ffjcnZs8R`W#o2etO
zZjIi&=y)^r=GCn`HW}z|&e7Sm*HxBC*=1mv?zuqKOY;zmQoS4xu@KXi;2x7tQMTTN
z0}t7BgUMVefm2a^EQ)6CBGtrEMobHb`KsNBBx|!pR@xq2Iycz~CLFc0<`@#HO2$H&
z$6$Y*Z~`38IajPKgTqxu&{bu98zcD-EsX564xw;nfsU<ENuj7+0Rum|t6d+YUMm@0
z;>-uF{;Zs@j4kd6Ong0S43;IBJrom*;P?oxefx(8`-gjOd-6u_E!iT!I2y)sXtc3R
z7{)cH_ed6Q>Vd^h`QP4!B4Sebd7tdr&0;{ut{tpQ+aOP?$C2%oNtd-;PGG+swh~J^
zI|FhV4q=Z|M#bwRCJHL(&{*`M*xB15tVC9rTtB<f07G!<sfz}~0M%(o4_R-27eO}2
zwf^t|1)Fbuyl8(?_x~*EUrvB$p!|<l_q+eEe(^=Y|LgNFR#qSH|DWRh|L=3$|BF$i
zo{Qc8`R_a%{Qz-5j=#@;i$kb0&XeUIlAEL_HaaQ1PMdvx+q%WKe<fi;>-2bk`|uxU
z2iwOo?psKtP%KpxEXd#%l>sd5AQms&5Eh9TYcj!DMG=sHKKC-9s#a)CxOzrj^T|0(
zSOSY-#m0bnRXnjxf?|qMHK4WQ^+J;ew+tR~IKwg}Y6gdD(jRB&?!aNK_Q!ArkKrWC
zgMCQf>rRJ!@G(6=I=&io?C4g>n<VR!ld%=U!#Mu~Wc}`4)65$>><35Vi%c-AuU&%t
z`-5>k><Fb!*hLQD8S2izRZv*Nz||rk$FZ9!i^{CnC~<O=4Jmm%I(XP=Wzci~vw+;m
z4t&)J#A6o}5Zul`gDr_^EyZ?I2-HDBFny*0ER@*Il1qe|x<VElUto0fRhXU?r}%eC
ze%8;%P7U=!*C;T5<`z*srS5m<cH!}QKw>(RUhhsucvC^&kg|~%poiR7$#8hrG!X)1
zAO&WerPL^-cMBkbeKT|~7z@@NgUQgVvlj>ZJ8;!F-8=qvduJ~yHq|K3()$PlJxVXq
zQQB{(rNN;6_i(8{PA?uJ%(VZ0r<*2y1id`8gBO^1Hy<m}sG8y3z3=yT_FCVb9qufY
z-R(+(LCE&SA_s%wwkg!Ee?`Se|B)GMmbY>MXF~y~z*<`)NfE{9*M))DB6)97AXI95
zbF=I-s_gB!{<L97N=GZ`AYI;~f$aBRp$oH>Q2d>VRJ1*WOIFH$e;MXN%gOEV=eAg)
zvsj+{95qVD`!dGmKPO@UpGUEHqCBOaln3BA!lEk9lC`T8fYt_biY{^|?%taLczN!;
zXXpz=pmGebflGx%<szcc5mqiWaww;2LB`Ma<wt8pxlws_;hIEL7PVwPZWi9ktjN=S
zaKdr5r>4A`F5>cKTMKp1YuVs^59_LCtb=vhIX%-UUH6P;ZIn5|+yq*43WQ3<L53fw
zCkhganCO}UsVQL)W`qt%gJLdH_L(Ja3$uwQYYHc2%d?0k)!Cwu+5W8A3NH%XJw%r<
z6YN1+BD$oobFxrDWXbu&aVC!Xsf=KU@1(bUju;g&2Tv8E#)i|B6T%>rEm7mT0qHku
zcfsAGn?@1Qf7(z{WFhzOzpHK05P;TvNJ_WRjZFdw^SwB)d0g%emID~_L_U3LCrTtJ
ztv=nF*6d)yG-qB+8z#Mk$~$TAP#V=|E({dK2~ih_3V)ICZ*4lW-3U(`CXO;>geETB
z{tVMF+fd)t##@y5njS+q;!TzXNTU=!Le+I=-h(qW%hJ<IdDHq3E9XKq@9Iq(OWhT;
z(bYVuK#{na{A8*XEA2sSg;SerjU^PoAz5qdP!UxmJYzKwBD)Zgp(?VqDA*bT>JS&P
zA%Ckiwr7>L{;ch7mNsZ@H0gg*Oqx?v3{|irKtLS?^(iN5kiuOBbx_q^x#L`OyS9{E
z0bR9FwdO%4jYIX4MtfknbiCr9?8G4e{<d+96NHMvaIZmeThZ#gkQTBfGbiFoUW5uU
zr~z1a-xul15N=JtGOambG8TEv7?^@tsp6%gTA`=n+sbc?{H=zfuOeKqlf8#8WRBg6
zcR6xm<;Jo?AfhC<UKDf6<^F9!FGP(FB0F;AyE$^F6oliC`aehicQ($tHBEnO>Hof7
zeeUW1RzLspG5^bB{KvnB%I@rR|DZ%qCqD(PexWIB>2P=N#o0f++Pg3QAzO%EH-O9`
zH&;<Yyq2VJA*?OjKm2a*c>fgBlkETF@a5j&Y3u0K-tqS7(Q%|{@6oU!eqNX+N&I;s
z#0TpT?3(2}8p;d14F6wK-uUyvZ}30w*tQ#`a&~NCDE@ACsLZEX`sLoHO!CRjHk>^7
znCm$vnd&WyoEVQ9qmc#c3@0#-6E%>Afpi8$+iWb9^jT0RS^cOB82k^q*eNfIOx=Rc
zAqT_;WBm2UY(xGk86%fOY}%$+dAqu96{UOoankP$dR}QP`ds`TZp?0if_RlS@kN~`
zg0(fk{|QZ$cJh1a5P~j?E$LrJ-L{lqbH&n2wVaCysdu(q>;0E9ZRbO`E|Y`2-%hyo
z?`4dzhi?64G90EIcLcMj{Lh950}oU$wc>gmKh!PvLS#3GHyE^qkfFfWYnd~j=mabE
z{9w02Jv$-D*XM2qbLi|MLvW9ls6I9{J4`WO3AOF8J4Mtm?N!}oJ@BN8EHG2YM9^77
zRR=yJpV9Q~ghTd9y1W1X_F6CZ4_mK}_rBdbZhgD|dT%!>WTWPQbA{|haiJiG{41os
zZs;-5oRkhpL;L3(oVDz=*2{C};{9GT_xcRl*i&j9;g()wU49^Xrh%7E&nS`z;_0a_
ziA6R}9TOv~GJH|73r~Y-(5;WfNw+>ScniOU-R)0njs?J3jj5O{QsPWtbu1-Ttx88N
zyS#k_w0xaz$l1U?cQ*7PW6}Y)yr|ZMKUMRf7q@C2sL<-iKM&g7zkD9>LV2`%#J9OT
zP2tLzbbF}B%{staQSwT-P{LegeVNa>g|!hm72ZC;M)}~AsFnLInPi%D`=LCO_Al{`
z#x2;ZIsx^&>Sd9E8T7KKF6Thc>fL!SlnlY|L%s_CKLtz;TnJ@C+1KkqH~&h9*4eU_
z*$QVD{a99AoB8l`vSn}UeI6VlFgc}%M@6PbH%JRv*2#X|O3Fm85;yG8X`?^+5E`NR
zR(dij5Y-T+3F0f9=OdctvPIE406DnJ6d0zg-FiEeHdICmPdM;;I5Nc29$u;Z-@*xp
zBQi4oldA-P1-zD#W++1`3hDZnqLZFaE)M}x+c)z`u!Se<;vFKg%P5<>5qC3lFMbQE
z8#9@jOk8#+X)DJ<`G%~FKVrWLAZp&c6~LrPBlS@IZu}W7c2k&h(@l+xm^GXRv?=<q
zn~r^7f^C^enuj41mQY~d#H6f9a&|Jn21Jy_*D)!~8m@)Cjcu@s>G<2xpeMV{%MIxj
z{W32s>a}U!+C)#rX7ot2%CV`2`kQ4@Cf`MD#)T^B6=^@zl~1g4Tu~l<szT9Ad{Jbv
z2)(|J9dG<a@dge9sR><?MiQ#`O!t0m_5OxpFTK-Q#Z_uU3bH)7Wred6pr@R=jx9?=
z@B|g_)V=geFZ3Z}X3o}4Q0Y!%E9-`Blq=e|)v}?s=>xXzC}|yJpmEAM#)DKI`Ks<*
zdFZ5);KS+vWZs0UhtdCi{>9f{`tg5Ozg~UR|2@Y4`D^I^zTew9JvuJY0P2rH9pLoC
z*3C}MJJ&yL6VGRWY7~-AOO~E}4CEs*q**g<^4wcFs41yR*3QrkQ#W;zrfPm}Z*QCZ
zbPPyDQ`E&=B$v;w#=UM!VOajG4-5VG)h50r9P6Yo(_QqVbJCLK?JFSG&5$c0uFrWY
z9sAOb$p>|nc83bqVRv!~3)oBBS4lt1dzgQ;4GRzDWluxsh9_f$9sq-3*4_caG3aH#
zVo<+V)IcuI?*uKOO5ZyirEnCW8U_6U?|SEhF1l+3*7w!H*+2FVo6L=bC3Dl%3;LoJ
zRxW71U0wvjvXj#5n#;nsN<9`@wHy?<bM^V3aI)K9_5A@&-PK3S@_s*u>kUJW_ga0&
zT@>E^bp;rk)p$XLnRRbOP}Oee8>KShIx_a-lq%>cpwiG&m6yreP)Eh9nkpUYqj-69
zJO}i6e;4R4_Tuh1y+EtM-^BrO5&Rsvkm6K0v-7(M#K+=}PA0?QU^G_mr3w(cE>jQ6
zh&C))<HcR(u9vrSR7nvpbpuP8uPx}6Jo$dRbv)^hQFj6xvwaoIm(-2=K^hSEPAZ(l
z#x|2aYTMqJxGzlqh)L)_Le`{E5)<_?kghx)W~%_%LY0jmOf5I3R0vLco13e$MIOnt
zmelzDP_Im!F(o35$4*W3ZsGp4A$Uks$4Q%ufJ_OWX1BRzIeXZl(j0qKUl>(w4q!Lm
zD3-6XKCr9C;_2qSsxNN_yjqRE$V`fy!f>)m`OA^(x}>q7f^y~;@!ym#4SZuHh-z&M
z?#p2(29+YTcvW&qQ?Es7K!NNkyXNxtID0{Ld#iMv#Wtt6XZhb0?{oE<uoIJdO|0rj
zYoY3OW90S72R0CFE!1dLEVp1o)4-c0$e!#VD}+=FLRv3IT(Hf1hUZ(EFPPlQLxk#E
znK#VbiW6s1a{iJ#ly1CklZ%3Urwenb+No;78Jcb=xZL4rf_wppG!=-=2DXj(6_Ih$
z{kq#3V_|P@uAosrw6mq3Hh6=g=nV5g$Rbd9OFyY1+G1Y<E@5-JK|$6Ufwykub}S!L
zHgy=81SC^ntowo82(y!MWaB`THC(@S2ZTkTvsDJC_K%)a0&L2>8st;bH_WRn!!M|N
zymfz+f;qTbb6#SW1OA+dOadTpd~BeIK`yfXn?V;7lZ)=)G8$bCmUvL-8G3(;JWWRJ
zE4uvqU3CU2-TigT>DvrY{w_r>^NNxp=foNdPQA&!jWKm>p8gAi4rQ>HINvmhQ#i#s
z$C#lOU6@TPJ(7}~<0TwGY!C8r_YS?S3h9qI`*CtUc$3m3U^jpnm#OaJSq;3&8Z#ko
z3FiI)*XeyVgeo+4ymd>W*1GP&&acX7x5p{PLU5~xUb@k5Ii`WixC>NGj;h@S^stVZ
zf}A*zJ=>Occ=KNpWVdj@<(!Gt+w}~jN-kS)QU1;GEj&9sJK5W9eZPIYzy0E1FXDph
zp(UnIu)8ZsO-XiH3yRogqaGF{%M(pO5pg%A^x1|ujOGe)hk!txXa#OZB}AEJ{La!W
zuR1+1*q&P9nL6taFf(o1`QxU{fH1?{`Eb9VNi2uq<2l-#P@*)01rd-w-`1VN>|O5{
zhGI|{Rb9@K9~~b2*rHYc_UO1p4juC^dtjx5(>vn{5SS36OZzBOS#bwHBvBEb0?y($
z#4=ooOa^i?!ODB6aTujZw>xON7zTG>tT%l6JYF?t5XC$Yk%)$3Yg56%&gS-AAsRNs
z7IJAtpLw^~)71vKdplLv{lbjPZejV(D!jZr^xarPojD?m$ZVfbUn^C63(NRyRgeqd
zlsqXc?weF?FaC8xd*U~1&Z&(3l@6ssYbJfT)D6p{616FvLEBTjWB!{oaulg?A=^(w
zK}{V5su(^olq)D5=2;hx<V|rn>7A-%;OhI<+8RYfYYj(fJCm_bLWGU`H9g5@i~Q>`
z;b$I*x&#a%ux2Ry5X7mX*dojTauh*g6-R?N8R{rd(}AW!z{{Rz-E4fvS_`r%Yp1zp
z^H2q5QcJ#(vL)G@=uL8BCxI(vW5!j#B2q4^7C~E4OV>uL3b{22lNv6@DOA(nsb!0j
z=M{t}D$vWE31;3vIVx=OCv?7wPS#8NIWf)%GQ+$2#3G3dfKRJ)Cf51vtJzO)aW@2)
zuakDuF<v;jT>(7cD5H!`R}<jO)8q}xE|b1UdqY@Dr|IC_5@y5ZgY|!tGE|tb)3B<g
z6D>jXu{vj~cKOvKHj3Syb2?&4kJUKQMu%uqE+%j<nzXM%cOQB4BEX~)PBsp6+DY4R
z9OP(8MprId)h7$X3AhZ8=Yt7_>$JIVXy7ItjwvFjBw_{tv4bnrWN}6_hYMRu1Pq3B
zF-l2KI7(nqdxOzkDdH1-v==0R|A8f}SRxFnAwP%>xnRX(OCH<ynV?TYJgBXcmLnio
z2T#`6P(6#MPqV`DCebKs6pybE_DDRjFz`@aa3pI`I90IuT%RU3?`O%Rr6Cj*Mw=%A
z=+nN@8sQdcqG6|vDD8-Bb;UHPBWH!$wjrCnm+7#V5W6}0qBfXHsSc<3)s_M!kI!1(
zvkOWGrQvn3R5~0xI**;>z3tPz*6H5MS4YR&$3IF%wnncR3QG@U=~`7WOKQ83DaNMo
zJbHuIi?SKvSB3FY%rAZymrTY30f?0L<Te>v7HGK>)Dg*;iri?(*b%Ug-9e(b1u131
z?0_7V28Bp;ILa?53i~<jiTn!L>|259+R<78|02~M4oD8n-6dmb^<n}R8~RraO-!lZ
z8|7RleJ`DEHJ@x_i$!_9V>x8+{-LNG7E}fBjXSGOloCR9skz#!g3htxjO}W!;w=cj
zdqE?}EX5G;E_w*%Bh|{N8HXFda^vK^ISt|MLx}sx^5gH>#<6$^S)fq6t#$(cu_Lay
z=Z6(Y>NhJ|7HuhRM&1qmBS~)@sb%omEe71WAE7Lpuv#=zjm&OPjC;fI2n+c_!<v0Z
zSsOc@udQKrRp=ltgfnmh;^JaKF<($noHf@jjmY(G>&1_)-J{n(!kdzrE&86$I=zl7
zr%F@{lPhzly3-pBN3a>GvbMxSg|oot3t-3l9o~0?d<?v0Pgp?^Ha0}$ebbp}-68Z`
zaFFsUS<2?hItojitZ2Yznc!?W_VXlqf`58V=MtYa=v(Z5Q+ykJYltVBeBcX$7Y5b!
z7B(*&E@&WwRwMwcOPl}g@m=;EzWjJmjYx%9KIu6@!W}Wct6%)Nlj6l2<L!$HTqewQ
z#^Th?<wgaues#RJ15CrNB?<pF>vmlkhIf>xme&j7BwMr*&_|U5>&um6zW9~`t0WQ^
z7O0vjtT;GOs)vyg+X3kwx8>ev@iKd#am+M=_b3Lg1NH1PKLX5p$l2%i9%2Qe`9_fi
z>7f$)&H{!dysO-Bu(Y(@8Y(T9N-l7E1-nR$ZwETHKrU|65gOKxaFoQ)e%@RKAiH>(
zhwJh{bkp(Q@OqvO@pBK0wK2IzKIowuwu1p6yLZy)w%4pN4r!QBI*-bLp*4#s#EhMI
zwi2yr#u7A*|3MsAi7B|e7yXB17uwxPC!JDGo_BuCaW^M>#ib>nh&nmlKHc9zbGE(X
z)5wTa9cCDf$WAEcNPPMo+Q9AZy>6ZU_-e29^6cca^<uBJ-9mFXU?=cxW4)paZOOYB
zDR<M~mCb4ccxc6q?;n1@eXzgVdbxdiy#Jc+lLvUQgsItnxp%sEd=k*qNdc10O)8fh
zvh$iLK>bwfO4sDt_?8BZCUpZXZ7V5?A?oW^ExKAxt7Df^fizSL6+pO&J_^ctzvxs2
ze~lJqQ1wp>*KY8+rS${hNbN;1tdQ+?6iywfH8L^PwM5E~;_kO97JJE<g+=T=9?=UO
z5S=#+*|HvPAGCJ9+uQkPYv%~|7;J{Ky~r7#9eaJQsO}8H@|MNVnie(d?2lp5rLd}z
z&_$66p>8QWCyPZgx}=(2*Rkcp(`gFUw;iu%a=k$n(bU}7YSaALq!uq|SZ)!MmFVll
zs128kQM)0ho4F4~Y7`p+1?;%+o16H;xkDCBhAj#M!@!RA)Eg5T$A62U;0x?gwhVlJ
zWY36G-rvC)!OGF|Q3#uj>C&TP)x*e}ia8lKRh(qBOCC3^P`dBl?*fs&=)o7r5$=58
z1#<43rf=)UY3ERwc_|GapIt=R*h#yi*R!~cOdj6zVmeM4b1o{3s<48q4Ke$sXE6-*
zWEQ9_p!Ccb8)%&;caD(8+KcdE{C{g@{j44T_v^2|{HhrL_v=Uh-%rs0cXf4+_`k(g
zR>W@7&!7p+NYuX^P0qzecW~Rmm;dLomt@^$d(itU_zsun_$*EADeB*I?1b?__fa=X
zAzVK&EdrPDlg(FV@4JWBsKw(ejvWRS6`_!PdqXUW$aaM^&Wra9Y~uM^!`%3u)+X-~
z;!GkF&6#AOGF2O43mF!Zxy3+6Ly_u1&M4R^IB1)p7QC@JfD-=VY^Ap;B`l?ul(RDT
zA+1+JzSQW<oaSU?fz~YEP}nxVVnEcKY@Rzy(U}~Y`nhe{|KlubOD+whm83P3fk$Mn
z6FEvpBdO<KN|%dDOS19maQGK0Lc!nUY$1oin^pFOYEanOcOs+iC%FVi2bn4BJ)Y9_
zvnhMd$ES*HtYsT=`rfO^oO!>zmaVELD+VBxRKa+5*7ECws*O2-R^T3bsnPgyukP=r
z4|#z%4J;y>`%!ibMqAN#;u7y)-<DA<hHM$9*)dwO0~8%_axw{pv1g?BviV5aHcIu6
zpCKm?$uYT$CV%E@xS#H(svduqhAmFTN@q-2+bR?2y22Mzw_uggGK8X!2k!}s$XS7a
z=GqO5P`#LZuMA2JnZyUPj-k*=>Lb^(&`2$FppyI~J96a4{s;_IcElpntJ|k(x<aI?
ztiLbNCqE(D3QX9fpIzKB2}Z@j0Bu$9czTmyG_rB7nK`7(qpW>%C(hG$LV6V-qZF}8
z8e6ilex~lXl=+wfe!w@v6-otaq;k+CwvMb@n&hTkiUp+#<)Sm-B5yHVv$kQ>OydMv
z9(C^!27@|yRFo$<EzM|Z5p-3wn8?%|2w_33UAy3de~&bw713CoF0Fj1QV>5&DoERA
zWQOdif%K5~ND!I7NlTJL;_TZHM^H4FBJvHM8vOB%Jfs4d)%xubVesid8dFA2KC}8G
z38oBmJF~&A$nRGPQaJ0K11iUDa66Nc&eS^j$+HxZ)mct?&a4Vft}JPU8iKScV<yD_
z;k20zH!XFRznrwMPMAME1@<(OGA^}{O^X)mcbd>v<Snc>=YDp<(zpzB=AW&|K<jl@
zD+?_pP_3PXmNo)aV1C0}{b~8kN^v6`v2;EPs}G0y$m<-G(Vqns%0q|W-4v0#Smj}R
z)e_D`F6T@)Z4b4KwmVe(LlNa(3%(=4C8Q%(K!(%gRE35zqv?tv%%OC=e;|>Tpg(>3
zpftHWYDh52q6?BrUTKiC!4X{Slds8f669>_C1_NKK2qxzXosrn<*Ps8<!Lc(PMBI>
zw+Qd_JfYU!{6>KV@T*LgWo-pxk~(dfjGwaA^=V@arjRWRkK9U7vPF&|^lEE!#cI6t
z9QL*z(p*@ib<ZnB&7xOXedM8(d`wY}_c5IA79sG?c<W1@lRmg{9}o695B3KMH*G?V
zNXeutzr@QM%Py_=+6jZ?u~PbKOD?(T#FbtqT6$?qE*AnGpK#n_ritzyNfZHj(>_hy
z06(BqcAz|k0qlQG;Jyn*Q3u^S@1`9IFJIOX)?GnyNNm9xzCKx#vgtvGrPMOKVn^&o
z{kTWebjb~5-5J0^4Qq5FzX49|Hf_1)x<j~OdGg7SrVWE*U{rXx<#U{pD1Do-9e1D|
zBa>R6QowP-g~0$i=<(c=$)U3h=r{utonR!+i%Hi6>M%uUIb$vVwU&)4j|Z^GWbu5F
zkgX^1N6Dl+mg`bSC|4eb+!X}Jg~V84!LW8=a4cSssnh}VgGqIBUvgP=rk-*#SN-!u
zVu6W4*^qY54y5!A48)}Hjcu22g9Pv7&ez-8S4e!|y^EHtgZ{NNg*9PjrEda}nA?G)
z6=}1Ug*NSEJm@83*&yMCQON+oRq!Xd6pH2kp5DPb!uHaOMpI$cd|R~84or{SbD>W)
zwk7FyYHvu^^2~|;RhgwF%=g#;rSS$H4yC7=b-af})U(-tSXm)3wMoqw@YVJo&!2zw
z^{Qw8@%+oLK7X|T_$1@MKZbs{RGB8z{K?V5_j_eQ-~+|(W!4c$M2Uv%z%vsiRLKO9
z;Nv^7NOm^v1^yd;x8PjhRq?aouuz=dxy6@PRd@2HeuQzSTmueHuc$3IEK!$lKd7=T
zDH3JJNM<CxO-3EWntYs%C!D(puJ_A5Y;d+|s@=EpX^d%dY06?Xh7#R8lnzMg3I_Q!
zGYI1zrCh93>+mGHN*8!%Yjk`wN<QwO(4y$#$dB<!`@Hg9Rd$XA$V?Uf7R*#J8rO<g
zr|??JZ<zGMXs%j_>s2ssg(kJ8*>b2f_idGxpf}VW!z?z+ViR=IDhRCh`1=IY>-aJZ
z{G#&kS`ju7Bx|nFR923cOl1pJ&#o{)b{DXgHQ#meUu`7q2>dQ0U1)PNbERnKqcpj(
zOXEB&Z|ZKBoGH=mcvEAEt1iqQ?q_36@N0KyHHM)id5QsY=+(4lxqQ2G;W*nlV_L7*
z$1^#sBINHp17z6m=SlnK58v=a^d2{Sy$x^n`tmB2rXY0dVtM%?c6}?$oGo9mqutm#
zQ8p$KqTZVu&KAU-ziw@x=(EE9X-wY?ejomxK=4aRhzr6yXCKmr_FEU5o1$V6xFpla
znj|v@Adk|R&I+WalGe;XVv0AkO3DE<xL!^y!ps5G|KQAl@_c-x74?_{WlmPX94r>;
zgu#z>2Opu}XPYKD9^S_X!*SNje&xv0B%`HvNTksoPe^3L3mx1kbqGy8QseK_spQ62
zOUy)ac#l6TBRcOKFhsDqO4!S&OlTt;V#4v`muXJ}$qB(WJ4^6kw>h%}kK4R9OTcB9
zSD7QIWC;Gyoj&7kR3?z46i}`$4bIQ+MB<03?<F@WVC;xx!p=vMf+QCyhiOSh7=0dr
z<Ph~Thh^?`TaO-<QKZlOZC%{hGt-&yv0aPu)EkT<H9m3uV5Fx~gE=kqQh+EK^(&=>
zp7e8yz+h3PX;H9fiyLV!N~%#8yWzD=1WV>{URxsqx8Hw-=o(E6jAaSM9emJYa76=v
z88z>u%{eSos2nd2m!WBQW5?><hZX|7`>-Xl29RGOqQ1YgIIF=yrKP<Ui2?&Tlf1}V
ztYmitQ8Erdh2H_pMThc!C*2L~Cth&8b4|fQ`uG-y_ifSn<io=G7wwme%9(*)qVyG~
zN&ny(#HXkjIcy;inymax^Dw2tC|+wjHZ&X>cEVQa>8cmJXk!o?z$Ks;80Q^+E$B_F
z$UYDHol=?Lxj#hWDRpndWPJ6BNlFipSqS^KP3KYM`x>IsIoYIG&(f^G_<YmB;C^hA
zE;bwudV_Jw@z-ganUOeRXWM|Ru9Dnu>A296+M~Wb<vi>cmq8LI_H)8B#6_H|%}qu{
zKAyIF=b4NH6HF$DODbc**;IDvxr%6@zc=%A{DJ~6M=P;MkvY@m75WP1PCK!%`99eu
z6w*Q2__GN&q8gi*i3BYEcK`LuJ(0>tCxNsriZQarmI2=uHUIwiXG<5|K{D2nF1wgj
zqG7}L%q}1&VCk6&!061EYo=*^gW+RHJM)c#3cC30vg{*ddAM6<N!Ixl&H_CYd1I?T
z&Dc;;J0=Ie@K_oxt+9D>iD%!etU27dwLMe``-Z+o%)8AlKh-U*t}U-tf?b#<!|=CJ
zFcunN<}v*3%;xul96XXfn39FSW<Dsc1T&mIvpM4}03nmp;4-4WdE~}ps>|7Ffm2e-
zJ-hs~C#<)t^UASEH7jMntJkP>c*QxWkQ{`r7**1PFv8mrXn=`_K`zy}V0hE>TM&pk
z_lk!D%3LxQoTMvlpvhxfb)`ulCX<)|1x$Qv8;X0Mb<ud)riU&KnsRc$uCO+Q8RWC2
zcr{&KeU3D-uux_mpFU;4*OBkgu~IO+X*(TJaMmXn1W{0MR7-?S5mWgsCe^{8Klh&m
zS{Y2nvZ|6)V_9JWs#+7}hRC$T6&K?e$@^evpS?3sI50f1P{2wE1{WJ19k6mV7ZSv0
z{MtQY%t#F`AbcjE<GRBV<F{Tiy4lt-o!1}cTKaL`O6vt}{_uBGG2i8s@aSRgr_|I;
zS5%3_ALy1!y*=bb)w@gh_f;F7S?{4PGz7W1kjiPRypzrvzbRk<^r7wl4$kz$EU}-a
z+y8y_)fdk{_w4`h;nDu@Q;h%gT<reOf9KigfBst>V!G5kS^gopNqS<Vlfvt?+2^;d
zTYNhu4Ak>Ef=PWs7KK5R!q&lAi4`G#DH;wMxtFja+?pT|c3fK$J7s<H;Q`{db9Au%
za#y_!*7ignV$1P%I5=Am_5Kp0ig)b8oSlZG{7BX1PY1_jbj{wh!N#bRHxzz*R?+U6
z-EPRy2<Eda_|w4|y|ZIK$>>%21kU*Wpp$gP!I^X}Q938wN3bNoPIp}&0Y4Ozj&>pQ
zbIsVMj-><)FSpP`q#LL0tA6$`^fb5_4SL+P$!dIpy+HebJ2x0gnHq^Cind3c<!*YJ
zwC|eTbJ%=?H_!h4HcJL~=`&gHS)++vUM2kw=4J-&rH^McG~T_V{EtFA<KTs*3s;(C
z1iPe{j<dG(<G^qNK&P+_`zQ(RZxaJohLPwg2^ohUr}lDbDJ0`W-NCL0?g8#YGB#CH
zO|FbUeg*sWJVo6fYXgCS1!^FZ^NAM++b7GPn^e=e=u8mx^%A7Z!=w-7Xfo^o7Ur_`
zeN3v?hb;*U)&VNl#xd(TKA*rHY#A2d3W(dX&bBa?8x;E#fOccp(5k_ggHbwogCd|?
zogz&pQ%ejif-?c#u_FnkQ{&dq(IltnS_$&pJVqIyE5T_^0Nyd)3mu)Ph8PzJ$kwPy
zsRYl_Ma4=I5pz>8)gr$dOu8La9`DMuj)#=vR9pbBO8zgz_{-eNz#dO>o!Si}mdhpO
z(3xrm=s(+i+zNxy7-vg|GiUN^Hj516C8?Bj;%UlQV&uLvMn@DXAcu$CY%Vbzu`PWA
zc*+`J3O<Gj2^E!>G0v!s&ZM3CNutwRt%*g?qcP1prno~LtwbN{$vEaXTe!){d6gzZ
zoM`ETl)Oo@F0B;|g6XiFnUgdX;%7LIZ-k=EFR4y0$rS<UeG)zXgf2W(73l_+>t-An
zjhfb&N^|m7YEHbFl`F)s5T$~m3H_zTzexch$LUbcuW@4HtMTiOcpLSB)jp5<<<5z*
z(5e8jZI?PY+i;5KGE%!P9{Z=3KA7OTPF^kC90>!vU9nArJ=ZUHq%_6BDZu{TWX!un
zW->G+ln$2(65wHin~EINwwXGy8%+Y!c=v1iia$r&jdc;~t^`4Z-IV&0+O3?)U`6~f
zM_i@ia)pU=%kN-OdRzmok!5@ICpuScIWAuO<~A9jTNh!eO<7^)<n}+Oj1UQ&ghUDO
zIt8dtZkOkBv$19gv98Cb6Wz~lvpki)eVFa@P0H-mEw0Ns?z~Rg)T|Q{H5w3%N0&$A
zi5=Z1r+U0h$Hb^<NaRUijR`omRGjs@*-a{&%$ze&AHWIg#>SlX0$Dx6ZR`e4&WxC3
zxq8ldLv%Cgps@zPtzc+#-caO3u@M{<3=>Ec`pmfmW|+MhkT<HNARftB&}lYJ7(>dn
zf%}>cLDS)#oWSvjsoQyy19yvXnN1DcC^adbO#0|aNERm6C6R@IFyrhPmOne;>dmVs
z*0jn<l@@zQSYwB(bOJ{bkybfr6BkvSUB2bE@35(-MdDAv#xN;Kn$a~K$bljY7`*c3
z-s%2HQ|z+KuOzT9w|9+0FRnjB%G;zpfOBs+wuYJ~=$(2YPA9l5Nmu5(qS8B7X<aZ(
ztKBG(voYvy{7ArBFM1>cV+CIX+G#iIHYR{_%IZe?qUw>Du7v%U1h~fKfCK2bp#$6)
zI<tZ-wqNb53>7LIm3+0f)@x<?K5uN~^ZbxDUTG;}%5=yA2Rs|4&EvXP$iYkdM@bx>
z;yfy~wwPYQipggl>r|`^`V)@esZ;~%l=ZaBhdVEI0Krgne0kqP;(CbJ7`IX3Hg*zh
z%&oF}2cyc`@#>bGu<xaLL#T?a<hLr)i%`8+=CGGds9HilxFB0KimP3+o5F)<WDW%l
z>E_$x9p<x7F@Wu+v!;db%ADb4@^)I?P!TVxh`n#rD53`JF!b2j8;u5|=*g2t+-sd-
zCg(I-iS_^C+x)Hvgc*(_;6l^rBI)KSzV2knC7pBQD>$jIqSYpq?xbU!(Bo*;%U}t6
z8LuJiEgv-`uD^4mo@nrbVO-(a&V&0AfG?E(E|TCCavS~I%y_{D0<&6ETxT`>4r8cE
z`RcbucQKbC73FiaZt7^xvtIEWDJg)IbPdZorJl;wWzu<D%rPv&`2bHR<SQko?D5Hx
z#b74YWi+g`?j9HG@URXCB9oFgMKA$4LpBA=tfkuKDOZhoR{}K;LRa`!&6?8M>REL0
zM3||o8{XH6?OQ*(uNB%?B~^nuS(_PPxcCy^YOX+If2xu1Ra?oK%`87r?<u$w7As`A
zw>Kf)U+#K^ol+e$#JbEM#F{K4i;kPVgW)%{XeD<Lh603Zp^$YqH)39^E6I)<WfkAG
zOrB+V!Qx-2^gCadfE{4kdRyUnXVOjEcQXqeONFHy>LA@Rja#ALyexw?nu&U?1DYy_
zMAe@2v-pph)3f`|=~-Yh;-Uz93Xim9z^S_m!H&i3J?n+TP+n*55*BE#6-vvPlSqXL
z&<(YkwnXITi~lWNwMO(epR})-8o)1nxecG%|8)v0av;v&os(`HDT>8BmE;*`V&pMd
zi$vj^%H{LgYazU?k%zdo2F+(eVB9=x9NX*zFTg&_zzefBb>MA($DX82u9<ZibUQ34
zMd?8zkg-8S0ZQjUk-N`HOp~<nT1%Uh_@1Ui#B<<#@a?H;GAf4D;qD5FW?e=K4gTms
zGkB;8o0h)15aS*8JXto2*Ay@cmS`IuH^mEj=Gz~w?_`8p3eidNP#@%(;w82<*I<@T
z3Bx#`Tt3eFyagn7yhbMCH4T6gbdzxHMMKLtVC8xf$zKFo&lAlIdI(W8t?|=w%g{&I
zqrN+nP4O0!N{3US8#brnexVaQ`?d+;c{Y>HdavUYg+)0kDoEA>B>PQ<B7Po;^tPh2
z70@ToFDBi<$koDOKszpnCKur*papZ3=zmc2P{~jC^L#?S(YQ<nmDxx_oS-ZMkhvFg
ztSXK6xC=6v5>$2sJKFC96Q6bViMh$boKD7Nf{UEN`e0^l{m)(O9W(6x{>1>Ax)=mE
zxnqOmi968x=*g~w(;2I(GgGMfq?pthdkb+^#l_l2**D23)4V;(&eg_H^1c*rR_@Tq
zU{2Q3m#>$w+;Ch~(=lDzP=(Ts5xaTmc%KksX-Rp)8$v9;h}}4QP+u&*rnt}{068Ds
z>xfG@rbEF+a&U1!Kf0lXIyHQfxsYg<9|&_c%EwQt)9WH6;Q-Pr<<1SK-|e5YUhbWK
zceLArr=!DzA6wrZ9k+IlfD1Wnz1;r4@Pjh(?(CiD{>`<9&mq2|vGE3KfZ_JnVe?b8
zyg-8ZYP9si)S_L83mYTJ%mwN?Z$}wi9s1}asdSKNO`++yto<4-k?|VzLirxNFm4B?
z2Vx*6m%w9&(vym-v^yj{3EbAvA_TTWLMH<0TY<ykEDv;}F(++aFqgk3i(L4#AzoO%
z>et3P_%*o~T$2rcfniyqT1EI|ox*f~8}Qs=hWS7_AJD;5a+`Q<d=#u=XXV9;>x@3u
z?(u3{70)p#1!kt1^(o;mP0#D_OsMQoqe;3hYSAhLIXNewm36d^3yXF@aFF-M-i<}&
z)6%Ui+3J@6@7QG-D!7Hj=|rl1NOOOlT~azNR^aEfVQkwFZU_JY_USSOhHNmoyn>Ue
zhx#u^GX*?lmfv7e+)NkU!R<3jSAtJQ{BZ=Fg3QKpI~d)FWQ2-%=3BE|>P<v<aJhY+
z?;4n4E$~20lTMgx2~>d5WS4#Cm3PTv*>&R8bBHT*X>@2nxre8y*7*lGZca}V;WL08
z<`%Tz1fhYVCK=Rf({A~2V9Q7}qjBCHv~S1)?ToAjq#y!#NF#`dO)H$-jm<i=bpTt3
zz#W53`_>wj+mW|@U`zgHhljDe1Jt!M;2dKvmDf|lWM#H51e#M{m%EGLG%jr-UPQ+B
z-g9y(#Kn47;-sxf1+?m4dA*cS3G46ymDNJeNH6T|%1gCWNid2#iPX|wBwC{#b~kKV
zvEK>R{$}V$)<-JQ*M^S{H8RX*cyA|jZtM^dDY8%24u=ZsCHww;Zv4>hU1_==dmq_G
z;-m!|P#$L#E}@fjy`<3ez4mkymn!o1+xy*c+sc4SC!N%3Dgn0wJFO;W25>Hjn_&$8
z?IS&382cDOS07K6xlR=>E3tcH#rNLh$z9e+>jhb}>cVZxuPiZs<pH(~V7Ad9+CO%x
zl^v&52UGKbcTeX}vvVFfv-iN6m3bb|j_^?lt~bP%SP|b)hilfZ4CNj{Ex3%!pIaJk
zU)%19oas}OY`I%xSq+r+t99@5DD5V1iRKFLOD}m#cz_?D|7YPd;Nf}qhTHVN!4mVg
zVl}i!mFXxp#dAOm`)aFp{WI}Brpr7>BYS#-H)q5*UutMe@<{d-*pJRvQc34oZa&!3
zI$HuqAC=J$D~H!9;FXqxMl3afEz?kYohgW^#5k(!bt$-orQnm*BIEtQi#W6XOX<9H
zR}=KZEOte$d&(B0Kw)EDQU%$%uc}HTLA8#7Dp^$)^pVcSN0=gMJW5l$hqRmZlxhLL
z3D+Wx2mqU{{i8cLPr3(ZtueH7*_~*lD*dO>NSh6!4}ji05*Hk8`?{(k^HfkYGUE@R
zj1RQ?sNQx8v$+8$iL9sdS!amSc6O0TpOt&5rWWknRsH_22QmLWTW6x*=iC@TqYucS
zMCk}}r=lWGv3(9>1J)U-9ms5~up~H6$}6mz<pd^KI@5g!&BtMIj^<ezmZ;y0f{5+H
zJL-N9ooHNlqy=(RHI%TQrs($}g{iQ$ac7I+6(9}yIxYD2HSHp9!|cKs$xDXK%Yi;E
zXMJM@E0)DJib`49>QyZ$MV5tAGthYf?WL!WCMJfIn08(baCa-;jzSZOye${Ur(1be
z?pEg6-@4|vbkJx!Rl9;y4(_n7u}{!us6p1Tn5_C4#SN7HE5kvaXXLU&*s2WFDR&gl
zA2L1cRmEN{%CKAd)0CU4<DaJLQ+$hDSNASv#Kcm!u=&!WGZm40IG~W|>)L8DC5mW+
zY>ZB38^ywVqEPTRqK0BcU>l|}g;Qw=rQ|v~_C<z(qo1~Fv6<^kk9_d66%%X`=^B8X
z*0X1aN2hyhbS|V97pY9<L+(X`UJoZ0^%H2Uf^$x8J=8OCU7C<6bGxPtHL=EG9O)~<
zZlLG}a1PL-4h{wQ?0P3t3^fg522lF3!g<-Xg}|~WwtCEfE&(MNVQz8?$I!~hN&BXy
z2@AY=Bl(5Mv4o|Zef0QhJc^BZ*V9${k>$2q#WzMt;n>qIZJ{+<(`gH4d~L<C=zGSV
z3LZK58~ptnNx1sPSAzuFyy>Y?z@-inQk9t->a>~tAM`mdH)9d)0O_4e;<kw*fqePF
zusy|LHH3Y`=Hr;Gl_Pg{ggrf|v?!Ek)bi%RnN@E_!Z@zHBeh3R|5yAj*S@*n-5)Z(
zKV+Ex(X!9Wo<8;Y-2iky>3t&SIBG;$;}?u+U>E!;`(@o}CX<Mdfi7li=;`V&K(E8C
zb2Lq#oK&6-7iDLI&0NuC5;(BEE>SBxAV!ziE=h(I7ZzI;v^nO2yo6fji#-Z`C$SR#
z4D~65qE<<XKJyr<hFlRFK&%*x428Sv8awbzV;U+5Dv!g$SNhm<wsmA%YBtQMs2gKm
zx?o}AaVlkTR1yyvGZvd0LXz*8fih4CxKhdXG@6$nDaY~VN)dJgh=Tix9wn^!YF*N6
z25D1HkF$Yr{5{Orhx=%I=~Wwf9XXlJ)baO}m;pQ@yptLJ2zB&0B`%q{_BnbiN?0EC
z)W}PRh}%z1w65pu4K?DdVX^CKV1WDQnamJ}N#$()GrIUV?2b)55TJ|67=vL>WLT^0
zqHr3zmtgoHfuUb?3olxX;oejsO?3a#ua!6g`|60+Ppp@HSILu>c{?(Xx?s3epc#i%
zo{{i<AGby~P2OOhO(h`G7UO|BM`);;0j`j74IUX5PbqLY*DQl2w*{x<7M9E|xEVlH
zK8Z4D(J2Q1Ha5lf@)n$R-8FGWQP%|7xF+N+j%);b0=v+{WGp!5N94TD9`s{8FOj1#
zzMpWBu^L4!3SMwyw+ZsE&)$$<J+1~>Katbrpxs9Q`3v;qww$NgnKW@*kq)J3=ku=#
zqi53}^OhO1@t8ip8)LG=Zg<coA89hV=%u{@T<GLmHOpG`m1JGSYl3ft!yb0wgqMv|
zXpSza$2KvPDnsuyK)YLpiUdZXN5Rwp7LJs`i0L=_VeSSsyd&slz$!6w1^`s!yZz2v
z46#uVygK_4Wv<>0q+v;3(XZCBFcO8h%aZq(4h5T=>M|e?8EXFsxl|-TIpD&`@^^ir
z`1*oNn!`gW?>E~UO;Z=wu1%5`{^(yA0=HOgNJM_^Gz~*F4P4#1)8I*AsGu=MS@-Tf
z(nBqD;SeA6#v#v*8Ey?yv|zgdF1c*k`hp7FDA(HOo1M~nLVOmJo6$iQ)yd=BAl#-}
zIJ+29!7lTYgK)q_t{20W>J!*QPk9`BodDaZ*%_ZLHvFzXNUst5ge+yseMtU{e7U|p
zg)pI$;z9J&FGU0**CbM);eJ5tHM$zp#aNObWS-9z{Z*E<MA@7z3>7%8OvjBYvtaWv
znhABNlgU-#;CuNZ5rZ0>IXvFi%{9emi=|VQ{?hjuX7dGZb?Tc56luQqlD8aXW;3Lz
z0OGVR$(}qy$ufiuCTNI6M~uWoat12IjP6FAm@B4d$;C+j>9mJ*H|oU#C<_1`bCo!t
zu!sYY#w?L$%1J?Bp>>BiYK;Q)`Q+?y`^5pE$C21RI5^tbKHU>P93B62|L`B;<=)Gq
z;~#(S|J;8f)YYJ<Tig`nnyG?MlZJ`<Hup+&dzcK3b0k}OkJC%wOw=SMqfv5~`}fE^
zy&R=O;{N&3jr;7}=m<NhC_k1LeLL);zbO{w$MT}k<wgJ3Na04HSDg25J~=Nh^On*i
zW84+_Sl%556@|NHZ^>{R2WN+am-M=^F2B3=DM2zqiLZACWI;cUmRA`A41k5tmFpuT
zJQbJ6_WTAi)X&)#l%6zT&RjS(kgt0~HRws#um=oG>nR3x#XS&4a91A~i)Tx&5^(8R
zuOS-iLCJ>XE%5kfa<fppcxL=1Dl#OEb;AJdchH39;!X!LA=M=sPb+6e7nR^+3odgq
z0uS!ZqK75(SwdcAnIvG4q83-DmZQEUVsO-^vBZ$r^u}RR6*IsM-m5LeAF{Gd(|^UP
z$=8~MhtSTTap8+mO_K(gBqb<7s4_lqwF}e=P{1bOMt=~k;0a=}?eZgHjsw7o>|cH4
zit;yYaQnr?e*t$GRfk1FD;E0wfFysw0SNy_nEW>kM3@M;8l_FK&AXHtP%UEPr1ZG?
z)+ECmB@J?wlSrlml1Vd)^TBvL=s8lASdc3SRX%SB>pUY1w#x~-43P>W78~aKmf%>2
zT5bTB(vd!}uA$1h2$DizGsXjn;aqr_y@5rdr6f-^+$XST@@6XSHQ-FS#9+&=F^N35
zf%G#f9O~OFyyDnjQ2%UMvfIh|;7uy$65ByAomMIuNt5=KX~`VTKw8(WpRs_0`1+b&
z5%-CGNP7|rnJxP3!stGbUFck+8yZ7JQ=boH#4`EvcF_OZnCwZr$<PQ=`sv%TOHrD(
zWYDpqC{jrpDy!Tn>Kme;>@Mq!uMGQ)Cpdxr6!+T=j36Qd1lVQ`8r@Ty^x`3Q<1^Gq
z$qaS7X^7A49jB#o#;nDG{RX&TjZn5tizp<1;;wFxO7@Mg&u*;mH>UOON-RYTvme5s
zuNPx*VF#*H1e5#dT;zBp+4F&Y#X<|Sec~50^2U5T#@1;ZONJ(vhj%P_14DJ0i-{$R
z;;?mQSW-lX#K4V_NE>S~9CQblB*C*J%l44Y;nWF_$pxGa>mtlSm}E@OgEPWqxxuLe
z7So-lY%Hy|s<tjLNC{g^q9yFB>kWmVHhN!QYk{vqEJWo9YBCBmh-bIVJILIM+P_`C
zMNcWi)L2d~IBrBFO?1xFOO#<;Alj&4XmU48`k^h-cGKbbYMEp@%QBaoiukAeT^YG#
z4W^tg(CTf&BBfgvp4>z>&fqOT+kchT=GAH4L~5D1n$RIptm}r1F)mU2`Z|ooMUD&S
z$HwOyA30h#GorNQhzPm>&2aB6_QhrI^{ef}U3py*`-eh$(d~YF5;g3U1Z<lHwOyB0
zi||xkaKwUZdFs(8XFIA*NO!?Z9kC*?a5i(i#k&m_PVPE)-AdP+CmyP)6_9(kw@eS!
zD^C;v*F7T4U*^h<Si$1$eSBdSSdqVZm$AkwzIb`G`$3n{tmclg$v9iqtOELKp>y0)
zBRr?WyViKnq9Qn{yiNR_A>z;`(-Z!_=JVB$t<$5{8BFXSG@%p`oV>%;ga$E_Y{?EH
zCPtpgT9`g%zTha9Y^R>gIp*^8yFEcZxUa?ji8whuKHE7xJKjDx_)(nfbK(HeY&OOJ
z+dDpz*ebS9?TIBuYQUzP@ZU7)$vN8BHlk+U$b6JCmcTa=1nTLgRkoQMVebcB#WNyq
zH7{hYWw=TTv*z7!#u$TFI?BUs4`VsU;x<(RON<zhoTF?Vv$o2`MiJ70)XS(<LdJrZ
zT(zS^LTjifdWMAvvOI9B*VhgA>bY!w+5XM1@I2EDVzjb$eGL~^sU?y`{`y*$m&lA&
z6fzevSD|T`P*_K(OA`!8$0^)mWd+G?6b>BXsVw0u--Wbb+};Ct;lVghEO>KN<(jR)
zGXAaF<<*tiS{qjSoS+;5^L3jJV~2HL;zdphQt2K<1#E>l4rzIr9&H;Hb^o`PQqQ{M
zK;Z}*c1*E$v6vDeA)0wns4a_D6$N-f=(7MS^nYami){?+ET-0@*?UF}{eUdi+M{eZ
zwi>?3-cE)G+-`_fQ}j;4&8jRTEl|O@c%s-I93>%0Q*tSGW}ybjG-5}>p+6BjYDq_8
zTj&y7auhwx9ivmz!F@2`8(&hLz;Qw_Mxjb7TGY{#DvZ!|Bx*mjV<gmwtf^=DO%upi
zqNl7GdW9Nkex;>}n#{_GDU3)V!paKMccBcZR%lMLv1je|v_uSI&C*2o2dFB}F8g?l
zwm78MU=+^a8R{;O(z6iFu>gVAmKWPYz4`E<K>;4|DWadGgL<dsCsLd3M7SFsIFa$3
zfOvjEI~HDXR#nsy4vb{SRtUuwF@CW|YYbfkVlrTFl>?`m>y-X0wARxyWh_j~lyQ%R
z*L?@pyR<*YDB{4ZZY=B%XZph#uCu%FG5t7;)Ytce1?6ZULTFF}3#71cbd|#ner+ic
zje?OZm0nw#M<Q>S+>WD!kl7B2@y>iL^^Md(!XWxYk#QI|VvA1Zo8@I!uEupPpkUk7
zgI}iupmtH?ryH${ma4^0V#)#JmUf#OVU2V7_g_ff(M=OF*VExHoId*H&~WqAk_Xph
z$mR+aJug^%ne1aztd_Xr75w;cz2M0(G_Dif>xg<m&pgb+Dd8Y*83uwN{AIn=bI!_G
zY1##1uwm~9KTNU?TWn=XSc%0E3LsCQLMPpzVA_#bjLJYF2_k_m|2VZHSaEqW8Ia1L
zp*Tgwh{aQ&JA(r_QuDXcu;3YC#3vV=z5~n|tj~5+8*+`E6oea;mTyZ1!HLd->2XV!
zy4+RMXejV*L&&H^^SqOu@(B2C8ODU@bkOG$=^M*I9KTGKVuq8cy6&Ay&j1&ZO)D3t
z>*V=5%IKDGO&LvJM=W=_Zp@U~RCDI&$b_j9&X!d;Py2+kX}&zzM3C;&*w;<c%e`Xi
zJ)v^%E&Q*)(!Y7~|6zFYS#khY=l@$>`F!<jFaO`_^XJbW^Zz~O|NBHU_o=*noR{z5
zthEo{tgL<h`#{dVV#;13)-$K6!@W%Jk=2wwqhwTLrLhZ|I2Y1-xS2cxz5M3o3HUIX
z8$^x1)L)ZQiVKAEObj>duf&QJcoDvRNhTiEASDSt>><|6*m0a*Y?RK}mY~kcC(f>2
zGI_F%((D(2PR6I)*6zf@)exCd-VkPR%`c=<DaLmY0pw&5_{M6nE-9LNbt3b15gjmN
z{<;X)iT>sIDpuEr^S$mkSHEu3blAeswYqeF(rZzJc=5;;OCt?5rDdf2rJGa_2^mXT
zR+j1q8cb5a^SEF;0a#{cbWzwc*7`F5fW5AqgI@BsoA&)Vc$xM%Zb91PY|fQ)K(|@n
z5==R)yEDOHEQ)ljF6{E&O%=7dk5d+d$pWVUZa|U0hOYJA7t8ufSfKpjP;hGm0I9F-
zKA1nY-`wDvpsz<P+@6%g-Fc}r*k#CK1A`h>F$ElXAGP=PqO!Fp^kH`)bz^0%+hM;8
znlooaEIIaGO4W)+MWvl60V%LWPAPx-mW3#=HZ7Pt*l|(71p@UVA-IC^G}HzmU5Q)E
zGLmpUNR3LcIi%Ym!wH(tAWjr&D7^51Av1^aA(&>_4#JyWLkZHi<E{mcxS`eHIbIZ;
ztKK0H#!$(|ke-Wtx9(5IAA479SK*49{~+$pv?%@#XJeK6n=v%$wI<m13b*HtY$0Zy
zIEVI?&y4$0+Emn^l}e&u8Szv=e_h0DXJLi0?r|2nz`6)r(!efJ11OaL@Js7XOkrKi
zR<6M4RjyuPob@I^ToAUkr=2E_F-aZhXITsp2N9uk0?I(;-NQQWN*Mi~?854pDGn*n
zz<gL%f+jMp6l-cjT+E_i)vft)Hf1uTde&>KVkXL3bI(|F#9G%wdCp#pGE{_1HIktQ
zEj}SoMkl0@E_H-->YB^nybJMPQ+D@9$^Tj}$rrn}2ylx0@2juATJ`0BUw^UkDF1tu
z|9ztJzlCQQ1Ytoe3FYZ+tpEii8FVntT?cajs7z@;<#8u&$G`r<kr0|~s^J9v;x?C;
z^$Y3JDI(R*=grjy6u@F<{S1Z=4eun~3|6q8CE`U2JWV&fi^c17G<YjsCEbD8?!u&^
z?G{;y@1#hGV{8Ep*$$FhPKKqbibFCiNxJaqyotrOCzt3}vKqtsul(l%Mwwl*5u@D@
ztAGFd7wEpjz3vP~!@)=<vPLIXP8^a8j08P;Lm``}5zI}3c8`0<``ZU-`S|Mi===TM
zy<M?=0zYH%!~W@aV*AC((ZSj2-ocMB-QtJs<Kyka(;u<^-s@M#dnYGQ{^(estaX2H
z7kfPXQJkIZVT0SJ;>V-2V{!Dup*Y?@`KRg`O|deac@87`0Q8rmq(4qoa%oK4AVa0$
zz8VcKF*P+sDeT{28^Z|<lUyxEgEtxaH*-vRG$;eSqo`IGF?N(9Vx{C4gI4{}XqM(p
z4-AaOhghS$`a3eR-N3?MgIDw&Ltrc^sB43vLTT|g@(;phbg#H=bU&2msNJ1(?6@o3
zc3Wcrw}b<FwxDI>GJ}nhgtKw5mXL?P&LAh}<hjDQtcp$XIVQB_55``QB8I?wCkdwI
z_^IS(n(w;RPh<Ub&LKKfR9p61O5QfmhAkRDJ#Q@CHeCBFUqrm+1Ax7s;J_pV765K%
z&zEk+Q}G4-&+^YjhZFc8$`uC}+(H9NL|Amy`^o??e0JEKJ(8xf2jAN)AKMP*(%cpB
zAR)9H>BxT^cI9VI)cz)Ii|;WDmh$LT^jpTo!)BBZJN#t%t<}I!Ss&xn{USxTuyw~A
zLyovuqQ|yJi!3E+yz0ZGHlZA^7*K=a?Lk}6HZ|r}?5DTgJ1ag7H{^o#i<F&;9oe@r
z#RMv;eff<0y)!Rb+APHC8fRRQ1EkPA|37<g-WSJ_EQ-E=FQ1~U^RA{E8bl+>K0^b3
z2w|INw6V<bIkIm5itYjmdcnPD8IRs)e;He5WL8!$NU~=%$TM_zRc7QK8GBGrvE0r7
zTh?`~Z`$Z^rjxC1P7&j(=gk-3Du+XG_vE{X0`FX!V~wGO2w2{^t!8M3u$uOF5=({c
z!P(sWlG6M{f^D962jKtMy}1N~rN-K0a`c7E$;MEp36h=TTrhbF$dxmHM=q6;I>hcC
zs!pvH=V`Da+uO-{@{fPG9<wumEbq4LjHZpziEHQ1%@D#!j>foP5^O`)SJ;R^tu%<?
zPHEK3+(J#vO5KDfgKno~=9~g2n{^!kMW_xhM(0IKYDxOXaeosD`M{?H!g-V734=5p
zGk@3E_aa@dQ$gj^zFR^J-dX2E=_Q8T|7CCg<;lT|=f_};XC|GwMlp1O7THOh=cmT;
zA*#W&A>bs<;aS#c;k4g3WBCs@#Dd5uI1-LE8Z?Pf0E5j}O5*KSRU^dn;h<-5`ZLxw
z)m(+ce5_w2ojmJ~=>9FNTLIq>tBq#pv}(6n*pY)io9t)kXzBJo??Dx`g#)(?^UkE5
zcl;*hT<GlVc4Pjo{RN;hXZ`p)(KGgxWfy?mJ>xw?<>VtWzw(@?A()f7&&B!CpwE&$
zmK<>2^Kc11*l<=#w>vG>n}#j7^bRZ=TvhExF_M}Ee%8)(R*dlt8ZkA)30~0w@2#g2
zz1v9|>$Py5ld^>uBt$Umj@Pmu=kvh>>(dq@59@8=U&BP^mGDZ<9&LwA_&Na35R1ZW
z&`a|_xox-gUM(SKLou!RPnt_A*$2fpTt<F@bD_G3JP>mihGZl2VCTgJcWUTEr2Lss
z=qb7atbC(s)m8bvD({@f?Q76Ws}d3{L_3eDc$umqg$jb1$8XyZWDik=aG*K?xDB(X
zFGRSeilMp>E$oZ#1O$*HLt<hY-fWy<=E$sFbc-?4mlWpHl2XYgMJqhZIr>y50g7Xo
zeE+?IYjy3aM?<;T;bn<HvsINN|1pmt6UQH$bY<?b+G#^xqnQjKXWzR5aIEAp6ch*z
zwZ{|8N*|ICoSM=GNW`Fs!mq6Z_s9V&muxDrQ^QkS=OQi$maH?#M;wd#1`^^*_2O+9
zgVIIR1Zs|h=h?vO_^=fG<W{sJ;rTge!#~IX1F6I)1!<NN+;Rf+0I~fBvRtY#<aNw|
zgD3&P*jotc?~@)T8=)Uv{-q*dxc$6r?lCh!2u1w>J&J&%BRbS=$hdD92$7@-uLZdA
z=%Ra6tGj8c`Heja8o^Lzm_&hg%o1Phc2@@ngi$+$aDh|N&tb!5NFMVYFKlm<9V+k#
zbaP`_oyxfxUaw9i=Sz1ApV3mp5bcg($3rfHMBI4Kps$Z%Jxqg-Yo$IVD{fd(wY=%H
zTAvlQIcl*1s_nAk7Y#-U*5-UwGuA1O#S4@+x$%*iy?<4{07tDJa1=E!bl2gpI<JNe
zb|h2}M~k-3da)Pi(ieX*senZ;b>535>!3P(YpGi7;RD@6A{~WtPjNj`Vm|!v2&r;P
zR4ahSf<S)wbc9YfW_Z0RASbA+RBLX2ECkXs2}zxQq&oAT4sxPFx?VP7Ad`^GP*i__
z9N<DRTXXn<%W6#cU^}t&x<Z%b$KCKToNNtgi>M$=#j1BIm9V2=?MgEp3v=_0b;%5#
zQFObYoO>oL_EfqO)4Yg+^l-v#o(SeFtvO9Rx8#f99K+6x^io+;M23M77n=oamMRy*
z1R$l)%VI7}MPf+#+8PEHW06{)1tOX_C7QiGQ({K__2^(SOiWHVvpi*b!CG!ux~VF;
zmwKp4IZZy*QCgwtIX^ZvrX;8P{L2Z*=A(hWpslAPeVEQ<pLhv*%nM>}%hTi2CyY5%
zN$@(xly;0o)DTnbr1VbuRmL7(xWG>Vy&eJ0<4|C0`fd!ztPW@bdD%WrH)4prBufTj
zztKr4s^j=)(ElH!^~Pvk1ArO&|Hk9T-v;{sjmE>rcl!T3{r^|3|1WF*+XGO#pj<b@
zl`vd>%oKnW|D*1tWsL$l`MEidv(*2O{si^^^TFF3ZdXwMH`bRD8ai>(B+MaWu!$ka
zDWL@Li>HBzVO1(73Av+hCja3wtzr~B%(uD1_k?NCKzhAi=NE6*sB#JnMjg3{8f5S=
zpAhSv?JivCk^TTPs$<&L0^<PoFMtd)Ffioms79QOl}2Lhy*?0GyoE-qAQ2aU)O@~n
zu?E4GpjmTKG?EST-$VTGk^RqfvcKKfs4eMa8qg@$c$9pXG&b-pB&Xd3vvn));<Sro
zE#QU&@u<-;VO;2NHSZGDhz&f1gRuG8mSy6QQGh>Y;^5=;@*B~zj#&LKao=4q7&9AK
z9|9anHchu~%><#ozIqePCLrZ8HJ>C_pv5zL1vA^0NQu&yHI=Ftq0-pU0>Mxz6d%^+
z$y&Bq1~&bOt}cv?9lH7VbTX6#<ZXj}C?@mBMvwINDu#o~x}yPQ*;)4I&dPY%`~v~m
ztWjVBY8qNYr)LQCiuK(({ywUYRvPuSwYB<qd$cnANazUTILefa@MJ<LF09N?$93M}
zvM{53%){d)s3%w3v>YSz-L$}qV;X$3+K|bl)!_r{#K@-Av{=j6{C9{ZZSQL|?*}e*
zjLe1bB_NElqHmJ+Q=K0sLlBCBAwj*W*JH2O<LSMI2QWo;ddl6^y`68-PQVc89BsW#
zVOlzF)Qm?3NP_zUwPO(1vwy+HF#{CYqdMT`^1}!wf%*?9w|xl(N06F^H)3q2u>4Ph
zclldDxSTJ+Z68qLW9E;6jSa>L_1;l;@g*n#u>^$cf|1&e)4t65AJjfly+aTd1Jw<W
z)GOe?t0GAcY{5V<kMX1X{zwo&Fq)HKrPOVaVxn1^{hCFAL&J@|&5_&VSg5v*HfS?S
zcZ=1kSWJh48~h8n4n9lBE}VVdR5Oqrg8C$ECr5>OcIlkD4yctWh;JVPNJsmKloVw$
zw{=!A_7!dB_GJ!nIKV=jwK0f(%%*^}8_C9Mr|40vTIr{rRul`N1ZzUK2z-Z$#U?AK
z6{}0U>k$YLMi3I0WRJ0lrZiawnKSZqSCNRispac8YP@D@p;zW1O%nDNNQ3xNdCMfL
zgFry)<KHiE&-zzO(^?%Vq+kHO+>Khn0n3Fk9ghP8IQ3$F(>HO$B}=zM!=AsaClG40
zo*Wuhy^ajK`F{8}^W@b0{U|a4FwKuhCp;hEsB5#08|66<C%@l1?UR?Z!mK=~9Z2Bp
z_A;*u#J#)~$rdK~0HTf3P&+D-<VF|=a&$B3yX1i`uwg1U%)pCmcXI<sE6h1T=NVg(
zt?JOrAyH-eaF)`Lz}?rBJQ7TdVWde;lZs%fj!ihRt}|9@ADcQ1NU!KIs8s?b{aOf;
zsbr;0hVVz>Esc(4gdngLNnE+~WNLBlra%7^4CEc1{?j?8e<H&S=KR!FU=$I~kFEG}
z-uXeO3@!1-J$2Z+aVQ<w%BXHCO}_^}P-$XR9RuTU);I&mh9CvJoPU-2-N_GwQ~(m+
zo^P2EneZ4!*qm-J?*kOb+Y@AIxZf^t8#rN;UD^OI?xC5b4)|l?EZ}RDh^y_-Nr<&#
z6nINMz@u7H(#IL_5O5u^xK24+N=0k1f#KNUhBLP$rz~M&^GpihaA|+V+Q}&6hGyw5
zvSA0os(7}FI>M(wtcG(V&XY|}CJhgYp1BNgkD!IRCJ9_N$pqtp*vNQi-D?K|^1vGq
z&<7ii4PkApK26TR0giFw8LD?XO&vNf28p-%g1}tHDGShPk>KUX8i}BK4|%22It~%@
z5+XsfJKGIyehIF+q`wmXFBeskT(M^ef~U)WHy%EE6v}@ee)rv-|JR-T*O&jsfc==6
zi{eL9rJtW=lTNaAVIHhq*au&$0J*|T#*p&+N6%k8-92nkICvFQYL24wG7&SPOj#9l
z#sQ_kH!15EH<PY}RX(&AjJ0tjxi`GDp}2#r$|L~B6(qQxlZV&^=Gmy|68}kxZou$C
z^Gb5uagse>Dgf0+<_>Zq0$5v{F=h>6zmPPl-N8E$G}yqALOFrNMbQ{C2$1|In|dsc
zhp?pI@g9;-V<owA&VCH(0q7D=UOasPzx|+(nU3V8Og0gKU(_^M;CGlFv#1xB=xL^=
z2`WmWZjy$%p>k-(xU#!d(ss9Ek-}8C;^*#Ssf-!c@*@N%!Al*s#p4}s8%6AP0n2pH
z;{!~6>lbAaUs1kn&=>@K=>Of&*Fj_fd<Ic4ZpLB2iXmh*=mY^^A|>Lr%|i(LfM!m9
z<b%l#ya3k3A7K!^oqOwdkoy$2tQ&LiYe63RB>DdqoockvhJVYLU((bb4sOi$Hv8DK
zR*Rl<g~xRM2*6YMD902T<Ug6c&5CYz*0o9SfOZ_5PqX(HP%1AjofZ*wTIJwq(!OMw
z(paR+6Kr1VECimU?bB_sTZlQtHavt~tMaD%m)di^7KN<sxqQeJbJw{h^jg?ocm|&w
zNk4qe9g<S~vs{?EZ;H{Essaw2_dC)!={C33*wel);GTPh%8=9?ZL10M3#zlEj>dD(
z(ZJg+Rk6}Bnc^&HjynwjmSd`;a3&5=*%XriT1k6Z;$X;_Yc0Kzs_MiN3k+#l`hWL&
zL>qoNabd4bzg)*~KX_f^0Al58^upVqA(%LBQX^bMi1zEyTv@Jte^g$gbuZfaln(Mf
zE$^0c(6j)kjbOhp=ou^Bu9SBAbj*~|n|fs5UQ2>KY@jt~4O7*gGDZP5-{`E00ah_b
z*^1gnWSfBch>>3NIff|vH|Uq0{5i)ub4dfRkx?hp7ZvqS<H2f4aGp%x#wP=F`v(i@
z&|-6rJu48Vj7|fbMpMLwW(NeLvpHsPXj4Y!QW)rgIW#_VU<$yLfhmCVZB!>QtvG2e
zAwxb33$SFegj$g9@wgVD`YV&G%#~in=GD-F_PmdB$XTX`T9Pa{ay3d3Q76RjJ1Nz7
z^4RB9b17Gey(JzzSm>hjcS4A7A3V^%K}CJ+l(HHTVX@b5eDovv#G(xe(u4d_v+;FY
zH#G&Y8iMx##xRPHfyym32z}ViK-^DgrffY%i)by#1hg2EMVrv)_`w7B>-DHo3GGxt
zQS~M7-Qq2<GMl8R!2@h1G`<+XN^3+I0GXa6ny_+~SYb3u+)#43#3Ysy4=fl(JkHNm
zSiO5AD#;K00he<`Dfk?5myRf=VixxoN*{zgrTM7URn|f*mr`l{oD#g7(rfN+d5y*1
zc;$+YnPn-bQqM$+(k7ttRL^@2N{DH|RvKZ;=2T!|?|Od?PgLrh=n&q#DL4HI(4cE{
zJ8)0}iF~7$?tUWaq*hjhSaoa;j(laMsh^mRm?!>yvkYQbFa7~F#nL6tR>D`Qm)i-|
z9x4Y8xB14qf<0*W3EWYBuCH$d-D+o}G4JR5JLxp|wO9k3m_^f{k={BI^)=6j?X3p~
zR;~m^a=pYVv>idAo5j_v?ylJe2UR_0TZ?j4S@h&=pc?g;UjiQ`axRiBbslQT{rm3C
z&bD`wY9-C96oy-BOYLo&KImfwjW7eX@S=G3B&I)AVq^tUXa=6%$Umj|*U2^g>O95M
zt4qh_M)QakoUteb6)*&Sb_dBB5Jn_>=W8Sg&7=sGpr3Yenq~OMV^&wGRE-Ef0!TJM
zGuF|4CyOhAapOgLZ_)8;`o@DNTd7$MZyzI`;01IZ5=xQTvlqO*-;Z5}H=jZ&Z857)
znB$Vh5wv!esz`4nyQkmU3}SP0PU?K=cjiC9uaQzeqJNorDb5WUQDM9W><C!Oqt`P8
z=ow5&2Mt5BK37;q^&og_gXRnJ7UsioD!KELElgHS;mo&$*0?M#E)7uh+l*Fb3D(TY
zrJQ|++Ouu@?<w6MVNPj)OK6dkS<OQdXu3-zy7Qm=)E(;{ZkLt~qnSXqSLh`-2MSM)
zabn}7CHM$3x&Du7TGD|9$p<0LAk3W3J}5cR;%Nu{e1jE7>IZEq9~>ihJUsXX;vf>!
zMiLSF@Y#NgkF5~w+6J`(?P)G~7;lWy7E*U8`>@Y>hDg=t1;V_DRS6OO?NMbJHlq!a
zxYJlxC5>IhlEepL*p5}m3xUo=Hww?kTP&s{XRk%M-U^|2e6HV5?3jF}%#Njud`NuO
zY0|xnc#wsU2!rc_Vl3EsV4vnNecwFU`&`~WB=DwZ?Q=;e7fst&k%5wJv3yqqj0=f4
z*!d$k<2OGeXFLgLSQyiaR^}5$SZ)wr=K^(h@<noI0!se+(`ELH@gdN$UdqfQI_)%~
zRd!}DBy<O?M0MOX)63o3=UN1R4OVc#xDuS$evXB}n`=C{IfA<x>ebfzgREO*aM7D-
z0oTgt7JC9Z8n~SyI#TkfxCBuioeaL7Y_iZ0IdEYDULbHlm4yagfh-iuSYX=OWD3^6
z%fEnMguHm9#Im{Mylxt)Fytr4#Z1?00eooF@$A!+h2Gxdeu~Sz7)IU2|M%=aPsc@f
zG=B!r>Gq%NkJi^q@&6w`Zrs^_-r0YCt>&9m_M4|C2ZzViHk<N#5j#yK4A_I@F_z)0
z3`2dZpf$+y&k>a7lM~p0AOMQdb%x)897%0puNs@iXq}@JAisaFz!gU*<y+F+NOhYS
z9vK1PLDy|KC@2bCB)ey8W&|%=cg+gtDmN`&u)?ZJsI#=-JMW6Z%i4%$DU+?{W(^;5
zjU~C?uT=JMbGtjx5ZTPq5PZMm)U9mWVe_+7d?qX)L{X5JP;hmXewn-WV>zhl55@b0
zW5IDa2l7xwAAEGOSk)fHVgh?GW>4Lb&mxb&?u<=Wo%1w%`PQvPZ+3Ijg^05%sH7Zz
z1J;~G^6pX2{K*&I+Mk?FNrzM-Qk1siwfW0DKSdhy>{~JOo}7l+yI!L6l{PR&JD<LV
zc@tG`V(w;nAItmjF$UN*-RIYKJIiqGes*;*Ors)=e^=4nuoMCW$TZD1#$9?<Bf|7K
z$x+cW+91n8@^X21N%##@S18Vpu20u;#8w(8l?o5pwWFn7cdym(OtTo(RZCHni<Jwl
z=&JYi?0o)kQgWBvrNFw&|FtOnuW}X6!v)NU|MhtNyY(>s*LRP<yQBZ_=>I$aug{o3
z?7cp=Z7#l8^04(kJbKs*Lo$JKpz#>fhdsPYA9j~MY)bkt;j1l~vuD=s+4I(uXD^;A
zzPjRdgs+Y>);<IDlHweU#EpkM)D2Gr$*)F9Y6v~B@vR}JJ-8Wk2N&q;gW)8E9pb>W
zkc>98t%D7Luyg?1QGTIqMJcs~W#&&2k*o5U8<%W4iQAvRxyZo?)70|<4v9|E*2Nsc
zR*OqUr|LCjxs@(>K+QU>t?4+uoYD9D@4sh7tQZ$MGeYD_K>Q4ag6f=(*jM5P3Q3oS
z!KIJjXoCi+0f)L4g!WyP$i8M$#Ck{v-`l}5&dB-f11zhH_KSHJ41Wq%f*cpFoqqzu
zId_ELFPbxYDY?zR&z@*nF~x>01Wpzi;%f{sevP27*_O!`B(E`2>J(Uz^c2WVO)&O!
z3%=Kr2}aISRnI-09O|#@q&GC}V1N6A7G_JpO6DGhK7gvk-}ay|=M8yXHp^YMJT!;y
zQ+Bh;Bw*R<>iaP@d{%-gwgdf=M!j5C6stOrmFmXf=_;Wy7juj&!ZZLsY|pQpKyRQB
z@QyHo^o{2Yx;IF==SeruFkX_)J(20kr>kD|2gDxJ4qyQT8P#5i6R4JI2wHJGwxA~n
zoAY|P4p!R{Yz%E&qv3)<ud6Htqn*)dv5?KQ19u{XwMou$LmTHTK%HV)LJ@5w9{W<+
z24Y&}k&Z@XcFx#(6?k|ME=X9c9^CQ<)ybV9_LljDjaE%U4Uo@~oeg~3!a>syE$!j`
zP3Jhh25E=L1Nr26%K^nj+ynAcUa@bsrEn>$*vx=#GG8UR6G3!ZL!KHxX$hZbMli0h
z>%dc4O&furY!Jk3fw!Jb1%r17%qI++e(XrAl6lM)lAc<d8^2R>HeDWzlxskm4!aYj
zv@WcKYvq#s!6%q7u(x9Noti*GdsXwt+sahIjj~DrlmOY{Fn%vX4~q;J6MsW3-^fU2
zSImu;%9bJJ`P5B#^W2Ph0+E3iQF{hkg&`1zc)&IBS6B5g%;t!d;<VRClP(9?L;{g3
zA<+%@vJBH9PMq6ShXSm@Qp(G)z1?t1eRGLko9&x-kwVyN7>dDADN?j9mp@(iiON{Q
z_nnzDxkX)~5WZWvPj;HToN__KY@G4snzzzVbMhG9EUBmGR@QBA6uK^64hL-t%jJ2l
zK(Z^o5FtYi-WE$N9G>{Qh%O&QsUdQ@V&r67YfC;m!+kJ4j^bz41e*ruJp?IaipYT{
zob4x+hF6^tc3n9?hk$I`mVW_Fg}F9bk~NJjQd372xsvEfHg1i{bd^&PZ*F2b1UP71
zh126vQ#G4l6g$7N&0T;(oHWJ0-0_mQxJ$=O0XXb)28%g!>j-(rAuY@Kx4<OTUQ7nK
z&G8~L&O&%ngV6O@p4ozK%QCK_SNiT`Ga9RcfcLy-?I1{DF#%~mZVI%i;X>+xGLu=e
zhB7NFR9Sc4TZiBMwrmPSo+>UFTH?xX(<}GUORIz%pElpN$GymC$UL>(h4^!3gQsSL
z+oM8sGtg>i04RJi(=X`Xj^t8`LA2H5(zQ%(N6V6Ul(bLw7LU<X2)UkcG!I>3Nr%R;
zE!4?_58^3<G~%qdxX6bb=^5o3<97qhhdSCM05BzVG`Ad6gScNVzOB_&O&6Fd8&YLw
zds-bRvLwXt+_a7)GU4*3TeuQ-b7%rcle1+SC`_@-oc1&VXxpQJX3;-Oi%>I*&`1l?
z7iKuZoY?S#OPZ9qlD<1qJbw>zDyVYB0Xb(9C~D9W7&RWz0U-iukIMkZi7ywbOiXO#
zVk*r;1ub+HBZRIT6-2Ha>`H;;4O+80v_j!S7Po9AjE*3E9G^@ymZt$SJi&dFMU7A=
zH9h0nv@{r8_1G4+7_A;)w>-cVYk&(E0Pk{TUAju<r4#F^{?Ymfoy%S<C7_+AHlD{M
zvO-jPr#ZdT|8~yjGXkBV|9$lBx260~-)=m*)BoP-fA7?{PJL@rJ*m@o8!hg}pP<E+
z98jpnZT$7G-`%Nk@6@=rP~$%R;dqJ`m)<U}aO1cqTH)55TNNB}f?gmWhC9nFV@Wna
zxMjLiT=aocZ%M?1rD(DY4b)}JprEL?O0Kb#l>3xwts)y|Um;tlhZj*-CV{>((CyXd
zsC3`(dGHp6N)UJjdO^HN9e!aqTEqN2rLXwZzy|6Xzf<+pLcy3K5IutB4lPz}&=?;q
zgOqytfv6=PEMHwMKXA-4)7ED3Y(8u>5ANkGXX>P$sV@mF#h@KTBLca7*~^C4N>l{4
z=4i@`F)3WK;+l<NLStC7d0h#(E?bv2D3J@l3T0rnJ?aDKW>T8i)kdx6&1#~Poy&+R
zaSSIIeuk1dn2_Wn%Q_{%!4*dklXSBRu58X*Z5<pIK$vIy$h0*vIzpA@Mh5A;&IUjm
z_)(w-eH*+9m~cRPoxCmTDkmE`8%+8gOjJ+i9J_w<J*^t%J6`9sf;fdi;;|EQfwsy~
zR$2lMw#FV9@D=#CY^-B7`|YvGJ*7k<T7Y)(2&ubK$*(%=h7b@yR9(tdAp;?H#$ZZ}
z9EC&dxt7DER_C7ahgf}-b;k%GRnN_{;Rg$@29t?WLK>1G3X;H}rWa9$yUZq`<xMGW
zihWJi<jD~vNpvm;x*^W00pb!$rx7ac(PZQu<(l&J@fL>7wmrqXN2G7OAyz4qwiEzi
zhS~W#*EOIC6t>Z|NxKiiNVv4$Bsk@jjP-|Ye2$|wKBKBmBxTE=!0bmyWfQ|6Rp~vr
zeR%200b#)$(<kKT+TKpCt}xeKfmFxaVpIVA#ev;nYbs+5Im}m0y|&p9)k3P>E=Z<5
zlc&I>=)yB83M`7)o+vaT`sXzvGPC4a5cPcXA$9eKPAq;(8F$x#!O_xZu^^KD8-{zH
z%dkjF(@wfwk32xM@xsZ#4OD8fi>V-^7ESh=yb@QP(4s|o^RS1H6I8nrTEM7U-{F34
zq|)DwWwWL|s}hLOd0QUg>WeFx5~PhF#2?ad7{)@WIZHM;=4kJV{wL`S@I)@f!o8#^
zxYqP)$@g~6AxGagng%g6;JHEgc`dPCH@_><_BBb|r8=6cEJ?Ode&W+KS}=Z%<j}UA
zyK5zg6)`=wpz+KZXZ*YvxV*aW3BxH;i$Rm>0Am`No0y15h7IdLTJzLXs4=Z8=Z!4+
zoI+fVf$VzT=rV@7?9JHDmmPXRMsY|vUZY@}mK#?Ug~aFKqC!?Zy^Y0XGVo2S+T3A5
zczV+ab%~<HXt?}N@Tk{tDwu}SuV3cvYsx>Cqa}bu+qA(c0Z%G*)+o7x46(RA1aBK%
zJz-CwT5JGJ=_T9%=L2!wG6k$EC>Yv$4YgmEOzu+1{R*c!0wCMVpN&uj#^)WK*T%_*
ze5?YfMymH3F#x?rxR*~yUMiFUF^b8_hB;+0i6TR~$!<9I8%`DRDXZ8^b2#C1mD)7?
zLsD}n&1W54yU(&~XqZirXW2FyoRN?Ee;G^d{Czt|`Ggt&<)GV99lKIG#TqhFd@sd2
zrDtOqgn(F|!@*`m(>O)&FmHFWqG$bEMHC<bk>yO10@|$3+hQ=$`GhG!^}C$1>`H<f
zs$9y=>{H5;(b-Osc`H>l=a8gS9`-A6X%y9~PIYy*TrWJc*y+Li=n0aznr71DlcFOg
zC8?;rX|vH@kU&5H!>q?cy~cC`EMoHZa4{Z4^|dY-NNICpaHD|=A>sox^L;}_3`W7d
zyUHg;RUz!DAwsn#CHfl|Sr<&*RH0R6ZWSC?SGgPw2Jb$<d$?(~ZI9^pmN<Z}tl)z>
z#&{a&xGm1M#60(GO`w8~s3K$pP7;iWJ7}h(oA)oqm#G*A==dtMNGg_UfXWl+B<yUl
ziu2e`vFb*^jGGCaHbWO%41Q8{K8P#1O>sc?t2y<>SWHkH`dq<9qT8b>{<E%kH;tfw
zYU$#ZD{Q!OU>AFG1IGf{dwqbG!u7ssR05>&&99c7sh3^))AfHhA0Rk2<~qLoME&3T
zca2A3{FjG~JO9r+{ohxs|NBJspE`VZ3cx!B;J=9i@a2EEPF}PiPkmek=)J#Xgrt83
zPtIGoTh4T~464Ot-N|QiC;h4&G97N7%Of);(Y&4GWtAK>sYk`Om9C&=X{D0#RCrwm
z;>;=sz0c^Q=j4XssaH>mW;8aE_1KvBl2ZnOLbF0dY1Ttd>a$A(MHddeM8$33Pj#n5
z?wsu3F9zgMg4<7O{XU1&7fLAM{WoRypP6?F8)yA86*oJ}FE2?doGA?JX<eH9*rl`G
z%1Zt*;2xuwkOPfYc$|RCNG*v*SDop6GQrdA;E?ciQcvLasc<||@T<H}D%#g#eQF+e
zYpi+H`hRNkOXXmzzTiTIlC|?)RE4UIyTBxY1Y;tRyCy1f7dgV9pt9dBt|fCc+=q2_
zHzN5DHxVjU!G^|=2JTY!+kul|+tL0ux<|9zZk#-2;)kat|DR^{3_8xf>NrhtXiH{!
zwfSgH+;gy>P3MkdopItqiBGo^_b9PD6YH<f{`(^D^;lQ+>G8id9)9;Y68~%CPX6<C
z%76Y{Lv~e!{QYtN*|XNq`$y0B532+J`i17VO$GKg`PJg_1P#;3MoSX(t#XDbiM>z9
zIvL!Ii=MgJJ2vyVoc?m!aKo6bCcB2GAeq2k(ldt&lI+;TrdOEu1<Qf+8EUgSZCg$}
zk?rakHuA=-EHJd>fsA>X1q3&6nZ3=OUN`Hq#VqJ`cc<qY+*zXeefr}0N$cSG;lcBL
zit;+bF?J`2;Ua0zITA=c$nLybcb;|OyC?45S=~O}b5Gl35HcBoo;g_A%}rT5=ATLi
zy%i4HsK<SY@0!+P6G;e?G`u)J$}h|w2L;PE@3!5ey`Nf7_kY+uJv?bWKYjLO|ETri
zht|>lj|aym`$xybm=e%qqRpqchjZy?rqqAdXXIls1Yh2T;hVGc4C@^|H}y7V)*JF5
zNKPZkbB}&bw#(z(*=~HBe4ji@HUV&D<tNbNN9%w6_5qAyaGuhrYIVQmz^KrC?yl|u
z;hMBBC;e+u$^)pE&z9Ac#Rg&eaAUUbz>FUT|FNk;&g7UzKJ7d>`nk4z^mDV+y3z;l
zCaLu0NzEX~Vw{~Bqz>no#WUsc^3Ila`VWqtn|7X0ZRd#@s0aOq!YIH3nst+s1|bgm
zZ5My0SdYXYFnc*OhrflVH}9mo^`3|dD3z8}kepg6xL5vfH>0)f?R84&XI`|UFN*c;
z?OE;anRaVRNO5UUR5zDAL<tvjYt4ub?gfxBW&IC=F%c_nl2AZWZ)^j%v{kI59#_Rt
zwk58h#MM?G3S9XCa@a^8`0Ksb#md#P8Ri?|FdT$yfN;Il0SMQQ6KjJL|C86(Snuo&
z;C}<;+Wg2Y5C;}==eXF%A89IXRFDi8SsQ1<Zg<=k&m{Y?8u{5ajJi16>cQWY?I+Jd
zi!F1ROw96vbb$QWi@pS;c=D{C^j3?rc;Ygu7#ze1VA$JWFqF!J2glE9%g4`}ag-7T
z8|-(&Ruye+cx|Z-sqjE^?zYHAm$#K?M01#r|1xr`^KKBHQFj1iTMfDjVYIi+iT5^K
zaDBDm*gjcFuU6J;u!ouz#Trtq;T5agsMP+oY5(VPf-RjQs5{6yc-+Xp!Vql+$-D<7
zPr`W^tu0SeoSdz%*ORjb{Mmp%58=;x1D?U34fyl0(Tqr1e}}qovyJj`V~fC<o!DM~
zz3y_amZ~)3RT_Sk;ygY2d3*a2q<=O2idNZ}QsrU1%EKugTW>_FG^URwUS;Z7;#H=O
zC0->MOSrC?S;x#QUS@9a?4cpG%<s1OJvN`?MkMW0K&Qb3?yeX_lwQ+z&YD$48>(ny
zdeIG4bYptahpOnqps2H35+V`0yN9Lu^tGGiLJd`@5fqB7aE6-*4waqw-St}RC}fRt
zNo{~-*E1^j%QnijH@tSThvgCvYcb)O7hFm=!pSAGA)4ph<J_JTNGlIkIt3f?bV!*7
z?AxT@226U-aC*bOAaT`W>{PZM72yeU_Vcsdr%%(i*@6a*n2p!FF<aXFH8szg34OUS
zJWZO_;aw9i@0+aCF?I04M+@q|UL5_L9@LWL*SOZFU0$ud^_619)KS+7U655E%FDRd
zSPTRY7!DsIQaEjfxQhOp4x%9-;LtV}0}<W1I1t$P{!nHQq`?=&Ck#aHlZ6KoJ6LlJ
z#2&cc^*}cGlB*iX$LSy%0%FQQsz5}~czhrqXAk5dU(}y45Cn0Xfy7YJ90O7RZcqgg
zJDqS%`6TQN5RDiJWoW9YBccB+XYv;iko<)tR@ub`*ocD|#iJvUoD!Z}>vnvG#iO7>
z#k1{twEUlOw9lf1<7chC7tfyUKR-$Dc~Y!H7GpK;MeiI)qn@AV<Mw6Bhx@TN|FbJK
zW>j)ih^n-4n@SI(l~gT6?*841j~eAi8|6n2%a0zFA3ZKV`nLS&yXdt#ow>su<Zm0W
zTaRv+aHn`%XCv4;@r_fOEOSXtP+$sg7U(nZ<7KXCT8}q$k1dZLQDv40o-l3Z-a2?B
z8kTz(zAO{lu(I2b9}V+kLw;<S9}nfn!#DoU-fIMq8X8hVLuzP9jR2A+@;x$Y@vbl0
z!RdKB6&eE`(*Qd9xv3sC{6`-}OlXL??qF~OR&du#Rd1Z#(I~lEitHQhe4V}nArG25
zabVY8EGun|Z7|Zbzlz**ZD)Nh&bhz(<yqp!;v8`dlTPD(W5uWNy%Dx=8qXUmK84?1
zh(jE&KUfBuVEMsU%pua2ykq~qJo_g+Ta=1hgkg(AScKV%Ls*1Sn%Re$d{RXp7T|Fd
zgxj$=nkc^=dr|>m0q&%#5A$;t6|BW9#v*z{WU~qXKZow$xL2l5|G7!zKa~Xbbe%S<
zDPsT8=cj9vFG9=)Wana}=5%tg%prVKsXJ#7e#9?>zJ3Vy27^9E?`2~WEN9iJO4Tz2
zMQLZ1h&3Szhn<iXoT}T?)2~$X^l#T1GHfel`%WYIo?e;jY!iJZdBAS2#5mwtcC$tC
zY{g8SBk<Q7J9Pzr2TGD*X&eZVCi8*XgR&8LI6IJ*fPdQ}=^kkv4fRr&O<E&$<@yRv
zuTxt3z|-B8lnM}q1`w<YE`|`%6Q4V>QTqe>a7z>ZBv~;nH|A}*vB2onzpr@opVack
zye)4`nRkbO#8y|VErc%(YzUUDoCArCY9rk6Macqps?FGEwg0@#vzyj1?`H2?7?OO3
z5$KHgzw3>S$D#l4caI<6*?->o|9-{sf4y1HqAtIsV^;F^z025o=jr<|?&({0_5Epo
z_hsv7|8V!0*75TfPxoW)zL7GExcnx{=l7svwzy0q3yl&yb!G@II+A$z;<=TBmfhTR
z3)%n}L9~~T!E4y?*S(@o4C8p1=j7jd0H(>KNiXMEnJx-ta+yJ#6L4D1(QC<o%-V)8
z(Hx1{I%6;BW)7gWBQeDc4NW#xiF{n@njfMOP>nE0RZ%QQei%zTQyk?*xd@L8lI>*R
z%k12n^;7ec9YH&}xI2e~qCVw*_@i<F&D(hOqQ&<dPQ4)-PCU=ve&EELk9|L1Z&HJ@
zYBGX1cGxyrAYOhvKM6=W;c7Zl+X4=%IrHcM2VG8NCt0^ucJzfBn6S&*!~)Eh0>_U&
z*RR!aNKlD<@t`0(;__m7(2M@t78FkVD;#eu670x15SmgXnQ#^HPf^1FN*sBK-hd>{
ze5{$@=6=i5+V|goZzty{=Z#7;WOj%<Fku%#XlzE-44&uX)+l?MQ_uYfg}TRq6)!<k
zoyM;@{<5Lq2r@(;8{?3U#dl#~Fd}*@%3M;aHh^h!Wj(-XZ-CDIaA|X%CAXO%&lfT^
zrb?>(d{uZoi5G~Cs|NfOVK_5aG$&UyCs!*gH5cuUfZ%x3t6I~#FZ1(nwv#?OF2g3q
zu(Ok1?}I_BgR!K_bWL+5<V^MBD;pY+EtNe?UdG@+Z@4`%9%itMz)#der;;rds<bfb
z87`QYk14a*&UZJzb`wc4r&?y=dFQTE_&#UqtMTT@+pp$n&bT>*HW#4szAE*!>rSS>
zjJ2stsAdJiZm~NoyI3b`al!CDwuD>3B~$Z#)UVJ`&hEdeZhREU-Vdnw(Xj$!^H?`<
zBSE(%gPyli)WCHqloz)gpagH1803I+qU5aCE<Ib(ucg;@hvf>fFu{;)@>HLdB!yUp
z5U+F_R%unY2BGx`kMt@6D57JOP4}$Q?IHmqVY7p&i(({-d(S9@;-^x|v!fhHl=7T<
zvnpZXmaHf$l#|Xip-D+_>djl>DJGLt@4W(NaJ|CW-;JzGBUA76FiH|>Eor_Ir*Bvi
zVw{lf#~66sJUiFtB*0<cK+I5cT<`$a1e8tfH3!C$3$Q(S%c*7Kw+i1vpAvvB;_n?H
z#qZpzzi$6;mE8B!{l6Oz8xJ3r{J$Tq-^u^(<bR(0@6MUL>`ZR4Fbn!QJ7Y;+;=i6(
zRAR0c$5xx?kpBUck-LVA=-rskpA@}(C*fR1K4L(=HFQ5u<_@)n;cv6#qIe5g(=Clc
zf)+!DcXO4t7esUf+lENP;6dneE7?sr%t~r=v;+0Yk~`SI+_DGB=D(nLXQ+<ta8P9d
zZ0}mhD586U$<6Wa01k^vxiPKT1k-|2+6fF5&x(I9<xE(mlO=~?(ppOhI#G$PgZXKn
zN7JJWO-CiUz2Jl?kTCAGi%E__<!m_X5$Cv%_ZuQHbFg1Ie6hsM<#fcRr_gLLB+d-4
zhGnzp<lUS?>v2e7WIT)P0>#%;1_?p3BjTk}2(^l2U-fAEtsMh&1d;OENhufs<&+vu
z`kthTcXcFo+kFY%=0C*E7e3^Svz(>_v)cLsD~Mq*mmbxVf??KxNa`yOa;Ys@tr}%#
zngiJ^eSw9jn13IiZAH7Yx%tn&N`KrM-YP>KF!7Y!8E;Px`zOd6Zvl$NwT#FbC(C?y
z0#So-CS5wYHUo5m37<(9K~-KaziC=~Aio--?Y0<u6FTOq*s`hWVi-^uJH%NlcH}BB
zVBsV{7&36F95O9UV>@Ykn+<Df-}4UIj1{ZdNNX&7booxa$Z9lCMLfuv;ipYe{K1m5
zr+B@UCUA9?GehO)=NyiO%5(uUQUdmsZM=Gx9p()thC)8}n9^W{2VBBD<4Z}zU@12C
z_(Ma*b)P|inVovF9;D(<b|_JKJW07dsrE}%9`8B(*MJ5mDb78&W`Rp=iQ8r^*XtB@
zPe?u_TV}nUEWc0m+DL%DCI75@QCyrNU`jj}x3RmOZX}Y2#PCT8jcCi(=nRS-*=-i}
zN%f*q>oZrB!#0pTU2AZDJ|emMj4&a3X;*9`!B~CRK<&4lxF%9pdUaNFPwXzzK4vP&
z$@Hmh#AmM?9ma}387tBL%lwAZ+U045o#$%`+z`FkO0u`o?ix^?FvRYfA^7my6ZK6d
z;K|y+wFwU<_SGbO)wQp>{wo#W#k4<0iW6^#HxxJ3uuF{d-#`?pOcCyq^kpfjx&;1c
zRBzrfKKH{_3hEcdh(F$LX{3>U_c7Y-Em~hSC8TMTGCFFlf=%3N>=51X=hN2@mU%fK
z{p-Bc+k<7CZ>Tbxf?JrZR`J9>C!PK05z&nZ#r#h)?|;w={Be1l`SH1>0rBt#Xp3;-
zY*J2Ct-iNf=H0@ONC*JcR6*6JWx^uZ4Y_=R-eAaDMM5J=5(EVpsiCmb_C^>>3PX%7
zBQ65`q~A2kZ+Lz(s+5(UH{#Db>oTF{nlglOTC4{J8eswPW-(O_hCE*u2QyQWesky=
zRp}^4zyN7I{t{#RcC7xpFpsuiYHG34cIb$s&&?Xk$Udhk$d@wSDObK${m)Idi27$%
z{NMFQkG>7{KaanCbf^Eh)BoJ*f2s}0Z=XR*9pBsJkGeA|zcVWTS4$uDa(9|`)O-5{
zQ%B*!izkm-6Z`TfobD1DBAuVJH?R8&N2}OkogIP)81KwZGK)OSv(5)vXfuaNpHZ6`
z$?IPI-J2j^RWFP~%1}kEX$!tHM;bzeQ%G(<-_<dQi=o?r_Re<gmPxM`$Tx*e>4A|B
z7A$I^W297_H=h=yxh8ES$Uj7zDhW5H=h>oov@_FQeL`NUc`eu-!uJ^Op3kDb^M6vx
zs-;_NlQ6&4-1@JD+KFY&3hn198+tjL)l!(w>guYFiCV!XwVdLq?>>fgW<6bfN`!UJ
zncf}mkLHo6)E)^zQPzlOes)~0J(22|k}XgwT0XfMOQyAOCaq{9Ed>4Px`O%BVtM00
ze2GC0JRQ8DcQ|oYknawRUfD9>dez(vel>HnBsmoyV|kx-wfzXlJh<GfY~;H9&?_%U
zvt0PW%*<b<6k0(BFcG#&FF7d|RdZRfq*LX**`{!qd`s$(WS6S0QVaa58>W%>DlXY*
zR1kDaOiu~@%G?3rkdKkBhATUacWph2`kgR*_oRjM$pRqv6jTH5DOs>zaSrHaa&^V|
z$IYa?gP)R?N=$z3Q3qAb)zvCxf?iHs3M@h)D2Mb?Z!u1Rx~ZTS3$32?$UCMY)godP
z+2dwlekd?6H5)ciPS3`Lbbhv^=7q5`$7YgaCq~!B4aTIW{CYM&lxng%zZw^ynRdyn
z?0T?0d-lGT3R<Ffo^ifJ#-I(mvz=^6GWjKi!;@}jGDVjtf#3;#Nzul^3>f#24y-Og
zGU`c5@pT%gW(6z_wXD^exFC?ueEuRcJ9(I#dc&CTj#_VOv-WxRr8Y*Ny*K~Z-RZaH
zrNN`C99!c{z1$e9oF4Fufp5;xdOUL5VBqn<APL}o%8@H(or}3n#sX&}nJ53Cb3mAA
z^ayfriyt*4duMM~<vLvQ=$a||MO|ue4uXB!!mCQlA9br>y`HH<YCUrliaKuEn%=cl
z^=4=Jb0#m-bN!iiS4=Nl+io9~)9@)fl`t3()_2J1qdT8uxpE3-7XN3pVb5vxVg>jp
zixkrLXQZlH;goj^V@u4yCkyWNWUASW!)-TthwvJn!DJWd0<CjdVcRf=%jmPp@BtTL
zQ5%%g2p*I?#{QV7`RV~YsT=_5)#m9wC5GXAFoe-zcc!(Scgt{}tRSrAcUGCCIzEpD
zi`+U<Ll*A<elClS<bNl=O-Fq44t>K79PNs4m{E0=bxT*-p>L;w$aIFS#-X*<I9$wD
z<J7)74PPDFSBDW>4S&mx*oNVhq=uSMxx0J+z3$%Gm+E|X>1Vi9Kjm_CRx~Q{th)_O
z@l(+ut7}ucD4r^fuz$0zPYt3s%Oq&ph&}V5&pF)HZNw}%-pE)F)mUeYGfE#QA5>sl
z^S6dk-Ea{dm5=Ok{@VmK664F^U~+Lur_%+WP$JDcl=E*-?pI-xN@8-g3<F{zfCMIb
zQeWE0<ZOOzEwrsO6VqdLubx?mzP&Zap4B_3BVg;Xy*tWj{IfqB#o7RmJbM)7vBKiZ
zTFct4R2SC=OA^+_(*donh@ue$QfH$oWy<CUIMn^vDEDLI&*;a)az7saQT+)21^r+f
z)GyW>FXQ>X&Q~5mqlqs$D%XjKg{@%{K|r6wTbeS(Y?(Zr;GO+X#QvvqKAOw?XO{g>
z<MH~V(EjI9<KdnC&z=3xo%v3s`3{`E+gR^x{0Y`O&j)Ywo|x}6{`%MN?#y@Y%y$+y
z-+B7Oakcdhy^I*|%nWU?6?bT0cNKfuSkoQ1-=D`GKZKNOI}6`XzL%M!G$fXBGwHRU
zOp5%WVYgcJHg$!52g>PeV9EIw>zK8bf*1bl>PmU`oTmcvH57fbMeX@|Q@;o>x>uIO
zGn-;Egm99RT&n>2@p)#*QL$1-i<Ms9Yf<K+)HB+6uq@;xxOIqg2y&YGLHiQAvizV%
z<E8-m$vy&nmiJ8Gut}Xrokr9QqM$9?QB}Le%&@?T)HuP#o!>YIHUtsoY1wDOOmkq_
zV$gc86?bW2pBJ*c@hjohBSJl9JQIU<l%H3L90DNkZww1zP*|x3O>U_#V69O-00*@@
z80IAto~a{uLJ8UWz@*TM+)wxbm#bT|AU~QPbl>bIK5o~jpK?NJ&uXQQg*w<E+^AQO
zbxc=uC&q6BmJ@>B;i_^Q=wx`Fr2I0Fbo?%M7uXg#svjCGVfG=QxH0@D+%*}ZeavCH
zT;~&?sGX47*#d=N5busV;x}VnVANN0o$NX#Kh5fzI@Ef%sU^YrbRjW06kKdLr)D^2
z0w9v9<@Z(t!ymvyQq_jf&1ut^>xtzWv}Y+`0JlO$IM!0CY)_=4jfiy2Uo?Udv(<qW
ztC6~};Q|i$aySk{A`GxJMQrhAxw+nlsd6zJFObT8<8PNxvU%hW_iZsMfLfu5D9YS~
zfbH-Gt(6)FsSv!fm%!90f2QFVAQgO9AgROREQ9$dz5v=~j@^<56ENs@XvW>Zg7A>G
zPgGRhOf!1zweuc2w4Eq>Ctv=^%aL(8LVH1(7~Jcd%qB$X49bxV<Cvs<S#&$FAJw$j
z7=qY^E$&N2WJA9X1WpO$v8E@@gC{F1-XjkNUAzh-B1|8KmsZG-^;G2>z<$1z*3Ula
zL-fguXioyX8R<!o{*HQ6#;8Hkqu+z;0fO&lW;^rzF}n4!Eg{%2`A9XUC0Jq3y}9uy
z&1sOc@1ystlyYgVVHva=DRxO`uY`<|Yc7x>yNO}oGAN0yLUj~gu3Y6xL`y3fU|O}E
z$|w_o)k-Bx!t5v>t%c>#X!UO%@+_N5Ydp#wj@eU}I%zsi#Y?^Y82omaHt5>kpg&$i
z1Z(i>NR|w-39fjl8?<Z|O9CormOE=P@wLXX?jPTbI1E(3{0Mr5+>FH_zIXlP$MmqA
zqh~q_kIvKe^l3^2l~-YMjd-SE^U8f<MagE3?S5HpELD|pm}j*y6}VrCk>}ObQt3Ti
z1l~Om)g2*Ob%~-Z!;zb9YDURtH0hrWN9xR*(vV0MF`L^nxTan>b;D}cp?@gT2x^Pr
zIf&!{AAqRCg>)Miy?iZ6pAW_)GYD(nW4wT?a1X=Ww=c8)1q5z3tovFmDE}n8HfXbb
zjXT|trGQ#!JL&vmj9Go&7`b9Y6>E@qVDTnu@D^h&Zd}pLkbvK+#m)qT4+dLzFJq|>
zKe*^3ih3X(Y*cL7o*0Jz27>fWP^wo?dPT2jy9d6?lwedMw6kHJT_aGK#tuj@mx2?G
zB#e7%&x}JF>TT7TvKk>KVz662p2c1~9hglzF1qvrNJO*39+{~QNh$zdK%u|5ezj3s
zS_<A>yHf}G=fK&$!fOlDcbpACIDDS@2_MZnQZXrm_17e|fxj!@aZheMyVZO}B>uSX
z0<uQQMRL!pi+wjh@@8G6@Z7)vv#}0t@3*JYmE=|iOahq^3iU^&V24>Zgn<BJej`#9
zPaSmUBJbxz^m8+hdut@n#zqQ)!UF|^Ij$Q8iX^aMG>f-IXJTD6P-s~66Ol0DYS{Ve
z4%(jo*SW`RnQ91BjZ#n)07%ty^KAIRf~(mW=`?M5XD!)R>PtYWJhB`dyig-z8YYAX
z!d{Z>Iw!A_cLSz#N92V@B2`RR47Ff3H=&@VhapYo_BP6ZfE`Aus&H(b3AzVDCFKLd
zoij+q1BAKSceYv9Sb}x-!A#PyTjWDOf;{Sj%v?7U-&gQ;(SI|yzaSQl`wZf%Mf?V_
z!meS&ZYE<fEiuhfoz)&h43kJrK_y}ok|WCV-kXrmpelr6UIVwpQ+w*dg5;05+ip*K
z6EdQ;8Rt2!-Kc5^Sl^NM$+71YCaz+EcHFPcE{n%em&K%53l9McrRX4p&Wi*x>cMD_
zmYgCKhxio<$Ky%FZ=F(#p|A#RNp#5Mxl&$|lJ5a?-BbeNo2LQ>cv?dJ3-Y!E15+mv
zv+kB(+~3k1&4t}9$=vtEG=Cpb)5avfzL+=aV7ZX&H|{tY74UiwrwMoGlH`)qmAyrs
z2PPypF~5@RZB}%%vo5AMDr6hF(ZJPc?nWEo?7-%GmvF*JoK{-U2_Yuq=#eGcQbJe&
ztsMz?sJQXruri0%kAbpLvx+zO!wl<}a*`x8C-j7}sFjHu7dlC5U{z??<It{f>2n(v
z#?iz=_Jrjo6mHFY%<8NMJvT(>3TyC~>B;VE6nnn`ky|If8DEMAC~C2t_n<xN?(A)D
z9`p@Cb1(*?1`<Y%(_IvyH_cbfM>6&Wt4`a1k`l0!PaQ?<1}mxnQv`#Er=gYmYz2h&
z3FHDmAyl~nVxN2|Z#}sfh@Aw?yR4(%6Qz~{`a_TpVI7TA1dV9*kRS{>TT^sCf*e4#
z<7o>}bwh_sJPVn!hh@`LvL$*v`!Ol$-p&Yhbqn(_Qab<p*?$#p2jj!}jK5~*|7bk^
zHq8Ig`0i2T&i?Dp{_D>E%h`Xu=TmnVvgyv|>)*oW>)>xMP7b36U-D*wJT?_#KTfC#
z0h=HP2%IoH!x7Cqno`glz{|a-2Y*WjH#u)t5s<tD5QYIUSmpysbJ9jF*|YgO%}%}q
z7c-#8v-hpE*x5F;TXr@|cUuK!|1&i<YRSP5u$6|%=q7K&jE<;t$jV>SLSUV1Q+sDy
z0H6G{|C|LsqFk}HwlO`WyxTv7ef|SOxmp0qRRE>BWUnc+k|ZGV-vm7S!JtdisF~7n
z-u+-UO@A!sM$-Kd3=|61*@q<`CzI`UgQ|m|RmcXN#MerPk1DbYgvujySzl<yFL>w!
z(2SLm|HNfShI5cQ7ECRWIu%&8ExppUS4r?^hHf_Zx$BYX`OEh5?()m_UrIuw;w?-p
z$y#Kdr$(5<mu!c(4lKiH(l*C>R8NXzhn_$rfC7b``TMLr?y{%UCq$(p=4Bo+30`Pv
zSqef=j`FNa74L@&I88Dl4|(pbyo6?NR!Arb#FL8Bxt2P$bB$PV%>(=Fyc7QiS(}6j
zF`r&i*EYKhI`1kkw`-Ws(}FkG<QVln`FlXJ&<uGVO1zbIFCJAih>wJ*a(S1mF-|B`
z4}w=&8F#5e-ry=D5!nz~@!V57e9~>gGLUy?kP6EN#9mIila?viqItPrib(Lf1n~`f
zIF410s8T*EzX7utb2We)Duj<aA~G4i@hZ%!--ZjU{kn_!Fnq8iSMK1&9?5hiDMo;H
zhWnQERl|$iY;%LL)jf%_3)}_3E)*J%+9iV%YZ?P#2OUHaUxAuYkRAD=E+y)$11NJd
zrmKy$brlNSJf)$!_*AZ)vT2xtHv|mU^iud3^hb&icvVlXT{2|$7w1L_iCAX#a+AZ6
zhymtggzjh%3&~6l2`I2BQynf<w+CKBwKpoUFBRM2k=CpfiGG9G3U)bhk7?)_4TTk;
z7MSb;Y6XNrJo7b~8mZ=zL7e2QsDd#-;E!Vbfo)Eu6I<JA4%2nuAC&2-DJh)=-D+Lq
zI6w<5Y&S&f2U?kWG7$SeG@2Tr+qK(LeC;gpc!_?Hu9SN6z1>}#_7uFPX?jBw#CBl>
zsumZdY9n=g#A)Aop41kyWLro_EOQ@Kik`j8!2oEP8$(s!80K}%cQq&&+3=If+R4P#
zD9-(KbW#>L%QIFIVoDOa_*0^kkkz$R2p-Ft-5I$2JCI$tR<g!?8n%<aH_U=qki_<O
z<?e=N-nbYdc7K0U-a+^8GZ0H_*4*v}INE){ZiCngfArEG4?CJ3ns@P0XJC*WTbTHF
z93Kbn-8YO9RC;urozc7OCM|Wa?w(++Hxj!hXfZ&jBUs>fdDkE&v6>mb8+bS4JG0I@
ztv5KhPuiEHgg?|)+KAkNHhY(S0JytO;9W?(6vTpX(god-g`&4t3SM-ch!v`{3^%ex
zP;M&R&H5TE!c@3NeU4V>K)g{g<2V&ryHmV%eAUS1vEYvIeuCQ}d7y4uo@$#d;#$!7
zN~38wCX2<dWrV8g^_ygw66fHUgkWxdUWu`AtzUus7o_{2P5!(7-Fl-W|J~TQlmFhy
zfA8eKG5=pWeRs~kcY@%54?*zJDd<{bf?#=b+nD`ulIX-8u38HrS>b~*Su~XjBz`_?
zpvp#m0ha!ZJV}8Qjou^D^qva;zL-FB0h6{_Lc@wk{N(`orEvEZmS8gll3zO5J)*}-
zsbW-cs0)jnuoTRG(R9m^_QX~KO=8rW>1J0^iKVu9@%(a<f(`ahQ!pO3f{!Z{VX_#F
zyl7Y=<76XssEdEdrv`n>dxN)zQ>iDN0TF)UV66k;_e^`+kZkmtBpB;pk={3>1RId|
z@Odqv^GnGl(MCQ<HmM_}7h?2+{Q`joymmp)qy2JrK~D$$zwk*W1eB44`*hVA0t@3x
z-UkJl=KJ>Y&Z@?Pj+6s3WEcXyo862YnnUXvw3T5!2IjU{)ltYmm}2gJvw`7}xYq*T
z2b9<dZgU(iU|bykTJm3pv%4Hj48z~e$F?IdK;YU3ZG$<00-<<c80Cv4)L4H+y<Kyo
zcKg8&+<(*n*^g&c)KPO9PDe3jcat%Bgt1lVeR+{lt^(5^z)OeBrq_#FWrx6fkT!3?
zn}WQGu?`pA(Gq9rd&%LFx2s=LIn?ncE<y5L&4|rp)tZ6WBx9?sZLCkfs$wL{FxfW=
z#T46#NGvMdB-K{OXbZk85&Lm0#r(xS29c806bjH<)HAg+Fe~=kq-bA{2E*|xAZL>L
zvbf(CBs8%N{YrXy)@RoM*073*<1%}T(dN!_P+g!L#5to8h~OMcHwh_3y#aCgV3PvB
zO=s~OP0bU}*~)?s<}P%u#qGh%SiO8KWpb+sr9-sgtq+v286T7bAIV)y;EH~&7P!X*
z;EKKraT#+5^fwP!1H#D8@-C1JL>HuRPCw*Ko#dSw5qHqlurd*CRBw~0Di&mksA@EE
z(lg~IWyM4O;9T@Lp1W!8yM$YOB^Y266e9!AMQD!k?7O^F8w+0N-~`#v6Lv&#^1d~^
zO(9UEeHKEX5%;r8Gh%jO2U>Vk^a_wq=?UpgMu7cFoj~6zwk`;ze`{rbW!kVaJe294
z_0?m93|AmnCam~si0YEEiz8jr)nE)poCi#>*a(JtSW}?{Q);cW<R52jaC?U0IL`Nt
zET3=oN8@WQh<E{85+h!`<ePM<?2&2lukVI-<7$*c7rOiYvq}?%_4za8f5W_+y>Ag6
zarvq8zxBtD8=?Gf<GXL~;{V;r|GsAVpEv7SRN{A;XE)wiD~bH>>^|?regER(zOuaU
zX#a5cm)7y~7f<(NLO=iY`~m+!_LnH(-$4gLl=Jiz<UE$(X)|4S(mm%$?ecKqk-fR;
z7PR)(KPhxrB;VhIagvQjhT2mc5&N#%uqneF%^g9>-v)g)+Kmw^b8#0KH|2pZWIqvl
z(zhB@MGUnY_RW{)0ge%4vuD}+qBrTq(zJCA=YTQ_GiEzgoWM}!>KqyD+R4CK-3b=;
zQ}IQ`nylImM@4-Ca!C8khw1gcs3cLL-o%q9_5(p3<X_S6z4;8vs(C26Uh;^oNg}Aj
zt>v&@;W;N$B%0`b#j0&?1G2yDL6;4TR0m*KN;GsoNsLMdaYX_=a7FB!56=aWU3d-W
zI!>>Y-4j0tV+v%|;c`Wzmr1Yp!JMtZ*gDMF4sfL!Bp1?;uWAyVpX~;`<6J(*aM`$%
zG#+MfMUuH~dBSkXTWAf5;;ys&qUe)!&58S_%|~tOTqy8pPfgqe3m+0SOJFb)n~SaB
z#0K^~S2kWq5GKv<#z*HlIY9*PIBOm51w6W8cIpOhB#H%FQSr7))!53GH)Q?C7CQ<E
z!nhJv5A$yz@qu?|($C+4Ip?{arC=+316EFEvT*uq0?P7Yb1eKU=B%1KBj)U1)d_HJ
z{_oY1{W7=V$6Wm1x8`3M|9$<@#-ls_@2lkhR?VM>$x~$Zgaw>~`Ky5RDETqVyUCBH
zUOzv}CY@yK!aP{Jun)dg7E;LO{evI(pSOrQZoN8E6m$7aj94y5!FGP|ih_Cp=+oSQ
z@OUR_8{|Do1RpdbK;#{j=UCeEkXm?kgm!jN;<gY@3@bANo@#zjYD`bfhzxUC0^WM{
zayaOiC(SB%vKY-7>#iint{V52Uu(yb4W$6%y5Q!fTcSjdJX+Hg*xdBO3xx5>=d1fO
zm|d&yRFi^mGqG_rcsJS|gU_4U-6us4)K#2rMwIA#gKkw3qD}DrZhrDqM>fc3pYwUT
zIc3)c9k-ir%dc;%P3bU~+1z~DHF)F&p|etFEm;&JbI?ErxG;0p4Yzya4#hLmt8Pv&
z{qs-Hh^wY&hF&`#uXYE6Yap7<8eFdq^S61{g`cmE{19tc9seL|CfJ3U&YS|CWwW4l
za^R9ANDM*(-468xIqf#6SN;n$Lz#}>1u_kT`@8uFS@XX2KpYRcxoM|O!zG6W=uy!w
zz(bDAZHW0tBGT|vXkZf#&AgqA7_F0V1OZYbh9lWserY!TtK}m@*BVMYIf5dSn;WE5
zB`Yh)mCPo&$^UNn)~vghxCwhH6nVtK&``<{OpV^((4KNJ(z;zUhvpH6UM81Tgc!`O
z0%&INbc8L9Ub?4g4JP3tA45zN*Rfa9@L)K;9FVI4%RFqfD)MOHp5aaV(_k{n!IdEg
zi<t8+1=8XHpUPssf8`p@FygF%haxc;h%K@<mb34Q<Gn7Ask?IbqTUdMIv-%YQL01X
zX4I&TWK_JjD%Xg6L7M_YSCU-@*ov8Eb^#8~G+fo~!b}FWLe-9-^Mjj1v+MT;!+t(Y
zp)BM`rO%q7vfe@XI2X`Q^@KJcOy~IoB&nu~8p5w*60furI)hH4USFKT)X8}dVW>o%
znfZ~{=&Tc<tsNh{rWddQf6EQ{b<)et{xGNKI42p$uV&j_TWaGOz8x{{p%H{HlV8p8
z!&*HmCC(EP<nU}#bjPa&ZIAptU*ka!MtY9msM921mc3znU{C=ngc&)$%Z4ZpI6=`N
z1fvV)%uX4Gr|IDiHT%7XqNGutdvmwB*~_l;JzLDIu<Ol(2tCt^l4#zX?=1N^8WIE$
z;Yv@AabQofk-HDKG*};)xpd&lF=usb&V;#Py!+>N5ECfcRK9fZ+e}5P7`aehNM2CK
zH%moZxH>IZEuVhTo7;U2%6L0e|EQpaba*Jq@PZ$6lvloF?(015%tx<z2a#o&hQfLV
zr_>i}CPEpN2P)N7@wz43lE#h}R>o_x)K%BfQNR33>JH)hwx0dscU{$>-j^xQ7+I<3
zv-G}hlWo7BRX0||(W(?EPHj|s@XYPA3Hfs}gLOWP%;p{qu2VE{!el-m2HPJF2gCH<
zJ$zuUz^9Zo{6swaQZQJm)!}HoIyN_|K@ki99_@n&>xIB1NubN6m?WV0Kv6G}n^~Di
zVYJV<>(>mvt?|SxC-#en^}NEL1KWj-jWUsar(*VGa3QYvdm_Z_6tiqz$4)ezs$u4&
zKXlfY#PFB;4V`HEnJr~CumI5#Rm<VuDE~aGf;0N}{3T|U?E_UJ-7l>aDq3VV;)~=>
zTmah5U!vLwXB8U>YR21Yy7CU(w4!#weicf~SHqxdri4gPWUWwt{%jO|kdcXs$W%>h
ztTp;wTn>A!8(3v}*$lT$MK44PaJ!BGJzv=odfBY%N5<6*t)7!v05e3tp2_wtF*CeH
zkEFZd_Fs$dYzWR=<Z6%<lUcxlz8VB=BRr=ZfLd9h64^lMI<oGCX>EMj8`-9u1?Br2
zb627FGeWZMzBA;j>1feq*sSCR^13tVC776;D0$G}!VwxJsX1Wod{JKWTCHAg6$n!W
zVHMrvoe%69Z%@Z7A;;$C6zL#R+MiS821r_NXVNx!GQeRm&x!-9V?Z8YfDE-*T=XsT
zWA-4E`83631yG!aOn`)Y<|cr#$$y&^m;yvVqgd1c&Mpieyy+IWEq=`K%Ohes19O!c
zD$!*lOU!lr+i=ZtNKICcx~|mK4)|so*!t1n9nwZS$<Z(h)(*bT@aF9$7*HM!n_&(W
zua^NtU|;k^&>5wJrKz?1jyo)$)}wimMc-U7B&s)zZYMW1XUBONp$B?L0)Sv#@UgfJ
zc|nrOTMKT6Q9j1-SMCN4?gK`B$i&hZJoJxRsFvA>`EZ;Dd;n<o?jaxP-IeSJVJ<lc
zA_0NaAu3ND&2cRGaL?W%cD^`#D2_}Lom65uPsZRAORqu^2BW<SNyZYNLzLL`dIsEN
zWC&aW3hIDKzddFfBRGYgID%r1bHnE3N!;t;EG=``-CsJ%lSVyxveD$nevITN4SxJ&
z14GP;V8uLWNDXgmE-zUmf|3_VfdrOl>Ogpr$Z4)#ZALm6Jw&gh)mNZ577j96bSK@d
zt=TTedrOfP`Vd;APLm5e1gy1hNP=hcKG2GZgzPLIzsvK!x5x~U%%He@f%XRD5B84z
zY49$83lYmY6MGNBW_3nCRFi?ZT|wy`#+Z?1(kJXtM|*k^RiR*$%JuWSLxM*r$gm*k
zf@C4$kxAew9v1Js2Oeuut&Z4H8-j2RvlV=R=75kc!=z6kVk(J;h%ECZO(Lk%l-<>?
z3VvZFsTLz0jGb__cP6G!S^x?zNFtEZt-(4w>wM`BBf+&8o(@>k`w=OrLU+l$`0lAE
z*e*zyShzxK5zIE2j1AU%i6NJoQ}Nm^Xdo~~Rt^piXR>8P6qo~l<j655sX$nkkbNT?
z4-S$XQi*l)v&qGU0Ydk%d3<RmvYQi}Q7~`hCqV66X6%{4u?8K+E;w7JyrHaSjzO2@
zWk;7IPXRL;?_f!EZ&Xqwj~PHomZ`au+<#Kn&#-=S-^|+h&$-3C7T}bv?l3#Cy`4N^
z?g5*U(`cp|28p7GqW{)(rc;r<gH-IWat2j)1O-CdUZ``p!@-e?8pjJgh-8i`d$L`7
z^?y#x{yCEZ-a`1gDub$Ru@4r0T%>O<Dr?>ZF}psLJAfidXNDT#LZq!Vb8P`PnBu<d
zW$!5{{!T3=7O4J#rxm`@BRk*^Agl+SCyME7LAT?=d=llGH=;IlefwLHzuP(mwrR&y
zrFA>=n@~-k4aVa^4_`?aR|&Ldx#!QzF47foT&r_{wkUGj8ct2EhK6}yL{`Rb%W4*a
z=|QKo(m#e&S<9xKF8b%-w0ul~E1T6Rr!7J_&c~x5G8SGDpjzXQo0i6%MhkO>opE?v
z%f#;ic)6M)MLWmrI#o4^76+B11QDd^$L@hbk{u)2RF^b{=&om*LQ2LCOt}>UH8Rve
z*v7*=V;viiy3D`fH1q~<^N#okgYq&WwkZ?y<*1ytA7QTWh&0W5b=pE>^~!#qeq|vQ
z1)*bMGJk<t{Pq0*HW%412E-HmPK>?|yWnewO_kj^_fjEkjNT?YT8_e5yWyZ(@~4ia
zh%t}fD)*BoK5g}tUmxDNjZGF2jD}%Z>yR0P#ReZg??PynBC;Gyp_Ap8Uy(n1RX%ZL
z+&Lv1kfR);!J@7kf<7ADn7=IyL(D^G^Mq_9(TL9{-N=!YGyiK_kU!?ctOOB}tC_0L
zg^oWMjV8H-SuDxihuq~%M*|-Y47b^ff<2)5rbJ{6wD7W$jbu%t$fht8+)HaEXJ<w_
zdv25maR0$2csr5FZCD6s&EBfjbT*$lye(S5jkaj)tygInr-3)LQl-?mxu&B^PPAAB
z>@i6Kt1eTIAC`<c5#47Z0zUSzFzoCrGu_gOS9Zwu35Y;Uk?^_gt^^Ixv_@GWPT^8K
zol{f4@b;X03v?9Zxe8ZZuY~58G&h7VwV04a_n`>##9+?>*z0P6ZBXy1nUr!ipZIh$
z8<$kWcvS&I*#SAp2T3E8ir4_SL_zqd83ND}H6<%27y<vC2<6&T8P+}f6VMa`om8TH
zBana!qBO&<H(Fc(_9n;=S+l7MNFHl0kj{&~5;6q6QFa50UJB;M61keL?St)4L<-3<
zHSLy+P-;y9eTrN?zSQyYEfZK0qw0P=_e9PZafgbT`I__<7`yW5iV^PHy|`yxq&i-)
z=HTylW%Ju0?GofR%umL1U{85u@5&vfF%qVq8v77f9XtiXIFiv24pRHl^FY+Mh`U|O
z=A}xYv6GyedlxSEN;_r*^s$Ndoi6%WZtxe}o)qnu@`Fe01C(Xd7gaWMBL|#%6na#T
zm@DeDR|ru7{0xX^opjLgS16d@T@n$lV!TT0;lZP5ylcseK6A?`zy+D_4e6PY0ioez
z{)C(E0P?J9Q?fp!y4E9N7K}VXJ61S6%;|FGoX08!XvqK|?(;a?YG3<3zB<}c6})1b
zn{z4C(ICtxKTP+EkzQ4gTot{blDX>)uHKee126S%vKDR=E9A~bqR=%fEiK%Wu+Dqv
zg{>TOwW`u5*U+i=9ll>uqA8(uKyBOa6y)N*<z)yQSt3JUf%VdCnF&RSs%d@i=7Tj~
zQ1UUgAz`N{B`e9L#9K*tj-oH~nG)E#z4CNCw?1ihM2&Bs_xL&68F;buk3l)Kov<LR
z=-1L-y)TI@K`Qi3C>FBz==KE*)A~$j+2@>mAVuNSVbMPZ<MU41uP5uCnOfRkZPaRh
zobSkFl}@i@+!ga|i$(pr7e?>81pa`Wh*7V47q25Vz3c?(T{QIovKpbv0IKc>MMvU`
zBblpSJ?V4dSL&Iph{`0o+r{i0h>hC0PKGG=+{iG5L$Cly^3k~H2|2;7K|qCcV;R=p
z)B_PA%H0g;9}a$bwx9fn%Do;OvkYvOp(zIk2mu6LkuKc|Tg9=G$5YWN7GRQ3hV6~n
zr>5=nee`d#csi#l%c|gtway2*)4w_%dloXR(anc(&Cr#fIex-a$aBM-(Cy>={@)5u
zgF)I$7YlkLD3s5)2^m+zeRJY0#Ny@{U;LJsWkQ7)L^+c{>DKTU7;cQv!mKP#;cLKN
zs)YGHYuR4tee4ikxo|&LSV^S4!qc&&N0es6Yo}76GUlYgW6%HIV7sPHW&ry_a<+01
z3J>m4(J?r(R!?5R7f6^p?H{Iq!RcZiuwZhA^TFir^N}^0FNd8yLZa!FyHx&e-W}EH
z^Hp=MP^ck4fY@rDCOzhG{5I>(9BsfhlF<Yw4i@ph4yxd{UP?tX@Fdup33Dji)3gA-
z3myPJPB$)IGUsrG+8aS%pL1PO@oNej`Lt`Q>Y_q%2O9(${K({3O3wTz0}knkc(tDo
zpA&X}IUZuT5V)y2?(VXRr>P3~ZuWPxM>lnnl!G$Smash)$J_z3sJPJFG>@__FkA|n
zj-f*_*BJ<8BUb_zFY%ZH|ApB*!vg|th=Zmw1n>~Wa;yTAUITuVzc({w7xp}jlbF6d
zt)d?Wvg$1utl#jy2wSW7In_-SGy|%l5#BH*@h?U71s+>P_O)Ss!%W#XdUq2UnS39i
z6Vn2P+fHW`8_nyRHx>PTUA$=~e%)Y?POrVvc$2Kofc#NWsmmUfS`wWAM`^aaS>nSC
z6KCfrHZ-J1(Mz5hl3-L%?(K?oYVuczpmjVkw}7Qal4k~BaXy6b;$8Xy&@-^ZI0ApN
zBQ1gwLn|PJe$a$aJ=xuRmY{F!8U&d#J-S>=jtA%CwdCP{ri~gz?rJpZ$xAR!`_IjX
zI8+q-nk^a?8maIvT4<HM-*^uCPO~T0)|>uLiG+|*<$;WQKgNr*NacB5878M!wKdSC
zTI>Z~<8s^EI`vRZRC)@$dM`(zgzY!hAsJ^cS%H`$%dYk^TxKWaw(H;+k|@6!6<yGW
z`>czyx(o7+dg4AMYJ?l)fDO!2%YFGI3*Dl7lFP@Z9-JPC^g<+hlj{khvluLqu9I1Z
zsS4%^$(;VWoA&<9%6B$sm9u0_zA?DeUaxKHA<R4k>%OuALCD=D7^98&ayoK7cL}_Z
z#&VNUBl857ivxMI4v$;`dP77Eb%h8S^Sz;j^(drlaRxP4duN+<1VD?Zd)|+NeH`VD
zHbK_Q%flp)<XIdA1_9UBPAwx7L7v@uQmo}`_Cf<034wzMM=A1NLeN2^t|9ReWR@}n
z(I@H|goT^NN~cIi)G#U&*$QA_pzAehn-aQ~(w6nd!w*SkkdvYbvx>2if;lZ1L>%K_
zKszEa6n`Fb9cLYb$?WjZd72G(+SkNds&zrp8Jre?48rdLM$SOa)ga9w^L+hyVzq)z
zZlQ`7uN+%gbEwL)Nm|TfE@!w%4zL+hsu<r`8jOPKn<vRX{*ip+I*b_v$2b0c=%0?s
z@SyWPvRx6mL0wnGV^_>8;BA!sj43ZgHMp9;v7Js!n+N^Kp3+=xWcO5%`cfVK*l5te
z$D>2JOSPucEXBexQ65r?vZ&0F47@E!W><<*uJmE1#e4f!f{J4m1)@W{-l@FTRB+hX
zNNac9YhPjcaY<<|X&AX0f=BM00Z~LK$NDqD)*@Cg1v|oT2yMxv5mj$qlt)X<9|8l+
zHX0M_xZCN_hK7N&_!~T+UD^uU7B?kG1fAxG*RHlX4a|Moy40Qv#;ivVG=~fxeXg~$
z{orQM9bDkFD`}>i8VpKC&{xk2H9H(vO_P)32OC6DswmSq8)wBzVQ3aLGpmJ25KA_R
z%q++z<~>=zfn&>=P2+MReGHKm<=qS!Da`r9s%gpWEhuSel4^5F6k$3A!>e?&eqAda
z>m5x2+`mt&EA7y3N+J>cU)ec&g&P4YRPP|&k6<UcF-3P1xcgM>qgQ$#Y2&>I$rF&!
z<o04yi08;m11=FVjkM!%M>$OGb_{QV25g)i+CZzTF-F9qQ`|enbX!R*yJ{|>OQlz3
z(Sj(PeAJj~kqevnQ-Tk(D`ZV2zhP%Ej_wOdG)=qXx4(cDps6mV3Z@#Oj8{k$^Qx=?
zFl@vc?qb2^Z6|NF*tpc<3EbC{n+_VL8X#~}z{hIx8#JCxROdiuyveV(xsk9pBEZXf
z1|i!3TPXQV!Jz?VGM9P#`hg(;APgZG<4^>3>s5D;Bst%2v)J=qO=HqIFX`XRC2fc(
zdkvW<bSwYnQY+wrHBb0Lg;BvTKHe~XGsdREe~>Y`J+=~!YO&TUcZdbU^R}x6w{+>)
ze(phP`!?^=E?$*si{Ap;!rBJ|8Yo@~x~WxWo=`{!u(z4eSBs(?i*|}v9?F>q=~U-I
zv7wN7vzLT{d@AIKW)q)vl;f&c-m`q1;hp7boAp+>ddn$w!mPad28i2k5YcN5gyRxS
zeKeKgnGa!GkgINiLZ__x7^%H3sPt9xJ)H~Ys6Z++<tF=qtR3-?|2D~nD8?qM=T~V;
znU*(ub-iiS432&qTA6`IF7XZ>|AxF8e%Z2OXxiYm@`{rnWM`v|)b$>Ud33UEFIZZ*
zRRu=T%P!LPdaYEbd{lV@NKM0bb)ja{M=*9GE!*k#CXfy#uu79+I;_Z2ZVw)&-jJcY
zD>5Ofeb|mc=iriz4ofw#3ZHX~V*bTaZ3N4cJlHmb*}A!Kpiw<{cmk#ShMl}hQz)|9
z#&p0zX}c#wGUK2Rx+lp=WRUhzNJV@eGQyE-TOJb@VPTHe3hdOAtbP6D1Iro>Vn6M3
z6yB7Q*jcJe6PCfUl*!GtT9o3d!c4IBA<Z|=Ck)+}jfLj^Reh%}`@$K$i<k?%jGOjZ
z@ItTnfXr5CY9_?WzAmR&y+t<X`qa(omPp-?cB5+E@zu-)bp5Z>Ek>lqt8n?~W!2Ip
zYC;_$?XBdNf5S-stn^Gxq!#2`yrZt9|FMR9#7%Fa^N0?HN4~Y}$YSH+m1q0PmuryR
z7tWRh4-XRvX?dvA2wY*fmdKmeS1XM-k;~{PZ_hRzky*TrW-ZbeU#%7@)|4NN(jGWF
zh;deQtrf(oS}b3&$VnhUH6fy9F(Vz|Kt{1pGN9dY`c>E#=M2H=FGI7dCM!C_l1CLK
zwb?We7N2|Oq9rlcQn-uzHLU?8d)hqo<YS9uv-5yihnIip<XPu0*7h&hlb*dYuxCeR
ziI1F&IiW=GN65uE+h0$<d&;a)wF6y5m+L(!0k@mLH{n;g>LTXjN`hB*l>4}}w0Ka`
zr79CrL-3K8_mdBQgIjZvc%{ED%?z&Ut%@I^Y(~veC^jX)w65hCceB)E&mV~3-A{hA
z>0o&T_epPDC%u66Km`&Q=l!=~`r8m<LSeBK|H4n_tE%e!{&8u}KtQ$<Dn@@BiU-gp
z{bG~8qy(=jo)7lRtHvsM3j3l0UM|Iat)Av1G7n<+z}#F^Cxet9L-LA|^H2F*8;RrQ
zxBYLbSABWq>=Wm+L<Cvvhz=Eu%4L{P(^i}2Cj${*z%I=?Sjqe-H>>9ZFiY)AFogkm
zD1)+k<a}lxJP@9Zdh`mUGYCpB!`TO)F>|6#rT;XQ?b5(?_<M|&zNYfmicT}Xsxpjv
zENh8ZbF{Rb43O7cli-NEWA563=TA+Y?QO>;xU8@xxbSybpbG_e9k`rHGblRJ6{I`O
zD#krduza~?PJ(s*EXy1h^SITvG(ZEEeP<h1Ul@>ozx8IE6+_wprD@+;#I5iHJ8D?;
z&_r*D8Byn#X2q>{QB^UXz8Cy-cCh!~aXgFOZ}F&`5;K!Ocg!U8#oYpGsW6!{!?RBh
z&W_<OOEDNrqpi4H<pP5{lWxP<UBkpk<j$_5GALTwV7OfNww5w{kvpO?2xe2!hEzv<
zQYZXj{b1`9{!u8ng`>}BohzW#)9~($$Jv<)5Bq3)GZ&rVZsvJu34I~E>UDLKGb}O>
z1QR>JYc8Ihj>sWI4vWVZo<JQD<O@t;HL|%q`iOLaoM|e~RHXbLa=8BVvxUsfoJ&!W
zsAJwG0&;c&V{9#)vSSYJW`5pgM4Fe@b<yv%#)FpY7d+XT79<*yaqMhvVtx)u%n?NB
zzRXMy@Nckx6(cwr|8g@7xWnj_5#s>1G+VmToD)}9vKERw*&5tzMY<t3V13HF9cgxh
zKKw<QcPWpDZH60NnsT#|OF3S8S`_aQNDbP=R1GKn)KyIGcSbg`pzB<6e>7~%<Jcrh
zGDDi0<o<(YaBEq9Q1cRplzOS-Fclp$S*dAfO=I{Zcp!F{P}s9rthq#P5^j`5P?yoT
zh`_s1U^{0@-D+neA6q2LJL!GtLTz;?j5$D!cF=-mCf#5MzGH`3r;*eXeW!n&#{`MP
zyHhg#l+aGHdtQgg)cv-*ey-y}JL-0s2ERnA!Flrkjl0KWl3~7fp60(z^2IU$PS5|>
zXngnG;~@WEWBt30yZnD&VgA4Mzb=-rF9zVJY;v6^Kjp=l0km}qzt%4C*Z*@d7+iGo
z9)#&`4|-pB#=n!NPxoJ)?w1q(xfiAihbh0;fn?s@(c0xb&hf<oJx6R;0)idW6AsJz
zHt!c0lJ(i{-q8zAC573aDI9V(`e1JKyp3uK@vQ}z$QpJ4MX1=UusuK_hIpU3KU0xz
zMY9g0NiKm}hv1hbj$<H-UbbQTvdMS=KFDCd@_}-P`vZTv|HHxaeKY>U7EQqM%l*BB
z-NS?b**!UU@jP8yTdO6%Do(+4*8Kk6{9iFjJNT25=iorLTCKnBwswzxJZ`n>?8++r
z)9)aqjfxS4SBMo4MIce9P4|9$zW>yG9=_PyJ$Z2yf&@UnGdQ_FZx0P?w$32)4#MP&
zB>{X@iEJcV@TM8-1;SD@f+Z9G=d_9-Xr(&#TQiwbzHoMK^u?C8>@yZ;R@Lvw{R9kI
z{AVc|Gr1<x3|ME|g%bOSM&V*@x5LG#N$-_}eDfcIp}J5dx9A(gBq-ET-2sA&-|r{w
zpl(}_E0`>|3jIk>`sY^uenLi|?n$X<SP$$?!#>#sL+gXOT{oa`0vKEZU%0OFa6|~P
z?T_-kDJmrsZ>P8lU<7H4uwVj1+S$j{<&UsYA+o>&vPQL`d)|TAAJ9%h)~c7ehv+;+
z2xvO_hK}P;yT`4)Uw&ycT6-^^pX?g!baV_#hi_6sT%EM_<Ke**Q-m-UY9vyw{NwY}
zz1HqY>u`Vf_#}N)Z`8mpY8f4G2(Pji{6)LH`e>~Iz0A7c4EO<EayL~U!O{!eEbT&b
zoRX9KwKc$`0apBdseGwKk!tJ7i>LqXVUpnNo=ia?_DtOxlSCoRgHoaut8vTJYQ(PE
z%Kp!<9X-iCcZa0DJd#?93Up6)0(#}7UPTO?Q1;joT|(vm>&4N}yGJihpFeFqJO0}q
z5>m&fFJHbmI!RAzz9}~ULoNZ%tRd#|PRnf%u!I^CX*n14P>}j5{ra4p(gc%fXbxn!
zBo-1O#x-}xyVi(}@_v`s^01zqu-*hO5PLsRJ2ld#yc-O!vmrXQL3i~fWb@^U4bt>J
z#E#E<C;9+eiV&ryH89vyV+i+hi>zu}eXpuHC?E@wipgqH23$|XgcZG}pmke*D^fnz
zx;aPYbm}K(`9%TIV^-9tOFgsd1)Q&&$>`F&ZC|HV)8j#K#o_dROgu2?rN00iD*v8!
zXJqc}-A*{WO|p}-Hx)iigDXfvq*kx8V3=|PsjVzs!6D6RXpt>8wN+K0I`AKPS09Kn
z!4xN#Hp*w_$GtCnWES4Et!FLVx2<g4g6&9gXU$XS+nV{r+eYnRbxb1QcrI^y{BW)j
zFbE(EBz<AdfViY6W2@#Ty_Vn>{v@)bK-|43OEIj#+MhDLpMn!&xM*k2c0a>9prDu1
zwTs+0De)zN2nNDRw#O4>R3r(Tu3VB2-A4X<Sd7MtiGOFwf6dE>f&6#<yN&Pe<iB61
z{P#|L%ct=0;P@maz2)bBlJFKi`2S4VEe`*aMYk}F&my@k1NsEP?X7`9#m^+RwV)Q1
zwWxaYNLW<yIb^m_dO?|pZQxcS5p3`a32a@@Dn%h`yzrF;A+BOv4uZ9ADF$({BT^6q
zJWB}TAjD-LG>}jPLQj1Oh}s)}fRvUn+UF3`0&4maGFs^2f+E_bsE9U#J3o(v_O?hP
z24Eri>|AJMTCD}8vtJP+l8fvP9Zf|?vyhRGihf<1M-Yqm{33E#tZ-W~>}Qq2Y8ydI
zvfQ#~pH&j;NFZ!zHqu*i$?T@eVy8=EXTzwJT<8o)Ng80ls5sTzjVwh_Ps&1s5>O{f
z^`t}zumRoNlcALA*7T#B!3~Kvt&6xXn8sPJiJfOuGL08+Ny;{_s1=q)_opmt6)9<H
z&YBW<<rBrEN>Ex}my(?H^GiyDDVRf4`bEV?%$WXPcmKYge+$e1D7y4Q62KYqzm10*
z8=?K@#^Z1A<bQYepLYVpX#&K*?e7_CGA2T_Z~i19A_qnMGsTEJ1)nTPq@gS%N(>~3
zF$kX^L;M7ARP{5-5f!ing#up9d87eeg*l{&RDMA=UNv$nMxGn}LNZ0&yGl0R8#n!6
zR$f<+GxJ>cmh8NSA7SVj_$-!QLy0r>GN_QPm*>8m(c3qlBx!s)=>J6t5pzmKj0h9r
zEM-GwP5_9i4bjLj#nS+8%|>IE#BnYQ%qUL#JOaMkpXNA-g@k-_p6}`P78LY-bvQz9
zojXkNhhmDEvOvDC7m)^Xm0O!-emT3$PwUxdl?Q5K#xu>V#*&}eHj^8gW}L}{@aH!*
zD<RvcJkcxV>@&T`WuamO_H+x)NQGz*$R5jk6f?*wcW+@UO%FswbV!0j|K9l$_LY$q
z{s;rhsm;!7VM*JY^_^+8!jm#zEvWK*%Sp<nC_O2K)lY+W5beLtfviiS%h4E8=2G+k
zptc|)C32G483Zbwk1ym5Gp0HkD^jYfPdcOC3WWGXnOn}7_{D8ozZk~3#d-e>=B=@v
ze0%iuFJ$5R$KcesmFv81f0p&oDhsD{n93@}loM9ubtlvRa`JyGL;Q^Pzw3`5e%C13
z|9<!APX2!<|G$&}Pm%vam;-|n504`n0R1kgSYkj8^7X5iN>nm#g1(o6P&FXF;_&{Q
zV$Xay8tn!WIQ7Tos<fA@q(gsqIq2q^l37#yb+Sb)wc@U)@{%e%XC?2=_b<!NJUJ7E
zqQ`rjLepDGR=L>CcxWc#97@6`g`JKRK3VozVh+{r7h)RN7A2-2TJ+?;56_0YjzYZe
zL#!x#60q$`e7NLGyJ!tgNqoTpuZH<q!LioG{6zpyXZ%lh5*5z*BeylRPRd~pXB87U
z+LSW)6Ob!a?~}!vs)hsYFrfG}e)34nM1Gb=s>a}f|NrJ%A6QHfoe;>q7tdazRELqo
zXwb_cW9+C-l9LW5)H1&^N_z%Z6TGhze~HZ9@M40h!}kQj;LvKeG8xrRK;HKbh4^Q=
zj2^Qf7;FW+c8*$2NFmA0!COdK$~i(IGMMn2ec(nhiBjHKOaAK;m84jzTa3r1r|*W@
z%}s9bmXB1Y3$#f8DAGelw>leznN#RN*4<*kW2sCrYGs%>+RSC!Y&0_&aBk~d{-j=2
z(Q%Qj-so-HFlF5q=U@ZbB-Ap%pmVTZ6@fNmB2d8yQyOp>`ou9p6oPh2EewNy;!flo
zH7o+8IHg#hzxqH#_zPvB1uOQY5Ofrl@ML)hnOST36;`&*mBZ(HfCSSA;L5Z40{VKg
zqZ>OZ3%9A13dRb;pc5_8rZ>$%m_6?_mRp=&_!iyxNPHnP>Wdws6B{DWH-C#!MR%;<
z-2C6fEZgC!wE^>f&cg~$<^LM%hV2XE|2<s)?(rS}_cii=|GB!c-gvwi6ZrM=aHlW#
zkIYdb3|402!q+i2OKYUKLhIS?|NG*o^|$?_<AWE^Nq`@H1)1F**KDg#_xBE4KOP_d
z?U(fB(f$tyztn#?e6f3S^4~A_>p$=RH+<Iq*Z&f2Fed4|w|5WJs9)-zs9w1>l&@0P
z{^LI`Mx(dy(-B~!?xZ!i!O-%C*juhAh@vQvE&McuO8KL*t?Ga6OS8dERXZ!CA3?*x
z&qdn8p;B+>2Rn5RtN&Gxr~1cj+xoFPILo?bBjO_T@?L8)f`I34-OPqJ1X9bbRjoOt
zt&hnc@$XknADwpx=A3h1Tb0|8zXr>-yU2Rs@NY0<SY7=X@()f(N81vjNp$MBJYW7M
zIP%tdFBrjL{nO4e<f_r|PV2ncfs~|?b?NJ;<BhPbC&5^Lsee8k<<FsEfQuVS@z%N+
z4&IF~>&X{u3)n+%&CU%c>peKb2cI;D?py)`GvYl6S?B7B+zz+B3#cuP4ea;kmpRJ{
z4_zFdHoqv|@NX=O7Yr(pvn|J(Pc)xpa}LenoA<n_^)g3_P7og?v+d1Jk8MfbI-jr)
zX{*uxx^Je^{IzRWPwZAXAJm$kGKZ3{MnJC_nzPlNG{N-X?87)`3-PlLNX2ndNY18_
zEI(MFE!GhO-}p8kjzH|Ql{{Wszax|G$fQ4vO!^eU=#EUfBa`mPq&qU{j&xasbosnQ
z%b!QOkoa5CD5E-JV)|RtZ`%9@HD{n`>*nvb@X?M}u2b&}CTHE8KfHH;mv<fzdS;-2
zcnXtM9+*2|_weOUyKxy88hV*Bb^u&2UI?UdqoP-IvtjoGN*`h7ias?zmPr&VQ8M<u
z7!{-UArW_QJcWq6{gQu4vTr)EXU6!+#NHJ0<TKHBQ(9PnqMJr`O=;m1X{#?!P)#MH
zDi(M#GV03{PhXy7`tpR*mnV<DJW({26#A_A=*tm6Uykevi5xgwpO?t_@}!Mw;foM4
ze*_uBQMJ^i9H&~~%{){Kl&hjz;N48Bg{hFT9fnK1>eBmmC-n4LcJou-y#Wh0h_I0S
zYIFJ}$uj&u*%j*`wq69%g%3@F`32kgXe~K_0IaB82iwW+2Q-2j<|CM1&Z8-X5h|E5
zbx^J8U?9}~pp$j+HRY;5&-!homrObISq3d9Hzi9~jGP6Nbo)d-<(~Jl0(zDey%DHh
zGeEf%!2yFW4$hMfWegWjRf-V7Q5k7d(ELq?IkxnmG@r?)#<S>$Xy2vIaRPEhLW=VH
z47VvE+FW7I4k^Dq%M`DXG)Q9{gPqs`Y!jTd%7)AKGO<2zQ?3`<Ql@Mzqzc~O;SRGi
z$HLtWyjo=dv=X*!lRm88?|{hr{l7QyO6_FLk4+c#Y8j2qMw}6rFn&KD9#9@)el(bj
z?W2#0?S^>HjPfz({lpyC$8hKjvLEZ^0Ee3GWZbkaGvn>;V?!7{-%I}Sk3@bPsUJ_D
zi|<N&cC53JSgWEjOv`L?@NL0du%JP|C$mUCJ^J9>vk}*%g>{LUR_P$jc}Mlo@d=LH
zcM9@M{6?m-<Sr$o$98@)$frC-F!bZV#itSyW4<Y%p3Vy@f|iqMfcI_%m^(Zb^8YNh
zzMQSLyu5%H8A;!(&0JW0LD;;t4yBnpB31d*Rq6W)<T7?k?_S*pyq>~ammFQZxAa(&
zG}U>*f9Pfxqx(mLch9n`!SG2ju6?tuf8nSN#j)ChC&3L8zaD~VaKjqP(Klb#!gEZA
z(0^|?dqqxjIv%O2^$b@A{?YROxX%{i@Um~lk?>2;Z1yd9JQt8gc*{KBNnsMb?A2Ji
zJ?PF|+mxl+Zag&DHG>nAxxna=9*iiW+U|IdOr1{(GZ){(Laec0_a^>8<aul=c5<)t
z(mEkvYCQ#{vRuRdLf9o$0kJ(V*X_43<%F)<$XV4+HWaGa$yszOM>Z4SgBe?T+uPFK
zIj`7?&yOs+UU=u&wO92Q**o6O`xr-0Ro&jZeYd@4KV{3?h!^3Jj2=qanTBRa=5DJ$
z=>JbX9E@h}aVU(xUyg@qq^!^%QdWNF?<*<@bs`nyx2<qX1ST<34UhS3pO~Wo1X2T`
zWI>jOW+Fa7(4Yx;BG|28+WE=-<P>P|np((b>c%@cI&Ds^eK>eGt@a^z*1J{GD5T@O
zd%oMb0+%Fn2G{8_Q4i@Nd7u3M?0so><2aJwd|m$vcI|sPwo8>)Rd>&1R%Wl$>8kR!
zGo9n3>*((1M^TimvJRS*Y*{nCzkLDl0!RSFP11I~nNC?EKp^f2L<Ev6zd@f;<q|9y
zVHr(nL;<jU7F*Z{)R}%d&Ruu-$EWg%Pw)|rzm`d_+|ob;(X9D|>0IeFm5wM<LrX}~
zF14>{`~mJP0LYXAwW{^AcNKyWQ}hj5KBGULl9$)$I?B5gS9JAUilYVvTvM6N(j?WC
zJOccf>MR~9)ih&1rb0K_*QQ9qwW`GNK-Vf&Od3;n)rt8;qOY<@t@Nan9)nWKMF6$h
ztsyK}8nS*;hM+2(yn}>6ruK@b-n@avHT)Sh@P<LezfHnB&rjFp<zYfNH7r<6s{q0^
ziVngU&F+f^wTVDA@US*@fRx^EeS)#Q5iYxOjg}5b9|JFsF0#X?ZA=m+=wrGi5>2EO
z!bA{6O*-s(7Su`c{rRK}XaCRtc-aI%d*&15ADI*Er__yB?4=q2_91!aaTfEJd@3YG
z-V}cUF@q+{@FWJM?KJ3CPBFk|FaD@<_}`1Q{&RrOP<^g8K`uk!SpuR`Kn=?@V3@9M
z_x*NdNGh^+yp+uTg$iDQjHy$55y-xb7pur4Yrif&Ek>(ph(g5wNW;R}15oj$Uk~F6
ztSATa1@?cz!roBiq;<k5K#d*V10pBS?-%2I-ZV~Jr|^rCyD-`=v~_K5W!Y(p%+tjb
zNIODZBVvx?1uR7lM`U9F<{<AwaX&?xbnpQ+y~$kN`VX^<Npd)!EYqf<=mA<I0&IH~
zdx6pvvo7#60U+@FN$5|rG37^*_oQ@{@*MsLIhN#^kLdxy;MIFC4S`S}MtK0Q2E^;2
zF-^ov-UD`D0&Y&{sRIX}i`XwPBZ!H>tI%BF4cJ_8s>*!P_g}(zwo02qWz**y%KwEv
zt@qv|9*o{nD2af}d%yryB!~0Wtgx7By9ct33!S_Z<0pUJG?w-V){Y@3&MoY;7gBOs
zik&tDAigF{y<i&p@ehc#D^?+%@Q@NwYT#9Cr)L+b`pfM<vT!*anHqk~k(phHzZi+Q
z^f}o5yy%LN*Ay1IH$58m7DdG&c*%Hjxr)<{bLE5n^m2h}@sq5VT(QPl>c+Ed)&ws$
zpIrcfoq2WjrKU3VBuq~T^IJV&Q<ceLm7vqYRo<=4hZT|maP^Y$YWh4w#BTm?x$S=d
zQJ6lLwl=0s0cIE!Bg?bf!27(K0SO1F_YgL6PunJBPUiYK)R}<DA-IO{bnp^Pdm9!@
zNuje}zVL89UZ7*^9Et4UeMNjCbMIpa7oYVy%uU%z`Ox#PFt;HCHhdf5DL^o%OUl~P
zXUDGluscn1eGlAd{4GF1{lm{KvusDmg-hWWW@mpD6A*kR`J;%4KKFs@z2Agi{3TSw
z+u6f!YtQ9+G9(|tb^(MBJnw~co!rlc3#judi=P)e3?|EaVl#DuIji==fCa;oV3nrg
zt&$KB2_(t6mjLw$H62v;32d4HY-gZPhtm+O=E}d#JO02k9`YzSeN#!JiSLyg-607F
zfw=;%I<#Nxru*4_K|;JrK<_-Tj__^yE2PDDIcJpLqZcM%Y;t^E8$=i{*%RoPsV4C#
zrus4t{n`6+YopaH`V`I<@%?uC8{AA%?4y{hVyWQQ5lxmq;47kl$0wlzJjsi!`(ZAp
zwx6ZF8*#x@(Bl@?Qz#R{NosQY6-Pan!tkfet@Bc0w@1W=amOhiQ%q7-BgHh)bFA$P
z_Rbr|_u#cB36NSL`}h5Z-w$9yQ03?BcbKCZE<J!^z*HZQk^2ii^aD4lMm)p}trbt<
zVGSa(LC!@+EQ)jL;(;>El1)>4pop?C9;k%k8Up@xfc{cV4SJW&Tp_mEMYvwZKBV}M
z{?gxz!;g~~02*h;e^FRryCwiq7C)Q=piD-n3Hw{7bimj+Tkp~H%9(%m{rPiBE&#rm
zZ22Ei%rWJix7clSB#~DxliS8$UEUq{F$7|Caesl#kk5-znE4Ic&(h}$Nar)0vu1$0
z57>d^`12i-WreoBRpi_PLe3X5csYQW2b!j`FygWE%46ph?e@Y8{NM`UCB0Z?0rdH3
z6=avdW+_K7WiLo|3bet2@*%8iF!A&)@7ERiYt=J=LZ|iw;fFx+lR<&;id4G2O*e#f
zq9|0}C{xD7BxR5(6`$lP@buIS<QWX#@@DyvJ@pG^wj!+)Jpn(H57l#$2_u$#wrtgf
z<Nzj1$p<sXsPsi~rx6q216*3mPVoaO07(G=K9a*x;&Zw@nDkjeWz}rOO-U4QtQ!OP
zkGL5@zIM0(a+V?E-j+Ilato}7H#Khz`;1z(Wx-6fSPmY?*~<;#EC7T+d%tp13)Mg^
zm!P-^iua6i>oQMr%$vp#nT2^LM|8Oq;A1oavK#lF{{qCwhy1NskD0=Xyq);Ku4SwR
z&7THaC@Xl5Vp4cE<r?g6wn|Wgxa4N#$61583DQ~<@WymZO1BB@7fobg{(_$VfLg+U
zDYEX;>3JJ8fzhkVoXUSJ!599Fi0z;THt<89C`m34UV~9A(I;aE@5_nr{CEy`jlp)3
zC6eZWx9^^>^Wx?N3$LXc{1*HOxAI5)p;|f+wiDRIGhJ$mdZ5gLFj|%m00SO4nq%*5
z1<?Z-5sRPgrw<RZm8L%lMfxgnJuIeIlK=O3WH9?4|1Lk3qea~sron@KN`bmG1Sk@s
zOP`bp&Si6ph2<)vYATE$TdR*IRU|E%#+!f)mvN(Km63#Etq0MH_NIUY=a<l9g=Z_@
z;|~tf!>Io`2Ug$5iGTUt^Ny6JNsq+dmu4)nnhL?;5#bBOLa0IhTO!Pc^1$*~TV~1@
z7ZewF^W+aR0}jO`q)C}hm;Lo2yH6>NUC<&6|F2jSHccf=^%Qwv#d&m`jubb%_z)Wn
z^E6Dy30_fv9(tD(*b6mNLP-5w`3VNV9DU0xe~tzIyyCJ1^eCMK;7r31OdqAoO!Z17
zMhUP`*)R<iC8C&Ecv}gi7H7KIJau(*a+M6Q^Q2^ng=R2TC0>aX1{nC>do~<+L?#Fr
z?>!rVe+rMU8vRZuI;t=Jh#C%sXX*?yamPzTHiV;D3w=%rz9trNUSGtyokgtJi>PXk
zXQ&n{QwU$|EGObFhfsyYa{kNCa&BsToml~}PjWn)%>DUtyGl^Bcp!Q5nE9QQU#f0f
z?bkiV_a7MF|3dpdqJ4S8m;v(*>%XOr8TGdl%p<J*xX=6Xi1Gb7<NMMhuKn~w;Z@cl
z#`tMU`*QV`h0D4lN&e38Ldl0X&St}zb?t$|3FZdhK4h23;I@Zi2pa$L`Ii_7c8HHM
zhWPoI`s9AzSNrp7HU!=p`^mG|pC@Eo^7HZ8$=>Hv#>7)-xZ;oRA0O>~`dH&-h995%
z@VNiZPdh?T&Rrbrhc|C{n$f3Z?NrV;;Hetyb&6C!x=|9fKd<TJ^k8i=-u?qQL7Ak;
zF?>>KmtXS+^CS_n>!%k!ddn6A-Z+?Hj3s*?3Q%JH6SL^H@?6;=1R3LyrXcBO0ZRBM
zS`FFc=o!8;*jWUL%mF%xNK;G)=&&OH{H&C4e}DPU`n8GAN-38=oQgI0boRx5@dab}
zQU2t4{fW=#N8sm|tf3$Bp}$a`ph5CkTEAR>I0ff3)NzIJG6+-fNLntQVj-&pfWQ0z
zU_=6ctNAEk3rlpoTHK`v(U4}n;3^ERk(+=z|0mvG{_>YCj7-yi0Qq=-DNe`VR8G6!
zZn4h%(zpMsiEo9BPBwF^S)u6u)6`a>uz!6wPzFF>U_?J%2hIgzXq3O4hVC*aY~bBf
zL`N1J%U#SJ7$B>PlNXrhmDLGQP8h2hA(f}%s|4n=fCq<IMli8mIMKUz!?H+biz@mF
zc~ATf<rDb%hdp=!*}=!!@9<9FleFs$oa6-EMTmr0{v<RI@<YUI#XH3`9~eI(Nj3f+
z$2mbld1z@+O-sr%C}D2W5Z(c^J=}TAoeh$V&~$4w-I@^5AK+;unc#}v`aHdZ2IeVV
zkhNghL11mznm-J`K?Cjo%haFDkJDtG;YASLXZop>r+;|&@1Os@)KBOkNGn+|p1FMR
z7-Jo18UNxb3stowq-1JRm7#Q4N+wrAgwkO=+*2h~a2&Ss580oFlu~+!EMSDP$T0)F
z_lI{WljLQY=#4NR0)1QX4W@wJz2mE&^n!XAV+mki{6%(W4SBv?#LbF;i$C%snD;)8
z7c=~T<lhs>{!@RLhx*a9gRBid-q1hT@m~Gse|`ru=YI*~#Txt;g8@isJomgkAm`DO
z2~6q_<69Wp$!0nG>G0J1w0CxJa=3TofzMw~j{kP}@!+GkcMAR-ct0PW{p9U^I6Xc(
zKRY=36^Kpm=e-jke$ReI6CC{V<>cV>6bv{%@eV(IIXXP}2;cenSMU7v0M58~=KXqn
ze&QYf{MkD>JpC(rH%}AO5{GfMTPbrL<PW`hLpjQ~rB$<6q3C9s9AHkt@<SEa^*n`N
z<*lM9zpo4bdJIShc9Y6(W}kyUDNn&$>ER9WFVhR^S7jo>>3-Iu1cvyIk}R75?}U**
zvHPCPg-FML#>b!2#(%DiKPSd#;y4X4o_>y&aTbdgGe95%Pw}Ln0hZ@d{`5JoDN1+v
z&HMR$Q#`a{k6%^CpA+MAIPZE#Fj?mvdSk6@sC)>f{G<<2i;=Pt0`CZrV)h(f;8ad@
zp1*CydE0sZwiW+v=lR=K^|!$n0byN|3-Lu3-A%d0hgbDxop8cD$_syiZ6>^HGy?qd
z{X4I~0ebwc_x0D;WUvC!G;-vRE`B&YgFOlNx39~Tn)E7T^z-TYhl|s*y_2(TG^lt6
z@qtHu=Id8G-Z$AK`QwPW(BrSlq%v${VAreJ9Lfn4V6Scy+vxA`6al{GE%^97u>tU6
zxY^(Hi28pQDiF~<?3S{<ond9C0MZ5^W#{il7oUI2Hu3w;KZ`Y@RUV{5#5*sSgDq$R
z<~_$pzXe}?PkfavY*HpC{WqqaA$|NENezhha)syQ3Mc=3iwy$H%2eBE5hn01xas6N
zEDz5%fSaD<>rEvh{w|=tUp{}iL)ZzBrqkcQ{`#M2V*t6^lb!O_|Dr?SWxDHBh_d$z
zmYtHq5}i20dwQ5CufqnRu^m7hrGP+0{WTpPh;;$3;p1o|or;d|#T@0!n8H!7p1(vb
z$X3AA_C^!`a)BM_pvUu7P3x=jzRp1VjrR&$AAzL|JrGl=B4cBUT$w@0;}g>$&)Xvd
z@<$cOPcg`U{WYf8VXLN0?ELOMe~wnSMSOitd=2wnf!h#n!tyW>mYTm|sXz2~wsu2g
zk;{H|tfx37{g*(<rAW7?MO5;P4BBs$BNtnyUTVALXVut?O<H`-MxRGNquRnxp&+rR
zUQx<LrQ&%9nDh7VAy)Icn@rb~RqGUk0nz0*)F1xoz4u;LlNXkVf)7V@^CO+@)mwT6
zNzD-Xdpg%Yzkl~TmN4t_9)0)j_mUqWJa((5Oa3I@J_@;rWO~as=Ay%oX#*hOJmO@x
zTN`szj=7<a`96bLimY$IKkQgYar6kDdVy!jNAUu9vM=z&`1C6jQ_Bnc%fZMuB^o(@
z**gI~+r@`pFFqds@+<hpOgCuxw8lezUgJN8+=5iYS&lxKF#?cWPiBz$!4IoEGeaZ+
za?V?8a~33?B<N3O=?&e(Fzx0E%gkYAS#bd^*m9PfI9b9Lg}?>QSCv8}dER0sqP3h#
zlPQje1-6dP&+;$9GrzAc0s4Q8{m~?x7f*kf?EeT%;ty#;q(nyKp>Rs%p`o0}!|o(S
z%4gUeHl)}U4>?J#JABHmQ_HP;%8xUXA4gtPTG!iM%b}}Y_Do;#Q)1LpVpRKyQL0H%
zlj-V$ni$pBl&DXiK3?pdU3>xN`o&&lvw)N53lGY(S%i8mMA<1)9`{UEGumt5MKkcW
z8R(OOf8LhUA-faU>p;jt^?JQioN#^f4SZK-`vak}j2BaH0nPgvD!oR6W;Kg4gF2fp
zOCkYF0}2vOdGRq`L?^)A0Pp^Ia{T4u!`|uPzV|=rAA2VUd*xu{LI7tB4*Ov;ou)}n
zaI%@d&R?6JWwLfjecM~!(`P6`4-gAbH(4fmHSwD_qt$G7e*x|M2{f{SkNK)vwpve8
zJIu4x4%O1s4j+7;n(}GCP_7xgtl}xxjC;(lkWJQZs#ufRWi*xS@F|xJvS(8!8GNS0
zd1OyXWag5{x|Bke2IpH*A5U3icGQbTUYLAVSP?tV{-tzzMD0WKCraNr3$TNKz8s&N
zUHr85Z%_X{)c=qQ`zHP0Km9rUkN^BG_){?YkMRpc8!wVLcyT@(fANFs>_nyV=ku%P
zcf17u{pq{!(7)in%D><K>E+AU|Mu$Jmw){B<#(@MefRp`UcLr@y!toqr3(P8;32u^
zY0p#LTu=T3OrHjSCor!h4gA88Cqn-K*kV(d!g&{>l+XRIzb@i?#UU4%0orKUA&@`4
z48iw#rYDvzy1(PSe);l0zIpxf)hq8uKb(3$uEO~|9Qvz~_x&aK;`wFq#s8Vi<B9)#
z9KX*dLW?|Itl|K6T^WVm4~uvP7o4n$Zj9)tCxo%w?5yY0Vgp=jACCW$o`W1s0@RS>
z_PO`^`OD|t;TUWY@4EBp%Mt2$2yM~PBmo+G2&~ExoZ=1`nrUt=1lGWVz9ww;u^29a
zk>D+WUFI*ntK~9!^Ww!Uef9H2eE9;*`2xJ{#q+9tfBxx*FMIocEd?!k8e02#t0KCA
z%MDLIy|d$w$9ZZfypJ&dcs|I@Vt5>z_Cm@sHNbwr1ZlV%-KF%RdbLu1-&=kxki6O2
z+RBPykq@z&jloi`^KewMosP@{D1XnEf3bi3`RwHQ2q^gMCO&WLzstdsa(p&0J#*1}
z6ZKv{{Jk5Q_l|)kzsODpw$tm*b~A-rY>M0yF(P{u%%6`+lfP8nM!&F-*q8RZ^RtNj
z<)_)-yuB7<@`vFY^Upf36y31Y0-#x)Pf!d**V|pde%<LO*sObtcWY(o9_~ed|K|Yz
zds6<L(dynBepTI`!2H?x{VXiWWGfGKpelYo{QSf5KZ!2$7|M!0r~6I!x)%-hCWF28
zpjch@&cPwNM0Bei_)+hD>kc$GTxdRne7FXFFKkMO{93T`m;c-uyp)7;&d&-&$AwA?
z!jj!8@<tk>b%4rKXTH}uLFUV{0Wvn7^$UQTDgZ@jPztapO1PcI&hC?eS(3~i`=vJU
zL&@<1z!d{6&g_Dc6YU*tyP(cr1|M9&Pl-OMgMMk4v;&ad)FI6w<N8m(Fn{@w{UU|2
z%W!!yUClR0$L&t%o9tHeRq-hm{rP8f&qw-sbFqS1=~aN1rWBRTiWkCn!MEV5rI@oY
z?%XY#VUAHTXl@g^K!e{OAs#`We*b$(SV!yp>+|A)^9lCAd7~)La*4m>!~Y8#{jr#S
zceJ=bp~B*~lcdHlJiO+cIf@2HzYmVqgY#%`J{g>^2VVyNHTbw56fZbQw4p;~a5k){
zk1IlsI+qe0)8C(Y)Ah>>$jHE)ukrN}5(aOvr&pY(&udSQI8T2po~9Fd6{CN7map{9
zOCV};{Ql+H3jZ2ny9ufQUVnKu!biyx2c8C=!Fl)7x`6ra!MXzSpZxg<hBtDyNy7L-
z!g#B`6TAz&f}FdUQ+VZ6S~9qgoC;=ZCJ}(QaYz7P?6YhZfUTyjlez0;XqCuaujHu<
zrKD%m8Ev-PkYAp?QohUXcm>PF{rUCttvs)i(r8z@$!?i}4(TgeEz4CTSBuqj1ZP2K
z&i@#qSQ)%XEn5=$2BsQiS7*Zf%XD?e@U3?f@J=7yz6M@DL5WGxy>~YO8yUjgj>6|f
zj1ZkvqpbOP`TWek4)=?9Ztp;J09ry3$zmmC%5Gos+Z9^)W@JNTtXJ>YDxRU{staD!
z#hQe1aaY6rT+Focr^~y#b9tA_eOsKzi&Ta5dav_T4X`FC(;;dP4&Wj2eB{TYQU?5h
zni5oIJxTk5bo3Ume5ZSH?B8dqdEiz2KZKcVQ<=k+7xU)~S>&w0@n#n&A8{L)J22lM
z$`W(7v+vpU6<G;IUF4tyKJr?7tZ8z&hP=N6@ibXzfEHLTiBf_<=I(N9J*vAWulUFV
z*^E&6v+|Jvk$h0hklCd>9{p_LS@A@5JlXn`dm~$<ekEH}zmx6hk~4RgoRJ-e&yBPk
zR&F+YWTxA2XZj3xro*scZ(;3g@3wN5`3x*ESnIUJYn@7C8#k#3*##T8y_<FJfey(q
z1n|fTpL}3qJ}=$@%~4!8152%9OXttNkkJT~JNvl0pey3q^Dbk5n5mfOWF92^k4%S6
z^LvXQi_}=+`-gCaAgk7JP$mnL0$PSlsMU!*p_T@sX{H*SI^xCcWR%H^Os>GpZKR_|
z`B&)$>v?oww2E`oWos-@MFbbrF<B2O<<%MC30e?bn~3hsYeJ#cEv&US)I{`Ja}7e$
z!e1{??Ep+)VuR67?UO8>DKAD!!X<140;}JFnVr_=mjJE9=`ia8m_80?@h89l`s{!H
zcX<0d%DzY0FJ5*RAgro~v(~3N4a3}=_eP5Vz+;{nO^_`|A((lZJg0)6@O(16|5=WZ
z>~AU(3aACSvp}odeTX1y0+s@px>4X9rD6m~m3vfirCi!<M0!_C&4dJZQ}J1Ye4y8%
z)#Q0`Bp@Nyg{%NkeJ@bb{UK|9S=JW)QuNG3RS@v{I&L*lK0~df=$G<`HF+;4@79av
zdo{5Lp~#J(Ui}_Yk=uI8rz=;5=B!tp6u&w%H?G)^$lrNNRpP{kYsr;`I4Md#>IMkV
ztA#GZmxzB!OAupU1zQay>@=T^r3(?6G7T_@O<9hr^pV0sQPoCJ_)lfiC!yEjhMrdk
z{Y(7PRiVqOOBDQtAX)E?*?X+j=&>q&D!fNpNf;63^3O4Ls=SHnhjIbjh^`bN^<BLy
z;-klZZKxMzqk=@16ed_zq=XDd=t3GL72YW09BYDH%IUod77JkiziJ%2^g7?XnTK~C
zP-pqWJGG$q_FheiRjCPEUUhVBSwP7?Bc_BkSJ?Y&4N!&lA-I#``F|DruyBcSdF5=?
z(;&^ts>SH)2ISfqNFM5cTmKoNumI`XEez|sr8<2}e3>(8cQ5I!o-`DH6W8<#G*Bqd
zF^K|hRwr_y#@?tTBrAu`4wE5K7N9`+V<>eDYUlxq>JPj;k1IPot!AD*L!+LdVb9WW
z&(eX<${aIHq*jeu*eWAs1Z*`}du=pjnjQjd!kzSQ_<|jFzwK$+CmU<DO`RnDUuJL8
z6+Tmhuc;VwlYg7dl<z7-Mh$9(qN{i99?-gU4=OFW*=mi_8goGF#676=uH?-aVP(N(
ziSWz(|4OgMPXAU08)TjW<5yM>*V4a$%V1?9JwugLFA1W3i6;i-jW1<eHSl~YBxxAi
zG3I6=AP8TZvz<@<=FJaN|1x<7gg!bK|1eqZ!0KeeBld>iA6R@0+aBZX8^LE7{-0(!
zHsHnHeYpIJ^1ffJye~{(M2G&$A-zny{w;iUuD$ASoXi2HgD}msD%<YjF4Wttvi1G?
zBJI15d@5$r#rk47i{9qt<cNb$!J{y!`G<F}N}-OP%SF(-Y)+B>mcA){W>lOI{fpAV
zH=~GJ|K94$-=#0dr+<e0ukGJ=z(3)54DF$p*$SxcBFg*D4P>ktfeq6itr1>HM(xEw
zSvSG=bo-wUm_J~E$>z;L|F-CbT)^WG?}`rJnOMQ(V<~HjzA@s({k5XfryBUD`hRZy
z|7?;3FAit^WhheYFJ1rt@{fP~?iE@8|LV`L{`B2b{r^L(|Nl?#LpWbdmfrvMgT>JM
z{&)D#^C9}~`K$kPIrAq|ARcEAy6*qe;pzUx;itVH4{A04#81?E|MI&cPXFIo9(mSc
z8?x#J&=5f3UBG1UJd2l>j7ww^rKV|mkPdVU>JeC*?fd$WED5)kWm<a@rWP`iVJq35
zMQ$M@EjCr*4e6lSy=Z6)sJ%I)niO`qgBvDGVArE_F!aN~ySrLso$MR_1GA67YvxIo
z#*iIn57QRu(9+7^vH&bFfr!Em*>YQsJ^3A2-U40`ay?(%K@tnny?INRv58h+ygu*A
zj&{0@j^h1!c7qWPq`cKr?T1hPx(R^UsUAvjht?9{68PRUp(iOXlB?{N`mTBgD!Mmh
z0khSK!^H|fVZIQ6#B2fH4IZ2M^ZR@OFhKU+OHcPxqM|pWyr9r;M)DNSDe}?Ni9g)`
z4O)>DuDEDqi~19F26=0WjT)AlC%i)kqWW9rMb_T>44xb3g`@acX^!wuSRfT;@rJ=R
z_<v+GLvn<FJVzhOqZnvHPvMyp{624>_zDe26kI+>zGb`q(*OQf+HkWuXY~Rt1j|<y
z$yP-Rr97T)sQx(oo_L%GR>mSJ<Jo|x%|SkqW!TcR0PL!bcjpKJHJmNB;SXSeMYHX~
z^TRLG%j%UeI-=-^-S^v2hwgmyKARd&TKF@S`VA(iQmCM3Qs}2}9^~l3MiZy`RX`xw
zKyjLKmAX|`%X@!4$kp<}(U*gh)9u>ocD#RRaW8#1yCS6dd3r?%F%H=ku-|lkjw+<@
zk?=tC;3Y7KV5?tNDX{u`$|kjM0dv;BO*!x%6``douh4{EetQENQ+?vUrCe;nhI=6X
z-(LZFgi|#Ku*v^>{ii=w{l9O)S5N-mlmGXC{lC8+oP0hwqC0@aZ)zUkHDXp{E?|AS
zuKIxWqPN_IsrV7u4Vqu2TgyC^uLQlmny&}-63^Fbo-$dKB@^&YH3<E$GKXt7!>9KS
zT@U#csv8Cj00~;tmHdY7N~Z6Wav<r@Pr~$mOMhObz`h1*v=AhyrWu~Fi8+cHLNl1q
zz52xra%Sq#i>3KMAVJ5U=hEk%E)&z5?kA0H8U81QCCxjaO|Qh9$fB0A{uI|dqd1F&
z!zbi2zIz9s$XvG6bHu**CU+y{-NUjbvD@yUHGhu4WP1*sildgR&xh@)iK%4?S53_o
zBbLEucFf08@~H~5=;_?fwf~2x@^<uk{aW4!Z1DfS{r2^DmHyxG%P0TuLHd8N|MyD?
zC$Kift8d{He|l;mj^yIW!QRp7-#)(n<#o-IBz~$pk}N-xm$x>+u16`<6!s-j;aQ3*
zYMce+#RbCd<RZBpF>jt6WXeCvKiqK4c0@FjlSP`+`+d2P_ulUB7t?UOR341zEbR!E
zDm#=Dn%u-`N?`ewKTNRnEDRQ}2e03IGjsus_x<-o7qRW47$qE!x1&J>Oj_;(H(13A
z`?;H@iEiLTa7CQG18eP7<$f7>7tt)60(N5YzApZJHF&jCd3uCB{R%$)4L0~yb{E=s
z8^CRQwBvp6rP;PwCe_7sGJ}BzY(-fn?oOF&2AB98THf!l_ZFFC=+~h7>#HK4ExVq)
z$aDr=JNm=Bm#DCF?k!)8ARj`Gh1dR#r@%`dhJg<+_Dc&IFXxlxYLt7JMH25Ez!~`B
zuznHJ#;CZRdbwD5$;?MbrLzdNIm^TgVDfUnGEM8p0J(xi8{P{qyFU^~$SeZCL1Nq{
zwg?cDBOieG_4aB9he=Y037EaeI>X?$D!+#io57|g)BEH3*;RN1m<r-?03S!QXT0*w
zdl-yJ32S>cc$pDK#)n{1IKbO2A78>tuz+P4Ztt)X*Gkh~+&m;?-IG;k-u@d-{X{YS
ztHJ*J^S6Ke?qxOp|ME|NezN~u{Fhr3ACC4;F$*I56B-*Ra&!?y(^Ufhe+vj2SjYX<
z{#QUx7MDz6<9;~*yuS_qXUBv8`@jG91vbymhoArS-NnUL?$u5gKnkM&fkTguKmYi)
z_-!>$CYP{v1o_Z;`qckMBAuA0X$oR=x^S4{3~$}d*S!v7K<Yc0r4n2L+Stwd)uz{c
zfsas|0Tlr+(=Em&qs4N^OH-!7fb8ddXzdFyMSTV|M}3CuKY=N?@>c2joavM@tE$aF
z?kn%MTFg-Sf-=L6^b9FdiU|TR!Rk%|xY-wQs`^ZUk(oBtHD$Y~@2mC=eX>A(vKkF1
z{yMq`STFm?@8-wg<^Je@*T8^%ov#~v(}?}160P*VYuM+%>BiZc0%y?MINQ_OE%bsN
zSJ0!s$p-YSys6H*FpwZ6uxl(y&LALeYpkX{L!iBjjrJPW(lgYNk=*%_9x})oRFI)P
zL!iBjjrJPW(lgY}BR^TqnA@P)Qvn*WEpF<NZxFC-WCx5k#F7Nwux=PogY+cvXa1VA
z>)L=3F`yZjlN;Fqf$WktvTs;744{As4)A2aL<|rClk9*%c1auAH>?{5P^k%q;iRYu
z8z4qaasvX{C2eHiux=PYqb5lx$pT3hMzSE1g-`>4>cYE$w(GLEfmG|VxM2WAJ@BOx
zMR@at3`KZ1NF@rG09@t3gbxq`6Mp~=Ok7%`fr(2TG%!h06GF8Nn8*PFYib%$2a_y;
zZ)6F)QFzzE1W_#oCSrgHm>LF9z$8iF8(9KhHw>Uy`GG$zM_`K*vG2*T<?BQ&d-bt_
z#+o-8?_F!H84Xrj^N1WPwdNb~v2}u-$FtXRUI!YnEfJ$7Um-x^vI9mNVof4U)G(mV
zM6p!(dqBZHjlE*2(D8tRTN>k8nKej3U>E|M5izTwwXSbW%t|GKOE4{svxV^uC!n#Y
zFlx0X2z0lw(Otn>b^<CIW|upHduT)moJwpl0?Sfi5Z1a>2$(@Ls&j3<*!7OW+i>c=
z_Tt5AIf1zyyD!qsmpi`<<5K^4<o-%kSiE8DfBi2H`?CH#uIfsd(BHyTciC$Mqj@E`
z+`J|LuUW|hO&V(OJ%NMwY#94#f}1qpHjE7!f6?02j@A};RAbcHZT0M*a_XaUtR?j$
z?+grQc=^=|9#~2J$U8^F<wlQI8}y12Hob!Fy6da02!&;w+!kl&<a&93iC|ld&rK{q
z;Rw8B-L@AHw2gvI+Ynf{uz)HXMCl6h`78f2z1jhbs)gojiu7PaYIeTB@P0Rj*SFT6
zuO{baZOge?+j4H!ww#-_E$3!!%eh(Wa&Bf=&dm(VxtU=(H#02fW`^b5%&?pricBb%
zE`_06W_Ib|i`Cf!tB~1PUHjVf>@}iEtP)LPm1q*HM3Y!0n#3y6BsPgAxo0Y~j)agz
zSJ+<IsEaGCqM6iC42@ZYC0R^>e_&vX`Xa10q*!qNDrO~#fY`A8wi>g<61-x_<q&)+
zwlM>4hNG9(;rrSofXzVvwMj^r!A-}O6m$=9o9;nr-H9dD6hq9^3p8tpI9sI3|5!{Q
z)6c;u5?*({`i5WgcHE!=*@yoG3|{jVUC~A2IROM%EWaYK52%g(U97FVq5`R_P>Y9H
zeg$<}anffw6vn2lLP1iJu*u>|lIPjj*O{q~WW(&h!MkCS<&c84H`S@bwlul2FHNqT
zNRy6x<Qvfyws(Br#TC{V!i`71xpL`|udm#C<nu*5+u0Th<kieH-q@yyqQ-kSa__CC
ze+^M$3Ux4R8DjQ_%z=^kDs(d78q++xu!^7)7}<0J7VEZFQT6dS%dp87y!YB<gqGeL
zv;^l(OZ#2BdvDcPXbI3)Edkoyd&}~OZ~5gBv)RtJWRgcm6S7Tiq}5#Q2eFahp^(<*
zp<ra)<Bd>g>s!p(;96#-$0-@cYP3ydR)P74l98w88l9GwQPU-_XOv}7mqU90A~lIk
zBPbd{emS+>^7=qF52DD#E`ey%4G`69K7^!{nglCr1?hj+N(vjQLo6=UKr2O}j-)3`
z4f;a>oBlv`-RY&)%OUzB0%rXY0ki&yfLVV;z^p$aVAdNEklgF~Dy?SrYOcN2oZr5#
zJ?JsbY<dj$b-&h5A)03InH`MQiuVh>n;jU|iupTzcReBcx)n|pPL95Ag`kBKr8~U5
zpxemZbQ^l>&Ml8p%itcp#`yZRd*B*l`o69`=rPS~dJOk<zt&#Q7wxSzufO(Ib4>ed
z&0mFj)_l_$VRMeVo>kp+D%YI(?)v@pa)K@!fBMU_YW#I>d6Z(2>;4MmHngER%INLl
z5!on2o;zp_8sAB4wqc)lLcJXKU*(Os>Aw%<{^zM5`N6f?4bc4ipI`m?^|!C9_dkF8
z=RZE(|LpSq=cBzZd;5RIZh$Vn0AxN7rmIoteU)AU{+0N((6uOc$BXYe-wtQp$#yuk
z;&#F6JFnqx>*o49i5@k@%XBL6?7y!bwUR4;XMmIzQj)7wuWW~Nrekd4dea0wSkQVT
z=&hC35<z>$dU?cBp*}^B5JtS|IX=1|IRtiBzcIf0I`DnrNY0k1Jfr=>#Evf<Nnvek
zP@1~zUzq4?bN?In`>zu~(lP-gEfYY}G65tl6Tos@-i5}>x4{3Q;E*y!Z6UlVZuF-J
z^f$86U&UH!iaJW-u{KKTQ%IvkbBaKJBOCoytd*vypagh^chSza$QBwfL(arqMgxM3
z%@i2h-Nx9O);cp$_$d+WxY6)EmC+KxjvNi&Q#sJm+<`@yY%H#OZFcS&p(MAW;;M3Q
z8k0QgGxAs2OcaJ^42<u3V|<Nk6Ejwj9W9WiW`=RNc%=^(<ZGS5fzGlwI&WS}4z7b6
zYGGw?QwEEJTX}Gxv+Rw|o7a+qYX-||rA*!Q!Ghq{864;=d!zH_wd7z5xMw=xp6P&l
zrUUMo4!CDJ;GQXidl@e$ubFPMAlUhn2!h|lB!P+TY)q_VtvE>?J@Lc<dN`Aaqepv^
zz{GYoCRVamoP?r^CpxNlqN9o@I;wc0qlzaws#s$1gUFnC82`!Te}qa2ATS@|&wu<Q
zHUHzgm#@El1wL2uKmPdL%cuMg7yrH3HTmC`=j|_|`*?DBwe+@w9q;waS1$+N;d~iR
z!7p+296Yf%oqFhr#EZiujBmry^DXfGNjRDS%{UpZmXpQY^XDUPm4rZOc*$ZF2O;`q
zIGOwLy*FOOvt;1iO_o>QB1Zo%R!cC+Yyo7)B=F%B1J92`FAC$?WVsASUKB5GCnNA-
zc?Bfm@+t(=Os9*x$@~&#G>j&2P=W@{!sVMSbegbRSy$pM#`%JR#RxnKC<tt1>4SyB
zIsM_{7XFljVZK;Sf-patSYAyMZwlsv)8H?ik18tyF9qi3WERHH84Cd~#K4g+1nhFO
z0*j-pC0j$bBBix>=`OeOW0qhw3+GEe$Jh&ij0N~T_GbPvj3@pyDZ!4g1t-S0k#FvE
zI6>p!?{j|^!Znq2vxw7I%opVo2on=PMPTXabc;C27B=(my<rIH2W(~G0gVEF3n8h1
z)yx*l&`ZGtMvuTU0SS6z@IwlcWHDae0V>Gm%81KLqA-B81%^%_&Bc(a<|%z83EB!6
zarV>Usdsw(!`aV!CkGz*{N?2MZ-*ZbK6)R11wS5m`^R5?JvsdGr!()T<D-uUC#T-t
z=a1mq&u1rxAI{H?!53fcoq~~HL0$nr?S1~$JNV_x$-(KVcYNX<e)@8B2&Mz`pX_};
zJ3KfA4EA~d==|g1=N|_in8N#feC8b;emXn@kDVP4;A_EL^?}~;58kJPll`B-pL-t;
zj}Fg%MQ{A!@a!{u+YjJ%d)}A5le5G9^P{~J@5}kgm*Z178Qk2*!_)nvy~9rjAD@F2
zfVX-Fe>?bm=AHhucXU+Q>+#Q@4^H6niH&}MQym@deK<NuUk0}N@$lqe{|xS_{Im}M
z2$py>@J_!R>>t7p2frME{p_9mIsmgmxpVya^x(hF!DHZO@8jO5y&u5_w;Lb>kh6b&
za_|W*`}hYi&FT4v)3d{~^Rol*$K&IV2;!#)Cx1KKKRA8s9UY${pq-x{48S|i_TbCm
zBmiRIXYl#M`RO5q9fbGivxAe9^Dk$I$DelqvVR890sG$rgFhlLAAd$027o#~`4vtJ
zA%U<k@P7X30DKRy{W;y+*~uOR#VLT@{u%xtcqahm8MagJ^TCfthd&;C-aml99>aNl
zK0H0xL1;TXg^wJjul{)ty!RY#9ia=XEd7X)@BkszJN&`h`}ns*xO4<3_&6Yi)5DBx
zz_8Qv{hu<xKi}Flt5<e`6ad0tzD(YDpMfX<Qf(ialVLpep{!eE`HJAdy%k{B_)T4S
zfZu)$=OOq5aQkN<7-z#U9zYfK%ItDqAOjbrRbG7h>(NEJw1bPEF0dc4FY}a3;=vbi
z)85%Co_j#<{j~SDgY4%EKn*_}{&?{d+;tpgWB0#FfAvN_kjD2>m~4S{WOg=fG?vzx
z*>)E|j*cgnFu}7h#d&$2UY_Uaz1YPc@lpU}vwxo~N7Kph`PKV!^XUsO0RD=-mtP^7
zi+(B>fsdLlE`ffZR!3%E6LV)1ynxg*i>Be)Ly$ol9%vvS{#U>P@xq(c<kp`8>VhDJ
z+HKeSVF5fze-3mDK5+jS$ee>;w!hm!N;3cRKXw4YgF(k&IRO|1v>t{UsISuDNC83(
zv$&gk8Iqvx_QJIfU*iq$y%cUx>1Or>GBVIqnR=d1u0!;?3IwvHUZf5m0Q;La*;n7A
zuimqVWS$x^CjIU`dJ|Y;xtS#2PO-q*3_@cHet&}=|K=lYN$<X*tm!M-<LRp_&u8oT
zY76JDf~XN9F80q)&yGLUFz#(90H9N-^nnV8s(QyuxdSIGVzC$jXWv5#=0dzkd%4Ko
z&7xpp*hL)HiMjs7_GTwbZP^a8zjwsupUnJA{`~L>eE#h3a(?P|voB{pk{sAC+Zh<*
zFb_UVW?t3}n#4**c0yQ5pOr#Oo_3iBB1G|$KF&o9_DBIaal~Y!d6>Z-kwHziQyOm4
zpSE*gK<s26GxSs!ool0+58QyDQW%hbz3nBFP3Y}-<e&Lu)w#xT7}jQjUv7Jmmu<87
zvs@8bld%69m{!}~8VuMj#r@9q?)EmKx80p*;J2OXu;19jvJ=s4Ty@~d@y{2Z_Wm!h
zrO5wYy?j+$+VK(N(bu(Ue?C6>>&2&oPr&%Q0ES{Q<{L2Y%j(0x{`-6iZSTqm`0%Tg
zwEzN!U{hYM3CF8>P<X71Idu5$76T7?Pyob03tl4UE!Bt6Re}}=(11xe4TGhJf<izq
zu7IeA-vW)8=|}KS<W0jdavb5`r@_@KMqcQ1WVL{=&_EA*Q)o6|asfjjUlR0}fn}3;
zXD8<e-u8Sk|7H`$i=F4lc7Vd6uJFC>-3%6|7pDhDz%ziiMd5`WWZB0qBx=#AzTH92
z9&bAN$v@WDSt&Wo{*;-lvQqkll=&N9u%Er)Rk>Q4+!tExez9|6vDEnnx-a{!ap4kR
z{!M7*6)>}3N3pVjt#k=YrOULCisbTX0wW*UA3q))f7m<PM&AvNPWL_m53aIoeAqTR
z?brndC~Ov)ejH+DDAfNjvwI2$5`Pa1t(`j;1^-6zV!24IK*+J!uw1Ao!%;E<`T%}6
z^Ox~teX)o~(BVR-1gmJe$UZ`_-EbF*O+O#x3^*!2g~g|p=rq&Osu*;;B=<dnUu3_D
zu4~tu+(2ki05P%#h@8$w<;Sr6C<lvt6b%@>dGc{L0}3=)ba4|mfP>hlC_sYB+UN0#
z{5uG^b8+ZO7AgffvS==-{1R}8r<>5?6Q3#>Q6QH86o}I^3z|W%XpgQC8vt1oWb|3&
zgadsJ0|?pljJz3&UK1y35}PrJELNy8dNfJqeriy7zb}Ra<YpC&kY@2ngv^sLV#zz1
zH}l>^+P@eUvv*UseK{P)BPKQ80yn0z9&ue=AwB%>>#68G*QEmShZRW)zmUN}h}k&g
zOgF_Zkpx{wOL^ti2YBanLj$<Feyqb>WUi>HRpA#V^<y6pLRUr&*kH3t$qxeKD=(G9
zB7F`TPUUA2Ww}icF^qAb6pE=FC;?hEUS&+8iRO~YMOFCa5U4;VqTw~kYL*p1Wffn9
z0U(PdB7&0oj?v^cKRNKxLIu=W4D(mmdd4v<R7sWrjAq@C0jhX04;+Cza5P(JOldBq
zY(<Djgi<yg292sA!h#J!I3m_Z0XT-d32#<3$SE4|7DY}@2)MzTf48^`=0f&!u$aOp
zi#5#rHCz0+2aT@Mj71I0Vn7PHa#RMNVj*k*1x^9vN+vZ^S%s7YCgT8c(`_K?x={o@
zHW8i3ggGeL!fa>|m<Gj}X?dn9(42!uP^1}<Y378QJ0NjKnv`p11)H#B>oO!&g`1dk
z6PkfVnGEq3j}xYq3-cZmW9A4EgTG1yGzTxjNj@=_F>c0c{uS}Y>XJ&~yB1%`x)?ML
z<C2Sp;19OI?J~l6<X?Tn9m?`oQgD|4B!{$oREo7?dN$gEZlleD>s_5|T$^mH<gBVL
zppJX_(CT`?WnTfSokJp<UXh5A5JYXV)`)~HArvCBbutn0Lg=!Dt+dxoq=G&Jb157N
zTR`)HhCNiJsb(E6U|uPU4`#57m^YSKLojHv07R@?whTc*HNplEhxY||`eHaACFLPj
z_QgeGP_H%u(laR{B0u(LiO?+Rqg?ByFO;in%hwGZqqJMP3KFXb293s$kat0$Um_|j
zHVdL|1_cXEK$3U;Yed7sNYCiIJYDAzanLIwRyreZIF6#&b`rBGn(|e_T@}vCQ4dK*
z5H$^j0w;Bmufh~mkxv)v(kvk&);E3T2<2E}qbci*C$QL2Rp_fnu=LnM%@(exlK!a+
zhPj^HHiRta5;-yruAJD}L5-RjC+3h2M5h_Dtv;s!5aVb82$Za#3xdEc2)1O^m!yeX
z2Ng@T>H==lRlq4a2aS+Zq&cSGVQb~F4>SBQ1-)foCv$|A%85GNwUUenq58PgEa4q!
zNkpshvj7?ON3{xTCj&xLfL$+^JbB!4y3kD$Fisi#^Eo-<&<hk+Y@>SIP;3?+g#<mI
z5Hrc&Y96fsVG-0Ln;fK|O>0EP@>3NKCn*9+@T_AXB#<`!BRAEgIz@NUB*D=onTi?G
zM00wFt28+$vacZ_5KF>in@EO8&tBj{Ge5_KGBji-nK6{;f+>z}FXOuhe}d8ng4Irg
zGEcHP2Am-ajwRZ1!WA~+Sd>c-cgaP_)M!`q<fIS8Yv{<t*kOsfsyAj_8nG_L%*%`!
zM7GuE1Ct&yg7Qg;xib<CRxoJxnz4ID4POz4cqj)Y%XsXpI!Z5MP>>+oP;<E%O@Fj$
zH-XdL$Y*BC+L?me-gI@_s`fQ)-%M8QWnF-BZ7}7;o5$My%)wW$4i*Pb&~5P8Ei>1T
z-10=x<4O<LJR}u9B@KuEH50I?^A!4&Oh%y^geR5WWs|E(C4DUk{AnmUUFoXaR$J>a
zC03hzEc$IpZnEg9Cb-L>kC<e%_LJx$#}pWS#5t?2o+Lb&j6UT9gxX%#moTX-7BvZ=
zT?8SKu6Ph?JYJUCMj?#d?Snu+k6mRpqTm)(wU4-QOxv*tdNvk!RU*>twrbd7u@Ajp
zn4&Ml#F1=Aw(Oy$zbg9`f<Gb-0~y^28!hsU$MUDr&0v2;94<1t8&OV@Z81ro7wuDi
zIUIm&ECWOc+af4tH<6VEN|j6)>}#-}73qdEXv0*?HuDQ_#Gy(rEIM-<U%LrSC*KUs
z`aaZjTSLn`1|yj5jwHkj<Qbd@OHli#2({#~vOfW_CzhULwr3Na>1+VN7~i>U1m*$~
zI-Kjgn^$PMe~@ABH^lxHwx}%t4B=T^1Xuuq2_euz*DN>CP!j~Lb-oJ2gy*K;<`t{(
zwlm)Qt5=a_*PN*YlXx+hWHc#_wL31W<qwq+i`%3cLrpmH!zf%nHo!$FI@JuAd>TGv
zil?xshd68k)37NnTBE~8zKl<X^Xf39PkJZ64snofOoQ|{Qy^86W{of{2vA`q45%`t
z+u8_R+43zN9?p}{NYt+eJArBLN+rsdcEVAJkaUP|d1BHmfIpf6Fvm`lf-ojfMa8Cg
zyd<nR>$g0KYZlCPnX3e5(+3X95?EXWo92OydF1?N1v&cXChD?WLYQl^4@(c+axpZ;
zKF5M47;eq79A0S)by|hTwEQg=wheD%j=;782?4)Y!6v0aqxjJPI09SXFlU(7gdj`O
zO~t`6WlmxMrW;2v)$y2&*r62SRG1|m(BR6HVr)e-^r!wjAi&&I)}LX3tzFb|KloPZ
zR3eFm5PnrS8O&l3vek$4>N$~wmFyaEYS}K0)~+P-I8<&jSwqYdNb#p^fIG&ZBdgnt
zdNHG4Y9wMVi)_H<Vo{HP=K^$>Jhbfox~AmlLmdhcGwTeK2q{GxM1`<l!t+C}8k2I!
z;zhV6(L7X4t0^7GF8vi-gV|oV-uWqe>4E*Fv&6ANo@qV7i!E{-Z%ZqGmXf2zVUphy
zt^?thA6J-E6%wl0G|Td1q<qI~V+0_%72>_spmv`*tGYo*=`H<bMHIrX*zDaSkPCzw
znt)zjFkgc@v)n>Vk+9*;p!Mm^YnR!?Tp4e6EdqGrifyy2v1c=Lg|yX87`5#9B6)f<
z?hu)dJ4L4Bhd`#|he4(?wV^ZV*kipHnN09ey1?4qc9ept@+HyEHsmB{r0E6)4!d;@
z3_5upjXm?Z3_G&ZSfawFY-UvGXPi$gaMufN3Ifbt^LX7VB=r1i&Ygk9+~%MWScir^
zm%SqdwQ&tXqu~LyM$)>J$P`h@lZeVhRPrREFcFnJi6~tWl{|?kBO>a)M<NQ}*zeKp
z(vGOQ@0F;k!9<|mGwYBjBeCuugT%5<UmMq;Y*BL6foVg4-Aua^UNpNF-SDE>)wC2|
zw7Ln0;l-QiTPI{Hr458=+UUXsiz@{6SQ7&nUApOlg;XS}u!PNR>8JWr_a(|C<GwP<
zxW7y?ewa)$eyB_mcjwsO>{_%9p7;SA`&-?FOM@rwkX;^k$}W!|f?Xbu9}1a9CL4Qb
z6ROeG7$8-n>oL;)MU7f*?nAM0ixR=cV=GO})8J7<GbZsaZsXeJV@hqyiAmjEAY8jL
zn9Z>c(4j_Mm9a(<(#Ea5^=o?BV#@(iOhl}?GmT~LGaaxHrrMUTe$F6oKh*K8asw_~
zsJq9qS}>hIRZDngA1Eo9SSC4|t@hSU*hZfPZZd1gXsjqfi5u1idRkCS4aO4v6oZ-y
zmBF^x$jIc)gAeq=SogFiCTAGLr>gP=b$%?<>oTZY3{nKh7BH2|{vOuleoU*z5gEE_
z<k?^Vbwzk~{KD4<DRwvArgue;zsVHrj==80y{-@9mT#81+o7VlxKl-Q@k1z@iyuhQ
zTrwK*YQHxYi{mv*<ChY>LO8|m)&L+<s;*tAZUP8_MIe|6vXx2b^x0P8v#prTiA(@;
z;|`E|Exy=ErdqI6r%IM^x|M2j(qsvWr4v;yL>8@y<y7(--iBauQBzb=p@P@&Mhtgw
zSH6B&v~t((_O-jMtKSnWM#$zF(iAC+)Pc!GxQVwfL*^NVd8V*%<E5$lqZPfusxwG~
zEmHw-)hXVNXeociZj+@d(ram8+EOj;omCXZT&>~&HrjRu+lUq6=iB9=4xeIn9)JQz
z0D71Pt{4$++JW%{vVCd9%oK&cn{Lu(O(@=y!)CYo4efYx&?#Uvsjm(iAvstB8_{)+
zOE&BX{Maw9mq>&0((b5SPb8M$lMS0w;^kluyV)s%;%%c8(@`e@jXdC~rK1T=0k*c_
z622SM>QTI5<WjH7bLtu}&NAw`Fk-XtkQ_Keayt?Bb{XGg-dqPH#-@wJWb~w?ZBf&`
z1U=I-ifv>VA}k|jhABJ4u)HyCY0w2n9Nk{VcMm?J&mGArrt&H0gqW5}Viu5zbdHSl
z9z!OdGxBSz7^`hrX~H-&Vx2u+DZA+?nVYTEYhOMaF&oX;jSZ6hQ6%my$FH5YFoAT$
zwsp*|NaD<ut^dp}$#l;V&e}JY*KX#xmPelPxNGDYk2^=6@wmU_8IOBSo|EQu2>n`H
zA+i)$iGg}hA#!Y9ifsavTJy&EnsrLxrO2<#y%jiVhQi5&4t8h~S4ua9IM}1{>VL6B
zHSOC&+=IrXh%^#DXS4t1(7)#80_1n9PDY^`m@1cyrgtS0Vqw~RDKS~jm|Z_gs`nyW
zLOhp*DKXjvN{&Nes1yz-6)a^sgf?7)2SY6K;-;2O!W1_ThjtVM4;4&I!87H<$$$Z@
zb3o0xO<kN82`c2t8!1@mrI(n7h6L4-^v07jGVo|86QkQiwXq09qZtUDqDF?O3KJe6
zZ<8thM{y1PBIYKxrt@E%UYs5r9qgZd>m}hdSYE2gJriON2V^pulopjgjlg!uKPLh;
zW5aM+E(O7eR1NT(;@4Psd*E5N9JPt4j}3@2dncqEs)~1KxFY_Z2$i%UY!!N(iY|d^
z5L5;671+hseul5tbbNK<%1b#cGu8!htgse@aE7(ZM|Pc$S<{$c>y2G9`OOrWSiKTd
zi%<w@pt2fBD)Pgmg$Q#VhD>2<>$McQaNVSWlGyq>eI}eIp~+qk>emj@w71BCR;nwQ
z>RUX1P|@r)+zF@lhGg@nTb|;#iSv(Ua4yi^LKo{5LqO=%FvC0B5|G0@mu<yUaKlQ@
z+%3<Fu!;P-9ifE$F5dQrXhy+%Zqg$>lS3i)CaXs{=7@D!w6RT&zgZ#2j@+0hOOV2r
zjjCy5%(js-ZZxGN%PzX*k|j=+qISEsA;ZY(HxqNHp$M}}Bs>#dyuk`cb?pRJ%DS=W
z01dFkj|Lzc*n(`QJbMRzyo4Li_Tmj7bmIu2N-;S7_CKBRxG9XOFiTA%alLI$f#lGi
z`tyLma-*@65&&-0w0024RYok8Bkn8_+NcWgb`F!uO%CUA8AKSOzbts|u4%{R!D)lm
z^@L4O18TQMUCksUvZ9Q<YH0+;RvYQ2076P{$`Js^gw&%GXOq6Wl9^$~&M-{Dq<rKG
zL6Jr%gtU|m7j&0)+dt@U-_e&dsU&Qww(=aR;<+M&t%M#fr!%RJkHT$!BjYD2*`N{L
zVIcc-&$eW2ZqWF#K|X3k^z+F--m}%L&0TgZK3<t)q}9@<^20q_890P+`yt=zBqO~p
zFV$b}IRfb_W;bDk^W0#cH)?%-XnDF?_f;r4uQ!!JH^p?D$}bzXbn76rgoL7syG8+6
z{?kUB$Tw^u+CgA;+hwU^M1BrYV&C)uKixCSwNO;D7X?QQ`8`cS^A*2V4a$IKTCzp1
z%?(;JC<D4}$(DsSciC-&Ql-!0h<TwNTben9aQ6&KrQU-hcF%r}K)PvcL159tUHU6u
zuVF8kfn7jtb#rRU47;gI+v4NwJ+QlXdm^flBJ7!K9B^)&g43uxk_D(y^C|}^L-xpC
ztQVQgQ?@O8Y;LaWvd8A$AEE4V2+l_%dn9f%DL>xm-fJR*on7ymJU*GWl|w?$8Iil@
zhj}T8rxPtnkCpBg@ikiz_Y>2tWqhy-D&(%r5xC9dMts7XfiS{@o+7*tVNQ-ruy~u5
zM0Ul18Hu$)YMX~oYAr@=c|u9y<#O6ev`vh&m@`R?Oq(WhMval(^w#tjZ@HKXIBD>)
zN<j|ObdnQE0#=K-WZ{`8Eh5gnrIDf)`=f?DQI41ngsx7PQXxz}<yjo<;jb%(gK>_P
zUzC=NSGu)O@ZgtcK=gugTDJp?iVUllg6f#7Dy|XR9#{s8`F2V)>BT9e+$Y)%j;7+R
z2}jkjtJ)W@)jQ;Re!5sZ3z?}og|=86kWD2z3yx}|SM_gRYjnxY_9>jU%G5r57zi)0
zdS*LSAF4ibr7_eE&23D>TZ%fB5)k=AS2|-|GXO)-#bZ~kz(>>S7ZP%_CMT!k>NjJV
z>BffSE-{Hxl919Zn6-N^*<#)Sf-yd<sR9YLQP&dFvCQ~!UCT&~2PB}<kei%E`T984
z?Q!+nDB0@F0j?j2$`hh2##oJ^tFdb>t@=7U7yJ_POC*DUP41gE$i2@NhYHDKvQ=Zz
zW9N2~^7~r6g_thr8D?8@E0t2!bbBCXqSZ2%v0$!5e+;%kVtDDe@j=N%#)Fyq%MXk%
zpQ%q()Fo3-s$?|_-nC*GAUJwe1S(sS8fXH;Wl$%V$eRsC30g;`Aw)P-n4zK~kp=Gk
zma(6y7t$KAJ4UW?Yc2*SY6{>U6|TjF!*?t{S?@TTVos75v8roXmZ*1v=Z6$$WV>5x
z-W1!9QgoA8q-n40ryF8x(_M1zwo|3(PN_LkiY^q#d3qSWQRSj-DOU;I6iZFGVYxdZ
zwhs-oY>R=ihGxG-NJUGmBcQHBNw-KAE2;p5KzqL_4T-I7M7Aa<XQXQc6$J=udrUI!
z=Ru9m;iAlyuS<tlz8XL1&^is2xiog!+NH6N<y2hXkmSj@r+~NhBX$GT=`1lGTn+g>
zkvG}LRa9vgG8YU=T{CsCFCN$O%b3gEfZ1I|bT($+l{SD;_Q8{aMn3Fr!0u!s1rH{5
zjLbvC1brAL5D5x#4-hdLF-{~hRI(NU3Q!1_Ar2rVpZ;2<51wDONPIJ0pt78^hK8Kk
zL#s*yCi2f!k-#=gmW!!y&}e3s^eaBV*q=;!+|XIXV@#ui`No!PO}NjWYC^ui92sF$
zv0t*eiH29=!OEO87((GPSu$zkHY}oh8j_OCV80$ZkPF<{Y|ZO2lB2@rzHUQUl>zx7
zKRp;TgMK_Zwrms1z}N^8i&Jl@Sfr;DZg$vd04vh41o4VVZ{2Q%h>UF;t}TdT!=!A2
zmb(mP!#0tuFm~pTTsk#NJ5BdbPL8&d(;sSTnZKW0Cqu5&lGFa#jA3s&>Q)|Chc=6>
z+X0Pk{0%!r4~;Mon{+}{(8z#~Bv5$1u3YiBv4Tze`%|lqcT*h+&>P;YhNkv!5WzX}
zk^+qhkuT=Ye0Che!KT!(T3{t988{&mfAWp7&}GlHZNyh$FmV8He|Z9Z-tV{owCGQn
z0ilBh>h*}_!#8$g+iLZAE&LCrlOZ@;&HrFJ83v*R;^ttoilPmV#vCI_r;U_X-UzCP
zC$8nL8)nc)6_cUXPYl&MLvbB?_c(guCq+*<WE^qfTU*{oEpn2nVhuL6x&(n5p*AGx
zn@Cd&FK;D)fS8!`T9!$sLJ<b8>tU1^a;(l)pdmyYCa%_D^((pK(#OakNQ_d5l^dNz
zXrvX0w0HxP7@e$*L#1XYbh@kOBR$6HIOZ*P<$OAE;oF(K&OPd93C%?)=8R2$EHTx1
z@c7y@wSZ&`=5$+V?o~gcl%dxR$VMN-n^nlqE7qtkH+jO+!231Ki%-Vt9;vZ6IVuqr
z2O>vMWTS#1q8S`#7vP(V;e3<`&6YoWL7Y(*-@+A(6HnyF{;WJAOaJ7GtFDr;;(1z=
zWHU*`<=LJ3lJv+}P1><q3@B4_k$OOqa_wt1TF$2_iJ2jglUqZ&x2Af{1#N`%qoBlN
z^R+Wb=mcz3K*6p4W|D<a518)Q;AX7vj@#W^vU*mu|MR9@j2jJ3xyN~*8{ee7E{=Lz
z_KXo@+>jYgv0cFacQcDl&<^jo1s+GYm+>8GkPG9sG;!z#xs(Zy3>=yfBQi^qvED`6
zR<zPh+2;+^Eoh+kz%<WEoG{f@Xq;}U%USnoR4#1B^YX7PXW@-R%#x6EBRHFT?d?h+
z|52<@Xvn*on~SiqYlO+PyX4_y>>MYuD`&!d(j}PF10_7jPdWv8_WGId@7-yRm{*`3
zdvFdJ14kfApiziLjN@y5Gc!u;wXruXyU?#VLPwV(Z#g(bNZ-f1<U3$ej&tDT$H+?1
zQIa#+8>#TgCk&VkF|RJen+-3mH2VZ+yum(i)apbh6<(cOO+?J2s|rA_3hQcF`DH15
zeS8`CL4Qsv*U2m0+b&soKk7L2bMm2pzp&z@;7{DBjzwT{3<zJb-dLn-q;Xo%as1Gw
zVI5oYbI75UA>^B6Aj>Y`NP0PTwAfLvSA@alV}(hyNP}Q_CBvvmG6+}mn@HqhTV;`}
zisr~s33KGEggNR&&2r?h2sU!lvMO>}I2$?Z5fwSDwv8P1WQyFD=tib<T)Fsasn{@j
z<jLewdnJU?Q!Y0@mpf{sW-wAu;E32%)=KGLq>+~p@zBy{1z)6)(dEjRXt$UzQc3V~
zXp^*D*cYkz-Ofy#wrl(%CC}5PiPVpJzi7ZFB?0@2)5k$V+4rrFL)4RszG%SV1Q834
z*8t+78*{A{=BZnB{y1`SMSOoM@1pi9?=-AgS9t7MX<g*Jk|PqEMB~CU!Ssp9ed&hY
z-VI(Mj}YH}t%iy{4jK6oS6oD$sJKva6%1jn-*pp_{hZs#P$Oqvl5ON5$u=_AbeWq~
z8`;mQjl}O7wz(XN7bB2?8QIT(jRvxLt&xMgR_>~pu<E{x<K#Zs7C@nSR&KH9U2>I#
zles`<OgOnr;voBDGM_Ak@?@ur%Sqr*cfBN>qPF>Dz>jDIaV5T;GN14+uFP^Ll%RM!
zt*F-`xCMTT#TRl*27+cYnU0sW0L7GYI2bc?5nDu@_@&>RRK&YjVxBuJ+mMDl5#y+W
zEE@JC4uvV$lR|dPOk%WySVFkyqSsy~BlD2`b}GS^Q|2taVD3#1X~1aGBD)E&CK+I|
zplfu%QqMQBVB+dV&2FJ3t7_}yQX6~b(s)O0L8GK?8E{~Vj;hCuWW2Wnk#@4&?OwXx
zc}O~vutsi2ocaMwI1x#HSPO%19x$)>5qV!3T9bSsRWgG&M)%@w<V+%Gig0MymFfnZ
z;gR)S2vnvU*hgS}AKmE4%^`oM+i$J?Hj5nr1TJ3(4S5=~cCKxsqdloklK321bNPB?
zd~@uDV3mFn2xcvR1ec!}Up_~&lsgk%JsbJ~T>i#3X>pKYqVoZdv@Sz~o5Rbi$vDmR
zs-oG1I;~K!OWu?eh9kCebH6vab}JgY3p$HA;|*LmP}p=A$L8C?<Z#$Zb0{$$o&{Ii
znR_zxgDdz@mWPKi1LdLmOmjLk(Xh1q(27w@U$N!IaHS;)nZWvD^bJf33^%Tqt!y{c
z!F|=VZtJ%VGXUytVSII7=iTJq+R7vu*WyYfxz^%J^vKBF!eq&48H=wBeH=~vOHqcO
z4fu*dOWuuITD_{+tCMRRtGA|lI<OgKg~afd;xYRY@nkXziKbNfreb)$)>sshEEjQj
zYs7M`aMf+~;9Fea*V~8WzXakC4cH!9zu;LMA1VNp-H@gD$pA=$zJZi+PA_ph%b#f}
zKGTxfuudN@o1WvPq<n3}l#HN(8C8~HiuKFt=@HQu%Tqb5D!bk>6P8ERSCeg*Jf(vx
ztmhl^?E&@eX4`|2iR@@)Ml-%ewxHd{N^cVa$=-+rmrU`H$pM1dl{%BvEdzOWE^&gj
zbrff19w)ejfR04NFpBq9)CNM}3<#>;t%~Kt<Y^O~4ta8+n~>BoBH5C~NN+%{cv?4D
zFD3%RMXx(ty2$AGGWBc`U-Lwb)!kqy&fH=`QpjznNZDcUD6qaG4Ktk>lLA8D%x2BO
z0n)}bkfbyWIfx1-HS%F4fMtMUvHP4%r<#l=$&zw9DdA=01|f$l&A+s;;5eLP{;dpC
zm_wM>*FXS*f%GI?+DRh9d(aaN;uT|!bn>ybe4<M{*}+{kzDOrUYdfHdVl|hJ+wj={
z7&h*}Alm((F^=2A-$dpVWXX7$9okQ-%wJ?<a5#c9y2xVSz5Om~$gP+w@i1vZzasy9
zty3h~kSIJ7+>=&<qw7rtVw-Z8vqRav#$_wbx!;C&ubZl4Z~F{3UCxZASEdd!!~RHZ
z<@?z{Dj5u|VF3;7REJ<qM!eC?U+d&#!3Du@3|kZJ097ittb}T&k?{5(2H|Sf$a$hx
z1XTm#m@N^Xh*ts3D%sPk)l8<Bgjo?K`*O2<Ecte;^loo0@<g$SE;)%WTG@9I=L|;X
zt{oOhSy+0#X$|%8M0WKI$j>;-$IjSgu8)&QUW+91YKZu@V7g3O`;+QlLq#uji*y{u
zmqYd*Z>yaOX3D348kd|lAL~2Yh0e{FBInXzBUf>=zg($u7IkZ|Rs+`XV0rmEm(2<y
zV0vSzz{NiBEiT;w;BThA1-!+jy8wLLU%<zG27KIKQ4{xC)X16Lp2k(+Lco+<j(sa+
z)@V-A^JrBKU&$07cC~WN>K8a77k3D{4i`?LbKP4IImefABxYB!c*3;-fXJGT4Wwh)
zm+T3649Q+KS9C_VUmvNc=^!xLl`nK7`;k&d2Z7Ntb;M89QBp_zL>&Zm#7~-~q>lJW
zvs9>K(;<N1%WmIl?L27OwOCs{0MiOb7_BUC3lfGRA?h{ZRJw?U;0+e;ytp1!DWH1!
zkv|l<4K(crf!{#I!mItA6d07;N}f!d?uEFKiIqcPe>0543W=7R5Dc<d+Bn}BhBT?2
zH6V?_OySL{v&^*XC1nFl;x0FT=C9doWqVJ0Y=w4Jd?ar<<LoQ2zb}IvR!5ykgG~1B
zH_msj$A%On3pLTqAi!W&JGa7KSL!LS%byfUfE+xovB`KSZd=!{Ae3%kQM;a3;1<YH
z3(!_2xyoooc(d<(l<(SsAkM$KkuKug!Cbi~o}|#{Uwo~Gs=nT9-D&))YW0RIv5jE3
zS2tb*MU^baHqczk{$ULgL$)Bdx0(VR!pjM4E{7o(WNsxGvU%6px-qCqE}TV<7H%P@
z>Tw}^iDfa@Xoy46t~MlQk}!>6(Jck;wzMUy5+(J4pl;@{S?2(us2N3d7?*J^U&OA?
z#Y{F<Q2AgFyB_dw!x6=Omecu-c3eyZQVdviqqfkb>!E8RisI<Yp^9yfk<mL1^Nb~G
zTR@jW&77nmZp_D`v!ni%SK8OcS>@wR_X$JQMKkA|F$92t>%jEy_@hV;Qap&rkp{<Z
z8ILXb?`)=L>m7Tm;ZPb|uG(9UiO4*g?VYV#O=EM(u^$0(c@NL|&UCFQ30KeE-dqKF
zwVl2WgT%1Xm2m&YJi&xATnG?J_V5EcVzQsLHy6NUa)-57Gbo)&ldcv*fP_g1lhz1f
z(jFm9x<Uw(?hry^b15u!FXUPYD|gqs5KH8fSLDw2p?-gvO{Lx4?Mr2DYAEQ1!PGdR
zo8q(VHUVzrXI`3Lc;n^p0!tT5v^2kV6CE?<^D3?HLoIn-DUEW$NKP}BU~VO;ZX~sC
zwRCVJ>2P#D5LvrJ;#bO6-B{X&4(*F1sIu8$Xp|33FoG~A49&8F8D0=)hM^W28Ds}-
zez36<HIC3{2|F2YR3}Bbe$5}(&Orv%EL@qB4CW!UfDjc=&~*S>6ZLN#;HsSy*~sIg
z3l;kS*1vk8VWzRfRO3NIer-!fy`n6!X#rz8MS{#@Spzuhz!9uW#w<kKx&d?OV|w;1
zYIbNww)l0fG{GjImrLWIq0U3Z#1PGM8aADO6L7i!PhSk@qtbxMzqn|0;%g%yF_CD9
z{Mer*LbIfga#f&HvQe~rn>C^U;b5j%Q^ksqNvD-}Mey~2K|Mkfkfc@r8qtq1<}%tU
zrr{D$;E;e^`5q0{fC-7EVjdW=4<50s+jN>#6`ji_GsuXPR5FarSVp4RY04B!m!WcE
zSB+-t@|9uO*+}On)6@s7Aeng02K%hi;`$qpOib(`u(<;T@0D@Snv5<HJf3v7Zy}?w
zVdLg7J~GL4GE}Q{xUL$`AucakhRl#aU=<iff&ty}+-X2%ignAW@`5s6I#|+BYvd}-
zODSIv3;H?&ydIm=W4`p_0jFYvR~{f36n2CF6Omy<F<~mp7|1L#(x1T^uWi~wwNWji
zg={e<?Ov}2A?{`?HgTlu1QfN-|K*)ygEh9I!Do><;^_7=z9VBdD$zu0?1mnnk&Yz_
z#-g23j%NCW9_?tC{Thxx=LGaX;?b^%NMxv9Alg(2+FUiV!x(f%l_Damw<2%TO=?w)
zMH`Dkn=Fxb8-^Ak+TMv-hM6Z>^K0_5XZe?#yzE)}?G`V4wz(Gh%bwwCCT$YMq*bDr
zbciS>og#`(Ca_9Z-8tBZ;j;lAwe}$~Q$LN8$0NN+bTn|b!Dh|KTKQcxP`J(@vR70W
z4V3OAh+LJ*MMmorH})6KgsfD3B7-aqc50`SrTIcQ^J}g{*hUeY`E@r?cvCQ3zTn6D
z7U-A|s)LiM^oiV6`b4!%?5GC~^&^M%I#E66IO<6c{HV36n@FRPee{sq)<^A?)kHeI
z=c5PTX+E-_UKtt9r;HrrQ$}VJC?n?yl#$H_+mZXc$jEM+?8tF4S~S274<h0()g{5O
zC1Rnob>uqREE<SgEDxd?b<}P;S)|laI(o!OVNuITVUb#==jbW5D>8W4RPYa5bQ*kH
ziF~#kv45%;v3bJ5F<!()?8GTzCvFitX%n%Nb`k3^q-5@VX5?b!((aD1zth_3T4c3f
zWz^o8S!B5pW#S;6g)9$>Cb#4lu|K~S=#aQvOqooRxX6pq-9~m=#NJ0By$=J)6vDM~
z^bI#6XJd0yYH0%kyCQK#XPqTFYUDKNt6t?D8tnr*;w&JCOYP$A(D?230#HzmZ-<7@
zhDG3|9QF;(9|tePo0!Qs@ckQxa2ARtg~j8W3ioW-6eqhP5h!3zSy+A)T~uHnOmylL
zUEszA)EGp$5m#>;4jnQMaddlcGsR@(_p$g~;aICVme`?~Pyo~x&^V&K`ovgbx^S%5
z8%iJwDt(g{o!gxy3uq}Wc?xDsHGas)!%KIG4SK3wfwNeXjUW8c@{)c|s4V$PJyA1V
z#FLm!FN$xyh-W0*0~Sj*Sv(=O2xKO2T0J0MHlNuT(>Bu1A`(QdEFTMJ#14R~<p`(n
z<ylDXnop~@9dSv*x}JWcYVC=UWbqW)DbwgjLu4`9b2CZfka>y1q>MmCA?4Y^lro%V
zqp;-!_x%jUW0NU=IV`Mm!Q(gxVe(YZ*6MlD0Ha5HOJ@hFI$Ff$A>h*8!K{|xwR&Ed
z3`2Gf=<kc+MmHUlf1!Et+ns<5BplXuvMJtWz{QVuBF+$XUrcO`%<s%JzLXH=U|*{D
zH<#E{#MxS8Z?QkW47IPxB=M(mHTz9vYd86+-R)H1yz_QN44+KNKUc*O;4oP(rozX%
zGmP?><R#uFcI;24JYsZ=Fx?yH>nIaA6$a=mJ~kmu<d8W<XccQz-Vr~<8qo2^;IzwR
z$wb_3Xdof?noLZ%9x5{%8QQ5SY_;GHo21)-j4D-Ef-;h@VfuSKy0L67C{hheLeoHE
z$&hjn88ND1ib6?p(Nyi1*3A}U0!L5O+zph7$31j(RjI1DQ6?alrbZ21BKhweFJLGW
zig>axan9&0+J*dDS;RjXswCSl<B?@^Ow5g!Of0OSk4@oDcBt#n)6e>;yQCv>9A7hc
zxdhysP%h;{--{Mglbl<KwrO<5O^I&kCYMb!441xuxfn39?0ow<3ybZ)@mi%u2;?&a
zY!a~AdW;ASr;-IFxEoBP9wm0Wl`*zpIBh*~*x*cfC<9?a$?b{D#%C%!GMpTrL^fjf
z^_o5vN5;z1XhnN!PDErUcy0`KHWp_TdbVxxT5m*X(H72(;5>cwa~!~y(!P=O7u3N^
z`U?{96(LA@2s6l+5!uD}vb>M<DVGdbM{Jy1T@bsi-7w+-=~iM%h%kA^Wm73FRzV&K
zXeTAH>SjmO%JRrwKs9273>Eif4po}Ca<eIUZ0VzGZG2}nWxzz8NWL;%=T;=YsE(-W
zT&7}GNsbuEDAqOrFX%8}lqVFylK?(9f~QO7#x!!*yWiG*xk{aZtD8@Si!YNNG|Xi|
zwT<Ee@oIwP!S~n|3pw?>?lO4<ZFx2D*spTS#7!q(8L-M0XC{t%`^xM=x{@>LX18BV
z!6@7+BbFPVbhI-td+&-}o|z2jq_HQf?_85yY&76HUMT7qz$z_;o2|JXv=nv>XVZ;^
zJ8r<)s|jrvZwC4cKM0dez4Q}ySQ;rcVHApK(fVdA{S{l<O0Gus+>M?^ldXseps@u2
zRq2iglCcet2reK<Tr_DGUd|?Q(zcnt9wvcIED#9YqvVmv#0Hs2Eh9)=G=7#`)FyG#
z5Sq6tCgHe?zA9$|N%_)F8%V3C0!g{n9c?Vl+M&JsTa(WAq85aO(TiM@WjNey*N>y9
z-4)uY8EEP<A$}-@KRHjL>12tg<`94%P8Y$oxaor?V+s@*7?%flB8mD2v-7puJynin
zX@C~0(5E4R0cjRFkAhBh{FPskgO0!U!KB&o*9FvUcKikFzl_Bb@uc&^>K^GsN^wk9
zQN&{?JD}z9RtxgjYPA#(!og!EUY6Xl0_n}8r)32n)r4GO$?VCzxy6`tt1X(P3ZX#!
zdME1{WVR9Im(<=eln{5z5yRrpl4;9T*U$vK#AwGbz}U1W4zb|z(3ha`Czt<s%MqfF
zhZ4;T#R9f0p*ZWOEg&8i5HMYd#cu*hl~W9#E-oj5Ki&0~@qO$E9N9X1kY<6svK@pK
z8t4Uvi(w^rT&!SLhTBKa<`FWDfe6OH9+Fu4q+?fdC0)FdE9v@`TuINb<O1v0X%<c$
z&T1CUT0m2cyF7lixIk_>LWZ;`3os!xG7ch57>&#WNgqm2k!D+w=G&)8v#CfklX}-r
z)N7*^ge}NECgSDSo!(n(L?v|@v#Ab9BMTc+yI$IS2PCSEeebG+RV(P+*xz4Qu*+c2
z@MbkMwSTb&f4d(4%9-LRqD;Y``RqpGHg?h2rmt}y#bad3K4&L^m7SPixDs{J=-If~
zlp2>|m~6rpi+wD%hhy^39fuKY_Jm_|!W|bxHv7P-0m*9fH&rmjjvBtP8m~T}KG%Gg
z)d8%Vo4h_Q7xvy=Kf^~r-dvyd-eG&g$3oo=KCY8)hYJ3KbXCv2n>L9GTT!?~@ZN2I
zMVU=8OTY!J7sovoWwynxj}|h4T(?=2B11itIc~KmwKHw3wbyM?;x?T@69ylTfzWE3
zMk$mvlI9~bG~6|DRJS(#adqtYVYYv~ci8?>!(uXvkB&Evyl<^>q?BCa_mPyCYCL#^
zeI@VP>no{`WsEIbL37XAPC{I-8<34YhBvE_pF+gG!-=W@MW@10t*8OTRA3@;_Qv*7
z))M<-b=w;@F);z#3-I*Ca6T%Jj`A-q8c#86BOn2isEhpApNX9)rjK&<xl>XTlPU~a
zQ&sPR`aO|3BjtFGlJv%CbEZsjMfQz2$<>Ca(Of<am*@hAMCa<HxWTq*OEQ|Uc8qjk
z<j7P86-!A-BKL))VPWFQ=vzo`ZaPiEig^J|mRoBQ*N8D^#+u{GY86VR%W8%^mZs&p
zJZ2c2_WPX6(4v7>ZTAk^p6hK%zE#`3^DgMZExdiXPP2k~i7kEIH$`^@3Y8<jR(hnX
zoFmbch>CZw>F~Mhu72N6cXg$cFjL6U_KyVJqecXY#ZJ4>mt88rW+s68cssh^ccP=a
z4$4)dsgJ!EJjA>>>UBb0YVlOncmc)90=wAFT+ezI`%q$8st1d7E{&tx%lPiW$5frs
znPM<i*{N@!G)iTGB(0hC%5!pi1QAtNB&QH5lng@|3ZjY%MTNC=CVu)D#ObD+w5=Fp
zH5FYI7Q`y6=~7_zL5gsOd8&F<kxD|S?9D~ei_^<}RJh3KUiBg)6<4hJ!W!x2Xt{>X
zH`m<Tzvf6v6bj#NbMN3nB<+<mNpfPJBqeWBl3tX7JTM{hoOFpiC9hz*cW?!hq4hIK
zwZmD~CJqE0-rZ!d>xI<$U~5-78ITML94&EP*xt=fjzJ@Tez+X+y1}IB2-T#P(Zs)$
zsDZ@(x~|^S<Vz&g`$MsQMQv@VK)U3fgt`(r-;{D!qblwBL*err76*5K^BC@>b}!bm
zXgO?7|3@E}u|H3YkhDy6q{`8C64xkfB8J*6Z|Vh@d6cGAutbivUAEK<Seub>=FZP~
zGg)PgO&`&vDw7SKeRHR>UP-hZHXFX3RvYH7cOCTt7F^k`dcP}O`t|@CW~8sC^Vmm7
zsR}-QXL6j(n6N?s%Ku0vBhijJbm&&DUXp4w==sy=%C85eM8u7twCOw`@(vF;B(MmU
zkgySyHJyhAO+;jn*;1_^vzPwfJ9rYLwxue|tY*<K$GiwW0aW!6LrxoPq2XW=1w7g8
z>R=TNu3?kB`!x}OViC<Ul311aQb@%Vk4xH#IK*RhF$Fl_YKlK14i#Bl4ugQ=vC7e4
z#GyNvyTD<razq$$Ni}v%suDqP9#O~6iCTZ*f$fDGCI5lgrZ$c@rOOSpZPA(IGQ16#
z`@)Q<tHs@HelPBi6_?*7;WUJKifY$>B$b_1kJKY^f{hCHyq#8N6=?+shQ-xk<QKD<
zc>g~p*LM{kSb{qpAw{mA6h`t<vM4FW*~dxHS)9;iS{*~GGev_czT_pWEMgh6P|z$$
zX0zEq2+f<mpqb5y%2R0}T9t)a(-=lIOYN-#PE*tjW!{}XU*1UE^@;{;T(qy<^=`zc
zyD`#aHd$7ZZl{2v*m}$%fGu^Km4&g1o988f95@BCqpWBPHP$zSDyHP;P`>25icuay
zH(f=jR<^j63TiE!>hdWYsSIV4g!85j+`lh|vIxBy(9vwjFfpvQM4F_)D0!Kaf&QFe
zDH=<^E#+6XSc|6R$8xEo$(XGPdx?F`tFPqnUrd-00P*((&-hr1b#$qV2Ls9(Yfc8K
zp!kd}m`w-41Ehm)tNq3^H`cq6oMD*~GGd2iN~m5Jz>-1&qLodUn}<)B-7{aLX;WAN
zcF|sv5uGdx>UW=a$=Ry>cw2rH=2r52KT<1+xhA1w=iVW2xtRLcXK}lVZbz*u?^y{2
z!mAM)%q&wNGejZt;69j!93!fS1YkNWA#iI=N*?7O!-_E_*fws7t)osNW%ONqW48lO
zOBOW~NzhLsvDX++;S!y?c=3d-DFO3IPTDYz0&2p?QADVj0yhtW0*yB6Q@J;YBE*E9
zM+sk%v0^P6Ed`?nk(!i~EOTnkCDV4#{3&4tQ#Ew5XH?{1+F7$uH?7hM8>k#jj=G94
zP9Vr!HJ%#IxCsJcO`<T*L<wy{SJ7s?ONh0?J~iB%$swpU4^^duO;6~+Kobv(6@x}d
zK{d&M(%zFo4N{p#EX{mB<);}c#pP&43^Es>HRxl7v!0+bD5y+25?@cXuSQszIuxn~
zt_&)Zc@w~5q2Ni6Yvk$lTG2bIVdAgsV<(S5tnr-$28-(8ItmNEMK}~x_8fKY48MI-
z;`BM{M54Ny(V$8)sx!&a7JZI7rHL34f^uZjX*z4&NMKB2fYh)KoVv88Cde3*$Wa?9
z^2Cl;BI_-NNLwQkY-Z(ZU1b%KXh$knD`Qn{>9>_?X$Kgn4fKYJoOWiRwRDWME^Vqb
zcv85!NxZ>Rd$1D3dE+HY)h<mcS1Cz!X=(vr%4T2m@VhYe0Uc75{t&t>Ma|uR=`9uJ
zLWv9Pl5a~d7O3vF^k(V$Y&Yu=DX@;5wh5Bg!m=HcTilSOc_iE+xs5GZQf9g>WcmOJ
zK{C+q{SqygV+XwvN{7#n#~ik^x)_?5$6iigdpQP)pS_EX<6Y<xZnh65sm7){nKP+#
zAljY7{A#G=IS_5SVSYU(=~KAPhV^Y|5~z4%jo5KaO(n~iAAO5mN8gk#Z|2A8;&$V7
zDy4J$uwC48*iI>xxgm-dn`^uvR@q2!&Qg3!%U-mQaScv!)wUee;EAPoi06`Hx%Cb@
zg9IiaYPIyP4&Mjeu%+{8VblFX@2?Uw8W|vbz$M)<s?C+8v3jPoWLFdEZHZVg9I5DN
zV}aR?*%MBx@Q`f7VG|lgWrBHlFxa{Eb5>XBi49lo5XV+R)li%h*|_}^S!a2JO*ib2
zr%z&4ik!3CxJPHV9n8D8e3Tpa=_t38+3@yHc4J0Yh)T^ustpad95Q4C=D*wtiR!xv
ztJ_9d0+NL66X`Yp=G45*N8MpGL;;#yaTOb)LFXXaF}t=h#~qt|MY5AacBVnc@zB0S
zU7h643{$WKbC-*EDbl7|fh!f~YplSPD&YfwRjj~`yIq0H1iNZ{wYiE`vR;k?di~Da
zai0oo<>XUu%3@V{YSR<!si~J<)w1gHCQDH@lrj0(XW<H_Npd+K=#?~C=`*2J@`jg&
zaa2!7;b^J-gIY-!QTN1A7#)S>!z<?$pzTd>@bNIp9&0?p5Zrc?`h|1sV#_)H!pU{9
z)0BVVWVzUF1%O2ETk8b@MtYX>2(woQH}d5IagWLcZ1kJv@!SJFy3j8sh}{$usARhG
z$_`>jWd};>t-Jz-xRnA01D#2E^$Kx2^$I4MU2@A8V%~MCTICA^mO|r{Pnutiz@-qJ
zqDu4Yv8OCJHJR48VN|s9YC1RZX54Toz|4T{Ump4TD<-iTUW)*wvR4zBUX49EWp83*
zc@svdG}{Ih8LRo_kX<iyTaEtC)U2>;GhMp5X9H|pwY5i`95FRiUd0V0Ij^KF3akIg
z)ue44y4{?6<V|nP%acwppSz8@N7~Kwfg;V_##%sH{7{h=KU}264^=$I4_G{!t3|hc
zyvN6pwS8DP7UQF2)@TY>i~rI<tZJKOf*{*J#Rb4Iac;{dm43n34*3~5hvm`mRV-4q
zY7DTlu7tVhZ@Z-2M=;yban>oC?jt>kVlDdZK2Uoqe{ZHw)M}hsyARa9QfvG~t>&pU
zexg<@)EYlo0_LeTezF8CQR}8dfF*BbeJEa7-$2|`4{Rx<&}s>ay}h2wKH+v8qP>*3
z(PbyP)S4%(TG=?0u&jf?#2plhlt6_7gA)28v7*k3sHit0D(bFGN`|9k6kOab;%kw#
z{^lMRlr*dI4#V;e!tB#ZYA$#iebEb{T916YY@UlIt#-W$)+z_UL1sx{FdV{4-E;t4
z?AAW)*1i0r0(^_QY83!eC1DETqB)2<h*<=D6N|uDn3-!(4l1yi$1C#hAP}ZK8W-r$
z@@l~1kU$dm0l@_vlI{h#`FtS);3`HirmBKmcr}!3gK@rKC0+NK#u;rlR1iR)N<qf4
zOb-hTv*|a(u$pcdK%a(=Vp=`Y2&{4+RmF@Z$=uISST<)-G!^R=Oe2y0dbZX;K|8Pg
z*5z;*kNDIqnUdd70UD4Z^Li>Z*>$N7{c0trKh#p%a-Guf*`lhtkCv8|b`P;uI8Q2Q
z<12p!%;iI=s52;jXv)8b$YaD3hLQmChg9cLbP}y5lbnfYM4&j8T>lczM98ZuuF&Dx
zBmE~QOf<;T6IC?G0$oFrFlI7AQ)*3dcr&F`Ra|4lHIgoez@XsMReC>@vYmTPwvB_7
zK~{pR&;t>DOT!J(<W^9Zdw#kl{OT%Ov7{E^tHp4t08U?;LzuWq%SbSZu4>)GSkO*q
zs#JOFh=CD;q(yDn3?Ce&Bs>#eY4rs$o#PcZvOm08G0~&rq}mq$Iq}IjVELHEU4V)E
zU@^u2^k@E>Ey~*jml5+RmU4TFQ5ndJp&87IC9#1Pv;;Kw;+aB{6_OL3LYyQwy@GBJ
zD3~oKhgP6$T2Pvil!ioQiy)IBHB+=^PF%JDxaQ#v60<L~x#=5-dqK+#!5NPezWJXu
zoHS`J$uy#@pXzA1X88=`+7x3A_fDd@dz}JGCX>5*3+J@j1!?v|B|Rb+t%Hur6gCJ(
ztcg99V$^PHBg97i$~KF^pB_<-t#D9v!m+f8J=Tb0nCsxrIVv$Jz#N>`^p_}g+%(NM
zc7SZv?m6H^<jjguH*rBgy=x)zMW2B^9ggIT)A>NxR&HpeuVgIO>^RvubjXo>dt6yf
zAa4m8GzIBLWnu(LVCn)4xfsqz<waci7Z+j&7`0)L&`Hr3`LRDs#HY%hm$Pam&Dx%F
zlb-KcH*;IyC(HpRzX+tKsOtpGR+ik_nt3>?$5lmkgd|(l#%b_XksXmSmYn4;Y5Mn#
zvswtWX}+>!dQfjS3U-}2hvZSHQJtg$ap&rhu)UT7)_rGh6}CB_&Y)BGc=SEb1(g6(
zgO8frik#uhWwTftVk#02X3ZXE<Zg*Lb7EJ#wFVRkF(m*M@z}N4y>M-TWlPDfP>BZD
zVaT$6j6ihL6Nu^@(lmlphf0|ut--7r`$aS3=B5b7jT(+DRL+;8AAYGsa{*9xGh2_B
zjMc^wDrxU4oJSA`5E7B6z7dp>@-aIkMNOn=`Ixn0Nva$dkmmyup_jgqmcimSqld1P
z1AL6^l+sRBb$^XSS%D9^iPi&-92DJPnnPf}V4>$^t(9YI_E=Vvu<7zv*#>K(ZHYj>
zU>tueC4Z^(E$2ClJI-t4G;aAcO2^PNZfTk{HLah~#?kF%eD`!@D|uwwLmttxopqBD
z8>BrZ&Bd2OnAh@=Tef{ITWVK6pE1+ZQLX$@Z4WuhWUYhQvsDX9OZ6DEg9oY)Tjx1A
zU+q7y>Lo52J`mOAR>c~gUY&wIeAVjaRCF1STPRUC5KSw%gKd<zvo{kVm8Q)FEp1M*
z5JJht5Cp}hC!o-Bw`j0nGV~)Vq14Eb5KfWk5kL(T8R}$L2&Y%{2&4`gG8sC4;mOCx
zSX-u?S14)+vxRUt^so64M{Rw}D=gH{geH}q((uw<Iq{nW{xqyA%DjKKZ2xDb80`m0
zir&aHm=s;H4FM(SKb<JrgSP`Q2yGq!*{F&eDvX%oW-0(AIq}ZC%Z!w$4cCF!tyg#{
zI8ISKiD7<M?EAzFY>OZR9@CrOmx+t$>YE}tVpohM1dzLN0tB_Y;|xKeMhh&~<#cJ*
z@m<;MtI}$_yz8sHL<#&6DYtg<rrh&XYhdB<zgo*^>;^CF_M;RwsvS(YzzOUD$CwZi
zzaR#IO>toe$XcyA1VO%W267FNWwhhD5?To2*Xuh$LkvTJTIm=}#-|uERIP`yVrsU{
zeKrb3(VaSK-)$2XW@R9?bG-d-LJk3>EwNf`XC2yVhASjKgp!+By@4xzEQQTMUJq8w
z;1DpqVyP?k8hTEO+k(9Qs+NIb+T?QFPTDPW3|L3>Y8+7rsdY6C?e&i}CuodG8evgm
z6(nYajY(-EE^e#>#+bk{A#qeij!Ot>2pvOGM_KG7jS;rnyyr=>h?{SsO6R^fy*NEM
zI@mvZ?Iq!~dWTqf5x6f|+E02z$!JpA&3Dbbnm<`amv5751U(U+h3bTg?q|BVoCN-K
z*BkOZI$QYp*nppsqV*9}*xk(-Djvlm<tjMhM+%H0DX@p6{R~O3=txp*1tBe?>KJ7>
zpNcC|L9k53PL!i#Vz?0UH};VKn^EMeYp6!-7rdsrCIk4_<aZ(#z@Y%95?vdn0k#@V
z=LqMC^f9)X^S-Li#gwY3^GL%t5&a6~;-$9(O`Tj1E0U8;q-GIU?Y5}qqd}@MbU<3O
z1aMU^>{C-IhX@LWk^$)3fv&C40`UYkiKlLKs1py%0+GzqqZpN{@p0vBBn2H+k!cl~
zW%N)|u2Y4Ajbn@2ezQky;x3GNauc_SOd3U1QCJmqX3twCn@1>^RkzJVH^n~3GQO&g
zK}5BF3hQQCMch<$!rQnyRPtUV@!T2PuvIr37&by_15D*Y5^x*ESPFpkLJ1rVZ5hI<
zx|Jd8O!@K()X$hn0aw<d0vy%o7R}mmIdfjSKBjfp(qaIBtCAWFp`kzZ=K+Cxp_YId
zZQ$0AYQ&s%z{gZql9dY6fv3Tu&1th_ErD>>!f;+qqLDYbO@|S9`nNSg(y)uQ{dbDY
zz?HKkCC0`R^6|>-xpX#L-Qw0v@0Dqu*&<0fuRJj3)hp;QgXT)7Wud+B1J9W13kmCo
zMGb}u3{bI-DJDS1q>vTQx=RVU%V7<Uv3Mm`4^Xc#l+hEolu%CzT>7hqLlm|t<8GS<
za{JImRb6^_J2f3RuUEymDvD_hX8Pe@O3jM9l9k_+=D>nqeq2fD!EzKf*RuQ=NkMF8
z+)+(#%g-yxSyotEf{G&RlKa<R>>BVM5q|SO6cEg;epkPlKnukT4xZk;Zl`KjNKRq2
z&L&e^TM)>+7TN@kPPW>%wF}E$t)`M@t7J#;Fz+JiDqt?Nj{Cu^GM2fmP1#^y89Ut8
zzPioOGFG=E2z8IerEKh}bj!1u+UZ^W(h2F~JLS&Us<Oj9Y}C&-)!Ps&iy-IVah&b1
z-I3jpAnQ7uwG-?0TWOmUha=l9aw|emHC$PTQ4u@H`)<g!2tW=UAf@Yp!yte&cpHeD
zQXpQq^TJ&mvX&opm^(Zrbp~|HAzbw<NA+1m=eCGC@`%i2WBj#g57p))&l)#@7VK{f
z-MY+5W(AuKxh2Vy2D8;}&lzwZxVqx$rsPRCbyhd=6ixD^XgaQFxLV`Bzgh!_;_uPz
zQncwDQ@B~v9`}9L9`ayrPl?<=RwYtHQa_c+{o_?8bqw}Yr`${*;CZ60O|^ZVXlq{&
z=RDC7gnE9ScoTi=nCTU*KfQ!sTiX=8cSFYf(oFcZwNooMaisdvk*F|L@!<%<hPv2L
zL2OUdm@7rNiN<}EOguR>?y(%=NuBYN)RFL*9Kov}GDKJ+#J$f>wzUPf*~zwc(bw!`
zNAT!XcJfX95Tgn41C1ucPtk-X7JrH+2t*U&ab%~5y&1OG;CoB^GR65itZ!*+=B{MW
z;C^%L?*>Xb*D~lx0KG#885t3eE#KOn9u+lQAtQO$)wnvrSn{r)@}72bX7#+2b!ACw
zVXJ2x#${ivA^wN)d|{`^);JEVOfZ(6VLV`=Tk;k$P8Qf&a#8DYXo~UT9q=z@Te2}-
z*(uJAo#=C?b59SZN?I5bnG_xl!ewoel(tl1&DT0nsh97tW>O$49T8<Dst9OnEks=C
zb9l1~M{))ct!}VloyZwjC22{>Cv&t!zB{h0C6G683|ct!ONqs(iNMkY7;-V3kIEFt
z{EG{*JA-S(AeEAo5c#n`OT?$jo|iM$B(1f!SV2O{8Zh3BGXr)b9{EKeJ%yeH$TR^f
zcUSANL~#?lBu~`FX)r}TSI!m69;=8!>zpmz-+QTR0J~E#dV5i@cFY;kk3v7`q#%ns
z1btyiors>c1JOlAXU+>T=!QHVJ*gMy751tgVsonafi|a#pPExOod3j6&8Y;Ac9PMk
zu|?=cvh$6x8$m1Oy0DEWvk&W1_F9Lf%O)%iz-*u*F~J+ARv|5Z{l;=o2erfAD-K|7
z><FWva7Cf0fF{Pp5)`C$;qFM04lM2wZlXQ0vW(ZqIgeAjaeJds9~Mq`YdyD6tlmJ#
z31dYtII6<dt^^AXgto=>06AjW*1in4B*fA_R7=K%K(cWM67BARY?z2wVT;kfO|~}5
z0hZH0h@<vI!5&?PF^lkQGNYw(7ERh?bpWVGp`;OJqVm5YU=sZfW*PRGinG~ZT4C2;
zE!)1T!^A}$Dz=Vd(8)1;jaZ{buF6n>v1$a&Es)&>&PtkRxqQ<2VY+<sA(J+G)pQ%v
zX5ubPUr!g=7;kE0+f`ee<o0)a_ZHc|R0@BC_<d0IdxmT^NoxzdCJt2~fTg(m*5XZ0
zToP_L>>m5Uk~sHPi%2Ql`M{!GrHTnktpwQ8ZgzfOCWMG`K`?0UU8`WFQ*$`RLI=gn
zq6?|`zjJahU=-RQh%y;LnD>H#Wna6JKQNZ6wHOjf(DH{Bt_h><6X32$8Pn|`Yl&v9
z$tWEYUDP1&IMJFvl<$r#>|zkipd{6DIm8`|wQ&kIXP+50(9FqB!zP-T{EMU8%lJ-w
zaRd=na{J^HY*FqXJ?&PG9x8Pp*2_ZLAaI>}5G^rlP|k`Oo#NBf(^@OCQX+jb<%Fkl
z5LtEzT49Iwn{9MQT`1ey=8WwpZKK(EMc6awJfm6Y*U}}mrS2aXbPz;61Yw4wItkJo
zN7s4`rKZMn6{V)v#YfnoS&m<~*m)NcFWVNiwKj=t9=2s*YjIY(;BvOIakdqM89h9L
zj>p|4=y=>=f{w=zhM?o|Ln7#jV9K4LhDZ{rjEZSa;#)G+kVhdsQ#P$gcvJ2;Mim5R
z>gcQiM1$`PK}7j<e;N0vA!6OlP(AG8iGn^g4K$eS%@ZWO@2RIPYed1dX%}2A5R_;~
z1-1~n9QxOdX<XT-WHJgh&R{3g1^M^YM6w&}Gzt7^SdnOP`lV#DCM#WzDV+KNsGI^w
zNEQCS5~!GoZ%nE6Sc!^}AuXFS%M@>qmj+qs$SR;^3c{%XYz;Rp(Y$sPVoA=u1Zb=h
zz9h+uTz6vym;!mdt$?#i@3NFGboGuEh+0n4Q*SR@V55n6g;UW;TIc$zE<kNQwaf;v
zg-Mq|`kvU8L6>3D2=CiuivQ7knw2WSMZ6qjZ@M_WI6XKz*gyN$OTuZeyp%+t0a?zI
zJ;`WNnx+0U0?QX(5`21sH5FWzi-K^IS{4FyF*vT;e2MU?6k9oD1E!1137{dc8@{4(
zLpmSzNXLlgY9^lIHDc65JU+owf|l6A)Ci~?twKSX-9ThWC~!qW`xyznRwE(9?J2Z1
zb)EVcD}=<Qx++MBYC{piYGkQ8mkJFV?P^4AY9vJ88&~xGn^F4K?lq!s9yL-pAwQMZ
zr9k7jXx1ijJxOi0jT60#Q=QJ4X5`vDr3)F7=`-Ox37xk!q-4+6C|5X6ks1y>yHdCg
znM#|uVJR88Nn$eFG9>@hJ)c(S(@0J-k(x!^ymdp;9}SYUfmQ}ah`M49^X*;nzLa#r
z#>J=tWfO^w5;3EU_H`}h7Kl4=N8EMOTb;O#9g&)@9F<@U9ZOxuR5xm?8#UH7HH*^@
zrg9WAQFIyHaOQ9JI8);^-gs@SI&aGP7+Ow-Suew^n^Dz|RsGkscC%)Al;!T+HuEUX
zqOnXIkLQ}G2fVp{hG-L9BD`Ir&8g@-w>3hPybf15#nn2d)Gcm-mMCVU$+E#@+_a`3
zOfQnmF-<LFOx27uWkrfrM79;YRbXnyObR4pEpW>*x7<>jxM^g}G7_u_Y%i3mPjWh9
z%3>ImYd|zu`9pu|&jW%8#hduWgfVZrMWRBwgM}a`g=#SKLNX2%cP6*XAI`BF@*(k;
zXoQM%q3s1<g(Io$5xapVdJ5!jDf~$zc}(=RLGz{(@G`P507L%Sz0j<1oV7L5?{5ZT
zXM&>&hKWIA*i}L%P2)VupsjF(wl_T8rBv50g;fnOwvNl;F#FdRV`skJeJ~+k;rQ<5
zbS8hjj@+Gek(~sT?3a;DBggE?`+;H~0Y~7nliZO1G?MB>oj-a%P>yrD0a|Ad%_|d)
zRIP@!>cD$P{TJFnbeGOxt22$1HON|d<h`@r5|>wT!)-V44L8{5jq74j8?}+y$eYTX
zH^QsgR7P&t`lkC3wo@>f4^;qiK2#LSK_8&pmk@>mzUUK)VsFz!>~+~$)kh$g{}_s7
znuIpnr`h*H0?Z!~oo%h`g}4EiT`wf~|LncXcH=m*E<Det@LK<*Pc5z9r{y#INA1jP
zSN6{AR;RuDxNA~NBqd6fk`0rzQ`XZTk)aDOA3)qm%BoQsr2r9EL<9nXazG+I;YL3S
z@k6wsL?NNBVfG)ss74{3FuKoCNT_YYyNF+uqmXC0-kV*gu(Iikjmm=EG=cWU;fGWf
zHVz_|g@b7#m+qe7hhC2NjDnV32L=TnJZ%7-aLxV(pv5=)5rG!xw2gW4tkFoBdA|_b
z2V?GsROe&N{m|=T%<UTUPPle4=2nb(`Wm&?oWGQ+N92n~o*jg8#W8#B)P2?I5d&3@
zXjKir)Ryy7>PD+rK55~^L-i5r_aQlMk52Z&qgR0o#7Daws+H3m)z_-uhXnQSSjfO~
z;WacRQMWR+UcT1(8CUbQI*meYzUTBFHJ}$qYep8BvkY@J;C5K=!C4`AK~7T@xSlBM
zKo?ja8g&)_=PG+z$<C7hHn8Ls_rwha*IQ1Z>`1QL_&`L+<*BWt6rFw?*m9A3<BtMK
zD#g|#$v*1?k){3}W|xdv*;pyD&6b;ePM#^SjB2}ZGec<N6^oqOA!<%9ssqY4ykevM
zYR`Fkv}i-n)1dNpV&YHpz2Pi;dfY2fA^h~RbX9;W4;4i##pjNq`UEek6TFNK-w3^$
zR^F)JKjy8{dPyN)DNoU3Wz9<UrK6}Z#f$P3CRE*9jA`{9^4cTmb_pBG5LWGJ$$B;0
z$`IZV{KXW0Z1}Y1o}WtBphr48w=+(vYm|Pmn;8DZcme}%R@76if5b?08%gH=pO)=l
zD_YI#Q_Pijv&IxAtgg(Yf4aD-IXm+AXXmpdDJA!l#NEc~RmQsAoKJFg9Ga`|@Xblg
zMa{x#q=v8^KuOw8S@iN>*8L5y@^+p+$!cymfhqjUygRST=)=|uv}~GoYloYB*U*pB
zy?ggkTQSx5BQ6f}4t->^FWW1^v%42sJaZNJ`su5BNxhWToi@E@N0-uZp=$S6zDMZ{
zlai%Cy;jO+R+g{{9b%*Ho9H?v+TMudUdrEoe9=&0E79IV^fwP#vnMnXE&bx=(=e-S
z>G~J(9pwhDRJ_=e!&-^-RouuPM)%cJxrRfiS@heP_}C5muax|)l515B;*}hpeg^Kf
zvjH97MYsIdzbu{mF@!;|uOH?c1o-^W{``V=V~cGb!Vg5>1{KYl#PJ3Zyh-dXh}zFG
z9=EW?Sqaax<CS+<P|S<2r9E84KfWW!TTOd}EblKVtSIEKNPj!F+#SbY3s#`SR+<tj
zhSGZT7!%{jqXt)T-o+WVaXFQILM8G{)S*{Y$x}A;Oxa7xR6VM;RL3r|&X<$0eivF<
zyGO{|l)^<I@l@Y-|IZgD@tP4glKP>WmY>$qnFg{aJwHd>maj^OB3n@zqLnujd(DW`
zyBF%5IK~(we)CpCUMLmlNJTk?UPN~JVZ;!Ns$oGlY$`|d<r3{%>Q^g^&sU2Swa=1a
zqN`Sv)vC4v>CnEpZCyOLmSZC=A3F~#o=M&tD`crRR>az^K)M=md}#FQ^`+JXU5Pu(
zm*ZuFAkCguRc@~bXB}-l!$u(U>2f79Y(o)TBO_<;KHmNw#OcLQ3&MN2TcIbt{WIX!
zo*Ao3Fvs@BX(kro*V*xC$aHV6c0KqJcs0MmQ^H34=kuJ<^n7n%Of)h@;zK9pTo1o3
z`$lo9jZLi=>8rP(SDDfFl+~V=4lu1T91fdkGGB$2c|6pFN$fE!?sV(5xanR}F=nZl
zLCKtpphZjSE?FK6S)RC~U|FrL&kydSOjA|wa>|{`hNF7SAF0jsJVN;+rI@?=Y<VMp
zq!gZ>dcgd&_84KO9bZQ(`J=(AM9DH6>x7HwO+zbSYRXVAs;Mbe#W?Lc)?Rt+xCiV=
zDK(X~JsoN>tI^4U;;GtmX3@N>xBuxosjZyQl}cG>HtmK+cb;%DJM~-Q5FD6rqZFq`
z$Jt)Art(QDgYLY2Y5cSkt=aQSV4q;Nh3g;~>CpzJm3uPN^;9ASt0hoA31lErM`=h;
zmMg8^XwV~)tQ`5sLn?PK<~-x`Q!}TGUrn-;$@NP?`7XA+=WKrJCT%r|D48dnc`6?=
zZ=`J+$!gM(Tk>$o?3g*~Z8=5X{4r?PY#veRvWR=AZ=FY6B}#my8vH$s^lmDli0kPK
zW7~IUTzV_F#*cZtmpUjD={*C(ySW(N_f>*w?^IahP@&HPcd?j5ITY4O*v~{|h-K1e
zZ*10wcum|m#?rOXuE=5#Cbe|-&>##?U|v-TQ`oK#n&!${(n(ijINjiSnY{AMn-9Kr
z5^D`>+U>s-H(HFgP<tXLEb4)OY5T?FpYN;8um7UatEYf}zPn<)nW*x}TtFzSI6FIX
z33z)M@D56rdt~@7!E~R4>DFgA41`gxmz3p*dOBB~Z$7#_<RTRU%0+Y?N|dW)ME5dv
zd4ObZtTr;TRs3`I{4$H=%a(Lo3|p^QGc|7ZB(d5*GBwU*HWE`jd#5<lhV#O&oMcv5
z{`(H(_uw%))e9a5ewVAQY=Gs+RmF-;lj%B@pKAYmBiC8pb`M0sOe!9YZM}6BJJnzs
zu3lq*u((0+T4I|i;<27yQ4<?*2C8_Ni=&HbKEL#Sm%du}q#BNt<--bj*|+62s~lSJ
zEnf$u)-R<srM14bP%4`dO1At{5r#K`%wJ})mJ!T%iu%@}{jQIt17!m*Ov(wHt2`G)
z5Bs{y5sLJG2m)z8t!GChFRysGt|=7TQrog??6cbohb>xCWJ-55`DGu;oC00lRax-9
z^>cpM{3h%*PJUkGy>)P0y^=VZn3<VkW{8=YnVC6ehS+0fW{x4on3*YNW@cuJneE^B
z`|j@Dy}!F#^{U<<uj*CJOsjQtq(16agLFm`GCGTO|Fmq10=u$`v3w&<HjJ+V@KmUg
zZnJ7gsK0|e?7-12$rNJqs`m5=w#x#<jy7er%wq`NKXaMPMJveLncOO3m}VDD!KRCB
zvt86S)yMap+Urr$j}Sye6u4REKM_3S1ioV6kh_XfVhA%-eoe+$N*Qa&R2oq`@?8+m
z-5){>OX)^Q+6;XDtx?b%uXFi6=iG;(tb7@GhUiM~$~MP?1Jw+&sP92AybcD&dN%q~
zbh<}e`20FO!u_tzrk$$kU7hc&)}+tmZKd4zmvkTLnPWAN$6Z7^0)2j7Kp8|@rWX8O
zTY`KLCcl0%3j0#=ZHO(qT)CPTm!BjHZN-;-jexU(#r&cBJRvEjj;m)=&WzZk^zd)@
zKW2@*Js-f>3k4m%T%f>;axpXwed-HiI!^-_zPPl1#on>KxTIv@bslSpv~Tw%g1s1u
zPm{VBs!9vD>#I0w?c)Nm{u7*7q}Mne4W9PCo^4Gz#-|B6#<vvP;#nit!o;*wdVIWW
z0od?!nemRxJn>{L%j1hYy`qlXIoU2NMTkDVx(Sb2S-1YgJGi)~8^7Kr-5oqB6sena
z8VhaQrl)1L=K9Gv_}(8|pM>jtt4{{@ds?VP`}qy&gbsl^Bz7BI{wu~z$5uBjOBTCX
z<h+eJTWY_y3^&!7$H6;Xxf3g{93p*T_Oq$i&G+C1vUK>GFDcF6n?*c%L+^r@_(y`L
zS^}eAq47N@FNVK*{}@kerCyL0E-l=;B4)~x$IQ5I2f)r1<f+<5bDjR_LNi0rXHq<q
z7+0^SZ?)IR2?1PtEWy~Zm5E`?bTUKWW^TBGa~VFYw?WJ=_+zuwR8a_ClrRRJs_k7n
zo>MVsbl8+~YO{H5orEkr*A}oOm%~;I1&?_#l~s5Wr%$v`<CRBB*A|GU-yW@#Uwhak
z+@9u=pLB`J2EZsT#7I99$(y4$j8|F~MmRSB65Cb2YWG{9NRjiU#EhJhel9I?$Cgv?
z)G4T)qa9UgZ<gxOf0ef9Pgqvr2Tq3QsfR0lA6>gJNLY^7LoFDZKS5?7s?72nMUA3~
z1;{kHZ3mNrVpQ~I=5KN+wagWgL5dqZ`{!t97861LbdvnF$Vqdx`4Io(&TsL=aux}7
zt+)(URo&dF1f&_iLtwe{q0mRjOqhI4v*ldwB+zKcDI*ItsWnwQ;AO66zia=V#*{Z}
zpH1^FNKP;I6fh<{PlJh{nfr#CdJ&q45C6T$WU0^VVO=?`v7Lt7C%MsWV9v|`)m{UR
z*?yti&Zpw!v`O>F91G7p^N7k-X+4KNUodV~n}pA!GeOR^MU(h>$Jjbr0<%?Kudnxo
z%#xl#oOnL1b7P_?%}r=>y{&z;Q^Z<aZ_ZxEiTH_|O4z_pndaSP+Y}IO2HpJIwCUpC
zzI{7`L=`V&Nggc4!=Df{C<n-0LgBMj$wcO2x30-YtCyY<gg=*-)kC2V8S*&!TQ$$=
zv$Fr$DQIXEyMo^q+jjGtUZPbxKJVX%Xw4rv`mNL9mySL>i-^E^%m@|dFyq#elh?0#
zZ2&f`Xl%*(XvH~T3wkhgA{2gA@+x_PVzKthlJnz-2v>o0@$o<ls-<F~`-;)izNlGQ
zI$3kn7?oMh{0M?=ymkUb1H9av7o(SN$@sh^uFYMMoBDOpt~p9EzStae+55af8Zij?
zePoQDhdj>DNp_((>V4D6jXQj=0Lu%AH#~6Jn$$BY0nE6A5Zs|l#mw#*Wt*eCZsBhL
zR%$O7pI2w^woshxUOw7(J+j-q9g7;QojU)^t%9l2#vcA@)3&VSsg!<BFErvzwYYV>
zIB)JZtM@Y$=-sKp%0`c?K$9h@&L~c_p@S8MGHdXdPCH^H1p>fUOVGaddQruSMMK{g
z-H-@g+RW1OqAd`gp!<>iA2I$CwZ3%eC3joayH(8W)wjU`L3y#D7^-4{1aVdS(czCZ
zX9#4V;x$_kY>#!ZLQ>D&scz&helgq#d*pOp*S>Mmp8MdRi~2`lAqbN4VF9bulF1tV
zzaA{=e)M;vx#)V*UQ-_<FkfE}xz-=ETQ=y&qaW_%Kp9R=Um5-WPFN`35^1)_zMKyo
zKN`KTF>^|l)}H=ol@w_>htXED?pEBzuAds8R9gjsje~|Nz8(0KgY8n*ys(G%&iK_r
z^i$jP1@4zHM5y2)S`y<|txj1(woHgTPCF&GMceXmPL%B6HVPsRswRA@uaj=;-0A&}
z23=bA>|0pL9O_v`t<x3deEwI@E+Q|YDqpZ$rz>mux~>+GuNJm8k2;PmgsR1!GZpd~
zYp&BXz<-dY=b#B?gpJ|WDDyf#6*5q?D|m&;=Zmf>^ks#xdQnl8kaY**BNeKzkKTx`
z)I{z|wBf^rWj?~-i0nUM3-vU{U7nQT1Mt5%Bnl}|Goddr$PXsv8)O&63CT01qb?E2
zLtVi|B;$XrCR8IKjJrJwt~%z{N1q<FU5ug@5j#L$_16&_VPP&dEJcri9`zrY_@b-s
zH{bHYN5O#<+A=Vw654|t?VQ})anR_K@e?g<x^-&#fDxd1?K}2!#)mUM2>_|>{tOLU
z2^qMVWr&1}+iuAqoo<7fUtFc<H1O4fQFm`yC*5SvvdpxX>N*N+d6uoOx=gDsI$Nfu
zYDFySX)dNy<sA?A-p<;B8L2quTfy=wfAmj~Xk8u8Yx%fx;c$)S<n2^aJkyE!Xj@HX
z7!YpBXQV9~I{Ym)_@yT*MEY+R)XN?tvQFsbMRSoB*RFYZMx4EadN!DBN?zu&*Wu$~
z?9c7W(VWj;wD+U?9AT^t7)tUc@CIyDdxs(-jFxsYU*8z$mf})U>R-v1!>43+zhUEi
zE=q908t~v#D<Cl8t!^qORWR3`Gg2kz007if;?0X?I+W{%<prSR5Lv$_Oy5&9*!GmG
zXyqyNNvd+(;gCQ-5>P8qB<^}{SddWh;OxesW|1OjLEUKK#V?oo&AOR3bsIOT+d#4~
z+9QJr<;i1YmC>TyMng#VS>b)xRtgxL%8qp>2^P%RSLITh7|)=C7iMZhGNM$vY-^Da
zB|_JOV#F$zpjQ%9iqpy8rbHhyg(L#v+=G(TI)Ude`-Sl2rFd$QDygR+3|;k$<P)?!
z!d+WSTJ-*0qMPg8wt~p*vJp9oFhx{iJ;e368F_*>UQ>Fc4GcA5=Z_f>F$aWO!ub!_
zP%VU!F4C@Z`#GTKCvP+NaDf`Y|B?kR`Xk+EcN`G*4vB_l!Z<i08yyJ!&;f3t_t!6?
zP}><p+=%-R@lAFX%WOYi#QfxMsaHISM)`i>46v>v_G+q{i$T9^HBRgt<v^FhFR577
zd7S$gsFPg~6TO?Y!bK)145NkepQKFXhI_n3h{vjJI;B|Mlr4LEp}kN-l-bIJsXLIc
z>z%nrNn3~}iTYX+=(kp;2AZ`_XHFkYDesO_N~f<0(05pL<=Fw!JBXsNH422^Uy9Px
zkYkB%Bq3JV3WnT|W}Nv%8}OC#DIB{Qp@(=}<dT`}Ya}w*gfi*pu=j_RM^Bt6`7L#i
zKNr8iO%pisNyqQ72tFd7AZTD(tGPV8+=VZrUH9WT?MdQHtm5*zyM7|m`_dWrzBitC
z%GlVv5=4K&iJG1Y##0a+wCTd7noL~L^=H!>Q9M<~RjY@=GAv_>J;QZ)N7$R(Y)pzW
z!OW>q>HG3#07L=ko7jo5uDGK#1MN-i*XS*kh993da9n)F!dAK5|A0NraS_<qHn#PQ
z!H(g8P$+=CKTs|D*yfN$7%qYbf2m{h#6Ji>02x9y=l-;v8i-%RU8C@-%~dyEG|*y^
z**GYJPwRJ@8ob60?Co^k&cyY~6n&)7@yNB~<E;Um;jk^SGit@}8$km3B<f}zcB*i4
z7W7A~&CoOVxe5+;dnqao!AW^{9n!$VW;4Ikorp+I=G?G{v$L`Lr|P-pCa3xs<bF#O
zLJPbp?EYmi|Ae<olD8;2noZ_(kVe^|x?{J)H&2G-@D{yoY&bvu{xJUwlm$*HOz1Pg
zu3?79pAGO_jOYxh@q*kG!s)Jidko_r3|I(hk>l6RD{aATh9t~(^~s`LX24x3@WFyg
z-wEdJcR|#zd1(Ff1azqGJM&@Enm}q#{eIIv6Mh|egC4Dk0ZDk$M4Jekn!pGrts==s
zYhft)qU7wGoOZpnK8FP-IlJPBgD1hmSzK(}%&J^WoFb>oT1id0yQ;fQb?B{9{5mh&
zwpb1WmpR?NoX4W8$MQ#laOdDFSDX4D(vorvp2CtTyG7kF_x$K!!3~pv+xM$q&bi}S
zDclX?)ir-eWdpL~T^=|s{Yda@3Pq;1&@{H=Pdlhxx~KcGDu^}v`_rkhPvW(E;p&ET
zGIcQNdVbBGDm^%_bJ(f;=n}Pw6dP);Fh5T)pU>inHB`zhje?~ZCWE%6Zd?P$;nTD_
zJ7c<hMX_nnw@pcG{E0x;vj|~NBW^tu>=(4D=I7qtWk%Mr8a5~<CKm3av=<dGZ%wj6
zd%i<Qg3J>aTNzt_2sfBiF*GC))YN$);EhO-moVXh^`J}7I{#_Jv~*a2O}6kv#k=J7
zV(xsMnu`q|y2?MScJrW)ldIUV*IaFvMtjeSd>}+q1-$vQIpI)9GeU0J{vefyV~6RI
zkAcncqh{@1yt(cbjU`9K@MTZyB8&`m*&hCeA7#8G@xY>DLu1imX;p)U(W`d@sUl^L
zKw+{**I9kfF{_rTivqpWrGIpXM`=o)yPyR7XVQDu_U$zzZ+t1ty4V;<(hdJck*L9_
z<7G700Zc?1&RP|c&lzJVS-*m^Lai7&=LdLFryzoDBw<htJ**FGL)OnP=kCPTh7sD8
zzXCv2_;t2}#lz7h)^Xb(51N9Av7E93uD5OFfIA8#&9?KOXWq6nw#_x_*^>9C7dPo(
zEKHc(=$^v?_`J8f)4N($;Tr8lnotE-n7&QSR39JLJ#mGPFdiF6M5%P^?d%@xB`T~r
zUDNfD4+MxcOa|YZ&3Nr#*9B1O%fZaa!@Q2vo>QfccLj^4%(lo|PEaLXOWkV`+epzw
zTgXbz%6jgZDX5Chf|ZzU6{rrkLD_yPF>-|ZHWM-Lnit~o=JtjglYK2fQ6AvF0<qEP
zR0CHbb#-r&l9{;7rrPm%T}@3?-PB!s8uEwMlbB~j4`!mKsGB(uSK^oqn(wm4z9B~;
zV-uCxDn*5stU~mOwHFrW{Mnij5uQ19iaR3x{!eNQ0(M#609)F7GUn*7v9RkM+<Czm
zShGc>K1$bEIQ-NENYH9;Zp&|5C_&^5nXI+;VPX;EC2cY}_K)i%VyW_1ue2(*Yqj*Y
zYs`CAcc;3%uY+98Y7?{Bwk8)BdORmx?e1nzlet@9`z~+@iBqqAZv18J>&!CQq9!Tg
z2)83zsS_)-GCzYwnACgQ^z;p>=Ro5*iRQPw(Dz^B*yi`((e}5JSm!lgxf5bGVFm{3
zevPyQz#Pl9JNp@}p;s{nxz~5qNrQVFX1lc%Ny~KZw)MLOd8eeHIKATS8Ay$$3CcE?
zd`ps%%YGKqbXS^^?pUxWhT5@nx3v<++R%EYg)juH-D$mc1%gzaN_<Mzh5lXh1(=NZ
z<Q&)0Q3Ejh@+d08@Rp45+fyO2Yqe8y8&dXuN#Ht`_g1S~Gs<@HN|mlPTjmSm<9g|-
zg(_#ymZkY|Bi1ZT`18yY$Ux0~-`q;wh?FuboKrkhb4mR1rltF`MO-O*cw(RcRnTws
zyHgcUV)t#rl2=~l?<`_cqT2H=0`w(_qoV{?amsX>x!9S*W7h3sy{b?aI2mVJ%n^1B
z&|u&1euUO@>o2G|t#BziyS^+m6mDzdno&CjNk|&wL{4>!$|=7%fLl$eO(CvJl|;*v
zJKRWtvP!C=;74kIH;ZX4pMC2*zQ(P6{8bL#_<e^c>ZUAlox7-Is8BQG=M<PTn|fxC
zEGqj<4@L*sNlwl9YQUrl#W>(vIx|izQ*mDOF}r!D&=v)*lcN)j`UsQS)L#gtc1(El
z^T>v#QzK8czEwHg5Tw_4wN)47c+CB8E7;e8X5`;pZQM@gM^a<$`rOPzbc0E25Z^TN
z+r&Y7;+=}^Ix2&Vy*bsJP`zs6WfHEH$LC*J=S`i=n-;>?le=b7vacQOJ*VxMxbgQ@
zbQ23(hc}$vod~nz?clZ5bFJ(iq-HzpZHUn0+iL4_Zb9Jh2cc?g5K35T5U9-~?@bOw
z@ij3`IS7;(`WY2*Mz!d(UM|tBRfoSfb6w{-lV~&IWoK=tWEU8-7jWhyI1^9YaHv6l
zL)vZ9O_sk<ktB9ON<<~Akw(`xG7^;&@mHE79n&6|lT5Y&IZT>|A^s)_5hQdNzpk^j
zE`iFE#@El>3~~KUx)@o%MWe!YQriEmOyc4|m~-gl_@NTBfHS#t*fN=603?YpsYHVM
z+UiWj81X8HT<BwRdU|3#Tv{^av89CQ>vK)!!rW1IKd}=f_;dnxWAK2CHHR6w9CP(*
z+qkvvk0-@)t{3D~&2-CSO-{AyH~@xS+ADdv0*%$90sgj@#D3jrY&CnfMp_FBw@Hr2
z!vW*+A){#l8!G<t6P%7w%y^w{o>kW>0z1Sp`--(iFxlLpHV?YnE@A0y;VW{a$P<O`
z!qQ;9u2Ai`Q0<mP)?nb^mpPK9*1+#;!QVAezDJ0j=1VDxW>*|pUc2!s4@f!IDk)Ny
z|Ck_&_O5>{yWR~+6<Hd0E(n8(lQSTuA;+}Rl~uQ9Wc5Dx-G?z-IzsM`rD1y|8|rFH
zvdHzhb>hcW-1{m!h>0#AbwN{<XaZJ1&oH9<TDNG(emnP7KZST&U5>7yL6+O1t`fqB
z_-6*hYg@N(T}JZra<9W&iN1l+YE}rlXJwOIGp88Tt-)pNp;3i=)EL%W>1n)^7WRn1
z1QXZjL7e$cd9wOa!<X-M)X`$5ycjTQA(?QO!z_P(jFviEp}+!4&9EOd!H(!^mms@l
zd2x$PkW&vuYGpD`qsC4(ZjXr=c1Dtl3Ru_9jF%(ApzcdrnuOr_7Mau^uS^i>d~oWJ
z;v8K7sGw{)#?us0{IXB65>Jx331q108p%l5$W!pS_i_PCcQeN(j}&!<H%rK!6}l`_
zQz4wqL;SIE4o6PR5Qwdo-oDdY!mm}O`w-5>jz(X6iv)BXRf^Dkesh>h1nZ!CxZ#&J
zMlk39<Xtb!g0wjrb2eR-*jS;#KKeu8GGnUViOUwH!nA#HS<P<ht)I@Gga|_))7y+x
zB0W(E=)8Yv`oVOevcglao}HmACh4w<VTU8^?pxuk_Um~tZZ+xH8H-{5^2l%+JF>Jt
zYeQNYc%EH{XM&LVoRQQ}$VI5r^2OSv`@P;qtMv0dZCZw#%-4H{vtN;G&xV6B<pXEq
zQ=S*%!X<519=m_em<;D!fir0r4s!EB<2v?`^RLTIA{pFdTDqH}Ch4tY3A;+&vlpel
z)37UrSsT#3G&?ZU)5V0?b3%=o3*6q`lXbS5s|YN&&!InCMUv#Ot2aVW+qdX7Ri?(Q
zqRhzlF*2YEb=$ALX^7iaUl?%3E)abOtG*#_8Wi}kap~8rtN2YNb<F=P%RdSPVzHw3
z<k)bA?N2@N{8e@gl{Q_il4y<AtzeZ0%*hgZ`jo1_Y}$8TN@6x28cF{En1Z1O4azv9
z9SZ|Agd<bl3ruM|<iVo|udFcOp)SN$44+}9BXC?A_X6{A0NZW$ZYS!YXYJo8rPeI|
z{E9z4NtEQ##-nJkyRW;Q;2(k2oWs8fs?J)NYc+5C*2>wbt?T>lwZYGjUP<u40{CQF
zWT6pk<jIJ}73_Z!qk)X6T+~}HT@6SCK#N}7#snMd0Sehx<je8Uf_k`O=VYY5*gAK@
z+<A*4r198?xaa3c(x=XrV%S&u<3GJp$9KLiW~|$6w61?&WyLS1vg^_Agq2^mm<8N6
z!QZz1d|f=I(hHuMV^G(;N|x#AFn3?L=r4Ai4zHK)I^wYQ*#PdtFVK#}SPq9M-s?f|
zN8ehR9l67c^Te}olM{Pdu*!hj%|CPoRJ#jdCyHhsLCw^Xrk!bM-7jC~$b^mAyCa<+
z^5W5QT`3<Yk6Ll5L~6vM&WqH#w6@G-(1psYvyYAbka<xHw*pBQ0d~VY?f1#v%|&J+
zw7sg2lj5sjy|wE^&3!=~A2(a)clU77M_z2q80&BApWLr6nId>WuzBmYj)_v(-w~T{
zgmzizO*&luR7lI`%b&m;teifQRmoSefx15Mqo^-L^OvXoJo0FX^Stk)3kDEzyfI+o
z#dMbApmZxy!yaPDTB&XKx_{PRF`uf21FmTq;7U1?puw6F58;3@T<|&2b;~C?8!)$^
zq<-d~;J4^_Rp-<{$m`!r%NEa>Eoep#b%|#NT;E+Ka0TT*UT``5*(Y?FeX{pGzAiI=
zRJxmtkJKaE%hwvtb*#A1LS&$NobQu%#9Y~a=;~`|+t;R$pVB=IOj#gVVb06isF{)N
zp=9Uuxqd~-qBp}?b6s!o5SK5<y?1slSnE?`B1Nw1EM2%ww8-BmniEr5UQ6&y=k8P!
zUda+^!slx(YTdGoYvtc^<kwrjra!*eUgM)NbT0K=O-Om;n%ua*mdl~gZ;eO+g~vZX
zYUAtDdEc$OrjSXG!Nlso9%jeQ<l*^cOL}~PR5JdxvrXHbXZ5VBHK+yTx93D}D~C=N
zD{XeL8>M1DThMd2<N|O7YtH^6!rH{7^x*&)o?{GQG7q+g7P9F})}WOo@G6#gbK}>y
z$J6SX%%Eji`eojT>`o+xy*=B0EMX~8ZgL5{=&3ZaiaDhzCxU(iBg$loXY!n#(o>hm
z5>@Cjh2Z?7E#Fe4(i1mIYk_vH(m;Kj!x3#$MNuEw{;^L*^^;`Niw-2d0*$Dev#1x1
z-#fLa^1|HXe$K0{sBotHdfB$jZM*1L_Fgf@D%Y$V9%x_5867!(*B<Y+A7!d!QfW2C
zg?$2(PJ8r#*~F_4zvmCd6St#p{Rg;b!ycsWJ5u)=I+tHpPXaD54vn`Ybb^F_et|jK
zKRvGYueq#CQb8rFBb97F3!HuG!!E3wPcbn}7XSG>Z^!MTKBl8yF3ZDx-{b|*a2Ad}
z#Wof8OM_#lftq&tf&r(1m$oC(<~3Ej&JJPA)b*7Vl)cqJewDV})*_ZPb{Kh7HByxf
z^K8?UKF*C1%#Q_X_;IQUJdP&-l@<0*dFWP2L$kHWOxtyZ%^;wb164WHdJCag|1OOw
zsvk55(CZq{Vp!>Lb?rz{bguH0Y?%<iLE^bgzQDCcJ_?i9l%&YYOhvbfE?q*m#-ThU
zDRMmIMu$w*Nxs*@@QzZk;wgLp_d6HTdg@fW6tz~Jl6;kwp2DVu!UlAbK7oWS<GgL`
z%v@o>43exBFE34$P!naTlHKNvT;*Jmnj5%oGSz`|3?>~$V4T#EHI0gqYH9vnYkdEm
z0Z&z)^4qojc~Z!6=uwskSJ$X3UtL|#qRiE0`=>7|jPk>mLq>OIkqTWY1n8?}UEj1(
zB`BIWPChNqpG-lsRd9FV=vX3CUDQkrTZN&;m*du&aHI%;CrO7FDH9ibnG}`uVi#y|
zl08fzwmnD2SwYpbwBasLgWM*?O?HA;z^G%}DAXwv;w$)T!kNU|Y%k;(ba{qLZbsnN
zEK!gB<xTmJ(#=<d*CJ%YhuD!e8$r?(cY9=l^r{F#d^x^}dU+_vTW@d2)*3@Xua*8S
zjcjIM_KTl39xpyjhMr0J?`ykS%UQ&u<%_M$2?^22QjAj6#rCmt+W>5WlxFQ{`ib%+
zgz&-{2Ex-E^ZuOP+<>#X0`H0FP3PIBf#G!qY`LtV^~z;6x6qsFZEvX}yI={Ti0AkM
z{4l$9NRc&!(N?;iarW42XSzZk!JD2hM@;F(H!AeiQ9a4^<Jw~CGw9kfNtK0E+5<n4
z4<-|8W$+*K3tMdPaJ*t!SCmfzqHRm{9_Plw5i=*zn_mn}M9Xy-(NlZpug75O;h7yF
zvA1!FDn~d`(kpNH7V<oO#~1SXDm#P#S@ve5Pfw-93TvtfkCI|(0usD*Cla{;-2who
z0WDs-NRrSY9qr&WfzVdL5*dKal9|w!92css>b*`rtYk=w0d&md7lJ!gbJCEV24=kS
zZz$a;G-^Da@O|Gf_()!zE^nM(9}2O~E3&ptIdoqglg8C*U*3YpjW>#dJ<)rQix~?k
zw@-iVR+cW$R?-|Xmo4NYXya>C)^DVvW_rU1x)j*vv943e{T74Y$A4Qmk&3%4f@Jvm
ztYWrOo$_q(<50(n@TDhMB_qSVh1`x6zAc%+sj!9~^Bl1+m`b^y+@CqSStSWBvRb3L
zmR%2FbbE>1+VhJA1D-m6QOr{9P0g^v3IQ+iy-on2;jZL{XAF&oRE;~722<TOw*qVB
zEWpLLu_4QM-lXEOk0?523f!%IL)qyzT1Hupg<{&hx9p`ti(X~UZOL_oU;`ElWa6x`
zzh)tM_{SMU%)-#0%v$lHTJd6bxOaD$I`=o8?)c5Q5bJjHijCJ=j!>x*AA0m~&Dt5_
zAKwv+ZayVUC|02=O*yWgN&ld$Jm^5BN<Jhpm`u4L98Bs=foWWXQDplX0(dTZCZ?p>
zv(|vF*U4)2LQ<if+~P(){MqE}*ay+3y6eICO~0nW&z=n`PbEqHcDY0T_PF-GtjI8_
z+<Y_Ia}O=2!9nsuH7L)lzCDz8APUpxg%Q0d+l~6c@#&z{1bpngXrjKR_DX*=r_pI_
z+8N7B$5g)5;%MV4Y}iYaB3wuh4b#c8;Z|!xNqaD(#10~6OS>r@S}5omlOx5iTW?>^
zhcaK`UMYkG0=}6$E9Mt$mWbLy&0)}m(9P4qv=i-KQo5afb)vfs4#+5{m>QidmO~F@
zeOQ)Fy|tBM(3z)7kY3Y<4z72>VVJrNhq>QAjzi6#LTU%5he9qxW1mgVo|rSOsgz-^
zO`TM}U#fl!C6guNvA@P+6LF$WcG)=kbQ_cpKn`4?KulI+CVqcXqnM<gE|&F^;zVz;
z$eYxyH5*u2(F2WRW`}1S<5ik+hCs2XCP&5*>;W-tW$+#vK6Xv5u`D$$22WAs$Q~!R
zc@WK%ZC9Z$afUdo3Z9PtzRwwNM-x^``DBhiHhDbt;-WrdYwS4)T%4LWi$+t&nA<e3
zf85h2|HZ%RP*Nh{1|i~|CB_>((x}gkPQJrdf&s(deC6!XCa9bvC(lPIJzI^fq?{k!
zFDF+>jqpnzvj(&nr)ejY!g9E1swG!ZS1YP;Cf0^WQF*#xZBYpMNu`v~UJWu2o!3#3
zlX*!ga+-u!jiu-Jsgv3CWYjVJ&piyc%|)h|*QnBSTII9bvZbH%{J}-GFzrW3=W$SG
z^q-Z_xa|P?wfFf{Vak~JEt=x8daBD$N%;q-T-JzwZ>Gpz+4wafzsV|w&Xha!0ygkn
ze-jX{sJ5Y27TdL^pPSny0&LEDmHHgh?)GiRQwXO9#vXAJGpZi-;$%Qs=M}k4WyPhM
zi7t@-2zS&0q}PZHesvCI+;!m-x~SX<U9Z>Ej_m7kQ)bn<r;mbroLFc|JU+m}zS500
zjQ@#Sb`4GhSKP?!LqG3*TZz^APB6sw;@nJ6IZ3jFkwvd=rkh)k;p)*l94s-?cqGF$
zl4y0m1`_5JVG1Yd7UuHmtr~rOI;3!QdTbt<^<`ZKA|Y9w!W6hyRfVvG88r%~E)~9x
z|DfgUsF>IS=8s$CBum6q&)hG8kP2RtUS0_u_Xd6YwL`(uuzHzgFO!QC>OIo6I%lf@
z;^W^M_I%lInzh&XKH1Zqh!&-c<p%tYLMUOn`6Rg#wit51wikO(+?EvW%#K5Z?#gpW
za2!s@e2-1=Hdq?I2Vc;7GUnH$k6+yS3;h~iY8CPf5JQia@-S^uWoz#s<u1$3F!?RY
z&fDqD^&97h$gD?=D}LH)B0ZO>b`&N8wem84O3SOby!!M^8bHz;QdTUl!)aL-%Xv2J
zs-bz-Hc+#rimvZME`#m<$5wmPpJVM($2ZXlwqL84J9g&*+uyT|=KJYO)-J^6Bq(+D
zQO|ZkkLSV501T}*#a|~~8_OkbE2NozxoYD`ZLHX8Hm7o&B%^CG)MDAU(%KYbtN73m
z%In@XhN~#S3ZL7UO4kH$L<|FFY7eeOTu<EdpEPBp-_@k|8D<_FOnx>7wI)EklGE*-
zlYwBGV1c>1vng24=J#h-&C^I$`5*=inr}2p2$3}i@6Bu8%~%{_nrHlRVN`u05ZgY>
zD!BoV%+IqbEtxw3^qn3jmm)yu2;$q%1y*2^Ovis#Cp&a|MLIfs=DcKY_sf8LfT(B{
z=Mm{2xDL`e(`L_oZVkJCZhd0c6It*|h^~At(Wh4Ke9dt7dS1YiEFSsI&60L9@_Thn
zW0JOSuZ6ThR1g+Dg~q9kA9}gUy?6LcjQDu3Mz+Bz?cR1FeNPB*G+xLMtjeWxW*?jw
zXese?&btcEWdvI<8eKY^TrHd)pg7nOe>^Es$wvhE9vP544^eqbl373X?EVI(;BIgv
z4fiZ>piU*)snjN6;{00edM`SLtV^Fv;u-C#-&;#FeAId&96vHwJHq?xY4`AP&;(P(
z^UKvkTgzZx3y-T{y&G}NkSGD`E@Sgx9`1J;`sey!9`cme<v5;}Z#?d<$PJLhnWBkp
z$<m@fTA+tA^V#%qnP;6x#I>S(Rr02XqT3MDql8&j_J7Q)FNqSb-AdOy3Et$CSVSM>
zHXDucg4*h#X~_D4H`6xKzE72g{}C)X*>qnJXRnvu^VHK#wEH#8&atkWSXl@d5`Bay
z{p+_JFw}R3S9V2)+32)6+<3+SI^wQ_!d70wVJ)W@&uST-G1STnb8)RPSf<|~Q=Nqn
z;gIrLQ|1a>#>n%FR_WD)l!+5^CoQgTT#yZ1Fba{Z{)ONiVsKDR_Gi!Q7w#u++=vO)
z^f?y2(ZmYOP_K>Qq$P>U^k^rd)Mb{!TFu@B7WP5BLo5rmX33wRc3$UyBscJw#79Jv
z_QZD0Zpmy9IW22W5OtL|8ZDblgWcu~PDFesI?ndyUZ@_^L~0_w^}c9LBqdfq0Jpqg
zfcmk%6uaMJMj9k|2o1Up%G7YV<Qfn|2I2LffK-@EXt%>3_vzqD`rUoRM>@MK)aY&I
z_0MGi*%M>>eGBKs))(z{$cib*==Wt9G~OX2*N4rQKba4+DYdhp5Bb&5`WN3iUPBHB
z$a)Ld68FX{==650qe`#>66I^0u`7#AodY+xbE!qXqAPj6_o1{XQBJe-10H~HB*y6K
zzCS)qyyGqh6GF{M%uYY-F%<a3^l6X1nOFJ@$$hQdvF?olAyqA2W_-FCmu*NjrHHQT
zQ!?vL_Zvno`*vn4V4UpWm0aCV3%*xk6Zsy0G|Fr{%8W7LbVzxZ()nX`Z0$?!^p~Bh
zckGG$Lfn+1e|%z^@eN@=+t{>~6Bb<MmiRy^rl=n^<|OVe<tk_=T9<CysP?0S>3XL-
zXcnGK)AP#e;+DuB%|&O5c%An2(Y|t;hP4M4jv5YMF*qimb>cVDu6Ya1qo7cyp3d_&
z1P3Htmq~d?ZgVvN(2X~J(dWnaIiM^U1j}pDrp$NL$Ci(}e@dDX3Bt<h9%bV#+%D0N
zxH0-EomhHV=UFP55|@3A%a7MYT;_FIO@pIxt`Va}56502C1{>$T&vMHU4DK0(QylC
zB+|)$#yo&34jcQ`IZeEOX^`3cIz90_O?o5vd=wc1(_BslI0wCd$D#X!6WNb`{6{Jm
zruldR^ZgZ;h+h-qOSJ%n?U0U^q?Y6O)0>h!_dW^NbtLZfVg3yJPQORL)(JNIJTt+_
zZl}!;d6OB;J51T8YJ%0;{rju|q0h<g>jGNgbO#$YL$#_u8>pAm`c{V0L|b!kC0pYL
z8;9CcU|usZ9{I}JPpQ1tl$#E`r{|~&n7jAmvR`O-;x_B#J<_kH2ssWDvdw=$*B2F0
z43Y7oQY8#WKk%S@OI+q{s6ha!SIUf}VWjq4l)MLnDdbveQqy$g^lAh=Rn)*rc@j#Z
z>OC$3Fd@SAeq(=2o4xcFE@k=Ro%U=uz#og3{Z_P1?f@9l)r5<QRoIu4m)<ACXS!Bs
z9yqFWP<gZ>XO~nwReUPt*>k6DM)BTKzTtz(E<qVa7iJaKLsfPGrB0p;7bqz&qpDIZ
zPSnGWT`u`HJTZ&577pO@0MK|yc_hzpVYv5%=G}Cn!|89G;t?eYhoF+eV91PdgRds>
z4=B|twCOxL(V95LF!Luj*=F9lKH4clX^|PKutG_xpyEb4jeEC2o-l*?C{cP{P<s+4
zOMv%;qu8NW&xSts)fAt&T7sVw%hwVYgjX1?&CEhI_GP8@JuGV0r)~Oa6cDEj3d5%f
z^5fVtinp_W5*u^G#u78pR7J2ARoSg-7;!gMv#O_sDawPPg(NQ4*~q~p&|+v&GD`-e
zku-)=I%=4ic56{N<!KO6k^4U@QOkPNDGD`^QeWw!6ig;g>LH`0P%Ck(K4sbB!jAh_
zuuN#yBRU3#uqDyNIl2XkF8I?l678KsmFmOFZ9LX*(llRt|G;7A)Itm2Oxg|OrC=Cu
zPcOB{RaeK_BUAFgA@iX)BgOY7(?ppZ6}M2IgBtl(K{|1+9esu_d&38=RZQ9%PY<6!
z=_B);1%^4VrJI)Ik*W&Z5vTwyg4Op`q%5UjIPtk6@w1xqhT1+izW4s~D14OVQszE_
zwdr+|jC;`q!<LQOEgjk{O)U&NWYDr!v8~^#o}`+WBg&dudP|d>Q_=YJ>Z-J(Wmxa5
zaPrC<-FWF8u0ruG9{UCGh*M%tm(*=G-Bioq=i&VV;maDlSgRbjaOIOD8&y~%qSj2V
zQCsdN*0TK!n%Li7wku|szRFC(OtqO$<JN8tttp<F>PAk>9hJxS<Cj@mb0lj_gySsz
zDKREifbyh!t>V)px?^;yD^Gf^1+Uhw{-iH?@h*M@srpaVD#2hZxzu;mdlxSCzlJ-6
z9zF9mUuUV8ZLi15PxdobkFl<UWwm|Q*jcL;1p7<}$V*+IjL=pat#PbhC+A%t<#dNr
zoo$w%th$bJTO@qYue>rEGFi3~%laH9n<VRHELI$F$K^_rO)SjJGThPb=fY-2`gc0f
z&9h0xM)$Y4+}>IoxZKWH8+LB;jWG}8PFI`H2baQ}4l2nG3^H5g(j5^7n@^hP4!E7q
zpVrfq-%%*sh<o@fpYG$DQH$03zU})w#z59~9)d)(e$IC39HdiPLnsq3+PXEd+O|>e
zKZ9v0ok3_<%M9lwuM6r_1&!1@OukpzP;DI_5u2&-NJ(&t652g$y;#vyqr2|U491TF
zlQLUxsV%d>Y3D|NCen?*io$hIXTvJas?)aG()4(Ec;5_(nQ?FMmSd_Al}wqESx1xL
zA<25b73RZ7o(<p9I~k3a<RE}~x2h^~d{!VTK)#~yKn#i(A6g*Vo2p~6_zuWi0b%U<
ziIgyduHgq(xn0Z@RNPeFV^d|qt&seqckKrS#Syz<ctfr|97hNZ2#QZ^5mjfFo|Hgo
z{$_?;5eJmk`k;dfOvEH32A3WG+|*n(qtc+=D=|Zgp<S0=$~}M;Pn*=Yc)$2p)|?Bj
zRYrJ=rfzc*=CeC>VXioR^)%1T?s0UyzYd~`U<G6;;evTF@cWAp`=r*V9jCr`4`-c~
zot|C!S-_BPmCdzhH(0R>7#hlsrNKuBg6@O@h;&H4=ftJNv6N({xgAYs)*jR-KFPW!
z9NG5-{Mb*`Fefse;X4+MMDgOYF+pfk%H84dpX!-%q|cRi^tLZIS37>9<tSR8!3|G}
z2zL-M`9iW1J$WcKNKrwAEhQ%s*A?uL=QPc+E|w{tJ;W31)_S-x#grDL>Di^0&P!IS
zj(gYW%29z9qy;BLhg->{yRH^@$m?r(ElJN;kDVojZOYyT136rZs0X6%=};O)H(cYi
zOpLv9PmJx+Rne!>=tl3Z%LOK?xveE%FB9759D0UB7ZIeDV=GkXpvj`(VO1Dkmh2=L
zFTIKd3n}GP?$5BIs(&ZdN%;H_#oJeJC~RK1dlYu-!5ATG%=A)bMe*L0HKMQ<t6A_{
zrX`!*tq~~$Apd}^!<14rZUlP*Et(lwQ(sr(Plu*mWgA*xuf8h9nGs%Y!H#51fyP{M
zs#xLU6cGIVXN)7~4_uKlQol3Rf$xIT`JABzM$Oj>6(>%L6({#OItvGtIx0p?u?Br*
z=i?rinMp7_?(<Yzwb|f1qX9ArzotblM+w^DL&kXf)a=bzy&7Pj+fJD=xMU@JEiRP8
zM^W2dVIX;J_7FUhFQt8T23}eULc?!ED-tBvCXWNy^UTscnAn5&Im-<H4E{D)ti`-L
z{V8`IX*tCOa{>Rusc(CD9K;77jn#@SBn`Fdpp<G}AAz;iV2Vt!Y^d(+I9WOj>{du>
z0Ec8P_ovyJ_)K5<0Zk&i(hjN^(jT??I&WofLdlZR>-MU3!nQbi<I59k^_2P)#J%aM
z`)(gAPSg=9)Pb&`1|8M0Xvei9Zgd)BnjIdqLl$BsCe4C=e?%W|Ukq9=r+WyQpPxRa
z?kL+u6lXeRZ_knj?DMK&ezU%nf>r4EO;HS0@BAW?ZOR>Wa&+Rn{4*QU)+-SjwT;Wm
ziF|97U$I*-vXVd*9LNKm0WMX-)8J=v)f4j0kUo|YQ_D+ry&XZD_T{bR^=Trb)B6<_
zKKk62f5Yc}<-H_81S$|hJo~FesK7V-8{`0MWGMJ(?;b)6JVG6i?mdVdl>Y(76X`H@
z?-A_$TL|u-e+s>+1Q&fy{vOywh=haZ)lEnQ^0RA79Ii%`kcHzZs<8@<j<z6Z&G1G!
z&|B(oV=JiOn;{k5DMDU0@r|<3JHif=c+)6!N~fXDK2!?l))jl?2;LSGh0a^BLmG+B
zeju#5Asij>bq8`Rui#1Zz&9|4$e%%sT8m(L6G%^yli3D3vG~R&1LTX*L`XQRTmENQ
z!1nR!|A`8_M#kZ17;pyzpF9M%!5G*PZcYD0AEmY7b;7%**4BIo>Hl3P?Ep;uTzA3G
z20OR$vHd1D582`33ngfjr-!-Al0GU!1OCAeShqOif1vuBIhC}BG@aX(z%E%;ggfy*
z-^w5SB0cn_wdYME{G~earJDORrr`erSv~oKA(Ftiy?Xd$!U3s?fK=|&|37A72~^X5
zMx%icZxujOg8N`HDg^Y$5aBb5rTdU)J1o*Mq<iM=H{N}|zvL^G;cq$W_+ar?_ZDo;
z5cblpr*#7Gu5<e?whu``!2Acg^<Z54=cxLK;sU~n|8a-^-S1_W914Eln+fEjl@GW?
zjD$1#Y64c{6oyRydCM`t0UGJw!yzPk>@CL_hkpw}1YPMFe-~f@s|tk$61plM{4p;7
ziS{>r4TL4pA;-cDwl@qM0o&<DqK~vsGtj~64nn3!jf9=X6nkRodh!?e_Vu2QkzPU{
zYL^7!FY3Ic|M#n*(oYcfR1i)ZDBFMEP4qFI{)uj>V+>v~3|_s5)A4_h*4T?Ctd$n8
zwr*H1LdcT(oSbSTK?D8I)?^rOlEA(8)L+oSgaVrFpsA9{e2w}H&b5WYfkS=NOZx98
zZ2U{}0MSU;ZEvyF&Vg0&ziPw6?8O9m3>9qnn}{8MYezA*M}ifTziJC}5M)ae_y>D|
zr0|W>rUl@g+5#WC^BsQQAmgt(ph*9w49X<pul{(1UHP44b0_V&sbByqS+z8?=~vqZ
zud`v+`5O`epDh+CgO_X)2DCTN*WEcl5}A+yyf+VbH1E?TB_Y9@0RecZ_~%QBsw#M$
zC9_Tz0&uoiunb;!0*;ZD-?uh*uz<};Ls01IC1sHEufjaTq2l+cXxQ6|VmvJaJi|ya
zOxshu`-;KtiO4a8gX3qAO=tn%X+Jp#0x2B;LBa$+)<4mgAe+r$w&wy~d&(+5(#2DY
zKfg8CAu8-@^*2F75O5_z$Ffkq1;Jg0BFB)YqWuGk|97ad9_wXnvO&HW`5{5Zc#*I*
zc3@Y3+m3xkgnsa)&tk3y{%4<n%;f3sY9izoApF8Oh~YPBK(HgAGtKTR&6qmC@i%3W
zpnntkA83#MB1DCCt^P7-2neo(|5z3T;*UL>%_p{Ja6bFPD!;(RQ>sf}BX)sy@b7A!
zMiVd}wYFXgvLTt(LT;DafMPZ-RU%B`X*ML%uHZLV1DSlyfh>(*WBs2bIuO4MgLs(=
z>OZ3UfPdUhA;dd|B2aDgH3pTa4+%K#9Ef}WaY5&Av>)7opX7YQ_?!gz#5*EDiGuY5
z^b%@+^zDyO2f=48vFByp^@aZ^eP;DvTI(gN^w!MP$|RdR26WYPNvo9B=4)j#G^v?~
zS-K>vlwN}`9MdY6kH=fObZn-_l9fPfDgj@E(m|~eVI`6-fqwM25=#C>ZX(G_q;D@f
zh%y&QTTH$STLG<^3oJbs2<QS9Q$elCPmg72A~y-M6iQY>y#ikdq*at1j0c{4{Qsm<
zUX9%40J1p(Af=GfDxOvO%K;2ccLrgWh{-CRf4~<AXcf=8;w=$7Hi5-Tcr}#Hx-fM-
zt3-gMOL!mrtq_xc-5E&ML+aa~b)n1=&=%iq!dCphU+E`0xTWgI(?wo3%nyBgX#AHM
zZ~SUK0A#`W<N+<5q@!3oAchR6ZwcoV0bMR=(kkNB`CQ=)P0>bS$CSw`;+NoO&$S0B
z`yHLQqxY{9?@wRqF4zUXeciZ6{&0sr;9o-9adC}_@jD6pxnRwn5=j6B%oAtTJ{j}(
zE%-C|y8R)NfDo9cR1P}C#~?n0NFWyAfe967PZj^Xtti0L(!~RV6a%syXJGhQN3%*F
zCkEs*sbE!67rgoR&pP^5`kU|}2%kwss)~ByuQ)%)gm<kT1m+8TXqg|e;vXt18}$kb
zD5d94SO1{m@_!+Msh32IC*45l>)()o@xMZULMmZ4zc|@cgE?Vf0|zRM^n>I`gMa&E
zKy<5nnw`{+CB3yjH3P|#KmB=AW1Vo;o5d}*x=rxc>$mf;9Xq;Bg6hQ!<W9#sD5&}`
zW3LYD^vNA;_f<En8(rsbQFp`r7ZKHfY_Lwmu8+h5Nau$C2Bz`jdgy!rQchPU*rt3A
z>lDygYAZ47fG3^_KD_aTt`|&PMk*%wTFw@#ftx!IO1=OwWhD%@LkCxRT2%*$0R9yk
zfn}2pvGSryfB^q15kjJ`d6z(ZfKa?$4k_gSn1}?I=m&|f#vL90G^8QG<QiZ~E5L*i
z8@XnH4I3K?PHOBQw3URP+`qL6k+3NMx_<TN2?2I@>ND!L0JM8Lq6saH$tl6G2E_lI
z2sWW5!EPNW8;y-LB{ja+=w~7|#vWSaO|FI;(hy^U5OgIlO2B;7IA<o-^96Q8vw67t
zXK($;J#XS|@CPr(WY0YN10h&qI6gzPhHtVgO)JfW5{w%VaTdbP9=`*Z38id8RS;9v
zfH4%j;>qU(tAN9LBL7Q_V0GYWIpT6`sbG*ef09EM+%yPd5K@sKrGBTw6AqQd?m{P;
zK#-=P$Mg8bkNT3<3dK_>#AC;rs7YLb4Pyy2^-J`VrzXJ-VUb{G7+@Pq^kWSzf{py+
z!eYrM27h$DUN8^j&HFwL0vtiTPYk&aui_8SbRRTBipF9NMi}gh%KR~l68IJy>;7BA
z8VLutd*?GrjbN%Z6nZag)k=*~W9IKQjF%3qkr7iuhTpc@FK;9-uYYC00y8FUN4_;?
zZmt2%0wK|*Z}lP}3tQj-sF+E>*CizCs_lZI2mozN>el%k1Y2XHk)81062&7W*7m8B
zq5nz)?~?f42RHC{@K^WWl-5WHxI){XjjIK}SwqD@Mh-$4))1+<!}ic{(?<1l!IO9S
zLA32{-Qr^m=7Ey;`k_?O|DpnoAXfiF1GoDjsXXHTCf0u2C*XYxo!bvMK%%??A^i;z
z1n^eb-5o;ay3#3gl7bHG*}M2C965(}0=1xdLbmU}Y+W>dbdonAOg+kX1gh@{Zi`O9
z(JLXrV?fIW6$QU_>5IHXD8m*AvrU*Ymzy(J8LRf!4P~$en1<DawBCIhKr@;j^P6Xa
z`8eo7rXA@$$diaKPcL+07WPuV33L13X2(o23pai3HuVQ?dWtF5B=eOXZmSzA1-d_o
zNC#_+X=~Fl%D$Ogr-G4w0V>6+dk<&lzh@Lc$Mcb0Iw=hlVBxL3UO%68b^!Ms{s_$i
z1zk}qrqt(elI$L6tZtH6y}<nf8DhqQfTMQj{RNcH8tC)b-{3+l$u^1UYtE>5Y*m(h
zlt%}vK`okNo=jOYpf}ck2fr<V2Kr}_M6pOf^+j*|x384K2o@hPbBsN>c-$OYUHR_M
zH2>*mmr(yF33|Ch8mp&a_Ln5qr{VMte*s4aOW#QYBI8dVPF7Vc<3BE3!TXp-b<`AA
z62`8Yl*h{I#mT;f+g!mc?V>O3RxXlP{%e*-f0p`SbopSkSD&kO(CK3M>7u%|q1s?~
z*<d%I55p{ob~K!JRNb0ReNd}nP|Le--Z7|bFT89|-FiUnUvG4GLS3p3JY96N9zZb}
zK&dg_sK%F$#g_)wKHy3#=t@h(eV1wce_z8O7sMbZg1yGqP6g9U4Wu1)v!+9)2Qmh?
z-!#fz6P14BE4@|VNV@%Fu!AOv&U_#&h-9rzjm@A2BS%Z?``NRdKtIs5Me`Ycbm!*b
z-@Z5AYp@^;2Jpz?hTfEoulfz44BFcxcBu^7y(x5;d+ehrblW4z4_CO8uULHyjC~A^
z9NuAyFDS|{juAU4|1N8(I`wpsuEChG#+Y%bTHAWS$Yj8%#<+1Yx^FPL&tw4TV7@>|
z&ejdJcCC_c0Nw+w9Z`WcDkBnT1cA(!75mF4`hfrY3<9|SuC^rb$YbCK7cT;w5(AbJ
zqqVCV>x1+Eg~5Ox7zJ7k1zOht<LXa+MxXnP{vnckxNpNaUZ%48f`k@G1!ITxKq+1W
zv<Oep0G-Y$3OE#C*o5cxw}+Ou@l~SZloG5-#L#b~C<ZA70tz0Z=~L`Bifo|myoht{
zK5B<1>dy>T<h8P{r+6Z-AqoBS>JAeLeWvLY&oYL85zFvWF65R@)T!4@`G+^T!1D_c
zXP*Pq*<m;fQ!%aYG4)Kq;3ZCN-MC!jzYh*29q#Z+-TcsVVy|wuIAUi^s@zrypyrKd
z6k}rRV(O2LWQ?zdPG$@?iHwVst%_tMHI6i~_BV|r!Zs!~Mx{0WAJnzR|5Zixe>Zo6
zvG&M>9bmmFxT7hbF3AWx1Y%jn%r?&g0QMmxujUVTpMfI~pB{-cyQ*0oBcM-xQV%$9
zQS|_`O!F_HuKpd)Rt>pMhoR0qR0`|X75q+(SW_8viXB!8YqzOv6QHMw<^I*E>Nn7{
z0=h1+I-foqDIn_mF|UNtByd5T(HF|Vc)5phO(q5i3u9<ZWnkQvmH{;P0m6{=jkaMw
zE!pr1=wls?fSr8AscrP&$NwD$6)|*S{2^P!05m_sUln{73INytB*C|Jg$yu#)Bx;_
z@NcRo$86G!3~bZrzkP1*1c0sT^I@#MUm*^@?vZT1T~%-LeUi*Gec~wJ`Iw^O^J;z`
z(9igfX)5jkZn@e*_72?9_RE!4&)ujGm+-zHd2n3Q#vka0brJs+`L70u5^_T#{#Tvb
z^{*lH|1@{@S3YnAm_}i{lUTEB%yO;wuy>~uviy&!mqn1h0gS{i&*%pO7?k`CIWLQq
z9Q+LekJaFN1Kxc=FdRci$$v*fA*Ppu^<9whs*>Zsr2a)s$3vFDqP5dgdc2~GSN@&q
z)7Q+aEIXpv_@h4mA@49CLo?+w=!0Ey>Y@mDmHLJV4ZxljMZoY41Tr=PkM`3?=uhid
z<?|Zu@aSj%DA@GE-oFRgy$88<wSkrL1uErB;E0d0jyIu>Hxj*UtaBWQPuYj5@qm0~
z1geTWd905Zx~X;u=|jESn<o$f2WerE3LHigp*%FuV)(&cB?2|xf1)Fpi{_qpIBd_q
zP9d)OsI%W+k#7|(fGk-T2O20KRlv-@ZOlcUzkurqgq}WQ>JCUdAFwq}`oAOg9e;tp
z!hW3`yXGF}fQK5Q4jzLJYb1TP-W15u6!`6t;D;;U#~MMv5|Tc_SADablxN$j%@Om(
zk#?Xl1J~#4(L0v~KKR2G_MM|0u<@>x2?hP>M)co%HmaLf*_HpPT|SJSA5fe>y2<_#
zaarGJH>PhlX3fZlR_smV|Iqf<(QQ22)?i}hn3<W`am>t&GRVx#Ofj<^GeaCRGcz+Y
zvty2#?NNSj&3tq3cjvveX3hK|b?J0<N$TpZ+Gp?Tb7bF_8SaPv&4dlH!hm=F!gBu7
z<h~U<(@8Yf=@<F%hW-E6<bJ<Ctb-od)^UA*(<OKT2AVf)tAhR^VmEYQ{~<u!yeJ@~
z>afp~Sk9B0+^eEybaCc%Jt7}Q|D%bC<(#R>-7kFR9d+*AG4de=`~3em6PVDo3Ai6U
zrQaFmixJE9km2=^ZH)jX*}t0p&PRn+<rWEUpvkB6<pE;l9|8?Tv;Dmy{?1GP^fzeh
zi23UOcny7ZaGA>FN8I@;<o(2RI7Sr4<cF;H9`ke!y*ozKpVIY#v3L_m1ymn^XVlx_
zp`rWZJ67u6c|hmgVFF2Lr-@<L<Q=TgRNg(tP`101T;=BeW(VWX*}E-(9n2ZjhevWn
z6g7Y+ychlF@r(FAqQSLz@hkKmSX#{yR4Bm@BCinpZ(y3WN3fwZKj^#~?H^G|fTX<#
zXoD2O&7whLR8cZP_Texo3fL(yJE$m()10~mb%p1vh=+}*ryFL*ZJ;qCb&V0?@9Z{i
zgY{qJ;NL9+k8VXq^BI&A(6-B0q?sKwWDWh-P2mJY{>O=pfd?CTsqud~tlRB;GIQVZ
zhPud@ig-E9+0og?X}02}yTJ7eaf>$FuDmL5wt9Wqa(_b+CjQ4UAq^4{mGth_5QT{j
z`|}ut5{^uX^79&YuU4qp8YTrgY7A<pmIO%1%!n5>7Gm$w-E57_h}V1Yc`u$Ah}wI=
zGe{xSOdd4m79|s4F9nlwiJbzm6OYVTjf?mjhR)d-%q8eEfkAMLY;Fv5E?N0>9PB&_
zlyeXUH>*E)?~fSf**Mrmlo02jxER^$80LJkC}-mUmmm@XgM2cyax#Qt7&3x1eF6hX
z3GCxxh*KC5?p{d=N(~9@Srl<@)*f!uPc3k8Ev^2J82?RyxCKtK(zY51DOm{?Uxr5y
z_#9vMeg6b%OzrzVEpbb0k0ZwaTqWsq%P8oZLLL9lg7*~i7TfL%-=SB?V>ZgxC5r@W
ze_Yfz$5^g-k{ILuI4{Nu#f}xag^V}6rHqa|&pJRU=L;t<O16Klv3fswj)5dAILb&l
zj3j@byD?O6oF+=NcSz4LLO*F8Df*Ll=u(#uj5SOkbW|_YPBjTRAu}dk&|t_P_wF!j
zWJ)|?NY~9&fBE*hJhV%*<#0DB?yJcN`Su(Gs!g2V{XBHsSH2-x?q<1Ym*v0vU;cJ@
z7}h-51DmZFj|iWm+27(t+<z$Eba8FilFgre&C_klvDe;!aao8iF1esuKl&<cV@r18
z_yWs0AFU{`5w?2l8_+BYlM-hC6yy+&%!slBx@!LAiig)-CMtfrC7SnpzHD~6Au8^>
zBbwiRzHBF4@*`G%7b<_{!AuqS(fuj}cH@@G5v{lg%EfX__1~3fqL7V{D~81Cec^xI
zVT+zRpj2%2n)5-;cmhRFo0zp?h3W~hW}ZQ5f1k;M1VypEd#OcXLc@MM2GxWi<D%?b
z!IGhp$dU>N_eCQ1nus*>>I1*Rq#*8Ge|K0R27V1110OUAv|j;Tzg;*#=+uW7AVx93
z9S7iA(gj-#1uHd30BF<zgkcQ2eM_)|2$B{<asV1V0AU1!tR-E)#Za;mdt?D(3`4|m
zK(dljqY^uwPTY~B$8q2XkR`qhHkmGDKOzn&R}Ew#2Sn{#1{~0Sa>PJ)bOHZw3xbXq
zKY$zhEpR`8kmLYPjY=?bz~_+##N@J1BWi+<E`bNM|3ei>A=fp|u%cbk?AG8bRfOG=
z=CC4L173_kcUs2X5^ZdS0pje4NcR)WK^2yWBEyIxTL)f@Um*P*WE%R9rvI-RgJYu4
zqKbwq_qHyo^m{>~{nZU}*R#X~yS5n9K}3LSM1(KVUy6xG`@1iZ$&K@dcM!AAksb1;
zkB9>D*S}=T7vz8~yT4BU>z&Y_7v#T`L=GWD*A$WLIJUbOi_rjG!~@!6gdsQx*#OW#
z|1(}*MUZ~_tHEGaQG@+3T!HPSAOUt70a1fwgqyvGn#o~Og6yAS9HfvLVRx<x_%D~+
zV?fYliYG5p_%A?`AfIs|2LX?Po0#9TXdocFiz4ORK>N}4KQ$Bz_ZW|?*J;#A9>xTF
zb-;5d6Y#$ooCQcWK4M%1gj#ccT=)&O<`8cvNIrBKZ7BS>pvr%7HxTR6gIYtY^YPn-
zEfQi0B#2n)6uSzV`<2>5Ibgk2;NEMYGOnLv*l4GU(Akh4GV%Q<Honj6^HMCeF^IeP
z_KX;qUuJQ=Y#;tt|6J<w{eZTQ_+QSaK4>oaNB9Bp1&~uilE{PP&_?u7M&yu2r~xd=
z{w%2eEK=Rr@ZH!~ezIpoWSv206(b;0U@|2(;e)vI@8om@|Bt|h$qMz){0xti(aqgt
zjr3THH~!@K!8x4rR|WGH`Uk3HGW{2-10AzcL;N<9?!5xq*YeVZ*8l%hjp2nQo5KNA
zt@(BDo~`#U5AnYtK==Qp#?ns%FmEwF#iLO{YCWZ5YCXmB6es;fdDO1Z4Yhx9AETQh
z9Z31n*bXRFaQkRFHpc+jh9QOjG2t_SIvVsb3q-1dYGe*BI1@0Y|1%qR`n+Q7%tHL^
z!N-D7uID5ArTg3HB$$7&V|u6<>p9i)XRHI$Eeps}2->j@?-K>Glwfh!8;$UOdU@xi
zzJKf5==SLXLGOXiFaNmMf46@jO{5Recl5Cs6h@XE$4Cc|HuxX6|4|G4ziEX3I--5S
zPWiVX&wgal0u9u4F=KB{JV7#lU40<(Kjb(FUi|gx4x5R8Y}D>K%=epcrtUfNyo2z|
z(5HWnILF>ajct+xx`NJq)7};`<JC9oT~G%*v=I1-Hn+Ez{}*6+Kl1A&0saLxAaMSd
zL;&^FUytA~fZ3-P{CB9VK=W_tndHYhMfPW&Mm+l0{i*2~4k7dEKM4XNJd2>S-mCDA
zaIOLR)$jsOZ1qMK|AIfZb^wO^+JpPrn}%Dg;+-b4od*o}3`YNIqPV0R#)}=n>l)Z}
zk7~b4ZNJh!i>~ANqrcMnN1tjd7kUl!HJOJ0=3O9zT$v^okV^mrxdbfz|2V0PA2@qe
zz+Y9sIY8wV07S4=9@8z^Z%r8@lOO#WnDq05?=$|?K*aFX4??W2@Q?QKyS7`VUBN#E
zp5l+^syC*}$`9|iqmPZVM1r7+HS+-wA2%^E#G`SM@Se$PkC>0mNO<?s<(Fmu%*r%W
z%yQ!>o0ZlT<)Mg&(8$%zr{9h4%edipVH=h0j*vsxlaTdf{@%Yg<o6CPa1(A`OT?HR
z($7B9=E!Lw{EG{_5ccf@5o!&8n~+%_P9Wt%Ak_RCwAfXsE_%t9*>2C}<`~NVQwuGW
z6EZmf1y$_t5|2D=J_-;`sK5D$z+nC0*8_;?!H4jLjOh1eXCe<IMKOr`hJbaMCL9-*
zD|seS;>Uv=4injL@c8NB`)8HM7UvHxVpJMKOwjTa<~rqmdfaimH4@W;FirN<!0swR
z(E6JjVWHu2bhcAwATJqwT9PoxR3adtuO5-+g3bSb{X-;XtH@NbKoIRy*SI`P6cYKM
zOCr=bJ$xxqlEP0sjEVQu_viD)XYj&5PRH*~VZ@4(_rIP_yA&lGH6)Sx2FiL;DgT=O
zo&wDHf2m42JUpkr>5fuzgo)#UJ1zv*Li<AWQFQE=e`Wo)jv#B!J|*e<f(0k{S2a-p
zrF%ve2L0%E@}N~=kbIJz9;7%BrxP-H#mYU@Hgx2U!#y<i8~>6pXV2s}KGQF!b}#Rk
zl%G!RQIl8h&D|m+)_$}eQ(Vd8+bzb80;%msnX;yT%vslR<Tjm<sMy@O)6Qe5KBF7h
zBVM&`?zv3#oZ{}fbPS!Q2QG2+pL)hFaX?pDg)Q9?R#~a~1~^BE-+-Z)tNayq0Im92
z0g5j(?Ff*k=s~Nohj)za7`>1eOGBE3zztuA%!ZzVa{v@Z{$tl3Wsj5)TrLZA7E8Zb
zyM+08k2!)8?xh2+(+^yc@<FfqK^P|cZe@f+jUI5!zjv?npYMOQ)0i*3`hOK@GL`R%
zoY|oQ0^cQJM6JZ~p*ZcUPqBF3FNTQt+xZ!OITi@Ae#|Xcju!63Zl%PoFab`L4yJ}?
z7QFA+H{PtY-iXS~xu5MJ-|0TgN0O^ds>}3}V{eMeKJGpM$tY3`WS*X|>TD^eFa?V>
zA8g)Qbqm%vFLB%zk;8z}n$n8NQu%(zhH`eK{S>v=C!>YVR3auS4|v=tq%*0^vH>F4
zOZ&T@bqh|$Yb1=?dd-jevbbi5#_~|?T3PQN&r34LIYwUBh**xfLY?t@T{0iuJzD~A
zA6Jqxz#YTtaxue;MZbxxLoo~7Fb#Mn_iOq<>u?m-G5V{B2S43;%Gyx-(QLS&z5({9
zgOlKj>@zN8{2kkmyw0j_l%0EpRY&-5h6U}Pe7<~_xSJ<f@M|g6jMuA-Ki-R-OI(?g
zw$AY7rw|}VaOOdU+GK<deT(9P3-!Y)Tc~3?$JTI$WDyq@M>j8Zpimtg={{B$3?KRi
ztCN<3JnE_^nGlzGXuhgHa5*v?OBHCDqq34#$hvFzaU*%^QEysH88jjZm(W-$8^SKD
zl8{<S&8*|p?`*>N<CRnlOo=Nukp;^iVJUG*5l(=ffmBsDD1y{nREihYp++{rJcbI;
zyL|pwl@{e5_N!`6z>L+}Ruju~TQ)`8M&^iF_{M9%p_(l;Y8Mp8r6WUR3o&wa$tJsM
z-$O~9?#mj$maDHs-?iMZRc&zon^mb{>zShqix(46ZP2+HD5t`wH%Fd>R*+6uG9uA8
zpSDGt6(+yoQ9GM9EB0Wl;j9p{tHD|U;VztW#iE)2xVAHAln%=q_V#5z$X!6LZx4Z7
z;TwY?`5%TvMegxmfhy)WRN-O3Z9a$U!*N=SAjo*Lx(oB%6Jcr8oqiV{RqB`kjN(5=
z9Hlvn4UGA@Ksitt7y&|^N8E}g7tg{#Pvmj3pl+n!=SSHzJ#vPUhgz!Elv8=jKi3<z
zM@x!I5G2jCPa^i*UVKG-wR8!JKi){$WPJBmEyK>R_lH?YMqR)g;s(1T=S4{kmp8M8
z$L1C3fXKb(-`jt8N4=QrvJ%NBpo+*}mw&#=gJi!*IUk2dP(Ehoa~vX{qif7!j1IOE
zS6@U-*eJ+=RE%-iq$y1=aRs!h;(rvu4PB*!S#Cjn13pOMS41ZFsWri+m!)#whN<-_
ze;Z9kFF?DG5~R7a@li04zehhLo^5PXwe?(aA)TPX-R|zyWQ>BO4SNl{H6}FiE|-Y2
zRYEbX!hh6~kE>A?+Z%tm4sUn0iD_F70h4cVr;MNOOAD-!u&7XWuqs&{e!ow^35mw#
z`jPgsLG4uYA;BQEq{+Fe#IF4Z<>_VA*OGYtp-#Px8u7Y#fd&@JaaNk3XR`_a8%2+z
z{JIJtuAUR`oWA$owLH-?qTS2cG2uC4*2|ey8U3Jxjh|_gB<E%*=|mC`Lwy`;&K0d@
zn|hUH+B1_%L3RpDwjeh*jCLQCkYworI9WkNWIz4AaZGnpS~c$P%i8rkg6}SrZ9iS=
z<yPoduJUF?a31T=R&FrZ;;>Jm8E4KzwuZ8KQjogHr?-{*<>7llSK`T*g)HwH!>wT1
zP*MKMMcITBrp{E_Y}89v{Dx}WHr$`tZMbTB!q$r`!i2sYFI8GB#5E?Ml&gN+z`!^I
zX0_xK_wnb|ZORhL*;3!?A2}y(GO>T49;Ekj5uLJlpin8`a39WjY3oHw!^#MpC*#`F
zp+gf{HdeG*7du$KzDt^a=xXH10t)x}zK`*+y19s7r?`0<wy|=UiF)%0J9j_>4;<)K
zUN_-p@9Eea>?V&bV<nj(F%=*3V&ei`OgY{A@tTJ1Pd0zkC=n6vB9KkS4;aBXB-1mR
zVVGHv`an5g8d=f~TehRo<f<eYMbHkN>|<HtE3;@@eKA?8bj{Ou+A1$j9AAK8yED*j
z{8idIyq;j*SDFpM!GmPJQDwdID2c`P7SyEx(8A#f{ppt?eH?>uvg5T>Dr?)xj6ih7
zD4%NUyl#d5%Hi;9EzL?I&MRCv;5+vcxhjDW1+LwJc{$@FTgD{B!)#!}_OLo@?Lb*V
z3-}JD`H70U1g1otzH~$Z>iw9fs~DQkLbY_v9xJUS-vA1u>7^DJ`-}h?tfa~TOxuyG
zHsAF{SC%Vk+{?Fzgl6O&@~;ohgj5c9JJwuH->fDJG-tngT3{AQE&UqV1lp&mJNU=R
zHZc_y&Oj=gHgOl|VB%4M6HYJjVZ{H@D91-J9?!~S6^IzYr=`4cW<COgeOK$6jNRwW
z9WyiFG;`3@M>}ZjgKa=)9IN9caz8C9&mx3;7Tl=6YhF@`M1sllD_yWi+M!Q-@%G>U
z6WD?tcleqqYr?%E<NK0hcQUqy|FPtqOuh?Hx(4NuGc!oSI)C$y>8kEWT;NFw-6HeF
ze<#Wz8LGm-cQDdmsOvW*N`J<`QA*H}_HQ$H9gwN#Fz?ol3z-<h6(4tL*KR(jof%Ov
zLgn;cY-iAcPC0NEYq5$rdq&S0KRZ<Ty}A20{D#H#X8OXy*S7Ha-rmB7<hDy{lS(UF
z+wO)<wbJ*_myOFFqr~LwsEydyNdaez_pd4GxP8m)X_YcJ7ay1k#aX|lH>6Wa{0dI>
zDOk(k@O0C<!4ot$LlPEeew2!ogas)s_|g6-c~b}`HStt#bfHrU$w;c$kY4TQC{G)#
z&1cAd{Yr&4z7t)^SQ?*P>DjJ~p$@yCSQ}BORGgo4E;ER+VJqX?RQ|JKP4$lBIKs`#
znI6@4zxrVqA>$y;#5SF70GWUf(Iat{avgZ(+4!n%hx^9W3VxZS*9u5d;Gx-HhJLG1
zU6hujvLMg&g~tNE)lQsvWkgqq*y8dH)Xzdo`kMV1&)~k~n=Ww1?xb0D&gL5pHN7Fj
zn9}%~aQX~dRo+!u_hdw|qk9P<)wY2HkacbZ<1|Prt~!=NB`158IV?UCa-~dW(8dGM
z4iFYJNe*Wd(Moca1fMZ4_83G9ck4#GtiosAD$8$#<;jOgt>)NnS+{YNrT`Zd9o0Gj
zNC*3ZR++N}7G2~m&k6gZKvj7EMU&{SCaiVir~1+UDJ`lh^;a*nJfoS31X<0rmbH`|
zGwq+3@lop33sV-JFj-6pBT}E&d2BhBKF#`jq<Q_ee#pM@N{bb)$ZpzGi9d`VnAhA|
zm+0z#$o?U_q9RqkAs}0-$;!R_iy!gYRau?yEP<;{^+KoM_GQ&Pi?!|x<jm#OdYS$S
z6yN6TW$O{RJ}pk(@ql@se2DZq<|22ZBQ4uxY1)H*39wU~%7AT~{HKFZtxDQ&0H0&u
zZSh7%jjw~ayOu9kK}p-M?4bI&fZf&#mDV1!7r3olq^UM85rgqUs||Pc7i#e{`Y&{6
z%2iLo4;DrGM_$DGDPd52Gq{bhI;2<PD&9nK-{vCPS16_r>t6voX(w$(sFz5VfK<AZ
z#C+6-tu=OAs*~h>)>zz=6q_%+m#2x>>O^wv)YhI;JU<km@c56-l@~o$8h<<Moc~rj
zOdHYEHdrjLf{tC6;8dliP$O0JM9ZsKw4hWw$#>uqG2yJa!4KPMY1%X*anV%G*>lui
z_9h(=18y!_*->(gwA&<)`IB6pr;^MU)MFkaF(l6rzZ4+m&IP6E!~k1A-PgkBs!-~s
zgBZA`1j_G_b<i5{bEZynV*@sqSWQ`@$^e3o*)bO@yQPXOxan%#*MbzD*&{`tJ(awd
z^+~3Vb!)=@i9>sqHc@voq8%=iYl_}ewIGS{&Tnd1MJLvov^l6g4yBCpNe(4WCvEdj
zI?jypZz+Oh(+#nA(6kEf4^8aPvlOD0m?Y;eBew+{A@NgAGebS6d%JnDW-6$O6o|S3
z{Ha<@WxdZNNx+EiUuCa?i!VQJPS5z5E<)=)J!s<qfHyHeA07pWL$O)f0(;YA%^Cc`
z-mMY3i!A$#d>W{76}c8Ts+wGFf)1&vc3Zqww;ZcAQKmjWh5Y>^$oVjfoAGWrha#$g
z)kr?$H`%MN(>J+4@hY*!(8RrQW>>V*AtzJ8FTad-D9t+JX+`tBwIEHS)VS!?&CZ3q
zZ+-qV5fZ~F))hNZ(9~Y<{qgi>Ep<gNlN?RgLYV|8*fXm{)Jtn``vkC76<5__-GZr<
z%|`uh)u?fy-YeLET^{~xeC-7FaoI4u7Rg*cv9<_|OsG~Tjt|wwWUaGUS}^?3*CvNu
zrdO)ITNU`-lCdP~<YpEw!iElK<VvuPD`@w@m}Zi!di6-PR!b$V@aw%m?0n1`siJyl
zT2wx>#hWo}zCn8HP(4rX@d@r+=Y#D^2<<b*I!|OHVnyjqcgOk8B7RxsbOb4Mc%(+m
zMC)hD2wu(c$snrHu-jxRi&Fh1omG@1#GaCk>h`4NSn_5J)8r;{XcF+IT+3W`J%r>#
zi)8lX$th+X%@{S(hN?9rRpElEQ|?))p8dsVFq_aerA5qW>Pi3!s<e%tHb27Z>U+Hq
z?{9Y+xEg~&XKUCw#;?=01hOyGn{gHTth<B2mCuw(UD<qHs%&wmQ2q_+Wm$w9{QONm
z_+HOA*NjXAzOVIgGSOUxr^lw`32VuN;x2t$gb11MILYFW*aUu{v_+35G?~JXnd8D`
zNZ5@@LX)F9zfnK>Wi9`xjtgwCr4p?AV+b|Sl!WA^m*!Gn%3IA^2=RgwJk>Mix>Bgl
zo@t2cOe>xYYq3C|zGBv>-yE9bOD?aD*1SmXn!lTn-vkAu!YzhP+h(lK3U<s2j)I=o
z62$*5dC_i5)5A~{SMGIU+;uOFdZ$%g+0H(j-VJ3^ejF_}pyUbxR1s;pmlf8wOee_{
z7t;aG3Mm<sTShtP@1=cXfLhEB=3uR%@v=-^9=d-CXONRd`e>RxSDvj8$0XmIE;J0x
zP=9HbhKn^<Qt#bXVL}sR-&mR`SSruRqIN+Pc3!Bcx1=pagk?taS8b-Zkz-Lxs<4-`
zq;DKEk_v{iHcu2wiZ9Wmb(Xg0U24udnpT!(EX@4rVnsoLmG62_+b5s)Ok2^=K=DG+
zAbQ9#(0I3SddXo2&$VVxs)K&J$B6Z!P#hJK#Y2CAFtA7>{``cY5*<Fvxe9s|FDIo!
zp+QR;6MjKASlLviV2-CLT@lrv{~LL_Nl5EA&j+A$<d><O;MQ+Qfx#BMGI|u*ma1%|
z>!`9L@NTHk3Cd;a^c=eYYb`A|>K0oJCDg>s@JK}P@MEx&J_&-%eo8?C8iyA|lk&Xz
z=&+KJRM(~AQMGN=S_RdwGUUqyt=mm5A=-9}jCJAj==2NCopfU%!o0B)l}Dh2R2wsk
zmL$vB(eZZbh^?wA84mD{i?#S1mi*@}3~^|#Bm3+XEC__%VQX?xiG+3NbkYlJH%k~(
zuzTf+)>ch%fF;>rlW$gwr3GU#da_p}_~}R??`1fPXWu{gtHE%NZ@DIf<;&qCOkV6_
zuhiW>ZDRhYUi$X*lufkgOs@7dOHkV@`UZ>R%k2DWRe9)TVrrw}8aFEW93Xdm%dggK
z#`75tojo80@bl!o&1prQ&xpkrb~IV%e8)*~82#b0V$U`^Dk3m>eJysOSl47Z`AY@m
zqe-mMM$u!|&r<K(vmIbJPHo5p^T8Lia%98PXk>;4vI$!FEh^+-5G(@LHsOex3RcjX
zbK6PJ^EMlY9GqpkeLV%pYZ~8rO;Ee_*w4uTc|+A~1GwsQG)}cEF?4Gu*Z5(6w0_<Z
zzgZNOd_Q@mVAEOgp@&ZhejA#Fa+b3A!cedg{moLZrH&>l1?bM?xRWKBs|5yp|5~Fz
zcitVM8~<BGF3-&<IhskFPMl$}57WwFjAG`3XC}KOaX_MMPGAS?%g2paB4x^-MYe+U
zeQ(Vnzdjd&kITqrq?Qd~W2oTs8R<mUijP#DvB+L}Zb{p_AM_4;f$euOk2meb<3=<r
zdelL8M)RYP4M43j@VhR(pyLE{4gm{;;~b^~zt{q{BpW-^Q$B3qUA<37+|dsY6Kxed
z3xxJCCrn82Sii)NH{~ahxX@DD!glSIgrWz0N5F_~qyf92?jznHkfIB3;>jyyRg`BX
z!5gk;kSyRRuHv3DiviCD{s+={q<&ov-9e0hSaL<qtZInxvv1uFjn^mv?+A!YkE~H%
z!e&?+HZKQUZ5EshW8Gnlz1&(hdD}oh$XCy_)t?Pt#wwq(7Lprxw12h=J&$30Uglg2
zZ2ppQ1JQ~Dg;48(24;ddH`4-<%2;go;EKjuCn<46M9``q-Skq1(b&N5(blh}{iWU2
zgP<X6jbflflmTPUD&xG7DP^^`iyO+_Z+by_d2UTbFPvMo!Vr(s(dh2E*rrNq*2Z%Q
zgW0yf_sC@W-lu<8+_6OZxRiDvGMBr3g@&ZCv3*VEE$rwSWw4%YL{?QOPwi`7O=|M}
zf_MYz$(YjcXSSttq^xqn_Tb|aErU*9Ksc-V_0@9mf)`KMm^WisWa}T!5qJ?=E-;=b
zAA*AV)TTttJ*+s^0@11nB}*IK5{*N<c-*gR)CJ`wRZ$pdKw%?G%{^PMxKTZWx9wiG
z6!&;xgUDADscW@EL<lSBQ+Z}&-R)|>`3neWo;UJx{LYtdg(g5)87mO;_o~e1DZXWe
zekHrDZ<h$?G<eE>RQT7N%_M<if1M3Z{ccNli}SH99o{ifDn5*F(tZ}d={7?aDnqjz
z)9;<UA^^S8$9~Xhd}6i3USVuU7JO>lwBP*L9;7-<<9z678`{**vFc5KdoEaSHq$Lg
z_{rz;MK~w4z^iq6viCitn>wYfSgQ|Mnj(Ol6GyseAh-jnJY>^8c~=So!4fmKDoW=F
zy+u?6PmjrO3Mqh>!0eR{)*MpJ`mi^J#X1KwyKj=LgVE7Y9UY?;1!2~$=aBeCWm<G&
z4FeN?I<n1aka-@iG(qXH3ECQAC;Lxr195Aj?fMRFZCdg~I46nohnJVMK%nM1FK)|T
zl5tHnNfT6DSp8VtG{KbT1I)^b=T5gr&339+LKJ_ypz9%0NB(D}4jkn$$t_U0wR<dU
zfw0=n&j{6+YYRG{bawMSDn=F1Q^_5j1{gAW4TQfebZ8vN_yRrj=sRvj_^L9fboX0|
z>>>(MQu-&i1EiZ%b>f3YNm9`Avi#z!ksD=D_u~7zab2Vgf$`Wjq>iwJj6Hp65MOln
zpHO*$0nlzuYbt9^Ygt{CnOa?7m+wm3^HY2tG9+oDnA39;9$#kSH8|E6Mdle-%A7qL
z9~@JS6KAp&7E6uvCr`0Y+|JdCdB6AS1V4ixaybLK96qx1?D1T{$K8f29IoEq#HKJU
zJ^KdIJP;z?wpq|++ccJ)YO5n7gZY-U(XP(8(g2sRm3a7SvYlss*>ZZaGX5S@ek1f+
z9N?ahJ~h^TjvAYN>a3@S6rKCo+1=Az@|Z}Hx8<Qoi=|Aq_T45|uDut)rc|2^+K=B~
z?$e{2R|_k&##=2puqFUsZ6*wT*-wLl0)#QlTYXbx10m8a3&jKBs<PZzhezx;00jwM
zr&Ph>p`O&yQq;jC-KupJu7dK~RZ|MupAA$o9%;gJ(4<Bc{nC>o+vR#y<A*MsHK5Vd
z2mx&M7xV5z$u2Pn13sV-c(B<W)*wkHg6P+xO#J!KClmRQJ6K^e*?VCuH5H~G@S?b!
zg&zk>nxFA`r^uzxmuufE?l)0rZu|3MXJ+L!+NtByie@Nf;CA8XtLu<E@K#UHU6O+U
z39pr`kYz`oO?SvKH^%5Q`&*}&;frGKG>vy$bstZjfV;3Jvd{Pm`@_K9a8ur$Kt_>h
zDpic_s6AM${BKq6y`iK!yRi5Q&Vny8yh_{<nuu4bejL*(Zwqa_3wj+(?z<i?Uysnr
z*utqIfHx<*c!Z7<Ws!Evj;Rf;5IKLCtz2OFTR{6|cxP6g`cF_5oUs+iMR;>0p*;EH
z+Q)9f_cyW?$jzMi<9KtFc;kSM6QS;;E&J)(yW#tesgQer-v^?p;>Vsu&8|cpxpIC_
z>YA<@^EA5)Th8!BQROfCxLySzEt;V!lqXcktf_07_i?a9NeZXI3QOQ_A|BI+2-eO!
zsXNSkequyI8tS<khI6yA()J<l>>?|I`&!xDaHOy|i18>vf|lwSb3|UJvS1oWvD~f3
zs2=#K)U}2jL*?Kw*yMM&rYRr`4#`m(Xv9gtW$|4_vA(Un7B5}fvJ0`qwnQwNeS;ti
zXTS#5XYk9@WZz*4+BUvTHY*0>*Qw=0uAff>hYdLi&{F%9RW2v6oOF1dN7qE-nrx`B
z_0DX11(oJ?5kg*-uLG)hy`v9Qae|Zd!w~eUb;?s<JbkUabpWcY3nTyvnwZlKeJxhG
zj28+zME>-vOERZ=qJW7H0)^o^QQ7Lu8O_31n`YQK!4(L;rK4{+<jEt|sV0pv6dr?R
zXt+cJKueJfs9%^~kMwMEBZi;{%3y5h6o%OfvVNo+WfZ{LnP&NeQ45NE^_b++GDlv6
zAJkUnO4<-icoL?#^DVae6@8|H3LRegDeQ$P&kW)Xqh51J`j$IyYV5(KpF9>$D`lGg
zil+)(c*~jR*PPJHs59XJD0PFQZpyO{-S&YuP7agTUf+SeW_ud6lBeb7;ybCxBP~jb
z040+gl@7P^>c`enh?lly)~oV|A5(A3sw4QhSCatiNGL|=M>LJgV^?i{ty<rBO~+dT
zoAog<3&ys|2!l<*M1sZ|KOECgvqgfQ+WLW!A^dEP_6l{m<M;P0zmGJxVKOV)&y#@B
z-6%HYF*70NdUlu)bQ(ug`kz<nu;;ofhSJtOCUkJad5uIeoy@6)b}KYEP%aB6u(`k1
zC3r9wzr3!S@a)ppj37Haj~F)owAsL9;@4ynp3)cDsm+>>`4ZxVH_4?0<$+<#VpYdM
zkkj=d1xb_eGqTQEK*%PWVH`D-)PRM_h^H<u%)O`yuEOjg6K`0C`%BS{H`==+&OJ60
z^NPDa%}|m_AcLo$?&^{m_c23>TE0U0#I@QRf8p;VSm`9$Oy?1`6zbxWu%E%&!{)p!
ze*D}{Zh2z|krHMZKb@8s9qZu>kEPh2<^wk`s62JFmd;IR!=WZBM7Ap=Ejw`sIKC8^
zoPyu&VP~J_N<eZs6{CcJVps-TbW~5a1KMiyJz6b9l3SfbR{q3<dAu9{Scqhyn2Pol
z-IeB5WoBeVN|_UD$lBodEf`e-MMG{v(r&Jf`PSsD)o&ITHZCqdns0(;UI|N1M{Pp|
zv>gpMnJTKqX+~lAa|=-lQ-u1g*Xxos+3d95$XZky>StqOJ)yp`SYIXOT~5(PKahdx
zS+6|Zd@d==-GQi^l1Z!eUES6QFkyPSHw%g=*$BN>>m;$8cKq~ldg8pP$8)c{Xv<Hp
zw<>1oW2i1AgzIeaklxtv%_Xg~8g))x;fgi0(y{1_AuU80ox;{!dXemmIOY1ete4{s
zH#pN_$d8k2NmupHZ1DphTv;nd+)kExA-8~>La5M85bsUzPAUFWp=-Zb9UC|qrEO@1
ze<7YiR?>MH3ZBLsxH}3{!^40IMyg%aL@0y=Dt7DLXCEAGME-P-M9fK^B-^Ts@2E$(
zH48~tBHDXrpXxjXRO2%jfz9jP{T=sUF`5bNURu%Qul6oX+tqZbNubdFcam|O8`c*^
z`H04fW2Q`PZIQ36!IT_}cT~JsZw^fh_G;zedQa;MBa^=BI~pmrDx;pM^~QX1TD1n0
z3@f<_$vriQx0_+pL<&;uGVc3+I^j0HZ9l7U?(T&viq(nPRRnhARxH!}=@BYuH28v9
z8*{%#F5sz@o>8aST-_<bhRlyQiXv*MGFi?ZNzoKo^rCkbaE|NK6Mc}E>sd(`$%1^L
z>pJ!C)6$>*oR(UTUZgSP0SVF-Qpx~6SBI#|#H!c%^x<&~TBY2?Hl*g!={wu~v-HO{
zruMP*$&R(qR0Y7Q9`+jZv}TKXS)!r>n8J!QIk`-+d%7$Z*e!tF=lHn9dDPik2Kl<u
z?+kUEU-6_i1Q*O>Db_BucPFa`M<;MqSG2sEQ%kBgoD^uZn{9P=EF1h{7_8?KaegO^
zQ^b9j+K6MBK`fCWVm*by8n4J?b0~4>t}lBf7^Al;`Lm$okq$L5>QV|vVZ9qG_>06z
z%{wFAHQkBKD2elgqPbFvw7foXhC(v2?<1)o!Z5&UCzBoyvh8FIjWjLbcsU9e;w%rV
z#*OlK1nrdkaqSNIx|VnK7J4eA#n%*DEj894|8+WIe_2ML2O<T(hL&h?I}Bup5+;^V
z-r4|b!+_T9=xE;iS-%nAXD#g)#~OBbozVqwrF3^}Uetrqz(`73em&3YxxN7sk~6-p
z{SuaA6;vkF2hWo#oOaBwn0W5mc3@qSnH?yR;SRbT+dQOOTn#}>N+oQzke3noM+;z^
zO`9~4QARaRenNU;=3e75ZqKWa^r@B2X8I@R@&$a2?3tCOt38Tic^n&huv6puD+Z^v
z6VDUJ$HKt`%2VHHcXChsev>F2#AMEY6zeNBN-<<uwn1>TFAcUfV<?5zwulqiwG;u~
z^am(7u_}!2(x7w={8|P)RCB}g?=wn#OsK1Vc2UgFiD}X1GyIIPvb?k3s<TqkyV*sD
zq}*j~DJbrql(X!CHb7bzP2_4Nk&>BNQ`TTUD&?t%KG83Lu2<%osdfIcZZl=^(T^dS
zr>trL#5@|n*+Tio#A%J}-WWAU&imTn4K%HCOnIJLDZP>gcEsC~i1<fHm8%6aYF~oQ
zHwMjeROi>|k)tjoSY1poJ;4oNP<P4gpH)c^X6@_6NKu>utC)2fw@}Qlk6{cVl_I8!
zvF&Nu4^=68?RHWeM+awoNv5=ryh$PT&_XGHWvM%*!VWeT1?+fN%_T?2WBj4ShoYb~
z-HkIHIpYx3937aOy4za|8*)F8Uy2@Ttu;8$&m@@TdyCOj7wq;wcgXB0O9~N2%l~6*
z`9}OxN-3#-2oxEz3XXSIFl&`H#R@+K(eo%qa674%t@Rb9Gz=1IrHX1i{&|CN4BxVD
z|GZ)O2VO&tq+N69{6Ux#q;xvl>vQPv$y{h06;es5Jce{XnM>90R8xYh%5bSVDSKVp
z6osEu{8#s6u(PJST>E^Vcj({MW~hs>{5RxH%}xVWs0cW=8(a6NvJ2rWzK*eqo4+JR
zxP46`bY91bFtI7#Mk%lqO*Umcy0R$rJdy;XSnv^=uhPS!8K4tE)kxD+qC0<7Q2m5G
z*1Q<;trYhQ&)aQu^?1kJ3fU3)!Ad}qjRJEqf0M)Nk}sng9FAq1eoBX?4w?=n9f{1n
zBVku&4}iL%*iItLHOfWK3u^_T&rH%itwFWbDR|C|m(QtR8LIDg0nN6OG3oMf=RSCf
zKyNb!($z!ftR6^?H;)%S7`wq6fyDh4NW-sQrtX35K_0cz_dATe)s31w9L<wFrcEO5
zfN>@UVM|h--u*nLR$Y&8F~8!3Y<M)d`D*tTZpq-(LG3s6{FAiO?P?CRy4}h|+2Nfq
zu763JauPur)wy*X)vX92)<Yyl27%_C^nw@FjdVgAaaV!%L^?wCe&DKM&!p>WM=RN%
zv#84C^81V)5)7TUxqK>N>nGeF!+~sURnWNjPIUoZq7j-s>a-dyYR2o?l?U9*(t^nT
z+r!wjOjya|JgW^GyxLM>vdU;|D>C0}w&1@)+Y}>EDg#bO)6lh3<VfuvmnMW<RQ<sk
zFV})Z&^|A5RzXprOfne{)E!k2>kU7=jemt&v0T0<i#?7w$JBgNIctHav$kYAbeh%f
z=P`1=BoCVuyKhJ7YL-Xf@vwk&Q|)j(=CN2zS9CW+zU=kKZv)~NpqKAG$8!<A;C^m%
z*ZSec@Y9G(X~Bw<idRFGmV0YUe`8`Tygear*#*P#PTe~Ty`-N7r3}30rg?We{I;OZ
z8*-H>7x`*x-!q8nc~6cwiA0Asd+(#>;o{9^ZMgT(dxb(z>(q#JIK51*lM{Z|ki-1^
zrPNvZ`I$x2c^MIf5-VP~s^n<O+2PaKB|<m6U;ue{q;-+&sRpS7&NyOVkA8Kch0)4;
z8Bu*R0rwFE<S+_%e6Migdl|v1OwK+tA+!7r-NtoieQ@kSIawNm5Z9ON@$2gD;3S%#
z!43Q9CGaDT8Ig|W$!oxJq?x;YsuK7)KYyRmUZq8ln8h7--EpD2D2}c8PA(o-He$Qe
zFG-PP+ef9RMzXjtkQla+pjDZWpZ!FoMal3BW0@7q&QKo1?nn8@^Yw_%VG1{x4Onux
zhh*JD<4t=Zg|JKW!A86$52F*XZCI`eKi4-@^XNkE!fh?5%$`?FsYMMpRWHJzYm+Rb
z121GM%Z<Ja|00|QARkvrvf)XyC{`Eqr6i_>bK<p4<A!f4&*Q6WfY1mpny8@6hU6Gv
zw58RTKYTZ%ko8>7=&4>4QGFJW`(xy%x~D0Q{8b>9<za%gqStQc5qdf;@RJ-2L1Ip0
zf@;z*r*7R<qAm4;<Qb3CPure83Qc4fY>5^`|4v2h;INtNxvM(b0$XtEjb%-uxsAh#
zL9wX~<6gyxE7Ah>u$$6(hlVxg<NHxuCn+^oe70<BX&SnS>uj&xb^#gIB!+@f0?A|b
zG42}$HMeB;xS!JD)hBCD0YPA578z8vYA9~)l)Ibnn#(*{kJ9~=4OE@9?J-ZDJhD!7
zL*AF(P!$mxMf;4E?a|B`Eh#b~ZtCa1Zzyh_BCoIhm_#2v2W;zlRo3Z|H&a)3_MMcu
zbJ86|#*FS~?Fz@LGP%p~5!Umss(To-L=98yn{;C2*(vHJcWVOplcZY*ps{H5JTp8$
zS^9`}PN1cgkmktwLNW!?2dX*-(fKh+Ef%7ij-s#EX&M?)-&L<v)NH0Jw3(XcU!N6~
z=9Y4?0Hg?o7TiZlFD(?@ykASIQdlsC^*msT1gWlHHfn3Wfg!2#)nyVbB-E=_Tun6p
z$trcrSacDMk4RLsuKVWa_D$rNnrJo|b~H|V_%W0CfU2cWRZnFbp6Y`OPFRO(9iFU2
zwO$6N<K|A+LEk5izFmEEPFsw9;X$9_!0s(^N6gK1e50^8>Gl<HB<!tQ%*rF?Yes$-
z@SA%Ou(MuXE%3t=;7lMw&w-MpRh6=3pVDmNiAb$7pLwvDhGC5^a~zRTNL$D*rl%0@
ztnJ6h^y|iOa=Fpa;FI9-l!==@+pX;9^<r+a(f!rDWmv#E?e7wCsgk`Tb-8_r3sxU<
zn&0JyMhaKL8PAG3zZj@P&xFUl@WN?-KwU7DO1F%^l*nq2#jOQ=D^V01E$mbnkl~{;
zjwCr`Z2q$Tl8?6WQivwwc$%dglF(Jknkb;6h%C1i=bQLtgejy9I*CR(t)LgFYf&N`
zdP0ky*6IS6_Euvt*P`x{BrWKm(n=ax3j)^V_e>_82&K}<X{)jWrr-ddR1>RhQ8u@-
z`9bUM^4k3O6s=)<&Pkluwc{gu)zy|t?o<vniOIuAQ_Op;yVGH*!k8%bJvke0WdcxJ
z!%~eg#^`%;KHSPU7!RTn@~b753O|!FT#kqTHsidXpH+DWYDF_4#@Q}PhEjaT3OS`(
zYY>f2n`@Lsj#C#y*}BD34)%|AaB}HXWrRiEYx{8s?^(fD8#3I)E&qur&-5JV_Em>5
z>nXj3hUn500ukRVRWb{VNG+2k#WKX2;y4GFF;GU~a4M7LX4<O?wm6Vq;2<AlPUp@f
zxKK=AiE?s(I-a$f0D+3(#2{SsOPfsur|HRiq_A3VWPdO!S^$o4NutthN-dsKmxju}
zoW$=J*TR#KiTNNL4O!nIFyJMiAND1#C`M3bY_{l`>~<D{RJubLjK=D5>@0yerOdS=
zJ~8>bB8}W3gRPb%+Ot(hP%`@XnMwg_JtI|WN%#v~T-$UjyBl;qZ9sz5j7KVZ0qS5|
z*}?Vcc@;@!Xqs}Cc~d8)BVpl`@*I`S#HCxy5$|{h#&}0*yCV~zZVW8@*d%6(oUEZy
z*)%X5)tO|B{$?Fn?Bb8?+{l<#=J;4%go4S6VsMp*W-RD6-9*pTl+x}hEnP~P6sh*P
zMJtx0g?>Ly_wXC_>b0Yml(TV*M~ZoX7VqMi*RDt^&)t@mB3JwY$7ELLu8Ys0Fk<DU
z$!ICRE%~97x?yl!YkIM4m->01?&4mbZee9VyaAVKc2UMI#^z7m1~$(YB(vM=O80Wh
zjBn^E-C*ZT=W4o@17+>)GV*1(9mAFnWywD?Ph-#*9e~M6Ql`>TqS$HtsGb%D>e3N1
za}lBFHR0y6!oM^uRZCA-B0`h$FzaBcwiIjbbX%X}6@~9eC6f^^cGBJRYo!yrffQQX
zF%|iALrP+nVgvHpL-LXb?xnH{Nh+#aJ@+CT`82;sh!+2dA_DA0p&=4yNA!$U$5Wt-
zK5;V4OR{l=#}}*yW{|L`ElBbNdz2aD-d^&C*hbq%H8_eE+Na1iJS~MaY8U17M=~)E
zXa~=TTFsvw41-JBKW`SVNC;K4mte}dS>vSgbd#EQX<L009-!w)RhS+3pq?|2qRX*<
z5N@2&z%f#@VK3bQTtl>6>2Je6E<eFvJLt;Z5QL4SQqLZGC464_Hpo2*AvFcg)@P4{
zs?+*88n||~DpXUqv$FlR{ii+0MKAA`&utyt&5)7xra%IqqMH`S-WooBXPZ`jy5sZX
z*KX0>f#Or+C)&DR-vWD_T5KmBGD9|WLQUt<YfH#S{IQ*z=^-<P&pB%3L9xi$jg@N4
z@Ck!{a>odU`|(mqR3aIENkwus<2T3sH1(l?MBar1pdtnG%;^#a&3$g3L5ujxscZto
z=-dr!({!tMsdwOqr-yc79PI*SysDRWx+NqUEQ@hMsxmpvP(d-f;TDtKglT+g@BU%+
zX#{1BY=F8WjYSWaoQZPeh=ncK%Gh&nVHxosfE3tAnI#xj`*Ypz{$h5vK?-gGmUELV
zs>-rlw#lM$x@2Gup|1n&C@Ez&*b!=Ubqrh)Lvuc#f@Q=;KyFY%9_;8WCAMZeGv}(=
zx~?9D>7Zq*=3JIq$&1@;s%!~k?`hWXfI1gWX$v%+PhUAT1vX|%@nx}<cG4{-4la8p
zjl?ssY499jwc5$xJ-WmuJRbuf?>b2?e_(n7!0B5IZzD5$895A2gN=b#N$xpY)Se_Y
zi>u2HQp9l*ik?rB_B`A#$9Nne^C~P(>NlA5mEIeMlnVp)dzSh#u^5?_#Z9{86R2lB
zs8V)%<-=M=*-WJRE)CIP9EhuDXUa@Xx@lC*r$f;#$lJI4fxnQpU^;T03wSCg?dq2f
z5miFh!6zop0jy;?sqKlGDx0X^)E2|8=!9joUSyTwa*aNvt#jxSGxeTjDLr$j2*+40
zd4zi^S(YCUa=XQ&Of(i#GjNs(9aPg)2wAb4QH?Isq|ao`{c?mURf{dvc(tVVoGu)Q
z2~C^TzFe5w6Dmq>FGE5f$zeRTnqQ+)A--8L;5WN~>3K1+i8xkf`k9WZs+B|6+u@Rk
zl}W>N*Xva9X~POs`bg~i8rx91?iJSbYD9woe#mO(l2W}8ju+mdjL8;nt-5pIjAQlr
z@Fs=SJ$&P6kf1*TvAS4W@IJddx8UhIpv2nU1*UbII5mAwPiy;<{mPg<g#M|wk@~Io
zB<Q<~Y;-OAIFA^K6T!9)<!<_AS>f$OJf`%u7AB6AE43S`hKIlMT-M4%c+a9eq8n7~
zm1jYriwetE6*bug!ezOReM8rgtzrvLPMKdiqy!cwnBQ+VoLN&uRQbo@jz*vDqRYK+
zIYgCB##!7BF-KdkG~}pNlW99dmb51AW6IDo7OMCY6BQ1iVW}F3OOWV3?k^Y1KPy=^
z=67w4ai11V;6_bff+DKpgrG~IP^V27;kvC!4sP+ZPQlI}x)iOZ%_~T>l!(N{Kkyxi
zpWl)vXm#8IZx9~rHIG*r>2%nR$_?Iqn%|u}mU0-aZV<$<B%_iC?1<GmTr2p$AUn17
znzu~$-;cRGxg=Yh*wwc)ab2WX-E7dSi22ly-+XM`@oZb(uBiUPuwBxrE1%O*QJq+C
z5PcZCYq6}oRgYP_tjfOeZ4obut{p6wH}NHMTdo-MBb%RjINkJoLeB11XG|?}X!&X;
zJKAsm0#B)?sR}beBZkiky%Aw%QnD}Q(QKhm_NTC8%`u#Q0Z9DC?M_Y3Yl&b2PS-v;
zNB!tV@**U4MzR;08Mk7BNSRG~pHV+fT>45%?5Vcf^Gf;JF(EOW^q8|l!0k&V#?~kx
zG~dF4WbpDqC!{VfB~uDNk-CI}PBo$Fl(>{&q~O-JAmTw?r5cS-m#OfyrLgontzUDt
z!$f}N*?S3T-Ed%6q^z3D##hX>?#YZM)~OET@Y>z*y3jViMOkO@K@%j>8z-v}MFAj}
z=AY4#24w*k+m!v_`4*ko6IwTYn{#fX3H6cZ-eTLN(|I+;)-<K4%Lu}X2_-G%a)7+W
zf(3cSlpR;b0AtWjEl(>_3Wgqp<e6~1jclU8z3GCnIu_(<2~b!}-i7;B$WX<5mPmpz
zE^x(!Jv&1-_nf=QH$zPf!Y4ywI>2IwBK9p`4~ROQ_+o3yZTb}T34#KwR{XM{)}c+l
zq%N`4Z9J-s^o+y{Hu`&YlRKu0biIoHPcCBRXW{3v^%DwX<Rqwkj=roG>~hkJXMo2=
z>1B{5%6kL_j?}4Ke4nH94`p~UkF&3KoEZal8%id1&R%4DsM+aXt_wG#DGhv$D=U>L
zX~oARbrufR!{K2pTZ*le2_!lgCY02&JuCa)OO`*4)_(6Cli8--tH_@y7}Y-I7N_D6
zQ@0FE;_RI-ujX-1*kE9LTspyi)l*}0k1l$Ht8tfNs|Mq#1`}ac(B6u>3FN5e*2-?!
z0#8lpog8sdD|jjtZ6Y+OPx4FB7xT!Wj4-d5G(+bMWmQmkE|#kaF^==XdqA1I*!Bss
zBy8t?W3NqYb9*3~yx4yW9e~kG052duXA6Fe*=G>B-a3hnj7W1FZRK*bwMqP?^1h)h
zl;Gq!rCzv`;{v+HoD(v}S@Txg`P5nzY+s%w7L55Z9tAG?DdKOIUF>51JU5*gz}kh4
zbsE>cCb6kKsb8%5Bt}%WX=Rbs*5yPTCu|>i2Pe*#rs1;lBwk%?#A-qx`ROnnEgea;
z$W=)x-2)om$;%^B`KRvNjP<U#MgdQ5i%!2<vA#JJO&$p%@Jck1+L1-O8f{XSy9bcp
zuuOu}x9A!?fSd6}CG_kz#02FC{1%))WQ&ur$cz#N3h*hrsC~Z9ujh~-e5%aSvKS1D
zw`180iR{Ow+ML%38~UA&M&{E_8ut4oyksiBidCjVhJ-N$hp;m!abq&ov5Ghx-ng_T
z6}ed(=SOQ_VRP{z9t!MEd)c=V$#N$EI+)abyGe3u#Fdq7Z>ELsU=`3$l?5BjJKz6@
zY;KY*8=Ww5ClPZ(?B~&d6^_}Z<_4jO+(r$|ROd1T16Rv^PJc4Vh>y(G$Er`FdKe_7
zsYDc)6^XkFN6C+Mc`UN+1xIp`H&*f&oURZ!ubMC+_)-|#%z;D;#MIrf$4@uPkn}kp
zCjs1?oJIzzuwB8dWJ3m83jwF2U~%Im01OyC6QN>CqolkC+KdCZ?>f{?T55s{yhVvI
zD(xCt%12x#&e^LIWAWV6CEC!*PCuoi4S{Rav@2k7xZpcW1rafcK~gp|q_9Dxx{2xx
z@x-b|M`}&hG*GTJIE8sGkvwdj-{wqI?hoC#451JBeAKKTbPqjUahlaAD9QzinG(N?
z8NvhLoGBRNOwjPvO~K>a{0GF}8>WYy`n9ww3x;&jR*s&(`A!MZrd1iWNiIHe`^%Qx
z2h>Qi|DJRiiqPw4X3=3%U*L!e9~G(*l}IS%frF;$_GsWMP?QnO(0Y+0U{*SMi)d&G
zVx3Z)eJgdTPmGvU?0A#ru+to%D{^DIwdm4v(bCG0rkz7~f=FDwrrZe8Qf=jrUlyl^
zQJCS#@0X9I6}8emmOR^9;0#y|aE0((%uuf|Ybi0X(3PgqfMt&02{hipmg!MSlF-I1
ztrE+ny>Lo|gm=#LYOtmiO>z2($`=ly{<f^F0+Y22ml^gmVJ)<jsJ#Zey%ay(_)~57
zA$;tzbqcm1PF#f%1jJ7SaBVg+#qH#C`jh_yWk8z0oTSlkKoBTO7&a{gL(z#jfPzEB
zSQUsW=HhI;e60^8nKKEgXu62ORIg_?A4he-Qy4)gQ0|Q^Ks8B|G60o}k~{?K;gLGv
zDa9kv4qOtT^q?)7;P*V2em?uu4O=PgQ#Wws%#U0~Tr>DZ9WrTryBJ>t@Q)Yz_(I4)
zqC&A1*Zj*}@@z>!vtG}Z1VH!6v!y<qyYAU?rlY1HDA$je4+PmU#$Fh^>)DQRDRnVe
z)>R5I{eCHRPBT`?i}SD-9f^=>Z=AIeI0;loe4F6bFqSc+hi9SMX($UhNd_?<b3&$_
zM4IA(Bq1ipcNCK;XDpL=Rc=GTtqM9yK!<WCD1}t{dOxU=!S3pf6r@XrfRwyQMMxQ<
zt7py3BQ09`PHmp8Ayjm;X7;HdEn_D)*P0M2gqf@tE%gYG{N}YvbpgCzrQpp!My0Sj
zu2f$DGT|YwnI*)9B{W@8#^!9su9cgMgsfT9C=}*VJ?A2Vq*@Vjd`$zXaT?QCsb+T}
zdVXCW8mgT)1c*?xvV`(^&Ck9HPV7tR1*OQSi>>$*5?27O2;K5a%RGBiC`2l-l@OY8
ztIYBioKSM=8GBMv$}UMOu#*Jx;w{{~6?vdn#F0*7lu(4;L^(`iAyEsPNoayu8!|<2
zve09*`R7>Di_fb7PJC80$GN1RXbo5ks3t^P$PuY{3#(UxI9WGs!4T`e6cg4Fo+Igg
z`>U~SJ#j?Rp21mv(p0fNTqzf)tCxxWNmXdGzT2#2uRR)Eav(+3#jO9eS+R|x3}0P`
z^1kBm#?3ntJXbmOKrU9RMdo>$st*k2_gMMC%KR?tJqZ3)8Gy=9<pLXH$clpdQsQz|
zqV-vwIVl&9NkjEjjlWt4#>}9lLOkiv6!#m~Mzji}M$_$*lE%`y$XsQtsx&f`tr^$w
zR3hf{-OQpqY)MWRMY5_vBH=(5r?$%nW~>U#ZDmN!AIjG{hEh|)%ly$GzdGS)%t;!1
zPNejGc2mM7Pr81+Fq$gc`je8Y$aU#T*<nT4s@J%VpTjGX3N+oa!5~5<B5I{)I8EW6
zrF6?h7I2{zQW*3~d_?L<zaNsLPLg{pIm?C9OytO&+Rl|TDKg)P_onBQxbsLL3Kc_=
zvgw=;%D8e+r1tOg0sC4vU`i;9ho3HQ`b~!X{-+yZ6e=mp^cH-)5jVUoYB>D#HKmJf
zaTn3YucFOLZ?+73n}&VNBB!()dC(%W#S!f<lBR;rrcu8$#_{S#pIyduam1jxnTT-B
z*+LfsDhAKZ!;s%118sIX58BbzmfwxG-P!JRdEpw3#_oi<JZRNp63VaoS^jLyIN$Tw
z{PTS;Kh0{ESS<OkU$S?bFW80AGVT5hv9PatahY26l<M~r_4`UUD)bV;8~8}2g?_Sy
zzL&j{?DZlu3BDN2m;NNDYM&|HXWQxbvahol_4{mTrX9&k73QRzCb4?BFx)GBnVGOn
zEHH9LkJ-zqPD}L#aCwf|R|l)yR+!T9N3~2ia>T$vU)M#9^kJ)wTYBKuNOsh2aZ=MV
z&V^ejYgW=kuYd()@AAlsDv>~(djqguHU+1AuJjujq_yt=uH*-MHx-q8^A}W9bkKh_
zMTMh&P*eqEcV&$xqo(N00+{Eu8NDfM)?UJn#Yajl^ph<pnX=dYBn*8u82U*V`u+Nh
ze@|a6d#V-siWR=7_VA@&!G&dV47~0oA1fZ&qL3>*U)mX>AmM*yB}O0$QZ4k`8KNN5
zLO&gbz8nlFF`OtotbM&I?)%HUmsL<n<z7Gu*__-%cT5<qeZg%GFWJlPR#I|-gAHp)
zGg%l%E@rFfeP8Ox1i@Ok^GZA>%(t8i`-mTf{l)}!60b>iZFHqD-?+BWagv*dpxBHs
zv7Os6X+mG8*oa}?%j@N^j)GAV)e)K@F$T|Uf@6AWW^iBoxyMw#@HM{l3*E;nNA&w`
z2IR0$4$BFxUXFcaC-NNjQPG+5Gjpbz<W!=J%8RQ>*}1h)%IHG#Vg=qI<)r9o$@;kS
zLQ=37b@rryY{u9m>ByNBb>viqq$)Cw%i*@D??!o~Qv4v}ca!qZOse+JDCd(`qHM;+
z&#Lm`eotGHTD0bpG=P;CS=p;9jaf3uTED5=g*iKfZ0Y!RVfGFoyA_$(F3igwR!nuc
zw`25XJMOdsu80C|xNN_hgkwR#;l8z-?=j<CWE$uHQVHQxqV)Z?bPG6pv%R}o0e@9h
zzFK)3H%k+;T2I!(Egm>gyM60cVBc1>>87b|hq%S9+tj_w9OKq4YTVwdZArJdX<OAk
zWWG8sVA#4<8@9FXG^TGYD$2>%@5{5R624k^zd_v>oR(sw_>Myvw}`-d%zKLSe93X|
zG4CnP@g*m`$5AwE#c_9ED~r2JcX$+vXZc!D;N3m*ihQjs^sX`|iCT9BV_Lpe7JMf`
z)m!C_qM<H|u9&aY2j$i4hAKVWJF3Sjip#ridaSa@ynCj{Dl?HE^cY3xoru{hvSFFp
zMB%8tB0J=%O_bj8R9#ERhwe#iyQkOqqTI)qe%YMN^1S?Ah3~OaQ80e5NT5iDR1}Qh
zW2Hhigij*0<Y7@=M3AzTP)(71>_N+&*g;EoW-Pu$X)*+P@70eB$B)7DhK5|~-pcR4
zkiGyT^@^EW!c$Hv;rv@K<Z}F%y!JxoN}P2gP{GPn%o1M7Z5JhzC6~R?=MGatHCcHz
zw_KD>-&`_N7v3K3spKuQM(u*v4rXZ25?N{r4WQ_?g`y!;kgcZ37}l08xZE{53YXZ|
zfq{yW_?En~Mnx8@zTPfN`s9SG@+<B##Ys+Bs=eh-`kFgMou0$*72j<qf4N;%!=N0y
zedjoK`wnvK_V>WC+xMPhz2#ah(7BR9Dt)Ju6gs`O#1$4JsisD%>()Ds=F*oprW=7-
zS6)b?S}_aroyPUuUGYlm{@U^@NZ7^qiMDrZ{alp1Rc^*fDN4nKk0goY6q4K!YhL}<
zg@Sq)!wGfJkkak-FQwOn&WcvnvOIXJ3v+_fx)$ac(X^GBy%V_+6kZl(9dxT~5M#MP
z)Uu8$Ionsa3@e0#s+M78=fo<PakqDEc1(FTYFp1$x137|C)fSrMsBxvaU;GedvZUh
zQF_@|<*F}*B0;UPBpKfilGJ!>aw$`*oRKK2CPhLhMcuFWlDgGi23^kZTa(o)6zWtG
zvk^1Dbbg6vsC0_l5SM<7QMUZVlIok*__}P~PeEX>Q|Qh)<uRpP9iT9%?7M^-Kbq5t
zG1Z2{@p8U$@wf4<@uOi)%&E5Zn0>B2-fFOKC@u!WpNH``AO7ck?Cswke(v>;Va^O|
zlm7ecx$)xV&iL7Q@?yF*z(-rIGu~R5?s$uGm@!=F)@*KCv(ANlA6@u)wzEV2!cY8f
z_xX1JZ@umA)9vm~uea0xTesia>23c_>#hR<E7#K<O)K3_d~o;V2egERYQoolKMxMx
z4o-(h$A|Afy&b%5{0Q$%%Se0Oh*f|0{^ZTUdwi<VijyYyX87^o{CseFJbZuH(*E<G
zu+YKVx5HMvGg_rNA6xpuXn#69KKL+bffgj+fBbO>ifNwP(I>v6Pai)HPGx(dAEdhZ
zz;|<ea`^MX@w<WAn6M|;D1Zjk&%vMYZ)NfiQ^d@)+^qx0(Qgi~OxJj0TDo&{vUKIN
zK(_pE_n!84V)EaA`t13G{IBDuvGw?|_Pb+vE5~v*-O`NfrDM3RX<M2-#jtlUUySM0
z9GiyaX-Iu->DZIi*u(d~kG03%g^4vM+V8q%Lj?!Sm&f?4{qE}82i<eb>o>Y<?Cy@S
z`bSzD3v4w!18f=HGxofjrGX5sonQ-Z4adARCi{)S(YwL%@a^EAM~8#qPoItt{U>MV
zrzgknKoPw^Iv)527JYO0zei_-;hUr5gVSG!r-SoPAf4yGejGIYHrs3mZGR}OhP^Z#
z9abZcn_Od`PEQBN=g{5B>A^c__5j*Ge1C9uw#UJ#Jt6~dQAis9hQU#kmIA6dp%0p^
zHbrbFd;kXCw?+}#eOcKa`Bsd0|M06<Pd!et?13d~IF>%&-F3(MTz7^6+(Uc0cSaty
zg`Mv1(zpA8_aZ>5|CI+Yp!!zDRKJbERLd=!gj*J-mCq=HYaJIPyV~!tc$u#z1|9wo
zDtE@qB`I^V@|G(Pid-7bC|S&2c^uy@^y_Ae@)#w_hXYt~a8h*gu?aQX@Y||vXp}r+
zQAmJP30ngc3=^bKJ7qbTw$LP99<S!}ls-u*o#j60@#9=Kt&`E8#@O4}_JhSE9oNT$
z)3d?b;hSHFZ%=;t6`nKzY~NUE5(4-(hc896(RAq`b($fMGhOfQ9{RvF9s9~fRvz1P
zH)QD&oUC7$;AH(;ij$#Of|E^4hLiQ{HvG0K8(JnO$3d!wllAL+$;tY4$jL|mRZfOF
zYv$yoePuWt!9AOloB<G)rlXZkUhCMJZ9zr16EeU#R+z4=v4?%2D{18!Zbna?T<IV^
z4oPA9Sw>cpm3+y`loKf5hTm4S@JypC)5zK3!TW>LsFmT#@%hh#)1!AkpGOT+9#A5O
z%MroRUDKEZv<NuUrxrttNEE%L?&u5cTr(;DeVIi|AV$BAcI0UbSgB|ugV0R(OuW4F
z%JeQoRU>YQ22h3tpo$UIV2wg}-)w0xl|pUO_{wru%cbpjfQto~u>d74|C3+c<$GL^
zm$Iw|#blTZi^UZ&S-nO;hhIS;-7hQMF^Gt+M&vJ`4gbvTS)eJCrtbNELrO71Z;WC*
z-){%XFi;SC=b)z$1HRHZe-sklS2?f3MLqiC&rjZ-z;hTatkBVZHLV%oaqQTL+tqx6
zT(Yn)DK~x~FFF@wTA;53WChhJcTeo`Y5{$c2cZ)pA&iG7A3mZeo_uU;`1#=U0L<i;
z_IuuH07@jL8?EqkGhQqitZJF&b6bn5V?FK19~sc4jB5ZM#$0}p0PXx6eq|R5YaV6h
z5(2>f21EuBQ12=MUIxGlYyj<WvE)0$Z>NL;=P(`|TK(fHJ&+iJU|gpX15=W@?B-Y@
zpb$RN+<>t;w#FH8a~}#TnnteAXF|BA4$MoF4De5jz#qfvBW=>+$C5LINhcV@q!SIJ
z107B1SQ<IH^fb_>_yPXCeWjYCX2e|o<Bz8Trsw=xJf?41r><#`&6pkqHsMvOkb=KA
zOlyuT!!!12dC623$PVF8V&jGY`;y4PSO~;i;hWIz3&~(0euY+9zwc>*a}diEa2m{^
z2N`3Pl#5%d`h1Rq(5)LX7yFRxvtRaJr#cn_8!ar_*504D3gY2)6^IxbU}Mi^(czbt
zM9OOpEAmKr4Okfz#L9kIq|6)xw7MgMk)UV|=s0B&vd>g2!7%%;E5eOD&+rv$kR={Z
zui1*lndr4NqJF>99Sc`^qzKfR+Rno7Mjw0BCE%E@ZMC(l3v+ydE(N-}o6m1FT^s43
zttW;~Jq9Ei3Yyzo+8E7@Ga>a5X-5F90}~W39LE>N__GE*X#}mNcLVE6@5(U1yt}fs
zOHjeK<3f45h9AR%Uer44FoEgR1JFoUEO=Ah!M1^f6iVoQ1qgx{tFK>gnBi%<i@T4P
zPzqMLH_+C>624zITZlN)uIYYQjW5m?(6r&4U+7k|mG4&|Bb4Viws<zTMF6%%kV&el
z*T7|ie-z`gA0D{uu5g)Ud-6OM6<!C2{Qw+xOjxY(<BD;tCaYw>M@31TRK7=2pG-{Z
zPb%)O4S{^pc2^5IS8eE)CCp;zz^sHt(D&sS=;orN<$Sdu5fp3{A1!@St%o+%{W)Jr
znUthloNYoZat6X?c?TyDMaIDy_8+-9X#=5?Gl!(aAMV^cii%O|doz?Ud>(GAF_>+K
zi{exS2dk_zc&X0}^m^lpjJTPdR^(i+LPj^+(A3m$3~OwZ6xE4wk41H1T5_^V3$9IA
zBkHKg>}Elk<!I(_^}ypxeZDdxiijy4+h?QFf4}9JN4}&uZx$8jTzS^C++EW5gP+br
z+1_dgflGm8YZ}m>Z0Z9=Mu3T_b_ggYdso!_oS2s;%MDRrf{Uc4yVb^=P(L8F-l0tf
z4f~|_H{(<3eO;W?J{S@2HbE=Je1a_>sxA1F#R*!>FD4%fmdy~*a6DWMS)!f73_8OJ
z1(pr}U+$^_$P-^PgjH~Z8BKbwpOJ0PHQSosbo|O`x)Tc;x;4DEeg8<N0%c1RHA(?v
zl<1>J#32Y=Hug}17K&U{Au0nB&{ZiC@&BtT5;52WI5<B#972V`>3OpmL29}~7lyru
zpayKWyGs@Gqeogw!R#nSzkVw%%;)^)qqE_M!THZ8Z-?;d<oNxs!=Fx0hl5`}o(|5=
zj!ur@;ZMUiCnxWLd1d|i>d0ocImLmJTiWr-`2cj6tAqZSXe-Mz=h_vl6=#kCssk0q
zjBJ+Eg)BMn(4}Gq4BP9e$Mno^X8K~Ghfx+C#vE80V^g08Z`>h`s|X8#(hGXS#z8_Y
zZ4)#e85O9&%TU=(Owvhn>6$IgKw{}Q<?3E&gmseMfHwfI?ikb6JW`}|`R=z$w6g(J
zad$xnheK&vEz|?Bh6O0*h$~odGjH41Ezaa22M#beBPfw->-cm$-d7UJuwib4mE)G<
z`0h<3f<{s-L)f%OmU>`t5g3~mySqepUU`NtF%5h*yefF|)HD{lW%13CPOt1xmfS>i
z=9k^5M`{Dm*S-mWQ8F%6Bo3co-rv`HY=tkOy>MYp<0ZRM^Oo%yvB)StkjW);7Iiog
z!05S4)|hZR+PNK+r}HECx=kQhx$yWxr;aI<1?pnbOq>M6$Qxn=A)cD&?ei9yMs@3k
zGhNQHHVhz)w2H-cfiPa!6OG_e)90p6XJgVRL16m}(XR>FBPH_0?jLGmhi#^FbL=a$
zO<+(Hwts-IeNh)SMaY^l(Q0d7ViU)Adj2zc{_B&{@vofoUpqbcKYspeXZz{H`LDH{
z|KcN`KBWnZK62A#WnViOxlGuRY@5z3vrT7Kpv{rn&jGZr-m3e>@v>SygZJvKI(f~W
zx`o=sdw?>bh4<>Mnij0RTqCRDBU(hg?ZAe|O^=NB-_T;mHis0+KEPVYYI|_KREK+R
zXL9g+dse8;-ZH1vUNKmE-Mn_Y>g|s0OwDj;yG1xShk*<5ucv3Wd9D{|Q_mUv!MMoi
z^_<}!xJAaV=OEy~(~DvCA_uJ(h3J2g!vGhBIQ1fjRg1t%P7h?y4p^c9ysJ@WyNB?;
zz^r4htV!mqg9X&vg#{Avz@)bO6Ba0d1;d<YVgcS4n9W=(TO5wuWBs^DyQVeGZuiKV
z7HM~CTBesh{klJqGTh`?V9^gYls#X;dYHNcnLWDvvtT2sO)o4K-}2hiwF}3XzTTJt
zhc1W1Lj(&;f6%>)@bpmU!duLLBxlh*bk1Q@8~a!!T)4Y?TRI>IEx{t^963|A0j)Fz
zOG~a$v;0Bp<%N!jw`Oxid^<C_({{6D7zvK70D4%fya5dh(Zd$106kR4PD*lSUy?QZ
z74l|3l{uTt$+JRS+UMD{pTej8N+}VXOf&U)xG_}hXd%;5#sNce60d>-hHP*7;egjm
zsW3n&RuQK_Q({IT2Q2-Qh`=v+bQ|@4hUjw6Ic`<cpvX+-C}9g#z#{;x?wH=iq9lie
z?PTx=K}-ew5%rZH&u&ZUTr5@*cR*8O?jQt}Vgzm}+MrlVFalSoYDVa3B-dbar|BpN
zFr{S;&m6b0*mcJzZhR97-ZihE^jh?Nn>2D^Ul~AywjUiz>>v#Q9Fyn+x-SxO5IV_M
z;|pLA2WH*zc;)C&{OZE6NXPhoJhx#4(A!tTu{#w}v6mv{%t7OPI<kxZ25B^fmc44l
zdISQu+3Qv&-ih|4M6cJC6h0_eo%r>-RS>QlBV0F)aNQcLC(W^KuO!KsmNIlQdMzta
z%4~PJyn(&Sum&kt5ocgqne%p@fyc23??rC-XiP8&U#tTD@IAW~@`<3unh970-!b=k
z!%4$<tY8%)Y0j)wSp`ZPkw7ezlR_+(s{*n7215he6*4kcERNM8Xa&O<p(uw|+DL|e
zBj$Gn>do&cmwr%AhJ0A8BE1GpiPf6F0MX@FAe{fdK~b07skhx~8KD9;$YBD4i7Mte
zDHd=|NeQ+po7zlE%mRdjzyfYo8nKu<;I%=DGO#Muu%2!S7$6P5P+A7RSga!aLQ|Re
zl}9hPbJg&4A$qw&6`)sl#)>G`C2_1*h-5t#%LGjU7b__=exIzNry!g!RuRTi=eJa(
zOU@m7g_DP#I(w+A4t@i*p{L9re6fm<t%_<%9j#ZWr1exSU4XB-UFC+JqNn*{6`=Y8
znsG+<+#IXx><dYyeNmy-zQ|N4xO45hXfNdT_XV%OFH$u4MO}z@O%=rB7$vQdiwez=
zi%d?ZXj4KwA4Slhq!U^uwILp$RX{wBO>!l0QQ=aca*pQ%EgH0@a8+<oi=#sx<9-bp
zqz6UQj&g3XCpjNWVk!|)*YLX$;VV(NI^Us!av&At{5Yg!Wv)1EZIBfU5astIP7O()
z93Q*W2pl0IzZ_q*0_-EaD*6G6OX&wJSCxK%wo(*xm{wID-F!$>gVIZ&8MtB<Aesb4
zDPWqEmq9fxSP`yCV=1P!VUnyMBnnH`|H%h>HAwS9|0iFt0(=uN74S{U%ix<9tO(zv
zv21+%IFrJ6EC8<I(<+JYSive334p7jNT9fsBEfQ1DH3Q)jB6iHYRKlp0vnWG0!_jd
ztHQ>>z(N%Z1LF*(><dybj_e>Vkf#4Jt*NYG!G||CNb5qQf-hJJh5$?@3>{fhISgUJ
zB!(<%s79!g#1I##*&wZpF~k+D1VerRg9?&p?57-_{IVISQsJa{)M+E=K#Z+;@d^<3
zLo!=IN?J4d6a_g5XO67M;G8X<wnF&fwIs?#=U~DN0zWjXNdr*zF@6!y$*rC(ZF6%|
zJEO;oqn#6fLj~T+9`~8&oTJ{?-jfpQq1gT@`mCnkng!c7oXp_yfio@C<~XO(aWZ7Q
zP#%N1)%*`VE>9B%|EHyJW;`|MhqNda^+QW?@>T9&H*WBapF9Cf0I){hF>lf%C>7G=
zmfsw-VA*Ud{Auu<li+kn&BRG?dWbydG<cbqC(^Ij9t+Y)NCrN|M4{miNfi1ND_X(K
z;KQn6&Wm;|B?bjVg+*&l6F`Y7&l~|E*3KP%$71&IlL>@e(iWulXa|ff$3qEy+6Y@u
za#5nNkc|rIw|Dx=bv46rY>Hwwl|?*%N>OzO>RKxJ+?jq?iVnuJem;gyQv$1DPLvI?
zXXoSw4nq`T?@a69$3r_rq;pe0UydaibNsz==J-}vbDYYX@~k=Ld2^h?oa0pP{M*CN
z_xz)f|K-ix9)JEoatq03ev!!k((CoQ+uO1HFTMUw_aXnwI)3mzIii_O)RQhIb0fju
zg#2wy*XHKz!n-o?U+wV8lQ8uqycjt!>)?_dH6yAWO!z&Y8SuOVkMYf+y}WTq-R5CS
z>vy{^Hv8RP7Yr$5uDx3smSv3e)kNEy!Gq2$eDL3<<(PVB>g)?!c}81#V_3H7Y5%W2
zwny6DpZKRUBF{R#|DG*$bYk0!eIFJw&NF-EjB(-wUH_BqEO5;3id-lY+}#Mv=Y<<g
zxgYGW=6tSW{f{u0m}Bj|IW{bqdoMd$ztin#M^nJk7$6L*e0;B6!K?z)9;OiV9CNgS
zSpl2y6X^N)Zfn2VD{YMK;*qCg2nJ|wm<TUCZ@Ig*wV+*hY-hFwZEittTb=x@uF%gG
z&fU}tz*-SO-ctm3zFJr$&6n>&Ai}*D=Dv!Y2%e*xo*TT`-`zd6uiOI<Ee{yNxw(L0
z^xE1FM=rehp#Nz*m`Gv%%hHMb6Ta7qbP-FkRkkB&Ea**Q+>U*fG3Z_<IM7WT92OM#
zBs=C({g5FCok<LNgb?7zs|$N>_>^}mBmLVOOJCrDy*LB7q}h7dAgxo%3{21J_`!>$
zGLRGYT#8ZDa8p5_0n?O14a+(~azMO$<9J^KCAR_vddy;9`?ppJhaJbrOfNS4_9_hs
zGS)OmDr3P^s7kP03Q}3Rr6cvY41dVsGzE|hoK^!=DoPn_ritvnrXXv+phyS_Psgcw
zZA>)R{Awf#iAW<@AWAXB=ucQBm#U;PttTc?O6z18*<J9I5_J^f>!0Kjo)}usj2SMI
zMcWayX||RK%c2I@p`8Ibd2}UeFoP1n*!G(cV~%S+!dF%wIpa$7=O^CPfQir+%r@9U
z$B#Cl`hFAnDTU|2R`lIETg0~A3ew%;Vg?mGF+``P1JI$(LysxZ%8BAYcBZ&*8m84C
z50G2PYiqR_(L~ThiDgzo?uu0kW|Vk6Qa9;`ijH3mNzozDk^Lp!DXc2=PUY4{pH+xX
z;<Fmy-2kxOUCn6PYf2*!b%rYFKk_L?G(bBT%~;2)PtfScv;yQxDotN5=l(76I@KDK
z%JeK4y|~J{0Cw`+pa8tPHW#3MNaKEkc=^`C&`EY>STOjhym6BoTA0`dU3`pndcRcw
z-ojYe&JErlWsh~5&>6?;JCRNfhi~4W9RAPn;O$%7!aY7YA08ba4^Dw)j|cA~)VQ#I
znCq@f6-k?tt$75ClIu44x5YM_^i72-iK!SeE=&Q@cVL;|d&qa(1LUa|>&gn+W^b~N
znvNP}nu{9plfU~bMbnM0FVQftHi!|3510M7)sYH+W+s`cupkPbX1XbkkcOIq^6cd%
zU8|36kQFw_FQ%);IckK~=W68Zkf>6=5&=+85z<3aM_G%cR1efh<zm$?AYwd>K9`MI
z4jN0zP&c>|SlgsE+CeL01<?^f<HOPFs7a0Nz!bzndW`9tG@MeGkp%y0n?nUr2&z;p
z3%+d>&WJpM<%J@rol2zx4Y@Y_?k8s-&el6*%w%HZrt?~u?Vhuq_h<{W^6oCN3u7F}
ziI8+FY5aFsSgHt4G=db6siJ70orJ?kt|w$Ll9WBQ1|^Y`C63B6n3O-iBA9t(=X7e)
z8&k$~*}Cb$x>X>ULZOsyinip>kkTMID@)LZGB|8A5%+Pqvvd=;ZNa+4TVc@qPn&Yl
z)^!Y_BFh^y?*67|vlRH+xX*iOjtwFVI?dI<woBUNkYZYRr>#!6puw)6PpF}6Tb`p&
z%(2%Ym(!AM<ty8nfVBLs`(*ITlRH`QDdm@yg%_4nj0evEVE7!m?SheEpd~?;aG0G+
z^t8GWBiUXxdRs;{msP}4Hcn{;U6m2UsOy+HB!{Ld6t#l!RE+{pMV|UnV{T0E$4cWp
z09t&ZA*g-V))^6qrIs1pqCyq#zxcvJW+ZCsif#o<iEK_kCmYxP)_im$n?EDr6h!S3
zT|7Nw0U>{vn3f_mmYR*2X4c5^Ygs=qj8@6))GZ<{x4U3qQBd)8)0JN$<jf)uD};v?
z!o6N0lrcW!n!DTU16A9kumUb`yxi7;pqSB9PC=Jn0*q|WvlsVe!+}CuX1Si;!kt<U
zB)}}bBtQbuMWfFu<yYV-&=p)TD2!EM(NJvIlFJD>skv)QiUgvI&tb`?zN@80k|oQ}
zZXrV+#u)OjjO5+o;zNL>D(5#@d}WEO<m~paJb752+~4I%d80_7$veDEDQGj5Q|2`q
zRkyN`DP@dRpzNjbS!kJaKQ=okuq9j$sSVu0HBJ?~9&zsJmJq;-c0uF^S7J@mt!5c>
zzcxnVm@PZYeXrXi2@b4&3DYeoS+~AuscX|D13?nYl!r}}hgHgVzDlWbAEjn1r^YqP
zT6R<FF^X<+QL^q07AXWKmLc?f8G>QD<Uo<1)WEZsRKujMJ1m(KVRDBupr-OkjG#`A
z^e~b$Jo>2#KyEJZxM{1N)dF{9)(6ukzt!SXY)1u?+hkU-q*RD8413D5{Sybn6F{ce
z_sZ~=<S~ENKh9$^=~>@WZb>j<XH0|zCc$8Ovl5fwu|EOQ&)mdUWJ2?4Tx{+ND&LBr
z#m=WoEHt=N>wCv+2rR?q+H37tJZFjPS0<u+!exZi$dZThvxk>AxfC3!w0P5La6Pk#
z?*~+!NMhCQ@LUo>sx+O1MuAm`Y2&@BO)UbL6=#=x@XPbz!%S1*OcRm3*kr@hzu)ss
zlK0C@IhBcVP18@xXv@z)*#nnS0lMbs*31YcF?A~9a%q@k7`_I{Ppna98heZB<%lRO
z%*kYK$jxG@weIjF7D1Yl|3Ge1i)s~+g*XiD0u!6WI20$kydjn6@ye&UTE*o%m?9$q
z<z~2ucVy)|t@{0*AoFFY%!FByil}M2lpK5c=`#xr()P2z&qH?&q!)6<b3M(8<8-dJ
zlH)3=Nd|?q)>)9tP8fhHT*$E{lS^@G4go18IhpS&caqBnGi6FshPVQjOPS0>JXd(x
zCe_&YvvLc_U2NSJ*1j##D=;syj2r3ZINN8;Lc#{B6WY>~kHP?Rr<!78l@|CQ<auqA
zj6*{P6=xZ~*4>_FLReMjnJ}2qQs>=Rr)o1!5@Pi!CkOZ{Sn*-<seJMYDPL^%iK^fG
z=_errCFh??gt?}fNIcd`rlO2tt2$J2CnYYauD}tJlVKNxzI>|OxHnL4W=pMgw`aBp
z*rqsbs*h6H2`{Q1vWnk;ioYO&r(Iy~lOB`$lqfO0N~EEVd9kc){WYf5OkZLCw3=W2
z-cPIf;#G2<6$qKK_NZx^l^lB|^KL2{5_*O?*CCtpKJ91+3QKsp1HXt5c#t;I4tbD1
z*Oy*jG2?x)O!tBe_b${4^d9D?5Aiv{Z1I36dQ;+O;|5h#gl+%U#1XkfaYBQ>sUD`O
z?CEVM*y7x^nPww00nB5(glo|W-_e}D+wJZBX0P3Az0!U+tO*vnl~XOpQ4H{Lu~RLy
zg<~!hSR&cGOzd>B>FfmWD!J|V;<3@m%W33MlOHf$6iaHc(!~*$dGaOgHjcpdNjX^}
zYxD%oq#rI(GoK;_CaJ+96((<FF?WQ>j$C-8Cb~65VdO<l)sU9H(?c~8EfTh+;9$-1
zEgY;Vb(4!)b9Z14sb$8%TaQ(Ll&)2&Xcj7e?@DZL;{PNp<uvTqHYpV$gc4Eeo%HhP
zlYKim=nQkyy9lO-&?BH7qoqkMDvC4*dJ;&tE~>?@r1pTOrBwr#EwxU49Npm!(*R<O
z(FK{;{LmXo5fMJWm~q!r%qwF`$^f<~<4213p6(<{2Skq8Xgt(GX=qn|7Y@o64}N0E
zOwcP&W_k-NVGa?Q>FXk7ChXS9VX^QUSPp>nI71mqXJ;XC>D_o*)9e@>_<l)l!lR0!
zC9;BTFtmnXF$`%*SGb(!3`=7=HOD4);#-$~pV>~lu$C*Rmd|T`VP)eZUhft!kfR8`
zr3>VQrjn1Gri?c~dFdiooAOC4CYUyMc7AYvbT~XXI~$yy5C1thJU=-do*cjbwdr5V
z*op)p(Mw`7jJ#91BFX_M>EKr!CL_w~gB5p}K4N)2AQV^O20)(pS1-soBsCEZYdi>V
zi7OqGoPA;FlV+U)+m-jIN3awSQEnhLi)6Q;I0EVJpfnoRL`fEd@(+?&sU&*3Ij>a`
zk<T{w5iLQ4g{)vkpcN2IZd^4BX66v(HnDPz=ikRvWMw2ab!$CUWYu=q)3(=*xv{9s
zIL`&?e#fx+qb%mi3@UTFFfYN+vGf*Ig4v_p;VhFh+7nk7GFbq!O;j11d@oAxQ+)5d
zG@43jBqdm+jrvsWRE0@jqe?2JRKA~}SVG%^>V#BXzcOKs=XEO&#RWpyU>jvr+>V0i
z7E%r+7Z?w^p>Ev(Yo}tG0V}RwFR-@z+Kr0PidR^r5baQ#Zb+cxmW&4;Gk<&d`Tl<l
zb7okZ^xtRCjTbL>#?Qu+7t^f)KH74f@z!#_nwgfnb!aaZwsm-Ay2cyR(w&==rQ5lX
z=%Wii&vtgmU-*gr?e(^|cmCGf?mpe_?(}*){l9hl{pZ`y|E6`<fq)g-Mw(W-o%rDH
z$xj#|1ydesz$LE!LFp0lx4|x$<Dr}@gB*f8eDWlU<eZx$2bci#CkYBdwFijQ=Q9JI
z;~a)>4(;U)JwVqyY-#;&_r+$v+k2+H(~Y_IZe>`OG16BPZEpq-I<xS>f18$L>Yb^x
zFKp%6CN`zLF)Z8kwEtHh+aqo7PyEvvk!PLWf6o@6RXZ@7@B6Tjah};LXN;;5=ER?1
zHYX8UtI?levY<DRq@gsNg&SBtAMCH@e6C~tkE_w#9Bc2*F~-MhmmRI&>B5wc-kmW(
z7*_fCUc1t%>u+MZFz=359to1wu1xQOIFksz+uE=8N*n7i%Zy+OfIl#EfLU_kdCT3c
ztp)A6V>`1gXmbmC+v+qLaf<2M2xfJE(x$RXOcH1i9g$-qwcOS!N^lqw=VFtOALnfM
zPt57ma10C<h9`lmDhRM9CJ*kS)17qUvjw!=!04`Yiy#(o44BXX%epo*(Vy#%^*J!i
z)Ug-n3grwp+&%;%A)vdv_R?^4Feo?CIuAPu?G#QSwY9MH>n6^L;U4;jgZ~)}5C8Y*
zY%qLtbbN67>+s}bGc4SW2H29phr3!7M1($>#8H4xWf1U02?0-35b&f8Bg!2)^a$z_
z=*57#Nw_I!2a$lB6|N7&m=Q<rlhod%gJ3j)O-7>014jiiu1(qs9{Es(Kr|6~0^|@y
zMMg#J>Q)HP&lve>y39KbL~n&0OT78m3HtDHeL?&3p>TI8F#<E2bUNfZC3ZO$658dW
z3TThZlNl{HMnu}$7z=)M?8$2E1uPI6u90!bA-Ww|jVDMc->MW@xPd`>dUE`3cy#>!
z=y(uZA;Lnq7w>?U@f!}l@|}%H;Q{bmlfI^X9ez4J9UPxSpC_jW?|^*|fQJv?gE73v
z)z=;oE(_Y`0eD3D@^$QSvn2|dDLD|<s3stFm<I@kA7${PIR&GC>JRhd!RguH?eNX7
z!?!2D{0dK4!96=Xcz<vzkle@ZB_(yM@uM-vt*%TF4mB3)9#{pBZm2%<m9akdg_u5w
z*wA0Xp9c~ds4ibH!x}IjdfEaohG#=?DGdTN0&((ZzC=ZQM3rF#XD5wk&-WW5PJ_jm
z;aK{7cb7Ik^f5BDmwRXAN&9@i9T1GHia;6fo$qV=q2xUFIqv+C524Q%uk!GzlLz+{
zoiO`?4xcDv^}J2-y3b;>9m-)UQUhr@gO~b@oH4{j=pzCx2*r;8j5%$T6Af4wpZ`2K
zJ$m=^c_<}Y?Jq0a!}&^4OeW@~IY|%}_j!$bFD3y>RODnURZarh<r_~4>tSTua|4S0
zJ|gBqIFILBSXa7Z(Fq!gg!RxmhCh7I*olTd%E*RF`qv1?hCVz)-S?`c&fdx5@wq#I
z=|BG=9u{M6WKGI>b1#2mS2lGSsDQvy^KSs`qeo@t-yH8%vl)SEx<fowvWKt+DrI+<
zO4p-DT1r77x^{Q@`mMBB{PUlW&W0Zb=Rcpk9l|Gcq78pKIUNpu`FJ`wJ3GQ~;gg?+
zZ%$6$gGNfR2`a!gV<g0E1m7=NN{&!^?ur=Mj*1l7PO1<AU*3`s*^a6Vfxth=P%#;j
zMs|(}J$QHP^8bEPasTG|fBVm$J&pT+pY{6>{@?G_|J!}};Qzf-{@=XC7PUnJea(d6
zhU*jP@Xyr@*rM4YlQKNU2E$n<()CbsVq(YoeD8R*ICpf@bN9k|=g0*$1IEO)c<Ny$
z?N4dbNC-=`uGx>xCi_{f`(+*piU5FQ#D_r_(T?h?d?Y!BJ`XPJymc%gF!dqm6k0EW
zn>w));p3mCag~A*$Cz?h$;O8SCXQjsBq#7xF=8(CnTT7m$%?fy$f_N|`yyZn0TAPn
zfRKjA5Fc?sB*$kjm$qwW%C>fFfM~);u6gR7&_H{lUQX9^xhG!(2-sy+?C9wy;)$Y1
zPnkWYpk_`8KOIeS5Dmu5?-%-aK&@oyCSwb0=xh!bN_3_;Hm<PPz6KN23MRip-SzM=
z>A$rs0fa(B+T{>m?*-PmlwQWlk*g_$AVI%CbO`0NZ?E#G5-x5-?-{Ic&lm?9_DoKD
zgn2HEEBZb!nG{!;+l{(1NJc4aI@lqi<HW>Boh*jHDu}Qk#3-1FMXAwPjPkKqQ93do
zjRJ;>HhU;>_kyxC_NRf6WZxhytk;ZtDsp9JO!abPCa`47MpjUxooIu{*PgCjfYN!r
zF>~zIayUFhg9owqLH90x3XPo$Z!!Oo?2vi-Xl@W=VPhZ5L>AHBmJUoAMa=E)>XSch
z;OAy5^OQ>5BxtZ!k?34@yh1i&agLm9qU%L~VVt}|<6R1~Sfd?P6LTTe{uxK$xmm5j
zSva9`;tRzUQ=EIUMv34M_kr3_v06J;S3F)ifIQvI&s_~}rVdBV7ZQ$o<d~pm50*<9
ztgfvHaH01t?%iXeUVk&KKQy|Zsr^JZq5(BadN?QCujZQL0&)F4P+zYQJ?rmIV4FNe
z4&;LXVgrbW0&uD}%*X@;TzbwBz5@zLFbD#VD^1*enCQ+VSZR`#lAm!7*J=y>a~KT(
z>$Mf(2MZ%~=j`ex9!yJzC)j`Y(J=&Ypn7&VZF)5}nD(7vt&nR%Ao0lG6<IDXY?wU2
z9B|E%IR~58aNFd$Nj6<zg$nE1g?<yP<nREQ2f9LiII80iWYXYdX%%eIw<)>Ba<mG}
zLsC+UHul@{6C1~*a*o_j+?M?(;g0=&V-<@s;7Z5h1U}`32}pI>6z>1$WtXc$&a$Fx
zKII45U^9G>4d3}4;m45Ea(Ob&kE-m5Jt=V`StGG^#q1c2fMZEoA;yvPVGcv)_A0}X
zpiC{l+MO{t#LMZx@QBt!!eZH`N9J&E-SQBxn*-Y;G4M?5qnQ7n8pPGdfA4JXJWs@b
zKYQ^1KluM2{Qu?s|9(DZ|J>WyEgK1bHojOnnjZ_MH507`(TF(_z}ad3m<ZPF(1El{
zQX#aY#=*$#7mXxH4~6wN{n=>i)4}<t)8pa!uOA1&p{+LCK@`Wy1@KlBMBq;kW#Gz)
zYAqbG^7=IwrXn4q0(I~#W)QJGrUyiI$8>GbS~M{W6ayUwjnB#QdZbS@b7JUJ2&Z$K
zgm!N((Xm5pUzbF*kRzN9j11Mm_`(=}ra>@xaMSc|U;zr+5Yy3I*?tZW7lUnxn2ilr
z*g7BCPg2m3u2}G<pgf^%G|&l@&^yf<GhKcCdczD4Pq6|?!mvcsJy^o`Yp?<Uaim?-
z{jeHeoWVkPZaC)`y2TzG$d|kUj#W+WXqAN2;j3fYBX3F4!4THA2*?^DdR%X^yGw{S
z^z4)!192cvG_Hzeup$8K5XT=L#PRNlV<13k5*gMK3gvA}B-lp7;!+u^l-dQeD4cZ&
z=fbo~8&w}nOO1`Nf|N0}(65WLi|m=A#GWGHgxOM5W+WJ0tvpm6ylY`7|Ik5F*{2fS
zwD?1YYbR=ON+I%fBuMLT-lmc)EFMO|28}j@ON;3Bfj;I-AY8vLFTwh?q69+)ISDqc
z+mK-Wx|9Ti+F7pz7lpSL363+=U6Eh~B&ADmNLo1whT>`xTm<96nW@rp{13k&HDL~G
z$vZg0lfeG>BNBLG+=8el&zmU-dNA_!N_#<&b7h?ic1!PmyjUjWsLh09!Rx_46a<U*
zN82G5G#4UQ-haa#GtwDt<UTAMt6n(9)^e2Qk-mrvK*0N2=Cu-x91FQ5=@p^H9%*ix
za0%OC1tec-yREiXP&OWp0#$bYhuQzD^88Qt>C@-E#QC4??T7uphyA~Y{lEPEKi^cG
zpoXLa7}=8$m}qa%z=1B8w&Mx51(P#77eVJINK6=|WS&0*o*`kj{=-f~Hke-@xmj`i
zhyMN?Yotgw+?Q#&uimu##qn~RK7&F9nl@)ve$!A$y=jvg{kg6Civ!*)lnC#RF%RJ1
zo&(eSi8A6RCGzii^XVgh*;`N{Ct55h*XALNh296;(M&t3k+THic(xHVmkbe37S0Qf
zmcYFWmX?G0Z7ZTdUzrGfSt|6~VqnGOC_#<BfE@jFdh}}`i00U~CrF!|PXkmYYBV4<
zWN3f|eHv^cd;-LO^shdo#j#{l>im*G4>rO+Q&7(ruLAX;ENP}bb*w{GMXX~DDXc@m
zHi8xl1(d=lr~VsZdubTuiYLy56+o#!lN7?KKf9=*HK-(mS-))C2U5h&VO~60j3=hV
z2Jc1_;yqC*f%?0?{yRE8J3n~;zKT9_)_?t8eEkRi>^yx~|E=Ta2wg6)%AmKSTl6p}
zn*C%^M^=H2$N4{v#^4&3g%)jRWK8jR3%PXy78{N1ir5DcB3LWAB(+d*CxhXTRgT++
zZ+IrgyYTaL!qV!3_z>+NU4O&x^TuCMq0bGLfv|pa4Eqw3LPBNN;F{GYHeI9{O9Ru6
zSy<a?;PMuolBCKFIpxV+e{Mi?H@L=vh0X`VyRau(4>SK0L?TqU%ERMGD;K{Ve$YP~
zGC1_{@hdX|y6DxfbeesR0v}#$KwUJre4UsMabezcLbqoFaAQ)K+S;Eha(~0}hAut@
z6%my<dTpv-+72FXjxHzjGuY~L*Y?xclH+CmY7(rssiA|OLpu3=aId!-v5xEraeWSP
zDhO*d-W%TET|m`*uKB6@f-e5<l2!f?p8Q>FUJ<S#MSP_etesn(#v7hhVFZqW-VNQN
z_c%n2QfKrP=IFy@z1Z#FM=NtaA>F``-rBp3hNg`tTKE{hl4EKSK=6n>@oCd&;3=IA
z_R$7FV=T$(FUR<@0su69vM?=^q@%Sl_}09H0nE_T9kPo)Aw;GZcQG<OG4Oyg{oKU2
z8!*LB&FhU;V}u71TsJr{xcT;w<WuyKH2iov`041E*NYokL9>^n5tKOuw~Y>LSWFlw
z_@+f15Tu<2SP=TmzXzHQA0W2q{m)nc@0@t>$lme!|8#Kh_QRmFnAAT1cl%GD$LIf@
z=g%JI|8@L4eoW_Ia*#me-xWA_cf(z>cCdBU#v6FR!X4uUnf4rw-!{EG$H2xWSQV$0
zi-p@k-ThN=1eU#1XJ(^v@x7mfB|F~gda5tX`3>=cK4^{g$^Qr}u8XZV_%C(ve)7z@
z+<uRu{6BvhzyG86{Au^${*Sf%G=2=$-Fwt3q{QB5o->?qZi0|^F4(uO;q5mX)N;e{
z5qGJN(Vm*TN<1VTq&^O_kewa<YtZ-+K9T!u;fvPm_IJYk!a=dOgX6QK^IyY~{&%h0
z>Gl$ZX^!ZqRq_?=|J4YV4ng6-mF~Z#j`GOEP8}1$hO>WJSMYaVdyBh5v`k)|o?rT(
zM0rM~*`1WYPfyXqz5!HS+U5j}*g1XDBykF0>cJYyyCHk!8t$RfPq>TT){^Z!(k8A)
zZ{C6r9kAaG=hA3Wq>w%M)?=*u*l(9)@&s*-xkt4>wbxqr75v#F?Zc-hPcWkB-y{`Q
zP|BpGOelrz!6#ZOD5nLXU6)Xh$!Y4q)L}37$guXC-FB}9Ed*d_|Bsd^=_9Mz(srRk
zzio4>H7|d7-ED>Gxp16d9UG3<G~0i+pwmlQ_*;YiVgcX~eNL{zBB44lT=&VqEbHq>
z!#T9)%E;c@g6>Fc`54!Tdwbdjxd>X@z|Ud31oPzW@7Bhv7>W2&3Und;g2a;R%rt#<
z{VI)IVHX0LVbRY}G(iP&g<`jj*x2l~0y4nD307N8=nYXjX?||~@EX5>)HPcXZ4_F>
z+CCXS!^1{`xSuI;)7eCh>V|o46G07v&XQDhopGBL{g_YzuK#5?b~B)nMv3$qNwf!b
zJ2b@QmG<PxpRIrvS~Jnn?(UIg8P1Uxq-OS-SeuEW`4bv3J#5JIS`kK?CL(2G-~kVc
z6R$(|Sp0DY3i9~m-G>j6THly07F+P!x!4G`6MO}xb;rX6ub|#778KN1v7+`oC`un(
zxzt(=Ym&O46ocv^(&2Fq%EL|>gtLul-eO5m6vPrm$i5v~AUs*L&;q0_Z<<iHO@@Ue
zice+7Cm+t=v(vL}yWW;>es3+jIsSGomdpK+9Pj{JUJ&QLG3mbzbFh&&3x8N)iV$~?
zJoofjK*c|8Laq<qJWlE|Z3ArtZJ*j#AMhc=5AZIBAHv!UKX3q0&0sitVaa`Tlr$-t
zZ-F78^WP$d_~W-f|42wDLwd5se?}4(HjHajYud))2B_pnWXCQ_>$eDNhZM*h%#Gd0
z+M^I>_zl!6fHHQ6V4snx2{bTX9NVM+-x9iDGXDW!GAz%z!N~*zFhg<D{%kY23ul#o
zhqDVm;|A?$hU3_dKf}@=;~L=Uhf@ts56DpzApxIb|4@X?<rGm<$1wb--<VvJ0m&HY
z{q*Mj!C9*Og6*ZdTU1X{A<iyI*;}ThY0H#?)^uyO!qkWor74`qC%8i=4X#23zXy9d
zl*b8;NJiH+DIBUQQg(OOFTA@;O0&v~pV``}<>#w~6)3JxZ{T5|6eHRDgk^IikExop
z#nBWI&tGkAs?S~H&YBdV2PryIKsv{5Q0AU7f+2d`gU1@Iide+a3xX)Zf-33gqIcM)
z;)cFva6=lPx|ib4m`<C2inQ;^+5a4+sNOx>{cLXGKe#pyYkn{OAAi+flmEBZ-Fe!L
z`G216JbQ@${chubd)>0}zc2rqp}%~d{x^f;!=H!eM;`{Kr^BB^-<I$tR@G>MNptwW
zN5|X!miC|jM307tzrKUV!(prOqZX|+j*dSZ`~pujzohV)-{bl4)A8HEPeY~%)q`P!
zq~GED!NKwH;P~zE1AJiH$$1^?MqE1VF;LA{Fjc`6N}QI)oW;WM;N!>l12W>U^g;)v
zhyM&cP<nnP*I*l5;YRF)zM>NZX66L$ptG1e)|W<;e15!a5ermz7w{7IW9LS5V?*18
z|2AeD%_VxPHWtRhc5XK4+{rdIp4+35;WX15iPw(%au00>X4gi{)e6&Q!@#gOZ1(Wu
zC(0%0`oSdkr-Os{;geqDn}05BFxQtP?<$~${Nre!4PzPX9ghs$dv=&hp-wFA<H_04
zFIo^z;o{!;ibSK^<IlsH;gPn8TpYeXIsBjD$J3L;!P(gmU_t<fM{hepQ~yh;7&P&Z
zmEqicq-V!4Tk4^`LRU}N`OyMH4|L1h#i$NYO}JdZ7zT3X9(?~y``w&7{)s4helnWw
z07*+C^kh#zdfueYGzD=&8{_@K={ww5KOejUyuw=H3teBt$MF+x_(NYe%|}?sUxh4&
z)8IL}NvVUjh0i+wSm~3Gpt8g3_pm&=2|eml_=eE`Ulx4r-{hGe=SCRKFCTgd7egec
z+d-#ivuC^v!ML)W@Od|U>e(LHqr9u@TdWHbU%_=jp18k-{P%|jV*{o*T&@t(X4t%6
zFyhNUXhBTv7?n(RXfL_`klDEI_};Wl5lpYR${8TA)tO?nJ~9hcUjH1N;^kPwgL70|
zXXnj+`(?N7JE(oML(>5P!psCd1TjL8l0HqsBhY+Ns=b)jfo^E(;%az78V;Ve{fh80
z?1sMU32XGC8ogEwFt!r{QutWZi~q7Gt|RJ);q`$1v?s3vI)!|Xdh68BXpdnhtO|oE
z*ER4y5jQ#s3S~eGc$8VUkb}S+A`v@8MT87S1(1fSlLbbgE<?tN4qN`)mq-Fdz(^5u
zh~Ol;GzHi+Sg?*FD1?kTpb#3U!a^!30z-+N0}Y{c8Z@0Wa7ZmxG7+ANgTuGWX^*HK
zf3-*_1%dMaf+`>QVEow?&jJ2i)_la2^!eV0k9iSJ09(u?@yfS!LJNnj>~HT88z(q*
zLD&*j&5wJ==F3-&NNEsuT^eJ}^MU2&?q=Gw-PRs?F#u9Pt-qN4Zb4IdmqQ>~OM9YW
zZ!lSNuPl7W%w}T5Gm1A0^Jl-Foew@BP#WG)i;vzJ-r0={ix^V<k?;Hs!3-u>*XvxJ
z5ynC|JI3ZV0Ls4n0CS+}v)-EVjq?kRsah;A9ovJIkTKa+Gj4pM#MsYomu_(PC*kDy
zihmgDYvvizOR;BEJ;W;$AqD$1Fw}ypGD#HU!TY0k$Ah=SlaGVbgY%Qq;ql;qi8ULn
zlgJ9GNkqJTetI-GYo882whvF<4ltea?*y&nZ6~m&{D;AA>1sB=xzruBu6yWVV!!W^
z7SYedK5Yf<1}*z$(W~&WpzVIF?fxFX8Ed)Ev<x;qj3H|BTcS_2jH379G9!3ID*#K7
zGdJ)auaxz%h8Hh<hBa^?Z0SqCl_m2&dcZr|q}F=EF0zUW-vu3qK6~K;`mY@SRXp-b
zD*kKd`A+=&$FuGJL;UBu;=htUC78$Gr+Ae7S1GQBXfu*^9B-(_hb4A*mW~0l)7Wrb
z+u4H8hDR;|+(IYDkZdfw;%%XniV}rgSX<|p(j7;?kt&Cguh9EgqHvTUMxyW%4-l?&
z$x$(-v73c1g9*<8Dw$u6U^6X|FN$s_(fmSW?QL47M;{Jp+)Xno@(5#xTVN{bp6?wU
z4x3HOrY0UP@G9wm?*Ho~0OynzX}tp7{+eklhze?AxnP?OKZju=-&BWdPVfRI@2189
zr*^>(`6wvF2l))iD&+m}@Z`hCqxXX-9`fzc*})sIT+hx1!@)0ygOBHUg*RB&z9obv
z2}X;NFS@&Ek`EiMx40g=pgpz!`VC(H!CZg&yY;)bK}*@r+<*9gw)-!(`|YSSQ))hu
zXbWAQ^zFuCgM^f|h}rTjaMR|FCvIp^t;WSZ?S-*V`>A`G5%QTG&3B6hL%GywCYEqP
zEED@IXD7IDAQOr3!$Rg>)EwE}<u-Teo^G@miVxWB&=}^<*jS{QJKuueVGxzDFL*b{
zK)adv9S!=I!aIv!cb<vg2%^W2BkWd*xTuFAE)^a_CvD9nQ7JqkMH|ch#3~UMA|LF*
z21_XMFxg@kVmT4fkic`S?w#f2?=C<=xODL?wQ}~qe+taI_Ub$Twf%fMzW@L9`SXYU
z|Ml4abobw$<Ed!>+8CGYYOj`alI4Lg!a{dGYb%djR7lQs(NkmLJ3NV+d0O`|EjwJS
z=AJq94{bYc$Gy-VXV;_<x?{&&dh`NCykS>!R(L651e{IoNHj?B0!YKua16`S!j86r
zPyR7h0Qkt$JpD6_2U9@m&TNIZXS%x$Z4<LKVmt(~CW0-_x#@cBeYT|8@XBE`Y#8ff
z8O(A)n+Z~J1+d~_d6FsvI&1tNt+SPinvw3BWAe5$zP=_9M31m3l1hW<iP@7Un>(GJ
zwt2FxZN5EzJ9zWyowhlpX6EMHT$tYL9&#ory*=67zTSop7{RzR+1$C_@xS~M`_k`T
z_q+I&)a!Qo&wIUYpM30X_jY>E$j6<2f4lELefi?q_I6Mj*78qZ(C6L$&NeN*{qp(K
zXV3fs&-*X>>=633524q6{-XPoeC)n>*?s!_C55Wne@377p7mbr1ViY-h@U>ErJwiu
zPha$WJiOT6>G>muvAtx+uwDNcws*FB0b-x_Us6c6pLciYvlrc$&z^;4pFi(z`y<%y
z?>wi3+V1uHPkkcv``sNn0CT(?1HiZs#=vjco5uCnpd7Ak!dHJp3;lW%=d7|^)qmF8
z@k#P>2N=sI$@AWe=M?hZ)0aCTTlKmxw|yq;zSw^Lbem!VmKPy!KHq-vve(FwFZj$$
zSMWe2TM~>34{fj>7E4l%y2jg}1g?z<!IB0@*hQdGEP!q<rO9&`<V%;Uv)#Ym?vtX=
zp6)z<_R=K<pY~tCC++pVcHQl~c=qzy^Bt|(Z*ety{TJ8pOX@z|>FvBARd;&84?*2t
zr~k4Gw2xPP+UsA#FIIgCO!JcTy3>F0bbH&c*#~CoZ@)}-3c`Jj|LkhLozByj&-<?S
zva|hkryq9t^jVkoScc8CO-cNApT2nc0>@6y1HOE@laAjPAj&<8q#Xd9fw=dgznz2M
z=TEzoHapu-pY|98KYiZY$wKgpKJfE1ir}4{?H6Iqmw>c9{DLNR;aLaFq^DuU?VT4r
zAH(wLg>vja9oA|$UyaQX?n7ZBWW+K_%U7BG2P*4X{Qk$@4!nM_|GvNYPhe0!a+Ahv
zlK9R4p4s+nZhV(<o55P$gr&M;=nHoJK6bqco^y;n0{pU%=!%|w5~fsOA5N3)Qx7iJ
z{6hCQ|H~Nj!#Z&g;?-!Z;eKkIq0q_Spetou=7Kq`!yG(ZI>y)}N#<T_J0!s<81A6j
zH!=6q=6E$S$5hWWMz%dC8KH*c>5x2yTI2u(TXV8nEH^zJbfsSj9}MXOru+)Pcro;}
zxjpL-m$ui$2I2cPjj{B<&B-y?MtE}dTHD`$t@T>k{K=D7QFik=@vUN$y=Ox|asAjE
z(neslGB<+eKevOX7vU$X#b!!xh7f&j;R&S|E$u(d!v%S`gAc=JR&ZrA&F-XY7sh-^
z)7`rsrq<9k3I#Ix|Fl1}k2Q+EX^`grI=HN}LDPO|4Cq6e$UPh#3;|%bUWHilOOX@T
zpE0saTabS(4F3A}w?sLA*uw}H!w4Bei#9X-OxhVPNJIEk&=O@R?%5;FrVZE~+JfDo
zP1qfA8*!>dk%D*(xrf+pHvM<nBdvQq?ZUA3_kl2e*qXRlcS?)x_{FT0V%;ez)(vA9
z3DcS`<@01x!OwWp!a{rW2s?fabbv}2%;2k&N8v3Ddwb3BnH%Kzjp54USdX4EZ<6EZ
zUgtCU*4F&53;wG`zP2{}SY^UArnOvos2k~#x;W$bEJnoQ75BpShQzE0UvXgKZQRKs
zG||Bf{w&>B1Oo{uX#XU@nFjTuLl4@c<Xu^xE&Ix9lP~&wj-o`>3VjslD5`&eHsp9Y
z)rDjRK=p=@_(z!&qu<x?gGs|${H{HkNLgc{4A_{o{79Q(yVT~}4f|;_PpM<FNn70A
z^~VwMt?6rijs9r{ZTO5xj!)889TT%|8x~$69~_Vz8;(OyM%t_S#QM933}sPx2{^ly
z1%g)bvtdo<!7)j4YO=>1s}FI8b^(3tYtxzG;j<<MrmfL!uqe3z0E&+~U}xY477+!{
zv{uHecnOkCVB^i;-O;i3VQ_Hv>2&a6aD0BIVT!kpC+8%2$k|2<C8pbA4ufI~h)BHe
zGLBHD7=xCh5D`f{Y(#!D$C#|F38u&sK!q^7%t_N2)*{T2NH7F1f{x?VsC*nX-=tCW
zcJR}|r}y6zia5#5M^U2@8|drIfu=R%f_*r?AT4mkF*&}Fc)J|-Ru<*~rpfy`RoT{f
z#TWeW3NU((^<5VX)9KJLT)>0&nt7}F?<iA%9pyQ=*}d^!*Bbsm-Dq=#{hvo1K*ACc
zkmC0g&=v<NAbcCyiU;>U|A{s51KWG@HTQNOXXxov&`1Pe@+I>wy#dojAFyXM{h_p&
z0{`X;hmrtGfK80%C1jOUps^<G=@d70eEqq*5fsP2Bo<a!Js~WtxKPX|jA<|_e&B5*
z7PGBRWRtYw<Bf?#qPzIiNM^<xPP*|yha=_d&_dt=5me)>1GZxHR8Wo55f5-Agvg({
zJIj@OF&ydR&rMQ=Yh(no)@PAA;zXWPMv@{gE9N6Gp+_)rj;u-)fDdZ(zb0*s{<cbK
z1c2@lg7;`-Dd~i#Ytso2B7JW}x|=MMSQp^VI5*lzK-MV!QI0VxPrGTAzVS9P8sy`-
z?S?1bqzsaft3FwdF}7zGAb@2}WosRK4aOf@-ih8OCRrPxRjKLLjkYj!cjXvl;}p;0
z6kjiVV;6H#0HQG&7F}Pm_EWfW3@$UkQwrE2S$5CuE5jK^ul%K1HWZIEqZh2Nb4&KQ
z6}QprM4f4Xn0|+YD-RD81L<qhY;c$bYC}uyq8YgD!Y7ZSFNt+(_@)ysJDG*4#=H;K
z6imaryFx=bVn2OkJ6@AxSAB))%{`52s1va7$e7`!wy)TN!>Q@G-VmL+!<l($Si_ix
zGCgjQ3QhkYYMbV)3n9bIDD1KnAxf>r&P1lSxz9m>z2!j3*GQ%~GRdK5O2i>St)p8r
zV`y6j*G@C9BTS{lC_M%2vD%HCDKam0&p=g3!&qe@Z1|HAaA2yR1rzZ=fuZRquz92r
z!ncdn1qwwhD^xBpOws{Q20wu!Q7(AvaX7tnJKgE4C}b^SSkee4)<K#>w|;&d&H*A~
z503ckQZt$gjO!&i!bNWYK<!{p18GO~oMWzpEj0&0Lzv}0XkbcG&l4cXgiD*dFs|_r
z2kjmv>m1XCKMZ?hXfwxNEsaTQBiU=r!U<+n4w>XcaQL82#D_B@zZJoBww~lo8=+Yf
zClg<ovrtIUggY}T%uEQcnc-<$3?kSX>2^vqo6;;2c=hcNH7mLE0>(MSn=FRpw%s9N
zlx8C`|01bLn;G^Wr6A6wlN|DIN`bg#LFdEpd~N1r3iA)uLd3jzod&~iU2<NR^?PG#
z@?pTlubEat_3Q^-HiDQ;zaEngHElpaxF+UEW#+py;LJZ_eIjQv;0a7FdTj~4ujE3%
zG??Hvs5mklM#jPDV$J!zRJak4Ko7)fY>@E2csmTzQtIVccQMpb86#Q5D3aL+dn`)0
zR}q#efkQ=2%f!SpN$KC1w*KfOMOWA=C&zhh#^swGG~r`r%*^+E4vPv$NXQqN@yf{)
zn5f}DSW_mbnU6ZET@(1{*4^2yF&GZpVthCWOD7bLK&o-y9#a9}v_>!eW|BGt;uDh&
z8NN=F2=Y7}Q)*3#iO$#7jYfQZf+-K()d-IhuukYQ`nfG`h;k=XquoVXoPG~9)gIXb
z=Mp9Kd>iLmRmz+82s*rk5>GDma1YjpG)4VHZ)n5JdusJ!07zWDbYC^nO+|s8=q5Fe
zzuHJuegVFpJYkBw;uh@TFt~|?l|B_Um@qWC#NP=X6{00Hd}As<#7aJPLFwbVm>IKX
zuxc+26q?jEA~ypCH4D*KM7cUxCBm~G!iQv?Mh}v3p0TukFd>fcT|1S<A4fL8Z^j>t
z-6!1R%E2|-#aTt#V#pK5HYY3mpxT)*wAd*lmyIKCloGH6&nJAmMdtX}8a49D@Gcl)
zJV#MP!zt#uy*+(=ht%sG)1jQPo`@m=X69WBf}u?v+wz*NPJ*#xO~FDeuPL$`6SPFa
zFXFH;j(E(k@<Y4>fuo*rg?(#?-T$G592iN5I{Iv9hK0TkL-T{;X#TrI$XzG4LXUMA
zX3+LMl<#f(^J;mHO=|z<s)i-eQ%RB<a{Pw(yPi=plWGav&_JYDu{N0&j#rB_KO8WM
zlZ#{UGcKSLO_Q`Zi3Z6ie3-(<AB#RpjE{ki3?ck38kM-EaHG(@02(~j-s+x?`TNo;
zXI9rHQ2RGhH5w<@!-IwY-nh5M2xbc!Dp(}Z_5fCl78&W<>A{Yvz>XmdBJ6xWpM?Av
zWGF7|>Q%gv9O{c$HMziN$H|6YHHk~;ze2(1gNXv64P%|YX_&x3P$t+S-k^^+;-*{J
zR`;6q$L0@^I%$>dx3Ir96Q^~@j5^<mjsg)XyZE#i;`M(q@flUprth*Q)0y9MTMNDF
zN$r_CPjb}z_9|U<2MOFjOQ)9lCVs&+o8n2pV;v;-P2I*TZ^L5spl3hpLi*Y~Qg1kG
zhV>#1=aLB_d>$&fkUwRYF_CK}ZE)ISw-pjIV}XFT+xoj>evAGrLBIKnu()9ll=F*+
z5%57_iY0m+qscU);Rytc>|memy^iW3uu;2V#(}7GlyV^H-e!t%zU-*M-%N5n&8xs&
z6~wD!!qi~sS*w_ofd(<?!NLSBhj4hC8s~wEux=UGVW;u#Oe+_=L^<%F$7%(|6Sg$n
zFvOj;rS9LhOjE{^n%TB9gZ3U{Z?Rqf#(oJLZ=0GK$4fY^oAg9V$T24JgrJc)5R_%9
zs`l*R_yYG~HDOikjx1qVIED59puqTDEMv{3)QNNDpP~MbLx(myx!6tsB)#V@KnmL{
z$(-@z$ui{J__B9oIFUjCuE5j0NTM!Se_UC8ex={IL)Xxq@kLl3tKxgz8_o@#TolxQ
z70E}kgDI3UN73ORE9BB8u%{MDzqQ{*fnWN6_FmI)CbVFm?NHeLt6N^<y-X9svKM&M
zH+_&H?MqI^qh=n1)gDk=<JyBaX0T?W@)OnC=S!xcG_oOlnfK3Rv9q@S29}lv4TJFy
zO~I-zz}k>eRZadVRe&}vI9J0_4*(3}BJp#eFq+mT;7*RT2@Sd=h=xCV6m$5Q-w6C6
zn+1UO%O|~70ypT_Tec&ZTS7e1E}1(jtx13$Vtz3*-i0~402n-wq^fp%Lj!<L>Hbyq
z6V5cZha*Gx!jfWL?HCIkW!*_F7l|{abaI~S(%oA`c?KY467AsGtC(6J6Bh|7!q^bK
zOlL@>!GDO2gWcwCB*%ltM<*s&dSu%TuhDMC8(@@nsuU+$5p70m$zse*?hhkW*@?2O
zv{84nSP;`-X>mbS+0E}W`%C#K^Q*oJqebF&SY~I!z+^GH0;52_Ml^QuSjf^WZ$Lut
zM$DHHYZtF>oBPc`$w)|o@@#=XaFWyglWaDKU+ap>I=MqD%d^8Y%QFe_Zz51BFZ0s#
zTUsx``F~`F?oy=FPC2ZrtsPjX**H23?2WgbaDQYvJMg$utc8SEBW?6}gJBNS#XUB0
z`#w!J(q$kvnzZ0))}SxYOWY@-F;0y*2KTdOs#ZP=XX03_f-}@`@^NPSX&juyj4$3Y
z&?eBADW#j6Y{Z?9YSH{bwuO@8>;Az6<}$Cn(3Xu)hW`3Lu9yP<k-QdcB$QwjLH9Tb
zN{fx{tV1cJC<H0~WR#)IAhZNl;kC<6+<b+YKa!E$Fk&S#Izyk#-L^XuzA3~=%s<6F
zidmO2P787Qs*<?Dh%^yF&7`d<v@!*9U>R4Oc2`^-<RD)H>j|?yg2?AF9@o*RV+A8-
zTRO1z75Fl%;;6Eo%ru{Cs#1uW<UG0`gU5KP&!&Fo2vI>i7W#z!6(wOmx>$+ed5k^`
zFc(S5|6<;3aKUMe+ci*{BHt^0m*Rk>#0o<*Lyz?ttwkX7eUY$cO*GFOz@zCVm{G(=
zCFi^K;o&i`yU`E_=S3Ls<0okS&HZrn<oOTLi*V%RGz=4s$$0cPvf^(a<DV!7mxm`h
zk;P=O5%xkhBX$!(_2RIVTzv6P26hXBJ(Q%6iKeHAkS<NfTj_IOa>f_B^B8#c(*8`6
z%n!-opeUJ1uzn657_0ew={c`Re?e39O1hO5b0^bmM2`fGv6cUC03E&Pjy+Qr&QU=T
zZMXPX>`TLO%!v_YXxRum*iadd79EBmiId_TglAInv|Adh`DRae3M=X=<DB35MX)25
z7(We&C3pjkZcJQ~BaEg-jgJ2%;z$xf;i3Z@aR8&e?dUib3xBxF%^shLa1woPK4wld
z(UcAGLurZZ4f!Y3*dv}9Cm!bd*zl+G@Bm%Lk<#?e`9tNR92}B#!=N9$N7LmGa3VGE
zzz+<NriyBdBA=;_z98As$Yq@2+yh9a-GHqE2o6(zBs%d2q6s68QqMid@FX~An|CU9
zmxD8>%Wo{B%ABG}I~C(gV6M5!d3SgtBxZ~3N7^j7CoC8_bzzf(p?F*?EY|6Ces}2k
zuQ?c6oL??-;l#Xev2LU0LNArN^`F0=EMw8fET;&+I$m&+xlmI!%Q*aouQKO%&PJnW
zchW7n!~vXyQRiDRkCUTI)aC5F#M{JAk|#8lX(F3Ei4t<oDQ=X$ew~Ms|NKWpBi=8c
za%Nu3k)A}mtT`1@P%HftQ&_ANMGI!!|HCVX2xQD&SNc*hLVLLJ+DmwnIINR%Tz=tw
z#xjTJPkO-{-Vep}LOlS?gUHcha*8(`2Hlv;_2;o+P(RShIg}M1-%LUl7y|4dL^eQt
zotSR<nqFEC0t2siMaPIg&$y{}qhEpU)VbmD(Tt<SuNbFU5M3Nh?(z6CK44LFgp?}2
zpgwz4l-WeUipf_&C0=B{k!7I+-zS<}g2C9>AJHhUR{UaBw-M>0kBO7Sd=ii3f!h{S
zdpEQp2sW5jXuTvhnabMczNjok4_oftPw-?bjN#>rr>(TL|Jq>NAU7xgiZCBt99GeY
z1!Qie!ycop0Qe0i(2oRw&=}#Y#1~0w_R<O6kDUoKdLj&wBo`^%R(|Of$m(VwtHBbC
z+ys7MUkz8TJ~M`3%?^o`7r*#7oE%8G-Ao(W?}LWRZ(_h68qJ$P1m;m}iikFv(T;RA
z07<(tz|_V7qGfdVFOACQ%ww9N)G^#3*D>KH-j%VzHsM>_6M`sW&m-#zO>M^FtzpF~
zT5?9Gp8Juy7E*|Fg^1R4LvA?~@9DShAAy%$SeaYMB`Wv+U|jSmJ!Ype;ff|#*lBI@
zNfS+avQ=jj>j+pRxq`~aC^s$+8HG}`B*(?KDLbxAc9j^H&#N&twIArMSDA(AAsVh|
zW;&2ApJIj^F`Jnth6`by8rJsVd4g++=N4g0LCzMwkstC~p{7*S2HI47QGSs~6P55K
zVrffWXQVAjNvN4fv60n5EEXX_gdg&$KvVNPdc-?9;w|N*V@bq65ZnoqwtnNW7h@Vp
zcykv@Sym%=utj7cCQiOE9=eoK`AAbicrpi4j9a+2iGiO?dF1EH@!{T&k3<km!3{I2
z`$|+!GJ{KU<r7~E<foEJPa~5wWrboPOqb0hqY;iNbxolOZDV1_$=oyvvy|;gR9s*%
zz7_+pCGOxAk9R?4oL~as)?6w12+6*DhooG`4CuhB_=kMsa{{yTMQYYS-j5jU-5kv4
z0krY`o0!N2?Z?)@!DHb9IOrzf>HOA+pDprH<KkvYtgANPNjtG#Xfcx{A66X;vh8=H
zwz!~^020f09NSK`$PnTs={K?B0grx@ek&MfBZq>JHiWK{)Zuv`MkD@Zm6Q^8+ZJ`0
zN41sL3Gd@*_FJ5VL}rJ5O~)gm-Okf#o7@#f{`BadE`8YT(?7lGHzAhvIA(M)HhA`F
z@+^b?upm|*H*{dx9pkCUUF)N*8G24FP#uEyS}lPG6_)I5p0gnDl5cXetJ>FzL`h+X
z0sC2h2-m~ZQIJM*h9rqgQ-YWXV24KP0+(Fa?99e(e=!Z8FMs<ZJ&2VLeOOuMmz6PG
zdd^<p>B0aA{ZcpQWLKF!^%H&x7tf@_?jLlZek=RhZ^7k@oesQWw+lO+eg1S?&}i7H
z?{td^WSr6BASM_uQ9!_mjIWr6E})-qb6@BX2mtw}HTbMu>~1L1Xf>Z`h92#U;qnJa
zjb+$dHe*+9af1!l>oA)jg)zV;h15aT2nk+7pmPdE!B0ZgNMaS+6`-2k47H3a!KQpm
zvSzw_mXFn<PK&tDf)oiVJPx)+Uxxt+!C3;tD@-w=uR<hh6!(brwm?Dx-%50EF+DSa
zGZ)t6CNJWYgQvp_Xnk%>_??5TE$z%RmRhf)ojW(^4qE8Ak?lhtaVNcksUuXxmB>eQ
zoyd5xY!Zeu|7fR*Nh`jA@dIs}x6VT%5To4ixV2~uD|s6c@x#=KqaN-On0tDoE%2fw
z+@GWuc@YqP5N=2pBfV&O5P4vjL2)VFn1%5-j`aN)>8Uzp>Fqa*1o4kk{(bZBIdt1d
zrUBXDXePLUi>E+3b-v|v%v{{X_sL)2Rj4~5defo=C@R_6D{eTJ<ATz(-|lE{eG>$w
z+42krR5Uq@3T?TX?lLnD%Mr<_pGcMZ1C7E=V^L?g_{mK$b7T=8wTH)of?JSx1-WHN
zGzmxYxQDN(er*W#N{QC!gUx;Pg2(;cIQ{<Zd-=(^|1)_3b@lhZ_n-Bi#_oTA4nGg~
zf3D-_#~-#vriFCAXf%G*4&NW0o&VNs&cG;X{qcGOZ$94c>~J?9Z%CGl-;mr#ylkZP
z62H3~dXXun+DTuolfFzRa(xIZ!$VkNqE6Wi5_476-pVyH#=k8${_Qg3->!K4+m(#J
z@Qu^hFhY5~QF*IAtno&-U=rQJ8GgQX=wDa9)$4V7Tixe<BCh1!vz@KNz0VzQqmk(L
z!d@6#w(}MGbuX>0`TTORb!y<*cY}00p6eDAj*;W-;OO1pc=-0@_<Z=!!QuJI>CwLi
z)pj9li!#SfLhGo&lh#H?;isK$=?;@^kivI){zu=0;=SLp^M9|uotXc(AMXEN%TM;*
z)EjS*^q)L)-nrOd9>>@M-lh4Yw8yMJ>h00l!JGGk;n~?>IQZpo@bUcU<oL`_y$=f=
za?dvWPx9*chYI|wib+%gE+B{kyMq7m4oPym1c0$i>o=S3YC1KqTUyhvMz@`tQL21D
zgAZE!===R~VodS!tq&(}2WL=!d(vudx0*YYim)PWHh19BP77Vh;iF%~kNREz88i*h
zBwLRcVgML|3K^UpoDYUS4-VcAPMfFggVT5I!F%Fmd~<Sgc0T<7*?ZH*wryo$_;d9u
zP`9_0Y+AD9C8@XjDvr|n*4wp}E>D|Bm!c?}i6p8dWykHw^V{E901)6J$<ESq&P~o~
zB9g#h01O5*gTc(jMenM6d3^Rg@Rw@(s`KAerwV1+M`?XU{9cg;=|wd=C!N!3)6sR`
zzDHfuR?$C}Q|1mZ%L?eDR+Sg$m%UE==!a^yzmE8SjvS|&b>flc4_DD?n;U!9$X&iX
z>pkn70nl{JewX>R6cPXtK2_y=ks@`}?p9BURvb|>q}AV3J5Wjgow9Lp3Tm{dhLEB4
z^k}bec)yA$8!_Eahl&0fuEPBj_s{#ztvfo@!?#z-^js;}4}#lqqtSAJ(}*-1|5m#3
z|As#g+y8GKy*19+|2JD3IsJe0v;P0d_W$7o7^z?xme@e=S^Mhvh=2Z2QEaxfK!tLx
z|E2hlBQ)o5#QAuNKk%?JA;V+h?;|Te4F^7FG$*e>X24x|JmfRYln?brVK|kZsA;ip
z#Ia*Njvd+}jNM)IdTs&*`m@CE^{UlL$XXBu=K0jf@Qv{>V(EyT7hDWwr;ETds0fzx
zEaG)=pvKj9zH{Sob>%BMyy+2sgql!=S1f0i6t~@73|-nzq*~bx#{QQF9bEmpyU*{T
z`+--JutALS2Pi(Ju*QIR^;oRoQ9u0I7ONm<!Q;3-f_AHT;d)%F)$wf3cFUyWn6yih
zDwgVzEc>^2==}5|-=}3dpjX4k>q1cv2z4uEB0+#1+dyt54Y086iWr15H1nrxqF_0I
z?9D4cW)oDh+lN_`55gc_U?v_$N(Fowg?F4~E~(?ssu4X9KYQUwea4?sT}hi0$5pK6
z;76)zd>UJyD2!&1@b_LY@Fz)-+*gnJlbmzXxsVDXk>KCb&7nFY{11S0B>OTo*4kQP
zu#6OP1d38Pt||f`Fm>GM^r>owr|si-A69{nA@550mR%osU(LSIFwVrt<&t<d&jDMZ
zUr2I5@SqQs?jWDVuzy<lkH}=?#^3wn@gvPY3*<kL__mu_`ETpVXZz15`2))`Z*D!V
z*TwpYXw2fMF$(%STMah>8fb_dIf(Vpeo2;0_{;0V3p9dE!#K7MhS@SwVl`DC$8qep
zD)?lJpU*ol+AmMKz3zGM^8DxCY5Tv<FSUf)dvS8!?p6v-TE<IavyG!L@^h9h;<{wc
zf|k`MG)6r;X<vyO-yQf-T=CxmgPe$!qZQ!1DO~Du{pJvM)AXfuK*IKiC}v9>abv?r
z_P@XK+>r}>UUlRMo9lhwIXx8wv=?8)&;ln#kwaCL+u*{w16dcjaHXQF%gu&2jD3dU
z*iuL4@lc%JbMZYxF76TUz&5JtGi<+}x-e+Ga|_<}c~_qFaTCJr#>iPJeK1L6KIL*}
z{LhcY1@RT`q)VA1VS)!iOpb+DR~?Zan4o1z&Q<UGvzJG`p0UFMyxH45El<&pl{gu{
z^<rQm;y-?a&#%n;^^@Ndb`f0(qmg|1^=9koX3I%S8>L2rC1CBs|M=_5cm>_N-lhS*
z)kF(9up}uQG#xd@_!CAGM)F6?&F$czWosW~=wb3@H<9M6+Q9+eU|dmdv-Si?;Xq5q
zAPHeE9r*7@^gT<$KtodKYRy0*u}N1$${I&Uwv}3nXEFGey1gg6%+b8%pGNg`75-=3
z?~|qWo2pYrH3dhR9(QthGum5=aBAjpDnwW{lz!YS7%Xmd$8NNsv9sc4ek^LB7`3f=
zsBPP*X>2ST;mXT0w_vj516j*NB1_idP-Rf{p%tk))koQ#hr&38A^q`vfzYBUsdO%P
z$&$~?NnWx9_QvHm;gEK!79W^+FjdI^{A)j|%>hQEDfL(QImXN3?r3z6*#-z+dUAr1
z9Awx9I*6Iqcaxkr#=HcA3Id<+&=wUz1=zLd(8qu4d!QE%=7MA*ECEUSHbA0{$seG@
zKWioG+BDwFnFF3@SKma4)tIUg-4&;A&jc(1`1eOpuz!RnHOTQiz%=6q#}u?_>RSf0
z7`^ytyGA!Y7lxWrJbNjPTUWK=FS+|q`ePS;Ao;O+iWUp``^@<Lqwzc4Yn9jBVs4x1
z(<yiCtxRoZJ^e^egf~=Vxk9+3>5Yr03yDkmu@nGcV5_Ijk2TOb`_&(vGyLJfn=|U?
z80}=L=<e<%>$F&&zV?g0{;2AD#dXgXXxc0o+~JZMPhl0HV<XU{@WdK0zEu!c;t<ao
zsY7amz;5wxu1@5~)5$MDRK4o89dUGia@Bh#|ApsfeZvu=oxXUMmf65>N9s9EBDn0<
z$eMWM$EhbZ`GS)b;myH@*b{H|)p|lp!W-KhS7<e`p}Zz2ulLnLM<}mrnL8#@mYO5J
z+vQXFwR(gjcp2WEy060sMgkwt!fxcEF~@$A5{;uMZS2+ulrE26j{ExQjHwU_Eq!Ns
zniJB72vdgGQ8;=(#7-^5ek?%jv;Z-Z?y^&q8fH=l<E55$mRS+e%00`n<?tG>ayTk~
zeRCBwmoTcr&z)6bLJg~U&4@t~Hw@lr4QvrpDQ|pFJgjxu<~zA6y^-%mAD68_E5z7Y
zi+#{?EnKimE?Hs6%r9QHBB!pQl7>ndDq-oSkO!0!SH{17IReX)7;k+alg9owDziH}
zYRXpRZZm2gxv=|R>CkXgT%FBl>b5d<Tl4B>Pv9#oP>{R&)b)IZvRC<7YP&k!T4rAW
z_BILKQQ*cHX$-CEcv8_Pw6(j;uRnDzk6-*+Z95>*Fs-%mc1V$1$I6TL@rfL*Q#~bA
z^4+rL1nR(IsRY+c7havxw{G=}OVLoO0wB757zxJh(wRd&!Hr}Z#xnadw2SZix%cx~
zQ~E`UWIijn{`wYu=T0DG=m7ph0i)h?&uh-#@0vVZne9$uVv>ePDT2S&gSb6~34L2d
z01kGh=B(7Aczvbz`2H->n{J6*u~c`u=A=EWZ`}{sw`F?PHASXHU8`pLR-<FczWwe&
zeeJ$aUv0Wvf+`=>SxJ`N><V0(MyYQqrhS14lamxOn_@ei$z20H0SxxhqQx10Eh7-!
z3!Av3-CaHd#KY9XYSY=-GE72cg`Vs@H7rDnOJRf64Y4Lj?`-ZpT2suM5@pxZ^Xbn=
zb${FW-_cDt^5Zx6ALsmUW9Lb8Cwu<4{iON%{O^<af2jjNefWnbfp`E&Q-5;OrDHyQ
z&Id2hKwodDCj?*}?X5rOWBu>1PQ?1?Iv(kBMEs8r$>OlSDGQgL0?IQ$E_K<t=(M{B
z!1Uyc?&Wdk3ZCWT)lvJTeR;qpuE(7Ks<)EZ%`dzb3}5)eQVCnR(=ozYqUA?EhR|nm
zsx0+I7o{gCsf!h#6>4`Gv0vyVV|m&L)73_#S^&&nk@HNt11ZlK!!O44h%x<Pq(#yT
zDF^Dto*DGuHSSrj_YsIz2GS!nO!`E-2By8F7T3jVvEFnxbd(@~2|I6VwHmFyUW))g
zdhZtjP>f;3sLBJqiUVD15U^GJi&0b~SukK)E>`24mbQ;btvZ(g+H%}l@cB?&{V{VR
zKP{AXB_fT%1i^B=?O&7y0t0k=N)eg`$|i2z9lVC=PUOsF&75c9)gMvf@B{v|Otqe;
zrccdHu^_PFaI2Cu)|)AHEUv?JBkO1~k<LxD0y54`2I97DLsx!-mEY`3S}3q+o;*M4
z?q`SY5af<<GzsHBFsc$Er;Qyr6hL)pfjVC*IjPp51O9MWjU3OiyV+PO!wJ#p(w%&-
zE|&IiPT!X6RZ*X^evCPKKR8ehj0Jl8;C{k49t?YpU4q;lxrFt>B0QEU)zP>Sim7VO
z1|I*QUeeIag0dxuT&)#tfBFuV5^|nZS)-AMrz}t>=NDJCQWQQc0;SE<kxS~mpm*U1
zPgIr|wM~-V!;ao#idn_qM~d~N?=kRW=vA#*vKPK2uN<i9{lfvVSvly2-^Yr|9r2CE
zEW3sZmtV#Tn_q&<EJD)^YxH64kG<)ARrb%xBBn~i-t3O9d!7_OdIKNNo>VoXuw1!D
z3m7a%flt?NYOF0<ziH}U`>^sKxs@cep_D)<W9h1rX)*-8k|UGeF!KGX-EYHFC5wVo
z2tlsHq`m@US<@k`CvQT7py6};U=%yYc=Vlwp%~A+8|BX8c?Sp0)@A|Iflm*Ko~1m1
zfmP9g@Nby2^RF#u$9d{BcbwLiv)OXCz60&rX||p^t?eCWyV-KS+t_fL+wj-MyS!=F
zIKJ20(!6XFz|&zs_&Q;BWz*aIaWniu>AMOfJQ=vr05o(dovn#>_oRJQBfiTp=gvYJ
zwhF0U4@2<kFf~4iRF<?wCFMq@{Ku+BeaHAMS8)lHmeV?v9HXSfrn9-A#Fn$Qpv1Pb
zy`aR7v$LSY6X(f-5>K6{3rYa>G#B+3D6gq;vF!dINB<|?<|-WB`q86I01NzoH(Q%q
zw*KGRXtg#!>;L}*`hRBXQ#*k1!LSDyZ_Dihr>Fl8c7p%D{Ad<}r3?gHxl-IWU5p>u
zYLK%f$d4b?m|#r)2Qw<rC8A_-sTF)}CulmT;Fjt8tEA77<}bs-5+v6Gq#jnq)@SoT
zY96?9m1!Uu{LPjD7O1cM(Xh=Z8#Gp+==y=J<~~g5cNdBLntV!xem|&7f3d8#t)+G-
zLVGQTCyfhK?yT7E=G=QoaOYM(=5Qo~h2x=@(5D;_!mD6J{<)12YBCGdr<5zPMWD1c
z^t`Bg%E$}_%vKd(%Y7dsfNQ%YX>2lvf$<pg-b`X&Dt?kD9^x0){g$x_ltKYIK`L1S
z2F;I*0BqKFM|Nd(UjdB=<FPg#XmtM`HbNs1-;c*WdL8%vh<fhmI*fwkW?a>%IkH2J
z8kVA^#pCr5-Xf#$_@G};)??tia;p$Ai+Wo3R*tb)mQTjz3YIsx<_?-H@|XHi#B%hf
zg<MCMG~mfmL?_<B3z7#6q5-0C9QF5ih1m0O$}f*Fu`I(O3Mw~NAH*V78)PV$xQgf5
zH-|o`1#wB{@lR$|%mR``@OVJHJXR&e2?>bb76CG<()#KlN4V|Xh0z<*ClC_8l7P%I
z<<PR0Ib6jfKkeS1cK_t(NA%N%?2i7-ba)QrT*rYG74%)cwYslME*OCiY%P9&j{jh5
zu`%7cY?hfhl0p{Rj#c@GZ4{qentbdiDXPd0)Sr5!=$9;(A1z>jy~|Gfq*|~tjN$u}
z#&21n^`mSI^XflDcwA(?w^n-kT_00qYuh49wl2>`zN#8y6U3jToW_LpX9@F^MT}`K
z;2pA%Ne=ezSZFB!mpHL#rH2-o6N_aIDok-ut}zVV8+bGT1qw&u!GwTsYimX_?LG4-
zs6g^4Ki()h(^%?2W47FhhH21x|C5b!XxS5v@{-w;j`EV(6OZze*>jEZlG)RX@{;*e
zj5%dL;AzI^KOf`#=f#WeV_iR&@&DZ3+{yZX?rgO_$N&3O@n5<abeB@_!oQf239miV
zdUb>=M#1$>a_8gU;s_YGJ}DRl{m6~(QP)MLipK$T)A!nk_jP!UUr;EBsBZPBCR!UC
z--%N<dLy2N(X}59@Pq^dOF}#S^v%RiWOYLADx5{051<CVco9ZpL^GcCU!yuEb7Cic
zG}gDZPs2Zh(a6R67qCGDo;V3SG$@JNx@grm>f(3^4SJ{-swx*J;?CtgdJx3&sAGU2
zC@pcKhR6}WhBM(|zMwuO@uIXriMUCU>29Mj=BDe=mj=LWKx>Wq!egd_!y_ZA42w7B
zD;9hXOAe}x*Rx4Hn@+<By3*hxu#^u=uZLw{$7KI6IvWBYeC>})hr?>kIzu`X)>oNF
z5G2pPRK|9Cv$FSYP9@&mty=tQezj2uzYFRb?-4+Hw)X2qCp}t&hs=qYDt87$r_14k
zXl}IhA)GGtyxnc5#qdeR9?=lX4hD<IIXzz^$!{_oCJyBZ8{Q502h1|;r2gQ5a)mjZ
zMeN2M4Kc|TzhvpK;DqaJHtt4lki`4l!`<CW-yInzTm|K&OhqKAO{+)zyr&&{CjlL?
zQTH2&v6WS_i|N?<vtbp#*a;^kByotD@iIkV$oJJz)S)1t`mVDY0yE7Ml&D|%$%~PD
zUB%xoRhc~_2elLqReiCNy+y6YaUSF!pv)DNNut@f2)criLO0Yu;3}gkjfYO-$xocU
zQ@KE6J22x_IWWTEF;wQWqL_i@F#wd-{bv8^9;(`lFa6_CK=bzHeihHuOvW0Z`W|p3
zQ~OVO5A<<S_R{^s8we)1JeUr*Vn8ptD{tqZWd<UtTAk&{Ig`4^SmiU`uCF3(u8zX%
zR#j>uHKJBbG}`ocK+Cpo7dM11hReeUK$Ka9$FW)lcD{6VMV~(^TEamqC>7yS7Gfps
zzeK%pAX+vV=H@n4w%~Qqz5TQRjQ2`5eJ*jw19D=;0kINk$`~Ykr5eW%$D&qvulw|>
z5zA^4b$oqF&aR})6)MFX=!>;t#r24Ls*O}7wLPpNnhqw9GecsQ6k{%GmXmv0F%a|P
z{Hn~e7XzRos$2oOa>rZPtxUN*a%$@=24j8AS5~c<oA5%EiVEjJTl;!3z<d!C%;ymG
zkJbOa_d`63{8;z@o@^A||J(ko|9y%-bJM@TyRkb7riV-V#3@24@r;wvH6N;gEODjI
zkgh2h{ctw%df_yY8$kCb&V1cHZCgL#KZg@(r?R&<a^o1YcRPZA!cNjOez!Pg1OO);
z2*Y<_YkLoZVO8Cyf?R|f=>BwAQIYg8Ae%6pACJ#Yj?X#>Twroas`rg>WnK8y66*Zr
z3pXB%>I&XA|3Y#<w}}p5S^#+WVbR7h{dkzmdrEYG@lm?~|9z}pK(%VNC;F*2uTO~Y
z!cHZAob)Ix0f^3HyAPyDBFZUx00uAz>;NUu5sp<nazq&6(lCDk7-l|JzR2qvUu2Yx
zFS6%*%?)|3_XRXvplW<kbSTJ+TvY^AFeNsO#O^K+J`Q_JsM@xO|HZq?7fL7vq|yxM
zTO?t!KUZN8ASHKFd>wOAH1mPh@%#ytnW-aU>H0aQnDy-R?1)=BKPk*a^cC0ObBTo8
z`U-2?Z)A37Rzv6w!$hS-g8l1v7<j3qch$V_D}**}oDR9jo>51d;#GxP4%pvJYy9MU
zW_emQ@Xq0&8u%-m9*P&p0kN1xfBW>_5i$S=6c&d(akahQl?q~xDAg<<(+@V7;=>If
z4G4G8{Cnv^p??a7WvSynL+Juyq*T}lZ57Qanas+HU}67b1D7Mjam?GbBigehY>$F#
zAluE_G8LYOljK1aj25;Y+QPgF7g30Y;NaF@x(&0+f>v0WaF!pbe1!!gHMfPWhqf@U
z!UIOCs7j`lQtC7JwmD=-lGp?3q^!=o0pL0h8|AV(bBBPJo*V>}vgk)a_nFBp=zVRu
z@*1PU@^gB<Wcm5<=dQt}Yb@7?<=dD`ybGqyBwm}E%O_GP5)YfsIkn~th_3aZshv~H
zZ0qxh`)B)4q5TI0xAtK0co%>L_di?BW-k9rv-LUu%cr^jIo|+OZ0#ws=`6bLc!l>h
z_1#2FkiDJ1=(tSOq>^w*OI=X}O0IGZ5TqTUe)iL!UYfWJSmM$^S&#Z<_zi+hANuBl
z_Yi<T(SfK0m_GE)2X9WYldPhkIt5!D4gRWlx@Ubr&6*0V(m(uBkCk<A&&3VHAM_^v
zojxri5H-72f4N#~TeY>OUDI2(J&`(f+o1}Aed<N((U+c)xxWSI6xrv0JpO0HJoue+
zq`K6(s%h<5DyF)gb_|m5AQ775IWN+;r3#l@GStBvfnDWwl|6QARMDfMzH0n!g;>^+
z8q<>$d)y}zKaw=YgRkElw#1%TU)RssVWE68h}wVXA_oT>>51fD)?qK%UttBAjkOxO
zrX4d~tv?xz*PZUGH-|Xdx|U;rtd)W2=Rouy3c|~Q@RkEn7$3aYm44Impf4K2xq-qn
zG^t{QkG+CYMM1)C2l#>@+$p7)@j%m_8z;tvB>fR+I>p#v0gBa%TSYtS4bhIdM$@AG
zT+x16Q7>0Cv+|c<G@Fm&Gq-^LE#P(QeGne=fhQz)UR!z0ZYy`bxh7@3CtzM9u_pQj
ztu40|f2h`$7f&)iUFm)a^4A-mOpfSMB6rN)T`VXr=$q-|mEIYU-<YW`1edZ}6Q*!C
z9BGz^|2APegXcax_u;v}xwrfv74PT4+Chru>Qi%zFm7(rAaB^c0C90p{RTIG`<5rI
zi^8(~!m@6&1nydMHAStck!DFFiz?EoM`4St1ueG9T3k@k-7ITyb3u!nWi2kKXw(4)
z6!|<;jzxxpK^3X1zfmi|xNNr9wiaMqHuZ}tF4Y5jYV+N}h0&}%gvhoZOJv(0qC2hm
z-N7~9sy&2)cOFZ@J0GGuoAbMa>}#|3kmd2@vCHGhhv?4M{O**konm0ykE7gTU^|ba
zn_^&3mYg+f0N<CkGqW};V!)2bm&$Y7ENYJ-&0=gem%?WA5!h@!0-LR+u-SS9HrtQD
zW_u}YwjY7b&LgndSqht-M_}{h5!gIg3Y#aT*u4AfQu*&}|L<MDNAmZD_W#y)b7MPe
z|KHg8?Em>G<3Fx%)whH_*<$@zU>^AMmJuA1lDiMXsXrM{G4A8}=|%7C<!P_`!)2%a
z{OX`3M7`nG!&yQZ)V--!26ZzFRRUcAl`j?Iy?U4YdCU-#_@55(Xx&tjPT^*OwhRI(
zK+RO|o~gi)RPKlNPn{kn8sf+2oul)Y7bl(PcsKEb9h(&Uye>RU;ZTo&S0>t{)oZ>)
zFh3xq4Y8dm+}1%Q3yVL?6n~a2UJ`Cny-x4FXsZHULBqeYW!0r|#w&LNU$je#KFb$<
zmM>a-tzR7FN~WmgOP}%#Vd<21D*jvCOvTF43fbqfL=s0{V;sa@g94rcMZBw2q{k+o
za1HT_DP@<nU;pVxAs<}G_mZ9m+4AyzoU2ekEW*n*S6pg7Fm;>58_b247R!O7dpwg+
zxo3QP_O1O%f9QU@2hzP=YciVzNl~p)c;{o>%5P6HANY<ke7H<M@R>%Udms&UFiphG
ztQe=#I+%S`WRaxAUU=}N!k=;~LV6=3pK?}0G!p7qs9bL}Ak(2$b2LDUb3gOWGh3jj
zC5Usc`i&e@C;f5id~@b}^P}?(PQ8<gnADk`=f(+7`|4p;rA@0j-vBD}%l6H({XBG)
zUk$!`?0u5=vYt2MgZzjO%n={_-6K9&JmQ1W5$C0`<dEya*)bn19P@c2>=h8TXHm8H
zx0AKEaFDA^khO2T#q=%vFKax6+U=T|c_>9eeR$}^6wtnB5x@7hQ~#ffCq|Mj49gPh
zK#qW_^PpvsshgdhQatpRg88XvA%|va>?R|(-}9!UxAd2qFqo)F&lEfa#zWuy(uXhb
zj~DSjfeh#r+1YaOax2u^%YIsiPXT@k=u=QstN2qO0lu?4g7=5B9eGdGQ_O>cFF_6k
z8cW1usj*ys^vZ{UP*4|d(kKjVCk=q`%=mqDShi&uOSWwc+%v-SNG(mTU>N4b`m*me
zlO#1wBA~%x50;>9`2S<(NB1XY#_7F+I<0)2Za6*hZ~akGP3<*%(i9$k7dl4Rfjj~p
zH4it$e*!iQJlhq!V!bKPcYk~)58E@KT8)!U3z`PiVo<GeQ2DdZYrj9Wk18D%rh-+C
zJO-Ekbr3^+;I^Un1{E)f0aw`F9Y>a*f_goP(!gSAB07>4Wgy+jY}^O9;gC|~Bmt?a
zVu~I`(Y>s~+1&9`ex?e2aKxQ2-uM6(!_;8bCmj_Z>uhX2Rje8#VaDoVVm?gG2O8)e
z4s`Qi9WRmT=hnffWj=4RJdOdpFrdi9_XoWje}vHJT6HEpOcT#&kyiJN<bFd=3m?4t
zZR6F!!ME$*+I|Yg_a=Uizs*N|Tw4oLLnC(DO~OD{TU&c&SRru)u;aSFxkff#eVHPW
zb&8qHgDnVZ>F0NuppO_WA=f)^QSXeonb+8|9WthNe%Iy!v;l3t_aG#T20^Wdl4j-t
zpDqH<>vJyXcXqf71;`|S3_PGJ^<jg0lZ!-O)y31rU`KVt);C@;Us;dEKHoDS4j3Pb
zbx-W4ED>Kotx2B;2cD+%B=c?^YA2>#sKgA|UHb8W;*_ZaM8eq^_}0LmCO26pz@|(F
z_*F2Jw`70nTwWcYpY__^-btr@)vazim=UYS17pzmp>*(z{p4sm)B8<qmT?RsHh$wx
zDjnw${@)gEb?i(*8g=oms?3T(Gy6>Rt5atJpZeKPN5X?z_{qah=Mz6xA0m=x_wwOB
zPmK{$`^1>Oyrth$9?Ml>Jn@%_8HO^m5sv|pnj<D&bprpq@v2iLg3E19hFibj?AXub
zMn%Jw+2*QmJapAVU<Dhu-r{Jht&Tm|)ZTbC-4oB+>E3wO4tGR3KX_DNtD=P6v@cEF
z$Q_ORk&xNDF+!UcMv?C&9Me;<nQ$OU<cIcE@BE^3c6w2fIV^T}gGrEFNI0ni0suW6
zXXrl0+U<?qx4q;B*4#j8DZ?t2Ic%mYEoXzHg{oC4*XcWvT=ie+g^n!%5%gdK)-5#E
zU7A8ct*jQ|qv1;;6(!Q1<iB4=X}Wl?R+F>lM=WCe>_!uqG)LTrv#5BqBX|1c?-NN~
z9Itj{jQcWU@gM{hU$n?85@KC=c3O8azb|E7(4COe11L1>tK-$l>U6)gv9VEK#lPt<
zTc*(6N<AC1(^n_$Fpcw6AGv?t%cWHXHj(+P;mjF^+&~nJ!=p#S+?sMh`pua&UDk1q
zNz;MV6M4i5F{o~pJ~&Fcge}C$kb{h2o#h*D46vrNG;S|BgObsjl93)=yzF(4Pdk^j
z2JW)TGxYbwat$5Va0W4<>1wM{Si81LnNP8j&TR@%sz!TBn1E84R*{X>3iS)dT!zCc
z#2jOLo2WkwbECFIeIe#X6&v$(a?P0>Cx!DkPV8BnKY^U1BR_%T<nhy&uRDM8p!|cT
zj&aUUooe?n6IW1o?zCx~OC~K{hy`0)(dJ~$Y&Z$`7VbWBgM`m#{ch%4N);(x0MPlA
zQ9RCAPZ5ahJG^-h>L{qafIhIs0t%Ufy9#-i!TnKGQdE0Bous&zF7GVvW6<KKpeTsh
zRJ<)8a?vOxc^(*Xa45|sP1qDSRv!YP)?o1p;rxdsI;4`dx$kantkVBeVlHA-S+Xo;
zScbf@cHml@a6k1fvWE-{{nK-?614xh85vJ~%Fa@E2O@%MdRE_7$q>z0l{NI+FbX{@
zER<5tDX73oWs9<k8GR1a=b-3w0DM4$zdur1U19BhpgtGY_9CVD71s6w^|_#Sj*DB|
z*Cz4YsQ~C{Zs7DIryn?8<aj|bNB(_<YyQ4sbL7WU`NL@Zp<LL&*W$vDC+~+5_cEl7
zMEvE4r(#K^dBelM9F?X=KJI4<{ktf1DFXfS6Y^O}p?T}s-^EAiL{Re4&lLJ;DKu43
zKPPJWU*!Kio{f^Ar_x!)_4ww&uF+4nx69)HHk;cm+yA@OY(Clf?En2q;{Sfx=m(QV
z-;Hl76%4~r{rW4hFYq!Jn)WAr8s^=EV&!G*Ui-V^>kXaQY!t-6Hr^)e<Bj=I(8YVD
zLRQ;~S?b{HW<?h|__|f8Tpw&yV$SUHwOO)K`HGH}eb>9u4LRyp+LD0H1cO}3gK>C^
ziDW8tB3;Eh!vYVauLXqw5w#lp7=*~ejB^W}pv`{^Eyt8Vjo%#n5B2%$zm86hue!f|
z9lUxc{vkYI)$0R_#<@Ne&Hv%-ZNG_iU;NMAO#le%uKbU9l^Z}4+6r{D_>1^D?c@3}
z`pSpgzyGJ|s5a}3I(-nY1h*xkvEWyGVi-`%iy%t~AXR8+twYfm__vM8Y&0^kUolf-
z^D8}mm^Wt_&L)H1bOf+GAb~Dbl?wd$%xnK;`Tr!0KH~kKot=%X&+)(h_W1AUOBGCp
z;o)QMwyc=9T2>ww-#zbe*DZ3aOTQ|WN5p@`v+)@aXSMr{kZy5OFukoK(N=3EE`3Y4
zkhmaMmo;I0ecy&|?x$f=)=v8IeVgfzW~IH+-`=;8k&7WiGR@>tNu}k}Xe7#a4Te^G
z9;%tZ1WhVXmwFff!B;af(L#d#$({`S^_LM>Ot^2+H4L>bw;xw;>t5&&hgGdVQWPp`
z=I{W@+ULsH*4hF3=lO3F0??|uZyQwJik6de9bzmQg=(`V*67Wu4T|72piyl=mi=|#
zn%+fvy7==d_a^=$N~!?#eJ3ut3#V=zKW66CjL_t{QRLoNYjqsuT7B%kJ)(g&GknIY
z)!Rl~`!K7tKWY`={V|s5Ah>nCOgoZj70rqUCs-<@NFi>4<Zzs-t<|8m%KrdW(2upI
ze6z+!e7G7W4FU)fCQNyCa%ddDlsA%T#(>ZCWB@kFN|V!Vl|n_pXl2+qw4A<C9!l`i
zItKlzbJRV*#E_4CXMAfYrlUl<M5|Zi8J#}#YvM13M{hjtc`!*yZyKXH610(b5>#uD
z1JxQBpzxst)IftW&;mUI#Mg(R#&wEuD}c8UKLsyEP+D5vy&t(wHBV=BQleECIkVGM
zvLB?bI7@JYiq1JfOJhuIcVjPot(L6}3l%rV@K+1|3UB>rIC4bP53bSl2WmvRFYGDb
zN(8tM5DiCT{7!X(zMo)NzXbmC$H9dDao^%UZ>M|KW<cfrX)NQ7VSS<Y2jtCB;Ppnq
z7_F=T|8RE~;NXQ9<4>ajn_YD6a{w*>+K(6=M4|+gw$?`OAb7h6APR25K67J#PZa!t
z20>N#Y2DM^ArPq6r<9VLJ;!54AYq^Uxd;q`Yf7e9h{35lNrE_b_X;pbebf<`b?Phw
zgHaG;S(6dLIaq`U6zRe2fTB1_*b5%s1moFQsJ9AMx`CI%a^GfQxo`8ZqB)ao!ZPRE
znC!x#XEMV3>Ds^m;{)VAcD<Wm;;#>USTdMKg|CCaIK_44+{OcSD4Jqj0Gm$4KmH+_
zYe90zy@sc1kQ^MeP*W%J+TT`up1<uMZ0T>fIyOZOzrcr<Ey!@Xd)O|0;PT|jkM&j%
zU3YXHMnQ5j#tgTp)AxOGJ#!<#6x|fU2`9pp&ZyCfCHLS0!y#R-R-5b8omxZ1G3+-D
zdYp!LRkci^PK~FDujcmAh~P$)n0Fiupyg}df?nnESfhaol3GJ10_wL4TTl&P3$y98
ztOdJ?mPTLN>6_z4uu&XC-Dc=wL_ki6OdZPiU|m$X6KjONI=g5+rVZ|r(FbOr2nZ>B
z6`<}}OP9Wfcj+h~6y$^~x=v$JJW(*CiO!>kTv-<)VF4<tg#wKvA(U_@9<!;5e?7uD
z{D8R|kJ+N+((*$X&H6?In_b6WsTeM$W#q}VLo8hrY9qm?J!>__?$8$#EqoX8O<5P=
z?I}0?+)nn82#VyArz0m)O3<YvCNcz&6ks^)da<80_xbao8iNg1Pjsa*ti%Ub!ts7+
zB_zr6I`Gvl-JDyxX_v<3T3))}vI{ro7G{Z7<%)fc(IQ{(YYkGoUONEl)S`j}O5|Y9
z7%7B0=@!r6ew!mtA68#09Z^^B<%VZG8-YgXOx4I(g_~uCo0-C6H-7UhJ6j;{!jL&y
zJv7Q*WoZb?B48R86;I75f0`v9kUgsskRuyU1F^}9N*T?}fkim7MnO@S8G7p!H4Dx0
zkUbs*Von43C=e*Dm-MSyt2uJa4A@!HkyEKG0rN}I6v`q6%MxRBO<6K687nkQm>_aI
zgh9aT(y_{d$Z-?~0k4b33OZqo32!@(>q%~Oa7{V<kB%?)xE>aOZZhbm3Ho6@FPu$h
z!BXPJ-c>BU!)`S42eY?4Sf<4*-*HtCK3D~F-B1O`GTk{|l*48ZMY>`&N0bPqySYZh
ztjLnZQlcam4ewUc8yGVCq1=Ac1nX78J%d)-fJ6=dX`n2lUUP>HjK?~!dx;TFWs$TX
zkr?o<V%#RgAuk*x6iSxDwGgQylyMS81BJ_F5YJR>Sk>O8M{|EwiYqBxu&5*~`U-Mx
zl~<`QVs->opv-4AO#3`9t1J)JDw$E<Gmi_6_m<7lG>Y1mRcN6>bXYSkMi}V=$xWzt
z(Ybt4J-a+ba|));<IqU%kn_dMz(?+YABzGRxgLM}J-vu#x+D!9kpL@5yx|@^=s@i7
z()jJAAAYaogIkjy6UVleSPF!tVzbLP*=W}R#=__H6;KO$w00n%qt)9Xnukm<6s?_J
z@av`8A3U)Afo^|bwm&Fnzp<LdKvPoFA!HhxwJbi~LkGmu2zl0sc-bLYZTz0SSn?Ir
zlgT03^=W9uv)7(i1M@Y#mmfU40y;=RYbtyN`4{wD&znWAcQ0MD$W$^^DY)oqAY-|+
zK~8?SN7|*bYN4QHnzHtYeCTu@c3vK;?m*TZJfLoxjR0F#PrJKx8#cy7X!k>-+(6t8
z(sBc<+@RpTY~&C92v(FY$2wj6Dp^3SwhEm#rlYvXAa{LMx1o;#6##uK2&=_{oPHS8
z%HiqbHd!q_e#^i!KFous45_k~wyS2KrSInfn`WR*%fL;SgO?uG$$G}=T?V50WgbZ5
z7)h-#<6tQR)cQ1k6k8dn=CMz1EL+7;ix+9{)@AL{A8zof^oLFPW1!v)n%06f7EU#;
zWtND1lgnYC6B6-0eLLjrL(42ywWiT3BNs+<!wS|btDv#EiXDMIC{g!z@X;EA@qwXH
zVlG_`pU(nPIP>|yoN~)DgFH7VX9p!;hI}1(6i%*%F-9d|<`iDYY4WV5oY$0mS&Xa+
z=7GqXj7Q`cnJ8stC0`cfYCw4)suq*XbIEcx`RP%8PySm_XCbl{oCl)&{=B`Y)<TqR
zNDFW#!^Vf+Jc?>9K^Cp$(Vmm_+l>2V290WNsMAprkLeBmF@-`hvNt!kwx4Wmwl?A4
z?I%ySx3+hlJbmgnTfpqgw*p%}%!`K@my0Anwltb~;Z%sKW(FM+ZF}LHxmayyDhw7>
z_->m6ZkkQZCIbrfT3Ttn^(5EK;!2wvoB6iT;4!b(T0v6_>uhecY<apqjeIYNgK$!9
zZPbi!Dxk#gle17z7aGb0vnv;IMki6T;OOCXL|hE=CBj<>iduEdQ7i>`z7%S=DLDRJ
z0_`U^y2qoXDwe7#n7M3>I=2+dT?Nu#Yr|<hahl(4I56-|TzsWS-GrHk2=)GbFag4J
z0GoBhV;vD%jLf}qgDWDIEgelKZqkZuZBgR2)y>vkt|J$CJ{XPZ^sb6cg+q@2<*@Dx
zAh4M3U+RCMIZ!ssovl~OOJH$RG7h6`PPd#>*2DC924t@Uv|%UGa#}6_NqdOpzp1S~
z|7BYKDcxf8-?Rvp|72`)MToSlkA-At&0jFF=58C<{i4a!+sDEg1Zc}oVPWApb7+_U
zx1PkmH+9NV{{N8=%r*8Odj(*=|Nq8TbNflw|9_|X<g@?(ze4`+mq{>MCjIxE%-;Dl
z;N8pP&Xq1pPokC2050Vz?RKb7?^8!f4%gR($HV1vZ!2220&ZjdZ^Ss2zbP)_ff(z6
z*D|<jIO5lK6m6NNc&we;(`d06zY$%Icsy2dJZcT)Cz3WUIRC7A)NB%)cd)TqrhI>K
z`AR;;H3m1ve}9nw{zRqJDWm5pH%>y{I&m~DLK_Y?v))XHWlm8<X1!?}%Muiol6{(Y
z6qk}zTw*ls#I{)xOAc#6UR*aGS69C3``55B4#O}#pX_s#b2;_tp&gY6s<TVuNUUK9
zbiO|j=}>d;Y#db{T^STqRIM3-F={8A%ErezI5?1rL}~@X-r#<Mrw`Vlnq6WTy!8hn
zudx%Unn4|jNa+PAsaBj<yhtvQSH~Nxa;4%)r6O0pdY-Gg!uQCV{`!v0A-^hXG%`S`
z@l(2fUIQEoP7S1dLf)Hid=bwg-@dRp2tofv4@J!XGsg9mgrOMEyc;>xFh>Um&DLhY
zV8W-l$~FgQeI-MPYacI|0*d<i8`@F-+H!WBr%rRnX>B>1EobXHXJ^Z4ww^kz?HvcC
z7w5Z;4QJzBfqha%%vPRhp+NJdDF1_=q60S?D9eU)v}d8>`>T^8dxxwFUqw~b7i)@R
zu8DT{q<vK*iei{5<Wu21&<dMrJGLH);ML(q#-eRA(fR3hss<p(_^piA%fM<{P*!tL
z-f{otfoeG|J`7t}a?{ydvgDStwPeX{XM4$#JI>CMC7(D?mMr<ydAejtAnE3^;{?m1
zxuXskKSzrAm%slDD%X2l|6L^iZ*0SFTmIj9vbptH{{OW1e;0}W54zK<_tu)Q?hsn3
z*&OL{lk#%31W2Q^xUF6t|DO)AkXwu%^nd&)WO<4rr%OZA?QhLjls&<40S?%wClLEm
z!N7;LHRIToCAUj|SlytIz|g>KOyUk>Rs|`BY^*HPsr^tDgP*Wpj`scy(w;80{%4aY
z9O2!A$1ja}@n2fa&8+^v`8ogR-w^-hD{&2~&8|55<(Gq%>$No@)`zSkuhZSA16oe-
zQfb=jloMdRMQ_(JF!>noTdm7Vk?+l-IJlMN4zL^sF$vOEC`IV{&;=oTeG;mQ2!jOW
z^&!0U-+Df$`CO@BrLX^jzt=w!e(%Ir7&YCUjS@i$OGU<%ktt4^J1p_mWIP!a3&ltb
zuq(a}K-+@9c3DgM8t~<mrxbNX<vmTq)^KLj=cny1^1K76x&EAQX1+YTJU=-(KKs6R
za(sH+J=nsYglSE5xJC6!)Enu$Lpy#;N%Mtcw**Rb=k(=ON8;9b-a9!5RqmkqV8nRj
zK$WB*F`tF8e^UOBQ@5@%N6@;AA0!UWm;bgmH+MF(@xON9)o1?y3I2dC*Tp-X6ch^n
z1s+Lfek7VX!WARTjC$u&e$*pix!TA&3i^>7-J>>0*BybApp|fR?Zf*zCP}7L&C`3%
zty(>*iPlDQ8;y=(kcj_sy|7QFWO#aw5C7kF7+zxtv2c7Ss}pKh93|{11c~%T7>yCl
zc-DXIdx=OwvI`}CG>$207CfAWe+HwGi}f#N{ZZhFlfd&Qu@CZ_Xw^6B;&=!h^8htn
z<>Ex#xolAy1hKRd4-mv%klgUKR~#Zo{2I>icB}~d3COnsv%|WHxJi=fZlf{grt4vJ
z-2j*kXsuCSlq}i<kr}O_nO88r=BryeBQxI&^PtB5IE?Odz{A5zg_3yjg^gM4_u{GV
z1;fDW(Yo%j$(h_7sYv>^_r}S;8yCC-T|imvd7tQZuDZRei_X#Ui{qo-vy=0qA8{YU
z?c{1cCd&ir{aAqT1FD}E^z8xF&kBa2M16W_9`L#9m5ig1wcoQF&P{lSm+<fDU5p9j
z*bd7f^~S!7=G3u2NpO*x&#`bLU$A|CbT8tY*>E`W2Vxyyd<$P_b%cNq)^kTAaV7%1
z&K`K~$c6(8!$r$;y^`8bjRHM09o~W1WW1px06PW|Vgx;)B}W1nVTI=1_$G{K$@hK1
z9utZ0&>Ho>1#Dz5EktPb!kX~ugxlp7XMpmE>xpZ;pNS(y_mlhKEgG5u3MOHI_C^EM
z1-@E>p$b&fJrZvY(Bj=1au=?A5Lwru(E42%@owoSEXe{_8)L{q<dc-bXY2z;^$HOU
z1D16t5rjN5!UTM>W%q5*V=|+h0r0K9A)BQO;K<83EfsYD`gRAr$PmcT9RO!-Q~c<h
zI%mM>V2sd1B(BLmDqP&RvUyH0XnzbQ@*_yXl}vi{Uxvo1Q#N-xpL=9th2%3hE_3%<
zE&z4)tg0~nM(CgGqFJkn0~jVptf-e`*zJjW#FTF5Bm2LjlbtL;*-Hc{f7$@$%Vhy7
z1P;~s6(B{|*(032i7HhNXg@^^4_$U@MF9OcW^k(PKBzCX6?rlEH5PLPW~swwVSOdI
zP+asboQ{Z|m{6FiaFdt$;bg8NKrO<QMNAn~sh4nWQl=HyI$)=m?CCeMOtl8;-<v8r
zm5=G~3EE-)dY3CF;ou&mevZ?h69&_JLvVVOL9L2!3m};^EZ==2LIx1l8A4cR5MjL-
z2A+U4cX|mOL%E)DEJ@ypbbeNq-+*#9-VQgQTZf0@sXWja7Z=+Ya<MHVG*eNrjUg3N
zcBywTjfm1y%;Xwgw|di1kJwvIO(ma3>fly}ka`PXpoy9ot*sd*dNOFS)Hb;VVgnii
z$<Lqg0X7Z;kOA^ASB(G?#TW#mgu*)je`&TvAC@gK*Bc+GiS`^Oc#e>ou9%DSu7k21
zFeni104nSoz_PI`4#pbU(x76N@>IMBVDxz5ZbJS(j2wVz{a+9+VR>%0-Uv|#MHep{
zf^<2%zNuHTQ`a+Qwwh9_;&J-d#0n)=)x-RTjDSyT;vdG#F}>Wvm-^j=V`$B7zmPT)
ztL36<NgXgw>SO#%HUneT7AP@{jF`9}?@nN^1=bJp?gajV_E2_>jlF?3q`1y9Hztb0
zjUt*&sx>ul#2jLp2lyAQBLT<Wy@vg1iqf%<o26t48nR>vx&BKQ(o?<;xdNtKDpSy+
zv?xt8na{jMGA2|!ZNADUz`KPlS7q`zr=Z#NTXDO5;aQd8wyQcO)w>K9lAfk1kxf%7
zn@*fc=FFEr^wZ+~w0M6qw@*d1g6(31(@AYkkBe#|3fOi(t@+^bc;H}sq~0x>PfY7t
zf<7KJpo<W#h&|%GQjvpAWQu}i813&@^}ASQ{5E=xQQvrTq<c92Te=`nFX|+I8SqI6
z`ZDzE#TcvfgBIryNwpZL++2<)185pIEaD$y`hmMN{g~`2m(b-Cj|!c6l(&23t;~)H
z5^Z)4c|-3UGi|DT;OwY1c4_AO_<xyTRtg6#9SdW~-c{^c{d&q6)WvwkEq{jn1C@$l
z5aDdyL(e^!LQ=Xi+!4j~nA8mlwGCyVJymv^y4}`U)z)NDEuw^GF@dyzLPUyYzga>a
zP_p>TKmK7dl4=<M*e1&$V15ys8JSSb0XIPy?=c)%hMS3E2sReRhi|^A<h)=2gxqNk
zu@?WX)ntsy)Pax3a(9=Osv(T3<smy8bEe_qbt2c@{8cyCf?*~}x5yc-WvIS!-a1lG
zG5e<nQc*ZgGtl#Xyi_}BHfp=0RLJh8(TP0+wrM-T8|1oT{Y^=+v0ZFjQfy)un;2I_
zG3ztty+a8?X%(<+D%#~r?goH*A2ZleRzzqRi+X<ClL?r0zr&#nCQ%csl6b1-RM+2#
zRcr3+W3i^FY<|H>Nx^ZhU`?e)-q{kTD3g-JDuqy1%O_z!9Ndc=5O!U5&PO>_>wQ?p
zVHhl{#|bO(l(Tt^Qke-2y>IR{Kr)%251hDcpGqhoI(sP1PXdsaLH4`z#UPw~n@~b?
zQu>gn+|jy+VI|SBi+n5=D7@FRlQEl&f;ax?zDND)rTMsFo}tUYYg-$8xhc$>HCP!c
zvUvPw@$234rt|GFIrWg7%=VgMEssiLK7kQ%!>x98bb|(~iGOFj{Vm(ztBf=5YV@dD
z1L)FnwUmpV900@rAzPdsAtWz?&hV{p(>_CP7B-zbnFXQ(0WMeljgwy`SRK0enARue
zMRW2oF*fQ^7)~qZjmVyx^jvMIOH~DY(}Nwlho!VFO=L(YAX&-Qok>cB0pvcgKP9;%
z%SOl$Tfe59_Gp^Tc_|ySlR?p%93CJOkyl0<>jRoR>nm`hDEm%%ESoP`<ySF{?4BOv
z2*yltT{J3oc4|E${$wy`Jox6DVOEv)*8_g6s@m)6$f{b1vn+jhkT=Js=w@eFR%+x1
zUAcgRDEGz)3ng*{v?|?<=<~oIfk61%E1ZWt^Et{q!z>c&Qg{Ivn1X9nEZUrMn^42<
zQY!XLBOt4uH=SYj29kNChj6bq3MR9+y`JI1w1Ub%na0DRp!}~5S#x(K^=m>u;0T8c
zt<|iCuxcYzGb>izm73C3<coR`_do)kRCSJYxqN^S5T+{U2zdiTO`F1#%;W|>ioO(l
zZxkv|ebYmWi+|;h02e;R8Ne{KFD_0x_AtP|Q@-68-^X4u5;u350l1G7e_UPBpSh3O
zhE!atg1aZ(!-^b5eAUBuRm-TeGMl`agm?1pw~q3iuO_2aWopyQ#dY;<ZmTlC<=WC_
z0}^5oPNc!+Hi(j$JCduE{o;ekZTN=HP-JK1*T3-Kgkv{J@aW+&O`<)T8wD!Wp!4<2
za{o^uQY9-aD%(^wz6tL@4t8PtcPE3M|2CkIe4??6&iktk+kM}ulseRN>wKWPKbr)9
z%wSn1(Y|W;@bDYq-nzjE?TbBrD}%o1o0c<9hD9Vp4Q{+*l~Y1F6u*V+vsACwUunuR
zDNMuCZOJK!cMHqY4&u&0W(!a-$YEHgi!C^s@cql80syH>p|^rWtvHQ@jBQn2g>UPJ
z`KdM#d1oM>>x}I6?))x`l4G)iGkX;p73j~EpRmdx1BA=^q)o7uIuvK1n<v0B=K><n
z0{DM~fsi=`H=h54P|RYnD=6umJO>%4&Rz4tw$&+)i5NwKX45H9DBjEs(E;JkjmE43
zEJD?{J<`Mm9&+MAW{h@pOUaP!&IO1#K^Iw|2K6)H14_gsUerBC0Pt_ZK8z|z5IgW2
zN4y4&&qf2nO)41!7|6*z=q1RJ!nvSbTI1=MWH6!C-Xvw8imOvmKI}D5Myr{XdP+qQ
z11iKR*YSK{?w{WyYJNDH#W#BfD?vWRj6+2b9F2OZeW^&wduqvbhLJ+_DT0P0(!bTs
zm%6kx(#Yr*>Qlg`*LS@)Rb|vNZh|8#HOfkE;F$G^0n}14n6FIte9$t-xOp8Ig@0?k
z0*%Rd-po92WuCWRDfL$_oqIHr0WG*uKS<D5NXNZL@*4y(Y^D*gQIT-y1oACN2}XL}
z_zaVG7Fe7_fp~Ti2Lt$*PBF>A!Ta8gKjxZr@)W{zRM-8LCl?2~j<1SlYqP$!y))c1
zvYT>obC0~4jonFaFk@E&xmVI1Papa=AR5S)sdCUTP-0~v43{T2w&kqIS%}|=fpHhr
zM<s~kmJI3!9w?5b9}mUm4jxCQcm8i+^id#9%QUErVZP-ev`TNpii-cPimxBY0;{^-
zyMlIitX6?@&k{e|?vB~2DnjjIT7;sORiRZ`SGBB%09KySVS<(dAkgtjHv0`tbt>|}
zgvIC}(L>iS+z09I-%4ZPCT;_mi92c}<7ugr<hrb0$MP#$jPauY)Hu*Hb-lQ1idm{L
z#}-r!#sTt4T2~*OkPW+M6bpO6MVL@t9G`R;GAdalnWxRH+@<pSm-sp-92^m2=lhoa
zgyArMlt6euk^9|60Uie1+}i;4JQVlgOx(D)K9%*+c0LYhpkhuj0ejIbiZD-x+C>LJ
zM1yeupBb=vTaaddc|fKIr^kb*j8y3dBf;2^(5QZs@vw1}T`}_>IBL_Oj$5^h+tfgc
zKbOENo=FX>WTFdU89hcS#Qh+PQ7?g)xsfqJv9cwf-$`FWV7|*!@{%<C&3h1WR6@jZ
zZKvjVS*VFJ@(?)f;4xD_5Bef1+}JBX;eF}nA>Dmn`kL34K}fKDA}N)iu1`0!l$Rp<
zzQj8ZPc$e`-e>|cud4EiiZZW&aDv_M#o^c@^i&corRp^17nyFznB`~!nNRFEG{xWP
z+$`H;yQ`_0%4mM%#&ptk4f7KBX43&nd}egEw46cpJ;S52sQe_nlb(us($~hq;;vAg
zjA50Gr7W$XxYNVCA=)yS0gPb@3VFIPQPU>KOWz$?Hb_1J=7i9OcEIN<Qp8}7CssRQ
z<Ed*iWUiA1(_$XTYK#t-80R$*Ug{e|JATD7)1z9EJ)}N%CNhEobi?c>=)a)!`fsN|
z$NoT;=dqVNaK`1dP~9?HLR~dY|AXpuEFE^~@G3p#LNB5b%r8g*gJ38F#43lg6rO>t
z^lNot<GBqybf`s{gym+R!`{`?ipMCD`{P(29A#>=s@lgviNYiqW%NVPFYZ%d?l_#G
z$<*pY>pzU<enySMHMHaZaKtfPDFCudlr{sR>?908X%Epq`U(i^UObC}!L?tnn5HQ?
z1;!kwV@hWgt)U)tGP)K_)*-U|wPGN6(KWlZ8Fqg30JNzDsrDwgzEOR@1F0>7fedFc
zjI5-JLEVJV2~hQ-!|j0G&rHv38k%6Dy||^5*&SI8Zrtfq6M@Vq6`C&XE_t=1TlgE_
zpGvh4HA0)brT6$`V1lC6JQ{^}WSs$?p3>`{0QnVPvHl5L(1=b<gJ>Xe0@1*P#YhJe
zXedB#8k2u@dK|_2BPL86jcAGCaqQ5IiU#V=y?3xzumteH0;=L}K>#UKq<ifW`P`+I
z3z(2OjfT*D$<Ro>PFQkpZXkcbLnF;)vldg5>Hh;T@?5kuS)7C2WMIzI;u+CAwb}F8
zQPzFEfN0Z-WjkMb=wxnn#-WXL6^wk`vWZo~v`~yCx%)3zHv712)*rQOG8QL!1A!Ax
zq;tQUiJY_4T*#T2_ora=Q}xZ5Ip&JYlgS%2k50k3G&21zKu_K_A)8==rpW=fVrqiA
z+H4Y2W-Zfpf|DGNU_tkSVUKL48H#Bd0Ho0Hfu~UBfL;opL$TSi>l!0|HIj@8qy56_
z1+lB|_K}+qhoh)QuCepg3#QKoD0MB%F^%St!*XT}6~)jB6}D_u$I{D6MAQ;2&tRWW
zoQu3sbVY|4FAzq2Lp$#@0E(PVrI+{}URD?;Wurh^e#>g~o$<S1v8PT^WfQYf!0ncr
z_(CojFEl8dR^@CDsj=nA&KiUV327!7EJNSwxZ57@cP0bP4tyauy!6?w%O3o&MbF`X
zg+e~8<q|3^lKv~pZecer%76X@k7w$|476m2!4$Zmv{6T)VX3y@GrM=><pm^iTKaVL
zK{;R10H#olg^jx%Ll6|jHReU)qHKT{CT8fNi-$jS?Z`aYs4Un)Gox#}JS?f!q!UdV
z6kZO*P3z00+qQKzmV+YlK$H5Zz*GK8hLps6{K-wPIl1q&I`}+@rz7{?#slT`H~|GC
zGJR0;%3M~tUc;oO!&-s<mRG}5y={&(fm~)8>5a+W5uI+^UT$d9&mK?kqSGB(H;fgy
zT~@IV@HTCUf&r#axrQHbsFn`AI*`>Hg6p9-0Bo5^I&MljU)slL@oyP|ePy1VWxlcR
z(>rb3LH1>Ag$wLCraN(|L8H(rWEx8cMvIxNErn%FS7JKg@R!`iOW>96={c+a3p^W7
zg@%EWGML{PbY-#d;<eu>Lnp_!CChe4Xac+^zLHtHnNcW!R&0wg+{#O^JQN#Cl-e&~
zh?bpZ{%oeQ$vZPR=$uNqvttA^horI|=Xh|#7s<^^w*6h{hr=iwi`awpO|dlRt758w
zOZ(1vMo1-f8@&~9%cBd|Qtekx3l_6=6i}D#4(vAJyVC|5swNn&#{zC%SszmnRNis#
zg$sH!9xr1!{7PyUg2fa{2MYFJ7z(s53(9OBMYAa?YeQ)Y;21XX)SWnlNjMmAn^@Hx
zUsv8hSJ&Ff#p4=~HT_fNjqJ1?#*6Xq$qV<U*;^mt8}HIXe4?EqmxC?K<sh$nV=}(n
znMa}AsE^i)Y}ss#GMr-{uwab5n&oEhjm+MuS}nBb8WDI)O!l^EzWk4EtuOyGM>$ds
z%2yP!Kt(oKe7}g)B;#U{blj+D>nezVDEVP<1hi2H<0-V@PLev3wn5c*K7}a(Et@WN
zBi+r|h)q~mY=M(mI+JUPO<lQ_EJ=|J^oXrpY>%r3b-zyQn3}C=n;4QUxoGI-q-RIH
zWD7-b89tZsh|6QCT;Z~~+CFeB^;*#D{&<?)+xGix5%R;S72l$QHiKmSaM2yD1t5)k
zJ{f?<O&)9K$pTCrhS0Asv+KZIq5YIdP9A-cMMxV{Q$Ur>=98O@)I~<I5%dh2hTP}4
zr+h2L?)&(eD(_W*y$-P1&0=M~n`s*Lv5=@tdNP=4%F3Cm8qfP>p-jwDrX#<3H{*>8
z2G0Q*_T-EG0~K{cy+QTc)Td1#<1CPI4#>C=hz(|v1vANknUsLZbc)E)>J`5MP;aak
zMUXOm%fT9JSYzu&F|14%bI>Lln*E{-nsuU0Zq}a9E9wm&(2w~a8P$XP=|~(M2|Z&k
z%Nnrq&+iINUGFj%E?9W5br?fx)E6zJ&-=5^_s3`Aw9~$NdD%JboOQ26`{V@UFbE3D
zlo7K(5O+-x02Y*Y>1RK<4DV34E<XnEkgz}Vj=zt0JYYqY7=1tdN_gIR(SCXI_pgMd
zvJw<Y;w3V1VKOjO@iQ+|sjKitME*7brHapAF)-&7?=H?42Wk(5Qp^|_eVQPCCCYC<
zD0Lf$7Rj}#Jwywxm+<)G$U6l$LX>LsQ7&eX9~-bmZ#=Ei)Hh!_rMEK-gRCBaASt}s
zW>_3l(Nc`wcWAQOX^XW}tZFH!az1Y9J}5e8gQgIvH;z~-xht|_-5X2y0RIo*<#>bl
zdP1(=k#}Q2`}W{l%~VRFu0fR~oQ<jmptYu>ZZ)B&z7nNPLyLJkXaZ|1yq=Jmg>S@|
zcZGsiMVDh9Khnm$W-bHD+tDfuYJtX?xo@L0=!O&nM8-3wGEY~f^BwMzt1^uXhG;5=
z5Obh6yoe?Nq<7L(#E@EX+Z5%^Y`QEuTt2Ye;qI;q2a<!EhOS$YZsz4hbqG8zs-mtH
z6)lZx$W==|mmrvnE~rq=d?>44K+PIL_M5UO7uPeaFKJqtY7*U}70<pjR9h*!hiM)A
zS67)}fqO~wd}bTNEKB8^m^MiVlOVn!zX2H%CVQHpf!XI6|3)GE|CadAA2I(&bE~zJ
z&;Ri`{`04b|BQk3DLT9k46mcU@4r3FMucAw;=MT7ad}K!PWUI=(~)=Ov)I%*{|e*a
zJ}&9tmYkwo+Ck3!p^NjQWKz0tiF__rP5E485}nBYsDBT|v5tFDRz#-QmizkxaAU2f
zWKRNQ5uE@<YjRZZH5Yts7JO|MwDq=eHE>>I@QG@$Cd;qBu4SD>j4XRAH5)OP+jF5k
zo(%Y{nls`|EIl}44G$nNh&&!tkKwU8ULDu0!vLMcPa&iBsIe`4nR%){^ZzXWzZwPJ
zV-vtG;{V$lx%@9rHa_Qn{oDAzp7dXGEdQ4xp8w0knEnT3<;@51adLBc7#lCnS|u`R
zt1{1rSX{<(0*(nNqQb};;}{cJ$y}@~0x#x_>D5`#JfXD-{4?<+&w^UA^|A!3T37WR
zP<FBq@b-rDRG0Q1QhHn`lQ^bLb)YX3k1}w<U`eLV?@G(A9_Sohb0Drui0Z;QLv`j=
zX=DML`pj6y_fXr#zs+W`EdT%IJrsaN{J)jU|Mz5LyZM>_e@gyO*J;PV?2iTJh_cjy
ze4ePHEyCOxKEZ|nqG0#e3rl6dXO)QHWRTky%9M<)f-R(UyLre4@`b{Qq<#J^nOsJo
z1%9IokBdGolI#~t^j`ozUzh^?>$PUBq1swC+Ip8ZSGFrLF!*?^B=DvZz)}8YAewi=
z&az=xV7~a@DQW+U^MCIyh##o{EaLyo&FxJ7FMRiz|No22{~wau|I|6MxV?PFM6pR2
z6>)ngVR3u;tTMtb&+#9|@)csp^5t*I@+AVu995yQdg(Mvn7sTh%{R<;Icn-g9a(PB
zP$eG5w~JBBm{aPs=Bu@O{Kw3Vd@{F;8(UfSZi&X9nft#d|L?`KAn_m18t3HyZMB{>
zKj;7boBo*jf34iQxjeWSNdbNIJ&u8Z0moH@Rj5+oT&^$J0UwVBH{Whz1gp_(JXs&R
zufxdEJIqSONJ%TqoU{)+Y%uf5F4I0aIddYdIETZBZaNC@mgu)ksmsnsz3zGM^8DxC
zY5Tv<FF)R(7?{l+k-6BtFH+K4#%Q%R-nKU0gIbmqR33xIazyiCG583y^U)Ce03`Gn
z+&*vJpJe|}MvbFW_l<uN#7SIF-X<U3{<pogvz^`lcUqtK|4;Jg;`02zKo_V!>z>pq
zm7~-4kDcD>@!9d|%hTRv=f7VbUv{2@esFnpe10Zc^__YPip73Xl||2=zdY&mF52B6
z#MggGV7;sJmzPJK-t*(jcMXqXU3~o)X>|E7wTjddtEa9Ph3Die=Z@Ng!PTs<!#+Zr
z=NDZ-0rBjeorA&xd?|=2X!g}rr_ml^C@GJ8?Z&~MGSTZDCTg07aU5W*b8-IS1pvD0
zbS3h~XGbS5pLcpkFE1}UXI;cfoS)SyGKPBfvh#iC7c`?C{U8Ew>WSm4-tp1J5%h)u
z0I%yEou6JDpL8zY)hfp?Bw+Qf2Nk|Qdx<O&*qyA6XF4ek2;#*_`}-@gvVOf`>)~cI
zROa1^UHL^AC6PN>2*4`GKmb=glzjePh*KVgl%=1;kf9!VehIoG0qvYUx8d@Or3M8@
z@AT@Y^5GGU^A`M<h{pFPqA{HA4l$U7-X3d}ufzk!=93Qs_pK5Ps|FpU3(2Gd>R8F-
zr!}%2v})yMoJ<&L(`wcA)DjDPifCB8hNZ7*>33M!euR0caL-t-RAZI|qjHF&pdUA`
zh$np|K*<>N(DBo~1~&2I2~IWoEyB;<i}ROf&w1|M!JtPP342&p*Ps8;{s~wVSKy&j
za>&W?v&;77apwx!M=<*N9hRlRvfx(;2buD|%5#rXe5pYFDxyT<#WyeUPB2zNpKrX&
zIl6Z^ah#ojDM-FVnm9Y}iXy0L$_rau(enxjp0<xJ&x!2X&!6|6b<TikB^Vh#q41XS
ztQl`yR(mRL%q{GEU;ov<{Qm46lRq}a0V?>DS(1uCb45@HJXMavS>*Y#33ddcL-nL{
z_C4$o`g;#hoAgHh<T|;b+EnVYa|&ywciioqrbS`n0nRPDmoGcwMf>Ecla|jxjU&$l
zin9p4Fxq-2U{h<KVBgZRg}uj}KqyjJ4hWQ2+tkbBljps!|4L`zomhD`3r5DRdtrTE
zL1?ORhJ|b-4~KEZN$&j8IeOV`KRfAE?UoQP9ucE7=CRL|Td^_Vvit^9$GnjQkGvt<
zKyD%7%OWa$XYj<L&LDd%pk4SdMuDIT5@-)swnS?hVZ_JeUZRPi1Ey@BkQ1J1MGgQ~
z%|NKC2!?tdunYKA){)Cn6;n=eNF}=JiX1ex3}t~#Tjl`16Tt5-I+tB$KGYsggaV03
zP=9zn^!kP$a|eV*5@~@Z`uOa7tc2xaLyoEvUnOaOvo`N*tL$syPvS6&%RWClX<wCn
zoArUb%RXL@$7P?!$!M$WTYnTJf0p8UJNA>ZkF>dz0d?_Sb^a@ciGcT&|7H5$`Ngk5
zPw%H1mFs_7o2@5V{coq)`t1MlN&XatwE*c{03+{Q!WMV&^4ZDpk%0d?XIGuFXc#~F
z5d{WnIpV)&6JIpH`>qM>_$Zv-N5S<?BC1C<c=_}@hdzlHkq>%XI85#^lnTWeB9+Y%
z#}lt!*2?yGqKh{JeQ|-e108XNXA)v_bHfqO!Z^YDr)?oNTFqv29VqIF5HGLV716;^
z85l-7h&kIpl5qSr4-n*t8v*KkeGK;uW&4247(W8zu3#h=ie-o%u#*_2H*bd5c^rzU
zff;?S@$fQ8I8tuH@oq4-%iQ207kEmFp5+xz@gu==WQ=i$`7G4{;eP{PM$81X6AB-B
z=iXx~g{U%w(Hw+h%I0}P<)KG}7xXx(3-OF|zDFQka1<MwVHC&Rk+=Z99C(!z**lEF
zg&E>x9;EA;3wlHX(}himc}?NBio$t)JqbC?r3^jor`>@juq<^H1Cs;^r80`*I_G7o
z*r*AGnme720-vG?(fH8B_%<<BQeyfxHb!v5S)C^LoDzmZ4!L7-cM~GyS#krr8U||x
z;((I!K$=FcdWCU-xLO(ETAlMSU@yj3y056gK4Hy4l=Li^#EI_?>NO#L4QCX%1v`5$
z7#SLRiE->;G-RT!pQSnxPkr|djs}gY!o*QAk|TfUN1PldR1@eRp;o4t>J)l-4&aM1
zi*vMR&Z(OqRa7XK2St`isl!-MypD4CiK?U>%w7egP@zQzWI*w`LFmRbA$LK1Q*(4v
z&==3g08lL60_cH{L7Q>FP#O%9imK>R3Q(gemXVZdQ6p5x(2#(m$5<ke3FTcSeAT$`
zF%c<nn7L2G9Z<xn7$IYzADW$Rh*cAeghTU3j4-BIQQ;qHEHPdx%mZ7j2s{i%KNwVi
zSdnMpVEPkU>ar1rgoq<a$8Y#kh_e#;l-`Eot5d0ZrOS1!R$xiTl&uhXAV`lGS{h1B
z;bYJbK&A>LA414HqLsoKHAm7xjO6f-Ll9e$d}%0NV630FF0yV%l_&%kWAb8G4LL>y
z_i@=(uJL*w^_X}DrqFh{9nAiOVT)u)c`uj-6m}W;N;(Q0txM&dsq-qMx*2lDxM8%w
z98GEld0{dtgYp1Q8%RRqftPnV0hJ^MA2TVdXk>3>FtZ@4sHwp<>=#jqj3{~sbDH?m
zcvn=LHQKCrH?>C{1Dsb|H5flw=aNi}?TUkx;~2&CapYgaGEbB4sD02eXBc2MXd9ww
zG}|Mr+oKppNQZD@_6ht}b_Jp0V!`B4MDTYq=NFB(B)CeN!5Yb&0{Ek^whHvmK~rf<
z#k6p^3*A$b)H`dlfr&UC=JpsuJ|I3!q1><)x~`3=P4UDEFq|NM70E4PlY#(UIb|44
zOA!YrE|MEP1%!hs2`26c1HE#WaFfA!&m#=94SOlYequCe6Nxzt90CB=Orf%5+9L(4
zBKM<jF(}9^VQV(8B>aq~By7ZFPlys`u!cpAj9U4<a}G8&DwU~AxnJu<cgS0bgK(M?
z%rfXsWF~S%O_Kxej1<m}23_x**ogqt)7xOcfjYxJF%oV>%QFt%)%AfkJz8xt6tD(^
z`fC*UAcWu7CDR0&m~i}PA|4v?u{%IvMtCFNl^DVh$PV%H>uVW`!}lrD`c{e?$mQWR
zj=L`BlC)M=G8l)%P%D@Arx1F`ToYm9LV+GQDf^PNUSVqUSQ;3HC<2xUfb+%cZs+p!
zO0>_Oi=*?i=f`MGx)LwWFNIXY9P#}4s(X3->?MApf~V)tk6#=gwebZuxl!k6ss(bB
zq)i%m=ne}tc=ubf2^14y4&#c8<AfVA1>zv$r9h0D2i=4t+-YL>UJAcs5J&sI!Pf_s
zncj?eK$Rq2ENj$x4puH0zZDEI2czgzNPyAk(;i{;0kP+>d9GNYuJm1A+SIawR^ixq
zK4)7nz90;Q3#bOSoRNf}F!E`KNABHjMW(!@-4+uzxLj5an+kJnLKYaG4{3rV3#TFx
z&{_ue0GYi>Ke6J!dZVU5`PhG&vC0S*>dd{yA*=oXY!bM_5Qf;%6|g~)<jKlEpeE2?
zk6ifW=SfA)j94*SUqLangM3{s6XJL7V1OYyam~bH1vZiuAn!Kt`dgM3!bE2WDP1I1
zCrPqIDKr(AnK~pn?8zXWk<ZZvX#K->3eXj)xX3z(m1s7}4ZY;hO6<Y-mMp#zG)(<W
zfliGo6+?2TFnGa`8q^81iKhk$ZI2>HXcg6R6(bE#Q4X7siURD5qdkJKPF$={A&s*b
z*Xos@S=ter$f6m_-Uy2_<Ec$ZcXAN=ye%~AEYZ04%c!(UaFbxZjZL8jr`MFUQHBmC
zv_i(PCCor7gT)6t-%o{MN|`tfycy?%p|v<=#tyU#Z>G4=VYh)!kia03G)q;|l_oil
z?1VRRF(?Egt;B#mj#~&z0x3X>dKIn~tL+PgA=H&EPRME?cHZsBzDmUlebb;TScW9Z
zR9Q5n0(<Cy5U4APvB`ES?#K!YFH6Hz=~-g6ha_1k)Bvl!xsPFSj3iz2Vph5vx5U!k
zy@cXQp^+oZNC8mGN`^3oJMdejrYi}WD5sTDjuiF?BKImPCK$yrC1<NJXM++m+9bz}
zZYe8v<i^T$YD%5N+HFQC8N4XiO0Hz*niaPX>$ZR{Rj6NI=Fu5S!93m6cX^9;YpF)S
znF1)uv=SgOrZ(|>55|@T7W#<GTap}`{gJQInpO%YjrTytTtx_q15wC#NqAgJiVFQ4
z9z>fs02-_@UNAUvWl23Z;A8@KusPFLlE$J$Z|n3f=B*C<m0^K$VhP5Sb9ewd42O=)
z7n6!jF-|bG;^HVMdd4k^R$~wiG!!JDGD$#f{W3mVP08_k=+#C)0lMO7@V+}>Lk+14
zbl()ur*DDJ$j$?!rF4e(_D~{?(nksj;}0|!rdZE28SdHuND^Z(kqDu|DH^~|8{KoV
zrX=KOOE_u+nKr=X3Bcpu8E_X$b;X-7r#3b@o+#vaE?=qT7v+jj6$M;SW*jAfY5I_>
zfbq~3YN_>tOl`E{4H=&)UX3<Q7oP@Z7L43R{n1G6UkFZ+<{OIJz`x7zM1qn^c-78Z
z&zAwQ0-LMAX6uiJ$^fb60B|D^xYyB^t|@^BpG^i6YsejDVipHdy~#@mf6Ri2?J*27
z1FT-FXd4O@<-AU8pOc%3CRb`QT^*#DRss|S;5&IM#lEKlgtKZ%Rah1(U6~GTOK6Aa
zBOY!XPAHuaZoB|KA^|ZKJ6wv}k3qf?mS$rq?u}uHZ&4>BR|InjssueTuTTeVT4c;l
z`x(N{rqQRBl;KBY`*7n-(`bO6CAvZ-Lvt~vE7D-0HgO)(H<`o8ENii~r&O|g3h`A_
z-jsV5!%zyMtfiPx#U24R2AlbahgwNevM}aLMDw_u`U*~MB%4t(CP%tw)5t_+lca7%
z{<Rwo$d4Z7nVS$Z$Rs`+%DPa|G0qVXQ?kV;nqSIsqP+;^BEuF)a%!AZOfwP`W92Tw
zBM%S)2w64?K)F3k<8ecJL)sK+Hx>V_AF(Q|jA(2pLUYEbaIg*iDU2#I<(<-|;zCIP
z`g}Y=%_88#pD{8z_xc(~Nr9Cr5BCE{r3g?ZD{2vcWY6SsL8Qic7;n+t8~0;SdgCPI
zlmU_5KNfvcCe&BS=1movxhByxDk+7V+~#~qJ==vW_NYPfUaF*@Rt>kPu>TtEE6N@Y
zv**ny#XSjR1>0}sO0Bpi;m{H!M3u=<1I&Trt2M>u4H$t`d}@0XntVb>7HK{tnf#6t
z9*}*R_Q5f%47^Z|dpRP>@>8mVsC1BO)fzU1If5W>HFkGsg5~6S04*H5Q2_Kmi*=3%
zWiZ4&hlMfB1L<giEZ;L%uP2hY9YOgYS+E8b+eb${w8e41?tM3+Lz`5qAo++G!o4Hq
zaVbDe@Mw#T(Gx!0<A_Ck+NBf(-2D7ViQ01DOnP-_6Xvm(DIn|-H@4{P<XH1Sav$%%
z%gsUg_;XJlF$Z25bzz80kOgQN7%D4?r*dPa3F56kb0$^NWfW1!c;=2MkO^@xDaFWh
zUA7Jk$&@%jK@>S9Xl*%bhq5BzrpFPcBz~r-wRrJTf3<w-YN|u69L6PL>yEgZ_0{gs
z=h2svi*;BwOqq&G7C~lDR5N)x#(L+(LU=GIt+uveM$vBTe756GOr+T$r<XprUZK{x
zNp&`p!wcR+BQsQ$f>db%#cni<NilKbIP?N#4}&FucZE}*Pgb*JdFI1WFy+%_+^{QZ
z|3c&fX(l7F2d(9!k!uPLX^)_nKfuJ_;-I5!SBa-S&A6{*Str*YbGguQAMP5`9ET@+
zoIXxjdbCVyR;!}c&elQ+8OEqj3P=U#sg|w;x{5=+3MV9;R=J0W@f#pxf5f6mjBLCn
zdsyMz$+Q6O;{-%EvN0j!wfm2X5{#<6|HoORRZ|rywYzeKlAYNeJ=o-jIcdRwjv|(^
z0`TNUn#qXJ0lx}Br>3RH21&=RJeDSEn6g6%NwumL#sLP2WQ15|0OY7oiQ6QM5-{O$
z&Sy8S6p57<m&5}Ll#el&QOer`d3w$^&hZp<Cd2E*0Ftp58G}J0!&WLXAQ7<{h>bO%
z{gDZs#aB~ku|$aq6RJ%1jb>a@pX5%jxVa&z;K3%Eml8~aQXvuNj)cLmwE6PZk$vJ3
z6O$kv8)2|)WZb-$;U*H`JX%Mf1!z_DGF0P>5;UC;Y2L~fD;f-S;!d-R8-x<j42z&X
zic?MuL6mypXvApc&@7>LgREE~0O}x~dBOjOvk^0vQGl6dY3jCl>Xde8>Y7NA(<GyU
z#Q~Zm)j2g$j>?mpm|RGupQPi=YgF*rXltM9N`oG7@REr^YfutrPd?ZU2NhmdDP<9z
z*WCpDAYnuO$i34C`BG2J_lM!Y-V)+5b^o5v$jDk@$=4aveN~zlOHJ%GHtpcqqo=8c
z+jpf&*P7#m<X1egMth_3IC{t#A7iZ1uVe->qv1%ehn+f~qR=%)mQUtM`U`q&`8Q-1
zkR*Q8kkx8b9Ic|xl;sz88sSB3TM~`4euWpTZem3Vo^1Oi=c3A+Lb4}JNG0u-!&yY9
zRhI9GRKZe{`nN*snv%Cl20(O;iCXD^t6s5Jl5|#KAqf;)_}jyIN>`vfP%^kN^&_K@
zJ*kW1A@A#CO@Sq<&joM~0Hyu)Y;aAMJeE-m<&=+~D<FvChU2S3Lph)7^Z?B@qRI!)
z<3Rcz%A;#ot}{QbITeEnNfxGYBtk&isY+KE>=a`Kf*%Pape?J0Q%+q|+cbLp!7@pt
zps$<AtPzI~+juSGc7di(Y*+7$<#pNnLb^g>b<?6BN?|-k2Oc1=I2g}HuuOeED&pe{
z*h8+RSebJDig{>bIPRf8JsIDPYI5h#O(II(iZDyop7a37J1MzpcC{l7C9XzL>_aM|
z@E){;`*rdzFc!BVCa6|``LWa+l1EOckJsb@3;URl?%8OspFoQwp#u8BT_ny!dWH1<
zL}CD})bRFUkWhAJJCwQ4QL#QULp)E9aMRM(Ae!Vk=(7lC9QowzQSkG|AE6-4x-dG`
zO?XlIB;@hdOCWGDdNa%o;02L48^^@8nZxznQOd%76TIP+T49@{I!#f9j6;cx6RY&4
zoG=Mh%x3ZY^w_evO=l5tumY1C%*;&g&-BEr-f%>TQ>O}jQH70XWUW&^d(u?Lc7q_f
zmuEp0vSoA0J-cy?w!sqP$ih&a@kyr|?DTacAu5l|RKK>Sm}P!Pn^!9Vl0I@0-r}e1
zoS~?HN(OlxFpO-9^XP}@imknKDs;aOFU1hok<DmL{0<NBBRYXZN533O{-C0$omiJt
zNysr#^7C-Q_U@P%CwaAahRWuGutb&iqz!d8)rY<0Pt_QN6P|zp>^=i>G9|Ao5dd~T
ziND_v&7d$y+q-2e(6}o6Qf4HPWaks@Eg{*U+zFWx--H3lgx$=lG0Bv?;}B771&#aU
z>vSh|#6Apzf6EJil89$^E8bP(q|og6w60FpvQ~<Q^ncATtROagC-Cq<xrvfmgjd3p
zZi=Lm5`X_bJ=QX{bY{`1G?H^9LZ(QHS8U;t*8+&4yMqB+9gqfKwy%9GKE0vibE}Jn
z#|vz0d=OV*KCHVUleD{uUDdiRz{Z>j$pB-}1S;u3Fz1-XvH^d9JK}^-rZ986W3ytA
z@k3a^ct8>pKQ~Z>WsWyUrFl`FJ@&)Fec`NrS0^`^Qm1qrDdl>B*URYWmZu!un{WBn
zMqH6|U*eq3A^-}pxSHT!=-w6f&4fW~2k8<v_+Z3Y>*Hw<kpsIjGR3%VWevVPff$0=
ziGB-EVc-Lajflguw-Gg=eJS~n0|=3vAxXBDz;LQKd8tcN2y;AxP9s}XB_^|R-;YvH
zQKcG_EoVr&dZuhnk6=z|c+<&^b_IDidTB)p!itkBakOJA2aVKpZkSJPd0Kghsq;aF
zR2V6j0jnuB!KsfPl0&6{9CCe6k281>?u&*Xb3}Hp9auso=Bp~GnigYIbUTCnmNx22
zc2wS|#_AwZA?JKTj&RI=O@Gu_p59q2I3rUtrO^Q~E-~NU9xBqC9;NqGk(QF7-URe%
zq{-$MjC`iKvh})0mpQ_*kBcm>&~~6rjIs88mKQb#GJBg&Dh(t*NO3{u@$1kX(dwqP
z7u_ndW2pdG&Ws&<p?+!upjYayk9C8FA%){mtL%6Oh#ea+gRI;}bd_u7C!_lXH=)kX
z#n0`_%l29KS0ajLT|Da?wO?L!ME8dd-k1IUvVAI!uauwcbMfM`(-G${#L*A!%kMi5
zmb>g=2@@82%^Cop)HywOe(843y5gd9d3xOK0<>qpiuT0?0LS~Y;-vjE3~}d|qs~QF
z{QN`bta6Uve?A6$uDWfkaC|0yzC6ZTwgeDeb1#p-|Dh{>I6rybxg`JH2DDBU#YLNL
z`&IytpN^l~omy#M0op6#=i}}V=P$b&dF)*K>{s#Q@!4}nbdCv7=a-Ai&K2~(f{`;$
z0pkvQ<S-`W8vG0(;GN}@W9S*+**&MBQDqf$yn1~8qH@}~gz-D;wx1oJ97B`nOZ?)v
zdj?IA$8no6KYEGb^DkarUYuWb(58b!2tZ)KFORQ&6z!{u99X;*uK>a50*ur4*%3`t
zW+Jh>;@9(+xRpS^Po86eiYkBu)xnI6oulsYPcXeu5}LVsdD@ZVebuD_Iyn(%og=^(
z!y#OCE`K^c!f~ivb}rf&%muxsFE0_w`57~uRvjk~sHF1~(!k5J6YQ;ubd#q9gwg&U
zs0|0xnD@%h$Iv*=P<HYissbO<$^R9|=v<t(e`QzgUnO|}n%bA!rV1d6lsenb&T$N$
z0Y1ls79fHHf>ZRoecJxMbLCVt5m3|8z1$I37oDSH`~lyA^kA+}cvN5&{r5|pMR*~B
zh&D_ob`9xJP8qI2q_s0eBhYqsT~^b!GxUdaadLiz#0HIY+k!BLf1h=*^kwG^#*3Cy
z`{?N9B`hs0fi(b|tCz6Yj?ZXPD%f9I&BvF|)q0|F7BAYzCoeB^GzKl>Eqeq>w5BIS
z5!qGEAsP_JFQBQTALQ%_Yt{ZLet@ZX)`4Q}=RX}I=izoLuokY4<(QmH2yz_DxE=JC
zY8N>8+ckb*XR0=7t!#<ylAr)De?`uG22zpS4r2-wAuX}61CPQf?BY^vO1&+OD{<17
zPwuMMbY&z?Dk@A8DFaGmBSFwf6N01Bfo#_|sEV+3#NP0<Sp`YO-ZXf#)OUQ)f5|d}
z8n;LFnX)o7s!MdrbekluJef*mDDCnanrEsg=#a`3yF=_6VytVARbkGXibqlSMIOcA
z0iwQN%9mEyvk)YSTmN1jZGn^@OL;DJNhVJp1Vu378#1_%1gM+>NZ4P|a>I(isQOZQ
zRQVRsV~70xsG~DJoS+MlC;-49N$T3i@ud37vA`IBZ(|CjEJ61{Nf`=|4P5p)a)}Da
zcl?lG+1Ir9(P7~bS|AW89*}f&$nBE;YTP-p=6g?Hh_q&z#pLup8T(l##ZI9`my=Wf
z?bs49wNGEUM3r8*;0x-ik(2b$jG$C)U+P+3(5@E`fO%po^|c#x{z>FGE9nfD6-=kY
zUL~OUs0|+(liKw6^gRuEqDW@o5jhqr$4`*RDIr*9?}ou$V(Y&0iB2l|8d8bIM^i-?
ze^DWeW!KRFZ_;(@Z(+HUlmnJt6vk<1I+lB=_yVoZla;A{Hd=u5=(qrt3f^NRTY~9I
zg61~nz~q6zH#1SJ_R*s)3MbGZzCs9E0tN&hv3=C?>ad(x9mO8i-IM>Hy?5_RD_Ism
z@4w+w^pNZX6cO;1QS=^El6beTqa<VYOwJmFrIG-P%>tv7?fGW+cR$bl>wbk>RsC4q
z>j4Wy6En-1DD<Pcy1Tl&s=BJWj^o%@cE9b`7rvRM$Vd{((dOiG#|y(87FLigtgha6
zffc{zGJvG-)n9YI<&3MDnM*lPy8$wX@Hwq)pC0W4JF@>#XQelYBm_0kvq<bu@^=0D
zD(|fd|90RVJ#-e>ZEOd})weqk1mSL_v`kWP+t8b9ywVZLbMuy~+a8L8bQ$IuhD-bi
z2US~;uk_S498e5tGtx!v=z{KF#FdHH9^Egys0zJBH3&sXZgOdk^bneVR48h~-HEI7
zY5AYqsg4667aD-U7gB$8?CuZ-%<i}g3%HS!$c>S?U8ctf5RN9Ivt+=B?#omJ7m9Oh
z<EzX<@Kz?vqISpbXBqeX?x$Q7^xOPGq|1%o>0RZzc93aaI%&_DMY12Qi&Pg~>s{{Q
z`A!(K*U{Dj*6aqiw4yo`|DUM)qjT#tZ@ac!;&>#U->QbV4UvZctq^2T#11SP2S9&n
zbp8r&(a5e4R-?;ZfL$i6S<qO<%_`L!H1>ZXHtZcfINYZlu*}U(&{-U)q;?A{&FOCC
zSP@vYeydFZPLOBe899vb;kKk3*ax5>q5h57b(J<oTrYW=5TqY{en|(C(mZEXkOFlV
z*VnhAI^N{=9lXfK&4xlzC{<2M+K`hIC+_<EsG>VcjtRyBLmc!LHTAozY*$hOxKuUq
zP2sCFC6#XAP2lIGlTAo;j8u4ZjGziM3E%q|@t;nrmED8t|M5`qpNh-ND{H>^Pb+K7
zt4aK)$N1UCY3A$`YFx!iOr_>x{-pLqfCFNk*7=EyjtO$H!k$4kBrRcm*Xh-TbR$fd
z@oEn>i|T26)IDRMP_kJO@dx=uhqMor@n}+TY>B9%(!H4Ol~GI(;w)A~uL^WBK!MTz
znhF8W#EpoTg)*@#PSBLF+rmfGTOG9fya2?}$S3wN&7r4*W@8EiwO!lCP0(dLLh>SO
zV4ILpWDBf8VmS(L6f;tVprga_1?zl)6rM-l8%^;gRl>?n_o8(P$CN1D;4S0<;U&oK
z55=_-O+62EB-^IqbT_^5fin>#&FM8TbfwF$!{~ZV4k2*QjVtc@$**Gi_9e}RFgZk*
zFMLr^KxYdltPDjUcPsUV%pUlSyuT2SN9Z9#?2JpixN6F^xyO*OG2#}%LPu?PuUo*h
zlZnhVph_f$u_kwp4&Us(<y88$ev9pT$(!kgx`0>ww$4A4?&i|o=v2t)8CjUHnm4`M
z^=q0lt3xl+t-~+ZX<ysx^+7!y+CvrG1DGv7IWTU+Hbt{R!+nrIZh&uQ88sJk`6k8-
zLaXPjbm_9uQM-Q$GXgve`bo=yAPtU+*}}1+p#I(+j5wds3tmWJ{Nj!WyL7rDgbYrD
zi;^k7h$Xeg!h))m7DDvo<H8ImVPmw+)>#FH>Gm%f@&`vK?_KSnb8TJt+h%ZhvLt=<
zWE7T`a6!jNk4%ex3~~&N!ew^|>@n?DYCp=U%Pwyna*0ZCo7AG0g)H*G>Ff9j*Iy81
z)|IA@RxMW5Quw;=+^Vq5w|em&EMXoQd2W%<yjhaRmsJ>Hgk>I^p(R}XcmPWl1djVd
z3b)v<n%}nLeBnK)e<gT1Rr=Dg(2(EPI$n}>97h;2i&^Ny1g@u^@eB(Mt=ucK-m@@h
z7OybL2J3CKZ<MdKHodsxG9v;t5R!C6;NN8sVHD}Oy$|NA4G!wd{bgO1C23TV9?u6E
z0+?g;vCSa4=fqN55G#&TB9JB5!y=9ISb=<I0l0~dxS(z*mqK1cI~$>{@b9sjb`Jn}
zv?v@O1VM^Y9!PxI0hda?)de6lXVJCg+K^5aG{RV=j_c7ygBwY4Q_LGo+;Ja7o>mWM
zZ(Ru2O<Nx7Xw-zOlB=HSn$z3nzdGD90MWvU<`7w>A#Z`nHhEpCDe7ot<!T27mkUA2
z!SfVZ7u<QI`<Y-=&?UppU*^%HiKfyL8=0KZeCin-3ZhXVe2eGgOtm%Y1e_*ERs0~j
zI|H)CQJ1Uu-io92N@xSYGJ=R4R7+yxR4PmIJ4RHwq{x)1XB6~Vn8neb43*n3+9R)-
zdK;Tr=Nkk^OwXWcsJGExkD&1p@D(7W8e-9Lyj+Eho8{<mJn~*ud;$9z;vwPt(SRB}
zuvEJ~><Zn#jtgHviYWnpP;W><6|Hp7EvILpb#*4|Yly#D=V~sCH<}Hdhn^^&sp~cz
zor|{9bD51nn+dP1G01pOjBHTq<_1^N;GSjNL34ebze17q$azn>JVjHq)7M^6ROBu6
znG{e$uuMWyr+a<IBkRarK{#H~4kqZ$o7ilgy(hamjEyFhlZ2F@c+J8E&O6u&X!L=9
zX<MYpTc>44wlMf2yO>4VYBD?p_f%&H^q%E)-9cL{HK@!3Iw<!&zCFQc@d9Pl&X!kK
z^kHmWuN_et&kL_*oFMUqI-+9Hc|#z678bS`t&@z`?YXQ{jqO;fhSrY4Qmv?)`0ePJ
zMzoZAHt*Yp#6c%^BXci8MXo?8qoendRdVzg{b7~?ebw8JXM$O0hqN6vjSf|Ua5<s|
z(l6QeSb(DFwgFIV#b~5v!2|}8)hGbIW0|byfhB@Famy;bC>0bzx6JP3`7Xl)P_=b*
zD<qpzoMFLmUBJG@jh$$o#d$GsJWRdr=X5%U?;O)nsLJsOpei$>QQaPaYRKs77*9F2
z#u&<IW`Pm+#eS}7$6&=e1&P_Wgw@rrDrMq7YQ!tP5s!htC~M*`6UX4c9Si?$K`Mwv
z`1<2~VUC%pjTT8NVr8y}_Cnlo<Mj$xjcg;gF8p(!J)_HbG69f`4_|wSH-|b>p^Mi*
zN~PW#DK-KtV0)iQ7XUwN<@4{BW_}HEzsCcTx<Lj4IWp>LIER;^5O6H8476l@J(~$<
zav;nW#Gm*rsN`)w4R|f+OMn53J`Q{o9FA$dKA0AOi3mtTbRt4`?2GVWJM$0QM+J)n
zR3RMI+m|5L4X!%;=<}WxqTl(et1KxTg<4a1$k!z^EJF>M`6+{)$-^p#<Cm0y=;BFF
z6bfP`d5BaAes0Q>ok*Ii;43I8lBHdK%Zn^O;aFysL#slTX^cMa9oDKRZz?-gw#BwG
ztXN<<dVU)ItQ1(5Z9q+9r{<;wLb1WlK6;uDJsu^<))?Nn$t0snSA7bGbhN?k2f2oo
zLW;hizro-u#w-LxaGBA&*?F1-ijE-1U&JJ;H|ss=I6{uax+d@#hSU;T8|CcHMXsjJ
zoot~&c_)k3uPKQ)54NNNfxz0$o1R)&<LvdSWws%2m+EeZ^_v5?;q{oV?z!)?o8lJF
zw~QXoB6FEWV@S5g88p%$hj;XOLOoCxG4u8qu9B8d<hF#{f+%aElfu}*_nCKq?m4rg
zeY^q|-e7KuHsrmb@M&>}J?uC|VnDOx;pHXD#7c_j(4(@-XGlngI1@b`llgE2K~~1t
z#%Qj=_XysdsNT@bZIKLX!4%~9hx$zpggUaELdlk|VGq~=VmK9~R#&Qu)P9mmp}Ify
zcf9`-S}*a}(oO>yml?eOxw^cvYTp0hi{Q!oFOTxGxVSmZPik?Ip`H~M+q%@a_$UL5
zRLI^4i|psoIO`HvuX4LSL~UulN8kU;ZeHXs`q?ckZ<;Puci*s+m%BN}+C#kK>9M5(
zD=x2>mez~EvYqN_jg<<8Qfm4>V4L@{XhZ<&t%J!88nL?1rg1`hMuZSFX^dsg7~cI!
z?HK{YJ<S;b!u_ln0l-AYj8Ojmri?gq_poHd^)t?pku8k3WXz!@BhBUW)&-CsNWgUN
zTa`-9IzBnteZNynJr`Q&uq+Xih^3YCK7W<gl0j(olt{3wwY)OdP;P0ZQM<c{zbvIs
zFsf(;IENpZ&4mfd7LW0GdQErG1dc^=?`5v}RNB$46i=uGb|v;Ll@e~vq<Y642XLSl
zEdqrzBzF*FoL#r@FV~?%;cvnO_w=;#wwg(YG=yN&*$u8!gJwwd_p4GpH9Ap2O2nU4
zoiF!Xd)Ra=SSd}Xvy~wYL(bAP`Fde|6<v+`R_1Jy!gxz*l$Ur*>xy`|t0`Dt_p9Y9
zRv$z@u4363SNMzypNYa_LB|HGgS-<2AR-pXc)YAJf*`oS8b(+znMW6Uhi{HfGwD{F
zzqHbzTSh0W79MwsROlJWo7Nyxpf3`?U4hU4h9N{RcVxe%#?s@ij~-FcD)oVZ0oHZD
z6@!E#3Z0pB69tN9H4ZqVpf(mlddH1`9-Kv~qX9EWeh}K73BPm^qod}9U%Id~j!Sx8
zYnxxus6pp<5BhzP8fOg*FO}g~&W-q{;0inmmUatfPNIgEt^yRxCu;QcqGIs`#!qiv
zU1`bI**B^F*P!a8RtbT~L0#F6FBUg*Fb4|X&7~?^#nS7n7`F{cf%-)r!H8PtLU((X
zlbe(zGvj0`<5dY=rD+hn%OGE8Xs<g9?sy}CFuUJ(`kP{$CTkr7{8?xyAogW{UvR7l
z{8lk<FRY3YRC~#9Cr5{G*-m92Ss-#|%;(9tcvx?5rdhenaZ~rZw2u_U*E=y`J@WA7
zDyYz&`jzn;<IA1JEGPTB5!JSFJ%ALv5u@N4FBk`8EN}pd7RrDTRlwIfOnOd_@cxwI
z|1|Qyr%s-g{J&HR$p3|<ME-wr^1neDiTt0)|B3wn{p9~zB~t#^vwnX0-`ka9$p6}1
z>BREC)=(n<|19#qP7pr%Uz_g<<bRz?rjh@3ghtE%T1$!iugU)~F87!JOZmbap{W0U
z!m_?bR!?8n*E^HQ`iZQc$ojLC|F35&|F0EG!TbNE<o^E&%Kz7i{GZ7GiTwX#$p0Ur
z<^K<VEBXIAO8yU)(1m{3w&IEX*WX3#7fwsQ!rLPY$I}sn7yiyV0%3`-3U5>4?RbS}
zq)-Up5W9WV_87%OI8FNX4%buZ9j?dd9f}@${~LydPn;gL4h(g}RGJ0hWwJdw`|RE-
z2iXo59MhI6au-AS08+N5aJaV8Y@P*G4r3G!9t@whVI!WF;hstceQa{uaf<r&5n`+`
z2*mEuVa?h*+}}H_M(ZUIG#0L$9#}g^2M5)|S|)vNU!teQ;5r>_Tqn>9;CN<W832Xv
z_9{T5^%{!KN2oS}D}H7QjBB3)WBYHW#+bbpL-&@9QSK~exfyFa6qYdy&PpvJ+l|#`
zqC5FJ>wj)~=eIF6KXLs(mVEl3;!0^b(f>R-{g0rGpeCnOD7<EG>vo&H9opTleO@0n
zS@{wY@|P;%zgpdXtDe8;ZzUR?M5B{vbOIWkv*YcL0bLGH{Q0#wxVJ>vZ!n{jX}D2u
z_pa)hLVhKiv(ABD;N!3OIgVzB+6>d_gzGoPsAkS4(c^r4T-D4(`jat*KT_e35z3ZP
zs*%xP70B2+6iy93?Fmil1bP!r1miR&(o#dyo$%ImSv;J;M%re?Qb<G=`6!Bv)1iz4
zVf0bAz?j|u1G_x~6O1LP6p3wiF_G#bvZx^N+z9`U*vgI0tx#{^n1m@fh>G|whS=d2
zW;7iNlk^-O#D}op7^(mGcwha8(QCf7@t%Jd1<1!oQ-GAFP=HLRu=qGr#fGVd7+>(?
zczuPZWrzVT5|l>*oQyWcmqQBM@M$(su_5#tiTp1Qt;xjy@$!Fh*)RVut*j;T{}Yt|
zZxh3hL`{&W3BI$MAWTc}H&GJY#_;jLFm#5F0K?H`iupopwZJ4=fv2n%cv^aaN!&V!
z1_GLa2^9kq=mkv0!0kBK4TL?p&&Cg;7`T^a;C7b289#?&AVMzy0y4$XJd3RlctCZ)
zgX;rSZ)r#S+4TU0Y5X}7MZg#V9|`F1Cg9Ip{%>44ZQK1afev7t{9h^r;y;v%r9}RJ
zYVyBE8Hou%BHJgj{mf<iop(q3)zg0l<a;mur<d{NCNo~nmnA0-<e-{}aOYTrzDC~t
z3uglNw$q6z>4%48*a?-~=^;mqi_XsY8{@F>2lzSy6~I-I!Opfe3qY`CVJkjA-;$B|
z35DNVpWuJC$>p;8xtr0FvuutP?Gl^E0O9wyBp4mqA$GuqF$H&WJ32=Wj2_7Ko2;;5
z4$Eg8me1a>eCER<8FnFD_T|WQ>Ni{lf;Ib@EfjMF#ia8V<~LbsF4Ld;Jl_Xc#?4(E
zC4@(yrXoVoUwmbRN3v5WBs@GuDkW4K6B{gq#&w7lPhv!&9lw}8GcAPd=nOOwL?`#s
zLPSJ*m_W5)!jyA;mJ1Bv7?nV%+z*M9OVl0`w8@!U9p_+TOZ1}b^g{iPU#RyG2uk*<
zwTXp$Ra43LB4wJ84RJ#;?8Np=IEx*JAx@zkzm%=ki&zkUbE*3&%m0^lf3oZUvGV_F
zA#nd+TuJQzALS<zmJ?z5Cl!|8R!{tLG9~@=A~NjI(U$UlUtF)w#BwsWGgZWcDCSP%
zTn3%q#v=wsnCMRap{?)l6+5x=AORG%r;D^O^RNg+rlz!vR2W}SCYp_J%<o?<|EuFM
z%CsNb+0~kO;2;Q(90Ot29sw{~5fKvZ6_hbD0Kv~5GT}M59e`mCp+BVEn6VNRfDt%N
zrZ0HVunE+&ClM}TX?iPxpd#VH0zW1p6J{AQAz*g!BWOS-m;bM9*R{LncK<T28ep{i
zUo5RH1>!%hloI>@CwKpEP)1?_kSH}0rN+#a8t-86s)yUvled1|hMx1&t2n%^X1pDM
zS9W3zht|v#VF1Fl79mV&Y%_odf5zdd+}<ygV&2~u_*g7_UMQ-|dr|H@yj&@G$-fGs
z>C)Ue{O-^7=a%U&7c-Y9zVz3r(Ntt;#1>zlp;PD-SN}RTBQO88c7FEjf3K@QqXB?6
z<1;b=82OaLjv)w7Wi6m%oY)IIhP}XSpLqBVrUfy%2msh@)i0f{IsnIW6TvzYa}^=Y
zJ`i_-h2<eS$}mvw6S9~%YFpYNYgi9{M(aWT?K2t;dZ(ahFX3o@aE>OAEKOeVG_Ug$
zGcsWd!DMJc=sZ4S6ShdEU~s~)9LeZ}wEL4WJX&PxU}r;Fo+r)lgiyc}VSK_cPZ*$t
z0h*<4<>dPR_RwnD4X4wKZ3Q^a{;ybE^XvbYmRAz}|5MZd>y(kG{}c6pqW+(``hWku
zwOideIym-g|Fw*tUiq)~neo!PDmt-{?lqEV{vTUTMl&u_ud82+OZftbs_Oejd^uGk
z0aHBnW;ni7s!_!h!l;JINEy^?<)?SGb8urE*j|@^MxDDi?@vVX)DTVt&P3o$ocR*}
zKVANJo%Xdov-m%2g<`<|uT)Cp|EDPbOUjru2C%$g`W~@>+234Wz!HVQ&!aG4AsU0z
zqx}!n5QTwA|2yalXkTI}pT1y9p+&-r7FFmLTtE-XEK(B@^j!)Hy&EI}{CjA-*p)in
z^1@02x?;#Iw{LMRIsE~wBF>}+8rCEfY#8vsl$`$1^|#60H{<wlD*uxB9wu>pOyaqQ
zPU0Cl$&1<d3a1>)=_IDxNnFR1@vZK^-(cVQVeFAj@4wgDmIIHPkT$!n_gl0u)}y<6
z+`4uK)(1W&J^ZqP0O*sLEI$Ta`>Ul_%TVE*-?9dv3|bvHQPu9ru{g#>30PS*%57Zr
zov!10M1z%B$F5^AuMTiTJVO-ABU3ua_$+utY%jFoX|>8?A;ZV{d8tSu$ouaZ^Z+o{
z*K4#_V-AK^>y%Lt4haql$3M`2fthTS_QX^u5X$mkpzoo+hVIm;;9$Z2<@Ef+;q)@H
zwAj~h05@d;U_<&Areq8PA#DlcYL!vsVQfV>@knyvdNIGp;@R}>ih{-kMDxApGvUT4
zkP*YhQX_WW8M`rVn;7V{Xlh+n?i=xVG7Nz;g+pI+we^s@Hz>2O0T)Ys(d(L6+VaIQ
zjzs7cjCA{cFt6|6mL-W(Lr^Fm?`W+g?=~m;p9%CoV%HLX#I7|`88lA+v%KQh|Ev_2
z68+DU)Bi+KM)Dw3Vl$GcfWDi}$U&ucvUeu`9G@KRzTc_&)j<)3etO+d=$<v+t|YYD
z#5N}3jZYO)Ysy$z;f1#+F+DkZTsxJS2Fn^518cMi%+$J)01mO+*V~S1{1QQ@rXfr?
z8OAq?i6G-p1DV1|1DObPM;gn7w?6sfS+OE=i9*|xnbzDdJeO=Uwn>jsaONgE=6KKW
zv98d_UE{2G?i;Yn7~JhRMg2xWQ(Fav6T3e$*Ek!Y(MCe*8Lu+%$EI&tr2UW)g=-qM
zN<_yZ19LGpp-O9pvAGx<^@P{dDlNkt&{H1WJwT8yQEoeCxwD5bII6@oINFYFnKYvy
zU9#Pn)=)l6CnHSPPpKc9k^aXzZ@G~=pmF-2)qwra%IaDn(f|Au`X50Vi4G{y0VO)1
zA4>;hZSS4N(gX2=_s|7V9$k~i(h-eP4@GN+9#|=a8yTC80a1Jr>fSa)5s+qY6f`4)
zAU`)7m!F-^AZV2OYZC3(!x{uho_PZHdJ+cv{w7H}lO3DQPPBO1`m#c8h`fw|`Or3O
z0<`Ja*Y);Lqb9?Gi1rFBmJuu5v!P+%QezB*g!UxLHYiMe5~r-xpU5@i*$_>lAQAR3
z#IMjebbygZPNd;<(3}bm?_c0YGu-%k1^jA84iBkFGFdquGP!S~F=b}ZAL{G57Q<HS
z#~t8l#uYjZj3u$QFu-VSI`eT2GviXnQj_R@5g7VHUVDWy;@$a}s*Fs8I(CHmEA?>N
zuOBcf{cPT@FX|!b$J<IxZ6~F4@o`k~u{H5=74dQP@Os0c7Fgd`zcjV}$Fy3!m;KLb
zX(ed?Q%v+fPgMUCpo~QKljwdD-Ou;d{d}nI)Q(QZ*#Gzo{ax&T%sp#_{ZGX6dEut~
zQ`!Uhka<jdplRODi)Rn?P;ciYkLOKp4&*1(P;($36;5mp<fq~Z;^joWx)*_d9P=O_
zHV<ST6yJ5@2YIpY-a2eCu0O`(ec%7}zNwz?i+;c_lz7u&3ZMvIvcNNc5fA;1XlT->
z|HkNQ9_;bIQQmYv`g4FQGf})GZvj4;w*Y?_Go#7%KfQV%H(skfoXz`xh1KHfa^U^H
zrR7Bblf?f`bUBGGC(-3-x*Qs@%D%P#-mk#XbAEcQjduo(ch&OBPOOj7nt8wndSb@_
zmTh<HUtqk3K>TO8wn65%0Z)*ye1Ss3-}=#>@o9O_`0#IYtZ0`&lq)6ygu@BQ)RbuS
z5nnx`6V((kgmr92Dn7J!e)cHBURQtOAi{x8&MKI&u)Im)3I9a#gp(k_$<G7H|9xi=
zKlE=L`F~}txa5=ni>1O+BL6?ePa-QPvhq(VE1w(%tQ;umr<aalhmI19BMlg!&cxy|
zwlkG+LJXVTL8mv?Ea4tD4EL38{g?40tqcN%AKc1dq-4vCIX5L*oYMHBED=Cl88-S_
zpYYJdlrTcu)tZR7>#p4ER=zU+GTpmx50hV~7GI%zGr2%tl^5t!T%C`&<G$C;`Q*3Z
zQFq}mN+I))w1c<Yh5QQqTQ!gjz3En9CcW35Kz<%|u|1XiOudZ0$$nJnIe4EPDmyRz
z0J5{#$|t<rj&X%M=|yxLkTLI<O(|QyIDRDg&%NzjPbL74v;Qxy1mwS^wIu%Mle_;F
zl#!VGCt`0R_I@|9_w?hz2fy6Q(|&rv7dN2EZTGR(#Ii3nHMQ`obsHgdep>7N#5h0x
zJu|06QywQWi!YFKs=U8M&QXbw4IjcB{*kTWXMF*U<CWOJKa34Lj%}#PI}vGY3wIC9
zG=jg1qq$Wqz0Qi++KBx??`$pIdB+pO_QbILNg1{$m+e2r{l9zLFU?s0Qz``R|C9Ki
zPf`Cvl#%Fv68%r2|M`ykpOaEh|AT3N1N~3GG>!hJUmC0bc`(z42M^B>&;KA%06mlf
zD6TaICquvb2MBr+^$!%EiTVdoNz^~_)j$2x52F70?%JPOD}VZ>2;~ou=mg3i31((i
z9}leI!QMRy4bRf_8lIq$N226Olsw7LkFEa!dZOpt<LiLN#eY~?F8bp?l$KW${m)a=
z|7etvgnvkMJBe;*=DMBQ$=>PF{)ehxzvHF<^g157$&3&DAWKec+oPJ8S<r|5N3`cr
z@gF{8{D;q8{D;rx`;Y!O5uc415sVSezke1{A_`KzQpuk`vH4B5WW;*^o~oq%-2UXM
znV6a(Be+CivT^3sV5mxSpLu8v?Ip-Kc0nE;o5~?YrtuX-9%WAvdBVeEWZVhW#<Wo<
z_GhM4nyr>cc1*>*{0=cD78fT|!%Q4yLUpq3e6{;97qMQW{SmEX{5^RR+LYy)*vpip
zDg}n!ZUeg{fN5aZ(LW<0O|F00pRrzL`yuo%`!UrUQ@G(g-C!5f%l{rrcVGLDwI%=i
zA4{vtiT%e@egDIJ`6w|6Nt6bO(%?r>8u%V{jnW|miv4X=3X)NXtyz!~%d`px6+G(^
zU-%gH<f%2uo2M}ghs*u5Qc_gHl@ygxN{a9Hy2w*~r}ROTA2M%llUWl6<P0poaJ`6J
zU6ZRv)Vd7Sk`!WTNfdIilqIso2YQ!8J(W07EYJIy<ruFeT5+FC^g9vFUV9Vc)$NDS
z-rUzR$M+CyjG|F~S@+Zlc@!5*H5BJvkZ}=6&%%RsKFqtU-zzHV*`q}zHSKPS_X|u~
zi5Zx5J7&4Kflg;^dgFgUJ^u6Q_cOiGpH7eSaQaBK(-iNBOr!tlbY|=Svr-7ge_2W5
zza;vfME{fMe-i!A{q;Ww2SH^Iru_{xJDpA(bxx-<g$@TmCyJXyag!)+CQ#hqLi1~D
zU>!}OqQPZ1m0kwh9;1u_*og+_;WRLvPNH_1MC}3;n`l`QElZ+hdFoo0pThnJcH~yW
z65l4#0FBZA6xK=s{m<&sTB84Xl=oje`AcYV>NmYxpJe9cob#+;Ug+!F5nu+_MuA9R
z0Yy?8x6lxH^)U_B2V)x4JeSg8xuo$7$q^Bc!|}qvo>A2(P)<nBrQHwD;IA&FQ=u)j
zVUJemr)ItW8~EPj;+s~tS=l+*&(qh5m&S<oU&K6f{TG&3lJ);M>mRuuoL6gqZ~LTj
zvR6Gt&fT>KJoU}q;jZ=H@As;;{g146^1hl)(ZLt2l)3;4vD&Tke!YL|%LRysl>_=)
zZ*+jW=!w7AkaAq9y~CUr)x!_pu-@+8$=xCV($g$G@f!fP_6|>Lm3^L(lFeVV+IDo?
zP+c^icO9+e#i835u;s_q<tU6o8*g1?4v%UqIFO8WY}c({+8L-&dw<H(2f(kl76w<q
zpEm6#69a~7LJ-b}-DZxp9p?*5Qz)yW_N#5H;%N?Z^icC3*=hcR{no~u|H%IHpRnHg
za;#uha5!X^hHCh<@E<WLFeBdlT=V$}!%~|oGl6DoY)?j^ON5=tB2SkRTWkimz2kc0
z3(VlVdbioOPY3X)-fvd=mu{95U61EmS5yGlWnRLnLlNpV+pWH-yx*@`2d8fXG?MDn
z`(LcOYt@_A^=`v%23FR0=Kr+)zqVgFo%{e8v;UV?N`CwA!fGM$|9-^%ALljX0HEX%
zCxE}2z6Z-ZTGWGf-GwEcU*d~dty%S9k&0+*S^FsR_@YLzxNJjKa@KbODGXc=fHxfj
zOt2f3<I2uI`HqVHY1eVdTUxhN;&<w$fuM$+_;QcFzgb(`hQHXui?y}$?>3PV4}ru4
zJ%T;mqlL=hF5};J2%Ps_jG>nxTFj=5<)FwN8q+J@F)v|n`wrI-{Qn$k*87bsy7z0^
z7j-!N<(%NKwOidUsNHT2QpQn9a89akswdUMovOqw9+j;wXH$L_Asn7gFWJ;{(t$;k
z<=JBfLZ;lQBtMut4o9A}Fh>02Jtt^(r!@jLkL$oD7d)^1MSd>*k9YpVX#cA^^Yg!m
z!d~F~FD2*yWBfd43#<m>2kb$(Iv7zxiCv+z(RX379s!B7rqi{#Wdj*~a9e`Zb6m?r
z6V{uf_pmG%n0xCE><+tv4H~wpziiyHyi$9IJNxf<tGF>kS=<tNA*s)3w)&($x+T!}
ziy!lxRhso4+OFWuC`0nCV&6FVuAA^11b;P$zt`A-(;YxN``kJy$I2Z__^&TbJMZ+-
z=7F7fHb8j8g7?rv*G8)uye<5S<|B=U({G{}C#*gUL!j-`T^Jg&AISr_Hz4T=qZHU-
z$(Drxbb9sfEyCD2I{vtK_;z9Epz_b^!s*V*-f<1iLO3h(DPrLQ15C%(b2Qv()Efw{
zjS}RoILq`|<2+r89EC&Sh;4Il70eY!2u<VSHvZ0ErTkLci*K@qU+|jc)=jt>TX$Gr
z0-JYOA$}%><@_Dve`))FyEXW0_VQn`;Me~Z3&lkL_g(h?bDUzaYmeXh_iFzN-}*fu
zqLf%hr)uBF5+VD(EbnobfZvrZ)MPE&WnJ4wy%`{kJ1zlAMeOD=Hu6BVwH&Lo+jf`z
zM%T+w=_`GRs5I5d^h2E<&zZgj?{kUax7Ofl*FGO!X65l>Od|5&zIl>L1KJ}-C=KF3
zGEN-5xTedciC;wH(OWul@Vv23-@kdYcSZ*i<j(n(%LI&cf}+c*$ivC6&-URc5r}SK
zq_%iwXC5sh?aWi!%;+EA1Ox{^<cJ(yBZ;E<H#z@FBA>ndS6T|(|E;Vh^4}w!|1oxd
zns;;0=1GCwpC?D}{&65_yT4gn+x;7)h`jP|zf^v@4c>=_Y$Ljbcz2OOuu!j|)TnVU
zA>%bWvvY;3-X=R&d{Y~o*1)+ku12yu|6u$7+k=BC%s<B3f32*n`uG2()#YUWe~cg8
zlwn#;HOC0W5th}}MqFzYfS~Q+zC58Zy6kioFVQ|~0&NlI>Cmv!glZ%@neZB+^0sQd
zgX3qvYC$81yOon&t8()86wx)5NPhIgHnL#%xdtha{>0b+!9VvOVEva?mP*0(zm%;1
zN4x)rc@461m>A8-`KsV0D`^+3gT2F}6YE3u<P@V!zCEc{YSk0A%!|DE#tXeWI;~l!
zA5Ux51B^4ZQ$0OBI=N$~wUg?9zpw1G^v&y4Yjrv8XVQ#T0ng5tw$*U@wx3Iba(D@g
ztd!G3r^12VUWCGnb96^-cUG4NX6BC5=>fmihZ1vCQZF}x$<r}=`+je?3dJvnt>#0c
z6jct(P%`Hoqm0#zfh0CubwHng{f^i8en&%34O#8p>N0(G?SBvLF1p}|;so!G^|<w#
zW?+S<#*ab_OQjgYPK6$U4B{g*mH<YeJX->obnfZH6qC;@j@@NuB?Cbx3`W4e;{~_?
zUxA;H|39ehFF%0&_wwpe!2UbY|2&cVf24`9J0Jl_Tmn(4;Pt;Kl-&Q{`~KfQz@xAK
zwMjE>|3jd7f?Qw@#ofQ-xP!&+u!D~Iiyhc7;9vf#*ZWR(0Eni!aR)mU+`EHi_;C}G
z@%PpL9358=r?>(bxBnLd`XBg{=zkth{*&_>6arOFv?Lf-SyO-qvpqX%r)PJCD%|%q
zUx>DD<&3n18_$L~1Pj@kZb>{AD6-BWu;?Cx7}K4jgX6vZ>WQ^?YP~&tzjJmL7)qU|
z4kq5Q^g>&3&1|84dExX2{dza8anLi{DrM^vYWhQGJg}vPl}hyOPkR54B(~89!kGPk
zc_m=~wY--2|2!7|KVBt{`!&mGd&GazJ1}g(RbK$d3QV%}aZG2~hssHj<@0%9>5zw~
zm~x$ZqwgSZjOv1}Fy7*U<0!HmlGfST87CqPqBm^YE-|+*GRJ!Z#&<85_+nHH?44A1
ziRq>jiNR(D>}Pj?aP{tFFxp)cEu03x0K*{z`!EMmet&#iJ=v+8R(CDz-j?8ouj?Q`
zoVST7N3%1reNr?)bk4*eAsCQImUH3Ua#6^@=zMrAV#VrB9YeRRa{-CP7~%ca`B|r3
zGT2twHqQ?BOWJWEQw(+#+mO`(OAM~m0?|zkvhXPdyw9D%RR~lZMuZUrPMxBFPI(-R
zE+0c#kPlc31r{23+55c<9BxQg9<9`bnzpZS<l@Zdu-FBOJQO(ZO$E+bt9o*BbdqCa
zmDaR-NG>q1U$1NmcF3n&LcrjF&`6%fKs75mjI%7`Heqt#?!ifFqnt)U8DtZ!O4`XW
z=o`R+W<Xk!6wT5Un@^7Ly65Kv|85`!c$Kt@gxKpNg#%v0ffxyF#<gKmnjVn?GTJ<I
zOl5N-&Z1mbjXFqW1RcOZ1L++KREVy|B&`dy(loyqpArQ1R|BBQFG5_GaAoXDw`Vt6
z7iee6V?urf`7m@TBN}}aE)Oa@Cr6pFB+O_TG3!lbZ=cB5qR}NWbni_j{o<R-x=U;7
zJ#8d}Z}Z+8mVwd;?vAsal5}MBxYKGaBcSmJUUFQB;#xYf$@MD%iZ-hIr`7wna&(L_
zfS?yqtsL%-?hK+a$<TzLWK<@Y*l1j!DGa1^vUm74Q#+y^QFVdOr$vdp9JDPmdFq{^
zK%iMhx`Z+~^LHM`p|##rpzj{OW=%+bb97Rz?7YiZJMT|Us)seqLa(xuw}*F<PGl{u
z$eliWqZ{@$?FRyxVK@kaW}(160v15Ec^9&i7XeX5pObL`_(mfut-~idAU=8;3EQCT
z2;@f7y@#2$BWr}lGbdR|Qw~T(i}A)G&gwG?_{It{WA1!UTdJ9e|F2zl?7{5qKUd(h
z-~VH&nDGDKL;pX(DV{+6U;A)St%d3TIUu3_=cR)Be_kk2|KD5v9~j`#`hR`aj0=D;
zINl9Fa2mNPkEU^veuo-#Q9ZI=eawph+N}(cDy(+_y;WEtu6hg0`P=A!4p4|cq8Re^
zcBR=o9iFT8I_5|iXaBXlycE#?B>sPoxBnBPV@YYos_gDsr|-9czN<U6qmz%BEQ77+
z9JYLyh2)X&4x%NmpTPhR;{dWUkj@sWh)_mG*%Wur@o2QVaJ<x;xQhmJa{$ddqbQdD
z;H$&ziwhu9Jj#mD)tv)WF;NK=<8X^@*I|=v@qJlQG#q)hi=woB0~@`FeIb-tUu@i^
z@j(G0bbqaL)syxt#AhHYhNq_3@LgIht^foL)|m?ectdHJa?H+O=A-Ii4LE-6n?WDS
z*86y9l%P9%ezXZ)H>aLwaG>{noE`r917hiZY4xa<jOd{+WWA>?S3K?D?`MPC8v&l}
zTkOd2992LLX~qNq2@#(ZNJY9?+BbE2C*#lt(Z}guz**ZBAGOo=M>}u1N`z&`&?~cG
z5O@%@w|Lm@?N>?8z#F2YLl7r37L0BC7!FnxW1#RIt8AYh?Z2;a`4nW+>e=3D4eyCm
z(K}D5%8)p`bH})7U-Bd=u@tx8@9pne5_Wof*lIU5S`is(E>?diD$t~sm=$oT9`FS2
z2*{9Q!1P)SE_PoL4mVhvr-u%rl<&q)<4=BR732JG)(7=P+Bo-dbIA{G1~aw)F0L#G
z;=ipV^4}xf|0BKcyg#iS9auG3m(^N^g4w%`eycYCK0Ae)H2#e)^j&u8SREUVl+G9W
zD~jgbD@^IyU#-jTu-A4lh1O@!?t>U;VUh(0#Q-)~A|m;0l#wRtTCf>+^t`Jplntat
zPQwS*wT>MuZLLSJ*B(?3cQb$o?;O)Rhqk$Beo7k$d=aIYv`rP?@7O|>KWCvbgg)@}
z=>5q~l?TN%(6|c}-QGK_oP4yT5QK`O6zxJGU4o9zt`BEt$|eCivLK&}`VYzYY@rKm
z(bh<8#cAKS<a1FlO<oV~ji}wYa!}Vt(z!mO=`ncGo@MnNAQss0O{`M&X4A57?8b0_
zw{#h|<KX2N|C4Xr++_VF`u63pUGL-jKi<dCk{#q1k?_4-c~R8>c8BMiYv?D~|3lX5
z4E=us`oFbOV*mRnKlS>6ZLsvW#kpdBg;m%MtNc$5)*hsZ_Ym+A1pL0dz5@M90Ws(m
z{-=gjMpwAjD)2H?i^qx|Srr4$Yn4B;O<?$YS9NyA=Gi%L@a!Y6FCKxC^;g!5d>Rmh
z31MD=7*o>-LLcaX{3GJZkLraH8b1g~q{je6?-P0Tp-8<;Th-(^sV98{SZPqHL?6UV
zQ7Cf;<KOCDXZEw~EX%=1!3_*s7+g^(bKt=}2!{)Y1RU;P^r_sDNlhD%D93H#4mGON
zB<<S+8{_5kN;)`^$@bsJhuEw4(d8u+;4#me2p*(0TlGs+v5AMwG=Q~0<|t5Cw(Hv6
zbGv`3fq3U7#>NTXIM6`gs|NN!dxI@L{o=C+K1h8Is$cJZStzk#kDd<8RB)9JVUxIq
zWj<ur|5MA3f_rJ}1biu8X(X6MAI!0^`i{OI)_cA7ErT>+M#8N#dhL4G4&6PL)$Z}L
z$$!y&`MuBo<<hGE{9h`qCi>q;`9aNH*J-p{J=SwtUEtF(g}ysoJEdh0zB=^v9EPiy
z7i3NeP9K$cOGVZmFj#TzuRNbJZ;*QE)2;gSm-=*5^0K{;=g!U7R&#JgUz)>C=a${%
zZt2^F(}#5n%tQJgmuWraHjr{t3Kj|`{pF?9@udY9_6^u&yRPW^C2YF%@r!-S2CV@I
zoawYmXxkU45$&-P(rtE~zKv3nja|Ul&NjVWwosza)F(bP%q0uKE?Z{!swt`*i;U+N
zM8oa+x!o3@>;1m-HT9f{9Mr@QZUK|$u#5Nlb^|uJrmW^%x{Z39&z$@M<5lEvor?>y
zfh^?Y|CG$99)+JGeTTL0kTO+-tQNp~fK^|9uJ`NwAfSZnmi?8j0ziDAwwM!ecsE^W
z9a9BK0EghE=OkwCklx~w6z6mY&af|t5r=}k`p4`1(&Z~k>cGZ~=r!PswOyDlQSGYL
z1Tk!h4@RRq=o6NV6X>!p6~|F@M@xhWY`d&J95`&nm&Zp*B-3d1@myd9=rHfE({EkE
zAyLQ`#B3o=Gd@`~Z6aB|vT9_}g80ksJN28EOKYj*TT7);xwj)TM^+uQ8hFK^KGPtu
zLVm@|<#YrSD^oi+l&%v<=QfyhQwk{-Oyj`BPSR=ND|)z^@Zhy+GA{b|zj;q$m;6gF
z*>y&i?K&J$@#WV0GH7*RwE?}tx<?aQw3Spa_;RB8#=rQI^`*^zXLD>F{(aImgswwR
zK%ZmacUZTX#;}jp1;AbWzEA+J;S<a9=B5L&vc4iA4DSu$`rZ(3_>whwEZ<<=ltiIG
z4i5Q6X8=2MbMYUY#edxX(f!A}^?w}jefLIUS1bT}$ZthqD4LnBH${C`ZUw3bZ}yLl
zPXh?wMt0?P7LCuM2Y*Fj_}46e!`{M*_Cnz`KXy9xK5l~xd{aC(zxK_q1M@4IY~vZb
zV@@_Q)}`f7sl<cmd-4BV=|44Vytc9u$^REhMU($8t}PW4|If#<|M+F`yw%0GJ+H`;
z?05F!IV%G<`e_4Y-EN8r4gGuc3Rq_^3hb-X$LGl`@3QxIkOgo51H2(FRbWUjS~s6Q
zrOBi$mFj%Kd+>!Gdr?e1f8OY^d>)-AT#Qzd2in5)RL5!Bn*=~1vBgRn5>((tDRt4(
z3M^>_c(a=?mas^wy7v|@e@{051m>6puL}N5rM}wdJ?Ii%xZrcJn=gus=yGzCzZ!Jf
zDI|7$=i1ftN$XG9DH@bluNYoZ1j-t@zIH@jxwlnz2Ysh|xwYeTFItyF;w8J~MUje?
zqiYY_Td)3rUcV~0TTqmGx!CJFjm1W9*y>(5>_6<XZv)eU1J|a9xzqZk&!1GAu?$om
z(sQ^Aoj=*DY<Up@D$vr_qrc8XD`D3`P|;tHhJ1-?U0t#kdhxU_ZrMVMy^wR`tF?%I
zV&Zczs{birHWR#jBu(#&A}bbIX$6=9u8E<~@CRlI27<ldWHJ)U`3NvEp_nRP4sE3X
zvJr<D8=1lN^p#EhH3tLrCr5&o=$P8i*@#Zm@6kHxyzf)p)N{`O(!B>%>9VH>5!5^1
zt7Cqh^Yp+pt@HFiNaH*;-IJDi>e{GFp1LwBkMFy6gj`aL9Z5R!o<CQHd8nj@jg#Td
z$(}#Q$I9{8p#A#-PC<NR7*4T2!*<w&B~JMN5dL2pJWR_PN9+HKrP5l-&;KtkCH()R
z{46eRPV<A$kiWsLhOu>~tYI*<G8|mNK?|w-d3M~cUv_O)ZgXFne2>2Wm)*R`U-YwE
z(!LYxneIN--8byy<!+9#_OQ!bU}TqI=UZMcEv*+<*iQAdhDOAt)IHG0e`MzjZ1W|m
zchPseZ`;7hTnxU}`}T(Nn(o`0X*b3a=r$LfJ_3Mk7};7#8<Hh+!1DO*;d}O0+(;e|
z&)cmA+vkSJSiKIdW1@RyHyQAqSmO=ia4K+M<P}P%@&@qK<ODBGxUl!K838kgF7X+>
z=^*cJviZgs2)C?VA9z*sQT_M2*_4-^S56NP6o4sh%6a65i(xy5AfPb&ZLjw3=zWb<
z4nMNrDkmqE!`jCUatv@F*S@wns1}@TEr9$LUv23QZUK3Oa8Nzjc?VT0+k5+awU5|^
zH+!|i>gg$Z0kaA_uAJ2NcHZw-PT2ALljEb)YM!xEZnup9qQ{t?(C#>WTbg@Q&p*Pn
zx>w}OO>P1p#@elGG$Q9_e(|QR4N@9+ei&$@xPiv<U1yMEq&t(==*Fbd;!*ROV|(33
zKF3ymWi=bmeq1%c8Z5BWAwpQf8<lOx9bnOe3S$@^tGECg=30)uKdtb|d@h~n(!)Wk
z-5Q|b{Z-maE^`KWOH0A2?q2FCEoh~0*E@RJMIb3nM}s9UfRLBa8>05z#hY6)Wi<23
zu07aFrFaAeWH#{f3l`j<xsggW9T;Pq_7ba6Z#Oa+%3>hQ3(BqTV2izEgZjCJ+GID!
zjBH+6mgroxot`QJRY?PFbz0qETKy&qjW^nL*TrxO)&aEtEtR5W*;of@R3>KFwbN>{
z>^cUOvJm@BHpjWQ7Gm7E@)CgTE$2ceeoN`@&1nQk2DH24J=h)SK^m|yO8xT7FOo>n
zQ$>VJk;Uwr)*w@$FLyeQymx$NVK5L&*<~@lFIlJ62&Op=y<_+*&^f3V+CQut)kUvs
zA!y34(0;q4H{b5$)t3Mom}#VhBBe^{snKZSvNv)4U;n&E&Z1F`Ui}rrYNEN}&(JZ$
zOfhS!=Nst4dG0uE3{}>zqxf288Rj25IyAE)8ESAm7Z>~XVA$_M1>Bw%wt{W4Ek<?2
z28wYq3xy#&<g2fygYmIQ-EFWGx^v=nZ3~7GKh^t}4F=SA1w7Xre7XJ|?-0IWPtlDP
zv&*YusP_)v9G&uoP^GIo3@bFG;3Vq=l6}Qv3CNNipgEEQf8n1LWQyp}OrJ_nF=FBH
zHn7<&4L4!i@;b8#!`{jeqNPt%i}&Xyti+O5qGb-KR%+RV6H#gfOCd%b*dbc5Z<ghZ
zZ!wohgouxj#zQWH@LHRz8?7?-W}`JfFNTqR3EEQ1OlqU`X~QeQM?x0hqoD{E#(1-^
zVWc>eLP_6*8;<y+@7QX;-Y_%ee0@`N<=!7}ZQGvOwl%eFcWPtWp4yyF+o|n#YMWEr
zwtZ@y+uyn`_y2Mqva^z`<RQsU_WphpaCx}|fiq{&WgE3Gv6~F!4ycjSS`>Tl456&D
zSIS+?5)RUhBJiYxLG$l-{lvdlQA2EGyQqA*dkx8Cb<83>TJcfObOJA&CAGLeTaB|O
zde7FDSn8(yVM{ik$=u+Mzf%4|zb?$bTAc$n(AfhpSl!$d@axcBlQW0!Sht^-)6~Rc
zSXL_FV0$C3ytwWX+Sn*3`O+xp$0A@dVT;_F9S*H^jGFvDzo>9BwlA5Vw!NPvU%x7y
z*Utg^`VfwxcMV8;_w|s>#ESlWs2*MCBnLq%X|D)aNAWK~a^D5bpo-_VBhDea-euUp
z+Y|~-oKz&*VQ8M@G<Ty^F^lX6rfn@O_qF-Mm%=h(HI>vhq_^Q*fJwe>QJ*-5FF+;s
zoSX*A!YI+nmMH$hnH$mEAw;CDW3b6plJYUB#I6<4^#^KX6Uo-8{27NLXWFBMMmc`l
zv2jLy8di@e+5f>#k<3HMJ=|!J4JgVj9k(<u89Qu*wlzBrb^;W&Y&cdfLuQ0xxBZDn
zJSZ^ianEP}7^=7sb55~C*835ta~D>yFl5JE`*~oSPl^-cuJ@c)PH7>s;p8i^>zMZa
z7#L%r9)?%qM#k~CMk1&RO6Wsea^j(+e<|$@>Oqtt+o_RwdDx%A<4GuN)X5LaVy)x{
zRYDI(^wYUGyRMk)u$+W<Ln6!bOFP7khtBr&@+`|imQVonN_(|pl-c8<JY{KTIojmw
z3U}Ddd*@yxSmukO6I!JC0g^2J;Vm%{WQ{~7baKcw{|3sh-)w^zeX&UEUz{4hwnWad
zbBHq8eu5Z;XX)rWwWx11l;@xx*%qVgzdxiu0rKMkAAoWV9r^bj($UWd9~jY|uF3^&
zV=Q$WUSPqXgQAfM#l9P}pPln&$qjST-3-@Hc|R<wUvgjCJ8T4>x)+)-?4M$6JvNxi
z$&1Q*1Z65iB4J|4T2_E}v8P|uy@+!NA(D_)Z-~&uk3MTcj|`=Ot+J<U5CqCY+xur6
zj_Q)o0jm}o5&nC+7RLAX9j<ztez(Z-f&k$bZ`e{TS+zrn!`kN2heDQaQ`9R)S>f`l
z?(Lug9mp^7i<wS$v-Y|Sp2iL-FSA#<^~TnjX2wiV&)g-LTA3hqu^Zq$Y1s5G?u*ek
zyH`ktHaI27I)z|q&j4{qR==m0Q6+jQytdo{DQ1IQij0Xv23Z3k@=^{>KkFt%Xa#Ky
zc}#rd$r|7W^=@<1rNsGeTh5)|el6}nt>|~PniiYMJWLZY-8CLc6oBC>8>MWYAJI}+
zCUuJo&VH-?s{1?*|12`EVjF%7&`OK>d`6WYN(lwMZM|>zZS8KM=xLB@Ox+Bymk{+u
zU@P>YR?^vee(+;ku^Y}QmH-6!UueP|SBczx`|01q6f#52e-jSo_M7<UkI!v6W@vHm
zj=2j}3wP1(X(pmoOL&_79^mSFb(?ZBSiNixvAVvrV&!o78%q(cE}`?zesk-2v4EFV
zh9Xlji&=`)_-&YUA2$kv6K+I8qOXKo0txx)bnQ|qsY}O<C3Q6h`_U|Aa`!KS54q<}
zXS@hK7C3Dkt{v@x*GfNh=KK_j4QH-)@>7N1mhpzKRD>;>@Na}aw`uMi0r&bC&VN&z
zjiUPeHH`+Hc5#j>b{TARY@dzc^~Z6Cv|o&X6kVE*9?a?MPS5r2X|`HUsH~ceDOO-G
zi|?J3yIPkvy-^Y_1uRkjc&(^cK$G9IvO&{xYHT;3>vM8<pVNhJpyy@)#;xf`JltqF
zdVA!U|At3KhYQBr(C2}xFiRxZz+U(1?a1c<L9aavr>-w$T!+NzG@Oz^^;^bPPj<0L
z`V~n!$!dkOLY7#=_{>FV;xGkKHZ9kO8Qml|k1l~w&79s}!Bcl4`SYR0T9}DBql>_q
zf?A1PYD=PpV9^DK-rRCF2!GP*tqbag4s9%Zfz**OnQ@-41RabAb+c0K>W<~Kx=jDS
z$WU629_J1ac@;M&aL~4a0I{r5(o=%=h!8Ayu^0KuQ`wb50ceM?rytF}b+bdH>H6>^
zf6VAqp{c4yMv}}uW851J2$;+(p{dZv>ugxn+7G|!mYMLy8LYa=_Zrrcjlw2p;FgEa
zcQwDwK(yN!W27|HXxnhM&op*8+BuI$7enZy%_0O0C2*sk*^%wOP`A(+<w@<;&yCUR
zKR?xO4f}?%5{a+1*(}3Cc8muPb)>aY{k-a}%O?*TgfMd%82FBLSF_q*{n%#e1$*y_
zUu4w8oVSFKa+Es@31f{H@dF&!x=UEJ@x}>WTg8>;-sIhQK`krny$5FpBxhIor5y1F
z_CR$szLF<<M$3kXnWeHBDI_%!Rsnpbl;`h#2%Cf4<pj4#bUo;b&Va_JG}#5tHTRDf
z((Z@Fkz+)*mFEWQ$VS^$1_X=s6@IHWH?QO=@<C%m;&q8imtVrUx1}$nNhbf&>17Xs
zjv~8a;r?^D%7C`BWGoHqElE+YO{KL}wlw_9a(j>|x5o5vDzD3)o4C-m;raF9PLJOM
zrF7Idxj7axoaqZ)Cfv3bNm{9VJaeVYR!bmsQkc0@GPT5(`T1|zaUyj(v*&NW;5E@j
z0zvH@ZKG+)j$e*zFNq_FCxl{p4pdXyjxhtm(Mo;>D35@qsp1C_S#Kc}I>A6&MuC(<
zA0xSA&cK_tYC4uEn~+KLl8ja4)!>bks^MAy@uu9_Dn~cuP%Q*93*n%03Z;Y!uNh3>
zV4$lT#pG!@m8j2cK7iVJ2tw_LhpMU+xvs}{)!|CFuB*F$HqTr2bW#;-P-!YW|AzLs
z&GdCuO7u-EeZU;iow)M`R*pgc0pE^5G0aGWX}bZpIxk8RiF#pIG4;dmVB3e+Iq?r9
zMCM<KTW2IHB<`fDZ>RVU?$>JN5e0`dE&b}-?^?T^Vv~%58(T)66x+3A0?AyASI`rw
zH$!i}VMQi*es3p=4a86BYv)a^*I;dstOM`s$JR6A`YxNmMc8@ZH@G1MQc`3#)QXCc
z>TOb_H`TvTF$o~xjZD!^EB3u~=7+4SoyWJPzVA0N+QMhNV4ig`k^SM;_Ax;Yo51kA
zld%naaVE?Gu3*+>c;akItUZ1G_h^{UyI*tW`oXD=$T<?yPA5jXWph(yLh>|tOvaZ5
zM5puF$xlr`q{UNV)e-})0=zluTJL(E+~+3tL0HR6@3$baGWjo_G$rWgVA9BE)J5*d
zr?S+e(;A3m4S;G1HaRrF-RC7Hfq_qF1H}#vR*l3iZmhpq0$gmbXklUWrI2i<VHvNM
zv44&#qf4dMR)_Lskg-JQk_1aN=0cah-mwFymoW{W5d~L6;*#*ECzSi_bfN3PlHP-T
zM9IOJ+AZK0E!SSvMPjqFk!^vSb}Y6S`ZYpXqM6IzPNx2)z2&)^SxaI&w+pmJVuiFM
zxxawya_jnLc+|5t65RL4qf^wd6V4v1b!Ko6k+;fj3S>YT6#rpaMXo{gyouP_=ReM-
z@0$Xf)$N0cj|>H$?={G}H|>SrEwgpe<PAZQTyr{SkayuXJ0B}en=ZaY(za;rzOouQ
zYGKK2HtwU*jz=$h8Bp>oH@^r4E&1p;U1Sf8Ji5eC)WM>mXEg)ViFT42D;?O;q1tIM
z@_L3#G44oygeV1o&Y`!vt$Y3e(g+;|sqQAD*L+`LgDFv)C;BrU#gjNG=i4LiUsGs6
z$q;MGMK~8N#vvGvcb6@Uupw(ZJ4P!-C?yd?V{U({p?#pyg6|qDGr!e_Om;E3)PIc3
z)P&NoDI<3uc2q!1*g3Xj1rBMa^8N3h30JE@2QCLX!JG2$LT|%r(-X{w_t{sX*O(E}
z#6P*)feREV9hgujajRI8Ct)|O^8U(F#}e$v+iLT+ONC{tq|;I=&FFAWFzvSagmDXU
zG(S!ynS>CV?k!%8E4Lh#kKVBVfhslhfaid;WrV2Dr`Zocg3{yGuZXJ{v$Ol1ld{Kn
zz#BEk6;uetrdN+oabO9(^*>T%T%#JJRvH%JRbc69@|_}hTe1<IAX41TG%|m=ikT5w
z+13%olT@4eICTO0rnrcO(Q1cmJTv{)y-xlyPd~yfuhzjde}ULlL^6Nbg3{v@BeSj|
zr_J!X_~$HQ@W*br3pC8_15&t(yXJek8PrQ|B>F<H@H1RAiY9$iRJ-Zoc}VXWIx7j{
z@$YRrfZ$w+FPJU(eqCa~=R4mye~xtYilBQDR)XYWKBXiPGH07(c1O8}!}U#JQ&*4n
z+p=VrR31)x@7vo2B(Li7EI_1azOdck3!lo4ZHHMV77#Te`2#D>>oW1`as{G0(XTX(
z!g|l{$uN9RGjp-E9lZt_%UywTXFht0qP{zUXgVH@vq!5z+wXHCZ9tIE)w}w%KCWm0
zu6>7ZD{#v9iTQ!3yUC=l9O@%FVCJ!Ff#Y>f_~1ySzn)ByEo{B|@1iHOze8vbB-Q{j
zd0b8J)8<glJn6#pdUIGEu-dm}tNTNK2LY0RcFT5iW%7@ulTpnuqmSM?DjoblLLuny
zU4ArkKHVFPbX|;78j!}E;KakWjrPz=qsRuI^R?h|VUop7Pw<pT+00kZlk#EXs(%>M
z^gH?WgU)K_?z-7~!8M+M3Oeyf)F1*h!>XKv1gcj8irv!I?&p?iidP^U)qadmwM@qp
zz8*VuY!-`-)z6uuX@Z)X4r4u`ExB-zuJLgKjb(%0!SRfMi&*}tPs5&H2CrsC!TM^l
zRRVs6P=H6}NGi{qfpJ3T)>4zTl`b%TqttSMLre~dRv7Mkum(V{(GJ)2M~sx%X!)(C
zL}kXWM+jk!EKi`u)>6a+{N46Vhx={xjOM1$4<6-{Pl9b7xSm06XEy}#c;r_JeIE@`
zSHy~OS2fh`-e_0awfK2+XN@n<(K;{-ynAh&(0|YM1uTRm!-nyb;x2_vVdyisIQ469
z(ym9NH4s*twM;DLc^?Yx23n$eiOpr-dY7v>PxE$xW=F}MIbaez^ScCoIZD_pYZ6a#
z8{cQy*93-aUYAa77b|iuH4^N8%0!gn&!z(AXReW-Gn`Navh{slimu6r49K5;xw=U`
zkay&7LPdS_m9j|}@jL`gtuH?`Q+aneneOi*f*d8Yd1S{rA<-+Gsc`-XKHe2m{Mlx^
zoKKzo8(rB_8`$>B<wzAeImwE)jYX@PF8gPhHuEV`k5wcY;uEiyDRoQb<v#Hvvfr|+
zHp#A;1#z=DIJ*n6a1gR$A~Kb2sL*Q6m=lAJ?tJ#!7PZ3(`YT}8^H3<ZXxU-K2lkJI
znO>vrJC1d*%XkBuJu_R6u}>`MH#f|y)nZdm&e^nRn1harw1dtw$3;6+B2Gq5guLvl
zHH)E9_ispTcTxV_@$L`$ETh=4?o$WX9>CJbVWu=<y?OVW-gdB><@U3qvkj3+3y)K4
zS2gZBm&wF*%S26XSB)1;AIzJ2z|Gb3^(o~|H=?nwIofOzX-I^ypLxW&Bhd%c_ivx9
zimbA0XfG<!Vx4vW7#?kG&A-jwvPJ+0dkhxAZUi0Lwnr7=nvr;$d>hT31^9pSoDbfx
zg(o<eHSc{F&o!A(7pTvJBu@}2jCzMoeh-^M5rHbLL{e#6SaCwxfEk;6*0TN}2vz#6
zTP6?H?#DQUmyC!jYZ-iz+ZPeeS;Paes=)0o3NeFK9f?BhvEDKmb}RZqwj)kROb{U`
zAg(y5Z4t}a)Z6QV7d=FT96y8~sinS?CnlO?mtnL~T=e@ltrw{R$c`5IiV$RJYw56B
z3Noax{`R^}Qv%|$p_qlD$=H#^eDC7yV>L+$`BCkQu>en~E7@D^h^XWno}om*+_BXv
z{`M0WhhCV!3@l57;Z5su-zDXoj=IvX*2`w`5v8Ikk*5AKXP?#0YV7omI5cUsq4JE#
zQU9OJ&j_yARt7x|EV~sldTDQmLb^r@GX{<F0L)6Y!&q&-y|)&7Wpz^*v%PI-$v^i;
zwQO)=J3ArE?;5ONq@ipi*tgbD^swft_{fl|d}w%5<P9L<B1_p=dp;fT9f`eP7cPbj
zv7T5HPT4Mn6~EYq`d|^ovaUWV$D%9SP4Y-ksdRX1w4;pPDqbBQ6c0ZFCLw$Us)>lM
zwdn>qs^b}s88~6$%#qXl!zRrlF%ao@z)5J<Cvew~#F2&iW1^eCWLImWMi0N4K2(s-
z4&F%(DJSPZfa2DJV-SK};_n-PayW=G1MbebH?QmeBML?_*wN}A|JG^HKkn0M;IO^3
z{k;6Pv7L5)DG%H3YDqQR7R$r@i7t`4Yr~ap*|&40qqWrASP#-a+%I3K)7A1Pi=3@z
zPxpPZ`&eqyGmQO_Im!MB9xt^3BO{?Y^!!a5iP2ps7d7H9WhSbvb|ueux0HEEyW^9c
zJYSvhEKB?gxNL=2pN#CVCR3cH%)Zf<lh>I!COZCJuie3ewj_vZuZ|dc(UtQX6b$%#
zmX7P%iOjX0`axQ*%fIumD4Uk8pqq(J1dDzn>yC4)QpzhP{I}sMCD?&(o{az)5u>{V
z-A7WbRv$whW(%Gb!xdiXK#eZLtW0ywhd~&laOXAoNv4w4HP4?j!iQGi=YBsO@D96+
zz4!Qg0#d@jn>@OR{6L<^4E@Lq3r}e_0XsF}(~98d^JN_&_3j>OZ7eP9I^QrFjW~(C
zMe3P{1^y6fM!|}m@49u?vYe&l)x4kcoGLQ;m2ejg(3f|D8_*0eL9RUx1v<<z0pd$<
zII?KifLjTK;EkhqgOhdL?=#`arkDU@l)vn&MDtfaB~2UoRB{pah7GnlU8wE_bZwDp
zbwy=dp+I-vaw5{TJl%lUKOb^IBhvt+dlvr>`6q1mp8-Y%0wUXEuY^bb&?^{|><`}p
z5lkRX5v6*iH)NE<V)24sUWf=dZAK<$R~tq*cKMXF^$Ay?Z!N_%w9-E5K$aB+g-l2N
z@IoWCVCg)Uo|PfU#UQ-*SxZ|n=I-zDU<h9kbCoWb>%^W)by93?2q^TYQh|d+-Ec+Q
zza=ts^?n9iXR>{&pvx7UwtLK;bwG}{joic^@DlCkBuq0du41i9|Ap7rA-Cl*J8eYG
zif0baww7w>&AwIK>_Nb4zu^%mQRKTOGCY##wGL+OMug!d@ZJ&cH&tF~8<Nu1$wV<p
z9#$NE@1#J##L(JuF&pTKOuDNdf+;CeQW3mVbq;Oq0q4N1ONv1EIEQ#rc8O{aJ9jW0
z?^NZ==n4#(M<JuExZPbeW%RCf+xo{?+Sl!PB=!s$-22aS6aI#WwRg63Kd*`s-R5Yc
zJ5;*+O%z)iDar=|g0SKGOOYVpu{?FtKu_4rhb(&IY-)UEQ5NHU1onN0^ubW1+caTp
z!g98mu_GmDO^J|@4JV{GliSRn$YWva$rm+Y4UG^#hJYs&5(Qzl?*#Jx2vb&swU`(U
zQIA<J!o88v_kPB;5=7Gcmxc_c=1rvt?A&M|*sINW<UF}#*I|K)-C<2?(o92f)_jX<
zkpYIk+s<5>IE|2;E8M?R|Cdz}S;Dez4T?CiYyZX=Sxs*b_*TKU*t{j(_SG|7++A(k
zr07+i5Kf}bhmsftE{N5I=py(6>ynC!lMuy)+23#^kdbmNQT)lV+(&SHrOLtVPJa$m
zn%9f$|6!_C@$tXT#{k~*Ni8^WoG<K#f6%`#mO%U}-KVV7?}>`dnQwScx3G|OSegZp
zv}O%yAK{Se8!lu@4Np03OWg)*nA5jI)Z2>%0!jL9);O>GdgM33)r&)69dE2g;OV&r
zz-u^q&<(=58yUli8wiapD0KUdW*nP0Z3I|R>ai3;F0I}(jHZ9Z!?Kubh3FmHM6<B6
zlPRoj+NFLk9Bg!Rf3~`=&f+36RJ}=?9C&(rJ95MnLqVvg(SRdc?aV_8ugaAnBnOK5
z;VZz^&=z)j*D=4Rx^)swNErz@&mlgzb8pNcoi`rEWM)c3zA9uAp|r#{zWKAeHPU5C
z)LescC=A~i2sKYlx?(eLh&E(C-m2l9=UX__@JAFXP^um{44kYfI{Las!aT7uW4!y_
zCKF9RG)|35TqlagP$~i`VC-)~VPluTk<g?%=4HOa<uwo#LN^%|!Z6HrYK}CjH?4Wt
z2;i-kblZ}p*|W=Faz+YFt}HEhVy!j_giI2c=PM1pw{uKYIe&@)kKt6^blko1xd#iE
z)wizhVOmJYGl_~5J}5w_@`&pgzKJ`{TqUY)*|rgg1!G@f72STENSs>}CH?rjzo(#M
z_}cc3h&U6^pw#BeIyv8=D%ssD)S`ae6}!POiHU+~y#}%xDNE&G=5Ma$?W=3=5o<i6
ztdhRAjfx|U9#&JYp&@8`m>E7{u7R4`tHD3&nZpgQ6xpx%y<<O&ZL4<xAU9hkAJ8TI
zCg^o^*QKDhN2#EfMs+8-!yJI+9pKaH)vDdF)hV(ewxrY}jbbs<+rz9BQ=Q7@)j#;B
zbfZ$+nd7%O`oXYfgWiU9FyMtK)U4@9DhH+Jcx|A>9Nd&M8%0b%tI*6X1_o}=wpN3h
z0TLRr>#_h@hZ{$*lB;Ee?KgZbje~geEOGPt5nSlLg3GV#VQs>Mj8Jzc`TCz(pba>d
zaJH2{K!A_!A}Pqp*2j{L50ty~QZuq^>Hq>L+Ji8E)*Vcs*5MW=8xAtvlRg`<=lW*)
z?<_5RMeyMlkie?AvhWfc@m@9qu{D{6I?uvX`KmAF5ii0a?*g3ih){)yBURSC?Eid}
zusl5}=d>Zb;I2VNuaGZ1ch7x^Kxdj^T7<HM>)Y<On9%-}l6&mjM&(a6u-A^lnl9p;
zU47Sln_7;=3roA0aC<t)B})_V$vKqj>3rf3(W7yf02v4;`xZ~r3xDEEB}X{UFd;fQ
z`fr{a4qrewX1rx#M}Gwfr4u`k``o|$Zh!&yiHxpdS$2c$#bGyEIk3G7o=5j4u`9ik
zr3MUc6;)S|z|G3=Lq!gwz1pCR7d5_rO|MTJE$MqC_Gs?7C&@L-M$^kO{lbU6=3Six
zvXQ>zdxph1cO;V*GrY8~L)^NFe+~n~#GH5?LQw!?DzURP&^pj7-SA<EQj7kfz2Lvl
zA`tu&@itGGPokV|7+(P0fQ{TQuMHeT@K1m(*Dfd*2lU^ae*rDSgK}>`@F3D5P|V16
zz~{F7XVeP_?E3mviwLOV34mZGy-w99N!y-leP5z72$0{u>EV!IzkQi@s+Gb+`?MrN
zWYq$-T=anSnXA6N_LSm)B+w-=-YHGNFkp6mLW6j#HBEYhlSDf2|G^^=6TI*-#iN>j
zC{2mEcQ3SEBz}TJh{}X&=WuqrdXqca9!^iHCMp=2q2>m626~3HQ!r^|uQ+i0!C<*?
z)dgp+lx1!+GmUW=LK%s*+MuA~$h3CpD{y%{37njWU8$llnxFqofl?zf{BQVKXNr(g
zE;1rM<$F#7)_DD)feuo87LUyNd72XS>+XKk9)3K6HE;YgN&MgYgIQCgCezW#+A!(E
zlbO){AJSo%_lv+4+im57<4i6PYN%z44+a{~6oXVGQVP`Y5J(?+5TMIMecdBS_hAgh
zCKyU?0$Wn~0^(l5VCXh7e(~_PMTQC<L}F!wRG<pZ!_OP?0rC8_UNkhz3<S0OHbKh~
z$Z1N8W<-3knc|R<(#L7%2W=(ZGOwqE$4Ms!)TQAyAa75OgdM0+(=YG>g3}5X@b(i3
zy`R#zD7zv{J&e2@VxQ~3Z=;DygN@y0=zbs@7a}lJf`#i+ldx&CsKAkBzPrGS+Yd0B
za-$x`LaFL4WUn(u;R5#es*D&>%E^0}tU|#H2U+a{c_ochh4sX_Y^WO_{R#)&{LG8C
zqR<{D1BM*Wl#=l&VAjTm#wc(_<P6lw<L;(kYgLeSr8<+vgn4jKHFJ`D&}(sG!D-Ym
zhBF}W8pTi=PmVgv5;hEyfJiU>`>?e+sh}8)ozhuQs@D7K_M$ij=o<bOpgMR%d$Ika
z|MT00asOvn13>&#zH@W^%=DpXuY;IGl>&{7Rh+H86;6U1N-ek#vyv1ZV>s>_@xk*=
z%V=+mwYU3g;+`EOxr8=5D)cKOr8sbc?ZV}R`*9Mu_Kj(^IQh05)7TebhzM~KlR)ec
z`%;K?f5@(%L`JA*X#8%5@ZGdb1SaU|U#-;&rd>XQp1niNE};8q`Ci6|^P+$Rm9k2v
zhrnuQlC%+_TNS%FT<RHd=#ADiDC~fFGFl;sJAd^$Z<<um=CE$_UHYPwY&fD=`lZUi
zKA3(gI3rruzaI*hQP=xjWOg~g>k}{-Q#QW_V8o3U{sesc>U|g>QfZ`w{<j*cPC)+~
zO(y{By2puFTX{<-Wnk>{-+V>xHR|Z1d*>hJpZ-X1iN8lbY7zL-oTA9=gVHuM7`gcu
zAXn4TvTJ<%%{gv#WIQ+3+VCIfG8+iA{Y%eV@e}6xz~m~vyhP@LxK~kbzT5Wshnjzz
zzzEhfW;cbV$4YAbi7x1il(?}!+q>vE=52?iR7s{wJp9R1#73g;%DC1+uokVj7c^P?
zL)*Ic_ap-{yw^^C+uiAuR4Bi2piljxO2%X?&&C)e&CrJjl|r$I6tP3)))Hj0++<y_
z62BG;_n%Hyv}$x@$BO>6$3QjqU0E0x%A(<&gYus#)V|{OnnNxpV|PkD%0X$~J`JzI
zvtE}&mX731-x%v-`Xc_$AJ(f%6s3z=7R=VlBF!qBLsDgy4?iSo<s&#?Hs;SCjU-gr
zlM$9F;DUDroD;}Zl@8uj?OqkZfgy(56Oo9`ASIggEq{r+J&iw&Nl>5SK();pcW(fu
z6ghYhsOTA#K^7i*t^E8MdJRJH6M2^)WeT9bB0J8uPO<8W;7DJcCOvKt-B60aq{ZI5
z;6vN&oAKD~5Hd;O<`MHC6ou+Phi9-Cn8MYWUg$;hRx-(xjo5BsvDaA)yf~~~Y=loX
zWHw!h(>hs|+S8-sZ1IbT#0q~FM-E>l0(z^C2booGos`D#KNt;$$n2)!=0x_xDkWf6
zt8uJ4MU!iS8>*_9baq(O$nSwyc$pgK#j5*$%bzl_VPj;T(I6dg6Sss?LK3#tQ%w8W
zcmL$zYiS524_i`GDU<@P(dVQaNba!y(0!oYbl;79X|#?XO6kUT9OGh^M6*bhn6yFx
z&7-<>Zt}FqONP`ynG?Blo`licD0VNtcqewkAde$@6sjYYCL%&<B#b*U<15r+D$zG`
z=FnT2zMtf7s~2lio^iq>qAWoxj^5b*X^M86G6a|6lT`JKxH0HqVFik<gWTKP-><wG
zi`mc3tJG}{AGy{!hmnKJ($#Unc?a@_;xB7gvoB;*9fQPKD;o3fnOz~3-i}><)+xP^
zGbRY8xEwk(N(yJjBg7b^sBQF)p*fnMT(dF%nm!Y{Zef&&Y`0AD)EoQ*!~UB$W=E>Q
zG65lESVq0$u%ymsY?!MieZ72SWQ7lc{E^w?{FjbBO+8Z1`gZ)0dOoKDD8`J+I^J~G
za0<r2U35kwIIKFUB*9<j&R^*e1)4`Ywx3Ohs@2v)MXbSvqo+Klmr)GcXY&^=eiW2)
ze5m7~h>lf-p|2k)_QyMDOzz9rLWKBJBnuDYJXaD!mMZAl2hv$Id~?<d4E?1Y)4D98
zC{3iF4^!3FfbW?%fl+EhIJwwH{289U5#T<z_Y&viuFj^76++j~ie=b{g|NX5r|yPs
z&2VBNt+n@>A|Z=E%2=&G<5`1ZkhHiD2<$$k?E1v$l!FVxZdHJDNHC@e)$9R<^+%!%
zi*9?Bvo1CT(+;&QDk5lCk~E@IB8aDW`Se<-PO!y_|K{tdNcfgGiY>jKexY@vc9Lmt
znJjPTrKg_-DU(i41X&PBOLP_ckJuaAwCpcK*#PmkFz87fiHTaR-nv-^F`cpq5(}>x
z(T#?<?>=KM3;SmPO5D$b_wBK<j9ifZ7y0t@`t$~nG{`zGOE&oW0RT;dh>KyAwsMrk
zqo_LfW5uI%{>jjA`BIMUqks<gNeW|PpUCcWDRPRx7t!Rijv)(adsI7kM<MNdQVg<U
z)$=XYaiTKH-6Bc*399i*hKv)=G9LpeOg3^CV{EvsNRhNy4t8tmCP>l~L)(nUydCqk
za4D3ZNDkI?Ch=9=AT+MRhl@^8B8D3xt7Brb&L$_tQ%6}8emcf;4zDH9bF_@g5$-cd
zD=0`)vXC+w7R5C}I(G)P3RV$y(;1=oXvFlCl?tg-chH7!+;w!qT-?kH_C{lyd9cyE
zb=t(A8_XJEAHALti`oU}=RFEk|Apr|CZ$Nq;ew?*NkBlG#KB3wUu)!SQREvede*L&
zUim{X4eav!K4`$4YLrPS7}hic*P@!c2I!QbRS-L{^Lxefk%jx^O&9oc9CLB5jOl8{
z1GkkKp_#XWP&m2bB<+!w$gGml8eDiBY<_!qRr7_=)fXQ)Cw2%PGo#I|VJ0em;fLg-
z;$aApjOPlLpN3O1n#jl$zD6c$-ta_7ZaasM^j^ksXS)sxZFo<ecZ9L*WxLxx)0@2p
zb!f?M0lpvfj2`qz`pFNrNcz3m4)K)dq(%6#Wj07+p}oFyz&q_$yy{RG{4je1srKvZ
zJQLAf%#1Mb$!OqV3jXUiIndR8&cp@kD;1;lX!yRTUJn`_r8P?aGG5`|X#PR-b5WgK
zUy8dd6rYk3S2C294N6~REl5W&6(^QP?>UMb=O22s1oTGwcx|^SvfU8TEZpu4p$1<O
zyCZF5EO`J_enxsK4*AHPI21JEsHj*fE*&j48_9my6kI+FZ*k$Ncl~bGFUe9WB@kmQ
zvJJMHfuzE(u!ltbLYRP{mwo!MUaf8#^qo@pK`NWA4TUbYY@H1faxDhi40sm;{azNe
z9FqL_KWdRE*Q-K}@GkAR^17@^fTfT#^!LZ)rzL7hi+THL$?_ZG0}!5)Xlv1umKJp)
zC@pRI*K>`s$|tG9Ih14${4q)-Pt_)W?uA%n(}7r|BywUII42jFtL&bkCx7K-rdUt>
zMba^N(UM-zRu8f^aUK$SKD&x;Mx`QT=A6G1LGK@NNPS;&5>}jFSTj$hi(h35rsn~u
zw{I(oSHgohS0Knu%1CVRc|H)*MuCN?L9^R?j_VBMsdza3xDIToz*5B7_a%wP8@SNQ
zkg@co!kTKw`Ij@bQk>%hS|b;u!kSYBNentQDBc>Z>`S*mYT)VjNNibjND2?Ck(ixA
zwUu}{+MyRr9SkSc!M~bcu4}FOlhQWf^M&^Uuh07aGp!hV(W_rw1?<9_Xeq*10JJ^e
z3&bN18_7Pz2ZxgfUFUE12tE3B1nz-^pgwoGfp_8o2eAaWj9gA-jBhw)-6704fiv<^
zvYrD~k$&4JzM-WR)E<<VzSCjlQ)(=ergvG_d=55-NcX<vyy68FJNTvyZc3No@^j%^
zE<usQ+2TBs-@-Owj#oV1#C27{xf21Sk15x`?m01n4L!Fd^bvmQa?#MUjB`>+z@3NO
zI3fJf@{a!zM0@E7c?d_EG(c~2K0aMRL^@z(ZrQZh4B7EizS$F=y==J9v-3LN1s0?}
zWSrWv8_LXxN9mv&mI&gZV>rR8pbCy3BHp%1+r)DFl^=e7{CA!tIuIt^-t(^w%X_=n
zT^R~0{4yBquT!j<_xp=9U7bJAm_<SWF5-OX!WQsdnI1;X6xVM4^4citbU9Q=Pc-me
zmx{^4pXbbD_?+sd>o7SbyW{wtL6){K^2s!KNqzSkf%Ee=fFRv<3sWm-f5M{9=K1IT
zPvzzYKqF8Yv2cI)JG>yPGOsWngx14CFfiOs!fVmsnV|SMsjiMu1LxAp8=?SHF==5w
zXVU~y+dG~@Zt0tFGaLt_=;6wE&^-cj>26Rase%6A&EDgmi7P8v>5D_1vj>92%t6qs
zbo3&FdU?w+-mpAZ4V;hqo4blj9M3p<VDxAE!|@5Es-LKCNa^gK|Hj6~^u9*(U-o^b
zgNX(oH0%HhARwX=;7S+m{(?C9!1q2$ePP_&^u6a0)xv6JK;{FHvqBf#rUi~wMNxi~
z0sp#P7|Pa>6b;G`FdvvWEM5%}?C^#JVTFT=hBzQ=ZV@_}5{JHf{kz53hEZgUizPzX
z31<c7ZDI^VIH2%f)8SzgLJv)0b|exQ`IS7nI3k(1MIY->hgkBmf~vNr`QJS0^bctR
zF?{?7D@;CP6L@{RKa#1f$wF-Z5+@lRm*J>o18SQYdcjhxafR9!ziR>0-W8;8R9hBA
zX26u$B0p6*WgP;5pAz0Y6poaUwCSq(gbguU=i;)9?#U#a(rEfwtTHOI43oh}1a}dB
zNLWwRBci+`CeR!dmSh@5)={^uw>0Y8Y*$6o#kgK;qvlE_t(r>-1P*t?ciZ-RA~^DW
zPq~5nx?i7Onr|Rv68_J9$PvKzjWkJ!cl`bCneW)GU0iCUYNOq)C!=wR;A&<&M{cbe
z?0Z9}<b<13X8$e;ZzwNjK4wW+g2gCb5FhpP^S`;wQ2mQo07ry^Bg7FPgFAS=V!4PC
zeV+XO(6g<L>Lpyic(}Zm!Oz&z%7_vuPEY0bu#D|9L`)K|mYi)VC`Zo4kqmo_v?r9K
z$u+3BALeWjnHb9Fhu@_p+fTpJOvC+S8A%S}P(jyihnqbs?x^Cz&#V0Y=dL<)qSkxS
zJrKON_begw{2szY>K80_BFaeHRx4g`1Lxj6Gu0#m*l+Z~&7f}+LbrE)r`#Xj(J`8k
z)r*JF4NsNha!5Bv5#xtg$<+NQNj_%rL7x^(PI!|i`&a6&e}rOUe~qMoO`4Xv#Z0$J
z%V4Ld3WcaXm)~{zAw5oUm`_KZWoAMYnC+~prUms5pGoKxc`tDqBX?A}AlB3ShYm>q
z%^QpZUJg~A3&2<J`yMT{m}%ihqaVR|aZX+YFbJ1bPfQEpm@2{5AoRMf!yzf`aTJic
z+BWZnr{aF7soCN+N+F|jrmCWSLrAq+%;)9bcT7yL?+{z~^|#B=F2IQXa3r{pF7#ph
ztGJTP=mw4y1^tiZ!lXs6jE63lAVl{{_ue-{z=mrBNra%beY4!l!9R#^kIJ;S!yqvx
zj4{OtEHZ#PGsN7&pSXTjrT^zA3QCdd$PbjChrV|H+5d2%AB(t77=E52`K{hR=!gH1
z3LDP@$&+OkG_&Va(pkiwXw@^&>RBJ<nQ#NX5C94}FjP#kIlk|*_*o9DkclL648>wo
zJZtd`x$UhS5rzuWjQ<2v5N4HAu*uyoe6>*$BCG6vR7$esobJ#4BF9vl&QV<nMYHK?
z5td51D^HdFeF27P#uo<@DXG}NNqRdPuE)p&M4DGY;S-luv16vLo~uDKTPE0A(CBI?
zJ-05m*3$=O!aPi!a$b=t?;CAH)|I_<(+U~D{oK(em(H1iFtTDU*oF`S{eoYgoxtS(
zJ<&C?@cEK<{2i<KwE{-7sQZlk;M_?_N4z`34L5HgvEIsSplrg{KH4t4cH=cJ5U#4L
zp#LYvM~E8xk2B<|acUv!S@GFrmcm|`!-29tp`Z)qA1rLPSU(URja?Ul@m}Hrw{eHJ
zMSxpmzXjuDACi9K8PUh?_$m5k+y@`;hX#qXA*59Ug@f+T+%<V7bV~`D(HP}&b*L=C
z^%c{-6povD)KDP_m^`|M_FQzcP9v4aK3@vXx|dA{s2$8D*|sO*=n2^U3Cq1vxm`Yd
z?sVo83P3W7u^qC?xFMXjP1)mhJYf5KJ-Cp!B56xnn4p@{LeP`yKp|9su{<-_FT}tD
z2(o*K9rtTvO*_E#%$kUA<-&|~L5bdROd5{kOwoCWw@K9bk0#5U-Bjg%>ayeB2qNGd
z&T*M@>0-A2!o1Iq4{92+hO`UDn8&C0V!DnFU8<|N9W(nN<0wSlf#X<}$TSC+zo<S|
z>GjYl#jSZ^s<ifvV{kTwQje116#Gb$SCdPAvz!lTkz%Aw_G&gygJk7LIA}H~YZK+n
zqXesQ&W!gN=qD5ATcwKd@HgtMw*ah0KdNUP;=^pI;w^7lBYP}m>?oGil`JxYmHp`e
z<+Sl?SH2H;WSLs77e|`Ku;!~qT%NFciruapK@e`pQ_Rqp`^NK2kJ*1E$s9b;lS${c
zMB6*fhH?MLN8^TZAdsW2Tgl%!tQkuZyxGE+cYZlYD5=d`*|YETw%@iTyT;pPUZTYm
zkH`=1UT?vr9l3+Y<eZ?8%t(EAnVFa{GVgT!;{$W1MCI=sqzJcAhqYUW_tyY1hFX$-
zU{N$DsW%YJ_t-&zG{ls6yh=`si=>dCFUF{mScl937n~e2Ek86ibW-!gDF0P4PvwK!
zE?BMPPB7oL1=a)c7Y9)<gcD;2Z5}O51MSX80NlrujaI@+J-0C5SYN%*mVHS|b}|#k
z<z57s#6ztAJY;Czcb97MiIRh?s0aB4FA-u;>8kl=CP<&I8XNL`Tdn}~mh{qt=SN-_
zPLPcjvD|V#qGN*d<6aojTKhc(JE-_)myv;%fuX87Ww~>)fb$LQTO^J>DMJLGrvYj6
z>i0u;>z)h=BXm2WCt`$)U@j$wHyK#X<Z1(&619H7?bKh$fkz6ib2eF~DbodlVdH*B
zRkALnu30Ywc0bYZyyeokR{0>gGO!z@B}b395B-={RuZU?UK5=^#|lJO*LEZ1mzv7e
zUWv5kCpJ+!3mgH1V1oTr$<Rz>>A*0huyJW~I@Ayht>JUz@1XlKhyUQ|0XE7u!61Mq
z@y`+6LFzu(_)^5uj?DcDHPR_yyB!1b(s|LzbjR8Ehn63QMRHTrhj&sxPmJo2mukO&
zpu*a%3sVa;N5fkIz%GP`KKmu++P7UA)p^KCO}w~Dexj_4%;iMbtUE#ZSb>_W+kapD
z!>;{|3X*t3f>+8Iv*o#bmBZR4DmKpBUl^n!RP)R`b&2D)x7z5Z=d}qCSo_p%NEoKR
zE-M}cx75%uP_4_vc!^cZAz!{5=6G$%*3)xoK%gxFp3n(`rkkm1`10y%RN38hc6KB^
zD7MP&-IJw2>3XgbD$7#`9jgiMH!V#tWiQ+gMgI3@?^}d+4zr{HQd+h|$U!8Ft<ID(
zIuq)l6U`j|H_8Vjz(FV`xc!LaE=jS?LM~gd38a)%g4auX{ts8Q;4rtna$LI>wZd@S
zJj)#lDg0;=Gasl^mRFgl1V=P-u`|JkDMIJEJ8acV@c}W*V<=CQgDnu?ajU7?g~%4K
zy9cF5eBfd}GZf7grhj)0N*r}s{N!*(-pNU%jp!e^1cvoRD9r2CQ%jHLFZ=P*m8qyJ
zd^Zn?0*P`B`V=`e&8SNP_(dy$WP#!HZeT8yBC~7t{+5luk{B2ec+{Tm`!(LSn|9pi
zku7RMwbA^HBec>5Q!xLbnktDwiHC7|cXtGNhjF3M9t|7hQ5R`j8uSgdm33A{Qh&*3
z!RrtB8fI$r>!K#`Zp|^x|6iAiwq+@Vcfe<5V&dmf-@!?Nho9VR9uvq4+ltD`&>%Ts
za(q5{{^+(nE~zjkjPqPL!8l_#vX=a~)sS060pD#e9-_tZ!H7r?rE=B}u=c|^ONVmc
zs}RG#ESgE2Sj}aOuUO@~&9pMP<U%#VWU%}OM~mUTK~YCj9w-=3cbJabO++-9GXIAe
zrBsrjD-ZmJhUB~+exY<)JVzhcH=JC(u73r}F<x6h;TpRItbEoOFm_CXV%A6BL_}s^
zM7VCH18hWEjI})ARVHZCz(&7`y29@O;GN&1%7=p?%l>dqTFz~~D!Uc>?ZIAt2E^V^
zx^Cz+y~y<??36=udqMcfC`;5@r53xLt8yL%PSh>rOQT79#(4(}-jy`bRrXV?TdY=|
z0u(f$A)DuTc5a^o;<PmttuZ}#Gh)}9d*BPqUovY8PTUJn9Ac7<FZnklgG=U`tI8@%
zst0R&KIJmp&*6cvsA4n5WA-E0wz7`~U6&!Y#=1m0scc92-Nf37o*B77pp0o~4lzl+
z4e?D{3a3A|!b}N6ij-SlhOxK)LP<^HG=_IClG%@9W(i_M+;z91vs;~9(TJX!4+Ae%
zEH-*>R?J3UM5s(hF_P}?C(TRWRNii$XJi~h-E8%dsvsV<qqr1Z#5v(~+fVg4ly|+v
zmXwe;DJO=YYsWT$MH0w^cFh7@k<`o-c%l(E25*VJT;<>nOiT0x>v^wV8-yQduK?pq
zJpH(a*TtpTw)?WIAV-sf;pu&yid)XNT)_jr2atGO#tNwFz&egUIK<EQ_tkHW4}x&M
zuJCjGj}_szR$JmPmvCvDLRc2d7_1L&m>?;b+O+A>Le&iB`10Y&(^Z%CF(sg-gbYtj
zH@}~K11_w_5Sq#RC86ELMWc)EzvI8%4ZdU%>z&Ltnq)@*rhG}6S%f)RZe@D~u^n==
z7PpO)9wzrGEmpKXI(<Y<lPCAL>%v|mUS~y65zbe@QkaZlSKC4wKuYx4AYkk)oUlUS
z%uXGt0_g}+A<7bHoyVSv-?CPTvwbIOMwc*8CLehyRbBd|DTZ~%zS7hs@x_54N`>xB
z;gGUH60eR?7Vu`pWuF`uIy+fG66bA&**#dVdW7Ys-p_R!KgB1qTUxHJM53@uJAs_-
z3<rKh$c+`k->5fu3X?}U$sJwIw;CD``AtpHAYn}#mnThT^6{3={!{PY!Aa+*vW?pO
zzgj!{%qOx-fU&_R5JCSr&QG&7DlxwwCk!H==LavIzCS#Cyl-s5lM3k<RNi9-iJkF-
z*WGUZs)Ap$v=#_>JCZ)^0L{!`yYtiNOZCHyhD(Vwg(Y-in(kXlL~`3SCNl>dg+Kx@
zndoLgWuFm5*h4!s@t@iwX$)UcsXhIkO@ID-n*Rh@3dWQDwFZ0myK^@h<JF!S6mOoK
zdYWeQej9Eyg8bJiQr9oJfTk4d`M8xihK*a-x<0~2Jvt}hDP{?QygQ*m&I?v0V6Bv4
zq=?tnQm|KwK%quIRP&tCIYkGUYUdLf!Ag^oR8OJ8%08Gsm(KYR)7+K57+|5e5MjGv
zy#R_pgZ+PkI0`WAC|V?NQvtX3gux7bu>GWU<h(}9{Q1g|hB*tSy8ZxH7oTkE>#Zc?
z%Vj{t;qWO}_vb+P+z;(=Pa0tfaX()tu1iRXN8%+qRvkP_#No;)8I|eMqGyR@T$oFr
z{8pjq2*RIBqh348(PRprPk>RtYJRzK66&?`K(zB?9YV3+?m`Wqhrc41i!Rq^mGSbb
zX~;Sj+t1sa{@;sea;dnyw-a9{Yz({*f|OxTjo`KV8Hscft@@xXROB5I+82V8i=Ht5
z2FH5X$~sAP?LCeBjhi`sXp1k)*jy!-M&~@#4G$OMbTLld8#I9hFuA+v6>Lu;7fLXC
zZ;XPoKM!B@=&!eSfcoe~K2Lz||Ao8qZy9>;q4Pv|>~!d1Hk=DUY8`rLsnwa0Q2gqv
zvjA_3Gn_>jEa=5zbUeJv#|?FDf@fSj>ld;c$~tZ`fh?BOfm`FYe?vqV>{UpIpDbp=
z^X^%~UY!)(ZR_5QW}1N4BL1`(q!x;kBSG&?TTOt`u2({V`ghGk1CCVwA)yufpp~|3
z*kPBiC>S-p79vW5S|*IvB3fT8K4#8t<G-_>U6c7^M-|1xY&NiJf{Ad8ms^<xBQ$RI
zOa5p?%NMnBM#~qpVj++xYI{fAG;8BT)K+{o8aKKmCGy`$cZCc`p5d<1$`2%{-8&<t
zsO!Y3*cEGbWJ=0A55Ag-U73<U8<G+HDCt?)6gya2eexx(NS_30eG*|UX$rgxbt4C5
zXkiNE*nT5e$$iHA?9Xnp(4<Vpsjr@KJ8{J|0oPT8tuMgE3+oz&=^)@a(WUTo7)P}r
z;gq5ErVcL%z3Xq9X~b(Dp3}0JU6I*_Ctqe{@16^TyD>4ZRJtTNc4x9|f=AMcm)|~5
z*S$`fb3g=30Hzb2iluZgwE9ldd0;!4K`uxKO2e4CRX#IKR4l@sd_NBCy~O4qKN~Sn
z&<WS*cxZ=@8Eo4WZ=;y<k=$JrO@q^dB9Chv3clYC2L@#)UDwo9GLYl&`)M-8Y}2R?
zF+I!!@o<6X^flI|5x@X9|544RDR8<RE^6y;PtAIMuqR~)XtSg+I4_uKP-!b2Zbf|?
zg_HSXCB8G*(+KhvQ@Uj(^e6JkOh2Kok!C_p+6v{gB(Y$zI?7u)-FXv>ykz+<heybI
zc4_9W+l%5_%68;bjnl&R@nX}t*-4$FVBeZm^Tj7&?fqT2#40zzeV}6)`>|U7xP0-%
zxta^Wh|`^CK6WpO`nRI=?F_PI>`;F)?#A{q^!}5eh+u*S9-}5bQYt+*jC=}0NU>a4
zpIULhNt>E!tw}knPVLuW9eN}jMr;_wRD=*{`7kA{z}pGg(A{`Qp~n*U@|nY8wIh%6
zRuPsjC7Eq(F?(E{MhRQ$80WBB`mhfDJl?nk!TwNRGI#-}y$|*nAA|SGNRLooG_Qqf
zmaUL)jPxP4`r`u=|4V0q^rnJ9wDj6bn+I0YPejvs&1UMF=*Sv|%OzmFK5#@?C^2#t
zb;@tBNchVd7dLbqjDaNru4cUN(Z|%Jt4bwHIy^B7O9B_@G<qf8EA0x?WpBwe%22p<
zKK6RwIgu(Tzh|I6V||!u_v^XJ0Wpk*rM0I9aroLNz<Uea=;r-~5D48+NlM7r2JiC{
z>T4Lll9M(U8l(0!rk3hpTnl7bM}#~t5m)$*F@f{McI<a^@@0xGFv6gx{h_Zn+}=Ks
z+PP6@Z|mCQ_qAZy)~^9kK#!?@TXhA}LJF+~)vq6NEoV=an-!>B{%DS;dp-LZT0HjF
z95Nj>w`(4mDjva#hFUf%*!VHU8QLFkIwK;3k)5Atjotwaj2mwiQ|l}EN_Jl9n7eWR
z^9pF;Y(>sIIZ2)@HraLH{VwUiLki{kSrOQP-SS7E7EWtI)dlT$)gS#5IO<8K_|SLA
z*x~a8(a2*S@qTMwIg!D@-vP1YbX^~LfrIp~x8W@OQ$ki)*LAf7!u<@w{5e}nF{-#m
zqcXAYvQ)kwY_bOr&VPc{!5mRwnV0cZ|M(17A+Kd;^r<;KIE+<NUz`)V-}!Jk_)cDH
zPu7R<n5^~@)?Sz_G75M@rV>}>SjMF}3puU{J58C#z*Q@+drbOx-=<cNJITE)^IX0G
zEA*ep{ZHefjD5}T(zcwzW?C;@Ob-%FwjWz-yQNuO^mWbe=trO1$8v;N+So|-$A{2d
zo26LA&y)BC)+wID|L012I-R>1c*PHgOo5e-n-TZLJk$ZFaihk*>)<-O{rJUoVN6-_
zjuZcgZY~GCik8x-ySE!X<X*~Tn#YNQMsyfGsJ0*wuN5!7RkaTTv}^aJHs#+oe=s8h
zm!ZwCzO`fNf+_OI!*%PWbJMF?ptEkul0DBktkrfIZX0WFdR_MtI`Q8OXQFi8mOwgn
zp7tjE^n}~bTmN~RFn#h8ctggHJV*)*Nj$zDIe*s?oQCQbpz?LPBEJoe6ryqq{(ts!
zQTdNP^UIYMxRYC0WF?jqptFl2hETCodDS2eocfT+3Ij(YZ4O9(T?x#_Mk;}kpLTON
zf3H3KHh16=<t@U3(;T51TUxd^)-{f3hRQN{suRvJ6ihybc>QqR@8V|!CAn;pz}c=A
z5z2^!rwtlIKYkEQIrNzc$SyrR{*%)6#enm+a(&Z?SGr1P7zgHk@GFl`voZX{N*%SZ
zx_Yc3O4<Nwq#ldCdE}(#nrl6hKberYq8|Ov%8L3)hqQ92SJ6$fUKti6={*LFf~7#9
zCZ_kmN1AMFcXkmPX&Q@cHp1GG?2e)x*bb+VV-2;}fK>@(96A$}uj8LqtD5@1n{4zo
zA0<c~Kb$TB0U4xHUFzYP7hWFvtE)`onr$8P>u0nQF9DStS56O$@pWNTt7pASoyh*0
z_c>GyZU2Y0w~VbUXx8=2%*-%mW*oD}jALeIW@ct)_L#=ZJZ5HQW@e_n<1gpjn?%Wt
z6e)j7>a|ucwOTE8^;2E-etcNi9`2QCF)sAn9ssn<F{FXY)JJqBKJALs*gz_M`eV|_
zLv`VRNS3gA3g-bw_iL)RwRPT%BXe8p=$yZ&?jlwp9<N}H7s+>BrN+;iaXzOq?xLZW
z1N*gf1CDmTdVr#5&rbaOz7a?9O>@yT+gtHc=hSjpBUFd?TaIZ7024_B(_hk_O4KV+
zC0dT*1RS+BQY`sQHB)hdEuOx$cqYZx?j7tTxAkRoa4b#Rua(7g-iKn@D4KX^x@Q%2
zP35^`8l>kLk^|RirDOqtPzzs>3zP#jf4;{f5`X|6Y{CD2<a-hJPdzM(@o5Y?0k1-K
zT168V4^Az;z*Uz0I+gM4-z*+g!~o_<V2%Zd243a5Lp(L@;l-iU9OA2d-Gdn$fms^J
zkTMA-sB^9bSyeiei@u~j-iE3(=vJ%u8f2n4=#Dkn=^TGRh=8B!hwuFF(LqJ8WIrrn
zA;bHdg^L{~yBevkr^wGvKTl12UH|pqer7;IB_9n!3NOx+Y5>C5e97J`q=N*^$<##A
zPN8&{qnDf47p%8~lh>A&o^E1tw2TrJ@p<}6nFN>llg$0z@%_M%Dx1V;YLLb6A5h5|
zk07f;;H++`^~KMKm)vg<MOGkIUySYPc^Ejln!ocwSRzMzgHa`FY7uWNhzslFhWP(H
zk1FDukE!cIn&Db+vy?cG;4Rbh5RSLgmtQ7XLsA#->Lg+|NYzln6snAyONp`#Ydrpw
zm+aP{#^4mzvojx?Qq7G!vbREBtsSA<51)uW$X>X3_WXLkfBd@!Smo=m+tz9E2l`%d
zQ5W~~A<#jCggg1qhvL$0pm%O1B4liZ8`XqS3li7R%qUt&F2^sgbg#W#tSVmn`P#;!
z<m|s_>9Y{&k4BQmbLje(U<i4k+!vSY_D{pD(`e%38CM*djG8R22<;Wp1W1P=R1Z*(
zC2rrCtwFo4E*P-1^tq7*ajZO@Imqe6rq^%VI&bwt#+iK9lV|#=H#l`HQw_bdr%^7S
z)Mf!jQ=_|Yaqmbte{3oK`g|W^VQK7mmKXv^CbIPW8W?U<zDC8zhO46fGc2%GiF(Qg
z&hV#|D3IE*V}|?lGVF9+omDBnsh;;Ax@Ur(IH+0EGRXxOHzyPVdw^r}4_Qnk1%pN5
zNlTG>mj})Zh=`Z&E%-vI_EQTd+Vsw!cLsisxE#d2%23n<tdhSlB6?P8*Ug`hpNjmm
zD&Af=WZ?2vfNM7Tfw$IOOr<sRhpluy^plRMO~M3AwbA8XEs2y*JAT#?WAsbo`r6v~
zMIPa%9>n%r2pVq108+#JTsREo1w4xqV-3gCoUkVdEZJa!gq~JF_sM7jYmF|hs4X{c
zf6|q5S*<UmDhTVe4zBD+P>hT_VhNX<s!+p#KO(QU5L`t4(pzN_Z_UzYctbcHa$4T;
z*ck(h4qa&7VnG8))ZzQOJ$gq&eblr)fvmwoOd#0$HRE5~X|>x2LBWF=f)BxDdFN{&
zTxgOxP<<S`HTi3Y)~{+U5PY*VoT&QUGg-O@1K?igopx-xVJV#THc{uRc7yWlPgcs;
zi7C|xaT;y?PZALe;&ns}A%BuTfyH}!QC)21w89#;Sio%n$K9G^b11+~AO%XA7E!Fb
zx{!M|^$tQs<#!XYVXb9;UFeOTgNrAgB*;zsS8>7oI+R?`&%_a$3>N8Z(?s{;JPP(e
z%%G64pL=Q5s@3cR!=+r_@Ari1_iOG9@5r6G)dwTu&xFZM2@ze>+gY&6^}}33SmETp
zzsaBCY6vwONFbi}f1oZ#V+CRPaav)?W1N&nY|(h(=(S#lDD)Lof@}3>tVj4n#U<4^
zhBe|rr2K2zPZn(vz!kRGdw7me+v*p_5##xo&WMU*_tRSE)3~JHbxsFn#x<tSI#9_D
zc#=%(u8_OE6i|=|`u5UfB$`ogJuA#Yb}-x>q0`XzgZW|LkuJ8k+sd7RbUPS{Rzf*d
zv*&^0$7gsp4apWv#_>+hKb%sSPHV}XWoqYYmLT>U?}v#zByt#@y3qR+3z>KX%9}JH
zXr8k*@iNMOCZ-!1Gd>iRqy#i@XHAEYQT=&zM&)F!mYs;jAcFm&G(;c7k|+W*Z8sZ>
zM%0~wPl3+7l}MEE51+a9-eUyGreFobEV}>5+Yw||X0KHr`QEa0eKe{j`OCLqI<5)E
z2XemF7|j-@5%5zdU%Vo*I9|1U=UQQ17ZUk)JH|}!es+tGKQn;6>3zJo#7{rDB&D#M
z7Ej4IXS#{6YJkC)=FLXB0X@j#(PV^X;f%(rOx~~>PYWnzsa(G+GtHJihGFi=E|oRJ
zizQ~ih0#nmi(O|MI+ddG8kvW{43}Fk$jIFnowTa3-43&5I_8K~qS+B$iNoI?phUV=
z3M_BJqCsi-feclzzEwSGYex4snO=VGC!g+}@LFKKJ8p~P6Orqi`|e5pP1WN7{k$+E
ztDBQ#7<9VNYupHTcT=Z2f+(&V#+M!Ce*KQ$FZReEjmGS0?4b<}Qfiqm_a7MJ6)qgK
zFt9JQoOV<$2@VM=3>%gnZBxnGxLN^6^{RrR9SJNd-S;>7+W2OnOnMzV7rKn8XJ;Qj
zz3!(+Kxzp%nJ21lFqU*nYGYDo^d-4|k-Ms%$>U4=`sF?vBPEFPx4nHdz10P<(3qCS
zz;!X+EAvf)p4nlrJu1+k`+A(Nk0OcQAql65@4PH8%cjn~B>Yvos1G)I=2s2i?58d8
zvt1b#jRi{irTG9Zy5RM`<$+*ld~}}$LzP%~fVxv}Ok9K(I{D4<>t)p!$463Mf7k35
zYqM-Oj7Uhd+kR!nEOqm$VS4S@Yr`-vJv*E~rDxY(lI`I|eceq+3S-p6-F@}taAIws
zM=5{harDe+vH8b1=2vbCA6=mxCZw9vjpX6y$F$X4zL{sqt<mA=cFeAhE*A28QNVpw
z%hELtVN=U&E^k#9B1ShK@KV{dfwKP~NPlv%oek6TzH5*Q+^LWGOB^IrYRRL+q~ufb
z_n{e<<{o!hwVR~S7U}82lY8`r|FZR&h?8^P$G7=jWmK6|q4@!1*W9%z)$`S;<@s%I
z!u)BX$?mSm+q`v<Oxp0@UZ+AUUY;EX!IqQzIP@J9iKR$X*o}rZPKz??0gmAi${@FY
zM8wn>VYiJzh4Jz2u}kPOi|U6WQvDhj{?Vt=g>hh+WWiMC!syNA#4Ke;RU6k;!}=MW
zyss0UR};5^TW>+AV$L6S)3Pg}G3nf}B;rEfyDzi3<RY7CUUbL6iqgD^@{cd`rcn9E
z9+WS8lJbEZKD?;{Jh?_-4c68R@J)oH9tiNf?2+r1oa{z#ycxXoPBq(E;L^V!8bOet
zl=g7XGWqMIorEeDd<>gO-)^hd++K07bCI$pxY6x%Khs)+lR<0zXP4#JWu+0>0$0Z_
zHGE_F;oUOYyT7nNy2R023g(+h=c<lWj`*%<E|@m@Eq4PR3q4`Hdj}TZEGu<;zHVu}
z^?FQ+wPK&c;aAa=OBNAp*s%Gyg!IviVGwiti+A^CFidQX;~<5985f2I&PO)}ISq6u
zKSCOoSRYWt)GYLq!Bw0fEIFfca|Ajd*0D1wa{by!GE$4MOE@0XM+sx(Y2c=Z4i@WU
zJ+grCFz3P``h>l%XV5f<JoQdLHKmg2XM%Cmc42}kc#*eo>pd0;ibJ)dh57Vy8|a=A
zceTsR+7Z18X(o08y1V+exVkX%)0!91<F(&Cco=^4_;l+yUIF2Td??m{)!#Xr;%yAB
zowmH&G@nqSwpXX4Lt|3BJpEGB{tkcv;J1Xe=$D{$iO3Q^a3a+N0OUInv>VpA=27I~
zftH>QO#0T#dhyQ*f5z<Ey}6mHyFSURT`Zi5_3b^IUms-7t&BnaV-?fs4OscGzHAib
zi`dq??)1ID%jdpc^JTm@-rj}Q*@a(8jnno~M@6FCWfUB}i_zWj+?-hs#JlKof9-w-
z(a!9!6dSh%Gr|F}F~z&ai*CQG&3<X0X!o?wcv+ojqb|#@2Q^yvWqj=DobFp0+s@wN
z^w0L%ve{a!IbU8o`QeiV84>$(0uA~!gRF1VM(}1xba_4?q;)N!@&k9>px`o73RL}+
zkIR9e%BO#q@X+~<pJl1y1?GvUaGMNGgu0h53DIiu_UPdtU6ybPpQwj>cat?a9Gq%d
zk`Zv{?E+n%(bQOyBiHO%jRawoDRtW8=y7Y{9I4A25Mu<2<aAJglmaeddaAz>(#L6h
zTT<3J`f}@ec+-h3q}_8koSnRx&jT*C&iv*?i`o;OwNJl##+#$%2XU8R+%ei9d1sK=
zU*&P}qwl4}(@vY;bypfiv!UKlj*tLr<Jw0{GZ`nv=>V)5t?}ouQZy$u65cP&lqnQx
zgw8gh>+o2x(}^k?4u(mS;?%}&+FSG{u_2Pi>v0WA>;1_lX6mR#B&VIM8B~6>F5l9f
z;91YUrbJik;LLqf4GHQ#w$|$s@4M-XiF5crd2f$AQLx)0Iuj=uSEHl#$DPg8f*$64
z&8gAsC*@?!LvC(dyO!S`XRp}-h@rL*`xb1*KH4RiY~xSo9}tI$tTrvl^2f?BdTd^r
z;t|3mwEMA_wk@U`rCO(*IG@?v<J!=N2s}crQbT<a%-b)9RG20C+SJZt&7MLRW;lxp
znxffwR__H{s^0waSR=zDN(?z0q~2+YS_F%PZtc&L^(%rut4GlWPB5y2A_#X0)-Xaj
z^ch&#f_n(gh7h$#mEB?Zt+C}MZ0eJ|S<d!V+X>3`I((-6HK8m^&n@l7f43A^2Z`!8
zlP82nQ4)&Xskp0grPgJHn38U&7%!&lY^YPM$eq-d7xUQW>gbd#U*xa6yexfvJuyMw
zg4XZssE2L&c`?;<_yvFD<4*aDe)WteoU{O^sGfTwziJ=0%ThK!$X^7;x)caL^k0T9
z5srI=v6}p#n&OU+Tk~~sxcx8I6mK14&9T7})~6X^lJs;C$o)SS9cF*iOF?cd4uN3E
zDw~79wm^9b{spX@$<&U=U<DXH`)^RR0@XpIsor;JFM(d?bsI!d_eCyc#dzjVK=y?i
zc0@H&Cl^P+-k&8KU47JpAso8kM?8AZDv^)F{Zdy1uU?kI&^7l>z+EV$+b(ZvSDjD^
z;5k;6*OoAMi-2omq%3_0mV7=-WPiy$WS2vsILD$qN8^}letuXQ@evP_zX4My9g|g+
zQuoam3<u}X!njCrdXBbCcUAvI-(W~hwz*ev*hck+mTXg@U%Hlsa2NuDS0q>j5G4&&
zb=1bA_O}<8=KYnzRkz1y=N~0!-haiBgnM{8x@`4z!DG+UM+)YY&L5Rxvlki<T2ym#
z!9hj0K^0eR1p8~_7=L%HG0Pd{=5jJ#q?gHw({zA`m1Kq*NQpJA$8Z79ZdN%1NFV$=
zwH$ANQK`}mz>m2X|LI(On+dm3?{AFzf^N#K!aH9q^L#@>>TXW)DN1-=0pZ;<U;>?f
zxtm0oL+BgM6e-Z+Eua}xPn0}yGXJ{$#*8de7L92Ja%|VguVrM<VD&eTgKoUD4J=_O
z<6#OTjjH!=ypcvFVULiy>RV2`zu`~KJTmLAF#``qej$A+#aAM#lz1c?V?40ZF|LW%
z_#oY#XyZ?H_ia^2Nj7m%xW8Q_!9r}Z@bEoGTUjQAhWhi@ov408^l79CC?-uJi@tR9
zF{<m9LrMzeYvD~?e2VN7$vm=Ha$xK^Ur!gJ18chV-~xd`KMV;r;8>C5w{e6MFWt?e
zF<k04xrKLvz#xBjN7@!OAzUzq?K2GXRGZ^HghuJJi!}AA@aGy5<DDh6f|+rQK3X>j
zbX8{};t)P^YHNR0k|aX;1lBTfM{dBxtIqu8Ei3%i&r2CTCl|F-H;%fGT+3n>O-4cA
z&omx$rrRKEdS7k4ia3I$zEOxex<%Ze_$Ub?AvY4Y?tV0IJnI!%{8O+?`ddQbAmzyf
zY`eAD`%!fmBPeJRhMSS%nr8%>Cpi?COIr%GN_-U`8J<n2OiHb)F*W1vgR|Y_BFy7N
zuu&5vWZ1vD?W<?cc^{aW10+8Y9D7qh>zi=h@9zEiSwO8(J03J(FonSEL{L?SWd;+%
zfi7v^meac{UwCUCb=^e$cN#{4P54VjxA=Rh0GIAAlCFW3ssBy@{+$V@{`L;Q_@W%W
z1ND!x6(DTS_=w2oGSN*Tc+Dj!eZ4^D%X1Uv9#~Ebp&Mvi!lauoW3MmC`yY0-a3(Ol
zXZ4@wiTJ{uvN&mAw-JA%>Zb60MnFDs9`XUtux^}NC?w<ng?bg&9bo#zf8^#*>VbLv
zSdMc2Q^9W<rF$j)BC-Qx`ab4TN!3UB=GIaW8(<wETTQi$(*jU<xDe@%m*S3?z4czO
z9!F(&`Oi~JmKVPG`l>zeAS6o14V+6Az4`&)#G(=HnJ8olsZOA`$K&Z*zfuhy*&qBG
zNEQeKgJ80OBDADc9e3C-`eL(};G>w|<oHpfyV`(PS9=#ezS5$>Na06%GYZpV2B~0P
z5jU%H2e!!9@N5}~2N%L=`12OF1)NgH-(P0u%X1?DRjEVS{*apn|MHj4N_PWt!?OJW
z7=KGoklUfK?Cr>A<3cE%F+7qM)}=EuKIFvoZhEu+<SrtO%_nKDW$`WTYdQ(BR5a28
z&EDp7cFJw@EzDWdjCRpKwT(7)Z#TmR+IyQ-ThUbN!FyBjjOWkeF;`cYWQy(_h0QAc
zo(?NLy|MyvBt=%GvtaO7816oaB+7l`XkEyWL)@;<$ldmh-#32`dNQCy-mO?~$=!1;
z+g24j4ZAbO>L~Y}S1bU*GZg3GwDaE}%QA6A!SjwzjaN(ytLD^0J}2+wd>HEc=+JhH
zX|)Q9PvjA+zo-`_NeF%hZrX4NnzRrO4MR3X>e&$avlt32umQSP6oK|(?#NnX18-34
z3XYfUELdqu+$k6AL(EfcgodO8yCEEjR(K;<#17sOJ+~)t**uH$2Oc#;NT%cO>T^lx
zlKmSfT<QWn7oPGm#X&Jv`9tXJ(H;$Brdg92>21CYfv!a7LNxmMb2CyITwDP`MHFpq
zFbreXx=}wGL_=u#*~`-?1;7*-k{A>`#Nm3;Q*^*t;8>@f(u1JT2%XsckvA=Pg6l_H
zI^>=#70&PXaS#`0`BBSFvv7b0_ZiVK>DGYCJpZpBgt=Tag4dI?XZ`S{)#VW1HNW{n
zR{Ei2$K5o&2TAi)WZ$pxgmBuGRm2XxPcfo!Qrz3#3W_3}pfPlKk*oU#^!Pr#41!A|
zw8$cU=fZ@!{)j(@$;vG-VW~77D~fDV;HCTArF}FoyGQbaF5e@K2}noYBJzQNGM&dD
zX6)>K(zA~*o;)r@NY`)!OJx{0W3gWXsUWIUprvl`88{L|q^5Y$j**P{MN$mW>kXzm
z^eS#+2cwh5@}1{4$ZVQuB9786j>T1)8fx5iSXnY~-D~RUc%dsBh(qj983BSxNWpNs
z2tqA<jaeS8kW!=^!^q!v4C@pF8w%D*j@hD87p+ua<c?13Vi3PnR~3WDvng3JITzDp
z@}jbZlfMsgz?{EMcO6+eP*V;c>mF0jn+HQjU($81x6@_qd5R|Uz#IT?&ZWBh3x3lU
zZlYLB-U-X14K4#yLp47Ou~KM26Qo^1m5_BAs}umobw9+AT1KlIv8zrlY?WHd`p9{d
zEpG<0lmQl{0GZ)hCp9vym4v2>L+~4uL&?3ENm<vS+T#aL*IOFzT*9FQF(q0?QFCPD
zYXa-oacM6x5)q6sDAUa#PA6i43SC_wNQZGr{c=cZA4gam0xR*VIC-7AF`r#ua!Q{h
z{-0k*-^+3iXQ`dN;tly6YlPIILzclxdWl?mvVR(geF0P-J(+-P1?o2^Yy!V*PJndY
zpdbPfBVUoET1F9Rn^^RdsEN9#(A%gGKYwV!hist#ZezPOt1IzW#)UTVgA7ysyq|>r
zfsE~ocqb~Ly4xmGupLv(J>@;8+_CGXQ;e5V8Fb2ESxN}mDf1ra&?Y^7L<75P_LLHH
zg4-^lF#3)$(rfxAO!Qf-mq5eedn@`bL_}gG*a~d|0X@p)<kKXo2p=wpK<3som_($3
zR+Yo6;JMj2o#k*4rRync>~p(F_DCZ`lzv#!#vMZ3Nc%%4KJ50>o`XKB|7j822S(IG
z6ZDJhNn|i{=gLW3Uyk5DLsEcQJ|gHCEQhtsAkr>5UuZoMaYBC~$yZxD@ryhCG&S5~
zb}^@X6*XpvT$mX78PX%1MvdyqcZy=SIjwF*kDJpb*0i_%_0ikI6cP|@7_z%wFBj43
z#4GFPxBMVD7vgOQ$X}A4GY0B!%1;CRvL1R;2mRf0-zmb~#tL<vEU?e{&jm7Gj^DGc
z!g(^Kl{Z<+uw3nMzoP%hU*s?3GwXn#uX(oxfIa8Y!1N~<37O2Sk@TpPUEPX&Dw%_~
zW6jZV5O_@%Oh<#A-Ad#lngnz@Q`cE;k7;WoB~S)mLr*aXzJ4Drc|SF~9&CKRX3l6^
zLB47=%Dry<TOT3;>8JkxZMZrC<`d^aS8;@rERH*7`rE%&;JqzE4k&B3G<&WIOKpz&
zR7bF@aRa%>P!J9!;sa6bzmRv!-j;z@-G|RWM%Btl;LA)S0J=`>4-)yg&>y6#?XZjY
zrl)fLp#(Z=!Jwj-jxHpKk^wN>6v*h(1>4QUA=}<#t`nnHBlR+%WmcH3ADGpZ1nW%)
z6a?f%Ilb7nrN9wAy_S`sVSN~gU|3(OTv7hgIc*SyD!ph0%Fl`+ZF#XeF|?Q>9n$@s
zUgmKsPQF$ojVQSk+*!d*+VQ(zFII9zSZ3$US7&c$5gTT(%U#`}pYyROrBItE8Ic}c
z)y<iNkXsPiGo0I$m)<H6Y4~A;41#D2@q>VYE?^l&R3l^n!Ws~J+*9Fm<VwT|0ED_D
z&UQ($1Q}I%c=T;lWj{cM`W0ZqZs@-J?ZgkuM91^K3+{~Kz!p)`vj2+UJ<Ol_n-Hif
z6cZR?s=9hIORG3s(Jv6Lme`?C#phE{PjVJd7DbNy`5n?Zy~65F^YmJ_`5%|6wiP#2
z@=QbRh7cP(W5RQZ8MCR%TA{0sy?C|2b6_T{1Cnh#5lI5?5@R&+V33Z+qEJ@ce`cKE
z`4c67ivJqclR5OKf4dRgdAkBe5xwbeGEx9~1mEB6_uaAy)+i-fWV-uGoLU|ScDB3Q
zOY+JB-d5B6Yu-v#*y#j&I?ZdQCjD**hyyOBd7wZ7wHzxl*<+MPimUu{52YgVwSmsa
zR>d&PhIcy;CAPI+`Ydxm0K&UI5=XCpfmfP^Z{%{ig>PK6*TwAQNzyhGGIKrC$dSB0
zYtEj+>D|^cs^TnFYVFZ>P9whkdJp4Qe+7*D`+<OR?q==y!nrr;%02X_rt5>(c2`AP
z?Q!a!iBNYg9@Tl{e3z2`oIXcrfFB2GNzV=B&R)3A-bi~N^1t4)FE=Pf*p4*hT9pVw
zo={!HwY<$<!w?pwbi%_HLC*LZCPq-NkMoMLYd97~T0*4gs5BN3LB|c_IUGXD@6;P>
zi8BSuaz8g;xSjb!e;`-g<VfX)m=X8_KU&?hBKWyHs3q2Jz*#6x<``qpAX|(B)=2f(
zo!A8)MBkcaWqVUE`yghf*;%2blgHg^r;69HnCl@mhg}jqg`Ql?E~!We2#ZMB<}ohI
zM|~7GCm$`70f7hNP5i+RvJHB^rm0pEaHPh{G+c=kW=Xnw{sN}%llS=x8LRUpLEpc=
z#kkU~T|fRgsU?@Yl#=>I2i;r>yMj6Xm9TO@UgeHsDIdA2?Xi`7Ef{P-FPv;h-8(gD
zNh2Dh%pm?z6i7)Y!>k5tRW+cU43=#a?-#=9DO5bkutt(QS;hrt|G*hP4J}FR*06GI
zt&h1Sh|-1Jw^emCp=s_mn-y3rNW1mLW^<^ePVt;EYDk+~RJ|cpF3!u_u@H7LDIwAU
zd00`$8L#7u+U43fn}}XU79*Z_@iH<?Pv@N0Fn~Iz^MP+aib__lHh{C0T<^qNys^Ic
zn^!MXTtAukyMhbjGu2g1(9-eQ!Ql0NY-c==YXlXQ^r}&X+kbK3r_}2!^Wl?q@Nrzv
zbIx2*c>IzHH?M`ej{iZX&3yb?g$R^Iti$jK%gNuBodCoh046PQ26})p&y4_(DP<xO
zsBZ*J+=L+K0*(9%|LEs1vY`x*6g9BhKqw;G3F}u=0h1HT$dhOs>V$6`l$;-_SPV}}
z6ai_ILwMdA%;acGh{k~rww|{)1Yt;;MoOd|M=E%xo<v%T{X2ypO+en7#E;w&i90^|
zo^M<GO*CsYs?;{0i3>m|-}V0hDgN8Ye$|rUR&1KlrlB&$P{c;yn5rToM4m#fX5t$)
zXaO}Ydak5eGoE^s?p}cyw2@!t4FIYO|Nj^u+6quoxDeZ8kpwDCZ?Qc92iGg`Z`@Pg
zE|Wtq^TfOT&mAVSuUa+V<!_}m(f^~{_=^7u9abV%U&f5aB*H?cB0f`a*--yNbaaS&
z*=b|2b_Wrv^%oNZ1wgs^FcPS<%1HUW<-Zjc<|$Q2TOT{9p^<Ty7#eb$$xB~VMrTiu
zrNQyN;a6q(4l(=C&K*U(y_-ns;{vyORl`To2}K3iERLPRpd7}zkH8P@cQ&Wy9h5{Y
z@w&rx^tag;tWO7$`+e-0E0xw`K7;uw;?@IyS9@?*DlIywmAvj%N#6}<rtXI~(oSJc
zSFk&Ozi2X0EXf?u6tgo}c2G;i&)~>WjSOuxs?dXT#-CcAySZ8(`Y0-jpCW5ud_s(6
zpzv9yo+$_)E0_jdqGv#q{#M1LET}8!!HaeaMRF)FdUzMiwvAgg{dlZE%$l3KV#r<E
zhRVd&BOya<{;3&;Ra)njvfigdT;g^M5uucgoNCQmyh5;4hR~c1VQ3t;Gg2UA#~KRv
zrRMwE3)Yzz7b`h<*68!=hkWA^8JZ*SbWlWdT-{lyHRZB}?ypR~KDLpZ`og8JGWo-E
zfHReoD=>6W+xMJgD1&cg@^=O=vDS7!XfQ^ys?lT)g)yqDs}>{H@ASG=jjAv43^un4
zM`tg$O*h7D%RD)l5fttNR6kdjNGrpd@4~@5(?HK<fuFWI%S;N{>N`{CHd^mo22)xq
zCWw(_nodKB^stWPMKivI0!5O{i*<YVLHD>!`GLJ&7e0Z`C}ZHWF+>7_Sc&Z+LFl;4
z{7K!C@9|V}uGTMOSAhN(pa2114=~*Rb7l}o#sgHC1|~lM4?hJXftiCq*?oUXLBSxv
z30d6n5)f`<DQE;}{4@W5`VcSJQ4m^LXs}Hfk1ABit5W{8d}ih}k!$U38M!(vMknST
z?%%N5bKNVyL1CKD(O5)8(a!WdlYuitJ5$Y!PLF!LT6RGB=xX4%d{~$lSFsbLla91R
zuAq5<bPpq7G?DgT|09i*E;%EW?-@7!2Oo%*-CrqRRaPYzClfq-!geP-w?0RDp5;Y5
z`#@591``j12FusQ_O;zV1DYK$Mne_&+I^=2731nyjd9`?%EQ3lE>k|5>VFNXzw#D+
zXF)0&;U4$z%pl-UBW_z^V|jim{MkEAV(AUYG-M~oov#C5{q1W-{bbD{U(}*l%Wx{R
z=Ch1xQ&$+*$X||}4g3DVyfnV-%*>8PkHw%UHZ@b60Lfm>^jxR;PJGV$_fLZH5J>Fm
zyI09XIMNX)&v!z1UVqY!Bz830y6~F&;H;^34+{r1^rP05bQ*|fKj88$9pi5h>BX>Z
zhQRsis(C7sHOITk2Xxkm5Hq=hhX(D6&~F^;E({qN`@32M1at2y<GT4-LX=)#Q4eb^
zgz(rB#SL!i3}pESVT^2OvtHL4o=YS4-_J)@^k>Em!b%{B&omJmr5UME$L~N5g$w#O
z9af@h%z(md(kH%=#8?UtpV3`)71qvW@U?#uFzTWA$VL_5JZghf*@f_%lhOvF&jF9$
zhon}|vBgd=02|#Cg-m(d*hPjxS@OWGq7j|y<SJQ)yt&eItL&Ukv>}pRBI8B_VsoMC
z5}Bcma*Vz4H6m^+Qp(<HBf)^c<ytA&NB2#yL%fa6tjR;ere4dEUX<g5lzs=>wyu@_
zObIk%S@9P=*WM?)7tf#QchNMCq>=@b8CrA5`j^T<Yd&r|-<KpqB(#WJH&n3U*fJXe
zm;qL3e)_ivUlWoSVYgnSfMyu)BU1ZMW~4ErzF65Mp3DhRF-B28oo-gxUNOUS(%b>?
zOcm?oF2>r4JQM$B9cFmcm$*~gJ=GDi;josVynYZleeT~1G%Zvr%CLF@TQbMhZ6<zb
zJvLaM=^<EgN5MRtu1sT`NTUpYU+PZ%(Uixi(mXE6jUm9cY2H5{qZi?$e*QrEEB;G<
zWw<7`5W5{Jewg8*CoEpg6uE{S?@50hBt5UMdC3spOb_4e9ff_C9_*thsmgUxMlUEA
zyX3GcQJ#rM$t!~B6eg#;%Hj-(Kn;g=Yw0?r?F=-B)g8ta8fqapOF`5yFX5a;QNv8f
zm5P{mAO+g)3nj1>_Ae3mR`wh<Qb%y#pL%E6bBBigWxKt;Z9<&8jB9z^wVQZh=D1tV
zplVlye68x>xiw)K!Tso!<>h=Hqkrj&?c2pWzyg8X_M#DJE?el499T%_z@rt&5f6A*
zY1L|x<$sE#WKUzfBuz32hUQEjF_8aW70copK&XFSwiWF?{JDGQH+S)=JXppOAQ$(m
z7Jr(JS4q)5u+z)p3nnr-*X;gv`-27r9F(4HwmZDq@7&^p<w{Tu8_0hLl)Dr}`}p?E
z$?Ac*X26e!L4SSu+n4#r3_No#_EM<NNZe0fP>0;U9dpprSc?59U^fW;)_+=O!GC`O
zK8%LPxBwrrI8<h%&Zqv-aX`K4k3oMX^8f1=ZBKf)BLg1~>?Vom;LO9;Yj~e&1u*de
zyw^Gb>MhAnf||hpHvXO${*SLWB}d;sVIT!~TcX-eW%ljJ_}{M_nP~#CiGW@Iu5^Kw
zWAFJ@!2dQq=TAFK-=D(X?O=l}AOX5j49pH+AVC`YyVLy!>u%~-fSr8?ExR{+7yjLK
zSKHqbZ$l=)o~+lKxP-HtnVAK4SzaIJB5J5m$&nv?0`ICq4lV|GwALC4W<*85L2^TO
z-Yg&ebAY!!zHS5iUPG{;7-+yUjf##Pe`?hzsn2|3U7+A^;S<tgC^lmDbansb-k>K~
z>J(lqzKOyl7cP}iKmDxTV3c;$F934MMzF2$`|P9qy&d!a;&NNLP+XS!N*grIbbURE
z|6MMmR121WUNvOB5j;mg2(WL>)7A00{*2hzLJQZ?IuP9#e&l?pUatNRVl@X~p}U>e
zw#&Zt{A8*U8RUl;U5W1id}d!r5CqI90+Dh=Y8{8A+6Jcc=mFQY_BYYI>HzQm`~}vR
z8?fo<FN@P;RjnrBbOjC(gZe1!J3o9C?`BBHn{cHnsM6gs<V14^?->pN>y>Luv}U>S
zvWk*d9`Yb7J?m6}XFpZ2KySI|M#&!yxu`EM0~mJy9Blz5D|RUxuGuZNVaE5?{TLS*
z)#{PsZ@8*05cfh6Z56g<Rz$jQ=l`i2F>BP<cPsQkFe%;#hxItI&V<oBEX*GoD_G1H
zi}7v%XrS}}d*H`qu$(jGrN}Wqqom+tkC;8H;zEF{UE;i6%G1rLisHj}+fbx{tM|!7
zE#;r2rvs#1mAU?}K<DTWt_1e_171ZYa|=-4`Uk$;H;xMTj1=?*AxV8XsJz1(HbiWE
zQyV6;SAlp^8DE5l5~RT4|0m8FPbV&JHD@82VCxmu`5(~9_z(HtFq0LNSaDq7V<4;M
zJ;>PE0C-5^aRT4k;1yV$P{AeWElRN0I&XWM^c_c`EuvAiTM4T=w*cfaO`U#<xoZyC
zA8%6c(*iV?TDrp)6t0czocVXz1@T|tC6GM6tT;;lf1plgfmO`>&umD7C<H8+(PoJY
zY|sVpWcRJ&mIGWJnk2>@W!!+Ojgb=~A?^iO@s#eM-w52KWsccix4Zi@p^4XqMDzTz
z6kDR?^K+ib_XpDmxs3k|*0ZE%J^P15Js50yi*3@IB?#`NhKg_Ugb)hwf@pv2h<;Pq
zf`0>3(1c<sKMQu}JuMK4p7-}w!139|7V-KdOaKxDeG<+PQ}T)p8A}a^bHCQ9axwiJ
zFuj=BJfVAn7jIF-dndx}kOjbkSuLZG({*>EbQZ{H8a=58DAIC;Orsk#C$%{ODn`ze
z;iBkw9NrYbrN_Ro$k518VQ4^a^MuqhaPc8WHc{H>jgtm(=WXAAjOFC{@@Lu0rkjm;
zjj79o_jJ&C^i{jFKpOe<R{sD~aY<?FRR%ABXjdm+o$U{Ein!a%GkAdD84eK%)J=lZ
zrd<`IQLOKT#pJCUvC`1nLd3HnH;pqZrpO+~GMcVqo-BKg^8RyHXMHYmKxkYQN7WA}
zV_Dm=;(nt=)G;{!4^?IFvJ@`IBCu1=Ftce?Y8nht9kdaw@?5#m5YvJip5&#xLUee&
ze^Gx=yt}RBUZn&c^)rfGvU*-6Y`fHFVeETum4)ugg$5R9|4*bdoxY7@334ThDrq6x
zz8oD<%J0$eM<9<@Hg_~KO|p$0+~?kIAT!qOy?jjl28eVs7(PtFPc=Mi9P^3)Po%Te
zwGpt8100F|BtH8T)TdVAzY#n^2i^<<sOo`Im;L~}JK&mW$4uWpDp78i6ILGg8|;@I
z^dn;FYdQW9_Xu}1ho<Y8c?FUF$C0Q71u*a@aI<DDY>8Qyd9z*c>1)V@XxOnHB$%p^
zm@a)TcQqu*J|Q2`TdvZTXV|)bqa>e!EU(5P+wtg*f^G%A^+4y46|RGwo6z8R!pl6r
zVW=N5Te5v;JB*xP@o1^rIYuhd2$Wxr6?vfzt!J%Bk~d;8ehu03OADIrN@Rp#?^UE{
zWQEGew!Xs#kA6jm3x1Lp0K-Fbgw?Yd?$(JobUaG}k-J`!!WaNs7@TVXscR<!yjRGk
z+?E+C3BpL(qZ4>|m;os$oY8oh4HZUtyJ#e@!=JLWe{c-w+s}L~zfLT?g&lu^<D~8z
zIZ-GW2MU353T6uwNvJ(O1)4&r{zDMC;Pf7wL*tO4_Lf|nTUgcF!<k2a=)m0*eijLa
z5bsSaT@wc1sFTW7%ytwy<;0io(iCMA7ncjR-S1kyX{dK?JrZW+(fo+c0cMtNHWI|j
z)dQKZ?;%&-h;33I#@_Uu0O>hEiW7f-TcF@eu8hw~&#EJS5tP{@BGF;5J783}F*y2o
z0|!p9#`wSnqzKXoCY(?ZZlK=)?l+EEFfPuiGpB=x*kxSq*e)b)_DCcVE{3SHn$Akl
zf%!BXE4|0@WKqiC^gH}HFxau@U#j<|+qDZ%xJ>W<BhaV*a2W{y=JW?xdw9aOlG0s8
zNS8admHUSnu|wVX%iu}_dP~|~3jApI%h4FI>q~%@;jy63O_)RFK>BRJtHK3&S%8_N
zqGksd+InRre_F`<lGvv9OM8KMX>W_gozRa*L;J(0Ccx_D-t;O$%K@l;6?YRGoa@8c
zvz&#HHAA+=Jxb+t1f2t9g93rc1?Ii>-2RLNen5SP5m(Gju|PlsL&ZP}Og&%%p6|{~
zg)l%&2<>q|>H|&i00a;I!{EJjFZvZ??w_?j<=<I_-;?;i&*uxaAW*cu^AucYq>*v9
zEFoQcpAdu>H4g~kfaEb32Qq*k;SJS>69F&}Ks6W<gwP>S+{17XF9;k7fES(j$}l%X
zxia@OyLrUVtr%JW*16x22~D8@Ex(8MIB?-F^;t#^Q2wJud6%E`s_<VeG$2hRFlEqR
zdS8&@|Lw6q-UsrF>EVw{LAW=O=2K4~gTB->5dFWszE_z+|NYVw0jh^>g1yzQBGAw3
zJ(GgdikN*|;K{W_(Li}<M)lZ4TAa1}N8CKzvcB4Hc9^?zc$<<1fc=^o`SddwnIa<d
z;B=EU$jSS?XlMc0q6bXHK1*nhsZ^CkHD^*lBu4M%YT23b8B*5;>M}S=Fa%U(188fm
z#|VNhWK=n34cu8<_ltR~n%IoMESBePq=p!PmbCKM5B412=L?B}HPd=@M69XEw7NQA
zcZj5hqr?6viC&_EmI+98kIg4NUy0Nb8{)!X<SraSrFyn>*;fr`;cu&sbr`HpyG|{;
zgRdDORwCrF-xaC0YnbsGgKU6UaDjJ2FsI)k7Qy9SyaNL4^ORl~7my=cXf%c#I$W?V
zRT|nkw`ReW@l_Oci_#)iJZBpkzagp>Ob!CnsBoP+vm7+@Z>JfZWjH0N9%w@B<@g!2
zQVPhB7xqoMn<?3-x6oIk`DoGzP0uBF4@k((ke>!Ox?BuU2~HxhZ=>(oU27N^ys++z
zF{vk_l9x2(VA!%CVCvxLB6W^8Oq|}Ilk<VG!qBrW#+2Bzv#$!NNgtoJ8uFh#Y6rmf
z(r2ae<spuCyOtV8Mlt_s_VU&^DRj>wUsp$G&p{<aaiBP*Xu?;R0)-cWSUh8g=DhkW
zy|EEB1QsIui|Op-0e#A^|A~{7zBAs~T?r{x(z&J{@$_&@n0NLjz2r30e&&d9K3QX0
zm#u*5YIjjXa<?{k&WZC#Gw3an0w;5H%Tz#d;gYgd4f91Sg>Wn3J|-(*T!$@2g`0;H
z|HSM2IPi6F10DYF=-u-%dCJ)3<;`J@kEe^@fMaE04sVxVOAq`Xkc|U`q0V>-<Crt6
zx@c#kI{HOIf;d~kME4vY@8N8hpn}g?oAETo^#@!@U~b8n#=xPglcSSJSCRMs08u!%
zQ{Sl-7ssd1Y11V3!Im9LP;n26oyn#ahH%<q<%E-lzpWGuTO@;zu79om-FvC8q0W|V
z56Erak<>C+S*S;i9L=AQApb!;6BI#liKG9=U<9u^npy{Y(uuM%T<ztgeRSz7b6VS%
zHr1GU%ke+qiY0+=3*FX0krnLp?|2K8BG0kL$Zp#=d{VU;9`4`g!p_cQA<Bui{bK`!
zee_S)(Fg(PQ>Sg-5l0<aam)eLoq;oq$=;`v;VXrw5Cu;eSSb`}X9tKt_0a7HVp|)O
zhj%1a-j3p)-Ez1*Z|6X|ryC4OE~bhtPOwl`OKMTX$BN2&Fb<c7ox&YwsE@qPg{Dsb
zP0;Ko=LSx045ojzr;%X2m-|m0^7h@aQYWBl5)a+Wj-mcFT@_#5Ay2w16AC;^lSj^n
zO(z{c1%M9X-O^Co5Yg%9F#X-E;##I>w(RRT7P%~wuzwIS!9<?@_=uKTTA-@Va!Pk>
zK_?k3Z0TCDuCHexFMW`Jfk+>IfcI;xlb6+e_&*cqyr{E+e?D6|8be2cA{;k~z>T3_
z0HDdUe&gtKyKS%EI|r#WN=9l89KccH56lyFAp=_E2UP;_dDzeLUT_YG{+2%q<Vpe7
zP-`Y0`U_cneE0S|>_MiRrUO=nR(yPYtDI%M`jg)SnaF^d=mD;0mj_V0=~Szm3Sq-0
z?DH<Y*3m>;wuD>TSa8~xNSiaf`Wm#erXbibtz80;>{~`V3`hn%^|H^w^v}Knm$NNa
z9AQjsbxcIdR+De(0Hpedc;jPYi=&%pv4o>;g}wg52^;@os<Jg*gl$@VUH!b4?XpYl
z4429DzpboKlUR*&>*GzV+LreWVZVWDrF-Q;ZL`*Y7MA}L%cz6?%Ohs#Q|`-qDg;0w
z^AArIlm&mXlgusnk^S-W{8#hGOkunEjnWz9ZcOP`{U_KadG3yAz(N!1A?n`BjV0%s
zTl$jA-+|u6>&atY6m0#z@PI-jj}c)vqWmB(pVaIZS2%3|2qUx(O1=Cq4fxZJL<&d^
z)VL+k;sNoa;No+L?mhiwim<?<6F%a~{&jw4B|cKs(WHM;Q5)5=|4sbPCb7!)9jXSN
zX&`lbqu5rb!xv~4YrYi3sjQp?i|V_AF&6g$oDvr@qU6o<GE6(~+o8eWkTh~rcY<p)
zxlN{7_IqR*8WGkfo6p-wlco5+-^hgM<3<q!4gWUZ69{Bi9A*;$!FUHK^mPH5Y)+Ct
zdX`rGf6A3QrmTSrtq{*-uNX3c)Qg~26{FTXL7}jnu-Og{x+T568qWaNh(O>nfkPEk
z0PpTBh5a7j>50GFr+YHPK9qMn_91^y0?CckV+Og`r2g>}32ig@4dPl@fYdwZ$3D;?
z;RfJl_4_0-(EoSe&Cdc58*DONU8h5_&f%)ci*T9^ojKJ_?4#GG+{MV>`US!E)g=N*
z==NVP_X}Xlz;U;}FMxf$*0%gIFyHL!r9tcuEP0mDAJ{5^xSV#<mrmhlGuV~>AfK5n
zVlP&WUzR;N&K>WD@+l||O>w|f)}Lbkc)t`DI&|EkQi*8@HYM=Q(*9ijdufJey<deK
zng`p&>DwAUzYQh)JBhc?peNZn6z&j>lE9$G*D~v16OG)9MVgU`qJJgykNyjEzk;xe
zq#}z4PA0k%i?nL}C7>({ki+7(&1gjsB)8NU+X~B!RsIACjay`!6*$~L2hZN(7?8@=
zDxy-tQ@i-*8fdVW8e_Tz^qBUCUy?=xo*-WP<KbMVZvkiH4POMS5*&cePy9xEl>ym5
z#?6-l8(d`$dnwzSHNW1Fvw<@0K$_H+trNjFX`VZv)U`iBnZVH1;UX2!la##g{w7ed
zyldP7tp@4@nUV#$*CNjrRqNK(#<|Gap4kivcM9_-oQt4>4IW@Qw6O`{uiN5zW?=`7
zVc2vo^zJ?59DwybUp;TK7KnLI5f9X_SCYF1`VoE2OkhP)W>^G8QntN+^kfffevJbG
zGgNf9iPV%=`L88AO((^Z&Y*6vIC@G;e^S6K(7b=;q&{zw%}Y1q)l%qCqY+c7rh90O
z^6!gm$}uPqnY&wYMAg5wVRZ5#Q1|NzbLnTo-Krp3kIXe@+{iluRri9Dz;f^Gk_J0-
z$am82dNF$Z(Q0cO7^zfxKB|UxOo-^eh}cHu*pCU#rR92_2+K~Qk~_V-&ys($n+LvR
zLkqNKPjp%4ocl;mu(=yRG0zUU(O}mqe;=QhYzn6*A!QHEJ>w@Hyc(ZfQL$s{uT^rP
zAr>4WT&wZ#%${(=k~!N`e#*ZuvOV#bj`Pr;o<a!($c5C;XCD(-^O&NVn!ZfMrG?6z
zyjHPrsSlF(@7r}74sf%q|8pHCIcxQ{qI>jh^SEgO4c~Z}!rSdQbGs4*klwUM+K)^H
z%JE-#@J~;Gskx0__{sR_PWY+N$+U@25N{@3OOA`s-wDR~1E_ZB?v1t6cI`jnBZIi!
z;Cq18Y4?~fUpH>wVhh@_U*Vp7z5QUb2Z3r|Rlb`xoB!;Im*V~GTY#<$|31ktS5;Sk
z1)UN^2p2o?uwAE=()HCBZ;sXF4r-1$a(D9pX9eCJoV{{*JGkxopRaqYWe?JLonD;Z
zCW!hV-YMdT{T`M3-c_0eQg4<0kYyU<SpH_DQ^4R4Xfa#3NYEV!F98HJd%D$y5DepQ
z>C%eF4&{B75MDp@Wr9rv6C$<DD0BBUN;KbG-R1kHVFQ07g#30oR2&q}d(sqCRCm>%
z)!h1X2<#M|hFjeQ1s78I6{ggqHc<ZMq{iLv={Mx2nxJ6#Awpx9n+jH-V_o#0vIWq+
zA`l;;^k>+6{*h%7Pt{)H<BaLg65#hUB*YGef&@i=!Uak_)P6LPZM&joR`Fr?4W1@b
zLyn(xKkr}viJ~8ROU>LCbNcm}OIRB54k@k0qE{6w%;HpOgRF$3Kg_(@<UPj*V`pkf
zGQ#*UkYhs*V<++4z+*fhkdMG&9fK#QUv|H$ph(!0@V9y=7usaRp7dN+%i&9S-GAQ~
ztb!v})=HZe8hITi&}>d`nC0W6-k4kJPcXr<wxb8XGZ{J&di+r7mkPF{?kk9K|1p>c
zcO7Upxsi(?eyDPJCWO0Vy=93JhJ{>L0&WxdKx#}-DyEqCP&`kjPS;hb8BRH8%3r(7
zHa4!=Xld=u)D-2sVsk_oNaLAA86M6lgg+)Q&f$J%mWNe)`8#S!(0+whKO{@P1bcu8
z-I=C<LqR_7Z-vkWcC@!f&t1F9U(Dzq5I%`wrV8xsq=yeB$>``JeIbhUi*QIVf5Teu
zMk8N^0`im5mrYOEf)%M3?|*=Z;(GIfRxT3Vz4b-&U50>z6cLMoo|DOO!r|1?n7YSN
zX|&zntg`#CLk|!LIAbaH=F^C3UxuFPJK5L_wUfl~V7=Wq_%iWwb3W@i(DMuM1u}8-
zWuXx#^b!#yB(RDK+Ul{0MWmt;XZ3O)iv-iKff#LGkoFMk#nIHbadEyTlD~P?KBuNk
zL4|wXU+1dY^(e|^|IkG7XYPKVcqLMkXswB8Km*k1OY2+Ct#?ohduK7`X=wL5eTQED
zUOK2Nx~rBl!_d5)5O${)XuKU^fH_A6@|5)EI2p)OFqh$FG!kYsiaPrZaA^}>WHPkg
zQD#AHn9hvyj*fHPN-5qgT>?8jwzf*4>E9840wtdXkDTV70HTJST2lM|)raQqHQj--
z4%J8)TZZZ#GcE5UqXhe^03kd%M2@bR7Th?D#<_ys1(idvUEjT99_=z=I&WV&JahQ9
z+{=1Sf|ZZ+ty_Pl=Gv1>#!r-AAuMO{v0WS|&Ho^e6CO*DMoG;<;X3~kq3LKN%&D(+
zi1++?Ahq^2m+xL_E3WeFr>nV>sqM31Ra+O8BW_9+q+EO?8v;pmw+YIGF5u+8ugNQ|
zVE()DlvCr&p>(E?%zSP}EKsi+vNUxERe-~vNND*=?iQ9y>a2OekX_o65PfOz$S5`}
zmiC^MS2DfF{3E=I=a}Ue_A28~RPpmV1)Z6}`AeL^qnIgDq8<<*)oG=_8Lik_DZrAF
zJ!c2V5KHjxpu6SrY)T)}Ck!#n`r;yG363+RAi}o*UCIFi<H%retq@#_0(0xqyyOmK
zJeS1OMsq6M){ieoETGHV4lLk0c6CS4<&z=)Rlj<Ib!`RuAky!~#q`zW`Uuv?&bo+&
z-DfcD6fdP98;IHMbNEw?o`1N=kXoB19@fPaLLZ(4uC)%|tS4oz5A;&qgOQ8aY-iUU
z3gBE|Zocw?b!JfdJVFy~WIj?eS(EVr`lMeD6!zuqbWF)wHI=9IwFfQ6VlaxIx>+Ao
zyl(bWp98u^nj()8cW_`fe%C~4kiZLiEDv-+t_sG|H$D;x_kGIcOnD!Hn}+$$`QodM
z$7bkW^5pVazl3ak^QX|f_^VDF|ALE6jCL3h&>wq1R!@GA7|@_Wkk6x#xf#J1*lJT`
z6Yx1;^l3I$u@<Y*e4*@)R(?EK1&t6%*5w~yGftTW8#R-oXwZ}ckH}m7k3%`(ns&^*
zSV1%EfKl$GTHkQg!Hp*PVBkIV<xE?#lD|+jnH!T&Kfuze9<k!Y!+G7G)L4$p<4K{K
z;Z}xt=Z~5DOoI`(){&981*WpiTmBIkl`StOcc;?h72JUYl}6ezat15S3cNsVaEvDK
zUh`@n6zynDM}nozp&6d=r;O`Ybfl65y}%uvO(bz4xl@w9=Exz|II!>|^%+$}gz%6S
zsHHrV03&=Mf2OF|*%jc%kcT#|RC7x+L7u3X5Z|bYk;|*ILM+=g5lcDj+etPSPs6*9
zQKhi#=#XdZ06xBIjHlQul@lQ1cYv+o@?Ib}!ls1&{0f1w8{99?q$5G}kDV*2F`j=C
z)$}cYiAmpyzj;T&77GWbltl^C1VjmH*eiV7u7H=ZY3k__gCV7)XAJ?F7N(yW9w-Vw
zr$jn5u2Gc}55_=@+WL1@OgKK3?JhV7@KW=fz`visf6eCc9KpYS@_sCg$Z-F5Cqp}P
z{Ud*cupBZiYvQO1o#%LTh|{_hEZ1uiE}%igbt48sd(v06^Wa0J{Pf?afj4x8M$J!F
z7|osNB?iNvqi%l&nQ_*`P|G`?zfeY+1F$EsnhVzcHvnNkp1-l45>nO~hTlDxE|F)@
zH+PFj9SfK>tM)upfo%lx$8C?$3rp`j%rJc+*GuPMxB!>*Rsp|S7OtU0wXI!U6frLY
zU<;6XOdlQ;lxkWSw@vHx7Wb;cb%I#}YkAI#&2D{naHxH^#*T4+zdk=BkO$g;$SV6|
z34`cR7lDKSt`DT)MC$){{czp)#&mzIAMTIu4+YNK--7SHXTH-ZBn->G2_f37Vl6uH
z+5*I;9XyBekl8y<N*EyZ{}c}nphX)`lSWd1{NMF;*NrHUrOPm|7|Fz}d$@YOb8?u_
z!|vgxh>~zBVpk^43hQppWAr)9r`Nn3x=vDT)X=-)d0eaM=;8y+pX+73T?U|APCXP-
zu-myF1(sfSwz*%%y9$9#emxL{rY}DG#$<H3FOhp)>A~pzHq>DI(l>y|Gy7Z8;r<rp
z3+;$ZCD7@+&m8mNyYDWx#Mvd5U`oAJ0{Tp*{iFs42nc2Z<0KU2mK?AOY{aB-sy7&?
z?JFQxBgbK_{2!l2sKW0Wok-1zYGUN(oEwK(nzb~~IK33MAhWdTP#>eV>DnI5(yotw
z9Nw<)rf9a-r>fjRt%q!`<{0x}LQEO+1rWkM42Z#U!ftPtHrZYy=43W^jnNwX>s=*=
z-2XvFsWj`oU&Q;rYN6nY|5m7M7n1isR^<QBoWSw@T0Gc8;}LX#rq^#-B9T6N<bPGD
zU;fd?2C*D%`)Ws*^cS3@c>0HCB0xx~f2TmY>0s`8BREgqOG=--pD4qj(vc=zvVR8&
zxjQT2AL>w7rCd)x{8PR2%X*@iN3EBdHU?;uA&qZhM$F@0_s|af>qrJ9K;{5l_d!K4
z5QeP5{=*Iy(9`;T?JI8XWsB6(unWUMdt1XQ)JI;?1`3|al50)t8(DI8cH|l$IZX<?
zyE~gWN|<#l1Ou>x8_-C6c6b6S?!ZSLl%N2#H5>f^Bcd8K8TxY%cmvjV==-m9v&@|f
zXpcsxcK`u~5d#&B01#9FmI4AwL-my&w9`-rl-k%==fIiq8feEQ87T~auv}M@C+I0g
zK?tNnXq3#HOeBSH*3Dk6rxBS6c1u^76ODq;8ydj*FIj;(Xaof1S&{KGZJu7f4%9J4
z%!J3YG%^Bk5{*Wnt51N`lR~~~`lHFU2+dmuiHPu2y}L<myA%V{zus#DM=Lab#Y7*n
zU=m{up>;K&4uI<S-Lu7F9E(L5cbe<~vjC6q%`gyGB8@5OfUK&nP-h?NjhE$2jFwWA
zyim3S6t`$h2R_Rx!)lDoNQZ?%6j>Pv3|mgT*rWs=fQ-hd6XGPn9s#mQ3(f^L;UF7`
z1M&<_oQG7TQEIZV4gDbM3lJv5zG9$&k?*ZmbfF%V3A8Gb#DKQ?FD9i>u|S9eNxlb}
zxXIXqo;7G}1gL+(9gQq8w%AePUZ}8(9^Or)I|co(U;qqFx8~FJ?@e-badQ3d`el=x
zUXc%%7avcLo5y6meg)sxvsNSyGYSW}IH6HF{(0Iu&XVTshf55radC;zPC+=GHX)gX
z<UDP?Ck?3Dy0|81r{|~F0QdTWw#lKLHm?w#D_#d3qOgD+S#o`PebxkAj-i3p=4tB$
zU$t(YH(S>rid&FKnjhf{odUnt&(3JWM;9MHU7o)G{hIuKadzCiMAWfA7p*2aKE;=@
z0b@kmL77GqI#6$%H7RDS205;u*WWiOfD0&nNsF)!kbnQ)q)C9s(M9X}vVL?82wz-Y
zTc!Vf3VonKZ7xr*u)&kdi}Nf&QUgUU%&M*C(e>#?iw=VmCn$p7Z?2jaOe$lW^)mo>
zh4sbqxwYy3Hv6`-jW?D4ev<kV#qehGh2X^zf`jUMqOs3mOb;Hf*#@(R$l9?d=!$o^
z=JW)&*TR%&DDsXBvaG^8qT?T@9fzxtT8&v<a8r>z>kfe@qjCQe8l%J7v{lOGeF_+V
z=wmFwj)UP#ie;ssc-nI2pb`>RD@n!$*+3^p^CB39@ALmFkxe2OGtd98NH&OE$UNis
zJesHH2-ap`6nlKA{6MKed&y)o%2p1QNp27abj_qv9RwkRf>*Gc6bh`R9YvO4y`rGF
zM<psqF9Wvd=b#GjD@A9JLdTQLAXUf~@J}&YOasQe&qYkivz&ak$jU*1O|hOcx!@?B
zH<OE5CoFbcZemh!W5Qdo3Ij@3e$mFjZexQuyTDMfP!k&~T8&w?=!YUL$4GjFOrcL?
zwtB<50XzmHZJ;VZR~>o~s`jXEVUHSLKFmSmF6%>D1J8@i(iX4K+j6Tqg{xr;xO9j^
zRcxci(niJvu3~H~^hvU_Q$S40dRCpZP#z;FTZIqJVb1eoLjjDHLn*?WxtIFAc1Os~
zRnnOh@X>~iVJ2nsj-6<C0B|d$9Z>%OfDCemH?U!0lJR^nd+~=C{XaZ-(~+c-g^^vD
zZlHgfBjOwcK87Q$|1#7Dpll1{px<i_(XeS^B{CS)9!q91X)FzkDWS$6=`8DI+B=N0
zX0#o(At6N*MED?fouwc_C`Ee#)TgXsQeB+U(20!Il3gS3tg(rmg~7E@+AxJ<#zH`7
zuMkpu)3^5bNbAK%Zaqp@3Oa_Rl6#SmK(RBwC{K!PWO+6G3LB=l%LXbR@WmlV4UMQ{
zs_s$~H>zk4$>&XwAYYhNCkE6(mI9G0jcANB21^@)TKM%|8RG7+uSkG8ZWTFTg!lq;
z0K=W%4b?F&#ngTVnMuN(lrUY4d$otQcu*G&JY^-7-YRj6LW(7#-9__4T83T@n@PE@
zD$#StR60kWdryI+JYXP24x>Fk5bDWaRYR$F3}`Epvo`*AaITN4@^$|VUF{F?LHOK2
zdOp&WUXNLBSn!wy?{kVKWI0Rnn_jE2tpoZoWCF7zK^sFu8UIJowG_V|7Ssu=OqOwi
z30QUm=0S-JmYvDqHkEv(fkt5ONJ_99>Gc#0T4;!vWHKD!I!4(WsIN-bjIOtio+hw!
z0A5fe-C&D0wUdIOrAG;OvivLS3gHY(cl&#0nknG>qk9P#cbu_23(Mhwr9cH(%<||g
zWhWG2b<3JH?4PMM@;)^-r_?A=*HvD_bw^j^7jr!Kn+=$DNn|p%ByY0$O0U|ive<9p
zD9e=upR#oJqcI{R<9bNRw84p++C)$pS8vHCMJD4S8$&UJHPY!ETKmD|hS5#g&3!jN
z*sW5@Yk$qBbF!`(M#@eqc%U&T@M|dHUX*hWOXbsgJQ(p-;gd)TJ2hZ-Ga6Y#B%+4a
zQ&3OWzS?-22?f7C&x1k1i$P(v7!<u26k}pwdW1<Vn54nB^L)T+%vdsIwzg@KKD~cH
zH%+x#YcjajC0ItdefVLmri=`=uML?MKf8Zs_<=|}?c(9e_$6gIr186IR!-y%;zt(Z
zq6#s|LIk>yRFU6nTXZ1~!VrrxBxNNzX+zBDXLI3EaO2_N5aJmiVlEz*pd1z)v#-E#
zG?9bB85Sasg%2ho<K!l=RNGg1W*dSrfur))0U#&P2#>R*M_{3e&cmwG;^>d7;vtg>
z7Z(kd!(Dl&q2v`co}^;hhp9bcAq1++Kk`yxOFWvMna7HnuHLDLKUycC4$a0m!}(&*
zOfjeF#sODDzI&`UMfZk1mzzN$@ANY$(@PhxKSrtRcI;p)0;IzS%Oe9Iz~5SpdtsEa
zOtq(87q-s@EPwSqOFWZ*(cF9CGF1ou7Qb6d1_JsQREIWG#ayvmELW=AS@TmhyPYdn
z%B4yPriXECi8840FVUNaZJ(5Wug)-{49)y)n9)C|C#kw<GVU~thHSyzhR2-{gP%}<
zpzY$pB&<j^ew%9AJ2TTeZr2x~kz*E2VM<dxssIfl9n=&bBg%&e2WZGxXj(t%W}%*C
zdjh5a`XIp&tob+18fW?@t)=L?jXG5Vx0pW26gX59jhcOL?Wn{S2UJ~vJc1SpB1ary
z*VVVERXy&P_C%cugqo=H<ht58Wdmm_EwCfYP4DN^fx6PB_}veAn(x5f1kdsPi{738
z1@8av_-XN6CIF%FzbYly{eQJu&L{W(tBL;=;hJIQI|F(-5aiiw#w@=&s-M*_tsr3R
zE1~w{X1qST?CF1+c1id`cC0ey3eN$Ur#fb`kKT8Q@gMP*yDWF+w6nY(pmk5_tqhjW
zYz5V5cxnWdJc@<Mx$phUHLfbm$cbJgSQ*FCSn~1WHRCk$TKuHV3FRGul@{y2BVG8-
z)HDXEes$hHx;V%CywrL}0UeOJj9@v`<w2Lhk9+<VU<B7g{C{Wcz`rJFJTXLHN<Qqd
zIzjNrp7lhut5)~1D))WdvpieOL>3a+>@!aZ?z+{8Z~w0`d_29BxYjN;X3D_0;@>)+
zFzK`d%5X9u-;UJ*G<;pF!H4g@5#X<VPCWzQnk?{+XlYQ;*yVLfqe0i95>MEpaW%bY
z{|lYHXJ$KpjP@ze8AgyisS+NGI(ah3dXO8tTBDlxbZ+%Poyxk%HZUS{PeMyz0IUzX
zhRB1O?oSWi$&|>2c$`bOC<6`64`8Q4^-uF)x6{|;$9)8jz-qN4VDI`EPh!=+f<pc<
z#p2b4@adg0Zg+tbzj3Z4B3qL0d+^VXSd31W?9UPVbHV<c^VIXB!!>6k`=%((US^N7
z7g;C?g~*}1@k`!r1W8H5%UbQwb*_z1mr%0u*tP*9r%#4Kf-~(pz6omIoS#{<gU;+j
zO2lmI&|*+7c3d$8h=8~YM7FhA05FK_n3ol|+W31<bh4#(I0fc*md|Hx?i^<+yLnxp
zc3n~PC<t+TW%@wT2aLl;#AhhnJ<1=>GdNVDMd6LW8Ox$kFyMkm**h{Y?#vJvJ{%#2
z=!B92X@a|oN{?gooLgGH6|>J$Xe>+pPR>%x1(M0&w<p%NQw=F)%ZDh=O65KF$IVk7
zIaJD?8eB6Zs-cxqv|3QvdJna>Ln8HhkxHWAkz|a25MD!&hG;kSAQtzk091PK;V<uL
zYv9u3g~H&V6*>M*@A*))#3))~6fL<aYI-@2bVXM2EICznj_H~iAa=cF-^#3e)e+>1
zy&(TU_jFV@3IU#o>-OOeW9fB%S6O;4t!G&u?57V?>wj28fn19SLgpq^0fe(ArNn%d
z^G*r#k$uEWiLwC3vxH0>e=$=gW;SHX?p!+lj8Iv|3~b3tHv94S|G@dZ_COn|_=@-N
zZtG4N47Na!4DS?-<QMG~_>leocBxQw#s4mq3d#O|HSxbnxqPm$27e0dPcfHY!{6n#
z-zwYXO0BfrtL!S(iqfgdYn7edLan<~trWNOyQOlaOUQ4iFEJBoQvY}hwRUz(wPLwb
z?#jDGx!NsZ8Cd`C6s%sU?e1>NMWwgflXp7K>L=%QsJ@#o*OY=H^}2<!B1r`-BU2Zu
zXKGZ1jX<HDT5m^DO7eEGP^{2Guv+S9Z;X%GtLEEmBkOb(K^F@(S=!0(bUNiqwUf6o
z!@TX<P6u?HLT^_oNzm8b(sr#Pm1U_Qb$8?n?MF-b3YuqUL>@uxDSZMnp-W9aCL_E7
zDbdC;#N{(76CL&+2FS-v79^Ca*9JCK-mUdY<(-{gSt(aKMF$$y-I|i$&6f&YS?+ar
zG4EJ4=zU{pbXztX)`q0s$MsIeO1s5cSt=KIdle;L-VrF?QDB`==#3-@V6<}hty(IT
zYrU#et|*nxPG?)j(x*sYwBjaKQEDJ%j3nJq(0dwGL;jou!l;&ZYw~WPSdgpvPDS3v
zq)Mfhul5SnQm0ZZ7J3L4mSvD2{TF<G4-fibgo!a3jWl#JfgW6*W5w<5+IBv#z_ibo
z%gQ#vnphG|V>*g5BwuxHc!y9JA-Y&;r%)>tx<DqW*U2j-OxoS8Rg|i-qZIOz)GZK;
z3>ZmLn)I3NyD~Hw9kz?rTCZ2^7E3!j%64Zrw0yZ(%gdc^wXj=Jc8U?o14mU#-C{@T
zDV?1jEzchmq?f07Uq+cp8mY!wb-P-rZ5Jv#<zBZ^*{&98`E#kuY*i@tFh38xTw#WU
z5;e)^wrl+oi)Fe8oMC9hHcN3-7pxi)TfB>e8K_aeiOG;zKElSS1LIBM>Wb9m29U@Q
zlGr|W62@UymMhY3snCJxPLaM7IteSr6P5!O?Nt>RE~!(^=OtPWNHx(Zf)ZNA57DrR
z1+HZ&(hv=2pae(<YWpggwp0Ahg>xjp-zk@C`O>b^1L@S+uE@gRun}2<B0%3#sibtv
zdAX{v@nCBiI%f2gF&gaOp>-FI3IeTEYn@6*sQ^bQo$a0gR2_Cf9s$m#)drLXxhI#T
zYFX+?+wyjv!S5^B3zPaZG@X-zJG(ozYOhl&C>^C*-etqCQymm4@ac}E$Jqj<cWXP_
z+dT<}TG=jFr<JDmXV3ylRJmBmSBoV{+3hjfGk?go)?2HhSllgCdWBA*Cj$f0{2owQ
zqzWgLg~)EPRNIB=Q|VQ^z&i{Pp#qdBm1>n@WxJ@LjKFmON;oM2+aypR?_RL*c1_-q
z^W|c%2qVa-AhCTLjQ_AJ(NNg(V_Uz6xv~pvx2;sV)!z0_SE{hN*kvc8q_1fsVtfU?
zo#C-lu9YjjVs)olmGiqyF6aaQ;-GAHAJE^Ve{cNVAds(X&@Pqr%n1(Ius6ZrSIYCK
zuc;XQoy|LJsalh&+m&v<(y8|NjJJAxuS}THc#Wl@1f&M2_tXX$z^Hs8-=m+|m?h@c
zw)2G&EOy%!WxLYLL$4`V?i{617rpTqy-Oy#C5R|OEL;F{BtqkGi5SokyAZSpg56NN
zRD?Y5R7t9k|39QB;i84uwUp<HK`LoPp;Frcp}Ny6Rpi|YL*WwO&}qo;dq1$**x((e
zG3=3C)Zjtaw1H)F3+u6=cJRr1>cquv2w)r@aT!GsZ(x%^%*sx;SScwxG9R`N8orh5
zENZ&a;B<moJGI?Tal5!%mUap|j4c7niPUe?VQgbKF^3T4Pr*uk8iAV%90O3p#Sq=E
zY<Y=}fw-5)MIA%CGp9g>JPx(5LyHY4)w9G<=tRdg6|sWla=QZhg;JD3;U#==d_vkw
zL>Y`!U4=cPqhpj>j`;%os7Op?qVPAzCr2*O&=iZaC_BvXlup%QSI0TT7F$v(c7U%i
z15ifJ+Nsp4#cc&7XRjkw+01WD?g(-+%!IDBTKDKME3oa7kusaiIg!g>Mv6Qz)@Xc>
zU_qq81SoVnyE14~vJDni0n*imf5({eebfjf&}zv0=J`1g8emXBLv6Up1ibo631M&(
z>z5I-Gxd<TN1NQqkfVpFKy^Bg(CI;VK-SV)sN2=rb_F!p{I0a46oEUc1tz6Sxe_x5
zEUdvQoUe8Ay^;*-St(y+tbVWbM`-;|mxn%eLR|Z8XB!rs?Q&;(XIJV<yG15gPnjCp
z6>W$IFx}KqABLE6oHeN2WlTn6#jn)xbfBBjuO*lzU)d^LBq=3#hlBXLWPDZneRA~S
z#z?R2K+83yCwD*-Q_7`E)u}SDHHrqjmu$aXi=ml`5!Lp5r*4ki7>X6mWB{F}lEg}o
zo>gc9=7Q8kB$=77I@~hteG$HCZR=#?wVmDVVr{3;t$_YsC~R*tA|F%6F!dHDD3}i%
z)-<&ow4$bSSnJvP0RLn1Bf3-oO}QTNq-xA?`JP+#j+l}$KtgdpT3cJ9-g(GPfF(?i
z?ry2l?G+1Bey7N#-T-$Q%yq!A?&h!tEaq~tD_5$@cBxacIKK$01n4{5ZjkdfWd_Rf
zlc9v+i=la#z+D4q2x_~LTiY#^VJhu(cI0xgpp>dIN1+eo9jMRH_9h))dc>$pDjF$M
zv7rtn{lzh3@$jFZI?nb1yM^s?ty?VabmZbrQIZ99b`2DQBB=Lyn8#ADCvA6_N{$3p
zbb1sylu=CH=?eGmHTQ7LOU67OAmWmNP*G(l$gr;~S9i-1ppq^gIEdh3xCHP52PMZl
zD?9(eUvUgVLe77RrK<b=w_?7My#Mz0@!$7*-lYGZy*FKGBi9;5?{7ba+MOM2@C-@1
znf7sjq`6_*V0!q{Io^d`Kn1((&s8SS+1&Rz&v9Ptu0c{sHF#i1_ud~Fz}B#|w6r8G
zNoz<tbx$=##ei_|&vqa`t0ss$8o?SS^*EIEw~PicP2D3my9aM5oZ-wIIXtp85-*#E
zA9L~YZHa$x@hi$!#UD_B+Cot*ZHT5=L{*QwG~nIA7&idEsO}Ab82yr<-@g2PrC(-5
zv(PVHsaFN@ws2MQ^|vj^^V7YDzl|qs*;X<snzi^`%$SYIEw}r_;tILA8=OnKK7Kdn
zGGQN0NLN4*>(?Vy-19<;g3WHg&W~=QcL`{r3bmDTID%@ZF9>eM&@q+`f>1j|rQb#c
zNZW`1KK#EThond{)&SO4M)gWb%OdCq6TXgzRf16zMO{m266<<^i%}7fNz}{IHzjUq
zmfn`O3>U)`ouuxTe*|0+!$sMRst8S&wb}1<(KDSHoq$PV#5SPjsDyW}JXpFRN+m%d
zkC?U9SG=sxM+5-Q1EwP~a$p=)4nslp@rRE;3}2bms;vksL<>5CQ<@ZL`$lvz-}roG
z2P-svz#6Vg<)n~UNs8YY-BWK=$Ad4d!~H8H?mI)}>#2f|(S>43!#0B1KgWxC?ET;3
zs)z0WaK@^~_Wu?5_q_jqi$6aI_?!Dtz~5YBMbN)oWBErR|3hL;$p84cri$QC`lhSZ
zYDQ|!EFFDOHPn~aE4h^)@XNBOzlKuFg1*vcqXqzYo~nPD`QLN?xNXmI2$(YeS6<a>
z@%P_)?OFbRmjCZ3|EIYYXqGJN|F`znt<0!Kn$@GzK+`@ByjACcEqNedQ`4a14BbKj
zfh-oAQZ~_ss8;UVC^vbSs2~RNcCPR?9l%xuz|_nn(UR2}rB>q=|7tZir=3kE#Z7`!
zNeMs@EwUpZ!D)qxyj`0Bh>L6~Bd?}?=6P!$dbaL=>Peh=5+|NF(@#A0nmY0{Y9@p^
zk0-U*`Q-6VC|T!|h>7(fWu7C-B!=3w*rpxQF-B@O!jUjk5g({24g>&02+X15v=!_K
zWhq5#n-Y=9JlpR3SA|TTS2$6?F!YhZKF7t9@bMG;rV$Iicp=6W>lT$79bV1D(8FJ`
zYGKz7*9D9K{@X-VOmPNF<<*)|t5Ji$DYY6U`j=5_xdwkzYAu)OUq-E!8vISE1;=#!
zmr?6g4gRLodR3x-8MR*5;BQK;*CqOwQR__&{-)G=Q=)$vwSK6<-;`QEl;~eZtsiUf
zH>K8(CHj|Miw3!ttVOEN@Gqkl{Yj}+FVVk@S`GZ0Qmav-f3aG3u><TAUmNHJW6Iq@
ziLZ|)ULW9(@SENt&_WPl_<WV{Wb*%vmjF}G|FxL^XVg`xKg<8$;!j=RUpQ?G{9DT{
z3;bKltqA<9e%Eua1pck%UJLwN%e@i!SN*Q%=pF&S(@g^WhJ}>>!AEF;{=vtF;D5%)
z`t#BL-)#P)K2Lkt8-$hMa@MKw=GCjL`Cost{3ibX_h#ie{`0pv|9@9K^Ze?W6I{YM
zoV|<Du~@2G5F0>-hn`kY6ZA#Bf_5q2lP-vIVob@y?s@K)pPH?W7xf%f+j!B)<u0L?
zJKD&P+-s4~k*GkB$YDGP@a)*}qxepg{}2l=Fm*wETEoVD+U6C27yLnp-}6-J_k14B
zU_5}~yJB6upf>Zi$db#soy&nxbvMNe#1MN>;iO#038O?5RuUy7j67-_gdS?zEl`<-
zyoPt-k8wU21+Uy&;f%upSLh6S<Gz1Mnle)3E+4g(tbG%Doq*L#KvqmCYNF(h`T}T=
zjSNPDq7J!T;rf@TPk~yye#m}$YL(?ztatgw^GmNhKMMy0%K@`Qej(2VpoRQG8Ya-}
z#y^RQX#*hsCBAe-x%W?rG1egsm5*4jvHAjjE!96sk{V=RkXnH?kk)#uNDvO5gzk?>
zWd?a5$fP3EH;$|PAj;@u-U~ux8@t5!nx|0#3*CeQ!{)x1n!ZpI1<ea0GRnPBtlV||
z02LYQjhZOSG5Zlex<qTd&tB>R#=w7N8lwQUfkgSg#||k#)+7#a@=k$%4oC0M^@XU&
zQvL;ENqWUO82LEe><&H2`ac@`Gp_tIt^co=m*e(7^;fT-?|=V{^}iwqm)^9^?^@=^
zQad*ezUq?-p6g$Gqk&JhBiGKzW0hR#^3n}gp|}u7$2;$Kf8EGmvif5Ayzi9rl(Y5g
zukCkx&G#o8`EnnU^9cWt74*1nc{~)TItPnJN2=Yqrur`8aO3eA63=a{?|kLDLxGoA
z>I^5G0M!NE(FLg?D#deZArN#DSIUa4nu@A`+R%+PYAwJ)PVI7aIFADowC=oE#3k<n
zuhSNO2h<l}kEnLt>#9HQ_4KYb9s;KBMmQR~IeGOZlg=Id3cFes$RHh}!r6kU6C<b>
zy}_B&YdfQhU;`mjJ0Hah-RF%BfgqwSn2|W1mpXYsf<bf7vH%y3-EvWRd!?uj5!fU|
z>=xHMx$=f+Z*c7m#zF5^X_}g7<*CQAhag{cMV=PAb};JX4H@I|0v&LJ5P<<)g4HU_
zi12KTc3c$0h3fE6q3bIWce+Lzff~ZF0Ml|_rJ}}COE|#srho%qH!hZzsF)51LlQRb
z1>>Q-N`?C6bKnJ%zVeoU)W43>@dMYB!qJ@xm6SWB(YYViH~z`OQ61QecIhwwRCx`R
z*Rb*wDlZrSjwyd^SQt4Q1%!$>Soo<Sbl~3&657amSriIryeu$CrGf{amzG2kDw1GG
zu)K{IU!9LHyia!k4!@ky7nvRQhZ5C1`UijEcqo*ec}0OADVQjH6F7s;SPtQOgv*N&
zO0EEckwQU|y&_WAAv0%QO-o`W=?e01*zSx+=p$GVeSs}Es3dEyqaZ9utloIB%w$B)
zWn%Eyf<X8J3#iev=t+hDNh`n$lB4aS{}LUL)XE)EUn&2S(c=!jz=~=}+XRF=f$7OX
zZ)2RuiK3j6<p^uJIvFd9UZ;?<*ZI_iiip!bM-hkDD(muJ<?w<<UHM*7Ualh`n?|b@
ze$lmb6a*p?WZB3?kTgB0zV5r`?)Zu&{`D`-^i=J;4Wk<87g85d4BClRBOG;B(I5#l
zi|c|YA2vjJ`(S(L?df|_?mHvcUVM&J^Fr^GqP!ceIdZ7*B_3+>Tj<6MX)sZSz9Od>
zEx}L&qi+1OQl*@K>Zqx92RG~vS^y*!^K1yGmSAS8e5?%HbGTn*;tOm%pNzk%ieC&u
z)BUtnoVZ#|-G6f9c`gCJsquf(c5csL6Fl(+rt-0YCXsToU^;N6!tD1(25fNP6wob+
z05=^8MD~@?veyXSo3v1(1Q1;F2!kp&ozKmJ8Lu_Qh9F+BG?fqXJoc%X-AS1pm6@Z3
z<Gc=gWB&U*oQ7=Eel?xTJ7VGE$5p52kNc~iJ}uy(E;`8L8k0E3L!y0*=YFcoew&7|
zO_a7#K)1Zb^fHza-d@<8i`FwJ>5>1A^INqJQg;@_!ao)4O2zO|Y_8QWB1mjGm5eEw
zAofHc@;x#ONOvY6dHF^`inHqFCZ8NU$$NDFp)Lg8VJ}wme5;{Rmo-R4(C1@SWt$p^
z)#;|y(N&~UZ>&xyqfW)TFicQXu3%Zky0aDQ#ELz;`*@uDU)DTnqaApFUXAbL2y~kN
z&&Enz{|i1p>wkWeKlchV_V9RN%7y37!M6A@reboU9m(~bVVmyIv@^Gnivdnf;!fl?
zMZC&-y|x*z)~L-=ub!y)YNmRPL_O3inW5rxqT;fyXj5iiI;Sh)&kveRPA_cZL^T{i
zDqR|MY`(EkM=|sOfB#+V`o24o3T0aQS`{BS_YfU@By;yU_r2S{PG%lV%7oN@lzK<r
zB2_M{*bPU}n|6QDby<4{K4|?R{IaoOqFjvmJM;2uW>FLWc+r7flp@u<MKfkueji(o
z)#=!E_MAU%XR6n;>lw6(m%L7t>}Qnh=acGK*tVg|xOW+t5%=tG7!5GPe;sm<)Nlk4
zFNLEN9FOtaSqUq@6dmcY&Q?*>4M?<)Njm}^b*>{ihy-G}xw?vJ1*deDqE@Ckn|^&T
z&^CQ&5}b)X_-GzX)Lz^#!NdW?*6@dL)<nW}VtTy~=yUNH>%k9kwkFPItG7?}T&(8?
zqO%6%v+AXoV@WB}C!NAY{#Mv8s!s>{(}$uR1%W*@R;oTl=yJ)@sv+46zOL7csvtL`
z*NGtpXi#QYl@T5Qlp9H<QCKgbE+LY4J#_|b;>eadGDd{d2~v85>$OT<Y0p^j74%mT
z^cXO@E72<i&9#KrLyXQ)cgwFA{Y3y%Rjo?*^kK@pInawI1NYY^@}n+|zCzjqzujNE
zH~+>#`D>Hsf_8tYDqNYqF12UNNX5!tH9l;R0@a6z7JQ>by<R_%t|aAPr}&bjblvTB
zU}u;>6DuXFU2(Ogx>+&ia?|X2vdaVjgazCx6FvWWCu~Qevrw@=4;A;RYv-DLdhEDk
zC3d@!J};&bSRYOA(PFti_xLT<<0F_d5ZP2)he03!69(emD|>uqOc|nR#g{};OD^rC
z<@(>XWE8P2h(xR0niaapA}^50f;Ai0&et_)2PtW{Gb>FKtJgfdtOd^lNssh}Vd7+s
zx4%ARYQ`e_MANzs`-;_&sa`3bO*Q}Q^FO+6Z3jJfG<-k=kjdx&`tqySuVe8)-qhbb
z`~Upwod0Ky|1sAUKx)u{H0k~y!bb*F0UeCf$v^0964lnR4WzY)TuJ^>D4_d$*qj4$
z?O&)@YUL2aKcAV&p&cMC%g=oVjx<TyG?DI)DV#;9t6}1hG+XmM_rmGi-lP3QoQQv<
z#EForyA?VepUrjrWeU#O9rydUZSB!oIfE#e!Hm%Y$Dxo_XJ)c<7HmTf>=y3GcY3R<
zkWlCrbqH(7-rRr|Evq6&MRw(o!(PCCp8N|k3<|%X15r$Ah{#)YkQKKXLoZ`9vB=9U
zSt!EHjJ_NTp<7E76a|Yyp&)jn+M=rA$3yYf)8K?r&3IiTQ5OS!=}@l-fkU{~O_Q&G
z6eOOD!k8CJs?MS@Egryy?fcH!N908r*zu8gFh0(S0bA255;m`XjfSKa!%#-a)Wa2~
zM*N(gyJz9=xj0-roVmsQ_&UbfX}pajdsO~zZRW_&AAF+7>lEPprqB{M>TXgLTG-(w
zakF@X&*k{dT9;Zfu4&w~Q3r3oDOPXpGrdM%o@!D}og86ot*OW{QA}1{+;1Q4it=LB
zh&<1n$KxE0$C5D!GFof3A#92%u+w0bvtiw1Yak2jd3J|GU=B5Z3}%_IJrwke0XbyR
zs55mk>9*2#3!FqYu;jF|0L;E+B%)tD@g8#FxV=A{%{jVsoCmSTq2o6SlNqoynb(yi
zzcG<yd`{IM&ZM%Qa1WSs<(VeRBTu@S_DZx6$JQ3<XIZxRy120seSVbvGr7;{Rani^
zf3F?x!P7BzBctYxe0n%`&!g$0@>oL#HF&J4q_Pd1boEUk{?T1aFBAdV6#qDvz{Y;S
zZcq`wi+13mGMy-x$wdk$RX8}^+e^L#$SQUiT@9z+c=h9t%Z-&EUsJ`A<97?3sn}MB
zTRe~X4Xk4zpBr~HD!iex{3YR8Z&H2d`XZ{&UlR58-7OEq#wHPcW7ASBBEecK&XHcS
z{3FkQf$t29B9=(Cbm|CwnSXI^btznM7Xr~4Vfbgyi2?yx&r9mP$s>o@7<XI7A`<j?
z`(>lHCcdmg@inou^d*|R|2W4r{EV4B;?QK&GABBN>p*4`ik~HmtM)n{Uw<-Fx&kIJ
zH0=bT!Jq{dz6|ouMuoFU`JIsgFZvOS;PgW*+7hoc<b&5c_d08l{<8|-B)oN^C(=wt
zlKd(c0*#2kr~F#w^(*y!-WlO4rUXLZw6jr3g>#dL5gA7Ac?U{%M%*)F))jQ=&d1s(
zD8mOfe0d9vtokS25Z`XZ3pAnvlGWlRmg6Pb=}oPCvicOueHG=BbYSPpU&@|eL{p7s
z$!NrVEFdA&4u-Tz4tQc)TZ=+xTZ@eZ%5Dr~ST1XDF>bfGT7QiLd!^R)uaV9x)W2G)
zGg=?7KE)BsMk56g0+c{Ssmb$lg?>+X-Rj6tkmyzhzH2)1vV_n2@GGm|(YqB)z-hd7
zHlh;z^-e)dg?FddMSQr{LxNA&i!?z9sP)p=>~zK>U?_~cuIS1{fS$>~?y}3NVb%?B
z)V8c5SLci>e5OFtIH5E#qkRn*I$GzlSGmL*m%YLz*0*XEDNa%Y@5h0)F2E0^TE$Q6
z?@kZ4isI|<i7=0A;&-(YnY`LyUR5>lxgIab@ws079`U!icCpq$jaM&SNC<42CTU>Q
zMR0>*1no}#Him&l;Crmi2^*W@_mES=3i_*8!x7+kQ2o2u?)kfaGK5_nxk4|&=iZIm
zjdTupJB+>?D_kO#7p;usb~>cdmqZe-T7R`rbThpF^vAsig@2#w|GQqV$K$`RJp2Fn
zEBOCQSO0uqP<Vb75~)`Lz;2leUXmBvGJK5@;M=?)ps!~o`9Z0y;hKr3pscHiS=#jn
z*KTLtR-<DeJ*7&0>h&5IRX;66bOd10<d_ihecYMwD1`V=2}Mih^N|-c<z22`OVc5v
z#1uE-*pP4vE9CdAQ4-5K!m2ncG{{34<h_bgJ8Ln&ONOMpJkhIme7FtuGD~`(e(_GB
z0vpkyUw4-mr6$<c(P_K2yT5bH`$j(yu%pOA<=0A^4q7c%3yCT+V(~r(aj6E82rv<8
z=9L*xrh-B|Im*c+KfBm71eeW1o<)j$6iwC0??OH|o3|VYqA$ef=&Jvr`X3zsvE8{G
z^xWX;_I`4}6#b7C*zV)@AMp2i|Nj<$;2@)IF=+d1HBYxr+9#(+M~BC)Tq1kxr^CJd
z!);w)7Wv_Xm<S{RfOfI8gm$XC|FyHh4miu(+dBs*yRF|gY87AxAyQaQly@r?S#oE;
z**e}8<-=uBzNkgkvI0N8g{+;EjXIjhRxd$5siMpc0!$a3m+V0|oKdrP;hv40YUqqR
zK^sH)RfAEd3RUK)aW?jP-D=PC$2V}GE$_`ywdb9wisc@qPet|pARN5iYo1`;3;0}d
zpz4rgbI#J$lQA5hc@(O00IRz^Mpf|wQ}%Y>&I{=D9S9lc-bHo3=C=;_n)};O0;zD;
z=?r>Kzgxl3Y4_=c0t3v_A9RT>Ae4H+T)+%`BrbEIVs__VeaR%rTu^Kv3P!otb1-Zs
zS^!n54jC0|>#Kvq_xt<CxBb?h;fx#RsLsPLAR21t+p3V9ySkkGE?_R#6NQg=j&_<Y
zU6#KJvz&4sPLd~g>@@Ux=sR3P+21{gw;Cm3!?oH<qG`D@M1AG&R+p@MlVABJBcxM|
zGFYG_eY@BGjQ1by&+8HxUVyYe7sXe3rNMP$w<l3;x^QV<tuq;@@>NtB|NJWWazjJn
zi<3KANVWTOqLOWMaylHkqm~YVxXDK@E(yt8cr7;x#EIKGZ+1Tq0M&o(VI0B?a7H-=
zmd(1(y<YER*yY3m3zpJT7VAZ03_tz^%nF|-LVYQ^fqdeH4@}f|&_V&BEFyqK3A`vD
zB#6kL@p5bpeyvj@@M&o&dXzv&h-Tq)35Lx)tBvRiPQR_yYo$i5xMtm#c~If=Qhg0R
ztW#gecZq#_FR$*e@q#x2oJg`+sFms<t}D_M#ox2hD2i24TZ=q46t2J9&`*X&cRcRS
zwYqba7i5?ZyiR1}gJj~MFZbNdn7;X23~CMs03O-yV8_sMM7P6>bh=lE;mP9|pxbf;
zdfsy`=<?F=R;&(4bg0EMOr=sWZt!~J;4*$N#h+`r_-w=1X2M`^ueCl2p_1yGz&)fu
z*b!z$6DUo*IFk8Qc_QOd#?2)AC{C0Y!uS7>H_Xsb#UJ?i_!r3QOuXo%JjA5J5iR!$
z#dxvkUDJlna+AFAv_Vk*-xBA*xbJ9m9sTiBbVWc>HaBSPS9b(Bi;N^|T0^>Ah116<
zr(b8z>DfSiSzVPk-Uyt?tc%@DD27MW!JtR2yIt1(BeyY9ACy+wECf~3=0>!4=)^@3
zMPEsMj5q+uS<?&1B*wxKF7O(s`xPIu_sZ2EkDZEQTIh42X$+g}uq9m)CW3x-v6oi8
zs78&Iu<_Iuw+|0?ObadOBhinW?nr+ruxqN#oyF~6wUWYjn%wqMXO05;w^7TK-8>OY
zY-3qDcNEFdRyyKRu1n=4H5D-pTH@PxYe0o~-s(OJL17vS@N)@1n(sKp3WmHb7zHa`
z&$$@*n21)8G{Dir7fS=UiWu0)#RA(aX1$RSL8h+|@n;#eoFxSanV5}Atd?4{?uVM(
zWN6BdeR_OOJa3Zx^Ld?j)}^J-(N<(UZ-2%oBHkbM>vpt*eHU$ZUa&s~p|9`sdZ?l1
z_yt=B*Iv?SlPi`48kU+sr7NqYrAb}qWi}71G4zYqOX{S(yk-)PmsnsfZSR=A<VKX_
zXILd^%8AFC8|Ktk^H1)uym76rZu7OK+Nlb8^H`lP@s=vf!m*B=vxM$D$A~hlba>^=
zPCv`1G`+<KE8V92K69h-Zd>i!R}CqxxA1hV+oo?1X<5`1>8WLF{o(_92mZ@NxQWUJ
zusS#awJR6fqMo$Xs=NS>Ra_)Sz*SIZoD*v8UN0<Q=%5<>vlb2$P%sA3-5+$Fo~jlO
z5J({n9Q+4}#R;nr)tYS;x^G8vvn~r?)0PKpqr3@ahvDes=TEp;*hx$EPl=Vo>LVAY
zUImrzM^9=&#8e>e^t6*)KihOrILix$HgH>i;ET>$ES(l<QFz%YCUSUwH{<C{OR>18
z2^evloMjQuW$hDs)a`TXr2WH;3Iap0iFyoLrn0%nEQNr5Yx9k(Fsbo2MhZdWfWXnp
zfZeJ1eK))ubOZK8U}SJQYwFZBVd53+(gKQC2BWTv&s<PI4dX$xb$Z;~i;9B)5K=jF
zGwZ;S3APyEX{*USR9cZE<v{-JA8va{O#hNPRr_RqVwuYr{ONnJ1YVWEXWETtA7q=!
zP<aGV-@7M{8p>=7iOu=EjKGp&P&8WExJ*G|7K6an#atT8<av?DO(46t1SwmEN;sSH
zR@YU9FR1LODnQ?%<0YXh*61Hqt5yvzO=}Iwn{VcIaWO+fY6(@HiK<OlN_Cm9rZ(7p
zmsAVHq;)kg*HvBXE<?CcpZVq?7Y|<a{Ob1a_WnnLuTox^`w?);`(GV>!^ix;HyY3H
zf8WObyIQ3uxx{mt9C>5R<tyCOXR?0STdBt0^<LSJe#T?OH)&rt-$=BMP2w{M{oZfw
z!SAh;qvr9+4rNY%6WfvxF#17l)(YyWhI?9ZCcM17euZRvetr3CyuRe*u|e@kCjcP_
zj&6fd=yuzEII4TYp0dKnOsyw|U(Td*7<@#DA1dnWDN%ru^`cml`EL{XXIB0;=c^V)
z;ccl2xp>au%(n}R%>}VorSc$uoR3@p(o_(1%3Gb%9Sw$m;BB)gY%?f^hUtPc&PRhj
zK$lP##a05#ZN`#oV)pg|9NPyXyaX|D3+*tORte0u84oEfY@xKE801^rTIZ}Q-S|DO
zKK;=38(&L{>pGvEO5(Z^U0`3=CpvjkmwXStqK@pt@lJTQ3*o3_qwprJ(EJy}U;nJj
z-nkD(anp=w`d>xI*6RiuN}$!;b$#~J5?W7S75k9Hp6E=?cB#6@Ef=fFVvA@U=ytfY
zeMR#m{j|&+=I`^uq)y-a3Um@n$(OD$<sq#~vk5t*f*e#RF5*Yzqe8J5<GY-B`{o4w
zLjL>yZ6^<FJP{^P93WI)H;*@M^NxL0?zA(p!0}FV&(!-&Wd;jz*s~OVj){A6RCT#C
zEO%GT%|E0@U(AtJCqm;)D(xOqR0}q7dh<$?acRk;+`aY7)<~xfbTrPH!)%`2nPihD
zhduc-?^V+WJaHp(zqXS@HxZA8nGg~7EhtF5OG|1^XCaN0fwJcw)v)K@r1iX7ec$f`
z7cd71anbL$mQ$8>C?$(zuKG|Ux#pWUl!~#Sji2>?j9-i`8UO4%!*yHD#W2s^EWD)p
z`bLj*DC+anZP;}iu{B0dz#E)^P+@fyMYjTWjhdE77o(|qizVk-b28r;8H1=%`lK1H
zi!X=?EQ+~@@LXcm*Jhd#%blWH;{>LIb<zoDXS&n3P<$FYog&P{i?K3S91MbRb=4n3
zm%PsEDj|?ywDC1m9$z;SU_mM@Kpx~5)AJgT*C?7V%=0m_u693Oo~!yab{^Aul|eCb
z#M~z`)gR2mu|-t1X*M+*KR<98%@kzzTYD=p)<$K@Y{te4Gcfdi*qg@AP$a4T5cXDA
z`O+@NS&$<+3(jbj+Ke?CR*k2TMyVaO_Y#^9mAx}xI-dq{Vm{=%`{93o{!4YhY&#6e
zym+7O{<k-Nhy~~r`_ETzYRhr|pP2o8|NCw3f7vDj?^dnRj_zc4MRgpEs`7rYN-7Gy
zF`6&PkMQXNrV35~^PbnZgT*V@G!1lqc|SQl+Bx1kjf_gH+|&JcN6oDdDJCZ|^O9$^
z_J8w>)$|AohF~IfLD!P1%X_DfrnGEnDs!%EmrQ*1=bOjZW6soKK3z{B7-=r|#?$nc
zQPmD+bujL9T%a&|w@ukOrfxo#Ppw2<@vgbMx3itMw=3R&^nOGtyMwE>BoijOKDYv^
zBG!Z6wOcqVy}Zfn+u$m#Kkyx^&f2Xh)$_^zk)E+t=_cK0$X`P1+ElvE)VVfQuK(H^
z*OSz&XS)sm57mIak5=)!aj$peUBfzK=@!GvGM8pCuf=nyKF=<SCl`WQMeA7>f%J3X
zS^8{FFMN~bE+|z)##B6v3piNd#&!*tP>nt9pC7^cua#rvcTsT0;h?CrWMR9yVI|Fp
z2*l;DSr9lu(GPZAt&PmzMZtH%So^ZZfde~%(>s)^deLRZtdRG&MYFZkOozVj`CfnA
z-*zuXuG?<5+DCh*C++4vTbZ$u=A5))Bh7T!TZ3M6I2;XbtVR+;clWsJRI{D`?yLDT
z%lWTSUs-t*xBsuLEI*(Bp7nqJXX*bKhZ;NydH(rebMnEpmZ}zXaPVKSp2J!boeNcp
zczyrl>$=ms7>vB|(sc2VH=RA@<LT;@JKGUB^ECVKYNIVT+5HcD@+B~$L;tGR#z78{
z0*Rbph&S7uMaJF<k53p{(F*iv$9aT0bnHc7F@+=w@YuTytPqbQ{7yk0fsQ1I%eP!Q
zqhKV$g@c7PQ$Tlm!%N3d;HQEhLwDVvGjhoHDy2tBq}9Lhej$-T9x!W(<VJcjITKyj
z(qNEWouJe27hd`$?3jEJE@7wSn=q%V&o2r0QUtRUSA?eGZITj?!jY=q_An6XF7WD&
zkvhPTqefx^*3b_}E^YQNdtLl_=7y*dM&G5nc+`~vVAUB6Zwt`Hl2~Xh*nKklQ7`GL
zGhnYEY@f7BXbav($6ypq5g^<t234Bd4_9#`yriIFD5*&|W*HiAtN@p#|HkTX8QO28
z{Pv%u`zGhr1S;tQaWo1|qkySRmoboy8I%>{<mPeYyq$Ra+a)zlvVVnbT{wa4PNMKm
zF-r~52=$76AmzFlu2zT;v1;E!T@VHRM(q1??E8v!uAihTh{*Z=A4l$Ji!Af>w!Ri6
zZ`O@3)s1?kZg|k|*W;%;KrfzTDhO&wvuG1$7H&kRFg`M)i(iUw3ANE`>+r)pP10x8
zJdCI|^A!v15BA9Qm*+A;H**>Y?CD-km-Vx0W9~Q8vp?s}bU97aX<qaf{&M)63HRvB
z!k^ih2X<f<{J)_hXd$i%T3n1$q^6*uDhSx;b8$I}pf*yWHWE-9DNvJorU4O~bxWbn
z6vddcITd3x^cZ8xmZoUGoK3$ZO)4}_Xwu@P8Jb^sQ2TSYNll7r6rSh<KZy?R@0GTD
zGjvA)&<~y7f%`><gXS%Qw!`U|&8=BFrD2mWr5SK|2t&++hvl5*DY<0osMwf1oONu4
zTQ{cEY_D?FMYYb&pg2vE2@WC2B+#`<uQAi~s0%SgYPF!vwMwQgl8H5P!V;O~$RL&;
z+udeRskUUJViMki<)d^i!Bxo)!9Lrr9fPLiWX+k%4U{gIb^uvPF38z<t|Z-%ZDhJA
zv)guWys%K80c$1SXW<a@q!ZsvVwo%Hne#J6Qu>^mK^ErPr0>Dp;$9ue+yx&~7Jc$=
zJw{h%U#S<Xk&76fQ0Bo?U4&(eq_UEjdgAN^ViC8TMLZTR_Lt2AqXd$An2hpb$4Mq>
z!pja(G3C;ZKsPjKsnr0xh)(#L@U^3+v}tX6?sz@7Yjn(r@5B9M9~p)Ia`Xl*Gvqyw
zv303PMQIH5@I!1|O8c5corWe)3{7z-vMrWoL?crdU><9Tsr;2`1Tp)KM!JcFnUl(b
z(@X)V7SQrxgPD1Vd=#6eY`Q+ko@sX4+tyC;^i;arrbJkVuzM(><FjUp@sQo_gO3&w
z1YDnxR_m{7wJZ-fGhvuWY!p*be!$sd5>C?2>)t5Lc=|~!#iZYuPPS?N%)>tIUb~}w
zN()m_oK*SlX>4h-XK#ij&Fu3s&CVuDPo0<c;p0)~B7wFnP8yFq>oVGrpBs}W`TWx*
zPO;hAnd>NiZ@ZcQV3$?1>3^{!a**#spzhpCU{maWUau^_j{ASAy?NIE`ZoGsQ;cSm
z?oZztnt@&1o3MK61efht`o&;y(Q{b?xILkO<g2K}G&QGZ6P>?^?<0U3dL0!ge1rwo
zVu`-%Q%tnAv`}>?w1~9etG3Ju!twceie*l^O-{m6B6{M9J>W+#J><JT{`egaZx>;<
zi0)3&gShX0F_H`F**i3>R?Pd2_@#y>MD*TPSVYjpWH<=)vRbH>up?!VRkQN3KagL<
z5WRKAB`Z7ueS^t`aRum)+9TEeSf<g-#qM(RC`4`WT^pcU-n8!kO$#OtgnQwJIBHU-
zB9%6x>!Ocl0=PB-Wg*U-&eduHQ`3@$jP?H_-k{>@9qgP%4Lbl!geOV%*;hWubv46w
z0P*dP`~BM_!#<1;320i(6Ppjkl9<AYaULX|LYYyGU9!ij2=$u3(BK!?9)>W(grXM7
zcx@o7J}*SNzSiAbTQdT>@KUnQW=lMAQw--$!tTB^Y>ga%RYHL1@gSNm6hPb5ahVb9
zdw={B>}M&UFZ}W9s@m)B1)6qE(k97jf=Wme{`)t<NmJSg49D|^D+}Xbc|S_w>Jaw~
zl+NNwPq~WIC5xMfRD}9S%dLTy;`ofCi8bUL!a8;FP0`*=uEA-~FvhB@o%_r`5R>`p
zKWaeC3Gx>oknCCbpEx4&J^ULD$wM7x{Es~U-}mG{%lUt~@un7!|NiFn>y_v8|Fiwq
zv;Sw)(kta|;b`ah`0y9wa)A>>V`auWkjBgx7RizyKIlRpKBT!(0m2?0zTex4IdtK~
zpPDB>rTBEox*(eIszx@n_0!JQhxY!?z8S$-C(;g3U0keM!7kj+mFT;D+zhNU?Z2Z3
z71OC4dvj=bn__D)7y&~%A>PfT73G+e;%5r_0A1(t+CDsJwRaEpb`N%BNK3jPA!{g<
z1=_CRTE%GNC7j#fi?%dlurCGd?s!z52t0z@?y^SQlflObQcfp)GWfx`@Ba6bL5G6|
z&61~UfF>k|fOzUgRP2^VZzbq>!`ml-OBxQiK*3ykGVlcC38D9gu#N>!*Z~+}SYVz2
zJZ$xILw(G<Lvn|MQQdD59qsrf!-mViQKCqWcL+UkE!SLBcPMC((cP9=c?iw&<qm$L
zU1)UelAE4IB?4ICKTUfps*|@Jl%&P-(vXym!W8e$940IPmq2L0gnoOy0urc%KlH1&
z+J%Jo`N2ocHB&dFAls~T2JZR!v3p+dK38tg(iMK+7K+7<C`fWl_hE89GuW7<^l%K~
z2wqV`+32xfsje+8dFE5IP5L1_zKRi^Ec#Fp%_R8ZhYy^~OrpnEg*B8hnc~UuLG+Di
zY+sFlp5BCOU+QIm0mF(>v@FIV>N^(t=zY=!Bt8alG9y?QBLdyU9g0l}AQ<mn#GZ*>
zCNNgaH<j}~7ayTlvHN{9$|&K*^c5y2MDptWZ6KwkgDpGBU7OZJ?opNx`=rriLYZQc
zN25$5E;-{!$<$cal2>CI|D<3ULw};*cwFPK^5jlf_dydWsHM-DX(+4ALd6MGNR?3M
zA%v{;NOVwA?95c43bS(36apAR?mQGwPdc+>k%oej9cSn{6`gtZ#81i7h9D-1&pZ-b
zk5`n3ks@7KpBE8J-*&D=CS6rJgLPuE`D~2C4(GYj!)O%~eP<vNw++Py=kR0J)+5ug
zM>Y$da7?f|GqaU2_YVQhOW7yx86U46)+wkxL$Ao`hY=uic9mC~x%l8?Vw`~<fSG@y
zS#Kaucn3r$t#N=JAW-86)`gDCIv0`a+qem&6UGh~)heVh8@z+9ztG?d`0Ao1Z7o)9
z%?N$aG0JF3l4*`$Jy<eFYU2f&mk$WhS9wmr-oV#2DF9+k3M<TDP5EwZF7iJ37Hl$w
zC)%1O2#vjpj+zGDs`c8vh^r4?lgYbuPx`)0YolhjQR6n|8j1Vd=S?1m=oDyDl}~~5
zjZypz1u)bJis?QHoHM>j9Nf)8PdR9QUi5LK3!j(j#l^G(C%~L-64|4y)+DTMk2Il#
zr6Ei4!xInq{Vs^_KlS*vH8vFT3HH$@pnB51?;OlxNYuL|(BFDm-t!{-QT8AvsioVI
zR<k5b)ia?-8UPlvF5=Qg&@hBy0QZ?%*4R8?7A1}`-{4{4+YVZmmed2r@17&~SY!4r
zo@M@wF-lRK`nMe<eMV3^^kTgOM~y(Vq~bhOX@`SOFe*gqv3`3r_)?0y=rHp;gWjZk
z|BsAZ^ZlXeea<K)A2XUaiTvpP`R(xIr%BnLGqYRb?Tvb%LBSt4<}nb)ANSAPkr<o{
zf8dwVQ=?G2)I&%Il3-plqF8i4kGF+|x5E-(C6*-lgbs2QdEFydr0|PO4B^AlQZcfU
zhaLe*KKeA4pO%(lzF#`yKFEwBymYa$MkySingNkRHC9mf^EKcIu0`#7*e>D*AFY<L
zJn{$nyBul`lsI|lDV4z(#`g>sm=^1q8v@jzHn0|UQlbB4<N=MFpG(afY;OP~RC;@3
zp*)#9@-0n3p@>XtR(977Lhb`1pkG!5as%dlit65)b%7sr_6_8kH5pcHG-5=C2^DG*
zZ`_Z@7^MWjM9BmclT8XsV){S_U^gNmBGVU+*g83eH3Dx}&^1x$4SZ4f;)R!DI0ym{
zEo!>sVbAN}+$-Ypt*vT=ToEu(4S#Bg%^bZ-UR}3`jyG!e+#rw(1Tj1&u0w@ZW0OMI
zWM$n-3dqF^#qbdnTo<?ei47w)Cfxk4#zU=9!H7I8v^`kNI<CTGH)El)){<S(%w8$v
zA1L#ATA3EIhF&#Hbr+-CyQS792xL5Bnp1X}PbN&jUF7UG-98`>+Wf(pu#OWqVCGWY
zWKnFb(&@R{ct+o0LCc>%v;7DIU&HU~3Lbv5(9qDL=PG&medQ7#6o5{jKYxNbjK7LS
z(`QG{vgaoMs08cGz3_b4oahU*3Lj0bBAQLZyQJ2JB>JV;GJ$^>OF4CLwo6{K%G|`t
z1THX4*X$Ahyi4w#=?psVx1$+08;eFvn!cqb`E)8T&&JJhe%=-Qz*t+$*(|4%W@ThO
zaIQ(r2%P3$mG$M)>$wGMWJPQ|0V>v{QME_0MUjShEbScR4Lc>Rp_eZ;vur>kv34nf
z32t&}4Wu`67iUDED{|TA1oz2|8#@D1O^^{dNjJ5tYTkSrt))j1yXCD92*d|UtKJfl
zl?H~b;$WezJe7$^>e3J9y0Mad<yF!h!>*8n_DVbMU!7AEuGlFx@_C+)p+-dvu9R!G
z#1_SfJ*;g7*Iro|@*}}d8KX3qAF;VlT;-mp#a#T^?Me0GQW=;$3$0~kXqQ9NOGu|g
z%;bn&yFcv`be>x^tUd2dnsr``p4sAv;VcF4Me79tcpOJ_`XE*ppHkcKn~hmVBlNAu
zEir6!kJM}f_2g=s-*BuZ5$OJ7Wsq^!v6^=j7OPLI+xm^hfVV(9guO9?Vu8LlVH5@k
z9lGR>=*guk%d`jQc4_kP55xL-03T;>;4izpmUMR>IDL~QF!+$4WF|U%JVcFIHS|sF
z|04gh_b~yS;{Ru5<<)XL{@e1KH_!Hee+BzLYviBG0x)ia$4Sx*@0-V9d-g;0Z(!*5
z7jgizJahb>hr~wKTJld6+W6}X@o?q~fO9`b+-q3@F{Ezw46%i0@%9I2W)A7Kn&#}h
z8V??_XhaQ33-2*g2$LBIk{V58GD+r&LNKFe>&Qj*6z|lt_bl?b9Q6#q)Dw=D@3ML;
zTYpOLzz=otJC9G_8E8h%VksD4nmb$aq?|!3=orxHo|KwF8#UXT>OAgKXI&w*|Iy6;
zBFgvd@c(RQAhXK<9z{_fJ%RjRUw#8Je@y;gsXzPweD?qGXZZj4Gon5}pE&-FTx+B}
zp_NXDLwD2~d5(WErmHUF+yKd3cx@tx<H47GynlM@h5wof98_47@lcI3fpx?v=@U>%
zAACw871~P-3CyP^_+Xe*Bs^HEOFLFGmyi2mU9h<>swJ2fy^*pFzde8EcV|nOU<!z9
zD3vs_WtSv{8c2#zUEv`9F0##W#j1MxX0y}l?;;Cu?jC7<d8N;ERcs0dI`cDUW;dd)
z7qJF#Gtr8l%f7|YH2shXGY_VA`T)lcuH?2;vZKw2QZv0&-{i9(+fI|tdh}JCAvV$X
z)m7HfENI|n;hV{Yl_Lc@+jR#%`UX9BdI5V<c(jxAWU2^5VtuMJHrYVyUfPqu>^yss
zn9WFA_0>L<PC3i#pa`iWpY_{VqBcd$@9nh9fPRBc`mqz^4Ilse&gjbRuE&?XLa{<p
zXOvVp%RU0&*w79B|1Q_(kJ~0S1a4Adi{;ahZafmwK*Og(`YD60PU5<Z%?e#5^W8=Y
zJ;^!uUv#V~MD|}}NTbq-*x6b#%w@ge`EX7PTp8VR*FWkx9k*~ZIilsP^vvT9Jw%Ab
z{5|_%2K~tDMdN<Gn3eL6&<lC3_NXD_CQT~7k(=W(j!DZ~PA6#hh=T>iAiAk^ozTHB
zoL>{s-DJ5awwpK`xz5$v-GoTR^G(L54c|41h8g#EDw}WHf>A<Sp)u_$FS@6L3?sCj
zscWpDLD^pm^0Ev*zb(Wz<l%azIJQ}MiM2o0K1pfCR6?Ag=3A@gnPwOS3JF+ZEg4sA
zj2j%@0JVuVR1~XUODv;3@ghARK_;%~b9S?l(ao8H5<bQr@EBcxk|0}?=2*tXa3WjD
zlG$=(+J&)}BIVbxCmnjnRZVWBP+zY^fhV&|pIN1o!X8%C{Gn&TlglTQ<j*ve8l@)}
zOD1vF<lA(U1M!?%Q=CW_%^+ksy`C0Eaepq;1}#tw7zgSm{_NIe$6Uqf{y~zLM)jJ$
zFVfcmJi}Jb1)5r^FZzUP*h%o&6$;|fSACgS@McQER0k$$*H!HPx2Er_483zirXQ1*
zP&!S~Lr+_{?ozpB<z{7$;O5VR(9afEA3P4bN3#aE%kbaw!1o{iy|wRLxqDs^29@w8
zd}RHvH_P?J`_Jq8^ZU;?dH>1bbVaTH_PfK=gKg0|KHVu&;$HKpx%DBDOl9|W-yS#V
zHlGc}Nfnl(fLWE}<$BK0>0IfORFscNSoB2X^N{$xIQRT+yXT#aoY8FovbT3n4h>i!
zL3Cl$Td6n7?+;E_Ue(IK{_wi|dZh?ez4K%ROc%oGcH8vc;Gv0SP{^0JSuazSm(IxZ
zTs)52i?7DFJNTMt<_;^;BuqmgN1ej%I~Q(Y>k=e{LARKL!dpKb?(H9LOG-E4zcC*8
z)f@@j_%eH<AVodFYh`-iX?ML5ErXsL6kdE)C3g?D_D;76_ZhmZYDHa!M4KTnB|i8X
zxT@dW+v64<1d~Y}r|fC}96u6ePxs#$JZw{TA|X}9>4a1z=Od)5HL24IkSeQ*Hf!~C
z`kahGYTP>&D<JG5ji8G=$9t#q(@0gEP9s%vJ{qZ7v(QMDHE1+jbxk9iNJmZ(dN;)w
z<vN3~a~VtAcSA2ALLN0wTD!l_Pe)mAIvHg_i;6gaTZgT!pQ45`M}f;()tPz{aJ0bc
zwk+aF05<i}g)bnnzth@1nT`V&yFUtC-@rf!#DNAePKOy5@MHsr$J;x{y9e)Q;yMYy
zV7fGr89X<UU2;j1&P-6iOib>0f{K&alM6@O$=F-S<*fbsxFtdVCPpF6j3{NMafym+
zVwn(fh^l+zcE+J|)^iI|Mr8#hVSPijkzx)6ybCu3{QN74Bzgvk)w8ikYehHEo(Xss
z+&JLS4MOQ*2Ucb4+r$>^dWmKpOaDv#Zr@)4Y>NE<=5_6LLjGTQ^(_B?i$B$>IH9W-
zzJ?jW(GwMN@c7Hme$KVy^^j3v0pcXQ8!BuS#kL#3QX~#ecMe)7P4P?fLvz0*b`Oqs
zn`Y!_ZxDnd*Xf&4pwE*@-k|UJo)s&aTC4uQPG@lDCDVTzi~|>>=&n0TX6}aW2#y%n
zE-PW35bp-ba(ky{+Sm`q!{J~Qy4@=0^JV4EO*nEkq2KZ9Cvy5F0ZS?CgyC&c7@`U7
z1fx!bv&6QF*OQ)qQMok1^L&i7KkVEj<T%vLT-<mt2)k>!437cX$}FhOA`KZWbtaAh
zT@qV^-agyYHP?ipY%;m<>MB{T3;LO&qoOLPn{f4E)6{jCrJmAmIlkl|9JHnW+7)Ld
zbcz$Q-=4URO(`6NPLD!f3-Uax#(#YZ2`~UNub=W}RkFuC^)Lj8|9O1jgzo#1)Ad|G
zoKn3c_6A=7>s=qs9x(tjo;uy&>Q<Gdd~8NGB1od-$;p?oH|=w;$K8cw<Pl&=SE(xr
zpFQwmkZe4a1N#8GNuU36!|))zU)Y7+X}{h2>3FBP-9C7Gj4x0tRC!UL?i6{);uZz@
zJ~Gu4ZlLYK$nD}1#GNhgqxMB;*n{Jh`?&9yQ84m>{mHqK6AJRUjIW@`ykcE6ioyxV
zA1S@-pfYG4XyXJxxeJ&kCL(=q1Suf$bXS*4xiHjBVJ7wlPM2{kG)p*ff{%5Mm=UQ}
z_+6Sy7f4mvV&1&gBc&)W;K94uAbd-Zz^@k@QbmeKBDn3d>2Aoa{^UBaFcxIPCCo2H
zup%fNGabmI^H3iWainL1gF#4pfy|38=;>)#-ZPf#Yjgq0U?f}#g?icok`70MYp?5e
znVMc8d;smJeq&+`q=E>Ti|;l>4Iu(w!B=21KtkhE5(B??OXJo<env(Nu?1!tI$}(j
zXXN%t<dhdBbWFqds!i!;s1mrnbEo@x0JQ$ubHA+f%!$+h<CnS^1{}c{0q6nw4f-Qm
zJ14CZ@$T?gNE64_Y3mTS%{^Iw=Z_kMLv-kI;{D+=cQh*lm7ug4$YF@er7uw~F_Vz1
zP8t=|OB@+K*9*<!?4=uP5vDRED|$2|UwecNLabn#iHods<c|7dc6TP*h%f4-w7QA@
z^7`a^n5yA7>G!1Im+Drs^59a0;R<2rKf$f63&qgYn_t$25p@_GtRof)$ta@|jL+ES
z%P1T*=xg$*baKZ_4tS$vXnqTgwjDo;#jCIO$^7i_c;6sV@ZVzxSoMG?r3oc+xrhsJ
z8$y?XxUiUORO-usT0zXkb>v-KhT;<AR0S{*abdL9X~4M+-JvCH`OS)67--$xgmsf9
zM6|B)mRUHHt6PItH@prITIKRg6HdN#z{YAOMZtn;iiP&`Oyn8B@Lk3F!&sr<k%q3E
zxA-8`gwccy!IP4im}RK6os#xF$W{Z2pDWU(Bs#`vDg+TUW$wA>p#%s#*7@SuPi{=`
zE$NgJX@2g|N&OFY>PK6V8qX6XG_L%~a`|Z5ro5`fRNPIFxJJO_+w}nfxlt0ZGwDrX
zeLZ@SRh5oW2^pD!_s@VTG^N=iuu`5lOs!*rKo9_R9^~#u)RO6r@APi}aE~1Hpz0^G
zRgbx%T<N92;GYkmC0O%M#vPc>fqHDSW)SWv@`%mKN<u$N8{1Oa>MXQkH71iI6<p=b
zDWu6t1W+SNQ&7)0s%VbX^ufxp#+#aO^lE>m2y>&j>=-9%2AkbjP}OE*^oG^9vbsZS
zv|35HXw)D6ZuCg<GX+LPE_FJc@yO}iG9zn|8D<==re~ronu{cS^ZcTX6PGkZ2g=Be
zpn4`hSi;b;+oMy8-a)WI$y;4Sbxc!3TI*`aiXH83NS=9M1y&UlYv~L_Nk)yGA;u}f
zMiLBW5}70dx^QYlC4*Bf{lRGV*srvIVf#PpccmM4A3OecZTS^^j@|#%mY?5$zsaBV
z?{@Zf_ID0itXw1t&7YgQdnbo`KkpnbmC#iU2&liEw08EFikrZ+QF)`FD98#_#4M!p
zTfZIcl<>)_`F=+hGK*ohD13jkwcTtri<_8LE{j8-wzIucILPj`;Kw%p+-(8Caybb^
zf=|NPc1N(Xz{z7YKzF<Ho?60)>YJSv1vc9#aR=Hbt!8Vf^cLjyHm8){?;XBv?%~^U
zbN^&9(Q&RW3J1-7>FN+t@v<GL(1IQu{JnJY+y4Ge>-e|Q(c#JV&bz;tn#a4XpYURR
zt91O6hG{}YXh#1M`D8UXAz$KPl87jTo2lhQn_MG+Fz5E+Fdbx&oBEL?_b#5iMzCAK
zX3>Mg7zqHzxecT`Jo}}=R3u3th_$i$Enf~uM^^xzqaURsNMucb0`v^=8P;Ww_8s~5
z;Z8y?8XyyDq8JPw&WO|FtsPC2rfRU`_#lKsgJ|l5`j2o%O~~d>5DYq=16yCkK*XL{
z2tLudlfih@an=4A16vX<5D&(f9em-&3JB<fxoRjrdx`l)L2`PjwF?}LkvES^I2He1
zOk)i!I*Abxh$w6w?jN1Dc6gGNT8FLXUNNn+vRsmFr6eO|%@Tana%BnohEfjfvO}WA
za72EB>3z!`Wswyy$!v!uB;r6vo~okj@zG_FF;>*EiPI9PnP_!~2a5($Nk{Hz1l=0M
zQt?F99m8xO;VF%CY0-=~aG+;=Bp0P{fwAfds;?$eSMk{ydT*D)c(T5=QYMf%(zk{p
zxWq%SIJ+h4GCW2oxBeZhHha4#dPmvaW?aPg$2<Qx-96sf-b@r?#+_W!?A+v%IIE))
zyZc9byHMai-v5L3`+jeLQ{=zb%PaBsKlt|gS^oPLf8q~I=>vYbDJ{(@u{|7lKFs?b
zoRXv$a(%e`-8UcASMiPwFX97`yC?&%)N$ER>!+IK$!A8PuPO}iQmlG+xVH@xc2y)x
zv@Ro?M!G@b1xhs?Tn@%P><8X=^xz1~uL=$n#g{}84Bd`*?m;Rq^FEx0uf5U0M?nar
zjgd!4O4SI$W_$N|8XmB&sV5A6XiGja7%v&}<zfQN8@jQHqy*mS%aH(4fZeD_x9Z$D
zKJ0aIpMdV+meIRS0q+Fz(&zw6SNb8h5+$|IFtH+brE~!sUU%T~O}=~M1z`#3$aeB1
z-FQJHX;f0$133Bt0Lq>mZI?{KL_@{cTj;f+G@+OQ7EP$5YXia#ogjo$pxz+NH3Ui<
zXbdt`|KibGISeDz1(Bd*-1eDQ7rasH9U4leUT6eqOn!LO+C4ltk?bw(%<?5}|K&1H
zKgg%h^dkyUatSn~R2((FTu6gn?n(l8y^#cPsvp`g&9MYzIWIgv2wg|-_&B`LRDlUZ
zeV$>aFh=n{4Fr4y${@p5Zu&im*C8eHI)N_*LFjb?68HobqL-x9E|%S4Al8rF-p**W
ziI0<kEii?H9<Zg~VMPZ|J(26xr~sz{XWR=#-7VLCC}uR$cWy9@+xot9vx|YIbUT>Q
zANP$Tl_6k7uwk39K<DlzJhzMJy<I&G(qM0h0>1jS2SdKs;!0S!B=Y6Ud{O-CUlR%t
zA3-5`30uIg>b^Cx*jv}DHSHw}Z8zOeKEQSO4qNgGDk+&J=JE|$J}m>csk`6rvQg~j
zOXTES)+k0E9ROdb25eWz$Jh*dw6P%$PWSeTwmF+Jbv=?#zRbk8smSZ)o3;X;H*Gl|
z<i;}J<`73h9FKhooaZy0S9me$0k@kQWwb#`Dw$xT8*jrdD<T<MJ!a8KJzxsKZ~TDY
zKxqKrq&pj*uSElgDerLF$vaGA=xH`W2RK}mJ!ciis_Hwm9y3}Qqdve<SLGFV+VzcK
zmW*g+Fm!!gsM2W%96y12<qZPp+E@jFlhCl%#bD4a6b(j+AxZ8y)G?o?!wD3*<QH}c
z#4rVApq(E3ZZrl2-^Nn;;MAvq1>#``<YCo(F2_|0>530Vm5Sa=@8nT4F%}aSAlx2k
zZmLC4su0j}hO6HOElt#8b(0Tcx50~{sS4ux*G^9=jfld8MFy5*98M}<<cCBC>4byx
z0wt@_DoS{h!12NxhiKprV7)Z35$c`sXatj%mLN&I7OqviO*p+WOy=+s4yL%{7z@~2
zd>C`v`!Yp6B^IK|R*ErD^u44oEMf19(G=1xOErsG8<)=vR401WoDCNdLq?M4F(#Qa
z92Vt#k#&g6*Q<2JylIPU^5I17^~B{c2SB9Yha0uv(0UmKl}VjwNuP~!e*8bre;)Jv
zfBBg2KP!!u*RSI5KW|<=>;HZ${ofWI9F$fT=}y6ZK}YZsy_Ueq@{3D1$y-ZHLPu1S
z&nWobiQ7|;6m3CVA$Nxr>E>l?FudhkXw(>P)anhf{eS;Y;En$8{}Bgcd|PnJzc^PQ
zYpi!kow(w2YWoYO{x8o5nW}O=+LZ7JS9!_3HGqTeJ3MotuGjeNGaPNzK?@ePexT&I
z{lOn_YICsu5v-A3N9@7!iB7w&E22@URrtt=4m@;~qdoD(;iGNW!#(2+&us{T<h4-I
ztR#LLjOkK;a2A5#PA4NcykCalaJ5?PbJLZ<=%NZRtI%4tlDmg`w5e~rH5j?Gs|0so
zVTQ3XlXwqIf>-LH-%JU67IozZscdH}Y#W|0)#4W3!hR;*UZU=Biv3FhAenz@EpNjG
zW&|K+l$lEP{-8S<NX#HH0pzrQe&lqnCIg8Xx+Z`ev0d$CAThJQshaN@n3y(iDTthe
z-meJ|xS$*!oV1z;t;nsYMIS-lMXel(uYlDjmcvL0v7F2xItj6o07m+n(EF{!z2^S5
zc$Haz*hajn$MWCqZoP*NycObgEhDSFyLZ}x{CL&(`^P&c;!Q0rnd`iXS2>|)imlz|
zUK=0Jh4>*}TU*~!_K)c#lDYL-2BB1blJ-XXRHBj9K8@1B;kGWhoLMpsb0rID%6*lI
zGi4`6Ve37lzDcIGkB)bM`L-b!h?r$Bk!D*Va>TjsdxJB2u;yC=zMS?*RnO3z$Aj2D
zpWbjO4A#h5&xGS#P9D+tvWtoz$6gCFl&Mrre7$Rv1{j^@aFX%ih~M={>1l9{5uu_-
zIl?ol=yj={@KT=y)h)a{bBW+K04YKk(Sy-i1BoVDn?Mp2C4^2o8234n`+940b@kXq
zJuGY(pL4wc`cXj5a4GfMg}dW^|JE#sdg{7S-IP?Hvw$kUvj%K9{w|-d=|lJZ{-iXo
zwdJh#)xpw6;PnIBpSAba$&yJSEL<|El}_OF9%h_gvm4PQA%D#D9+kX}G48Kk;;k}Y
znWE@Nx8%R>?+Gr;sRxIx9kC1NPI?s}#f&o-&Y%5p=Tf*%;JG8~QVWDe1koLI#;h8{
zGv2*nKfxk0(Cm3vP{Z?Yu{_LxUtJJdgEGj_6+UXK!|(dv@iH5y6?AfAp<byi2#`m}
ziTK9CY3p72hlRiEQ?pg8OfpMw>EgW)Oqh*@5HP9U1ib}uIdac87QT;4EC>n3YW%zH
z!!1(3V*M}i@pnIZ^9Dex5db))d)?ndd<8e2$ba`gZCaS&jW#{)zt?IeXMDlR?|O#*
zm7#ta)n1RT2?Q=Hqrrg3O{yaab373&U?~Vo5#Z|<tZed{j$N8#PJu4|WKHyAL-(rD
zovL2fB1FWxq2VUUKT#x8Rr5luYmI`91-iztg%kw8uo)i;RGPq%*c2eGtXKJCY}B!g
z)N|Vu8eCN;C1I{t%^nCPOHCB%ruRe%RgcsY4ep^miObs4dy*kmOzjwyIXRl8Z?b`S
z-zIj=m=QBzZi`McVE!~{&xbXMj8k!?Lifj3%+EpbGt%@PsnfzE%>@l^p1Cj+=}pOZ
z7XJQ=VEd?yaxZ+-`NTTG&e9(`Z9W>8FtjHiyFNTU9*;aRu8+(i^J6O+WG1Bb4Zt*v
zqk=PFO|YMU#P+4ctSHOUe<I$-t;jT_&7yxI#__wFM`P%3f$$@)7Egul*88a_Vv#?S
zps~xv>05;^`D9p6xC)=6Lwp%JLw{t!r|FJfrK2NWHF3tHUZZFK<mD7UNtB|iDd|6&
zuOi@cPdZ5|a7pnpkTp1hH~DFxl(yGZi81J_tE`RKg5;N?sY=;?z)d$MFVSVm^g<)|
z{MS<Kc0{SJD5Wy3>=8OWy4N#oIpytPJej)F)*ex0>d5W2>FyCrW@9C8s8S-5(QKBq
zN&90l&5qbmWiZumPAoHGL%&8T6J$AgFo}||xy{+y4`dU8W-odI#B;^Ev~$@M8DT5d
ztEz;u!8R?Tm6Z(H)~u$(pbFYz`2jY|>9j_jJ7t=+8wBj8`%y_AG2AQ!mkZMbf^vQ|
zBnCa%7HD>ADjQ;joq_67V*VsCB`cv#!@QWQGMb#3iY8kZS!kNIZ5$IES+!V_@5g?I
z0hB6{w&4-$2jNm89SX2x7CCHk&Z9;u77=A}Q&cwJR3|d~w!K7<o<O8Voo?2YOUA3R
z){+z#E@DxxFfr`T%2=Y9W{-ZCMk6xMk|PrV;(_73)PXtP*{ev?uGO^4r1>k90<4+2
zAzyPu(WFsF8{$XrGEfuNewa!ZE-UiU2z_Tg>7W8|!A{vUc%*2%(zgq4#C8L_1nhbn
z(T*0WI^N+1dawPHom*PF=u7bz+eLYlS%>1KR!~!xFsT}})79k?+H!jF(q?Q@w9<V?
z(JK<Cge8C|k{N^oDnK8l9AN6K##^l#&2DC^+ue7FN$Kz}EhbGG;K*4{OR3Kmc(U@I
z80C{yfOa=wzUeB>&-=5AHDq(K$vv1PW#3pwd(usE6_e#zW4%gcw4h=loW@~x<O8hR
zR8}w(Jb2I|nku%7^>iLQanb5aY8MrlBU0kAZoULkjQzm7@X5d>WDjaj&tQ{c#2OyU
zPKJ=B9<<(hBQ(D1;@dc>ryT$u+TNaw!V+H7a*2hUHNNBPf2~qu=VkJ>DW~I`WxaxZ
zwRL-u)c8b7h^Z&5y;Cjp@!}=*m&hgJKE5^^9tlOt-k^BbLL$MxY&#(A05rUvN;Rc;
zcal$GU*oFER)^HfmED`jy`$NenYtnY&fb+AO9!Mfbt95JN?cS=?vT9xOmvAoqTh3T
z7-)K*L^3`Q^he#(r}ZcPKr^jF3R0p+r0o`UY2H2&+>C*Wj}uQ+egIcWh=OiAa<(44
z&0XvS6N;jPaCzBdoR$p#qtJtrEO9vMy12Bgllwf|hv-!Hv+tm+-U*uzv->mg(kL-n
zle*0JLDu;&-e=OloYdtxI!(<@>T_~T-+FxIJ(Ukt!59~+<m`E62uwEG>b89{3q~j|
z;T`wvc;dc6x8jo!E{VvyER@wD)$Y|1x`dyy^5ujiJYCzf<FBvEkziJzSamY$hb&IX
z3BoH_6H%tBnWz$oy9mwMD9>1Ckln!bQD`2$dQ4^?p6EZhqrMk<j!)>JR=n@xm6|j1
zdbiq%PFXq832fi3AdZ|U0mM4e?#l4$LTR2`N72kZ*Mq)$9S`c!BZy5XVdP`BqM7t{
zY_1D4tPJ*}%mm(4Mt=H6gEbP&fUsFAD${$c)EiQ}EA_{>UE~QGiGe?{)5aNP(AA-c
z0CpI~E6XHIQi~&-e_{~=4e}@{W3QDYP(omtP9Fs>jXY9t$>ao)o2kdfTk{NDkk(@<
zhNO2nF4N4?X9+O9(=6LOyw{Nu#ys7=lVTi9{Vyfs^l@}}vfJAIc}HyTyxTp1Q=WMq
zQ7gvA2Ks`C779s0y;w*QVZJp(Ph@(CoUmj(kYe_$=u<BkSP#g6YXhA*4hCIZ6(x_&
zu%G}fI3kpfHH%NQwnQ!L-D8ol72GB*HxWSwU2B_|lOVcgF~KPr9W?kS>fo&Xn}ucm
zjO8?YHGQPT^uE1)s-<+oO8Qs}={>g(LsWV{o5=7!DJZ1GkC@h@r2MF~DW;EHwYv3w
zX}1dvdExDm<$X^H*<T|U;1fDRB5^95q)b1po23j$$pgSt3MiAHEJ{c?^dds1W#|TM
zY>{6Pys)e;O0$0L=_B$QW%|S-hZ@@Bj`Tv36N`MkM~(@}O-SSuN?PQywM_+!w&-;&
z823W5jddLiyi8TmM--{dN8WtUdwmy}U%KW|O|eS`!1Y!Y)UGNnL+O+!haRG*UBt$m
z8OW<x>7If6S4#65l5u-lvV2&S49^vOuWDYvW6We(aH@S(NnF^5TR1)B^Blg8p`ZS0
z^*Hfn$xnIcObH*ahvahtjYvl5U?$b;kk*x8|N8uRU_>eR!UB_peSV{Je6a?p7?Icu
z&2!aIn?~TfymEu)1vX0v(+`U6@F5li{+`cSG4~Iqf89;nDUSWPz}{&l6@fzN3Rk|#
zdSM0T404zXh_PO!Y-`_6ss`--mkj|(UnoYN@A;(y3r&V2o$985@(2EZp(9=WB%+EM
zJ)@v0VFl#gLh-we_(|Cuiqt+xhq;?VS3{&#Dx1*g`es{7$`(c(>m+>kp}Q%5+d-`?
z`OPFp^p-yz$iZP23?X81unku^vvuDj+yn~DkKA1blUB32G|l&q<vR?YtTyiRC&k;H
z%pjMiy9e*ZFT1Uul$*qi*?|?Z0L_bzhBHAm1NTMkbP3S8bOKB&6pNIFK|I0=r>yDI
z5ZPy;#p&1!EfcLti<$MfrxwnFF(uIjAoD+c8eInXDSp!%jo3F^9f6~4SF{@o6uHBA
zqRZK)oUBcWPIYv(?1gH*np}OwG_Jh^0Y&6;v<I8sm{LQ*mDJYFrPS9&_Rzgi<6W4d
zxpYyXtX_hV9Q$1v#tM=7a*0kz3Xs1}L1$jR6yH&qqH^~p3JjgJ{y=8o_*PKi6Rl@8
z?Zb<z#a(y7U=lJ#u}HtsQIxE|RR3hoR!rGws4GxujY=@rP-y#~>;H#cWIS^G&*j(k
zxc{G5Z{9rn|9STR^PlSf=X>wmCnb*~vUNUb{kFH${z?0sh$S1Q?uJEkmZ`h8?6!@!
zVNKguFpa@&gY)0z8hO+2`N1kw-B^%a`10jT<;yblp#I~JKURM|*{ilNqz$?$UJ$V7
z=*rZjW)RqOJC_>^HW~|}I<-c1LHdS2M$2zGBFf^P&Bh}Dc<3|}Fv)egxWyT10-YYA
zg}7lQ-wA?+L@^=4uK0>uQRtS*hsP=~!IzOUT)T@ytP;pr<5g<iPCFZPZ?%p5`g(qy
zC7QDVzFNZoEC5q=L+E+ER{Pt6xTM)8zr{ecjwNk=(QY;w7&|qHQ3oshv?)ciDtx5x
z2hm4yaPVLJjlY5nZGAo3Hp<e+I1z9FffLGnnLzT_#saagX<H#Rz7{qU<YqP<z^bDv
z|F7Nc1<`dvr_6)Jr}BkrO5G$r8YJ!TWndl!7EMZG+-Frlo&a%5f1DYxv_N?f>{QZL
zP1?rU3>ZP5JCt=1+u}?Z5mN|Ej77v-e1BlLT*d_+7dA`ej<cE^5rGHjp#uxc3eMwH
zlJWV34(Uj?)b1m&?KyzqNnHWkekd?`P=7Rd>OA^r0Qsak5gg1|D{@^1kyF{lP|#e+
z<m!9QTIy<6_s|?cS-DK=-GsSe&JH~(TE`hAqQm098-b|xhmj{`@~0DsL*M<?2!ype
zmjgJlre(_lUoKsCB1FJ9Di!mvT*da%EAwQe-F;)>%;{WRjPUGN?hJZ^(dx+UE=(L9
zvdE#)$rzq!h!DdG9T;E)AdLSw0nbe_6E4#*nb;H;$)fk8Oh(CVJ8S_{ov^dd&1w&2
zwu$qR416;7e%Y{hE}0Ou+Ot_sO9zF?Qb@KWLh!RtF<U53Yk~8<^UX{So;>Ioi%e#n
z^(slRQPO&qgl6NrDd;Qo`P^WqMrVypeLaZ}ZSqa^+2!+n?tjP=TOc)WA36ThtCe~p
z{!^o|{CxlOjqZO=`VPgkfpG=_bR3dXKv;xXxjPw{2`@qLHfNhIY{bDS2+Jo;@k{eV
zbH5~Z4~}=6_i%9R4f>An4Ijm~?=;uQ5A^Pv=HxTWA?SpNE-Ii4sR<~vpSVB7fZj0J
zBwiL#ruR{fWh)dUY=xRu^~r@-S9krcdn4%Qno%uQLBH?F51YUaz5+W-y}GP%d>I&C
z0@8||mBdX6BS=e)3c~<kQcp@^qA3J`n!pJ2HI!<CV5^f@mcfU|+dIejShvas4qAcb
zd-?7;bITb9ACXR8R=Zr$XgIQI@YH*jj3a^9oMzMyyaZU2Z#jYt^;11Ku;TcMYq__j
zh~n=&Q`6;!Z&Z16(XqccSQ;!j-1qW+B1#&^t1k~msNzE^F%&h8Vw2-b5j`QTi$)Q3
zV%$+!pob=WRLT!!@Sh7$YR%Z8*yt{tBjfZ2PM4c5G&umt$NDEbG&pMY;EcYA0vqwe
zD;eFZ$TwafXEwigl_24o0;tRPFG<hfwvQh#(0mkI{fXV0%7#m9gs5RfP?WY2z3{7S
z^rWhvcMk?3t?Du_(lR0*hyn=Fix-2Dpd}QJgPsUY<MAcA4&;W3F_3q*H9!p^0=7w;
z#F7~JJr#@T3vd&?MtFf_2gCd)bFE8p1HiXrO?(Z6lnMyT=T7(Y0LcBb=YCmd<+sTE
z-^V%?;I4qvOoole=>=vpxos1;jT(#C=4YZ|;Y>15<5P<NgERELf&LVH`S=QJotdQd
z(LzE$Jf;wP>ZO(xf#_Y8Z8$w(dLMo*Efujs!7JheyQImP^osx)B8uO&X!&1IA#^zw
z)?!sIVAB3J&}tS1d|XZO7$U3+hJTjGJX!|qJ>uBV%6E(9>qD4xEEecD!<|Ru>LTIR
zM5JJ;vC%Purz7%C73B+&N5Uh`!Ah#T8Ht5d_nQR+A;(wyj&M4i@yO}i@<KUb+XWVB
z0A6RJESih7rE@8oMF;rown1gm+-PV_wO*VV8!_RDTstmNkjbz+Z$Uaph9{T$hdg{S
zJ8|FtV-IMtWPCqOqm3Of==uDYIREvp=RNMtdH$;}ziHI$^I!edo7d0rf4|8eSgS9V
zmdY!Y6;a+kINWdT2~qx~S1ON&4oGFyaWJZ4T*O|r=bcpt!IdY<duhp4QSJ%O=ihCZ
z+CA$_yXAEO`u5?;hh0(L75`s#i=3QSIX+C2RR^m5u(tvmh+;i~MJp9iK5QV0+dFSh
z-vd|>4B%&9H-NDjw@$WX8$C?xg(&S+s!jzLkx_#NMs(zFzYagbx)|1T(Zl;zbFX<U
zPgv6qu}<$|KyliP#JqXb%*WGNb~y!7aF`kW+`s>Gn;z&2TZh{_MRvtr2umS4k}r+i
zq3eV}iSpKR#>r*>!=BWOh#Kl&_1bv$3b2VpPB1*{IWh}#lOA05E`yT(a%_EZ160Eh
zq>X^U`r}^c4STmzbsK8F6g{XR-C!K!_Yr=Fn&|lgPP<TD9(d^xjI!3k!GaWVI1+)=
z8(uo7Ahu={gwgG~L1*NAeEmtLM@h9$2<H7lBBTC^M51_dBRzSUz-(zSNUl!M>GumS
z{StOess@*^Q&K?WboKcq;a-YhmLl0r{=wpH+NqCNHINcK-*-oJ<Tg@)X|2Qrtf3D)
zNtOpMdtLl_=7!LEhrV}MaE!V#K<$db@U{S5EQy8Ig54*xAN7*1Is^6!!uCn4B)WJ{
z2z=X>1`v0OLFL^^=5+{szW(?r(h@}JS4N_&snZd*grX8;sU$=>mIPb<kvrO=_>wES
zF3lxKX=q?PtvE>?(|p(Mwq;yheJ}$FTMO8A7O)X>ma)xXwA2Uh6EP&P(rCff7M18z
za7tNpsh)@sngXa310*{=KOjE_$RySL(83rrMdJA(4day3V>1Po-5X=@Nb%EfNJitM
z3&h<LN+FZl0hH*#JX-h9Xvj822S4(C1AOz>K>`&lf)3r0%m9HIOA!q=#gHm<FE(SY
z4Mkm_ylkgOC&T@prK33H6v9H@CD4dd0$r9+S~Pf+k%m-76r|%hrYCny?k8l<!*Vwb
z9A(nSqI@_34CxxvJadbnUqJLqh!TwlUE}dNS9Grnl#V*p0hOa7-D~^Y8oM)Ei<EzL
zr#3fMGDay6WmeNj5cw+WV{>SFp%2M$f@l02Q4=MibBTysf=Y}SU0ZcYJ<zaC)>ekw
zI|nB_tb}1PpK<*ux+aNAB1SLbU9UBu8ik+_i>7W`n8Lt_W>dEh=XFBP3>~a!K|ti{
z%wws}fmL(Re-HDMTBwAm&^ILg?+LA?!d3^}nC+sXBpofR+dW0luc`RJDbneA4%*Bt
z!Mcu}?_RHoVruXCiQ)mhkGB>hL}trVDWUBcc<z`Sw8idbl7<AjTe6aATG9!`VkhXE
z{3A;d2X&Zx^_I;YwC;|q%LJWlHw+05bo*=-D3*G$z+H^DBfg6MGB8pnXb1FMqea6;
zp<f4is8L*_%;EXSbqoDvjNl|5j7B=oNh;$Z!y>UdCE%5IC<dC8lz#1Yu08h)W?YxV
z064?S-u1n(py$K@uRbV!3(f=~&U!d`z&KReahVNSDk<GzqA5ig%TR7&_^y+mn#Vis
z-Gigk7A!$1sEJ)T5&hc!ubtz=_S@#k&US?5wT&gL(0JcjfVCL)9<W_r_)cIg+umuP
z9_+TF%Dgh9RL4o9>NDPU{p2)bC_torV2Z*3xb^U;eQ<cZ-`t}fAjNA;^rFaE=IF?H
z?}?s@LF>4Aa55_b8O`}ob9k5<`^so7G#x5)g5gkE^VNwP{sfX`7Hukk)yZ%BZx8o&
zx7t7NY?aU(Up%R~_e=A)lXmO$V5i-D|NeOA<Yf2foxR`emRf)tGy_doD5<MO{G%*p
z$3DJfY(#y2(m_R{b^lU>8nVnJ=>VTZzQ`kO5_H>zW6t0<MpH9O&6vUU<ZQ&yDzKF_
zAB-(<WIS+ic=~p)d15Vc@mFX#)CA$`s?ScjRV$}>2_R*aaX&*mp+dB|j1BR`2DrL9
z#!{=R{8Xe(mB(5P99wcA8ww8BZDT=o>s-h1e=o-<g7eyhFBh-{&sq_hqN>y!RHaos
z$v6^a=VpkpAqU;U82fn5xR&VTP|V<1s(SBbDA8~RF-Zmitco+Dbsw?p{r>*aQjDD-
z&0?na{k{}LF0z;eiV%xj7%W1##Tep3GsMN*3~_N^hParKAujs;)m4;O;1eKt%}8eP
z08#GVJYcrfmqU%>W*5E}r#60{k=}3Zt;7ZzlB|K=4|{T;`&k1GMQEV=(OTCJd#kH-
zP-@3UyU(LdJJ>A66-%fJNwW1>9_iz8pnsh)GT77m3kk9bo_x^F2`$Y%3^tyT*noEu
z>NL8sQCCNeGmyBh7*=$`EpJ3$sttXGV7y+z29<l{_n#2PYcL9`$2-mK{hcRy|EaAs
zUaiF5e_p?ar04gaZ}6u@#*#S5tO1BBFl2L2njL+o>k3jZrufgKj}cUIIjnV#PN%ho
zh9fR8+N;;$%IJ*+uXHGgF8qjKcLpGvU~WU4_nd2Bu2nKE3NM{djK+Qd-^A9@DPK<|
za^4@EV*3MnM8+E<dI83V0~CCQsMUeBqXzM22c2`>k~g2#+y&XNVu#yNnRaJ5u5>z4
ztv23KV+y@JRpFl~!zAA5EG^aRqI|e)JVgz|OPH@+QSLj}H=;ZcHr#30zMq7xe+@V!
z(Y-))$FrkrzMPF=0X}zv5YNJwqI@3yv4OIMC_5t<C>5t<`BkhZ7ub_u&`VJ;P%1z$
z>e7l-J?$$dz)r>4D&UcX_5*S0^v;<OfhagvjyNCIUl)Nbf)Ggw&_?dX7_|+!P(|28
z4BRZ5DGh@@vJXX{K6876FBP=YUFh)*Kj1xbJFX-?VI?(914RIUHFVCro)_MVSCwT9
z;C$&_TyivMU`A}F&(2VBh~89Q*+t9&?+m(bC2J_L<KJLBUoFSRQ?;ARfy^0-GuWF@
zj|(;9!jkxs7-(J-U|`Jg9Ed)Y2LY8mM&W)gX{J>@eArMaC^L~kPVOngiQ`BU)u{ew
z&DLn;LPe{S-?rd<%cm@<Gqp7wid(b+=jisphQwkGSQ-Ol^#bX%k=GIwEqbh01&2#w
zHXn_~5n9*1_BzZf#noKBLe9E$BI3gp5h}Cng$%D9uzhq?Ulb5capjJDw-*qTj@)y1
zgdt+Os^Iz9C%S@Qj0G;x9|l!M?4yv~mvBN+^kCjskP@w)j|P2J9vS{?M4<NZ;bH5p
zdgl36$&fI(#$fG=WMTPkZ4YI?*SqeEGRKuK<4^{dgI;$7p9w|z7k^OpE_~D#EIXkz
z+7AH0ZKsEoLLkS`8};4L!7D6LcKcpfW{^-dfThdnmHTdgFyu;Q#1`5J%Jc$W-hO+!
zySLpw+H1Dn9UkwuPmXqviMNQgkk8VlI6?yM%r$sW4U$eC(sK{}{ZVnCoHM$u<T^da
zM}3Xu${XwuzwBqUNWD(ZkxcxIxiRa=?4{(QLWn5$jM-7{9S&U|II_Ime!sW-cI($)
z+ovZxZTMZUx0`QwH)_@(4-z;MiqEDy-(LSq$^UvJ=63!vUH)ILCH=qD-ZY-&|8HXd
zvxtrzQ{1%RO4ancBUJXm>8rf19E}F(Xa#CvMd?VT3;P**pTJeCIRt^<RXHV$CPxxt
zqf!$EERdI3q+!2EduQLdCFuiXnaH(@aBn*95aU6y(z=JimX!My-2geZ{7u4;K3HsD
zF3Q=9qmZUQhIbQ(FeVQtJuVwm_jb4Np|=cJ%UY*?4>khW|NeXIjd+7}hUoN{jH7za
z7Zd=UixF=AuutOoXXK%rj{%m>!!OPV{bHKlWDKGy9HHxGfdTs47y0Ii*geUMx6PB?
zlM(_!CvS(RE%8h9__%q{+TA%3hsOeak?wNPq{DZjdGMR~u#4VFT@P)0ft<9JA)4f^
z*zFSSPF&YQf&Hs#XQLf~Th7?I0O5WB(mn|*LktHH;INW-fna(b?4|57Guf{Sc?6AA
z=U}n#y60?+Wu(ClZKv@?(Pl0e-DsmFBBJmW_d@8+TP>e`&50qo_gPiy1(+<A(@@E1
zkuJtdtiV=SJok&J)>ACXd$42o0!Z(U3S8>kFNuTGy*+IhFcZi&$aohn0Va>6S<n$O
zwjs+W+E^6L(FJK8-4xgCjAN`pS-k;!J-zg#)^&u|0r~sW<|ikYgK@8WJdW#KEeff3
zg(ayISO&ihN8U9^BC9dDh<#fZrXDjD?KJ9D+aQCsMUr=aWT}KE5fLy=p{XM_k=|xc
zj_G>N?e=befjOWkYC3n7<gjX)WjXH-{?TP>f)7<tu}}}tyWU_Jz$%I1G2AW!dA-s`
z0&vN2Fw{j`FlYRWLX_u>!jw#<CrbB}`_1DIh4Z2`izXi}9kIG9(OF$J7{!pEzW@GQ
zwA)%HZnq1Cu@9?lw}|)Rk&Sl2cfX`UD|~+r>{={73t|7W<v(?dctrjGSB=+=g#Gu*
zv;6lg|2@lp|C#b%-Z)MgMiT2I_rkqNN`-?&QqMb!r=e+bTAJl!zz?Nqgqe!kWp+y6
z8G_6y_g1-qi;MO-?(Fz-&s+OivP85@Mq!iNelW2l=><h4mEAlEOMu9_;qAVZzR_<+
z(&4@{j6j&FtE-;$CvPS1hHJT;neb9v=&z_1`8B7746~To15%Zf&yaTVok>K<BjJ%X
zliTF+5S8z1YgVaa^t42xm(Q)^$1^8z*(5|O3S-h@y@tlFO2np8i)b^jrggqSqCq3@
zMa)y>B(9toFZwF2=qszJQsb`4)VrMWDba2>kB;_s+HDGg7_k%Vr1+z)SWVMZry`^_
zdh2iF%_Ol1yM2bFwWvJFi7`Qr#O2YLY_;(T#S5ucLG>ZoM&YFaU7kJEb9IbU4;+7h
zTG|8+dMQ{I?G|)b@zHv$iiZ?h0orrU$heaO-t$33k}ZVA!f3ui+ApGNrFC5&6*QGQ
z1nwTGf#Ryf3gclVylw~ZZG%7h{F%nS(;IZI+81s}t#9oeZhdI)A08aG4i9#>O5$Z`
zBE&szcbp(xm;Kl*DCnpLP_L;!dwXIjYHdvs?JoU*YKY?c<Lat;vY-*7Q&43%4ldj1
zYpkG-BZ^+I9}<tHgG)UP65J@7_7xS{XPsQn#lRImsJQc&XwK3bC}ts388)21^jHPq
z6761kgEeW{F74>mWRTMl(${s%$v*W4MO`0eXdIy^R;8dP#xKropkCRqefG{(Ng<Xz
z`o?Kx9w7~4@u`?_lr`je_Kv-r8%3v{n0awhrvDqT?8Fug_B~F0LYc)MU`_I28_)wA
z_D_4>p%t?dk>OCeW22(@bTNY6JLK1N1qY*xIQpej?(ZH64P6EkxM*$z-$@z*3C?`J
zBX_%%T+D?)!oo@S3O469+D)x#Z|snUqFLc3+M!7+of0~DkZvB7H+FI;ijMI?E*O<W
zzJs*&7xc@L_&xu(;P-r1f4=+O&ujRRTGEXd7IuE!ZMEMuclS<@cPx!bHZ+3{lHM1f
z?sgbW8gbGM<vSRqp#+6UI~@uYqaHE{=+5!Z`}V=%ezUdpQ_-+fibmQpHc~F**@Wy)
zM!Yd*?c>~a5F*qe=q3Ipb2T5oo%`}LU&T-ge^xV}%54x8&`#DU%89C*iP8I8uqsIW
z!$3Xq8IeYaFp<PdA04EPC16xMpPaKuap%fy4;@cklHqj53!N1D;VqS(f!LGpY^05A
z93J31wmpj?3Ghk+D_MA3C{Ey2>@CmT;JZXGr*R?8EHqARTmCt?ZF2BYr0_ja`v&4v
zD&lzNtyEgfe$PeiBt%)YZ)QrcX$3676$(V6!dmvYKvE&XD>*Wn3kxRpYD-jtS~eQy
z!js*bB@<gwo+OWF(mZ}kNR7ghi_7`|PpWusMTyi47Xn<&;2`8(yDA8V?wyir(}yvI
z2y(~G?%lemyFcQTHX5NcbCV~nj}4<pwB}_(u9{d|E4<wu_eaUG=Ex=sfzUToTlTtc
z0+Fl6O@Vr|gnIv+ga*b@!6l<swNXNWZV_uJEn5qwJb=VopLuu?O)_Z?FGqv%#pQ4u
zeuM@ozKD^;M(e-V7S?i8s4g}|4M#IMYd51Ikj5MR<is|`Tj^D<kN{Pz*4@`@CZvFZ
z;{}-jGLS_;a`XdVE~QbgOJt}$Fq2?|%ZtDLP-&cteY!fY2C&A)`%#tX*c3h=_j+`w
z)C88Rj@-y%YmZ#@#pZ@6)YsP=HDlS=$ZD7xSDjL%-Jtrn?tWg25dg2MfLTl*Ypj0n
z#y%t)tH&F=7hnsn752w~IXgsaXGA!%V*YRC#X$V6UhCqIyf`}E?Y>^Y8nm0H+zOPU
zfi>*mtdM*&tQsX_wsFL~^Bx=<aAO}(%H2cXbi_^ec6y_wmedy?ExLT;oow<BUNewR
zdy1L?>x~SoCkqR>#b$VhNS?QB`7tKICB(M_Q^OC1e3HA}F3NHMdq9N0Q$<x0-|YJ`
z;6p&)#^DU0;;Sj<eHYJkVK*tt7>&mMOI)um!D3esn}Gi4|7Y*r-_l5yfZ^{~{VQg3
z-ftHW&>8TO7(Fi$65SZ|2-(%MXCEGfQ4s`|O9DA(zrX#fs_vfYp6MB8P`o7gBnmTq
zt*)xBuC7bR#yu>v&p2OZpPgL=;p>%Y&VHz+Dz{5#f^yIc${*rJPFT>~^;eabB#Ma1
zx}wNsX5T%CGo(Otka#KS?c50@rF?;22E={^bDdZ4;+4xl@EdTHUjLPwvD(K(#B&vf
zPUsLhgCNXBT&^o&DYWC);W3>>@zCWWw1maJ8U-hEuUr0&RDLn*s~L!4a9*!LXC<IV
zARWq1F3UUHC%FF&HN?KB{hzhDx#_w8&24Qj{eK?iN7$CIAjoKyTRlW)uga?CCQZ7*
z^H~uq5zP*uiK=L-c6xGhoM5KK!=w_=;6JMr!Q;)FD!o0$-KY=0oYK4A2nd=QMP`G}
zOwkiRV||kSSOcr8EDxQ7zbu=Po9daDH1Nzo8Z9(qH}MKLXJOSMNGV-@)j6=?c!++A
z2jt#ZwELqQ*F5J_P3q?0^mC#J=g}gZH;cKX5%uRkp35F+^&kI`+Wp5r;Cs>h$3G_U
z$6|kSj=G0yda450|K+wldb1;Ajvv;ZW`V0gOBSU)XKWR8GkF`Z#%bmaw9lMT5>5V4
z0`so0i^2j~iEGhOeK^OI|3fFDhx@_y97C?!S)u%)l!&y(6xASS35@0em$(-{x^VE1
z`~^Cgj~k=msvj+(9gKmPHa0g<H|j6I-kPESbypv>dgGxnY`41IsT;^gQ+X5W!e}>5
zOWU})xgi(Olz<m$V+Tv+wJq@XR3^NDncK7iMxhqal*m~GB5X=O45`4*Mo5XB4Kdf3
zyZ>w0{X^M*<+e9By!QX*7KjAP{r@rce;yQ|dU$jOjJ$AGy{M5akgHWo<;uxf&0$uR
z+F{Mv?4p);j8SM?A}r#mB~;0ySbAUhd|a(jc(ob}YJ?dZv7Rh<_;Z4g7tuOv`c;sP
zk>7@vvtBBdrn_^zy;it0dzpk|8{M(<G9v$n*rTZ9UEM=6#-XgWC&iP*<#1%f2nNYF
z%(aq-Ud1q%J$BLE>h{MYN}ORpV`%GU8#nbYHk!o^h9c1}0AvR*O268eA$=$uWLKRm
zs%|V-v1>XHs{4k=YM%Dn0MmJT$8RWEQ$HJG&*gVoch0A_^QkMs7`!p=Y6*;L(XJ)h
z@TXf#bm0$cqQ<vxS0~aMwdx&qB_@I!#2SjO+dJ0nUA%7pKTp4olASe?ZQAt?Mn8sE
zb#W7@d3PD|u2FNlV*z#TyGDP?44=k#H72g`eu+KTcp8g4x}a^>@JgUfpSz-;AOmtW
zO~8m$6nGPfpWeFNv4gy<LlFb#gKMC!82-Z$QtTI31y|LBJYeb>9P<ZFLTgvlrl=X{
z_1bUQO$t-Vv;jOsW0%&!H96NB=G@hiqH`BcDku@%59*|3iVRN&hMa=ygF$`zR$^En
zGwsuKrHe9W_Ne9OUu2&TgH>R6J$JQT<sk(pZrAjBe7TwoW@BCMN88}6;%tc+DLH(K
z&-5*s7Fc8*A<cb1Ex=Ofw642$bKfW=u*p;s(Sf?>t@~Q6Mr-dKP>b8rVzBvLmY)$h
zS|=&fhr>4s&#L>0M@Hp#7%C4~$kcmrg{T;<!Bq*d#3StjYZml?xW!@@^gy$q2ibv=
z9wmGkSy}#I==fhRkU0%umyN@i9$`rXctOCz77|{Xk5^x%^n0Qd>zWrVR`8V%9)FDi
z?d(B~Dgso|0swz2qYrO_V%^lBp(Y^AD6!#MjYX-=@JOj1v;WaeYalhi<`8EHSH1CI
zbmMJ-k77~8iKuQTRM@4}<w*J|Sc-sJh}6Wcr$Z08WA*qhS{vRj)%BTcj^RysJM^sQ
zTyvb!M*LlnG^dm%>t;xNj&e3pHZSqOi3-i~_b!0H4tie?Zd!dke+}iEbo{D`Oag}V
zopDeht4Ys^jdo|V(Z~Xj+xAROZ|-A4Wp&8X12*Pv$gXWh7!_l=QnQb`m%qz4XeqJs
z{|oSUdEVS;i496vnT>;eUBbQ~#B20AOT_yw5w8m)F`qBu4R6L*SDnQic58K4YOK}$
zl4Px}o@3_~C2RG_Bf6A&zf=i?Kbz~k;)<oUOMz?3VA#=kpM#s@J^#XnVXDtW_Myi-
z%<{ey{BInj<yAP^7U)WeBQl`vyZdd~0f$_|-+sN>jCUe>c<jyOzBl}pef5}%{jgBs
ziuL28N;N4DmnA2tw+6>8SUdg@)o4}=-=IszTRimahvX;F*Rp88*cXdDbe;M<u=0-N
zt-3#0aua0S#lJBKhSrwPYfd$Db~TH!eL0ZtBatzTG9gBKpN$9GK>B5oI17uTVM_V+
z)I|K0Bz&hK;dYJ1X}%W`cij}_Dgj3rE9GGmsv!17-KqF98m%RhCSF?A+qO|}eu1MQ
zI{yQbH6|023}c+U*-l_c^xDPr+vt4Q8X1jxm&J1EiPn1k5fIBaIUuT+|LA$oThoP;
z^wgph>tcdWK%%Vg1Ruh4CsIbGmxErt*$BHCc`#};AB`9rQs)iLfEwyyDZa9?)`(Av
zLRfSl&N_F#DrD$Bbw>JZDUsrK1<eYxuAyZOoi7$jlLuOwbOw@eKE)wSr&5p*d~iS7
z?AAin2lmy~@2#PI(qG;UJjmU^;~*U@X(3CqM7_Ga`qlkJE^FqKn1%cu6nhs~%*t*@
zyFh<`d~#}_EBjczqcRef!Beia8r+ky&QTQFG7Xd^S*&5GL+GB2gTFaP?Ha0kt+Ncy
zt;YLtx3R?H7lg&x!SFB4PKKzO<sVb@aa<qRiF+a^EcY14P3atZeePf*8k-L#qu7C9
zy!8fbLpE%joh44bG@P_2eU6cx-t~*aK^$W5t1+<A44ZW&E~w7Lv*l&M;w}sBadtM*
z!{Dw~@ASIYwPuT_3RNDlfp0uhNquAAH-kqP$6@mr-zAh$JVO$0hf3z;IfO2vell+K
zIxm@w9X;+M=(&AwdUl5QM1*ZV+avgrupfx9-;X;TKUrhsi64=!H#Bsu8^y-G$@h`*
z^d+KZvqvF}-PBnulU{3RVDMq+5M%$1un@~<?m7FrTa@JfIG47!;GSLz{sT=xy?B|~
z+B>)EJO9P!JCzV-wszoFJ@Ak1z-{ebr>(+0Ph0QiK5ZQcVi6uxt{0Eg!kagae==TR
zqPH^s)xeOyp-m82Hb1N4Ck8$K9`&~yk9^P*h?;nprUBiPW=UxurVkU^Fy;zQI$fe-
z4B1BZI`$AN1J$!jnOM|qxo5)KtP2?!?-vY{>!VQS#?3Og)h5cfY0Q0S=-qHUstNhU
zTsI|*Sfpm6s7v@{qyfrLICtRF*St5S_tSaqSmoBDjF+K=JYM;U<EoQ!2943^GzB??
z-GWoMU_2fnHydRJI-1{*9*AqZM<#s0|1b6f5x%sjq6G~XSUmZ==~R3ufy^P{TYtK_
z6{S+{D;?*M67vubkv$ki^koe9ny4D7q~pQoI3C_POv&rJcNtQv?eH^it;Vdre6_^V
z961AbQiY;i!?QME7RJGpqJdDN(f+{x!a^RyaSCteL^G5df+f++u6mpn@jeonlWB3M
zMeW2r6_7nxxwH{HXHNyB#+;cpLZ|DX&F7(gX4gA%@H2`DO1Wy;8N`QaBH$dyhP$CR
z8oL|5<-_UcB<il+IPC|Yi-m&&7O0*1Epk3VQKg|P_%y5yL9%!&9EN#;5B)KF0%8Ue
zbA6tn!zqk$(Ca!5v{Eej;J_p7Rd9ekcb*(TtA>2$;Cq++r0sjcwy|mosDXdD6dxM&
zL5Ukvqa;sk;OxX7(ZT)4i2oFnG5aC%|K!c>4Nv?BYio0R8UJA!|LH;EKLIgUFyW1q
z^H`4Ag!h6NtmXaZ@<H0*vrk41<>L$0gw0U=9^Mjs@J44UV_YV01f+XMXbKL@#HX|y
zqnUffV=8;XFd<0hjKB0i(U$^)E@@T^!$VW#qYWMxNFWFcg5X30@;=_tKX!r~i5F6I
z0$*FoyPc4CzE+tdv#&w%ct`)(*9`Yg_O;_Y-wAl`9}x4y^N&b8-wAl`Yl%Bn`C4+G
z?*u&e&rW#C_~+-o*$I5(Ymb{~`&x6J?*u&ekBr+-`v>N{*a>>!YfLut^fjoy+zETB
zSqaBRO|u%(yPc4CzE%~hAz!<mw>zP4{lw-o0rAtErpV6hB07Y3E)H~9FTLCed#S_!
z;D*(gJ7F(POBW5%yA@(+`y<rl6yZGX?u%}Wy;*x?_^#-UY8PdzeCQB{Vw1z85bs*#
z@443wBdK7D_Emk{87ZR4?tCrkw_>G(C~*;`7Vo_)d&Ve&*F02s#0QaMBEoV05R;35
z<HLrTe8~2q$WSwATNSKUgZPbfYXzdD_-DGDEn<y)>>4|<Ylsk+ajI-d>^;2BRzCLZ
z-75L`Rd(W6i4Fj%PIL&AoqBQBpI6Dpud)-rN-Pu*bz(t)*NaI3SS267%1-<$afyKH
zM2CPGn+VL<G+@Ri0W&@YnDY@pnQF#deKYpro3Yp4jKB1zm^_z4#pS0~3jIj8ARaOF
z=#xpmt#C1Ph7by^ussSIil(f2a2fQ0)HM&SxLHwJ^GZ1K0>m8Pg9{`P<dKkNM%>-;
zsd@ZpC*9Dejcpxa%`CqYv%GQuoKeH1O3_Ee%J0N1A7?~3oR~1m$AnQn-iY!sVU&*v
zqx?KD+KLOLt+-IyiV3EzxNzEv3#cs#Qp?OMFZ}-Rs&8)9uJICZe9!zZTlp<t{+G>7
zb9w*wH1~fWPCv_ce~odojw$QaC$+)2E0Wo}Z(6<K2n3D02sechF6BOOsV{jC0YNJR
z&bW`8Az?kZ49I1)0|0A<z{j<Q43@idH|SVzI9H>r#W;fv6Dpnd9ea_Rdgn@a8R%;e
zQHa&8DE1?B!|>r$agQvhYHu4qIQNI{-ugkk0)!Xq>(VQBqo)?<mvibve$S>p<X2{%
z>f+{0eatVhJZ1T9l=@V@PV#o>+z6>JlnWit?0vT|zL`0<9lq%WyiwdeVqcGZ&|Hrs
z?_)DCE3cD=Q!Sa&98Ae`GqPk#5w!Uc)b}3(wB7+qq7Okj;;7|?Mm0uxsuEuV^+BV7
zO5TUiWz|G9d1OiGFGl)!!D=#(UTi(J8b}X5)m4xp;Db)96uEGiPU3@37Za!Dt>{f}
z@wQXztIPBMFyhfa+WFs?|JB+ym-%0p=l|vT|Ap+pfo8w~4K79P2RQ#PPyfr)|B@xe
zVo6?$$R$f!p8l7o|E27)JpF4=|I4Gldh}o3|GVe^;k40-=m0$9{BK#l{O`GqoV7gv
zKh62Sa&d4hD**8OI<59izdz?`UrCNm;YmCR(<{(EW6Hbn(yQzA2%1dj+{u4vckITf
zH`rfobl$wdNxgaV*VrDzh6kT(^pQu=7}%rnplf)_B|Hr!l4;|Qy+0FfF?-Um`#6qT
zr!^cI9T75tqNcDYQuXnu_p@OeZ``+rF&a#r&nP=I-@Jh#!1naD<}{hGokWm-8;$y?
zabxIuHl#QWys1WSJTl(CHSFxyn>q$}hR^f@4pbedO(*!u-_XzBtM^sN+*<*2Z=r|p
z_@1@?%h;3F`>p%>x<7VNg!)i~8W%~0RP)!&afAsmZhMP#q`tRIF`xb->gj)Jg7=*F
z|1Zt2wXXd&+vxZ2w>5_D|64hA|2Mbto6G(GarS@BcJ4kSMzwy>8`%3|6UXgz*slW*
zq?b}W-pWDYxNzod8tf}$Y)+6`r|i+cOKwT{0&E0+eC5tcr=>#GDapQi%8K}ipC}gQ
zTh_00D_oqyh+7Q`d-$8(*!9l?_Ca{chg+)sR{Wjs(@@;_nSvy7bs!vG0^V53RY_<_
zD6g`OufXgQ$#tcvq?<`k6v#Hlm+(jPLZo4c_KJr+g$*1x*SmVF3mZO-Z+)Ew7<GP3
z_;11g9Q@Bi)ofNBe3F%K*G1dve<;ri4C<HR1?;XpxSoa-z^<tQ`0M%43M9VN2d(;L
z#~wO!0GWz?mWG@Z)5JAWAf7-xOQw~F7JImhjSw2PhThyj&96!g3=-EH4?utzf}F{^
zufVeR59*!9xWmHC(Q<H%LeT6PW_AO%Yzp)x4G9w_9f+hbRyI%Kor*ki322H+uwF|9
zQn@P*<UL~}OOgFZ<{Wp26gQA0trm~h_m)u-#p!d>V6fV2dz=V7n6IMyu3RMr^TRN9
zcE;K;QbvowDFU`RVX-Nt5)XY%!Syf`Zm-v0WiqD_QC~T;ctsK``$nHoRQ-D?F+9%x
z->DDR-_^U#Do|bb8ZdiM!BCBZ?9Bu3|Jxh6EzkLXBfpVb?*EVRgOToTfazTUi6Zms
z*9R1Wk#^6tQ8zlR>zmP68-E)IYiovcxbCzr2lc@ePvFRBFnl2ZlHcHE;W5*(_ooA@
zn}B1ZwPBjZhq~P{K8)>d*S@Teo5tH~c#yqz9{k@{7cee+1&4Ij292)*JQw!rgC2+_
z?|Xwg9P4m=c?)MXkyRVm)7?;nkNnj8*6MWXSpRf<*?}YIF&wG7L)-Y0HLUDL);PMN
zAlv<^Q|0v7_*x$t-QLJ(w(w5#5+jUa6JJ}S8z{r!lr}DUV+!lvyByVVKLF9BJ~D1b
zqyC%q^*h#ewl}z5hc?%txAm+yZNa-jxmc^79l>epGqCQ;$?<Qcv)bXRs*edOwdzVQ
zB8Vd}@l|6zW&B4x0G66Eh-8hzWbcyAp#XEf#2Ba}hpKrDaVlOtDW1GB?8yjtEv7C2
z$kN+}&0?iajQVBMxHMAhj*`XIvG7o{xXpZmWlFmNtB-Gnl|~)<N*k9L;te*mTX-gw
zICD#2eK_(D>eu$kplJgdz*3B1nQ`2;Ax@~~j@~)1<E74m#uRY`Z&_#5kK4Lv*PY>6
z@9U@fEh?dK-*u(e{#zzoayiy*LWr>9uhRg<;y(c-9aJa{>03;S3iU`crZ&wRJ@Yk6
z*_qFrIyOi?#(#~Y;c@RPMfXkMe14{HN#mce&%ZG?jK6n1s-3~On_yu#G4yHL;6XWs
zyzI;+Wz;$dWcD6DvVo^vU_*%>P$LE1{jU^yolOg?B&Z4Mr@dyTmH8)}CPl1PHZ_CV
zOt7X|U6U7Z*4qt-iqV8ALU6tFHaOw6gEApS*w_zHR{z`uZh6vwtLbk@Gp|A6+wnEv
zZ>`sNmw!LaroKPBA6D?^{lJ1Omx$58VI!y<b)x@AC@o1>*XVWjHjG{T`<6hMg-;;m
z8Yn=*M|S!UI!95f)!IW@v-OrzN}#CO62}x*Jt?G8fpPzY3V>r+CDUqU%w44<$e-hS
zr`ATqB>m-GtD8);)6lZEAa3p>rS*2kY2%F{weHrwN{ABs8`81L5EHkruFmYML@SLG
zH!^z+oY_I|YoZ0`G5h)s#NLEFfV5~ZnI$upes8Fp*aCmpDerX9LmQQZ19>!QaeI;=
zC(o?~wBhud6o*22nvAqRcUz-ktA3r_+lw&Wbv|AAF2NL;LQ<nf#<L3WiWkC|4|<*Z
zF=nTMADJ;DPo}<>jCskJ(@ycI{2B4s<<WZnd@si3cC~?B0T1%9#c=K)%zW2y=q?LR
zJ23Hbde}0gF^U1E_&$PN%-lzOaFLlVa@^Iwnbxgo_>T&!BF;G~kwVWIOZ~`?t^Yar
zRBzjuif))4O-2j4|H+yD`=71tW&HO?`C07`dZ<KUIU(M`q8kV2=SjRepgW+%t88|?
zaR+Fy-a)wvPUW@XxZm#$Mp=6N3d(-%4chfVZwx}%Ys=b5yoa|S=W^!4zts*-KAj#N
z17G?5<m_VwySPe(mq4ipHU2fxwbSy+Y4zx&TsX!u&`kNHY8-qjd@T8zV!Ri#bcA(f
zoV<U}x&yWdlDYW1T5mRM4M36JT@BFW8nc4{h~NdaC_q-LoE=m!?^yS`W8+<^(d>;c
z`A5o6Voz$)Tk%LLHL(ZKGJe!<{8v_EeK_ci?y?Q}+ckd0Z%zKr`3!FY8|q-L52vB#
zuz&Z(ZFYD`-|%zehCT-mNU8aamFC|XH+`p_<}kdSZ#Qx~&5_*Ba3qaqG@~7*CM;a{
z)&bCff5Xazdd5uL(=74-`LFumZxpGp`Z{0IW)E296(CTDpYWnJf|HXPXm~J?t6CE*
zQ8{8@jm!AHwvquAjLKo*tW?xhaOtp81v#K_R;(Q#y*n$M9hEAz>Ir-)e}um{8%6`r
ze7DW}XxT&kux2;-ca#>z`bKS0fm_q=lfy^@+oaZ{hJB+ZrUxH8okX>8_Mud*gcf(u
z@a%KBdi1Ge6ib!rQMrJO$+&EFo%d{{7s`8>s`I{Zc5ry~Tgm%A3WIb@9G42em1<RJ
zqIOiS6^@T<Yz_!x1jTTC*=!ByI6O^^Y-W~Y`~$Vvzl<>9Wa<3i_;V2$=-J69tZ#sT
z3dg_8fn9IIo=}A<E>;J%=xaq<5a+{E=z1~o-v_*Xt#{7q-M0C;--Huz{M)|>@}KF+
z|6^`%o107d?|Z!evy6iqK($j4=!`?#czfHt+4;YB!>wHQ1{xS;;s1Ai?+5=&XN6-l
zqx@)oJ}qMOSMRN#zD&TK-+Ib8;pxPiA|3GB9;`$Erf#H+@t0rpw{=3ts?*5I*Kb}e
ztn=;cX6Wc&uNj}H4UkdP#>6SKYTuw<=j`JxZ;!Pi)US?e*J<Lvfm+c)@2jKrMAcc^
z5ZX?FrFb#qx=^Of69vZJ2VX>G27Omi8mLOvseDgRHN=&^n&iKzi>%p`{-D;ndMhR>
z>g;npBA<8_K0E1)Z$$-!h|##^N-wezV$)<=!M6woW*Edp6iH338E^sswZ<TnYNZjL
zNf`kfw}~%*wEjf>mNcq_7;$3Es_;aAe?LCfh}+a>hcz(~{!AMBn>RRC8YK-6N5zLT
zPJc<Wj$DYx(iQ5t(^8W99_7E&dZTSOD|Y9q-n{JrPVlS9cv9dj`EPqG@3H?ft)>6h
zQvUlM<-bbl_<f=H>&a2MT01Qqd@L1fr)MX{&j(eX7#Lb~ZfS58&Ac$bGJDw~gXTWb
zIj-YfcJ;pUB0l-tKkh}^pNB4Qsj*SaFGN0M#{nBHHLn#Ee1cked{-URTcaUw3N~fT
z`)uGZ_GH#wy&_Y1ga*5jmRfq*0;h8et=?A87CY0l+<SZC3YJ|q8*hw6Yt2mh#8A5t
zCWyl8wS?MUi|krv^y1vbuCKp8I{#E+d~0~q8+V$<rET2QyUh*|S8qHTf&iA~rD6Dg
zsO{o)bn8!J51kL}$q4;F>zzbUIj37FfD;WH<F4m&_&Jc2BX++KlA51i@KyQe=YHQF
zyrIvEz<%rq>@-Fk8jj^&LVD=l5m)|mJpdRv{zgjQ=rZ7}_<v^37yr-PSjvA(`R_ZF
z|2Q$awl=j-1^9Aa`>k|PJ@I=0%q|}w2`<b&UuOLw*7}@BO6HhM;;elk?osQrljQCr
zjWq7KqT7rLMy|6R_3W?jTFlgtZ(`D9T$5JZ($I+%g%jA0hVH(`EI?^95>{vU!!1FL
zpABvc3Xj;1!|W0d>qB8FCzX|q=fp>+_`mHz&r@7Hg5u&MZcMYbTc43B*_g7Yl40op
zpE`4e@3#EiyAvzspnf4URQ^5Io7uXixlsjRZu%*SzsIuptF!R6^%lN1>pGrn%z^A$
zPX7FJLR#qm{ZCp($2xddsXS23E}6hcR^y_WYa?+4?Chvi3G5+lBr=&na(^S4TwP%k
zO28VfN!F3WH7-mKyka>lz9g1&C?*(u(G+%V2z8S3prm0yf6x)s*EcF^K{Hq&WV(Md
zbpHE$;YWHWqa{CD@*{NsGyM*t4B(8RV30mwUe@;01MxOynaxQ4p3Ke9Y!YEyOZDH8
z)ea0?$fwpk7U{q#kxM3BpPdKge12d^tS{CXbGB%@yk7Y(?ti=y03Q7OzqRc<|1a%-
zmgoN;<NVK^Z6crlz2zgH|0VHGC!K09cF2Iim3titmb~F&o&K43<`?0s&#>leeb6*8
z9Kj`&r!TO!!%=I*1FsHxUo&^4NJW_p*S>DBCLU&fx>-e>gH~Eji^fm7)f@ZRz8a-@
z9}>`1(Oe@%tG=CFc%EOpRR!Bwaw~(uBT23dA|bNts#b<UF^QDrDf+!;CinNvq;N?@
zFGmKIK1rH=SJdolWlhz~vF~^}_9ZVz-+`MCLpd$?Tj(>m!bC81^9vRZVI)w}@8ca&
zK-US5c+*4eY6QoheCI58oa7ma!}>##5>cjhrAYS$UyD8yBk@~?zFFQorf3A-=yk?-
z-S3aaRdwK!#^<APm&7wf;tQv5To-(0WUyF9rS7;i<hiHCCI-YGX1e_=)Gdx|Om~*q
zV_bvx8>lpMYddv=shLB=CFsHI8{-F}#JOdN2()>If~ST!)!zR5pad6r(C(PokX>%F
zx*{Ch@xmAtV)z$Ui>=xlgFQF?XL$VJp{6@ad6Ij3Lgx6VU`%hs6OI$|4K$nKfJY(t
z`yn&~UZHh)@O}~J|Fcq|_^EUs-|q<L{|(D~{x>(ba!dd3$MOHBQ?}#ChujX2wkU+X
zJ1$hjDc`spw>l#@@WP?H@8}2MbW7FmKz3#713W{sqL)(x<bwLRGlC;G9F>s|;fbAx
z12tN}nWu1^?%H+}o>LTG;}`TOHxTuHVc|mY01gN3DMnnycAGfV?9zwuvFN|vZCmp@
z{zaqzyt%#Mi~nsc>3^yJ`wsPg=rv;n`2DHxqF8uKMu`Ewe`|CgORy`;)R!^0|85OM
zr0Y)aYo=p=u{&_+9t<&i2!KWM_)u8D9Jb+T0qKbJXfbJVi%BOu+e``kd3oeLp#8Gr
zHaC~yF)TIjNf-=EOAV{6JRNfz3)kFvu0DOSH`W|%Q@Z=&?QT3*cRr=kgCPIIIzK1~
zE?EZO6VIOjv)NdBS64%OL~-ID9N$^nOT3=nWq?7@+bOA<l3L$^&<f5#_@c-3BZ#FG
zP<OYb<oy!le>kk&^}6>E{UgZ#CQAL@^Z(}d(*FN@l>awgXW$U9xl97U!fqZHDj3u0
z(@EJM`d@h`WmwE+*WI*CuXHbdtv7m?tygjqM;={l-=D3?9t>dh2z0%+-4&^1?tm_E
zD`I16%9<Mo1X8vp-x!92#`;}r*dRVaIUvRK$|uz`+(tH>C+xdnn9h?z@mG3cxz&zK
z@2hIHv!f4(RjTGZ`h3b-atj@wh_>AUd@dhVl|uZ1T<EC$TjBVqXlzWbuH2_5r-^ek
zXieaM__(+COZgY$fB!Ss=k70PkH{zZA5EOPH=<Tmsx_io90;Mc4y0NQs>K1Nuf^w(
zT$M6y6-zh+Y5d1hn;mMCLppRMXHu;J)#9rWbtIopk54=!`7E_LrZ)L<hK@ut{u3h(
zOo~T^51x@|)|8=0=twl<XLvPhB&)41a!+<M*0`}7?OLzTbC5M|P}=YHmHc9WhADXT
z(;i|cPxR$&0R=~;4Sb-qLdcM!$Du8a`<<TB5<XB`qQy+nW2vQ8PpUXLD7cJ5pIJy|
zXS&#9^84V}6#h~{9*kinRa-~7q@ogeg;NbmjVBIcoT&=idGR=kKdUIq)3_2Ff9n?5
zhux5#?P7$z@f|J8(U3oc0|8HiCv`<*fv%iVXlRcYhjdNOpRrqCPiDOtQSXh@4s3+1
zFuU5<xWUX7IzOn*umf(imMOPdydj&cr6UkkL`(b?Hpred8Qw&1(qIc&Z;a%c8dXGf
ze^qLy8gII*O!1MZrVPN|I#Xb<U`9NeJpaTLQwovK-)n;P#rk1=sJQ`#PGW$+L#C`O
z_M8)EZOl9U`;6d$fZ*#q92@aIjoTimi_SDo6yv|!7|dSi&~EtLM1~1}kpy)z-}*+M
zpk{GrOmm(tA7-+eK4&dgltog8H!r-Sfi*fXqYetXz*xWy#s?)ha7}C1S;-0Oymf`J
zp6u=MY^r=P6S66DTIM^TYYI28?|NVC8jbgDmD{ADmltY1yY^SUPyf;VbJy+U?M708
z3Y1QFlTbl^I7xTY85&;}vtJG{TrRoFb>Pe?lz0)^U~}Ac;R}C4;f6=p8roPAxN435
zG;M?fjtcGVO%%v&5aa@8tec#7?9_8NY|ifVBxyLf6|toFNXbbrosuanI!N&B(Lte_
zU@}gv^7;Myqx1CnKmJimYLZ?kC+wyEPp|*;1gc-a{l95#c<q0-xANvv|My+$|9)K6
zpPCn)|0u^gMrQk-qMVHx-}KLGpHGcf|9&OqNG}}M)S69KtyRGg^2eofM=9o<YRL$|
zthHb$>T#F9X!JXiptn}=i?C`r=f(0O=2`Q9H+RWm`2Uul|1a&omi+%m<NsHkUVRkA
z{u?3e{~(n850~tJ`7;;)zn|P6k^fuU-uQp!mT4{J|0Vxl8W0C>9E>yr_KIf6UQv7%
zupMl~*$&XYY>w?9teU$Wtg^|G2MQ^7n!{#ql*HV%DO6_BJ}I<YDbgl652x<KsiW*l
zb>Hr7Bz;=06xSXaKmTmt=P7CuC3RRrRa~XI$!w!O9Eo#U!V}*|E<M$hmL0Q6J0^y(
zzu}Bv-yNo}_jHPB-{Opc>P#b#<ZZ-}WKteQ;35Ylf#>Z*gD1m>5;T!0kb1j7NW~tX
zi{pQ<-h2O-vOq3u|IcmaH}c;7e`9lb{(GGLKV`55zA`cy1CtC6ad&T`{~oNqkq}-X
zu=Q?i>{#1LvPQ;zj7-EZaU*Sl1A92`P!7Nz+gwSap#2Xd1!(Hc>IyXTsnr<thP|ti
z@q4S=?0tpJcsc}`BF$Qo%4gxvF4Z}1b;lC}WQ@<<)`XWI$`!KN?0UaGx>+Cf)_LW+
zk?Gt)ZOlh-MUI9%x1-?{fc>|Q_W0c<#-#wErU4r?wU_DO)Tsdg3X~h&z%Jcm=?&l$
z&KJ#|`TmbqvKZq6MId;OP6ewgD~*Pc%J$^8xG2J;B^(5IKv)Nv1s)K!@zH&2_0xu(
zz0Rgrp(!pOeEs@0%ZieSpcY?Et;_p2Ubx{M5wpF6Rj!OL2fgtKxBb*QyY9wyuO-l?
z#~|Y!+BxlO8@_g>&+W8GSZkzKSI8`0dQz84cAD~=iS&kT(NZ&)_DC)}Rm;NHt5LUO
zcgfbM1|Qf~ZWuTdLI9o~2~oAvYt%bK^xP03FR-0@t=YOWVLfWX>!4Mune^MD-#Pl7
zr{9~q{2Hv@F$PdfBr#ya`->|6MHT;|itsznBUiwP-l4}UEacU=+rYRc&RCe62c@1(
zICGVzopjqc#z`1o>LB|9XZOtp#+Wv40qJ452v~s7E&RQOzis&2CIDXb)N!M%4g<$O
zJU)v(!Anqp*mVoLqfEd81=5E2%V015a<hLil6Jo{9-2yXTJ&NWjorqb^_AL1#H1KF
zS>er67+jAr@BI*;0Sw#%%9p}mP@jeurf;X?LBBV&#mhbR<GemNeD~=$!|2-s^!{NE
zam-RA!!}{V==BlHkOPWJ@G~{ua8ILDAvH193`A!{Vo~NRW>4iVBozJz-WzpzG^A+)
z_nOf2DLw4M)@|cNc;K5Mz2jppP)M9E6atpq#k8z<tx*deTw)<MTF$rHaI%5nW}2<~
zb*~FF;R3=1l?n<O8feA`lgj{D3TK6bYUwOx%5Y^rCotOXXaJL?^25H}XkE1cV`0wd
z6+=KAQ^*QrbmU0lZNm+Z;T$-zmNX+a?MoXs6wEi?X#t_vhrX7sdL8Iu_(onG=Z6Nk
zXP{*!4CR;cFYv7Dj7~EM>eSmsz>hk%Z0G=v*#J6X01PUN9>C`=&|dc%9f@GgCasZM
zrp!uYjH7b3^r3VXw#;Q|jd7KjD?eW{v0DBH%8m^=?3f)Jq*mjbJ?Qxhh79v#jA4HC
z46{4FyJX|^5QP}Jw<&ijAKB1{K=;F|DV&UrY{!PjvBq9FK0PcDHA!b|VoxEHV3pK~
z3?)Cmcq0(d^Ns+R4zf>xL-FWSsa(N~(+T6aSV#kdN&5SEX9Ih**%%q8@ljTqITPFU
zCL(15Skanc8cAPM#KUBoXG1A4i1X+Z5(|-N#McyZ3%MzmH43iia1It{414#s!rr8X
zrpBFvz!wju!{`RRufAs78O!rxP`PRi5StV%M8R`F;xbT&w=vkVTD0X@qC1emaJIu9
zta6rE2*%;)*dRx@aaL^(sE!}4J#gkhItKO?>`>hXXB<imY@YE|9AAIXyB^f<jG=u^
zTh!MU2+?#<xKfyX1Rh`4?_Mm_2dw=UXun0!@v-?abjnK)S!5&B-ccj`yM7CRpW+D|
zUed;|fUhQUvq1Lx2}*pVRm?!?z+%vziPnKo`cvUNfsOtOH9i{2bXYaa^qzn|){G-8
z$4sZlC{;YrR5ds7_rXcIdQ|>g(v`3_l-E%S*V&OCo^XAe+p{8E6Ce9<RZp;&dUAEh
z)5J!-ZDA*tk6bT%qY)rEQ)PQeHef?qAjw7+%U%G<Mkm)_)5fQxa>6KM(`!hv&x>5k
zy^+my4Y)?FyZSXHXrRS##1;o7`I%wJLMVCrqDV|2JlG>bMLcHIheJ36kS`y+i7kwQ
zd=Oa}!?LOr!)OYQ<aSp?87ZJZ-?STmZ<+~kZUVwjhd2|EV^UytmOAjsmw}sdta+Zt
z5t~B#HlgnXFwG2(AXUE{0_Kzt&H-~~h**pXp%pnTFdlrU<8wc+^)|y#t&><2rcp&)
zSSqWotTRy3Y>{qEV5WxEo@ZGbG!>4du~G3|sai1N%GOj#D_Y~}JXZe~3q_HO`BCH|
zGKx&I1eyuPVY|RBruW4jd>yn#Bb&BcCfGGe!RA(xQ05{A%3RzFWiGO;jeDWY`|*IZ
z6Gd3-R!5@-QFS556cWiY-gSUSam|+7SSDlq&OwDzx|KNr0KsW9W4_+ml85zcz0o#`
zz3w$GPa3r_y)L>S*ZHP-T)$L`!v=phHHu^7V9@&pJ74|UI6yE$(UYsIR>PJ{S77rs
z4(oS!^#QWTS;fy<sETVsjqVU*FIQk+Xw{XmND8C9hxn1VxNO!qtpo42NA_Tbc8}>o
znMQC%(b9y6#(MAQUZa(F6_zt<905FAuxB;GyAa>1_K<Ji-ToK@0*NWJQe4~XK*Fdu
zZvm|x)g18<(p-bUfeQ6q{>TZ-gFJ(k;IA-XuXxWMWik`-Vd`p_#qf=Hz2?-_FJJZC
zSU(izSZIaWmB6dduhK?>Yiv#FV-oK_y@ijlFpE&EtCBSSySl==6p6RQwW^Uwb&a>i
zhFr$-muY*;<osm-Y)~pio4;Ii%U={PCIHvfCU>;uroM8flst?9k6!#FiU}3?E2d`#
z*!+wIT!?HXcQK8RxUpqz2~7IexCP9EzlFx(*fDIOIhKP+9B4Q!ay=&%QD%QYPqJ6I
zK>SfldX&KJj+Zjoqg7-6AJ&@aAj#G^XigPC;c=hsy?CO6NwpC3VXs2D3nf)#_sRrm
ziS{{2+Y=w^I8R|aLU3eo(NpZs$-6#m9i7Ie+Qz2(#-_f>&<sc=DlQ3z6zr4?e#FJ8
zcXyM|&Mp%n`MlAvheL{N_SYECRUrEEV>=oO^+A4z+Q7B0Z@9u$)P7IGHQr(Hy9st5
zp#oAY`%i4y0b#__{lu1zFk41;Mm*m7E+%=F9t*GFB^$D?E@1Ww#SVif1ZKR$GT!dd
zH5`QxtJPQ)>Y!G$40^ZApG^P2XMbifCOfpAb{@3YgI%2bvAIEq$d#49*+AW8e7KR|
zU7`8}F9c~|0wyUox*QHS<G{sz+DaQx)vepo)@|{dB&DvXmmTOJ!$zV4hxNeVV|n4T
zy5fR}CWqMG2Hbw!b)H6F<`pr{wLL+HIJ4ZsR(|ST`02}9_W*$9HK^cWkFz%rEq52y
zBMJ^l9><jbyyt8SFA%$m+MlV$NCAc#BXP;K!mglkg=vf5b=el}<*q%ShlxpO;Y2pP
zWq5HbW5un5;)oBo2vKL{9T26AiL)LV7fkNsR@zx}T1!pMmWVT`)}$4u%X?+5*%Ni8
z>2BX89`;otoeokr16KxTvYw(i6Jye4ekz<ZjNmh6))9#Ew@^CO-es@y{DJ8qr@Q(j
zLDm#lDJj!delD(xb{O!a13<y1)}YgrBkDRt#AYj?0|gLhn`{*Xgxsep0W#3|>*6~2
z>iP-LK?0ZP*Z7d!wN|K_^*J8_bGwnBnZ?P^^*?lg!4Qr?5O#otEo8kIOAr0{YB~7I
zn^0@=NvSpc01{}a0gpgYFK}E;mhs*4?P!}48*{L8*F&xxxj5!~F=@aUL0zmOC-5Bg
zPA^jeNz!7ro);}%$8zz(a_}L?>sc;7D4^qY^InD`K%lS_4h3d&L=EEV_-c7r32#ba
zU3|D}tR;C(_*nTH8Xt{1uff&4U&z?vOQIDpZc#RGPdLxLRmF|?9GRAT63UYEdX}7@
z&64wamYjc3mfUgxvf>I~(HV8#VbqqFQRl)LbsibDr7-H8j#1~mLXb}?dP!{B3S-k&
zAe)YW`t!1Bg70Jom(J_BbUq4~&g-~z{t>w}pXfx$pOZk5blbzr34y1w*fEtjdjxK7
zJs3C7hjH`#i{$1x4NB*{D1ECg+pWB0D<o&%8vDR<T#nK9FLYb2^*1f{j%Sxp)h>Yt
z0@<7(=s>V2@jo|H{Lkt4n%wNYCa2$Pau2)LNORE1N)93Mb5sy9SwX~P1rw90BPI<G
z&U<-qbY$&tO`UJQJSr>uCE2r!DX*y;=3T2RZ;tBDc}sAKv|%de80$r!Y@X?sIrC)m
zg7-{hkgs*Sp_g)<smIxAtb6S=3nXEt!hOW!LvHr*;f1jDKr!h%V(R>`bn70LZl_Pl
z>Su_w%M;Y{1odFMfoVoN9DQFKLihoM;9_ad5v$%fmi#<XVt$(J2A&-k{x*cyYmL4y
z<G+OafA<&b|83^-me2otng8Vx{lEK5|L-5t|GOXN|LuBsdpy2ZSL*#}Zr&kI-e^Ph
zAa35@gHv}K{<WXXvHRsaai7j`;(oq9+&+INPd>_*<-aYvZp$;mcYIa>K2E2|HlNOy
zTOMrT^a*^qXAa=y`*J@6f9TVP@!Cij-4uFlFh|Mcx^*v~X@>#`#C|Ww5Rd3EA4<Pd
zF(W|PwlpodR|<E^VUI~Wh0}!A50noG*co}gn&|b4WdR7p@$hpFK8t~~VwR>Qa!X5~
zuOz+&`4Lum)cRuMxFqk%=z#V3v_=PPAPGNnz*=c-OB}E{_BJwLpSSm61g|{Tx$ud-
z^88N0s%)_I&zr|T?{~Znrb3#1RgMvZ!^P@d<APzQgpDv%a?E*XcfWh!-ov{4&BVPp
zj(@?vgG)!ir6b_d5pcndfaJdCap<e}zc-IQtqa~(L>Ii*kv;HEmmYXa4?M32-syc@
z@J^RLcuOC=r4QcH2k*J~;I-Q>H+?97JYbQIX2Pq45<B72{VvE@3m2r_R*bX!?imV6
zSUQsMJIdv?2Bz&W^P#@peyAVKfT*vxAnHdmA&O@#;t*OSgAttxQU8y&Vlb48WIvkI
zSq$Z(SPbQK7DKs5_qkYGYwMllypD0QS!ugXhE^HXR<JMIJl)@&A{XXPk$aKu6p?lT
z1nwTUPk&TDIcv6|>oez_dOtbg3nY-%a+oRSwMJX!q-{SR4K>tUa?x2_bn?A3(Rm!#
zEZH{GXQ;X4unXj{;SOsG|DDqV%_{<{9%)Mvc&5KXxbzW56Zdpch!>(E#;z`i+LUJK
zJL>v83YN0{QntUpZ2vlnZ2weV0Sluz;^U+aW@PpFu4CVUL_J$jd4bGW=OU(_6Bycf
z4hoDp2Obupx_T{-6w4z;Tu-h5BK+QXb`;6n$I<8X?mG2)clDRvT@U2{#e5E$z0q(^
zFR&T;zw`NxEsy`#cFtVp|9mX}FE{`9O&wFN!HM^-*F+mFGS(aqZDB3IJf*H-6GcZc
zk(yzZ8L#q8AZcSz=WYabcJ+>qP9lqgTP{l^eKazfJzL~A`D@%7*hc0GYtYFXaSsO8
zWH<1l<LoNGc9&6e>A&_J`>zSVrv@x_qn6ORsWl`=wAB@_D_X;wVN`gUi4w5>v)!Sn
z|F6;jq#hLKD7Js;-{~C1W|)CB96K=FN<+=R#1H{AKQ|_Sn17~ue)OLkQ}GR8@iWXf
zxhwCg1OT>q5|nB4lmS0}V!)4|Kj6pr2mF`;o+bDeBmU;cd$oYF2z&|jCS~4wLCd3-
zi?m4J{M6y;O<oSRQeh>$?-sIb-|kN@-&}g}FWdM<EZg`cF5CG2%QpVe&j=CZ|Iy__
z<3Hp#@v}Gn!&ZJN|36mz2U_5z{QSekf53HD<3F&^gazUYI{c}DA<*`oe{0KOBBWBC
zW<4}Q#OjJC?t>Eug8EjT-~i?e4Z#9OrJdwR=?K9hf}%q_NnD5yiwS}8Alj3lI1u-a
z{~&s0;dLJ-CIq%2#?tM<sUK$3?Tx#w(KCn_QGKVxi$KGE#RJ|KHex8-`A^Wczko<>
zAWsqxe(^rRNna=6g^|QpYE+4d6A;4Vok#u`o^TNewlucf?R7Kk3!j=NOoQaXEahtD
zOwCR0>tsTIvhWfjB`?j=nS{3~-=ui8c|t`Ll+Ju@Mi2SG8VVX)yvPmJvqHHN1EQ?F
zCxU|$+k`n*bF^lju)dtUVPIpe!*t7Ry4_zSY|n;Qsj@1gUu1fiQRjfwY5bz4n0SyZ
z9u+>s!A&af$4#QK5nE#<w6MS@v#^l5&|;zD2v6O(&1OT}l2^vb3$1A{V$_k)ZeM=H
zk&lsw85p4Bqi6Kp@!cgGzQzIF9ys~JhqGs>?^}vt5vT9VI6Z%4upI*17+y^cW5dXz
z8QaHr!Bpa;z)fJjW{?2x6)5E)bJY8`ftfk#IXK~4qi3T_bqlx4MP{lG%x103PR&>^
z<DinW-WRk)<gm9ycWqzJ`cbbBL%s6ntM@kMf%XIEt9N$o5M~^nnqHTR-q+Y7QqzB=
z1&f-V{!`(cLcjb9H9lhamtUV(diuzbcOH^beS+3Aq*PPKlmjUAQcs@_J(}34w=L|%
z^3msIZ!`i_AGEG-{4~l38W3;o!D)1MptxsBqS49o*E9wjPZ(uvdJQS`d6B0ZzlrY7
z{qkqDOtgX$1{gT1*Cq5H^#X!FspLR$5PSa3#>KL<aXB7o!a<?bI*lcNfItyh2F0?f
zEXAnN4cbHD`aq&@+6}-}P2?NSjf5RT0th)NXd+7;_~g~V{riWGd*#F)I`_sNa+t24
ziPXFNOsmAtdZEFwmH~*C0f=5m03x17WREoZyZjMTrqkBB=47p)S4L*fc`pSndNJXO
zoW}SK$g6Usu6b$J>#J&<I<F2qrPZM)!$T)It+7j*h>&81`Dn{wK_NuXxd?%s^8PYy
zZ<$Sh8Ne$j#qv1IMYkiyiHaQmiIo0}jQ^yMy2OHEqGl5lzNl*-h$*|g8tL-(Jd0co
zt`#2R31ygb9I?9Z@sj|Tz{nWX<tQZO!zKr!Lz#;Xe8U~}ddBb$FH~{kY}&(y&1A(+
zuh$>y0!Y2xQzA$O$BOc<`A@=yQVBhE&md3*l0v2EPjMnYsnd(fG(2%uxR<G_VPA<E
zw}qu!^yNcWA@BI0YNGOl_&iDRcy1k+4QSMLbz(uQFpQP!#3Ti>Qr*+YJ&D`~oCcW3
z1i1}34N%c&*>J=1mirQy&y%@a6O2mZbr>b5li&GAo6|{`(#h}nqfP0=t2#A!!@ElC
zBM16HJ|D8zjHh#_rHRG^WD*ej%5i-m-xtDe#@AE;JL&CwNGHQPAOWmqdNjI}o8cS4
z>yFS1CN;C0wgCkkYD<zsk^45UfsQZt6@i5DGHsYJH<;F~6bfSw3U+g5D<quPkZ@i{
z!ft&J4d*p9oEJ1K#qWqg&q3*}efh2DNpS7YAEr?F<fT%x6-Lcg1Zs|kuRg~}B?eqL
zg}aFt{a7h4LUbC>OC)MZG@jSdxTT?SPXm4$&r39J`Di?+fmtqu##e+>vd^*9lT>*{
zlTrEa!0>TuBtgxd?=T7A835IN;ue{V$|6kdCU0sugsI~YX22$IY8ZqmZ}OqpdtGKH
zZSZTp4Sr3-5Ll*1Q{LeB#Ri|r_!xp2X@j@sWC@mzC0LPIf)&aVtOaiHrUt!D{|3J~
z=LT=;Hh6#PQq9cFfW6&}y0@FN_IC5Z_x7#ey`2%e|0tyF`jKe(d2I7DDO$?XYWgTT
zH-n;Si#HXD&S@#yzr~x{P5-6Pvl&Lu=0k7s8D)##ez+~(j7sJ5=4{T|-OU-hyE$uj
zHy>_y&uDgc?&ZF}r|j_=?H<1q7$P<>P?ckG$LvpojOiAvTZqJ?a<ZJEa+#cl%jEua
z!!3Bv5H5I`f=w{85bTU3T&e_{)6sg)N9(4B)^nk>u52t0B;-gEeq@VsY%;aSCi#S9
zCcVs+9jYSn)KGq@k)+(977|fTB&x1Uv^ddIHDjc@1{A7RTk=x1cYOl_eS|X7*~_E^
zXmZ(uyWQ*YQwoz}Q3`Agrn0yx0l3P+EFmfzdMK7Uarc}`>cn3Ze#Xcmt_id@XCiU`
zc0VhVxQyWWr@>DrYL<Wh@^WZ-D`btUTOn&-6Xs@NLT*kGa^vkFnmt#NUYC2!i`ru*
zKk60}B&9o{la~9=OW1cNKhm}nh_ktwThS!$UZgpRQCD**Px&Z<uUm2f*Dd-TDom3i
z_MiE-o_vIZwYqiWb-PYJ(yo&a-F5Q!-gN>Lz?N14EbDqL4GDXK@ak`P%?R6$6?(KW
zrESBK_8n8V?^v3B$J2m+-!Y|q$9f{saq|K87!^%l-1hRw$T`RUVn#dHn7R$djI_a+
zp&QH-ox9Dgu+1dzJJ;kjdr4k%uGx-yF+bl<F_~|x&|DH~4l~Qm<T-C9lOJ_22{I-y
zcb5Ch3)x>LKhpLRh_A0_ZaI^<dyd9Vgqep0=MJ0^9hq#O<><&1`G>yGj{nsp{J2j5
zuo>}xH_WX~Z~QNq>@xn>WBlkNMj`U7u2MiFw)_cCOuvCI%HKC+DTIcUyVCdue|E(K
zMDI)j9<_jv{Jjjx{gUGUNqDgs`Zq1h^wR(Krn#j5$Bh5&B)F;@oz`6o-8_fmOZ4;N
zF20BYP5WwS*b@w{B?1Jl^4uvHB^;h$$r2)y@$R@#F%B}lZf9z=kVl~BMV&?dW})h?
zy3n3ctCfyEl*+YYp<1Y3oR%uJ!&+_iAMgr8j_Kd7^1&F)tA)ak?K%&~-C^sx3nRq0
zW8dqmPOpxR@AZ!RjPWx5M}0LM4aSX;@n5Q8U^u~D5J@@P(-d>ta0)A&a#tBVv$s&6
z@wjQQjSDECx9%iqB=FZhrkek!fgj%hPn{%_PO+va=Z+!C9yvu&T&$-rv8{R%Wx@n}
zhSuuX8hn;Y)+bWQ{g9GPw^6BM@lYz}v?v$j4`H(CKXkR+8=f8iAM{3d3ylA2nwy)x
z_^;;1lK(#z|7Q!ljQ{$h#eb!B*F`6%%`4T^g>6*|cw>-z>XXB{6t(rJ+d{9();GW|
z9G>GtIYhajAy>wj3Y4~NH=w}iD|tj`UFY(MF=5>ZuyLZX?n<%MssFEO<C}Qw#81UA
zty5po=8XwG!fy+ITlCq&&sUxLwb}w;sNEkveS}nuUUPbSd|^}%OU9>@;?et~QqlNu
z^joQ1VeuQ^pPYRvRF6)|Y2z?$R8Od+WxTsE$_BhRJ35yeOBByjscO<l8^0fx&PoP+
zF^<Z`qk}@V#2Y+4sg$zRO6j=tsZ_33jFb09@!Tjt`6D@oPoj81jY;hn&lAVzYe(do
zoMgdzHgHhoj48T2J||_Iy;E=|aU1O$Op=Lh+qRR5?MyteZDYcTZQHhO+kRu)@5%S=
zy=(7N=bW2U)m{CsuI`(@=<Zs-wbt`8=Y`Qr`VQ*3At_>p&OM8eByC)pg*I8G$bgen
zpdc|4%<2y*yI!h;l|w55K!o%jX;FOm1$lQZu_RL9uEj;`EuA{nwf}z559J@{@N@n~
zsx2s?U#eS>TR`9ye?wSOeX$o_)q>H;Gy-@28UA+3W1l*<?xq6;gGz~yP0roVTubV0
zy1aE~&C8pnXD9disu1BB#nu>AooU*Qn*-*-d@hWRHqEtX;pYi&M)P~NT*fvnvV=5w
zfxD~nrzjE|jCh?Y_xIBg%Osl&01s3u%g#g>K>DgB|A%t3j0<29-eZj?>5inFkHAfw
zb6|jqo+h!a)o1^g7f~_fR5s?KKeFg}dwAb*5bxlr&cl^&|Buf)6?bI>cOX3H_ZxV}
zTs{~d1Hz@Ai5pcm5%-=7vO!D8rziOHt&G<LdV=tn-o02#bp)mYM@mF!+C(rfW<SEU
z4vDO(%Grv@;}<7Jr?BUJ*XSOs%sq($I~_1WuH1m0O<X>5l~ZhU+99PG;6tf3k4Qlo
zX3s1~2_eZ!=NQ>ugp9N23bUEP{v7&xT#c6tZlsWVk4oD#d>G0f&w4_0*v@$t_b-++
zb5tQ-3Q6W<iq%UY(#MFDT5bF3$7gO=X0_eo4aO=B?y$CgqcV5~ZubkOw=#)6Bw>-(
zv`4lRq`%%X92JpEE7O=$YA)Koz!bD(ofkfFpyW#8{BBVPxoeH_9$loLG=*dFt$4fr
zev5(?-5AZ(KJAknCgJS;IHzSLH~P*iuKN@EGW1TdInImaS+{hZW+gMTtYeN_ih`Dk
zvoZ!qT(NB%NL+8@YYB@NqR=EZ2(u4J9h$!ei_l~$h*TZ)jiwC97*VY`4mBppD^r;D
zw7;bDAau~SlL#fYXG2}MPo1j?P>!Ffg_{a+!**Hrt_c%BD$y&IYa?EMt}(#M3Y+xF
zlzgC?@m3)Ru-t<iZX|I`%>qF%vTda^H50%vazMqtJez1s9U^D{v;8Ehx%HID$iRoF
zkFF>>K+V_vXv0leb=Xnx$J{wHH6@D9aNgIom*VmOvcZ&IhZC&WhCcf@A7q$meyGSD
z>t0gAo>=ivHm!TRnHf&9S(X7^{+C<8GZ3Vce&;_e6`epo)z9lkWzY{J8vhzAXfdk|
zHh#;^;vxZ|r!#t1c0#4+h`kqmv#?y;<({7E9fQvCs1<Dku@FB4O6`q}jbv;cdm?FD
z(?51Lhu^svS5pa|mM2Ize+H4r4bRI=4-eKv!Sgd))xgh3#LY;(uJxs!oy=Qb%8xFS
z_G{Ae36oED>8%E5fa5Fk?k_^*{Fu~xOy9Q%=N%sY)^ZR#-RF-tr$og(rU@5{VI{0P
znJ-#M!<{f=qQcxQ9HD(p^JPT&@uai0<3P(Fm+iAfgg8Bf1o(J3K!&U=)xzkyU}<PA
zI%W5p1jjNRbQ)9Id+pO)Psb3R<?){B@_&YNvc=)Yrdr-j@zk0P&Y(LgOGPa2w@GUw
zlu(VE=o*cD7By)CC`O-{iXJ{;g@I8e7VQ^?zo<aO6fczlXc&02Cod#cWWKa`ehhQu
zFs>;aEEkBo;PCq=3#ZKaTfDAg2OJ`go6NoK3+UhOZt1fJWu^II+7zbM$c@v`{*v(?
z^Hj47qoN=;0oCyHBUrWe6HvS!rtL8iE}E2SSSbI0if^Tcy*&vrkhyq^UAcXki7~f0
zki&o5ftF>;Vuz%LTyHI#DDnm7EavF+=C$_T)<O$8up{DtT>HqSK@(7C716@){3_v~
ziSXhE*ahWZo=tk&AsSzTnnzfb?)IMyEsI_|GS~RbB=2-$9<p*CY*nq5eUN3-?h}n$
zJYw||a{`&O1PR=eXskl)*uA_h?CMq#7QeQ+-Zk@-$3dRV#wh`gry`)-v7{Ier#gr%
zWR?{|vWW$qlqZ+jb{fg~L71MOaA7VT^GtMjp#yIM-U1AnCiYa;ts~Ln`F8clsce6t
z2|*>v3PTh}Ao^kWT_(l{#8h~w;@F<xpXQ{;2h^_-THi<$dY2Q1+c#TgQ#5X{cn-SQ
zMsAQyHsEOc&`{5iawoL~dmX=ABD>?GO*03uam}WXXO|6fjgC8td~-l@SKXS7jH`+!
z`wUKZ3tAMEi3wnR{-rMeNrvR-yk@PEn`s28?I=f>AUn|x*9$LhWH1WSNMu)MCWo#f
zi-UR4``tIFB<~@;)sF2|R>OY=n*k4=4;j>Xak@4pT=wz`H>-z;v8Yqwd>9<mv5UtH
zfJ$x&0wl*C7wJa<JeVA0Bvtrm)dX{rx+2Up*T=d9d1mnLux-3zLn)fCDI->J1)5$I
z;`=wWy+=fPe&9iCusJ<<AZFT@#M@>vgJK^NSLNQNW$AV|i$~$ECfBT88W$i?hDULR
z6b_rs$R4j1-%mPbUvkYj?xz6`UQuHCu|^iR3UQz8D7-#SDlRCwcOI!l5P@Rn$y#?>
z@}T|GmN))6QEomKwPe=z#66d#<>+oaunhzMHjRHcxiY)ZUuCXs*N$YDyf~}~Z@QF`
z?FAhN6!Gmguv$=Y|JBn2@A~8)K;*-hl%<#CI0!Iy5{RvY3LpbONR7kBClZofkQhV=
z?Gx*7%Enodf!CfjAym-8t7|4h1|?$z@!krxvLgPZr&mW-P>dU$CqX{P)&y(&IwdY1
zAfv}}<Y7YM3`2mN$V7xY7uJ{b=-ZXRi#UXshzH5-+GxzzQqXAgpOBkC->r>BB7A*B
zzuF>;mQuLCD6@DyRJNA2=-n4vg5>aMXout|U_-Ld`I@dpEOJTMZi6gq_o_TU_K9oy
z&|Li7xw*TV0Q(fI+-y#J-UixOAE!L~#kc?^U^_d%-uJ43_FGoat|I;W!V}#@)5p!v
zUp3pOJBv%9E@j>&rykrBafkB+9+ofsGN}cH0Rxo=L6*;!%=rgU5k_&Ycc{hWzmbx4
zNv6Pz2kl{{a9jLC6cNpS26hJbwA`e4{4Jzz$-5SSWe1^vW$xoN&tMUT=~0NgimH-X
zNAkn|F}PDW@*LF9K{I8sqH{Qa8pWfSyzX<xTW<(4poZn+<?rStLfvN{k`dDJZ`*k3
z5^W$G)wURhy0KWK^Oq`$N%!_@symj9B8`o=g63|B>`Dd)#e?@a3bt4L3c@KqAuIW8
zwObU!gt0*8_SPCxe@G@Eoi%}jf}jOTe*M}2jq(R3PNHGf9gqNH8U|`<UUM97!a)YG
zh1bGlCe0Bcg!lY@QqGi`RiQ0k!f<?9Z1HD(6p&?o_Lx^gp3LrDsxV9MINae;rdjU5
z!81ivx=<^-;mG1{V!bvC$|ZIF3gVr1gvsI`JJ@RycgFpT=AfQ3hS^3w+j$eh58T8r
z{cQC$LU0)CNMeRb*9{v{=?9>~t>i8a-sh*YU}rkca7f&cY$l;mRF}QCr$%p`S50d_
zMITKWssUXfbHabz6bpaq<BZ1VN|e(G$e9}4aj+&M@wOrtUZ^69u0l+T2Q%fr9R7!Q
z0#kF(2fN!x^vnvTEqF6=0te!^GnYic3ib{%)T3F>2&V-nd24E5LV(_oGuBq_GdtHo
zJ4;PtBB<WbuVi}k_p*1D(t+fqeEJcTtpNR?2K*=eq=vy?;#~r8=s_DC)8$j3OSc70
zP}6)+=e^y3@yj=MBSp#rrdYef4-V3fqyA%=Km%BipMFXkJR!S#>-KnI{|?;7ul~f8
z4Uzs|mI>I<3Z)qK{+{y|kj@$Y093K{m!x1b2B?M=1;u^RDnm$VD2%|TN%*6;*E0L;
zUU9$RZ(e51Q;Fl*PLUhKe(-aGoSKA9o{E^jygVIuGrHvehh5?|*Tv_6a?Ky!B7h86
zlg<m23^z;1PtIs`AV_JrcqP^_2eU2Y0#M5>ksMT8N%M;-$koPDL-{Okt`WC()pcP7
zi_#>Pk7p3WA0r;Ds(vv*p%r!$*=UUJ*ugkCwtH@h#K75u#G!LYKYMIU^G;4{N;A3y
z;p}B|=A?7#hV+1JIjF72xiGk>{%pK`Y)NV5V2ba`=8SJ*J-3WVApjzhKCy>Z4OU#I
zFPX*ypv8wXat1WaqjF>x4A&7AkF!V?0$-38q2ImkVtm>?&t=D1(?#B}-n5I49;@1S
zkckOr!F>Le7C~oaWSeMkG4W;S#$+i0)r#;NtqpbDICAIpQ0b(Jg_6mHtJ7q1g;(2A
zqGuV`reO74kF<B8bn(Qcqgko?p8urPw+t=x^Ul^1ZDhDF_Zgle{;kQu-#;Lq^7rqm
zkg7os|E@`{%AKtxsL=;gbU~{;(+X(z_5p%XV)WHjEf*8I_%9RW<-ln89XJzBINAXe
z>W5rsQBskT&I+Ix7O(io;tD}D*Ro+|L^mtoWI2*g;GUo?!tIr`IF0<&U<x`-u7)Wk
z)SVIRhwOJ(MVJ)X24{`&J4(AqOe!RD*XMr?3$T5-soZgH7H?OD%ow0OF-InJere4u
zveSPcdm)Q7l*QXll6y+S{^YW?70`1&Cb9Ymj6LjAD#@BB{$XyS?>#c3+x=w>D<aW>
zIlf5}VKm<}Os`MyLC=Pu{88(&R;MevFSKz0iibj7+%t5o89pSWr6ao<Z<336Rt}cR
ze$^m{FatmPJ0jFd7!-<J|Db=N2KBhlVV$FE82p0z#>dZ*HH-GS`(pgk&br*r`YZ!(
z${)?|7rKBbONd0R!#`f$YYvheB;9p!6SuB!GXmC<lR1!mcmaGg^}{}VIp*^XsfAjf
zVue$uipsKwlKLt9E;b%A+hU8~zfkP$AI3j>8F5b5GO}Id^RX%3O{x=lJ%}i)+Ok3R
zk`0i(rI=`Ax^rE9jL>)2zTr$3(M3>dJTb{;VjZhU%;vB%g`vsc-(}d~PPWLssh;}6
z-K%e3_kDGf_~w#9)#TPnOyc6&e~vb(w_se*yC`_y7oZ7a27g8L?9T*MP(-4{8FWh0
z_buGoJ{LkV7cCMw3x=VuelWj$(u2m^!eyB}n$j$m2pwcUUSzYD7z;VgdmC%GB66>A
z$b_33C<o}UaQXR*{;;C+?UH)sZN&a@NJoj0j7UZ42=Z|{#u(%9SONlrMfj<iz_Vyl
zHJ}O&7W7fCHa%EfB*mEp>!U*b0J`juuPI4w5ix<1&sdYOJAKDs$KdU8&u13Y*x3eB
z?|ZV8^+8=!d23R4%MxLO2qVjlAbUU@FluFY+gRY()8d#zH#16qu|%TnCe)qPtR*wG
zgU{^NvMW^omm>_EpS;I_jq)IXaw@!6i@rZRlLF76iCRunTs(P>?AnwKTE;xy%Q@s>
zeI$1IsWEd{s^Q&Q1HL5fBx1B}p=u)Q8%MV`Hlz;aYbG;J)WIq-f(1EYk?UGj>M&d+
zk^>wK)ODOPj>PML7mSNv3!B?4PTem%H|7?=SB<Bf&!W#`L<`^=Vgpchk~eLr#IjlQ
zdWPlx0oS9Pp00QD``T^e(X|l2%M!LD@7Iuw5>`0QoSLoTnc=+P5cDXH7^eGvYYr19
zIS=ud=oy;c>OHjAm4!9C@j&`afq>7WNzhL3_bD@daE~{plp*MXjQ$BjQ@kXdmMas=
zQ$>(~Rl7AbLkMhBP5>vF@eeAi^@Vt;B&Ix^r_@y?E;)0Xp3O*I<4e>0BHUyMBBYUl
z8SWQ4$mwv+^yFH$&Zbpuq(l8Ai7G~pXqo!da0HDQd~N4m$oLypsLG_}=0VU5BRf*?
zC*NeQYT-Fl?fsXko5d&(iv-pfx#{aty24-Sa&oTYxm@9-bUp^oHtxeE!rY(k;o+ei
zw{1{Y{FVP^WH%#>E>Nu*A6TONYX4*JGM*$MQmZIHHdXuQYC-d0lmWw^3qV{ucc*hd
z`@EhClCW}WK%YB0axqTGSST8siePTz?uwb3I-Nf;wQ#<djkI02VQ(1|!P*6J;W}ox
zM%KP5rtAU^bGHnc6r<~FB4<{UoLII4CA6K8|0xt%nLT;hRN-%tL28*Vuv5S(o56si
zS{nUsVH~Fk3EWOg2lVDJ#>^!L>>xB4r9r%kt(H#eCE0hNnM@tAvE3@CTe{!j(aRxO
z`n9u-MqR5HRx<B}{P>peB#Vg*mn1v&jl7tTh!GB5Oo?c?>rXvS;;KZE<fg5rk%Jux
zCsj%9Uys;!xE|B(9<GhLZ?SHB#9^a>O-&}YF9&|~mx2USUq+3Qz(<jFzew-fo>jAc
zuv>_%OSJ(cRh1Kwz@Z2*$jSwytG!jf*`JA&T#?7vFR+)Q>j_Bjv1ZtZup3B5r%%IM
z%dC>oDwX8{J&LgTn_=&hk|FiV#(%?O!S>SvCV%n0K;)DblcA2l3_8&Y;ZVm!{p_cR
zf_L@Wf_v{Dxvravt)~A}>fOX5P#}bo9o){<<Vm>U0A!%maK!jX3ZG7{m^$+!E-TKi
z?x$HPmKR|mnE?1raifJ%#ph}vuBMSOqM8sE=o?O5{o)jPaP^AD%Oic`;(W!kz|~8b
zE98HIM0^K792Dcn-<9Getp<}ZPH87cEimqsFtrYG5k^|+tkX(yTh^@2weS9NIs9R}
z6l-LHxqOlcN5C|XM8#v1ciO3fCBftq@!5&*lFs~gf;@aM?VbD9?mFOQ-c-#s+(Nbl
zcO0?=ZSISVa%Z^wWJN&aFM?$USXpD!U-ACKcz#K2kHr3+V}tf3EA{aH58?M!lyxyd
z{sel@2R1!qXl@WF&cCPjpf6IE9jCJ1jc~4*ycv<1p0!quWDbTAQd7s>1g(;~!x)p^
zjuZ>i^HiQ}FFAbt;&U^SM+U)hnxl5cMGQBK>()lMkGx+6U06=iCCt8;H|2O`R(k1=
zZ{;{8(-q(f7A_}a_6zbQ?5?aR$jX&Wgf`rhqBLYj88bI*gv73!rVpzQ8_1U$q#Gsb
znD{92cD~_$t3{A->bg}zv)9wLT4Dx=cpd7TObqi*O68sfTqd!t2p#C7ASMjpH(5@L
z=hy?hoza8%wHAoD9H?(RU3z|{tp9vEN>&FUwR=1<uPF`c3$PZp7b=^C_6v<?a0Gob
zM?CE*9=#@^+SC%tiU0hwh#t*<dD-_Yi*>WkW>ZfmR_Dw|W^*K2-+d~g<+G_}@(ss2
zqozwtVN71(>+3Y{MIB<Ip>qv4+>(Lh_~9+?DjmV}xu_@PU979LHYRo6vgcA|Y)4+u
z%bj9~(ugrhB;Cv#S;r(S-0H2=dIUW{j$w*op{vf;tQLOj@S-J`&$t}Uq<W9-;E``u
zW&X?T6W=_pa^1?RbXL<w!y9W`fZmIO6>%+ixZKe}mZXiHaIDHgJ$Z<m#Uy?MlPqOy
zGxU(dC@zztJt1_x`EOZNRk~k_(h1X@;UIZunhYsK28)kOLPi~%sB>{W8~5e>AbES_
zFXxThFb3~7JkJU9lc)^Jt7~f2lsV-s`CLMw+}qmY5cj_K3+uu=B-S-r_tqv@T!=$!
z>H&>Ra4PtQ<D(Xa9!_-MnLo#Tn_IHsDdB03;;GbD$gz+(f;3r?w5Uh8y^*nK83#jg
z9VxXQC!&PY?KWSv5yy@KNmi|{6WK$VtrcYkkl|{MmH0_BJQ}Iv9zkUwcM<LGI07Zw
zoPC$*8E$gU!WO!Ktd)7j%n|Nh(jOxQJgk^e7k5e~>mDh>c}-1gjZX!Yi86-eY}U5J
zM9d0HRr;Lm*uy|IyNlsDKmra>q8?ac;~Ve<#Ldw5==lUL4}T}s;x2$|V2qc*9;0`^
zswIyChKp(3l;|v<g6h;QPKE>Qt%zuUsO9cO-Z`7I4$>n&5YpG>6ZXq3wd3;3R(mb3
z7xTrD1oaX%LfIjo%!rIOx}PkH^Ggz7#olB4Z>pEJnCuC)Ow$d4o+S_wvo=dnZcn*3
zZWq*=9*MAokq1gDZCljO-K)l+=Ael8n`UCn@iB(azB<(2G`BY3IAz&B3?|4{h@*x<
z53{w_;v)oeQFF6g?odTFpjM#qcS#ln0*wV4NxWhRt-=O>Z7bsIiEFfvbj`6}!7$QA
zxPHOA?o9Nbz-b%f29~PZ&0Z3+3WC(1u0ScrABt^qi+6TMIJ}l_KHR>8SlE_gwg7P&
zHY0u!2x$qWQ89<)>A20|h5uw+Bl_btbuaU58NHHH!-IFH?D7iu9uA?aVf+(M6m~op
z;wOjG8u;)LHYN8jz4Ze10ktvI$2{nX9zO?&)EWQIn_iH3Gx;Bn)weQxn4F1PfZday
zvo#Ut>x~k<z%}xZ(_9zI9n{SwTu%4n^x>30c3M%m2S1U=`@VQ?!lqO0@KUPc>H<M4
z4%Kq+pilTuU5V0N(wYApyWg`wP;<Om@-jWlcLeUw1(?gU7oHgU|GeDi{^Cv%pQYiM
z7zOPx>ntv%nf|>{&-WV_E<JwEbNJ=ky<SIl)Snmi81Qt7Q3cHkasTGPge!jt+v*#C
z7eWCcv9hw^E+@6K&10nLPz;}MM-eJe@5j5ukYt&XZkgi9eqEHvYDb%*vRC`K4i42#
z)aSnoaRmhpg;xBi6(DBIE462r`TQye_%w~)t{HDOwB4hZp@vJi7j~t+23roO<PDd(
zreau9(g}&)SH>SIvPDao|86-O+f-T}Ti;9#sIh=IfGnu3mo8lbMY=dicA{X=POca7
z&TWWWt8yzVE75Q8y1aCB874frE}CGOkzSfHGw~SMEfH$okjV(_pFPzX9c+}~#+()a
z!dpgNT|W9C__I#?;T{#A@&h%C+xUR|!!cij{;6&4Z$&GXIM%PIRFy6l|Ke~8;g9b*
z3sL*`C%-_b_HaMe&=Iw6h!XrIs>pI5&wvdd9?^mgr+)t_w!laO4on`mc)to(e#Czf
zN&yEj+{=KkbNhm`?JFpb=1>@A1}8IzeSc(=P7vu#p!~#=tK;)ezKI!a?Kd$a$BQJr
z1sUAy9V=IgG;D-vU4QJt^&x+k@qVP@7J3;D*7cLa-5CU@d;u#0je~)=lRbg()qdH?
zpTY3$K3l9=YWMeW^?P{$#ZXii<mc~ZDi0+;Sdh9s`BW-1_vU)5Qmtm1xp}5E%Yj)c
zNN`-|lEqy@rLn-iW{9i@Wt@(l1`MMl-K&<Tv&|1*hZeCSS*oZVl)_2W|G#rGz<)W}
znjDpq@i!*>J^f!yX3Umm{3V3TRq8m5rc02sV6!~ubwnEb3DRX#nAZ?~F(Fcff^A6&
z;Y6DlAr3fMm~;4U?FLQYpurqI9#?t=2f{1V!3}FB3xB>VJ<gV9eycw@D5j1zcVDeH
zphPBk82VZO6nXfPx39yOmqRyes69=kNAdq+GMWDyCfoN*EX7Nw=oDnzdx_f7M1SV>
zictVO<34O<E$->{^YhgIb?BZr<~*6pN7htlx%4eTo^|7}dFIdH==!aLD^335@*hk_
z^uICLq9_$6?9$)=$;oi)*y9{GBouv<)w(F$Om0uubb)pwA5BAnB~Cr5gR?_x<86zF
zJhj@Eua&A?7<&JzH)2=BqQ#<^{|6e|ybN(Itl#Jwfui(&>90Usbpn4r`X*yb%^l(N
zc0)y{-+(e2xNk7*w-=o=+spgq4{bgEvt+&z7zJS$o9@l%_|iY$IjRCb0W?go)PQwq
z`nUH>WpE0cQ%`$aJ74V1w4?im2b(9LE7Smi>a1C;B2t^e-h6qJ(A<n@t*Hy(P^wwR
zPJwk1cOm4Ne!g8Q!GWPy6O=<HgNe}{KT>B5T3j@~L9he5UNaUZ%w9dBzQ(kjKTFb7
z+qTErbKHR9`tF^IyA85<;W3R3s-?zS>TkXWMm`hpH$UW^jZ$bRk|8L!k5I{7*6pEN
zs0l-U;HAWP*ciud(SN}BcV9l|JYBcpjy%_--QucZkiOoY-R|0N4Jz-9e2d+RvUi#x
z)4Jh8y0McDT|o<_IcH4}(e!ozm?+r7QO@`UKM1oK$lwF$pfZTz7zncw7}*d-uNgfk
zSzvFzMWIgE;Q&!RH#o^0G9{Ad41AXH$<#2-lbGCV6%>F2s^|irR3bcu&)yg+^M{sV
z(f7nPvIg^|;2b~N(41%0jPWH?d#r`<QA1ZS4Y<r5e1Zm6!WiUR*QSY@h~|lv7JjAV
z7Jfj*M)r@}@F&#~8oulwHg;%7D+aVt!!`GJabmLTGl;8&J{S7SmBC%+y^U2vtPvw1
zbFvTi=Drl2jx3#{JPSsx5-m)!f_H_>9y8*=j^1}d)|RHz24F*TRngZo^JCA748%=L
z8Ax|Ec`Ia#1<&|qG2ra)R*A~3gNq61o8ANJunzlPx&C#<`5XRQpIzrRj63fKtoGLW
z_I5Kz<0pb>-qgi;Rn$jv)>@qE^vTx`SOHPmdaL8ZiSvZCnWe2#NUsaRR^l$trV76v
z?t@SH7q1ED9SPNthPJ#}ei^9g#<c^si}TXKs{({~#*YU~i)I%sTeYsO_Ny^ZheWA7
zEY^HXAIy+VwzF{1MNSxxacc8A$a`N5ocq88$0E<C*IXQ7D+cGo(C>GN1XSp7kBk{5
zdYO@Rp7LLeh}nyhh5zC(X`}yz!?YIsveZzM^3!D1rl;6|rsOL145wh-CFL@JhXdM@
z7zG6LI@(%ekM=2jEeSXjbs=3gS-L2j5{LSa`T*Jwyo20vYC4YG*;}z?$q#j|FP;1d
z<O@0*<5j|52%P@8k_98tHn)rYVVjV%Hd24J604*;sU+uvqWnUf-wf)t*ewf)!3s%!
zb4)X<Bht{<4y9<n8!^kBJ-}al3_24+?Nu4FFcCly^B$Cj{QW2i;f1(Ly@Ug~sR<93
zU?L&VUze1N!b>HB>>upf$TP*~jAv=IG$&Pj{!bVdbVS%rO(NYek&fuV29#BR!-qI&
z!&h^hOV>Q~cBREE_8!^o)V8cRUQ{1xK3=Y9F*<80IITD7VwF@G)-Ensw(!%fe%K3g
z{Wob)oC>-*1ESnkRi~vb>vcQ476+5045`SaB0!xYLkKe}k!h27_nP}7l!gLn?GQ-;
z+Oy*rH@T>EI+#NM+;F9u4Gwl>kEs?hCkECG8xIEes;<CPzbym_ggTzEP4XU71;6wF
zOgJ#R-2|}5!E8u?CWZ433A82$1XS?&RX2{3)qWP%F+4Lg;$iTGT)7JlF!PD_u?|Qx
zx%}n%Np-)BSfA-fUQkn1Fnbg`Jgn*TquB-PWcAgyY=@G<({%qoI4tge<FF5WSo+Za
z;xOU=1BV@r{Rf9}{QJgX2mcd?{mT@&x*Olk|ND<L>n^d~CBa_tkJln52QH*2{7;Xy
zg@%Mc<=cB~qHY4ZQhRmkF<5A~K5oNtL^?a@33}ognLeC}d;R_7*y7R1`2SB9X7i*F
zpT7S8WMSeN|C5D{r}Bpc`W--W99X$lNV*>3+QCV^PG_f0H^?2Q0r)l?vH^pv6C;o0
zL|#nM)MZiy=R5LGFPds=N{-!D3Lykvu=DgyhfR2(u~{v<pn_9ZZ~42@tGmB^H<XxY
zzjiSoLoxFHo$8NXB~%aTGH>8Nt=^*v7W0I4FeH&>-tg5XLV3GVz>`bNPFUr(^q9Q_
zlz{&$gBUaAi?yDHihm@EE$k$c@(_-{3k)g)w)`X`04v@?TN7si`JT@)J3gPAWMZLj
zZeh@hiWtIs-#a^>kIoE%9zjQsQI{P@>CcDylLeo_VlQ@~FW^PknM>oy+g=0lugEwo
zl;D{L8M8+6575t|LiDApzHx|6X{~Cw(BL4rvFj}RcZz?qp6O-^aubx9hDOrh7>wl{
zwPP7e?6ufM0g?xIZXrHYRT&S{O_)o%Lb{AkkTm_DGxmM2giaeC6J|g=hD#vf<(FCO
zB=AK<4+!V;N@(&2gnNus;X3SU{{$2%_L7-shSV>b`fSK7i!gu^^YC8gVKu$pIHqNR
z?HMvQL4fC!AWtG&;|06e$NTI(QCdvy_GIRgs%SAGF-Rp+Q1T^(rf_x@rVe^eQto1u
zg_o3m-z(gK$yjTskvZ>JC7E#`fz&|q`X!bBl`wRngBMqPb=KI$+KV-oUsGN}fnhv0
zk;!qHfSVsq?tNYy;fn0?U&f$f%6nb;#0NQ~aF)1iW=S-?=k#bfL*Cpk$Qx3VYgm~i
z%-H0ajYQt0P^oCQ|MH>*i0}tg4rq0$te-~Mk!h1D*)zO<ui5x>{zHFsD2}w94F%ka
zNN!%*eV^wG!S3rk<Sc7Vn!B&bwRenAu2>XuI>dLN#(V|W-jmC&HdWn}jFTzlU5;(U
zrDQQ(`WYGBwmOuX^as(c%i`6mhYB1jzZfyXNigpVM#YTL(xkbO7(2iB5}k6(c-Q~H
z2es(`4Ik33?EkwlS+<Kj8NB_yBY10g)kLm;1>~FEZ-l2#*-2>Ip3$16BQJb0lV6i8
zd?|^_x)KWSH2gkXSCHP}cO;QIV+9W>ekqv1LJLzg<X~{te0fSD^WMh2)GT}D%kF!x
zH>>TbE9<U19WcIGIL<3B?Y(t+y<NYSI_t`h6fUeY)AB?lvPkkZtPnjRH_A*?MM(-n
zM^}eR@3h1Dt)eo9Cj1Q5FPEv~pSbDrm5BFz85qT5B3mR&+;CsN>uBU(7A)&~Ht8ew
zQ2aJ{gaH~|28FXA2>w0;MY&|ZOT5sL+{sT(USM2&XVRXILzmD|32%Y}J`cMqtabAB
z`vbdb@q&kBI}Edd+@FVFf%p{mR2`X?38XNylh=9Xgu%MeJDAvmtjmJzc$9=18EhuQ
zG=x$f)x)~-MAMCfTt_pl-kA5nOzuoEg$U+;_9xaIuNF5{b~3}(2j7ES(L9vK=s#QX
zqDX#*KVjY7!lzVSYgEUQ;-|__BHwCwe`%4F!65*1g8(npkt}t104z>&bl+q8(AN$i
zpm`_2U11_rU-Tc%yA_jpBYRi4-jFj4il2JKK$Vj8FNMM4q;<3<iv+t-qU0z=k&W?M
zem{#CfFO&!>#44=iJ!j4f#rsu&~%`T;*G}f%lZ7SWaAXI`jXDI&b+v&_~KnVq!bgp
z-y<@<mr>o6*>yjjC4zaQ`e$;1(uLqVY!i^J<-0sc;C*IudH(Bu?)#j~`wZdyR|BCb
zux!=)j?ri3jWs{5WP8-f#l)9&<~5%;f!ORnajW6fdjPI&_B7L`ceiVX0B$G-jyl^q
zAI=zT!&>4LI{<r+SD*r<BShs;PQk2)8nM~_?Z;CFE7Z;1C1*|m{Nd?<4hChUEevBs
znuDt2R(ncTp$O;?!yXfrFm&V8t;C(!SmfMFZUq?%wf>;-u?)BL*>&GrP*7e{D^67U
zty(&e04@UG={_@Nd-cir#-O6Ts-f+_v5*yfmOgb3I`KoQ&b^)&tNGv@u|YZ9`c;k1
z<Or*HUbxt`&kchP@Zs63C?j^kCAO0?8JQl|j=mB_gy{3umh~!-Lvy$Wr?3T91FQn@
zovhcl4?hrX_MH$ny);mB(poV`Rw-BY%q>0|*uGwgBmD?wlSAq3`8c>Azn)DK3!3uR
z!p+6*dFqkB745Ji2<*Ac?UC>!XAAKLwgL}zQ?|?Ibg%<rSxbNM%=?t*QhN8rg>AEg
z!i{~~c^IaopVO8|&P$uXf~vOPgC+CqEM8sB%h9%nVDycr>L`$9thNErO?4F;DJIAL
z2G^5Qju`$m-|BYWx4JEx2Jt4Qku|B8Y%haqGIzv=e><OUDSbz0BY|Y?*RDEBf2Lme
zK(dz%3QIa9IgD?(COQ4BZgWEtBAmE{;L`}#zj{!`(~BXgO<Uz62fGqZzL3~|9<hyb
zJ>nQY6kr?k^?WcEK#Ty_*EgR=fc29_pCd+Vev!$yJ-(9fF>#DvnD4S0MBTX}-IiD+
zVponLfnre*N30+Y1WGmIs26V@Az&gCaZjRF%y)DHz6Q94bX(i48=jD5YBXx>XjqnF
ziM)<E24jlq@v@4{%0h6gvFu?`J-et&t;YMS!R}Ykk|KaQ9O2c%x<1>Q41+%}FZ|0m
z^J2w<45_$F2dAWg|IG2&WXw>ui@g)}myYVNsHw7E*CTCT`~H)v;R8pIUPS#*re-8|
zfZ3tH6{op(iyN${ZrPySgk$T95k=Z;ph8tMX^xR)ex}r^_l(VS2^v)Yw)BP=L0Z{E
zP#VP|YIUB(Cpe9N>X*RTuL$pfJ*<>qdQ_JKjtqDk89gLQl*r<)Fb;{ltuM*jeXuE~
zzXPKdw!JO}fWQAq(bVhzTX;6f%qFDB-9p5Wj}AobX7}XSK5e#7(Sc0T_EzLJYd|m<
z-s>xM?kn+*qCXh%p}ajH$WcyFIy`Hb8qA^o=PP)-KmOezkSOdtwx1$cAhw2-$RyM-
zr<XG8CkpAqfbOo1e9Zjt_vDom7jt5M7~noAi{!0*&X)X92N&?n0&j<ZFyd5mTQK1K
zDO<P=kkB0Re-73<W}qiZ!H5-9eQCfn(m!PzX2jM@B6XRPY0&$lcChH-vP)fsz>jTv
zaoRY1B~0=w9Q~+(J``Ifc+llOE&qsgFmxl5fY6pV-^9u|V(Bb@p!;-6=@zkL(Z8>}
z0{!RP7PfjNRb-~o${FOam?wj@njIP^Jj#ke3mA6pqzOp3Q?w_)k$Y%LOe;9YrNMyd
zuRkR#=ZG#8^>)u_xNG`W8rS=uWU_8(2kTzxZ5Wc7i5J=j5@GkN$kT<sF~r0zw#w{>
zw9N3Rn%)F|CK=H0qAwGM^;rjB#M%B1%ZuXnuQ~^tRidjaA{TH42cO;zou*SNVn(^=
zV@U=>h$&h7j7^8+&YZ|wARpzd!~X1Qq^03KRs6*0svf$iGREgR){+Io4Ac}K!w6$m
zr=aRhX6#?)BW@lXmnET}3?Ckrt`kN0Qz`YZq=k%OC1Av$U|fkVr%~AhK9!($Zv&6d
zGXY<LIIvOTGGly1(mBhe%8)E2+HyqN&OS9?k0f3!>-_%rgeC1AY0H!@f8mGi-Ry=s
zQ&*kIy&W42xwu}`Y9XhSO`(u-v7L=0>vG8GwCXZNiEc)#af;HYEib`Za7~Jc12+7S
zgUHY)fAAHHR{I9DGwe?`vZqZHM)#VmC^Z60iVO(E9#?|`|4e!ltOO^JB7^5mloh~B
zasTb>d1h!&hSaTcoyPRf2orj+d7*>Q%mfFMemsa&;&?;fPA<=96b{75lL5_^DMHs<
zo#_%s_?5qF4XLPZ?Cg_5bqdi<C3l)o6Dr&>eCR^HN|*`*$Ij;ztzCv+)&KCQswz4k
zwcdj7U!dI&cimOZj`HJl1<EyeuUV)Xn>jL;eG`)$rDT&rr>3yTj>_86FVr{(;B(KS
zCb-(Y^=v^Jz7^~(y5%pymb9JCn>C=F^|z)?IQEr8fAcMJOEdsWTw;=e9c#FMT&+{h
zdW;kzm(Y<hlzpzA!EQQJwm=oTA`w8dN$9)_kN1A<ONu`t=kDlKO%IDTf5|O!OCb*F
z_b8F>l*ZXDj&cso?QXRO8yo6yKg)RQ{PC=!Xk0(p)}!RRe+DIMOT{~~WSlTQV&%j7
z@_E^h|1SLdZxan&LK@?LH$kBB5fo*Pe2mO7wbOZ7e43ZUO_o+pN2NpB<VP9>>&lhh
z_w9nM%x~X6e)DkrrY!en3iUe$4m{ZOTT3<CTRe&k3?Zb=FP9V4zLhPE)Y)_pJWrYl
zBN)TJTt;_Ph6^k?ZwbS;(WKgpBsp)t6phmqe6W>0P8DmNsWgsMfO>ajto--=BEkvb
zR1Jdqc?J$XFW<QmvVaVHMx*%zHKT39Wi&g!wn+9*9n|Is@)C`OFlxiS(M~7z3kzri
zESRzh-<;Xe&D9@Y7vrFURe}xRFs^B?-7P2Y`6TM^BYl^ig_Q2}k^?9=yAD-OKtZn^
z;4(+`g^w@)2l_hyvnGn=uZtgGHm6<0g*NL*?g_QpPucf3LHg@-#CsHZ!Te^xXE*V9
zo3Uh@H3zW9JqzNxM-#Fran^ZnamkC~4eITM0AV~orF6I4N#5NO31vOXKL+_{nBxC7
zIV(dJiJMcHh7sFEIvaa8LB}(N{oM78vIXMZH~%8z=<rqe?>mAxJbtbU6CMVO^1I^g
z)Cz7nxbcR)BWYRhZt2|-Ai}u~o=Va<?$^>f%4dS~6tm4={NtN`Alu2}y2#+PX|zj0
z3?zS5lA;!pYM)2rXF<9su?I(^AhfDj;5?a(d0oZg`aIxIq9i;|)87t2J<*=T&Gcq~
zbw~Yzpd7=s`2NcCEXe6yvERYobH0)f3bFuc8bi0H`%z$geTxtSH!a&X$0K5$?!?Eq
z1Mv@;it@ei5FXv4M@C3mv;&f4Ve+o+k7;*|0h2{H3?nTmvTzTXN{0&_qE;o0oOl$>
znagi_aP9;X2T&7$P?Q-nj}Iaj{2A@12-K4AM1H?lfCt_jn1eG|oA=)!Omb)j`72oU
zcrJ=e^)J)j{(nY0H117!-)|pRhyd2j=QyyoN{mr}dw`BQ?4e@_n5GZ+-{w?=0jf9R
zaB8IR@98>)Uq2r5j>!tEsEBQXVJzT>6=(}BfZHRZC-u_eDK5s&n&br&GKq84*IV{j
zFT$!kg<dfGCT&S+bc_;XcXthm0CEVKln*+5pOn9ZD0qKW+LE0<*=M3C@8Bsdjr5hu
z{ixL+8OwgBD}P9<DA%wR(f!8BgP`G;`ujsnmYmj_MsrVPOjV9N_y@x~d96Uj@ZSg%
z>pD1n$D07K@YH~me1Dm}i6Y|xjhXdwk9=6uO11mQ-;=#Iq6VxhHlFb=!|7I8n!9Lv
zSNziQIopDM0R-hFJ`yClJ{C&{(4JKj)#}R&<DTRC^s#mVusM%AD~?Du-i1AlQCSVJ
z<~AX*-kvGV^w>b34vmi%O@`#a=N|OmFV{tTJ$By*LKL76ZZPo4r~`Z7>+<^RxFwFs
zj(qfcIi|mTD!jeJTKJChr7Or%`Ti=>s$fX8sesvsFT@LvWVqB5qOh837P5^IuwfW+
zgKwwyv;UNAiQFZPxJ}Hkf-=VcFq(Y#m9#Af1m}$mcTzk$k^`xsPe1;Z-il$ckr&(?
z6N)(++5Lp-hY6DMBt`DYAIqP+(onM8_^fhep7ARALR+|}UTDvjBuD5AzbF;|h33fI
ztPy9DH=vO#DQY3a#lK+GD1r8IzgUv$g1IX982N;$e@4S+^(%B5(TixJT(6>dRczHM
z;Cht?jCrYJc2>@MNX>cl>_Icj6>9t*`pGQGU`iOrI-CW@308)doWQ<nhp;_?ae+DE
zC8h7TH`<2|XAc3c(+7eH2VS_#PncQb_TIJknh4T~F9))n&GB_*9NeS5cP-kWj{veg
z1{2WoYqtu?o%YAJ5y?$;4L@``Iu7Dseh1>-j-%sp7y66*Wo__)gAYBoIm90JymPJU
z0`boTT3TJ%ngX3L+a&?f7EgK67E8u_ZGO&tV+7JGvIR#Rr8eZPuj{5mHr4q%BKoq<
z5NRGg5%Q!JMT^ta2u#N6B_wV^I(cWhuI}P#^^E$S-KvvkHo?q%G>8%^ei^RdWf=dg
zjv4;Nl<@e5eO=w0#zWPDLp-wBf%;@x>9#`hm3+J|1nD`+-&09&Q3-j^I~_mez_lAh
zuP6O0j-<O86uZR-mdH*sQ2dU5`<d{qOMgc7(TZ9TD6tyKhyAMGg@aaiHj~cy1q)7L
z3!+pzfBxI^^TYEqJ6aE@QR*K&#YHpp+EwbjO^?s@bkTI`P7c5hH$)ebZAQjgc!~T$
zkuCxcp_{uho~e<BD&?R<oN(inu*#$5A(UM*=$iFXr`xi_+e}OPv0`2K?3dgUK@RaX
zp|#hO91Kjax&nR>-~MUn{mi*w1%MsH@?4JhGCkx#ci^y%q32rp9e+1pj8FIooyKMp
zF;8?~vcf?;sa8LTZmvA~ux+19>Fr3_Q5-i$4u9VMTpwg0_XBIKt@duBY`>M1OabAu
zrty5-ETB|CPs_^PbR?*J&(SXsJl=&EJD|npcfAJd@z!fy+!%*~9k6AsJ)CoI9P7R*
ze%;joQzmwYD0K^1Irsy1rqd~=eag}QM>SW8Pa8*4L#>=nu~?un0v%0Un<YrSMc2j!
z<$+4(TEwyqOB^zH*E(xoUR9lktz{^kR|~VX3n{B;qJDCD=oKlUWcco3C5e?nQ-Oe^
z{5t-ILXPlIj%UQwI?^Fu3>5F6HC8?E@E7#WuZr7ndeLO^=t!^kjC?*hX9MFV!Fl|;
zJfY5;3L|tO)Frk0J$ls@di|yNMW49BBWs4b&%Wwdz!vn*n-OjvD!2~AZ_F`<FA`0+
zrD&w#IZVZJhD<oL=62R6-Zoa(S5_AnCu>FlqCwQG{G9{*x<xbcQopk;9>8P}G}zz>
z>G)TJ!_QIeYy`ILF|O8CcOBE%h>kR{rzoxSfwSo~LXe#+!$`qb=HcpDc$m2Du#H9(
z`LDFUyT64F&}Uyl+wC6KdgHhHU#Q1s=&Q;-EY;6vw#Ynut6!BT3Xcnj=WQ)6h$I?H
zGz{f!XkidA9O5iNdUcmuV_md?zm~%(2y}3dw{#!)D=Z_~VLbzRGq7$n3EmauJ%$?Q
zx;lM!h+6uc#7NtfLSKW>Ce8_3ezZa^f095b<!K4fIp*>-wx0E3Zmzk@7IV6vgu@;e
z@n3Gp{>F<QX;DGf`FQLO!bFq^c^~<p90||@OP_&$tyZR2?e(PWF<*kJjYP*02qJ%G
zPH|fWW+T;&Rck?S22wqDo(EJ59f-g1!EkK~>Xgt^eNQwm#B%vj<s}eFV6E#aUo`!1
z(PngNny<KYBQ_(4VyOd)_N=?VH#peBmQJ;I23=r;2YD-S+HXAUs@(rdct+4f$Sr$m
zst?weoH%=H8}a@XrR&H$se^ormaq3pWcI519QV<upmVA)eY5Xs2?I14^_}<dj8g2L
z$_00O%6GPlAo$BR-p)l4gUBURCw^D)q>G-`lM?AQ+Z8&&c)S;cAm|Gb6f(u|%hL@Y
zh1!aj*=sWN<H_;e)LxT0aAFz`Bi0Mr)M>TJHNfj~UdI}<lUFEx=Xu73L0idbZ9a9v
za}&otB6(jxbjI#_Jc&plJTO;qV50v=QxLYE`jov4^MDV;4>Nbo)ID|mkQ6i@_3+d^
zKQ|C)&=$uX6HC9}n}tum%}0N7JBAr{xrzOdggaT{R8_aO9S7@IWw<d_6i9`6)eoV&
zDgMQmx(`O^VU<F85;XKFrtjdFhXi#Ijj5uuxx*Fleq<_ym#P)fk)%wdK@*^F0sVva
zdj+erDF@+{0fg!K%el92TaNL(TO<~qoJM@!Ue-~a;c(ub_S)W!+FjjHM!@#B5dDu8
zq+l?)fU5v(3u{`yt4?^@jI#%4(W{Q~^zrVUEe+<!F-yi&>6*1^SErrygco@SU~4h!
zV87pbxaz|6d+6hT4z9HC#fB!wFUg$m^Kac{Iy-J6BEN$B7jzYPkKAyML|b<GA?Tsi
zP+Z_N#}m<%iys&f%>gZhomVEfR?$QeyYloy%GtH#OFE1YN9|>)=S8D!4D__N3Sn6F
zL=C5@vc>bpC{?7a*O=9lx%2U5ad*mj5+3#(2Fly84egYZLtc-Srhiq{{Mjl1@gEKA
zg5AVOJ(9vWg0d(~8>ooehBZeDphOmP71wB)<46g_o#P#v`l{ldWGU@)9GkL^13Bn^
z6s`P<$oj+ph`)URfh$PixW0*)fgA=cfmYvF^2S!Q`hGxBY*GzKD}(G1y%ddYJ-TS!
z5zP9zWt4}y)#T@QZ9|a7DTBFZ(-mK+%kh_lmg_H%X0#5pElS-8v}CL*nx$dt5|WSH
zA~eU@e7(GUSLMt{%B^{oWu{6%Q9-4a%eYpBJh$T#^-R}UkTp<i{PK@O$%?Y9!JixY
z$F=IRx(kiC;wWl5+~nG;)N+>*<x1Xeu)pWg)Rnx;q-D_?*p<vI83G*uld;J(Lm%eH
z3mlcE-c&8vNbI&=zW6PhD^Yq1fWNMadL_9@u&&A+f4oAA@@9!P4H{GsjFD+mDj#6z
z@Jm8hrXLS_a%<`BPIfuvozTJq8nDu}N`5SGnqF{&Y!Gd*=N3FTC{`daW2nQ1gMJbk
z6VV(F6hG*RqCBbV95RB|jW%28R~x?F2{#?08N1!7G_*hm+HebAl%^g)j%_TcS4H$;
zS%!{mX;qsn3Sgg1*ey=qTx(|UaEv%Oqn(KV!Kz|!D_k$x6ep<fv)jpV?Hb5g6m&Vi
zRBsxye2)@ziT|B39YKT-iMF2{VId^y31~jwT4)wg>~cAw)mU0yC~AIWtE?))vEi4S
zT^=}C2sGtiJ=|G9sHy4IpYVZ3!@u!b)dyBHul7*lU6dkl`3xd(!Jqr9L?MLj`NIK#
zNDf6u9za)jKLLO8+#B-umz?4@{4k*A+BQS=Ay9eW&z6}BGin*yFN+A<<?tuC5r_U^
zm>owHp^{hM1x_SVI58b%*Vq6~VlRnSv~{FA`-k<wJs96GXVdK^cm#i31pvCV%qs&+
zGO_b~P2}!snjq>TVtb3+;}e?zO`rqu%Zw=<&NH}T3|9|{JRWMcGK$gWXzRyJu^Zhc
z6`~+hgGH+Vq&_bWkQ)oZD;0QnWyp1saFlBo(`)#lMWmjmK@>%6Zb%sB+>psO)<O{2
zp^H%5Ft)q@o!c}-#`BuyoycWYeD6ZW{-bEmOVaDf+Hd5tU0ZIED^KMFYq>!a2Fk;1
ze&Q}#;Pi{$Z5GVc;Pv$<j%jv`9dt<RAX0Y96S~Dl7cpYcC|FhY0j_A<67Ka`)&1L?
z)bmB>f=$kH`*G8i1Me{{JYYmWD3C09sMr2Lib28g^VXc_fa&OuB%{J=pdWAZz{%Bc
zcTShLl+5JgT}^NE%lXoJshg34ofi{bq6yW~Hf!X&B<LiaUdaGo%?{GGIS#86A2$@c
z6H`X_({mqg`sI#J`<f6qk}9qO@|_0QL5+*sf(bYFo}FHIrJkN4SYYPE+sad=G_zM2
z0fnD~D$y6{`ii~c`(m?eb!6y%0HUpDjzl1KB#H@H5aV4e7y$>;B&+cofkVDEk1!Nm
z2@xG$2n@}a!bUeI5S-%34~O#uoUbmE@<Aj|ad$>y`hLz2jvA>@1tNBEzaVNkD9+eg
zvBK49yTr9?o$tDLUfCwcO@61_#+#+k&Z7V-DErvy=f_~ykG#$(ZH%%c%`_}K^mP&a
zMTS4!7aIxC9-rwS-oDWgu{pP}`96Ed!2aIMo6nqroVpIH%8XvEP=vJAM0(qRy6N|>
z(~GA2C%LPq>>*)%9RP=T&2xmuoTEuh-N~m9>orajXP4D`ri=X5_9_+t){PWN_VIn=
zUrx^U;T=RSm&9Cy^SNZ0U_RYsF8#0~Rwd~J%=4{!j6WbKPKY56Fd};t`PKR^3%-2k
zP_$nJ<p^~kP-_;<jt9z-3pHhVjQ5y=m$(EhW;XBsjksL_b-M(KWU4SAjzt@Rr+@t(
z$h9lKW)1U0JhLYRbfg_X{Ktvn19D<={wN&pj;_Dt^eRMU$;Vmj*(0tx^kfjxL$IgK
zjh%VhscZsb4e?FZ^UYu3o?wQ%sGGVyxEoX+k#<%;vxF|W+e;DsORM2VPVa3{9IY`+
zMk{7EY{de%ZcO^wJj{0H9Bv&|xoAWufO^uA*THWHI*QfJo6~6p%*+;1-v=?H&kZ)M
zPw=Vp+z(gw5|x9h9Qp+Hle)yfnEr|nx{UU-GP7bmD1oh=VQBH6-Q_u*Hm+0vTk!F6
ztat*h6$3mp?Cf0ZFShI-36mv+RbuAsv%0L2^gCcn<RkSVr-I(*hYzz2**Xaws#jC`
zP{&bY?ylKcH-W!>cH9_Xj<4rL4I{!tA8xI$ny<ZhiWEvB;h5+dFDL$3{C4Y0CH-^l
zRJTq^N|X9X%q3M=X?<F}I7@xcqd1Jq0t(i>eV;5^=sD^GFvPjMI~Xwft1UvOI61XT
zz_t#}=HWCxwR9U9tgY#ke^@4#XG9G))zUH$>O{Hos_%?IbIZLNMam1shBK=LE|L_*
z2x*f&TKDDXV}w&vaH1B>F4tHA^Ey`pV-;RD&bT?RAdHt$%Pn~w7#`=mCd-fqqeY#C
z|FBTO-ozx<j^3XLKtQhs9bhIx7=!Fl{=Wd0Kxw}#IG(XnQpoEYpyz-7$M=8JZ~f}*
z{}wA|#ee_PwpCv3|E~6be~kTKm5JE92c?4V$vR@1eNzc*#hZKQX@o*0CwO3zUEBu_
z5#&;mbh?rx_fhc2Rzj2N?hXd5#(+799yF}>jo^9^XCsE#Ur}XpMLJG5iMMsqRPuU$
z@&h(noV7P+OBD5m>p$MS!MQx2aq%d|15>6iFnILD#yIB&;jCPwoBtSL9Te-Ieu`l8
zH8r2_hqJf4<Kqoso;K~C1zy}V^BXyjrQCFMe6dvzzH|>f9V`fR3<mqfSA*9ZqgO9p
z4D!-Z&IV@R;L#j0EP?0H#-4~*fjZGU|I${02Z580o8G;y9j_k!q_+4WE;NnNrG1Zu
zyoy&s*``MoG|PMesIB3}1i4=zZ$5=q8eNzZu5>iZ;iDI3(#{0)<r#J6V3c8J1`3Po
zOiwt0_4hfSlNWGb_g(1iZQM=K9hlNN{l5wA5aAJ!E=a3iNKd)I3QMaXu6`jGF{v+n
z#@hzn?O>Z7&XbtUXTJ6PkHHoBsnOxSGM-j2G5!>F1SB6vqbF$&7yKzB2iPIk>%Hh*
z<oEa8jcZ{`yYEGX-I#i@q$l3TKb?<%`K2Q!!d}kxHhSa*x!AbiV$LB2CKwXadjmxM
zH}v*Pa^V$^Z())DZ}{?(Y))Qz^z^nS0!0_jw?92n(Wf1n{{|YGr_VR0!ix)h(W7fX
z|EGO#KzPfYviL(YzXj=>1_I~d5WS)wu`6kh|AV(%)XsdCI-YjY{R5kKI~$q(VEsbR
zOYej>yQDe6!S`9u1%`lD=&}LE6n96SP?B(nFwF)XsGQ@h0LiReZ;QUlV?O!Kh4aQC
zfNw+ewmg1w28%w}!n%Y8&<q<+nRxI6R0)ZCUK;-FFWSwc%fS`j<52JU;Ly<5lLCsT
zcZ07W0dqYA+f1KRq9%oY&V4}VTw$uKDPhYWeGACH{SzgmWc`2XyE*Dk{MNgDp6X^I
zW2u1c44$~9wUgAvapNTe6DeW&uje5;V5ekY7$vC6V^TBKmZR4GAMWQb%>T1kDVEKE
z|EE=2?f<Rz|9*`9zwgcsQP{==66QEP*K;pSbxfY#yHGF&=k+BF7NzfYR9Fk?b4WA4
zN)qA=nA{cc$tgFShK2bLg_XYu|Dn*j7VAGWUlUXN4`tTccnU)O08y;g@A|{Rr46E=
zTAtLO*Kyc|v~L@qj_P5P6WxL45QvZFO{DEglv)->co<1LXp46pQg!?!jqG~_A0*NL
zC?auROgxQA%y;X)2alzd2hw~VNC6k=4V*2v_5|<VoVw0lC==;@VpHc*)SK}oUZ{CV
zXnlpS$$gXQ?8a*gWw+?XoZF!j+**|PxZDHYy^>6MXPbL&=Y@BU@9^N#l-ql0$=~mN
z@9(Y9{t0Qs#?~qb#E%^Rf$slUi^cz1m7Pi@u>Vt8;eS{AKR?R;&&CEJzrU{^{Yt#0
z)Vng>vHiWPEAh|{|8qx)m(Ceeb>kk-C+%5p<H7I}vF#gn#?l*I(ggHoOXi|IHDFo9
z=MU_1cHf2EQ+&q?s?r@GFnh?w1N?$wZw9kLZ^Q{LV5%+U>h;CG`Rcf{;l0jegIzK)
zhl5*B%00a78EYi;N7F%0kqkRa({s)NYkwUNE=g{KLYv_cRcQ>_ua4?0foHxCR2i9~
zQqboqog(Rn|F!-F3JN<P1@>e&t`E>h;w3pfp^tN(_BXYMSG~(WP$@fS!^L&)bH1O%
zH5&s7tBr?8y+6hi<MbAI&baD<g2{frdHHg3H_98zTARn&)Y=?>3$67ps&vUtiai}C
zMRuC|)V&+symXT4<;A3~F53TM_pf1CKHt$jVha18NjGSNO>%nDJkHT6nBU)Tw0^BN
z8;5z%V(_lMc8!=3nk_2p!n=0bO$&4JHMQk1fVN5Z&V;nv0pn<YKTx)K!?T$7Av~?=
zG!DN@=qKlbXUU7{{Pg5!Dsq1|*mci4!rt&cgF}jEYeA;#&+dA|b7Y~<I5ME?!=n&y
zOkbu4btYje&Y=az_NRT<Pa-e6?BP4n+eO-BacVLM-J`K*zuS{>j(aqy493vRcqEQh
zxxi0!`apL5N#$vS+}6hhdY=ef=DH8!t2g@=-Ze&L7=Hhk+ucOoCLuza94-(biK(P_
zJKy~Tq<T}xrIM%mY5zV?E@4@l+5h`5H}5Mc(Zf&s4?-f;3C=Hi{cEo;{G%hDyE<$5
z!Sh|iFEBf!)?m8MH7Hj=?j~L;J_d0<<qJGW8RZ&(LDzU;b$8Zv`uAF1MHN0KJwsT_
z=_Xv*k7U(`ZVd#NZuyi0s^FV_-cZ6<Z!}M0M)RpFWCjl4?GQH|Kta95cqo^stlz{8
z<ZakM#tq=Dv9zISV^>RK<F$U$8(kAKWO%CH@Z4|ko6zpp2|9;WL&k`nd!iErA0UID
zg^mJaLx+uz3M(h&(5S~{5<e`l&RhQEw>rwJtC{*VR<EdqDXXk+&T&l5^Sj$i3?CGy
zs@7mw=g?Gjs}%KAUzLBQ-{<e$MyG}mi$zZ+*ZOpRl4ynr%CmGd!UW}QCszbfoCFm?
z+M7OqVxLavj_`T^_909aq?6Q92G^o=q*x6}(vSSw5r|((ho=j%BI!e_1bGS$eHN<6
zoV2ZI>1rjAt`AJQ@^6e;^BWIIgs&%eW7fq*3STRG=^J%;F_&Og?kr>~qXi*paUppQ
zLX-}LC@ra6PAH6Z%ccT28>ySXuGbdPDZQJ2aLFBK?<Xj)l4d2ambDTH%om>L!OQco
ztkvTo;UYrcKd|=?;`@ilMW)_<)+&7DcK0GQb<aFbv@)o6nj^mclEEq=D$|Qm+!NKX
zAU!>14G7iJ1DOOY-D)XNE(IZ5%8OkJ>QP340&e$o4FhM}l*{*^k-#Kcu1B{x;UCJF
zg0t_J%NGVWz0Wym8#%3IQGf211>vhgiZx-$2E^jliv^ey3$Y`ft^uL5cC5^Yr(iy0
zxCZT|S%ZF5bHul1JuG9*N@vKttG7>mYns0K0!x_o3dPo+8{g^#ed9M}SV=*92J+Hn
zD|WH^Y)|gEEAGwm7pm_{{<<#bH*?F&mHZXu@68hAFP++plfN*z5=vj6gNUo?e6~j*
zYKHN)aKNi7hcjGH;x0&d)=~>k*$jA!QnbI4qAMx7lA<ujze6eN?Ik{z5M2)bW1;bX
zcTCeN2jl;);(x60A3q5G!@U&^x6bhHG(saC8pQq1MG##TJ0QY(pY3A|zG0Ay@pwpT
z&Sg@~FH`@c_Lu1!=Y}~)yeU2L&%aH2xWz_w2fnLEcM4Iw@z9IoZ%{t_+#e68hPW*V
za_@`#7Ctg0uDo>!tG|~9J~l@Ei6P!u<G!<{cU*qDPR7norYE^+E$1k-&OvDRpo!QP
zM|QLR6npLsfxbGTiC)c7WPALLE(mW(TP|)*#@<Huyo+0T&FN${xCBas8`5C&q;BPg
zH12F^a*JofM+eSpeU6TrJ6?IiYe|j{79S>fLC41(8PVMuw@uXwupjm__ThGJv@t-*
z&^<wPkP_VENAg_gVu1SooPC`=_Q2LU*$96~41XfMZTE8cwtMTi+h%*@TB6`J*2-Bd
z+<*+(jqkSkEJ$9LGpn@gsvdrryDpqLL{V2UYtYe!PR0vKne9g#TK#7MIyyed;q+G@
zK`3prn*^YvXn;~l#|GkQcl`F(ML6ha*Ppf@0Z=ki7*1+{(hbH1|IdF2MJnNkfGvgL
zCM0~2na?XW-Eo*Wzy(97ZqN{=!ov|h9KDja`JI6j_V|C=N72G6U{Q%Nf}vBg;gpsS
z$5CCVZM6$GFSXb%RA{QcI&0r>>t_NwD@;}2MIGIwMOKMLsPI+n+@zfF+{A6KJA6UN
z6s&Nj`=!smNXDs??x+2WAZY4B22J&U+V4k!ruv3@5Tt!TmEtg)Oe65Z0ab!Mz_TA6
zbt0%5#3%(x;k~{En{31_+2&64noS5y@zWEGEM(>JbL+I#=;GH>-hbu|>t1j4`3`TU
zosNfJ9nN#~R$({ec+~x3H|;ARlpe1Cr)Y4Fb+$Lyk{`RwFPuDjOz$Vp)xG01gJqM4
zngsBaioBu`J;u%BC3j-7F<vBjN)}8u#!Dn`jxiQFkrJ6Xm8YY@>=1?Fyz@9~@)!&H
z$V)$u7ffD8^LQzpn0_8Fg$2{k;{}r!HOBKBevu}hZ0s%3b1TviOL69P*r>kCQSB>v
zkxKh){xi99#-3YRHerJDb`zU|-F7En3hJ6r8=mo`w&a*#by>}ueZtVPIrwTVHYmeI
zj42VY5-tosiBcfFFbm=4nOO*lHvf#MG`~EOJ)w@^<2vVh+7TDMO!iA(7Hw06lju`_
z!%1AUU6GVTwnW8~n7wV0L1!-7zQ}+j7j0uCB{8e<POd4(QR-?<i<sZ@lIT;57csvV
zL*gRl_tH9Z(RN1$EV*dgBPog6<Gr#!R`$mevOf~V%1dFI5OPFkl4HLgK8$)dgT7(D
zL8pTpzqb}X`XL?j+B5EXi@B2msIn1y$bzJz=SVJ3zj_Bwd{R4RkdV&TAsWXqO0Vj7
zavVBzP2&DdApy*$S`p0P6~?fW-mhm7;w&_P3-`2E#HJgf=a(0nj%K~S3yD~#^i2`M
z)y*co(bOP*Pi+>rZs>ft)9~GvV%X^gu#58dR!5(Wys`ah2SZKomXN?H!I25})zvvV
zJz?JK;R_hS7iOe)Im!>p3n|v7BpwW@z02ocndFu3dYHOE<w|#bH*a}C+`l)D`Lrut
zhTi!s%xs44`DQ7(TgbuFaSIVA&HP><q0w|DFOXz0btMOcOe5>g>jm-<<pom1{$Uys
zy)g%tzjHbj@ETxmPPoulci={-Y%z6nFhUgt7$g4z=O()?26O_7N%~{I_=^1TdUWxM
zym;}49Kjcz;%9C#ptTl_KgcGofler2NMys3E7XM5Y?_en1xi~X-OxsU!PJ|);1})%
z*CX^czk=?A6qH0rgC)>(Zq#5=E_*Dd?$Z}Es{xhf$%}nH1vVW1`rkkP1zjz4W}!bG
z=KjDD{EWV9IYtf5E{vs*29E-#Rwx&1V~gAd$?4TlpatHJ@eVRdRiHWdVsr9^uf5@&
zEpk~lWbjBjITT|F$yijZW!0>ejs@`Y_##txyYs2L_o&WRF<LFxZ`lLTYy%2yWY8#o
zINm!j{0%H|G#+izFRZ9L+!ny+L)>K{-d2J_&&`o*oGk>pOGBrf1wt^32O+`{NK|BI
zNr#QQ1Uv;4P!o};P~n)|h|u5W*{)daRG|JgcX?UsZ<UPox8&%>?t$jcM?iOS)TE%Y
zqOa$;+ak%705FB6cpUrtrza=%Hu-OyMlbpP9;O@m(>0HOlhOq>256GD#-OP%q|+Ay
z>cUE2Sm_HP`oceep9WWaf9$Y(&;gBsdJReX!t`M}vl)QU{=Ykgu7LYtps2g50^R?@
z2YyZAf~o}fI`PD0ZGp#}bX10P+5#&?RvFl{)|kv}bQpjVrvp~fVD>=O6uURs6rT<7
z`bLUZykz${4yLS-$@p$I7}*8Vx$A$S<xR=W9h7=uJ8GDXdw%f2CLsBSP$+{=Iys|s
zm}!U(vmg~Ft=Gn5ICtZab7wcv`^|}Lrg`qY5GM=`(lopV7UF!7nl$OCZjnwG*>v7Q
zoGr3pxrI1cq$X|tan*DS(55i*9D5;771}HpN!mi3DN>V`?R;H$=Pkr}A{&-lh|@%B
z(lp0aj^4SHEr}P8u_2Mp7h|;{@!f1lbV4^I=&8@fHGcZ#7a|QWPR#sLHY@@j8OwFZ
zxK>;g==VwY*_{EC^4?H4O}^rt*^(#U$?kLd%W-D63HP0SH{Hx>qv-^z){a`8rMbrM
zv3UkA^hqdvTu=J>DP6Kd{X)8SA*S9!*mWV6-~5y=*~>mT!H?x^pPXGE%jbT6N|)?<
zzmTq7h!MFEc3p@~IX|Vz{qa*>1E*zUuDtLUXG+d*swKZeh}s6Jz2^1qmVcIB^QiNC
zm$-|b@;Ywvz8=sxPE@+dgX#7i90Tf@ZO`TG?{@lx>`4CMaSg_&7{h5EDwh|^DMKn3
zAJHlEE?ts=_!Q$iWk}`X!#vGT>@uP~&Cj082>O&Em5Y!4lzEpf$q;;sk)Sf9QY#?T
z$^cv$fVu`?Y}h6pD{m2Dn{MYj9otZyi)C{u$?9imKU(*{;JxW{Tmln)|BF>F7fZhT
zUn<+y_UitZ)%`C&#{Dl)HiB1!0T+~eB-4kRuXz$0`7ZVz4ZVPO5wQzY1>E~1;@#_B
zr19X-`H*6#CSvzyA(vH8?co*2K!hAOs4fkonx^)h7Qx%B)746Cg^*e^z1LzX@gtt#
zy%v$md6GK^l@T98Vh4rY6?h+h|3pP6aDf!%Ad#lqIZFhSO9ubnNCW)$-SU?S<RAJ!
zJ=h;I2QPbA@b%f=A&U@nKy~3mW=Jf#+vQUi&<Mo)TkrAm>srV%H_2za$f?$-h&!Ez
zm-I@f*rh0P5sLWwO9m^FxFW>fSt%|*-p!RUt52dD#HGepgKDKov0x;>2r0>WvBb@n
zCx7oOu>YJf>nbUhkT!{OYz|x)E1#YN4-c3Y51lb#C#?gf%9{oOk+nW$y8eKg3q5Mf
zjM0d3{Al5+J?dQyorq+EKb;GxeViM49c|dQO_uV#Pvv{xy=V45;XSi)lKBB8GyC!o
zBc`n-$Z18gpy8k+&a;%}3otBD#9Ca$dcYV#)>0v!&~At=l*;YwgteUUK_}zGO3HsH
zw~NoaYV4(1HLP^Ei>DLn0R~TW)A%wLgUl}-2N@GiJbM{MKTRPufj&`~EYMZ(=fZF=
zV|7~Qne+-bu8ti4DG0ssus2$9PSb`lRXTXFf_z?H(}$ZI8z}6nLYsyJF-7%8wF1+a
zPtxKW6&gViv{Z^&%EwA0ZfSVf(%?3q9JCbb_&2M_8%+3$cYW{yJ&766r#qgy-gx2T
z8NgDhoZVEz=yQKt#P~SFL%j&3@$q^9)eT~X3{f>;{#QeW2m6=c$rB$NBO3lDg$+(M
zDr%R+>Fq>48h%YSB%H6{otQ}fT)eI+n)cj*BGDegyj$41b;fUCCjxP&^4*9~Ggk_x
zLcvU?z?~?~ehmdqOLe2Ob@3KCir=}da7Vma(bCn@k*+}#K2p?rQZ{A*T%@SA;#WRp
z*!74I>f+KCa2TIQBrje<3lcK(O#$7q)q4Y}ByNJ!zLj`>R`!2Vac?b4+!H-3-Ocl`
ztR*6I6J5EIysNkUdb+-L{*YkFV2b&TZ@GYG`c-yTQc!PqZu(%WN7S_)_Jqo1>7tIL
zy12Y1sG<hS(Tx`<=JbOU%`4=bm_!*|O7Wv_a>_0lH%r*$q%(POLUZHBNGLJ`8=G9O
z_-v0rvJB(Rx`08K!x=64E-Xh@uKa(a{eMeyxc<ib|C*+`W0igWzdI}c-<ALGcj*7?
zy8pf&jG=71ck@Q=60FvIZf@kG_Q3ttW8c-(H|m3U=UgE4Sbi;$^RME%Y?be?FV%&v
zhp_S-pYBf6MYwNICJ1u*8O8v+i2RJ-uy5#7_v?lD^+x<M62DxFU!X1W*9-Y;U;N7Y
zPI^L*7x2M9M?b%c+GsdLu56&KZji9g{vg!4@1mX>!phBs{l#$HzaES}%c!|GzZgs=
zynN~9cn95m(u380Xb&5pn7sBXcoPqsw=`h$^aF)8`S&jzJ%c+!^FNLww9ryttW|Xc
z)>IJ6?1*#VhVv8WA}1F>oD?WWhqySTEbYk!j;^m;lZ|e6#HmL&4RP-1E`T^?jNN%g
zmlC^-(Pfk;8{O=Piy7Sn#JQup0OHi68$nz#Yk0_oPR5Hl{lJz84^iwP&x5FrfoWf4
z2Gh=9`{((Ed=|EAgi)eb1O4~L&C88z^%jou4pGno73E!c<z2rF^Lta@A?kOUOTM3!
z<_^*N(NS<i36#<svJV{1t`VglRUxAjV`)%M<<Ogp@9|JQUJps5%o@beAm@bAD+th=
z8&}0WpWgQR_G@=}k>f--k<@&D-w-D|Zdl<q*tKCAv&o$uiPLwugvoV39^Yx27`pnx
zaLgph3C?iqH=wPr{4uGOVYMbxpXE3mT-vhfD7tNjSeGpHR)l$(&}?L_(X=R|ZOL$x
z5ZlhpF>JJGo?(+^8&R^}u(?5-(NNfoLCY^_IL>1QCb#ait-3fXt)ye6HS2G?L<rP!
z!h%jnVVcSTT_zB%<wg!<=rt!iK2spvRnjOp%cI;Er7I8uE1?DOsFce=asrh9Q_DVR
zH{QMPF1iJ6+<Y`L=9(o^o-x-vD&;X;qZeFDW_uPNYPl0O8&R`J%EQ<_8&R`d%Cle^
zAJ;FY0nzg{8%eWB$}?t~N2PoLl13DjZX~))xjrPEUv?Tz)?`NpcFazlSuoiP5HO3O
zUS#%ccx8!XXUQv%NA`j>n59h6?A4d0h{No4mj#o(V8vxA6Eu6RWhvq?dzEFuWNY!t
z{0B0TM=|O{7?K&lzSB6Y=ja|>h7R@_+^=b6922*caZKED#)miC86V#4L>{(CqmIIu
zowumYM?~M)u)~%#XCr3vpuO$Rktz<&4Ua<;7Sk+b>tE;tQEQRn_Lfv}d&_Rs!EW@<
zTQWS`Fv`KxjT^&cVwE4#D2^?HM;1;Yd3HX-26Sm8k0MAOv4f;ok_G>2-!YeaCUs%(
z`tSe(52R@7;5DImGI8Ms_#EHndijkD!f#=r+_K(WA5YeY>vGBqmR3&L#*~xwkaM1!
zr+dz`=wqInOpkd~u7T0sfWjfr)ARO|0?$2<<00<UqPW>DRKW^v1mUr)=iuVkYE#{t
zu&l-ecM@4+LR_;_xsg4c#>k#X^SGU*JL1TM+agCK6}K&|F|qsBKFcZ%w>{a$M7hB&
z<UA_F8MHvzwz$TG3F-yPwnuEt!erYrxozRHZJ7hKaM|{B8k3!D)7)oI-6nT8Y^h3H
zwk`4PqApcqTV_j^qOL8J$(E|CEp%v>s;E6;OBSQ0En(IyNkv<NOkR?Hw#=3+MLApk
ztXY(1wtShqD7EYnTe4W#)#{XQb;`Fo<<mLkQy`!gc7hjic<A0<0fZgZ$rT7OnD5^}
zoZ&4M^z?v+oxnI1knl*BLB8WKSE`WN4TjKDomF91sh*2?tmkm<C>`WMa|@TfSudAo
zpT~DVUk4g_Y2zdJoX-cMdU_|6PUA+gZ3-S@hBeNA*GJzyWbAERIDxYJp=XkP;Y_j@
zlAIk(hFw{h=v*=?cFA`XtpvM8^URVRO5RI{l0f=P-@zo%`O<ed2^75ag)5=G_A&@v
z!`8eEiCprWK;Uy=z~_d5KMVcm$wR>B*Dop=>lb@jBetzH7c8p0lAa<EJD}_Q+o$gc
zmmf|Cf8zR~7-RC^2uIJ8m+)sqDf`FZiZnX4<D-*i{gbFO^)4@I!0GX1#ug<v95n_m
z9NPDs`~>O~WT@tlI0Jv>RbhfpPrSBa@`7hD{yMO~y=HNlb9qk<>lto|1`RX%Dl{_-
zyd0Ssvh+sN+ws)SNoq`h(B^S8p&E6)nx~a2Lc_Gna87%{l@PK)>0p1D5-X-;%ZJks
zQXO<VAW0>v(s}iiAGy-F(D(wh8$ER~vg;Ha>dGDe?ndxBVP{fEu0!sw1>HDfI~hb3
z#nZo|5#b&f7STWyKMEe6qZKZ}OKyeo$MX)1=hFp@sJhva<Lv7T&)#r4<q=0zIe3d}
zh1Q6!`5*qn9$gNu0<NJxQ>?e{Gp@#H!mqc`v>N#=vEF*X6l1Bon0mHutk>{Ee?9Ep
z#_RBV8y&~c$ncR1^a5RgsWrI{bqCdp1sNOU!?T%u@gcbhh04oMqV|$$9FV8u6S}M6
z63UJgb>o`}`|xuFi5rTW@9|e=xThftpfx7mV`5s}^NvN*st*6?>-He6v1hZ#kIV}G
z8&0B5UymP|72;|2&R35gnH7Sx^2050HJtmD>v?StE##INc2eOLh?aNGU1-QJzYsSC
za7Uix<F<!J8E+3=FL-;%Z5|K6Hwq2tx2Rid&0?BsYd<F3&QWYWXl(S1#EqU1+ugqb
z6uSkaQ>eH#B&fx65$x<#WJ<8R{h`{+axS0WxK@Myve=V~+E@}NIOXrPbb}Gx*y!=-
zg>;K1<gtI}MjFRVNVw+5k8)ihNz{jR^YxHB`j6K`(Ilkmp+#>^joi?e;>MK!_*`{R
z9*lnJk9?_TkL#J#7v3$17fJ2w3omPou^nb(Tk{&(sZKqg%CYC_*ifMthHg+;bM8=R
z1R)i^Ms8NDl-K8^yk5*%-d@(qaGJBtw)JtP3_p2g_*(6|rqaNo2kPJHR7iKx^Asgl
z334N7Q$1)yIKA_pUq~vHR~mPK4a*c(Nl^FuEAqG_omLoV^pk4yUn*hVZKE8QB)7)g
zotGIzlCRwYWRk_^UJIwXk^LejR2Am0@YS-p;L%(AS1{^{Y#1-!>+p;HgL@sW)NgI|
z`-`2>6;q*%$LQZF6z-KX;mVnCK??W#dZ;s@q=`oQ{>gf$(;&anuHibkV7I|1;-2;I
z3>|ue!#UY;haJtuxi+oEan_UY;1U{{#(!%mjf`~%`YYDSD~>PR>El9mLi`k^aCGu!
zwX=aNbhe~RzuMZEcVr^G$$U(ibYzk^YHe&Sm;n>Gn=P=JDHzZ%Pm5~3-KVF69`#4A
zx1QJEICpZWcFZwTlv~;t`-gH%!{$D&TiVJEZP~s@Qn&$acd1A8@bY2VpNEH+)XNG_
zq$lS*<?H28=c%RnObIg0HPCN)gj?yYYlGi%WqwhzE_(xeAtzdaBdt6=(-QVZ!wtOE
ziPmGeW4*5{+_7}s8N|~nZ)QBx6D!1*_h-Y0XLoYt5FR|i67i5f3FK-WMPV?9F-U))
zc4FfnxkI!v4sLp%gRT8bI$C>E#M5pbaa`b{Vtz3}7oxp+RB_Xj`{<<ZoWFF;k_&2<
zFnfdU3~yWn?G;1cV7c&>=2RkBGE!+VmW;JPQ)VHy%<A+(^YkH=_VZ)<_pG#`m46S_
zuEPC$RvObvV@jkkiL+PIntXJos^$rLItf21ou2siw(R~hy1TLOXRuBQDo;{pAh+PB
z?EbWz)+-&M7Vr5K;EqtgAeUMlub`Ojk$i*#1uxql>bdlRc-q2Zu`v(>ZbW*nQh!_G
zk3INok2cxwor=A?SMKlhFL$rDU>%=sO~H;E+J*k@?L2i9;b*5@rhnnb|JN+-Z2!$H
z6}L;pvT2sBzZK0L)7tqPDK4V}?xr)0e>!(L{>D9%A7H#L#+JMq3}J3g$!%{kBjYPl
zM~(oSo(%E6%5OIQCbbtY$jrXE9rk86yttV3CJ$)uphY|!jBIL6eYW9w0UqO<+W7Wi
zLi6Tod14idd*ofu9+G!=_Gn~Z^zJUn>(B6@@Y#LvKZ6lW_`=oX4KIVz*BReU`Zknz
zX_L3($qmvvy}S6s?$0nv=Io0Nlud4?NIQNx8vi*M4ttpY<nCfP=#%E4k9+duYk^pW
zVu3WSpmi8h)6H_yB;R^dG8)gw<zNbv_~LG6Ut$s82D2|L1}FBDLC(f^q~9Bn@x`nM
z#@%>ilirMcnayrrZf)JLste=E=Pf963u@acz)Ziiub__z(~aHB8;0ZF%=eD#&19;6
z4*E3tx%0F8(gRDdV}iAR3`YIo9dy*s^a#y7AHa|mzWf|~f!bNvTPyrURJM})VvV+q
z;Fbu3LtRcU$Fn@1rfd-4E&6)H**4<e?&%5O2K?&ZyAL0{hY$I+e>)QNattGD=QfyR
z27&EX#cP5}Xn9x^Ed#Q@0wMgj)sLS1pV`wHECE}!qu#aMggG%S%<gAd%l}HTEX#kZ
zSlTYF<o_~$)^2)zuzT#mdGGS_90&az2c6)rQSZjiqs9W(Q4V_PvrQk7|N6J{_AmD}
z%0>Un!WC*$Sgw6<_wQysoUQQQE$J^v?PO#{v3KXNm|UN;1!iDReR(LeQd0BWB4c~6
zoo=;tSZyDkHya1-YP(VIoOh4mOY0;2WqO@X@&EO2jpF{o@~G;T)215={?x6vTh%5O
z*<lUF-oXyb3H?TXhNQHSo#WGXt$u#kX#XWo$jRLZzvI`!^vlnf!_EG)Mt*UWE_<_|
zoq@WvZ=qH8sE_1d+zp18m~Kj>mSEyFNTy~TXqNv`ZVDe~W<KPF751W}D(f<=tjn;n
zE<?-ehn3Y2E2|$`)}LWz{TWu)pH5l-pncy5Q8d0eXC0kmD|DW}lg5R;B2K5SEnfb!
zekxooLjSMqY;XJZ|BAU?T*?1s{A|1-|GWTUbp?M&4HT%Zxwf&kLCC)MLk^E?r$_Zx
zx7uwSx5PIz>G244f6#t!hM<!5!Db$zSJrq$dS82kAxeGJj<HbhhwIAUzI`i<>=}qm
zsxA+Ex4r)LmVJLa98c^`EOLu5dJa+99bmC9edMwC^yj2~{H|R+avMVPLJPsF0W)BN
zFeZO)%>RZa`f5)ufbJXq9e%pF65dY6pC`Q=YUH4q;3(TooQ1@sN@sy*IOt97OLBJ$
zZO5czFntE2-jJrDODdU+?`GIrvoTEaFZP6*7B9Uj-xhb?Ro{~OeGfx~kP}{c*S?=o
zB0&mxbYfGgce5|!$y90<Z*`BpBB!GP*gRn5bkW3`PHmegU)OrW0W@+n=#c|^a%~Uo
zhmeG07@gkb7~S}Q7%5S!`i?Ybmj$QiFVR%2-cANXu>MSoKLwcr9dYXfLh)oGya=#g
z`kys5S77;@o1|@D*%J_{wqkg4(5@-6aH&u(n0ZL9&IA@=k4(o`vv0kLO^|tepKWBB
zuY*hAgo_77@6)UM+ybMz-w5yX-ssZp>jEnbe&mkMsu!L4RVa{?9&n=#mi83B@$PN3
zy7hPUwl}w1$ERJV>#NZ5^{)3ehcEYc@?Mc_&wdKl88q_d2Kqd_<VLO9sT=UT_KE&J
zGsw~LAv|gE9L?i*kf!P7IH`6zUR}*GROZ&`*3+(wS>UJhwpt6P!7HzR&)hLc=k9{D
zj!>+l?zvfO@9VXXrosMN26<nv9@g7lalHN1gF+|=B+e1YUGHZLo&fu8@$P7;dPHvc
zVQ<!>&ayU2XQAxFYPT9_`qo|nR^=UQyS`c85?kwN@uZ_D>bG{ITW14o?BS2ImwI>%
zED8OFM~4P6i)-?D!sABGdj`YTbbH2IG#V)UVT$}|)27Ky#@T?)rb4<$M)o(-2T@$`
zvUfoof9(zLY=V^0bY1}<s9E-JgW-_$Zf|iXYd~eVSHh(|gVhWsHGzS?y_*?ec|gT5
zNwF}jZVDM-p@T_^TS$Y^*YWVHPym!RMH%7DAfRqd;#25|LbrWdF9;n3G~x*^{74Va
z0uL$ufs}7aj`ne(SubqoN%wudB{`MQ&SvbG!;$y4ze&9bL*o_ju3p%QEWivD&^m<t
zUi(4VolFL!nM@uwJRjhlU0M!sP3^eVZM077JX5E8d;;A@*^9hAZXZ><B)5(ehX&N5
zGMTR0&cfNYr+35Ibluo8Kjmq@T_K;w&nEt`;h`Vbofbm#9HLGS+CX@tRS(SQ??PTy
z(M4L6NAH2k8W6d$!%NAT#r)F}V?a`VYrS@O+^v!It?*P9PjwoZs)VVkpH9vqQ<X7Q
z?KmdYHm2(QRy~PKwF9X-wO0Gcib_?Xsj8jRqo`E7v_>4}s8oA26();H1p~dc4g+>n
z{j@&M(%`UWlZltSyApR#+pWM1-&$w08Wf@F-G!r(kM3?R?8&;(C>f^p$*Ar`PEAQj
zC7Ym;Gc<IH!o2x>=j(`oromK=RFgZ!?AB9<#Nz}<%7@cm4X4i_A7F&*FtJ-FW~@{R
z){2Yac5c_$HmtnCzV3KmfAv0o0v_kyk}rZH{OGrnS_kHxK|gijQ+R8z4vVIxAIn~1
zMf)&C2~z|W!ncS;0s|s??CI2ns^~<I8fTbpVM?X16vIiLR!JpPR6wV8_}^~5(}hy;
zkBQxaf4$NhqZusf(?MU=WN1sf(dvNe=Z=L)neIZd(3Ju>7#dmKDi%uhqCs-SLK*&U
z77LX+$hy#6xg0QH!}wsBd84s!8o+=b;15W5D8Ymh=<f)w%$bEEB(v~0uv_HU<rczv
zVv}|RelOA2<+xI$WYJ$2TnHP0P1+9Dpl$ZuWZ$=GF*|X^NXeqVrY%I1Re<yzFI}*K
z6<X?U94%6^=&#y^FlG@b+ooS*N2o}XZ9BQxSBrh6oM@Ed3XzgUe$|-9R@!MyxzX5m
zs*U=(+(P`*m@=JFi7Q1)7X5X>h4{Ig@|>~fwNISYacr_J^M(3r+Cns0r6f;&|Ew;j
z>z0Zvy(@Y$zoL@)J_)%SPl_<xopBWgNSNf|h3v}n)+Y=01d$&;jhWuI#qttH7Gd2>
z=t_=%cI(BM63Si)G!I)C;ZKSE*@-FvL<v(^zfY|IC)y5Yyvj3G46qFqE;RPssmsM=
zY46e10nI{yW-bDXC2w#ll)cJ3iMt>ZJs@2nj0l4h#m2)b^6}V?%Ufd7$MU3bnTey_
ztP}^$LNtZK_orhhgu=5^GZZsI(ljEuSCSozQ7$wK2?~Xho;2~x4sAswRFurEi};0t
zwmKrq4xdJnK$Pt2$pD|k(bZPCP}uoy_YaEB_*u_(2cgJkcG1%lw#Di7JoJPj)9OVv
zVOMyPqyrvROP~zpwSx}^Jc!T9tO?WGX<!eEJCCu~;7$AQalgSo?VsU}gP-g5HGyq;
z@qJWHz&Gs^C<41u#&X(j%SaFKO?z!MK#~rseF;%0B-xpeWryBs&yI<<a09_07W=1v
zbb5G9c8ZQ42o~VEPr$2k5s%i`n{CN4qXiG~)h02e=gm64u{MsXpK>iwJ|wqHJb|=l
z<g4AEp(9Q3{8Do0VbwN$9wB&u^C{QB>)`3uDiC2H`Hn}2bH*e2b@<&?>i^I6HrbHN
zMiS{km!Bxd8HF8C<yazUV(k;F-r#3@K#%-6bo}}0Y+kCy=N1EmRw4++let(zF;Rff
zvOs7#8$x$O31!{^A#~E0NMF?@QJoIklf&a)NXD&R(;I%(qlgyiG-xlPLD4PZP3k0W
z(oV2RJ6Sf#EQYp;o$H3RiJi~pSR#>|_$f7VP_S};QKF;9MxLyb&X@7s@N%@iJ|h=4
zMzOJ}ZvyqDSoi0q-dR!=+yx9DmW%#*zazMJZLCQSZ;s50=3mO0-q-5oCFwCgUg`%(
zLnYY%ch?&tMD$O4GA@X2h8R8E*=$A$E(z9MSH8(&m3a_Q=@ydNbvR&&m~CXbiFS)>
z@h42RZS_<WIC2|bQ&Q;fjv=y^#ynvYAZ5>Wu;*y2r5-bj3+gf~II7EdpLNppnPwl1
zN4CEU%wQK-@m)aI#;~S4FHP$SH2G#Qx|^yRzNu(=Ilkqakxh5sn&m~dU6M<Z&sm)7
zn6+{po6B`9%s=cS{Q1~||DDF~b*fH&qQ8+O_;hrZ!g1{IxSKQ4F$~Kzb7kkFm8&p^
zG5k`>ndOM}y4E#|pCEnNpxNQunec5V=X|QLPZ8zpdikJ0`p0B{&-fqw8s1GXNFCY+
z94Ph)`UgJHw%`X%4>Wa8)eQRQG~6LfOqAZO4E<-J6m*cq5%{fo!kx=fJCjupI=~=}
z)>HE+t5vsj-N@+RZZX4f6aSl$Y0qd{Oi2HPr$q+Aq^#sJ{Q&<;2C0N+f-J6;S`JSJ
z?2GN=TmQT6)}qkhm|X^z(A`<JgpwjN?nw?U&W2gp=T5N`+2b(Vka~V<wd3a9)A3bs
zTfO6R#M<^YSlI(BwlUj2AGKXgw?mL8!nr|(Z6KEO{zk<+U-=v^R9_cfNQsWUSty%N
zfEZscnYM_rGz;6?DtERg(W5@<bawG}_gro+ajx9UjHh&X+gkDVLR*A8wH0x!W2LUZ
z+vTWcExa9-U1<weUvL*W1DPe2=#K1w3h%43H!not9qAv(J8Rq2Wc|{L^`8vuo9sLU
z$N4F=!_WG_Rm}TfuaZKtv!<E6Y0HxA<rRA`RclEkA-8?H?IIebtyBfpod9{uklzlu
zW{gqMt!$|z4QK7W*iPHw1eGhE>Q!b{TUKHLr&%rb!yX?FPpA~R9ak(bu-;vELcQ(U
zfR0cuRTfxlWrvUEw%giLi9Oo2Vg`6;E6B`y;(2fs&3l|zD;3AFYL|PenMvo<o_q!k
zL&voHN?FS}&gmzTx;4l(+*{7GT{V5E<6Bc?OIl?w&gE=xn;sHaR_w}y`*>j%b46?G
z;VG>>J4+DK=?Y1^rLu&+)l)HoYfdHAl9QFn<S?$aV%&wcr{ZT^y`9YEG-+3NS5+iB
z!A_*S{Tjyfe3w<TGxXTHv|{VU8oH7&dtz*@t-Q-@9puhLhX;aam(C7SaBq3Vy-U<4
z<Z*u%+Js_V5hb#|Y|l=eIQx!Hjw-%Y>&dATyNTuQ5<0h{D=ovciM^!S#JsK0<7sQf
z(~H$5<TLxk=j97*l63B$Km`c#C(u$#0==QTpD1*P9%GkQjJ-_8enL82LcZP<9bt5Z
zb>&fnb$P|p%T++MWVOQ9dQ=T0%6eUnu(ouBHS;l59S*SF_PBlQof6&0uDH0tJqCiS
zXjEI;dQyBReKlEd0F#6Gqq;y!_!M%+bi4JWyPY}(us_jVASHYXIg_FyKXS-HIS??L
z`AmNl;RZZvHuIsD-W*|$>Om*xLlYdGdC$-CVzh3Cy!_Y-R8KhY5^8A4PzUPX4zG?H
zt(;}}-=fz$yrgd3A6v%Jr@-qo@%32%wmI;^qxUd|ImZvc2iaNPrQd_9R6^mKpG-es
zgZj_%w`b1V1fWhc9I;13H7mev%$FZ4#1tN~&8VFjY+%BXd!~Vz40P9rZi7KLI3fW0
z-_U{MS&{Uf#?pJ5lHN*6dQ0K-((u(`fJl5PK|FP+r63JRhrT_gArfricl@A7bvc}B
z8csWmYJ8cjgFTW_7nO-F2`Ie?Vz(nerx?{c6mtqM6BGN10Z(zyS%cUN+By#))XEYN
zs*BC)xDZ`>4JHFj8yyNJ5&>HqaX+l2h;;646y5iY4kdvCYO3vehg93`>RAV4F~gHw
zo>ZZoloI%S{1yfGyY736z3(3LQaW|YSino9TW=qcgERK`u<`b7y<KnB>IiN*ZFU<c
z%|`tY-(!0WsH@J4Xw}~}8}Awi&3eHlu2vvT_*ea{bKEk>hm)p3%)LsP9M)@i^~M}`
zxdZ@czt`KxG4~c2G|@f>e5OIUMP0?xtP9W|3JcRKk`5AN#wg#&D9&d(VEFlo-%fKF
zN<qp<<Q-(XFyt0{nG&M~>eDhVh6q&`H`YXrO%<}Da|jO^OMqUp$+<w%96|h}8i=g8
z(K*Z$yiJ8FhKzd}`TR!xq|;~~w{pnt4$j`ufZrX%tcU8w>ui)i?_u713<}hMoM|0*
zNv(Mds}Pjo&?a3SgXEC{q$?11nF_1VD#p}y0wdKWZ>x<aIjVO$)pvC&w%8RWh2YgR
zZ>z9A2+u?B`%%2x6%<APyNY`mgmR!8hT9*KW#RM%ygqd1F&njV*fJ8lI7aUW`)J*N
zd(754p|=--!cUD3YG2S>c!13!e%5{V(K>NPJL-owmI%v3uprtktRI96Zz?XyCg>hz
zye^~E*v3Cry|h!<g+HbRI#}!~i?K?8RFkc-W|Bpg7ZAM)%L^?0tG1|!YeNlnks9Fc
zDky)@*;U{qTtT6Q-FDSk4chhpciL#z@ybGY3oZW*6gE;Nho?ti(^cE==xv6)2D)?`
zSc-FrTCIBu;?GTXbP8$#@cuD5sJj`+aohV`Z_>KNMOd*0@6t7RM_hsvp}kq(b0vTc
zL7LT%PnC^)9IiTq1PZKXlh4ejhXZ$H<ZrA1cj|V8wdvjn#VEn&tWN|*xWlcgP}MM6
z%?$JaUM^1Wz@%$pu2VHS2MH8R%oM3$I?Lh7DG)A9b<`odza5j->Cr*G-6<&4Jf`Kn
ztsf9`J1vX(M!O2*e4xE0!&<kGAtE+0pRNKg#Mq&uL}ThOD@9v@XN71=mjz7D`IXcF
zG`}V&sWm}Kttph$5XCV{J=C1?Y}Q#tS5symDGgRJ{q*n5gZr;Gj;ikrN^Fu1!;p|=
zyono)O)5mN)Le}($$3RGOhhSAreZxK7sl6aRWhM4MJm?ga$%0fsYoUiCZ@6!VU`5x
zXl+Cp&$_eN4L^m&(2!I5a<LJk8w3q+M|3#mH>v=?R@;p#n8Khr;B%_WQbJHU;01tQ
zP;EB4XNAQW%h?5cQAKRQo@Fh1VnJUNH61P`BuC7gXWMqrZMbd6cv9OIqox>RByNOJ
z-oeZbc1P#N+7iq!rR^1Jc!gSBjYE3LDz;{mvyeJRoGm6Wp@F0P6s#?h+}esGfr<^f
zY=l}{apslPDD4;x(-O_9CwTw6J@{;oHrel;ioLs6?(g(3cdxeaO71OwCHGbzzRj${
zWp9?Ijw1Z*l*{xl{P_Qtil+59vsBzJ70aesw*FQ$E9LUe-$-#89dI|Dfe0pZm*a2T
zGx=HPLp!~@xnUEu1<MxN8yzr~t8FlJPrG1_khhIyeQhT&Q%+lrE;mcWOf<Dy_NaI3
zE4x)a>AVM_Di*$J#ue`5!sIBly5zJ|#{&gYXoC0!ee|uXb@Qd{Z4*_+E=uHf{kH~g
z%S)wH-{KNouh!nf``U5)un<`W9%v9)wopc$g62_J>qexeMoGQhK86}THM)@6+$j{f
zDg(LSA2$y{Z)u(~0}oTTt6*O$`q}FRn!badT5CmO5{s0`HreCp#J6RwQa<uh#MI(j
zy;7)9rYaW*<~1x2hI7+WlqjbxX_a1Wp1iNFp{oNrI^-!%lz%D~D%80rAE=0B(yQR0
z-VG_=ER-l>I9NW-Q+roG`W1$gP&US;0F(tfl%Sj;x7UuFr$;Rq`r5k#-IVXO%Y3h2
z=80~~Zd*I0cJ18+wRf~@*G_q&n-bL6ns&-l-O)tVSZ|E9Q`Xy-cA0u9|F-)10e=$d
ze^=>Mqfq^?R4(o8n121wgybvzZwWseZ^%C{CIkBl{*W4&-(8ED#{1q6IXte>qmU}y
z`xf5Nq{pM-U}TdovzsBg9QWx~Mi2M!NbhTJFr?;rZ$@Nu_{(f|`*LgR+qZ9pkv$s@
zE(+tx=dEGyw%5PjvhQz)<B7eAMQ#yB&!K%qecZ;Qmp<|YQ~soV{H|R+avMVPLJMzU
zh)?b&x8tep&yD%t&_rMD$pz4T!@tuD=bfwOF+B3@iyhpiZQ5V$A!s`XFkK2xPDtov
zyaE0~N14mX;Hy0%7Z0RIreDUB8M(xGqE~~VU7#7R1`|kW?vjb!A5Si)SkU;2R*H#8
zZ(_r%-X*!4!i*%t!F0BHIe^47`;rVsw|6sxCI&j7n2K18FDor_Jh_1}$&u=gL7EoH
z7f2PxYcv|;KFTGr?|V15P%BW-qm*Jfl#YIHgzXt$%z7~Dm)N39duY$xl4(0ir2nNi
z>GkP`+0|rxgYRe=<Kb}p4HK8P3G&VKB|dgch4I~tZxckBA&?=NC4w~|Pgvaos{b0s
z2;|7Z9)V)AFiB#H30ft{g^3Ik$e?MPC_7(mo<P!Q3k4GB8YymBVRnjkS+W6w4cE3-
zAaN3d1(K(-S+uL6CP-K<F_sHt(>Gpd;)4W<-zO>geWH?25+v5PWoY6gWOS2}(Mdu^
zlEfzo5^GyGJn?CQ;nuWV*a#*`tZV7;l64bPnR+*YWoZJs1uSMmx>oM*t8@cA>#|XG
zaK7Qz@ObnYz3a(`0{k<`;T@?>#(z3r-}i2AdXpipba-Ch8(qSq(G;|zHYO-|1;N4#
z>+Q@QUBYroruGb1-M643Okk-c2R%F%ZNf^4Cq={0_61bgxx3&BhO2b&jcdFAWi-H3
z8ggXcjG=_f9*B<dv_Q%x$xZC9gDGler97o**EAqu4>|#~fkA)j6@+i`=mzu(=%7x2
zV2}DXG<v`afNa&t=kW;iKGLGf9#q-vT|kYq@#Mkq3fij_i=+eV`WI5`O{b&^YS5i+
zIN1%-7(yqF2UD_BESL5SSVH9{9Z#gMt?E0mnQi+PdTL6#?b6n6g_7NEm*FE&*aB5_
zLQ_G%jVFWImm8sl?<V8BTUZ@umj#0tTLaB?f_-;<bv5V%ZN%E%!66u3+)W+`X^(rC
z206Y5?deh=@RLO<csg?W(cm4Ys+m{V81=C}(=38|Q8K49ub|FsV%wm`!SD`dHr{(;
zAe(&~p%MkTzam9KcWs;7o2Io>f~TjQD(^L_xonc(pi*Ed0%L6!H>-D_K@Z(D_jW4|
z<FyB<p*c)xkPZ+&=t+Y)b<%#YC-%*Q!{G+)ET^wnR|!J~)$MH-i!Y%&zw}0*ZO~4t
z-P6wgdaM51I^HX?e%kujI{vM-PNri}3qcJ<y%9}KXs6d3{D<qCle-b<(!<#RwNg;h
z?J4;N-3ol=sh?oO+7nb?1BE~`U}UPmPDyThH|>q6ZuU=@be`IemO&L5bv&6$RsuI7
zdh``J9Syz~aEf+q;NR)gwu$m}tv4J1ql}<a5A4Y`5cv?2a16#y?{W-cXavMaiLj@U
zLmfi~RB@YBZzlt=If|ympF-m)Z}rEhwBMhj?S9T}_kcB*{%1{XWMTQhac%nw*aW2n
zJnyhuWw<8@$w9lA+$oe%A=%tiX9Bhc^JsiE`-V0P+Je2$)Ry`>xMa3Qu=fq5$t^Id
z`;G8E$64$2b%7Ozl3D8Lta>rAR{?1?TA^MAV8g*{vJV<|KB9!q=|Ly53`~IDF%`F%
z;B6t)nI8mTs~%MpCc<l+@Qx@0e~~=RL!X)MGm;PYLu*Gt3$TthuY{Hq(d?rtJHdv(
zk*&vz)%MBGwv!mINQ^AF!HRA*4%y$22kdXP$^IVH<Em`%N-It&@OjTEi9To9|4VT`
z&8p#<X;t`{<db6w2&tcR473FeZuf;Zx&gh@0W*YN+tZ?reM{@_(k3*H@K38o+Ym)O
zIvbEyw_Oc&I{{0!TWuPoiMm_YpmwrBRXFPZO5Z{LT0NqgZU=h2?eOw@7=s!-*f6~_
z{%Wfec<q|D9o!!12Huw}?>%-LZZot~Tv0@-InJGw4Pb^;`I8O)AiQPJ?Ky`9Fk?r~
zh~{0+VT-78KIPoXKRIP^_K(c=$)A>-xg~{XJL?3qb#?Nc1@ir#BNlIMIA6bW%8p=Y
z;Mtrr``G<T?!<(z9}jZQbj*jdD&<i}faYTus^cY0ht!d=Ix2E%^-!N{me-(ZQaf&w
zTJ8Nu)1ZGW_NT=Dl-Zwc_Gc$zOz}NT<=WA&wfCuSf8y6a;T)3gfJ)>O2anz!qc{Sg
zREy|UDj<(PMhq|X(or{s+Z)+J5^7r_Ay}0`D~-rTYpiYgD^74v73ayVb+!?-Zjg1h
z%R@iCcb~my&%(!Zo$bod#B|GreR7NHp2QYFw@7$4_tS~<iSArP7eMzbIN@!=Kb^Rr
za2p}637BL{3zNWcCb+3S0u7Gpk9VWNjEwGXE<m5e^8$3K>4DiO7#}p<_!jIky74dI
z52}Y$@)+dpQ5V+o4U*?8OLzjo+4T6N-pcWXmF^+$^M%Ra))zeR%q~YrQC7of1z3<~
zn?}9c>6h^=b)#_AjB1mo70~q^se=!NclAQi^|#FNcX^7#c1MtR0=)A&fiKeGec%9+
zy|&XsjA8FHGfmkM%aly)Zv!xp!OG-^GHy?>Mu#^Ebr<*bZw>0m{ad3@Z5<Y9Gve!>
z&c?U5v}!z}c_3H4!SHSpSt(PJ!i&bX^ns<pivD8LJurJRf$_EP2T&mzUtp+TaI5?>
zswS>x`D&Vdnb^=f!%OnDH@vf*PGd$PZZSla$N+9Yk-yYXi6N22`d{q+H7(}WF`HQF
zQ}=id%b$Al^H=s3DsM&c9I62VbJ6d3fMXVp+XeRRUAN$<2{O}B2U>UP(ITl&Xnwu5
z?kM(NYVI0bDyr`x4bqn5I!c48Ck(wnDRLQ@-n=Wl5|N=oer!<@TjV{#Rw>#P+N>Su
zEU7(;CP}R_Md*QVNIpgl$;Sh6Kt3LL!vUX>Y_68+4#vlI$FiDhFi<0uhT`LaI1nDf
z#A^U=1J6kZLA$>_lU}2*t%$y+r&u_L=qXmzRA|+tnb4{U69K-^dC*GK_0kC%ofECO
z=LG8M@|0-R#DORk#W%hQ!80Q$fX)X5a=TC2tZ+;X&2cJ|ytK!lF<PaF(PD>y5u*eK
zpg2nK)f*-Jf}_OEL)}rLb*?)b7;}vdHBY6{fhVrP<293>LPoUmz%{>VE8(lxO8kPY
z<VLP;E3wLT-|kGhMp7GFY9u_tM!KsOo=Z6E6SUS{v!<Sgq$adP@{#W?on=9t)r=Ej
z>TJdMNvF)XyBEa|Q5kom`q*O%3cFz%hz1jEkbDf2>=udgitxz7)P1(+Dl(Bp1k`TI
zEK|W0N3o5H8>(u9{uftu;~5?O4Sqc+6gOr33voBt+rdETCqa#T$e!(Da4tZXfz^+F
zwNJ3~*shtgaVG_9Q`e27$TFckcJ@db-uCc+iBq&vREG&t59W?T&zc;Ks8W=hED_5p
z+Agx5+YMC3VsWrdcW?OkNhxlok{oKHH@*HBJo&f<6`r1$3Nbq{(LQk^hS06-XkLQZ
z0I9voTt#?d4+p)A!Ei8ppr;zRpTfPo?E7AyIhlDCw|M+HQnCzra}2deP#`|=F_I$P
znSy?~=G(%=E8J-xLSfjQoAC7oreqTdvnu1lL1LEggZJ#rdB!*YO)Jp(NKHva=9!!M
z!$ywe7!@SH0nfH9ph|kSfuA?=GrTQfO~KXp?2MrKGNs+j`VuUfyWy-rjz`o?Vs34$
z%g?$jHP6h322;}`xw{!VyP|0eYr^(3nxw6D5tB>P<k(xb$xhEs3{o}te(@)ILpM8#
zIisJ?fLGqBwl>b-y|=y3_stn^6)1?&9e?T_v5y;rIhqTLYz0LvCd2g?$}6<$1$GLf
zv-<9WD$Co%cJaNfEjdh>BzE0C>L%}_O|g?Uzt3KBanWd2+aPg-g#))gXLpm4v;<f<
zmGKBiDU0KlOt*oK^rR)_dDBGqUe7-Tk90@lnQ}WJswYZZc6Z!r*23e|u`N)x-y>xa
zX=0t^?Q#@+Q7Usptw=}#2j73J=bS?@UuBGENM0F1FQQldn@|F^On6X_3_rQJS&ZkO
z($nLQxPUWwa^N&g6~f6~?{dT}7Y+VvTmH4fe<}A)-M8Yeiu-FfEZZ)=-DAJNdcrOY
zwx4cG!&(BYidqoen})9zw!kbG{X=8sf@4A_HdDk5;p8#)_>i4AI>LY*Zt^3}j|X@(
z<Q#wEuh3H1*(g6C#gkEXJSv<@Q=wJG|DEt+P@>`g4i#op@^rXBQ|}NIi;B*C&pHzi
zYu>YtEc1TF&3pK|P5X5_(6832U!(dJ4?Ljvb`0i03Lh&5^AClOyM~m59&^CsJ%jmq
z0{<8JAmCvKd^PDH>;wj(wB$i>yES|eLb^N9m#`2g=uSN9fquZFMLg1hk0shAl|Yx2
zm(wLKJI>W9N?r>sCPlx!ueVWvyHgs!u^GJ^=(+7x&&fS!;nL-G9e%CQzS|4*-Odlu
zcc?T%ZwcBFrIH?7vBM>wf$sIOrP}bcOiiQRbGzcjm6<7SS(*iyVFi>l3#d5F4@Nc-
z&K|wSyE_8##&ZC19*R04)!?CMAuLLctP>v2sKcVYu<&mbYKQS=K-2EfIKeIg2LsYH
z>mA==_BuvEs^X#3+uZ`~L?M0(t)<?C+`rbrRIk0q=t(E-dacnxL?0B`t$w7ziD6NN
zBFq!u>Aix<i>V03s9-4Tl;_qtVT=$kuaDd0UAtaI$2UK*GzDa9IF#-PRk*05mRw>{
zEHO~RwyK1Wz7p1X;<AvqdeAv;o_6bAE_!4Eb<rR*P=KmpsXZ<cC81n4bjK{ja}YRK
z|Fzyk;9#=~LrJ!_)>$A+e@`7Zo5#O3TJHoeLf-MeH9#UA)G6ldrPDA=Ab>Cggg+n-
z*=y|Z)Mg;{$3W_eo4OcCEuHPisdx7Rsp%O*Ff<QuX$2DIkbTPq(D&C3?_-{p{F3bn
z(#>Ii$+)jiGALPObm(ko7<HPV-UO_<zyvU_S=dhulLm9M<#<H<s)Z4CBAhW+s2)I9
z9k=R-&T~F!yCU<0N$xmUQ-C7QtsR_P2rooD;JX%7dx$I}l<()7==#K|!cc(F2)2{8
ziMHit6XnApR0yOTp4PCeb{%>d`LK@c=dTZD#vzE)<JP<8S%J~U%KNR_p-U&eZgk&!
zTRPAz(^m-PBo;ghlJm_LNZ7=oqA$@rr;py6Rsjr|by}BK$~p@XcLak26XWodhKhY#
zJ#BXBIN=1Vg3Nyp^Qck75p|{}3~;-!E|Fs-7(h>)IC$DMwu#|9Ha|JTh#i7Mi6z1?
z@|51Rg-Vg{IOekBBrwGU4pnR&W+S*6M`O`H=$jP!$;1d!bWj(Fa<-Q0=-YF}ef@<k
zY-J=*!3K}jiJ0yas)}e@h|CfUEk)Nt<a$!Lp{TYHX(yo$Ipt7FcbB5zph@lr!vQ*h
zIlFSyQ^k)g_kqYclpzYjLlQuEh&F^L8ki7@2YBzDN9zLuFcW{cojoETUHDspWuqKM
zYmTdbxc58bv`l>9nT^0)#iE^y(}bDTa71_g&EH0QrH8qpcd3K9hNpMX30>i99^okN
zxwB{HYo3jJdE-d*F0To`<r<I;)sUegh2zqpy$l}@@FQJYn-T1d=Ns_L^L+>bc|d`U
z1B~_v|399>FBHLIKuDB_G%jR#SJHd7AQff+vH7uy|KS0a_wf`yG*R|8533Pnz*|fR
zMKm!4CH#X@j`*2Rwfzo1;e<QHNf$NSa3$%4&oo>z>rlws(<Y{*zd;D#%x32>G`4&+
zl7x4E5K&U{!mqxM!-*(pYaj)nK$LVL18PhFb^3=kg}e2OiRJ(YuswRda}Sf~nfm!>
z?#G|XKOb0TYrWO2t?S>6hdk_@gyl%-B`GmixAnSnhRV2IndV@f*F&T2*z_Qqdw*jj
z7-u~x2-_jFKHrfUott>X=mg{Ew`k>@y2H|!{O6neeN8y(q7&19&c&n}`c_jtVMBIn
zXtb2>vG|nxQMJVFLh8-Z+Op7Jz<28(^@ZT4r&izqv2jH2xC`!#9XTEnEzr|ZTorxW
zlU{BsBsaQkw16Ti?AXGy5hz04y}VU;I4Jq067`i|G@+?FpcEfbzvCjV4~`d^znnON
z;D=_W{%G(iuS(E#dJYn7dX6O|t`Sb**x_bq!K#;v!&n;rVTQ{Qo@1NjW{iGTUwR{Q
zGZ+nS?ryl3```-n{P9g8MNeSa`$*Lbr2B<}EOjGUzN3&vDCmF;=te2(F8>uQ5Pwtd
z_w~Z{;FdYrV_WfD-yV(cK7R>sHdgm8qB^!m3!LQ74C(=+V<X7POvJVD8Yb$&dqh74
zLwdjC0Raf}$bwbpo>U9IdE+eLP0#a4aq^D~S<aD|7L}N2&9C!cM^q<q{ZFb<S`F=E
zt`VKOiUy}4AEXXu+8#`_8k=XOHC4E3wyDxUx0pulB__^?q=RHMIhFU_b?iRxdu=^f
zy|?Y%3guoDQglev?^;b2kA^d~Q`}KRajL3(%Z9HLccBwMsl38{YAe4#HixP6>QqhV
z##oj<ztL)j1(cU~@T=XQfxZ8!prldcM8T$x;K%iM^>vXAv1v`mMJ#GVL)5LDMu2R^
z>`cu1xl-L(Xcsndew$IO!J)=YSW)vKlS~2^n;?3`&gFTz$6jO!5gfpnpaM*A7RLlc
zye&9~8S<eCn`Z(mK*EiqW?1!}D{R$sgkFmp9J*n?&poKCD&gQOd>aZs@SLTiQnT$b
zES~vU9|*gCVLvKnJ}X316(?Vlu{T<P)ax;xwIv5XVT353A2B{pP)e+v_Dd-Wt*mh+
zGP++oy{|U0zuJ=r^w<S`;=;Z{9Rn<!ArGt=Q-bICr#T!QXCa75eAEdS#9muF;lf`~
zxjIzEiAzS2njcSNl2vJGB2*?~T6iAJpjNEK4t2HTxQ63iScA=evlz~^s(Y{1+Zjx!
ziKke&tmsob!Jk}$KYeluw&o_7?)326)Z;`69S#A~6VWygoz*;xZ`+yMHr$=#Z7XHb
zwlm(gv)K7r!#mX}>NmkO3$u=+(g&OiB7Pk21qOH)uxPj^#tvb$%Hi8rT!m#@i&R5p
z+kJF{QL7*UTo}_#3WU~cB@dA*Y?a3@9=P|EQ#PwwO(O9fP5sj+sV{InNmdG5cVfto
z*&|sj*KE(nHI6MKaE~s>xV`#hxaT0iJ#zDsjlHwW?dPIorJxF<$3-$SAhf9zLIgHE
zb%@`jc$ROyDVg=I?GYJ_`r`@49vD98i8+c2IR2>1vvIh`h#%LLJ?Q1G)>zs8LaCTd
zzjbVze50`$a2K~v0~})b?djlBTpXqv-NyIz>B{C>;L;gFOjc%19npM8dRa;|3#+P?
z_4384C^NmON$b#2&%CVL42_5A8a(%*GpAbB;R;^B4l4lEEUY|U&+>9sysGhR3+HO{
zQ-YdzX@06N(aYxI6(%q59?g5edQ3xuat!YTV<Yfe(Q7GnWjI4EOsA#cdo^)2wb55r
zu;t8z!jEqV84kfIatr34R$G<Zr{PUCo#8)ohW~6qtGzhMJ~EbxwW$0v3@j{~?=<1W
zN;Rn~^G@pB)RTH|&PmNVINhw~Jh7~4Z7pqDTR+aUE+wDVdTK=DiYXANatX?oqfi)R
zLc>SIl!D%76I%v(hAJ-1n^H?{l4Awn*=&55@$xD36)^(53maO~&CwveObLyd09?Mo
z;}^`kNgzUcb!X+mRa8xy1xXUek3<eCKymJ|6%i#ITei0oPHGpuEt%UXtZf&0CV{(?
zl@o;oj~XV&6@eaGvv0I|A09bOFnk_ksDa)P#K8rPd^Y`NPaKJWb3VQlM<{5`ybv(Y
ze5{I#N-UYxuj2tquv@#wt`DLI%NW+392JIkp-9G)_)=Olss21OlgzX2VvPLQ-~mbE
zl2nS(?{|w4FZYTeZ%h}VB|r9s-PrI))9^(p^3Ql=3&yzUD!?3K^@1$zao9dO=B=p|
zjL_IjZZ@BbqO@l7A>@cIQxnm(=LCX@aa#%v?&WQbg>IJ0a3J-zR*cRc8Urq@dcG|s
zRUNW_E0N*xw;6GHw;!eE0#Oj92Gl#y2|rd1kv0vQ5Lbr)N-e*xz^17I4W6Gj2c1#4
zmjj}xy$^s;TJP!<iges=*K1wSe`N@Sgky7?!~AQzdMyf6*=$0KB&f*2X_rA=fQK7L
zr$+*EkAw64u$N8JIqjSP#eRBs6kX)fjhB4^(Z?;e3bQV9abVjL^n_CX(#@M&UuXCm
zf*;^x8I-OPg+@`gmYlDd3g8s3?bt0e7p&OM&=@k%N>vOxW4r0|q!7g9>5yzaBFWB=
zOL{4mmp#M{hg!n8{aS4{4%z5!_!-M%1V0i7K{gsd9P38~;;=9mEM~PE-C~8uaer2L
z9F-Ee!sDpD3RZZWr-sLwABqG2MEs2mvx%4^GU(=Ga!k!hwe}I83D}x&RL$v06J#0w
z4q|HDn>v3CObrXy<gRQd*+Xl7JXFnTbBUlDXPrd$SEf117<f}m2)qdZbvUgd>Gu|Z
z(=l~%Iva9C!q5Y>AmAl%*5QY`_zwtNkc$%I5_f#}(FM2o<NYblTs|tnDhe=4dL&95
zy&QuQK%c0{WocV+D4|%yNjTgj0oEiIXre}$g!4J4jY3dBFW6wvD;6-;itn2c=!&9$
zBq)n-$6dyKSpHgYWei|GmJWeot2EtCX{Erx(0@7zn0WAp267_^+fbg2B!Fzh;WgsG
z8v0m`08m30r$G;uV=)@2*T>>BLcke1*o<gsjut9I50VkUW3(dBAi7wLBtVR0D2!wf
zj5PQQZSX}rukvuy`Ui4;)P=;fx`pT=F0|nmb7C#DJ;15u0yN>@NWx>W6CWucA{r>9
z1%^n7gh&K~&^b}p13)B0KO}-a=n$z3deB2WB!fF7!a8U+Mib?b9O973K{Sz@y4pv*
zl*~t+X(r*21|5g<B;MxwuI5b8zNaV!Kb4}pcmj6?A3Q<BS3HdaVg`d)PC1Al=YXa>
z_COy<1N|{D)(OU00YynN)=}0@_G7(0--LZibM+)_uDSz~U=R}xorXhUR$z{#NRH6!
zsn#-dkmIJiRh+k%GJ<YMxDDh8+`kidsiSgPy7-G^YpV$ENZ_O`Ig=;$(3U|P+V0sh
zR3o8pw&YWu#4lU&CD-@KPU?^ybUaVsjIHO1?XM!}g$)uap)0jRWD<93Enn(@BfXv@
zbt>INoCF^kz#$3$C!aUlbIqHbbn~V%&%D`9J8$-uGjCFYlM+pwC|FYRnX~w*W2T%s
zNmRWjuzsXt=$*6-U0Du8?=GI7cc0Tju$PdTHPkvyC{3K>ycUi|8$^>BL6aIjlM*|V
z4mhK=<CGXOvp{n|ht32rndBH55g@~EgqPtlF5)HaUXLNcT@nUGQr~)*pb$+|OnOL6
z26z}fFiavOj2;ulV_+fxFbvhAHyKRr$*>&{RN09Es#KzYD!XAo6%WFq!mk9tE8h!H
zrBn=JZ=#By3#cOPk4J2OOZ=S;NTsZ<`eS;Cj9PuwfygMg5o6jcAYw`@M9c~i<Df*e
zkRjER2sDWTK_a&`A6QerD?CireXT>WklxRoDyk*Qt>q$lbgtAXEd|Nr!{&sR5`D?w
zC-DYB50k^LNafd~@~ct#RjBkDRPP4V9}cKPuP)C5trMZQLk%>s>6Y-!pgn3h&$B}9
zBmnAo*c=4}Cjze174ijeI^=gBK<0^IXfgxK)DH1i7k{0G;blIdZW`W`VeptGBVZ02
zRgk6lyJN_g%oy6}7}^={Pa|)o>HYu^8UZaMEJOz#aXrEdxWk#kUBqD7S^;j70&Y$j
z;)Zf#m)~hazZ}AgRu)j4_S1yo>?T9v>?H%@RI&o%a2JUgxw|EQ+LN*4U5r{5oizmk
zs^e{Kd(s${8pnCjQ6S`<954S0>a&9Ste`&fc@c&BoW^2(P7&;L`jVy%N6OT?T9?Hj
z=AhPnNz><}uo<<(5W+k~g_U@SB_6({Ir3UpqC{YlM8z-?zYvR0Vw#i#P>SE+cM(j9
zU`lS6?daSzLv$~jA=<ztMfb+uI7759ogrC5nOsdM9MGg6g1i~gZF<lrZawJ$pm@^s
zFeuIQNj)%&29ib7n_mm7qIqoywO1e&{ta(nH0UJ;@ggHcqH!!r5Q|kzbGoeoUy<P+
zk<KVe;7TbKVG?8|oL>?`JaG1KX<B}XemYb=KKgY%elk4^Xb88c-L=6zA$vqebF?vY
zA@t1w7x;dlApTDI_(aC-Abi|DCcSq=x-R1TL3a52<*|qCBzmeP2ViL-IbugLC~`Y1
zk-Jcl+gXX+XDM<!E0O!$L~iFviQG8%;!J>biBNT^Aa(i<wDFFMv3-&Sj4lO+P6E*B
zUY8^R8<5j554BI@vXr%edirbb9FU<jc%ejah4jcG4WNR*a3x9sD@=k^K-z@Om$`%h
zNrjF|ynf3U@WjK@f-uRX3{zP&!|X1SVRoMZ!|bJD7+tMl9*L1+Fn1O?m^;s4Fq2@4
zV;9LRKuPh4Bt1A%GAvR$AW|9>QVIxC{6=Up@X<mPr&!wbre@sk>teA=TsvncX2-4i
zd{`ys?fARnh4m>ZRtabPQm{%|n=r9GKfbxWo7tpydpjB54{oSeDW2hvCxg!e^jczv
za`7X=L&@=}8xINnT|8016Ds^p&!jdIbd<Jil~2RGx)cS(RJ>2TBarR6Z|%}#<loaW
z@~1~;<hNB|)55&#UP@4&4kAwnbEglu(~!L}P`g;1osR4`^<@7FKNpqlx#8zj8`c3F
zTh_?A;BKZ=rGN7^Gt6AXZs7_cw?fDnq?AL*mI&UW7yt&xLKvLCsh>jNR^e-#CipFk
z76}FC>oPTVEU!yZX};d^W7-lEA`!D>m|95#rdD1gOij3FDjT4h{}QS2a{O4fclCCS
z;nm>N=?bs5!m9;Xj$cJ3q1K)qU@h%6Q)$s_LB!gb&J9sn-7K|ufZV({OJ#bU)KS<q
zQo?Of5jRRfx(}gONa6LQM;!q!7jdT)PL0E8933c|9~}Igh?|Q6ttoo*H+tsy{eaqV
zDKgkPl<9>~*yHagObvcDTF)M<76U26=6X=DZEw%7mJAB!VYo865XsU-NqNr^rh$Iw
z%x_%WN2HjmBNd7REyu<Yw$}`9#T+^NL+G846Se5ucJgvtA-NkZ2C1RboC(j3j8IMt
zQFQc#VMxABqcFITgbgYjw+lxhC?N__i-4BIAxZ*3lB1X_gCg)Fk+6{%w8)Ww7Lkx3
zQG0=)3?6CSdV`~L6pi};mOhGxd#F(!N3kf6u!A6u@j8pU#3=5L8|4KzdY2fT#av<(
z(i7QT3=?@D%>0)ao#|kBQ1cVV^(?DPjCOS{6H?tI^ynZtZ#a*nJG|l#C0qrNPt1@3
z4WU{C=9Ce}>8KM}C_E}6ut>2N>u6u8qj{N5^buDa0!jDk9L-B}qKS#WC`b458_jEP
zV#$fU>Lv*SO2=nc!^@gYsn8OnyDpdk7CO2}5!F?rod@(hcy)YtnaDsZBj-p4My_`a
z$(b90RM+7eHLsAP%Sex+1vKk)NzICM7n<M6Sl`1q^x$i4P8S)j?v{&bdO*{qIK2I$
zJMj^9-#Us+sc{WQ!kFm6Q0VbqbdrakhT~b#Q!hTIxZk-gF7CFb>tN6WRKqzjdUnUS
zee!wQNdWAKpONv-oud0}2=^xN^^5cA({SiZ;LVr9l`pX$Us5N&m?b=c`(DO^N!w*F
z4lUw&+>!oxcy}`*7Z3bX97+VIKH50Nc<(^TGwf(h*DWm8$tu>vD$ePbN54o;7#a`_
z7I{VOMx#q$wBr*CX!1eBrH+rtndXSN+smlM=AedoeW%6J(5(94bxwMxFHePx^Uif@
zr+dLU=X+k`0$DHE{Sn<3;L6qS&LP-u_*B>5jk)aQu$&KQs+vhd-J`X|+!H@=_e8ob
zuv2Q?LIUo3dfueZ%)_o!R5+;VOuI<`y9npIBkFud#*<h#yci!mQH<|KIoR!mIoNSe
zI+ZhB;F<OPIM{6$!+g3z-Nrt!9OZN_c{*NjN5NYlJ<FI@zP)lm``%s9*7?Tf`+BEN
zs_i;$CQP5xCNvEaL6e)OM=jd9C0(bnV371c*l=HTb}7ub5cFIOMXvJP^7nYCGucx7
zx5^s+TiZW~|JHk$k<@+b{YiV}y|wb*T6u4+ytfwXy_LXm3pj^og1?=3H?CSpbn_4J
z58i*^PFmqTDSB^xP)0eA@XU&dY>v2slX^Qmz`1S^^OdU?u383Nva)sbf`0oX`+FgO
z{%FC;S#0w8Ly*^(A6~qYEME9dR`J5uWS1npXx?*rd<6ngNBeq(IC|-Nc}2T;DZ)%g
zMt6t}Fdf(tRQieq_p6=I(JI$IFKjf1WyYw?z6~oXpM@Aa>!<5MpR?VJUCvsM<8szY
z>2lW6b~$Usx}3GrxSX}-aXG8RIwiFomn4+@InOakxwq-0r3**SQ&y;suF4SbY-CxG
z*&Io-F^}(465lmtXmH9Ntt4(wuFq2|vCmVS!&4UCPBF2ZL%p5a%-5+McFL{i>7?W4
z6tS5c>ENX0-K68%6hT02C}nLIBAwu1>3xjUmwkp7(w25pBRz~_d~9MJY}kwgeKqG@
z#n}!x*zifDlo<(PC1vHLm8b(3-ghT)eW&iWa3`BNeT+06j1mSf)H%VAiv1AL-uZ$Q
zCJIle=~5*56ScWRQK+m<>`s)(mq^FSCwct2@FQ(y6fost+;Y5pG!AAXB}mvoxXZ~J
zqD4Q9UIZs*<sB5~8YH>eM)(CKbqY%65ftqXRMKCCbsd2+lOc0C|D^Z)N#gb+yMC=D
zbNDHzJk{57K+-0rB|G_SrywfA#pOd)zTzwBmlW&w<9B=coqF=bc+4>?I@Uwz!-sJ4
zxb}-FKBV@&`VgJnslat6O6x-v@bkf)W5LlUhm_>m<KOgrY)r%T^uayr6s|sKZhUYZ
zdr}72iuLMA5b;-^F&x(YfIC$&fm$^j$E30%$C+r|O0cF^*N3$1l*Uae6CWu}2Pw(-
zXeWtFYq(#O+9?VY^hif2w#kG$DG?N_FTe{fca_jsW4px&->HDt)b|!^%uMVG^=Q$_
z1nbam*ry5#R*tV@6%TCXjFpKqmJ}$<36mxGV^O~=Ibs&~yE@T~B}VjvccmvL(g`ez
zq9sT0+Sc#@+xc-kz%17V%v$+?t$e^%K49OI512u$A0voYEsG#tqMw+`9Sj3{wW30J
zeGk53)_3MBW-(_mYe8YbRt{p%#zAZ^nS<Ew0vyE3Kd^&X`6)Pvp^3go4>205Zsj8O
zOkKoiXuwrivhOu48THRi5tmGvEa1-Pdh>-k^O5DoGE;c<#UK4Jt0B^{FTRkc6YVU<
z1(2*_o~>e@&2zk|d7W`4o{J<|j5LycpfEJ21b@8nSYevy)J9wgH2r%b=NgFe`~$JP
zF*wO~44!MKHY>AGZ99t$)i%|O)uwUv^D1K7vx?Xj?$xOmYR4riW$ZS6_f8TK=`F>?
zSVe(*22tR4zUL@#EsbDn1{b``4g{Bww$^fD!6l6F#?f?1T4Xr^+<0$S7TDXnX=(HI
z_U?juJIXtkI5<|Fz<jQcE?wO>1W#SosC-OjOLXvM`yP3@l-Njj37tD*9e+uz@2{ro
zXKW!v+X<NH#1mAy0$bdf_jl&ZYsPG#1~+18gg});ux>HTAtb^lSeqX0V1N3a!HM~Y
zB=8T`;}eq5OIV8+lDQ^-__1y;-Y)vX4{8e5w6PWAA}VV65|bnWbiELYzIF+L6jg65
zb>zj8!ri$1{kR)fieaG)<J^sR9?{)c+yV06gM79q|KqT2Cz1CmD<|ZY6LL0A$Z}Lj
zsfqk?R8+_i?__m6$oU+U=OZ-4b2)@inc5{gFYjnLFXKK%2Y3g>M;qh^cm9z*f6*$Z
zTek$aZl~mGGQOb?eO|r)pS@>oP9)cLKhLl5hg%iM0->AtRL!a4m4pOHE=^`8w{F!b
zlJrcdC6EhbX6O9+E6H|0?S2pf>~5%;@Mv3>Wm%SGNybIx|6ZoBq_7EFJ=^!w$&CH5
zm#gVwjyO4R@{gb|(8-*18vfcHwfaqh;U;pU_f~h@BwyyQq-5N()#Ku2_CO9MbK8DE
zXO}yhJ&@PW!(ZJiS?`*watq@FhXWSjDNDn;YtOUPUY<D25uVQ6BAwnxk*7d6gnO<b
zoNEoGM`H5(LtBH-bbqLL^WG+U&Yzuk#oH)i&*XR<UVas6>UgE=k?V#yMtBh*#w`o$
zjCs}ZHUBo^-$vHweOwR9OI%&D>58rFHO|n&X%jS?0-E#aOf@VVxCFqY=1_){L;#w8
zgPH$p{fP1cApTq~=HDgu(1>HjJa%}|+c4jQ#<puu?jPBD`ee&S*9Zh12+#_Rl4nYS
za86Rheh@pJJT~JX>^QO_4xePFeF`ivUlF$Y@V^BA%kaMf|Euu72LJ2D;8-`{SJ)xs
za3_#Mr9X!%ehzi|9BRQiRBVHlP_4_7bP%2cG?X4|ioD8c@EMk(lOyupDLNWslRI<K
zUoL)cbDp%2;%E^&oN-nS53}SPdEoIC=k1%S_R;>4H+F+SNrym@<M8GPRCNfLyCFb5
zf&hy4i`#DBFw!C5B|z~t2#Eb-y(s)wG*hA#m=gP$7_H#MDDGu?w1U%PM`)BI&?xRs
zSX6Xqm|jeFJ!ON`*by<DJ47^S>`H!=HbLWUq)168Mb3Q->yCwoX#D1AN9<YAwQla9
z@tUsjo$z8YMeA?Ff@LiWw)U&riS}>yrrUWC*pF`KM_~60g}?nh=ytIPyv>xTXs5)r
zS}N~gGF)p!@(#H0S}l~`V*DFPTYj5?uj&RK%^_moNmG#Og3E~8A9B$q_TFuH>ur{_
zZ4PC_TW_-*-KN~MY`4*Be52;J+pNd8=_T&=7`dip<d&L|-<p?OYF>V8c5bQJ`K>v+
zrQztUw_AyCxA+#5sg|hStvB09)a=_#sk(MbU2E5dH=0b>+J)hbrqQ)_Rd}Nbbgf?!
z-iU>-wX%7uT-4BO7}g#%3~LV>hP4L`!`g?2VeLV~uy&+j@Vkf98iutW4a3@xhGFeT
z!?5<GVOV?8Fs%J(7}oAI48zcC7}l;e3~SdKhP7)A!`iuqVeLZ0F!TqL-BCLeFr{)!
z%B?ez-r|HFTlL#d-=fuux>a9Si`#CrmY`AI)8h6TZ73Qw-)c~OtW}C&t((2D*3Dj6
z>t-LUb+ZrFy4e+L=}(~$pj-|{J~Wzdboe>)7~0>a#fqjye;12y#Z7L5=C*t28`Ow%
z6#byS#o0;3h`zyaUh9W*Yox74%oj6sMD_NvSTq|rpL%;~>}hwG@>tjG8%tzQv*z2&
z<XXh^jp4C3nZ$7n_9jy~hP&Q?wXWk>YqK^fSPPBVrx_N`fuj$|<Iexdqw(s&I^Gbx
zRH0!!bn5EV6VhTug?<I&nshLDg>W2<CI+S(7X!ny;f-&vozS^XuZih;eRtcsN$xHq
zqM$EtRQK5)yP5n}JO}HFqE9AsHbCF;p_AK>LQto3a#g>Z%pZPEAJ&gpUqrWH>B~O3
zo?qUO!@IR!V7Lush2)S$^$E|2-jv@Tc;?>a%l-OgiAgpnQ0D<*2;X=S`;g;turKqU
zlR5gi+cV+VFC6NZi|;R@F%h|fv_tQBkxcTeg@4cK^Ev-~jeo}@`rA5US0i}8KKe)9
zAh5~Z0TH(WDB2lao!vyXK?Mk49w2rpo`BxBddq+wPCSAV!@R^RNnn(M6uxZnil`=-
z!!%cELi33}bi<An0Zufj^-0#b#PFnzRODF4S@OyY3y$S&1;+^z>TB}f8$6N`BN%(F
zE@0gQu?T;!PEUDSd-#8Osi(wQ7l_rr`ho#V*6jcV9e}+V_Qo<5eWFzFif*<6ors_-
ze7bQScDi@nn~5F%;1vMKris&&L)U>faU5YveneG`GCQBi<m5mO)$?=t9I#S7AIs-5
zXLJT3VUk#^kfrQX-Zav3XuiZ2?mLiSc(CbFj|w2Wx=E>d_nlm`v{1mJAr_E=Hzm2L
z$~5CkYdZK4OqKvtL-a)A4xUR3^Cw&Yw)8voMErh6zbi5;sM38P^bF&`sQv|J<iW3(
zHypTXp~f0#vPKWnxloNhHmgZ0(xk*c7d-zB@L@)za?i5g{h}r=r-v6%M7C*FzNNyD
zNTb0Jiy-?&q(x~`ik_Y?iI0O$<aOf2S;+uLXIhT`)&0pRs&jleJ@t(($3ct`&h-pU
z?TqIEh)%vxW&OaYcUoZX=ifa`xJ*TZ(@I}gls6U0H%M)BRV#P8S@>C_tD&M6%0x?D
z0FlB)QTHe~Qasn0GC!0r<?bLjIz098?+kBprBvF-lw6S0gtLt=^{z4<i<@w7fVpy%
z325~VISNBr)ksCfAuiK8G-;`5($LccjWY&+266=Rd*#Eqp$KUs8tZBtX>f~fV!BF$
zmZJ>n_P>ipoyLC$(&Z(!LnxTq5yB_<JAk2d+!ujVYC5EFlUYKfO#Nt;qB~w!wb4j6
zT(EDT)l7beU4vry@GwI;elZ!MP~}l26?12^PQapM`oz1OP}s{VNyMH4LOe>mFUo9W
zI+R3}gzFCP!0<y%QJE~Cj|a7(pi=xacM~D$c+(JyhO4=5CbitGO4^o0Zgir-=8{X1
zq$%;T&6Cmv!K-k%P%V7&cU<@QzACF8$Y91O?aXWiW~NfGqTO+mnR$tU{3QPj&7`G_
zWhfW1kg1l5N$2rpKiwo&BbIO`Q`d}5=eL<|ZX2oQw!7+3mJIb;qnYyMZA|%c>_n`*
zZpzD5@Y0$4mDnb$+nM{-SI&Llo423*wS<%ZIxzXKLz5p(2%g)n1T7m|UZ_=zs&e_%
zEOluXwR#-ui7wOI?;8HcXCbXQyuAaWMsM#Fn+<hCUNf7L;S#_Iag%tW<0l_WsnkD1
zt^O%b7yLGSw9m)d@F|fhZN(kKy%TT7ZD-Dv2Qu(1>iXPUc{eU4oew5K6=x=c&(gY2
zxvN*6<kc&g?dpE<^!&1xwq{B*`46&sr!;uS8li#tV$A3sQ#3D(GNiD%AvsZUD#;&_
z*W)@@u(;-CU*<0TeYk_qdc!lZ{5Gu0Uo-suR=$tr2mVXswXnyV-S66tyw0^3ZN~<R
z_Bl!MO<c(ey}88yNM@X;eLZp8NIHH@_sZ)%o4o;A|Gl<l>=8`5{~mesu1Sls3buj-
z0NaSF&Cdj<0YF$Q!`ghs!7I_Y2yfF(XyTSMLRkEk*!YcvA`^0x-zMUkt=pkT?IW*o
z3Vue_ffoy`aD%>oEgs_cf%3e1;C&>E4pj?@a~nWkk)Qq&lOGQ~>OXN3ojEC+rSe9&
zxDnCvt-58r>2Kg~`8KNM+fd85ik78lZqrg@GyBhH`yLN&#XSnsIZS5uvcl|X(zfqr
zQ!E5(f1kX3=ZDB=9vdTKa(Vk9XwyTpm8U-_Gw_zfkRABU`F-SnaDl|V{2DaKhWW%f
zW{|pMX#eJE-1+#sYnwg>0e7LepvFb*%&kOo$Te$ckNGbGL>LHl1OEKZxx;r`TUhsZ
zl<T&>r)KJ&x+Z-G>m7I5jPA0+^qhV$xdy#a&*VwR20e?gaR+w}9?bbQ3m*JYi#Waw
z-UR9*El?FVD+IfMwH@pQ6yI3}-86d3@xJW~vw7&`K~8ixr|S}xLau#2PFJCX;5?O~
zXWsg5Pw~!wT(sq~L)``h^UejG_?>TLJy&(oWxb$3`6a*^)x{Gp--*qC`I^DEle55$
zJRDh>+djw31`d4Sw_{wIGpM*hGiT5YYKp3Rh^sQAh)YKfhpYFq#p0QC{23_3s~p2C
z1LY22vb5P`&1UulRQkus>>Hch-@o7yt3M%Ry%<c%{tcw`j1P_&hH6oLV~^JF>x1u%
zAv{z_2i(7eWRLzzqMRJl(9S7lgd@a2L~AJ<aj;WO&05i)4<8RGl4d<ur)lO{!n+-j
zN1OF5*6&&t8=r$jKW~Pzo}aR2yL~fij=N5ZZ^{|2es#ZMGH`+(N}rCcsq(=35ehrn
zQF`oJj+vAY98+MQ8I?kn=fHM8V{&$dA06(mReNx$SD)St+n6z(-;jn5AtP8w`XhfD
z=_yQ_p9_A{$>+!P0hG)sY!&`dX`B0_&30^~p4gL(E;e@oO04kvZ>t^oWe1`nMf8R0
zrZX<v%%aeR0<#U+Bq+*wg;FtiV(zQp=F^2GS#2+Zl1Vw0Zr<}cehSMGnvE=Zm!6-0
zSN4o6*WRQ108`ul_u?>gz%}x`dxQy&$(s6);&|z2r?+Ihne!Z|e$+g7p0nI<n|jcB
zyyh<S_JxoyT;ybvu3uq4uLS-brrwvqh;zO`@J&ZM#`#Q_Py4)rd^S=(PisZ(CdW*C
z&Y!|n<u7nrP>MYcdljvd`q@MtTD=N`6izt)dm*`~RSSG4;Z<1Ri#o2%uSdvyD6A6u
zQXn{D-{-y;^v&m*{GV+8@^pvx?a7KgPo_(<<}X2XE}oo6FY_tL<)_&%^3I>G*30SL
z3oZ<I_WGv{GC5x?RLH~v?|)xeuXUB)P`0+ojyr&lg9}gllOHx)ITla{h}Hbu4RW{^
z$VM*1i3K>nxSP0&Uv%mqgeKa_Z2BMEEg~s?OqNpwv+~?AHo%!%Z+Ile0L5UrQuC-?
zo>(IsLjPC5+;{ZKaS2{Q5HJ~z@kRttSM2${jXW+QNZ?Zs)szd=T<#Vx^9K)K8^qMG
zwJ5k-;VJK<tc60*%$0K0Tn&F{<Y=$_Y%iya2ljIcd*nMC1V<ErGJdElE1H$4iWOxY
z!)>gMoqAb`dt;XN<PY}p?5%V6i>>~cK0l+q)Zip>DIzb2m^07aIz)T;BTj&&kJ9l#
z9$}Cp&+$O6F7y-^h+?sL4C40$KR7$r3+ZOYpI-Q%l7+dAHj7H={5`+|A{wx$GE}$u
zdMA!~`JTFh&0uR3Zo>TzyuW!4fs_Ze08%A_>1e}|zgDMr3SGdf=cW8T2(g$%*eMQa
zupfzK1oa(n>P4$qBrU0A+@W{k$U$cV^YhULMqkP+Paa`?d$8AhLo=BRbKp<B<7B!P
z*DU?B?o)C}nBcCX;ATa8<5Gg9gYL<uTVs+&b_!g5!J16ohochuF7}}k(Z(;<>-W27
zK!nfD{!JI7G4=QIK{r?@XtRQnWX)>bx<Q`WYs<tvI+3i;`B$)hm$SaZ>1e~AYAzrX
zjL&ZjqeWmbpZ(%`-f|5k5CX0RA<o7ha7Bo|2Cd(u!+3d%l;A|sDq$sO8C`$|Q<0n9
z{UCz%d!DO^WU!{PJijRkR;TBeHTH=6X+UL!GM&R(HvQ*K=$Luh-NMai+Y}bBxuwf<
zNteDBHP}8$DWs&_@b2LFY4_<B?jw0Ld{8411;$vSnE13oNH?*hr9M8W@#^|Bx;Pn0
zj&j5sh&LxxGPx|H=ey|zbL;_xkRo*RX|s<J@^r~4$-Ym_xuFzxG2?DCB!f|GJ5oAu
z%V<4$YrJ>7y{6k<=vLH@XwM_;)wi&w>0O{l$J)CuIWXTE^5M7bzU=9Dd!FZit2Mjc
zDBoA_AL`#u?m$wWh}Gic9=@$hg@?&{lQux_FILNCdWTo=-7K2b-;7eRQYw}Wqip_G
zG-~Ba={HvVZ3iz9`5=bbtKkGY_ZN8`iJ@J+JU#JaJenB5_14W`&>Y<|kPO#1V|Lo>
zcRy4^=XK=GE3_w1B`WQ}o==_wu!qfS>l_5F7kJ)?11@V}ah5?H?FO@S^iCN%DzRer
z!>O}i$#arE`m~L42RZ6~>4Bv3LusC(`qyo?&*6FdYSbx2!m!3y*LamJ=JvQDTU}IX
zx-gj^wNlED!Uv;TD2g~>ypa5=-*L53_j+Xoon0zWc}gZS{7{6Qy~#?f!WyES_h~5+
z6n-OV20cyTf0kto=Tosz%jJuOMm7YTS4!3ZIh`uu8--FXZxoC$e5!+&>J5NYj97%O
zcph@1f{ufn#4mgXvf`CR2}@eHNlT7L$xB*^@f=+y#WQJ1OviMY6lSuLzrDZyA+J>H
z|F_MTM(z4vE*5L0(E5*W@9X~#UdR7p|9iKb+TY-fwL#k)m$(-C$$hcTRr_Yp9gdr1
z`t&@ZqF3`7?z<n?Pc!zgxTozbZIIdI$7DLAt=MGE+@l11J+9Z!A5Ttx{`^^(+w0l%
zuCQ2sKbcLQC-;Ax*nd9H7E3#iAWs;_r{j2ue+;1KXw8gWkFL%}&4JSqstY}Qh9SOw
zSw1gTcCa?q{}&bdVK46h?<etn95S6e+TnwFZdfQu!ve2~MtD*23HXbybze?@*mJac
zOwb~^Tyr1hZ_}AwpbFonODJiS+0wpWEFV?~NCe+tA@ayr+LH(20X>_p*7=9&k{dnA
zQo>IRX6UBZ@IndHB#4XU6AVd)_BDNzWecnc!!@5TaLfF_>^~=Xf`|xA2vLMXXxvZc
z*qz1QdLk2fJ=imQ?Lek(lo;AgCifIwBh5<$V=<d8eqv#>#E@-PAMt0YDK1{t!j~*c
z3xx{l6j5w}vqS;4zXZA{)YyhP3ShROk;Z@tE2YSUsq|8)kWe*M5?`&ILeXdi6$+%$
zQK9%oYHH$UNbA(K5~Hj_wdDFL7Cui?II-4>g%c~Tys(u9=Y&cr$2mpe#7eAyoU|~_
zotGsY7j<k9S~HS~pFXLU0L#YZfj*)y%3}T<-Qn0p0siHrW!L@s?HlVsiv>DKKA${2
zO_nnhZ5&JV*#d}ba_o?+#01an`yltw^gH`*-vM~*<xZ>?&I*FF(7u11qf03p*iQ@e
z+IV?l!$qN9XPJ@xV@gK>MmEO=6X+aB^XWaB<sU?K;U2$a*8P-@#XD18Cx<5I#T*uF
zuN4~_STB~pa;#A+76k|kr12wbPgX0|gthC%cB<xBZw5VGOjoR0ESDNN5PTAKzgfMW
zKZQQNOqRdUuHKM@xqwyPp>HHcvPKV<8zMQB?vA=Z?&VK=Hgf<5jqjj^EQxwR<ptpL
z__ShY%f-tx>#rYVEen@qci@s#6E%%uzL75$KVr=6Wd7X-8QdJ-Sf7rD-7iOYE^~A<
zyc}MA86L6K0v115Y~^tbEQtb$`x2|_@?{PR!)!gpwHGuIdqp`cVI15=_Pg-%c?bc4
zVt8JH%k+w6Rxhi`oK~R!wdW76Zo>~v0O)e%Q-y@4G@(cAW<LE<K+YZ8Ffgl?Z8PQL
z_GE_sL-Xl`wd~~|faEVt!3Ah^lZORN!Z{%0D}*N{NM5O}Jz)B_=JRqoV`kAX#jntL
z8VKRs^l#jt{99~LLMrR#uMcWXQk)o#>~FwnIG^aiiq4tj!Ba*a(3J{hTqg2)btQhN
zxA?aHiJBQIyp!*wF#ec6aE&TV{T$-t0E{AkPET_bQ<ByN4h)5qkI>tqVoad5;v8z&
z8%9M$!(()d@uV#dL$GdIRwN88fF~SL=||rr&FQw%k*a#_Cj3-)8lK~|V9})Ob$B3m
zv(MjK-8isR1BoeXp(B^9iJudACPxIJ=Y$>_?Pq6!8b@AoL&D;e+Q(DGUZ*#Zhl#`S
zs2Llb-bejmoFkn)M`{H870;l4yQ@#Mz{b|tb5^N|Zf}ZQ%B_eqRNRw<)3cUL_c>Ar
z_co*3AaR@Ab3e|99NFQRcmmzXTQYdwl)veF3FOVlp%yOaUSRr)tyZ8}7@~}1l*o#K
zCRkgd8{1~P9jqFHHO|C?`I2!c6PY={S473Dz>dzG$wr*BBNs|Rm5t#P%Pp8FSFHkd
za?pxEr`^?OvZcB?DidxVlv>A|!$Clm4!3=NY2@h5<ZmVZR_1RN{sxn=Js7mlH+O@T
zxpKGQCec0=KZ*8h93?tDIj16E3WQ#eO^Dr2j`$82BY^n!l|Iqlls?hM^C)6E9r5j^
z007%V0YHADU0^f-+WQ@yByjj}qfsXhYz`_2DL^mt=~{TdlT{B7KGA*1qdil(#WU;;
z$UNtqzr+0LkRl>IC5~BE$YOl~k6+Ezb$95vTA-=tlMv<3Os7G`I1Q|n5dZn)dG)wh
zZ`=V~Q=;&97f)IS5j*&Rp1VbVb_{qHa4r!YhG%Yvu{o!|(vREiC)p5Zlka@b#0{Jn
z%l&gYf57dJFo`<7<{i|dkIT99_zQY0;~V8nCY0mbUac40i5d;s^Iz=SWIB6UMmEaz
z20du(%RNPENKaO;J09)|xMv@27ayWp5}HS#rS;>|2I|ZnT(@t>8IYpnqabpv%g21k
zPWPA_iu;#44tbtCAS8T`5AtWn3x62JU$``S7>T~4{nWr@EBxEpxFDC#B;HG)dD69p
zyiU-O+%$`7zojs48L@6-u(%A0B8<F&>27eTw=2?XOxCAND%d0U7wnayPugx(fmpu&
zD7wS~siy<s{_c=)%V@-~fVSry75M573VuQCg)W$MOsLHxxm|eGxR5UD8y5J*(SzJ>
z`V(G-d)ac-ZR#a_b$f|lu$MwlP1;Lr@+h!_+N_hLCi^<sa-+T6Gz)IDi!(D<uFO{>
z>TU+r&&x@eUtP5nH9k=l&8R*HT+F%LOE4(M-b@P0cI)Nfa+Cm=*5T8)Y1M!TE$lRS
zE04yE0vDGHTp*_RkIsT2D#}#@{`^ub7ygS5Bpp4*5qG1(v)y~#IRhc<6rLX7O(0o4
z$9O<#wMZJo0QcN{2dzr36lk0LQmGO~(8{fhF801s45omI8UhC&>1{v0x)$-BzApQa
zbz=5ql9E7E?f#Y36*j1BC<6^<U&-M5_cXbGL_^>+C=O(6&wBj-uhH-Xe|!C_VTMdQ
z(Y#b=S<ENnH0CV7OM5n*+)ZcG^)H^&0=7lsXxIL8a?cNz-G+xUJ8sF$`M0Se-L>)}
zjD|aQAN<MOJU2g3dK$WUf(PmfGgU)#;{n#a5IA5$_^dxr86O+Y<BQ<qHdQvfS-BAV
zKr<q@cDPSy4-hbqGZYj?C>+CYCzylT_uH|PtQMY@M4s)yQrd1a)UbHAOip-_afn^b
zNt5FT-+X-9V_sTRb~9{>%$nTpOJ%)@>Z~R@4G)jJKec>6@ypk+UM65>BAb{&f3JpJ
zwDpK=jf&lspY{fdUv6jN#?ZF>+UChb+ievPUPO~*lsnuzyk~LmYf7^M&<OA_EatHo
zuEhY)lSL`PN?b=#93Tg`>A8Dlk8}viewWbgcS64<p+$!J@O)JXi=h-X(L%#p%rKQA
zqNEcMPF=mI&>JUMoD?aAkZ~t@`+btXPx8MTp`8GIv^?0L9(lA}$~g40oCoyPB0tno
zDvojby&HilR6T-}4W=?cR2`oSJdT|&)=G;@6iI4A(X^aqA_BZb?A0e0tm4Iq6Uj9M
z6+)R~)Vm%p>!PVVBag)bjdA7)CyI=tYS2#S!Pb)&JczEiRkrB+l?RV`WA8j?aG|&|
zuX3KjD;9Ei>4E%Hb^g@EAEjhA>iDz{x+#$%+y}C0gYs$={P#FWdK#H9A!%Cjt)BOg
zG4`a8A1=zN%+DyrNhKX#dQLF$j~0%&%C+St`BKQ77Cgqh&TTy$5*C(_M1e6EX%`wr
z1<B4i$?(Jmt^e?`RPbL(A*pA+Q!hk4JSG8>;RzQWhrq{5!GF(&q+V&~)RUyqf=NO$
zv96_JsL>_Rp_=1vgYdCdaBubSlBE33P%7$~NV9rQ_(sC#M!|oDg`{8QFF`+-Sbn19
z*cUxr-0L~SdP$hkSr0#v38w}VR#`)3KR1iXY)<uR;Np5ySdvU5A%KbwfSJ(hWr&;A
z&F~Jxb2#UA-Z5yw6W7f^{^->bNT&-2bBhcX7G0DslD1NG2oRZHcHZ-Pjf=dpp(L=B
zXZ8p>QZJZ_AnsAO54Asc*`V1z$AlTzqi(xrVGs%c95*k!LnK!Vb5a`Dz{ib(p@_{^
zxq2Vbl`2R6gtrFX=zTWoHpi3;#E&eMpwM&CMAg%T<0(J%Lo9kBh9Fct5S%OG@xJ(Q
zm`V3@w~r3Q{icN(cTbL}+-VTWtA79LOK*7Q@!1l6{?Y>h(CSi%!bcgwUiKisxFdDo
zygkPny{af(3YF$LzCERV*wmmPCg?pa7Yvb~SA5J8<d59qOQ!!IWXcl01;~a6G7tpj
zHn}&@X*4v`8@6F^fbE;3QS+8rz5fAH^@h^@Gtlgb8|Tl5tJwnK0{g*EFf~BY(=6;^
zq&F$WoH*$OFk%ElJkEyDY=<XOcHoDS&qTFX_s&fo*a4m3rpG)g<M}>LpAIw#sB~`H
z2y4^@DkHCSkwrqO4=~fsm(kVmtbbeJxN&&F)CR5z1NvnIlN$iOo{ct@Kv4KI&lCY*
zEGHF1ABT`0hPqE_J8+y0vrvLk0;lmg%(npy9k6x>rXYxe)8<WoOrwZXB-Aeupyx0%
zdTktkIoW_6Muor#1SlQ^6c_<lZpoOc4}6>GO<c{#9Vv)ok1fEsi;}K50VI%7Mcj=g
z3sk%X)aksVxw2jka!H=Tf4FQwjDUzPQlY$P6hIY|%*(T3iq$K){-D(gw7q3)=D^mi
z8D?f?W=@BhnVHjJW@g3?Gcz-F@`jn2IUQ!^WOBYU_iC;*()=39Ww}b;r7EjrS>^q#
zg>sIrEfO=U$!8U+-*jV+RV24ZK^-Rjh9<J736;u?5hOMuTpw#pylU2f1nHQf?gV)@
zg>vAj`K#Ysd(ZP?GdgAVJN<lqlahM<@;+t7l&iak*xYb$#~bGo9PgpXmi%dK=?ysk
zRL0nUKOfU!bd9sHavRoz=L-PnGjop^v;KNZ?OLl_Y9qNuCq1Y-H!gJiV{eROK1m);
zxc&mo5l*HM&ILip$SlMgA;CBHM#cP2MN7T+>!Tq1j{Z%A@Kh<wj&3m++wjPxoUJ~j
ziT{f*XOEMuq)2IVULkQ$F7GHJR3#=WSE&l==|sKxoR36yXwW-^)^AKs*o)xUuuiZk
zyu44@K>94P=BZ;kQg+s>&C<R=<ClT+0;Qf@2mH^D?5n!{mb({ep2hB;q@um{oLNk*
zTD0Cfk5;8b0gt3D?y)!kh7<op4$ny3XD$LDaQ+#|AN@)P)Ckv*zYZ~~6lMh#fwb_9
z$w4@9u9|7#wtCP|0oH}rD|>Qn!G+(~%M)d~biqEKl0NrC+^71m^HE-tV%OcJ{XS~t
z8%DLi$0Pk*<X`+Lt2^h=(ap<nE~9?80F8EPP4=mF!fEIyB|6cK$HvH45(b*6$FS~+
zdxyYr0KX^oh1aa^nFU;(Oxk4H7qXM!ZVTX&B3wyXH{|@H%gaL_T1-IswYt<oW!rWY
z?U`U+Z1f88vWN+z78ttn-0rbtEFN>WB2Q@mbcl5dd>3Vt2qe3bdWbkB%{B|SzUH5Z
za9IU00`KyDJtoK(ypxla-&316##ZgKT@aD9T;Tj|{pnc#eg!Q^EVjFC@X)TpG98tc
zC|(xl?S;}N(!jqwp(MtqY4BmWTG4-nA5_J3|0IqoPy4cE`qt?`;cj#9qNV2FB-I+<
z=IOZ<wEH~gqcSOTD}K&2quVvJdU*T@_NSCde$$feX>$(jQ&yDmtp3`#GlZEyt55kQ
zWw^-^Sz-uyA62=MS(D_5Am$BF9*ihTXXAJPlUb?wrI(5$wW@rRiqVRt{U85vF`*6d
z$?AQwN%C|(pP(OuAa*@OJ9=wJksHdr=dHy7(`i67{YuZf2-sg!s4udQejS1)G#Tkl
zH)4>CUnmP^CHUsX%Gn2VtyIVvd{MiK<=jeo#WJE7gim*vUT^&$olQM^*@Io{B(N%~
zt}DQlVRSyn|4!f>#dPH7)K%vKOAOY2qxly*h%Svqp*au6rFONmRi=MiwVRo08NKap
zAGkatrJ+o3DG3oM>e-{I!_sv_jToN(y$ueD>}xzUjC{ffEP17&)4`h<P1O-~Qs|1U
zWxV=S&Nmr(D7kz9m*XvDEYWx@nR=g(%Qdy_0!{&ePm;$x<f^(Ok9Y9YET$xZj3wH1
zA^;^mi5@9g{4KAY5K=(l#=aXyK_)IcLgm7OTh*Y%!5ZJfD!f^ENQ}CYP<*FenQXdQ
z_89vtJG$2{7CfRJ&?q?V?s>QM&85}w_z$s5*wn8_)0pbyLIVVY=vkzzj!YRi1{x4m
zQPw4_(MfW5S)&z0gXyrMTF28m<)>^&ha+`GeTi&Fpesk%wP1=aL1si!q;+}4|7w+#
zUwEo+GZs}thAzWvHq<X240brNnUOt?h?-od_$ou=pONpQrdunAZk$G=R52W!HanNL
z8I##;C*~Yeu6?X#G}Z@UvW%xxw#Tu8qR&;nPhmMt74@WD2vARy&ZgJSrgDsi4z(%d
zqZl(yqEEL=JGrQnJ6Ci`BjMD;xpASclE^{JEKboZ_hcW#dgy=tWmQib$!r<T9H|BF
zz4Kx!VRV8V>~wd$nn<EpBxp!>F#q)KSv3Uf4ib@O`86usTs90B8z8%VeF94nF&CZE
z6vyjsJ;)G5cgEtdbD4x|!N{-hC93-!CL@Nz%h$x3IF~Nao6vt!m}NNEhohVz$-MRt
zI=d;ujX$3u0pt%!ckngDEgctl_G*26MAXM%00`SFCpSG@`h5!O*E<L(w$SJ2&=u9=
zz*!j}nKg$pJqC=J()(j^zTMO=kdbz%9GW87hcs^`3K{WSY%57z>5-M7J;d)mH2ID6
zEtnOkVkjJ0&U6!(_!iu{AJcsGD_?uE*%L0?D$v<%$Cy-HyuAuXYD=vO#78#@#9buW
zFbMLQKX+@Yf%`??TP@+BAbh)KEAD_C<H2$2R~vc(9gJt2SrG%p9gp3qeIe<Oi|`-J
zjT<;tIm8W(BBeIIs~zzzxqmL<@AxrEk5r4MJ&1gF9iuIF!<&#ut3{mc*dDxop(^&X
z_)Ig^xaT-b2>Uu}e@6RoQFTD)JuaEYedI|-B3t&I8%Qq%xLsK#B>K;h1YUYx5@`EC
z3`oqCA7JM?()K|D*$NZu#cyW5^pBcdH{`c$gbstN%n(-86kd2iiTOF-0SJ^M4eP8*
zhyCH(p3uB6lRd$Zgyd8rk4eJG+rF>=`}o4>_QCk-aQu9EGlfj_^Fprmxkd0H_~zxq
zJ8j22Xofsn2KUm;+3$kpzB$!<s@8{ezV0`$I5_(2KlH6#fKxhx9xdtQ_mkMW#)J5J
z5lOY_z5+n2^<ExR@^^9R?>PE*Qkpj!J9B7_J&|X0B*8|XWIpQoOm%T;Za1({*Pc;k
zaIiJUcyhAhQIfZ#{$U)#Ga0Tb&*Zg}wy0`VQj&PSg2MEV{LU=$x&G#Fy$7Fav-O|(
z65%&l?d-#;d;ru6+7)!z@wn|dX$pMCg2Pb%D}0Hi_A@$yfv5ysSyO){fPkG6TJ;y0
z*k#fqjlK9)#4j7a)!umK`AFuR6*ZJ+v7Bq;UPq+^G>;-9V2=WpzK&gpj@=_t^qFBE
zA^!D=vQsaZ!Qrlno?OE$u&he7?u2f1kuwf9*f3zmF|49;=iQu98I}Z84P(Swv@0EI
zpEhM4JM!ms;{@&o|4Zqs#*|5ebM|TNF<k{o1&n!Vorni>s*r0!4wtE@+X?A?r<CCx
znvN#5i!m(_&MG#3^)dWv6hn-2?Udd+nphPzr}#>+QOcVV=~Ky+*=U+TSo9rnap1lE
zw27#Qf#8)_F>=Q!VaevQ2)?;P;p=RlvKJ1mxyTTNCjynYVM{9&cc?gWJE`ZW-`NTa
z>YLrluRgZ_Zc;v=46=u@ho&qW;VBvO!ZGO;l8?BoeD7Y}=8QwtNHnTc_4O)r|N4CC
zMt(H;5a>~7H+{(KLGw0!)as6HHGW*_LhCku$m>ddG$itvy!rx^J&cFOa(*m+UJS`o
zS)uU<P&)L<A3CERh9lhnN*_6<OmA7S8dS;Q)o|Q2MlD$=zi9v>O}8@@)TF~JHG{*Z
zAERou3j}<;;5W(nW80F(Pib6t;R{P0{uYv^qA@A+=#e`h#gb%Hp|{K@Rlj1AYgI3t
zh1uEEJsQdE_NH!n({Bydp{a1yoboNRU0Kl>wEQVr-x;Ewq6_=HXuU`--P(NN<e;%<
zyWhgSBs`5xH8Fz#%yju#Sio`sw$1(6R6Gkk0W%=_mfQ+#+z*>coS7NDxHZkB_G}%Q
zFdD4%*{LiG4J(f^SAAqFtdrLtS6;2YD@~wX`R|v$^V@*AMDHR%Kk(&l*J$fM?&Ygr
z-6jVCplhKc;dC|ctAZH|^k<{=<K}cNZJK2(V9+uCHQ76^lUZ-@B9M{_3nu8-CgKmy
zyi-BsfQp0RZy*7-9#EW~g@ENwcY$U!Ka2k77!NDDyRo@61YWGDnSL$XBwpv?;cfG{
zCg|$L?}jv3Pnvk?sOv&*KR24|g*!d3vmOFY4^P_-=d^rX>)6@DKr`lgdvu2hT#6oA
zz&PcV{pn>Wur9YdW2nkzuVWOhRpQRx(-Zw>{dP|M(|!p0zj!iz&E#w+R!+Lu1E_x7
zkz+4ZXym!Wa=tG~*Fv9+S+UlwxuR1mY&<*2JvV;kl5{$a$B?lmwrvgS1zq-q;trhn
zxZ=GiTE$vWZRMgmfh>bjdm8em8V$-*s}eRJNM5<`OsT&pp8n*nKqeK5ehK67<hP$S
ztbXJ($7)`-{B`pTcb+K*Su<5#IX?RKe?Vnk+EWDzLBVS_g8P&j;pih-EgEde=8PxJ
zd(t{Z84v)=A7s?%9UGcK2o&SRLnxFBPGqE4JuC_M($4i*ck_w!EMHiv17OQMs24%d
z)u4t!U>$>B(AX<9kQt8j83Nm844y)DytT5UWVY(8j`Qrw^!57Lc6-iCGP&zDnJg_j
zwn?hCk5eiw?8+rp8)UHBj=_U=)Q$P*MSM5f)J^#%2|Qz;pnP_JHb#R|7wb%|wPtDj
z_vkEAf@gx%J(K8u{B7dq^A4QK8|3E8OCCRAJDa4)2adtgt%AH~I)h$UltK582LkDA
zDhhrb1<G~paXduI(3!I{ggOxwYwVSp%#MTmz<zt0Aa$mbZC#ONG)4dXQEZu_n3<Uv
z%3GjxS`%EjGv@l_&rvYDC2B{fpEEs1ZoF%!KrV{JQr7={Lri=M7~$D;x#gMYedet!
z9I22{&W<z{AT!G~Oj6ZbPnai~P2^KDM~KlJuo@C2|E6Jivs!(WJb5}V7CE0S9-y{(
zfn+SSgj&}@ne{&;mLw}cMSt@ym65CMT|QVS;V9%=5DQY75)nDhuuDI4srb<Z^m@HM
z$k0d6Jx$s2C_T3g*y}Zq(|p|G`XoHd*7q|xeyC<m3Sv93f4>t^aH{isy-q-Pb)jx5
zjAPi*TR`NYrJ?q|^*h<D%jsI*xi9uIAN9^veOeh$TF0lncZ1wUZjn&+9XyZc1=>$6
z=jOjji$nYXhnkLt^<?x4A?3wC3aJ%!pD%x690ZhYOcqskfZ(@uonKMCLAacPGwzIx
zp;pCx3q`6)q(u!@7EMgyuW$6;8zf-<F$ygBw%oovncLo$N?qD-ZLF@;8*ug8!=;cu
zgjV3&m^(qM$owdm+VU-8Euy&+c%~48r%Ybjwj%H>>6o12T5exY)K%`dq8XR^7WKq$
zS`W97JkE9j8ELYn25Yc+w5Sd(WN){+=3V)fSY;a)Tsr)!<U8}4)lr1_CC()T>OuCH
zy8Xs}#GE@+H>y@i1Faq4uvFmb=-hnhldnBt8no$33XP!A7Xy?V3y{xTo{6bChnBW;
zAp6d&E>TinGxdBI2cfv;^BtpTf6#U_plVYB#B;j6vuIPEsGyn1Xx@Yw31Y8?=yMAR
z`85B5M8$)MkWVNt^)u#MpY7IaU5%jYBC)qA6oI2`yU4v8rf`{%|Mxn>3F)M@#RhIO
z3D~4Ag?)Y|6zg0c!tW=C#)-&a4bQ6Q$hjc45-+`uCM^X*fhSE(KdPEv%!EuS<-mS4
z@TT`zbFW54?Z4W(zV~jH*uKx1Be;Gao^l=bCPE*q&)~hA+{jolukyWwO3~*O-UePj
zc*)NZ;o`?dnAu^Fsp~kT0~`_mK9x&xT5$&LJm1WtQ?Tz}>^v0Js3=3~-)0I-xH<4I
zaZBKracC56;E^QBPPOAr419nh*{ni?CX|J~W497J8ytw`9Ov1*0q3J~$EN-OgmGbZ
z+WsSC_;S5RO#AYB4VQt;@L%0D|1xsxNeA#wt!**hQVZD_urQOW(cpoGa{Zc8eG0@O
z1Tbu$fV91d`%rlZ8`zLRwcU}s0C%@XSQ}8;XkL*7HhK3`Frsj9nR=eQUpG)u-0jD(
zuT;?s*E&CF{*pM@tJ&+(<{gtc8^f-KGaa@2mqA;})6zkZg8*TZev7mLYKnTTzv&JN
zYHpB#nrVySmk_hCn7ed9A47;Tv+DEV)AF5gkmmBY6xEDMmT}e&r%6Dbp{fh1jXdnD
zbo$?ikMBV>>9y4KfK3Ow#$6)E7?*AI4b##MLesMQhj3adZ-UHUd5wi59VqR|WRAW7
z#E|izjF94z_9K^?P#otDED^MAy?@`^40rwH_}u2MTF#3lc0oYxry?u)NG=`ztXL?H
zGQ@3<^uPVnx*zrZz2YZ)nc_Q-OWf*5(hf`w*41jgjs^_WMiD8hPSb-sIyP<soPurw
z#tT9rn|W`6Gj{{<-cjNZnYMs{@A-sx>oU8(_*`~f3qT7b1J%PaM#gxr07~FdyMgM;
zGoPcHfr^rXqF{nNnreh7NWe$Y#k~eR4u(|)Aefj_<;kzSiM?{QbrCp!A|HzGuB=I5
zw?Uy0=mUCH#gWqM5Q%$<dA;MwR)xp{J7Oh+<^_Hw<|k7n1}G2O<4N~6MfZ%LP~~@x
zpcG5n+IJ~230?f5jN$+~45-qT;;l#XTur)_Q&vrs;Ag@Jju;-IwzTr_<PndU8(c!4
zI>L>WfDm1{cIic3@a3|}WV_$M!|IK!I*de;2~8cQ?d^y+S}CB7of6h~5F&nj&i|7G
z7Frw$?THAj^q1okUPL%C!~)L_E=(Yw_>BbekRBlmg6-Ecr;P*@zO;+vsoETI$@9sd
zl|cfB--bQNBuA~nNIom*AABZaAZbz8KJBB`j~oM!1Ux7Qu_~VN_;VQYSUic`UMB8)
z*ahdLiCeQs?L7T}O+(m;WX3SV&OjoN6MYh)e{x7Vh~SRE_pnUUof39X5m1+sR}md8
zSV;GExS+~k=R*O>h%6ArcLbn~3s`@m&1e~5NR_fBFZtw6868Qwqum%#$$h`~v4~F!
zf!SvNrXi(M+w&v+?B#Pmqv-qnv0C<Mz=+-L#);3$;pM~CTkh3gvG;#Ya^rI}{xS?#
z*@132_T#SBmvS2r?Mg&OMf^=r7F)iTCf3ZN3t_gERLw{Y%q2nXV{j5SQk*9hw^Jm*
zOMy>BnL3O~kc2R-Zc^a$+@#xG6LTYvNlk76(yJ>y^~%R=3HEH@;e*4OrcswM<QKvL
zlH7$#9jfLbxA(bo!;hbK^p2dqLF_7RspSi5DW%LpMI-fA5`#~sbSnPo>qutC{fn6S
zRJz+GK-l<>FNz7m7=(POEQo@nXjOQ2<2s0ZTLirYchiaaY`;deAnGw)aY%fzhEwda
zhBNGK8CM0lQuFP7%S&Zjx!m}WNEqF<A^7Z|nG;?^H5ba$1Y@V}(KB9aQ*6?BT_%uC
zDB$nz@K!jZ74nP))AJJigRuc~ObQrtjKy<nK!@N*jNlj4x37N$k+$<GZ_1wM*^lxY
zL+;6l;0+;z>i08C=<g`G)w8wkJbAN=aDeXKVQD}TDCIZQ8Au1y*lK^>mQvLY8I}jb
z<N)&8nd2>sH_^^chpj+<@LkRn!G*oz;U1qnGlzTwnBExYWPHG)&)=}Q^sf;&yN|=^
zkbT_wEN{xj7+-<0H|sq4Z&Orc(QtF=S)m6ZoxOnOnvK4I_!UogrW@Ko0boXgfFRl&
zP#&rB0-D57RvH!yQ81#0+uGpYstx&3^=vGEVtYg(EVyqown$P!87cfc)poOw|EQsJ
z(=^qnG^E$lKi0^Ujs`9`3A7%LtcYsn04p5+vOe7OpmlVdcqJc=#WU|4J^y3p$u8>|
z)wz!hj2mCwha1LYDU$3FStY}v*N_4jmLjtk0=y69O!qe?!Ae3T!j0d=!3r+eGrjxO
z5GrMZ4CcuosiVAC$vo%ZtQzUr5BOLUY}E2~_?w=eCeh>Sks_gicre8@`AeS<W|P%@
z3JTC){tqbxc1ipnQV8GikEVFXg!b+-P^P};73YPv_~tQF)-KmHHD$J%u){%9NfL+r
zg2_%8gdV0~M!+}ui$T~o`inv6H~WhLBZL+bN3>R=RzIa0jIbrsE(Q5N2Sq6PVyOzM
zZ>c((My8e*1G|&Grg7MilPt$&2SocLJj+Aq+1P<1Um}kf_o(oS{$N>O%J-4~*5f6l
zco<$N?eTy}E=7b)eSjjG5r12(hqJI<ieP8n7QjutD2ADJg%?muz%&;IPY@tm62g!i
zfC?7-Qi&|^Cl^=}j?c#Ygbe?L9Pq)(k;~^p>pn$DzAGYO6aJ@D?98VEIJx(qUVNxT
zSbrk0uZ=9u5<sS83#l{hqb1%2Z?LWj;{(`O2Rzbw_TmGWSf7W=fVEmVd50lc7!E+O
z({8UuhQ?gcN-{1H6UsBJuTT<h3yAiY)TZz=cA_2oZR#Ro5W$h_p1Qvauz!Ygft8@F
zJvF(>^}na2LOo?4`@e>C@PALt!(D{br#M<H0Ylms%kPQ{u7_)VVXRKD@LJ$?>(((A
zGcv1aD}y6NGX?<!wovSw$f7}pZal%KffiPYE0OUjDs%wzy=;h~p<L`PR9k?(%{ISs
zs3EjNL;3+R5ashnoi`zha<YwCn}fu&e>3*5`KRh8ANh4fWt5J~G_*{gVnruDHk5KZ
z+<Za^M*n870GmglKX18zV$$s{r%TjeAh#Jk_~l|4ICMf(zuH7`RWkI&c1vV%A@L4(
zH>BV2(?swu`qN@xsCzZ~tHKc0xi0yH5QO}xG;-v2Q2t?tNZ~>ZHU1znN5$Ab(}Shd
z!}^jHTjK&Mn$LQF>}B7U?Y2=Kb>v2_j)K6&9ID~+>CquDs3`mzEHn`#6qk^|K0=ra
zC)8-N+w`3w$O<hCnfnI8!GaJ7;Q+A5#2wIph2Q)0a180P4^4LN3;luqk7Dd@q9oU@
zfATcU(_7#R^eg5$Y~TcF*w_Wgg!v~{u^Q8R@IUPrp85}nhW}G8?C5ZyS74{qJs_!?
zpX2*I45-R6VV%>u^7Uz-LjF^wQjY@EoA`gT7WRB&&+UB2oLu{hKe)4#!kG++^>5-M
zT!Qu*b#dZ}OJQE*?9TE7;SGBa5ceq?T_J=&0q@oyd(H_2{I~d@d+zbfRG!Q<Re$E0
zYrPB1G(P#~{=WR?o!}0*r#lClZmI_THPeW&`~OqJ`nP9l%lAE<_4l#X^4I^%F#eAl
zfV0iDARTgl<M(=|x&r+~{C_v({^;7hW&bXkxCu2QC8Q{7o6-=6+I(5K?4|}Grw1YM
z-pysQxJC{B+P0kCA&382+C)#U-u?Li1-LiwG9mvowx5+9h&>i@v_Z04s=Uny{`Ouf
z=pT*-^-nhLH;8i@R<jA@9z65Obq>TQ{%mg<{nCTy4^fk7VC%gQqf9(@(>j&bJY}sQ
z@4oou;eYx3y!rig^=1%<=YJ?Q8n;e(UHAEw`@XZI_ideUU@u;u_u4uY&(5*jcJ1rm
z1Gv7fehzH;Wp^`$-zpGC{EgeXv*Y~zZoloHN1+;DkDJj@IV)c+QQGhG@MMwvU;#&(
z#C?Sj|MYcxLI}n8##1K2l_~HrhVLFO;r;FLfEYsX`Qt%~F-2&`o0VxFBhEv!^-sQh
zP5{p8!Cfv^oO_#TCjuwdrOW6l8Rv5#U)+CXk!eycePENC;L=p(iT=Ocb{2*!Slf%P
zGNM{t+S?4XnOvrK)|Zkmt+|H{dt6&QAO_=n>ZugrP2s*vLlbUqT_MMQ87UU^SX(U?
zdYVYQwplk+8(qa>d<N%>`HwVP6dy$6lZVIe{VenUz6N+oLyheEv5pw#um~@&P%NBL
zmzmrnhqIIquxJAHr@hLs-;8{=@nL@5ys@0cc48}?y9mm{F{U0GiuKv;oDDBkK%T?#
z%$B~eoW<FTiwuV5=jq2nOfkFU3C(1w5dnApWPY^`0P%2KX`4jr<*=L#E>Z1#uH))R
zta@4v)p$=DFvKjF)e!;*Yy28_%>IftaRci?N{QoIzK3jm8D1CMvJ14Jd?Vr4;;cb_
z=;XX;e9HHPHpSY}cXz({+0;!j$<e>38-ZYgVGA1&x~KICQ20dnz;>;<THX2k%uc`_
zu5PgJWHxhoXlh%x8b5XczTHiqYQtg<>h>HBzqVis$o?A31gEpE!IYtD@N53PcbeTT
zmpU=VC@ARXpI%LA)1u$KS!a;Sp|k$;{i3&y2nCvN9C^6^n=faB>&f}ApXq<#zt?p?
z!>38V#^ZBzoi*!LeZhtw1h4^Q1aqvnl-A8<SuB;mtY`TW8MHZmxQJA#`t|PEg(HW~
ze=N1!V0Mb#8zO&V4E1xi&3~0u-MD7Wl-%m*{$j%Eul|N}YkHH{*J8I$H&A}pTsbF<
z@e2dRvw!{cPY$LxLa~9L!DK<~o++Q+Bqi%|-X?uOW%MDVRuL!gx{H3dZ}9w<Qd*8e
zs=(#iB(MeJWto9uKucQmckiEW-nEHSz5uJ~ZbqQIkur3i;BMZguQ#FPMn9m`<D{~(
zF(-~3e+5N3RXViQN3aOPy30eD?lEY8_Vz3H9<Po=|G%V$?NtTYX)9Ao&a?RL5_So2
z=s{~4MnfQ1j<8~>CEO2Ldml$IsSZ3vRZUr=9OXd3lc0mOz=E%xkXPj!7m)3~A?;%Q
z=%c7%$PW9+$yi6^`V0;9SczjOum3{t-O<Kg!Ce2nwR<1gM%+qGQwmJ`DAoJBE?d9P
zQ3w%ja6x8o1l%<c6FO4|p^&Q%yEp`%9=R2b9eWn)v#|;@;FGel?VUHIk$i$XQUCA-
ze+oPKG@Z5edYd8lnMW9QcS5^o%=L&SJTcWvzLqe3n(P8q50C{ren`mGo%5^Fp`v$6
zS>Wu(GRAxFY``4A3Y0=w(;r+K-nV8!1=7VHq0zZJbGyn=L_z{kuw?CRC1NdD_q24g
zPba~9BDT=&ucdHh#vy5hv;zDlQWtpZP!F^9&5&<{h8Tdd4S75bN)-JJEIEsq`0lk0
zNu)dkN`faR2&CPPgAN4NZ4_!#z;%K4C#8ITDuGqdAaVJW=dzcH7@~*(U63I~u;e8%
z8pFUsus#ZWR8NXW!^8g*oJN#MMmMK0cvxilpa+rv<pmB6GoQ3DZ7-+f$vf1q%>j6I
z{(6!4_8t3^AvDaq<gto>+64$>@GdVxj>X5V|9R&Vbvs<4j;UB2YTwa<y3H^}mp<3V
zee1`<P~gewmbmMJwB2aWMwbft4O4PqFS0^VAf!$r1cI2v|F_zwYksfK<*h!cfjMtL
zQxtbOFpA;7nUDbd2&J2dgT%fCmz6RL@z1j_uxfE*Q>duTScxWAs^cQQ5utjh@gO+p
zz#rIBQn_3I{{dUlc9csm$o;UE;bZ??M5cw6A@!D>%L+pC*tv;l0))w(&1qfxNV|ea
z!JAqOGp9b3q*x0YC`WN;txu394yKQ=FlLw*lB1Nt!o#9Cm5$`#fRcr@-R{Eh00q@n
z#fAGwX_kCWS%ak#y6@e=$?CJ!1qImfJW|ewF}+L6&c%hh7*o9c;5alW;QUk+#-b@?
z3(~;E1mxRkuz2moRFO1<C{zA(fqUWUg-m#A;R+Sd;0Y)WO6+lJ)?Mftzt<h)7x!^0
zqr>W(>e4YDN8LzP(xYLSoB3~Oeq2PA)xP3{Q_ionrl>fQn^Y}5ZtSle&j~sKgv~Ro
zx{ww}FWxd|&uMA;I`GtA^gi~dZO?J|WjlcawzD3Ai0dCbAD8+%T;l3k3rFp1D+7CG
zX?^CFR2Bm3&;B=4-}Cx_Nx^*)#M|@F{Znigm4?S5zNa%^%<fM;k8ya9i&Ct1elwlx
zHJtl?DgaEDy$v`eY519A5?tjD(tdG%MZkSP6W6hA10}@p(lsL23aG*P^UzmC`|VYl
zV)2wc?SN7V<0YH~c8>)mq3mkR30fW7(S|ww(sZJ2of^-&-=JoUIAAoEt2{EzOS>&0
zh$`j=Fa%9=vcB$<wP^z!%;pWsrXK-dS;I#1U%mQ%wzx1-=+IE_Q+L5K#8qlp3;wj_
z&Vv9ls)0jFWGtkYsbzYArP+GGEuiz4?P{XToO8p<NTl`FeRlS@AoPT3Gu>0KcbG6}
zuK~LB#{yb%<J`4HdCmX$ezn;t+Dm1G=0aL-Cz#aFHDs+fLv79FU*p<DXdb!v4L+an
z1P%i85wjm4XV_wc36kHVpx^Bbd!OO2bG@}fNV&dta4~0dIsFfHL>{ZfoEENbSagEo
zvLL69KGd(DlOhc-Z(<@}y&oR!uWvM><?E??LB3v4E-1;Na@(=%o6FH6DTJ$)i0@Z4
zXy^cQbnX!!Si~xU=hI4Ld5{?R0ks-;_rikmR2%LBlDr7d%7?=k=i8iUOlc9QFk>HI
zBC+6+(_Flb1rJsMV$~C!?rD61mMdx|7vC0UMWvVWlCc!}Y6w>~PWn(o_4<6H6=~|l
zBm5XcX@}uD2DT{!J4aK93P1HHfPU%m!QN)HG7MRUja%Fgd?{Q0B3Uz{B}_A{fw`%4
zS0z?5_P|8wUiqOuLfLF{lq{{AnGB;?dhlx_LGZ65do)Z}b^-lqnIwn_>U?uz#NFYg
z62F;W7d`t#!eTX9v6<7oA;@waIZnV?80aoTVR+y4(kO5_ATtLyvRuobo&9-y=uwh{
zgX4IAmmBFtej{wv76Ds3POq?F=&-$pm%`bCOfr}~kS@_Ub3jNQCE(14e7)O*ujyn*
z48Jox&B;GJRm8hPmu?N^(sh&N<(FP$jcH$J+j*RL!4$5taNC|pXXpaCR2VO|8d)Oc
zmNl)<hYb1c<dXaNlYvn0i(5Y|<i?On;n4~lXM^&sO*!_<E7m0%Hk+G}xA`oA_KNV-
ztp#RN$Q(l{G8uIdz#(Fo&n_j09%IHdGiS}(JH;evZIXFYK9$36#{9{&#VkH)jgzCJ
z$G;EtF^F+`xEc+=p7h<bLAS}i-Z$?y@IVA?)v*ZY0na5ls}`)l_4WSZFeMr%5$bZ9
zII_kS>f-Pp)iH1IhY({82{9KCUU}4OL^<S9i*vu7hlKK$YV2>uU#~R8zqBdEPa{Uh
z<Osq?mo$RUvmD!k5x_Y=na{PVcUtH;`0WwaJevqzM0tNtxxJe$;<=Oe7UA)5%}e%N
zt(@Csi6DX$E+=beAr{ojl-!~h+tZK8UZ70~-kXb;1k|Q*%^Y7KV^WCHr$NmRfIJ)=
zmCMMdcj3dr3LSiA^&$DPdy|LukQ20)BbXLH-aVMD5<J(2RquoeiPRM!M|^%s2H_l>
zLS*%$y?5-6Z254=5njCF$bDT?5<)BRKTtx_q_xeNnY}164#1xjMNfUspln$AQjC6$
zGR`5PceLx?ppP+LhmO+2keUio0SQ2W)71$^2hRu$XzVpZH`WnVdz`6AQ)ReF_sZUT
zlfV+1^5!+4s8X;gsXxVJMF8SR0@qBwccNLHyAZnnEqXI-&mHgiIsu0MC4LpZkv4ET
zqdev$vC}_qaPW#fQmPPB&W?K0^AQew#X{qy$pQ`%k&o3vcC+F%Afgb+5_Wv2%d(sk
z6lwoLVh3J^cQo`U!Q@rTeMYNx3y#|YhD9ht8Re;$F^|=n70whfPI>QX;FLYG96|6q
z&mkl|AGKA|EJX&aW6KX1I3FVpAz-T1@u-Qi(MOJ{aAP#~yt4$>J`PV2J}D_^pLjDE
z#^`@@WeCvy7vZFXQfM-O+B5pA%~<Bn*iNyvcAm!(vF6jDSYRUQ;csB74;ZyDLFPa^
zv%O=(>K*9|E&Cip%&N&|_4+TxDNj!SCpP;5wGOZN#qsH})8TnG(vT(V3zG9pGKw(i
z5UsBorWOk;KcbwIx6m@+u2r|fQ&mPGoyjTio7V6v5?$a$qEA2pfD%x|=?B&rByBcx
zZJRAivJA`rsEFJtczI!W%i@0{OB8H|XtM-Ui*7vrPGw!t%<f#G%dRm%h{MIA!VB6)
zPp~?3Y6H<LC@cm{brAlyu}~|rQd4P}d2@JuwIU7u!2)3DF5`U%2^#mj*zrE3?k}5{
zHy7IXACPnZja8LTeoFF(<K$HsGTz}KkW9n5)8{1^{T-9LFRe<45ic~Iw+lJ)<UovX
z{kj1AtrCT9QAAvUqU#(?N5ooQ-i5W{V}x#(h%H?9KOE=C5Fb6D$FnOjnc>e5$9X>X
zI*k1AKOE;vrb(OfY|q5b)L~tJ6SP#8rp=rK5UEdSLw78mJ`MKqUgoc$)ius9pJt;g
zdhA8($E*wF<tIq*spo{Gj(S{?0<Nsu|L#zF7_{yf+n-?5ey%f&Fg1%*a|cTE8uLsw
zO6lW3aXC_T^e$oIBh*#NYy}+~6Vf2t2LA^5^>{;wzHDCXxG;ZgJm%qV!)gDh8cU)E
z4;A26qq5})OR-e8+HI9P*+r?X(pkO@PBQZ=4dR{OaUIE0^Qr~<qqcgrUOVc2082A_
zv4Xh#*rG2ry@Ir-&-gk1_UZ^jfdXFXn?>s<bYTCNH238UL;7M1t<9P0cBIi`Nesq}
zeW;6CgQD)lEf5639LzO?81(TU^wYW;CWiL_S#15zA}|P}^!|V7PaYkS|Div*`F+#y
z>?yd6!Gt-XGHrq{hZa!(I3g-bHTyu~yb}6#E&;iqU7v$7Tjaf{H&)4XXNoNrLTpe{
zmlU8WZh!uw_L=3@2KP9N&tIp6iD?jJzBS53Dl$%x_YG1%Sb98OaqLeQ7Kr3TvX?J6
zg$<a5otNtY86;?jc6<Z}gFO-j?)xm^<UbaELXGrF+^tu`bI&3+ia!r-!W$)Ri{K^A
zT=uy3@Rn->7+O#L(4kG0?Uzd&7e90;L3!X(6zDy@I-y^*@7luk6YtEoy0sfXsk?4l
zV6i7>GccJCnRaRn)JF$5)V{~|A?Q=NL7QQLr|aW7Op7+F<p0D&#XDYbX}?;7S0=3B
z6H@><^7YRC2`jfftKZx7Wlx+QCisT=4Sm?>N$<uvRZmm-Jb0Xz6DOX+Vs%HXx=sHJ
zbVQF@R_AzS&X|_`E5AHRDE8g1FW}G1_4;~mZe&+yzR=lk{Q7Ci;TbmcHu!b#*{Zqj
z3Fg`Viyxoz>%atvFFt<a@#G9{QK*bm71Imq4YKq92|0pnT_D`}q3mGqpuDm(&bo1H
zaMt)?W}dZrW8_RH=o|leQ*oeeqbOeh@G^Rs5oqrAC@5_a(MKlZR$NwN?!1=5s@(aY
zEh$7V+1;b5mqX*|thMwS+kAIW*j&B_M>DMyF2_e7j;{#JeM0jYbKalEgG5C&z{AX1
z{oas$ZHNDh{oux*Q=ba>7|31gVFSlxlKsKb;HL5Z+@j-f%t*#2h};Bdk!H?JoW^7@
zoetFz=}PW-YIXr-ec*;p_k_+uH|+ffOuItD`8WCn0fV2p*)Nim0OMVu$bh9qRa-ll
z@-fu*sKBcxMv?2&mf-3qOW0nBN8yLy=maFLBv3}lh4W2T0gH!~d-ZzKMNcy<st~p)
z7N3e0<utiFps;jd7DZ)|jd9tsZg6^C<BfzdFTs4RS^dhxR#CHHrnB>xBs%ud>Da~>
zuf9N*{qo-Ky)pgZI?RJ}$Q%0pn2l%qey%v`7WxAu#)AZk^<iOHAqZgxvEZiAA};HT
zj#4lrNxVS4Ms-S>(tuy8nk%Vv6p|~vQjhdp22rF`>7-lfZI@7mW%F;35&^FisylGO
z0_aGL!qx=yoJl6~s%@@9?9^pmy`J70xac%&pH%6vs{&mt5|)3@Ve8r0eG|NO_-HLg
zz>Q&@k#F$nl;N&XR@Q;HetInw9IoCT0>LtWdfojHP8%S44N)g>Y~1a6R8%}3k;09!
z9oDF;)A)S%{g&h7fO-PS^;YypJ;*16YX|<1RRMj}Br`%+rVzzw*gh#Qk~MI|WA8JB
z?=h0!vn>TaUR1&_tlM8$fgDr4&(d@VOGJ7>L@~-_iDtZshmt@gNI;Ii$a)4y90A$W
zD_?<PSEFG^I($`6a*aO}bFU8kj9c{YI79ZNDCi1a<uxqFx}~Dc$Xj-ql(SrEXnju~
zM~KpRi8U$}ZN-<*A<PU4_a>)Mw^@Yw86XAxsA@>lMoNk5g$|siB2-^$RB6{tgF1Us
znUwm)6DuDKtx8$?IAiT<qUt9#*=RMo-@cam5~-C&6?+zj;;EG?gf&@0rWFt^5jIF_
zFs2)0N_7i(Y(3%_tYAYB<_SZxT6BMv3VX<ho$keSrUUsgjSD~}dhi1K$ZmnH&dv_y
zud>={N0n+d#<7o9bel3``5W)(xKGK1im7JDK~j|1i{YJI9ayt0ahMwm8poL|A&zPr
z)^7-13gVJtGy@UM;v$gM_At2RKug6CGT;}Hz8c8=tU3f*PAP3I6h;yiwKJS*1M`Nr
z`dq{Og{k0QTum-BO~<t2Zau+@d&&BSA<T~2kQ*cilZ*=%3uKvp`%FaFz4ksE7eLv=
z_=fhpn8-peQ0JaxSsOY3Mwr>K%AZRghTGO;E^Xl8P(LI;2kCmQdNH#pF~{II0<W@m
z#^obT4S%BPs(+@x_E%V}0Xv|;p?l-jd~mN%@8X*ia%^Ys>c&<X9Nw*5;JI|=xu|~b
z;A!r9=>_960tcl*t-=CF708rNB8Y)yw+YH+j?-CFsXrWo_JjY}3+2_<KLmN4&*yCF
z?c)OHvFo!cNaFO!=!)dKRCPMh_?LY77Vs8*1o~THtFbgdXSt8>KIIV;zMl`#HO9T-
z$|a^4o)8<)#XwYU@dXMY!6jr^=86d{Vv=m4GHgZ#3#qdGU?j$HYq!2d^I>;<n*!$r
zX`WwxHuwZgh^Yt!RB3$7eh3NgEs-(&Ra9MxB`i%*tw{GNt`*FHfunNz*(%+rA3mEF
zx0j*U;EhgD!^8W-F#v*(JryqsQj7u!IJ-;qG1$TBURtb;)^jaV#ub%)M$r>eUvenu
z)L|x)B$}Z}{pO9q&F4Ztzn2S-WA{ST@N`sq9yumEp#&Dw;Vb(`dj4gX*60qUq_R~s
zHjQXsIMV8-BZ0Ldmc?JcDkLi(<S?vOP(%=EW&mEboIkG0sNN*CkATpmr>Q%LTnQ-*
ze%DkQN<pc5{Ks<XNB3BuhDTvSNiarNt@$@ma!laCC~Rd^R@bjFH8R#z1U4D)`gnE(
zqFG%zHi;rtr5IB5QHKJ$tWj~rs$uJYdWB#RB}WA!Sb5z1t_iS2DwqH{Q9MvKt$#u)
z13(o1xtx``Ah$X#h9vbrzA7DP`^N_lUkO_ZM;10+1NV@tarA?qh?{axecgHdfx__Q
zCo$7<H$yER{7sSuL~;Y)&16S0M~>v0Yk6q8WR&IaF?s1y-Y=(#vZcJK_ZttngoGe}
zH|uFGDr>D`v-p7ADw-T&M6xvY^GoF+pc#>rZc$9ajT}0{?K)0q4UI}UHotV=4#wzZ
z=fk}7d#_FBib`4P>4WvKZN}+jl)|p%>;J5(>Y4H7feiN=mZ;0m;?cICs9PO71c$M|
z=*T+KhQG7Z*;Pap1uoc!us^sEhCm_m&b^!d8!wO*YbL$;EilK>kk&bACH<j4eTSA&
zP+k-#u#~-NLL<!+%>HZQ#u&mSjX(WPM9Pn90x_jJ&WgyeTvC1<s5tjsNvN;hKEA#e
z&wLHJX>`EN&qeh9b#6G|xPvJX?{y8Cz7ys;F95#%0t|8p+bG7JRQ7K`27y<B5zeBu
z$&YP0OC)eMIUF(hvkAC{y&}c#x%sP7=WSD_g(KMlq`BD(Sq<y*iY#B11mQ%+tW6^=
zw<-n6xfzgTp{LpojF#{t1Ri2cHeFW(SGhzGTyH^&dU|lbdO>oaB!A44tx{v=e17ut
z)gvFzcj7yS5j|BX%zYaa>5#d_+HU8A@nlx3k-yQLL!`e7z!I4H9Bfke&;seh@t@9|
z$*Q8DgDoq0_Z-dHb~6!8Bk_Pee6l#jTL%t&umD0=WTorZ)N?6#F%8`gm>K+64Kn<w
zhV&=J?$Av%-nd3R9-+t>iPI>JWj&Nr&7Ki5m+8`Koa)P0#|i$La*WX*4Coam%{y!|
z>$nys2w`flf?=_+gu!1@GAPrZ^iy673hf$`&$}0l6imq!>5qq+VNRM-%B}~I*EdE=
zQMJY*Oc`jvL&|4k*W&>z^6#cWBF6Xv*Qil&(6fi=1DnZn*_Jxh6ozazl~;rr!w@Xt
zh-I{-zy4Hi;K#E?^p|K#6H&BNxGNWLh{?Taa7!i0K&pT_U6X+APVg$x7y3=)sruj|
zZT>kZG87Nz-}+-lVB*wjG(8I89&Q2jVzg5?r$gHXd*c}!`N6+8h>^ecH0pGZ%@GqI
ziZJ(VzWh{TZ(`LcB-h}NPJ{gl2UcIGhpEw2vNSp>cwPWs<*p(3Cje<#BhGYsZt*nO
zSb@Netn&PptS;1pSLW7(q*!BrZT3LQ57mhG)en!*cwtHwVq+S2?O^;mqCPm51mMUO
zB@_YHj(EmJRt~0Bf|o|K+OseMAjlUnJlp|ebM)Ej7v0;y$5(qXqZokogcP^g{KLPt
z3ddkc2RE#g9@`}jk(X0|Gwz_ga<~%R#G8Jd8s2<Gs3Z<OnHwcf=utI?Bmm^PTetL{
z$1s&r_wcNK-NGDz60Fl2sWd?4H?`<=sGAXkEq=RvUz1v(;ANiRxJCcDG`d(gzCG`3
zW>g2)ym%XLHcXXt8qzHmR6lIM6>&@Nk7;0EgbQJ8M$_mIzpYagr7rNYoQxlFLWXAT
zX}S2ao@#KaLx~)tQ)cwlavJKmAq`P<IdIT=@H#$~>%{^3)EaRyy8nU7gziIhb`U;w
zDUA>JNM_Fg*cw$abG{~au@<E)rS%f2V(R~x@7hkijIf)<eh7PKA#(kGFMaNS23-ha
z54f*N7&uTJtNqa7IQG;W^U8(*>!gWd2a9RFpLIpK;ZSp0)lOb0a61>f<Oo==!D-n@
ztxOlZRF7V-L2S{%Y|-r02|<Zt-4CJNJ;6`pB$?P&)^Z~4<Mh?aR?|!tVOW5C(FocK
z?nY5{bOSw%0DNl1Bc|;=+B<zz;N$|lVlpTSb94>{TP)f8OQBX0&BU*(&gM#^GDqf#
z={>@jlF}nL%cZB|<kB_H(DNMoJoT*y6^qr?RYRUpIRB=%oYXGrK{gvt>SSwRxpaS7
zA@NJj0=#UUiMTbzV-)m?@aAI2k0PjMz3?M`QNYTEI{Dr^QZIeh)3AScRjQ$AN))*9
zi4NJqT~!H=5=dJuO==*@RuGuaGI~zg_hx3n$@P2UB2u|+mnH)doa(Fx!9n#_4oMA0
z(H_iQ-~r5b_4fy_;ofOrR9j(2FF$EZqx{5V`8HbFZQD4$We95a)%rWGG!!ZuV1k~B
zX8nzox0X>FN?OF>e8HednXi{ofN5TRk8zDy#H5OBBU{E(fm2#xB(YWgSTq-vtX-<J
z|D^|IX@#L`j+JD@Xe+F?gH+@vtKh^S%WcmK>+FjPP`XcAcxdV2OL4cpv*95jTPE&B
z*F)b@yjCP#y)t$29Pl7N4<}kme4+ZKi_x)ml)4gN9;k$5UTtQhc5jLcE9j_08|hHU
zVg<cTx<7%kw1YGq$w@AL5(+G8+V7H6PhGwLmx~+OkqcRgJr^0}mXZXE)?nYB_g}CY
zNnv0%r-584^fxmv&~J3?T9xJDcqU4RvnpI}H9Y2%U*uJdVDLT#Di)`_d)XCOuS`-T
z5d*8VyUnd&WdQPDIrM$E;;M(QQ9a1nDumg?!w%CvaUwtB5ZP5H17emWihG9iQaQ@$
z(#(nsXwA$jA|0xV6f+u&Xo_#hE@LuYkPa#!I44q|Bok<MwPw8e1-aZn`E=Vdw*j>e
zV2M}_EE?l%0rDVsX$XqjB{Q85Yo!lk;d>OQg2th#0eAiGEZ9Una5SFG<5`wKuS2u6
zKqZNy#<Ju`KHA}63DE%oCX%lwy%k=2|2ndv$B6%MJ_2PYBF5oGbsfy?%tjCs;$g82
zK$g*M>;1wiEk_Z#bZ35G2tyEtOA+A(6kyTV%D`nzS92pVo4)M*S`QCVK{lq=hg0)$
z2->ekLo)YbR*fE{gp;5Q<GwaN(SyUE)R!He12{3J?u_fll!V=-dNYT!U13!IUDKCU
z9&rc}acu&mpF|abrI2=Fn2Y><EZK={Al~9o(C_Q<>rh)u+^n>bvQuj%YopOg(4njE
z+g7#1t_yHJ+CWxqBWo)bD!7H|ATLj1>=_RuvrIx<pz7FkqEeAAg|a$S4`7H>){voW
z3o54Ox|@*vxmZlNHj~Cxjt~S<>_(#oLf2lfJ=c5dlF6%)&I4jBTkS&k4Y>PxaITaB
z;^LW?|4NMGNr5^_(;l+gETod%+32p1DCr`k<zK^gBIrwdI;mMhb(z`URK~*NJ7sEK
zAE_Yam8iqu%$=8!vN#Fx%2#gs$C7@0xI8;;d!h^6nVbE|CB@Y>lE!kefCrUDl%(>M
z&tm^&Q3(k@{j(OPXqQ%`zdWiztJhot%NOS+my{-4sFMp!rY=(r;=Z0g)_cPXK^gp@
z<%K0sp;Qlw0U~OxKrw|__8^s8*%pSLPn0YI!zS3dGD3l|Ov!ipr%xwJ6pJEi9V5G(
zO{G$Xq@gj{dyz)MBgEwriinsCxcDvd{BmQcy^{pIY|&aRON7|JyZrgdaX#e~c!^o?
zX;kh@(dZ_KD&l_z6on8j5xmXxV|=W@Y=K9h9B_Te#QB2b<#X=qgFF7Z*8#8$YNLz9
z^5K9EN#?<KX!2id?hxq4$<U`_4soT;-5xKiU<tx{gTwIX2%}I7mMs7l+Z2W8I_Hou
zNg9}S&xeJxmLII*0sGRk%$7~o?2nOz30i46NwnP+k#0p6R?RUz=$C4AteR|e%<9<X
z;yBBQ#=AZ;TVuCFMh-|zO8DT~AGaZ~Qhiv=uQKgl+#R!Z(WUrHF&}P75}aG+yi}Z1
zG>p!hkcuMCAu;^&>&FC(rG8hBYaNd6h6y;6L_-m6s{1e7`&p5e)7i30)3ioz#-TM{
zU#p}gsqxpGFQ1XJ0fKiPLwHYGyior!Jh~&ZS~!gagY*1xWds2&nropFr(d3^9V`~w
zBtD3Tb@UGRkiAJ%v?yTYty^s|b`-2Hg4=9ocdFPvXkE~3?je*VOwj%pJV|dAjr%Q4
zxRA38wDsN6F~b)Cn^X^OGC06<s>NJM4on7|wnMo#m!2w9e0jpK4f`j{b(>bO1td77
zc*MVQY!c?X<RYz4B_oYz$m67h>A{Q5B>}4IN(z`D)A^${S4eaG25wx-mv4$qqZ;<i
z>WXaSyHYxeY(5IxAPT7#<}zSb5bwm&Y|-|K_2Ym^!hlZpC?sTe$F52D<gBUH0V$3n
z0UaGbha?MBZ;Od`E8oS_){Kf(%#3Q0?=xtFiE|20&1(iOg@qMO4+_H*Go6x0m5MxB
z@-vzL9<`FofI5`j?=SjTHSV^lVyuq8GD+weZ6iO|S`LIk?S_X6fp6ocf{B$hY*>^7
z-YzK`2bC+FZ^xrgWN5z;7>W$_g81pZvq^>A0u?X}U{B-B9W4{ES0j=|{9P>5o4l{j
zp73$PnDu_bCl>$gN<{6|?sK<wh=I<wwQ2<svA*26yJ!mJ+wpd0F$6^SVwlOCwyL(n
zpRBR_SZe7|3EaxTL6{~Eq&h{|w-W(N<JQQSr88Zow4scX?K6k%M#|vt_|eN21>Gnn
zDgW?qw;b}So-UJT980OnUI!|DiCzjAdVG&Nn6gTqbuxj)p~{mq>aYe}XF?QW!|;}<
zTAB+&+7~mUH|lUYwQ%MD<Qlh|?HD&|!HKC#X0FE%_fZtK9Xk!ihE;woiY>)%>`?>J
zQSC2V7BhAz4Tv`@1MzHDZA!yEM+w+Xaag&%5CoXvP(x=LYJ>J8pWdKOakQkfsn*tY
z<ZsU%$kaF1KBwXW1K&nd@w~)At#cE($6V)B@p+Tfu<zs{S)VX=Wqj+(S}s`!h9bUs
z?@PyB>&dJVw#e&=g2Cq{;}@HZtlCk!FJce3FbwdS9BkF(;qr3%qM29&dEn;=1E8um
zLoK{iV%Y{@&3-Om>t$h!h)Pcp+vb!bD*L)Y`_lck#QnBL(4Bh|F{S!S8ch|nYFa<*
za+(7%xPrtmWCpLQlf9b4@U}ZI)$jP<OYw_h3*S0wzzwA@HMJnCiu-!1Y$jkGvkvyt
zP%+<s;stUCj0$dU5IrTNw+%ds$Ps;>H39pejH2q-!8^m1FUYp7Tuaw&nLB}??1w|w
zuZp24{))d}9msF1A!jQ4ynf4x{MHj$iU_MjhF@9)vOf$~6mg}m*Ljr<1e3+o6M>f#
z2}7Ps^r6aPsict?M)hO(HFF}t+H3S+B6U~tERkt>*c+c>_<vD#&cU5T&AZ>&wr$(C
zxv_2AwryJ*+sQY!ZDV66o16E&zq+WJuIV#X^WRL>>GO1do^kE<1ozgtKV}B4bMZd%
z5dLs%t~A*bA4+UdQ6p#OD!N&1>QMh`&Tu}f&T?$#ApcdJ<=W6cRu_3Uwj{kso0sk(
zau+33K;tPv65pVpqPU)hS$q!&2*`9!-Dl~GywEKICHwf`_|VRTA3fZy-lh&7_>56G
z(L`fD6=~r(JrNR-Ifsy{M?ezQp+uJQQmiaoL8RKLAf<AZ-P08;hfM6=2hC*D+M{!D
z>O5^(EcFgf=Dxa$=c%~XUEz=RaF#?k$<K7&dCn4}-gcjMa$RWEaAC9A)A6DP=re0N
zh3vJlo31ZYRh^n2I(I7A(Du;J%>t)5&yB9P^I+Gn(D~my$}4n{bjt!G<l4O-?zr}?
zUhg?yDZp=BKqC(Q)$DU$v3}~xn_PyeLHN$nuc<bS)75zi9KvZ~zI53v5aj|S;R|}t
z<cmm@elOzbp3js4=3U~TeW{%|O61}y&r`I5b^0#3l8TeJO$aR$3L2d|;$CGz!tyzI
z&=s7id)Euwm*-o1_R-kT8?*o3rU-1^E}^!Lp3llG;--@`fmzt8HF4jPn1g8S{SbDo
z34Auqw)RLq@U(MjY5GZv>H1viCh*<=)ard(WeB0qf_EFt))|zEJS0ZakCzgha0|$z
zt_9d&4eE7o_ceAxiX5oj+Fc>>sRcNJ5lgEzo1+_Rb%iluH~sXg&4TMrTIIP)sph(8
z=l&gQ`j99PEc)?$crMP{?a`hG?`?cHcb(ngeT8L*xFGs_j4S<GBbo$#+n(%<yERO%
z)?<O|PYBCIG+4l8miZ&thc0nVC?9vKyd6al^;!%Qkp(^i`D2RRx}q{V<1uj!D=~O3
z=bPg}f)GbJAmbj($Na(F%nGgry5yp~E6rK2p3^Q@rCZvcW`sGmS#rAI_UvIOx9`d+
z8p^gmQlTr~Hkq`nK-w|qZ}q{BbiENw>t-Cx?fVOSq-}Y`oa_-h-e~;XJ8a`0xj#jq
zrNc`z*_)mktfvgyP~!5Iv=FPSEGmhVxV;Axm1ri0^s6Wx_uC5iJ4+-<jl`nxQhTpX
zqY17;$2F^;zFkKUVYIkxODg7;y-&$~4vZW>Uu3o~XNY;xBsY6t4;?+a3CC+#yTFw?
zTv7De%-aV9c#4X>J&+9%^E0ZYQy$EX%}K!hV=2uYphU~*s^w6%^F*Fn&G0ix|Bn?k
z`^Q`Sz%@tiMr|@Qe&41kM*Y<VyqnATxZXxP%E<CAF=Ov0hny%nw2*^FG@DWwm9qz{
zb#x|;CRoV27IgzvEo!Ok$ji+aB*MFln_BjeLEXlK#llrWO{j3uA@!tr(@5M@1sw*L
zo6RssJCecfv?o}};(&Tx+U7G6LuL_i-gIG{Y9l(4+VD%Y^Ff9VEp+tsq+%6RIbf<#
z3_qP)7!(lo>lbqA`c14ojD4|+9fP>w@tH!Dk-anA9h?yus;$e;8XJ$=@U5{MAv%7O
zv%G9gL89Ntx`c(<Fq1|m;hVN6ORj8Xf@KR+E%k~>Y#RmZPM*Y}oE^w5qb~gDv*I#S
zk2Lx!w-h9DiB>ADowoh6^s>$@V^8O*{dY+^-<RF&!>4_V*(%QQ$HkY(*XH=PO(VDF
zCe0j4Z$58-TZ^VgMh%B$G2|bs4%wSq=-%w*tha0>YBgy<qhZSQZj^Yf4x7I*Ko@b(
zv;~}NMh}i+t)p-3QWEz1;`lsr_MNE4x8koxK{ba&f%$HX5ROrKkKc<38!Lo;SWm-T
zRCj7cGl)08CwxCX`=46pgn7L^PZId{noxiec-(sIvX}tw+G8*)gP8zlhuKk;$A*30
zzz#6jXT?WpI5{Ih$_Z87!?xK6ZeH6T@myh2*hdhfitwPlE}*B<TSHu>tQhz0dB|4k
z?LR$CDeP6DMRVZ=&u_o`KqeHv!!FY`yh5nV(Oldr1&`@ISw9s`z*#gSe`|=OJn+*|
z1UXR^c*uC?lLreY<vp?|JQE<|!<z4?3O=UOk_GKt>F09@pa!~ue=?w35mj`HGF$xg
zkHXN(a#PQ7UBdGZ2{IWePC)V=i9mPa+&g|${Y;V#7P!r5N&SFPBW1|LJ#(9nJW|?C
z4pv5~wAzr{^4gDF7m{>V<=Dj|W%SAeEXl(%YmV?&zU{LaVmF@pKpBeBb-wSeeZO`G
zE?@Pp{BP1!K5)7l;IEEG$agJ$o%Cy*m&s*tOGRGz|3gS!cd=_4eOip+qy1|sz&qd!
z_Fw7HqK`w9<@@i2V~^iI52yQw)7il-z1cGNP{94ixXnFp$M@m?uN{7DqBe=soe+SW
zdAfzI7FV;Zb@AX8cFql%V;)?lA3qqGLRcGr`70Iv095`fAddEbTzPi9_U7<5*hiCT
z3v3uB%kBO%oHb~%9&n)=%TiQS^8x-R{CyC9{1^_0bD?iDsP>}_X7L#m+_#-i(JO}9
zFOsFOUf5XfGNd8UfHuF1|6q;SH8*Z8?)&xB7)jP`I<-y-$`n0p1?tZ~-%N9h2Lm7i
zD!m3ktB}V4_fR9GLs`qXm)W%126C8x-h2Xa9w5xVxjenX)PRDa45J(%<3Gl6KbqRL
z6%3XL>Ln1Kr}TIdDhbHkzBx7J|BheW+je*in*R?;bzeD?Xt##{U{?9JPInDpheaCh
zhos8-LsESZ1zPR{o%jz)mH$5^)ubPis&(mDXeFx34@s5dKP1(>ACf8@8Z_x|WjqaT
zJ~k%WKHw-kg_>I7eO73i1KjE+@-Qi_N`w+EXItaKqYWA~yHd&=jquXJN<#hepDeR<
z&~gs5Tx3W(SUhWRl?s|d&HfFtjLIh$zA+QrX<?8QNI$JUf>sD6pwwKPX0u$ODit@L
z!$#-qn1dDR$1jMfi?oZfzYq7ayZ)yeli=;i=l+pZbH+h;a42{D6UZ;?k6<{KVPFsp
zWSW!)-s<xw5V}BX<7&Hj-e6E~NN8dU`I*LixGNk2gxT&jkHvyl2g*f2F)<`aneR7e
zevxM$+*LW4phrgkmVOO%jZOX3!apY6Lj2zo$l|Uf5MUI7pc-A}FEkj4{9@UB7C?f8
zmwr}1O(JKHDIft>)5k9W+-6pG(q-7JD}}!WfG49jnp0q_r6@780qW1OjpXGhHX%#T
z2IoevxF<8g5`YV!3gA?k7z{SMTUuRub6M|sa(y!92QFQ^=Ap`6xe9rGR{=`^G(<!h
zCqoV9qD@<VIZB~`Fq;;gZmu!8f)JYXYZIK$IkLIy{n^&b-gsn18ZF$w@1^k*ux7#J
zIo41uAhhavOZd8M(jt$+*f#|=SgC~$_k8lQmS>16ghh!}-2yYF#bAq*LXWv~zA>^^
zh1<A3%T>jz_yJ*bM5GR=ID%`^3evZ)DJ#G}$MSpz!v;$;Vq6ht+Vy#Z*D~I*Sh7yK
z6IWR(CsjGg0E<wd=pE1*Z^U?B@OvlY;*}UqYCb5rhs9!D9m9ww2+WC3esjlhi9C57
zd3RyG^5ZhP1mo$%(u4og3md;?1tg728C!9@KZ5vvt}N0W;@0ky8GOQ{t)LVRn!2Ia
zkT0mVN29J8f>+)nYRUo%8U;j>+}UB2xjbpbOd)sSZ@ADQ&3$nvXRritI$k^voe=mR
zU~E4XcWa=(I?blp#OU?74Rj1ML{aPi0mj<Mkt4t$iZ~07s6$f4;!?Na6>x6WymjXc
z!WUX(9H^i1-mMHBfVGN$kHkLr0mONlpEf?9gnCGXU;7<MK5!hMprKy3i}p<hEdxvo
zv~{jfCo+C<<g2R#*M;>=9c;c+*fBaBvcJjW<P$7H^fE!B<ligs{3@=<UrffOs3mm`
z1Q8jyDt>6K$au%ig!2};=3=&K-(woNs<#0Jy(}{o8|()@OVoAkr}TlE+lD*Zt;Yd_
zhA||hiZ=)!Ole0#(Ul<Lw77Q`_lO@x$umAu7egP>D3<K!gZEmyynUHO_qYEUGpWW|
zHBn&IN><!+77;GmbzMizv=CP<E4yWxv`g?n$c1-q2TVj>r}QrJ6~uu-`^V|dQPCSX
zN?*rU!t?<3!fh^vQy$$(X2AVya8{Upu}2)55wL$&m<Sne`JG6v68|Ch0)N`vjxZZR
zSM(wM0-7;|-MAfU*vFq>4_7;DXfkegSmlG~W61pLOp5z==f_M9&ty7gPu?h))`I>Z
z-S{rJ*Ie*9#ssLw;*I8Dhni%a7<xu?#PYNHwD`x|P#87jkAAS}dM${Q{`Cs9Iu1pp
zJ5JS$0G3%)oCG8Nt(o`?b6(S2X29)JV`}0eeYLsNff&_7BU6lV2N*~6QeZxB12?q`
zw{Gny?w{;pW}L3=2KkRqWK>TdmN)IlzSKAE(7xX!;IR2aDc7+4ZL=IRqm;<Z{{+14
zcXLK&uA=$<KUAjHUn-fO2Q#qra-Ey1f1yC&)FT9|PdPu=`bNkhTdnWGJx@&3J?z`N
z%Vf<@Nn*npT-EGJd*Jl6mZ()fUE1l;ApMESbdb7Tgm8h2oGLdhpk^AJgc-U8PW2i>
z`z1NU#9ru1%K=(Fp;8)VqiTG~spkm(o#-Qvf5T!r>9@0Po`w1@YK8n9`Bg?R>s$6#
zFkG@?kRMbOrd|n_fE{EG!>=2uz-mE7hpB{YC36d<@NmHKYH}(ZCK8~u&kGV5Ioxu(
z{y3h=C*T8q2j_YY$$>dUB!~P&wf%`CFI0k>UQigTVnpM4Dq;_bb6@D(-v;3e3^ncU
zx%~I^^{?dn@{5oAPxl4<CnCNRC`6HGNS#?d;Uxd`Nc^PzLwPDssCIs(!v2bUHft+9
zKkCdzsL?Om$vbn1qat!leR)t1bmYLBW-n|90t)3_sR4h1e-rRQSMP@BwB)<x4!EH#
zczyMdlmpzgH^|V01BVJ0CK2{WHaVS%dN22u|6V13A3gyPPA5IQzPlOKCP?vo=hxD;
zGZq}ozSxOk7fl)h?s7lbLZR<!Kt>N{KVf~$t0E_*8?y3x>Bt3hKX^`O@4g;H5o2=)
zZUybB6UUPhp(0hTo;Q!45vWg;a+EdBC-*L(C-uWFAKaf&sc*7RaR1&d92MPobnK(k
zLx4SB+h`y%3%V>IsWEAIsge@i!9Ra|2y-K-IHQ_Xwqv~17h7t_!4Tpl@nr;3w@7lM
za@jH6OSVoW-W$h9trg$?4@GxCsfOS@zDOofucI-&U@bu>BkZV%3Smr)1sDD`X8MFV
z{n|k1D*A9<D>(2#&xDdb>s~MfzkU$@mj5<~6>{YfUSSTs`5Csk3<qCF#AV1>3;IjV
zgzQm!K{$Y+8U8&}lAuGkh;3fqTdn{RUl;i+jTf=B=##XS(8Fw4f6+^jr*luem3o+H
zz$s^9Mo3i2fJuvN$fALVWr%WsN*<r=6*~^uZ$Rn5oOe^Hi2EfJ-KciHkj2czWiNe?
z6~!(2?>lUd4Y;|*56l<tzcAm=601#_8Yh7=BhA`m7sWW2pV#HVyU=%lBfg-#aIQ2b
z6@`^KnvZZX^S#+OgSe}+M~0Wyqt2t{GaLMjvJlE*Tv1|&q=eAnWq}(qK*MK*CP*tW
z4ucBC46U+qL4#r%B0|>Wl1-^{n-Xn{_ri)c9-p-dJMUtI{esg8pa<m64}{fv!!DqX
z{E({SGN{B#Gjw1CnK>6oYIllJ`asIwkvG~R%H>?QIiqM%?u`q75sBk+jW>{K(Tw;a
zWPvwPZ!}@JI%v>{ci~#nR#yxLzmwvjFl$?s9m0pl@5&U3FO3(HzBBfIg<-(O6AmtZ
zm6))^M(#+^wZ^JIp@WdZUqT=40gn8*|MB+Z^MCbgigY5w2V{M+^_sp`*YEJZ;9aEf
z7V^Y+Ngf&AyesAZPV&H7-^J!qz05;_Dy+xt5a)6C8%nXkGgK?Jr;?ny&kgU39k$=<
z2^_>CX)P<UI-wl%@yl>_szI9>Ao_H>5~?Qd!#g-7`Gi%NUrx{7ijg3JP+*WDW+^7a
zwaB~4hn_i(k(YwK14!~?H!ktA;9$#MZ#8-fafqd~)>Bj_+=*`NrrSzU$;I)RMmcrX
zkMYFvuD<Ok{(^J6VJey>$)GW(?iq#53#ay~`!I8^7CbQ;ctt_o=cE2|N`Z;KqTr$<
zAG*evpn2Q`>r`)8_r4??yQgOo^QmZwddG`p+zt>Ox+3LHvUGWDBULtVjWWN0s>iWh
z6$G3tJ{Y0zf8*1ebcZN0A2fe4pXD3msxAQCWW{*H6%u;*a%T%rxk8akr*IxV2U|Tq
z`q^hXB~y75YaX#P5r<O82~Rya&M3(s-FJ(Gy2$Go1E`HvMz|Ecj@T60>H?FrCN&@e
z2@a;k*y@k4(yROC8VIu+y(2#uQ@>*jnkEZR7kT|FMfIyb#VXa{@+KJAd@3I>Tw76m
zqLj&$7}i}E*oMe~n@~#$lsSjg|McfJkB9@N$-;xvSW)`#7g`K2wh6lEQEdXYWq*o{
zX!4zxh%OnQ`LscL_P1+tJ?q9UxpV2vwcjA$AMZv9>HGfqYnXSq`Ay6%W(69mh$yL#
zf_&Y9cUVM|Idv6@;eXJItjUDb?IqMvbnnnXD%7W0&d7dnoLKe9{3{QMxGwHM0{KT+
zg2*TSi3q?u1_jRZz%MvBI3WvZ5JZI4eu%6`@^i?od$32m)<gd;8`<<^5(>NVm_bz>
z=!7Xx(_1spERN$7)JhGo4%?iF9#5!M>Kco6Qk>4BdXbah`_v-cD4;fe`$KKuIi+_N
zGK$ug&``T29+497J7IfMAD5kK;m?WWpfc1=#ck)AYX(%)6qslV%DD)fvkP~c!}1X1
z{M(m`I6@SdASQ&mQfxkQCO*C5+2V+q&9Pu);5BhtF>rMDy_dt(C@RR0x7&=@wZ18q
z$pqJ9^sT^%h)8mVt}5LQ-Rf)G*V%cGsIw4ufkfrB(Mb>yU6U+?e1U#0Ne-Hfu9N~)
z(6T;T&Rjo)HH7tRzB}#Zz-_>ZRsnEis0g(snwzPABWdhD7L%bLBcqV1s*>ppORt?q
zPAVUWFZNyOA#57bcjg6jjs-)Y7F2OA<QE{3RqC+2q)*kFECkejd3QyFyKIBQmOA@%
z&wfS%s9e{t_53a#XGzxxC_c;K9ZJlyZT{>;#(64(;F@V8x^o@mxK#1BWJ~%cO6tNW
zW1v)$yl~DhJtd1;bt*)peHBuZ|M)FP#%Fm-Wb5Un2x7+Q#7d%d9-GSr0|m2<@s<|e
z&}m5c0>Q>5VpTBc@H7}&vleQne?3>e=AyLIE2whP!Q?PuE$?$z90&_0b2c0DT#B@m
zb9O7g-665BQYcuPb%5s@P?wSLKI6GM(iyMWT1=d~!c1Z}2fA^ZZ*cxTcp9MP2U~Gp
z7r2aruCcA@R)EWI<?`9W@`FvKo1^1UgN)^VC;}Ppzy?Kj+vfF;pfG<w2Z3}TOuJIi
zHKh8O;!r~XhakT@XY*YBm}~zFZ<*u3#2O&rAHub3xgT-BFotUw01^5n>3oj`&ZzS~
z2<{Spjxjv@Hx^Tz6cU~+4RUBqB}@jE8sIs44eFcNpNbNp*1(q>yQidu6MZKnK<q5m
zdek5`-MU0s_+&|Pasm_HEmLKh`oQuQlSAFd$R$R@u93I5CMBmGbqy6a$@(6g^A0K@
zD(q=LZVL)RCoHt4`pMRlPs}MpIRz<%+!hvx#qTWIH=Y0(HyAt6z-i35d(+bR9KFq3
z1ICE2Jl75lxT}79P9{%>QdmchaOZ=}nU0JV9LPMH?pHIrx|Xh^U%Xobx_S@B%BdAG
zM!1{I1e<uH02pca|KZ`o^4(6VU#-rhPw)u>IYi0aH*TV#2igk_OMV`%Tdx3OcFQ`8
zg#V|nGX&!#wl0U)NO`YTUJe032~o`e;#?QQ_#Q&g*a5KloDuU}<*~z>_N&N3in$a*
zT8JV@pWneGjqJe+>`!n5mQA(P#<qOFk_L5a;6i=c{~Aeqg)Ya*WNd3CgzGdNgO=MY
z1Qnrn27P9jVH8?uLumi<_8Ld${xdRy@{0Ff65ci$x$<kArG;JHE&mx?fNID0(BnDP
z;eydL?&V`B4J{zs9)Zyt(4#a%)#EG;i`Ke}juB_mO0&rN;>mZ^Y$}}I`%{X+iw%z`
zB{V~+>R$y#u8`}EL8T9R^odn^Zg)PGzS<J{?<CQOSs0Gkhhn;;yj+DhU`G0qC7}8r
zM-=zg*pO^Ve6I~vKWR9q1i{@_i!(TGuACDgZI|d(QuKrRCgdb)qZv>1Ft(MC$3lkl
zH%EBjZR*3Lh*eZhdWUjYB7v3{$F)g8>B>z_K{7E$tpQs4wE>e-)`*`<X2Ny@6u;*d
zqw5NANjagJFy*x<ME4~f)V1p`3J*?_wvc)E?LyLf>mC>-L>JobraUud$`ED|a1r>w
zfRkQ63+sW&>aSkbs&?zQ`>3)?|B_CwF1iTM!;b|7h+t7Z%$fwgN;e6JhYFO%!^Xw=
z0%UH-ZT@{eW%q7sY-bj7m+gmSqRzrD09lV3I+3%$ua0^kg}7eNmzU)4v)EPqWxPVR
z;r8c>6}H)q`0Bz5azR}^YL7n#9eRu~Jb3fU?|YOag{sGfOI8$NJprC~B{aWf$0gN<
zpt%M`*NU!fTF9HorB`uwK@?uF>=_z{$Z|=DNkZu&_)GFOcvJgj@>ICc{wKF*ti(cc
zY4%-r8FyL1N=au{F{i3(tqzHVc;u=_X}{c9nFb*_cW4RXLRA+DGrLrP^UB>-ua3m|
zc6R(4C8ysWdV+m|MTz?u`{eMSrni1x*jh`Sph~7HY@-DkH&8cP6AL>|eyWwx{@b7E
z%S|>pIKPm~!8or1O_M~CY?vAea{AfH0oxH|Se5{}AZ*znr43Stqk=Q8DTe~NH9RR5
zAY}aK&dv*wISY~y*qco7@TKXSAI4gt5q!fSjiLuj2wJu>ODG!pEaJGF6hirjirk6P
zz*ZIOzbsp?L!g9TLFXzwz+uVa5htGU(+{wvCHLo?>BJ<%=orM)E>ENW@;T%A$p8e<
zwjka+&wDUaSkApI=NBI;vJgtH$^Bb^ctTS9=_OXf(TnixCt2bo1rh}v);g=6PC2Qy
zqGh8}q@iYrfj%P5vvPRnJ`5t;MK`z2Q<vCRj+|hi>t^rI+}k61xVZXu_9OT%C|qv`
z6!ul^wt?~^BYnn71UX!|$*0kow#DcYUg?W1>LVPRO}Bli+@tOA2fMUxKAlGR6hHgV
z!9QM{+&PN+LzsZ$%dBiLSv8$%WP>$QwNv|ZRCK*;up~0|;&USYd==q1EJJCw3A4?r
z%wG!?<;Ze7{0grLs;*@Vll@m=<-U%{IpCLMo6>q;iqbbK>ZJ5sw$vxHSk6hwrct*M
zP*q=+rt-`TzWKkVINpUvakE7>r3H}29MMILUbgDv8{jjyA@~Bsi}5#yr4OwCjG3Lw
znl|5nYaeg?*BZEj=JfT}X4Y9@gogA3KIDZ5myu}}8B(IM3$q0>Id{jSfv6qr$M5jD
z9rfgIj;0MQh1ly>%5&*B110JZl^YuuRpalUt{PJ)VXCY8&ab0?E15Z|q+iq{%?{R}
znYD$bQF>g9(3H>DXQFinP%{(=rIt@>8qR2Gu<6<EYhCY{RCLI{N$C|qYMcYBWW0pj
zvtt2hNhuv~XBgQRd}zY>$Q%YBOBu8CE?n6x+29Z@sdJCFVy5D0Lhg$OXpYk2{MCGa
zS1CN{x&m(Fhi~KcEZWjB*1@uwD=Dg8p@UJ-kGp2t>(#WDT3j()eUHU_8&8T(SUg&O
zPoECArnR)Sm|lNzPj;sGYHmg%<ub?y2qwxPw}uENx}zUi5S5}1`_qvtMN8K14c^A8
z8L2E);r}sPlp2|&U{MlXb4GTnNy|<k9FoOCV1b&vhi`k-SC=Bp?%SxFz`#(V+oW|&
zYJNKty}lVI<!vFBu<XR;vp3gS3M_R#YVR4e<f&hd!W1^A88u<E#U>9FEwHu@>?Eey
zdQ|!~a_btW_IBuP0Fgf~v3CDqLc|GN_x7j^s#34ziLH|HR7Pz_T{&&@$(2tG;hhts
ziHO~)A?|)C|Hnaq3N`nQxF?05tVQ44F3nYqs_Zsi<ZmYlom8!bpB@7?ejX~D!d8DT
z)w}bQ2*7abPIM|?!3S{IY=DAM2n_pbbsBdTve`~TZ?8aRV9|pKwhpSSbj%xwK-3oQ
zM0Z^%A2OMOx|Fm6r&fWzNG(%5!fMr`7B%(fDXpwJuey}@xAkLNqFKnpCfhJt+@=Ru
z8Ii>ySS`i~tfR~y+$>fl;VnF5c0Gs>=t=sPNFm6Z6LM`6TCyo@EdmrXqOT@)ta}}J
z@aq-CqumN6;bCanGx+DiP5g}~N+r`a7``Y>d)7bUQ?H;W4(0CK$yYn=mfBm0$76SY
zzGy5QyL*0vMFzPUk5O7Yn&w{*#)t-@{U$Sl6|VKiIGVn`VTs0s5a=IehSas#OWGa1
zoJTko-qHYXb1DHQ3xd;%0re|5dVQdLee0(py<W1xKCo>jny-NvkcPolm(C3?yoTmr
z1oisfIdhZ8T1GfTA;#@*mxIZw*2rk~=&O=~aX#zUp@;CpbxWfg*qv!KHD4!gyr$J=
zH(GD(Rf73Urqe6}rwIcfvgvn>+2uMdTj=iDC#6R1@E^Zd!CZmqC$CTmoek>`af>xd
zK=DF2#tSur)Y_NaU#^$yxN{ihF*mX(AbpU?`<rG}!)4c_vSihh%IbM&vz;rj+ut2*
zXm#U?<Lz7iDM~6MiPR2mpO@5G#?OeRtQA|JAEXCnNo>#N0$c+^7p_DMR~kCyB(%Ic
z*dzwyM3!~ZTJz29ldZZ6Ikv_OS`w`q3tLy`6r6Y1nNI?)zZ0;Sm>5d+xNTX*38CLb
zV}y-5FDt-8G5&yYjD$Q5uf$5Nk;_Fz{)1j2eViRhBi)7_90Vt}{8iX(JZA8jVD^?^
z_H1DPl)&g&-{Mhlxvpvrkut49hsSCL&#ne<MuI`}0nT$67Gf&0YoKmuy_^6jA<E_x
zVK)x@O^a1Cjk$12{o-ij--6brA0ex?DSTyvQ3Hpu(yUcKqP#c&wh?kO5>~E7q?2G7
zxPCM;QA1cPCqy0$oZV%~qY`A}2AUZIX@hNP&2M1NlQ8W-n7kZdYEF9%)`$aR5~O7Y
z>{C6zQ)**jPo8-e3u05Ssp+u!Twpt5akrHx)2s}xXWsrA5A;Q+xLPgv#?BaVj0SOM
z1V<9TihpKiQow+0oNJhaCXS~$vf;9^{;8q9qFrxqS*v%xV#P>^1<tfMrz!ZrZi=3`
zOfHOrY-t~?$l~Yn5_PF;DI$)D1Ou6aRMDH9x>Tl<S4$?++Vh(|dF+>aTz?~rajx+R
zLaX3u#=Ho7iZf_#F4#V<km+V|c9U>+GU8v@dNUqb=k6%dJS<LCam7Xp6Lf<{G-@s&
zskUI!Kcv}X(R)Fe&?cGR>#K66wXQ0<w%bc9njBBRSEt9_R;HuO_%qSt#``HgCOqFt
z59``p8Y8^y4Zk%kh1Jz_=UT31&)z3zPv2*ok#bCOrqGRYOO>RtLANJ}0UNkvO>`3z
zVU#plg*9eDMx~AuHa_=3qd?hiW{jNUcDBUXS@o1hN+QYqZYIBV75I%fljeI8xL5ST
zglVs3)HvwZ156`xHKoY2k-MZa?Bt9X$qb)~+@c3s^k7q6lH;wS#wDgr?v++EeL_YE
zsbvyEu*P-)N5!hLS`8Ud`jbZ_MFbobeIyUwEGPJ;uFDwJIN22dbsS@qQf#Ku*(6&j
zOLOhUCOV`t>_$)VLn{HpWkW+{<Xt9zW|(9&%hY%Xt48(#CFIG?IzY4>V)}v_t)3A(
z?S}Fc(V5-RtJBY)4wE$wVs>wk$;(L&HUevG^v?;L4t2zftr~qIibBArnG6M^MRQWY
zv?po6;8?a%pn*wU3OiwI4KsoA>i0jg#-1QLDYN3l6{=p?yJIbL4yRz@tUOR|mP9)3
zpd2^s{@&Z=S&5;vquN^02uy-Mv5xX#eK}N-dtY+;JHU+4jeg3YAK+K0JVVJfVHB%V
z_aSD72@+C;4CS{4eNf3M1DmW;mF7*7T;pa5%9{y7(BaU$T(j{r{tD>MfsJAvVd!#(
z3W23p;a0y;NkAhM2<?ioV;R3^j!ez(8(BOfcd`@4Cta(iC)u!jboz>$y`mFW=BPg*
zjc&Au+*AJbn-tG+FSF97sAxo%YaE>OOgI{#nG6|ZleDbH94fRd8V02>E=j4oK&0_z
zPPqan{kAnq=WN}V!L<g#2lG@h&frutl+BULdISwh*r^}gsdQZxWamL4^t)(a0)<A9
zq{#x&+*w@5a@Ejq(J0!ewPB|PQJQlEQ{M0D4K_QBN1cqXC=FT96S17yM^o=OZecu(
z$l}8k!jcAwTabo}&agW>l@V8g(BaF;PTM8Y3oSX^x%oJcJ}+lX+=I9<u!ppZ(nE5H
zm6*yY^R@0j`Q|4_axBv6PIR=s7`-O8CXJT-71S3WDw0G0htEX)q%>_CaeShP$dxpi
zc6GO53&XVIU)=G(l>aXBV~^kMqmIi4Y&eVz(#AV&$QFM9jh5U245{SVj1<}fX|zU?
zW;g*9z_+8BVV-9`S!ty<ruAlhr*($hO;Ovj<ZJ&f(Am)gzC8A&oB7o;z#8IhlT#S1
znSjj6XbPu3+*wOxIy2Q4bQmu~DtjJ7D*KK?D!)NdPE1mvjA__vhM3ear!HjR&7LSQ
zr)XPSDzlHsW1JW0oUf*w8rP3%%o3WS*k+*1IM1WzY{WSkh&fAJr|m4uEcNwQ%)(_(
zuSTvKclBn?!uMgm$pkexG;Q4+0QLa0QE8}T>9@C`!-vITECGJ{6Ms1Z$k9mm*9XAk
zC906jl`_4rXuAwsugjtw$v4?9<sKZGUpY_Z5Nt@}N$vb*eLcMx5EWXV`yfri#D-%5
zEyaDGTJ9LfAKpX(R)Z|`mF*WbR{5p|*`@}*CJx@Fy&fuZ^U7y4_U_%5!!I5lW|h|b
z={VWadigbH+l?UF%IO$YEcITyDYvNtx-HG9Yg)j)JXLs^LL(BY(3i}XJnU}KboV??
z2v}EUsbNOlb3~{{jHK0KJ%}%k^raHv^801KY!F?xcPi~v&jf?zUorsOM$CXZuSH^S
zPP^QirguHdv)AT`o=?#jxxPSd%W73a6uZH@t1&?XC9Fbo2c@Vrc493~s72|V2%qj4
z5mjRX(K})Z&uSRXU_}C-J~I`|Fu`D@pH(ZH@rr5VZ_OyF^dP<bGwHuK#`j>wS@D>D
zwe~Gw47vXl>^n<a>L-j|P_b!kiJ{}nt&vgx#j!&{cA61dYFdhei}R(91LG%i^QoS7
zNcD&)P>LW%#lh5JE;fX8@X7s^&tH$9b26R!>mWkTH|10I5@u^>gpHhQD?5x>=D<by
z$V$#+5^q{xEXk5DsUi2Md@zg|Ll(o1OjU-&H_l3I^lYH)6Bx1F1nnAByvTyX)t~Y=
z(-vom9Ii(-8GO4Ig2Mv4RIE-4#-=F7$_S&wECsnkTqJT@41@_BNK?RRdThi148Z8A
zBat1YqtX^hSPV_@aDi^P@9<0<n7zPu8%Q{cQD!E{R@+*CzMdeYFsopLYDfbc#!)%1
zT60*TA+E@@ds>pYxi%ux5R+nLC+p>cx*0){)*!E(os7Lp8r=~OC%6o(Je8Kx8X_40
z;+C)wR|f{qhUPcX{!4P7t6@bGWY7Y+xIeK3P7m{#k`6(efYvG=by?5gsus$1FG)w!
zdDRrWS3_9j=_}o^kS}Qe@p)msKs?1voM2XJb>`fn0#bPrSTIFYr}RTIE;wc36H;5a
zKS`1wCLt5)B^7-4@N#bU*PMAZ{R5plt$`q9@Y2){Y(_1sO&S;@4G6V0n53E+ubSGA
z==3)QD6GYB#00KKTI4HyjOx_Q@B5R{ng4q)e_`%l`=gQ%Ls@Y*F=YNu9$hyD8f|h^
ziWG>w69f1<PFSO3xaXYW5Rnaz1$aH<DC#Nt93?+7$s`igV*zEsaY2|XQ@xX=hY=@2
zWD~as#lC)*M7T2j?x&1Stk0mnZJ`(}Uh0t3%w55e*;8F?Bfm&ct}`iSgPohJg|;Lx
zmMO&6$9TVNX%R4tNfUF7A_d4yr35F1d9S1xZ)tz7iM%$4VWzx`206Tz@_rg}pQk?o
ztx9P{@T>(A_bIJ}C);B(++IaGEH6ghWX1gi{Plyo#K)C$%f@Kyt{ZkRyDLiCtSd$*
zcx1uk_N88agm$CYB_%J%q((+N#_1ed<JJ6If5X6q4H&IT>#s>wKcxB?0c=Ks@qf$k
zY)vB5SBfk=OEDo?mX^5m`9lI8eltu=Uesd-na-t$rXYyqfOTf0+&>#2J+r3TeG@!B
za#N+uS74qS*lxXvP4D7nb&0Dl0lXW%?uT0J?to>u4w{|lAknKFs6UYdQm+GS#rCXo
zINDpWST4o4PS&jS3QzXgiY8R^JW_1cw>s6p+M#;Z^E9;Fp|Mns5oj{>g3o_mk|_(`
z!-NFCR)K(?)IousYJYN7L`8qJpTI*@q(dW58jhe8gmE0kG#$hm>KNGPj!2@7AS`t^
z#6IvtXOs#cJvSFmDjASu4<wRugs282WB`%6ogv|zMF@#mfyQbOiQDWEhk=*88?}U)
zu6bZ+;k6aUWtl^4uK`<t9F6G^m*_@p?!ico(=oSet)pI%!My3^IH^;msG!KjUumGM
z8AV!+pS?|;Y^<4Z+MV5a;m<I?u{RAjm;g1Hh}WA0{aM?SZtSXC*e#;QgQViv@BNjg
zp~H&Z5&ritTgE854PXZ(-4NdnIw2`cJn|O?oMK81XNnCWDhqI_N#FN|kplaO(8^$T
z4LtjJ5Nl*n>~o``>;%n@vbLdK4bEcxfiyJ@>qI~mr@d$~2w5RxrDF20(>n$hE6~wc
zJRg+?%G1+#HAhDbm7G5@_goz27-*pNslVsBq1vG@T*<nn;kBLm{r;IipgBGO8Gx}(
z&=9KK`Z(h6si?V(Cqv7R?~yF|8AV`u$4!)?uEwDwmP%@lIJ(t*X56eVyY^nVXn|cf
zAyvea%}@XrR+<4RT<r5%*&s+Bi}NBZ!W3={&l$On)f~~QFM3f_VZb34fWDsIoaBy8
z$DDbx`HMT&D~$1|-4m<I*BYG)ScFq<Ko(K5J-^Ehe!xqh=;CEO-^hjft*Y4rvj8^g
zO}_=uU?f$ZRs&f*fy%}=1GC%;%J3@A0YY+~qaW*za+*Cr<B*hSb5MLBPjbBh|KKSp
zZ|q}(?lEB}Hunqsh;UU$>aU-s4+|#x7~!iv(H#^$>V2gMQQyWkhzk9bNm1WOH{V3w
zyWT~JF)yIu6H~_3`n$L`U5iS7^_J?VGbZjv_{uOGUAJpWBV_eDw>JM=@2Lf*b91r%
zA2~KNnYf#rh3rVeApQB~gSB9JS+qB958{1HnMDn?Y>3BC5Juj{4lV0Ul8&1XW_XsK
zIL;-%eWfI{WOx*`(1Fz4|26wj`)hgqD#=YxEl{HIT4|IJ<^~tgp`ne*jjzk^mr#GY
zxhk%<--o7|#ky(Tb}_Zl_UFm#WPaa05I5HJI)Zx9%GqeaC~@;$ZUt26`V(1Y2h_wd
zt~NVnqX#xkV7PWJ<)v)MAz)+!oo7S^$w3Ti9T2L(G)N3{({wlvSqm~mMUz=+btxo&
z_Ez3Dv;r6(Lx6~_xACnp?4AYJr&uUqbXM%jOu(kKro(I`w;%LrKFv!p-KovTS=660
z(oZO6p|vhe?sTxZN1~r(oZ`#94l2LSQ&G=0?{^`5wP`MJmr4umtL;I3aS%wkH-Pau
zij<O0^=!i@whNuw!Y5*1Kf0tNqcu0gNosQ7?XU{9TLjOFF8afn!c7GNTxa(=l{{$h
zQ>3yl3fNuGeKe;C&PFj^&^Us>z48Xt_F{6650BiW=Pxj1Vhj*$p(nWB!9RF))1XYQ
z<h<vu*ehUX?6R&6g9AD_NKs>Qj@#ZIzVHnB%#z3P4&?b2a(D)Jx4y0twD~rEgUuO%
zgn^j;%3pA>!vRUdQj8qx28K=G%5vk1N!^i{64<!V;MIWRKz+EWS^vP>-S~{<O>Wrz
zr1{6rm}4l6Ng%^}P&a+sMnq%2_IM5#&4S9>Z8Q!}yV(Ez^>pyAtH-%m+LXh)KSAST
z5fGvz7>lK=O^(5%J%7fGy}-C|A5^AqAolI?9(G9}{n?u_+Z(kaLKawb%a~#81h}Hj
z(7G}4`Iz?1;+GXIRnUOknJ+HF;F?Im+>B`-I*QBZjjPWp7yuGS-MrOv;U<<GETI`J
z#QB%L=a2PIea|25-|C)!4<^Aj$}1Y_6R-4<Cu;8lrPZlY>r62#wnF^D2@su!AA>?M
zWhJX<O4iOzeKqb43VRtQq%9nHYLWVgl0>6lD05TqNf4Z17R8%Eq>QAsT3>k;K`Nre
zUpH*U1-62*joSmbiVN&L+wfnmAV8>37#zBNmuA)hvH=I&aFFp1uy{q9$kS^{9=W0O
zF$^&W)wJ<uKAtv}Sf(N@QJco-Xu2%C9b}t&jgD3}k(fdb;BqXj00HT0)<e^FNvTmW
z#W0wTv9wHxKc8OyGK)<`)!UoG$85UC+L_S+*p|L2R9fqDg9ler9=-3U>Tx$^5Iu?3
zXm*1S9I@tq>Iv7%IJe9)MMb*E>Q;5#p#X<c!*T75P<5-aN903)BkhMtSfAe<w-r$P
zAPPxAh!}&B?QcY<u0}UQQ5<^aQrhPFs@-?olPBDOLer*HwMCjIc3pG69{J4n?CdeN
zYf-x@ihG&a<1#P$zDF0ks`3Y!*|Rb;ar^EaX|EnYx`r%UwHJrpb>#0BhNhB>4Kml#
zZ5%xdCS6O8Fbpkm^$p$qQ1rP}KN*igDxcEhX`Ea!3NNT3f3W{2;Qn{Mn-nqtyjDR5
z`|V2le<SrL|4;lp^Y=y$ySyNS_1{De>$8O#+HDQlx7Qu~`8W;R-=n2y8LOmBqf(UW
z<Ma-K@S!8D6!eQ0q8b+@C8D$In$q+rF3laCUGqpUAN8yRNtz~p0FsgxqV{?Y!VT|6
zUUS(PIkJYKcjrqUg*X*L2hW#g4<J>2&m9Z!XEGHnPsMM*BA%sq6S4CxMjDiPVfDr7
zx(s=7!#_9nA5DW{Jj`{umysf^>PasTOC8Rd!!)jE3|T(gfT1x-#cUNuwzj&tvPCO*
zFad1ZEU8Z5aYP!{!q&c0HU2blI+;0hnz1Qq5+P|lz@9lea?|H*!T>9&6&#x<*exp;
z$QhU&R~O7otUPx_bLU*FNH46nhaMKa2xGNMGCjNyq>0R#uY<JY+vDUBads0yCo9o&
z$3^^hfA(;F`{jIlkz;C>4vO(fEPbyZV--~RIse3BL2%|)W1RORgUiJ(;>LYS|NP+X
z`P?(`$7>ez0Df}M!!%B0mIY_vbp7QHwqZ2c8@}~~@D%WQ$?_*g8;bB6v0=*hESK$W
znP8sg!zc955Ks^MOYS64xBm6Fa@`mU7g2mC-f9|eJ8u_znlw~F2?_0l?slkW7J;P(
z3hRqyV14K!f*dZ|2v)P?BY)O9bV~Oul@9A+jrOAdTcLxVpT)kr_96eKshOIE-m+zD
z;WC~9%dOYjK8Povw|!1!zXmSkLeEc_1m1s1qzp(*?}bNXAFweY(_9o?K8G4?Z0^(9
z2xCsH`)9Gu%?x%sge&4O)p^UE@*sg^jb(oVv$M|YZ}jOL@bW{U2ffbJNj>Q!RU{Vt
zm}cyis~YT3`O@%qfe9vbjn`ZuC?}l=L2J!NptZD7eZLLYA>9~$3}32oR0PKGj^5TU
zoQkmN!<k<hmw3(N{NXD<@Yf%ULaeX4`W3Yf#Qp_mc$%2|{9+J<PcO%5XmwkMj6>O@
zvw~#EiFtq(TO2g@5yeqXxA|Vqq~qu;#EiXCTG9!HApO{0j@ofv=^Vv^g;s==!#kD^
z9qZUN#EUT1hGEpS49Y|uZ<D~vqyA9-R?&v8;9S92LL+-lH2>XfpHy~=gJ>X(V19b}
z3u@U5X-3GPNIRp?XHBBGBC1q-MhSiYfw^pP4*><evr&o5C-PnC`lgJ>-DKh}HmsE-
zzZ1f^wT4fQj94;D*j%z20-q@*pUedfwA*cCRZ^6d;*cl$TuEiocw`>A2dm3Ei^knq
z`zhr)P8jxembR`O30$5A@1w}+LRzB8`4fkprQ(~tU?<6PRS~(z2GStKD-WuOpWCc4
zf09eAaDq4#&hfcKwaH%(-QS~;+YW&yvLA7;@J<azuZo#fxP7SKjFcf2InbfinxoK(
znce2PttOMi(QxI;DG58ID;T95#Jb5I=#zLst_6MRNva+LW`{zqloctxQz9IcogL>+
ziG(R+>W5l|3TU=+Tjxyk2Cg%u4)aMab49w)(`(9#PRvW*HI1nBb9(8Ran+h+M}&}A
zNaXiRvSV)hIxbDM*sepg>F}=8x~@W=>*&eZmQ0~Gc`<v-Q@Twf6WJQZ$4i;|n)b1g
zwx`4zHmOV0)!CXhRdB1dn)I#JYerqtwCubs(uP_dn+ojRD3xw-$NviUk2Txf_-rdY
z)6cItrhSIY#qaWSSpzCglEZ|v)YM67+>-cQelLVqr}3v;Y*r+pq7_t1E8ibVn70bD
zU~6bGno93UYiH<W0OQ4pqKY_~)xaxXKl))@sN6?(rsNdShfbz#{^y`2S@9$>zA0C0
zUcR<Hi<<EONCGy8sm^fESwIz5{B!6G1nb#vwHnfr-cI=2J<a%r<I|UPC;Tdy>wS0M
z3h5)j46jKK>1@R`WQYUb5Mje7@H;W1UAEH;nb>y)VG$hV!yhheC5yi%<<S)G#M0M)
ztTkuP+ghK0$@l`Z@5;3}rzd{bdjh5=?^BZWx*Ah|53Qs6cF{X9kN6|$R=k9ww4+f=
z`i4BYD=bUmB8%xji9zdOO!a|M=|NJd$$*T`C>F*1e)aGO-QKBP;QSo+r1ha&6oW(9
z605V3df3SyPr%=_P1-z38eH3nX+y<Qv*h0%@}PwRl-z3O$%}KzH9$_sH#LVm@Ld&>
z9BN;?is$uJyTXSj<k+w7c9Ed<igqdlDHWM3*|YNL_8uo2B~gP-YJidGPV{N`8PAA#
zoEk6cr1>Jj{7g|6I2l_ud!%s5W{V$Q;!eZBP+Jp*s$^-vux8&$kBdW2+T$&j4z{jS
zhLRjG439TT=dftDFwl0+yib?%rk+G@HSF2TyPH@YJ)&Y>7$_lRK>s;2>_sJt0d^j2
z%gTK`OUY1GX9*G?71NFG!^0oVbzp9b=4byvdo9%L@Oq8d;RWAjUvXTcD>xvC=37r3
z^DRORt==Ko`|<XfNLufWsGjBt0Rjf~X49R)c}Q{U#|$2Lx%v7pnx(hLcO*NhBA)u*
z75i@G9n52a4+onc(xF@V{d{#0|CzXI=17nTOR-ay5zUt{s2g)WL@<H5Hw?|bg3g#s
z=u&mEK`W+n!j+j*^tX4A3B2fL`4*sT@h;bH^Cfp*!eE&I2P(XRAV<8~mCpipGw?;h
ziGmu>&TfvwLZfKtA_4=oOj1~CEYOVZ#>3pMNAY)o&@Eyy_Zj5*`H*X0D8G`X2TFGo
zp269H(9-@9>dC(suC8f=MISE+<vG-nUj2!AWB2;3N4_%$Aa94>cR*K*C^j4$M?~W<
z8|c%fwG5LC<dLFo;@t`3Xz((r9FhjuQln2b+9aaKC5VsYcSU$ATXX8r>BN1RPmefr
zXMNDY$eG>aqdcjQ`zIY6Wrcs2Tp4pr0r3JG604CVitnXn-GXfy!O?w?CZ&nS%PeD2
z1b*6RZN!GZXP(N(K%oKcYpM-?wKooYU%sW;ZUa#O4&XraRTi;0lhq~aP;El*GZUBD
zcY1~)*GGi{M!Hg~z15<YYu!#Ig<drnW}Rk<U~VS0YO~j$q0<`ZhY;RAyS=vr4Zt%B
zEYaRKlByi8Rp&2$F8{s4-Sn9U8)(_r{eR)k9J>5;W%>DzFt5Cx@BBc2y;W&dB|(1O
z`0Llzg%a0|iD^SxCs2>vcF7E24Q=@yzPR}yQ#hOdUdSCRsXv*fEg+vpK@Xs{7_)h2
z#oJZ*#+h|qdl49orb2H@0S<X|P+CziIZg)k9sN^#)`L_{{Wp!XtPr~>-2{=9ylE4v
z?pK^am;xRu2mfrox%&dDtEFGils;s9p~{NaOmnvnmHzaqA~)CgBGQv%W5&!)&W#FZ
zmS0}K@ahqr@lSaduRecNNtxVbh(>n>cu4vP)|Vae=bVjq%9|C)IVM;fl~4tA)O>Wm
zut+#1C=6dW4}YZuK;k|F<1zb*nd#D^BLt%1ABN8<1EnU=@t&$9WB4@~9-@<qrP4p7
zT+?%2u2D4yxf=X-DK_Xe3F^k;+O#|cX~q$0f}VLMf1Qt7C0|4M5P^BdBUvwe9+Op<
z&2;iKw>Lu0LF)C-?x)p-^grGmkvX-yAD)3d#8S`pm4G;2q`?2!TZMK)OqwJ9CG^Q#
z02gd9Fh~_g+Tqj226SK>tMLRnbM<>`eNSzthTMsJ+wKE4;?}#^dt1B-&({IxEX)~u
z^{=4x0uY--AzK1{ScF56ea^ehzx&EvGZlf1^NPz{tD?Z$$k{;;M2P%ZL5CpMTTVS*
zA!u44EKeh|Lxi!JX(p*oxX!MWV!R=BJ(?Ej8s;c*V41|q@?x>!2lo<A;K3@1%)C&p
z0Ns<jHz@Dnzu81AY0S9;;;dg!=q?l7w)TWN<K&qDZX)&)NSt>aRl`e8l4N8b5q{DJ
z?Ky;W4^?BcWJjhD3QVJ@q87b8XO7-dQ?=$*4Yw~iHSBoOOQ0;AH9K)io?hB1+Rb$=
z=c=kaXC!EXm9ru$GO-KdotGV`L7xFbqcuBH55~sp0GUx?*X3(#;~wKDw!wGgLV&!d
zFLIN5IP>H}mqt8%OJIeDVIrT&4MA6*jeqj_>iAVekD7JzyVP5&m$&-2(<jz*kZ~^o
z{_a3qx$?f<XR|b(Lk^YNq^VbU-)on4rPsHE3_!Y4!)?3qCB8_TMm_r5A!?KGmyK2Z
zJg0R!%FUL-ecNof+uP@Df8C?S`B3NcAMc(KgQH(JU7h&c1l-%tdTc<&#Hys}2&J+q
z+?3wzele-r_Bw9_hBS#V0#1w27#)lq$s$yb*l;U61h^q*oPgjSgdg37Uc1vGhI=^p
zuHdT@q?=Pq`Va)V{aXBzgx6jdULIzI`E&mmO?*a#<;7b06%MYFs*bqJjqLJeFi)Ef
zwZoENgs$f*FYMk8GgL|FGyys#@9N^iTWl1pq4r2M2PIG5PpgqSGyvOM%px7;J33UW
zh%{}mjy0;29NHPCRMar#m&p5L03<vdKfvRP#VODy%sE8N2Q<jAA9ONhD9~4WxAS=*
z)K&Usko;I->btLI2D|06?<kELV3qUTW$Hfq-FD~xdNbqiqbEzDzX#V69BUXku6Z*g
zaQfKjx^4ZUIj8vsMb?wEeK>x<xj5A0%+Xg0f`bQ?dzXKB&WFc7q9D(@^3+ydKI%R?
zy4&Z^Fh#ELvN#+1c5r$3_3G&JF?x6OXY}LE$SFh&pQnbe{7lq13eX$P#*hGmcWUL}
z$!_r&6sM?nMwFA3TYU;R^x=v;9r^tJF;L2(<rnE9czrPx=K2M3AL5jNmn(B{Hsw?l
zmtpP`@Fq4dim@oLPYZYY&UCSbt^nKfX6hwCr5MJr2v*`UIb-C+qrXMhKnR~aD(X_(
zml1Qr`TXDq9e&q!9#r`xiP4X`@dCMCHBmb3q#XGG0D+Z?BYmkm#YN2lhh4Q{9~Ifp
zWqGrXX=k+Q&%lm4R%EdoWx1O9d|1?YW$nn`QpgKRs>C6KU;{bS9vpxHgHC1Y>&y2a
z9oCy4ux#=P7l4s6{P#KRC`L{N%4~unS`W@n_P;oL#~@LHwoS8byHDG;ZQDF;+qP}n
zwr$(C?LO`9?f3g;XLe>||7=86L{?TtWJUeTs=TiIfol8E8Y7G|L_UB(X@m?XE>0_w
z4@jg>)CR(&7C|PtUtWMDW`QP;!;g##I4+K$_}$@ot<#U!)CXNyo1{ux(=_TIT2X|S
zx-vBnDZUy&-VCRU8uoV6nZp@82YKz|sZW2oRncC~#UCw>bz@`9^%hA>aMbbhb<c-`
z>GgR#aLOA2`itg+&Bq;ujxwFzj8xAoe}MeA4SIxYglm<y4rvK}$2Z$6<lyUcHVY73
z8k6kb(Z5ZYd>>ic2l`Ryf2ZK3d~{2fvNrT@j*{<&FZj1lBXb;jaf@0@$2<2|*q&W>
zpPLkaJ#u6<a=0%jPMOw#jhD%k@0fKz$GFIv4dT)FfdfBInnosz7yYjH+>wl?xw1Xa
z&b>OEewD;?V)!m$W}b@3YCLa0Ufx)AK6y+}LI_7VEK`16!${#6I6FH+jv^cQ4gQ<q
zm+>!ec|C1IXsnNbGH~S_A<rHj>g>kh&$sz=-tBO0-W!MgzICHRj_rf_afSL<IM|Vq
z##&@O%jo^)AD`ac@21#^&#%jGk)H=8-(8IveLnv$sk1x%)cxQfU#~(<?pZzf9^Z@u
zmk<Td)FT4u5rVDO9*4Q%zXb^5A(AdzZz-}iZ(Z24VdX!gMlR?P(9(-=!L%ue=K;`o
zh8{=Svn9T0b|-LT^z@U*MhE10ts^fG&uKEFvW^X}z<=I^67Apw7hY}PBJIGU!-)l-
zITO?%_+8JAO77dzb_Pc-JwAU8K-i(}Pt2gEAK~~PzV@+P1R%AaO{Gbb&lG+7Hy~u#
zROG)7bK5~}Rb$~&*^2qvcJTP^oXDLJ8NTlCF%57Lafh|U0bd;5hK!O4ukKb#@_FI!
z?mXl1Qjm)1>iT1agibpss7g^VsIKT=vb#n)5V45h1aHF9DSNtX-iDZI0Ki0P+}YM+
zp&1q4`MHeFk9!;~Gh5>`6pa>EPV1hH*{amXeUkw3(D%%QWIj6cb@cSb#*As0Jwf@M
z;|+Ck)vx@|hd2tSO^e8CB47J?@fH>9!_huzWbVvzA3R+Trg6Q?tZNEzU-?eR#SG6@
zcBv6EgRqzrhZ408By?UeO9Jga^byFd{du$I_}W=;q0D3mp_?cvMLDlByHfbJvM?;8
zcN4+qOstvp7W!ch1_c;QBNcX<1?}a|Et>aL;SC|}PX;pq%rm9h{KSJ<LFVXH(ZGY&
z6f>;-oN$UJL3pIv>)E&iRxr~X)>v8B`a9gk5~}cuCU#_a%={|imxsZbrEIa?$!l8a
z-70HX)E$dDwa@m6GheDbS2o?rn>Q<*tJ|ehO6fpFpi?xg+v`qTJEo>Q%*lG-9Ph%V
z%-`dqa_`2Cvv4|H0PHH)cv@S%p(aj;T&7^1T;pi7jpxXr0Etjgw&n=a1mnd?Hs276
zZ0899mvaK0_L81mZEd_xaO{suWN~_MscP3gc8}2>;V)S)C+&JM=q>)jv?&bH7zNAq
z|C<Vj75J;d!HIif{;F{J1r5KYGeS-4KYag(1t;Y32O`UA%~dgKJGQq$33_oOzx?9m
z{0a^pUwal^KEPRXmt?Cna5NWzFT(a{GycCFxEa67vf}J7yPiIZZ+HEwBuJ3=f_l1E
zkiX~v!k8k#0UW?a^ZwT}%E;&x1}r?@TGD=;FRlbjtbKOzh;`+{q_QHU#Ip5TPBH`Z
z-GDKY5BJm)sd<IH;N+)0<}>>`<TAAHF_Hr4Jd|~|rh@@%RMz}t_=<|$m?>m%@6TNY
ze{2%0v|?<R%wVaeM9M@Oj;+mH$XrGIyoG%nqWpNEeEW4M-ixl$u(99czsWmLUXn2z
zP2oa=%mH8rfef;^DeqXYQ9c8zPX%+X)8w;14zTN1+T!9cG0|R05WEI#5>yxFCQb0g
zl<4*?*Q7oJx{rX$tm*0}zSic=nq?C{vCfT{n0+Cy$Al~c^o(f-z4^E2?vhG<bXw9s
zFRDluU(*;pN_a=(`lD_&U?%ddC~K-(bz*GA(1OkFJ>nP3O2URr<N{lhgpG{a+Wzi=
zF?4^SSz&M&uCeSLhZ=N-aT=jHB|RbUj|a4$Cd<1q80s)?MNiA%Yw+CCD|K4L7e(OW
zmaNaKA9E3^vh_n!Aa9^WG)B%)a-3)OGlK|tf{1&#xewFq68?>0X00>fjp{_)GjMuY
zuytga6By_R^GF@r|9<(8Mb^<KhJY=;>5bptQY#M>uqyk~kShcm<4(Dp4g8dXxP*Gu
z6FL6n^y%)w=l$Lv^|g;NR7*wX8*RI8Y_4vr>qbfKe!$*^e*awlwCarn_t&UXK!krU
zFQdygF3`%Pe{5Nf><|2O+Dld|VD~<*Fb(^CJr*}_ET4^niQ2#7!0K3)L7Yg^kDya8
zofG{SY<NXvxcyecr%=qdYkE6r2sx>(aWgA2>t<|}y0ltWQMVhp;r6U}{83uR_1C5l
zYL;2-ch^Io?X;8Rg$de{PcX^pFYmWflOr;;DA31agA1Ys=k_O$b=yL?&`x3{?9qZI
z*Z2>{FGwY^mZ4QNN8?qCawvTXX4)GJY0flH-odGaA4u|Os2j3&ZqmVOj0tG(A@!?4
zXQLqo3wo8mZp<leLRpWYPPjK`p8-xMGwz9%D#MiT0rP1ZAz4$;NiYRPh6n;WJcV<Y
zj9JXMijvHv+6RN%NV_!mz3AD%o2JT~()^4GAleRt4_#@)!pnr6WeO8f<^|qnOk>5|
zl;`+1ATctq0Pt>@|B=s<X-E^6+M>?EmocOr+M1_4$#O(&kckw_fGLT$%*)_^YhH^q
zyUA)h|C(oxWaqE6(G~Wn*-M(^1lC>c+<Mv!4^&Jq@zupep3Oldanz}&bIZ=S*>b1z
z3{7t3`%*YTvKNL8{zX|}<x4ce*~ZWY?d`<LX5;llSzc6W9?hdRz{8BjsS%$R@ith+
zRARq!*`QDzi%_d5l+QxY=vhlsF<mHz|7^Ro6@UBrhlGLv<I(}}c$eVPdMkc<E0IQk
zk=r&P@t`GT*R5advEWi*d!CN@<q~qyJer{pV~>JyhDacEo=`CYr*-PN{LVBX>Nt0i
zMl-^*C#{+48-*Df61ikp*Q117<XkD&?nRlglhELeLPAYU8||`+d3Sd~c1f8|ba3Kx
z+xMs=4RqNV)E%|63T>?G1xYN}0*lt#8$T?loFKE`tHREn1h$msjDss6Z!q2Q49&Pa
zVAzykoQn|vldCIctFiG7-Ag{vb3kcb8ctJZ+{a7HqGc0rfolMQJ^&~j7Wy@BLS;Db
zzb&f0Ofpb4eTYgR!rMf=@2@gES@c7z9N3L$0uG%pj)fr<qtnUCGD#JXylbDPp#p7#
zf}jXxL4-Ec!ZuPG+>PGWPKd5+Rxc+Ik1n}$-XSda|J(0^K)rn|g`@W~$((z*4QjX3
zu<#oM7_0Yu?s8lxe^@FOwm7j>m;L)84?EH)RWynPE0(#FydZR_kAr03cjHIkSh@=&
zIDlde0~Q^B==u%1eenc3!XO%PI0ujs-!cy(3Q$_J*f#B?P=OW$1;i*(0_C60^5;*U
zai0cW1&?u$df)@H4u|yf^0D*K6<{8+bf;?&<E=s}kT`$vT+-qtS_+F9@U@-xCVLiS
zrkxYugSp-RG`iZRUCYS)eA?|RZOv+(vWijEQNE|7&+;+&>J)A}+;hn0y&p{dCO4j;
z@Mo?bSz-AGHqQFz`kA?dkAb9Kgbq8niWG*<v(C`zJ7ei1xoEI`rTw$ON(S=~u>3KG
z6YFD$h!7qy^dlNaWepn)wOMb(1+YD<07tS*VN!XSfygM?RuT3yE>SuBH;C%oRaYkO
zH3keh7CA@IST+?4k!A|VZb(Ul!=0QYk^$FrYPb8VjRC4V0wd!SAo5ay3Tjf%+*%@T
zlM{AcB#~}!jYdAWOuBj-k#=@9U~hXBt5CF#?k3_Dia1R9p{V3r1o%;;s%VgvF{MXg
zytSElCH)*_0CYK0o&z!O|BOblR^OLuPF=?Js2iFdhOL6sEypeUxxxBZqp7dR+Om+h
zLewoii?n5%sG0HZlL#xA^=-pG?MLX4-U~tV^-DxxX!70mjY2=c!G21}1<*EB(K1p!
zu)@$C$#+Q5J;mcTlanU(q~UhRgh=z`0#D!n?7{Trk6<|W_V_Hk)OIqTdTTRE4@Ycr
zu6D9c*V$XqlN{cHVYpK~g}*YuV%M0NfM<evT92cdRE;2>wqvRDps33>f#s$BMp3`6
zlJXB~4M5vs!*?Ogpqq@{`}nRk1RX=oz$@W8zmnVsUU@RzrBYf3tW%=1eT2?We#**)
z*9_`%@RyIy&dY+V@so0mdtoE@H~uq>aBE5n3Xb{BpLq0(&(7_MHk}!CV)b_O*|VEC
zM~_oFfmis;a^g`7VGGwwR3v>;@DpV`pw?9%#e5WdP6VuM;Or*FkUpK;>!Y+pZNag)
zOqi7|iu%xZtG*pN4-I-G<Q5;HFbi3~Rw&09EEG2u95Zs~ikR?5$SEj*h|yfPtY@5r
z(hm}tC<ktPjDOB0>6<69fxyuI=RQvx7BTr!9=RdTm@SCuR(~J;!c^H_4w-6O{#Rex
z3g*>$*FONvY>7rKvEIAS1>pH}&}aCCog&q@`YJucom=>vlYW`}l`xvQv?s%P#YQ}e
zuBc-(bwPiJb_Mz?Tm?8p+)eI->D1q!A;h?yv#jah35QwHV-nf|K|`pH(T1o@4nhJ&
zwr0Ge67Cv5iY$1EX}~>|XBuQPA6^!jYmzM}nrEay$=;d_!2247usugg^qL5_qn|ji
zEL)(<)%j#l)#7OasJqsdmtqx(Yl#SOyjJLlS5}df1Dck@#sxx<ix{tT2_Z<pFJCKF
zSDFa06-P-`jvJW*w()NZPE;5WPia-rl0jrGLoiHiln*DB+;TLu=hH1B<}+|kBlLHf
zn$ec4?K&PgDk(y9v<z9J!bD3DF=VS~np5Hq^Pp!(RE<%O`+L(~q;-hTgOd8nqVXY2
zL0#TH9zoYgIK570pQbg0vL~W6)CDTxGZm3ogGpqT6!GZxHbN`e6FCackFzy!WW@A@
zv6)Gt!9p+7mmFungv6RKbifnZ8y-0FqizsWpuJil%bFWQD1*Z#E=-urNh;^wsd`v8
zH;ikVSG7wJr&3Egc|#gb`bA|gq^i!LFRk76B&}RXMLriB>lo3%7>JiRwM>e*la+rW
zuVznS$Tx5Z;6N;DmnI>EW--t@BMKj$cw{VX`<*AoVNl=@9rgt(hev10OH4=scZv)P
z(N+_+e5!^e6=|1#ML$P9?l;K<ro;Qy@nbX+1(f>Y(Pfw3<6&&-xp3*`)gOMrM^f;U
zbHNnpp#UgFMJhf}6zW;H3cGYYN0c}*C>MC_rb}nt8oh^eCTKZS2f`huEONQ0Q!j=M
zu^2T~3@(ywT2nK6=#P|)tu>9S8<=3C_;;Ab_e>!yCiU3H_XwWTk$(k?yi>-8OJORu
zQs$J8OgSRVNYZD4U2C0S4mPg1uYjqfHJsjYlj%Fl#OMhiu9k4Fp=<1r*4COQhRZ~f
z5=m~D2#SW6#nx+B6@bxQA#Hazo8d9Kj=P;LpvVDYlGrCmCG@Z;C6bIMosbx|nQN*Y
zR9oINNUVc$>8=@wnUL2D)W1H?1Xxkah~!o87$5~RRjxyK2Fab8D{OKomzKc5(xuby
zgk>HKofSQ-`{jmmD6$ERqRg@(x!ZuAlLV->W<1lisjM|AX=aavp9ovO7R0wI%~(q&
zf)I!ssaqWs(6CE`<H@It$;S;u_-BLNTd)gpl#jrML|qN=3j>&fAqNUbXV+@q>7JTZ
zf*;(%pE7Z<sr%$o4@sKJ!bFRN4d?VUG2actPEAVAH}wT!R^Gc;o4KjeUF2k;sPG*I
zOC7MOAtvW+R*k9w#IC{(hEXzt(bmO$4%O3tt>joREoxqXuN$mR#ZQOC!i2hXw9L&=
z((z71@Npw(P~W|-i*asaSI|r~F(?BCVG{~$mI6vr#eh$P>MO^glf>nLmxNI$>?QFE
zqxEm|lFA!>F*zY@{r>p-6^QD%7(rzqI|9AWT0&iTB$d~%1!D;jnopkN^zZaMoIF-}
zAaN!E>+IQg;tzE{Fb;Wkb}9md1v=;P7?|H1X5Dy+6aaqX3k`b>V_XGxw!V4AzV<`U
zQq2w}>-vf6=z3%4V&IQ~NC&?FBn090q#$^2r6dlLEI`NMpeU6qJfMR7*kgb~ZodB`
z3u%<XFTe7k%EK|(p|Nw^-ufj9WoF^X+Jq)<?jO0Zw@>dKd3m+NF*y8HY=D5t0whg^
zi%G1reRlvMrv*h=wf5~pMF<@}_qX!_Cop`4D%RC?rdzMf9ov@xAR`<Zz0?JOBoo+s
zu+my5*k{Dw|2+J<(fdi0Nj?0!ys;=_w*@Y1JFqBgOT;0kg^S?QKu30MLqvfDkG(Pu
zh&qcWOg~?mk}O34MJn`1{YQioix13e#2xCQqyU5fGB9VFxD#oqW>=}#M$r{){dP-k
zE%+9Mg;=3Rz|vpX0p*4=kGOd5lwuK;CweOzj$B-ZC~c|#gJD&>yQT&BY#Q`wJV?Io
z_vIwsi!mh!oZq=34fraA=|3G{nFYFfOa3OnX=L|y2srb{j`5d=eiHi-WY2%w@s)VW
zLv0)Xvz`RFk_5PzggIvvYvK@fVC9tC5X!i|pSA+Xur9zTK6mUTkr@V}CmwX`NhO$~
zcqhp!UqsDL?1}IjES&)QNe=yQ4A;$QzCn2`9(0EsdTk8X+-TknI!Zy*FP@}h3`lcm
z#=k@m<HNXXAWvBrp3Vpu&7cY<)_}vuLxp+T;TOtIn0n2N_6uYr2viLKUDthj;KEY#
z%}j{^+W<i!4*(`Kk-4#v+2RZUAY6&oi0a}({zry<M;7p2FL*C($A^i}&2T2i=|cwg
zS`T(R#Lsu$mrcGj3f620zaCB)z_{nU(8n1bz~;9k5qe&v=X7vKfaMoM!a3?@nhQ5w
zKX_~ed|VYWrU4q$2!uxF>^mDa;Ep)c#c(7ex+ZfWpaVXaABX%jkfKJn)z_$Jpgfl~
zb%MFZ49XmO1I3sWkC|ZzNW>gZo#dXeF0t||A&~-Qt&MoqM7uB!bYc{1KhbhWH7q?x
z%2Oi&Y@tYyfn-<G3+*&cMLMsJ6+&wsOFM5E3DtOXO)u3;Y(jDC#xX=bZbs52{$doR
zSd^}DCyB%h$%S!yYpI{Y4Lzl3E|o@7Z|cCG(2`ZMfLaA3U*P8(7C;8x##Od(k}qi~
z0^;L-w*c2-1h1tDT>cbcS1659xMB{#_-7<e^b~20oNA`5)Tzq2V{e_j?W}1LU?;TJ
zhc|*b;mel5FY^fByK;d*Lbw+o4MmRP<Mz+DLmkP9O}0x0jt2#>M>tW6nJjF(65YE4
z6#V-qQz&~ikfnoKFbgOK*c#!FXG2;fm0l7YAz=NQNOss3a(FdH^Z;gT{Ca{x3gljG
zCWPV%s^O@hj*Ytryisv;pQ^7E0;1;pnY=JmlL{Tb)-wz4*xl1B(pe5p%c)8o&;6S|
z@C^aHe`UTWGdU==ZQ6}pq-U{^mz3pe%tM+VQ&_iE_C+Qp6lT?y67|ayj<KXYi*oCe
zUXlO1(G$zk*ny=PJ4BzXv=7w0YzyZyIXNZZ!uZ<z$wp2>2Ilhw1Hj?zImI}Xg;6A{
z1EaeRVK|oJ!G~1m-v-wTWzE%cMRP3K<cUj86-wKY@&#yiLc(tT8H!#ZFQDtq?wy|O
zOc+Zx(HT;1Ob%IL9$L8YD4dB22drKfm_nm@0V-F~m}ap;Kc<6wD$^_Z6wgkOAql(5
zCoR!w;yc$wJ*>n&U*N>Pf&9hVy?+%Cq}w{{{oATSUBd#l(rE}e4WA*+r&LHZ^tu^V
zhynxaAFCB6Pqivq!@8>8&b}IC>7T~c2N4x*0j||BHDwVdep*b0R29@-WWb-a!k9J#
zsL3=|hK4m|lzjD66%(1RA}a2PYRczDtmCuhF;!VAUK+;1D<ty<f@5KNSBx}vqw`$-
z6WVc%d9EjlK+k=YdX1<ArYl~mfosUX1pJ<rF#=3@fj3_shH)L~Yq+jfE7T%aIqZj{
zqr;!I=fL_T^3B}3(!H{#haXRbX=u|H^-P0O_J0jo=G*!SCl@vwiMzeJ2bIx#IXGT@
zf(p*`PvVbUf7RNRL^2%`V|0%qYFM*~oU-j!%zt&7^OVQ41BG-S*5N{}D~E8?ts^+<
zc^_;u1?CW%SO#-^j|l4aW|hLea=FR>fUJ17O^?GdU#GjJEpR(H_gXK@y;W0C`fh*0
z)z`nreYX=Xe!CtxdJ7XNEL9zYK0gI(w+|-uyWj@fU1)Wiq-Kt9xBF2JcJ)GTw^<4|
z^4nadN%p2m*ZYz?<iJs{B7TT`;BZbpv;k~beg{sQ@nM_TuZ34Rk7rSt_TuH?b5THZ
zDxP|9bJTbHnA}P?dAs(p$$^nLCtScMTmg?dgT}`8LbO!Nc4jze9wN1gQ=UamAWR>b
zOYE6V!EG7a(v(1SR6Nq6TT+x+E*6E<SVGj$x1pT$RQE)v5i0Sr{LcsX1|n_hEL|nF
z%Y+t%6qa-I79#4k`2tVnMC82~BONbn54jkYp)??Fhkyx6dkvPa&dS9>Uzxu#U!uCV
zKOMNJ$Z(Xtq^S`p_=;3njESotr&`Ng29o5fxts2U6Q5|jP~FX5C?3A>c)`bEC(Og*
z%<Rm>g*iPTz61RtvMUcG8OhTE`*NqLbxr!cK|5LxQfcGkc_``3Ls}c|q)kq&>Mc+V
z?~QJx!vEUbL;cC<j_B@)BTABv{CrG&{jnU;)j!?0K~sEqUHyyM&S~d7WnBGdu0Oxu
z^iRCL`}vCVa%U8hoEKpmspA#ad|bHeL5{L=OP6PliKJa^0C^g#9<~}LdK^K#e`uP+
zRnsDrc>*EHm{4ZG6=bez5R$T`wm7C>nAz1Ki3c0O+nK-(jnN^?Xj7pyK+OUP*`{^^
zS-S8zD+vg$OEeZ7kL-{GLz5iU@VdlrbRF)j&@EV5L#&>#S%69sWl3pCQrjrNp4dR0
z8Ipe{R(iQ~bLc?MDb2mz7bt+9duzC-G+|cXpo5rH$=Q`JfY$NM4ABQ)R|kWx9YI(B
zr_Q0e@xSRDl6&nsbD3p3fo4FusvESw86N(0m0c`e8cINE=5xmGr%FhAC)WrjTtVxW
zgi}a<4JI@9aMyksjIYxE43--}T~TojAP!RiD3-&Gf8q?^4Q2$L*2x5-a`ALse0J#H
z2#oI<)!mGmT@@3Vlwh%~LpeV*<V|<rT>ky3Ym^a|r2QJGYh2*0V{)i&XS|7WH{JrA
zb?IXoMffmg8f#o4XbmXo7o92k!`W0sK7o=$eti;ct+WybViita=Nv=v2~0-%$8~4Z
ztZUBg$Zz~pwBkAGCQ^TqePipWk3Tc*$a1+(dU|2Y5DeG6HQJGToq49dXOQE@E~XQI
z9#bm?mv{rtzdaJETL9#wGB`XW@odB{jnA>@OooV&ic9!}E?{iSpy~|0-^YeV{Ymp|
z_HWbK?{V#Ly3}F~`|ZACRsu+0jbG4=BmAU-Uw|0fimtk|$_pxni0~@J=a(~ws?FYO
z9JudHy$$bUAc>@Oy#9svFhS))3)+~^dKjm5dq+rhkPz81jjd$??!QB;#gQxMy$feo
zsSCCIoC1`YZ6#X6h&QXI3vsDTrSjK1+<0#HiV?zezcgxCIK_{<vJsVt&VQ|HMpeEJ
zgs_oQ3$vHqilgCJLm?)T`sp85E)thjlPWe1x(u!>lYZeP#o8HiV&`PR%64(3i|aW>
z-F$IxZs+7fNZow$>Zs@_N;Fq#I+x#5B~TGbY9?5nQ`SLkNJfo%UWtP3!Jv5G26o@U
z5q%jsIc76p(Y=fOVYgEc^$GlwA&h%e)nji?X&-xbeWO>_mh9-u%x_W$($|$xW`ILX
zPd}Kbnmf>%*CH-+CoCm!eoR|G2aV~Q^|zO=7=F~ckwd6>{4DY>?g84cb<Mv}j(?8i
zbbL1C2_BsxldH#E^24*Ax+1REVd3JCxOp$Y{2Y3&JCkM@tN;#)Gtjw80_S%d>%nu%
zr91}UF}52hXqC4}S%jDA&;AG#W|EP+35KjmI5qzF^DYt57|#5TJRILq5?Z&5{eYW^
zKW*ufB2Bp&T_G0BhIKHO=H4FmmHZAxeaijxsY)jP!oC9ngTmE-d|>TTWJcZVBbtcw
z8DCn`-hE#>b;jW#2w8`H^5-*fdy(|*kD(5n9j5r;`z&;ZkALvL!*i?#PV)N?AdGOc
zuVaxi5MCIQTx|xfvEmgj%v~SHoDz7#rRyh1=zh>hNnh<VrGtH@gNQ_~cXI}g0gC`I
z@HIoLLC^VfJrT<{v9@6ddKl@)Zipn%)M0&L)nO^ZHP3xt$nW34G0^WoBUk>c-z*yt
zPed}_62U%m(2vu4c><K-Ui!LtDctPsJ@wg=CS{0A2imoVqVoiJH$RYDIm=PI%IiDj
zYy9Mi`^#3twl9~Yrl-DCq8C_ZqgSv$j9yeIm_0xu6C)goFdGjMu$VpmAuzj8A|bW>
z@sb<Z`_}Hmzt{XiA-2JUgKBx=+w5Z8SUxA+R$L*qyD@7p_X*c`EsV7FkO$U6qSTu7
z@vD|$=0;uBN<r|^tt9u|X{=%`%W;aTqzBgB1cPjIar&e!?+dV)HpK*2UfNU+Q(f<8
z;gqQMPhC5{#gDkQ!fYNpn-AfFHbc+f|Aa-1slaRtP=zCJGV_L6wG>!qBCW$ijSsur
zmQ_I>51!5Ir&X7t9tFD(Ju_1hEX@}OJXIu}c7aGbHVcO&N;vkrr^e5R*vY!Cv3F$|
zr^Bn23Oact+jgK54Gu@uovFb7T}^oFYdY<M&~(trL2Dq;orBnS4JU!#zY6d%ShCTD
zJd%PKysBo%FR<=&F2tZ)39fGi`ApOsJxWSABfo<ox<OlG^k`y%+4{i+vhj!dS@!(%
zI_vR&oAU;K)c15M(|iEmRxtbD27!b6x(%~-J^a_WIv&m}^#}0iqRwDa7~M)pi<`aQ
zN?1!%{hROFXFcEl>$I_?ZM%tx>fhAC3aMQQ@uvyglKf8#4Ekixx)!#U8?VDwkJm}p
zgLjOwyg`TLxVJ}g!q+=F>F1r2;^$dK@f%BC;+G8my#e^!38wu2tK+h==yO#~;;pte
z;XzmT|Cj`}tmu<1FX3U=?{{Y4-&y}(9nQ*%sNL-YdxWV40qzPypcUU6MLzBd{9fwa
z|J+)O-$QHFd|#43cDHqYCP(hoB5t)Rd|y4DU3J@kwy&RiO|X4u>A$*Ct9{B4k#u`r
zwpXvcT`jv~G1E(6ds>IzLn)4Y``<fjw|;ISUv$xeD!>HBq>_VL2;kkzf#z9TjOqT;
zZrCt|)b2#xZ03o&ZZ45$o{m_iQ;(dCT(TB^u$Iklv$9y6wAYh-)M;KU-RVT5ZL<kP
z-lUk@Cs^xPjk_@z>$q(eiM+_n6?7VFw(U?N4{n+;g?wx}vL045vUFLq9*UIPj|HOU
z!G9X{iFEh+oLH^xX!!Evi2a+(9T(zx-1B)8cmTc0<enSduDtw@cmOxVGBdboJ~OC+
zvA=&tX?NzkVV-rIjobcYI~3^lh|#Z^+lvC5N3}sHh()J?@k;V`hdY~v>rr(*xj&b$
zAm`}B5hLs1esCVulRtTbHwSQ^U2sT~i#FRTHwz78Cpt4hkpSk}Fv){`n(lk~Uy1?F
ziR?{#sNPMwE5pzG6xAWELp;oxN$Tj#Ke3N9I5%JRwcU0KIcKGu@kQ2=v>7SckmQ;c
zir>AY(lIY<shCz^(gJdKJ9igr>SQE-2{H8~tozMsSJ#@dFd66WS_e&9D@=)cGnAVN
za|sgel1_hgYt2|ijAi<0bZV}&Hhs1~R>SJg1@VVep9p_L4N)$Hpyf^|SFifIGp)_c
z!7*fb!xBo*ZZ)vP7rH-#zK6dQ;Dy5YPcD5-(@gavP<$r&xMywb5!1LFp<w4em`h8~
zX9fdB3l=T&sDjhLL-5m1%gFcuaO=s`b;W!Dqyza7?k^nET7SiAy$N8bc#6<E^r!cv
zj5e$!80Nyhb{t7RCXpo46eEPcIcuc!&@-}w-u@z>a-!x@Lkzdq!@zKW{&nu$4JgpB
z8fPugr*oR`*&YMFe1*HH`{By>W%Nj#!?yty_EQd4!F?~g1(CE1IkNe3$#r{a;nCyZ
zIlWnY(gS}1T>O4=s82jE+$2%VONFQTN+AYEb7(sfwsed%GLY;KFb-@5C{EoGKV*{H
zN@H4w4Go>GCKrEkydTMX%hyCL<<L;5KPUu<mV@nQ$UmQ%LoXsuj9FvvUEJNJ5RbwJ
zLhNPxATRfgqN@A?1c)Ws9r)ER?DiVsGRDk{;%4tU2E2!Mg0+KnDRnHwqwUJfpu12)
z_y6d<{zw@9Lj}z=&`^;OVJX|^PE^NXAPyg-Fmo!%SCB;D+~QKc*B)HYPrqo}*4tvc
zy3hG*#Th?razZ63Zi{po$@p4XBf&kHvy60%b9P%y939kCM2RBYkjApyRIXvz90b%C
zVVRers*tS~l>OHbb4leni#*l;0R)4`bRU!QIZK3r$^66OF3((s09QwQEYu_cE0fwP
z<6jmkb{WNq1wvy&&!m95JBXw;$JOWN$KR|qa=RzR2<&bYf?S(Z_*i_p`$Py__mobd
zc`PoJYL~cp1B75EF}kxuE7-&L`A@^dm|W+9J2|NJTyWv*uNuBv;nlH;(mxgl1n|+q
zGCW89@nz$oBX@N)DODtlu#Q)Q>D%|>^Mv^3qi$}T@PWuwXCXs-%<<w2={z(gjmJ-_
z&b$E@VA(yp<`P-;kbzRAJU{;d^GyjVvSu*Z8Le(j3;17!EOjNfP{K3shh*Z_t76dV
zQRDW$J+ih6QbOvpx#@$#!!-GP97a(7P{`>Igv>l52{Gi`kp+-U7x_5`tIz9>&g!Of
zo>Mzq*dH6t<}deylLd$t@ka77gii#2AICu~<ROdRHJ8Uxq}jRmAF7c7sW%>G%KnRL
z((rnZw(UT#V;MMTo5NuVpv?w-%kZI{f>}Qg&4W>mP*Vm^V1Z~`$R9}|MP-tisGK_r
zfRm<L2r^!oE2_$u7!Dw<A2-O8n3hhw@OV7R;;m+rn|Xu(Rn#3`EYnZI{jteR9F|ry
z38~_U7(|R5mROG18RCdYACONajsG_7;)Qe%ME}$kD|3MS)#u4uvjY#E_fha--zm;R
zp$r9gmCafw*}(=Q`J2Z*N<3=2XiR9sv?+)jh3;8pxYBkUQ4+a>JHJA^;RdrHt5`2W
zp^zzDpelGi7&+W(n8K=4i9*din}A1b;hzg{k35q<1iGBv3{5^sVsWUSFEvb0>_Uj^
z63~Z3+ORu&8cW=|8+g?|#dY}c!C!$#N7`1qBR7Iy;)}KGYd_N3I$K6TEex!v7I7c9
zp{uvsZCa=babVjB&?Z1WloZKfj;zXPQG|diiBzfSX}vlg$uJ`&)K$voiX{aF1tF<I
z-N9n$v<kz}(X1AI;WkL^=Jb&is^-GkvuOR%!}Pg!#)XF!uDA9-QGJ0UQz#Oe=>x1N
zXq=R@I?e;oV2T9SOqBli6xS}MR-H0RLVZ(c_-uBkL4Qr>Gqi!n&`TfaLi0du_@+^;
z5J0=l8~i^g?od?6A6e%x>jh~${&$h19oy$C4^EjB;&!fe8{sx{`(kzdZx?FHD`Yui
zh<RLLjHFgjy99L%*+au%hgmxDxj+V%SSN;Aat*)WzV<lIc%NZAqkv|0ctBE%6p*Us
z(?+iDYa!f3DX@=0J~i3PRT!z|reUS&gYInqM7-_dIW7+BSf|J{Qlz9d%Vjeqor^EF
zLNoK{-R7hYAsKB%^reP&Kk2O&jg2?=S9NTr3rXNFv5(5u07D#4Of-rr-vAj<nefST
zIhg^2u3o7#Q%0+pq(+)?84ZJuS-cb()of<%`C-!Ku3ia>S#yHn4>$M)*w*eLSC0M`
z&e>ji$GzX8$Yw<qr{3I1j3HPzEKGZIXjV7u=ECM{XEF<)t3dY6oe9+H+xD}+C6g)_
zd46iTWy~v=(<A5SkBbV8s(97^Ve_Hx`7s9+r)soX_L>Sp<@<R$qydG7eew^hxAm}+
zB%LcbI`L_^L>c9T0JNpIbmfObN4h@Em_WObN4xhKIo#HX7=zo#dVhFACy92Rg|L=a
z^#cKJLkhw3KlnZ^8k|nhFTU^d-}t_ji#g?>L>WTPa%t6bR<HY;VoyeG{*Y5bspBV6
z)cfbm8*lLA$0_6;h|CV-NjcS{8>&oB1Y<jv>imEXH(frooa5=E0ps_e{~PJ?+vlLh
zH-opAaB|mV$E+uxV+B$dP&hQ*r)<`3R`#Hd9Zi64d@Xw9u+Uz{=qo`GKG9HVutkET
zS8N14mYOz-3R_#L714x(3@SBVW&Q`#BM#ls<QRY)7hc}99Rw~USev0%Wg|`SS$I)1
z-~7H^-`H+nTkMojG=);l<F)-|Kj6hjD<pMBFz{Q(cF3c3m(vZY+!a4djy(xEaR$dC
z*?)icwQgSP9Mlm<|N5vXzmY+VD!}{%jmTpS&hsL0DOT2feVc#kCH#6Pp|@*LT6p6J
zs+64mpG1FQAcFT??+wd7EQ>vE201@l)?ynaf?~oz9S4*(W|{><d6(fNnQ?S|@W85i
zW_7)u#6BwECl=S_=s3-dv2OrA<RhxoDHTj>CFExN2~!-6em_qPx?CKPR39vLeb62F
z*2tPc)^D~v;%5VZ1gaAFW##5wJxCA6u;yX2V&#y-H<;%aPnEk%B_a)BPVZxohCbmg
zlUD&NPlZ8V$obWO(f@HIahOu+GkU-|H|(R4_Aw|Sb1~>Lk_{`L`+#GwP}<?EpO{4l
zd_^<Ow4OX0LW8}ARsxso4T*ns>b9riA}OtKOpihW1jfNt6Yla{xLKJ38{4u*kzp7_
z=HDH{8WnmZOnVS3>aX6VR+&wt*KGgW4d{gBS1)XCyagyn7ti|Z4Kxyv;?~#3=Ny8E
zndmK7mBa2<dJDf94bnLJMX&}pgqqObA)|zKiBnmNxPo3=Z9j3D=#pI5vk}o9`7BP!
z(xO#AK$y9_!{@WnJN|TO<T9jF*(~u@<fYZ!UFpm54YT!=cI!79d5Y;XS{r=aTIJl=
zU#(uLe1`LVaBEYo`WdSAU*(oAQ@VXox($Y@z6L350s&?x$@X4nHjan5UJ^gQ_<jyw
zADvu2-Mc<pTEPu}+4YaFAG4oZ#e02UTZVQ+UG3KgXNL5tH8uF2Z$EF3?`&_n{+C(1
z8PnZ~yK8-z$8+;3pRzvqR=yE#N7NqgA6D;!a?3ksc;U34Yiw`(?{kiy-5+5EQ?N+r
zd#~G{aMF=}!`3=4A2$T^tl_}db6-x~lKP;=dqZx5Phm*1mq`hE|4#5;J?QUHCHC*q
z%g^2!zH#ZJ-R#58@!ZkI(+|(l57$xvDZi4Rug^;4CVPhu>l<ttKifP^$egP@!rz(&
zP>P#er@6v%6zhEPl6PM&pC}ac89}Fj_KWr+S}er)xWcpF@KLjckG4n%d%FQcKdWy(
zm)P4sc&BHViAqsN(XRWu1>SZJivI4V44Av|#P6WEV8ew|5T<Hlb=<G5b-)h$1&6Pt
zXI|9hNx5NneCJqjN)4T&muE8Zed^<tiNRkjr*$j{MtPuEQ{$ueNHM%!DB@|+KXm(>
ziCV7>oR(E7lNR5%9><c^*vCIM)73YxKdRqwKihAyvytaV&|Mt75UfI}klg>vNeG2i
zG?tVjKR>?0olV%y!M(}ue51+EI-5F;`UzT>Q{<DrDMLN^|8f!#Pfq_2B_WZns4Qo8
zXE=il5V965)ih9)?hN8M7<Zt3t=&V1dnPr}&1UN7HtB43{D7(H!{666t%9}!(NBce
z3|Pp>)F?IPAedgrG3q_|bd=*!1|W30R*Cz4Av_yy<KXCv`DX&f?<{E#XZAZ^-I~p>
z{gXvBF2`}$5?A)yqX13T#8sbYJ^cH8)2lw}DcsdvIyUQa{b?2`qIwV#d&D1H8vFQg
z<rUX<+hnnRF^l>;ZA;F_3~5H+Y{b<OBMy_9G@)#ly^yFG{{u(!4;wDJ(wd-t{-dsc
z{@gy6!pBSEkFe-tZq__;$<BqZ=LKeeGiKm;>UhA7S%Od^@<ieP<i?fQr+c}*#mb;%
zwc|YOIzE57amh^L`^UACJbvWoM~{TV<am5%f7Fn<NQi7Ukztg^;-Cw`1$E-9WQW%~
z?`J)hV)4@DJBi!Xj{H@HFir6@(YZ&-3s_Q`qcDZnP*Ew}&H-5epHB|?LC|4RZP+H>
z7A^E)Zco>@&e2^d_fw**GUL05>Vy*vAOCQ-Daggx1yacXC6l+RHVD8sT7jU$M8Rs!
za*yPH3U6zgkwTi#tYhxVOj+2%+E8=d*iO4N*CL}yv_ONqW5{uN5^d&j-b!}WQrnYU
zU87Beb(>R#QB0X$;O-V26nck8NI<TP((nQ0^&p|$v1`GnulM#qAD}OE$W;CkPIX;4
z7_XzJrQD0X0}jIjbqgdGiTb^P&%cmnKGbjW(y8!#!ZGpEDiS7%9hVDT4WgsEYx^In
zhePtWLvS*&C6QOTae+uGvez3RHL0@6HgM9cN{%cLa7Zrb@*&NtA&;vMNN-b;Pc#K9
zFl+KZn|md#{n5+k!#!$>oAS|%BsxJ_jI0Y%wnppf{1gfRV#r~)brRaY>-y5OzJn`v
z@v7E$|3CB>@N~6$Hre78d}OWgo#_bD;!}tNBYqknSEsStnZOgusf-$*IQw{+eNXm3
zeNNTLrapXcq>FlMt8wO|&JWwzQ{}A#m-!6fqaQK89vn{pVxP;o#gEx+YD7nrNu*iY
zuyZh#lFLT~B%@KOE1lC74N;Zea{s7}D6>c}<2`X3R&YMg_ugPCWy?pi!%CO?wc^~m
zeu~!G8%m!zL_E(#jdRCOX;aMqpnyC2X3J<*`O_L2fZtOEk>mOI6aND=xOjMLWT#>v
z-}UKHn&{k-=~3XJA0#W1A;Ti0Zvdj9lmB~1QY1hMSL5QF8XR3kUd35TI~He#8Jk1&
ziT3&Od@W%4fTCJlmcKvHFm`AJ%n;;l^^lvfFGuq(Q9t{j=%paZq&AbgSE}FXnrxU`
zmxTQ4Y<!;KHm<Rwe+jXpAa+_~cl^dkjTY{vO`WuH7S}C5(n|FGO#ZK1oqQlOCFY8O
z)71<;_Ia75zJrlUu7*X`T+*1fvMNnc1DdL`A<0>%>oeKck!^tqk18>8dAU72XFe(P
zoszo#(*?!5^&(}s?H(#J9uBP#hk9dmlxxf%?W^*VAbtLs_ID-2d98F>_2}7oJ9+I7
z>T!jlJtnO*Oyl-C4_%R@sa>Xz`ZT3)O(g}H>5#d%x&|#p{FYCSUY1JR?GCD{9e2kp
ztxnSvtDSN(Hki{w<qlE`X1*COSSS%|V9pn0&ZA?ap?0G|Y|U)mwG|s4Q%}1b7=zfV
zPZSu4%izcy4WCQ!t6jNMV25DD<1iSj43d$AfT;}JJ8rq{yw<VcJnlv8Xu^6G%*Uta
z(H$`B1RbjC;Y+&TSErGO_FyL`J_;Kt$ik${tF&sTh-snd(&i`!-Yy>nNvPf_XaKqZ
z>YOvtbqrh>#u_r8m1&Jj#%G+}1>|KB%`2-8xCc>%{N+H(Zj)D2NDPt0b(=wj)}i-|
zLCrzOsQ?8|V-F3@hc;&&+y}V9epChPN-zRpyTy&0G}q=EH8X_6Bc$L?DNtT6DXT{j
z))rTY=5u|-T~;uewM;fbP0I5skg^+)+M7*8RVL?)sl4P2m4);43j5*>Bc{F&qPLQl
ztqD4&NKnyW)Y&!{mnO{um76RP-116ZJvN}=Kp%}KQ6_m&cQ~jNH<8AvOhMAav#jz{
zcV^MSiB=S0h)Kd9O=~DS=;N>cGii^4M8ZnW5)L>1Z)v>IY_3h+@S>|#b;w>IVVL90
zoYZeL2d`a)Lf~D2<_727{2se)`1{-1<72uVXI6gw74sHvkDc`tw=SKosB`@K2@u3W
zT;Rnaf0c-IxG`De0Z8J{d?`tf33Dvf{nT$V&DNnLPr;|~ZROVsdJ&^B*gxHp)~gAG
zj7gMA?b<&PD{gCn1GG+@vNdLj9B`^4P>}5iIrl(cjD35Ik8utR34LNes&1d{VI3ED
znMXma_gv;rWfj<2V|)ByrwqGl#rcbnqz9oDk-+Cl(wG>tOH+;+h9xD&C91gaW@&-l
z#l$a<ZaRZUJ(L`j?1hr*YH}|+0eAFdoVVLv9Q!_@vO(E_FHVwIM|5z^6h2gV^tP^$
z$n%S3KQ#6u%0+NsmA`QU<JxDm-giTx39s^Kc$DCr6@Y>H#P0pTaUT0(prDEDN;Uo*
z+;;zPa;tbuK0caqdS&LIhoh(@{uM4FvUEZbTS$Xz!%=fACeh!>LMBe1)CY!{IXSu=
z*W~H;OB@VL9Yn+_zSakHvOyr<J-gm3c-}o?mDv+7gJ(|NNtl^_W@>-&?|Mm}3N*X$
zuAmME{!uCeU9NoB6i2R5+Y>iF5;;g-7QituDucDmx0KM#m#CH(RSS<t>uwPpw}l8^
zi1%7amHjTho6e&lCqZgt3rsk0En7D(eKMHHY&kG9(6JBqR_Zo~Ry+wWZ=X0JTr)h>
zQc$!amxOR;B5|3cyx3_HQ5F`lQN|)ql74br)&}#$_B_<MywW<7(Tbz2cRQrv(%+Sx
z0u@m#-*t1Rcocy-{-YEVou5W%Fh;4NiJ045!z?lsr=rBAsDFA|z}--cGK_DHly6Xn
zq9DEkWUVg=Su706GQy~hbxdq6(e{t6>7tu_e?|TFep$CkK1s9c-Qgk#sw5%rVgi~k
zEmWkxBZAB;0TNA~+F0k40AAg7eUkxh{^H5mcAQbze_z|aTZK%s&MHzRGCV|Oxsb@Q
z0cci#5c2^-Rh;E2W(Zc=nj(dI4J7d>CK5$=y3n`ykQ#1g?u$YQT_4bsL98Bsw}C8q
z3=jnV=#~7&r&Ma+p-bq=bA7fGJbZEgX#CC5;Umbb`iKHQ;LoA#YJyG}f5Ua$$%!jF
zy&%!U+GpA5ewd}SST$Q2#Y}Bid=O?>bA;?)=_xc#A~x`5GPtUEC6*MBpgUW2qDTKC
zSv(Ze2jaN08e6h_w1rI#Nn5g%CKE2Xr)%OZ;fCxg&{u>?n<EGHmv~P*^*2~7?52^k
zmG(-Q?*18pY(J3XSLJyQgO#kO-xAi-`3A$4pfW&JS_$M=ZH0wUTvJEtgzkE;{jleb
zaThjJks<xta<W7k;|TLi%E&im7o5Ar<{99&rS4;3KX+K9#ULIDqI2LO$mfUPd2n6t
z*}l!@)mw9y;&_jF;1nK2rl`dsT3zGI5652VNtibF#NL(1XNfHvi~2m$-iId^B~3&g
za8<w_#|0%Z-cJrT>B?kJ)k#dtViM{UgIM3Z2DhJ=V_RT(mNkSO9s`^=stSM*yMGGk
zLcRqXN~CKNGgDqAcV(+5FhEfoYhl}?6c%fuREuy5mCmUa{r;S>SFxCzJtrce1<C~m
zK?isTP(a|BRox9<XqG8CL<+94b!SM(HgRr1=*NY%b`-HK5grPEEVXSU)r5;2pFQbH
z?Js@nu4eCI(KFMmF|8H3Ji2{Ro-lv7#HHebDHS5ln2R(Z>xfdhVFW-ORY>-4ot`_%
zV2#UQ4QH?_vRN-FlN`!mnP;*_Fk7Rr(x}LpB5c^pU^DsS%oB^%FkFW%AYFm%KAGG9
z)mKz?NayYAMap7*O2?P7^B~7^0%SuvnM}Ty2mHPe=G-hTbB1}2+B%d6cS%tG8<fE|
z>jhVtse;8K!9>XHL#tNlrdn62Xg6?pk>){C5CxI5DyvAYNJYgs;2BO~-jz{KH5Q22
zm+T5fe-w=5Q|9j8vNv2Sk_Hl-7}0}Dc&w1`Ay|9dmDQ_FxTP>HJns>fNmbHS?7yxD
zb76^&NC8=<{afBTtcIEm6v|DJ)v2(-LkMMBvo@@$9ak&<`2FHgu|jOJVyR%VQ?*|H
zLh!~TAR-fEt2qLJ2W6?k-4b59JjVrIA)z^ho<$Aw&!lsvj{HT(^cY}~OkNWpE&^3$
z%19d3zG~ox!{bV@Sl%Idh${`0GOMQi+6PNYgpc7&*9L2|6<f=<RDxY2nq!Svagjwu
z7U8b0D4j<|<^~~a)9PJ58_K@s>~@BVKr#8z53vL(GTR1<R;5*2-hwE!Vuql4bW3_>
zG%Mx2bsN5GAt;F~)B5^>XwdRBic`|w5)~8Dxb`_Q_bKK?naKFL$i;5fSSN!|3np4Z
zgdv26nh4vHRuTkjC&pm=ehtSnGRd-49%?@|Q_frZ06SQi9rUNgn$+|Kj;#SOxs3(c
z{u*7Y$-NDIWaC7y0S5&LO|S)YtNfq<dA@(rf=AF-e)$pwrLIsL2WFf$3)tuu(6}BS
zakIbHhFxQ0A(E%z2#xHp-U~{Xhy&*zAu=^ec_YLS3FGSGDCD%T?I2NuVSiv8GBs`9
zQh_0H80V%@D@H0(sRUxM1NNCe$INWXvX(-}SvF<Z>n>MekdRc#j|7S{;}geOBKV5Q
z$&+to1@MPGsv;!Q3n0#t5|qVB4mEkvsqIP7pvEz4q*?h%HU6|>0Xy|A6wnvv62R1x
zhkiPA-FZDZobvqXy{QY(9ZSgKO=A>H9I{Gt_EA3z%d;^bS8cPT?XwCE+FW2mu4W2z
zh+^Rzs3^w*B<&(%Iw=7KR%X!wjP``QB?ZS;8e|v>YXCYiRIi5evMRKvVfX1EctAc%
zvB^piNkkzB<aOHY^L6>+9P~yuMOlo#@RI_8(x_M#*K2bzu-%ZX%Cb}EsO$3PN>*dg
zs>@jVLN-lV{O<_I%IsQ_rb5Z$9QH1%5%o+wSQVI?iPU1FA+oMkW}_iHQyS9hFg5OI
zTpME=Q{&nbQaQsw3qfLK7Z#=r0yu1XaLdax23=JC2+FK3G%}&g5$t~?=Lw?!j9O0Z
zlJ8bnh1sgG4z*HZ9cU#--_ek{^ywtqwp%7SqH!Q9vkkQd3GN%g_6(OhD(gsv5}P?D
z6~*)HLXj&qAcHh}QHiGVmsJ`cYS}L&0ev5v{5CGaT<HqoC<O3z<`l-q08kYd%i0fl
zxo#R2Zjdu4G2v{7#RbGPpduz@=h50VViM6KnBeHhmAj4jy3#r374Y@PLvx9VC_&HV
z3M8$1J_m);N8ch!<8vkB{{@vUX)4D+O;Av-$6;a5$iD8=D8e0a#ZPh&W+z_UvHp)&
z*@HlAHCE};MHURmkuIfRpR0HhcoYdCkoAJS%q|BJoTs@ms6|myUvVEG+W;vL@ZqLl
z4%H^jQHbEw@t-GsBn`qR>loy^x3sV%7>h<ds-gsOujkpBUT9852xuZ+fDotw0l(dp
z7pDSAjyA0a@n;r9xMo1Qn7E2fYOIhN6~W8h0vF;yEDOW@(s6=AnvY0Bv^$c3cVzzE
zE;+f*XM^&^!x`)0uKOlj+Lc?nwc90@7va0W_qW>HSEyZ_b$@$RVvY_CHA?fEpM9}G
ziWPe}R2XOsM=7+3sxN9Q1$;$d$%5xZcxNv+=gHb?YnL7P3g$bI6FV>`?Io+(kpmA2
zoNqthixI?^IEx{?jeTHdb!WZ<@93*LX;<;#-`h=!kf3_O%;|rI1S_6zM#W2ziWe4I
zt`VLARwE=Tri)j&$#O?3Kb?83%B*}tz|<GY>*~5wi!0S|*At13$oYdpPbL9-G9bn_
z>9*RIh~l(57tg8Ie1ol8lZLE8v5$KsftV{AQqeyPBMP$cp6(xtHhqK}FU{NuY3_oY
zpU&fanO-^r`^e{*C|cJ{j$@1xax5kgo{-dY=DMT%<V%GI6v*eG(j?+bOexkUyGNuh
z@~E6sb{O@;=efI)HBwX6&HJHtHdy|D0gym%zw%{a%nTRfRApBI#tt@MzYJc9cb%$T
z#Gb^|vcU&%va)~u+uf38G&9nS24n0bxDqad?rODKt*5(9)AwF@?SMH!2Px2QWAf`D
z`E^Ee;f;9D+_$Q}66pmQn~r`(cs2Q}pgNFN41FP=C(s){1}VksGPf~aMXxb9hb{Ty
zuQtA34RKr?bl!l@+l|*y4J?)tRtx1qeYdEo8cA;lyUmdH7|>o3vK1L@Bpv?fP`ZQo
zA8o%J?ytSlE@hWqPL^IzFI{*$uPc7e6HAVVB<dxZ40jW~o03nz$~D`9K3llP@xLyU
z##453!Ix7+1B%?1DjcvK?qdb|Uv!cpg2{ypd5}?Mhce0zh8^)a9<JOR9<ETjjIw6O
zUDm@@dE?<Kr-!TTm}ZKXtAuMS;le7v2P+h0K9MVK{B$jxav!Nqt&Xmv9+h-q&$#!h
zz)Mx{<7EpY{?R8d{51R&I#$vYLrHiW;3Fh<$fXT^xmLbpmcq-3ImpubyfXV4$5hKY
zc!DN$`k$!ZY0d2PuY<Aa^G_z%P&wkm38zdytxu+n>mc>-jkpft{3#NXF3pPiXr(S8
z4p-(D=TKVIS8C5TUyPLnA6z#h7+>w>ZF@z#Ueqs6Ek@SP^7C>OE9x+6(tng?WgK{s
zc313R<8VcOarR5Wv^!aTn6oG5sAS<jLQ<mQl%pPt#aVLK<U-=GzG^1m)>gy?WKjXt
zIo#=Xo@q|yfh%<+QL3!W`dWH6SOtm4%c@o~N~y2IHCU<AUXeL^mL>jC#9jN6?6eYh
z^HPfL-%j0X19!80gDpoXE8(URbX<S-6xSqR+0Ae<R28jvvpfX7=w6w*ZgX+~#aBdq
z1w0^DRu;Ldl!)1C8~+h6;6EbQrDW}O?<6a)eI;qW^{b@n^7VH|Zu&h)mO?&L8?W4?
z9NAu^97XR@T<d$3f$ri`7p~Fb=EZnv_uLD==cqT!c^vpx5)S-prR41~S6z|jdn<lB
zh8hJW_6H@UIV$PH0dVD4kRZIo7lqX#WqG_TN2p?BWeNBJ<~&R^LuB1|^h#T)j#7Ii
zp1GQ5WzDlnP`>i8s!3SWB|Ow5tZSYf=`v$Li9|}rNhZD4^zd5K!)qDlHTtlkdq`b$
zqt9xZ5)U=c>be4twCyfws&V2}=!6kKV)NhU^N&|;m?$~@ecq9>`h-_nR>}`A&a08)
z1<UGeMRly2hp4kH=$>C2o&yCM(u?Q&i40}_LyhexGl}0i@LMgOuVSdEVydW?Qc*3X
zqC<Uw*Ybx%MRj9g*YY<_BVf6hDmpS$<Txoc$sc7^PEAKisKPJA*BL!@V-!XY%aShD
zhqTfvv4_=o%4=DXx?1R|8(S^(kgw_Z(?nI*va0=PQ=O8gGJ5RlQ}g<&@yD9J!Bb7=
zWQ9)rsit?by2MjW_hfBlH8i!BaR?bva8!+qC>W9Y3$lrN5a~L4x6kjA&UbmvPl>qE
zbipKlzK;Ja?uX>>!%xZ3$l`t|NOCQcIQ5cV8Vcng!6ggSNG=djgSbp2w(^45OLw<)
zvQ1TF<}H<PF{vhz{+cHY-eh{Ww8Z7A5}B!WfkCQxSSID-oC~{qS%18wlym_$a`>U+
zQ4{dK=dv+qd#o`SG_F0o6o4l?yM%S%(WiFV#R-b%r^62v7i}nk6$qsO;T+^~Hf#@i
zta;7fk2@zP?Ewb4z)PpA?yz&&?X-{ackE9A>S~K3`t8$h=d{!8wv{09Mb?FXjgQ_%
zzrfBeyFlSbhZT0*Zh;V`;ybM_pq2aQ#8v6P+k*?!-L$xxE>NQPB@Q%N=9-#rsSS<m
zwv{V6y4kfWk|qf<%NtSonI;TBFLy1T=T>~VOL6bmSoe~_b37zB)8#4uamia#I3DM}
zFU@mZYNHA;M2Pc_;UOmj*x>hrKyh1<V>2G`k^J_&<6TySMMg({iJ#itYhQYu?nQqG
zNqjekb3MI)l@8UXy0Ij{KCJbSD4{jLTl#PoZ*?!cHkKjVR#Xr6*gRH*bVVAJT~VJ~
z%BbxU=4i-H8l5idwLPzK+9p=@q1`xc50ukC779aDJZZprRC%r*MUU`r*Zf;uIh)6S
zgTsM-iaUj1BqCZOISLrUsSse;#35F{Y3yxL3(?&{uQ6mh+sy6{M`wGNGS`F52ipwW
zi%J|45=b718cD?ewqt6pZ6>EU?N5d*RQ7>nK{O(K2(}9c&L`g5!=SYe?B^iFmkVyE
zN^k9QtyqUQ$1Mgyy3E_DvqN*>@Xgf8vztm3Sn5_A-AUnJqfZ=d05!DL)&NO2TF{h>
z;|7oqZk5o(;h-Ve{s2Ql4cfgn5cTi_^!(!o*kT*(_^Q{thK=&{ihklX&>mgDQbg`X
zYj_3RE=bn9g8d%|{(?2zK?Zg)2tT*GRF}HjYBt+CZMHRaug!(a;$ts%dp-rrY;$6j
z?Cs{qK=CE$1bd?M8~{!fdXveuW!;iPOYvN`1fTOUNr;z@Sq4m(Cle!Za@B94dlbF}
z?w}1pGlwLJnNlT5x%*vSLBo}$j>6*b;|1$q^_uO0SJcRJT%0>;H<?pimc(MS-GF&+
zT1N}!y@Oe3;Lh2!megf5Q<@@*U}`_2oeZv$l+7jfQcCE^JpO77It`GPf#=|J+WM#=
z$W}xFz)>3A&hWbUGV=850G&EiCXOuc(VGEwYU((CVH{m1i{W=7Fj#me!YR>OU?X{r
zlaG+ph+;4^l@zVz#bh+CfM%4=jG|cw1!3NseL*<-9A<${A-V$2?JJ99EWb#WRXcGA
zI#3uzvMd?Q?UZYUj^nDb*B@`-&mVvFC;R-hcIelSDmS&8+xnM-&-bJG!NQ-z_Wt!?
z4&N4T@piOWrj8Q))hZQwhrjr{TRU|BaLT1>xm0nSiu*^&IjokPKUirU1MoCo0DEQ6
zE+^i(pX6^_Ozr&X;ejv4K5R)Sg?J!BHwGa7UJXG=VJDq#d#jdM8dv?!PzWn(CQ2QY
zJsCa5%I-HVy$`@T)x!6ktiok3OpdH+$gaFL-etn!9GGlrcPp^Qmz&i|w{Z$0Kl0r{
z`(p>Sa*<LRC%FB#8?6uUd+TCwTudzk?@}3TOvsO}K&mKi4O3E+KCwL*zzL%LsWXJs
zPOVrHVhZH`aM3*mnWTHg#Xd|uK&O0~aSlg;(tG%+zg1!mb6J^H*^x-6{#vmr<y@4a
zrdEHoD}@*^iBrIYx3D~z&V5%?Vl-vhs`N(p@<U^*1Y_sG|5EWVQ7TViS6Z_@R0~TE
z9VPHi+r7VFq8Je<`AqEdDMh>E9!i$ot6m=_)jMt4ru43@Q`(n#X`6C5uuf@R`!GlC
zo^|ckDKBkPauL*Sj;vE&<|xy(F4HdMEA0PYmfsqU`+vDyb`Hz&{oko=_W!l~?fr}W
z@8{Xr{{nAtmhU!(E<XUh5C7QlMT-<<<S<tKg_2%O?#C0K-7OyO+3oa(j;SNO9AKkw
zqwzf*!$u1hJ@niy7LV@^4!(c?UYz)g`|;=EboTY&e)KrH`TM~C=kb0z^Y^jH1IF8P
z?0*@fZ;k2XU95S^f$ef|aSBK3pd*wQdiV%aeEBqcoX-7tZp{BLO7zX2eTH^Fi0?z7
zefBt;euYCl`!b!e#huSJDP8}Ye-Arc6P8F(riLG7;s>B9#AV&i#@~E6+x}o9Hou$B
z7VH)cyuOU@{UT-fGM+(7r_N^n&2)A<$AYF`s3J_nMl&CN8r`y|Ijll<Kb|l4Z^w{$
z;orh>{_$y1pv2G)D5j<s6U$0PPG=7=4?C<;E3mH1c8jb5b2OPu@r-=S{C`Fdk5DVL
zU_>p&a;P0QqY3tB`gt*enZCsy-TL?bA}E>qQD!%HquJ<&6d+$_(+B*G%9!5YPrqa0
za)lwS%-`W-DIQLr7UCF~CVE1Ke9|Y@!0DifBA<l-3OQCGgF-Q@5JNMHNlKzfg}H=L
z$Y3dtY6o90l0wpSsT2~}3Z_9>CfU@wtf<h59k-THA#old72BLCr&^aul4ztzgR}~1
z>;+az+{}^KT5P4nxk|pwk=R;(rNnufG0fKtFHbY_B)-g%*jl3HGngmwWxkT#JSE#E
zCI<L4gFR#hn?LM4_q4ISf3MrT@uI=Uy<L7oW!-f072R&Jvm*Q}u;VAznoa*JzkV1!
zJd9@dxGmu|%V=^7k0x^vTLze*7#0M(By5fge{u_ZADjCN+#*k4KcB(A$eJSrM08<W
zLpA;VSN}6q={<cG(m)^~2vwqgb2k~IqMY^ohbfeBJA$P@oflcfVLLPb+jx$mQ+bzK
zRCfxHa0G(@Tle^89u|bZqV^nw02m<eX6#RHeCYI;7XaBBv#-+$2xhEL(il|P9esux
z7t`5~LRiqzVX4GC*o5y`Yc!v;2FNT=enDn0u+BXU(sVp$wNj;gRDk_1+NFzG<ZHii
zs&;eWKf*}O*>F%ks2@_Zhl2`ygckNeR-92P7`N$cytsQ%dU!gUK0U%#wzw@8M6oRp
zEoV4(7hk@NZ+vuPSFPa`Og=x&elRwej&2L=;vW!oZj}apa@pZ=8(Mx|5Cf;HSyb4W
z++cl<Qvz9_?93NoLEd8K`yg|{^o|!kLedLJX5S|$@j&ix*dk@Ps!ny^acgCGdgV35
zsF9el&ptw>KvE3m+9~Zfp1y+6xbGa*4<+FZ#wbonqAV~E8a^IHCUY*+p7}HX;fJK~
z0u7cND?U`pltFbz`=!!57|y%V<f{*Cr!l<p-f#EYAGg_J%C@ii=lzS1{cSd%f(!>T
zBMNzV7KDC=qapr;a5#IKfZ%(-7^4UYQnWv3-(gsRu0mN64w?Q8Wz9q(kPHZ!q|G_o
znLo`(6B4BU7ZzP8m!s(3079M4<`FSoNckiBiCs;`--@_Ihd$8neD3>9`*Uk_KL$dX
zz@Rq$+27E}ABKceIE{>Mr@)3L(3nUf9BHIbdC~y6*=LQ%*%%H8CC3#{q4PvrD2w0w
z|G~5Mf5h23aoSq`Z%cm&!t#OO2L2bI5@Zta+@tAa;`*NLK({#CDOS)k;Qqco6HqlA
zWu{*i-|>8bXSLB+I`@1V-|}-oa`4G1XHa0;=oidGj*C_fb&(f_j5#vSs~W}JH@d9f
z=^Jl!;k*SuBh;Dgrj+1aHNDg_FaZMDYC$oYTYUtg0srf_(~81GXn#{IQBrbfRZ4AK
zufcEo;61gC@H>L#t1>advr`N1@S^*jWBz{L<nN6xe{Z(4s_ckL4`nIv`AC*T&rJ@e
zn5af%rmFBW+g-v}5o=$11w369#7WBB=?;vJ2eN*FozYz?wBm&7A5s@OJ$&o8s1Iq4
z$BS9k9}XJE>v=dE4jWxK>!W}LiSguFAUTV$Mf4lwZ?#jZ8G10{19>a|0p_3u5B8k!
zYP8Y!5<drrMGp_$!^H1pH~bw34iD~wC0sSJ#*Uy)B7<F!TEvq*@xa_O81@}W0+`W@
zX2fIFjwFk;a!z-G%0DR|WpQF9q?PdLQ#>uZ4oWg-yY`aVhBo=G6Z!tV!(7qv6v74C
z+>xt{yLk#0AbdS<?#N}gYbHseMaj-~GYH2E6D|jCj>4OD%?XSxlejgWw?Ju@2EQ?{
z_2C?`eDLP-w=#dL@V6>|tEEgB{*Ftw_gCw~(k`ro4=Y?N5l2?J&_VF%1ifh?^J%4w
zDlx)~^OWg@QR)qsfB~@sY(ncmZ3tFXph{D+QH`yGc*QwfZV8$^*yf6s?E>58S`_+3
zDiZ#QKQqtgHrIMkVp3`FPeD<`%gh2u1tGEppDyJmnzW1-TvHNiOYjL#^70vcva`Au
zKwQ){Pe52AQSE6mUa-m2!)FlPP<4*zf)O0h6fvLDO&{R|MEY<ApxHPkwo+gxy&-JK
zdu&&159S04rtadh-QN+rBdJ#3i`}Il&N=WPJm5$UM0AAK`B;!;pC+Tn`Q3D}bf<9Z
zOY2j(wbLezt%DE6({`~G0L*s8Z@XmGAIz^91o$l+1hLba$3U2cD}5*3{{84HKLqld
zhdG=1-^Xwih4Y-as|ZGdciP+~6hMNXKcZa*zL5)7>PB|m^TqV>k*da<iy!REXng-P
zORbbkI_9FWFE>Eaa9+LhN!#SlW-!10KVzs6&pB`;{w&VFw`nyASt(Z2;%?@{=-l73
zZ=?GsUk(~Sf8k+1twb(MMhFr~?B>qD`J0M)lqVEk`aHO)!t&?g`uvOk3YGWMXb$&y
zg}CTD-fK9;i$RfpI~^9K3=kzcLJS5=kBTHb-0pUNTT16)YJr6;m$u?g2@Gx95duhu
zWDjoBLoEsjm^=wYep2mal>FGE680#3g1yr8$=EG8hV>);(R3-&D@S##5Sx<ov?)1n
zsuObF45tG=W3z?KWjh(?+wyqytjR!eFESP9O?4tdry*enL?1*>ngr|d4o}Y(W9_Dl
zHC^uD8luacw58B*MOH$;r7Q&aLhGQPYv>~vOSC5X&t4NKgh!V|zokw@xupIQTM!~M
zqM&I#pm|&Hl&=bTNU)rz3fqk+3M^);oHASdx-4au;1s9M5_}D33BTYhi8G$<EK!}?
zQVa2{*`YILWOm?5aGD9bNmqC&y=(?&XzC?=4SR`Su$SVbYTHY!ayzDCS+0|GP>ysG
zo?s_~T?=mj<o*P`4fd>gC?V+^T4uYczq>Zuf<CL0#l-a4O4*A}o3VbBMh|Hj>uF;g
z5(U{Aa0$eM2=>@+29eZDalz1{F#VVv1%ga!5ec!Ih-I#vvypA1;D(~w_~tGU-9$zy
zzro87jo_xW3RV@bp?U(jU&OW|X3$VACD#HB864?xtlp#Fqg}50lG?Y3b25G1IEy?J
z@?)t0Ax0cI$?+w6Sc<!AN}J4@Y|mEIC)8mn+T}=Nc|}zb>LBUMiCZXh{dPEg;!YAZ
zqHG5x(TCB^9bPg#f(TD{lZquXCVI~zcGRh)1d9O@hjj&V@XWs-k3Ns@$BQ3GHKZ#M
z*eCy=(G7=Qh4uD@yERsJrS355R7%7G>POvtDB|T-Obyv7KpyhK(fG7O-ek_$pO})b
zBh0FG#{r3XzO(SzwR|QVENJ{!VQb2|W?l!G&-QlM4sQk9-GgTbF0?B0Y!5&0<7fC4
zy`wnE&G`D7;pt^g!_3DL4m3~qiz2(2&?$+7z4(w{4|!^lnNJO-W;dKJ=$44m7Pk}?
zGM-5L+YyUSOtZs(<%)8;t|+iZLFjxxk<W2{=W$J+uYp!hb$X9$_&rq73*9%Tts)t1
z1@liZg!icQ+o4=oWIxGiF&S>ZP+qa$F4CQf&E8uFK~{8$E8;_~Rn%2mgy{xDG|WFl
z`|23&|B{0g*nZ-f+6Q){>>8-pTs+Mt5&rMuLdHuNEhEm_FN6L`)=4Gpims_J*ANDR
zSErNdLaR$i8;P=D++I-4mf2MxS8j4;Pb7=lCO$}^ilZ3w7P-UpQiMu(sF2=~*I2R2
zILWYGtrNqPQH^V$l-6?ALBNN2b5RYfI1?1=FZ9L4Np>r7AExta5o~rl=q#=vUaHZT
znt0KwRfAvEmqUE1ix+*iBm5PmN*cXn*Gbn5n+Q-TN<XA<hOcf>__d48inEiP4qDP&
z04pfhhM9MN{JK!G0e&+n?kCTic;6%MF7d@!3cuJCx0`sa$?rCmD`jF=4gBw!i$Pw7
z|2<-3#B)5s%JiCnQb{wIsHBI{!y6cn4`fA*D{AE7Yn8^eniyAiGp=dlinkardbI+F
zUBbu10tZ#X$9f^k6Y=f>9v>ArI1=c;BqjlGFwh@cQMlTRO+tCilMoE6c@hl6ofu2l
zf^!V#X&d%mhp+LuFWGYkO<K5HPf^kpn&iLvsoajeS0GRAL}V(0)*9l7de<f0>o
z-g~Rz(Ban_jondV>}tQo*zE}~grP7*t)wd|ev!ly@d3pcyE^5j>sTJpL!6W-uELMq
zu4`F<V=AC*S-_#}e$sh^a?cocJ~;iuFCmgcK2$|O64;?=#q+}s+h)Q;i^pAAa9=oN
zVTR5$K(O}6J7Qpjg5EZ|Z7+7My^W4t8hF8UQZLdVDlQ_ZmUb6%|J8<bdFul@gt;8F
zTOAJ#*`UB-<D6VF!bS>3IF~@vM@2^zbEp)fGtSs1n%fo);|+nc^u>Uk4%!Vws>K^i
zDIi-%wzQ^H5g6X|qa~Kq5)&m<btRm~O4t^OD@x)<)4S+i4clQZx}Jc#$d3jTpsP62
zpP&(CrCdIB7u>OB0+G{g|JClI?OeA3Q^^hvwz*fY_()xJyB8lj{Zj?@CkFmw2RKr*
zO=ef`WXD{k0gT%f#2tUIqfSkvb`q)26R8h_)TKn~$UROpb^R!jn(h;#WuMd}k=5&o
z^r)F<#`G#t!oXlWPeiKFEkAD>j5(}9?yn3zV8J@-OHgoZm|^s#gC#y~jV5&KqTfE2
z&&2@ORYHZ+vcwpZJsqxt8ZHF1hC}nU(>_#tjMJ->AD`j$`c$&!Mr+Usu43B9W8Z!k
z*D^*bho&4~wXm!~8^#W4rj4W$uMepB7+Bv$|FnBu<Za{SeQbEN&Ee0T;fGM!0^M?A
zh0v^XUpA0js6ilM*C~h}@#IX8PC$SIv1FU-3QO7M9;$jUIY7k6SLEaFq;b_9(mdf}
zXaIly05soe;f&%FWR5VwgW`6iIfWb(7%qKj{<+2V3-Yn^NlqgU2u>xI2-7H1hD!`8
zMY<Cvf}Tv^s0n1N_@2pUa6hY!rTC=plcgeG^4OwgTWOTk`m_;q^F_ej71grVyP<MM
z&XixcoMIFkEKhL!q-HF|_5>$n8uLL;e}bbyLV0g4>;nTWLQ&dN{4>5EBRESc%m_ge
zH-3)UJPR~`nh^`jVZ_3$jabsL`9?_mhwz>_6BCbV>2sAW8CX{(tTZfTOQPM)0B}wZ
zD}jM?shb0>(s8#-Ih)MJVh51xlXPIV35h+#FQ*m==bsdZZ2`UR^i<?^OTk?)XSP7H
z2~Lbh!wesrU3mcjFEpLBA9m9X8X#k*iN4_A|M?ZXAV*Xk#UXnle{tsVpzuqO3Nru~
zJMZFuc!1@dU%`hivcm3hBc%-Z6%%3%Xp8_2|DcqfxEyH=PVp12tYchVY0C;3yJucf
zxP!H!kdvz}rleO`?YM;bjRpCrO}Cm5Bb&6ID2L7%KV;cNXxi4n7QlX9wuQ~>aq$Fv
zhd!C1?P{WD0OYw!TPV7{UgIaly009|z7$3sNakR>KWtfnx23DwZdcwJ?XdDYR_2JX
z?Y6vZ<kYTB)7%#Ikbf4RE_?+@8vTgKjiab&5~#k|<r1w;yo{4heerIKQj5Ax>C5q(
zK=FG^xfG&>8NU(Y+)duG@5Mq&^|c^Ud@9gJeO1v$n2-K}i`VM0Sa1Xx5Z8=u;-;<B
z!$EYCI;EZj67)f8Ms8J#DfYtLt|2#~8~V^9;&ps~*-I3mA6?NayhV$kH<>;>86|WK
zA;uzufk9W!uO!wgUQSkn;EysBY70K;V#J+y<;X#T{Sbe_vN|CP#{qYY1?zs)+**=5
zf(7nIc;V}?hbf|w?nV>#FrJJbo*o1ieEbEZ?&(8uiIKpv-(#!(#CHFwX_mf|JYUby
z2~9g71A+}zkj(!i3&dBV{@5t|J$~d+bnGi$fcul_)7Lw5x3Rj@l<K&46DTR38KeW=
zj=dx$a}L+X5DtWM?a`;Cr{7aypw2jM2{xU7SfdzQH*){(h5#MS6*=x>JCb3##4zz}
zy)9mPB$5O;jYgVKLt`uiqN_m85N+5^whmU>5iGP8UuWelokLr$;^-iJq|<tbiFXa@
zAQ>emc0b(4;S0aFwv*L|3gf<!dXY)dl<0r!cXc#sCTdw+FQqtLRk2+oHi#JF@RJTZ
z9g|u`RM8zwy{k{vm2pIQ`rW;L*W_n9LWAG@n+2S{KNYoYR4Gw8;ik~z_Gx=tB}435
zS0bs>)C;O;B|8DU6|-|T8>dSBVBz_%i|gA-BMotc>%xw@YglBFLz@GvM;%;|R{+~m
zOGu#rP6UTQ1lL(az|<JBJD6eDn6P`TkOCy!>vc`lhk(&W2p8T;n;cR#zZW>;hK@PJ
z3a>)p%@9(YmYOT8uy~F}ZYnxB)1Qu*uZ!ue%Hps2$QuuU1P;t5?FbM~nIWR{9;fph
zrDXEyIF}*=#Fh{u)5p~tee;?B&7b{1U@OQIpZzZ=W55CDUU=opC?Q11Sx$#1H-e1B
zJ<rUD!@hcE=C6oc9*H<v$;h7Z{3;_^gGy5kN>Y|Z2=oQ9Vk>j18=eGEqt2_tSHDv-
z)2tyzu|}K0Wt!|3hXz;OPuUIr6g2qLr=Y>^vl~o`9uZ?Zi<QuHD26X7eG6@vy4Tr#
zyAJw>8aC0l@+$gvE&6tyxjtL44!Lb?j%8*#9H*sET+65Y$$Nip7OYrop-PNjiCC2*
zv{yoe<r+kiP`S2`^oUjkDJFOs-6V5iyIu<anu<z1bMrve(~h=VjYb}hFD3O)pCV&{
z`$@i1_`Z|T{0tSz%Ti7Cc~m3MG79zBQcN(aZ-#oB3F?V<FW=ltC3l>P@|A)jj2#t4
zA@s(slBErt;R!juPi7zAM{~9q{q0ZKcycqHp@sha4?8wT@cD~#Tbj+X4ovwAFyBcy
zxL#uA`h{|7HRI;_KKo7{3{Z)CLk{=Qck_IFt8NZ+i(wOLecIWA6L{nbAtNh4P4y^W
z&n`=uvhb?<tKPnN6%{Twb*T;;@hr?*<u+?*g@V^Uv~n5^8>$cm)OZ0PW?|*oa#oo0
zFsvG{ws5U>Kjo;oPWee*q8L=!0#lgxi1If1h*?Nb^5C6gZWMYeg*_!;!8J-@Ha#`#
zYO-n?Aj(uZ%Q-ojpHK)DWP_K;tsMWX`l?l*n!D=A>A#lKfBizcy}HQGImu+6sNyRO
zPFR%hD(AvlYEd6PcTv}uUerg=T-1Vsms{0>Cf2pA-L)-i_xD-W<^0RqPK@Y$;Q|DT
zT#B$I1&RWjk&}gtQV{z!^P|>~M!|(+8?D5qs6{<q&1MgoD4$G35nV5auw%8{qy!me
z%6XVcSe18#mH%^<1ghz)?W%ln5!JHI3Ma{;M;5icCv))3ikQ}HIeOmCxu^px=g4t;
ziPLsqrxh`R&R0&F#dO*<NhAVYTT9dxS>R^R0x*WJQ?$q*-2*%LO#Z><-~E~72)O36
zOOZ^$YULTsV4g>+p_9!t?)_~#Mh^Dqk9h1Ex>!ax<E&S__lqSqon@EOr=<4l%*nIP
zs-+D6u_xSiWF<K)rT<<prTlnQGW_BMwoTE;G4B+6ai<e8Uq0~_k7U6K7oh?i_-f~h
z=t$VNBQ9@CCtw&o7zNoRQc>qPK}?~rg>o!yp^l`xlnm68(FInh8Z#<OqlPqkwN{4A
zZ}gOAs$Qt2B2^pow~=Oq^zEcX_Kn+j1!f^=-vzCgh755luN^dJqJ+3RB<#C(w>4H>
zdh>!1+AYkr$WTg}G_*b>OhWpnZ8GAy7!2C2A;`Z`gM^3$SkN8pL$^J&>rtZ0ZWnqK
zG2Ccg4Y}b7&~T@B)l+QVgjrkM2xXsnSKcMGINsixW{84zW27i(^hKZV!hDDV3#w|w
z5QJL)F37vHz0Iv*7=D0{6%e}6xTUa-*P|zuV%$&$w{jc>N(39UoC*9u$?@@Z@OYOM
zlQwEL4RKf6kZ9K3V5J3(D`(PRXtyGa3t_zfYIHlteAM^i1QAYP+@1wZRV>U|+~3ul
zg*$WL?v?`|4jZeM;BRBqvaxDm8>^O8S+zXhpauS^RxD9tm6XL0HC;K+Z<%u0Xr1FT
z#X`kwn{sv81=fPENz0TVTH^0vnWEUD1RjHgry$yWo^PP?``M;kbys1Ua&2dtBKJll
ze{Gt}6`QY=a+<Fs%w0rV4e5_oZ|&mPTD$B;9pD+Sp^|1Ji5r8s!AM-q9+-+~Ek&{}
z!OjB{k+^#~&CSKR*5pHD(y_cfM^<}<B}c-BL)ToMwx4BdlWDhMHrmKxv5{%6p|{mA
zleuUc8FIk(zsC)jxOY-Fdu$8_Ae!-s7=*<1)=}#TH(m>Fj0ww$wHuP~O`4Pv{Ut0(
z=%0-pNwx`uh5bX)_(6NWki+mH%jzM^<iXzJAz|)dYwbW+vze9-D4S<mIT%bFY%Cno
z4P2~j8|(}l5>^fU6q^cLi-tVr4Ebys@)<HLW5r-?!jMg?!nd*gwN~7AAwsmah1l6G
zSQ{-o(_+CIM@<J8XcNAtHayb;;hfA0(#>tGObT+^6XY@`u(_AEGb6}nLy*gWz^0M5
zrUQ0%1Nn>wa#;*mcE-|HAitqNE|_I5u(uxRUCxK}=8}ooFu(@3od>gR57_367SHWF
ziB_th>l~N<F_6Cn;(8ev-wGzNvg9P5Kf}P|wI{k6ndtMxT<4f)jX_7AxlTLntbVSm
z&s(r>X{}zG)@m>zIVLgJ)LFptR%H^A*B-(c3TbPFFf(y<cT3M5r4<=KM64f>B2ay&
zDyh?Qxwcje`Sw=TL?Q>&IReSfjdPBgK3IdCqXrK-5zY~0b{=$d1d(lzY|aa7PQtfy
zK$`9F%<(FcSZ3f*IRVMCk$FJLR*2*T9Ni9%yp(LBE`oDzCXk2z^RJugv(`;*xpi~+
zoOM%QcHJDUXWcAm;*)FPq#5_*UpX(obetuZP99P34eTGw5p-=?f<9aiLDyfNp6fr;
zMsSpqm@ULQOM{dw__>ueinS?9Zab8vjZl`fKv~ZG#7g6o+wkNCj{`RC%wcSj-_k@i
zGvU$4qedoy-N>@Ko<hWEB&Ta3eT=;vg;?5_EN@t{0;>``lagHaBz8oYZAp@1M#61F
z>=uJ#y%?(5<{q^SbC1I`bC0^o+#@su(OH2cOhA4WbB}T<X~B|K{IkqGBKq+Z^>>86
zS7Yi?(N}%GyrGF+e6=w&(W((M`mETMls9%I8@m!|rlFc<G%mLj79AGmB=H^dC$lbD
za;ysR08Q5BH`>8+*K6%!6TGFo7HgN7{fe=a^w4vk)VOk?-3kwuDFS7RAeka4CI!R{
zV`2Vw#w--RI=@8=Wy4cnv9u=R7QI2sPi41~!&K!|u{^OcR7skSe6lee(O9bdTV{I&
zE0WyiBCTV58RE;8$ztRaif85_Fd2xf*#_je(*VYWuU;V<tfXLVA3}ri;VLz{$?&%`
zB9UCb>8Z>g;~txdBG@rtkJy-;<TW|D;x;EljYAO|g}xla5B*gbqYU1rF-kq3Ey__o
zQ<TFMnW6}gz=AzJvj6(Csl?Z!L_>?Fs2Hn+%H2=_fC7Ys?@6p}RF@Iv-x#}Wj9oUy
zF3}qkGIqJjw05~dbC;`kl-6u3(i&O?9YNH6Yxs`R@1|KNw2lo83*{7E;xU$Z{El+$
zT7}j~vC2`}n~vbmw9w&#lr+)FR&fV5Jh=>Yf+4HfL@Z&pK}(pm>5gW2Gb5KUtKcPU
zmBu`QAY?Y_v3F(NO&K;j;~#PAu`vM(4@gTI0$JYB*_m@#*m79n*{!TKEW=OGaRpN0
zySanuW*-?gA5j~LbW0Ac8HZb1=6=`0fFo+~kP64pnr@VrYFNZ*!%QzZjT~_G2yR+=
zhe#EYj(dM?XD_Co!Z<=4sDr&BxFdQ+s5KrjcMR=&24MczF@}f_%K00ax2AdC&hy5v
zh=d*Zb!;2r<MP^5cA37^@|z%8*)n9#WRm528<zW`EZ5tx+@F%=dK;Gev$0(7O|jf8
zfbB|5%W~P3EoD?@4<XHl?q!b2Dh$b%up*0?klBXai9q|2(l8I5PqVUAtW4GH_uOX~
z%`9UvlMArFylssIR6kz07N?ID=E3S?+Z5%?vzP!|%04Lf{v8eEl5L=sG!)EBFo!Qq
zF!h%tnEFpZFh|P}jIGr09FDQXWY%7CGHXA<WahD|&D<ncVV09^lVfL)lg}DwIa8cv
z>~NMa!pT;I<}<%}5y2_5ZFWmDOZ)n=);dBu=Qm~-{r2;$bvUx{-x7rNDX+B-uK2ZB
z>sTpaGDm*;@c6Xw+34|cHvMP(KnPB}!=KK^U&jbq;+JySGa{U+iKUxu6!~xUMgecA
z@H^c}?d2FKYu*~Y4GZhi6p&Hzdlp`SRG%IF$&!%&y(}SrdSyaBX#klPpS$lZX_jVV
zlV)ReW^Zz4!Fw}|%`z>`Y<Rz8$NM)HWNF!cHVZPH!a8BewQAe3q&72BWnB5XGOMu^
zZQ;g7Y-1x<VC5Y&dQ#1w2<l%J7Gn4oZRRJlDOj+ZS(=2IYLD24#Rf@l6t-(_L-~0l
zkZJd|n?3|<HuNf67<yG+($GtZX0#epuXv~;b2)Kqu+#QnW9Ic%&AbGOsK#DDot@XR
z@rhPo^!3R;N>Lb+Xmyc?o@qYz+{i;qg&XRb!VD?Vgi<07LAo<zh#`yu+4B@*vXq!Z
zNb1MwR%YC;OtmM=Fy_*X*GIaH`*n=HU{(teldtF7Yr#vW|I-_78EvcbSUiQMN~?%`
z<JuYnh0oC%0pZph9<8d&$h4@ZGgiO@Qm2QiJSVAf=t(sQg3xinm15eJFt=;S-RX17
z5n8UC+3l^!n1>tA^xm+wO-wgpvWkhAz!WbA#h$?!hAgpCEGV)JCldA&y^Po}DP|I>
z))5&-5k18yB4Y56rt$(|t7jG71ZSt0Zr%VS-AlJ>&>J)KGL0Eb_Z`BN={hS~O;#`y
zZ6p)BXf@Xv(P|7MvAPH#sewP9AFbxv##9BxFm+z9R~4<MZWA>|7c%D6ZNb9n>_vh#
zW$WGpK~5}8!-N`IZDGx`B2%88m)I!6R-~BRWJXZ2j-6r|H6{HfC(F>rHb#nNbd>bQ
zWJg4?jdEfc-z2j+nK4cB82Z>iQ!T*6d`THghy*p@2Pxz1S|Cs}fv21cfE2zX*E(-N
z7(wDXa2{!c6h=A9$itow6Sc>P8m}(4o(Xagl3c}b%asm!u>^&h`>z5*dj!DgZDr#?
zQ}LNO{W>SzIcMXbA{Z?T=ui@CmF*Wf4o}MR9c&5yN@Dmd;AN?ZuPor6SchH?Xg*(c
zx$n25K>`#2q=9;KptZ74Sr(A29C)lHz*xDFSb1Ss8M{*s0M?2blQrln3-2Fdrr6E&
z{^?=DKK~G(vT**m^ig~-W#hXd*q+{&EwCpOCX<Pg$%1VP{{{IQhO%*o`_9k<A#59%
z4qf3cDb6fFQDR1}EoUUqgQ9@812sGs{i>FRCx)|daF&PdBz$W3=uj%;n}5rzk&pP)
z7F5J>Q9D@986K3JydJet*5??oGl2mk*3B;sZ3{_YqU<1hKN*avT+*O`Y*t1p$SDQk
z)FXrwo6a(^P8mQamDASKP)tWA6qA4}=@3haXYQ|qVyc!*z!@Vp=Zxp5EQfotfg1Ia
z!REqq8C720;Cy~{oX@sUt9@vDZPpmHshhBTuDZ}QNCaK(UiJDkxMf?Mj>cQFih_xI
zZVufat6rRUnGH&3Eijpi1x%*;o4{ne>jC$R>|Ji~G8??i1~0S0%Mf0sWpJ{52LIsq
zGl7ONkAe!5Inz2@PJzH=I9Nt}^JRNr_Oop3OL>E=iMdgRU7$dl!p0wRUVq5k9rE+N
z9$&BDqw!%r7sa&|xhUR>!b9vF2tN~jlkk0;j=M2{Zfp@Z>2Mp3XWLYR@=Tw$2^E$W
zx06(`N^i9~m{sVh7CphDBiNYwhFG!U_G9~MJBk0PuNeN*e;xQwe@XaH-x~hY&xHT<
zmx2HEp9B9n%mj!e(IFbRLL4hbnUG=xH`sd)mLv}rNb6CUrnsL6MiO91`nfSASpbq%
zphq%Xz#7pb>Ch27+=vZmBt<!z3Ky}$ir4@}QW}uyf7Ke0VdGI~dBlc3MxGsLV-0W(
zkRx{z0qjD=hFCyaZjr;i%Blb_d4})BNA5CX<n)7P#&kRr(qf5c$?5IP=E6A5SUjb_
z&+}@Q%b%Ggf+Y`pB{y~@7jVS}QIg+XmHGZu>oiEa3HK$W#NvuEm3x?OT?0C{U@P`s
zw^HbERZx^H97+VTm;yq{i$KW-KS{@)l<oIlThz(QILNb*Cd<Q2@?cD&IHRq6=#t8k
z_tREr7wg9Kqll8~5{*j1Q=B!T5rL8*m`spKfbjTtI$i|ljURk7Q7C6;=2i)D7gL-q
zjqf>2&n_zn04vMlbP_-n)axopl^tx!E`R*d4kY3f0;mdVl$XG&;Q8V#K&C9|h9?u2
zlEW{kJz+?SJwxSEhC4?Ka8jhK)OluBzRS1l>N;DNoR-1)tOVp^iSUVlXKHz1lV)I#
z9@PU<c`A~J>xEEllG0**n5^V3cWCrTR4=7~e-f~tU(F+tlN-zPYCeqOFrScjEaf?<
zl3B&ZXV3;qv=S^)W~B8`36DDhM|8QHwtufG(7UnvG#FE?4QuxKQ?&py)qWo^lN+F!
z+zp&*183U6nO+diRABDUj%dp30Z7F@=^#$%4XB^y|L0f0Ke@j&{>kM4D0hQ^`i&4!
zIfJy<SC4?I{Kg2V%3DA{;gR+wAyDKOYlDIMNik65hiu~m=~wlEL;%z!oFHkFErE1n
z>vf!@fxKm3lyH$h74?=~$lGzPlpP{{<6vpyU}^cwK+6!JD*^Vgd=8nIZB$U3c%T8)
z&rL(D1>Tt~Z*>c)zG8C4UVGL=RGvQ(>zjki)y_eNtDZOOJj-2~^Q_uSI?tNhdC#&4
zr@HZ>^;7xKGD9EjJgWqaTGE@AJqD7c_}*G#VQl<z{RDowYQM5yuD*r)tAb#>>vqkR
zvu*wLc<0LLb4!?u<>`?tU22KZK77H^t}olR*l5>ZXtdKHtV|ke+PF%K09n<GI#X&_
zN$q3@hs}X;%*4NDLtrftkeP)rYb-2V%5@uF1G-T}ERCkM%QVH;n+_RkTMWldq<~|s
z+hgsv%^o6~n}S#lpsXFGu$*vND{9Dx(LMXUf_+v?aYu5NJabtud0D>ktU$FakGIm$
z8D6Ykic37*FRtE)URi^?t^7K;+ryH{DP0!sw)RGFxACx{*K0uHMl?z)?Hz9LxEnm~
z`tZ0BAAgq_Q83HvXoQuc!8K!ZYZlmCR2_IQ=dkPtu<ivMCkkxEgEDDT8ERAK?8|KW
zKo4V}Roo2z?`aHM0UNRTqkl7|*z~hG#;b=m1?BAD!9GuyWfBwnsXb_R8&rm1H_i62
zK@i<fBoth;`Q7yC{+6&F6W_l@+{tI3-4g!h_Gbs%n$fO0fEJ%HoTwD=DNX0PF<`j?
zl{n2yzyP^(X-B|V>HV5}Mq3_>2dm)|#rZpJ_Wr`Rws<3-Wc-<TDc%MtXP6Mi#}i)*
zr)KzU$jccvJ$l39QVJSg@?-;^Y2f|!da?X2&e?d*=Kca#Uh$*>5=Id<W^`?tT<m;B
zqnl_*!-rNBH2wlB`p4o9*FTi_eKwtZm92+L0!86oKx1mf{4AJRuRprEV~g>FFDqSO
zA$UiRieBU>V`6L*;tu|;IC<=2H9>;|j3~C@z3g;EkOZPD!J!-em*IZ}{#W6D4gMd(
z|9UAtyGQU8B=rJH<OLMI3n*$AP_{0hG#XEj;^r<bcyTXJv#6*vWMrD&U$CnjY!f!7
z6bNk%4wa%;&QY&b{R(B~eccH*0EJEp!4g-H*1UdKS3fEt_MP=KploVDiMQd!8&ESf
zz+KM()KdmPQ-5jI^*c_!2B-s2dQJnvBOFJE7?wmcwj^F>VKn0lqx34vqZwZwYidR%
zr5UC5F^j6H8LrxrbuZcYXsl^4f`AN$W~_^Ul$UA7%W#phiHo2N7jls`GY?Vu)%Kdm
zvudi{T|?!Erpnhsi=`a3zYGahj3n56UD;05fAwmzozj5Uk?o=lSpP;5t^XBdyUYf>
z%#x@Ym&B!>E3aWOTpC#N8mRD6&y-$b{*P>Hewm4{nI;}JA!6c5Qjp?8NQkSSa#1Jt
zKCF1<b(W2F_GQH@ud|X~r#!T*w$fU5rS7WhtY_D$4({p*`OrwnO+6vMG%YvvwEWWK
z+|-lvOH*{yK+!9&x0+pV=_MA^p;f&rulC5Q+Lu{Ub>ot{H0}*Aw3sf98^a4Nqf6ti
z@Injd(!3?S5D8xzdGkuC=*TQ#c&{K~c&{K~c&{K~c&{O0c&{K~cx#d{c<36vgyFr8
zgyFr8gyFr8gyFrCgyFrCgyFr8gyF4E!f+h3gyF4Q!tmBDVR-A7Fub)(7~VQ0499#i
zi3LVAsa%nA9jv35xS)puqT17!sI;oBG}6)1s_Q(=u9Nq(w7NQvl6AT-HK7rzO1Y>S
zBs8L*UXY-8eK9@grxzrqu);VwHD2fjYLK}7`n389HDCK&8YlE6)Y|6Re1WOFG*0Ep
z#<iNVP+Y^nUcc;maZN*f{j$r(H4OGkvSCxTFU*ORYTcJ-#<i5Sn?YkQvV^k;>_t{`
z26epvX<cTK)^c@9Ll!ifNxJ?p8!K_3wY{ZMJ)TE3Ps(LB!_7)AP872NFU>3_Zu4O(
zgJ($E8W`*h+F5o6+zHt5`Z6#pI_@-13#=E3cNzNjI#00>J-^b$xZDdJn9aHC$vGkY
zM-$dVJa0d6X-4LG;|alsKaVE2-^aI$JIpUE2{83}m*BwXR|G=#7E?57AZK{IWRZQu
z9XdDU_gn6-boO+!c$%Sa0WyZT#WDn5S>VtM+(z(e@_jTx?6H3@z}5ofJez)f5|s%n
z0USF-EDIlcr%inIuIZJ3zQotzfL@#1>|y}#m)pOU6Y?DH$5of(#!?3tr&p;ZQUWMv
z9|~$a%eybn@*xfDf#{wV2VXS((7aJ&m_ZA~?4OyhKE?B##yvAq+%tWb?@7nydlo*o
z4X(2tZeoR|Rh?!VTHOH_1Fsh+C*%?P{M<YWr)?Oh?!`wk;JEG~!+;{M`kkTlU~T0g
zTGK@jnv&ww8Gl{ceKQSRuhX}7WDMN|8pQe9Va_`gZ^#MksdQruUDy5Y$={&`y5EQL
zcj;bs2TMYKl9*wa9Mz~EXXP|}%uL)3N`|Sxss|lPfMgl^ujSnj+)@&A1x72FZc+7K
z?CP9SL_8q&w&KRuFlyL5;Rt}c-9dZoGiWV6r=E!Cr}SK7sjYJ#fTiy+UKr&+L+2g%
z_56w#uG`!&$EnQGK?f0(ql?uVoD?+3N5~m@&On;m5l)|X*`HCClI0^nEhxn?t<E(|
zu}?!|Xdxy+@=I~E&>)u|!ULsjM_NQ)A}G7^(tWXJKMfx_CB3VFph1x-<!v~VeG;Gl
zR5mE-gaQ|UIj>0XYRv9QC<z6JaAA1(*&vCa7?N-=NnL=&&zVMhJ3hGFfsxkfOT>?6
zFFv5$pXi?qLm85P+&%Ib7N6&GDa{c>=tZlCUNW>^CCwEDJ9tG`NmVzgA)N(d($tN~
zk$Ft;Sm^NIV8x@;p8cd>7;3bW-s)P`(BNssMIUqyZ5cZ#x&A3C4J!W=hHhWCs;CLB
zafa|0d=ABskKMDFe<V|*5TH@UM!DwMDyMh6uB+q7Zn{7!rhP|#2bowieW*y)&R>Ra
z7!*Qu4u!(iY+{ATOy2V@Te4-D#fDR9O2}>!?~B$Gk*P@{i{%c%JJ8?CNGd&G^ZB4U
zG^3Qg%tIF_Cfam_ND#`B%W<vHpNg?2;qaKoV0q4^anF);&SG2&&Dc`Vax&C;EVC5O
z(N>ojj$>FQ<QxbY5hil=OvrZOoOkoB8hyhomkxE!S$09W`4*FtYcYjvo($mi%-yiY
zDyvvyl}yU7K5vc5ZRb2#Y}L#vYpYpox((Mkv|MbLvBh?oSZsJ|RYJ6gwjZr{C(`X2
z+Wo=6%kyyUde&5nF4Nne271HaQpJCM(ttrtKWS*BX?;biVkg-{M<;zadg()}x{WGG
zQTdsvASEAubY=^*qJfnKSEdARw57R4T6n{28M}}i8S6t+v)*4)bxQtF=ZZtrxpdcZ
zGkth`T1Z76`8V6B6th)|%c%h)o^Rx&rN%Q1eC>xy!H8cNa{$BMUuN|n(cvm|f2TYA
z{gZgVmhVIUAwBBf-g1%pV9Yw{Iz<?_E+0626Or|x&FA<Z2NRb_*F1zcKF<%&21Nb;
z219EtA681bvIh6CzCGmIP4K<7tsdwxb&OZ`x>2s)$L~Gu#Z^!JNha;<m|oxwxvTNd
zdyXC=`-<R3Lr)5GZMBr%NhWtvYQEOjjFFCd(VDN*YQ9d?e66WjvXK_CMW6ZqJo-0y
zgCqP%k0(Ie>}ig!ZKT37bYWGD<^DQ)`pR#UP8D?ww&eVJOQ&H``h!adqdX<FT&ITT
zXE?U!pW>>kB$))NQz?`r1aGD8;Nzd6;%yI>`<cc+f7r6qFcU^nO5{xDrPjmCpPVKl
zF=~=RVM5x8>WJ+-#Kn<iIuy-m)gd>fLow+~X)tXXdrGr$``EjV#{di4%3DaGq;~gW
z4OjE3xrr#@<xjpagxW5ll$3-WI*gm)hRr2|yYW2XC6xPBQV2ETG+(86<m$5V)LCfY
zeh`ogzjKjZ3C%^p`CuE3lKd&<MYT0;-cfN;)PipZr!fcoysgsTZGorVjTR2i_K>UZ
z;Pwr*oVJT77U-f5R;AH@IEvCmE)L)Qboxlr*FA_T^8&*FK~gpw&3ramuzP<5!q(mB
z{tFx3+&tkeNz^wL`8bVuY5f8`>7GxGm;^XlY5nsX;SO67HD_|7a2QasOfVI50D^*?
z(S$&dF~2D*KCQN#wNUdqySmj_D@8rzvK)R;^$sAp-_Z84@T|!?9_zL}j}1Qnr~c4L
z_(45kjaKVw&=|G@r=R3SRFQ{1p-a%PM8@lg7Zj#QB1p(@=3#-kr%t_C(||zm{GvPK
zNa5+^s^3Cas646(-og$rV(}pAXL1ar|9#3YGB}lwZ$StegV6U|`FHFNmJ_}Q9U)&2
z&X&J(w-!jF5#|O_4uDM&Y!(}nj84Kilv@%zd2G?3awW!)c!qK*ejygg5IX3C?MZ(g
zrv!GCO3`vu)RH)v<FVY6@o%E^JNNIoK&al##faMfzo&qpW4IpgdK*0^k}m1DqId~x
zNslT1B50My4zHf0RlM-qvXB)8rgCOJL?!?zsf$e);@A56)-_*MI$3wCSbQUpXK_7K
zz({^Wt<$vyqK8RDcdf7Sa@<7!1iI34rt<w+V{Lnlo~j~$Rts|6u}5P$aaOBm`1O)r
ztP1mlJing8^C7X0?Gr-?czy&#6GxTKfHzJbM3nbAdzy@a@jl%DV7KG>VmAK#glzw_
zzxeJ0$K5X#t90mx_g|Nj8<B@ga?Tc_7?g@J((YVx!!Wnk_^TZ!++^;&dO0KoYktgQ
zXA+(8SYts$Cga6;bU*$t9^tU>-$t`B7Bg33f%I@8){j(ulY(-`udV1c5w9xlfjFj=
z8lppF5H9@5jgM3>z{>EcgCfS~>C@y^AyJ3u6Xg72c=8<c)(b}E7RrTM;SeQ~qXL~u
zAN|>Qddt3#;WYZ$hsABkTfsbaZ63F3$WzrOcfu-K*o&vRaQ!~>M}M=YM>WsqA8h{j
z`0)|7Zw~Li=>3E#zVK;qJ1uz~B@qF8y+Cb@Uy(tVN~Jwue+T#>G)h3dV+)V=)x`Z?
zY}$meNJ*!@^!Sc~RL9SqRjdQ6a*Cz&8zsnT!OM<2C(u%iyDE9vo8Ul8-i=^CtXC)X
z9mCY2iYR?7F=fW>oW1=>a1!`_=L22Nq%-on(VX4-3w|;fO@wajJ9-}<FGOI2-z$Pa
zID{6_1>#b_F~)%f)21Mi<j>vKzDKk*VzR@er+OVf4qKl{k**A4oo=^%3WM-hqkGjx
zn;diG8t4W)j_YtBC24w^*P*G8DL{zyaGy&0oPGuAw>jwtl#UABu_4rlAbcL9gLw9I
z0-_SEgU8Wq0V%KqWZenM#s|p2)12L7kS$TSF6hE8+qP}&vTfV8ZQHhO+ji};ZQE5{
z-?`^>$G!chqa#*EtTk3f&fhuac-{#d<AmkJUmn63Qd^*6k*sBmZ1E?BaD`tqGVJ7r
zoFAxYJcgA!Rv+MF;$Q*DbBHd9fGLGBr*E0*`wP`+gu}WPA9b8Eeb{l|lzfkAs=aJr
znO}yK<SoPld0#b~49yMyfHmH}UyvBxAR?jpUfYhXzW8Xl1n>Ux*s_8eSNs@I2y4}b
zN6y7deO<2Qf$3N(xNDaX8LMhum{S~bP`K-c8_K|~|4x6^hS-h1;mS{Pbl-02%JQK5
z;E8469npR;M%Wv&ab=WZa-mZ#_Bl$v#7#e*+vp;8dgtQj?V$GCD7SYR_Um2#sKmul
z^T**9<2e<$eiV0VZ=*n~3x_7992i!(Sfg0r<`&J@wSwgZu2iccMG|GSyv;W$Yj9$3
z53i~z2GB1Wk(KWG(qTXQL;SmCcyT5d7x&c|zaD89Wm~y+1zn42w=`8%(B<D*ORELM
zCkj7B@Km``!6N@!;=EzirO&~?4}gtCjo%R%<+iEd@ikb97FDyo?Z5hASNg?U)roqU
zsVmIdO?o){cOG-!tAO9QG{yD-0*;;Y?*P56w4kHIi;P`+!z3Z=f}O8a3MjCO3X1q5
zKKhc!_ro-$D%233e9%rF^W3n;uk==hCRu~>d5=b6J3EvNU_$+jl?_DkGGEPB%8&$H
zEfRCILBuH$q2UQ`ke&a?r823fC{mDu<$<CA7X$hAnBS0*8)ieHjChI=a0wAXp^2!~
zO0Z!s6+M=l;by88J5E?Bd=0*TjNO;b4u6bGwQ5xMKX=OiVtx0<^mhB!|9o*_edz8+
zyW<zaOlrA!;(ga2L@q1WZk@<mEa{S;4IkplR=P%pdb)6S=L_QFP_3PAn0d*9rOgKj
z2O94De^!lAI&33qmz`=4K<bJ0ll{YIdGk;l0`B>tcK0T!vW=r>o3SS&pFagcJD+Q&
zW-o!SJW}M?yK%X4?Dq$5>bRrsGK<WSQE+^^$UaW2-=HTCv%GRJ4x#W=0p30sJ0HRm
zx5iw7NixCHpP6TuE-@ewR@|h7e0@`RTaa_7U+A!}tp^yYC<#9PmBTj(7s`r~;YUS?
zrHx=|2{;@uPTDP+MlfkL<3<r+)0)RWOagouta?H)zituZ*DbpI$1Ntv$k}{(`XVGr
zZILrAvPRi}bI~O;DvdmT`@G{Ib&`Y_G5LnZ78lWbYk&(WWFP}!+(Qtm0h5IjUVo!D
zfFG&_vjYuR2@sBxlTsw%BOE74;s+L+72tSvI{}F=fFO&-iHh{!GY%tPFuL=t)rl3>
z0hwPOp-5TLH0T*vmIsqP-!l&?xFCdI4WZ5K%jB3i+1g(M<F%2oK74n_RC@3@XuXG>
z2AmyI0cEYKBYdM2MKFB6A3<I4MGJR0eA0Wkp-+>4GTD+8*yRm@XfH{Qa1LZ{(AFU?
z@^AY-@e1B;FPQfG0hdGtUF~d2!xN8rd=Dbmp5@}I`D#?Htj+ZOu##psHEQa0BRIjL
zAHIS<4Ym??0?^E+xiXbhE*vn-D)MceI01R)RmcQGqW+>nAWjbIWFZ7x`ZmsG%;}RV
zz>WSyJR_Q4Q~2n_k<+MAuE0YHKo>6^d+vJiK{hv9I)4d9=@xs~m(@Rl5oK!;X%qu%
zXNTz^wuRfI-)6e=_3@F{*M^GD`iv2c8+DJo?)XA)TH1KFKy*QCV@wO@+HSXR)exQ~
zWBy~-tK<=?Vp2G+idWYCe;dBL0KJb2-|gPl)g6-uic7LO-*HdCRbwH7z1-C@6IrCy
zz#BBf%^CWsn=CDhey*U8WmUkfHfdy_aXI@~&Yi5Qsodj22!^MhC$iT3w!Y>Uxl=3L
zc-o*_hJxJHC8N*kuq4h$eS>4i9n<w(%|g#VQBosB%a5kMf<(UPh%)?Qm|HUX2r_vL
z@j_({qkG9fb2J_!=3Fx)FLieuHExjErQv+-yW2l-5uZN@guVjcTEBRYE;Bqc(*Fkp
z93~Y>J8RB6v<qM@9!>bjp!oDsz`>i;r=9t$;WefKHX^RDmN>v0YsUeD<nJqpmE!-n
zJuQj<a(i5q|K;|eCxxcH#d;TH8^wci5v23k<)u>o2kwa|h&jf#0F_#Hv;@aYKS~Gg
z)El4STw*&aj)|o9vTdjjE5LSX#kIr76FKgmkW?c8Z)no`76a$e&#<KvJ(r`9De-Rq
zb&lcWc7Iy@wRh>y+1i_jp&$vuP)#d?m9H@Ot?Lvcse-m;n^qW?f>;AT224jIFvfR9
zSRD;m-!X-Oj9<GPJZ&(rX)^)Wgnus)r13pkDW+w$-=;Qyn>#pvmw?_XQB7u6twt(>
zDc0kI5;7Q6KrF|PO_tXzh8syIFt3f!95IXZ@mvG=CGy4bJN2wDOE!B5o5fq|Gx-s8
z((KHCJi#}_&1LMo%O7S!-Jz05)w#ve`f+^c0JI6J;Z9`yx3#`ij%`uCRgP@iPCdGp
zLzO*%wchW$o>XjRHD}v-c#mtQnV-q~HusGL4qfV|XWfnT_(R#>g~*ui#s_7OjT@)o
zv<m>(<*o75HcvuZTnjZ>{j;+VBTC2J2>6DId<L*Eg+j?0DT*_B0*0hLigrCvPzM;U
zZte-aVy_^N&1LfXNNH&|zII7*oDpw8ZeUWVr(yTk7!r*?$o|{Hf!QI6S^BnZ%=ZNU
zTDY)YYGIh|y;gJ=J|}b^D9)%QkgsI$^}5`m%MLv6BQk`VaKX-{n}cy0T*{zp+EbRB
zy!N-YA5dQ3OZfc<B3D;&>6seOo-_wBDhnMl-c>aAU#O};b%fR3Zgx2d<G)TKCoim-
zUpMWq=>*;>T`3+0S)YOG%7iC!^3D2Z4^)k=0bGVEK2jWEU(lh0ACCn43Tw9-S#-9+
z3&h}E)zA03bRO;69+F_*Rq65XsgD1Du-g%?8L$kXe0;#fEtfre-9^e{U6Rp;%MfT(
zN&P>!AdRAoy$}&M3squrv12kih}FVdh-ZlfGQ-wg`nlLg(S_o+g{d@qM|Kje%2Y-g
z{G9SeAl*Une!5i;g08h0h^2O#qYu+*D2Uj2bAK{_J|ve!=IH;xvU`Rn#Ga7uA-Yvb
z0=4yL#LCSW9QTw9;_>Eq8CJo;w7D{_CMIx|W3I!u_mGfMwyzr$to;>Jn$l@m4H35L
zS%yChC!VN@4W%08mKW8k`y27X`XXGqhe_t-g!T0chtnwYXc)9MXl?vaw$}kLf<{T5
zpk1GHR%r6GQx4cD*@A8cyFMN$O*&IesZ(x-O%Q@Abo4fEs#NiR<_85CdGF7$#fQ#G
zR|$*-Bn90`8P>?ja9E0@`R8yUN`6n7f#hrxOC^eivC4eemjMZK+w8tw7iezGylwHX
zA$76)@kHO+%=fuM_yhJ&?}qaBT5|EE&$85WeuuwWii2WljkaS3{2bC{C*7A9*Bf^y
z7LB*<j2GK>2&L#}_B2G-FXZXDcygeDDz;q{p)^-xbW3z^QMd(*YZol1VjKm`ed72z
zN}$@w$ZjdheO3r-j^^5CF$c-lB_MZppSP8~v5;fAe|fY50aql^d*Gpy+|f3a3tj=~
zd3OzvV<d`e1BwfIpw@WLnz<#hMFSdy`no9^uFI}wfR>ba><&tUhqszQmWDAjW;-a5
ztwy@IsiK#n)<16UyXCzM!+nUTL9Zt}O(V=n4Yph7aic5~;d`8UwCZ$~bIiM4C#Vu#
z^WHM@qsg+_DfpHcRM!<-24^m3w7&T;8c3or;6t1?cHE9C<X#|@0rkTU2gBZqxebz*
z>z=ZUVCHSm9@l2%4P;c^?M%8x1*h^<QY3MPanUHI(iq+tDG>ZV10z^nw7<bk?C*S<
zT{s+@q*vpRZ?!#aRe}83ZlF;Sc-vvpkvVI4cm;_MU|}*wqtM4(i=3Fd(m$I`F6>N9
zKlIxGM@DV4WEyip2XZ4Rx-!yXCsDPfF4H?|i_BO2V2(V4f%cH^zCs1sicZ%<6qPDQ
z8BTFJH}yhH)p?3)Wv+#hOsayBWJ1|?^*9UV;>}bN>JX)NFEDQ%v^gkeKyIPQ|7f$o
zIdN|(*0`h@{A7;dvK6bpYBMqpNoM1BL|GmisqV(3wr_`wN2$U46LB^O$(2H03@JqW
zbu5BpqL@Daf^z3W%_p^B8GvN%(pQ5Z3ttNpOvdCm;RKIS3w3&r;@UF_B%v{M4Ey5t
zo{+W;`$i=g&L^~Dblk5B$wEieqNd}*qbbo)U}p5OD~9JhJ2_yJVNlLOriK|RQ3_$_
zj?$EUBN(8-$T{Fap69tx^BrC5m*gi;Lqc{6iE;2Cv?~f1JUU4@rRBR(BPlK*zhW=c
zP~})qb~Vl`un`^?rEy`BsG92@{4pVYAZ*``1f2uP?5WN5sWyKP{T@_m>mWV=MmNF@
zZ7K{%2!5Dso2GINX@|inN7IfrzRKGQ-t=u&ubqGQ(}AW1yHXhy!~!F?TV=O>v2V*b
zwV!SO!@)zzff=Zd)8>|meQzt;?ZAO<_24vT13k_!I8{wrg72v(FDzY72w?E?KLRWn
zG2pZseS*b&c<#G$OPJoebuV9YF3?)S718!kxYhHnet}mxPrhIlkFyUT!&;!PH_>Dm
z<rjAaMi{;vf&3!R@Wmq`i)$}6X>T_LsN+8AqERdu6R8{Wv;4a{SO^Hdm+rXs1>62M
zfN)mOz!8=Qj-L=4=Xb<bbgYQLIk^lt>q3Yq02%F3`;GGi1sZG=V(5e^I3Q0eW?a6e
zA9K74?&BQwz<Z=^&VT2Xl^0=#pZQ2yovpEqH$G_MU+9!U{}1OnX^vh<ufq{lCBOqi
z`&zhmj&F@lMQeqDd-^j+M`nWqd!-!Hm;@9#FuJ*j8*vAu`COw2?~b@d)(h5Fh0_YY
zKh<Kka0^@&jVpx{x5q$@rq)3gU3nd6f-wGfo=(pN&o6?LCd*gKKMQnBUoL8z&uS<U
zFqVi3{vO?2r*T-;up(27=Y~Ci{Y4FUs#6Su*4yk4Vg-GF97E=XGK>E<uN{Z-GL=0g
z0${c|j$vemb+J6esDhDXB0{Cm-wvY*0IyoM>)mBWKlv?!Au-`U^pb$`Xwptxnj@l;
zYnO#2;d`d-44>3EgR?0Ew~a^ipOPTb^@6~3IK;yFZOEUspqyAkBK1OZ4h+eGPX7R-
zXQ7i3^Hh=SRQ%<*#YgK$;`5{}mC6yuIg03(!&cd2SO8U}$1?&9VfHFe!Ixxg|N6B=
zH7-6G9cG)To^$V!49TJ6Z@@gBL~N7Y2}1Gq|BYBx@9o<?`0Eh?dToRoK+9M~+dz5G
z1x<IT)geyz81G{tZ$|FO!0nU|o$it27klv2PvIlz&0yif%{2c~+adITON&vXH!Q7f
zrk#1FXJ<uEf^<W%d+TsYbti~=sJSo&%L?PN-azA~77I%Yo?2((SDSH)L9br}T!gMy
z=?7E`0n+7=bfa*pofj8)^<8HS(UP2nCx*-NYW&9cG>-8qul&^Kl|1SB)qSe$Q(8K_
zyr&2#dmgUH9NC9C22PePi%%%D>>~ve%NkcVcPG%N4~M?%ZqJrc7kX#(dHaapmvlgf
zUo3@JkYO?Pq=R&XokNr-byV<!G#OOuri)=dwsRl~TRL=p8#9nAmD{szZjsWQm0=Mt
zBUgCtw(Ht;-F@l|bWhMFGQ4{dq<T3|<{Ts#Hr8$)wRgjXef)3=4I->}JD?JnD!)90
z3_Zh*)0G%lbVL4CLtri#a$+%b;K*X+SHmrMD3HTVQsE8<#k=w&g7N}Qu%H-mQH<<r
z1Dn~C`s$x(Z3xi-)AfUtHA>HZ+n64*gm}v$Gv0poSL4Rqqq`#Z-YpTcK-}{yWPd_n
zoN7uDBh;o}=N8TqxtW--CIri^pOjFT{_UUTZ8r}%^4cz=5CIG&zB)lK>M{)+>JXYO
z%QTfx_c&@~(OXQzWriZ#5r_H6ia3*XHo0VRdW%wAS0+U6Cjz8x23n%~^q9l!!X%55
zr50EJyE$#Qj~SreR(FZ}Ccf}8CFDfE>X(3q2@M4d`?QrGrW_v?<-_rnzgS19ZLW<r
zOC)K6gtmq>EvIA~hy^O63JTsp!0@>NKXkQ7m>g>Ys%m-<THA3*yA<i5ky8eNk2_Uy
zdy$qmo!zXoFvXA+SgW#>xzsZJf_ias@$USl!+iOeMH<TAIy#~3CR&9>z@JRs4}Uoo
zHQ!(U)J8f_Q)d^Nn8GcTZUha*%MQXQsPfMwt|>jG=_K9N#r!jS0$bkcUSSP8zK~21
zu7o5qB+R0y1SJDHCgwQ5!<C4^XrDO`$}s^MimM00w23jO95{5`%$UiHq(WARYRj~_
zVO%p^!!a>LtDcPEi6n{%n9)T<SA$<y&XDL`!AC_`ql>QkQFwX4EyPI7v{S2zEn%J4
zeQkXD8#xkb(ciAySp>qq5937PgkR1kzjS;Pri}*zjHuAi2(vAdqCQ@eLfAchz()SA
zE^!G#(}zk}LZN!$d&HCU)o!_@KN^fr4L@j&$A6(W_SlNy9g7HG%BF;g*}3aSpC2fw
ze$Wo897r$3Twa97nosH=W-PYVhJr>LI<aiW)hpMVY*2PFelxBS`!|dlZ>M6?D2GK%
zKv1NTgUkDVYlN^6%fxCTN;)&j*d~d;zswW$j0&4h`s@SstTO34>tC*BQEm8bTM~g|
zC1N^kyFF}xIz-Syo;OvtfzUF?ETkr^N?QF%7?-HNv$Vf1Kf+20$10k;x3s+O0x1$%
zBq?4})pU>)qCB2q<Ejyi%_>QUyX8Dj(suEpU~0Y_<V;yuqCmo9ReA=cxCt{I#_aT5
zM?+d-N?Kx+cHKiTor_TjBvw*nZ=Z>ge!9vZjq~YG@-+2XomUhuR@q36nS2fwLkpqt
zpj(l}B=jg5(p;EV_cS6$BqQ^DZzB2AlsFwiGiRy_-~Q@>lI)3dDH3MX=G{opy&3ja
za~Q-T{CLCq@yM24dK@o4bwW{oOe|0z%%RFX>?5NL)*_=rG*Cu#cD7V>XoGXFtAHO4
zVM%UyU*Edf!*#vyt=#eCnJvT;R01^5=YNcTXsUdvOA6~ii6T|yy?7}KK2@nwz1Yc4
zv3CO`cT@9!%Fs|R?ojXrS_hd6BBBDJstyDph^2a)i#MjCT;BOxR#_a48WEU?{IX+c
z>KaKSW*Ph8;Dk`2xO|@FW!OADJ{S#)k^%5S+2@|^{pKSI`CS8k{a+*^``mAgA*K9k
z(%(2#|1BP3dZRBzr?(ATZD`P|<vP!7R1K0-*ysEW-1R4NdE-_n?g)Uz3{}CHKH&S)
z#~KevR+T>81HCTqV{Lcz8}9$sLxrLG|ItGaUOYm8n~j*!z_EX~LEK$iT0xHCc!hd}
zuv9}}aP_@|t%F>$lp@lgUx2X@-vL{Kb{|?2h<OyM@z-qRL&cX8`M^Vw(DhEQb)#)g
z(p)`cW4{;r58(k#{SzF0L6WdF4z1)Iu#z@GNm}{GtAR*P_9m6X;w};*gb8;Q_#D>e
z*g@V*Xhj9~8F%fro*`=d*vV(010Og>!)^mq$-mAUJo|OKg<w_v+2-}U+TLO8>>vSZ
z`q+|TU}?c%XE;}{VO!wX+p(>5ol^&g+Nt`6l2ZnUaIEp|?Zr0d9TDt3sruu<Psdbf
z`@%5D5<Igq+(#*f{DwyQAm8GnMdbEyDKiG!%FXt`5Oq)O3tSI6P&O&qEg*07G`Be=
zGUd4UM1m%i#iV!3YCy4t55mr*FeSMK=JK3Nhr$&eUyDr%e%Q@wcUCok9#SD+Cqv~L
z{NK~<d>G<#0r^~w@c~~2G5i(#Q55=@I3;>AVbHUF-24(jvn22Nz`8H2DYD?ZUvv>U
z%|NUhMp%vjSdRFZk3yK%4Aa#r*s-=sFa4!l$V*Q6(k=loh)*GVjHHBs=!pkjs!0GP
zG0aG?@D*^86S)6SJn)PIc_o3l8^Liln5mQQjsaOGfms;EGBucWfeet7b&Vlv8Ua%8
zpY$~euzwtN5egT7b2Y`mD1|jLaz$K)EmW46r(gvrB^`=@RsX?Ef<kLTCLjCGP~BW*
zKkjUjqMoDl<3m7(XRvf|&K#*Gdc`x?N+U^jJrVdm4EPeF=T$+^0Y4`L^CN@&tmnHC
z+T(ei%O*b@1!_S8d0*9Yxdh~t=aPeMl0j7>lfQ+u<5<Jbh7OW}<}w1Atsgor+FmsU
zcD$7P`-cB_a?d^m@POKXXnb?W(ZEh8h^ZFZH4(`v(LIi>-;sRtPxtN75)|7lR~R}D
zJz1+gD62H^RDNdww(%k8v6>(FLI`z|d&XO6)m<X2I%%D~c-exZy$mWonP-q<Pp~e{
z5fE#-yfmm-BJ6Au0Mi#`A4zM{kU=$in-N!sP%I6{P(mj^^;b$*F_5Dxe14HZ@-5+(
zGJ-_B$uU0_#VPTnURQ5zu$lu6yyOuJLCRqc#je<hLxu)h9VJ(Iz$*c0PS%c%pF|}X
zB@NEr%aOk%)>s0!(HRDLv1$<J(EwU=46xrkeF!6mqMwL6g9vxEWOdVIb}~?83@6O4
z=j%C20Xfmq1FAr)I4I+9dU=e{0h(XJK85nE_TUa|!_ZdM0&OEd?GYa>L=)otqFmcU
z0o~}Wpsp9{ZHY0y^h`rs2|zEv#)0qGnv%{>fe}1J&@kPZ(NK<<)Ues(w42pb3Sy;{
zfKRrN9b+ppCt`#HmE=1v6)%+8lO#~soX<!Q{y+R`55RvFwzu3NWTUBk&sKKawE-95
z3!yM|p<ckWp_dH!$aP)%EtNETv9R?t)q9*1iJ%$$o5mNbn^SUQyGOW&?V~s3iXn;l
z9mO|<z*A(Usre$fbb^EY08ab&ieXL-OQj-!@(_M9Gs|qbS48FMilx}Vbl+J#2~~O`
zEn-9|lP8Gcn{kLcy6bxh-JHeuj$0JTD|<5K_gbzJ$xCgD9)M^<MQE$~$*y72dET8X
z8!e7;`d58n)dQgjh6qb-T*MA8Wt0=vT<i8;3Bq92*cc3!x*~w4LkBA|7NPf>Yb3kU
z37RH5^q66N8}R)cVSS#!LV9}>XRdZ`%HD~vwU>D|llfcwer&W-VtDF0gX<?K;izeJ
zk}Y9*yO-RSOAa2(99koaD&Fq0>Z0vj#xw^(6-`09)z~s+5@)+DCNgA7?yoW2k23t|
zH3JaIG)9KTGG!IK^+*-xnXUrL-Uwt$Yl7LNyr%7S29s{}6Ht|sgu_K~2>mNZ8oTGe
z{_jiqI-tqF%P|$P3@imOg9d`F@wgGRDJKgYVpY=xHX|6_>gFXgaB6z!xy`WAghHWp
zRwOpI@r?HXY(PBE!kq)l7e`j)Wlh+QCS%FqL?rdTV<ciOsGe~4-$nzr$CF2BCGEEn
z=dCxM@O<~?Z{&-ROKVj)nGT6@sz(tQjM+raDNbwgrG6+$WfCt`Y|m*c3hb5#uB%xO
z%XJ^{X0JV}wBnowLg06LX<vk(CdoTXxAHnv-IJAVvglc!?Vj$x+d@MiZ_b)+O=ET7
z_nJ?RyqgRFZ`Bpxch(DlMAVip{brc!54K@%Rh7t(?c|H&X1*=f{tT+#5Uh#*H_)wa
zd%^a8C)-UqkuABFP|3G^X!!jTK!H!_*V(mn==Cfhq|(k9p|0aTB;yw;A!Ds*DhWX&
z6J%f7jljpgDMXx{L9!#zSDY8+zwA5sJ&yQ2MC3Q7?3L8jW9r4Y(AEM1bZ52+AJIy%
zdS^MLFgB#tfHh~Zq%44`%{?SUF(fS49MSX1ih@*<y`iM!RCh!;A<Bs|Ku`Pj2F7V>
z+-)Vciv*a3<XCm{9xQ6L`h5>)L}b0_1063=?lMu$gQx&q_kCj+dTXX`PK(7sUz&Nc
zVFk4<zPh>*5GzT%iA=(h;N>V1>64ZT4>T8O3`9W}vsbJL#y(ItLD!94NFBW)t^*Q6
zwi)__nAn&J^D?@FOnbXtqDyxp8OgIOd$K3-bzKIrLD!o1Vri4zd5H1MLYnJu#7$02
zYW0vb?~Sj-LeFg;s{W#MhZMKu5yeV}em*v`{#1_0YVYniASu2)qW(v0XXJC<(xU#C
zCLTYJ^!mBJVX_#jrA)}i*;b9zl3K_uxp=bFd+esB>Q7K_;0xL60JGGUT#Z!?v(NcA
zz9y{mkn-{6recEQ(8U}4N=TfNe+=PhiLsSb3y-}a!lK4-4`*<Kl68smI<zQ^Fw$Tm
zHu<_cXKy{nQU>|g<JJF}9@?QKh7z4n^SZ}x_8uLs(Jp#fL9d-+Sp4}X!_nTBq_JOt
zK1T<6womy}Z0*aY!=(#8r_AbcSETso+!x0=BO1J>5e?k1O5W~t5u}1_ZiLk3?j{s`
z0~N9c&boFQGmzC8No8FbtphYXZ}K;qH5arsi_ft;4^O`FX6{c%RhN#F`Y@2Tinrxy
zLkSQalinocr6HouvCXY9me<<F-Xx-3!!fMm%cY(=<)c&}z2*8qrvjic#HJdgR29l>
zMgSJmZ{Ocwl4qj-^=;nR)HIqIuL#=<6)LjoMJnTXY!N_YLvO|hbt=6RFuNP{)CvV5
zVX~LuwCWRk4x@cZ3)6Xoo6$zk6l)bjGW@Lpb5s2|NfU5EkLW}J21ot335ZFT<l<zw
zkz^_a&@zOy$}y7E<Bt?Xiv7ZhNyq%#*D?Ag9`+D<IcmblvZ`?~$dlH(Z?;gYF~P85
z0Jd$q5c!K+jd`TDlZVYlJ}PZ@nng1S3u_(PyD1c^L-2jSG$1%A?qp0pmHSuVvEnXm
z2?lo|?R90qu!ce8F%qA51(o<+eRWnk>Gb#W<Zv?8QVsL@yQ6E;Pgf045Q^n@S_vRT
zv~4q6%~k6S9YctJ8_ENqT~5Vr>U#{l=S-~)=W{5L&@}Pz!GoNvdbJ5*VhdvKr|Ejb
zMYpXG)-jFKv*-fct=;3m6Y|~zv$@=bY%!||nZ@x}m{u^>bdoOUM5$6q+cVsFPWOrt
zy!_U1#(+?gulU?+xJ(jXWa$kOGKDXw;vxAiGcbinGiPZGB0hNg8=1JCUj%D-UV>tJ
z!-m$#i5nQhJL&km!N|$^>{MI31JlUqZN<@9T^)h3<NbZ?G_An?fZh=})<@Z`Y2Stk
zgc1kuPIXX9m4<$yjO|X3RL>e#&mIz82{|EZ9pIlE)1uq#TI$-PKkkU#=orV(@34GS
zV)W|gd@n}q^%|Qmt0>;bTuENm7w2l<DW&W{25&Riv`ydyUm1~&UDfch>q6;=CXl|A
z*AHquVXn8!g|9z#D^{2jsZa;B_8PCl4u2u(0m#(IYj00m(+FTmrAf=dPLxYwx$MYK
zU3?#8AuJ+CkZY}D7Qk%h_0#HaSQzkQTrXJgDqpFy7_W@Km^($7Nx`ghAOy|b(IKVJ
zD+7ultobduI(4&T5^7J|>ybKuzCx|3p28E4khXry8Z>Xq;ize!sDEjn0jP_^dI|ZH
z7%id*r55@G{If|s6Y7S~QJ4%9c;f8wU#!`+O<nwcNzu1l-&@ahV%>wONWfHo=B}TD
zygM{CGhS5JvGcs48zw#Ch9|U>d_*_8Jf3C)pSa1Y6}+v@`r%jpkairpI(V(Rvd7V2
z9#UK$f|}Eve}(D2u}gnoVCzO#LtYvd`XgBHW9=g2_0aJjs=<>$QipYfRE8x9S3UQ9
zF}_uIeNnyxjantM`1ml0J>$uEY6SQi0=^#jD&QsVb<r2c2yAC$?5a+PtC0sfyO6Bh
zA={CGaP#^;8rK=LEIGBbImL<{Zr`uaYkF~v58QSqdmcbB8NGmEFnW>^FnR{UloZU1
zkeiNBu>QAVqnXhESzA;zA8Ot+y`F3_e0{$PlMl<Z3hhjB4@U^=gvF>g>KD?fB`{|?
zsPPsWT5PjK+>umgZm0=KdSFCwz4~3vKLpybeks*v%#H?T@8n=Gp>OIUHm6J>UUatd
zN|oa#T*RsHq?;v|d{IQpx>i<cvBlt)RD<7e-W$oVenv$};mkmLU!NqIR;L*SP`m3}
zt*Q|vwA?9w=Wa_`ay(SsXs_p7%xPyp%xQ%UQMBD`z&ky0L7q~&?c#3_&QU>lbF7f7
zH_=|3B>eD1Z2h|m^yOOe!@z$%e%|}7UJm50K^%TmQ%V^=7Vi;Bsndf&i<tm^aOGc6
zLp_9Qyi1oJ66KlnsVTOkN^R02TYfGfP;EZUpsMBIE=<&aF2Oz)-AvpLTfhIhmf2ug
zB)|1IAglkkn*u|4y9}^&(*Nfk(Ui}A{Ii;Th`|+$fE>=Yi!5+%N4pgnsI8UK;OmP8
zftM%q{7za8yDSonz|J04Xx(ZUU`^2Wl>hpMI@PbO`#(?Xpp%BHkf<<F5qGbxO}N+5
zB{}Nuk(luGPEL9ORZ`f-k`s5g<@+rH{{JhyQ&QYKDJyJWR2FrusQw>|0RR38Q&!Z)
zmX~n9?f1Lt|EqwLth8iRL(jf5F4x~xmJg)tn_S}KF3;zs)_v#JTJj!RtLpob{IR30
z`$JX!P>Zs&qV#k7bbiz0@Y8v5@uPqK*-3GD(qr``7#R0@-+j|#bJJ;g;XO6RY<a)M
zvdlY${*6}E`!Vr#)8qD|%wLv52;QEaFvoF7z@~_=G1_uzGAVpvJ#xF0Iq!{CIIsO;
zru5JJpjislu<5R2`m`r{{<OxE8Fs<kQ2JRvG`(oAXI;%{XGqOyg$q^m?bJYeBof1E
zuf~zO!`6Vh!x9&2R6B%z)0x$<t((V`>6)=<uci@$jsw$^Pz~_CPZm^*`QzebCgdvn
zY30CO3GS<%Yta$z8jUQ1&W>&x*n6|PqZ~Si;EZo`m#ZuLh$|};0ar&C6!t5YGkO8&
zB3|-Gge$By48iL0O>!Qfqk)F8=`JhOKQ|HXW<Ka?&d^#HCKNVWLu&qronss&d=%+D
zgtpU{5BShEB8E+OD#r#@mI>NUWM+aQA;hkMiVGX9&G({o{AK<R&hh)pfMDJ26zBiq
z1!(Q;(5EJ<Lo?4ct@Ajy&vtd)_KG=YC0wbc)=>=Fa#_%p+SZEr=Py-C)F=Y28-T#`
zNgQqL-4ZnMh<txPPJ1`PUoTlZIaZW@Ut=#zvr-^+1thRBqbnUWWFTzMWZ*j^mJ1LC
zSfma>tpN*QI_GyQhbz#caq2r>=B(L8^0pgq4Qd=GVY52dLQ-7(#*qWS-7~&(TJ*&_
z(D24vU!CCwL-~#?eN5GM2_l$zCS`L-+1dd9aUSpkPkm3Ftyqoj@e|GOu|!b?r-GT_
zW1Kk4egVD?rWA$7yaUri-vve~_XfI0q6HF`Y!=_9S-g7u)xl#zat%z-pCUK22}Q&&
z9{YFb08KcFH2Ft_K+U|3>^5w796UN6{kXf@t&RX4-zq1JayZZ%wexBpqbVgvQFiD-
z(W3@Jx+qdOSbmPe1q~E2`0vIZ)t{ap3~i_QXQv47iXgBNsrP&^AKqM^*F9K(&7a<H
ztB&H>Vj+2#iN#N*2wJb*3__~%D`6L7as&u6l4sN;2bDPc9iwo~cQs8yT9wfjF2Tyj
z`dVh$K~WiTJWNV6IioN)tQn)+p%Y$|96F*z0)otkh{qbzA*68Vy`qpm*L$XcS$+Zn
zghHIo{OUJ%{YQJu&GAx1`e&XH9wTQ#I-vwK`j%odj_paPZdA~N8-sJ^#NlhI$W{?3
z+9b+bMIlZlbsPp_kf94v^YQvoj6idH+xd}4m<pl(i}s!UX|}5eG@WfujFB)$mj4R_
zu-;bwVt~n%B}5dQQ(I%==%DT*O6EW#ewJqHe_V&nK|p;8mb>(o1s!GJoF7rfaw|)Y
zMcSb23%ZX{!6&6tOsK<?MOT#sKqV~2Zf-s`xM|`xR`+q{BTOoGX(euz{9O|7HK2MZ
z^JLLSw~e+pbnY63dGn4W7LQsXo*lorasge$G6Zg$>9(*!wx@hen*<`^+{g;i>SLl3
zFb8i4SM~u&Ct38Di6Yl%{&Jqa>9ZX3PxX)CAsKCC0)+C4FG6y}7WX<9YOAQS%L(Zr
zoS%c`Tk~QIgy{cd&il`(p(Uy?*s!4)NGIZ)NF&$W81Z&aXGG+t^A=t>XpGe|=-ZwC
zJ$&!znR&w&Oq@meGY!6Y(}D)UX>3UBmq=|!k=aZPIT$yE75hCt&ldUyu4BwNK=wg<
zVO0Ax=@CJOE9Kql2|hEA!b_2_j^m3Xk<4i^s64LBFR0dta71UhVLy#EnmV8I3($ux
zl-}w78#*-jF%Sd(m$x8%+gu(`)~4r9yRSk9r20^pDSjQ*r0(;sXw`O7M>DX`&W6Jj
zK${Kvma#39hS9JPm<O#Bq4^IiP8z``894eXO=)4Ys9`ECYeC<9oMA5=#!!JFN_@#c
z%3vsgku>zoP{9*7J8Qo5WPUu|4IWOU#^r||ExQPMRQ}y=F;|pO5?VAsXmPI$E98LO
znB8EX$G%2tS@`2iXO(L6^+iL&S#S<z8#n@@wG-_MZ8#W78&V?QK}hao$X$DHTA%(O
z#)N~GMHkjA(p|<IYKC6dsv%P22#qSM_OnZFi_6*waGXnm1dT*^1~FnIA|#$Xbq~&M
z)15{#{vJq<7n?j3*Y=>a8yE>~*>#h)kT|V;CODWjZE$`al6@T#@aEqB`+sKi89%tk
zvLAr3wsyb}zs!ygdOU7o`Gk70wtV#qGc(B+5m5{M%PNcA$gk+?ZnhuPsX*;p-FG+e
z{}_w}XEi`rVz9)(<BB0xuDfZkP>lVhC0Fb+=5Z`dK}Lp8s!(^DjhgmOL(JT88!n4w
znhF$YjTKfiUcOUny>hpD&7U{L!wkn?C99gZ(1yVrbb-H17Z9UT`h+;(4<%vL5$(#O
z7rvKCBxJ)wP4hfojAW7ibLpj@j|67JE_DIBso=)be%?wS!5SF8qwm&94`8!_BNRvw
zt6%Ju>)=hGi+rF7?{g>C`gctu$I1}|>>4(Oa~_6cADoq4^V?(MeF*6<kQ12vNRTuF
z=O5OR#J0uVtOc5IMLsL*LU5#mL@vR0WK}y+#xV36t+-{&e4#kiOcFpU8jmeH+IRV%
z!+Aj7db#6oZr+0OjOW$!yPq2hwZG>y99y}C_il}4ItKFNmL`oVN5j`KM87EqXg;26
zP=`Y#_U9fcE4AaDDv?-tD_#c&M%tr#chP*^(bT}9V)5|@kmO5`ee>eqHBZ9QF_V?X
zbS6%z<m(rRgG(YYl2H!FBqNO@>(9qDPs@AiG-LKOqhMxqn|OYi7cWU3;#p;ho)Ke6
zHdIeZ)*y%%E~p0J{Vj;?WwAuPzq5!}kN(m*xIBfhpXQ99us?P`fm<;rk!!CHNY~W4
zU&F3=X~#b0Y9z+EUA8SzrnVR&ee(&_n8{xe^heP>Z&tKxHfbA{pGz^daYonB%hHHw
zWaZ>lf)ED2veiGV8PWA=?hIO~+**xKM8T>~$YY#7)_cMeDo$Dj7XsS$UC#vAH7EGz
zuZdWAXkh@91EsK#T)CyPiJHL~IE2RRU&w<GZ)vSMx6G|_w2P3T<RmyKr<dR0jXd4P
zzv<~FKfkl4^oP}YIUTIAnk<Lq7KxntC^VPD3)mfXX$`YXzK?9c9*Xx~zu|Gdv3Pj$
zFgLf?k=wFr(*BCOy91GJz-XPurnP8V!Mm!5N5u-}aO_8bKHz&J;AiIf7Dbplsrw70
zNhl;+(bc1Dxi}T&5l4nBGWAJ5g;4F(YTNcFe>5OOw=Bo+S)(l6MT!yV?aivwL8QT^
zp2*V9E*{T@lQ7VppUWP1(;mNOLX8;><7(He>;8L&-wIPYr~(O5deyD}zh3$_roW#N
zMG1{-rZV%g28a5~Q29XHQTTGEBWf(`j4U7i%rA!x1Ab~}Wj^QEJ%uBmWe6^xxoniu
zgZr{{s9G~qfZu_9Rq7yZ%k3N&00KD$;*6sB#3d{SOkd0?_*20rQgZZoF)pL)k{d)f
zzhOe9AnW|;>qW@qeP?U)^ah?sI7N{}qk?85&%IGzfDlWp+r<@a1S#r6w40QmCh+pZ
zLeq`<jhavEr(=qfW^k_S>41TggpZW3VUee5mY_Z1T;00SpML0!G())i%my(55$HJq
z7c%rvYU6`B82_)q|9YZ03`q=Gy-6Hf_R&cD80e6>7&PgMy5+|`z>qtXwuG`brm=os
zs%%T`M-TkMGGDOOfMtFovQGAsPIU>|iaO`GL@Wu=Vq8h_lEC#lyQ%5%{e)~W`XOYJ
z3o876kzvy8kL{B<Q#Rdd69z*8S0rn_&Nsd_Yk+^tRoT8^V_l?(m*+(Es7;N)BVYAQ
zpMB|<o(4}t{hZ#mzce6?`x>s_1)nZL!D<D%`EL8}9aG=?ZXI*9fX7%B;(}AF!L=Tj
zqIkrGN=MD8asA!!A@%iPRQGF~$F~~i{i$$5`s%pl`)HTxk_ybgmiv{?OOt(=ZgL4Q
zv`?X0A>Z{ivKw#t|HJ`;Cw1>&RMmz6Imb||lB<oA;jY(YZyO(IA3L*+^*<ontG)W;
z&^SFw+o8UfuLaIamM@_&9K=+;a{nU7xg{w6X9%7<y$FK!ofUIu!G}?!t_);$%i!+y
zdUyBZ<z_g%w++viR>o8p?pib!KL`8uhihBb^qU9Oz0b`v=8LZAF->NBIHMFmp7s~R
z;sb!;<af;p#<i^;EnM1jyd1PM{PdC|h!y7MZ^7+z9)$LlRYBb2Y-hP{<UVI*#Pwq%
zK1Jxa9D^UNl#WugT7l>vU!V(9C4h^^B_?bcyeH$)&lf#qkNGH6WV6cXvEdRd25LvV
zhko-yS(Ng&!(>j7j0|r%vBo}1Ie7{3)>n?dY9g%d&w=UN^1OJLe6@o(kUu&y;?|e&
zer)u<{AXxcKR~2aF)amxnKYqnhn*x|8upuJy@CbT^uY5t1zVL1q(xu=2dwGyVDUQ;
z`-C^(fY{9Lt(S(|9Ngc1*B9^&1;uxKZ7u#J(V<?!$%UYu2Z?t<s+YRwAe%zr_OvqA
z>A~7-au(z#x7*fQ#!z*6JNJR=c(5F(i{2!pQZZQ(ByDxo&l_DWT(R^Vxn5kr8tXG4
zyXsQ+$ECFhV>*hta4%=5CTQdIUxG%p%Eg&V)6augqN?5|z~NJOjEmU(2FUM**d9oS
zhhle{pLeX;Xo3PZV7KYrUZ;f(OE(s>RccF0b?l=@W~;}Fsr3&$bwN-euy29==cUKu
zWsAS;dUcHCNZ(6zN)BkSXywQK+QyChut=ARCfe0<a<=JBUZl2P1!=B#hQ$xrEmYkO
z#7#_3g=hc_uVIZNq?eH4DtL#Az=Es{R1>r6Ow1(NNai1fh&6x{`AF_*5HE%CIjHE?
zNZWt|@f719Hb^1@4-t1UKWQjd3{zm%%wVEAlSI5aCS4gwpXdBAfj(p_sS`B66xRtE
z)W^Gpg+Hh;s9Ak<dp`Hjl9*B!P@tI4_wFtAq2dw2`p&23z?C>s|JFV+TAv?6ZxjIK
z)*bdW0a5_Ph+8PS1_`e%y$?4E`eIw-@xs5wCSFuh-qF3?soKi|X=B&?FVlwOXNBQA
z3j7{i@-a32a47unn!|4JFKvED`yJ!BtlVe^IG?gG`43=gZSyu{inSAxr{Td-R1irM
z&CgIJPyF(Unck2-3}fz>ioHO6M_)%nLU~La>_zHsqrPbljRW%oyxfsn44oQX7+lp1
zpv5M#jd-%P;WSl_g>onM%0=qSCDJPH?v7bml`#~XYmgL_>61V5?U6DoAj@ZXp1`=t
zXqBTCST^P9XqB@qhV)}3oh7*a$M+Tb`ErdoaxuF{MyGxQ1oftAk?7j%aERFlWf`qh
z0tuVE-l!KHLiolGIg7ilyr$SH+FzOQ#l?y<^Xi$DIE9r}Cc;Ykm7<rT;?7K4(>aK(
z9l0oKOMCN6W4-n|(D93W4ap>u&8pkbRT`eWNN(VLuzFO!YJETOc!6Raf#-GQ5w<_<
z798}qkIok0dCoKgo!OI8cv8NGBA)dl5KHJG?GdY^rMJ3<NjrLQGi=u7$om!U3EzIZ
zBf<rQwhZciTiY&oW5_#Jm#AL4YmN+j>GjQ?nN-2Sv=!AO;`MSIRL9j;^>QIE2L|~r
zpbavaRGA>4mxpu@F0?!DrYHV@bbByGoJo3u5RaIH$?BbQK}N7WK%K)rTh;_sWWPs~
z-Fk-OrY1^25J&!W90{xfU@LN}1yWviPWPymrCBI2_#55ns?rR_Df>NE<kGJ9VJ2(H
z7LdeIe^=TQM5z-`P(HHQ2Sp@R5jxb@!hT3(rO}a~Ac~=WEa_o#R>~Oek9Y|yVZAuF
z1_zZgFlhIrC`sU}f3DD=As##Iw||!J@VKjx)r*sip`F)Oq8f}*7VjRy&7R*|Y$-rV
z++?Ir)s`FihzTqwipQIDqN|$w6^Yl-72#&J5~$lzhI@T6n|N$MOj<HDP%n3*Rv*5q
z@KTW`$vsQvaarjHwIX^}jwAyV1W{D=ma6z6RZn2lK1Wi5uKpf$g;~12cx2Gv)Zyyf
zR|lV$9_eQ=j+cd5uiBo`0mWJXR;2<?L^vC6aIlB`=BAWzpOW@y??s_yvR~FiG2CAV
zFPMM(afS|=e>+)@5rKg)$86BfhR@kjsD+M-hyqE%WR94EGZGP|l2lb485A5)&w-eP
zLl8|IOkSQj?iV-2u%yx^SN7fV99V$Pdy?9WkNh@8`|cqioTJklk|SrN(OpHlSgiZc
zsZXc*j{$ivju-uI=wX%>oBl@%=(Avb2b^Xg@0G8pr=Z*VPXKBt{Ys`KQ1b46r>ZnQ
z^OYhw&=cRteZ^J_0r&6E=p{ctC%H8eqx`rLMua3BlS*z*QVxoL-4w7Nt_h@m5ZYQ|
z?C{jP(qpOJzJ`rt$pG}gz}p9fb)Md-0(+dI=qnX)6QTf_;bd`%rWYTF*BJiH^AD2%
zd7)s+;UXAj-cv5*+n5<D{$X4Rcv0kKPBSm_q*p(8bENsFH*PJ0VA}cx)Hfy8TzL}>
zLUTjQqAK1wxmSSP{j=+LCuU_@#N+~Ta~jb3Zj7m7<fIYs8Y$p3wjpyJDRLpOb93Tw
z@jW4;1&la2$R>AfXbE@PwO4Sx=cQ?=>CC4}J9&+(sM&X<?d6$D(X8bi<D_fC)|spA
zH8s+gUaXUd<C{aW<?(?I;d|Uhc_Z>O6Kb^XEi;VirjIipto>~oOfkh{L5@fqk_Z5Y
zW3dqgvmX`==zy|7U2C?;hy(+@L+RfuBVXJhsls{SdjNPjJ<;meME2Z5{2>?xWbO2<
zg@53{Rd|bx<f<6|0E)9oo50!iA=Y=tUy)Q;28>H6Ru|-0Ad0HoGQ<ni+US_j1iBB9
z_C;`?u&NE6Hx|(99GP@dV|yw5O{F?-&)PH%@uts}9?;ek?d%i32GYQCmPUiKJ^{%1
zWY6xVR)*nKY3j<_ygyhi@sz!~XWgiI>6Q$L`CB_z-<)U2;kAkqf;!@fv^i7CTAqk^
zh9Vvy+r5F}XK1=ISQXtzPrWdcGAv4_Y3RrBsW7pAl=jrY(iDkD;Qd#2ZEvIcu{r}K
z*Wyx9>aQ~|9<rD%t7q?#?W^*RG`$#YGay)mcfhwl+#>3yUKn5GGcusj!z1fI;DJM-
z%E1YEUZ`SxKGbb^z{!4Ns=#qI<WHHOIe6phk#LHrDA1Q6Yv|a!UYyM5lz#>S9t)+y
zp#4Qvhm78SzQtY23#I|o+qBJ9vY?ldR|h3O9i|#dm|1$pdJx=`6-3F%nRm<pW(c5-
z4&XBK(O33q)?(be%@rQ*veI>sI+c|K_$tF}bq#n~LnBH8=*=Ajm+*(-PBwCephFI{
zn`K6~%HVeS?;TG963ovo`Br!U4Eb98>joqLXq6}_**Ewf&B8d4sC2#-4~`K}CKjUa
zk|1n3YrDFaQ1gphNMoG~7L(mNu~(|9TMJd-()6Z5T;rMIv=zdyS7FQUdMH001W>o@
zU)i_SQ8wb9Bx|dD>8orTAK<Z11kFU#p8PA(-*=l@0*p+h8{HRNNY*B6FS`}1Ns@!A
zf}A$2nJ3iBBdRzN?p+9aLE2pgN(|IE<;5{))=R^)RgV57TlSnQn<#V2SOA);(L4`Q
z;KAyaA;0m|sBIf`JG%&I-#z;Ykf&N`RyC^}6{_E9Xj0bap34jXsLB>hc~Rzqjm~>b
zxHqdH^;K|)R;-zyESTRyV}`{1##A;Su**CqHal3xS+Z-)>~vxplqG5^8cAwgF+(`G
zXygk5jf!iz&&8B(FswvL!G$(GE4ktqsZtp!Cr-e=FG~%F)%<NvPRWukmkO}YRvp>0
z@U+k$VQDnFE)jQDtgMi7f|N9x`>ZhZurX%2gl1}??QVE-Ve0_YchR%5uId6?<Cw@U
zhlL}a$7SbK^Y@&r-Ru;!CS<?v4Pwn#Q#A^=qTOICmD&6jAroz$t8lhE7vgT-+~Pl3
zmMSim$z+$Vl&;!<WXf^Tarfx#5W1b>d2)Y4%|vE@k<AA(yZs{~Azs8Lbrr7Qn#SOA
zmR#>Tz6w?}SbTV4hyiX$S!L;ouzaMNvt12jKwOF9S>0`cZ{NAH1#USiq{b9ges1Td
zzL9vjx`en^Z<({ROW_z$iJ_#)@t3>@%Bxx34j_~BZq%6{e~;i$>eS^VuQQtu@RFtn
z3J3y@z*M7ljOyA!F6PgaN!l6J+LoW_v<dooUn1_Y8S|FaSZ1E<#g9*BVxcqB>|~`+
zk;k4J1j}P!4eTG=QLGZquc|~iUg1SfE@HJ8W!{|+bayW*-k#_-YniXxQ^P3znJ=)R
z=xck}ESBnzV^Pk_I{sJn??)9b>AgksQMXI|GLv9OtigN1;G}^UtI1ZI2P?hD98VYA
z#uJ1ZxGm7YoD*EYxgU2VS1z)YZF$1(LX@-WYB5`JS-Rlb<-bDvgMN{v5Y2~0fM@U<
zh9|}WIG-_iQ)RRl<t29O92Ix^9k9>hBHGMiPU83fA_-a$4}>G-yPI{IZs}R-@zM_{
z*0W&3w)iwx?7^K`&)_Z2GmkuIJ7_yv=|j)DBxi&g2AAdk;ZaeLX8W2y3XhT?j^&s5
zuhg>rT83(<pgp>^tR+7eM3URLcW%h*hdq$DZMfQDmN07r>Rl4cRous0LK%<I3#*Ml
zm)<3jlN5Q^)%G)&RpIQ_9IizY8=A}3+0N_-nZ}K0I&mwMD%wv=FT2_g_lx_I%+|{V
zjWrI-*4y)FmTQM__B?M3xEiEunlZ%!*Uddg6uQ>7O0^E_@u$$ufvJch{(vd;BP<MI
zuM)t)Suh{Y>GYHU6r%VAoC%IunrAy<JQ}%N7kh>v6>TmLB1@}^IFxIQ&oMQi#|*<6
z5Ws1KA>IoAu?W#?^_hVUB{2r0&4ux!m~}RrQqQef!cHh&&wRi}s_9ALHx8_$9gyvM
z3w0hAeAC<eZIu^H6#{a>In*`|&~Zao6wOOU6qQ^>sj@3j5eu_!Tc-|{+N+M}2i9;J
zmtb)M4;Ja&X9=p~9=zekjw14q+@-Eb{3PJ5anCP=RrRnQzfn^I+(vn-i<J1&VFnZ5
zRnhd@D+V%*NE<q!;9ORmrrtbFH@|6vZ^ryD`4PlIoR?87OYup;Cj4{MWw?LAY%H-x
zSWdBwI;ryKO1IO0sg$9d-5PP`elyY3IVFMh8(EfR8prN|_Eq<GfuMdcy%Ft!%kpUy
zfj76z04Y}Lcx@LTA!)E-F%4MIn|@3eF`DkS1>7KMegGgdm+EFjxK|fgKu?^C32OaQ
zuDMbOY2__pMDf47jJgrZK>9vbR2r|0>uSKhPh3*Z6VUXBF||rr+ALhM>jyS6pSV?l
zR^h7}-tWpCk|ggdm*e<imaOjA+LyNcxAIPG3jolZq4cdi*#sCQZe5qj5(qEvwUbp?
zqEwlPxNm(HZDG1Wm@ZiFZJ2@s!qW{<r0TJ^qx|g5D+*!%e%ejjy0}6?vc#IZ-)u-v
z)>KUx5d7{*9&9qRj@K=B55gmirp+q(ry5+&k;_Zrv`8|j?5Y!l{4<71bo3@++0WQX
zNLoxODr)(^0JA_$zZ6QUMsbW&4-Kb0n{`gn)0EjqqQNPyKK<Qw{Ql26y~cUoIs;PE
z7z{Nb%XkxyI>#hLu+(geFUdJYGD?ILC{wYX2^PlJZdEd=FhwfX<H5p&#;Hgq6(*;0
z6k(Pm>F6BAZJteMu?Rm!#Zbs8bGg__&<&y+Zy`Fw_8V1zzZ-*21G)$_2mGCMSt10L
z193z6|G>)L8QvDQvsvC;uoqRt7VKHpq5}*1qNwR?Cm}f!<~%doLARmVj`5`NUW~S4
zjFI>zjCcn#ci0`BM{jnp{Zj6|v>Pw&uB&rKm#kuIjyVgdb0qD>Bqq$`C?CRZ%Syl7
zN+N;823u-$yRD?{mD4Fz^Bvofw^fIA|NCV6buv3<uhrV5ep<e(-rd)~oZxNTC;T?<
zlmEcCXQyyKeqN=HBK%d$WqOCd@O!CfJO8pv#Y(AIw(YX>ucBQm*DC+Aio585m*q1^
zUu*4h!j1by{xW=MmoJZxY;N{p#lo$I3;X280QR_}A?!q~_Z?8Qs*y=D>UV~GM<QpU
z)R{Xd+j%Z5XXB?(+5N_idjaxPF8tU|DqQ5k<S4I(*2rz+S%OvQg5(8N^u4EW^HuD9
z7q!G83gbchLkBJLfl?apadmDtS{LxVbv-yM#Fl|)85ZsVK|GB>=_tGz#-yhGNP952
zh8jM0hLGB>7K&Vpf!r6@-7`>Gx+Au?!_<QYY%LZ2?DqntckxsIP0_L~$0}JB>y)RH
zpO%eEx#FeBspY3;rI0)(Z3|@b8!Qio^Vm_87))6*D!tLYxoEsWlmlWMa^xb)KNSl#
z!u8}L6|qca6*SvJP5E}AM9#v|@+r@!^LFoV7*31W7*_$%6KE(wFGFr`U3W*lJ`DY*
z^QLLa3*#~`%*(tnO*tGGr!=m8n4)&qxOU@|H>N2;gKZh79GQ+LYQ<(_WSp`+aE!||
zOZl(Yzd!IVmHzi-b=63#|CP$+vRw-6e|EL@s{ifc@94j+|NXp}PQJjK^$B*~Lx=5(
z-}!&m*>#IfK^kQCmYz`3>)FF}HnF}vKR#IZ^E<L)jM02&jlYkl541BMKU=|_@VDpZ
zr?)33KYsiu%qGtd)6a$Z;_J!7_-TCi?~}<tPY?6O<QR)Qu^2sPlP?5so6p{c$dh*Q
zH-qc*L8Iq2gz`cQpJ0e@UKUUD<s_UN^Zz#``aW5F2D%^lce-m{+%k`mk)OX!q88fj
z<oo0Sw4EkQmx9O%2}Q<7;4j3;+%KlzCo}8wPit%~zs(oV);)$2{W5)+6ez=&=>k&P
zb!#!Xn=kH{SkU|nRf>tM@nQmx#`o6C5@w|JFkL<$-%la&^W@%|&YoVL^OP9qfMRNL
zF`=wf<b3f6W0JKR)x6bptX#orz<AAOb2OpcTa$mrk55o5P%tJ+u^gh~Zal;G%s)Sm
zVbt%jMfa13$+K57wWDO+eH$;vcVsL3vY0>OGb&^L@G$>@iAxm=`DXbRe-_)2`O7o6
z5X5amAVa#H2-eU7o^XZ*RR0Y&BamYg`w=K+6I+smVxqee<ib=N6Ubn=H&J%JdV2y%
z<98^Kz;u)1m8ILK7?%~?fMCOocPo%M&4vY%ue4_|u7<WCx>^!;E|AT9^FoQ6DH30#
zDfuE*$u}ty8}DT(aT+p)>Bw-?kdY?wO^U?EyBnT(lw!CIcP?xM(<C<C>F|<G6O&AR
zS-`Ti0Nnx>vj<u$-@Q}m26)UhLDfNgL#yz7_7#Ek)@1?y<*l<9tF@T_M|`~)KR%8Z
z54h6dQT=#!4?oS8pcM@;LBTHw7G79ypC_|>SWd0w<QZ4p_n;yyV5zm5V>}V<!b*vU
zL=RslpP@?k<ug|>Jf(x%tS5KhW>Y+%v3iroIh1fe2GKEJ7Ob*uWfzn0(<N$Vr5sUI
zxATy244nYl!1QkE7lcpo)CTkl=pgrQI+@)~pwUxS0Ay<{zRqW$_gQ^X_Mpn{_%qb_
zJYW3G`vslWibczXb^V*w8ZVbt1Jt0GNnT{nTb&2!r1^AdRg2})X&#o)V3V#Ffv^3>
zx!lab<OzCeX$=RZlX{KF9uCUz5h(0~D!QOl&~Nj_^!eMP)WY+{{N)K&$LIS(o)_B!
z&2@o&cm3td^bTmVoJtjkVD|ZC@zb&f^YMM&y8Z{Wr+bOOkB(KtgOSlyo_Cz8W?o@u
zc8B%Zb`jKzlD&NP3v!<qlL@GCFuc>}30`@UM>hL0LnR7we@BX>?yA_8W80~g;MbAc
z;Jrqg%dzzVDg~CZV65%papUDH=%L5<X}u;GZ!kp-O)zENa)I#aI53z?k@j-3m^}Uz
z9G<7n5`D$GN*Xe#?)11=d<)(AZ9Mxr0qvwQ9J%i@{q~0pUMZ3p^{@KZANm<<IR~{6
z)KJtLajOaK^m~K<aeZ^~G6P-u;dzQ$DJbcarS${475K_mKVc6$S)lqFDFl)MBa;HV
zw6e>W<#<NA+5f_%^VNRb88m=V=Zj@vm%uj>V|rwbX4CHloT9@C@b7XtnOMrlt?|PY
z7-a^X+MF!@4MhIbB%H%$XM8^gF*E~W0*SDvkwe8s22}Bh)p%M=Vb4*t9sVmco_OnS
zj!OGKcJ=>o_x^u;_a52Lt^W5$z0Jb%f#(L3FTf}$C*XINT`9w1AXXMy#w1Unj4H|T
zu{sklHf)gQU!H&9t_634<FB-v`aZp9yNzi7=aD9_z_<a(>w6vNt?2CnD-30GpsP34
zi>V<6R=?BNLJD9P2am0HpkwD^N^nO_H?|B+fY6wlS4{M(5JK{U2<*4xio!&AhZDOX
zD$m|5f!zHs#aSy9NEz%v29FR0N^KD5cz{F<I2F9Cn4al1*a<hh$9522y!OtjRzzaF
zLNT`B4lBCfIb-ivP4?dCviD{?smczov?fY{&!?g!`d#0rtQww~s>07!F1@dTWwmeI
zJZ>2Cd~>L8bO(CJg}n-0>(i&kzNh+!)PznC-})_TLmcr4MzH$BK|>2mf$i?F(al?3
zRPZ1%Zl?33=Mg@Zo<aUrJEoeU3q3v%?0x}b(1IV1Y##&A=(~}}-gI!$hA@mgFFF2m
z>^QVV3|6?p$ZBLccM==qKx^W^9PuCYErV{)3KqbOZZIS60keWF;>!7$^(y};%HZrD
zo9$ybt##{_q|bKiMzak~^4&)Aeac!6Z>^rM&qdiW3=KS+6|;}szvN9!_<GgMis_is
zvnuhZkg>T0hI0Lct9kOkPm(W>ikwevEhd}g%~P6{f!)@p*2R^br#FYamDpREy;ay-
zHD*llIZWkGy}w%*E8h;quZO}p6u2WQ6=n{8dVh`L2!v8ArdLToUSGuwFZ5DxxB@KB
zIzbXzCo&;eRh}x1$woD{PQn$ZxW|g~<Vl8Ei!ymD!%RW+>B9T%g8il+&kQr&P-3#n
zuuoo5!<)nc$d1CZd7o~?Co*ru7eGc0PWZs$pKiQQXlqDn0wy^z@>$@1C}??~fs#im
z=F4pQY|UOCKZDANhYE<h8N=RE+M!Uo`4ep3=yX8xPiUNx<jGs_dqY^ukE|SDS@a1c
zFzWS9yPxF?D;b*K@r5bRw`lN#zc&*?idYS!8DK$%ZJLdrmfz;jD>n*P&A2xCS^=%^
zNF97AoVN=_509DU&vN9a?JJtB6X2QO34D>(_knO&_S;S;9S`HLZ2!v6VV2fn@?#3S
zXxNJKlN+xmSflk#LfyrC`~eZ__*Q82&kEFx<hqy7^QR}O8qaZlT3^P~hnGccrA$fE
z7maPX1D1xZ_O}Tc$|j2ijPK;1DO8C2GuX|4=3DpsxSF_{6{_j^+hPK}^Kfr{A3wZI
zM5nQRBibS2N@QwifFY5^?!Ha#{!PU^iJcWIed(RyVfjmce*Vp#LgoE9o<lW2VlMiQ
zr#yDydQf2B&W8n|CIp#|0Jh%Lqaq12n#=SvLb3N#bLeoXsJ^E(N?VreC>>Hyn0EtF
zBxydDT?{;uJx8Vd*rFn~$o~afrD&73Sykw)KzkHT3be|WT@yk>auqiuS50|9uA2UE
zz-J_zt7WExag`A}tF;CLHNwD9Ts7r^@I6rc2JklUoHPi={q67i4f@)N>1#Ub!Z}1o
zU2#*P-wMoxeoLAN@P+0<KULQUj@f8V^w*vfsHX>~M872uM5!o02~7x|8Jz}bKA@ww
z_bZzfVyj^|PGu_>urU~nRw-t**hyi`D8U9$9wqqdj}m^tQR3S}(@~;2Gs1we*67go
zG%z~wi?`wNn@LBNF|BNR+iz+meDzz2U$B*YlWW>atTGd_&aBo*+G7VA3BO<?y;Tbj
zFU0x;t@YNdrLQ5W2`yQ<*yoPPvY^gtCkZiiwqo+6Q)aB6#_>a3#(G>I`%FP@IZOjF
zV1gqnm%t?TqNTh-Jhm`(pPhP&Ol%PmwVRk_DQ(43Y@_0as@nAKo2R<*j6#2dm!As7
zO&K3UHW>RB43vHWSEP{VPo)^03(#e->&L!&XJO|ttC=v|1%kP$>&8)JnNS`J8%f7E
z5dW7vMQcTM7$NmwRs}t8g3*d<#bA>I#IlMiMb>lmNL4IW2eZrj!^e+P(U@vwQ4@U}
z-+jZwkta~$=~z{Y>4AyfSt6nc8MEVg31$PN_AB!g;l<=(I{rL;m_GkRwjp~8hk{N1
z8Q(D^nqP6B$EUMOPTmUH&nz1hh)X_3fk=3^q+bRWxJH()c&9#u!mvA!;p<<Rl1(Jc
zs*D>4iCMl&|F>K58#n*kPNegZnv#mlTQBqFQP#>bDy-ZQ{C461Re|4*@bfW#hNmU0
zDY_cp-debQSyDH%zJwjk%foZQx}MQCiDBPZm){OqYMz-54W_1ZyO(EnyhUjXZzS_G
zZb|zY*=Lv1WZ6?@WTzvUyw%8ad+|rQ!J8e{+|uV;;FWWgY2y~2`{sRaH)p(6ASXv}
z{0Tzw4h@4@%7sPtqoNj*;ra{Z75eQ0JBKk@ee0mg@-{ImzHizJh7k*hU9XRZ>HFwd
z?xf@2vzI)-Y1~#%K;lRX2bw>hUly~#65!xe#v>f1EKXW7y#@;DNhRfY(_{>=4`RVn
z-`V_GX-<gii4vFHu2;<)eH=Sx0%i7|Ko*%M)=9pVqY#Q>nYGl4L?9sH#Z@~i4#7f|
zF`lt<$_RTgy&5(`Nz^joL9mT5xo9lL6VSk~*H^fJGxz1lX__j;legYwExTOI^Os8S
zrOIEFwo~t^{8IB?>bh)oe0s`WVE2Sw7;Qh9rC}`rRz)p{Y}4@7!4}x%Vt8omY;;U$
zVlzc7FP%Il9v`w3M<EQ@;U+)gylUdnkU0Lt7i}r*Y?L36;>jpG9+gg|NoY0jzpF0>
zB^v&BNtlu3ak)TKTL_9pMQ2{H&cwr-3)Yck-mhr9hp!dt*Gi;co!5Si>sLJRfZoGc
zh(p$|wLAln!pC|(kb{mn;OEmk1A_wp7x^IIVF!G*X%MQBK`8Bb5WH^H4}zw<BYg>N
zP0^ir)C2v1M~ir*10PG&CACPGly}o59y_kpDN0_`7PCUXU9<-%z`ZGr-`I?<M|!UE
z+H=94b8zYMyAHqBsP9fAeOLV>`VN&w=q*V*A}Z<FiXAS6Ht2pIJ1T>xV;gSTeb^N*
zuFSULj$>GWtt+5pSU^oQKN|T&I(zgR?^y)ki4On~Kgv2GY4A|A6gDMmWh@@<$Ys%B
zSa>+{$HRCuplQ1lN7!@I!GJa0wi`Ok&R~?J1|B-SuNSBjrT8jTOS=oX|8B!py>)@n
zm3&YM6gX^LQSf3|RG|p_CU2dc7HnQjO)5s^tCAs}%W%RNA+Wu^9$4ptb_0QLVPYu-
zWa|h@t5Ox7Yf?X0Vo@$JQbI*l!d0k*3{PB^5;vOeb$2vu`?=`I0_vh5G*E!5;y`;m
zB1%%ZZ0N382<WJHVf*iP7rhI+4H!!6<RrraU50z=y4$_}(CME`C_>)xA37kBnr(7S
zdn+2|NCa5SO(GmH$2#p)d1^b7`YMvT=A|x1QU}g<f~o7Lk<@g?5DneKTUvvJS!CaG
z0rdS$-v5}RlHW3$AQ^|lC8J&6+Ca%NqeEvy!w57(Xad$;U;>!eEG#I7QG>bJ7oh1-
zwJ@S00vcn5MiaW~y5Bw%zw<$>%ghg3a)H4nFGieORh(StgNROp=Y6R53|U4hKg>1p
z^~qC(xe7ufm?i5N_m;<9ln-Z6A&_!5YGGM}HuN&`VH?>mTp!GgGZ3fO{qyc^fzigw
z`_OP{>BPsK;e~I~fo9pELWq-Ca4AU6jV+L{i$g^ZaeGcby|<kLY-BQ2mtRVTg^=sP
z;K0N<8&N3P_l;3^NaKVPtN}9r0_IVtg(E7aCJgYPkO`2JPceYLI1x^_Z<rB7{A_;|
z!-yS%Ly0BAF!GfCw1rBM@3>>8D-zgp0znm9huH`oC(&385Bf3rhcYpO6gArtQNpy;
zMr_X)?hYWCuvIYv<vn<!PQ-K{ttw);Lu5NaZ7HTZM6M_4H;U>VB6Sk#kW21w=~*cX
z!sX<j>BAI3V8SX#NR_brdC1e8dppGW`B*9Zd@N%>pLpK|t;@iLe;&Odp8+h9=csJU
zX<)Sox)N8%VM<%V8$b_`XUw<}m^|A?Ff>^_{&<y0wFX#q50d^q9=NVUst*Z+ZU6?4
z;?xyz^cXC;k9gl09DOqg^iEF(ptmIWxq(-QYRFL6!f|n_m*Ha*KhpBsjbU#*0)ZDF
z9-;9AfdV^C3>XRjuSW2KQd*8Fi6WCiOX|CluIhqRm;of~RTuxm4_MyS2tIUCJa^9;
zF=fD0ObA7EF^VPpgHn3@Sg0{L$4@xn&T!Jj%{E+YT>VVLRkaO;ydQNjCB1?Wz?sdC
zYA8H?yt4`K{;1PQ$&cWAk>pCEpsj-xfR0fz^(s)i4Uk^|v`LTrmlLhwKfuiR+_?X#
z*Z4(U0X7cxUkSnwEOV0S4_g`Y%k_|l#bH_2D!sKz3=NlQi(^*C?aDOG46lcx_Dtxk
zn*+o#qKsHi3X;Wy>I<E!(cHvSND<GUU&NIab?efX!bhO|c}qGCqlp<l3S&|Y;n$=o
z90idNoxT!5i@$QHRbRF!65yqO;vl3Tba5cTCHMiW6FFn-^yr$rsPU{PKoosY<xyM}
zLl#Uwx1-68$c{cxM8b|OJV%it)ZNQlh3AI>=&Gf@HH;G4sxwUaCxRvyaeWX#vv4^{
z1i>F=Cde9m%Bd1`MbANkV;_5BB{f16jva1m3syZ;oYztqiDz7n@Tl9i9_I+f`Zk_f
zkJH)o@#T>N;iq3<D=>d7tk4r!_B*8NFQoe~1zGAwvV1*FBNTK%21HuQy32n>3&dCA
z{!lOcd-}w1_}ErF`k&0^FJHgun~l|-$5hA6xWGx_%%C1HI*y{8%tTxtZ*n4#Z;w7j
zqk^CFzycNnZNaMZkJTuI=8aguyFNHc0S>^0EGs0YLlP4={2BhzBb~&9s5IiV8tP-N
z5sf@WgVT_UQU^2b6ee1W&9l-QRm5v%cj+K<rqg<hiF2BCkc^U(ydP$;`~2^%OtgC6
zGTzs6FA6Cd67{owSH+`xrWVEZVv18$6<RhzomhuX{HOwshtyVn1#K2n=hUgX;?h}`
zK6ljb>SEFdc<}q=?s>kjJ{FWT%A6=|yJPq<bKcI#Y=}+k3KUtK%|q6$L?b}9Vs<8G
z!(6HEEZi4%aemuztiipJyRf3>G?Ogge{6&3kvo^?<)FUU5@I-jF+mNO;5LZ~EcwFW
zEM~}Q6LxPUR)B;@y{@i$A1&MPvC?ni28V3w?>NZ1p%M<E!YfdC)5lZCrDhg1ES^EJ
zo04Hr_org!+d@25N%A!vd*cp}fRV|p9l!ueBSd`OV|<>XlteilmQv(>+2TrMe80Nm
z?-OhCeX{t8pkB}?K2N@&jsZKI2Ogj?p#&crXgD0MSO^jl_guXo_S@>}g}<b7xuoJG
zB_n6ft5HI-29+jz)5J^*ALI;b#hb*TZny$Hj<&E4oBeiC&$Ftlv(@Gdrqd)-EZukX
zF`3|x9>E_!dIURb6HJyKej$63D52qyynbTZ<~wk8Z<E`0>$MH7bG&V(O|<Qnx9v7@
zezx#hc5)0(G0k+l<GAz@{6Wm005~w8Xvtj-ZDQ=4#;6=_zTzq@GcA&a%FKOaU^FU7
z@;^*yCizO6wUUQgl}zP{iwD}CvdU&P8fhfH(9}PE4D<!AC+SLI>rMjs2{w}La!qAD
zt`U0~iF-^r#_QFC;htuMdxFhNH}=BH9p<8RrJxF9#zjHr0c}&s%Lw-Hgqc4kmzN*o
zrS&}i_he>GXLs`jh9`LVX(r~#ap3yOlxLG%BV+zNw0jf?uhv+ZeW6s`OuxC4iS>iR
zH=q^wjshiO81LouUS1rQ2HnQ(`n0lnJMh36LPA!yo9a=%o>`U>WnoqIH@$qZD#}c6
z>QWsh>Y1Ol!W{O{0SC{0Xy!B;CS1V_sIme;&BDr)^(;SU&955Iws5X?Kc=X;PWee+
zqWkKS6(&FLDdlal9y8FO#Kt?t*hu_V^jk`3hg;ObOj@e9)g;w4Ksc?mm$Op}KW-4p
z3xZe3Ep7jd+Nw04>YHkd;lCBbf4imCUY=xEjAascRN)y0J1ojKN;$Dsn$)#*Cv|=0
zNqxHJq~;vF+N|b0v8!qA>}*;)f6lZnrJvSjYDDJ?Qy@^~l9Vl>P~@!zMVLq^1p#S`
zNze~etGF=0rIFec3@3nRv&mh?%O~$xbR*zh*fE-JLWA@(rQFO!JnB0<#KF2o0@>@=
zbXIO%MKz>Zk|as|Xa(aAkSly*MV8WIJGkFYIjKDd@4$9@h26I2oEGr^PFGHxvv}Mv
z2`B;`TeELeeaK~5Cm6oYF&e@60mQ*)3XZn?F<A(SfO9^%6d@FhW}e31XFXQMMJ18U
z#`pOYCD_ws%&sA#gJld4Z}kcflS0v&FOo~?Q&RJJW~Z5Fm12VYIN~u&l9JSl@z3kU
zn1`oD%@f;m;tKv8ip+5o4!98tWE7t9$QF!o5h}m{v1W0YPPu140`uOe6bx<nCNEnE
zq8M1S`4DoaFWb=R>r}*wNq{YB<A3?4F|DyQ=rN==YbEIX+Awjt>bY4eP_@Q>8z95y
zZ#(Ax@314A=c|H_Y(Ty6t_Zucp)*d065{F*ab(M76z*$kp9df5&Hd8IBTV>Nv@Rlk
zD*f{|c`IEH2JO}m^xvSDM8MU}YYxNL4$WE=sj}OJ76rUXnxi4}h5;V#^hQ0&xsUtr
zg*{`AEqCPJ0L5W?tBNl2=*COKfavQ!TZLH{d9GrWfOiT}|JKW!&19I1j)foKV;PjL
z5_wM%tR)+2roy)h*LLg{$^|R77@B<07fV$Pg0a2yIjazL+cP2A*(1r~&m+B*$jiP{
z4)?}{ar?W`?VPdEI|?(F&j?{>9rVL7@YQktR9_ty34?{T_5xtMy6kv=uP!^UE;|y=
z;nig)z$w3l%g*}VI`B_+(FuCu#LSVPU(Qu}x0{<r>k5C9+;8-bH=~;_h&z0Zy4`q}
zg8mt9H!SFrxA@&`-uvdybG#XKcj$K`R!U@lWty7}`{oo=`sPG@c{r__^ru^J<*`j&
zd5(f{4Yg-%QJ<K|aY)!3CVYnA`N1gNV3JPwt~~#jaMVD>vCNgypGD!XvbwWM6206_
zCE}o>BA2D@Cpi)&I;H46Qc}31B>JML9Z~dr&S@hr70?Tgn0FQnJL@F(jmEo0(LVui
zmyl6D=o;dLYr&N<;uf-VFB!H<lf|XKf}0HeGjWnh_U|xo?udGJC=X9kcy%PXbR_w6
zn7eaCd^t>AIq2Lu(TxN3`a~BFjsJ#;`$oL?j*;Vrnb$_dWuqVCFk<Sik;YdeoufuN
zFO5}PG>rW-l6jQ}tu{Zi3p-{6nAWrqGp7t=kBl|lF^oatv~vNP@I99BME8m-@|lSD
zB{K4tNa-w*%2UGR(A~^OBAtUoD(?srB2E1w%$y?9c|@dghcIl6p(8|kFNjna(o{%x
zHH^KK4#v(j6TR1i2_`)aAl)2|&IIiRIWmN)lo91A5Dg&@1_L<xDj3BL3}Si3LA+YS
zdu87P-3$!$Rb;GFjI+X9CCymJ?RGXl)|K@p>><t78{J&>1|-EGrW!f}%;Bc|J<>XR
zXfL^Xvq20U-?}@+b$e+;-W>tg4&(^5-^o_$xLl5@i$=P&Rrdc#f#VL~%~ON8gMJ;x
z2=1UaM@sm10Ct`Rx*dR<o5Qx#Vz#5$^b~k)Gq85JiYQPUBveYYwIDJL;@Svq9l_e0
zVXarvP2@>%#e5*r@PGPwQ(0@?R9BlfwRPrAebsq$x|?~kqW@B=i4*6!lz!%Hf9lvP
zPMtKW-T|y1t1)zSRfewZhN0`*=jZybv=E%8WM%`k&d}Q?30rUEYGdqYliJB<We=Mb
z-D_6!tueAVrS_`X;&#A<&J><B>D_2#9~ySAe9(i&bBbBj){_r-#H5Uyq`o^bMInZc
zGpl>eY`|s4%wHyzvy2%NCcDYR_{cEF7qiJ=8&8HxvM)<D!Iz~L=gU&p`Lg(4E-Dw7
zh=0rP;>%JhM%{DbieJl@C14+qF@Fd6dozA4Wp&k8t9#L?)mIZQ8l@RAq0N%hOzG8W
z=GAFNIFcASk2G##98MHYCc(DmB5Uez<ubGBzBZv)N+0G<71aUCo!vV7m|V<L+KIDI
z$bCm!O8ixYALV-peYYKU(JH@Wm0z&RFI1(=RQ-EY|2SVBx&VDA4m~kBfCS252denx
zJb=_*Kfjc7PYQn?--SovsT1SXGnDd0+<B}|AwQY}bG6x+PfhC#UxxTH(z(@qL={%w
zlR7V&9XraLbs8XY@zu=-udB%$JJC>QT#RDxw;BEco;H$GjkI1hU3uBU+rITo;jMMB
zy1n}4r1i@gF~=O@#v#8QhrXP_gZ?Hwb_R#(u~SdyuydNuU#GSye;tmbc(z`itp7|F
za{)p|WsGJ`LGtMFE#SU1#u*|ojzWb98hrfxuO30K9zm}jLBV4s@(3Cwx&)2TCusDR
z(&`;*T0^62XV5jFHGE6yb8#*ltuqZ_zM{fPJi`*t-cpX7(d8;Ju1m6F80+Sc=*Gk}
zDdESId=B6_I;HYv^17^Qa%YWYdTotm?BAs5-o(3WETijdtWA16d76;k@5wxvc{irp
z%siy{?#aYYN?>(Y_Led{#x(PJF>rV>1pOPiv>0AMLi-g+h41<X#{0%3IK~7WN#fmI
zl)f%bag{@G1231LS4b>oMd{yCT8XfLXN#U+QhI{m?BUY1{1)Lnq#pPF&LmH!U&4cg
z@2I`C!8=IXBOV?1m{|>dYj_p>J{}|CPPsaeack<w?JDgJCV`75ejf*raKG$($Zq1N
zT6#Y%BZrT~k&KF5_f_O>Rph#_BKMaRx$djT{nbRSdr%@b35C27|GQMqcPn|`nd5Ae
z@r{XnvI%dy6<qBCes-o8F9rM`kkc>^?N5`kl#TrP%-7sCJd0LwD@x_nu(|`1fj>jI
zaHTT?R+z@20cn$rFKY<_Yb7VD)a!TfDxYL8wx~B{T861@n_=qPWSIIdz%Zw)FpR0z
zu#Uu7VKA%P9L(x3Fqmmvj}sTkP53S)J1&`dEv0i=TFqZ+6=$UtJe86Sq3L{-wxT#C
z(q=X_ldP}Xb!p<-IXf}C?zh)-X=0$rPXdMYF|A7z&iI|UG#MFT5_^9B`1JBTvBpnN
zi}^p(M*^SX8UB1R{W?X|5<8Sj9uXdKj!)fW&(Tlvi2|Nb;deTdI!e(|#<Ded8s^uf
zC?KKYcb0zyQd#@{FGEKDv??Qi+%qG;ZvdMX)?N2j^xZRY+%s{_Gxy6gki7{Wd5P|L
zCbHi)ll`x5c5&H$H8(qz!8+pZwrK~us5LWCW!U(-G1s~nYvHRS-K!&A-YR9$vrG2z
zA|C+ewT0et;ii7ncxCC=HbWmd9W4Q8xlos>UCeT(6qVNNov`a$YC;0U?AV*FWZ=zK
z-ljL3bYs<K{Mo|SQR&MGyS$yZ2QB8-2A@W+Zf&n_ZEId^6>x6*<^0-Ky|ijoC%33$
z+pWocQ=7V{YWse4>)unf(aWiNx=X2~yQgCAr-F2s+6$@hKGdEodDX?-P=!<DEZ*G?
zbka-VW+%JXCHS@}dh-JvP5wT<Z@6OR*>bhf>!r@H%g@Q34PH9^U*4xJ!SxKA>${9q
zeA{3voy(Z->$Q>ll^j!VD*tzM=RmwV!<WnUM)J+-sk}XKhuP_2d+n&nrYG!3h~meY
zS`3+|{M?Qvcc;%>Z)lz~>3OsvZz$$@)H`5zrI2w+=T;Q(Au3!C3O$WC5xKI(xS%9?
zo<tl@dI>iW#rT=Tx|bw)mGmU9l7PoZoXrq4!k*D3IygGLc>fS!>0Z2Bh}s*Zm*@?m
zJ22wDV7EzkA|+j!qr5N2>`tWHggcQmJ+ZlaW@4|GS^rL?TN7U&RQ=>}z1`HENOhCD
ziBuO8?cJHqAI@IjO0eX^3r_>&6H_yw8mbN4p*G|T)pH{Yg-=C{&r;$&J;pcm7~ahj
ze-xJFsbqQ+kKr9W@x&zGyJLEnj^QmjiR2{SoRh{=$^_eMfW~H1N?StU>I}96(~d4u
zM7nA`$iNKgYl87*A_KP>Ss@u1x&Eytw_ZR~Q(!h~UYaA$Sme<bnl;){vtnUI>w}Ta
zk;vKuvNvl&X}G#uF0Rr8hR|bu`(@a1k6>QC*pvphdZY}o9*wM?jJ69P0Sy3ZQAA&I
zi1IK9E-8$+A;d6>6g2=b#?S5qcqpHzZVCsG<QbU^0+nI58tzSj2~5KN836xMp#D~X
z{H4bGr3L#XEa53&ej64{#?ZbbuM!`bXWh*oULI%G=b!vjk~axXeca;|lTm~L1X=Hm
zDSRvujFpJQN&*M-C>iMq!wp1-MQBmOZA=M_2S~BNCzl4$J3b<}h9lzHmr;w&LJjL;
ztmV>hTXpGo&g$SWUxiFU@rv4+F*qxJ$QfKn>j%&8kzIgmSD(Z&*a!U86d1>_eOWB$
z68EZh+R*v9-C{V#OO9itg@GMsn-&tm^_js=e=#Utsi;7*nasObpk540uSY07YraUt
z>m`8oWI0}sL*kw4ka!$&r$XIDesg{wBwnSc1Al4pWtS{Rc{S{w30$rhy;aiR8dK$!
zSFGCaj#bNW%jQMfZCi~&o0<vJXVitJK_X~!chu`s=ax*-WznGPkuc<;Xzzir<x&uJ
zIkdS7;uY?3E$VD1K(Dd^(5v!?0KG0?MiS2J;%4v)^LmAOy~4a+VO|FT^Gbp60?y%?
z;N4Bed$lyt(l6m3Jip{<E`3kRFt1Bx)btn#S3;z8^i#NL5A*@kO@pGpV#087%hSrX
zSwtAHBI3fJ`#uKnVSie<a~AD<{h;RU-3KhMrHepb-BbkfH$m`{S$yy{v0;(W*72w?
z4J6DI4;BvwQv|M#jTey^vN~crRHDkFmTeJ?S_K0115sn3XN>b4vTTurmZJJ(Mwuu&
zt-fJ!T7N&_wEl|Vw7xMot)B=^>#qV%>#qY&t0lsh1_Hc<%R$zMFe#0oURqi-vcA4T
zin=OC1O&;lAh!uoa<mRiDh<p{8Eu^Sqn`#L<-w!+so_yc*r-haqY}bHYk^S%28$Z#
z4#dqsQ6`9}7_)OM7Rm?+WrBi=As{h2voYGpB;MHSa3u9LquN3S0lRA~7%2gclZeG(
zGY(YNtbYmUAcDo=lSnBuQiNaH6rL+p2X4LZZj$;=-EDe2&YEx}Lo8Ctc!(y4{c+(W
zV%oc2+{0864-L^r0qD_yV~@15HZ}4n73|0a50pNno&NM&8CXoY821Gr$lwS!R)Xlx
z%soyv5H04B`C>S6Q$V656j1<`8v`Lqiyuk{9*Rc}mCRRRQ_RrDWXM`Lq18b`X%Ip|
zl-Qee*r4)?=l@1nC1YaxK|D}p1)^fmFqe(stzl6JQzAqtf{^({sGkIM9ot7IqKPgq
z_4IM|7gJoWjMKV|&n_zPfJdw1ts;;hv>r>Cpe$07V?Tc|gX7qF4-|xU>lIKzxZk++
zFhDB??MegyrHCSI&KQDsFVR+&5d75uWELnZcAPDvn-1QX)pfZlJFSAK+6bJ=5K9$+
zDOJ<JU-b|wHGT>-^jORkGc%znC5B@4rFzlzuBMG&RxifDtReub-z|Qbof<W@w}@xb
z_AtD&Un$U7Ik}G47-O%vt&QNe0>QUL=q(AlMG&oE6fO?Yx-pDTW|;>BTb-C#yjUDX
zJB|ugF#wBI{~WNG<DtczSGd?KT<jGt_6Nen@|N>w1O{tuA~2W?DN`ZGFi2QGE-cvZ
z04sBTGgz6!aAnT6LXEvb$$l9q*=af`S$zvovhp7cB`Y5Slnl4>+r*Ml_`6py*)Iwv
zqi}<-q0WBSP-leXT_M()GFb%o=fVE8s6T7B@zE6UzvLq#wrz+-045i5I1$tmu!7cW
z9JSXtYU><>8eXNm5s0IeF7BGO`QT%?pOS!k`uJmp=h^wBuxjRaRIW7;<@E<*cVlp~
z*)jN4!@oHj3;$N#X85<IS^PJHYo}kM!2MED;PmiMv*<k@Q7guSGsl5iG122zm>92-
z<$i(4a@F5CvRvOFfLopm-dzWmOG#UQH}U0CMvdcscWGMWh@f!1w`*JM?fR;;g?hWb
zrQVK%@mj*;)g;)T2j->K{YZxNvR361aytPiFtdE*LgggJ&r68|O~eRWi7;V9lxSih
zmN8z~G6_?tL=8I}_xp+Ae(eMsYSb1(aR*h{Vbfw5b|eNqY)p@F2tjiYaccgtQb38#
z_{2&HC^q7SbSUP_KkM@3N--SN&``Fn8nH1cPNNa**h&+Z*AL7XYL^tAQT4=8$8#(x
zkjdrW2bo+e>H;JtA(N|nL?+8dpzt-!x7&g$>nu+)@6}%M%CC6k&ES=Tu_gme<Uhy7
zn$!T9)lnwb!!oZ&s0QS$Av8$sj&aUa1DrFODqP?l5FZ1O9~}P^1PSBDoUFGNT5p%u
zm&N>%ehdMCaZ&kyFH=}jtg*Fxn%qst57zg|VmY19kS4F3<8PoZ(BYiG7k_LIn%xGK
z;m3WW_hx(8AlUOqQZjC><+u6E!#!b4W|PT1;$1#Zta}3f-2dtTW<5Mtg$!c@hXWRa
zmZf3cHTWzw(i4X{2Jg&_(&=%EEU>!GA-XmP*xE`|qA6GqX3a;6LU-Qm{Z03i@;CXm
z(hGd1R}ep4uuMFhoj@P5-);sz6mghP&amlXo*~ORykXx4?AyTodmp!svJ}@>)^uqt
zC(k%T3-24C*%Z*CN5`sB;lLyShDwJttc!U;(=Rade?EUhc>yK<Sj=Z%1@%ygXUIJA
zcu}sH?Liakos93kS<lnQiKz4$3qcG48lqA1Oi2*VNQzAE`HtrptMLf-z^q8aJJINX
z0t?JngsndOFTwvZ{I9_OD*Ug(|9UYz)~E0*>=5#}6Ud{|pGOrxk2-xGwctD|w&6^u
z*5yb#aPR;PrDRPJH$?URhNb8vLjazlr!h7do{L_Y_`S_bQ6wodYjD<EH6&)qd-6cS
zi?jM2RsCrHIJS2~KuJeHk<qYq0;)O!oZWOlJ*EQ`^%pl?zip=@K<<FzIt1|jW4*{B
zESf3N3{8psOpIoDVifl>J(}U^u_H3dF=P~XCoC#DG8~zbT~FC?Z|tZuyqifhWb8_Q
zlvW{QJ5r>ilOh*^g>}cmgI7Mv?1()py4sx`R9@3nz7t+7rl@^8ELhgEU~|8^9j|}1
zH{DJmU_ZKDkbvDU6v6uUpxY%9u$?JU(N2jQwN&20WVq3Y<Q;I~jan#eWBgA|Yu?Vl
zS9JrA<`6#cq$vn>!DqzH4>_+BTkluA@j6S|I{TvHjn`R@uTyMVHd|>mxl(7-b=H&X
zlsk8Gj9k+)a#PL7+veq_nwPiD&P_EtZ=0i=8jjw0y_Mv8i`$q?HKTerUhSz-wcD9e
zb?ub8(XI_!noKv^g<(t6=tjFLY-s}B=$C{ovG9#nHgA-RPW2jwy9W)!-GheV?m@$F
z_n~39d(bf49cdWsq9V11;qFJnaQCBOxckvC+&yU+?w&LZcRw12yE_fTw)Gl@yK4=@
z-L;0{?pniecdlW$yU;Lf{lR2=(8`8Psmzjc>rJF>oX}&fLHp@8YOSbi4K%g5=}Kz`
zm9mx=H&^MYqEcsDgBoD1QVeU2_QG1Dy|C73AFMUn2WyRX#ajAPC@fGc2ZI0^jV&F1
z4kSbSc519>Y7929xGiq-D|9y9!oZ-$oTKOmbsJ|VF(bN#;k?lg=f=9W8Z%#<$Pv}{
zWihWdbUw9xX)M*dOL?rTcFPi3s@B=QOs>UD-vl1p$|O!=u&qqx1n$}bYuzNV)@pT<
zvlbe$-)F687#zJr9s~cId*ju^d1M#7RIXvnb?W-P<kI5CxqgNHnsjBb3{gKAjSoya
z$p?l7#2en;c&>A2od!ni4cvq4$GN+T@q)e@sqVf#@`L#;Ss2zOd7q4DRu8e`eJ{Kp
zxu8yG1Xce$p56bL-amiC{M@?*Q(tundVVz`z`OfAM|T?v49QC7*~cs@dP97_XTf_H
zFL%!`3yiZtjym^P2FJ#8--kT&gMFF(7|#&rK6&84eh#Q#%)h?y%DCqW_8p?*c|6JU
zCca+K=L`1v249B*dTnN`>jAvqWd5kj1~z`pK*FT~3PJ|g=c8C6C;=6EjHQyp0ljbd
zG6H=`$s`IIj|F8WzEg_M2n3Z^I6lENr|C>{p2>o~@0qOdfueb@kHyY}iWFQ@5sDor
z(wg@g9EnQ}jtt`NYr^E)%-Ip096QZ6P~8E^39r}h-?QlV@c-&c-xKF;=&bJb2Xt+6
zZ+j>Uq1aKsGZYc)jUvLUrV@}76T}^#t{ko%?|jem+uQ?j+u^0tHx6SCjR)?;k%X!F
z7WFxb0DUfkm_vuCo?nRPK$q(IP&^m0rL$NP29w1MIf_yxhnp6I^C2;D*DDzY2&*1+
zC;>98AH141--}60s|l<ue3dE4q01drM4CYO)83nKL<Q(IB#%e;U@^Ba$igL1OTSY~
z`0wZRyCVF8Dkcb)p0!wEl>ZVV_u$v75i49JVll_L$kD-=FO;K;)oLP-G$=sOC5t%>
zoS5Yy-*eVqL1B{=_#;q|J}R{;E>odTU8A88lOX%W#7=2Y9G^}w){jR|>;>h-T4p83
zhh$Fv)&I#Zsw07TO$`h#qv4Xb4kvn+rhQg=0^*Y`SGizm)I4oz{^!4>m0m=w!HK2M
z%hNlG7#*bD`5Kr%;avQzQRz^hi^Qy@CV)_3!l<1I4;71wri>&N2+rFJ4-X4c{FjAy
z!&1=gBML*v`h@XKAV{w=YKtG5uZJOZl%Z<%6?sZSSyf3z#Ueh}dVSJV^~tHePtb;A
zvu7YlFcR2tG=vlpZO2nxP3jul$vYU>(x#;-i(7!dc%@$De?iwBi{6fq;Alq(pWyFM
z3<d7K3<X`&5ruov5)$R;N2?Uy@VcswQ?ucM?FFr8;yY|E6vKyS9?J1c2qcASk1F<<
zKbv(T7DdxL)?`C*FS3}(Jw*x0B(b(AgO=$?;#mw=9@c>Vn}nnytUenLszcFB$<y4A
zm!#uOo2weW_PUzX@`EpFYvNJVsSj3{T#8su#tS!#T^BB1MZ=|X<r5EZ;bWVtoO&d~
zk)^a#vl*J2O38}$$yKK26*~Nr0JJr;mLi*>V8ld@S}2-M<g0GFS*#|^a4J*N>{TbY
zlWua`sV29->`*if^<tx$^W{y<`Eue+tgJWZ#WHy1P5w$^mDSBm{_2{OANXbmv%h9I
z`)@+C|0Xi~(U2hBjKw?ibi+%vYFSk-pqj-l&9YWc;yvDEdizT=0q|KwZ;o#9pi|?w
zc#7?Yx+2-wrsOz-GNSCnAL$q>z*8y>&{k`J%5#XIwI40{iPnA!xk~$S&&2O}rZE$m
zcRzw~QH#5i_f}lEOA+eB;ZphOO7CxR4nTpfmr|tbrHG4lH-CJ3c@|b_3P||}*~eEf
zjVD^9fdLbY?g6_rOOB$%{L0)_!&wPFiM>?UySK%+J_llb>2J~P{jIk{6YFon$ow(O
z-fzYGP<&v&#9k(QwA!t(t=LOoJMm_1=+Q3gQhehVxk78M@IQ7l&eyKKb6ePT>|k%*
zOF_A*nVFc+I=zOlG`s&ZkVCZz`?9KVMIS)nc3f_Dj5rMygt@X-i>*C)ks6cbExJFA
z->pV8^52Y2;n-7TP;Rh0NPHu9EApt7kquSwGb#}*S!k{6b%U#NAHVmMN8CO6kx1HC
zttj3d0$oK^`zwr>+?RF&#7cAqrL30H+tK89OwG6In(=<XUa;odxSDSxHQy>~7Bad;
zYtF=){PQ%q!?Rv~`@(bvv)OuCV!$<F>GuOL7DBndj$gjAbLVr(-iVZ3-M$Ii_t1nT
zwFiX;-i;VB3ZHww4}uRav2(9JhK;jPKJgA6gjN~Z%1M=bAOG?V)kh%VE*00>_@tft
znP?9AHtyUJ`-K+~Ergl@FMl!a2;A`&-3EASc1zz<Cv{6D1HVW1j@#_Sw^?C_PCuGV
znO>=7;@o3}mc<yn!<z_+a&|j|L_g{gN4Mb%M17(;sr)*Ia1$`MM?EhkaIiral-@GB
zZ@b)v9yx{(Gu@8^yFm4j>7n1I%SaG$7XDD0xPePoWC4K7wpe$l*+Ag2cdsXYXPa8-
zhEKZV7c{7#2so#_cmNhSzWJ{}HTZUN9=gbf5i8>1=ULj&=@0yNgllsaRX3>S9I8QW
zQFVQBRYW22`;nEy6#a+!{E4*v2T+Zdc?)kE6nla3Vq%S-t%u1Nl>2YvhcDLn?(PLo
zT!TR*>&0*w_b(u(AK2jVVW<|>7yW3lVs8Qm4$+~)e!!hfMEK}$GRliFjqITkMmR<c
z#MG9u5|297)T|YQG4%1Y!ZcF4I<zy3EZ#ojvA3;`YjxYMYYi_zq+c{5k<#B=jaF+k
zXbjt4Sa6CnuKsI(!~o({dL-mJ)~5Ii5flaIQAq7t-f}32IEKwWw=0Dzi<|9z#(?e$
z>>cj3Rl9MaS-&6kTNq`XU95%(kpV0u-9a$)^b{t|k2yQ`Wb<Qs4_f9FHVc1LsORor
zwLROQ!*^wa%hfSK#u|U{lG{O0b|5K2L|>{dNMo|iDRN~fG~0kp!lI02C>6uU=Yb4<
z3|?50)s`bjY09Z|VV?{Z<u)WVAi3fSz99cDCl5@!me27+Og;I3FCIgO;FK5b3<gCe
z!|ET!@d9|Kw`FV%O$VwURnO3LuK#USNSy?uyVP44BH*~#$tI9sQP`IO*u&8S8|)+`
z1{UA&v}2slbZ2z{BNVWfvUyr7YIiw~@u7bT+{!O-a8OD-8e6NcO+h!Y2Uu%SkOGkt
zyiSsdTD8FUf@DPnKF{OJ{CbSchr}weF9`%s@&^EYVMsqy=Krx~FOQ$m*galaPvhx=
zO#2Ivp7TfV(aUTKqWSURCxQB>%jd=P^9!yGpC`{hCLorNi-iiAS>XNm757^B^oruO
zMaJA7eC%CH^^d<#tfglK^$Kwtf49R-t}P@rjF=9@jD<M6^PAX;-I3~1gm&8T!}LG6
zW5mArK3+_*n5A?#Sr2D#{Zx_~JrsrIN=;I|Bp)Rl!{ApSJ#Z4sXbE3k5V9K%@g@aO
zSthf)2{JkNJb_OgR8=lfd-*(nncYi_Z4+0+_M+f#ho`KKA{PomCtu1}^ELe8G*A2G
zr^#YEzqfu&VW0eY0>UG%fHHolD>FKkxQrEL9^0?1g^hYy^802MlkvZ;mnS*T=bzT{
z-_xfjG@9D1OZ*-YmrBem=}sQ%dwAgo!UD8()Ds68gzP!$i4}&P;v!Kj7LPy#pWp{?
z_j)OCXYA=^@F|&@TWGhaM2_P_EWo`8iz;jNC${E^Q(v~Ru3$Y_9d|`xP=w&G9wC~u
zTPRm0iz9~|o<u%7>%50H;WhR`A|HgHG!cJ_qZ@38`8EsNj<^1zsVuf%DP_W$xVa0W
zx2yT_Z33LX5O=41gB9<7^2~NQ;~BRR{=myprqBGora$XqCcg^}E=>yWV6;~^8LTCA
z=QdqGW9lNxX$^RNTH<0I)hggqA4w5!5wkkoZu=ZM;qOLw)JC^X{RMx}4C^dx$byt)
z;%eUd-lDqu&%}j1v8>Pdm$QDGvA)OYXxN@=ZZhMH&#oS$Rbf7R_{lWC#WSS95^!;d
za5nycOGNyAYTX8%(TnS-I4AO2i7GM6=q5Foip&-tEFzr0Bi(5vygrrb*_BJMTs^%!
zV~ZG!12SVN(-|yo(?4%QC(P6K2Ci7!q_9}$x-aRrFMTemuyvABNC~+iZ|m3@`1@<z
zTr#)%u!_PH3^7H~_y~eaH@>8$K2xZ%?D~VdBpFFSI^qqan{||#To!Kl?eu~<@l=9K
z5xPRP(M4B#x~-IC-#g2@>J;`h!*(MggHddIQaW^9Y2Caw);d;S!>=!Lz3LgSPZIX(
z>s-^cF4UqU?IoJ5u)rGf=3jr}-(>oAGCO9k)!L+fTE45^-PgaId<J=b!q>Eu|G>BB
zQsI95yh<IgT`5+}WqOCd@VirVs{gV}#Y(AIw(YX>ucBS66rF!r#a(m&EKIOiSZkLP
zZrm^Om*GRZe0h9iCy+FKVQ#vkUav8@wLpH|jE2_xPPhH08aZktOfuK;q)e3BD|<G6
z3YFb&+_)F89LR+q+ew9sT$tR-q9!@Qpd_957J{s-V(!g*Z&?*bJY9s~4RPZ;Xn*K{
zJPnr8c#leIyV1IU=dJ6(Ss}IzcX)k+H}_&FlM$I2<5JW8(Cn0$BAgW7*wsRjdo<(y
z>(|{gUuX7jlUDSz-wTwUVyE=q6fGN6uaZ@<PI)@{Y00P*b^&X)SSaS<f6mI<-lt-r
zmOm~QPIHmMSthdz2r;WH-!7E$$9BPvmQQ)`4#6IDl|?K<H(K{ur-CMioW$<8hHk|h
znhaCAx27qF1M`%wF`a`eQ#wskV)Usi(=yFc{_FMcb^VX7|6f*L8nx?xxoB5QrSST1
zSF5k<|1SQH{@eQB&x`5g3%psMKqnrSn6CWJ|Fh1nTccjPKWvbJR(e87uV)Wv9r*VA
z_+Z`7?`YFa+jwjIeLQ`j4deLP@=rkU@7we9)7z7iA3uH+W|QZK>F2_H@%7|k{4~D%
z_sQg+r-%7sa*RcuSd5-C58?hcM4qSHTh`6s`h3vnc@3ew(84De;+vPn(|kDz=f?d1
zO^Low7N3FcNB(^nG4OnwL@@=-{819-50(?p4tT~#;4iwxelh(%nW1fFjCRw-Gs9nh
znLbPkl;O*C0V(aWwV2$^7xzmnh<gxWA_9{vCgXdK-hP-apO5dS3uc-ndk#D3ctC{V
zsa#4ZCzUu~Ji?G<Y4_E2tX#orz;MlGbKKb9Ta$mrc=Cw^jEPVzhsd}a&#*c3&(C8K
zx9ooMFnRV$re>5ZwC;@W$VEsP!KjS+!^8XsCU!~|vd!`>{wy@b`O7nh!NqBzkRhES
ziZw8nD4_Odpo>C|O{k+#%qBF_gkqvfDKcRyy%aJSs;0`$S8Jz`G+sf41Zi|sNWPMq
zYFrIzow`~QlvT)PuCHR^i!_OiwN^}QthlnoZW@{ml~P7?ip0iBEHBwKF{Y>a^Rl3W
zt+NRTt_PBf@7}4F0e6B+1bxH<x%uoXqWP@L0{qJh8?^uR%NMHywHD|Q{9^q0I9@!U
zcw>|z<P1uDCR~bGDaMF7=z{D+BlW}A$!93uefi8+3~v>|nK-%oHbX?L)tfxd5pnYJ
zX!YlXdfmznCf}!YXkq8_R&NZA1BpJp!vlnSo}Hun7nXZBr4#+Lw7pZ1E>X9x`IT*Z
zm1~t<t8Cl0ZQHh1*)~_%wr$&H*S~k~6Mar}L|^sAjEp%lCvGw#XO8i_@9*(M(ft=M
z@W^H>+feB2<H#KM^F|fwJ0h*Pk?qPQx7J4NTAxm-j!O56djL(AY^q9=8g14Q{|nSy
zAOzrX|9NI8^>&Xu8;}_h(;Ae#b-t|@Aru2o&AxCL*T!uo=t}1!g^%9>+&>JO8%|yN
zoSS;gH~nw8$hqrpH<$Y6v1k1Fpz$ifpF}IJ5^XLXZd&y1{QGOSCud-Xmj4BoX8$r4
z`K<VQ4D}h(n9>Id0#n7s?X4eyx&3|_YjaEUpS!u?8hpfPj2}(tbLso@0&}SvBi9!D
z<Kcc|1d)1Hl&sxFJ<OyL{N>8n{``pJ(g6}on*!t<W-0kUNa?}GE-1K3Yo-+S;r`MK
zP@QbU>57F6`ZqX{uLbfHe+AC=v_A5b-(Nl=uKC_8H0*V5@B&LNnqq_$#s5Z1!2>`V
zF3!GpSr5NoSV{9+s(*OWD%rdFs@?;b8|*0x4y^(v279p-w9xP(0W6HjeM4t57XbVB
z-3Q1clalAKuDJEb4@&Bx{J&`#21q=(_@jjwP|hr7oDG8J*bk<uchAQ<UW5#XoyHrm
zh)VtsN?O8TPpJHivhM82pr(wsFUqd`48t{iKDX!j&v9sD5l8!J%ZeOLlD)QDm=?4=
zJo9B0;ElI$k#h`f$rU#P<{hi&=bgXDy%-yspVc5En;HES8Yq&F;Ddn{t)HFr%yo)$
zGnVro-qYkI^h%DyWAWvN%`NKGWFns4Mkk-aTr%bx*As$`KXBddnDwu#0J04J>TZ}D
zaNeCX5>XFQ_o*h-W(Jzzr2dnG{#(?fNvQ^Aex+X0N>m<YWy&wMA3FuAOcsSD`V6g!
z(8^_|x?3b*aitFEi<8MTEm$i+tK3Mv7H~<6k3a#S6M7EH7F}?vq?yNy?mu9s*<<dj
z#|~;NKpA%D|KK}uq=sZWb<Pc(Gi{maZ^lYvm-2deG|qQ;rqq}fz%-D8bj+~nT{uT;
ztLQ!0>%3{b^QEc1^YJXmOgM~Jx{@#e?_xTBFGA~sFTiGrf7uy&YF_aF?R#2+Ye1ih
z(KVbhg+3>~BFKs%!I74&B-wm{;y~8#yW0iux7&-fMF4zo)!AtyAE8M8&ir7R*My=o
zWDhZscU;?szjk`pwZc|qL88BYtHtSOnCzGQ{EELHi|%tKtCs!>kdP!+fFMAu(t-^E
z^EV~@fV~EFJ8}iqU3&2Tb#vjfK1%UQ|24*{H(?OrbIDuW;JG1bk5jRXQaSrZwp)MA
z{pB}e?Nwkm_un+8BRH8T%<DzTqcNAaoBDvt*aM9B>`Dr!gE|sG2P-%X2Nv9{u=Chk
zOIk}1YVqO2n!$g|z2$lL8Xr7?2z(%Uq_F`YNEErLKR_MQ@N5@0hlic-1?BPi?>KrU
zcAs5Kfb=N;+AOY#;SToQg@-zfbP2Q(44YCLX7nxy=rKl7slO?|(fIW(Cg#46KZ+tD
z>*1#itMxZ9HTj*Q8kSZ3H%zm{ESYsf`HZ^%6Gb{I{1_gz_YMLSQ_~PEj9KtqX^*O}
z+zpJvw6n-OLmmf{fo9!OOWYJTCi=_ZeCmgZ#BO)e59DaU-h|xsR`uiZE>S(5-i%x^
z5#*v#!IH3W%14~hwVpxX(D(dyYjQNF3$YONmLc+ZUk`O((U7t?L{3|PvV4SCpFPSQ
z$j_coAVG0nmD)cZ&#$kMX{qN}@tdfAWqyZ-bP^kkmKLLhPwKkM9R_@wLIuw4wM>mR
zz?Cn|C+ULvD<o(W<X4`W;MMy<Il-RvNxjFG!Bo>CkvSUoMn*$cGv%C8sZGGX0@(u@
zx+cChZop4I37*^rlFK6~vG?XJ<h>0%9rxE#r0y@^=Ad#5=bBAcHq%3*SvPuQY4qjp
z;oD8N#@{^tdwe19nFmjHf!K&0E^W)7)L8y|?qxQI^vyA<Y5zWG$q?pmk5Bs_Udbb`
zf!4`=RJdMEy@qAVKO^3WEbK-{HS_T3f^Sr|kQ~lpwe>pr`vY8mg9o4GNwWW)XJj+Y
zvbYN9=}654k4nmc;jrE}T=(?C^4Z9rXvWQlEkpQGAczrN4{`y~Ig(G|+7AmNez^oS
zRD|&1ybE~gn@2|A!`G0|q=8|fMi)$S=8Eky(w-JeG$cQ@^3o7YNjH7kkGXRzm<W<G
zKP>gO0a-lq5&0^^dI{fj34N_6bVOa9vhwJ>`Zr59V5myzqTk!~PyU0^6SkqEd0-hG
zrf$um3|e2DIZrh$$A8$979>k?1dAdX`bu<=i>$v1b1aca0;_Vs8Feu4O0SvMSC&RR
zwD#hev+Z$hcKyN(zm1neSGg~rPDr|ACN6O`MnNdaiKuV90t3y1I3$@wSOUr5Lww64
z>!2p_!}Z*ZP7X)|pDY(&T!U6v*F$|m;LRc)?~4L$nB;TIwd1fHKRF2|)QL}l8vv1g
zF?BR85h<yay3mq;>ajTR(qy07;HvT`QVQjqWE)W_OlVgA+QVo`IMG7|Hz%0VZ~_G`
zJ#C>vpEFCS3YD5$incrXALdK)>%=H+=$yLtK$eWkRD;{25I$WXBAxPe(U5LZ!fN&Y
zJjY4#0^BTX+$BzuYCB@bw!lM4R<sL=v}E&QA_6grEb1y@;oq?f>ckwgIl>aBxp2BQ
zhZp4*Y2KYQfg|Q!29Z{q({V!mT?cVgY)5>by~fg$dYC0+xB9>m(3?6XNS#GOk7@VI
zPO<A<duS)D?4aE`D6dM?faln-Evn0oXQ5@YNQP6!uIeI#(q6O&L6&K|u$wxS8#>PT
z_QH=bx$-T?+UYZQ_~xMLlHi>|{QPgqZlUkjEVPTHjz|7&y00QquTGM&lgiMn5uyG<
z`-;k{`W#MSrVPc-v-p9?{S=a6R_fe*svd?$kJS$o5AwGDIQWZ?Ay04`x4nL(f!O*K
zJ`U}L&@6~6yVHDk{GOBz>D<Is<6AtekSW6p5#T+AiA`AMLLEEQuJb?_42SQ{W{>D4
zupy{hLU_5ITKmN-Lsu3Co)YiGe^W9r&3&ZW!Fba%!c1v;H@Y~sLc9cxID*jr$RsHW
zal_m-`IO4L{MYZj$dL}=@yL`4P?gJw9lo<FFIA1-Hm*p~8`dqD-Y+DAr(2=n9ak<j
zyiRY&$jy=eUIigQ`5L5o5Nw0@ebkA13`Q<yTD`XZgC3qKkg!C$(iXwXDFy~%R=#+*
z>fNgiei+6`_>5C`8NDH-N+Hd&$<@;&!{PxuX6PY03BWBnz0qLe>b(!?&9pRaOp%|I
z%jo_R7hKGK*@auNV@dy|rN`@r9up>)R36M71t?BHiNp{F(w3VsJ?KDK%&yCw(|L{m
z%W40(c-T71C7g>i@!%FU_F|7wGL5AKS(q8Ep|+F@*BR9@CxRX><kbV<B>?|lupx}7
z(~UId3@s<_JkUup>)L<p;B(adU@3+kiR7H@Ay4%SUs0<-hgV&oNyP@uv2}X9&U<Ho
zEOUBMLG5q47|!6$QSBpy)_!VefK5b7g7@jjaBBN=5wZQ{Vnbn|qy6aq1tl)wG)pH1
zx;5Aj;}6sgW9x0WOXy|-kVZ;3;)l_oC>?;E&F<Z*D^vTm9HhR4q*#9m&xG^H$IsXu
z#3$R;q&DhHEKvRZ%3XwxdW5bb46&?C(8yb+4)9+zQ*4niDoWs~S6Ha)lekpVALLX~
zqheQ4t6*2bod7d5-&v{<21~+Z2e+*1`R(Z%jFq39CNdc(e*0^dr3U^!t`F&iUKFG|
zb{IBP{-3lVXbn0GS0UZd$v(ZwA7Hqc(ehzfTX49I(<+ym^cLW{^9W6Z3~jcjY8J-c
zH$D`&%r1ZbDuzZ{AG2NZ6`};C%VU}sw_w71ezCLm%1DQXNaRF;+I|?}Rogij??O?U
zwn=$iDFnk&!Q$-kyw4Y3yO`~n+?Zl}RbKJ80@_I=j1o*Hz5<)xs<}zGTpnbp(HBox
zyY7QfqfW7aeX5a*U*@$tsZu~27C|oA!$V7fm4`x>d`)RUN!6Ddptzo*Vi(cGRjLAV
zAfcH?Z~ZBR4(gStJ$G(>^2od+dA^-9)Ad=IlMdI|%>}=<^5zwZ5N}oC_s;BL{Ly-R
z;yhKD2|}Z9DnDn~!8k_xq=r#Y);%(bKTg)&Ot@uC=Z8~8;n}l%fPOMRn9PTU)jh&~
z8$3YW#1}H?HQ#7sKZfvyIKK|I=gGl22G4E~O&K$~y*`Ue)fXJ9D}+Hc;j>l!dJLTj
zz@|0j5O>8L^VUsDr|`WKJXeNiwNBqKzp6iMx}am?R6)1QH!NxyB_cEfIzLq*WN8Rb
zH6?Q9Nvy-p@o_#V#o9>MzVnj96?QY?f3;tY8K!@@k>|c)DQ_rp&1=QdMw}0#mXu4_
zE0H!>8-g{Xd5Ik`GiQOsK5|i~`qk*m$V^W6dQ9)1C)-*=CNAud;636f=%N3nd)98o
z?14iM^sN-(A0_&@>F_6y`<TL&?0CSos(Yq&wV@{^A_y%;tdGOXtSAmW)J|NoI5Q3N
zTAC&ozo#)CJym2I<T4LA7H6`_7E>(3VpERm%!pqX2gco6kR|)fL^{+NOsyzW;dJkK
zo!4^zngQ);^OW}P=8q^;KS&O&ISX$b(@KE+BTK^9r6F8K#dH_LN6xJ<Uy`mm84y!s
zP@<Cfdm1<{06iv1eXxz*mcD%jw4x8x^5u^WjU|KQ4QI@k&B=IUM+QX(&)B;(Y^xEx
z&+zQ7^ynYsG#8r+L_sM9jCqryWFs?$%%j=nk<tuO;o52CtQNAlEXu^|#6j7C_r8yJ
zuGauZ<iLpg9U1#oZ%i41f3;bOB}^ow)*j*+_Trw^KE@dOKnB6Y<uW#ve~De329>Mr
zM*~t1Sq$bK<{|py=B=fqB;5X)2fOn*?<y+M*sT&}@)aQ*K|m$)8P69WU(#esn9@*Y
zM2)SaXDl2hEJs(<d_`;K8j?2_9X5eUb9K-ZHLcAVSep;Z)}wDhNnV8%&kMV+m4Bc>
zv`*+N+>P@SLpJ1)Cn1U)F#L8-#!w-#DHYd(`<n;FNt~yDz?UYuutFi0Zgd%i^v}$F
zgp6fCX$v|zLF%Kit1iW!J+sk_9sx!U9WuaG3i9i}sXDf;9LDwxx*5WU-4Y-eL`Wn$
z%W!1?3T`d(-ETUyrRp#%&vVrMxnRDGGiOuAx}R5t8e9B|A^{nkSxe^n(7ayijKqeh
zK>hNXXVt<mZuXXmRd}dad<gko%br1T`j>Ya|JZNu?O)!(bGq_xRDlo5KAV4x#k9)Z
zim@H_1zPC*qp=%p1|4CiHB8m@M|+`BmT<F=7iy|U>)1>8(v?jAV+^mOJ5PdQ7)f@X
zR5%!f%QlN|$hay57qd;m0c17Hfx2C|BAr_7)>f>dK;BAriVee}o+$N;&ZD6yqz#!M
z5uG40NcqPYUfKmUBmomOk=K@H1c9X*^1)O~D!|OP$xUC*&nON3flkQNqHpFV0SY8G
zjD8C%ONjk^)R9+okH~CLI~Kns))DNI)bE&W?~R5Qjm$}+fhlFSGB<;meST0?f3V#O
zQkZDa2!-5k%Vh1{uT3Z-096OyiY?4^^!J#!(^uH&hz#tAbfR~`d|w>9F8glx)NB^g
z!gkJ*DKwz&2a*d*n=()tLMJ5iyjz$9FtPQdCL3&kqEvU)O<VqOZ;$y`qvrCDO!zHA
zJ}`e$z(3#mnE;-VaKz~&m_M+c3wzAS>A$T~bS7(N5IW71O!nH9oA}*a<#p6Xjl)55
zFv=S?A#fC~j^}t6Hc<-<u1judd>I6W>PsdBDs+)Hlg^5CIdozA>ed#`w169$PZlrG
zw)QZ+)wg5Jp0@2aeAu-r{b$xI1F{-+SXX*xOZmbv8w?Fpz!;0DU<fGa)5#Cg2K79i
z*QqbJQ+F7%fs^}3%2s(+7=II!4kxVZiYaZnZ6vT>1(=UB|EnsMEo0Q~g3t%j2MSp=
z0IW+WPGKPUgs-@TPme%eLXCLkSdZ#{t%HWnwmMf}uRN_E;==xb$!TEw#}Youc{m<B
zrxf^(P-Ge~eEb3bhX>j;<cyr1vXkgd2g$o8tV>mB+cxOL@z(gD;I+*N4%w^|lTVj1
zN><?XbR6lX6Z8b1LOV6A!6Wvb40eAW<auKXYv?>!wbamQzSE?}h9s02sJ>K^R>T+J
za7`4jv5((c1L#5p@H~^u;~B|dE@v8vXcN=tOfUD<@LICl&h|*X#6`N0MLavQ`ae@u
zhdgt72fqjJ`jlAaf3gO|?B@PuE6b(#0F^q^u_(SICD=q}JjT-3NU%p|j1wLzGh6;e
zS}X92TzlwFX2l}N61qQd_tatBThg3Zghi{)&PUK9eMuYaBJvGs{?w|@;-cV)%O6=(
zLY}x`sgIfQk3sjX2^g~>brJ*K(-`PLsUUsGP9(P3j#xkNP3RctegKj~?+Fd*G^rqA
zw={a}{PAxm)*<7sSI7*~6^$Z3F>n>wy!9FClhE1(sABo+m32rebI?P^A^ReLeG#8s
zQHX7t;lE0?tUoO-X(0b5<TTIunkN7hh2)OrMbbiVOh5kdY*fIF$TR9>Jr&HQRE{Ic
zd#`CgE0l0^W4I>9d_UibV*u@Ba1G;_3dUbs0QqEO8RM`bMu0+{wUHK~)CZ$Fl4DRU
zKF1g+b};E|HqotCDpV(ofLxt*eLhkMz2|6Bfxk=@bcCMLUW!wV5*c9zEAW>@@P`J}
zld(_S^z^~S-q{bwQmP6#79K|l_KMNW0fwa0sHB&?ut<hpPljGW<8#K!YXokoMn$ap
z)i@5wK?8M43v39;r3KMbK^!v<7-I}I0z=aMeWNDsK(25D9;Y_sFocM20@}kUq&sN9
zn2@Jx(o*4+Td|0!TG3Ob(xX9Y!TeiZMKgW^M^L3GNSPFy`6oZn<`S&4@^JPl>4iO!
z2<?%)_6aVo$X9a`Mk<U|mZu6%Kifik-;{6iMr-+zubl#^t&KF%Oq(zX7&3{`mt?!8
z8dREL;kJ<m(NH8@BC>1R21`FeEih|`6P$0%$k-VY2^M$MVKDb4A)~QnS0Bh7HDhiB
z<75P<5i_CWN{`})(Vus6W9E3+Kp~rC=qXHg2@*%C_zjV1K5kA~fIkjkZsSY@m|>p3
zqWLwu1J-KXub-dvEo=N(9DCNVF`EKG(ybx2mpJZsjQbIXE))XNiiH0@CiQOb$sF$K
z8J7VW|CCfgntM6l^E;*C62p7LiY3o0Cc_FAeBJk$?It-yweAuClwk}OR?r|DNxB3U
z75Enaa7DXWjDSqfrZTzB9OtlSTPOQDJ&gfa5MqHNUwz3UTmQ6(a+BHB;Bo4pE!%=!
ziNS3Z<8+-jlWKEQsAzIJEaxi1T3frf9-<W-BrgR_ex~^Sq=e@P`%qh2vqLk^E%Og{
z;oWe=s#G(VbnG<rzvzK1<Wa{L<210Xpi<bR;Gu_!$%!b-e#kKQy&ajbeCw>!(T65j
zl`E_i&mWA7G^oEEM`U#l(5NufZ&4T?%7SYYWF7fijz?s2ffg6`9m8LxsIaTmg}KH=
zV%-fg>djK*o)I`K&0wHXX|`AFFeK@v>vBf#$iG|S9Q=#&lITX&eX~dRa1!)uO`w(<
zK9HtcI0~$4y;En1Iw{h=l*STewwanrZx5qghz#C1uv`2>IE0$XELVPnX8g}t3OSbj
zcXG6~TzbPWWSpef_(7rq97;=#R4ojd9fBVeY(UH>RNg8j1XlVaZ2doM3p&^h!?B>S
zqa)U+f`)ht|JMYkNI?2D7q}LSF>K2kCBx=f#jq2nG}^p%vYq)L8c|OXWU5LO#3_qn
zh@>R3>jYsCvVsH3?wnK}ISx#!%4=|ckup4wmWDCa?p#7Uhm5ZbbhAknvMmh_7kz{q
zii8M&{I5m8__6jbpLH3Gf#^t{opqVy?F2{#;7BEC5^G!??o19g2!LPB0Y2#k>GN_N
z&A(@0(6-5t)^I;ul6oA@;^Uz)@twwlTn-{V{v#fF?!@XNMjN0$8%4(Hn2$EhA`&&U
zDW#sWh0|Z18teM^bdQb-W;<3Mjaz~{;X{h#dFZcupR$6BzNmyCpJJmBo&miHTm@RR
z!WSjuTibRDiIXMw=?oY1=?y;fM<B*5GiVhY)QPS;WE4rGeEv35b$KhWrff`w2R4Gv
zal9N0f;FDorpto8vLy74$LljtjVilaXFbkM+ZH(HW3)+-vhMA<FNIHMhw+EUREGF2
zkD={*KzW8%pw1Wfj|o||(91&aB9+JIO@B#3Uuu+yDWq$RtuI{J8@2*V`XdH(*&2R&
z-j=_s+ZB*8LG1@fnO~t>1wQ)MSbv{x0HCu26{Bnzuy-m)?hx9BVc+CAN++>tG@?A2
zby|A4*$8o~S`5S7;YN@?tcUl-&!7gO8i83ESh47TX-cE_7CQoa;{g8gQPR7Cw6?V-
zF0`~)OjKXau*c!|62Y&mZl`3fi%lS-j3T*+`iw&$tZZ-ChRer-?WN+0;Bi5ELKJf>
z7uZVLTiAC)IR}-S-_oNFM|L*vPmVWOC#D50PHiy>+f-ymQ?sTy#}ekybX&VTllyCt
zt-uy8b?ZHPFEr$YK3LlJWA7d`>=xTzTKP9W+|FOTbc}+D5W`Rl=_GLbEAae3T3`(w
zH(GmO3syjrZ)^D1V`%Wl%TV^%ra3GycWgVx^+)rmnZ4YrI`6GMILjYK?XJJRn)nO$
z)HIX8Y)_Dqf;&%=j|m1sU|MH(wEWm2f>@eR?q|DPEbEL74W8~O8bpq<bV$~?h0|Ds
z%y8I&YCrwe)KuXxGwf1bBVPM*zXRPCwCCJ2!@v3ZGTAq7noole9!;!jy9=idf(KIL
zSJI|5_^q=7p)D+z-p_SNb>K5^NZcC6HGN#3pZIvrur6+6>^5{w4;#wpgW(*>nOhmo
zhnuxsgCx$Q<GA=daS526DTYl4cV90J-Z*e#PS0@_q_&lSJc=~ey)(Pjm2~_0I*kj?
zmlYxPR}J-g2tO??Fis7jGlG*h`A2peKZi20#bR?~G$rF9lYU2NiB^f>6~8!1%16k8
zd?Ac!i$I^K@?I<c3pX1a)MeAKKV6Pg(c%~}8=T@PK+|-+4^16TdweDTHrhEdB`Xe+
z2sX^6hb$SedlQCZI&a0--c#9@d4gsOGBymQ=gc9*ez{{T^Qo@YZ<7SsXxC3p4N-oo
zGBkVF4JW|{;y{6WvWSd}O~iM|WX|4R>pdTl{tC*@9GsYeYfa{B<7!XMIGsHm^Mhl`
zHk1sm1b!IwW}~;WiH4k8vdG3X$t(s}M_{A(S#f_l+s7FB6phI_b&@8Zko&8Xzsaoc
zYeidefXqf3;-jd=&Z)1<4j8o*d!W#?0aezS3lS=@M#3*yyQ8<%KS*u+btH1)lvP~}
z*NNEt_qTNbA!^WAfcaceQQ?_WlQ89dD`qE}&g@+`n?`oQe4f3{x&`dof)pa4fR-5!
znLn|;KqrUeA1Z&XErm*l-|F~`2b4EK%p2`zS>4mi@$RK$dxVuO*oi7BG<xDZ^F+bM
zYE=o}YL8;BgIPAP^r$l5K6%`~A7^tg+-=n!cW3k$Yr)O#Hq0CIj*v+Hfr8w$rc7cc
zqpl0@TsuaybfOPKZ)l=w5Qc2vDm#_Vb3Q2s0wIKtHF$6w=ie<IRs+E+ruYy(^gh9^
z{*WC67$v&<{x@f4c4jf)F>~`ht9})|SnzwV7300|TJV|DEx1WvdZ~d4@#8>X$-1k@
zeuKom&Kz<{cV|v<l$c$eq&0y9glrCyfJeXI+GKn*!Urx)T@0jy8^V3{$dI$Y@}vSs
zBj1^cqK4?&&6t~+@zoKfeP`L_1nviG&4K4_9rXOjivQvur9@D}x%IU7>M!%qmQ=j9
zoMeff)l3n$+vAJxbKaeC<OlJ6hEx-iRpW-9S0Figg>}F4yWjSg#~^GaBV@!2gzgC|
zXpBb|KAZ{H$HW59gFl^%?^%s06WaCnUY{x=x0QPx_Y0>~UBWI`#EJu3l-!*e=1W_h
z7$&TZ8@k%y#^aDr&^kkEv+Zt&*7_S$?pW5Uha%~m=x~V)93tVkV%@@+xSD7+GIdoz
zx&kpkSlLXd4!k;h3y*bY*NDCT@)oA7=_2BML~FHTPV8Wb`VhCM`6BVWC5PIWbTmdK
z{|Yn>%58_55jfx0+$rp!_SbZoz1U#rpd(zYj4wM%j!Q;D$n1>cq0+w5t6IL%dzW6f
z!JXjU*jsoOyJ*eAuTh<_DH2;^M8+-2ENHA|y9fw3NrCFk^i6prH0IyZhhE2g^I<+M
zNq~rheVZ+$8gXocdil8;7RV_2b81&NQ;{5;?uA381JS1`H28#0V^4jhBDdJXg*Q!c
z!7DT>@E=u){jSa*hd%d1v)#Sr^yVdUWYW%3&{6sKi%wi7B$#xxJIN-5gxVXT7a@B>
zN>xTk?ya6qn;(h)^~~<CT)uxc_KZMuTl_WA$tB$sa*a;(8n;a=rmMCpWzCRBfa<>a
zhBmwKr8ax>TQ#oe?xNb;=hfJ6+mVVo2SlA{4mQEZ@HG`~oa9h}de8lHW4@ZJ=yp3>
zDbM7%Azis+h*nQoQJP(KxI_PrJe7;_%{qgftFU5QjK44g(|`kn;TwofsM%ae(Q<VB
zGTJJ-$XDV|%sZ5U;Ir>eDgCtip`|->2R1xjwQcf9Qk`&UX7@u}Ul3=1IjKIlTSL#8
z3)WItE^M;t!IY*CR$uoP@{emP##Yg|yW?<Y-dAAtD_w1D6m4wDnWTNiV;V%344!^7
zsc)v+D<~5;fiJlC3_{eZivycXm?IzF&tYswRSqtd0B|B$>u`{}(ay`qza^)_?aMHf
z{=9y)M?nEpSxU|Sotw|QXc*n>cLNGMfA6QCDEP0udPC+#R@t+x5_>m&mxxb{Jq}6r
zc9U=P*tpXS{1GyG({8BR^pu|=d0{Y}%t{qg)4wby>WVvdH2;m?>H#&WZxZC8qRZxQ
z1e$yM95(95Jpt}R;qKewk(<yUbMOOF+4fcwJ}LUzFP#iGthjRe{r>Bk%T1P%_$nGT
z6e)XeW9hX2Q3pS+Y$<=dmj!o>jIbb!r9sS$3vtG!GkXIgd!IhGrRdBXAb^RB4tP7K
z`Zl{8G4yUVFo6=QbY<*%H{q5gAkUvOr3N*I#}_WBO`EG{Cg$gDEy+PoG^`vVUb#mV
z!rlrO@!T6wBYCSZdl3Oi&l!d-ztA)&1f?e+1e%hF?`aq8ea6EWvCBLW^~Xc)Bjxk-
z|0aFW4f#fU(GC7)dC{c{NyBt0VNdYDg`gs-sg<M0L7zgJ;gbo1tDyLTBEd*&OutB0
zPg&_mg4XBGd`tol=e*DENc;#$JTb$A#Ou$R+buo$wZ1p{b^2fZ&3XB`7?>{HbqGGO
zb2&&Xd;npUAXfTP9<21aT!@Jqg&-4GQXbZozS&qEaMj6dy{LdjsKCe#0MZ<9Y^D+3
z*i0jok-7h8$Zj6&q#c5wqBycyW(qg*90#gE$bU^5QtV72IM4Aj3;x&)EP}CFh$m*c
zM<t)56tl2tyHg{4P|WVvDWo_PxkP9K$`>RQlc}y3c7m~IfuY;4eMYW0d#o!Ttqy>s
zefsJbWOzpF6fG1MJ~Ux{_1VS9hmI7oi6^O%8%5B_+WS}Ne|FqpC?H|Z!en|J)CeCO
zw+n<gCsD*~n-RB^hsekU5D5974PS+X1lC6i<3~t%WBL%{g~T#?a=ac2IR%n;bMjQj
zhhrMMn64=)F#Lg}ETwCMn8j!>%*P$aXs<_1HheXxQF8OD`|esHMQc*)k~=S%Duz>P
zB5AgJT0_|sJPE(L^4McWll`;KppiXv7u0|=eriscz2YwG<hqQ~+yJa-wV_Z}Y@kP5
zeL?dq{QT=Jzx}fd(rk}3i!6rXvD0!dGwjo1j}hs8-k%*UFfi9(I=y8B<9&Xh->Z(`
zl4%0Nv*;(}_LuIdApEE1iteo>fF`r^N$45T!@xG(4O#X==o|KaK!^F3<Ugxk$fB?r
zMp#?sm}u1zQ$AIOXWS0DFyGe8pol`qsiPqDV-Z=kyK`qI?wsy&66O_>>Z9fwMv^4$
zi=U8RNU|azDJ9Ot&icFZ37-F)WUrz_dV!(_cK|~Mb^ya&8iMz1j7^Zg|66Ovi7ems
z$Xmy`_6u18`a!C2-~vG~BmeJl0(CkU@&Ad}9=p$S`~6p|Yp?smqD9B7xr-M!v&IY6
z?;W43L!GI87vNE~g7RNowck&DF`J*$VOL+KLayIT`R(6Wvzv=iwU=lZP^PZTjqrXU
zyIW?Me`!}Z^YWYl%&^ZQfxQa57cu|GTC<x^9gvx($`8ltS6|Z&8=pC*t?#_k4$r>X
zhxk42DV~6z8$g++tq^wqKLxJ7xyCoY-Ba4X@2OqC|6jK8|9s#XFx9XD@@LusVTWs+
z+wY$G|2yC+d#tYPJyGAQ{2<W4@0ZZ)vY(6uRG`<gS%oY&hx}_5<XMQ{G6&V^c?)7w
z`-z`D1>uFqR~Gfd_o=IsHyTg<=8N+SRU=_@2R9JzCH_F_2!3dVuy+S2b6D!A-r8)P
z8z}wM=EBY4^-5FOqj7<*r^e_heVd)ZNwP>moKiViLb)j?od3RLKKJ!6^U8Q8#NkA^
znvWW2pWIlitpCw;G2cDXZ^ZDl4m;!|0+hhDK7Kbo*mT}3@JZ2Je?pqeqTytTgcPZ|
zOMYUB!9V@|tUt9)JdXzt@oU$g9Zwm&t>2Tw=ie?zFXY)lTng`<Im<rx<j>u&ZlBxZ
zxZe^+HX_noLX=l#!Jua2JEUX5F#PkX|Fx;_I>o-iZdewS>%4&NCFTfTzG9?AM_
zlys=U_=6rwGV}!8T-eD@%o=I@cmxQsYtzF8uaPqauO|;HQ`rQkz5i!$Je%NfaK7=Q
z$84d#PnrjsCA}M*B|TO;dbwgU`VK^X|Ixy+;G=ZG><fFrjE9v);&5Fm=B{8*Uejij
zhXj98Hwb@nI6suIse7tcm-crPZ!K0s$4tyC=2D}v@sp@v!Snp=|BP*JI3AjYh9X$D
z@g>5Z)D6U*94;QxdD?I*d|CH|DG_-snazKdoXLM3X>{vc>u@XFxq&$neq}J9{U9})
z^)k{(8vB=qc{nUo@Vs}Mz?W*v#4;8eJ`||1PpSmmn#lrx|B(A!y@hPIb6+-pvUs1r
zw96oE@KWmxuYy|1*8V8$tRsBh2%jr`TcCKI<)RS=!_RBw8dJf2!V{RS>go@AQMh)w
zgn+3x?D1$*GrkrI%Q1U}dX|IJc?EO9)r7p{J<3ZM)o)Rd1{A33R@ZCqF~#5sBpW6{
z$G((E=s3Nq>)-ZBG%NEU>~)@f<(j)4t&;Wm*zyK6GV|`Bu&q}@KKTUBX{Hp*y220z
z9Ib!e@`lyb^X~}gW4_GwoV&sKbN*aObF+Bha3p&r8*PQBy<UXr#qr8xBE8no#id`v
z$_gy|K1(@Ltr@2UKM9i7T74yLhHm`65{HlP3tz*3@S`UKE+Ea6Y5MDU;`V~D4^XPs
z=V<EBRL8c`grZfyda#xb`V_ZH>}e$nob&3^1B?-&Jt1T$97C7j&(7m%uQdGoGv@2k
z$GsPDiP1R)BVWn!eiel~-X2dQ-<_KQlmRR^jXfV&>}?#Y7JL~?t-C;tx<Wn;1?<_k
z0p+Rur*XOi3qYBq)9s;++W)+T?PUmYbpY%MU-(SB0o6EL4?LWuM!eKGrIlNLzBz&o
z2V>?)&&eA)_b8Lq+0bF>$*cgdp2pwi15&x)@a>J88frd<1t>W??E>?p8s^~f>lRmH
z!5UXxq3OL&VaL+g9!Sxql#)@RM_vp76Rr4PzTQaEzC7KEH+;Pso2yQybT4tz@2=6b
z(#n=+o!NJ)UI(F2$_DE>AFZm`p97k-KKhsDkADFdLX#E<jJWTGIjxD7q>0G^0c-G;
zk^;uRE$k51O5P_qI8PTWqiqT{1zN&>Do|%R{SCnj2Pq_QVUQc_xZ3C~QbHBYhf?{L
zN^e*y)qHDw3fTt!5bS&ar3F%~SP`)Jm7IfxtB$yA0Zp;+y&8*m9Yuks$y7?I?*daX
zUWg<&1QdD5Pr@ucA|FB5#&Y>U@v~@Wj%3ne=LTJ|>S?akfE2^+<l<a#e?0SaPK!GK
z8gJ*Y(wclC|Cs*-E<AGvjG0%=_J3tBa1!D_Ue`+4DHBYQfoSc(sKDwGy}p{}Dvg_E
zZBfodL)9*aU&AKo35s$?tNfFPu_Rc?W&gFpU$yCh@X77RUP^3%VB+u0TJNt6A{!e8
zc)%&fhgYrpN+n{Hy9GAO>vk*A0q8$q^q4Fkd{~~)`CUg~2ytix0UB4XP=P?ddZEt;
zEzWbp62mF8y5s~O%vU+27vW)14Q9;cfHF&vOAqWFO!wQ7>~!lSgV!(laS}JWMa!U;
z#3|fxbA#hqUhel<42%7pK%9gg?m8M4d<s@7F}6XPA2K;evI%A#M_HLw%*I45_<KkO
z?rhET4_@Dbw!70Lw|YwCt>lS8Z;hiCUG%9QH|y|hs%z`M#-BEt{%DHWRqe^ws1ZJF
z6H9Am9D+XVzl2yZLn9q8dnov#p{muv1@w`-=md6T%RwCq1w^4ZiH|@VpmU`=jQ2{O
z))2;+aKjPwY7xuK0oz|ICHkVB!tOcZZoya(a*m+jj9A+GW!!07ZU%{LN(Q6*d`qw2
z2rFyYojMD<tI3oTIN&8LV9Ipa5xU%ZMwEs*<i~CFJxXZLtf)Gb>TO<;RGEI2C)5|#
zk^f=OU7jIS;R8D%waUF!S6nqnI2Rp`V5^(V_yaCSiaeBRD>e$hE*^bBA{}qHWRaPH
zSu#``z1>rSV)n!lY{GmoFpZK5deyQvD%ocxt}fOhE-0MeqtPcZQCdIICgE3$NxGH#
z{KsxQpQOB!W)L6*p<xLV(Pwv2?k*usf@mJD-q>zdes!xNP<NVQX<#caSR*93vdT#v
zshW0abj5hR9ht|{L%w^%6JIrkF{0cG=+0)0szFIL&-d?R2FZ=+o`JQ~@=QH8;ByZA
z-b@wv3Pos>4lVrl{%Fss4*2aUdk^?3^Iw5M$s6+R?|w|Y0w+G7(r;3=F7DqwO52Z}
z%av<ygxGG0?`im4g;e8tK=g&*xJ4aM%p)qGz2Y@Lf3B(87LeC1CFq~f)dQw)G}6IM
z`#Y0nTXGnI70twg$IC81-z2Do+el)^eRDc>+#(mJ4_rbO>GjnAA)}fwN$PH?{8x@d
z+KqQwzKEcSOZ3&an7e2wa?z*=f4NAp%%w52I?K<zDvbd>ONLlc62kpfkfX{{jNeG&
z#We>-X~y~ciNn#kI}&}~u+d1YGp!PSWQ+gxTe75NQnrkZ97y(r0YGQGXd?K&k@g3P
zPmKy8>0tfTN7aVq0d&yz3)VgbA$934+^i6Lzw*F<AQiYDq6tG|*IV6<SHo?aT1ygO
z-4*3m9z5n2?Z#UQv|aK9py~9oT>(@_<Mf-cf;aurLx;(%>-3M{uNpStsTH3?1Vwd=
z`tVb)wO)!qn!#AQ2*@aml|s{9zSX5II^4e#pS_DYz%gc<P}q*Ba)~ulC-v6&+>nh^
zYKge>-@o4#RN~nkieQEt;?wYXmkzTa;M)Vrz9W?WRQBdjcr*(rUz5w>m6!XId6%-(
zA>s!sEUws;nh%YRhC(Obvd^JQxd_`y9c=fzab60Epk*#cdeBOM67iHv%McEU!`3!!
zWr(>ScJ`0B_wL;{aQ@|1h*^0FZ=_QDY2litSa}#v@sWiQ7F}k=dzs_oqd<Lk8|msG
zM{0vhP81V?0<4)N?VB+KpY5sg8=#}>P>UuRT_IW~`<jn+F<g`s-5^V#QsH!|!8tfQ
zy-x{4+KB&N;;A-F;ZT^k0r*>+&(KGx{s6dzfGml%{;*1opsBa2kS>8#N>*$!tH`YT
zv!zkDHUd9oZ#@llTRmZK90xU3?U$z{+}srn#P1EuZ|{^ro1<Tso9^lzIuUy{S5mh*
z9P_%y$hVJ|rXEhB;U8pZMtNgGDjBJSvol`z^(k&|#bzX8fh_*i2?oDIV=_qq%4MW1
z`;E07#ui3&{etpkRQc?4Ph87}_}D<Lt+#Wx!5SJ1j{_bhD2kx*$mV0j*!)VVBv{Z!
zdGTU4OR{aZhQ^fPrmaJ(oA(rY9}S~0qU(bVC1=N6|JD_5AJv#{UP#xt?#$Q&pJH;k
znU@CB_f@UO&^jy>X@$kd#A;$@TY9BCiI=kyI$ez*n!9@|XT{V5X{JS#eT_7IdT6U=
z*&DzEv9cF1q9u6_vuMDY4P~|I^o~{ZqF;J$34t$Kg5f>j=cdfH1H>NL7A}CpGl&x)
zqgNTKv(KVeR^kde^5E_?IR1?V6#^(qq0W)yV=FD49dzeJ8C%P^o4>LXZOOsO7k`xB
z`5hb*+li;no^D6YYkd4-pi3t4EuGXGwl3?p+AShGD_9G3;zBDmDzaS4JKI2C3<9mY
z#CClir^z3&L-E0csz~dsQSw@NO;4HZwmacehGgovn0=Mje&_2Rhdg<If|-&DRmDNh
zIf9T}T9nA(=XPY#q6Z9Rlnm22p;{{;XL8?ylAk7trVAj*YBu4op=J06^9%YG!q<M9
zhz6YP#zQ&kg=@EeAZFE&nVQK0bBVff$f)1_A^pT88cnNq<jn9Xi9cR?nR;LMY^Je(
zW|{a=svn(2L~CvBIOa?ky#^cKL<XOPRP=krG_LIcwuTQ`4wc`nh3=@Kpbi)n&5?6h
zB?Y3IbP7a=pK&``Eh5fd5xBiXVq-hMB9C7*{B|r>iMi62!XLaI(0wR$)6hbwYbh+T
zczaz7Y|X(AwOO=zIp^_`n~!ZZxGG9J*YHpp`Xo!W3#BY+7feQfr6hbQzkB3c@+V>J
zyn|iMYzXSCx{)Kv_erw79Bd3IS+?#>N#Ke?R{loq&OzhyqJ-i0x)ORf-dteRcDM@a
zh32u!<fH5N25IrS)47)zqK>|1$(L!_>=;|I$+eq{LD5IF*k|~#D{wVwiMY>_HrZ!-
zn5^|;ZNiP!(&4gXbQ(TewA$<GqQ~F-6_mFRk?qLZUv)YGUv0h999*>rpJmmzUDA0p
z<tcq;9IAQugzZj;+rPa3tnt2-Jn7|pQRC-SL2y2cmJEI1@(+-*#V-apxL<ZcvduLN
zU~%{ghUT2x8@s~=-Nz7gv5TtQTeOT8#v|U|+*+L%lDe#BlmWRX&;3fVu1f3It5(fI
zv3u}H<x@qRycq2nS|p*(?EME0ZFDU!!Rct4nAaR2(dflzOck;O8Tu^0Bv7rnRcdu%
zbFmw^jAK;&x6jEj^kNMw8h~k6P7PRpIaD?nypvHG%o^Y*s;$Fzu{aWp1y0R!Z+?vK
zO<F$7+@$}`V9LtZ6beryDHJ;)uB&L3BEH-u*j=V%{Tv!MJS`FIil>V`bP=(&7hPLh
z^MLyBLFuNz8Sp%99^dX_+n&=7!p2T=$@jE%f*zr#6{k(`J;bSW>YG5};aq`dC8v$D
zSCzRh6CfEJMcyG}`1-3FYw!sH5iNK!e+>5|T>nKL<K-EXxx*7&sIAjKxa*g8izV_U
zc_gPe3ij1FzHO-jefU@e7ddMvXWOlAoLak%aOp?&cS#5{DQ_l-9q68JbJgsnF`hSQ
zt~6Tt&4zB6zL<j&PHux(*O-7*7BQ&f0nF9E!rl~*uE$4g9Z2xPkhDI!p>o!0Db^2?
zpki{KLq`IE+gY<@`27ZgCegT~4}Mb*LOz2siNx~!EA^(D^WW@Hbeh*IwVI(<UKoqY
zv85EvZ!R1EZ|Wo2OCXyh)n8-<<0O0-RIvZ}Gp@s~jGX!hE$RG_A%WP2R*?H3TDb29
z&3s+;K&QFm`s*4sbSEt^2*U;`meb9WYZJy_k%c0VD&2hqi~_=zY(dq~$nL^)^(1Iw
zi+`K-rb(FsJ%zQq2pR|NN$DB7X8#!uw2uLU&BF(i7U@st#0SCh1y)6-$?*^Snq)S^
zo5#oYVMPjDE3Qjy;=S3VE<k5dTi1%mkpM5EIwJuRI5(r+0TR!5$fcO+N7j_gsfS{v
zWIaueU3zkRd<eW0p7>jxKDWWw!Je104lrs=cst|l=}Y|6Tg?Mx*egcU)56Q*as4OJ
zTe33y&mW4|P7ju$3wvL=b$_nN@i3o+IqYjZ+Zya<4C^Y7Uu*L&$+exGnoqQI#Bdfy
z|Mq&KCM_;`@%IfmF`f#0zBe};bUU~3FW^IJrr4(a<ZXTaaldHdN|m^pgE-YzZC+Zx
z!Q`B5S8Y~heN!kmyHIMb7tOi6@-|Df7tLMy)>aMw>X|_BV{hQ>4|loRK4^HFNP`t=
z$ea8+fe;*~(fKlm%g$=x(uM_M&)ep;$Xa^)bbj*vLKy$9xw*A3aDFILka1IdcX%k9
z4ZyIRei2l$#a-0=lzlu4^=X)W1F+=%MT5jK?4G`RwOoH|hG)5mw>4b|1WwFzlo1mk
z1Tg)LG2!{@Hc|Wqn^JMO<WuSZCC!=WcU!hOjj#a~Mj0si7nE*wrF$j!A6?dHtEBES
z5Gsf~cvtqZqq7!;mdYMJ`1IGm`;brD?{kuWgj7I?&0m~KcNco#Csv<btg=F`wDuq2
z`#C?}k%s3Po#LNW3mc=Yj`D?~!ln+>=iQB+%8bjt?iEVz8sD*Z?n956jz2%=a<u4n
zR1IHyy50N9y)H3DvVrnwFZ{H1IRYj1dH5bpEnfRbAkmhy!wtOS61#hOO|uW#KS&H<
ze=2FMFQ>^D*Fw+aJGaePJbZOD6JaTGJiiJ;Frwd^=4DE@&U!sApWYy%r?e*e@%f)#
zg(j-{^MO0}`mi^GLU!R;omR)gaXN&WDo0|fa0ZZ!e4e4>P;a}ZOfe^b?0V^S-1iWZ
zzx3Zx2Quc`Zf_bjuL)Swji{!3t{48XM-bX@Bn7?^x-`$)I0H@OuuIP2f3cf3ubC~T
zmq0iNMIVy(x^Z6`i*#O5K5|^T#Ig<TU5=*u#yp)Fb_SZ%`(PjdVwAckhG&Zp*7wie
zT|x&bXZII4qE!s=yfRge3)d<-BbW1r)sA_;Onp^$zP-ve4KUPtzQ;M+kzQ-GD6cQM
zZ@-6kzbm|<f258~06v<Y;JAMWjf1$P5cZ~e(Am44MTwuWCv_s^_kH|J=38!e$}nv(
zCfaIoQHLQ9aDc1tO;Avl?oC)V%3tuEt@W)&u+9`wT=u89j<#R?xtDTJCb)tEUI@|s
z*bPSVCcOp}DlBX5)Svm<xwz{t^ZsCEuon;_;EMM!Q-B+)%HdAq=R(0)-)B#H9UB<z
zf-kyGzc(^ddE*}|T$Hd`=}FsZ7JQKfUQ^yk4~s^U;6#|<`bdqwhZnjAV7STAgXu41
z!|pVJ&#quOC1k17Hwy;0f|e>rTOo8<Bx_Ic!Be<3vyjZgy<q_s`C$?CE^euUuMdis
zK+O!cA(5~&zwNj;Su;XHSGXI~r;J+D4G^F#WvT!|xoVSt%>;20*h6POSKX|n#?XXd
zoALd-)keZtJ3Icy%Qv)ZOp*#mt6is{vgS|HklPe4jiLUTvoMaty+}=9Pg(tDw}kLn
z*f+)S0^VBx>Q>y^in#jbJj^Qj{AS=0cpaPPi@eoNrBu&0AYJ6~z3Ow}sI~NJe=@fu
z7!l|H;5g<4swLTwc4+*Ru4MSoX~^QCj_2tbBzG@ATkW~Or0TkUNA*U~zp+ldhNvgv
zKEF3rTg8Td2f<`?-8i-|$(E3-EPf372XX<xqg3*LJ_1*xABH)@$)+gj@1va)bT934
zKayhAUv3`|<fNzv*Zi)HO#o44e|v{}>0a|*uJN_eo4L#^k2kD++TKmXwQPzfSC!4+
zHiEn^B~<9WRf|y%>}e&M=606}V{ViC@)9cn8HYJYC^W1!pvMdr3<(@bE9rp|^m~4m
z#hDEJ(r@qpH9JtN((qI@O-OAn(F$T<c4xpBupO4R0ndj=h#YjjX0`HL&JIM9x^%Q;
zg-(z+{DNg2-<gBPoXvru1H~<BM6^KCbFs<?NoL(npyQ%=QV5;qZ_WHMJEh+?US@3<
zvSs7j{^G%72kD8LGU^U@Nj3}dd*#xn>tpaG^drPx^wa$Za%AVv-(xMXhRx4J?`m{-
zAMd=IaR9A_QTM#DI!MZ${YvjzXwGcRS16~YRdBjcriy?p1oT&TaSDSLmIRrDe1|}Z
zv(u+)lfw!(-l#4{1S@V9Fbgxyv5zz}7$%RgGyr_Q5PO!h%3!U_&iuOYveW&$iyBp}
z>&4;o{J>AO(A-3Craf&9I&gk}pYGx{*2wFqM{nxht#6uB4DM<6rXDf6`*V5cy3l)r
zoY5#~QZ)T~K{kC+gFB)p06lt6D*D$kucHdZjk-dRZC^V26tCM*8s&TXJG8QSx}DUm
z0Coi8i387fdVhf<lqRBWH3w#v_PwxC0=duV!*5ghm|`kC>Me`5Vt!hX6PBi4JA53l
zZR;Et3upLj*QljJMp+8rQ?~(VWP*a7enD5h;rk%`SjaFJc`UMX4Gm86sh*)tsK>4@
zy}8$sm=R^FX?*n)cT)D&@zx9MZ>qmcRuF%0@&x-~xLQRZP+LJlA7s10<PRM~CRX>O
z(jl_zyAiD5nTFRwL(j<$0^hz);{oto-8_VTOE+Wbuz-p@Z`SrOesn)O@A5KFVB)z`
zwl*jDhgT*J25h*+wO_mWu(3?Bwa3=h@WSM@;8)3j2|iufvY{+u`*j7|0T<;zIUck!
z0Re|_dTE)&!unM`PaqrYCCUeqM~&;21fe(QIpTQ!n;7TTXNT?>yuK+kW#VK6yY-B-
zX^P%cjB}jzAth>ZdoAyEA>FCg*7v!(flV`2{meD{2QNyOjQ|sjZ70jgAU5{Vo;e6u
zbd;%rububjuG&^N?}Lw5tzLu>TZryY6-sfJGJun;tjt;qJ+Ciq@=NB-A6RZ<3T0ze
z&8}-~N<!JM*pg}4<!|_zS4&u6srL8y3@(*<0LF=La+L)8k8$}W&#EuM<p)C0LJt;u
zuXM}iU}&cL)XH!RWKU#GCZVdd-m>G+3xE}M5<3K#jF&Mfp=vF<UH3<t{}$J}F^A9B
zp=NgFPcwMxhV(5KR<wUb;_b4W%sxe)R)(K>*Jd&+1|>tYchKMIrgn;iVB2G@X#N6{
z2Oa=5-Q_ImfOBw@{X%%#a9X2#j7<>1VS1suj{aPyQzzd;HJApSpef`mS-T6_=cv_2
zcY=2h71A+k!P9H($T)jqbHSUgOoepHI!%fL<reUrvU<W{?is!>d2_aLuC+1WJETN)
z6VX7H0XxXv@hQ~BK2;#Kaex+<mP*cet%Ee~{qJDYL|c?+58wjov4~FzRIe{V97j|l
zfgqW3EuX3;*|Wy!GAPgn2A;G&uu@sebS8JR0)=AkzjW2E)+>Ksq0bYSaehNQR%=;w
z&^fKdUpM86)!f#}m_V+?oma*FI>U99J!^80ao%3ckk;AyvO#$3OJzgQ&Qo<%x!mFD
zVh0pKyn)|snqMC@E`O_CGMBqW1LHq!s?%qIom=|)#pW7n4fm7Sw02vg>3o0w%e<lv
z2Z*@20QCt#T2+T2Y#+v+b=K~npU<23yb*i>t`#Gbb1~DuSUT(`h!lU$1G$rtFK|>7
z?(=0ZS$w6xk#nMeo&Qp3@UP<WSTkP@{P9Z)?~f8MkzOSELv|evoS-++`a*)kJ?e|}
zdTsd6(o;1TkfRo8J3j6gdC+}%XCw4Qb$!7#>KkkEO>R%)o*NvF-LHpW-R8-N-QFXj
ztVCcAghrGjhg1r^83A<VVB_f|zjWJb{;bhv1a#u#(C{fZj&K4Hu=2~v+)=>gcN1X5
zSw&B)IHin=6(C)8c<JR0ON_`eo-ka>Q6L91Eshj*BRy@IEhl*3b>Tm}^0Hd6%c=C%
zyM))44!{GlocvJ?f~iQ4pPJ*q`@aBlK#afbaL>CDea>fK)gO$a+sUo4S`o9(Y;r5C
zG-1j3trK%ul%Z&9JNS9VYIvX^BzvXcz@!<<ezK2U<#4bhH4Z&@Eazgr?@w{J*U$)s
z6!}TO3#RDQC2x*%=lcem!D1STH8eo~0^4V^X{;W~eK@4hXmV^v5`%SIq9Sl<ueh~U
z+}YYI@9DC2>_Y-CcI|3VDs80{RD$;sEdrF?6<YMCrMow}yHzRetkh`vJ{sLEZLQR3
zC8N>&VlcWjO&Q{({FeebvIZADPnd}L)H%pyP|hNeSNhYrIC8{mX_#>`wM#kNHbD2B
zVd#rsHgsIpu`w2-%{{8bB4oG<bIYNh#%}cQ|5Ih(W#qqGx_vAu|Lty<cK0IpfA;p^
z^^^Sf825k1*1&V~w$n$S2+;Cjmke&u$T#KjJ8+qxJHLFDpL+I{<Ia3`)NJnq&pYXW
z8rkXL2`iSFOXCFXQnK(m3Ql3wZm%O4W^P?+tzi=>kjW5$IEgbDL+4$pwnPA#k`5_k
z0*T~63D}wNB)di6KnbUXq6YaH;C*KhE-YsRBB%inU-u;>Iv7N+81<!(9Io%r&OsoM
zCnx-vn2J6BkyLO_a+5losU95g0~$O?MI%rjll-=%*@y#06Ho*sS~(U%m?zj85@8Ld
zQ=PC&A{fq#!AuF{`kB+|LA76NR^?$KA9$afljxM#mqUg=J<oqg)aG(BH0oW3)laKO
zbtcX-MKc6@ce}se==BfkBy85&2la5*p!+PE4=r6+!AiY$yZHfXN$17+=EynQ-Bz1Z
zWDj2rP6)-=#BhN<|55E6)#mJ|Q?FtOg>u*i&NW+IkA{-rEW)X;=#qlsvejApgu9y(
zDj&=9WMmUB1Vy0c2D72%!C>N&LYpG~hCjm*{Nn}i&osD1r(r=51LFnMKv4a=_sOKu
zN<fq7l4!hJ1)#CGn9eAq**EuJw6T}_xe&VH_9k9yJ}5?u3CJ1MGwUTR{X}O6K@Gtm
z`BYK7+$4YyZIxfZK?c~jrErs2*t){daGFq~b_d)HfN!Nrpt}#mn$%yW;JkJ>q==aw
zS3mc=jlb648jeHqDV>eoLn|1o>aDgj<m8Nyo)NceWAn_paL@jci_ElVY-9(a+a3Bd
zau2uSnox`rnZ#`{knBN2Ap!bYd<)~wDbnrXE(m0Ro#)clV7FIo9aK99{bpmoQ|-Wx
z2pbRGjE27)n`QX5Y&{d4!3a1GpSk*n#k(uDrdLdW8zTjx07|e%<W;(r1}TBHMMu37
zibl|i-w#}BQtT%cu@^kU5L^T4R7N&YGF|e!RbzKJ8`<Kh-NqP1XBJ+O;+KEKl^NI*
z8O#3z6d2}%rfvHo|K>CD8Rj7EE89aSSPItXr(Dg==LXi~MZm9M1uiR$3L`%l4u1PB
zCoHS$MFE++*>cBd6mvN$zJ7g&Ouif;UQ3+#p|kM`PFtNS^BmKElBf4{Y7f`-njj_~
z(Glg3#Ik8H`mk4C1ec1#r&Jfk!U5cpeE%69`8xyq4g3X(e}*0b|D3t#hi!Ru(=5^%
zhS#@>7-nhAuN_--`dC~+4vIjGi3;J=llnwHMJ+&jX^?Y*L3S~i)%lIQ%uQ1Tm6nlp
z274u@x^Nwj+&f?<2-hy@a(TKBa`gLptryh^2Q$wDZ3vn^gJ=H$(Ke<Xrh!uGVjRXg
z9c_~ynbd-zL?-BgJy>t`o++7x_%y5g^(L^<eUxFI|A@n%rwL72RpN3`6e(aVX^LDN
zxpchLM2PCFhKfl?H{CC_W`-Kt_iqME=%V7F$k1FEKT#$hNy=(bJ2~mg8(pCqBdY%Y
zrqN<iMHX_Z$Z~;(g8hg9PXNvai|Z!UyL4d;6Km?8(MC<Eij+|PvE-DVr*3oS4FIM0
zXTByQa^X6RJG0lBJ-BpP?(0_-MCR+)uP|v9Zgc#dV}H6Yz=BxwBF<R>I~n*v{iNPH
z=o*?`q`pD|h$i>|vOST4sWzQr_!s+vJFEk1w}yCt#3^nxh+Zq}#9XVxA@F>0J4A3;
z8JJIA;u$7@73|!+yQIy&zE-8rtMFwTcGJK}Z4eo{WTZj@ql?y9h>nDU-*F2IJCdSn
zXH5Dhz4l43{dd)CNp3;0Md(N;%|@@^L~(>Fcc1j$C%yMc?|ssHpY+~eO7DGAd;fjZ
z-fX>BNTaCz9%CUKREuPDEJn9XXtSI+|D0h2naPksNNb_4l|DR1J_B>Zv;C<%A#X#A
zv;ctdM_KAms2&QRL>}wN=l4gaR)842p(d3wlN$riC|>5^*_G??iP#T;<=5E{RxB3T
zoT?qrC~y`-Xd~01SjTkL2G|{<v%woAn$Tr+x(i>1)d*i4;AI<l)77@&0iefk+K`9l
zR7BD)L+A8R-z;R5&}YFTdA(|pi7Uklyr8MV0I1D_l!rl@!wzDcbd1T3e=T`Ap9Y5E
z`=MSma7Cl-37d|r0CN=i+!cs|G5F4S3XIpA@pw@p#se#S%wf_I^@|lc8q^$z=Tcw?
z2^D20fsBP$Y-iSp8s0CmW9X_=pwR9{hebA7egZ9{NO9LaeD2v7_V+2FMKOYL{nHN1
zZ#(?Aw&$MU6=?pLPfp6wfD!0%N>4qTayTHl#vv|<EBCXNSn-;C-TGp_06gwh$cH*j
zDBTeA7usyMFgm;l5m1ypO{qr}4i9<m7=?$!Dd6K0vM4}#6;wG4x}?wts1}CS=iR)R
zxE}0(OSRV82xIGGkcR09X*GdqO3<ei%Tmreie?pEV*Nk1{shl?ysZ|_9ubAVB%PTr
zKysi4TulB;&dWTym<T_ijd_vPEg&4f_=dr2_0}m55OB_bq;g!vh8{uV-YW7CAeP`9
zuISPTc`A>7fj8~2gZ>zAoq2i-oMgv9KD-gVA$B<5SZ^3rEWm_5$_hawyn%{5W(MY)
zK0n97fNq>gTL=%uVN+5AU*7DaS;zSRhHKjpYX%lrh(8dND${D9U6lv;h_P(NXk&gx
zA(6Ti1uJE14GT!qSgfL<GVo_}O?CUquW)7h5G@5W_wQl13}cMsG+RuLFA+YiB`Nv1
zlVF%~Z)<tBCLVWB4-Xrk>)rhC{PDVe=}WCOpQN}DA-?-32k0<Rh&3K*Hiyw~xZuGX
z37h0?&~>p+{kZ+H-aj}6_EQ7lxT~rddRLq~q2#6)U0~M}7xG}~0t3nOiZFT-!aQoJ
z(PbPRj$xdW_2-JNLTN9}6L^m*{kD{gX!nU?;KEjw=x%l^VzgR@bT|Y3oAH@DBE`U_
zs1ki(D}6pMbGc>dw*Zn(sElHZ$d}Rc)rL?Tuxx~KiT0S~D4Zlf1AmAM)yDbI6Ptcw
z(@$)A1vX8~=ZRPU+w<zoar|ivTN*l^35A=TTcB#diw))!K{4S54`CFY4F)zIKXNf%
zW#>XabuAk68kzruZC0sSAka_3EMq?1<m6hV6ZrP%e1qo|6cGd^?GLMsCU)g*yG3!f
z$WREX{|oq+47doE4qf@RHy$_*bf)px9y%7r@7oZ?G5QAdk;fg8X1u6IA$z}QyOMa(
z7mTM=YoE5#wyxyDiy$urQ9xP?gOqf8)!u0rxTOsICA;*7nWs&Wy(AaeOsK?CTL_d3
zyc-!8DlcY<-^cOnJ5Y|IpAcyyx2>{5zU3B`P4QBQ){}=ndT?WP64!m#KE*J1Ku$b;
zp`dp;X<5zc2w&z=nj@V0fk9{)l$67QnQdmz<jONbw1?4?r)7vLp8pzjm4rjX^8$od
zr5~qIBJmVya|u4}Sz{=#VlRiP6<p@)pHDiOYg!$HCBL5kR*K-53N{5?2xd7+_I7Bl
zuLwL7+4#OAITCF<lo$zfWX>=*;*GQs$t(YXih8I(h;WE~s@WIy))QFZ%R78>DF7+X
zB|3+^@b%EL13fmS>Wlhh0K8ue6CdvLz_K?}`;A0M#kQm{pA;1|qbf#U)~a?Jz3k#7
zzQ2*hsSVU>c%2mGuhBXJ;r;j+_*8}lsFliqRX^zOS8E^0u_+yvRw@P7aiiPC78d}E
z7E7WGk7zk$XV%~wn%3Vi<H%gYC7H4!o~I5#){izH)lX%#XxQL7?N2;neKP-*XELrg
zv^_q49rwCGL5!j|CYPvNb4RoBM6aIURzaMi)dr0hbuf@$tIfty3tj4x+o_Al%*+}&
z7Zbcf$?}(EUBBXPrT?NexXwiI(4j&+y~&UDTCd${{FP$lE@;+hMK!|mAv)jeX~4MP
zK3vzymN2Zx)^}%Yk(t|bzHf5iu-^V<zhPPKM^OTHyLEM6E!qaYdt+|i?N-t)xFsr{
z(gB3!4K5u_3y!gB#VL=UGmqSpk*s92Rk<yamHS096U$g@%VHU;cvn(x_e&Wx*)Epe
z+=`gdig!n|oY`9WwrG~`56$Yw&uLA;tH|tH632v!cf#@TxSG}!yt*BZ2^H^3$DL9+
zgCg+u&8<iot#~&yn;5pRmfGAXaDWlNSsl-K#XI5ov70(A;nnSiJfY&9aBS7(j+sD@
z&N0LHZ&oH`yyBga+}+M-?dp};-qL7BE8Ypszj8SwmFrbrZ+&K>(3@4*WxV2@@Kk}h
zlNzgG{O#Lpkx3N~50}_uzSNqjJ%@L<lPVG_#*j&lFQJ8m$c#LZKmjNd!<OZFtp35s
znp~`>#e`T1)Z}U~SWk=#kv%{OFp~yId}Yrfvht=TvP-2tK_vb<(FI>k;S-|oJ?qfA
zaU#XC5SkwJoDJA-o|a{ioh0tO$@V*q`eFZ|UF(uJ^5JQ-seUaFII8R357d~}dw<C4
zO8)&p6-Tw%0GB`c)hU`=_Q*I!wA?C(%{pCjw>hQe9bArNDK^3T8IOof@IJ;Q1>f9O
z?sj%csz@%~vk4v;AwRMvH-R&@G3_U&a-UoXk2a@|W+9pK0s}y!lON^55E1-DZZYn)
zN>BwOS+-^*Y?R4jLe`^aLF0D?#W9?GhzDwT#&u)k^@U(VW^BR?gHEufLU{`N9a_KT
zSKT9A9mdiXIO@QR4lvQ*K!o_lo4Osc=tYa;#m%rNh7;F{0Rm<z{lpeA0ffu~^Ke^I
zm353#0qBKN?*b4JCv1SB0lBZNHNpr%H=KU<<lIFM&5?U;dsK=O5gRhDS@hb?_NTf=
zPEjB!DwsKp;qXQ#3;S5@D58atP7q_vCB!3!i!im6T?}21Exi8XoICbV9EbV?&zS~N
z9!boF5DC;P5;>(8$+Phdbb@|Uct)n8t^%eBFU^RK&@DXSjucA^k)+khO{_%{3y}tW
zM!8ouCk_u3Ji1}-Br(Y4T;9)_O)+;pNyRKzD2O%6eZV)>FT4|{gwQa0eq#8{#=)G4
zW!0Dj_9&;%Rv{UPYiBCCL)cV;K$cZ6#5h4<=Dxnfy9XdW<zpA8b%H3nW3-WZV#v*C
z)PAx4^h7}sPAO{8wWfZr1I!t>zfbRA)pSp)HFIr6e1QJ;b@J8wd+}wCp?}X3jfET`
zz0h2eX=!ecA=egz&9NFKQK$g{_0al!LId%MHx}Rv6p;5y6L>}hDLOW*e-Aus5J;a3
zkM2Ftr6id?fV|Ny=2_S3kw}gCjEU4wZ*UL6Ezcf0l&^5ex3LX=3t-BlrgAU&06qG{
z9h1Y6-W_TK<Vh%jsm2nV$%DOkKv3=ik8vh!n`cNATkk5nsJu18Jx*+%Z!!>>xl`F}
zDm0w(Jh!Degy@;i2|smx-=Pd^`Uv=m9&{{s@%8I(@Lx{3d0kq5|6)4jCuFL%`FcwI
z(dcUY-_7R7<D6C>A1+z{VlepcqgpM}{$Q|d`_5oG`0s`UAnN18CF_&+`roAg=2U%p
z7_Co42AZB>Tx}S@G2e9aTA#)JHjRoP%Y=Pe*Vh_fW1N76i{F0JPlM87a%DRe4!t&E
z85I-irB^{JCe^^_3B}{xO{lB4lus(qk6!?fO>D_ECbRJwe5FF?j)%@sT(XokDj+(!
zJXN76`zB=?FzyTb^=mQmB?lcEpxhIkOhRboZ{<E8X}=t{?@nxLe7WXRTTN^?q2<b(
z)TA`9SN#_@vT~18TTklwn^0C-7K6-&69h0(I>-9C1yLxq$To?acM)^0<}fNt2`_xo
zu1EVbHZYHbB3hDD$7Ja5-+5A9AwcUpE}!$TG5P)7?K9Y%?8#NW(S;uvvlSGU_?`IB
zysZI}4-*cRHAC}K6%3QFRzrIpucXcev<l)`ys&ZDp;f3h#nX5db=0I47ta#le~cp|
zp2Z96=Yd*zd|b|`QA(^)N~@s`aE%J$X<`#e<ui(xkpo93PPBSC)8nCiQg0ofNM6HK
zy{IN`KyAw!F||NxR{YDn5Ee~ZpG)|LER7V@TNWuO3N57J6<hscDMzg!dP;Y(q9e>A
z-)z^a%~&D>y|92CEh(a2tSyl`aAD>^!F_tiC&JB2NR&oyEs*|`kqzS)aidKVqT0|H
zrkjK&qmL3APK?vB1iM81k{82LcvQ*&J3&}ksxl##xzM!3s->aA>?oDepz1}%b(57T
zp9*Q7ype_$rWiD)-0R|oCUd1|#~9j(%8gRx|3xpO_F?~HyK&Gz5eAG`F$SkI-}HL%
zt#B*FjZA2yEO$B)rWlasrY>EcAb+SLJK^dmMNAoRJ<pm2m#*g*Q-kp9J>}sXL>?cm
zF4AoTp}#IKbD*l)>%2);tvmHY9E>+0qv6$b=!Kym@)EGdlM2WjLdJnqX=$Yukdn=d
zMv~cMs#W9p4*)TbsJy5saTX+CYg|Vv6g)eFDn){wUI-ton**K`=XWb8<rGrqO)dhb
zF3E`T{UnM7g~!+22D@+rdTSb7zUES=%}R79d(0i@)46Sr*tuuhGSJA__3X&xinu^8
zj~-*Ib}5e<B+GYCyLHxdCbRGBY`eHqtg!M{skF6Se#QO<nrC6Vzf(B#tjXY#VO|Wv
zwX|0$mG=Ig<4P#YQN7&lkQg8mN3|Mny@aiAZM`mUvwR1}`<)f==Y{Qcqq)*8n<INh
zzJcwPO2sW`ZtE3vcPA&5ahAi!xmy#oP$_PU{=m25u0opsx*gnbjJS({nlu&YL7tC}
z?QeQK1yaAV#Y)>dJFj1@!%NF2m6@So3CklH4Fd<FwsjBGEEl(mJACr&m0oM~q!+hr
zpgP4k9cHJ?FiY|Q2zHzFc9Y%~O1qWfUa^$aW&diD(ChN;=g{t}-GYT}7lxL1?MzCg
zg0wvqwu@yop9#}_i4N~eV=^d;3okpnJ2*syCVx_{RA6fF6t}`*l2iWn=pt6*DIm73
z5Ys2-_8lf>3^WeE4mrX3VtJnSlz8>uDF4^##F_YkH5%PA1K5K6ujTUYRwV!HR=KqM
zl>h57KF=Z_arCT)ob~K62FSA6v*eG8E~N@m7%UW}W)g_Xl0`Y1P~5^JD0f1!kl71S
zsz0_S&N&7b3cd#~D8SGu^4GH)Hj~)!YrkvH?)q6<L#5`p+G-rud6<Z-$lY=D@U)M@
zdN%N$_VXVS(md(%&s*aY(iX)`T40R5jel}s6bP;>8trgW(j{MUfn7wVrsts*Tw$of
zJSX`Eb}$z&-8P0&Z`cb_{6$WA2Sm@H*R+eIsZqRnieK6ITH2YjH^$uV>UF2yMVprX
z+OvZzE4+TJ`z)Z=0>h^YVeWdVx@^TgEDx)_YO`PObU@?D9rA0b7_W+B8tHy<buY_9
z90dh{4PBeN_1NDrAZ9MXc&3kEqKBk5bq<}0<6i>Gi{}t-&l1s?E2uR=oX#BzXU12S
zoZZxYHO*cm-ex=T+%xNJbi?TUS3<b7cvsJCJbbo2jHxH?Z$$xM3=xkdEi8gU5QZS~
zf?inUa)yZd_#`+y?*0!GZde_<qrt6{e=gqtxAt~-qWM4J=hOcGDEU9pL|r{-H*5Vy
ztJXX{sPnKR{E-T7Ezjck<HJVlp#PWCM!nbk!g`(4x>%yiL0Ut;)1Ttid9ygQffk-F
z1^I<Jw`mQ7QSlOfo>Y79@G-e1QYGA(@xc#K8GrzDLHAUAAsZ2qqw^6QICu^<0uxR3
zjeP?eZuR0c0ha^1(E%@eXHAA9yBlEaSZ`SM@SsC<jNbPzsQ@N4q!N9QTzrlQlgBp4
z-J{ez$l)hS(-=L+miMi1`F(46WlaY5Fu$nAYU_W$S?%6s`7c=il~Q?cJF@<Fcgs)m
z|KrI2C{qj>j<JlhE<yuiC|@3q9j7$~^A#Jm9foA5AVC;`*B(<uVzlZbEPw!_iEViW
zYjj~lSsuss+#1*{k5Py(gJAk*bMyN8x;U`|09SOqi_No<<!@pPSPE*3F}_f12m=xO
zHr{!H_Ro2US&XAR!eBt(><OA-(PvH^EVvlcLE3dGKC(6V-p2-t<8787I%tYGn^9O@
z3`HvtvIYgZR&`|;d4L>f>B2@FwO5-8)MYRptruey^KC&39t;|Vv@7TVg=?iS#Ym(6
zVfz&LV}V?}G3u;0v+=~xQq)z1r3k8mv>_#VU}0#L0@Et6k|P4|0^kQ>z9v0dy@ICY
zweb}QJ=#uzGU7N=iQ_ka5MT6J7Guq8=?Md%TGN?)L&)UMp_ONO`#Z8ziiJDkoj}jW
zLX$>b!jfqfmg|O^BSj979rH-^lNmZRo=1R65PFbn^1$Rbtj@6#ZcqvMP{*S4QU=Jj
zFd#0xG{-3gB_7IKoE?c%GoPkzfq>=h;zf~tto`<zMwioWox%(P8n{FVY2hkqq)-5i
zWVCT3v=<S3iop;pK9I1G!hZ1X6T^?q>1)dm9n?W5uN{Gg71-7oCv<@;6CosApcVt%
zRTnpieZXu(d=H_oo;9JG)(`_LMW^mYd?s3gQJIs2U~v77zka<F76`X)3aE8Bn+&kq
zb=1auexlgZ!lq*xUuA3}osD8sxN5Y{oJnZS;GGD24bU!tJxq{1eae8qWQcIRs$8*V
z$adbgxZ}secGI)|xj?#4_=xiB7)*&A<p+d;Hu;DrD_(B2tbnVDV=)hOZ*<lSG1pc?
zKx`P-F|t=$$zeI3VJ5MQ#_Ev$r2(V}alUXq=27J-TwS=pFrv5Mh}tHA(+Cwp#T;>c
z`qWt&#G2H`uqA!!2w<N5&lxb1A-@gf&<en{)kaEQJHDNa$6>YEtz)@Xalgp=Q_sfO
zg(Aec_(h9~8xtgbDPlHi#siBsEwu$mtT8ONnHCHq4Z?<o=%ZAJo0AagkmBKVk#X<D
z4B-<|OKqa}J?xRhoR2UcKR!_z$>ZcAtxjesfzun7(nS9G52`sw6CQlW`tx;pHLIpF
zr6ew-5K(iz2rVOQrO$_x>lm?@*RJ=C4^HSSsP%jvUdGFlSkK9zKkf@x<zZjmN0R0k
zsXH*)&(Igbtb_%O2N+*d)}DuVo}--#y;8YUBb;+JYKj7eCWG&25RFy@!~2q;O^`;#
zw6Y0hgd!xGWv#YyEW<Ym6++x$A0tx|=VziPc!6MY<#;Z?Ega5!oY(wfkCwTe@ryX2
zt?q6eUarfs)1ds}0!b{Pd({%6)sqw0d4CR~n={|r9JvE)v<aofPxVDU-`!Sk;460S
z;7d~`srr&OoMu;9{@<&IEA>ya<1!XgoU?mqI!9{!1YIP<pZs?gAS2dojM*(&n|CFN
zt7t~$L@t)fj8f6jIv2Zm(BlDS9Y0+FplmJ3_l4NEd0rmq;xU5xWXn*ss+guTa^8rE
zMChU?C6AGo73W{}u$bH}m5490Y3P1!#)g5hMK$V^v|X;{^zz!c)9smPn<~*+7xS8-
z1k42}UrHZbF$4-IC0b|?M5e>2P$^T8HQ3N4Ui)q0UW;5UcwYjg$e9nXDWEPoA4TB<
z?-KW8uPo5x&PFzeshg|v4Eo8)rpH1LVtgpnRCDs1M8hefKfx7%HT7qyi-)m<^@Fv;
zC-Jx)2!)?F;ZaYcQ$H{?g*Zm3)k5irk0uF4^Q2jyARA?%@K&9OCB2qGyB9$Wu}gp#
z;wPaAxeJC)Dh*614K%lKQVhhQ5oo|y{1leu9lU%Dv(BM6Fo$8b4uFGjsEJw#vPv%O
zFq2y;2`C~HLEHUL?aqhpG6Og_{Gqc(OqN@~%2&WDDqlv~@j_4<i(H-`5f5w_!c?#%
zV+A!k%1o>5+`bOYM?%%dajw>mo9i;cVx%fN#Vhl{Y$5`02NbQF;u0ukdtZz;m(EjD
zS?YpCzvPs;;~HOWMP{#^q@@#|SlO#4#AlVIu&|Y-RzV7s+b;mT_Y7QU=Go!hgnO7#
zN=4K{M?{A?0_|;l3(2L_)0%o4om0A#T#6!1Rw!~ob|&xzv!9WeBZFDU*u?hmKk2Y8
z!93dTMC@Yf_KId_<~sCR7|iq;2qb9WQ0kdXEg!QY=W=Y_cxf)X$V<ZvobWcBuW7Hs
zM){JI63{vNJ$zScCYl$>(yBH)P(0QB*U&1*VF)0?G|Q?z?a1RlSzg%xF`{?D3edJ6
zwf`G8OQZDW!cCg3M7)mDPJbMlEW?LVWV*1&Qen=?K<>E#9zgtlzPCqz31#&T7}HU;
z+3!_PYZ0=}NoA5P;uBdZA@D><t&>aVq+0tB5)~*W#95!4u$+N1#BX5I#$SwD^z`>3
z{d@MHq^gdlm$0vXK5nYn^@~lM@8i)!cmF`|KFg!VGT<kNNSF;uL~5P6SN1P9O@<Fj
zo9+x&5_gQko%z?AABFEDXMmvJ`m+hTZ-r}z|Nb5RB73|C`cQ9^dy4d;=qN&M<}rKp
z8|?|3OdR*DyYA6Zsdcrv%JSW##%9g4ZdiBVpoVk~tgXpk|7uKa;6LPSJ!%|Rd+&Z#
zE{5kp&RNmFaPDbRrjVhrf6VG#eTw{h?R+4wjvi*qlN3S?a>T~pMy#}@QOO?fZU;<V
z@%@EJ0;p16uTg(A-QE5@{25%L%uNr_{_*?847B9mr~L7~)9=|c6$(yXfCBAar{+&v
zbKRr*@o^&E2L0h^6eF2>h-9N5fC*7=d+Xlm=8lHpZCl@@88quhSFrI`OiusO@?>(L
zUuj-G@RZ9W3EGJU8%*8k|J7EkRVn`h<oUJP8e#WHRI&!+(Jv|;AZqt767XxDy07Ff
z*6r$GV2{Y1<i0ykr$O;fvY*k*D|-O)_<i*-tzTlyk0K&oIO@W6%*-A3ay;{cm+0n4
zelC^>w`Td=lCO^B=&B6eq47uVb@7*@@%;;oF^w<HbZO{$y6rL+V@4|xGx4sO%Tm9x
zpxAa;KQ%{$8ZkO*p-E6ZY49VOq8{Zn4ofwFO-p<If*Wbo&_yk|VRb&F0;>2^d*Gbk
zsPgD3fm5-#Y_fsseKr+Sw@te6XDOq?kFJJ>qnyi~=Tc46G&`eDW7#f^%$L>LyE?g?
zU|P22ouhN?u-&QEGc4gDF2+gkLR>_wq1*yL>tqBBLx@5^f4Tw&X)fDoO=}0STGQ$b
zY+<|T$;?N>bmAuW$pww_LxO0d_d5IvpAeO#BcRx%#ABfNoLo<>8}%EA&!P0X#`vqs
z=J;pT)=x8|<F`aTv%isHNUzVCwyH-zDTK1dG!||YPq#N8<^E^w(jAT6;q9-0Ex7-=
zvsH@T|JvK$tvucTdYt=TVtA3+0l%M-|G6@7q79x{qRT9XG9kI}jx5R!AjAN4jBk*C
z*=#D^-(lnMxU+Y7fh2JGB@v4=R{})-|1NWa;u;-p*WR_8$L$022B~})yNxPeq~Awf
z7M^w|HCjcxl{yeBA$BEI)XBb-%Ik<ofdtQ=If3m@Est_j%+AHv=<U@Qz7i4r2CIfb
z${p78d{W1G=3JR4oTB%*6MGWqg^~Y0{}HP_r}jmd3jMb2vGT}e)>vWr!-P0r4sWg&
zX&^OEm>69Wk$nC`t38)$1j}auGSvwzD|V(N5~fnt9L081C(OVd-4kt0z_%ce3E(GE
z$e_=CQRj}d`86mLojmtAd+t7cewq0riX7K_jjp2A*bZP1x(`YvfxivClHknmmlWMo
zo{pu{tt&!wakuHx7IPouNNC~~#7KamlOm;&Ec)D6s0IOi#b6CFfg=gWn1|-#T1;R5
z5pIw)*ak0+vX7LR{G<&%>3>Q3A5X?sn@%4j{$IJYy;F(C|10g5pY*>+(f^8`AK(ps
zbEMav70eI0C<A+Qr#GH+aT#DZ&|UaBw5KC<m2%I6YrM|f42DHfw&@J)2?p?^3nio$
zR;Ospm2WoKM-}&Vi{-IEPQ1#k|4uM4yVV%rqb(YZkx<6xVcL9Ve;?RWx-iRqfykv!
zT{RPcc<hBQ0*rfxHa^fm2hb0B8CjI1Fd!mA0@LG@ScEjiCmHdg{${gLtGBxK0w9i+
zsM_fSu<}C#{@IO;v_S62)-}djur55C-x3Eru001sdu%XYkdNT#p-Dp0Ni0E@(F<VC
zya~&JKHcc%*nYL!=xz{{PmSKY_Gu4ug>|Z}9&RD+4(uGQ17T=vAA&Ocg?#|sd4t&w
z#S*Z;Pd)4dMpqfreRfViOzov3Auo3G0|95iF!b&Wwo-QCUfCX5QKy((!RIa-77tGX
z`u^CFw^3qUE3T!-b;R09yeO*%*6F$KhzE4Q4WH5jPKG=pFaDs}qQT^qPSJRQ=XfG%
zDib*~YbPgt^n$J))%%@xyGM3O3UV(@B-j~!4N=<0Fao9k-?ZpvkLL12RuYQeh`}7r
zY#GFW7N(etKFr1oKzDXlgz3IH5Gzm4oK2B3b1<D2{NXpeM$i%04S9IZFs^jDSjGXq
zgk@}dYedHW*qJ!v8OBiLlQ%Cj6H;*sk<PD{QE&;1g+cx?*Jl{UM8|(k<rWBYDt7xv
zt<&1)&+@*S#1SJ_*mziqNzJy4uhe;HLSjaVn)usq*1KJ-6FtMz4%U?5mNU^Uu$DaJ
z`d=)MZA&uI@SkajVFS4++Bo1xwHh1FtP!j3H*gX_C0L-i=6RSRjC>xFTfa}fMBk1&
z^=hx)K_L1CN^L?OG`biivkz$W4^E-Oh(@mG&FozAp=LTQu>XmvFl9~?bmR^w;sx~z
z%VTTygnGNaOlJ0#+l++Fl(fsG<?PhLf97h)mU5|cjmI9Z6nC|ZIa$3oCLC0|^Mo^l
zpU0*gHgOx8a<HpX)rb@-@_DQ+>_W)y!VpHdB9P4z`MIw@`XP<&bv>46WFGHs_b~yD
z(6$(6;NfSW359CygStE}h1F(bCR&CZ5jWM{ZFc4aXmqy2bHL|6Qkw#GGp>&%Hc4!d
z!lkvR_N3NiLVq-r$-H<M9{9^j8mUBCdrcgMXKXO~s6;1NE)Fs>JR%1uUh~RmBQsWP
zQWJ*=UK`B#NX7Qa+=+#fS#*<RLPyULo!VSJqp&)nxY*Zz`%O^Ub1yt=Y;eH(N07$U
z91m-RHu@hKn9WVIJWQU+I-NzR(8=j-@o>)>vnNtMiNGPc8eOBKVK|BCHT1#G##CQ>
zS(F9|qVG)3-BfxvT8Hg^uib7&YdiqGRa?oF&MKQsW`*mD#MbJil3YVzA+aN?z?)4d
zfxQgd6*)OjO>|B{lt0$YJlaB%CcO;OLnq%gTD?WZDC~+kO7H|zrnPGZN9YK&#r2q6
zG<4AVTI$F~da`gOMam})G2p1l^8WH;oKP_w<kX?m=paMw4TVF=O>}qD96=~E<d;Pw
zARun?rGa}y^74p)ou~FOj;$Z-<`R2=RyVo{L?=4mEM!uwj3nksD+p7f4`<`4%8R}h
zj~kyrKq23(#iPh4wbJK>mPlzH-=||hgv_~>OC5p}lO#aGI;O-DRtYB*1<k?8SQL?N
zC&p@zHJUi{G{G==Je8Cs#6a>9yY;AYXaxzaK+qP9ab!p1&P~7ONQXN#ccqJubfm=6
zBOXn4nI%uiwzK4d&_ygwFi)(o3`|z=Zw4b8V>BuiC|F)$;ud_{bF5KeY>(Y3D;)4_
zim-E3yWM&RjZ^*FyL#;djLh4TW_l1IQ!&X1-k_!UjM5Bo>xt<21qEagk7-tWhwaWW
zS~0q(CnxPruYT}r&Ob#XIq}GKoUC&4jp+6AtR|`Vp$o#|a`Bm}VR_@uJP{d?Y)Aq-
zZI8{RWS(3AtX6FyIn&^U$pZz;OR+t&zRbo6Q+h4k-dG)Cq--_-RUSi8Q??qi0)c=R
zS@)<RTSIP>1nvr2<ItWAZlrA$p~145jmWr&7m_Z-6$9>+ER2b~i54!cH>{;a1LiNt
ze9Hq_P-c=htV=(*wUJw^1QX__UW_y3-5fX`6m4K+ps_u4FsqLeZLpR*3!oR&PBLaP
zB1c})Z3-YYWHp>rlfnbMV+WpNU&%5YRb(E;bZ4ayT#JX~3?IxH&#p?ayLJ#0!@Dq~
z)JRddmGR!;<3pZPM`1YM)FES2^CHd&&3E0J+>rNMf!mIUdKl(}eN=P_*w)ncf}6Z3
zt$QXLGNX~DsM|j6)apX1>a<UQd-5=hQMpj_nd9xNENy)y(DC>=GH_rx+|Pf+anb@Z
zrNb*Up8rr_=9n@*t?WD!wNSAY>aFYyS12fOT>_HvvPQ_crP0((Mw#fw5H^A$)-bvn
z@Ee%$Gm3#qL3Szd0}paLweXf!U%vMj?zq|%6Wj&x8Y2@)WG|FMW7y|K`=ThnDa7|6
z=$|>yd1%hWeKTrHF#I5vkOYkKu;p^ryg-vlZ$xb|-IIE)aoDK!tDR^o8KY-n<E9Wo
z6&PeKJA)taiU;+Rdh4Le;%_iG7H;?Yf(}{B8Yez9z;gTYfXdLkqKuY_lwT3_b2>Ll
z*ou$~nNg=ru;$d^74y%3gv@*1*DCb|E@8JL=or{&`<y#T**`@4swfXOtu@aUAroQt
zru`Zta!OAhWXkZnRN~ArWWzm}QT3kLkYce6P3j<bv<|AB1BR)_sksAw>SG=VL9g&P
z9+%)+Ri{9BHkN&eqgd<$tg0tScnvwe_7ZLa=K4p?#y%pCaZM?+IyUrn%aDao7*gB!
zdg_@h3JW+)xc&75x1XJv&ybF~UusP;9(>#*49)U=ci_<M<N*#v6ic3KSx#k&C5?GD
z<gBvPtg-?&v9Be(<s>DUxmlnWxT}axw63t;gUC>EGA}7fX`4mZN!ThPQmAAG>FZIK
z;$mx27qxVTCXi^V(6$R~o^pu#cq_mNG;b`z)BR?nmTgN+_0v<8`5}!FV%C`VagzUk
z>K?a*{}-hfEZ-pm)S~-8+m&+k{!h8|bpPj(?*9~6k255?oWlp3%>A*XM)G`@JBe_`
zRXC1F^Wp}hq&4Wag=+lJCFR*T9~CWN>hQ`s<^ge4VC*mrIK0Cbz{UAJA~6REr#AX|
z4MFeYelyq3Xe1nUIHM<fQaQ}7#HCHlD@oIwE;<oE8n_<vV`ZWa>zXN)D1`-8jobOd
zI?{EcAj(RpXunckbNK-#(%#zLz<>AHu}gtQn%2w%C18X7aZC^X{B217W`9bm`-<wm
zW=9s(Jet{)iG60xhEVyUcmdD;eD2wH@!b0pxd16&F^4?GhU7(up};XO6{Z&%02PLs
z{}OIjzT6u3y9aEfyt9O9ijM`qbfp=d)8F=wtMA*LP-RRl!pi0y@l04*Wg4x8?O_p~
zAa+4}5>`eT&^BxDvfHB~%d{t8t)+s(JI|?vnHN)DVlUEOQj?(xICYFg&Y8Z=U-EDi
zVw$hZT*aMw^?*=}t)w~6Ccf!=gUuQ!&YLhPN1Mk8(=T4;n1m~H#cQ&>NJ`(SAJspz
zV<KgaFZsW*ufgAc<0fR#ubK*7{v!Xkt-|ZS|F#}Nzji$kWMmtljt?#o-Q>pSDp8E+
zj?qMzHkwfCmqhWUkfq^TgzVV*hwI%MS)+-&A&b*zA+pZoF39RlBqD2;mnGFqGlqty
zdIA;n=X!%OFGfBE9EN4#X7}<s`qAc7z%%KvSK5k{OZFm5T#)m1jh7g;`|S_f#VIow
zO1z|sni30%5-Tgh-9(M*Vy~9_a=bI#QPt62=?i{?B}QV%!FCjSxoN-?NP&BmbV|)d
zxM#c*D%N%ikK*14F+_!?f}Z5$pDsdgqvDeUCm)%*T+)aV2N0Tzc-tv$hIft=#15W0
zhjUEimTO_AoHb$LO1WiB?>-#Vw~es~>xWJqymM<jqdF#wPrx#r27DwEdv<S^u*3w^
z?qDT}Xi3vKc9xZhXht8N?*HieU*+NSzun!PSpM&FdHYHK`)T@L<-zs83ab=Lc(EW+
z|D$*4G;ivEl@;{A${qB-$}RN2N~-=BB_nPvKD%@<U9xrN!d58K8OUf_KF?z;Qe=i1
zgn3FmGo#)c0xvK??=kZi<wyOQ3TUkIYqoDsTqj`ft$};S{`e34Q#_+*#nPWI#+Ea}
z3oiGey_O4ukwqb_Qq|Z>rW#vWtj1QBRbwklsIisBYHU&w_PVr0gGqX9Ww9PxSyqp&
zEYxEw$$D%hOOLH&>9LjN^w>(49$Q&PkFBh($5tLxkF8|uv6XCXtMa&dY$aQdU5M<X
z>amq<J$5m&E9tS7s2*EM(PJwqdTb?Gk5z9o)y_&p?KE}CO1jp0@8C^U^_Jix`fX)N
z{k9U-Z!78gZRO7TZDlq6wvwgaR?_v`$}0M8B}>1pnEGucUB9hFHOIAhcH?tA&E<rs
zQQ%yzR~XqTnQ%GglH10b$TRHE2I$&(J{t+a&z+6v2slY<Lt%ku{HFBb1cP7>JZBnk
zp^*}sSw4??ww5fWri@yv-9K(0oHpw`WoE+Mi;zM=m)d?MLEHD8i-{G0DAsq+L)HnL
z?R!5wB;_Bv#-V;;5ME8}V`(o)9yGG<>Nn(s2NgFaoOJ4kjnC>Lf>tiYw2*9FsPI2p
zP~g}Ek4{vnkSEYEeY$Fjd;-Mf%n{}c$S_D$;s%LI+#nI-KglMEO1ep+^6(~!CG`Kp
z!``PmX#b1Pe?j$+=>NOB+j~#?|6}R@bUT&uF2hJGcMMc4PjR<=6W8IX;NRpG*T1vB
z5f}VBTNm*&ctl9@&M|dZU2(<4i~vV$FXpFMEV96x*_6umJh;aFf$znxzBhzG%vi)B
z5ZEn7%G;M9ss72TD(F(QDzzKe_(`8n>=fTt&@Tx26AuAG{TFT}VVmfHg+fS!(j0eX
zlX%4edc6U?#K`cJaYBoVi~&}7Xaxm`*nOOKxLw-81`k4S)VIGwhkWPC#s+9>wudU}
zN!mxo>VQ!|T1LtKC6RPmdTUU^WAql`yBCvt?OEeo7(XV4^@G466ub$-Bazs{4a}X1
z#vUJ?qwMo@wD7=BMJ?3-Daw9GKXiD-z-I#9JPbJ<mU$j2pxu&E*Hf-}01q@doP4|B
z@{M^pTs_8$2R523Fu({Id`><b5pM9eUu(Ax8%JVk$egJmGKk;w+lAN%xnarYHEC(`
zJl;7ZX(eYM%W=KFv?<;T42=-a#9VVdHoTcwV?OPCx(g&S;F16&##?Rj{ciQ65*-pt
z%*ELPCiPC3`5LV=fT$+In%a9B4tW^1Jg9foFn_KAN)ijP)@hDHBpFGGG?9pVB%jDN
zK|i=mCkA0nNnDZ;2z0&{7n|@lYL5Qh-R9LImulA-iwFEu%^xPVfBtv~G!qdRJp=-L
zG^Atqnl599!=c6THBl;Fu7Z<jB?F%@v+k`YhFrjERsyr>CIw-<hdHofTX!4p+jZ==
zBdJ=`wi;?J<-}A;-R5#fRuT~|#4LdabMc*ME#fOHa!5v$T00DYsp`xlS1^}iVyR4-
zv)%hThNVqqwLqArXl3?0>3BA)y!si=Zl=-tDRAY3>VHSg`orpfJ9~RE{cq<f{^MiC
z|4%rgbAfAIJO7xljy;|R?2l*g;LlgqXn14!bS}sq?6Xi8dGyac`_bMHMu#Y=+kxeb
zd<w9ItsoBLsWrKwn`7;hFOAkwf%b_)x7KN#^t$|-E7o5_#Xryv{x3#$sIpn1SjB^F
z;;0}W(09}gR3p4FuftJu6M+;jH<%p^imHlY<56`KDpCRn>%zh?iK+r=JsS+PzH@7Y
zVM2#41pp0{GD+pIh3d$6#o<3_erR{+S*tv8sbxf^LGOK|pldjNVQ+<gt{DmFdYnd#
z;@Vo{kmWx$T9q=1CXK^<?aLAT7KaM72czhbkC7;(iC$JTo=?CB3s=f(($v~Mske@r
zy3izzt(!C7!!SLYP~4h2`qR&)QfGIQ2keoeTfNz>E36q?9&d>XYQI#9%*b@T)-Vk(
z*FOP`%Wgzuqr<vII<ZCBsVv$MCiT{VK1|qsa|l_!dUDdNuXAG#%n{&jGn<DB<!K%V
zbi6S2+%rnk2Xl~|Mw~G`y0Xy_QCvtygNB=3vRQ44U`d;Yo;9}d48q@}>3lAKZ;G(E
z{$_Ra9i1}zOZ1x3_SSp(_@v4e6lg**7s>jC&(zXK0<^oIa}j_8ci>uy5Nr4*7fWRZ
z4_R19hsp;-kbmWwNK;%!M>=ZiPu;0KxppR-l-oCQaA6ckc*Pe@!VB5dXSyUxkKJjo
zdEr^pOK0G3;!QHZF;X+B2xb_AEd(bq_GpD=Lr;=A60V2jl`j@TNNw>`t^^!jjpwpl
ztBvL&SUS;hSm%r{hMEkn+3;Cm^x2`E1HO!BVH{-KFp<BC03ANRpm}Ls!3G-0RGPr-
zfpc%532lM_l6`k<vujK2PZu-TXV6vZ8mp>MAJ@d%8AjHXlQpvCsC5%Vd~Uun=67^S
z86^5r?;?%Xo|#7Ap*{8N0bN=J?G5j$N_(xx&Oo=i24%{pi<eg5oH-*W5cw7<Ja4JE
zEn8!GW9S8b`b|Z%pdRu-mex<_G=7*cdfE%~tVupqsT<(|8X@_^ST;>y=-uj3U7f5`
za4>cR^ZZS`3!Dv5%PAmfzlyaV7Btgo%EluB&yatUIN%6Ww04s~>2G+eYsb~jhkmu&
zuO57?wrceQv_?dUwZ;-RI{FC<A@;~oPkw<rrOW7l$0v7E{}$<grLCRl{m<Q<($o3h
zBk6y-_b<h>Zk@7Y3_FBE&<XG~hul}H9xyA+r8AJx9)bTHpP&GR(cwkB{0od4e@3zL
zxibnBF_W+1gIFRre3+iZZ4nIZB86U5OWX!%#gn!{+3bXE(L8KMvj|hqzb2ri<X*ck
zLmafSq8-j4UW9NlBV1PDCtKP7RQq4I`H=j-1l+z7<NuY?6aRmF{(mu<F@NBIZp*WJ
z2u2SxK(KlEqh&^c;>7Az5{e6dw8=!a54j^uW*{F|%0(U~1jOMByEw6fO~L1UtW>-V
z#^K!#j8GE2+VMshtpF9nVdSlTzXM*Xn=4pQcM5qMxAG`U6t_9TCU-h<zJq=~nSB?L
zXNE%uEq>OBELAEsHl5e!0P(_rPV-rzfM>!vjw3&sK?E??j>ydYmVig%r;>I61vi-O
z`$%=lzMA{L2IRr_?^O8z=2XXGE);Jgwm9yb>@$rNtq*!QIH*Ckw_zC6Z3As(=v_$x
z#fVUXBmzeLb3K}FI#LV`llBO@`N_3M-`LjPW_fFC8=k(mCipD)mp!6VID5v<w1^?K
zF(#VxtvH)Fg>Q~MDh|<Cq_E&a3W`u;QGa~@iD6b188Qe7SSr5SV4r{*?zPW8pKP$>
z?#C|ME217xb`9v#NxR!^VB{s~9mzFD&{*K59yVLb4DrYGEb{YR`?x+QORZ5e4vPwg
zkLsU$o$6e~_J6Z^!h^3fjIHBu9!+MSa3$rGb{g7EB9lTpA;l#L|Mv&O^WvrY<-r<t
z{6JV$<hf}wMUsxgb#n&=1;?c)B7tc>JRz!U5hDjS6)w>)8p3EE0gf;njfxiHD;{Ww
zu7s(yhA1pYGFG`jOE{xO5L#dR1L2Dq>SM_M;SX_C;o(B;co7q~Kl~wvndv1PLoAh;
zH7!AUoT39v-yD*ae(JC8jlc>r?HxD^l1Xi#n?!H3KPJ(&-aItrBY09zB>&LQ_9Fu&
z;$KPIMDAe!=qrUWhs`DzDf>2FJ66^}BdlONj?_}viQ<L@hFir$9?DHDr>(~4br#xw
zaTDXdXy_0QoS)20GN7En**4FdNiOkC{3-E0s=$)pspqC$dFt8cwue?lxsl`W<-`3_
zCd}civC}^4RFAb@u&`uk|4ZznanJ&YZfY*<NMU_knbv>~Po~-7F<B3phRC|?-!q<^
z<@0C0rSiCP=R7c@%R&J{xRPRM**Bu98X#3D_kWG%QFsudgN%i0w5q$uV9Q8d=lgG_
zEgN?InxdouUTL2geiPc_)z#BzC~aDsbgN)J(g9tZ5o_4IePGUlMyprvw5m;EJeR#8
zc}8<~mHFj)q<T>@zHs9-Ls2fI*z)}sZh`5&4;x)q<y05!?}29x0wtFDnjFUN9~Uv>
z$@@4qIV3Lf4)dz5oAr_rI6Q4O)$O<V^qh{I@a`1sCv=5ylkVop*+a9zyz-YXu7CYn
zEWXT96ULR&K3>L)HayQo=c0L&H+p?<J)`X+w45)#T#vL(*Yj3vn{M^S+m2AjJwF$E
zjGQrfHD%USSv&yu?2FmRQu!T`q_Eui)kaXVh69X+isv;~)(AbUbA{&1Kf*>hxU{@K
z<rF9qt<`gKe*jc0P}wlEB`m_RGjK<0+~Y*5Hk&N>x5D55*8F?^Z~w)9|NHuHx$v@5
zlAhAXJeGnTge7wZ*Cs#W91FY5^g{t+&<kO!9WuR$xVy=l@a(aBWebkU3QgeJ1G;N2
z53nf8I38PtK|gVDq79hyUhWZDXoRf8pl_#s(yZ3%lDs*V%Oxv*OLSC`QXCg9i;|qj
zF-0*~_%zxm(8SzGZhxLj<MAsF>f1E<3n4|Z7LcNkjcLG{-ob$$Z8x7Q9K5T3Bnv>a
zRY@VOI7=#?bnX-`61CwQPow%@xTMU(&}6z9FXOR;nVJ*PlfF|Qk3UfiBddt9f16?l
zKhz6Fpa{U6VQhS(iE6^BXEqrY*-4nu4zv7OlNs7firSv`Hc12%XM0-X>L`#zUUB#*
z560I-VdJDiJkzBWVAIBch^ZlmnOmw#N(v0I$OaOYduRjvGWK8By%^c=EdTOB@_%oY
zc1t_4{NLp#`|qQC_#s`peblVKt9IW-&e_7}@guwNUB=1XN`Uoqym&?ZIFDxJ<`DfU
z`<E~=`(taWAMd4}=YeLQ0GgW+M|SW6<;-sw>c~Uq6ltyX!}!&MnTL^b^!yzZF62|E
z(W@(wZNJfy-dUR*S;7)6l*PdmQO!M*s?8roZ$OapwC?=r>m*jh>@bu79T5<ACVBi~
zc^88XhN9Wam-ypqo&6A`dh^Dt`bQ|gaXxab;7`o@T@{L;pqD!P9YmgB=1thv?`UN}
zk_Fz4Os}9*RRo|<BF3@=iYp8;@FE$Kyfl$Gs`q&As=ympzmw*79Mz=j4vF~wn&*Gb
zDkh10u=sD~N_jUH{|&x9@&Cs;|D)UAe2k5|-+Ih4Tp(ysk?HL1VR*Y2J@TCKbVM$(
z{JuAb1~5P~wJon;jV^2`O}PcUbJQ2}e&{oTXF(G?kbWa03s>UidQsf7wx(0hwJ?GR
z`Pv~)elQXZ2QVimDyxY#3tW^K83jop#=OC-!S_CgSsde0R9?CAquYKUv9JaOya?-D
z*|C$#@YSXU>@paS){F5dd5bQ+bK->I>3CKKdD-1kgjF=9O9hImArbkhrMwugfi|Pp
z>8Yg91yv*<)F5ivWPA;buA>xKP0QE8(>?pr8D9;ztZC?e1<C-SPCSh|EKk?b>E1+P
z#5+VDq)i(B5T1|`mhO@=0~W=7l4I)z4XYD80|qij0ZTBxW<aETg)`D_)X{MwR@6lb
zGn0O~AvSs`h>?sT&xlpTMV#0L0+*-wiXUsg{bo=IZ&l;b2}X0g<k`~X-4h;PN6b#Z
zO3Ozk8WNbmLK`_U1R<=burHhlVjqaj_-l*4((QXRQ`e5FO%~YJ_zeJZK#jj$GLbi?
z1*~&26=4z{+06~&Dli8T<3s4LXHBT4H5}T*gh{=TG(#=L#mwnKkhrx*7I?j*>BOOs
zKv&}rMW}{4CY<r<dOz<gaTL05?<;;oS!rx^S#Z_JVg3`oNY1ewcE)ftxBxaXK{E9*
zC0LQ6=X12GT+<ue4Z>(U&j*8N2;}$wWq64`;Umhgt4sa^!a$pf#5uz-7<$VJxSB#1
z^W0g0K?*|5wUr_5k01#@6DvF{N57N1Xsi_#+ye)3YWY4<N|`oMDugjt%MHUl2(C#*
z_|n7a1R}i3CqwI%JDa073c8!0*|b^6mh|Z(fGMYS@;V{k4`Ly0q~x`Omywe2py+5=
zu2tMGvc9kiI(#D#zi4^cL)jA2knV?}Z#;$N8ft+=1lH(4Zl>z;CgdW5=%Y-DTbK|j
z`3aGnE=3XKkk1glWraC)jIM_rl8EyG=EKM5DeKdrZeN;~pUFsSxr6%t>CqdO+)n=a
z52`-rA=Ie)oGVlC3qew<be=MgB2%O;tq^x}1qpRDY_rb?SVokKyrkPGDwv&5nawm}
z%tabR{zAK`9X{^!X>1RPN_x&j=_tx%ufvFUW+g0OJec>X3e}c&eo$(nom$t<yY!%H
zA#Ftg!)+Dc(YP9|M6P7%AAZiRl}#ukT}H%^#seZmKOyeUd1%kukUfDuB!zh4$9N*x
z4+nsD1x9u{?G){99bT@>veOt-O@=&3>m|m5AVlfn%}+v<-w|{N)(GzesVjq#=eyhL
z4SdDU9einqK?PEV)9fnC|9katrT&Sa4E$G2k<ad-o`TfiC(nQAKl$%0Kt?jt7_(cl
zHt$LjSJ85rCUUV<W|WGK*11@A)SgXJh~uY=1j^th1j5bp@<11!5zHr>L<7B;rebp5
z2%DA@;7)vP!#*pGI?lf^-Em^KRJ5hYrV(~GvSHx)jYfTv#LKmuUS1n_@;HgMIhRf)
zUA*;%vN0E_RA^AHP?iUJOs$d1t9KX`GmG+4oxC~yZQ@>oJm?J2R}(!wgUgu@um3Sa
z5)Fqj_`th3*;G)@q8`MO2eV?(PZ4TreNC}ucqo)xbMl)+!zrRap-?DLQ-7Aaco<7q
zKUh0@OHN<Al-8b%kMhQsp+CeiO08zZnuF0KIVovL3kiphMpR<Tz~!y7lP0~EM_P*@
zhG-_hi(JMF1uzV_C-ey>^a+||I6Hu!j(C>g$rTzVph(`aK}CS+=g=Dp6z30sm2jwu
z3JUT}F7+^zZ7ESGA{;^67#8_McbSotvfARIljRoh^cAp*3Yt;AybzSeE0<?XYvBc2
zr8h3gVL=m*a@8t3x35F%lduQiI9F@O&2_vHf8`AANL6-<p4`C<TCzts6zRdE02vck
zaozjkxVd!Zn#x-jto|jZ%zZ8P)s|-VB1&3A@fnuAd^B&k71qSUR+d^0DNt^|5@4dA
z13Q{g&LrW=dk<49jDW5-mDn&xpuLT6D!G(;T2pVMlPjkM{!Z4j(PIJ4a(EU<5i_N^
zQOFNILoZ=G{gl>c{m8c6A!_Qc^!wGuo{w;}Xp6CTEUc-;QDig-<f$xgR>y;}C!t(a
zYPVrF7h|zTaR)$95dqB^cNc+?AO7c>RKXH$$k65Ja}}81Xo;H<wAg;=`h`{>rJ>50
zlL^N6TrP}mnP9R^cx`%3!FzPzo6LNB$nrOsgg9C}T(>OsWrz!BPGQIeytSiLaB@%M
zW1u7eP|K&T?>iW6HMC-KDEyB@31r6KMvPC&|CGiHZAux=10hF?S&PKmIH}ARQHLgR
z{-8kHSJJ0kIi3T4qPfr){fFi`Puw<qgC^{LuX<YRnpB)~?NOnD&vJ%ixTfa-aje1k
z*QN_1_^1>S!F9~@#$S^*)xp3Xk?Hj@DMUI{5*^r2BgB<GXgX()L4@?Ki`jF8#&8Tl
zkT8G5NsZhCxXAl*3@YwRJYgdvGA87}Fk+U^rDLV<qM#a3peKLiUKfAWp?GW_7D^0D
z?4MF_O4vYS*-DeP^Q26r3G#}xH5f3IeJ&zYRMilZz9Z&M^`s&ENrk*e*d@V>h)ibO
z8NERx_DnTANJ?uy!KIZ#nBS-xG9O$qr<+>y@unB|^;V5(wiEt6N}uqiT;eEkZE|QF
zYq|WWD??_Yxf_2z&t=BC;J)K@H!ZiC!$Xc8wmY?Yrl6+dWEf(Wz$uJ~w!nWP4G}{m
z)DSU!(pSJF-5nzx1wFZfjzX00WVMg}34Da2P8xI=y0CFB#>N}J_vxYV3Q<Z@+7c!&
zs%}YhcRizi)bN)y&$8-AZse!Tk8B9KH6lK?OVy3`KQcU7F!&~*)ujh?&mv?oy7KrG
zw&gMXKaWoiI}aNFdAkzN|FiSt|M`>Rf8lUz-a%c{QFA;!_jq`(2TBi2&6kUn;#Nv>
zV0woyRB~XNpi89&#*#^ifn}dl^8!;v<>Flk36+=;m`dyJfeCcwg~Fu4^jfy~E2Zo$
zZL#-G05U=gc0?P7@Q?q%pMM^sdsT5{+e5!-4T`gGf4b*D*=scW0+aUoB@y#Qcs+#X
z=&i!NX$=~wY5~<!aSNs$`L0A~wUK{F<rj`Y(UR}R-bHf@rxak3M86qu(o2|Gg;NSd
zQ~YMYNi7jcC!A7XaZ6?@;rQr)OqfClOp}^|dGf1-6v7GPNMmfwKB4WX*&~Ybuo^}_
zVWZl7lh4&0>*JSL%3!L#D7CHLAn$f8vn>HnOTwGba+uYYfZs;~UK2|kffOuLvhOZ|
z<-*LnD`H8Lp;`Y}ZziKUMdU{Il|8x@z9IMpNb8ltbiwhFQB&GX2`rz++ncK-t!7#<
ze#$1foBe%&Og_r>R7f8IJ}p;oZ2in5EFme&aNM||?~g3nq?V@FTwg0@=#AqV&(F*A
zAKzP-qnVFa1}s=rlpSfzNON{Kobw`L3eMQ2Bbq*A^{F#*FJ|^V-m;@HUM;+-DVRli
z6rpuOz|*Yi|0DFwW%NHgaQwR{fD81$%3e8||7WkXxAmm|J&yi&0xZz^9v-_N*L#gF
z-v8lW@>9zT=pqDZbiigk8w7aU<08B}^2G^16*sSW^GC6(<NR^@t?^|b>1u#kTq**L
zTnupPUNWf*FsTes9pY}O`hcvYg2ut`qJ$YHm0qLUy_A!k-b7QfN5ww_S&;uYhA2`{
z?WhE}jLbqHfz71}e*`#H`b%h56I2NHy~uwdmdU~&$1+X&TL#PfkpC2x-P8RnME%*>
zU2v7q7oaOukoYJh3rS(lgQ3bTOYB?9I7L?V{n?mqr3$HM8x>p4?Mab<@S?W~=XYi)
zIj`g5XnbVC$5A58P46l$4(aX0<sl+<nxv&yHKfa|zQkoR{WvmOG(}d`NzP)O(4|$A
z9jq#@#=9SvS3^ZMhP(1AOJ-fX+ezFw)-I<*5}5q02p$nsP43TGur8{G>^v^4#>17~
zXJ81vNv<ffniP%pE<R2Pa%(1t6(rXs<UcMTJXrjPtx~159hLto+dEJ4-=oTZX#G|)
zAl@wzCR&T>;e7FqYlAL^%%;+Siy=w5{Wd%dkOvADo!f=6IPv~@fIy;BXSdeI&3F~D
z6P<YQRu30HPpaKs<Fm=9#Iu+HDxRlHpvwW%WY9<j%@{qUj`k;b{2w<;^LneLA&DX)
zn$Y=S2sT-Jzefhhn_7_rM(LYC+ZfART<7UD_d)dk?#W*s^!&G6-rkO#|L#8V|3~8g
zIK0IDXFi2e%-RKrkCO}HJ#PqW8Xl_s+NC>!>Kkm}j;t|{*+73e-={pbI6*}@K1b<b
z`HYYlYUkRnZ!?*31YYB1S9*p0^QZsx_$Golm|UNp#5NFmn!bH32BU2qMqzE^=+2zL
z?ovcHSbvw(X>{+virG6y+<@UDP#)$De$CFs*XR~wqDura+e}Gwb)JuPG;$%ld)nyL
zyC>C7x2{qgYCY9AvCOu(cWwf-P|JvP;`tBr0m4nFAHA(?U|)OJZXUPQ<#9IPGf&6*
z^!QNvKXK%>hnc;J0$-^kk-8C0S{xC?#^&CH09w0xU_5gL#2Sum?wRGEcIltH{}0??
zaQT4xUuipL|F4v{pZ5QsWB<Q}CMGsdsu<b#__O#1%AaR!p-TbNx6w!i{TRUKGdSTK
zfY(~_ARq*}7Bf{vaTEoOeJyPBVUtCQkz7qRi_$TH=yC47u+9^s{Ws?SVp1K$G83s^
zSKAC0@c->{Ig<akT-w`v(*GWb{}&7o;7j*f1(y-(oMbAPFXU3{eH#?4baDea9_|VO
zg|Wk1bhbU@ir3Jkz<Cpj*J@;ccPQc?#<axDe}fH%#~XP2*#2%|*a^@GU(kKLunb=6
z|4Zxha$df`O<M=m&H<jKy;Fgnq}%VcEe>ErSN+*wVEaCX_`F%%?^O80%U6%QsQ?(U
zauILzIS#2TLxHAz3I(m7pxKV>^MKjoX&}P%ywGUL@+P&{SA;F%8Ro`0q<A=JJi8>K
zj`t}skIcv!IKk*98$2WXCl7;|D~tfDmPV5gW`eu%zCCfBfW5Z{?iu^zKk!fS?EkU%
z?rm-4$l~z(@A`T6J80S2qz$(51(I2AdkByW=SP5WnaOOz@zdCC=!xz2xZ8k}&H3za
zT_m;C7u#Ht$>fmTN!+@Wq*AF=DwV1zty251i=OLuE3KgS5a!19a;r;GTWq^y+a>7M
z+qcK$5qL{_gD3MRbkrL~UFm{qAR{#*6}J`^m}Jx|dh#F%<P7j7U9?wL%!@k2b%?H?
zVuS-#UZSD{h`vxjiHWv*y}u)TC3BG1^rq}VK|7XTYhj^{g7y)Q?%=8E5I*o$ca=Z8
z!3BPH;G5pu{9U@v!gsRlqZQD%sFgvNi6NuOe1P|p{jn7!mAjpAG@=|E6bn<mkZk?5
z1IK6#igA2!dbG8pys01K^V+!|KFx>_dKs2ojC$K{MnO}rA5?(JI03h+Bkg}v$l>BP
zkI6w;MTy3l+$HuuIf=Vvc&kWtsqM3c05P9gJ0_ocP{gMz4+sNDJLvU*Zd>@8lo(qG
zTL^5W*T^UE9kqL{xHkJF_HLM_*exmn)f|%8=F{F&?f)~A`Bsz-cs}9u2u~Af=hoCS
z1O0@}p1Loex$Nu&bLsU>Wh_=gT&XQd>xr@{r)L;<%s@Q)wmz?ma9qR<H}Jy#(+o12
zsV(Ke#Za}WACEHmw<iCO2HtS*^zI6vY4U&Lc{2X{^QEOn{m)nV1H1icucK~sX1N1;
z;ENwt;~(xE?VTpHJu)$^JCYd_lt(kdGu20lD#k@ji1ZK4eCXe#FYsl(+uy?g%<c%U
zd5*Ty*DuMgYGu}(tk_(x)GM_)>)ZOOC35Yrs`0f{X;@!%>#Jscne`f0Uc=0r(|Wcf
zjxsb}1(9u`)tP-7r-|!2|21&1!%=ui1&<>)@`?pwYs~?qO2f4f({V@2kTi5xQeOMC
z4If=A*&XKx6D{kF4IEoWEs)H;UUP)TgH2jX7v12T79gKz+Xe1Y>*l7(QAt$5rO=rx
zMB@P*wmayB5&vsOtA#jZltr~0W(Ld_9GAM4#MyvMo8!*F?^Q_BL7xgB3aN4Zw0Yco
zwRiAhbFcY&=XLWG!}m2$P#5|Ix+@9H>yLT>tsh2VM$ct8kD+tNyZf*9c2fP_+S}RO
zZ|>}$zDA1qN@%?b{C2T`9ehO-3w?=Tjz=?K7CuhwWsuB6;Y=Ha!kSL7M`b|x^X8G5
zTP71<c8TZYqQ!y;d|?tqWE+_fvj`&rxH=iI>Z((mRIb70l=vu@aWF`(V(cDEFj=p0
z<Hc$KPuUq~0Wd6$lOQJ88fO$u8oy2)x-OrRn;-DrfZLqmvOse`K-#fL@#_??7U|*4
z(26<j8MBpnBQdorj|Yue<$oz3elhvK(Ws{Gf0v#;f0X~fj{g6T7yp&Xj3wt8b~Lv5
zuQKw*|5@P~;(xM=A^wjf`KLsG!>H#Am|&&)t2&ZlY&6j#D{J8TJ&SFv#M{(`7d7$C
z8tUXoE(-cyKXUA%&|C^IKeezQog^DbSyUbDZN4@Ps@T#`MVGmUF_i^3-hb0*%^V1b
z_ZEQIpo&ACB!^*+;b8`FzuxC|{f_o{_ZY}T8@-taG|^H|d^4tcvy;~ZgFPwIWi8wX
zlkp_*TN;O?IBQlPQ^MHvTEbjE36_wY$V_^A6y&~){CDJCcsHkTtPQ`Q{ok|YYRdla
zdHqrT`x<}L9kt_yEq8!i0`2;6`3`aF1tv7{QjmW5^q`#`iONURM>~gmn_D|#?mhgO
z<A3-k`ul{HdQzuX(OjLMHD?2G-c^2BnE%&*%4bClr<MStKPCZs|Neg@fd9E$bw^fr
z?p9rOzj(Lm?_~9NcdLFTtDjA&zH{QpX@TkeoG1a?<k8OlN%MH?;E)AtO(-a+s^4@~
znoT*X4J=Z_9##J5hw{R_Q+#*!umAAN<ji_hfjUez<Idvek=Id_QGwZg|Ne*f@5}Gs
zFT8(053>ar2du%!-@pIY`}hAbU(hp9K-tclQ;JSYu!@DhXqTir!(lMQN7)p8(&}0#
z7|PY4<07nx_kD^7!=0oUxLk>zr*L}xb4i4k^d$e>lWEZE=fu10yuMjT|AYAdY3S`>
zUgHb;f2r1=)l%`_pEVx&|JOPH-|kO&zAPh3!!I_sOVCK6;I`XMBYrN`bAbMLc3<u6
zH|$tl`f{e(HvBRhr42vIRQyq7`<m;&>-H~3c!-eRUGKyOrrrN9KYNz2|E)fI)c<|0
z_0JQ{>bfvz!r9rc*Mk(aV}%7yOWW<r=+qeGhU+Oez{1zw)nQ9S%QD(p{gBKHyIw^I
z9b4T1r#IM`f7n*Var8Ni<K+&?k{32;v?RsbU2#6T5dW;#tMsPy00Eu*Zl88lRAf5L
zyy}%_m3pxxc2V1<uHsq&0RHF=1GczQ(GCj|M;s)RO(XrD$7aj&&FRn$FTIfK4M%-`
zDkDM<P|&8*XfcXW&e1G2Tw%spE|X!L3g0QOAVCOeV{pZe-_p_GM}3UoNuG##lq{{d
z5eK#nFWsTXE4B<HTYN1ilgdi)yBMFZ$1xtL<jQm5yxKWwqSS}FPO_0Z*axYvxWZPw
z*@{5Ii=0z*f+d|L_eZ^RR{y%gixEZ+45_<l7k(U}-jYp|p|AqN$W*OSXZJ9)T_w7S
zRvN;AhR?fRQ;}AYu=aXv&b&`{)PDaez**o~y7n<1A%?d)51@<2<c1Vb9cQ?Nv&s=p
zL$~7@KmZpn_I8?koBOX$H(%{IYIY6|amU)+GsWMyBTXc^coFDTgeAgE-}kyK`n^{l
zv74yKyNTFV5h(sQKrqR_Vjm_*1_h-1PU(}gsVs>d<&mjg`H}rL5jY|3802828SZ=S
zbjnKq{C+M~ap&!h9cUn~o6OSH6VvAy!!)o7BQ%_(y*~IEZ7Q}-kB)b=jm3-IeY6}z
z{aehR5FO#70aHVvI#76L>*V0**SkXb<!JZhwq01XX}BQd5#fe|D0??vYZQ5CTZ-0_
zY>Nu3?ls;|sFvjSXHgO5SFu|#kcD=RHeo>0A{ApK!tw9U<1Fy??J?^9e%?IVMWUBW
zrHM<0@()f=4o^?`l7$i|yp8d#t`dxjf=@A39nzS2ATUKH3L6JP#Y(y(8XgD{(@~Lu
zj5Iogj)R<!5fxj~W)ZL=_7i7r9q{M&pkBVrf<*_;9AJ|hn!T9`jhM&T1mh(0s93z9
zIZ{tF-1Fed({e$j@b^SrXY1`-ftKD$o*Q%AbS{ToO(i4y&4NF`1_qN`800iD$Y`VR
zCy@U^ZW>)IUbN2_n@yC&n$5~!{AJGn)%vrP|JO(R->)VA@Ae~(<xMyL^semjko|c6
zkJtZTNMXLiFSPz^OV8>l`~Ui*{_kt8f70j$_(XC=jG|8YN7n4|(Huo#JS3qH2;7lL
z(O06!j~AEGwMP#(w-y#)&+g&lMbCIAi%+@W0PO*K-R^}4$*C8gTSbmb^TO-@=E7M<
z9JelgysaLJwewK@{x84Zsd&-l$4U!LZdKpd${WD7=?vYY!`$c`4@;SkJzobO{chJ4
zhokeZ-@+K#{NS@Bt}1X|s#fsG=mIVVV_oHNk5v2QQCGb*RcBvAn^6+K%8*9EIjTJ9
zr7WDzW#FV9w@h)90OTSxwn+MkLy(lBu}~|FP(~<TS$R!1#)bId9Y4DoaE)^}^jjRo
z)DweobV+d<fRU4DSQtGl-0Vu^HES0S;C&nw8wZf?q3hiUVgv#$pxN0M9Ff&SFQwSQ
z*MSl}0a>yeb<mBMe7O4i2&iG$r*MZue%;E&u^@FqMr&;wp`$wf4L*MJKYGf^7yjEG
z_klQCoKCjaaC9J%k%Jfd5i3;Y#n#Y68g%1uOf-tYD3Z#Xt_2w8i{DWjA`V^wkwSvr
zG-@~%8~*<;Rsh3TNzOl&mI^PP71s6OvQlrPd7jjl-y?K}<4~o*&k9<jRRDxHjGSso
zJYB?u#VA-LEGu5TsQdM_sA_kHK`*8x34Q2Y0NHxOCRJ~$Y*i;GTR8hbz#1}0?uCK`
z2%|1&*y@)LmB<qez5wXrgKr{YCgDQ{x>ESd9R1`x!I?-Ko5P_?Bt=B>0oMFnq3{>9
zz*p1agkHB(qBx>nG{zs@BEb90+|ErjbX)9vfL`Bwc&Ma+zOlM%4obrRfi&GKR^mxO
z!hwYmj(QaKcaA$Whdr93F3lC0g&`dadKLjtuqPE&e_cUREucCNS6;jnarF@UP|FAU
zKJsFAsg-4-Oj=G{yPg~sbhRhW$FxQPJT4q+vRlEQkm7fEHPIqkU&p2^C%+!z>3JK~
zC@&F7svebuz3u`Ib44{0GjG&~HQS{iR!*^k%x=(>Zy`de5xEyoR@Vby7m&&~qw7Ep
z6{*2=p1xnJ%)j4wij{hl1w|@olmrvyK(*q#>e;*6O1XB1U?nb^u*tYRJUI0#;h^hB
z&eIZsNEdi_QyF@Y*7ESGzJ#x%X|8VQ0@p5n#*>4%vNPV*&Jbx&%hj7|;!o`<lEL)y
zlgrhf68;Hc!{9pP$$R&-*9>9l0zExF!w<}QioL``c&Y}h7$@C8F?Ue)TMnu|<Dlvp
zgKEK&&lwbEWeiHz$Qo2#4T=h?5V`B3D*leQ0q&?P7Y~oS>-Cc$ief`F;<jQC$)*}-
zt433R=+_t=0SY?UF|%?}JQ3BKn%OhI-}i<myl#xOpqfw%+NqkApyP}v#VKCKJDu9m
zV4Yv#-XdRy@#gO1OEJQ02_M*BdzwpJyWI~^Vi=4rq%0!aYP*rE`j#~{m`_s!Skx6N
z6Bx<y$<gW7NsL&t0sQ^$sf+xQU}l%7wA@f4R->;n2xwz)iijw{3hx|E+)P~PFSciH
z0z&QrxGu;gDEy7E52GO^(i|33>j(cyk{|wK^%Y=1viud79t|<X;!$voWSC6e3c6OZ
zK}PB)k4udL_gOg#?xwt%z=VP<hE5VxiM%U#qgtie#2T<e7E#xF$bw+Kazg?g?0)Ay
z@X>KWcS4qq`#}Am0Valw87&%OV$hhOC?_5>V1nT3ECWj$3I?E^3;kv2`XIO+N5Np1
zrW@mlQ3m~;#(pwoLDaHZ!z|U>kS^N(@|XPC%$m<+ifl-8u^C&OeS8y}oe`TkWO*Az
zDq8R{B&xA;VLepZ-q7p7fBlw6azJw3Sf3Ig8f6<xZe7%hE8_3kUlA_BQLitqNQ$K%
zNV0aGzWYb}?5X&NZ~&jBVhN?oUXdq3L*>Ptf{K(p5hOHgfiz2$%yzMZW<eH6;Y8YP
z10v!DRIrwDMRTvXo27WVN-?TR_5$L)0-J~eX?7O3Asd*JC&Pb)YySvK)ZfxS!VTdN
zw;e>o74eU-IEN!JrBvCH=DTsrcGu>PWMAe)YI!613k)yQ<r2m~ik8U#MxXjpVm}<V
zSn`j{fMlK{=-*bp|K3IK+e;qVVn>YoFDY)c0<Y5vS=z`DJxqDwi;D}AFBMm2-|%<3
z9^%p3-91j*cY7WkK%U+_h12;M|N4l3U9<Ev<8J?#KWe-8^w2T}toMLURxWNlOTcX-
zas`Km6iY&a3-Q?HqROW)sjDzy6U;|<R;t`j9&&Fon109{KQ+bg<i}a*{+j@E$(h-E
zyhPU~arY!Ym%JF;I3Ga=rAnvm)4eIOOb1(IRiM`NQ2smf=>9+HZdB$set24h^PlLt
zz4Nw<3}q#U-#f^=Z_x~ET_!bQX^|YIrr}NE77dX~N4GOWL#zWT32ulGo1-psg7e=H
zw{u4_U#dj8Z)L4jBdsx%B{k&1`ERJ^g0%)IxPzd|4atE%NzaBqD~SPC+`+)>x1DYf
zd>9Ry>Rw!SFp=uWjdX4g(#xZP^xjP5<|eu)HJEOnb%-$P3T@A+t*<cK-j}`g75;LY
zZI4TnZ#WI<LbuQ9G2-OF@Dr<|K1iDr`f4_8N?GBZTw51}o;z$^ItseQxHBnD)jX4Q
zU$A;n$f^TF`SiI^$X%p`Z|VN87hbhk^<3$@(Ul)Q{QY13*|XYm^8W8x^)deISNXH{
zubW<1g%n($tDzew*i7j@aD9&M1Lyv=@z=s%3u~LBDEQe&4*~v1;1^J@AFj{IE3U8w
z60;jtdVXsdgh3}l4!n33Mx(YLFfm<t!$l^dMRfBzM^J@W`&+pz)nPaCUwJoBE^kl)
z5_xf3UZ1mejVIk`b-twk_v9j4o!elc>910pTHEa6{sgkem6rxXES{m1Yxw^d&f<m!
zvF(Q-V2_V!+unHh>4y`1*aQDGKA#DPudLW3P;qV1F05hf+zf#C>Tm2AwheMoFBDrr
zpADeb=U$@c)PK}r7=#u)&r;x4*saF+3QwX$LpgH$A2#Z1i}bIRa}1NT@q@p<UKRfm
z&}Fd#e>c_^DcvghGaic$S-RfP0(*s}^$LmVYl~JPV;GC^IHr&8Z;l*aQ4LV40Va-d
zo9?;*z{U%=_2Jqbwh_%L?b}Dk+M+T0Yl}#R^ivam9SFaEJEE_{|L#xpHk5plua`vq
z9;ALe?!%!0y;E~X-XU36&*m}-KQZ-Y6V9jX>3CN?!v&d>Sv&ij+gY3s-Mia%*`($N
z?{)pbdR==!MAs*Ex$&jCd|yP@9;VkSZRT~|fOXxt-MVhT|L(u8Z7BKcx=|91Kl8f&
z8q4QvF8ao#%xB*uN0`{<rAe8~e@b>SaY)Y}Y)JJ7oT6t_x}2Z+<ChxGeW$2Cg|HrC
zNVjJ9OTg}zZpZGI;D7gL_coM#cE3~-OW!TK{~B`1*W3%1rcm(&OnsZO<I<hi?6a@K
zbHC;`GI6~BoH)CUZ*r6Q@$LhfyzFa#%01}WJx09rU?Xlk-~`v|)2MRtkS63Vf2jfA
zcZwT#mYiy}2O9A#QV8d2Ib>U!%iri<w~|Scn$Rz;Q8gzli7&D%o7*5G{ai_9c4&Vi
zXz#3PK|7e_RG~o(>UPz`gKu2Bn;ec<5y{cqTG9EPX?*{96iq!os4I{=E!z3Sn!WXE
zo&RPRtGsTRv_3UW{Ru8%9#`f!fR(9>G2TP(a3iT!(w!G$0>C%EbK(Lu79%g0Kih^0
zgLvE{{{XDayb`w<CqI*Q_lyg%cHdp`&mqF*Y=>+Jf15>!Q=^Vk_%!KkZX?Z@u~0Tq
zXVBKS%GdGf!>vdO=%99Ap8jV!4JDZQgVPZ<at}J<>feH}eoJO_UtTnueNqjkZ-U18
zuJd@4m_!R|Vm2qDZ<=At9k67hImf1DcB7cu{Q%TvwW^ssQFpUwdB9mrR2}D6tvw(C
zH11CTjr+}C<NgFNqxr=>2;gp}58noLB&vSQuUfwiVa;&cdT-KNx*ugM-Ji5(yz2c@
zq;<Dz);~SnC8}QMS8dEjvVSdr;9(h5L*lpR|0}ltZ@SQ1?`D8L6e4_W^kDwKo;|B2
z{eRb<H6Hzcf0aM0V*Y-A{#pPK@?~#K%#_MmK;@Uga7Fy&jy`zeC(l3kx?=4TzbcpX
z^<Nh~*YDzkfeoYHv3#{l@i_XI6iQ0)LjpY6NEcMv%My2itFr$pXMP2pRh7iT3Y)yN
z{T@FJE{YYSp|b6AYt3NO8}wo-nXQ@dMjn91Gysh$0P52K)TaP2Zr^gtEKDx~1s0}4
zu}W~U*UF~}z7uWpo&Ym27zP;48J};rJ$%RJ_M@(LC)V?N!Enp~fQ&Bs^Z?XR@6yFp
zSP|+8da+`(X1S^}{j~kEd-Bu4>516f|5g05d33b7fAVWZ9AX?|B&u*U^d!aL<F|1z
z8j4`(1AVz&A^9D$It{xO419hL?Xur+u1%hZ#JSfAV6^1f*u(cT(nWei!7#NyckcLo
z7u{bA@8Z>VhTp5EPno4MpW4H}7V?SvKlz0HiFPmb(v?0OwW0mpiFd_!beslsg=Z_`
z-4CtO-_A<Lq5MXy2<L}ZC3~+#FHmOb@s!dvtMtbyrR!Gd>nWuhXR2_KeiZp}62AJo
z^1bVP49vZoGul})e$SmbX4c=SB?m|fnBfmzPEX$b;GdOf4p+od(S%l<OZD(`YTg_9
z^WK;=?~U(i-XCNp8xJ{?joZ&;J%1+alV-C1ZO`O`%uW3v=ca!9xvAyPO>NTL)V}uI
zJP5XH4~gyCjM&~bO>a<;aMaFE0+E&pq;HpEdL~Q>h!8#<-_sCI4xU5;<K#%kC`5jl
zl7+~{j!|e;Y+ZV-59G8^jpc<O870uG5l$}M4N5O`!tASo|G%c3hYa2RhZSp`s6Xc*
z`mBg*Mh?$n7C{x?IY0Q~A6X+W&S#8NEC>f`ltpf>#jHdil60?_Y@8p4>+7{tV_0p0
z@~W8~Nq{RF-$n6v@@n*lp%fRzR~+u_tiW0kHY>n*CX=aut~9xPav>xbO5Ow@B7xiC
z63xfvMG~RJtXFb+Jr#jPtDn<02KJ|AU%`gb5aRaCYyQjeE@(a6jJDW6$_WOt`ai?G
zQbV7Od!?%_W~R=xo>SGnbN=;U<9$E|HgO4jyBsXl?FVCF+wpZjFfrDa-Is;MMr?nK
z_yzsVpXBdL{z?t?4->#rJ^9Whuv?Ay4-~;BE`~o=2Ahn++X-Q@KkYloT=RdZ$n_mD
zETw^%B6&^cTeFK^;=Z!TpNMrS^ge_L_D6BBTgzZyfrXiPy@M!re@^yI-T(K3FcHx7
z9#`X&;{PozSCjt#s?TbV_y1qxkBTl#o8dOjd9hG%+wEox5EJy8(sgHB=J*|F|KLRI
z?7rIBZ=UQNpEUOm-kfmI+<v<`a9bZRAm*EBsmPIG-<&KpUml$9Z*wG3gdC2}b@(Eu
z2b;6kJs$>9PJv+H^)I?QMri_`iokh%vUzfPEasqLa{1lC_?_g!y6uPVImPj&D`wps
zNd*jC(CFsk_@zc8oKXC3BO*>&|1rMq!3w6W|L037`=5IK(f{XH`IB6>;%Mj1>F&`E
zkbVKf{&Fbg-Tke-)9sz+*69(&``z9>5(oRl4^)J3j2^$cw{z6oJ#N0*Kizu!mR2am
z5Ert@+qW`$xQ^$nlRk^`L7fAKBMWGI9dzSPe(D`DSKi*Iu<3I`uE<HP7W{szJ8FAP
z#YB8Bbi@yz-kj)hU`%KsM;McM^Z01%nAwU>tF#7#0+jqcS|SU$UeI!W_>`#gS@KK{
zZjDjT4X7fkIFAu6V7z_{`$x-7nB1_ythWig>qC>HDq=wkFbwI<$=1OUMqqz&w0ZRF
zXO;NsbXTWM;$E|_|D&DF?bkbBV*g*Rr}qCw{n_LC{~CYh=h0Y8lN8Q68YdMBTR}e>
zQiz#(vFCLn(F-uv;`u0wIKpJhBe&rw`D3U{S5Sb43KZW4ks4hJM}CW93X>UBHw(tc
zAm~x-0Njg5P`U5|qd^Wmip9~#l_ZXjcj^=-up9Y5do9fIdmy%4d1BxVDGErx<v~@9
z0MZ}z&Y@F5NBAn_Kv*L1xcj#*1D_1FqHE9VgG3LTzel<4V1#YKNv7qag<uH{*9vV9
z1Bm*4uZ<%dj`|UvQF+~x4Pu|b(kQy#mDjBlzTf@d=7|p&|M%H)eL1=RH)_j|>;G%~
zSzNqtpkIKH73p_z5pz<pet*PRzkwf#|8`r!xmf!R|EZi)TBT-K0!sk*iS&#0IoPiX
z^?U0`zX(DH`@K)7Un!G_Ib#`J&c*3#egKVnE(~0yt-Xm}jr_$ohc3TaOl9o$Kxj)R
zzeXmza|P_#%G(Nt-r^fnOKcPhSApL)`2H_`8$^wjm7#}P4yx<CIf_5Te6jF}4|Jpk
zSp%fBqf4hK=EWt^28QJk;uA?vVboq(Y2l;wwKXvZgX(sJ7Lv^QsMGO=bC^tJfQ@!n
z3tG9do&lA!p<S+91n%1fuKezdzd4(KL;XJNMWcS`Um(kFUAn_}OY`&q70Z1td<LOP
zlJA<1AaSyxLLm7Rqk+wMn~@hr;uCB_qaOS-^e}G2kK*$x{67YgUv-Ull#L{a+A|3w
zqbEmzsSS~snv+XFU>m0fQid3y61KmKCSj^1)F-2hCrP%4LcJFqGh(H#Tj{G;RyemA
z4P8G9*Qn){6`BR;9uR%Jk>dQx06&LR37U{WI)oKl1%Z@|W=o>BA*&F`>!CZ)Kq&mO
ziq#f(Iz0vJ8Mu|*MT~yNneDkZ6mth7c<SGa>552LsYC?O0x=;XWRl&v*Zu|3O94R@
zt{3Bh73bsMSlgmFG0(s>O<vR5DsRLD%LSy!aWiz={wOpG%9K>eu`a3V%o}r$<5hjN
zhZGS7!Fr?^OR5zkb#?NL)@dU7t#cjA%AW#v&d5;!Sstw1!1668$5IlBQe!G5M^v<d
zdIP6Ep8&9E(0qM<Y$Qs`r!fSP+f{^6T}85jS*9g}v6}t%D*U@<;0(VP)=dP(Oblxc
zd<<+nmUxz^rChn`d2Z_xOL>5%Ywsz@trr2QngW<K+E=s~igwHE;^Y6+#e!&wYYL+E
zkz~&nJ(5rn*m>ar##B3mW&N7qBZe=f5T;zb6$Hb!+Qg_Q6*2hebJ+8bLtvM%qjA)0
zT*w^M9rg{MqRrrLPrK&FifV4ql9gLCS8i1pDRN~)^G)h>34!rX=F@66e3~ElXU$+2
z@V&55jBCI1F&vbJz@PM!sAqxZ1*ru;<+P{<Zn%LPXuw?1m;&={vM8KlD~a`rpQLhL
z4XHJx!I&K?A%07Q>~(9*ABRm?>#GvNY-1j((GJaV-q1Lmn+3n9#(Ja2O0$)uvaljx
z!^ZfCDMlKX!LWWUeVf?!p~3|QP%Fi7f!$wU`as+*d?|zFLEs6aV4%SCFDkM$4(>Nz
zaGa1}CGvh1isft${;Gv+ALm4^VwtqE@^5b#IK`5^HN}eyPJ&GR6aXBTg4(LgKM1%D
z=h~(mGjB_m9)`5TBPI%e@sbvoxJPx7H;fgQ+7e<rkUu`@CNd^EH3^GLa>j9?c^(I~
zq)2WUx@&+DCzmw9Pldnho|8Vz6T2rnN1G=*;=MI6NpVDm!_{;qXH>%9N5E)+elkjw
ziG(K>Ss?OE=iqpOVaray-uThaDn!-wRO25tn4Eug$<Kwn`NNhSb7-(y%wZIBgDS$G
zig)F99hwRh#R5Fcpy?AzCfE2JjGq>Uc-TO~7y}<*c?HO~jZ;MgKB<W{MZha7Teuy&
zAXYC;?v+}ANEl0kb_v<OE0FWHIEraWMZE2~uqkvnWGVF7@5~CEafC0^M%@t&a$^ZI
z`UXS5R3N)6pLCj)iP+TVLX6#st9?TW?`q~~NiSr`bNGTTxiWRl-xe0)4`YYV17t~N
zCiwtA>w3;_uq>5`xR?g;<VmV46}Jx?Op$m6{hUS+F&D*68cvatowR~(Fbrqk7E&CB
zB}h2ZH~<j#1l%a<%ggf$xv+ICs<Ex+)>^2YncO;~K;0_9`1`Eo9-n0ml7<Z09Caff
zBobuHCYOgJb5{xwaJ0uG407o6E)+@a*vYMW%@Di9&B6lh)-hsqzabTSY<}|~t(16F
zI3f<yw~9x?yaU&$@9=>t^r92e1JYizLu$#k0|!5kC|ru?aCQ#5BifBpviy8~QKiTu
zZsOQdtN*yl=W7GZny7i@rA{#}xIiU=%pHO82FiSl?gkgNs!_2>(5;iA(Iwdd-J$l-
z?L=k;d7g-CsIz#w3TsfCT5SrIjQ($6|DShAULnnaU6|*Q&$VlvZr}pTFpsvjaF4gY
zW(9e^&aLq|SIIFzeTvj^9O~an#Q|Izok~-G2=U<nBOpuC=>{N((_a~f4N|TM)DhEZ
z1pck?5s3z%D4iw)mOT8sW*$8-MG-eAhwq}xv+@SRc%$fYFStG&`oMIe@h#zgO2fW}
z<G<q5HgmO##aU|oV2fVY9fX7%8ms)20xC>6kqJj>*R3;N6;2g|e{s4Kw^N+2RjbuS
zRbWN&BXdTG_#wg3i6w?C2W07?%u_I`rPKgRVlJ_-L9S|+3ih(ga4)C??IVjJDIWx>
zx8iPMfnt`*iw>Bkjz(d;igTV=D_L9bzYv5oMYF@}id8$Ag2&1%P~>8hU?g@uSW*3U
zaDC!Ou(7^jmoLto!2srp3o?0Sg{5}KVu;9xO5&;a6b|)Som<dAOX43j^8};ieVCeJ
z#T{NaCiWD+%!KJo!ZRRMw<)iO=8AwLohX_8Tyb)Ljf<qY>|VrEzrJ2Go9zc$s^IYU
z)8$2B_RtVX96%ajirOklOc`fg-YIZBBl{wKG9B<esctPPjqD<=<&sv*s&B5dvVvN9
zH)<ZedeNv8Or^CJ1OC@R1v=%(gbF~orrN-T@#fH}mP8e!ZDU#`Y`M6}Y9i5EW}CZg
z7_5ca%_7J|(+HI1GSsO6FRCp9mSUo<R4Nir1g|KmeKcr;aO)JcA~|Psp&QOa02j&~
zCf3G_c@O{5nsh;*s2IfS&=3U6C*lo&z}ward_@`$t_xE2AVB4xtOXy6aZk_>Dzu43
zVF(^VUEXTNu++Y_9Yc$^ARM!;ShGh2QAd?T+)zDkp){FZ4Gmzj1=b2`8lxu0Ruh^G
zR*fk~t-7sXnq`ko?1Yh&@I2=(GyXcb@+MC>6FFXApd(Lf7GcbCdP&5{o$1@sjHC-f
z5a4<laL7px^#ajdG;pe~T3~||)yi^_YQhgAV*?m9CCfFY2>`aQKVV7_Fdi$Y1z5AK
zlGMS&@2hub67!pbL039o5$<5%a6v3Wkdjj>t}*qx5?%(^+?EQNJ{VNc#K&RrGV@Na
z7qsRI-~0Xl<%8P)G?teV_CNLN^X10l{r}_r|95o%-|3@H{PpJk_Rijmql1&?Pldlr
zt*(`5m;}%qEaJWWhLI|7`3<I)k;|OS&X5KG#9=oIg%)fyVT$>!rZcz(h?oP$r36us
z2fCr};U!iMG%S*0u<~|~bF+@r@d?R?Go7u|IuC+wO7TEKA@^@3(I)rS$>@Z<K1cbU
zJLFuW{MmLO=@YBHXwN52kUS$#y0o9^iXOI()jZ@IIOYV2<2TKN7yrGpbyApwaULgk
zGa=PUnah$Ejaz2Mbf|TviTiBR(n3lxhPc9&wMHn7c`-jPclEfqb@_}l2c@B}UVFvB
zdcG~nYFsK;nPZfhK_*X@o+!^vAU2*hhmMTucS)ij4mfzek%)KdlLTcPt%Ic*`Sg=;
z?9`swH<WP7K|i7|f$Lx(h>Ib<d~iB$H}tgr-L?SGnk2ssw96k`nCB!agwmoWIivIM
z41j0Ji_R&6o+;wxz^Aif2@1;o@KXHWG?pzyOtBG~y@Ooa?5BdD;0lYNKhui-JKLqh
zeqpimKfC<r-!!HB*M7U%x|<RB)c0T2r2S{L{`~R%*H`&N@)i6a<QebA?a3P)IQfuV
zgJKSuZG}P4(_$gAaP)w=rDQ-fz6LRDIA$@41g2M8Tfnk{737r{AUBp<;+*V8nyON3
zFu+G2qTCBZ5B}K|mC9lTZy7IIE!CS!s#+k*f$nR$8%FKc!U9yPEXqXa%)$1-3ce!|
z@uQ^@efQ8qr*{K;cwcG~@rw2T`@j3=mg_$~h4;7ru2!EX-+wh~jr!yI{~phO|NOss
z{!=4C3=*na{7O8;{_~=Oq_)Ra90>g!G2H*a-iFV|?3{QDCFP5u+bb2wjKl4J2o(mr
z8@#&_b6ayF9QPylMvU+ZOWH%VJvc9@+Y`Kjw-@rIWuu<VoXb0v7Djjw6L=5`2j~Nt
zGC>+z%ynJN&xE`o8mL}*L%MY7dC_G6;^ys~ATJ_X@sB<Vp`qE^w%}CM4tD8yEz_Tf
zRx=8iojF@gl?*+KhO8-+dcOIgM_q)|Wz-~d7F~A3!NRModnD*=XPemuXWgzP8(c<v
zCOGEI&|b@lJ23P*RvW1%3<w*}6D*z{Pfjbz!A@-}1<h<RF?$1z=8g0e8kwni9Ncm)
z4OkN<G_S>YNP|1g?Bv#yaIGn}x-`=~4`!U}!Q}bAUBj7R&E_Yu2OmW^bo|cxdNeOa
z`ta_+9j35{f=T4+R}&7TbV_7bX!r{q^jHHn5H!!{SFY<&9V%`aph(DkdNeqXnC~%M
zdE$IxP+5;5u(HPJ9=a*X0wKF*L8wsStgeJGRUgr!HUp9k0!p&!Z3Yz9(AY2-Wm(9x
z;mW!(#xmgJ{%k>ShfyiTpu~JQY^9oL<7<y~GX+cKG}7$3gGmhy9;z|Kdn@odoh+pJ
z{LgPCrJ|#f8OU?|(r?rr<{M-Zw{xM}Nn<O~T0Z$uGZ}*z4Dq%;1F$lnGOTVafC(d*
z6kusXIL?Ay%bNIJy<~j@Q2~e)#$>WRL%kZ_E}IGUtii&e@vC)bA*(y-ZL*oo$p#iT
zi@%7YNdlQC2S@p9G?&2{8-T+wa(JE5uEFmHx8ZqdNEW*q+=*ow&@@{ae90A`Xt2p%
z>87>&U~Sr*X7FXWjnQ5f#~C~b*D)GNa-PA1a37<A3=TBNUG{^Fr9b#W+{oyo&5^!d
z_p-Rt|M&Rk|Fr(^a%R=o1pWWBrFt!)|En)QfAs(IsQ>%^^nZBMA_nN-W=mXpZW~Q(
z`vD;qFCr>j&=Z|ezs0&bR@`}QehYSu&uPxR_>{~VKCBP@rQUk5q4%FchukZ2qw}F3
zT$i9u%SGQiQ*?$>ujq|+OHxruYRD}ZGa(&Sm2B05MlH=|YtS8q_+KH02pbFEiQ)|q
zK>clmPuWMk-dIosbLl#P`f}l4VVF3)t;KtGr6Qy_ex-tZks3y5O#t)7N=Muo=jAKa
z>tNJvV{fqr+Ap{rkAwko&R_^upU7W;kv_2V3i^GVQHdn)5?zFa`X($gX36dP(U_|S
ziFdDW+m)=2OjkHyKWXzP`vOa(rn_h-J-GRv@g6CmGmv%#`L$AQDWUszwfJ10-&W??
zBNGy>&ir1Z;5^MRo`s=|$}BkNG>Y1Qhio)?mRKcg`%HY31f7SuX$?=1>Wyb=8A<|Z
zFc(v|Yc?CHB(}?$%x@^WE?(M-sda02Cj}6Vr`64<C8gw?sZ_^^Z?x0=+{55=gZuM3
zLqU+oVDF@sFo+^Uxsc)xgFK4L)F<4jat>b_+>%Yq#3?3pK`9g-xC^<ADqFiS0fDnL
zKO4)+Ry1U-t$TGK7u^&2*K9;Hk&O**#}%eg&J4^!<jFvpj4e||@~0U|rkMH}`4?Kz
z8uBVjtB>dZN#}oUmGzMCKWa(;uXRXy^#AyH{{J5RKc;PK;=C|w`vIBT5a-1us2Q5_
zMUf#4(yv-c{i>G~^P}gsxOsJ)IlNo90OEc6`~I8~2PG7-Zl6pWCDD+t<dT;$vPL?W
z?*VkTswDw`*DK3YOUlBPrJ~vMpQIBUS!_jhI&m!!cSnz=K7BM=<F7lB_)(wKm-(6c
zROdb=FUB(QXXVwEoeY!TrPsyR+)5G`k^8|TeNq?ca7&OB&Cc7f^-%ce0FVe2S0@t{
zWi>fCN{0B|Pm!6mArnFqLcFXE80*Wiq^@jw=?pbc<C;T~=U@WOTb*$ShLFgShL_K@
zCzBlFALK0ndL9sa<5^x63+Q)^;gVA}3*;`kr$_hJCE_29dq32aVB%R~XgOYMg9(US
z23)4`h1Wh%56P1M%10^X8C~5Eu5Z;x^I@&JthG4NYMeB%k{@q`zBBoc1EkzD4A2z$
zuU3EdJo)~kvApys|9y==>99Rk$^p#(t33|<qod-8P5AHNaA*J3o(hx0Ub3J_^YnP9
z`R0Uk#mUj>4g~^Y3CE$}6j<Wr?%vL4U2RH`5nX?Vh#m~^_mPXoI@lze#HTRoh7cEn
z5pSXN^u%uk;eak(@T=waJP=#)XEcPtp95>BuLLMeqh{=YAXyFa_!4nJ5(PzWRTT#%
z6(b~3o*Kyx6_O}PN9oL%A#u7HHpDD1XFIO(!zU|gcjuVUPcfRPOUcMgOi3JIf@3nF
z&pdf6g@I>4ZdrtLge*t5S1631W26QO=Tw=)1~?wmBjf(6{QvH;UuJp#_pIJX#D96V
z1Y5!*|Njbq3j6Mre?hj^D+LbH!oFoz%;=RtgeQKN-WKz5n;1?(^!#?a>!qq-L=+1e
zS`S6TE@c8+`rs*;jt!gHuowg(1%E0gT8P11yWJ1Mq1Q0LXoO1+1i1FoP<#};Q!t2}
zQ4Qb>?*Oy!hosMt4`tr|8iT=Sf*vrPBe5cS7;uJy$Z!`i3=JOvbZ-lrP>#IyEt0Eb
zx9|4|vxP0lXmwFPLUOoHe<k>tqANalW6>D`=tAKeT>tUz`0(q$)>v*VC*waYFF(Hj
z{Vw%?KZ;kb*A=fuUcc|1yQ8*PyMTnsg--Y{zd!WdN@w`pssQC`vYyq=#loo-7UtF7
z;zf5<x!f?z@)Nwh)4AODcU#oN$_oE0JtY7W``P!dcf->`6p$w^b`wz&y~APfTPj_8
zAE8Qo%17JkT@XhjKN>4<5nSeFaP=$dG@6IIi;ar()wvH8vF%e3kD5w*sgss*om5O9
zKM&r<o`>P<SM6Gr#QH<YXC3xy(&!j&wQB&X7<;BX;jpD~LAD5I<pv|=iBL!aCO34U
z+`6)>mH7esg}1N}qX>FNNRoZE@n>h6V<V6i{ICY8lYiC8Dguel`b9<dpzNQNlU|+A
zDH2;fMnIg;->cSS$t?UzPngV9DXH00mCfuVxftEkX$Hc0gH9Gr=ExDLcXhZ5jak$r
zK4I#pj%vWF`@ed9e4@7&q=r<PKt|YYj27>VP$3p{I*@i#62{k9eo1FP^xF)^6VE(X
zwx^vzq~`oOd#Yv75(CwFWcPX6P$qWwOLhZ^&0#0N<j7t(D`}DvSEqti3tNNM@}7e&
z7WrOT!j_~{77l6-<jX>eG8OZOQ;_BUEg3`#_-K$1bu!3bootA-C*Bq4Tk~m)f!l43
z16~ZO))VxyvmFb>pOh<i^pg>rlHzqi>=~7Owg>VRriz$o!r|C8qaj&r0{X?j2&^9J
zXh1%V{jj_N_%k;PUe{0rHp0ytFO(}>epOmQ*Qw(J1gC8EyV6G|vt9ITtKFCyRjOEJ
zG^n(OK^QtoXwz!f=*5FoCks+F8xkP@z?iZ(TJh3xWbI-x$s%W5FmfkL&L|(i^y<;<
zJ~H?`hROY#%&Pd|<^YaMH;y{3pfBBm9biDuoi)$v8hlyAmE-sU7Uh{WEktQwP8nm=
z8+&q;gP1+w*{GtL$A~7Yw??HoMck4xQBTD0iN=gY2w;D|eR~8(wnV?52>HN@TfoW`
z2Vhp1d{*A1qq0R8+2i1Kw$u?ls0czH{ns<-5Lf`aRAu=YkHQ#FRo-qIE?492TTQ$s
z6(l-y%byoo*2oYCPULYLdfMnXQQEcjy!bsM&8}9?te288*kN6a8S%U8n6fONSTtq+
zj&6B>heK}A{{T}!tiNq^VR)TQP|ojeoUREM_7<pbb)pi*okmByMH3;`Y-1#e^kieq
z#O@Kz_{3S}&9u!@nX$TIwaS@bj+HZ<<W}>$EEcmA1LOj?Gasv06x5k%7*#`30BGNJ
z2<ZJA3vVC@n}wN^lIXbZG4|i!UlS$!&)M^$2+~V4W*$_$KCmRpdI>yp{0yVFHlDd!
zQ5KSmT~CUUe;fXhTFhwy1W^081iU1QttFKlS$g>-kC1BF;cBZy!I6|PZ1$3@WUVEr
z3F|Er=DdN$I8a*tqSI5%)416|hpae_3T0RM$jWAoQi8L%QT99}4?3v^XfA9aICk!x
zIwxhF&3R$D;Q-Yn>oUlOlJJX*`unFi&Wfr8I|4Z>C&7GPdjaQ6m<@`)rO$(9kvJ)G
zIlq4er*k^~FTxLpfNDmVLI66*L^}(vwJ1BDb7FLArCMA%uTZ^2qGZ{L&$bJ?+vxqA
zhLTsiA#2z2!8fQt9eKZQVLnF8)CQ=2b>eACk*YzcYHN=Pbuf*Za){0%+Vt5wNxClN
zUICm}d)xqlzUb=H<<;OVdOW=v{L26Qq5?QvNg|W`?ZseJ0TOA;UZi?oEWO%m9v&T_
z+uPlP{j561(kxbbMzz-m+dF$d@9z8}Jt(BlbA35bi+k4ro1?3e+Ks#cED?)0YIzDs
z&X#&AKKZipjE=0D=ef8+Y=A(cxz$xM`!G9m|4JfB4gmCZ(Du5B9J}{<lH4S^kR+))
z=sNYv{OAfpK`bViNx4=*BR|X?J89NJ*|my<>QXR?`E?$}Vm2pGMaPqiA{$UeKl0{4
zMOT%G`*Qc~>m9N9e(^uXU|<%{nR#e*>2vl{$Dt0ExbYGLHo7loRiVW>=hEj~`W#FD
zS&X4>b^mjD_p6af@4uIqllQ;R>-Fm6{m<k5&-WMtU}yK$&VF-i^YzZrrV#`nk(_e4
z_{s}<UNjtAKK%~}0f;COr_wbt-59aUAksC^CSdw3=l00(YIAS*)&9<Q^WYEzuO1vV
z_ji7YnZNWEXDTx+_wuBtp?EckWF54(kn`$CmbZ?zK@7*#qUms3E7ox%0Cqbtj(hT`
z!lp~CW9fRsfjn=z-NB`+woIvz;fn&fQJUu*$>Vw0POZ`@Q}yD!)<G>WK-f0;=0as;
zzF9JQT?wgA+#9ZH<i?6J)04k><}wOrc>9N4T6M$>HNc|9t*L+t$x9NF_YA3E!o`bG
z>dGHRBeyH`RZ9v2RGN4JX97b}^uG5ps}n%w7B`my<)WZ#h}hy&eDWK-U6nrT?Fzr<
zR`@l$0t?T1m1LaMtcp_9&Z{Y}3KfmXy1Ym_ctMk0kuD0wXA6C)`<#r~N~m|UxQ9~g
zn0_g<%K6DF9<zIvidc_M^&+kP&Eh}5Pw0uws0?tE`eUxvWS24(#PV*!N~CfVT2ZrN
zK~G)>rSNIpu4J>4%xg7!0i6!=;BzFx(?Jew`&KQxOzNI2gS;$Fc&ZcfqGT5O;-a#(
z7{gMXZKBu$5G#$cl|`%RD)D)3(y=``)?D@fBtoW|X<hQvDwdu$*^^8iXd0hpy~X)?
zaSRw8C<|XYD7WwdQ@>4^LoO`l7volvx&{MUik1>5W;*~*{=qpl9~tN8)dy)YIj316
zZzkzySChpKCsK~x0w470Q4j1+B@tVisKj4*<4f9+unZ-cjG?1qYxd*{NZ-LE3dT25
zIaVI3JOz15C~a3WO4z!xRUM?9T%VthGnP!|WLoAihW^s9jgbjiTF%%A#tsCN5b6KK
z=YQ+v#6zF|pFgWUOTGW8J^FupJpX^E_Mh3No^$boq#O%S9LwI$=JAg7@YW524}z|0
zpkqO|&2HH$6|q`0^#>gois$xB#R1F`29Upm#KNn==qG+y4E<U)zE@WCPbb4V%rps8
zCYxJq4#X^7Um&Yo`DB(tQ=4~`MvMPzH>qwPj1q%Ucxh}NFyi{QlS~kUo?+$`!t}K+
zkxVd07o}_jC-+aaIjyY3*J!C*2z5z1=&i3GzHaVo3zy_A=_1V9!7#cE9I5&)3H_sL
z>cUy9#D`M`Ty-i<LVdNz2fTMnXYobpMfa5#?e-(@!W&*W&||#&I6OEy!CRN2S&ln|
z%>#(~8TSN!HqLbKZ1uwo=t<nRq~<Faq^2vrIf52~fovds;giUn(h>#~=N&z5N2F9U
zBAEkc6zw!O=mx_l=yoa?mh7w3#d)$<uFNm0ZijfJYr%Y+{yuWsAjlq^|0d>JV_<{r
zAzs?Sn%(!V-*BeVdR%m=k)_U+L(NDJ{no{67j|4)V9+(o>cN<c1Tq-3md88V@`3Lq
z^w-$>`luU@?{{vr$ApG8(Y8q(^h{m3+{8v+Y15$IfQKPtXZ8l^!V=9G%@ePQf2>wI
zi)chH=0W@qLI*3w9Yu>aD!Wnh%8jBy*?hR^aBd0v#TPg?@&@O|fC5vqMH2;FM$K4B
z;(d{4JoQ<v{5a1^ut?BJMKufW)k?L*|Ebawn}nTVKGjYY|Ei}OyT^KX5!^iVl6YJP
zkL%!0>)^QMcA>;liu7klDe4gO*Jua5H9FV$yY0fz_07FV60=lidcz3y6YtJodxSZO
zH{nm^YGt{k|N0y&{6C4@97+_qbwq9*T`{{Ik=u^QZHIC`OO|rcec@gBeZ<?F)6MN;
zJyR^(G&*=0Un<v%t7zu|Nbz7zTsay&)w8^udTvf5H>Y-%-M+|ee<>H%a&FG^+?;2b
z?U!q4k8s=Z*Y41>rOfulg!bzb$8y{8H!|AC(cfkKOLyp5J)?awvHjW|sO<T~nUU@9
z^xMv4269ZtHS*rv^{%|Ga#uuLWVaP?zTi_GnR4>^4(mHygDsT)Jo=x@2e<#IJ*(Dg
zN&AoIkMDmU^*`UU{l}}r)1P+s4tI`HCLm_cOjaPIAVU36oWwV86XJ8CpM30$Hijd4
zTw4ohUS}B$5MMMDG*W>0-*l4KAN52^Df3BK8WWUrOTtigKtf%7HZD45*7cZmGV~3q
zTX+ZcTKTWVDL1d8Zp3?6{*s!jb1r1%R<t~|6NY#yP%hvy{PU4}AlwVEjKIG`*j89f
z#hy84Yef|5hu-fae+V?E8o|4<cEDc#hG-^1U=+pk$^wuuCI&n26pi*<tzg)u$9!Bq
zgvu_5Rj^tJfd<?QPe5s4@jC1MwL9!%b*s}w(Zfg%eb~N+6d(dR1e3`(#oaM2EDTor
zbV!jHMtG(`A3wk(TAm5VXee~$X$#I9v4Jm~S5lSuK`&ntDps_H140@X$236pWY~P!
zbuYq=m@;}AOI94yHCbs1{wFJy1bl2f!TB!1(Uz#$nk$2z_MpFMGEo?xNMPS2E$wOi
zBP{`A=cM=u{3oX`g9;v)z6{FExLm#mndSFGCv1B||H^A;_ok3(cDHryhu;1OcB<j{
zV6Y}rDey|)?XIlwWHx;{@y%c$hp<sntvx9jis^}~1pNo5S4qrj$+3=nOKh*1D6v$w
zh-PUkR&;po!8Cdn=Rm?WXBrp8m^W?Q-Y-V((1nr8c-uDp>837G7?5P>^%Wty9SThY
z9fqwkn%pX_Q-cnppdH}iy8^7@I&6FAqYKePp{8}|wLYka*P#oi3oV$mT*z!Ig!Cx(
zjzOl<sD)<Tpl4|zk~J~nlg&CZu5H%snY1m%l;zFTh71ZgguQ%*Jr~&pqSV@MZ@nJP
zej4h+2V(noT8~CIzJc{fvq2B|yC}R)otHd%vvjeOTMP#GXvWa)<+j2;gumP+5ZwTo
zqUIM>uxB@Ct1UA!lPZaIeRY5_WU&;@0^q$LbatfUhmJplzIgl<rm#=y3Wa!J7}7pA
z29RpPl`jN=f!!7=`A?I-y?#3lx-U>;eFF>;&VQk#naO}vkcsxn3ZIPD5d6lx80-=#
zG(RmQ&7}-eD}`-70PV5O6iwZW5ymXrZ7cJ_99t@LMP@KtF2EK|#w9u16OrMZkLrAM
zfEQlEL8b0CK?26<CKGk@y7zY;A42zmL3a($fzXdkiR8F>E$&G;_;z?1}omzN}
zZuCov4_hkrYynAfVj_EDzDZ7u>l{rnO|g{x#p??rd3`=hGwdilhv6q%X-{h<?=&&X
zVB+&y=dR7nFqpWN)L=f}ZesBP>Q|aqdgu+T6XkdB%I!MEuQb)SYxp5&|6y9b13^q~
z7-__0tA_r27ST`p6A;$XRQzo{lLeZ0n70#6QGq_2WSAVr11bd6sz@Lv(No$6?4`x<
zpGav>vE~<kq?+Ea`iXNnWyEP2G&%0%(eMk2q4&d6Qtfb$d4BkV`JJcR$(`@w-`1<f
zoEv@CbDpyrr^w*9LHS_}>NW_U-mwQ-Uk1ecEUk%HzR$`^9`e5a%9@O!%te(qdlS!G
zx1uTyX<~!Zj~TMji|#fQ-Nb+;p~EGsCHpvnm#Q;2GyQmAH7r|EO{+rXjgahlkbs+~
z`^Ws6a(DlvB%U~Nq0TVqqZ5p~pdxuT%EyN+35Ck^lUqwZO)#&T9$n!-`v1?p|F>dg
zJox*M<$5~)OZD;n$K(C~cl!Rrdgw9vBERPjCfwpLE|wqo4-OW)=U#ek*r)`a2fsu!
z%H-j#s4Ul)pEqi?TD?~LXRXnAR(+;(|1By%E?1u|SAVSj*jNUx@XvoX>V<-~IhKZ+
z*vWUbx@o8Rsr**jQvdsNNqn+CbXM8Q!!N4JB}?Kh=Bkf!{8hH{$}*FxVh(|$cC0Td
zX630*I%(@M;octkuuqdUT-6#>6(r^wwOsmVX$gP)v$V`Vmf**a{NqO~L*?ODl`H&M
zYQS&&wG6+0<RA4?1Ao*@&+!M<c#btF^?B);tO04Y(lh*3Lqoi0+yP3WI#5oQ{3m4B
zYWxez*7;Yxw9H`Yn281+Ux1(r+T3nsMgMe0{m{SYdu^fh;Zl}EGVl8lFKkpP$28BO
z00pVgjd*~yI{dq^P?XQ0q`hDT<?ZIV+xp;O40NUA-*A{H{WG?;?(On~8pZqGwIy<*
z#L4a+6N&gHnJPnH<~1OKLFKbLTd(TQN)MZ=&<8H`0SYzXA2lGM8ZmmPsn9=r_kqnK
zQ61I8IB>gdv!uLZxV{AD=;$u8IJ~CNj*T>dWw(92{gg+6wCK%4HfB@y!mzS5+?r97
zf^8``)+yAa-Mn-hLp7Q$H;mRa==lu?S$eU&VRtL`HLqa3Blu^rEvV5wqi$?UuFR`6
z{ZV)t`s@FBV?E2j-Pho|gku(7`@(z8AuaQ;GMNzWM`S}{x$WJFO~``>IZiXNY}4U7
zQr{YsLn2&n#8gPR?XXJk&3FxRYp??cBfM5ucDPy_2*qnqd^K4dv1ONzNgUh%u-3o;
zo*(Q7bd-%TZk-Pcwc<Y-t46w4jfeHlU-)olW!CJ>TAYjH`G7pytLn6laJ<@vSB<rr
zgz)g%9XMXK<kj@6JeYnO3;w%~UyxrE{}B%TtQFVR8g*SbMKrKFB6gjF&fC`&4OK9P
zpCiE*&0hJ@`noFQp?$OJ;U^p8G@zKvg~m%2vhQOM?_?7c9F?n??pxD561}!6OKY!s
zwqShK&TPl(LO1tW{)<l}GkWPqaPszG$+XS)lr}jVU7-0QoApUWYO@s4w(64`t4)Vg
zo2@@HG*+Lj+cWfMYGX6us5UbiC+sa5Y?m}73B&s@vl-A0hU}Nz2-erDh9h1(kDk1}
z^7<4l)v3j=8N6#uN`fM7(bmE1!`;0dP4Vhw!eKz#K@3}sp*HspHcv>^6)&6Wnf-Jw
zem9}&-lF18A3bYqb<ttbG5LzYx5PU~Cp&MOn@2~RzfLQ=e{l4Ab1zlaQkp@rlcU|u
z{a1TC$0d>2VJGU)Qf1u;sKXq<Hpi8mZ3NkcGloDfL1^wYonCa&3@vLu|L_MLUs(V1
z@b5pWwdJK${I}<j^r-)N)c<_9?>~084vr6h+Bw=uz5g(CQtv;uf}xlB(1YJ?=<O`!
z=yBqic)ezMoih&y2~SBIPqfhOWOx>oS;}r<$~h)=q)XVd8TqR@9<!30wV2p>qlc-o
z$zINH=*=lzm=0`;^Jlw#GN<uR&;K)mU_TiDUs`TFuO{^WwdLAl{P#!x|DEdpldCD;
z3)D>S|2XH(Q95n!)C?QC<_Bp{P;YjwK>W2*UI(MlODFI42cu})ce}xbmHukzxA)v}
zFtUrkpm@u#hQVlHr60L%Kj2I&eLHA<_{DEucs3ZcNXaNoa9;#B6D~)j^J}-?8uQK<
z-rWbb3B|EhO1#I#dnug%mR7AizNJ9x4s`8Ntu(aSNd;4B`y;GZt5z*dkGa{vy2Rs?
z4rv2K*+rW@QI!e3Ef3Fo=FvGL`l*V1O~8zz<ya%t!x{38Jnmo7Y#K0(YA27!ENX%$
zLDOzhk0*yXkHcSDUQOwotN>8l0XU(~S1bRF^+`$nA}Zt;7W`Pn_Q~+Ooq>&>7J(|U
z2=mTAqi9xPWN5c0;lK<Zo<w5R$p5UBS9(V2g2yBY^3f8CceSeRUj#h>#V<L~3k+Tq
z&C`!m@^WT+t&)?>#Z~7r=ajr|OKLV$pkzmaRH~_0O-OgWwko<((GS1kONDq$F$K^<
zh!GX5l=Yk?wT;HiQzx``Id}TC>z$SX7v_33tBiYtnqQ}dL5x&UIk`~fEGyn)3IeK!
z0ZwJq_akz$b`kW^pw=vI#mzD^ey@-(=%9CU9Wcz$Nkt|_-#t|ep7y#Lr2^}<D3Tle
zhX!P@G99_5{Ii7&O4j)O4OiB0Ys-k)-^>_$pDdU5W|+t=44Wwygd?0yB<IZ=Ka}2w
z!gs&T*B5GM-197__=yO9*#N4zLY?mVdQJQbx#zN2L9!==)QULJ8NtU$fc`QXfW!P~
zp(2xW;4qV=W?&FmEW)=+d@iJasb%IVeYVQ{oOtErq^~w6t|O(hi?v+4|Bh9TnYTB_
z`id!F0@)cWt(Zl8rZ2+5s504+mnvmgKS#M?p7gxLOrCI5Pd1t7Cd!@$tsTrl*5~wD
z)ZDI()22j6`us@ze?wBys(nB=#=s}bf@03FeMl*rRJ((K5idX@ybRhIhN~wV;=~Mq
z5h+{8lPAJ8exI}cTpr`yd8by6FbLuKd_?L@=s+y93JPBlx3*z1wyf4HC*y8rD)vCP
zF9T#mrE_|SfX!!)J3tAZeB(>`gH8c3cwzYZKf%Z7009mO18+#-O|e)C5+)kH`oj!#
zCZKA&XSub<@kZFMe}2%B<hPA~u5H8!apTY;=lW+ltcn>g#i*U=wc}dRMpD`5b}*7+
zo>6pD+9nH@vv)?&Br$LlvtuEfOjtjOZNCTHqQTxh)a|$gA?>Tp9NSR5QEF@`Z+w$V
z)i%`DWR{B2qRgp{Bph<;?Vki9dQIMjut4kr#Tcnm<XAJ34$K@mG#6w!^|KcLtQTPu
zQ{Wvegt?fU2QRs1u;b{lA8@O}+y5@(OIXiM@olNs>11i^)aZzzrHf}4w8x*0+>-8~
zz9*9tD0&kEe=5@pIi?=pxJ+wS!hRxpaeJ`l(GkZwAY?q>`WUa1GJJ=|w+P3of_j(z
zaX!465626w8!8F^v_6(83BL)zja{cqWw1^t>mUpQFxD`*9fi*dq58<{aPV>3A6(#L
zPLFe(YN@WyZ0S1HxK8Bu>rPqKszI%i`lwF=QO^TWE!AQW=z1+<a`eQgc}>lz8Guqv
zidtGEm#YrBXt_?lHN%AOOc7?4LmbI+s_~)H(!vk1wg%`%f0y$u4&0aVr_2SyyaFNl
z%d}W50fQPh^fzQLwB&Dw!CzFWY?RXZy2+a)<g!s!f(}j4M$5X*aYoDI#43_lp=nY}
zCb2}#g`UVsFH!w25VhR69@P;xB@!5`RS7#;l0mwdu{maC%AGO;yd3LL5qL<UAf+H7
zT%CyV8P#vo*o*4p)$Ngo4pBjzU~SaON+Ln}Lho(%{azwmw0f-wQZf1Rz$c6-B)<k3
zvUzf}x%L03)V_NqbKutrG|urUC~prsEHm+EJSCK?CxlC=&tnRSK_#f^bGO^YGo=i<
z^`yr?ScL;~r(6S}P}Fh<Fn&*b@Wvq3da#vySMt3+g?DO=!ovXezOnNLaIpoyK2V}z
zmXX5P#n3%}8OJP&J+Zst2lUl5q~nGwBERRwPv3(NcIa(>(JBlN@@R_jw_X#g;Raa&
zdc#*t>Wo6s!^>r}0GT+IH`u|!f~Zxb3tlK`Dm0<XUbIc^kr_xy%#j-LX_y?`9AqJQ
zj&`*o=G01#$iSlJHPBh7coKXtT=fPCGc=YI-O-IYcfe0m?4m;)hupM++^R6wW;Gp-
zE~kzaIa}-aG>YCusC&{~`J6wgc%C|L!8ydS++3QfQ4yM7WE5z-a$c^gIKGCTj#Rur
zS$^Kuflypwa=PG<KTz1Cig$<hYYYu-nePeh-zGGKS1anprhRU}Nu?FJK<9L8p^+2*
zK8zY_BgPM>!EQe;x_fwY(jk}rd%LfGI;oOf%;xJCyE|wSqodWLPha(4t!Ntd<iXh9
zefjeAcqaj)b_)o{hdW!Rdz(iFh+2vMH&gY)gX3NF^KVoy6DCUXAO5>)0Lz3{%R`AI
zk;#3j<shRrGdgScLq=^1GAxv&kntKf2Vh_f@a56L{z-Flf4lhtcA8vLd5&6R^;fcg
zQz2#%&W{9)+)6*Y1ISF|3Hkp#V0o~9+TGvX-#I=miDyd+O|jmKaK5|$N<vFyRI`(7
z49DVatX-1j?`Etw*-(%3(sgFD(VNvi&w4p(TjrTPSLTzl%4}A4Gg{ac652LnigLFi
zmqjlni8VP6>%zJe864i;h803AIkrG`B)ZC{_D&#k5B9#56}AM`Hq@6s9XGW2FdBf+
z+&o9&%#p3o&~gEEF?{VMY4pnFZa}T^W-p!F$v|pkR_a{(>+30D#XP@43fs{bl~Je-
zY>VeG3I;5V>P;Sx^eUuGqM38@_HH*sO6RPmE`75p1vP$F!aEqsU1}D})IAcoii-}`
zXW`3Ck0m25@2;=oFGH?ZLS3RmD=S*Tww7@U*l@y<fuE8w0NJX<N~-Bt(w?Q|RP@Bz
z8n&cB+VET6aEJ%ix!pbxstWRm@?1j4ixR)%o?}d)_x(8~$ayz@q?O#CQ!{7^mHO0^
z3_ih1qnuiJy^{yf?_YVtP(rav8A54UIJvOrvup8d7AefeVN#8v%s1FM*Q-5e60d-B
z%^@CZ$e_V_^Z&dt{ereV?ZklMLpfe~0}89-gudT*fnM<F12vncgO&KIL3WS=Wt#U4
z=R?o^u$onUJGk!W04V1|z++Rptu)<PU|+>B<CE6}aIG%t(n(p4CmG}lSzxz}uEy+x
zDo=vm3$8rrwi1VjXH1`cnxRX)tcH%_hG3@jv2{Wl#IF{S?@>#xVt!fBH)H6u?B%&V
z`{Fo*l_oFftY;{e&4J{A`nal%K>8Ra^oiV+N&?N=H}@R;3P$J)f_?r%#@@k+qc^vr
zZIU4OG#a{7t5&Ov4f{UW&`6-euB^UWcx-zechrqmvU25R<4h|ofiqfEZ9lx{xa08#
z1Npu!OXFhVJhz8Ev}iK9Ky2cq*+7rdwt`^T#{EpjarKca3{Wr0adh<8Sc*fWx#)Dq
zv;|7V&iXq1c-b8n#ee=I5CAZ5zvBb1bc#<%ixTvsVbFaolleuSW<uv=?qu%Jnx6N(
zPQ=zwEBaBF1FffTY4cT4dk0$}ejOd0Y@X~G7pI2eh$u{dL#r6=chYmLS(@brLF_v?
z-QS+AT~>>w8aAp#L$d5KrcReOh*_iWwcnnEy-D4Fx%X@H->X?K(ems3`{3a9r0yKy
zVbJc-v@RTO?q@(utYn>o+MEe1;>ZJDF4uvzCUmsNne>@6vI%PPHEa&~Ymj(*BpXwa
z7Bf5XH>(%PD2dXX41;*d4_sI1khyHFb6nF!u_%jZ^%*<xbWHv$A&yC)(GbUJh~sRC
z><Cny+~a+0;Cby4Z(^jV!~1D@{~T{x2^J!r5cM2i`khF#kjxzkFffm1g%xv~fI%b^
zKgXStY$yxq*=-OtC)f8%Mgf!WtXS6{v@M&e?e%qAvu#`Vd`et=bGT>ro6WJIBgG(u
z-y875Gbu&8gMlI~3jkKGZ^K?q4()-MLzG(@+aR)l93BevcCVuMBx<MF(JWg>!cge5
z^<}Y_oDsuF`)zj!a^YYw4BXbG8XFDehI~7ZaT^|V-4;K+6K<$a7hxKrOWf9w+@6Nw
zEy4L!xHn#y9Jgi82J6DG-U7m7^FxtRx=uFKvTVT0?QtfQw@wm$l7up7c6~iTdFl4m
zBBhx296rzaJ=H*JPRy$5DB~#z_6%Ecn7K%B%MdGttVZaV04M>`F9ACj8f{uR;Apry
zWS>@aJ7jz+5cl<JwRoGBU~jr*TR<kzNJ(&G(D2muQZj2sDTAAt8<UZBCs{9dV;i`@
zBCrkG!%X$z_RitSPtB8^;}csWhIw13dwb27n_D|F>oDk!FPQyh=r%EBcW*BP%)#El
zQS&uW_R;QUM*fSvo&D`}^5)**Pn$VCJK1@Aa(cAWtZnD#)hFaNw)Ne|7M(QD$w_v|
z?`G-vs5}1R^ySMP_-FS-{6}bv#i<NQ(UIGV{3|`71&#zM@1!P7kgr{88?!_mk=IEw
zEHj5@HZ}D)8(m+kCD~$7@)I;1+&QAagp3Jq5M91@!w;-TJyjD+ZJ6n#AI|<^4;PNY
zhf*aHubDEfl=$yrQd=S!P+~z>wIdooQ^}(M7^y({mWIcud&^75+?jzR8P5t@o?f%t
zbo_Zq;!-Sz>cOc{Q4SZD<t80Yy-1$Qs0|qw+`fWmPs3B7C~ctyCUILo6ChL2v4G`V
z?oS75DNP7gTG5fA*vMAu=)38vF}a;srGz;N+DRtaWJ9^PzNI#9sH^gZy1Hr@=XI3o
zP)C_pa5-R%Gi6yAZ(^8LwSwMY6tNhob+GNxkbi(Mn4#Aqv0VkO(1@$$BCeK3Tx}xa
zYPUz6JgKOTXUvY<zZiAhA#iSxAzPl4>;>MH$d@-rM+($hwNfjZfZ<yq4=`t<22Ff0
zt`4ae!^k78sl*I*hs<&msKt6IW-T-Zlt%j)@aBVuer>uz@PT0}bPM+aHr<CnYFVVl
zk1q<$QrYTgEOJ{KLp?4THcC8w@Wx81`<1A}SXK)Ca&_@s8u@i{0i-7;rEL>JYwV=Y
zs)tecCDofLwI=YvH_ZCx)4Z?+vS8!^Q}-`ubW+u3nQrOY1Gn#W0WP}9dZUV9n{R%8
z5!~>j#(B}De^s$KN6H|)M;Q+l=Rx4n#p7gWj`;MP6h)y7UJykmLC`(%2hQANy*W>_
zBmLD$K3-J7xQpctATT)$03W~Ur-^|SJtr+R<HF}m_MBK7Nl{-UYSm0%qm}NXc;M<O
zCJZ~cQNvb=&Rn97>am64bM-TgWLesHO*$_w@E=vw$Z=>sRRhLDQ<@Qtc2(0T;LP#M
z13JOw&Jgn<J|@%SU$o;<rKwXi0}>?R+cNM|aMjqd=PR#=o$ffdXPu$vnX}XNfpen5
z0?z*7r9XJzC$lygX$<=AcEbRJ>v?omX|v^@(jL5@3sJZ9ehwK_aq0k3(>>G3Bm}A7
zRx((ThO&j>&wwIvi6;n`s%g^Y+FO!<L4Hcmur2UuO$&65+L23F_cVV21vlvhKnfm#
zx}H)jB+R@NtQz0yLP)C6CQpAtPd6)p+thnY?cMwSEwpuW#i_$(J(`-Fna4}j?l|H*
zXz$+lP4(SWZQWe<nlld-55Mq|N1F#;eQAq`nU)<90tQaC0eq_qF!|DMGC?LxSMI(5
z&1SO^2J0>`Qtan))7F`1JQtWgBGV4&?nAPkD9q6JueCp!s)UDGB1Z0R<n_q!>ziZT
z{fG38wy<xG{f{1-^5ab?$dIF+ow#!YWQ*~9xlcsHW}muohvcIPD#8-ECZBDu7nr6|
z_^mAU65_g~l+*)@RepSc&u53OlHHS)S^75oh9u<qIKvb|=Xf#_1KjL(gKMw735SQ!
zao$jPm^Hj)i-BO?M$ystwswyAkG`L`GZ>XHFGhEAV3b76HR7)2jJ~`v<)owHi$t)s
z_<d3|!RZtOPhPr1F%Q45-km8+el$cwriX@TF~k*VXlCaFwZSL_)A@>_hUZ%lb7$hi
zdJdGH-)_ThpM-<g9Mjts3!)VbyX-z;gn9zmx?}f3o~86rewMz(u&>?j2jRdDeL)Xn
zTyo&7x7&|0CiMikIhcak6^=Ta<6C6H*4tlW*s$;SJ%E}C_Xr=4PCqSX*4>1<hV_&_
zN@f<`g2J(}*yOUWpe);|;Y%V?Yfw2{qnm>Pa2VdULC$fQ5RnA=v~BSUfj*HinS3L$
zhS+&lt(KPM#TTWAm%+8=tQ+7~yc@42_09R-mU=;JuJ9P{=3f5aE^qA`ZG!*L<yx(g
z^#4{{u0HyId-VVI{rdlWbBb5C`@0AG&F!7n2PrS0>3oBwT?|H-6e-Mh9miXNou&`d
zRx-zxG5o3Fi-w_<61^Jvizer`b2*In;+t1{8BW`{?rT8JFMhurTx<WNN%?O@x?>X>
z4l8Jz2QU76XX`|UTbGU;wD)@svPxR%4gD5_#botmD6%;t+c}4hT+P|Gwp&GHmip&a
z<Hdtigv-k0<ASVC=;}^c>lg-V7}_yuBUMuN9;LXvX{l1u791tR!lGdM?V^U&Ri<jz
zQaP65#FyQ1y0X5&RJHB%WXVW1J$$6#Sa+8SmaQBrrBg`<VgY02v%1WXbE+|$W#PkR
z6cW^-jZ^jL^hGVFTfMb(BBu9&x^EeZ7@Z;IRBZPp`_79<-DH?S=|PMi4PvYINiKu(
z9n>0y<m0eBzqHzIt?8&pgxw$tRcv#A7>z(wq~58U7&D~2nI9Wjm|@VZB8dmz89-9^
z&lyE*&FyMt3G3{fT_mN2$|#n2XJC7DN*A{+NV4HgGGxf8p{<5iV>gpuATCm>-3hkv
z^d@FCr%Il&@We`)=Dv9~CcV^I{ZjaeJ%WncxdhErS_tvKsdVv4nVso-$0<j5cKgLU
zX5wrVZ<z7!YHyjghts4QCc}?*qQmnkElsw6>$HIKc$oTb&wsHL`32&CHX4nkWc<(i
z@}vLn$MfHJ>i;|YOlBP4a(!CUK54v=;s*z&!?(Ksgl|FX+lDEi6h39#BBe81E!e(X
z$rC|iTR&@4NeAed2M%5*Duje=pk!1k%17<^uG$SFRqx3Y14=RJ1`IvtvYR(ur?FjW
z4TCUr4A2=4c=;IC3PSb`mITobBFE~RdLtm+ot>jYJz2Mi!5u4AVLu`=JTwFFdIgQb
z&_7vR%?zv7<F^LV%p-owCGlR~$ao=Ox@$~&zAX`AjAtVDi!%+@e6x@Pubc(&vwHSz
zVvR3O3?i^*a^09n(y$VV7_?{-NQxrs@gt5W;;IporZcFcwY}I_7u6z?i%M9-czcl(
zk2QXlh2<nNu=F@teVkogzriYsm=dLvsT+Y0IQgTfx=<$f+F3mSQB;@C$hZ7gjV;hg
zh^kZ2827xG16mcdU;~@D(YFfg`03z_86eghGU8lL6(468ALkUmsbihGDvnU;p9O|R
zUt@KcYY^7B;HpX*tE90``lw4u=)3^`MXrz>{8=_~i{$UQDjSj7e)YiF@JONa<m2Ga
zj{@lzkpI3!{I6%r&r|Wg>dznLzeoA+`_=!GTKL80-d;-cZ{|#h|04ARN>M65^?q!{
zI*IkPte=&-VL=c3mFiVjb;vof(pOf=C$XW+Bm-^MoZ_ob7J0-c(Ub31daFsnff=&p
zz&_T#@jvqd+aoZtVW@|{>}saGYX$d0Aw$V0_3dnp9&7UrDMjP}LTM!tI*f!COGiDg
zd&>J-qJkw^Nnf>uvslZ_{V`Al@Ut{6CP92f{vWg!H>=Cd!(bS}VFbfd)K7v2h}Jg`
z%l{kITBDXc|5s~|@gKj+pTrpxq~gNj{JeNSa4%#M%sTzs5Rw!`2WUa&A@AkfLV@@+
z)$3yHTjHr8LqurC3pL$-BsM|VY+b^M#0$l_2ipp?C;_4ADnlw2@b`s(g;sn1fK2!B
zx>F2f`(xM((B4Dalw@`YSnhkG>&;OVoCGcS4B1)-c9GryYM%RDKN=s81_)TyZ<%?p
z#ZYFUa5_N9`2Gd~G%9uR4@MYp84N!diZxeU4!zF$oZJSe?MgJd>9{?=JErXb7Ec-K
zE^=G4W#kB9<udAZ=Qd;~WV2-B8m=w68x`0Ey%5csJP>oy_@@o~)O884*m9-)^jKVD
z_=s~T(;cHr1iacA_ky8E$i<R9Ph6teF+OL*`v=^-+ztj_C)>!r+Xd)lXw$#whn0fb
z=E4XzH05NWC(g%$eY)L#$cmQaaIG0_A%9fUhbX3j8`E}5S`k2mI6dCof7LwPJUVF}
zyln1ozTP=L+}zq}62pX{wT1!G<4gqsBS?&1Um+GmgTCrSN&E&30uDqgh4<LMJk88|
z;MtUK6Is{0*#>C>{{uQ{k6LJ)4n#y$2AKl(<a2x)&5Ot9MT1R{9lYQA;Cs+KFByBq
z2xG|rvcxWkZGD(`x6|>k8Nw#__I*p)YFUk<KZuY665tg(U|Irx8p42ek4Yxd@i*9$
zEzSOw3vwEq6Dz6{iF$sr8oR?Y>H(fP*wi&%u$HKh8XWbXa4v30pEE8Bg=GYdF08NI
zE?y8x`A_!3tU0EdH%3ipa_mKRvu$&FaD_=km7%-8jV|1Xf~6Ypdl=ZK9n)%^mZ$Wc
zqPl}dr~m-3u#g-Oi%_!%lLl2`3E>B%k3!&>B?08*wS17i#!A@qP!MJQ_P+CjxcQ%x
zG5tVc+v+ihudSdv>h;Tf3mlqjhUJkL-Jta$lpo@pz6ymkes4jN<N6%*Y~;e4gyiAI
zT6DP~$F{ZzzvJ%)f8$@WWP5{`2DdI>@UffZqLsMDi5reiMwgd~l8|JSQ}YJxwbM;q
zdTy#drllJKPS;JA#I)R|;K&Q(TO<sDux&2uVILO{zp+%G4~|R@bW~{(c;_S-plNYw
zSZWp1S!7F`mG>fuqM(;olXLSbuR+p=YB(?Nh@a_aWX4TT$d=V}CRtXCxC@ME$Lwe%
zJCKoeuA9%z%&Tg($Gy#IF_D>UGc!A*x;6YllJfQgv<Kn$)KU(k$9r1th)6JqxsC7j
zp*+Wg3ReVnIk@J)2iybs)uoPh>GzO(g&!4h?2$<_A1Jr+%>2R)y^6I4yiUh&;R}ji
z{Lnj~&=T}4$;bgd2K@?jUZJ42)q07e%w(4n*OxwU(M$IVIUtU6fL1jwiXP~39%K|c
zFUuFPz*gNfILRD~1rmwe)}`;E$P|Z(s<f~i9CZgE!1y=7fGZajxyfU+2$?%C&h_4*
z``=(yKmtT8M}YK(@3bYKcWG`u_^|T<r)FZ{C{-|&uGcP*`(<M5JaRNHGSGn=l8jsh
z0FxuFXkCQoh9GkZeE$t?%61St_^uUOKyl#;X$hrRmvK=b#RrweNnTlzF;=KBb&Qi$
z+%?e|^;_JpA#GU^i0L5diJuV9KDFlExVU{PiAg#2#=suuF!QSg4F2M(XfB9i_1lo6
zfb1pF(`>22s(EY*^)wV<tBbGw#{B?<U8T!udoU?2+PM?cR4M#&iMp-~B9L+xzg;-@
zfC7D(6bZ2t;2RMn9~DW3<&{*=<XnXX7k|P=fKx?Flg|!PJm5J238dR5=>d8Gl6r~g
z;N0)K!?8&O;Rp$X#lt*eFBM>cUxM_AT%m_z>$xWzj#yfO$$1a*yCjED$z|?2J|>XU
zRuKoB$|4eAq+J5S1hNJpy#U}4tN7+pU|>Ff(Df!RW~jt+DXnMdLpzXz&<FvLp0)m_
zkxT{e?~0W{aP2fK`jHtjK9HuRUr?ZuCoAmFbuj9-xkJ}OKZ4<?hOrq(uYL(meQ|q5
zgEu(mIYp@l#mr;DAqYa7{r1ic>arqn0<tjs<DexMdfiUhZT}YV!yP;Vcn0SbJb+7=
zrgPAB4QUZIw}3{5Tapemh96+&<28%;_ke+NrF7HDmhGbut~B0lVu*xA^Yny7)p4)l
zg@YvH4M?es#E1L|%@FS=FAbCO(q;w6zjlfq+hd$=ARWl8V;oY{9q$F#$s!`L1shOf
zB%$jehL4+eD#rp3C9=fHM33cM<#bx*PU!a^(`}}zoL<Y!Z>7_CGJ=D8e|@e#w=rQ%
zOd+rWHrpLq0{@vZDQ3}I4boU<$x$Y=9%^`rp%fDREud4yWEo6LmQ!d?md%;$Jh`5#
zb0^}a;|A?!Y~WL5<@X@gsCy8TU`jHTXH6s6;L36N<Dh%x?e-7x;l0V7{eGDq<BJdq
zpyc$x988#0Zgyhr?ahrzT)!r$D;ok45Y*hj-N=BJuAY0+-S$w^As-X!18>_(EfkJ}
zo~JJiNq5xCDrjvKs6I1QDkxx}OcoL>lsmFE7Tly*|49=;!ZiyiiKb$}mE2hp0<ll`
zFv-bEBsT0s6ih>v#K^N?wKw)onL5??%Tc#`=wBt0Rgc_mG`E3AE;`jI#SuygzRT50
z!jBNGC<1oeFoKh#1eDqQn<KmVpV097GU&FwVIpgC7vMMu&0DoqPXd~SE@|;5_T~8J
zZM!r7?Y4q*KbdTG<QE+*1*FDy(0>{+Ew~^Sw|$oatNR~O5C<@;to<4dNjJ?ZRD3(z
z_u7?-J)^m$WyaD^2mJxGL%X-UtkfN>yn>~_Df(Nu5$hH6$Mc$q7bLIHgI$IMXAswu
zh%=d(Tz`9{Jmo%y@{737z4c8N8E5mN?kIm3gY1r^>C95r@b`mj2~OJQ;CbC7@4RhG
z)?)7EyeK_+o8O_gcoS_EM@uG7HM$ySPVO@*bGft3WoWjMv=A1a%Obyr)`*k3-i>b8
zce8xl`D~G3*3v9nBuhWc*lE?g?-y9HZ?#L;d@dfB`TR$;-z7^8^ezQ;;%$}eO`rfA
zgg>C)9!GlkrP~7>ER`tEhH16Oy~xM*i)?JaxD~cvJQ%j&cx6cylg8P;ElIU+M^o+F
z5mo!oLsglAj^?w<rsg%VWw9mir=@a4<w~y2*NTZmb#aGwK%}!W6-jsnPS!@)X}X%d
z^xP{{)u9?Il6&f47@R}LP+fQt1Rxy2IUPr!d}BSx?ed&bB3Sp!b9m26`UKWo@oj9+
zzhE6s#S)Wci~_pvioa;8O*IAU#u5cY+i&A`KJ@6G>q8&5hU-g@L^z-C8C^ugZ>&}n
z7$dermQi+W1Y0WsH6!<f7fMCzdzTGuw4EAmx-ePjk&^YRK0c2mr(tp9FauEW&}thJ
zHWSTDO&x0=l{0cC<_c4W9&)2294~4W)$?Um<n{xSp6n6WsM6)V8?<onQ4E5ahi*#E
zq1{nhFuJESp(|fRIw-JAWQ|bEFqpwzfK<R;+`(k211+yX4&agQ*JG`*ApB+@?+nq%
z{W$(P+z&t!1fsz&;o&Su7bAT%j-Wh)CMT-Z(GEg%5e`V$mk7kWA*~gyU_Q|33s<S8
z$BTvTwL1<~+XM&ItVDbPT1O(UlVnmc??1YUF1JV}Ne&sINAhqJxfd5G=eqhz-MGI%
z2!`6ZB7OokeuY$ycQ^8;A4rHCJJP8rQr3+H&ke_{3q+eqq?>C(JMsYO$u|XFUw)LV
ze9xum(K&Y?$=d}vD+b^KBNHz$&OJ6-37GMc@0?50Oae1RIUjE_LNrb3k)|1S(y*Qg
zqEC)_`XXq-*<sRc1o!5xp<1p~Duq@h?%BdRFvxk~vh!RxRmyuHLz}Bn&A0SSg*^|s
zz907B(Bkp!M2l=aq}o*BN}H&{p-N37qPOjNZJInf1*4+L>)~!OB#Q(<Usx)@9QMeX
zmT?7}C(fB-6cpDR?L|1cHfo2})qwCTUZo^6v|$IK{u*t2kn%JU<2GjM^xa76=!3p$
zqEw)D%YH`t>Kty68*s{iPh=kYGL&~zYBw`!0x@vNJu>K`BrX}r;u;-84uk$hcg$hM
zMnkkQV-cW$5Mkt_eiuD7VIra{M4ALNU*l%165Ak2TqsPi8XfrO(7ynPK?u-$ur5Yu
z*x_<u9yDAV!W1I8nY-X-7ijP`+75;M8w-?SYbWfHQBlad0UL1AL*izw{$twUh&3T-
zND`N<J*$XqI`^K_NCrbcB+|fLiqK9n#<emyk#XFEQJ-w7fP&7)xVx>Ei~m1+Z@S&a
zk*tgU$5Yg%x-CEgK!T(uO61-UNlEDDWCFCUHrrSQpg@!bRKZmR4%XIL=P~Ymz4IhD
zrW|U(Kn?Cbce_`cBCE1;h>VPkjEwk#dvQ7TgAXttQ@?)}c`&3t&?<}$7`^J+i_cgN
zA{w}K<bbkIU~lM%OBJA!7Aj{j@0NW0Y#R<j|M>}x&XxOUlvg|VYd_b4ZoAX$wDy}v
zNAK&pFRetWf-rPM5{Z1eUd6R0iOF3xGav}!8SVCHWBTBE1hi>h!)Jp#m<YdL!ytZC
z&!#o3o$n9nZVC9AFQxq7Szw1lX@^vqY-#dNCpzJR@FKJTlr0_=hwsmrn2Qq?mj1)x
ze&_VO^^Zd{L%SQ5ssc*yqvs{`au(1?kug0`J?;>R0TUQ!`PmyByHoY6gTFJrF>rEI
z`6>}1`SM6<k~efI>54d3D5m<(naV<CBr0JI@(u+5(EgKnKq0Ovk(C;$50Q(wGfL@V
z{qPNLkE)x(lEQhSUcxfw`d+F%j}}co42Haa;+~Ro`YV-AfMtDs#V;!TgLG5u9YHtD
zVSi?zY0(NrDlUw*?p&}?BI$@S&}=@_SL$xFs%X9EOzz+XD1DN19_4>m#@;1r*f)tx
z3$oO)F_fi_?fW4MKwLnS+UxcaY5|!w;t@<qt3-Ks!`H#&LyEdI$ZD3KbnOfy2(HLm
zko@HX;88UoU?fz4cXj6OogEm83~b3j4F9#00U{u{J~>vkJ4cTBGvzh%DoOS(l{xNG
z>~Zilw|<GrY62&j8_&gSPYW1@F283<II&hNbj$G=UN~m|Jv@WK1H;qI*znhHGoNt!
zu0hkd_EO5wjmP)fxYb9fsAz0o?Lpx4*^eMeIZ8kih2=&Vz3S{Cfq4O_hwPT=v_smr
zMUko6)!EoDyr*st_jq#)Jl+@NzTLPlh%;$9&>j}K-XQbO5CbCX=vw(&7q{v(!1jv5
zVdQPAOTMV)2V&Kht8cD7!1K@89i*jvqCLMWE+Q#*yc)xqeQaE(&8;|Ttw{tb4Jmx&
zgv1rFUVPcdI<rbO?IYI3hq_Ofb03-AwXO$M=!W$yA+xY6MOE6MkjmQ%I*X$RIIPIn
z{&Z(Y3e>HGnV%^4nh3v`c;vDQy&&sFD1CkyTO$|M7NlsY^k}{GXYl3!{qdg|{}cCp
z|NEKW|8S051#T|&<Tm-b@jsi}8{3=N_@Aw(f5w0QQSqNmIAD|M<u{yZkW#eW&>F8h
z%>hM`kPbk*1>XPplq&*bJe^|-A(?@u5$!mC_x#8?_Pq%0q}k9#@+1X0%Od#|$Cy{~
z;IOslG+#Qogp_YcT1KekqdY}d2as=+WJ%pdZ-L&HukV1h`90_L%i{W~O5x{9vsK9V
zRz>?SrHUB{X!$nVm!AszvV}PVZjRkw!)VVre5v{Z&p=s*hc6xL!b9Bn>4mCnYv`O+
zo$sNVmxU=ap|Y*pErm<SuEr%Dg?J0fK-Kwu!KaPPr_eKl&LK9E!cQA>TK#F@T)$k9
z;}bqTFw}?fI-;ZDIgAJwmlOcOqI#SX^51LF$od>afq$~LVYdZT9J{@A`@E^a(dM>J
z=)#rMTMh}pP18J+E@J7@emx==>U2I<tY^R4us`BB8ypvY0N?e=!@B6Z&EoHvK+w;!
zU&&Ds9zJeVoTAYJFEwgCH}(dl{jy*EwUYUCA3v@BYCd0wJ-eyEmba_Uc11Y;9IXmy
zkBTDJWI^jgh(cvT#ruKR)`u_d^Pz4L7mE7u^utS{iaO4*zG}6yWwnBZN&U7Lw7Pz&
z7v!3S!|bb9PNRxf6i|`SqowivoF<<I)XBikmc!M-AfHs7r}|svD_OPd!my_x{J%mB
zhd%MQ&#KOI=)D#LE(4_pl6CGTw!elUV_%z9`oI3>H6Kke<If1~5qMO>6OC%K`7lPx
z4*l`C+}NnRv;`nYuO^?#QPXT}V9|nQvvBb;iz!1YsNt;vhBzg6WfHA8*h>B@@6NN{
z!&->FWci1hU*U+Esll{QN!Aut0X6&%h2mTfmVavZAXkU!!zrAKm-Zn{7ve4~|I+S4
zwhGgObKs?LvZyAgB7#Czkw=@gMt*iv6>6uAS>V(4RK$-}3?c0P<yr%?pw@Xj%ll!E
zr=UZ68`;@8@&ig@F(~80ufhSgI`XeZ8FB3gboME$*HtWBd04u}?kulDi+JeH+`6Ps
zzTKyCdF0oiU}YUs99BS}nm&a$JCyfu<QL|<#_Hj}$5`eO^{mz)R3Q+<B|QsHj7L2&
zSaHxEC}d=OHP<>AYYnTOtwLlFatG_^*6xvh){$$_0n1eJxo0`U&SZ7R#9HLG52x(%
zhbJYo3&QK$GE8{Aep^1NRO+ip5F<ElIczVeEG!HS>;x4l{3_ThS5_%ZVSY<%#v6&6
zgLxhrJ9s^zzFgi+(U-Azg*ydovrbT%`z7qR&W__h4y;eGJ%6Q7&gwM%*v79_QDHNS
z3xQ7bCtTuJ`s>%nEGndqSLW+Is_%Yvz|%Q>&eTzLcCb8rSN?Tr`5kHq%d4ogSKPtM
z-13bb)g>Lswm|e|KH+>HRy)5|=JsmG4BmWJ{xvrN^FPzkb@K+LJ_C-bVG;P6n9KTf
z-r(kcDuI7A--UclpEh=Qo_DNINFZ0{PFCs@CW+OMUn%B?9mP?Z&yWWT+AIFdL@>@7
z7JY`az}HNR+^2JfH~-U21ZsZurO0BqEE}Ji`!Mxgeg^YFteF}LyKLGoh;vO>Q22cz
zC)rW0YPUAjUupaWAKr8Pxf<>}{%i#+jURWn(;>N(`F-*(Bg;8#g31gU-?&o~|0hmH
zv4i@9?b~+Geezt%+FeJ9(E=mMIj)}_Dnx>(0^d<#*q^aqWjLNR_{nBPRY2cC>7RdX
zIKOWG6Tg<f;Z*c%<DWcebh<-O2IU3;q)GZO?>h+b;9Iz{o<D?ww!6jLg7t^!U4*y{
z<@1LY`<;W9P$R7eYEoX}bK8UuuuS>9Kzvv=-PYasU~yflrrSz-1W0j-1&ln&+RPew
zn7r7?HJlv*%zXaq%m>4<KOIL<70>ue<!1vfk0BJ~1D7)nSZ4)q3cOQ3t6~vWOPk`!
z5*x!8Ofuj6Pgr$C-xuw&|7aIZrFUabhs?%xqumNxMcq+<iy6t$4;Y-JULpj|<43m^
z`=8^Zvq#^*%shx+mR$U;_vkoR8Z<Xmo*u-H&agCe@;J4yr1ax->9h6tWsxp_jum{L
zZH($JEd4lJ8i_QgY%N=MoyyYU6mJB&nbuBiczcD}M8$f6u8!UmuWvAzV%f^IRxQgM
zWGUsD^L-^{OQjM{{gLy%6gJV^v5w}>6^`wizn_JPPe}i!YEyVUS2H}BS98Ab<GI~<
ztb3u#*5;P2VOdk>Zh=8k%ARw*rp|?B)Ahc}4osJQh2t{ib_)t}xm_wqWNqmr@p+3a
zQtHgqXeE?iQjp@A3x<Qw&ni|ygh(zbh|g06F_^l@P6V_0i%p$+{M!u9s+mev!A-mA
zS<frat8q(_r;XR0#mX?<gr+(f7Zoln2`BQxapUt;!Etz1SP-96LF(nPdkcRR_5e9R
z#=mj`ug<GZs#1KW`MFYDmg}YYDy|n^<?6-aMfF-`sorKUF3;IUFfcw<soN`N5nWK#
zxH)H@s9^n<%}k}6MKfi}mhA->SLz%)0NdO54zfR#E#rcVo&N-m*>S$Fkd!Fv6^GdA
z=A&kXm8kFWF)3g4n@E-!!Z#e=PNVWPiMpT}mQg#bRFnjuD%E48BX<@JvpWis3ttJ+
zr9Al#^SzSrKle~3lba=F_>9Prb-sAF^)Z+`0%<AG5NKM<I~61>u+H3K$W>Gfq1&w*
zzk_~!*H@bP&!9W{|3pAgRqadXXY46HgN<o3p7qG|1u6-Ygt19=s^xjrspgkecV3TY
zlX+~3uhdwL_LTlW6|1B)vf?sTXS0H;DgE)Vf(k9V+0J2MdX;ZJ{e>=<ZhuZ8)dBvC
zdw{X(Ua<{@Us2Tz`C7cx;8nhx=?_$~bT5m`U?;J~;BIb<@T#E2)Ca0qrp3ZC)FLho
z8-uUvU`BzM4m9<Z*3B*DAeb|o^a@>QFcF!yrP5PS^p!?Tu}I3lu#Q$h+Svvt*^2Q@
zfv9y=hpr%{M$aJL^*!aEgD~iHTQ3g(B@LBVcv9h7@6ZIvNfz!3_iB1Y1wqn4qv*o4
z0El>f9qr5nXrw`1Rw8P%-go0fZp-&F+7CZ1H!G?b-8H95i3W0&DuyZ0GRhC9j^Enk
zQW$-d(~i`U2VPMje*%ZP;zINa3gNHf$RGJvKDt2>suNJm9mmeVixYTACINh2CBIkV
z0fb2mJmN~jE;x&XmTIZ<Ou<OafsSAa1`LYoN)BK;k6|M_Ccfbfo|)tixg*=EB8G7j
z71=jM%BepddspPpro2B|fw~fCVl3d}HxyLC$+D%Q+#KRXV;XiyZu9kJr#nD@vNE&)
zV;PU{sx}(X2wh?6C{u&Nwc+@#VqH<<ZS2IIvimt+jfhh-ImvMmja^fc93HsY{=D_i
zm)a|WoK(x~kea(EHLJj*JknMy4>?6sM2E)HioFCQ9H2!`G2hsr4SC=u?$6t}Gkw%o
z&D)<|%IsibK{wgF-7I6d%F8*2v3@wkTCog|k3Z6t@Gz(fOi}WPUuRT#>@Qgr%#Aks
zSzw)X53D{@jAPYG9p9vLPpWDZt%E@<4=Dg8TXoqTlV>1BsKs|d;@;w7p_?QTT@?LC
zIb0@F!sGHM9?vvIygSWXJjT~S0+EX`d972N)@@yQa>v0UP3IDdm2UGPuf2*Wc1`HQ
zr0z`XaeY8SIxEcfN8#9uKY%d1HQ~X|+-kUz6{^9Y_gpoc4U<6!Tnr7(R&#WrD)=ob
zs^BDarv7z^>|ZbJ$M@%|g7|t+Fc8Vdi5VepyrLeoe>gz!eMji&)b^Y@TW5|08czN^
zC|9@~6-$@n7es>j6<kO;9;sFC`*gW;DaQk{%3UCQA)Zee^@Ly<@R9ZH4`oY&aNx0c
zddDjmGwtolGtdu3n*$yAeB2yfmMnaUSISpyPrX%zf4=tm7(I%Df?t*3pC!#GlOq(c
zi!=0VxyVKg?{J`RP(3Ze7b9143meDo``(|9y#I6ke_wAjx^Rs3BA+5%#-9WJzdZlT
z_LFCgZ2p%g@Xw$AzyEUle{1g3f8cFctYM$#BkkKCb>VD0K|QL?|CSh-5f#|Y`UVCU
zV1n7IzE(rL2Olq>LJDS$t|%%Ns;Z)|8v%&xK-?P)ON&Pwq(HFt6!a2#9y%+L?)1Uq
z8Uuy!k3lSq^&r8jppAHho=)}kDakVix26Z{ta;X;*5tkqgU)o+kas7+{)wHV23yCD
zb7p}nIQ+Rm;vsOr=`3=tLe#uta~NQ{G!$K&GB(>6Eb_n2#&?)b=S4*h4MRa@<i2vG
z2ew#aQbGG6H1s3;2)Q>L1Lh`~PN8<se#s2mh}pEzGZ*>?dqhNWQse@DGrHq=RE#F}
z;Q*8{g6%BkDZonz#J0ejAqhz2LkhZt-gpl&Pl1Q}JJjlj@Ri#OX9)*Q1Ksfr6FWTw
z$8)3c9U|4+%%}saC#aPt6p)M!?UXLZJ_gZBjNeQ3w>2qlFpn#^w2zs22eUpi4~($+
zx|oM>Rw#Tcya^bBUgEUIUczY;7@407nab+(RtrOm#OZY>4>_*a#B`~$3|=2#z&vWp
z=w3-NJ2uC%Zgk`El7K_$(2pkbm{aD3`mdOh=8$HO2@KjI{rmkn{iC#Ceh2&*d-SWu
zOes@XI0l)=BL#baOw2y4q?vL+C$SgwC$T4AFJ{-F(Acr*Y-JuP`xrS@EWdC-`}kVH
zyR?Grv#MabK-(1b=A|-EAwLumu8a~v1x=&^;C)`-bWnoht8$|xzL4zBP-(@XxKSV+
ztB>;Qv{b@dSdO<>-3f;X4-#-^bQd@fOwbzca2k2MzO*fv3))$lcxv7-%OGb1c7g!C
zb1u6Sk2-dT$c1Fh#-KRjKqQ5P7*~*S?ZF~40Oq>p<}$67V-NWcJuj_Kpg3X)Wy=bT
z2M17E3=k)?;Sk6UWb{#{QVJQ<xT<;jwX0n;djpF5rltxe5vENddNVWkQbQ#K7FIlM
z3#tj}kWF2-#*l1g*gS>+v33)m#T3U3(m-l8Ck~NUrwD?IO<BcdzgBxQhK&!s8@ox>
z{j8Kq38xX+H*q6SspwFg&*3-b8QQ3&ZWWe|&i5ujF;{Hl4+g-`q9+`Lw|BVNq++^r
z-w2wr8>f1l?*Ya3(=P25itc}__{fF&|Nb!u+9L#d_?)o674`o{W8>-5jQ;=h$>yK=
zzyFZ`3EWnhw5@}%4-zW&k}iv)FuLn<{)NQ%;_}(~>Dl3Vr*+tN4u9G|x;QxOzBxZV
z?lzB(oX+{hA$(g<oj)w^A2<Jg*xf%pK5HEvo_AaA?%R`#{hxlSP!eEy-G1Nh93E4S
zCg-@>*?$Mk9QncQmh&k?uT(uh{nUMP)O_1^*8cJ_^XQXPJ8d|%tHv6BMTzv5S8o?9
z>ST$rAIglt(K_iIo}V<2x(BEG7srPuh)!~P;;g-%`Qw2JiB8FqHRtrr8+4+2;Iu&<
zB!nL&bcSR1Dn_E074peX@4G<iza8#(xV!fM7{U!km-*oIqH}i9=^nJs2^Qpiw|(*E
zP3tH4AYZn10!52#U2B&>QEH&uKD{{KKg9YzF`nGznp6B126|c0$h*$*5m58!^t|19
ze{|S=cOe5=+uYp97w<IRz5%+o^L>J{FguHAk5io+A{We8_x`XAHDb=-uUF^E>6+rA
z#dTu-grR>0hm|fS5-md`nuRaU&!J$7w8$b@uI>i~C11BrnlRC+k|kch_h6;kED$t6
zZ*U*BT3`6xeOb(^Yze#BCGX98$aoK&m08d9z@|0sj10dx0IDajdY#>ReZ8nB3%UB6
zGlM;~rF*wWL&OnwF$Bkp2t#st<AtAnN$bNfOg^J^>$5Luolj2n+xs<czM9weQpR4`
zhhnC*Ynay;WKcnpiMt6J)!-bgI5!Ub=@cHMUX<LypldQN#pSiy0kb#|0<$ZEj6;!%
z#rmL4g%kO_3|etlwI%hyiBhW;W%DlgO0wZ~7Zs7tO3mq}OPIvIFM91X^5_mdh9emU
z;)1>cKVhdoZk`;N8Rfr0gE%AugVBK&PC7E!`jYA>%k)xy>jv(15_w*C<YJ`XudyXB
zzIhLXAN}$8u6q`RLjoe}KHa{3x?P{bdH&n?V3=HQPJ?jZaeMjy{UJSA)PZl`gE@dJ
z_v^urr{}yb)uz4ZtJ0$7V+oY6v51PL7IoX+ss80_$(E!4`gQd+d-LUM>}&Vt%hp?_
zH$|LZ$aE#Wt#NcjACI?lHVicGbk%0#r1!MKI_9n#$9iLGv>#@6%k-jzx-+Y8z(>gZ
z6Q@uGTxq%%EYhneoK3r3!mGScu#%J{uu0e+q--s&V)3_2ddP684z<D)g;Tlgn+YYi
zQ^nWyZ}sHr_az^rWvD;=-AR3OkEW@+b3VRTN3vBpLamh<eh$CSQ<4@-|H_{}A^$V{
zKlbtZ;GOS9Zqy&$eWv%vQv2`r=Ckdr|HpP?<4^nVzuf<Wq;;AJ2pox_Zb#~wWUr@B
z5TDZZgIKb#B3ipi1Q~hOjg<ED$kfL0yx|S|Q4BG?LwH4b8tTrz8_(QviF^@E$_#=!
zWxRFvS-|V+*c)FX^rpC&#Mm1I`G_@8HF^se9CgaOiuqIB!8PJ757^_OguP+X47_V^
zjK**RPfk0$SSd2{G&_rYkhr+7JLcC?Y2Nt#SO|xpE#jAQ?MH~-YeS5pAIEj*qbs$b
z_tTXfI{c#Lg|nQNupWQt_gzY-<xi$Al$jb>Y4q`q6DVI*X%aYBGLSbM-%Y}3IzmW$
zon>@?M*bR5*SUvjy6~=7Va$}y+yt%PPI>>lb4H-{e7iX1=X{ZlxH2;4G7=q_C2<K}
z{<3dhl_DJ|mnhyBn{*!ZCHXLvn8hL=QBoIISW!2iakdM3VCjmWq&FT?`s9*s8q_c{
z={sd#2G%EsCEbi^g|16e<N0o!yyU;WyGmYG(33{_(kn;*Bt$@E@_r_KW<3FYgD!yH
zF>%&!x=wBju)+Hboo}r@Vv*S7>Xjby)Sm`~7ZitqICt<(FmW6V5giiv3ezKU;<ZId
z+J*I3CVcDT_7D3Nm?eK~r;q`XC3iNR1qH$74r+{qel$ZnkM=Smgj|AQFJU|b0m?2~
zTIi;+FCwLrkul`Z5PKT$T8SR*nxG((*%h4}UBqYS29^uu4<gTbD}OG*CHFa>Z6w%O
z;sYnX4A5i8sVRnFw=fckFe5>%l8I14AmEP)yIjF<f8{S|@&eA(kYeP?<B6o5K}<Oe
zLU8C_%>I;tdtGNuGL{&esI|J)E4hrvp4ok}iVJsIPHlD{>3h8CM~5B_3@Tm;)G3Pa
zs7k#xVLl%p9bME^wnqH(`*Z8hdY$2|a@|o=hv_lUQ5~Di3|>}jRl7Q*yUWApOzZhM
zp?D58CAX3|1S_Bq=1$4&<J-)X1g2Z^sZ2@WtZ~ptZc22L_0)`H3l~g?)x(u1gsM%=
z2X1c~v_$#qs8vka6hiTo;x3u!BsE*~oElj?VY;`4Q>DkjR3Wg~997D~8KH#WR8kar
zjk|Z6CX^g0!?)b1o+sT>J^iO~iBb7ml!M2W?8)wQI+ip#*f|@NE%L71Xh7*6!XY)K
zQcY>+(9BTMxl`WQc<+f$;h{UXv%`2`6^4Z?!(3E!8VG97_O<bY%t#l@R4k%o>T4~*
zjXc%fVVKtfx5%DVNP7tmG4v;;^5NDAO`;18CY&dnO`%0dy_B1CimM7sL_K=wai|yj
zm*UPN3MKEFW50*dSt57Oa9N*{C|-emSO~D>=FBkJHN@F6kY&lSlPcmB#mix6XUd6X
znJU)F$_z7;nQrdv2(~FE$&_CTr}HXgHq8#xk1eM&2AEfZk-!7*%))?$f=;<O1?1-(
zyS{jW<wUB|W-?1ittwf~b}6%rF>L`QLSk<;%*mmY5NgL#ME=t`39IO{?4nO#(eN%k
zrIF3+6Xt76Eqyk1V4`MhjYJ%;z#C633M;plUWR?@`;Qo=sAxl^;$h^81LV$-?KSCy
zm=<E<ClUb}lhk8+kOv)`#X)>r`)<*Q1zdV)56hB_6d)g444&ay%x=OwpUUIZ=ln9j
zrY+GFEDa6XDiVsExl!wg`mNOkN<rnviPjxCdm}sVU4v%G2_sBTcmb*2;Ae1X!R$^$
zy7WlQ<k=JF<^(N|lf5P-5R_}h`NRWZ89r2l;;cGNdjA@IaL4XdB_nEmO9K$$FtOA!
zerYwzt?@3Z+PO_3S;<<tty_Yc;Djl7H&cyT0(_FQ1&ea|M08JwKmnPpheH`SpK4Qi
zLPSH+*~sh`Q`9mIhdqp)V8~GtZm`m*p;TW?Kr)>|W|9M^?3y!3g)|@NG-t7*1U%Gt
z(_?05lHnF3r?BSLsB`)C@(Q(C!R)C_q1l+dg-*dZgdU}kb~&|Xg~h08-H%mTN@QNQ
z2gk5k;$_xHrgvytPSCM=9EKlaNruMaI^Cyv>#b-tRY^d=#^X~Q#~6<$=rTNu)QYBD
zo7jpr8EBJ+#MX}CsHI}Bv${!!<8xFX=Ct<`YDz(>S!mZWx%OKR5KN)I?7-GCVjrQ^
zB6kuy)-!4dAF?;PpMOJi6B*2OcVmf!pv0Fj(7lo#5uUpLYL4BQM5dOLp_xKB-g0VX
z(yqX6i2C#5+EtPCxj$cqOJ#(9Q9p!}Ob4=UlwPgVkQdU6cj^_%MTM78OP9GPvFy54
z3B6|*mY7eebRW$2ouw0%TZv5Mm&wlgbIzbu23m$y6&g*e4WCK){4}a+GM5&TIrY(E
zYUhd;5IwXb2$oMoQR~ILJFg^{O})J@jk5=zB_yVKyeEa#H_HiZm;5^P&9q&}?sLi+
z;;NULwr>z=L@+Zbg2GOoiz(aEnxO0mdEF^}Ho*cZ5r0dXUkMIyP<|S+GFCLF{yZ_F
zkTdhrS6(bGnJ$}=br?hOXNswBfB%Q8gz@N-(Hmt2?NEHrJuU+m-~Vkrd)COre{XJX
zJo_{L`(JwhM;viX$^FjoUay!HoP9TO$Kh4R4pPH{C`U%db`Fd7)qlW>eg&-s3=6xv
zHVmOK+B<f(Ha7nH5|OFHXeYv$HhOjA;ZbMDd9ktaa&7NF(A~jloj7Obr*F>>+wK2&
zwT`vzIknoJTtafFFlAlEqY6ej>K^hX<icJ$+T{T^p1%rKm%7K2=v3H`T>9Con#}C1
zV#y_d6Q+_OSJg3a!sTO$Y4-!q#e9!fUN3TG-(CMfvLR7Tbp&hSbdrJsNY^5K``7D5
z-nH*atef5o@mR$v{&wi!txffa0Fmh7P(5O7CVDnd&nUT~Ac_g>78U3Z3rh(06Bil4
z4uylq0PrZhsfsNyq6Dmz?FkcBr18uWxz!q~^O6uu<C9QsSv@mWs=_Lrjk1@b%Hlwf
zTB?rTo$7|m=0rFyxv|bvOTC|HJW1AJVA7+3>h~2U<Yn~*wpY%5jKiQrnd3VOX6GrQ
zY-&38hps_68k3TE3s;$Qxh}43Tw<3pY)Qwv$QvUZlw?Q8EUBbU6|qOv5mRXm^H<Xq
zv7r&c#gSuK>YF;vo3T~WKq)cYolFnqDDu!Vn_!)E=VWDJk2uA$<UB@-ymFlpu>9`Y
zC`qO}>+7(Z`cqCkOU|vpL+dx|@MwMP23IqXH0$nQytb#VT5B4=n3TTR^wqk%XAeR8
zo(hLo*=af3FY)Ry3}1q_7?$0i{eRk7Xw$e2IEaU_8Jc2s_)_OdpN5D*;ghWZ34>ON
zp&Y&~SO9&ooaYZt>)IOP>{m_xC!8>6f|e3c)n*Ey0h8KaLj@rjPk1@0?qRp^$e!YO
zEP=cl*v&cRS<Kmgt?*Qh%3wAD&H|kOzEUsAeRUjKw@_7;c%A(9hjgRlXF6%qFx6QK
zHCOrWL9Zr3@c6OGQB&qbrtqxMs91!unDDR4i9$8E&X#gLTeDc&GAo58D7UUQ1-{c(
zGDcb<E7!p8Q@uF-ZEd77;Fd7uZS$<9)88I=aX<1Ye$aZx8UG5!g)Y@@9UssiC6hMy
zRl<1?Vbj}P+g{tFOt$vxQR+1($1_)Ug-LkTP;fb{;j>E+03%#sxIfZpyKWCHPHvqq
zFZ2m!X9r%Ou}s}nApF!N`lE6|mF+lF$W^5#QlU9<!mc9dzmEFFsvA3O<pS1LItKMB
zUk?A`{I}cn|J#AzDt6J_7+Ex|E}a69MzD{pqgY-aLc?o&#MJN`N+cN%q793HN%yqf
zKHRK2%^!Yp%CH?*ht)wPm4x_SlOd?x#S<bg9r<_h#LO$ae1ZgJ+_C?dKT)b3Yn~eG
zFj33r35o$dv-MF9tl$dd7`psa2i0M}I{XrnVnT^xdg7be6*(TM-xkvpi&5DXshO`o
zPrnqO#>}WU{s({R!702RMpx_j7aZ3}A?xaz*7ih4GnLc+Ni%TSbEA(i37=>ds?@_E
ztc6qBMbH@jO;(WfXdQbbiwJ+URuN_QC|pMAc`u&xtPDK7orVE#*hUG;%GK351+@BN
zMu8Wo@-JqNY#}!{M4TB9XT*tIKV}=g*^#H=4eU}hecmr-^1hv@gU-q%!Ee?aA+G&0
zN%v(^4$f@FgvP@h{I{6U*+O|j2Z$L3JW-}A8BO*Ie&$A?yiH`8jDs>!x<aqN8Kt3F
zp)~wvQ5q8iET#09(YZ05OX<ecp`S~soh$fR6bA*j;Y`P~7#gwIUJ~M(`Di>YoRPiW
z%pWH;&?yO97<8YsZsl-JLsm+mRTg^OUjK`yuK`O%DZ5M&nMNqwXr$!%7tPkj{Mp)|
zN!VCETiJs7*`i6}a_Qv|UUfe<Ha`7Pi=we%mPKPDy)YUZ#Y<!9rf^1b^eb!!Adrh^
z174uC)&%YqPDQqqTkYirL;C<)vSby^+YnHn`o;O!)0@I2vAL^kPd$B`?(^+SpnRAj
zIf>N2**rowWftK#YYs6B49;M+N7G2p%CsJ7CHsL~QY0R_SJJ_)@^uyxrf?~qIebGI
zUKX#Sd@)xBFjr~D+%t~;ZN9q9P8Dcuq6`5}6rw8&tv+g1Y0lqly08_QFZ|V-Fv@(x
zYwnxu`qBJ(p;u0{d}fLYx}U34zj5P4ow&L@A6=Y}wY|ve`_m}wBkG%(C!=3C)<5d$
z`t+}wug1DHWBjOa-rxr;w;#NI)%k}QplK#~MY_<3@D{V)Iw#&u0{f?*h)tpLB})*g
zHg6FUMfsm)3$duEyL45hMdvS@OQ#5l_y~Vwk0k0$U+oX8{VzI(&U$m`fo~lAvE=ZX
zE4F|RP!2St3>mgoC&Ra;1!i=bYjfH}Q_JR3t5c@fQUQ%gr?lN^a|HoS7F15-;>@*1
zt-xA%+TJ0jNJUq56sE)S4wv9%{K)*ug}w`V@$}{5pRjf*Zwc36ew{ac%g@!F$iE5$
zf^@awlfFQeiV54MNEV=t*bkOSflQ%A>)#K&ZzqcVKuKahus2RWSh8<^k@+f~XT8EZ
ziFldCDqU!i$OykF@t}Hb5{gq6q4?VoFLCGa9lUVhzs1DM7Fs~O1kqZ9<eO0s>KE$4
zZx;37uLA0Qfn=V$Qt^%BE?uknVhgC|MtxebN&)tRUq03vpT4~k?3$(Qrq{AtyqLd$
zqH`B8zH!{8OE_O_2}OA!J3>_W>yIL;S>kSbjl0E*{0oRWcVW9Lt!ygguDG_5rn0_;
zrtsGvMN_lz-So<LS6uqpz5N{=w4+`){%RZhXztEF!o@vWy0>SFEtrXs>HD|d=tpL!
zAEmeY(O2bUSQ(?a`#a9uX!#D0#p3%C@Y`?qBeUO+(i{HQB_S+?(cE2rgiCj{bf3=@
zTaAQ&z@|SkyZ$J>?T<cZ@t=4uMqFyE8##bs52utgD&tM?c?|9O<2eisZyfxwgrRfA
z3a%S6<DkG8^i@elu6?;c$F*<A<IqXP<M6A^<M3y(R=ZtAi%iN6^ZA?)(?@}Q1;nOT
z4*gtA?xKRL5&Vj>C`NxQ&8ee8bo$+*I{p0zkUa`#!C;Od6JI&>bBQ6dpx{!nSJCcI
z-%uFejeEynB)@wRrEVCx<Kc}&%Eb4%JG)F@pD|C!-2(FRSG-K4?wiXrro+6;v=wJA
zE9Ty$#oELT{ON4WL!GbmWNV?|Hp17T50M<?0~q<SxN?1-i%(2&yYqZ};y3ixQNJwl
z@{!6M^~<6yY~=Mn#L6!&b^D58uToKV`7Yy~ugDr|42C=O@V@?Ss}DUL&YOc)Fm&kW
zGV1$O(6<WSi|!QK&qMrIksR8$>5T*W7Q_zb9oYQ%#65B7=Q3hL9S;5cY+`e`6#xCN
zB6xr&Izw<A)XD_UIY32`2<j_$GI8e%Hz(^|>ZtVJIHF6PBo_X7mBZ^_Z!==I4h`9J
zGR~>@@Ue>KEIN4Mz<*18tTKi2^Y0Nw6dk^u?^HBYzEjcAo<4D*iiRtCFWiT4{kfFu
zV~hQzw96DLKII7CM|aL2N6Wq;X1|b{{jW{VF)f%m-Zs3L_@5>IHibp!`$whRp-4id
z*6p`*wxM>Ew+;P~r!znQ+LRvn3rH<59r<@LvF8>3YzKe?uBfZXzqY662lfC$4dgWE
zb0*8(vm1qf^gc2S<#0{HnWLz_8bvih)B7X$E=!y0?dMXb6|gu+yl$RMc(Ls30zw%&
zrU08GWw3hzc>-PvTXp^FZP%~t!o$eze{6jENLQ$zoJ;tw`*GvbFCXFkC-0Y!mx*_q
ze5`#+J~;`w!lJG95*ay~d>p_-toJ^ouU@J+D?vA5w*SgA(l4HpzRXGg8&3*s+V_~0
zx2Z{S=FJMu2uuj5g~{E&)6`@&AyHBOFVKJx`m=BW>lcpxO><1}`5bemDzjK6r~&!X
z@=Xa>n4H^1l1*vI%3c7|r#tc>x51L8E1M+NTE!ZRic9k_Ef}rpn=f$RKnp?9N58|r
z)B^rOrR!g9N%tt#bZ$w@V@LkAy*odDie5uydUhUIOT1^lSlcsisQu60iN3Ws#OcTq
zj;08IEOdxnkzns;(T$({P7PV~k;lvqHgPPuhwn)2^>8n`jAxVX#}B(3M#z=n#p4eZ
zBkGio-&fr07sdLm6AkxDB~XCAET8CN{0EA$^?wJYlCui%gEo5$uZ#e(1pj^GNn<06
z|FFIFXa3KB7yNg_1gZIVF6gjP5K3NB%N-+{yAp8R<q~nJ^qPW@F#=X$jw6t`M*!ur
zr^Y5)S6x5V*iOsAyJZDtP&F6^)j7}w)p>vgRgMB-Xj*<&sbU25jfb$d1kUXQy!oYE
z&{beEgDlX$pbao!yY(yv5+n?ZR?!F$y)YctOQ(Tz`}}F`>9!N!#fdkG3;Kd6yc0q!
zM||Nv1#f!+2$H@y9ui>R5Gs0xc(Xp#6A7L<$RrG!A0SW>94Y&EgjjA2(^zG@eOO`K
z@Gu&1Sb;kj=m==SVz7hkv11(#o4r%2J>WF#5~)r>)DuJlp!-2I!#}Rps*kGcRU!aE
zGo-*ZD3!db7^}nPT}Lv%2~;>C8gb5W*&|M7lTPFUU0!weI2HIMYzB^gT*0cC=kh=b
z3XK#xj`+2Rh=c>B)ax9T&-ksZ=UH4BV#px@{GmT}6}@Jwc+p?QYgWQy`n5_vnpN_%
zq*+h%61tXZ*mGuk37YTM8TE+<!*Cg6S&r**geW|bx1NGssgEiQP?1K70EU~52N>e9
zILMza<B;lv6Cq`Ik)%q6v#XJy;Ce8^fkS@aE)N0Cq!B`QPl(lkmEIw2m(Qq>K|ewb
zMB!2)EF|I|Sj<Hk!w2EG|Bwu(t}s2%LT-zfyuxw44^p5OMzqa04^EG2`)hkt4LRO3
z)V4bVrUmxt8$?l`xk+IK6RN3VKGn2ONX9;|G?yb_*_StgHS~cYwdC?6R*A{QZVZ2^
zWc5a6raTdZGV#D40+EIe_ISU2advik-s!$8JrF+4JWfCFcA960M~CfpNuNJnxj5o=
z>3AK^Fy<BW4e|`A2L;kyWixoSrxuo3QSKla@9apmpqK1e@8qGgzHSSqD^IXorm;Up
zP_e;mGQp(Lz}bv4hGu{mR?Nu18sRmi0R;MpP6{vwNSIX1aOiT!FXz5J34<1k;{|1)
zDR1zyL`k4$6Xzo>u~sZ0(r}dBsLCU$%YM{mS-$Hu_=y|>TbKdUh$ic`JJ?5n7yYC;
zP!G2F!SQUY9>Cw9UY6khptM;Ng#E|)@dJbfB6PPzvF3Li6P<Z>TXCF^DhI*sOXt&^
zZ^>VL)o=A;Ko{}!M-?21{%E6K@D&0eWLio$h0qCQI0F@ay1BTQkEowWwFi`I8Cfy2
zax|<9qm?kspe2OjoCreHpXqO1hGZp)K)2;N0dxgb_&pLkYkO~x&*&hDu&y9<ZwL#B
z)_;G7S`cNg37!53oqwnqym99x2pZJ~$wGq5Vd`IlARWvok;>5pr8@~DVKHvN8xy2d
z2EEDv%&3(WBiee^B!l4xB-XUXl}5K^>4uU_<QN0V-gp!$C_>1!aG9lv6mlSD{a_R<
z8jGkPE`z$0&e6x6LLj*WDp;i&G1!Gwxp>oi3WpcfVYw%s1uNYrJa*aY+%XUqVRGT)
zpcZ&ntgUfU1CZkE^dM<gIGCPLkX@k-3>}MLB7-=rs+=wX!_H&Bd+q?L5mH0Lqi{Tk
zW&F_bev-jZ((+!3W8j<H+xZgOJO#xtPS%z71Y7)cS}4FBXORTgyu1g(`z@Pcirzvs
zEE1LYG>bq9-=cSE(OX!;2wn%(C)|Z)rDMfvqbOG_NrKCIPVvlkvhrvC6^ysBkt@%6
zD$I^hxB(Px0fn2#(w!l-H#}9(%<j;5qH8icL}O!vo_%`BTLt2BXg1JU5<c=+^31>t
z*(C<=lECPOQ=2i=$fZ1RH3)_-$-W(j*l=ej2@_O<x^y=128~x%DN}>vi2v*FUKgF~
z0)t;_l`rGD%}1(LpaCdV_F-B+q4;K|o?(1*8~a<{V8nF(qf%I#;WgSVzH@$qYAwiL
zY+y3|y-R5neyf9q(uMi%;-jQosXQ*FbBtz+4!l4Ytu?Tpy8ICHAPwjQr%ySr$~O6K
zt0`*URV+$5INlA0$cOzPL@2sx7{>$_Mv~Ji3Y%N`whkk4c8P4l!gus&4_G-<_AXy`
z=#FFWWxDw@mea26IwGj!2_AIQ659GsZNJhsHD&5uOrG7{`_bnVh4xIro{(4DFuj#+
z%<CBpeoogusYiC(s)yxDo}obPU_~UYA(=G^I*8FUzR3LcZ;t<(wf_lHJN(VDldIVP
z7u)|fHn*Q;@n5&Lw*Jij_-|<c%g(=J9<bglIL)Qk-jH+p6uquWCJTbGZgFmy^TX!R
z(fg`nfDg%+p>)`Kdw9}4Zg$RFKXuQJF5b3Il=b08<#Ch54V1Kvj3`sYMGa?Y;_0&%
zS!!`&H%dgI^1h+LY3SWxK8l`zvYI4pDit2<?s(J<LA`6Hu(k=86srYAkkJ)Y)he}>
z&de6;!X?r`WAEA{XAk#IG8!?o_7z&m`Xe+Mm5%Rz@S?l6Sc%xXwNZ779&b9eJ$gZ9
z=ntL!r?8%_;(E62dNxfxwxv*8%ow7U^5lil2ii`=m?RLV!w9$w-&O5=dnJ9au!-*n
zT&YPtK{7dISDN?%oJ$cv7W6%thaJ$ll@dKqUQxEg{&ZR@oz9Z!3?XoV`n{Dt3ulR(
zCWRe7_yHktvgeaZnTCcCNgg-|G3$vSuc8?(*15w<)Gq9>lM;QtgP`&gH-QO9s{Wt<
z{r~8l^Z)$s{}*WjkFG&=662?O`>0&v9ZH^?NcLB$C$E%gytZ*Gf@uRd2>!p3d$#B7
z?d-5=|GTR5W%YAi&{5!L{Hg<6?st_^xm^A>)qeF_A63fVIoB2VAIt-7_Il#}FW>OL
zX#YQ%je$Mx4sWOTxc*zV|37OqHnaDCTbob*-2eam_J3>sb%s+ra00WL1ze_P;h&e(
zZ3UXiW)Y^w(s=Boc<-ZMfK+}uB}p49`_rX2L1EhW2IL*70ln2WNoyasenhQ=?GDro
zUB-<?gEv=Br-2ujD^=$oUKHZL`|>&d-a?>hKe>~<UFTFA{2|i2UOK^D8)_L-)+{<d
z5qdFNOZ|Xpz(~;gXoq@6pKc!gc*J?m;t-Q6Cm1y14OFI?6dkE%+3g%S!Sj_U4w%!>
z;f)tf&WU$(sOgM?%URQI>8N!Du)|s@M=8yf8+oFs(6x=u)?`g?ci)fDlnGQl6m_Su
z<Mj2ziKuy<kP?1}Y;cS{8MZ$LoewX}JXGV*KoeQ^N}5qqm!(gULH4u9mf%PA(h)Zr
zd3{dYNy%%oT^D_Ve`kv+LDmb_yKMDzZ^n?=C3z$0Yh!PSSG%{A378YDFaY>ne=Ijh
z@yH+I{0_pK;Kq#xg0Qa5og=N*Y}a~swe4IVcS@JTpPkE|2dwq6$Iti_+Hk&ep2L6J
z@ZY9$c}Wj9;Scjd9&!owLLQn=;RS7Ox_akw_|H<akX*CC|83?Gf_Q=#K(eMN+qH%7
zUw3{)@bh?7()p9!6c{*Oz8dzTE77V1VPta!R7<>i#ZIjA1Zi7^x|7THx6WAzNQ?Jw
zy{PZUl73ST^lSqd7W7AO!`-Y*;%suBrQws<!1N2m+B+psyV0}~p^`>HajH`GJ|eLG
zFj8Ii15iXk9?%PvHYwgFGyTB(ZN|vJ`5cbefVN@y)n6;6O=?~zD>wa<Yjl>hBDFzi
zUVDT|FWBcb%p;D0Mwl0#Z<9s7viU_Dx$)ZjcTX}ju;Cxkk7CxJ@H_(=Gthq@7J;Pw
zZAMqLeD#gLEOna%Rdwq?;+zbVnH&)cXGumt6{T{SUB)0mrygJLcQy4lQS+CtMUw~d
zlz_wmIgV4s&EdI|CLf=o0KqxdnHlh8lH(Q4)vI?SRRJ6)VOb$Af0dJ;tNIf1Pe88$
zm2uupCzDsc0x%XMFO_>h$Niubwhp<gtl?(9#_2FZO0AW+{x~K~e^1iIXSiLCc7AN;
z1KEBQiBEuM=OVVX0x`yG6n>HUnwO(lK%7QQ#A!ByYHD3&X`Wj~R{D4sC(p$kF)%n3
zvU;^In}~F+rP7a_6*iD`i<rF>MLq1+*bUrjh91Uec*|$l4?y2(4RzjVI0au#I2#S$
zi1ko*N@f(5vjUpIlPReVQ8<`^x>GIT-J;Iqg~j0&CEda^H8BVXMGvROJ_TocWu0Kt
z5xqt750i}J!l%q1DsX$@o=j>Q#Tw^!WU~3h{Rj`Zsl<@Bs?iDt-YtHwG>y$2OpwdZ
zBD`GXs9M|*BrW7s=#KUN3DY$3F%|=s$C2B6h`u(cfZVZ^hKkHiuNn9z6uCpD7tF_t
z>Y@sfQ4%=YI@X98K>Yp(U{;fc0_|Aj9E2;<W)RVZyJ|c*7sMeA(w%sJ3V;_xQ{v4}
z?V3uH&Tbz%k|$Q`2usq5N%$<!ah#k{#~Nm}c)QX0F6^D9LVL@?AtBRC57lyp(ZDqH
zDfV9y)vMy=W)-PgMbFlXk$6;A^+cMmSZTEm;Jt;(kEivkl*x{+=8^2CX-37xTZg61
zsHwvWvXUpWxaigyQ{LV=K-Wa@CWo`J+EalHcw|u;Co?L~6?Ta-t@P4q<?*T=>VPd8
z^XP(%LXuTMs1Ii2u!=qM2lOQJ2(Xl$4_6t~W~oGM1O3v1XeCD8RQ8ZRv+?~8u>YKm
zXK)ya@00xRv+O^O?Wc`D?LYs1_8*yl*s2rB&u;6a<aR?YS9I3!;z~)U&Qm-8jcD1Z
zzlxg_Uc-o^u^3v+O$_ItZ~K&@6tjO=(om;}yA`+W^rZ_Y(-zNDS81fFn%RRHPwtZm
z29HoOZ<C-3`<7|*Kb$5uhDUtzzwUF^?6gZ%m2?uUTHM<+S;5?Zsnk4cfpC70Hwa#Y
zp+eC5hk!OHG$;hl{^Q41R_~Na320^!i}pC5^V;#XiPH{cuXAxP*5Cf8^G>;kp^fZ=
zXi!Rbl>3{32em|6OSCBFvUjp_FczcQ6i?)#oJ^DOgGYP5eRub|**=u}JC&ZzK9Ew~
z=nSyF?qCve$9don(X(hy9aKR2yr2mA38Js1zOL_}T0nP`Nhp5ub{A-V_;Twe&=l4;
zhjev^o_(U)Ffq(W!elo2DBg!HnR7Y{yF7IWTTx>|*J730Yy?UKUb{h4id|^FsqYgk
zDd*|b+G95gm`~=xo@9O_H*8Rsp?_!ugw;Pt&t7~t>49#>Y2pn>D`Tvh)#mwm^SwO?
zx%{F_SVLO!@>ZeTiZz>{e8IY+M3Blb;fZU$%2A*q7<`c*cnPaClyp+!qTZ`&hJ+l1
zObnAlBF!9S&aCeaD6g})Y`-g192LpQ07cVIArdTJ3gy-ghNoZltS8OEfV}r*0f@(%
zgwwc5c}AX6RfqolHRpwbgsAOhbaTs<Rxa;~YPx3~rivw<`1zk@e_<CYOTR2xj(gI&
zn0k)9;3^rFb6t?vC!hzH?Z&Md#&bRm7JTCe*V5v3=nrzA4kZ%+rH&}4`oQDRu6Vc`
z;mUGOaX}tnJb*XY%TQP<IPiIebwatsv~Dj`pMS^8Te?-!-6{{b0F1T9Me6!VrJf_z
zs=c)6GWgOiDxXEpPGCA*&(nY_Nbv<u1{omny*<UqTGrp!V`o>-FFM6tkM|m0?M1~g
z6neDll*jAW<5UBk$PHq=lX|5Ff&m}=2tWFm;$Qs=n#p+FFRMrxYC(0XK3Y2yKR}OD
z%3jtQq?i&$POIys)IvqQ;jsK2t|>)!i_N)0_jct9v1nn1OBsmf3$>p&1huy!%Y3wE
zY1jN-r-<QWKPYEe8~d_ySN0KW@vHR2y)EpCxJ!@~^|c?65%rbD_u`W~o1iZ)Gwfbf
zXfHibf2s?~b0Rw0*!D_FXg-!fC^5_R<`&#H>V{dl*0m{GbKn8mbKoE85Yt3FLcf4C
z`A$?FJ-dsEqN=dw+e|ea8!BH&u0>rdFQ!fv+s`nN9k~MPS{U$cNZLkh>y@IZLuZEm
zRegk}By@;F!w;%NWX+3xLZdbMBU!-M#EkZRrHMaSqM)wjRhYH^-*~|OY5$pP|Dijc
z0}RN49m+?xd+i1N(Zr2D+{Xm8B>ungbn9u>|EJOT)Bf`>wf_)D98-4dGdwpfU<Jqg
zh8QsvRXwmv#$qw21DM?0#~kYI*?6hy9LrURX(wG1+PyI#1<$j}$&@11Z_w8=qIf$D
z8RryF#t#Tjfgl<AGSel~1<1hO*qg-SD<A=D1YP5fsr0q)-QeMogfQR!aa97F2F0Kb
zV4whDrJ-3=wNSMW$7p_+qrcsK*gOz>Qf_Vu`h?D+NAzKHeC72b_aHrD&Nr?QQR+g=
zo~AkCvi3Y6L^Q*F0|qj7r%5=ayYuPP8z`fbgMm(Cha)MTIhX(CY*wE-Z{Pi1)ws3(
zSQUqj1~we}6^^i&WQ70>dd{2UCYc5BN|@XPyaeyCu#*Fh9`rGC9JI~ZIPq&3HRs|P
zxn61|9O(m<plde{s^{p?F8qTLQaBV*ED*|P)AqA<ovSb$I9{Ci6EwpBFHmFC@twLt
zf;aFhMLxO`Q|WB-b#tCM5Sid^!kLVi7AiV4%QNa5<x3)H2GANer_49JGsk2J5mb2g
zqfpJj(`#UQ3ZN85N9392FCC{zzo{PZjo=XN7mBSk3iJLb9DDHxHjEry5Z(Z+ozMbU
z0HQt;qli-mrl>`DWraKtpq_w!S+eMe&Cn%4DCd&<K2|XC$77!n0OanF3Cd+6Z_GiL
za^-k*)JZeFxvi)1z$qHzdXQ7-ii^Fe;Hh4EE`eU-x`GLN#q2MDLK6~5YdYKg9@AtU
zy$Kl13$b)Yd(Us5)1UlTV`JkM{!2fhNA|OAe73!jE4PuYtFX3Qb^K7*m%{bg10pe;
z5B#@L-*{sEvAwDPcv649rT^G^wq1W>3O=nrd*Qr_XHx@*^Z;kRJC4`(9x>Pxz181u
z)EiIk^~L7a)B4u-{Tg|`{jA=2w)!`UDr$n=ak9n~I!Ke~nNz-P)VC_uv!_q!*%R|@
z^J)G0Q+T*te_FAJ(Wp0`!{ev+_fWKf-&gK8z()NAeOABchnr6}>rX%H%O}q_p4Fc|
zm9gHRv{cC%gHxRO*GCsNS)XS~2xNUh&nyZyp46Xi!Nbk^GxPA-#*6yaCKY}06{yNp
z-IJ!<(9P%f|89Hp8LYGC_nwTo%=%_F&XKl}CC#=pxshVd=P-1n?e_Efrg>bm3A*(e
z1O;Y$_BqV9{Q~;&gy#C5U)Bn^?p+8QK8DPD(f8*zZIH#bQY)8N=o6a=n_D0Zku(-3
zZf!lQKiQ<OEh=tp*Eey2SQR!lU(`1mBnzxXzxC`{ed}44zqg-ny^#4*W#>-yeuy(Y
zZ(y5S*%mkX*%SL&V`B>iE425_UQsVzJO|;4;{Z8kJqIzo@g!Hr(=A1^ZJT6Iwq;Ro
z*rR^_bfb<{yr@5~6wK4UAO@aBpt@a!fjds=pvwlhnVq>8FY4PezJ@*QCr@AuIKC9K
zZ#}6$D;OV$mlyQZUM`I%Fp#G-l7>y87qE_=)8nU=)fVgOQwX~W4W?l)TV|N;7k2Ng
zRSTV$#cmCBdmHAJR`a%=2Vy_i8yZN&%?*1^6s@Sgx&5%`w|_V=gkm;tJhyxDtWn>l
zhMqpR*`rqQTcfchGCRd|jmEPVyz6YGNL`!Py&RoiAM9uPNel0V-O6Tz#oLqA9<&Af
z!80uU<f$z{8_%D>0(p@mdjqKagh!iN>o1;(5Zp>hl^4Wpjm@+Kc=7^DrJrY@7C{c;
zVR?5=i$PSbcL-9HVZ+jD?;xaGdj&W%pesgJb!#eTF=($88w<)l9yMx3=hLdb_q5L&
z_*v0ugPg<eb<{xru%2_;i{Ze_iJf*z&STQ!$%l;)R&3Q>&LMRyg-5ip8(pE}#DWp%
z9>QTmom$q9F0zMvq+vM0KpJy`;p3BbMOZSlbf;7gl*>9{!iax^HB%{9NEeQoa7eGQ
z@fyna#vy}$;kXj->W(j?2-yvR#4wmi%>)pw>e#wg)k(=2o=aXCf;ETnGZ+_jGrAK~
z#`Xv27Jp16b~sf7(ugKPTys%Rt~)I;Qq0gsQKg5}Qo(M;?l*MwGOpIyG5Q`)?~;u{
zAk*YDTF{noE)so)2`WdvckP+bnlbz}ChRo!YNUW$KBT+7@LCR~#=^lTM2k6cFikx_
z>CO5dyo8XN$cQCBBll)qK#<nSc(P7rrmo`V{&c#wH|GT{akQUTouTKAfj2ylv4qfp
zCSf2n?iz+*P^T%X7fDKvAtN^$+_*S5VrGfdN%}e@Z{HNB*8e!0`eOTUczZ`~Cw}my
zrqQ|f-_7l(8(IIq##1;w|7rjIBmVcmQIqVzUR;L*L#gzDC6hRM#r??dsa)sma;zNT
z$;C*PBJj)1ct10&DbzH8+R;i|zPY)n%OAKlD<bUVCaPX~0EICFIRYv>?l2*o8d&)w
zr~IIz0#STIBO-*!3CG0*10AyEUj<==A&n0R-jO4j$dCDfAi{K-Z=GF1J^PvdsiL>p
zKcN8a8Gf9=CWLsbDHm*aI%Q`hf}}`S_jpcyKQKxp`!t0CdiObGXOAtIPLTY4q@b7F
zpm_Uj7UF$fdCgfvXx0a{ulGOb(HT>k4R&OA$D6~M^75OqrT(insyHceuN2s+FcQew
zkUXb|g@_ek68j(~-F_l)5axQ5SP?2CYVvg`LwG7wJ80vg^QQLPiVRY=#ln`bF61-u
zyfcQ@u+Lb;eF;+I&ev&?XsD{O-}5nm&MC8z7HZ?a3GN#mDMBpMuV^bSd3kU@>Q39k
zFC;e$E{m&RM%z^!4inrY)^!8wd*v#p-8j{#aH}{t{ptOi*3n`2@MQn=pmp+=xpS9E
zQj+Cxc7FP|!~IV8q<MTu6T;zBY>2-JqXCU3_I{hG(c{CoA^@<&vp7%+ksd0yPhPwh
zMxVyOAmtZbWRY|?{_hwfp+G>8{Go$U2Ey_#r}(q#BwZV8^Y!|PWdXsEQ-d-8+jai(
zk?*jPCSAMKryLzFj$a?16D6z&5OQ4%Soo-(U|hL(&6gB>H&IuN-B&9aRI!2RZxL+!
z*Xka^Xe?>@7)D6TAxDp+nn95$FwQ6(i}0kF+n7SMAIQ!84~OUN)+qttV1SyekthU8
z?;pD#ydxA~_2f3OD8$1w6k1axlFB}<Vf~EdomljFecn2JL*!Au2nx}}Rnh{l$pVjV
zy=0%D$535Lwo42)2vHaB3^<YvFQ^bi1%s~XUJxa@NJb0>cJT7eLn^r#KZAPKz;jV8
zoJlBL8b?60;@>@QDmuRI5kqlXe8Yn?R6!E_sA3gWp`+8crzrxk2*c|Z%+VAQcygca
z2Wg+d8Rc_;23}50I*Y7{d+SeT6KCYnO^kG(>XYHa!hxzVwj3&Y^2C|?x869`r$AM~
zjXyy60sW2oVsm3dK0!8Bf{QlEemK1|&B=w0#QX61G>q;nG7$xw4D(PRNc;5SeE+a}
z&^rG_nxZ`Q5i<}rnjv}BcAswFKHaWQgR3>Q`c5x8XBQp(4(dNWf3GBwT6>Db*s-#{
zT0(9T%+d-}ghviw8W_F7I-R%BlyWe|pyUS*hGm*7E2@J+@DkK3swiyK2ibcqgkTr$
z=2n$vbK?ra0W4LlAE5!@K;;G>cn4!_dqZE{f-LSFNpi$~-8yNWzt_Z0$++y--S)+s
zH?5y|RkxVA_RkNSokMu@8j7_Q$tA?~{b8HV=Ybgzu?l&(Lmk^2FP=K_tXES5z%3l%
z<Tv^F20H|ZZZ9h2CB4M?1xtpBYw2z1TnhT7x*#>vh+BS4HU)u}u-eC&Jj5$!7Fvk(
zJI%!MVW1r$Zoz?#wtqD`P%39AZYHqf;PQj2VmP7o8FoEZ__@$qHU~f-vz@Y0)valj
z9g^Gw(W4A{gZrO#w$&C9>w0(WiM+>{`Se7Yb$4=_SwKh4leZVmw}*??58mL<P_+8$
z4wf~a^a4qpsTGmX;U12q3Fzu+34#Mcf|IN>SOKX6FbNuk$s)moffCXfaIZS1G<BuL
znGVM=y;lgBQk9?vlQ}>YqT6TG#hb&(|KPglT+xi&9$w+Uj@;k<v5(07`)=Zn=@&SR
zdl(TUztlRBJMeiMt3oyV5$2holD~ThNpau?)PaLpf`1;y=xzwAUDbI5`<M^^_yL!A
zg9;=3`_{v0#>(DCp7){Zyqg7AWS^=!E$9aQ@;7(ta!1r}H9Q`vKiK6$Mh`VjK$7BU
zD^=(3psHdG{BI2dZI7Vv4El+WPr~TNyP{u@@mLY^<1<t)xL0Q(lk^NIIlG!sC(gr(
ze1ATRsUYa5QP2LRwtp9)3jh$dJr1wk4_Fue1~U$irb4Io4J<VL<>CX{|ELCB=0Ese
zLS9Cm5q6q=HHWJa8>vNmboclW^_T9!VS7I>xDZELvri?(sb({FdLS}}f-`4p1+g46
zX;W1YfTXHKtZ~ymPR9;esN^O?QA_DHRF;Su?AG7hYxf}RSCF~e+|!d&f-~}Cjw*xY
z1C+=QPbdN7vZ!!=cm^`T(#BRL0L8_?u~qudM6h)tg8de%SSdX1GZ6zt;GSx}!*QXy
zzk}*)4_2r1u61zOy*N2I-Iw9C*~*z4Aep=sNvd*k%5^N(8>C8PC3#^m2#<Em6isaP
zL`bL4oZ7${lNRa5ehs~QftRny>Y1a7+v}mKk6y*<xM>MjZCj_aqmOCqBZ8S6bLd@M
zcMfs0!yPTSA`ax;3Ex+dUL21k0pMeHjN;lv56pF`s;Ghc6x2%&U4*NkJ7TT}5Z+dp
zT0)zYR)2#=chwJ+-ZUoZh#P?VNLv8bS7#^s8x5J0CzGOy8`u0;)GDL)WRWc-VFu1V
zeN|rj3(5lmqtdB5FlHYr0-j}?h(->fbD&U%(YY!+&KhnTR12@iHTwxD##dgl2DMoa
zas5ImAB)OC27lF73HV(dGwDkWF@$ei{4anX&<Rkiny+7<AO6t7t=i_n-GXjiX3(as
z|K)qUVrd@Ag0kpaIRC90{h~6)ShJvzK9(;iW5~Ful)La{u;?uc5hE>|M?W^-w?W!;
znysV5gKqn9pOM@$Qk1!oq?aX3Q47u<nJl}$mWC>p*+a}K2B_wULAqaZdBLEfWlPOc
zQQs`(PDao=Icl98c3UU!4$oU1D24*GNM%oH>j)Gh+MU5qReKr<dfv8*j-|Y~&`e21
zTuF=$XKPYFxQ-hDNI<v09AOv@C`dv|%DXy;LZClr7{wSVs{bI`Ls9=Kz~)dnR{0II
zz|(P|o(O(C%7XC6AtBj{36dq-n~S5P?pd?*j*LA8XB<A_6m5`lb4J9Y1EFkytn0HG
zDlj?#nc@gQg&$hc6)~OvpiQ=K;kG~W*!DoFKHOwv=bzG5QX34fmOUJ1$CWt|+;8+)
zwB0#xops-wpB~c(SQP5XSioqb0j9zHZ8a6@Z99wNQ0h*b_9ME9#w~@oe}KjkU?BWQ
zUDt{*8OX*M^WB&iYb3qeh`_|S0Z~}E3Ch!yH_;Klr;hP)Zjy5Q60^QOuhAyT=OXLa
zgHF_DdlManXHMt>vyvsEZAR*(iQGEb*Mz<gnJp?fbTu6YbIeNC`c}wTsN%#@L&W^a
zFYHjDbkx620ym=|f5BePQcz!@+q<$|sq6_Fmk+BcNj^5;)*UP@gLp;kSoZl<^^T_(
z7Y<qj38X8U5X9mX#fAnuQa7uT1sxyNTF!9nUZtl@Pl=i`Sw-!4m_%sK3d#svy)7_U
z7A<j98O!L`jOd_5cXFvVYdn@K|Fo5YE#{GJ3ZO%}7bA?cav!1GfK&-1O#vkjPTO?U
zqbzRw2Q?TQ4%8Y&N;ytRjK4L1XrjX2yy%?HJ6qTi6Rlo2ZVHYA!I@ZhCS?CDBd*)n
zdO8T<Ktil3C!$)M+|ezLzPpQy)k`Hst#g8^!+fnz2?WVN;0#2U$h)ExOy4Kmo-U7*
zLtY4sb49it0+ob=t)Dpm#ofTtDgeMO=vt*z%9E~$z~6UPrmZ=z-k-K=n4d{ji#Ycr
zXO@__G%a#ou1wApp`1dS*YU+sr$t)8{@Gc#)wWK<vXpGf3iOI$+yqUTc-EzW6rv~s
zt4N|rO*is^ams$(t7Bj3uMm@r{RghE_4Rd&`6>j?SVUu5;7Y*v)G?avTSQ$rFh<Jo
zg?M?^6(#`=7WOsWM`~MXqZvJuZ4T-HEHsfzIZ4w+hFZF|lI~DE4~Q#}9&<>YQ&LG8
z>%uZ<FEvUKtHU(G$yc^5>a<uML|u&w_BGm|5#Gdnr@j)zT{<w<%qgGLMXPHCwgDyU
z56_zCcz_f8#98z0;fj`VfoG(fDV#vMy+Q^jLpOJ@Xss1&+A<nh|9DGb=~%cQtkVX%
zo=V+m-G};kr&T<Z)B@@>Uw7NBe;gJnAtG-~B1>^o@B{W`vTu=QjMD-%lEj#yI<aF(
z3b!&}V{otmzeK+-)s6K3cAe)bt4+3p>ekKbBHB_lWQ^M<0Y*^ZByQq4*Kf^$KO~2;
zL|jCiHS+4D@vPOw6C{(jbPXL(yx|Zu$K6YqBd3k`pojDz-nk6clvCXp)KXk&Nr+mb
z+*B$dcxh^(yY~Mwe^&~7Eb5#R&SKx8dC97b?p5SYM{G42iCt+e)k_+i0uREjT_U!S
zG|RK8xX)E|Vwhtu!jvgDD2!q{$F`d7v~nTV57Z93K+~>3YwoJF8KgGi5>c3&WzQ$G
zeiYu+G!Cc2LR3#GRfmMJ4%CxN4A1}(B`a`fDOl*mD!ZRlj%uZVEYb}w!&5J|vcTc9
z8+kARXzJg+ym2GaWjiUvNS*z*CIy#NN~n%qLTU6gO)@EZ|4JxH4CM%8rnr13n=_Eg
z>^Um(esQaP@v~jlu#O^J+_oyVYV+h$$fmz#JvcN)mvq7^xoSVIybzs*my+SFlv~mR
zRczJfN5&s^Sr07N*(IH@O0L?EUny{4Sr<6GU`gv-Xw`<tvt^AQT`Xzv=wj8zep^c9
z@6VSs_Wk*)jm6i4Wv#V;I9SpglvuevR#<fc<t2?@_Ao2GzBg_2*E)yzvVLWXuRPsu
z&l~Sv_GVX$%dvC&O&E;_yUL6E;RAk(+r>kN&KtWAL2QT7&J_yfmv}u<L*IRNTEy&0
zf%ozu$D^$vgR{-i!-Cd&AYi_vF^Ui&SEa5S_51{Lj(0&G!!Fhhhh6eyfhu=f%63fP
zjWHt-kXt{QM5LHxBh!%TsA)*8KJ2SdZ^%ZJ`pUf`3rY8b7X{w9n}ppsnGHZfauB_z
zI~eau+B5=ZclCuFXbA53F7{(%;&a(&P=8q^5PLprSYvU*Ug8?$NHZ+56b~2yK5;yC
zZ720}PbI+J&OX5}%9YiJP5=xzdNat5)ek!QCgkLTH-0`}*Ad=;YewN3L`_olZRA5n
zc(@sPq=Z->$t0uK*ic|SU352f6R*ncO2!gf*N`0RuD;!4gLROC72s54%3+mDl2r(e
zc@`kyjvqItaDKThSHuvCK-sBm&IZO@1Ow&sbTzfQGKyWN+<e3>RqDFhUey3PR9<S4
z;n);*5tR$((Z43JYMe#P!CZC7VOQJ(F`YBDq?}PLIx5o`Cm->~L(U+WNIJlWWS&bH
zbbRP=s*J!5<Q{SrPC<16Rt08)=tkG!Zd(0QIZ3QLSLh1WPLVj;lse%R0Nv90JE}7A
z`pn8^W}1Ch;~U~-ES>k+lIdEu3VqX3bw=S74TyN<q^??1W>5<)+;z@>I7Np#s$bH6
zPce+_cS*Coj4H?<!0mTlRx1`|*z(R7kbLVX%iqGOqc`^Z2z%>5J#}ZF@}uK*z#S;=
z=wy$37Y8w+b>G7?9{Ec%Z0F%EjK`<r(b&0MOZ|z>k+`7k>vC=Jc#!-cvm&UUqI{tD
zn8}AHns2uxp$BiJjMOqi?=yGYrVrcn;XZ%paAgu2=SX!1v}JK`$hHpWr^V|0blPa_
zzJi7V3AKDkj~?#*+<*MI(fH@9b$qwCyq4{JE!$Gd_Ug4f%h&QuYI&w>nO}!FM?T(_
z!$K42+X?%-&_MpBfg;+y1V!g63TM-``<e~VqrLXQyCZk^iA#4^a2W0EAJGw&e0G}Z
z#R4lqfsFut2AC9n(jV{a$o+rLE+8Is#wuW6%%Tz4d`)7FkaJ4VL-Y!4cQR-0l5#Ra
z&%m=z>$vrgL*)|s=JdR~pSJ==+AW)wY|N`>U%Nr_X+7iiM>~$b^AM*#<sQRYi|b<=
zFgEq>D?5t84<<)<5eIyYp<S(L#?-y%z$btwI8&453NP~TYLT<4=-X&198S<jaiG#1
z6jMN3!sZbQ4f?Cn3GX4kCaIt<tTKzXtPOr<tl~$(`vtuEfL?)B^lBQ@<tp~7A5PU5
z<1qYSh&hi!H1q`&LJGovo%80&suY5OpiSf}6G>{dhhX<@`6U1F?%-(EAz)e4|NC@3
z(`&4#fa=s2(`8jXs4{4&lY3v-PV{~3fE*PlOAw#!3=*|vERNYC&f8jD#WRR>RQOr!
zjUlGAbSr#G!<pE01Ge>d#Friz<WvKJC33P#SWc>bbl8s2?nn}nFu?sp-^h}m00mT~
zBJ{B2u5#>E9@yk#7b}c)-63`=RfSPv#<sHo4=J`BhK*bEj?+3gs?vhvpL}gnb@tmM
zH=4q~<Ouxuan(7fI%n_Bs?I6p!2*)h_TkALD6r50Va6)WFMfsJq?fQ#|Evf|zS^n7
zIa_86s3|qcgR*rO1g%$isP&;Kk7C*`CEr8I8RCsY-MhNV-F-VQ3ny2SEND{VWKi~M
zjFnUQx9~oIqRGyhozCI;N!xO0H=M@T@Cn^=TNDOnKm4h4c!GgN-Q(ujnH7yzAMN5*
zRuFb|2k?Ph${aV(|9)`#;|XkYXJ@dgIV^)w<qGbT6j4Dc*2L8jdqwvs`ic_sZbs~4
z$sPj7Zu9}uF$(8ZjaxiwdOAOvy1LK$%CjmefqMfNuRVuwit54l^}tKkMV7?t6KQ9i
z>XdYA>e5d{2JovYW;<)rK_jB0qZ*e@U^*pX5^?s~nIJ%-dh@Pxe8epC)A5nH3!yuK
z%*Yc`D_~`dPcj+k@yf>Kh^ij!FO<|srLIsJJ^eVI(Jer3zVi2M$XX~@+MOfq;l<ah
zaWdH1VP~K!|Dloq;x<(wfg%xvc&cW1DrKN5yCl#Q1b;=`nvV2-gC1;{4@)lZup2sj
zF)q3twT^6<hfDF6<mJ`|GYfr}Jlj%!EqFVDW*C*RFV<1Nso{HdkbPKX*`fiQl&|Z=
zAGtTq$4?cxRHjvGJjZ;Kqg<sZ8@oL*R$<qCyc3$lDw)VN9!*l6AzrskIs<KvK=<^E
z)={Sg)7v?+r`Wm(R?~az_oEO)S0BbG2iNzHT8@$BhWdu~60D;y>@nIc;1eQ7Rv-)o
zOK%K`IXmk@jTtt}ZTIcgnJ%*8E+6~Tw2G2-{p2GZ)IMRW2)iB8eLUk_qG}K8VInav
zChvxvbjhcKZq_MwH}bDJF{hb0icktd<2L~vO~kuYol^*|nRGwA?6t;O;4Oh$kUbwQ
zRC;JPMS+dsH)Kqr%$?yB#Sq8>4!@Gyz5!(=Mgs~vG*MV4{h^aw=-^H`ekXV2agbyK
zv5Gta)IBDdMbskG*T)q6m8kY~V+7!-+A_zaTwtXydM`;bOphpzxw<08vmD?qcO@Mr
z9Zet&C>EN|bBHBhXFmaB-k=j3B@}|e+t$9AlkBjsth5x|-MDwlSpVb{y%i3QFiQFC
z@VxWhnpazjijM#tcx8i7nO*W;1u7KapoRW!uP-`>-M8nb7iZ`W!J5}{|Axp@N7Mp0
zVevq+{Wo~(uzMig@$&f2#JzLymgR`7%hsLOcZxHR&8*WMS^K!c3EIVap6vmmmI^M|
zQqeeV`f2fS@tWSFI%0{3jA9YWN!@mD=8nY1Ba@>3DEVwTWjIw`#+A-?F1>S9lu2wx
z-dS{job_~ZHhNNxv^|(`$}_IW+}Vidd4=742zds>aVWlMib?-4Pp(u=SOd*5f!V7h
zm5JqkNS?Y=l`x1^bQ$Ai5L&e;RRYD2MaLrVnje*$v3PA$;ll%v;LUcs*uh4`W>wPg
zs-1Nf@HQKXCpZ$?7pyC$oQ`TFfFWMq5`8Unlo9<G{DWUqY-vnD$+3s76NtWpK0Gv^
zs6^2+Iz~4Mio8>Dg2ghGI&wQ6BO;%7-c*d2dUm`@E@$b|ZXKV2iVe(rvEL~qi%`R&
zPl_(2)RC+cW*fK(o|L@tz?cja;mD1wNM5jIg%do3ge@Scx>IDmjUkY<ae{;)kv1!l
zD^(R{GLZCdTz_n><g|}edM#Et+9^+gJYTFbk#s7Ssa$O$GZU1a$j*D1<HI*y418-#
z8SA`KMZYOn&X~T8Z!TygRqi_x%?oA|KDt_xjpE3QN)`u0NfVgq4VSGnD7mvSA4MCK
zNBX5BL1YZUFlXn8bw_ERh6brI`o7NNmI%gy-F&KMszikp4&~^MbD3rtl?F^UE9tW0
zTFyyE1F-z1CFvdoi5cH%GB&cKnL53&ZFRtME4XIYdFXl{FIAM)kwTOwQ;fzF+A&MX
zpdNaBA*tQn)nI*-BN*p7@FYB)RxoxPZM8+*k@m-HzdwF`deq%-?!P-^k2rm+DL701
z?^c#{&PT$ybZkEk>Fgn&$eLBD&tN%yKnx7D(0CJ!bf92+Ue}>B9M9sB#G`(w+p>x4
zrvvDCTxBnIP$SvxZ=mZ}^CSV{c2%f;#uK{4grkZ#@MjEYru1((0KGz7Y{^$lri4n$
zMV7Co$V`N>^1yC)0`?wY;^w-+_KQrVu-w8>8s<Szqm)e^L-1`Q3~j`{S$yZjH)7|v
z2uHrk=yr-Df&)@);L-gFIa#~<O3uFu{2}lpY`Lire~<#><gRXfO$E*ct!F$6XF9rz
zDSd!U5NuqL=jx~h9s~Q^Pf(6}Dh|bJhc3T88UP*S)F||{+twfB<Hwo9N%Xr21LDFg
zjxc*RgBr$RwlhRZXbiOl)(>$A6zMbLaAt`U!-32&R-KJ|)<ghHu~kvs`QspFV;7_M
zH-9NNo<7bkhzeH%ef9^E)&aiTsMlXqQ~ZJtt7P-BQv%{l+8uy7-2A2S6xQI=Cr`GX
z>KuopBIeejs5Be&)&zDZg$p;kS_Nw#&F`S2a35IV66*u=&(Z1W-$7k96=_i?YfxQd
zMSMf4r<VxHYMLLhh^8YG?yo9Qs;%X*Q$Cv8u03T@V%bJ3n`e)7TIsona^(v`jBmkl
z4LlPGgZvk3N|T0+_BiSCz@(Ctg`fv#mTT(d(L-!W=*So|Wm^j<^&Kt#s$Mym88Rkq
zVhPg;_6V|_FkNmA{wSu)HEgro*9CohJ@dzdKN#{&X6^8&&Utgc(`_Cd6~qXnCf>55
zP>45K8iO2E2`QvnU#jZ(+aSz>{-7eF()iX1`;KidslZiUIQR)$<a5`HBY!Z!-7mnc
zP>x1wRA=X{ADW#*szW_%cbc8nzKJh3!zHu0rpElRtZeOhy15jAz@y0r_?kGLB~VnR
zL#nAI9cu3X-GZCRbjT1c8L7KT@^ezE3#<S4?wT~ZxNC;$1wE7LO;Jj~1n=7$X=h1q
zj!yTRNAu6S#uCiTM@O8BvLqwCO-NcUcE*M!TD7+i(uXQiU{#Zrf2bX_oH{yN<4upm
z(qC}{AM?PdeoGg(uFGhzkb{FYej1-KmQ^jM{*3>sc99Z9Q8~7u=@l(<8kvQdY=}0*
zlnr1d`lJm&G#X{aQ@tpxJx8YF!_K?YgLZ~3EJ|f;ZYhoo!r*R#I3oFLe*%I_2ozI%
zB$u7xOso(x68Qx@)Gc)N+mAT+pB}2qR>@<npwF@(92li9$2j}OITMb=SR%Q>WKDfV
z!$sn<xV1>zJUKn-p0q&>P`F8E#TCHL6xdNx7X1;y7t$r69gSvBhV@k29R!LjvO2tk
zlNhlqyPsru6RJ=EGAi3xTowM#!s&WC@<iixX%I@WDdYJ9uM#-G2)0Fg%Q4moOmLm2
zTv3diRcwL%ZY2VzQhcXExDi(qj}c12qz!uH!w{qX=x!x4r_dt`M5GWIja`b&^)IKK
z7L4;K_vXBHcyiF)KRrG>g-`B9Jjt9|rlI{eZxjDbMxjdT8geE2r0y-}^I~@2D;`Ag
zSX(FE*QW>X=LCcnjx)H+>30;}8Vg90W$*2UgFD%}#S*^KxTQKZVvXX=XF0iT#o8Xm
z7SdI|M~W7yLFR&#TPNr(0b7Blfo0S2hdINmb{Rs`k_NoKG}a#R5A<WR>nNHV3Tvd1
z0w0O|2BApd-KuTufw@Rf(bINT3Ei74bEHQ*4}e#d`ecJqVqV$y;R*aKf?8nElnhb}
zZOlsW;lyb~1!P}3F4oBSH>R7)*fuTWsAe$r=@KM_@J5~Qo}h+7UHdbqfEhTXP|283
zDNTIa&wXIv6Y{lV&@XC<CNGcepzgL~E*0a^oKQ?`n1~;Jke)ZO+`k^XeG2%LUJpQK
z+OfO&YsDOmo%v#_y?=T(cPa|FDgy{X_n#^KpDhz;LsW=Dm@!jpCE}zw?<O}rapr0w
zeI`>&((ZhrTHbzl`Xd5<99^I=YaJb~7_XigCEeU<6mH`NrS~%}PC4PEKVE#{;<E9R
z;X1M%TyY`2IYk6sRzIbJ_KGMTG!V7U)%hYBDU+Una?cq6tih4WK0;r`=sf52KuP`J
zQJi;S&*mdty*XVGLx76?%=cOU<m(d-Po8%j&KI<F{iVRFEI_F@r_LgT0EAId=!Gle
z91Jq61EE_8%UEqNmHe%G23k^jN`h%Ra(kYj<58MZzFVQ2hV%w#pDEIpJ#@G<dBe%o
z2^lXtz6E4pD91Te$a@f|Q5$r}q9I!siTTlI#*Q^D8hvgiK7$t4AeXa5yNhVc?QWyp
zSwfOLD=<wsIq+PjNK->XqsZnMq~X<4aBBv*l-x8HQAtkEsrm|T3DLmGM|!4~F(dSF
zR&rc~UCNNNZbxZkl6BA$)l5Q)Qg$-z4M{dc`(s;kGvC`?vMWe4zCDZCjRQ-uPsu_|
zwRw!Mk1#ly?69B|7V<MO+$~(Ve5&kP@-yW}dS+5=ZI7XNizIr#xAxLGenP^Nw0T&d
zq}t1xV$o_kAc1>Ym3_un@Kt<#Hlt<#mGtfZoYquZq;K^U3?bG^cg$g8wWZ@sNWNfn
zm;tV}&wk?H1qZ`BOYh4m7b$8K*95aM_LVlMAj$L?s_aIljvgrnV6?hY!9)~ISLeJ%
zX4})wY4^C<*?))5IA%1;X_{n%Fy|2ckLA`lR4Cl!aYT1DGmS{YUXMVw#8^u;>P4db
z!^o8qy_ncTR;8~IgzqGznw2YB#*Am<-sq}5zSGJ<X-eXSfj^*Fi%5be!`?MnP1tm3
z3Q+KE$iS84GM)14V~<+yGn=HyodKT}(=;o<6Iye9itKYUV^5DeGiJ2q&Zws``A!!%
zmpV;?=66V-KAqEp)1#K`GPy)0<TP6bmJ}x?g)v<|Iju;VjX}u##k|UzUo7zcv}_g9
z{Uw$a_xbrCI;B#+YfClO^oS1Ioi7=YN7!VmjmV3WdyPnSEqA)tMP6UL{esT-W>>5B
z9g5zk>#AS(oQ#9Rv-87!`0d~eMm0c?-#(3CrNKcRm)18ck1^d@3+~%<4o{k|1yr}w
zIz4GG7zyUF7u^4x1xoC!uxnv#26y@r)AaCjvVYg>bqehDhiIzKwwpl$J#PL4)NgfK
z&7;;oKq0}SN6tEAXLT{bs)V<s?>Kx+DXAj2QHE0DON!ly;NyN2rkvY{EG$vguyo%7
zOsv+>ZovPYqnC9(<XSS8Ug{J$9B#;>I>{c*oeM$`)m_~nTqb{zY9zM6-Bn-=_te$7
zx6-|WN9dh??GI)|2p&D_z%>4OHU-57l3i)6^0+ydXuC)?5jmIGQR|smg2gAy$h&f*
zfw)SEtD`63HKpRo*RCbXHyJGe7j0l7W1lEU8eo!u@qu061E4mMh@CXS_%0z{@sYw)
z*WPsVSYU5~>Wt1M$g^3C2cbc$Wu;>wi`H&-aM_bpiU>=_FAZ6~=x?Y?Q=Ljg1p!3f
zM4YCi2;y|z8QA>qDWOy=7<jkF<%`GuhVhp=nBnsW-@757cFpybT-rHO2TNA>3vCLi
zVUkA4ms+cj6;)1iUAAz@R=N4(mXnlz^#$X$#!~>^NwI{IYYa)$DNed(!u3Iw!oVeR
z)us4Mb@Wx*ti)7cNYe*T0i@WemQ+494a!Isma=vsrBRDGEY7`F$prnXwphcZ?!nIB
z-Fp2Q?bhXMANAoWDAqx_Qb`5)zmi|}oLA)bSi|1zsf$<h){iYWJsoZ^hkWb|ZC9wi
zS9i2-V0l@xJ0<&LThW@%k3d=N!>UGxj-4D1grN-s3<K2Zo6^wOOjscyM@RBrhFA?u
zeg;V(t6;%M?5*;s`Ti7x_MzBf1{u3|p^Qt806In<Ben)g`<F2ijDYot%rp<+Y8>{^
zKZV30!j7mI6-&BGz^2aqgi+4&NCKDx4$wy4yK(QJ$xyT&$-|@@SNA?vS_J7Q-9gdn
z0_6ii$c<q^?%-=<Ipx_vjw%r!rby8QGNAG%!A&UHL=c*tQ4>tK+YxoJemfbb;OEsO
z+qeGKSkRHpnzG_Nai5dAj1xBF_`74>Go3+hzlGXv>$SC8%;6M@3YU=Hp5NmMWD}Z=
zQLY`jV+x`W?Nvn{8LNL?*Fii5710wDil0qqVs0D_(d#<XqOajf3B?v+ltmSUS`*lq
z5(Hjie`N|XLd}KOnA}@QATsdyjsL-)dIR63G@|&|`fI<3kqtVg8MAe<_cVt*u;V==
zN2^o}KAeU_0usHOt27;f`u!8pzukF%bV$5zB?TJ-*W)tMpkXo?3OV$mv2n}OS=2l<
zlQG<Xsg#*r8|dWgIz|?{LTX1zAiDxNl3>Xz%s!;Eaead{xv)E5v<?f8U<BF}2J7Ka
zg*0+6%=MEku58(Nrt-yFIe5r54hD5{noU;-YKX}>%Dr*&Ft287|FL;~(mHwDUZu~9
zoVmV3T}3JA?%8*?BAVXot~t)UOCBuJZM|W4j*H_%I3ph{kB-#Zt_(>0iVOXTsE-+6
zl6+1M<s>F3&}k7uh&ZuwKPl{T%0j40pohxg`T6PjYFP}_U<9{h8YZat!^TL+a`7B=
zVlryEY(Q17D4-&oaMlcMNhHi-i$KcqN8v#Br`9)G>qD$J!h&N&J2v7)<X*2a4x%u)
zqKMEu0kXNlm~Xo$r(ML&KE{aAboNMPK^4t1LmPt!pjV&mlZ@_L<S~%*;gXnGKe7gi
z!Z&hkShj^1TZ5ca1a^*-;-Z-nsla}>I;>x}m7P50M`<$Zy1iJ?_6od#ax^Oz2Z+dX
z-0T!tI7C?r7svxk$(Mw~h_ucsoS1J#`B&fySk+(nHt{cLreMAJFNodA@y^=AH0RtX
zdF%D<D_s+d`Nhf&!)Gtm-7xa6{J>S-nD$5{cbZYitU)8fopc#0`-^DI%etn{Cbq|A
zS*65gJPQQDQiz&x1}Qo&RAlmGW-@vjR=Pu^+hMY;rKa=f^zCwmT&0td(J(#afXUF5
zdX{DSO!!z)%3PH+R3{;2FQ+(x6KbS*QT5FES1Ji?q|&E@Z=x2oPC!duGyxVWPcT?V
zfof&ywLNsLWsT7}mV0@f4iw0xoc!E!up;P-waFc+{Qc>4O&Nhr8(d_47DwxG)L)<6
zNo3v{s;d&1YdV?kpb9qCsoi<F1$sxuq79`!t%CnYNAZzMF4P!KE3u&}T+1SYI-F^2
zApz2Ai4)w|<j0jf2%dA%c~g6?YgG%{XYY+TOot5-qo}g=bW(+}-@{zu*OVE6*oyul
zur1CFgZDVSzQI;Nl&4tLWdRf2+}zZ!f*BnJ5C#O+qqnZBA4+CRV@^~+wz_9}7P^Ng
z`=<wXtp6?#*t(9?X5J*ts>&zan(C?s2@`+`G`FRYro<dpbbtdHIq0J)0uBl|9Zs3J
z?h&%2h?+<BQH4Q!5%j=O+Zs=CEtR_2&Lyn`5K2=mf=u>GX;0#VJ#?<XxJ76f|Dzwb
zx7-i>`S>yX#J>CrCms05_wfgn@Tr7PCH!EF{l))ARNIJ(jHt+niwv+xNI&~A{FsiW
z{Bw&cz@{hoAN~Nw#CLa8*YL##{>1-)e2?naqXGXMixox_+?v=&O5VU}`~vom9}efL
z^=f_;#xZ)vhXg#4Uf4$1^C3?>HY|aO+T)Ek9xv7QbD79e+P>*>v9?b)L_d)u+9FZ0
zScvM;P)vrn`)T_rvwm8pvUL`Os^WVLs~z8|uN4qTCMBI~<)};09LuhzQ5e#eXtL>P
z%aC4n$}vG1nNs=j+$f*aen~bwv`ox6B7=#S&-tMYJ?+yAbowJh4<pFqR<I9Ccb(O^
zWJxyE`4!R6%{IHuer_e$n=h;g_Re)>&-}cv1;)Aig|!5j1^9F5Y2^d>!qH@F(tXM-
zw{H9W@losK@5P2X0=&$M5v3($lni~^;y7Brj|%$-45V`|0{=GUVl7sv3YAxDmBK1y
zwUP)jd~JGJ=pprmdz@<IbGA7$!^;RzA@w3U_ArfPI4B)b8(KQ<UscHxfKzYMrQsEJ
zJ~2lNk|%HoM28?28jQ&v8i9q#4(NZ8mh+aOU&9-#ZTitVez5+?&^ojE>^l3$?g#JD
zA{Km9Pi~XN1>(`-_jqtyTml{V7nNA{{?Y$hT>p~(kAo!z2E!#JnoJiLxbEMgV!pT#
zD^Ky_LKuv&q_?xMKoYz@JzCjfFK`5O2tm|S<2+~~0%>E6LXJ%D)cJi{U+0KX!8D=)
z6WEW@N&7J(rVpa=RV96cSOwH-eWa8TEC*XU8!PJ0p$OO=r+H!^aGLx7AA9cs*HqHB
z4_i<a>)N{!3&qd`6=@nELZk!<AlNV@0U{v@DHO4{wO8!DH|)J(*L7X7m$kbV#NIpl
zooS~eL2>K*|9#KCzxPq$oH;Z1%-nO&U9Jm~wF0nl_%|;v&Tr|zdwDrVfea=aQxVcz
zht%g#Pkw7YI;*5fNecP5_n1hnR_H_q#LxWAM*|gVy+T4gMP2EAzWF4x1_TgKsaPgl
z3k%>|=Kyj45Us<dnzK-rVXs#x2)G$3e8ApA2Y3#!`uAP?=Eo0VlQ7s{IKqGY*f#z4
ze__IyjsGtjLd6;&Kir?c*3#c+UVW=ibW6oqCFUEf#KsBz;{T;PIIuK|K_@WjFX#R$
z;!1;im9-Fk;a*wUS?$GLSW!S0Qx@R+7p+l10-0O`A{n&7M&~x_*p(C(W{}fYh*|RI
zV|V7`H!oKW69oPJ`dFC?kUz}Tuf^Osj!uRzvNSTQe`an#iz+4AkgDRiOn)fZh~xO9
z(W03YBfOsJj3Q=96rU-C-@%_iky&oaP%#NZ=*434h43#?E{^5f$q(iU!GRVt9Jot}
zP0uDfFdrb!8zR1XPUaBy-=KamdzbCb;4`?~%r?Y@KD0xUNgFavv^8z|+io6I-T;D<
zTH@`-It@&dRIk*Vp+Q3=bRFZ!MgoL70Imq46ER2!ivOTmWgsMWZ38YIAbGE(Kd`u$
zTV$%yrFy|gDs0;qx5ZMszNuO9)F1KaHx9@V%#jsXvKUbwPIRIM3>;MLvcC%zM=?{R
zNYxs$klhQ*Y}5?b0=}c7S_qD0_}pHAQN#KeIm!g%e1kogq_`D5f=}0C3U~S3_zM(`
zGLb=91I@8I;lRyqVs^Dj6bGSlBd-+-XM@8Fmz|>%K&Q#jD=;W3d>VJuKT_OHA~XrQ
zJ1|4$jK@4tlJ!VQ7Bb^)g!4$2GP7Wuc>;P}qL?vZ=Rlb^sMG+g5bl)FK+dGz4l!|n
zfCe2cU?r?_`axQhlC3RcZ7meNcXS4PAC&&r!2JV^1Zv5kVjX%RvrDUBs2k!ZB8Uzk
zauhr;6DkL;qZpC?2Z2H?BDiqi%KT?A*G!gSF;)+wWXvF@P)M1<n4xqagS<_k*~m5m
zN%wju1`w6Di1Z4YJclWjL)jG6>LZnA_~Qh<m6LXf&ka5Ti`N?&w1yaAq`x3a6c!&W
z6a)h38ng0FjtDFe1{g@@AHn*CZ%Ej{;P@zKfG_|A3CfgX@;~UJWrdz*Q!K8A#pF}p
z>J_LCi-->pQ}_J<aTrMQkra<vjyp;N_p)$K3Cs$MJP`7t&@d~&CBg`tH5<@VN@&}u
z=|i*!kmUvrno!KpL@Su(EJK<F7JVwP1Tn4;u68pWkA>puNMUDTq%b@{7zk=0BZZ*?
zP*7vOgptL_O76ChR>VWllH}){tR1pkA(lQ<yn@Q6VJyjFKTKT0^iz_*x2yCfgIKd5
z@6yGnCmA6CHG;fWe7{*eGIwROo&pYUydb}aZGew~?`ao4gFBO)1+qP%gB~Z_Gc!P~
z4ojVdAl#OPuY%=BSU*87MjXFlEn1*y#B8RbEP{2xJZ`!xi;;%u(u|e}<Ab9FLB$q<
zo1_sl`X28lgdw6)s!0VfLMP|sJwmC0ge7}72bBBELaS#=QEB!q_K5<@$D_#m_+>rK
zz!?-CZKtO*-H9<nJ(`5jk739JT*khcL4e-Je>j8NmIKThA;W{&z9eJG!`aih9WWV_
zl}X947M+nH?;H4+X3G7~fVI$BjZ)B`1ZuKVsfo~L5wn#Ae#mT1Li!1eWDH5DrcVI&
zJq1xOSSZ>7cLp1?0dOPH|3P{Zr_!8|O~L}8f5rq6ghm7l{Dn~>g#HpJjJ2dF023X%
zW4KGiG$7#YjIF^kNrY-z;)LV{Cmb=!{d$OQZD5tXu_zTVhX4U6O~jc;7J6=BiJEtt
zLsbU0k$FRCp<-u8fH+hFz{KEVED?Ppfr+UKm1$JQRLI=MB5P2Z;v`TvEd$S<j%eW5
z38V)^Htyl(?A*q+mxGOsrT)Niw}rzda8@jeBbd%hIF+cYzRrMyhtMR@Kw7p$WhS2x
zvY7z%ftwaXast7p@t(vzG?y{CD~p1{#gUXLPibyZY)L&=s5eZGDH9(eKq8bNRZP(x
zHpI~6qMpH=D9*S4>$F37f{?nbe`YJwh1c*~-&4hGZwV`@pWRV*F*p3N<y0N;D_k8!
zzG^zX$Qee<0b;+fmKXLnNx3;NXrmesNj2MZW?G5F@<72vR5dVS)KHr!4W^#gKHx00
zvsgJrGXHRK3wvB8Dwl#z%@Wm2*>YqDle}mm#**ZYvdWmy83+$W#z28mX2?}=Ji?0y
z3;{mKN#i?9^;!og8{{qE)Mi+&I$$A>L4$(LI%;ESAecvz<>`e_p|kcoj$UdHgS>1s
z5@G^;bEp;vQ_N6cDi`s2qSo1Y7#YZoid-C+h7<Pz5^(Ae&=M%TN4%4CmL~>lftCnK
z28WkASQ<-;xwC*5CRw2%u}{Wc1ZJ6@$xG!im5Ley>2FEdpg;n(iIC@qfgEHRS7deL
zfuP~p68Sh~@Zdx>C*puJoT#B>g8vS?4SVe@qpT9jmIQR_RA)fVwk^r8S5rD%PR$|7
z&NUY<%ccYR$p|s15f}_S=@4Tkhd~BbXh3!axdxq|Xoj>&Iw=F@uU2GGZL+fP$=Im5
z!7eEH336c2`h+V}>}S){^)d|JNTg60BNBFNhL<~D$R*PaOqC={z$SV?EROoIje+Lp
zpV}CB_Q~c(z5CJ8vX&5cw6Gvi5-b!(eS5?pd&P-hWl^?`Oo|EIS^J2FXC=39C=0d(
z3??6d{$aJk$zW9DvR}Z!jjtT+GtdOY)m(xW?lEMmv3khf0UXhTihr>9HQFpfS;ra|
zsnsy)6_W`HbG}f~l@{?jQ+8*D+C)6qP_x(qN$TZe#3mYbR!7;9a<bN0vqjH7XF6fw
z+>cfHLa1~Ugv60bL_o-Ob0W4R+6JTqrL<^PF(X2<CK?DiIJvO=+(rOgdT}j9)1mhk
zlyL$qgBA#EM#mN^Q3W^|nY~}=D1=HQm7~s(*aWCmV47e_7*$9a^`_w~M9nUM{Lf@H
z;YDL{d{VuWQt!>86jf`O5H|#(0R!QXZ`hbk=67>z=)Dfk1@0cF?keI)II#HOXd#{H
zK-cqA*CUP2(ylWN=Sk&#jX3!czC))auva-af&djVK;DC}Ua0ohlvo;;a#L+M^MZ|B
zX90xd2Qk4UNU{tsNYsPW;h|7LjyuNbV!4bPG{~vp2IXWJ1AxmnPM<%nY;*w~2&}eq
z1)a)LfRYA2&#evoXOd2#P=h|WdBT6ovH+ub8xQ!81garE&)wCP(g8ZH3YXQnqlSR_
z43kG`;2s2^%%aQza&d3lfjq#%4QbQHm8=Pqxil~GvGCM@;#Px15M{m^sLm@5S+Jx=
z4>E!gHz5PA!JT=IlI{#cSZoj@CIAi>5aJc1?1e1IoTVpwiFv0@+qT&Ahw2zn?Ka>P
zO9wDU`0Ey=ArWA%Wj9yX;>H^!2n!RKk2pz6ZAL^)Bx9ix;sXjEOU6{C8ZH^=WKk>B
zbh!B&;)p>xC<zK`rooz_Rt7MYjIb&S#%NgbQ7My>sUsSvEKpm3WUwRvh#K%Ncmwx`
zXh^6zXaXi9$Pc3NO28{PZ9ytC0BjL7PCPiclQcX}&dQhKXTan+d-7QgQ%kUz2irKe
z;bUJn_@rE;K*i12kfa4{Vb0KeP;=(#%0}1*PeqBsgpw#hSj4y7$Jp?qI$^=;!u*va
z5rHXvg|LUvP*$d5Hfa=?2M<fxWDz991f>qCw15K>oT<swDTH1{DGtP%W%1M#E-;-@
zO|MsGRK>yN!p8#R!UbWX04^|FF)}b4v<38fGJ1m4hVze*ZGZ`z;GGDPgVOjXM1mlQ
z49-o&=3uddEH+zkGldU9r&xS=bXdGFJStK|bs9rc(zrqBt_ETNKt0m_I!JA9fQeFs
zjg`$wfeL(R^cguK;D#PH2tlUGf`=_MSz$y7vbn(&6GHi})hp;v$g@Bk9U%t*XA#u{
zwMjw`dL8dQ_;p}Q!HWrHil!@6ID5;0G!;A%>C=pHLy|8d00%<iVUMV?=p1U&K{@)8
zR7Gh6IjnyIw^FnarK!}}YH)y51R6Rgf$Q`nUvuN?Qz=H7=Y9p^65|ucndgcUZb%Fi
zX#jfnBY+QZumXWw_$1Z_7-CD0$~eh~kt~_-(qF|(%Y4<)D&5q>27ro%Q5HSra)MTf
z9g?J0)yy-_*$TuBh4Wgd(@a1qONE`D7YhUK?x?6#k%V5AgQ?eOVSHGjPFLzQYVdb}
z-fKY8#?gJTB$gV3b6cK$s6s<&3pt!Q2JlE~hNx&L4s=4w1BD+ftbD4WVvOcUqZ*+F
zfRo1TOa<OQawkC`0?bu}nmiqT*%rQyv2<*i(76=lxiQU9Eev!CRVL`9x-6GKBpHRy
zMUqUdcDYL+dwk$u65w{h2;Eq^ux)Bh!6ZVso1mB6s6~t$w9zLgjXQ&*OyNvvU>6X+
z)4M=%QoTl<(gWEOWEAXHv1}IFm@X-kKpW)aD#5a&lkrSY99Upb5bg&YEdo=d3lIv#
zcw^B1rFIo{*HKBa*B{-$$f;rhgwvoWrI5g<q*9Q^bUb_FLy@A>N(~d!aD^eI$EZ#0
zfE8r|M$Cq)9t||UvrrgF11qCT2+@wrHUc8I{Zq0cni6v<BSppzIT+Q-G@}Bfi2xTI
zalo1lpO-)hN~OR_P|Q_T3ZbHaLkheE@+_R-eBe6^{34$R$<Hb2IHzV|899e#p>^tP
z8Y@K*RXk_B_VN7{GZ|x{`A3HbQWlOVkQN$sJ#i+hF#?C)%;Xgj?H?+luAh<M&2LDj
zSlL(tEJbwZ`ebA(a)nd8Xb_wDcZFk`9)QQs8HoA#Nr%j^b(!}VP<1pkCEiTM4b8%*
z(W>?3aXx1$P7^E)jrd2;5vZRK<^xg!VM>q)rb#pq-#V}-0nKo-Mr-0S1|o-G{mhdH
z)x=qroa(?pk(oFhi$w&c2W%-)TOzT*fNXFW2X>IA7`GfMVdLQ;n*c0jFan%Gjj|$u
z04bpZ7b}cCv4Ti+Q%ZI#WG#am2aG+j7`9P%NUF;GY_zeVyKj<O@n=L492@|NTcTKT
zqvFK@$Q)5BKc{Sz1;bAZjCJc~<{mExO06d+%bXRN3KT$6Yt&9C>DZEElRD0eZ%AW$
zlR(kq<YX-{%GiVZi-Mvb3<wrM{4Sh(0B_<TdzN|wL!VG9L&@5691=eoSDPFZN_P38
zY5hbhpgPtuOASJBv-bn&OtHx|KeQi^?MeVZSje9cNE;eC!PI&T2GU1kqvkNhfe(cM
ztVl4W4g@_9Mr3eqGJXIrT<&-N{CwbeL6kVaY&J-Jxs;cnR7=4kLb%z9q$~PjrbG(U
z-C_2l`703p9M220_ib!g9VresFA89j#YPCi0}=8YR<-Oau?DIR?9VnRQekQmW&mLb
zf(Xk5=@Z-U3HOBwKd74aP{SJwphBd2%<2xW1gocOuq8(Zbd_qTy-)p`K?LTZyC(Qq
zj3_cHS^&J*q0q67_>oa2kfo!^{9Gdb_hL_`t_JFQlLiKk-+`wUHx|{^zzd+11weBI
zrwb60y>pr@+2Kcz9w3Hdh1E4Uh^t0TD_y8Z0)qdBLnjTR8xv0}RKLuHU8y&N{9xa(
z5+Ud_0DB~zv2cVyXiCnca*a7I(~1GXfkJ;0lTU*L52cOmMUJD@^cusXt_6r3Pr=}v
zQZwNUu}BRHSu(sSr5V6nWAy^BMa}7sP-EisnKe_zSg&N9CJXid@L;KHI&g+Mp=s2k
zA)9m`4Vh{VuTJntK(vI^#2~v8ZDl?@e9VT5>BUjf0;!mRyoqx8-#sGq6(?NLf(B&P
zTa$H3|2|$26@^nqxuam0R7nlgMIgZl0Ium<tm)WFIx-(AFIlHZ^tJu-L$vj?uv%Pr
z0jNiv(L+8iQa@*NVW8o1g#o;n+~1d292mhDch*kN)UFZsVT2t5YdZea&{1<v%r}b}
zJsLJoKtIP4fcXF|+Y1&?DZXXpTrwM%l9+`924|Xm*JL$4mrM;+04JT6tR+?ir;pz;
zRfPeeq6mrbhcABSCoIVmGH8;J$wZepqvR0qA|F;If@51KQz?P>84L_89F;LisfK{i
zj8_e$p~IhJo{kjA7{Y)+GijjMjz^XZB#2iVfqjL)!nbJYFpfSGP;b~@@X1&hNI=1C
zHS#)ZSOIN_s|S7eu$OvPi@*ttzKx<%SQiqn1pOS45w-)(8)0mDI@MK5NL}!}L8w|$
zAc21sz>o#V-T-4EGG=$3I{tw(5roDR)Lf828WbSnP|>pu754IA3>5+Pw=gaV9i4Ng
zU`;GNt<brHCzePV{mx7x2u`K=3Bl8WWdq9!K|&S@O=0hoiFitMeg}#qfRCs6q-s-G
zYCr}A&`jjy5<?mKdIwCYqS4avI9BEl6G$|}=S5lIdRF!@6Kg@Q08<&44CrAlsYFSl
z_u$JoF^w!0_Q``%O2LgpnyySjKZ900!E~@l224ndN$3SpMw=y|(ZqH(TnSpv&$aM+
zv01wK<^_7{1bs#%)1RSSVD<>h1qCZBv?^&9&7_Ev!8s`qtW+u_UsK)3gCycwsam13
zc%fjeQnOC7x@ZrRMTpM}2LH%?oqcmm#Hl*)KyyGQ1c|l+j~KXuqIw<d`NUMvR6AWL
zohX4hoqS#(XoJrSkVr5`3nGrx6Pf@yt~>_b?^Nn0PAzHNEDXkA@@N)iG<6CmP)iQL
zi5pOqIT5F8a<7XRHw^|Um~{z*TT(raD<<RI=3*8oSjJd<S;!4W@S63|fsa|MO*SfA
zs?dyvTZm>zAcPk-=1a~3!Xt>Z#Rx*9g_8IPOt)kicFtfHUo@uTBn1F9g$pEH`M5;F
zLXsW2iNn%l#E)fO!D=an%}(0mWS7to1Bem}t_;XH0QE7|JrHF@SrjNT0`)wsVMN~r
zmEGVzPmDFlO=dwJFs+1TMS$6b@0k{{m`U7$1z68W6&7`K<x0rPGL&-4da$QDs8wbv
zRC@Y~4Hq=Keg$;{?Isu|2VM#M+4ydvKo$syFRx8oS31~$z+R$Y^&YHY=|(LmaW@tn
zCd);^MNiklAP6c^gb;U_=^jCX=>dl$VjBfG9bmi+{|n>V%Ya)ABCeQsf_2(PV}=Y;
z$jeG7Nd!J&y|N!%2%%CymI3yZQ3YKaG;|^O7s_j7r{|a~E|Xj<H&@nrgLQ4x=;3i;
z>GLR80>GEaWYo$7OsGU+z<wo+9?0#4D1NAgig~AiDgxb(cXZZ*MTxpf<1JbY>!OyZ
zw~(SqrOg{AYXx$Nhed}9sOxNaltk2BNG}7oHjH2)89IC{DpC+0A&3-)M-fre@k>*j
zNkmSHGy1qXoSpy(_yKU10rUqYsEF_&fa_}V6PyKsFmN$!6HeS$L9y^eLS8#(R|L8b
zJzw-=xB{4~#`RnvSCpK?WSTQ)JWaC#2MPiIO=f}^`xX**k@ZLbvejV1p!tD77Fe<e
zLX}{tvXw4fh=)pK!vs;mT?DZ#`$MOI<~?GZp@oW0o-nB)xr-E70;Z*>)OnKJSDFcu
zARVxZfDqY~RjPoCl{(SEaf#W%u+<7fqu_H$Dbl60=xdZRMZblVG=@~ER`1O0N^B&_
zVHPYY4?w-9sdq5&A9yZ9iVAt@5y4b%>JtWolAFfMf*b5cpKerJSr}~{+P-80$*l=X
zhSoBwENn)SvH}`mGa0^SV4dZ74nXNN#7?3QQ1%2_@LR&(kq@(u0`>y4cq=w>FBX@=
z@%=7h<)t?OdIjnSoHPy#tPi-e&VBW`7RH&A5X&dAHN2WQ&4ILg4I&@K^&`-lLHSSE
zjwvQN9@lZ24uY{^ot<6C0_NyrblAfpa6OZaNkkriqb7k0l@fAMhbs)wV#LKO;IK^t
z?0vvCz%*+(YKS3J;2x)MXGA(+J~SC1F3!gl8<bHCxl*Xt5q^W5yZWS1G_Y(F*G!W>
zLTzq&pAuanS7=q5tW*@<F_jU$<Zv4@xtiSu5t|FhT+<sE236DwT?JN>Y5>w@x}Xf+
zn^Mi7yy<l=H|khoV>AhC7_1J^t{B<~kq8Bm0l~uXAYyZ}_E2%zPnh{CL6eEIdCanI
z!T6zvL<OAz_yq+x2NXyr_NDb8&QuGeFWPvJa23JekLhK>a)BDz!SF(0Zx(-Z-~)%9
z$uGMGgP$WRACSERl)6dYi~|e)3g1O(3DBSxdYnxUW|0PAV`v-MWJqwctE9Rly7(Jy
zTSg$T3Pa8n8j~8Nhq>?u&Xq$VjEte|WPgDKGWvoUg<2YCAA#E&x<3)lbbxNco_GuQ
zW!4HYZ`@>|u;|e69J7$n2&Fj9#3toM1V_;CW+)P<cXjCs9sBSj5YW^w(dkKJFfj+H
zNrEVc&K36td<Y8_G2x;sJgFsuX~JsyOQ;)Y^-AUG)L{uSZwcWB{3SwJQ$VCNokogE
z6H&<(wL4KhJ(e{nbm2}jOn(I>2hc@rgqYNs2HaPybg5-RzKpFn+&G~2gPKa2n5dKi
z{fTZaMuK2TCw)UB0BB>}GLV`~s+A@vRS=;CPQLk^GKcDaP!x`AmVc=#98v-P>9TNi
z)iJLNN3B{4n<MJMlGcSIQT(Tc;b3YqOH>)o85&!Qz$Ul?2nY*g63V_&){O)}KqaA1
z7GAJuTub?%=`g!J3%3+nmjZh;BCs=FBdjZn5>?8e#!;Oq3wHtA=G<GD`Q3$~FQYSu
z>0Dw`xoMI;Wg&6OM4rV+JxmZHL|G~&mAr|}*{dE$U<eJT*$jXP34F%lEHH`fCi7rp
zp_6DRbc3uN93T#h0F;EqZD7-sSgbN(z9a-$q4$;%V}v2fRB0COFYq0JG3ryLC{>$L
z+~6T-JwUnWfsO@qfzSa2XBvG}1=52!-L$9@A_Ty(6%`lpO-G+r3QFiHJ3_%J>qUsH
zsBi-U!B}LW@#0B6_|zO3U`3Nkkt9{&>lnszO;?!CqSPZpDOYeNkQxrFuiP6LdYpku
z2bGf4z(WK<+{)Ktie8aj3!O*QtiVDSSqek3S$sAb`>;SREcS<4{Y%vYr`WeZtf@zU
z_0CmPKLpEwI>W(b;JQ=co}||t#=Am~o7@h;Z70hB&ugRFpj6QXtvWp{<<i2b{_EHd
zN*T2{h%RH?d)P^kqSj<kI#Ft%5gz2!-7l#bQ9EX}{=Qu4{ZQ>b@vyLp_rEVa{CM%w
zgC0RrHYoiYgojJ_|GMzZpaa-}aq8HK52m(RBttx7Aq#-eAR~RijtPO7wGQ}Rus0C!
zreO>SVC>2hdG^?;q;Et0%K=Ff{G85PV<Jn0rG^_oVjX1;Y<nHns4+-#gx(G)aXtGK
z1BDLI_m&7#319$%V2je%1JEPj+J_llCPFDDZ;g9un+uL=60Ic8;lvC8rUAfF!>7z*
zBIJ<9AZpxbB~E+<D4}50Am}*6!(qq*vuc)%$LVgd$#|HuS`ZZw93~DFS{j#}TN;k6
zz@9x5K*CEZ1{WkJF19C_LE+8E=KNx#a*~y%;iV`PT12S@1CG>M$Mm`ynal_rG^~r8
zK85(pAlV9-XK-vflO@4eQ_zQq0O`4TXdD9ytT@`_VPs)0F|3_LfiO5H3J^*xm1}YY
zhMcBsxKw23V0EEqs&)Xf3r_@o$%ko}u&f^c8xpuMo@)ABvd|%mUB)N^0BH2PJ!Sy<
z*&wuth%$>>Uzu?A!8&GEjE#F>a&2FTDS<F=k%u4EpMtqTkUJ1>9K%syKp05>h6KZf
z)}%+x9#b**A)1Q)dsZnb62d8qjMJRW`i{$E;1*U(4G6$Pp->ofTwf+?iW0Idd=&D0
zD*hBBJ-s67QP3+AHvQjRqa&j_|67-6@TuZ0aPC>k?iQkIq<_G}Dj`+b51(4yG}M9H
zp*?MMgUb?(Q$Y7J3|64rB^o6Kw5CI7cVObF*)wRksYKuj%Z51hn-bs5K`a01y<BHV
zEYYo;H7KsdPBmcRI`!?TEcP;m#qqCPrMMGl1`Zrm62l1&(}LN=098P$zbVhB@qejv
zQK6FSo$2-ZciTwLf5k}ZCXNgQYh8os5ipen4HAUHw3uTLkAy}v2Yg^!s!#Ti_yX0s
zz=rfVl1RS~$A=fxH;k}Q0O-TUpMfA%N~EG-^aa~a=!gPyLv47K6go-q`3CcLk&?rx
zKPXiQI9`?Lo+zd6xJhcf+yL)0hSx_yJnZa-;Z#UR4j<(6Qk8lrGIpPE5eHVAN*y=Y
zg$K;Q5YZ(Dz8Lcy>t9%xuT!F=IBZd}Z7dr04s1N;5(C=!rX<*Mz@1@&IGoP>2WLzw
zAh?H)@qg!_2@{0>X<sertDbchfc7k(1=Md!d=}6x|7V8*)q@fp2CPwWRVdW}d*oc$
zO_S<*24$0IQ0lV5M(1H}6^vx;St&ixkTX(R43G!`GzXHK@9ge?`p7}cBz<&tXDtcv
zR52PJg%Hh8EUGWq@k%i539cKxMu1C-m1+)N;Ua89Wcw^G2R%U~R^T_lfbI3@My=<x
zNAg&puEIvU*{jGcOgKeQP7z*Y`1S_QDdG<^;`9{_L~6jETL_%Q%JgN)9@0xN!n0H?
z_aEqy*nbZOIMyNxaB)h55<1ECCUtvSkX?!wF`Zk>LgSJvSf#^kVg$45yI3}OFh~2u
z{s4AxEKchF@3c>5388~yEg}a~1J7izaKST@fM6lP1uMpDh^9p^qjrasPov$lpH9d^
z$*D>?@*^NI>GUWXOT8#d!IwvTS;nzlI<?;!bcxj+i#3Jd_kmG?CFm>l)IZ%((2;M{
z&x1!IBE><Gg0N7LBnnh2e|ICujj37!;S2fEK;0F@fwgBAxdm?njze?Fb&wZ=PPTS1
z-~HTFPi`7DTwuWH19VJMHR7oO2s1c)lBA!Sw1}3S$+-V*Is=jzBQP2?L4fRmWq<xl
zdQT7?C5E7c!p=XY|19+xW;;rapu|e;tWR`SD=5PYm&SivO%|ghzm;PSXCG(vqvS7)
z0Y+U0;0r3k@TDfn=Okw&RfE&GEQ;a3Jn<r^n<YqPGOGSH;~8Ap!o-0;GW(ov{*8yA
z$?QT9rf<xz<=F-Gb!PKR25HeZfPeYjm@&sF#A8w`Ktee{UZ8{YIwLG4gfe17nUkai
zIZjsKNf918yNn-10-&85Bcco0Nus&=+5oW+PT!03evqdkB85>=ah7okEws-%@6}3~
z0wsK>0>(SQsRit0d}6_a37=WXMqra7XB+LnK&DEf5v(JVOk<b?2*YS7Dh6CHARxv=
zWl&=5GKX%wWa=rL{R)={cOZbZ#zf;6KPFx(a8E<m6LgP&@584N1REF-7$A+Lt4GNQ
zCX%#RLq<ofk%69~V7UOy!badm4_Aycd=hI*DMRhSBICu#QrNYG?ii#)AWI@f5Q!-~
z%mojjua$}L$Te8{DpQg{QU}10+1V!uIU7{tK(7$G`oM{?(=$V435naMf500NP!gRA
z0-ibz*rp(O{Lg4pVK^-Lq6SKt6ahks#IhOnO_ziM*xgXSQVTG^iQR3S+Cv%az-WMY
z&1x1&n4V}t9QYyjpT%C8gPDd20wTo-CPiYA?9CF)CMxZaQl=c8f6VC<A}#};V0I^W
z`0%<5id6AE7{wvAN|ltu0!Qh4pmjh>R~Yyl`aX0tVC^H<pc*wP!NpM_r!8<aXVI{m
z2nHrz93C16BmxZ{QW<J0fnrc!Ei?fnVL^V#e+WTi%sKOrTHp=^(j28KIZiZSCbIxf
zh#CO=+3@3{aA6?q0PdGS5>I{#x&ZTD%sOmoTqwOWUKAb>8XYJs5i=QCZF<^!0n`6c
z_bI94wh+rT&D}(p$1@OBXLA+_8C=$M(lOH)OA40LcnR_IO?kapE_%`nj6P2ozEKJu
zL!Ar!FHFV}-kVTcVq1W{V<9gltCtpuAlTsW@Fy;Moy-MS+C!MeI4-F@J#t7M^CP_W
z3~{cW=2%W9wDwx9C1QJ&4#Xk0=klYQ-1zxi;$)MB%k444$BHw^EDwyiQL|Qj2`3!w
zFs3pBpQ?l$gH<8M(X`OgL=Xh_EXO-|&MA;4?FA-nl1zs4W2CwyXvLB<AMYQAix%%?
zHuW{^8F2dP;NN_i*aOdA4Mqf*+Z<>{4c4zQc?`&2gSB~@R3)^?eBIzj^?D;zmQf(y
zp-2bal44=78i7`2f&$&ifeD7B0t!N#!+(N~Q8R7S>8VpkmBj!8lcnh83kppk;s>tJ
z!9gt4H}X4R4lr}ab?_iOd|_BbR9w6yDl$4C>RX9;=;;VSqy$lD0vvtnC*OZ1270eR
zf#r7}?HUy?iH?X6N0y)`RFB2sLGg(G)>nywaNm6^P#7ZujLy;F0lwdQ+7>+yOsM#P
zZX$`0(8h=(?8Agn!QwzZXoDJ<gRenB-GC^|!3X4l!pvOA78*$1l+qP)KUkq|-~|Bo
zP=5#$C)WDlmwhBMDYeA?`1m3G4*4gy9rRs$^efcAfapk2J4p#c94QDA!g_)DfKY)%
z;=_Hn6*LqbP`lo?B^WvyHD$R3MuYIKFe*AS9OjtXhXzadk--uNU+$1#qAO)cczNmJ
zpyDxhZoEe8L%rhVg~keAC4Z;K4VuJ%rGD*aTY~NhqT1L?`3Vlb;3T7TS;^i(ub4Dp
zTT*G@HDEy)d~pOwjW(4D^Ihg|W^y9$@+nC<a74xXi?X8jC?G}_`0{POVQ{i+7>;>H
z>?o6bCmQGfTlu3L)muz~99C%rV9rcQl0cS4lbf3Y&6I1KoK&W_T|LARr!Qg_zX_B&
zV6!(8z!kRQHt!4O2(~dd137vYsuHkdsNYJyuc+YT)U}OLwm<k1c|2Ya8X6A@+e`2w
zTHev&;t2NQF6n#Jw)o)}i&b1D8D&&3(4X<5u!zuaJsyvqhi)iQ0D?TG-U_Fzo6Z2a
z@4H_TL`DjzlTJu=EK(RB6e{)?gi=zXbinuDh39Gs9*YhIcw^x~p+d<I%rxgUaIp-f
zC%&XFOX4I^!myIR1<;b<*y9C(fuL5kL~r<qiUUIM8}Z@dC`$a)Y*2PYxG?ljw7_~h
zUeuYb)g&RJ2zagoV6%9r<4Q*I%#cXh>AA)z=4L=J6!5QXB5qP!ig109jY&N1Ky6A@
z&>mJpZKzi8Q67vGp)we?lz|4>Z-H*)<N82&=~1Y$%?=aX6{$)<o}=bd%1$Q5)}Njs
zY)K>rA&yQd6JaB$*YHMXNiP3XpoQZR^70~4c8*hs#zSFeqyNo*A&7-52tur2_|{N@
zOAEDJaI$r0oLhjsoY>3G#q1+#7jQ5pNc#+SJjlX;_Y_o$ohmgT*h{)uCaHjc2axn@
zaP+}yR4<$6U{GX0zVJmi_`hv<&d%Wc6Ud=gpMIYNw^;ZxdgQA4UbDVb8*V&X)VMEb
zv5hVM2#Rmpf(H<brY-s{7A+mt1_;M&*ls<#X+=kd>e*2S=)t7rkP3XgXpayCIcQAV
zGf{Q;Z%{@HGUdckoWyYO9du!1$yTYy*{L!qD=CBp*MO0NSVXb_49iGuHwdHwx&$+=
z+3C&%9s`RPeh73CKnoFBn&xnD*vH6I1E|k_o0ihasHOp3ATk{^%6wTrW#}ZNEI3~-
zQyhffVNjRQ*G)<j5XlO-rLz}2GQdzlHLk;E`(%^`7-`^S^5W*d&%4PH33bot0`ND1
zAv}N{sV#^7JrpD$MTQGPEv6T#RLo{Hr3DApC@R3g%(M(yy@`($dPUJNsVRjHLDN=Q
z#0IEcNbt_;T(NN=@<@D8q&Pa_yQ6?eV8sst#Z@d0gU^gZX4FATmj+*DH;a5g^e9aG
zXUX{of(!8`z$6gvl>^gH*QjDOsb!*Zm>oz5^27l?KTINJfsRmb^g;O;;3Yc;9MCZJ
z<H3Hp81*`r1f|*qJo?{+H{<~CNuyEyQ=tuL6H}W;05T;ce>%@nN)|Na=aVa?FmJ>j
zm+$al{xlsWbwI^w^d_Pv6sQ2C#3{|Apnpk|ztLw64g?x?YJ7J>p90>uL~bGpyG9G@
zSjg;wJ{K&A0q`DCG%5$mzeAOQTzDA<odyoT2w7STe8BG`gE7P`m|Tqxc&#Qwp#$MK
zSd{|wmWh1L);w_mBNrVn6@;4s^Rt3FpCHv?vQ%f#6R1|AQc(vDdKNfQ&s4pPsr52}
z#(;qdfQ1G(wqBM5tIIVinA5?Ozd&pp8B`By=TwEWF~tQv?BakEtZC*d*e@KIqU=H>
zJKBqE4(=8v2|)fm?9)du7>M4Qd;GERX~C(7z3=Q!Q#%>(GcgWogkKDo2>3nYC&i(7
z_N|&#iH<QsNSZ9GhZd5lOp6KF>z$^wOVHrmjOHH+KN%&Zo4#PwqjCTg5YrRXCj)>C
zL6zQWCf^k84Qy^m(e?nmAfUD^IviYMB88y>OasV17l43CH95WN@kX*-VTMG<Q9e8v
zj%Tos>7)fHIuwc~lGlQDK`GfFAc1scKLBM4r2)A1Kt*If0+I#ZNKxVx2tib<D5N2%
z6-J#W{t6W+4o<-J7bro1=tw07bHpjc1oup>!3c09hrk?7(KJj+iI51zRQa<GCF<9Z
z3@<7;QYZ+Nm_p6dXq<8wQ%c7+vYuu00r6q3S&XU;z$K+Rke{H!5No)Lt|ls1&Q-Yy
zQc`t{4JPLE^v#k*V@&&!vZzBi)tS~ekS`AA+huB1N|_R5)<NoclpYF!YarIE8gb%+
zHAxsFf%gitMXSJahR3`iLqT;iAR-#EJ7fA3&>xsBs8wJ#Gp;0QX+va8f0~GpyClgS
zG+3cZR~VEsDFS~4rwB?jGO(h1EL)q-1IH4kU3DxU9Uz?4Xi`wssoKDR5+jvFO7Ow2
zpv8eSE*J*@b_Dh$5-P!(gBcfkqYmvJpo&<j+N{?2+^kS|bxOzrnYXBL6Vo-)73j>(
zP{)dU;|8hjfZjme#UeqZ(>Ay-(Y;67SYs0VGlL`t-Nkc9#?09%>}^7$U48@Af}O&X
zh%=0Vu*jjwrCx?lLrhRDyB91-O+q>RttDho;Dv&{wC2mnnWZpkKQrUXg&j4Ye0sfr
zMor`j{VN9MPKPd<pc67!V*t)1_K-|N$!2C6WkW;+!>z?u<rcl@Wc1`V!LrnlHm0i^
zSVax!-U14M$<(7Yfnh@c#Vh<L`zWIiFt)A6;KhReAWlhwmI^FibE6aLX4+mt1GS`%
z>B_~>P=UWV5*~T~cjQ8#&d^Rp<<lSsS^-n1X`%7S4ns_(7$XyYgyjRIbFOIkVe|;J
zV9|kS+DDidWJxv_SdqXT4E<K}H;NA3{`i)Jg)fF|sZjTNb~lQ>*+z$j#)pc<Ar|f@
zmcctrj4fywv+Xnb+JKcTtX$;g1Bk<dsM~C`8aX+_G;)OsDcn%0I>`viY;0`keidUp
z6w`eSeaFY>F7W;U>tYp#WR09%hOdO+<0QitPQRLDULjaQDNKd{(7C7`;A<q!z*1Xv
z7JP-kFrleJWx6t0bgQ9kWMvXIl~Fn&GP+>5MPVXzWOo(`LjzHk6J>xWYpA<9%xN+;
zf*8^-<bQ+UMH+@`9qRK`#Qs3_PMQQ{!GhI*CQu@TIz}JhCJrlD-$$c>MhxI(M!Ah`
zU0br#KVTC`=uid8$+VdajCF?mC2Jqd8;$BqY<prJkx;&#q<JVA+*h!5ql94*p|D<^
zWzC!Wc`PLU@lDgb_;fZA++2Ll9!GBHZ|?Gvt4D&TXjwn9PYvWv<)}qArzEGQpQYMJ
z!Xsxu#e2biNIEdrgpm-y5G|=5{EmdU^@0Q+%1lru!^DXFP+}@T5zIPYQ1J`)NGtWE
zI@oL^7R3?(!nGC-N^+;N9EmS(s4{4GlZnh?4;`X3B&jqBp!N~Ru+a4t=`W<6IDUEy
z%n9GB{2?6hN<#5P-KU%Z2Ro(AA@@57eCg>zPfDvXh@43EqW+_8iy>_F3Kb<{Fb%lR
zG}<5SVF53pff6qZb=Wdu{RTb(cuE-gBX-JnWDhpT|J%}sY57=j{($@yI!iM^%eHtD
zNbqYB^=y2wNEiupLEPVC^+0kL!t?@Br@_)9&P_Hj5(IlSQcqJ?JrL2b&?EDH)76<J
zTs$NTnX`DvnZ#ASQr3+RaHG&DKddBB6b@hyHA9+3>Wk_1#~gGh2NPG0)1?N@ndEf~
zXJRr_p}=tfBBWYV0az%U1@|XW7)bK~xc`EvZ{0m1Zvrb&S`3#$0-!@PScFyN>C=<=
zJT0K)?yF6LNJB}=M0|T>RmcJk15OqZxn0VVH2{=@YK99^4+)NdyBwv-vdZx_yi7=;
ztOU1z%tb@nQ<9h<I!ilUiF!*tC=&8IwC35P871rtBnJSKH8I3I;0SWSS5b~AX-p%^
z^hHB7dxWvap=21Y6>eahLlR30Jx!4^VCY5)Ya!skFdYX<dH@bIUO$Z{71oyl2a^=y
zk#P~N*uw&goC_U}mpF86iD0H2)h%JBRz=y1sGv!qhv@oh1V9cyMNN8Vs<lK70RKwX
zX+TDWM4Cw5&yZOq2HCJEsE@>s2!vQO`}fSNGfxcoGI<!(4_Z&u2icPf-on9KXc|OO
zswE&wL@5mL8w;BU>=iJj(E0EU5H6MngX0>e$#hVPziHAqvb||{y!o5llsD{)r#n;(
zt1Iw#0a;SYLGm}02yh*UaK23;N4fbI_L#|lYNIFhGPLUOiG+6%(>)8_50eD^o!w8>
z%l>@#sh3T=&*`3J=_6$vA0~*64;KdtCErrHsnSeksxg(hB0|O)Iq5E4K7ud2sDcih
zH*|VDbGbzCF-H<~J%ekkp5wBHIsmF{zA3FGWnB=Ac-R8*!z%GUgCPoZ0)SjCg&v@P
zKvYWbZIImp@D%`MbSB%1qFw!jnw1q5K-V8xvQj12Dd^N5okn9oZ8F1QlMJxNLp5hn
z=A_&Ry-N@c8DS!&>C6*AvKEbjuGD6GVacb-b~Ngsh6G%VI;9LBhTx-27XpP5QNehd
znsy4R7esW0fMv{HEXi7-j0@o8N@P1N^cK!%0e+GOwQ#2_T&Wr_EoeZZC8GmZGF*~N
zhp*riBxK=yC5$}^^c*OZpx%e(tUFI3X9H~@nlEbV<qA3<53v0a!EswxYgl;7xR!&x
zZuBglDX`ONOi?$m^V>idM-s@LfxIV7_*IjtHR9b2EpavSe<Tua38)_Akp2#YKj;<k
z=jPw?0t7rnNNDtQxnl9FHTY+w8H9pjX*Dq69|>Lr;0ZQ!6FXeVs;0)a0m@}$lU>0m
znJfkP8`W}Wx>A<J`r+b1F#;y*5gY-OEC~Z4;ZlGud_?fYK&mMregoOZP!V*K>VPHU
z#S4U1F)DDzOruf*<eddeoo@zkfhQbHvkF`kfEP|Ghr~$+r4yx#U~ZI3nxIh85kIsZ
zp&liZN2j^aZoOm>G=fZ4QgSAl(qXYp0k-VJqC=xZ@lk@vAYqh*Eq!Ex;V24&u>~mI
zwJb;j#+(NS7zlQv?$HlAX`tEU#CRX@$Ouv`NN9CHGY7qd03Q#07POTp)k8pYSg;3^
zFwiy!aKRkQ2)tnL=jQC`?B?nK5lfi~<eLFZSXdocYFu$xyj;>05JKv&fqx;3Mg=J3
z<Op63L}po0IIywcx=O?_K02kws0Ygrxf5Xp958a3GvC0{m~=*pX=jV^S%Z-WgbD@W
z(Gfp79-PvQ{HEY82;fu=3QJ}d(f=m63uJI~r`f$RbF`QG-+%u<{0}9tqW^8zUg6on
zL)K0v_e_MzpSS{-T8~-r5jssGOpAzb=aJdY1CR@r^c$|!f9*Ux;GfigCV#eZZR6oy
z%FW%it-Gs-o0~_QQm$>>+PStb#dG~P1YiV}tUTUdZpU<R|Bw7PIwCx%icJHs`>Tk8
z10$(FJAwbNP@ekV)ylLR)E}kwkwN~Y*5x-Ur2bJpAtWfU)K~C-3-_*=O8umwHaJdS
zs#K+#;QvdPnmxB3^+RieC`@R5*}7pRw<?=GbBC5H#VaKW6hviSxjrZBbYfE6{XwT*
z%`Li~_pa#e)Aio-!grl33{M&|Y37DuqWTe~h2^bkrSZ2X<mwW;gdB@pk=SV7Mt;(@
z&hvyT0vpdb*mZxo!0OeTRj;{e#qO!EFI~U+W$&lo7bw1DdUufy4zHJcM)Ky%t1oE}
zeJ<^s@o3l6PxfDa>XmkF$EUk*wto&>zkP@9{qr-0h38!A&G&w{qe%DE;fq`8g*jf@
z3%6zOdKoXYJ)vypFlYRI>k+rEOKT0-aDD#OuTO4%X?m)D<u?;6mU4*xsr-S|kQ3J7
z{pJ@`bePa^?Y^!RZg|WKUpL{7JUb&JyY7asElO+iuk5eZbMx;XJ3CBUxq1w*!puj8
zf?pT!x&CU;_8i5cbB2#E22Cl|GyKYcl;0kC>-T0gv&!$jiJx(?!<oX@2Rkpx3fX<V
zXzTUsCm)Yl{IKcO507?TnYVoE*E_k*D*N|cD>@f-YoTMvLD#~2Ss!-|npB~>Hn3B*
zUrN2IR(kN(ixmYem-P#o>#=8;^^6Ju3p`G`UF>>k>o1$O%qsYxN$$0CeX>>VwtGFq
z{8ba1#z!pZtsWDvF1M%A^L<l#{qoAE{;`uQJ6;YMwm*pK^ES_{hut6UvOOj7Ikm5?
zsKJW$4+h3pEwy`9QFP0DW8=5nzwRD<q{`0wPhv*>+~oSjj@}N}pOyMJD|pV>!;_DU
z%zU%uk2=kQeMAphuY0ky-iRT;EW10p(g3}*=MKAZ_ewvi*Ly;4(v8KV?%W+Y%zE9&
z(TjiS-FE)CDFX-Xzj(2ZwED|@&7g+W2EP4iT)V?*OWIy4yuNwgzM}ozW?KiBjyLv=
zE4osscy_r<(B%3*m)qW9)4Ps4x_<p_;KRF4)_X6_>;2}bsLW5>ZAZWE<J>hcuT#IF
zThe2#4*mA>R=+~O>_yI9o}5({`u$}8;<PU4%RNc<{l@tz*Rx)pKl!tBubxRh_0Oob
zJa{wxdC}kzN2(s17=Pktzt?UtF;!igRP%fF{Ls67OYJxP(K>AS!Ib;=vS;V*c=T{x
zly&y}nde<+1h?9$NLhJubUAU$Mf=~iJQ=v@e9Z76yb85<mTtK}_Vm#+3zFwwTb0#M
z+<b7Ny}*BR4?p9LPM@lm5zP*F_;v1u3jtOGM;Rw<-}_|zo@WCFbzXB}*}65y?@a7y
zICb>s$7B1>rk)OLa;%_c+LCsM!@avkP0!tMIFC2U-^+U1oap1fdcHinXl<Wt`Kql8
za~Ga<o!4N~wX-(m7Fo5l`n6`G`R`UrPQE?8_)yigF+cN0&0H=UKFOnD>Xj!|r4RI1
zQ(gWTZI{0|r|!~1xo5q{18#LFt9^Gp<J8`l)7JOTo?mch@7}#%W(4kUvvGpt%BEI=
za=Zq8W2DwQ4+RTug)CpT)X{g$S+|+>&K`7gwA%X9;1*8&q-}i-Pk!kVRKupKq1Hvu
zz|=x<l~3yg>$eZj8Oj$sj2Sm#gktviR;^oCx|Vh<b?%^k^NNaw{PN1s?9}F)7k16>
zomRKH|NHGX`aOEL?{*c<`)Z;od){{O=Utln`|MruZ*z8R-y%PBrNc6-D+eYE7d)Ci
zz&Wf&=kasgZacKwmYw@<&iR(ZyDVB76j$+Rhxd_-+gg>WQ8A`Rj?uI9*owyJI#XMW
zE=<qPZM?^3ZOWE)le;X=xm)z?lVsh_x2{u<my$icAgy=Pc;qM71;5B#23)wleb1|T
z&pvPL_;E?cU9WGv%X@b-HpQ;nV1cqxlGo(Z{$m>M9^G?hNRH>0yXT(Rf9bd8_?mt1
zR=gki?4#d=y!Xo<=d=_rhzpuMblJ8ti*{u6+7RtL<(}KH+yrAdZ)qPpW1|QCDp)`I
z)V<@GOIOE^J#qcWk=jdhLXS4zvaaw=9Zy5@H20)auO2L!sQvV0dz(g8qvE1&8@&3R
zDljZ{sW8~bv!LkpkVBU~pHR+j@Ar1&#G=C=KYk3FA0l2b{b}^8Dj%xYP4^kJbxm^H
z+C{$>Ty8jU)b8HzI@j?^y%^JN$DMoYJ7qUn@bN*sR(_zs{p!%{8*iRvJ$g5=V~U+d
zo}K;c&J`}YC!1~^&r96s{;1x>)Qf#*&gn8TY~zLvjgr@%xw2vVzLO6hHt2eH^KN(E
zsY=eNJs016CYi2S!5{msdrEqqeJTG6FJ4~Ne7<t$(oOBv1)uZRSALrP`m@-pAbtJo
z*Lwqh`?<;2d#_jTyE3-iypjH*dWSE)`1q`7`)}=Q)@kt5$^CX!%cyr0Cf;~=qhkw0
z%{?jcNA`Ug(lkLS9a1)E{q5!5`nO#$KsWWwh@j`bK`|Zb^qBHwQJw5rP8mm!jA)^B
z?(oQd&6Mt!l6$!|2#;{=^l0{`T+z8&r(XTxF>k(G=Rq&WUL1UB&;7OsyuTco*u*#O
zk6kZESU(;T{vhvJm(gceMT>i0pL%oP!nG5&ml<Mbm0u=&!;bVE^%_z8s>$!ioR+*V
z)4$n{q9Yk?+BCcQVcj#kNkcn7I5(k8o%>h%7q#Cve#aU4s(WdfH<oM}I<4G-t@AHE
zdH-R>srlKt@i8r*g#SEZM9cJy?PnT%95&#kt@Xk)$<lF?1=aJqRygxvaj9#@OTC`$
z_;UBnim#v6e@=)@Ua_u6%*|Y@n^wOKa47Y0m2abuFI$FGS>-*s&$?#M2bA`=9l?9q
zB5w-M>S%Ia!R~!y+wUu!bNb3@Kkc}VpU!o>GtFgChpg$hMWZ*?S><y-`=@SB<@g@8
zr=E@MAh4VK@?!HFCwGN*%qeg<cYG#)USzEf-n)C`C`Q*Rv!tzX{_WgDuRho9vP%7V
zaa3kT#yji0SKfKDj9o=?pTbqQ?oPSXNdC5C$8D*-%Y1Il`|7`QU)yaiH^onm`W`Dg
zE^VAHq`Ljk;Eld>7p`5>u8xCMzSoVH+qO3!I_lk*OHG$=)(vl{@i|$=ZbHqL74Iyz
zU0<c3$(PDwMdQ!+nOC-8$BrJG+qQY#J#xC@&7)66*DrZYNnE`3=(~V1woA?=CnPPd
zP|&DqQ1!sB)rY^UHR8ieeZF5xmEaYHdmAP_>)6y`cmIV;su~-PnYOL3O8i^cvh)S6
z6E?>W4{Igj-N-E3l;L!-;+n?uYsD^_nLJUleqhb{1-Bc0{_?D-Nz;$FtTS(Ep5D1u
z)b#!VJ7=v@|Fg2Y_ESjjqg&4QymIBf%gv$%4|evlRg@0#YCHGpI)}b1j!nB9Jte+D
z=$iGBzxHrHN@<nlg)<ZNRa5&~O<C=DX4moyeQS=qEzs`kG5yHhxY7@vsh{R<xpt~{
z+Z7t02cMRWu&>@?NK}_ir^QdQbC-N6!@IWae9pVMu~C(Nwe_u-XLlwzckSgfPW@V2
z*AH_ac&}j4gzbYKd0&nlH136A>eGW|)(`kynm@O&rlHP`9XAWM5B!kSq5FW5Ba<f8
z>nu5W<ofK~4d*8a5-<Ng@5<MhsD9^~<#|0n6zDL~XMxwLN{1DnC_#2{xPO0FBcBcJ
z3aoE@q3omgYgWwndNe<*(l2WM!25?6f6TjcqKWnHCNnyhZ*3LQIQq+rfFlzt9-H-Z
z`-e9N)hU(dR`LD(-Rob(`#h_8bn?=3Z&!8Ab#9ks=(Z_!diD2n2R!5lhN*W9oYj8M
z(<43mzZYjdKhW#f?pkHYWRJ4bhdH!w{JP@GS)q>UL2c{4Yb$(`xm43}nzCK1W!ayW
zE}art6kMxi&|LkU)sFc=UGLm9dij2xHB8twbn^wddfmyVM`s0nJal_rkKIeoY(6ff
z%#iqo$9DS+X(aX#s^cE^o9kC=xFC1nt=n%lW$MI(Y}a2qT2NZ+GC?eI2#<R-z3<rS
zk%QA8zpJ$F$<anJWxJFOaqKf__RF*0>+jTdsr-JdKHKMl_VUI%tIG-a`U!7+RMGCg
zIgVU1X8fW>bq_A7Suk?v)tMLjw|OC~njf1o(C0*}x0`-i{cyEs^rd?Z=NbJDR@t)Z
z?a`@UcAgA5^3-U3cTMM_gI|XZ9K5an`^l-Vx~24bGi6!Uk)6H)F&7#=?Kih)#rx?a
zXV>j{@9Xnpr=o6t$|{w+e%rmGb7xyo)}jBTd*xHgDW6s-J8{RVnJMKAcghd@?X=Rq
zbKR!*y4`fYd+TANicJn}x*IHx3{4v0nzOlITj$0#cDSZ}TGh(Qqgw4NzIQqea6aUI
zWoE{kPu?A?*yc>xtIkN<wP{m3zfXq;1ijj{GwOW1uL%h&S605W#h~##FVSsv`u$X+
z>x1LI#0C4O%^Q5+;$^?rJKJ4OkX*CB{iep}{dIq9UpuRC<(7boL-f2xne+XFkH^f`
zUAUI>smW@Q&w?W(@Ae5<_-aa#^nj<kykF;`%cnJ8yHRrT(C$^A%2_x4>|bZDew!ix
zsb^CA+^vT^=E=Ri8!oTEYM6E1gIArOOxqV<)HricwNjN{2Z;BqYj~pEGArvVHC8n4
z?A?8ly!)=_4>o0F+!~#7@zZ_5!fLK{$7Z@1*Y{K(UpK}7W0#0alP*ozv}s(c+Dd6e
z<gE{5?3_dEB|22Ou&{m?uV(g1J2uSQnAdINg8Gh6?6=6ghn<zaZ8I<}YS-5f{XZX@
z8dUH!*gxyShbND#ZT-+=Mq%7Nt=EE4o6hgq*0|#2OI1_)$mH6iT{|4;66DbGNa31-
z3k_vQs$AJMV&3REDI;D^|0QW{`l!ylQi2B|{ly8@b2<;ssg>EN<F$K(ie8k<dzybJ
z_l^I^JLC3B5Bz>|Ztj-Dy2SR;<!3KixXFKu?V^w;>()3Gtd%|QSt_=Nde6>F6`Kya
zyXyFgeK~dywzYbD{c6omSJv!)`RGn?g_gJ78_t%DwJGe;h&sY+&Kve+$c!dc@}69q
z@MYWBvZ?ci`Fn4?5*cW-W!Q)g6Fr|t<cU{YUs%I$ZTQq1{$sC(H|evkzH4DMht^?n
z4cEoJbltkYZdKXBZB^~88n+&v(?6`nsP%D8XSeS*dwXuRCj%DTsXwsmg{z@^>N**w
zHr;C*aM-i&oqFri@A@9gcHDS$@`VX!J1uIKbGo9H@Lq68klmCm%^jv+>GE6qmJi!h
z3-%Ya&N=VD>2%10w+((X*4U-5_#}MnoQbD~ttxybetz2EAJl$GqHV>pqYFe+BF<E>
zdJ@Z<#;aVeQdrx4-fc7PPrLe}Wzv?SKbE+b?zX_^%79OvYX;qWJmUE4E>0WulWPoY
ze|MU3e5^Y7;<2g@<(>qeeO)d~>fm_HezR5H!ulasCLb=n(It6gjZ+ds_hyw|h0T6a
z-my#amWC}(w(5OLI$qeVWpwmZt2!;6uEfqAk(+MSB)RNG&n?6D&p5oO#f!s{!#_QY
z?Xa(Inr7eop)JODe%x!o$M**-`F*&RuL*ZwyZ&;`+%4XBM7?VTPsl8@Yf+z_%O12G
zdd0u_o}SlTW}I4_9$RUCDY0l=O{+!XHT7!-HWM_edAOoz^Jts?IR(;jN4gLDd6|7m
zf5mZ|u-@J0mTg&X)U3Xe$kP>C9-<8RPBU7)O>;Tv(RJnf=Sv5^4ffno>*kJa=X-v6
zWSw#2)7;QAXI;Zuzf-n8e0}f82ldMy^{Np+xaRrt{_(f6$~;Ou;`lQ0Yf9_t^G{vt
zS=MFD14EC)CtG!GUi$V=husF&8Zzl_+>lxmR$mO}Pwgp*n%<(i)%=CmBZ9@JN3}@f
zy|`Q1P;J!ksiW=GPV;tTv>%kdchl*Y;nGKiAD;Y?SMb=TV0N$6-+eruT|YNY7}3yr
zM)oeFt5f4eE)`nTE1O^1f3Z)~jy7?zs&lIL%B?+uJ07f7W)H8^htmG#f*p>(oU*^0
z{q2>%&fc}!Atd1O*tn~&6E|Iv40W9p>%FFb8ow}b%D~%uu8n?lZp_zS4Sqe<F<`~*
zdFxVkZ}(n(%Fm^T&zcT%VxE>eHvMzWN~3FupES%Z>)pJweA<$A*F&>gi=1kC?_78_
zB~80EWX-vQ6J?)W{T)X{+1&depSI}tzL5<p^gpu2_49>8*_XT5?DDgV)MayKjit46
z>h!IY#&cCqPuEu&(bVy+e*5E)kCdjWyrI<6Q$AmNo)}cx|LNOfZ$7@7S7~Nip`YXJ
zH+f4lqPyJ|32Ig7FLupuoHy8Y(&5pk0=!3hi=8%Y^7;9N?frtQ@1>*kFEwvw6lA{_
zbgC}2cj@FX<XKj%>~hn4je0+KyE3WU^lA$n3Rg_;>b9oRQUBICIm?7yVtZA1&^9Mi
zyCm;Iy7$d<J(Kg=HcT8;^|5ibAoJnnvmFPWS+#8I^?@HppV@Nply#}W4m&Pg+f=7#
z!w*A~Rxeo5BG2W);VCh#7hV`M^L&%`^JD)w?mND&uu;z^x2=D=m>+82tVQp7)$QEo
zH|6<`j&NUB{o|N@I|hxtpbouzNZZ|gZiDp5V^gz7o}8SRUpH#mk(z_6+m{~SX~VUg
z7oA3=ULRd>Zm;3ixKG*|t9Pq?2fSa=yZ?y;GkRTl{OWw?kCRW`tJ=+{@dTgV{Z8(A
z_lMf2VV}q0m8Z=T?C>bq=-Kq_9P!N9Y8|UqY*^*!vgWOwIyZc{Y0sYfwv!`mt9w$b
zLwcUqcT2@NsSU1Y?b=)Q>a2s+WV=odu34!`>#zfjyM1^Z&Y!5c=##!mz9QaTROQb3
z$eiiZrr%ZUEhC9eP^Uf(+yCo}HC`D%|C)L3;IA?KXK~5d;|pq*zM%{G^7-VN8><}K
ztEyDVt7VuoW?Jt~jk4c&4L%!ud1i$PjeGY|DCVD~Zt%As9B_`zvOg6tCa%i!gOhk;
zUfi*&9e8VhgJEZ8$K+ODzPz?*c-!+rVO+g-Bg;2ZEuRp1s+J%oHfN^8(Zm*U>elV7
zXXjpcJnc@T^H#Z6qXL)Qm*-QTgifz><;(g7Hyx}`8GQz(xYSvZyX|hji;s4PiPqma
zqwZJXl%29gX4+Wcoq88)+_P>JTevB<N6xb)ITJTlUDe`Z^4vMXMVrg7NlDzc;K;k$
z;@pXz=?g=Gre&Vh4Rk*;YiRC<;WHfGj;J;J%eY0$qsK>=S>n{X*8=-X?|TQN$9O;U
z%wF{Mmqp7~?$9m@Ew^q_&co6L^IMn7u$nU6JtFy7U$NWGH%gDhv+c%|8Q%2vv5HQ1
z$-hi;6~*>_?_sM+9O-qS?8NxH(;vlb%$JTAT&lhLr9hcGt8_$4NcX-YN8DZAeC*6!
zi!V&bR1NTNJ;i5Mm(@qAHkq!!I$(P9mGc5bLEVGX?M8m6(C$?uP3lJPCgU`hw*LM=
zJg!Rr4|hity}YKsUR~sK{7fQm%E|L**Dt&pS?^Y1uVEW&6qbv1IVhg3s_YSCpHpxq
z)IX=7`Ltu6Ydv@S?T-tIZa7yzbJy~9ty|rxmEv)*Vj1zvvpv=?KclMNw#WJRfqDKP
zo5)-i_l{eZalt3%Kt$H#oS|K-yqercull`Fj|pqvFC8J|1@;}$K=E<vC&{ZG*|MS#
z??a9Q!W-I_xuT!EW8lt~hrXQGEo|;qb@i`jswTa!qB|C8xAe>?W&4n8_Y&-^Zt<%M
zdS6UVT{*txgrW7DKJTx+HSR!E%lsiZV`7>w@90>1h;F*a+TU0E<}Ys&l<oFuPR+SS
zkC^UDmrODAyd?@+*{JcOdf}D3xeWK|l`|<bA$wm{W6W2_4P~QFIh4t{J86p5h@)+X
z#_c{=$MMO7z@cgC<6VLtv~Hx8Ci!n~d?(8$$>ZY8-xmkeIMcVK{k@Yj7rv~1Ho$R#
zYvnUn`m7bEghcnq_uG3&>)9-%lHdEw!(Yuhwte4=HJMj0Ex5l%({ZF-?KJP@8?Oxg
zFnmRH@QP;j?@svHIl7T+IsYCP*TvTiQg_gaFIRr{dC#;tt;=@mF}=~^mE&a2Kh^5j
zs$ZGXio$wzr;pe+v3eN?E4vA=H{{<A9lxq|uAAqhNe9zD6$Tt0Z?&xd>MNTz`W|jD
zEV4<Ltk^p%oj;XHtK~Yc&iQFaU)8c26T=@Ju;7S#fA`(Ov!TgvXSaA&aoz1#XHqkB
zUng7D=$cZa*O`Y~WB2rPOJ2WaP@@B93VyY!txZl1&&#Vns<yb|WUHawy%Q>KE#10y
z$lmgTS_zZ)e63k`N8y7;i?*DP9<jMjK_h9s(`u_3)*C0RaDJP0YjoJUUULq`4Rfp0
zvSsIn4HhK~@@!eXL66#%KT4YH@w|6-;L%FrrpF%4O`P)le7^+?YyZ^pCu6Sw`&GWj
z<*{xKjbj`a4;vfYQgnYxZ@>KYE_U~t_E$IP?@%h?mi+P98SA%Q==(r$sP5zORs1$g
z^{(Sp`bpyOf=`qs+~e2{-knY^b0-VaI$S#P+bym7Vx9JHTfMe+I9B!K$+L&!wCYyl
zTlTzs*}7MatV!=j+T~BY{7dDtjuRU9^1swJTD|jf(;C}li#-oc95B>#c<(tYr&f1<
zH|h8|*|_q}b2neQd+2jo(zUYPx?AbRXZu=B>Na6nU>hgCei84Sd$TFirdRsZ^4RS1
zT`$ayYpT0`yXwX6p=&1(93Ii+Y&ZLx$NE;u*U6U&=P#eR>)_5=XA;&1I9BVGvMqh&
z>l(?17ALMxs@b|(!z78{n_uE$>IPJ8-1C;O{kF`P;&-cFj@j{3&Gs^%LFr3(t^R3|
zxVlwn=b8_5*R`6_>Y4kp59bOZ%g(j?{ngUTjqh*MO^9pKd5UZGyH~pIo)uW7Otsdv
zhiyq*SG$emh|SU#>cbx0!-nU?HtO8XBjL!4L%j^q3r~zIY;o_msXZRFS>dQ#7Z)}u
zf8p@bNi)ZmvrahnrO&|?P4@&(kM@`{soUNMweD03Ff4nK^K;Yp6&zNM8FRdk|DYA8
zzMT8?X~xriuZI=({#d8ar=6u1+MIlN^q2X)p4hvuYdme(=^2?_s+SuR-hAQJF|OgK
z#|)UjZ$GN6rd@+~{SUhCpD3Hu<Y<T7t#`AAoN1fq5tTLaLfcP8%?7<cv-$kJ%GG<x
zRjMlX%N-Qc&sA>JZEHDE+{^Pf2LIeN{@BBu9%|2nTgRSQy2K%CvSdcv`7gEkldHI=
ze@VQq_SL><zk18Za_>L&J}|BNqJ2u?!K<epuNYB1Q`ahK?W8RY{a($R8M8)^SgXwO
zAtQ#1V!BSFtfEz056u3(M^2Y!Ru%ZhxIU|EG~;`Q^}6)9N@V#@*`aUWUB24bwLzH3
zuI9z<D=MUP%|Cl!QdlLwZ1tQ%k7KjvdaM10-#Br6^Y5C1vsEu_SXgwrPKIXOz1F#(
zrdz4A{IZ8uo!R}^!Jf7=Y~wv@Esi@M-bMUVd;f#>{#KiP#I_TEncDwBUKJl{VY{T+
zH{aF1*mt$9-Gb1>==c<E`4q>wXJ+mSOo&ab`0}__kNc`O`tEs)I<A_vboTV&T?;oX
zOB~)}l3)9qnZEibqbe1=+p%Hm#I%uT?kC$fz3OlyV)ldkyj$Du-FwgX*&7$#bb#N;
zQR|(u{k4+2V~1>Pr0ION_4xg-2IRgTb!XIg!4Um~GoCA)Ter)w%QX(jF;qzUWkt;~
z$}W=?4|XkH=V<8l$HOsgPHZq9nX5S?%<nTZ?fxINlG?r+y)d`(u)^F|@8@^wa(R9O
z-JRGTef78Y+ve4Me!JtK{ZVmmf_Kf!Ta_9+=-eNF^xA$ivw4c0_+`?ZIajChv&CP=
z*<@Euu-`mk#kQkw{luH5^Rwc+d5;P>aCG6_*cxv0<qx`_Zn}I+{}X4jDt4bdzVnV<
zfps1H9Sy#+Y~IRoEvxRo*fzTB?W~0-Y<gV?-8ALcwl3l89e+K$A#eSvkP`#dz1mOr
z4~~7?_R);DA5QG+H}l2PTSZOZ&p9>oZ2$PZb9{%&B93j`5;XA->(WV0`%k)lpwIS?
zdk)1rl&ZSn;)~Z$&)>-Eliy)%ryj>f*Iw|8d*vX9X(NgT9J=so%#(rn{*PUk+0F^C
zUoB{9?ZCVi&uho6t5`<Zxb>JWHCO&#@3#(P-5QL$J)yZ@?1{IhC)KVaZFK11=))Pq
zBei`+yXLLfy8GQ1JK@HG>-^5t85VG?D7)kP%;?e{TYEq3_{DR9VZy|M53l{>zU04-
zTOQoa@#lt}I<7z7_eqn9{ii$`y>s2Am05k)|8f4naKY-|n_e0>yic3er)TjlY5RBR
z(|yK{6DLle>@;b{<~P5WebsD4nxV#sNkbR6sWq*0#h^HE*9nyZ0`6AaGB57g<ykd-
zZ`S%_+0QLL)tr}6R@yzUZ;Eg0hfO@{ZvB$4^10u;;Qgqr&*tsl8=wEK%()X?R@a)c
z;98Tb8R<1Ud_5dvRBv6YUJ%-5^|eUF?7Z^1vZzkplPfQ8JKt`meEKHc%8@N>YS(r>
z?pS^3=(fq$%eGZsF?*B8>lNjfS<N^)U2<Z~tL8yV6JzfBw^-RXC1}~^`89UUS~=eT
zRm!wQ<qt2e{;c1k$akTGf6l$$(faF)SJAih<@T4ZJEUX%#HX7Z9`aII{ZVvm-P0Q_
z1JzA;#$E|;5xr=h=bY{ZqQeKKw{U8Fw87HBY3jISN1Ho|!^Dp+&*`39c17cp5w1>&
z)~7;_Z7@`5IWuQ^&(`Iq=@0H-x#|7<4O1uATpPP<|J1lD`;@m_{GJCY2H)CHJ$_Ey
ziF5fca`Y3+Bz=irdqVTpeZ`7tSNFWVa^sDrX!*IDMW;VV)6!aGY-wuPw5g5t`nWn5
zlQ(v_c>nC-nCw|wm4!3KODmLZR(`~itCMmXoIMyheY#TZP$^(`+{<6P){AKJI5Gc1
z9q&sKPj6kB`^HxMc*0iI$#U|YMN#YA+BbgLw7=o%=U+}t2%7QbWvHUGa^9!qR_l}^
zk$BaIRn;B!Pt&R`e|y5JQ+(BBuk)wgXg}L)cj2{jhfd|)S?l6+E%oA=-^V8u_1Jgs
zqS~$g<$VKH{l4zdPOPlkzWrkKFwdB^uGQak2~sW#9566Gq}s|YN9v9`xzRX6aClLl
zh&Gw>OQGxc6>1HqT34NO+Yq(y-uPpOtn<S<KiU7d#>e09M+`dABIDVY)k|($KlQ7x
z`Fbn+=&21p-TXK4?p?Yzxy8%-iS9f0Udm3JT(G{!q)qLO>LqudrB|B&%(~{&Khob^
zw)6A;GRos^wPyovon3Qm_o0ctd8;~V3s<e3T5##-ec9<(zx*sZp15e%(yiX^j$Q8@
z9MIyq@WbWlH#%-!GrE>{nT_*f@`EBC-&^;q)9$2C_y2f%e%^prYo^}wT<CV~%1z0=
zPJ0TL-04`M<+FZ|BR|~DKRw^UV`Bf8h5ZVOs?~l|uy&_UWX0Q81lpo5Yic`9JofPT
zrq}ZgYW=AWHxCw_s@FU|-_E_vB>#olo&~o=w_@)$>wBpFo!}vL%jnkzhmLO<mpsbO
zR=h`*lyK2%T)g3QO7Gdb+Bcm)^>O%<{D7<XLtQ2h6BRa@ljFQw8rDB$xv-y3_&K&l
z_0Rb$eyez^+TiFu?<%d`-%)b2OtbK&lU`o+x;QYzc3I}>nU_Lu-|1h@<pY1AV|=rm
znMLcZ#y8h5UeY2cod1h=_eGfnm*!W_+gx{SR<qdNeaB3wHM~=W{_Y#zcX($r|CoE0
zlV^JHs7oK7+k6@HvCp2Z2lr=`Dpjg3$OWid>BE>Md0$VdZT3Ytj*Ks~{<6<+(VmVQ
zH~zK+q$Y@j;eqS?`wabGlFomX|I-;zt4Ngn6fr1iz8@?ymTNM9JT<2H{2y2MHtpJ(
z<o~#~_xNA_&%g2?1bqfL`aGEI#mly_adC9y^@J-TOsY0YRS_VKGD@RW$}Fx29{i~j
zOoUEn{~R@_Nq~&iHZ~xSJxK!!_b{vwOwUB6>NrUgksD>8EKQI?YMW@eZChs>o1TRF
z)MBsV^~TgxKshDMRLG2|5(@pq#zvs#^bPa@7DdX7{~oAFP#R!?AL1K=kD&5%DTA&e
zWu@^Z^4!`{V=y3=nM^4_wV?Tfyli@^l&BY8UZ~$~<mNx~u=KM;_Rqqk2AwjK`!4>O
zh37mhJjZQMkO&ae+~pR2#(AzQVByiJDf!DS|2XlxGXy7Hjk52B%-?NnIxBU01D`z^
zwk#Y&nMxtmc>#!C&Lo;l4V*7Mt9}IL^ukJd&W~jpl`$2UT%y+jNgJftQ8<B`09cd@
zi-OS<@{rWr6HUH|QmN4|V-sQOJ79u>DZU!5R-*@OP}B!mD-7gIXHgqaDTDt8`Q?Zy
zLnqBbeN#}9mnk7e9jq~6y==OIr98BSZ1td^L55nfC8cAapap-6TIpFZ2OXlVVh~+4
zHiXh3ml~v$ppyZzj+93h31|fpeMzZ_QE6c2G4+KWZtZ%3-N;e`c*jGL8nU$bi?E~`
zQi+jTAy$bAwdD!;JD@K^Futj%D50N12eH0TsWjCL00H1RIU(jCkfp8ALF7WJ!BkL%
ziy+}38+#%RnH8?d;4@nq)o;NDBoZNfRzU4rYI!=a0S?{TcseC00ST)tMFH5-!!$Yt
z=mH<E2+X>K0eqx-jT&ae1K?-;23@?Epg@I%q(!L;Q?LW<XUGzVz!(w4Fs?0u8To*r
z3I3#*x!04D6EZk}h@vDwcuHMyx~YvPkLrjXR^HNz3b)KcI|K-d)TFz$a{^B={U*@G
zdV0Q55oa<$-JyO5i&g;if|~mj;!{EGAcj0OGrNef6#<fw?|_4XX1B7jadfe<8SuYc
z^?%9!BOn4%Qc%JR7e`c<mRcx3WCMQN{&R2B#?8Y#|J&_<_TPVL|7jVc4=)1KHNfkp
zQ^KM<dX@0dBfOTVDfAR|NJdC?)K@4qO<95HcXp6*5$GYx2+#p^Lk%0&2Z0R35*)3@
z07^&U5rSYjSZ^R*0#8N;Vh%th(I^302ss8=EGZ?){uTgfBp4Yy1M0UFVoAcDLL(Jm
zE}`QO0*&j~4L}tMkjgOXBcMvb+=QT97`6`vq|qVXQ`8DN_!#vHX+iaR2B?^H3D?`P
zjz};Xl?G$(B`DO2M5UoP0XhRzWEp^NR%PKDEnut|GBiB7PMQJA`t)8lJYcs0g9CRQ
zVtktR9DzfD*`lmh!l6i;CElbFCB_-SfVAwPkn%aEq?73~9~vgq1Bfispg~Z6lu2SR
zQq~~sz7z@z2qR0UL?9sSF~da$&sHTu^d+1$u#6A9j8Uh+gex-eBYav|a)bp+LnZrz
z5<*l>3g?`?NEEEM=%qr?m-GsZDhhhUrx6WRs3b5Im}gmpA{JT~^j5Gqg_QXWwKCw4
zj?&0j)-=9~Q8qGttB`<NMff8g`5CY!8E{w3*s9Z{E9DTQ4VVE6@_Hg<&tz{PI4E*t
z<Nd6J@X!iCuS$stVS_;$I3>jB!2GLH8AN9xZS#R`N30;7LMGKJ4Ms3RyceLz$cHi%
zP-MUYLk%LbY*2OEQ>REoBM$&G6-=OFfYA)sAlhR7RC8W%QRAh+OfCa-s!;1ejWl?k
zC3`*39>BK%Qyujd+2O1Q_&n|ddc=SW_5{Qlkg7<lcr`gVL&hV7LdqcW&~<xYd!ywK
zMhrI)1U#hs2l33{@C2YUB_Ewx-%ST+K72k;VUYc~9>cGZo(6&r{Dpbo6Gj6*M#ts6
z2yOPBGS+lM`BOikB}6HI1E(^+WJ5ts6S2cqIpczxfEgF7Uq-buF^jNuo9!M<`>nQc
zL~v&s5^}&V@;Mhy%WDgtYI+F52&iG{cKE<xP9=5i%ulK5{j;J0@Q3hwsKOMDlG=Gb
z34T~Rz?s0hhv(yx;OCQ|b3y-O6G5pA0|A<+jCuesK+wOswipp4=y8E9b!o-dSqvBf
zdWTv^{ifMk)L9nSFZekUDL}$Pn-f0=XOsFhl&`amj{zZQ0a5zeVnN&bQ9sxD`52P@
z@IbtLT&RD5ABbU5HvYE@^%Q<o?q}1Jrbfkrj7R1~JXGIcnK;X|S{v#uId}`aLD?}0
z-+zm`3!=B+2dV!7efC2#r#IPmWG<zqEx{>)y6mSAfg=eqf^+5xWHJRJV9(-iO0pn1
zyA!xM)U@)WFZ+N?Li4?~8i>~_+P%&;JpsCc4LS693^ya8M6^lQC;<dEHJErf3VTAq
zh{x|ko@~WS;DcSo%S3-<p+9u!51E5awhfOuuH;@`GH}WE@!{FR2EBN;;2#OjsQFBO
z7GKAgIe?$Rrxj|s%A2}!_N4v-dkf`HE1MFmd4k5la_5k_z^)!FW+&F9{FrE{PsGFK
zv3Th2`7qp)QcEJBUc>^BI#YiSfNgwl@GRZ?{n0Dc{~vpA-rcr!?ScN!Pl4#ZCZr8f
zqU0nkHDt-Moak1TR7sAL)?Hd42}+1bfCWGe>FsxadprXGQj+8J_TA^K)mj7&&V2US
z<8LFHKx~s(JIXggU4ibU&W7W^K7J~oy-RFMX2SN>*(ov=T*B1$-_bA|B<DZAsx{E>
zpQ3K<g!cMwzkdz~6_}X(M}jj$Ip<5tk%l-Pmq6kR%sboth3mT+wL9Iy`=Z_1s;va{
zo90NC!&KEae|1rl)9$SHI>DOre*J;vdZSt&%e!hW-oo55mG#??CPU`Z2n%#RoB-R1
z`85(oS+%fpAW~3L>*C>M+6${sUhcMPg*209Q2b*KcM-O@a4Gj-EoZ%O(d(D^dV|**
zgrRw!2JKmI6&`h^^5ZO@;$k>ccAS;TFg9vQhE{-4nsHgiKO|%hOthjCDq<VnzY6K=
zib0~=q~uHtuHiG?a7TBqx}nUZGXXcfaO2qx48?TQVu+22?KkO_ffKsnB*&r=G`WyL
z6v)z%R}w3wvaWCp*Q4DDsF5xlQ32TlQUfC<g(P>OGc4JG2%j~QPJx)Af(;B7;S(*h
zAzTOweq}5PzF5XmO*SJ06RxkWE)oc$ZQhKdoR^E7u(Vqs!@xEUqJgIGW~9w3hZ?Jv
zl6CiGGK~mlZIp~iJ%P-b83?c@l0hQAE3LRCa7CCDfgwE^(lJUVgeR4nlc%d=x56!O
zfXIQ%3>i3dRP4euO|Wb+oO4PDLJ{^6(<wn>2E4*lxY*GR%pjutu(F?9;~GebGC|$=
z8_Ouk45K_onM|t8w!Qq6t-Tw<*1i(JKT7grC`?gO_Lgjd+S`f{OTb5G@t8!q;0J`)
zR8@d*gvxHDtCo7{)PL7%9j2llpakVNgmz*jPxtttjzBf><|sRhNar@1!NlQ&sv1Lk
z!+5a5q~iH=GaY4P+`yx_NOH|m!C$40KySOv@*>U3hd7=E1<LM|_$DUHs5Fw4oJfQ&
zwxXQR5<Ckt2kY6)Fyn}dp~Y;g#Ob}AgNQgi+&3ibMAlw~bRsZ(qJ1GV1Jx38GR^cn
zM=?@60_!K4bb=q^TucqVhN0j3_i)^KJbn_djpY|`*@vk^T#^2x6^rIn84T22;hF&d
zHXP1#H}j`jq9)P3194@~1n@Y+Ysq`#7v%+F{3MvzZC!@lE*_7VOiuz#1T$GuuSmrr
zDH(N-x+*0X9x$4h*%a-AqELN6V+k8Wz-O^DyTa{bDm_|u5!Z&1<OUOZ&QSDbGgw;3
zHcUMSjJ~FHc9>D724pUj864BZb1Fk4B=f~bWG@GT!3|`CBF=9(pQ+rQbaJ3lVIHvm
zE}sBXiju<OmmoiQ3lQwPOe0%Bfs?&-nnt#dt*@6uSe$JtwA;SxqV2ieA*N3n%x>8f
z4ikKLN!|`S!CiNlO)xPz@{4oeOMuO*w$NO~y;gDm$SU@sEcc5G2dmPJPL~njGJ5(f
zXy1E<xS{4+9Uh7efs$4M*U+#JxbJYqVPQ;4RTMk%9O=rLtvBt3!E^)ux7oslv_D}c
z`9T*!Qunh-vAwz5?%?a%lg)0K&5r0{4rgO~GsG8AgX!&s+1{+qQB}E{Eelm1dTZEo
z94X*onVGasxR*a9Y<%|5wyOv-=e#-zn8G1;@>Jk4^I=CdXDo9QMYnpXlBi;AGVcU^
z_b@e?k_v)>bM-!K{$Q?a5u;U*G6?S6chX~l6tCT-YzMaaa-ACjvzVn3a%GhqFPeZ-
z?{M56hE7bAtLu_EDN5o@Adi);Vh_4L`qCb6`{@4v&>2>V=J!3uw5{<%KyHH3Kx0`E
z%%%7tAkPHy*C8WKWnn450(>z~$$W%H5doEUF00Bnd|_EHj8Qg6*1_BWm}-Sm|JgbP
zyLm`r0CV@|CKC-6CNH}uwn*J8@FaYPDb18n$tMZ9Y29QK5Fx)Kt`raLP;hHmUqMW>
z`U!nk-HoV3`gP0xMskZg6rMl-)g?WLnu1T%X4s`rY-EOyqY30Q#h17uYAeS<yqsf?
zV)%GCafq-BU49?SezR$jx~iMU{^yccxbQdtY*876qGGoT?w<e3JOd}2(T}>pNh;W?
z=J+Osrm)joJ+=+P7@x$1&Z&ZsWSC{VYxWX>cJ`6Z4G8kJ?^v9LbTy0+KN4EjR$|tn
zm6YCi))CzjC=VhrvRk1pwfF?~H4x!P64>Yg5*v)6E}*c;IT2HbV@g<{E+%;9fM=U#
zBhIBHAe|=ni_SN1P&frQu_l#Frd*#K6Lqx`3gS#U3N>UhM~+U?!bcO5zj%IWm!W-}
z$0~BtoMyWyHCqtZhPcAaKR42RhpgLV=c+|oTa%@x0npjQlQ4;H<IxvY-flU`51k*M
zdALUte@gXO=J&$h#N<XRN812BQ^^YP!+z<He*dp=p0zi_Z-iw-$_Qtfb?xindI;>8
z$BdOOS>lv;a40OouytpgX;KMhzHZ{YtWqi7;4DoUn<4g!+wZ8@WB0dH>Pw4xtha?H
z<Jn$Dr0b^4z9q^-cq5Us!>xG8kRyQ{v5K4}5>?Q26NsBM*D9nm%^cO+>Z<EGX?!S}
z>Oap&|3>C@cBAC9Wzqs_i@pok|CCaMlM+iH-H|T?vYNAVEbGnGH_v39kAOKVOIVKg
z@f29-;Rn16*!0hnLu92&rq%DoG1C#)ZA$VfPcwXwBE2?M4QMrhXEs?>hFRW}IuKdq
z{XhMYmTHpVH0h!tH_O(<2i4Z0wGf=3MBp18s#K_AX{ZpS%3DL5=($(Bd}c@%_F)u{
zgSu%#)aCroR#Q=((SD!qpG%+ym-y23q~9lBCqs_VqOsL7WJ+3ifKqZPk5twP9oGDt
zsU=nNM0%;Ua9<Zr`<KCPd%qLxkU8gfq^=j&wfm56Y=um}rGw56z{VTKo@$qM5k({^
zV65I!sgz+0s0Qn5Fb1w;kT7dccN|H8Xvql*UT(5TxLntTo<=@>c*dDRH+0iic@|{9
zH<=ZjqSvw2G*#x~0g6uqAb3d{%c^6aFj$7le9@{gac6On{0d_}OuuI-<j`x~XrnPp
z`mc`gk)#X6mI4^JlZtF~%Yp@EB~DSvp5x2!%)LS~3(_X?)WftI_KGptY><fI{oyFc
zT$|Fs=*ro#e}*oJsDTUZ-5__ixw6%@qb9d_SN9oE<80dQt-~$daip31N0r*w3m>W6
zzOQewqc=Bel-{F_1T7qKsuRa51~l7LBt+(?U9&{tdT-KoDy}M_x~~avePMN=ytpJu
z8D)NTp8?AZ=PGmUV<JjQM2e|{l$&yo@wP-g52q{t4)=e;Nl%=ckG(UTyYH6Z;FC0=
z{Q&#sE0<fRorvOV^*T>dxB!tfl(FqkG8TV29FlyTrOu>%7kIZlTMSGq*b}0Os^yJr
z#&~}@y8rQ4=;h}-^b_5oi|<X=aXj*dy(Bpbg_y4FGH95$UYiC4V-NL@&Q<{8;KGgh
z`-?&s&5A9&rBxL};Xqac!{%d8!H|xc3waKQ*U^pILvHg7{=zYL1HG&&@{LWff@BIc
z$k$OmHK&y5swm0W8Jb*kDFqIz;FMM%6OD;_(AzvDPF?7(=kPA~HFwN$<fD6CwKgfI
zvZT<t0AvOjavO7l#e?}3C6Oa;p(BQDZOR>!X6Y7|o>rZjAh!eJ9Z)0YawcM|6AsFn
zwprPP(P4G#HvIL;%-TL{A#tWQ-ZX%%c7dsH@Hd4Dme)-x@S4I1<x(}M1?`IHH0l`V
zJGr+a%0QhI#heFZc8BJ~5kZgcz9K2-LWA5GIu?*LH6W~HY6MvL8nb;nmdFL#eCQ4;
z>L>SXE-mwnGA=<~F)H3jn_N}TVkhpI&7s8HPA)$-f@_chvZ?kAu@2H|>v;FF!#!MB
zJZGIowIHpu>fq={yD)A(xamCifATYjb*!l#?fMukLdWgl6n8E8wbV3}q+eG-))92^
zsELu+aC(DmbYJfRsj|surf}PI2H18-*zsfa+}IlHqOW9jN(a|4XvX1)oo%QbjvH-k
zbFoCr*Fan4@F}HM>rdgSWGv~&u)fbxegARse?KIrHpKPmxDStu|LbkM_~vUr{;&7#
z#>O|F<Ny90|Fs?t0F*et19e_LC3;>SSl&P*0j+<t$TTx9>fZ0<w54GL(eOHniRHP<
zvXK{1P5cITN$xRgImBoL(UU@xAk_6+<ZyH^I?zh6p2_7!?ta%GgNwhF=nq*OjipKv
z#9Ne38{^eoSOF82L!NY$BhZPkxHc|2rRV`T&SE^DV76YO)a33(hDw%nT@`Vq%Ql-W
zNQ6^tUc>Ng#9l#GT5?iU_Sl)luCXxEa;jG%IYt7v05|5Cbip@Ce5;I?My09518gC(
zK}o^+`zYpdeBW}-gQCP}TyMT=gel|eWS*0s;{>`ax)bW>3`5D#?Kn3b9cS=WMfzs%
zDV#H2#0_d-GkAHlcOK*!dKBSWB;7b{;p!P@3~x*F?OUQkg&ea{4n~U_ru$7tF$K{p
z!i2h4Ii8p(y2oTH#O;JpP_hCe(*-q=<>EdFGnYU^Eto{?>Q^`}+(>Uz@@l|m$QYt$
zovDI?aUwK4-$d}S=p^QtftiwPx|<o3*QJ<<50-52^spNoppTS^O*D0~F&ka4Bxfpe
z;1e$jG=kY5asIfty!Rt6VTQhE3u9Ud+QRsKgi%Qa)`bj67-=>md;?T>&_;&-$=qq5
zx~;uJF<>Z9V@bIQIqK8wKJ?3BXGyzpcz1nB#SMo+Bal%?y-N!XG1d!`G9F4kPYfXX
z(0K01Q<v9$48u=HlY5uL|Nh|3o8WsPU(9E%zVV4&8_RTBt)sa7s^ADoD?Pa$$Fm#<
z0+ZQyZ6I2WI)g@&T1248JGA3LAexLs;6srKgx!?QB%D(bx+~M%4Nh>ZKto}r@c^3k
zaZj{_Y*Z|K*-Y|^_gnAB{}0IPX-4TZfyaBk6TFWmv+D>7_1b&D^vyQfFFOI06ZiDJ
zFnGx;v`<@^!B+6W8c<Jk-<w9iXv_z4#K^50l^8te;1R|a*r}9tD3Xzw>0U3yW>sP{
z(muQ%{es$TQXc&>zXB>nP!}Y5jgajvVO`0{DVI!Kav)P>9f+7mKnL%CMZa)#wLqJE
zVSSc(xyVLZV%GH_YR%K+A0UKDhsP27;p0d`qX{4vMyx6%NE*1vV$Rr(c66lHME=8|
zxQ=ErTpbE-L{uB$UO-3^G|&x`0wpd}E2WxbVx2I%os_hcH!*1drWvamfCZ;})Hwng
zttH)8*pRT5#?pADGjNIO#M7&)R-OqNN}|@|RNOu8*?K@;4Q%l9hELZF%>ZWKorq6f
zwdQ0h$uV7@)-HyT1BK&=m9Vm}Z585n7)*Q4-d^1aiWjIbDthq$(~YidC)OgZwu{YY
z)6GuT?5%d^S+UXS1c6Z+B}K*A)>(`)Yitw_6to8wJ;QpT4XE@*r{4Z_(``6#VnXo3
zQCDqsGDZUDkpZM;^MOmyE{`ll4V=GQZ_}|Giq95m8OKoEI1GJO?K#j9vVzB@L<Lg*
zY;V!TQ$|g&92Y~I)Lci{uT{Dkhpc%qDk0R2(_zIm_WjDI1X#8Ld+2@=@2tQrR$lje
z>+63IPxBUfmLH>KO`<w%r^wNq-XGXKTG*#2@`bwHv}@O9#}?iP0*`7Ud1Bnx$tRt)
z<<lOn_jH5SbIUBEbzQeg^}v>s&+zHuZ>pPWNh28S?Gz?u3d}Z>M8`3RltIYRuHss)
zG6fWOBKo)+kja!(y{KRHXrpw1xa@^4=UwTQPL{4|G$~iOv$oq0jhIetTbk`O4P3eY
z$Vd{J#Te+i#2o*iAyzsLje66oh=xqM!RqC0RBwhwZn)ve>IC!&wR1^Hm9cuHehYaf
zPPk+~s5V$=H2y<+Xu4{;!S(<q5i5LW{BR{r_Eg}n!loXnQ$%f<s4mAh(1=tN^(4-U
z)fHT}LS}I+W&4;z@R+Y_$6?cc)p{0q|M|3b*~W2Qff^QxoXAm{f{A3TjN`cA$Q$4%
zi2l)>#z<j@>R@SE@CtV!Q5bm5(k<&m>pggE484J&!^C|Q&5=)+(w*Q5bVrXChlqe|
zPcUbh5OxQHkDM2+&o27qLr)?(m_;-NwnR^1ku~mV@tSRB5+1u{VKgs3e(qj8G<RBV
z>nX&W0bkJWj6pyqAk_Ijaif@=-I)|dl_2|=CULp}T#cD9msh(dRf$pj17(ZJ09|r$
zn5kI!@I02dP7aICcmdO_>iSFioKZJu1B2NouGqo^*3>R{R2iG;$sKl5A~q4qimbV2
zIWH+~m_R?ME~3t+T)!{29({c3jM{XgBc6=iZs&3PqV}0f%pRzs@sa~_^>7!eG^>JT
zCDKh!K&Y&oL5l!h?V@o|$ws6Frd$p`2Qob06%8&q<e`@{ToB(FRMxpc4KATLD-4M4
z0UwDcm0PZ!R1MQ>ouJ8TPB1(i2#TtXcp72!09xk^+6+q$stht(Aa?W@Nl0};D22w8
zvD^cW;QfCOesHP&|1h0RqG9|hMq-QzrnsC>Hv>F?|NQOd#@AK+=WlwOpY{L$N%+rA
zTR|V{jXQE*7OmE~f`gGWw76kyb~l;zJbD`j`*1|#;=?_wH~w-~se-N()<Cgeal#Ec
z5K*H<L_)`C$`KGw+~{4GeK?fG_qF|^peJ?wC}uvI^mW&BPrFYFr<}`?yMfWkN}L~5
zOV=0aCxOJZ<LMwCp`&(j4>WID5W=s-%*3fg!{pQz()@U%mjFOAt|I%wKnO{sQG-Bg
zIcia6AKi~a3n$=JB`#cA7nz##`Os9wKrME@$e+$2EbO^KydfdlQH-7jr=+?ru%?xU
zsdiHAgM>9pZZg~t-37YQK~$hyB?fD|rUQy(GF)*KY}{>VHxc&h5y9nh8**TIMuFv(
zxN^9M=%OvsWHy8GUCa?nH=4)}KYELC<pDb8VM)4)9{eIB@y_|8dUqY8Pe?w?QC!9=
z@(a8vQ<`9y9Esqf%i5V=SxOlBWG-4kLd_Ll`O-lbdi*3kZurhhJZ%=5KCDg#2R&i8
zX=o<4oGlfFpe6tlQD~tfMSgoLVz1{pA!qVBo{Us_a^|DkVsB<2U>o1Csi(Z2BfG@7
z4rQL*!$}#<bCtTeJtIkzb|j_|o5r|zwrdgDdx)X|kx0h(pJYjRNJnp<wjQ;ws*Az<
zQ2VQ5uD37|iV$=3uN^yTRx%NyC5=AVyW1J4(^3BujFxJC+ED#Znm1jMXwYKi06~3M
zNowKDWPixcm}D6BO2)wv%dQ%72|Vtt`^`GO=2ds&<KTugE!!)kxTZ=}*F<f~M#mG+
znsVKUZpTP&o1%@G7YbNgxXea!_u)duKwufGmO3ML0l!9P3QW^B49u=AQZ^#$yu3kl
z6(#AX+0)dYy(ezz%13otTUs@O(K^UnQ``gGWb>2^?$;>WW+TPUr>#E-*tAw8gJgo}
zEbs)UXrQmjC<`-0m@c*Zl%^6W(<B*_v(^NCC^0mbR|*~GZun+A9MicoL1pvVNY#fP
z)p1RV5)rz`N0wAi3|L|U)zH{Gjmzt7WDf49IFNZPl|OB<>YpuME{hn!qcECP_RVw;
zkBL{aK9Wj}UDD+Uq@lp%SLeuP1Yu4qW$Dpb%$|$g1wEmMp%}gE$EX+Fk~9CnE|>K@
zVPDdw!{=eQ%oTRl#MxHwnEJinTYma&;O!^f4VQzYSZ{sQj_^LM?23!Z*hr0)ijS`F
z=W=Dy{msN`ulEeDA$3C$X~FVL=(0FM#NsLvhgrH$J~h3MJT3BV^NDFG-VR{{C9D{1
z6wEXdhABR7K^5cFlm}#dRm6{@qYL}@E9b{4Gxev*4e$~A@b!FBdGbRB+ZGiuasHPx
z^K?JMV$JWQjzu4GwA4usi$zePo<u4Nn$K>XpGyCO5+2Fzg3t9)PN2*6Ki_VyZ+=_V
z|7@&({aOF>pOOEez6pYNcr9H=K(okEk=#wFK**8cKKfe?$$tAfM}oDl3WTa>1~($%
z20)8g{rz8p;<imKt?k1l2(_?&Hc8tY;zjYRXffCczFx-|qqVibKrFbRm>6z>VXp{K
zPard#9Ek&N;5#qn7JNHzQ;McAxY0ju`44Pl4$a*J&x2bpY-nl5Gh#JSW*U(lg(qKZ
zUWopP&ho{4$}}2g`~3W1qmsXflt@VZM?w4h{e3~I3Rb5@5pQ%{r?q$2_lL;fiUpE@
za7uOY<cF=+o9KQ(uCpAx8d{?p$JJEUg%}s{2UUff4ITl`nd2jo=@Bp;yje?f+;rNd
zP{x6uOu(p^ytHt{v8U><l%#%Z);DBz)oKS>&Ch;p>YqubN+_gODP{jA@m<2sj<S^n
z0`@ERU7?O#le|bArlUGUeHOu(lydo|F^AF^a13{g1hgrGp#7{<*((M#6)tCv+od=h
z!ZUIQ8+tHC3>ohvNGtP*AeTpq&yNaIG>Mtcz-+hiq$JR(^)2{koAy2Y_567!nEx&L
zJN6d;feynz6|giF`Lq`tX*y->R*neTfBqbr9FV2YLe-knWP#R<f;Y>bi}wRV<!D)l
zr*<b6r%zlxr`en$Wcy}(rk#?EZgMo8=eQd1&KcLqfRM-7%PBTdORg>6+AiNmRI>__
zVoIeTcN!C`fd|n@y!wzTI$qhKth9$Ff(T5~m{c$3QLIm?lL$JJy^RyLsP8A7sk@Td
zTt)gVMA&8TyIZoJqqb&}WwWCH$n5}KY1j~t+(i7byMK6ds<uHkZ+VND%jye*$rDg7
zLda5iPNCS9d}|a-l$4bmawGIMPSLw@1oRb;kWSc;@TUsNuqX5L&`GnjPiS5!#Cyu&
zTpXY%iHwLqC!$y9C~+4`nh89CG?+kiNjYcJNUFr{g)mA+Ol*=D6C?a_S72Sx8L0)e
zNzes8>H07f7bkYNc6;Yx7pm(o%NgPah<jGD*g5&)0)l`WVp>GfT}1+;1nq2YNJhD`
zlDeoiH6)@581@Ufhi%1`%ts`nTk<|v7_ob*h319h22J1L(hcS_9N3FHar=hZsp6+r
zR#u(-hv<Qy(pKL2dOf7*|9LJ~uUI`;xcYjXyZ)6JD&V&X#B@{by{IR1vVr;6K>Y+~
zfUMMGdz0#D7zLqWe>q!ufJypj7l(}0M|q}Vy%nOw>|L8*F?jJuAw-@YoY<jSYpNxM
zZ$!o%Bv)71sy9>t#>0sdOO#pYBE|?sb|h6<n|Cbr<xiw%!C5S?y$*FLyAhvI>)6p+
zM!jA<Bi?N7MF*+T&I=$%UEa&>q!S=1bAAFvv%ji@&8z7N*CZx&6edQ1D^rR(pnq|v
z&+{rn#WTjXALfRYXfPVM<qcidZeLA#Fg-uY9(AAT##NJ>_<A^QLD(f{I-kJ!+R^%+
zNG`+e^Y#L(V6Ps<S>O-~Mq3MmSA5-tL2uVowpC%4n1wKy6B2`w6jlun=e)>=Bq*%S
z%Ve_lI?HdPeDum%ql)e|@Aq$-@%;yaU`klan~XB=Id!AIT0Tuut7+s(-6X!2p(&9C
z*6`X}pkwzIfPPB%8eGlnY^qom@9Pusepl#6<iI0)5{%^%8j{}jB=$lsz&B2M7R`@_
zRa*>v7gnPiykaC0r}M~#O5cgupW}Ju@ajS{mO5d`Nx@7!rb@!gORef_N>??)zKU?b
z>6Cz7r??!w5Q)mbSl$U(%qV+k|JESp<cLs6NaW^#PBea66C@)s=!D*OlKeH-m<Nr0
zp(!v4a|h%amxP(;#1WV&uW6Gt(y)-18tX7cPk}tN8f;BJNO^9~xm0~uxZ49}<7e1z
zn6K|PODS?Bp$x>BCh!Io)mN}h<6N$Mv2xmQ3l0Sn8qIrv0b)3_ttWDL+s7Q7AvQyd
z1-V%jMggg@?!4B04x(|=*^*|~NrWsno0%I`$tz$`!GJ8{?Zx2ig~nvsdOZ(k?~03Z
zuAVjE3d_lhASt4#F1J$>;pX3zteE$z5>pcSwmTV@S53xcZQ!Lk<FRtUOOzK43U;{D
zNg#v(lR(j(f5z?r%I<Cz<6$u|aw*FQy6OGx?0pKG`e(^qSj3*qjH3~OzgCA1v-ILj
zBbo_XdAH6&1m}{39E3G7ynq21P-J%pSOnXXEgtB~*3)y7L@FKf`(b!3tO#d<n0!3z
z{R(7}MJ&4Dd{LJOA+CVn?ohTbn$8~=cWx-_&EQfROJUq9`+fIUTUA(+zX~Y0q_E;l
z<((E$d&8e%0s2GU@ASH^Ex=!P9e0uvBl4JbJ3$`^73KtQTMQ&g-+Y<Ix7!!HOkoiS
z;*wyC@c%l&KmXM@A4K;k#kia|VG<;7<ih0V_AHY_5?-yaJ=I3LAh-EhM0Pte(5WF2
z`uw-QbEYXQh#>J%VbXO{ohl0;mzLe8LF2#~sjG?4ctb)b9wbtfy0?I+OBN~eMA?y$
zR4J6HMA2-OcJ>&vkI*fdnNdO^Nh-doNwsMXLl*z}Y9oDw__?u{b<L8j$<R04Z0M9n
zx7Jw@Po=jQ8cFIG6L3l@O<=NknlV#EWp1lyf<@w3vL(^(Ag)Dr2UPn$5*Q>`o_4MH
z^pR|T`K}MEVxqW3_u5DzDwK-W{sZYs&{f$mQd)b3`nbL$L<1VwNS-mZ6%!2?x&b4C
zA`{9~$1*Ize{)6fRnK7vT)xW6qxgnkkqlSJdFz$i1FlkIAI)s(9b?VN4Z|!L#)*P2
z2zC%tdI-P8%L%(sd8~e)$ODCsU2(cw_a1Z--TB9w_K!w9`4mwq<l$?PCi1bQQvyZf
zbc!+Xn3nJaDr$XU!LcI4airA*q%KQ*SX$=aVU6s%L^7MJ+7fRST-?2KpEuD3%<s-l
zFK7#)o?><>z2zGu677?UO*+_)^apX4oX*gAg}(Yi0qHfI*|)RCz;j_`>iyMeF4J^=
zP0DKwCyq<!IIFzm(1K>UHcL;&=kw`Lv)p^A>sZXEj=+GPBe&I4Qb_cuae7r3%PDGr
z7)H~?vU%dLW7zSfPlm&$Xl+6JQtDpSX5&U!!A@pMNsE<QK25wIkXJXG&<)0SbU|Ig
zk>tc=+|UlraaKs2Eaf5+y>B!Y7==h~plWTzC8CZ32_7<gw^mh2AKc1q1n&x8??SUu
zyWnpa#ggV*MHs8dH^eM58<?Uq8fda2_frxw2O5Hub{(&(Vm{#0X+GlvdY&1(5hLvx
z?yKeDo>EHRzFD>!6y=vyGmXOL<KWb?r=JHVHOW3k!M;pV*c&IaZOcJ$+{<NwfP*uE
zmk*su(!X|nn~PVS3ho@-Ezw=+5HDgvO)qQd?uC>P>x62-C=u^-yIpk;5>fz$znD$P
z9x-Oy5ZR_%$L#RbN-RtEy6TLrG(ngDir$XH*!vK-(u#9mtWsRD1CP<r1dCCWkXb<B
zID=iio&n9J#kVL{LaR271_Mk*L+llN>`nek7amk<TXB`Uc3K(8r_hNc<D<n$8r-`-
zp;qovwJs-(64i>5H3PrX430)5(cfl<*wOlDlrx0f*1Aj-@rGnjqNw-~%Gn^x-3W8c
zc{*0_l*9k#TaBhletj!AxO6I_%1%X20?#a8I4-6!XAa4fpTdRvYGk+*X0Cnnpd@&7
zvLktIb-P_7>N1iwQZBS97e@Lc1#%RmI1fO!<33H|E!ycl#fa?y(p2dIUB9tK)$sx!
zC2?e1WQnV}Bs}q_1CJ~lWn)&4r1zab0HLvuh4yfShA^1&FDjV)dd*a-r2`w*b2fT<
zwJLaqCLMi<jIxLor`i1K+L;=%(J7jVX~UhX8rP&A&%S1`Y)ljHrAO7?X<EmKVrFD6
zMllHQBgGsv1JP&~B~1uOj=YsSI(5O6yJ4115QcgNPp4f+&rGQmO?F7Mm^lS;q-gKP
zi{@SqFH)x@CyBP2WYS8jesc^rK*~%5``lE}GaKJx02Sq)yh@Ghg3Zei<!~pAWX6Cn
z&#YR+^$up&htNFJ86#qUg|Zp!7|Ms#q<)bZ1l9`f@V<J~I(4A%_f<(;_@a7313TtE
z^#BX3dm;t0qP|rO?;&djvjD@D%m5QHrMVpu*Jz4RwjEJHCn?d_12*WKf|^jBd5y==
z+=yM1n@1E&v`A`KBTO|x1*z_QvBmTLmV@tYYE&W+><&l5WBo*FT&z+Kyl4+7##tpk
zLN!tmgR=eJn<!U%)H@{9*5p?rSJ@091sMHPkb67$HQ$B~Dw{c3Ob{XySJqCtuoHk;
zBmyIE-Tqau!iY)4AvSch+eGl<U^@z~^LV_oa$T0Q{@Plcc5jmp$qYmKyIFp<hQHSK
z=Q(?ET%N@k$&5)pFNF8GJSAAMbY<6+3QVa$O9fJDdo9}S2CPIKv3LM1LG(e4H9+RZ
zb7a(2w~-UXE}jL`Eq3>EqK3Ev*q3<8;oTsx^Tst?(Bj>1&`3y_bZE-WOV=zz$gi5S
ztD<uNAxi+aQ3(~NX!$ni^07<q&*cwdoQ(WIX7zKk-fSUq+J3*SFW>Kq3YKqNCCLqx
z+f-v;x`Ux~6R6BiKZrJol;Te3`G}caN5O!F>Av9SvlPTq3(^Ru36|?=pzLy=$D(D5
zZwT%!o2oaX)v`NZE;$jvyC9DiO+FY$657a3X8!==LZS&F!70V%1_}W+bFR>Z8i-H<
z!x>Rq3ZjM{G4|$D{n*YwDf4i@P8?@7Z+X=7jd&10tgBfB5l0N_1?d>a82H3vkuVF9
zy@AhdcLb#a3M8oMpr>aszR>X#D?!dWDwO*QBg+r74nmN~5GLN#VZF%edJip+S}tSW
zCDj-n!6{PUDXYbzR8lSMnI&RjGD*EvU_A;gftR2t8<7LvFgEOwF^YNOiXUGrJbYIj
z2)?2~VJ0l2<jNv8j7VDqD~@3dJX5nI9$No6+RY7a24*MT;<zo`13Lzw1IZ#0-)L5R
z?k!^MRt8H{&59<T)+|n##458AvnzK}mBeHS6GjW78Zk-yhJ&eCUx|Uc?ih_k-Iin0
zOjOSfxeBkX1t2jXq;~yFKTCxsPk5Y2-;go__zz$poKz8#=OMHSGO8^u)<;y5TzpAf
zeTTb9IT~bO|JtgkXLxHMRz~RD4;)(<c^#kwriuvG;TxqvO`ajCjAD~0Q;j6eHR7)U
z6Pyzt#ylk-8o9qDxT@h6Qw6J@Wm9b>sWo}TjaZm;9fvM7Ib~fVFL|i5+`$rvj6;Og
z1eiNV)&uAacYYA#D<iKdm#>Z-2D}~SgeE5`f(1K9m3mF~q}N<U(Y^MSb?T5TzN+3n
zqXWd3@`4uCa!>P*qG)}N`bN*JCzc7R<3lDAi`i9fE;>cwcaup0v)TR7jk;J9B~ZeL
z#A!CD2;UFcyM;i9kOL#;MfB~CS)|egd(Y}NM!`BqsTDm|<Zg>56F1vjRT{P&lBy`7
zK1trfts=TaL6DtJ!|X3$AQG%PT|TWTJ?;1~l}{|8h!h@*y7D+G?j;2%Dp$sdNOA7e
zRR$ZeDM+c|f*d!NCRDH;nK${WWNsJZJE&7pQAu`4{UM=|_n!iDv9}6$1;i|@2@xPA
zhMsmd>tso6LpdkQKd^k-f26Ax=P*_Gt~TX8;hMkbu%c$3dqNeR%{L;32!BZA%2ZNG
zXX!b#EO8Rp4aAeDdiMO>zBPX#$pA)JCIbtJBsrf%_wC+Rp~ti@wqe$20PeAi9d#!|
zp|HW6guHtt@#K;^-M(P2&+Ybyr=oC|<XtL9NO2tui-wc<Dw;54*O*O&D+@2p_uDcC
zb8D{Pg+zd4)=+QOlPvanb#fXJV^?MWk|*4Jp_^64K_RgwZ`~qQ24mZ5@qJd~DU8H2
z+rUx=!4!KWbHGwAUMs)Mr{p(nHm<K=CDj+&iLTk}&t6)_C&Ri8G5M%@!7b1evrMO6
z!>A@@Ax{n5XsyO0>SuBx-^_UIpprHrm3j_^tE=jaMBZbHB&f=B*bg_lF+k9N7o$UK
zUHY-TWB&!|9YNuU$RyWh8;)Ih-!aU9$W17EkPnNAlJdPPUGELTO-E_WPqPEpVU$cr
z%$yfhYmSg$Yk5EtAU-ypL9w2g$Z}aU8P91dT!RnQ=1Ad0`~CUhs}c<h$lXy`-=q++
zYRP)~v+tOL_b3%OC7r!sWv2WDTHvbu!>V519Rh0qkkA&_v71i_*-;Y}pvl3#R7eY=
zx|*!Rw;X8`v-5*%rX^1W$3VPOvrOW?WS>j%BAEMXHgh*via?4txm|+h8^_UTJaEtO
zxlNqNpvTBY7<4(YME*-Vzz0N^FIiaVP#ynlF#sz?ie9oKZQD4=cG(}~i=Fl0%P)f!
zAvDMUkO!riQE`M$DKLugNhst`_`S9U7~w}nT(&za<nV8ScLU!Tywhd_nn_e7#U0N!
z=Ju^D?DBZ7x=e2|ye7j%hG~DpR4p-rnH9&xd|HXR?wHJ?@l%&kems2vx2n_O+_8z>
ztpu5kdBAw4jPFY0J)BgHdjSF6s~a3L7sQ-e*8p>XV~XSpgmi=rRrf!=js83nk#90t
ztnApSF1U6?EBvzY8h1BjZAE_b7y@7u>I-?{dNu4|Zg0#)L74-h5!usc_Z+Ax_I|WL
zYn@+7Vm1}+_4!KIWj0(jWIY+4=T@;pGIB1y_6F4!ql$Fr^9@0A@;gV@YIu>QUccrX
zJakTs<E$K&hMc5umX<D9GYv%gCp5_MHl;CLqLm9LP@4$7D*F9=p0*8%3WGY$ay@ZR
z{;VNw<nB~P#fN%G`ob;*{f_l6Ml$5`#ckGRGihTDyDFh^CY#r*^U$I!NFE!u3~2Wt
z%O(!cHrI@sa|Z{~cE9TPUr(Z|q61sL;e(e+S*`Xsjz<@nO>ePd1?t0s_qFHE*9t2C
z!F}a-;L)F9-_2KN*{$z3*a>!;hF5bh>^xX?{gdXZ>s@%T>T|e$D^-876ZCdlHSxK!
z&nlsGi}xezuwE<=-`d~+u1r%ZJru*WA^t6tpsCer+FD5=fZLfuxL4rb-AzT(k-e#m
zEtBvyj-p@h4-D{o_qv=;R(7#&G(m>J5EvoUw74>i1n;0?9CouX+f$kec6##0XHpPU
zlVz$3A~K_*uN}heqc|sp@z#h6bGe<?!0$sZc^RkgMBOjKbrF8G<ccE?4u)MN@0*n=
zB6K5)_122T8n^mInsB6CzO3PJ`|87HqMMr6qp#|0$9;PcUnOa3@HW^j$~a;;c`C%f
zWAd7gMBj?rl<jL**)kbA#4_k;;6~R3M3!*g>Ix9=`}e!zA0lWdOLyIcsaWisFqbv`
zJx3f+D7<>eG-{RQY#bbZe{g&W|2sH4+`l}1`|=3>IXu2NID5Ufe*n91m}juxML}br
z9~!jqO~*>pCQyziQj&svjebK_I;`h!EIFP09HorLXE5{#&MQI#;Hvs(bMg_-LmQ=i
zHo^1RirnlV@!c>M?-}OwG&fg1pU5&|1f7vf$zq}j3pu8L8cRrH#<_VavqS+Ho<gId
z%aFWNP9iN)S{UKApgdvhA66ym<ihojIV;|Jv|StKRA(FtWn_eu&WX37^4N`fNE=-A
z5dY#|<lLhCQw^f}s;DcQ`X9+MJbdjma#$hvkGn^LDyM9>Tp%)F=EtZgHDkS7i*kCw
zKb6meg53!af8PCdQ4_>WPff?18*<Kud;29DG~)@`dP&b-5a0H1<9?k{J%RmY%5r|O
zcL5Y-@BIAW?Beo#@6EyGo4t#(!=EnC5C3}5ZdxGZLp;5K_yzzM!;>n%fjbdb0rN&P
z%rkV9zbQ~AY2LCmp1Vm+psTD8eK!0ZJLRj|g;hW2wIE`}P#`H1eGDAvBvwhe>Y7hG
zgr=S6nD{74N);KRWNPd{ccH}a;6d&=Cs`hCI2<~99R27;r|s0c;2Yn2#^%1Ut5M>L
z<Lp=bHqA4R7<^)-=EKGG2B*cD;62374gZ(9+zS&;?Zg|sk17)^x*9t<*pgp&grM8A
zI5*bx#<@d@zHkUw)wt_N9g^!ne04ABPtiSuH(s@a9dRziD(0q>%RV<lLD$1~1ouUW
zyGVn}EHkN2QQ1y<Va|JllO@$AdMh(@Oj|?B)IML+Z7i4)Rfd$=%tSY-;M`IDA8XIm
zM^sH({vuVls3<ssd5v;^x7U}l_Stb}U$z+sqZI#%Y#>@TVxZkvD>leEaC(xLiSBZn
z>4_JI8J-LVT=7ipunW{;ZliG?ajFe&+oGWFrBu>6%3<@gIg?UlR){MlZsn`TGb>Fr
zZaL-n$i6z93yEY7*U}5Vbl4WHH24x^7`$9rInOU0ghgavILLCVQh@Pv&bgQ!13y;c
zS(^ys7saug)k-D2nX-yJh9)~C`BhOK7JCvN{avtLQ(QRz^_`lDKwe72ZJeX7aavsO
zgn>bTZ0P3!9IqX})6au8#mcX`PdjUNwmQ%3-(;?J(1?J+yAy~b`g7Z>Tb@3Du1Xp1
z>+GIFmD=<&M47a+px%bcY!fgRefE6ESzX_DgH1YUAhY{ZD!6SosVqVGq{TbWhx8qi
z&|QBdY>Zhp9;fF9z3I^t6;xSpUm_vSy1RhBRwKfeN_7n>)C96_L|k5ksj$e_XZ_8`
zCb~MweYAuD#~0xoU$HN^jLLWR2SP0|Po^@wgsN2`M<7+veF@Ao*;EXmko6AK3(V+J
z!Ya}L$s&w5fN8=C1`T5zBXymFx=$_r0FwHGxf3T4Zk8cTx|4puI#}BPGQso)0z65W
z)oJbydHqTrISF1-;5%%ug05f{VNMDmizJBp2zGwu1$5kE;Hk`a*_3-EV#(3ru&?rL
zJ`+}>x+)Gd-wXD!v@s(l=erg{EP?7M4pUWONDRDzn?&8!hh&CMjqz;-IH$6{w&T8j
za5m3Lqrk6S(n3lW5@|>&!~JAFwANb2m}&42NBG!bJE;gg=mZHtAJwuQT63c$Yf*R!
zQQUZu=uULGiK&bO2f)Y0+3;c((D3P8H4@cin$uk>=y&!2BKjZVR!Sfeq0^I>Y-RRv
zD8I&eX5D0F@|3i!)VDIbiaG2P_dVkShBcfAXPQ+(CJ0N3*%%cZ!}pOx`5ix3IhS9r
zH&-ic=B#q0*%uJGZH>6p4Z&i}#uN5}k`3qP#WwyZk+0+_?_kUj{ApKE>q2Lp$yoqr
zw7&!&5m%1UAJzXDIxeYH+*8?rulifBTE1xiH?G;5>Vll}wdwDccreqoT3By{k5)f1
zE)il-#BZP?{&|!|Bm8A_Q^eUe=%xC7+g-aD%JDu*KgB90ol?~`@lx#qW5(7m%V~%n
z$f#88qR|B*t7ZnIb53hq(Z+Ng!Job|<K~a*MqcqTq3CAYngq<u^arV(%t7!ADrAho
zLy`De%g7MtV!{N(xwm5cP(LJms0M%50;wxfZDV-L>@Td_!`dPW(?BA`m%*I@gjeB%
z0B>NzsX*uz51j^UUPr*TOFMV|G#k2OJ{Fb2e>n$mlnkTNH7k+ABxB!+Dsq#{Ik=2a
z#fcn~jlt!HED%+*a(;dwi|R5|453h)0Vn){m>}UxB>YsWVLUv9u|g|Sk~<3~+NFGW
zonW%&c^(^dDq>&MS$*o!0W7};{aoWA!M?agqCml(tjK(*um*cOpgXZYK^$dplSGaq
zmI>GDUU!;&KpI@ReKD0?XMp&cDladm2!~9+w}Ke1)!?6m$M)i1Tfx6--cVAUW*x?4
zp}aV7pn__}^(jb>9zqT{rqoRyK7xO-lB+VgM~}>x4{4xeQ#8zrd+@5|+cYcBU=A`p
zemu-C12Q3^nd5TGCn>(16>Tg>FShF5bC+((hIv9$D=L_WX48h%6Ch#Ncn%!YVzz)f
z@FJR2tvZ~=2Spgf#v?RJiWwU6fTyB}7|QOhRy@NbLe2A3e-Y9lS%CSN_@vM~1K%SP
zmJI#lvgi&~GG-p|XUi_h+MwE8$unn7xt1?^<(Qt1+gcr3QH0QS#1FKxN$jHI^*NC&
z<JDqww_NoWW2uH+Cs^?IZz++%k<FV}#7m>LH8&k9!Mnkk$M-9Qibg&%N9|=95W9xM
zJ9Kmr{$2Na6=MmGJUeFy{ErWS<vKj`_P4DWXL{I$HZQLmgPr*Z8mvZFN>cI1Gy$s1
z-5vZ83GbyF8{s*r`kU5qG4`natilIKn1iR15+mI7Qrzza>HA~`xBTR~cyRs+)AAAJ
zj4tY5P0p~?mJ(y}K)`h}I(TZNUf`By+1tnF+SXN3tYR&}i{1<gU{0>jrl>6DDa;!p
zTmF6P<&X%tXGWH3v-7@)OKV(-cQK){o8Wg=t5p2qtBoQxZXm)Y#d4+m!wifDdQzl(
zkxUcfSu!Weh-2=3@@RZ!;hbr>w|9)(wLejJCY3~uKTl(T3(MXVqo<)AA~*<M9J-k4
zjvHkp=OHC6nod+lB!sEro;szzGw3!`jw2B|Ol>)U3JVa0>yAsjUEOruzmHS589^-+
zgLpNJ?Wx&0n_5!jHG!y6D7#S6ew^T++v|Id{Ip5BD-EOa(P>%Gd{$T4@D!!ZkwT#b
z<}{oQL1gfCBI7J70HLx_c*vZ<yG4P;4~8*6#KJVN<9-Y5C35iQ^y25s^NX{$`xgPr
zc|>rSZ4rXw#B8_S7heWZ82AqcLX50;P6PyHfXr%B@Ptd(cB`#75Ir~@O)_fKEV>RX
zW&CFYMT|zHb`%b34nkF=UEC9LzRU@A;pizg&#P8Ne0BnGv}<}bEhL!uu@BO)K;By<
zWvEP58D~Ybh-$*dz;UQTF#<k9aYN^)DruM!pH$2MDYh0xhf2o(qZ&Yy0b30ABfZ3&
zNNOn8=RkL{J_*7eZIX5Upw|o9m}TXUYcGP;pw|f%M+tV^YI&{7h_<8D=v1#Xxz4KJ
zR7>4T9v$4<Byi%LjT@n|n1x_)<Bk@0Xx698_Yr@syU2bh-&t5?&hqVlW~E$z`w%w=
zw<1Y7rOoR@t`SDKG|F)_m9$M5t0>Gno}T(a_47lP(Is|Kd#a2TMy#PMUr3ssm@`?&
z{LuKnYZy3tvd~qPyP$RksTx<N0zS@}1l8r*70Uz@j@8oKXV^}&`dfM9i;>=If+R9%
zNxZ3Jj2aY%2pLhOv}k{f2AG%1I$&DE%zyyGa>0&!7h9ii1%5){Ko_CTVEA1jrKYqG
zf(FKi&3a*}2+Kg{B*_A;8Upi!`HeACqImAG6o=~S>3}>4okUy+t~*6n@$<O_)ELCk
zoHHRp@4gVwRt>c$HK8=5%@9<cFhog73`<jM?5ai;2<jm0?<-V<W#~JV38o=KB={d{
zG?)54sa=h%uRat@s;Gf>(m>?a)tr&7Th@=TEGl(Ya9xUFp3F+?W^U`_d?_iK9dQv2
zJqBqj60D`EM2c`FdV&p>1~6=-cS5F(=xK3{*#<-t=zTYd(yO^-551uMusW1f$gp<t
zWAX6}7G9;3E4#rvEltRGl1RDrIYILOpYuN-#c;8l|6>lcrTD1K&yUIfy#Dp(*I)bj
zpL^f-Hr7AqfBxUh|4cjLO_a`~$tltucPj+va0#meI!xXK-vx!dEba@Sg@vFWG6rUh
zlxz^Q0vAO~$_Kpcb77nQK)u398M<Q}M`JG6LO8vDaOfN+HHauV>mIT>@b4fbpVl-p
zaH53;ykU<K%ppq7)e<{{_$Da2D8+ACZQPxOof=NyvMNdr1ryOgT4)N!DSayh_i&YZ
zh=B2kjy4sb@>vFKIwsp-e;R7m;uEv|)lQd%No|Yg(!qQ<Nk%c;f+<3`b9UO1qN<i~
zM_8~)Ni!OY+^4IhkSOOrzCvexxMs2<F&u92moxt<-C@{RG-;Wo=msa^gcLT{g*_8D
z6M+b1JWNbqfVZeRwHL^Hp>ev}f=Moj`8^Tn89v~t(K<}{M7_)GoGsR%ZY@E%#T{|C
zJ&xh6$y{3-5zp{@*wi0_|Bi;)Ab}$NK5cbEjpKijccwCamJ#!X=yb#jaFjS{Phq>Y
zypvNoGpb)?_jN_<X0#UvB@~B1$~N-zF4(ghB>M=Z71;JIJ|5T&kfD)LB8(eW3whs*
zN>SGefnFL`+T;ery16$*Jp5y?2ejX6ZI_sChI_EHBA{w3yYM{U-7c?pB@KOldkucz
zgOhm)mCElmC`Cm<irFqlyYDqp@?wsXL^L@{(!I$Q^b^Tg+#Swl0emzv<$=x&9{2{;
zbo`Apw?<vw6{3gwIYHetVkjLg-W^V*u!>M`Jjm6D6y}<LQc^s39t*Q|x{G!w>~p(8
ztDD~7D$_Q!yRUXQ6c3hGU3O1VBRzJ>9H7@06PQI+_JbcX*vQw}1Y{*B)CS5tOKvhn
z;i1D!+i8v{N-(3Hu(h8}PLrF;hY|p3KQ6E5l)w^Uf01P5R1IOdDm`G|m)G-2t-e;9
zaXZ|u2;re`h%`xutU`=Ht$CHnqqWkf-)|%POTK&qSIRElxXnB2n5gc7A-cT1m=@cM
z7d?p5k<m~z?h^4JG*`34qb5LvVs>{egsUJ|MZbmCcCx#-y_W257xUTf#r7KhgU{~p
zA=J3w{Q#fIRXQQyl15F{zh$K=Q~CI1tA4n64G-Nkc{JN}(@iF;s7y-O@DOT5ArmUj
zS4|MUijVE(rPmL|dlVV2MT<dep`e`1pNO5LT8nOmdS-06Zdw!41W-!&s9>o4QF4W@
z67#0*$Eacwd$0Xwn}$MBi#YpP+<Th*$Nc|qko+Da|Mgycv+-?3{`>mdjm^*U-@oI(
z*4i4MK}P2(^Eo>7u_RE)<u{sB-f@LYNN?hl5*S@4z`mm!AZABlZQY#g)r*8m;(^{4
zan2j-avvr?V@(|wJh&@kx=7ZXS6m5DNfri`HIAvQ7#dLvjqx7QeGz)d^&#qD*=z+h
zT}`q96ce&Q@1P)tTjNbYO&aM4?{xHnMDUeAnCv9N8<mVJ%mD{^JPruT#p(3r>12L|
z%KViX{jp-xmr%2kw+TomMvhSIKw~xhw-^&wq>B#lFlN6hhpYpntk_0XnNxg<<ft30
zmRNIvjvF9&!UI~1MZJEY)FCHwfIr1CWO;FS3=b4N20kpx?+@-~O(l=x+XLh2bOED>
zGVhDXnYvRn*b#zbmcJs|3DKH~`ZsUE?^y<g!N97@kAUaO?*Q$9jf;%_-E8RGYcU#L
zL?3iNV;7s|azQWa1@{aVK*P&77@Ki%jTSR#u%Q4kYcFBP9z|#eSybXME#t|XKOZ$M
z&D&RdP5n>GF?xYCyiyJGpX2-6EFbwLoOyq1%q2QmP0hcFKE&V86X4Kr1sfNzUQysZ
z3Kms<NUnYKR=B&S9^iTZLFn9lLjfPXsoKqyvqz1Scys_`K%Boy1l4`QG`6A6v$&ws
z(D2F&F>H7*!!c(>R->sEK93Ds^!vEPqfN%?xUzyOx}MLXyofJ_QNEPu_=Z;AF`YP2
zyQH8+*)CR2+1(5%3wf?goKp(~oP*)cnO<J%r6j-gl7h;f1^XD2pXFr0(sE)08;uOb
z9TQ$V8^?o5GGIt7q>l~FQu<S-b+GM{*_`cgmq5{H=vzncU08X}cRO{;VzrN}lx_Ef
zb!*J=z*ZNit9f)KXNs#E*uTu?gn1}4{$3Z9tDx;p)?W}|9yfID(ZV|SU<X`d%9r6M
zt9TD*_tW<EC5EG(n6wq2uB)RPWJ5-W3P-Ca_tR8=hADrk$3G@Tg4(#3c@~Yt!S@rc
z<m}T<H;_0e6rWFkDtwB5(q$>oa-XI)Np^gKFm_KLH<qu}`|yGPG+lqI;RbY0n1^^=
z%?{~VRQc=VE*_yflDUcollYU>eoj)v4-^<nci9^boc)9iO!l)B5fwjWt*3aW1Fv=e
zBdXLtS+(!-jPBv#2Xhg8Ox;h~jOM!d&3h5u059Lr1^l?W+?%SQ$7~|`_!?^!Iy&Ir
zP;0}u<M9}skkRK+rGlrEkXpTenJ2>!&^B_CPp_?sZ(x%HnKbgE!pw+?$>1wA*dv;m
z5x>%>@*$p0q+yiJU0!s6yxLxQz5gwBd&cnJQqf@w?D;VEdiGoDqtae&FJZ>7r&0dl
zDb+UDyI^5|OMmw<eQojKe(~{Jw40h%<?Sb_<}YWTq?+>|Up={=hHB^%WJB#ob*ohu
zp;&a&RpFm{+|~M6JKFNqQ;#!$=6AW$frU3U6+0Jik|)-{#BiL)G0%#X@RwFoU&YF>
zpV|*DvG69W&1ypx#DSu!jrEcrU5|8S*@=66H4Dz#<11Kj${tg}VM$WjO=a8vw*Gwl
z=e|bb!3u~OTQn&PZ}^6-S~;NK$Cy_&zc*#{LH#V7qE9`|`k~*qTFZ-A&U(8KSG2H>
zruAwVSmS~tu`8ZX16QG;6v6vJp2Xwe*jPD+EK!hGOLn_DiErXb0QHpwQ(#;`#D2lb
zru#0|Y%L|G_J98C-|2Vzfc>wLiaymK_<;XsZ}XdPeEZ)<Z?pI9XZzp3#s1f9t5cUa
z#{+c1W{~Hr_=3HQK(1~X8m*5^9$1jRf0<uJW&C{}jmV6MlEuqp|NHZcy_ag-<CLkX
z$s!+6^?1(WyONTYaTFv%TM^bfg_VM^azi=bg4;Zrxe&B!ui`-x(9bSA-)p3#dY)L%
z5HSSyTd1HO0<VstGVq6!dD?x<P||$~Mk|&io_0)glY6OMV-=sVFA+wKFq{b^1}g0s
zk!<F{zXxrCj%8@8O5i62k<c{O<7xd6<|__#zMQdso{?192M~e7dEWZ{T2A9#o_X>W
z8#rORk`5Rf(X}AB6AP3Y1scI%-Kemn<k6D``Qnox_m*+2nVQXbIlMwwnwB`^1fCm>
zUknKHXGf5y(Epmf_2XPY9`OIz`1b3K3jJU2ef`a6`v1S_|G&iO9_+F2LKLL&Z8d>%
ztF`^b>gqOBiJ_49lXz!k3{-m+0}T7H$bZ;E>XqgFJRWTY^y)UofUkb=r}g!%mEG+%
z{C0PBb+`5YHp-EyYQ0JdAVtZP(^9LSI(nK#3Dm^_sD!EziMfR?1vL9*<jzh`ZsQyw
zN`dmZr~w=SZ~Qw97ZwqPF1nb;m)t)ExhGOLkp{$8fnXfm2XW#=jLHY(q+r2T`u%w-
zo({nZ{kalgoI$nJAhE^x?-diOAGU6<(QOCrL_}1exId1olUvg@Ko%BR_Bx4g6stXD
zj|&_A9~CBE!+57R`=&U~8o`RmyN7GD(rpg^pc%&s5g{Iz!ttS%&=7Nj@&0N}2q~BM
zNOiLX#x+MA2{AZ4DHwo}U6C;i6Rz+|R#5)9#JEgcQh{F=&wI<vyMW}a1k*fCCH|6t
z8vIIaxPc^G(qykDY^0^&T_z(_wOg5CcrYko5L}#quo1jE=?Cu^8IC7ZLTQy{f%#3Y
zpnaNM15}^3{McD^6AA_W)L;h_I=~g<jFDTp9|&Q5f>|TOjsXNPQ}Exd)?P47as=;4
zOWk!08pwZhN*;ox6|O}cL?g;|K`}P+#!2WKO2Qq%-(tMU`{6{A85Gya7{g3Q*=>q>
z!r`2H`PVIW;fvM5{pyQaAAyw^{|)9Afo%Ud;Aj70Rc<f%GWa9>?*;sCBY6LwpKib}
z`{w<7ddNleg*>$1!V3zOwhg==|9!!P&C}!`^SG9$fl_%31}VWUBYf^{Qe3xSvIH&A
zBD4UXhQUnxrheLaA*HO>oQbxsu%)bO<fa$%F%QUlx6)sPPKDW~(CuWHl$?kgvyuP@
z$;YDB6Vpe+t^+2vMRg5hj#`<Lx5S6ohG7v$$=R3~XE#LhSg^KIzYSaH<&TibiKJ~9
zp&z+~hriQo?-{*PSY`15@sEcWQ=v{;(G;D+#UBO(0LUSx^-kK8X|l;EYr@3BX=rJ9
z_)Ia8sdjcqMBY_v0ERfvW_g0t;yR8nry4dz@<a;=67GIUsiL2_HxnnDW&)(sghvQj
zdebY0j9R>8^p(P*I7l=cD8wj+SOZf#w`K{n8iRC#Hz&Q<K<O<y`X+2T%CF``_Tl@z
z{OW>XF2@9#%yhxcs4Ak*481W&Eh(^Y1V~J+NbmuK0W)+E9lA4F>Dm^!;Cq20=bS5o
zw|`6wM|HBX6F(^|GAH+@JJ-KsaQeCO?VondC0moSg0w-!)Nm1Do0N|SGmz=672vJK
z@a$faTzi)bJ*U&lcZfuIxl7opjnD;*A6dsII?5LWU|CXjL0+m>joO*yd~^k{kXs1!
zrTe8Ua*G!^cyN(<ov=E+XBpiBJE1je!gg)G8&K{Yl2t48jCMT*&GFWkTWWP0{Q`yU
zkY58+M%jp)NF`rF91PLYG}IN%0A*z>JaleS6Lc&`VcxaeFMLdIdSl`(Tm^wQfvqJI
zcsMp;A(5tVA<kWDGNB`Ji=Jw04TDC4D?8Fp?4q3El5m{<^5e=UsdiYckixsLvTZs1
zjk2Z{Wjr%khX_WsiEIjgNCCqJVZP<S^NUuA^}8^u4v?ao{}Sp|WD7diKn!D~+_kfF
zSb?F(`Mwn}cn`2r)-!5V^gswMVo$gA9hRI(v4c$`pJ3Np=61lk8Kno~n&-)6v|xF;
zql^sJx`CUAf~Rm}T>=@e9EsYz3RTp&)Y@*!a@3Dy?7ds#J^WOv0-}byw&ff(!+lad
zc!?oW?OWu*ENWn_nRX|Hs>Xy9a*N^pVZg`0^x*f53{3cVunN<HBg0#1j85JlBW*F;
zi;$fZhO5a$zgxc5?cCM3yAd)jT#-Wnl_`!gLAa&piv35GH7P10ifZW=(EP4ny&n%+
zq1UGGLg}^)H5qX}5^bm}C_D--=aX=$wZ(}psX5|>$4-o)ze(_m`wDB|Z0$uf%0bxc
z8ZJ-t%2**Vr7hO0637hDv~(4|y}`UAx0;pjfr_~wkwKwS1OaBrNdhOCJ>k?6t~gl`
zp1K6Iwd@jb9d-r`6_qrm5{N!zN2D{Lv-_mL$S5QR!587%ktFiPH+1D_ST@Q6IQrHC
z;=V<gLszL%624>kdkapapgOJ6$qM3#d`lYsZR^--Eg++Xavk-p?~_q#tEDK0jq;C?
zEKH@AfB}WLkrdW>6>S(5Fd*fjp30NBgrkOKRP1!F5QLi+UZh^PyTJoA!;*q3$)M@g
zBX-YFkia00(bmd<R7nv&Aip{IrDzHSir1n)fxZG8UB*T8*jVqt;$~7lz>xC>takqm
zGHQF$Fvn$?UeQu=T1*N?=IbZIyCyGrG;VRmzPv_`NwWFXM%*%mMQz3q91rvyo1r*d
z#rIMIta*E<gG6I+9j+%M;|0#`Op;26E$p7j8$bvGjMpSUYY|<8F>d-L{F0hH{?%%I
zg#P~oR{AE1Z+~M1=mYlO7wf%>{=fIl#<!pCzyGWHf85BV83g*1lqeg!`JMJo58+N9
z&aI^p4<Lueb%UP;8_!^p@%Dlby5-fcE4!$*L-a*N8bjS4CEA8JoKr$zkhAfafI%g^
z4bB}@i<rOB1RvcS8{r1&`oFWV41%)Z2kdECpt17c3ggNw91A8yL!~sThVa;t1vZ-T
z&CEIkl=>s1LW7YHIhPaH7z%bC+My@p(NRd2H5C6f_ep6mV*D_x;L6uB*^bdF7=lGX
z?xHFe{a-pVB0qJSJ7}nd=^!%((of)M|AF2{_(yJhUEl`?{vvzA&@Wi5jIFJfmH9K*
zij2E~jLdW~V5~n%He>*vMykRs0-G)k7HmA9up(iol(M*o8NDN;fQtw{hD&9RK$ltW
zOego<omA)qv5(M!IdDd4N!Ad>B}CLuOKzo3RANh#WdaIe?HJmFgjF3=Fm#_o)MznW
z53S&D`s?qH2>`SBElz!Pzc07eX5UOxjX4o0dLBgsIS3a<`%NlTdkpuAVUlR=yyD&R
zu3XvWTXOGw|M1Xm20ERzd!Y6S)Lvquco_@E$uXNb;Hwq>^$7xsI{6ih|5s8wVSsZy
z4aBpn;HAN}P+Tlj-7@?s>;PFHXogl)5UHjhkKe?s;G!gkh#~=%k^>0-F(q#m*}`IV
zGDZ^9j#kcl7AwnRI2Q$Oc>=d4&km^0o(ry#HZq<J23qDcJ<5F7NunDgp2rj7@72yo
zy_MKRZBAm#;h0hYD<9K_(Aiqlyl_bjYDt@z*hqo*;*34%NUb9%f=ckbMOcF_nQ0;Z
zopt1n`O=L!f3S;QRQ00nd6R_+Gw}>wc_G-wYiQ;~ObaL$u0!Ynow}LRDRJFusnOA4
z@aE4)I;xP3zBQXKf<Iqf8<K!cW1leHf#oU1m@rKW1?nZZxH!E^FgKkgZA7Ag2mxW&
zJyIDmQaY9cSuv93uEzQu2^<;bh2KXNkZK*wah|6#o_MrIQdsx8oK9AD=}6dj-d>|a
z?RDcb)aPgapGEw?AJ+EKQosZJ|Hj68uZsV<`ORnim;WgI&xSC^Kz_=XEa+2mT23I3
z>I@V8*3!bYV#KI0_~QieTa=F+U`mxyjPUUYupF}H$e=|5p`FC!t}#nuA=#*96z%dX
zV){JAhU`U-urP2VAbIr|cLD7~T1?lFN$A_Bny}+mv)&j28?21+v1=}o7vA)&cmv0q
zgcQPXcPK&vVUmi@0WQn<3T`GcIDHWQUOD?=9u?PL1bd<}f}5_$Qj^((Gwb?G!_2e6
z6o5+sfhaUBEpS<wibR}KF&m-iIPYDngLWUL@H`U~1@XFYJP`x|J|W{XvZFVo-0-VF
zy#>LqQ9iQi{iS~17CR`<@5U&&@$z6H5~XO^p|>anl`&a9nZqwavt~Em>Mw4OlS))t
z{j$>R5sV4LIu&Y6L=nEbC6{mH%{ZAJNCjw`78}FsETe2nwSjJ7N{?Fqpm?P-7hY-}
z1JGHY7PAul<%5SNssldGN|8JzF9?IGVLF1)1L~I`QI;0WndeW%Z=ibYKeHYGU%3AT
z=jZp;|82s5HT~b$FFxP@{~7+jx(R~)Y&y*l-&DY#s{T^){8C+dE<Z2^6n<zjSHyXa
zHWu;&buuPkpaongF_S9A$!!3X5*%HNgu9><(Y(xT2cAFg^^7wGz6#_OnTtsM(O1b%
z5hUGWg`-CMs&LF3aM#RJ;zLLy02HyezWxUgH1pVG^I}v>oA;M2{Z_*jE^C-|qaggE
zMv*S<F)1P0*I;{ByszqzwC2L$l0S`ty)byW)uPJxgPmaS8T`W3Fd*Jy4kh>}xP|BY
z!85pp0{rdf;#AahbJ@EdS^E}9)HH{Vc~NR|n=EuNG@haodBW-k<euQAxw`3I+v89n
z!5fCYOE>gO=n0I<re%-v9AUO_6-*v`bU{H^LelDB63tQ|e^i-?^NU$`y1^T0Ami*d
z>Y3oCfrrMBFMEkPtrWeS=f(riR4NWL``jn%_M18CC=Tv+17lLYXVo!!!YT~*4ld#=
z0_xV=+^lE2z4!cOhpd0?PNz%CI@FT2;YwdyW4}l|XvXn<->&WjPa@sn%0F^N#%32;
zp9N)Z+KGdt?<{@CTDfWTW)X?=!}+<;&cOzDT@6e+A$BY@4N_J))IN_SIZQ+*OY6`&
zA>E4{a4}5p`&&*WfC%0HM=}O>I3T+;0c-_Rsy;)+Xl8$l`M^EyoG=#V9R2Uu7g<-8
zM#n@nDvSo%=$y^D<aLv5gQH*`BGYG<aUMJm+I!D#I$cJ&MfK@pmhje$+x<>qzA+0z
zgWtI*J@x!Rv$V;GLzJ?V!KP$1UQ939#0S<#Nz8tnj~TljE$Q|B+B{O-!44nlCv4a^
z=NsuF1JRqwlvQPm=V#{bH}ijuIF(Q42AA`Ho51>gTaEwS=zY%r@}I*0HEx37v{@rh
zMk!KXXl^jlwM#e^-kK80>caB!CGON$W*_2I;lAGwgZKZ%{y0Rh$<Q{;v!Z4FtJPg+
zKx{#P?tan^JIyoU7AJ`jW+y3K-3D20i?~#>4LxBW_nBC`oM@&A1sE;rGCEgXvC(Uw
zeOQ)YWXTl2AxGmkG*qVd?0O7L2+k#0bIn|+Ho`5!7vlX)E~-HuM<0x>s=a4z+pw56
zU)iBLsgm31nxw(mds+E}GweF0H3W+_SHS;VQ`%Tyjq+?>^pT08U@Y7No*qm6{3w$o
zj<bb&Jf3>d>rq|&bsPCJh3hn!@yI-P>NOc&N^A2PM*qHXl#sQC1P)SWT_==8Vm>G*
z=z(>G+2xTdfqIb94@I*kTf+%hN4pPlkFz+VVCL(LJfK+uz^&G@B0!76z6YGmcH{E=
z?dj>s*#(;dt4}Y_FZRAaNYl%^yE~R5k^&WrBlZ<ASv7HGH@ll?YmDKlq#WI^^oV7t
zI@lIs=+2{r*fw>}Cdtg?$<rjj>RBM}6L=w0Ci_~xIojyu{B9gglgT}$oMWTZ?0WVc
zxsdE|9V@%)(TqL;z$$aXIcGjDUmd*OdwX;N{~n(oTpk|3I{4}G;^)%?C}YHqF*&1>
z;~x2z!9=5f=#cHYCP2s}sWC=*?0uW;^&DVH0sI|A&}JC!Ax!J2-=CE^^5~bPP%31)
zn|pb9a{2n??9JYVbno!wwKzUxB-0TeIRoLqmv`Viq*?F>Q(8SF5E@w54jVNVW?R(t
zFBO(r^Y=JzkESCqM)|AV@rQ7De0XuVcXasIgG=cD+2Q`d`Q^)>FaLUQb^^tcvC>se
zChWNTrtvLjnKb>-8MhNnG`B^`;061Xx74pd5J;O(k+~Q`tic&+b1lT|=K_ihrxC-v
zl-|2*69gJDOl&;`)dp?UXLkb*o$Yw@gYE;vpm*2PLhx$?eqk6cKHumBq|;|-Ow+Z5
zKz_tnLB?aa2sr5_<=*y?UcCk_ldjQD?V@AyZ8_ql2!+%RW!EZ=Qz%B9c=-0jR|i&u
z+};R#ovlFGXYpzw-^{@JA#=i-*od=#gRlM`YJ;#rSe%y`k+0l^q#AH3fg~s0xElt4
zTMyU&&UsTg$0tTiyAqX1!xYVlE#X}`oid2OoRvjenTa_${$qp=C$A>|Aqp&~wU>0a
zsvG$(L=(+k$=av*0tElCw~N9Sl)35hmu>t%%5Q8xJIihh|FivU;y>99ru5BJtHI!#
z5D`GQE+e}<;;<d>9sho3?KnFAEk})RbiV1xK4x6Fuqo%t8^v%@Ug5!;&2tp&ajD7q
zB|-xTf^nBwHeq8#_3D7Veg)KVM0du@uKr~2K-wsrv^Ao%c#k>#tVe9{x4;m)2MHf*
z*sS*qT0bJ_`ZOsmIJCJ-h5aHTPws*|@l04kAta%q#o%Fb?1U!x$J;2!CzpF~FHWFi
zhx?aj2j_=>Jv{!tvPVpi3a%jXM7)9>?Cc;zT!Uo9+Xt0!l=_?0hje$B<!%Hb_+Wbt
z{*rh+G8>5wj1(fA5}Md2w0ecgQ2W{s-v1Q#h&d45c<pSAjhSeTVM$dkUctw4#rw0_
zkHF7kDNMUdF#i|>tL{s!)=N{T=`%(q^C11+W3aXG$#F`?T1>QyT)y>&^267HLBt6Y
zTO9mgLoo$>$lS;#Rj|kF!FK!Ua{u)7^2dWScr2_OiaOXTr(Cv;=a?ZYXSU%+Mtw!3
zJm;)OwL0k_oJtZ&&F}}OzaOk`hCvT@WbXz1@7s{mh8&JLYFd`VO$FF7v&U0Y5M44|
zGyX-`V13BIRV6ST=#FPTF^OB4h@<W-Wk!}nNMUNLf}vF|k%)zhxQ-w6JQyVg0I;_r
z^1FQX(7`piwsjU!3-$c@`li=hZ8mps(kN+lNeW!vBl~~)6P~^6mNj+OYx!aC{1OTx
zdHG?dSJ?#aOL#7#GE0?qCrC4nMOEG5K;#PX|At{OxdOQ@;^oAp*wYOzn;Qpf<`yVO
zZH(K!Ubo)4pML7~8oO6Pn1X(ifM4$;NPVauoMcxs*mVhr@?-objnYru*|Q(_j@}*s
zrF?yGc5n=&91j-lXrjwyd<}OR`&>8%1xlalZsJeVne&TRNV8EeIC)*^2uh+PT)@tS
z6LtxcmlVH8Cw|;JJKTE(x5vf7o71Dci`tZ6PbhG6gz48cOUUmvC;KPI=NAV*ot`!H
z0zG4q$7U}CM`iyh`toAY&W0Xc>>VB-oHb3%`Nh#!1_uUpN8(o^c0`fVhL#!llxObv
z<lv|MgVT$Kex;d!qI{Z7^Qi0AMiB>2G9t1V`EZsxUlMF<eQ}Z0Ud{1}jxf~Th9;1<
ziL?%wi3@oad)Jbkjycn^mCsWKGjI(=$a^`Bj4}=>G~eN6LrFp5#z7Mnxr>i0bQn0T
zR|hZOe(&DI#v$6N-Ev-w*cW35Mr9dEBf&9OO$;KNu*@Z^G4_Ug!K;Z2vhB?DhSNf0
zM+y+Fs+~W-ARck={QThTqQW71TuV{X_Hc%T$4l(WG!B?#o;y=yD`5T&cUz=>_iD%I
z^;pP^Q#VN`bjcOM6B!>$a(Fu%zz>ZN5zI$qTy<{|D{eZ}4P4gFh>^gN8Fo&BHi?;i
zuj5q!lxUFUMs>?7K(XwaHYmI?$;es18=S=kAAp;ZwPY2U;sAJdxp;!CvN&Dk%(swL
z9uF?LTYJpl>CwjJ$??(8b^~IRae&Whx?!^MnX*t61%u!ML=V_ALBAtjNm(rpVnt~l
zsVFfobK|jp^!@44W7+Lf;4+ZKMboKCLJ<Z&v=pAVnNzU5jVTG59yTL(z1cfDI)VLq
zbat@!>Sy@(@cZL~S5Ro5S;K;LaKhiA-YcF=4E<BZ0H?d=h)p2OLk$UIor2h_IRgqX
zZ)j&{bOndg^^%}EOgx^-iiAMnm{!B8Jg%xrq%#e+fNp*CKk<-3)Z{Dq4Ps}KDRV|S
zi&6f%&1jX^yR5wBfZ91BXbzC2QfEAotLYgT$<d}#a(IUj5k~N!aj;i~85YeO8LdZA
z84>)_AhwfcEEg34AtduTuY&d7^(M?l?2DkpQGIwVQ5cubAB)5%=P*kZErYdK7`!xG
zA=p=}#4`sYnIz@CW^pIg^qRE{_;vv6ZgGD2=9RJQ1Pww%bZ;)9y&r#~YDGZgPM{^k
z?*X+C-z5}&8~`omk6f8x(H_|8ePe(><WNKV!a4)XJd7Q>O8p^_8W`W^hAg-B=S6DC
z@rU*-@}Srr$AdEcV=CzmM>AQQaX)GZn;VTgUGFAu!=DQTz``?u!WMz?EZnQ;FI~9_
z#_UABxK?}n%E5oAr#m|fD+{eo<kO#qpe;gjfuv2Pd{-;V4Q1m6BdxR`7BcZL)7*Cj
zrWCRp2aN(pF*=|HAPHPYq?)4dyg7L0H6%A9O+0DKSV8AfDt2^3?=MW{OpBLT%8W)g
z-~njABJo?z#;X{nIb=6OR38*ZNxyWlA{R43R9M(r$g(JnM5v|ssoh2efkjld?JJN9
zE>HK);FdZ--C#}UmK2sVt3;TIvQ!aX&<J-!2x!bKsHWp&7N>7cN%%eY;ClRQo}yJH
zD94J`3H$it!T!a`8TZh1$SBAC%b7HjkL6z3vSE`BdTJTN8Kbm_1co3N0zuKBD9k#w
zp#IWo)u#LHu`mFQ<5^R+Q)M_vpyrz4j3JGRt{Tg*%e|m2v)!S2Mtg+v6>S|)M+ku<
zaE=YOihybYIgb<p0(XsbLn^4O_6^zkxO>$NZP&1AI>oLLNv)9%f>@_IEwA^^FI4rS
zb3i&4ytSq>4~i-}6@NIg_Y*6NmX>H}U{qMupvHL(jmFMt-B=1Q1%IAVv6G~Na}J_a
zTj0n*`URF~23`6`UczfePD`&Q<PR}S*pu&-<-~hcH9fy0uQFT`a)zWuY1klVCr3wz
z$KPKb9lkkqMPm23Q*ix^`2-?Cz>7z)4iXm1mR)WQ?Nv^LWW})MF6xzPkb`U!M$j|o
z0O8FLAAzT9#bU9&J~}x)Um+P~gb?gd2-~ulJ!;dnpBSIfLJySlPE0?{vs^g00g5Yh
zA@tVQYtsj$a{tel#Bu}8JUMQh#c6bxOy^T!CfN4DOtUZu_WBjZlWq@Xoj7!s1V!Vn
zDLi+m45$)csY#7Vz{q6xVm1xXYa1>}t+>HP5*Q-~gTuy>^;n!mfpQj@%Z>dS3hUSq
z_+o7r<CJ;ZAyr5-RuKCXWaE#)pI@Sg#Nhl@N(N6~p${`ni%6m<&EJFUc0Z*_T*xi0
z6W*yK-z%|IgazboMEo_VrX?BBxG-VDYB&vLWs@BycrX&omEM~|7XL3P*@2ZQln@1K
zg@bg);|djk+soWM4di;Kw_FEMpE|j}VN?G6)LE?G{EH_OPcWM_{Z0G8_1Co4mUn!#
z%q*$jHJN(rkfD9lDuXLI8j(F#lg+~;Qk#uYnB(-vCfB^7IN0M`$zuT$Bh_BP!%NsM
zrJY=wnqHje>4)ci;<5daos_$~yUWv~x8ENgvm%5@QOik~!KP48al3{34CTt$*Q?R?
z=dw6^XJ>mqYfGzycZ+U{Sc&vjlQ&yHI7)TOQ`W6l_Hyt1;A1KzFvo^U_um~pxx#&t
z7#b=gFQ<>_q48mAtn}yOy*G#Z=fKULACQUrr>9oW#M@24(bUjK)%{R)A2Aya`esA5
zr$>8-$0slU8wl4&)P0Jl>x8K3q6XgVoqklOXNxNQ%iFz+gR^72wx3q#U(mxpPYvGY
zqG~^)ySC~@z5WgR?V*k4R{arucdK9Cd%oR1s(SXqYO4C;Z14CS-2|UlFNMmGtkqEI
z>$k_))$<4Ij5uQl-Z(bT12gkZRv?Dn;rEw}visO8?-iyz!yHD1IxHJNFd*{!uHv%H
zK&=Ui%NWyNy56($kwvJX{yH#!>S+WF@d2aTqrXweKHV%8Rorq(!^(L<RAT`05aq1!
zeVFL2--^@=GcP{p;vonB3<v~iE`jYJzfV>vat{?~m5WV1MlVh-&raR}&G_#VG>N(r
zdgv*83Qj^UV8UZZYJ_GHsS3^Pw+u<m-d_Z7U_qq$)kB6F^@&2tuy@2JvG40XN5&7t
zF^r`Rxl=UromGkOGGGt=nU<whq~*u-^7uro2vw)K^q$;hP$NOgda)!qiTBFP2s0{@
z7`6U&9L?B%P>|1Ul8WP$*oDwfe4fYEA(4x;1!yKQU50P@@=BxFjH8-=+_2By9$y^3
zIaoY=I^4x=QgY;7f-W0ux)8ir)+?$hl+P0~`~&I`b_G`ieZxR5x=e~oHm+P!4u*Kt
z9%R|1Lmp#f6%f-)VuyH(vK&;Zv&|0r*#pDJg%i}R`gH4ZEO8q9j3V&+#eY?>;(j0e
zpKsT{>3v<r|9P?gIsWTE3jfE0D5M`wOf5la)`Fb2uk&5_fA#kLuN7utc2>%4wz3QV
zhaSF`yU-Rce~^f+Hx54D4^HY6_CE2QPe@bE?w5q>hZ&az^o$uDbQ{66Ucd-pYw)K)
zq5=C55zrfzy^dIO<f4l^X;*}nN;vJnMwEGo|LVxjb(-ude_)VE@dS3G(=??^9$PO`
zioHpJvq>1slG0RI$Uz;D0TC=IrX2|4c1cQ5nQMkbT-k-k`R;akz00!-bo`orIumQ2
z?PHqFv@H0+8hnUf2hM*`Y_!WD4Nh)ro^{1$fDcDp4mqL!Qh?%Z8u%I(-BmYvQ}m4m
z&~yZP^m`yDk17&o+sT!aCsE4Lz`Wj*aWwh`0j@Q?neu7WP&v72Ge()UXi}~?W2__{
zwyBjLH&!iPAFtu=LuT|Rf*No>qzrXoaF(&3<;E)-*5)hVF2oBxXj^eC4E9h1zjv1u
zoyv%sSIy+_hQA&;ilevn{vW33PYc|!X03|QIRvU4#SGG!h^>M%?O`U^p#6G2nVcp!
zM{mR66yw(-{?&QZQdIJno8OP!c2c}Pl%pdKzdL#bf0$Y^b5X_i#KU;hEdKzqMT|=6
zIM@C<j~>}&=Zku;-Rer+Op&;6F<Kh8tNvBMJ1Oa?944V8k6MNEA7AN!;Lk@61WMKi
zfS)&Q77Sr5S2>$UVYbeHN5gE8z&By=^3^{4`?%e1lFh?kkL>JU&gy-IkG-z?+uB)*
zznq0Y3vpXl9?I7Do<45hQ?J;aU&rD+dgP#-FY2SSFGKd+lc2B4>M%ANr+@}R1$1_g
z>8$sBpYu>U&mOl@lILFc7ULg3a(hwX`ndjZ?&OTSGa@x7Bw$qD=F7#^7=@mKrxUzS
zopd`5AkL2Y9%U^}TgON2p%q^~PU0e(NHLWv=n@Xi6@}C|H1sSC=<MKD#PPd3$mK*H
zSgJ>CvL*d<*dSdRj|-w0lw0hu&*OQswnj|ON3oZfS*~FAB6JbEWxEr=af=fFoDn#A
zU%DZjSwUFh9}>*FLb+sdFk)%zQ@lnj?mvEnom&3I6%|1T(B;`+A=$togNwz;g1P~*
z5=BSuF*|@<0b98qu7(DzC@hk%NMjz)3ELUDjcVj$Pz!wwgA;b1+YR2_^Y=KTTc%EG
z=;N!T&$&j6J_W=9d1kKEq?56T)HZjNgU}=n8SveBl0_x`HOl6L2?fmtsL!B3669Gh
zBBj&ShHAZ5uD#ZG6TQ16-Me&7_gc}M7)`>C#q46ba^m9*a32^Nf<J)1($CO!3UG>b
znNqo%i8YDHtcxRLr`6hH?E8WEya>_}t9AP1!h}6AC=*V4E%9$@m;jSRuAbaZ;?FL;
z05540T^f>6-vcn))|LAv)G68xYX7aR>C1YHK^t{^juS$sTD5!{1{;s9nnwEgdd(lY
zX1i1`as~G~RUttcFpS58HTO2dV6&mK+GDPn*QY%%<`sWRyqVB0-tmFn+<e!%@RH*P
zMO$mnwG!$u)*NW8Os9L1cZqqvqV#ZuPZJg)@?}dfHCyQh=cq_k!1{hgrj_TeP%AYX
z5tgV(MS!(emQEwn1zi<qTW#;`nuze9#+6yu@;x*0TwGD8v^dh0Uf8f%xj5_r`6BWA
zvl-Tw)a}pa*f0LV=}1F=UW{9{yO<||3H8D`kXoqJPcR8weBm5;#Ya+l;S|_8;13e7
zHw`o3R=>G9_KG*mfLpva0~HoxKT9<#RDH=^a&3}%+-CV?WMFi;>KCwudvG`Rwq#M@
z!4w`$H?~^SUa$jif@eXrv9;)g(v3x)0ivb|15HE9r;9sAb_(tWxr%Uh!U$WKVhhv7
zE%@+?ORATGsIkX}p!y9Sz;kG$OV8S!ty=ZT9g}YC8|6(fdD(yv43ZIWNUbT?)a7^W
z4pq5Tsflw`)H|1G95j01(6qnHBlI%f)+yhG+M@QfN4>*7^&&yTgo`9%ie!f%w!Gq6
zuWkx=cUcEnOKnD%dKZQ;ZAU#1Bw3vI^O*D5v5&Ce4wy8usu8qTU2#7hP)Kd&`CL~+
zc-Wan0uI@lA}J$~1H}fJ_3RV70L(Dzb9b21-k~EhNEE;Zzvl<EgKKhsjRnT<{Q79-
zoa*Q|*TxI4u4eRU3sZvB`0ke|WEBDZqm8<nxCb?thPVUwEA5#_8}&zsUfpy9>|0n7
z7Ia#cBnrtK1;NdWE%~)EhF`rlaNzf_@JxUbgb>-FXJPPbXUqL?Qy+)HExb{rN37-R
zE!>^3-lfhM*M~2In{O11WC6W41P0Ge3;)v#3Z67n?Y#Y_oenMuqa=VoG{11YC7t^e
z<_>1w4YH`Q*Y7z*cM^|F%wBJCM;?%F5k@$2a7yy#McAZiXBu(S3%0idnbs|L&NkXF
z@X+6EU;&`J8^8~R0j%BfN!;W~#E8D0`(>+{lX?*bUnA$a;Ptm*@W(LtQ^UeJQ{n8t
z%J_r!K-IP_S_7a9vnbtJ*?=W5iTy6-sECXvmlz*`nZV1-^$pB9*s}B2;yi*t41jhH
zmVb$v0p&~6zuH=zNl++;$W@m~vW*h_4eoRKounY5p2H-M=&?M-aE$iitt}TC?ojWF
zk>@xg__h4xCb(U<7!1ig_seQ`s%1?#<>{hs`sd;h>s&j*8lFjjrR%0Kif4#vl$QQM
z(8ad`pEs-Cvh%F4#~I4P{l194-hb}<n#AgTeHKTPY7OrDnpo?7O$ceh;<jrzOG^(z
z<31C_p#^))$7*W%k1;Jnn?kZK`MKS7h$#@~Hxarhoq2UUun)Oxg=Q9ZpGX18-f*1l
z69n|)f~Ka*7uAB!Sopau&@h&U{w?3QX||fpH0GK7MF_hGS{Po3+k<><*M_!HxE2w3
zkOt`;{hwk(NmwNoBFz)^%rmDsdme5LHC@tYaCPK_nN6-U3)j7!beR0}Y69OVFaDPN
z-F*Q(U4=5si(ePLxNUcqZ-I6a{?d71HqX2vc)?QWHlHd006N0?Xam-zyfR-6mt|${
z5X}6M(<fQBlaBWrSTTIG`~b+8I0x<%`nG9e8ZWv%gF3Y~%x3q?D4igR?>OODTk`NS
z8()%`=e@<JEQ)rA5$sPYvHLLTu10p%0yg;^j)FGAm5RfD#~YoI8ymUY(;QV9z-0vd
z7db4cdSWSy+d^Qj>)qFt$d^0ZKfHj%%t|qlsojDLU%Ovh8eEk36JYHJbZM;)vJ$3Y
zl^*uVXMbyjD}zPzmB#;69_oKo8vj>7z5eOdR!jZ}?to6;!IH`Ee*dp=p5gj<4^Ps#
zs6E`5MCEO=OjO^(sT*a}+DAu8dX8=^BckvKJ9HZc*U8m2UKn@XA}&n_@P}y_f7Bbq
zr%prob6Eox?l0h7zHHupl6m{-Czv-`rwy1n^QW0R^F(HkiM>$zN&y<svI1t1won+j
zGwhU=p?lRGukE=L={IGjwElQ`yUs+{T6^TIs(dB0YA3a4C$(3b)SgbNFa(uXG??aB
zW<b0T8m6#kr?9z-M{ZXA*vzi`U{N=m8Fh<4Y5V$w+|uh!;C+#?MbHCM;gZeP`qcOP
zZ$Y{sb82A>UM7*TtAwcVgHNVK-r3hanX~FXFybV4?x%JK1?66_9;{bh{mie{@nz-J
zU-cEdd|+Qzj*{t0{bV%1g8!Cmer|)#qERex7kORbX0JSc5%xL=XQ0~P_IlV0*TeNr
zqcH0fG?TJuf7qLF!Cm+!nqh>?j1xnMb2zK`m*0e<rYV!FIeO;<zr(J>RX&IL0%St}
z_IH@}fA+$Sa5H@IFI2|Pwsi@2QGJ7jjARK*ImgJln;r+B8jXX|oXoa`n(UI2fzSjt
z<$Lq>27j&Ui0a218>E<QZhG2tK8BL+*LqLD*V6PxxS_gD{#u$gDjxaTV}9&i&vSOc
zxGDlL_?|<&Ndhh&IkDl_;Wy#8;UB|4c~kt;7&g?Q6|}cEc7xqOXWh(OlX7||_*)NZ
z{`&9Bs-xQMGWhS>n;p9@RNag4+rNAB2-TffaesT0>a$b3pnp&uS?DJeDvkgB57qrY
z%j)K?%A(Vxd2h9|sO<*PD*nfuQXTi9?hTmSKmJei3fhDAba#in9HfS64E*iM3?s~u
zB->|!dH8_(6*_}4tM4fz4rO$zOAec8jfhGz7(}IKk)XHF`Sq4pcD1?es+2Xf=K&2u
zOS}QZ%(0|%&w|1?J#d51b}G;1n}W<MwQjF9*Rn>nEa)_MlyAW`(Y3!%jelyx1!)EV
zm&1k~0Q`dPz_o$T;2+aPr#t+(X~v&5&$yNi9zErnV~U;OP#-KFw@U^>J)o90yc|`g
zBR%{0Fg-<hI?>OW&!iPcQAnT3W9Or#%TVWCMi^p(F<r{AXkMiE2<o23xqYyBe6kI9
zkp1a(p}IEw0mR$Bz{Rs;;MLr4-nSbIbh^ah!LS~^Jfz}$OrNJDTw}A+n71-b?Jb)|
z%;p5$#(x~%FAbVYnoL#tgXQuiOyh#7=4S1#_TRE2>FAgWp+028{m!J>QXP@=7@q>U
zPEvwO3>M8sefsO;#h^!!Is^LGF3cl_y3nH>hq9?d;re;MEwy+*{TA6bX6k)w16rjA
z@4pmZ{mfn^hpCCQP^6Ox6Z4=l2%e$qi53=}s3Haw=v_k@V_a(!x1`^2D5wGscUx~V
zwY|oe`97Ksm~q6o(&z)a50jG~#!p~JS80YF8)nrF$@EP?GNVy~ZZkJgGNBuCY>`M>
z_@{Y+h>r+*ua;&i=_Vji519Ugx2<zUo9<LL1vNtM1;j(r-j!!-4bOnS+lN|CpJ`>!
z_1rjxRvVtdRc;?@IepgXZs8a}qC6>J;hKlD-O%;jhG*Liz1r0dXqko~?>0Q!ZWzt(
zPNlM)h7Z4Jc(&6}*%y_{y3-_W^Oqeh)SX6m^=D9yzx2z=vt=`VktbJK$YIZte*omD
zq!>C_-S1Nq3VnWYv`Vn01Pjg4KlSD5MuTDlD5(9_?$lXle9R?GNkNOM{zs21MJL%+
zQ>FO0Ub44dFUyZBW#hY9Q?2;8QW9p&u7my0Jnn_^*8RXNQsW1@T*1=+Y(3Q$`1-Q*
zKAaDnUkOLyzynjBkh|)$t-H<+mXN*bCGl~kWEnNqiH|EKqvqzLhUTN@_M?XOqvj4o
z4IO}nuA4R+KCX|ysVF_JPe4;qdR*;m-nfPFv6j@LAZllMy)Zwnb~a8XO`XNZm7)Y8
zS})6wBfkqt@fy7X;!5$P81v&w)k$%fj_Xwu!hgN&adg~NHg#65k)yRTuYWl&<O;-N
z*}tVH6XPi5W^rxT@`Hs=8XQ5oBREy|(FZA=(v4A2i|cs);7xwLtMCYWr-#Nj$q-w3
zSh(I)P7EeyMk*J(u#(J!J!^OP?_?TX>EFXF{bha?;gVKo*Hl2?>Jre$`UAgSv^ZAL
ziCi)AMym)(g|=@pF5{%pd^yu?ixM=Z773ltC7m11Z_FPosc-p)A>ZK#wn;Nn=K*ZE
zK^w~0`96F@toWH?c=aEQyk&D9CSR<u`n=>nCXq(H8t2Cc)}MOt2FNN3#+|^Ehs`^^
ze4MAiK`i{j;V$gcJ-+t62j1^<y5F4#hvrhU&rVt7;kKRUf0<_`u2k*C0V>G6u2$dG
zyLz0J&Swj^KkE#bPbW5ubqoBUW3>uS39l{lLgsSGr_~wN7m+im&a>*H!kJoqmiV))
z^2W2k?TY_yZOOZx9q*;9Rk0<6gY_PN;eWW)(fZ<=;eHGlH-cL`4%Zl0g@;2kkP{|$
zYBMfX&YOjL_fT9D&wHCrSlGI~P6QzzB-@NI)Z?ISDz<Cq)X9#mPI~ed$AfX_%nTqF
zK+4G{2*Z#_9H)fmFeOW3aBxUfOwK?r@jPmbyNVSUO^OV272R{-1!D%zW(=3&K<&m>
z5T-z<ER&?UPJ&CygLnYrfJ&|yNuRuMYAHNZbSE^Z4seM~*KXI~(b~Kupz}3tDw?d#
z%KQ?3uIy^EO#DSD+uB3StMvb}_vQa>97n#t&tEaiY;3>+K%BfpGarhgWo}4GA*ndo
zC`%84AvqR+VK4(8@$>%eug*SZ1|Z}s&U?yxYY}twbXQkbS5;Si3--{T)IK~WZNp!t
z-A<=Y;P|UIjrF|d(RB=eOfE;!+8+O#xlL+FKXJC9R?Qt*4Jca|KV<O{`0Rq)fBSol
zgH*zYdicFC7fw>`q5eGl!HXmR5l)#?tWCg%oc%Kh3h9cP?mg2-%mR{1O*`VMa##kW
z9r<~%g1r4xJyngc{6h3#Ua)_f7s_AA=IA8k|1N14on-KY{8Mbtb*|x=1ZuGQZ>3wU
zvD{y|@U;r(rRPx${a;JJ?Tikj2?8wBe=QO0$-4XK^pUWbADu^Al(c8(l~o)!TX4h`
z4X_WS1y%H6n2zxdx+NJgjGcNqf^=4XWtQH{D+oU3J&)+Rc)BPlT2JUiRJK<`3kpF1
zGlX!H7ywwWH}U8>7GcH{%qHQy0+WtLm<47Ef5QI;VK3~6L%SPphFfxdJM4!W1?R0j
zP)8EAETh{a5MO5qJ7KlIrJS>GS~cWdrTs4Ue$h9CJPU~6f7Q&;3SSL6K{x0xPmwxf
zoItIfM=7m~g$Bh7V`%Uf4g05*-Zk>XvREa*8!589F?q@~rlK5Ap^`@U-SZ%0?>NRk
znht)J*n<MFG&0m~Ok^p&AyJQ}7oS06pMA$~=^Is%qJceRkEH-ELJ{*iM*|ZVu<7Jg
zt!6xzZP5Mjv3l5QwM^F%df89BP;BE4Su$QZ$Tq(0wyWM$XngvpjdQy>VVhIg5=0Y@
zk{M9!JEvp<H$mew?eH)B$m%;+dE<I9PKO_|@N9nBoS4waQ)>)bn4>7H5?IU>DzVJu
zlQTZ75?A^Hrq)`~c4ae}O$Ak78NgSQx*j~@m%r0po_=&dJHL-^YxYXF?(OQ8gUVps
zVp;Ay5eIX#r?BLnK}dJra5;c@WS8arjp))8_xYaUTZ_r&goo~#m6BQ&_!cIgDIqjN
zZCYO-u*@U7)4f{xAu^YfSw@9Krp(1zSmCR7T0_i?Cq9UeEdC%6@?*Z2#n)7b`!>xQ
z%Rehm;+NrJ|54gzv<L3T#T+pq+kb<b@OHQp${)}8@n+ch`|?f23&UH&S>}wXFm5-3
zjBUG1T!y%!8@7D46YGWlD=j8|hn7>L^xt||LsWsfkJz%{!@K6Adn8pp`pZkdCi6fE
z`VaUQe#p}2fg)LQgrCA<E13Rj0vT$E3DJQ6d{<w@I)Owgq_@ZMReq4prt!?F0d8-f
zq8790KF2vOYk{ArZp6}SX}4T5S9h%HiDK6lKeMPIzk91pM;>8$Uj)*#00UR)9Q}gW
zx{FE^E|?9>f+7+X`dGX?rh$#KnoH~%k3_F7r7yNf!!41cd_zCB>yyuXZnu(R2AbKG
z??4q^282Llq&-~&Nt=gUK&ePBH&XMXH|rBTGQ|JHV=$&;JejwK#)8Pt)0o^-q5LZ`
zsTpBgu&aHY{JjPr#caf4xXk#)5o4FQn|UmbuSUgH6Z_MUkWN&B-`qNHA;gQO$h6E=
zKNQN*+1)36z1rMyTQc8wp=+w?gK6#v&SU<vG_q$CmDDm6k{22Aa{3sK7>zrfRuD~|
zt)h&%Rrg4vmr+~D>fy@&^WNUYY`~0S-kc`Ygt?%$8{rKi>|ym-xl;U^_xA3e<?VBR
z`D^nse~Vq4-W-11cNe_9;&)vR`S|ZvyuHGAe~K#pGKsFwXUW;)WD?@v=zK#kN=4IT
zcLc(`H8r}=sO&Y@$?Ew!>I?W@zhFLP8+#Jt<&6p*=ZtI=SDcRocS(E?5=A2{ORk$J
z)6b(-mQndSw>uAB+*)-2BWZ+gL(79cU&b)5SzP{9p>=J~2j(gz*zQp?9nJ8dAFYuc
zl$x_kJGC!rBW}&*U`X4T>2Za{Uipe>XW1NPY#rFZw2)OBUo-_WTQxkHTPM9%QsP4|
zuTi2v-O>7CphYTZP%I&#(BT8$uWszw)C)y*ue;6R@njRRA909JG}&CxWOGrIm&+U6
z?d&;u?}h75i_jq3UMqBRx7pQ3E3Vr{(e=RxC|)<Kg4PY2Su@oHI`bh2ZJDJxrdZ_A
z#87>~obJb9KXYzNNOO5TYRq%#jy*h+*9+0nyqfF@TlMIcr-51l`ezhI_GgH~2_dZg
z2N2VRRssG%aDEri_!i6*UNDR{=pqwwVR|tAq~@y@2{a*=tT$qf1rBpeZ^_V!OCLZa
ze^g^EuJ_0kQs+t<1WHW>7o}FMQ#l#S$zzHweWR6oqeHK}f)P$)!cH@XR)Xv!VrXa|
z{187<8I9yhDud0nfa+A!g^45t|Hnm633nkA@@YFTwrGOr#LvUbE4X<sPyA6n<W=1K
z8FwRtpU?g*hwNMm{Z{bE-%o=%X3s%4rt|UGid|PE?v#$P3$)(-%fo}K6JWDIR=5fb
zzO(ZV0gW2)$1#Tm?(AS{$}k^4nmc;IJb`Io<}UuAJC)nqH&UI9MyjwyMNZp6@Kib`
zW#A;kZ513DW7In;x%JY(JH?R;wgUBzrPid2sVKd0>DKJPe^Ix+HI~JBuBCVut{f>W
zS)Ln8*;v$1Lsr-HtLmUcO=RWvSrU?PbTS53_#PpK;rmT}zw5>ft=6C+^wQoV`Y%#Y
zP^U2EiDc3aGUnBPh9?)@aOcbL_@eVa-#0ry&~LnMZuju^_jvz{AM}x)|41Jn!GGRq
zD{$SR7M_h`|Bjc<g$}1hfX-^lopkGqUo)>*Qu#0U>cP8(-;}xVUfF#!1p!JvmA%?Y
zzrH-usdDQ%v(WbvafHf!jH`Oxg~gYR=)bT8y<ns{j6D(5S;c>}#U9YqtlXUV4Ag;<
zLwTFztll|a4xIf<cP;UDa8a6y9jq1ZQvrnh$M0alN9|xve?M(u%S7Y?saGUmamhSi
zMdS&<?3Cw9!H<iu7*%pHv~tmrYl}MNipAb@PcHVnOsq-gr;Y100@J6%*yH%;AH(_W
z2)eeTvW%*y*G?`vb+V_ymLGrr{j(pw|MG_)!tA0$(h>gef4*=9AY$~oFI^_n@4s*I
zOZ>11)e4b#(s_y~E$rz#>Ly6TMuzZ*Cr{GBf6EfF(d^Wphffo!YbaAA8DBw;a30>l
z*q*^(cj25#Qc@F6K4y6H#$6+ODsgbhhE{Vnw3EVyR_YnpQg=y&qA>@4h+z3vSDi3w
z#8U$qkIe>Dn_$YJ`NFbt7PrY=iYm@ipD`0V9u>c{e3QZgUF%;Iq)~1$=|=I|(zHy^
zTZ8#rPxxyXDpman|F3&*mU?a|tWbMQWpOH8GOj1adU$>&m(0)ZmidKTGrx#?=9hBO
z{333eKQH$@Xq?0{Ucev;?RKSAN+O9(Q86gR$SfG41ryXUK&|;%^%b*^bHE1U5cDX2
z*Vvo1!<Ow!sRyEZV^uhmPpUGVSkLuzZQp195#%chVWFQc8Ha|b2RRfezF0{TKZhQ1
zV?8o(RDtB}GQCPXK@h=JG%|9(?Dum*YkVJGNgR8QxmCP<^ki$A65cc7OBdd$=0}N_
zf8yot(l(y_a^;MeS^M~2JlS5b+GDQbL#Yzaf8>jfawk150A%x7u;GHRh9PZ{RG%Og
zsRtfT$4_?IE}kymKzH&@o_^Z1&CQz&mu#7Co`U8lr=XVfFR5bsbp6jWjFNeU6&`<V
z0np|3KRf-6jZU%t=U~wPRR8l|S^x7EqqtwO_memP<hcCgxGC5piv>TmG;$=FD>o_Z
z6jU1&6^onn9`}^%IM)W)0HN=;!lPV@Z;lWXNy={1WdxU{s?(^3B-3O9_qDvSi!k9=
z1a)eO-`}L+J=PZ|=MQdQu;!>}0alhnohLI`pRMq(;_;#s9Ne!N)1L?uv6F|DHdT(O
zOuN`K4T0^)ItHND^5dpR>6DA^u#4t7#|9=D#R-$6jcNr95%_F6j|D21oITPytN909
z2;L@y_|tTRxPW8E)DORAqOT=<agB-e2SVuZ>Xp)UcK3aU9>D-YK)t_d%+9h=dOsbf
z(MYN@5b{%~DXm#LpABQGbj~p+rMw)P+kF0T6-|=yV=J9quP1mq+`7HF{X?GQ<M_)H
zpwk5wP5a$T`4+>p-X|Xt%F-dPmUbf%@P4XV1ne;-o3^1fylojV4ZIv0e345IkioLw
zTKR$VQR#UtF~NmbIZ_t3tm|1c$t0&rIQ6{vDX)%%rX8bW4t<Lm@vxN1p2ldjAukBo
z4^GgaO=qD@5PDaEC#Kh}VBrLRh1~a+yB6+if|&3mUk-w=F}R!7;w&QXnLH*B=t{)-
zmeq(Hr7$yA@L!1EkdWqsZ={}uus4lbTa2zK3_a#L;@GgPc%^2NFBue@$M~lGi&XOY
zbSmx5ieo&#kwH#Vv&M4#6AaiqmE%s<`}qtf;sGmfe~9B-T9lX}PQi3T^KHu9yG-xw
z_}sKa8PY7A;99xFM>$9DDiJu1a(Q3wkLAH+DzXB*MBqZB%7vw$5OV7j-NLlC!f#MU
zygE3bMzqRE^GHq=$y+<TN#eVhBn{|isqyHWaByc1rJxrT3YcQ0kPrOgPQVIUc>U}d
zHouNvU(Zus;9$Z(UPu$jCt)*nu2^SZ@ManwgJ>h)?MXBY-<*YiX&C>m^f}>3=EsTT
zAB6}W?a=g!tDV9QkcR8?Y_^Wotj6m?mvtmFG(jchpBkz+tQE)b4e|*#44mmUNXeh9
z<H}Kh11KbA8I~?EEXc5Mc^u+`6U=J&Zw-?zEsU!A(ws4hMOPjfyr2<Y&U2DjA{{Wr
zG#N_JK!RrLBeGsR<?Q!V>VJtT4YE~W5J%LWklxfH%&gyla#k*roYp8MC)95EOq|>K
z3$nM<9+(nleZ-_qKzm&0JF<2B3XP14jkLnESnlFBS||az_K&JE=4&j$fwb^h4#$xE
zT*+F_nRbS`Xw{{5)Mn5vo(SE6xNxUeO`Inrqy3_Asw4+;@V-Z4uWTvzgEn4LR>}j_
zQtE5$e1(y?46z0TqOyNnWZQ?;GWo@-rMMlh(K#Kh+A^QjX+tdTzedq)g#zh8Jd0j*
zrSSV9`vT3$+KBzGq){R&k&J-|;W8h&HaXfL7AegKZhoc0uB}YiwN(M}%g9`b?Buvl
zJJoFJEVp5k%n80M+|Bd7NevNn!Sd_vuES3Z6W}#3Tq+6e>1~tB<8VYaa!HT%OWlEx
zcL<UNl}UK_CdPRnZo1+Zv}aygJ4_quBy&=PNxBi#UpAyld=1$ruxN)TANUfGrQJYH
zmd;VRAGVfXE;;nJkg2on6`JX^ir_4GLgUaXvxH%0;rlWsfn7_cQj1!|Waj4YBTHxp
z%{d7_huF0-2<v4+lO$_5lt04mj)CHL;XNE}Nwy-9$*RqBT}-Z&!EX~k$?GjpS%B;?
zC_Jo9d7^OMn>TdCEn5h#=V^NX+!O@^eeVP67;`2rC%k&E;4+`Jo<x|z0|ultCVA1W
zluY6si#6libyL80s!n>BY*5_`sZd!gI<)j1sy%7npwC$!A!%qln@Pq|2o%t^e5<rQ
zS#{6sR&AQ%B9}(FGyXMJjWevM%c6+1(y-UOd~EhS7G*v`^Vdoau^McAtrL+i2<=`r
zlNKDXr!btT&&)QXyP;HTR@{sxA0~HrElt5d<E2C!ISEcVrkn^4w@vTFeo1RojaXXj
zw7JDjTh^xvW5T{D4aC6EyUAdjoXf{wpVf_=womH_YrRIV(P?!Z;Ee)x9b}K2h|XP0
z`LK$j3{>cHpQ#uhOoovdkwH_;L}po1E!uq4#XwZVVmY|&P}-gr`mEA-YP_%1O)b5m
z;?oLI*szS&2=!rjhus}gDz8|e(>!8rrst2=mIc(xYsznG@WiCmQ5qx-kPyR(;nOBg
zVmx96zv<K<Yp6-b8@}gt*dN_RcqVeBkJBn@aD-1@nV(!796Wj%7uNUH--5gF%i#TG
ze4R|czvTJ;AqX)8{yqHD<$rqQBsKS2@Z#T9fv`Krm8aWJ=Hoo!1yI~eXe|}HN7{_D
z`M`8S3&1I_!Gay9>4)$#0!>jCO+iMe(GY$UhqeeeCStj1TNr*Tr6NsTOS@BuHqvmr
zn`D)`4$r^Cb7oJfV72q|=V+VJ%xPRB$t0mXkN9C2-{w{}Ea^sX)OeyFQgc`}ec)DJ
z)}T$jh=-9{s;ttE(aVM!K3Z6r&0qHFW;aBoE4fiv@q4td$v}s;&f|01_;Agj@@7am
zLGe>(4UQ3U%-tT`r5j*EFoDKkPr}Yk1+X0%;O_UJ7JKxs6Yb}jeZFU&=V_=}G67|<
zVkOg`C|0Vt2=wr{cBbwGu*YyEmIsauWq`}bJ(WFzU@>|j^;#Kun9j!|C)7I?QAsx6
z70;+sZ>WfohTdD``Uu5E9$l+~XM82`B*|&#LS40X1@bAL)!D~!DLesFjOpqNc%bk_
zm>QmX-DTa`7tR8!NJ8XI$#aB7Wqe_++*K&V-U`1`hn&%-%^<`@J7ND<az$of&YUP>
z=aEfRDc3myt_b_c{C(l%y1H3hwx~tUjzI&#weod(kJc~uLFU1l@8}DHrGw&~`^V}{
zuJ!7RM2sKe;ha=2v1v++j0}FF8ZI+nh003xV19s}DYn?nd~zv18|+{em#~8Yt_T^(
zT$|udG+XKsL<NvU*EhT#((pRv@E-h)PVD^i471Nrq(>i=4QTwi@!iMaLMM8-Hcs&+
zxw8hn5sI6cy+M5k{~D#!&&4gC<kq!h7NCkWHKw%&FwY6^Ff-pgN2fP>UB^lI{xW<G
z8p+PWu5ujJ4?*y2p8uVJ%*^6r(jd<sVZBbWA9Dt{{QR%K(d}*c=YL(excTY)?_YEN
zN5_A}5GQ14BA0TwF|&g(La1?&d^Kb)3FR)HFUgS-<IGT*R6!zB{4obO9ky{i7FTN}
zlW&*|cV{1T6qAfTZB+_2>?D}nC?}5DhiW!0eUhY_XsvGyshc;t5n4n7Ju1U9lL*96
zYC_j>k6=Hew{(Rn<V%zTKT@roV@o#uc??!qCR2OPq{oiqBoob4o+e_=Tnf6c3Ctpp
z@HSH@AKJnuawI??hCmrZZf%bIc4GF7P+z}~$Kw{*dl;5Zo^0duzQwfUI4de?Qecrz
z(xTyTK8uEr4RMW)ngm>kmcC4SKA)o0OmRrmTbI_!1TpNyYj26o+H^*WgR3)H_0T=a
z!wn2N6yvsh50Alyag)$cF5??HtkJLUga$uGw<O21y9E98W)jUlpvNvxrt(=ykArPE
z;ay;4_yk!eBRx;a=8r<=!I?#w2y~Ga&eqK)=`+|Fy>N^ePV^sC9y}Hag5UGPhN(ag
zBlOWA14f{TjMz7Xz!LLhoHvng_|1mSKm9w4Fr0<!)oUp)eo&6y(4mLwA|I}1b1*Px
z<OLCh%yAOpP{{;z(pMG@>*6JCu;B~s9dEdHcu)AoX!EqbFzzuzLYr%nB;#HC3Jmf2
zj828eYLYvXr<FFj*5)y++i4c-%i0y7ZGO{j$CI_a-CM0jf4k>+-HhAq3{PJYlt9PF
z=ah2}mdAT7`XMzx*!9Bn(SYuY6IiM+djG0=SN6z)F)_+Q$>ZbPj{`RkoV|T%{{9N|
zTqHN43z`zMigI-P>g}<A#Tmj&7z0_2Il)k(CmW}zDw4AB)-OHsm%qr+Pvaqhb&uka
z6#PK@k5?h;eJGN^ngYQxyC%^I#DKl67Pqcjjj;PNe48d&25TVxN2^{*C04`UtwNvU
zcRpo~-&N3r2lB3v4p5j-&gNQ3l}30>+c^vgO*zR4ipJM5NCUsAlefoR|B6lVBHLiX
zkq@IHiO5b$KyGQ|DVmN`57DbYq(7amLZm;P`b7HE=^`S9&|sNk%sBR^kBDME%q4Wt
zP%B){L}iN13QRPH4JB_%N}Up<At6#&U~@<rL`_7Vi6~;>ie;9El2n-~xKPv{<&+Qx
zTdt9|iMjO43fwcU{zJjS<X$5vF4~3uHev+4zyI7)_dpZFUd^HrX_iY_$52P?A*P2i
zQuW<MY(+$ZDYWE0cvX@`hBvfO=v$98Kp72?^>6&W5gtt$OFuV~@hSc*3Nc=ZCYrq@
zwt7#Ctz%?i79-iehSo&tS5ia3EYixgWu8ARtmda@!5Qa*33-b6Gy?We!J7L&`=9SW
zG%x?p|8z~ocT_xZjl-QPO20@BUY(uqzxdUNY9$3&&zA`wcU>k;lWQ4X%RF1ICIbC@
zjD7s>{3)`YlXDmZ@U1=I2CinO9v;)@_BeoyH?Q@ABx*gGfxS?!e#6Rh_|0M^`eWrP
zPuef5*Rs_I<O0>c@uS79^5|PwB*}y5xANnq_v@X_rXzlR8=d9JUu0)cHuWz0$dA5`
z+aPrh{##IUGFQ&x*YzLEpC7I6G{O^f{bF=OG%9WOr*3|DF+%p1#@@PyDY%YX$rKo<
zbv0Yt<8e~_>uogr0N-t(lkgxuI66LV9)!#EshnK?Mq-n@)e3S^qN-}M!Co;*M`xIf
zHcdxljTGj@KH^S_e|6cc{0Q~?BNWZ0l)k^eZ)J1Xam8x{OMMOW4!FPzwnS@C^b2IJ
zpxL|0)mwO{#JqC7Kq&v}VnV%JM5mv?r2leK5nq)EC07fC@~{482{oLJn&bF78a}qh
zm*duTdbj?+4@s0h#_KYI^))+gAew|JH^SPBvzJr?s{S<H-u~&dNHn-qq?o;A14zOp
zmW<ppUk(=<Ggl?*O{}<!M~x?G=tE@ueB4%PII&C9)$y{D0kzO{@;0p5D9KW@WAU$-
z{?GEBMb}H4mV5R8fnBRAuj3${V_lnd_%;paB2O<SAI6yJv9@;vx5nYgACF6u`F57x
z&XPFCL-UW=)N~tbgeLrzXMj#B-eKeweG@(@aaj15O*kq{LP4#gc)<5VqVgn;rkT5e
z5>E7;MDGHZwx00LQgq;Lt_LOJH68NRfNFS%6I$Hx>?*Zq_6mF=ue6-xj$FzO;2y7!
znc5rGwS#41N6eeA;RAb^mW`PH+(SVx;1FvN3zW5uG9cU!OxVL^BYdzoF7You!P&>m
z<jX*Yj#?sqVz;vCJsI-zfd6etC~IxM{v4|q$W;XCsn!4E%d<IQ9@N9{1Ik(+;nFv-
z-B)zP0(Ur+?0N7*fL)O`IP3CqrFJtp*{W#K43mSzIa-usIVf^7OGZHWhfe9k9WN*L
zD~{nn_Ojk~Q=3W=;S2x}*y41S?7;nL)C-5<6w|XQN`-^2@I;~!qS3C|4keDLHvM23
zl&tp3X_N}iibP`O)S<qeS7#y&IV3KM(``)XZ6Ra70+oQnoH2M*js*89iVmbD%)(5a
zi#(Z?;D;cZCp33)LKBJSIFsA_k+V)X0?&r@aIj=6(Ed`|-aVaeO5{h;b!nq+kZfS+
zs@QTThzK&GGcs;>O{ld-r&6Yrk1#u*%dv>iCH~_UTLe7z_|Nz)cL~>=KvGcR0fkS-
zY&la&Zjv(+Fbc%qH^K7AVcMo{TD*i8tb>Pc=`v=YqXgQAJMi!GDr9Md3Q0#A>ScRE
zJxr$v!N)yo_?MLClt`83(PKEDa)Gv=t=qQzi^K}e2}V!icN2<HO`yjyC!t~#CLKYh
z1aIX{1J{YM9jXJv9Y(OmW#6!Mz0yf+F&0II!Tpe{df}mSj9#N?3i^aC5kY5hkwA?j
zlh4wzA*1*s)2Y-$g<Tba(N1jEYO%_yobGwiE~Yu?&n<3@cP1<b2J;n$ZDKUt#FlL@
z`<WXy2UjD~a519BAoZ-SFqHrY5I^Ls1^CB|E;0^2%Zo%)0W|2%*3xn|@f*=R4|pa?
zbm-q_uqccKFf*-D!^H?r*^|c>dMO_5gs4-JY^s9QCKZtH>yCbd6Pbg=avypp83ogd
z+cbHo@E1IvdY`0W7Msjf4tS+`V`a%dW#x!CKX^iw74(uNFp$TFOA?kH@ch+$#_6Y+
zev6Dd;>^7#xn8n_u2ZbNdsTiukvtg*7qAOHjK`9EQV31(L*IQ{h$St>UuR%Dz!1gg
zkuVIpk0H`2h8~!$iQ_EivqBgh#(101L5H?hs=A=MI7zHXl%ro{euSZJB>$ZE6#cdY
zLEK2>8jTQzgP_fmoG7{*!y4swifLL`1Fe)9;712?5KnGW!f+@J>c2)yl4}#1B0D!1
z=F+4qr{050t0QeK$>)(>M3r=t*c^#YB=RweF451hcc};>=0U(3!B|_4cF-840uSlR
z^#-WgfgXz04&EH^pS)~@@Si&B3WmD1lf(0)v$`%X6X@j7tpw^w(LE+~iG{pw>}(jW
zvb{ILH#i7byC?L~T~jR0-T4hFa1fZxND`C&%jQs^a2faCxg#`9j-n31?h9t<5{C+X
zPlpNloLRr6AQwsBmo?vTHz%b?MdX)6GohRrx$DygZb_O$jQb{sgDzreRs;%>wz{+s
zh(eSH|1F0NxdD?tV>ChR0*#S^iObttm9W(4p%@eK=Co}9+stpVKAV3SQ3z2s+U8OD
z;6l>@^=QKo_?RZ=okvcO^IPF*ENQ7I7aDx-%$s-a$YzcRbHAxc+|{QqEYBUHh@N!$
zXiTq&RlOu0`9jf=i4}~2@{wY^F>p916M#GwqPY{%sF=9*2Sw#GZk^p|d>=h#zMLsf
zgbj3zDu{qvJgnfD2Zzxvk6|>T_%lVut&j+!rj<Izps~PIEFFy9wwi$O57SmK6hdFG
zE#lt#qpi6)Tux#4nG1hj!6^ZujA#@!l}3&f`4y99d^xq^8K&9hiULc;ksqB6{79mL
zm76DpTBWYYXShfw{tFinrxcQ@P<ImPc&%wp)_CqFB^Ffx+T5lD5ki6g1z$s^^^E>f
zv`t>rU2)GjXD>okJZ!eX%t?}V))w#7gw5<Hzn3}?Ru7WDBZ62Xe@P|LxWa<wK(W3W
z5?TC;F*faFL>N{{>Euj-1y&najjv)aajVdFc-dWG+J*SCU+fYki0H<`3=Mx8Jz;7x
zsfITgFUqPiLQMmY82!w9o$?DryTO<-4x!Cst0ke=p-~VIuAV{i58dfZB<1%)NUqKg
zY9*KPRjLR!hIPdb0CNi-IZwy%sp*jP@~;Ho#*bVzxB-pu7Jct=!{)G8F1Vj<O;fO}
zF2@%dq<hLSx?s9m<q8SjpuQUv7~ZESNEp#-CUaF7C^mqSv_ClE>4ZYVYm*J+oAfrm
zn$wPWPeI6%ahMXQr%Zw{ma|pd$Eun>8CYyg1V*89aIxSgBV$CF;t81?L)2AA7Hp;w
zPNTbo0yD_z1Rq?_Zs}jq%q>8QcH)w<jD!gYclSN|8|Wn+(dm3@q9~fpn|OTdAkSRO
zvNLiNgZvD`7%lLHuT&Tl78wWgc&%0kx=HaM0$~bTQ}_U#z41nN0;G)dbq03NS~vM*
zyte1w0$Uz5{$z^e;HU6ZqG1+`Qb6}J0UyZWGYE&k0K?v3lr>EhJ{YeU<3Yzg#5A5|
zZd3i6B!M)A&I5U9PPnt&Lz{6#oQ!VKX=Z}vcndm2crIBUW$(Z9{5t!6owxSLjU|5E
zEV_%CQnmd?xQ3t}m?qMi<PU2gQI|h;arX8lJ!#y+ru;ztxs)CzKdm`YY3hkeQ%_XN
zpWt(c+6(tXm)argS3fTYnrmsDsd4!LEB@u=xHO}>N3rE-j6K0U^Q>nOIlJfkFOCmG
zE>`~KT4xQzEzeK)cF(`o$Q%4Kd@cXH{%`&R{|g${yOXcp?jQV7ex1F0dw6>MPJX@J
zKRZAAOQ^q1a2MazpU+6ur)#fHP7lxMH$OuDBl_CG*Kdwb-n=wdoYw7?WAm3je)hKF
z;&G`7^OaY->oD{{#&3(d_t*BAwJ)w`(d`YAdiVULcR%cOw+4gq<;`w;W1E^cPB;|H
z@Z4Uz)$R`ZgZ{Su=nOVCH|57>yR+5RAMJK$(Cck&$<I!|-D&GRxU<>TCOYlTM!SQZ
z&?5BF@WVABvot{$N@qK;d9G2D+r9Rn-Iez?`r8{@+d6QG<8wRO?)5gedIQs+?E!Re
zLx1+S`n}Db{OWD?x3=u9jeg&}w$<O<9P~{igMMeL(_5*x?e2DOW83*4jC#ZL_dLyk
zsXfAOZS{KEqi%Q5+15^M!l*X}rmq`)r9o$FV=&Mk-QE@q%1{+%V|z8K4z{;8J4)5<
zt!{VI^zv{vgQb5&*~0h);yi3`$IZ?L|LnHf{cd~HUg)=A5ZgVtFyITl);5fM11@yJ
z+E%+BYCG-T)<y>|cf(pwuMS#Uo%YriT<wRo4g2i&=3sNnexw5*+3tE@?e;sJ0e-b|
zCERkjEXv1<Rj|FewN<{{Yj1UX-9^iw-^Dq>zo9c-U^KqG(b?>6*d@^4+}iGT_^DpM
z-PS*QeI@wj_C~)WOJQpO!xRSSZ}!@{UbZ@YX56RdvIER&8r$d$1_N!ZJs51;UJbT7
zTU$$3PIn6!T{*F{-S3-4*WK9K>`S-0{cTu6a$^9j*H`A-?rv;vcgzYJU{b068uU7b
z*}8+R?OxYBx7pulueP*+8#;r5ZL;0lFm3j?wsn1Ud)pv07B9IzzA1A8;-D+9!SV-o
zGK1Xi$UOIeljYBTcT=C(YWHQ1k?7qm9e96xOQ)jS8*Fx0nZIq2Z>Fh@EuFLeHptwb
z8Pj0US-e2|t&Po2f5TlY?TszF9+CUDH|_5YSkT>GkJo3n-O)wYYr*$vjdh%**lYDV
z(ps(KEYPhD5KG=iIv{P`PQZfk#R80o*1x&X=>z|}qNUr}>^iS(!iDz0X>=Pmf!=^+
zNOuFeUb*-)Jfn%G-HMgp-fEYxc6;sZe&xD<nlU$kyUlLV8MLi%?r#n{eeQSDY@Wbq
z1M_2Zd*Py7VY2~I16*s?IP87f+q(I}ZqVKC_#cNJ!5nq%FOV1A1xsaxac{yd;%x6m
zKY*3m9u$V%gOzT`(uFm@Y`fWl+1R#Bumy{xH*ol-+uG<3Xv0v>+kma&87MF{oL*(o
zKD^hF?Pk+%H*Ma{dhl>}0sCAIvn>D6?o@9c{feCf1pPwo3FcyByS-_JF>KUZ_7}*B
zL2pZC42-Ky0%yC`+30lP7jerLNTRKQ{8~OY0}y6g+xp@rh~`dDe=*x^Y=NeMKL@SN
zo_%x+InsP_fE#YV@M5ps+0^|E8{Js6qkzuP#Z8^x?zB3cuHRM{#K+bqe-^fc4f`lu
z?>N7<xB4p$y9Wa`#Dl5tm>;}2K!;^2_gaH?U+--K?`-Q26i|Kdt?qWe%_`Xj>~|gN
z<q~xcrWQ0sM;YjVc<<O1x&<Tc3avML-Ht~Q;EL_-p7XQ4y|KN*Y;O+?g<v?a@oss|
zZh}V2+y+C~+V0ue-W*s$Z=fPkcyX`^vu18?dt<LywQbOF`kpNBwcSk#^u|tiv*+oJ
zoBaV!(`H!fZ@FsXV8GASx^@==b!KBwxY}*?L9f~N)Wof=wzmsyZ*FgT-RNz0`@Uw`
z1;yEY0(Mh>D4h;y3ibkYrVra>dz<^cYzyjCZ9(mdEvN??|6&n>tI5ty7u2nuQMLwM
zP!+dWW^5tFEdzHpdV_A){^~j^6bhm4U^ROIq-wu!Y?C&uBvAL3*<m0_KtYyH!7k@Z
zplvV`ZT~mA+pf(9Dgu~0PA@mN`@E(>1a7Y+3%V_^L;cmY4JzoyGE)rpEm*zgyIokh
z-EG@Npq%Y6+6jiJ+ptWx?4AmWgq7`J7y+-XNMay^wN(eOOKoi|TW_G0^*pl-CUaxU
zy)c07Z^Px<O)%WIMeVZ-4umCGC!!Aa?b7OYd&uQzmv!3qGU#I)9p5k;3<jR^37W^|
zLc{D4Mt|KImRG=5zf-lbZf!4F7CjJttJx2zS$54l4K}twz{yUv*&DdL(uMtVW5coq
z=<*v&dA-*KO=!i11hcl$b9MySBD7NjAa~E|Z(yL??Vt~Gz0+CBz8fGRRwM=3rPfx1
z(b^9|AGS8^Iv;e~)`;9}gT1%hPp}D7?{XmQhFynOK=}g0$<^$@_z`<!v+Y<M@M&T_
z(A=DTp}#Tcb@?h9B9+`Vxfyl9mc6X>d{t@w-ObIdWef(olwI9G1FOdkHi3$C50~2J
zhRZ`e;F}FId$6)WteKhZ16>EJvCB4y7c0O)a|Vlh883Fg3fS&CQW#ZWdsF+@>3|Hk
zkAVr>-7;DWG|+bR(@qcc@uwLJjLLq0!<V_6gN^<&p6$Utvh8ef)|0rgIoLJ}1A4n*
z2h{^Eu)jdF?pPbBGeBXqB5845TY&(QyS=rHxp9^ETxr_g+Hm!o4N&PErC|%?Vu#oj
zrK;lu8-Qvm+KsWiQ8gP3xG|UGVTa!6`^rpzbJP9i##X0`RNt;#4Z~TK-C!6=l_z+k
zU8#tGHQVmf2P)P?uhnHABiasIJPjTAlbu*kQyq9|&>3(`pa+TC4d!0AvzV>H&fo3`
zTXk?RRv#Cd(@(pRL2zsvhK7Al9n}3ksOk${MPLk|B_gKK_JAEs9nggbq>nuP%|1K!
zdaWM(t)@r6yS0?dHeq+za&=_j=xuLz1-pBzYpvfcF!w;Bh||V!6j)W;XjPG)#9^~O
z2oc`X2OvR%-E}%x+549j#QOj2e=FesDO@3k@%#t?z~%UVuzok&Mf|@(fA9(a?>B`1
z=W_yG`cqZ)3=V7eA?=D#X1h0tU7_(Tx0DEY&*!vZ6mn4x#t(b6VV4LDXR5)waVO>P
z85~@S=5j;aS|Qa=QYc|JbyV>}C0J5m`mj!qqJv^IPX)vu$3G<#JXyyYW49E{56w}E
zk5^sjKAn9K5WP~DGp?3o@YAqH{V3cCgQOK}0P+kYRHE-VYD*BD>9>>?Vw@b#fpTwB
z42*?9cL`b16$*R7WQ~~xgMO5RB^!I5Qc@|MetQ8pF1ZmfT(chKOx(v1e+u&=Jc#o6
zI_1pJn>|9M2u<1g%w81Wi4y3<frUbL3l>nYKp0j=KW39o4ILs4{13)}6`srT(asLX
z(%RU{)AarXX^pRUiv_Vc1^T6(or82tPs?5Hv63Mg=WE>4@H9r;+;#W|Yv9=-K(BNV
zA@;a6qX%n~hN)=0z8)TMXfoU<)W>iVK7><KoW^+-8?Q~L|Fi{W_s3XO3BxzjR7zJ&
z|7laIG{vF?PeVgC&d}+iMP}sET9WgmpQ|@25gYM&{FDSz#Sa(^I-OJGDF}m7j)$TP
zCblTI6U@y~AJ9Ua<;g^=o>S;fnv86)HU<ULfVuL`e5P2!$rRTCMZ4ZaxY*s=$;f3P
z4|YGxlItnghjC_!t4me0S_9Z%qD<3y9w6Dx^1xYR_#c|11uX#EnHIV=cn~YFPI@ML
zj?-D4zELm>*ZQTAI{9?|k`9nJu)r#I4Y|$ZXdWHsy4^myfa6}&8rLznLqQ@%*}abt
zQ*sJzfvOcN;gBc1d7pqVInr4vE^3O{<hxR|K^O(cfQ01vtR;=`JF1s`k$eZ#3h*_q
zXlpYC=qA5Kq2{pf!F%?#s$0*N+-vH+Dq2dv>L_^Ulh~=bBLfIWY0PWIz~w(A8H|hQ
zhg4}01hx5QeF3Mwh;pn{vWS8>@hIUqZf93xxXx#|PI4zkJm)J2a0J73e;SEm7u`jA
zbs<%M>d%=gn@z&Vb6;eBFD`KmOk+uv#rapjN79$9cJ6$Wsz{(vaW4X)YxR01#rH##
z520~kN#;iv3!Z?t+hdneHpo{jC)#k-WxOnNpk+{SALptsi&;95k^eW)q*gta(|CfJ
z(-@Kn#A_1u+`<7~j||2^anGhW%%kZQh<DknEy{23B7RIII+DQ$ZJjW*)g^xq)Q(Kj
zcogy(lh1%JujXUt6@y9aAwUa+E)|Hy*DyVQHW5ih0>sT4Tc$e%0y|<j8n-#>=5sfj
z__f(q@TNS+V_5K<#>LCCKy93s%&E-&+TE%`nhMD~3iYiNi*=n*#?$kKU#Ma3Y>PTp
z7Zhn%sdMhjg}St!Os9)mr&K(7RlcOH+m^ihe64N<Jew4Vb2uN4lMyDL;!QtKO4luc
zl#E{M%kUQCmW&p^a|tw)7j$5m1c6#&A7BLs3@Akvhwv~&nRSyJT>bXJw1ti&EG!g{
zY8;kYY)cBO@iP>J8*nawqwZ8Mtg3bO<~9rWt2FR{1GzPZTfo>>*VZMBd}KF_Q37I<
z>(0;;8)1E<TvGwu;x739QoY`)aI_N{#X)kjH9cyC2`ZGG?N-~1oTQOLyGI2`Xaa)C
zCJ0Z1ihOa>==NwrK<d-XW~0J@@W!3F!PDM)(yXv1glkwF$du}DGAhC%a;VCdXf(p4
zW-f?qpp*(?M>MGL8QBG)cdf~`LP(n4GoxwI&!+q|!ag+EIHLYj6I@86t0D4B=29C7
z13MINXWHe~GpgAobxbXIV|hehgjwf#$XCAjl786xbeo6Wr9WZpcx#;=TremyNKP=v
ziOcP&sgD?)Qk)x?XOYFV&WWd4M%S}AZUq(7_It5MfB%+;=+m`woIFdTN!MH&k-iR`
z=Ulp@O9jzjM&?M0&?WxoK>W|m=Yg4~nF88?^}@&i3wkKuZs|+0ib}m9uk+<a9rKsC
z*8xp!En5enTX|Ws3OGka`*JjWixrI#y`nJqUDk_WC~xE0WS+a&)1J`;Yjgs;*$Bl{
z&9Q1viwT9cZjL-nDpL+RaUPFeJl3!wmht?>az=WPIg^Wz>&~e!I5~0hEI!3tzU<jb
z!#D_vs=1vG+?GI^s-ctKW`$K^EK1T<@F8JjF#oz{ucRA)nx{GHf8{$TpemOyMi1qS
z`<GeGzjF4ES%IYr9QES_)8bu}w*oSAfE?i)fs@eG7iTM7F81|&WRgsq1Y5cz_3zde
zh<^F)I+RGmrGu$1LeyP0bssIj_7%;4)@ah<hZ<Fs1iQV#wIvVIaf8^gK~2}6KlQYI
zB2RCR<14s^wPkBnZNL+!m}yk;h<+u_^K{}g6E1wnZK?Q_Z<7!o93ZlcRA_T{ZGtW#
z;jx?LvvhjBXN!@zS&WoGj4&I!>+)Rd{WMrmW#s~9!mI0g86$oggP)j0<jk3_R<O)U
zD^4`pXxDM*&vAv1!k=$a#6@K#mB0Sn2QxbTru<~aQ{Lc`aL<cRj%lXAK(V(g+w8Vx
zJY5SO#V6mGs%yAX6EftQr0vVcCdp_tj!)5Ul1@$M6a1+_QC<@r>vyhbg?nOJIbc5Z
z2L~_LzN=S4Kba>htlwZiBm(L_fZ8<s%h{-@5g+K->Y-~((LJ}*b!4;sj7u0JfW&N^
zpd(RiqzZ0&2y3U`2y{h3LtdyFFH($48$*c`&n*fX$=G;&rT9Tut9+RpDku%w4!*W~
zoZiQ?y`lo)v|f0s%7t|%`E!o;W>7I4@HXluuTm6onO<<zJiCsnEv+Y(t3xBs+FgC&
z8ja2&(4TSW3~g6a_&U(`kXFv)o-oEGCrdc699(Qu$?3IOsWE)zU_rU%eH!6Asxk6t
zbB!fAdK72FS#rra=qxhJ6}+D0Ht1n<4AVLe_un2>v6kbaA}`0=eCDcZZ&Si^Fp3&E
zLINZ52%<*bC_gRATq`g>6M9}gQGT90fKxhO6F3J~EL2L4qpJd37uS9-8vYGRe|9|v
zWvCfWqDSfBY1EmM887MinNx#lBoGLBOs+P9(npj)&hi%c2a|nj3oW6L(n^{>v|A;}
zO_H%7x_oqMQXkP2P<DMPi-KX}lyk5SaD@w$Ue-%`yk`J5L~Hw)^?D+usm(Yq=O`Yt
zr<ScSAId(DJ^qG`KkNMcIo(oDLtgsZbox)Y%uZ|}m0W{PEJQ<@bXa%7b{p_`!~0wV
zdC~3+LAd;XZhsowg*ty;s%9_hM7P#zwW_v9{XD8mm9B1vG%0FfJmbZ2Gh$TX6bHfe
zinBxDHPeu*i+%2&vm2nAO4a^TWY51%{7*hUU>H<PdjAoXKUazWX>Sbrh4`P&W_Ppm
zDgNhQ6aTZU0I16kA%Sd%XXnSEbI~GfFD_x|HFKe%K8IADn1EjRN)XsD4DA=vsiV+*
zh2@6vw1ds3<e#JjtvOYQ@7x<aMk=1X)(CamV>dW{iXpBT-;$tU@X3Y)UvH}nJ1`UX
zsnn0;TE6>XXQO%fn8(d%oLo;)2jk!Z#Bf3kT9K?R=?<OjDjZBYW#jjeWHvmu6G|%=
zrj-_b=7=j4-$xv7r1giL(_2fmhDE%AN8c9#pwIFV({GPUeq$x>ESnST61kxe?~-Qr
zXQ4cM?yXWvu>)Vz4A5gIG7D=4;?hhI%?h{0dkpPEJZ)=?O~g=;&i5=Nu2kE;!kO19
z@D1`|y7gaC4+KiZ616q2U_uuG%uX(|D;<p#<~;}sOiZvs(X;~V+n~U)NoiZhv9z!h
zc6FD2#&uzNB^I0~^BLu|$Q?Ha`A}F5p9w`8Jp>bj(OYo4xFk%p4NY1;M=&0Q0IFCL
zJbTUucQh)YVr39Qm%$K6a50SPORv;Y!PPMVohLz5WNC(J)W6!Z+LOCnV7#lm-QcB%
z;IVjruMt{Ra8Z9bn(Q`8W}^L7b_>)$r)vd|{Y^3(InaS?ekazDo4-S$lRFWDq;@ty
zlglyiQnAQYC?vUY>%d)=FA6x%vJX*xT~=indI{Pp@a#g#FH-7UvD8uOTIt1TJ_Zz4
zf}tGgb`L8Wkuou^65g&MJa1$i4<MZDh+j<u9GMYta8;5Ul)pt&BkS!5aSkKs{gBYV
z2n!3zsp59w>otnikqQANfl#M2O_GC4g|VshFkMvEq4XB)-*`?Pf|~6{@)WssAxoCI
z&jDC+>zxU-50W+9$K^=|0`1^UOKg(b&a5Hb9dQ$&w=MGD7E{&@&jk~4mZDAXI5x0c
z01GQ~zF@(}oXKADb=lcTY>y>Kt`!pFMWZ|5d{liB30R@sHl2Q+8_6(=L4TO}UqF9$
zieQ~q6GAGhfC}Hh20xlb_n4-kb)lI_EFz(NF>s2RD^N9lI+Mx)1eaxe5A<bn9o;6e
zghBJN#NrQLR2R=}7vs({QAOU^FfuJVh^Tm~?fAV26udSW(?IFNiXaD+!U7G+kUVe}
z-cccHRR~ds?0Uh$BZXm}uR*L}&j>{iajM63j6k_VKY2|Y9=$qzeev?}tE1P47qEq1
z{Q2<U{LSgb+1tZ|qy6Kf|K2}8dh@!*>Opr@U%ZIABX?!&qCZ2O6d1;^wg)Tx(8{D8
z5_A%%Yia}dGLQ5EjDH{reSth`i+|LFax^EY^GK~;M?}ezge)y(0Rt~wHkuSo<Rz*g
z<oddN)Qy7KXq0529}%YWqg^fv)H@@nap-!nbwvUtaCP!W!Ufpx3yJ`VS{oNi@wtpT
zOe+L6P=k_rk0*)uv;iz$>Trip3lX0%ozYs0!fZ_7pWNr6f>H93RK<>-3#OxnsEq_X
z_%@!YM(jhEqYEDyVJ|>8$G?G};|cqpPA3Qj{1>zTJMDINvt<7dHb2?_zd`%o;fL_d
zJ-f<CU6ZI!w1^j+aj`GbL&Jopn<k<M^K51_Fd}xmb2*01)I+>haIUFCRFnbq$xLf#
z`5$A9*86hBB($@w6QB0LlEcD5c`l9q2vdYGlVd>_wIi&>O6)VCpsG`z<NQjazn&!{
zCxhW$s{&b9zT^gBY7v=Na`wod2vnO#Nsl9k(E)u%=pq+z>G7im@aSAodKFo2DC1I+
zX$_SBcz#Mpb&oO|K%{R__&!PuZ)LY&-?MaJk)B*hwud~04RiaPyM%RsXPCw@gDoF#
zCRrelUUAA6`&)Qh%IzD9>5MTFwqgxMSYr_=;x-NNc7jZU_g8<TVngrg*y#I9zWzh7
z6@C_W<^Q(B_wR#k_@e{=*Qfu33qAN_AQuLt0GPL1(Cps>K2}2grGhpE!Z>B0lt*w<
z%#4{ywnd*Zt>YDLK`*<|%WddjAO2qt`qhX3vr5UEqJG&}JZ0W;tDZGEH8<QKVJWDP
z;9eNy#hKK{mQS8O<;<U)%yCJuiVg?m-fBLPp<N}#wt-4rpw|HYi!|FVP>ckNlMw(a
z5$q7ND7T%ro<A0u_#y!^&ocpae~S|74QW*`z_8uf8AW;IRS6;?6j;FgTmLLm=3I3n
zJ<emIBBMMOil`LJN6QGDpjRn;WJ*E5ndi6jydq~Vc&`R&S?XO#yRG5vt$UFA9%R+!
z3a`Pm1ol|3Di@(j(lVL-?Z!YYi<`g;|8&JH1#VPsF&+l`qwWuC@w^{~Z*C=BL40$8
zb+cR<4<deGADe51V99#XN&EnnWiZ<m3+vy@yL<*`e1BNKA_M7Eb_T+GFeoYZcY~F$
z4AA^ILsqF_IYA2EZD7|iOlLnoV55v61jyGg#2w4OQ4ctTR@*cl$(p4AHF**}ENm6-
zl^Zsz*f!))5La_do+a)sWkfpS(3w7(tYuR4xg)8#&47pMHtI<-I^-`(5UC|Ii81gs
z$tI>GOX3zNG?j(OOwH54^^6tbc{IYnXHN+<klJmg_Mf;u*~nwd2Kh+%kym}8-_m2W
zP=E~rQ<V_NK}933Z0VJ0!ZEX8FTy7>nqKC@-WAO+G4Wr}As*+$R^7M;g20QdW9>{_
z(4<9qW7GN)lyk^llF7o??cSu?l1TCh9saksuwH%IE7eIWqCQIoBxh`B^5jUX7r+xk
z$+Fbb7BX_B<Wu17yRd38+la8}F==`h+wyG<hvjVGuv{<$l)+EX%T7^W=%JvX5p<eR
z*$eWgww6o0Z)lnGe@hvY!<#hC;y(nzS*nJ)|BPt0=u1H-C3J%T=!JBD@ME_V+UKHa
zcCVWc-1x^FGxyTzb6hi{7^Y#A1*x<H(=txfWdGXB^P)|R3o67|(1>&2Ve<{cX82cp
z89Iao93sqK#L9rwOFdGs#S9D4=KIz+h%<rjU=#R~)=iVZ7}QFqf$_K-NdJl_olZpI
zFrHDd&&oWNj2Bgg)WEl}N@$cNSepQvr>|jgW&CZ<;Y89nYhK8Af0K?4AGe4y`{V1B
z(tR}Ra~2`Ud~EkHt1UD8UZ?!U*>dxsASJ1%>e&a;An`+}n~<h~p+~LoESV%@G!h!h
z<d=_4QT(K5usL95Kkh*>f|9;s>kJ+ln%$?f{010SlKp`6vxJk9!^?z7cZEm^sJUq=
z4d)Zg!^W>jUvXKHDCwafs6Z`*rMAM?5ta5TmY^JApj&!t@}zM>C7R4Q`n7&LeV)9u
za|x{?Ur?n~FuO`5nU4)VwLmB9^$m)ID4CG7mqd!9-Af-U8J4OCamq!6mbRAA@VZLm
z$8tOD1-pzj&$ha=!Kd(?Pw_w2|5tr7|B<f1C;Wf?PP<>g|LOGM_9y?}Z_od?AX4MZ
z25RtKX-)1g3k9lfu_7wyOrwG%@P$vm2vkCgu$H5Ee#VFhI`+kry>S}B_6my58s{D%
zI7)oKhOP@fFUUo2#Pm^#d&o@SgIM~-xQE{t3YOAgBd5_$qTwtJ`P}Mc|Br_k`^QJG
zULU@^c=Pt~bRW;IULStLpQgA8`b+lA)+)57?e-h|&j;FpulN|zJ!+JC*?(Yb+k1>5
zTguP!{+Jl&OTfnNS7{o2Z&IlX2WsKnbGh}y^MFbc1@L_M#TESLOTzPjpKie)JToH2
z4mP(@wqs9&BekuucW75rtwkwe82wbf;1jboqhC&$BOhTkvHc*&T@>O&&$1eUWxcRl
zD@_Y}Z@-5B6NR3_V2I+Ev8j#;6Y^)R0mf`f!5(@*eT#0eCs~IXjIva2=a-;Tk)nk?
zCasHl+@OQlpD>}^@(w<-ygRE-J!@6e^$DGtuE|4d8ivH~H8&tj7GKNyXiZ=NRf0<i
z`U{Llj4Z&YXDUEWJ`7}-g;Pa5sK=8i7}%)Q3u`Mb!$(+!kGgIo2n}ZV%^$;YoPVBK
zzdO$h^1?VxKV(03ZdRoDeyIfvT1ql5(%cO>T0L{3fjZqSRPM8uShMU~rAnXV7Qy93
z&bqU+Ltwh$hd9@_c}|nuCVz`@g^ylIphX5Els@i*BoO%o3?O^~YXIL#1f+-_ssRzP
zOkpmk%PKZS45KGkJEAqx(HNm8ZMAF|UK4E>Cw9cdQP5&UEECwE;%i&_kgOAlQ=@)@
zpvcfAEISSjo8l0`;sesf<0}TITkuI6(qiX@&TdAj2B*Sjrq{q47&XdYF#)aB%{hTh
z+T`b%_~B~qRM%l*ag%y=aM0k<B(6Oag%=l54wl8`JdZCf!dguhBElut>n*28y0NkY
zy|A;wCySg1%QEqU4rR4_BXaj!lo<1@L69SC{gKS3fF3<GgIxrVS}y7MVERae)0x5J
zlQH^<JW}Bviz^4COX7!Nd`p%mQea4UVc+QH;O$Dd&S=d_5R(SKxI2gmqKtGc(jLYy
z;GTmSX6VwxJGk+if#}eEDr!}Aq>`meh^t*3l2Z&D%Ay&f@D#|%aly{m&9s_>2;N%n
zBnS|BZ0v<|(NU3cN%EcoA2(iN?1*;iKBCdc$HhMi4>b4gBn0cy4wJJz|0B?9yBs6J
zl|2!o$&#F@GhIj^_Rzt8dvBe-x3l;zN#~hnHmxMe5sxWP^Yk`6<IVeshd}$W#MC-k
zsvD6eah+Z^+vJf4LF!k8JAn(Dhd!IBQQ#c`0&jAJ8a7i}>0Aj_dttOVs!NStJ=Yyt
zT2(r^9GPWil){jBU(y|k+zuJ~cH#w_lcZxkJg?rOK1y+LBNd;TBw1{<36b{8>s~Ii
z?&tZgrA#Nx!po<gu`%b~h|s%k*0sCXfr7~_Oz)>6Wl6cV2wn{~F5*WH9M{=(q8BfT
z@x2x5`?Jra0Ty`d1Qdy#%@xHI|D-stIYoe}r+D@F|HX=P6_r-gi!SG|6k<LDOpSIF
zg>c<?`Cv|7r+5M^YXkKoxR3tF&!r|j%Uci((;JRUks|Oak`A6d+ujPtn4yaXuR5b6
z!D69oYs{KuBM0YUY9>Nt#r$@dPH;L*D=0iN3jQlruD!p%Z-MxkHnV)xN@v&W-{r&3
zdN%BK*YB^p9rV9s>$edIsBFF4Zf~wnyS;v=bvwFR+sk5^JFTjW^=nt6rK!9gjhW|@
zE09eZ<1L{dp?PXb%vs|z0D-QpIGn79e{d8U6AVYNzUdWJf#x#UPusB<M_<)mqcM4&
z?jtML$%V{bZ!xNk5K`$a`tMU_4h+?8nK)@Sk1IZ48!kny+Vxe@+!dG$81H_lVW}eT
zlw=r^A5Ql|U;fFFoJtBw{k#HgC~r&~ojplgLGc(*=acYz|GQs?ly9D1)%dyk??Jix
z^2>Jk!*iqg^PL}IjFqOtvlVvxdt@&3t-S!3C^?(10!d&Q%z(SZ9=#+57B&)fhfpg$
zZl&k6(lf2}$a*IUL|O#pu2)Ql`H-^*tgs*omfIWOa3co;ov6ajo$Hb1YW6h7Z#~|t
z2rbx_jF3nWp0wqQEdt7V!E|i-G53CVJK@VD1L^9;uS*qV3Kd}`XhaXWdxNMBp2AOp
zn@uuN3U;2hC4D^FZlj}l1uxTSP8_r6nVDv8$a=qtI2;f*I&_gZT{T+=$NSo(r}x9~
zu7%cnWTI(M0Hby{moqvo<cxLPGkf@-<+#ROYjPGh<7vg`D8A{IB0&;L$w^(M5f^hx
zCoBAxQi&p#p&vUtm(i?NCt4NNndq)|rvRvQ=6UC&D1~>1x?Cn3?usu>Mlnip$=<;_
ziGZj-EXd37d88Y@oh5fD)r_B1G-VCGA`?E8(@&G$Xt!|<4`)Kaw&^w}MzV&ER~axP
z)oBe*Fy&}q_=pGZPM<L{Q<?{YUW}NQPiTKC<%*6Cr=<|vfq!dt5TY0^a6#ygKfu-h
z)yt!pKw(FGnI@IzGo-i8)Q!*<DNLSD-w%NhX-AvTkKqTbn7fd#(4SwxH+>bB5~Dye
z%$#HtBTqB|%vv|Fo+%I(mW6m=XYqA38+p<}5<3MkUh`12LC{g+=r*)q*k)o4LN_{#
zvlN$D>)$2D{nP(`<oVyZ=q-PYAJM=5V>1ArI{$<JUyT3XfJ>jw|Ne#Nf4An)ufPN(
zIE)QI=O3WIVl&|feu84Qz?v#%1knDh+6L_7UL*K>K|7o?$6LjO)$D}QZ!o9<lhmd`
zG)bq|S*~$46I5O}Ux&^J^-Ci5SjD2?D6tLGOE7t7@s6M9%Be~=f{TltWC3l1ruSA2
zp-5B88a-#nE@3^P;yO!iK`!6A{p0pEjr8|k%mxN~=J7ND**}tOuNW&cfnt_rFfk4s
zw#mXXnA(_lfk84HjWop|k(?sG)g!USVNd8eGecX0-Un+_3f6<CaF)~v+Fg4xGorvE
z=jltLyIxkPi2P~JshBg~2Up@3LJmwp@0p@wO3`bon1DUi=qaF1;UJj95{yy3j>Q$o
zX#94wMPvpJ;Ee9Y^e2<IW>EM^$sx-GSvR~)(+_p#IvX}F(a?b$?AZ`=ZcM_nimOCM
z*(w^9;jSUX`Cb4Y#OSQIgyDJ@*UrNSRz*W~kUxZbVcXXpzrk)}SLpDNh7L=Myrqa~
z1gr=j$@k0`KO5)IS?@Ui>^gtGyAC&>Q-{q|xbq|P#cyb?Za`R|7!^f&_SASD^7x@5
zK(fGMVqs(6u?Fj-+t#G3vTo~8vPi}tIF51o`P8p^vtkb9bi;hR6`o=2mY*=sIDt`M
z<O6HUU|CSKDG*pAPmTBJ7T2*wuhUalo3b&rB{K{*Hiy(<#3hWLB3d!kqTG;C3tQnn
zD(?|yLc&4^MjnL}2Wk(g?lm#W%b{2?#bBq9W3aP4$-tl33l2$Lo?uY@%A(|gbAqQZ
zUTDTWu(fB<Ob@TdV2fYkOxg-j?DeGg7`3s|>6=~!DBQ+z6J+U-oP8nrZZcjGX8Lgj
zfK^FIf)=9iZA4{1Y;}(zogpFqWcgFGz^T%NJtDK)`7jsCK{q_HaRaSL4FvYLGT96Q
zT-VTO0l=`==+$XeBLmPWh>6>WVPvzR3DjpnN=!Xc3(>KVBN!<146Y>*2Z5ljTs_z2
zerwV&DCL(<n)Xe)gd`_fr?U|AoN#z(&pc;{mrGa_3R)u#!cY+;k@9vX4BQheodlsh
zrZ+o58<>O5-F^{ey0_!bY%uq%WJhuOYb_MRvLCZl%u9<G3u6#rs-Oy{%XDP&jaFl4
zl*O4NPaB?^CDJumBwbbQ3lE(*SqZ+zcn6YR(`$C4RkgsHfF5T)1~9SdOf@Mvi=2)?
z?(y+8CE7`%xzVTaW3!mJfUl2YEB0MwjGfO$cM3|n(^ET{MJ;ju=9%Ew0IPpgQY*6~
z)fpS3H&Hjnn@ICIjW>HXAB!#y6J<0BCR@f^nfhymAa+*HT`Ec2Ln@wO=)^JMvl$XE
zdxTBpL0muqSI1+osJAWSVFtrlR0B{%!+ai%r7}}-PgjTp&T4rX&p`7Tbpp{l<abwz
zBXl9P47z0lvwa2tDS`qSJHdBRRF1N|2;)j(64XW$UFNH}+O8E6s67E$CYPkCnO&Yw
z8OF&Z(U^C4j5c!$wr|k@*q6zWp=QTkp{q6!&d6uO8Q8f%I!ES?vudza<!pSeCVPa+
z1WK5=f|)8TCDC&^bH$Mgfir+cxMYzYU9CJKL;PAqiWLP)f!wY)0zS=G1I&cg8FMe;
zZ6_HfxdnHqUUN9Jtlyb_={!|*P=Y6To7Tlgwn{|vNR_A(a+Qj5ay?~FETz?8m636b
zWtdRC#LbO^V^KFClh{?#nxNe6#*;lhvO|X-s^GipaET;Eq~?s|^@O!ig6kAUOh~l)
ztIl{C<-<PBiow$;h1miUjrg=73jGh8d=u8UbU#I^**z6`Nf)iLClqu<(vL#L$|?8A
zS>v4+5a8&_N&rJZyuTs=E;EqeepVcNhBNBU!^1=C@$r$r4Wy83P3(}lE)oT<(_k@p
z!5^+X056EMKq@c#%;}^#R&kdL5~$!l2!>7VHbTcMpdB|&EE+-SfrW(fNTY4+>?|U|
zY6O7ZD+&0pvp|7@C0|Jae3rAPO!r(`Pyq_CQMD<r*DZya(mm1cKvkblGUM-V@Bzt4
z&aiz|&nl-LLNciU2_Y-fnfaZ(Y>Kmaa1dUpBkGGe11SRY;zJgSxc;0zRKp$h!<X&n
z;fGz+<h}A@&z^n2lM8yT*xJsH4*{LzHF&@Ap^kZf_-c|nryd4C36D3N0PHKIV5y@j
zT^0zDC#K(_|ITuE2OqA`2;w(DXSXA@3P9!az0Z({&&efHqF$$xc=Dk6Y@xvw$*6?%
zNr}Y#q4LDKN>@Aui)KDU^UwZ;eY-+9zL=|Bpc8;%F-YTfYUpwLEj&BMJ=q1zQ!f^|
z!5Xfk3zcAXX!-0fpT`a7oJ<5z%h%B3rCknB;ylvr7T#DxGwfw_2lKUmIg4(h35Nph
zkK+$n{20E1F{F3tqm@Jvo95SPI-<?Sb{Ll3{w;;1JcKX8+8Vi}U2l_N79m`=R+s$*
z$r0N^hU$<OG#QkN5}$)Vv(_+c-6z?w6_4iY|2vw&pFn|i{Z{KHpN#D*7}A3*p4}u@
zc?)*+_2IK;&8z4iP0(=Sb?bn{*M5iBdxKVI%WnW)i=#PZ_M`NmX7YG=lSktZwu$xM
z>7SrgTqO@bvJDx>r=-76=YN&we<%?4$4P`9u3rlOtJ`n)1||Hj?#8F{zu#Q`pRz)z
ze^7kX?={z6?>dT~Ej>*YbvOcHYJg0nBA6D^F<>95mhGrxf=(RGB&{P3umTc^crgd4
z&XKD0&gV=BUn@M*FH4-DxIDuzLo7~++JL7r)2)DfqE2N!ek6XlNhA!K*F-2$@jP%-
zjC|&}UntILHbF>Uw(j!sD}fw4=sU)}IG>usx>=0du%2G|*k_c%XWPsKH;j!g{5tIV
z9K(7~xT53@;&A6AfAV-wXItEsS*U+FSeyzap(7AVN$d%|zVpk06lo7HsHV`<=tPC*
zmpVhK3wl^Ugr@u!avt#HFc)Yzv4S8QUK9luIjUSrt=s5gMe(u=9&Z%m<sAMN`ev58
zV!4^1an!8QUg$JBb``OkT->@ic5?tw-8ESm;#Gv+H7!bQdC_SxPQT^yxV%<r1>0s@
z;r>-F29s@?wNj>*LBe6*G<G{+F)*Z0l&TIIr=d#`$EtKcV^Y@!4hUav`G7pz0eMiQ
zHR=;i4tXdsSE7O+{M52=9R2grtrk|pi0;6ibI_xQ=CsBfq;s+6CkWe8pjm>!z<6vz
z^gtWga3a%y_^#G&w1Lz3V%J{m$i=RGvSXgKY)dV9j5=q$D33M<4hSv2nBw0mXT=Gq
zBcj7J(sMi}5Qe$t;RHtbDZV4s`eIdHNRNS>5f^OdN=7fS1`b2AA@ytZkpD5o+k?RD
zap`b4pGkbkR3SqIaAo!=4w&p_hV4eCEV&0i&)#O8MfNknz6RS1{I_)j1FcQ#)~#YU
zCR&hG0WxWP*O)bi^#%J6ZNP$3<m|xWyC#h2oVO25Jywk7WD@V-(I8X4dfU3E&>ezi
zxlFFFVHpI*W|zusECVUs0<-s`Taynxi>*U?n%aKJ#5haq_YUAc_m!NUf#OE9lSJOf
z5njeP{ydz`Ifk?qR!3J<?LdSWjO!d`!yYXLK3<zsru&!KvGRF-Ve5Ne7%k(%%<8iJ
z>*==_?PmMeuw2T*te}r@HjtSgOeB?9dsl!(NdG3q@FH0BOkvk$3Mq~zB84W&GXrGc
z9GC?>DiD7YcH(H<xUSG$ap~^u?9l1bxeinpsJM41Vqc#9tj@l{F!&pN(!B?(N<Lv)
z+Vz9jsrEvb{(qbQ*bDa>Vd?$FkN<rq4E7B<zODTSWiw8rv5cW&wl1=G%wXMu-e+IA
z1fIh;fPxM&jxy8#QNtY|+=Dr(+4r%w9&Ab%1U@<9hidhDgX7=o^)UGCvvAGKypa^6
zIUoA?#i%J#tyJC_m1fz;crTsZWaTgb4&86DtX=yY1Ng>x(vM=-Z~<vs2oOd`92BZ#
zii?$*&E&lEakbBSC4qxkNW#j=RE`t{3I6q)i<gHl-o4_i4I{ydfOAo1PeLv3H)ncC
zMlF6LaWKsN$fBSu1Z_E6(n;;mt}5Mkq^>U`No%3zex_NmrxX+6$wUsCFt92%VvrTe
z$O(n=-|=u2(@V~hB?Xz>_YB}?cjhGDQ4w+zQ4poiIKCQ4e#*x(EAc+N&Ss)P-(dYB
zWEe3|RSj6DJoiEumg)kwmZ4Okp;Nmt2ZR<2%-C@JSWmoTOmCcJW!P5Pmx!@`HCyOH
zdiAk3k$G>?Zt@xtmt8e4{s*YPpf`t6rT!jqZjM53%b6$_1Xu1Pv<->WlJjrhFmBjm
zStE;f*!0Bnrdt$BJpUNJLKaAb*)kQTJT+ALfXJ!mqD-5u(8efECaI)_<;nn2AaHs*
zm{tkW$El;ylnS4gCxa4GRtVdQONLq5a1k10HfE4fB<!nH-h`vJaQl~1Ilr=|lT4|b
zKTgrnmlts+CWlS&{LfO!kQry{4t5`W)RSMA4tyiV0=ehbSge!6V@M`bMh;m548#&p
z&~+-_poEbniIXvn=3dzV+D2&I2}Sf#xt8l|G7K<@o&;3pKxvBPa0e0CLb6&t7SxR=
zir(+N0lAuBwy(Zz=I?0Q<n`j;F;f{*>0x_Hzq2E?pn;x5_Jt2vy32EwHC*|1hv8>2
z!Cv~!m+voZUEVl;d3?UJ!!?z?rkooe)5GIM%{|?kJbCVggj6<G`N)r?qa(@|P_jaf
zcwewfY3fQX7I}Ph?&53P!s}oT&p{T=wGf^YaB5tyWB`Xn1GqsY*HH7rRmZ}x1f!11
zeLTA(<cU))q*Hm(G3XblL<bbDZ>)U=^#v_s3cFR4?paKsLP)A$uvj?Q5k@1<Q2j}+
zN?vX2)MkeQCYYwOi4{C*qR#|HT_`1qR^i=db(JL;2{XzYapD1=(6Ry>@Z@ZCh^zX@
zF%Am|64o@CL<Sj+Mq(#qWX6CBIL;FT&q^?+Oa=c2d55y-3b&=8+KxBr-wS(#(sQ0Q
z>}EVo8Oj8ehy}`CP*nC@OxEHQI%g0xbs6pFCI4sE`W3XgArAf{v^jo$;a=%_k31HW
zc~Oo=M^xP}J>V?vYFDlk1%lyaTsEwtq0&N`E?OMqYnpO|Hbp#0B#i{6p^q!lV6(u5
zG<tF+IH7-wWd`qcC+H0E1cwa3XKlevFJ4w_foY;VjLM}d3Uro3iARnhYyB{da2Kq~
zbb7r<rWy$h1K45XO-BS@%#Hg6?zsfnXm?$jX^GF)I0X0kMCbiwn1N1@NxzzFVmyHM
zG*6tC`|P#c=M%O4=O!|f7I(0ZRq;3i&k7Mc=0cv8Bn?)04zorek$kG4<({wG#hs6w
z8bp@G{D8!0p;Y-b)-rNN=OflKKfh=#JI57lr7)TlW$X_XiY#V4RV^xDmvHqw^4_e|
zqzfqHSngRm!WhA1T4ABvgA{j4l)tQr$U!OsC$EMH#(^_%pW_}5j8rzD=cWfwPJ9h(
zAU1T6FDeFE_hm>UU<M&o-JYoxrHJ;x1ksYif`|FSJ<ZU=9>$aqbPjTE1SQh3E^;fS
zNFp-`+j6VmEmIT4iCU&j%~j5)DJ>@0^UPmn^E395vjRz41di+7Mj0DAwrrziipqSF
z6_hz<kVilnn$R&YT<Ek@P268=eRe-`eRdbF&o_wa1sb3fEfrS?N@P-bhjv~Jhx0Tf
z993_Hp_CzKGgu3OkJ5&Lq76_nMexacl<*olu=6XCtoM;5>n$W17U4)qrkO%h2G_sW
z>NQ-CB<@#~K}UPZNEMgreNgs5<Lz}M3ASN4&J`<#)>@q2j;F|tLU5!L`oS(OoP}SD
z`u&fjet+?S5xe1}+^6UHRbx$Z?5Jk+qnkeA$h*Qu1G^Dd=d{Y^&aXq6!ADYNu#hqf
zyzGKmZjj20XUS&cs<aPXz&|lL*xYdSVZ*Uy8!@Q15nBP(7_sX_qmi@>g!yVy64=d-
zoJpFdkSJSh+}XikP_uaF#U`W<7#vi2_fZNDayo0nR|FM}#wVYVbT%c+wfM|Vg*`Bs
z+`dO)#r=?ad^F`+&Wy*Un05=gTGbJOQ{)MoXW?~nCthGzDubLNl9^+;G`13-1`hjZ
zN-(6PfKX{hTl6}0M)&lI22YQZ1XJlH{Zs|yQ~a+J|7*_|e;NFrZns~G|J~?yKE?n3
zHt>I(b7}kM;))*RwB-lA=g$oSD7|>W$JS2XujCtp9?TDrw^bjr?hmkIIf1<5DaYP|
zX5=EndVhdTDV<XW3_%7FdMs);uMQ5v-7mv_s|OY*h~Ob(gIBcvJ|Zns8!jL-p2j>5
z8M^!=50OKK7r38$1fVFH^2g{<NeBbG!3k0>Yu&WMm)fjPMw7TH+YIgy5y1xzW6sb<
zA2l5XUdC7_1)=f(YP2}T$gx2Uf*j#M!{WgdA)<%z7%MG{2ROf(vDQoeene0(fVo$=
zffB$RhC}3;2_@1lunJNhzGj(Iu?svz%j5Al3yld$-^346!S~sGijfc(l$LK2#(t-Y
z-7Y-D9oq|jhd#3_k9NWDWbp3A+5_yGuvY>VYX1R05V(&&S}T9Dw)S=|H5oL-3DtqM
zI$;Y|gETU5r*ew}<LNMqv+{;ms7yoN*g?P`7dH09PA6vYqqgm{h#UR_Ok9Y50pm#`
zVwr5VhR+S9f`Rr`ehI;frpl#Gq}Y5)`X#*Z+B_(a`6$H*av~()AKv&&m&7ZWo78Zw
z^$1fjMr?vkr+8XlsD;*WoQo4QYmDENK|@HHHbucTAgT~K$uXW9pGX+T^DGv`7Pqsx
ztF?LDOz2(QCI@jc$4WU)gozBFiLTlwgi1o4M12~o8(HKTx5zq>B9j*SLu}#;!Ncqg
zxe3G+oq|4?jEuF*lCUs7hheKHz!6LC45D!2jAGG*8U3H>B1~wgmAIuc=qjnR28M`C
zb0@S=0<G9PH>Jo05N`~YFbW9`gn;q|AFYL@uQ^v9OEvQ05h5(Hz$wp_LQZG{1})8s
z6Oj^U=lkbJ2N(NiXNRZfDkd^`e|S@CqnMx*i1rTu`>^AS057E6iwM#Ps_~;n@8(pr
z@8(fYnS>M)JQq6Nhx^!8!$N!>&fSLjDeUe$I~pB`%*h2UY&?WZy#j7Ep^BNsvZs)$
z-rQo}zDK}S^VY)Wb`f`bk8*Gs(V8-0|N7?L#rf&c;meDI{o~`K*RL-2FaCUZfcUN_
zhv#3vd3o{XHC#H`hll>sFt3~({RLj(2hT3ve0A~i=&P>|PY++8Uz{EN_u*MXQCl&_
z-e*`-M+SS55prt*f^}aa%v>6?5cK)r;LYooN9RXxUN=t8Ue(FFF~pOnBHed(O0+=Q
z+V|^wJ3G)s%O>aR$&Pits~eQy#v57;NWeBmNe0w^#G{RIn!AfaPq4)&W9(J<9Haj_
z9owb0_gdj=WLJ2a)0Y(Q2~;8EP|D;&a4i<8G{_iKuo2b5R`VO^?$?@tKKUObcAzF;
z;$imRo?Yy}etB{J_2FMH_D>Hl&c1&0?i?5b8S_4x;Ru3nEGD$oS{gJME5Z%(Lh+M^
z2^*mXniXUM?ub%Z23H(#$H9E2RW=hALv)6xFa>C{s~bHVrnhlHL6k6EZ`0G*;;g*+
zS?o-OdqWK{lsI1ALMk|s9PCq8`!I<+Vt5r27s-qT0*4vJ<Ey47Ri`sN^=#spE84vH
zik29zrrP;m-yYV(W_XKzrFaM61-!`THVPr$THM&nzrNl-fuBz|CxB~DHF*5z{o{9s
zHK8T8Mj}#MQyO!rrBM?nZAokUr>FaW{ma`^w>__SPJ5A->#*)kY<c%`ud3y<{gcBB
zX!+ugN3UOM^Boglj@dhxdtUD;&hb9=a!9B)izyQ_jU}yL{clrbyBBiC=@%{{U!sHS
zjW2qbcFt!S9ERrWN0L&a0dS}v%_bZXQ&f?@5nry<MW3+D%20F3J$(ZDaEq-_Ae<k|
zOpC{^pEG*d(TUHBt<j>%ydZI;rBc$0QkWn-Ac64^$+4bZoELI;1gUNm%9AWkIMk^j
z<)=t6RC1B*TW?+;|8?2Iavoc<6ci0bKhGeuiy2K?J$rk2aP*bC(i$QDseHD=hr~;W
zCgs#D!v$aEeYx_hr*FPFYlH`Hj?Y&4wjLm(rp8}J$P=gRdIfX5H9X`n$lEH$TA4{;
z$}G<>;ScPW>gvQSChw4gn}KS!d=HL0Tqqorgh07vTtm<5BrBEM4g7CM#h)iXWHy~c
z7_K!Xar$?0(U538@o}f4Pkvmc^XK_EeDI?4&@qd}d`;QNP8kSEc+wNi!CAEm2$GE2
zQI`fMF{s+ab(hfiZ&ljx{*G=e`HDTA!E%fd?Qh&J0$znNiTN-jMDd8+HSFn({BunC
zPG@v1jFFZlvt<RP)$@Ni8fp01XW{SgiBk2=EJ9b4EBPtGoe*Ot8ZT`V^_Gsk%8~Ht
z9O5_N|35g1KE$u)m`3x9`2X9z&Y)ZL|Mz>{PyYXZo&O)ChdltW3?Mk}asY~epm-~M
zJ4>lTOb`@PUc?8)j}frwOe@AnSP&=ckatcF0N7y@s7=N1-pVIX0)u>y!{77uUaL*8
zA?R<I)m`8)HiXEpLH42AW)Yb~rmL_$Pz~8gJ_O8waWi-d0Bw9r5A}9-=-+P@!kb;M
z35IQfu^Y?{y8X8x!A2KY$QG#gtu%@GLZqZKuK37FOAo%~?_J>O>;=;yiL-CR$e@Ck
zqDrLR(D$t&mdO1fpnonv4^JlZ$%Pqy?azm&XCS_jMD;-E7uF_^@@G8=+|GtI9x8^_
zgr|ppdUtes_%Z}Ree&kzyW>N6IvR~GIC_wXTC;i9ks@~2gUaF9XFYP(3#D$@4MuL#
zY?|EO#<@`nDbi`gk%m{e*XaBZg^#g{Rx5wVfo@|g1c8z5-#Y#JRzW3TFCyoGP%yjs
zA^6Xb2nd>4e$&j;ro7hV^As?QZ7+E`sUi-0Pz_&Frz8}>mZ{TdOAc+39!vY*2AZ%X
zm3U1NFijrBi2(hZ`Lq=x(n=%zc40Ta4HAXw#t=XWSJJ!UI#p;jG6l=Tn~n-(Gg+ae
zr%C8W%+7Bbj+5!AFhJ_|0m<=!eN$J_T}oi7Xs!bDXR)oNhB^T;#l_2`Q=W>?GmA?r
z;{XC7&<e^D8lDWK)W{dSD0n)2N4+U#wtTk3=1be3FaPuFHz$Ydux5CXWb4@t*x&24
zUG~<gsdXO=k)(jGpKmGUi(|gi2^LZqaSRZbSF)m%BuL;(kA_qco(j4$yhx59o?~9L
z_Y{;#$C?fXtS=uHu5x#A<2l#){^_g3^Wqu}!{33}XM;v1o=C|65%?CqXvy-aO4Prz
z{<jE=W_4}w3H`6#9Teg}y50Up@00%b+lc>IH2>kDW6_(78lD@#p8n?Ln;n74mTI*R
zgj@C>ASn^QVfsNG+2kt_6c$SGu^24rZT6|q+`n@DA4mUuJTy>MKgI@pD*kKGDX#x^
zzyAsU@i(&m{rL~^XXyN93}1zVJmmBuJoo1y=pAziLFbk+m_66%=@X|BLzqlr3{Ofj
zuC6UsTM;7-KQxOcDWR8W&J2O#^GVq)=;F5^K=Qa1Xhml|;x@^8UQv6HRTGS>!IS!|
z!Ifwg{R*AWer}2sk26i!ASST~F;QWbj7A{*I1L{U2OmRg9lFuS5}9kA$HkyzjI~Nf
zDEmz4v{aO0a0{vsPY~pHis-_H1mcIPIzOxw;*dI=s?!ah;Xl3bixBau`}}iIe;!)J
zO_HzloG0_?Clrg|1KIqtX+PSqx~h}yV@l~()Ca=ct5J-ePneToJVWK4r^UcC5V(fd
z-eeMwaJn%~qtJuJU($M6iPXtm@D(aXq=+y3h@N4}rcclSQJ00rjl@-Zp&TFEgL^y#
z_d6<w`3^g`u*khd7{ZBY=<MCa!w%=p<LtpdJ$JSuubZZa3{<I9!}`hg^)j9Ke&0gt
zP>CkJYC;dNCpKR%)yyWiZ?Xv(kdn?R`P346M*t4c@O?5ghaz&yZNe4i83h}_&uFxc
zC5gRMZxk^~yrv;_^D@GAt;d0)xo8fZM3kpUZ_pp75*<dYA<0vCb&O^Yq9MdoF>MAb
zp7Xv9CB(|#P4)`nYj-`NVp={Z0v1RYV_0X1OgSzelbLGO4L7vaVSFdvjU<^QMXy33
z=pyqc^-OBB6gn+IMy7(NCgCc7Mli`&TraICttF94mOw-(?)a5t$mM}xMhV&qEc554
zNK~;A5v&9QZ!i{~7SuZo$D&z^c<W1f<_rFZ^9ngqHY%oLROBV%aIkYtGfNgBM$Sm@
zqsQV)gk&U$Q8^ttay=&)!bfk4MhF-7Fit7rERX{E6!14T`T6KJp;c~?m@38TVmTcI
zXQ_c+#b{}BAnNQeK!c8<xa>0+*jV0LnC-GEzME%@<%i%2pbTMJQ<9LN>x@Wdnq(m$
zInhbtn1OeO&R{)qRp$v+0)_6Bhg&O+*C0tb<Nn&jU10#DfV|ytojT*-Ew$Y&wGDp_
zI7t={o>!(Ux*20yPZ^aHPbB5n;%jNzwnlk4$Y!#Boia}qs~(?U(?DQ;VT~A(`$cM*
zNSh>=fN^o`#`obcnGNR?f&Ro4-^;yogP`W#q&NbDB<_#7dUlh{fMREHoE6|M6bbC|
zxy55GBrd|XcgY3MZcNK@lcT1a+>}L{y&6Z?8U5EJm67^LVw(ME(bR&%VSE~CYQ^or
z=5oIksZtO!_Hp|mFHqrL)yGllGnQGRhL6f@B+zw6gK<Qvz9dKw)j)j>PmxGO)0&D~
zH&t*d&gmuqUD`>n=g|zvL(^oqPXshq=FOseIWQppEixjDb}##RflhyvbB-?N-AzBw
zInsUtbJ5V+mvkkq-WGBUqa&OEoV}am8um+4*`r)DD7<V%9fZ{TWXptUfjZ9AARp@&
zNQ8T`t1#3*AX`11<Jc-E{fkP$M+*|pwm<|G^ZZE`DFbwi@(nb*1mh23n(W~9EvM|V
z2eG42LY;>H-U7wKQ3o)=I;O~*N&>aQ1K28NG45;<cYSpw#Xaq?43J|AHXB9h{K!>q
zWR@To{>(xvoKk>og^4F3>{)oJ_0w=jB(x|G9fKbq3jziWSk2Hb40TGG?#slPgD3nO
zl4_NjNA*tWt}`pxoQFkCjPuM<&zCAnD#g^C#R37dgo{sL(n2g1A+;PHqIF%yU^FEJ
zuvXBF7uvf8DP|JOAEodqu?TgJvAf;SPRpTSV9tI%29+eG36OSdkfkFnu%07)oCs2k
zoVLJ6Xkp=5Dj_zM4EZAQ3j|;6I3PSLib&UPJBavmHBu=vB*T9&M~*VzIWU&GMW^-N
z!(<ntdQQRc5w!+tK5Q&&!)S=facir{Q%{Q4tV$<*S1(Icg?zNT5T*dpv8*{c?%F`f
zu7*lB*g<2n{D*l3ewWMr&wTF3-8dDrdkuBZP=&-K5+sD013pjU#2Aa!RH9<BKcBD;
zimx|hDbW6D{VzHdREB%VQ^LN0qy#u*ok((+Y7mSwL4%%-Dpi>BSQpl>mfW}ks)dgA
z-}roD1^+)7ozZN6d>voTqL(q@WicrDsz-oN)PHLC+r5JSzt?GhivRlU)qh&T3gJNn
zB0jz5k{%;OqJaOpPe{xt9W0>WVVDyT*3Nqju`{*hF#RSl%{88J1QmzHkZ*+rJ+DPU
zBOFst8F_rh62Nb&VQN-mIuofPS_Z-!CHL2AcjqbLD&ZCeE#D<zvuZW!*&O$nv7|kc
zlTe3b0-U{0RP=!$JUq(20Fiw&iDn;APmsSHWz$EZfv!WpUnJM(B((~2vXqq7NW#rK
z1QW&tIt18}0+#sW#yl72(b^s%@hrTJn-MVC^vkuu+TJUSk4lCvwta(Cdg?}#>oF(8
zRa@+>?Jatdk%20Ezvt1o^sT*94$ET2Ep<1z^0Q|+)#b}D5h)#GBU2=jhsD`Ci0Tpz
zA1gi^O+Pd-XE?&>z^XPmXwq(wwab{%NF9!cSL@|>kKH9<^$DH-@(U+e^alG3%%fEY
zOoDDyO}97fgkxv@Dl;+KdyBwcAZ3!f<9CeB<la4+V*Vlez{~f(0;Q*X^`$L6Ym+&q
zb4&v)KipjD(qH4OeDUyM7>~z5it?=&h@elPc}EjQ#0pQ&?PESQJTPZ}eDm=lvQ%{P
z3|OOl^(`J!OjjJz&oK1nqG9@veGQg-e3OnrQ~T5DN*(!SCi@Mj?&&o}1^-i7`e&N_
zkDu>@6sP;=BF(Eb9j!Q;Uw{f&gZ?Z{r#oCMis%8CR^tv~{Ew%^&#(r5EG_oYB)X12
zVnr-wgyZA$ic!#Jbth@)NnY`(Kci#+>bEWxSK|Rh>t8|yQ2VCwwdh*$Br!zE=C^yh
zQFt?pufAOS9p(yj*7iEP>k;rC-TN4-Rj;96hCCm;VjPu-+#HThw7s_Xej1)7Tv324
z=w%jZNPhbYp@XGZJt6-U4b#iy?9VU5T7I84QR^7hS6-E%sLXSsGC8@j2>}9#AEUl-
z&6Z~q^u6A)*{mdcffVbU5D7*AJ5CtpI;(6ftda46{(A4>`bSOfX%h(d@q*wi9$)Qq
zQ4P|-&PaKy7#}9{OZAQF{h=9&d@574ySCR|M$wPhNDeJ~DUDa+kSCVK!5FKZQSEky
zB<((QiJf~7y^q*d4mdn&IVEX!t9iJ|2dB8ld^{CDN&x??#QiyUrWI%>fqKz+VNaU>
zACR!Hgt*26MklPiq!%#aPh8ADRS$4R;%+7%Ma?I7)sNljzK+MY2z~Q$8`>%G=-KdQ
z{%^_2&0l@#i}YMdvEQWJ{Pjt7K1)VPwA_ezddvQK)zOvL;ZL&MzK*jjp2FN*7aeRX
z^2@?$_)U%7K?1)FUjr9sJHhUJycg__6Xb97s(-mA#s@so*$I)yIhrzlNU+u%;&R~e
z5_uTePmDHC7^&Z|WxQ^f2q+n+=rX^ZBMP4|SopFSYBLiY$6UcMBnm#6s*&X2ZqKJw
znCm#g3_<x6pZ+=@Bx01m*xFr(;g-kTr7?GT%+8kE2+?}LUXl<gdZcGDMfE|)>1-HJ
zHkA~a;#YwP00w~;hw5UtHHN4V_&tK=(dnjR=`h~aZG>sLqeZ>$QLlU4>o?FnYw%#i
zf|@)H7(Bsj&{;iMDKsPsKa9pwFO-1#(9YqFMCr{z4AXSlEZndVh!V>gg?~Dwwp8Z9
z{zk!A97g5`*fqim*HOa_`ydI-erSxIKwHcOc;&*Wm$3|pLm{iYFeYdSmH_g5<@v)o
zMXILb^!hP${)S~U8sjqB9p!twmm~p=0K9}n2A6q}jX(h6xejf<hP^ZUJeTgt9)XgN
z6DIWIWD+ASnZ!01Vx_m=#}kYIngFFE#H*M8VfhKIkA=ZQ2Ak;p)qjTj^k0l7a9h4&
zX21Yd_?yqRsWPfU;Odjq#Q%JO|4L$s(>V-ClTdOBm5C89bo$-=nnL*Dms8FY=r(ew
zsveSc;~ug8q9T#^y}nnC?Y8K2BCH*_cL!bH5AVXguuZ?UPz2C4{AWBfm_zg|4bo8c
z$f<#F7h?z!ld06<;<t8J{npbo@mxkfg)Z1ob5V!iU&p@yd6s{29DgL0ESao>u1JuR
zIGSpqcs<p^;CuqsM%9~k2#(4E$!x+mIW37||FLugg=tHCqZ8$HMF1X{T4Cg5VFK+j
z3PKRTazUwvt<_G>>#KJ+$JGC5bO(Kl1TYrc8{#qt$%lZQV8euWNff?;UFGC$D_ql0
zuZ0u8)jWY#Z2%~4w7A2-q9yByg{%g9q3DfilX9OC)@l}l-w>!g$9$_I84e$QG8vN0
zt<vIk27_`xnm~mfp2T_7oKI&dv^rb4M}A5J`6+E5r8TbR$d&{-4=vc_LbfS)fFFSW
z;@<Tst9X_8AL6xKS?%Y~|84g-;a)NScccF){^z%m|4Z|4Ma}<Z@7>$mHm-f)|M4kM
z?UR<YDe7j&J+iIyD7MpjzrNK{F1vBI9*_hj#3aBFpd_|$zx!F&nKb}NQBLBxXS+{h
zks#(ivu0g?OOoGU${+k;+wF`xFBq0Pdr4d~m40~|6Os;Bh4^rpr;8Y2G)<l8<3#{Q
z^9H(5Wb9Px;AoIMmFiQ3brI^!MKT{<0gp}TbI8Re#TYYT$#PEg>Y|OAoFRb@;FLMo
z%wx#JcQ9eZA-Yv>j|(U>o#C_R`piFidGakhQwN^_!bU-1B#dg2O$d^GhK>#x^`bcb
z6s}dUIKvtbzCCeD`DUSL8uiNLKx0XdiDhI=$8VmaxlCsizxtSD4=+V|xw2}hKsoX2
zmv7Eqee?3{^sjH99X~yJoX7p0>GBMEdOlC4Q4CXbR;JY%bnT2gRd(|3!$>i&;kQ?V
zQ}cuKY5j)t1K1ZRux5~=zcKN>8X~tSC1+B70?Jq%e%Oaa?`xmZg55=F;a#M*N8XCr
zA2;aZuJyasihf?C^W};jG}i!QOmqVC__0n!?%Ig$JZKvu29k73L_0fnC#w#x8>Die
zBnO15`1JLw)3Z0nZ;xNRc=qD?-=E=$aF;f0_4nv6XHl%f^{<^Yf=QD2#@b0g#PnDq
z7KQG5+jY-$eA7Dk;P-iP1su;$#e5qG4+ah2kxH0B-D#=fi3&e|!3bwO<rK^VC+<aY
zmCpbFZ)V7ZN+b<D{bu%}aDmuFTBaC3NqN1#%AWk?<n;I}t|IZ>iKRQzK6|H8kr!zA
zV~thW=*kUrk0A4CVZVd+fb=njo`cX!g4xCJ-<e4vEW!Zg5+K4v2^(fXHsPtipz+)|
z(mW&RJKm|Znz;4Jc}$SHT#__ZJfK98k#b-9XU<EwS%5`F4%MU$8A3zOF<P2EY@}kL
zYImMM0pPz_xXSVlegOV|hz^m-9IhYua@nQ)01iJttKP^B(Lov$!~gsJeiY<aA!I<D
z<{(8R;bV^%$XZg85LOMK;?1Mxcd~wp`C(NxeNk1j!<`*CF+<FyLf)Ihv=8s^T<yT4
zok@~|lyja^ggX2^8!b4X;XfD-_6#AuCTHqEUTz)noNQB4ZNYb?cf6Cin|@IlCr4db
zBu}*Ba3&~<WX2)?1|2`9nUExU6Q9^k!RcrTVPcA@k0n5Ll%&v{r`woV8)qn|*k#vq
zR8>&urYxvQIro<8>vVa>%@06h^*79)Ib#qD9Fp6M^#%3;^(M_T{ObpTz2%VhLbCvI
zw6lb-hQnZ`kX->8z03zfdSBrkjaZGTgnkO~8x%!ZarICD;XY^+$@g)JUXmCX-8ibX
zV;aa>M0@zoV#K^DrgQY!mTDIQo_a1Xn*(VbqZTtPB1a>nltCmGa<oxc!p;2uTTGSu
zft>$__+NW_b^O=;`wu?z|DOi`uj?P{hhGNm<$-Y6e)bvT`4gA_PblZ>DPp#NAPKnf
z{O>o;|AYPA&*%TsIR9q-WB$wl{=Wqo_)G^rz4Omm<b?0|APR8P`QN*@SJ(d??0>%h
zKaukvtbZJQyMY9JzSsZV&%Y4tW3&hj)6w%lK;4)Y+<^ai|KQ8I{(t|=&-kC8Sp4tW
ztq|+unhwLp{DMq7Tu|00g%3!ykYHVku3ajyxlReDhcSH!GD{QoFe4FBhJ|gfy_zeI
z)fEG*u_x0=R8=1HM-n2I<T+GvWG`uz6Cayk@*Jm8HcImfLn0NmMpOl4Ul*sS)v2-9
z&*2js#B)CeNr4Oxjb3i9z)Tdr%jih8H_)-AO0Jt}HWsBIXmUo>GG*luLmWBSMsh||
zN0(ltiNNNJ#MX~bFa&f7yj@u}ic;dDc_k`q2L70BPtq}J+UDrOM9N8qa3!{oeuF;E
zQT-87&S=Li7Bhwa9~4zpOxaC_48Gz-Pz)&fLY}6hQXH6!QGga3ecv1w9Cx}X5Na$U
zop_;nfbAeGQW>fWVT<(Z;n4!@v2_%Qhw{-gf%;&-V~mAeRI^2OX#9}*)zMM(PW5NH
zCC+FWaTU*Y_fY2;vCkpD4aa4$zC^L8xCAxi-4oG+sqhJBCmPYr^A{dgo*u+e!i;@o
zI>F;MP_=xCd%&N^W7>EH15agT1hbFcGHoxzG@lb3Yz@LkA(!OvQE~KOudjW7B(
z<y4*#X*I3Z8aHyKJ43Y5nuJ1Bhs{&+n=felAwT<180BW`yRL#&%l%y%jGBCis&sgf
zXFo6M8h*DtVh3R-PtMrO8qMEjg2)N!7tIjYuxg50sVd{e;+3F=JcL^#!ye6wMUAy^
zThl&l)=t(X_!tBHL$_4Kc7`DV7a=^T+%RrsJfNNC`eC#^n-==;V-lXh?J3V-<wS3K
z@15|4`%<^#N&NyQ0}jD}^6I_xckeM}L8X~dG$b*tC`&=!%?4Y8O%i{H`?}MO`H!M$
z^n&zGk!}NXWD>d&%vA|L<JBMF<X+6v)OmLEoyF@a&&bJ&!T;vx3!=w#@ih_(BR6Ey
zwen?Gy30&$1-5@-K3BOOrIT!Dz`GKOHQ$p<M>rXBm9GgwuG9Xy6h)M6R{9)kmu+w1
zh3yfdot0#eiduS8Dgbxo0LU8c;3eSZrnMpT#<mT)xj80=#;yQork+fI&z}o~*s8o*
zzYoE~P56uG9aXUE6au@JO%TYnXR}=M>1dvdCk1)BRB$%*qn?fq`O<gswk=x^0-}QB
zD}7Cav?-^?JxfLoy4<h*d7&4&81|`!NMGm)?L<<n`B=>$5Dl$Iv0^<gsC1fuSXWGe
zGgaz`$N^^Jg-}|oGv$i;3Kff>ofmGZE;VSrfB~Y_h56u`5T4JaF!acO)Gj@g+~+(N
zv;`vpNiIP+qMeLq<)t{lqeT?I6V2>JYFOQ>^=^wVFoBav1O;h`C~UK;ADg+08gus|
z18Pp0+T16%eK-#;9JwCBq;^;IRvACcMOGsxatS;{7p`TNPDW)TQL*R#Y4cr?tvcAK
zNDaszA0;5}>maSJ(ln3Wp+WwN=nZ_tPKG;W=tsqd_f4ou&qdQ@jCVx`Gh&`AKc6su
z-Y7i>nkD3y@NUZ)27aI*g)iYd=x&nar3>@P<_NQzN?v6A_<ONzZd!g38S}NQX5Gkp
znYkl_Fe%iZ?|Py(IpOwW&RjlTcH!joF_bG`@y<(wg8d`I?R6DYkdxDv@uFfJC+~^S
z&S3zV6eh!c@_a_iF^9LzNp~cNHe*ZOc0lfBbR1w;$B5#jat5sw9;CiA1r5GL>P<H$
zA@viqRL)X>A$1YP3;bvNIDR+C&L=;>pY%M-|4E+M{KmiH*YO|Y=kdFD@XsIN3wA4f
z0n5&R#L$C7@j$FGYdHBHle@R_B`?y2N*^Eu-m?*A>HF*JAd|}PayrlzEd37V_7|NF
zU(2e~ajYqKNP$B+D${MvsjN{@LM|Ok?aq#z3Qi^{#O&QX9mjuLWW)Dw(=loR`<rw1
zr;4HZS{nEUlWCJ4&7o^b+t76e$IV&f=`~io6i%J`ZFm&R7KWU#Khdg)Lchsk9F0;~
zeSx0Ei4at7R6lKB5Xva#xn+=A0}s!5wZ+^!n8GoGBggknncBWG;UU@`)fk0`IJFF&
zPiVdTYFrlE675A(zCP{Rmceb68QkJaCURK81Y_D@QqYqOIWWpFUMM;*MzFEQP(aDi
zg?hN);33o<T~NFqie6<JNWu-FIVl75RPLRq?O3Kz+LPsx+hKp<3@9h4I)qBjG1IV9
z;G&eQ$yP5230FfJ{q4&zufNs#rUO8oJChps&qT0Ud!pBsTc@0;!}7*=<)Zk0Y<Sy)
z-a&7#zt0@4C)v!N9&8Vm+XuP<OohyL0BvBnX3rNzE=Cb5jc1cZ*}{sL0=V(t!4~Tq
zg-S?^s{$m>HnobmUh0%!%}u4lON7mKCu_{C-aZZ4q#F<y*i3eZI}J?QNbgLuK{wl)
zTWuFEvpu}k_Ta<r+FR}Sx?#w2;0W$7x@Kxst=p%}mej2lhll8wUS1GqVm>-1#mg#r
zimvIn{G>cF+A$!I3m5n+!ad_ax9D<(mXBlfx%6AATnuE%$tX?S@n;v4548W{{qcm{
zbbfvRkNw^Idv*Nh-Tg1`f42XAlJ;M>6^K?F1*{07?uyvH%1WEM&Dw&KjI&w^3T*(V
ztl^(Yo(NKLt|lV5LLwK&<Xdi-T;sTfO?4fAjt1fQJ3;TS!cqJmCT@0I(n)ompKQ+-
za}2!~eFQ0?irK7?`>pQTCcIh3slM`&s4znXOF#a4F-Joe8V-8(PWdii2wYMw#hegH
z6!p4kS5Ar@%oz^kGHqyhh}}+*Ywbm9;Xsv~<dtNugY!{fJd|YxL|PDeVy-vuk_0hQ
z<x&c-dLP?`3Aak@aLALu{)YUSv{O7QF2JW^L=I!}sz?41WByVM^v7cJQq7n*y|5tM
zTAaW-@(~IFE<}!c^ay*0$4I;*F%?A>zb&rH<LU{=mz;8<19<$Tn1qjyeEbUTyGe3m
zFI>0>0z3}I>xva4!%7l%(JUmT)&E7nN?#S2;Yhq5P>q&g3Kzb9@!0(Y#4x647gFew
zEFufTwLFpICPARc*}1KW(*WWoH(Fv0r&x$39S6la(Y}?FSF&olc4oAi2*c-TnW(OE
zLi<;d)Cn?0;=Qhl`6%9^T}H}FXC)XEP<k&^A!2Tf7NuezO{d8S=2jEsU7|9kko0J7
zY@%nRLr1Iss){Mq1U0zVQgcrd@w{Tz6qhh^+Huuol-{qlXY7HsDbbbVpqEw46K_>Y
zXSdkmzTe(0GI6aN+N%nakxo<P!v#o(B=v^AlUT#Ub`*Jn$8uOv$oAstjtxGzJc5q6
zr_2op<wi#mw#f%?JSmc@_mpL--rkW5-i!@kQ?1bV<K|nNwsf#z8wVfM#;8~zHjfTR
z{%$s???=C&MGz%78`i<fR@Pd59Z5ePBEui^AKWtn&y?k*Fah)~-@rii-~?WzB}KTg
z_JwQ)aQ|RxSiWqPfsoY5jMqm9``D6DmWFpz&WE7K1TV3v;VA83<1T(Tj2}Jn-B_)6
zlDn>h2pO$@C*7F*j@TYRjyAu&)aVxAjOV48Z>o!IE4fN0L`oz1EL~y;ROl_L_L)FP
zrZl8tEBs7v*j1y^$zl*6i~W#O1uX{EJQ=bJ;<2zX_Ib#f2pec`O}`mVFVZkw73|uK
zl2y_DB2}<6l2*dTE~V~j+Qg+dJyXH7jWaryw5NE(mWvrCRtr>bmRM8XH{y-%%0;3H
z;K%`RgyFAx!4&m$P$B!)7arZQo6gRWkkv!4Zpy%yVOGW!=0>N{`Iwtqrup`JTMOR8
zmBp3DaBpE#Rpq3?+g}q;$=djyNs_J3Y04-&f`q0jU8^25c9cU{*NFBOvQU~WkzDb<
z6W<f^kii=o)@+nek`A<T`s6d@TImtzG<6$=pK4d|RCuR5=afUZtFZm5C>*5E+Zbf5
z)N`e!;%2Fa$l?-*i6aVhBvR3V>8(@gjp{gSq)ZQwpksMH>K?={-g^I0p%2-6PjKR1
z6W#~^dxQ8L3YBWoEN&_qYmOO9%z6PSGoS;y{q?rQhRWeQ8=!#z?K?F$P@fBuI>#o?
zWI#zv`A=PU$PQH0nL$}h7F8M%CxJ(-PTthrsL?|C^Bhe`efjI!ki<H2BQQ$iyeKql
zUKF}_h%XjzJ%6F7JA%Ip^vqM(d!sYcEbowzt>N(Qw@0+FPt$pJnU0SB*kxHf?qi95
zVEc_iyRF)MT0J1hunTwbbTQSwu2l8A(ct*d2S1l%N!t2wqipJH^)Zj+5O?z#ZSq^6
zOj~xHi}l*m^9xL?{-hJ`De4b69}-NZxUI-S(*dm8drHhi46jJ8cie*nY9J<AS=T^W
z<x<A4FJm=kY7zyeAVSQ8rQ-qo92B{eJJkiL_OrjwnEneqDcUQ=v7xT8icAHnvXUtX
zNs&Q1kx?y5@BjR;rK$RIMpmEJR+*lNzZO?1x!a@CLegVaKIbuTPi4kr57isCRjdUW
z%BbPq>f7!!I3&UW0cq$3<%hCEoZS~~z`57TVOB9<KG+=ulX_w_P2>e7q)TQ(=jnMe
z9}!HG5(t8tl}vKLpb(g&><UyC3gw#z%t_R6b<xdXxmicWST&iqWU{8}=}M3fjY|Qh
zCHj-hLir~WtvI1)joQkTnD)J6j#|an-J|M1G-FPLqiOnbGD}J_ePfHR_mL=Ln>PZD
z9S7__)kfTpRgZ~F#~L|f>V>OOZ|AZXNoXdgIG0&oBJ>*hMNneij!7{q5f+oQ3gOTg
z<rprsZa?}8rJvg@l=o%YaGa5qv(d4_B(j086aET&>}7Usqs2lNw3&Sx#FRlc8l_?n
z=S-5slqk@fyRY%svIONLzWDlc^3Ow?zFnfH-VH$(bLs-26N1b}QscnHUljgXos2|+
zR);EobH<60N6AqX9L&KnB-Sk#B-&s_d<t&%Pe4ylQjH3(-h44hRfcZynK@(<7*Qx1
z6Dxkp&#GA;OSaUDrzc7iEtWS<l=XOtTtZ;xd7J(O>^ta<N=hZe`zjP=Q8q)3e6dth
zLv(v8)x!FsuM<UO#Ssef5)<+`p1L~j#JPTE!Cv^^tHSRg*l<(zT*bViLI#-%((W?z
zO7PxgbO)b#!S5n%u_N(O!&xsC#SptE`%YHR0a!uf@<K<g5)-&bCeGPd?FJN+$n}zH
zra>R8qFy<=Vv?sAEpx>{eCkR?T7NVw68aL_77DdK5HaI+jas9*bj0duHhN;EqrQC>
zaNBa{jKe55mz1-nbZp4CJT<J`5p8<J8M%3X3dG{a<dR8LFJ@;F#ClssxmJ9GXavNq
z9`efX>CUBZ`y@puw^&JL`?^~c7xb6L9VzUd&{2u3G?&fmIm$V?x{uiM5y>j2#4R3K
z9e*_M#`n6m4^uv%aATD5J!dVATGVX{Dz4|2Vcx9lSyQVB7bfRR;SZ-2kZU45qSc)>
zbfm4*LF21-uXaOseH0KLKayJv;Nx%d58d4>xAniVThT`TKNL~<+KVB#bOO2o|KY*@
z-h-O|&)&iQXZ(jx(*LKn6=M3n!pq0OD-iw0#6W1(Ar^U1E7p&W<rxuU<g61(**Ht)
zmBl15SOnB~+3-nLk%7eN)p7&MPm2;A4ZRp(>k`Jo2vP)6Ojr+Aa*A#vIBftAQJZ4M
zi{*a@Ewc@T7R5Kk<E_1d_l;o9mYPze5l}*9kZsiuCf~geY{i5{AmOT05h@nNaXc&8
zY({+DY99cl=kEsb7rbxalk2Gx<O@v5O81vjXtEd5yF3hUt%qi_<G}g~OgwFC@$fd0
zUaF5V-KVqO&&*qdU>EdT*87Qn3ytXPEF`QEPcgjN!<rthu)DL_Jt*@C-sdLjh3YnV
zWRfRWBK->1Z<dc3%>?_59o~hVNFuzhX>fFkV5%qss-5~4NxLrr0=P}=f#lpvum?1~
z14jA62?Syww$<DisfHp*N?eBNgLIQ&IjZGMYC|;^9XU)bOJ<BwDi$F{G{Dj*5}?y|
zkKBrUq%lQ5;0qj+`aY*9ln`#aAw~*&?y<>(xglquM!(Cz=&zm+xU&QdXTY>feIX);
zCiB~JM>S$|MqNA`Y!1S(u7Xwkl0fB>HOL7)-VU@0WpajT!uVmqTq960K@!t^*!O){
zzpSFSXs3~X2r5vFvR3B4EuIDInrQkm@sRX%e(0ivSOLMqmOF662vHFT9qI#iV3baY
zbWw1N+Zo2AjACQRG^}oaF8GmVb=%Z1GBp^*?8n_#)P*yz9s01%)u?495Q@VXxW$$d
zslE<2fSS_Ut=i)}d*sZ=$~OyPr4Ul$_{i)5DJW6KlwS!38gnrKz1Tq&U`k32lNYHU
zzs?m2DH|fKFhBN4>wAStZ&rttj&S0}B(f;l7DR|gP;Bd#Lm8~f3^zcX+J!1AwvEg!
zbB3JAgo3LnG-tAyDv-T+nwl6)GuFmJKv7%<{?k0y-fBEZF)n+8zymxaB-1oj3)w+K
z&MM`!BrJqnXBOyBM4SGUB~Xz@9W<FVRZ1|Q?Y55|y@^pfA!g_aO$fUG#{7{lNyH*R
z!Z*rbFCfo3to_IrrQJD>PcHj)1(xnMkLHz(Msu>{T%^;(Xv4W~jV?w+8+euecs}qZ
zp6-j9AmEv?m(*AN&IBW)M6lHbCw;~7yYy%Fe=GWS$BkeOpT)pDD_AAz#!q24cu6+v
z3D4w4J;7IRX4)tPpn|JVPQqEtjnFF1VjjDw*)f;rwKv_<8AKx=tF+OQK!&}Ptwft~
zgCF-ofKEeiKkOQiva6%hl4{L;N<^}W>(n8NF2^#AEvBm%Q=_DmwE=Hh3XUbp@9Su_
z;jbVPh=zw00{?E$(+S<vR<LVxv3J;n=CYByopU{)^A~U&7s$JyYd;dYJT)#J)GA(4
zR3A#!QA4T@4PlL)54X{abX;-Qky6wjcvqUxO_l-+BF`r&M^wiMG_t->Z12Lxw-!P$
zWa^GiLGMJWZ|jp@qg|i4FnX%fG(%I=&r;;Z<=Kctg@ra3OoMVPpx!`ozal8d=0Ktw
z=q*D)-hO0_a<`DWYehF2AFdLi3EXOQFpqDs(5+X^mb%rlnS!@jw;PAHYUMs)c+G;h
zUAx;3QV0P8qa;4N7KeuY%q-j{ya7%xynCBv#Mf`Ldhq_E4;>H1n5`bp$`!oDkW7hN
zuGlXb7%$XJ@9@nAH=*-A*?4H~{n>^C(*loXzihp3HL5x9{LP2u3v9YzuEe_aay*$g
zw6X)7M*>k5i4IrNE>#zS6QyK#R3GBQkI$|m75PwhmCA1J;oZtJek;gmb&HFIO>1Wx
z6N1QXvAUq(%OF?+oW?p;LRh>ArBN(yu-W2vY_^+ut$4Md_^GM=Td4~OV!6jr-s~_U
zfX<EQkSDiK@8&lhLG$vX{FjiGU^ttN29eh{9Syy^>1gof4;oD{h|ny<dD<~rb{1!w
zjjv44v7$|fsIO<eO~)xEaH!dhbTc~6J*l*0kd$JFADIfu2tJ4kO4<JheJK8)$A9@t
zir(z=<!_SzYk&X2gD>m&FS`eOpZz~RMgI@o3NgV?#+Y5RbasVOOnahNr<|H(uXQ;%
z#4#*i)-*Yk&VsQz4K5rvb@md90*%``*S4qhV2gq^QuW{1e@cwW!BdcIDvd|VDYN!Z
zns|F2Cu0nHtM|L<m7y>*e+k<(a2HpF(M!G0#P?hJm<{Vq9Kvm|9UqG`N&VNJi(v7c
z?;!G>Blk<5lw-8L9ACo>Z!t0~S>WOOR?PXW6OAoOs&t;?)wlJt`@GfdNyIfaKVcim
zR^`8xTk-;8ncy9|Rq}^&OT)ctJtrmHtJkNo?X3v`@sTd54$RTu3M)*3B~y%2LAT=|
z{GLXntY%j8$YG2j_JXIPm2rZWIG(Glyy(+xzqxtoUFky#N5{=~sxJt-kc^uG0aId-
zZFmc1+LbUsd<CttIB_n+>t9~P_pa|5KVkHbJf^-Ja1^V28F6_v;aySQTE=*B7_`Y^
zQi-cEN`soXnsX5QC_@*Ut(hR)m<|s|NtHNMRPsu|xGysDwM`8L67e`-xF$8d#2O}M
zz9=ps@v$eh3VUj#&Rk~b*p#k1ZgY2NZ})ItcWuIS!cAIN1!jgu@A=l6OhsQUDw895
zfu_-Fk;5@TH(K>phvSSf0#EMVWm~(SU&)g!XuGXF*#I>_%D<o!kAU#z%DQ21w$L@|
zna935jKxy|yPz4TU0^uIl!zybZMaZ?r@VlR{Tid-vPuq3C>~*c0bDWHPd=+xZl8|`
zFGv#&klTKoO|?dlA8@0i*`yCF9S-KGMVJK&`iyz=WWGGD5s&b@CYHQGhHNS1*xOU~
z464He;(WKVyh!Wb3dw~VWNY7FA6tGu;{-9&>ezo@h|yutidpURN}px<8N+pZ{@(Mv
z*t(7_nN(Zc6LW8o7fZf^-rr?vU+ax~O*@k$^{(_Jci+LdlU(<bWiR<h*LSaNGm=RX
zn-<jIDWateQp^cHW$SofmOUI?`+(NoOSR$nw{$!|KaAs_?U|Avnxo`Xw&}FL+J35c
zU9AuDv{PQg<-F|HzQce2Fwgu)n5$Ra_SSui=K8I*YJXwWq5Y#yg&<|nh31b3`Xl`J
zkG2<H9bxBMpReg@`#c3$EpV(o(b+m~yIZ%~#)iLYA9`Huh^Qr|7=Tr0%6C?b&wxuj
zt1%l78+9B6b(nF&HlLvg?=}o<d#1qf(68IWo0jSYM;+I>o_BflC>0LjF&>wr&TK(Y
z0hMxe@A6ofr(T>^!+!VCPC7lR%?XDfxkYK830Eo1$>>ZuvWH>eeJ<?j5)8OD*08|A
zQFu$;VuGC1{s?Z8B0oQRRaBH9#*ZH;1OEima9a5JV<#xMc60AaB=kj+*aJ%qqCn*M
zNyBG4g5tSLhIgRsGh~jZIIz;v8Frbd)kknF9MQ}$a!YCeJ)tP34Bbf?GKhxSkH5}x
zx)aT1fw7>TJ~KjKhCYTKeV=bMV%XqLtu$2Ajl;m?)MwJN=-3fQrTo0UKwik)-t4`k
z>5#7AVvd}K7jyp5-7QIfs6SBmP7_F10_{7zTi5|bnCFf=f8D4qL&dKQbKe7|Y7%r$
zT(t-X(}zQG0A{W05rb6%e}NgeQpS-<ncaW_4E9A1?;vLpUZ|@tnVc8os!=K`B*E#;
z7Py+r2<0@gE;Qu7<9w-R2lh*?T38cxX%G=af?1s_2)|>&Ki~`lRp><$zsj3mdycP5
z(lGL5K&U;F21il%4%hQTO@?F~&*8S5$8flL3HQgHe!u%rG@d&<=(D<*l4T$uS{gv>
zNdoW@P%KHHDu1H5w)VHU!Gl{isNIaGTxJ^AC>b@Vbq8<NyCrRg?@r-GC28DumA`CN
zZa`u8!*@5UD!hnXWzDLB`HI%k+UQXdUjTW0y!9Vjx)-Wqwsi#mJlaW)+Bokg6a*V?
zWpp~_=j?wvoEPXg*vg3m<t`ZkvLY}YRf}w>ppi&l6r1$iM(*7Dld3W^QMzT^en{im
zg>d7sh@RMclq$R`>L<T;9Yg2#;QE6<`<t?m$1@3^ZcJg}@)OlsfrtCB_R<)9+grzm
z{6$dFGX|J&a6Cb4&2eZ8eJR)3m?=J)UEm6l6omj@h1^1L<8;F7&t9|WUEi&B1J3yY
zbOZkE^~(uV)k!9^i=>C5(hZ!&2W@xHk+w3VW%L)_e62r=jwjH%wP=L&Cd0`ALcNU0
zuHAW>P4Dh?|9fA3*HCzuRI(|{<U<UaqK&}-dUT_KFn{5|d}c)^IqxD>fsta6-TN@9
zi@Y#blza+l2zm_l&L+u_K?SrHO{)txQFfleLdKN$0J?fH82@wjulOCs<(MbiyYx!8
zAAg6cEQbEe5jJYnBLzXTvpS>&4(UqcfNRGBCyTLD`Zu27^=IqdfV&Xab{)&Pug39g
zuNTkuA4W4g3l$u-+1|#2;f+npkXN;hr<(HZK>Kput3Gp9WLmuG)Jq3Anni>AYF&Bf
zd&4rh=@$wpE4r3L$AG@ceTomLu)tl|etO!`r=}6)9Oy_y#HAAyy93pB`(8EBStnXi
zQodP|Ki=BkI_jLH=*Gz?8~6MB51ezpqW~9GH9Oqdxw^XQkCUIvelb7a!4Es<^JE5x
zvE1>u$u|73bp-z)k#N2Y7E!*t+1e-d>E@VqSPtT0D9T9rm^CXA!900E5I<kQ+}0^&
z-q;}d`t2dAWfE8_Hl!@E(SxV;EqOk9Q6wW?rg5k0vjTW?70IfW_8TpEt@h%&qhD+a
zL)bd-liyyU|L8vII=aC;@cYI<0forE|H!=>{P?goS}eV8yzaVz@gORDkt5NBFAnea
zMjAC<09nSUvTXq&4zP#yl8{TGHer>=VR(#hyxO~dwf9hX=2v?Za8|~z?(RkUBMn&#
zwKI%uYiJQD+|cTsq2N+zfB{Ic8;>3z!N0!2+8dF;%0izwFx3_DKZIph^_Y8UmXgB0
z8?zOxHPntOEF!P3+P1=`lOytOYm4qc^7>pubNsiqP1g+zm}j<zTr?z~T6?ByeX(<+
zSOB}VrV)oHD&FHgAi;3qJGI1btezMVQ@5Cn=(Ev7VYs4G;x~+!Q5%y)K4{~V1Yi>U
z=EX1_E$|uxvE%S2CBM_?hvnR3A4beVHvb&AInJ+Cw)!2DcKCKYLv?Dk?13}}^Ir84
zymsv?;nMh;<2!*6XbM5K@v7f|>{lD_TUh0{=Kk^ONKxMd`zc)d*eTcSRH8L?>2!-e
z`t=>-+vWAtqiyO-*{5m8R;4r7{?ob{v#;_z1zlV-J1n@gD)Os7QX{{7<#n(0E6l%@
z+UIqzx}VD%hG9$vrCW3RTsA@DJsx53)9v=e$wgCHO>&!D*cju28p=P!-R3M|S?-G)
zZt$ov<W_q0%9&MR=MeOm?lpG&sLNblr6$Bi0<Kg!Y%vYXF#80`^Cq6!Q-oWp&T8iz
z7DBf*ehh!$9k5bc6V_Wl=(5D{pPpRIdLfTn-V78?zrMKqYymR)F=rvB&yte(hFOct
zYB@>q`StcyHmWWT4|db(LwLbfA{mENTbir7R}|q*6}Ut@vnmyHarSTvu+I=?ZN3KK
zGTy&uyl#A&ma^w{__&r8DdEQwiA*}?TAaPZ0E~Q}lKS<GyizeANhR33{q4Q&ebaEi
z)t*BdzGzyruHeT#Z-zZQ{3g$;zXNL;#Wh0}e%!CW`CqHvJZP))hW(=MmK*v1u?_54
zQdQmB3GfF0zkBx%_UitB5BBbT_W%17{r`fkpvF<n5>GfWDGG|>cHIsKCBU0yF}Y0D
z*i<gdDxH=+hoOkLkHw;_)#u#gUVPJw{`R&Pe{u5d(_T!y4d3kd`|<G@Ar(+IW2blL
zm&lGS(+ts#7jp@hr?k7l_@0Js!~K*@it|P4LtF_BIWHa9U9Ar{gFQie(AZZ=nGH{9
zKIo&H6?g<)XC5E@A_vIB!3mxo66&^i`talt{5|?qC_NNMGI|MR(<&8D47phqd{xSj
zyZXEF5ApGJ{O%BQeILdHc<^Jy87AH(@i7OZC-9ujo#NBhp+`e<NkZOHR3ANQ&cu;)
zIfJtL6pc~UpTvaXPpnE3W6-Z3uKsf95-$%g3WT0gjcNNnfhA67DP!5A=0@hiAwbet
zCPnd{^SAdR-nu~%%7G!+LYEBdfic>1ycB?k9xe@n%p|S;q~Y|8$($Qt8))3+@jxXm
z1A|bTq&hw?5OxH*KQCZUroWn3e<a}tb>Dp6bkDA7$2}<~Z?en3!W^a-#blJu|MnIg
z{C$Qetf9@r|HfJ{^-p_LgL(-ENVvmB6?5;W$eFUqFG};ljz!RHYg+s!IkD`8M!8Y`
zs4>~LoZ(H}>F~hG2Bz~ez`Zq3Ns*3Pb@lOUY3z|!b%dL+a|~CnUdJ)2Q{iEsXkBMu
zz5ggAuGiLk-W2dR*wehP1nXgzT^1;n_v7cd@fur5L_TzVkk#V6a#h3|k#KD$6Mur)
zjbf1E?gu^^2rzm;h;^F!!&?#&tI#0GTw0Ad2e?RbPSNnti-MzN52EHs0w#NuZWrUR
z^L=I4-uG*B%9-qDnno_5**?RZ{%p}m?DMzmNJ$|?j|TIdqr!riHs3!%Vn8p(9YPk{
zu8QqxI?5K)?)tYU$^1NRdFNQDiph$%<5RZ<qZ}Q*!4Rhecg~A%e5{fFI`h9SI^E{)
z&pC<2K%5$b-|pVs@R<p5vi%H1(xAlGi^-}EkL9P<-o7gGmVR!xJc_s5`o5~C;d|kc
zZd7v-lBG?1`P<EA8zE{`+e%V@N#B3L1k3W@(BpCQt@+2?6ssrv#f#HsMUzR@*1I=E
ze=bwbu>{oghAymsxY6{!c-q>((G4AhdmvBG6a38dsZ6KYZF{|b?`$}~|CtPnK?dx6
zv$mi3r_0eTc0$~~7dG4t222z4qKw5qt#;p0)Dp9=aJzSHDiUG%d@k1j?6A-25*sLM
zo=ut6cTmr|rqOj+BBm2N2E>$<8F_$6oEo#z198L7#dXFV5{`hc_dk_}TkX{WlN{b5
z>rxW6MJr3yp%fXy@OLSKiS#}f<UEO3mYD~e`}I(!HezjY#2>)RQnA&{E3TY)v)?E(
zZV~j}v=G-)5EEqew%DFZS#*#QYlyi7T*nR_EScj(XT#JaG>GJ_k`=3MELoScY~tQh
zDU4)skoGnl05g&J+E#z1{CXl%+B4MA0Y`q34#}Yl)AV`aPNGUkkyl31Yg9xdw?}Y(
zB%-DBIi*yXql=1V1UdGExK>{uBb{M53v4rt9;D2i{ph5SDI>^0tVRNLM7h8kKCEm>
z2^lA2mu^bh^GTS!kaGCZ_U;o_fzV!jhBI=zJ=`q5dM~e7PnteaKEzxfY1r|x0n_I1
z3O(^(nvDvnNkbt+vKa41pD>iPId!6%$G6#<oaZ`v36cU-Hzy?DA@08xqKZqsos51$
zry*Wqq~Vp2NzNUT3#k`9OW8srnG{ARP}?zAM)O4zj_Kbk5hMkg5fvh%WG3Fi>dNCJ
zFQ%)e2Bg(bh*(^krvim$cQERE)~pESdm>M>JVlE?5F7PPpPx$xGFpH!EQijV{EGQ{
z=uVj@bIF21nF}e~@wgBmLY-+RZ2F+y<;NoR?W=H`6SJa*TOXR|;-Bfyi+XIba;Zw^
z(<}!9Q_l=L^?O$ZXd#tMj$mdn%WF1CSJfET(!S#Ex=7K&HYBLg98)%xy$E(Ux-I34
zsW^CsIrY2^g)Vhm&lzFxGc_c)CtowAqvP(FUCF+QbgCjsr&7TI1#X`G6jhz#SSFiT
zPjvxyVD5xczlqf#RR?Clr4SB#(}*`Yg&3%Wi%DKoOGmJpP!H{F{6Lbs2`2F@mZt2R
zFC>xks+kw_B~=MB3&<~hg&!V57m_S*s3uj1ZH0KM*Gb4dVJ)5eUA<YyY?`1{WZWV;
zrR$2h(BQ&D_G%teODhS5kw(RnL=ZZRmEM6(N;wDaUEk}{_Kd{Qo3CBGuP=*iq<F>5
zH^LZ$iO^{+D@5d8u+C}<+s%Mk6654Om!Qiucj2<xG(Ne%tv?F&kpuA5Pw{OiPfO`r
zQ=J!Z%T%1Fwv6dZtlymVz$J-b%{B0#KKdIIYqpi5^w6ZqjzK1?%MCR=Vi3V14x@^l
z7{@a&t6Y`t#8V<dY=D~Gt~L96E+%xEX#cG)4=uBL;g0hFBD@Dwwc=DLh_%IO3-B%`
zOoVwlk(74{-^i4n4mj!pety5Kse~mbvACQXB-e)axF5e>%$4^S&?DL{Co=!-o519$
zEcrA7H34>^Y{k0O`xN9F*kn%(t0K=7dn(|=#WUT&c46}Lwj2RMp=)aKMEQDTJu4r3
zMG0D5Dv8>}u?>OCO0(#Y?$e!I@TCD1uM(3QnuO+@Y=T`IR5&SRz`{9Fl*t7~X!4EY
z;G4!Gi=gchWu9IneWRW_kX}CvJ)1|%@dkvbqHXK2(ucV2p%ep~iFQxUA{UZ^wppP2
z3Q4-8)i(0FV#*Vuc$=Im21r4su+Rd%8;Z!1b*2(zXmuyVxYqWGvIo>rVyY#Z4FMgK
z>Zah-kP|fcF1OvDsNFjA)I~fJnX5u&%*PV*L`plof-CGc1ScrBMoGcps-8QjQe@v7
z2f>0tP=*p@JPY*1QdAMz!>*7`vq~%iWd!FW=aHt*y>c0n7lShWc>&aJ;z^f?eV*&e
z3q|;YT>wO88A~4|TtwIiSjrBvd?d50#2(>UWf97N{B2nGMKYVE$gxsvP1mPp93g5p
zaH&5nFufDzo6v*?k}GEIM7C=W*nZe%IsATn6#qQ$+~4ip-*sV*-;d*C7a#cj`0(&8
z?Ehlw-ufA2963VA-G}HezHX?-?6`tzL-p_90>xw4AN;mGcVFvH9Zy<0TMnNJN^$}g
z4p+i6RZEhIF9&$v25ZOV_=!j(*_d$!a!<uxTe{qe<An26GXx4+6t<}$)mUoy7OyDX
zcr5(0yL}Sja+BO6#ECL-+)7`@Dn31XsPs^aD{|Sj=~Pz(Cw+x;NILog$<*3aQEhZ3
zS9wC!I6;A?cImx*`EBQ#H%uL>yox7T>FtD_F1ssmjPg9S2%|(3d_oGlzVFj{o=)OU
zj%z?}Ml6zKQzR@M0Wp#vY--7CRnH)MLu*64s1uMr7L^W8#cjxRSa3C+e(ebwW#q<v
zYPlmeoRd~8Ou|mdd<R)ICx`qC^wkBbqArs&%2>y6u6Yp=-~)7Io8#rYl!rpFW)PxX
zhvo(nwj7V2VR`|Ckj<GT(7WEW$p=ch8cA0}@`wyBE~iV}MXzMZIfc42S<Ns)e3}d|
z&=(_}mDvROR!!xT>3-m|uHCqT{XyC`>}Zd@n?t3tlwz}l>$TJWax@~7$-eV4DpOXx
z;4Oe1#!U{-PPKTa8>uHlD`z=K$E5U^qH+`^n~;?Un2Mp^v{ZJV51{WV=Sd5j0f~zB
z)<|AGtK_XvJw{sKMIqr?!iHU~G*;$DNo`9EqdC1bW-%dWIvDYvD3UeF&(l$@#*e1*
z^XqUsMV~3R`bhqdLL%MB|Mz7wqeQsB3I6BqgD>~@L;v5s2M-SRKl}fFivGX472@X@
z*a)1K!#bQNS3u>Ft0)hNF2Jiy2-Z0PqnVt`Z$SlH@sl0Iy7MU|Gn<zZSUC<2L8s5O
zVhwNs9l1u_XYzU)E+`i^iy&Ls6z!wX#jOCm+(|H`dthoj$$*)gBNi##kVX6jO~(cf
zyfDh=f^JIB?H9P-hrqlLoQqsbD7%UT46+tow}{R|1IFRxi|GJ!^i&oGQZELaCm8=1
zF0q^1-D6Eef35c=rTv+y1SPI?BzDgn&J6~}_E<4EBs>u5BbvT|7A}n*VVi*@v(kwc
zq@R<XFp$mH7DG|*YtkSCKf2^a4)#UAeL7DGMF-zvJRR{WpRlnUpX2f)v4)*>Ob_JJ
zy`U)}_fDNyxV};OC>`}7LMo3zX_3MnVL3bw6m^rJArD562`P@NC!8Sf6gU)qjCl`&
z$4Bvzm$6(3RL_ZlPD7Fqt4F<dX-HAb^lsS|X0t>A9JNYQ)D8FJn4nlJ>ZY9smh$-$
zzba%y&$3j6bd&k-5cgd{LCGq{&eL;ICCbvW$B78MZeyAV;=oi>W@vdYL6lBI4nB5i
z384Fdw+pGQ2!F%pbRJ_ecw$p|qaa6EI@1Mf_H3HMp3%dNiZJg}b8S(=o{l@SaSyYe
z^fbwr@y2D{9`s>0W_MTD9r$pE=gfowxoP1!gApEE{R4%A(|OYKIi^VPb3EEEE{Zob
z!R4AB(;`bi&%F%#opokedsj12lf!4+l-SdLtiIJGH94Jyvjr3JVb2LvY%t#!C>~LP
zp%ADUvCpDX(<~7l7=>qd=a^=LsDO?C!z~)N+o~yiNK1*52mwqI9Br^mvz*m+JRDJ4
zdf$7Mvgs;G=EZ{cW>)I?A{>+s%HRQjA@uHz!lb-Pue#86>@buYPtt1=b<(_8oL`VV
zYY3aK7;%u8xqW;r8WPhwL=kL@Fn>0m%nlRgWYTRp>!sbjjwJC!BW3K7suPzRUU)r|
z0Bn5b=@+~^O7UewlTaHA8^jlKt69cI0paEyg~MU|AIB4dkL%%S#_!)ix9JIw&8?@r
z?tj(>eP?QDP~1GiYz$%9;U1nOOJvG5SXnO`+*`fa=M?1%6`MJJo%>ueR~mp~s`mx}
zTp&KTXx^NkC%I;g1EL^E%1kJKJ$qBD6mtfPa0YEslSY79DZYdS0?CDC7Izk8a!~Jx
zYFV<_xa~}!i+C~~icXg-IXwq_4J<#UpY;AwdR-I#pqck{)!l8A)z@xnm8{vIWo~V{
z{c1@S4P)^!7K3)z#w)>Zw=4rWi%Qouk1cCOHT;T6Njj^{1e9h58lx`_p(CCaYr@$j
z)dR*t`-TMY(3TcPjHs}BP_WiJPGCslAu?GW%90T8B)$(B!M3262-0BaCM8L+`P<|8
z#8FjLjD9U$Ii-$JH<+0-+ik_hv7|VDg4zW=?^EoqaL7^gyt4kJ?1F@<i11bp_7^KI
z__7sKlBXspZEShCtULu7q@;XHztO(K4dmK8gZ9>KyXMO+yY49na0EO9^8_QzhvBl*
zQs$S1qz4a81NN>bUV(;gt_I2!r1p}rV9o2~!CeQmU-O+ySvDhPRj^3H>Vyk}j2=;Z
z@`gf%UE7-fsq`H{{4wf(lf}dEtK-hAZ(h7W7@D0O(QF|ap-4|GzA(SJOshXgl-Bp-
zc+knuR3-#Y3y0ak8#ap{$GZ>V&!gDC0N?N4?MDBEI~Nut?)>q%8wcI!oTnAPeSIdk
zX(z*g>=+u#5J~13F$IQ%r1dXjnfW~wPaEoga4^R%1@0~NKsUsHAMEed<G=UrJ-Gi_
z|MO|;f5NRGoT*9LVNxYq#ZQt=OSUkR&kzN20;#*q;0!s-lH3t!?GXKt*nVxL8u{yu
zyjHiv$W!9Gw5bxuk7tc8hbM^aW=cnP8bh|(_0mkZ>N$xKI9!k@IZq5LTw+#s)e1Y*
z$E1w!_Tuece1{Q)rJ{NoXnl87RGn@wCOlXCfS}{#+TcYs37PG*C0l_KzI}4yL;+TN
zEfCTiaUhUc+u}DG47(o+`yc}FT<x(W?Y{4exs|_d3t^t|gtN5~-c>ZiVSE?=5nQoM
zcqVcPMX3Vik>{cFi`zNClezRf!XbF4(+H~wy<j~<9LS3#<g)H_D^IU0^*mp<n~q1L
zlf@uD7Eov2po;;)fzh5q5sGpl`*0cjP!FL&!MGP2=LeSJ9m=#Q4dDgUZ*YRfv1SYv
z%P-c!ru48d%4j^R$o!GrRnpA>_kdUEsPMcZGl&>uT(G_D)TQa9T_c@Vp_l!JfSo}j
zrM`wKEXY3sT2Af`T3^{VuWa)xi<NI|C-edtt5Ep&4L!CkJszxhZQH)K4X^z+S@o(#
zb>`3dSuz@(!CqLKOm9VzRXOz5>_lGTmy5~M>{8cz3d~#PY@e)W%`UT&cIL~~KXRHW
ze-znEnA5seMe$*q;*MVfCIxr6ftz0Dx9*7RciYQ5^_q7~&CT6#=W(MakJlF1X%yJe
z0v}}~aa(7wnrE;=x7km%lA9-}_5122!ytV>o1cm*tBj91wNf%b=H9?}sj^EALae-`
zAb}xWFEb=C<Ur;OMPWD)5vFIZM&G1~1?olQYd~_M*o@#dpzC7Foy8wP*zf?&z4dqS
zJOA)5ixrW(T=He$uEBcIX_-zgQ^lmBrWwBi$Gc@_gax53RHF~)*`UommQ=NobAjW-
z#n8{ed+N)E^P#!|tm_IrhwUnmZYY@`aQCQZ95Q$V>p&m(Bjb1>`50*5U!=+9Qt;eB
zY=J?cI&;e2l%k?1vY-xM@-y@rgErW=UlN1~Wr@5nss4TvqaZ~`l)$ORSvW7lQK90X
zljxGp{axT^2K;Bpe?}ehoa^`d5vD3QxHEvO=-wS<E$-hL84Ww%e=-)5WpxLkSq;U*
zNm0yzPbj>4kxiDY>76I!*FO>t3U)UFl5p}3oTX0MKkvucc}}8qbn^V=QypreVVjIf
z0?inhnjqU4bfYe$-iN<;?`DV+mi&<Y$YaD0G)(>gBZUuUu%{Ob2_h*kNPmhP5yq^I
zfw3b#g0&^{bV6a`Fd~AmuxG~P=rfG>A<i640Xm5aTM>~?vLH5ck#qJ(%HCqezn-Z&
zRV3<IKY8YASmZw~&J)jEIoN6fGf)2e6#w?zd;Iv_AesL&`4|60*-khdEbgFtjeXly
z*OmJ2&|l-i=*?dI2LFpg9-zylx1CH=WZI1Su9<1sLvF|Bv-;%sqy%&JT*UPhikQ#7
z2(i_TVlrL~7}8I^2(0XkvnWn}#i+iuSDG8`nH$7y?zOh!t1dSrDTHS4#YrH8b{@$b
zm>1hlYUqi;O8lamIi#x|fwH1s&8lIF8Q$pwLilRCaeB2fu?RuF5#q9z?lwVA{CZi}
zr*U{q<?o5go)af10bl>9s*VPTvTEVoVQ#9Uo`(<cN620s%}@&o0P=KPmEPn~0mB#=
zT{dGn($rF5yz-fpO|aI(WL*!LRul48>6GKPAJTZDE_s9o&!g}mG~po8{#{<kWjm*0
zJwfw!u^DT6=Jl=;@yP8HVMIN;d!K|Yyi(Y31ymH4@$e&`uw;@YYW}$W0du8SoUc>k
zw^<h360_ksx??FHZ)r{NcDVq-8o9HrQ94+h6Q8lgzK(M3n+nPjeU!=fI&q)3Sjgq8
z3}xA@#QuyV_)@elv#c6Jha{bbwTF=B0fhv`Od`;DlI7dHoWXgbMZ|NBn*%vn<fPVZ
zTRFq3a$imMpXX#1vDDu^atMU(0{PhcCd+9uK5W});g!2ase`)yue`JTB-J0dDbg9D
zRbm*9>mY~k8u5l21X);`-ZrphqPTL&F|E8DMdAB3Q5#2gP5~5j`YEr3zX$Fpe4W#4
zlI29ye(Vm9t|vBZnF9wjIqPIeBKL$nbSaRqAa*tEvnbnQ@|B=%ubxP4O*M3MdJVb^
zp;|AjYU*l7B8nVl!tqBX$6D~9w38ZYLp~CE8{ViVNbU?akl~X%j|Xvw#TZ$I)cMns
zqV3SV*0{snIleQX)7$+x{Et|Vpue{{UZ&J4ZU>!n4mC^=oq9}=CAy$cJ{6ptHnE*q
zW6p6s&r%nxM}eZlc=~vHXDXs1Srns?q+@zva*Zkx>wq2bK@30CsBy$#b_Tk-ZmNzW
z${uKBY8*1)K%`0LIouq3T~Kg@0FR&zn`0-{kIeNsaY9u!onfYjJA&Q}UyY<BTnR-M
z*l2k@M&hViCSw$mJf}n~h<M8ZdlCRlE6)~AmGCx8<1*1s1oMIl0`+sBnq<$#;}fH!
z^U{k^3e|xR`@Skvo5DF8n4#kpXv2T`{3tSz*O<Ei<24uj76ZKUR>V#4)r;d39OJxn
zJBxKkqV>Q_wHL^ks+qy#X?wo?INnDJ(f1rPj)S=}#>kiiY6aRwKm^Vi$BP{>&(rC2
zo8)abg~$}FzbT#;)p0(0Lb>$dv)dFTmC{pr!u-T&-7D6gvvt|Y(!=&1x7_WLofw1E
zDCFK2+fhF@0Z8lvR0aE)F5rQYUg;$6UGF&n&oW;4>ArK>|0o0!x_L%Jx($8wg;tTr
zx2Z_wMA?EhA*V6aWsIGGPX-tAt~}@mYo3+$w@_O~F0H6mOLI}FC@&)R8=qcr6jtV>
z(QUC*X`mxHPN<I+?SSBg2)1j120nRX{SJPcjuzK~KW(H_Ak`xAc(a*k$V?C*VFpao
z*0<S(9G<5xUbvwvq3l_j4jA!_+!-gyVKK#MK?&#vZlS?6!y70b<G;GcyFE9DoyPn4
z3l!i#ri3~lCfC^%8*LPXU!I^VGzMC^kFEk{3_*NcQ8>$8VfWD&W<Vg45v{xRM^d`r
z5?!B>cH%at$`)I7t_aK<Bq(xwvDz{co>-gVHDiwIHp!Xz&BXH8E2%QSQ8c<igA46c
zGOaaq%_B#Y9%zQ7Gd!Tg{`Ivc%sqx1Oysx(Q!Fz_f!7}?-eXc;iqkJP!C5Ql0Almf
zH{?8I{Z+@Ko5?5yd0xCxfHF}KBPP{&&72)%e3>Ppl0GMY&hqeMZXg&KjOF$_NXmVP
z5$apS{llzu?gLCvgIW<*6mOGD^IqkQl_7?;FdrCDLy0-rnXMNgyql|O+Kfw>IVO~!
zGrLE~hNZ^03(g^evYq>^(vt!?S4_ZlK3TFEOY(<eVma<PM>L^}gOo;6|IM2>&)z;c
zK6!Tb;@Roxv$rQ_C$G<*pPrqZ9>02ea`yV0)4+71JYSF{8NNx(!8pFeZ<oXfUp{m>
zoF&?2o0NStg2VW~UVn4;^7;4Ao}RrtK7ITA$unr}>$9iNzyA8!+h?z!2hach?8LaZ
z8Eyl8`N(`mVE6%23aNM;w3mV!(ETY;@dP^u(<ZhGj}TE?j=rpY#g{Hj;(U>dwo-LP
z0{g(;D9)Y(%Lu~Il+QP?qZZiTt{!(by-tUW`3Ap3iS!%@kK0u!aIrxWTE~HhdM!ps
z;+9<O)o`Aa7X)kO6<lOoVp3U*$9S6u>-asaW9OiE(DjQ6k1jj+x^6u&@hE{syj|qm
z;XF7PKqOv=@m7}8DX9rLTV^pg5enKBbr!kte^$KhziIq0Zh;RD0bcL_d9b^`d%vFl
z=idF#@xPz2|0lb_oz~n68-A9?zryC4Fg}N38Raktxs0Vs7aAq8qTiYlp_(#oYD9@t
zi~r>wI>jMlzb77X?I8%*DB*G*yuny39v(|rh;Yyrmq}gblFZ`<PL`~lD(z5wNrqn)
z6&O%|5c*uSSu_C=E*Bk0RqF+R_7$;12>&)urwNB{nl6$dM8dQphKg0h4sBl9SZEjC
zg$uF(*$a&&xKjM;RT|Zz=WwL2XBGM2$c$}`vU0e9p%2nYakV9owcIzj>qn@YU9O);
zaS$`P)P7-HZX*Kj#QCj}z+hb;9he4rIXXZ#D~Ta5bjqkAA!xJ>Wu?GT%w@$XP+TY}
z88@klFm8ss#Gx`<#u>BR63FXG=HOyIG&ogc`d_+SpW-{6URNnOoKgWwJD*-jRFTd`
zV;)r=Y0M;Z9~bIvs?Mg=3wFIAZ_`>3FbGMgFU{+W83$x?%BkkQdMYg67H=dP6k)kA
zU}ZM89pP{#s;adqG`)Y9I<WppQ3-B<Lq5VOffFL|MVAr@xhNMxTUvaYxu5qiSU5@$
z2(_{NK6lOqo$#2Wz|{qWJ!Dkv%pH(gA5jIp_#XWK$hBm&vEQL|%O|a6@=kR$I(uSu
zX&LZpN~qNKTSfOiF4cXG)w7q(!V7)BypcFZn-XUMn=VtqTO{Ka5ixy5&#pl(co>no
z$aX_a&4Mu{zw`u6I2jD)Dagqp`BRtW6^WqMSUl`W)a(iL+SuKsEsP`8T51^{B*MDB
z0=Si2vMBO~5KyeHi5x$0JL-9`i^%TGF}0g*sDoa-cjVd&H`EGHFn2WY2%7N)0^5pK
z31xk+7w;b-nc-&lvxn=j=)&WVJ3+S*D+T43K#*j{MS>P5*)$WRb(7l)sqhF$L0&pJ
zs+CkQ($ek7V%v2_fQyMl+^I2@M*wx`@Dzt`gjTN9$eyHAM)%dcwU=zkMSJPMn@~wF
zKUb;FX?&fJYs^Rr#iTf2Dg?D_2#5#>wLEQDQH07mmAeTj8(w@UNy4O9CD>Z+<edNr
z7@w0WWB6#Tszy)O91f9effqsf9K9v&MLQ}M7+lO0#p(%q?dN30M&)XHx-jm;k!#*;
z+G7gJm}Z1C(?j0Lfmu*~0dic<h1IE_^6#^odqc;@b;TXn@9oLaU~2CwOLU3+N)wsv
z@SavMXPqVgt6bcIT3KVhJmt_ix%v1$@z@TSozH(kxRw?G2zK<iBKKKM4bcN$1utUJ
z?383dSqX2+oGO=a=&qtMlid=qaSrCqyAC^5SYi-3tqGLU^(otwFtI+^9xS&HoGx3Y
zm>t(?=6?9!Kl-TTMeh4H3ISEL-T~Ju12tf#*Fweg#10qpIcutSu@Ae-s~4pYgN_2t
z_eDa{Jj+yEq7>(KB=DzjV+9K*(c}!o;vgU00`U<Jq4R4y&M&JVjgaiZA!+U!`N?5^
zkYbiEJ*E*9w&dVwSEA8G4q3=x(UN$@A;5mp8~nU+)(`gqxsdPe_V(_(Y>pHse6ZWS
zVN2}oUhmy+TR`0&ZrZVjei>J9j{dsM;kg7hMDG5AW>8))<|77OQFa*$P^820o9A)D
z7EuFqLt#OUvC@WMj+Zr|E&>}E(P(IQ8R{L|aC-^90{Hwo_--j-9R!x18K|3nd{Q{n
zDQdNOvPC)S*3Ttiwcfpb1D<>p@I_?c40=!CG^=+Pu8L2dU>6L=Fr#=%d~`1iU)UdE
z_(C66^2`3NPbNCTXa;%OiyxSu?u{_Q!T!{X|J)CD>S|IA-8~*+dKf0w-inmyQ<maq
z^K6RVG{d%6*zAd+DELLi0w*z=7*b%8pd{=AG^eYl7n6jLmY`UG@RirA$Vj|`jG#vy
z6=Y_PX%bN~_7MhZw_D2vuc?8|)%{zV%*0R2%;cGy$s6rvYYVS)`!eh$12v#=GoE&6
zuDUT9HKR=ZmP=d<VWiz8D;ku&2S^U6CpB{18n49mAi;Ixeny%QuZ@<=!YKHD4gHqK
zPKlNonw+RrKStOU-f@oScPO<*BoZJ4`~)TWmZ%KKpu;sSJSb4zka5CQyv7<(Ut?=5
zRq3UB`-dOhl$le&?!xt%!Gq)G8hiFWO=n~$%M|(^<Ha>Dmvf?-hG48DbWGXJ9V6^>
zZw2nh-=%J0!(ygY!2&@G`PKWYWIoCrp=GcXI3VW>7;nTr>jW#v>+(a^`{_q71r#Pc
z;DRh;>fHAOy!MARBxyQLo;#{axv-|mwxhG+U;9N9cp#v?x}Yjn;N^#)z8?C;xTAwu
zG7MWma!<#EX%l8^knQh5!N?KtH%bxLj#WgR#6F(w?rj8D6cngI-WuEbsk*{$`<Qd*
zC4mT=N#=ZZtREhN4zMDMfwaPac?s-%9;{r=__tycoZwae7H$cOD%1~>0*#p&zB_H?
zl`Q!@g>OJUjH3_{2lgOA&a~5w`A<ER%De|ugZ$?BPm7nGc^7*^!8!jhJ}GMJSCj^c
z*=h30b>3MOsfw{^f3X;hL0p!bqZQJFNowF-3g4ih@|vtKGC3)Y;W?6~-gCYf<e8ZR
z0hIrZHoLjxxaoBpz4`LWO@7J_Gdvc--fl2|s#=!7DVaQnponCbuZV}N9=ysNNFqA-
zQf$y=dZ<n&3-7BF5GO=|<$C(j^T3FJ>}5K?q)1D38={S(Jr}HXjW9CkSK;nJD!yWs
zZ5wa3-nNJSc!W2g+iT`5)16k!5OnNZ{`C+bM_5+mQsEegRU&`_(3N<_z!abaBGL%v
z7qp(M;wX)mA}!ox8S#XvGQ^$*IGFRLV0p^)B{sbo>cDbWk%4$*1s++-oX>FHbf9<`
z4b*O@h@=$0Yy^{!baKvURA;w7M7G@OOJfD;K8CyDJSQ|Y^N~<Z92qx1h%R4(!HLMt
zmQ2T#bmg=nblw!YEr?VX-jDkpD1C%)M1&P{vxSjf8pNGZUWDO=)S$OZv$o)(tm!g8
z!qO&g?#^^VH(cDdk}H?@b&zSNWfnO{pg%V~x4uFUEjUjU`H)V0?h;!XXc<?$Diouz
zW4*!{L~yV;CuqRgVvdny7%Hz}5)$MG%@e_j))TCLw8jkp2K8;JdV2aAUxIM*)To#v
z_T-kJdAwUGhoPH~Qo{U=v9POJy0F}g)bF96TfOl2<L>{VD@<@!lDxbk+vlNgHsX@(
zdhQZ8@<X4y3}%YEf#t$B3E2Q+`{(k2%&uBckb`-@o4R{ZDkcu`j_0!U2D{S3LK1S-
zWZ8}PzU<k2O<t*Uu{cjDTW(G%NRmn9L7X@OBq1n<_EeLrlROa{Rlv<L2WlK4q5_%t
zs&p<&p5TfwV&t`hnNpRmV9}DB0EIoV-o_h)r%NvRFm{?339L(|#ex)$0wiIJvvYu!
z{bBFRA5HnPP%vYv2O-WuI4PVT<FG(o$XArux0)x26IFx4Y+(l@qj7hNLLqM#X~NJJ
z%@Ka@N7xC0w0(cgK$~N{qs8OL<cq*Ma;)l(RAq<G`9b4E(X*;TK-ImM5cEib-%+q9
ztd6p-hPKWLNf}9mheM_nft?J8E+o9MLj&}!%-QN#-AHjE>LDCCToREFRdHFjHO9vv
zUG@B-Q^``=D-&~?qH~OnYo4ZqeVz?-PUHktt;0Y8zrZv*d3rv{&N)HucLj(4c&>;V
z)BLa8Ia#-sqnq=kYJ#PRvbp$sOsG{Q!}p{Bffc{fBt_!f%>2|+)4bP|{o!RzR!)3p
zV!-biL57!tQ(+?8Mi~$!#}_u=_i9~MD^Y4K=xsJ1E~Xs1tsyu;hyXFQ$@w`+y+{Jx
zJh9#Z?}82zQ^IjE5CLVENqtCUIbj`zq6bX%w^Er%$-&n(l5))j>$O0Ox0$8M4aPWe
zRFX{z?o-m$5x1jwjZRy53!gS(d|}eY$sASZbS%7npytWOjvn4+jorkI5uIEEWjM~6
zCQ^K)3B2U|qSnHww>ptU?AE@*m(2d!u=oeRDwz7?csF?dGP!R05Inx2sh1c7GHDdr
zz^P5UK?3a{LrmQiQ#_G{ifq*4FC>UWDZX?lo(?YwRvev6Ebgx?zsh6MPu(NrA%~mZ
zW2XnI)rY8Q5G8C+4x#y(quRV)woe{$CW2I&Td8#|Yg*U3LxFaYz50D853Uc~kPfvG
z@uJ87HO$`df1RQueZPUJf4$OQq;*M!1Ryk-yDVHXS%X_IW-yEpKEII03BCj~wkeBd
zGj|V#z95Ue!gWMN-32h%^LRjU^TvxqS_v;IJg5T{cnQcXDKYEr#j+(6?x=8XAvbeo
zeQC?ATuo}$%(t@F%8(Jmq3`WGXG@2Ev#PzDm%Hx&(St*c8{l4J<yTQ-=X>|0#fM5E
zWZ7S&tPS?1A1uDi=m-4p6?+OBQr3%K!T*nH+g^C0%)T9Zlc*C1+m!i^SyB65y>_uq
zj_unrar^W-)#*IF+N#}a>9i|wFtP>ICkKig98rqmRG}kZsiv3m1>~t@kJ%^3%me!O
zo+hmLKJKU~%6ndQxvN!sJ(({8lh>vC#{L-%nV*jQQ|@lJ=k{1UG-V{vm#6EftDPH;
zny+w0M_oR3V?5|gi*;uyGB35HE>5B=BT4Ww6?b6OSYi{7pf9~u7Ow*5O&oX<hfBmR
zz9{Zh=)^rAX)YUeJ4U6R*3+<-t@Le10sM39&%Z<b&sz=ZF2Bvn3@wFU&5L9-Oem@N
zjUhod;J-gOxc6l({^#Dkg9o4Ee?G<dpY>ZI_CNRHOL>7<p@Dg;H6NftAIvwYfgW72
zbiH&E6|n$OJ!sr>MlD7>0`;c11i#Z+?`{T-Uq2Q8S2HO#BFaD|5ZCVETG;W<0!oVF
ziIwOY6s~v)kx*_jPxL7vlIpF|L7ryD&njhVYDZ7t=yo>+y(kPom)Ls2aS9DuP6<$l
zcc7mC&jm0TDpJAi5ehsI6=_91Qo$Nf0@i6S1|ParU!kWT{bQho#R07IY%!d`BBN%c
z|GuB}5!v)zI+^_UK78m(BJ~kzV0ebqErXdXhal5Lb&#UiaK}|7-@G}UgCAg8*rBkF
zRwI)tjdI>cN|B5c8ce3TrtnH=zmq`;0%>LKwk<#$ZNMk1pqZ%uc{mK0otf>pymLyM
ziGE&ifYJv}6D?L5KpOa*@AQn+VkVo7@P?Y1GDm;nnNfk4X+7DfIGc9zJF_mSipMn(
z?v}F=OvBdK^I`y^)1ur$0<!hjY%)ma)%MBoVw#PrEviGDnGm8l=Pa%uH#6N-hz+wj
z&7PED`J6`yO98|qds8oYES27DMFOq_1o;iIP3C`XX{IfS^}T%}CNnx;{O8f^sKv2@
zBRU%@t@s^?zpbGqVwr~DpXZ2OS}lXnL^~ZoJIB>hd#c+|1F8h|My9akL^J~;i8gpC
zR(HV&q;J%ON??oe0r;&h1LyO!p4grFtMH6=(z=`CL+f0bpkWZdR*XXUjPD-OYVU35
z3@1OLmC{Axr;n2O0`~gjt&6Ifm4`b!Y2Lre-e({XW=X%8pYPze9j%$Vuyv$&u(n4#
z^U+Rn#J3AvceQ)P2Bv%}E2O>uEmkV1d&P>OR+6G9y?cu~2@`2m0|R^1@&PF=+&+g`
ze)x_2RZ&c0{0Hfi?|l8(8UD@?`GJ(<^Naeg?I!OB{2YR((!Y)pDM5x!TFrs9WX$)q
z`7%*LZf5&lI<}89!YTwJp<zo+{T02)mxOn{yr~xzMyU%D{|9!b+@L>0v|^H56#s4X
zX|-qGx-QUtW2Z&?;qGnH&bXuu3vsL@1UO+TUARl*Cc>i(jBP1aZ(@}Q<;bP95T1oN
zRq7FIdK9|2$IK;l^(fwD+(b^(@x`&~T%B7lJ6KVE3@#$ECpxvHaKeBjg-R}cNH6qt
zjt-0k+u>|f0c+u0rdBR|nJSJf_ORo+*zMEK<(|UQRz90;GMO~OF<iws&9-Lm0#59X
z0_@fAwZl*6^1{RYYm-E(b9Hv14<}>al<8b`mH62yE(4PG`sr&5LgMrY2|GBbcaJ2H
z#%vgBx-{pNgg7{-Dth&AEwJmeSU*JWN!?yUpv{SRIj~$4CdwpM)9z`JkE5hc5t!Ly
zr=GIXZN^PTP42M`eejRhy+1B%F3N31Td)VYsCV;ZZcq-wLXv~SLUD!`J3I=vBR2u0
z#o`Z?QarvLb!-P59g8yH)PmEQG~|hDeB^ji?#xo<HRrM6sYrxTLf*e4_PIdjn&h1P
zLPqM0?duT#s{!94%UK#3x9-D3a(<5aUz0t9%3bBm6a3!9K-FWv3Kejbtvdc+f%s#y
z8lymIeA-zAQN@!WU{X{U%M@l`-2QC7N#<4XQY=p_{b)X-ZD77SX-|11F+s<IZ9tc3
z(Y9%Om|b_W^tmM(D5nIPoltH8+KOByA4aRQ3x5`g+Fa@OR%E`dAFmi0b_v0fIRLhW
zg_+=hIEX<$IU<1qNWB1bNM7@wj9%ebeGg6891}mEwl|O;(@DxF97DeB+!zK)wP?+>
z!u*z>q~H6r$t3<NhfV8Rr*0$`%Wp3s${y)ZH|+1ulJH*nbyyLpYt_6w8XWmhMmfBE
zXuY<}<;L+_rzgl2{*1T0E_;(p^dIdEK8WRMP^u@zbU>UAe-Lo4`f;Wc%=#sRTl(C`
zTiRsWs$7zDLd-^^PI1Q=FXmJ$F#Z@~h~~OTaf!sI-e~TUif;Q-Xc0|_;12Myc1JqY
z7)3MyI<OjJ@g0uHE>X*<gUHKbL^_DydppC3el=x-*0t`K&k%>GSomOMrs`#TZWse|
zZADUd8wGSWV#lHOn%3S9!VL5beNV_?*J4pbibb(^eNRD;IaN|Nj^2%bh~7;H#q~d@
z|B4@jNQhpB{~U1nf5m(8ALD)a?@Rdaf5-3MsTW-EzXSLYUwl9>W9M82Q2&n+GNpNB
zo9er<uu-I~>3dKc)lC2FwbgstHes#6-))P)38dIvRd47}PH@*Ca+h8t*6S0`8bQb}
z@=*_NcdMceitbZhlex0L-4GBVx;2{>m~!nGj=RJ}hBHJDxyb7FSTqwbCJv=uZXETV
zXK_@?J`{oz)Z@V+Na<j*knl7)8$>7IqbU0tNel3PrD#a1>t9kHE=naL!6|thp$s?5
z=BY}HZQKotB%=}D4e6W~FSsKJVz18CDEW)5N+B6?jx)?DMicRS6FE2MXFTX%h1xYL
zT}Af3<KSYqUahIsU?Tl##*`$NqK$$repT2sJs8G7)<IaS07VAVhvUgh$&Fn%j1B2R
zAAAY~;V4=P@s>5K?IzekY2tFrnFjEf&dE;__e;cCVd){-(XgwIX8L1HK@BYPa?;>K
zreZfx$+Z;mZiCY8!dw17D847f`M<FE%`}=QCqDQyQP7}xp9p7|_Ze4l`>}xK<CnXV
zhb3nl_BMaDuJ)vIuuKDaJP6l@t_|X*K`zaaE2#`o*!CUU_Ky8>C+x_MXb*kP_=aXR
zT9Yd~WJKqtzt$3~dL)X9v$3zvJw<JjS*0)>Lj-~q;7fFLVaSD15$3v=uDKn)8(2Dd
zf4LlEmmNxo%Wv-S<A|Q`bzMR)1k_T*bUloS9*z+_@nt_LpE~CQbvs~I)uYqnWKs%M
z#;_HIohO_~73m19m^{L_Guj#PKJ7)~9-&EUMN+33@i8Fi^H}*DN)%#po;p`(17R<^
zU)n|<0X_;YrJ5<7{3Zup{8m_a2}G)OSTu%Q1yn|FOQ*e@r%a#5JhQT$E{C4s5DeAB
zHytn)jDSjfKL(1Xp<OnqZ$xuK@VcC)So7BnQ-|(B9q~6VvUL7LR85K0E!9=(u+%=S
zYwcH+O;=X_>*>qw=N7p!*p%%OW5B`H@|<vWt>t<y+fLnJW#5Uc{wW`|uz7ui8oFxo
zb-nn;qO9K^u<?ZxxuG+$?k)6c=0I?X$I`6vP2rVv)tChd)zNvHonH*_incCENYjL$
z5W&^+)}>E)$h4119O>!33)Co+WS>M^^y*Un+j&m2aou^)iK|g-HNqY@1$lJQ{7|o{
zPv=DL<@v9C46(*_k~OBn(iDXvPybX9hMvot(J>NZyd7V5tpI5!wC_zOjF)Ax)xMKm
za9+e^mCjJ$puZG#jNwTYM}2c(42QPEP+)D)^h|p!?1s0y#_bmop4S0-7FL?2NXn2a
zR81(UZ(^@DqP^-WjTjB3CgnTCNp}M{94AxrVT}B195>`}91cq1_*=+dlE}YJOeX$w
zkJwt(1nd%<DUY}JqPR^=*V)^670tXhj#)2mKdU+OgN{k5&0!SIAy;lS`*e59iWtjS
zc@sexq}3G?!<CEa*w@RJ71(%6U~N_`4e|f?ckd&!zfT`muY!xLrS8>^iko#cC>l)&
z>oJ@kN6RQRo@_Wlv<H`z)kuRW!oxxpZW1G%#LvlPey#YA5~x;sg1Ln^i1)Z%{KxLW
z{V(hBAA5TTpYy+bqVXSgE5!1ZXpuaNqAw)OQ8B7OrYLLzg37@`p=BR^S<49tA}kut
zwNAZemTA=ozA{I*9>jeWZgrd{)nYE`)4V@p3i2@FS$L$sm&r`C$ZQbR!L7?*R19i|
zj4=+lldNP{uW2#L#>)_+)(YTkdO|2i_f9gcd(l46fn!KU#CitxWjWkQ&KX-g+MOy1
zXV+PVG`qPIa81HA;5mSipqqqxPCjLG(P$;c@fm|0E;HZ(9CQN5SxdrF0s(;OG)5%#
zR&$)Vsdw+GCM4xL{j_`p#*`Kf80LKtlQ(5OgEDKf#3u9i8h=y^Y?r$%ST|Zywdn+}
z)%^x5;ag?KR@>@$Px379Uw`RBrBOio@_HX#lny`M48KHW?JZ{*%dAZH*gTg?aU7M=
z2lgzw&?!XvbnuD};2xtC#DLjTC^?LQMfx{tm@uQ`b6`^>JArO|g&!i_KareihSBE4
z1`0)z+#lYl5^W7gq{{c%OtjH&j43sDg6VI)WHM(P9|-gQs-=1@V=GY6K*XFzE6R4x
zrGvm8Oh8a0F+#sobWmOZ>$Z(}v?EgeWqIzu2M{d=kuf-IA8ERtZ0cxf-d{CcpBg{9
z5&w4r(h^8MACUTQ1OD&cg9i_4{NLV#`@5g{zfY3?<E;?OAHCm_Jew^h8ZB63v0l1g
zC2+SaCYJ=r&Xr)wQ=o-N2rmmv#Xw$~=n?_<=7Rk_k>6pMfhpX*l)Iqa1#?PWHWc6`
z)#1+nUxCX4nl#SLvnNGfAti1h3aoYG(LO2W=^0mNo;Xz-pLQ&RTA}|?;_!&(b;lPt
zQpJ%gz`q$hYku&pgiqqY2vS`$s+H+<fWkI~tm-TZ*N{xk)4@E6JKVkiLqNR0ndS<y
zWX~69qQ?bD=VjLusP+M06fpbmm$gC5)7uSP3N;7+lx*<Y$rmq9X$D?Ag%7SJM8WXT
zSO}7X1JE)A-JgtpB6E}a^s|F`R{8C9!(d;06AtsmH!B7h>j;AZzWC-S4#r0J#20c-
zpt2QXP~?0G{?s8YK^(ZT%YS<tcKL5_Z`I|$y>+V*yZ5!nP*>Pvf}#r#u53Aq$Jyoq
zls8ZJ^NOUX%k%{mKjFQoBWeG2!@bsCxVh)tdwLT5dNA0Xu8NFM7RM1Pd$=~+d^a7n
za3ODXv|5J8uaz!TpVIXyHMKo4wyHY2O?5v<OH7T}6V6~oaT=7rn@pEOv5yjJX)l)1
z3yZ7bepi5DIb|O?D5fyUWcsKh4Gx`~Kbcz-;H|G}aq{!j$gQOKMqtc-j497ZK_HpM
z1qce|gvE4booPw=i~b#GkunI1FwY2c#bHvyxN58|-97UZt)$Q{pAB_hrmm9psmKYW
zGfZu9!b#VBWVL2U(*vb~2gZq#585F9BDfsjfn?e}bwWw-f^*9Rp(3cX-euI63gU*o
zN896JOvsUA${d@WUj*}l^MtXGGNTL3f8Hn4KQh@Y&4_&tPZhK@2|&!TYgfSCPnO%H
z3r0*dv^LX65*Ru_W!|DcmuA%{hZpIT8*zdj!VaTFkHX#H?qHktVT5sf!}wbcc2q#n
znv`9OWrl^?#+Aa%u2lLt3L(!iC10%$)aiH7H@|PZ?MAG&T2a?)6mU)`>x`?4BIS)r
zU#cV@wuVBS(Gs;@)ekt2QFJciu?M|>_3H0o*L|3x>mvE`ws(WQ60ShNA}}nhk@EwV
zf{czTB7RKDB5~;uHNEqr=ESu@?iKa*r(fn0Q=1rYzNvAG&<>E4Xgc6Cb6W2`a^fh1
zJ%ha&q*X;q69SoGj9YRmB&kp!wrX&5>lw-shea}>J#e4Q!`;Tifa>q>uX!_gxc8u?
zc>KWqe!s2w-ul+p_Veb>xA*g2Tk$`)-ZMx+BytzQHJt8}X;6;{9Pc0a#G4`{T!%L;
zj~st$T^)&Q^X{kctYs<hw+-y9<<nl<Ghg?gTjnpIF49GsEnnAuORc*tzsBo3*H(>b
z<zCzB+%U72oV9eVZ3h4BC&s^sRAX;HUNt>LzTEPKW7%(+e6!>I&#jy9iOYf)L?@9}
znxm1JtO-d&2ICwUKe)V7He9097o%pEjG2chGt4V6T3}B3Cuw<ID%q{GEtBKb<jq2@
z7>nLe_0NlJ_<n-bVKxb}!fOZnm@Z*4#TN2{alkbY6HNfnH*}N{A6iL8L1TiRkI)@J
zxG?4o*he4llGmL)|Hs)+UE#Cni!u<wmEmp-2|AIiGr?^2qt`ZTGNuXWX@4+$S|^*B
zy;XB&1H*!)fL=|Jd13$tcb7AaG+W@2^_ru9+&q=Me9XhuJ8f^t7%G`B&E567v)7Gn
zCcs)QYMB2NSV&PBQyI6ORDn%;&F++%U6<X6E5VT(dd>&<F6D1D_Rn2(%q+#XW0rsm
zfAZu6Z`mhrPvAN~1|~gV5{3H}%&JNN=E=<uHZ(G&YJIZBFaqLz>U$9cTR%5}2VUPj
zXG4i|{|x5d1^6Quo{x}4R3}wKX`EN=mGgikOoPLqGR!FUxK{FqpYHDcxLtqtV?PK~
z$$^_fFB)WSY2-K^H;oz&uEZ`47RV_(_gCOqU;ud`P9zBGUob#5va45f^!#BiCcWnT
z10Z1z=D;AQrQf`EZ#Yojk}XOKxWoP@bSS@2KHj*gYQ98vqbvDZb+uZ-fGAbV(QP3o
zh9;>iUbwB$zB{jEY9<U<&-)nKhpkfI^Uk;xaJ6-LMFB8^!oMyXaKq)l_nTbrH(cub
zAL-iOuU8(-!k59V{pYy#UOf&KmJLG4+b7s7Mzj~gzze3Z*>)zK&+^<w^Ya;I3pkxO
zO&z40WT&y+MA)!BZH<uP7sPH9e@zmY$O}>Q6<?mCv>eW}0bOi~q;S8?MrfZBm+eVf
zQJPAab?@5gC5@WN{LpFE@U<5SNGn4ob&Mi7{$KICr|G0h;spZD(*p`qpQo(Q-G3UN
z&$E%!Rfzt@gj$JAgwy*pgJXX&!936yVa7>abI!2=#DKIEqPNnk0?s59cUc(3NnGQs
zL`$Y+RCvYv1bcdX{dhnhc+v6GHT|0f(K?LIGS#H_q-7(z$#+UKquMOHPA8cE2kRQx
z8Yx3l>{As<3rsW#!x^<jl>DGM13g9IijTF<gx3Iu!*M=5E|*w5*BD8os&YcR<9u3L
zM^OHGTB(Xv#>MNOIhX?4s5cz)7ULyT`7@Pu>junhiX6j5HJz+1^3yCmZ#7~*uemwn
z<;;7XLI1M=>p;^31nMXXk$?d++%>Mer4NdN&1;dM?E(`%_<Rozg2)3(V;%7+zo_N=
zu!dTe$r8u-NvMXqV<f=Utr@X}!mdeqVmMq<VuJs+v6%FL5m-Hh4S^F1kHlFRQdmF=
zW2BK#9U(xGv<ADMj#gP|cGIrQ2f1Q*`%~b}W#fz#jGFW@0fr@;jNZsW_3Rt56bYkM
zj3<eo=wM2+v~=-na@i|x3*kKl6ketnhIrhs<sY_^7dyOALL~BLk|a}v+#-15q@7ho
z!DzwS=G65_9q`dKNO=u3p%0>jksr2I9KvjC&dh-5id79WRP*crAr$E%HAmM6RUK2z
z>k`^TE{YiE!iO-rzy*z;Nkm<>%BXdjCCqW-2Bq{@WCCLqFE8jI#mNEADtn)j_JrR+
zfU<Os{>+jgEiZM0huk6|inzc<N4QE)OqIaW*4uk3EV}bHE0arIZzK(5L=`+7n;Osi
zHjTve0Nr@%ugtcsNF+C*77WG=%5@8W;D^O5^$AFXXps_+@w`}Y406z(1QhK|Qd7oT
zcoS{8Xsh9Z?ahp@%v>|{m9Wz!&m8g!E4hVW<Db^#Ce_%ZGT*E5X}|-w9s}HVGvv0B
zThDpfHnIyV7$|BJ9A>BkS2~R1J2H8mP>PX8QWm&30ynk}$e5J$BxWJ=C1=8>TT4=F
zXPD)JUviZ=aXu9i-!X5KI=Sj%uytwnYCtf^T}#dphD>t8p?X3_epkHXh%YRrE^-?o
zBsjn$xKe<WBI<!n#1J3hf$r?>?(X*X;h*k9hdr>fqr{rd0{nQ5Z-iIc3YqF!c~Rd|
z)N;G^Twa-#y;$27uQy-U4{j^6TW7^oz+R+ItZ!c~J!!}DZnf$ZBN!_T8*dDLRl6;X
z<Ne!cSy+gmMIW3acQFNn$?j3o(bM2q5jvns&k|5l5N>OG>W`p2(bjd(2DzB|^p1d4
z%sEYVOOooJ7wJ3;oRqzE;0WN2QtpW+<an2x_Cl+1gpu@|?mCj7ddYkp`YQ0tFtEPK
z_iSDXB;6BEWi~04o{Pe6hs`x>`$8|CP#g4}ik!HF!(p%58h>S#lh|~_*4ol4y!v<4
zV>SmCjD6K>b(t5*xwsKvYMBAMIwJBVt2@*JOqv(N;vsE+%X-;lh=@F@A_>*evdM*s
zl08Ey?SRsIP#57J-=^bE*89mS<o;S*p`)p!!g7H0&c!N0jdMluzPM5tdYR=Y2$dkp
z4KGNeN6ITPqH{JqV4586VO9ywEc<vG`={4jb8t0s-op!*l0ETxWH1}2ub{Sfddl(U
zjOMPB-O`|P9FiG6&>bUZ1@p%mg-Qe}!4kE#IEK=&poD{?ah)0fP4^?^VQgHR)?eF0
zhTbT<H7_fXU+7}cOq*r*A;L?bTwKNPAMZYl-#@CIpNH|?yYJyLrrWjCRwE)Cnu&mE
z?LZy9_gysnWAUuQ^8cg|u&nA6M+g<`cj!!q2sIF9Ck^S3&`I|rd=@^S!@8Ofy`OqX
zh3F+L289g!(2LuOX%C?AzdVt5$32+J;J)E&1}U4)V%tkCvjfF~xxtw+^Mbv8{1_|$
z!3-w;F$%^OH98xTldnlPbv4g;r}!ZJo?Kp{#u*9BqN-&$uE5?R^gGgnKszt@-g4$R
zs2ZQ(P7C{8m#kiDV$cgNgb*K>RPlYfBzt{UMxw?Zu?>dJUM%`KwVuu{uzv;F#HgZ&
zwZu`U()j-;bu+`LspVI(;$LTMz0kik-x^fmu|2`f1M4Ykz5@-7WzT3dVOwGvUM3O9
zpymwnfQf3+NXP0QOH=x_fB+VcO2Ka9G%hG<pj0YWJFk3i)3q}<O%}Ya0z|+A`09Y7
zgcdfPKiUBN9RTfBvJ5i7hWmtLnVeA+M|)lrHRma8e^D}WENV|l&{LHMa%si3I6iSk
z6;^l{YA?D4Jo}B28>@kjJR(OVPj8tBNWwG)bggNI;?su_vBA9WADtd)PhLPbA4cX&
z9~r{=W!i!H$)+T(pB3XXSz0D1oqoU1v;pV=e1;FUhS{RLI2$Cx_nlL`sC$=Z*-u>}
z3wCU$l(4c!@sYxpY4!EIn92~!j;)#^cQL<=eF#UmgY`parfl1U^&HX4o)>@cv`9?U
zW+DO|6eo8SJweE&P!h&LR?U%Tpu3M3kc&{-N-gJ9HX~o02tq@1Ly_~fv4Xg$sX;@#
zT;oO0<q$>;X-?!<kKS)62)YTP*eD#%g>p>g1jm56w&5>ACk8pl*#!<BuX8H|c87T#
zR{A)*zEWw=xzOQesTyPaSsCO4klYS0hd^lOS>e<lE}#$jtI1@+sW!+fp4{U34D21o
zo3It3wB_V=5%oyMjP?&>5+vj-2=q*Lc=)6svQ{A?TiN>(x81>^_j7~o&fb``<hb$U
zKRk|iqkD(Y9*YAwGt9p}rWe}eK<rxLvAVc3A@h4zN=Ntk*Sg`hwOXlK^9&ooaho(b
zaI?W0ox_Dj9tRi3VoCTC`$BxLNW{dc!2~{28vmDOooAFe2Imu#&$H*OY`j%2dcKIF
z1&4vIoU)Y?q!ShjVIXA{Ib2eb=JOo511N+0&##B!jpa6H9?K$(Y7*MS4T?nT)=F@j
zb6NquZv)ZTgCmJ4<A)!KeTC%;lZyrIXbPWU{L^+-YfY3_#MLVRQbq{mhMGeYm_ZZf
zLrgLxGXY&<3hr8HadVla8j04PzN#nFg~WkePSBefHjO|V00nW4z>D*QZ0b&0C}!j%
zo_f40+C`}7oMu%ejy$>X{#;{(C;Hjgb^D&my0JJX9p2Q`<~=2BWM^Q1Cf68k($L03
z>OFGUCYx?nGSg!hN(~|$m^@xU_ZXaA6wQ8bGqecLbLPyvsldp~grUCBcSOyA0o=x*
zKy~g24+7HBd3K2`THTFjJN}ON3h`{Q6vtd@ock620g6TFV+7SU;hQtPMhilZeIy9=
zN<rsBA|wf|{QXApqR_cs%o`V^{sl*<J9uvrlBFOox>=b(EN26afQ#aA%3v^AfV=Gj
z@Pk2UA~m`W#}jxqVECk5>*~gs$DJY3@FnC_QLSY*&wWbxN|aSht}c(eb&w*K4tT|q
zN8^DPm8mtFOI0i~Bi>)5ZvUFYY){}`KXK`64ay$Qk*d(@41YFnu>2g*(5t(}D9Ri;
z!+Y_60!#uW!r`f7BLU4GYqPyqjpll+yV#Ryi1t4@0ueTX^MoMGFsG?BRbI!(Z=Qz<
z8XOmo{Dpepqj$vfdiD}zw9BaxxB|Am$G>0t=XyQh^AQ=ZOh?2(jz3+xpOnC~qMnHr
z<|~X>dp#>-`e0$pA`Lq-iIX&?u=An9AuZ@IP&!aP7uL1E2JNj(P&wV}qt_(t>o~?Q
zX!@ov+wiT8A=vHTzXLyE$-CurpC7`PZTqD^#P;5B_Q4MP()G-P)4n|dzO2a4k5K1@
zA4m@k`$N4qolP{%XV3Hprh9ntXh*83XX;s<qDF^(5BMn@;oTVfjsG4P*edv?hd&7F
z<Me<VO_(S5#)U#N>3GA(DvvSel4D0HrojqhdmJJHbgn5A6BSelMh9oXkWA!{@!zK~
z3i8V6Sz4mMkyD1iMrw|wky0AeWfUZ>gf<%3E98J4#+9#8$sPCNuO3EU!Q8g}+&S(O
z!NXGxA{h49LWO;`cGn{Ha-e;P)+oo3&_?N)ne*Y-d7sxg+>oQ`9&q9#{Si6>PxJwN
zjJNTz>DwDG<`B@80EduHW5>dcH9F;iBwfUXPg5Xxy=aIpn_JkSo{S1bGOKl2U|XeL
zRFSzI?#9Byy8E~`BDz;G5DB;8g}8GNvs~d)qk>`~?o5*+<8#=Q`KWg^PTeD&oK;&A
ze$tCip1*tw_j@($3x+CTU@rqTPdV|nE7_dpbXb?6a)pjAC(}|ZsiX~~^iua<#_<x4
z2-2+X+wJ+#JeC;7jzzXJMg)JhUHRwvn+fH%)x4F_TIw$w(?Lv1-qzCL1xf#X`1gB+
z8ONy$=0VG~MTKinIHhzOsIiV#?~wG(vT-hTi`{`cEI1W+=p>oR;Gg#U{0kPqJw|ou
zG-+$j8Kl-U#~GPCwTX}O)gr23BXKFb_d8BI+YKN+QD&WTDEv|-HU}}p6)(9$3{sUL
zeVe6Moo+B0j(*f$dxW`3dAvqamP~+kzold2o+(uxH!A!F{hw>Ka+Qw%S>P*#&d0S%
zSl60LU!XQ-eWk3hX{)q#9eL^a50k9mk=ubiqHI0}HJO|jlvY~O^Jr)QufHl*3YPa!
zZv=uHhar=TlS*>L!e5gYP!%&lU|(`lt)UNjGPeteSGVqFILQy?!M*xk)}L!*jyVHa
ziXM=D2+*BHp1K5=V-1=K`bD99#xXOH-ZYQ{T~o@7Vlr|Wob!YntyDCe7o-ffhD~bR
zX!OeFcsp*u-9EkEuHiv5RXQ!u<O#xlIwx%!b$)d{^K{&(%GBe;YYIsT#tbFbR@;M1
zFq;&)tG=z;dX0CaP`GfqTj97&HBoop12NjiE)9VIR+EA!ly$12uv7fTmlpQs=`v5I
z*{~;nkK#8l$bP^7Q238GyxSgnb_5>0>1U|P*Q-Y9jJbjErP_{Mdx@MZ1~0YGUrmbP
z`#_3#o1Q205$8iqM%!#)ZwhuIb2*U?1uL3t?~`OE#@OkhU)$~%MNA%GH!RAOVAf;6
zw=-zTD5%S?eVhjSq_a+%Syd)2p$Q+~vJ45vHE~W2II2YPk-^2zh5jl*)YJs-3g=Ea
zbdhjGBk>ew=L@`Z%arN4Vj=s$;IgY}4UGqs|Ct|jYffvZ85OH$KYj`QD7;fbmb2vT
zxU<4*HIopl9UbZu^3_CtKt&JK6+o_$R)MTAAvo%=nCI#Ij{k*;u@BqA#m5OI&T`wY
zb<!M36MSZ+n9x(txFbCkdt@Ca9k(2tz5XN}S5MF#Cnr6;{b`zwMw66&{HfRcnH()!
zir(O7CfCD08PPLSBd~?V>@7M8<-FA97w!uEnDv@J4~nWPrY&UyA1st31i5(iX|$bZ
zjBC;mvX-~gH9n9l-YFe3TJ{Wom`t*yOr5I@%-D{fU$>oFBG!fDcTBwi0qpoGsg?F(
zv)5y-A!nx!ljp>9HL-unqPh7AK>>l#{U=H45=|LWte!fpOBKxUdD~bmxhJd(N)ha1
zudOPl<Ynxh#mHzt36&w=T60hj#WlVFZ10qV6N&Y*<(jCM5JnPs_W+2S8JQZd>xYha
zVrJwEt15!a)brDpd9RBdpIPTt)_EI-HCu}H*zThm>$YmbbbKp~bbcDA<Pw~)eZd|M
z1*p;R8lI3^e~4zSoQRD&N8U{{#q(@!MTrGZU%d<UsWT8dB=0(@&~kAwj~~jMt=>-k
z>MARNE%<8Ld2{33^T1IfmqH6C@tZ&0FxRwY#4jMBpaoqv@y5AfEmrT?)_$+)Uh6uT
z?p2)DnRp?tA>90t;ZNTn;8ogVuPhhgYH_(Rmj7)!Cg42!62DasE}t+iK3`-Kmgs_D
zaPeyv4d&}l%^M0zCD2tt1Uh$v{HG)|fsw?>aNT!}dmPGJ;6K)*C^p7}yaE4vckf`Y
zhX1|)<%8YN_>Ujse>->XMDJ!zgva_~-xy%%b_E6+!tVMEt^e<j|Nl)6ob!2}jwlz<
zaf#O94?uxkfByIGAKc%qo&VkYUq1MJ{y!=FSG-c%_Cx#(!L{-rf;lOj&q|IaTP`GV
z{OG_rnG}DB|EpLq=gW?yq!YMG2Skh%^<#+v;J|vH&yy)-`al;shvVhj83XBAke1`U
z1B5}(``J7_il)o6|I6OH#W<F(i$Q0T5YDkUCqW7zLO}m`dUq{X+b-X#dQaWvxU1aV
zb*65+?CR;s>^aV|y<D}X?X|quT6R_K-YucPLlOx^@W2BCDLfnnLWmOJ5eN!-;0O>X
zB2pwk;(-?;kRU+_p5Xhw|FZsd@ujM(d%AmEGhJo>_uF^*zyG_4CN_bX2bXJsyGHX{
z<9-x<ue*kxS=T7qt8k`LyxgqSHZwwdl%hexM9>`waxQI}%XTZzQ1L!5-ZV}|!h*bx
zGv1w6&B$fGr3+IFA07A|9c4e%7CtucZ;>{6a>sQCym9!BHn-nxX6kAY{;c6yDr{vN
zg9dG*I#oN^HK^Yld&o!yerWoyayisnFR_OYS(a-Q%62lO^&>r|A>**gE`wmSxyDzm
z7>uv`z-JC_imrD7h~gS<N)v0acHz0>QM*0$s1(uKJhKWv?5t7WXRYT4P4;!;2|KMn
zYp~Bg`;1{;c7=8m^%%C-X#a$_2Rth^As?dCA;aa7iHSc3{)V$=B>iLD!=x5~30)X)
z{Y_w>9PYj!b(F6uXGUlUuHpAJe*<&a5Ux2m1T=4z%GSC{xiBF9GLzxnBV4-n@o0RG
z@Fdlzr>$`y+O{2j(p9_B9bs1}9)oN31^E#|uC~bPflwB@7=%jY6w{CaNRvi1DgZev
z0h-yof{h%EfcMUmFPT7h>BkLuZHKJM>A-)+l<<-JSc<@5*E2DaEyj7KNpV*{!r2U8
z4=j;anfUN;GSq*@N952KJUR>LjB!@{nMLWE@rGACS@hZ7Uf7ycyn43(MAgShaZL#I
zF8+XB^g67#7D9h`k@=jFLlkPlbt1FN>@sicOt}EbdQgINogx+C!Zr2g^f|-0cqxnY
z8T8i)rdK-&V<oBe3nd;Z*dDG8LI%Dfb!{K*2&SGabwVT2$nsO5=1Mj%k8bx8?oeE=
zCE%l5W_ttxsBTP_yA+n?l1pqkT@jX&6g<pArAwd#B8Vy3ThZ7Q_RdQSUarOc)OCe!
zJy)$4NSVw0Dm^yYQkid1=^Z~n9k$wsUpu5T314+dkqQkeXGN1IsPiqfb|Ise3&4D#
zK?<MRbD9;6;D}FzWwl8^*C#CsWGO77G=sry$tR{DU6_rfR~09O29n<km&8IWL--Cb
zqAjr?3YNm5KDDo{9#RwDzWpn@A7=ZQTp$(OiJrkniv|-k)^KAhVq+_?v8_Y4#}zqo
z&g88VxdW*OA9|%z8;dn#)uhN?Q`XJ%FGF*bN126)6#mo5v7v`XN|#pen%9+h(c~%8
zRfEOCbK+X*?*ck+^DHz7!F|LOe(ql5fLN>s8*WkZ8(}~K14n)1ysAb`Lh9R}w}G~D
z*>l2#vGt@ip#jb~WNPFe%pv0%gbP5HpFk&cf~Me<th8n_hjR4N#>Be5v^%!&*+|Ow
z{2;A`-X=OXg%PBMffixLh&qr&^22S3!=9He1M{T(&ZXiMiw15J?l1`wf;REqx!$gj
z8<DFg($>YUl*1ooINR40rld89*dFOxOB(Q?h^_5kQb0lO<Aajr*ku+yc4@K5QIIrN
z`iMN3wkh0b2p+C2+JKr_#mvj3EUv%ygvrHW(@wGK42OY-xvMgxsHZAWPj!=>id~8B
zI;<qA9T2)9Lmth_nG-$9q5=OQf=@)+V7T3UPpG9d)G={wu1#!2Fm?@yPVQ|1T+Hjq
z4f#G}+1c~t`fB2^&Lt_e50#|U#J<)x$)lx>nhrx5w&{v=WKCaLRFQLa;CuK<Bsei5
zAOV~q7Wx|aX?%MWo}kG(5LH17v<>_L-9^t&il5YKF@yH~$abE6jgZsYKPo_poC>qf
zEyANLxx8`ObQ855$L*3IdU*!9*9tx@gp>hygpO;YPT6Mkl)0|KZ3x9q{u~upJqKFr
zvtMc~&RZV4uW^x^xT<xvsM$O1#f(#^kNtx@h?XX{cqYO<SaDHC<XW70%P|Om&I
z4xvJe+j4Px+aBsYls5{U8&pLQh*r{&i17((oDvXtDylYr$Q25JLr(bUiUG6c1He$W
ze3j`1jSQLjoz=HA0HgKiD-S0tFN`uJ0u9TV0k9Pvd@JEfV*GLqrBTOY;F>)#78|IB
zPEc_h#Pw1rmU5S9Ji-I2YjDfBFz$V1cw#Js)u}7b?)BivVL|Q(is-7eW{XP-)PCxR
z;R2uAdh7sQY^(%)Wy}uwp2A0A1YS$?)<wpm=&|g827y&+9ZhsNHtzSoL^OmBnfNHs
zM#M(9MBC%RE`ag&z}`I<msp_E`#=G?Oov+`A{a}fbCRCqSqM#B%~%l!>J#<se8K{O
zN75w0KA<f(-}x(=7Wq1M`BKt`$+xdDx~_Gy0f%-=xW3_5GHT(AOep=6wH3+=_?4Uv
zT3p*X6#C&o@Yl5H2fSmnFmfq)Cd`ZNot_KD1>Dd}#9=6evdO%_!3RUSZ+I7|-X`>_
z_=DRN&aq9PBAE^T{1raE5mE`m0Bo<phKPOHNAHD}{*HD^w{}zuay=fb2$x&amS>HF
zXN}|b{>gExT|Yc{cHC~Z>aByl`r+X>s!!Y!j6`nxh!J>b76IwZASZ|DC@qH=Hn6+e
zaYsC#j|CA`6>%QC9<13&-zz!*Qs<pJy~AkLT&y4uGdUXyah5z(f*jHjto3qAGc(*&
zv7P9nP}AT9(7(gFogdSpHTK&t8hfpivx7g=P^*@U&m5>Li645J=TOakW{*FBN2Vj@
ztWWA?4`}<7v2;49Toa=G0wCg7KC3@DP}U*b#7lW`m3qA7j?d;6^YLAa9H|qJ`@}?V
zqv+rIHcO<Rt>p3$_=*w-&H9tWhPImZv$Og)?RvA>IBSXXfU8I+Lad2AN72(3A_b|E
zoU$Gqg1OT#B4X-fM>a()#P%9uJ}DPK7eeF%1_f4EmzvAa)Y&RCM)CrL8UeDilnF>D
znF`HwKQb%6QePGXev_PbO9`oLFjiGQLxhA^$k0rZ0s4%#6^{neis*5?a>oOJlra(~
z)GI(z%uQxV0Xp=?D3>a2Jc$X0W8x;J)CpQm3Hy4{qEN3!lM*e7?=JbV4}c57fSNEe
z*xN#A2_=q75&*a@4Otqx9vb}Txtj-#xxTu@kWLe8fL@)*@ESSe0xXVi=}InnF0R$s
zcvOlXnm6ib5XY^1UD8!#OnsX8pd-A*#%7B-%Lwpq_5MrO(}C$9nj>KRpTIwK4s_cm
z;5qNVwa4q_MEv*KGXCd#(*MZ7J$!V-bvBJ|j#WzKa-JPHfYl+#L!(LV5ulFV2C$ef
zoeuPhk;;3#ashsAuxBRJe>S!p$Lg5l9^1ZvgyMxt_+8t9>u|B}0bO8c79c!+n~?rn
z#^fJ>`R>>ghUCKHmrTiplvqfGNhzFsVF~<_AuPxBJp=C3cVd%^25D~_n$3ouzp2vD
z3V<}Q)!3MP?zxBtLuiyPah|1KXLyXgu38QBN-xb?k=#n{ExkKItHQCML-2uV57Coc
z$`GI{Z4u-U^f#p`qrd?OZxgo~g+#%C49K4>m+wPkP-zWWA7aqa*#&9@`QbW;+>$l{
zn$xl1>dF0v$z5$F4jW!oRS924iy~GN+d?~8pcXg~O_do?lS@t$=}B(r2BD=EP$kcv
zh^JE_9&!7@71<~71B8unBRbT(^lV`g5LiNQm@F-UobXrc=E>96*Y&dogU_dDCoc~6
z8~g0ZH;~z2dnc#goE<!S-eS*B4)+^pO;$hNht%WN*};=@U}~ByTW><mEM{cj*Eg*3
z^7O3HY_gLxc5rlhcmS<^jSSpz>!8sDXguCKJl{V!ewJs@0y{owvBQI-gBBEPo#d%k
zQ7xm@V<%79QR8gyIeZ0ret6LOhKBO=pmmI$J%!%t?6iIcmzDFw`WZVtKRZ2XHW*AS
z!+@jQJFFiZHTH|pKlI5OFB-=!)_e{Zo7hB8zCLc8;ou{)eA0kXB3Ft%@p<kaoB>~l
z6AC}=!Q#OH5A&>f+Soh54~>@%m`?rdo4jbW+4%W66oO2)Uq7lpYcw;=n6oOdFMH=_
zjUya1tVHwtNwam(I&U@Dvy+p3T5I5qUmWZ;np+umc+#W=J8w4f&_}CIT|*nNTFou|
z{N%iOK<jpJ+-jViou9T2PL6Z1$zQ|5z$oibb)Oda<e1ME7U|^d8*CNVfi@t|zJA_-
z^fO#jTCF;+LK9YPuca4*4q*{n`ZU>b<Jlo{$a@XUKEY<bK4>;_uwe&HEC6j{zhBp(
z({q|N?h}lQe>{LylP5P%+sO`|vikmu101<14JV*^AWjP{+un1r=6L#7@A(H&0%IZq
zGuveQI;N4<heEmH$zi>@c33~H?|r4k&5^fLy8gd|0V4l6O-pJNszyawCB|Rt^br&4
z;cu0hjBqY5o-U=0dbL>P3Myv6{i)rsr;i`&ZVIqLL~w~!(9Jr~C{3_xag#R9d^%>o
ze~l7r=t$U&CtMcFm`=DC+a|m8to}}RhFZ`b_e;Nqp0n^~+Ya3J@YSF=x*TP93E}_k
zzs`=nI^13}ck`?c>^)x6>fNq2;C^NM=yLMP^<LpAIBCAjC%V15x+=OO%jv+f7ac2D
z%kG}QS6n0BYFY&-2dhRy9MD!}caJGatSPUFLbgn1pT{IX<j;wQc6X-@rg^|5NX8zO
zFZIWsh*Ify$HGis9A`yl#JSd1gBfJqZOaJ0FOC2k0)_@;d?GvykU5obLaScuR6Hp<
zAc8ta4#^Fd))CJ@gT!xSYNy?-q={1)E-gG^f={rek5J3$D$ka@C#?sX1$85uR4#uP
z+7t1|B8{=^E{7=#w<es8dc#!D3fys|LtZstyP)%dc>~MTv4SfLz)VHT+I#dUlyrzy
zLO>(sxxU%u7Tb<C9b63V$P^cV`<v8bosfHE;Dj*D!f0THF>u1zqFprPHh$58;fM%)
zGKMt5xf<Ig1*p&A(-4WKaCwhRIxC(}USA@6Po@JL<wz{zU)rOQ&V!`^72hk`CjNA!
z+L3#HQ_OHA7TGahY#GdloHfv<Wp`nOr}&jaPaxmmd~{cbkyMl$igC|n<fVcz3*0Dp
zr{R;9vc{0J3)hgt*1){*DHuy3#1I&dBb+3BTXRRj8a=zM!EDs20vsw1R__MB%nhpp
zEeRoIv(YBdQm%#@4i|T6^eGC2+)-48&R7*4&;>Rc)EgQ$7tZlGmZwVU1jYh}%MqG`
zAe$T+Kf;a01}6xV8jy@U^0t7v7Y-f|r|4$tbOqQ=oAA_0wgs!C=Gr9j5c$JRt_XKz
zrra{1^$gkcguVbgw*wIFkPRe76eq!3uZVu-NfH*?N&|Z7!WxR#%-=J+ZpUuE*cX^6
zlo4{F9616d^$so>@3ITapY&9;$pLoBd*{~Yyg^8k*8HiC=gE^D_m<<kd6ar7ABGBt
z#2iuh_JB_S4N(I-7zYwNC{>ggUm*_%MF#S%up+-P0>u!P<9!~yL>QitAACySk=!kZ
zI5YXKz_0913U;J$-ikzMu}V0kQ%nhp)*!W|$@9G)a3U{*ILKrf0V<=J+apIwL|N~F
z@V0#|yao(>;g^D1k>Q2NK;Gdx7!og`TjQXB&;M$*h58Y`A@p#na!Zs}fGNGjDWCrO
zmWca#s71q(6WN*<XQQgi-KVMxSS0kt2S?_Ta`uqYiMKA5sB@-rw|Qm+0{MH1bSR)j
znEZvqW*|oqcA>l)_cxScTq#ZZR?#+HR!Ot=on5^M-Ml&<^?(2v!%DtfENS{Vv4MSg
zVGW0wFVW6&?aNF09C+}%=ihj*)_nY(NsIoFf``QdNvEf`U7*lA%W7x#>hSC-D}%W+
z<~pIdPH3(Zn(JiB{HWsLdGs!jn2W@&Q_>SCS}3`ot10w)FoxYC>qTQynYl@U*I#rW
zR9LNdH}cAKE84*zSZq!sclw;D+|SS6$qZoG=@kb3@L1l@JpoKzcW`P?p2u^h@5nD<
z^!R5~U36pMqRTCAxf0uAlZ$n%NbR<Kzujd(O1Cobt0)Y14RmL?$3Fwzc3#Y!2UMK3
z3xu@`DI@mpX+NlgCRy-mbFzGcd?4&`>XTBa3#GcU82XH;^W&J_o^MjoM^R@AP*W&8
z;WOF1M(C;dBE6;ju<|I1;b$(o<;&mxjYZhVn&_YNv*9Cow7QpL@120l888CWy1tnB
z{VcLUks@7LW{;Pdxel{p3kmP7GMZT)a3(jQuW$}E>Yxa7CcMBzFTg31Kkor_{`;Sl
zaooZ1bFufo%Hzi|`_J-Y__}=m`$*samQR1nr@!UX-}32i`SiDZ`n#{EKaM8wf#d^v
z?o+QnTu!b(<3t|&wm&NrSQyJ7QNMkFDzu_6#L229)NT99@*J4^As5QSnFzbdu|i>Y
z>RV!2e@mq210i&BtiB!aAu1u(%r2=b;T}zhH)LEFA;u@=Lhk=7+^TY^SS1~08Ux1t
zed~uqt#uMQ3#y~Sv4lR{=qaXv&Sy=eN<$39g3czn%vZ?1ftLR5zkL0(4Nd=%VVn&+
zu(;JB#!`?S+PbeLSm8t48DHn=aRwJ!8WW*NloWlb=0=V0XuVpAx6B`SGWxM0yCsYS
z?b*OyVAu~ys<06rct$?ZVNi3Snu!YczOd%+jBhAhkadBgL{GZWAC4U?Y4knI%4w@O
z03N{!4J7b&OH$6L!%j;URiPMsOO=)WJ9)4Ze5Vm^NmJ)SoS1bjo;dMAVC<++hT{Ax
zC+N>k4`Hb^2#a6=ap-OS(6pH;d@gmZg>glA<3m70@<lIk3q8VNYx1d7^Frc^g_94X
zhA-li#ZR}Ka1kFl1}LalgIm_vUo$;8hj6u9ql^Na?gI9MzpR4Uo7leV;iDRL!`D)z
z;-SysxJV6lqt-#@$R{6P+Uu(nQ6HYd@e5yN41CPS^aILp9V_>)-V?%|T0SM*^(jH~
znyP$XbT3_Wmdan8$i1Q#MK9cpJ3LKq{s51mpT-;KyMMS0-z`6q*UEXDiMaU1(c#R^
z<R}D78Hv}GxsSyX*{cq?U6-GHlgX#9zrq32#vFT<J=Uy<ETx3Q)NV?3EE%~;xWMOz
zXz5yBnr@1sHQe+qRTW9oJxeMhal0iM`BFKF$7G>gq#{y~_&2IU5+d`6^I$y?Ia<b^
zqVILzJv`4xzaO?jcMmdc9}=Z24;k%G%NJq2+D8;HOFXNF7>na6<&N8rIM56oI&>)-
zpFGcro!p%Fq?-D0s?)TT2UGE2RN0x7i^ADg(Z>L-Td_nlHMNV8=MzO!Q-Q=K6Rqn}
zGI8KuBoqB#dO=$181PCmedJyl<>LBuvG(LkxP`#)x=(-hTpz!5=}*hVq{j6Kejnn;
zCiKn!!0%J{>iTx_@Y&+-%&eOh@a(UAM!z$K<iQ?|fkiicVLgS0u4uk$_1<3N5slE0
zgcKv)+9GsMXAHn9!R@YWB!336yu!7$CR8lxK@8K_%LeZ~{v0r0{(t;pZ}0B1{{{TN
zRyL~ZG5P;SrB+_b|KG>Y0<w<1^ZmLIafsWN!zk?)i3xmwIAGhA;CPeMC#sL<o6P9Z
zp-czOFD=g|8P9<Ac@#%%jbAk2aH4lLV&YLYL4B+jOPo&8HxU!UBckn0y4^hQf=8{z
z%ckW4v%^W5y1tOm-Y^FkNHB*{mC@6H*)vBnK8m0~(F!tXCf#Y3XB=`g!x`^Ty+?Pl
zrmqa#d0Y`i70^lOrh}+4GQ>dEovx-Rlvk5Rmi>a9LI}rK&j1QDnXh=vU-Hm^Zxsc(
z5WLV)lfAHg^vcx?;9Bd3wzH%rX^b$UAiJ^#u#j;1V~Gav)-y2iAz<-}>&10m`J&q`
z)QXQu<RiwOBgug9D4|{k)b$LS_{BCwCh>o<D{kH7jj#wukGeL2);ipyN$3F(u0^ps
zOzL%>U(m2Q?u7mqD&o=Fn%+2fI3l%#VHRw6hv7tcsFa?GQ>1#I+kIwm0Yi_q+s}^A
z_uB0o`p`7wx7`~1^{-{#(ZTV_8Kmtz&Iyz2&(XDBpS7PI9z5B5`LbPaog5wPwV#|H
z9JUUQn^Q_5*K%;&Dp!8ShXKdQCS!g*juY<5i4zqH2#6&Y7_is_bm@X&ZXF6Ap_{Yg
z1y<PP_UN|C!#Dg2na&e&tl`ko!<MZKkI}2UPXst{!=_wTtuPt|?+bnQg&rl*^4qdT
z>=J!NU@ro+dzQun=Vj1eiox2!cPO~Wn&ZF~LOk@Fa-+aTzGRV5iiBiie5L&5*!K7f
zk&p^0K5)PBw0?frg1^VKw)>5j?bbJ^4J<1bNHDmFzGx_fMerITr4`}0dEi|YB9AVK
zCTBwAu}Yb3REPz5hf6nd@6%{`CjC$G6m%C(U=ICXUth1q{Xf?#8%z5C!TEnq^Bi?2
z$5ENg_GNANKt{9kH3uA5WXXXJx;o<v;aCa{nh=J_R|!&E%&ryJi^_$l9cX@l^K8WZ
z0FOr(p4lVZBx>;BTo#X$i4;8#6rBPxqiGXqkB00^aeEC$sksyrl$S<D75gUOvn$dZ
z;AlaTYR$XYX2jD$Ubq><aR@(S0Pat2B=RH+4>NQlHq9g59ejo1u(*StNZeuE7`lgW
zb|>zB%6G%U!foC!O!iA9cGNsqnNxjNareHousnDf(GcnE0^LzXd=8QSB_14`q79-a
z3F)**9|ZZ4;OAE0dZ~h~dAQ}9MfxOyb+fyqkVsCZ^2#feJgwp?$WL*&jrq_WRk&!h
z=NE8gHUZ;vLPMS_YB8@J7{$J4q03~8heRa2j~fzo#YGiV3{T#RBf>D*p&GfkonpjY
zbP#Cyfg-9HVude$o;w@S&Jxj6daEP;+H_o@j=??udLlUttxb^KlW%>zbjX5-W7Pb|
z@Wt!8o;Mz0EOIWDgpGl-?aRrCEnNBK3KS+q9PJP}5$93TVTiykNk7yfR~fozOZ8Kq
zo#~Z6TVzdwX9VZI;5mszTRNYqJW53b4e+-OIc0W8j{an!3|ut^krd%#b7%rj&0XP!
zyCdd-hfz9KlU7shwn}QZ4RnCmwc0fPw&`88M=miOxg5JIZaBjO5SKduBV4?2#w6Q-
zgN{6}+x6EtmtQNE@Y9+!J1ErU2z`>9f1Z+&-KFW_v5#mKk!U@|>ok?dV-6fB>Me~u
zUtnkdlGslIKgk=qP7LvK42>#5=&wdRm7-g5KwR9TBPtGc%@LeUh4%^j@+U$rC`a;w
z!3Qm70fM**AAUwqi4@XAL_rl-T6zwQY`XNKjR1n2D88Oq1B3$JjXDNH5j+i}!iI1Z
z2Q+;}+|Vr>d2kPu>n~R=4laX5?tAFi$=+y*=-JS`L2pkO&?xm)4Fla?4BU=(mQz8X
z!k)!SArQoB3MjC8cAx;75Q3yNgt$1O!Ol~ND!&V&|5g$0L3z%DauRF`L%0vR=$N-a
z%U9?iTmx#`sf0&g3tu557lDP3n1q}YRFY1dey`|Q65ryt%I{ZpY7B8dKYi2ywuGLp
z(A||d3FLpD?lAOTO9arm4m?wlzDrl?!Id{y%*Pe_{Jj@1%(TuZl0Z$)OkH63^xaBZ
zYd`?uTGJZgtYDbo^77Ly9nq01C*h5b2MO*;FOl>(n~vBtPm^57Ae8ZQat}ZdKb#~Q
zCgW2sxZ(cFbZ2kY4V`+7%T68*BcvsCvIWHk<WtoZI|M}r?#0+zv*38*ho~RU5Yprz
zKWHCRGw*+t+;+@K093Gj3H$38zxVH3|NC*J`Z#|7tCnla``>$!|E{eG_q&K<#LF7j
z5)LRprF6mul%)VCfpzG4M9`ee^4etuDqScUs(63q_os}aYO;$qN^k<VYvdR*C=*zE
zF7?9zHxG>LWjW6-E7=@!DR$owdBuU{TmTK&Wo2HE*MxR>t<UA?vbiF~v*;Ns+dX^2
z?A}fmk_%8WE1&uB^X~R#b+;9tD_r-gyW49}ad#!hqf&=m$Yw+a_798}V5jF6QS2S@
zwRlP2jYtFVI~>?}uuJ|BR}w`qDvYr*9;|4cKoZ$`@Gus2{vOA(Mb@{a?;y7nO)HBu
z)y`$Pi|<ovonfi?s`7err7g32X~=EE5W%md&rrNEiOt0(kQ9h<@Jv$iXqgv~E6TyZ
zif}fL0qGaJm!?+_jIyeNyDNsYvs04dQKp(H6t<#0w}G`)O&5_Q7+JfA`ypEtQ&%mN
zzk9TUQBAdWkjJsFp`CE;!T~Wu&0)73&YX!_k9M-f?4xjjWyLqDTt>yAJV(ZZh&7L~
zE48Y4OO_v&loLXx*$~5f#Ilf?g};&9oLW2}G6t<V%tVfP3%+f$$`*U{XpjrXuC0r9
zOmNiUM#$COAr_pZ+xqk)YrbOlR`P5mfLpn>0wc85ptOR$-W~#*v4YPVuD7C<(Lf=)
zJ+K8-uFx!S#%c>-5mt8LP69Bzf{F~7X;Uh{rmTEUV}RsLTId{6l*;U^=xd43L`GH|
z%xsQ#q!$Q)&>NJFHxu$;npHD+!>j=?2Cv_4LZUld^&fsl^!JVU4p((x8lP)O><zFk
znG7B`&SWWufM4SQ{@@s&2%q253;oa~=P~%&4&TKs!CwnQVf#992#$eQG~7H~)%eYI
z<O0U92R#`53)`o-9X#OKmY^DV^0h+yW?%|@Rw!c`WWxwVY@}eZk~$G_pl3`=w)d?q
zKpY^%_)L}qz|Y|e4{B4Ylp;Nvob-*A&sTCgJB2db#bHsl3n)zhXs~+H2FH=UceT!-
zBaYs~RmiJx0fy4Pl=Uwa_+szu48mPXY=qhSo@;DbjQd3H3)>pnL3ZWwJUGMQc0^AA
z%Sb{!g%*k|MFtTb6b)pvmLa76sxto?p`nXWS}b)%cSBX`fx#1Usvs*w`zu5m#%2U7
zQ0xNodd;hCMw$f>8eL~Ou!vQR6gKqI1+^}@uwu9(O6OT$6l7(f;pvUart*P1+KkNS
zksMmuN$`0L3yfdy26Tq-naWLv>5LXi#SV_mNel7}xJ5$Uyk`y2wQf!w3>jnVgzEDB
zNNpC%xm&YNgj?i&&@sXfTa-wnP4>Yvihy%>lt8Hn$fPl#h&oSh4ge+7S%o1KB$t=f
z>+0|htsK{1w9XohlmbcqD^}qAP#*;sXo`Ut+3_IKD~xeZucr7dKul;@IQE=m^II5k
zN}IGGI@FMnD6Mzbvqr!?3)(Nz#3n+j_@w1o*CW&E5z6hGK<u<E+dnk@K*Pl%eKZ*R
zZF_hzV+|lTvIw$D(f7LHFdk+5!K5<cC7g?FT7@gSM<Xw9q~%^p_^(umH6*?crLZEf
zXHy2)27XGX1LmZ8<8TbOLKx16uaa}YM!LWnTRqer3u`hIMP6aWlYZY2Z72wl6jr1c
zIaw5}k=}fasZeb?Q<~%?TQ=Jr*xgql)1G?`z;kgiurTw<I0zt3RTp%gxId`ha)I6m
zxaLTvsNQTYF@|H<FH{IIYKLwrSc3cG2bNIlKcg7xv^G&)A}w9eZ(zL@bA=mYD&7$r
zd|=2rIl+%&)s>vi@zRB1;3N03gC?k>Q<y<YO`I6Me?sj?E1N6nT$lDUt?weK-lO7@
zcE3SO*&R4aMH|&H%x{S;e6W*Uc_fDRXeIld#u&fRXKC*2P}B?HGd_D@n7QJ}L_W^Y
zK0w#<Adwu|>O~hpb`8Ue9y?WfYewO^Wc5=9NbFZ+Z7Z`ES2ZCI9M;ZHTg^E12-v`7
z`OI|eAyqkWPEk^4`M!2hLBIv<BQZU0Ocz5_w17J<g`>E|$}F-jw}T?Jly7oTh(bzj
zAmE=tD~g5-VnAqEXqj*Z4k;o^RUHh-DM%E)o?)O_V0|%*WYvP());S9J|yyh*SfFv
zG*U>sEnit$cLN2+Zr@1IOkj3*R`}UpQAjb$dnfBeWC~*sS@}dE;EA;>HgMojPI1}U
z&$FD%Rwr1jlcY-WwCc;p(Vj|6nTuTXBgL1woA|v9jH0;FL>VZ|tVA;xTW_L__#_OV
z?B;v!5IO%mwwb5vV#r(3*PTK|W1EznhR7?UB9tsAsySj!and}3is1;{am=@t*Ld`3
zi#a=b))p-i?cVU>m@{5;NxCRn^n;rLkYX;?KeYP6j#1uN<zkgw4w$<pN0WeWN7pNQ
zO-|DlQJ2XPVjd=UxDQOh(hzY@Kf*ZC8T_`?>YN4uDA}jC@qY4zauOV4e#8K`!c5Lg
zwTK|kSV~jg#}k!`C>wy9+3(GX2?)&|1)KQJ3n(o+t%~aqPY`1*H9=Q|&(mc+4~!&-
z3hYi1NLJ4}9~s#`@PAp2cBJugI)|CY&}lW(c+FHX3oj^d$OJ09hWHZ~XG~ccooN=E
zC29&NjFd>!m+YWC?+Z;gGM9N(V+)lwDt+8<So*y`qA1B{An+3>a@2E(k$7_5={-j~
zYPu~G;c`MuWJ*xO44renHJ?SyUXo{;!Y)O5r8i;_CUTKd<ReF8rtnFjEHX~tQ%frm
zXQKF}T$wt_G=*#|WKJA83IHLR#lAh6)qS+sw0`A$<)tKYm1WexCWV1tnZgpPLFtzo
zk}j+S?g;v;mH-v!66bV!g>_Xv69LQAD?fm0p2#x)ott_6m+X(hNpa?ZA<-rBUuN&d
zh@mqSG$dY@ws?jr%kV~o8sDZJu(1=^1FqbFtH!c&sj@J69*9?|8w#x?{z&89C}b20
z??XaD>TqzLDHc8Y<L+AgnLMT%{J8=^(OsU|MXQ*X8tla2p=eG;J|?v-5ofC?x}GZS
zP_uy@!=r!#VWb@dRVs5UF%kDwKB^6lLx?X#UMfnGrr8DBjPrQx(Y7>flij;CM*+t6
z<@iQ%a|tdPXj~{n9Xr0fq_4ynz4_-YH90CC!G|gJ8KtQD0To>#D7?d7%jvy_tN5I&
z^62blmIwOINJ^b@3j8NGdvf0apWuSh+tEr6dQZDXl9;h)`@mA(Y-SzTv9fQXqj>{c
zj>aE`<$0Re!wOpV&~nFtVR{!d48KU)g)pUZNt08&$q_iC1R-|Yi2-x`<pHZK19bi1
z`X4>_`Ua25yR!c)m#U?>|G$me`s1bk$NSO$ke-i(YM63_w%2TOGZo!E(prXBU`@Q~
z`TXXK8eD>!`An74Z^hXm8FKr-7B=5mrcBd$4(W^d%uq3Wr8>G$KWUxp<ruWvvnNGv
z_sz+%#9WBD`1M`&D@rR~>C&541Lq@#J<7A*pm=3^sD(|RP4olDbtT~&dF;VvhURsF
z?yQ>|C2NS-4&PY0;`=7Px!L_2YJOm|X!-T^spT0l^LUfQD*TeWiFaH`#kur7^MY6u
zeIlf?18mi*P#zZMifAz{7#gmoNPbj2u6BoN<D3NGGBGA@?9kUJz}`ll741xpr98Su
z9Xq?nKJU^G$%T1j-c444T;L|<-z~l6Dx?ADDF1Q>54UU29S+Gt1P+u6_bn}=0`3+9
zw}w6zRp4;`mD}t`zZE9&gv)`TTj4I0OQoN38pWpuJoR8;j(lsA$xo4N8r~+KdnAK5
zhYJ&L&V}_-tHkAEPCzP|P@JCuL1H`Zb-}+hfiiB=8p(g&MaMKsc_#iZme&PcEezdX
z6>eYa%IdtbgWFYn6I*#WK+)ArB*6M7(=IWMb|E}#MSz6l?-*>bfz=PRlT;w@mTCp%
zI<M0Yrl7Bl3o`6foA9(uomE&IUDK|yK=9!11b24}7CaD~;2PYWK>`F1!Gn8nw;+SN
z1%kV~+W^D#&-?9@ebfhi(ATxPyVkR+?x*U$abAz;0fBvk5@CMn^GWES46)dtRQ&qw
z^_Q4N^{sh2Nv~D7^EuzfBrXY%l4P406=K#qd|sl8M>~>YJ7E(j8)roNhZDt}n?Yj>
zXK>G(1LnSgiGvRqk(F^NQBHbCrCSFqfJ|pCXHbNREN2nB88!lv^Jh?#f95kMc5MaW
zlczkv!**gEN9d|V?XUU|^qD=BYKTrOqO1kEDPq4wPDcLiC>={54`6F@?or=qVreF@
zVi)kE)Mjnxb^mMO(du?X({S^n8IND<(v(B;!29mfJ&;e3{x92$;nCdv75y!WAqb+!
zL(!;=J5BKT6{#~NpQ8M?ZJlnxH{O#u-_&o1Ss{G3l^AR1Q1r@wbophzve<BMRL$t%
zak=12c>AspaU5d!AG}twlGM=8np*zo7O{<e-D!p-nUNNI$l4zJy?@t!CfkOUZ%Xyj
zYz42&8g{bBCnGf#8C0t|K6*(NVz!r8nR~7qlu*X+>P0jB;*!Vfw4|llP4-}!YKuSm
zEjI6Y?59>zrMB3^XD*H&^Hv8gL>W#4;yT*Rm#((OI1|jhuF@Lj%)1Y9$=D8kbKm2F
zCeDFC|8H<@#Q`Dhyw#FHC7hFWnVCJ0@~3Rz2!WD^7YcqHhM$q4Xns0}kdJxtCvDZR
z!Wo^>ks;vFm#r83C%RO)kd<DDYQu-r_vVmcq)YjM--%4|FB3Ca{JLO#g#^UT%HxWt
z??JrZ<Puq)c83`>{l&hUW5ma8I<q6$F=1sM;I|ikYRl54vfmmS`uE)k>h>7JIcg=h
zQQb;NI&MEb>xI<CEU|X5Al-%nJw!gNUJ*cXJU^p8gIrJJc;Mx;#`09j@V($m+^wA~
z5)_M|1sh`SZs30#pubnRO@OigrHsrcN91h%FxGpR=CDUJi6)WKS9->1W8coYTJvVq
zH8CLH+Z=Zg0keeHW_gr{C}nBFNlI<zsxI6yp~w@yPEMM$U72#q&`5$D2{F0Ei<@*i
zTuxqRb(z;Q1M(j4Yo>t6D#r9__5&N{YD~0=V$Ha`9C!Tdc|PHw1VQ2K+Pf+~)v9oS
zdleXTRr_Y|hmQ3<Nv|hBX`yc>>~~*P>ImLD3`o5}@_XZkB}Z#hTBJ0BqM$<B%Vcv7
z1iRX3>Ih}j_2(ZzQ}Li9mn?g%|2@!TT-JXE7GOD1e%yG5&}{5}WBbOz=c`B!-cG+O
zx;ouV7k_P!GkIJkPnKd~LeAG!;;`0@mg|!ePC!5X@a5}jNw?SPXtMi8j+LsziP8~6
zfczSFMCvE#*{4P+6Pe6tHYzi(hme5V6Hh0f4191k!bZ}gALQN7J#mM^AMDF4H=XBd
z_aQ#9AnJ-8EiuF(S4LLNdi!?ES`KB!EO{LQA@s(xQ5RkwgTBElSTez9bf~@`teA+5
z&I5CpbZ&iHx&91$0{*sw6Vo5!>s#39Sux<-9zIf3g+YJaBh5e+(8_UycR+cge#ZJt
zAQ@$J7CxTpU9TuOFJnba%%|_w>1X~l|44GIMHca0J5w%`;ZCK*m`s7P3%@;#{P}0u
z?`Alp5R+SH<U(aj=8)aODYSqc*@PH6@^OPyN1nY@*Tn0#s@P*ni)g*U#X4xUaDD8l
zdWFBaacj5bG@crQC$uhV`vHcQ`=I2o&MH|YkQc=d!|byBWZKBybunqk3(HewjeKbo
z^Y|3QqTP53DPsB=n=GBLJSH}6dZ<wom)QNMX4>Rk#1QXx+v*8Bi-r~&b0|FK!9@&m
zgIL1e;)&og`Q{9bM4Z==<;<s!0p#CXL3R(*v9_g}qi)uojI5BnpF#ur5y6-*m1V|{
z!VW^>u7rEFB)$=QjGk3H_fD6aba2u#`$<K>49CHjX~4j$khByy`O#PeT!h0ea!(U?
zL{|;H{9wD6rU_Q)_Vy--PWmg(XANmnh6<6DMr_=;Z&$=dPnQjLgvLC6xoN+5n-MW=
zA8a1g(+^-nGMF&OzEF@_tL;14>b~;U{1w7_NYo*a5`KdMQY4PCLibV6I<=I$O_xEh
zHPVSjM*34YYUrbTQ)PP4ao2dsOZzEG-k5OmBwT#YPs9EoukJ=p55;rQt3I^*PmYe0
z$tt@=azZ%^Ey&O7+CQ#S=eafLAKYr4YD88$_O}~6hkzvUy#6!(9L6%s%92ST6+6DB
zj49l+X;wBHEd0Po|Kfy;7k$2w9(4BgQ_iiKtT{VWLcL6d42!J32QazeI;IAnsqQ>}
zbf+qMCz-;pgu!3F)NP~Vh#S39tQjc8*L(gqB7v#cx4w04T2}U2I;;h>+@KMpkG@jX
zvb}ETF2Z#wiry1A`g*6fSVVb{fRf{n^$gT%8iFGyDCdf`cj9TQedP*49&?uJ*R+bN
z-kVRfSZ^Y~Pc>H4(_an#u=uP(B{^3WHg8rKH2+)SoWvttb&6+1PV`|lzS>XTn#lHA
zjqt5-5YaXU6zde?)os(w#SlUb?JfUV<s4hxawziyX{Im9hH+a5_x86xd@15X?}VH6
zx5bDLP|B4o6hHl4me8D2ITV|!1Up5UTTWz-Tq#cy8u0XQxtZFJUf%FEdrNN-dMEPs
zf9?oN`ppQ1;Y5kPQNp65_&Kg%AJ^4a7V7p<jDf7M3!=*0w7_VC5?A_t-hB4Nr+E%r
zw6LZl+ef$l&7bsQ=dk8~T35%XTUV@mw9oy7xu~2RgX7i{_N0CbksYAvewTpNSx0jj
z)!^pNgE$LFr8PPQ`sZcW8oV9F`5(d~ujf?|l+M>`Ye~}UV|=K10h&i9yRl-5o;maN
zGdu(c@!g3J+rIm<2)s@JshD%?1ukBN;l}e80RASP^7A>H&E;{4GjZRoe4<7C7rR+D
zMJC>JFTOE?-#ovB&E<pVAFj9ja3SQuTKO`#y1yuTlJbVm*$m*%E%~LBFWr91RS@nT
ze}~UBzVZGmdz?YAc}8XN_J;)6R)*f4hOvav5O+CBU!X3q1OC*~`7+a%`kz?~x7v=H
zK;}%SohW3IyErPDl-{B*oqx9cu=B59DL+@^kMSq;MjAnL_Eprj#K?d&-+r!Tb<I9J
zG`!mYIu@<ybhO0soPj0z%A$zP)~{WwG?H?gxEa{LrqsKfZn{h2?q|dL<h4@vX`(B0
z*1rb4aXgs0cDODM*vcqWuG;JDdnF`;ymTZ3tD}>6OceSF)jkUei&aVr3s+bOE$GR)
zbG?gEb^uuhT9Fv%*?5Mpl1{bl_0c5d$5~S%Gl`RWBYUL{#b>6TV6RfMFSKxy$8rl+
zQ5xxUuC2#<{e3)M)f~Os?uf7^x?K%cQ7=)`CQKJF)HWRq)+W2LW<ci<*q7T!dEkUg
zDr?wT;+5}5AEyxFY#!DiTuH`}$SSoi$USBzGgBWoefsQsf9qIus_)--GPNedB486_
zpv9|v51%4j_R<{}Pp-xK?KnYnM3WpluViLeeF)#)^)rqfb>LjGYJBQ@n4(ws3D%xg
z3&y#T-4PQpqXkcFb`Fm(0jDGEwl6E{Xo=QxBapn(C8S@|!6H@|Z4H-^g7~Ybyx=%@
z7M}X@3ZtN@zv{bcUwqEkh&(Ov6p^%Rc3JYz?l(t$1s=OtrRMX^Q_HKOS$E-wPyX=X
zqN*ak0tdN*XT#&7?mokGLzs9@o-<BEmQgdV#)^dTZqfxVHSWB{(4glL!8(yH(fdF1
zRjnm*Q;+OYrHlwc93gv%`tic1^l*_>Fkh|@tfcUyN^KJpmWfs+@;fNN($dHyv*h)}
z<FJI|#ccD?{XS82VqZ&SqFm3pF*bII`gqUcA9blrZ<1gBRq1`TYGXM)3{#sSB{!K5
z!yjoqG`i!dkQLZke3u0_!gJppI#L?%hS+?$U<f7CKCWs-(`Jo;JNR);&yX3_y&j6S
zeG>AEIN+~bYWtXOsDBEeTpa9e3Ye59E7w7=_lBB=#vq`Jz0&WZnKrkjv@o6UD8rds
zk5kN80;ZYsyr<GzOL|P-<d1hXX!K@-MQ$^<Z7ky$KDnbdrg~C~G_r#Qd!YQe(!7hA
zW<xz;hk?_ZZ*=Tt98+-|1(<Ki0_|Q3RpNJ=(0-fLdR8<^M;rb?Lz%<0dmq1cY_0kC
zD&1VD{9&^DtSfpdMq8=A4WBprpL99*(-U!%c@BU5FzjQ1621oEryP$AieH%8K{{16
z7cqH+wyQL8<9`{Y^r3WgH=|E}IpJWX0Zumqerg06Q!3Ijt^oDkf!Hb{@*n;6cFOcq
zKbxB~)6QiVw<lHy$Ci})0sQJh<>`wDq6{!kJtvWJgAJv4i5y8tUzTLh%}X4}F5)I?
z2ILfZRzqOoyI<q99tTj(z{NiUzD8e;fv&XNL1o+>cm(3LcS=*k_^oPYrWfyRIk!NL
z7sCNo6bP8tF+H9xQ<o;ll+-vtm5y;Q@C@l;ZM!<X+(k{wTyx4f<Jb2hIO}a_Q$b$S
zG}qut+>jq?!Zub=0*@rpP3FZ_<wNV@uvkMtt_b?<m7$m&X1@y;VShphLJaK<7j(Q;
z#G3Qbx}xSruF_rp$Ax<}XiKiw<es%)W9MYeO<|Evc%2}1k%QiV+S-TVD2(4Y27C`T
zp_saL7)uZUOBw!SUk%N-5nklTBJuw2PX0@6DsR%@uE8cwN67pf1i39*`YI_bDYRNz
z<Ikv|Fs<Ez|JkvA4W;HG{@8?n`IQN^iL?0a<zgiHT+#`0xwmGo)Ibs-W*N!9W%EFP
zb<kw}5+-ik>#_~p<<X$GA~JIxx$tcF4_r$%%U*d2@invDa++B=lJNV?VMAg<PyBq;
zW42SdCMGE^E^OcQpr$^yXC!|%+=!66HkcT__70(e3~`4neO)!vDcVwN;G`v&a^6h%
z7H1mC<t6R#DY6&v*giAqkw_ehyqEqR9`3V!b_u$>lE(lRMvIGI9{e94jXEyt9hVoI
z#Jlm@Gq~owUf{TfT!?<WDMO@OyOip)bsZ2ONoyZd+z~sm;=B>*;}t;dIr7A~{gS&9
ztIaih$x9co@pA=3)lp7_zdwB{glu0H<-^~THaz7Mzi(cH_}1|o){(^3D+Q4l^ctr~
zoi3++BsqFUb>bq5$1;=0!x&=Eq;aM#baRx~iktcF+Y=BGVuO$cHUa7CHT+C6wz|MA
z`qB{Jr?(e;?8KMfSE|ocGHEio<=FRWV!^d!nWy{Kiy}2m1PN1h(FDczn}UWJv1drT
z^s-2m38x3&&SY@~1l7s&-1sRkCVuZO7E<UiTu~BM)oKg$a7qaY2)08c>N_VG<j&0p
z@7RSben_=RsUs`b4X|GVDQAX`V<%Gz_uj35E+r!b80CT8M1}zTx~KLXvUn6onPH{Z
zsDjxBv4OisGJzuH?F5nDXr;O*@O(r{19t)MO-6|*4>W^`<JWNmB;g<z>u=!4BZ0X7
z7f@m2H{d<=<Z|N+Z7>xv425Xl#de(%@%8SID+FX@&<iA>!Em5s@4o?Vq|lqfw0<~c
zxFF6;Z~~{akGKQ&nX$9!SsNnQ(E`6+&ZS2TQlq^C1IqW7uU{@cBmpY$M?O5kr9pL!
zuIlprUG4}R9cDu=W+n%ra2)<Vb`5`Avm^<5!u<ZWg!0ZFwSAyt)W#R$8J=I7<ji6V
zs@&nc-a!Ki%YRSh8yzs`2WOz$<M$ytB;U*#HI}^7N)8h17+Bi`#ykCnE&(5A;7c#C
z(g(6agYLi!usn|9!kke7&E44P8g4{hoSpzk!ZTpVshtLsOuYo|FToiKSk&1tXI<~O
z>jVn;RYWKqN2fOg)bS6{?*-a-fU9wiEsz-mcw9X1q8(Dw>^T(B?DoAhVISH9EOB2~
z?g0&;DH?J`Faw3d(Xt?N`ZFg$Xd^MB`bwAt<e5znm@KtnRpVgo=o6ln*bQ{63{?}k
zHiZHy88#uKE)=NgrHS2x6WqZ{jM&@BVt~vQ0L{e}zidRJ0WDT=+luf_e*I|^s?ysP
zk1RuY#2sdD-%+J5ehBLEKwv{04|tzuQQ^?pZvjFR!Je!C8PD2J9rfq4x;b8$H&4&1
zTaNZMIfdOsWdXyd$}(-^Ej&4H??|94(D9DY9C5!F#10S4E&qgpLN86`w!oylAks@+
z&rPuCrY_Fq*kg1zm!y{um<-y|Uj?w9?2q30ox!^Q$WviG&;p#gUtM3oU#&g@p!8>O
z@ZHK=I37)gei?Dgd`XPY0y&?TCh`=6F|ei-=*H}Qn~#iQQA)hgZU?55tE}kSvGu30
zuVp-buxW%KSWbn0w{}dSuy`vQJKIXD(A;Z$m2@&zn;%FtSM4fc`x!=(z>V68KWB-+
z&w*jxsA{pse`$;mIKeKTx^g7;V<KYj2Wow|!Hr)|q#ZW8Hf=akN#&mXNQ?&Is23aG
zRle2Q_#lV;mA}^epSet8j`k7V+?Znn-4qlCFQ!v><YR^K#R<cw7D8<;RMXc64?SC;
zjjlKhTT0*o;>sTXx*NZyHx<GFqV5CEXJ#RCJc4zGRZ+6Nb<){;y7XpFOhqI#%>N!M
zL9FsDF?}3*$ytCwB@~SOXMXFkDp~5F8`RW@a{!EzHN6KO!q$Lqt`vV#=`uN?!N`7f
z4G?w;Fx&pM`o_Q{UCUL0^`-90SpYai-_YW<w12_l?0#1yy3ZHDr+WKY_uAG^2!WUt
zF3cc4CM3rYZmn6}q2SCEBZj64t_kU|Ff%9r`dg8tyz4Yt+IQ<;nC^{i9-cOCHj;j*
zgYOXGDzlf9`fkp6E5PiGwPRjOrVjtyW3bW+qX!g3trQJV*KfCcJ^ITma0)MR+`qRT
zMPg{MirNGwWhIW)VGPPGRVONw=^E4+G+$sb=y=F(<)lGM`XZCXb?bZLL#(Dh{lv$T
zudfM_>SijVE5!Q(@Q$_kTfBM-Y>|*X8qo;QDw9}6ss&YYHO<GLJl1m7z-0<HvSd0F
z4W#36*3z8sA0#W2tDB4x)MkXfvkUNb_8lAAE*@v%#mv!QM!X-5zVQPrR4G&qg4!*f
zi|q~y$!WuRQuU_#u61sjsut--V`RSgGAyRpOwx1D*oBY&JzD+hI@V&aD(gJ<yqIta
zV}yV|tdwDM+(SlezbvhVp9kOUg7)qp&maN`&POoz7Z`|)LB(`cDR=Is^24Y=WMq|N
z23qVyuz%7&{f5w@SsMLGI?P=)IU84sJA4Tn0|GbyL5@Uzcon<4_{x=!HDd{qlcd(~
z<ZcGcxkJ-jb+|={n`cA2;4Kn-k(z#xg|gr#DlC^L8TS_|jOgI^q42t`RtUR;6m}N;
z)pGlOx&)9yz^?2}Qg|(`TfWr~I#0PLWFKFq?uuWAO1}e%J8D{R%xOx#kRk3^mgNi`
z4{>8$l1;xlNy_OaUx!`V4z80n3k)LFt;$nWKb~OTn5l`oIVQ%AL6L2`FE+9))Gd8z
zfgh=*mgsj>BW%Uy7b2sPE|38vh(8C%y}S3RVcL0~jjyDnvI3&}mgE*-Re1dxQ33od
zFdCFfx=3U45Z$<T@=>(Imse-3=0I@sVh8%#$PkQYGf>_SojXr?%$yb~xeEB2Ob+(I
zV_c81_~pC0c83yyx{d(De!w<-D0oeHDR%%Vf~mKF78nSu6%^<HjCB{bP7PV?ff{cC
z-95pZ^IZgb>(gH2zFwQ|uTd!ySl)S!<C1nyCfGe4S-MWNM`WzwsM~cV*Ymb;Ta84N
zdi(cnt+IOIcKY&8e&_;{kM+&R4NN?-32Mu(M3Bgs#R^_~^9;nrr<N9g3GtNiRPw9x
z<3++7*uu`G2|8XmzEz2z?j?u?3E^O+-akOv**(LiTxvF~fkF<eJOJmW#0*84K`Z|7
zc;@cYM_VpR5s8QcG8yBc+k6bZgbxbXhi|%U7ilJ?CPZg0SjXHed&KX@ZwQYw3($I$
z%DO(Wxmi5=niet;`n{*j*R^#Z;Oc*4It^dJ`Mxu^5!&)-p34#ZPz<ysb^|GQufZQ+
zWCqSot~a`@;f7h-+Rc6QOJ*5;>!7#-+3ny`$)b0NWBrSPVDikIy`hUB5!@;wu;71Y
z0X4sf03L8-w)?$(Nnl{&v$V<RITA3RXL1pE(Mve%gvXP9g~XQtuxf8#04GiFp`iNf
zTp>W$7af`Nh3V_pHy-t<-C`pbOM%w5BQD-2?cGL!&3(UioEENM$azJpiHMm`Y3A_E
zs-l#U=&2P-TFO!~2U@CBtXA^A{3Y4q{cDKPXW3qjqG-D-BCc8I;W_{vX4+9?7FJyE
zstYJJxdx%{0C}FM2Ur_Y=h8q$BdqmxpM6T)1ajg5yk4;875ZJ0Ur5a@9|7cDnnatK
zc!hI3igeD6=M#HZf|)apuos(ep?dphA*|A?Hm+V<FTwMsiNZ+cxd5pT*ZcG$OuDWm
z3=j}LUGJ09a7?m&|0BMQ$+Y^#<o!9&6v45Tb_u+GVw3kE^hZ!$*bVNv?JwQm7z4ey
zI5h#@w%k4Xo?j|9TG|EV|D3NLns}OM?wQ-M-co2Yr_y#6my7AP{VXs~kPTXV9_|Z?
z4uZG-yCyqz>qqPqAe&D}w_Z|2k=;$&f!PITR4YLK>kZ!%ylEZC%9lQ!48oCcZFM(1
z@y4xdK$B9w@&{F{fC+1~#+^6S@)^#UVK=B|$Wwkp0A^xWsHrpvRIqIUN#8(P!v2~c
zDN-yWzDnWpwDSCbMzBctlX(1muhmrs=Rsa@EFPr(2f?R*GH^|85a~1+q3F9PaZWm1
zz`Bw9Dymq6a```ooA!M;HC#CwK8khe#utPN`jw%6svqpFqEK&U28=gPnPCX~V<5a+
zki{bmVD6O+mMs5wIi)1i&S47jjFh~!r!Ew>8T}@M!8-@}$M%aN*+Qj^&duf@+Y?x}
zZ0Z!*GXK^qf@{mR=V+ohCGaAB^ZXK<%)$5kkP<5CL=Of1QyPP)=Mq9ACk61IYkp=~
z;u_U$@o>%%%wo`g;T~U=S=eG|$3d3ndgwsCX4a)Qcc2DL#!wC25!XgA={60SHv3eD
zyoM2SQUvw$-{sF2jO>b2H#KPQ4^PE-8|b&rKKHzH#snNC8uV>Vt6}QGlz?OBpT_^>
zTlSU-ilWEiVLwg$LtjM2<QG#hg8rQ^?}}DCQMRkq#ajf=kt339v*{E4GFmc+=$y0@
zd4k6MrkLy=Z$7O85j#EN1AvTW?j_LH59F3Z6&?*y_au2nvO@PowHC$BN?Lz0bm-3!
z_@*u%ooxXluV-Wt68+VW$s6aJ4+2D6*j3CC)b4knQP<~-xHv251vVAT`39!kZCrNy
zcxFQ?I<y17z{iaf@OL#hEt+h0jSGjjZ+wCnOK~fBoHl#KQA)J^IGm~J%TQ-1L-mgp
z(t;_um18q}y0oQ`sXs28;_lrn3k33*9P9jDW{K)<hx!cK*ZG;(`iMuZ(wkVXb8_n3
zzU&1m$P={|_I4kmv=A@jigH{5XIG#F3ZL`2l~F*M4D}IsdKKzB-GHks6()B0!wXmI
zSV4k+1HN=*<}97WZ$E<4p~+R&t!pQ}Z@i@FcLK5&L4^pq8x%Tn`Q(9|a*40(@?!>c
zu67i`-}b8R)^Q5^TXjiyYucgP`n?}5KNU&CG}gwST)}5-G}vEMs9`U#qq+S8vM2){
zn7cvjqZg=!u<0&m@hO38xQA04mZKOk*0gJ@%~6^2TBC_H`^Rg)t_~bbK$Bd9Iy5BV
z2RZsK?uhbMf1{3&f*dyuYrA}Xu}_eaeV?A*H=cvD&tEL(WeHUnbdd)CUMX<=!YIur
z_sk#F(#x`Ax;Wz=GVGN+V!ATi%s!J;BAaW4+5}p>?s1lfV2>j80|89*qI~#nMD#*<
zkz}4)^n{CF$2orKt@p)nZX0*$A@22`23*i#4i<|__lJquVW$g!-!^pEw@d{*>&mPw
zl*FD~y7O?IX|y9P`-*y!JFjCKUZ!47RVl3hgq1>oMZH1LbL*DL@v};7ob0rmn|VAJ
zRmk?os~H}Dd49E^zFU3S08I=P|6rPIcUQkMn_@n0=BN2LKMkZ%n~b7yaX;;*B%8B7
z%0gO4O3#2oc5^)PoR*gX_U?ldfZ-IxGFSX^fQ<lLK`zAZE}<0PziregcfU<vALKSI
z+E#f84!i1g>ZOf?{g~`^LIrFHS;(l5KxWSZr1270Mhy%6f&W6Cym!;QmQXPf_fLR<
zb70Qx@d_8V!3zZ&K7)6g@@T8y&?*SKrW22fC`MtHdq_iM{5i~B_`AtI$mw3?x)jQ2
zNArcqnBbwPP5p`RM}V$xlR^U_z2xZP!bq1y97h!1r_wpn*HZs#;7jtm8QL!G6WfCh
zDUzH`|81{*c&6W>;UAw%!C$2sT9Ln{-Hm`oNLln2WNMwWF{SOQg3<Y|sy9s%-uc)A
zxpUV$ufJ7I0wV07r;UUS>B#z>@jKeQ$q;em6Pm_I29LepbqPx+(ox-3sY{*7)F?!E
zG#9Ff7Yd>dv=KO%E+tfs`8N#4_}2ZaJ9v+_y=jCC(e_~fLlL6L=9^=RJMK5<1kkjF
zieuj5w6PSoGr`5JR~=#iOV|OL4~%J&*I0~puMhB6fVNwZxED~P^L1|T8Au3*Eu4qI
zN{OH3?Ig3LvoFz;>ED8&H0MKuNUNMs^b(ec^MhlMbN?x*yx7f(1lUOm0|r9G;KTj<
z!8MTS!6tEX>>cX#E%_##!LGMfk8@%VpXzq0xS@T~7Rg1g->>ogt8Ouz0TK*gf^*P|
zY6vyQVBhfqie~BpRV#y+Pn@RkzxF6<1d!OjzdaxHd9$~vT=Dzz2*c#99AxvAnaT&b
zlnN3FeGz9md81L@teAWYq%JdA)OYd<cUU`0lo!ETy6=<N4OLM9J)i?QqfXvN-qe@f
zM$ZYFSpcUq+ar*$`vM5uQ+;}_DZ4lR1GmPt#KF23y$?sR$@|z$rK+a&R^xqE=D+A<
zu?kvx4Xz)0av52g{GDY=Pn)dnC?+4N0<6wAS&zbMiZFskP}v{B-eCa0gbO9C=3hA~
z6kPWjZ}DO3Vo&|#GQ0eo?=+jW-e&jd6ma&2|1ihLrMJKh%uf;G%E_vh+wVUXYCyx5
zU!3>y)vd{AoUHWPy#1@St7_mVuKpk*nDP%}mQmcubf#~yD6)Y;TqX0?<-918mq9#B
z_~`wsJ+TAO{&@j<eQ_HKs=yabT<=bN-|6dvp(fzw4%og1M*Vz8fWS-WhT#)n@GXs%
z=sOX~K9dPI#_nUeS`Kfi(xcn2h6%Qno<C0s|Elm(_>mdzoqlX~);G^$JN@gPK`ve=
zzTo*?d)*<uhUn}ub(vd!&JDcOr2YUn&w#WG(68X<@lJ!oE6#7(dpjH0Z~$H+u^imZ
z4tQWsvKyZ54kUT}0G7K2lKfzQLP4snOW;}r0hjhwagO6F7y8#M@>}6x#_L!q&^up3
zdM)TMB6Ui@aLj&*r<LgJpL&XQg0H~ImnEzVCH*(UKe2uPrU!N>`cg>DB?fv3OsV9X
z^%I39q$nL&eY9oL9(S`8truR|(%!E6>vB79tEcQch6`|p0^tUw!?v%9ipw)6vZ{>>
zN}!>h^5M1dlMe~771;(1oJk&-0Nd%t!W!Kl>a^Aiz$>Es9{IskA2`tv%9f=txabnV
zlqv3p(aima8nM@I3lYQYZ)4P?rlv=t)!EA=z#Ghtbr&s1)+j{Wr#H{!&UTxB==Rmv
z)CU*w8yP$x!JcQ1e}bd_cySoux#LfX@SQ>*e1Hb3KrrdN<_Bz}L5re~bf4MdQ)=qE
z8iJVqBo7pE4GeE7-drY_&=-EC`!)yKdaI!2%=6cVXtU=r6ip6aubZ^gpz(Vmd<<<O
z{Hzok*-q#0E3VDzFJ~t$8(S+AufTTeMeIn674-8`fi9JN5Fdl()|9GOp%{!aD+;4n
zMF0Af>J98-WkJUo>pm9*)V$vYDi@TLpEnZl0oDo_jolu0g<X#1w-%~<IS77iSMOIA
z0kJ=U;)n3tb%<L=xrOz=CK&kX95~Vn`~@q~_<Em_`*|wMgoe|Nk{~4|@6^r|qFo8Y
z01n>(5YNiQg7Yd4Y;4Y86;QD2tiJ^wLVAFzA|T`q$`D57*`~Fn8N*78n7{d*7q1Mi
zRHU6@v3%!&EBR&rt3_^`hHO7^hmJB9Btx07Lq3HznA4}3DG)J03E1vlSIPo60>33f
zpDMS!{yum#TIKLr9g5vZFuXgnPPAiEiB+LJG5)oFTppinFMiFy1n%dql+F8#I;Rs!
zz7m0rJ<R4c(bG*Nt?ruD{p#gBk>dS(Tz-9wWw@c_(qhINe}ccQ8XkndTJi0}+)<F}
zz}M8&*qzR8KpekHvW{Z5U6^Do6l?qepn3p|%92y>O{jQbD8uLafh%>JT}$Eby&_e%
z_2%>O`kyRNNdDljt`sSjw@)Vvp3`y!TAj7|ysJ@{rx&p$7qcgCAyX)=a$cU8UwSmI
z=v)vH=B0gq6%ni2E5yaY%iuJY+D2hR)*!qcV{O0S7@z1f_`!N#pD#ZN54Gm~OjMIq
z`XiQ6$A;y)1A9d77!DFUttG-Bm5wGaZI}6|Tvv142@bWfRD0r^rSrC($?oO2{tP49
zvG*`A0ZbCi|CE~m2yR0qjo?zRH`ED$1fmZBNeJxd-x=m6J=$pvWB+Bt+3gPTGF5_`
zLlWl74-}u4gx`&rQOH&uK_4Kzm0T1LCX5;{cLGhLUAt8uI0x@I_|-l_yz$Cu?rrLQ
zS(qbg?^0!HqMtpTgjVW;vGiv!G`L6hw)Os^1d<TgF(pTz_(EFfag?EwctDFn2nuz_
zJCgJhz>1C%s4!U38wE*XLe+gJ@0bzNJcG$&<q<KoipALpF7>R0kj*Kxggyjx?b}YD
zBO1ToQO4a=+Ku@>LJA?A)lx<ulU{$L^d{Z5F90*EUm;Z584*ywYC6D@*=)N25_X^_
zzMYN#04UA~=%c*er-FZ@o>nE)KGoB@jcLglT}`dt$BH&1fkeqylO%Uyl{$09?Q^~}
zYc9J@b6f0k?PhEFF6rRp6WCjbWG?_Ex!3t@+V$eS4Ffj<uzy!zuXw)7&dmI2zbaNi
ziaYfgjicm}*pEmF>sWUuj)RGf0>dPFpmPnpi4rUy%K44KTel#KG}UIyWP;cXDI7eH
zj+#Lu!)&(b3+?O?@h#tcJ%Nv#k0ZEKpi2mV^&3P3HDMKxzX1}Im7f!+C}0~C-`^>#
zi+jHnyhgW2!`WQMYjuSO-hQ``?tS;6z5w&LNnXN(EWM^RPmJcOl?3<Mk2lty&dK!L
zEu+WZ&N-Uoj+6?*Q8R$GwAA6BqpQ5IG?@-B(=HiNNtO+6B9Bvv1sGW8<1mPd^A1?&
zh2=T{Lw3D<cP4k$X`$?l)oT`F@@y<oWTf}8TisYgvY+IP(aP;?4V+{k%gmbg!cKC|
z-}#(^6Qh=9ZEotZ1X1%FjRf+~Gry}5%0K*ax!7J7V-V{gDk%B*_x8c@rq@&Dc7G@_
zdP+s7v${TK_{VizqH~<TQ_mkt-&D?8-Nr3N`!?@{U+2g`&Ky|brN-?&7LdX+2<Y_!
zk?_C-(Q}I>(8^tTrm_roB@TaYqXRNL2sOTkKUc+a6#Jsj`e>=*gfOvTW_{(~aN>2k
zUla-BqqY&_>Ll7Xb$B$YXMWpOZuwgsK^=coxVdC+sAfK~CBLhhZbcnLI;!Nmhi_(8
z?pkdSPqWE=M^_MBR_P&HyL3jfH}RDcwnra|(O_;iQsK+Q*V3tgvh~*DOk9a~O7D;T
zbN?ea|8exIw_^=@-T_hzKYRui@V**QRu5H%0+d1XI=ofND=3uxotG-tl<^&?Qq6OH
zKaRWJJg}LXKIMGW6)+jpkG$!T-w)Rw=gaGlW08UlQC!QP8w*!DV^^~OhAV?4Cc<xF
zejV|07%C85t4-JcGWP8=W2Ttn0FGwKWcTPK2cZvxhghk`aNq>%+kW%dKc+XM!Xe(g
zr5H0A4*RnqAc>>r5I~6*=Kep+?fRIyk3Ss=XN5lzd+Xqt_5H4s^B8?ZsnmNioR8@Y
z3qJ=!T5dRKH7hNXknvI9=(fFozQpMP%f>r{fR*phpo-6|32FC|7ki9e`E+kixG<d*
zQzNg@IdDFX4iNm=TxU`=Gt7=X!zZt2-Q~?Rs}n%OElmq#|Eg%1lN`#aV62>eefWNk
zDdaDlS4o`s>)Pwvxw}tMK<?lsEQ6)(mOj{fyW>@ij!<ZwmBsnbPiN~ny5}wL6KEPP
zk?EE27IHgk=zu};K6rR~r=t3uI|+NSk*EAxnw*~|PtSThDbMS+tc&6-bUpi7zZrkb
z!5T;J2t<cs*?U+9UHH`(3c4Y?5!aJR6ZVTy@@zKAB$AhJAr49gYgtOkCw3*uNQ|6w
zMWJ6=rYx++L<w*=Q5Q*lb#XSL6j|J+YCo4cJQ`#8d|?1_PA8Us!)AKL^_!aISG3P+
zWisA-s<ZD~;d@BJTJFr(b&D)T@2WHyy7-Pn-wDv!UlZU54K`M9xYTewDAQiV0|Z1b
z_t)U-S3cz-auB$xL0dvNjUUVoSySDDXnuM)!Qcc(5sGq`e8R^gFNh$)NThy8+H&LY
zzjj_;ol6c`O&Fg9-#8YEu3dY8l{T`nS`1&go0ogzm&?mdh<^Ke&|j(czxSUC@1U08
zKsSt!3Z6{2Jf&hx?=>3J!Sr#NnK^UL&-1~SX<G)0x17IGW>;`kUEb+(kK=Rn=|-oQ
z;Z()Dao@?O5IU2(D{Taky3v(>>QzLzNT|$6#kfld2NXgE#YX^^k%!z-O#pa-7PZ?E
zU~`NAD|#pWYfI~JxGdF1az)9<^K6T^)Z?}T506OdWp#+0$rlwk!d>zY2HdQ~CxMi&
z39m`9pbgN$6DV_#rJlH=br&HB<8mt3;5-bLHZ~^&)tyg!GNG!4a=PmOD*{1{*f=Kf
z*pMF6ZB#c!#?en$S3Z6pE~p$F{(z%p(AH|}=B~vz%Bk{{_NLse6cai%kDs`L|4H8m
zPdqF}@3e%}D{*+-zP_U;`<Nne2y&!7drjA5<CUc&%0dvp7+StJg%Sp^wtR-mc)c{e
zme<^xngf8BX$aU3t=r!I)X-C48jyZypAMzfE+=M>D}Tzz%3~U(#9DE&TAeBS9%W!R
zBm0wnyDFutBPi(;rh4BwHh-F=!+cmM^%ve;oDxZz?(}a0+d2%_--E+2bFqd$87)U^
z(qiw61p2<g#5yrHKujl(Gtv|`JLPs1)<pVkhDr?Jm%Fy~el*B%KslVKdQxQtvszE&
zPWGdX4HR+Wm<@Pm9DT((tlaexBmO~@b=IHp`BY6p%;oBAx~O8S){EC$r$+IT6Wx<o
zAJ>zzuYW?7Th+}m6u~Me;6?=y3xqfSLs7JvSU*EM;ng)3;8XwG#X-{RDIsS0fo&6k
z!|4n@K;VUwt}S5}xYh(t0%rl=dr5b?@ax?$xF}}0m;5umk_8VG+ux;WWTG%1KDW;9
z<Enw<-!q|~HoF6?;W^F?et%)X#6qeSR46$x?7(-S9T_`Ku3+o-EU?44xgO=jIMqFD
z)NmG_3P{7OG=0Nw$O&JVPsj${(fwkniZ8xC(a+f%7338n7u8uDk>FCGIeJ?ZBk@Nb
z92B;Agh?}njG`HPV2&nQh#<q#em#zZ3v?Y|(b%8%+4xPM8?AGXqjb*ojuy7CkN4i^
zAc#hwbxGu@I3)9VomEq;?zU0Zbfame5VeQwYeg7kHOE8^?M~COS+(7!SrTt#+>O<p
z>5G5757@f;TSU!Yf|$O6#<8|uB2N<jq3)8BYD}nMT$%^(=#!CgHz$x`x8Tk^vFTcO
zok2o$+*p7jVN}&tFoAWFiAd!<@2t3Fvww2=8<Wgr@e3qSFF8&6Cj{M9rtRc(W&;!T
zr0l@_bZ8fAFW<-*zZ~0P7KB!+o{Sa9T%)#UTT8aqSu#lp*4`Lo$ypF<kN=$>?Hwre
z(SMcO?&a20D+91~B=g9~E9OzU=8z_gvwoMpHdaVxEcb+}iZ~ysA<S6PpBWk#$fSoi
z@nCDPwxyaG=I(~5>ucEPPM1#Vu(E$TXo=}29Bmp;s=%j8aK?pNQh{?sZqpBk(z+9)
z9P9(K^mKGL1(cfJZFB#AqrC2z0MTj0(RSd7&b6VC4>6iuRu`twW91|)^0g~irc#v)
z2(rdxJkIO=^(u@zrFMKv547~SLR8y?eUY0!&phot#T+TP_qcBF3Ykw8q0!sq0Bhf}
z+@kCEyY^5tb<q9i_ac`fm2}pQ=KMQT?M*WxD-Td+5}XRgA{|_qT{)^RKJ15Yq!ixv
z9L|i(A$8SuS=#m1@!o6^WBQG!4*jso*Xm%=(iZZYMu)z3W^fVs{SdG?Q|uoe!R))t
zU|5eWRT>2vj*-t3ba$~@hnom(Ux!0A(Wl#wC##KDhMyS6T1Pm|WBLPEZ{-^zJ#l(^
zsZX*;?hpB4<=XQDrOp@qE3yt}rj}m@*Wf`aiYzfdfos9c<p4Q`i*HQ4J#81lVuWy?
zQH)6IY)8|Pm#Is@xH!>Zg7SB|!%Ve>h`(ftcgV!8s={;QjuBgD-!wk-xAb-O9{PCw
z&Ssvc@)I~E-+&9$&_m_<_FZ?nSdiH(8A4E)m#Vzn#74l(|AB|8#&L9gW<<2DK)uO$
zaXuv^K5Fcr==|61=cJtkqu9HS<svKf?HZ9X76OaI*C|=dL7k_>SgSTs!`{(d1mC=J
zvkcxjj~+AK9&t^EtEo(R4Fzq()w7DW#$KBD>JOA4xZde)21s_nF$L~th_ANr(KC!v
z_GUZ;v3F(B*_YNAG4Gez<Y$z`y<NKY(tikbCNf?Qek4Xo{X``C{U+FwwDvDf)Tf*0
zPMaCDTX*gc!LDXzXxe>zX4<(n!D0^S0$et!2W`fBsa*^*uIQTN`iYG9>kZZwFv~$1
zl}yK>)SChIGTxF{efQKg=aTUZ7EUpHmQF}VE=4x~FYol%T7JW~P_96gY=Nl^e&^#L
z-oP5)vv9`A7*2_s7b>6Q=db4%fyW9cLk*=tDCtzo@dku2Vb3qJ^OfOSg~}$xp8~g1
zaP~?$C$5_=5mNzSBiz+jL?AEV;TULZ{-m@Ba&}-ldmUZ!AV~wwz2`|G5;3<qC%7u`
zTh0;Hu)7Y?$Z1%*2rKg7n7u3Eo-MraN-LBxmcKw7^EeXa#2ZaNhiQ}~m~Eld8qRAJ
zQa2?<Y8v16V|69x3fhQO$VIkpt{8YE-%u?c;=Wt!Np4vV7@ZWCMI|aASxpWYZ5EeJ
zOlAAgNP*LAX|9V|#XX89xcZ&{hQm18(;njk)k4eVRr{e&S9rpPoJlZ&O87?bv|3=M
z5&59GbNW@4Cd1FfR9aQrgiP<>6oL22m~8jkzEqL>p0b1fCtD4r8Xl}?YXrRKu>NvY
z+5ltBp{kkMw8NdR5?ydxmU9jEK0&xd_e={4*g1O_viOgOcdOQS{V8iYy7vY-l2h~I
zQypc?;|-X%)cRO>`61Pg4!g~p*j_pVr?2BUD~detkwmnH3b7g*uX=<ZJu<^TS>cIU
z6)B4U6mNHEb_!6Q@NMR>&SQf!%vn$!8JnOOOcEA9iOJ2;PZSM{!86QO*t0va&|uhI
zbNg0T|Ju}K-pN}btS$jqJjbd?9^KQt<8qWTAKN0d5LLA#@sn>u^Z!!4+nC3^5fq8_
zamn+%DfaYwc6mP2=<6Nnzv_KWM#!Xy{}@4-q!(yk2Vw7V`VlZChr?+I8_OqHTx(uB
zX?k%z$bkZBl)AY0;NsUTYUB-!)A*^>7*6x$P}^@@R)agGBz~8*!dGWSb5HTwU_|%r
zZ&5wsKDmlst4~5`5)A%GR^D&{m#lyQS>3P7cV(Z_UkQqVrC&_A7Y^?~1rIBJ{y^1V
zGXJR~=aOaF0*^L+&WQeMl-eylbPEkH{rxei+UFgEne4d3*P5xgpsly~!UItsocjxq
zI)?fT%*+4>LkK__hm?ho*?Jj@h$@rRgF@Q{vPwrL8qIdmTgIu@XjvURQu0n57pkO!
zrDw`+0WP-+-QCbta+LEQ>mpk^)RbnI#iRsA4nuWfONd>VzitV?`e`BeFf^Bab}*+4
zJD)ZBdQS7>cL9O}qg-hAuvie+(LwfRE~m7=5antl;2iw;4Ja*sSr|eDW{1Hg!DKe@
zG7P~xa7^L?XjZK(`|uw=Yobz%mlnc;Nh*6KQB}83$jz&DDNnSA&~?axAms~Ba$k^f
z2gt$t=lZbaUj`jU@P?*prP}twG>IztPc-;oN7F(M1gv)Vu=3dXyxd)vqJ6$9XOY4%
zIY{|e`#+6`!0tN89J0~iw`2d=zrH=@U2xmg1|CeZf=n3zQ%S(oEpBOPJLICINPlrW
z?d0dL{!m`+)v^(iTQHRBQs&P$*er`_;%Zc3TtZO)Qu}6wX3F!Q(qgF6m%KNhsnnYD
zG!I(fkP7Ta?0yr<e)S+!w(RT&ZmuN33^c&T`BG9Fy=gV@APb!`@H_(UlG1?UQKW6n
zc5NNzYWr<ISfD_rAxmn$5ffw522tXJs!!G!y&xNp=F(<u#mLYglmiRco>A{oYm0YX
zIrU=S@a`0L>u$Tu{Q=aZjVZ$}b|OHH-P&$1`5`int*SbYEj$2n9-I3Zq*Ehx=~g@0
zpB_yGjC+F9KPTrAy-R@^g(*=UC0+RYYU<zn_XK*rUM4(UoBKm(ch2xYPPpIO2NIy^
z)<+x%$ZxEB?T1Fc3CivRB~p<UrYh|ll0s=?6X%X<*iz$AZphF|n?IlY>T4rha7Ol&
ze7jhD6tkjEACM<M4LwCr{GetPI4aw?S-2(Jx+;0<m3kdjLrHR7r?WlA{gGpcK2W&C
zyhyYc)b4Cz*m-aM+msTjYwrp;H;wMRmibpNO%NdoAwld4&TjFnw@h+uo8EC~sXTG*
zGe8hmnUK>IOMF&L3sT$?%*5ge*o%9G3lE+jAJI#Prh-IoO5e!_J#Rk3NT4rxmlwH@
z6Ozba;IamGWYU)l+A;;N|ACDFFv%Cy$}1CS98mf=)XoSxdh%Y%g-l33al-0A&<#jj
zF0@?`SS1@5wI#m*?ifM$Q|w#ENg=@B6}n)<S7j@IcQpgtpdbPu)ouXXc_S+M_?R2;
zGz2t(fgzYFumN0{yku`YCS`)Qj=<hea~nXKchJqi1uO;pK=pFJ@vMLIcn3V1q`~Im
zcYJ|E&MsIaaJ2C>1-nB5-{k@=Rk_fC*>MvX7)VM3o$cnnQ2CJoo1o_rNvLE!Xh;1e
zasq%j=Rjvcm%AoBux#M*0JIC5c*#uYdK}q!yaC^N1IHlG`$%BzJZ3Wiwomm03W9`R
zVZ(;O*I=j?Fi+L-<Q~59!U-sCfdVcQU?Y>xFKf=6RU0rvlQ=YB;R1TpBYA()2|VNi
zz;PjH`*dm4<QkR<bQ={EwnTz9>J;c$FBhU~#BOv2FVJAhO-kf2DrdGG$&Oc`d91VV
z<^_2*R6#H&(3Ql}a<u!Lb^~mglXeN#@#a9*d0D<+`QUHsy)y4(>We-GcQ6B3LeuW<
zUP~MPk6USXU#U1xh#!p4JT{mJSWhYx&St|p0?JREopMCIE`OpN8paW(T3LjHKe8KD
zl{7<bhCifT4>RB4`uO<e#^rxMT2o9zkXw;FV2&tJYX5fh90@uiC{{I&t$e-%vI_w4
zE%4YC<l(J)iHGBBlB7U?8zbwKHX|l9H=>SbT~k$WUiV#k9i>vuiC8qb_D@72ZnAE!
zM_OvHGcIc1k=D2sst6kn$)CQTzYn>#i)710qQ1H+yyr!_Ji=0u6`cu*!v7$@jAPG5
zc{eR)I0+hse8N{UN%yVuVQZoyg(2GiJ71mN@GpF2C?8FVHLEy|fCOyVn?^d*?h>RK
z_bP?`Jw0o-b&Pj8yIda<T!k?~5FHmwW2$c&Q+FsX^#Y$cTxV&ve$!y(n5^{dHPr1r
z%QJ$q_en?g)gzC7)5H@H|5>7&!a6;kq0}4yAvAK-E`Q#Ty`O$-JKdJo`I^gJd79n?
z{f{nR4%#FNJK4;+08-reCxNtbWkB-c0A+KICx;?O%*?G}Wul>zVXT_gZ)%^1gzx_R
zXX&ay<-hQ7V{Q~jeNwdBFH=12X%UKFpN-SReJJeCF^%E#zLK@~q4W#PcrUz~7z<4G
z-jMn9#h)m|-7jEPzO8leXsaPvsv$;88Y#cdv?GB0p2_qLb03IqB(;fq3JTymj(Igf
z2tDPb?7+;Q?gthdY|RJ57W@_NYDmvIZyFi~T`Shage6z(8y~PI*DDA7N{GKUC*3yT
zIq!`0xW1f{t;@SXg@nl8I{ox?^c)IV$`4V=I1IeAdB7yddh`JYHE&3W`MBvwirVt6
zRN+C~pajr8n6J;mS;$#O=>Nmob0uVn|01Z)G7Yzc(j0JNG4qKcg_>$?R6Q*<$j3|=
zC(I@>j6Xv+U2>oAB!opma}GX-Mr1m8-ye_g>Lb;|r|e4k&%c#-%D>Q&L{OP+$1i;q
zLt>5+A_SEB#@)I%&Fw<`Y65_;xTvsWR+H)9rIDl!`_rc7=eXIys~}!vk~R#L_iqq)
zQy(tX4`JW~EVZ+d{||e!$HQ}{dQ~5Hn(TG}_#Y5BbuGCWvPU$aiFi?gwyMI+Wk@Di
zF?Jb|pX{Vlm1$d*S({2aey0<Wm0mEYerkz;&cU%H$R)Bh{9I!De1}6lZg0`*m#s|J
zT*g7{LiN^&^xYrK(&Vi>HHXg&xe@)YpCAf#8~n@xSH$d&G<j{<&r=3X$+bXbHRl%a
z(gTb+<%+eJIdkVaF?Z8)>@}VI($uL^ped>bZT<N9k+R5f?tp_%A!{wYZ~OaKYKnhR
z!og$0<;e3HNBVz_>;C8{fk+g!c)rQ|*!^UH_}<fE-(=3ybK*CbO;nEAG_O&$TGx&t
z@7K=q{vGlQ5Zg%YnvJMG5b0WL@EOkTI(rNe7z$X#z*G#-%0|O^>#$BglSIzYcFK8V
z?=yYCGO%d4a3?lKm49J-AY<|-_QpcEUa-E%Jn_w6x1TXFGF14$zuB+zcS?zKcul#>
zST6m7wb&YNRIpA+V4+kANqKf`a%aiu*jKmCm=dsaCJ{#?8EanZ2*U!`e8LsM(Imvg
zaL1R=^!(Ww0a*C@nwEcv1HKGh7t(JcX~weXJB$ykAnQyO;_GdmM<AO|pB|GY*zlv|
z(;1f=>++wf^lj@&6t0v{_1ukRuK9<6y1?%5x{V;51AE}6xc`9Ow}9*BtMSx{fS;m2
z>gX5z7EVq{$G2N51X$+U*j1HNP)rZVF;F}o&^A~HB=Vkfy8w~R=X5FQjHB6G|4g`h
z!UhT%IPn<D-~hu2tV%|wFCB8mb;U{ddb%_ZLC<QBYv!+j>?Hs+Jd~W-(JgIU0%vFH
zpLQ}TP{>IN-zW}6<@PB55#Q;LN>i~e@oz<9iN&S)`}1-}{PA2pn}xj?6Mm4VF6Nv*
z&*1&s1)H`FJQM}nIvnC$vLnf4pgatyy8t!}6v7l<G+zVq7=j7jzC=^65~>EgT_QwW
zEF7c%_fh>%Ab&W0I`{F7sVcHB!*Gx()T9~<C|@4kHi3Y|m7%4+KBm_c1yA)Db7=^3
z*dd@{;(xQx>!GO|H(TJ0M1ThITQ`V+DzzeVz;s2Y$T0<R(=)8YHH0w)AbxcqAmxz*
zXR$A(Z1=!CA`ndthXBCCP~sn`A<m_Dpli8JusKxl{qD>D6MylijZgISjHvLwccJgZ
zyrJ#7*tEcyQaqG%Tk>-xFdfPS#jyiW?WijzBjtW<+#6^_mL`De#r*INklRn6v;O6I
z05vIfz-L+L0ooOnkhxNQ(Ru#Lvhqi}k6*<ZafEvL+v*Ahimd;Sr1Ora^8Nq#vG*3)
zduEhPbgXPyNl{c}hO!lIdn7wM9jlD8k}by`5oKp&?@c&3?(26x-`^iR9z4$N9`5V9
z-tX7zdA;8S0(5F2(_d^KymWuSI|IJjWL%QRow~8v?To%K5PqD1B1$bB34T6QVV$Hz
zaZDcgSFhnaai31EcF4ag^C;!f(q-uOHYiKQ?y31%v@M|I;lN9^eM$M10T>kx9)85S
zzgR@Kt*N@}iXGu=lLb674qb}@Elc<B*S*%1f9M30G-RfMmH0{UISaUJ&<pCczA~G>
z7ZzZ^v+Yu|H6ia!YOLNOvxN^2v4YyA3MP<=mGsb_1b5@WbrQCp;opvlW!Zjm9Fj-~
zdw<iA9H?Q#2#v61)o$2U31MUo_UdDu@bvdVS}$md!~zH~U~T5nzD?YC!n0t@OA2eV
z4qT!EAjcpbur!rUD!nkO^w}v02Xji>z`Ir0ICF>`krqXJhQft7BwxXv-6C2vYaU!V
zg3=Q3;K(K9x%=?d`xyvwYy#&hwjxszxRYZmaBd$8w9(Tl!wrkjR1eJ(D0p@ti9?Q7
zf##AC=A1s#=x^OT1|RUiJgntiu=NgXW7WS6YPw*k3*)c`RbCbi$EmuV?w$Y4W>X^}
zgx!T`mO+L6Kae&FUNd5ku&8GDHkb(J!_tU5{4M}z|9}A&W#1&S4aU`gjU7Nq&{Qez
zdmTV4PiK2#&4NA7Y<{jnS1>kwXOK}e`0v!rfAQ`=1QvJOzyvu^2h5l~qL4I;Az;OF
zfqcMl0j?||n*ew_vPg<uTbO&Tf2qVIfLMW%EEf>2EF?&12Nv^yZwC4x1}E$+Xlw(g
z%u95d=rY)22A@<I=b<+kr~+L<3@o+h!9^FkY#JXJ(Ez3sPGQ)66>GzA04AKkUd<%T
zfGNV*v}4^_F3`2npza=q`2u%^D*X+m1F#Xp0h&t}PB;a97fi2lzgC7<RteU@pCe=z
z{oQ%ADAq*{oA8NW=<Ve?1#?z&E3yOJM+aVo=iD5201>Fj0M$nju8zQ4e$~GLhK`6K
zPS76#Y^ZU!Y$Jrtu5CrPqMeMsNB#E^>|9y|7EGQ4AAN-NJTeYM?Sr9ZaIaT31oX54
z*{Pr~OGTl}P;zBY>~uVO;9yQ_4a)ijxJdqse<Auc2p@cpkqx|hf%<~2a8ie%=Vu@y
z5fH&67>M=)^%g5xXU+qVSy-Uni@O(pjISG~BS6}MifAm-d<lFk^eXfjCb;V1BF`pz
zNlG;TXKBv+?MACj13>fEeT8wyTv@b{P2wa*@FA_gD~-5zuuN>yZ?0L-%|^3Q>kx6c
z<^>rj$Q@gtC$skri;z&&#X>~Tizl?!S3B))|4KdX2JU7(@>M$(Qvm&Ea2ec<07he2
zsQU~I#?4>ip87<lM8?_cTiCTO@DBI*sKMtLpqn05$rRLxIVEoV*i=asJo|w<_Svnc
z#%~~jILQ3U0a~;ZxO8Nmh>P_lH%47gBCIvnb=xkcqQ+!<9HBuT{moBENPsX54p+f%
z5P1@EA(Q|e{RqmyBL85x-b4eFDy-4<x#X;v9i-nvJAv^$vK{^C0X&q}iW?7-KY^|_
znwxlt9i*-RM%-JY4vf}7xB&NE?AjYh|5S@68C0bF1VY;YH^Lqt_n!c(WUW(dInU-?
zTqPkrhGrjRZ&{=Qi(X*9Wb+AQM2cMwq=>PZM|Oj|zUjRP>v-@MKt*L_Sc_Qj9Hu#G
zAtIv1IABhXH&1{}2zYZ~(+IS!(YZwbUBnKMC4xV90PF=+N!SU`d3K={yRC0uKsmLo
z(D6C)2#^@&>YjqyP8iqXqz&-Ysx((Hu+mmVURedH(lGR9h7nV-%<%MrffEyd@n<;`
zE$GeP@W35Y?)I8ND3eTP?7FzDO!UkzVdra)PcN5%Ul1}^3ajye9F)~gf(k~EtvxZR
z#pz%3DKNs|wbb!<i|mLOM!#axlf|5ll41;+()DXotOTg!C0zs1R-0mqHC*Z6Kc4U<
z_J2H>JKA^mq!0E0b6zmbN4`aH>lQ!?m2%bwn$;D4yp2~*!*fL$58itMJ-Lz_0C2-;
z`prK0yv*>}C%QbQWo~{>B6ZP91AlV7QbG>zgFI}Q{)J(r3Xoj{^G(0(X?zCX5+V~$
z1!FS6q9(o)cjVHXVP#0vdd_ass_Qywv2K)a?JYTy*J>dre<o+6Tk?I^4Ik3pCi-2-
z#KXw-aE(GN&E*U0x`uc$(?sdfod{;Wx82`}H%68_Ju#<`swxgvo4Q-89$PftbAPc_
zb<fD#<gTmM1Xt}}ftx(c_IE3i7S98!GatxGcTO$Nbusj_uivsx#iA5}#WV9xP`2DL
zfA&XniF6pXq=4*G;Lb+cGBdp6MQZIRG_?09B3L}H-Lh1w7*qMWJeR9gkW`R7<j7l?
zaNGtBy?0@#WW=pObCzcBfrSY8EGUa0h4x>zc(g%=*YdacF%_Pz(CmwxY?)-2vZw3J
za<r|GGHyG_S%d8(+qxdefpRIdH?g``l?;Ans6P$9X~J{PXXuc|8zAA?aqZ|JAD)tj
zlcMunErFV<`YivAb^F_^RBjv1)IC!glb^jul<*!UPBhAPae71+O3!xn!BrFUqAMQ|
zFMbehgpGX)?@#$9BvRm}sq$6HUkQKXwQ|*qVxb)#eXdg~(wvMBgHRUQLt630^C6i^
z{g}01UMi?24Uyf5K_Sc)D%R(}DKnDp*{5-fj$!JlvU@a4rE^DdWuvO(Up^<GR;y2=
zXPjPmZ1rugb7@}hYeL#DH_bGJ{|!a)rnr#Zvx<=+^1`~ZW`t?TQ!-z_j3Z&TMqkHG
zi}*6D*2Kjhs6$Q?`Mx?qr>C<?>4x1Z{Lrn-x?1sxk&!q(Ho=YVK<n(`*b&ID%#5qz
z7hEu=*8}mB=PH1b<rIzNDAi=X_f-Ca*OyOTwOWMdO5UfgDG&FlHQwKf4&!(FUK1AC
zxU$et9Do><q3czVTI|u5tbf4C_F{c`?zg|*!n&gaNrT_(&7?dsf3Nlpd%?p+zY|Rx
zc1h(ip)hFI+M))GxY?gTn%KHGE6AR1Yjr|p0cK~>S%m1j>D(k4=2|r-6L<4FoCO3@
zRc-`r4iXD@9G`E6Kw8rl48X+6VF_8<4sYFTgIF=muR3aD-Yn3F-oJq{V~yv=cL@P8
zQALrS-w;=XRlrXS<RYE~&Kh=rrZm|J0^?2m6mhNMKo<GTW%W4$!sMxWt!*A}p=MO3
zYi>tqPN63A+e)IqfFJ)3M2ZrQXW}wwfO}Eeaa%L|ho=<R{El)vNO{YDpLZ<@p?3`I
zBSD=ewn+4#g1X_z#=92<G_UNEi_RQ*DHM%8`a1S#Ml@?+XSZ8CbEy8tK)FG>4yBjt
z_-^k%uc3}|n<keGYhS6CInz^qTeTmI#&kZ5y!Xhrnq&`V3907$F4Yp4k~O}QlBmtp
z4*H%Av2R)Ub)BKC6who3|C^PFS@+zG{eC2i$qER*>!&A-u${}E&wJti4ecA?8LZ1U
z>mY8}Q91vvJu7~v@}|2IwKp;7R--<{*Ouc9v-@)W`B!@{X1oWV;=CssHHbeXs__{9
zUZwOXh)QW;QM6XTqlx?HGO(JPa03%RS!`q}NPK<AF`9ixfLB+@Og&2Yq4U!5*dwMc
zV6i=gyDpoWJA%MUe-g3~1-O7|dc~sT_L@gV!Ye`jNQW^-=I1P9n@4<}>-Dy#;}a^<
zQ^t1RTPt2t(?6LJ!<!@_zaitA^7U9#o1WEOjZdm|Z_@j)^?U02zd!5X2E4{}&iorl
zpP}r`acjyVIR53~#f7iB{W3hbDQk?B(VEYg^8p{Jy<TCeidQy|T$R*gYL~m%gw&9x
zSAk13p+{Go3FhP~SDv&C#d=xUDNJhAE=h-I)a~W}_A7bjx(^s?tNJO&8dTn!pF!Ts
z_T9j287f&#0rrY!!JqwI@$!`}y1dJii3u(?TpKMPOW)N#@iJJwy+eY}8Q5ii-V_tp
zx$rcAmgf7`xLqMLXS)!IXO-JnjLhSkwj+^M`%i6;tB`xfzXN}+vy4qHv)%mlGKXy)
z?<d7-2N;?>s+{fF9pj0#dZ{dW>!%aN-BNIk<znxS***|m0c!YDVKQzMf5BEU%*o0k
zTdXMcG?-c8)un-q;~S~4y;v5+NBLo0-!EfB*r#j6Cbf-ZO&^llrjLVT^0h;9SoAj}
z6cnHR{iC9Gh9(7VvvUk!$6g7}9(N<V*dsrv__@5@wFXU<F(HcP_bc9cAy&9$??gR<
zX<z8PzzB*(-$=_BdO5-V%8d3>(LrO;_S0TMW(&szm*VKE!uG{~KPt|^d@Kh`kfg_^
zTT-(4zn4G15bQ;Pm|K`PR40#PV^mA96(28=R`_6S5rqU3Z#VZVSp0FRL*`q>#KV5E
z*WZjNV!!taE#c<{=#AI*GbF|Ap1<!1XD+?ud71R<XD|EPNz%;kZIm^7uU`}svr^SJ
zv35D6kx+9|{|t$7MY=T6TA92odT;mPWy+VwQW@#r24X;G8&dut;j;pmkUFy&^)Z`A
zPhA2x@s3NYWXp;$RH75~Wo2@lWQEIDzL8c@@GrtZj5&S;tePegfXkmN6UY^`*ZXGP
zb~&d%yOC=QQ|}NYW?y28ku`>gynFOB=ghCA7e}(ulgkd)%)flCrRBKFH-JjpkQBz|
zA7$=c+-VPNQsWhH>v75(K)+k?`N=%<@x8bKx7HweIs8U*G@o={QYDQg9sEEQjkmk<
z@LDdJ0LJ2*0d}Gm$C>{ZJFQx=!@XVNb-F$|v<jZK9bHm;-qnKoGA#ks62Fzre!glM
zLP-84R+nTUd32hUm$dCIZ4($zlHy`W*7uxUQ954xF7321r%pU^G6TU%Tw$xu_4}`A
zdFAV+yN&a2yu)TMj7BI3Y9HJbxl6wOq)V9H_3rii-w;7=g7;MUSH^zJv4&i9G5KTv
zRYbZ4yfm1uNJVQg(}*+u*QFy?NVAUjl0L8uiVBLidG5iHR8r`qn2Az$36jb|8357u
zHiKJ|nYj<It0}sN{`f*B>J>_U`T0fYpO0oz$d8YM^jZd88lx)}Y?b!&<o)0mv0byR
z7cdK_=-h%9^$NuCw~kT#e*R>F&dVK3^#Y~SjP<c{X2+v{7NUM_RME$^YfaaUI94d5
zBFm)(mx)hKwf*gbj8)I;1;21)-g@Dp;&Hb=<VXdX5G3&FM0qA|bqVwox$~ILn*Oor
zUqbh0ViGDvgfTb;p_H1TSj~%;#weH?*NbB4EYI@gb2YI&x3?ls-mytt>gXgTP9fay
zn5$=nThcr07xU$%$EE+{%i|(bBTHvideLornc;Zo-V?Ml<6CtLo?i^vpgR}J)95*A
zO7nllrI03+X9$CRn%l@TU*8nk)Os6=ExCUsCTSesct(UEKmwo3np}F#Z%rupQor1K
zG_H(I8TmPMAKemWU3@n4_Eu<ZjCC^GmF<RP=5N_8f3FU=Ib#bxr{AOgpzYMPuKRAB
z(p|0`11%oiT?cc=AtUlbNRE?M*r(F+!btsS1V(-3#R(bB!0It56Z`JXP{!g$Fxe#M
zY)jUwoZ;~Bt-%HTWdb-8;UT9N=;nLt<eVhm`QWjD7XKr;c0JS^+YmJyCeHJRQEGmy
zmCa9+zGme4ulwUSwWl`}SzYIE3VNWB=3*Vk#J+9T_Za0N^Owg%n{$04H+GyMUQYgQ
zG<AM_$nN#SP$zzp;H^Ke*OQArzJ}%c@pihbY#u*H6J<O9k_g<@v>~3g<onqE1jPIX
zfA51gkW@w0PMHiSn9}P99!-3MK=kaoQa)t^rb5>?=wx$<9kuIOO%ST`Z)CUID9%rD
zw4AL@c5>EbW^7ZRB<{Q0dP<D9ud~hDeX_i<wtIh#4t#WVCm?X`c?ILKWNQmKwz}e4
z7vuwM+^(sE3S-#J?@TfVs<&n<V0Ij6s|=}laD6afbKlqWS2Qz4bihX3VT5Mi%Ue${
zuS2izrw*!~le?r+>c@0vV9#Fv#)%6;Ld+@wpTkPJ=&gdEo!dy;0{us!xWr`yP>xbP
z4Kx{1F`SC@`W20xP0Ol`n|Nn=<`DAuijCbxo!U4R^Lo!$Z0jHm<%tGcVDMsFZ&l^#
zu|$smcl?Q`x-i`quW|Gp=e!N%SwzJR*3G+=3Th2nShhk@*{fa$H5&#htPc^*=PEM4
zTnnu}gq>fVt^c%VdpNC;eci53jks)V>4v;~G7WE5fK0$<%==}IZUx8d?O_#n<x`@+
zuJB0}>}S5G_3&!vnp|POXScb`c}8ow7Wcm7nXwsBtWdzLVO}oTnY-IL`t90jz@g1J
zs1~8PtjIDdzqCliQZ$#)c-uHUYf?p$<(z+JR=7d0w4!LsDjLyC#OZPMr#a2<&SBS0
zeU?n^%R8hc9vAB0HEP&KxT+#+ZC};5(9zI&A4jsu(`!A8*wxZvp{TlDXuQs`dCf{~
z`uhE4ib6q&z850T`;Q;YIm_9-a(`hzu$v%p>?mcmj;veXodDLV-x~Z3+Mlj7u{^c1
z>kVVitdKJ;KRV=W{639fQF?E{U!5F}YfBy6RasX3{AZcpeQTnY@1K8wBSN=-?_G&n
ziWK|fL*=*1xIx0ybNeF@eF>aTLDSg<5XNf=`lj1y@gw6;`0FmSNuVXx_ehhXxwTWt
zBe&R_!E}IWlV15MJNNKPU{&=)S+KSb4xXFv<SN6Sf^atPHu3`{tR{@{OEcXumC2=0
zZoD2}fWY!Cft*_#L`m0PJr=VQD<pllh^z*md%sTNqHm35{lcd5pi{2f7v8?M>-Dnr
zcJ#UOzJ17wr2L(HTyun1#y_#CjF~qU`-VyD{0tg@zP>Zj<8miz()N0kQJfDiPb!Qm
zv3yNNSoC4Pj4{!M2AeI{2I$vO+VGy;AY?}eN}gi-^=`q>J~Dz3oz{Jba*JQp=N4w#
zd$NY8D!5#vbDWWNgL7D%d8?aa)Ao4*8OENal5Wd3pROaaL(}*8#!1yR9nCR}(B4JJ
zzES5TC2YO`jxXnNskhOS970S~VFFn^O8ASDH_s#_j}apD3+|abc`euSpyV=YcyJQ<
zScU?NNJ!)K7$ClYU=17d;li}hG^JtvjUM;;xf}k0t25!sK{Ssha@C}yC~wjV#&$&y
zG#585+1rKI=;qw+9agL1D&G>Gy|!}{k@i-eO6yC*CF#t)1Ze2tFt&gZcnHGMXTwaG
ziZlQ+hG6J@%Ef2<)=eUPoJbaB(OmJmo|#>*+zUu%gN8qJ|A_+`0o3%wxnYOB;y=ph
zJFOcZNxjjpObp$o>SGi=U}?w|*ouVuEf3C6286)_XDFP6y2@~tbeuEo`{@w{gno7G
zzO5u1y+<r?E^eVN-7&uPo?)txMO$>iF)Ccp!%fZh1@2{^E^4FZtLC+*bsFAny&5G$
z+`*dis_I78ONy5ntwikjj}_{~QYk1R+h#T&vNc)1kfzT*yEZ62q#>*4ESRY(mF*qG
zJ$!g2-Kq4Rt%_W6xY=j4;7(Y@F|iuq#$ptgaa<g8+V(Ti6(tHSv*bNR0sjvoeO+es
zoyNa_;yTpMU@LLY_!Z3OD|LpJeF9AKutkR_T%ztc!HBm7-Bd+mm#=F^z{@B|7(TZk
zDeQCzciajk+0y^E#7h$5HdW<-Zi%hG6M4(W<m-=ZYNFiF9Z2u5mb%!vx0g{<rV-dv
z_Wgv~zpAGqoz?wEDb%aGB;N?A(+=Ftjx6gc4VQwdTHZL3yIusu{8+@09K5Vq!zIU_
z@Q5=7)}jQk$fy>ns~{>guHb$53%d)Sp0E`zYh5q~{PSNxYcENcivrY?sni)z%KA^G
zDw04?lexGxt_>HR%qT>+zI#(nHyj!HcZjC>-3U?EeX|$L9nu%Cc-H=Pe<Y^iWr%R-
zFtPRFXffz{O5lHtIi)tt5!A}EA{Fj{s#E&FA9|Q;EMPJ0p-(iM+fkM*(HB)xrh-#(
zISL~9Wre$9ScS^S$!kv(e)=Ks(E`k8NYODMn{pE#8zAYOzTx&O;>Z$DlIB73)&}3)
z-5Xq3A#=ip$jODfKkmLoJrI;xxc{qsYzexhWc&u;(j*%*rbA%Xi3}Hg_M|N}fV>sg
zNEp##Y#gH9j$XPLyn5#;_4<<wc7B|*fb@hXhNp1ZZpi|&=7qBeFRi?2X?8A2gDjy*
zF)8>rYG;lApD>sWEEwQ9BYH5?E+T};{PBQ1v4mGgRRAY>HaEy4ejGyrp0MO0xOcSC
zUf!U!U!hUlKd&tUZ&=?PPPsbl&}!q}x%DZH=KOAIy?W_m!Pc;tXZvLo4bW{2*ZK$a
z6jsu(Atwgp%0^&qcz^|^jNDU@V0Y3kKKk6T?D}u!aB?oerx88oiqvkZo%>G+?)Cpq
zp==hSUiv@?=a^qWGAnPfM&;3%ge!|&Ur(>_OA?vfj7G5v6_lAUzvSG*pBf?o^G(%y
zz{qe1C;W>nV)t$wpgk^8mfP2tIKI$mso{V7N2gj@|F!!jQFXm+PvKCZzlnXcW^wW3
z_q~^I*^<3{5ogt?)c0QrVW&5tw!LKrAM#w;p90h;q!ME7dqAmiB&f|=1+M0k0Ghxg
zG|K)_`ZK$JGrlY6Oz=0ck9@|xV9MxKIXTzalg#29w*F(GfBDOk7*8_?Ir_#zV@4Vd
zC-VPfb+ArQoQD}xmh(BXG3$@Yj{W<-KQ8r}YT>x}!kulB7wy?t!-3jrtm>;fli0Tj
z#VVdbLatygH1QWsl4k`@UmQ$6sGdl7ufJMA(&jK3<NiC~?PbL<PNt*<C^Fb(;36vY
zsww1zi#LV;?M=IxjARQ!I?VdN)h{3)jK!)DVg)nQ37O*Fi@l~cG%snZbO+K7vdjTS
z<FK|<O(zVcV&p~OR9X=1CE~TMEmcIsPMi72m#3`%>Td%#`xmCP>x5^k$_M!+5d<C%
zl-I9ot8ojnJL$ne(l#T+WjWtSGU6{LBg$Iwo^NClq{tv<Z=X6Dh-6Vpzu>U5CA&Zo
z?Q<1B_cRUmfKf4kFBVggFaWZtx(_t)R3r5DuBlPsdl7$D)4>}e2DGMb-sk@tW`)sM
zny~-{0I{Bbm&nV`V$#Hq!S%psud{BV<7US6(E1)eIjA57;J5?rM1iHirljuwJz$>1
z7p?<+=iu{GuvfAH7V;msQRIF*rf)*L_;xzri=Wm3NG5K4B7kn6BS|k<E^YP!F0qvY
zsC^Qyw(qFdDoP9Tzk~1oV#9tdKK7^GOk7tyZ2N=l+Kpm9Z;Md#dqQh#M9RGN79^PU
zkXSad!jkgR7>n$7*nMXi;0A%fe8Uo$Y5r>A52LDp$!+YA-U=ASxeW+f6uia-I`nAS
zzz{V+H^7_wa|0M~ElU)OTOj}~Y|-x#=r@7C@Spt{ce?-#=7HEFbk79nA$kSlKJ>UE
z?xVw!&jS0+^&j6%>WfYLxcP>@zwC$(rKQmO#)Ls3v3u+XU@G$x`2^Sqz+4LOgc!E}
zLqhWgVbe4n9b%0m0(<c3a)(Gb?4gm6u~=zC+48->c*bY`*b6`9Dle`%B<wy8TWm#R
z=I}x60;p-W$WF&^{(FyHl;UEJ9y?Mhme5~%2!t)|LB$CD1sK}}igG8s^!tBEzbb$B
zGD>u%58%8?s$s8EOftqcW2uw)h9cCZTx?*n;BP6<3b)02aJUq|lg{<v#DfVYP+Jwe
zj%0?-#jC-@^VlPxcgtarN<*FwubiyBVeX@8t87_xxtUY5z2;7X8>Z1|Y`6*(^uHXz
zaoR6X3zSmuC9nKK*aEPu!*PMt1;u|bD0R3Z5nR*wCqXwN?Yc%_f?Rl>INDIBhnWA~
zwyxzs_KIYXkoV-P<D=+GrYNjOPPSIOz{2VQZ5Au_Vkmt)TcJ<cgY?Ioax7mHNCq>8
z?q)<kxq5Gi%vvX^o+UGkZe}cQYtjOm600YmxNs!L(^BTz(rSCh8qAdMm>b(UfQhNn
zww$T;%91eb_};+ZW_1;495Jz7Pzc~zsRIj{m$1!8cxVw2zMp$s5-cE|dFdX}fLP~!
zMRjm~>fF^3j3M_wYYlA|)G+)MgY!RWF<gJ*Z;p!}`9A-m2i%_>K|Y!=%J~-0a>+gy
zJb#9Jna|WEvM27#{))2Hkz5p29xOLGITAGZl|AJ;jXg#)`wtv9wu#>`mHn-qk1L3W
zeEx{}XQ|eY)!iubJA)Fxhw!T}f)4iHa+tE*9Krb~-rok!Y|NHH89fk9fNsI4aJPG^
z_+RHQhMvki8J$miq{{H-rr~G~*No$}*;?!Cnr^eNsov#@RuhxIdy{IM9dW>Q{<>@!
z$h&|dFGBd*8UGPPQli8keUi5$@HII4QicZ{VQjdG{6t_YmP=G3A_Qz6fJ~#yJUS`v
z7n1v^cA%bzUTsGw#{rQ=$h;ApzBoh1I}k2#${WxrZZ~y6-}supVo=UE!$8_Ru1*2O
zVt+?wFjV<*&8Na(H_@Wvb{l2^aze~Z1HMNU(%V&%v^_~w%t?I7)!UxtOK9}9N+m3p
zWFrA?eND@1gS7WS&oVfx@0p$w%^lhiwF}9vTS=U2Bt0)7SO0qd2HbYti-#iD6#IGO
z2-z^gxCRnpAZIL_E$G^89eOnw;%a=oz?^QrK^^aee1Gqp&w-9AcNErkd4>gyxPjOw
zv;gMbWhK=h8@vc|hkMN%J|tkfD1%et+>+zmJjC%ZUBo6)$&*8A>8Cs^bV;FlLm?Sr
zAwYkcKe55;5thche;H@lgaDyyfP_NWf;_t4zD_)uD0xMl-4B5clkaw30R}KlL%}yf
zm<tDK_Wb-&HKa6%=k~wrT5eDeIVI^qM?}FPw*J@Ng1lO(12k^JD#|jN@IJn2XnO#4
zaD7~Zl<sQ2qCY&ROo6`Bm1l<^**ugs;gPuW@WTfARRa;v-eQXUN>iM->XzA9H++OM
zwQUxFl{jvBFVruV%xjY%FqlVt>woH1S6DlmWKBOR8#-a+de{3~#CemWLlh$Mv9iwE
zPvOq+2z>3x4f?>j#)1z)bj~FB@8+(-Ik$bmS~R$}h5_VQi$mQhAo6=0O4Fz#n_~El
zH++RC;{yfEOo=d!2lwPa9WJD11Y}+i_3yHkUm>|at{3N%lJAQhpw%zYnVxVFISfd#
zNc>yN__Xy*n4va6*rfCXvB%-tT#B_Z6pw}4z_n4JzX$~IFCr)qAr~8Oe+7Mtk}LR?
zeb;3<J>HnT#SGigJ^1Dr`j|Qfo;?8;JTTWFh;qNOOGZzgXLV1I_r;VVX(<KHbHMm+
z{J6hy28{y?ob@CQ`rj`lv~wWHF=W`ac_@#gwF~!G#22A^vA}OhQnh7rf&qhy0}W29
zitxw@R60(N1xY7B9UIG`8UmEDOEJ%=y|^j{aaELX!~Mrofc|lbyakNjkj}iOcsvt5
zKHy>=+IiEiSc^}x#rKZ6TIe_<IEqEAoI%4EP?i`LDm^0{3=-r5o~fD0a$8LX=V8&v
zpilV-IBOAFIY3Xb0ImouBL)h9D+K97E9ObL@2iwN58~MxA|&YF3A-t+I{)pV_9vAB
zPl-z#8GM&Cj@591;}<$LY!m-pI!?0F_}QNGY2owKune=t@bi`6{pEI@O`#Qwhjiq>
zW)K)R03$76F@=*#mT}vOjJ~;%a|B~pn@MP)NA(C5B{(OWC;{~EK+<QB-6TCNrs6Fa
zIsrvwC5~>T*JX4JQ!GtyO3J4OPmc6}xCTqyd{lh^$+iJ520-%^EE$%3=N>2F)7ei?
z0)Kv#(g`9t(w*^aFUQ$asE=FL?wcVQdl@dZ^C9J4MCAGj&kY@A&W$aE9oC_Kw*ES-
z5+K|Ce9hgO3YU7jcf)mMG5;+*=4*f|jplcPi5$g?{KO%m_rkO+AzaLW$-nbj4M>@G
z9*2$I1M_BJVa78@VpsSP209^Ps&B)b>78|pIk1~L>mS2|>>6BLmWa)p$hKx)hgl--
z+~=y<sS58_SMy88aBX%oNA1H#f-Mh-a7gnqC2)U(nKS<ZurOd>gY<s@-=!OZR3uMc
z2W&N+xAqXTHV$TtXC@P^<>NH&+tZi@ezFzavaX5rUw$-)C$kW5nrAgu^G@zd4_+e@
z0GFNu8Sy3{GzdO-qYbV$zF)Z%S>^Hs(bfIRRF7j-cFlDYQ#)((+f98YvP>DY?C8-3
zE&X=HQkkJ&JPL&Wm}KPV({@D+TxR{u8Mx=Bwsv<l{p?#tA%A16i&W=*YC&#&+TCcQ
ztDFfZqg|=b_R)^`iHDcCnJ3|n{`!CH!_pbj5jMdZ5hqQQ7ViF*-XT7PUDR8kG>nZ5
zo{m2bf{}w!mB8Ns+j9u{ollTjHzO{qd%&MiiB|o2o1On+xp_d)L;edj%Rr0y|KnPv
zuxP%2;CKman8}9#hiC|&8LC3$;@<YJFeF}un_r7Ij{x0xoaM<fvVH({Mv~s?1YRx7
z^(@6)X#~d{bH3enKL=)Lgu5W2pTG(iAgr5ZUP8@xt;1~GTPRdZ>Gy~dMd{-aTfxww
z*cvUtk2C0IAUx+p0R#RsbVc?(Y7o^g((B0_L0S0;QzV68r@f6n90=7P0wd?hvwTt*
z&3O*Ucc67WU8~FLquaOD|KCS{F>p1Dd(?k!D)WkiX>^T8ec&rhQRp8s!;#gg$)Dte
zA2CB1;C+`4Y&k7JGddLv_$$1(b1OYUIhQTy-pwZ+pswKQ9n9%<fxFKhEQ43Xm;?pI
z`*S}HN$Jb<gw^xQxv6NPfHYGfj|D<_4Qyc0Icb$3FblIb-p7uwTI<8K_eTkTE#VD#
zi@+^*K9(T;N)^DR-pD${^@0a1w`qA>I~M1B1*_kIh7&LhLp!yV0CL6{eM!k|Mpg(W
z95#F4ZP&B{j|-5M_6J8$oq?Doi25(Gx=-v4w_ngl4C*6xs0)l7KuU&5;FN3;vR*;f
zIg{_H@7U;L*T@hnSaixND2q=7C?$A}0kJxYb(ml*TzInYZj#q8dO$3Z0Mc)P2_>B4
z6Chd*sM3E>Zp~?4`FkR7!Yu|(kN^|Mlg$4Ry^SMu+6?_67-@&T-L<C;w#%7eM}!i7
zVSvNxzX;qy#P;B-z`D`_bn*~i7a+RYfBOJ>@Z=8GMwpf}WGftcK`e2M_^;{+!wK4m
z5lCYpX&CwYb_hnc3%T9+P65DkII-iKk2a1;)Q4%(eo$_pD%$#CtT)dYQVG^ixM1!^
zj}PpF|L$cThMHeE>q!Cag%afF`+nm?Z@Nx!k3&*%*NPqBXsI-@L!hjN^`s~YN7y+t
zp^nMujTPY-nOYsfN{=&S3NC)8I{`+fu;c*DmcG^t^B(P;&m#?rD;qJ8YYi@a7Q!tV
zec;LmaE;~);ImXJd8>psks$5;?w@q<!<^OD&e{Y=Q+&HAX_9Eu6Mn5HxQ;hDkGTw0
zmLJ-lcFP71??h7$(7eLU)U~HNp7C;Q*<=U&DxGYj53Ta-$gtTssItb(_wPTOXPETt
zoPTF%QQZ070Ov#K_l))Q0lsK&i+QrkwFZO@SKDylvGeYzP2x#^-EJ+Hhtygd_t70^
z=WJNcP~pVbD2g&MCFQvT$#)ePyG!~f!)xzmFT0T#rBGS4fv38?lGnp`+rt_)mm61J
zoqaWDmKu|O2ZC&_V=mYaxd>nDu0Ualw4t!YKG}u|_c_ax%_y-hz4w$w*KnJFgs_rK
za9})^Ci9Z(Bi|q-+69I04)HYBu(Q(MWg#|aRKxoq4m0QCJ;S&=vs@avk>S>TkkDG8
zQ`(9{e&~S;^Xt{U`qRIPSLxzthxGCF&Y*5s-_<)D7rwwl4g_&7rG-@iM*qYo+Uvyf
zn9x)*K@oC)?v+xINWas2&Wn+VAR_2_Lq31;DVN`f9K`B^qxZo65GiktQ^op(64$A`
z9;IY^S2j}5S0r;KndY*t+xsp>UemaFbNgX9)_vjHY_c)RI67&UXIjlqfq&;|{!0$p
zxGmoNmnyN2nzJ*6vonm|o_vl*vX}%rL>oYf0(V{EfWA;bfi2ocM^hR*gFG*(6B}Om
z^$Lb02Q|;tDyqL6Rj4ao@>2~8e}uL0d0H}4O2LkAH7T?+xyf@SfTW~OkKhsEVc4DG
z@SP{8mZv{m1~xNKu52!a+&b=kY_*v!znPXSQ%(AePPb%jDCuogE4%9&@v}8uv<TL2
z4rR?~cwhZPm;u-7&f6-Ip2pgZ?8e(lH>_6-t#u9H8(&PA+tE>%1RhH*Em!{oHv2Kq
zXG8O4sMHkZ%6D0H;Q0O+|4aCyWU~knFj|0cg3eDLEqJdN7ZD@_n%`F^P04`>J$JJJ
zA)G+<{NfW-XNh&dKt&?kNF1-XK=BE~JkN|VS_GrrfAdKYA2CL2kQw6xhI6pg3-&`@
zN@=k~u)`u$a}G%tfc~cdyu=_o(EmYl4H$=bwOC!;yn_?UD{6>ft|&nA!A2dQ_P?O(
z3>S0(qaMhj4+lP)V4!G-2M@<k&V#wwCLs0^#A4hNIzh=9dLG6H)d%3$IeH2gC^5oA
z`;ztOR%8^QX{7Km;2PLU>Hn~Hf5I;ni%wMpW?49u`VB<jU4W59#R<c)dWQG~N*8C|
z@A9wT_!Jn|*S=w8(wX5lOj}_^e^tP-2KBs#tM1LG6PES~*)JmD(UHdwrX{oPx;kg0
z4PSC3y)v8gdOkev>qrN?mX2JT9db9d4I0ecV5!Z(<Mw&fL$#gnGFvt@eQ_MQQ@>=%
zv6Fkbzhyc6D`FptjN5Qcm;uf!kT?U-NRq`RoIJ33SOTz{LBb4M8^@i#RGtdkXQOF4
ze+kSUNZoOr$)#f>6F&KyYhPea;t<M)Yndy%ekjVGnWpY1KY)M5zjt`{+BR>`$Nd@|
z@!7~*P6C0~@#n&u6H~5V;TgvzDpf(R5pDbTaFT%CGW79d%q4n{1rU|F{XFnu$MaOZ
zx<7fRCxqXEK#x{JhR)IP`@Z|sm3f;<a7zW7eex7T!<5wuEEl*&GgHsOwM@*ON0In4
z5Q>8CD|m)MIjO5_aUf>vDos3uV{<kZxR?gL=GU&Rnizy2fboNQh~r@IQf}<g^JbJ4
zrEogHP3om$`(Sy0i0O_gWri+6eX5{N+xa}yn1Qj0hs5KchBDGiu(1FsN$wWm@3}+(
z=`U7XV3ti`sc*E0A`;-J#{8O*17LTX4vSXDLY#+dpo0{!Qjsx@{gEv1xYxT?=|yvS
z^4ie$3BHhoNcH=WT?YE=zezIeul#nC)zTuKoW|^&YE{-cKXO9Jg!x=ANLXjTv{Lt4
zPFTmkd~F+c;@HlU>tdZ*>vnsVTZ%_vJlpba&vkCfq0R#Xh2aDd)6+4@6MMUWN*3(o
z42|SoRJx+54<}P{jMS7bC!qpGwvC<h!%o@y54Y?$0}YP<?HR;DM=EWXHntP&f7`-1
zGOQO@gQ^l*nc800G=zjwfGsyj8Jq&%qj1-tWpOyN7DLy;k&(+qcIggXEZv?N3Q|Kd
zS6}cPonc;5xE3rG7h!{X6aHuAD^Lm^FL^Wj7kB*VzjHJPB-Y`%l1%a6DuDz;?_d;-
zmRTJ$WNWm&=w|wZMsErIY#S|Wj)Bbe0L3<X%JilY;b@Y6zsW@{M{lTUo|A=GkE>>^
z&@w|e=i$6>SwD@Zn%(W5W9`9IhNM#~uT0%{+S$Z@EdxJUNmb}LhNV3ZogWQs@A2*?
zG=;1?JxVT0W&I$n)ufYZ%}=<Y|LwN(^Spjs7rsu){Cw>5XqRB{OtaPWV#oli-7vy*
zJ~!11u}o5HOsb1mtyf_p|KrjqjKBUrlqS<^%lLIBhs=qpvu?K;GhmRXwsQwh>2vk~
z<TC9M2(zf%fT8%ZC+WKSG6nOB8Iz6QvHdJlxJc^W6i#h=gLNOnIh;=rUU3MH*E~3Q
zXY_{>Ht93J^V`G#Ey-UFAEI{L9!nBCvIcpb{eIZUsnN2#s2U*jv)+{V+nSVCi>H*A
zGOoUx(U=LdyN2-tsdszf-*}|Mn0u-nFpGW*inrLX4l-efm#BA_>7sRZ3&5ld3lxoD
zcU^Joa`9lFs-XGVxSp$W4#r!xHPN50r-+42slJfoV$r2c)p8E`K^sj_X<_JM#`VBR
z9aZ?2koLHDbJ>|HQksbQ>VMc_j&l1yF?a-<nvEcwm!aA=RkL$!{4g||!FZ}(M2CIT
z{bs~}e<|FP;3&&71$zD;bSPEk`^dn$)KzugwfrMtmyg0Y)6UHU+WO>e0#_MUFWd6$
ziLd?gKg_;Sdw#Ic_|G{JjUubrGW)CIDeX_u6fw@>pI|qtfvYx|vT5r1M$cC9aO1;m
zWwA>OVFKZgfI~n7+{Df1y1#+c6uRlg5aG!NVJ*23!b_0#{ku@#zqKJzv{78zp28kN
z%j-i%lid;&Gf3uZ$tU_5bTv@>N$WS-GI75VJ%2Mr&%Om6y6GuY-{{Ga|GQkUVE_1!
ziE0Ck`U~9aB49M}IdCWi6Z8(Ps5J#4>umAhp{EqNme_0lgt`B)Mt7#6nwy3TprQQG
zMhn{*wOkvXE5OmEdb6giiG;CPA?vH8_s5xnFqV!aISQ)N*#%Tn!UK}f*lDPa6bC5d
zkh^$84e3q=YJ4>fM23GN+0I&PB&$$7pT&AEB@Y|vEtlAoqVorMa3-;Q7~<V3<o4+j
zZJ8yAWAwQImKyJofOIipwpmyZKgJdR0c}`KD_WZ8E1SP$&vO(OD?eeQ2M>J!@>gEM
zb;O5|dWGaW5PJ$`k{SNx+}6m!Bi5G7tH|6^2(mK>_sCbJ!%wMx<LPUIBPLYq=uGez
z4GBiH<v$_SpKz(!PCj}|oxtt;Ww+oagO%0>|9z6N9OW`XaJv=t8W{cDhv<OXJah-E
zJ#y45wf(g^g8#j#%u_knT%W*iMPg(MocY<s-;q)(WI;AhkQJBrO4Ezl05TI}Z`}o1
zU!qfDz>HH2c4&(aXbbsH@=J1*s_jH--;3a^crzh1Nab@%daAd+b2`Z|?}X3xAo>zV
zWhOQcG85!80_Sdk`yh<Ig)w@yO1O9H&608PuY#XCD3qZjW`$qwe4Gw2<T&!FF-7Bb
zE*~-NBXC~Z*f(rNv0)ytI9!<9cCn}Y7S)Ki+m_PZp&6PcKs3OgJ@fICrnu0v-DF<b
zg=GBn?sJO()pvHxuQ}zMiKyitaG?@m+P+)bu;Av2&S}P%S=Y&Usz0Kk)ggw<bo8NA
z>gmt_-E4?oSC=Ibq+-|!qvqtA^ksX_!pt$&v$9Uq&@U(-v-_(@VwlSL4ZnO_B!Z!9
z*=-y{&n0I&*ksLzwNYU3{8eKx%Xd1nWi>dzMSfsZAIJ)LVT~H>TzE~Nae`d0#r=q{
z;f$<x96W~V;$$@oF52pn0{?7vA77oNDY#0QL!@*$NZoj~jXGp4(?X8$h4$axh9_~y
zk3-aQ71X1;uP15E9RxZA*jvY833_U(^m{1<w2aqprZ6ZkQ%EU%<j^kA^}6M5{CAlU
z-~dq-=VZlwi(KI1aq}~vxIhWTLIxOtdN=N@l4stN?v-vtWPXpaG~5Ogn;5iMe<oIv
zmj2#6S_w-d0*eqBE^t-OJs{p;-(;C*{6@6G;s?u`U?-jic&HFRYM*PUdQE;C&wWbh
z5Jq6n1r)w-g%u%B>}kd$>|{}W8pDs$;~JaVT9VOW!l{a0QB1V0%anv=)dX~#I$obz
zoCtVE#jj1e@OU0`ONH2`oLw{;G0Zjp2sGTT8Vz-LfPKFC1ph<xA1<ZQK721$NV8}$
zrHVvBq5b{wm(Iu^(z#%%WThXc_AS4?lqW<m|3f>U&#hcVsMPU$ebm*VI4jux8v738
z&Emb`L8iVT8S-f@40;xUv8cgCtS;nOBt2Y$twE+R2IqRX#S1ZM1*#*S_h_RH+Zr`a
zjlZps8J;@OzP${<ThCc~>TLZillI1FE}wDs=Ns?%o7^X<&DmX!9}tlJ`1T6>`SUW+
zqlUZAhFA|l!@eMYh(j>snu?jQe1N66sBk{uk^-09d8a|i2-S1zm=7o26MbO%#U#Z|
zYFhq0EB9YzZEnaXCu6f$uD|xcxZ@Q1GIrd7dwX<XoA6UGC27@sQ#WYXt3SVT%bRNZ
z)qjW)z5)%4@J+x&$Q=kz-|IYckHBS@?G3_h^@4c>J+F$OmovB)@CxHsTj<|L+2Hj_
zZp09r0qqzTzep69NYbx)5C^m?ivP`DjZdJUY_(pg*AO7q-=@u_8f5O!vQ_q(xp~*N
zL9@;&TWOAupS47-oHoU>A_=Ph%l6^yCVK$)Q5S{IKJ7`fdF<z_jTK>JSEUHpq&o$U
zdgC7SB7aD)2?o3?_v~=npaS=g+#*NcpWbP|<1nLp_v61iy&R+-!s|E~J>jd+zv7?W
zdPiuw^SOa#h~aGRb0H4)cYka_(LP;ICp4HVcQMMaH}+Zot3>feOH|;duyK}%gtBaW
zpP|gRoU2OPgA!Mj{;}e^QH}bI7yBrb=Gf}b)U@3_C5{XAS(@DN=}$|nfIe!^b=SCE
zXl|3Ka73QB7Ik-K)9p?3``%W{XqR4~k87PzBG5bwBYZfv{i@BN?GG(+Rb9Qm#0_=7
z<59tf8J72UlGGv`CDtLbOZm{~{?)xv?vLr7CBl?;CAg_i0DLyQjZ=tAfyGtuee~dF
z(^mzQ`{CY3W%j=dSm#q$c~8TV-iU7CfWngigX&M*DQ5Xwu?7=C%Cn23rp$k)-<shQ
z>!NPiAF18GO~Yj5;e?e;BrR2jZO4HXj$c_eqpam2ns(+zCl4?Fs@Pc8SI#}GR$u8d
z4&m{jzj45K&1zN?YhfzKhxju5y+<bX_S)I(0oYGFsJd($Cc$tX*eE&Q!7}Fo6$@O}
zlpQQ<;c|c{-0Xz1Cd{{;y5-aNCgAwTkc835ht|^Y054<y2=SPwYj+N$7S-ywxgkwL
ze4>{U{s?Erf6NA63ld>I6v^*a4!E!_Xplbh{NovV{9V_XO)7?qpR_fX8-Fxd2+!nX
zIg=1!)v=Rn|NihbHLI~$IwkqHU=qf$Biq&mt4-Z@dM#Yvvc82-Ea8OC7PTyE+|T!v
zR~d_cdr?Vbn%r{A)V*aKsoec2g(B+q_oHxo*-p)X4@2-!FAwj({{Vc>7cZI8Vn9g^
zPTb;nunw6ILvcdD^49Uht^i0KhFLCdStc(^I~TD!%0!brYV#PN{jNZLF!VCkt}szX
zoWDc)kyO15EQBlgEH_CZnuOPX3Jo7XH9q>CIB#yK2a?I@Q|Lyb*`;$GBJz4qCBpXm
z>J93`f?JxK8Q1s<6vOMKdnm|e-ww1DbLaY7ss}zFmJ))C!X2|3$F|yEt|QobaM|^Z
z8<Qv&x?&mFMmhx)6yQ9N5^x*lB7Kh3013kQ(cP{GrhC^b51w8*)u^D^lpmi>$}W*?
zV*6kutT%NOYt7i$<=YPQ_o4Z>J?BK6M(h^8d&A+mxE(Q<s5@56pqLZ*uiXp=ubA3#
zwAVa)XD(^e(;KgKQU#@X(v7SC20PCGCB-L*R8zB$=XUITwFLI!otUrT;oO4ZL+G<O
z7XA6`y`ua}Yis31mtY@E`vG87kav3dxP<oJN^WD)rdeK*?7qOdSxWIozneui5nC3S
zW0Kp#42QpYRH`c$?D(f-X1$Gt)|K53wnYY<_o1g#YnpxS-KJ-dtiVq&EpFHgQUmgT
z-D~+<_kt?iNA=p}mBMA>bRx@G9irc{pJe+0QjVd<$zb01J57%xNuOkPc*Bv(fvgid
zvgJvK^p0izo2(M47whrX^wXpqi`STK6C9@;luUlRCj43X!0@gEO+vlF3ZA-uvK%f)
zDPFlx1xsozpW)iK9Hq6PXPj%K&(JtF&AiWHe)2=YycbhIH0j@|nIuuVjuqC2>5d|1
zn3cQBhB%r0ejG=rGiLu2|7a?=xyWub&2Huh!<-3Ys{~q_Nz7}!lWF()`0@|;8)d(H
ztKWU<)Uc4Ed6aqitBs1!;}5CxP=@l|1Wwh-!tJx4j(4T*Dpda?Fj)=ac3urB4w(Fh
zmFNC>&}d82t+*+H?H>E5qGEndlVIzAS&<rp+d%%=5Z7@8!+*O(f;Fpda`IDiuoh|H
z?Jhx4P_jw8;UP?wY*UqeKc6{=zVWBc^C>S|K9z9WCD#si+iB^{gYBHMgr90~F0GgO
zf`GA-oxQ%Eg?+-toC3vLvar(Mxd+7ta(Ae|5EHlup}sDK;uIO9S4sK@&3AHFPPj6$
zG&ptjdlrpOkk=(xTd5fBSKhIMJ($1yJsA(vTdgSUT)v}AqcCytYdTphdE&ivGq2NY
z#|o{Ry`JvM6dZqJr}e9ZE|od*M5qMzLcViRznWF`KT7^3z%Fm|XEO1ZVi~X;MD2~-
zrAlu_2G+;=1*dO7JND8Hz+xR53&1v!$?adKb+w@p*FF_^pk#-ANz-*Jr-|+5EUoqn
zj!lEFS2CVvn(ZLSePTMlR})0?-j}cCm~P}dJhbB**J^7Gpd{O{Y$9x6dx>-uo4C)T
zDQe!nrzw)CHbEL+HeZ*FHFT$PHPk^N6McT3OXRd)v1e*-e4<(|n$<}bO;WQt7RaHt
z99Q*~Zs0?J07ID}|M~~5*<zAYb&5pXrV9Bl0zWA^<Uh&}mj1pl@J)WjdHz^+RfCvD
zkanb@irtSXZiQV`;a<C`K+(VVk+0=xKTwOQZ+o$^2&y5HHp1@JR5h#IYp>teQWBVB
z=g`GF6FBWV=a)=8)+=Z98TfE*_Hc{P=-qkqTt-?1G2d)Ur~}OpCe3^y_qBKx7p5^A
zE|RAYmD~Db)RuY%gE*IY9k3)l9RFt5TbaU`X=M|v-?#sK?QqQ;U!Tc!=S5v>`)W!$
z1*h4kh`K=D7~bv1_@;v#+_o75&U3=UE|KM-_XfFGEHV%-@={U<pHHwL=b>4loPd4w
zYF_k;k{-kHwdR96cUFmpdr?ehBh+;!q06fDp(A)KcP_3prbsWm*P9cm@%5mMC-oo~
zp!aSzGmSZTI`F3Y{J0$mDc`4qYuK@M5x`;v<FJ8V?S_KSuFKy~CM2v(NZ2Mjta!$G
zw(KH&(TPE)rQp7*@y0XWvg#E7Mps4ZkWtn!+(|GZf9Dq<;ErXyYzW%?=b(l~S!o)7
zt?1pn{|uHC4})$qVA0OlwM<Yl-2ckHj^sw^<EfMq)YA!%X0{)mfh8h2cLKMGZ;HJZ
z7AH=uDEC}yv#3{oA$E8fVLHe2R4ZTLozg*sWuU$x;=Y#HReq21s@PFwr%*$8K8qvq
zPofKT`)NM%!xq18rqXc+X*hRRo2Cqj&T{zMw;4vIm~42CIPZ;O8cJq7%@~HnS+c7C
zrLa((>nl4sB?*<?mKU=7Qlt&m(l1ws!PFn@P!`V8UhW{%S<iZb*p+wR`6xrCm<)Xp
zQXjY1nUdvFyT%icG~QFok@S*Cn1!yVoKM|{gVgpWnfLxW>oiXH$RjN$fJx@ppG<w4
zbNO-YCP>N7#3C)VabhM8SC4XRZn`n(?5n#XF}#x;Dbap&>A@lzDT#;k#H<G2gU`Ic
z=omxt2%oaWe<I;!0nOld+04rQUn#LBM7hMqgI^lhTHGw#`gGW$J3#>pg0RG|6k~C(
z8<>AgSpn^!AerWUO$T**Hq-k6%VnFtV{FMLe)JD3NEX$^`LEWz8rD#zWmIPg@jaj{
zjPmmosr(ccR5up-^r}CbPoZ>IQlOi$z<OlAnsD)e#saE4aj$Cq%6D#D!AcI{F_3D2
zWj5@DN-}_NL93p|D9vy{U-(kB=ifBVT*Jys)8y<Xwm{*#9lf+B)mGOt<Ej~Q#n**d
zJO-;i$!NMM;rEwC%bcn#K+b2Yo$V^V%d<$>!ECGW7@~r4I4>jE#`wzZGdKv87H;QV
za+~Ff_R(MHrz|(WLHWwxGCRNi-g2De=-iG~K;=&ba}t@e+FGdZ?~~}r$5K>pALl3E
zGy0WSA2yUX;jHF0`{z;rGW9a<`*D%x=d^ZoY!T<KK<I}dUV@J>a_8Hn`)7eeP|gP<
z*Cp!zkEyecYNHFgJ?>hxK+yuFxVyU-cXudKpg07VLV*@5Zbgc_LvVM87Kft6Ef6wa
z-h1!2*8MNDk~!;4W-^(xpS^!iFk%p=MU47GGC!_$W7vTbxdyP$!yEvT8O9jIK(aTF
zUEM$s4)=LI8v}cPjLhtGyH-6eRd+7>SFOK(cz#Oz5&gzoiMRRvb8W&4xB^~;EWHMj
zoUTCnNG)#u7{NVy1UYb>c}Wv}Hu0BStJRRT9Jg&$OT964B<}CuB7}o0i}`23Sv3c!
zCWK`p!hR7-d5L2n$b;VRS-sCJqQ$^Z1h{6#)kHpY0A-~QfXSC$0N&o>6*(HDkW|XX
zqVAA?K+@#3U}=mdbN4r1AEnP{YTXdx(0QmuZUVH{ClXL(%ugk0+EIHZWh&4VMyP}<
z<5o8g$ldY00$ywVx>@XJ5vM_P6GL6Y6=4#KJlqGTs1TsRYHe#uDGP#kziTP|if4d*
z)L48;UaR@F8A!^+ar)dr-^?d=c+}?)H_A%bz=FY8LP2w;`e_j2ma09+#4h-77lQZl
zcLe-Lmlp<nVS`G-sR#M$9Kf?fifog9)~a~D*a#z1&*`7u_cDF&!^Qi&@xK<}8TCt7
zn*v@vE0%LJXZ&ai8nyBp!-+eVfG)lc==*C@{8lY*qN@0_+?@X^Y$pnz_Oc}GHTIy}
z-V@(W6q9p_oq{y#b94W|wcT#zBZrIJ?7_+jf<N^HU6IfXMF2hh+`qfEZ*1d$eh`HB
zR9kWU=-yC)`Eo&cAX#^yExT6xx$@kNc*SMtj{Z0Ie5B30eFL@^_6dzd%ebBZlIp${
ztt}6jF6E&&Tj4QE!wrF}x7eN2PZ`gt+wQY9-5*)+Qff!C(AonuI8@hWF|&?pW@r%P
z@zIR_EI57=_-MOM+E^F7fm9}1WWBQR#3wW*B``*G7=5yITmB_mU+hh#ZFEKyukIY*
zhU+(Dng?<AzxDz<{2ZmyT7R69-!6QbVojOo7M>PmWzgB)TtVl--7FZT2bEsB7cGy-
zvG)z=YXU~wS>_&yN0R>AtVMtvV9;;{j)Yl^jUc1oe%SiNCTAc$!}XPV=YBPcjYD?h
z`tp1@AM-fI@}vrlwK|yT`m*l6Os$e8L<&S7btn0O2kS5&fI2Le=!Ncfh<r1|O)h64
zJi6PY&ZoKyX!={ZJwJB!eDq50>iOod7rw9zuZ}3RF_I%~t)2bK+l<3EKXz@ac*|8m
zo%na|D9>LIW3AU0?D`S9)`Y&i*n8A`Oz=JG?uy#?D^LELrw8iAH|wcn_Yzc!NzrKC
z29ym<ts#+@Y1J8$J|Vg!cO}dE>sb{&x@pr%)n6rv=56r=1q2Ke*i2|_@J-nW{{;zp
zWs1oZ<%OmRtZvA9C}0*@wVRmSeU-#$4KMrsgFwYa##V=doTLy8B}8aG6xQ(#4MZO4
z-BJpRfBEH7{P%pg^{f2lh2~41hIh&wdCJnfS3P(4zpv_nRa-T^u-alrqi0DvK<^q8
z_p}fB-R2_Y^)-dHP~IblQTsq%k{V`k*aTY<wGmpG>?#43;HW`XQ1g8R%gbeyKrZ_k
zDx#z?caqB|H5C5&V&S~RlnNo2foR-fDyayODe>cCr3xE<b>nySlblu7<jmwlu<5MK
zThQ|4KCdM~m-w3y`sSLrAKsLk&R4-`Bbh6G)SI8LTu)l%TgRev{IjpjI5-~IqJVeL
z4w|iM-F#Gz#}EtVf54VD$v5Cv|BK|1+!M$Af-sFkm;ruYrPpsqk-q3WB|L1>FwoON
ze)3MiLmAm6{>BD<#_nd{mqfQpD|cr|U9JMMCz>iYp%MX=T?|owpxrfy4i*>wU>8vG
z{3vk`ra%l_hZ&IGgZ0h<{Ai^ld<3*0m|}_Tgu+ZAGVdwyK>x8%2dn1uoLf-cVyKi{
zgh0`Rmp9>*C987{WirmRrU2Y7O9tHh0450m{s>Sz<38~IN|Qjo0B?upPONnk3Cps9
zjf+=492ZVeW+Bkj3Wc1?(sY(5IsN%p*FYx(Gv1N(cv^x3OdCezWRD5`r=Y=!rU-CX
z&{We)?Xnv-1RMZ$@HBKiLHOM7?v$4kYS$1N(ex-_-LLNvH{nC_c>~`xUaJ4-FcF~)
zRW*^G78?4Pzl~`4vTG77R$tfZb*TY-(Nvpxpnz3_i(Wve3BfwgfI1=2oRuV86=ojd
z4PBUf-W-~P$X~o1kHm|dooBYpWgkV<K9SS}-}1L9ID@)o1Kc$^w`i7+^K|Rv&7UV|
zXgC8h4i&vA0m{_5ujj_W5ii-IqY5388(gE-0}(6}3Y6Kon%)^o*?l5%s4HRkBx!dz
zIEY`$hu$a1nLm--2c|oL7(sVt^c&W-9=%{_-`|jsxnR($xae6f)kXg2nZ%^&K(aCq
zwJZGV3qd)W{*m~Ig~sB7VQ^8rq#3p1$_Zs(H{R%p*DzZ}qU+)jQTI{ai?_FyH%-%c
zjLtR2180&Z-tkPtg73v>*QgTuMF${sf(ix~{$rre3TAw8>+oj2=BV-X_wl*S^Y(=1
zsg?Ws1>L)r&8@zM*tjsyIm{8!rHJKMwifx`*@f4RD$NyhT}D<Bf5hJwKhjt8{!&U%
z!{&zQilg9n*(})B{STEn-Ob(c*r0jJwV!(v$Gj`~V9N6KcS@*K`iap6^m-s>p)r25
zF9yHCG=@>QQ^r26Ls#egTNL9&EWjOD&6qXS+Ivzm^W`EneD+K_!V-;WCDW3O{_xTU
zU2r`xaGvoZ6{LGprPEVs6wj-ZR8!Dxr26iPEv`!{Lsdy1<un8ZCK++L8373L9s<TD
ztAN3ca{j0M1+t0!;6~Z1ZXwG6%eWzDrnA2LSxBU5K{gDH+K`VVoq7-x-%EMcekUFA
z7W2ELOFs2(f7CnPkj3Yr^WouU8;WvCe)heH)8xuM3O%${HVcchmtLA8f9+L0g4oqZ
z#5=}x%Y=7tn>aPW!)ESVw{!rps#Lm@7iAM>gKRyKA3dEBxyH%qFQNds@_)2qr7<)s
zXH^n)*`f1-?!*Bf$*I`Z<#@HT)zeWDoTdBdVuho-DEp2Xe%ZRORecwK^H7hyRC;ev
zB;j9v&(tHp?OuPNb*+99J~!0nzR``91Sbci`RR8C3Hi-W(tApH+l$`c!*l$v1jeY3
zS?_+t<_XqZ{EbX6^RMHI&kQa4f%&`d_?_roSap8kX~ICY`Hb(_!q0)X=04X4-+eJJ
zzUdZW<9~LsaZ;oCNcJ7v&G-h*WmL84<LwoRzrkmgPpY{6Cgp)xfgcYOKdYDCz3*A@
zO8Fp4qy$e-(%oL3Zn%hW_$TfQ;)WvPVkW&2IVVpKF~rCH+_pw*!Gj(3EjV<05legs
zRte?J31C|@YGB=xZW2%@LLV~t%MWjmCE8X}{5x*%32H^@RI?DJMuv`$+VFQQAZF>A
z24Zmk!S9B!FBBD+(WxPY({XOfUO?*vujDR)=3U?_Eor1TZCRC`%XPaNotxjmKLK~R
zP5pj`jOB=QL+PEt$4jh*()}?))One$%=PH=P3{_+YhNKd9M#=PovJ^4w9ko<U2Llh
z$~m0aJewZ>M)qRh1^*kTPp*gILV58&!6USAR<zuup2)Baq?$O~yy`Gg$8U$&8^M^J
z&pgGVUOnflH_mFMo8>Dj=oT(yKLz9d#Chvz#v}H2m;bP4?s#fZ`IIPeCQ<jo#TtA&
zZ%FOS&>??amBJWQf!%+b^~%s|dCY{KN5*FN&*G0J($W#wb#$z=h57Gfif8QdHPEat
zsohMpbqY!L$LzDD?UII!H{jDtXZx%E{*83T2J5k{cMNxOs#QPZ$sR6bo{*+4)`sGr
zed$mk7~>rXqATum^TuJ`nq;J4b8}7Cq*9zR{*r=slR2r-_L!$_k889gx7Jks%1!d4
zy!<oEq-s3{y|_V;UIdetJ+VmxqyGKLl8MLhUEoMdn0x)z_c=J0kL?M0#PbjG^*d<R
zyfbq;Yn9>+e|v{ZkDrz%UwP)dyMsX_p3*)z;17+zGrvYRgz2X^9Y6@-?Cxy;ev@yn
zI`#x=m4}rnegp8R1nFK5{AKI(>I{ws2;8F=S@w#laxp%MXbA=Oz3RUw@l%)DM>1gc
z782m~Rh#kZRgJ*;c<&VXW98?7_@KovC)`YJ)?h5WkSg551^}{SfKrT><y@r9ryu-a
zGJpT<HGWSiw!NjvPG7xA_;syXT42v3VU<=wtiLDm<}n!L4bLyWNNt$RZuv?>U2?sl
z2JO{YK<!AMB#Fm8)&uVx=?1}58m2rUju8G^#&t>F%T?(x(3*!cemR3ao61lKvol-L
z;dB05EdF^J)z;Z)<@|L%1;N&qmf7|sW+jFFQ_g$CuQ{6iVmUVK9Y0GF)Y>S+H)gj>
zV+ocV`CdH;EPuxuHFPB}4LMltK)tG~13BB&lDfT+s%f<!62~wR;<X+N@JwvzDE>(1
zKI_Al>4;$?RSeJC^dBvO6A?0*8^*=H*<JEcSM~+B>Eed7TJgWGVA1yad!v8h&x@8)
zOw>x`WNLN)YE{AkqPk7FfZXrY|5eE{6eTI&g5kq2%Xwq2<K5TciG5Q%IHnlLXs4lp
z7glJDjLj%m3hq(GUcrMzI~?|ZQ_r}BF$=sx)T<9a6O)jF(JOph7uuLTVyi}2^Vs7M
z<E0w-msbM{Rhhfi$%Pu-+q<FfuS#v4i_%4M-jSCNpcP5wCKg|+?95y<Q`mN@@c*ry
zUQ)a?H59yJRZdIwB^k+7_Dy@w64Hnwx7rYz^`x7#J}gCWgq^s>xlRw73#RMAx=%)b
z%k8tAS!YJ2i;XAkojn>wRHSGOdt>-yG_5WkQzQz*=qzAFXIP?4hdrBZOUgaHPX4xy
zHJ<xB-ouT7OQSTnm^FqkjQ5)^FQyVTZaFVg5xXLJ@le;BTP2wDQ0qTJmW+FvGwD9*
zuX}c7@*|7v*bs&d_qWuR>_=^Rc`mDI_)Wi~j8KyVyM+A|JwEL1T@8M+Laukr{_g#8
z9c?&z5REJNgx!&7_UZ2rQ9g>J2rU9<4AM~1;Yg~-o%Rn5!(72AVm6^thw{D%_U1ot
z9AR{T@(hUKhYJn9x6sNwI<g<OE#eiIeOW=rOvg?pdGl+=S6FlUO4u{4=LJP?9*FRg
z&ovZ@;$!*y+`Jf?dwFYI{+MWj54R%)oVhD&(VA3i?7gTYu3iUEZ&z|=6#g)J`S}4>
zma#Z&TPIZzWA_id0I}FlufPCmRU}Zf<V=HZ1nr|fLt)UOK+pMS51-rn$D1vI-tlTO
zUC0E@8TPlpO(NYc>rn$iym~>8Da@)ZgYtMh^R5D}R!b*dmsU}Qn7E!=WT&qF{oP6_
z+v~Sz@<^C*SU)+U)6o{a%16Gneu@TVXQ4o91thFiiua&jGbe&YL_{Seg0})e5I5lH
z%$<_iK7UaJAzSi-WL+ZhZ`V!h-UB*wM$8pEHFe69H2_cQ$!;`ZfF3fMAm#`FyPoP0
zC|wke8miBl`BA6Jb;(rF)+VNy?V$6!)Xi{lHj*6?A4eKlb^g-Wl^42v<DKoOEAAYa
zZIa@7L9LZ@e5HbKx#e|6{VeYnP^+B9zNunfk@kJ%y^kZCTjmPKVgcz#h32AP>hJcN
z<J?I<QYju77uCm9TWwz(Z;CkVHr=+e5I)2(2?U%uCDluMCgjBj(~s?gC;chLZ}<|n
zbc?RU)QdCH+%jE7mYL;~?03nkR=x{dzj94wTrXE`n!}=z(ZxOE3V7;e)?Dbot|5_#
z&aAfY^|_k+>&QHV2CZu0r2{(-5Fe4b=7ql$O)d0tI_3Wh9o2#)y~0W@LTWCTVtwy;
zka)h4Y?me~#;)Z_Nmxxv?c0ZifpwFOcJCS`IF-iAc;KwuuCKkB#V?&?nOS$Sxq2H*
zE>3>1oLg7rN_4ZHbw7$UN=eYbGhJnA6k_#F%k)0mM3=|IvUm5|QelXs=LosXJzzya
zE2@Y4hn9++xwFXWw2mEDy_?{#2?{K#$W5Ol&2nDKpms52aOK3q;%5%xicO!miaAjc
zQBm=|dlvy&hkPhr58dotF5S2;_7{eF9O<z+D`eN}ZVRE&-WL{<9Wkasbu+a)w(M{!
z)E+JAsJK(p_9ya|FdjU!!?Fwqbj3wVMU_Mu-&rXOY@2^kpDvLkOY=23;SafsePMAO
z$o$)cCUtN6#fv_i2T}eZ76FS135iI8<R50oY5GtSPH>Mzejk=zk{XA>$ci1;Y_{iL
z@_ksVfo79_0K9&)HKky~%!LT^Zc~mh)$aT)Bc%9^a|XYId4qR#cjJ+FThvTy07W5;
zbHY{OXzm)egP5sEun*xsWl>Xk#mu}W3vO>-`&^V_``VLiDT@1d|0Czp=EWPJIPEJi
z2RA;lK_f6%Wl*-abuRYo9`+yqUCq7bEmS8&>XBigE3ogB9r?xBfr}%Hb34#BJq>#}
zj??vfqw)CAi>&Uf9TDlS%*q!Ai5UPxX&=e%Esy|gI6kmv&>9yGt%(L5^gOO)RkOhh
z;z_;mIMKyf=<ybw7o(h3DdEUe2Ht+C490pLG0eezYKs$Aw)_!p_#B6eP`o3c(3@3_
zpbS|b>QCh7grUVuFEkBA5o;{ecr%xEXk>jpMs9pD;Rsa}z%}wV91)8N^?C$M6M-37
zwC7kX3}`J902SWw{4W`+c?zh@!y&R1Fv&b|*gX)2Y(x$6DW#?sP7m8ia@b|tuD_Iw
znOf<-25hH+)O7mz7f>O<*$Cu&Tj5A?tiBBFTJ$i(vW=keP#Sn<@*Gga1D_E~Nda`1
z`R)qA`A`ez%9LR+(=HeTIx`Gd)#2&9rS)+py95zH$^0xQY5#MoRs*df4YP0-rNb^j
zZyLjb)Z1=`ja7#QH_cS(D<P^p0Q-D^d*y$YVE7n_RTHX)8Tq3fLFy5P1U<k>n&2$^
z2II%L(eiDnJ+*$1K5aFYpjbW5@`2sQ{Lch!rqGMh^sm5%Tl^#b%o_mp5HztZ@zCUv
zlpN{+k}*sKgoFNEWB_HqzBi;ow?1M*v0>nHm^(%(EcDSL{AMN&9`dLq{fLPHe<ObY
z4i=pmSd5yQ>AuSM3AgxzR;~*(fUAD#@J!i5{iP;Q${p5ihZ7d=7nRIKJC4*OGeH{w
z$>A9BjHhkziBg`$x&r55TDHIBXSD#Q@8CII(ILPY7ZLRj4JFY22}W9kWcO&W$OkZ^
zNbLia7=YbnPXS$#_-&LJ%f7HpLQv3YW80));){Xg7v`S}31yt-vx`Qrp4V2yM^mLT
z@QPGp&?0cj^cZMKl;^|>2xUYpu%S2K`qDiS-bwz4f92=bI|IZ?0m=5DpcWvVtpoq>
z-I$&5LThRGp+P7RD1?wU0jQwC4LQ%nJD@jE5E2v{y<lVnL_hg=;1bYyf`WgwL_z_>
zM~nDJ5U8wDBN8Df2pTgNoS})-ehJC{qs6}iJQzMo(&CT>T3k@)W8_ffnNAiWB%{86
z*E_HS^juox026PF_CbV?ip*Yp76pLxl|?#sNe)-)Z=mD=r0xVy!zvB{BI|(GQ&`LB
zm-xwWrrP2*3rEw4S5^l);;;jfZScnQHW&`OL4|o;aTdyc418P_xMUBn+NP2&SUZ)B
zxs|dW3V$#57MAwBXOZG{$$1Z;z$UIPL<G%u`+MuV5+ZxGP?kfNUwK=RJQ~1{9Ihj8
z14sbt5IC{}&Lgssl^JRIy5Z%y)>})mDmD5c+B>(5N@;uZapgP-&}V`%&jCX)z&8$f
zJ({F6kn>o!lz|!|9ki`#f`8n5yqLsgE`|0=&v{(uE%MC{ATb9Vt^qUK;3dK<P@X6-
zvvJMp@l-sUHU?lOh3vo#P+=GOKqdPn=Oq}vD7H#g)Qbk~+&^<YYC7OsbOY|ofJHjo
z?*{*3Ezq9<{Eu^(WT7J?KrjzzG9JY53M}Z#Lz71O0g+VzdjM$elZ@0(xj+Vlp&)G-
z$g{Hq;~9L&2H?krU`j)s<#Iq%gF!-r3vn1pJ$B{thKR=~Tc0tAQj3{_s9~YE(&}Xg
zTENq`|3WzAvR3RJOs|CT672J6K_^j6V7?IwoKH)e2_^eb)_Ca@V~UeoDW~F6v9cAX
z%6r_W?O6mHLS2ktoKRqe1UMMYjga9YXJ&1<ktm<;8TuL$Ys@`YnEEnu_7cTm`vN~Z
zftJupxC!V5=-dJ3t^#yaXev&<XM*pprFg~^_1_W9_rh~P0fx}lb@+|u23*DfIK1ZL
z{{z3|p;?F5YN8gg#2;S7BS68kTy-wyHpNWD5rMl~1Ir%-dxZ9LIWiF(uWkT=A1K&R
z$*mW#@Gg*_@_Ehf^gG;L$b)F1!L&;9I)oaYZYV>1_W&rvn@ouI!+^CtFf9y3@c>Eh
zNx+E3k!0-!<b1{dbdGRa`on6Q;AD#QC}7KG2gvEUY!*SZ)xk=9{3R{j)}CDKy05of
z{55)U%~IpQ;j>yB0W2-15B&ZToDRnp<nLNu2KIuVq@eH^Oe>4v5CmA?1!WTTW)fTH
zE%ZY1R5Z{g<Pbk;z;|no*##<k0ipu{e-bt%Y?=1xE=~X#m>5CezaU9wNWQ~(kgzyC
zoAt|XTb&FvuxIbYpiNMqyZ#(s0XsQd8|rfW0+L^dgaNCMoZ*r=&j`D(>TKgAO8(dU
zv>KOjh<I4FIh~W;Cdi0#{%4Q1Q$7GujYHb2K;XoS%=>m1_e3%{%@Pl~b(HcfjF2`C
zROGY($M3=cNm7{V8OVP9UWr)l#?!81b%qZWOX%)Qgm@R@j<QZSzgux8V%x(11C&*T
z6zGQ?b`DD#^jh}BwoNk#zeSvDR<H~%O#xs3^a(p&+T9wdsqXZBAV4I208SnIVZQ&a
z@GZO_oM~|t_A%XDH%zFHiUpen#9zOFl-B0wSevT7h-poz@brH#4{2;6?^*Z@L0vpa
z;kT`_UV!x{_#Hh0;jQwJh$hymR$c)r>j21aup#mQ=z7$=09tj8wt;U~;PZQcY^NiE
z4Tzco26jQ8CII29E#==p^E&*W)@uY5_5w<H4mX6Ci{T&@!Ozdm&d2B9&q1oIK#QI_
z+yVmyp#k7n{SprtJdvx4fp~BaehqB32mxeJa4f)_%=rhPMg!6{Hpf?TBYfdTOY4B1
zl}q=RgF>wYcvgU~`4*N>e+E=QKB~d-veb|KSYV>3`MJKsna{oGB{=*AI1$`NXtwGE
z+hK%11asA>GG+tYx|B~gh0YB&K9{!4J!@%YHMjmu9X&?8!w9*<)7T0Yf;U|_eYv?k
zqdDXntP|)&lUZGUCOwamRTR$;X%m7!xQmC0{LF?6&qv#44>XFIH7BXgeyjPy#3I4Z
z>9~rqI@LXdu+BnC1$QaZI9|Be=PBP|HCi4sQ|9o4V<V8S-hd&g!u^=qNfA9E-{}H+
zwjG45lou7IPH$f)2gb{}8tiWsui@(&T*B(uY#-u)ka4(9-k^9fhtFETlq{YV&Nmr!
zKOYH3W4xYgmA?J=ux(;kPosCHNb;;T6k>YyzQT`0uj|Wu^C6NQib^FgZjn2<(Y-3q
z${hU-+7{Ye>sEw&-Mi6&2GcgBp8q?9?<1)`l^hAW){wOBCn?tNh|In!T(|ogQSRp6
zQ`j}Gvk;t`e#or*$KAcYcaG}dK6$mrlDR<86D&5BeBa>~d$Wf3N__-x^K$O_95$wP
z@$<Pb@G|K?eHqoP+3{xd{FGDyhEa^rtMmJoP|1Ch39R55Ks$u-O>cuTixkcNPejS#
z>gv5qD(h{u&cunjH)!$ZO<npBM+-WFlE}Qh4Rc3JV_e7cVHtA<nIhHuBng@UHQ;)L
zY9rXj@`YE#+wJ4T94;|V@ozMqI7?|;_5J_qB;o<M=Y)e88)dB*Sq2O=>Z7tFO$_uV
zeo}nXoE>BthmO2Uu1TTUv8>|Fh5Y-`pu520xm8=80#G0CuQz$Ho0l=C2Lji`Akek5
z!w7~59o44*(Z!L{&nq5(D&GWvL{Y`v=kZeVr{GGq(sJLy+S=T6{3hi1V69K<;_~tA
zH~kA0_=4J$3MA6=XmGLnxG{Hpfco7;_^w|@-sNv9+RVGIPHO*E*y6-5zgx(_T;I@~
z)y0MtcC$!z2J#>0<AJ`qo8m&?alV)HS(3ugZGwGcG#l+3Bd`Wvsy;2O%|ZOd%Vahy
z;nMuCI8@ak`Xc3jWySHgU0S@sB@UV++?=B<Z+Gd~nN9_2Z!rcZ>ok0va&15&m>Dij
zHOKVLo>F5XeXV|6cb}5DtQv~CDd;vYIxg@DW8b!diL~pDns5k`ANC$3`>g5Q_ioQD
z9EY8)_rlc7S+hVWxDJMqcqk%tKc{U_B0K54_Xw;yl91h8<pjwaH^ff(7snF9+fktc
zzWjqx_{vr3w7O24B01|39&X&;Vi$N8yvwA)lyX}k4qLnHL?aHSGqFAYU8v|=-<P_}
zJIM4NicY94a>aTI9*%y~KHUdivKwv0R|IN(=B&`@`SkXN{hPS6WNyxR#3@stZBD|R
zoP#2>nM`G(eYJkmYj8>RFqfg4Y*G42gcf^H4^Ls>QDjfUpTZ(WC0&{43b_8i^E1$6
z@S!_SKu~M-P3DKio7Ot^5)StyOnTl|x~4CbJh=Wo@q-;o{CfSRwwTLGnuvvK>8BV<
zi0wZIGps{<=O;Sd=L0tn+_}})cZQQN2+<|4#e<gb_#Tnk{nCZ{R^Ab-NLRE>_EdKI
zB6=ccA)~BN?Z=hL3+i*8C3s|=ugzJRJJ@2WWijLatCe;k*e3c%_)Ma7J7-4t9RjNh
z^jQ@Z-jqDedqa~ukT*JZ6?jdcu_K-4JGJ59pRx7#l-Kn2&3)#FPdH+9;IYOi^9b0h
z+{4MdlUQA*N;B^n3?d0m*H`RYjS~;Fi3PF^&aqIW4yu!EAKhtqI|Di4_J8GFfj*bK
z$=`H^NgRY(!b39iZBTaKj&O#vV!CPdvwqJF!cM+%@Rmkdgx5x=ah0lUsUwMT%p9od
ze&=Ebuu7J+m^v=x*Ck2(fiR#`&(6LTL25#{pQ@q$E#bq=rJYrsFEnjHo=7Wl6c)tj
zEV9m&0&kZUpRc(^0|pC;JkJIO20&n!znN^J%hTB_55FvCcvEs7JyCR0YZ(^R1m-7B
zPmfj7xm+dup*R&c_DzUI-DfBT?80$Y9P_)L8bJpFU*d}tyjL20HJCg-2h*c3G(I)l
zUfz0w+lY@=$L0>#N<5$6ao80Nb>3*A?=+UXLxm?&eMa}h9!FS(SH;&8YM`rXAb~=0
zECJC(2^uXqqWP!$pU(^aRXRUaR~wUtU*i7A^95Ab>){Va)MCKs;T}G+b`R2{3=7@T
zcs9XrKPnOK;6NM?vV$5@nRBy;@Z|bTJtZ*j1SZ|jf?4z$o#Kz-gbeQ#rx)|pf{XO~
zw-UOBwcmu+`$A#JS{b9cw#!GYv0Tst{Pn)pDP5ByM@C97?C&m<OoT0HDkKB}MX%KA
zPPgheH8k)?u^i;L2q5w-=3KVU?EIsZr>+gpC#XfZ**0SGKY9b-ttSNhLv40U@sQ#E
z#Xz#>aLxYea;H$n0g=8{*;jgHOXwy27yp*^!RKcJkwBtb<?};I1GyN#6mtAeLuX^r
z2WJLFNOZ>gEZ{f=(6dgG*)AI7!?p3oFI<b_H`70p<$4ZkSX3561FnBEnHHLMuLCoi
zt>BV+w)njK7uJB<U~n!X;+sWVb|No)`uAG1s7h+n;ubxK3b_XuY-oIcb6&t(6(@PV
zGsq}&C=ct1_8pa#7I#M#H4PX-Y7%=}2@=nERlzuNH1R-6IV|tdCft@EZzha$`L#jb
zJw2?*_*tNXqJw>&vvp}M?RQO+>#8#5zw1(<oyi4UdsBKvM2T86*7=^@HEv$NWBx(h
zoI)Z+_0$ZpBfGRNw5lvDD0m8cz~M_j()DMGQT-UQ=o#WDR0@3xK9Q_3)x8Y9A@_f~
ztxh7xp<oSkVBKk3#0>ilH%~4fgO}^3mJ?J>>*<1!6E~kl#pI!jFNi7Q+&JjzinqPV
z=~$OSGPoqTC{^`y8wLX%gG`Gu6P@S#c`rwOPN~L#xvx`djR6u_ODXeJKYSMESdBV8
zjh9k1H8eFU=~Y#o^mLu1D+45qNsQW<vC6{O9Q9@tE9tSZM^~I^v5l8*<|;<}k#JdY
zDFnEWAx_HPS0<g0u2~u)+qDaQzK(hq@Og&o--$RWDEz<iY@JxDzr1HJ)6*(BXsRGn
zu8bUiOZ2S+ul*QpRNbKApX<~|`+$YJx$K(M%|U)K-oE2;Obr8xely{|I5u`hA*~YB
zzT+nh4O^>vj$6Lu|J^`@h~U~vdvr*I6B!v6Sd~Z*lwz1kO2=}9xR8O>7nISVpG6+=
z`i{-ZT%-wd+?c0EM=>?vbNSyv4^WsZnsT<iQ!o;r1Oiwq-5rW)C8@&i7^Lb#1_cCo
z$DVc=j3j@)W$ii6j9bPRo_nIpFQM@)A~7;nX)02|F1i~~Y@)}0klTUJX%7Bj<8M)@
z`=|fk#(yXCW5xA=rfyOE*7}h9g7-WkC>O6swkgE)ze!coz|STFv+=i*5;@j}rc_7y
zv~;5C@_EnyGc*#qa+{5rtW{CPh^Fhc%xe+%oaSkIr-YHe)akQkxtx9<mKytqWrgpl
z>1L8bu&i_PzrRzw+mB(@fMJZMNOo|GU(xgDOGdsvcqA0#wu4}u;zcL1-Pg@8XgwNT
zlH&i6&e=0Q@#xmA<oX>H|EE~Ey>me!=~nx%<twytbp*Wo{xhBSs#H#6$2YLnz5ZS6
zn33MZLh_y}H>)mF;NpC$cd`2S2<k9sox?5a8Vn9>{Q^MQK$MhKpKT2H(FV4?%D)aD
zAt_K3372vcnZ2j+_N-#7t!=Ci5#(#GpN<?EqL7l><4M+jgQm?}``OPtbmXxVrzu_(
z&TzyTXxsG1IiE=6+ptw6>fTxrUH>^EVLA}e-+Tjal>!@`#WE;PLL#7Bdpo0vu)eUi
zW9c1(VUhxOA~jxxib;(;9ZmP+Z6~d7$5>@iEKBBls})aZeC3G#Zu5Yu&-Rhj@D8g(
z31zv)Gi02aFa8d__Zo+<`g5qp1HoVfcTX6Pb)Dzggk^t+aXzky4epPzg4$xr3XfOa
zd+I4R?|T1)E(>7!{Tl9ZV<@-w_3#xuB6ADzvIr(v@fe(bZc}*7Z>EUZC&fE5jI1yV
zHFMezfyv4`Se?ZVD|FcNoL-%4#87*$d04)-*)w<WrW*-e>nzUZsnRvvo2CzMpfO*Z
zz-s?-K#X(x%`T--c$B+~H6`k97NKpb#@@O&sgNlzTl_hD4oqR|M@8^7eZTrQFoqSm
z87w3zuG4_Q+t_MQ-Ktu)6p^4KP-|`Ft@E>g4Hh${HcB(gVtPx4#wHP}+t3+U7HqI+
z+LAx!hvg1FO!o`8OcKAw+k1#7`1B@zmNoI85{;(9yB~#>FM=gT9J%QfTUlf=F-sHf
zWQX9nkI-}*Al&XUy(ssEvR#BGlPv+D4&@qUL6*|0YQWz;P_!#wqf@j(GftAtO(Vv=
zECCA0Zb|FIE0A2a>dbN+E=igYqlleLYBM`KONC+fw_^|^zH}QX_2RkP2$i?{U7q&1
z?H{chpaj)l#1A84-w`P`DX{-)n%dv5MPs4kFU~2G8TeIv$5>UEf71D%I&<maE(Ua6
z4YgY^b5kk1{<t|(Rd5eFzpoub>e0~D<D15#sqiILgh%>pS36GfWNzr^Eb*wVd;8P+
zX$H}P*GcPa=|PQ`Gjm@_THS-(MW(Z-u~)TX1IoK}{9LmBwGy%YiDQAfod-5U2D_cf
z&IifPf3B|PwLYuFfQJ~vZFWEY+z#<YL*S);tdaZ@pO_i0G>5t6D5>{+*yfV<GWi_&
z<@+u`nnJR{#*Flf((B8Jwernw2_frM5ab%%)_bcvC!_$|!aM+JZ@KlD9e{$<!Dov1
zfCz&uI2>PJf;}y1-DRzr?IzE^E~4>TzFOP;lc33k78<GeDMKe(-@~+Rx3sTk*~5q_
z@zOc}z#ul=J935(51O%K`1tryJrZbU;cxBgk8UrdAPNE0(r#QIQR)V1B6=XPk~tIa
zscc%NyI>PI@j<hzQ%d0VXZ20(hxJk41Q;495~(}Nz!b(=&O<AH*0%y$;}{4OFqW~U
z!DX^Js%zvAc~-)OBleC?L7>%HiRpDHzKx?GzD1cE%;gTMJe34a%H0IfXl^}Ekn(GK
zq3T#^lI=|#hfdhnz4t+`j0YbI4shI4I*?U6chfWJTisE0PS!hj2aWN3m}W74hfzAI
zQtV>vNh;=LUc7a!Rk8zSG(c1OjE}-uH}CjUeB8=EH)Lq5q`x6Di7eHytUx-VMYtbB
zzOTmG0fMj*(S#JZuK^{UXcIjG_}iC<-{Quz_MYVh87seEeqN!4Eo0x4cYdg&Lx)H;
za+t4gfL^}<!`zp}+dfr^@0-cRwy?d;$^K!h8*P6!>NTd3W=ku$n_Kb3OP>ZcP8=<B
z)_)Fc<Inj<^=@aD+57!N8V;1@q(ghAGP7#Ao;b&mAhUaD<_QPQ9%WnXN1y_+Cca3A
zD6Cti{K?lQXAc7p;UVT5H}z?7O_F7^%K=Y{5CWhc&(#4mP1-OaAlntlEX<AIi!`~Z
zclZS)bR@Z*1ntPwaukz}+6M8GLo#9vsVKUw$%6NKLTQct2yBcxrhnE>WH}*JA5l{L
zUQukFeYBD5LnQF57VO1<g(lQ~==L7e80il0{uJO>p&K*TeJZ6a#`KD_U_$D1j_Q>|
z^`44sc95VN@=Sh1p}N3LUy#cCNZh|S_v}jOL`+HyI%h5feqOI`%Yrs7259YkJ@`Xr
z6-0;xg;zcJ+5`hkH*jWT6T8w~r9KgTi>D6mN^njJ2tytar=hwQLdyQ$C9<!hVZ1=7
zkAh@z$lID1l8aYhQ{;=88?G!VDwgV9v94GfIC0c4pbVbCXX#3&SPEz~_MuCgImu6a
z>+#+egZ}j|Ozo6&Pa4YQLKz#Aa;VV<qv*cf+D@OE2;Ne+hWF9UeSP>ic4=0<l+MQW
zpoL?c#T}u(_u%qa=d=a;p{URog}OXu0j9F*ekLNOMqm%#<{ZaIm6~Ac)vp_R;{<8q
zN_3BdB&cAxJnwD&^cm@w`OQUq1e>d3EhjU+lWB_b+vBW8hI0{qeTXFE6AkL3VF9ca
z7i9I~P6BwbJZ(weH6MdQujX$h*C{3zfS@T!FzoQ+h3Zz)gtj;0v$1nf010tVk6H;Z
zCWjr$ZSWPBZ1bG@2!--YHBtP}5$&Ye_k_EsLQCesC8UgvcK$x@IW2aSTgx9zMwD9B
zZ9M4sID#7ab96Vkn+#*~6sNKR#{+zy7PgdHnf|W!Kl$*V{xY^w>1Hz;fxN;!QbkDC
z?5rm-SH-ZmW-}O{)Je@8WN+5C-*t-mAd{z#?WC(x7Pj!UwjeslRVfYKt=w)(S#dH0
zUHhOx&uKAuHY>bO(kPcj&!wc8igjSL&ya4q@kroc{`gm~G97Iln-SAUVf+R3ifqQ_
z3D*4WhJ|+egD^!z!E7LvGOEkv%G%qK&<QDUw05!#Y$Q|)s276JyoFiM!WwTh%nsll
zZ0MFyklik5&%<e6(k<Zt6WCnNx43_}3qCK+A1>_AT8dCfT=MjLTrV9kcx=_{K&VNv
zDMP=+4|u0pMnaWAQQ|ofI2=>`*6{0kQG3=(z*|BJgw}TQjBzY?&iJ63>PyYDQ|MQX
zI{&7fS-v1NW-Aq1x({HU_WFHt^|x-B#yvMbm_@%|HX{YGq<?b@4dJ0@99pa$KGHrw
z`OAhtOhBgiL~u*yC_VBc?~!6>`<f{B3Tz{GWaBTe%rtuu2n7iQvVHMz#kP2DzSw>u
zRkYI$;I=}D<?c>MmNsm%Z3$1j28Y+#gAPh*<+64uuVHH>Cdd?Ziat1PTy{Zw+!Wl*
zI(XxRBV118Os~g1p9%PW6q1*AEX#hleE#*G(xOXioV15Ac*6Y7Xz3v&sarU>Q589<
zIM?oA`!xEd1E)4AMGtJuC(tZ3pbwJa5ykn9!DsZl-;uML*r(Y@J<DxiBTTscbY|X1
zwBz4B8=AU2YGUc4*xyMXrifO1M;WK(pL$l6R~7J;&sV`hI)dfdz<RQcf5$m`RAhdN
zm#Nm)J2l<cvMgqGeD@YSpcb2{{C{m@JWm^h-<U=aPMQP4)0Ge;g4Ck0v`*it)hTi_
zlRRVz6D~3bOwJn(Ow1{z?vdi_Br7iNP5#MIcsB4{{b`1m)g*26@DZU18#a`2gBV^S
zJJ<&v*H%N=rrGfIdwmgCa5qbEju*4q!lyQCfOPpO=@T$-{M{+96v^l#pTgUZlP7r}
zG#f$x#7$GqR+k_vrU{URv9!wK`gdW%&22d{hIvbRv}r%O>J~b^eo{Ue(hM>9>&G<T
zH_r<Z$xmB}xsYsyzBk11=?KqsdVl((G+Lriw1J4@(=;mWuW9>&#M(nl#jj-!ic1YZ
zQ@wXc$kRcS=jxZcI3ad^6IYm4-blSIq+A=>?{Y3Sidi^2^>(KTQ<f?MaQYRrK6*9d
z$)}CNM8+mDz*5Fr>eq}Cw$6t^9_D4NEc-<}_0V_ihBBAaa)$UM4QZ;|0|kP<l_G*+
zFH4}RfVOKj7bP|u^%QLT{a-k4Pq9S=Qh)SOxF(s+W3KQvI}*X{cb~qCs~u~aTk$JP
zGq3!oZY420G5+A5*EJ5xogQ*|j5|J~?kMsmNYRCGDt~%{Tg!>BIidQsoAn1%3I=T*
zeixh~S4lvY2KNIXRX8n*h3;^6!yV0kXttw7s&W^B^gZ|eM1c!lKC#mX#9=f3YcKxN
zZz(~9sffnN*90kvzW4{Z+Z{Of#Z~2<kLMDt*({ND1E|9*&Gaqa8-K$yduN$iSkP*!
z&i=}!e8Lq-&5~8<#|_Acao;^b)@^ie;^yCEKLQQtCR`r558@U-Bcxbu>wa>tlH{Ix
z2z}ca9k4R1hFDf<<o&z3_H;CkB@hG3cNe(9RyBmv9cVekS+E|^2pSeUeQSyjqaLjC
z_Nx2t`ykf%HqYVGn{MKK<M6dRWy^8oQEC5X>VP7FJ5*OhG3VW+=ErX%gpnR^yaci$
z-eh3XQY)kuTf8Hdf?n-l9|d+QD+<n2^Np(@EQu-G;He<J4lJx2O$$He<CEUqx0WDU
z{JeZm+^9nnSz2;7iHC_{P1?V}d(h-Jd!rFwD;Uz1QZ^)?rWwymiRY%g@O_*x=?o8C
zN3$8<0(>St23i|$NPEy<03Uq_D8m-`4f>4M`@(39AUeU1=#%b(e8#qn(dO==Qmyc7
zbCR3-%e#@R>PCeUSDM`%0!|Te$3v(pTQU?Uz6ly5_V}4vK@Oc$W_dU<IkAyF4?`D&
zbI_uIh>L%EDt%SkPv<@klRr-KBSu3CT1b&Kg8&G5zh@9hxsWkvSn+Wrbh?97paEQ{
z{0b3L{aq*XtwlJhh9NN__X(+$`Wh9YS*AKigADR>YX5zof&I@rWD90^Nui+7kMWOM
zo$VUCvVcxp!*^wcO!s=n9LTfK&&rp)xsuK_!GenKePm;A%0YZV6x5UVOlDs~_Op6}
zX>&-NP(}SQa^)iK>XGR0>$NBAkX|kf+o!lDjh+j>1{4;q`8scOkbHTKC%=a7d8!*v
z^G3tdnv}j<Q&QxdT{&L-5BUZq6OaGGC$0JI<cCJot=r9-kNl~}e!G2j-fO#VIMinb
ztTtZd|H!eIluds}Xe9~$bE+Lm6@U6ipDxylFQ^pque(13RWcCs!@tQ_YKP|8l%B0b
zZYq0~`T7>?S<O;Y!;}pbKe={L5tjB4m2s}Eeb#BCk3>Xs5@Lhj9N7?*R_jJD`Q%tN
zzn}~_|8|$Fc^VPml6kw}_qFaKIPL3?@voK#0j74E$0>a20Qq!=t*3k6cP!j2%CqR)
z0u;|(w>l*NVdriiiYlO4;p8@??sbZ}@aH{2sO}yle1kl((auC}cWZpj<_a+>M4Swj
z>f@_DO&+Hw>a8EMIrcP&Rn$z(5+~B^gYyV9-Irt;iAS3Mgy|OR5JpzNHh~n&?Zg%b
zzX~iOU&~~dnSyuv-#qa=%7i`2%ZEpDCXp~iO#F-xe!yACF{FKnuM4oTP*Hp7j27RM
z=6~X<6<wu8?1-{3LG7Qsg7hDSGM$9J$ILm(9gdoQNO-nO4E>9T9&8$k5?L!+CZXR!
zd7HqB{4LWG-cy;zeVa~i=ik?N1_i41!o*3UIsYh~PicSRdHDR@aqpDYlh?=Dx5&q@
zV#O(}rV$wj`stuKNDp}ckLPRoK3^(*1&-EPFhUWWE{2R!J4a8596PYy54=Js;UFYG
zfAJpZmfY~@WLLluq_OkW|E`rp&gf7uR3(RK9T(d22QZvO^7#9<1;xuU(s0N+x2aU(
zAzrLoMdZ$;-A~;=2hub!+5Q+HPh+DI91B7EGhqJ-Cwq8Il?Tk1az$;PKQHRv{h_SN
z$!vDq;m~3fwHoa-O))xbM!#6!MB|pm{O?Shv@_k19BP=aP#qicm*;-uC9E<MZ)Fh|
zV#8d}gWTaKwR7qR^z$gD^3&`mN>gpbTGk$=!NR!7k#Y79pgqAm)vSg%jQ3BkR}T9x
zQv2f97tu?K6aI+r>C$*n;{+D7N8%rhlZO@f$ixI>VyF@jbMgSHAt@MrwA^K^l1vJf
zcaC>aq4ANe7Bvj{RC?fI0%5-!FOMfi3eE)%&Pp~ShrTbuj@AeC*iQw18#gK$Sg#y+
z0-}i5tnv!URjszTNWJl}g$roo7&C}A8wHBnSt@s#JI>SMcNGasRSCme=9uTHFg0+W
z%Y>wBC9Vv$nO%0Swk6owir>(X9t|;j=3ZeSVJ!oZNsE`{LY8tELWqjSNihqaEh6}`
zj2H6`6iFWWekMw8#L2322{&jfBsFr-SC@1F$exJA$vk3VM{$?zujsWuTx0TGczqFx
zGaYy1%)xXe8_2y1kXoXDSwPDDvb*U9YP%`h;Qld}@_C83ObcNh7T&U;2Mrba$>#jZ
zWHoSo`j+)yg7)e^L76P)IojKsl*LA;iAEmb<rN`Un)I0JA9ndAov~htKabJ+!;`;Y
zRFGq`3hneAyf;JmM5M8Bb}v4Y;pstNOzWq}Y~4J`wdX3#ud>>H+{q#Ec^sE2&oeDW
z?Z>YfEn6?v^p8*0x1ZWI$K8b3N%~N8FZ@OQXqdn0(SHm25uXiv%2Nt9@ti--CMOVm
zE7tmL(hD-gxBl}st>t6#p~jf#>B6PPv`cWv^jz1%h>&ZZ@SDz5ji-B@u6ZjDLMr#?
zm3_arU^Pvcy`8rfqwAMwosRQyzF)_CrjPkC5Ory?V5s}mSrVzm+h<ilfSOck`^(G#
z(gbxFa{$Y}te5aV5saGR{A^womi^yvTiQ~-vtyOE9zyaSu4Jp)iKcM3+{W``%KAZ~
z%;w^WdeO&}W(VJXO}F@%dN=Q3o^Pl5K#`jEtW$qHxS=I-I!-;65mPLgrXBj~e0eqG
zWHhBChv{qj>i8HsglkG`lzDV3_S786|8b{b@MA~Ql(Km=N3Qk{4}ZdsP{z}RSFlW4
z)=4%xVuxhHLt47Y9?-x<F9Vta;ilwwl(;P&tXI^|XGd#c4iHB#1F5cL)&sC&JRPVi
z^%}x*Fcgb?w$|k7K<M!=hyw4cdW}$Z&VoXY7Mo)I1MjAt^Qi>hvr5zFqz1C;EDl4H
zjLHM2)GLw*c7=*#XYRP}3g-a%4O!(kQ5!dG(Vfw{0eWBeOPsk&RA@?eW5}yrmEGBT
zzn|w}G%cCgn=B5wrpa|eW`cP>l-4#!<>4f=?MqViYF<DPg6#iVN^S)xqj`UJG4|@W
zj@wWhN%U$cchJe>)LhE`fx*3CL<J@FeG6<h?veScT%M8o_UK5?%$&?}42PL6_FvMx
z9{f!sb84Bz8|82KH(y65vsm1GLpjy4h0N-kL2EH;dagkU)T_qSBIo+TZj9QEjr@v8
z{pRF1<oDmDR#y8Hiqm}c=M+d8<lhZ9%n5N962EqM=$vD5NjH7Q1YtobY5%crA*7nR
zFt9}g+36I{+FkOU`*`_4{Lur=rb}^v<DAFJH%YH99{k1T$wjx0LH+WCrgjvq7RFs|
zXDJMmkiZ`0sk%DGPt7RI<Lr04RAIaEns4tOhLF{IP4N9=HX+Pg(Dc3kCzkT5r<^Ge
z3?{z``oycv$5O$H8;nTN@khggO`<cm$I$fCUMlG`qw~Wn`|Q6W^E0lU`IS2`2p`1m
z$EgKtvAXt7{O>qZ9~Tydr(JzM#f?1WwG6rT8K#l;Es$2&xy}#>Y;FtnZEkC3hqq;K
z{sQ#mJYFc)a=@PFUhRP`I0a$Pcb@;f5P&@s`roH(%p|D_`zS*cI9Lp}bP<0aIGu11
zH3-M+AEqrrJPF6LlgN`@CqJh$fp{-zs8QOI;^I05QzIS~D4$a}ZglXYoc>-aFqBqW
zw&>wTp&jy<*mFe6#8@rXT_h-`4P0bTVlEGpDB%&y8W1XB5l!up9Qe&j28hk`^D36~
z=mfEUxv#vvac1J|Ae}s+N;42)`PC%z?evkEDI=PueYC3UOky^hr9tz#P7y<3*G;C<
zM)W6bZ%@S6qPy;~itbPNR`lS7Rj1OicMqg3qa*3jh%reG#_?1u7%1^S)jflwcNn<Q
z1cyQJ{R#Y^oFBDpQR1yato)+eS0yYxTgnvX^ZCnLIP01}9+xFpsy>TTpa;B3BU+`Y
zOLssog~Vd$V>3|4>Z?|6RVO^tGHzzf30X|%)asiVoIr|F<eqql$O;VJCc`9T9pq1r
zy)%nBZFm(ert^fK8hR1mGAEW2=Bxfa>VIE(L4~X^jD=N6p2Yp?Ljc5KdG%|C>cq(J
z@<$tjhF{lL>(XTM6=tIM)=#7H-?e~L8|FKGz5t;ky`E%B;T8+0J{-QRk^zb;H)whP
zq1{WBRyGf9@y}oO?VIH!h-Xf}^d(AXZa7W2>4Y>@(U*L@5#)uta?CD{xrtSu)yXrR
ztG!XA5XB99R@rfM0(A3LF-GlQ_oZb&3ab`bh@Cd=aiSHoAhE39aELu5C<TPbrQq;C
zmLsipoe&5dwtY*-B<{ER5E;1}924`A&+=TAvtltir8Awmsl?e$;`gi*dux+!_lRX-
z4Qj4sTcWl0yq>$f>E;LCq?@wXyP{PWi!&4nyw~-NUFg+8qT2*>;X13?OcbpGf40&D
zH?M;zrTzbGP!@HNy;=Q{7c^m=U^&M^HK(@Iq(|^oMYA4Lug6I`FGLQqe&n)oj&j%d
z@RRC@_B)*x&G9HR^V_FWUn<Oza^~*0gWm=~E6DzlkYC)@M)hj&BCIJ#kJd;M8)rLd
zkyM-8C2xh4EV1J#)*z0_W!wExLi6SOkCtywpO9-4a>!Cs!uW>=tS6r;iwAW5&gaj`
zYuUgfzhXf{W9Tl!j1*mKL%v@e(~>mJb;C)<eg)?h*V8GuvnTM)=?tMoR2nszjiaB&
zHt%rYf+P1Ox6t_)v^=Yn%aRnOnc^<mveYM^c<0p#^e&{^3+*<7Z>JHHs2yltL`^xm
zP6ls<wRZZkH_ur~^PhrV9MhD1HV=V4`8<HEwe`!;EClFWf&kyA0h8KuD7bm>2DtK^
z0Y)ao0UFMGD7Yk2Tk`Q*f@@r9+Kl9H`hea~?F0ep9KRH*NoMs*&Jsj#6--9KAB_hE
z@``(6?)l8YYiSDTp6s@z+{cE52`f3hH?Yt@y*j|{4|~Xs)2521J^Ku1t00R8he#Te
z%1r7So8Jnw8)vZxS>?OM0wUv8MXR(w@AxC0Xm@5})o8Ipq;;93XZLJf(>Izej8;G@
zx;tmo|7{{(R6k>pW^)e0YbzW|>|PH(Kc%nKg1F6Xl(Q*nh?+BZ>r^LZlc$5C(<>~u
z@kz>yh94B((fsI)6BghS{)CUq)MjHF$Kea=u&q14oZ^4YrgPb3%v&wZrD&v(#oR)f
z&FqsZpm}D>rb<U)7&=b#@ali~ddJ|%yT0Kzwrx*rXX1$_*2LDtHapJ5*2KxgHYT=h
z+qV1kbzk@Uyic8~bE<lGcXjob?*7pA-)rslV}fxWTcIzQ?YXZMf}8!4O~)z|U(C$K
zcQ)RtMaf=9{YR+-BeL@8R<$H_E?bCc!O}}LnM%i$b%Ad9OE=x6h)G~2JcFzQg3%*R
zIlF0i@*MQ;*U}2xyU9hbNSur{+ES^yB{9syRJHwGIE4%Yv>qp+@8qqNX?9B4R@a3^
zA-+t9W3vvXX%+$uRkv0OsT$77$4<$|2{8XWD)=GJkvPa5jOKQFXut^5DLk5d7ivns
zoc^hv8ahA3#X{C)s_h=;kTR0yz}!po4c*LqOb$QY^Oor6Y@%<SAnx9$a^d15jhoFB
zPr2ETrqPf>--?1PA+s_mOWN<Siss!$3THx*`f6^8LpqsNUS8CU7NQbF7Gd3ycwG=X
zdj1sh!#&-ilFK;d35a4$ieQXgsUg-v0=$TN@yG>e2Nh^2HRKZ(_}?l#t&sv=1$Q@?
z$QH?>^M<O_;tZmlsKH+Y*=BCb-kq>^XWV)Xwx<^v9;elr%bBE^M&N>BxXbvijt?(Z
z`bEg!Z=DDM-V~9tIw=q-Ns_u&3vF|J?zLurmSA)u+N}2%h$=ES1TR;j%V`lc@&5u2
zj?E;4?Q&@(sXvX4_}TM%i+z$|=u#aT*YGKf;ne__J;7~mAy)4lMl8PeGY_3s;&^DC
z<2mkntEu-Xjm+R7ii~oRDeXi9ksDq8r+=zX>{#a-4|sfTW^5gG!!Oxo^;PNRGwb1A
zh}07MlqD)SFr*TPSWXEl0CDFAKIQqyJ)XqQ(m2hQrpNS8RUD*s>Nc>_DUn+eOhSeY
zv*$*sY+>~J&gx=F+}LLgsmV5wLt?us2|m|*BrlKIUBoONTvQmD)AUF-W1jV|W$!RI
z9CfesU#qjkrw;FT{ZnsOi*4hBmlVDC;C5^%M2Q|A<odk&)h-+AL<IqLBs8S`bEbS+
zO>`V~c}Yp>4qS^18#;gZ{VT#afsJcZTUC$}gG(_D+XBh5NGHKCr@kb_T>(F7hJtU#
zP;$PRX2iN8h*(YLt&#+Mn?ygL=<QuP<uva7wnR^=(WzjrnHOo0^IoeN+w^P*jKUzN
zD1_4u=arb8m1C0jfSOkXH`-N|QSPObZ#iTCmK_&soAhIcR0M;upJjD{N^`l|#FUv_
zM|^POdKibUsbNmuo3L5Mc7(G|Tq#q8j!P@^R|@Sc9JW&{#jR!jG8Rj`hryu7dy_gz
z7sy3FkgT=u==o!E8gl8tYY*I#SKB~(2IBB6Tx2B!ubkg)ZE6;yjo4hC#P7P52;*N`
z0~siWIDZ<G<--yK%?=)1xfhK@bK^`1vd{^>gZC~N_D{p&v2<lDeHCEoHt|s<K_Zp<
zp+RTPTCzdvnW^jc=Y*R7cW!iEfxOpwAC~~FE<ft$+;J>Yr32S1Ef{*jM33qG0=R|Z
z(ah<yT1eOZCEi8yK6OEXPC-r1B8)K7+<eU^AEUuP7`tk-*~QJkaG>`mc;(CS5wDL+
zS!PZhY_!h0cPJMLe%6TQ6*-)vtrn7cj4!^Ni2HH9XqYZJjIWb34ob6oyX9>YbQ<CG
zs-%(aMNY3P!j)-KW`Vu+`q~CI{{`fhB`Kst7Z(c(YI*8;*!xR!r1Rt2(pF}w$CvYr
zuAft$TDD&D(Da68_kRzeU>=(P^#G*SkGnjxGZMEk&pL-lD=m(zMSx6kpSV)M1+))?
z0f6=l&>->CARW?{NaFJ^+kO|3>}MtRi@}GyBG9{Nk>e8A6KL;zbdf>;nvUf51t<*s
z=z>Hq-+5RE?%}Z<edcs<rGO?@F28eZ0TCbkI-h_j{ajBfF&C<t8$m?;Q?<gyZHMj-
zZnXgI9@R>x#&FrxknM>p{VdH0G2i~`#g*nG!z85J5lH-iZ>4OuPzn86LfmaC#d2?o
zL1wn5`~eVRUavePXlsBOvbG<~oz)@won$%4C7x|nVYMYN6>P`HWDi-rhMRd&L6csz
z%P!O59=(Dg;4eWC{3nWoJbWm>#=^}=h9N?&Ia^39-Q9C(_BFi?aU%G*8A|}?0ypfM
zx^mp-Vh%Bhg8NSe<d;)0wM59Z0)+iRX>0b%XuehN6jc;+P8EqJdL)W7t?R|EFi&40
z+4*OgbTcfOjWeYRT!egS;P-Q8$bgw;sbZ&_TdFQs=5S3#^4T2I+C&oiA)2yNRyc>4
zL_0qjdMXl3!tTO{xvbL|5zt(^3m|8n>iU_mHxf{nVeZ6#3HQVCcF&{K=^J1>NibJl
znKNKX51A;wChA`t$p&cnVQPlt+JP-RaJl%0Q}=>wz|?$#(XjaM*e!$T?KvA!$=`r~
z&o&vkKfRSV5KaI}K7~LgP{u7e)*$~gRUrT_zO66PE8Rf|O>}3pQROAgK*i&C3F`E<
z5Zj>DU^(o%1tgK|m6H)r;cRoe1Su<$KoL1ku4HKa^eIFD&G9GQC`XrM_X_QhfXiCv
zEsW}TyC@Lm=aZd#?1>nIX5EOa_PD`pTc9JH6lZQ-7TA0YPUkdWG7_Z^&eUw+tldLc
z$%iNgQ{+H0z=_iFZAP_jP<9g<J<jDU5H|}dKqeylHvcDF*`uK5eH1Qna4t-Q%DE#@
z0+=U3mU(P5xmvGLP|Yikjn&-1K#SmgYUeaW*?T9le+t7gLwNj-M<m8$kRAj}ij|3N
zlO&XNRxw{yO|<KO@P~sBC(9iIj}>~%RK&cKTf_ow_dRQolrW|6#Xi9V&55K4d+1MW
z-On7Kc^JIR7<%gt5@n>eoPHQLO=sY-kpXxN9{fB3j4uftLH~C&J<;+Dc}|W#uy#{2
z$^6bc93_~>y`P4Un%SW=8EQ50?Z4qUD}09{$SU>rus03+W{pV0WgcY}O{RbMW{3@^
zSL7cWO>;0Tm`g60BmAsW0Z{R9KzH4q3QRd6tK?tF;Xb@{lRmr{wv|jJ91^-tgE1UU
z5Lw@%O-^W^+;a{Q1fY9u%Kn<O@`ha~uqu}(F3rvsWr<;lG1Rm!92-60L(&(#On`P-
zqZ=T|Jva1twE7rJN~OH;?n{A2oX&gZGgjTm=y8DSLZ~nmvWQJ7cj$e-Yv~fg#`4=^
zg7(2z5tzzQ5_S2}Q|P~0kP$-M`iRHOwDcjc1a0BG?-8}Dbl1Sl`&}`p?o?jup$IN2
zYVS-8c57$UIgo4*2}ahC;YXRn4?50*K4Jr~t)uDCL#PuC*hE49{?ZLpB$A21Ep2FR
zUh2CxK|LW>JzHb+xh8yq8z&MH{G7Gj@2m^08`9s%W$RmGFwrsnmPi(<QH2TlYu+61
zzMU-z&!8JlViw`5t9cnd@EZEK7X{A!y3hfYY`vJO+RSr`dk-j3@EHB2pzVpMMERm6
zM!qPODt(QF#93wYQ!&K7lN5WJ10!F#g5S+?pzUvTd{Uz2FYaorS?wBtW+7P%?Rq*M
z0Iy73GNf(6`(Pu`zku`9Bfel4keC+2Bmm7d<tD->JL@Uf^dIh)^3!Aqb~&#rrXS`x
zR_Cl`jrv+2btAqf5J6)=fCP>Wi37XiCB-aB1yewXvLxjpKThLntJ$q=CW#^i;hR9k
zje5eMav7cKC9Gh{Jcnb*I+uXV<w9Zvj<9^>TiQyOJl`>Jfm)BaG2B{|*|RMWV!A&;
zlMdHTF})7G-{0p^oA_jCwYcJU%f^L{z#{RRR9>6Uq(cr^y<_5-_i3<pMRl{akPKW>
znFLOCVHu<YaShu!fh-{OeFwbd0!@N4_}m1ID`QA`=@Vc`Yuz7H)$2wZ)ZOTGfOevc
z9H{7$GCA<(*3kt71K9b-o5Q_qmQV`ec3Do_(>&^{K?MLbaur+X=hC0~e|+NRA^`^Z
z<bKlkvpevyove}Lp!|sAXfIXK^{L}QW_=u{)s~wUvC`07fO+&Bjp&yXn^k9m3n-sR
z-eU##{*QUeiz(2S0{lzobeF+V@>aEapw^E0K7d=h-UF9hyJRv~H9q6{Eb{0C_vpl&
zUp#0H_h>`oO{Qa?IGey1Ikd$Qb(BwP*(9gXT>SEk@iCrM4?>^`{MppsvPSOp?JB09
zyL?ZH{smxN`y?D;zmWV6GzQHb)Sv;{Gbw*=X;*<RkaS-FXIck9Cgy3dzv*EJ+fJ39
zmf$+*+TE|Q9%^9V9t{CGa@fDZp1BBimrM^J26V8{yJQ|BiT2E!h1KZ?1Aia^V{|R?
z!~Yy6(-GYLK;LhM{<(yaEhhSW>iNp0V7vWsM+cW9#>OgP=GcgWyk@%F|3f&VwR3UG
zc9k-QTX`Gnw`D?5&ZG+w_5B60?padseviMq`)cK61^jE!Eq3|B3nF@l2qB8=xcSkN
z;Ph&rlu?$ucb1~UpSrpOWp$5YVm)ZARF=*y3|z91*opDeiADPMC6x=|g7V^0fb^4a
z6Hnw7#Ou~1znb0&U~C#l5LPYsVNJv=|JD~>D{wHi_Ih{b*V#GsaSDLULMw+o)|bI|
zTurLkoD5SP<o$g;byPn%qB{3!%-}fU=UG3mJCSjrL2<2A*I}6mF1wwCy_ub*8Y)y<
zJ?euAem4Qh9vJ>ta<3NNREu(j?Zx#L!ksqkN#c}!lqS<eqo9X)4jDskFiGmtDymuQ
znjchy1A~jZl=`|k40CPwTT9oqlNRonP~#xrF7NNtGtj2#ON7G~i;o#EUgUicZQqtR
z$rgX>n|P6K1#|+_myru+9})7f>UAmP_@0`od~?Nxj+eHG_6lhGJ&c;!g*?4%#}p&h
z(>g{-dVpT&($GkR8xui>wyuK6lij4nwIMY7QZizsY}YCG_+9tpteXnL-1Mbv#@G+#
zfHPVyIi`G9M(9%U(Bdxx>4hhW>LKzy<%<R7GPmgwnUhbCRgT;VKfM)5a6Mwr<>J9u
zlg>HxnzY4fy_T<S*DLTpE`QO&otNGOp{$LKFu-XBzRLdx&CC40FFesj<(v3L(oUu8
zgjH?4%1b)?=g)!e1_1rMLh*EiQgQ2|pxQY8Ey7Pq*xL$a3Z7JQ&;zM}F+nK8N}T<D
zjO_>#Dl+^y6tGb!!=fD{%cAY~&1=Rf0E~CU-n#U*kHREs@RxXSv5m{>`2(lU+U6$L
z$j{H0Ns^b4qa$Qt_N*T*U!$Wi+*J|((ur^Upu;l9c1rcte$9M>`GWf<b97W}puiAT
zxf~px?EpORG?fw9PRBs~6*zTgmyh%@H``-LIV3y=?~do(v&%vB`Bf|e9!30+B%jtL
zZ1^&4*uUoFMep$)Cp<<#TK3rIiPR_Tgv=jV^b`;OSD1uUdv3w~+wkiuG@*}F7q{Vc
zC}e>mE~p)?ZHNVdrWESd17MjN&`-eX9f7RH$vPHI*k|*QjbT>=BktbE%&2pt{X!K{
z0~KX{PLa;mwo)b>o@9~We&@p4QpiRzpj<yl*86zp!i$0cJ8HA3%*$(%6I))49j_Kb
zsYSbhiXecPM{|?Zu=ma}HIL?3;tbP<KFmXz1<rQjQN(furJ-L#Y9_HU;QrLQAx9oe
z%*@(Zg<V^4#;rI870O*J&*ore#zXTv%DN_Ro`@d9Zu+l)52gmGVG(5%UI3E|;_DVG
zHl;u);SC-Yh^8ZlzJAwTKUI{w6C@k4ZTwBQZTQMGtLsAiLf>70H&*0Voe|QQmiM$S
zC3sIi`iXy6fq5dh2+PK>ktri>vaW?O#j#?8s7Stk)N{_3#B(TO8erw=G~gj!|9!|*
zmagd>qbYU!^q?Y+ChVqwP%@TM4@A*}Z;ar-d-F>b)2g%+%7D!Y!2gKx_~K9-AKPw1
zC|B=xESQrvRpF8aEjW+_#s2(DA%fZ4{EGnoB;KD4Y*epJt(%9zSohtEmb6<fN9sON
zCQEgd^c8|I28OlKPh2D58RIO_+1%UOnuZAIZ3fgy_^<~q;DbG-T7aK3E<r78RUy~Z
zYf^4^R%SI46nb$SnbtNZR^FQAV(x|{d9e~^xzv0~xbln*zdjR)`h}t&ToFx7PHe6t
zfL?;8pCl7s@W1eRI^w_hd|2>ZZcx(Z6K41$7rM>_rL9?LkMcceW}65<hPdE+47GGH
zMw3hthjb?tB&pj+{8QYkxF%&pv47S0KCV_?fUk^^@{Tq8Y*An}=5>|#v{HM%E460b
zL5jF;a!R%7H!zn~6*lGR6JMgT1;43dD-%sko?Mi~4%`vKk~B^W0+NG&AiD9DcE`ue
z<GBPLLXan%UV#TI!uR_j8MZ!z`!16M?mjFfACd-T7EF|1mP>7}3|JgQ5p}5h3#B;1
zUo-pr8HK%a{9FcOqS5!t|Dotk&@=HrX17@d$d1_bzr{<8O&_om2Lw~FM10x66IiZR
z$2nJ8V{58RY-xfmAhBPaBeCiEzZl)|UyQy5k9IoFesQC3O<dQAX&1b==AUBpUyRN^
z{D<}PUyNSyZv1<d4Dc^T_a?rMlPk5Uzu;Z7d4q3SY1%^3dx*6)4E~;Wv+B6@Mo-9=
zOxQE}&@!4mOJlhXZ;WSPb?1w?sBUp@xY_Xe0A)6)lLVT~%Y=L40;c$Z?)tGtx8T#f
zd-(Su_{9zzPNXcr7r7ICXr+#c<Hr8ry!P2&s;g!TGUjXeBTYJI5ZYGRbkR>zgbI=(
zPNCj!O#KP4q;xm9sVEmy=P~4*^<XS3(VS&c7FGf)lmC!(3W<yo9wCtzRAQ=r?{rS-
zxa)&jV^fD@7uL&0qu-c_Nx#_K%eEEriugRbUGo7;tdTfV9pTDj?lqziE{I)C2*K%l
z#zjCua5FScvE-Fh##Y_e>}@w#O0wR*05~xOk>@(+Q<K8KqNsbvknD+UaVF@09lA>q
z?NM{Qq<O=3Z#wHY6l}RdIAJD_1v2e<N+yOQ0)8=0U@R5|Q%);?rozLMD+G8?0q|Sz
z^SAj?7${RMJgSu346uL$;DJjPl;Pn0Mz%Wucz=~tIHV`Pww=u!yOs?c%FvA1I(~+c
z5~-*Jj%j1{e6U(^5uHl_<4<s2XAGS{KGtuU#<kf7s-)18Bo<$Yp{m{oiv)IjR$mTv
ziH7qPxr>FCe87dwEjuQP99a^_=_d5s?N^HLFU?ryNo}-`nr~NLaGm_hdk6$AxU3rK
z#OCj83^9XT_@ZC}*y&<}?M^$Dt{ZzTp*vD1382-Fwm2@mTp1tomTv+fM)v~lqz{U#
zX9+v(0$wxvSsxxHrMLw5ysVz}_&k&|zU4wC#`C*oo|;V37!fyz_~+{4Q=%0q9NREs
z8g&QkN*n@4>><eUs9IbcVZ|i{&fS097!G^bcj@K`^OgDKP&l0*hArRAa_Up37^rJe
zPhMj=%k_`WS2lJ#4<({rrR6M->57(}u=0xQs!#1n4hH&}k~d?w#l*zWx0~BaI;*M=
zDBj;P;10x6$fZ&Q8x<(-b6{i=JX-B^dY@9i1@9w%t-~Qfy?hDepLX;O(D$JTBSfuN
zulc0$`E$z#R>B-r{}m1yHyLhTNVTiC<)+B-F9R85KGOkm6{@cC^OtSWdl(N^5IV;V
zcn-g2#Mu3=Gzteo=iwl9o&ZAU;g<iP^H<>o2xiNGD)AesX596$JE}UWr<_37wkj{0
zvJ1d$Of{+SR}|lf-!nKU&=<<D$!=aOp<=)F6)-GqUSFh7OB<o3)4@)xGasSG`aVbN
zre-VS{XPSX$r>mE86<lVtNgcLZ3}h4d9UoM3uF#ozM(3#*y}#WNr=KR=isM^=qNa2
zJi1y}WqcL5qo)aD)ygUOzN^kC9Skube<^G`W~idxdt!>MKAC;wtv@|RKO<YAwSKQr
zU;4#oD?wFxixEoYCmi?c_n!stb@;Z*uQG((!;;0OVGowe<mNd-EJqe@$9o~%gqbmr
zGDlwCFpo#qZE%OpAJ*)`7}7;FK9ReAyoG0DyM82#JkaR(j$H#B!rDG}CVo&=E#D6{
z+{}gJUtYBI(y*yng||*$@Y8dW2s-6=efa4>$xi#syYMTc$!5%nm(71*IZZbJl}7(>
z^nw`(maoQJb~ZWbxFoN^UoIv)y+I_c87EPJVEH98?RcBUNHhhO$|KND=FuQCMNlB*
z<L`&;e#jjzx<h_FyQ4YzspGNR65@iV?J2O#ML#vMyUABQYf0VCjNZ0gj+cXlWs<CK
zVtmYeD{qJH3C>)Z>Nb`t1?Vl4a+Q}6GfU_}Ee@6hW`W}uiv#wlS=xE0>Klnp(^Y@c
z>6$r>_kp`pg_oNNE?M#xIUApo13g*v{HnRlpzI^q_M@S*8ErtLYNse|-5>*UG~AF4
zG#|psD{E=Ton&L_jj<a{jVl4XY#@oiAxV;GH>PQ$r55aOZMjG@2#~ME4q!)$Gu%v7
zeosBIw#|#+glF~BBM|O5<@|?_Gi_=Iw`y+ue(Zne8=3hwF|d+EQ&hRqGMHE~u*or<
z+=!>9*O`83#To<27c0S9*t0BrG-t7N$BHLhOu;G@RNIzFD{eD2V_IP+6^YDWEn|P!
zUfapru`-4yD4<-^N3$BU(0s>3Qf?2BSAQmI>Z8Gr&56hFB-y*sWg0>h!b#t75*V1D
z8gP2=ZOzTZ`OF@M*qWGetLTjU8PHX17{H+!EBMvUF%@3Su_8j}U5zNupdf<Kl0zM+
z891Z)VY13`cjUsNd@e6*?j*}fP%Lcd+m_wVPROGE@}M1)qw%5_dhpDr$8Ww>#Gw@v
zX;qhV-)l28F!3Ecx1`Et8~V80iPEW-Hv{FoHX@f<0J)kGN28s6vGd)>>VCAdoos{G
zLf21d!vlZtU}=;pF==<#jUsGaJI%4R1$%<L*<2<53;B5ci0Jr}hVnQr8t}~v6e&=1
zE!IRD0B){m4FL5O-meNYaDi_Y;Wu5?sf3_+w1S@Cf}Z>m(334b8Y(3E`^N`2HR-~(
z%zxun+&mS2#Ydw2PUh6R_1Auf6w)yF;ga<D7UFxnRPe=78rR7t8Qk~==J$>a=x7y=
zFdhO&Uub6!E80e0^UkEKCDOcD3Nc@E0Vh1aEmH*%=BhHMUwGNSwv`(UNph&A6uVT%
z|9~>#o?MBzg1MJuNb$q&i%7i)AryN#Y0`Lp`xNzUb#Gv&r7Fm8*N?g9BZBA1DzMHs
zSumm;@}V!k#bu1+bJMzE=P>VH5X@)?hs^O-P$_9mEUHkwp<sj}6p<Z-tmjP(_KjAY
z<nWb}xD7c`?!U5uR|xyjDniSSu~E3uM%j^h&`OZFFzg{HtOSf%?6c%#C;s@CZHqmF
z*fy`)J}QW9S4$l3xP%9>m|((!*f#I;KWzI5xhOBkes{Z<KIH5j;$`6WUhTXC&*)1>
z3A?pi$Cv@vOPT4s@WHZl2X^$PUm>)~rbP;hbxrn70YXmdL%pQ!D61#Nc8#jl!5)!c
zHh`|zyhE8`P`ok`2h~IwI>r#p`ebz4pUvXO*@V?fpUG1O-tA3^Q0lbTgwu&jN@k@;
zg7nSJyh8*3sMVDg)#XXt{r;~aSgUC;@cH)BX|h;&Xu>}UcI-3LCns!Ld}W)I<!J1p
zJgCeS8A&&@I_Rkw^W%Hp8H_Qo!^AO0@kQA?XtI4KjO;ByvD%_=oI>O$_k-wW0Y-C0
z44oOpG~*D;IJ(Rx1c;*hhBVfZn_hPj)vv=1jdt#V2*chOF|Khoaab|;m)mx{c~E(;
zdfPRzSNI~(JXfK|p)4^O_Xr45q(D8?KlTSl%`NTyNvnTnv$X9U+18)q55D+Tzwln&
zC~5_y86jq$lVsK6Z2BTand$+ylj98n1cYk_J!m6Kf4I6lpo{I2Ha#~ooNPCo(RmDo
zRhw^4w9_Krg$OU@4`nNsSujt94R?)X#)AzE$M05x*mD>vwRAyCyWBWGJBU5ET#8y0
zyDs|PqadKhnB{o#_kR@_d&H=>_1t+dQ*0}CK0(cj)xXJeF1y>hp_l?h>Rh>P!b|}c
zCYsCdwhl6ZP5K)z_SWG?U3&-Fz?f9;Pk#OL7kgRw0naCBPxB=7be-{|ls@G@>XAYM
zLN}6}If}m}4c?uOlQ1Gog^m*UaELQ*1@yD`B{T@K9vk8sB??W%@izBNvv(jhc@=MG
zOnx6IR=G0>LN^Eu6>Q$)wjQ^8(Q2uv6l@c3KCvskq0ma#3Bo4zYplW?Oc3wzOWqxF
z)>6dsp+{sL_dj^`&Pw|EP-eD5YBf1g{0--g#E=PvE0}YFfDdM~VFc@&*(T@mTK_t|
zTScq*)y>aqcj!>*$`}TB)3H+_&%j46e=yLg>$!Vfe!MmyUE13NDt1jG<-zW$cns+T
zWlNVsj2${wm}`yzsvh0!k2K}qT4_ot%S6e^fe7^c0O*o;N0u8#FFSZ5Gpes?V>7U#
zZT%e3lC3rKsqv+bhvL!bC)k*`bo$|HOWrRpSbcnGIVn#LFxJ1QlyzCJ$bZpw^IwVt
z1Wq7W%U`1yQDL8Pn!+ZjBJ7HlGV&vi?a*TINPZW?U`PA?tqYGb^@jt+2RlN_ohU*$
zRZphLt&a%{m)5>DTXJ$JWuLgLykoMCec8HKxv4wo#+e4XbFPb5^uA<EUeWcv`ROeD
zMWHF3Cg)JjJ%ywEMc{#MlEqH@DEt5{uZgBr*PEI9ut27A;;yOjD5HXz9)6>@u5V%H
zIg_0-lQT3ssF-7#72ullP(lBfd6!AS9kx8X6+x2#9aj5TEhU^S1u>CNhMK&*H7KfP
zPwh*yigvb=gr7;5bs0Y|;zlb{0h(C=>X1YnbtfYEVmnu;NI%I<AlkGpTG@2Y9R2Xs
z7H=Y$Z_lxsr<xxm?2QG#GNOaU8Me0tL+V1xpA=K5Z;*rwneHxkk2br~Bz}&!G~Wq>
zSrXir4J)<Xy|(oP>kuuNBsREitx!^iE=k<i2MPo6@cyODy5q(FQ0542eutnp)lQj@
zsn(CN+T;0L-G1d~s@9#6a~d^t<?s><h$Ng1uyQYm$1vQJyDIuOk|hj{1sX|~l;*Pj
ze<(9WL3KE;*4j>FY(tTSYbzpyr(P+iYNzV~03(%|dZRa@1?Gel^9^^)(8KSlJ*Jf^
zyblc@X}IHTbf^e>4K*QU*L!w1NNjAF#2A00YL{!NCP<T012KXW{VxXz2CnXY5b4u!
zbM5<uM^8Dqo$t3Knwq1-iIDPJ!aq~PA?3$ETrmVQ=A5lqv>IdUDN4(7Y6g{CS6W<&
zEdPbeo8l0WqIU&a?hsjx(oA*<Nq<sm*!}~S8U16z7C-+5%i#k{D7fy~qkNu39L19d
zGlK5)ZDZ0`#b_rV`{u_ZO4>qfB9nvrKE6BF4g{N>$6U|S<MW7`fgqmj#68vwkgWZH
znpBLO(vM(^9Hewuz1=GYI|3$Qolh+;&sDU;4PKFw?0)Rx+RE;JJf7jveXjuyPs5Qs
z#zH1B_FaE2^VcPp#ab@=#_biXr;Xnfr27$cvY3CwD`I}h@W|_VV;|&cOy17;eGHTy
zjQ!@$tD6KoDr$S-b^!wZwlv-XePX(zkr&(87(*}pJWvGYB~5=;Y)1^UMW^GT9*7PB
zv^;&Yq96X|g+H>-9Y;-Lz@SyZP=*TSdhjf^FYb7Plk8>O8b0&px5@AYIW~L_<MJ%n
z{!5X!jhv)1?w7-?J(OD9NZyNQNb$9ItnQV;D5x@mMoP*dr?V#^O&y1B{-Maos{3?#
z1v(k5Nell&kxl;J6uF7u3?e}x`A+pQ*Ky2hs%DU~fCK+b5Q_X#Jfdf`mL&-OSB)?C
zaomOXG!2i&j)Uh=2DQDce=B&UE!N`VCtbVwv{C0iQN*o0j>k)-r=y2vhpFJp9q6p^
zd2<km9WVTRK_V#A#psu}7(I5MfRCED-^d6k)oG4xi3HpU@X++oh{GOl7GV3BZ9F{}
zsEsEX(0@i2y1#YUyP}Ew?1YLF0Y<K6kiP;S^&BODIa|OkExI7x9wtn2NP7_}&+sSk
zS-zw&IfF3V2lkzD?=lCmMa;}n%Frl$Uuz@+5-03W1X2|+SVUj%DE<hYo{v9&+syKU
z=+7HOyM~D8;@AWs0HWUK{kzFV<H6`rTk*4^*N3rhZKBMyG4W-yWsCFz;~~J6W+ME3
zfspmBgS$xk=U?MJlX7f3qp-_S%UO#dmnXs}As)83zBA1`s+GezpE&RB=bMiM#;bu%
zj$id@^6SdU97|sSA{4q%z^&Cylv|+tntijzJCJ4`_~T(o?L)UCa_i3-%H<`@M{l&$
z*sDlir?Ml@#t@f{!Fo|Mo~;x~`rxi(NiYko-f|r>!-LNen?du(cUkM?Q{}?XXy-8d
zVMnYp<*q5Z4Z^hAbAzDCSfafx8ZwIjml;y?8!KG*!x8pqlG=)ux%DvA%cbb^K0}v`
z4KyzK_b+95ALXLd^^d|L4vzFwD3f^7&Sc%&X(rN(>AHF+l(aMWNOF%);-rFp^--i>
z<L()IR@cVLua=HW(!eMx0arv(_!<ZgJEY$Br}oQTp9ALx7d$G$qJ&Ukr5Sj9h=#hE
zIoWiaVCfwlRvmg@D@jcAW0;BYBuJxdfPFthL5i%Q&5KbtEudzwV?*!qBj>`LeT3D`
zhcyY%QjnHMp67TUxH0;Z%|>ewa?f3;$eIY*=QXxNULf(E)GOvL+&Q6GqrJkx_!Y$|
zx71fPkOr%ZtLw+=c{9-)uFdae{EE9PuD{Tz5ekGvitc{U@A7~N8$c}OAqyBVE2D<g
zy5UkMR?Mj|be1>tYN@y0$1%GH!$#2tTVhOkhiFo?DEHx$K%_W1iofZz>$nYqSbN&|
zu4zDFWADrvb&2L}3+U>%(bXd(ztK*A;}(v<2}}K5ofP8Z^;fV+K&Z3dHn4dd-~`c|
zbKzA2U-C1S+m00v?2o^5X}9J)>;X7}qf*mZdK~|SI&hGj4wW|sT9y>BAO$Hz4>cGY
z<ok<>SfxbwB0$I@iqsa2knj=W2j!7KFm#gSzTj-Q@l*{GAwIJZT6>LgY;<IA*OH&@
z3u;4m)ry|!d)P!)^Qi*w=hf*u@MQ{6R$O>%`NA~-*v4a_`3wR%6<9!WkW(=PfP3&m
zxgoVNa6D29R%AIK>&XqUpkx;0dSnTM_AiQE-)^i~4hWiV?czK_LpD~^(G%EVi1#uB
zqfUaU5<Whfu@u*h^Rr4?(!MY3)?QB*Ia+1YA83N$S&|7E&>MFn#5uYCX~EB2!KfFF
zd&l^fUwd?+&t2;3TB%YP8KL1bzuF`v+kv?L2N~Gp!;bbLjbOEe^wd1A-_MNS^p*Lk
zMP$vT%o>Fh4O3o`^uh7>uSv#Ri7MQP8QASS1Hv5j>uF_^e$5t9sepJ}GPUg^xqVxP
z{NH<?YPjmKFp{-IF)DHELI}?q@PqrClujxc$^O$dZD;P9yw=_C^juqgKejU$o<xx1
zPPCW8Q=GX7u4x%X#+*pTeq&a>j^cTGnD$(Oh8<Bm=et#lEtrywe35hQBlmdQL*%A^
zDwA_r(=MZ<NdDx}uK9f8t^js;H_rl73V>YtS)WUq`JmUU{#{-uAJsS3r+{dv57hgr
z<lAPj1N*}Jj&&sg*@b5U#rbEy&Tc{mSNrT!{TN^E{6US#nP&oz5bBDX9^LYK3m;L3
zd`kKGN5e#K?9Lmw$KVTo>G!EaU!<mQU1S06t<<eWB)_)1G!Rey{sSN+z~5z4b5Bdt
z*J+NJZHtu3NU@Tvc1j>Jczr%RKw=oGOKW$>*XAuO<sd(3<{lO)MY%lS1CBj#4e121
z#_(YeI?dcuUJoQ@ocUbRsrsmH)_pU$;yVYxvJ8LHjCC}UeOwuWcH^@V(&&Gk%h0?h
zGu!>4mS>>cZg1%QG4Ixzhqc$(yAt%JQ*Lgn$44FjJ7x9C?@@PhwN1nEQMd%eu?2eL
zE&?|zFM*8GK<oCeAJw(uANCEp|E_zYSA)H!d7%1Z7wEfE`NTj9l%0E5AM8$E<pDHa
z75)Z8J+y`p&`g8B-j>J!L-nEV4oWqI0DTB8(80Tl_BHq;z^3khAms1r$<HO-{EsQ^
zs(+Uk%A3I*^a_vw8xWWVeF~UJ{;>C?C!4sFx$vIh7M4@!L|UkBWq)N-d6She&QW_q
zG`hFO|M-2!^~#mAaJ=MGH&gw>LGV$u@Xx1MTR}MioKDIBbz(l3JX~K5Ou}EcEL#6v
z$9(r)gVm+L%5wn2fxX#X;_`b+@k?ctA44OH_C@)-sbtsBVVNha3<GB)?ykegrWY<>
z&?92&BTw|_lgZ8#Fk&G2<0|ABU|^8-adn1q0T52H2ENtmzHP-G)c}2cJ#U2TPuD`u
z00tV5U4=uyeouY6z-n@+l2<o;gq@{ix@-LW-hS0JGw~2dHklX%XR`cWqW2%7B3+Y9
zw9gM((SZgzHdybkcX-gMCji7IU<cw6@CWFC!iPN{=qFe>0YrZ}1r!|ocO8MCZdcJ~
za~-fU3G~aPVj;L_{oWeIYEgN^DShntQLw%AvI+Ix##+9;glmiaep-*<r<bk9Dh)*3
zFVhSDEWSKH555()2Yn$^w@>RYIXrjQQ;#f}P_GCLKk)SJC4X+SG}ga1?==D*m1Jir
zH@(=u<023ellc=SV9h?qn8!CsZbb9<okj>o749=BNm{(qXjl<tsb^FgcJ2VfUhKR2
zrXN@A54M(pO7w;(#=wSo7jV-w|Lg1zoq1WtwkB>rqEk-VUyqk|@`Qe920E0}{vxgv
z12c6wBm#lPRxZu~In%RJkMUWAPeHXIT25PY9`#nLtPY-6;L-LAefIfLPU%Let&ITV
zwymUtJcllG-O9>vmb7DSp&%9qAk8dLI**@7D2sD23s|ai1#u5Jx)=rS(G&qsS>%AW
z>`devnVq(o`aZgSkKz>8(+F?p_kntrX$7m#J+bPrzj%`trE(?bZ?lGcHu(GuF%b(*
zGi4w*QR2zI!eAt&1gM-ATt+W;Emk(NGPEoX6_jTNCh>cxQX*uBG)_-*sg?j%{#%Iw
zzgRYS(||;aW(#V`TGQEg#>MZecPHiMU|Ts?GvI!+5$<%EF(6R7rEZ(rG)nMpzJGl4
zq;3%|Yaj><fO>-X+}PZt{^F5pR&HSO?PaW2;IaqtcOp5~GtBSR=OB+4W&B&y+h00e
zcedvyza9}wpjlbfIt;_1D*h?E7^C}e&}<<sw?Noc39wqP8U&%UO>j}at!-SS1zlEd
z2DKqnLFz)Xt{9J6Hi_YxvCngn!8Q0qbxb^sqtzSTqsukrcup-YAq$bg9d<<l^$h?l
zlE`hhGbBvjquVQOGSKawD@Kvq+dS!9tXHmI`8^x_zwRue^?wI?fBQSodm(=0t>p#}
z_Jj`^F(6}qzIC776!y3DbXGoulcE8CK;eQ-;7=YldTn_{Cd3-bBWm>Tzm3mF$AdDh
zV?H5Y74j5TKiu7_9I)8Ey%)Uxx@Zdy%e8*wyOBOp{Me*x$L>;l!)GDj@9E7->+PEI
z)#2$|^D1Mq{!bl7X@`J`BS{<Ih6mY83fKb%ZE(dv{E3r_h!Wv4X7;wDmogQ}cB=sR
zs910qvLbbGg|-M$Cq&{rPxKXXAVY<q?&hQR_gkAL71F(rzB$<zCH4Km4!s(=@Y!1G
z>S`v8sb$U!s+`(;&qDt4nu!Xqn4|PXuxS_kUO~zxz8Uf8AmLi*a!sdZ(6DI$`RBC$
zfhzAY>EnBCClvM*#tBgKqvO1b>SJp5Z;;=W&PRoQ`U*UiD3HPKBKTc+v-DI3=<NoK
z{aNUvE{sN)=8{Vo=c%7B{QYYfL|fm%@`cA0mR*czpsSE^n#d&Bjp|vv*1DsNn5U!h
z2_?*ft4b0V=ZUN9SRg_asKS8MP!c5+s1Uv=tg{^Kh;b-}b}@LaAQcPdwZ0^(3*C}D
zre3HV=BKW`p#4S4|9z_SV7OwRZvT@x$H1pg%GAf@C#;KBAmnnC2-xMfr+c;2I4$Ji
z`10#wckJv`*Ob_TWw%E?*)GBH&AVhFTM_-m(${Z+mF<$BsDuOaIc_kSpd;5l<uEV6
z$HfRBFzTo)1Mt54C~Ty>3yacp?QDk9;JBT|8)caCukEUH$8%I@vQOck(V2Mo->a1H
zj?iy*t2aXA><-oGm>US4b(h}nGZChr+V@-YUS!Z971CK5c?-x~bqu^a0~8kqS-o++
z0G*>!_ki~H(O+JGuo-}#K%mmq(Kc|aec3)!uc7QAB25!|!}`g&BGUwx&n?>opm6ZP
z5=oEg&m&yyTZ<L;3MVyFN1P-Y(ZcgR5~Qvg;|2tx{;RG&h6mpNZ*_H*<GjlPO^<#Z
zb0YVOci0@#io~pVQ(UUGlnt!Xc3i=T^=cRf<|`HgH?crX+I{U;3V@O!jjvL@GZ9)U
z=Ggi@bRQRiZfwI}{%KsN3PD!H0U|=2d11F~4=Hl_mMT;Cw+{>2x0Af2g1n?TF}`1+
zPTL{7cQ4Js#Wg7=Swv@t771RwO#9DU;2Rm>D?|YasMbt75D+#yz@JAm7MaCoNCBX~
z%kiA>4ty1!&+L4{a*pC3<_dIlC#Q8@UO2Ti46x`L;8ba9PUlKa47sbYT#fbtUf#SX
z?jt)r?ghEZm7sG4im*=^a~n+8>;9Ye^dg1qA#>^hovB*X`%4Dkhc4YBFYctoy_86E
z&T*9$p2OoLka1^kD+t}ohGEtuvjNqQcZJ;yZRFe5B}n$D@5sa=Vh0XlZ)b%ip|?g<
z!E?UcpJ_8VX$BW;CI}x_Ro?aAKNOj87L-17K%0!uQ?fHa(3Y@<-PPS^t2*u}eq6bB
zsgI3wr#N)JkFHhHDAQFTH^g>9wxTLN7{j#Q%{<27wDEfjF>lG{flN?MXKkJ{%SCCb
zp9E@taMK~$@6K;b1Yg*{B8d}&yMWux@7rXVbe@&B4sHI}%r)5{{6J>+ocX&UScF<>
zI6}>0$p(-<Ll`92bjwM-fXa2_ltsl6Jdy}Af~u?Ab=7Ytr4>R+vA%XH5NSnUif;d6
z-M+7f1}@#q>@g#aR`TbVj>Rf8*J>+BV#;Gi6wWG_RuB4KFhG<7jdCMc&9VBg^^|^e
z=Psw3()w&wF75fm-x0kSt;9=Y9;I@^c5(aMyFC}QNlA^v?H*WYb7;as#Upl}#xsU6
zUPHbUxfqF~jk6tulo9_+-)P*U5HH8#hoMk2%g#2|upb&sHoIx}%8wUw52vEd{P@ad
zl}nVPVj5ppz%D>dYcSYMqhLx&h{iNg(nrd3A~b8!B$$g9o|$nZ$m@gvyztja9TEKb
z+>C$~6HkI{8}%N^TPy16nnz*=2=u>$-Z?#?g?&O8$>*urgSlKGJYjJhS`rg|x#sF+
z%c|qRll0l@%9z6K7Uz40G-fbP6LZ(;?mwwrS;EkWj!r5){M#I1VJVzjB|(zfeD;T$
zcQxvjeNvS!UcXJE<K&F3UEXp0<(LEn`O8v1m@>KZIGU(UywRWyg!SG8N=Vdn<WIjp
z+#`88`Zz-MX*1}lGfx>^pH>o8zXPaXe^1*MV_Ey(x~7{{{$t0wA6%fp{(so9U_8Tb
zPy|n<{m1xPFZKQ`ViDJ#nDoD5=v}-s&5W4NhZ|QMEL)i)DrarrX5%S)7%IZ%+P2Ap
zo*p0<GU`qnhI7BhRHG96gfYaT!W@tD2K8VpBP91m-V!BQke$szG5cRRTZlOa-j|z1
zInz2?JwX;o`4E~~jU9tA5eeU>arm_P7`73(r#s_;G+943NRTG`i$CL20Rutn_k?Ta
zuWr5fQzH+}QmfwX%b^LxN&}#{ls}(hn!cvmNx;IM|IJ1^f&yie{X<ajX=BT2wr@5<
zY9y~=4HyYF5_UY)0VmFvRFT4OCyXeX7DLa{0ea(o))1T`-ygjLpJ|9x94j{k|BOza
z*faU~)PO8mU!=0Gfh}*)5tYReYgR1XYd<;CLagOiG7lHmhO0QkfThM37^`T}+=~Da
z6GNLs@Ddzx7$K*<0JPcKvFtLwwl;sEmVJBcsDWfwew<M>B{oAxv?}-gGI3+pwmS#k
z;H7yHmNst2?GpCA2+9B<X^f1pdu+O=LD`NG<+`8THI3)aP*G-~Etg{xswevgOm-t_
zdb*O?90$v=QoK|fc0wb|+t~Vt%&XKSXOV&}u!;3CEmU&}Ou|g*q+D99CLN%oaQ}5=
z?YBZMfi`y4yTXhI@4$E2o8%V&_@L|^Fu$@^0LWr+H+2O(02QL%yNtDj$Lo7VHh{N3
za9M-dG@4m-10Ky-&#q=PU$Q4~mPmJUkSyv5LMqPkiGB?sk~QU5mDR)lBJn_uiZqDX
zef@DaY1TmImcA)<BFmY?qEekLn-KqxC@U3Pbu(<db+WKGnr==`Gu&xg`&A@n>uPL*
zY#3uIMzDAwV-h{-o$Tud`n_swrts0SnPnF|<#(W8R_qgc*5e_baNlSB%<ln0_?d5o
z&{=WJS+%dbp05L*uPZT)CyyY^lAOaWy|q(1slk73{_K;a`YHIR<rTWuIcZG#GDFdA
zLg=pulLk4~-~WVkrl-rE(OjVXUscxhQ&>lb;RV2V3tTJ*1@u#BLBg!YzryT8#VrsA
z*To-sLJZoWb)~(*zY>ng@I$Jd-XBb0wVOc=dN<D}I`mepN-6hyr16eMsY1NT`;W&V
znM%z#?VT-Kh6#%JsCM=P<7}&dcgddf-#yN>&<28_A%Ud`?Es;|ZD%FA;fT@QRicA0
zBK4G^^y}jfjed4>B#sTL5V+HFiJ6zI(ZooYuWKk(t_UZ0hTVTb8#U&OP`;cm5euj9
zA;08uk;B@!>7nkC8}_c1HIFv*j}kFUb8S4aur@8Fgu%@YBCneJnWT11IO=8U8xdx1
zcmJ0yyJ-vi(3Rz`Q~nMB{lonK%CfK^Sr$iz?$0V-T@@0rNe0)2>+T-x^TlV8H(drZ
z$phDf!Eprwq|6fKV-){@u$wE!O6sO#XzyTz%&Pq-gjLSI#Tevs=TBrrfF+b+jOouQ
zPlzYs{V_VvHqOOQ4Up6CPCZ5G0V_(1*XXDGvo80#kQRQgiuF+nMkWG@cj8e`*nKuR
z;rg#MJEv~usgy!9z41>3YYju9WeTU0oha%>&ne;VN-*{c{vj|+`6j|D;69`8=Nl2J
z((n$$6H|dmv~N`HcR}y6%ZVkm{Vxn2vaK)|(CKP@8h&n;4i4gV0=ND^$%K#pq^+>8
zL-xHGS=g3LemAYn*fw9NhAE{Cl{gSYAY@9G9xTYIpJ^hfgvy2DYxr<mj)GEFl-K{J
ztc+6mI-f2tvUY7qXu$g&1D+tb0>eYRPe)M;T|{5?Z+XeT&4`8A@BIXANK1)x8Inem
zx-6>%MocFKXe<0TXf^X)<J+<F4>@>8HFQg{d;c$uIR$Armb*UR6N6HMMntDx|7;2#
zK94>W!XF?mYuxj{cB*1}z&x|E51QajOcuCrVDeqF{LMr~V<gCj|9nWMr?KQ?rm=j1
z=aU|ThCs_n<iyf}O4sL<?Efs@=x>NWvuvvDaJ5c|e!a`$p_ua7%%z$e@hHC=qTS(Y
zJpTR7K|VRxRXyHy<)~oF(_7K0qw0U+ROxJKRVwuhholEB48EX-8>6pcd4i7jM1c(Q
zHdpx1sk_izB;W4e?CHDad^^-e%$h!-p!n!_qnQ2+LIvex>evlbB4tH-O#<yHdx5DC
zVzhCUH&h{?SG@Y^O>0WXo0?-K+`BqehbOlln5~&=WW08A>h;{=3jqSA8hCIxTCu;|
zUBpMAE*0aA;l+sjnQ+HLQmNFITAy1S3RaAEf={O=m<B&WF%%NikL4OFWJw-bwQry-
zRW=Gtv%=wyQm0qaczeP2pVb3Uh)TszkBUMq#W8lJkoYLavncdh44Hiz*<Ny^1phfK
z!&`F`K0X}pRat#r&&NGyZ364t@(#q_nKn9qo84W5oS_Bg8WyC6x|?(UNDB<;@<pow
z|Nm&SQLZG-;5x@R_gzNxk{%`8yYV8d-ZX?r5AiKR3pr_r-XaM)=3J`Xd7N`4iN{}W
z)%>N<6Xiejc{=cm#Kf*zvAvGfygxV&wP<G7{-7GGxAAF?{PoWMuQxk|wSJV|9X-vy
zDe<MQw1&qGLM6&#_say|q`VE?K#E>34u)<=CibCWjv2&p=*%8yUjrJ=$VU@mc5`wj
z{TJ3%C`LD@B1AH!KD@5YKBaxzt?7D?7J4!AU3zj=eQt4D!mEEYWW9<T^$<V9&WOVx
z*+vB@LNyujPlO87d|)7HDuM$a#Y(z9VYFtESI$~P)(m(8vS;l<_UxVBFz{;QZ}NvZ
z-(${)hB?@(K|XXzy)LGbc`JABq;a&!Ta&<+RnqAM5q}9c{Q-^z$G;gWqD-_00YR|G
zjA(&7BPPb#TT;gDB~QF~qINhfLjG@GC!1U-l*rrB009c!^PD_7DzZEBE81WG^=N-F
z=tef9XLNyE30nl@SJVsbVZH|`w-Z%O#VG$0OT_vHuTwai{zpGB0QT?au=jUgiVC^`
z##{!D7jbn9!yJ{&syi-x&(4_tW6}0%{%?ym&IV-B#@GFiMVlu427^Ug_eAaqSr`lx
zJzMkek3@T^E%>24`;SCx-~p0oc}Wn4{r|T_i<V6KABon~;{QjY?SS7i{zsyn9i1kZ
zoZVI${7RBRiG^N=q{o~%OB9htlj34>ZP=x)#VTeyv@EQ!yGjz})o!WiT?74e+#IQS
zyDqd&HqiKlV1V;3P9{>qF=ZS3pzRWuoaG>fZ1CD}Kf71~ZN<GgUF(YgsSKthND|jS
z%1~5<%pO)%-#7A$cM%*lCvo)WQcV3yV~%0H_1i_F8gLm#@)s0dweM9&bsf5DD|&@)
zZ238zZ^B?EQQx_-d@GZtVjjBl98<kc60hAxCAOtYXz}N-;!oQ|s~8<e=XI}MqGkjr
zvIWD=A8XH!O*Ug<?>c<mj6&NpEI*$H<EfQDs1l3zZkXUNEFLdS9$2w&(2=dk-Ok>I
zgS%Q>cmev(?bG!Ty2d=SB?QUcX(~1A7Gi>AWDViWdyq+o(<029W2R#N?NC|&37QE*
z26d=%F``<Pv;ID(A8vzdgn#PwWqub(q!^6Z?16!wW_n+xNBbFb7x(!D^@D%<?#EAq
z^gWeB@4z+UDF1mBSPDEUu${=s(%9rn7{REYD8q7ttYs8|>;7|?n<PRH7*;3)dm3i^
z=&LfYBf!iMPmfhQC7yU}$N-Jscct={w7CWEESMN9|4mV~ivOFUD*wMJs+%YQ7__j)
z5a1Uu89fTt^|zByZ$R6Qn@$v-ozUoI1yunFo6Mw0q*gdYKL)sME3h^VdV5^j2L7-c
z?k0S|>r>9R2G>&{*`=cSmhks&`>kbhva=&ugivwjYtk+BGg09K_wT`u@9f2#-Zz~R
zIkSIpJCp#qmRULfNm0F&s`t5WmMjz@VNE)V%OlM$U)u@G{1G5&EPaD;S^`C=?)CZp
z6QP=FIa{cm52f%xM4*iw9j8goDU`W#XoEhr#|R2pBP7PYj<PO&%DDCXuRKfmp!mRc
zlvoq9_Dw{oDheG=mBwW@C7g9g<c8v|Yyash8WM(XTPQdMCCLUqf;+22GpIc^l?rN4
zDZ;lt+)`Tp)1JZ{`%imHGJ+4x*z9ysd%JaSB&0b)@!YJ6Y{3POh=RHN$dS@mR5(9w
z4MI;uQVRNi;!|Fp|0h1xTQfs*iw}xVd2=~zF}WuCe!GPx)<TShee&rhK5h{bVG)Y&
z3klO=&;()%sl4EU{lFd6A`T9Xl!3C!DK#)!_n2$hz>Wi-VZVi0f~}$jb*IQYAqDUd
zzNbfuyo-CJz&TRUnnC@3YVFSyXO?cpo$POj+?n+FFt7qo{Uf50|6H(!czNKCp_TA9
z)%{gZqXTo{QP^^5DVCHtOh{tF0x>M`k@iD|zXP|AT%Ls{oRj<Go$6tJHSmv||2>z2
zci@2$z--0*WbwN&P=pOl8@&gcatB&N#Nnf9wh}DUy?IIb%biaiEX_I(v(Ioo8C2>N
zl58$hF(3R}8`qLNakgi2qF7qRlrohgLkiWmfL-dRoJKQNI))vWIs^vusXIZIW3%Mr
zFN=(JujMQA7@hVbr6u+_5aEbWgtk=m%6=WcnrM9skqJ!;RO|X)InU$j6?0OpIr;Bz
zzGWaQ41pIqcL+khIeX|q-K~9crIUtJ&0%&*I0|RZKA1TQqn>31!o9X^?WU75b-u6(
zLulwy-7);F&l!GyuV5ZWz)Z`re|V1?SCm2CqIT@eO3I9_=b3!ze`5WA2)pO*$lgcI
z!=0p)bZo0*+qT)UI<{?h(6Mc&W812rW7|f>PHOV~9nG0pbLNk#wO74?+O_L??$6O$
z_@R$I=fTtf+ICMEO3lXbY5iR^gY5{LzD^r```E=-_cNI`U$8M-W|;(UP6&4_bhC|$
zeF+p;>Q{eG@>fQMal)MbkBtxVoBrp>Z(K84>8%aO_gpR=M<J)39QXceJT>Fe25SNp
zlh(8?EXf>t6j|x1Smtp%W`_q2!0D?b@8AY1)MrW5aS%WG_~W#^<Ou>F8p#)#FKlo<
zl5k)AD^<6i|8x)iPTX2?+2T>Ki28_h(12H({6aeaE`BmHH3e?2;^Rwql}y!5_>)F^
zOXv44?UI9v+-1!K*nwDW+8{`ky=(I9iZ}N1%oo!jpd5Gf%Yu;%0+$RaqNC)R-MVJL
z#<E)8+9^ZB1z@*dtRVU6NWEzMuBll`JmV{N4?}dWo-3{Zt8*=;J$=7!rJcOD(MQ(4
zMgHNg%^2F;qK)pJ^r8_E=Xgvzs+?g<&Y2w>5SRrZNt-n|&%L#5JoChk%2h#0E9}$U
zkVYqIKC)m@Zz&uK{hX5Gbl%j|&)PgUjUvpUTHZmk1$<6P;l(br^ia_HAg=8sMggWq
z;Pqp#J*rdoAc-RX+HvO?ogNu+`|ao|DZu$Fngjw5EqK+p#2b3IRBC&1DglH9Ev@6h
z`CRHl^dB@a(=|Vaq8o84{OS5EDnFPn(x08%Q>h+Gidoypv7_gRY6X5O>ZM0xRlIr8
z1Ewn87zUi)vZ}Ki?B~+y0OIT`ie84zd%I>uUzAiec&@-*4BAlIG_&NPA2h|4uyR5*
zFe7R8aIE$H1lYYy_4VTH@YxsybL~3e_in6B6QpGv9eLwL?ItE#@qKFqDh04w3fhhy
z<W3vNI$b%qd+$GJoX;B${JTDfr0mr;HdhdbLFzmAG#`)3yZ%I7?N3{hvQZ%4uJxy_
zrSa&`i{;Nx_VILh_yrjbmdQ<}T8rKcR3Oh&N{8F!Mh<DTvo(~L#>L6(`Hh5{nMq}h
zKGe4}_(*I70hQ8^P{IbysY0g<bjNua+?_L03QA15g`rL{&N6Z<TJI?DR4McB!^Y7&
zkMf7XSG!~MVZXtmzY&VV9irGe-QDquYn`$d=W4Ke&On?Z_9KVunooSMP`YCI2v0ZJ
zkz`;QpzJ`(%b{7QM6yR3%UN&OtaW3;+OkAun=?t<kt{00fU$z{VdAIHtx*cq_VEd=
z`@$T=0(JXRD`~J@_%#3Js6m4NL7Xs!9*`6$VF@9zCsb2BD)jCs?y0S$9fZOxZYTyJ
zEKV(uuY;puD^tW+h|--!uTk7yJ%1}sX@71?!SK6$1n1Q^G$+EQ;Ur;5Z<r#oD~n(2
z+%ynEY7#?s;Qh{|E<}xI$`kTF@?f!o%ZJ%W%8S@!@yRQ-$BIN@w9J33F42n?x9eR7
zZn$%Xj$qM%e@_xafbq;cdNqmomaNyXbbbuL{l*7U4xaKVRF4Wbprj`pE`a6h{bDwo
zoDXI^UACCHP;YK{!O8r#_rsUL@FByLtPWKO=9%|eHPPtc&(4T@Qpb9O)cV}_6KNEA
zr1?BB@G{SZd|^UoSPm)jjlqQc#ChX-57jP7RbrDM1|nT}PU_eE77_;XvUEVa`XCt%
zVmMh6YmkYTqTqAJz~=NF?YTT!l`P&Ra177hQ*$yWxTGMLX$V&Gy9$>r##gzMQD(!0
zy%1nPiA$XPKa);sk&k2#r{8<NQ{$g(^ldnZjFQ}U^zIODu?E1}Z6eGd=*w}QxT41h
zd~^^>9gI#nIeLauH@O${HuYu0#a)q(%T|Y!QFoU98=@tQ{}-bD^<Rj#NZ=FE3PoD7
zySlvocSO7OKOov<K>XA{h!y}#F`D}iqNVx=(Y9O)v;7C6O)|~({|`ia;q{-0Ha*tw
zMf6bhlgWNAIAOMIhTIC4i@UmhbY$IbEmrV?Z0WDx+&kAlk*(8|KOmAHWJ{eta@AdX
z73Fm7g!j7c#1%1bH4_G){EXAzO3xifdLU*ii_}dW`8cf?7^~y}#uMj{WeQdDFj;!b
zT%x`WcxU}}4*F7fWGU<=*YGxaZ&zFzkaFZPpUFgum92qvpeW;p-g*D32W7PdMlRia
z6p-Mdv6k1lLg53j-lU^a-)E*2sZ%Lz^;6bd9f`F+m%I6h(&3Ir63^0rQ4{;RX~~@r
z-H*+I89;oPtr$tzg4o!4JC+!eotyJi7Y=h$13&X$g0_C>|08In{t>jdSpP3U8})w+
z+J^Z9Q>!fQAI4%{L%}`~8b-zUdVT^!4v2%L2mErDIPqk0aEJU~9lv@E5>Uz1QX9&{
z5E4h_8}Wyp`#MDky-_5anpb<`iPsN*la{O>5>+|C`mwM4O(9MHqxYZvCP&h=K)D>>
z^J0qMKJ+E^|JrY=psv0-myb8MPkypXev0|rZ+fy*;JC3=WGi9GtYF-<xhx$R<+$B<
z&Nxx9G-J;#707)al0V}cUFVjIqMZ3pLF?T5DQH_6XoxrZS%&^4XnC{Pa{im3<@pq}
zkKg`BLCdMCom2Z?g0|=11+7#ZMc;o3+WLPJv?~7y+TDK%S}FBUL5uf~phbcIe+pV0
z0zXG5>i<*Fj{i47JNmy1+Sz{z+V)RDoB1DtHgXhoj2u5?Djx68Uq5AxzU18-?^e<?
z)wp$?Lh_eR>#^>cra|`m@R`ViD(Y*aN5;CrW9!}IMYktdvMMTVojE$%WV5lB$~Q$+
zp)0(?Yg*b}i6Vcc&uu3EoxaXnp2Kdk7*P}5?~kjrL|N+R9&xqkTZx!N1lp1h*yAH^
z81|i^?*j$t+Db3KmvgS;TGvU4-dZKQvk&Y6aC!*LXJ`d{f%2U86uQa!6uuG^y5hOz
ziN`BhIX}El%6(HXq}AV(*kk@q3|H=1RG8x~lG@!<C~HRL5>2$BeXEkUc1$cPY6_07
ztG?VMF$Hts8+@qQ@cVEhu%PR9OKZJVogCTo;C5w*-E31q(Npji46y)~{=sg~lR9HY
zr_HEi^g&P<SjXyu5)4oG0+Wps#FU*pe14ds>dx(bvAkZ9Wxh1(Y!D!LzE%mi_m_!y
zJ=%E=r)3Pfehmxr=y?)fOYrD<=aGO%Jr`t=2?~4P`$8@Y=oEa2u_-<^#&sjwf1=`|
zaDKBW$qfaLYx(cbiH%a|SzL8TIm+8k0NB}QK)2l*$B1E{T>y~c=U4hF?34dx=bF|A
z$Uo(O<FnfT@Y#s};IqR2jn8th|HEhX{wF>Q-LrNsH7D=J);D(;!*NoFv~-o{aKZPa
zSSZNS*uQXG7$E$_4xRWz1KNW>TXJqU=fMADgIT+JL|Q~LmcvlG0>KZv<d5O|tu%nG
z{e1rT)K}L0-#S9mWfgS{+efP0f~H8goShHw)_g2HiFKQz$h#Ru*roWdENKC8Yv#WY
z(+8k^ZyYeXQ$F{XoEpFW=l+sJkF~;=_pM;c&y>c0c7*Z_w=9~G1gZ@?cB>&DP3qwd
zJ4(hQ7ej67=JDG)Qh<DiXU)lVgA|x8=evgfdS{O}8^R!9xPW$rYAnsY{kp`}#Z`fx
zdbMlC()cF^4aj}Bo_>xumhk^u?|zpqE-C2uCCfC!!}MWWM|4Tf(J!~@oQ00paZGMK
z?9up$+OKKH)`SSEaP2(QVsucyO=>jw*@2NgrdtiECwa;sk2W;|D>M51_rS!JL_Wd5
z9P&8szw6Oy^y+$6WbKjbW$ZZzW37MiXrBl3;wK&rHGX`WVDUQ^T5SkE-qh3cMFOH2
zO-)x{v_!C9UR-YCyhni>r(KxWBELyi?K-}Cmxc`|i;#ZAc5xPSp6i2*9%@Q}OX)z3
zcllw+;UClToe(k8mjX6s%k%z-jRi#7Aj|3Iw`qd~pKW7NKZcbKIWt@fUgwYpZ{*cr
zU!@h$2jd=V{n6iVU0}!crYH0a6epLW&Gt*L$U-#j*RPJMdsk2?O3HikijL;=B@)kj
zIlox#8PFuOHqghhQHH}n%WH#BA7QwC`~lX2iZxH*Sj}<3moj6kE%Z(WRjW7uhs~}Y
zTOD`&55bzC@rOz`Bi@w6W*xh&Vue++aoaYRjmZ^&;7STwQ}Y4oie)`L@EuG@nS}gw
zcTjqjf#1B2KJdF#(D!daLTCHu_h%cARSkvDoV=84Yc}2e-OzO}L1v@!n&NC*1u{>K
zk!EpadDW@9*mgGab?I=w5EKd!VL*bXzm7L4e|UQFlWf78miIF@6O&nvR!>L5)@Npo
zENj|}E#Y&du^51$t<hmFmJ<Cy=R^mRay7c9W33^@26cF$#aEJ6e+K@@2v_XPR9sM0
z(x26p*t1OE$V@N7+Q-@K_I?~UKGCSa(x6BRe?}~m&SrSEV`L6iA#HqRQLq=NLhxJ_
z*dhvyDd+1T&X=XrJ9$x<{XNxbYW~#LnrtIaQ|ZQx<DT=DZCv_Y87RpRBhAb!p9)I<
z5iDw@!jTDFQz=yF`1%ufV8gu!|1dVvp-Y~!U%@5fX8`JMy@mGXUxgza|C>Czb}pY<
zUcEK(uK8QxNW3)HTv@e8uw?Q#zvqP$;oSJu0dJi-Z;Fgf)|tDU@M&-5c|xOOS%kIX
z&<o`Ox6!%@oRp!_P^gh3DNm~*OD2_}lNhVzF*%G<tUnPd%YjU=tsfEk-+j<vfBe1x
z$jKi&@yH*usG-r|Wg2Nc=FN4!eg^wozq=MoP!aoRE(j}>k(GH2Ldg=TB3$zZJ}Ha(
zSPH$xAj6gWQ^0`YyrS_rO&^DO%d^@7GM1))1C#$GROO<JIAiD6p;?@>v2m6R|2&P3
zw8k$PCSUZ9?w0LJr{-Bm`ql)6Hy_UjYi~3*qVl#h2`KJ)aXWgu5deaga%<QU9zj(H
zl&`>FTOg3JeZ}+PMyd-aSLKcC5p*!`QS=JBJ|%kk@GrP;=mJear;v<MqnMfD+%{eL
z`qur?uGL%DVbkavCa`}pbYkdl+ZAX9>W<QG=*9L0WW_Qr_Zf>npx;}&Co>rt&y3d&
ziI~uA@cZV;S+@(N%aq=5Rm>o}s2EvvbB0b28XXls9(W_=;`EY<L4?8Q;K=b1P<~^I
zOCXfIJ&UrYrU<#;1^cCwAwHubG_H%IbKESS<AU+Iee9QSRxV555muu=_5F0$?e=Q>
zh_fX+>34RdUY;}%TaF)_0%E4iY%?{O61Jv;j3}wF)~7Dbvb_+#S=U_?L3)LE!M=`a
zx^j%l0-u&Kt~C<UO|}?45)ghcWz?eL!GBjbVuFES?0oZXDbn<KhIsF`_O8Fwwe`L=
zHa2eZfpm5-_W@*n@L%iDU;RL$@@sr2l8*+gfh-pU-#!%W-{BwpnSXvkI{Ll$M3t)a
z{^HxtWe;@#?(o-TFZ8XY9qcuT{J~qiD0;ij$-mSAR>ZW*@jW}&?&c)WizlVP1<kHU
zF~_{M>G+=cZV}nps8!U(+PS?*2g=xdkU|uj-jC;$x35UB<Qr+dro+;N;q`^qhx*Z@
zi*664qvb1p+bc%6h)Jw~t@UMd6*bR5jvMDn!QSTObE%DNGt!ja#YJ9)m-FLS=%_kD
z1N|GoHQnW+eWWSq@)a+y4t$LF4*zc9;qqg#ii6jM+)&4XZ1-#TpbkW^l3yvHhc@=)
zl?X800pl;m@FZen|BXdfGq^{8WR1*W<M?cq41F5(zPT|E;sb$_CEmsRLH^G}i60>B
z7tr+?qujfo-@C=Cph6Tt<zr!$NbhlJRp7QTcW;TM=B4|ueYf{?(oVd|tVO!+BZFaX
zT}qZpp?g8Q@BQ6K9^pETS{@h@MJ?dP<TvrlzLtl%@A20}0HI-&&V4U6dC2y%ib3lV
zxe-cCs*wHEBh+6<YLm^hm`E>Uxi)!Tj82H8i~*$U{i5rRK@cY9W=6y9Ne3jmaIRlO
zNy+}+%7fTtmq&M3qltGZq3bgHp{P&fAM_Jw!FRBzV<VVMwoIc7Qxh!OcAUNSblh>Q
zO*8SE4TGtNoT{7Yp``UXOW5LZeNN^!<Bu3S!R(CpO5+M98G4CJ1fw0~%%<1KJv_C)
z5U^V%mz|oeEnIbUTD4Zm56QP<?xuw6n1CdxNlo+;G)AvDy?JB|4YS9@>#hN6Y+RDf
zMGqa5`5!`Q-*9y3wwY0rJiDJSA5Op+t?~O_%!rjEzV=Pj3I|JrH?&tR=ex6J$Cx3)
z87i%r9eh7y(|HWEFUiv7+@o9~u`8pJ02eVA=Kn#%ge(n{y#yOlYHRlS+9c+)ZIJlZ
zUpx955(Kwrl%as~2gBV#jEjmT{=j6DQ*MLubulXDqEv-6*GA9s#nJGhHNehCr!+uB
z`}$Yt78^3R_|pU`)b{%Dmn_$pzwj=kMlLB|ZB)%NJhDT%(sU#xy?cNcodOegeo98x
zZ03gcZ@&OH!LfUXHpZSA$6ISJ#=zEo!Ax#uL0ZQv9Zkl8&mTb~BuN;f-!)-HXp5pa
zl{Nf0leb82Qc*7?Vf<Zp17bM9?+4xEV*+*=dl${Q29s}o$`{!49@P>UlDasS$~y8X
z1pUhIeFmQ&DEgXVE-BVU{8bj))s%t1GJ=@Ul5aEo^O0{@zTRnx*NEWPCS8$X(dBsU
zbiIq}%OaA21BV1YAn85Ig$y&o-ZY*%vk=-rE7*a;MaYHd!`j-<Mh2COtdZ5lKDTJV
zgeplL4&J0-MV(wqhY=^9wFI3~qT|=WJmXFnXHqd5VhhOzld9d0@iD8mlsrV7s9RTQ
zxf6&s*pwqV=4zHxbb=+iDs{1BEeyic2T|sPN+nYy+igU?G1iC`*8EU~iDx4Ckk}M~
zT%v$(@T|Xh+}#rggATL68nAe}UNR0h{p~&vm&P_neBJO1=GcQ7cZO^~TZW1=(uxCX
zSFEbow0_FrZJ*r1G8Y=m5Q_*2X+B0qy6?N=$DVxZXawPJ#3btM#j)j>h0@8hmc&y<
zV`wxYBLE+q1L1U+{={IX8|x<AGC|xfq3%_=`gv>}hjqFksj37H%mlLE@lgpG05-y)
zt0pTz+Qb)Ek<A<<7o+?JF;ekFX<_8TIQ$&yFmYl<0?aCQo{i@L)cCP5a{TDH=1xQE
zeS7z}m#q`KN7Eco6L1a$e7;RpC*1*Q_}_rMi*9=~^w+_*D<t{AYy>Y+3|<|lzN|fZ
ze|wU#%T|bOK~$g#pQnDUJEuOp%I^m!-c>4BCwk+F6djCXAW>JA6g#3wur2qL;o`0<
z98S+Bx6+bJS|>N`NtZ)fA0&avd<Wa79e^<**&@l1`DtNOraAMBM<7i?@~-MsPC7cZ
z6QGbqMPfKlSAZ+M4&fv(>GlUQrcNGmQk=`YBqwV748I)-okga6OoxN~HfmB%YqZvz
zI&SIi($dg9>}QLpJ1uh!HNjIs>#z6kuDL1oT%2_$8maec!0dEWU-l9=TeA{3euj+I
zdwow#{zRAhf^HMU6>#I%6Y!y$ELy9GleBpT!O78fFI4w6lZMWHKaw>%#rjnLS}akQ
zxM(Ujy;x6c<8(^Fqt2!}f$-wG4aOSFkd3H*Kq!9eNl58LSZ1Ok$I+L~G&OZ;%2zlm
z!X7a++66-)58e=voTs_qoJoy-g8sl-NXdN6Q_Eq$&SyI2BUu1$d8=p;q|7k;cK$Nh
zee#eUFqG&djz+2~TEaB_H4M@2e7<Hqd6(LjS%YWkn?2xG_#vi~xSp7JCkrbDAOcZy
z_Ml5<#!|wzu1Q$4nQtLYcBEcAi0UF}Dy<+nm{U*od66?_ZEe+|Uw%(Bb^5q}%U;ZV
zYd8Eioy)&Clev_`$PennsO1_xbNFnWshcgK)Ke=f(#zMkL{EF}rHCCV){io`(RW*e
z5Cuv=G4}D1i27;@9jExw+MD;d$i+j~82ZgS|9n=YQ-GI=JoQkQM2>=@rsCW7w$@*7
z^J)VQ0JqAnr17Nn287WR&aF81=O5wuzDZuyNExe)CJVHs_>f>8zfKsR)kwsC&1PM`
z-7zC%)OXbkWl_w#iC6pnN$18`B6fN?4E+qL(Adwi*=tv3xdta2ni)+%vF^Lip%#9S
z9Ld9oJGFXI1ksXr4Qa0b{F_t(<a83*q4$;ymy(s2eG@b9HWPib=ygaAF1bLI<|r*_
zNFby&?5xO@=blun!OWV?*N{zQ!HV#4kw4*~Yj{XUn{{L(dCTlIrUO7&N;5O=DyC=N
z;oVEh;OZrCk}Jtw<3rzG7E|o=7Ml1BV<;lOL|$>A@+j-xGw*N0Z4~)Qn;w*5>;!aM
z7ad`cIcA*JVOp!M-Vii}b^IC&?fd;vKz3tSLgG;hA;VdDjwmhLijBBS-JDax`cA`B
zz%sy+nxkzsL{dnNp`A!lp{6hwre;XPP`ee?@Vf@za{wU-l9?S@jTA$}r+4q%$|Rr?
ztke*3EeA^L06zy?zsX%04ln%}GwN!RuE>eP<(d%1xof*q!6|5noEm=H-g4H3TP4Fv
z__bBQLZPCZEIPVdW5B{s*X3rgtTPwSl^_tc-F}5md5@YO!TQ?qL;EkVZf3u@hAnx|
z`Z~$_w2#f2M*kOk?C~ADnvT+eHqQo&75;s)PG$a1e`-By^&78-$gFh<R%FZ1FSla2
zh$luxXJlVZ%$_@{BgYE3LJZ~?hhFtD!=IH?hm7`W=%RzGubE_xRel8Dq)6BjXC)x*
z`rNrPMoYjvli2_C(R28T9=REM(^Xy-rg`&EeWAe4F9$zjnLp>Rt@9y8diP8*(Oio5
zWd`0%wPz6PljYJI&Upr&rPt=TPUVxxBHf3sK`Hy3{QWq!TKI6I?8Hyi@G^a;gOl?G
z1d}+(#8a24*UR$*l6D<8AkFYa^QKeaGhg#2q-&H#IJoDPr(I9!mdloQuEn}YVOF4k
z<LgS!KBBm#9NL`V;U5A)bX8vT6#~0)<EiqKQZw!1Ki*G7vrQ>Pxer$jYXh9h=i4o7
zgJHV8oVRmy$C?Lo{Wf}ODss57#?aGP`+)UzYg!;yeVfKZ=n3zN_vU)0`?dJPr6R2j
zsjG%P`^}=pI8Aqr2AO4Rk4BCuT@4IefN61cv3g-1<o5}ZI8BR#z_)9D{1Hj0VE09u
zMCjYyMcK>i*Cksii`k`jZ+5achN3mpD;0LiUUN?<z}0x}9Cv3E-^IzmJ8XxJ7PYwx
zQ;*U#KEIkZi^!CmyHGySdf}6X{6uc?{dV8yIFj7Ety>s65Lb%|7|C41H@ft^ov(O9
z<W$NsaO-{MSuN4v#7OzHDAPWC^A@@Kerx)?{ow{W&8VaU7?F4H{ph|odN6-pj4WMX
z<5~Bwy~rJ%lK*V7TscQdPx-HSe#jx+0#_oOUrFDn*^1Ngcl-~n3$zG7;yS)}XYI^%
z-Lv@Oyyf?@ctj3(n?`PEn(k%^IEQUKwQ#lihx4A#lX{;47?1yMzBsNH1_G{;W@Cg1
z_wKq|#h;lUjvgHyu0rrvrYz>Fa#sBxGVSKnUnc?IkWQAoUr&50_MStNfefH${Ate9
zKkd>3UaJLL1DG2=%3BtEPtv}H*NymCd=37C7hQ%;6S({SfRlH**h?ZiS`aq$gZ`;Z
zW=oiT<vv5hc&wA$mZpOK4-LDfHBQss5u|*O6%_Lw+>pOu&(sr8-T3)=T<p!g;u;<F
z?zr32^St}{<~jHTJc`8wy}vo`_2h2Zzdds9$9FWj-?>%vZ>oSNJSKJ8mj_%NcDe$c
zKSL_o=AR*zcV9AlzX2f|<C{W~=S;(eZQ+2hw3b$9Vd@CR^ST_DbUa#_zn8HLHrh0@
zY2gdQ#R5L^f6}ePrwy^$&m*qOoSwQS`weV;)&TKF$220+h6SFT1+YjjlCi#kOO0=*
z5xdqu%C=@d?~^MEEUzmkEj^2G6J%1UoCrXJ`?FVt2^n>%KZmth#UCuAy#~L-#Iopi
zB@;t}J$|8x;8#sltN0A5e6J}3p8MtgU}30?f24PXa3eBp!sM<-feAwY45|G7_mGOt
z*7qpB4)!+-rv4$OM<%|3eVenis*CFQJ9|7;9XK0XqxYbA8_Vv}IiVWX8_2UjYtKUi
z+=QgOrs<QqvL-gLf_^Rx*n|&(?DI!=X%lPu6vXmqrir^(C-q9bc%zq7%!|*oM&v<?
zUv9e2%ln11Y)_5j*XKXQf9A}Sb=n`2uH8%ONLgJ&OC4fy#*`e@j++hfw*<ATH!lVn
zG`xAwk3CA;yn`~+{y5!_Jw=ir)W8pD@_j4JU-GZ6)kjpA8y!&CI*9VoHLP({F#H;$
z*sKSHaNh$k_;d?$@ona^a1`d3UvyQyAFjdr5x#fB=lbL01372IP!3lfE<5ftBg}j-
zv@S1q&|CBjl{77&-fY1=fy`p(AB%YKS}%tN6g%Hr6f<<Zg;@O7Dd3p2X+O7lJPgF+
z#PMj3#MKE4A%9EY$XwZdZB(O8vdQm@d-@uvXLzVxT~!90mXq*B?^2a6=II}7=3Ar3
zh)md^IInuEG|^x;A<w}4Jf!%VEMRJ)21nvAI!Y6xFU_PN&C-(w+etKLq)=9(EI?En
zGU!SEaQ-#6G^JpRcS_YmAodGk>0<&_bpiydiu5qu%eU6*K^Xd_2L<Z_<wAlCse=Yg
zmow}6rj7iK6FG;PW(hn<XcyL`)9zog2f@u?lL2TbgyT4jFAlp`KqI<Qrj}RKGgyvz
zRMJTX?V3FRc-Xw2Y<K<?TYxc`gAMq|adT_m{RA=Z&{<_&4xfX-b${z!zr+ySu*Kz2
zQZM2QA331<IO@m!3j(pEOA71bBE2V@AAh7Gh88_Aznlt^Zxqrmd9IG^#hyW!1M5#Q
zG7?K;nQsLy<KT+u4&qPQ+3364Gx$foz9N3Hai!cl50hg73b31O0jkQ>+8N@>OWlfr
z-e1GgufDCb<T#2^2H0>6qqp^S-vpZC-zHQFR(RJyfJaM^eI*@d4&^!M>Ui`6MD+lg
zW1Rf(fBF3Yy53s_0b;E|L|ZlQpvEMr_q+pNgQS8iUhKbgVHHxvX?`A8ks1329G2Qm
zNl?>e$|P-lQHqEPqimD657d@_y*Sc$6&&yXN&zA)=J&qlJVK*G=Dd`Sp9@WuKF?UB
zBcQ{zWT2NO<jA10n~gbO_K<d)iQLnOnU2)p6tHj&$f{aN;7t7eW5xrkEI;*IyhJd<
z2Y|d<6~h?Z;zzflC%(^ziYZz*iaJxIFwl!B4I2v#6xn)JP2=-pa~aEazjQ%&c+{<-
zBrW~rne8#aoh#Deky~^RX&O8`4Nvi27daZ<;b9QOI-GRubYCG0h3C2@m)KX2iu>on
zZ+pl&DLLBHh&SES=)J(x$lR$uDK3zms<SQLTb-#PtBQh}Kyj<wkg49IZJ0_r=YDxq
z%RW1koxISh;MQca9hY470><ym?*m3HQCbO@^5jzPOe4BD;<2kTtzgiF-P;XAL3Nd?
z$vC*vvOO{AS*gV{y{dBDctJSZ;x~Ak5o>uGYdA|8DzBma*9!(Qr#k+7Rw#9Tz~C)n
zhBKmHwWm=}(%yJgJn-TAxbiy5$axh#Q<0l7*GW&=T;QNHD}JlLu{q|CE>7jtX0kBW
z@UtKMGiKJwIcJNjC6!IvGrY1qrmMRYMb_VMBP2c~GT;wx(ljcGN`!q<G~rZR6Z<xx
ziOCOMe!yPG3Pr1&tgfd$%y*=#v~Gi0Nn<uZ++^*5p&U}<T<6a@HGGV$ODh@(OhG|$
zT#L}&Nca};9o+yL`Tm~oWD;4lm$-IsSv1U*E|ihS#}2PdjsSLPk&GG<7O_y~7So{G
z#MsMkMO*B8-Qb1@tJpk1DE8Zp5(nfwdhEqYt8{x#Vgg|Q7tG}*Xx@5j_f239t_+UX
z_nA7&g+E<7$%Wzh0#6rv?oT=}h<24Lzn8y6H=Y9tgQFwUs%$zR-L>o0H_Fw(Cbc%U
z6*fo2;e?VqUh+j4$4Q>R`5nhsI3c!*OO&44+GVp<N87Rp+)(IYI=oWav+C=6tAEc7
z*{jSVbZ#Um5oE;^rQGiGw*z+hzVDc)Byq(4b=Vy`h3@meYg#5ZZr6H?YQXw=ETI>P
zO`U5^hua!A@q~SG7#5|>4jD2fweuI))0kQAMLDYP!?GJF{KT_INUJb7f2BO*YO-ls
znvOtbOYe=DeS=7<kho!o3hMAcTals555bfiRWE_biVi;$KAfELDUlcGBEjfM*1Y#8
z=7^dY*Qz<}IMnGzYRv!^c&{>^3~;|k)I{i6D$6E!wqH#o1h?qiHw@OWz(-OqRYfv@
zpcnKp8&m^q@@-PamD%?2X{a*7)!k>b-+u=||5h`(4O=k0-y`{L;zL05bw{N^SxJsD
zZ&72W1uxjmZyWO`{}VTHi>E%8P*E9l#H4{9>I6@J*X=2rSAW+H4NzmUv}Jc%(op+D
zlj)Bt&HfbI5Q$og(8Hi3+;dZ{Up`fdAV4)L7mfxyA_j%oIwHmJa*ge$C{Re5bC5CP
zw8(B`2`3|MnoAkNj0#sW(36luW%6Q9<hJp5D2fJ)lKX@P%j|s=Nx4KsbNmCFM*5h#
z%6ux0UU#XYUN`Fl2!W@Y7Z+M~!uPeX;Rwq32DuX&wB_~QrWX4Q>ekiryWR)AB7-Kg
z9j*7hYnyQOQ{CLQ&hZ-++!YiQwY@tiOV1}ZcPgs*<6y!hc>$$84O~K=)q4oum(PRl
z+Q&@VR*LI>?&sfIT16i=wkF=I^xd>;+>n}M{OxUlgj;mf$bFEnCszi*#~WRI`J?_5
zAGv{EeGx<Wl;LqegnRbGK3^4=f4bVb`;}dIVyIB}qLNS3tv=q&nqqY7RBq{nf5pd*
zxX}k^O70;@^zHQ_licO2{aWA9de)jT=o#ttQs)6d!sY3R=plQEgJZcrWR9p#5R^5L
zhg$oWCmSwyLJ|CRUCVT9Asv{-t5NuTg<xD0X9+H0^Q#&m8*W`8YP15<(3*zH3*Vtn
zM8)>(X#&PMG_@Lta8mzKe9`fS3g*T9^*Dy99>Rr=uasyCs#b)5PD%Nj%*nZua9%e*
zjvK?W^PSeIoKJ4S7vy9Nm$GoQ+Uv@foo7}N&x05(-|C~uiTHKVx)x@TiZ1F0n;^8$
z*I71;=q^m<#%mB4`&P)%;fmm2?#FBtzQ!v620K{a2NBRmouI#?$9S-kL2u)4L+z{o
zE=l-DfQg@TXA=nRl|h|X)}Srj`6W=LwR5(Av7%w4>tgY&tBR9S&`?n+*XPmSSutX9
zsRrcQ{vFqWWB|U}HIBVR58COj=itRj?t}A`QD0yb1pUm3#kHrLX!bn1v*PGnIy4oA
z6zgwpqxU=FctY{kv{<YNV@Oua{toqx{##1+9z9iG0qRbJCI*=oV)KLh{)fW{rB`UI
z%XH$#9Dy&8`xA+ZQh~1tu5Ydw^J?Usx9_K{GX9#U4~zkfbH=*ID*RC2tbMPVJ6y{~
za|`HWeO+obj%Er*`jL0|-NH*WjT_A#^_Xk^XtEYgGhif#_q>3^?{W=drmV$Ydt%`I
zoqieGI8t$WsBQJIq41(=E*sfIF98*!qi@mO0lXz{4j@GT!(l+A-gu9&D?An`|J#a1
z1ZxbH5mzl!2r@aco$?}(mw~{RAUCJBA^tZJM9r6jmR6yG-2pCC%|yS@-KxX}jomXC
zcMtR>z`9|n;0aIhopx$QsgikOqZdO%Et*#ja5*~?H4fbOt?wjiAQZQ}v+o+03(ShU
z_}TMfDIVy$&5IuEuz~!IXwO0tV@oJoZdKB7LuSWGuaCdu;%?704;i@O0HIRQ7B3JW
z7p3{KoGMzh8pm#mpla>omjgZp%&PaCeNE^>b;Iw9EmIkD64GNnF)bkfB-82UpU;1J
zq1QAob@IoVtB2C8nK+}Ql-p53=w;rId2Q~^U305zUXTr*diJb)Y|n{?y~l`T8o|_)
zYa!8Chd9Z+hEAQN<lLbVw@LU1ve3RkaB-B=na3>d5DM5{aQ#vWNjHgpk3}G9mA?dP
zg_ZCAq}3oT#+0gL%6DM?BWQ~S*QR+a2yP16lKp(QSW@Cu_)Xa9ms+g3J$0TG7ZpPf
zY@%?G3`1!tWg-t>qC)$K@F}A8ykA+ZwS7O&gAy@Qd8UeHq!v4>TUeRU-z%j^9Q)CT
z>-R@uohxW*w)}&9WblRFE3#aopC7WH_x7V8ulHvekq^$C-5$ZBH_0<b+ayg_o%8Ls
z)3?`|$zhKAFBSX~!~T13r5f~aHG8~2vA(*Se`XLrr$w~<P(Brv=6!Oi;w?z{u&1`9
z_Wp~pLyr9<wc>WXG19zbb*j%IYaqgTKCdzng3X2b7Q%3*vvcB?CcY?E^r&;hO|OgU
zi3xi8z}+LLN=my;rD><mn#ChSN*lsp1wFwQ7_ieg*)m)No0h3_yCwIayr5Q(H1SR$
zv|1z&`|KbJA(RMRh*OQJu7t5epS8<`SPQ1lRah%^4bL7W)9$sLDE^)hQ`xKUy=S$=
zBadQwo|pD)aaSTj{Y8R$qG#aKGw#Dvc|WRS$?_^MwikFE`>@Uifp**XCzsWWh|n&R
z-LEq{zl93j-f~|&QcuC5iX^t$r;jcja*Uih6u!jS)dZt2Ojl}V5G{WI++^}uPOq~L
z7Z!F#NR1D+oZqSiHuy5O!gkM-k8*hhUZ5sx`qnbr#OR4G&fu_Lb1iImjFs?*yMLZ>
zxm6uu{W(OS`km)7;E=PNh!Tgn2yt=wOek*9r-xUs$h0pUkT-~OrLwF}Mbmb!Ap<5<
zOn?<xeM3}xd%RY;;lAU%8~Y^6ctEdX-*SCRiJPbmfG|617I<VVWgq6%GRd^@?7|Fe
z9IUNFz<iL8gd!A@nOe+EUh<eKPL;c$&cI)WOu`G~R!df78k_K{QeWHG%dGF!@xDQM
zg*U@S7VQ0Uj$UN>VGcAJQP}Pq@_{S`b0uf2h&_WeD*MT0w|0^Vh>s<?;cfKFS$awt
zsF}D9AZvt)yZGL3!SgcN+kw7g(zrNShsxhqgrJssntW%+cnfQP_~c)(Yx0QD1-?=G
z2yt~^ycb*1&3Y4YC)#;Yw^+7L-yys?LmZARrn~4KdQ`izhKz0X<{pkK>r_ehov9nn
zu7Ydb|7ouLp7L@!x6a%&y!Tr8FsWP(QHg??{4gI~my;|?vYE($>3dud*3)8Ulw=}v
z%j)eLDLOqJdR%KD(Zb<&tfm^xcPI|_-<X{?J3e+OW0;cE!B$7$Df^H(fibf4d?sRD
z$Q||OKkL})Y7u>KKgv8w%{3DiRI%yoDk?6OPNwZAnT0ly2@=H8VMm`bAm&;tC!|Bd
z4Nq|Aa1iBcAzwHdZF=e<9PdwG9w?(CrEI2t;y@q;<EZy?>qqgX;uV#o4UP7*_FeUs
z!#ZSfdTI$Aqoh&c+Din%B-%(Ft5V1riUzYX<%>j528jWbzUCAR9_n%iNOTa4xsu-0
zh~>u~o5+^V4nO?hZkju0e!%g@wB{njqr#SD!tV_oaHe!%68sv{h~x1SK62r}Sge_u
zEvce&g#3#`nGhLNDAL!X;RKpt5+1B7r*vDp&;!)q{20>YkTMZ)l@}HfO+C0Z%0tNf
zWvOzO!8sZC!9!fR2<AyAi)!A*@wxfEg7}itjH1<xy8K&d$dG6DuT7?O+!#M_9QstZ
zS>ZtBn@;G(@fR;NA>;yqo=eH>H0_B$a+gc`vGC&V#<&0>4ZPKMP>8B)rjdAK$eR{U
zg}-)UWixu^B1jo{>a0m5Fr7*c=L^hp`FeCnm5#TbWi@9z5GzNsBRLd=;_#P9!%&lg
zv4dd46V3VXks-Bh9<zm?(C@0lH-P#Bd^=t<l-^ttwkdFuLWR_|y6(|rRhCdYI=Ars
z2h2h1=y}R?+XR9$qE9~ZmBXj3dvbY~ze=$Ml}+8mTT-4Js;|>mi!J{M4y<%wVq`q|
z9xU}D@L2Ck(UljenpNNqD|hMsnC?@2ctl`aeCQMyAmz&-IFn;AKxqc1hrLLM%prsY
z@({YpXqrCX6$4buB^bv8S|{7Cpgyyv&K)De^s(T;=5@oVUxLOp#mvN)wK7lUKRr92
z`DMSTX}*+aHs{f(?KvMDp+}Z0k9Af?tTIk=)BF`3>gi)r@1IFzQ7~L{v#Ekle;_jb
z6ZL%$&NPhIoJ)Gz)psG$306A%q_qAUbAz-*%YD=1kK<6kPJ)$K$$FE|!S;z~>6zZd
zQ>0T95>-ra>0J-$I*euax5+{Giw`h9)^kWBTQt?7!@}lfCm~H}1IKl#zjNAa-0orI
zQP5n&V+mgx-3rD+#<81FdbuX7Z5(F^ZWYLOFfQ!i;T(gjDleANzf&9%@>PakCW)eT
zZNHHRGx(F0$gp{)=bz4ywFaP^`Ovv0b!8dC;c&2N@pjA1$0DJjU-^>Iakbjy@fQt-
z<gPzE6bm#z=jW|JK9JL6Po}hIV53tFNLb|fsVxMtd$22R)HSk+R~7Xr=u=`wTJ3i+
zJG?p14;b!Y&51zlo^wO|qLg&EWzZQId@SKMIQ{#V+mKF3mtX>FQVOaYu?<%%v|0AX
z(}syO`3E%eyPp29oN*jN2BrRpG3Ma5IZ-N7NLTP*;7n7IYR}-+@Lu(oy%6ubUI?s)
zzS$F?cce3}_m6K<0n8guz_(k_Bq$XRD3WH6inL64pMR{x`lg#ReJ&L;RlJFqUYbZR
z%!@;k)F4T@i<FQ+z+lyn0aI+YQ5)0-(l5ks+RZJuNS*dc#;VoTeIz)h!<T{Q=RU9Y
zd&T{*T^iVoJcXi8(S#>8ITyTr5*L{j#pEZ^8#ed1nkSpVjv%A@K`F!y7s8>!rg}Ig
zD;i4-XM_sAgqI@fK<bvT#}gPCxno98=O{{Up!?eT$Ajm(<NjHH>FjHZ0PgtJ=lOFD
z-0N5Usa(MWf6wDvkbSxfX~_z7kN+dZF)YW)Uc6?{6w(2gu>+v6Au-$71!}nA>m8qn
zsA$MIcfF}wb`JkZEZ71i)>sPxtR7$35J&ebo}hZg)aOzEQqZ~C<+%mW!5VKvle2wO
zt?5jc$2ZSv)*ZUGRf92I&Aw7-iR5!c9fuk2;bdQl?Fi{9m3(`KTt-ZV|8fFB!0l&l
zkHNx>?w>gf!RdqFSp-(>ZwmVPCO8BuADB7nceUofQkJsy_db*taaT`!irev&9!-L|
zEdnx7twg6%a)$o4k$TH%F*6W=GTMHjRA$W!VxKJ|RfiGq|5Wgd5v*MGud{MJR+GG%
zaebLw4sWsx_YL!XR6*9AA5N^u;dgjlN?SHfHlxjLc*#7aEJZMNL}c{Ey1z?UHQCIG
zl3)}qd?t<`kcbgXg8-kEGR%`l>}2qA&1Up7lEvyplATN7tVsxC#|_`BnfmQj8aX;K
z!`s7o@>p|}ds(*e<3lnvA2<V|-2uJsKF_^_riMXhX$`%J(91n89~}hd-%t%)e8c|4
z=Yy*FYAAR8*>Dv<KuD~h6>3lId^~iY7@OP#h4b&Csv~=+Z$@vUeLa<Jts}#e2EiZF
zR|OZw%E9(0>#!{z)6s6A^*}&VoBfXG9Ny1Y)!KPF6$|@^NC?=K?z1P~yKjY4xs#;1
zxOC$CSsrLTdtXAUe_dWe#0Mu`PQdKVE%&{d?((^8y~1sT<~_|6A-s(}a81>Z<66(3
zvdb+762cZv(_4TnkQ&wef)<NEVT#GQi?j-X{<lX~EdFzUEW&%e%&dt{VpCRh&wHp&
zBbjn&s?@xC@=dQCFyrJQolzMX$|M+c((|iZA$t$KCczbk_9F*HG}s+Pd8qxSwziMw
zvYz#J2OBx~#=abW?@;J4HqB*m{w>G;v`g!qpI`3QlIX>Faom#z5?u0pqo*iD#nO+L
zIibyMklv?!9wfdh?SGD^nNo^!##|XXpcH3$L>Fa^T;0^nGz-gIW;hJ=-xvmaX=Ms&
z_U%2Oy0EbIns5G2GA2wz<eEx7xSB?vvAL&K;xsF`(AO|qu<B})(HSK+TRjZ5wKd3~
zxcucZei&L<Ra+&u>b4Ud%$dcj-lCfar(uA^Y}Rv;)6LJvYpMg?Lt4Gn9P^I_@Yg4;
zmP~}PX&3uw0aNsOfKlm&2e3IlxW{DLgZZ5;Ty9>T0Hq$!Hs*e1C!&cxyj-Q(8tOjJ
zZ5*y_E;94-@Cxwot?b~Wv9wX2>%~ey)R{(sZAjnvP>vlAdP}TSCs5`AUGNPe>}*c4
z_D=HuXyU4D4;C}PL;n^-{D#%NrlT?Ikb^|Yw?xf|PiMP$=s+?h9L8DD#xE!sJ90^e
znE&?kfH*HLP-t-Y?==C_E%-BiE26=z+ro(=IBZRi@M8q%R!)sO=Upo&^Nfd|!Y(T|
zyTZ{kIi>)|@2JFCjUnM@vsG1j*u^(3!x~#VS5?AAALx{Zm_oxcCQ~Y=s$YVRhlf5>
zc!m6i5GU0uAI}H=m4esG+yk5#_;Kn}XQJ_W{fc1@>R!SZi5nnI)Z+1qo8Pfb!o?JL
zv_dFPR@e|ot0mph?G|d`S)4aB$M5I+sT+lb*%#)W3Umd%Qfn?xQvE`-0W54v^8y~1
zwS$@8X`{jK?)d_W@-*Z11AM~X!|eHw#X5A9;iQq$xe;1Ppz0#coyHu>FEEj<L=)Lm
zm)}aC>!Fli_d8&GaH+#{_Uh20_`@*iMD-)}t-@<GK<-n9@<VAB$3G*1sXBbpYO-a1
z4YQ!L@$u_dB80P$`y!CwprHI~YAUHunE4fg=NAo}_SG{tf;g%?zCgOdD3dfY)%5`(
z4Fc`g6zlUAI3-dCnO#wx)u`8;M--yGr@gp-Y9#&XIfwK=*U3m9z^`f1NpCBy6lkar
zXG#1P9Y<*EvC}l-Qg}_PiN(8<w?sAjlO#{(mv8o+9&I-)b|CNPx08<}hzpP?*2fq0
z59Xh}<|1(9Z9-sj2Bx9zKc{wZcbixxAbbKSdCT^d3bZ@BF84mbe3gB}h?=-Oo|xT)
z_qGqkIln2U3w5}77ylYii30a?@<N4Lv&kVK0^(;`33G;X!A{Qhw*jA;B3atL#~EF%
zF%SD?)bo9wZ!&2_OYKuXo<M9^jureES%B=HDEuoIsY``)yVcG=ua|7#)hcY|``F@r
zyS3x<jhuzX_{kDpL@eN&ikGk3G@bR-Giei(741;GB{}oWynomYO0#w(2vwpaA)hz^
zslZ#k7Nf6rIf)U?Yb=(8b-_xOZC%z^&eAHEx5|J;BwH|I_-^!8aCQH7`lW8BER~F9
zQ)L$ms*Y!MV~yMH-Ob}|TpFhkBdMOa77|+}pS%K*nTRm^l915mBDrmeg@ur4mWxK<
zsTFIqJc6LxkDe#$h3cGkbe`M94~~7=?<i=IUxzTco*&QoTKX1OfhH#gor0>$Zi=ZW
zPBbdEuis;{8&&bN6e%ssta76T0vo1Zr=S-Zr652OZEvu9b*<kbr6#5&#@$^3Tkr)i
zOM_fP12%jeDP;}Y0kqF11a<>F!EX^f>Yv@hvU>}EaoYHWNkoJn2eo2|Qwq5J2G9~6
zdrIP(Q_eDzs7QWyN^(C*bM3}=l&I7;ZS`N{*R;EuNhbcIpwOZ)rLg?j+C*-8L|do{
z{XuUuVn4|7^=C~1wyAKqS_{mLhq-ml!aC0i_ez?gc;69$+%6vp3LdG=)nfqhT?GLJ
z+3Y3|o*8J<FaQx_B4DHl$?P9DtH$O<`Mp5F%^_HEI-Ck#j?$y*GhP6#m+IB|{NP{@
za_G2!xr&n9jc_0Ian%WiK#8L?my`?xl!|4>VbP1H4;pxZD+e$0#P`jxwRgYjJd2~8
ziT#OVwC`u0TjB8{Jld}%35SEETHp|Ui-~MOmuGHqZ`P^#LWqfX>^3usaqedneOLe5
zK}pG!Jqvr*2RVeDgW%=U-rv#iz9eS;fh{xg#+(#47oAU+yahnbrbCIDZ<K;XguYus
zxm&sj6{WSok4c0vSmrgRZQ?<Iak-kj-qZ>tj*oj5B_S@wL)9vOiW@;4i<1j0T8^vr
zoUv@dP1LmHrg|yEU#VsInQSb&a6OzIxjf)&4j*xGtDQ{%eDiAd?re}B@Mv^(f=yue
zM~5`{_zo*%3m>na5mq|U@<aR&U++%iuFw3}T?;j&vsozl@4peSk$$!JIJbKzC_+GY
z@WXWaB!G)jqML=1zM7nOQ|iUhn&InQ$}!J)d31LDDxXB3lg#^J77KHoO&L48dvX+`
ztvTam3?DakfzOb;f{>-Gq^mNz24h`Zrlm^s?)BG{&1Wat6x&N<F8Ek_@^VMhWEYcQ
zxPFLFU0rCHG)=g7Kt&CI&ptYvB`Gr)H8KC8h(>iqHcDobtuPAGo^#}fH23h^vYJ{X
z5~4r9>Ft`Gkav<kK+eX{5h7?b*Q2c8vm0Ul!N>60$tUyS%tKu2TwhTGz5q4!YH4l}
z$7RymAUTtcIwfm1KzEjkTuYGenToFkL9-E|x-Na$?*&FQZ4|)A%27*YV~MEX-r^wX
zS8o_Xib9+|4j)GMfZmUZ7Gi658`hN*d1xdQuIfV3w~W{5p}UPLo|%NK6n2ae@eJpT
z{=jyc7|7auPw4@eKRcOfUlN7iTAs;8><ZkWM(Qwl+;CnV9fp;A@m;<GQ04WCxj^Xp
z2f`mjK_467KU|#Acc*)gzp4c0z>6ny<aov2r>5%qy@`LoR&~2y4*#}W3tWR4PSmU1
zak0j>w=kZExQBFy+67V>HNMOe#AG`I>~U~05>~D$2I@SO!JygQN8+3d<UD9DbzSi@
zhGe8%Z3D6|_h}a0I33XTTMU;4hBHr*2m~eUu0{!mtc9SY9}@QT6TD@{zkcD{42iLV
z9AP@t@ZHzDr9FbhOvM&$e`tgLNga*#qSFxx-EHg~&-DmNz={e_VlsCgit;)h?)tqc
zxIwLQy_YaLtC~wvu2ZDuSGBPReD+e{sK<`$Ui7u^>+03-%cXRy9sBF)TqBV*{eppL
zG@=#k+YVvcTIkx3g501FSS#^4Qh4o3oh-kp<yT_peQ;eQC`<dd*h5ZZe~0uJv$zL}
zD}yf+jL^U8?EP5prauQO(2U?Ekm>71r(!jZpMJsgyc?J`a(pwL<8ou@>_+p6TA0fn
zTwWD!!FIO*@Xiz94nO((ePWKid&$e<u*K0q-cEf3Kkh!c1zQaT8e(nNG&mO_+-3qn
zb&g)%R(dwEj2vebhsw_lW*LaX(uO!#F-ieP>KbtD3gHZG&?_x8{phdid@!Q(m?by)
zsi5&?<>gB^)G3Be>22LO@92-+^=obH%2ig_Z$7Vrd_)gn;l#b}lSN2uNK>yd`P(B8
zh7l0A??XT)3gSVpg@@_{AcH~Rt)%aoYD|52=p#OYny541xnd5HSf<QR1}zoB7ADCr
zdco42fU~OMuoLn2Nf1TMv!yn1eP)Z4DN~O$L-lTSoz`12GSjd-%qU%sa|!(4Cz3ie
z=<h9(^N2cTD0A_axjIMP8?M<!Ch-7v>Tj!dNB9SZa!!!h&S-xe!-sQ+$rntvb=Q8S
z;WF))($L$ZE0urq^LqZ^pW0hk-Sl*QIHV|{Qtmf5c}&9}8o)BSU*7L&d_~KwxluW_
zDv}y_x_jBAY-r{P^0k#6IawTAh>*5?--n9fjCe%$jBNBcFIm2Nk*YJE<+miT@6!WA
zvl$G<Cww5_9d0@%`7OD3y+i3Pq68O*Am-DbM%M7!h1s}9nAwO_v$X4oV&ES1X+BF?
z^A8&4bFy?0tkc64qXuYuT9ohxpwsf8k*svGD=Jd>QbShHotJ;zd$}=H@0J+mge-nD
zRL$Ffup@k`Q+Uz2Fi_<?pc`t$P5S9cBQD!DLzBW3?;JcbWX8Q`i^)ywwZ~l5%m_i%
zoeo=t$5W5T1NEBXJB}*X&@%Pdvl^o}hcTR`bN%L=!|~LU!+{j*9-JMVK;~GEjz0Vg
z$9HlLCWcQFYh>?;n3Eap=dOWpCXWRwP|=8L@8OMWY!JmjR}s%+1AFZ0SsVXgX7T#r
zax@=@T&lMk&x1LsUW#XI1bwQ?xjYc7%CY>OMT;`$D_QwGT~m1VWGxbfm)}=d)5<mW
z3eA*acC-54?C?-(J0=ZBM=cc|{&n6r+apbV%KgfY^rP<o1!q8*zdQO29(#REk51wu
z{dJfJZT^3q&U|>j3fO_9Uz<%fe|RvToRkJz7n-R~&i40V!mxQjuo}xVZlwIbL+%(L
zUP&<mb-y_{14tSfas&w}qM;$7OL_<1BreJHYQ{QPwJI-nNH}z#!ju{oJ3Ko*Iy-H>
zgPz}dwYMYfR#5)k&hZJXj4j~{Apr5$QS0=0Yj+PD{M*}IXgkd4oouscymKV>)v$BY
zD&}1jTY!tyKw@##*!^5$a>T_Do^W@IzT0NFG6btEW+o<N1|uzfZLs;{$&h_QeM`82
zayPbM;$mppb66bV7l#e4=-hCNQL$Tg090Pu!;aHUB9D8P$fuEe#&2842jtH}311TR
zU)TX_JI+u97wgVzZaQtx1IYcO$fm5HH@w34L<4<`<cIkLfCI_wWZS#Pnnj&!pa?RU
z^v5phSVBnfa+;7RY=@_^wS9@v#kp`hyL>vP@PzhT);!zZQY`QWt{@2MT|_^!Z1?8!
zt>)eqj_Dc3>_J~L3|~6NiS6~+@Uqj9u36|Ki4I9rkBT?OHww~F>LjE|BrmDXc_W*d
zk>Z^oXUwm884!oQn`nZw$y>86T2_m#E;0NG{~v7VbK$8#1cW>(<vTt^Tm9A>%uf0C
z5HN9_O!&+}LE}=>+TDiL1|-wYuV-6(AZU0euu#z>%m?q9%~0j+blOUu_NKaO>CR>}
zW+Ywa@v52-siN6LlCfV&$)u`v`DWNt!BPO*<BiVZqGiFV(3y1Y>iK9=MK8aNV9;8w
z&+By}L@UJ!D$Utc*TA>7w>iOhUW5rk>4fC~1v~>Dn#iP-nofqbiXA$=oD2&g#O0Yt
zSI0c*P9|7pj+|$y6;XcJEGJrw>_O9~Ou-35aw+#~_>$r2DGH4#k+Q@ZF4Cjo+DHzJ
zQ7TLr$2ZaNhKXr`qoSjh-92pV2K7o(fbif6+lS4weV~X=xrB=$c}2@)6J77>8ZUEd
zVPJ&x*nORYFgj-&!5BEh4l+)ZUyB0T%z;=1L>Y-3v;F^tDRKIgFVpz{*R7=g|MK#a
z|Nn#hlUGb{9{@l^u8v0l;8$|vF#zf>s_^Fr_R1N0unhl<24J>McHml@p4BWhA^^wo
zbzFj-laBd(ULPE`(64v5+1ffe**QL)i?ZX)lR4*Ner`_o_BsWc-VJv7>FFjDv9ZMj
zZ3%Da&YDsr6=)qEJDU{RFa$=E7+nl~=X5G8{Ize7m0u?vyKnoO6c-xL<QRpb_9sIT
z_8Q85&n_()P@4a2RK%BM`5cgb&w~F=Y#%z3<79yqeQlIQ*hSc??7sfJL>DXEU~2^W
z<JE645XVlhR0=G%0F|*tD1hG@<yzMpmoAsf721&uL8M-dS69(u3&3T9mLO0{f8U_@
zjR-|uhLO)A;&Pb^BuS}=ZBbw?2$)N&ycjgXUk$Yi8GV@Fv7k`kW4G=0U4HGp>y7z+
zbUu#xRPko=9+vcopEZG`#QUS1poYg7FTg}SyiNTja1mCFcW^=i4km+B-*(5rx*%nB
z_1Lld4FB*ad{}sLD$4L=w*zIUB0$*F_1K09V>nmY|0(boJARdw2<O1QDjV4t&q+(=
z3a|?QR)7)jdp6)H%e~Ksuo9&nw90H9!CvA$vNa#>?g8(hm<0$M#5;^PPV2mIcHvWg
zNLeb=tO_3pPbova6%%TSL{xCO4192D$saWZ?OH*+%c+pdm6Tcyppt!5uCvNYY8BBB
zlYSqzw4;g0o07l;^exoOTBRUG=pi8c*6p#<H)79l+<e<|mw$Lc0TlTnCk-j@NJ-!Q
zW2YRSIe0OUJH0^1@fm?k=)0X3MSKj_BVd?M19w=~yRt;oL}=v1p}4#(t%FQu^!6!l
z^|4jnOf$^OCbazO-EF0RA*oZ3OPcU&$aNK%<6@drG*D&fkf3P`h$ycQ4SIR`L%YAa
z>IVCh&cz9~+wo5?>|qIHscDH~)x0%|i--ytSz4ieNH#vALGMrshjR=)=mDA9aAWF~
zkHIQK$CHoOc;%haO%o`)8u@I1chlPr69=6X?k7r6q#)Aoz~W)G$cPny7Ks;PMaC^N
zhrq%HD-fp;g&c8{PHO6;PAu5!qU*a?r|@%=iIgF{SHtB~y#AgIM7;Y2uHlmHvV+I7
zfk>f;>8a`Bz7nV86nz**Uf|R&Kq(ie;_!Xj8`JgIg{3*u5}H_zBSL_EVIzCR=x;8p
z5F^ooeam%T7UQ@DXho~)*!vW0jho!qcj{sQ5^EFnk+J6^#O={!zTRXfG#?@$9v>Zt
zkN4COxq+Slk6<ABaL#e>Q1mj<8jp-6mRDZf$E4#MY*525T!8Tx9k0({;1`kTD58Q5
zEO@Dr&Y2g{RH$&dGN&!ggT~Y)mNQ-oxv`PxpcrEOOU;VL3iXl=<+oNMUe9G?3fSc-
zVA0Vc2L1;;DhYxCI@D;jk>$cE5_t;VppgkFlaO+uNanxfS|db6g^lIpCT*L~?Ky+!
z8d$Xuw1ioV1T1_Hg*fSH1D=<~gdhaeiYsU%1Zorb2?qIbx|a@!=?nYPrF>|~HD9d-
z)|5dbPF~2SI$R3TT)~C6W-QS^gVwE*VjL84NaFIOUtJ0<6{)=@a<p2cR!_@iOh0Z&
z&D-28i3gqcLOhlMPfQV)I(uS^n5Ce=cii%k5)7L=)RN?84tH^|1rb;yGWX9w&pg@R
z+QXBA)3;j(T$RANq|(ZFm935)j8zp<u9qSgultc=YvEe=3j6&89o)HY5hnBkCcr};
zp_hT!tpbI8Y#NV{9?DVE%%fMEn5Pscb=3;zY9|{S<y|#>wkUI?<X1Z7iq2#Mi=LMA
zXIrUL@iR)Rj48aDBB_9<QYh{QwBH#lN3(Tzctr8kQtDr>=n|uRi}G?5(W@g||JDHs
zU&Sz@yZ)RnVo^2^g>*TBZswSTxT_K3vReE@UU5!BvN$$f8dS+Oay<g6L|->cZskKc
zrI!r-#7a{k#*Gh-%PlB@er5ODmT1gxj7%Bri*%M0&x<gmDv+mVXJtmlmnioN_6)dz
z6CSKdJuwncMO03N*40k^3G+$2(I4uXh_N9;*hA@P2uXfMd#ehIfKsGOMsX>hD@dWT
z3f+iwhP0&%hk<q@l*K1Z>jNL_>>O`yIQDoAGrdUjo)+fDDuvq|(ZS?t37v=ir!cEl
zsu3@>+)V>rj)SiB6woC6N*w-ur&mgd3d&(>RkWZS?^`Fkf9x!wVQf116w03`C~>DD
zWI8g^h<K48qW2&~$X$DroU`$)l@6_}#t*Cw`U%r6!L26g{vl3xBb7sbOt2to)GL*h
z%~Fz{#`71&2(aoYoc%-Phg80{lGCB<P1=1YPL5uxVQhg`(G3@Kq%GNu?x#s>KEJNR
z!-iqT`MDVSm#I^<UZ+2@mbsMJS=Ms6l7~}@*(Ix9u`p3<TYS@&ljE)3lbsWB(pIuT
zq}T~Hq$n_3WbM2HZ2+3GhLp`uenEZpDh67-4YAFNuzfLplN6mEMa1Pcs)z15X!fw=
z@lcLVK+Ax%80noD>(n&^$l@(vuwiQ{)rCz-MKlEjuPDkJdhR;`57tiJ7uW;b?;Woq
z!luDSOPI>EUDnY!6}JUetqxm639!0)h}nlZ5Y*R+0L>r^gg^uAz)v%9t%KJmj<{k{
zQh>-1A%M#HRT>L&86k|^H&01;zR>uebSLGG-amQ%KP%5)TJiiJFY1luC;y+X(f{WM
z^#A#hy|JA>doyu{L#J&|x@`R%9@Ngm2S2$(-?eK!|7&vr%5d=s&n>oa)~N?K_*)#?
zgzv&ro5Zn42$FxEqJZWfrzpV6F|PSAg<!)&5VfRnU)%Kh=F=1%kENl4Lh<twz^>w%
z{-A?lnXnFi(4kUa=$vD!0@^XhnqK<~w`Y(+0eMu3xM8;LRiF&l)3hFC9Wo1O&yrRF
zt@ETML46h$5wu396jK~i6g{U1IA%;JENrcY_M(Rg8jJBu`U!)8(pU#N(grn-`W~B8
zLF;@v5~!iduzyR{0XnW&EVZ&KRYqTOYs{%EpW<rz8O6X_5C_GLG6Dt-vhpnss8|l_
zhEV-9lNxHkK_kr)#&}0J4CSg&&dMr>eFl?{h30uN`qB^&`(xJzO>-3JxRqAGOSEBj
z?XiugG4Peob^0nSz(`2ENFyrJh>A31k+sx`wv3%0o>JU_sKuEQwKyB17G_G+f|;l+
zX&K5x0)pB%HtOYC;PgQpm8^=uH*K?)rf*j45K3>@5^va?*aYHM3WExyoxQXJI_2!+
z&I`T;lY#t8RPy)qJN^8IV1UkoIFcS1c?W8|yOQWh*a*xy<fbAv(w&Q#e9iJ)iX;m3
zBSJCPD5s#zav&1xRMe7#!_!crO0=;?sJ)*(cCl`ioGG4Bd@98n%4J{+KwI8O>Oh*a
z6K6YaI;<iV5STFuX^^;4l_+{TmzGWk+7B+RxD?OEQ#IkK7R3|xC(`2!Jjz3SHXWX-
z2~Rb|Gbe1pVS>G$?+wJ7V_r+K;sB2?Zn>m$42U2HLk5D{*c<IRJ<xbD7?~m^r^0O1
ztmop63t1Zb#>VzFYzW-W|20-KHTx{A3FSQnYXUA4>miT-w#i=O$y@jW+pRxgJq!IY
zj9uLW#?hgb{*)seN7vB3CsG-q*T_JgKw+$Exr9QZY}M-v1?ybSs52cGA?txmbH>tr
z(nQV*beLE|Q>tI702*cDjl54O0utVo8bXqiGe{av1cW8H5UjwvEadH^orkpmBy9si
zwABK58w&#S4+}bU)RoTF$QwGtaiD8O`dp@sqUHzMNU4=fIIF9oQ6cgvx@@X4N_Es$
zweZS9ZgDD9PE=c<q~6ZPLJHNznta>h8WE*>1y7<>Oxdzo6j>0T(c0c5KZ21L1TNlX
z$DAI0w*wqP#wcr>30r{KULqtanOF{o&bB5HwFk_z;_d+@Pr4#+Y=p03g;u!uNR(WO
zm%J%r=OTAR-wiNgY?`&JfJ5<p*}sao6W`9Y_BLK*#r??cWYL<|pq1seEA3fgAGkvM
zx)rXPZ)}7Yq|{xWux1IqDsf{@<p0h#xff=?ugyyiSXBYnV}Q7&KtdOg&P^-DISgA<
z>5yJ(Y4;nx!94~dT^307`zGT3X8iq4MB+CBpn?8s92a_}4zFV^!B-my_Rl{Ewz<LK
zL_>tPQ8aP5u^6r4F?F^?6n%qT&Yk=gEg*_ls2a0Mg7OfqngzGcis9SXKo4HtnV4uQ
zf=jkQ*nXf1)D{(!R@pZjd`t~BYQ(Rzd^p6nnE|H~zY(ifafLV`#+@^J<XP*qC*{07
zAQJ}HGjM;h?+#0YMmfc-h^05PO50aaX(7o5ZkHyQ0NJ7q@W355J*U?zabu_U%Dxh>
z%1fbUvBn*-7<poS*r>0u59>H%*4X0WhqC(8#h30n&MAE9mgNLOZ$CVKr!6{nH(W>B
z_yr~LD7DpGI;`g`pKiBYo?|KhWTz=OLSK8nfkFpEH0KZ*Xnnl!d?oWX`7hizb#;x1
zjM_FgW>Fn*P+e;?CH{^zJg?nYm5F{irzgX)E|YmVm~5NsmDm~x=o&%u%X^_*4pE8R
z?V3zk*KgI56xGn`Nb7N#6e~~Sb#o&ffl8Hjk<q3FLs*ywjy)_Ts;b%ozpbH+#x#1)
z+pOzzsj7{x;?di~9=yRo%ChfH54R5)#-jg(S=7ilvm}fnJoTI=WL1qkZ@{GCN>E;j
z5uk7<F(#>k3D>4+tBs6r`PC|#NKqYB#sG&saB&q>c@q53AAcoU>1U=iPN^~$Jedra
zZ}L{-zFT;;Y~Tgs{&jTyL)n6%DT>@QnHvNXN9@IVzT;GTlOaC<sHJQ%fl-iS@nc|R
z8H(u?0zb0A#x`NYk1Q}-)N1EdYm0jAyozm6vz=Ep)OaBpe394U3(@3@yf$BmMoYjn
zBK>os5k9gIQVvoC<X^HtdwmZt1yvaTlGz@|?NfX3QwJmg=LQG%fmj8E?Z6F&_OMLA
zh8+!+*&X)cW!zyeyUc6M)8NF5QEnC|rKH{z5rnx!&e!~FCU3Mk54D3lu!ewZWV4X4
zyU6=t+1*ZdHRyKyOa<iZy{N1$%jE@9sPT>QRSoZem&#e3elDi=xttPGd7*^`hFeGq
z>L3<)sAYwB$tXVG@Y$Fmokd3-yf@PhJaDn$9Rr`T!&9Ms2_9~XmkJc*r3TB>mV*lW
z9!)^}fbv;j)<^)tmpstA1g#5D)j;r#*eD6@YJCH~YrEy7vjxdSAtPx^mBlkcL}tKo
zi6|sw2Aq_L6;`i3UqG8D{;J|<HN97*c+^WR=9H4MvcgI!l`WN0@R<<`nF{!@lk4Tw
zTSE(6O~OvATD7_^o<f0(77i&WQIU^cY;GvB)*#_@(TP4Ub*N<t|71aPp}CR<XB3l2
z81R?j71l|p(qVX2*`ihoS5?SXH1;XUImc}fxTjDaR86L^n6HhTxQ&dN%M@u=<p&-7
zCrgE2h>A#=l_My{P3X38&`6n_@=S^-)()tUd?jp=2H*~CzT{W*_k+BP&jgi<J`b(i
zD}7_U@!XW@RE+l<<ISq5SKWgPw>L%=hT=n_lPX9)+lOUjfGY^aHgU)32a67?coSL)
z9e@rJ2b2cE94w|g84Ru?KZsKL(e<K=tjq;F<C0x6pbk<H75uU^bRhpz5vK4H$|`=T
zpMRxu7vZXr%7hwZ4j4K4h!XIyO4<PaN~39+PLi~p>vLKP<cghf`nX)lE?2C=YE;sn
zu1aG`Q_*k*4j1?({Iu%jxC>|dS{S9f?j<o=7{o*|qKw_)HTqa!mASE?oKfKRkzuJ-
z=>54Nl@r9neMPL8W1YcNrj|)_#atS22UC()p(=cj7v$MpZ^(1`F@?25q<eaN5A)B{
zmQnmbt41=sEQ}QVhqP7142!~Ep@nm*goXH^uheMCk@)zJDe>__N+)PH&Z`#wx0&!l
zmxlc)ID)u=9rku);#UAaaKH})C_U=ML4oK?aT>rk72-JH-(Z7z5JSk6c95^!5TR~A
zv%$1I<%60wl>PhHYXkWrw6-s2KYm}rIXtT6dv+i)-1EpNF=CIxf99A5<Vhf56Do$P
z#?fvCnpv;l);N7r54%f*T*5z*guwWtBB#ya{!wfH<Xw|5B7@+P2R4K1LXA^ykbJK_
z3C39#f!MMdR)?uf;+0Z<2%!dIqKaohTE(94dXoUwvbOJibcV2`d3?hd?O7!S<Ny{U
z14ErC0Yg)X&+H;e?Tttb9(uf_p%c0YVU>D`BB8_U-TVW4fkxpDc9dwGGoS=OwpB0R
zmZD8!3Vo#o@<}(`3F$ASwvfl6(O^+L2#p2hB3N=PGU3V;u4ejPYZfM)EW1qhUas&-
zTgJ9e9-V$Id%uQq$Eonvtzq8#O^E=>E429=xWm@Ke&}uA{qNu8@Id>Ppnc-dMR++#
z=P3JX-{L1Mk>8W%JT(m8hb~j*xt7gl9FqvJMlq}l!Dn2jD~t%6ST5g^YAP%$!;sL0
zE6bv~2%@n0hCviUh@zIyVkl&7`F$-LIGfIlu;ukuL}4sc8ifS7W1uv`!Hoh%qohSe
z8jW67jI;=FWhFIK)`|xBe#+2jvx!LdJVrzBae|_$Bs@f1wwpUJj_RjB75gL#W;0O&
zJcE^<8^9lHFb#x!?n5edfvJX5E0jHy8ff+~YM_rbgfSvM_u*5#7N?izO1T0;U4hOX
z$2j!E4CSqRQLaVdX~;vaNjK`QS-t4q3P77)S8$&9y|&$NVY2d8Xz6bCFv}rka>l%l
z4%N82ipn^V!{7%czSoVicdv)IEUbxs-CJFpKi#l4`u$3M6RTS8Nt);`d^WWG>%-A{
z;&wgkbn$jQ4~w}e(7x}oP4<4|&$b1CP*R`jo-eO6zmf&gA=TF2%-v)BH;Q1$qk7&h
z6kJaJ$Cve$Wj+2|quzMF@)ZB=DgN8PGyYpLb5Cp<rl;_^|Li(br^!i;?H0pi>Bw@U
zyRo@1FptfH#bzUqg!_oj7J*I?r>!RJ6?(ij`7||vnN|#MC=&CR<BT$8EVo!_w6GEk
zk)~D>1xw4T&KNB%B0e)}TCAB01sN!_1FESgt6I<Hg{n%%Z7L89N(qyoCWOhsnimP`
zc35MyQZ%&0!ze&hA|O*3IqHthq1bSShw>yk_NEfP$g21$kkscHND3BkOrJ%1NLuJO
zN{E7IyYN2}(aAB43v{}qEH5wSVI^r)R~ZXajv(6D%0j`rX<boTKsQmYNde-jh27~m
zL4bi5ohz44%=vT9HQ!?%#ulxfgHX0ekpm5LdOgerTUXBfw1SPif|azQHm_5b2vOwL
zX*co;Rx~Gs?`3Tp?0eqW<l9E{wh@ci^2v8WMGkv$l@2A%4P_#>gkgMn92q^`V}wSQ
zZWJlqh?h?30>HOi1mAKhK8a?f2%44Di@6CUI2j$rp#OgM`6fTp05OlR6l+leLVYL}
z>Lt_|%^_bDttS@x#Z+(>Az;E9*A?|FBP2`+znKPqV~B{763rz<RxqlIQhAZ!E=t>`
z!cKRSc&8a*W!RA&Or{s6oV!Il6<Omtrfrm6dF%Kp62)c_Y+rG_6(P=fgpZt^4+Q{`
zos}DNL`62c2rTzUZ=sMZYW15NGp+xwF<P{qxkaoNwfrn%wy0A!=4UCD2XgyU=3)w1
zR%DkVwfyssR?9|_YVzX0s8c%`1V*K*YNwN8r_>&kD+;(VZj7+4r7Jyp<QTp|n04rw
zT>ep1&_pbm7)f^uCR3acLv)#RLJEYJiFRH>i|vnSYtI$f=<WCHGNHhxhLX{&ercIF
z6m#LkoS6DjW}2trc6;FijI<u}4pzE!5?%J@jRBRCymPNNV+rc(<+PlhNgUQ)3ODXj
z_<2g<6hG<WJuyjH?PCYN1-eDUlo4V)t!K!ycB0P%C9o+W%)S|o7e&*$E{88mbe}X{
zJ0g4@HKyPe5PPAe`>j6UA;?3VH$1TOYL31Vx=gBSbg?p72_dFImg&V>thBDT7q2mo
z{36Ki;?8=T!O3-L)iy*KZm88T+f4*t%|c#{R$7b;!lO}0+YtHCM#X0}mbs3!+J-Hg
zc)hg%U(M;B^wy&1ZZ*?mxY*DtHK>NR0<O!MCGSKS6;454z_&TGr0VOOx*U*&O)Ln$
zLmG>=643ka**JSGAgIEtfq*V}C@wW?@wusp{L~B#i89=)X*>JST$Tz|_(FBaP?6TQ
zIZfI)OoJoVvvJyb7E9Hm=$x0)*|pLVW;bF*6hfFC#vn(_a>yKFknz=`MI{$CW4v63
zd9d+Ci$Z}4iZ?bCc)Ot}`Z-ALh5@VHFrmGSQyTRL#}(<-;TzjZkP<ItTk&G@20)Z-
zw$+}Tw_ZG75jO$2^IDH~q~T%G`(9~T869|iDR0mSH&H9(I)Y4IG*DenW^m^~MK@)Z
zc60FpUaN^R`L`01Wghi{f`ObzysKcK=1aP^Fdf?^OBTMjkjHG#P2iuwy@fAjOtl^1
zl%r}Wn?0_E0@)*LDDYTgYa-=yA6;QAn{HS|A->wBkLSx6<Z|jj{t$y)>17PL207(e
zr>{pk*3RwF;z;C?TbIDO6oinsy=@`Xp2uBTbs`>TYU(sR)SOIGkyI+#ovs!<{rdv`
z6}kW6^;+l%KRmzP^^ctUA9d@+ODlf=qyBRF>Hf!8c>lwC@pSbgy802{W``54q#LjJ
zW}P1j(tBmgE;ju^XiRw@pdkh0a@r04u`E7(N16g`i><G-d7yi~!sf@HJo>os9cP{!
zO6awPPaF7WnLkDhvLGv2Wl@-w8s!iHgDMDGAs7BS_CUC-2UNEm^LgC)^=xae6j(KY
zr6a4TbZMSZ55qsKjBz@-h0#V=Et?XLYXwU4=x_t=h`lJ!>*17_H#VqMS~MYnRN!CS
zm?>^(;U{|pZ*^cR0jy&BCjN3}t8x+f56WC0!_+(XBmY;PTaASLUwQE)|G&oje@|Ec
zrn~wV&b{>G?xh7z8hK21JoF2r_VhgT+(CB|B${047&<N~lNlZ@EPbBW1`FL841Jhl
zM=kG*lnE<&zxJ74CFDnR%Aty7_T-69O+@qAl~rL^4aMruF0XhFCETjBp7BDU*g0DD
zh}2#q8<0?5S?Y1hLXV%PEc6<heL#4#_8Rbaaj~pg4!A38&*Jt1s{2Y_ZgCf=Lq05o
zZeL;2^OOrbSK_`-ICFrYUPG+ER}D#PgCky!eEP0P?V%`Y{J*(qtmHFujSW1`Y()Ms
zVcqME%JQc4uF<N`t9;suEWM)3Xi%0H^dmQTRiG+>TD-)r#4jmzE?2Brog0G&0jrnM
z<S<wvpxYBLbd+YLBt{?*^j)c!Q(Mg`5O4!|W>-!OHuZo=LLT?8M!pN<pk5JOC32y@
zv{DD*QoZq~a+;|5GCn8FuH85p!m6=M;GAQkhtdZ=W;C(WDdFQ|8GD;^yx}Y_0=e5!
zUW8{l`)pJ$&qTYN1YXWMwTlC18fxRf?|UWdMV65+2HxpX{7q?4`%=wh#q@uQe0fXl
zKTrP$l|OF(v0grZ{$&68D)s*t82{r3_R1N0?wI{-cf2-R|9~I0Ha)9ZKb?bu+y|jH
z*i6|%9K%fXyP)j&SB}rt9fbeMr-uW2CJTSn+lTPUy6bTZ%jVo1_a8^SJQ!hA+F;yW
zE4b0%;cdU9&?4#{;dUB4Wu1_FC*SG2PJlcQL+A||*RE?96}-!WW9&Q6`1lScoeMVb
ze1~UX`sC1Xc0Ga7<M>=y4KJJ_jKVJ6NOrrxt+K!;7-V)~U-E<(PB*M=`hA5|;ilgx
zh$>n>_ZF5sAnzN!lh?v+$q^Wr3R6`HIQGf&KL%2<VKFs=-%SG*f=L}B0#l;V^ZRWd
z6fmRbU<=fg3j07FpwtjnWfuJB3O6<W>x1<CzmZ<{$DG)8mE#c`C73fb)RrX}@T5KI
zk5OY|Z4ah!*YzD9HKUBP9){ex8i`Z0hwX~)toowDUO~&Y!-ny`Y3KG$vP&MhN7}RN
z5oqxk7Lz{ji_0Q?feXN;DE9gE$z^Jv6zhn$p6~wU4ac%8E{b-=MOg6j>5L1dGxE4X
z6Qv{_FSDhitx4NRKHjy-$t@qr!7^_9v>{cl73reABV@a}M@(<W5vYHz-9N?%G6^})
zN=S8xsQf@9*YT%d<Jh_;7dB<Dey7HVh#so(zCQ)q3eD;&KjZnm2b55?i4rJt9W-NH
zne8d@f%Ua!c0#j}o>f7h&rOM-QX(@L84;r0r(Q0KSAbXe)KUuO)B^3CrWOuf;XFK5
zEjq&&>Zar@$?LG`D>2BSs1nXN29VpZP=wIXuwn?4jA%5d37mH{r*SOP8q?HLIi@rf
zG~G6~k^t>yn$c9yM{(nOk)XnTcp_dVyQ2H>RQ!<2NA)`WN#&&<Q%c6}B~@S}zHfi<
zU?F)3H^pz=9vTJ|prfs0;O0<8Tj{|Bh6QhZ9j8j8!j>x{@RWRgy~eKb^$NWM2*s3M
zx{3tCf2=<-&VuHap}fX-*OUm1a$bEkUXj`M-!_cZT_Qgh^#6ESkK2Fi%g>+ezmH-6
zg|W7W=MzvB-?`4Gr%2GBnL9`{Aq|GwRuN0>Y{J+TzWTb2ovhw}joWEksfOBCWT};J
zg2@QITj(ACuD@-E@7medRV%!71pzr+j8N7U3T1R|-o=2l&X^)s$vdiD*FN`#l;;^<
zg%_r+#&TFB2Sf?zh5irg8%oPWST7NkF2dyMsNnC|?X)^}FjjNrm0Y<&)X12w@J$+t
z?JNSTu}Z4bkvMJf8`{hBM57Mu`O&+fHfVM2u}$u({~i8!IM1JsJsN0?YNx({>ca>0
z(6}q;VjMV$Vqc4?9FNN;gSKh?gB!)7-M?t5U2~g~H{ryfz;x6-6aheIYYt+|EQ|yD
zD%#T1j=o#v45>i3jnHH-K<IWL0xXv7&3c&{AS2aw7sbpYBJ8p{qnHNC;l$-^K|<B4
z+VS7z$S*`+yw=Y~0JB+2tpf=K{DN@&1^C+fuXOBt-X{ZgXmeX{Gq$)LKrL+%_*a9U
z9otUHvN!UqP^MT774%oI#&F0rb+fB1J*higtG*yNyG+~dY9c?)gd{x5B6zV76ov<;
z2r^Anp@m8<)6Om<tYFDKQ%UKOsLw=F9DS(~3(aTfEt04I2W}@APkO!k)BhVQFY7Bw
z{r}1T`^o>~Yg73_Uxr>1=8S{G*2(GC>29;Nb#k(Ee0uvW0nE`8oTFc?@5XJ>WWL^H
zClui>YxD6hauS(s$nN+{%*h_Z7)AIs7UAIs!!olO@>n~<5JW{?V`4O3ie@FUGK7fZ
zEXD#Fi}+)mE?yOYHk99Mu>-t1qW^ROEh}B2@i7Wc`MI3$g?R%g)OY4dig3`TK;JxV
zK!HLL_@!D|W~s}tDp@kOKm__9|Icht07wa&7K`AFDMRQZb`o}Qcxosipc@Mb_++Y?
zkS!~$5OfHenOO$uN|sJ*MM|4xP1S-01|0pMSSJl+Ck={q(twj=#BR#qW2PmAa*4Y-
zu%dX^{ZIC_SR_2yc-kN`&;w-192*t_-?6(@Z`i+9y+JyCI|wSgJZcd>34f1uD2WIy
z=T(f7@Ah2S?bak<*KxWP%wM4UmJl{p!X=2Im@1=+gt4eeL25e@4WdfE?p%<@!&;QH
z1SlRj;|mY)^K39aU>|wIE^ZYgA3y;lD*KDJv*j>!`=pQlq~nP{Bn3-tMOWm>VS&e2
zInV;#Aj&)w0dj0gPZIdKGY()K?f0<<Ks-XvqR$alg--*98E3n&E6-t(j9By}ckl*s
zWM7fO)rul{4!uwE#@*-|JI6+OTID&EP>Oo+1cSOw2E%GuJVq4(nn`C${cEI=&q}iI
zkf0L3@vz^Q2jgquYM+)Y&!GJHN(8>Cm9vgg*uRXEMHzOKr<{1<B9NFn{w1s&m$6XY
z+){$w{mll2*2C(=S54n^|8g|)zS$6_<wS()<E`D3ofDx<muMMA2Z|^~l34G;bxd)Y
zGSZS;!++|3>S3mJ0FggVLYYH8i(DfWA$e4#E+v7^ic)}Qg`*tHW$D5zIVMq3W$46C
zVG}4Ri_$vpn?NlIdgq!#lA}}vB>O<)S;D*~a{dQOyhm$eLr=#vu)f(CG}I^GaHvK(
z{_q<tDs&L+vBp{qal=req2|is^x2cKH^5dh$=EXeA*gk_i4W~$85=J&7>hg}VH%G>
z_!qe(5`^=Qo6^~XMg>9QvG8Y3N^R(YXtsmWXG+xKXGvAH5LML_vO<0%rQE+}lfXWY
zLejTAuTRE!vwKAy4WygT*P^UOZT5f2_^X2PxxKn<Od~mvHct$KZ_YF+CX0f*S`c~l
zR@e!7bta@&1@@bQq30tjOH<^Be?();-HvLbms9PwO$m7c)7{N0qM5~$)aK@>jRd&N
zUqEY2V3jvE)F&;Bh!beRs>2fQz&_VY;1jKc2^CPL*V)iORU8T^uS{Ed)O_<>qml1j
zl_+&?qoRzua_+af>Wl)n?+xXJY|IyQHFluey9~An9hde9pe*kK=01CUbjCLa0avU8
z%5?}`D_r=|p3CgEcS&Ypv}c`zvIlz!*n8CGL<Ss1UGZr>)gS6+50e!L|M_H$$}JgF
zk2>*pok64s`P@ep9#PNoM~&O)DxE>3tN3Z#!vZ)s9f*UEvadT6P%p$EV26gS5-|J`
z$M!k3C!|wvgka{vMAh7Y7uK|}!t|)cZ|2h62OIoU6VXMnwxnAu8-zB=m0rk(;Uzoi
z63d2Mj3G%~Z_@5Nn<YIcc{pu{97>!EirFc0plWZ{l;s{~U@(G73u?q)_Ly#tz;2x#
zIG>37VXES%vA%)0`7c}*!_|#N%bSoHydQe*mD9zI7H=Cl82CIpwCEclp?&+)e^&lf
zV|si<XNQmLE9mL)kt;z6^zqN!B;uh8qwh^uFgQKTZ2OZ~!L<PP^ww71n&}Apr1X(z
z?jn*36|rMQM4t0xq_RP!l%yOfgY8}gKP>*y;2+C>mi3fGrINj|QA)Hf2ZItE>?WlL
zmos~CIj0Agr|-e#>>e!Ed&_BUH+5U0MJXi<^j0J4Eu7&cRw73UQdR_3%w80KYW*3x
zQESvRqW-O59KG8hjP-A0`XOn|Vn`Z^j?EnlQxRnla+}r=gvL4tXfD>yobqW*U{x9+
z6Qm_&oR&Az5iOU<QB1`kf94fkfuginsD+~ksa#17{5rT2n=&O=nu>fzT$x+{vM4XP
zq-ens@XN<?Svfq6SmooN<-9qe3PwvZNxG0^lu$Ir+bP_r$Vgj3ZKtBv26#}ahl~@m
zzyedHzr)ny=>@Z1LYwQRA*}xy<2}15AoNsS8`L;N<`f8}YysX^V72w|M)&QLeqMLH
z#ilr;Om~qjQ84}1*|GupQ7)dXm<VwnC62wJL%NT06EB?s@@}BLG!^c9!*j;>SBbj?
z)3tB<=H+mnv;#hF0)upVx({+gZ!RGNGPaZJlVRYV4;|QhU(il4nQ9`te7J|6vRt@B
zpSB?N!PKRQYj0PSyun9fA9n7&mOwC<5SO9p;?AL<bNqk1o#3*27vJ9k@!#s!ay<Uq
z3v2o1lmGA682^8ny|E|b3)k`4D}OR%>uva3I~UJ#Lcc-j1GnS#?SM6H-*?#hkpHZ8
z@aIpUbfFzkToyDH|JkvJtm$+*_Sj?VAK+K5!+#Yhd;}DtYmGM=xnrl_2d!vCk7|E~
zk78)P7VqUb!~#eu?jFn-JOS_by$&}`+Tsjh?0KwxJ+gzq3D|%DVouK|r(4ZmS|H30
zn_H*5hX<{_-TmEDR*!)lpB<d;?(d+F^gaP+$G;<HT{?NUy=e@J7YE!s+nkHMv7uON
zd*}7m+1_bszsz23H{r?p{-y@-`tZ25zl%=W$NMp;kS>svE#TaN$6dFPhq$Xizt1jh
zygW3f;U%Yc7m8<}J971$OJKucqpi8yqJf$YgGX-XBVWHUZ>K+TRO>ePE5YOj;;K>v
zEl#kZ3BV!!7Jpb8Bv+qOiJ*A=3xLF=ag#CeLCYIapr>`&JIW#IChTKa&+Z3KnXw`n
zzt!fqxVM?un>fSCfc?#I0gXE`$4@ZXP3|FUbc9y?)D){GKPi0p&gn=m_E;}#=e}KM
zICjU^X|Yji(9)AjbX>zrS#-(F?oXUR=B5|5tgapo@u7+(=9Uy;;&+qIrh<N#=F<~G
zHfg|}*OPw#>>uUFJ0RWbUpl}v6*k}_jOQ7YRL~&_bt0Mu(*fHid;~!qB>BYW1^LLm
z>UMGt<;4*WN8SR`T6BrTEi_X?H59kF3IiW<{>447aoQNpC+#p-t}V{Asrw8w0BOXj
zJRpEj2xu_84GLId7z7prmqcX~FXhcdLsP$J>UfsZD-4-Qt4U|EoGM@;9Nu#koDuV?
zcxSBQUGY}jtqR@r!Ua6%x7cozi`q*HQP#p$)gSA!b-B06K(xvqw6!*+@5@EB(p3-%
z(@PT3%X~!dRf;gCcD-UX%-DV?fUVQrjIhC$_GRUHh%B+Os#^jhu1iqj(zW}o##N)l
z`&i&?08S%4R=rWEyexiOGZ~Jnh7m@wBq=fsodyd7l?MAf4l7JA%a!NYi6PNE2ZW!I
z*nt`L{7a3ziz3;DIl=UYUY+cmwqBi`w$2XDPIk6i@3xM2w_fe-l>Bl&Os5<niy5Fw
zOsY(x4`Rk6JPN2p3~69+z5IJ|Yb68Tum}|?Yt_T?lJMPJ`1HCm%l?pTS0}$fdSzN+
zDvkRby7_=EM!lsWY%aLSGaC9>38xtVwg508K&0Q<`(CQC5IQ3ZM>oRJjd0w^%X~;E
zr>-5{Wk}j-O1k=~$d8gyfGnq!i01cP)KMef1HK$E%n`w_KA;JRJ37a^E8amQRqN77
z8E!Oa4>m$anrLT+3k~fw1LuDeP(j7EEK$pkT=GSwopmg_S(3HH4fY5<8Fn2X6Apo(
z`J}=_Pd}yB{2Jnaxt(*^?YHk{0nCa2)p+qDe*gc)3OstU|9yq_Kg$sCtA-)Je(L#-
z^EHS03ZQEPr$u{Y-(<hq9phCO_tM#;t~?>YVAEtLIjuy-j9uZO&UX`H!nRJuj^dTu
zd~<TT^=kcusuxPpX{BD-g*9?pL|i)Zoetjd!!^V4*H1xTBt0)g2V2_#hJ9>djHb0*
zywcjC<3)^Vcy|jR9d_u5YwYZuox;;yNoDviiPr5;#@rb?1vlV5IXOHo-T^`2`LrOK
z`v%=#PrW{j<gnwEB2Vp8)j@k<g_H*~xzk{X+vd3UXiKshUC$$q!^0vo!Dx<KLvFtt
zs@*d6S3)V877zA-f@*2$F351KLS*``Tfmz3-Jtlz=91h*h$FpZD?uSjl;Lrm_@S&k
zfUL+<L8J1RQmcBJIP-+;IqN@Op1kYvUtav5=PzEqh_C;3tNyh9f35L<tol>@pJe<W
zy6ukfV=z=X27l>6FCfwmKhI!eJkWqW-DEfk@J^3Cz6hp`8KmkB=$)qgmUjZe2Z_LN
z@EGJ#%k8ay6D>;_H`idn+6{zTN+lZ(-_DV=Y0+&e3{sFV<+JZGOoO;#7JX~TZ=LX2
zr%~a1YSF6i8Q!=0TfI=(!EUa;o(y?%l2zlG2fw%JTjd&o@7s6@i{DySA4sT_lM^1k
zu@ho(hcPVtfrLs4F%kw8gr-AF{YKTdFgjwR+JVnJ+*w6a$u3Y|T94$`hu4{Sh=?$<
zP41!KjJWran71N{HhyUA%6Zk;KZP!<Qs;jT7gTR^(mLZ9Vpje&R`1g-vrUUH>K)iZ
zd8fs9z1GLbL2b2Y{iBEy_ZlG#7RVrmkWR9-Y}Zrfuq$?PSiP2P6^-;MmzE58kZp+q
z9jj>gQMpcKz;~>Q8gVNwU~nzORS?%dT>0?7APh&u7dc#)$|CAazf_(xc$Y_ulM;S_
z<hk;j7PyWjFBWs;QD~kD&jr^Jp^2%&p*O6`2NhIXlC<y$9GnH$!b~l=kWwrMi!I2v
z{COjFp%TR^Iy~YrqLKHbc{_$Yv>x>@yBhgOZ2uzA2ol+MW=NNi>WwmBOQq4!Lt?ZN
zl+g1=o(59U;gdZi(3JJsM2e#sR+`#;l>iw7gO3wm)yj!@Mv0j7Z3{yKm)G!1y;A3q
zakwtRr-vr+S=+ZXWv5=@6R#YXNbQaSSSQT}rB_0M;xntw)6lDmkd+3jvI>xs1{3yY
zyJOvDDn>n-qvSC;-ccMShJln~U~Bo9O7wvLqyfpsNaIFh#l5{~F?odrj``x)s6>yN
zd|7;sM||9YgV8Fnx$zvsIpJ7tZu}61bCO4Z<cBRg1DzQA<d?)_j#yFPZ-Q74Yp;Zr
zQV43`VC<ze4YeKsPOpdt2x?0mqiG4iK3o>@pJHVimFEJ+s>m~V<&wFb=!Bv2{oHpP
z!_~0ld^1*UBzL*8oN~>nt;`mHsu+r^hs^<Mm)O+--<l7o8Pu4iE+>qRR&)C@yiJ1B
z(^RQCymvwb!!*Sx<@I^Ph)KffU0#6-(z?A-tlz^xX<}+MlG7=ev>A_c7Z|I$RH(gh
z?CyF4!@6K-$|y!#qhh7qgG(=tze<hi%LUH)z!{FA;#(9B9}6t_re4-U%cEBisfdky
z<YU|I$2-I=-XVH%{8g+&^m4L8+{!Hqn>r*+@8RYv-i`DcRl4%(l1?7z{2c@=@IX$`
zB`HSt8-=*w${zWEbete1UJ&kCLJcUGVAxc=vKeh&`Ch~nJt!(K6-Z<GP9)WG3Fvxb
z<w2u-7h*zOQrS!+ObJ5vvBMV(Y3S{ApyvXZ>>@v^)GGpm-s<*BTExxQefvE4UgnZ}
z<&Mi68}+jJ`m0szJH0W=A)$#<cwcL=V=ah4%As;c1Y^vvG)!Y}grbdG^av4X8@{z5
z^Qdcx;(<OnD`69>F2(7tdc7(?Aqo2_ipDA!ji*1x-h#&lt&Sz^Y&aKuvGwajmb6HH
z5+Dr$g3fFe6%D*-7+{P_CCfGQ@r{qHBIIT<BRh&!@3uAYy@jkHoNuBAKP++k8q}Sv
zURzds)M6PmtH_1n4M+&_gw)@mW;1VjlW_)n$;ib#3~8f06DEt{D#mHqlw|OmgrA1#
zrmIXD5(z&I|4mm}3Kte>Wfm(Yh`0?y-i8RlmAGf(&W0597}jNX7_O|W=X--3sfekm
zXszD2M;Kcg*MWp#m30Se(Q{=}-o4~Wo#IKJxO#j?rZ-Lc?x29QEgOltEI_1rKB#@q
zQH6=k)5gn4`(*`cor<`O`JfU~V4BT{peg6r!}eB24QWF-<#f#?G<A(K?-DKCb>g<m
zDwmaQVEAmUAU-SF{G#y;%4_(-XJ&kWaKS3imPRwq;&f_Oq()>#YUBh3N6mLJoHzAY
z=o$lXbqe4b<r^=NMIuT&FCP{yyp&q_CjWd<zjj9n<S1WV3wCrZbeQNmvpIL1_9QG!
z=f-b^!gPf%>I-)`bo>*dklri5RWMMhG%)423I_BjK;f?#i3&*gi-Yh!$~aVAV3Y<B
zi{h7(e&6WBMt$uA&oQ~U_@S%?gEB>?R>DN`ZI4^|KwCb<{U3KBYR)42un^MvXBesf
z8*QipzW!>*sL{m8A~KK6Y(jJPkZEz#^nd@y+y!-%vBe5A6hpEKg>)pYisVl%wZIg8
z7Y6@Tn3V^KDt#}z1cY8S`UT=j2?xfMzQq}k#R4h=7-%ncLHo0q{9nfb?{p{mT#97L
zVO3Z9|Ji%@_O@*$arpP|`ct4i&t@r^Rzym+6JK<W<G5}1*iKIDbiHr4j~-g0Y&N=8
zlpMR=>}P*x1{VS(c#-U+X==BzC30pk7z}^`Fqom!VQDQf!x5*Hh}A%$qi1QNFttLe
zL{RsZDg{oQqN-EQMAENXN(#|*85W0T_X>|jG?t8vrr{I^MbiK=990WXh4Lep>Cln$
z47l(M!b};o`;Omqncpl31E#|d3~<|JLz`fv0ocYP4AvINe~Sj=B&M^7+tEFL-F^G|
zzq%&}|M~i@m7jEzE6&n=)7f-4o0e!;{CR%pGJ64dCpLX{<H{+kvr^$As`Eo-kuCB6
zo=oETXfd2e7(sqjNAQyPPySXb?f<>Kz2!ane}9elzqTy*zu-aRN<H{TG@eDlS9}v}
zFP4FcECK4}jrp~|EylU8L|}K*pWJ|9o6X$t_nzi7-n*U*!}$FjcI|Hy!g+D?6U5=7
z5=u)$`E^zarEQ?#Wu9qtGr@3omC!d$^uc1d9R8MuPs1X=u-A>l;UMV$GJ&zV5}oZF
zI`Ao+MZHQio)yu+%m87Z90Ds5d#(}VxA{eAzfw%Esi7tz0d+j{CL=|^MfK)7Jc`EZ
zV{j`!M)5cpCtsrRAo&7n9nC|XFwI0y)l@{uOr8Q0-G12XlEKipo=!Vu7~{q05^rBx
zL4*1AQ;~E5^hR6^D1a50yEw!eE^6Xw0E5(MiusBVZfD0Ve|JqTgGSSx9nQ=iel%*b
zhhb7(0tx5c$(YBP+SQ97Ig^p8)@U8ZLkOhO!*pP+`aRdV^b2mI^5hy|b5O<?N*~l;
zj7E1|#kB+GtaL{A!i`O^ct3JqE4}>Wa;lg3%b)w&=a%-lt$p6?U;?f*^Hu@w?DRu)
zI{?Y3Gle}U5Xu4aK=mABoj{c{bxm>G5gBZ_ZLjEaXlsSVX>b_!DR2}{hR<7UgcH&S
z%7V$8ll3PdpF`rsbUK0A6gY+XCz?~j5j-kRno$~hqA9Yn6!|_nqNPYgNTwSBBr)qs
z5J4l|pfY2~Y%vy#bHsYWZfgob-88sOHcWYz%6WG_*^>o0!t~`q!)5!YWSYmNaT}61
zNgl@*&`R8A@toT%%VKLIX#>(Tpr-X-L*?YPA+Rz=!TXRMygU{j4`3@m+kyoW_VM}<
zEiP5RhXDdpH}R2dH}5qJfw}KsM9g`IRtrWj%Xxi$H=WE?uo19q29x1%@&Vy6l@r%s
zwAr!}+mYR35M0KF!~?BQz&mw`?Yw&cR>$Gn*USlOd|==NV5m0<$U83l>m}0T%o7^`
zzANb?YT#bhl5R)77pa6eSOc`%79e5n3W(ebC8A)6)VHJfWYN1m!?-8m?EE?ymvObG
z>6D&@D@_JaFWsE+L)-t$@+0!}tjLDIzGvyLXU8yl-bWl6I@LHL<+y@*%>>lBNEK?Q
zXc8otSvaKl3Egluo6Izd?1@n?_C(c-E1<Y5CjF&Ay@kh~99e#y%^iFy3J*3eYfn>1
zVB|#dH9$gVaOv-5Es41Q(KdyrcA1P7w`1;|YN05z9-;8ka{DOT#jTi!G+YSa#hmT!
zv2RmTs1|)dZG-Q#ZxOTr65bM3uoQ>8<wayIkGvwXkV_T?BO(Y~t4wZD3UbL3u3_P8
z7Liv%0=cZbqAU33HIp)O<&{91W#|rmd|`@*VAI81&kVv2M9e9X&jU}tWoootG3^6I
zTr`ib1eG`zZ(=O*Ad_w6@WF^Ku_KExaY6JtTeN%Io~4JeDEQhL_xhaJjq|oQeTNMf
z-SK4HjmV~oE{7q;^6EIbd%gg_3~ZM6;WESzlPF)=UFBbARKHK0Yr$T!T?p3g>|bX^
zVrpaLynYEp_3}2$%G+Fq+Lpt#%>`|pU#3{w4qF2HnZxvb1ARY%9E7vzg)tc8nl=B!
zbH`z7ccDOYY(XO1*4UA`sVS<HcuF45){vh@48|{u*Uu6}uVnv!apn15>z)x{O5(p^
z1enzR|K|4A=HveV=lD?@>;X(7ccaN7<~Q!(3E1@*#S5+L34=NK>MESc)WGlgVlrNY
zYwCjDcazE5$6Y<-nN%N)O2Si>^9L|{a%)`J(A*`WHNbWKnily$g8Q_Pe+(-5hxPyK
zPZ<9R`oG^w_y4y482|C}^#AnU{o!O5=EWRD<GCM=I%a3~&CaZ%_5q~pb+AwGR`341
zkDdI>>;GMfEw!={P}cw3{&qV4+xGV3`Tysg|Iq}1dG|^js_<P^_#VvqIM)a4Lm18a
z=G@QE#jkq3M&7I+QN-y;CK4C(dNVJQ(aK|tK_1iI7_p?7NlV7X@8k?aFbF;Nf9n6b
zd%&lf|J$D5^3(l)(|_#$UuFEqXMbV;AKi2D^tHH~fl1H~e>gh)=d082vk@X~Fe&dJ
z+C4Zr#Z8&+`O(Rn*KZHr{&)BM?Ewd`kT~s7^zv+`w12(rlttvj@%r8Lc%p^g%L)3%
zGcI(8UUv))X!R6lsiaHs-H0w$=@<UhPT7x9_+gKV=G(>cum*5=EW(4!m|p|KFOIl^
z-xvkAhvWcXA>N=WR=uSdz^pe6;+Ub_w5WS@euE=scQNe)lBI^oTLpLcFDb5Gwn2_3
z7#0YnoJ4}a>~=im6gJB7<U=?Ut&&21H!VYcH=X7oA5y3~{KQjI2>EL%v|L^^?adD-
z4riC|`&oc_Q3Sl0I-EUq(T3@%)x9Rqt)5)e&;r$kJ$s}&swTg@AU27$ri!DhQD8fW
zNMMx5g{(V<-y*a+6-vJwqBl5NaGVb?9L+-3pU~+#9-a3mAI7MYu?ycBSau^ClrMQ+
zwjO3d;Tas^8}Fy&DgYzl*_)Rvz@4EPzZz2_bVu+i<9-YreA<Y{gNal7X8&wKSAOM{
zl*?Hdycc})CToBc-oNAh2Wsc?3THxth)d6+FjpamDw9gs=L|Y3Y7lScdlZ1kIR#X?
zFvCX)b!zk=H)WpI`Lsj<DtefTC8jn^JU(#3BF6~$IX*sbT(jes_y?ZUpRRV-=AL6$
zax|$TsM4wYLkwPpuRH9FBF0w=*Lj~&GH4cD-?7HJ40|sMqFuTOa3LdK09rt$znV9N
zg87TG!d!&9NR^%zg`8`V`8xd4#}yh>hT;4_V(tPJFGNB|bEuGM2N?{WWDLp&mI1Yu
z8c%iUc~zvxM@1YG1B_4?cqYSiGQ~+-Yr5P#ltW~hnQ%>Xz`Zo3{3$DEJSrXS5EB&!
zm=lmZF(+zS5n(=|T#&X&K-8Qx9n`rJI*3MU_d<=n>3GPzqWq9F4%1~=bEhU6keyFw
zn7;$4I9e1Vug0FH%uapUVxo0~$N{+c@@G{_vQ9Znc5$p$)ul&H5}NNv%7&QC#sC~a
z!6Dwd770qEr)k8aX}|&`V1odM4qVE3O|hU+!P#bu`E^6TUYfAc5psKqXiuJK4DN|H
zb>=lS(}E@%^18$BLGG{lnw?``m1uV1G;3%!Kt70OD>^G__NQ(%YtMN%P4ygAhiW}%
zmF^-&MA44-=`IGPRCh`D5mX1gkK99t(=*LDyn;eDc(2{ntx%o-Hgo{OELObd_U!&X
z*fQu@9}HjNeyG%ljP7XZhXnyHoMR^)vSj?r;tEYu3`~)S_0Fl)^0c%7XN8iEtCs}L
zv_hQ}*1Hu-dbd(ZqE2S$O3l5PEn>bRqP?WEa5y-`{eu_LIGEkNp6;ZTXyjF}zx<5#
zGU5R|)tB{sF(c;hbx32PF&xgFT7@BNM8Q2V#PiCObGY63blh${IJb@QBp5~kEMNXy
zQr7tXlI1OZ^l@FOjBfI?jIPGHu9mfb_u1c>$9C9;C({xS9F78Gvd6aw)D027IH93>
zS9RL}M{sia)|1VCT>Ik8{+9)I|FA!t+wFdbzfzNp+&n3?paf0@3raxV+k!Iae4@31
zQO4K5Hjs_@Cs`X5D7jbyV1?{U0EfR?UdgL169nXgDEXg#nPAl6Yg#5qb$Fm<LYWaN
zdgqenu7Oq2+)d-GP||Vrl9o5&4Xk%7l=N<;l5`V36<;f}RlC10?m3wT1qF77iF=2I
zLGK!D6qv_*Ap?CIE#~}w;Vk@h5zRvMi;3~tA02&9@XT9;xo0pC#P4fs2}zz;K@y{m
zwx+ZkhY=hQc8z#k-J>55j^7>dGv3$lPS2g3Y4rjG(yXPBHEWIroa|iOzyPiM5aeF0
z=!0KnR?HDIVMR03Kx!c4xjbI7*nlbPZ_P|%p(4$~lAX>wFyO3kM&PqU0QRPcJ9`{l
zfwEo6E55@h6uAnZSRU)KWvWTW{yR_ykQ_jql2i|GkN2D|C-eDa^cIic4kxp5ICFY#
zuVzLR6_q?u`|Sbh(}FZJyp6<dS2@#VuTC*}sBj==wcQlBS(H#5x$kk1-?|5tY|K(C
z(?;1g|9(uhHzrEKpdJj*@$?A(`Hd)!xC+|Hi1bz=`$nLa{CObeHo!~~wki}J=mzHb
z#bfi^n_qQz>dyiE>GPc%;A2#%1=?Nn>Ey$1_9@=-m5=p<dNhCd74>LbbG$7c%V5lR
z{<_0xByU-&<IGWT+YN80lQEc8Ui0a*Po`G#Kv1KdFyba5bQ#)kg%IiVdP%TO-h0Hc
z9mVUECwKg(@4Uxby7;FKf;B9~rsXc6?%%LM7tTuiM!ylnGF3f>zx^w976AcTE#7k5
z<0U5L!)tpr-PZ{;RQNV^v}S15WNZDN3bShKeMTq!DE_LX8afTFlfb{Lusfd&QP1eQ
zyye!u@@i4!m6@)JuQgPJ17yTaDAy8UFwnp&9zb9i-h{*5Wvj*5#)gf6h!Uc0YH?St
zr_z5Wo!#Eaczs#+d5+Kd8m7i9cMz+Qc$v_smHVxwTur*Y>F_q+-vq~2^k<8D#K{^<
zd{jm%%s=h7jiOr|Em7~D#=PON^muJ4ua!TK(}^}x-qY~}DSaBZ6O%G`Ju#nsk?toc
zw*Oe?le7%IsxqwZgkq`hO5P(ToiEpS1lp{`eMCT1+Ccka4;db$SzqS1>!0&%z-)Wl
z@xZce57MT60(Coy=kJ7WeYv<|e%o7WIs_a*$)6DEHcS_0#1QNlIXj`kP$CF9E761d
z%uNsIa0sHJP$>8p<Dz)3{NFOt<0BgJ1z;9LY!4kYu<g`|pHc(chEHbk#+wXvTaM6I
zVAKTAO~XFk*uW5K)Pm8NE;2;(P%Z9UXx2Hq4yGW^I044n4$(jx_xoZ^&`{SzxF0O8
z6l1>4P7KUK48(_9L^I4gG!$F#3891sB6Svq@-hc-xEM9w_d~kQaRc3VKAA?n#`Jpn
z_rd;l`^kLYYi>Pn`#g0jXybNkYbkA%C67V1A4(_F%7IU4n9<pw#uXa;yFBX<O2M=8
zZiu63gjCBN0LqR;h2xO(4F`!&#X#!R?C;-9;z)b{(ZZNd|9@e8HKo9s55rdpTg!6w
znWR;btqt$<N-MV!ZHp+G;6`@NK@bgxU~qQHAxPX}6~W{CmY#Ld=l#m;R50UGIvQVP
z_f49JkJD8|fdekZTU3LtOYlY0xlM=}pEVT5-!R?LH~ik&!P~<hx>4)dR*n7d|1y5f
ztS>^?{CISD{`&2!|L>@Kc6Ov?L(*!Q<apLvd;_x)xcW&TKG}HIc%J=1i5O>wt;#Yg
zt?BB*{FsE^FR!u~s>~YWllGTdIa{@!xzAG7YBqT9-H;!|2!p?kTEWA1!*6eGZ4-ds
zXf|7|mWGmoPY5=AO9^S(I=yF&_H+5!EGLa+kWI*YHoz1yy)r4no>8k@mQ7);HvT<u
zn=iSIbVKe7(g@MVLb5aFqbUu0$7wrbIM=L3Gcg&w)6R6uq2851aZ43AsQ~tB{05I{
zohKYT&qT`Sy<d%a1k=uy-;}|s4c=lfV@4~1)aJ%Q$SUIN=g7|pW2$x#xt7SNi=^?Z
zCeP91E#ktdKUrK3L(JLNDaeYG!t(nSVC4#N85F~eFGFl+_-YND{Omg^Sd|5c??5db
z&C(VWGF8MyKZylcB^2N)oILe&k_Val)=AQ#r8u$$C)C7l4rY6@5mZ3ObfCpzyuKap
zSqa`nPI-PAUcaNK;62Ah?{qeS-Z0_`5Y>=BA^(O{ghp+IMh?8DjRypKX@e_b)-;#*
za6CvjEo`yzWmme_L}6TeklaR(KMxWAWi;t`MJj_8!@!ipf7<l6Q}G}DCVYL2|ME5F
zfATExUm6$z<{yJu7=DFOU(yj>ULL)8_kH*51cy+t)L*_kgaPsR_2ECyv`8&6G(#MA
zd3at5q-BfQ0zLDLcT9W0zI0p??8)cCA`|#8LtJ=}9XT2Aol&texK>BHu7i0-r_((^
z!Nhf442VA4wLeC8i4$lqC*cqbf=NFZ9v{zDB#!c;h2wa-47zr@Yk&MuMXOrFQXx}m
z>*GwxnD`{Z+hi+N+9*QQ+4}gQN>1wBw^Z8tI9)WD3#-+&Z>g;Hac+r8p?aI2=T_6E
z&Xs9sx0=&zYc#ZW$mheNV3kS8n-y8%SxLMsVxwP?kC#P^SekK-oJqpo`>q_(rhOXK
zqVbhu9X@!v)~YB}N*PL4$V*{VE2F=BA$@x(MOaG<Y-w>VE39QjwX~q@VzOcML$n2=
zaWI}&7gRn{L1lELsshW!saj5dm4d9o%k0!>CcZQe%)J-Wgb+I~(vdH<8ce|&31|c&
z>xlHqkL#~5eEl~WVbH*6PHDHo*=h^Gvi0BQW-Gn^^V?gG>%YhNpI>GEUloT_TeRVR
z_=(4(X`cA}@2U8H>UYv<=*^R8$}$e<>xg;ktpfrZbqCRH*iT$RE+Tl==l=eV4qMZl
zi6kX_)xkqQ&)emH(oXXkc%Q@j9NsU4_Z5Ag)AzkncuwEv^!*flqn5$^*7LXrljpho
zyDz@`{JSsS`{KPX-UIpMAT&8wJNnnVgJZ`LGR1??gtj1=P^vZXARxXD`%cYejuN_w
zc3X&-IoUHg&^!w#)8lX;ZjtRlL!8oK2<OaOhKzO_jp+CZ9ltUiez8sG$k#dYy(K9b
zO!+!fei>6z+`g+zU5LB9m4u=c;%R9X2hOR-I;>esYma$&R;WVaTT+x+!zC#BR!ZoJ
zBkpnhE-zID7$y=%89ZD-ze*SiVVa+8fX%oVjTs+oc9!oB@|$Ppr;_lk!sZu#J@(B$
z3=1ns+c!n!Ylq?HczoJf6S-`Yfwwygdy5&mpwjp&64%n`j45gphP1JI&pGXMwrWpW
zP*wQUR-b(ObjqHvmI^pxD{Aj_121S%0fN_Ejn_gp_W;$-D*Ig!($)egbI+J%T75QZ
zB|~!?zP4$!w8|b@wUxN+waD>h(`8#O+jiMAao`01%c$7hle5DvP+E<i%g|{^o@2=f
z*^Ke^2I~j&;HRHqi2pqo{o}4)(HU@rw;HB<=2>$K{(C$5aDvMzT*V+^KK%f9`wCy(
z79e&?7`J=W0w6NMiHpnxee|vUsr9pP;Qgujvmgv4x8*~R9{N<sGx^Y`hb=1h`EC_<
zIek}<bOn_nUOB4<Usz$2FHw1uFHH~?`O;$sB#t&xvg!dZLI*CZz?!&_PQB+QL{DM5
z127!&v2X*AS7Uz3yorWqYiBK|(>mVX9w_<-^E%F#yVxyT|A}x6-K$y9kHYavM}Q^k
zKYx3BGrj)XY(M6Idp!U9s#blOGrjbY-YcjId1)P!D9;Em&dbH6KEE=K`7#|=o%qRX
zC+7aW+jN0|{!Cs_Jd~uq7nJtWP~!Z6kF_1vyGrd(cJY2jRZ?_{U?WAlmP?T$uKrgZ
z(->SWrpr@unT(J0hMXyRpr$NstnsU*s#}dR7r?8G=Z#aa?Qd-gynnpA8j++C?m<L}
zACpzDPK4yl^IhMsItdYTM?BMq?3?0FewUN>BEN*lto!6e=(bIB+6>B})~dqHaDsWe
z5*lQYl+ORKGQ6Qtsmq<hS1<sI=YOx+f_JI;|1tmDSGxcEoc$0i-iPdmFuDXgVDB2g
zHLmH~KdxZ)IYd|ZuYc<|C+#K=M}zY^j9^e{dZ{h;lSD*J-W$cOiZfUq23PX-5-jh#
zBB~zt;y7ghr0$)H>~}I0Oy;|!&Ft|DD#d%yZzH;FEFD)bCyQ|(<Eim*%Z&y`GQ=y`
zV|z@2&8EA_<Bs8l$7E=F%de|=@@;m72Zc6hf#x*9;vqn>ZeO`Hc<ogxjQ}5@w2GDE
zkjgojyJ55VdL}YAaw)5j2oZ)vi1z$VjEN8>8(91%su8FoKYG@tj0UG;i6|Zwuqufq
zM=<Gt3}xa;g}D4^is@>`v*u8*$DPnmBQdDe_U7^;)6woBv*YN!8UK^Q8)agDW+kbK
zaV)MN!v}u#xCUZ2Z!VD#_VzcMc#BZtZpt>;fKOB!Ps@fkmsPkTmLi03X+osvif5pf
zPsq6Flo>&_L6hISsWNDK>2Wf1J72p|UwO#01Yju~?+V6)IJ_F6<(eIPmMo-j)JaO=
zLk?}}u2_(~)b4(kYN^tes^3yW>SohAK=7a?zl8;tWikU}e&uqihrbF3#H3~fDq!zd
zsFEpLsrc20;#DP$GjsF4vZm2gN21YGm#SqTH54m5aw2vz;7Q<3F0I0`?w)O^O%3@W
z2&sae`$tvrV`jOYUZaY!TU|8dT9RBcG;}4ss!v<b+y_C7_UiB_iFLdb{^4X;i0@nQ
zdA91R9F$Wi<Dq4tAhr;yGEsp<85eB}31eG?D%GjX(z5i*_}eU_r?zb>GPt!Y!4!wN
z%BWj4A}m&s<{02yD8;R8ASAA#1&v@}SQ>$q2t6NPcQ93h#iN(!J<k8+J<@xmfG;`!
zYx`U2{O{ZT*7oE6-`9BlxBZv(|GY=S_L3+a!u{vJW*9JSmJZ7?>WVN$(HPrEf@MOD
zu_w1Rw!^_4`1t$B1ZOhEqshsPW#f~9iN+uPE)%fx0+|gxg_uP#9GeP4lHl-<j!Ir(
zSldGi89tm`LGwd1y%~Z$Uz5(ffYtZT4kxi%JX&4C71I5#<tJG%lgT+Fw3dxEa507t
z80)Jb**C=)rvrq9q8I^DJ_zL*HLI7^as-y7jh=R<{VwL1wB5dJfRIEh#1cjrz8GUG
zH%5pJvE5$=`l2^*mWjaOE)}n18AhVdz{kf_&St(jNnu!6;COy3$1yh`e=_;LluDzy
z8Xex`)wyX5QgjYxv*7N&m9i6Ax=x(TgI1~Kwo1r2?OIut5+eCkDkQ_JRGk-9rAKT2
z^V<LDdLQ4z_OEdM-`@07=l|YTYxB|m|0>V_w>DrA&}1*d@g$nF{|tJQOSbn5{%Ks&
zvxfJVasv=+g2UG*Zw}rbf%)|I^ysZ~;J&D_|5?iph<tabao~#J#f=xWT`M42Row*K
z_)d96s+ifl`{&!()tvxVvazcsZqmsV0w>0}86O$LK$n{EmK2M+R4c6kQtfP9-*RSd
z4`w^fDu?gf*DHgi=MFQ`=rpvKi<`coWnj~}?5v>VY~nHty2*iRnYcnbqPKj~mg5R3
zyW*MF_;H2SPZd#)u@U$3+783CJF|3~sU!3h!YJzZhaus_Gk2We#VGP|0j3bUS(@Hh
zd5A-5XXsBU#UiCFV3Kg+o;%j@;5#9X?U*yJ6{*6cGN=+uVvcfurbDc&_=qHhms9fe
z44;~#6u;}G9sgA9;;Z&-t`y;TF%p)o7}|>C_>1GoVHy(aV}ZmTz=gpP@hb^r?cb<B
zago3md_zJ(D_P%e(C+K;!9VBjC}<JCT$zERrsVHIdBnbY73%u!&5Go?x>PMAbKSI3
zDYD}%SIW|rF-AjqLi1pal7^F~vos(-bQ5I&S1E0o#>#@OUSh?Dv%qG`{B#W{57|FY
z1FlxueH!pSC9c|lmNKgf6&=@RWnCO+pk+qHWd29mV7Qc1kCz{&s<XcAI4F&Yy^_I?
zrur2mBWZ)ZRfT$7S&fusQknCTLEBhNmG*o)bhTMEw<LR~s8Ce#44XqBzvq{hSf*t~
zC9YV>YBTRcG~z0=@PjnzN;C39HSTJ&^Mf_?sx$S&H+Z#K?Nc=PeM-K6gWtdO`!#sQ
znLS^U`XpI0oRrP-c694pU!|mY)uD3PDy=pyXKaANqRK6SBI%{<fodhiOR8hmHmFvP
zHo2s{QTdkXJx{U)l_id`{WMm)Fl~id0J^ApP1o}~MHwp$96a>iblG-6)(R7T;9%q(
zzp16J$Xuw+Y~}1(B%pF;!T-s9Sj+v|JZkN}>%)IMaR|$EZOmsTk(S$bx7|(GPw4#F
zPAE&x|K^j~Fbv+`?*OpG|F6~ZH`C{To6X1gAD`u?@M>y!+Y6@{lAuHGROD%^o~_BS
zQ{PP{dW0#yu|A=LIT#KXV{$&Ty&OiPNO-2rS${Da-O;!J^h6-7zeNK)UE<y-M;C`j
zZ_vl+Oa?DIfBo|H4to{zb2&V~6A3=qhu{v+1h0cJ%p>&r?;`mxbAbHLj?NzIF#Q>E
zg}!ORv3Nn5&%XVi@%c3j|KI%2_}g8e2p>7tcR@6CYP(5#xPN0N=6|gJ2OnV8*E2<{
zYcKzoR62vdA@Dag^6@EwJ40UB4x94hPP4h#?;m%`y^c3QRtl7zDf^Pd!C$WZZ81*T
z!|)DD<vB3kv6eAGiXp{*zYa=vnYwzc?$(JZWVZ}KCCMewoH|yYmMh)m^mBZxO73#{
z1<9Fg^3S6Kvmi29r{Xr442x(guSx4$e3mT(9&(j|PP1Fs_vW)ZkvLnWL}cvWj3W6D
z^cvB0Y~)a}Bur$<W%Qlc#_zRV>{%joSAs+q7ZV4lMLemSMT1}%htMH$gusLRSs3)L
z!#?(X)fr+I%t989gUeyq7x}#Dd?lFou7wGhbnhoP8a%DD=xRIxwE9y$7i8Abq=s?O
zAv2?Z((sWP$88NwRbKaMbq8y`E<<J}LOC#n!kCSTOwpQ}BuKoku$%n@Jdbyp%aBui
z8YFL^1_yYCzZ@~*O;%VSeq2Gik1k>lYyU0!LH9C<qh6!8_*DL1etRop|804X`JcYZ
z`yZR=|F!j(y##_}CbRd!48s5b`|%=Vmm$bE9J8xlk9GE0yRl8V<gxFu&Bi8A%`CDo
zvu^kM(|3p6ZVl7EJMvqnjn`<!x06?=uiwJ6{cSWTh0WIe{`l34!;6dV!TIZxSBKpf
z?_M3BzoNM_4e{>msQc>l-1DEwS<YtNoxbiKzdrr`<?DAZj*oHx&_m?-=)(5)yLYFD
zEE!%>ATN);J9u||4u4OHnU_Zw-Shu`bA(oqzCalT^J{*AAQ;c%#&t(~hs61l0ZXKJ
z`0nMwAw`<)c2R_>Cm{dr=4P{*qSGBry)7cf1(IDgB~J(*Bk4}~a!TDpnO}o?^hy0j
zlm22D#-u)%A?9wJhO^-vqo`$lc6rCfH@#jaNYG0}?hwgj?e#l;WlC^AzB)W2>GH>9
zc+2}YPK}$r!X1#&u1X!fW5ApcUv9!)tgiHM8jR(HC8D?VOXkmHCo(2(@=ipr9k5$^
zxBk3lVovuYL`MRkb4()|WBkL=sj)h{-Ic6pB_ZGI(Nk}$rrv60VMW%Xddnz{SA;Cp
zQ8dnZ6aPA!Yvt+uDWYhz-Zp^7;UpXkFy;YnZtVinQ+VE=;1w$P=9txBw>5fy>m)2c
zeToi7w{@Sb!<#!^UHpuH8>8S>lmN#4Eg{prc!B;A|JW~0ovXOeJtWZm5P{;M;hEKx
z5;aLk?(Fcp_+Sf<UF&l&|AqD_kN+Iyp(L*C)`POvqni5e(|2X|!jl$%B3CxpAm<%0
z>thEBgT{c;PlDUC=(o^tpwh!30B|F~iw7Xa^?i&TfkBEvz8(tNuc?>ts3pHxLTy`w
z3(H2BSa3r8!evX*Fy`SDK7zV=4eluiHE!$gXHg&HWwhWq0&6l%7T(i6eu*Q?V<uPx
z!?pWV#tdWfi)cAs+<LQ)MY+fn_X@82^o4r@*Zmr2SV<bz&YCi0>3JD?Esb7{7SlgP
zy#4VZ_6(<{(Qg$|_tVsMURx>Z0Fgtzie~i}w_lo8QDA(iqt~=+8BqHvs-Lbmf98#r
zjF9GgPrg^<oB7@s@5NxJziNqBFl+YH@7jtiPjugie_g;*zkdqWVj6Zc4Lh~~Z3&=6
zO^BmhzPAN@((nnStuoKjOlSq^tS7j+isR|+>nk-vF;S?5`>K+cr%T?d%9Ps6vru&D
zbJmR(DroVU?XdHfP)jd8v-p{*g(i}gF_W|krxF0f*(2%R_{(3VO)v=VWE>2?ye7#A
zj-=!pQu3=v33OW4vg%YyyAU_QZ~+v)uwgG(Q#D1k>M7rmmSg2fTC(s?7cG1(l<he?
zK-0K>(g^y$Od^a-%wL>^!@&W6L|pUW;b<p;P&o*Ez&lk_SkAtT@RY<k(OPVu-fJzO
zH=4G4uU2eg|2~XpY@ut&^wyDK;IL)J^Xpj{^kbSZXvPq+YsAjufSL#Dp3VkBtFrFF
z+ruBaU3owROM|Wm=cVY@*{0|7BP1;&lmaDQfPC&dwT+vJBaLxp00UQYuqq3?^9iqB
zVoYnx0*r4ZSaM~RYPRUq6{wPJ)ovG4txQ;%9fIydFq@*;YCRaQVDLOBpGZ$G7$1g5
zHJd8BZlwZNquw64T*)3v9psRlaQ}cxoFe|<%xk05-UA9fOV%$U`*oc!RM`KQds&~#
z|6{AY+05+!x7v^U|DWS$O-{Mm-Ym_%hp$ggUY~Y<ux;L&0M<QMWA_w#-W-gW{5RcT
z8kujS2^fW8FzT$WnMT3kAy@?cXw+i7cfRW1ira=SqmlRo&vw<`b=KniXE+;&Lp_Dc
z7_1`g`|HIV9%zAqC*y8!x)A5W9Q_G<AByPfV^;-n2DB$MT5u9iXw<L;qa1Tb1G2w>
zkP{)P1{x<g@}SAWKZ|fV_ozg-do`R~215{=Q6CilC<J5ZJy9|$mshvCR=0XmH|a~E
zTBLy+n6fDvw{_V*4Y~;)4IB`Fbh1#j=R>R|8Q(n{j@wtOx13p8vVvg!ZZzK20XS*`
z(eDb=mbR6r^BE}Qpo@WNyO(z`z%tq_k~?v@<p!T71IJ!s%|Z$23sU>s&Xt>bX-Xz*
zD$zX1<^*#<_8bRgR<GBbCs-}eLB%D%AZv$2A-M?`2HkMfo8CD{jfg4bR)k$0@CWGd
z=aZM=P1FmM5+#iZHgnP|v?ARi8axcxS%kqM=}>Jv8E;IcFyz46+k44I!E}miz95bl
zBU=37ks_Y`%_kEEg1QPhiSz4V{vR=$jE8r0+PQ$`4y}j)=^IQmhy&6Ko|@P5pHboT
z?E6{Nj~(W~vJX~k9HMbs+wf{njR(Rf7VC0Oz3A+2+`FDl#?+ulE~nq_?d?$+E+^IL
zbR1G|5IPq<Z=~H~;=GoWI|rHvG$m%&!A(e_xWvQB(F8OuB4-(`VYsh!F93rzr<Zgb
z)$-W%N*gR1z}xM7f~}a=0KMrGZOCdshK3;X-7E}A6%}Fegv;ikh8z3RZuOyu-jTRw
z;eg8xX<{8XEJ=ah;RpFHe;M87nJon!`XrXf;*BK3KN?HqWFllr#S~|PLRwRGK>VKY
zSu0{R6HZ!|Bgoi8wr6CaFY#5-Im?cPn#B5~KGIDPUvh<dGn-7sOZ+Hbejm=0_n1L9
zjA2kzPh5eL4BW~g534Ini^hY=j&Oc6p%`sO?Xa95a-d6!YmiQIUsWryUN8+VqhU0^
z+bJhTiLwZ>k$Dx3#jBixMvLJ*0?=L<$CKG1`eyCq7nB7qq#fMyF)V%)&Q1*69ZMy0
zfs4xc5X`2y1-Da0>Jqq!;4HjC4rqwpDI=%@7dGc)+*{0M;dssmmw2auXa=~DG8X+7
z#=@Nn^;f{fsy99frZ_6YI6aw}3Yejo0xlK;?P#7%`W9xhLQud7O&A#X5P>sb?jq-}
z>HkOGD#s_EHvXHpwY`;I|NHGn|G%%&|IcSH!#J9S>~sNRQ5>+nK7DVD`S<RJ;C(PM
z-2ZGu{;K|iwhfZx*&zEUnv8;RH0`Vv`K$@|vg7b39AAdBtCL`cXU}~wZPRCBF#hhA
zW~s}jSQ00EFZp=FcKE077GvBW-jh)Jg%BG6!?4ZP<a0cpGldh3f-yyK(!cEHW^$4a
zE1sFai53omTSu7d_-9nh;oTX#@D7g>(QiGK{|WR>?yn?|pRmP^Yka>=l9FiHce}KP
zy`C(FeORFd<14(w%od=9;q7z=#th!>k)l^l8khlwOGTaMG4*n+Uh>7BJVvB>pbM*E
zFlL>_jo423cOSE`Gz(!6Vr+xB)c{5U;118@D2vQUb56PrhPVP5KlOJt8IT#Ymlj@!
zEgTeI(<QW86^t9gKyU*|B@_CE-v;`H?XeO3k-Ky9u11n{D7iI+WVDSus+;o*`Is%7
zs8$zOu%^z2a|_Qm;F0|B)VV<j{AFHv5Fyd;l<U<;dF+iVmN$6B8#|^N@l{WbDvi42
zUnp|^bnj@NE{ZogTrDN`t%}nt)<W7Ld%5ZkZo*zt<QJ!e$eu+H{|s6OoTEnVW7ENh
z-DR}#ccqn*%m!CXrfViBh}2E>(W0GFKR@bjm87aosX4TS9^|NtJ|2U@{1Rvi^Dw$H
z#<o?;VATUCRe0^`dXR<T?tHpFeJvxDrW3rP;b8DK95}zY5o+1SDvDvDwNCsaLBCO_
zu`q`m-_V|XAi8d=Mw{37RL-<s>T_}T*2S)blj_T09xy(j=J+OwNuCF$R8O@gTSIZv
zcG3t&=MiR5J8?8DP;~yqP4x%9=ajX$_B7-U8;%@-PxZ`#pP~J=vvW2N=8KqilBm;A
zUmQ$8{KHX0at6vJw?xyI>JV7wk9hj@yH^({N8-}L1dQ4lt_q;yU>jq2Ng-V-%pT5j
z39$`F{cyH~#4?tjp5cz$V#4TLN?=PiL?T?`-}@p(E8G2S9!}$XDWy$l>|SD%yuAXG
zMhXwFIU&UUel7>EM*OM_?Tf!rl9OYbAwI_X6Q<2D>7rkcNiwqZkL<%Wu7S%~c)J*n
zVfFKwn)|$?Ip2xhnC`j#?)-V-Vut1gezHT}+wz(4w<HHW{!}b`(0Pzb>|O;6SY!la
zhvT~fhi}3sL+j#30MuM=RLZX@xLZfV=Lb*UhhsE0Ls~)uW;hGSxY(Qw7Zkeypg2j~
zw%hROvQ3-0&s??zpBvBc7d~xZWSGWt=CYO|o<kc?lh~9G{tR!`C=Ws6>y{)^C$srh
zzdOPm-%Y_ZG|Swyr!Cb(*eDoknvg6PxnW>6BDgG{`rInY1Lqcmgw?T<(@=B`B~Iud
zH2N(d5ktqi9EKwa#-YTv#MJ~F07V3yh77YC?F(vRY6djL<FaIUl$6o$V@O<#g5EXy
zm%viC7vWmAN?Cl`uoc5n6{RaLmv{6R7g9RqML{k2sF_z0Bj%Inm!emu!VsA~SIdu>
zw1^EOrmKL2VoPu+978_K{%<!1^$M=Sm3M(l{J%Dvek+~-Z_|I=|NScWe_QN_VDUa=
zKZMa`IAnX*_^okG-~LjGKT><Ihm#rJ?6qX6Kj%A-T10$|-^Yg^qknd>Mq&^W;g*s-
z*k{~v-f@u5{=Tpj@YXnzQx9tF?=qSW+o3Y+hcdi}u`_$d%LJnXDZ89K9;(0DF+emM
zBw<Iy)G@ii!rP+QT?1Ui(s44pwg4O-n&9G=fC_lBE3F!hrNk7VT-`hbi567lyv}C9
zhi+mp>?at)6=m0wiX3UF1k=nUf!BuiMbmJnne&blCHGV_EJA}uVLu_@s83qb0>Nk+
z7D`G#s?K)Ws!dnJwH664%mfcBX>kBk?bVpFfj{c<*Q?9CmKmK*z5hZLJZ1Yy-4zO(
z9hE6$WcqG2TA-{Pyw%|~$^mQZ^oqbZ9XkXMuW4bh3V5pwuwA9f5>!&lKqceC64J~;
zva_55E|zQtI7hx3Ty-huBDJc4x2ucAkUOnx9)n^%#{W|5KeVi)aWI~L!u;>;=5{9k
zyZ0FX^{dSP_-u*Pk2dVTyyH*4_s+XcrKFBppDswAUKc@iF>XX}f)R|UmV?P~IQf9y
zKUkG`hi|;U)P{*`cvOG7Qf^3L>L$4%Q%T%BVcpX0Z5NZ6Yu}N~$V+eIj{NkKLbfso
zNaDe*EdZ<Z{INTu8?}YmcOK2}bbcFpJ3wtXHOH&j$vGphKJsgg@CM^4q>-WG#uHY@
znj4SA5eu=fzwgz=;X@+*s5r<h;%#_Zvc+G-5eq+G`K2<pIB0Qx+24;u8)2h+qPZ{*
zX;f>(AA+ePzW$OUlXMkbwviTur2vdgsuh4=?^{3xM0hd>sl?L>#dfJcW4suJw{zV2
z9siOc$SJBI5T%voQl+>YbtToRtb0|-*M*<a3PdrKO6DdK3}*8?k-Ra7Y?9OxvrH6q
zs?@SggIg+91FgyB4Uuf-2A%M7*&8gFe6!zWKK$P)$mURh+JgU!k~$Qiw&DL=)J-d@
z?Wh8(-Nb)7DQjSCcz`)qEn%^rwrwnI*;u&fJ24D5Nl^rPc68o-@$S6)?)2T+(aY|S
z2X9{;yf{8`=;Zh>9Wl7X{udD)K571k?al49{lC@Te!TzxRoefL`QMFWpepvdjH2}*
zS=g5`e^*P|ZdmLENzwWCw7X{9*dnu{7=sECoW@>>VNTWrHzty3WL8r`4_8L!iD9m(
zS8`R6JOK`Tw71jo&I}+5H@c5rDZ<eSE7b854_GaaG-Ib;Ssv+*&;HJ!+#S}G5<K-w
zkvu@!@DPbRONwj+%HD$R^fX>f!<iD6$eLoTW-`IazEP(bhC8ar$AF@gV}^<lbc&`>
zBf}v%<!XXF^=hU!ZwXbu8dX0<^&^)9+URyplQzx)*iD@(BnD~@f@_?eNR9PtDNQ<6
z*%0zcCAzgF4}fTAW2;tJS-``tRlt^CCNp2jET6zi_3~G&m!H&2aHY3_rH`L_pQq9F
zr{xk!K^Kc51#GRmZ>_suTe?xI*4?+(-CwHi{xWs<3+j%6y78FPt5zz4L$RK{28n-s
z6N!qosb8yCCQk$?_lb_2gv?pOnvl00Y(@q9{d3Ix=2W=vV>EtBO}Vd4i$WxC4_=)e
zfmqJYj^3U-NPXQL($#V=7hz*bgd`+*3Q@hNO5kZBfsd(;iNs2|PXb9rW{EwBe6+U9
zp09wmU9|V$XVCH!R%T%_`{WWE2sL+^?S@))Hli<ri(dcpvypA~i@}$`Me4x3-Emr=
zz%|v|*77OHG4Zoi=L^_(s<!__Td=(_UTq(!B>qFI<)`95`tV=tasTIQ-2Zw0xb?%g
zeyo8NmN4LCm;C(OLUOZ$d>hO599(i~OhIJ=_v2PXUW5kWGn%>6lmqSY5<7@i34xQx
z-yL>FDHjq}3(&GjO5uYY^QCAqOTis>91i9tKm}A$L+n`jxR&CSzv<t_-yOfs>u-3Y
zYHxW-FQTi2{uj9?XxuQn!7}uVp&CY-`i)-zi*yY=Vl$#`JX0t(=4V8|0G?k{P3r&~
z_D7y`<AGTLpM2+ruedEO&SnTXBhVRw&TiCguO)U#)0)rXU%Sv$z(OO4Uz*#WnRP0`
zx-13Tm`}(H_(Y^O;bHKI?0y>dzGSWJ`^%L>C{lDNe>=Lpv0B(zR_aK}*jq;E^p3Kf
zXuDzsY(=!wMM&XmN9NRnNYU|XY@Z+J=ZS|3$!$`7yZk=Hs=A{f*QPs4F+BD)0dHfA
zIV42d8>%^x`wJN8Ot6{Ak$fD`4+#O7y)6<-&d6H_g(@Q_z)ENmMeOPtxmk+bFO|4U
zDO9@3H(teBa*opDgT@_Ds>Ttpw0?t0*b3MZFco>rp_pwEgX|J5fdlUPEM0_7SOQd)
z0WW(6(pj##qZe(fdC69qVV<!0Xi5$|&<Uq#0TB38A19G8&e@KHlIf;+7R#myE|rme
z!D=Ps0scA+`jWPRT+fzWbBi&Mi)8JuEE#sm6XU=i6^+WTbDP3Xa#{-rK4yZ5`It$s
zl><}(K(@2x0q2cOP98#yxy;s=*+cN1`zqU>yGwIz9XS-++yN}+rV9m!oeo1=8QD3L
zMqb<zPfv!%#&2Ej&{v@tdv(M&DaKfV6xrMc6ypV-6Jj*ew86Z98IBh#&>TE2F3B}6
zhv}+^nwL}VuhK4}mpOwGx6v?9Ce$9xnsmSBG;-djXZ746kC(DhQp(1Il#*LWRoeRD
z3U2H19sA40)$?cDVyj+cq`Byes1j<?0*{o*kM4NdYQ^X4d9?!Z6G(y*>1co^jAB>~
zPFfCud;DMxhE0!t`*tDVd%(q#(!)g_JwFGY*w9%c&U}M2>_vx@**Ki-`L3VmAFbFb
zK&)q2z~Mu%hvjAgSjN*cy<hS$2P=x)l1`p>^wyxryECUq;iuVx|Df?sFkW||r>C7v
z{;A7mS#8_!Uunr(xm5m;Go0;wcK(2~oM)x2o4PHi$@{Z2`#fKCpQ9l^W=?LJju?UW
z(7E^vIZ*k!{C_9oc`%Hw#`kgnF1!ENYI&LbUz=Nx=RaR#{1@-9dj2zDJdavm&oS`k
z;4O?}7`Y<vrC;>2RrIo*%G~zy_33%{;P}<|r)z%;hXwp>7E#?QdbwTn@>woF-Lvz9
z^H(^w|F_E8R({`+Z1l1#k0QW`9LD~L!?K*?n37Yos>fX=WQzTPE$3bLKHmun4r|m#
zhh@7SAD~xU>WxW29m?p3J%#5twx!37E6b~k4_KEs(ajH&MI2sFhW&8%uNmfC#|U!H
zjojuIEDDrpjJIOZJDlIiM3_Lm2GW5AtIq%6n;RYWMm`CWOMUXO@OT!*nz!b@QGSHY
znsj$Zqlo$5^Bv4EeSm5of{jn3+f{T!>CVQCySnhPE!`hob{X~p3@{G_*muL=DrO%-
z;ByqkB85s{oi0H=(VflMeGhn{-EsIq9W$w>OShNCBvImf`yyUwnqog3Lj6;K=6X-{
zb?n$HkGq?6xmA)9tF*q@=1IS359meluv$2hZ`q3L<(Aj0Qeffiux%kL7qKlL|8;T-
zH!t}SO2@=0W~*I1Nn{JW=sTRcogZ9ACht|YuSe7VA?$xxbz;uPPkH~(^O}Ap|Ig;5
z|JT<V|Kl-&hZY1<=3U0!4sHv-_-_~uJ56OQ)6<iq({no{4z_V<f%cLF$fo7U2l;17
zkmzTFT`_^uEwJu27*M02A9m%WJK}j?vm2)#-#F!wqeYGZzHQpO23@G*5u<}2>$PMm
z&m$ih=YraX(z{pV`J4I7nb!VYRd0V^W&(lMiFo^6E(U{ehKliFHW^<H?-&sVUG4L>
z`vA>?CJ-8_mq8!EBGBWFjgGkfMy(9C+|?`^_hsgVbmE1GCthIFpMLpyzo~LV<W+8f
zX-20hPk%1wQt_(Vt|I2IQMyYA6->{SBuc%5i_~t<a)amVlBck$r*2dzQ<y2~TP1C6
zXG9xDP*87(xp7U3v?#~?VDRK7fWiNA7&?5I0+!t7pH<X6jXCfkqA20Qn+IDbf|nEc
z6)=$+4YO0jd^CiGS)<XA3DZvQ&gcV4TTTG~JmNFJ(aY}ZH%D&|&R@Uno*w<znivoI
zi_z!~KP3z0XE4{@guPu#b3$bR<wN{?K$NF6Me=A)<)zrlv6f`ZXTUVYBJ)zLPkGoW
znC)M`e9d!D!?b=Gvc<Gd4K=}#pC3Y)^rzDa*eKF7fDr`@Whl}TaPOCM%ROcG@7nxG
z$)uESp4VY6$Hs&%&dv>e14^f1*pJUAv`od=NhSISl&l)byBQW#aAVJuP!G)0$p{A?
z*-5#e6xVZ;XHmpl%jLyEY0-w4p1X5)YIBv-0PrNnlT7Pn{B-)WUd&HFYXueAGAVwI
zsIjcxaZtuG-58csb42?3bCux70O7^goVm*qz)Y1Nyfjfnl2BUPbr1`sM30A)4{R9T
zfUVmXF^ESznXMRmc%lRSK+7pHzo>W%!m@BQHI@MxV?EzlN{7^y=jC+|hk6lOH+yRC
z<f*bqT+tqM9P5E&8KS>DqTH@o5%D7GINE8X9;!+NsZJW#Wgo65?EM(GQUA_UoooGr
z?|2)A{OSTu52mGp%|Y<{l@ZDWna5IpS%hdwgl7Y-iIlkHGzub~fGG2aZhYUs2~32N
zwvOX+>O^V>CDaZ|s3m$NhY4MU@Q%=sW(=s@avGwlT*o?z5nMi`P``yj*yvQ20v2$f
z3g8b*3pgk*K+I*-WT^)Ss(o_=M;pXO9Q9KsbN4)y5OFsAv{EcDp|TiZu1*V*KRS?F
zD5kZ?5YUhNziR)Np8LY&{#5yY+WvMklmDl={kZ@8b>9E|%kKZ`-VZ+5F>l_yetW)Z
zR5+t&{xw3wweq6Cm0fVu36bk?X~Z~r{UGS~Be7DmMU3lxi0HoCIztC+;9{L6PxHL`
zQ&{G`XlO<}*jL^XZtT^FYJOB`6$+@PC~rVDU4Ajh!*69}_`+fywUVH|t%OflL)$&e
zM=h6`74i${Y{)=LWpp<9-3Ci`O%fk@h&$)pU)=Rvug332XcrslVM=5P1vP|mQhNa1
zk@O@j1w%wR2u2vrr;353o3#c`Cv!*fB%g*?l+mbJ$d~|DWfQGEawuP&8klvt?C@KZ
zOYsEc3OGs;#8i$H=Ro)j(9JRt`9OrXo`u0M`!u*rs~qPjjJiO@p=&haLoEjm33EXC
zDGw40<B;+b*FLD;C>l>@)bJ@c6Lb8%-<<-Jmg)aZikE+%z=nF!cc=yOE&|wa@-HB4
z+7SwaHCeeDI57RY@Gt+OfEBL?)d55=LHzi*LiwEbKY78;y29gUg;hX_|3}-~%EbTK
z-t->r|F6>jV`KS6K-P=A$^J8%gDrm=g>g7$d%xhfe~g0pb=(-@VNE0GH5TvxLOEcD
z(FkPF$#_ZAZ+<Y=StVu6<zzAhmdE|>G`gA0oub9Td@|JSS@xtK4F+Jas|CS+lTVaH
z6k4lyP1g(oNDq$~FGgSl?(HKzk+g)v{_p=1k$sNA*A>ZLok*rmpmI-(d}Aa4XJf32
zlnWDeJa$7n$owR18bmW5KEmZeBv^;Z5Ery05SQd}&KChI_JJVoCyuo|gEAH}2f0Mc
zA;OPNQw21J>M4FOBurvV!54SD5!KKNIi%2^PZA0>WC)<u+U63>ocEHb_>+L672o6X
z)ZX)gF?$Cu7;~Z|&$PmHZgp({4tXO3{yN$mq7~+|MYy|iA>t2oo6+P3%=E=<hUdFf
zrtGVsIEmNkk2joX0}I*kDwSh4Xk)sFue+B4tRd_wpwoA=^gc?rgIS1q_QI5Yyx&h1
z$3swWc)RR<2X~@3Hs04VRWfmYdjE4{5Y6Iwrg~YpIP6Wv{p4I!w9FuYK0(Ip(>gn(
zv#a?v3_bG?6I77JXv}y&h#9)32h-_navMRf3Ws+r{B;ow*<uQ9Fom&lF%NiE)>s`>
zobuWa?_564jl%i$M4V%jqXiZM4flQnM#AajLpX!scf?EKba5I10_;6*)w`_GXt>CW
zC-xRxEae&y!5ZO3zz7R`UESeOjw^>*)I$bfeIJ0%(CSS%vcmwa0C3#xg+t+r5ydf#
zjaY-HmZ30lR2NrCpzLUH*Nx&XmrNIh(ba?@_F@vmCsYTQS)cnk+cdg^eC~}B1%0|0
zozDUo|K&!;&dys{^Dz9S?bwKe1QID9{Ll+R-&(+Te!)B06!?xn`ByV?500=JEXA!-
zjNq7sm$$L^OlM&)ia}<3$$Px@UHDS25J$mn#987OBl@Q>r-PCZTgD^py1GUylZTUc
zI&`Iz%=7Jt2CU#<_6N0a#<({j>uq_f2SF0MF$cJ!G-E?JEH#4ilI7L)%DaEuE`ON4
zzvc419rC3B84+j1MdR8f4<@Ks1PwQbPAGuRde@6tk`-=+UBlI`TP@o;mc7EHUtYQ!
zjqvC$b$KTsx>;H6>CG8NO^5JvHYb{mXgruuE~`ba7shc;2s2`#j^rAo9AXSd#OyNz
zkZePzmJah)g>}yfa@h8&u^kX6o`q%4#<Ew;GEKgxlX<v<rn%NYpxC$vgc?1DMly&n
zm>y!L)sggge!Yk}y#S2)>yQ>UlNlKCBNk6^hF=WlFvkUV)Om3(qlMBC51q^Bh!hWQ
zwr&+$fHukC4~BU;32tA_(Ps&#WJm9QIueYW-H{IYCY-&Ue89jmc$=yrE_mg(tY6Zy
z30!%jmY^0&Cy5;ohOl2#KBg8?c<EXd*Xwk0IKjJpSD5m<0RAdn4&ZuPl<BgzJDnUw
zAd1)tTyB-+h(p3v>rxq6xmr;+9RnSI8(x7%<7CSx05rtNE@FS%+A_zw-%?{E#mzED
z#?p%a=KQAH6~OXusg6xk=Mfr(c(1VmMxEqV@74VjFDlJuIx=WlMp3^Zz59gKpv?x3
z-2T<{%fsXZ^m&i@n%gMuEug@7ibB4~KMhXlT|nE$<bVi^31NM9`8s{iI+tmN&d$!S
zizu9TftXyPmg^J1=Ib}SEzof3OEa@*0$wNen$Mpl&SzzuTsf(kP;dorlAdURDC45N
zoiGuosBKl{(<+co%i6hg$dh-6KXlJd4vvqH-geJ_I5;INQ*FmwnbNwXISXq@auC#w
z#(ME<mHst~m9E#8O8=p6=!$8oAQ1?ek>#ze^)WLG2DW$ph@S^*j85fdnKU9@X*AaO
zoT#aqWqhr`X!g9v+zA(_K1M>&dmRA@gI>Sm`=ALkeOozrz7o4}e)y>@iv0#~Vd$&2
zjgE2XR3%Smq1;mXoR#8gGAxjwwZxP>3#4g9uOiz5$y#YDIj<!%7TBei!*IlNZmSD8
zp!#c>^S|5y;9}6hC~VL=a)Ddch>z!Xf8O&y_yi`?5cH)_XY17u0?YEhw!L=h{LkNN
z`j6*-Ut|0yAM?L%Jvsr>eDn6*>G`XZqweYJE*h4v5C4!OJ~DFj-mUj~@}2nuwiv%p
z_%XH_Kl?tSF&~T7S+ckJph2GQw+nf>Bz)j4Tfnji@w9K-7G>*Dw)x1*V!lAkO9Si@
zFfUEaOHW!g!*Cwc9a}t4g26yjptEo|IN(T{E~%AdDWtL9!JSrk=V0YrVpJHgc8$%f
zj19b;)o5xZ)QCpGm6IGOz{gQ=%TI)1#oAPav@|8qZMAU$WSON(U0Xl@j?wEXg(+^9
zxXk{1J^k3otDD()NkO|(nzogncDYVpn$a88h0BG$vvZ_jlW<;I#gUF_e&14lG)j@{
z${fL<R@8swdQtze{?gbE8BfL}s1Sp$!juz-J2vGl(mEAVE4gD0Rf81(8!U#yjVU&a
zxIBe3;Vk@h5zWHBxU(uwbaBl9ICt`OjDf_W@dnH-Fe{I6Rz^$UhY%JY<I8Y%^)kc`
zDGsGY;l$<6PO=b-!#Qr1^V_R*<Mcz=cajBrdM$sc>QRvt`XbbJGpF&eJb%B3!xp-0
zyf@u7#4v6Q^VtJKo<o89>t#3(pmm-1wR)gA%hIJ|gGz_|n-L`#POl}-rRH7%R7Ef9
z(6%Ff=Nn5M$F`NIb__?+xW*k~gm(k5_jF&wH&bZaH?{ak5WpaVct)_fB<-xHR_!1E
zY8^GeUV547qLubdmz!vS9WT90>ZT^dq=R^Q8>=}e)y56KIs^Vv41CPrT}!VsmvpW+
zw&SuJMum2+7R~RWH{L3gd!cO{$!---K&#>9O%x14p`iLWmY{bSjnx}y5x}Go_9F6Y
ziNp9FrRRH%S0>@Re*R8k1rk-<-rJQZ`TZp~7yp`jQ%mmuM7>Yp|Lyr1|8MUx{@-K#
zKdbAvi@QjV{@<%?4*dyS%l_ESo~4||GP^#Sb76&@AB47t_*PsPdrubL3MwZUR$Eo~
z-CbIGw<ymk&c6GzX0-gL_K=&#;i5k|viQl_-dK>ZKN)E&vo~|8Jo`$JESoxW@(Rqy
z;y0OBK$OK5_|Mf!TH(Q*{qm&m<xAgNQu;xzB<v1{=1p_{v#Jkbfk&2VC+IN3eR!<-
zAL&ltug4+JaA-P!<kYsBH{mL7gnUeU^dbCXd<eg6=fNBTd0o|@RnVXRFfTv<3-I#$
z0^Iw)Fu%S>hrS1P==&U=e72dqI!4|E|D}5jE-#ScUC^laiG2(|lZ#=>!?0{OZmG>r
zJQI6=SFrbK<)?6~(~42JvuW{>thSMvMb!2!`89%NJsv;1!{hz@9c^-2*1=K9Y&8eR
z63@nxxyx&2g{EdRCS}A9Naw^6+<{>=#J@sKHoQh{mx7kZc!y~t*noBZCAlfcNl=_!
znVj}JOz6*nq@JD)`TN8?kQZTJ;S~>cKbZS){w*5d7DOg_k!}p-iDLVCYt5*0;I!P9
z;3{Byd#vd)`sXDpE|0#Zo_OIp28)5unhlUweX>hruuIY=3g3lEx4aO;P3+2uUgQ`O
z(145kPy>w7++WOa4+?jrxc5jDgR<lt{?Q81KA6|305|u&1y!HsIC}1>qreVxiwtdr
z<*!t=;y&t4%a3sb8cx}VYfSouH?lQCbYFYC7!Dihw#E@sYirMJ=MDJ2v21f%))#i^
zrY-?9zmZ3wuo0!jx9&AS3G->J*vM7M+nakC=9W|k8)+5j{L!=f@%#@a-Z&f%#L{lb
zGr;2WKYz2m-AbMRd0XD*<N4p?`QKycr}Cq}Z{wSO7bp)Oz+3zbnm-N?Co@c}TV?w}
z8x-`hf;<2*8cdfAiWLtMAL^J+i#`%YQ1xy&#ifyoBod7yBgjZm{1Gr}EQL}koCF2z
zD<1~|c`d4vx7XR-XylA?wsG;LWk<`b9NKhf%hFM(%~x1#csc@b&+1QT>`x{>g}>{&
zUV<UZrR{W)7{{w}G1jnMmY}y<gw#a;evHBod&d}8NFMWOJSrRNsi?d4$cK*BELdr3
zRuf86R<+26xh>~AyZfFkHl-r(PG6lLy!~%G=c>u&R@dHsJ88`TazKs0N_zQ{0=meJ
zUj9vPl<{w<wIXO&R&xb{eK!^u`^GNXC_bRr83~1qWJFJfg}>pu%MZ=N*$fvU-_F7T
zEMhQsl!m{<zQvc;g0<aQBJe-sZ!@Zb&-v&b_D#Byu#M-+q%oq%PF6RGw6*JP*bA&)
zB1j99VxTm4?pGpTN3f@M;fl9dB=j{QD>)+L(;9H7dls)aSS8w^1}tlfJj{?KnH9rU
zx^QFpniYx~k#f|=7+3B$AR6SW4v91<S~0UU%Lhhl1(qHdmz3GZ^hKLw%(RPSnG+bh
z3G|2iPPsXwd*hlpW~8nic0P+_{LSb$+FNg2*S`3OU7F<s|E|K>5t(&fvoL_%7w!m^
zK|RD>Vk9AKNonnO`F=k4Gjh-5^&noql=Bh12*y_n5CHY5GX#W&e>a`-zGa-aQlTs1
z-tRu*<tl>3TY{A?3G#ZqSWzbFTzOU3J=||ioJ>?-1tjinuzb{f-MOv7rk88WUnW7q
zfJ3wo`Ng{zOY)_BPPon$`km<TReevgbEBIo*<uLi3Q3&*9NdD63}4*R39k@>I_K4e
z;U3&}0j{7vh?!p%?(%oI#M%VurMGyM-K*y;F*Ub<u9HXDQl@pTc9WqrIr_>M?h7v6
z3c`JX(!H;6D+=KO+QHfopIf>IFRDrxAnz~T!|Kv~@n?{(5}PTr%rx@rY<?!@XaO(X
z#?J6y);?M#1ggpo_)|sI)u$Y1W%=4=6{*)#tiM7BusGy3cvz>pa$CKE+g;^J=iNBs
zfu*s76u8)AV1>Tdmcq&1XsL=?#TzJ#YbxAoxRw^Tf$M$Ypn{vzpQ55N$9i@l^B*!0
zIUea}UmFTH=dZP4BBTBSL`zrTO0))d4?PF{@0WOlON@AN`Ld9Z>S~hW-UhkJRE%hS
zj*+(0L`6yk?CAW0Id=ArS4*4#zI%0XawJ><C~W%zjbe(6JEXYnbV!7L0L>kNBXgxU
z=9B4LJgzz9S-L16l+Op04ZoIaek-#(in-ODzl&;Xg7Z~Her^%5lJLU}`ep<Ci{`Uz
zOLIR^c%_w*!K@|An$OK(=GniTG&h>_6(3|&I0@Z-WH6hIBq<cP@TyLf62q-7xkZg*
zh3MOIpIUn)hrI8gWNid+iu!F~uin5^VTYWk?*EbRw35HFkvwK6o&qISJlUxzrx<>H
zUfEC9vJ4w42Q)=6b0g*B1BA;ZeSKhO`syoNz)42^-1Y3k`Pf&B|L{k++gIZ3uj6@-
z?y}rP8zk3#w%HaQ*&?!LpN%NXB_3Wp+iW(TZ+R};^x*H7%bIFOT|V(Q+l^;!Pk%)}
z=;|mxkdJr8{iP9iA<yB~YkCFj$`Tu1>ZiT6`OGCw6qAC#3~ckc*W6A4l8<KwaC_T(
z{%{rZ{mo3pK&m6U$2Rg%r^W6e&qwM}KGHho^-NTN-uU#Ukz<bRmaSA;eG#ZdWN57d
z>^md8U}nz8vc$Z-u`A2PtSdh=Z4}9@DV2eHc^LlJ9d@AnHxE#JNv9~Nmhj`W(&Gcz
zLD3xtO$PV5=YYn)xFn#4Iq%lvvrJE(j->tQbB=99EhzAA5_h+@y8;d2*`6i>>;ljo
zUS^ogFcUAiAR*(e+ZGww{;jW=XK{pw*B?UM6}pM~Nwk+Kx;#;3=hxxL!k=Hlpr4!b
zvm7-y`={3`LCw!JT8>(jbF>iC$X5Drw_VB@z8DmQ)^3*}7sch?EJH4g>ixV-b&bFS
zqDQBfm+srihm+u!35;5Q)a{-MA}q{=TZWu}=Vz;8=P$bcvt5w}aM9831Z7U6sN)Fh
zWUq-^uh~m95(G_xQW%&i%3=IX1yk|X%>!IVdgc^Pu;SW|CnJ)laC5hR1fIWCB=fbu
zg8w&~7P#kzIS)qY7O<*obV2-Y`fT`rd)wR1$M{cQQv1g%g#D`6{_U;y=4QJ6VVrtw
z|IhJb@ho4%U6y!?F<ohh1v4AHbTXb$$Jeo|zP!DTI{?MKp5DwReHixl5JJ3wiJ>oi
zqTuNhHoT5yO3b(TeRdsZQu(IIIEaLRYLnw0Ou5IwZ+GGu0uABo12AcjO)*RpOP1xA
z@%&;+VPD+D2kLCr6AzI&K0ThDzkPlBefQPr@vGA#c$y$Sp}Pp(1uPb>x~kKPs!$`o
z01Ij_LFlk2{dg`bmlVdH#Is&fM@*-RS8b!KF=ZqMoCcwEF|1CvAA<Q7_Hi`|XbtUa
zGS74D?qq7Ku=ECVE%jEBtkiy}ka!*w)rBlK#ntN$>kWZXb_P-lu0j;nl5N2SS+PY<
zCkMkQi1&1+w4yst*ouww+1nw!or*0+=Z{tLH$xb`UelGH)l}WM_<Z`hd+_f3^~u5c
ztHbWwBQQr_oqjL(Uc<4xeNO#>$7<f+Z(@FO=T*$3I2rNY*x#83ipg`anMA1cJg?gd
zKRcXkl9Lz7if>Pzh>kMU-idG*mO=si+cPxUM^Q{E)En$zF`vN4sE6q;qu<cmG>Y+Z
z6UGhitJKN`5+is@c+vls)Z=f;MLMq&H>j-eIC>vChAs<yaU)f%;{Sx>)2h9Lw$Orp
zUsN}z8|b~os`XEpq4gQx+icx6Pu;A*lX_}B?nwt5qEP5bJwWi)WqeQZ#f!`NEa=U@
z3K>FW{s<Y47Q=Zo9o~IeGF+kKLq+}w`Sl~*#r@Mu52~@E^j0?zcpv@{O#c*Ov%k^$
zFIFTiLPN=tb(qAGjN7EQl3lBP`))Drb$ECVn$ce#{rKwesQcZ!(?c$K>$s)cvTl(L
zOR|^Wv8*R-0Kd)KTB#zs<4HF_>Q36$bU8JmkX*qc<+9$|QMTZ40%pur2n{odWfhcm
z)Y$KG63^roQOdR_Y3=OLg|W9`&_4uXyVz)?m|DZd^Zss<^(%d@*FF02;P~Ca`RljN
z%cHY%_vqry+gkSY&uaD(f<qV}4gNu#X@f+{OZ1Z>-M2^ItynB#CZ+1<d}`-N9PNo6
zViXVGbY%*J4!zFh!R(q$WaRhR4ck{=)P)dgaNcqWV3!^^H~jomJ*ETUx04U*$p_l5
zE@+4w-(@$}Q`j^RFVfim#@ZPb$tUpvD432kL_|JdLCj`!uv<=sGm`SkAsJa3@PiIV
zRn3$;%qtg4GkDfozG7)gW_IG)nixfBQa+f?g1bb8)R<1sFD?cYk4VOzQV%bkRys%#
zK)I~#Zn|4ykSE1T$Oy#>ve~*aZE@+cAlnM1D0$6i)b|jN!%i1Nm?Q39PmPM_iQd%}
zKj}RwFPaaH0P6~C#Lhn3@I<n8Tn}om_V$^l(y;TCRC5*v@tr!mZZsODl}&S`JUy31
zJVb$EPNG&#Khp)iMli0VM1_OPT<eo%C8hbZd%GpB2jbWZH~d&KMa4!kn!IB#PsE5&
zakNlx@if&1Rpozs;x9kX-A$af^wAJA<z*iAvXA=NM+L&@!vK%h6fbMbz-Hvd3TGie
zq-|GB>;;{nepv;lF`jU{ptcIOh65b;2oFD0!;M*8FB88mU#>YUI!NFias%e+^QGIX
z$=Y1jdzn0X<jjqb?89XCK88^w9CI&Bk|vHbsJ~!&aRma+4eG&e{umP|CW2h2)xga%
zLTTX^&@mHR5B-EiH{yK<uc-8dz87qJSXDu(yL!piu`ZW6Jc17NC6ijYrTYa{3*V?v
zc;Ubz8fnNtKjMf&97ex|s|_;Jxk-gzq`w_D@;prQ7<=9>y3FZbbV=yS0upT`(dH!D
zM51kwXcv)CcNRw81t)PqBrXgRaXyK*R*^QZNSi9s)+^F3sz_U_NSjxrO%-Xk4HE4l
z5=lkcydrI?Nc+MdVXcUCtVt?z!7Fk>6}iwW65A_clE8{w@QPeeMK0O~iFOf*q#_r*
zA{SJViwlE9Tt0pK4k?xBA*f~W4>}C|uH&cOCGp!Xv?<}Y>-cF|{0kREN%$9a{8SCU
zZSdb#{I{3JNB-N2|K;(K|F+`4D*lDR|ApfJVrhKj|3dM<JU;S&q4=+e-&XC{R{GOk
zLVq-Tr9bT@^hd*2`qN%Qf7)$>|F+`4y)?ekpY{^^qv0$4X)mEa?F)nd3&sD%()db$
z+Dqt<hOhLey@dW;sP?;1`g5^_{%H70e=e5L9}QpW&&3k@bI~^VZ!7-WOXDm3xmZGf
zG<>B$7fa~R#f8EDh2sBWX?&$W7fa}mhOhMJBGDhs^!k)L2>Mc$0{iqQA&(h}2Dv&?
z<+CfEo}#U}aTyLKvv6Y!24UFWQ3Uc<{n;gd&fWr9n6z4cH>S06*&+|Oy;`u|xhY={
z0v`?sU20u}8zQuhY^{Lj<5G-E+eRT<?H5t?dfx!aM6#5gsa8!4R!n1$i^7lzry(bI
zr$ehkpW9~1Edku*pv{`tjLHoFp0nj{p}GfipFWl{jYzJdp#=v(>`Lf96(@%_tD+*5
zxeC@WvJt}ap!MEKB9PUEj}@znNUoxx$sDjj-y6+ZB^o}J>7{!)DqE28q@dSSDemu}
z1<GGWvRJI7@Z5dAB%cP4JQ^78l;Q3AV5sR)YrsY=Z5(XDgP>K;ZOc^zG$q0vMB1vO
zS1!ep$2h`TZu%WIh)|)~halNuOfx9f#K=%NC#LS$xJvEC^IJs~cMY+ozPh_@t?gng
z>$rl%U;s%T%Ux3<dl^-u+qfJ~dhhXiEnX+c69ET;#XbSaBJiQ9D<amjsVkN1WmLeX
z1x5PXa+MCIDBV*NY@5CJ)dGXR6*Pe3L+g)PMtj5XVzfsz9)~l=@41Sb4O;hYL-%c?
zxg5`WD(p*pV4kOmuH6RU0DyxM;G!zXvWylkZCF|8Qmxre%?U`Lv<b~YX0ia|>-tWs
zsgSxd%>s<Im@DHFNuWaL(x|2ds=3w;WYODgeKD``!2lgyNDJUXZ`6yVQIlyhv05ck
zAduu63sO-{E<~}*DZ_;*cPVAKNGZcb6=f*lJrNaY1vh|*OvM2UiI5IlEUN=ja;O6r
z#X3MkG&cg{A=rb<tEt1KMTGX+ZX5TFo3$#2&xNb0hBBSJrpxdjT2EqH!L@_Sz@pkx
zmaH~D&B6iR#BLW^jBPbR;hr<n!)AuLZ88V@3bQObzGRswX5q1n6hfZGNqrEANM=VD
z#=U4C6OpG6??xBA9*Npm;d49UbJz0=jphqcr8cfO*>k9vUsF9y&`v6@Lt)JkEU&tx
zZd!GvZk+^EZB<KK#B3IP_^I_XQ2d=%zaKeG{_b)y7=$zN8tsjyc+D3zU^?Cywj`R8
zgHkoSp?Pt-r@MW$1|0`DsiLH<fsQT;Hn9cCogIL4cqLF~e*MO<bj7Qmnm?mYwW&Sw
zok#E5)^|R=+sa>sV>AG{I`Fc`wX|l)+|O4k*rO#M-u_jKKKRmwcGbq~y!9ABiU=>2
zmTw3ije;uy?k60IB4zxUeWI3m%W?TzKY!f^;CemMk~<5B&~#OXDThyG9oEF?u875-
zxNL(@Hfhpt!zqlW!8|PPv^1pYS_A{Z&4uz@(jiqJm5VkVx58YIB#wqz;pW1SxFqTM
z)ZeQtCR3sX3ZlaN=5u8dj*Z5ixVM~^Dh3b3E8nvaw>y-pgb-FA=Pu8wAQXh-dWu&b
zL^|zwF{-jHqv0m0xCyst&xp7Df6W3F4vj_<@xaz;lRk$_ZtIKE<vwAK^lYdVR`|q+
z@ZPCf5N(8jUir0q6-N}b!Ad^fr<&jv9sDWS=8=qWzzi?6<sqLX-7&pQmFJ8=?B?KT
z0!xhp6mYQm-U@UA!f=+_Inq>dHT<L-r18Z*k$5PYqEJNEYNXmzZ}kMWXa`RKyE&+7
z!OOs8OLXT{96(0Hi5wa?ml_!qDX+E?o))W7uwl&RN}|U6m^axP?6^cuZxrn%ki)n6
z&ETo&x?+jEBDqpA7~rsCQrCg2@T0B=m`*V0DL<8)rpDGb$K;&B<io$h2gR5t))nx{
z7%Q?<x4;yfqveOIHpE-Ej~5EldLtoKFU3p)p&e0JsZhLPAYWBjHfAm_Y6B4CbG5|M
z37d#NZ}ErtJjHJY&!|Bz`e&&V40w(&D*~gZV?8y}Koj2c!5@Zmt2dX$?M+)-$xuZ|
zMVw-(4LIHVo#|_5f~JDIy54g&?&_=2wywUlQpY+)Hu2bIist%$@+w~-iY?WWCH`~(
zP7Q@4oL`sU{F+(1070iZ0#eWX+7i5^Pibc}CG*Ty*Crm$u+#?GY(}$WB6j$~XI3G0
z%eKfSb&_U7PDEvw=~N+lBghi!`^l^Pxm+=+{2L1|paHOI0{PiTo6a*!myPgrpUgF_
z4K&y?esae9Bp4ryuY+lrI@3I`kL0-AeIfoHioY-QtxaS4eX)!uwC}ud0Y4{5U}8@?
zxiE&G`onWFj^;9;Zy`4t+^6TLl6*9{<v7B;I4^eDVV=aX0~VN6FY;USz}&cbkt8cP
zq<azHE>G~=2dSD$t+1aJxO}%vl@pc8CHF9*s8Wz2qE9C$6=*ya2=oDDW+VOC<YyGR
zoGhgrUnX59w^wlk>SgXQS6~qrb&wKzx#D}cg0dnj8S==PEPqZos5C9q4~#*tT9J97
zW7B2k!O21`2C53muFbn@W3HcUY4$T)n$>6LinDPQfvA{GeItswDG2jjBJrxU58Qq_
z&rBlZdg@)CV2zu?ru`nc?9v`fJGrMEK%pM1m4dC~jigp7oL}dV?Q)8Bg!9Vb8p9;+
zm2-(g@-{0_rvg?eL84VFV|}$U*7Y(lEcpNX`hH5{|K0Y&DecjJy7+(1?PfOqUvq2g
zG5+7@_(5kFnVKFQjw!XvWWeJez*>zBF`LVW5P!47_4Opk#4x&?p(g@Rz#!DZ4<#60
zh48!qkMYeR*f%r6#VOlwdLH{e2#4(ZMK~UZm%*aX_O9ST<4QgFM>K}XvN4!-1P$Wv
zY_gd3LZH_V*>{r}IY-9}iuXkxg4ZE%HXFsrJAOHt{1y#|0ph<|Tn?iiJC1tcI1brO
zgZYhSgS~<^JQ;lo<;^ks5OANLeiXy-e7Ts1eI!AyhI1(=m;Lu-!Fs`%@g&e}G7ecV
zXV>%jbm!^Q5ihzinO!{vnops$r;W8Wr1N7iizW-;Jb-o29Zd}}C!X}Q9Ry?>zy=$I
zqsi=!4a49)`*0nOkzvr+$%s<DqX!<wF`L7>n*lQk2|-~#=}&eT595p}cdoI~VrpJ0
z`I{s%g^ay+aQM&e!SV6y!-MlzuTQ(juTEZ_GfzfoU9&}t<RO09=&q|6pnwnh1F*$^
z$EE)NWgiOffHe`QgzEKQAd#b_Hb`wsjmSf}+klW5!)#G#(0<GZ)7%)Iu<nhHRmW%(
z-J1&NwpUNs1Oz<G5Ue}&ym6!W`oBE>MG%yJNX>O4(<Mi=dDIKzjSVB6F$LS;Whq=N
zaU6c&70eJ=UpENY_+T>2)_Rq?5{MbWWwIF0`;!l2$t>mdQBU{wn@Q}s`88^;sS*-N
zgkiCYs(!OxS3=eDZ<kkUW64UDu(453w{1BK^@n5uc&ODK#YH(APm=cCufT|40R+A;
z3)z(n)p0y5>MTiF8%ve7aj&wLDk{}Kv!(Ia3X{n&p})Uk1J-Z$wTgYrQvPOj%HL!t
zCmvaKIg-S4x`IF{HROnpd82yjE{@D=R#hgqo_to>lUps2(k*gi1tb5TA~JeBre-b~
zpzzCb+^k_mZLk>cNeG4~A;||+a^ir=OnUgk5zL_{M<>Te2mf?lzrn)Ge*Py{ug!vB
zBHqozMppSnF#s&7+-?S&6j2SsT^gCOL`o02Tf+DpY?R;G>o*|VD)sDPcv0r)<9-z(
z32bI#6HJf-E)8t4msBF5{e2-z+-1_uus%%9J+KNv-t1d^%PpsqgMS{Koxgo``aL!|
z3?;m{kK(T=G{ZSkM?+7&VB$pu>N2ek=MI?@k+x)5I}yT6T<*73Sl#vaI$~!B1Doxx
zMnlC5NK%FE>Zd!Xpx9QMP2E?HBJtidoiRE!c<@KxPNJ71c<GMmzPSA0(+A;L(ZszY
zeyeU5NNC^q5u5RT<Qd=mG_=`!Z<=_H+C)bH^hdh!tWZ3Ml!6-hRwy904g?zg$y|I)
zrMN6t$tLN`)o^l&X%kmcN;4OeW_^<8m^tos#bT7l>Hj_c{I&Lf20=U@1@r6s?EaMP
z|9DNmmCpaMwfUI;;p^Q0+5F4y|5&z8cvw|Rct{4=rOOR5^p2nbS>)kb%rVlyrO4cT
zxxY^U+;Z0dzGbif$=J6DrsAxp0A3utM1YUTjtq8x`1<5c_vGxy!(`@29_=yjS+luj
zL^P!dJsV^7yu-P7qIiJY5@F3bXS(s-<&T|Pu}&={F%HHVe0o|9JsJ<P#ZPII2?*@(
zcfWgcczken);)c7`W;5@x4(S@Z+B_$Uxc1tvz4kfWETqVBA3KSe|9%pjQRGAu%XU%
zqif`N{7%p)ld{rj<t4&BPp7)&Y9Y!}_LkZ5^i4L5^DRE3BJdp+^zE)tM8|-vIS5sA
zMVk1uI<Z}b%-zlt41-qsr6kAf^!{i3?QTBrAlVG#@MGbf8sB6*+Z7Dn0V7SzczG|z
zQ5~u=PD;;9!X*voyO43f$Y)$5#{|7fd<Vnfchp`x3E5FYJ;jssW>s3(K@4iTgD*5%
z{HeoF<+4aQw8Z0&$pH7ezPw`J<SCqabZP=7t(%5oR#9eX8IsA+Na~s-@H=PMQNJIK
zyT66A3ACM)*Ud)}lv=BoyyxRZo1F19!n-Sv3++ndLi=IIh4urC3++ndLc4nRsxmIL
zm+fBJVWC}VSZG%t&#DXy%M1ML8W-BdJw9pOqR}If<pYchV$yH!euf#{s<5k1=qu>|
zUu!<s7=M@v{fq6n&82#7S>I(3JEwp10CV~`*}iM+tI1sxaV~4txL&V|oAUUH*WX+^
z@Hng;hj%F}0iGEcDOd@JjLk(9C=W(L#?FWaq=;dpe;c<#9W$>bdft1Z3<>nO)%b<%
z|3x#<=U^CJjl=%Rn}8+$U#(5gPy2sudXM{mU*rB?^ErDFjwjKa{b$gdT(Z4i@K58C
zo;AFGT#bTg*yv40U;pM`b|XoofYN6Yz5jKu!{IcXiE|=Wr|Z)$5C0zh7V;fKeh`k&
zra|w0IPX#>_I-kw3<PeC)wO8x0&qSHa9Jss*x5mhBu|S2HVr)J&K)WiZ+R7v<b3zT
zL9iGq{vE+EF<WE*u5(BkDw{O&R}#z^q`s6~_`YU3kvp%;Zti=+*Mv|>Xjl~ww-lF=
z2W9BwrF~RBb@5~rVpY2;o)gTHU2^*ST^JGQ&|oB9GE~OVD}QkO>ibh~OCVC=UHbf!
z*KGbwr@py5r$M`HlkZtpf?oHQg!h+(Z!HPmUJ`y&!Dj?1%qI@bzf=|_2d~Tv2VX*%
z0>Rh0m*$=h(VY$#akt&QyqlBLi^Ct+YnraVdo`ZFnVXbr>+OUOTW`!Q#H_+ns9OEe
zsLk&koC3UqSCyNDgJayXO8lANe^!gN*#5f;C$jRFliO7dz>@RdRuhc9l>N8WZa&(7
zU!(o^Y=exz$3tLg2dXD~B=V=<hcK+oXLp^<o8x!cXa9QJNr>)66JR(Dz<9Rjp|qZu
zifo^+PtRr2buuR21N+_Z77U#3bQbm^%DZ{~grBIIS<n$4CJc16{03$a{<TS`w6-!|
zOm2BDPM*?Q4KKopQ@nn{y6{Zoj;wRJiy#^pEr=RxFu~BQSW|3~nBNXy2b?0%X<WmT
z`qMQ_s#j6U&d#fGKfGo1dzYuC%>~V*gw24SE=K1mi|}n23>p5)q8VOwZc@&{=rRh&
z^Ow=c$Rh8ul$)M1_jw)q(FiXv;6M!@#lf)VWjAfA3!J<hao!BTTGmVs>J@{2xCWja
z0^3KrHR=9Oh`E@w%a_Q4#hxu~RRYj)nyv^`E`-A_n~Dkb$a8WEbaS4Y=)SNgtQvpG
zeg3TPrp<zPg73>_173yWzJ%5uc>2-;o<n2eND>2RJ@(ecg2wS`8?jfbPld;z1s*?#
zC`Ne$H4}J4gK&ZTIuLs*$onGfmUD@IjF-)t<nPkt&)3VV<-YRrH~^qwFq$H=i^a%{
zp49$v*b|{@RkKWPbK8{*5&$HtmD+i7>z5Jq%L!`LMg|o3F@E=H9rum-hSUh(HKO=v
zG!;SNMeS+$qx^Rp8*CK3hoO2gqq|x&nvauPU<o82cTI8CFR6Ru-5>EN(@y}z%K-u8
zjgF`~g3&{94+-8G-#poHF|_88hvfnTz~L7Q_M5vb+VkM|`g+8YvnykGnUSJ3h(MU!
z`LSsBOx>u)xujq+SxZeOhe15Y+^KQFtn!p$c5a+LSmh~VwF%<SI?~$)-U@@7u@Ci<
zm@TWaRj}WO0Ec070Rgd-S(R3Kud$aT!cOM>J?)JPC3V>K)EjW6Q}!#iy$&dXr(^~$
zL``8S1ox8HUn@FQYTs6BX@ULX0<kZUzQ<FTZw65hHQf`NJu3b6fQ@$92v2-Ayhd~2
zvJEgo2fHT7$u2uV5XM2+CV}v49ilms$^pD=NKQr@TOBrXfs{%IC<}<AK5Z<VY>aY(
zLJ<UdKHfGcsb%`WEe-mU_p<?w@s^iDXpDke3dm4eQZtPhjWt9&CiB`PO2oUF3>TxY
zpcuB<Qou>*%0~R_BAA8!Q&{5^A=9y5{43btoDQpHD2)|~92z5z(wof2U>zj=^aaI8
zKMn_TR=~kY4)*zE`j(b`39eKK5qurr?&@Dq3r|SmBWSP`(tI*SR(9nFI(;YLlLR=!
zokqQ|mT6WqghZ+Nl}+Si?VEn8Z?+<>Z(_%EnQ<!y-X|Rsd!@@-xxEsAm+O^)@Ym~=
zZNF-l-15u&<I0`syWwOqyHAfg45MMXS3!ens!m1hW}cY+3xE4Rr-%qO7X<WgW`8q~
zi+Ua5mUq2Uq3Em>cS4Pn>WQ?GYi?zV{)l!fnXTk?1%LL~Rd*In!vKbwXnj4gU-hd|
zUC+_CE%f=(Nqk8R4BxTZrWVvJZY!s>R&HtSveH`lrETVv*6!z(wpCV|Le1iC=alB<
zmiDZyG%vrj=Xs??d8K*Hyr#}a$<vNjl*^dc&cPAxGwO0>;%NABElmS-EM*UJNE52%
zq-UnbyM2%TKLzJM;bIuhg7MWbTr~((iT}^mc01+&<2Re_?Z@+<ul4-L^B>QD9_ak%
z?8ld#wNjUvn{XBl?s!jj^kCz1x1!~GohK{|xYta?|NO%Lyf>o?q!C;4!hdVG07~s=
z8Bhl}svO)#>bN9%LOX5z6pL25!6n5i5k;|)lR!FQK$fW}U}pz|3Oi8^U6M>FF9Y?9
z1<KDry_ehS{3!_PGQ!o<s6iqn^w<Hhfymz13_bzSWECD%;}!ucF^qsYJTu$=5-d`P
zFaG*z9Xr9R{r{i6ckgf8I2wiDe_wrywfp<IAPv$4NXdzHc^*Y}Vx8KOBgv_s)AsN{
zBqSo{O#~_1P5gfLcV_m2U0@dgDLH9VyNyi%Gqbbz+1a`5|HY@2Hq=b}`yC&r!&E4u
zr!%&iCO4~?rEq*g)pMIQp9u5|m^O=e5-r}dXg(i*!YBkdl?D+^v=4-T{6LcbmFWM9
zGj}ka0RYU~c>>cz&`Yj7<|PmI-=hH)!9LjUP_S1o+2?N`WH&}C9xMzG-oXuDTx;-a
z%{RwS!iG-=0ChK$Y12@a(gTfpK%+$ssuU-C`^<?g?u}53Ph9k8i#`m5w7(p|vS6n6
z#wD_o-5qq`q#?%w)``kU($}lO=zJ25VNhKF4Q;rnyCD1ZO8@k%|LVp0dH>?&;Ykrq
zHbx!fRvyP^axm5|I=JVe!}>*s_M*$|3Yq4<3vJ)OjmlR^z91TlDlVNJ%$rMmT0eDM
zpF%%ye-ig(g>QlQ=J*u2K6P9y;-|n-SSY?ZK83DN&Ps=(sj7QLlRmvKxDla6mjT3c
z1nMw9t|DA^v?V}$^|Ai8wk0Dyf#hMi{g%s%WiD7}l*R+Qp{zeQc%`H55RtYuNEdTj
z-r&UfPqYy=cP#QI1qz&)vFjDKyllvuvj^q~MIo?bblezxIX%KpT;7_m7BOroym7S3
zKBJ1Z2dMW;Db%~x5SVcT-X{aRX-XZnHtYGjR8G{24{zKyaiquWZ=GlJ<R2gZ)|w~A
zkG3q;<D)aH7<()RET(P6#I4QzSZRvyo%S{`8I&?fG7uC)a#h(F?fBLqSwgoX=mz&p
zm*{oMf!d=u8d4IVc*%1-SHM@nea`C_i;GpIg3+N~)F?>@E2m;nhn0?0nq%5s&;O&@
zWCma3+3L%j|ATNl+$o&@zs7(5qx%1RJ^EK4{hd;gPZ23$0~6wrHaW@eaua*Wm)Zmu
z29r(!3XAr6`DU2E67ax>ZX3Ujex5A~RxNI{$AKv4W!-`R0|d91wM!642hao;!5W1~
zw2N>}BtYrvC|kK8h;~byhQDP5vV_5w<{)JRLjcsO&qhS@rHv-R8bKCS+71h#)&*=t
zpf8=USm@p!)CP_eR`ZtIPFo+fAlCv)!vRVk+oOWCT8A)C=*uZ&d4a3Zvg>4yn?2Vh
zpf0in0rf1JXhKaif!bkb=2H<B6*e6$HEh1^CLI?Nebg}vt%%<-X&VmCR?x`DppwGN
z&uG;5^BzK>S>MkM8CVl@DE^GH_eec(A?uu6+MeKh`tAR`DU(~~<Ybh`Q-7lmKT8Qq
z49QvSBFq*<sa)uL{&N{AcW2~?x&cMCLcLmOd#k^xs#aK0t+0Nz!rIjetEyF>(_BU!
zg2K6Cytfjppapa=6K9?lz1Gcla}>2XsO3?%HUuwW%NAs~9wEt#jg`rLu`*eU6&#OU
zeLQw?Ja%i3#|@gWd)FrH-dhuP@7jc2WzEi0-17`VTx^Wk1roVJp>;+AD>k-#rEG#}
znPi7o7)1PlMOS2~Sq_Hgv(aFA0HSC)_?yKH%S~>9GVu0urtirCsu<3U&6lYI481TF
zAb`Om{2fvY03+>)nDYwT36>0_3Yo<`shnlN)ZmDVf?5Dx9C(2rcGTXFr2-8^b}UBL
zp@Ahsca@-F@_7XWJ7U@HSs+g}t0JEPtxKNTuZnyI6f|7RU7Z^G)RzLxG12N&#b+6m
z)_WoEy+90sP>CUhQB%f#SW>J8wO%u8)e37>YhC(T7nRZ1!CX+zow$@EzEE9=gw&VG
zG@{7!a0=FlIAm<**(Hk{_gju2{D7TVwhN=aBeLY%%TAWGq{1oUG0Q=7`$r8NQ_VN*
zWj$e@QP`!jz!U@mSy&TJA8kw?8IvA$8G4EpX=_!|5+$?I-MLnly^5t)l~=NERIqEL
zw<~laYZM1zo!(~lab749s#w-stWh=_sw-bs_nc5+mTR^O6l7_KG6GGIplY@VR=<u&
zVPi>y&94;DEIL_Y$N|nAn?i1CIuh#I#sZh4{8|K>c+_V$)wx0RE~;}+U&XlYoYo1}
zu&d8acwuhYbs}`_2;XD~|6YkAYv}($`d>h=PU9~W|6ylmr<afa@HFUs)&Kt??f=%(
z|Jw+B4Ip-3fg_5y3@_DW*FrR8D}XSIF`*u@KK<V4?jc)TRIG-g)nCCe)MTLGp=<G#
z<z)zOS;pe&YJxedM2@ii0!8`dbFK2LBSYjdv?vI*f<xr->v|5~nc$W)qgG7{=1tL;
zb5zz~rzK<yHXu4)P#bS6Uw<|qcf4uGXTl}A@HOzJs3{5TU+F+R4s`RNv{PmyL1aqf
zXlRgDiEkQEGOB6N(98=9^5N^ZM=$&5uR#WSamMZJu%>O8na>lRnF^i2a}>yqQwG`v
z(9laeje~5UdqLdV5l6QoW6%!mlssUvA_6#?O;N3$Vqqz7W0mEklwGt9ts==<oN%rd
z70pnj39sft*~SBeY(Ud!T4a*m^5F;K{A!vkPZ!J4EYobKwk||vq|^sehcNGfB|gn@
zdPE6=KW4senjUxoyP5T35T=Ar(_#cta6Foghw<Xag=ap?&<;`!Z!ST#*AGl~3|oRN
zD2J5MN%J`-MaJ6Xml<wZ5%d=id?OgrJwyk1*gCv4?+^FP5s$uGMO4SFuW%F7R))%f
z4zO+2<sNih!x&50mrY!N3PbXa#rVf57d?KX@|vO%&EkwN&Vp^LO_)tCZuRXmz2oke
z>83W^8e7#_FP(KX2>lDzS7R?5AX^*M_Y2qa^Og|{#uB@v=1Ou8AxyH8@*zxoA&{z2
zCb%cuZ!8m_n3GNR9OVzK<=-Ky2`{?Z8XOOqQa|KACKM~c!UOj56(H+iZ=ywnqY_0r
z{H6oH$pf2PSD4w6R~VbqPLL|u*P_(ISV@MfIj75bye_pnm=~}LYE=br>jbD{g)Ts8
z+hgQq6SZLC>@jm!TZ6_*dDi4;xN9OXZ(W%7DiSJnIt`mX(x=B~&n`xb+3I@ai6(x`
zRz6d|qvlGKt0CW(Am19ux3XxIzeXIL#W$;npC?n0kH$lGi9S{GJi$^_L10Yg{}VpG
z{6FD-U|-UBHHwQ`<{PwIf_PhwsE?UNoUX<w_=ysYP-dK%rIxC43LMoM_;o(RG5-Ol
z{df%ZeaPGEA<qkZm5r+dt!G2>`od4Q#@UMwEi2Q3p&<sF*J*#NZiEWWz^f&DORE9d
zcH^>-+6wBX%Xs-AM)$W5Gn(C`8KO!O4lNI`1d1+c>>`!x!GLw!UG^avkMU-0k}O%e
zN(RX=M(G=>Kb%efyrig{Tt{*nEfNe`To|e$oFt_7SuG%f!DT$`bGHZ8L80=4*p*Ex
zKd3&(R?7{btkqPTS&C@c%Mgxif~5S`RvvkIN||!JYLTavd!tpp3kx^CsO12(?s}}*
zXS$Z!z`Mmm<Zb8)i$>~-!onzx^wFv6pwkYwW$WL<Y0p*0dW1qGbW4X@Y`XdEgID5h
z0TPwV=pxu@Y|cQwCile-pM%)bT!TbEvy~amMsEY`2WO<7d1swvv?%Y+O%2Fx=Z^Tn
z9r42gew~??DR%oRx@6AFt~krO{^4plqYx!xx^<)73XJEo>C(VhQO9WE^!u+5-zkX%
zU6q2r_5QJ20Tb4M>He+vm`4cXZ=2#ZiUa)DW3F`*RbfriG(LMajX%JSK?8BVh?jj5
zSt+*h0gnI=4XxK-R1EOLdibHjUzzybdidRyNdQf0g4TgrigSiEoUi6Q;16B-U^O+;
z==f|NW={^&Bon@ygNOfPx?va{2_~iB-SyLpql?qC<Ny7lp=Ydr*FV2Fyf{7*z4_h$
zT)a33v7ob+TYwz-H-6Gm0U+602|M=odxk3V?C|*f1=Q{Q{KeUY=dITj5wEgH7hH@%
zrCp}n%|%Qjq)j<~;T;~71fnWK>~4&x&GA|!vK6P?!L92mSKPe!4oae6*)c^nG}3y?
zEzI9`(t)-n$*+=64wZAkE4BfbZ_24{E3maa`tJMwe}=kdI46&0!wnq(EAIb;Znxn7
z7jAdI-v58i|L`^bPp$Yr>)g3Z`cLQ%>9v^Gqb>CMs}L9Ul>pW|Q&7B@90;34ad3k5
zGO(U@L)_I*a$UQd-EB|_5!xW+#oJ&pOH=6vP;uvX8?*@=g?KupTMh#i_#7)Evo3zF
z{BS+|@Se&qw5GE;RIG|<Ny7yH<xYofhn+1Na0>Q-MlC4$tJAR93~=QKmfKEJjJE_U
zVJ7U-`zkwjRTe@wsg^DETPwU8*{~MH`bkQK4+dE=RJk?n6Jak}7`oQF$cYpWy7p<H
z^$`&ylK>&)+Ryefj0}gz&>C29X6q|BhY}sGgakldhzT@+#D)hQwP+(;9FV-}qT$b|
zwIrp5o0W#+4p_LJ)}IA63oXu(wK%9^0_#+}=69-Yp(HV0v|ue87F!rYnI&}1F@(M-
zY7Fs*+!)eU`L3D|J$Bp6c9x!F-{=L^AfXL}0x9)6?GI!!0Cd!^Q2gp}5X}r5opJZb
zn0cRYiyvggdV%GmW9{jQ#ixsCsPF8B)vJ7lRH&I)=X)JnUn=$dB>5N*wS_6S>1X1F
z&$?s=hi|l4Pue3}&<3x)sc>&pdlVqLE-|qoG%!cwl@Q3b9v>Oh_N&sXQ68mHiwU8N
zX`al2x__=xq28;+Cj4+}*&KF_$PwL>3aUyJJXuE;tPxQ`<XPo|%QUB|8pWN0`4u0#
zos!ir+p-I|R<i`o8ro|iqX+xl>?-0mKZQ}zW5y}sKv6O#>{gi516FF5*QnwhE2)`Y
zRLCH8<c%unBIPj-w7?hL6I$U<ye@sj1)r}_>W!2wPx1ng1Qc7$$=Y%XM^U@XiCt7t
zte%jpd(x#o^}CAI?2Tacq-$sOq{~@7S${h`>FOKeiDO4BIP@5+(A_kh=DD*n9##&#
zTp!mfz}vKg%!Llzc{+X_{i}%})R<wJ`q5TIckLqF7+rf2Ps>!RCW2L^`i&ER+g_k@
zb?olAa}8MQmSJu;zL2K$KQ;dQTr-nGCExlw;EMR~Pq%mS`rjbvY=71Ne)a$Q>i_i{
z`+uPglIHo9pZmi9OF}(jeSEu#<5%AJaxM2SS$-YIFIuvc-&YB#FPz;sD`<V!t)fXq
zCAEW`)$(uTc{S{;<3S~B!NwhL-thsuaR&z|G!=A1*YIl^ciKb4r^#Y1z6OEEX{R}?
zB%}QA0a<q)3K}i=PaU2N(rYtQpk0?M0ie^$YQ`Qzk2(C2&`B%EJdxbS^_3&}9?a?s
zmfo)XpyT__4?5;{fG$&WJxl2L9<zjw?dg@%+{6xRuL09=4Ufv4bK_|4%|m}0>JxU!
zE37jo+9+N?)e1{c!r{q~_;BehD~JDRUhvIJtU8D?BF3bvLYKDIxmuBe8*1c3C2ILN
z<o8^sX`X9P^>hP0{LqVbYoTdYdW2TTNH5r_VWn3Cs0%>#?DTkz)-%(~T7g$=JtIBY
z7#j9ECVCmVu+W=<K$}c00n*^xFj<II`pyP?vKVf_C!gE~d>VYW4!p)HFG71!7pq&u
zx}&k+!M}s4ARmu;OF>hCR*=2#rbH`{E35-GR9AJ0bk`xWh9zKi`IT0I-BJcBm4($D
zR9P4PYw4w`^*=W%uf~^&{}Xn1dO;!nPx#gT@2mZvQwj7fo~}npyaGb2WfCvg-h{ts
zU-K9LJ&3=W57smvEDEF-2ke4f&Q{Z5JnTn<S?XP|+oD4je8{Egn<Ufxn}{?suotct
zQ%1Brc-QbQyjIZgJ-}#qxBdmCHggHcnA=BFOYvMR=@|A{p^=gRkJwGeXN7k`l`~nP
zq!N9QCB4(3uC{>xuHzSTv_EmBj?=+m*es*|k^*fU9(p%H^9GL)s6UI*Hi9y{crvJp
zf<7pu$l>YXHEIGPY!}l3wG^BT^e(x<SntcdeR;}hcRE1>-aOt{9+p8PyO5U6#cnW@
zQm4)PWa-}orjs!hCo1T!pE9gNRnCoc_iZuqIF`+txudB%>|=^RiEF&&uox%Cj545=
z!&z%F3|66V3k80zHo;IH@-#-XZFA$q99c%II@-~Cna=9j+ZJ-2W$yoX%z0L8a&YSh
zJ<Vf3h1Qn5a4GTGGYZ<uhrLEN^C0dT^DL&XZUrH9zwUhe8(9n+5M8HDv8cn{Q^x7#
zUEV$a9M>^vY1!EE5aj4&`3aAAaDdGoi^=MIb`w&<y0Be<ESw!RRL@BDmf<zpnGf1c
z=C9n?HFL#PTIOo7KBEOWZ0OXa(aLlHZ1CvGJndd{pK4^!!Iv{m=3IqEu%KN`fq^MT
zc{o)j6;j!tSj1Pfh6)w9Mh+^X7Fw!=SZmc3lc<i(M58SClHyhZOQ(brhD)B_f-Su#
z<dS~Cf=*MdubHQHi7t+|grF^&L231%RvH5qFIe7si`kM#xt`Esr}_HZ2LML84SgIn
z%h{R0<?AGmf*ZO<4t%@`IW|?vMFCpHO*ykLXE6T4ChVq}V?B*WpXsf~|6podj|vz3
z8%S%j!TF#OYQ=UmK=U^6f#qA%)CxNw@Pt(0z(Np?ms=>h7zW6UaEvb;w9tOu=f$g;
zGmcx)M@%`acY8KbSQMG_(9I~e4J=&xY7|A(Vu6~f&u1k6#~<Ut3S%MDxdJae_!&Ws
zMLxHHt!O-%!S{d0i<!@6i!BvaBLhY5F@G}bGtnZJ7^saHkKyDb<}(G-h}1NPdA8kj
zZL?zBU}j}RfdWX@%CA4gpeyZfzqKPju8C}$@F%AirH!mFlB$Bu&{w55R3~J!{%SFs
zPzaA(5mILn%hkGg)pZ-1ZXq7umc9oX7awMPTSjwSiYq`q8r8A>^9@o)G^k6oRhwwb
zNwieH5=tvgz!KJ{D4(D$G$}H@#$ynvFxyqO9$1^8Te}Im4sNZD(5>AFF0xg_1jKCk
zcfhVz(?R`VV+berJbEf=-(e6{(EisPOO*|Bry*6@Bvm6zNME#a!`}1r>KX_-Hx@%)
z<Exo3moztqqMO6f@)2dX@7lqRpQ2!IzY;sVahEJL@s6_HH{Lh-4P4pK-q1C7p(Lep
zcN8@&FG{QC<?5PscfN&9ESZ4RGmlXnDKz`-oKVq3g~c@fkU!53<8cfn-DFa{F$mDr
zPd_`5cvK3Z>#A;0G-_B62$wd=zj#2nd|W)1+3@O@TC3O=0JhdGr=sEX+}$nhz~CZV
zeRDpE=0E1NE@r~``Cc)JyU*y#J02de*V)&@-5g@3QgovqmEc>l#~TH{tz9BBOEMAb
zC#kQeE-F{+80q0R)sgJpr+VY}4=Xf{_;@hP7O0{0;YI1?ZUSkcKn}A<=J&}U_q~7~
zN)v_L=eL9o$EF-hg<=p8im?IWjRjXT4&ucEovS%3`0FA3PreBAFUS%5;oqkZ$@!$F
z9%Mob4nXsVu6!t{j4~+WJ;x4<Cr5hUExhkxP-~+hY)w$58p`XKDxkYn(7h7qKP6W<
zA9L0a8<;wjnJ?%=tk58awSowUU33}ZV##D3Nk+qQ$uf?@1_E=iR&hGkngzmuB2K&O
z)K1~7UAwN}v%6O9GOTrL7c?g~r1fgvgwEOsn64XUP>0YUPpNu;&#qJsl}TJ-moHvj
zXx4C!4Z4V<nZlV{?eDw2*moCWbvp#nb`8t8T#FccL-S@0Be}dplA~19U`|_ZiVc5L
z7Tu0`ZSuW=7uUQAT#L9Kt?s5Wx?K)FAKmVcY^`SRkHfs>T5;mO8Ar+Vlx!;s7xyID
z!ru!t{F}yUDlYMJIFH1gDXBsi@t;$)Mwu;nsALKzjL~%I<cHrym}ofZj>l{i-Qqo|
ze%VR}<qtC5>TEjx#OAX!O|VHL)RRYp5&IzHGcRXsv6@1e>0pt}p`gVlA~}<^ps3Z=
z6+hc!y?BAZTe4(VQ8G^3ys)F!(R<<+*c{KL9Y_R6QvMxQ_T=glM};FyYdLH2_Tb@y
zp(-(ZMe399lBLaNAsY+PaL8MCFW>&cF#ZTX=DukGZP){CB#IG*XAv7397LWy`+gi<
zr|eP2pW#Wc7Cr4GRg18bue%^;Ot`2N>@>BD{yQcvyvZqNw4KY9jxg|?b1IvzS8iiW
zqflWxSzT&ar_`a_0@G4eJ$j_Q(4LjqYP@X&@6cjvYIiiYV`&DWj{Y#T5S7mACiE8$
z_PCM#r^@QVBiL_X1j?O!w;lGM+uKjW?Y#eQrxSj)|NLtI`PG1PV*}1Dq2RmV$x7(P
z^a5`$ybBibjY`;L8i1ZKUr)UV4S4V&O{QKXOfzLq%%PcKGsjl{Fk8Hj7HBI9O^GUa
zXp6xZ%6l>%-%g-$fm_U$yP)->ZD*Ft$j?ttF8aqOuZ~Y%@J$vZpk*J;<odJuGMRu#
z>s>rzA7KXtc@k3s!Jy5^D4WgYWDa}pN3YY^rN7bGY%<?vl_q?zfmxd|-*h(ZO9#r^
zxR0Wv*zDJ}={uqbV<W@1&*$mOaE-;)Rhs&(r>U*4`K{9FN;t(-Ry4JbY8nM#Fk(;k
z);*At)j3aJ)h!fdrf=&W=x@9CVva{dGOpRO<FjP||Ao4d-Tf$`ZKiH@4a2Zyu!<S0
zx|y5xjJoWo%TwRL({uM}rgEZ?Hea^s62@WWgE8egsMOtt)(U+_Cv3pPG-NSPbm2%S
z-eFDI2VUTJe2k=Ni9jb^oS_{)YMd2J0+&JY-7P7k17fVZTenICh6i3Tz+#!NBdb;*
zi!`Ze8Z{SX%m5inDm+@@v9fj+5Lv-3vjIKcYRe$I3vVZ>s<X?MN48qfMq7qAvot}r
zP8aA;O&p$!&hlw2x|D~2pA8~yM7f~R(hA_3`h9aYXHn%yz3=MJPP@<ng+h3H;3a!;
z<oL!4fh0^jEOWuB!Hzbh*JdTRy6N0(yzS&fZ*LPL@-;Rb&9=Pg*->F_UMFsI&+kd7
zUaj0};a%<9%@EMP>1G+aHp}oH_Sj73v99z+#^rn8nL}e|4u6+@+=u~<#-Ki%-18n@
zHw=kioqn0_TX^bNvDuSNq;oZQG|X;BC8$NkEd*HtH~JM~39e=xmob@%ZLv`)#tG2~
ziWG4C)PYv3&N^8vM+FZYxd{cOH>+q74^J|&0^?T}D(B!wC|($wR0?Vq2_Oq;Y;7nN
z<XHETm_Whh1+77>vJWS2@$^f$EtDGGi@9qO)BMc*Z1kf2&^xc8Cy;GgvBEso|J1?i
z=CS^_OX`t4lZmpVTfu%N{T2qy7U#*Us0wC9+kisXpn#dx8V-B^pkcV0{8k3@GQ{(R
zA-`~I`4sJM@ot80YLY7!Ho3+8gMDE-FAU-nuC5u(gbs=nHOM|+T5M+V?nRSK+66or
zmdor0xrCFRax{_KNw3j#%jYb=bm1+8XdTT@V<VGyEst>&(Fq{e!|=A)>zj7i*WS9f
z-oi7AM6-2<vBVa9D~es}Z<%hRAIm8;?4u$-)45Sr*=ZVJ(9mkPq7IXKJWt$w;n<TB
zuXsFte#&^X4b9aiWq8pW$(Z<h`eH3rY#=s<TEb$d-7EUODZ$bDwXw#Q>efo3emz;F
z;-=d;w$hZ!9vuhUd(agzT^nG=8~?2oRbSOX|4Qm0Tb~#jlr{-|vEA{|-hcF_h+ktE
zX^gJJn<>9+QjQIr7ZWv1JpNXRm}Gkp!8rlWJ5_47PBwz_RMm${!h9ilJzK<?R*A>S
zSggpQ#b@X+5y8*>>}PX-E0=Bw4QKP2TC(GK4`F&D7JU!rBH?JVHD5T?o1T25dw1s7
zL!?3Zaj?=z7$17_<cRu`2m5&DI=n(7EDRkP4VE$<27;=F@Vb7tp~VPrv^XVx(Hh#I
zVL0SR-TP29D1C@E06-zWqOm)7y{JomPWe`Y4^i<gSA(JbGkmqz-l%%d9G&o7oIEYy
z;I``;zCDdx*#^@?_xTN&LZ_XX?p0>hn=sp5qG+|xQb4%v>e+=R$&`|>dyPFy<llIn
zCSw(}PFHcU<!#rqbjY^16mO~h;lW<j?eg}|HVgDvYqaBA(q7y{<8Qzbjz&{?$WnXl
zcf3@ued;P}r@Z!J5%1b|(mPgv1D5!zDpVWIY-P1yA7o>=3Cov86h0YmvKSMi2+c~f
zhP37slm!Sf&E<BUIJ5-5wU&UnmB{w6PzX$!I8`Qy(7h2|2f$Z2=S}d$vfRdj%-0n9
zxediQ#t^n$la_5>a`-%Vzo8G95TsuegTo+OA47H<W_r;um$FHL1TGM?QX#{<_ZZ{o
z{a(1s9zTBH5YdGA@c_{O_WmF0j6gwT8jQa!#Fk;mpG7bY4KYy)KjCw=FGP*H@f4sH
zDr7ilCOFxJnuN$tRLF%X(}p1Vi0Pw)3yEp7nzSmag4e+HtFV<R={&)|VmLqH4WSvT
z)>u<SX2laFYmrCj;hSUrq5Ir@RhWsVnp{~pK=8!@sw2(zQ*jrG2RHsjyD%l87~P_U
zgoV^mUs(WOK%l?8S;W`*yU%sSS9@V(R>Tcyps@Qci&Q9IstDEC=CVR<oo|0@yPGZH
z*>807e6D|NsJeenZ6k>o%(YzFxu?F-P8exbam!%+0;HN?+09o}m9^{^M@HK5>HlA{
zN}_~~Dz<U8{`LCEJtm=4ZLP~z<W9q?GAWpV*jB#W1f?{iYK^eibd;@tBwlW!CLmt3
z^#GR+#oO4zn1!#hB|`DlxWB$|>@+!DNudj5)f|hUE`agNIBTtJ9dpN;X1wZyA;+f0
zG-pYn)+}spxG&wQ!hD|7-9V=urO#D1-ur2d+&<NDYz@S~z3idp*eQ+>hpN;uMK)TB
z^mf1?W$JPuFMFBQzqw0Us>b=!w5kRTXW6;ILK4Jgx=Xx(ElkurQ%WuEW~eZz@5WQc
zLGGZN-y;+oB<yY4|Hb0;IiT%GM)%~J;HEe!Hd~ML3|W_`;*r44kfmcb<cV&CvYC=-
zclC7*E|uy`yD7>0)(ZWYr-M-1wdDMEh~Nni%J>ud$)#Sy3mbdRbC<3xUUqstC3h=T
zFlV*blgipazGz>LXM^`-$e>?GEoL9kF}Fu%{v9KRl&wjnD?Qg18wzzSLwVX)m8Lf3
zs%Fkl(ciPu$0_$Nxl^^xD}lZ=@uB>%eK93AXq?=v>0lPW`Dz(U?ShnXADM+d(n@vK
zUyfVf;F!xs<;XgGQ-8=zy?kad#j2*(9%8nmNivN%5i@dWR)0}%*$kduVNrcavS$4a
zskZ|8_o=@%cv)sFE3%A`t?U6TjaRh7t`ov+SrWobgci+;lL>THc|%)y<$=tWC4tPA
zf##Z`ZV?uNT%>igTA5%4zf-jr>(zTvR`EqpC{HC1#*&8C0xK%MD69D*$XT4oxO7oO
z)q{W5sUBiE3xkj5{h10)2VaKP!pf_LXceUaT2XDv7U3``&sAuki6%iTY!m!<okk%%
zC!hE=U!ZIk{5G_gSl%*(SY4#%EiaVvL8qhC^1tXA1Y{s;_hl=f`tYB)?z6S}Y#Dky
zPPCD2fIhU-35L>5mt0SM8y|8LE_SfbI&2+;z0zpe&e)ckhh`ahSSa(5%2H0ycC)J0
zsa4LI0}<-R?p~#ORUx8z-;j`ZV?Rp!u$K^0X@^tnXbFA?MspzDT%(gYU%8`zF+R&(
z1`JS3z0NwQ==0fG;H7FJEyIp^1P|%T2ag?R9jg{n-tjol99XV<Djz7-<>jnnM6_C)
zwavIHd`~+*G9<E=bg)4iW{~{Sp>h_~o1|G0fhfX-=19Ye_;w6~|MoVJ@05_2@%fdH
zH#FQqr%@Suzfgvjzon6%EO|i|ldssPUC}zGUO~lbNi*^zy3L+P%MlyL)9d9(OFSZt
zL2<J|h<>`{DM4_{DQ-j=h+{v(v8mV<Re9fdB7SA)G*5GBZX}J+M&of%lggMq-dE0f
zUKF;XKzXl-ReY>6`KVTIX136e%j{a31s<M+4ZX3s>A+=_Vu<WGTCNsxs#?jm8V9>t
z1|A?VjIW~A7$kh|l*s_Xe#?yWTOv`ghA2%%Ko<MRS_HDC<n@t54D(R_%s3+AiXDqo
zlP62-KCiK1dlh=>d@nUAc+&26@vk0d2!7BFQJ4M><;&32b>pjUbKjxtwwg+X`r*FY
zH8s3UmDsc2tZYUW8p7)4dWeS1>LJt~-e|vn)PDb@{eF3{WjeybMxM?F33TKwoHG04
z_-ZMFhaCowU{1>1Uxy()h4SgmBQ{rA!5*<2`TM6w>{kAM`G}3Ue{FHiziE0=(7dH=
zFwF?#pu6&ABV?!`!*a-ZbJS>$jF97ud4M=?6uvN*AMqD$;P5wSTxeOSGBp|eyeVli
zXjugrmO{=Mhis2#lNkmdnXPy-ANYWH=6fM*mJTwkJ=7U$3KfKUjq3PyKTjrxTeq*E
z<YGht%SyXI^ni}=h@F|4piLneOI2>qc^}{c=;Hu^U)F@?8OvXhHmMG2`uo7)k6dtm
z#sUl@-pmi$c)bRpJcU}QZ&aw43_W{S&#jsoO028aLQOYm0@vPi6jWB^D-M%!u#0~g
z4jH<>-hvV$N?3&=$a_G%@HX@SpC_|7T7tcSZT(H)q^xCe*Zh%<B=QBgumnyUsceJ<
zU%DWr`*yW=b~c{bD*(<OZ8q-Q!Cq<b^I%(Mj5p7{v_dQUw0ysT|8V8Tj8h}&AhsEi
z!97sHq#W8c_B~M4rh&(!5Z@EUd#0A&3$@%V-jqka?--O{-2-=L1`X@(c?di5>*fgA
z2HfUrkVXR6P7Abk;6XX`8;$ZCjj|Q^brVbHz^|K!K^b9!9caLV3g|b2GRRx6n{SA=
zTgEb?P;1SsqEMULu_dU7wxej_sI#}-Xty>2EjG1AU{xbFb+A(-ICTx6E&$oHALQdw
z>osJ}`5J+#^KGM76)3F{nc8fm0;QUfsk<u+Dfs_9XdSrrKF%gon?H2rgKbdibbK}s
zvnTr?&cSzc@b-{gP&z5$@LwPQffEY+-Bmchc`WNY#;4BrN>m-0x=cVXOmX1}k$M#Z
zw;}3oEMIOS)80X#K2lSZ&3D<nOgLoGE{p62BJf_b&#_`3v1ouRgESpr^Zvxk%0q1_
zyojotE8)OA*9{kdN)!HbMC*#0hPNb?(2ShGa}#)83LKeiMCEJ@*5Lz~9<0UnU=6kh
zF1`yft?hxwHV$l^Bc}pQ^U+)<%C%<;!C8lnJ?M3wArWrdm??F_VB6W+oSH?pD8{L_
zN7Qz-u~qPJu~3grb;#|mqbkMV+4eBsW!>YS%|5)2ex5CUt*%e6Mh7W+<!o(N4P)8S
zYp}OnXISUyN^16y<<-5n27hZ3EQ|{ak57$}A$Qlzz-A5Z9He;PhQ-ldr0|XBJTys;
zLjPzs7VTcNk0_dOs1e@ezcfd0M>T3N(bbsH;_I79^(6T<W5$=qe;-BTt3I{p24UZg
z(C9n8o+JKy80>6^X8iZyX{Xcu8vp$d5dZy;7Pfsa1=?{mIk1ItFGOcg7lS89vqk(Q
zS`0={j@~{$eDV@zRC{zlk<=+KOuAY?!!Q4wX1QpfO`t_TM2jJu#Rg-{_bm(_a6WF~
zDJF$ILA(W_iwE&w_?3h?9=27(+0_NSET)@~>k8og1p4^dvwmOT^1F3~#uFy<Wx<d%
zU;Oy?@Rf&1UeNAbHQFD2*7?{Sbn^BY8epfrtpL9%2khb>1-!kzT?VX*$JyuxY-y0+
zltS)oZ+Chb@YBnyt5V?Zjs{FkK+X7FJeWcM!Ko8=cUa_Q7NF-bzwS$&P1@RSf4kG^
z?ZmClHyX=<n+pp<rxYP*2j7OBZvo+{h7c-)+@E-=B8v8pd5Un#!;oslw=fPbW6ZiV
zTVMrfaIo8Gj2a($;05p@<%vPTcPs@Px?zJnY}XAN76NAGh8t=?h_%O5YNPfz6^J26
z>}rTv6XmB#GIdXqRv;)`mKR-XBnP#s5eleYDXp%A3OgNV%`RuNv9oGdFp}e1e8bum
zo2(a#!v)c`+SmpG6^LpDn%uiJvBZ${S-`PM=cPDQ+vw1oL^|e#-K~q?k@y><hoTzV
zWR}%AiB3{-1{Ld15^ELn9(dE$B>uR>H0}V<&<jxOA5Jf#^(tCiqlG>ZiW#6@#tR3v
zX*4ycbtEM%)|tC(Rk<tPM=rd4DLP)z*r2?p8<zJpD{l*Rst@=%;OO-AoBr$bpN_<G
znxv~}%+8mqVKQU!Vli8Q5VQgTYzk7$ExAsm?D7-q)G8g?ls&;QU%#Tk`Y5d&j?lI8
z8ol@kXVvj1mZGMW48&lx!T&F3a8hJHuQ01!g2}=F1}8R%Zb+0ZV)QJRaY4#PWUifY
z7dBZhj5_PuP*z#{kSiGh_OVvGgs1D4@KlxXg$Gw*SXI?2SOd0Y0%tn=OjHFlet94y
zRbjXamB1v^!c>ugC_wqAGiyequiG->SHgud#=8YVBRJjQLgpkS04fQMXCH`;lMn}}
zqyq~oEUL>-PC8P0tfU0p$gLY)lx|i@yzIlaP#|S?MYMo()FfUe121pMRoGI)21rQa
z7i>hmsx3H!ZxKSLa<@~50g2dY@4z3}=(7F0yhwxrc}_~!+l1K)EV#we2?&NRmz8KI
zLs>(x<Sw~I2))j?I|8RAe^=qOG^7mYvw5tWz~Mbf)v->@J&of7mL0kD>6yJ{dn|)D
zS5=M9o=(RC#Yr9Lq>QMh0<5Pq)stX$sJ$GNrA!U1)p1d4b<7%cTs6>GtE6_2qgH6y
zP$O1oTJ3elS7?RVF)A5s3sTn3-rsk0r3Ujrz=RnglDy?eO1bvLDm32j&hsF@*C_rS
z+fN?+K@r?CN`e}cb<n-qkO$S=1~vFB6dmYr7BFpCWpK8=lnnLzSF7njztgiXI&!;4
zzXGwy(Ny;t{cx{VYa<j&YOP&V0i&`oc$zH=e3mZ58=zQ9%UEsoYL{0;sw{F4C*vHm
z73HmuQ7lViv|G2dB943Sl_jP(3-c6dJ?I;#8=w?R>q*qGc4>K(>f+|J4;rJsv4>VO
zeK+g^51~dSlvH|kc__oK99Hqid9K4M#JXAt*6J!u?XnJftkb>%>5_xy;x*gptx2b{
z3C6Q)ud)Mm#99c}Cde>5*@*3S!l&I*^m4R16%6Lp(=u1ZIw-D6N?e2AZ=Zhi?RI5F
zD^QKScDYEdXHyEbQo84gGSpH!OZBRWsRM5<l4>sX)dpyVQnSISlh6h;j}TU$(38+k
zN;wkNA=YiW$eCYiR<bLOdKwvdj48v%B*?`!Mr}rE8`nbvnYl*Eh3+9Tx$db?Mr%!j
zP<2}>Qr4~^V(WBGMo#Y<PNq)REF)VR!`t$%sfSkDHHt)`Yluu?=Bz_XYf*z(ow=ik
zSv!cxt<ymnIlY588QDQOUC|=?lrxz<#CN20@&o_*r)e}v1_%7@yAc+DJRQa#S(^OQ
z8s&q&9(aKtaODt(PD(&KKGJH5kT1L(?zJr*l-CZzp*>{i`sQ#)3?8FbpwM77Bg5SQ
z^H>FKJfl#w4RMdxXh-9*=W_EW2gnEmVWIqp97zhs5s`UFdXh{VUKCPD4tkD0(z8)S
z6nF_n2op!dPTRV(GEPn;JMcAV3zbqL(MOYrq#@OkXqYj*<|Y&@rG&C+ifTtpGGz>*
z%<@Q#Bg5CFV8>8&hso=Y9T2)&(*dF@)*K*(v$X@xDHk}oqlrZG>&w{xRM>yTX&O&2
z<Hhxtv;XS!w!6K8{Z}{mYX9{Iu>U%m%|9&&*E?#!QK<7R`yq<Q?1xo6oyM2ZYRLAk
z;YIsez4-5Bx=1jM;em}n&|&`z=5x$W61*>Ddq3mTe@~+2C~c49c$l`ML3{Q7;16nm
zri{(<wr2GAEk)owC~<p1`~7p+(}br>Ho?iVU+lgJQ0DN0-+fL+_v{ww6z~R2L<JEG
zTr^IWpPsQpApMrTWN(|KOn!;o@KQ0LIT#FPE8Nt%n}K}Loeds87H_4?f##Y!vAgDC
z)qJ&^Gp*p~XpuD+*(vri)B|cAZz2%1B7Vfp5WWd#iXXiq$V(ICrRR#PG4X+16X2Wd
z$1J3xfR+uin0)|x;V%8yqi(`RfG61idgF~{!;JwJU|Iohu`vSwze0+5>A@GDUhabg
zp~i{cbiBJ8GA0O*EW`i)toQbZQ4Din!9>7Hk(f}+F2)w|a<!PI+%8}dr>pVObf`Wo
z={2Uhtxl&6QgXM0e`Ekn?{E#|P(bJ98>FZDQY2mS5x(#??<)?NIAAgYKYB%wkb{I8
zNY05*vGF4n`(wkHa1qpPLBa&B#?T{Ewo2n6TTSW4hQ|@vsz2`E>Udg%m_B+%(5yKO
zDYqutkL#k1W>Cw~Y&?t?Bzh(T%r29zONKlZhbkd**0!h5PhsI24n<l~Y}*_x4s|CN
zIiWUNZHq!Jc{ybed*U7+Gn;;teg%GzxkD73hpKQx2X40nmpdQ%JqK#fMCHC0@^Dx5
zj(`_<gySbqB0!#`=v-<gzg?qNcPLGpJ|d?T-vITrh+74pnDPnq?h4B6d`XG@H<kah
zaeapaK)L)Mbhmq*y!;=A;aB<p4=Mk*NQVC|p3agb`(GGE9%Oj@r+rE9+QENcgN!@I
z&HF!2i2w1-D!c1@GZ)*lc`4#UWgw5(#FX8wq8-<t(cj!?ELuVfp-DF*V}6YDyrQ5*
z;_(6gOM1s9ZwDTn8xv}#)OcA!cx<8c85bE}YAXmvV{ATA$~z1xk7>h2Z23&92SO?T
zfImb`ozlvP*cAMqT^qC|(BWAejT!z`L^Dr2rd$fV9xl;B1}b@xOkgO~jJ2gHDu?l4
z5u-<JANQNRQu@Ft2+vf?3*2~6WT{nL1=r<;vh$l&w219-yK)7I9oQs!1YMUS`SLr=
zEU+F)3{6H7RjSvyUZo2xR95=JWNs@TeprORKoJ6E39r0~OL=Kf-TKSP+~eU`Nq>to
z0ncP<qoD^^0VtRcj}RK>1hmFke1#brj8w#z@sa|iWJ4Mj<N1i*Sw;?v<vXrht`5_M
z%uysG%CwHk^4owXT?gU)eXPxNPto>?EuV8DQna_D`F#9I13#T!jN(@q?I^<|G^np#
zQw~JXD}x885<g`=Xk_zXKWvmnv;_jNZjT8Vy>Jw@K&MvP62(^dV)Zf)i<5wl6Iv%l
zwomnE66zlbwXzn_Xt=mHSiV)#c$sA<nc#VPj7c~t87Qyy1RG!yWZ%(QX^qh^xsO(x
zWUV&YXfyx;p7kW~J3g;GpM@D+V_cNcQkL$M=o$#qGz9|k^#W$6dR>0)W`Jgmc7HvZ
zU*p-N%Lk8`>64!JNWIm?y;>Ur`K;{|jA%RQX0YoHhpv4%oS_I=e>`-ve(P;C9=a+E
zPT;F1(ANo7#G2!wYab8Y`s1N%84og5iY6AaG`?BIQxKSin+-;j<5FzI(ou4?+{*4;
z@v-zKmB}qizM}RBePNW!$y^G7j{a$Js$!03A%Tp=i4|l5HoBb9iXu9Ih`^-0%yvdX
z%PRWg*<PN=0lQgvK&X*lp+YxR%vZLMN~kjtYBDc*u+f0SjrbR@0ZZQW=hZc4Fv>G_
zE;>LH@BAXk12%%5@+KIPkB8U9XKb)n6u{gY4caIqV#Z^5d5)ZO{ta_r#NiRoJ#OEu
zu<}rZ%Yia7xeW$Wc~5AYpKQCEEKBbJn2|^Umc$?S&hd70IsWu&_94c2CFZJA7~jYi
zrXt$LWwx=aMl-Dhc%C2t={82I1S;3zrDOKnh@onB6)hDxI59?SNXJ3m=|0YqsVbB=
zN@0aWT+Ae)G2n#33+kT*V1Asqk)ol(QMUo*7_<gUoZNz3T;g}|ekb?V*g$-y)_ZGL
zVVQdFC&mosD-gAI@R`|E6_{GhbWD0(j*8=9$2Y_z<5S(bDmq4I)u5nNTF@}ZNDUHN
z?eipf|GQ~Gmp2u(*$l>~8rWvP8=u^LZ&EM@fJwnU2uu>jNH9sr(NJ?!<f^IsuBaUY
z;pj#CN0^^jwuSzpbfx!MCdwSJ1ugqaZQWmNu<+}-2`ZP-g>@ZWY`%~#>aC>a6^JGX
z-YKe_p#u}gbQQD-uR2s-qTEG^4N(S->N@edWd(NqZ6{){q`)3stl06kZFuj7Yh82r
zYu$<1`Pi}YF}V888F^}D<hSSJo3fD*&<&kR3iYpl2ULFllT7(_+cNGCpuTbX{?i8c
zKo$01+npe1{}pzgc6Yws|NH^(e}Zki|Jhm32<(q@1q4+7#KS8KRr2%GlZ!0Ga`8JZ
zMp}qv*3mAJQs}ER?x(ZyEncn^Z<CIpF));1%o7@%6JUCv!L6q0YCfMWpoUL!I11Ww
zA9bN+kU5qylW}*qtQY-@oFVu*u`QO(3pi2&1^u1Wz8^_J@rOk;M01M0Je1Gqgz2+m
zRqT-{6$s0Zhp(TXE?}hwQDhH_!vBdsgX=!@(Xx+085aF$GQ7`XPcR`?G`19N7CYtf
zqU#j<c0P9(D?V1&c7=fJ)V5Bs)wSitu3Ot*H|^o9*zrtOwwRlPBGmNdM4At2GGQi&
z!qBj0T&Rf-mEH_pb=6(a?@&5#I++)tbeg*in%v8R6q!f@a~d_3;9Jdsn44U;7CYk^
zoNP2A80P?&Elomvb9e@o+Y&kB7V-5WPI*`~*at6H*DMIOJKD+Sgz~wwlf95~+n!{(
z1Vzk6DcrM5kDb6|l5*!Ip8QUyvAhIV2*w&Jll4wa<VhPNDlKkfPayH5=(_}OBc~ws
zz%l+r=CS9@vfMfB1ldg(kJ0~&x@elTlPOvo0z9-gQ|apES9L|nzdvCi!GxtS-5M}G
z9p-a&iyU%Dv@M<BS=Kzn>*&P=sf2EJ<tBh=9b&)AN4_TJfh<dy<nw^>P22-k%`CiP
zbDzVReKsAEp&1^Wp#!dFT!}0BTnz>=3a!|sz@M{V5&zHEKmT!ms?L8P`3>U!P`7kk
z`vh2V{(IWX+y8|@xc$}s?+<zY`;R^P)v*41_4c6fPVN;4c3VNC`$#`4rYtk2EbzXV
zt*%E5cGEo&k|Jz8aqz=HOtKUZso(l!(f;F^Ng-|94ua3)0SW7FOGUwsN0x;t-8*6A
zUW^Xwb92M<H_zpp8g`3LQ@^!s_RdD>8WwaGV=|->r^+JDFlsC<et%!sx3T|n-noZ1
zmE@DK!JhGg%5Bez#+0RX(HZi1C;b;z<e8g0x+83$tYKrQXzrFbVsiNVnAiNGi`XTm
zh33DDwl1O%*heojc}lK47*?5_&Anqp)r5?6*j~=&*m9mU0LZ;DF8*@gB#EI)lh_u%
zx|owYh+AEi9a^t+2TRHM{u~DUCA<~W31=R8A?ZoYmm^qVaG3%@a0vn`u3Cp|1T*<E
zj;Cxg0~szM51;HJ=j8MvOVsCM@h+m~Y5@z%V79<}qGy@a%>F*rT(zks7hneiWPgO}
z0J$~^pWY>!s$4Ramt16ncazB7bUJAkAXOoS3R2t971$xWT*T3PZm>L9K^37PfiPC{
zFrT=pvff7XZeDkj%onIXEN7VBQn}|H1`Rfx#eApZjRT{Q+5m@Q2gzV*8obRT?y%z*
zzF0@xzuypoMl{qN7AaULJoqa8_T=sPi|6@$L}(OxepmX*_PV5q+BlJx;AHRBtBXC{
z%BA7}j*xP@f(Htz?o(WKcRnpUsh&=%S0rUmTcv0{zZ+jAwAN>=s}XwxQf(5A-z2xM
z-c~ZmMG<#qs|uoaPBo(TJXMnF_p9+e@L5H1117gMlBWG#38P%lE90~dS(r9DyImV$
zd-^I#83Yn&f_r4UKJm?%e_HIod*pr%>YK6ujnO<@?RC0Xj%Fk+nU0fps>x;r)v46K
z$Nl}6*8eQy>v++}jV;~S1GLKjGk98v|FXTa{Z;?-N7VoPr@DR`xB4fiDt$6VB(NtF
zTTM}x%nP`RR6+8MKuT_?j0;g?e;>u%-6Ek(NVgEvB=|hR2ZCT757jiep2owzzJT;8
zgu?y!tFEIX!Xv|UHDSNNu<3KYK<@VVXUxIG4g@SlXk-7yI_wz>*k{dhUPFY_eWR%F
zsprI6--)Y4J7UN9XS|r@kjUgd?+iAaiL=Eds4Ro_K=xq-quxXCeNoJ>zcTTdrM;rt
zWVs0N=n1Puthp!hs4S}ZG_X0G2<KeSC_1IGI`)9FEw42R8m3h<ZxQ>UwKWMJPlCs2
zcUub{$pdtwnFc0CD-D|P3I7iH{dkR8@h>tR#HhA7MIWcfoJDGkJ5L+DVzw1>PUPrH
zQW0;|_3N#6!QO0kqJ^_^Y+uCVBu;Vvn^E2ZK0DeYt{ul{NAaJ90%bKAvB_)^b7jki
zm^O#uOi-K&wfxd&BQiZdruZTWnxjjE53pshQFO~y_|VR>v@g46bnjz9`OUn~3$85C
zgwth0Npgw~Eyv=c?$VO0BLAcABx*aw=$_J^jWBryI~}NVu%exHaMjLVaLHCgD}6~N
zxm&`&mE^=@=m^f6Yvo(Mdft}l3k%R;vjFd)j!dERcB>}BgcmTW$Ie8uGpR|Pvoi@=
zlfcnD<az;g1x0<5_w3FXJq<y)*P=gYgL_Nz-}VsH6xyRdh|=X=VY4{^P8tH+XH}qq
z1if=fishUD0YmBurbwC1(F~QxUlN<iB-r0KIso20*r)NVC1fF}u#tcH08xd<xpHpQ
z35OPpu<^T#Hz<4r7U{+|7&DBpWkE8ksKXaT8?i73t|B~#NW@H$NH`clTjP(%(hIl1
zK-X3}O}obyI<58Tge_4KolbLK^jU2>vR?X#;2uv*WS?(a=epXbnzWL*77?)#H5o44
zkFmRarry!3>?yR9P?))U4UR~&&h~+DtY6&4G;=5oRXeIdo*bWjIU`a{w{%BeFpFF*
z<dM}>h^_zvaw3X~E|05vbkIK3XD~*T>z38Y%uR_Cfb&PGlbZYd>?C7#T5}T^j8IMs
zRj?hr)n^#G&gK7B$&zTHQo>dTxf;#Ri9)k?%RKFB6H)<&G9Ejas>Q|Rb{Xw-G+Qh+
znlLh+V%9^aV_$VkmQl3&=+&sXAlfS(x=*Wyu9yG;P9=9=Jgqr`t<<bOcQvP`AgEey
z8@>D3cIJ}jFz5c(vJ+(ccS;*~f0*3%2Q!dv&_4XxGm5tCvu~`Mq#IdZlJZoND++l#
zyYjFejRWxjkd20M<6S}@F(q9%qt~j1X*XPh_l*nrYH$2+b^n7tVDVjo|JSkq&HH}_
zJKgQ4U+;hZi19zaxnul)An-MIsMC75o(`Jkv|I7KO3_%1ysM(ibazcv^m^ezkHrG>
zTZIPQIFerOqo#YT(mGNw`URX|Vy;<xbIdVLui*K87GaJ5B~Pk6YE@fymMicbMZ8l$
z%%N?PN_#l7<Ka-jo7hdM&2MkJQf|p=`v!0Pd}?P=#~`?BZuv1XgDYUbl!75Th-uxy
zu?53FUPNP<R4_w7z&eI@AQ)ejr7<jM(DdU^ln7xG{R}8PIl-7e`Fy0JD&EFS*1Mj0
zdd|3N=<00G;9Nf-3tvL^fM2-|y}Ssz-C&O%xj*kTzGz@uyMenIX_nevScQt2dpI3F
zkC%Ui6;2qOqGV2R-zST7`3xGHuQ<!(ax5dnz;`nJ&pzY=mk#44*K&rI7?dT&QyjG+
zhRe-JH;fA@%Uj22ZMw89L?1YTe__u^rjbb4QHNiX)B9qQ+WVYb1fjth47&HUs|f9s
zhkH^+)o1a?K|Ej5^a(x&9BdGiVXl6=(XF@{Fd|1{JjibIwwsI2)XSOlPr6k-*emTp
z#_fm$a-C4a>KS{WJyphJ#H4CaMhloRJJHt$p(ho_U3VY$?#!mHOe8r0#1h#b%E)=0
zyS2x`ngFMB^6xodlWpNRyq&j%r+3TQ9HX)x%@*i_ngs?`;T7eswXFpP)N%Ii5H9&Z
zUvM3nbeQ+0if$CQS?wUJcVRMZardi4=BDF^`ttkE{Sa6FQkcE{US>BB#bpIiQj*JL
z2&;nLE@sLTe(RxaO?GQ~tJop$OoGZD!278%GojaYy#kR0Ov*Gx8s0mpd^yRI_)9je
ztd0~8OQL<5pxvudt)mXws}Gum0rQ{FrprS(YfW|b`BjqbCW!(RY<K)_*lFxChrUuT
z>fM!>`e{a*pw3@h^uK$1A>0@GKOLSOAAa}hg_kz!6d!K;L8s%08I~)3Fx;^CqH@w`
z>tTrk(mp~Qjs5vQR3BBL|4I9pv8zwv{l1X>Zzl{oPxJP_y{Eme`ky~!{I9S6-<gRo
z*R*7-t1;bnjSW=);#qSuiuFz2e8_)6o_USu+yh3xxMu~A!K5Bh5fXG{YGrhj&^{ip
zMSRr<2%(z5&s=w80MV<y394DoX3TRVmeTK>N(qc!m|q`Kr|T|W#eVPjXr@9MD4%CH
zz)ZdQ@*WiMdE9$nB}7mK)@ZsuA`2RI$_W~N9(}FikVC;}d8Y^vKac(Ilj`j2pSAaY
zQm!X=y#A?-|Je<L{QXZ5evSY6_5R16K>F+bkM;iN{HNy?7eHFnPHMbFsEoOs!RNZ|
zTFAKMfE{pc_k;~Db-3#Ro$Cs^kgE79{H~^;$f=5gf~P>6NxR-wWY-iNi1y>kQNqbL
z8;iVhOKo0FoesMN4)qrbjpv==scLz{={zw^N^;x&NEW>G^OFvAHbb-A+0>{92z9fi
zHvk0A2XATK9rN;3Fd`Msu7+{LAC&bgV>fHi5QebKyFo+PxikRM+7jcxL^32I#g)&J
zM9qD6g9Mi@rPL+KNpnor%$h6`PwcTu!GmcBytJV{-ZaWd-2kapT0(13p!IppHpUTh
z*ETk2Y~02?^HPmQM)5FfWd4L%fYf7Iv-0o0wMzskdmC$nb1Fx%oZZQ;)Dw&1o>T(*
zj3-3)N&;9ID7~T2M3hIbB&39iGpH=bj$_H(%v2aty0yks@@{>e&(;Z#|0L8d#RvCD
z)e+9ZsPDV1iIjeG;SU-mK_*Q6fz~mZj&f}hwivX`O%d-2%>DtP039L5pYYOpw!o;Y
z%4ZCu36OIVZa*=b0?dc#6MiML3MMnD@Q6+6DhBW=8^jBeDpt_o+*EHKEk~(6f<>-E
zg+?8ewyK?ieH6L~R%?S7byUWK|7a!d7e4=`x5GOf0W0D^1Uucl{RYy8hY==l%x
zKlHvH0yjDY0vVt>h!#V1)f>Er)*jCWd<Te95zTKlW0#-i81XP=69ULZpE$obJo-x?
z7V^`h!;9n7lm4sY*T)x_TEYN3dwX(m{Q3n3t9VVowa=iE4m|arzxe*}?W+s#bwi$6
zetN$5`oMts{`9Q>`WVAvoW0Hg`j*&AZ;W8r2YN<~+hHGieKO<fbrl5<T7lh0n51w?
zy{if+x0lq<#X#C}G>&pdV-eDHo($e|QAgxa=TfL{FC(Sx<jTvS`MoFB0>;R=&tG~I
zF1{Jj`#pjpK^H35=LRY?5c)H2&#|YqqNV|`MSIFMDD2h~pR>(ypU$ist^k<?k$YpO
zx_cRC#|X@HKV_Gf%KsG0h7OY&jQxG7vS<{9qs`emnK`*}FjX)1gc`?v5FV4OPkj*-
z)oBM@0JSnq!=<tNn`(qMRqdE_Fscl1z~F?b+4E8F!xmI5eP22pWi~pSPd}Rf0C%(4
zbsO|#Kq^dUgwu(BVkvKb6%VlYO@9(2bbV5Y{Ut~ti=5Qw%JG|9hx?OwIhqZ{(O$$T
zIZysssTbt2NlJ&QW`kO4L7RQ_sb~=mIvW4PL-_6%mwT(7xWlfy%`1Bjn@*#Rm&BA?
z7k#MkE>bV(*)yIWL?@Ip{W6GglhC*8BK+j?`PYGMqmTEqh`KsKL%O^laJIi%QpjYA
zj^e^)SY{Q|z4R(kh4oMo^=T#QyDPdd5AFEhRO1hZ-cx^Dp%+r;1T>d$=K%2ABpUa_
zkD;fCibW+H3=b^j2Vp&Ij=ZUU%;+1XB6I4jdKPeuJX)!5$-srZ9H!f3PQ)D>V$L`s
z%v2J^PuR1vV{53*0-tp|wNzmovcBG<j@rXvDHn7st$-0VRUXk2z0Ok`?o~Za+c!4c
z!O*P|^Kd0-u!Zq75G{{KH`IiohHHBDWD4gc<v~z1_+vEc0k98%1z`nhphAmYxq|?7
zyb3ZyBSNX>u?);!uC9abx7*VBiFTgW61SdPy3t8Vr#FR#(Nc2tS|`Wqj<j;M)}TI}
zz~8wSI&$s+SBraLOL^sIBjxkf*1E&gJ_~Jm6?#6ir(19-uh$|{W0SDYlslK*y^jiX
z8`)wgvq1A%c!~a2TpHVQUY=wUo3?4WGqgF7vW;acTNdVv8H|?+Loa!L1B-!qc+{Z$
z;$T@0gw2~ZDZ-qz@=Cda%Mxjecn0yM(5w^+`j#L2GVFK{Q4f^wS|QE{Kx2H)1*-I6
zd;6fn{|4IZa{Vt?>Zy1~8)|{8;{QI~+0N(x4SHYW|NbHMzdNlpeYlGz#J&Gm6Jj1<
zjq4b%`<T`v9`5F~yRgC>p7p;wJ~=%5pZ@8a{+m~Ce>gsYBeKL`zwR^dLBZ;H@6{+3
zN7^&_D~sHL`>(}<xrW|}4~%oUhOFavzMe1n(T60B-%epnp3X5)Cp~|cWG~L+@m206
zlYZx4D%$F1`i4Jj0MoT&l-@C_>8=r?uQDc|Fp@$4&@^c5>CT{W3*?|`8d=ZPvVf{M
zHd?HCY)Y8=L6DWBUj49w8cMymf|9A)k(N~nYeXW<LebR=MOehF8Q8fpilK_=dDk$8
zm4xcXF{~n#RV}&l9+XCb&OAf4BA>`2>lZo+pQM42pDMbBJwyawkkLLt>P71olEP`5
zXx*%$adMS#G2GRS4N@uitmR=q57aI=2MY{J39WzuEv!I$Y(aZmg~o@iK!b;B?qDny
zifM{=tJ76>s!uFbd2jHZSb;>teO|EjL2QZ8JDn>ml7ET=5QWT`%rRP|kd-6=toE6!
zvwfw}JZiFN06xtO=UUJn;7<L92iEi)G<YQ09HdKA9k3QF;f20Bxv^nkm`oY+lG^h#
z-8sLG-p3$V%KcQBLIDgO3aKUOM^k;lMunp+OoA~;lJjLU!B(v!TOpyoAmcqp*|dHZ
zN}p*6b+Sa>B|GPOmn~*65UU4r_FZh<>xmhn6eSXi1f=%BtWDhYgdC7I%)Q7{Bpy9t
z4@JjLM1)P=F$W}N6+8IhT3q1jKoLI_BTg>z#fy=5KU_Q^E}pE<#gi&7YWD}OC?eaY
z)-T%I^8R_eM*q}r7g_dWY9IBqemgzBw{~h-+o@HzomyYComxgawbpN^R#iJ0w~RSW
zmbh!wct(2{OVQl?h4g>y|E{Lf7?!hPOD;#PVLTXPfGODh{x6*T7gIQtw(k(tB7aym
z?!C&x7l$tF@Tw%tMIMEFo^*#LF)Zw*xI-~)yHL;?cn#a|5_P-_-oeq1lcOuP4oB<p
z({P8XaB!3S4%~d>?yceP(_7ybIXc8u<K6GlTX;d}U}x|f@nc~MmJVGOR9+$SCZR3{
z)Iq-%sYX*(H_<S%kSo-3Pmg0XF@@zP`#gRyiOEmI@8OJAd3i%3QTeh9jmLs^=Si>q
zZ7*(h!;XUU+2v}<Cfb*DG+T{_NKEs7>J1s{C??%q{h$kfsqdK|UNeYWVMh;k6@pv?
z1rUALkvQZhlpj~KWfD)}h!Qq@jkspO!^H8riefiUvFoNN|FQ>OuDJh62iJG?{;rPy
z(&^;xe}e6uu>1A?=Z_fwC9quHwDJ1pzpoZ?{6~)Zl0ws_ai1*V7B=Jg<0WdrlH2$d
z-(CnXwfb~li1R-eZDEr3FN8$<Dvr`6Pi2)J2?w$Gheb5Rbz<+Fco0G*5IJCk3`b&$
zh1wq`(e>`SRD34Q8S|E`Ch_B<89UeY^<~pgfz(1EMteop&>nsF4m6G%q>Qv#+~vrO
zu>tObpK3xGQ|5sMfNU0#fv0u)p7Jd%8dHP@pu<t|YxI_{{(oir|LX3$f7SjUh6VeN
zV7uG>>i_qL_5b@C!=)^SKR1KGZ63A|s4%SDRVkOZs?_UTch(;VeGDXGHNl`G*&>GX
z>=dmE@OW`}38x4&Fu(_NL|9G8A>iZDEWWz(YI`1yS5WM<pM^g5?m5G)^CDqA1Cm#7
zZFvazjRF5V+^rGLcYnX&3?%MjT=)B-^}uh9&|BwbM<V7JEnMhW7!~(G>odwx!;&x%
zwW}xz)wz>WH_E?TydcZa%v#_*{2Pz^Fs*K*aUacP_k;)7tGA{P9S&lgdvt;I7eX`B
z-<Fh;Orq;}?Lv+b%U*^M?Mtrsd}hfPPz;@|ndTgAUX<nPvKT{KE9y)yDx1p~72iFy
z1D|;$Bu|oQL%%9)WjEy)Bg`!U%_cVtN`@<<><i@**+!+bhenvD;u;N#5UzpbR+Fi?
zIM1#cJi8Mj72duQJ-NaF=vZ_abVFE!@htUKIp<ANDQ*yQq=Sc^w(?cxF@e8)W5JZE
zYVCt@ywY1WoyUVDLN9I1C)R0q0(Ke063sr$7VlH`As%D42%cS{4f7+O<0Ev{P}-pV
zc_Ct?aK}Sj+p&j`EnW<Ci1ei6#iVrc9w%rE$2ts?A+$N@`HUq?VI4OZlUTDPCvxs#
z4`kzcnv7>t+3BzYeqzxSlZ=dGZd`|?KzvT)<;4h2(dg{yY4&7TMSf^Q8C&>D$`;c)
zVU^3Du!12B7<|mVdW$(mRK$C9aOa+aN&I2&?R*}~%W-W+urW|Lb0@7)*&W7tn}_`L
z5JS#Y{y8QwSdKrvntdQXXoBXMuT_NI=nV)#1t-F(`i)FO(`4p{AMx^Y++1-g`IQwg
z_!`YQ<G+QSrMW%=B%k^O$fu$^y)&#P07dE$e&-kuIVtnfm9I%e5r3K=Y5!|w-vr<g
zR=X*h+niqgMX+mxiAC<x<0;C;$!%=^m<eLVuji|U6<1_^73~G9QTlo{7@eadM7+2d
zMbn(=jVBt+Si-WJ;4i144kZeThCj~|Ov^!!M9Y^yWV#jTjU$Vt2Q5I^vL&)@H+kCU
zx>(ZJMNRqv96fxr5gm-!a28|aCiFa^JSQ;!FX0F<pO2Gxi02VF8+@EEcup8Cpm3-H
z3WqXw03T7pL$=#++VUCMUN~!nB+e!>?aEMA*ku8uwTnpiprveS@_Ue)1b}Zf{kdkd
zkGYrn88dri3vl4^D;7uPLchpGV+}S440+MgGl}*L*DKQAwldOIe5Y1c<N4m(7;P77
zHL=dZCJn6c>osq#xZ0=1!bx%Sb6iQntP+{io<)nxk!g)c1LBxSu*NBiNp!auYCk6x
zB)OEL4&dLEohJ9()t`4ug|M1piV8gGvaX{7Re43ZCd+tl-G%P5jdtp7joJop2+F(M
zb0TK)>*lgN?_I}+UbZurZoQ?uwl0mB$Auz_Yr30#M+??kTT*Ut%6vaURw~b7NK1vp
zP^++}(pli05|HwY++6HQUaN)ThwZJT%}+3-`pQ%9<GR9o4Ie$Z|Nlf@%9_}pE}^Z_
z!A0G3VOY{jI6%PagnI>?N+Na@Q9NH5HbF-m6~)G~SS?3wP5x7bABbu`L6k?2G`9A{
zmnNh93^jKDHFjm}!?3J>%`GxqXB7QDn{xf_vbyft{<TTX@3vd=TA#(&uq@-6#v1@t
z?cc4gS+^7BW?ioiYqnFAoaW6*ui+c>9^b67ZR6bm#u}L?w;S!-Z#X_}Qzbk2I$GlU
z+U?!jb?+WFXXyM~#y!0$j2(MEUX>p^=Or8FdON`Up0<GbzttWvue}LWE&0FEYCr!M
zTJGn+{dzzDS6T4qzuo!B44zU5F1eh{D-^Ge$X|wTu2BMID&A^B{#Xtvh3_&LF6cu2
zN}+h$S3#MXUrPXmAYVqxa_%Urg>eWd1vE|s6(luTzYJ1k0NtwCXPm=iUT>Vt>kpK9
zy<VHw?zXvEQ`53)b+@bl#rteCM$N7ZX>9~E`eqKt>H*@pc$2Y}SkGrW-15uiMdCS1
zc95{Ul{naV=d5wp-b7UhHb&pMo2Xa9n$uRk=U}Y@K@*2-VpuCPbXI1&q%s3%yKOsa
zQ-xt=Zs2a}oigs64YA|mt^&i-*i|U@3b~sb<Bd6|7x&OofzWnxnK$Z0tSY*_SXO5q
zw#w-!li%`1lq*JQf^9hx%I*KSozj=J|9ct)JNf)?+u_r%@t?lNe>%926mow=1H!#Q
zx*S3W9O$p&#bP=$-Ym&5CVP?0fTcK^=<lwIuaNKQwehA1#5Ldg8$7Zk(<@+MOIUSe
zY90YZZF_$4ijCrEh{_-yGiZi)r$cOl{$&h<DejZX6w@7GXe+K~1<Xk_Sj<v1`+&6-
zJ;Wx_M>uOSDhwD*DH9GqsZY=x1ByZ_cpi*FMCXEN%EJ|5AE9+g<~<y7<<umZO5!a2
zG`YlU_yN=;8OB;i##E21)pWpVqUA!y7`J24xYH|*m&N2HAm<L1ignA^H<>;`E|4N+
z_<`XjoQ%<21>*)%<S)@!ym{o*J6}RAefHM`4y+$iPB|_#9-7fW55d>=2pd4of3ls4
z$(jR+Nh;~56czxk4!ep5XhTDkpsi<QM<W3b-yCC^aEKjZ^eOn3YCJ-JV;!PfnCKX7
z1RH6FF&A$!wqn*C@yB^Y2djK*EN5HNd5~5r9-M@B<g9}vFY)v?S<I$50LY3*b{az$
zZyb_HOIn^3I#3^@9~Q}S8Bci;7z|~K6-&^PXtv^EwWv>J0eFI!{c#8->!V<m#zS`b
zN$@XjoGxQIRfdeLh!`xDg9t;k@Vywu0gY%0#ODjy9&tjPJk{Y+U@mjx56zygSJ7g~
zS0;+YgE2i<<7F}*3l6Z=Qy*rDhRP>S8pjNlQa1ZAReeI;WlmTah}qgOPA17R)mIDH
zC}z-_BV$pK&i&e}AqL#kU%;dJV!$Q11EaW<p08NHKY+7fivO{Leeyu-bYE$pT%UVj
zI;*}tf6>2qdG_M)`MJJSuF%`}K>AUNe`v9rX{I7L#Q4a%TFfSDd=6<4xrZ6_3X+1=
zXc?p47RQD$#AG*_GsBl_me+W~3GpdLJqsPp=RF@yR3r`+So>NTZ4BWfI}#b^-OR@h
zZGR$?@q)n?Gh4J(#6c|e(EqN%=IG`W_XY3$PcS_C@Uf5cq^}B!(+m?hbz?>!-Iz%V
zM}O~~9g{wm;llpp#>J4&CAc)OV>degZlS<O*6aS8<E=l@nk{~dVI&H`cak!+e4mfR
zH^+VKb(ntr*;SwL6vp?bXWtz^e-2Q`@Yluh_s1{Jn1?b(KOvjGt0YFo`mjV?CPR4D
zU(Wgz%c4IRM=7w^U~7TnVEC?sd#oH*uzkEaJwJXYR|rzhz$P?OTmrm}QJObzp~bjS
zCk;H@6KJ~p8FmOLQOcOGG||}!nsaf>*E1j<!Ft8Ww2I<J!Z6<A@d|mgj^%>X6|@Xp
zO|!Xua@v3X;=8v$^v^M^QUA@^>GQWo7ya*EogV$=JQqx}3^1Gh>1YKN7~*<(e020@
zv<sU=AXxlq``hj_o_GlNrHAj{y?aRFVHz*VH+>po9zWa&RJo<$-eKs8^{Mb-OAa$4
z@Ztnt_b*<&esg+;aX>H54v+qFeDZ^eAv&IsR0cd;_EDstK|3dLx?AzR4~JjA+_HVv
z3gC2`Oi}XZs%iKrVL$cBRu~p2Pmv;hIqB4((L?~sh+QKbTF?&o0tsysu}gUQevZm<
z_F)9e=P<rpU0>6MA&3mH3RA8_b+`C{+uL9P5BA}7Y%=}$v9QN+)#U(CgfiU(*>hoV
zZGC>QoWQnmegQ51!;AhA{F}vIwYLnvKwugrq0D^o)8VVPJUQ3V>D!YF@7<d-{{-6Z
z?{w-nx#HSO{srLV&q944>>~w{pAlAawztO~=1J|dhxk7C8X^BaeF&`nU5iNIG4D@M
zU;}TkOoaXY6V~92@JJBZWGP<%;&KwLma_>CJW7QLD*(De^F45YQl8;QJx-Q9vC=3W
z&$%zP9Q`LWW92d{pR&?VxwMN?;n53U02CRJtJ@y$?e^#ZGyi1JbiU&-+43R^6Fpfd
z`Vu9?ltkz2Sec-s_<sm;*H7_)<P|~H>-P^|T^#@P;;7$muwQ@G-v4y|4&LR)-s?l0
z-TmjMN7>3`E_<{teE;_32)^Sko`22P*h?iaWMou@D3e?N?8TcGhZij9Y;9#uYP4$e
ztWkXZpi#!ph}`#B94gs0?>fp~CoWjWundlI=yllM{sD_FXRy3LTX2I1nD06m2yp>{
z$%7EEkfL3-6s~A!=dcA+xSWXcMVX93Y#5)#1wi(C1#;Fk-|O%@h`&Sas|dzCic<Kp
z6xhD@hQn)foYrD<8ut6&AH#axr-{aAV88$B_(Z%SSm=&1jw6!{x2S9AYf=Br8ypX~
z6rI7K2)*+L`_s7_0KZ$YiVGYc0mrA9vhOtvg5y(Eq(n=v=d+FvjI)P6trYOL@gLux
zn5TyE(VI7KPyTv*@;o=XU%Wed@dg*Z^9HRe53)~3Z=WCb56_NX;_zq8<SoN<jDOhn
z`P~pLsxM!N?U1qjAeL@uqJO!u0QE9IqxBb20^(VuZG<A;nbh>YO@#?KmqTWZFxc_~
zitU4#XMZD?5=OZ}@Cc4Z<tjyRBo1u~TCO=tRRO~J^=R2vx@Fu5QLQkF=5sus;+`47
zUP`tETA{dKE|P0lfVk`yLDNeebX%`BRd_n<V_fJ$Me}el=wZs@?uY|A+a>#AvsU5N
z1SXcpNarj86^jHRGo0H&M2?tWJ`1S-&jE&;9d+rmXO4<6PU5q4Lxh$*zA{8VK4i{y
ziI<cyS9mby)I|4W%}P^J_#DCb6eoMqB3#BXKd_NTgpGx9De^o(MPWvgx@x|_-2&vQ
z)slNAajFgmae%~A6g6qzloB#!k>-fFj!`t-=@Ju8Pf-xTG@;zN0nd0m+Ut^gH{uBr
zwL7GIMb!+hfD~pGY6V9I7GGV#3NI9HA9yuUOU4VEO)xnKwEHK1ZcMQb#xdKd0v{a3
z*_aK7?3*;l5+8n1&VTx-oUhW6b?7oChfdXIWsEHBElz-=%LPaW@}h=4=CTmVxLLUO
z0N`kO^B$N(v%wk$^e^fcVOCAFh6c{eEr$ImlDmts+jxcD=3#?Gy^NA;649K_RgX!z
z;^o=tU;FPof$N&0jjWu{tj-?^jUL04jZqYk8R%)<-pC>SvQ`nS*!^uHehH$Br;QV@
zxTVS_Za{NG3)=vAg9AW=Jp-seMGdC62A>YbbkX)Wef(JsAe`f{PR)N|_xq=MD2es>
zu5^nXM(&$lWA|=!t)nY912&;sW8<vTG8x?^EVjRVfqr8w+GSsau6Z0iohskz?2yRY
zBrWWu*`E81RxE=q9<00-itJIc-(kD(&t7qOaQ{8{`uK5_H0to1=a_MEy+HbJQSy)M
z++0mp9I3^?aEO|)WBiFlm7_VM`xzv~<zU3h*Wt$6D`Vx%5W`l~ijqn1*5dbzt~+%F
z)%#i>)?C*9ZipxR90Q_MVf9BjDjvhP-0A3uv;}qvQ63fW_U3YR&A$1zO&61RecdN{
ziEaqcrg^p?jjFiQiU=Ie3fF@H9K2bt-7OV=OhKp6K&reF5oq0gXgyJdtt(R*<DOS{
zK8_Nt(z{Lx0>zZ|=GCU+Q>DC6EiwG^h4=9L*#GWhue*<Z?LOA-`&jof8g*9sN7%{c
zqWt-O*2%hSm-fwm3Y2Oa+aTSl9>ag%%JyhDH`5k;6ixrUv}2Iu4SzNGoxY|j5s6TS
zxPMEXk_e^yC&$uT&`RMw^3mJ#i__Oy1*Ce4)&|uIPW@Enq(#+Yu1Oula{9dElZzK;
zCx@?i5Bz{2`G90Uo*huA<@jcmpl$=DDt8bV^EOD>SEBKaM;natne>HLZ7_-!tcm(q
zJldOYxs?>kZ%EPnrU>;>$4X}~Zqbbg!UyVW7S;-uc`>qe_3uHav1JqK{(5+J0*wKu
zvfZuXSF+<InNKp#<v^SoT&YGM>1KmI@Dnz*3!SbXib8e8t`H@KFlniu0c~UJ92M#$
z2a6zD*j(6rnYz6%oY}b6qd$n!rAP*Av<<-u$yk_~DogqO9JN-k%aVir&Ms@U5_@Yh
z*5b9Yl=3%CMz+*Ry|(w?{=Vu!x-;;UfotwQ4W|hzc(U;r`Nl;=n~wC2_5fQO<9W#Q
zyRS4OzU2@fH5!GhyB}U4@ykMicyV!gKcy0>Mk8PHD_6;T_gC^BFS$_b<MaNDAKxCn
z5*)}K1`ABmevE0n=#x+TMv-^GKRLaqOM_$rD-q#eWl-Raahx8`=Zo1#{>{aJ4gY~)
zp$h-?+oJ$qK%l>u+*aVl83=HPC)SGOgXWa7Q$g~<x-@z9`ln|wSkBMgO!BIpE?&iH
zTFqr8)dv-1Ep*?W9A6xs(abejLz9tc@yVQfMgV5K&1_J-3Ly*_BR|vN6~{MWDw!Ie
zcC934b522Cam&1s9h@0C4L0mR_5j{utF*W(Ur2{!Ds#Sw2T3X~G(?0>6L`3qOg`y=
zc(b7jRYj4HLXIeRTsY8aW2(z5I|c3mlqaErP#ZLM>?Ft<#7;;1R3J89qe@1sYhFKQ
z5Og%bI(2r%({>i3>a}*W7)9xAR9c?dm}F!kiXvIeT+@`a@Q6h|)1EFtu95}l8H2U8
z(9f4lB1MCWw3L`%tJW^sw%D!7Lt!1-u>QG~tc;7w#-~B36)&DQ=>R#N*Xx2c8K~6_
zRh&3FVxzXG>I{RI%I@GZN8>@h*rY?WDE;g7$l5&A>XS-l9G$XZi&S-s$&Srv2@bPI
zr?20<dhxEZ*DNc)#<2D4CjIHyVAkv1d*roNzn3w+Uc-tmb8)La-??dxJQW+a?OoYj
zzjgC`J4W@Z(~}>bpT7O>)r<AoU*FnYwK8Gu+!J}R;||?%f84L%G1hA2+&S*tS6U%j
z{fxJlTwTSa`P9+~F;w0!reN2Q3KHf$06L9KgQ9f0Ry|WRSi*UlBJz@QCMpMyKhC*R
ziU7YXfaR1^qPyT9vVE*QRBATf#={}sdTVWgcVMpGW@zCquiCW?$jT8pipEzO!~AiR
zH32c#p8ac;UaJ!;*l_gT2CY!lUpgaOB4$y45sxX2SD)KibK<G+sN|m*bm>*Rh^GUz
zWLSQP$y}3s_RwB0;;C(ADn#Kcn4p~@=c{c~DFlA@JJfQ5SYn`E1bWhpgJ;i9W>b+z
z-c|?EfTYj(4)Y6K1PAPqYDw*V2=B$>#`BnPGpU=!vTk^`GS<uhZI(<3Q{X)w_T$@V
z3_SWFpd%EEBQI(X;|0`cDAvp>>`=qL^e%JQV#Hv@^pIp;9JS)hhDx6j%B(3EluU=q
z;+)5qMQA^cEF>DP5K+#aH`}tLXD<S+J1;`$L-}CPqP#E30F%I_O*Ee8J*_Jsa^#^N
zh=T#I*`@luQAO^n5?Pd^x>cBn2zpqBNfu4dT7`3dXZ8GG>%5Frj7`RkZTHUo=lWzS
ze!8r|A~#;;EXv;|i>$~a|L3%v)i==saG=uvjE9(*5Sj8%b&j17CgYy;b!~X|I?NWy
zHM+549pvwZ&(;90tZ{i&569%^@!jlWzEcWc)~Xr~A)M;KEG#7ar=wyt+CC~!`so~N
z&-s#cz|cE0WqMjhT#a|X$WoEf&+P*F;*HIqZ1DP*?d9w_2G;QYwW?aIkx}6ds+v{Q
zzx;Qq>c)MmSGc;D*AZc~8<0TUm(-PGY#W6|9N@kb{p4y-7YzFmMJ<Sz$)JiEa`E#S
zy;_?4O&v+7vWBKOB)Va`n#3QMT;|e66Zt+uks@kcHX2WKfkb|W)hTJ@fE&K?d|4)n
z9Bf^bwEWqOhhPbxWA@ps6LRnC46F#FvGes`{M?YIL&QG(n4~64=Ib@LG7EFAjVyv9
z1SI`(R|9?o+Car;m?KqZKSI-vJ%sj%UFIk$0gDe(I&TVJBDDFDUil;sW11zsIij{I
zvv@?Zmm)LI;-Tp}7Vf+Xi5zlSI-yt;3v_8)P=qEED3bQ!=wT;@3Z|0cYh4D1{EIA4
z{9B5_bK;M45m-C5kD1a#N<MUSu@18N9~FgAv#AS1-O_e7SUB)X#(&u*a~bRRwkj4E
z#p0S+yA3XKRfaKS+IaBslOH*9kaBmm*=jjoE#1fRNpw9;mS_r>h-l8zpEzxAHal~6
zDCN_WH;JYm%p0LxHbhVZQu}FMtGWTz^Q=D0*^I$xxyB$y81HNx!$AYylRD2b2GD^B
zN%&doaD7ISKFiGCEW%u)pbh-IRnP~@WO~CmVwdrV=W)Z>XfkpVdQ{GMRHp@ZhDG;T
zIe(?D<OABQ%cufk;xAxZrT!n0^T#C`fpTYBp~9RkFi;7F6O&y`2|kEqo1$Gw13-<e
zEK<73&u3|Z6&F=mP#h*72Mew@<oqZik}1UwR0PJrC$X@Jx^V~L_{yi)V$eG?v{jYS
zzR1#vP2+27<hF`sCj#p8kWtT|mMQJ&XrC3&Q`SU^2>f#P0b`5+UrV(7p9o*g6fMI(
z2^PrK|1mZj@%dOZAR2cA&XiY^X3=65%Md`8lP}NKn0r2puS)i)zrvyc5K;&w3~e&V
z7nkM>z<7ieEG`?_{J>iHyfPJnwmxgcEBm%;*KTS;u_So#h)n{2tA_Emi$sVdLZg0@
zz?N|>bnmy>^1S5Bred$K3A+y=cWDQ#!y2tnw%NJra**2uAF<SIG)tSM>o?h5TTI%l
zm9<oXZ;5nwW?>!pezy1!Erx6u!>Mv1yu4+Eg5#qj8BN2svuX3=?ZK8_FJY7^#RYMx
zBq!_JM{3RWd2{ebj<wI-54L{066Kv6-j#VAaerUJQz|GES`JUSz)%DO;YCI3@-uVC
z)o;)q{(Wk_Wus>cpxT@W%+ncR|HU36I_$a+_8-#js-QUmWhN~+6|U~s^UWqZB6EGZ
zR3MXm8C94a<0gepG$Mg5Y=YMbdii6(d-T<B;(zRriP*}pDF_7GUIi9zWq|o%G)v=p
z309kY3xogDZn#S0tJPRaX;gR%13^X<4orm!r04<7CwCmodeU?PFUe@j6Tl4P6g?mM
zJio1;7M%a)xo*u|>bt*N$#&&JVX4rVQHu3UlfZLnQ;y}z)Y+8uxhhjOM%XCke3t2*
zWzcLE9I&5AckM_Lt>=U0e;<fXhZPt{)KvJl#Q08ShXxT^Z)u7{l*!W*%*>NZv#d`x
zm4MtrCdkN$fM+P+VMZDl+yCTJEmzi)uzO9B&+?Ova{(=!HI0#_vC&ophm<PS0>@KH
z>u`F~6>D|0b_J&P^=C6lq{<hpB_J8*285YmShb<n6L8c~IN#ZmvZ}-|7$J^A04CA(
zY>M}!u)~W(-IRiNY27fLO<R<u8#tg)W*EmfN~S6ekeF*gOS;Esmvcjzt#v4qwTeSm
z2LE8+D3EU(!?t7&Z|~x33_a_a^I1`78R)r=rjh3WrS^C^gY^@$HpYt|&-S!lI$$@8
z2whVhK;KEAOOGEVaf|^<d!zx=fWl-%0G9i350b^3EICM4XR$i+y{Z7=S#ch*TVKo9
zWnF*2SYh@h*NSRcYj4{kiLgp0guYgsyq1>)zhaPrq`_{z1V@Qb+$M5F%FL5NLz~JO
zttqE91zKI`Pf1Vnk;}?-B|Gczlx3|zEEyc*XZ|ppv7hVVikM|T@9!rK&Fd@u5Y0WT
zoDWs}xsmyDS-mGnx=YfN+43Yh$>hP0h8X>EfB)ld6^S?FReC(Va?)T2In~oBxgLR}
zQlf8@q+ch~3fg-)+G9AcE7G5FwZj0~`JhPlLPcDyAL6A{`z*yKr@0$AfTTg_FKQ%9
zUj(Fp`*#f*(y%TgBHS|yG-|MI8twty!B$4s_@;;zNJu5)LB_&e4?_n`3;aW5`}o0Y
z?5+iEI|eH|q=ytr{b9~g#7Zf1-?9J6XcAyOnwbku1x*fEk7&-h$4yiNtVfx9Sva)|
z8D#*hN73a#RG?_WY(QN1<Z%%fFdI-WI15>*>o6M>fsN0M<}Vh3Fb}1P+?8eWiHJWa
zmkBkyPy_SrnHxSH9bRx<h@XpG+Q7-T#m&ufD`hS|aTcJnkD=LxYqh7lmcuK)L{l~m
z86Z?f1?3wY2#Aa2Y&FJzDeh&^QGy{y`}NlhWzRTIn1Kn7r*Ke40fd9nshO()fV9P`
zHvX6_y%+C}q2h<fuil=$Xh{8fJ_4x<x%j!*1Q?NY?FK-kwW`k%YE+&8@1^R3N3G&Z
z8dT)F9xblnjL9nrzi<Ec52+Bcw4D|$us~k>#-n6nb7CmsTqICEy<EV>IOKt+mNNny
zLc<Y9$v^(q`3JVypBH~_6k;0!RG!mJN%c!1q?T;v!^QQAQeSWl6fcJxc0<2kjT4w@
zFmovUMTAUGFnxqTdT8khLn{_!?>=Wy813^+l>J!Bj~R-H=9h1Uw5k=bNB%|Iz&_K;
zo1joxeQ0I+M~Nh21Y)3X31^Ngvk|h3qi8;dDht6UdLQSLGS4Uk3Y@HE6Qs}VjbIi4
z4RS)S&-gToBRrR5fhQDFG^O~z7?f1pAm&0!^XM403QBoFZ(-P+Fp8)z3>AcqlP6`@
z3%rcbh8mN;tQHH(G6NjdDXoH+V->TTXX4P4&B)OIso26qJ@J~2zc_#K;(474Q$mi*
znA?Fzsfz8Y1reuY6R15xTTG~W+fLUUvSI}BMG-jf8+`7gN($b35B1j%mCK3ups;d2
zdc^W%R8$cAqEG<ewX%dQRbYsk&&m#s2ZI@e)mbS_%^c|NmV;~~+YCKDG<btXly~y>
z)vJ;gYUr|rIsR36WOI{DP#u{36X&HdxMh_5*@CZI#1p^G=lnS$#_;O$gkCI7#TOpq
z{~Yk1=|pzR0w|<uo5F|4Q#`WgaXkHCuj;~=1*}N6GKjLItb~)BmWur7-O{2{x!5U<
z$eI@#RcgjM(xo<YI-30gOgI^Zo>J|Ih*4bCn`tFsYJMVs6jO4s(-R(_8*M7}j3*+4
z3I-11NlR47={|*OdPQ!q$FE+TaXqNaGSdBqvW%eoG?>j3Zc(7JO`!coW&@JF=V?cX
z5NjQ9>kC;K_({>rhCJ#srT2@kVLw<=r2Qy;<{A<YuV%A{q~gIQ(1C<amm|`$S)-sA
zeDe$>KCVj2j6$^RXf))fm@_N^?zF>1WtFftbZNC(n1_wD&7Kn-I_``xOD7Kx%_Cc1
zB_C;lC({fbj$2XxQ~31TXDCW67c-v5CNnR?qDjDxrpsJ4x1Wa3xQ+x+T0Gqe-T5WU
zPdv`}R6Pn?jg&&iL)-rU?0xHc+qSak`JJa=v%Z~0Nt7&7vYq<Yqd0Esvl2VYcDl3H
z+Wqv=5@plYt)%43>dpi0=h!cHFav-DNPsuVX`9+@Y>C7S27|$101Rf9)4&_!>;f^u
z=7D=PiIc-INi+yq1`!Vf7-~Ru;w?wmNa?>nKkWa+C91pG?ifwr=8UG_z+H^7H@#w^
z(Yzr{bNCeU*btLsI)H*{63mE(9YkB>HUFRIpWo9@UjEnloJ^DSsJAtk&z~UwtG&J1
z=_K;MZf|z&=lri<YyMZeT_XSMSCzgsk&xv4qIYq8$U{TQ!5NdUK|j+whjdcnL9xl4
zF*ArB#!MpdL+0fOK9rDY6z4NR`SWB-G^ymvWMoLNe%|-Mfcn%wKmO;D;cRZHP921k
zG`hV+6b9ys3;XOA8r@nFPtSBt3gR&tJz2uvyLn|3M`|w&;c$-F*b^4X5Z`dWYc~ko
zM_W3sa_JSX{?xnQ<Nj08nVbaYRD|mgI8ur|{1+5z*gvO>EvF18tT5t=J|0gOABee4
z3)TLECw}_W@4Y+z;iUiOAEei6G>y`C3{}G>(=YuP*JIEp-G8b3`sZDG{y*P~xoq(p
zaPQrR14@Byv3zby&xD8rxp<ryPok{0$~0b9AZDEoX;DmFjv)FeM_ycx3<*08Y!ruf
zia<L$=#GvBDPE;OyE@RW2$6{YNLp(F(OEA$SueZzQi@~!(-9=oBce(7n1x{LWL*KC
z>-79I@&u&>`~ODs^VKUP_FzBeSxv}cmx!lp=5=z4ZMB*Z)7MZahM_n17GAoOvrCE{
zN}vDh=V&A=kDUdduB0wzmzG8M(`;QVIq&4?VzI1F7YEdpY{K1X(02_VP`4V3YioF4
zhi8Ji3T>5NLFD7g-GZ1-#}v0zO_Mbs*Q8`^J;w_}1HMSCIu(e!`jT@7?CLXYu(=$0
zXxJ;wAO)&(6xDHl2$KonM*IcWn$$i3))b$rH1srVk)lo>1Ee{w<P=wAhGP>Pv08o>
z+++6k$%9(zY(~S@uA&iax)6sN8D(}fxT$-l;NT{fmDD!CInhwhgu}9(wED>h>PGrH
zQC6vb=e8vc=F_Pc3LbkJr9F2lL3Q$>fTE=Tso;1RH3nz^T|$bpuN~p5;Fd^dV&w~l
zz)8Jes5-k^mH}8T^L6KGW=e)vD!}3whjyLTPCq2;5KHkwYC5DMoeVT_Wf4`_FZF%$
zIO>+@n;uCHx${KRlSBJgac&p8qMI*pNMu(Kk%TO;SKVD*9hA0iO;@1M`}AuypLw~M
z@QAxP%<I--ujSv&Tv09^S!DW&x451S!{^20YKzD9O{~(=07B@@*;H0)Eto)&TK_5E
ztJg(B0o9_f1SU2sl!I{65E4T&mt)0RvC5S3n-1&`kQ(U}_vM7qjk{2N9MI%<5RkV-
z1M?xCfO$nnIlb4*(!`Hv3zlcpgN?Vp(x~6;Rt1J%r>ASz?j1e9iEqi`J(O=Eg!Goc
zd=`d2ED&9|F$(9A_v|{YK{wRkv}zFe^93XtVqpdtE>hKUM6b(YyJ&@kHb1%{j+Gt0
zcW;qe1AREy9-_Nwu_X(=5l%sg`fDy`O&(eo8i<MR-r>pK8%*Jvak0l84k>r{lKf{u
z%nspY;$198rtyubxFs~-Q-K{A1nKu<_bN1A#q>e?%4+yz!?+M`EorL&OLXWEsiS^T
z-Tfk($fmPJpAV<mv`_DloATr(-%;YXpxEC7!SvWIxkQ(yFx%cOKX;jQeyP>@hlXYq
z8+eFl9(W6yPllDC)7Wfe((knmr$LW<$<zLVDWRHbk<_+MmjvTEc`yVY)Wk{&5b)np
zK~8D%F4DwNuOSdcq}uoz!cl5gM=(osJ1}*NEwP6ttmYhP8!{sKu9Gv7vL())a#eEI
zNm$5vGl9CZiJa5)<#2*8odYs4RxF{p9|v1TQ(Iij&C$2b^brALBc6j>h7I=sAz&gi
zZ<FA<iKGM;M?nN-cXwDG=F`x$J9?;Wdi51{6cwJ$?irmU9avXS=U`;M)<A9Qp?uSJ
z^iaCirGRrpfk{@?PL#m)JhgM{>{y!zg;*;A18Y+olz1({w(NGz%6M0#L*%;r)v8L@
zwPmW-UW_gDW<X9~Y%4KP8R%$5^|cZzlJSnV8^6t{?$cnXm2$hC1vrufT`xS!sa?1;
z6J1>UI{ZH<vly?cxq#&Oe{6L&9ohfGX*-)+&;B1@q5p?%|5;r>QXUudZe)GF2zFlx
zX>KsPB}9CI=lRQPA;1n^MmY|d9ePg9|3vYf9e6GWXV~A5LNKI|QQfG*!{14`UL3q&
z*Ey4#19K4?zkV*%Cb{-(oO>x?yn|y2;9!R`uNivXdpXtYJHY;86#1c8ZE!6^sEo|J
z1p6d%A7=)#dRlg@&6=gYHXXfZh*JEAog*S6wJ1IXo48a3`QB8~@@6E0EcSRl>NG?q
z*>Hn?DlFdoTG@tF#1IpzL{PG^88}Kb-vPsFrRH~vV$hM^r3hAP{-%f{r}^^Iy-M#_
z!XT2aKv25*S{x;sU#<5=43&CcL{PQ&MI2>%pMgYyfB2r4>037yc)=MN?VS<DSwSEY
zRCR!yP9?tNwXY159II1FK)7sJ-73-%RbEvzxKwcJIRdgUDq;eZo%87|j5B}L`92-O
zC1vu++pI;Mo4%AOAbEL|mY0&SVqZ(3r6olHUAUR+Fe6sRrf$r7%!@+!DzdBE#G}Jj
zX%f;QtEzQ$_{vO9B97I~l$EjRnzs(~r4YV~>{?yARyJ{T$kriURZSle$C}L`1?p8z
zq6}(RFptXcpej?VnoSB+6-_85QR@)B-^8?n-=!?8yERx=><If#^uj7)NaYF)o}`wR
zjlxi3&oQ9VJubl&XwEuJp%w5XNnelolY^-=TdGY;X*`vtqcobT+8~W-v!W{C4l9x(
z$XG1>`l8^*sIU<lx!%x--{AOk14$I3kb8e!PBE|vzli_WC;(s52|*^4l0!J6^=Dzx
z@$7cHt-DPD@0h;=!9;!j5e!3X@-VS|S_|EjPaR~NX%^~EvXP8@aJrl9j5IKg9;Q<Y
zhLWkRr=y4ImV)WwJp>&{R}0dq0HTH2)Kk_i1rwhbR{?sh2HMi|8fvoaDqx$|wi;|(
z%L{$8GGR8=Ft7D6S*^+jdu?qouq^*3RF!P#6GieI>!ITLKTI%{V?Pu*|F=8sMEr-X
zt*!0n^Z&2${J;Ht{?E?;BkK$d5;XE4W}NkhjjbTubGXa&UyZP!11dw+spI`i>arTS
z8TYYBuDf!CNadzXpcx)^6*S`YH~daGy%WkyGoR&pO>ng1_h%@gn$a-wuc0&Kr&Idm
zJJ}<8hkaF~J(5xy@jzw2vioR?KmaH=iS-Hfo2Hx*yo}L$hb8rH_BB|DG%tmcWF|b~
z`&Xy#V}MQwWf=vsWF-jxf2H1j=J(Cit@s$<eGyv`xx%N~qAq7TwKX=<soY4t$912z
zK^EB0`O2Wm<^KK`zTMwXoq+YFBQVh^zsSAb64(FJ`oyUtwnn#B#}p!sZsHPOzu;Af
zOx<F~%Ed0CUF<^Y#r`ey3%@zvXE6o%-TBm&4_=~3d!0h<b-s9eg>btHI!#s79ZqUP
zd3UmD*EZAvdFtxN@jac1`v9bitTmdPtOi70VzVOLUV{s5(-{%jDP*%n^@rEcFyG{m
z>On|t8od=ZJ#P1j_PgD4K=IwiyR)PZZ}<0Y2$W(@T8Q}^XLYy|GHD?)`T+GXC6-50
zy$n3}ma4zxqzhQ1>!i_TQb+W!R>nu5vzd6lqMiqIr!moNkZg=JH%!EP(xA^-LlK9G
zVU*yZj6pk}NNf@&mbmZ@EqsH81*}l`CLQHknu$`>)&0f|&OV@Dz0v5+BQgNzM0#+t
zo5Tb-)MX3d`J8pD+=4#>ceC!HOT|CG$PsI>vISuVW4+OBMfu}?S^8ul;zLAJh^`5d
zkzpw^@*lim6DI<k90y)#@!Oveg%rYEW5%s(pJtK@=iUHvZVg<B=!#cL{l#BHmYj<Q
z5^Ak7lDbbll4!~{hU%EtoJ_;TpUWxS7?d!E68^z4!u@?Vx(GG=(@y-l(WQpaZkf@o
zs~0l9P|6AndM%PvN35y^y3bnRDYd}mj4ZZOdZ7A@=G0Sjnkn^D-T9*PTH?Gre`?bJ
zq#I@#PHEwG>bglzfb!#|7K8dd4S^Je^{;18<Sis^#Y!@9UPgIR>cTF~n(_b{<2vo#
zLU(xgm~DPg@-lA<PqY(ou;PW@n4$_ddqpzxz&lxW7r3z7$*#WImEUCRvo5GYSJz6Q
zPVuHH^34Kt`)k9TCc6;sLid(e7k`Fl4dSn+sG#`k$S5fOncT>_THPtZi?Vps)O4-a
zYoR6!PdA^M+_7V=JZyChc$%s=3$)q)LiMJb{1i2B73t;S7php%%WJ9{e__4nM;HA!
zA20eJPd=U>z3u<hJ3H>Zd3R(Yz#r=J`}0rU`M<YtS33GHa{h1Io5}nSn_JH2^ZEbR
znE&Cm@q_D)jUSdCSxH^G%b~G%MIN-Sq6h!ip9a3$8U=qM7l7!{;`?AWvEtLWi`l^o
zybDj`?D!#s+PU+B1w^i6M}7eE%3|5<FJ`Z#4AZf4mtps?B7%Nr2cw%MmyTrIvsEKo
zBny)WK{NDeg1|kqv)2`;5ODGAFL2W%eu&rd@F@#M$0`+r1DmEyYZxUrwx|=Q66PBn
z3=79n4hVn0DhVWnZb*O@X`3+ldXw3m*LTO`y#z`O-*-@sXL)$*Plb1_1XGmtCqVfG
zWU>+Xbf{H$g`LwV0%Ue&%9xRSh|!TNMk6WQ2tD3<ggDWfRS{D}3H0pY+A4V^qkpAi
zKkKw|Js<+@Dalu~tV*M_6+LAY%2w2K<rkuAVF8>n%J-WLuk1xi*sIl~TUAd1ey*pY
z^$*rWEdt)$y4G1)QfQQ~*4$|&tE{BWqb!_z6(@Fqsx0hJVLY&GA$t`~hfK;j?x!$L
zWV%_us*!8)i$=4`wluPc6M@N#s-P?gBW%p5LBG+0vJAXro#F9djt5oA;MZ#`y0z39
zNf;pOY5{^cEJrdFk&ww?IP>Td?0y#98kgR{U6Mr`tj{PGo>FxlKyF${QhND_iDiYX
zcr?RLJ}ps$s&JOyv60s&44j^%QB64#Q%QUp89mE*!;S#il_0~lj7#a<)l{$e+}3cm
zFfVIldg0e*@<A4XI}PWv5Y9#~m5|>kA$#>VCF;S_Jzn~=G^Ao4!osp2{t_(qY`7ul
zE#Qcio;mXjgzQ}O{n16QfByHg3-ePg3(uz*4|&a!K$90H6><ViQ*@=!N>TLV#~t<K
zuKMw&`tg?f@oV+tZ`F@&4RTw9-QHF|cQ!Y5FSc|qG^p&h)6u@^YTs<)(e(3X>mPCd
z7pB<d96aovDF1KU?sT`4`G2>Z_VfPl*LeTSG2Z^~|Ac<<zyD*LEXn*Gy3PM|Z{3Np
zH}uG3uQjDl`}gql>)#6w3ByCLAI`>PCWTGm=+z!}!Nxfjc=z!j_2xYc&T~38#kb2T
z8?fGRL&S0PKrDd+)vogeyHvvWe#w}>VB5dE`aSxAoqb<eg!|+7?#(PXunZ*R6!H!G
z(<?kaG!6{fu<?@OV<tAW9C(|5S-JsjIwXvj0f;S_+LDe{5hE_)Pa%>Y2jqU9dsfi@
zemLMR)YZ=cbM$|w+qIMWzq8eO*8g8?{`dCl=S#ra6TtLMKQad2voWYo;k!|nQeMGA
z8Jjek!EFafPz7{{JJf0Qt?ypJ`5i^nx;7vkV(Xf`tZzKwjX_=N9QiKb>^_t)C>W`f
zI?BMC@e5S|P6nd7ONg%y%r<qrZrWsd9XJIG4zkMu0Zm(-&oUz&r5RWQNQz7oRS1cd
z4opRcqhyxU1Ttmi6Fa{S@kY0;zS0e~VA(tSU+te`!ix56=4-hBL5&3S&ThFOms*pO
z=a0-_e?M?)3g>GI6<Fl01+)b}N4Nfj((gYW{@6c%PppWev;M`8y^{oB25)G=1miIp
z6m{N@7()xTh7s)vMn;{bNqUOlGNX=%#6<QNk789R=Wk)=UNvEpsDMhkrKUl%T}jjB
z!A-Ga8J!GQM+J~?z9yt)Bra0cG&CJDW}SBJkCw&d>FmD;GIfB`Ucvm!vH!aGE@A&U
zThIAlzE1xyV*lBj<($A&USxkt0}{}{sfZiQmeZl}ayj*<{=#?1{=dE9OCB{Z#4B@G
z{+&lNi_Ye7AVRN*!>ES9dVI5;UQRzgIlegVy*vKrQU87K;_UeF=)C{tAN_wGot=W!
zI+}vXMvI=D_RlYR7srSF-ud~_*+l|V@-?3BgCm-BJ`;k;gh^HxaKSWwXv|<20W+<_
zP66<A^5QSvbm)(e5QA1WG-++9%s=$b$TU?m{`<eRZ=KAyom#SyZ%<Dy`ac~VUYwp0
zo+O%>e95U}e_2D(YwwDf4&$*m+)bc3C$mxStpDctq<8j@{^^JQhj$-;I6g6cjUbHw
z-ZxY}XYZ~<W?7x_PtiH%E?gQ@tTC1$nEzm#hjVuT$!iHgFOtVC$DbHk=BE#5*4feb
zHRHd?6c`b-PowMa;?tCw!ePYMW(@r5mGKu+<RuP_c`%!M!T1pcbAqB<+f9T(k#8xB
zi=k*jLR*GK`$!d+BcDizp!B#7OoH!sb!FJ*5h?Rlv1(z)PpVQ&MK*!yj}%CFV!h&6
zh6*f-mX$#{80F#QN5*B44omTjF7C?mh(WEw6NX|n9&jW>9^$4yGA1LiIQQu{O~P61
zP-8NrB`2d+G_T@nBig;(MGq-jI^d;;9K(QAM*gw723eI2NNwaFfDe+jCB>fA%fN!c
z5TS-_O-NdJ$6AQr<VtP6bR-1dF@o<hn@2mrxT6iB-*h+$DR;CLcST!458>mV5UWY8
z*;%Mqn&=!c2p~fxh}4RtdI?E$6flWm31Cioh%DCvOD%+myrafsgr8Vry?F?=$A{tg
zX}X+vWWm!P6Ro=lISD&E6EgAAiRtM5hl_t0Y<<nRKyVFnaYY^_N?ZgPrf8?2?qz5a
z@@jiPWvxhmB!Pz%c&G)IdL{`y7)4d9`{?j{9po^oSKUE}Z)ukZzA*i>-tjrHr>H$-
zLRSrG5ah_y1sqv;YI!t8w3dhl>D_4mZYb^_Bwsh^duF5XXhPJ+q6ti4Rvs<~i%>Yi
zk4*v?n{{BeWzdhZ7?bX-Nr(S2B<ZU<4p}QH<R<VD-)TVau+9NisvGO^FrsC3R1A;|
zpn`wQS018cW11O`ju(!O6%4LrYav^<4%R_VjmlODl_6kGl?r+IWjR~;a1^U;%SJ2^
z8^s0U4gm}I>PAMd5_k-MK#I|Fg-}K?8#w~A0aC&XQ^DCNBsd&M%~)cf2Ob0i(J?lv
zvk@^^cvNQOk>X@{abWcbl!_E!bfY|@8##<_q#4~<1Ea~>ei67BQ9jJ&bu-QDW;tFr
z^LgDY$?Il$UN>`i-AwbkxdvX(XZK#9<}$n!SxqV(kzecw_nCHfQe_mZH#E%>JFNwX
zU}|4$1&DNLNxi44K+)$ikhTVqN|93oV3qu`s3=fqJ<}3A8|av}wi*SbMC>d<5al-V
zm&HZWNC@SS?lOQq14mOM^qRecVF7zcZ76u}8CTxIm@TfofZ1LyZj*UIFbhHhgTO7W
z-6_pgA#Y!44Xq)sNi?=nn<XHJtQ^%i^h++ElVgvcDG6;&*ENz3&WHstDXdabaOxu}
z6!1h@K3I~E3t3&N<4QDM5{<Ep=y9pWXsTeAYO;y~G%En3Rlr@E3!_~aW7-t}1siDK
z^a}?;GH3v`46#6Qi~Yi>kyvZtMk2Jj(6HAdNC+wo-<C}d#USFWwY)pp2WBz*UjrW#
z1A)AZdq4CdT7Dy9;td%`*jh;oeLPjyVcREGv;e7om!M6#t;vU(*he&gNR$3itOkW6
z>0y0B{s%2hKPT(NRWNHWl+&DSBaS+#is+*Xkp_AYQxR%m^NAP;R19*+P$C%BC**(7
zsrWGo3R?+vt?CRBlz>s_fewR?c%kw^h1v%~^@E)HwUW?shc|G~in6(gOvn&f(jk_N
z<V1Q{LSoXy#^1`59e*p_0I|A)O({~#uzXXBH$X{t5n{E2MPlO!sD-CBmDs5mpE;7m
zlF^uw9+ntgX<}t2mm@ou-3nSo#usZ!ky=LOn^L?1O0tU(tEcv^n3O=dOg#yBOue2g
zfQL5bF4op}WwkX0k3W`Cw%CG%gd2*d89dTcmPW05S{^kvGBM(f#M2zX%OE<1I{7po
zwO*^<6|`{fGK~w0$JE;>1@KU%dly?ucUjg>wxvU`#2>S<XlS`h&C7=3X(a;Fs8vtP
zqsF!->=YyMw6gt}MxA_`k6ML0o2$f3cc)j&u^as4d-r=E=W`Fzp_J~7T#UtgON;6q
z><%8#4WjiB$1$pjQ?npmj8*2M3dPfMv;fh0++B_60-R1-)%k>$9D5asQw`1nU0-ta
zR_tyy=Sp>VX)cz)UWTKk23BeAmcSnE^qkBVMsx|%c=Q%K1Y|Oi&gE4ronkzn<n1>U
z;jV_1K`t#aE34BYFip>vVTl2qy9Ki%TZSdZZ*^LV$Wr(?xzjUEKfi_Ec*NeUYtq)C
z%w%P83fC}m>riINonC9}39LM)lqYadhQc*WEE%1gC8Lu+hm0l|{ADPtCIM@c(PAw8
z{m5uQiiX|-FUV<cL-q$lGbo^%!5eXiCzAMkShHRj=^uvBl!|689rLGz^-<1fDnfY{
z%?!&IqI^(->4V=H*9RroK3FT?)oc6sz|Nq^gtNU&ofRGWi9m*CjUk;NN(YGPR|V?S
zZB&SjX;f)Uqikau`o=WY*qCOO#x%<|rm1gCvs7dJk$7g@0-xntYvNB2;o#e>9iZ=W
z6rC=!=^IsAzL9SEXGYd4R1GAu<Wq3@O)*aK>{|I`!L_k9aTL9*CXxeLA%atIMr}8r
zQ4~t@R#UvquRYFzl&(&$!g|`aB}!WBY8#UU`HoAfcJ_Lh(=Ljz0Imi@hMunLLD`H^
zHrInP!}p}h%#o;aMWrCpx3RxQtx$k3)0sQ=-4Ny)sF}`Q^|(W`#gEN)^_)_==}9I4
zrIMb(;E!oe=y_Fgg(I9d74H8;JH-}0O*Kuanr7i@3Nng~Hi2DF+L}h9O7`Wc8JKP*
z{1j(qB8*6U-*%-aA_Ahl4&Pr+1q=~Obv30rQmH5(sxFO{aN8Slu%93Q^N0_wR49=m
zL~a%$a%(+AK79z1ocJH}u``>FA6Lr(l^6fh?j+-XY;`y7=lCCAWBkv}KX3jgDFg(0
z;q*^W-u)vL*7Ge;d7y~{dN>QT?^H2D>6^cw>E5KD9}_mZ3;)i0haGy3fl0%3IsAR_
z;swnX>`zT8?^OFa=;qfD|7l9Y#q?+Mr;Gn&x4TX%{-@(~pY{LOsQ=r>kM8o;Gk)~^
zOEM4aUBkE5HGY%hHa*WB>A3?f#G?~Pw@R$V_a9ErAb`-tS?};~$0t8zMi@0PuFi>&
zT=3HzLl7QNbAK6n{b?WKG4Uv)?7i{oVIIWsbPzPQV?jfB!#=Dj_85S5!c(kF+=U6i
zyCN(rwX^eKOlUl%-4KvkNof|z(*j5qyc2<^mwjl*j0l|Xh0P|$PoPO&02Jq)P(EOa
z)+zh2O4&QvH5v`ShL$=hi;FX|Vu96;o<nCms}7CzlI9#nc_Q<pKYBzKU{awZ`*=bj
z3EL|<LQz@H9+9U!7^*7OBL<67AQe)}aTHP7qwL}m^$bSJ9vQPyAImmaPW!@~Le_=p
zfVTZfAz_O_EGLpTlK}HG@%Qd62nV74k&O0$1w*?hqb&fO#?ZT5UKtaj6u!~j=fr8T
zPcMC%82&0ga=##>9+pN(-W0EnQe6<zC~F~T8KPZ?-iRtkEieEz1XuVd9?%%1C0AD-
zQJ&MsC>R=1siAiTaRTVQ)RDU!FTz%<MMwsGGr0KYwUnCz<|J`9Z&DKikha3*1l|4%
zl+qnv<iN_?dd2Efz-ae&RHzEqsi~-<2s=3<Ts$_i6x6t{jcVMF!y0FY0Tx2^#({w4
z@Vwd4dBwMf1cCfEN&sL?9De#F#Wa?qWf745eYO!&OYQ6){nUH+v3GHNdeT2U{djU=
ziu2K)ePEK-HAoZd69~F&C(a3SpxY|&lC|ghuC*|Q1C%7ZHrusYMLMfjRH{W(08FP=
zuUMsH&?rXVAAfp(L}YV09vYV(nlq4)Yd#-8LfAntjvz(^eD<eJQq2Gg(dU2_W@Dn+
z%611Or13Pw4~IZ5gQyX@k?hhvxjK#fP(mClX3F4T^%1Mdj!w|XVEpD}Ik^a22+tKo
ziaZVv>0MxI3H{3aAheh}2c}a8)bKk(o&nJ107CK4_#0wx7pv4MTWLpIX;vK*yFp<Z
z0$5fZ5kP)v6jB^o@$5q&ouZJmu3C9;XSIqO2pqMs07P-wu9aY2qbg$9FBcCmgsEDc
zG$w75@<e^n4C-uJ&L-QBPaIfip$Ef!@QxO|8&|FZ>}mm>p$r%q51}etUB>CdZkkgH
z(}%Ol46TqKSS4e?YoP{1Em%hn)*YEjkgguYi9^m~(etB={+o{%{f{Rf&yU{rf9jnb
z!;0DrYk$5?%bfkc*@Ud0W`n0Z|8ea0)@JhjXS4kr|NZNX|GsMI_djz2A&C9s6Kgsc
zLO`m)t$~A+?hm*j+hdr`hz&c3ZG_OEqc<6S{GxaGw?5IGr-#&neRur+_`+xd`fJaf
zi&*ao83wT-Nu%8<+nXS^r9qanmk`*SRLG*ZCn`B3M)C3cBM1op9;;GqPA1Qk4zFip
zFT8!!oHpTJlj0~!b?~J;^7`rRL7|jOT<sN2w}b(7rZZzmm$$~mo6Leosdr${DGZl{
zWhe+E|G^tJF?u=A((=^Fm#U>rD!Rddz)8RAiBlS)BfdlmSTjOfWx?>QJQZ?0MHLrK
z;iQZe1*R%lM2EH|JClcCM>pWNquxSekSq%m&&O12m=lM7(&cSx4IqNm2O6LWzF!h4
zyCsyit$3@4qU2j$_4s40MI$`vZg{3w4+Po)7fW1}NXeupP83FJ;;@yg;Ec2n5>q##
zMeVMp95^l6@r{$hAhaN9GrWtyOVyGFrqJxU+!po}A$K+9n#qDG!ZaX^UYWiMAaZ?a
z@o_9BkQtqpf1#p$udWeiF-#TZqB8b^f|HBCf@H0TQ!PhNfwLUNs|v5~M%qf!yE@W>
zs8*KWvgk_-u&#oYB)A&V91&h8(vmdi3UTf=<QAE6Z$Hk5q)fa)Y_X)0SqL~82?VsY
zMG$0D5VMHxr9+{nBb<&3sYl)E;87D{5=~=RFNN;NgR9F%;9Zdw4#4rKyOAATiD1@F
zJo0yn>GvdO^cM>!5Sbqiz`#PnD(rmvA`q=~d{(Vfn6XQDHwTMJ*dKZj+H)5A804I7
zaMDE!^~-xWrwNkmtzGF2t~@X|i%k#j_)3DsVU~{xRktO@IMM7_&q{vR!nO6U7|j;{
zaKIK2YoyX%jj$*iZ3anF*j3hVzcCxx!H>XAadm6-*k-Amc?SgrRugZsUdIQ^#-~K;
zAzFu2HxIquGo{B`%o3TQ+ap5TleBU#E=4=WxHRbp38$FU)_5=(iBw<lod@s-S&reH
ziuJ*}cNfgn<iIe<mtFh~drR{r(Jnp2`H7GOPw9cg8+`U4J-ftb9OqjBDSX+*-zubU
zRaoCjXlVn44U$wFe)Pi7n_has)mu?t>`ClDFs?~4T40w;QT!+(j(48eAcp9CaJ>wY
zScRVf7GR7Q9~D84->5gTvWfvIKwn4H$B!~A5HTyL=09EvfBY!Xbqw)2QU5(Er&)D`
zZbT>NjqoT!O(b7Lei8WGdB*=YPL4|A7qr-J`cVQg6{B|_Q_}On4Istj2mkKf$Eb?=
z{N2Z0{H>5$e4Lb8;h{7bzF&?LxXI55(T~*VM=JCq89i;y-_O!xVYUoCGT4V+@b|MM
z$FBc=wu`?NwZhY6rSY>%Sz@6@nwkc4s04m1D#6FeT7-ux&Z+AeVKt+4HIdf?Wu>CN
zkz5~zIsfTxikG<g?vpoBtvBS$F8+>dwNU$$k3jE7<G;nPKPB&*{o4y>Fb0Muy+;N+
z-X6NXQ3W3?L3?8wduhV`G^Dctdq|Z$d3eW~>^;oK{=i=tPyz0{4h0-N*c<x<INGp}
zCNi16B8b|!u%p=AF!Zk=)v7^kw8f(_!$Sn>?&l7!X;ulq-`nrCF+2MHU0Y++r8b|D
z4*~bKRBk~V<(<pC>2hNHYJ5L-uVPKGZ^)a=;o0mSeY%YAc>AL#JA!oMlg|R$3%)Ce
z`1g%(Rz#bz_np|iru(z|qEB3=k(g&|$sFRw=cm>2N-T=-UNHG&#f+%mchVJ}d80j1
z8a@Gi<l%0Cq8GI21xhgaU<9@~L2ls}$DFWjWZ>#XhjMkJOLxmR9QK{<_W~j#=*Yg6
z!kaF<na$sMqs8GYn38ENfh7T_bHAn7HVL8n+;&L*R)UnA;@h$J8+wGsRv;YZiF(Ee
zugT(*EI`AVG4;r7%um!9-(C|*4D7h&-@^9}M|aGVGvgXKm`(7sa55*eH6&I940`;!
zp0tdQFy&uOy`g2C!(8s|MMB0env&iQ?Dl@}nslIT?@ux)0>|+LQF!P_2`M6mmeHg1
zpop>0Sv|mt5Yn7|LS{~H*&SL4_ZaN9aqWiG1)86>g~){Pg*=l)XL#(uZo?P^9xlou
z4>pHonp-iQypK=2KE+No9jn6yU1kC_9w3}nyZ+urVv&09m!%K)4fyqiN7fU_d`vPl
zo5EhmwSVa^1g0}`;&Z9cTlD>~PfLJD{_LL2lGG_(&bC66elG(9(k#2M*qM6waM<k=
z1xnaXb29Jfm>zLpxSUKLO(hFzD*vE}<8sPh8&=Gw#ycnh(;b=`e;_sTjzj*Bk@K}2
zCPxyNI?Qwl>EUa1Ov_GQDaWqeJC6*|+v)5+N~+Vu{N!W_O*c$T*mS9-J&zQ7F;V_f
z+Bu0U<VjmcQE1Cd7y{~sr+KxV1U$19rC!6}+7pBqk7{ku+u#TeeuQMmB{AUy@O-iJ
z&*T3^Y;>ZtR&)Q#^Z(k~wo~{2JKN9xUtj0_&nEe08~@K=gr0HYd!aWq_HN+Y|0U}9
zI&6)}6cx7IL2G%dz6HAV=e-50zul=^Ptxm&!~!8k!toF;sVyEA>&ae~%S!A4%wd=u
z91d#w7)ge^<gn#3(l8s1AeG(e-$+?vTYKz+reV@Goe)c=$Gwv9x+c3TAQZg;XPpKy
zux_w0TlJB`H?`<ufmmDo`PD1K?3unr3hwVC;lfRU&npqsrsU8};S+yaBh^I2t%W+y
z$RUR`dy2br0BVzO<1t+cvzwt(EfmA<3-xXU-8d1h1*1vX_Z}93HzDHFcL#%I;10MP
z#pR7xcl@HO0$U_OuZZ1RsN6Lf^BTca6psZnv-Fwgju}M!k*$fNH}k-gUBdW%+PI8G
ztvh!8)%ebcuiC_g$ZIsA`+(37iB)LU>SO`4Q@Heo-HyHULVA*S*Mbjb6z~zg?Ky3J
zvl%)ZU!jN!7oeeBbO-uZen-gP+k_a0{sFvhZppWT7(a;p2LeYrt{#(%^)~$`u`M@}
zPi+WsW<m#w0KT#$zuN;Xfzbn(GD(HC1G~oF{2HBc!JQ_tS9)`@aC$KB2x<$cw6N=H
zHQ`W<ox?2`#@?O*1>x^Y<C}f*i+qBn;R9ord>&)m6Pf{bhZcxv!djX@*#`U)pm2dV
zG@@=}kSIk_DR+A1u@{7o_}s#8z@?~|&x-i?<iDmbccZ4T>oVHrKT-E^mU2Qq3p%No
z0s%WRewb1luZ&BVtYE*~K&J#qLkcK}Mis6i87Ov6hSzCxv)#7HznU;3Lidu<LQxZS
zOq2tn*(rdgMgt>aXC9yeqyiY;yK{JE;WK%)Paf~@!*i+*QkAfEXuCDC?AEBZ*KV~p
zM|6l#QUP{X#0cU}Ue%0)1N=-iF}=XYHO_$pa|seRT~0i*jP8&9NlK%Ul{1+;=ES<0
zF6{O}8-NN>XiEe(wT!bZ+`$vY7AS%Upu#h|U9(D3N_RRVY2hx&3vUj68+UGv^sY_1
z7J1YJrfu+%ljx@VYY$URgPBNgyc&<bHy(!M4Gl{`Y6nx)M{9IAi;k+P@3o1;1l>#^
z&c!=_dOmjt-jJS_%d(-p`*}<ldF2Jd?d!OPsTrNw=S&{(GX+(;4;|F->%bRH+}4RT
zRmIvf^>)R?GSEbz;x9$(&1n@(WM<qG2m9DfU|8X3kKg=9)JOaKZ3-rrXxO#A2pB&6
zN9sL-<}<P&)f_RUIhw(8yCLcbc(cC`$_B>F4r&k#GZ{6yF6endMx~p%tJFv<WR;jS
z)3C^}I(r25vyZV;Mz-Wo=dvN}Ru2YV7{Z;=ka~lW;Xr5+HNSpjWK<(wNL5U+6LZ4m
z2bdBC$&8^mJHVJyAs{*kb!v)^qrZRby_0J&nDpn|<Q7Kr8paoIh)l^Iogc(7AQ*jv
zFF4qHw&u_lJ3v(-l+xEoiyCYyq?66QT}xepug2vzFc*qM?M`WEIEHy7EA{D>aX$-g
z*`6_31R6vJ;RMT=;K}733SF3rslg9p>;nMm(_*nskPF*ZbYw#?rwYdivX#CeK<AZ&
z&4_DnYAiz@pMgk;NA#Ou!y2a7x@uX*_a8pi*pww2$HoLVWW0v~oF~}ErN0mm)06)w
z_`@$t7wqG3If1A<yt6Fwav5R?=fN1cNVy;sLd&E671}7>ZjWPGD8`955B)JLrfTAa
zCPilfr}~3yu+(V}G&ZSWC%(vIuBZ-T7!+<W7;sXlNq&P#c_5U~&)iYZkiKIu4Qc?>
zPclWeiX18H96PQSal+v{F>~DM!W6VZ8dio;R+^8P3QFKz5i1Bq4X@C_yaVBwHk~ap
zchRjx+SPDCVqkZJ)_2D>W+Edi4~X!=9G$Ho`QFQ<wQ&70pZsLT#X3<pQN&B4sMzP6
z*4Ulb9m9+&p_+uci~zC?To%TvjV?;2?Zk|xkE25-U)WB_EW9gN(plH%vggui8~d32
zi`MK+)Vc27-=DVmgiZ8zu?DE3ki_a*OG?{fLw?!@D~1JoV$a&OM8J>&2;;VCaD9Hk
zBuEmPG$~oEQ6nh9R`6p&;X%2<=wmx-Va+j!U;?znGSM*QMQUn#B@9}NDi<>sl6<o!
zEgD=I<usA?3fP|9*tN1?(&drZRsiMz#t!3jmlkhv3oAOFo_bewLY<JMpbySzcv3%i
z7uP#cX^3G+mBQ`5y=&a=ZGtU+D{gSD!Wgd6>bJ$2Bfuyaq;KZXzw^VVZeabkO5YQx
zLcL#b2TVMGiFUx^hx$9^)F9(Kj=PDyD#fjB@pbC%cG&}+%+mVpwCHv=;26%BsOMhR
zCfp;&A{j3x>@bC7y)ACoc+JgPk$F9NLmgc}d!VwS<1Xnod8AzvXJWlt#!JZsQGu4{
z`06F@x<{HXx|MB(ws70t&%AcdZmGw`Z!(KVm(*1QCZ8%ic(S0|Dx)BR2bo*)5cQ-I
zvd0#6Hc*FYN{*sbWS?Z%A!YebgON4|L#Itvk6ABuk&4tct)ZgqY#VjEtxveoHIp0f
z+$B1TETQg{%aR%&ci74_gPo3}%8;SP*cqA{8n$(98dYo4YaIn6vm`{f1cc<q_1Y3#
z<H~{BF=!h0TOF%>EvsxTpGqV9R+8deY^+%XRBRO5+<);$=y@6kDErVmBeFx6T|IQe
zhY-2ofZ-Zvv-`;Le=#EuB3DLJCz^lR{<bD=J4Eh|rhrT|7&?~GwhUrX7|pmoBD3US
zyS0gLTGLqoF=h9Z=Oc9QQ{0$<*8att4*ij~!QYCWjq@342>9yGh;cH4l^sTwiH~&&
zKc+-qY=Q989-y}ve8aWPDN(+zWAa^3*G{ZRuzk~LIg$vYJ(V_t2_D=F_BBfZ*S1`T
z^<_>A?Y6iVhd#PoxX9x5${)m?WE#Rmc<qLK;^=dK*FC{&7OjT<fK6GX2=4$RI=z=B
zf7AFzM!uXr8R|=ajkpD#jFwZ7%0ARV_fq<F<GgSUfwwRu&-^s;`AqkjvGfE?FN4}B
z$@Ge3mteZ4hSiu(>I`uR16Ddk(1X>YjM4JEJm$i3z7o+zIG$O7z;JZekEV#_6i#9J
zJmDJmo*xX2_vb$y!lcn+H{wWzgKOg@@00t$g|Gm9I5cpB*>XDUPr|#wOW5^>v^@}S
ze+lgmTi1ve_P&Q6Y)OU@N{`+JS#a;55gtu12G^}fy>t6gSa1ljO8Er^#TA9ofpNxe
zDD*T6`^++WjUG2Clo)JXt&JX!7t5>z<ELml@sl4HO_9foFemgFLY#GNsNp9a*$!FX
zH`rY4gIfUHQINyet~hI`#d1B<Mp@jY=8atBt4|!t5w*G!1A`Fk$zrq@S78P4R#RZ*
z2rjL@MD(1|f@M|hT1K}2KdJlHTYzKsDpvrq{r@+&ZD%W)|Eb+^p8fy7%J{DkzV+MZ
zAh7Ga1rR@3PQ&GVJ_|^78$8XQH}Ku@_n>UxUfN4sff+Pb>(J=;e>nMg*zdz`EB}13
z59{l{%mbIO(D>o-5Pim{vk+o2kp=e*j`LqO|C4=ex(n7sf}H6h(p0*e4LixG!Kgs(
zCf0vH9UWero*n=5sDFNb^n#iL=;wabk4}I4VdzEI2L6J+N|^=mN@TpEPEbIMsQ5{=
z!u(aSkAHDsT+s6izGr{&%6O<5zjC+w=EE+Y0er^5yeFp@M?2uhN4osdg~2o>Ljv#F
zqOAvg&N^(JV{<4`n?+;Wwr$(CZQHhOCpWh3<i@_SZQGo@HS=YrrmCy!{DAK2)6dy^
zEkF?3RwUR7W1Ye1Jk03@IIo-liMQO9jop~JdGSyq9A9rQr#XH;Pj{c+V70%)P2@J|
zzY9@7J<M9uF?c%VxVNY)_ti3g(6|MTK}kt`(e#{`$-hsWI%|(X(bn!(ozv3PVH0o;
zqTb7}Jgi4gBuB3KIpQsq>tRCQ_hE0bWAZQh-~X0LKcgJRN7Ir%-lUZ*MqkmrJo?;E
zk}PX(!MPZ8vBV$^{rxaKYmo!#Lywh2h&fW5^o$QgyCDa+X9?8{%cNYCDNBL>^sRx5
z1?MSk0Y-T3Ut9v<X@A^~77g}2!T&tGevYyIi|gh8GQh-_;}1}Jrw8Tc&wq2nV?>1p
zd`7Hy+7W;`p4Dw<tG7en8Qshc7isW%-Tvzp^0m#`@D^Sj1RMn<H-G@#<YwH^--U@I
zD}s!7_XRoz&Y(D@Pzh(gP=E@FQ*{OkJ8hsa*zA858!0PC4^M%@(Fz8G3}y!~r<1vI
z2j>?Xd0J;lO2&=|eWN6vaM^h)65SZ5(wK3lD8RUfGjJhumzCnzuf=y$8K#Sd{;Uv3
zqet?G99{HRwNOUQ`XOZH=nMD)itmA}8nng#z0JeJ>uHTy81g~vGbXBQ$AxB2%U<ig
z!j4g^UBT$ECDd9Yeu#SI?uUL=Hk2$Mi&0CVOmmkF(8p$nsPxg(W*_O-Ta8(Iwv2U5
zs`$Bo@epsnzcRge@q)iITz65iVju4xrEkA`5hIB}%CAS2&E*bvZA2L3wH^&U&S!{|
zFBUz|DooR_q(3k2l<+Sjx!BtjBgaCuoOL<r9St>_aUFj6ckp0rw(=_HQ*Z2oiYOtP
z!8IB(*EXAR^`Bk}h!c_{J2F+W!s$tlQd}=eHZ;Y}pg$xEho**Mz6b|sIELN(9;7d0
zb{Kr43_0c<TOI?M8!^m`6b4A+T94^Vf!Tc-TKkbmr$T#Z5hI)vOt9`!{-}GW;ZM>q
z#{j1l@P(IVGJWfeSkbIDK=N5xNLUfP^?ri|;(R*O6;5n-V!8zgb+N5<V^lh$@XAsa
zsjZxdWFXx5SrUY*n6<@{wFMl0jICy&YV(a0fE%3vG3h&)zB&WZ9#Temvf%ym?}@EU
z3h5%bPF7OFDin#ECP^Gnq%xYMcwq7GT|KZ^8*RK4XxvPbL>r1k+ceRJI>9Dg^j<6Q
z&{dOUhbqBGTWeK?$T~e;vt%|cO-q}!)y=Y_MK#DeJxxcOwA0OUtwnXf`j_}?mlhfL
zdZ&@+i)V|(@R74|kkb3-NVrv~^l{R}S+BJACv#`aZC~ET8MV=OPgV8pDc)`s8?FTT
zeI8;aJ-v$1qYtMpY!`&xlN9G{7vIJh9By~@46mnCyR)_J8yET|P2-_L!}_izjx;iD
zG9xcKh+$>_f)G=5Z1s0tzp`&lhB+L<$cGMU**LtSnq<kOayx*QA>Xz`nqax9iG<L;
z#!F++WbZ``*dILRlwyl)mgVBM04CWohZ#E^1V&D2Gr0^UUcfpB6=&c7$&@LewNvi(
zt1z>H$OcHEYmGQ>p)C@p8Nn+kU6E}0U?Moi5D7h>sA)zD$ed`Js~ueeHgU7mrn5Cq
zcRQSyb32RE=~OOG?tF@|i^5bXZOT#QlHQSw>rCzyI5A<Gr5q9<=gp*?fux8wIR3@G
ze-h{%UfYsei<PG<4#j4S0Bb7|OQkNPs>u215q54w>GtkjN(o}*|I6*%>3>4{F(OA*
z9Q@&osw-9#ixh)Tf0#P5m-HQS?3Ec|N`5=y0qpCv^m7&UbLH%NMHl~^*UU`rqfx$?
zUDAY<Q0@lFYS5h6B0uUL7QjlJc-5@;9RilqQoy1KWY^(4ghkscXXms*$b-dhDO`{d
zz1o^be(-luj-_FtC64{tyY%bKaD9O9nUGlo-?|x6>`5+IC!qqlLUV1`Z5_Nghm)_8
z)a`qT99P#;6iXO~?Z(c3B@?`pt%eDR%C^$=CwB8WD9y`4dh@y@^Z$_uG3!y#i8}V_
zDBlJ#4VEFK@D#SlJ2cb#fhH5u0TUV`6o+aF*9)U%!mG@xsfOdL5u>LV!<JC&zhJf-
z=eE}|fno401fX3f9G`Yp{r6Fs_h+<0oMJ!=63N9zE-5W$kW>Uy7A}1V*HhAe!8ize
zk3}8}N)j_VyELOA_ObJNdNmu46L4xwN5@<FxnWg~jvD5JK6JM|GLNS;(r~7Sv34L-
zjw3+FhsH9yT53N!HH(g!Xcq0OpF|uA<}XDbNZ8op0daCsJ;=9)P&TV=R9psufsW66
zaRRZ{P$9}96^jz*hjEJoh7Fa+t*%d83o@=mq_V#wW>9QW2swV4&99C2LwSrl9_)om
z+dRhobHXbyfMQMs8?6-=M9Z0_1huwv4E@}TQfxvya@zQ%ZMTtubWIflPQ{<lPgpf)
z>yPv+=p!(E5bKX(4zQElMOVk@qh;}v-FJiUZ~DkXiQ$4f#(q5wHc_nTwou4o4yGKg
zCrstB-)~8akNjC=iF@FC(MZvBFPWqN)6z;?HFDnpO>47>N#?R8Y*<03Jtr!ZT3N8S
ztSuy!W2WDGo#}PNUZ5{(goKB3mE<4(XgS!ctPQ%S0?gdY0p`>_&O(GAYFq5Cr0vy!
zrqTpdv^0GhIy)tUonhqJFwp^-=}3uClbB!kcB@_=-C!a{K7xE4-f=Y}o)6NxQ16PH
zzjI5IsjV+y3-Z$a??$*UPW;o=X-xmY;}4bDAWM$g9r%Y_Qud;XGKg-3&)R>I;!?bh
z@-l&fY)?evATfok5gXu}UrLmO4cufxBspZJ1(KYpQf*)zOm~Bnig7lIE8iWZkxmn&
z3pC65SWP~sm~Khg(@Z*u8EbkCOHs!x(ye7*&Uhmn6Z&NEjhK;deDg3CXyIDq$x(}1
zLBi8YuPK|8A`FiN8d86D6&vbFPPyJR+RvG$XyQ}eQcqW0^VLIHaW;jD(x(}G0Ef<4
z#cKIW+2Qx{)_89B_|~UZtz097o*8C0TwVBYGib~-e1Hhq{z5^iEvIl+WqVXoRP!3%
zYY6(z6A0Tl-WlI?`@debnRq(h9h@smzoqqk{LPiw+_%No7ko?A`oV6#r@%<fzzLqi
zulEgBg2ky`6!Mxw760`sub9WQ_Jz={>n)*7j|U0OLcfV>G^Gkot<TFTn9Lv6)%ioB
z`2@z|52LbqD%kvev~wFyh}lbSZg2&?8%W`THa2_y2!8Xx9N>?L|GH}1X#J{Ht5@>I
zQ@8$?jJn}W`b4UNE$k=SSFECG`k1f0ngqi)eqfDS9gJ~Bz-kxTZ+kury?2Ule+Sg4
z?gN#jTjPHRB;mgjolb83bmJ$QwBRI~tfrZ0fnoWh^N~x((tJtS<jkr_-?&UMOh#rT
z+qHzZl&3ZOlwnAxGl(%^<8_9db!G#&COxt}h<~1Q?CuW}d?&uZsqW_XJDa+;sSFw+
z!KHD)K|MMZNt1Tb#4q<{l%;-fj-vNfe~cmuZYoqy;rFEs_o4Cs3bY+i7vJSW=}x%C
zg99*nmdwd0{s+V&u?a5S?nFf_sDzaFWwJXC4wk;xX&R{Os~b(z4zFtZ*H`0jS=E;_
z@x-m#Y=Je-&LmF+Pm%h=4p0hx)8K~&Pv_`#*9F73lN&CRZ_MA_y=AZ_1z9}4FiNPq
zDrA3AsJHYtTob7GC-=%aW3BqkZ;SYbgMN(7X7?7-84E~l#URXwnBhx!^G1Lwa8o(S
zOL-_4xO)0Wyp^u`u-}KdnD>s?sj-oKF*~s?fhExz4ATQ2)=tY|jRH;%(J&u~f@Ls+
zSzI|bn+zh1^#sBhN33ycwowd9Eg6e~iVM0aMYcJ5JcqN=Ssr`jVuUO63NGBzPP^ES
zbX4-phz7iw`cCHq3;eT^cVJy{!X)KtxNa%>q&sEJLvKK5iXNIbou+$)1KT7=#M3;7
zX^(Y2fJQ2b$Ja4eW{`RB1~?eA#{~-bP2_EB8Z<n@09*{z0pKuI9n?J4`mb`EhYOT!
z7U9s;^!HFt6f4H`dW@d469rOk5+bkL@j>|9>Yq9nZ{ZqdqRrGCfWg2n0+gpZoiVm|
z(Iv4G1AbCqDAYw#)8JE!%`|n`;z8qe41^Nb(~8;pM+&&ffilaTBPW)7Aj&iZ-8h+U
zNXAIncRXinj{Vqiu3K9+BQm;FF-kr~LUJ!h7KOY0gEaA&Xow8v73cNcLnq-Qaycy`
zeM5}jedZ6iFo344rPd4YyGUns=AO<B2bl=gMn1%R8<IwLAvYw8nlR($T$mNzvI<1@
zW6y?dB%&hiQprDsXC_tHb-K;vx*MDAzmW9JIzRr?TZmYYaqP{#x!2fIRr!zyq&ieY
zT4GGpzGT!JD9z;jqsnDyPQ`^ZO<|=r;#ASWI=c1Ls3v-K7Wbl#H?p=fG`J=yPlJmj
z4oO<HHqvPZ0T8M|=tLS*X*^JdNp!;4Y3bz?dpvDTRWAWp!sB@f<~K~Vasp{*i69n=
zhOz^yCpu__Aix!9ul)b`{S*Flf5QKrg*W#_Q|&cEin#oQxAnHJB0lkVSa)0ToB6pF
zz2{#foGu2WR2_}vfu_BGDC{LP!-0qt2+`xuiM{C-km9WuUx4+=2XdA>I@%58s_nHP
zRsifWAOi~af4L6A06)>dhOYKC6zuK!?}&Shdy{v3bvVbXNB3#`-kcpI4nCo7a*Q!#
zya<l{BodQ~;(aR(r7$Wjek=`17(j3F2Va6z-M=myz9GAZGIg^kYnE(>#C-{szx#tp
zr{S2d{+dZ8o>(TCQ-s+H8s|EVjfbMi?NqTgg%&K<RY-ryCa|*J*kISE!BEXKY_8Fl
ze)b0ZH59*2&VFXh+^H*mHlCfo|J1Sl#9vis{Dc#DeDwVwzyJODj&~F?Kr7hzlz&;y
z#qbu6=XusresA`NmnSvRS0*DsYsk5l%*}b;!RvW@75@3;rAUW2FSF;z(|g?M=H~s}
z@a*9H^x5-|D!qMeq3S}|<?_-1q078@AopG*yVHT!A7{zUTY@t;=3_My<>u!2w~rIf
zfrQLQ<fRS!E0CzfAj@en%?gq1f{~S!5eBV{$BY1(-R#gm*5BUD`Tff5i^7JA7(-8e
zRL3gL&p1n<z#x~%AHpe76TZUPRgc5kzJVNtc-@w4!!F!}X$B0^jFmtcVWt<q71E7K
zVX<DjnAIDGda*o?5W9OY7d1Rd%ENajSV42qX@f0ASYTu99hmG37PQ-V@M_2HjS<2`
z24&LL)keones{&+&(hF24S&!Y)<!;ymbQpV(il}YBxSZ{8Ioz&Zr~YqVOa&s0_WDR
z#-RJJKJjM)q(kFq>hw}!1g<^***bH}3HdS~yfFW`p>F=^{YoaYH7Uiaj4w*w&UP37
ziZ1$31k20(=TtxE3GT<u=N@>DIqETvKB)pFvUW}N)TOeReoWo(JO{6*G@}|Os+WoE
z9<do{Yi8!oMS=|bvnN|UEjME1WOUnwlhH3UH;Q2bS2lW}XnP-_%>HKJj|WE|iS%FM
z4wanYJ(svPHDrX(GxNk%ct-YB7qBIKjGOK`ArEj25e7B1q#Gk-nnAh03iul=hd%Ow
zp8$HdCu<-=lkhRj82q4=AP~h2kj3PRp?H=tdif2aq6GQ59F~8<t@dWof!FPm(R>Vg
z*B#R_lD}ih0r8z|$wUIU+=02E1rSTpdPZL1wilR}RUn;x%n3uONKky_CU&)X{bLB4
z;&x}4h=f$TnS(efL+VJuX6H^}qZtV9jV|#2Z-z-KV#K+K?!HxyihN6d19W3mOjkB`
z;;3YgcgoZ6BOS6mz0%n}KLT%(ObBtlJXe7FZBn25p>ebssWsbgy0b!4sHM~%)csht
zhAax2|9PZAO#IZtDDDJE_=V_Y+0kN-QHbWkfR1aPx}qMU(o4smH^aeBS|?WeGVL<r
zRB{nf6;cW?_JsDg0pYNG&KTxJz=NJyRqN%m;v_6+Fr5WvE!Od=1p*4r8bl5VNc^XN
zBXo7>oJ%xSSPTIi8pVJTDdYqGz*F~7wWcMbC$!_w1RM(q51!XTXrBiQX}BV&^SS0H
zRUl_P;nP0OWDTphKi3-6;`mZNT#8bRm}P&Gkh3@~PIGXM3pddu(m=;391DjSgcvTH
zY8mIa`9)=oi8z-dWH@?-P*f?i&+?743&g(u?u-fvqM|ufGXlP#8jQAlJT+7D?)vs^
zS%{R8AC#GCd*!)}toik-d*m<x7fYIxWHy;Oi_v}8!JQ2N4e*BVepp+Fmzvt2ak1J8
zL<kyLpu!og9@Kpm!t?f)HX_D9HD%zvBs(iJR}^!=G3CJ6-8=mtNXfhiHDT37-z<xC
z&#E(luTa7=0F7kv+v0?F8PM-A!F=cetk-qp17qE5J1>|-LK2g?|3>4{Y4V!jgP&Zy
z9Ipbg2;I^h2czXcl1437kcDedx4VW^O7-pM)w?@b85@3MI8d=4>Xy}J273Jz<A&G{
zd`8-QWgd4dWk9@T2~vrwe?<rw`cvhGsGe2-RR+{kQ4CX2V!|LE&W2A9Kbo=krk5GD
z!W5S}%I9+xtzy$GM+IkJXw5gpup2edkiSL)cdmJNbbEc-a_?Q@*;74W{}oGN;ngF&
zG80ZHsr9n-j++_s?luT{<LUU0t!j+HljX9)JZT4#+n1@EC2sa%gy$k<YrD2tAdLE%
z`ODwbu?EV2crjc2Je&*5x!2a(T3u6P>%ovc2)tpH{BxUzPYEsG8Mow$;Z;8V+Rxxa
zklc^J?q-F8w0SbeBD_ESTbn0{Gu1vJ3-JQ6eG8<J)C*fIpeSy}yp{kZ>qpAan(PYp
z7|}^RQKOu`PNhz-A2S&fhmM-AL)}V1#mL0ibqoZ(Y&#$Z4&^RK6E-gfzA#afk;kfr
zd!xSxnKwK)GdzcLHOLl)IBl(|X>C2OA)-JcR<(#1R`DpS`P-8l`mtbOvYuV6PIUT@
zetx=WUD9p*tLe4=IOhjsJO3Y;PT$9D%`srg!B3N^y}R-^|L^(ypMNL(`g3pc=PKl9
zt`FBmr&hwN63cke7m@R!Y_9Wz;pO>|hV5CPac)YZtcyNp?_g~-5tKs5$8dd0DLLG^
zcKgdU?H8~QeiIhwco{J6w(O9CT0x@_b2R^KeP*Y_&_P5NpV!nE7|30f>M3zmhrJDH
zBXUF*-@?udjZwZP6P)T4&@2CVnUgK6BHN2MtV>R6^Gp-MZXNPUJcHTevfCWi#oDC}
zS6^un5$o;7HE0)rm;X&dFDchZH%~*evz&Ml8s<9-mp;94n>5-XzXtYOfq-RiYYq}#
zqh6H7Vk=(Ja2y`B|B+oBrBqDfg%X2Ggg-78<xBP>l72rMh`C>=HzlO*OsAyP7c;uZ
zxi%`qb{91M<A?ZXp%#H?OC(;?Tr>>P)rjz)0qva<q30(b>E9QmiGcpcGDojN;7*tV
zMC7tTM=6=jE^1@_eD`4K0qAJ}kcg~rT6hc)=);?M{+{sNQ4!1yTJz)>D0_&7LDIQ@
zhmso_NNn+n8{#sg(iXpG<%yZbRvdM)+dKrMIjyoZGu}a`bp?*B!40nXQ>E%VzUlhN
zZ!_l|FAju4+d8mdVr{0vxVf!&KWQcd0aP$Vl_0$P_1g3%tid_!%rc@c_~Dx$7u*M|
zmdkKRgt#0-xPRh}s-ih?6s-11{kqX)xvZVtjfH)wJFVA*I@Q^_T|+n2RxhDS?4i=r
z*lF2nrhy&Yn1!R|@ei|c$Y~gbA}a)<`Uyxui9e2zlE+<XGh(Z=Sx_i9&5gN%X*e$z
zu%l37hsz~w*0D0j2+IwDsOr#|q4i}wW!Vi6s(Nl^Vk7^y%6AfLe3tMYLF8M3A?M?;
zq(_b%SUVIhP3#+9*w4#Jx5F0+=eprcS?V;8$?n!s_8>4rqB20{rGREC;JAa*xj1!s
z<ewP>D$NZ8v}6|NKa*=2{xq!oH0?RnTj<txvn<#B<W9TYf5-PMUGwbxyiu+EXv?;P
zJa$9oZ-cykWIFZyEJJ^C+goUh-6uNV0ufVm2D&q8*RP5e#-CubKR$DO{-riH(hj@s
z0lvv~A#H7jFsXt!${q?cB?AFr)RQBNx3GLAd>R$NCcZ_RG_dk9&O+|ubhP??`S87Z
zCTAI~X^ndDX1eDf(ohsjT)!#Z0A;@xhg)WbmI+!Ulv(SnIx!q9;k7IApY&R=T?w%#
z4Zxa6q}DCi@cvyJngZ^lT+bfgrW#zVzxK1zdn)ggcNfazydH7v1nrz&>YY$k4yU-y
zg326PTFO9WG3gWcoOrB}JPg$nEM?X;h@xIhSgNOmJKu|keE41Jh1Rt<yo~7kjK)sn
zt^)CdX5hYRl5}N>(<_EFaDZ>Gl2e;Ai6Xd4j*#j4x75<=tP0g^>sQd_(txjwn4o#M
zO#9pvoa4-3j9?C*(9;~OXCvYI$52h+dkKO#$fHy`UTup;Tj^;JOfSzk)Rp*kL2k?C
z%p>65k#+)>g(`sUeXABziE0``M-YjX)rizQN~OXDiZny<+z#y5TkMyG>WU&b$Sbl8
za0#FTqhdpfk_KRAmo&Mq=f5T{qHIGT8ue4D1SmjBCIZk))?=v}k80#6$lBl_@=Q16
zCZBNN*^3d@<z;9B7bQ4H1MH%><P{9w%y$ui!NgEy#1(p)hr^uR743!X+3Y5_?BYhy
zV|XpB@q)69*XtlTM|I73n5+{$BP}5eW}?geKsj$abrNVqWc3TeVSI|pp6PHlT$psC
z=}Mm)#387y$i9Jk%Apq~!(BjqYJ~qY;&d=_Ufhnym9Ej3JV62$Qr^;<W%x6JiFvnB
zQnv47;!U)o`ZWI++=daWIFeNwFEu0{_FM8`g37?6l95Fe=1nBn7%4GX+8-t<=$R??
z=VkJ4;7>g7;*uqo#EF>dGKk#(_In|iy_+=PXiZhP+ms8+-v~3N2OUp_0m_z!r;-Og
z#czi#y}l+_Kk7mR2#xEc#tB72JfT|eBcUPRVlTEAAatK*YX(E5cpVn*?Bd@x<srPo
zpx9MhU9P3VLS`UqxuAqZkl}1P&Ro#fmtbrc9~Q*pG#^k8LY+^}Q{2CJ3Ez_wadUJs
z?3Csekdfd$>16=-ZyQP3h=i$26FFdHLXxPLJeiA0PDKe|^hYRr$J`oZp(0vpyvGBO
zssUV>G{9=fNX%m-%Y*0;EaDIBBQ%a7o=&1%c*&-a6offZoVga}ICyOOCZ*sFB5J6!
zNZQvsN3yA7X#f46Y?jovx(0c*t>_DMpQO^pF6lmKzS>ydO4UQH6e4i|T02G-6$P5D
zYvl&8^Y|+Fd@s!gadA10y(F>Z4$pLd*;Y<QscV}j25f^W!zBCi0256aci|=jc9tqv
zVPvyl7k;0PIE&gv#Azwqxv86og1fEWJvO=vU}BA=5{IarkIbO%pw}{z8ZBP9KS3<2
zODRN;B(_owEt&x}(^306X*JW8HnwTz7hW47id)1*`hg||2~N_)?;T)3l<fh!or?>H
zE>iJe<5O`Lw(pJyaOrG5#!}uj%40;xOn>}s@P2f1+_@P0Yk-&{l|LqYNuaZjT-udm
zh|aa_DOhQ}HPZ^w=c1BnbuvFWIGbY4z5o)`n!)VKck}dGxYGmzlF`yv-xcQKV_>YH
za%YVIEfC<nMWBU`OLK^l^gTMfT3SpO1w~^cd%6hL<5t^{;|09%Jf5*yds<MPl^TG7
z2x~NXH~(_<kOPL|s70Aa62vp0M%&O@V8x2xk#Rk1xAPP3=ruN&UTGB~YgR2?fH#T}
zk?bberVAJLl`4hIw*yF3>4u!{gw4lD(QJWPV@mab;Mb&90MC^KWhwY$^^hsXby7%j
z5(lUsT7asMZ6;Fu6G|Fh1$r!jI!t~SJ(DhMjoJL;^PU%Hz8uBko7T)yRY6v6apEej
zG&fLs)q`t)V%63#+b$JPK8c`r!LYe;o@s#^a+avu0Tl|JV|#?7vc-Le*6I{U76okA
zXzdaoJjwAyZTo;*(IBs<*=kT%<5m49kHEn<p_cRKxfsnYm&d<Rs?oE@sU?%g%vU|5
zPM&5(zj&bWQ!f&vw&d$<5m`M!>HRNwfu6!MuwxL2wn@J2zu?q>6m{3#sK2ep(;x#{
z)H-r9o^+YC=ulJ(;4F(mShJ{zYA_d2hP~Jh)6&wjqk>aTHIuZXX<F_LSf<j#p<J+;
zL{u%cYRZL{Q(oEBOiZ9{wNfM77J$lGnR8*!UhDN4(|;KUo1qbQ<P1)XR)gnG<?Dzb
zQ{r>*S4C`iig_20l786mQ~u#|hKcvz%F}0LVpc8U@iLne^thDlCW8}S#qzrbk1lkW
zl_x2FTd>6F<-)A%eeJ+t;QeTiJTSt;$|tT?F3ASfq8%L2z;sjp?$KJq=tk`#Ja2@)
z%Hw26{N6{=c<vLVuW=7+Ss!d<nlj7dRpw<Dk?MM#8X(Kk8GC2til~B>kHL$bbo4r)
zN%=$4C&Iha^LSf+4!?JN;fcD{Wq%S5etv(XlV19+u4L~|*M2^JFY1>cx;<l=(OrAQ
zKJ|kgE9gD1C@<J(L+_|}aPnYa)u&k9I`T{VbLT9dH+v&gGKRu}a&SZ(rK<M=H3xoL
zP^6D^HAiaQHg-x)9`IeP_bz8z)GU0iP-(aOEmhRsxJX)KuPLnGBM2*Ui3W+|IRmJ^
z^BBcSS(*U_YN=X<vmr%RDb<RZX{Nv(kq14I#7Z&6Vjor<Nm=bgPF=aN6d+0JQubmG
z+b5~)3O?a#vcy&{S4LU10@MAh!a~#cDVSW4E5)Vmo+zU6N@#W@9VuKbQC-UK?ub>I
z4vX{Gb=K?d-MN5O)wXy9s9EM~M*bM<5EBKjk+I>2D{G^i`UtSeVH=?DtZR=?3z29x
zmwJg7!f_lac_UW~E3X}u6f|rCk75KZn8p;iFKHN+v8Z74+Y<f~K;t>JV)pzrKxZw>
z2|4Q3ZwK*;;DEMq*Q&uR5OOl$veIVwB!`M`!~RSsaEW<TDW5sD+6sliMJZZ(F$qAr
zQ!g!1RS@FqqFpsfCxLS>u}HyuDIpStYD47^IVGP6(uSX8=>#Vh5`-EN1<r4a@i_-^
zYSBomgW7am;U}WvN`-Jz{c`0kqQWTb8glV1Tq~NlLlcKG2u9EDIT@f|$!(FIT<jN(
zV?#1iA>LCa5jCKr2xz5R1w2@f(h@0Wf?!x4b{}&Eg1y|VWaGFF-<wY42hze?%2W?V
zCz_ojNc62}!|TGMN2msythmO__Tk@I*Lo0Slm1m9-rn}wnP9`GO$dfw<$A%^Re~~u
zXq&D1a6FtX4pvW3gg<awzbaT$EYuJKn{$AsD?PZQK}ZdmRtJUS=qJcBElG&T6chuZ
z#yTG{U?58S02;Cf3*L<80oLz%XK7`=qz{qUM+*p_P_C^w&=sV}cKL3~>#q*MUA~~H
zmtEi;&ixP3=fJ)7909i;LeV7aHZmGAc0=W_I9y|E#KL%>u7gn<aaKL@5oot&uohhK
zayLU;Kk>{Wv9(CW?X^$4oTPaSOZ65b^`aZ8oP=ds9?zr9QfHv}(s0ygq4BDtVpr}*
z2lmIAmuvK<4t|U2x|5yQ=eDjow2vJ--;J$O#0r8q)KBOJ+)sG;CINRY%^4M{fSIxa
zMjlK)o-C6JabuNzbqL`Vn>IGu;O`_AX3x1m^Fv*62t{#VP`eg_*A1W|bUP}}5L5{@
zaJ~xU*Y~dhrqi9y(hMA(0fn1SCA<D!J^cAFm=EU7)7q>)+Lkiuxd7yEaCe6b+8IgG
zd)@<1zR_TfIeXmlIl=%zG=J{;*c+HV?}ac4*DlmnZAHiuA(6tmK52G@4UrYyEL-Zy
z3)pVdLEkn%%_bJ^l0|h&`0dnK6Ii-U@~OYBl#t;`wXZCFD789Rh;;%7CqOc09~tQ=
z?YNxi!sdvrQHPnAT9AjXYol>~Pf_}p{pL9DjiPx%dN}9vgnQl9U0OL;Bm?CqsI+@p
zjA^1(vMPmCod(z`A4hg<43t8ZwM1q_Njj3&v0MK9PVwcFT7o{I>-Kd+0N%7X^()gr
zT{!R|<BEsI+L*3NzdGuFt1bM25fpF6b>V{k0EkG$0)jI06v>i1ZmpS=LNW#Hr+vM#
z`mbhm`Z*28M~FFU@pXe-CJzCi*$Npv+lFBDx$w5p@&UT<JgD3eXi^=%$)mE_LEY~7
zEDAS-W2#sV%J~!$N{g`+Ru`03YQ&bx(n+)lAm;bPEBL}gkQ(P=>D^;Y{m=&bYf*(@
zuS54PlKh_Aiy5zm$;Xp`$tXq%Fy_(a?i?aG@IdDU4b75mbCu-R!pLM4o9^MKb07%4
z*le-!*G3pHoow))hs5Zc;9&^gL%6d56b1xsh;y^~r8v4B&cC5?Bqf7+hf+5xR4~tG
z$icbRP9q>bEuKvN;P9YEwzK@p#>8l4SEcq?iFW-BopwjjyR!udW#Bvdxr)-Qu+zTc
zg@cT{VhWGXJ9;5SFC(Z?a`>d&donF6gGlxcQl+BLSXYgWrI@9o6x%3~WoH&ERcH*1
z%6TF%IfGRNir$Qdjg4&G>=mF<W||$0=E4~{F1yIYt5{fdvIT-K4w%?U@y`Gw4}Ruy
zLcIHYEoCA6h%XQ|n%f@Hi_ZRsLhlqOq8I2FG@)eUEd~$`9`)L!vG=@19Dz$~n-2@f
zQ3*!U7(?y=qMXD;wEhBqyeSPE1CM0Q+Xz&ops?L|Si~*_?tjfA_M}Y%v_gkAp03lB
z8eT#G2oVLgityQxTrh#)<>n#Vs9}#5kdBAsA6wJU*CuscOWU?AWOvhqdIp_8UhbXd
z+=o{wdQ``I`<hxAiFeyM8Y3)F$oP=AxwdkI;>oar)<r9c#sDb}Q;;bRGTzZUngkIG
zwXGPtqECy8yhqLdc9kA&;FGDQJnXjgi<YbbDC!Ij26Hrj#S#4<6lB3<`1U}nLJ%`Z
z4>Pq(M!Wrp6LQGuR=X@uzG|XO^`t66-eosMQG9*7eNU7_8mBJ{$<g0;<6Q^IMd&UC
z5qnh?4&J_-Cj-Nck~>M?CHSSEgS5Ecmrnp)j$HlUy|@`|kG*cq0A>+um$zWE$%K$<
zLhbH`x;Pnaz|jVPC^Tq*U`^e)EM&nWF7;KmaQa83r)cBQ4)lf72!qouveqwja16rX
zy41fz=V(;=$>|;<7L?Hqx<K^gg4Fy1S~oEOOk$O<elv)4&zl!Rrt7AXauQNa?L}Mb
zCfZAF8R%3~VnYi7JYFVWBwi4p*R;=)@jsb`ghPl$VHtX4OG@FAC3kFZ1_?g>tt~>R
zx;$&Cvc|82F7R+NK4{m8g6q<H<`-vr6iy($iUh36{VlNx#LK<aahnzx_RO;nE1=XK
z(i7D6w@^%a=+y)vTPD7sn03%9yGiiCuMXpxdraS*<KSnwh8IfK#(Q4n3x3mLurs{|
zJ#D0#@KY76sU;tDttjD%B@&bUn)A4X6nU<ak-rq##+S3R5vFScYY4jHj21M;-4TOK
z?_!@2hEPbd$xj4wuc@<FBA#SC58&`EyTj1cqf{uUfRrqXXc6W{m{dNPbGMrIT|%2t
zA*F0PS#6!M78%HDa_YtyR7R=N53B}q#U)GEi%|-Ff_X!EZfa_0`W#0O7xvh9UX6;K
zV5cv<>Yklo@~anvtKVhaPW1iDr6p#!+@tx0`Q4h8adb6pC@6XyI?Rm}0>TFw9wwkK
z!cN2#8FVmrA~=hX=)jpoIo&a3iC1c;g-u%7-#<g(;$4alc@Uml%P|UyL=_1hmZrnq
zT}ZVSq!n^FrrIto5&bvbf8I|<e|U&Y(kOEi`_)A!ZrKS*Pl|Q2r8Sm<1S#$8IF7Ic
zC(KPq)aIE#ZBGdYN;#Z<f{@C_<zUF+ysv<_t16M|uDX#7N-m;oGS8-!bk}T^8{7q7
z@+Sc#*3zH_XD>ro>~X=_7a{brCrZBXC@8@0e5yKzzVr*VYP-ohf2`Yb<=(&T%f5Vm
zUfzC@S6)Bt6IIVjP2xD(N;>h7*ff_qYV3bk;daK4Pg*%ik*(LFn?DT92C|{=ZuNWT
zwJDIIh#;a+enPmwNDux<2+AA%kciGAi;g7&0m{Rm9UVT}&k5d1tL>9UY!AWOomJdR
zv%{n#*NuChA3+l;NpZ<VHC{)n1f+;;vH3!>4w9vC6S(U0VU_BRj=rpX?_%k7esXx<
z^xxtgUnVdMCU(Bb+V~{5<)v^F9t6w^9Cj_7ixl%H5S<!6m#_yxe1a^t2PsS4;^+;`
z>Fbm@83r&<c%gyyMU(!uZ^t+-j=1gPLEJ(?>{}`{VwQ{?;U`)z^L8?$3{_XUhJ1Kq
zo<b!aGus<9><)MnzKPMem<x1wxtoXhr3fYkBs5^s-5vh?lCh>Ca`6ihEM4R{{$6>0
z8s;g_jkg|l2U7nakH<%+L_XAvnpVU8Glda{bdT-QnuPUya(NIyPnbkX5d9DUDIgVt
zOWlxa-v<RWgsvpGO(KXb(QtF~U<&umv{EnRdNw!<;;%Z_P3LOshecHv?t+xio#${+
zVg`Q|?B*a$mP2mMXJ2*V%2zjxToNj!8uJ){ONTwT9MVafJuv1dI#bNR1`!u$P7s&C
zFTf$8Dbw#)C)SxUj0Ss1N>1PL3C?d1%c2h|;KzDGA6Z@21}7juBZD{V$qsRCG{mfj
z>r4io{!8p(@5rH#*I7L60cIa(YkjG45O??^o_Bo{e^stQRG!Z1HfBx!#U;39i>4o8
z(QNJNEe*+80mDfP2e}7N$hk7q^VAOkH~SkTh1Domj)#8%nt@T_o!?=S1wW?U9BeGz
z>=9tZ3H9bO!9fy^**Wg@Io#$yF5xB-wuuWfrY(@)428YAKu|{MJ7wxvXpCEPENqk$
zTk9{4+&H@;D^nZ2f4kEJ&OTOH85l=@9KymCjqTLJ5vAq2BHVHZ91R?MxrTM_V^&H0
z?%%3yqka?J3bLQO+J0`^)p423jn3aC%%NTwut!QHiP%~bUT9V)_OJ9EWB{`6SN1ND
zcz*vS_b(hQ$|Jr%(Ql{3)S`Rwi`)uswsvttVegJ^d;UBQ&W(S-gXF-R@tr+=Ct$IV
z;TERGoYfeyE&N4a12Tp2+WzzLb@?Mp7M*|ptUvRu@4eBW{Nb-V>bL##0rm28+4?_R
z#BG=J|Bs8P@Bi4>r*LJ5^X=jC+V=hNf^L4SU5G+^uz$JAB*%}STLVFHtM8-~kEp=~
z(3+hr7&IU_uC^FioOu0c#pxFO$LGc`ei1r)b^*St^8IosTIk6zDkA4y#R1G(2kz1i
ziH`MwNIWoVy*5v-q>irZQR+r40BR@_b=l%RRr^V{{E!!>H(ILi1}!Z9E7F4V9k=FR
z8^P@28_I*to-EQZ5fL4DQ{CXQ${ZP0353mxU^80jzg1Ame(SlgQSG`ExaK=zigz05
zs3lNdYP5O{KT=E*NYojyW{E|u`LaX9`Pb9{v%UV932m4N9u?pQwa}p`r9~7Ipy>et
zRpkUpy;o>!<hsLjoC!Zjq$B0m(A7Xr!|(d)d|v?&nuD#&ZD<+?&Kl(7<$^UM2msKb
z(YGAo3=|WB015*jQQF`sEuaQ8rEM7GkR#SI#pcp>Uf2RbL9Va(TFL6te35J{p&~F+
ztR<Qq3J^v^fCxU*TV0rp<h6dYH#zIt#eRJV`tkNzSx}8pLBKr{2>?w|5v7;ogb>r{
zWEBL(V++>AN-evC{7xQ7L7iY#2UN0Xc82t7F+NR59r}<Ob#}Q$f^0bj@E)ch{Q~-8
z41NY`Aq7Rfv2jzWyrY510&QYJeGo%7&9hin6Py+=Jrs+^5JfnI<TaC-+itQ4rw&;(
z1heGSEt8{6;w*;_@i?$8vPfjFF>rP=gS`{hBQ}mj@`&H^ez+E{akyd1W(h^lNTB9(
zgsks!SvjHGSf$GF$;nsV&fPb>CQ|YpSMZZxsBQk`7yf7~uJ-mXY@zjv=jMm!!{67G
zTfn9ZTy=L&@@Be=xDal~pM&=z_p8X?ahdyb<8B<j>NpZ%q>@i-RV<eY6&`n104fo9
z;plK4IsqRh5*~a$a8$=@Oom4Xj7A8|{n^kFR3~^EH0U5vap#q&gEFP$ACLPK!G3(-
zXbhTIxA+jhsn_NiB4ql%)v_gJ-D1hgg!@9Tb7)tn7#FZ`0{&28hxEh&!+_gNN*pkE
zC8%(g=vV^DBl|z2fvz^(2X0>IzU0|;po_rVMW5DMezRx*K}o0}Yp4Lo*@D_pqh627
zH(R6I!*^$rKevwu<Drr!Kxi2i?P<kTHjar3I10TRDbXbKgkps6p$vt{<cRI?6#dYS
zYZPO3Vsv@?wBrLR<&qNdt@>2Kb&K2q%5x;^g@qni7eb3V=!L$sF=EcTveva#mY5L}
zQbf^QwK%$|ikS$p-3u8U7>U~bT5{DWk!nUp((VAb9vBoo%F@M``4oVjWj^7zh`Cc7
z7l-bUM8=eA8)%>Q$v@Uid(#}S)*Dy`nSj#Y`vMSqkK7HO9arOJ1(>e&S#c3zfCKOd
z+Y#FE+t<JChs7cCA7^uS2Io0lITkD5PLXypXEc#C??<%VD=<je5FxtE6+B^jjS5Z9
zn*9pRMcJmwn3htC$&N1P&|>Z}dB2ptCrG0U76pYX@@l(t`VgwgnSD_=htvhbZ-$xS
z%_)Ig;#I}UEl{1)SJSg#VoH;ru#%hEjh!~ePA=F~lo|+p+F+V!&5Ylvy-?2J>;oAO
z^PEA$P_c+|e6?nnqD4=JU#|-6gBNl#Yn%1OxoT-N!<$D}n$mkFZem}{7L@q`_l?Ip
zAl*quGLe+l6p0#|uVs@A`!Xs?$S?m1k35_=GY7}hiHaAVocx{wKCDzZdGQucx9)q~
zqDcw+Q*l&vroNI0-baM=h-fG2*JG>3GQ5{7ECok<2FYfKvr$>miD1a`l|2AP#pK3x
zrWHy<DtTnaoomn>Vb}S^fgeO4=%)3K2%MJm^L2+o2an~(gQC#M>+Ud3hlTZM6p!%N
z#RnJbv-KYxwJFo!sb4D&d@N%KDXB4dQxVigrUS^@p5w{ebl86Mr_yCwOGMOG^E){)
zg|*leS(Mv6Kk`(O_t$kFKC!8BvsMj_*V;3>EYiaEFuV!J+^AVMqjl1qfenV3kzMk>
zd$MNBGL{(oq}HBO5Y<}5De2H|59Q5FHWD!}rQ)s2si$+H*mSUOnOKS?grilo#SAJo
z(B`#8#ZS_cQ76u;Hzt*5`K7bL<=zx_>~?zry9w^SAKN2T`D-gd!itH?wQVEeoMH${
zkZdB9ff6*_+)=PDf91pNre@0ivYi*Vy@UCQy8=>c2i$JtAeJVsC;=Ioc4U&vY<0#K
zsfN@tWh^9}4s}Bq0RfqA@TWGYs0i>S03u2LwHgsgpm<9En$w=78K#{*+_lC{%G?8@
zf?rXx89ksaj0TxtP(C7xl?xKIHL0)?nqU!Bn7|wooSf`XBS+SSK#~=d_(6)$!q^D$
zgkq$oRLK~U<i+dmxUDM`YKdY`gZ=4d?GW%8M}(xtaif+PLNaZM^`e~_31?y<g@=d*
zrlH><JLo9`U=+dsLnID+@MA7k$DnSp8XSi_FD0-8fQ5cIsH+eNXR`3?%FAu<Rr;Yf
z)NFG5se4{_8<l+ZZ)s!ybKvir`mJC0_vd2cL^>c<rz^xzgAwsiE2joYPa=t3o@o@B
zOAPfss?nLD#HW!;x%fV<6HeVdMQdEJvfFsEI2j@}3{FPth8**ChElN=`9p@(r}kc_
zR;gA8gOnH;=mLRzFhii(<t%XuJFjn_-K}W=kDXMzD=>SNrCdDaw+y<?HG<S8reCl3
z2c?^RFJ;4nl<Yf~jQS4bAbvGOh~@MKM7dY6>0=X>%OJW^cj`9o?1MvHJ#a+w#USUC
z*Sf(`cD{4AVuw?0b=<lL)9oBUp(-1KC<lh6^jAhyA7Bl$fUQJFB?LGWZPhXT3Y#Hd
z0XxK-*N@0N?Or`Qb0BN6qHG~?fE;0!q%!%78W_G~A&YF8YUs;K{~qgS^fwq?ah`}F
zAl|+STNFDrwoqOfBFcWf-?d8A^tVyMVP%@vsu#6nGU?`eG;4Z8x815oPw1BjFNsjb
zE1IX~G7xomYfKRXVcLXd5@VBIO4U1!K8~!PJ@Kr*ztK4Y2|&M-Hni$@tKW+=**@2e
z4|EuuGdeXfJcR+R?{@XQGxcS!lIyJ2e<>Aq+-GZwr0#K4h-E`H)y_=$3JbACPZ08r
zm>}PXpltvC(|&M=JpAMK3qa_*yqcaq`HeQd^!3hv=&Km#{BLw>C>M{%qXWFR$-&kr
zuuLMd1<<H#*G46aElkmuK~pAYqGh`?Kv_u-LgNseQbi~?!diB>KXPBZG<i|{(2-JL
z?*e6~G0S5mdvJB)_KO&?#HmIT%#klaU;rS<YMc(p{&`7xtjCmX1hgxbVk3I!KVw6i
z;dJ}gNw|jz1OSz*<?eZwoK!Pf^BpL*+?5n7ZvO(iZZ7h(0@ND--Nzw%dpNz>*wW}h
zb%!=QTi%ELSr1o;Y^2vx^X_v|{cVi8?IHEcGrh>Zo$aR=xHmkhqP>;E`jqA&SV~7>
zsru;@v4c{w;$hP@ZOP1y9zC67wm2|PiA=ZSSeczpLFdlV*Hi5_1H3*%efx21Lc!C5
zI8uym1m@fY`Gmq8Kd*~v^p(ehGs2GzV}HmlB9w+6fnShIPcQ%a#|VM0f0(z_$8(i>
z#5wk>%lgL7$;+h`edB@E*NZ<knGg}&5Zc}@nlK_xczbAS{(&qf4kK;^?SNt_Eh9e&
zPWX|-kS*e?BSmv)^(?{5<P6;7KWLrS0Aodbp`aMv<TBOz!e+?F1%!2S%PDB+&>*S5
zmNx<B`1z!$XW;uZkPtyZA|S&a#zYYE+P7lR;ZKV7gTO==O0B^zdK$Q!+7AjfF>u%)
zyyKJSu&|?|qwm_#Ns-IFmmG|FqvJPjIR@za^cd)=#X?i|L965EK|UTiwCHmZoBYXP
z2j2xeWG_1-#L7D6nWc#&T3AqX1VhKA{5V5J(Xf_9X%qCS)krf7<TWG%xs_LxTK6fb
z#UCVk-+YFDcmzq$_&g`lP-+gH?J5bY9c=gwOA9Psk5Nm)Nk<*biK4k1wh3^8Y*n$H
zEliqNKo3;_&s6}ubU{xys03uOJt?68j?O6CNzlZOR1PYX@y05E7XOx5N)kxS>j0j*
zqUMt|#)t7!fOkzBv{CmXNfe{Wt4s>HQ3)#r8DBJ~tULPgTZPjU5k`tu2=1q(yKkJQ
zp_fCkx>hYVYo>NjQweCB2DVlvymUr^ffa;RPQ*vhDG%yu!lqZYkAtTOB_ku+A}P|A
zq~%g|c5CC6YnPPTrDm~JCAMjTT2%4Os-fQNsWYNPbYtkwnjD=#2biD<w?&n&t9(36
zRB7bq?gYQbH~VKA9;qO@l|>o25YlU*4weKx*NNmQi+<Rv&wbZbS^UV6UiGP}ez<2m
z->58pYd1XaR0i$4qWbn}ite~q!33_TK7Og8`tH;e-f*iv`j!~>=!)+29lw9Og8J_4
zz4t21?zngK-k>eMppoSb?cChG>=a`H#K(Q4OLvFOm$k8e`45^gWTO^FD@6C>Z}MRQ
zvzx3aS?+6dbi;N#aO&L_>A3AmD(^SOxH!;zIQGEAjx*_CY!Cj@V2npZ4sexj!+fw~
zx2U6+A^u`$6yqn74C4ON_>^`#=6xjJe*9^ADY>x49xwcPLdl(C=lczQg9wbrwc=>1
zA^m9}6;EQ$OCCQ0PfP&Gjs{9-PKm)k<s0>F&|ZR`g>^9|q+DxA#f&CVP}SA#QB_x0
z-&3^Vc7(?pRB~?an#AQCdl`{%hcjy;);^?gr7fjoyV&iOa@+yF=0D&4Hs+RPNzcK-
zS*y#M!CO<8XC8SjKcnz`26$ZRwEi)U=TU}d5W9W!MOzf?2i_V)P9#?CuxX{|ER(I(
z2SZ;7mSlk|#2xh;2Bzjw7MK-j4X(?Ye|Q*o;ld=X<4`7lQBc}{t9t6=u9t6&{8lPp
z!?g%r<mO#Kn1y-fpx*MIEtPO&qP^b^V;eu<ctb0If1)LlHk2q~nTihV^d_;#vMl5)
zO3_!GjH)$6qfChIu?hr*CH`l+&lCJl`=n_y?|>829%>*cf8+CTkhJ;iUOZ^(Z3}mH
zvuR&^e`%Xwo)NE10#c}}pJY_7Zpk}S4*TPjZ(6X5_W&4VirQ~`*0@f2@jU~ru#yl9
z%}_uXERROS5|PQvT+PK(+cL;$&*~htBkPzc>SlI<t=};ZBQftdLFBiRH)#r|?xPv~
z;O!op-vP3@1&wTRUURm|!JW%HnL@wOkU|_K(|bwNTqV#&I-B>)=%393n`k6rV#io#
zo|K04NW%oxSqRd!d(^~-nf`NBgXWaB0AwQTPIbsVO=@@=6B?6lgzl90cbW5EtHg>L
z|J>q0R;dgM)!Wf&+iHBSDc2;r2sg(lbK_2CASIBE3G(vJOwaS$E3K6)^2%f`1Jel0
zj$}@|Smh}%r*d05xiwqO#b`P)W*;dW-exOW#W&-jQUU)q#60)l=*fn|#xS4V=eF&p
z+h_gQM5vS<^-(Jj@rZKRv-A#;it326(#`XQK-bQwk~;Lj6QN<2E)%@Z&5yZ2N+H16
zz<l;r(~#>T<pAy*c$~+i<(t~P#V_LL)^x<b9XIxx>Zj?z&O?sx=V4^<D?a=cUf=u^
z9B~8yy-hg_F`aAL$V~HdJXmNqLk0<HFe^=Hq9Xvgx-exJCmpda#WRdhf+kmISPsw!
zh*ApB1AQ;}&PL>$B-sK%gsBUI_{DpkP+Kwq?7o4EpRGh(`mWV_cxX(_Pk`~bkgq=c
z1@SHr%rl`7nq}#9Sjh&^1^`KS?iT!2cU;%F%ooVFYp=!H<N7G?Ap(Z3*A3H=oiMMI
zn~Z1_uG<avU(cqq+~(j49`QA*^ED>Ni%ZUqphkpL`g>|U>?52<=qhYw=K1Xj`v-l&
za04_3jmByWjSI6ZkOHL+v)%+ak8cv`Iv|Lsc%ZrwFWM~us9g;jRtFdV+LyNNw$#l_
zm7?-ZGCSqd{rQGssUNu$^K2jZwv`w}-LNLv*F=<QX-O}Qerj-RJ*&W)9{JI%da4;6
zr<>$gW|gL(4l8z@CWHG@W|KPS3Y8&~^>k-#7I}{uL+0>B0{XkTR(NM54q8<#h^`8~
z;Xja`u$c0JPHDIGt7pC3UUf?ytm1jCa&IqDA`4S36k-J8iIe%HyhcBo^)xEfYVE6v
zW}%N$CSOhJO3xC`2BWR?Ka{%bwXdON_LU0Ey@Cpeo;bfIX&;)2iu&`E6j5{O&~FY5
z_YLWjxUy_F-}-c~ik&Mh&;3ZsC!T}wja`0D;XDHg=ahC7_xLTXPeU~?-f&&o@XxW^
z<oa@MzZEy;GCglBkb28g=Bk44VkvM9Sw(s3mct;^uh4x1^_Cy;o13ZczBAW9hnGKF
z;ji-W&p+X>{{|NEdwwoJeA4ubw9SrNP+U6Z6@?)=8&=bD1xN({Oa{q31V@&%H>8fu
zZ|jrgyHJ9)tWdvQZ$WX0jv|*nEjcrOqsBWW8=TDE5vG?+6`pkyMcx_HB&Uwl!=Ac)
z{L89%1t@B_`vfSy#~fyR5ZT`n1)uQ!;3lf3*_kfj1a$VobhA<0>Yvx}hoRlPE;);n
z^;U>BbuPC3%XfdG{uDeLWid{Qs%&K@WlF<#kRtkB!7!e5gF~mVq`H*K>SO795f;DP
zR@7@J-E1T%bJE$fkOHh}CyX2ZMn+R{WL4D>^voP1%=zj*ou<+xq$vD0;OlahwiqiW
zK5Dp?=Ax!M-Gv?fvXK^A<kX-@!KxegM>I=L{LmREWP2a>#Ps5nc7gk*-1xr$U_hV0
z-f#U1J7(^`J5g{(4$Dm4sHx>7c=Gp^={u{hyRN-IRo#uv&hB}YS2X%EYPwp5NaR>I
z@>+FLwyKf4*G4X_yxdUU@|7<mw^Zftb|O=bNf+$y8CM<c^S|n3_ZiBkw7c9@kHKWm
zi%g0PFPnN?_k3l&WZ7lmuP=es?z&)FTdWSXmyy3fXZfmywRU$iF6X*nPf>B4sadUa
zPggY88r(sm_x0UDlDyX|$tdJl>|m&325T91@JX{rc`7sgLe9Gx;QFjTWTjsE{C^tV
z;r&0avys35=RDm1dpQ69O3(k#_SSHvxD2K}HpBMtIul=Rm>U4Tzwl{Pvt4pD8=Xad
zIE{DC_O`Z80)KEYiKp8N@W{XE(G`r}3)D>3d+s_X`MvJ-WBzhW_fkr!V_F^*4os2a
z<g1UbpqbgXvnV{q3w_c>Na^=U5NF@w$-Ia#7x8P7G^s>JrrHZ&Y0int&=XOpxWb6v
zlQ3-G8|iE%z(v<HvuOc^IQCF540O8`IVs7O7<9fDxeB&YsqT^E@!3^yg2UEKH>H;=
z0qF_{h&)}4!^Jhs<v14YzK1>F?dkHYDg;m8QhRzLVVZVJ3T|$r4w?OZ7O_i+$=ycy
ze=1A@L%K(2Lk5-f8VZ^Shfp{ScV;CJDMiW}?|J|h-`MP<7yI*o1H3&=0d8WOeQ<y~
zyAtV7v(Y+0An4HoFHCwj4)MJN_$(6Ox?6;1R3X)D`CN+XV{JM%Dxhk-!d^p^N1I&w
zZOHXTn+J$<c38T%vSMrr93VtzGO(@$H@Uk?D8(#Uor;yAe)&MZLhk6aZs^!A8ehJc
zX=A!;+R-%E4s{51gu%LCW{)m&UP(qeDmjkp^oFGcDBYE+B0l9>R88S28~O!BC)3ag
zZjDIOAuIeQHY9130Gl=I&Mgjhu0@(FZ4x3TIuL1o_T#Gpt7Uvs&>jq~MXQ{bqt&98
z7g~MwzK~o^rB(6fSP?C#f1`T)igHRie%jq@&}MU=Dpxy~<6hUBbeMue+_D@$KPG(|
zwwP`mV5fqC<x!VNja@PMei2PLRc_~1l-mj_EmuH6TNY{)Ej3ZS^Xa+dTrV^|>dH0x
znksx1#XYC4FDwv=>0}3z-26!eBP)3y(;UFm#)bcmPo5v1oLWcE>{EEQe`3Enety^>
zb^iM|@X7XM2m5E1Vm-5uo;56sRf3TxNZ%$!8SrrGUKQ0eDcK6BYSPcGlIkjWElT>g
zN~$a6b)|AGN=%nhEuicqr00ev6NKuF1T$5YJ?bZx9i{2;Y-r2Oj^>mHTC5n_^q8Sh
z71dQx$Kg~)FGlrb)Kd(aKONo*^coD)Gor;AmeChrn4x2PXii}0qOjP7Tx?Lsrq>Gd
zy$BX=-i1!z{P4qzw}+?OjsyR9wq5vue;#~zj!Mn7+0DTmIUO|@sJYuNe84{sK0H^Y
z=GmJ2k4Rca%>!!QwhJHd&w~%oQ>k@r-4#b5r=!*dYTa!YKH#4RA71z21m)rU2WQPY
zJOAmpPd0PsKVJ9Y{O41g|EPPUhm)Ys>?SDP@O{FYpb2z(=EOHl^x$bo@XxXrNS!?%
z+ne1Cr8aFrmB7EtAUeFpy_?fn=Q=^q4{<lWlL896^u1rZ^s@IQ)4Gtklf*60k*jwV
zP{f3+aP<;8tYn42PClHC#w`}3h#4f3U|;5-FFb5_KK6@c0qQzbt<etS{cBh<ZmjBb
zD5jmZYD0>dSV|-xU}!OBq<~jTFe4P+RWKycTAHbqNlBumV@mF7iIr?Xvu?$#SC3}H
zX$V@VV*kRkFxD9x%XLF)`t!1GxJo58K?gr!N6m4nnpWkesll+)M2cUtxw-hv05ive
z(}JOb>N!TPzg0NqTV*TgN9XF8T-h{o*V0_+^m6A?y`yO->FkZAySaS6k*#U@ImA75
zaW!P9v!0o17UWoo6PY8rngJlUZqjK6ECJ-!a&nnF+0$B25=8!O<w9~bmr7KQI5XvC
z{Fzr?ntb56e7W*znPxy5Prv#>toNy=dz&;1bWE*zmCSpF^lOx$ntP3=-zmtwra^Iw
z(zSUCijqEUb^g*_r4Z&X9h;M{DX;?-_O2Yg4XL5>;>`N8rN;LMx9-o`{ufV%^I$;}
zV2S<D*?6*{+W*{+(|z#&e(?YPDy)9utdaY)oSmG$vd&Ik*l(Y~x<-%92(jQtCMa}p
zf!;3gd|SW(5&+>ieKCGL^!tJQHh*)Af@khKp(Y1NHj+;XS1BC1-IaM*N#P&0;^`e9
zo&5wqfBfle2}$WwRY>C7Nj-P>09|nmG}jEv&*YKF7pGR7)>uovs_2yxcS|ey$!k{R
zDWqmSqMrQ1_RTt$$a%C&p%R~;ycc~fV|AKxne#!Z(?5yZ0qKukUd0y8Uz6m<F=3@4
z0HYjMn)nEL7t!L=yhlmSqpj3UMKUwP)>YteCw9OQfVq@O8V0FLxhNT{bs`u{tkxzh
zQ8req3@fYbr6oeQP%!bX3Q9@s?DP>?-zBg{Ps4r>c70RD#NTN<0<1j<{maRiUi0*$
z$iL+`bq$yfzY?4)Gpbf*v|yR3^CF$-8S7Q7d+BzMwwCN%D^W_fa%4G5@{vaAwb|t<
zNx|6*YF9;0DNoUoa>`R$oGHmpwgRzB77`<K(E_s9GZxjVriL+<p1gXmto!GPuA=`g
zMzo|WFIN4s=3bC$dAD3puoZf!OnGIUHAi%re#>tH8Z9AGoK@Y3sn|hylo-QARhx*x
zw8m7Y+WvB|?MaMwx`Y;-@cpN>FlDq54BdZD3sXu9!O>@s)WVe0Ld)VO%4(4$wScb2
zGQY>#92feYS2tmUDanPQ8?A)=lWKipC1pLW*tm-pmP``C)Njqk(PTIbbPe`hw!yl#
zvuEjfb~-uv7ePD4;w(aSvs^g^z+N8MH3E0Dz@9?+^D$o++kbzL7WewBZvXAL+58_H
zp8Jsh<IA-FKA3(J({J*G9j!k8`DCw>6X@^JDqKJp^Nqak1R|OY9qjebiF2sruIU$e
zL?$+>pC+?7xS9+HLG<&<jx62ZqvMNx%a^@CX))w?pi5u2uO#GbOtd#gFV6N){$>51
zb0duseCAm7^GSuhp7y$~Z)+LB;L4^Om?<BrN!3ldQtu+X2?kPt8E%+7VZEZHsrV(z
z!mKE2;`VACN9{0Ju+1#NlGAJcZPMVzcGghDX2(>68WXSt#ad8mzHR2_@WYF@uMWvL
z2nOUln8FkUQ^+mONOXioGn}3x|2p^sq^;k2VIka&LA-s(-oAskSUP;ex8a2q?!9Xe
zfU)19nV)fKbqvuIFas~JtnDbds#4o7?-^1~7tQNL$mz32L6adp=A*MdVp)Y3qE4}(
zHf$ejN-R9`Y*tYDSV#vzIGzX5Gz2}9kAPVmjxPySCWn*DZS{$N9%r6hPd;RxjQpDt
zr}=_WQ0z%xW%vjO8W!ovpx1ZojgH3Momu56ZUbcLW`29}@9hF7^vr2TctNBMMu<Tb
zMuB7;XSea7>T`cM3l#gqFqr;VOwMQFZ~*f=zd%2lL;>2_q0w-#(I$)oic-EDB)qM6
zxjqiHXzl{|I`LD}Fw5juvS6^8!<;)wJu9l;i`gtQo_vrdltc<tEkT#YKIkei0}TSy
zfx@vrp3<gKgLi7?9%PKqJ<#-_9X3ic5J^`tU?@O|fB|37aZox9LBo?Jx&7tlA$tAr
zGQi5r#^DqdN#2>LYx;;qK1F=ogTP`kqrpS<0)tU%F=iTc%7fqg=kfDY8k`lOOwv5f
z?6On;{VjG*)c+*u!CVey1^_Xcka*+|hi!&_3MyMbJ_Nw}IEZlZW-`Kf5$DhaS(A+X
zOQ;iSb`iY349Kq?+xg9=rTEZO{Y$kpV5QUE_&?YhO7&j7Iryn}`U-ZIhbO(WpZ1R|
zDr#2M7F5*F)-nxT0dr7jrHy65EM-9<tS>ff8B95(N(*S%_+3|uaMP+j$3msUi`$+b
z$1v6MX;vDj)6w;I*+`RG{$&5f=^?cK>FMFgnWZetjRGh(>Y%JbD7Bv!LFVLA0ITbg
zs;w60l2Y@m?CzNLb_Dz+66_sXIk|&g)^Rc|y+62X3!#n$%pEK%vpb5{p=WpZY~m7h
zR&Hppy9R7-e%iEMK<^sSH;U0+djrs)7||WM^;Am~NmrqY-9pnZBn@XrbP>vn-f2^d
zFfbuJF(CAfMLacOq-D?wXAFqhS%jl`SPn(X8WBwh13iKX{;j@%O(VRX{u3j7Uthpe
zGXgJxuV}cK!Fmgl95J0Q(G-~NJZtmRO|n=?Pzec5fcW58`{4g8?7zMKWOyCUCsWwm
zENTlbw*Pva%}zIa|Nlw1^I-pdu>WcZd`F%I<4HIr|Ks;3=Va#{ezec&v$pf!mm@zM
z;)da?v>s=lo&NG%GAR?Sj?PrjKJM4Zau#G$K3dl%rYj?gp?N8McNqehprQcZv5CbD
zJK?w?->)Yv;@q2EJf9Pz*igPE1N|(rY|GZ3By@B?0xdFNCX3fE7|8Jj{C;ntXjrad
zR%e@|F@=SmwN8WMXnHlFd;jOSalW3!;S|F$$U8XfVG=68rN$(`i*IR~5G4U64U-Hq
zPG1Vp0aS2|7dekF{sB!E3{|{u5GIUYjB$jA^T6;nwfkvapUxr~TXQ0SaEqM>{K2uA
zJpR{&_(u+r1>{cbI+;il)An?7jUl8DCLohYv>p5f!SU{p7JEkhwGHqUPA62(&`s)h
zjFJie{(yg24l?!{=G7TG#A0HXHaFCna#K=rP@x?4;Ks+SE0f_23F%F3tatV^25ENw
za>b)Nqt$FZLoEXfjLQXKJ(%GjQr*o`w&8`H5|{B4CH)tw$HO836Fs#aP3fY6R!{<y
zN;sO0T4P+8l9FL5K?s<^(ey+YyAXaTLn@vR5}6$^8yPHj<2U3#^3Pwg(f~BMhVZkj
zhH{;31wYqP*U-dVY}aux8%$mlHn~cfBGWvfssW#ron|GAn0%Z?(&H@J`70f1vHfa=
zV=dgK)<4*yVN4CH4L5tpYYiHrkw+G|lpcyS#Hy1$a-sFJ`HhJhb9;)mz}&7ae6h*?
zV8O{gH`gOxQWlk79$cQ?6?i6M4f%Ajpz!%pA3Y+Pl|y&f#)Zv~$MUn0`b4^)AmJ5v
z03cg&&eQYs*PaA_m<@;0B#1wMX_QrY#j|2n(|G>OG5$T8Zr_n2t~CIc>)&QHw>IAL
zT6b$Hom&enoocNjf>|98mdpWcl_@)oF3nHGpbNha=T*)r(@uQ&^PBybd<pnH%Fo9s
z4A;UFAPQfmfdhzH0p~9b`Zs-Bu72mw6l;0o)OE$t=(-X7qsNmq)m$f0aWk_bJs>=%
zvq(VbQMfQKh_h7^3<D7SG=^bAPKX}`;)HnIg3Uj496FKA#?x?!h_?hD^{?1O8?vf}
zQ!FkDVw7B4J=0B|CmRNwj^u+x8mkWdNW>8$o)Sh@$0i;6SaaDjxQ@di3Wgs;6N^m%
z(ou$af4xnb%}^X}NStCy6_te$lqBI{D*@i-0Z9`hfzS_ufF24rz(^_pSkG_e2Msi=
zPs8ynNHM;lM|UEa%5n5Eh~u*>e{9{_<fb9GRw^wy_vkB)6@HhZ62je@XH&AXL%v0!
zeT(1T(VsT?Pt*&;|A?n*P2bt%9k2n<DUo127>ex5AJ=r$fAGWMETRUfaF-Gcc`-Ql
z*%SPhGe$6=L|6<m6=t4;kJ@X-z2lSK%fr)CcHB*+m>dYn^Q2p_2h?r>jZR2K_zU@K
zu!due&3$mNby=+klcZs*xXv)hybA<KgnYPy1;sR%W>%FR+m#`#JT)cplVIGMI7y{p
zkcUnpuFVTw=RhA<;+v?^lJc%upM?YCV%o+kc84veQQ|>aMa4)E%2kX)pUTBFj)VFd
zM~a-)JZ$r3FoPuP9JYB&m@6tBdw`WK9@mk)MpM__w1$eJA=lTbODj=ezH~3r`%}?^
z73ydwLPxX2#MoG=ie@xReI3f0w)zihn@(+mRX^)>ISSC{aXKM$X;@JogBS-3>0eFa
zU`*l}-M08sBoNHu3AT$*E+c;s42UrKC?gL>;t9F%heLXj8%<`HS3`P2J(`Rs)5$o5
zPBy&dLob~2fyazYX{E6oX&mBNHu+}&ng$=DlNOQz9{%A&?@2fTeGE8;3i`mp2&P)-
zf-_vjTfvQwwp4Lcjt$X@pi2G_CxV|(+T=8#Z}}PUhYugx<6t@r&*?Gn(7*Qk@8RRa
zIKCcS&@axD%{F{Tq-%61^(bZP7Ci{2cm`h4GxI&xp*P|Id#Y<$_ZVxwg6fpDfO(xx
zV%B2Hx6aNYc}S~QgrriCf^hS<(uq@YnUy9_n>g(bc}%2itqGyNAVeo?2WHE7JqHAZ
z^x|G<0U72ZjittR*f7q@w082WOC7lh_2lN%gpWI3@UlWsC7^rI3Ih)zJxl=D76(S-
zZDu!cJPu;iJQWm1G(m#8u>sHbH5R(m6E)JW$l_PeVzs2<rgZ5{n&iR8@nGXf7RzMo
z@hMi!JG{5_a3|?=zmw!vx|39W{#@c_(&NVucaxUAo5UEQ1`J<!CuzaUOsl?mQ{JCG
zzxy{jgHQpudK^^<sF-R>NJ@s+B1mCSxz7tlvh#Mm%SD=7B&AnJTp7xEU&NE4`bMyB
z$$g%U?1h|KH*(m89&cfMyqyEOzJ$Nj6`lTMP(`{DSdn-ouwwZt?otZ~Sc_jr!bJpD
zEaG~%pxnB*pf+pYg7Ox*1@)xnEvTn=d<n{}c?oK>?j<PiPA)-xXPK8f^(NGswkOFp
zlYC#&^dudEXy;FR*{byZ&&6!G>;<5b_)o6eeUgj+?>*fAdAR@caR29SqDL(pDr)54
z^!OHD<Jy;c_JKw!;FNd-`a9iDM|KKCBV&PApdf7vUZez^)Zn`WuDc*EJt)3^PsY#N
zClS4FzZR!Ny>*&~$k6&Na+o5QMT)wT08>$6fT@1v08?29NU0hXT?vYO2}FINPC?90
z<W7R=EE+$%{e_;gkAglZQDHpN7%((+;dy&J@rR)w)0`MtcA2M!@<^Ods15hvZ}vs{
zoBeg|=((jhSz^O+Xl-;#yzZA+n@!ex;}ZE`%dcX~uf0-QMeA7MGG<uC9-^0(oe-oe
z_E&f~P@#j~*Xcmg4i5(d%Nz{IRvp&vn(^f*7S%)--9pE$Tk;_YBhX1_m*gslsP|IV
z#%U!%t=tP!0@eFCTKJp|$KCre9B<S#9B+PRhU4z%VK{zb6-<Sy<v5@8p#W6`ee2k6
zXOB=aNM%_vnx|=q<b}z*9iWBe>6hinXr5-ce27Q(sqFt#mV#xuED)vnA2!`g{IAW8
zhy1?}_J0F`uhag|)8Mi_`ah3P{?Xe%Iev5Wy!SKaNdK{i6Yz_pAAA2iIflQ-uMbc5
z&yG)eM~DCHf!cQXy!YY=LA$+;;=_cWF&I$r_~&2fz|V&i`!Se|f@yS{fAjNnA3Do8
z7`*a_!%4sFG=hh0xeEI4dt#mue(0Yjy@j&(xhYE}VN<8u(rgo4kji#xPW6QJiG%`V
zKj6YjZcV#hx0A`ymfIwL_%-}Z5J2bC`L7$l0X}ub|M2T3e#b}J4JN*~>1&FPM!XM8
zGBSt1saiU3!;T+sbh>O$Jpa}0B;4gF(6_=jcHV~ZGqi4P*KL@fouuaT_Vp~j>Ye-j
z_tuRO-c6`;3kkJ(^rnqR>@5rSB0qEBS(B=YlM5?t!&IdlOmNzq&}lSbA-&E*XA9Qd
z(T)%&7K<TX1Vg$5M#o~Y#Dmc!qF2OlP*J0LL+PpOJSX&{55IPPQ<`cQ8)(PtkpKLL
zSQbF-?ouFnB@thA8qD#Www^36Xt=+BXtRsAzo{K@*Y%L<oWD&7$X&p~Cg0ut@T(QJ
zT(6NKWiZX2;Y9%`W@MfnrUHad5DLm8!wjJSZEZ<IJpGDnDRAV*(u`~zowxcI@=#Z6
zLPvXMSU-%&I~$RT@b7t0DrMi_xSWk+y6zHbfR3kD*r+f{+v4VzZn!2di^lEGGap3-
zA}|l!-xC-<a#bYL&{d)EW`1yS5%xoRxmNx`N1p-=16T&g3}M47c@Sr+^)QO>brh@8
zzvp4B7XB9*$9?S5v0)sGm+3f`tigPca`QhP!Gng6x7E2M!$0UT7<JU8?UOZ&S$ZX&
z4J2y%&6$V{D8eRLA7zllR1g(dj(N#$U6m77-)?>87eUF_cfeA)*01E-f#ujv-FgC+
z)<kJQI+=Xnco((s5-TrmnYgu4s&umAmQYJI$NdZ3M^|guES9$z)<9rAeq0SNsg<Ug
z)5)oX4Qb6yaVk}y2=l=d`!(5r@d$nS48SG!-%i)r$majubiD`r?-ywQbv7RI|0YJ)
zHL>9tPR5t)59oWF-EG}Ai@UK95lw<l+2OcH6U<p6UVts$-!g5E|B8wnV{YtQB}nt$
zE>qmY3+ooYXVuMUXE(JiM$|`>L<Xl5!L_t01YNtF@BJw9Z}C>R?T8%p7}Gj_Z{4<T
zn*3rj{A4>9=A%Xhh@pEq%(~|a&6>V?kNW!ioiQrW@6%dQs#$rWOj<Xj(QJx}A{idh
zf$9CU_>5Xkg+>X6pon)G+VBJa+&?*barEOBj~+$i2i!DS2m;#qSX7!p``cVF0p0?H
zma*4qv_Y8S7Ev|nLm!?_E+c=W5j_jRcOH)8f<{0B7KK5E@R!^MNB}oW4xI$W6KoVj
zS-;><HoJr+M%-n;ERZ()a+8o5g!3?-@SbZ~6dqdjaZ)z-T1sF2oxVc3%iA<Vvr8jG
z)Ar2og4UH4&$x?HHXDyQgAKWtBn@O)R5X1*p7I-wdlsP3Z`K7y>1$*2ox`SVr_=7Z
zYE!1WGaRdYXO_$oJ+>iDJx=lA-qzL?OtgRl<v3LId^g#@qVt6x^(L?hq%LpLZ3N*2
zV{fu#s4`M~HHoIhSk7jr!~MexJZ#o!e=z#lSZWI}R{-h-;W)a>qbB&a+jYI}hU?ks
z*BbaOx(%uU-u2wJ>utDp`ju<z#RNo$s9eDWVbK~20#&P+!Y^7!8quQzSXGLn9j!Vz
z?eCSLDJP)Zg0MDi_E8<}WdG<ndP6s%t;F}1)2UIZmfM9QZTh!*<yvB(+u}<@@7iVM
zSJfS93`3uBl1^41uPMzN3(#n7*c*$WIZjI|7~0uch>E*u<DVrf0TdK&;P;~3aMUWe
ztn<`qJPD&I&)l`pYqS_gfVPrLtI=4>_+TH4GcJv?9FYoCSOB8x(oW%<Vv(zXSY65t
z9AuoVxUE_&ig}NJF2bxbb1DQXP++rJ4Z%tXHJRlTXJyfH_=RMfourd*3-O$2bvDt`
z(GyBk`Ls_ZQWlClQ^mKd5%AzYoBnN5ZQa7JOy35*fqqirDdDP=<yBPv!OIa(G-cH#
z&eU`9sF9Z8u4i853i~Qen<e(Yz#sO0|6@rLV6pwLv*~nl{-2)bKG^@hMEjph_J_eu
za7+F%f%(lJPv&Ijy&tmw|M#tr7M6B2`=Y%!Pu?7zy?AxlJ35Am=;h1fgF=VQVK{<4
ze%{@Z{-q|QxLV)`KA6qUm5<k1J!zok{siwOQQ=%nhQrARya9_6h_{$k3$N1s{J-rh
z__hA{o<jsDKAeMalyTuyy*T&vE}DR5!@ou*r|Pp}Xjq8ZLEK4jiI>zdQ5cmK_J6zx
z_1)8rEy9q7w*-wQ)cF+N0)Mj+teVhW{|KT<!vFhS(`DL@jg{lc9AfB%{VPi!Hj|x;
zQnO0mhfdyM4YW_g{l0HV*C@$3^nv%=0?-kFz+EAr%K)F1QuG910YzwKe>R*l$mHoq
z5vcd)|5a}m{$VlEv)eky&yNZ84|=3O8xC97*r~be1`YWg{5}gK3=d9SJ8agW@qQ2B
z+=|yRf$<xTTQFi^UK!!Mf=&1nv}rIt528yY(;+RIj<&6>v<0AQy)f>LsOtzSb(0Ul
zz`F2<F}o~_t0uqwxo<z?6{tvM8qEUHnR6*8{%D8eiwOq2IGy$Tz_Jhy)sARIu#2G!
z!1-h{B&cuF2hh2Za5g?Mvu5v$AtYRn)OfxK^lm5Qfg(gi0lamMCQRTdNhwQXM>M|K
z6|p1QorW-jXQTZFQACa2hfy?%w#b<!$KsX3|29P2-&{G`pua?^GHsD;JYZxTs?&1N
zQ%9-6=`uAqtzLuERn%a2msjE4UZ-MB^kAR`ze8i%eN~#BbpR^@8LA<n^Irx6oY}=B
z_fa#X5)_*xBQNLEmhkYi)mWDqN`L$>4$A#H=w~_fXLZowM;cB*jqvkg5R9kc^fp<%
zO6dM?hLbvU)dLHzWiXY3#(bvd0ZUxL6(l8ZDRDD4F36Om3un<aT-sHOcssCSJ7%Sb
zyU*15242oXe+Y{`)B;mFV4fGc__RKRF@4{Ee@oZGxLP^X9#5)r<=Sq;cq>?img8ue
zE?;-ENbVp3{m7Od3U_*lcZ|+@Z;sxa9zO5=vVZbo|Jln!D{ega1AXE9|H15LX$PPZ
z|DPw`=9BFHztes2|M^1ie^$S~`QSa2w;<5@`0xm)Us&2+y*@tKKlxYh>}3DoA1{u6
z)O#6~n+Djje}bcZ6?ZQ`MaG{B_dBz(s0yxc@+>7HNCdPmEc8(r&>jMqn5Sws4$(&!
zwv|$QW6rl<VMqR(4H^RmF=DZVu*1=)DKT#K+N@WO{97^%`~mHPFx|4kE?9Pw7*+fa
zSjr%XE?{4Xdd>uP&zK<{^jf4b14aQP29aP|Ds}}zFiKNt4JR=TVCA&4mG1%XJ!CE;
z1)A(fPEXU6#18ucw?U&EWrF#=dUNno?-Z?BhbJtfiA*!iu*7J#<JpLAU>Q-zpqv)5
zl_5c0GU)qtc8!+BX`<FE`l>;r)SxKhD^T8M*Ce>X&2u=1-gf%S0gT;fj0P<K9R0LF
z)~!9<zYl3gqA_~V#Bd&j)Cnh!Z&UBO*utAZ*H8}|qpS5T5mybBD-10~uU}Z`yl!LM
zwjuyoPu50hEQ|4&I5@=^a*W2sdMA=KVVWkX<)v51w(8-dvIwt%_YB)?dnqVDy;2JP
zO_l<@E~J3^CYyS=R548)rp$UQavG}XnRq*!f=Y`15t}ktwvj$U;HL>F5fvZC(8@-7
zv`UROh>G4WvvRdX8J1%EHwJ8!Uos(4t78o7l61|5n!x-R>P@?GjNS)0?M&XMQ)Q<h
zdS8czaODO^K7$s+V+$zoky8L4b(#(>qS1;vQW-@!eZ*mra{C>V=+1|e{(B&A_n`^^
z?j)0g{v`|Y$zU2ah$bDTh$;gr?@t;EqKlP)@q(lnbrt#7bkIUTQKJNYk;0_@T&b`G
zKI!XGYHSyFcBk`QCz&pACOmP<GCl9@y5##}kjF0BBAeR^;RB~EIdq<I4kRTUURWp2
z&MqBj5jEKzz)w1k`kjIcpO{WB&2>kmxO3vz2hJW7rs&N%AyXj!R@v8!Nd#z4L5HQP
za9p8z;eW~SB58o&N?n8Eie9cP{t`p5&SOwIA{O7^=9)KuTEgKk`!C<@pS?If>Kz=v
zIXYwV5to(MrSEF`LC&|LSiFeYy)-rBH?)kRv4(#H3cxqJ4zzj)+STE5U1>1xk;`<Q
zB#-3C#j_M~QF*&i6qRJr9JxXsD;9^z8J*FUHXUr`k~bYG$RqM07mL?)s+eMxZ$o6*
z@@<GF&7&rKuM!(q+_`0XNm3fU@GTcE1z2UxDS^tWxr{?~V2S}sxvG%Qsk$2n7`|?V
zMoH9T=a>t&4^YYRn6$KHJ+_@}Ox3KXe5&Q<vQOrf-g$}6JI|=RTBSEkG~V<Of@-z%
zU*G*E%=%QRO+_ZX$?LTEB%@a$sYVj`I<LgylLC}%ZpZvS2oGB@Q2o4p1zSB#zbd9`
zb@!v0KCf-5VhOsm@av{rTE)3oRs~XK5!WHoisljJ1=$=*6T3~zSz*TsOFB-d?KmOp
zIII(YNqvW=?4?3pyaOpTYV{%ovvyZfv8(r|$L{CUpITqoY!nBVxK3a~VOl+Rees^9
zrB$nkX(`n1X?iNz9#>EQ<@0t8t(T8hldfxs+?<w^-$EK8wOcbIv@D3snJwyIN&FAA
zngK8mhwG2gq<xOIvk8G(T!Q*K0g#BEcv4iVU0erD&xk>q^KdeYhqvT17~|7`;gJA6
z2I4k97ltCZ(j*NEr(@1vP<R%}sWp~4XqL_3N>p0Rq-{P%&0w-?lT{F1U6JNk*{-IX
zn$Tu&PsdAlP!XI`uL*4i@9KEL7$Byje$n_gDTAn1B4zOHotC(vt>b{)TZz1mcUj`5
zwvOTbmB{P3(h@%|I>9`#lr66mE=XII3^*GzM(7P4@wWqO@xGEPd~s?|wai_th#X_C
zI<=1vPAk>@KQ5LJ|5D=r;chq^UN-(qck?0t=NIY!;do!I3x}u)rVZkEEE3&1@Z%8l
z<quIfod#p9(1%I%o)yc~;pc}x?7w+=);l}yogDuY6yyInK3V%)c!37XA_(*!D252e
zgYaT04->JOI)UlpivE0#W>U(1=skTxV7m&X1OrU>)DNjKA6VyH?VrFd6~H#p%>0cd
zJ-m6k+1u<k)+FGA<5#bHKOVg~=$)ROyf`>R9QKAj+&}BRJlsD$v${6m{p~vPFGoK4
z@!$aVC)03*+YRg6)&*70TQMvl!M6Yw4C6pZD&riSh$ZVZ{MP<gBfs~<r>R8wXQjJ2
z7b}c?ejolfrZl8HP)zcvIB{Il4dNbwmum3k3@s^fTP1o3pQ9;}g^sD*DuLhdb2|f}
zP@q8UNfwT^PzL${9~x0|3=*4kY;p=e2f<FJj(da_jyQnuPZ&iE&g}GL>w?*<9zRw5
zoif*<Si(1!{xvB%tmi`TuAT?MIj@*GlP)bGlu>xq%_y{#cS0tt?MXE1B$dnMpB;7Z
zB$a23<4p<}P639a#b5&Dr=X$z&ZZT#&y(~3c|xvj#-Jz>I}D_!LE>;w53Yh`i=r&M
zLM(+u<by|EDYr(Y)RNSZ0`$BxAsJtywbd;YK?0E%H0CLEiv+D3gTQW=vCC|ZdeU7=
znn7@(b=yAfQ>+G>j2+H1w_9f480A;(U=<e=9@95}u4-CL?(P&-Irdy^F>!3=9JJ~(
zVpu{BCeWtuCea?Ly9gQhsIvUXt@ODpKQBPzj`$j6UUUk$G?Fo`TioC~N>0hy9mn+W
zo6Mbplk)5WHOHxF?qpLh`_s~_u^CE$LT5F=%)hCW9JS&C$qq{8<*OvAu4D6~Gu`qv
zH!olF++su}YrvR0S_~#$>0ISJby&!eaBUqdoBrmRC5!%60gD$a%3!jD7|@kviZSn2
zVqT@LU7!qN&^%6-98KZH%aH~_YIf6FG%sC?=A~=Vyz*M4iYhzQf%pa=NA$9IsCXUe
z8Qic<@SNR^{L%@-Rlt`no;ao{MnX=`3y;!}ibUsj81b1xSWgn?*;;7L*`!picHw?K
zgbnmDHDO4rHw$t@CwcfA!G?JXqax-@c?RcEa%jhLU9u|xpa8U{lNQ4mOve8;C0G7D
zATMdSh-cwcw@jxKkUto(@^`WvZfzkr8z7l?a(L2jBwOs1IZ)x!p+kf9Vr%P%p??{Z
zM=5{YQo8^>oA4xL6$u;pS_U~~!bC;eAD->4aXZcN(OK`s(aRS{hrCbV6=hWG94pl&
z-d^kx-{#J$&&Z?m2Kh68x7j{#Y?F_RmFDtJmuekJ$&c>1XiurA-LbPu#@!a*HP$@O
z%3X_M!%EE;wEW}Q^d3dAnG=PXmAe+j6G;^Ogk7lSUM)$iDRjB(18?H~1-u}M77D);
zmCxx&&YEx0eoQ359GsN&f}8`K^g9bGWJ>lKN%32Pv$Iq^U{t2+rO=Jkja`%oZPT}W
ziT`KRja-^b&OtmK+sonR0s9i<T9GrdG;#U1-+=8xvGcM7e{P>Ex&XtaX~t!$ecRvZ
zLz|}m(KJB;GvoJ}k1{zA`u;2qKm>ShKr2_3?Q`2duf@2}JZ`!TJNZ@*{~YRc-mitj
zV)%FsTYNEyiv<OuaO?|{T#?8^ob6&d#J|~fD)Hmx%h5Dt6-wPolyaS-2@?VQk-LSi
zig<3tos}>ULOdC-$wP5Ex`yGkzs_XpRAkw;QVbhuz+Z=R0!-jV7fh-$VD~wFdZ1R3
zu-|7LY6H4f^0kIET{>L)^)PA;TetRjaSO)_YXWG>%EX}QRhg&lzl`YnemflbmsZ10
zzR6`6saNdf#VuP?3SFZVmJ&kOAcT%`3A2E}M$OK&A-aZ_bPcaj3xD6<*G>`eMxI9O
zeRbDaG9%)-p??{WNi+ze6b!a{{i~{sal5l)J2nP&v9k|qRTGv<22V0QKFev!r%hwn
z&=YO=zfT51;;Ts`xzT-Od^MR32lObqa)a^NT70R~KQ?RjkIg0f$EIdEtU6*qK$96B
zZ)5jBA)SMAfl4KSo!^pq7|?);@Y+Z5IyYnPn3fFZ{pNmUHk<qUNp0ydy(tZmsv<KF
zoC*_SGKkROLg}8d(myM6Q*|6wBKXAW+hTeAnC9nEBPwN&6F~pw#|iWs^W)rJL;pbT
zU47BRV4AR98T2k@V@KZ44faTbH{_opnT~@yloE8QC{7_kcPpR^9Y<5TNB2iYeAmit
z1Stmzez&lA4f=(GzL9Rv5(56{JiMHNmX_O!a>v9y@@U_tkCO+_*q#nUdeU)5zmOLG
z_()w;P6HUR!+63XcW!={DN`^V^UY!6pu0b?$+H39h$T-Wd>pao!a!t@|Nfw{yX&a7
zBm-zfVO^r0=i@H%0+_fcrQou{x)lmT(B`um&=<{T7XZDHFvmqtpe3GBttH=l`z*j+
zwt=~b7mp8@oOjmq+Bcb6Yfy2O;<-=npa8nK%t9CXd_nz-!q9FRaP((?YIgX|!-tmR
zn&C|~$srpXP08_DN8I^bsHP@1730|(dnK~VWgs;<7-M4Sf_F7a+2NVO=+$?TgvUnH
zkD1#I9k=0%nccJ`W0?|2AnUWPn?T7!;?TdU|2G~&F6#nb693<I948z9&w26?|No2h
z|Nf5rBM2k%Dul@)AUh-asfPXg!gGF#bC2i9S?bMc6X^l^pzW=3XD%FqFT?l28O`k{
zE2E$Xs^RLSH_xP?Bz)sSQrJCmK$YTlO5S3Tix1>S&u}}3x-vBc;5l+X^5d&^svbV$
z9;jq@mpP?Adc?xN^Ji%sz2qe1#!AAP$Ok01_~2G9{YKV9Z>550%<EgJ6i!egU?mc{
zkx43<PH!ZV)QJ?R-bhp(NmW&w!Er6NWsx85z>agzqQ^@*l#bicB6V!HV@C5F9g;_b
z`sQ{@%&Cg@5G8MphWUJQ9WA?MV3fUw)%}ZI|EbeL5A7iF;#xq-`rq|B+4cWP*V%kn
z|G&iiKOXtXpS=&rPeFJd49U(F{%T**Utg0Nkj>_EFo}Y_wKW#*D&3#Gc`=^8o?^J$
z^a|8Z`=`AVv|%3hP7Z%KJUKi%I6O`C3ZlJb#FrK}oU!ICKBVcgQL%p*cfpd#>r?b#
zPLe1<o|x@-g8hYjxC;AM7`7Gz)Us%+G_Ea3eR7Mo^U-7uLQq|LnaO;Fgt<sa9QAwt
zV1U8l(2Xyk{9Q-Ev`3RJbFe)NP}>84>RX8~CGa)E;8?8Z2)=-cv05k%KT0E005sBR
z$mI`U$*ofUI4ymR<?Sd8{<gcz?aKf8PjOVi1TYPdlW_w?8TY3%*tb)m1_N5-%h@!}
zL{nkDF_J)Sxprxwo(z4Y1|4V`&}`iUc2;(Uc@5Lpxc+(E8-y2_ui6^5#)-QoOMT5^
zs_48n1)i-$TlvWE>aD1=gJ$(Hxuqv5QqroYQKx#2m8YF|*xBgV8yykYlH{`)>dEkk
z1w~|Pu)~dAY5I1F>|(4{rdXfL<dVh8xU52PEoWgYA?nKHir6cq<gJWS*GOrB>bWbc
zp0~2<bq$mr_8oVs|Hu8SS=8h9=!G``W%)m!Y~=L+jm-!9-xsO>qqW0v$^U=?8IYqe
zh=VcNd56FLd*n~A;`T5I265Z(w`cFa_+7vqB>AKxmY$;Wdu4?>q~R{;0UPkANJ~}o
zlQ&ekQ+{nPL`hP&b5D}=8yH*S+>fCRFJ|LDUb^&$;q-QkoMA5jaL_qtRNEX6Xv)tw
z@aE0Bbfny$%uqdN2g~Aze7xLj@~7gExjcy|z5hp&n_$FPS=K1W7yI1As!_c;fHt)w
z$x8+*xSLSr0iNGjM(DFV^qG|jcNtASuqeRzMTAgoHsM#A?=h9;P)Ksfl20%J4jWRk
zKyG9`Lu;>1-|fO4pdcVcqtnRe>eF1T+D{{))18*m3-?fZZ^$~-d7ZpTv)Cz{7}}vg
z(Fmex4Es@-f^f)(<8T^6$NB??d_iOO<2Pdo)EIa%1DDFZQ4{4&9Te7qq@6CUE7TDq
zHWHDMv_VG--48`Gw7+xj8<<kDZD7|o8&2027z71UqnGpGBTb81hT&TK+PxtX(G8S5
zoLdpjNn6Lpg7uB1L3pDgypi%2P{EJ<n{YH6VaV2VkUuOm;Z(%QEUvuA#O2|=)>r<`
ziv-Ow&#9x{NQQhJL?<wq*IDyq<esmSS5a7=DlDf|Se_~@C)2OBm2~RxOP9Jmf-&fi
zCI>WI4=kIYEo_xm<uY3^b7IWoirSaf817FdJxb@J^w}?A@L>lpM)jw|+n19M0nSWv
zKIAGDPs#CgMkyCmgitf#G&u&f2?q78+P_xR$lAJU=iVwkAh$kASw*GkL>)*>CP^Us
z$#{56&TLxsH^l?#7(ma^^DNEHoM(^%EjC#&)Jn6v4Ejd42qqo)(IeKGcR*KG?0S4Q
zK}@EM9{EQsd$)yYHH4)SLqkKq=rMhArzD^gOc}q=!eH71Vpf?F(~l9#q`}a)i2#*R
zm;_yvOl-==Qg36kn99O===KMDChLI;`L=(tXVuFItEKB@=&HV1)G)Efz14_;wQiOK
zcX9LR|9LX-)O~m_wb_Uk$IRu!V8lZ)r|WgRmh%GBG~8Q|gwrXslT_LN<GuG~e1J;!
z|4%l%x&42~+j!XjKkWY>_WyPF|LlC`Mf~#&_RD9<vaLDjJF6R2Fu4`1gz`C)BF@!`
zI>;uu1N0$g+xQ_WN#rM3cY`tL%9m@|y{TKUH#P4}Neef%ncb;&Z>om%XWP$@3ZL}U
z3ze+#WZc4x@zC(>BFpT25c-#Bm%yrAOomt+dK1Ve6cG1YVa$k$G3FEZ#w7_Ny|}`I
zW{&*JaEzytLD~V@Yjt+(s8+E0!M~Q%Am7L}%u0gN+vRCo$bbGrC@M?XM}?E_&3QXc
zQ4^+6Ek_j5BuR-`@If%m!>>#5&`#1@<z353sBw0?@ER%ZAsg%PM`^N;cV;OoH%9%A
z<F57Dn53Alw`zM-MMG3Y3zTWfIx}Vc*q1Fl*>FL1Y&?4DL=_D@QGEkXRNuf8eO?A0
zlgOxbC(2oOOyDNdPE^sf6IC?rMCxuWDmCpys%a-$*tBD+xyJ;h+4*ZRmNV)^75Dp*
zy5EmVjXIIKrH^t(9cfqViuo(5WaufbMSf?ghd0@E_ylq1ww>tSZ9C;{&8N@EXxw3(
z_}8%Oyh$$VLdV7g<Kg(S-ENn6g_W9hOoOO`O(&{h)5*;em2EmvsZGbd7n_dRo?{YW
zkufJ)#F)dP<5jTaTvRpW4DQB`Bi}QuY`GCvDDK2`lWn64cAI<h{|JT`{0MCc15l~|
zx0lWT@x<+T5B8r=VgFHAxjVH4eYJ@|^cghLX278rQ%2Z6QyPV|QTAWIu-AB=z9<;d
z5F40k5T=)y>ivTN*VRcJvV45#XpJ;!Bn}c^2mSB@x35rE+^)7)PC2;${1m372WbrD
zT=IiVJm}h)4`_({dKg~O%dUD&kA<`sh94S+R{5PonpMu)l0!DJ%+L$?a@#ORdg$6%
z0FSDl(8%<noj2Fl0i&kT5!1R`BhRP<3lfUjr=+SkAsF(-V@Q5aaoVU-6QP1HQ4>L;
za!nG1mDWUV6P9Pd<zJgiG$u<&&>VRS$W!H_wM0WOE?Sfep+26^E_=?#cgn3+Z$L)L
z=<^1W-)~WIOd-rA1#M(BRcRh8jX6Rx>4&MQP#3A)tdp98U&wsai+*9obm9Md1<^zD
zm|kJ{kUad`{=kau*c^w#$eb}`63`<OLHegK&1>8+gq{GYCcWw5M(V2c{}4uc*bn_-
zuYWZGb>aQ(0{eh6{om<k^nbVW#C`Ih|9^?+zm7)hZ=>G--xpC3e95|g45E#LUOXAj
zgGdbpk!ZnZcB&D-;JP<kxbg$BcV?-2_bDxa&%0+Br+iO&CZhD8CbSMXshupCP1xE>
zu&^Fzts|VRd-RC5=Z^-J$uPYnLuQ5#^JHGWJlkPMh9+)?!)Ylq1yndQ3KbJC&p8=0
zFM}$~n8^rc1}oskCP)5s7Wu<t45S#2FLz#GG;|${W0oAKI*XNPGrlQp^b(+f!9;(*
z9?(7yn3kb8F)z?+NQs1sk@P*K22Ry-28$FlFqfpm8w5DXg<~Ig-*rT2Ml7kGj95pM
zM!Y)Z4Hhl1rdW&Ga7^9wIvvo)VKJ}YoH=}T8<L;RO_p=>>@Q5xboXI2s2AXCW6cpW
zIj<5s!wCyuu=j97^GneG{|F`*cO3t%+v#`@@!!6Z_-{`itp1u<aAp3Sr!WqWFEjxx
z$_?+iz$~Q^NWKw)MeyXh&?)X4IRuHuiwqZ->uH2>JbF`$O(s&=jfSlU;r9U>>zUU|
zy#Q48(4#9B4G*GaCRe(xl(vrUi!S%8fv4tEj7EMrrx=>RYf9@C8T!mvavTJ-@zqE2
z7D4JRg0!&+(&i#aPZmLXx(L#Diy%3jg|HUt6He7m!6GfXgUX=YiwdB*7|f*uK)6zL
z6@zst+WBNMqz%7O9SNg0qsK;N_|U>m;#Q|Wuv}<sw^O+qNe^LrgS0MbTuMHlXzm+P
zS8R8k1JGyk0hAV#YxXKCd-$NAKj{DE`akqy=xHDBEdJw$(|MAM|L8r~|G!ZC|Fd8`
z38&<L{Ql&e?7YK|_BnmlcD||zkf5@I%!HzP_S4DnKTmtl_MgKFa(rO<81~ov-@mDn
zi)$jV76o`4i2A~AlWPAl{AqMsSV|c_A^tX50{<gf^go8*$5cUtt#{}Cpyv;VlRk#^
zON26wW+0P~27JD3GGN&4e9JRl(QL`^bn;<L61rRDTk<!NQ}Ex+le};T$F~U3|1X1x
zJ%8p8_K}t)2mVf-n0OvieUDIFnq-F}IbTaL);2m%u`tmK<KreYi01w@oChabT3Wd<
zl`=0Eo?=v$3-uH)ltoI8%q^B$y|5NC{5DJjTnY<dYU-a*$!`n!;%O!K$MBjCi+$>n
zVK9gNDs_D4aLme#zV<NLL@{1il2d*^^W&gRgQqgzFPgwopaLj1#5(nclN{^m#X(v!
zOZJw!0y0NKJ_BCh#5#qIblk)1xWkLyCDbtPb?}0n<}r75#+y|@>QykBM7Kq}Q9tNp
zdwteej>WFTouUpiO&TBdruypa^0$`LAUiw6dYY18`+M@3xUyDVLnz{^WR`_)jnZ4X
z(&hSgO{F}oRf?A{<*8K4Q>hf~940xd>qKE|1;p+NFF+0&rMooAZpxffp=N{C)@)#`
z+0&&-K2>W*2Z45mP4hTsWl{+(U*N+Y@(bSojX?8ZIl=BQ{=egF<m3N8#Q*pr&wn;s
z^bqLl+ywF+A9EBwI_{mG?Vr6k;CBhD9T4$L0Q_184o@~Q#~EiZZf#`X^t8d<Bfq%Y
zW)ONzKz{MI3#oJgq+0+YJbWpwx7%qT_!~#(I-Le>Om9rwxRoKj%8=bcNU8|AWbRrw
zP{$jte|co?@dxmat4m!n?@ABhUsvz%jxck#ePcBW-g_nQy9MuEO8>U-y;t(STkzha
z_<t0>?-so8()S<aJF(v!%|>S|dqi?$^+o&w+65Gj)YD#j)%F+9!jq0hqYwm`a!WvX
z6cn#fi5iCn$jVe}qByRM1U~?+0IzE~^hBc&1SHih0dbNtyNwdiq(bFXCD6Lm>xtoQ
zc~ifw-qdfaH}%{4P5rj0j`{1<lG3$AW?V~N9d?9A)^-UTJ7=l_$<LM2bf6ErfG<z~
zr{0e3tL0T=v;OaNy(is_{=c!g;XLU7U!?z6%Ww(gYjFa*7^Bvl-W5E3y?=6g*!wB7
zJ5XL_-u(-fmKVK$93B6&$3b2lKY#P`5S6c^<1^e=JcD2VIy^I^9uwvNc=QH#A7{On
zhx;(QcQNM3qetW$R@i{_dM^$R<R@0I*B|=h%U-WR$zV-J;dt$DvvKm3XHyby{hQ=1
zeofwrFo)B~c|03W@NPjc=#BjAy-a1_937uLKRh{n-g~wG8djp$ua9A`gnJ>FoS-wr
z(*a%lSmrc(5(G52wiKc)bCkjvm%#Dyk1r2@+CTj%Ax%ii-Q{F*ISlwT*t_!ME6!rB
z9z2U1cZbdga(wwF|BT(Z*jrl*#<LNv1a#_ta5NcXAfe}>e~B^vY<e^HeL!z|oXN3@
zT=9xt)KFe^Z9xrVi*Qm;*FJ(K{&=0dNaBgb<a`3X+9#}vJbD0*5IHUliy21uOoYWE
z1oG<?*e5h15e}b!`xG^t()frh>|rnlDl~NAI{7yg;g870(7&WcexJ<7Jnm1n-nO>J
zSe~Z3NSptM=X4*iAHZyYnaLu0JPMuU%D)c85N9=$SwdD1CeUA4_ypiyeu6Y(q<9&K
zoM-SN9J5JKI-U@<wq=fh9+gCY5?ow_{SYJ)WBj#in6$!qFuX-Z9vgT1=-y9`ucwaq
z;@T+$=wbdPOBu{80&5s4j&&>U;v<?wiAbRC*|V2(v9!HMm{cC%6g=gI8ONq9DV#jw
zohgfr6=01IO04pRj#{Siig-IRMVfMp?5)e~v?vL+ZSjDED%flUk{7YKnGCP0)fP9f
z3DQT@gp=A!A;IYJR{8?kKZxc5>hu<qGmmMp&VR8$7VEt6*bY07&TaA`y5^2Sj{#~D
zGv5=1G}$5waeCFmvYhrtQp~!9{=EXcrI2bgk^qFbGht`8?{;Bl@@^;PasmrybV^ea
z!wdN)y(|h;zot+T>z$O4gi#EUVkl7zK|4q&^1JyY31!NV{Ll)aZvMN3AetVeiZ#}h
zJ`;cNuPr~c{dd`rrE|<#n6nv)ZBNI>qct(S8X#@6c?t6nh|sGkPsFn5aCnrR{pc!e
zg}b}&8iLt0tqLe2KfHMR>Trvnh>95xPfaIu)=N&O6P$UW@oQb|XWI#L3}wDPPal8`
zF%a3!6bN8+l6hkf)P*?)rr`J+5<0qJ=bbj@7(gNi{V^vI@<=!TQ?dB(_WZD+u$dDr
zEj&jpvubc^I|bAXEhSb|O*1hP896D0=HKk5QFt|tD|dE`UHZ)Lzkhanj6>-K_I~S~
z9b&u~sbz8~#o)+=G7C1Oc~p7P+Pj{C#&ZrVSjIV(kDXj=CXJ9UD6Q{`%3_lff1B3!
zBM%iJOBFOrs`ywYKb7)jch&1HF)z3rM7I(#rpOAH+-26<g%{a^8qx~ZTx6GAVuhWp
z##*Ld$#9w$p*vwGg&Sl|O_k8)HCH=C`O??A#n&y;OxBt9!$Omn>;lWoQtQUOEEvnL
z6br8t=)<=96@sq{FsEp?VNCg#CQHhLx*lXP<(V5#1S(JS4A?NQCuL<PU3^3xB(*yk
z`c|I2Gc%z=AQOx09;Pk*AQD|MtwK70EOp2f3>f2Z38h2^g+szL6a)b#ykEoL0DeG$
zzXV4g@4p+TNx~nx@k2LWVK<g6sk3oByS|=8(_rvewyGZ9Oe0?yYEc{0nAIOn|Eit;
zy?nJ?1c;LJKgY?P|7~`?&cpfNmw5lnCC`I6i~@2rLrb_%b_Vo!d(3|KKKSqbk#hW}
zBl4A=AExtOI04#$Kax+CF7l7U$;cmv*L%g;$M_LdFlU*!Uis1QGd~(k0gWB2DaUQw
zxrY>gJ5?`8&R=njMZf3k&5!;pj$wa9<K%B~Mc@a_rj%*|G=@-tOJk}dOE+}3bK&qZ
zRFNpbTQu4r)c9g+i#nwucX;JT{*kWvas}90Kfaz#W6Cr?A#0c_Mm=#jJO@R6Km;|F
zt{aV@pALD@kM!{)GMn4V?;FW;F2%k@BgDZK>_P+9>!62YK9At$8uXhOPu@g(CE;x`
z@GcVq<BTUWVK2wYOUAv(FhLiO9!s@k&E{Ow`MWEp4b#IQ#8?M$BLR#!z-^*si6_X^
zl|Q_|o$zCKTN1!COr^GymE0bgStz@XP0<=vFy<5k!4)K~xY0ws<k*5R+Y$58dIu`o
zD6th0IfpvQ`f?#*?#Gqe@zNqtKfJNtHCkcg@jOwE(%0*|GAlxBWAAo|(?$aV`?W_p
zZF%E<3sljDKLe^f+{)yGWjQSh-mv*yq-;`lmQ4!!7~eQk?={>byvE?av88s`9;k7~
zE<5VoK=p36`Hx$?v<cm{wa2<Y$G&NZqEL2|YfOX#dUm72@VEfwUyb7Y3A_0x?6W8b
z4CdXtz1u!)?!wU2chZTmX4sEgi}vHrb{bV0>ovGA^ecHefS&cvUwO|OV{b{ijt&A#
z6=%BA{FsMf7Vgia_vL#VKMH;xL?Jt`jG2nWog?UarFP3qc2Cyj;JdMZvNV|;ktTCQ
zzm#@S%s<(}tf0_M@nc)+Y66gcuIKt|h3@*N+;Lem0nkZsAtfUvt)%C!C*RU?K5BY$
zu^{K4nIP{$j7{44P?nf2)S6imBMnw$RoQB030Y0+-Pm+i#WwJ#KDnlAAa%CV#dzM6
zeZBbxI~UA33FB<kbNN)orV?}%qwJ*$C_0G2MpUn~?wsxMr@c%5Sm>lef8ADVIh%8U
zhRv2_@iPN=V@de1>-jMl<60f?Y&91_glWV`JQ>bt);DzBqxA5ea?53xjEMb|1qN$l
zIHoo}ZDVk(_IJc49;0j^Jdj#|kQ-DQMFI)LyP=X=r|QN{+s8m!7FB>8Nm7YS+LB5m
z)R-IL`l6{HPt-}MTN#F?jnlru=1c9|$|U*NWs8LgoK36(R>ndF3@ds(4r1YKl<+kp
zFo;hv>Np+FGjb838z0JD#bNbfkXV_?^)wuXf3W0yJewK7<hO}C`u?>?h9fH@(8hMs
zZ7(L<xGN^~E40dR)E<HSM!4?H=4wc+G>jht+aez@pg*6W^WrqP#$3zvGwE6AdKde1
z(Lv&HFoUMy<zx0vmVMXlBm*ZD`FN9LByR`gFp7AWwgpy00eCeTXaVwqI>p`c*4F9N
zpUz_77C&f{v#+WU&`DDWg60#XFuS`(vM)x^NY{9P2mRu8(gPdG(HEV>U{d<1nSk{1
zhck0jdgnvxFqx3USrlC?GV3Am*k8&1+oNg7?&|*crt3b*-T(GB9_+sl_diSq;LcO>
z+#iQXVloQ;AU_0AG$A{KOZ;_#Kh!(qc?+^fp8vo98;8;V`@g~v-TEgQo-3H6zo6SF
zxTF9ZG-UfFP2dVjM~`NmXUm{^6@wp~9UQ-UeR6n;X_-#Xj!*V~JnWqu(qKvZXNSGl
zFW>z5;wVe5+1$?OLNQ%&&m^J{m))hgi0{s`ko1_@5ED5Ii_388U#R%f>#n!wd=Ysx
zRgqkP@}s0F1XiR(g)#t7SBL;2VIi)hG6?nk;xMuwSPqanrg1>oNo}p746@OoE8wtf
zwG=P?x}Ey>{lFa#^O)?-N6MV)MSwd-swsd1)v%tnA6rQ%s|KjZ?G97<ijM+>w*h&M
zql&?7S2;kS@T@_lhy}ql-BtyyP0BET8at(t1o+v0ruu5kHm1NzjB?i`bcswKZf#M6
zw;qo%Oy)t~5m@YzlCDyCg~@IZyzlNwhws~FAmx-_*pY-cNK$j*4kFxG$%t-k9;K-8
zL-ixzoMGHAmJ1wr`KYyvt$b><VD5`7))n`wNC_pr70qU-+Aq@G!>cz3KlM&u!8CPv
z(mVTU|Hz`D?4TUUE8*AhH;lJnt50reT1e~-vXf=n1;V(87o_spk91D0bO^xSh#Lq~
zdxrMrxZn}Z8dcbYey~9l--~=2M$tk-{7{!~ly+&uj69{(gcv0*6v$sYj1iK1%9+Xe
z`X|1(t04Drtzs3^KE}+Wusd9<DD{?u-Z!uqrj1{U)3HS+S_Si?+TEGM4Vu?*s%P_v
z+I!{MVdSt`K$-+-5?RIbNI&hk8cZ5E6r?%qabG86UdzVc-EHN9&3ODrK@v>rGKJI5
zV^<z6a#4Asp)#B@h0AvSky~{23EoO8AiLfK%r+XCdntN?DmGcidgk0mwMvp?DphVm
zp}=uYdY7-T1pFR>-|s+a2>V`d;N(hyPVUT84*DEQz2zvzYBU=_A7Rs6XuoUh=6!+_
zL9qTv!DxP1@vV5R(-}<((KkH3N}SgU^6u(?W@bezBlq3iPOfRzl8VT#yV+$x%n+_U
za*BuL#cVjdz3*XJZ6MSe25#E_6LUWAbcv3obiGvi3w1j_1`3g|Oa+O$z8kwL^M5xA
zCNo7W(~*_=K^}pUr)Cv8l#ygXpbRt>Z+en2rasS(q>`>HoujGUcQ$UJ(G5L4f<Z&M
zXoBbJ(Ej+b)7@@1@yosSaM58&RHg4ip8)2diE+d;$5)HCaIzypHyg2Wkk_CL)Z?ep
zG9ZngsG*s^Kg-ckWgcI$^TX^z`^K(kSac)JRho(u*2sS!93&mG&eSY=Hr@CuL4%X6
zR(2cZ)YwHyXzpTY-ePFoYH0ak<x~N3s{nabfVz30vhAM(yFaHQtczLUE5dpJ>s5rs
zZ1Y`dkCW@Br~>0jL^`GA`{-IaWJgYqX}$nBX7|_P1SH>!o%Pt^!hmnWaUu`ag#OOJ
znp7Hvkj;^&0Moyo5yflwKZJ4cI+_e-{V5~S;F45d$qbk|fQqydcJW6s$KBnL^Ei>h
zV~rHpk>02)3Do9-mRhcg#;D<z&HrqXfqAk$n6P@fXqCf>lQYJD=HCmynp7pIRSJw3
zxfJ#@|6Wv7Ht9rjgF%Nb0(4`Uw*&PHka*aJZK*#;g-cL^({dH8ei;^HuVw7<7LK3f
z4B?y}a;Nu%tjO-zPD+Q%YYxl2+*(ok+}K$&fVI)FdHM{nKUZ2YeOfU5F-^d`*(Frc
zl8}=}MSSCB&XiyJOB6&WwyaA7T_Sn>9y+SmSqPtMvjQb)o)x-{v<wy)h!?>M)R)27
zR3{&gm(k6a;b9rn6`+7$tMQmm>k_Bpp|K&@Lfa%_;>7PIlO~4{1&N;~1V*(bmC!0J
zp{174DlWlFt?X8(y}>T9V@0yhLB{Q1&U#N(oI+NOC6B!I%&Yq=IoGesi#GIa&pd!1
zWG+zwAvY_@nMt*o$W~trhXRVty3+J27gECF3ae8jHI7=lB=fQo&qnFuK!1onJnB;A
zbUWExjJgHd>29i??wU0-N3WR~h(1n<KCVyHsQ2!sD)MfLj@(LNTM2A(4Hh+;y<L}i
zln(K$_OvK#*muxGlhgD{&jTEEN3V^>wy%q07PC5XSieYi$+WL%?V$cJ^zoXPq1RJN
ztr5m~wgfS{A8;BAbfzM{3}vGl3HoPzJwY`<W`m6l=_Mb2_rzGs%vtM`vewKrSo1Dg
zOl;b)6UOpp+H^20Ys|sL1(YP_xKTR6C0ld>gUrW{WV^Fc!x}w&0aztwk8}*f;0qia
zXatlOQ+CCOgt&9IF{V#ArAN$D**Zq6Z-q3t5IuCFfrZd6sc@*NFQ2D!rx2joKb}O2
zwA3q&N<Q2?_acNsRo9n2c>fG&rFLBWMMmd*l(8L1x@YVA@l+U3ayV#0qXxrroLp}^
zcQOdfkf<(sMU>n0a7gMf*&?tEoc)2%Mqyc2d0|M0n+pn{D)RK7|Ez{5ptA^1N%Ns8
zGnJ8aDus=cj8A_!Wc3G_%u~||p4`lyGOOI7s6R@veI!G26wCwMD~Ei9o9)Q{)Lqm_
z>(;lL`cv5!D!bx{q%Q!~qi!SF<BdEDAKHi!;#WOJ6}za(cCVXsUBjfShUld2)C2qv
zel%cb=xYm3%&vA~PJ3S>v0syK+*_!HSAQfvFv18--b}pGp-3XZUz2ae6qvY`yG{}i
zIzp;PD6&jM*HrxqtU_lzicut)R!b+D+=|4ykAEIp1wvEnqA>kiqX}zTLL`+XJeQ=A
zoezoxTvQsdC1K~J&7tzr&K9o}VXN4UZT$q2=x?9p#GyG+(`cLtrcN8JL`X})xR1+L
z6p8tRcX+kRYDl!wXUZi}o>Mf8_ph&`$ql)JxoQ~1WO5DV!bA@Jj%qZ727O->N)x+q
zC)mq*=<RK7Ny%05JS-zcI&1PWsi|o@+ZV2***GZh`E)BBJLW;yYL_`;XBRi2fBt}p
zo_P$#bgD>RpGC9$+xo6knFXWKknV^peooZWfx0p0CwvK-yKRnK;T4^_Ybvf$n1F^}
z)61<)JU30p&Yq3m>~z}Bh2C`v?bZ`?puZBES$R5;f>R-p4Fi#!*wcwcd+wf#6qzm2
zT#D3s)e2EvzF3DcL+GxzE~5qTagrBMzAMt5L8|(tDo7S|uhS^-4j{4(2q@AKnC6|*
z@FkJn-PQ3>Qj9X-VNm`MO-5%?cs&fJsqQHDX#9on_f>aV{H3_UtEz0$%RNlOO6kjo
zZ)5Qgx7&$x3Qy$D-Xw!Ofi-%X_h4T^%&SzPmwFE-#KEGtLr~gFaW$df!=0p3+c{5f
z<uZr%Y*T3om_+2?7WdOhcSJI|er<BxgGU!-x7-mYY0ESmp42t0$nM+oM#iHW8x!BC
z?qo2wWTj3t90jB<RkD_=yp;n0D)Utia9x6w8aPAcW8swuslSZcnE_@c;y4d3!!fQ-
z$&+9_C>X=GB%8!YUYk5uX-Bi+6ds2`oHkf_$tWg(!i+nXltbrwIE(Qg-PuaN$sS%u
zcASp)vav9`zy3xnG5N4=citd>itE4U+vH<8-`Il4H%oJq1U(RQ;4A;SNcENV89~yk
zt^3+q|D<}la5=Q>QY7XQs9s)GBqRTtRxVLsY?5d`wz{;@pA!~L8K#5R(}+K#3RUJA
z-6dr}_-aL;CL_IwEf3Gh6SlmRE!EfpKl5yP8n!%@Ew6yBd^ci(YI4q!c4=~nFOB&z
z+gh;l>h9W@ZKLX{{5iDjN~-Xc^@6p9yl`uK;O0EJcZuVA%Nx*&r<ru-TU8e<>Vr8e
ztt)vO+j7EE13crp7)b$=q}fWuS&5BSLNa5O5fS%)34;%RjrEC_&5zHhEk&5N)#~if
z6a|TrIP%yzOEqYX3sO>T*|V$~Cp5i5WlmJ+DxCbOD3g^q0-3AjRNza4Q-!;_e5ox_
zjjt?D318A=Se>_1_}!llZ|k+D9&5GzOO#Qs$Lq1GaF!SsnMY$QC5%xVQcudX2FnX5
z(LJTv$*S9AjmCCfHIIYg1-GH{X%O%Cmbp-uO_lUUXLhROx66yCN{%_wAgyew%+$GT
zt}F(x)0vx#Fw?v%Zd!|Q(^@q*=!>ipA(Qd&mVB5*?_+8YjxlN#Ps7;{;;oW<C-}FS
zaVAq;pGMvC3M`7B3^v_ZW62Xrw2gv;pz0%NRR=-Y!=JPY^65{;seH~FQHmVR8!hi<
z6g%rseo_qn=0!COWf|O++@?ZD-2&7mwyq=A9{PXW%KpGecqNp}>#S(5gV4X6zz*Us
zbDSs`mU~kAOck1ZMP<Nd;uO?S#GssPj+BVyxH!oi4vn+Zqdq2vctd-5U|35Wby~pA
z<wZ_+q$z!-BT7qAR=oM=MbO5x5bV#INl@UUvXtBj`BU5U1tDt8y?)U4<L4P3D@~Ro
z6Lx70^;acTQspb6s<^l)a~(G$<D0rIA*X(&@h<;Jn?gNlVn43^Fj50ssWzoO=%;<@
zD1pYc+9RIupN8+!9bbTz4lJq^V6{BEB}FG#fI`RaEQRmc?vnVOCHePKe97TQL^|T(
z)psauIfImRZmb+t0o(}-KL+10D$qq3#nWvg@Bs@&#)0Di`p`gvu5$NSKXfZz!LBwN
z%J+#SNpX+hwAY`|PESi|a!Ih#n!Kv2Ul&qd7$1*xgtYfar;!jmpG>Ed5k4318~I=f
z3H|{aND@AcQ;3hnYc0Cjk4tzK11%jq%oZ<LjEFI4bu4bJxl&5J3gLf{UJiw#I*nvY
z=2%G`)YncywY4R5Y$D$!l}hN?Qp=^U9oYw>u51X&f6Y04y?{PB#YlA{k-F8StL3`B
zK&_(nJDP=z2HQx8m~X#i>m~wz$nh-j&*QEX(L#02tQF0oiX}aRQtKdG`1?zy;>=NV
z0YIg;*O~7V)}2mcM-F1)80`2T3tCCAxkv*l-H&RwkJ<W-(Loi5&zuuVWglAcg?XfW
zcAF2GJ;I`u_NXL&b$=5MG}6Nky5Pz<s4{gL8c0V6sd~X>?yy_AqR?sIRjyiPAQ(0D
zOixd5Xto$qMA<2&?37cco*yc^ptWVFs1rc5nJpz5Cgq~*WQ^7MI+XzkS6-|}R*vFT
zDGe?b9S!lSlm_=&*3l5JRHM7%b}z|gkKQndt;87te--yp22#CqvTT?YLg=!H@8{tq
z56#}=UL#f-veuTO)7qKvtW4irRZQJQ;?ZW@>h%6}mAvVA&Bnnn7-5WO+$HiLwCk!{
z9q%Ct<lb%vM^TEoL&(u=bQbyHG*%tLwzjT7jl=A4Oux@N@G(cRgcb{LTkORrBGAr8
zSr^k%N4Y;MNB+>9mG`TkmDgC`>$tgLm77Th*b0th3%ZdBuWxx;-Pt8-GvpR-;LgSp
z`DH~MlGy|cJZv?KO`RkNPP1oWfWzy+pDMdMns^6bXe@S28l?}p>2^9$n|?LGWO*or
z?buzmo}?R1WTd|>otCLqd|;Ni8-J&P%cGEQDVS!&f``93dV>+ef7w5IvH$Glp%pdz
zoS96{2}_Oit$4+V{1KQ7rUCQt;uN0R<f(~*TS!5d(84CLp`oW~7{Z41S4ml%(-l*$
zR_h`P8x<(nqMA018BhDl(lkW{%{13U2Q2B}PSU~qb%5`6G~|V}SxfNo)g+oSDbcT1
zgf6P|8;H!%4x6>wp;{4QdvX$7rujr+r-Da(=d(-lU8l1}tdT#S`4~x+2d1RUVkA;j
z!Pme>o51a4My@bdPcWX$F0aVR{;S4THXn^=S$PdcaiL;YMA9+Cvd9u>Zp_I6lLq_f
zv>kVYpPwrwbjwTN!b%3bO_FX~P)@O^9H+B^PQs|@q<PAFulG;RdQX)6g5$D&4RgG`
zne^=P>7~Q?N58YFHMWyXfjVGpHOVty%5gu|#9klCalS)niN9Op$4-aF#HNH4H5{Qa
zbr-uxJn4fvJ7B8rj@wxuD_8NjTk=B!p?k~!gJIh4D*uoB#PPg*{vYol|IZhj|7YWC
z&EH-%1<u*tTJk+N-NR#;jeFt6j&;U!syq@4-m`Gp*xi+WNnt<$7+$=+j$n6+57(%H
z2~+2uJtF&J`yaAB3coJ0Pve?|Y*({#K07LRu%y=I4r8qtxhqK|gp*o%BBA9o`&7*9
z^NF+i6j*NVG+U2d<eT`t__CGkua-^jqlZsJkfQErKw+HzqQ>NqA0}4x&nZ_>rCdGc
z_-ol*D#>}6e2x&)v^_Lssq7`DCLNxlt~lLsI;bp8b*E1iE@ilx)29jtR#}t}1t<<H
z<y{8lst|>i5~d#Fc_FshWE0x&>?`|2;wF@N+{nkX>Z8p1J0IYWF57ZxNjzY!p+Npt
zItoooj<Dot=L-I$C9C(<tCr61a6~0+Bw)(XY?VSNnq5>)0a1{%q@?O~gwBEto48?b
zwL;f6cU__L5XY?2{Sr13uumg&^N1ORgTXMMeh}~EA(MGta7luLGEWTqDoY}Ht{Haq
zdq9S*Y!}eK)`rC~v9BkYeZ9hNWg5ISN2{qj7WKbkidkW5`l1=6jNPnQjl^{zne0o2
zypnIELmWAz;GarnrmFQvE6XAx_^9VtSG#37UWHIUR+zu*FPeE-DXXUbb-mH6C7FGl
z3thLStr<&7lT^+eRFjj%=+OxS)(C&*O00&gOaVNycJf<27SGM5s3u7hY5~`5YM=#+
zNl$Q{ndPb>D>F{YwL&ZB@FlxnG(Jn$e6BQi3RPgbeav2dc50(Uo!Y0yoodI%##;t*
z1H9|ds8rk{AVl%5QxvN#gWt?PattJ!`G-y#WjjX~BcSkEF%3JBr>X%faqv#!c40SF
z2zVLKeltYHBIx*42y`<=m?3J`;&Ev$9xq#q$ECG+yihH&TYWNj49IhbT~A4*M!I&E
ziFISq0y}9|8Jj<pgYK<&RSq>Np5zESUQr}CI`(M>HQI?EyK>++HSQY)RU#{=Al&7Y
zTZzJP0~St7u#w4fNrOt@$>6iwV;0i@fB^WtnzXDx8P5TTg@??<UV7hjOe{*}vx<?F
ziFdkX$a=TCJD!b#n<>m#7;IJqy%6JF_PA@vex&dAI@x5t6^LP=odc@rG#aWqsaGC$
z^{-q<CCH+eol`SzjQ}7=sWb!QIN*t7G!ykM!Rvb2MIcE%Bacr3t~_&!($wVc=IB+4
z_%qcdbe_;syv02l8_^ugq=qWu)>lPTlyRrf8plv1`}DgGU%;2jC#Lk97>SNM(tST>
z{D7zUoK3e?@`qKT3j8w-3t!$zJ({`44rN5y*AX?!B=RTiVmKk)8%N^1zBFp`Rv@}&
z?wykrYt;*a*%+~TwzE^1Yns9|2y!8M>P$h<y=W4>ylQnDWBcOCkNIVjn46zpgU$5<
z6@%@u4MoW>n_5IoQET<5>&pP77xr0py7;fY_4~`tx0)$+1@o<DUZ^tP-a%!W>0?K(
zW2K2rrIdN+tU@Zyk#?fIn|YTDxXPKH;g(KW+oXPJss<S5=TdyOE+ub~w_agGia^63
ztQRJFacy<!_{c&|FV7D1eURAU4{EZrmXUrHOOIRI(xYNTPWgdq)KLp@%8zaPox(|9
zI@Y(+&3PlYGcT|yaR?KN9#8csu+p7<Be$(DD4Ij)P}Yivbp&ky4F^JUa&w*h&KTx|
zMLE$Cv!qxjT7pSFC^>|o*hc<Y7UnzcrWMmy>sDzMOgzkAs^+U*VK;L+n$tpV>=+m3
ziJ#Q5xk_kmO|*(@lzM2B*?~`-MrwhYxVZrHC_ehi$<BrfaD|Zfrh;);849Maob0@3
zD7Zp=&O&R8lIdOvqgoWCUK5^=-_^_C3!;PO;a*UkdqGrWOWg}HQz*I@RE<K_`#*UK
ztGZcKm9p_>QJ%b4q3U%n2UVi^wO<bE))M?emxH>s+M(*@poiNTU(WrH>mVA<@Gksh
z94~MUq~!jG*X?Ym_di^x<2VoZKfb{GACB{N-~U)+AtjPKgJgW(OYFjf9&iE+!uHzV
zSh5hVBAve8KRG?@ot^9-oxVOsZRz0n^}l+rU%vVA#S!^aN21x>{+LkV3aY?=arWZ)
z2uQp+I<wxsKCzEr<@z^W7Ic)DF3R2mwD=0kq7M3-U7*IajUbB1&JOujPjr`ji*L0r
z5$oUMZ$(ieP>1z5D4(WG(P8cG9+3t^`B>+k4ekb@AN68b!9ee4w=m8g;cCrRbZMW*
zWbu9nFDqseUB1soS%8hioW{0Kk8S0<F8PFw{?vY-%Vqgzy8A$ad%IH3Ln)LQy$y4L
z>9@qTg%_XANBNNlP~a%jXckns+LLNNV^|eH<Y-{uIkO#pSBFr8I~N^!baNfC4UK3y
z=1x#dLw7?6yNQa6NHd?t_t?{7(0cOP0Uv7RaKIhID37=UK|zjW^{$Y-0N%%yq5Hq%
zV@NX1ug7GhrpAe6?=po|zW-Btp+}CMC|&De7K0c3M4pb~)XIUjmLe;N9t%w^+RCV?
zR83##LX<Xgi9c1dO3bb$mmR#Fo$=t&a)|Lh@uh720?NHX*$EYv*NbmQy#W>_z5%t~
zfOA}QFhz@?9~kM6TKi_#Xu*LAvPwpX;iLpujz3O`GY%P6Z%`h$o0U=(D^r{%HO-V)
zS|15!cCCi=&lG2nL_<$(IH^M{33M))f1S=*d?+O&4!FGTjb}r837-!navjpClWuJ#
zTyB}W4kbJ5H7+W+mBq7diYQO^$y!R-e2t8+UyGfg1}vgl28B|9v>J;_lyy5ix-CMH
z3b3W|PQE7&*%Cq1XrC8dWc^y=72c#-U?~{YLcXO0zipJ%{(7CN5+${#P}OVCh>F?^
z*6C9=a7$RHkY?7obooJgrIv#(L6atDz{}e(Ma<PF^S)w}0K9eGD5xB*e3OZ=VihGW
zYBrRLm~S%meWk`@6)b7NYqE~=gTJzbtG8%-`XNL;?b)q$U870$#b#?Qz_3ONWYw;#
zHgaiI%2o-vvUFk;H5Lqug{K047l1Ao1h`NL?oaW}x9Zwbmtir>mohBYyhRuuVj($(
zi+G-0->;0BNM)WYi+4D=U+FSo%B%`L*Ou<KciBmo?xJ_uA(!oochvKi?RR(8%Mhzv
zCKWrKPH>f!{e`<E{lev2Sd~PMcTp92Z?JGxXggJ@O4U}8xytsNJ86|<EUvs&7OKiJ
zt&&tlZWZ?bm5vxil~U9ost1Xl*U;-~cR98gUrKAPuKK#wR8xJ?#P?Z!-tr4ozixNF
zqw24q<EPb62Egj7f5)AF1+%fK`ZfB0UA_0yjDC&Ivh2XGt3`jBai7<UmLBq-u;Mkk
zd||~G8|<I3tJkP_VOP&A^Q$u%WEJ<K+OA~6x+6=1wjPG=x{`ILT0Lraz122HVV}Ai
zqr-|DqHsD8HPDAp<WcTShoL=xIHW$Sm^h1_e#*XOsb`stw3E6tY7ZOJ*GSc-q=iJ5
zr=EH}E<`;=zYE?=sGE7$`4%gu*7aSC<8n92jgHAr*3?-NH`%zrk=2N-jY=)i`Y4%T
zB(DJ+kI<WU(kBw-9!u!eyJneSb<w`}?Bns-Rd9liOg441;8812ul(y^Z)@v>Ivn6Z
ztntb$Pj#nrf_Pn2vhf%%IY77fhoK+;5Cs7zw3j$lzz2=A-cXEFtC!5n;NmU?Q$Yq=
z-l_@atM*i_=KsmWw`c_LV*k%>cXP9wjsN{*<H7&)%Z>l-e6{|cI`>Zy=maE59Y5E?
zF$$9DTQCEQm$$us6wnFKVoNmegUK`gD+c~A@fCqq0oh;{=3ojB&*9<wYpU#hxC+K(
z5S-60FGDEz!VibDh}}b=)xn|u%^Jxt(`-_$LhFipaB{eRcGx>ReD(VHWd9`fMK$rf
zZFI`aIH?}~vj6hUexZkI%A)L>swh|JsVcld6JOO7&)}sh6PEl_rN@;#Q&V`=C$)Zv
zaz5{J-ECP1zw~~i)9GyM{1gkXK92RSi#2fu!S(d20?r~HkimF1f-XqCLbVQn?9Imo
zBAUC6)j2fYZ-jEYlS%6hJCxJwYN*MjAC&?;PASxib}k^@5=bc+2h%}Y?1S(TQ5ch>
zz~zzlK6AQxeMEJNq-5mP+%YCasD)A7J$<P>4Pzn0T~aLoi3YQ%(88K-<jOpu%~z?e
zn<nI3(qkESFzOc6lOyrKT++ZGgL#sQnLSa8GY4lOn%<%Tn`(N7=8`(mNJaxT)wFmX
ztV^ndm+4OPEt!Yf%u7A>_0<HQk%EPp!O%a}otDCRNX{z<Lvmg@xh|6HmV+U=ZaKM4
z0Na#e$)OUZO@hg9fYLTggP8&rTfi|C2mN9Q(9qo-x$Eg|@$N1c;<|c!T#q<aLO^P$
zr~r$QRDM^ZuRJU)tdzcqq=_bt8~{`w$4MgXZW<*(P=mK-wgidwLK5pGBw~!0k*5+T
zu`;R>vPNzb2A$lvQdBGvR;NPSih8w&!Nrt@%EI~KvD+x9We&ECyfyNgFs3V&!Rz2P
z1?#%fjGf1#m1(rEquDqZSe?cqBgo44_acgQN0U`DoNAX<b!1ujp;bjv+EE5G8OFtc
zO?DK}?J?WSb1jU7Xo$u1#dMd4yKzwAhQVM<Y`d&)ieoatj)_GXiLJ)Y?#Sa;ptR9p
zay!GcTiXO$0-1HFd4k#_D7Qg88%{}+xa<+HAZ~&&U(F@mD6^9q4%@C$rl$Kj*@)!D
znEb#~BG6SMqe5Mf!|4@n>;Vc{2Jr?L7of<5^I%JnkgP224=mSCgQ4sXaMRktz!>cR
z#&A0Ov9&*Fv-JuOrl{NYGlf{%-41_#v;Pt{zWed|eynyiUbi$-N?JKBg~2)$tD_*Z
zqN7@qFD`5+Xl)jfOIu(-O-0}DR$I-fma4Ajr0R48xXlc1vj%P}gWIZsONvwpve4|u
zYh$F^T$F07h-zz5s-&{!LzbVQ4YJXf*IY??t;*%KR#F}*Qtn9Ex>GVfN(8ltY|6z^
zUjo(DNF}M7j|WyT*R#m4jLvqRLV;S{)$9bi(jdrfP}n5L%qzwUp_G!1TTN9<MPE$i
zFCeV#s>^9_1%y|yBf2vo;~+7U=r(JvMOiwv7o;qKI;#?_%ycYRjiq@>NiEG4;^%fQ
z`L@b|)ahs$<c-F9r@c|q)3i8pc&z+@pHqXFS!ftjuhWnO#-Ajd2#D;ikv%SIj~r>G
zG8N0w;Z!PAJ4Q<iEy7v6&^pzwOhB6ASwoj9t%+nui%&ad67{AX4V`+^j)uUZ)6SB-
z$kWb}TuJyVUOu(GYqWyp(?EDt%V&N*!`-sl;dv6}^u#EeZMIJeL#w7nT^T7N)$?Fg
z3SBH^!cxj{YB8rCmsC8XF~u(>bXnp_-PVB|`|30bTdxG(xLKz|CEbQwVgmG|$iKB3
z-|Tj9Zkvs#Rzcb~&a#vXpl>^W5#9%PDsE$tTAaBP)b)e`eW5a_%E0}}DbI1Hxj$dl
z2X`dxqTzr<XozrCFjdOdp?zx6-B(pgW*DBeRwh?@#8$_>$H|iBXo)hT=dV|%!96!m
z*~pVdx|)SlKe~-+O;_CP+6gr20-v78!II)za6;6TQ8aVhrzq}%<&UPb)y>_}=K5$e
zafIN5W0RBF)EgDJb5^2xOU4Ds+l=(*9gBO^^+oDT#a~AP7$gvqn})kC95oOWsVE6Q
zB)>Eg0V(E619cIkRL40okb&TbW5K^hQK=D@8qGUKGahigJMz{kcUXm8Ve1`GUd=f|
zB7~iH=;jMq*~PNTc3P~oX`A2bg4WIzEWaG2^3sw@%I{Y!Kay)3<!Z}gjpm&BNQ(?Y
z|8fF5n^K29cQFq)uRhwKDh_8|>(04H29BlkIFoZRl~Hglqt9BMjAE4%rj(@}Q*uO>
zz<PG-)1^s@ffm03KM?U~SHa(f@akghD!7?<1#|}Jk0Wpg%t}%S@2smuC%DdfuIS8+
z=nvxw5BpE=3;4Vl{u%jrjdUwF=^1?#v)xp{yX8KO3cObiZ)mK!)AnQ0SyL&nmdm`X
z-i&kCOtlEGpo9#h)Y{oMWlf){m7yPGp$a=iHHJ;X)>c3xd_yK_SJRI0n~!bH8)Hst
z1JgD!ahMqxJaRL=q2ep2j`)O)81&hpib{jp^QfBFAfbqoeK8yN>0MBR-Ld8jGg#3K
zGpISk4E{fR@7~|Gtt1RTf9F%6?Qe4^o0dgNwiDa(StoH@?<P*-OHOvXUHxd0vYF`O
zl9Df{&HH_x&+&Y*cLp~CBzTjRoWzlv+7by22Ebr2m>JB>!(p@pm3WhTO<d)senK)}
z!rFBh+=yM(oV)UT9v|%6c`89y-p^U@?5f=5+feQ0WUgu_bJg0fRx7&?pIucc^|BJI
zlPZPwi(I{poB2NM<@>y-KS|=y7uPzwvaxqI-p=a>$ALFQ#_-qr3jKb>&P4Rl%K9ts
zV?d)foJ!)A84_<U@j9YKAvVL9<!R$@#VoUzQo7~I?dhvg_6470Mp>&2Nb;uXOa4Vx
zx}??I;NVUeG|Z|i%Hw6k*Dqd^^MH7Gg9!PV?_=69%D)7pCj5xV|AgMT=V{_bC&t_X
zvS`7Lr)<)I6GP4b=6eZk%Gm#So$aHV?+jki>(9r*fc2P7rp3(0Il?}UlN?n#-6y#_
zsaA4SP_7uxZFh9Ca!$*<N1hB{vL>KZKn*5$DH}{bh;yNzRR}GTMIaB_%L-vWszW;|
zLG7=6kU20luZlWdUoFE{bF!#23~SXPug)<fDa5N#j;hlTq+?*au;3X}EtI2AcqE=m
zLjtSeQC@D{_l$W}f>XRQIYuWKfw>B}Qk)P{MbymGpoe>^wSM<Y!-T1eqe|(51-V}u
zp{iPAN$LE?&Q~h*jW>3_QmJoqHYVwyQe*XJ+O*#C$+p6%;-;d~XN4-&S4`D$(w0W$
z;hLx4l60&0TUJ>_zjb1)*l&#{X=YR)k|Q7qdcHw9=-CV-Byi4q8jb=gY4boX6>ici
zxqTEFJ@<}%FA6SDh6`x`gIL_T7m+9W7;?g&1S~KFMj%1~m1%eegJIamD<l|@3y-HF
zmc;q&!j$nP9ywN<->OKVhI%}4;R1^3If~)LsC+k7MpX|~gR>=z`>bKlIvQZWh&Z?y
zcz83WpaULs^fHL@CV&FR4w`!i-M}a)vMa9Ml?>(`D<07-(@LBo-F#!U>=w(VLrQ{`
zu|QaWTkmPdO~p<~#nTd(@QJg>j4(OZ;`4%hG4lqzEv+IWcQGpkuOh0GJ{-{;o2e?`
zGYIZ+5KU>HF<t{56;Y8yO5rG{urP60teF2>>vwqP>e-9f0(bMW-4F|LS@AXf4w08<
z`jcQV;M0PT%vo#zh@VmrjrQjN5`6|t`X!hcgQ{sUIjY7}OT|YcmJV&VD61<*IBW3C
zX7fQxh_4V5UotX8TTW58q|xFu#-sp=`5?=5J_V55C6EWhi8nPQFwTeA$v{kRA;nW+
z6a7WPi{uB>v@JTZZl(BNG&$XTvA@dWe>v@~&PF=^7kpaA|9Xh=zrN$#uc9ZjaX4ji
zE<nJx&I1y7aX7pqH&Hm924n2d%_QO`L>77L#lg?}XD?5Cr>}d*um90|wf`Tlk01RM
z4p8DKDlr?El?cXuI9P@z<Dtn$FrOHhOg`DAOJSWc^XHQ&*!!M>mSv^vH$xbq*L2}2
zrzvW{>(NMG@H>j&?HKk#o-8$0=dnCo!HFXr+_8J^u*<AXCu^*RKN<fG7L<1#ke5_d
z{VbfC!h$6(@}BRr_IJ}5@+7s-@n!0rng#6#B`)eT^1^AndmIdQP1RsW7SY5WC-^l;
zV@`NB*#|y54*ZIMbMd-Q<pha5C`NjCw)cp0m)A$9y~CrIherpTCcNPgWd{&0vCXW&
z1-&Jn&Evy8BahEv$z+`AyykR5&x=;WK2JWgbvqY2E~iRFDx1t#qW1V-fm^!@ZkU1B
z8~a}5KL=qLWzN9IJrQ1Ikxu5`Yld-B!AhqcE$PzSv)@r>pU0Q`SI+)M&IqSayYK94
zjq=l?iOlpLG%2(9?+4OW?zC*DrNzU*E1~RQZ#cyy#|HZI?EWCrSU#@{@_7?zE{LuJ
zi;eR8aC@7wAa6Xio~!myjKiS>jWpo$SdLZSVixLW!%6>R4twE6&#iqM@UuzVZY$u6
zYVM!+c_mTDL?`;sHAV^Jx)*I|3Wr*Sn4Z1sXdZ}qU-prz_E+|)p65}1h#Hk)@562Y
zlR@)#(kN(^D#4+3zt5}DE2wFW>10f<W)b?eRKmm6@TgLwUAqRfp?l(JUlEd1z3l}-
za&ai>NY?0g%^J|g>b~u+&`LGBSih2MJ+sNPdLvo42DG7%{j>DgS0BkmL4pxpL29Jr
z*seap=}O5C{=@TvExZuubDKO*#uK&4xf0oa#QADQgSsEB?ni*z>VCy`fh9!sm!n=K
z7gtrSNuEX4v&u+4i&8Tj%|FG@BCKDNR4ca6yIvrp!ep^susV;9kV!`i=F&P)&7M%g
zdX+nzu+vhenR+w_P;?x1DJ=G>-jrTLXRiA->!hai`W_^89p%@rJOt+`+Mt<;iZ3`6
z`V&sE)Hr`RnGJnZL$!znFeZKK!!h-uX}mrj{}bq8c-N>VIdtY2y-s0|%j<8<(dNcI
z+w9zv$~Fq?!=zM00iC582=>V!a4celZ&Yi_XfR8TKl2*P@^niEj#yycsstr%r75sf
zC1*P#Jnqsar{X5HgG7PhsuGl3#7hiDmFz$9WOyA!&CAbX|LJTvo9(py$L+M2_MZo6
z|1qY1X`%t>-v?0;JUpY1Dd$O%X(4$R11^C#+B15(&;_ZuI2~Wc$~PF@IJT3QG_)=E
zi=k_|hh6JkU@?pNlZV#<1NzKE-+uaW?=dSK2}KxVlC%Ug2(Qn@8`|Pya$)nv$m3ha
z&BK9}iDGj;nGE+>l;1yvgWx~dWjSe}fpL!}vx`e{ZwQl8ATC&rHnvA=;<Xgco%|qz
zPWTu&Rx4<o+ZPS8$`Rw2G;Vp&VVm5Fn8V__YA~dmqQuM(k>d(HFwQX!9?bu(#Bo24
z!;3KnLf&Pq*mL8WyaKGHbKK{p0G_h#{?jjJ|1qgH`$c17omZVSK4B%mcD|l*R3(l&
zqxl*7Cmiw8j}W6rieXO?1j6x!#h|A9m1=Z*JMr*)O!h@70Il>@An=&@``R;F%s<ad
zeUxc|q`dHCQFw016_isfKYx<&S_Hg=qpQE)gp*SoTuz2CA8#W$8$NUFW-J6N+%jpO
z((buRyX-a8$4hzVT#V#%jIugxMV7dRg?D=Rt8hG+fZX{*mz>P{eV`USV!Cc6J~%#p
zeN6t6mmVkxDx$+w+GOCN02%(1@iw%;2R^Z=AsGiJ^!aQznmA6-D#C1_L0OAjkwUGT
zpe902q?TwxxcBMYKlYE04v&7x>)tluBs)*XBAUEXMW1&S=POCHd(M*V=Cn`a(8C<_
zLpJyK7)|_e5C;AaKWgsRz~Yqry@U5QJxE=_1{9)OS&|}2UirP%9cxLJD~UA<OYRo_
z>FwoB8taihbktX4#-rzZ!@GY5Z@eio_jN%+0oi|ZnCj8jF;?;}LT0?gomy<0cT4Y&
zx+L{}%azIB%4+gOl~FdXznWv6yR4SeoSJ4T)4%er=)+eSlfgInYt!A}^%cEsp2~i<
zKVr&|*b=H6{z(n_!%yoTdVwmnSN5PTwRY04Z>AB|ofMT0>7CEK(iB@-jyh_vB05#d
zZDh1SqJ)wAQN0w6yO_@=NL(7uOaau{mBIzN9a&Js_og0>m!HUX{;j@2Vl`bZ*dymq
z`8=t7o>WfluD7Gi$`~zVX($6=UIC*;E`d?4HIM||fn60rPbU)s1uozW>rVg|5O;#x
zY2-2CZkq|}f~a@#=7x4QqYDRbeuv&rkc15IB(`NH<|H$bnh3&0Ha-%rWRa<faB~F+
zmCLy)Ha9o3_@u|*XSWWiR{-&1HHQpFS%3*M<XvGM$JS)u>6cXZWm%*XAt4%B6+U9R
z`j*|Y<-(B5B2Cdic1iJMEhI}JJxDsG`U_#!i<*JixG&9{Ky$#kVr|Ir{>wgwz}c4>
zZd~bHV(nvqHN{P0Uv({MtfJCdsIMsh%$NT6WX>7c!8AWj^3PBb$lHP!H8R2N6~A48
zq)CTHmy31uTFs3WG}bphBbMfsozt>gVfDJThp3i<q~J)~ivn+mn`#g*@N`>PXD}NM
z^?G#s{Gtb54AH@hCLe|AIsf4LDCdw`c8X)P`X!I2RSu`09_32VbM~p^RBF$EDoxI$
zpM-;eoIzj@J-}dsE@Kv@u9e%hbIfuTjjYD{IsdSudX!+06;W%qOY#;RG3^mTDgu!j
z*eq@hVV@={o5HR@J<XV8LAS%0@?adh7J{M=$r_wT1$wj;`%Lz75h)+>!DKe3%DI_H
zN(WojAc%tTlnkQDi0){>Vy;7<rhTKY`e+gj_$bqn{Zxgg2<?@LvG@U+Cc-hgINpb&
zs2Z}*#T*v7o=Y_{<`tbcc4pE~a!JDkz6hpF8>bGr*@KCdwxt$TDNi-1_ndR@JpPRH
z&Q2b5w4tG$?}8FiBU+gnpq`bch$<uj^<HGCq8?eGi^D}QMypyNrHm^duZf-ygOMZ(
zyOa<L-(Pw|jMAkm(6Q~2cZ&iq3_=?Za-Ub6FekN_u}qbV{#0r}{9=ZjIhnF!bM)D@
zb#_mkhZw_cjZzpCX;@F1>lT7q!Cx#g0MDm}XM|VuolP4{ha4b&>g}WAz-Im{o4n#N
z&6F5stRnT;CoANY9&yaa=RUshQ*YoOojN?V;GRe^k!ZqpDdy(v>gA-{wP=Wtzi7Y=
zZZxCS#SutazZlTm5IibWJlw#be>}VLp{Vsr;KkMy9TiIU;(j6ZL^(d#f7yG!e{yhA
z5PE^%5u~$(wshcE7s0g00zD3>orF6BsYT#*kG+Imr5fAeLWDvuVcd(MQ5jo7yB01y
zBe%DyMvVORSJj&QLU~e^jkbauMng%ytrB)+lBwd9{_I&lu5+O4Kk6n*t*2FC;n9Qx
z;xm-g#CcMOGQJ!2rjs7~M!l`z2^(=3MEKn8p4^Q_0S4P?2yj2jN94yHwd9)@4WimY
zpyZnutc-Z@qj}qW!3>mpI&zc-d)QTRGtlIHAnKCGtU`uhm}dlu=^hx<8;)XcV&DKY
zmz-(z(AXwa!rf-~BY9c+$Wn(jj+^n6KDtkrGG^eUKkF;3{K`7(lQxLrB5vMhqusPr
z_LV%NY-TTzkMPX!k4ZH2`8$WYEPXYm*5XhqGDri__3}A!Sh#<jSn;iFH7GIfj#gI7
z*;7L=@9>yBLO2j75tIt}wgp=_<>Q$hi$0N|B+EP~m2I|25)qUlTD7SRk!-+1|D++z
z+_^4MAsWLGDwYjKmofTm7i{g$ri>_BVPEI)=+^GY(55&_&)B9Z3S@&`pXbE9mfLJ<
zbd3NDMFPB^ViXA&me4N2yI{|FT0T><0~8R{_l6tcM&}D<*6`=tCUn})fmItkNK1h5
zdA}bJjtLG50`Z?U=EvlIKsYAPcz5oF$^C$EO!@*y{~nkafQY5Z0%xOxKK$wjB6L95
z?Y4BciOiCqCI@9fkkbB~+nh#J<*P6?{TxQ|)JAEKatSWi<Vlb$HW{k5QMeIIFy^rz
z8F!G{AKE>B2F=QHa$7l?jZWD$d%j$mdei3cgjkn4t(rv8w8)yMpM*&g-@WppkHJ)g
z;}vCSSY8Q?SD=hL9j8~uRA#YyLE_@7Nszk!e5Bq8hPMoz#6-kRc1he~p<vtXW_I`_
z^6#HYEYCE*ghqh>sV(SbxYH9BgHL+7H*jE{O{w6;H^OW*BYoSw8qbD96@+#s${N_U
z=2C|w95ll*NrS9fjPOTF<a+y6k533LH@G47>fPQJu^pS!n=qaR{<Axa_FC(T@{u1#
zW>YG-9v=Ptn*2rA2+}7LTy^1|(+m@Ixr&9aBvzi$4bvkVqPSA|n5mw{fYWEU{1F>@
zrhw+krxHDNv)N>bVpl+%)n>Sy@2N(Z>@K&@46Bj_Dx)TZag@TPBA*N8{0xzzANr23
znZ1@iI>w3MGeBEdY!8zy*wn*MkBlVyXBpv~d<?HhiYWf)#|&Ap=rBMA(np|?H-iF`
zPYU<{i6g~=E}R{mogBRAz1u%N+<*4+z{)V99dXF{RR5ThuUGxbbX@n5@as$KZ7Dof
zJ{{zmDbn(s@X;u(An_r!eAW}4DLV#cdjH4Fi-NS`v|Mjmq7!A0Cd|7F9%ZEX&q7a+
zL~7s9tN)~=kfU7mjnB)ppZk;`_;BT_3V{XB`cokG**NII&M_N}DY>yekMI#zRo`1Y
zJ@RDOAls~nMZf%s%tg{qf9S=r)G)$UFqsSi9VT{J4F<&~5}0(er!^io_}4!bvjG-E
z1BOHkC>BHJ*5XNwI2WaTv01-p6EA{6*bmWQ*(C!HIPB-<aK(C-0xy~BT9;xVUseZG
zLt7!Z_B|Cc+R86sa|#zWsAKkF@BSF5F(5vZ`lCSAGPn9>ewE6zoNmvjYDtM|6%M}q
z<U3DoMWZoKbwv#^Pjy9i@O7yUC=N-;Ts`vp)>9XW3b&M5<P8Buv{w;dGDeXups_9R
zPI1o1kLhyTa~iTAa)-!W^er3BbXafANsgTRO}cAY+TMj5RjzAbZf01`gl?nCB7kmq
z!^1e91xvlkLiH*+nv?o^ljN*bt~05oF?rZW{ZjsvbIw;z!0EB7b-*t?-p_To7e`;^
z2>xgBWS;!z;hRMszZY@nu6^oea>=)G@UC|R&Of6o@}5cJ<J)HiKbE!-HQ>}02UA4^
zvz4urmp~Z`%JBr@=!Il8c3aIhu%}=M+yr(ec^das_QfkI{EWlEU0hUlY^?}~v(KF7
zlrVzgg;Cb3tUBO`ewvfqihbz5GF{J?Ah>$R1xa>!2Zch1#hTNY?-rRxdKh@|^r7i<
z=s6NL(~8<0EZ#3ZbIN3LWk)P5xY8HrD}&~T=BAT5q<!P_x|*Mw3tF9Xvzp`^(rY|8
zl|+>lnq~+0)<6j1)2xM%W<RqgBIkA4QW4d@kVc}qGND>|MSW1Mf}${3=z%{qcXRp+
zEH-`3^5;wB|6y|)&zl3NB>#`&xS9BGTb<2i{I`dg|L4h?+j3gpZ4Mw)IJkUwdMZ;r
zqFdCP7nZ)yC24t!B@=q96eB#z@&=&@LsW^UOeQN4cf1+Bp$nztp3J@}-a5(FDz{XQ
zO41f{zQ%_vaVUOR;UDyC6X`VSuFRbNXKEvttpSorQ+Cca&Kq6rVxORG_^mgL+m?i)
z-_@5medxBx4*9T~lw;$7IDTm8Y2d@|2PhjVW!VT)6=p;631gU9q5YxpLsukdSA3fi
z06YOCmXyOtzwKza58#Y9WLf766KHDrQ919o#97Cd#~4N^a~)K^giuXI9)2<iu28CW
zg0;;gkQbL!uY<mmBdqenn7Dgz&1{;ikQ?rie*z&qpG+d3-9s>z8BfM)u}4b4A?Y;6
z4?~iaQ|RHvdBoDj-);V7=TY8L#vHP2qWUT4Xf9|o`fx?3*;6j#X%&UsI7CM_vc{rg
zluFGUybX9{W#_+Mfy_)D<4$GwZSpu7BKV>WSzdNx-b!g=Wqz9*hcPdc`KS(*c0(DM
z8G0KoaA2tS;%6Qhk$L%k1Cxc)Zj@ovpNv1uF7U!xV?o%>2T@`wjyuLx)Q4p>N<w_B
zuQTf*TOiVsC_j(K|KuN6h!)^UKGgy_*_<=25D~XY#y80cNd@a#Ti7;YsK=aw@2S{Q
z>|rW|6nm(|i^>`+Kkv*D?8<#7?^A|7$#~>n8;eTfq%Ue!&IQrPx)wQT3uRvwx>%FP
z&jm3VH1nGDrqB4Il&V|PZu_iYw@t(hjhvbMwRl>tp!niA+q8R{oLqIQJRHBloS%F;
z1&mSM>Pq_=O2KzZ0ydH~m8S89jbw_zKhsF+E4TJYnyZ(Mq-p$kNHp{pfp&Zu4D&me
z{<uvnljuU6l<t-6LWd<&u|KiN6L~Fw8lMX-&n<J#iWG43?RP7-<GdsqIoKsNmipa?
zsQ+cB+PPD}m+61E+^w|!cVly7ssDYj`d??M|E;3`P4W)twrN#8$ChSA?Hp6HR#DLD
z_0YxISWhvfei@Zs(Jh0Poi^boWkjy;BF6x8{lo`hCkZo^(kG@%l99t?dwC{E<3g?-
zF@V>u{M5E&h4c}^DL+7DoKN2sDmK&d5LR#ifIxr0>sDyjQk|lFLbaxSF70t)uP>FT
z`JyE>V6#raKsj7$?5Cv0ej+vIL!#*}=$L|(>Xf;`$aY<vszya5+c=vHG$)zZF-dHS
z1GJ(V?oWcjfQB5VpN@lpFgDD_F-_4<m?Z%P%iW~o+~HMP68=m{&#%s&|Jplw1v1^i
zaqslk{UZy8vPs*p5-7igzXLHMiObmO9K`Yd;mH9A;U^~t$EQhMz%NxDy1GH6^+ZJj
zIhCc&7Dt+1eFun%MoW=)KR{0tD@wu^Q>$uw`;e--xa&B&j9+60JmDX1OKZ%Zdf-_N
zq2E6I-tC5HRRK1TQIDZ7Hr1V>i=`YrrFc<!O24^PgyVnaxP$ags|qhKjZk3t+1`X+
z)iilyI2p@f&vKrQl~z><Tf>IY??Wjq4mI2chi{}AP6=qhk3D&JEU?;|lLQ853*@?F
zeeitPorLDX2Ab`z<*SXHSD(y>$m@=Mz0r^n1XXI*>7hjlnp@yDgw66@o$`fKYJR_Z
z?eZe4l>OwvfpV+YZLKK%-Oh!)5<6=WIDQfEbBcFkfh>N^!(%-uw$6&>;M+|))-CMW
zT2gYYTa07VpTJso8@xFE%4*~RSBe-bnQD!k6^_v>XQdEz3X2v`Lzgv`6hXBXOnz(h
zf?L?gwkTc*V$Z>uIsf@bGzPB#qCHHdpw()h1CP_ICQtP3%^QXuUNJEN0!T0rYrwW`
zY5}J*%o<xM=D}t)00BK8sjSR!QMJE3u6sVX)f5J+Itr{S{Bj4oUWCUF{(#9kMyc@H
zF*gY>y4c=z&f<diHsIBTzm3uo+BhiqGVecn)&XQ$rUQm6KiMdj0Lnjv$8ak*ZYmZ3
z^cHRWDwMK~9V4+QTa3lJtyj@DICO2erV#=O!$-en#wAN#a)(2f$SOs5FS1A4(z9aR
zZ(hyf%ig)y|7fkXrIevQZW*7N%R1(#Eul!oJ(JyG0_1y%LU5PX5E7fB5hU7i4@(^j
zAM-v4S38>3Qr*~tzN2o|+4F$G)gisZSCVdpGlG-k0Kjieb+(QZck@-=g+`KCi^8Z}
zo?TF6!>~tM4G91a-b3-Gq59_JSu~j+1+Siz19Cg0J~U(;+%S`DoYS^E1koOI&l~!g
zx$yv>=1hl92S-$W=Tt{8kqV6(XMXsvr*NjK0w}|y6y+(AH(D*U6jv)KS6gA(mffLJ
zek-ALM5MoyuqU?L$lJYuEO9sfjVfoNkk1Jd=ggPiqSd}J8M9+qy2eB#Os0r&?YdcM
zo~~%pnp3sQo`n&<bKvA6Fq~k-{CR7~my;+}l^Q1&s{b9%=2rby(EqmD?aj3QcXOk&
z)c-!n{Le0V@&EpB97g}||B|CwFrLQV+CRLH-iYk_umMCtbIhLhZt&?tRRg1EB3)W9
z&zSQc@h1Um&FlArE2_f92s$IYsg$ro9B4$(r&q=4K_5q&{5rV_=&cJtIk>%ofe3t?
zmZZ0QpvMKFYe+Pi0W<)A6;1pZwgcPM8f_YZWA-Q1MJbIY7}1ny$PHyG5YCxj9Q?e0
z_VToM`nq@g`X9Yl`~UI!SOyd?DDu;6TvmeLW=`wUWt#fD4BIcy)co4BaXh=a0`?jB
z>uRY3%#3Etx_!!3@5&ZDob^um{wvzp)pX0JHW3%j_Q?7Qe0%+J|J94v5oYimU(k(l
zWgR372@hu88~Jok)XO57v&kQLv*EN-lQu~=rZz>}sRG9)a+|Wrkw;^4NT~9a3VO^6
z@#9BP;G|T?ZNjc}il|7_2m@nO2Erf^4unAwK&ryvxZ4~CnypkAIbbqZY4v{PHlJHh
zRLZG0Pg@1;805?0jFWas7D>@R54kUndnxfLU0C=GFkwY2+|Cp>M)o&0)G4l;csH<&
z9!tZ24f*en=(Eayn=N-UEC0Dm`R{?se^kyB(jTcR{bfOQzCAAjllk8)tGqoniXe$L
z(XN<zD1vTrr#5-5yGnQku5(fY7xK&L&IV5vLAgfHi>V4kRN3o_qK&)@`@C(#z5Qwd
z6{K#cLO=x-moE}ip-M;v<5e{CwR-I0cyd)>9LT;9TWrgAFqK}bxk3I;$7$Uq+oUC~
zuBnzhXd-i;Zc0A13fe`0YmRE9b*h#YCh-auKN_0K`p{}%@|~EhhzCv*3`Z1T?|rQR
zjiUw&SE_<{=`=wf=|cuTqlBdQEBJi!a(<11=acDlGD0e)1=gapc9ItTqy<u?rql|z
z@)RlOkEj<j8>1dZqbTUbp0fU^5LMG{XC9v&#R8(mtl$AF(h<|_Tl-u&mApOPRhXXX
z_=Lv}dh(;Ss_?&k5lrb3i*?}d1L2s%RVepabMoR@84~95FfPA6SbT$qnvPxMyrTe^
zp#wK1Dy`%B4#ourbcp9Y4}jkNGkD`ou>stsa6cIb0Vb{t*-mrCU-}dJvVRGE4VrT%
z?Bf=!x5viJd@}W3a2rmgi9}CGnv+KLUxSRv_JlKiD`HYmLlbgb)Hze#g~ttJ(5k$Y
z32Wy~c>U{S76+F!_#lv^+;fTlM`Uqo;_?3ANjisYIt(Nh!GM-%E9K+Dz6zSs9(KEu
z4v}6kvnvgUX*LJn@(_BQ&CpnJoHucg6pPY3Prxek!@?sMmJ%Bw?S-`7K4+wlDkczv
z!s+mi(jYg1zSY<ITbsO{!mrSOg`IkvPzEdima%x40IK;XfBp6LZ_xBxX!;PoV3jFW
zdW#SBE7<^2Y`t$~riiyPJPCad?Di1mathn7nhVszcQVN|w4j<u-%87<#-`rV-9E1a
znF)YC`)}zfGt05lwi@QamDp)C$F2Nhw`BdP<8;yKJE>D}GOlYrSpGcl^MB9}y<u-W
z8=cP|0H)Ob({}aqzuRhUEYJTB^8DXky8iz-f;trfzmU}GONsfq_)Rh*JG>WO>lKdG
z;?7uwBef`+_%@GWD7s918?%Vky<s@gB1Zo)3xjFz$U9=2Ca(y>U|B^df(Pn5tzM}!
zHN}pP02w*BGj_#y%}Je|-c;B25%#8fY3xGv(r`4qEG(VM4MmUtMF9cLr}@-b9=l|f
zI2YPwM|V760~~(;6uP|-hyX{3>tb-|1gz2~H1XN`VdOL~y_gb_D$>DYI5GW<WD=x(
z6fJ~itYyW{3chR@-~NEjo8MUO3cfVgqn#*tBO?52CDdjS-E?m>obp~;_;!u9(#VIv
zkSKmDf)Hz^QuZMeXEJyoIjxl{w_ek%PMVZ*?xAMNOEGwP4Rx{dAD$YRnS6d5P?`MK
z*>KYLKRcUC`EP0e`PwAGM?YZ#5)k&7cgoT0-iw20XTPv3RXsTTzc_n-+I#l$_4EHc
zsb=eW^Y-|JEw8;tU#q>R-TH>?Jzq9}_JWN)^VtFxVh2bo=}YVZOm$z^5|FRzf1|1S
z@1@Y^3Fl~rRvuaL5y=fmJt|V|f8QGPBK^2jjtiYP)ljF@<dF=Wa;}P*w~79h5&<P+
znUp9w#WgZI)I=C5bWF<Aj+%9&mGq(yR-7!6|E3-cLJ#c#_izC!iT|;+;cTSkKiBCj
z<-dm*|KsV}x9Pd_C6$1ud&2$s_0ehX@aW~?(E&N7Xj&|n>FHx~%Qzqx8E$jr1$9ql
zI>lr~b6BcyIATE8S6Dy!6+DdfB$Ce!CpT!C#u12F>OFEoLE=Tv<|RB?S%+UNg&4c`
z_D4kB29!tMd0Z+hvTdCHt}j<+O6TOTBls(+CX&nYJ%FVtV@~nY-sB_fbLa#tt)bLz
ziQ{ta#XwsluYU=PV9gJpQ8YcW#Iqh8Xqx&}-}7MVfs{Rk|ASFDrhmL!es77s&C!Y`
z=<cE5>xZ{HQd<ZZ0UiNej8uIB!@YJjp&CS{C1K#FOc`1V-VZO}*^UC9fx(r64Xsf;
zSt-1yHG+<YaqO{HuwPYLfnvSj_6ioyc*@)%;7K@|jfi+E;CZ)|;CZ)cc;2lJPmM-F
ztRF0f>mG5)8gW-(eE#~2IIH1wk2MP4tZ>@xx|mXuPR{N|gFoHvcH~pcAlN4K4ZLt0
zS{gnm^dnLVTYdzWAF@i2$)=Yh<RZv#&I58W^WZ!)4FaEFN=)h*!i3W3M>U&Ass3CW
z6kUf-9SjT$;xK;B=pM1P5Z>(+(?Oz+)B+GI2}}!>w|dZr{53-}7|TqVQy|PfTV?}H
zZ7CO(%-u&HH&W^KgiQO7{o|v<qhGegos)=+KmsPSD}lg@NPLM1*gQER3=_?OxWc|W
z%@+IKmGAY=8B_2Hbd~;mlK1J8=)kDKRt_FXs9^en$Ay=9;-|3xj3?tY3?D(*QI<>V
ziM4}$FSU5wT*7$Bc>^BnTC8ScSM6M|*5R~S5iZB#JiMTtr*&3{WmQ>=4eD2tX<nr3
z`zksXkG$cqZr8%Gp=+InOwS|^)8v}YY**W_(xKMVk(I6y+f*IZQj}8*W~lHz?Pxjy
z7}jdSc0RP=105Dtjy<Cv)6p=pVT5QWxx0|f*Er_)oT{T{<pnRG-EGEFj6fL4IB>8<
zRn^cFq+Zp_yQ|4mubZk0b(2nrq>p)B9EoT~TEL5hmr_0)s8*ZwyM|SVpQlVfSzsuA
zS!<Y25~sJ3LJUPakQHtet5^;QnzC{wfuj%U(d$z&th6*0RViiZ;X`1I5rhJfLZg@6
zE`umg@3*l{ax}xN&|Ww+P9beUqZ<>lJGpgr!l_qRryO%#WvKC5W9_kK%dgsKvsIfJ
zZ;~Ey4s-4?=j{}Yx@e-He&NVhs9!kxvc8fJo5>4I0R|Fm|0cw6BLvSjEL>I&g8)v+
zismz@q)0ixNy_Fjt@*&^GO{}0U~eevY*M5SxZKXp2QIg>MsUh1Jo1igvI_juCTl1a
z>YMd-BCr|5F|qJ6dO#*t{6k@JdHI210!Ya7AVxi-s561`S|sr)-2+k%HHDkNb($^g
z5EB!kf_}DaIrOmtmT1B6ve3uX!KyUKf#)Nwmn{db1g8>s=K|NtmV;KPQ^3vzt(Q$4
z*MUrsOz%%8>9gOQWD>@2hO_u^JXkP+sBq^e4RNZ?IfOP+;U2Zl7$AMR5BaFT7e+WL
z&;pWKt3onsOPZ-dGvs?Ano)QwX@+lpZq?mS?tgBz4REn^2~}PDgS*@>zw7;yhhvl-
zqB#zK^M5x4K+aJ(&>B9<lF8X$JmVcur9^h#!jggx@ktt<l^d@UOBXa{2uYcPZ$qkF
zbk%H8!}Y49kJxjzoIN#G1EeTk01s-XM<Y`A{@TZ!Q~kQqj7_o%1^|~kA}p+^3L&>8
zIvO@HoTagW+_5s{o>A<1_ewmYSq8a74>cp%#DRb420_es%(R-!y{6;m6>po0zxQ)|
z$(@#s!FL)tz&b`?gZY4M7=iVxqV|ot9GCfj_<r!mEcl%9U!0cHX=UQSIIX4s$AirO
zLY@WVNjN3{;q@oyWcLI9X`a((P3PY)MqW5XKRk2DmlpZen|%z(uR(Yo49V^#ersOR
zZ+hJa?K6^bBcA#%aGCC9-ux0wM!__?D+wmE$95DRGK??!9=qn8?<P{me`5|4sU~#Y
z7MR1h7w=tJ5t*f#c25l}!Nb<VMvKWcdOVH7B+`6R_u#w%TwiHJfer27ST4N1V>Yzr
zAe!KHb-;=C|3-Om)1)Dy&8kjW(3a&GM65_Ph;q2^(-#n_MV5?floUW{4=%#7C7rkz
z*}_f8dY!aj5K`P06HoHR_Lb|<CM!1~-@R8?uS4<aCQR};2uE!fQ-2WpgpH$^i>y4x
z3)tkA&z=C`+INkTDq_Nu*x$E}3auF3wN5dRI*iRIe^E|F5#b7^R(YJWFttM4+seAq
z(;I2!$gO2qPDtr@<`=2;r2)$)8okeamHmsC)10W+UR3vQ=U=z9zNv-vcIKfFA5O;k
zE9(zghA%C>Qr_CqOKopI$D}Y}Kg5enMEydeplF3*g{cKnYb@tgYL)2_^JyqG0$;~E
zn;ZGsD{T=iE!Uc1t*NRAwvf0U0(EjAe4=+7&odJKlR7^R!rQ>7jx=%h3Wr$)WruNh
zAe4TUDzQDPZLZ`NSMn{zRfcxszFlB|Yj%NaQ|9S+XP$m{vi<hiZ7y?X+lA2H`xv31
zVT|-!)L*FQw(3+np4>>?PraHP(f$A>d#9D|yxe%R;f8a?vi$*G5zU$C(t7QA>t$yy
zVP%gps;>O4FR5k5q*`5HT;E#Wuzv{pzaBaY4kw;Jmm;t%|Fh$6q~kwzT3bu~-%|g#
z)c=|9jLoG0lqoe1j}Ce#r!V2-+cWs|=HU6^{>#Jv-akEjeI!MWM%4;JuxZpP->F;P
zuZ)n`NE`LaS8qVsA{(`<U^Aab6`P}iH7HyQO8*PBu(kB7zis?3p`;%9thKAOUN
zo4!_0q;nqLZhi|Xu<-Xr{!#5WxAlAeISt>|Ii&sOI=|OBq5bB%96l2U{Ry)131#cD
zHt@jyB?~vq{J&u?@H$c~y?BOqJmI3xz*v-*je^l6y3<k(H#8cdfn}ldk8gf!{l1fp
zb=q#xyK{K;zuTpS8uSX?E)6Ru0-)+Wg+c$f8-BCmcX{wm<-&H`2>!Y#2eyUt*6rT>
zW`%2RyODzV>3h@3npHr{mGxDvEN#ZV`o+aVBV)|!>?)Hli<aB>qN!ajzHeah=~MRg
zuRfZPKF(Qv;-%S*EOpRdDsh)e+@%ti4L{>HL6Uxh@)jO~R0P$s+*POXzvu@4bgA@R
z{yZ4{?=Xy~_jCU&_y26UY5&jm#^zH0yVU<K^}kj9KVL$-3v>U(HUDtEKe2ZY#QAf+
zuGDazxpxCg*Uv?`e$oZ`Eod1jy?m37pZCGM?t)sneSQ^gpLA7yYq~^AK;Nj-=lyW6
zyULa>pI^e|6Nh>+%0gA@tg0(i>r_=08tN=WQ<#NY5C5uq!fZR8Ir@ALEnyZ|B_(0D
zuT^w}YOPu-LIalf(GaFYu&Q33#%k?3tIetIOYd_vw0-$im&(5HNB?&}_s=r@U)$Z-
zO#6Sjn@j!QQvbKq|5f+@EL8wz92H+40<W*({b?#+;QX0iyi>EdUlp4I&(DJ5mBAPL
zeHPbH-9E`~f9S<{OdKN>1d&bHgEx~nL}TLKBYB*WWt2n7MKHDGbMlz<ZCR6C@ee7t
zP!pICgPHf$vkgnm;9q;=eqdpG0trpYP8CkI!**$B#P@gJa3+r6afKe(EPVp22W77D
z?`P=N3avn)mnYGfS5f}0`R1ndzIC+Ug$m?7ZK6<5fW&-lla0D{Z+P7sn{0}CycQ?r
z74{w<8IQF1-lng0-zf${Q!Fq@3s3>*P2$m?W5+(;<f;K;HbOYUIVbd$3sIzwDnQn9
z>SzOGEw@qE8w=-iu|CEO8yMGCqfzRRr4}xC%o2qY_pHW!+_U7kg^fnHE1{>Hv>J`M
zJ(0g*6M<1HS*A9x&q)fZFPX{r#!Sp^)!z<3k=$f$FLAlfVJb<H`5mtximjw7uSwOa
z&1{P5FS*V4#%*%^$iGQ;llYkDvYQf5b4{hVv>h#NN8hvUXo1e<OIy?OXW{lA#?0>J
z0bUya!*SC7-&?Jv{pY*4|9o#g+-!eZm=`x&xero3&ARq^ow%1*aVuQKP4t^dl>D>9
z@$}6!YCL8I8>HJMd9NLKy-J0~$w1K?HfkNt7&R=TSJc)Sa}Mz>XpQ-bU!-gKa)E#8
z1+GoY7hdDbI2wkP|E(!>ITu*0`}>j)d}%(Q&&wBP1m9Ck4>nkfQL*aBiK<Fgy;fB<
ztFhKX6s;!6b$d}&*=lO5Ggsg5p>Q>UtE6%@^|^}DRj*e|?P|pJK8n|jfcqwzpw=L~
zuHd>~P`Q1t%D$+)zOGicD%9=Fsg}*|_cau=MU|JT+3!OCd#@V+W%}Pux0Uw)-(2c{
zm-^qO|9=ht{|d_EjLZ7FgXgEOkL%n4FxM!!1yEF>Q@es+9jAi30EHE5G*NsXpsY^a
zK+xZlI)ACCUn=UCiuwnms9);cmiqUl{{5lq-<PX<sa5|DwdxD@U|y=%m+JLzUcLUg
zJezYGnnMkLKkw#;r$#T)|IV&}$IW{1_i7T&8wa{r|J!mlH#?d5FYtP)|9yD+U&kT8
zc)^hTG7HAz;M|+}WcQ+Z0gwKD5CuVV5Ir>IEeP%~GK8rIk6!m)96USwrFZfQnFNS~
zFV3Ey_MW|b{ro>q6h&_g(+~%}crpYT!>sYemHG1Qge}ZHO+wKZEf-ICh20Yhnlb-f
ze*&XK9b-f#_N3RJ3`gFFNwkw!lO6~96QG4a1(N{kUkz^!z*3vJI1%yw0&k31x5M#;
zm4j1*&>zz-UkRK6vxW&=4t9QkBo~go@pMrbnn31ax!66Tg#Tar(E4aJ88<IKSN#7@
zyW?i=e{4AICI5er_P;gQ1UiOI03I|i<b!_?$5H6<?H~)~>GyhzWH3fW(8;?OXldF%
zdwKfkryz=8_&qZ6`q4yvrK;)42<V)gV@538j(l<zg-{E+&Gr@Gk3?PU0Cwz$1HH+g
zUmy2g9pWx_{L0j>`hG!;%MB4;!?+FUUWgQdTr^Q%up<qjQ+gFmkb|O%g6h<wSs$i=
zcw;{)e@4H)BLfdb@|XY<CbMxk<@9_CMW90r?@R9*re+ugo_`0O2*w1rJ)G92LG5>P
z?Ngsrl5f$YU8TFgU^X0@EO{dI6$kbhtJF`rP&eZOya~p$5m%PL$ui!bK8LOXxt@m5
zRLYu{es9xi&jDCbwak0eu*gu+xm`QydqdzZ?9WbF>Epl~@*2rUwH6m**b6(`SHq({
zUg=EOOLAakI%s$#V7+6objxZS0{cph+J6VpByBjwVog8vE+%7dNP|QJl{8~mOfx`)
zMooihZAyOK_u?tXa!)nq(hiXsug_M}nOcBiNwu;`fp@HcGSD&n!G%mI>L!eX{c~)7
z<lR!GGLJ|xuz0y$i%O6{Z$T9Hd&6)PPUGDK-rn{$2t*Ay7N^yBtmDQe$ZnTZhRbw&
zK=Z>m$X6$i);Yp94ABZ<t&3+3cyitJ{11~5qo7NW36!&=!_)oa|FUK&Rj_z|%_3^k
zJ#KV`YzQ~t&>K?zBlzX{9p^bzXY%?Af5*r_J%TKRmW_fz5C!9Y08HZ^e2HPvn@;kX
z(<74iqbXyv7#Gw`@jJCM6&PRcdas{!hP}XLbrSv)nHQrcN;7l;61f7ER<B^MfwJ`Y
zg`dM1`m0hM=dZkjIMf#9^64RB#WGZATG_TXN}1OUo_7b9oPl;8gCxavv7cd@-~GkE
zfSfdbmtAti(pG*#@hjT%Ml*~L{a`!|r*{?(Lxb>uF_{kOtfV;%22<pcmKb<kSw?o|
z#V-<;w^9lwK{b!~v_UMvsBpm3s8t4RU?3Ie?M$7wv*LL>%bB;cdFSn{*1QRR?H0}9
z+2ZE#Y`!@xK(lfq=K17e<zO-#PHu3cq#_^_J@L5EkHTpP%%aK4{)~~LfG1+iQkxwf
zUWC_y|M>Z2{KqVakmuV#Byey5Q5R)eCd-n)z55$V!9qr*^6nTI+JIs|<)ZQ$mzY6V
z_CY=-Au4i4@SbVqZ$Q*WfqfPW;~x{B1GEUvhEv2J8{%z_=-KR?$}Zz@0Nof*0js3Z
z8^mGCrD{Jw<V3jFke!x5`i5@VB;fV|s7^bLGXy77$yCW;IPs=9NUN(CJ<4#0!F4d)
zDOfV<ifQC<S2>|}&O|pVw7{0(ye$|=sZp_8?}{6Jr%-lI`+AY8oh@(IuFeN(qibvW
ztWx1<etBjXc&HJjYBuvpt3+n0Abl+OZ1RAQcIC&e7tHLsM~*M!`Ufpp`(!elO}Q@u
z#ejn&(IpNH|E9a7lElX%qK|j!VD4ECm-*-bSCruB4=kVrW*GNkm{mn6w7J@Rdz*!}
zW*cWPJ-q}bdpQ~UmJ_T!p{?Q>r4$_Hj0v<XI5$}9a5+>NAL`0W%c8Zxf)Qeyzb@gT
zCx}0Ta|uYqqm|n=EY=*OMKvu8!<!zyfRgZ0DFgWbK$mtHznb+gPe#DygXr|q8(RXp
zozzgI8Q6aQ_kaHvwQYO{Vwk|raUDc=Bt}3s+vjcquCE!B;1)I~67!zg@Co1Ib}N)Q
z7n6zKq%DYM0vtO$EF%ukM1aOxx*=YM{Y!E)iEzgr0w}8TMCP)_m`f~MiXYXVd-R6E
zDFDLzh(0MJ<zO$u>QS(Na@v~3RB47sllYHly2Bp}RVNO!*jYh_K>rQnZrV)MW9qYW
zsDP@jz6bk1q5Z?wMr?cMo4W^ldw&|!DM{claF43de~MtU8@d83doWj|S)|B@tv-=d
zJU2G!jR-+}(uhUZK}G=^@RSU69!#N4NgTjr=!lI`p?*GzE`bD5d3;NOqC*qo;2*oK
zosYY>g0ru#e$1VJ?>tWRR9oG5+zkzX?hu~!5p_jY(sRpcthy^lv(ahfWt1hsRU8f{
zW1zp4TV7j0GjNCA9pevoY5t%dN^*Dr^utQvtci~(L2!=Jz-1zr|E}7qY-Axcm%?=a
z3Kqq!f}AapTRcW4x|1`TJArHSOyF|XKI^P~63&CbE9hYl&Pn4Cw-vfQ%Kd_FUBDP7
zAW3MJ40^P~BN(ds(3%0;9p%q(RHKe*RqhX=6b$XssrWi1V52NsBC;-A_N*m6!USFf
zV~pP%aFy#C?$j_#w`445gt^D=j#I)m6x13*U$;!_-7asP{9WKdwo%uUD%V(dR~_3d
z#kZ>i&J<g9*ns8B>X~j7*fntA+?v1~Df7Gag#Iir4UJRfyo7VsHB?XsDgP8e4*@WE
z^nsOylDZ=2l96j+lZnj+7uW?zQ$BTQhX?+IQ_09hQC&FPQ?(JwoWsB>Fs!-_YveX`
z89$|hmg?sQmo0zj(ouN)m{`C?5tqiU(^6g0=|lD`IXJn<&fVnb<R)h)0LOzFAg>~C
zkC9sq!m$raMV|f5tPg7aI)O@vc@Vul-c{#qkNgoq|5DmCogVnK*+4wg(G{TgN5oYO
zZ;#RTHMx38)eI3VUN#pL-NYycMLn(p41=59v#YBBM=NPGITf+_g=U(_MT6+t!+;FO
z>~?^hVthaK!XbUK##)03q6R#6fYgp&uEUVNREB|S8vza<)bZqoZy%Jw06id-BC0^1
z@50dK98WVixa<Jvru&+ex!o1Om~QPwCETJC6abWw-nU13|2(!#(ey>{q3|8Ch8Ce+
z!kOAT^?)Uusjcf?kl|)4RADCAMd0J^N{XjsfEj4sfTG;Fp37~qLTOF?(YED6Rz`}9
z=%xbnIHFuxj04WEc!N{DQ%sU=N|J4B)UHU9ZAOvpgd*EH^wwUvZBue9D8Na!olmmu
za*AyymRd@zC<U<IDE3mDs0v4$YR!f4MO6kMIAiv)IK(44*U_&{*f`9u<)lm>K=+iv
z?Qy7PneTiu8PYEFP{>3P4?2cYFR|VpzoDKpKGn~I#M$k_8qGX~9YiP|$q$L()?k1{
zwm4N~hRvNYwVBC=#q`)|mr2V?`^!Le4yi#JdF9n<*yRZ9rCTO{rF7`I;G_Uu4Fdf8
z35HUb?|Z$&qtk<zz2~oAy*Ye&aLo8#6pXy+Bl*d3oNZLnOsV|^<^I_?3cUU$s%;R`
zRpg;Z$c-0`@zC#wF;(b5J$%btrRaf0nNM{Faz->|=}+~n${-hmcsD1AR;u13MwW%r
zKGG;>YmG!{IK2$V<ZS&NRr=Axly?nVbD|CtmwZ54KL?VY4Dc8-8*vq0{Xx^r=$Y<U
zg{b{31fpu5`m1n?=7Ff;yHFhSdYuQh=BnowtO7jC0J8p@nZP0dDj4QLEN4yS-88_c
z-LrBS>CWH^MLo6Rr^(I)_4EhDK09KMvXdQ(RZ1Chicrq+g@9V~dPB_Z#gg1er9Q<I
z*g(8!G6OLqp3bg792mvdeZpLGsnI<dTG%=~rr$wV?<sOkY9)cgVoWXTRFfT!uTkWR
z5xfUudd;lv=Xd0vaD=f*a1B61W{KfWc664=r57{XJT`QN*qY=Sb{rMKa$X;u!q)im
z@aTZQp^d<h{3(*nas?+|3jQR!%zNxzTu{`nfw_aMGZI${f$1MUe{Kq~u}88ElT&m8
zw@!&?!^d-_ZI7&xbI~peZp?doHl}pSt~N3oR5-Xp+mb&S{|!cP$`nQ67<Pa70^1Ih
z9(&_;c*63`-b2&_7(IX`F7+ftyi;3ACmc5?qX5Cex{>N+vQB40cNZ1@moQqQjxB3b
zAu#6^^H&8L=K^nPFQ;BEQ+5m0mX(SHEoQ}J9f0A$<ATBMQ!2s#n1#W#cjO&uJ;92A
zHT<cAAr`NIoZ`_tGI{f|OSjnW$jTfO;lZBOvY$2#R%!W;y8QCtO5;DOfZ=CE3&yB6
zM#4)f3N9o^!tDW%P3N-<vh@_@-I0jL(6-rmZbWu>QD%f6)drHae?XB|b3?k2(JE{B
zgcSX>WCiGx&8rnwn6KlqUmd%X^ouxD9;W{uB`6WpO4!5=*4nKn$qQB(ej}=Caf&+q
zRUbRX{*H4d#eK55%`V}D=-M@$Os+711U>mM-pB0DK&A+WZgTwq-P35|ZhD>xxUro{
z;j!UXg~I=U@dKFx&xO6_hH}BelNR3YzMM_T>`I=WesbGSw^Lgcp<of@qx5kL15iB5
zJA(nJNAd*5qaFC<71g5P{H<*@pSGSn>A*xeFlTF>=2QIab_|}3Fi#uJ&DN6+D=kzi
z`eh#1ZwO|uu`9dAi8_6ter3m0xK5TJ({SLH=ia)>q>Vg35cilgPewIr46;`q%z$ln
zJxl`mlm$l--}#yaQeAWOZo?D(Ycgc&@QKf^D~rHKY-2x)yt__FLLayh{ZFdiKPGB-
z>03glDZvwKVJIB!iIaKaW}dW#Tv=yYDo+e^De<k3jn&m1>0g51_qq}dqoW8jB7EF!
z)A%YmHSh(x7zp6lR`W9Ou(+<h#vk-e;08{`P66oP9$;mFgY*i0;fj&M#hO`qldEKP
zsg#M_NAQ4cx$@eu@+b+sYU8AKnw@srdAbFp<ZiUwr$AAw?ba40uqPW&Hrku*tqr#Y
z9PCMpzag8=&W5|ya<)31cE@p_Zt^;EwYHl8&)IysvDtDut<Cnv6H(Qr&26>ZttU^n
z;FYt5Z#JR1c5}mRZ)~-;poY8UK5cF2K-Zo$w;W^>Tbs@kz^A>jAu!!)wmTbb*J*FH
zp1Sb&X<NL(<~FyW51Y`xE!W{dnOu}%8*Rom+MI8+6AEn?ktmM%2fPEv_47i+MK-T-
z`BE0sMpAA!h}=*pL#Nj^Lp*_Hm6ZAwDBQlO&hjeke`FE|-nWQmLA5BFhz}}A;qi6o
z4ST@XEQW)%(m!ux+qbbbz?X{IYRtSHGlOgAGq|sTE74#5b6_b0Zx{#a)31=NaNKIJ
z7E=f|W$LeFxT=7koY)k$;Xr1!n{Zfh@t4yaC>D_S11ch4Yo~RlkCehMAkz!u-iSh{
znMZB{-%7@qD-DkK4^IwGdiy6Q2gj!(*v|lsd>A=xr(s;Q?+0?PFKdu4gp6osHfNac
z5?wcPSO>?#l>&G5&QqJBj!or`n9z1}@Xi@#T0{5WFDv)XY8Wst7vRm<DjxqLGS~LD
zylB%Xt@*C7=9id<B8q4uMU)6QUoowG6LVE&DXTa{#sHN<;bhu0Ny0jrdaLKfaS%<5
z!%lxr|F8#m*TK<lIVd+2s6jq`+n|VjDO|3oROX6kImmZqAmyC_Jv=kXzrmhdp!IYi
zK;64MOmn}0E^ECojloX!-tlXh2CqET{3vC(RCly1V#!fayKi5$BQ;RY$KqWEi+2^V
zIID?8-YxrVy}a>i%v0?hZydPaPqS>?l$klM$uj>-D*h7-yK>*qNCol#wzjr5)A662
z&1L?V2O0mbgYlm>>cxM0KoOiwM#)E1Ik0<bchHY@0DbH+oj4O`WEEbpGf0(5ESCJe
zm1t0(4m5=-er4LyPG=yMsPLKES2Hd7B`x_@ebuerQpI6N8djf+xPy3g8H3xLqM?#}
zs;m$~&emhfB*oxI9*?Mbi!k>@)Xp#wSGjX_HM!aOYOe*?a)eML_~cko_24<)Z7;aJ
z0%9Leoz{AX3T{v_V-Hk=pE!1`I<hL)XcPx-bc!!5A^laQUxWw(p4^!uA9uxD4F1h;
z&H3!g6#iL(snf1bwHkII?rX6fbC(sWfb?fqR9)50HGu+1vP-D0mDa$(V!hdQX$=cN
z|9xw_&GfDyXrjJ#b_Hw;*)5Ao&Gk@l!hvxLVt(s29QUKZ1Ga`7tbd6yBak1WXGaiS
z)5jxDzC6lVIs%$hu%{Xn;<E5tpc+CblZd%&?8m0wacAe2?Q@0Qn!^RmWYTy*_^}f7
z9p|eOEH<ariYk#f%vT7c5A|8HR5U^>srK$v(xt7fFc>gS0osk`aLjHmN9ddg>??@L
zFqr-=Ce!Fnc*0-JB4y5{BYf(1nu(SiU0r*IC#^&$A#co`nw~TLD;L-k7)WB%fGxo{
zhio)n5AYgH((yCNWX|{yz$e*U6$pTm00AEbjCZFE^-f*tIB}M~J0p6g-KQy<d|*GZ
zhapg6ZmHzoxEf#EbhM~Wx*c~-yOX(|b0hN-<RK8Qv0rt|R}F4v6J&FhOShU&lyFgO
z-|Kcy?nan7PCe72KKzWV5I^m~cGjdJ)~_aUV5!Ch^yi?HFuWt<*>H%SQk1daB(JO9
zwt~!rR%Zq~a6V62lLVu!(d;E^*bpczWj9FL60L>B+>5s$xfdM?dY3jd*f9v(V2$rS
zCAUi{R=cf6as%H&*?V}`K%ue^;xQd3mWm>&_5*TXG`a|$lI>HfbH%KAlxEGwAslPN
zr#4n?2n9#gS`k%0lvHI*AXRl_eXv8^07h15f1s{Vd;#Z!;w4hM0!gBE7~F>r<L4~M
zbwgX}E7oINSGQAW1r}5<3I-NKsj=!HHs%6|DlZ_A!M9dpPOLv^bY*0Yxl8ul>CU5y
zl$TF;`l_u>Y9)kLZ=D*6Y;F=;5M9-vsxLt@pY)sQrTU*6?`qDZ57qTQ8=LL4{%2!z
z(_QL+9%B6O?=^CFIt|3h>Hg{AbFRcTrT8_fNSG@>nP9vbp2w$$?==$~>Mo|#s`P2c
z1g9r#QdqEeEMi}g9@;hdM-j1=eS78ID#c!5yxn1(j}5=Zy(=5yjqnB%;0a&}l<aAJ
zsRrqP;pr>PkYEN`G#FaWS*jQUmRG~fnb&y=aeJFyjd+b^^kkcLG)K6j<7yN(SnG#u
zWRj%1ooY?SC@IVLd++SPOo1RToRGE%W`Yp~JV2QRGQl|Tu)u)grztmir%J+LwSrq#
zfvn0q4XqXmJz7V^xmRx>HGb?0bn0PYQ!Z;NQzfUp^dR&oTY{l=GToK0Tr;BbeLI;C
z>aLAMeTC{D#hVuxACeB8<m58NitltsPSf|M9v&lCe78Gt`B&<e?lkEN8RZ24aBPle
z-fe4}cTp%uG|C5o#T>pQ6WdlI+3otSX1BB?!tlzGhkr_5ylvVOmyV?TUOrq&JX0LA
zSRIXa{%Ev|Mx(8bMh3+;oy<ZM`5L2}RC1Y1o2lmcaMJ%s9S@Z(+KZUJ;xRR$JH44G
zXyy;AqD~M)tUmiv!wvKa$S{t$=V%H%+AqeE(M$mUMqv!I(Z8f-0#v}St%>PD1z)}m
zTTT^i$`X0}YBU2QTHXeOEt)cCx5RgBtHZrM&Q)4FHWOuNTBBF{?|UbQ|9xQ0p+rF{
znJEd=ZzuUAZH$^Hb4}7-8uBJ7U}(BV0~i@tp%_?O1NPpxsD_`z^Y6x0k?QDh203u@
zv7%5;4o-W|&Q5!0M`tGoFM99xj}P~sy*#ku#uv#3SbY9V!sFcM6j*%zbKTBnD|P<c
zYHcjffB%y6pB@nDb7z8r<KXw|31B$#RXU#}6I2k^dLcfU2E7#;52~fqDfY2M-$&*J
zpG0U*4AJoaMGzfa;qK$KR!XyKe7-|XjROjEYfRniD?Fi?YqX1Ab_!lbPSHy@?R=RG
zKe<LMlkXaqhC!HJQL(8sh+5|O!YaBIDso3lM1Y3KQbjK*|Ie<PVNC0sN;opSgFFgA
zDOHvX<Cl}0q8dK{f_a75)g`0gsW(FdX4yDMGsF)=(J45fZ+65BH8e3<q!b5{29ub1
zFe9toM>w~-Eh%zS-^*u%tab@MQ${X+L7a_Ja^%=iIbL0U#8A<wKopz;l%ZPX$5|$Q
zo8K8E`P`Q1)a|8)d^&l7+@6smb%Jpg-9yptqI;2t`CQOoulO*#px2->kWI-|Ms2-H
zO>B9ffpWgdJ$VQkHH1$HzE<2q=v^>k2@{EjNm@pZ6{5{;S2d&u)HQ^^G96&_-Q!fu
zl&ZOd4g#|sbwp%KN+ahU-ijWhS6p<1Q9ME7h{NfOr%a(P8%)TD0~?D~e;G^4uEsDq
z7rI6!#3LBP9LV@Dn1T!-CE+PcXN0+8U(>sRa${xetPC#!BOfoF<yon7duf!-d*D_&
zW&)qjh(r1KC~Jq~Q)@Nl;#NkK(-c*5EyR7Pa<pKo94%<nN1uTzM+>6L(LJd0GmXze
zsB)A?mF%wgD}Wr1udSC2?sy32A^;0#w*5DU*o%(TQ&*fHdJf&*)`#eD{04zq$gXpA
zejSvvoHDSYcd);<bGmYt*OeMXp?14nr`!7IFRa@FH$%5+LyUo-&xiCwtkPpBfA6KB
zz$hR&MIN`TN)o6DP|BSysvJlfIR}lDf?LXqugBtQ*(fEbS7=P?XE~AR_1hKds|Xl1
z_-z?=$Vt0N)*<GCBY#nN{m}*uj&0{^mfaQ%E}*iJMlRK$@ZKy|Hb8@$RzXuXVdq?E
za5LSY;h<N3!U2-n!1c5;+Q<R2xl#cRfPivsAwt(Vbr+%?2g3p1sdc(3B`m{HWOkL;
zARdUnaFh3Fu;{o2J<;PYF1oih8lSu4Nj@p6mPo!L&`-5hj}aVm$`y~SK$V*-Uqn^O
zu7%U9*vVa!TovVWrVK1*Yy8sp9Cy?ore5)1t1H}d&_ng>QW$<`%DuMC6+k74<ZB&u
zRT29~+~-!z&4*68F0KYPC+JTy9o`wotPWObK#LhH0rlR4mIjoAo>2yM=R0Mamv)6^
zTGn8CsB1Nh-bhI@$7}{Q?G8{ie2UIjW?Xb6JiKWObGxR5{QbNb=P4!3^Px>yUWckp
z*<1~OrYWn3%jcf5Z}#LW9G1>!7d>a=sinr~Yx_!A<F+paBeVwP5+w6vN;}|;8oF!L
zRGA>@AT{!C<IGiSgJ%>TG{tRYYT}?l|KQIqxd37MD)6QRzOJq&Vha|Fa{aGhELhWU
za4qyBiW(AH|NhYWjcg(Ubn?o2CGfL4jk8MwAv$}^Yb#sdEwAswzufxm{Q7M_uTrO=
zQ96}<c0-3@uxCH9$&*|RHu8bJ@B3C{M>!QYOS_)BXlx5t_dS40IM^)QEh}?c(+9L;
zla1W&Jjt!iA}-iuvuHk^=2d^+A0XEKN~5UN^NZ?jmUOmo2qjQ{K{!p>lFP8zrQcWl
zpV=t5^G4o1uYeW%|2ge;dm|nHr`>Uu{(lei{@1VG>|;QF4Z`zaNOmvrTl12Bd$_m1
zvc7)u5wpasKc7UwUgqg9!33}1-|b~9ygb`u*%JPkg;7Awzg+Bj=EcGGwjbc2gkx_!
z&D1@4_aa*v6H7i~1}@a8#aC$IqUE`n<+-@o6SE|Z7=xJ`mIo1h#(@;!MJ}Sr?24RO
zRHjHG`V;Lo8@EAmUVUm4{y^&W_3wMMQC9klwL@ho+J>fgJT>SF^~Qt=p{zyq)|RfK
zdMhFUqYT)i(O5vd>hMU;xJv?^_QZH}2@UZ{>u^VwAC%XaOix1DBdt`VkZ>A$LliT$
zQK#?UniYY}^_)p`GTYK5nqE%m83AGO;0xTQiIewB<c-!&`j?~7pW;HEL_qr^5wL~%
z=0Ghby5fgj+5-CMz&HIuxKWY&d@_8G7ZoPMcL55cZ)u<etgBoIXYE`?!S!>|miV%Z
zRk1n#AbU@|s99b~ZCb!d?rU@0;F!feHT}udN@_M%lF#X&4oPo_tj$tez##cabQL8%
z9jqjuWmNQr+Vt>DMvNnkgMJXlNGBm?X$mIr3HTF<FDJ91Pl7Rx_>+K@nVEE;Q_1cW
z`jG=3O>Wp=<A-Eo;ai1?1&Inr7z&Hg#k(e0*Eq4qqAl@d7psbiJr+%tPAqShS5li6
zaFU7TxWUA-PjX^eO*XOYb9Q1yZ-}g2G_ivar;}RI+1X3w18-5K!MPW&xwa-3zf>Bd
zN?TzpUBV-UXg5JAPeJDKV}>(gj&CFmq@UzUra*%czz;t%=D<U%y%n+fD{=bxD(C}O
zN^$&?&&TplSc6qL|DsvH&WQDaOzjLvCF%K)PKOCk#<hAAcpq6dl`x8@oF}7HGx9#N
za9+wjj#p<f2NGj6c*(R&ggYd9@K|q~N@-y))MnQ(E1A%E(QHgJ%F-eBLmmwe8tMf@
znxiz%#KZH>W20Rz(Kbw(c}}~RoNL#cYo^}UfFQ3QycaX^>g?nc$M<RyhnUwJKKDJ$
zXAUga8{e^e-t>wc&WP1N?oAsoNq1Ndqos_43z`oc86AT-@qHXU5Ipgs;}4GrH;3|G
zBd>yCv+})DRdd>Va;#C6z8GX3XistOe@~`KQn!s(V}(Cf0_lxxM{fo@CPj+(%B5u}
zyXx5VH*Eq7Di=!;6t<3A-^y{O0Do=MI3`oA#W?D#4JN3qVkRq<>S$oDs86{uZ!0y=
zPU2>w<ZV-d*opd7@3<dhzY*ybXd^QwJ$@91GW?T;sOSTrVQVAEq5`aajVM^+#xAk6
zHW4ARMlo#AjmN?SeF&X>QS`tJz^>?I2PVBRF`mqhmK!Bri52leP_x1)=>5wcGPQ(f
z)L|8Ammbl?sxp-o7Pww7`LaiTos3x7Dz%l5d*o;ATAjKMufvv^GP|6S!o<qbUy|>@
z!~J1AL7AdY@AyqfjJ5%hy_|++5X5AII5mOO3q5ATVRjAe;6AbXgC==~Y4|#whKZNu
z@-doX4-UeN+?7GC$I_IMRoPnGZLBD-Osq{(fM9O&!S~JHo+0vlM}Ds_0<rL*Xpi*~
zKG{Ytwo?rfw9g^Ir#wiR$ue_Y2FOle&c{iaw%tGCJT#YyI-B*FXks!FI5Q@gzTYAY
zKk|+XJMAPaLmi!5(@m#kZ?sy<DIYWBg4~V^9$QRU0XYvZE`XU#;kZcJ&1O?S07Aj8
zyMuprpCC#|g4)S1r5P7uqL_D|uGJJx%L7=-;FGRAD5(7Dk;&v^8Fn9OE!wkmKxP9_
z5r}pb5F3RcItCCX<Ao81Ftk+a%2qLqN_}}!45MR&A^e+8FDDp%_+|n-)o>W|G;t!=
zG~P~$f_OGWvL>Uz!|Z;8*^n>N5h^z>0<l$zMU$9}8n*IT*sjyXe5IJrm*W3f{@mC8
zGr02lAA9la>S_{A=d=Tr#Q$@)+FQE)$8EvarTyn2=6`Su0Uw$e^5NeHQ4l=9Fc0a#
ze=iQ6o&C}~d4;_9&GG9OXU|W2&tATM{+}m`nJ9*7h=U%z_7WNGMQ5YI8(5hqY+>%1
z;{_5`6wy|4@01HA9L~$L-4ohoqM3U>88Rz5*(0KMe=;25vz@}4(!Q?L1ff>eL|KjI
z1{K|;f*9>BYz$VqF(!{!s36*C6f~kL$SOBvs-!fQlKHfO=EIzloy=2`<g>^IQgZX^
zXuc{O0NE3==zaM=+6^wofzK4qpN0QBZ8Y|$_<w6_$^Rdu{r^dg_^;XBW8*sx7bFvB
ztNw*=_K$&|G<fcgVBmp}{phD)SWw!<()zYUy%c#s+LwpF9QB_47nCQ?rm<wINO5cJ
z%B{8C($=6zaciB*t!;KnTZ1Az`?xVaudWW{XFjrNOWVTcU?;EClcdy>+)|F4lycm`
zPNq$9Hrq(bZTLHRrBuxFOKl~kw(?7<_~(~;nv{B)TgquEy=>(d(*^^IiRi<puumNY
zH;&>2{N5)c8iem+GV!;`2o~UQNJIi?5>8>A-;4=$skX_~`-sZvKIUP8wM;iWJmaEz
zL;$xM1mIY13;lS>pPy(jVI2WC<%O&w@VA%+Y7ApM7JFm_t2BfS8{2a>$@$$BQ~AUb
zK-HfFw9ut@9q?$m&gL4HAbcxM$7z9Po+t$nb!2bYGy%3cySOBmK?Kura~bw8$xZM#
z7>tVm@R&^}BX5ds0>e8)kJ?J2Y<KV(dt_+4L)#YX@;?vQ!aR7<d;R9%c>nbEaqsBh
z9|?9i-l$>2g+m4uQE5QsfNArH^JMUE&gKu@mb9{lQ@YJfr-1%w5>fZ`{>M8QuV9Qh
zm}oLx;&Fn(9=fo3z)();1`-?4->8VbiZJH=p_X9W44651ZH+!*a2RU>CI#>L?t-|C
zgc}I&FjS7GNbjfzloZ*)`7<E8sZcHFJ1+e9VltlY<K*mV(>|UkG=|P&00gEl>4zR<
zn`Xb_V0g1H0yv!bah2ie9sX?5%8lH4&WY)L%I#B3luya9J?sjX%1_D4txZ;Fu-9AC
zCF9_R(5x7KFa(jGP`l5ac=4P2d-CqL-^m@7uyJE9+G+M$MHmW4i=H>_olj<CU${1i
zFAUNCC5Tq%v%znSfUVQUs=KjC(P64O(u22XP6?=%_$msfJ(?3QQ5v5ej;C*?(Vhh*
z>G^_IS?v<by#1{ai<bB|^Y++iB+Ja%TyMMU&1O?y)=3khH%XP?*6Sy|TuZ9poIppm
zw_(h!Om}e9R`w`jjRu>m+uvHhD_2dhg%2~TK_`>QM}7AM25A^zQnVW{Lfs>Vzqwu8
z`~OZYdP)+zJe`shxll@}Fvr9^m%D;}?pZ8QM^UMErBWRz<!qJ~+pGla36wHVZ&^*p
zEyIIxm5Rm6`Yza5KDuR7;8z;9wqI!u{CS6MqZ0hhYRGO?0`_EXWS>@o4^mqtY#nzF
zL^u)6U9)UHMMUZO>8f9CJ+}Lm7Npysd$!trF%N>Fk%BF7SID;f5?pynw{pq0xR_sF
zv{M=AM&**5<t4csvI}F$)MY!5s&V;nV4j`5E{uF}g7Y$_{-jZqe!Yx~&tIEz^7*Ud
z+MK_3=U%lp?^XNBy=qgocb`#Lt5anc)!_y)@o3i1*n@MgFa3dNYFB1ZjmU^9#k`9P
znn&R}7~aX~@|qvG^jKy7<dXs6nqr>a4P~PiG%>(q&rLwJQY`uxs`U7}8ABT!Xno@7
z-;1_7p{)H$xjlQ{eX>K&cZ~}XUaWSXDA#BQ*7<JRZOpl>&IRri&Ss|`oJ4BT;mn5*
zu9f@2Np@2mj^jRQ%+W6u&ieX2dPaA>b1~6;`_j!S2C-Z>kyx1`OCNy$vncxy%~m?^
zHP90Kf5&mt`Tw@I+@<~Jf!_c8UM)C4XH!6fI`*CSFyPXT;n^(xFb4J*7$44d((VP!
zw?HV<UcTOcLH+jx?A6sB=1*s;b#|oJleK27+NZ*{PEKDRABeUnc15+4wphKS7HjNN
z(lNf3i1JCz=daI>PObNEj_v)I_Ss8&|HM8!X^=nvhwerkcU0Z+Ua*g}5#{5I-9uxQ
z0GRklGv4y5-R}NNyksRHb-QQcJB0!7Pt@;N;H1&ukO*^o?*~zY*~EV&dwcK;J_!`@
zp{+fdrJgaQ4bND?{~7-%(B=-T#QF*P(om3|s}GpU=zo}(pUDDd*qrG$!T08@#q!<3
zbC~YK|32vLzk8o(b>G86#G48>ImbUr?3cV*_oz<MCQi%7^z0k(yHd4g99*F8+G^R*
z5Cx(?qOdmRhq$(Ih$~&vZu0u`Ls>UNF&R^nNdmEL5tHPyNuqPLs9ADa$^Q$RsCxqy
zbLHH#$Lo1;0E|F$zkcGGz3HZiS3b}(g~m(VeEH3e0KL=4xIzDT?ZHNh7twJ?gEOT6
zqpAR}$0GS%ch6!Zp!Uo1y4`b*9~a!0a`gj||1mPxc_>1QaXmuab3lpw-)^Vnf7j`B
zHkR`LL-hY`eW&@qGvbpeCQGF#%*vX~kHB0SdD#%FxU`NbjB(0a#~h|X1|KZV3s)R%
zd5;EkHfPc<LZD)V`m#9kGE-1~R2Z*H3TwqCk8}Af&0n=dM6+|>@@O@^@30A{@+Y0^
z0T<7e>x(qrK1);GMTpIWr)Od>_j!@a35X1rlc66(XEdt)IsBh`@eU(XityydppBm0
zr!Nd+NBnF{?^sJ(YMCrDWo;(!Z9n6mZQRn5D|&h18Ci5jrc_SHh*OuO%m&-G+cI8V
zI^3b5f^Runw&R+sw2f8z1MZP#tkW^oF}jNv1jutXZD+#_f5Ql$`4DZ|9VI$oI#^$B
zwGDeCuiBQe8g~xt*zKg=r=|S4F!@hQSqz7Si?Bb361Z6Y>uhdyT513Pt<BA){Pz&$
zzwg?Ep9UjJJMX77q=|AsYGR0^IHDVYxD|l3%s^fo{Jekm^0asQx_A8gAH7%m|MB|x
z(ND8+SqTQqs1rUpfwl`pUX;?p@nEtiA`T`Zkspk_@x?Ic4Tj!Dyp#Wa8hO`26nn#h
zGP7|s83JWq82v<F#8W$wcTb}J-XrR=O63GEj>C&Fm$0GaV_O6mi29FNJQRrm`zjmL
zLsBJZA+hx+uD#&e8_qD(LXQVVN=nIByU^;MZF+&Is_By$uD!{X);B5ls$CQh@opY#
zn#zjU?TAJ&WDmEuPetdtU1g-HW?Xi(+dYIu8=Z;otIUUC8k{9VL1V8`$oTv5V;XG0
z60IZ!)3C^2f04YB*=A^Z7p5FbHJ1lNAq}SPhw_QnfUdL0abucyojNT#pg-fs2EL`W
zU||e*3_z$Yzyu_3hATtHxi&(kMDRJ#L&_|GEXLap&Sw`Q%3Z3tgWD_M-GNUep<$Fe
z9RW#rmi*L6-F~33E`Vbf)YR#@A&n-5XeET0K}(X3HPh+56s@FYmLyfQJ|%J6A`A-n
zX<qjAU9u@$_L=cruK4AO|JGOh`>RM@<5A$xZn=a4uWCspD$IgNj7rQQY%I$LVp^pa
z&qlbe`AX(%*G|_?$O(I*E$uJ4x>Mv%N1)o8sjyJ6XLT3Ka<zPp)grXMl&%iO)Do&j
z(ZWCq!Bmj6jOO)D=sZw0Ca%jF6d^ggrx7Z7L!X|i<X#XgH3u^+DIle^-v%JFu?{Aw
z2%s*#;eh-KzjSacLn56k_Tq#kx2Xt~MjfZM6VRm)bSyX&oMU>sjk!IEaFbTDq92K<
z(Ak)bNyEyW;hlmCLhq@Dg)J|g4o_KFNx>=kD5^Sz*tsi8W%Y}w?z%<QEzt(&N{@wh
z_gOe?{N35y?x^sKORGh<olSv2r}HmDAi1dem#(XLwSRhi_@26#?f-Jndwu-k;JEkt
z&FSImqXg+$5!M<~+Lt6SjsVo34^R>(M*xa=gg7%K$wRFKNVofh{GD{jHc_8-;F+6x
z=E5@vo~^H_PaX32m387Y3V^#6fxEfDIc!Pp53`p7ctVqwAZ8;ii!a-Ppc-<3W;=S8
z0LuB{@5u85cK4iK^bV&?3ppNQJlNz4h|lh>Ux&dBeD-3h#--|6=LIjdunE{SjFDBm
zh~4dWW1cD6tT;s_&9}GRyyn|!Qf(^&8ag!oz}U<$&!6}9k6!flkDveAd-3|<<mj~b
zkA3)ca=QQgKbfP~tAnFc)hSawmh(NyYS_sq-ZIsKy^}L3#WFjq^wRUtmcU>AVl6fM
zH(?ySi6%ZsPDz2!->x)G(5vDyj{1D<K3dM*z2`2kVX@uGke&E(Re4(0nyxCY($uW1
zt$f4D-*fQ*F7lG{)daJu(30cRHaVKq=!zpd;>dm|gpuqxv>9Y>dcyL5Q`lvr+ig|p
zo@ok1T;KZh?#zn2P&9Vdm+D{oz?%KZ&OMoH5%h7@O@@|}5U6<MCWI;;wLjBRi~v3c
z9(NNg#3LucB6)NVEW&}Zv+p_C^g>yYqz(V2#qM@nChEzn<S3Q+9eGhDcPIdCdMJ`d
zawybCiNum7h+Jk|z>eFJ9k(UpZ(B+uh08-fcT=%uKJtf`^8rgh<gABE<IfV@*y%^9
z@#o9tD;b)TVb~8U4oxEDT0i7sO)v0dT53R3%8o&{3)oK4I7=qnPH+42afY<(E%buc
z#TrGQx2-*iO8W6?PHBl&b{^E9f(a(um8d4S+e-bipBxc^82A06eG~Ucl6+oO9?v7&
zmNjJ_`$Wp7?xnkC#g^sT?Y!Bm2;qs{QP)YqDk)sYFIZ5_qFws09C5NlXikP|B_p33
ze&*OItIZI8Y!1DD-s$VtakK?o9iLL44m~e}iXyW}WkzEj=8*-Pt59w4eV(S;p9idZ
z>d1#%b|^?gOs}d!)l+BNpm8;pRMf4VJ<GA(M9r06So4fTKa04g?soJx7ds76wsA*S
zumc-&7SXu6fOoYe`O3QzQ8`?j1xv*INVhh5J)6S$2HyC=Zd=p(@7XC3th)Aa&#J?3
z=0I1hL&lfiyxc!L>Kz=Ny~21bFl@>ptU{4Jz3c2~SGntpMI|+i88TPU=|1_p&Mdd<
zjjds0H?X%X&WA@SoM8(p8;W!tZLclZCvwS%nEOgKr853l!4CHRy@j!VrIZif*=+`X
zZ-u_JQH~JE<q6moc_laPS=eM<_Dio7?BrXSWq?zIp;*ib&DOd{&BhtCridwA)r$Q?
zVowd@UJM0yE0}=mnVPwo*sd(-8h&cGh<+3SDYCow5UG0^-)k7n=Nd7xaHIN{HHY8B
zR=uHa-dh=+&Bj(yWtG+28(D?VMvayeBU~1T*$&Ne{I9l{|EqAgx6!;1@tSt?IbkhL
z=S$Q1(saHwoj)}5t2~bKh%mdVln$b?A4K7G;4jE*xwsziRH<&$QIuh8pL^UaW3y$0
zxK_F3*E`-zXN#q?MJ)^O#+K9<<HuFy>7t!2K)^0*>zi@5_$FKx3?oxudF5O3;;rys
zp-KHSo0J!2POde3${f}+@4lT)rXfa|Gud}l9m)3guU`Bjye_tX&Oviuz##lDnXf#H
zZq?3zedg9V8FZ1dyi-9}zBIg6GrUToS{h&Lg5CH$a8bG`SZQsvW47*LJ6)RO3QTfy
zS>32STBJm7XU%GDduvW(nZCCFDz%;6$KXUQBz3G#Npv&=G#VOBj~OF(gGcEc3`YA`
zIL5o<ti?=bYjYJE53Ft_9?q<0EiED%YnR*$hD^e(0jUCqO<euqV)f=3Ggfo1fn!w{
zFnR_+xkCu2mOM+G=)QV?H=U8mBzUsWb8WVf8J?}0%00T5>tG$xxp+Soar4{-AXRNs
zV&X^fR7iTNI~P2jpFSDd!&PI+FEt&-lfiM?S_n?nXM^n+Xz<8z=kiF1RpS5Q;c;>`
zZ}yk6_<yd`a@=(OmyX+B#{YYW@&B9}@t>E`1C{83JkTDE`FC)%|Lo;K@ATl+o7czt
z$N$wkJ>Gx*pNB`kq~h@%y_RVVX$~%PBKwSlxsQ3Sm59?S7wFL>*gWWrEGXknXIyJ!
z5fMb1{@y7mnj4RXmJy<@TEh@a+S!8%A2RNmj+ddsaWqh-u+x(p70x3<a-(SHgvypR
zG{>KahKJszulB5#O#!Z{`p158c?1vTDN3>nWaL0Cb#Rm@)GS`KeuNph1@GE=oN1Kk
z)KSIfKp@bd_E>9_g|Ak}5If3hX^&;Cq%pMP!&XE*-w#n{#AhQH$Cf%Gja(dJq)m&a
z3Srn7!$OZ-8!D;AdFe(@ajFdNH$*M?s8)HDljqKiQFAH+U|S8fy;NNjda0_J{=-lv
zt+8<}>c3K)m(J}!D&wPus0AO@Dh2&FU{tyPS@g2~7c>DZ92l+FFQbdKdUUZ?pDxyF
z(#6_6=whulU98oki?#Z6u~w5V*6u+UYo&BC0>L@Ff^FL?ql}e$l(ABuGFEC*#>zb?
zW2H7_tkk27mHL#iQj;=P?m-zVrIhgpw}BPW#CkoNSg%hL>osX&{T?*2UYjP?>(Rt|
zeVSOWNfYb$po#Tdno#-=<Z%~(j01vv6!7fI=OC4jF$c28qD#l|EZ_w|vL`=k`2EO$
z&|}3FHsaG<Tr4UeNZT5PHRIXHYW#?D-2_e%D1cA!D1}u#l(-o%UK<<70|u%Z=uRit
z8(;&%Y0OkHMWXPYi@}tON_4wCm|3EvNl!qw16oe!LIP0<lGWv^3)WF@I9&I}cS3~S
z#j6h4C~KqhR2r4hIm{oO>;Ti4>~>qtt%lMNfK?NOSWxy{tiY~XlRxADXf^(a6f`6|
zXl{s;y510bnA1?UrM*m-#&@;4!~xK1MP1s<bjj3EL6>qG`cx^Sk#@cu(YN`TjWWl^
zY@VY0BW5-!kl~CeOCQZfr>R>Wn1hET`HTz+h$<#JDmt8|AUSqvRwVs+nt@a!0h7aW
zES2G=Lacxvl>R1;&`NS5dWOzR<@8M*qDgi}Nz|utQ%QDeNEFdmU-{7+wmcjZ=D4Ps
z73MpQ-{U;a_pz_#Ob!Es^Im0OsvOhraZpb-_^<k~J{Kje-HVde=Axu8eTtuplGg4;
zNo$oTX*`KW7yvqA!NN7^65r*dg`?$lQsBjDdCC$QxI_*r0*B@G0ZhOdhumR)nEU6k
z)kq7(I#nm0M8m5ifVu1J=?9)Xs0ro?)a2Q`P!qgH-tF^AFc_2~d^jG2<8XRc8}C#L
z`N-=nF!G3k3z(D`4@qc~I-gw-$JuyNdp4T4NeA)TjZ#t^UsCjlnFIqrS-mNeWUsC2
zOiHW8M0VFioMnJBi#!e3)Q^1boYY=KU(zhD3iLW1qJ}rLXolWUIX_PZ<cfK0#*fkt
zYsKPmNrKRkL#0OewSxF5Ow8CDJVs?o8aYGG0XpUITZZ0rbd^Yw9Gio8`!CP-@ro}`
zMvkS}*{}*~j;2b{IJuQYdObNdaJ~;K3c#YKgN+d%zc~S!M|lJX6hN@J*R%)e&8Tw2
zwurl#Gxk4FcXPa4+Y;yEw9g$o{_;Ge=>($&3oKQYxNhH<i7(ShM)N8OKNZrpiMwa6
z-p;T7p5`_s9b>(YWjxLEOWOT`xwVa=THME}qvi%pXB5q^{oc1SXFqhXvm9GVqW5b5
zeeVQgiP@x1X(KvYW^{P;k$ULVX+WOh=4xY^Gj*B&)!_dX#vxGD{H|XW{J&aTTbpVB
zuTA){^#6K*{$H&pHT=IGRPxsfepeaAIOFW^*djEj#IGu-#LLy(xeD~Oj`k`~M|&l8
zv`qM>n9|z`9$_00?+OOjhypbDKp8mXg?%)}#>9h%emEGw%kea&hoxd+sV;VhXK!bC
zDf!z+&6u4hX#RGphfvD7yr2suL;Nes5I_}X+ZtIRwRpf^%*`$gh25^y@3InExj|lu
z=dSB~B1S+7P@Kz<4zLQ8@V#5*;TnCKj51V#o_1R{)}0LTgLliHKt+6_cmD(H)d%Pl
z4i0=`-7*dk4x${KKgHc(=#~AUf#U~l>X(Cg4_Jwv<RJjUaZsK><M8BzdTT8Ky+?bU
z0?o!D1)6qN15JCagZ?Cki*9B;hLD|ngBtQ1g{XXkcRR@xB~{cSG?bToLoE~Vf$1t9
zYi&MO+iWQHc<&M&$72nj$0|Om^-T<JSL>Trf{#&G>tPB89kVXUV3)wqOQi8o58~?J
zAcdGq0R(jji(o0E*(idhj2=JjYlCZznx6k*f?0nXE2BpBp)Q2N4yXz+0md*_<uj2t
z&PHo#VwusbrTV6j6nac<ayA9kKvcddsHM%DLd>Gr-3b1g&-)q1Re8T+in>PBuAvcw
zaSL0`=lb||PL6-gIsSDn$G@J7<6m0}r^hcEgAe^%VCVRneyY|n$*n0y1-u&HD|{K6
zNZ9Eo4M+i3)__nvbsz=~3}uw{58vu@*=rM*1xVRNdu>|mqGZAaDc)U@ft9RIy+j(u
zXXS#|6}jNG&IKvCaa!?jTZIO1q#$bVR%)QuO&t*JrAxY*IaC*^V_gGbU%T*!u2;uG
z9B_rOkE|_tFCWv7E-zg)zt?VsIaO;>=M;LC5WY8Pi#A2UP(e<#p@Q4UXz4h^W!x3l
zCT7=FR^~-73a$cg%2p>|JZVLqF41cWF7%^t5*1fgpJ%G;4)Q9r%@sO+UWJai!h7F(
z0*JXewKj74#C41*FRmnRBgXoL^I#6t%^X}p9hXn@&V*E<3HXejJGrBosldAlE!&Q3
z=g->{b4xI)PG0AqrmLu$z1&((E2pJ_)ov#Pme%tX>wo?kL=)r}{=GCnCHkMu&Q>d}
z|JhpJ|6AVwd)WG)DheQ6f5*aZY4nMt+zpVFrD7J}bOdxwE+<2uP7cDMqN5L3<FLhw
zz{4F;l@0QT5%=U&<)S9qOsxiHAkrRFyI8l|(x3JxGdw(Ho<mzdquD;lw0P;oN8XXe
zH{PnFgONIX_J(iGitcFY)b#?TZZEnqFB`WFL<YW}p<i`r*;XQ|>+?BvI3rItnXw-#
zO*T>iii8o8wPc2uamg%%$8>oDUTp+1X;r<({}e%&)TwjPHxo}x({?gY37W2<+v;0G
zCo;qDX6iZJu6+7QxE;aKNbS!n6iA0@`)(`UZ760i11q$ksa6XIS9<4@zvs&J4rWpy
z+YD;)T=D*C8r;GOyI=bmaJ+q0$D8u(PtVjS9a*Vm7keB7?cjeDU^R%vejhCckz(0u
zRK?MuwJH{Ocq0vMcye>jBQvJD#C!Au9X#$cvRbTDM2N4+ZkJLuEC~9LEyZp(r%o9E
zH;jyQ63pwE93L~*e5|wJnRZI-S_Msis;GaHyAFH9cOB(GJayzgqUqd9_L0J}-ER3V
zl5lX=XO%Uo=(*Mll!e$zMS4T|A$f%2k2g9GFYxA4%%q>z8pud%<JoYyb~T|afLK8k
zp;8ZDU-bK=)9eu72;|ktyXQ|Dk92OFMaaO4ADKDvC;c2Qk0gBHQ$m19rsQ)qocVP-
zEPO3a<o`c=Z{F9&u`CSVf1mjjb9|mlBrJp^z)mpc9*l9q@7UlMaF!>@Pj`?;FhSDj
z%#6SsUw`+vw%%sZ0@zuuNet3VFV)r6)z#ItbQ->1a5F3XI9RAX+|lN=q>8EqPj!~6
z*(z3Ew$D4(hoYMc7Tq+h+GUwcclb|n4xQJ<IdonZr$k;CU~U&-E-l(NF#mO|Wh&4A
z(=mB6CU6K^-+vQag|FdMls2>X*+ZZITN|4j?)kq%kG`D$Kf_PZ?|0!S-H#F?`P&*D
z{Oexfc|Y#?2S4t=e)saA`|9NQO?U70>$(TBG24{AKG^&IfGx`okMLdhZ13dTgEQO-
z3T+`Huls+;Q}kDG+>Q$k`*>B<@XwPlxJv!~GbrzcLo~yzYOVw}3X26bz8HDkcl{Gh
ztnc;XVeeI(+yqJg0oQ-K)#<cz>%ZOZw7#tWzoh-w8vOdEB#c)R19|;pkc8o1FnCuq
z7@O#+doN!f?{{AvpZu_Q@-i2(%Y6s^uRZXOjaZs<I0#)w{ZIlIz~tj(2DsF}O7SmV
z<46)Ea;m}A?~h<EaEkrCm&dQy_Hjl6qK;si#UqL`tFQDU3c{kurGd55FeHqnfHLTx
z6q0aEjK*tmLb#J=5tLD!VAlh5^1y}^WgW#?$f{19K)lQw1w(&49i7AB1g4d@2U3Uv
zbiH_-Vc5j5@1GONVx>tELhrpWO`)GGz6!_w(YrS<4o<o+4^IyE(LLa_38)JFJ_lpO
zmO28!gHMd61RVT6Y@crrULTyEYT_w?=KbB58iiMWN1{R+#q$o$>3Y!5hRw@eBrTq0
zX#?gz9$_A=<knBK>4dBgdvMg+MgXKxzXmvl19u&|o)S>}U3KOZ?d6L0s*7SM0jI1y
zunMbD@K&@BL;+*K1A00MfH}}(R3xsiu7ZfL>i+`GTqN;y;@jI+_f6cN0zK0+o0{Pv
zRikym-|w?Bu&FyN4Xk#*wSg@hW<Fjr3~$L62c>o>vrVu3H2_A;a|mVI+a|~XlE(IS
zpknW)ldu<|Ulj*YlIk%~2pos{#BGf-a24-sFa5!E%q~`pDyuz06v0O=-DmY;{rfCm
z1;Z~rm@E2Z$g4u?Ipv4DTn~s+Rb8jsmwy1^eMBuvjv#53(I=poB>dMjLIWs&5Z-8?
zdKk4rT8aipy-U8jIS<w9K<dKbVv<+W0rRkcU-h6yJn~b%%OH}BZ^l+(0~byTt`ndS
z<Q#mZBy+TaUw^CRp1rU8{s(|Qn<iub{WJe#MO}&qvS?jJ6)^1=xk`1q5>S;|&5~Nd
zd7AIZ?P`D}-q*{kc4kAlN^qjmsf4{~{h*G&UbTKmo3(AdR?ltYJa>tpZ`rfTw@2~#
z5Z2p8n9L{xC!w&POFDa)a_C?**J?H7ku+sLI;Cj;@TAR8PMBa1$5CeZfc-k?@5g{g
z{baA7d+=sDR1Y|VZFxldSGWK$rBH7eq^ZNO>6SPGGT-(mEXzsZ8ED*t)pQ-I8$GVJ
zO#w1X8m{4`g|x{5@hi$P%}B`=e_+=eP({9oRU3h}gNzfrY%PxZthGD4ud~*QAGGX#
z@Ebu`)>^|{aGtdnYPumy_^b`z+HOY;da_t&_Lt5bdUn2bif}aODPy>BnJY!nN_8@&
zoXNia^lWB@?=jUhRnVViS5QNtI`)@mUNJ(BO@$2uQi*P#G>}}Jiandxm<D%tV^oYl
zBkg%S36p@ST;@Up1uW2Nb=ALaIKx}@yT1j!I?s~6g<vZ>Sg>J|)vo{c+`fGN8&z<O
zNHrD>95v|mTHnDk|5bOd^v4Eb#fK(=ldNLXa#>&@%@2^hz0aj3wDGp9B9>X$@q_Cc
zG^hM{rCAhS$foSK=jcU&HY&=&MR`&<#Ud;gImIGV%m>hZj(%;XVYc^@daeD|fI82A
z(=Nf%ZyAa(_sU9eT>#DMQ=D>4p9<x*)$2O+40s8y5rT^_<Nh_y7aXbY^NVmWI0*;r
zzF{@0hvnRygKU<83>Wr+_afg1F^RX;e7t)1%=<+p_lMR0aXKKmeR(+j-^SDS)4cv~
z<LT2c`oF)T{_lUe>d#36vXNK*<=z(59^Z!XD9n=Eg;gKUiq*;wek#_&8<zTGnPaij
z^sIw1!Uc`8Xz$cAU7jZq2IY6N1Nj4ZI_?i|{S7Pi`l`{>aGR(F2vc??G9|Tc0;*3b
zfNS7y+Ra$G;N4zvhR-o0bTAr*X$tj041{w<zv&+jfOMuw8aA{Nf==boKdKj`X3)Eg
z!fVo1h#sR1hGu>oqu?q`ncPIJ;inOv+Y+s^;DvGg^4KV6U~ym=h>}xPS0xH7L;W+G
zf)uKNh-Ne+=Uqwh<2JXWOCcp}t3o2Zc6Ii&cB9qUY~Y1EoZmiHc)ZO7oSK)TAUFRa
zNW&T6urQR9rxip53%Qpl>EAbB{ZW@h9*UD_9V7Ft%?7~7*U$csXL13(JUBW%=)QgT
z;`QNv_weZK;N;cb{z1)cfkiK-8f?J`g{#UYlNB4rjIbsWy3u89JX79Xc3Clv%$03F
zv-3(a_^W<D$O1eTt5OOjO@5*3a9g=bxeh9-ikp#1FE|4<E*k@Xt}IJKw#RIPw!)r=
zHI1`|L5Vru_$4rNJ*q*znz>G#=21j0?O|5iSgEr4>M=z4dlAsN;DT^-;d$UnlOH{&
zNtcXkF6i$Yxv|{lAN*ULIkt6=cua(H|86M%_*PwD`#8U6_u+~kV+3#02QDoNv8DQ=
z_f7TH?Wf49ew>g^9YqcDDX*|RDLI+5Mo_pYY@uNi^(6gSHw}lpYkm0mY))UhbDG#u
zFm3KShe1J7J_lqTKm9Px)SO65D}I`2Gw~1lNQfmhqTo0Ph19W<SL?$KW#e-W#i$T2
z!=S%|x!l~*<;&kt0Zp5WBH<+r8Ki{Wxg>1uGOCV4$7&o^SX7lbm*Of)7!4-#KEJ7%
z#KP6UtY`(&^Z85bgKrUryQH2ehC+a7hNO|c41$U<eLh_I?l|V+PlNh+Ci==)d5$VH
zY9HrNTVU^1*m}I&6h2;X4rh;h?ZL*qw%oW41j`M3?LmgUn%e@?rG^T}l)~uvShR3|
zjq)*LnBSQ&VCNW$cWLTceO?%H9<{5$Q167z^?PA+-CukZ>+?skK6e!2UDl7Yg^6F7
zi=$n)x0>hAjTH2(tguV7RaFW?d7Q6Vx+l<PYei{w<3w19F@clkutht1YY86a&Zrf^
z*)PXFIVj#2#d&;L8BoeQ{ns=|!u}CK_u!q|jn4pLT-7_DD=F+j`mZF*^@ERUP9KX*
zBpM5)%aZx*>1|S*8+pQXp;=6_!kjXGl<rL?N&J4Uu%N_76YX`y4=3sKl8hkz>1cG@
zog`r|!no5I2cUA8Gepd$hK&J$-s*OI7FRVPC$_ZgXzrfL%c@iy%%4fh`BKHu3_Xn}
zQalDps?vir*ujX@F-m(<Mk>0(3Kgre7%1s^icCA-y{nZn#a!n)t&y_wgO;}=#|Zm|
zc6Otz{o(j5x@65cllUv6x;lxDnrf2pz8h-c@ITJFJIkqzKk5BXf?2-B6zT!}KiW^)
z1^e%ft<IPGpFjWo&tLZ<=j83){y$IOy?uLpa@Kuw{PNxFgYI|v`<(JZRo1Ne$7#}|
zSL@R(8m_;LQoMyYCEMH|f@BOkc*>T%tInN|y-M|!4?Wa|thpPXQlr<vze$1#2*Wr(
zT%XB-W}T)SHi-mggqX)QnN#$`UObw_X{4=DE3cGFcuLtVQUNQc)am|ZJPgyT+g&TN
z2IA{L!;-4D!{K0$WBJj`Q_Q@QPy~+I2tNt=-lqiP1uk4Z(d%%4ibmUh8fHXzxMe)=
zO(s5kPj8-g{49lEL9~qn{S(~(PqX-CG#EUz|6ixI@g(p6*V+1V|Nq7R=P$Z<ygNI7
zdHCv;FpB)9k$bz~O~0&7&G6y7j}K<?`au}L0RVWtvRN&0QQ$lA>nK3S0lJRnt8bx#
zzGHN*b_@j#i^66f5GINj#0W-u8Bd3O|LFJ(ugyUy9!xQDDq(y)9if62&K@~$i3VHA
zQ+woFq2K$XFuRQV6oudtCKHZnWCkyzg~?(BZu`F^=ue}1u<*9DbASxAZ08cU057*g
zsVeI^B*BIR)HbIqpGUCloPx@7lJ>bG1<`q;>^PAlzAyH1U+n)hvA_8KyH64Suibgl
z%ANn(Puov7zwG~iP5ZyY;cei*2ol&sccOFr`;SqSHN*b&FTLx->Zv*>I<G2Dht;Nl
zg#%J1YAgE{OiP@=m`3))e@(;b!fuoVB=4l~(e{}X>%G!bq};Rm=kXMF8rb@;<00(3
z{$&&b@$@cleGOSkjp61EyEAU+08a+upiffANXa6p-lLb;rvc*P7R_FoGeE0BwK`3E
z(CdXmyca@W#qli+i?9y^102HW)23f_idGnKM?rlH8D=-Je;%Yz#^8edETJ|)u+3`N
zG}OuK0_ycvLOJ@u1&}2AvkiwTx})+Lw9pKA#SqCO+n<x#(mtEiCDy8H1O|DC-K4P@
zpaWaJ6E%t+FqJSxH!jJ$2R*YG2}6e=euenkBf2t}&b586Y$y?=L4aHUZ8xI=I{I<Q
zOIlZl5<IsJtULz65GNIiun=jAjddgFaT*x!OEeOt768su7#;|0!`lYWj{~wZ`tz_C
zfJDnKuJ~d)a#7Bcz&H}>ix77IA_38yW<xwYORG!O3aEjnYAQcP-|jR(5>yn+qY{(V
z8=$%jF*7zy)t%WS6c1v5&3_qUA@ZsOlB0lNsfzCJC&8_$iVFaiNie>Ej)BQs>NdWO
zuA#qxbR(FLks4|Eecd$s!eVj&XH;sk(+T!h)f~SqX;0cBoN`0db5%RAD_l9PAh`j)
z$tM}HnT70TT?7#AetFzAZK3Rk0l?6CN$vUpVu!l-e;NGW+WW?BBdNnD0S2$G(4Lt&
zqG|yFH%)UBw@EOH`p82tzAFz7_{c>xrnHy3Ff^22Ceio`mysX=`b*%Sak#F}n<9$r
z>R-k;%nBz#lyDLUXr}+mWb{ioNuyyr##RFoCu)7%FJS=><Cyvux9>8xz(&<!rjej~
z<~<n>b>VPOzZYfe^;5>HWVDJ0I)wi!0u&;EE@uisJ|avZ?IV_RMDO>8af&gDL~g-i
zJU@O-!xACCql~sNd4qu}Se%YYJiKL>X6nhJVw+G2ztSz!m=K1pF2GRz;t2Z@2n88C
z<0~qK_NM(MW)lHqo;?mP0(80~F)P5GJxH06%kb+Z%mqnofT{)9BoK`uli47NM`$HG
zjA+-ck(7p-0Nj_#_8J;t-PM4&yD{mlr|^V<Ds0B*F~&B)erdlZ|N4O~C<KBve5EDQ
zoM4gG5g`PEZi;f#_NK6%=vo9ViW9N;9iYp)%A0O#!p753A8_;%U7(NCst0F@AEU4b
zdr6dz7yyizkOV;SX)xr@R4=9rf}~-wO6n$^U{DE!3mdr#!wGH53EnTn1962gLK-#=
z%a9a1!g35^aR)V6q__tLb|Md$=J-ZBU}()q{2Io}_-7w6K=CFNA8;qPzO98s+jjqZ
z-f#W&b$d>8bC#=f%-d|Pj%>bpX(?BiEvvC^U<<fNvSU2s<g2f(K}}Ro4UwmifiaCy
z;%}CY<}hR1CF}$R0Qts%B738rdY93#58bb=u>nBJu$7LrqD5*<#$6kR!JuB!D&KMo
z&Q_#(M#Dvzoy2j*AxM=$!uq<Z$t;#^{RcTYHi_d*nv&E<JdZ2r+)5uyEaP)c=OupW
zgpdQw?1C6=WZ{IYo}hz$8Kd1rBai!)c$$6xA_VSU{_0@dZ)i(m_a}#QEPgC4mA!(N
zY-1CoEel9LBZneQUI;M>==At!3rc~ij`^A{gi=s>>QIaB0^9i6f=ok_tF2^GyME#L
z1>8YN$a;<&6e<W9+o6`sVZ<-Kf5W>qNbwUi+eH~B6A`=?(>Bo{K2X>J>atbJc8{c+
zm@QnnV!guUHFmdlIarVs6w!XJ`L0zT7<Dh00N<k{fZ>;fZDeY4TB4myM|;_J&ae2x
zHcF~pp=OuLnPTfUK_^oZOD#5Sq;Guaa5A?GCaRjdP<h^}x^U2whB#qLkI(xyo{^)`
zC<Fllh2z4sQ$U`JDe8T)P+|btQ*BO`BFN>S1u+na1Mdq03J3&XgxA*b@8%MOo)SL3
zn#T(xt9Cv`0V%~Tjb@t6ueA7=#c|!Aj?ocNBvTqO!lp*s$3R3Qx=9(z5t%^ePW6Tv
z4WmqJp7LYTQXuRHxTTu?l;wTeQ*n#90gT{qQT86=LW2d4W@D2C(cRPtE=oa#gE|Wj
zYEfz78AYs=kGN}VDa}wA;d1FmFfUWQkcoR!y}Mc)y|Z7_)eK)4`G{h5T=K*Byh-Cq
zMF}r68~|4`Ziq(NVxgROcqP(;q_cL876&Yu;vq!VlhOjAzUqMc!sIpd@_3p}rWwcs
zfWh;!g22o8@`i1$KIAWNpe+2`(*jKKCI(B8NJ2L+Y`2xaWpVU<e;)n+oudHHv;S&u
zY!v<fzr_FiEI)AE<&eMK?+*4}9-N%|x%fT!HSdAzSnc3<xpjn(K-=*9U<wO(4>wHI
z>b`n)hWX@3cENBgWPy`nNB>OVEZMsXlef&J;L&SfZ{zbYxp)&KXoB1~`G8;PKxz0W
zPQveP$8CJ}mJD_^Tu1TvHC;;_PcmEc>0JH7_v1-8{`R%0_ZH5`<9HOokrMM2Uh#n$
zKa%m$!FyoO^o^XbHCXfri{hs`##jT|8V+l(4qqRf!lVED!ND||>ApNTJv%(w!&r6j
z`tXQj7#|*;o?!^(mxm`Giwn`;KYsJ}_~_v13=YfJ;n3}jW8WR0{DdxR57NF*@OR(r
zo$Y^jc=WCRcJJ)$;N-}Ev<f^9dM?Cc-v9B(cKb`*<-gkfFHP%Rc7b{E-#4G+`G2R=
zYJcJXpO^pB^y*#UEF5E4P5yIwJEF@*knxf*=)aB8nZ1&|ISt*F$uYHkYF1a?%N&OH
zzkB^NMLeYa2Z($5_)o1T8(-`{{)+Y=?M;>c@iYq~Ji+cr?#GnBH!tG&!c7BNY-GdV
zzf52Y!t?EE2#`ZFJ@w=t(M@pGZ}!8#m^sP%x@%CvZ1(Kn==AtR+3P_W$FiiXjCEjb
zHrM%oV`)NW@dMM#yVHa2=}-Hwy}wJq?`W()2Jh9!C>;l5^CcP&%oo1Xvdm7fx%Kq6
zH*8)`Cbm7wC~&`qqu4cYLA(4Y!$%cy!54Lza8QsJ<MT&)YKt)WCd`83k)Phb3EpGG
z4`ZO2bIXvKAm*v;Xk?FlvyeB26y}&n<b8ohAH04`&LcRFB+>AewY(u;GKmUn5ibmz
zv(ItD=wY01dro?3)`wB%9wB7Anv}Xf93E|SJax6u?MCCQ{lur=-Rzlu0ecRICQ~J*
zg(CE3qvO->>bg*3S|~zqo^1N`ySgrvnCc>b$Fwun*kH8q4vEMPWZ&)fCc|lp|NLFQ
z{mnN|L8|zl|52+D#EUNGMe}!`!^>97YLBxhI;~k4Gv#hzYU7)fJ04oG^ShWB{`En(
zTX$Ac?{Xb~b>Z9_N+o9;=DE|aS@dV$oqz;)YSFq8{vK6V`}XZStCuJSQV0Xb>Z~{I
z2i;^k))3_J_nh(sw>Dne_G?*;S=ek|Fa{Rew}w#Q3A8wwsO8Ta%4kqr!A>F3d%O%`
zsU>1^HoHbSTW}lY(5~}Z!f4sR4#MG_av-L{ZuFAF+AmbrOx{Ap_hFJ2DtJnKd}cwq
zEW;Dyyas_F#^^)6Oxbj9=Mud<Oy-ni$L=|WPH4H#DfI3TxK`)uxsYWqqw6_^z8%Kr
z*4E^@trd%-6Y;z<2g(h3PS*$TC-aKv1dfZATY#+gQ1kbG?m8LwKn5KL3v|AZ$s^c&
z;tjc{EMEQvCKrXBZ1Eb4mt1&3T6GpcACyMr>O!F4-Pi&^zj}2xr^MSJ>s`(x_fc>W
zE>e)MIu|MVB8h`O#`#`giY}wcWPY*3@nE<JO}!3o<7sw!6HMl94RJELoZs(TOb0Tz
z)Cu1F@dBSw>hy9t7!2pPMiCSjz#QckIi5FA<SJ+J{H?VY_Oa+Zwue?r3-G8t3g0zv
zpgr+}npI~0Hg@nn>`mu`#M|QB{-7aP4|Cd@ht3FOeLQu{Bf`04dJ+!iL40_8Kc8th
zn&z%S%MPbs6!L!#{h!hZKZyVL#?yTKkIrWM%l+?PQvdhAQgv6YxI6vn&6|U>lb_0c
z!Dp59X8Ak03Y`Z~la=eiX6U#SgUIQ+^i9%YN-xrODT{tg33$V%%a@WJEfSX~4(QHp
zqY=|hGihc`APedgZI^nXr3pKvVX-rj>6Z-4&|)d-MHwm6c<|VpfInKmOPo>cVqvyY
z&Dv~L1xEInthH|uy=RwUoZMDz<hr8zFux#$&t15<<b=1&KU@HnnS{O$xL)<I01w+f
zxhbxpnv*;lp85Q7?I!BOGVMI`M<^4i_k)2hFd|#lG`k&QIIiC>-pZeU|JUc%hqC|L
z*eK?I+3Y;|vj6`T_x~r$-~av91p!jDzdb&FJyQrMlv>~(?;+xeN%wlys|_7!q20C&
zLKfQ5JAgo}Fd;K^*RR`3P|Ie+#!IHecbW<gf;1zqma%^>s=AW01#rYyD&;XOaY3dB
zTe_b!oua>LAH|aiyiL)foGLf`G=?uZu|kEWA>zaOJ<b=9s8DSwx$#zAF}+^wj%n5v
zUFpqM;g-mHbX|-sP`I1v#beHH+r18wD4?{b=4~I)?>rd65_|7-ONh3gC<FOmSaw7F
zFc-Et@kdX#e0&}EO7&Lxaz91kus^fh!8MTQKDyD)EOs&-|9=>vVSzsJBe_rvg5DzK
z560I~5|442%q)FMt7)dH+7v!A3Ujx{g4gxS%Zo*x_(h=m)bgLD7)0#9IcSdh_mThF
zPqsQB^X2qEPrumzf1aPeOUxIZ($w?h1Ni}sro$}itE<*v2oICsmgK9DH`%o5AIK-F
zVSH9OE?nC1(EEoH)%PR|H9JXD`ZbWWcq8K4<if4V4&<eu&yM$hh2{TubpYRIg@ETN
z>1z~>qX~K5q1QbZB1sM9F%jyFSCZ%xC|XN+OW`Z9-)QWS1E`LJ^WXpd-`m@Sm?nJm
z5Q4vR-vY=ECb8;fl=~+M##g|j&cqL$J@2uoa&3zJ9>w!85RyM40M(<2GPH}45Dl9>
zf#z`o<Yq?VHpa(S5@@JeK^PbWh0MV)4l?m>Y$z`;Pl_X)+&q0$u#TsrbN2JsGo7*{
zrE=Ax(j9W?@<!x|(KK{IIMzw*5St)bMgK-d4xPO+%`{NVxrJ%toGS(y7-7IOQT$T$
zMkBvy?xMlkAEnDlxqbF@AK@(_`>KXFKE-`)(qzGGnf%*Tb5_H4S`F$E1hi(eS@%4<
z9{ZfhU5qcnoq0{Zo#!FBqXIlYDcCIhA!oab5SA0Eo`Oupt-+pB;{>Q;Aq-=9Pq9H%
zNTAEmYR)qg)8v@<c(Tfo6#CxdJZ(_?6Y8ha@HZO%rix9Z3f_>cO^pcY0VXfwVc+j;
zG6f;H>{!R4bp0B<c@8A-9-4peZ`PaCfS{GCKv%nd*4^mWq*s-(7<xGgc~0Ri#*N_l
z=X8h$c$WZx0+DZUc15IH4)u>MJTLTH!~Wm{cDdBtcZr|nCY=M>nAuVaM|<YUNfONl
z2N6s>MqwE<Z$w?B;0&i1gdigCF)RYDR~*8*lERt<3CGwFek3iBx4y8BDD`0ei4kpY
z)9W4iLPj;a@Y1O(K~FTOp%v2;xw1#W9mvaBo5c`3GwDjPXhmWj=0uF1t%m>9AqOtq
z@H<Ekoqp{DmiUNsQ{bZd0@I!z{GRR<mgX+7*nOL=(!3ZZG{Cl~d?Po7$TP0S7zs4U
z)B($*lMV1EU8}Df>#^4?d2EwsF&;Ci;#7{RI<gaPAmkrwEm0y4?hlBaDTfbrs=rfO
z4c0!3+6KclJ?~JsK%BvzYxuL;b2-eA8L-70(-DFcj^pXYrOo>hzxe(V&L*%minav!
zPw$Gh2^}<x$8{M76SYApe_;yETO^ZBVhV;>XjP^*MVMKZ70WY>D|Q!YWb^&Tvm$N0
z2_`!gYj?@bmfWf*2%E4wwewP_SPv$ObF~Iz_Wf2WFxaaSI?SuMXCOpSKi3g#HWd3P
z3VS4F=oUTIMQU#xhr;ReyJbHnoFl1_Hh}AD>h);Wh}QtqA%SSmV24A6S%jmhGO~i8
zm9~u=CbsQ4ZC9|)SrbviC%j4@ZXeY#PDH8arEDx=u70Rfq{&AJ#v@(?K_=Qd#ZFZ<
zj0K!w6dDf%QuwY`hirPXhN)FU--uwF2T}woLrJu9)(jy@Urk9llc@(kw>w||`ixq{
z-4hoJ{{0$uF?eGw62vkIZJV?t`ZZ~wfxGx7{r=k8QqNTCVKqqM{79OzBUw8t52L9n
zE7_SN`2{Mp0kLyyFKd>8r?|QXul>Y@%xQcCSR_8RX7MSUpYM<iZ$b>J0+6=*yr$?P
zk6J;1I{jkZGQmT$lP$weT*#$h8(+6OH7WyNJ8DyMInPNr@VwJ7^!K!&Vu_>h77z`r
zfL4sZ*TYQBSVm4L>Nq3~n>Dj5L*Trrtp*$Gw|TXka4H@gcBo=EQCteX;e>s@q#8?_
z(ku!J{E3AEoCgj*LUMVkqO~cRLq*`C9UUmzsqP_tfyk2J*i24risHl+GF;<~4}ckr
zm_~s{=z&PE)+lHRYhg)ehu}rZl{nG}hKSPAu{zvnkyz$mQON0>UN!D&fHUAqDlz9g
zp;gJ5x*9FjW@}(b+}IQo^U&Ke;DXKhMQmnbMXpstmJmTpt!MCO2QMlgKaT1;Ka?)>
z8<+VF%4}en-|AMHQeEnbOI<;!O)Pa~mlE)!hW}f`ze4bVR??USX$>4aGx<zEx+zQb
zK;I_Vn;sv9g0t7*B13z9{B7;<=+$xE-`UZSwak2c^sMte+f{l*e!_cI!V#?y@AyM1
zm2M-=?#IJ7!EZ#h7z&Q;0?;U)gRQ~SllVpz0FpaK4ZKwh1IbNCs7Ck=`=W*^y5lrK
z&IyGFQ<R&vQiAuLNX2+b!a=MBIFR!IQGGs|!gd5l_souOl2FV<=*W|fBy!#;CPgd{
zTV7eXrbRJf?Kdh`>~n{bRd7eg)k0TV7}?a6vQ8e(VDUJgrqdDa{Us}-;VazkV#O}i
z;;jttPk@SOYl#ahD#v^a&8iu}^jLrrZdfD<WNWa{_GC|n{d%g`7N4RNOCsK;FiTq@
zW3K7%G3JGDkD~XMazRLkZ9O=y4olN4qqDRdCJAvGj^rc3fmpP>q!?FO`8;s|DGNp&
zuoek1y$L2=u2mz}&FKo77^WhVKu9DT5^S`6CT#F40X_SC3LwESuMlvN+$o(qWOAFO
zT(DzLikXs*b&Stdu$t`(8lUH>B))2Be3PPP;f6E_+z=sz4kvPW(|Z746v|W}f+h1^
zb&E}B8W<T)F2;Q2rp9#*_k!mdg^Si|Q>~6ABB9;5O+(00D86fQU7Tx6D@XNN3!r4N
zl#IQ@nzg|#KKiDrEr0KXj?SIXu_>aLlA4Yw>naVBxS*|+)?0vO7<P?Ea`hZzFzWg!
zq>FBH+cQ~2tO80|iZ=HqY&EL7kx@#L5sC19YgP2L${P78s;?-Zu2|bQ{4ndWPy;L0
z>W&oK*8wn>bB@@{>pt~ler?B2)O*qXGWKc5?gc5!<SHvIh1Y|n(@$OHs+ZGqleH6R
zEF>bGY~nW&YAw{j5rUgoiRIf~g@Dw~iAN|;U<xaQP%FEj7(B0Np|e;E9o2%8?>3ZO
zhZ5`v{*(h@XKmJScPcvF0Lq6lkD&w*+Ygs$;gYHQ`QRKdNg-@gKMc}aITbc!YX`;x
z+kL<r9C|V*c!KXME**Lnmq*E&yCH#`&)q>Un{w>VeqU*SWu%TkD~DCPJ*7C*M`R@U
zM{17|LU^R&-AQXiKBAQ0M=ZUF#;`VyCp&x*(*>mMO70vw5o_y;)Y#7O6%dUFhk^sf
zFCC?3lt8<pWkPEp0fNwiI<Hj4bc116HpKfX8AIG)Jocg(Zy_TUeR0_L95|`L4QyS_
ziJ@z(r0@^;0uc?R9Hf!n<(M_04UhKPOY{h%u%L-jndv2l)5ZrQlVx6?xpE7)DJm%w
zoR%U{(h8*M=dxyCsnREr?NspZTa_ME7?KxTA*)g=8ahS*q!^wyE)*x!>Pmp;7&EL+
zL40V~CDl+^`4pz+n#eCDX@AvgH8%!A*&3@lAh~@lRijXZ?<3gk!#d|~(5yMnUn*&|
zmcd?Tm3ikD0X_{yfSVvx^|~mP3%dvn3JLTP>c1a(-V4!<o!MxhlA<om<A<?^O~BZ(
z5k$dNczXkzv!~T-m%%lw(_llMMik<D6b7g>X!@i}i~5{Y0lMyxjU26iWI>he*5O;U
z<zq6hY|~+9hfITmD4~sTOxhHErC7F1<lI8SlobZw6DdKKp1n6zuHUf)W<)z5>bya(
zH}S5@rV;`}$e=)fv}^?$+NkpVdo%jS1Q4cEB+XTrj#c-D^_EppI8PU0=D92wCrzDf
z7oUnA%4>L#-9Y^SA7g2|I{qh!C!{2FxmjD6qgM1cuWDwUUN&>R?B`D@f5VLpSSSt^
z$=jk;d?X5H*0KrdlWtHZ9q-uGb`yBxc)TWDh|i6}4lwD37_a5}0O<Q6h?5rc0?zYp
z3`{MXjwt|agOACQ*g~S{(vCX3Wl?1eD=?FA+8h;hh`hHF99Wu{WEO}cqy;EvY{I56
zXOx_HcVRquH?o<N&bPAZriIUO81|j`tEl(o>;OV!&Yh4Aja+eC%gfRrT9i*=Y$qfb
zVnBHwxW1nmsZE#FZ(bPD{X{@{-9e^R_v6l8ka}pT+)<ZJy9$gnB5zqw-(2v$0H$A&
zBexME2|{3ZSKF1uTE1-bxg*<DWIH8Adx~24ZFyhIHtR0A%X3ZgUbz}~Q}s~W3HZ8c
zmG>Y~?2`!x$U1z7xXVffhGyaJD(<Z0qJyp@ong*t_}w^*hNVQ+P+Usl{$5&V2iWg1
zj4|)f7#0&M0iAom#py;xSfz8i!I?~bGAIa`CUv{k{!ThvPuFVT(zJ+~)E-)H(&yG=
z++_kX7f>u4+N{-t9P0im?8==%-QKlkZDt!%!G5tt_-takRSH{!_bqd#-jmBafUkjv
zq8{qIV3^@x0Eacw2{UU399&276y;CK>@ll@;?NXep4^pX2_~WdWE0mourLfP??7=F
zI@97Y5U<-%rvp=VgN9UuNos_7l_MWQVa=mp31WH-zL6M-;>}pd<cOA5zw!O0s_#-G
zUPG(3y3#+*Xc*ms<sPH-E@W=Ube12*e0@Fwv<UgAapm-7QED;I$N`16eUHhQt&->x
zrlCw_)Q;4*!nPPOnXVHhCb`G}-XkL|K5Ec5DQbg;+=yHE;qLO0>qIj<d|whcIJ8a(
zXvsq)g#c26sh$Us`1uZ*HVkVmdld`GNg9B+Wg8@%kBtD_tXylknwzs~I*T#i=T>>m
znafq*UFP&77ADd7f!Nj<50Y|0#qzvkJ*B%9VN0=5YwI;dEJ89C7`B0qCTKor(JA*l
zV#7sG^AN)de+U(TL1wXYuDJPr^L<BTgul(&R5tl)H_y^A!<Zx(Gp16`Lb1k!o4e2y
ziL^!#^ZnP+m9hX~yrGCyShBjER7uO{p*FHhiIQ)XWaIH)b#GXh6&ptK*-S!xe)pAB
zqwZV-W}VEAY{^UQE_`3FQp$&}42T6$frOSka6lIE4n)o&YY#mJU|h2Jm5^vdOPK~!
zly^{)5G*3%FPTP^#gNj1s4Pf8MPb4vId6rmeOT)}Fr(^M+Iy>8Yw_4u`+Ad7%&tcg
z6e0f{b4^KFt9xu<>MoO4n?j$$yr~zHg`yzAM<nQ_hCnQ_hc;#3$wP7)j-Yo@%McC`
zn-2Z}-|n3gsu&WwgLRZ^G*3AN5?h+0J`Xh{XWF8Jh?rc{kb<>Yiq;KAV_f(uqu{p2
zm>LGFyOia`V(UB`P4e>449sEaJhJS&B6V+Wj+=R;akeyvOE+XmdpT{fR%yV-M>-~)
z<q>Yzm(lpj44*7F^oC<?VTP1_e19okbVRP^taTe^&D^e*Cy=5o32ng}G?gOexrft?
zFNmB|sJs8w^afa10mcd;N-aS3+0kq$e(8qN;A=kBT%O^`Q4wteOcKA1=KJ|aC#AF2
z%!*h+D&$$ZPV^*_<YP>LdPL45(xW9BnvqDV6TV~MoS3TK<@6%7DjS`T!MjKDmtuA2
zDijmL;h2tSc?aJ7*c=7#F_UG!oR<@+*j$BpyQ`hSvzPy)uk9D?$3GFj`O#nVO+Wo`
zl3jND(Wu^pt}cT~Sc5NVr(bUxpwIDly<VP{X%#d41%_@8OE|tW3o0R?jM*8nSS2V>
z>MRlthpEYlBdRf7O)JZVIroJpLrat=lx+gSe!4cD07fxN5g@b#Yk(dZ;S40nZoD|I
zis<1}tgO?5ux|8{KM9wj^m=xNIEyS;YnfWYwS5zTW@b@9x9pQ7J|Bi7R&x&W@-^Se
zc(iFtC(Qf#s`V$Wj+svSM1B!!rO1sG6`B2MqV^fnsHk=%ndW(XF-_H}UXY)|zOlJM
zLjB2K_iH~j(<N#24g{E5#OLRT&$Ev;0PQqU^hZ0}bfj{f65*Y*xeG2$MpP<&(^P`6
zM;I1aX2vKNB{FBjlkG;dB9cB5%{XWxFIg7t=WmaY8=}$$PT_Vw-V2T}7WS2SsI7#K
z)&OEaoxgw}og1lKAD%*lmd_)7=o^Lut0ds!yZ8pr{M!HJCUk1%<P~jJ7pTj9+1L@q
zUZ!@X8xjavn-rrus26Gk@zqm;={UxixawTrLIz`pP&}d@WmcOTqoU!Tw3z_w(e%Qv
z-EVHK?S6)efmIM9rK;fuWOT_)G4KcBjj>VF*(>$*xhE%R#-n2*<@Bo(BiX<niA%VZ
z&OYtJwPzWHS~sIN2?d(8f;HocVVee9Ogc2_&OCjwujd1eK55zN%YRx1!;ob&L4uW-
zOW&mDWZ<Ung4x3ukz+j2Hk=&QmUsilG>{yxLp<krxP+8LM4(hS#qPCBV9`mybgu^I
zLU&oRYE>g8vZHQpryxL~2sK#f<bw*aM^O0DoY9nlr(sf!Npju6P^_le5iiKPEi}Ig
z2bMC})f5jb^HlFxU9lK^-8i=q0mi#fcMt`O0t=^}X~7m9QoEKRA~eWmCWVL(b!K%c
z0ZjcOR;!ZHq)g@&!I|5P5t_(ISO({q7Fw4f;=m$DF}lKRlH6cyUK(l-r$s0wML1^G
zcP`F3q$6nP(@F|QWaWf`d{3KaN15%Efy>wTU%)JovBnU!v3$)18`xKwB)N_`i+WMp
z3)r%hy{W%f(DvqU=C$pT(*Ne7Uziz9SUX)bJL;K0u&fX3nGmjuO!KXx{zWVafP7%_
zS+<a&2?CIAtD89vy})@D_usy2uwhj3G5gg7-nVbx31`H(-gx_9ZYA=-;QXA^6)!Y|
zF1u)Csm^%4%Q#L|n)Y0JX1U+fh6cbC`xIu=$j``42cC83Ded8L%x%oFhqVoX^IgIc
z@aW_cxTsM}9k76$0L50(b62#4@&z4&oq0fWVVnev+Aze%(BL4z*a=?HzlL)FI&}a|
zoSq&u{JrmgWTo`0H+!VEOr||G>p#TF6`v6FD$S;x0Nm3Ca$|q*<PG)Wn48*rD!B<L
zPqj4;UxEM*hQUROr!&b$jboptC9tu;??C5yj{k3ojT+EGYTKY0lUapMkFmd|DUcBF
zpm@8&L^Y^w0CU|exBQ3q=X2iofOiM5iJSv#K8rO)S&lWiskX0xSuY5sONdp8kC}UG
z;H3K$`@T$>ERiV+pdh9Dhf9<4JN2}<t~ja^=;V?=lv@#AzLcByW~`1;Xs(iKa_|)R
zTK!I!$}VJ#ZgE5{V$H>BuPLLKTUd?I%g{c&*eFu|U~D3bmq%Dm)*Xr00*|)K5<X)5
zU5pYok9*_@EQ#?OF=1o=$#-;~=xK;UpkA0urdsg{^uO&(^nH%^+pr=b@e?|qe><5<
zJYXf{9vIE?4~m^REg9cNb&IJD+(n~iYn;nCAy1Ffs6V>xa=F~3=;!v-<N+nV8^{QK
z9AaL=%Z$lx4h>C*R`NK~-Pbn#^;OH&l~=p=>Res71h*rg%-?M5f0;<WK{a4IW*ze>
zcB??KE^?Y`|E--jV3WzZl35o|J*hdxSa>@g`<M<xPpBa)bGgJ(F(URNgjG0JLlgga
z#yQ2y>I*`DvbeVPb;W&H@PI}=B)Orzg(_(6dbX9AX9b^7ESRR7c5^n--g|rKpD1fl
zmW5u5b5^R=Jx)?!*ThvBJku^=&KNgnDmSK&echbCG~%jjbYmsHEWK^zQ^zy7c7|Sc
z+uJ4=1RTiFT6P#+g|<gNId7tj%dTs%P$CI4Kw1msH_F?X*xE=Yom^X`Ck1i#qAT`(
zPSNZ5YB&y5AZ&)rp!9oM!_D$RlD~g$d^`}giKD&;9PBiFZ%j^8i7vreoGrf*>ff`B
zTdDVk1CcxuwGkq07Ko>0p^3hXGW@jQ$Y)_qS;U%SmLn_Bp&7xmSL?2Vkh>D~AyFE3
z0N<$?V?>4rrzLVl29e4IUQ^<(=lu{0Gr?7whEtFsRTu_|;GuL-3T{Kkv=fW_BAz%r
z?bq`|XljenF$>>k24J!?aLB`2HQLnNbfprq2Ln7`IW~7^&iS2=V`?k6)#`3w7bh5p
z*jvbbaYO0)^u0RMeeu+3w4j=@Z69E-SWy{UySfx)H7};oFk6eX=f@jymGIcB0vW?J
zor*A|*a}ZfkB)1K=&cO&I@k~U>3NosFdsIQYJ)5p(Hzh*%ix702UlEZoD5$3*LfuO
zqNQ>}pNkb_7+Hg~rg&50`hno#$*7ma`D5Fww5vf<u?=egSs2`oy!eSFBTGA)j%;SA
z=w2(eoLDqvaer)E2wTsN^C%%SQkzEE|6b{PD>vAnX8C>??Z+bkHuWFj$DrxJu1Y@N
z^{GrpwjBeg%{7(#v~gWuBNi#n37Q@2lSRdn6EhyNUiNL@W4klU(xY-8nj;}Q=pZkT
zs-kwID}JBE5E~d*n}qnJZEc&8+HaEZj&B+`2EV0V{M`E0+;y4vZT-IeD>uy(AdPl(
zD;v7}rdH091+IhzRp-ctEi&f}hbhWuG`Q71IdY6+gkZrx?1+&TX+t)YQRDApix_E<
z;Z1@rlp{BZ+*6=8udoMOL<ir>L<8CxmE;Eq_gJUpOFR%IGDO;lY2nq1j0K=+yHinX
zmo8_6P^jUq;sWkio^bIsE96ZMb0z?c2;yQy=h+d`Y<Z_*aoduO!Lzp@p8a`vUXTe0
z3Qr9!2TcY`lO?}vcSIlgZI}^9vzX#5y!_a+?ilcf0<T9bb33)$6(vufGq|H~RM@;|
z`daup3B}{_c*3I98?@-O+U(=$01ux%jKqgPSx&1{vVC?9oAJ^&RJLDi_~-lfC65vs
z;=BtiZBW!7sWXgKi!n;V&^Vc`Vn!`^EiDA}uyo{PQc-WX0zq1aBawx3%QHwlYC^}w
z!$x%!6at9|br=}gh7LSpjiVgvfQkmwp&#`lm`J?Gkzm`T>qXfywPIUYP871!^^|5@
zdkQ4Mjr8YX%23p=7m?@JXubn-Yz=K0psp-G$O%DQ$uv~rW~z`*Q_O(mkdH`X)fHkI
zeNM8}v*Jod8F~Un6q<PiD5A6P#Tck8>M0tqMoWPXSYWiG45hX!7zjUfGESW!KBP&}
zff3m3>&y{_Ouzx9gbM!!3`4mds$eNLE0SnNnWe!qaT{1)qr!#s@5B>8GCGNGQq2||
z4{R)ea^HSDEGyBb65+T%)NDjIw_)mS+PKqF=lzi=T$^m+b`9O=C^SHfXevc+aZBUZ
zTMWOYt{gjv=v}~g4%a6DfJP%iQ!65n%XEI<NrF>A$@Gi@%5rDB`WP(;1-K$~Qd}`2
zaIyBq*lvA`Q#(P%`cc{olD>+4l2;0w#?F<+E<56QtSyO0F<OdfN99<XQ>Ue|efa7W
zTmAW0kU2i^{iyhlH!$AdlZOAa;eXxmzkyfyy8ZE4ao#PDAzE4-VB~jOU`~$#25jim
zReSuc(2y?mjb?Tkp-l%Ml1-irrvr+CdS*hASl^A&JDx&WX$aFP%D*VJLgJx~w;dKO
zr>9}z$BHOdvIu<)%mL?IG-3B{@~{b22@qCHHFV0en$f{Yt-fH}=qV(qz=`hiw@pT^
z5Wwn0E=G&7Rm;^>iZ>kT8~8GWZV;6<r&t8Xwlpf+n-1WmxWCxmV$@uHrDOCM2zpj<
zTV?^I#HX^LZ}V-%7N40|E4G5i{W_XeXQ>;XDyLtHO;}T-Bmij!KCqZ)E809;(I(79
zLI)OPb2SGXd2D>EY?{cbfM)O-FEAvg4(qviOLvX}4jQ8|yL39n>X(6K5^UXl1Wg+`
z?GCz%=hnP>D@zd99W6!si@g}>kFG@Du-)9RrJq~oZ6&%&C+-?~aNg<|*IEozp)nAJ
z-X+;MP(~9cUM<F(9v4FyY?sL+Rsh$4;&v#9J_oChZsTG6sBnJC5~B%R3b$@edW1cg
zUuBOf*2Vt|lNe8yuOc+8wTOwXHvS!A5^xx`<57@9KNCJ|0IV~9NWx_3a)`r+=~4*e
z8uYQn|5bjjK-2f6j;(!YHmz{LT~u0rtVEu=M^d_6BMFkspJx5-?R;yjSF_LD`S8Pb
zx)et{ULp{h@VE>X1v9~7M~(8fm2861E&$FWwknsnE99Km55vjuRu#ZXBL7RAgw%hl
z;@SId)E)wq*Wz2`ZF%8{uCELFpEEU*;+2Ovbr<ATT#CCK@T6jto`$z0qks-KBqT2d
z7>7QY&}HsL>2YV`_AKJAKGo!zh$6fXt!8Tg@*Dle7>gep{)Y|vwn@LY@Y%=wS_zg|
zC4nr2wg~JS{-(c0Bor{-$mcZn)5|!?8nPq8@i?7e4ja@^l<%H8CfRxDaa6dzrImuk
z5%n>=9|d(EhS%YcB%F|QRXcZGu33ynXKJ$9OT(<}zJ)`6U@XBh>DiqG6((#3A8)r>
zt<t$c$+`poHd+rc1ML?4>fo=9mON;+%&!U#bwUv<t68}lZqxS{V$<GYY+ChQJR!p4
zr8u=z5p8Iz`Q+=?#?vkSy!FkKufO*FwN`VZ^Uc=QCO>Vro_@ne27J?Lw>uk~^y2Fd
z7UP;5Pd2x{!S}7rtxjv}8{PialPB$OxX~xA&9A?CN*_1jXRA#GI$KYlZgwo&JIe8m
zY>SzL8anwBDBDF`3=|R@uYm;deCGEqRJv|9vDD{$r3lzApmpcXEq~CNzIVws7ro2q
zhPUwEHVndj1|=I~5opZHK!it-RKloKMT6%Joi-I#KL<(ZnAQ{(zDD>JtzU&gxXIyy
z5qAvMyK#h$RwX0Q>c3YAABO+B6#H<+4lL_wniZv2F*wHZ+8olFEjDry5DNIy0a>AC
zu?-R<Qdd@lW%L65ap;V~eiK};&|<Cbf6$?0i*NZ%Qh2`Nb2S5v#6Aw3)hrj+WM1j}
zkAw?%RYdwnqWd?k=vej27<!y>W(zczc1|b_<?N=D)tFYG%jjav0nz!P(2cQ@s0oXG
ziJ1hMl=9Fu${ooJd$#JXuTIA}%GGs@p<9>hFB8sJ4wgHN9M~3&=Zf)AA;0e3XF80p
z8PKUvH#-gD*<IB)rpQcTsn7Ne=;kc-v0Y4x!yb*}V7R?K33^vy*3FUt80!ur2bKas
z!0(PHHE-5{r3H<vd*CFH#w0UEDSlE5E%f`D;d`W=&uI!eE^jmIE;SzQD(bG4U_nf6
zw>j2E>H4QvM-;jz|Ay2%XjCQf#Mz!>0b>BVXOJYx>So84C-^3Xz>jSs4jsbCsN<Xq
zSfxy`2w!eyDF7)=R09=Uhn!t`cSYq{uFr0Tr<kM|X@veDPfeT6CfgAo+D+XTlzRi=
zx%zEs+`S_3XIv4y0frKj3m8R$v*tu;f*iw;Vz9s_-omh}qBD!05e<W4wMO2zi~yFq
zA;;MkNRWM&`njtc<@~@XTqU(nt**Qe^aSe{^w6@$$fa`d^Qj!1n{ZW@U6j&H<;7GI
zUm(wwpe+#;`PwpnWk||y-;JoPb86&|o3e(f;I@v1j9OF`M_C1UROWvv8zK#FSytzr
zl7>YmVxR}{W3lb6NEbSa&^*s`BHWVD&uPnOa+sS$7((8Y4QOHg+Y(ny5<DyDQfqX-
zPtz#J0ebv%>sN|^ztP;>8sv`w<kn==*p`PAsjuZNSFViryO*<?Ql_NZa!S}erg=3a
zYut!UTPk%ys`Fuxh#&U1&2{|*Px?yc&qJfAhnAIRoAnt0#n_Gy4=@vsC7^+C82me;
z=yy6J9?d?7m$gHpD2qF>2z_Mx8Qsi^D)oqHi;Rf-AsqXMDc`PoY_e8V9~+BDt@SO{
z9GlgfyY)j+pv-VljKbn1x`@WcwiBcckA-hX4Xs1S<TcL5Qkw6<oyF5Iu5jx77W9#G
zp-UXuWs~All-Zo;O4~A0k{Jr{jB0gs8&g_Sn{%Q$r098;R%w{?bhD+D%~iF}#nzro
z%DGl&el7GyIY)1MQ_G6Bw%pNNmp*q&pU)}Xb4vFXIrig!)+hRSE}9tl&-%n&d2&Qn
z{|es=q?~9S!CmQAl0B$vWfYRD$TpYXLldmc<;93Y2-K;e(-_FztZjEFf~Fi{+-i!O
zsdKvK`82dqFovpLz+e=P@Slm6lX88@C!ohgDXdMJmapm4){`e4{7VWHA*!cefBp2y
z6M9Qpm{xP+o2_<hgLf(lJgn0B_&;Fa@seH3jrE>!_!My%jK-6xzU0F*n5aYS3qwV#
z-h#AhF7NoNFjJ<@xUof|gv71uUsKj*=OfW`F>(n(-K64!=r$lOf9S(pSY=9fUaLLa
z2BFl|$}TsaNSrTM;hC%T$;o*w_~Vf9TATlOzZ7OC{a2EG!ih=YfP;x$<2ai!D+R{<
ziJ^+doE_zE&~dF#j<hO>%odUdOosIt5VtI36hPsVK?SYHW3<u_hKtYQFuwSNAdLfP
z7T!WHa2m@lM@!IGaGrib$e?xf{GZP%^I%Ql8@b!o2>*0Ymf3pcsZCB?0GCN`q#%={
zmVI-qUNz**(qQW)j;3pA^E0=eSEBC9yPVNMUNSf9ep1H<BO2onf18fQ%L;EFOn6OI
zS1%yj7>R?U;c$o1AX6UPXlv1vXfW=bpb5K_dcEm+)H4i@?aSrxy1m@JpSgM^v%0VG
z#<lab;Xm2Lgos=Es?Tk&A5YH_MM~gn;%MaQZmr%#s)F6sU-vhft<Nn1wrJz@eNLy@
zdFsPpa4hydTDSntuL~1oXVq5=1doCttd2L58z?l9Ps%9U5%)x8AUfGTw^gzp{T{lG
zy$Rl{PSN8xMGL&TcXs^pxUxnxUcAN>MOjoM$Z?-<mL#*rijQ|?sxEoqMYBcUvr@4a
zqd3?oNNtffWy8}>t^|NR9b#avc?^fE+d+1VjLNH$meXo+nRV$?_UY4}d{*{6(bwQb
zK^{Z}(_Bg0BT0J_M0uB%#YFDMGHb^O-peqFuEYKc1A7nF?M%cB#;sE>xpbb+JR|)6
z0Gr^Ii^IM82Y_3AnHC3n{SN^2zov0kWQAHkqFJlh3hQK=;;_!UCi%mZsVWtN{Qbu%
z;WFn>j%Lw+0GOlEVlcmsFRV;?<WF<vSXK=41vsC9o=gDmCL9hWRe>tCvN8(^cFF(w
zpG8aVEL`yUq9u11F1T=$&lfKFx0xk9c3-E2rH;-Br}R9IhhZ=_GAC}}r?AKL*t098
zkJ3CeG}f(SC`on0EKbP7G}Ny{_##eVXx^e4jM%<d6WJOO8CeJDWQi(xLFA~cOaO1?
zO%)B+5QsJG%IuzQMpGOg1@?;uoXNN<i!)vh%ql>g%qpNmFGqu>U=15>P|dnTEZQjX
z5VM!9S*^;M&}4I!h(@}Imyh>^9efoHzS7<ZRtLxqWUl#I_bUYME9H^neT7h#*0ecF
zB~bQJNk@4)K(WPUC@ONU7Q>vIZ`W?JSkDuXm0{kXq?Qw&-^K;fQZm)~jT;uE`QB7c
zvCsqqb+BVx2UR#d<+bUr+A;Q4S&S3Kyz;iu9wh-B-QH{8W5$6;w@j92Bi#ZQ0%CT!
zHp(Uv@5~$`$$p@7w=<piN#U9933*CwR-ETlTjzKi+^K8}8$y>W{EL29)S4cP6)+cY
zrlbD3h_Q0kl7+=vkb<%MT{v#;`gRB<^!4&}8O~_yf1CyV-itz9rhH7M5?C#7aRGe{
z<Y&}jYHDUfuB&xti~-Ns^Tv?9$O@y-jrYrfO!+5apCjzK7ZU!eez9sf_a=EUgVYv-
z5cMsVa6z%BDE~}Ohvb_G`KjRmax<<jH<Hi_5`D|J?BsmtttJ{;EoYNi-Y^o`T$<L4
zwFm&}QTRx9>*WG?aC9SmxKqs{K%;=dFE5f#SVF2!0M>AT1_OQ(YEB&Nv88A}rh6gG
z_7IJ?DI6s=SWMQ14W8e6I)Wg_`$A`ZWe34`*a8W~=VG&3k=+axI03LoOmJZ@xtcN_
zxU#ue@eT^%OmIw$VJqOldFW=Mx8iMik&c@y&TeP`A$iXp8R)Fz{Ue9UlxeOQT6u*q
zETt+Jw6L-$!So~9TI6cX3>`<qE`Av&slw#U=wqcW#e$+&vfbk|&$&X=KCo<@6*#s<
z*aqhF;m`ye6e_js4sGQHg%AVaL$wDN97vpTvl7LRd%bC*hZ1wn2Nx418j@TDOPxR>
z2}fu!+Mh~rx>Qz@lbSrw6p9mT8VuNT(ux9XTuI6N38TYu_JUAF5S9?c<foA%fN1Pd
zh<*<KPf#JYSbe%}Ee>W7h-h8ABtW$hs5|+D>kqjLleII>EQKRhf!S0hRWr@Cc3q{y
zmhP>z@38yeZczKs#+Zqwk|rB}b=~R^8Ztpu9|_}nq`6Sj&Vy2-U}LK^2YtL&#mz}o
zCyCzx6BX6*?6^`xH(!RcUw0`xXjUP-t@O@1VFho8pZS`$BL*u^{4Mmxw`^JX@eBe)
z?P!0Y08ty-B3juVDrz%^doIBh+^8C;h97(^GeiIIl-@tl?p5q+RbKXMdjAc#E}gW=
zC99-35I*<aW)H45yKuF)Dw}Uh%iK+yA!|RWEZ<=eIx<l1twTe#SC|!jJ0S1C4L1(Z
z520EodIhSgOrn5mC$)#W7W5u!4st38{D2q5re7=f{R^6P6U0>c{oaHUvKFuPRIG+F
zUaG39Fg@zrWfH}(3sWU!wM!`L+)C(RmRFJKbarcWW%RLLzsEx2)zV=Punt=_boj8-
z*^t1D`IUo;kG?J8kicdc+3Lg^gvN1f2{)8TqC^}j6LdL{Bk?8ac@?@|rFhAOI8qj)
z^isKqv~!}99~3$ouUY(guKCKwmzBQMN<Xoeu}ddbAaRn*bNgK>`dez3p&Ium^QTX|
z61x*<o7HW0XuolI!$-W*@}w;0p(<jn9G*>|TBDMB+S_e9GJ!jp*emTlFf{m5uu_GY
zz;cO&p~d2hgDcEyDTo#CoC&mMuyf#62N#dR;H25&phx57L6<tSL05$qClJhFvkdI0
zp;!uVsxT93F0nAkIgQR-h+%+B0gUDAGr*<yHnjTz`raD@7gXFTO)X}+2zys4TE=sb
zHrvRUhD3T!|71G0Lgm_ppc|}+M@*0W*4KJb(wjyZh3!aVm1u*4-3{RQMlr{uv7N`Z
zFvjuiayWtVGjW3N?~W7e5Uw@9pBN)W`$Zbu46LYo6UIWaA~ljq+NJ=1m|UYu$7G^{
zpH4^8ECt`Ga2t-sl+cme82gtJ+en@1ON1Y-{yyym1^ppDo3A=7x$MZ>gL!uFHq)LY
zpiJ3<sW$|yT!&~HGqw-II<hjbU44zhX)2=3WHN%WO~(?<5rc-B7O<W~)3|^RFi~GY
z0&J=%B4yH{xsxDe4D6XB@l_>ab&Ru!--F;ch~C?qt2%&apa>EI&C*kN{XSi%jZD_?
z2*XB?!_TIokdSJ&SVg2US3w~(t&ub9nt?Uk(-F!lZTF;BrE<_zMn@|9>iZ9K>Tjbp
zna?gNo4)@INWdkg06c2u6m&JPLR0`u;w>)+b~+lP-W-w1*IDC~@_1geBYRYC88IDM
zoA11~l>=0D`*JI$b4r!EjltV%#T5S;CgX6p;$P8U#SWd2)wFpOO&#p}VVIkV_RqAF
zT9s-$HE~F%UJ2)zqbZgkh?<y*!Uj&bU~R3+u!(uAT%Lxm3%CQaN`$6_5U~TqxtI=v
zZI2to?W0TGK4-|xgEbkLdGZ(tt)`OwZ0F1GKF?REvp<nLzBE5T(LIR&@yy|HP*%Mn
zn~JzFk#JchBsey0Gf8{K7pO**>u4ajeppNEr4wvoD?)+X^^RwEV<#r983m&m_oZ+R
zk_b?w<&FsxN&_O{L7=4a`u!>`RESbfIV`(MoqDry#b`rVV4y7TL`mOPMi;FSfG1-x
zfY;=Yyvyjpo7xQueA#Z7C#-TSF;U<a&?zmR>x1c%u7fGSrtX9~FUG^+=H2H-ef1<u
z&=9Z7+kj}T@nX6DjCUHo)SH1GNoJ~X%IK11DT}A6*^**J_Vq6|P(yp^2!lyHgFid|
z#xwu%<EXAfO6W4bahczsOb5&SR=2`V=u%f)>IzCd!BSUtDXZL&%fa>n_{4%kx1frW
zz#&hy0gB8;)cCD&)fnnH>%|)3d%z!(zaRpuJZ?3v;NKAQUJ4Dg8m+qjxH2Y*^<!FT
z7g}lGp%sLsor9%)Cs^7!SlT(Xe1C9q`0A(b;c0j8?c0;%A8U-whQ^Uo{-c-wB*8&=
z6Z|%eM<H4PfLJ-x@L3yVomrccNe@?Yy>3KgE6YZB>v$QM5D(^(Tr5Q3FcZ?Dca-bP
zvn`JRl1b2a6FONJ&=eb??7YJnduJU3t)2Wj3U5joIPEMt>!GyT*AXDJc#k1rivjwT
zkEH5T<@r{cagN-fBYu&yqjf)<&qh<QQYz0apn8*APB)`~4tcDvIvvmjwtO@Yue!H_
zK%#e%L%!1tXWl!bP!VB}0DzH?l@2`l>xg!<JiQ?=FndF>^!;Q~mxLi<<--Rmh3fy0
zbFJryBP&`mZCKVgFT(`~ISP{t9H4#RDL~rZ#p_Az1Hc3p(XIfpT?wRAXB)rB%DrlC
zujw)zwGtF5qhtytkiSKMCQ>oQ7|`oLScNMfQL|2<5@QBa7A;ce47lONHi5=c6Coj&
z<Q=;2ywoC(l4fB+IQP2Svm9$HFC}Nib!>?mpZ-#8M|rX-f4BHe&Pa_Xw-6S}p6c3%
z4VdcA$64;Y71v$ps$Q7*MGlE2Dt=!At-`oeP(wZq%HNLs-NFcmXO&Y%ai?A?%7~0C
zy4aMc(toBYF))9kDQV}XU?Vp*G9{r}m~M6wSd6sAaNs3UF5bjRKh?40tf<T?E2)>_
zSwU8>oz~ST-gs{buPMc|TFTN~65b?HmW7mKi>>be_$v~&k?bbs=_ifS(hHKypPL<n
zo1>JQpJ!c~t9a}|;)UJ>$_`4{Hrpx%lzuhCoSeB?79%sY@hhrQQqp;uhB>{Q4hBOd
zah>wF74_JWwkS+vsXpW~P90S&S+t?(<s~86MR7VE_^8rDJE{_ZW;A8%ucj-32^ii?
z0qhX=H<B`<3R^j?3O>x7n93L%%yf-a*dw*sY&N)+HIQ65srg!V=@o3?9Mud<T{`-g
z9fDbg@pJTRX)~^3$l7AJwY(w|lfPk>z9=Cv2rH#x)`k+ZIFhcFWtJ_o$nuoZ@)%n>
z7Ea0=%N=|b0f|Dlg>k#FUWm)Ai(vtitj2ZRtQOCo12h83?80p-ytP>24i)ZTVNasx
zp`;p@Oc?nNG58jJvQ>*{%Q8yC>NZk2T@4au=qiDLQ$iXAzT~LN={SN-mzVK&C8BCt
z%YtL!*+bbEAJ=(c$|a!CCuNPVY?w$}M)b2G%U_A;u{W>c5bRWpq`ZVBly;n38#1qC
z%O&2%+*=f8V|pJ`!N@=$TxPrT=Wlnf!|bb6Ha2xto7?<wTFElmPI^zm?I6VH3xDKg
zC~2b`Oz=i8WtJtbNiv#JEB0C0F2L$g*#zhdoVoewGDmKy;n&iuXp+j;wYH~Do=Tfa
zM#}}0>iqdzwMN|En{oUu(V7<y;h0pt7e>RHR$faDpk{wv^#xz*%N}zCdFGUpEfN)B
zb5>M@7R_i&q$3iING>9SUi|zNe|^mlzG-n4N<-9YEh9L|s6lRSREf>*1)9>E3uW9x
zI`UhV5ah!ce1aZ$8woO#7?!i7XW^A?-Q2_+emla1ou6%`Z9+@m@Vw-f8sp@HSk28@
zX{b)q3oS@8*`ro~a0eJ}UtM8gm|b`pzNZ7cvj)@Y`I_sm%Qw5umgE}kM-Ahef_Dk%
zm{OV!OX7K+huICP&KM|MBB&TQ-ySQ#@C#-MELSHzF!7hz0p4vjC`bh7=re`RULR)^
zmGNRxywIrjsA|h3^LnA_)>Eryb1}2=lB-}9Yoz6@o(=lCNx!$$!zc9hDgFN1U1u%5
z+8i(lSO*jyNEtZvCf9qe-xBKYT}Bi8u2?2O(Y#J*TSr?EEX)UCWommB5fO6dB+e@v
z|LM76^O?0ExOGLJW;MVk6*<FoB14ri%2PSlG^0AV&lB5zp;b2EMNM5lpIRkQUWqiU
zn_`+w%%+jhF>5#-xn~29tNvjZRgU~&DBTUVx4`-Cn2@~X<zd9#jwz2y-aqIHp^-VD
z@L0>hp*ISovaU=_Qyjk=*OV|))+)VGt+uUEaodu~q;e0nVu0%K>qw7jgcs?w8{k)0
z5#=dGRPkt9KA}Oa!sbV)5Vz&lT)$vTk#wF<T6NB(LeZT|)HF+wi<)DHR_-CzXu`8v
z(p0H`m)iPq&^=|)N8ju#Fe$KvI4(7mVVvbQXYcu9zCv<^c%jag%ca@;wV1>PPyyjZ
zQ>FY2m0cykMtKKkL8SZ%&(Tqj`s&X=I5@W^<OY#k%h28A%_XwRlGhp%9Z@C1${Brd
zg6L0t4)7jG65`+%9YcN}(9*Tx4}xJDE^;DJ^%Qk3?h&`py83iX{k-~Bc_XFW<pC?M
z>+>WI`V@MPSN03_eZ{<5Y)!*C#rPKU8<ID~Keq3)@|PTlJ<c#k`WfAgb+i`cYf1*Y
z<Z;}KCsA1L)=c@(DH{$S8~~7tg?NuhfR(g9Q-VJy%r1O>HIqRVe{X6%FD<`V1B(&l
zAEB)>8%`wHG5o8JI@Iv@(1==$Nh`t%P6B0n`WCHA#uxU6IpIh5SzOoz(7u(l^RrR-
zM1p;w+7%;yK>IQvF+fn?I6cNFxC*-y9r7brr0o{5z?`*O5WA|0Kh#ojezz8T<C1X&
z=`P&b!f#55HhfQER$wZ!-X&0nB4GFi<V%DLAG=kC%Rs$$tRZPdwTdC_R2UQ2I)toD
zRUk;HgY=DQT#S*fzL7h>^}jMed{uc^_Hf6!Aoqak<FW-n#JwsLvxrldX^42c65!~a
z+@QGkHo4R}C59`-W4hC5T9z19+oo_+Ceu8Lch;>Pz@*djaM}D>U(|6<=sFt}BobC#
zsoN&8T)ei?U|(F3S7)U!o#BlOGUBG?iOXKOPiK^__Q+*RD$NT@b1lnc>9r5*UT0QT
z@p3yHxBwGNZjpNf2wk3!=@P!-e4XvK=bc9H{m%Q2CUm(BGW3oUMKcSZjr*5DJ>I-k
zqkO|fIrZ|)n`uRCmIf9y-<T73au3_?L--=|nCAo5vd7Z%ZGH6wSkx`EaOE=HG7IF+
zoh*=}fnPKD!uv_Ja9qP;1sj1@iyHoK4IcnUX#GdV5cVTS#jCD))j;L-ZzVN7_phpE
z290k%iqR9@zFOCkE`<nx(Uwp)OM%L2gg`_NrdwXiZ+jTb*?G*5zV^RyA9p+q0KMfu
zaUXBcqi=xR-RGMgdi^|s1>rp2qDQbcaB19sXvWN|c65Apur00v9E(a8TrQAGG`3t9
z&?W`%(bf9X*1*GCyhvLNU8Q*Gl#hR-9J!knSai@;xf4gD;3CZJ$0}7MuE`|6NnIs1
zKkvoEv@yey+^&K8K)xz+TSASVJfGW=2)SpR>XJTpF?7}fVge0uVPV13d$l(3hy9xD
zdhEMaW!s~y#Zte<n=?_KAvmvOd)8us+GmZSvTZ1Cdi(0=t2#z@t!wi0+V&2<jS11l
zW8f!#O&K2d>t4t98r;Tdu-+$iZzFNuv-;gC(yM!$+2z_%c4-&G<F#KymWct};yUWJ
z&WZ5o;rVoUg(lo+LXSs~Cc&-62Bj@ADlpANaZLB_?C5d(E4PWVVI4j0<VqEgbM$y4
z-%uHjy;+wjOUs8|rF`h$K|b`V<U@ase7Nb19B+{-MMNs9S5i)}yuZ9ymSozf8_6vI
zgeo8~7<qQNje-?@#p&FZ;83e|MFYqb@hef#Hi=wb!8J?Eo}I=<W2^C$B$9&rmNB8_
z(#R;gBO0lea?+b%(&cfJhZc%*E;G8TKnG*!yyRB4jrd-(rjm~`?Ao=K#cMg&xx$dP
z$d<I<)k2{8RutQuO=G7+0g4Ds7>Z2SD;T|XW|U~pEzy}<VuMZVR<~x9*`zX0b0{?I
zser;V59|{3ITWEgHdNuFw1isBqb1R7Rm>y&=|W}ORcI?MT`>)%WjB^6yD4QUJc8VM
z+wNZlO&TUaIoqvF^Fvv=7mk_WBm>N|b1|At?XZg#33bpJGE7)#@EC?=Ql`!H;Ku=@
zLae5JWsGqk<~+31AWBd!rM~XR$^~jo5;R;#VuvfPaCQzIeXArX10+S`@YaeqFDt{1
z>Pp!gRkBx%4mrG~dCI2)q0y%T80*m)p8Ms~p^(WN#Gbc4&ee%)k+UYaC8z+q{<@PK
zwC=vGyA$!5lkMaZ>DW1TS~`tg2bWP%<VV9;px&C+DRVeuiJPV^!8xLu3iG2fAX_n(
z;z35Pg(lz{5`&>4_Iu+zi)c#TCVLfS_u)~_T$Rp%62PP&A`OxV2Tc~p(=;pu<!&j{
zf9Eb<h<E3#;UDd8Z(G6mG|8GUDyD3Ex9OioqX-5@S%OI}!YrPa>tV-l8z;`13VdUS
zD6d;>>#}6Ti?Y{XiUzdBES00^;*wkyr;~mlrwj39ZJ7?$U}=tC1E`tegP6L&&4`|N
z-KTYH5HQ5r+)rn##m8ZZh$Hkknnc4;*|G8phX&riZ?~1Dk<|tz>e?b^6Aw{@SM}4T
zhK-JQuQ70JZ-`+5Rs00B@xf-P&M?AU!K!xCxAS!cW8-LOCw*7$AHv@H@%XDuq6q5*
z<bgi|At@P~rU|a&vH$JccPYb9iy2WsS<h|a1`2?=LOSHULS+0mfnx|IejUWp8U`E~
zg22r3nw@!!TDcm>H)Fmx#5>5VkRk+O{}e+XVb2mp>oipIDu(A@4Tl^<co<!Um@pX^
z$OKA97tjYgoTf+&Ju!PHH~k<QPAMG-$S#3|r8)fe;HZ0gws&^8-`zVsJvcc7;ih*L
zW~UUn0vYB{$M3pt_Ff)z_quOSj=w$Gd-LY-=-cj_L#X!81K<<he|-7*gg@k`u*Sci
zJPp78X0!LC*Z+F3J{{|wcAYk`_5B}zY`53bq_@s&O#U=4=k?KopC_A}^cQ||f45ql
z)|3BeZ?v{HTAS_mX6HXzosD*9>p%R~=Rm*|NICKSd$*Gx+@Iu!qA;PiMKlcg&Xohf
z(3GR$<PbNDn~=7Q{l|}W6nZUiVc1-fzs6yJ0k1AXc;1A^9z{i&+$L1FwhyPBR(sQb
z5su>s?XN?qk@!33^u3w#_vS?$U;N`@1ndL^pwVt=BOCtyWr8WRfmfe~v~%qA)RTWi
zH^EiE*$;OG1{&XKJWYBbG}RAr_a<Y}bb9_<sB?E<5*w2y=}r9aXgCb8{@dyK5P02d
zVE<#-7_OUm*F#Lyb<2D6_O*5!>_@0oJjd8H*o2$`1kVk<(ZuKE3Wtgsw0Fv~$@cpC
zh`Vma$;CRfxemRp<1QQxU{4zeSEa?+Jv%r$Jw8!?zVrSr*#>5n@y2itNr4Zmv<M$O
zZ+&&u|AjY7Uev(9;R*11kU1v4G{lF*Hz1!3amQHYLgyeh4eX%H)J{_uz&HPSX1!7>
zo|`ziTDy!<6Gy??VB7fxcA!3C*^XTS7$Qh#wHc0auf_dH_SgO`j5F%3u`3`=bc#Ou
zgCKF+VFNOZ&#g&Y2N341RAh(#HV-(X*OGs<>;Dprdzad)zRPdBBElX`N3dn~B>7I*
zylDDz?ouf=ar4I@{#k!}yT@C)2>JwOK^OfFMO76+P<o-%Y313tFauQR|B}EIp^)2^
zngU<yKf-(INA;#NG=N$>Bf9umgghky)uLwDbR0c1$rELxm4imH4^TN;QKlg^^V%A!
z=}6?jc+P2Fu)^$(aRS0ha2|8UfRPLDF}0Ld%>{Rq`g<skox@pKoioJ#`<HY7?^^v5
z^LKN}!oC+$wscCc{N6N+_l9WYD-FB^F`XWiPve;c1q=CXpTxtIXXz}SM7`52PJ)Z@
z7!}koM=5`0h4mu_2_`b2ODDh%>lW8opFvFnzB9Y&U?}c+mLY&5>r(hg-yLGCNy&Re
za=6_oHEAU;YFaVP)#^1z061k^ssAXHhMk-QD}mfQfQ^APxo+2`5S~g?2q{UOR(njM
zd`BZon*j>5Ub9a7+z>VCeZvYAOBN|!hifshcjU-2#>i(wve)e;(FA4qZNm*wJ4&C*
z6X>F9x7H?mB=wI@@T>4xGzh3*@Sppe!1npgFCc)cW@UNd>5AB%(cWSr(Y{eOU53Mn
zTAq{P^a2-Hh}Q>EiYyjEKR5oI6QC~cOOqhyh>iXY!{coNPT0geOpxACu03!9Zb#=a
z@b8{%J+j-pef{p+!y|*_ooILGeD@UHzknLTbbDw0e0OI(+BGHm!(I81VcC;#C=uIZ
zaZE)CCDkH|v;c*rR)6=5;vV;LI|qob!yym`*<D`50jsDogTN@&cWW5fpAO{yWdxLz
z^e%6ai}L8Fsf5r|>7gXxOOWyG1y_id^I_b(!mGJlUN5xSe2>ZseK;1?l=2d2a6D9^
zS5EI7CIj4}^@Q?@SXX{5ip<Vc9wV&T4AM9BzE&q50pGu;YVf^=v&7@8n-oKmna1x>
z`L<ch-^Dk0OV%(;n3WX&X~_qSK^-GU@+tA!9*EjOym(VSrpn$NT!VNQpmPMp$ww_$
ze+_NSLur%v!EYEXAmE$Lf$Gw_ROOZ0^!J8o++d}`AbKD6dCS*Hh!ZQx4JQ1}a+(X-
zoJ?LnQZ~@ZH;cJbST?|Y`>>bbsTV-HZ34;lh2++SWOED1;o<mh-Sbp$JGf(`9HyiQ
zmIJJPmmjE&t>Nnmq}aea=<cp(OKkqOKVd<YzrTQ;R1!Mv=uVstOygFY{;r4SAlwu?
zcw*Nmc;2D0({MO=R?^6wf%CDZ+oHyda9mnDAXaQ|k4b;BgFx(VZ<8wgh#plmU}|Pb
z5M`;VDKq&`bbzxyFWCwk9MEsKrV=FiZ$+zsoHJ>Tia~=XA+R(2HM1fj3<Ivfy5UO-
zvR}x3m)Kk(tP5=obeNL(LeT}$QK811{mtHw-M!a`-yU^e{B(A3y4$MzEB@bVYQ9#q
z+*4N;70Ou2h-oY4w(GylM{|v+YVOA%@$B)2{G6hIt2ZaXjq{vMj$|03oMNjDYZ+M}
zJS#6~ND#_e^yuRDCAkOhh$pvIY}bmKN3Z}(%*cSlc9j+ql8J0sbu`)U`f!5aV$bO0
zXAXYf?VWZ{UmYIp9WYD<j=~$afF%B+fTA91QI7{lB)t&mYN8!1P+0aT?XuqUxkb(J
z@EmBV8xMA@tcdpbcN@9Rx3@_l`D|84i1z0edFNX=Ob=PPS=|aJ4SjCF{vT=hnIen7
zqgBkn%P}@Mn7;|LfNW%pzVhY4_lNrj-B<69_O%A{<oM{@?%~nv!=nQ+9IE=k-4u4)
zDjT8?O3`kx_DqcE9-?<Eb}pVLaU<+z{<xm+T~n92{Y-21pU1y02TU<iWGOId4j~Hx
z^N?skJS;LP1`@8Z91L<zRq?ZmWL63U+xwPagDTu*MyepGgrRyyGz=zYdz6whLn;6q
z9vz+?!jAC&4!Zlt2d`e8b`M{5&%WC`>mI*7J3KzZn5+j!doNxeyi{$V=BEMYmR>;j
zY$)uJe2x62R&Vxra~c4oYa}<`grW|(4Kvk-o#a5t?JN&z>0RO%qh^vMkrnBn9Pz7o
zDp1&#T7piyZVpwt7IJBeMSs6{a=7>Ma34-WZ{EJ%gPpRRU1LZLE#U~eE>5)z4$bZg
z%fdCLd{SZwL*^%JFk(NV9<V<;M+x_l0a3qdLy*Y)<AUZV7oFC4F<Vl3J`8FhDwr~J
z;J(W4h;Z@61Aw&VLi!kvDbMYFdH(g0FlC-AMp)J6_2Gk!L<fB~7yy>O*#Bs4EQB`E
z4XC`OcU!k=X8BnaxGSocvKi#)Q)=&4VW=p{3bk~C2b!8{K!--ISpGk=ZXd57skW~8
zt3rbse~&hAJ^p(LE;(~t!5LK-ZUStk27+f!sM#r%K<Yk|!tOn%#A$ry4CIK;h*3Se
ziO!xiyVm69r^Y3;yTe}wY1s<2<>2QS!fMVsn@2z`ms0LNfI~4nh2M+QJsXzp!a)3$
z6)`j25uso%=v3T*|8QJhZA|v~aMQalL;v5y$`M<CNN!%3(?wbN7bd;{6W2ex_nDFk
zRz8D?W3AtTj~5`X|2f!q1!H60{b%Re?%t$AUe@Yg#PP*>IG&u=b}igJFfSj9U0Z=%
z9pkiWiK8&9QA^Ec)GnK->SIO+I000KYIWls`H3LYpE}FIxS?QdVAanFr~b6D9K@S8
z#0{+e`#`NfZ7v7@Rv!KaHc=&w{B_S(7Q+7+2dsxX$y-Ca$8kP4QZ9Gz6IGGUKi#oP
zgAN@$B;cRvKr5%`2ZQ|QU+1A*HvRL2?4@-@eT8o9b70$@N-mvAfLHaGVXDKiQEVNt
z<6jgz?Ym(oH@Tnep!|Ts*8d7I>yIR0Ehbu3^1i}>|K4)dvJ%zepgo8<b$3B(aX=qT
zih2n7X>qWZ6Q1s^%DKZvK65OkyZB#B%3EB%n}e#~P3KY}B9#b7ca@2R7Vah%l?g<j
zMHC`6)B^yVlY>631oVIc&^;!yl6|sqP))VHZIoKhq~5h%`zo6@^|FrWMN@yz_4_Gr
zxMx6aNqO4Tq*SakhA2i+Y>zJAM6MDh)tN03TK=s{C?vF!b(NM2A6^!;21ePy?;sA&
zl-EkJAHK)Xwo&Fz1E8Q?MmhMr;cklUU3yxd&ywyqqB~L&gQ`Dws)n<QFLPt+?8-=~
z;{1_ye%Fy%pEqWX5`S&vhPq)7wZU!7AMnN>X290x&H01JUz<4$Y!)%l;O6GzVe=1(
z2kZ0Jf;honTRFsRRUxLqEzd{K)?Im+CDpI4`Yg5@3q&-szdMJUC)oGr>@P-8r8c@4
z%AsaeY7|;<QM9ZjornU(bATPtdG|izf@Q65x7k3m^ZqeNVQhpABG|4YRj6^fR{wjH
z2IF`lmj5aep98JWg_O{_C)uayPmPT2WZ!P~xrQTGlc3G5AK<YC#lIcyg<ER)FWH6d
zWq1v<x?2m%9%G0V+ZI*#U}ji4c^~$s^ZMA{PNw6^j_kAc?=h1<U~axI)A&%+c&9ls
z2cqZBblrV2_}k*s@K->-@7m;k;&oM=vyw3SpKTfab?5J4PMsCgdmTr8g|a&zGnY9o
za$kS&v!bS=IrjxKkJ43UbxTQY_WUd>oqaI$>PL#g<bDX};5mP(wKe&13~m#n+xF3k
z9zS*haT}c^+T&4-VhTvyg+{vZb(jo>@y*Ul4niU1kKNibAm%$YA+-sp)#vW}XSq1r
zjy|{`FY30)EsDH<*mo6u_^AEAJr}bZad6Q!j0KgTI}fZ?HX79Iw(EBkUtu&_C9xa`
zLQt;e{D;e1cI-y%590u28+O#A)a=YqIk*5jiT&4q{TEbQnDDBc{#?q_BvM7Ez6xAl
z6&T(oIwwLrIaE=~&p9+AVCzu!bHoyRsGm-I3=?z|V<?vkj=8w4V(KYBZF0dU|81gF
zXL5mO@Wp}4GwyOZVtCg5u19n`_qiakQJ-tJnx9GeWp72S<_e724J~x=<{fUV5<ck7
z!P#E-I|rw}K`OZ!>ii6{=^Dq=`4ng6+ubqO%)iT`V#m@h#nz-gY^2$|de%ZSEX`oc
z_Qm|Z-SD07o!s}0-1p7g_pQ5N7JJ2>uyt$^TKN-WSo`uC?`lu)dU^Z!dzf8u43E54
zCzwoz@O#|vCSiZt)85VHrT-mG7APxG+PfqMb}`pgH=a}#mcXUd?2J+&-bhjbN4nP2
zf|t#RE1evYV~{d$$EX<?kPB}zb&c}z@3`<Z{L;agYgvC~Wic2q7?6aRw{l3jz_{ow
zfTEG0yKhvu8{g$NlZDB=U&TEL4%53x1b`J47NK6Wcq|`5lR7&dsERoggOY473pJ-=
zG8CH=gkjW$k2sSUCo~sZ&Xd)OuV=-i`x1xq&l~@9FS&RVyq^^XbawpD&emo-AOCZ+
zv(fny|MRaH|8vVfm_o~;{~`&m`?0?RrSU^9<Y&px(s4uoSH=F^JNcGE^DGhr)RZlW
z1!|rsS)w#RDC@<hlVP}gs8CjYJ(m5G8p#C><#1WWGu)4p1a)z{p4ePSgrz)4$CDjZ
z(%&_Kv81$a1b0FmFUfsJJ<+3QUP(|T{dQ(>s9Du=(V(<y7{OO!Luq#${mBXzRoV-D
z-rnY5S60lcLeV4~#n&N7Pn0$c1Sl@6qGfeW)v{1YM&Adry#=H5s<g5W31!vdXU|BU
zm_PHFMqt*wH&i&Vm$-1<xwfxJN-M8W(M3U#Fk>@;Y3I|6>DJ~A#Fxo%hcK&xf&HUj
zl*`nro;kAem3};#5oE4n;KhsLTzt9tVcaSg`L0?HvsQyNw^;nNg=5N<0+Wj{ch8Y6
z!B>`Qcxlc<yd)PeZnrdo+%lFcHdJzElFn8rR;F#90$*$N%>VGQv>#7}Fe-_{Cp->L
zvpZERQsxfNr6x>gtt4s|yp&i-xyWvn%$MuXYQ!_pH6|QM{e)D8B`Xkw=`#uyt)IvR
znqoHP7ked3P+Hn;=oaSi4^=@8VfNlqK}E%a^H*pUjg^vEp>`f28iV@g(b}fwY)iax
zrB2v8)-#|QYRA+zk?lMuYN8!8aeZ|@XuA9=2+;wY&Uu@7OD4}cz~Le-*;U-eUSLdG
z-In!2Z^H9i=b~KA4a&&vads`rHDbkD_41J#sj#O4H7>&~G;n!Vfm$B9pdnyaFyt@%
zwb=)PREsA~du=yVIauBw7l)=8@y>~ICk(@Lv@6)X8`{qEm+6z#nkXimPR|cu2<hSA
zD1=4OukoT<1p>ja2dnE!S0sIw);W6j`nBkAlplV=AsRn=OWj-ahG8(CPHMSM_2VT+
znP2=f;{7=I&DCQY{Dy|ca!p(oBBs3K$$m_|_TbOkFrofx9CuQJl;ag>^~^i<>?)6)
z9x(hjMgX4$rJ8RE<ChO92{X&Ple0O^TipUD?@<e${%Fsjxmo)3x6kS6DD1XMhh>W%
z>VUJ$*}F)uYZvBwgXXML;t5qcp4hu6u=S!?Q+|#laJUVan;p{{H}WSvS@PpQ&9M<9
zKRP?F6HWF34wTCiiFHz2kVF<egf7_J>|^Qj19HwX3lC|bqwK<KsQPfbXcKoo?k<6`
z0`t>=%{US-jM9fbB)c8|?#JW;IQKg!7f1JCN997(GnwgqkHBtkWryHdN$KbPaC3_2
zR%9#1Ytef*iX4}PjxNZx_iqo^g9eZ;`^yghDAege)buBLuAr%zk9G}gwPyG5$5ACz
zgHSSgWliaqa~mumdPZ|(@loBNO9SQYsJD01nG}iG8LQ{gqtl<~(pGLz3u|NMZxHv=
z*UZ@;7SZ4=*u)=Lr&FoYDOFAt8>A9*nkq*>Aq$NyN)R_!t>*78qG4JJp7MUmyEz<B
zrrG_pS-B=`@mhhMH)0_%9Jbgb>W43BCH)oEU!R6X{@f}wOfo-PkJhm4xXTJQNeR=<
z5JzZU#0(irIonagH&1Z4EAPPCc&ND9hEN+t1u-Z`N|c(`EchKo92<jC8BvL$1(qTV
z*Itpbtt(@wy0OY)%rWcC;w%_;#k-)?8?*`%ZB?ZJ>W*SuMKyb@Y-UModd+Xw{r@uH
z|Ll}|?6-e~X*1x6ahq!Z_BkQn^;<LGU8{iCbK31k*U)Zx6TdF0+xw!&{k`?L(wi_*
zwTb&Y#Rc81Gtm5^Akw;Saw6AlzotI^r$du<f8C*_2hg*AYDF@C=G>8#OMaS#U91jB
zYByW{W6cr^C;^PLpafvU?tOdvP29&1%KSR7g+?Lc+9^<Xac{GMYR$HNBD|9t+#al^
zz)2EagM7a2ReJ2$Okc4#N6Mjf4tuweVOD&#0|?S_3n&tp8=UGHdmK?t8rZNU+2duX
z+G<+-0AN6$zsrdaRl&|JlW6_(XfM4IcbOjR%q5-_h9WfV%7?uw#k7lJiH2@9kLu5~
zThx9q>`lpF&tasfT(?(;$_tBz-{P#OLz3whC=Vqi+k(z0O(nu}(@dIKlU~}xYu<4~
zRqVZm9s})CpM$pcIUp%C?{J`65T672vIcw1syHLS=bRa{yEUpx(gRklg2n8}9fWF^
zJR;WC769n61<>OHK$_-fT;(i&`ERe=XLDM5D4ADZEX_Kmn}uk0c6`nohE1U*Pl=1)
z-~Qa#Lw>RUzJvWYId$LN{=5C;>C?@;{r5)e$rt<YzoPy3<`+Be*>>Egr<hxHAv11O
zw8V<LcxBJaba`WK9fLr^PEg7{*IL_7NKSd>@zhVLlo6#?H9<0o5Lhmq4l#W?du8LB
zkT)a}YX+QH8%3JS)KaFasUC%+IJxzQaZi%I_e0Dn-p7>K_<rcrYO=9FseMCH@%#ER
z74?Xv**(z@%k2hqKo7gNRmsoU1r$45M6)Vgno3JvfPp|U0yK3+9RrVdX12}RzXh*&
z@-3YdT2i-v)mmm79daQ6k7nE!o)jyb%&br_U(oGq3x?OrHE>dBz=6O~&D!3)d25Km
zwWKo@OW4)VZb(^1xZ2xS!{7peXt8>++{$3lm0{Te%v%?WT1m`nXSR97qJTR$*YiPd
zH%jdv=JSGT!-u(b7qxh}6B>$p#!MTB1)1KRO&L`CpTUU1W;}Pfwwi4OVGZPd<_|VT
z9?bB;YSg&}TvErSnyDFFuHZ(j4vr_*W7yU08wMyvSN-sp@9pI~7L&Vrjs)@xA0Sz{
zU=&;Xd|DYy$h~u(eW_pEgRFafA4haB<8ptsR=aOMtMwI)5|e(pd`wH3E1b+8Su%TM
zdRpe(=-(B)h7O)R<j^d*iEWng#c+@nFY{0E=yI<+e#0N_4(xVk-tR9^{m*r`&lk1$
zdM8)hx+^6H$t4#u6)>oGC+rR`Al&O|{vvETp3sY~aLqh$yDZYpx)9HU>da{IO59&r
zba1*#>FxPq-8z*P?%A$2=Tg1ATl-SnzU5x5wyrVbKK!nC-~P6~)*M3ThjOEx<NSro
zX<#&nDpfqGD6X9t%0g__3R?1IkXbL)voAQI(nNisuwE56V2fO>>hVj=L@QFF=m_xi
zj&~I5{I9}<rI+z^*mpuE0)DT<ai-c+!S}d6RGTJzbHVxs)KCnj!{IHg?kJ0bA>KkY
zH8967kGCnC1kO5HVyN^Hc$LAbEP85zjCUqi<M?Kb3Lw0>LS0%0@0;$;nI?@-dW)<s
z9-Mn8(_wcMWWCFpA?t!gL^+u19?+^d?_L`4d1z)6WQA+g?)yn_qjG7%>;X6RCH@Q|
zGVp6rGi)NVZ4mh)R!M$=KaR&bwGT`ttCRrxAj7Sl)PbT)3d>YT(b$LmzX$!C{QvB|
zYg-%1(J-8^H@{*wx#E?K5J(7c0FxUZcGpjQdcn@&-T2WpNCON=Gs=u)z*+zIr>f7V
zIZ6V&##gdHPj_{7b#--hbyYQ2CX*TH7pJG<69=!Dq{0;M`-E9s@F&oH6eKW;5HN{=
z3}j$qKLQ2?KBERO)$)Z2FcP1;_dqAiWJMS~<`m219w*%`YBd+)3m|}M{s5YwtL-X4
zx+H@koXedAT%C&-R+_{=K<S%03Dg-Z_Dpv$vuDMia-QPBDtLo7=jwa-Up@ZYuJ;iX
z3G%z?xe9vg>)0P=bWybF=k=jbP?+tnFyA*F_P#p0gGQ(?e$j$zKKlxqP&bdg>W1=}
z6R!AvU`Ik}ClkX_abjSqXKW1Ba4-p=j9$TO`CcICX+X^AGc>H8`%{H;y*N=;5bkY6
z7?E7V6W9mwmITBJhojiyQFwtAj%V;Y-ro>GhOQ-S115apod*eU-N|gqoW{wvC>#T@
zI*2a3`oM&%uqzLI+&ixV=0||WeaAAUl1(KD-EN=imE8^_ds$+!oI8r<Y+{<VL30nT
zr@Omj|2k}Jv^QD=ulC8!G)Vh6L`+ogt$8%(a>HQsX4A5hE6s6}!{!X8@Nl^q5QoS-
zGy#u{9*MJz9BWHCzexUEhq+2XT?!L=JCZ_|xX;DO5L52KR(TD_8obWH>lzk=ByR1?
z$h=2nOw{kwVKm@#x<}UWV$pHR$469C%S1>pO{O20LFg=BFl*P{Gd-2ckWVF%qlt5f
z+9;?oc%`Jf1HU(1%>6$agY$0x3-13ry^XDnj&c9Lxzl;L|9`mu{|@i}pB)~*EOq<O
zDk5fsksqC{;a>0K+?|$l2*7nak2}B#w9j`0pen_93`ZyWE+eNh%K(Qe2>tbqKcUT9
z@O_pNSYw~KKLV>vVo+Pq1r{b5)r%k`xnfyAXrm*U==~FNs8=ro*sZ_A-YeezFiEnX
z^Tv^Jl_l+w)~$+oz=QoFBmNymOs$T?0$bd~u<kk<Sf+Ebk;x0PX@&u}L<qlui*ZZM
zmBpX&%nNYvxDH~2y`F-{YPQd`vvQ2n%;;}vyLXd=3Qx7DD#P=uFbxEheFLgA11fLA
zFkJDV%?yV|t4H4HEE?$Qy3kGQblVWmA+;!I^L^mw^3m}%gly0gxU}MHHaR4cJd+YO
zp*-V9(|$bRoa3Y$ug7zcvsYhPo_r^<R8p7ew?To`eRrwUs6R#_3e_W!Q}Os`YO?`#
zEN|oqsoSVD$oy_Ho(NOchtY6B>%)MWH(GYLt2cOrRxXeZMll|A3TXo^Eyibi1~Z+7
z%>L&f$J~Sv0Dubl<L7ofMge{7^w(V3P^u?YBuKC(dAN9;MRkPh$7p`h%3$nk5S?Tr
zI9fuLwJ0l6OLbRWoT~I&fH37)S|TlmX-UIrP$Wt@CTm$TMG~`2AC1LH%*>&s|49mW
z2&_|ML<Z<v8BKT=6^;g<W2-S59rSICCMh#HNzK_w?qYiWGlQc=F}q={S`24^zl|{s
zW+#Os!a6m^V}QPm(S#VgmKF|(RjV;3yMx;p7U-krMPtG`H3npWzKzk0&iwJ%FB%W6
zRbxaJ_}duMFf27FR;|XU><-Lffri1L`yPFHujO6CZ_4AK1#gp<yIfL3sU>Beg_&!z
zfHKa<Na-@A!^wW})J?^cP>|7h_P6B~W0y(R!M&vGpgv&-w?WxKK54%MQ@?R7==z^~
zKvzVm4pGTamP%0rGY2^*@#uywn&@8fs=%ubVdbEfN?jL@x3P5u$!XEH0aa>j89Lxw
z8p@!=SO)b5Gr0ZJ*f<V{B@Sd*t=^f;=-^f+>3UK)N7s`&Q=|dD6-_ceD^w%nvpPDY
z2KbhS(wUSPOlMMWIGx*`8~-FN8V*#cF&G{2t(=kL=wf!}7aPz-#X6^DM<+`i9Bhum
zE_u7swO?OJG+H7}x^vJ_m$?I@+=!zfnpPNn4J;a>n_IA91^%eegavi#Sg>@^w_+X{
z`-K7%>(nuf=%DY6bJN+lH0N@A_o~Mkm;?YWgm*LWAR&fzNpqTnK{Ryh-fk*&(rGbv
z<;kN&R|@@`#!TwOg0glN!OEgUf-~=kiSUJS8f{;hm)7!l$r*v;h#-EI9#b#t-+oih
zDPh`A;k?H2QA7OE!E+Q`@1Krhe@X(rd5}Vh*qS-GJfBeagxOt4mY>FD@_Ytncggyg
ze{(jNR_6Q1OOT;CxZD%gvM?-9re*S#3$43ks{hR5DPDV5=Bo82a8({M_k^u%6w4E8
znSABMYuSv&`X0x_vi=KyuDS<1x8PTR!C8<LZK3@SNB`=-dH15sR_YweTs&9720Qkz
zi><E<^Ud?vi5MZh@vr%WOwW@bJ&#AjjKwVK3%5Sm;x=d$Mt4P{Fseo)s9#7UhY!}<
z;EHB)s7XJP9X6#2I~gpuDa21R-8#yHl0H?paMX`yQK5(-mFgV!4dAz8whF>R+XdFC
zW319a-^OU*XcH$z!y%PwjK>7-xLV^*kMvHdvezS*IZ9@if_t|b^33e2<OQ2@O_q7X
z##`%Mly#t-14GrVl3`NLyHcB|I7!=gcm1i`HcHQnFu*ML;#0@2k+Fx|D#4rw<H~ef
zTQ1$4NZt$b+2_B?w0ped%XSOHdqFtI0jDzM)|X2;2a5NCbfu0LHq~^6e_2G+k-H!C
zDs$Axq8C^GmqxEJx^MKtv=xCL^?jm8S6-Em9xaz*I(qktVnnm8Ot9a6yB&fN?Cur4
zuImx(|K1M0uAKxU3h;R1Ct*smgkiR*Af4W&(mNO{E+_o0b#-7^;q`~=8|?ma30)TN
zj9|%oNaL3^X#Dg(5c%b|L*$=pF!I4Ypz+V&5F`IwgOLyJ10(<Z@_y)0rI3C3|5c~*
z)B8YWK>XXG^4mIO{^355`K^P_Bte@){UF(TD42A4w>EQX8jHTqk$OtW3mrqT$+*$E
zWAeScb@Dx}Ouwi1n1DauI{zJ1rQgAQCf}{|-$7;mJGjsMm%ZOCi-1on6Y%MMCg9uK
zr{E8jDfq+FVy^(S6`Tiy%jG2;3Dd|gOk~j@wVHK&%?<F4g|SG^h{EW!#E76)y%Fi)
zZ)HTMn3O&!F(_KG-ngtzoR%x&>Xtw$b9ejUJcz_vl+O>1(qK*%|2P;0gDKF>z24Ps
zX_49|4GSa?8difxn3XW1y*L^_B98;p&$;*Nb781ns!Md#vw_5-o#BH+CN3h`b9wqh
znG2A;07bF{zW2j~J@GA{YQVz1GKVzvDIAgNulC7JA}lE<pq9UcVPJ~gCCt~ri#(#{
z#LjY-P^Xmi@0h~R@o1(_p*0<UgXI{EmNXx{_wpmw&=C^?a6a?oXj+X&-X;07?v%`n
zpgJcMlfp^)DV`-E=BG~lD8+nzso`9NsWE^0>B9?u>QlElUJxb$;U8s|pt$bzKvOYl
z_}v<=hJxBX^Ts;mD+Inh!as9qUB5Ri^&0eI(i66xNS9AchC;cvHy+Q%^cC{Nd@;&a
z)g%aL^=`7~-l@y&HXH~`w=_;?JkueOn~yyI8dEvY_gJn6qge_#cu&1kAH;P2Ho-xw
zG@{T1Aj4*sPonopJgh+@Y&JiUC=^a5bRt<GnJDn`baG_Jq;xr$bCj47lR|PQ9DpjW
zVYAejwB;lAh(c1E9NAdpkf_DGwbmek<=AEv@_}Hjy^{#=mDe+L>L#;TgIJ}+Ua?RV
z8pa->o<?&(no8QDNJKZh*I_!Pnp{iI%zThZL6c%7V8KZHIOM%W6S?Xj8k()VKgTrO
z3bVfn{HQj!j{_l8=3@7{NaXQ*9?a?tUlPMw98aitJ1oDXVHEIum8rNDrZ1(AWHG&>
zK+MJTb&<$p`aGD`nZ6{3wV0kz@phPgNy8{$`YKa#JCfeNt}p08V;e6<wW*0luBguj
zSwqs7!mpm7XH>i;S-+Hl6p8xsQ*k>?AI_oOgwev7K3u$h6E0G{374ncFe=^>(}#DV
z--P!|y*Z4A<{?~RE=M3O9`-YC75&ku>;fQ<IU^_-b(@~(8z!kFYK7sSizYzXNv}?y
z#j&=Fgbs$wrsA#e*SlnK>=oTClD!0G%j7QC@R!6~K%HA>t}GhXBjJbHWE3!fFj$6>
z!pg}R=nQJDeewq%nJG}c1O9|5?Zr<L|E9ur<R@pKhFMh20b^1Vy%X`$+8cULy^THi
zv9Gt-^Hx`fP2TL+;U}m!bnanp;w*+dWe8+cb#Hg|QNi|W9kFFDVnvo=r!cx*b_%0!
zik<3_XDRGd4V61$r-ezUPOe0f&ll1;#WCixRmWl_&LSJgA`;5|5Ivn+5jS$w->%r{
z<cgYm$tMexDV<x9BZYjDeNn6XvMfZk&QYh0q`^dKGZp5nTinuyiW2?XrU>Vqt%GdO
zlGq=j?pc>o0zep@311IU=p@t$|JpEWKa!tvsZ*^_l{gZ95n&F=E%;)CjN)hjokTvH
z*hA%xzl?(#YaHbHs3A;pgJ0wWRST0`XEf4l5n12+)4};XE0X{alP<tzJbFN!@A?z0
z6{HQL%_EO|=ogv|OZKT!M}fS69q!fGKMOb(FK&R6;h^P-Uvh?yNxTDt0Q9+MieVC8
z;Uml)4n+Mnh)49B5c&2$wH3hX<D2Khz=jC0)Ndsrg7D7+3ZJXOeeVy4AT@Bafl-p*
z`{eQJr{4R+7cVdi<niH~m%Jb5tIuna6wFYS7B@DUxolHu)@<g%sc9)w(NYN9QV5zi
z=#u|5I=6JXwvg(bmPW0XPLcc1;#urnowc*rx)w|4uyYm6!srymY$k@r_mYssgv-ns
z`iGNvI*rFw!Ht5`Y0LE?3D3@_#n4<!^1E1u?wPJKyCwf=6K_nmK8?oQEnX`8Js!=*
zIg?jLK71xHas&kEC{f@P5p_j}-UPvC?qQ4BH1A+{t2s6H0^{V*T5c-+G8Z4)q}2bF
z+OQsYGji`6^X+(!PVzRX#cpC*yIB8A-aZCjYrw3#GiGm%UiRMjdtSC~CMmh+nX_!o
z>~i+B^>yz}oJ`K)vl-Y}Po%u|Z{peX_{yJ{QGm?-8ou;Lq*8t+?i@gl1l-w)#pjO)
zI||Zsf1(;{G_V<#G;G7|99sQHUZnn6U_`6bwaVgpCk1pmp8ClY)@wKjRFlzh_$8=K
zw&PJaP%+p0U}e~W+f5%@bMmPzA9-mO9QcGk@?%KqBBhd6$AETPlfbZ;U8N`z>JuKD
z!yrqIRYiaok>1_2LcR@;t4)sV11n^8_=rI-n83vhuJDLadiK>m_yF9@K=(sfl&|XB
zU(dq{69)v2JAFP>()6(S^fV3f?a@VmHM1!J*P<iIPYF<YP&o=`#;|m4!*N-aInH7=
zf%kw=3l{iMfF3u&B6b@BNHDeqIAgydwHg_1@XF?M!A?`LTa``-ynsIJTsJ>`7$C)#
zZ2)$M1Ng&6M*T-zKPoVwSL{|-H%Vnzax-7NYQ$5#95kA!0e}1SY}GY)Er!-^tv73b
zKA)YQj)K}-E@`gqM--rY7p31CTFpniIDh*q+mW_oF~Eg7%0ko*WlJtnBpP!l07c#C
zTXx>QyeI;^A}2aEDMKCw*!iyx2}*aAM*|rGjgV8%2Z|tYl1Ic3BRmm?xTmhu)A<yg
z9@2TPM-Uisop7HFNoRf9O`~A^nUEUUd0~#}lQ_g7Za`jqp-F;BVo37=oizX99FoB3
zr&%<ZC#qfnCB|WZxBPY~;_tX3WK@3y1ISl@s&}dBLzAK<l#wJyePL%YWD|MQ>=%F>
zqj0(cn8Ymkz)%H3F91o$OGC;m$lNT+Ai$(aUVy8Wz5x}V(vs^*5~R3q&STdhJq%w0
zbZgC#KJ!;>PHXpyoh0!z7)*oVZl<u7`S8$BRGh)^?{||aRTD&QDl&+F7JU)Z8;txk
zW$5h{b>he_r1s?MD0TMqQ+N%>=kQCgS8fGiABO`NSo#_i@{tqfGd~UXYNcco%r54D
zPa>bb$8g%6h6AFf>J#_)pfNp1cNv(luX`&ic7~>`L3mFUJ=iDZagz2&_-&uF1$!*|
zV8tA%x9d%2Cvf^#%JxN9M}a?l7maQXIoJbaNZMdm%OoRFn9^4^1v|4}=;NDvS|f98
zw9f7>Q*r5!a!u%YmHbz4z+f4Eg?mhG!rAbokEZ4!4TLp1(H*^49@Y{mDYu|1t|+mV
z(qQ`k2T9zp9=C-L&v_b5dDBrZrJ~xK0ID9h9(k~SEEMEAf9m1tT{{qoIkEcuxEz5p
z6^|PwI!;*4BD)HyM&3Cj*=(1Nm%xD{xNjmLF<$voc*;8d<?y)w_rs%Chc8||?|*pt
z=Kbr#4=<1BpyS_B_@xcQQ&~_&3a?jDI1T*~tb#;JK|OAQR>E*D!W7D@%aq+;Qk5ZA
z0K5oJ{n=>xBKQn~fNk)KBBFg38@KAT9SXGi^jMLFTmxaZC|-LRbHfS#Cs?RJa#VP7
z$-dw$`%)$3Pao!HywBk<`TMHre7c;+E-cj7=0|<)j!=I*KkAR~2=(>(QD4tO{oA~&
zEswCHha4n}(JQhP+>IkW{k9BudI|Yab+7trXgSWRnn(R|$y+=Ef%^i9S(%J(B#KD)
z;NUQygF|gPl|-{PZR$~_JUt#fKJJ;vM^yq>9>v0lIxjh^67s=!=D~O7&K;>1%LaGB
z0Czg|b56Q3>n{NJ;P>w~HMfgc7Bc0=S8`<K$MjwKg3U8a!ehPO*h-J8?06oW+v+*D
z%|)NmDAu4!U1}83;lYh+p{`_wNL3nl)e)B&>x0kO;s{WLQ<Xti9BviIF#47kaUmI!
ziykkIysPtYaPCcG?q?D>T})hAA_)j`Ypf7~ma6woq896vC!-M`9<dJ@W6y!upbcA|
z%kpbDoYc6rleT)d_k_c5;;wx*zT!ON?Ke(QI`LtS$s1`a{Hjgdv_1dlKRccNhnH`U
z-yQKE&km1Y_J3LV!?*n}zyYt(|8DODPyW&yY!8M{PS<BqN<9XH;X0wpIw8V(nhe${
zudUm;eA?%(J~rUrcCSZ%!aw8Z=H^ak`wyMXjjhd%UZ>OR{$ZoL)!W+ogST-T1eoFZ
z%=6}kV-D^c`3Efi0oH_f8jb=Feoo-9imr2H%3GfAjWCGvD!~7G&sSHaa2tguiJ#o8
z00i_Yh0QepLi(e#0Lt4?yh5I`o7|8vOt100>2)_cJ?~i%#UTpi09qv8{t5ZJozlPC
zXK{S?zh`4V9JL4W_<+G7I3C9{^z(Zj4+C`h8za8y?Bs&T>6nP(X^@Ol?)vv8{t}Kx
zKDK{PeWqT60E9EO_qpwL+Z%1~)hUc{062;!?_YaYe(FW>6pjYCH=N911t5s4aC%Oh
zBnYiq-oN6RH}E4bK0$wB*c}4TpL*xh>120(eN4M<$I00`z+8vk*4xBoA|g*QW=oZ$
zcZV;YA0B_eQ2$?6{sgswEae-l7_tnuiIw%okG+3u9zM#F1$hf!;g=+yO+3~EsnvfI
z4`-tQE6_?ONhsbRlZm*{Q<?y8^P2eobp;Xb7Xi4~x6De`FH!c-NkX?un1C5hE_G3J
z<_=TZ^Jd<G^payu1w`C6E#EZu)#1n{s_I}!3*jK=gvVXurg7q*1%C@+n^Ar*YG$~{
zWn=s{fQ2*k=nrSrBpAn^gMJd621yWsazN`I?Cz4p5`EdU(7jGU&~UADG^W?idMjGC
zm*Fv4h80lpD$NKj{^lvMr0J8NlLF5H=@Dr+58@Faa|P5p3b0C}_Sn?w<1O9V9;lh^
zoDV5X`(ZShP5U&Oi)f@88<1D-cy<Hw!W{uD;bwu_CwD0OxRUdMLTd_Bm#i<}d_-R;
zCNST;{BQ`O(J`EKI^5k&iJMlt-S(Jawg4+u3mO#T*RYzv+{9CV)W=@Er%%03lj2V_
zPSi<lrIgFhm6giOmnHSzO`MLz<+;=LG$!08U5za{#FnDvc<k!)AWsw<06`K_@xouT
z-rWV{Mu_1kr?rrvhNrR1^SF2{Ijfrs+8hyGl{6Lhl!~CWeP^YldDqEb?BvSG{D>Tq
zxxvAiFO(f;cfD~R-z~@ba6BLPWwKEvx9ESr<~b;5_hKIxl@9j25>OpZsBDMzN2C3$
z4Vp2*BbGae+Y4Oiz{fpz6iC+QPusXgu8oe!bkt<g<JAKtHz880q)0hT&gd(@hD}2V
zOVgXS+y=|Q9<&6wDw`H>%}J8K(ha35L)}UB01Zr@OoX-a!o4w4K^is$&^J`~V?t#W
zvln7hkXhbS<sJ5Pucne(2SK0*>~LlKt|Khp`-971il5E*dlZC-h@>@jaMmo*O2=gT
zR3>T`O~{8n6L2Q11G>=@sTM@DF|(h&N)O?L8wEqAOwc=C+L-Qb@S&KVY6A*FsK3RH
zX;s?n$iEWZ5{(H;zg8B-qBm0t%Y0X~cVSLk;j&kX%6QUN<Yu!q?uQ^KuvisfEd|1V
zx$*P^9Hq4bVm$GNagatoPQAgoADvOdY2rth{Nzpj<lEjmIPJo*8Y&6^a7L#J7zd9b
zt0|0*K?%~?2zf))Lro4Oqdz34^7K5OjfUJUjWl5jD{xN?jePIz+6xYwg#nY({#)+=
z6g0%J;{~GtmdF$i{|RCQ7yzJ1BH7jqmxmu-J?|eLAHO{M&}eYEa^P(Mx&HO;WB<+J
zi<kYw{`;eMKOY^wd4osyH?N@8-(EIVIc_Y?M;_Hihg3+>`DhQt_eOKCVEcns`mE9h
z;*pGyK9UiNM>5Fs7@;&a6GLGibjl!^HY7H`hM&mjcnHI|FR{XJPd&ZvV~G;{shQrx
zQ?H>~Z~?7@23;Xd)R(Pq&*RY>{~}JB63a1F+nL>;CgSf%+yGSOSB(^%JW^QQka3!2
zYaJY|I(jJCQ$Xm*Fc?N51)>vG0Yfe6Ux7u_Ad3cU3<xQQ7VcUaO?Z_q(&m7$%i3CH
zvaDKUS*=c%RR>vCEwZdI)+#4IjVGt*698g;+1lDrc}S*0H}w5?kkZGqlb0Y((^sc&
z1K2i)EHv)1xBhtOPyNU1oU&}SJKMbd`1&<ldkispF0M54&vA%xfPo*)CJm!kQCtY~
z@by0q|8>k^n(D#;|8DSP$?#uBQL%U6>Q}<3jPrNC?rlg*ZUtcqm~~;shelA^Xf9j`
z@n^vhKL6aHgWhv3ShfC9eC4RmX^$K1yBF_vJ%5V2J28Hz6JP~uo}NwGWCKPbodpRn
z@aK@Kdbn$i(Wnc%9&DcJD84Gq9?q#0<I0&*{s|_B{)#eVB}=Y&YgFnUSb{}Mv^~5g
zdkX%(?vNHDB2Q;0l466-9WcyIKlqP4OdK@Ame*;jf=u7+pv!&No(tVI_mqcQ#5h-<
z5<DN<0#QgPode3?YcjcwwFB=zXW)&jyrCd(;bV>`7g{rKs9YS&pLF)tB$3>c_;sc7
zL~iILF?1H})jg3LpOjEeZ5m~mx<R5E6Sk4hW~9$QXW*w#n9V-MWaYwY+WVwiHcF_?
zid%|PW(e6fzs%58RVq3(9qmD8Jjjf46Dc&`nzBP`&bJ{$GO(>6OH@sNQ8L9rgF3QB
z*dxg;BFxa6cu3r)%ITb@;m4zPp?jirN*I$gn09P4Rm0Frm!_CfvheTwg3u*&RN9ma
z@~ozcy87xY;?^2onJA<F=*qtVjTO3>0a2(SZUi+_FAbwX;9YTJ(AmsSe4t%G0U{()
zpV(ylDNN{if-Wpmn2B~BCenAV3??e0?>bFDiqY}_mTaLFXvMvlS0$Xa4oD((`L#db
zy&`@ho_#Ck{ud|VSqMbt8)_&nc^-81hy8W|`z>dpC`iU}I(;1v_U=e`ml$~WR(ZGZ
z+{oF2?Ez)>;@WvlO+=j`VX}{h@h&wW42Zq_1Ra~F0kj%&@47UA1wNEg2Pt?J{0Ndg
zil-cqX7~3;adejA`#0F0+RjDu7B%F@9(SCk6QT@uRlV)mf0{M(8KxcK@Ii(5{ztyW
z9GH5CMRcxMy!!bqi0Oak-1-ucXI^aa3oW7pq`%5Ri0NTpHa<y1zEdO0vPnyjmRXvC
zCN@*uN~3K7&}h$C?$)lCq@$^J%QnHy*`c)1KFt(H*xXPCma{LHIaB_Q_vMFk@WZ~m
z<b8Q5=i}w=)5|*`FK6GDy?uB4x560@`*`j?E_S(lu#Z>qMtL`R9+QoD06*Rb2_bTW
z`pE`^DU9YzFw`R$hw<!W6p&N>Y>JbUN>>^RZg=-^IK*G5AC*W<i9Dz}TnK_zJIQ)_
z->YWlFT&4`&gb7Zt$vN16FL@eW+M#?Sa4-%l(k<0=0D88g%**{MmjgIhS4}4)^D6m
z4Y_vI-}0tnK@*W@AXOHNx=yqatG(dqbfcwV-BMOqOA$7CH#yt`0sRkbmk0DUV6G0T
zhiA5b+ar&-jX8WLu$+OC9;>Ib2oWyfsH?fh<~qK5ibIF)vjLxoho|L?X{c0r6;-Vb
z>lG1%kDU7B&KZQNM}OT#{3p-2;g0<*5&zlS?rj?JpS|tQ_Cx&VL;UCW7ytSEmsjuK
zmyP{YDwm4;{2O1ZBEw?k=+8rvpa&`MBQYYHVh=Z+B!T|nc!9?T;#NDrhiZW^z!dll
zEOpTBt_``XuO2R15HD)x!YUgrYA14fh!VY{C{dzp&lM?JfeDm|$!r!V8y}ht!5J4?
z0ZlD#EDzC)XwW?N&W-}jY2}LkwDQn#?8V%^8j@bh9SW6J4F$B4(@QhbKsj1qWRA73
z<$IFQ{lRqRj{t~~%9|;d1Ud!o4p3E)=844)7BuEvh8*gpjXWwGa68m8C?IiQJxr4D
zGtfXP*A^^VnQ5^l!&b|z4Ry<steO+Bnzst(3rm%dzwMAzZ5;E3qv|ACSSYFncAhX)
z2`DcFH9xmtRxB#5-(aaDTl6d`*R1<Mz2Z6gbe(VMmp%9LHlPgEb*zOWw3UUvSA^2s
z(X3@-SzRcUnM3)C1V+=lH)Z~mgt+R{ce_JC?+o+Mo9$W|oBV-Cn@}4K{oV7)kH%v>
z0k9<X+~O!m$4>s&!C)pMU}<~vQ5m4~ENb|!Zz3@lPiTc*+TL;Q7XubdYS)V$b;+#1
zY;ccBTs!WXE={qJl`nDtnj%3n-Hw%wG$%YeXX~6RINRPm%LHX-w^D<#U55p3Ltb8>
zbxvfr+W%UoQoQ)Bg>V0^qOJ9CYhdp~;kL=bd;{s+IW%24nHy+0^@afqaF}8iMi2}f
z7_mOEEj4Xh5#1z$`mRH|pT}`B)PuTP)QL65!J{phgcb+GyN>{;I!|t#H`jXLJ5fSt
z0TrueD0o&lv=jjtU|lvST00QwP;AvlceSY{Qp(0#Yj^=!god}TMz~rSe&Q(NI=d)z
z?Gd9jE{e&0qxZv82MM&GTqR2O!ivb2H({%*7v|j*iv*>aQe|MUcJm>N7vZArB7sA>
zwc@cEijOl#r{kPe&lyl{-XE*m2)RObAZSHKFBZX-q82vKX;b%(Fe+hfj+m$L@7_b`
z^lgStmx!Iq^KW#8*QCkdIrB!r7IVbdlybxTR@^U9MAej0-lIqMLZ=>L-fzFH{7*Ho
z+@mKOtS)F25xKrDS8bSj!GE9?Y80IodSs1)>5r-B6ULl6V!_)k|2-+<X<9m3#o>cW
zJ=qM8MQ>FXD|@%)g5}yQO1f1Qd2E@kciMn&(d&xtM9W}XO^sTEX(wFF2ES-#-pJ-(
zp9F%PB>@Pf1X$LOD3>jOn-9EDo78;gLL->X<Z8E@VV-|=sxgaN6WCtVTobOmdVQR8
ztY>>t(t0XqyH@J3aGk>6(1GOm^5n6Wi{(>>9so%UM;YRMO1nRaXVK74Zra{Y{%CXp
zU`hUpQG5k`OwZe*@zMoHWzpgA$!vs%z(5UJab?yW$`&Rf973XI!khVBG>L2G#@+&U
zR!PL1E!LY`I2Ckbi9|JyqMSoxDd)e1NOjBmvAmQw8ZLsK`LkC<$6PX*O*cWVRh$OX
zXV>x_wP9wUw6a)#&3b;CB~A^d6DPAr8Fg3!y{EJmon*6;h)bsc6=&;Zs3gdzI*%?x
z6;U{slE4Z;V3cucw(ih);vTEXVeK}d+$_@FTO8ghBud$PINww~-_$+X+>%nh_C2WN
zjyWi^=E|$xRh8nstU$X=uttiP6)oq7x;%+A&+$j+mx!__USi;<70V-OY4`IgcaxrR
zXTUA~_O0b;iMN*J?<+miTWFD2pS59m=(Du<aFzOSmAcr()r-C6yyFzqxJDXSNiF{G
z97^yb7Cjm0nv`icM;DN_1JxpkEof<>9o-{_bo77=h;^j&dx&HMy-LDq@ZK_^b2^zE
z@yNSwF4(S~qc&0slvZds8LOy}#a(EMOimZKsZMe<TuSBAZC7d@G2}X1LS@;ITHbXT
z+rC&1nv$fI>Z(#jTbg^hDMm$rBCKs$_IL-9&O6*?b9N<#eR14=tIE5<?xqT_&8HBH
zie%fD(ovY!9qc68v$V3WV^-GE?rxQ3Z`W(<apDSI_A#UMIx_9>6$#2aR+o3SJ^mir
z;uj;)YS{ECp%oF7Eq!u~`>OGM0fD_rXx}}5|Mhts4S+KvkYOM>&TGW~_I9?+_g}Yr
z5AVM|#Q%Pe@xSlhK0k!tL7jj1wp<Xf(V|o&u(oL5P3k@ZJ5a&Q^9t<YHM;zZKVP4x
z?7XNH9*M$?P+xsa`xK@MN`b%v!;rOz|56f<V=@R}Bv6)yBbxCDbTZg;&jXgc7I6{m
zH9AlEJ3wB7ze6RE>m-1dJO^YM6lgC&Kc$gUef+TR7&NzRv~qKssQ(;9;y^GC0<?=9
zBS}75jK&f>J?yInjtt^a21Jf<K{y%5nx?5K#+hnID+|DtUIsFiCOi6Colh1q0!2A-
zcSm-Equd5o(6)dt;<jH`18`&k&@d>B{dN()!c)Pn<~$OdTfe-YM*bQ0wSiVC_k+AO
z#fchNT}7Cbjw7!EH2=xrnj_SMLaU82D+Ilo*v*MbuLMXEry%xte%j@1oOwaX)u{Uq
zJlJKvrt4U{S#h*_x2~||xqxK{{LTZ`xF5&`mB+(&baO7K@*A>w7%jc)74FjZQ5knX
z$Nhw3BxM54-7nhaMTF`kW%heXj7O;XaNG2eQk-BhvbpEOf0}|s7*YgHNGLV@BmqMS
z`#WKOV~Yl9R_blvl5xxS7OXwQ3&_08jbCP1FY;mFqkNQ-B9xLMlvop-heu>0Tu`vQ
zU{s|UBqv1Ms>lO~Jz;^#4<gn9wDNXSJ*2oqQ1KGOitWH+;WSnpzrB<R$q3}0ZyvHE
zy6-9Zvm(14s=8eKE19BOR87>Xa{cjzR{rH}Q#A=rr?!{G5gJtH3;A~ICTeeLC;5Tk
z6=B`&TiqI7Rec}GIO_JS&)R)?D*O%|B<6NzaL=XI=uz2es1WV#?)F|i;ynTsU$G!=
zL&?$NxQS_&S4?s$5<LRyP<^7n`@p)-tU^KejcCbP<RE_gO$vw#0p_`{cLN}G{VxP&
z$rr)xu*VG=1BBB-T|3Ed;t2@tA#gP!7xy#~&d#|c^9gaaWumqeeB)^PU%9)pD>&6b
zfX8xE1=P5U(}u`VHQyG`sRmP&$UmEV?&x<CyrVQT<M{HiAjXEbn9dTrqXTi?mnRwc
z=i-8wz&2)tazLy6=g98^^%nYwJMw4N8}5Vyt3B{;`l(#dEc_QbNiXz(eHDj`Ph#1K
zBKkDDpvQ&ry1RxZYp<7407(`AeJB^MchD~!&%<97KM$`CLrI69N9XKn&S&J6dM?__
zT3wt4{+4{j30O5IV72N5tQr%rT73c>57A%T`9t*VmGH*od=xf!y~E+qOV8tEx;6-t
z!7QA5<6u0P+z?alj9%cuvYmmZnT&2Qi7!U_kV`Fkwfom)WM~kIgt8kI<!Mm2pWDjm
z;KEbO?4YA%Cw(wxV*xL>w<*-)0Ce@&>pR<>CmTE69%fI%WI`!fYh!$M6L{1oThx*g
zPmpW=(Fkwh1rx4ah3Iw{4M8tUVK11Sq)oa@y0hqGg(anrDe=??_D(mp;%Bjxm=NCV
zaZYuvRFJwGpK{+hs`jSW?V)Xmb1lpzOV5NwQcBSz(~X@_P3jX2S+wP4a?F$qNd?Uf
zYY|N2DR8}~e8MvP$~yU~m<9{q{^B(MvobpV>X*8mIUoHpz4&ZA)fvjbSlewwvD+>Y
zZ8P116r4n&$gbS}o*GvG+gfa)J9=to6%@qfPAi<JXp<!{l>&Hvu2O;N^CO2WCP-6r
zN->rL*M)J`f>Ae_OWAt~cPZUD-lH}ZBOd_yGSvZfWxSI#Nz&LjnKN7{)A%r1OwuJd
zsw$9xKT0HCGTn$xU4yyYPE7w`*UmV}^6AaU(IMVR`?|8gR2)Vcj@wAQL4S9@kjB^I
zW3SOIOlDB<c9!w4TO#2?6?heMGgN?Oz7eL`v>4s+z?jmtm7$-g{ObUar~uqMj17Tt
zOGEU&uKd0#V8RCSc*V-8q3oY!GEFp+I-RI;bOG;=E##ds<JK-GcDllAUfER@#U{U^
zx=YcTWenZQOu8F6ZQJV9<X$u8gkmrBmJzSV@`AK$-!PU9-Cp1o=7M|33iDp%MJaZn
zULG1(f)96VW$)JJiA%0>2UhOjQ0h{NMjls<wg0AWhnE%WJ%_D|t~iIqN>fg?TG=)y
z+jPUj{W70px6L>CO{kh{*_}!lY&hI9W*c7DHr{AovQ0Slq1bgK*2rodj7wWB)RAQy
zy^Mqay4?a<tCZ<Nr;KL6&_0zkWAHW=jTws|Z-t|~hGl6A8;5r!anAYu?sG1D+lO>Y
z^<^E^7ZJu=>A0^4?&23KbXP{@&KZ<zgxdY#+yA$a_+Q}iL1y%CQT*?AZ)0b}jQ`!-
ze2D*ji2waQ<9|OK9X_uT{j1b38~e)_>>P2w97Or3Uxy(=YC8)E{^c0aG-?9weF^}E
zwK3s3ff<BJ6KCW^1GCdoVdzl-;G-<SBzPz_4uuc#!VmGnb>oHU^7;wrF772x0_M3=
zHj?-#Gm;py=oHpjBi9y+dwxkz|56djx}8mgB~ycxJ8X9@fu{%ERugv>LX@ik&JR<b
zYrJNlXVnpB2P{|JDsqFCD+0?S*DGp->bs5e{S51T-!jkl-J4{lom&}#L_zOa>rmLb
z`#>B|r3!D(e^JGV3Q+w+t)q3L{iTJ)(9YS<#Yh?zyBt}gk)crBaB@gtQyj6L$rf|O
ztjh_`k65=EA!mIZ1^)0|G`h)*TCe&9wAz2E_qHV<r)IV20Pd3U+fs{Kz{A}JP3C;N
z+obT~p6@mw&-H@03ag=+N6SW`i==$W!o+ntRUf!4ibiLb0S(fQ1*GS`$W6Q9%^dF8
z>=XCR#xoiHM&HJvbFRFmo#HWS0WWvwM!;8$f1ewf+}QVuk?*${%FE_gnME4V>ri)i
z6t{LauJE(ifl=&g>O^+V3=WhmDuRe(Z!ObY+2OLb-!fp-EiiM3E(3_}-s&cvr(4PW
zsNRivMyATFYf&z1%iNBZzHz;SYt#%+0E~!mIs!B*-(t`Uih1w#=XfO<$0ED{pv4>X
zg=03FLCo^urs-^qQ7zMRf9jpY({q}v)teGbsqu#2!xO_GePtTJk$SWOPt4o$t^)5W
z9F4p`4$cFA`bY2N#!Ih!lE$C(`}&t-VjDX8<#?BZ0g%|pkBG67IZ1hkpf5gyF+6WE
zE7Kuj%UeIgKLA8ajD^A?U4yKg2D}n)amZS<^Oped{xHOa1*al<Ph87iA#X7<rbHen
zFKiZum+p$9AfV!<3ooY~fQJ5B2`kD3{2L>$k#1nl&MRdcuzKgxH!s-%lgT9?jG)9^
zW#gh}CAAlA3($No)LI9^`rM!HWi@!sy3iYv8~7+L(f-KUo?c4p0IDTxD60vJcU-b`
zhdFWr49?>i0089Se!b;&&}inJpIk`??EC^sf&nvr?p9Dt94L<RQDni;+FSSP7y4Wn
zjXzv!f`pa-`JexB3CC(_5^^Y1oRWaL;%XfKn({lE+WNxTWzSLZNU11YNGxaD_kNf#
zCvH;&kf!lOD&&g_*g-Q=oCr;TZ1k5wF!5$7-`a!gFrAVBS+THl0{C82mzr>ORg2Zv
zXv7~Qb~;=++-F?<9A)>TdFkZ%t2=rEa{Pl40Ea$-86nqS;sOxJz~-QxgPbPiojA(5
z1T5O8&<(1Quq4HqT9ohQwM+w4AskKVhuoNWHusv67m?uH7*3&|gCq^^gaP<w7HUyg
z%Vvh7++HbTaO#|n!ifh*{X!-|xAd!Egzo7Q5Tz|&UJ{9Aa1^Iy2mY_Ih^Fq)?-tkO
zbKFri&>Amm*)z~@;yf5!l7SCrlTnC%<DP#Vut0GCGVszFyOO3O3z-X>i6I9Fr-CpV
zB!Q1~ZhIdIx!*OS|J!YPuw~FWO1*b~^WY0{vQThFKZ`L;?@Eo?&{95xp#8Cw8+wxq
zV**u2Hk0NTd^Uf0nmyBwS81IScp)ax4UK)w0{2JN6M{@odLGY4Lp-w1qVPYn;E(EK
z@~}Z=j%Gh|FnS;*oack<DIC4wxaok_a)4=OKUZP<IkR1yL=361ElPowa3xdbt(g()
zDui&g!xPDfBJ^pt<l<f?qD+fOOhl6kh#ol+O>9IHC!(4#-7_=bjQb&P(@u$VB}v8O
zDdif1w9Q3En~R(_ml<s?tu{>64teZahbOPNuu3kK5-vtQNXvyLtI1xOqLJoCc@%(F
zisEQZ*$Ox)Dd&Yh4NN<CsEMdo4RIZ#CYm-z>N4BF=!SIYkJ6ZfQ_*6BhOz`y@(J1$
zaC3=eFU2?^9<+O-uC+l?^KuwPL4vG+cExFUHj8K6HYFTmv1y;aUBt3t&7zcA<nXI<
zuoRV6s;Cv{iicM@Au%<f!AiwDj`%Kt1Se#mlx3|+(5c#hnc?<al6<3t6weiRppP37
z;(FJTa11-xyDqZ>!LhF&6bMcI)gym6l=J2hz6N%nffKeGXQaGhvxX}uLp;d<P1QQU
z*PcGFw%F4*2+kuvSOm&tG;OLRT_{W)gH`kbciM9-0|~e6n23ZKgCoLn#SAJ9GfD9{
zkD^w70!?z&2P=MKq+VT>AC2LxIu$h6Lam~T7p=4^>sS?$3tp&Pv%S5Qn!D^An{8G8
zwhee$41Q6G!7qw9F#~rehhInzuMOACfL2Z{B^hF=?mA(a5=(E*8M`c;G&wkOjRV`I
ze-eKV=v|JvJ=0@Sg3gL)$s<=rcrYTpqp3VjhCw3EMfAhi2fVKFbP9jnkml?QS3Zab
zdPqIPg=6q(oJjpcM*-B8)EX4QZ9IeR56%Zn>mfLi84n{4Ebs^CtY=hoeuM*a9D!;z
zisMW4GMDE*TFJw!Bd(1kWwa5qS`rYl$SC2xI)^<O+~;MV%^b06Ue4J|BoCzZ+1fDV
zJN_mZRy#e?`nCumzkbR%*!}Ujd934;CT8o8O@SCnppXb1%fjfCLJsMp3&BhC^r02u
z;+N554k4nb?)eS1T#$W*QPnoOwxSW53zy86sz9Lx*NKMb6wF^snZR)ZUh8l+&?%J7
zyy|YGZaK6IQ>n^bE!v)J3bt45%tCLx@zS-(Y^ZK-_3YK+UD%9NcN;Auu_g=eydcXI
zKCZ2)gV!txMv~W-k<L=u5bh?M<)SiAVzQCua&t}+C0^-<8l67Gw9t5=K;bbfLo2;b
z@^nUEo`sJO8vG`z4d>w)Puz{Hm`=9t<5Nb4rhF)Yp~gEi(UYqwXTnK5Ed#{qd@{p0
z7(H<j5=I17XDmBOG_`h8SUa!25D&VNr&5()of6li$MlWn=GuV>cEi!VPvYThFx@Av
zGoJe2k09a6G5(;z#_dwYELCq22rx&PG8l;b)*2qSOg4t^9ny_qP?>P_zZx?4mPIux
z-wCZX489#E)tTciDK`(B8fg;9u9zioTXfS*s(cniZxOpC@hU{$yo3Dh;pbrdId_B5
zo@|e=Qf}oDI;&b?5RX!>;gSkPkG;9wJ`FD|)BW@Hd0O(}yln8Q7lsM{$PK@&FB$P{
z6V-}gxO0n<QUi)gYUCZ}3Yr$By3hib1WcX;DkBAkHt}^j(??LMH{@I#n9sQvZ-;f9
zrNiuBJ^tIST#F{O#DHcZRSF!RJ@aua<wW`l9a7);lSz>53;t|*>{?7Vgm1ONTr?`Q
zY_GXmK$YeuKh=VlDbMNvV^UUQhXY*ul*)!|DLFMqIa6~~b82*!YB@X-=boHo0h5y~
za&j{Fo<yInBV)S6^x?cV<YBLseO1#^Zxvj~W}8Y{s;!0X&H^JuwV|9LTRC>wmTLEP
z<B(}}F_BON6gxNiqsh77=rkR2H%MPagM@tJMvgB*LT<_EWh_08(v90|T%Cu5bJdth
zq_q!1H2{@|?%(FQb4B%_Y>$I+8cdl&LsEx28*zNpU_amnf7GW|+dfTB+Mh55iY_)O
z7zH1+gjXM3?{a~PiK1WFBZD#l7NFH43P+1Zoh9n><@hrKwkVC2OnvuJ5gX?!V#J<3
znvEm$8lv{x6j+K)5EM~s8kSjnbg3jO(^NC`^kU5gM62~!X&oA^6_j%Nj}!<O2822I
zc?^6Rxfz=GmdV!cw#qtVX<o)qJwgVGhQyl=kQm?4cUZ<$j*$E#zb8evAe2{8Zv=n?
zkG^@Vi8nU)_~I%3y1N@WG}*CQVQg;fcdK>(p4ILL)%K26TWLUeP;2kGTB}UeQQ=gb
zjQq5ZJ4B!E5c|g5aFyr4dq<p|G;px<ya)y(yzTz^<+JxB$0E6=hW+OfItWHA$#-fg
zVB;~NCfe~G3ssHArai)TFgsy77TM>x5QZA>llTl{Zd7bJ@V(5Qfqo8NRD-)6J>%|b
zbdsE>4h0joAW8w}79_e;iP{qTw`#jPAW6R@!NVw=hA_b(VwH_Ob(~nt{Ie}BBdzn~
zplD9n=LZdn^AMq^Z~9R}KhRj|TU&ZY*IX1Y%AlBY(8<J6of78ZCv>r~qH~dmB<_T9
zGKvS6Dc;!PqaUbY4*bD+&?e8TVTGS#_%`toCh^J-rhYhL4xl-L^BKYgy!YiHL=w><
zK#$M;Wccg`SoDD61Z^9afH*K#%X1yuE(fqx^Q!I3LBmmJjCJ^J?-)3+0q{hd@xXW@
zgXvOMqey8Kxf`KqJw&VQ0My;ffx806J6~djXdsX3;y8jm92%FN;=9L;=Zd;5Za-<?
zLdzCy#BJx1(aSTVm!q=1Jd?c~X}xft8J<zN`xJd<y8C1N@9NX>ps78Ps}ctIWrjyu
zi|WvDJZO!ZYhhRO#AVaprH(o9bKkopKUa~uhX1P!8My+v6GuNzy%;?c&f|E>4pYJK
zED(;t^km(lp4ccRJ*W+Y@<;F&NW~Rpi#3OEX$=990-RdjWz$;&6y@Co7xN7|?L>cS
zp9NDPvSIkptlMl!=q*Vra=O}(o;YNzX^@0;WuN1dC9oI)Yr4So2dpU87mR`zNWqBw
z1PVq2{X(f44e4Jt5eXK$Bc(|_d)XmEfDcFjq~1kW^|j<X7^UAcqUPxIy9D}Oxak);
z=tmFQc=*I^%WE*lIJy@$^=cnTuvY@REr_p__@FUo)kzo(y;DrbMY0C{n1X)s=8a6^
z;IrhT{2%t!pt9PYeS$sb4?a$X1>6IptFnQqv)4NG|7@(ep)kg>9|mIKQw&)9f@;s2
zbG<fN_pMPSB7_+?et7Ds(a4JNO+ndm{!m;`$TK{3JbubPBg!z7S$;^*zmh7S=6F)e
zeo)KVq4s_elj#QPO@(tbgWl3#h0{?rUM8Db`B+*K8OZ^wo=l=&ft2}=GjZsyTuhj4
z8QtE1Hh!8igJfYo1I%C$B={HRpvg#2sJ9?P%6trJMLqthX7N4qOO_sNRm{{zNpHkF
zo9)=NC#xh?tBAuP>Bur#sFxN9!#+K$R|<O>ATkaWnE<m?e1ap>JC)0OUUFfTC`RJa
z1~~l<t@1WE;}tDEV+NOv_=WSo;B=bu3w^VBH|!vA>f<t_Ez9L@tn=Jf#$#KBc|6NK
zSVeQFX?Y>Dmfik}`taMrG10S$E~%5_V05~dmu^fSF;?e~dOaj5TXK@JS}DXHvVT2f
z|9Z&&b?4c?zRmPq^JXj~&zxY+2Uy^<G%)f@<Ffwi0+NK8?-rKL|Hbxp&w9TQwE27L
zp1?d|OUDxMfXSy`M;>(PN6o_){ZIF^$Cs4+qKrz8$aKr42c<pRv|wU1z732P^!SzX
z(##(o{{1fgE%f~_e%oQZ1D5~(*T&|?PS<$<YiDDl^YH%H!~0*~_xoSZUcY<(H%z-&
z^&v2&q3cO7_v>8Z494yt;j}|SN@WQs*q9ASSVCSf+Xr9vFsBJ{#@Wq^)G2IF0M{M8
zQYNoNX_UQla3{f|FB;pnZJV29W81ckZ)}?z+sVco+qP}ny7`@R>%LR<?)&3ab@wy|
z)l)NFJv|?^0AaFcm5xnXEbU`}AYy_01(WROlkegPUB9y(b1w5r|MnTopcmJ4&Kt77
z)Y^C(^otl^Y;g4R$^rXALZ69U*k*wI93t<M0}>p*VGYbp@J>QQCrL3VLy-JaK5d#<
zNI74}rKdve+Ro#GfHT8+n*Ok+<Y;qc3O>AeO_kWm%{#_!OxX1`yxSeyik-Qguufrf
zVi_k?31V>p#8v}J<pdN-MYOX?_5l}VuD&9HtZvDq`rX6f!tXojsbuCDnM<Nc;8F_t
zIUVmbZ7#BX3t?09wY+-j!r=z@Y2V(k>NE50oA$Pi^I=}uhGHSIcDMDG8<w8&^8K>w
z;<A4!-Vb}X{%v%x7{A$g+$kf3>%DKhrED+gFZhM|%^Di$hUK#?b$lPkh(Jr5RfX&}
zXoMXKzjn8OJN`9&Ey%$<4*NYv`Isws{hT?4X?-cpRzsYwRUp!?<Jh%iTAtCiPf@39
zIF;HM$H4aqT(ZrtaoPae>nk{KX&houJ<`*SET5rnb8Z`MURh*ZdS{X1QbOlKTw;qi
z6+`c+E_wHm6&F-TCnY*7#h~MhDXmlJ$|)p{-B~A=?KB7`w4tw|#&`E#x$FAwHC=|;
zdcJv)0VMyyc490u<2DVb3fy*9jrJInsQ%aY%ciW5RWFH3;GY6N^pwGDJ|yZGb1LfH
z=p*V;wOFS7&E)*)j?NNB+Q7o`w~O&K-Khq-Zl%dQ&8a52w%Ta3YQ4zNXe0`3{=hG)
zIwUE&;Qy0F3!cG8L?C{tS7VAz!cEQ*I;;~cMttpghTt3qjKC8uK&@&^g>vtMz6O}e
zy5aw}n-ZmV6DA$(VD6kEb;2VOJ0+7F{Y1G?+jkht(aVgHcVNy|vY_;JiO6z=DZsVT
z$-gEb5K6A><hb?59Y`m$!+KdEZdEVJXo$(O|G-Fic`C}nq2zq{fyquy(Ct<yu;AmH
zi3wOH#v>>aOIJ*eyff@sBv_9|2o|mGqi}VFA@>d2hN3B=B+!F5$M4{H1(M=Aju?yn
z!W?!HE6FRQ&zbqh4|I>ICpZWz9i0~Rn^#wHtS2iZ7j$C^&Pb_8=(Cb6t6t|Q7t|nf
zerN2wM)#EyX4hEC4Fu&F)83J^o^CqPVWfE~8DNLI!1Z>B72z1r>z)4g2t5}fLp+>(
zz3D+Sce0Pu2Qz&`>8Kp)uNlq^Ye?jHR3)FQ@?gT?#@YdE4MtV+M%5#6K$5$Q8*j{e
z@>VXxgZ3##`)V}STf{QMsPG_qJ%rNFQ-MoDnYA=s(bCia-^20br0st9U*TTc{a)Mt
zsAX%yeslakp)+OMi?;jG&G-EBV~O{*+Pw-|{wL7Lco@lgC>i#lTK6Tvpyp_x=BT9K
z4E(t%>I<e^3Z_J-;H#tHD`(n`VBeLT4IY2^QBRAUL-vs=YZ<lV!x>c9cv_YJ^=&ec
zSP^R62>mjf{MtmNWSqv9><Hpr8zPW?aCCL8)s9<;zxlK>E77=@UU0)n?46H<X8--s
z#aR7D{;q>;cbztOo6T3vJS5JuBed@Stv^dwpI+q(f|k9j=wFs9Ku^-vhwd6V*w5NF
zYQN*#_zjkviEY-{S&$;(3M3eXl8%rul-gk{OHpYnX_YR?vMnJm`=H-{AsEoAPIZlf
zQ-!bGq3-J6!|`D-f^NihIv}n?X%}ynHksu-(LHN)IeCA*y~hnR;|Q#Z)q6bjgS#UL
zp4vcaa~4_2!u@4$2c`X!%faoN!b3VbE;F%<l{z~5zih66#S>JXOEgE{FFC0#o--zk
zEk_6Zc#Xj7uveSHfB!bE&L6MsQv-SX&?63(;|P}HhT61&+H^wVXdt#p4VoDbouP#^
zq{Z!1<Az#aMQR=wUKtJjYy2xcjS}`>B55OIjCYMIvAaIp*1^Sw68A#ujto>b9#pmx
zQX4l?TQQN%k<bbqczzmazB){w8bWZj5>m4>kxg3Q-y~2&N=%S(otWri23ShO!~L5c
z#cs%0r@Q_oP(km^kDaW^t$v%WK^vZ9qkBV!is_J~#LertL^&RpE_NO+@jA!$!WW)W
zGrjK5Jvqn{G0i3jJ*}l4Df2DS98X5tEKk(;30zg)dy1zF&I-39*$Y;CmCK3D74xmK
zwG7U>=?=vvR(qpd&q6EntpS1i--}Zn5k5yTcIOkB7Q^dh5=<Jj(rb{+yK*E4B1&3g
zlJ5o>!E)44n|M&0N=O_(eMk+Q84sAD1vjLH>{FwLTIWV;mj1ar0Sa$FXZX3wN9_Gi
zj|l(&_2}O@>owmca-f4b@mH33?`qf!*5)DZr<q%6-}IMyKVza`YE;*<7VK{5x3y7=
zgbsa05(#W&>_CMw!hg~Rx~evmN;~fVq*wKS*OE1CQ~Rxyjhb5Ns=QDt^|=3&Zv8YQ
zYS^aqTS*%=HPKZyldG*C{#{Geuw{g;#E)2%<F0ZeQ`3stFu+K?ODg(>k_ZojYdGF+
z;gvr2_q$Xu#P4lB<R!8?PNJdkr2?7b{`c@cFI7*BS(kmxW`T06Sc_BC<QTnIinfia
zU7c?0pqyb8Iy<$76i$+f#ivK$>x>5+)@aM4XWW8X>7S#9#c`h4`kF$UdBr8?WLxgB
zW_(j#g{M+YQT=8GfgP+Anav3p*?QN8?N)kCv??Po4P^mi*TXPn5aUF!ToLbO{HNF8
zgWTU~+8UsSUUJ4pBP=!m#y}SkS0=w+ESh4h_SL<e#-hM2=)R}fdsXNKvRN9kys#3S
ztRdq>HU3yiFsbg`Em6?aMIjU0Dc92Cazq*Q5Khx%=U)o6SeNQ_D5Dbew<u8YC&6UA
zKL{A*XpcUJQ|d?b;SU6sq!xe<nO!IauV}Ybs$3TTVyL0QE!^lZjK+;DmQSoWmS6vD
z&zGePCL|2np(Nrh>2F?P<e^WX>tQJwed9=GSv_;@RQ7eHYg}LrG=R?sxCv>%H0k*b
z?le|Ssw<wUuJ~$6Vbw9!x>K?(@0dfjL@R=^9a1?$cb<g0Rgyi#QBl=C)j7`?2fiu4
z_-bmQ&AMW9;Z47GI<0pXC$J)CL?Lm+V4IC6=Zmw9zTkUcxA@Y12-{E!&#~Ym!RV!$
zyYqL{)zO#bG%%GSFgdori{C3_F4~>(HeGtOZ7oQXzWWVBS%8h0a1R^1;AljH76mxH
zX=$CHcK~*)Wjjf!)mXR8<BqPK!90J<G@U7Ki6<48oe7&kW$#J8@{GvvtO#XoSk3B>
zS6Wz2VVIpBVx}2lCOE$hkJrk?dA|A64DCt&@kxH#a)0V-znU<jb}T{jo!1HnV1?nt
zaDBGlO%IXR43QU{|ANbF#gPq3I{fnYbbdor0T+6lg|SI*o>A}eyfFjdnMKRx2j+r8
z-=-C17lDzf6*Y-M@1+%`7lEOz6;+Kw-`s?;POf!fLGR|Im9G4<B|o2q8nr_!W)=#r
z2=plDlQX6g5&)qVC3lFiIY!Sx+j8>r?5eg7x*cNh4$?1B1dDMD@T%7TzwY*LcCLyz
z(*FDHghR2`WV5{n-GS9V-}DhC$p+m6N@Gs`33<P~|BdXo($@qM3PO^d#plHAzzKCV
zP+CaR73OAv>yZw09E)(B?qHuMcK?BZ!394+glM?65hYU42J@*(qusK-c924pOu|C_
zP+T0d)(N^>zUzvQwn$&5&_xcbcHw~dE16XSmYsAX^EB?T9|p~55;A{EwZr~5__tdk
zfkzt@UwOv1<g>$DeEF&*l1+Fkvge<aO8=tp+mTxkFF3MPdA^+bIHapkvf$j9;RBZi
zj6hoc|03F{&-zn2&~qUar^ycf0yRR4RXi>&{Bq4=6XmWfJNYW|GpgGaVp)#cI)$k0
zK2EWnE#D~6ZXGl`#ONHOZ>DKkso5|p{Tsp@Vn5>fPmw9asV%Bx+V20lmkKPy`AvD<
zYz!wf%7jIK4w|?mn01%f0QD$MUvO43yAQ>ss2VcKUAiSS!XVL@gn$nTFSB$Eo7v1U
zZJ?seTz@=;3QyezJKd1(K7HGwrI>DeS*eXoV(H*lzhn{SOmj35mw}jQBA9vjj+;CJ
zB;reYOEd+j72q3+DRVVWnK3(>$w@#?BfLv6R<aA9Mnyg_ATHz45t3;R*`ghG6z$&l
z%(avDJJg<}jx&0+q@M*NPr&!8KXiFl8FU+1GIcg2cwagaHEa6!Ewl|7nl(gmCTs}X
zJPV;>J4799<Yr)CpI@*tP#-G1{}N*<#-vryJ6ySxyX0IF*l;3!Lfv0_6bTLtZbv1y
zTKIJ*s`QL-lQ+;%GYr|Crf?){F4&R&GT@6|$_PUEsaM=yV>YNRug@IEo-lXDt;;^L
zz2L3jUMQLg0}XSgeYUz@{~{I=tQ{8wvz|+2H}0sN>-=9#+-UN1@PrD)J<z)eJ)AuS
z85}ZJ?E}Lm2A9Ae=xH%?|GvetL$uUk>F8ct1oN}4f%%OAZQRhCw!9WMtwd@3-Ywe=
z*Tv4YW}@FU%r*yCdNml?WAtr5!@Qc^Y~_YgrBl@97`@w_o7a~_2pg4vPh^YPw`$dE
zWQH1=%S2#}=3h&UhlxP?27`6F6+=XklQd8x7ccJ4$OOis=+ATCmD0OmkLpI??aUYm
zc<bjS50V6SWBMLpzfAzhpUb499Pl?1OqZg7Shg8`nVDG474=zrn$w1+n%rN`3FBH&
zoXOq9M~S!)W+hz70zoT~%-lf0qJuB`ngBDHFdv13DVj!(W2xvEc>u=Al#!4-US4|?
z@TRN(mJZyBEab_Sk<c?<UJC}O*Ep~jfH|)PKhOQAaGd19l-^xC)c}5q<dW_HF%9+7
z<8`nGj5)H^C+v=OGPNh{j(u{qCv49$lNklMX`Ba$g6td=vZp6_-~jFH^qAp=dq4n~
z_;;J!luGX9#`vWAkFP6k>km(!1@hxhJ)t#w3iX~<ym|lA35e44$ILyTU5X;@11Igv
zxn*cBAoq`LyK_B!Vr4U?G4sMYd$wFV^z}QK-l3Wki~w$G+_nd@{DZxkemvcmmF?rn
z?U^#F7=mYN&?8l#KgM;C>XCUR?bbP<c#S(cAFY+Xb8sn}Kmgqk20OL6<oyl(d7tEP
z?R<1l3oB)(j3qw}<$D$c#$|!@TX+ab&47HgS*C{GEPa|LQ?kIQa=L(yVAhgF8z~2y
z7Jyy8l>2>m_qzR7>&Ja12zb7|jOs7+qwYz@5toXAIbHF?8T?{P8q2;spL${u3v`eC
zbU#FM`EjzhHh)*9pi&c5JQsXja2kzg*`v^IOtzB4G#Ib5$z0o?0JE0&grugG<tbm;
z&$*?~$kFjNZV~HN%Aj~!zM;OTA*OpK_=pG4)Z+X0i8Q?($9x@Qe#+nFp&z>PCXjb+
zYymZDpU8w360)<}lS#(6PY#g(V)#q$&ybYS1Gfk@)luqijBEdOLOb?O7?9oHK!ruX
z1~vz92DkXoR`x#w*ILFUj4+O<mB90M)8bo}^+6vSakF}XJ8jNfuAU{l!{5c6a}p``
z_HD;)(k=CFXS8a!%WzwI;dyrUhR6YT78K0Ux2_6%J1_e!k?658E;Za<QFa#x0E7bA
z?xWSR3nBzv%rnPjGj>J2V&&%y)>cc6UWBknla$ycRY^1QbL@N{o#dVEukIIUOh_FL
zG%PEGQsBtaDl6cxr#p{~-8|Uuq=?3>CWC_Pl!@0T_qS*^$FZIw$pq)fTS;a_4e($}
zOK8@?)O)wYuh?xAt~A=|G)={KY{NN2V*CX|72M^#PcY76zmj~$&p3a(woN%_de>#T
z&_@5=$J?!+b>C$XB++*T4XS^XMBtF;L|<_L)+ve6$*d1#7@rcQSF7li$@qaW9|}n|
zsG7QTX}RH+=mD1fCtR1;l?ydY0{uGXxu7k$##N#I4WeQY&ky<O;q(*RooqK|^w?M=
zzCfciP$G9<jl`?{G0aJJOPQ<WUnZHs(c{JSRV2hEe?+&Z#v|*7B&}Ys*L|(F06NRk
zgH5XI!Iy5I0xp|3<@_k%K&wOghB`6*S=Zu)|2Tz1ptb#iC*0Y*SwiTr<U&wF^zrke
zzm^<7cZpD6QX7AV{QyY>N&(u!OUa9cC{)IW@ZR{*n1EDq$YkHK?O(o>g_lwfm?Q5}
z!yo7n&+2vC;q+fxb&8%8mF8wY$Ne@_ZU_~)@_kdybnkE(hKzfAc-cg{Uryx9D-$_<
z)7Ye2pTu09OhLgHG4BjW?&U=$O2pz<YM;09JmGHw7c{ixXcKzB(*Ns#N@<&f0f@0J
zYu~gy^A^f?!+==aiJro^Hngq_w>O^w#U%;0?T9Ef;LLxeIJHLAw3PNI$15{rP%+|Y
zuo{Tx2{{6bCCiP56L1vKzO+dLc@YC4!y}eK`*K+MzKEL}IxP!YBF7rH4D2S~JnCkT
z8iCLf6*>-!JZPdSgQ#DqaPB|fFj;>6b?$P%qjSqcgV(7x956eXqdMF-qWItOWT>yq
z+Hj$1XsL<GX^Hv2;u()c<7v+b-g*frsW?9Df5oaK!v5F5u2c>Pq+JuVJ6)-6LhaeB
zA%^{VWjB(+?I%O*2K~ixW`B6?UiRm3>skwpS>?_0{CC>s<>k4Y)dK4lAy=Dqn;}<w
zp_GBfrwpCCE$Q<NU9vqf+GcW#OGE@7tj?BIpA8FxV+S?eYYstV(Ivo5NUGZ|m^E>(
zi?q;Ha@bVQ@hGBLVZB+d1<h{{=A0c*>@Zqc3^w!V&x<Uli>bz(J#zUeMID?FYV}n!
zLq}|yOBfh|O1+hZ$=+qam6-})+SV&+vr&8ILE3G8GO$El>031xnVYSDIPR8oND9_D
zY%Sl)mX)!^`9vpk&Ux00!D?FAet*Gk=I{IG3{xCjwtXbrDrxJbq4%WbT<v}81G4tq
zd7S!-00#V?;+i~2N4Ow<*evyZijJ~F0;J*cU$Ee7<-Y=YjGc=+Y?A4f`SVuKK9Z7|
zY4EA!@&u@1_=3nh2Yuny^u~3O=m4$D)e#KW)tO3J&8M8ib&(Ly$*ossKv|oZuY4?7
z%aus|s?QCvVhsXiDgKvq3;Em=wH$d{zvS?t!yQ@w?!o9DzMPcac?D)Rr^jsT>L#uG
zZ2M}p?<_6h7L|H~cH5Cz#`u2GUW`jA+zbhN<Yo!AiQaX!UX2^z3P3D8VS|a<TAZ$s
zIR+k+=ZY^>DQ}~&`6rKOoug{<&tHG58d_!b+-+jld#bwwZDNTjqEsHRW8K!Pm76s;
z8!E%SGAbYpE=pL_`N7BlF_=KFf}Qs~XZQpMeQ0G(COQWq{Y^%{L#)HI8m}wkGXHHV
z@dNlPgmOZdarRFm_RG|)H7BTc&kkjT!>Xa;_$dut@>w_OYCFk}2CXy^J`tnT0iCq?
zdROj=pK`x<z}%}LKVMZ?$h8fkOp{7UMYpmaEoxFNqDz;n&(77$CBxy2?nTx1*4B&9
z?GR_n_{P_RH|d;!yva7CBmeTiWDDCFbQdp*uDeiaJ2FTUgjZ(NEIP>E&6)GiGIm`E
zcAaZmL;J&N1C4vt7=HCz3z~;{$D%rc=rM2PCaUO|NX34co_}(sB(PF809)aoHwZnY
zNgc`7AaZQ>D+!)7U0g`jD|pqL)p^(2%$5E>Q&Wgqa!@M?U|XE%YzAe#4Tk(Q>4vOb
zMz@RNuv_+H<n<VP5djtWq_1YoXA!z4<%Cd4ANUDWxOFU2)2@g#%)xa#z$Rsrk-IK^
zO!(%Wzb+;cxeRAH0jA~PbRbi_N`0WOSR<3sT^h+R8@p87vw~m%n^~3JlE2-7zug0{
z?Q@pmg#Xgf@7eI?7O9pLkuj_hx7r1vu8{!mS;)IITX1Y_|CkY%NQk?%>^WQ5pQ=kG
z(AGpyMdMQaZ>J4CIXw3A`O_z+86Rs&#l<UuXsmNeSlAG=P2zP|_dioySJ`6jdr+27
zk}jk-u%F-}sLU+Lejtq5Pk;D8ld$(DaOQ;6v9oQ&32@Qvt0HyP2Wvq0O4<wH>y9ga
zKsEmkYb-=I+*B{PiLU<9jxkyAB4w#}j^-Ct79=IQ4Z=ya(eO}Ps!!zoj|ww|yQr$Q
z$wNtXF-r2wNbF=6RTrhXnsnj+Q8AHskfOyNN2!aRo!m%krZA^ED=FD-1VR4)R49Vo
zw4*9iou37u8{U{)p<$kPE>bqVos0<Z4EkUxewgx}j>)wRX1km#S}VKNOY)3fh78;+
zXQ7kIO9*pz#{5CO7n768u*+G<`orjCCFRo4r0eX|H8P4!v#&I-YvYE0%Ak>)+N~MC
z1cJ(4rb+d6k?^dQn~qKTk;A$k(yPTiQ>6ZBnO>w^^V4uyI;4wgxgX>0T^{01u}aMZ
z@aCn=2wl48IoEWV7;-c^TA0bJa?eNo5Kq*0cyNqoy9|*`L1m2#g|F^cHcbv_)&#58
z2G^>ES~Fu{V=WCsw5msJdxkc9@K4zH<G2;<EZ88J!W34|a>ZZehuiY_Frmg4$l)`2
z8R?{~&Y0OB75Np{it5GZDRJWHH^SupD~{4VXC}z+M39yq{;>lpd{G1^a?mutO!6{}
z4{ckKAd-a<)wQKL)AN<A>-IQeLI%Ct?wP>eZE`qb;sdSQtxcYw0W~}oz97o%<0~`G
z-gi(N9%7`-ali^FTE6=TggCB~2+iA{92ccJ6kniqayb#`+cQU+1X>Q_7#Iy$SCLYk
zK+?(!Qht@~>eSV+<nMmzn6|V<_VAmrTJHG`$iE8vue`unC2oQlV)9$nvmezU>USlQ
zv@zesmW%;ZSv!OlLQ$^`3#ObK0#5Q4j?pyIS8~O!f4S0D5f2n}#SyId4}p*|*L<7(
z%$e;M{CkLREKBZ#1kbi3-k3keExX0tFm9N<b@kVWwu3EQ*Yw9JLauRR0Q$cRd!!!<
ze(Ha&g}Xu`;@b)}1}Xzpo1@or8EgeV)y$<+-o`VlE}T`kpp;a@`%`_>vwZx!Q|1b9
zF0wEQtBU+D4bFU^3fScIe4oTw{ctGXT^{F^LIkcU-V1Qy+aMYbKlw5`iKssjI&|N5
z7Jdkm2sZu&!{$sApNEEyK=(CS90QnfUo&!autR}`4tUXW&sBCTmH~^1d>@MeI;LzL
zesU-uVq!uL!4o^FVEZr5GnAyMS%Yg=S91gYknQa+l10!iLTL0SU&3FR`nR5VSvdsY
z^O`7z0p=f<HZr=wxA$F&y4iqG`s%7*VDZ9ixU3O3Yuy1MFn}!XPaH*X87=&#@LE2_
zy-chXG68b+lpqfY#>+Vujv;lQlH;%BKW%+bMFE`TokAgC!#rO5`U8^d05L6Pha!S%
z2Q>Wz82W3Po0`7ak$2LQhp&gYt<jh?y?}>&T`-S{rw@zp;}cyN2q7a<!9amlaho0d
zK7lVdh<7}p97t#kpyrN8)(=pj#sLi~y17R(=n~-C0d2f`j7#q?kRhDVk)}LZ)QZxR
zY~B+@?;XxEx0yT_TQB+CwQa8KKb~1rCAi1(<&Eyr9_3{g8WkSpDNy$c;WCH6Dv!@G
zcex&OeimOsOLNJ-;8%$b&e&cTv0D2YfY8qRsFYto&r$0&vI!?wTjhc(I-O^KCpfrI
zgXIJlPB4^HaM=D37p9s&4Y{u3Mt^Vq2Rh1C0FQzfI7?C)5qSfQ5d4uc51GBeUP#=_
z94Xx>=+s<Fbq=KS;KPf{I;X^^2F~Z5trJoX|2oKwBydQ@L<E_WrNRKN8MF`u^1>Y~
z&<SLADxzb4%<Qqc5g<f4z8X9Rr0%&G>`Rt^Yyqh`9z*A=Xj=;u=sc-lS<vGUqjdgT
z*YqHYM=*BC3sTGaZZuu*PxkBJcz3tgSN-?=aLe>$-Ct1N^+iI@+hD@v(|7_;wt45I
z@(=<=2ONzUzn5$EA&GA&DA?Va6tD?dPcMY^39j^<;UYZzsE(6+gPrVtmQF{blQAmU
z=-QE#|8VS=lgNBr-%=f{gXCU}?N8)=e0O@~mJx=0bn<J3&h2=$yZ%G8pP%ytttXRi
z>S0Y=D;{2gqert}Mm`K7NX#<FnPqMbzkR5VM3@A4&+IHxjjn$|#ojw48yt!-AIF%z
zi|~O@7@l&@+=7SLdyj=LBqv<KX?6loZTT?DOR-)M*Frsv<zACkPc7rwu-8XO%hSQR
zctRAxSg;3KrP^<7Fs7A0^%U~{gB9~d85l)sEX*h95M>%FZe&FC{^*wKz`>2cA3Qj1
zNQF<#{u=K&5Of`JMwG1+q_eT9FfAF|Iq|FJI@11Q6@vm>5C(D2K&hRZI2db}b4sBU
zn90-Ov&biVC??$?rER)ub_ug&6Spd%hl<L<j$CXrA=60`cwvE&0dX!a7xhSLg;{Sq
z9~WF5uIc^Ci2Qw+$ccKa-CC>^jfY|EgwAc&5`VtMAx*jW7dz>X;&0iVB3VR)i7Kn0
z?>Us8UshvQ!ZCyYT-kEs6^1FRrp`rHa{W#PzhCzLez&BF+v|7Pv4yKnv5Z{aN6_0w
z@%lHUi#8P#&Gv<JOVy|Dqd-U?40Nen+5)g;{_!0Za6haTqN@J(v2RV3Dy?g|zO)g%
z^?jd=I;pJ?qM2@8{MSMNY3}R@*?-*p_$Q+y%elSDZ+*k&H0qU&QV#0eL&Te~3?UG-
zpY53FO?QS@IWnG9B9&oWZ(i~TAqVh|!n;RwOamA5^~yV4{l|P<vF31tGf=pSCFk$p
zG)#(U@hO5u=ZNA@;#5X#x<uh5=_I2FexT&hb|bPn+(H<q-*xi+d2_P3+px@tn<k6A
z-D?<ib&)Ltj4xG+g}RM*Kmy(u0=z37Kfo0>urZ1tpUkDVf5wNG1u&jKb41DW#LP5m
z9Hi<QiDK3Z*Agea#2Sg!+&9RAP%e2y5^!ocbY%Sa%P@(RdikwNrs1Zq7=e{L9$M>c
zH*)`F*&LE;TTlV6VtMswB|M_T;p(sHRi2FaNU=v{P7-S+7i?E*$sO(E&W1ipQc9wn
zN1gOD^m9{^3SJg&2Cv%59wum?BeErQw~!PGV};Bk@dTU38t}nT6!TP`Mb0tNi_lN1
z6-^;pcoHSgrP2&&Bm^nppdpY(PuVb~b%aa9PtundY8Gg<InrPbMzK&SF)cc{{M(1B
z<2p(aQWMC9|1{nszo)?Jx8jAL#sO`*$RN}$a6BU?xxFyMxk(Bsk)chT)ypJL2G(fK
z6IsvX-@eoiEiz|h-;ZT9YE!tCr~D$y^M$0P^$S!|h}dOa$N!IPlJ9qdPRV10)cuEN
zx4kOhYue9^=M#}}o1mL_PcWw&@V<L;^BSgpy``jkl9nRK_wmm04L*Wz{m{M6+ndz=
z0I?pln>7EZnLI_2#FYWj@7nk4JQ#gx-sG=bOQEZ^dcnuK0O#9^zu$T%!7KE4bZ)1f
zd*O4~)X~t-T(x%o)b;!C?B><w#YgY@KX{M{rmwmGqqGG33A*K&6-v4%$kZFIr@%9s
z+`_kwn_t!r6Q1+#?W&qX^?O%xkCKxXiwCd&3GFv9(i~wnxWZPb(Z?NM&V~Yh>;KSM
z+FjtyftVo)2>SmPT52%y--7M$d8d{e3b*IsZH0muSOjvlTuB<Heva@!^@A2CC0y?=
zy%MSw*81&0@VW8Sb_u`twG*Vo(uyp*)%{fSu@Gg%hqTAx#1e%5_m?+v$?<~#l;z^x
z7y5KZz|ZhIx)7EmgojN2>CKaUQBxadxqQjRtqqxeKC|wH8|>hLlYgfm*WAVJ{rz=_
zETNvmR%+CMZj36FMy5G{jzx!VM=eekZ2-!-vwX0Ou&Q<wb2?0Lbqq{!D-p;l#{#pa
z64(KZRus&dek_SAR3$CQGTkw69yX!>@i5ZOg9jJLhCW2IXDf17MGhuA7;_|+-t*DT
z%OCQIi%fCW#Pw~aJB|O}NSW8iERe2z9({hJf1I7V_8X<$KOdZa<#qpb5D$d#P|0$U
z2MZ0Ob37Rw@6nJ)eDVz9WpGU-uy{Yk#i+g$F5ik3|MHkTJA>kRL-6#Ffl<2|9CKP{
zVc4#vs;!ZZKyeE1se+LCfL1={ZLpt4#uT24g5N>Ij^)7y@5Z&9Nx&i${e1#Y^(Ol0
zT#`ib4fSg8<IZrSAi7LwN*JACxY|7Kkn`q<^1!C04@q&%XZdZHw4M1u;9Ykyapq7{
zf^YnVqvh+47Y!|#Yn(tRi;GshXde`lvTW~&H1CUYjrp{kXl!vKd81;d?z!o-#xz3D
zZ$PEk>KX^Kgc%}Ae_jz?1ScGnOL`SVk1>oni9#HeeFS|GFqaA7*%#PvcE(ASX^X%Q
z`*&+<(Qe{a)^b}Dm6T7{(&?Onq0MSRn*tZdH@w=0k#$pG1!!{B#Z$q$17rMfoA_)Y
zAG<W<ydWv=!qXG^s_<^fJM2LT=Rix&FpDQ$(d7lvH&uEUex@qc9n8*ajy8J_TuNRX
z#aJnjarY*XxPjbUy+1sU>mNQ}eN%=YqwqG;U>Ufr9L@Wg7Y~I5a{jKweGJbDPwtS3
zrf~rJHNIXZ(2N)T!cR_AKzgi}m!8CCDKgs|y2!kjO*0dzSkOIxPTYaudB(0lP`J;G
z2i*AlE7|lB8v#MyGN_xxLxC2kUJDx`n2pHBczWuuK`R^WRC5K>9zlz&{p7iUS}15@
zmZ7ce)_#RML!$`jq^s@ZH%?@<44jC9_|33!DV1UcuEDYiE966MUmV?f)aqdY9H<J;
zKdAgIds@_x;~)}yvP+d+yvrd<u+lA0DU#Fu>5kn=t9Y@U82O{a;yzJ3Ur;$vU(lNU
zspuG;Kg9DUWPYRj{Zcw7dm>S=jQBc-M*Vmc4;^$AF2LVydj_e6S`$hUsx&N-gA}G2
z!PfU+O8kzmTZpJ$ihg;La&+!e^86^6F`~Y~zLc;D<=t+~e!G6Mt!ql;mlY?;z8_Z7
zx)5fdPffJ|=`_*t#EYdrYkz}zoeWF7Th`n!p0YQdqW{c5>t^E)wg+1*?8fkz;;i;-
zDSJ%_O%sg&nJa9VU2b3D#{#ZqHk+BiQ44??1=0Uu!n^W4=M>kP*V#8!Wy@$TUZGpp
zX0@*@azxw4)OL^8k54zfFwj<Y;?UMU*jd%IjqhO$CAKpHm<qL857n&eT2r-NLyok}
z$T2eN4>JMoA_mKex~l96%5vlp4v6L7kn=zmJRtkYuM9YwJM7|IPu_MceLG7~`}((L
z;lo{f^w4lN5M%K8xK#`O%OA|pTs?+B4h5{f^JsbFke~g7l(t^f5Z}De5MOlg8f1*X
z>`(cH=S|bSKyoDxKyhIf;c_4orWb?A0aFgd9u~T++n5+g8)~ClNJGpfe7}n*>g!8(
zOl~<OI^g5oH<Yif2Q%`qIJ3mheshE6bqr|k*4p`kE%0U)`<E!^*)fTNs&WHjog8;q
zI=EZ@OjJ0zr>Qo=tj0H5rQPvrrFE%}SKF8@jJhco59vljPOw~F-LavxV(83oN43>D
zam)-?e$~WG*v6{;1vg7mGqWq`26S{h`+^9fV{$K_(uR#RU2I3y`A(a^=WO5UT0E;+
zj$driFLS9@+0yZ?vt@e@8tbaef|%1?(-~!fY=+Fc<f?%SdtJrJb?Wa3cGMk(*Hr)d
zgIe;Dkp1i70N`U2gzyUd*|%0wJGHOaW9-4~&pFwM7!|`6>zfs6VBj}$d9sj83}V~a
z`>bR4`8f4y!3DCC?k>_%!j?|X6~x91c}gX*`c`1KxiPtZY!8nVcU`kL==-sI(_eY~
z*}uP^CQgVmw)XIK@#AtAB=Xh0<#W-^?R{JCx8mpbaCKbR&uOm*Jh{~oIOGQy#m7_p
zUO3LUlJ*1~BDOO7{PC6l8rR)o$7Pe53W3{HRn<~IR5gvZ#<z(qEZR}$zbr(*9p4~f
zWq5O~QMP*zNvwgBXo3yt=;0PHqV8k1DnLb4mF2yP>8ig`3A9N<X(#bC6Ru8!NM{or
zLiz}z$@q`Tl4O8s0D9>zmKT8&0_p2}=R6s~{60lJ)hf8g%CBHZ#i0>A8k*2_C(hu`
zAj^atsF|Zbg~0R!8?Z0l{(C9wsah=XPDK1;9O^79IVMq}S4FM*U{I^}FzFjY`_mwk
zNkGX<58iHGJRn5D!IvZym~$Ge-XBkyyoX^T>>FW)7)$PqFioH=p4MoLeO>TLtbf=9
zRcw!n^Vp9Lz3wY>XCW_gKYU>r5B*fzj^!odo%n=)C}6zQKk95c@x+|jKPuApEW8+f
zvo+^vh;LQ}2UqfU>S$F@()ASwJd|AeW_wk_c9o<%vdm*uCeCeJf6iQEC8hA(DA*2w
z8N61}P~+gc<i}KC^)demkJ+Z#B;HK}Iut965M9H%r1nZvbBKq*Xq6#+5-u*(&P!=B
zrIvpDYPZ@sAzRIT=yi0>T5_|-&4L(k8(5MnB#+3WXfemV=`FA<Wr~r+OsfFyMeUoY
zI-8^lr75R`2R%~U@bu3EE3N9s15^E{H@hxWa$#pru74Zvb#l;-O3pzBDbD@-Oe!f<
zVX5<=*Tod_A3|*}4V=c`yuRr@O*w)q-u=mc#hO#jiO*w1wpe@^RHyw_PGMR?ztn7t
zo*Mn2Iq?h|`6Fq2Yk$oi!LQ_oJ@}}UldK4fj}j)8UmRNcu)s}BW)RRq5LG9HjxLws
zVdAQD!jw-v)k?V4>CqB}K7a4Qfz#K@7Xys=ZDR5$6=*aiS%U$T34t~=%*M1Sx|qS7
zB|NU*YB0D*8;5s1XCq+H-Vk|2nkxQhut{k)1R_KL-$xoC72le|V(Fg%Ep;Ax$Lr}S
z1U#?cOS!G$0T2K0Ajmwu6A_xRM9|34nC=<GVFZbFBay`0Je(O!F98{Jm`rXS$t-!Z
z^Yvs)YDIJuZzNq5N2M>>^@JGCKUTg{->?x&$^xbBNEtUoUIe>O80sPU2`-8Cz)-jo
zqu*U#^{(mFb~WclO|9GNsuPBNw<tdmu|W)pOO`Z6uNBZvN4PpO)sn_9waZq?B0l2d
z7PB9(2pd|+96L%oY4seRu|K*}d=7t6u++mKkznjlES^>#Gs#P3Iq8)B4{=P^j5r>A
z+EsPah1#T5148rYIQbXrypbg7?(`@-w!f5<T+>Pv8knfT&-=7i1mh!9$Iu%}Ry{3B
z`b+Mzz|ctn{dYx$-j*hl>bKs2*X)j^h_$EYIBAP))8Y=)l}Q{BR#?0YuWhWwtB}TA
zr;$#YAH1g`a>*xJ{3_;1_&ucSDXk+Vl`WhJ(SV@j?UkaA&xq!T%a5Tv$Y;to%vhF0
zL4!Kq<{@}y?6Hn^7<feo*2qEB9iNa~OV^p%5#LVK!MOfoxQXiN5@fqM|9%nN=vNuS
z-yX=a@6B|C<PR;{oW?T@!ku?_N2;Mp(XiPxk^>KpL*F&xlX=$X&95HEnP{ZGnFseh
zhYa)Uslz6(-X7RU@Ele&58#9zPYQcOQX=RNOv@_<{A+46><2tcP-@eXltscqVWC-F
zniDM=XJUl2?ZcTFt(A17-R`@^hNN)E$^KVn(teFoK6XUSf4-Ej-2z#K<FkMjKog!p
z>uWd9=4JL&bHWdTy`lXv{`;Kxd-*y0`@Ek~oDos~VY?gf+~$PB)JeZbcw<v@@dcb#
z!70q2VU{3zKxUUtkl4rNc7)tOL)abY*roD5Il^v6$e<HP`_E}z`J4GGkc`Gz;hc$^
z6p#gUmu@yKkW7iskJ#x0@(bDkKbm24a6drRF+Sz4<f0H+jQibL5-0|-kXV*~%u;w3
zu&-}iqttdhUQ`r^$Q1%b9ShgSpR_;E`e@urE8Q(d93`{_=sgU#Vyl-n+ZQn1UwD+n
zE?i3U+iX#s@Qvg;PT4E_tNLd)IQjhko+Br;bl1OVsh_%yjCCz8*$7<AS9?M|do~^~
z`nJA8FeU<XjbI!%v+$mIP;MIkT;uxrBSxb<dn2gS$krqp(|8gF5pnoSjxg58gZ*iD
zaV1RMm{2d~e;2s*V<tloj_;tK)m%Ci9Cmz&2(Rb&8^%2^RRu%LsTyDbyppV5%Rb_#
z{F6KJ?};ddFOjb69E9&yFV<Cj=4sqA)sLPB7tR70>cES~w_*;SF_3<0RjVT&f#rPw
zsWbMBL?exRM~uz7f3&MS09%b({PL0U%*Ho1DzDG7IpyD`We&-bAs5H<QhaPh2voTY
zuG_dyBD}Ce#zs&*VX>Fuol^XjCfb(<{P(!E7}_rF?fXS|TzZ6g0UNy1fo!V3i&>Dk
z=~J+9W(4DmZKShMzo{Ja9aW+sB*?9%M*VOk8eXfDOXpX56n%iy_g-1^geQXmw8c5?
zwD)<stC+7R##!mr{#}Eke(hx4?5m$eaCUxE2>t>B<H4n>el8f4yAR&coM=p{LK`k=
z%X-F3ELOc_8R$y2jx3?o+=1Wv9HxWixq4pa&{`Lo>|ENpI}Hk@a1v+W%e_F=Uj$nX
zMnSsQa0c08FnFtx8AOMr4;%8K9&;he_9*(*^F&&&_>fFEz|A9nG!SL)$moHN03^(i
zJ+&PI5$SOOoWe%Ixsndu#vd>|{*19O+mV62;%Rb4uL}8F5NaDpO#;759a*x)@?N6t
zw1l3K3z8wEx)1&eiZS!F<$DXj834nF%=7qj$0`<6gd2y8M3w@0Bz&78Ipp`w9inkC
zxCs+5J3z2ak^A+b!^EwTYCnjjMdB-%BvOHj(2EH>X^10Q8fk2M4GF{ffyG%OOAp3Q
z_|nM@VTR)u`t5Diz!@_fu$aaz@e(}Yr$pA~|B<7hOq39q-C&`%?Y7{8e1M>O194L?
zfwpD<CtElOJS9n^Ad!$%h^So5RL9~NKpddgYyO(GzXfCp^F@`o^cadx7DJ)>oGnmu
z!TV3jB@qb~j`{if7o`&3zSlUeYdNYP1JCI;4Izb^ZA4GNbSvhh_A=6;W=tPxkn-3R
zD_bRTL$6H0@vBrNHM`>(;V7R(i6%Nbq+xN}RWn^fhytq&a^=)W=m0~#&l5uX5qAzd
z@}yj!2@^7zd3D^EK0Mi>B(X)5V_9;U&R$Ws;syiLzB0WS!FSJy+XKNcK%;C<{mVXi
z2>$(<lqq25;w|;<?FQN|79_B?WGl7%lq5^qN>L6c!a;f`8UJzvXQCz@PZRc=en(xn
zv^g<d2}YDUS+yf&V{TPyg*9}m>z%kAMk{D_u`X{lU@j-A&~a41{W3B<6?LQ8Bjm}0
z$XHj7<WaiFw^gNECzA1ZTYq~|kD|9LCE8yT;8CjmI2#UBomY+=i1w^iASM{nWciUP
z(a14VspZl-ieakZJdUts^`-Sus*{FEe`&#FcPnV%2=_-X);f2DL@rri)C9?rY706S
zqixxoVH5Q}7v3ddpg%k2&Ebb2R+g|ir&`gmRC>wnl7^MyLN8$B7Tawfz@4@nCB>E^
zL_bQfVYhG)ZZqM3(&-w+ZrHa2Y-ol4M0WJf3=G}ePhPNmZEO@P#Y3ID;K(mceL#<O
zNTPTT*-}D>_sNz4Tx0c!7ydB}o?#6OC4LRqSX%?c50d4DlY$dF5~^gaVR}(#CyF78
zd8BG+E@8{ZF)25@goS>a7aGJYL5D`hju<f+c5{R8<wuvkx+XhF!$+ZirYIV|3eS&E
z_rBcweq#D3fj7Cd71pcEm^xJ};-E#FepE+?5JJ1mK47@<J4u4y4RnGxGeOJ~!kd{a
z{K6KWB@QO&<fC4xNB^?+oyRTVkH2<5*WsZjE7c04e`tsK=xuGquiJmbO0q`mpKgF=
z*%{UFqaaGucZ7_5s7>43C+EJQp|q?+i!QV;PqJ55zlDaH^FVrsJ{cF4hfkbNpFV}*
zNYG@7*lnbgE6HwthIvljweinD3^IxF_wOX(rVPY?_@0fXn|Xkk5`oV8n_fbH28GrJ
zhIMg&t2(yef^-t(%!dh~jCLPGW)Kn24;NQ}@GX)oaKILMyZj~5g#vaa0Ea2Jm<1&3
z`fF;)MMO!8M3dyIN)cQ!?&HB?V5<HmyTdnmX2pRFG^ap`)ihhX0txPiG|1%=&zha@
zS}gjPruUDq=XoFtL#Pqmt(d>g#7)=Qc#M|N%`Zmt>e0bqh<H}#y3y%bh`Nsv@JAfy
z_g=nl!KbapPFqq<v?=pX7&7Nt$Yd(1AA#xEX72h6vbKG(+Tw|U+uMFf5@Kj%Xa}IV
z#H`!bOMCjxLf}BOPO`e(3<;2##m&XVMb*Woya(23QnY>uM}3nyH`U`JsuE^il{`mC
zMB@HtQg|UIHR_(LJgd>_zX2bCBgNJ5cpo8<Xwdp*(r9U6w_x<TBYs`Ya{R_J62mB1
zuT}#=YLJB!HrgpGK818~O*6XDSX`6JJ{btf0*r3z+7iiPAJ^1)uaZT(fearQg)9eh
z5K}ntNu#z%W|sP}4|89RIYp8IxPqJ|Fp+%zr%^I+iKWopx=7eN+LO~==Fwh@iHRYx
zNy;#QSx@gtHnXztdhm?Qy0LKL_N!~+&T0@<>M2M6QDKOT7E(4wg^;0p(ULNcwnsmn
zP;6Ip`CQeKMWj|oHv*b|r%LoH@bc%(v?0!Rtz~RGd=k{b%zLCJ*!yra!q)lzz9K7@
zxDr&uB!BE)9J1rFZ^#Vh%U?PraQBD%R&If%POed01jvkn^X2CWjfu#i1fnJo&gAZ*
zfrCsz-K2pnz`gYORR8wTg_CKQI#HGUsMx7GaQfedv`U>A$E^a>;l^9xSnKn!BxN)G
zXNjeZMx;c4@l_Ddi>2}(&26d$ZTZq8{^{LtU7&g%<V@0am7ia7C%4__DZuwA7Oted
zw=xlIT+};`$I`nC(bm>UgdMH@UC~h=hi}Vc58Ey%V9e@?kF`!>kdUXw#QVwy@4ADD
ztqmpHQ^uN1Ok%Q_Fk{Oe)YDFv;3k@7nuBBlI)9X+cA63Jp7I9h;O<yZygIVb;wDYR
zaM7tK2%kA_n033PupPGVzT^SN>N>sbaf^;ubVy2gqgU<LZEE6Q2&B%bV4Pk^D-TSn
zXesvs?1nh!cZ=>R8hN+dFg?A5tHe(?_oy+VA<&yCt6Ye@#62RhUo*KAu%vn^9dhW`
zDR+B=Tl3fJ;leJk(z7jzA8mLw+gSUWP1a$qDRT(oJ^pgXx*==I0rHR7JcIj_y*fFS
zx0$hd1&5Qf3a@OD&hshUzhIaC>nHI5hG)D8#hkh6v*cP6eaV!Ae2Kr_t@<M;!>J>V
z>U#k(eA6$3MLR*#YFpN{@ePeaWhe4L$|yt>h{{bp@x)|`hO6|(_z^oNRg3zOu3I>@
zfU(RooPLwui{{;P>?rvA6MEWBkk6XPwn1%@KIljLHLy;AV^<O7p4k^8DTh3`J=T;q
z2Ef;{b)Lx?N{{)yIm7Q9;rY#flA2=+$wB_4(4i2%+U&~y>V&IbaH8rc5X=TNw7tz9
zn!j11*bzZVY|=|`RjEC#BUQNm5E}^r>yx|_HG%?yS41>{k<|O-#y0v}($5`6=f%Er
z^y|zT$ROFC#lmr*kqbmC*nS5y6K8z0gJALT0cXg<MRfcVk+@xdfZFPUh*Jut^;*F#
z-0d9fek0<%fnAYbxEJNbrs*^mLl#tKq64=^(xEe)@+uY?O#H8{Klxh2-y{|{r2#pZ
z6-i*;r902R_dGuR5ng@hg1a`6s#gUL`V`ajUaG0GP@9EVWM}IHnmPd`p|i4lOcr~c
z+9lBos4CqQ>2P@Ka}SNwrl3u~Y2at#&@mJSPh`1rq0a>!W<-u>Wl`#L4;zAV#rBrO
ze~hoP(k(Z|jQP5`tXr5-mHORZqkfz^`IT&My2&R?CpQZbM7p_l?jd&OQC`wmds)rA
z{3~;m0kwV+yKcYN2zD<Par1FDIn{9y!7fyH4Ps)!JV?Se3sk)S<w;gRz=bJfu1ZxF
z&q!|Wt-XdBN2{~n60%#N3$LfTE=3baLnin~I|KDy`TvUHA)l&Z&rGs;(};jiz?LLH
z{l(l9bZi>T5z17VIx2$%YYl*@Hf>Y)IhZP&uTa$H^;WiV+%5wbUD@nX?9Spp>u_37
zTOIvQadNe`n>EDHCLFNf_4*tdZh9!A>84S|r@sVqk-JqWO2>W`Od2xT`urpXE*9Vw
zTt-kiX3$@@IR$~``KyCu_eTa2wg2~MutI!-p$Aor>6n&=r;k?*J@XqNe5p(_6#N0g
z1J|ID7lc}7%|H6xq^mI$vVaQ-&u;L+CRt|YMRa(|HB}#anlj;+3YBP^wOc8%JtrP#
zi-H_cp9~gS3%EtZLtW@mtV?{cV{K5`-)p959c5vI-|0@U^Uu>u2LrU)08~WVCRx7Z
zw#;w^)6HQ0444uX*^LF$5~=$ARQ=|Gf;sJXGF6OfbDifQhn_lP0Do6PjyXh+_!?s!
z-1P+H%>?A_1kmY9`?!JfHJ6WpGU&xA$yY+P(=}$-7UPe)1~&;0RIz@bTR`DvJVhrw
zT8b)hg0l~NajnyVt;%><xGW>ZO*xpu#XnE<zuJ_ygVk)9+OV1i#FG|%W@fI-F62+7
zPT&Tbsrai)UUvf$h*{Y8rXZM5r+J{;O@^j^m`U`wJ20wp4q(t{B)!)EEDcu=!}7Y}
zk<CfDfam0Hs075{D<mvrsS!p96{@O(O=WJZe4Iu<n<gt{y+N7luW(+Dz+2_d+B2D;
zRE<^2(@{@tv3L3VOrQz`4V0IZ$@_V+Kj9za9<{J?+x`({2t-rV$1c(jHqy?R1YYgk
zIT+c6&YyvmY69VLwdMhoKueD#jJiH%vPoT0+#H6~t#w1F>MdB||M>>vJ7w&>LpdJ0
zE|LJ<)v_bqEpCD*MoOBZWJyvVT#F}*EgneZHlI!pa&I2eIOP|z_LnQp((jI~cZ+c_
zb+N&ECQ$3IAp;R=Bt$_=Tj@6r*ot8|D~HT{z%5v?h$$&xj;|eS`?eu?v`52uva|gh
zVu>c$UGXv3AqO)*#<**<&Lmh(K$bJb#P(PW4S!!hU-u_IGRmC*+`h&TWI^L8mUa`f
zDJx{q$a*#XkoYOVP$F{2lRiGLhV>mpN5O8i7!wgN9baTc#T}SWf`HA9qENO$05_J;
zQMkxTH<Gf42pk)(7V&`_O0?+|u~Pp6My0EZj?qr^4*km5Feq(Pi1y<zP?BEeFWH;I
zcBd7ut^~#&G2@adbp6nxn1i%bPJO|YsLCAo;lp^<5&{-w-H&f2{`&*TA~;SjxB(+P
zB;ODxvC>nCvS5xig{2|gWH_9#Wopn^0rcvc>J+f3V(+e&th#GUT^U_mnGpC=^6@!-
z_uTpnSjKBQqno|W#1|ws@GZ%@P6O62gp*qiZg^Y&Ro@ZSTmZK-zniBZ-6?l0<|44W
z7J6YG2aS|7bmAn(5C=_p1BthPlm!)s2JMi3Ljt{w=+&|{eZ%()0oFR~^ebcnq=1IV
zbOx}w`IZTZS>T<sEaAa-4zR@LPd)sWXi9X{R{~F7MpOU7$tC5Jc?RhS=pq8W9;Cv1
zwSg+E`LcohUMp^XskB5;epWT4Wh+0XdFd7&;eb$JYq$Ssv3s!`mKub@uP)$%_s*Q;
zviO-l*WN%zH&$jDy`sEY568P*=TtMKN7gHlfv@$`vRe<c4ykgN?-?}V1A}W<WTzKo
z0%@TyEo-BI&A0JwuaVePH0?J4pBzpsI+d`dvK}6ang_tLIOrD{o@q}tRtm27cDt3+
ze_3UsB2}^!opY{Ry-8=Wexnyejo!#oi?RDRR7GVx*fraom^0ykUjv2n+3)|*MCTx)
zycA~f`5qG$H*OzrbBc+IqqR-|;7+bCa+yx?s|;r@YOqtI$E9jnrZJ-Q@xX=1&ISn)
z`v4gRz61v6srVJk99@dHgR5RIXUHAH*TmFtK^lIHKT4@<HxoK`7)hiAkd(QWW$As9
zM-IU!;TVoA$#7t)A==TxRL34x^LnHJYCEouC9t#8L_nLNKnen-K>zyq-P5%6x?uI%
zm#5`>l<c;eF>AA6%;X9W1g^us9>1^u1!6#*zcZ5y%--8*Ov4)(+S?<sw?{R5d*tlx
z3a;YeKP4_8Kszkm#|#uL(&Ye>EKoOl7@&clV*QvDkJNLLUnun=`E#pyD^ZqL1$#&j
zxLS`)l7ir@CNYR)Qw<9*t!*DDabgdpFA+b#|9@JYxtUw@Q_c0x>{Xag8}rCT4oW+I
z7q*Kqg;gDYdIMDZtIU7kmTcRljVg{=XWP|4WA+eY6Rh=(|KDs|ccSmBcEloBTx`Am
zw77Tg8v;M~LN;iJK>0Z3mdCX3aJ3SHvWmq!;+pD86u}agT_w>N5q12ga~`AoTTIWw
zuD=$2L3PlqZ6AD1-h^f(N>JB{j3V(}!>c}1;CItlbMm%x+*D#|7GIZ+MYm~RW6-Ni
z@f={Z_iUw#XTj=k1$Ni>sb<N1C?v_6Tbs*Jjo$Trz78+lbl(SLZnYa+Y*Xim2}R5{
z&H6*8q{Jl_>3L?}dxWp|-rz-42~zOBdeOuNP)j5V;ssVbn@hn`Su2t5+e*Ce1#ioU
zf9oVxpmX(QQ?rQp8WS*3xOtqb%7yf?k`+2DJ|7&pCCp3ivSW?%6Pm-ZM!C<J6UNfo
z=~d71i#n2?^rpFvBKwovl-B7!4Xq`ZC0`nMR_dHS%95jfXn6Md*S?KD7gsP2!dkh+
z3(#iJo)gvE3SGUe<JiQ?DF}e*vWh6Ay6!5tPTrRYfl!#q42<*sDCqRq60~StFtC*r
z*3^VY-%9_p$t9-3#ES@yH@Fq%O@N-#p(LmoFH%U+yN*p*GD9S(dZ>npM)qw6C2$a9
z!RY1)?Mgdra*rmK&zm_FmQeAomTxI+%@96y2+CMhQlK4D_9z}zC;P&CtWx*j8)9OG
zx1nG0z|_D@JA_H;C8WYv#66A4UO2azLvZ#{hHk4`7O+3<UB*Kcyf(?%+nfpMQ`of3
zlq+D!GMZ7Y>XQOtxCg5>8eAwOo^@gOw*!><JvMN>LdZHJI=V8pm30aQ+Adf6w6r)v
z97s@w;m={QVPvgfBiPz$c(PffB#*fYWI<PLMvp|ZI{%j2k(fL6=jqC&KjV&_V}7DO
z9b1zA#-Qm^%n@mhGAt?>QaG1(h>&;kQqEF2_EcdpViN4J1Y~zc0v>I#GgA23MUDNL
z-kTs(sd<z@rV?bzAeoX0B-sbq%)-t_7RW0tGCLrzBuHWb=+U+vToJ@8f=DiIc=4|z
z=t@`rb0AP#x#abK1N`C0E?7QBT&~d{>-67M_Cb1Y1z$G6AAw}_r3{h~c#%qpK@yA|
ziXi^F*TT3za%R#uAgb8wUhT&!zw+bCU_j2J!}q0DCLkF!T8QjA7$MrxSy8nk3x}sy
zX#l1>zOH(#QgpD}(Fo25id4l|J+5v&3L;D{gDEkLEMmz-<ap4*ccY|0b881HYEurV
z{FV+%JghIKIoWoD%k0_)PO{Z`Le4swE{lxSm66nB=&TW_sWKRq!fGjz#3d%fI*+D9
zHMCBr6HL<~)=*Y={c)O2FD^wW9pII3pv>rE!m|-qx8a8EX>@=FV;=Q%nVn6V^dVMu
zMLT*r{p_0E{O3O_rYRQZg(LyU#>>CPy|pbU==ZEv6N^xzXfjCNCIh~m<SZ#>9<!>E
zK7d{1M+$xZf9<3l=C>`<_BQP%VuqB(ipDo%Y&2`NXrfzKxcYuYt~vAQp^hoiwi+!e
zpj8oOftYl_C~_N3LFl8{MOq(4Us6(prOC10O?-Y1%cjF^H7MzTGBEA#f-aF7AGYwH
zT^?0chgANIZJO4(a?sXU$UuQ4@s&CFIM0SzdVzV#-o~ueSy!P>+%zXkap|G^&?N{o
zc9U7{{#MH~Nby9*j}kLx=B47mtGL^ldgP1Hy6{E#l_JVYE`fFi-O9LsGB8??W@Kxi
z2Jj6(B(vkrj%9fNUZjv?UAhl@0O)SQWH&O7Vaj=cIuSb+#RafE=~v`rj7uGtx=!$<
z1IjMP<C3pVcT?&eZS!CPKCa<?OZ&0A$?+1nH6FHu@7lrN==pz1;$&NVh^X16sp8_(
zyq(DUE^NUIG{l<PIyT$KR;}O+*5=A^%00Y9Z!5|lB$3k5?!$-y%o>0n{eVdFW*_Ks
z)TWBwG-#kMyNEYS0ZH;PQoB1?US(&pZZp0NrCd4bG&bj%jnJb8!(xopHd7-2I8R~Q
zi;;ilJYRI@jFz`^(sIU(^3JKxWWsj&v})%_vP+rlQYO39Bs<R7KxCJ?WS0`zrS3{L
zCM<&d;R!442BqmVBP?11_}oW0sTp>xC(6I-e27eqp-6ekMb?rDAIEr7s~in*tfdSr
z%!UK}9)GYK6k^oC<q}ELjjk14>p#+RC~+Ce40@hFL);nPKOwQurBYqCIK!sKdEH2V
zI-%I4CduOD*Y&C5;7p<+(RGv<6HcOKCa|HRd8qiIV4Z=Ef}u$|c};YI$wNW9OeixP
zOoD`LOvszwqA&7=goS2_PVfRL=sGFl>U2J=4-7Ig92zMd6bg3rrGTAmhlMp9<uJ-G
zVm-<{rW%0gpO_BF07W0<rf;d$#eSfrlXcw(CKpo)LC`)}Oh-WXx4}i04UFVRc?5!!
z>Fmd<45fh~Z8<{0JdmQH;g8)x?Geir=gLEq59Tc=R|?JKfy%cS&^98(Z?%K_;w{ef
z7G2hy0ctN!%+0a2@j!g_aO1x8&}GLEugf^}vDx|VTXLhu>;2&V#slSde2#LgjQ%}~
zK}INUTc8jUTQ-tQ-Pf=mc>0v^%3XJZP&lXKMN`00InZNkX}PlE>mHkS-4sBuN8F@8
z#umkP=2-Xq`57hjR<Kc@$Sma0Ln_UqTG<K8oMbHKEP&DZW<ku*nFupwYcAM&_FPmS
zbInDyYA_elDjdA-mqv#f!(HU>m6?S2Yg3$Ze<4HZ?4or`EA;U}i+d3JvXZYmCg-^`
zS1wYnV6JS9vlC7(ZwT){hjqls)-EDNDzTw2NvRr#C|ys60(iuwa`9QC%=#)?M>JOu
z8|C-+45_AnU-o|q0o^jm_J`7hP*HjQz|4v1Q)J2+1#c1UMk03_dF`Yncfl1iv5-h^
zYk44&tBSuZ`uI8#f=CIXhzIW+BM^0(+T}g0LY1m)$o95dm`_yyaDx%^MY^&}l+^UK
zNTrlnMPlYix2ekjHLfc&$3L_9inC%1!CO4ft><5<j*VHrW|3u?Rkvo5LC#a^X2FHY
zm#{GTG8d*nX&tkq&XQ~Dm?^fmjhu^i_Gzpifv)L=u*DVAF{ZaeKUmN>MhI9`+6=h+
zVzFX9AGZ+CyajlcQ+LO#w_{^JkKy$1$5#0pz6k~r`*nJu3yu8!v#iupuFz8+p)PuE
z4)CdyLYZT$p2Al<HE^8D;T1DB(!SX>$&H>~{)4*GbvF{cWWeK(?m|qOzH9bHPp-w9
zoP?Fx7e8dKMNekMnju!_TC5(UCo6)Ii$*O|vg0}qR#_w#$aKs~DS>*cEc;nG`fG{~
zr~(LgN&xOpao#AIx2z}cPv|Wx$@`Of%gXcq#NL*}C3LX67mR$bQhJs8UR}{EO3wY4
z0aqwGd~)=(_xR|aVFYSND+t3dq7+Z?J9>5p8~AI>lUhQA0_|xCfR(F$%a@v>{4L_n
z_wC5@Euebj`xeRR0R8U!Ac9}@{U2cQ<}3Ul?Mt#;<?ldS*xe1j8EpsO;4dTkD@etk
zDgSvT{=CBf0KE=nFS2!CHt&n}@<A#dT*(Jl{VEt@&JvPD0mHLjrq;_V`{kAK($f#t
z@D{`>p_zB7W}DRwwS`mi-cXw@l9{<gg$ro`c^I7CVofcUc(}$9uw1^wOT>lXfi8mZ
zNQIQszbKC}7|jJ!#24gdA<TW3xJ%PuFr^5%lcQ&k&EivqJ>*x}l&pA|S}nScZiP=4
zj~|63x|3FkPK^O(=jT|*ioHkTcp_?z8^fi#UQz`QFxofCWBgrZdycuJ`It=4%287R
z2=#$%WcrGAdgT9$xKJqr9bQOG?4U5yoWYS7TK8p=jKs!y7TuuX`JYbIbg(Me+CdSP
znHgPvQL!pc&6ETd&A<aQQboY&jHktA?+i}m(6s59T{`)56;Xw}r9GL|RB|!xZg-jm
z{Uq;CM^rUNuLk7>0R$ZVb;L`2kqI9m86k}!)sl?K4M-=Mqcj0LJzKUTw;y5rXHN@m
zgrV00*55g0L6N{IjWOStbcri}V1+la6EJ-eh~(!}l@miI7^bVQj0gtyXGqpX^jwyC
z4)Cg>lzzN}q_G{Kr^VH+4bZiaZngBR`8<AP#K0O87ck|0iHCe%mdNK3kdIApUfL0#
z_z34w72&*WN;r>vIrBV(gKD{*Ympp13&kLS#^FEDOQ}F@B$)334JD}dc0`clNp8qS
zL{5r}l1mIIY{u?h)%fxXL?UR$o{F9M-33uQ10TGws-uNeWfu!ZG@LlQddWbfi!&eS
zqs@A5(vU5j2$yIJh?Wu9*$Kn8i)pTl{}IK*f?)a@GDF6L3o;mzRg>JAZalq#Tsg(y
zsJ_dN<uKNuV@l+*E=Q)KVayOh7Xc#+0V9O~^ZDAIK`^HGw@WKZs<K8XpRXG*+a|=F
z6>qfT#2Ty7c*lOXYrb34<FG$-qaAH|P%fnfcopQWor)%6Jx2ZVY~^~4`lzPBdW__H
z7!fF@z^q|$T-iz~S2rQ*h<x8|VpBL@vxbf*yLp7JxWS>7EOTuwaic=T`WpMP$8Pv)
zSegOAI1(%3$4)?q!7Cykpt=E`7IbhDpBiT5s3o1QT{vf))r(ss6U%{+V@f<c<h3*M
zH}3obieKrW3;2)&9UH>galo*bbkL}=-S)lpS0ex-)vy(I;Eie3lq;<zAO~MSa~lgn
zu6ivO^-Y^FPJgX9_U5*+Cd13ZdM${(Eg7IaGyCP0^-^tvHn;5i-d>5fSLWOI7J-v<
zI;{kfTYQ7XZh>A*1fn?38Jb4Xtt`r+TC;^s&{#l@52c!Wpz$MDC+`MTGsOmzX1a5v
zFibO$i83GNU`#R=Bvd-)^|I_ZneZz<vjP$wc}4`Hg!fM3;kh!l?Mm`!Ym4W88)>C7
zE)EORF}Ewl4La&}RK%LcR<c+lWhJ_T_9mIEY+{{T$hN#kM(Un!sdY?fE|)et7g9Cm
z)EaX->sUxj$3m)(g|v@3w6ILIb4;t`Oqd)?3l69|IN871+nG|TV0J9Q7E!Rk#4Si~
z`6USLRDOr@s3zx5ewX;K&_Js!G&E!<R~?uL`-5{mYik`)Gs&k`>KS%z(BHVs)mWWG
zh>c1*clA8a*)!J(nBXZ`a|rqXpHC-%<k|zxSo;#oZ+3F1PTVu&y~VGc$T%`LdwKOe
z!K|xTv`}?k8zVQ^`J)x*VySI&KZl{S;kB(l;0F%V*E_sj&-OixKXqTHHt>Gii6E#x
z8Omovp!cMFpW5#$Jt(z^YCm_}H4S=fyIAqRNKHW)2c-Y@0pN9(fA}C3CK&sRdtSXt
zyWmw6qoAgx^QEm1{hD?rKj48G59*F>jJJ{wmB8&lS5*lJ>C`-*rhP88&er`n1?_V>
zc8^^Q;ANRI=JMm?Jc%j<vmiS|r%G#ukwFrCw#Be5gkzkkpgnP0INB#x7<kn&>x70H
zhpu@uwX1e~MfoVtQS1*8eWp$hJRlGU=AnTHMn|i~qGo`h3iy!Jp;YlLheJ2xeTn9I
zb@FlYrd$G7#mHYv{h>s6LXe}hutG9JM^hEcH{?{7XEA!vq6~(2K_JupKQ!6(hjB!i
zBS%0)wMZ##1V2V9rML3z2C*`e;WCNT?uLz!&S16_Kl*D?H#lh#l|!<uUJgy_ZbK>X
zs|9*Lvy(@Qb@IsSWaTfIo9$fPyeMb=Rs3KzIHq)@QSkJ~6JbG=-c;deLYF@*uaM1?
zoSrz>txEbV+!GiYVpTzEv#-8Rg&@#wXT!tPas!ssbvEKv+cY3Djw-oKFlT#<t#0`1
zmuW$}T+(b`$HS^zhww5iYj8&F>p0F~<|A>^nF!E;Rj@s;g9^O|NF62xVlW+eW~Y3@
zhf1l#Kg4O?5`Xpir?@=xF6GPeQev~zSxUUy1Ibt<BE^+rF3#5)t5)JdcL99pN6jtO
z26rR_mc&F<7%&yF%KTW@9i?=IKRkJK%y7h406mhK-Og^!hT^taSa{`l#bwW$7BWd-
zosf~RC86T5uw!+lbB=al?uOwg^yz8I^pZ0DB94{J{y7#{!1T!S!hA%6F?Op`_tw#+
zID#71)$>uSd8&n$9NRLSe34}Qdc#YbV)M(wtUyDVB^1p*V%p1nG<(JGQio2}g|Oho
zdNKj}#q6xqn;|z##zz7w)tZAEt7DByOdALmiZ{}#3BinqvRZM^qv)nLYLyaYnEDd$
zSyN}?E$y`&%0bv_l$>Kpl`6>q;i_OF_6gY1mWkHR%A5vOW6}~uZFBPL7CJR}Tu+)k
zZv`ilou#q}O0l&&o{L!5mT4I?2+(d~*syG(YuDtVvn$-8^s4MiQb-pW%Ki40BAy89
zG!$6sR<E(~r%6?El0KQN^;!Uav%X)I6$k(S7ZAgNI-7(bIFV*{XxW^>bF;37%H=0T
zF_b)J=IRTDL%X<0KPdc~RV^xt`e2u3bXGR~$W+AIyuP9axR({+TCVH0$jjK&`?`Tc
zw}_Ux8dL<xoL%Wyy5l&y3jX>1vxDFa4z25CFu@v&!Di=MRYuW_#P(#5kdW-O4jS43
z^tFO=mjw?xn{+g8!B4BZ0f^a{l1rB=h;Lw1V=Vs!?>IYV6p(Nad!zvDVbN#|D+&Hi
zU2L0q7MzO9XKApFfmhY}E6Po<Nb^~0HW}quz7D%WOzo`vB{?S<nD@&+pB$dPczN{8
zlOEtzg|w;RX6I5?MZ9l3`|0WN$;QFs$HylE^C(H+QBDkDf@Kr1Ja_>+O-IPdwZW6k
zgdi76*29>Hs^xvUliUJDvVUp0zqD08`b^c6veu^C+0>zkW!&%Kh+DozfrdMRnXKsG
zarjE8T=dWrt7<*NL|O2^D`bIg9~_a|HaEB?Y<4ybC?TUNGX&t*%J9YY|6vxNpX-g)
zqK>(T@*XuJ!3+juBuY5e_2eyuT~#5%$V-(2fg?VHe~HLYU>yA^l7LZ7%T9AdMKZGN
ziW9a)6P>CK-ILIXNXpvcjfR9-KiJ|i##9+^>DD7vSqAM)9SF}<Cygp180d&cQNMms
zL3TLgb#>ilI|%$Z&I$1$MHj;=hp^Tg$`jE7m6t#%2iH1SBTW@gjqGkhIVX$_vY7L!
z&>@zbq5ElHguGHH?L_Sk&7r8K1xo$Er&JDUG-+V)Y#vkoOIYYQ%5jM>#J7*zm^{>|
z(nF$oo0}I=CQK37>mV0>Mgu0Qm42E!|5{%Ap|P5<Vsc5=WGmM6(0=!xriOWfKr<-D
z=$QTl@3vR`VgbtoC-S9wclh(q-EQyn$@7yJFXfNJ7tc>$9vq$?y?AcQ5xpN*zMA_V
z-M6pve;<Aq|LwmX^dI&Ie>>lprYcHdg9g4qL)j?u{sz<1hVt%3#|^x%yY%;g+4~<p
zctAhlpYwC;+xy=={Hl9@^V|EI54zn4TVKJWt<7)03O2v`2>+%i=mx=^;kc9go&1xP
ze$Fv|n`6~Ud8Q?AqwgFthU=I-at_zlw0b;D&hjX~UBR0<v_OK;0f>x-pj_lp2Oh7`
zIW@jzZ=Ue56>M#GzYQLN*pp0xlS!OlKBV0<`nyx`-<^vryZG;mQIsGj8tn@>G{%!`
zn!`3a%m#7rJ(_hWH3gZeWfohMehO*5dX~LQhQkQkA5#_j;3<fYDOTp`pwQ95qyiD_
z<?*d*a{LrM1c`$Ul0tB>0fG=_2!X4%a&XPgCU}<MeIuL^my^kOdt+n7U3aqlVgq1q
zKyMqJ6-T_O*xUbn^6~_?_K(U-yy5374y%DvbY)|8HTa+Y5Dv1y-L5b2egNJA3(vFZ
zI1upZtKPG0FdZUGT;-3>@&v}sFOi`O7y*Iq@cM8!ctKlWE{1Vd6bU7_$dpV=5XoSI
z*QjLQ;`DlCvnD4?%88_65(*Oi3o13pQiCdLMASg=WWCElj38NVrao9MSRu=fMvl}8
z06&tNwXNqAGe?l-VHkFP9_OSYtgyHj2o<7?(Ot8-Y!&RU{Kk1w@J|#%M;VVX<E*){
zNOx3F+CXu&jh!n}#=}s~5=92G=SiUl4?Y><qx%<n^^WmU%}0!14sd(>1Xd@yi2s2}
zsf^#7HD=9|{_AZZzTDn^4(kJlHvjN`n#WjFsfTAiR!E9>`CH6+f>Y}0rdH`HRR5k&
zM~`);w!QK&tq;5)o<_Imt?y~eZ?I8}e*C`$-w&e;>}3-#>+rl^kdiQNyeS>Ed0TI9
zKSn0TL?tn3CajqW=pHq@)3{Swh~^{j8nEV>b0C)9o^8{{dYbQpvOFgFz>3C#Vf5ub
zjA&Z*<|Oa4@xXX?dNGmDkdojN)`E~3@gw*`KT>$)Mm!U=?&B<(-9LWv&!a=oe1Cd=
zD4v|0zI^fghu+cir$^79tORb`(4+x1?t55QP)WfUN`ilf^f36%%A2rDdjOOXOgv=F
zjU)Y<4oss$Fl_1Ng8#KmOG217!V<~6V&GT7+7Ef)9ko&nd{>Sa%dBGu>axYCUtED0
zhRF1>L<*={+uag8-$dJ(;HSI)=OP5nf)z?mX4&u5mv(jp^*8`ch3djIqnn*_>#fGE
zK>@S|E_H5&0Fe}9uOY4D(AzWwTkT{pi&6OJL*Ng-5cnU2z%6L^=Yh5bV*5;p59Wb*
zzXIYCHX;4@KK=K>A8|v%G`BwThQtWTQFKEahMshEn&q(7In5@~kWw7B{vdl-Rka-6
zvKNIQY|(p5)fwhnha^X<2HQEMaf3=X4#|TL?~GXMR#sH<yDwB)K~Y{(WB?$Ad3qM#
zU{XU;+$Yym8s%T?>3=D1K`7@QrYq4Y1Y)dEn^15qwGok&m>&`EDEnM6oeaO&V;WyX
zqF6D^55AC2I8oX+vOS8~<2+#Q_wEILK0`4~l1^H70_6;iK~tIKMOvZRn6APY`VkIF
z1SRMw)hNA1)4gq?zmpuQ)6JNIXsKX_CME@PCNb&GO=9wB&&}wOG--HnO3KmPpv=(_
zD0pCk(k<C`LszD~Y`iPFY|94w!j3eN31sBfJ$QQb!}H#=gP#`~4ySl|ZK3y#Rn*pr
z@v^=wIok_x3C0zv@k%0~-NEY(tsEx+>*y)mg9qu<7<|b6IQjIb-Hzc>FwYqyKd3rg
zq~S(yu*EC_Rn|q|_+vJ^^1DY-5jQs&V3{&xmIr-tW6Kr*hPb^lgJ>%abZG11aAs{D
zTO3lg)ofvyjxntkPNfk>?gecw({RM}olDw^zMfA<r-Fk}GXM+=GsM*yN(8Rxojdc;
zEsa!24r1N!1#m#=nn@@!z=IGU3Ms(TlW|CUa3m~>aN_P(86*X^xH5%4BExd&8XS!%
zAv4ai2^`V!K+~ev*vV}ifsg!cPT+HAPvPD)g+9i4Af!8ux0C7F699{!6RQ%I%w~(g
z=`E*eh|O6>lTKKx$`cW}8`|kaPsP#s^B72D5I+Cu>C;w4F^3ANc7a*#vbzAclns_T
zQxc(Cjv=aY*7UKSB;E%R@}nZiiE}sQng{uX^F%l`NfF**(z}fey8`OIa*>+HS}Ge$
z(yh1Q&ocJA<5v?_&Rd3+-nPd#y2CJDdRSNuv-B~75=)l2kf(%4Tq<>8_^LT0GA2z>
zT$i(1;FQGV1H-qW5=`kXqoHhBmui%+Iyg60aUjI(J(09-MZ4+o5lz=7Q}IIGq|7D<
z{*;@&jII?h>P8jTn&*5`y}5@=Hf{?Haqss;wK_g=l!BU#JLW0SHop2zdh$0LkLXNM
z`Mto@a?8g{NfF+er#E7EUiM}2JTVQ$_52K2lT1?dFxbQ58G<rl`H1_qXRfp2;;<2c
zA@fw*fx%ZBPm>(#DMV>H4)tuLNG@N@3Dw!UtoVA20;<0TdX7R^N!3wqMAZsKtDM-{
zGJP~Rb+~)H+Tx)mX9S%8Q@W$#ReqBVZ7~I1?#tk{@O@8Q{~HQw32EEgvhR3s7)kdz
zfjVu0+SSG&bF2=fX(Q<SdHD%D&8@KCqjHTS<S4%euhX>jvqi%2xMXf`<E-#E@AN~%
zHPY<IHYJEYtP4jg$H5|ax1jC#`m=5`@7$jS%~65X&M<IqqGL4(?a6N#XP;d-+T#yv
z&jVoT#j@b|tvL;cxaR9wb=0yId=F`w?7MtyphWIdO0*<yTTF>?4=5gEc&m08M=Uv3
zWHHgXgzgfz!CmMzRg`##6wxil^jmYgQZ>JAWLgpuB8}A*<s9o$L(|gScBLj(r80v3
z#-);I*UZMWvr<VrUK%mjFUo)WpA*ekrhAiLEt@Nr96T26!pjI}-0FB_EmuQmmoQuv
zjN>@2*iI`Bp}K^m-Zs38)Z{j6M%KOAOwa0B&ePgs@~c1nzvcLUXG3z}uW$gb_5beP
zfAHOduIvAO|GNiY{J;N*{@)M2_<zsz|9<iGr_*^H!1ecyJ;23=y^90503kfU<%c!C
z);`m)TL^+Gq&!YWMO*sxatOM9#O5M->qPBEB78XX_3e=HB9tM)0yIbr_$$^6DWp9p
z%Jb-USB%E$6$A_s15DqqluDz_Y7HkI)?r4WWQB8W+aJ?sW|8SVV|>-ge64OA>C%jW
z=4<smISiF?#r;b3X!ZV@8haaPoM4_;ji3tn>?h-PfGJruoHbd<EikYlh5)TJ2rEs2
zADz_230F<nsrioM5v>m?fXoQ?rSN8^I;tgJdM8Vhv`k$^Lw&vZMwh@1>>izW);f39
zI(t^w>4LNf`^kFu4QyL;E#ZldZnuX)?VU#qz+4Ffz<ZxIYz;t+8_O8f%)ja@3lk-b
zi&mK6U<<LdPrG7$eJ%tcy}2l&XY(LQ0x{8K0eL^0R{PUIB$<<O8&dZ<$K*Eqeli+U
zjmRe7Qt>1>4$L5Uk@ASM^7~e!(cYL5`~P>5X}|j4cA#m+=6%a!k+5<+O+~B%ADRo5
z*(l)mwSJ1<tSd71EUgL3<g>E9+0<QT-PUH1c3VMghzyzaX34AecN?`URV%hxyEdLx
zp(>Y%&o{KE&F!p~Ld=OUXk<+BVA}lB!1WuuRSC&PGZW25v$ZwxjdzzfpDt*%^-N0B
zpC*h^ZzfwRH<L86gtM*r^teA@&o;T(sZ2*bwRGl;9Gdcd#fQeKf0NP6ol-w9N~rU8
z<a}oKRNj+LSI(hWGe>>g)eX@6>V4huYb}g%uvVXtv^sEF!3r|pTiVjB)h*51GM8qp
zc4^i=)za9;eB0VtwuuTyyl8wV2Xj1(()cuEeT$yFpcN9Ac4Pax$q?d?#9_9%v-}-D
za0zSR?ro&8K~*ILPQ^6FL=t7Jn+d#)iLC7fGM#Z!_4znh*I(5De!t=aKQ05eru4>p
z^F!H>#*-X_a6vj6CZlAcvCeq6z-TUMqDQu?kZlOd&giYx*u*v1WiZHnfB(3rsk|{W
zr6})g*>dL&n~OKGwY&Jy?Cr&8?C)rQ*$it?Qfx}AoK-d`Eprv@_bc8W#vb^8f?f8R
zuR!HqGurOl^(13FS081G&vWAUp>#^lROVtl@HcnoR5tcJtm=<~K1YMOJN}vxpS(E!
z^$$#iAO-(g^-@X5ADT@6Xo&XGtHQMJ?VV9QCq>I&uKvuVZTWu%S2f>mecWyEM_Zi*
z_k(YJDn8Z=WW>N#qyZ`{{~I!PQ?|)nbqgq6ml3Al7;Y0v-FKy1q^b{H1!>l|FKoaR
zdg6&R(IyY~Dpk0?8;^MrXC|}~x4SRKyC!fOrVZ+2X1UUt*{K)IR!z}&J$$k2MS+ay
zXr6`Gk1``N1n9pm$yWW?1jfxF+7X2d3{=3h*Tn)VM?E9OmmE6XH#8bJ)9=c12F#)S
z!sD_!)W}|55pVbOvZ%=~<}9UX^Rl*0DJPlgLZu~pIv;?SB>>h4nx-~$mSFz;W)1+E
z<jfBJQJK!zThAXMe0L7yBn6wF|ICveZGPzWDbwyg{Zc6JA_%BSvbG36ug|=;0K~@W
z*gPmc;%m;i@mt~fG&4M1Pfzt+kNlYd>m|M*=(Q#;Tn)k(wq6CVNz8U_q~tt@svGgR
zKwau8=3Mr`$D^xB&}Rv;)w`=4DSy{xnj@3ue2}fk+m-o4?iDQ4Ax_0clB_G;;jERA
z4sdO4>css(v5bY4-G?sOF<_s5qZ~iNzY8Douk=*-EQA9#)V`l1hk#v+od+Vp`64R$
z)sCt#hTf3;w%}B=ECT3&&w|SU%|D!mF~>fs#mYcH$a_+ohCpFj&FcdV)+;Yrqvcht
zAFd{X8Oam85}i5Sl{$u@zBl?SlZ!zVcIL;KXH7YFp9gT~nEtA>Mgg|^nHl9jCpaDV
zzkohn8TW7Z=n4Ke6>wj##{EpCE>E*F0nMGa;4ubg6(#lC6r^jx&OVi^&a*}*+1+vr
ze8i*x4VPaxxmRl9Y^HJ>Z_a^+9os?4jr%s6&B$y}(XpA{;4sVbft}!hi?&a);4G#<
zwiwfsiRwL{G#i3(q#uc-2&IEbWk%2^z5KR(EO<T!Z27-6zTt#3_%~M09LlGJ4|>c{
zLpw)<w&5O}9v$`$PEMY@JPpe?^>Zzv-`v`ilmrYI^Q#v>^`0F(e$qSW9lw0>!^?wb
z&yHZ#o*ltx|M5iDJ*!XNHJiVhVGf)}Q?8|*VWLTARdX_XGlBr9J53Kxe@pY5une?!
zaHY55kYktgNIC#ozT)e_41ahVNF!Hw{ZX}JdxPQ5a&5Q#RYyH5_@tgtKA;C_rLVT%
zTbme?ko#??-g)C%=8kN+Q*4Wk@ap&$(|>l;b<AYYHVN3zB*|JcXyU4Bf!VC^a%3um
zk=Ib6h2!@tUialY?OF?v7=isvBEiP^?49fGNyk^HYG?kr$#l1jsRhV(%}2ItbCPXo
zARn5Hm&n{3Q1zOZs(lRpgJ!ETVv3fUrmCegI;oRrRxU!29Wm*JsIFX<pm4DiQ=Z{i
zS*B!0V^csirId2@_YgZ+ZAq$a>DUevh7|iv2fdMgM|of53e$)YNvBy@rKPS0XH&dP
zm1CQQQ{OpVm8IKZ&)gA`@7kV7rIobId3>}qU2X%F1m1ZOZfa5ZZ8m%xhv!LNOi0WY
zSt}@QkHuQBRxtytOA9|w{;sNAU8SSNou_(5!C+g+H-1als==9LwC=y&eZOP0p-Rs*
zPuUI=6utjI@}3)*GV2%}+-w?QcOWiolzHS%RnxA~v^%4ziH6NL!YOytbr`B_(v?lR
zwM~qPtQ$R4j>;JQx&gl8``N;b9k9i#P$z7ELI3>dU#Xan>U(FUV)&{zTT3=RTX*2X
z7Bn}uUdM7uB~SH>BA#?Td7?I3MhrhOL;fbTJ8!@bJshz_pKfL@VN%6pXTCgJ^qkZN
zQ8>Ol+D79Pa9VY`hl$$KJ|O^!cOW&V2*9WrPx7Xe?#sc$dbj!1yLD5yK)TB{JC~Ts
z+&Q<HxtyemLO~E^-Yd;bmW(2cTKp>xl8ZDGE@)V6I6Du{r&v&U1XP-ghqvp|V9@cT
zaujIpiNYEpt>cO>!{GMz!C=7I9r+$UbTa{NZ$Fz3?Jqp#=LH3!CsswTlAEupCn46?
z@ehWbt7t6ckK}mtM=ODUZjY0<)|a*b<jHqfF^#sd-0jNDjn;CnQuz=qi&v&ytZ_!L
z2xcJ&zLbtW>aZr*)IGw@JbA~&kcPu-vpvd3xWT(BtK+yb<Od3kTTFCh`2&7$D0)<c
zJh~zQ09^OSY<*x^>@%3pil+x%tU^$peqTeptYph`%c3|^TZ||F5DqAaIlz_pz|r9D
zVY#H~$R<yP@Lw6>zfwh^io@cyg#88=;@7HdAx!0Zs74TiHYI6LmD!Ud5T2caVp+0E
zU~pH-hpuH<AyYW>NWUHVJ)ZrHBekfmDUDv^qS_pm6Oy7+k1@p8{G-a~<Vp7>%azR}
zhhsBC^;|^3Ex*u$gUP#6grzo&Kseep>R~BDsV$1dE@6J{1Hsrt)hZ2a5m}Rn%CQZK
zxq%$`m};b9!E#J=h?X}i8K791X*$F^cv+flk^vEQ;!coE{)*uWssNi&TMcWlrExqc
z#La%pH>n^}7~-8gypyyCNb8rySUS}8St`FXO^b?n<jY2uL?_6agN>ww^{%Nn4Frl!
zvvXm$RVyVY#u(iTlwDVuLX{y@b=q<(k?AYw)9l4~*YrzNj!XPjWq78)gc0;~G!ChM
z%iEx3Z$Q#E9Dfo`pGSq6PJWj!y<O)i!}rKeQZ2@R6Zfau<%@?XFVNdo-y&bV8ou^z
zkay<Rg45(GS6iCAjSbTv$f;?J(~b|Gk@V1Je>*cjHo4r6CmpcY*TFoYe2x4*3l?c(
zGb6CVy3xQXS|Ch1(#6#zt;?7AhRDiZ65T-h+Q`L~QC*l~^e5Prm{D{*Nk*tHXpf+X
zBB=yl65NE6Z(=S~JlB3W{;8l6l2pcVoB<;OqU~VD(SVbP4nk&-Kb<`3!PiuDQNHQ*
z4u5)laCq43wahe~lsKEDbjHL9<%t(gjX%T_{j_=UP&%%&ah#J&QY#3xciP74l>)fB
z;Vl*Jjz*Pa(u4L7aQE>oNFYgne<>r?<MfX5syse_@>BGu@)Ucm&WdW+S@0}QuXi!b
z&VcTEeD;xxx#1Mb-*z8|LU}VsK!nw{Yo1D8sMD$LDoISGYVEciNYdMB-_|l1fZ4-2
z9L$r|px1+=6SjYU;`o~5B-81L^0gGs!=Ig@!a;uXv-yo)hPLd;ya0=DaCXplaT(?D
z05C=$1;uMNziL1vne|ZnI)hDpbu_r?kg5dz?N}cUBk^4*o2=1SAZX%%gTDvP^KE+G
zk{L*>`SoskQoa;$*Nx^jb(aVsY=s7q`d~9d_XF>G5irJ8Fp+QzNT5G}3_7=VC-iZz
zAf7JU&}Y!*0v?JcD~4D;mO`-TX$`nt9!CS}Y6o8LYFfZQF{_Euiy_$I9UBYhB!Te)
ziwdz0(J%z|hfwj>2DNq?W#t-MhC2;c$TteGF(JT%vB=!~FXKGA7)2m*=GkyqzJ%4y
zZ`gKVLcsi$2BN)X5GI`5Svse9CcjoW`RdFA$(kS?C10^Wf(Hj^yhb)7bV_qPEh|+a
zY|GoU&lA>egOpNSmo)BMsjSQBXBw|v*I5an#<J@L*M^7CXmMO(@sZG)PI}4QN?q%0
zb~exDE6-Ss(O$9odj(^hkBfjJd}ZpGqHma3@%jpe=B%{z;(2{lN=`I`BJFist6sO7
zgbQV&6ix|dkJ_WjbWSMI-bX#ee{OppziN88Z!Pz5-}*ccclhR2+kXGbfB2U`=~u7*
zbb`Up_MZNENut56D;D5#cu;VwQE<yseZ?qvrBI-r6AE5cpy1X<!L1tO?MxKBvQhA=
z<%4dczy{w?I(lbHNAH?RNAC>j=v{N^=v_1E=ra+0-U-p?R*F82#kRUL_*YxJV)L%q
z4LmT9;9K7j>>$J3Y^y=|t6!#RH9L0iAi#Y@5w0ElDt^5++!<J&GkcjW$T?vKH3Hq|
zA;i@XkQCx7DQMx9pD7;J%6SzX8(j}s)*H^0^~zikq`jf-z+Ek>8Yc>Dl;MuIk;`d-
zqC6XHIAfF5%z6<Sw|pSFQm#+ZwReIRbW1|8IqLTw|MQP22>Z8o1F3k{PF|{b+*HiH
zzg#i*{s$9t#r;Z2($&u`2~|8c<z4;dt9I$DSLLsM@x%MY`O*}TtKPiwfYNV&SxA1K
zebRs461IW2-+?e}tfhCf?_8M`DaA9|cY3t%cvQdeXn#?oRnKX(zf_F&ojuxjYP9dH
z(f(2ve>Y2F?%LY;lMA++J5Jq?D_h#-Q^=S(J}^)FcOHMIJ4mz35;;GO(Af^I#INtH
zBcCNukGx=R0C;p&AwIO42(0BY4vUJeyxg`HX;XT4$J1n{;#qDoQ(gbiGIbNWmtyXn
z2!Ey&zBG}0hR|*FlQLUM0lLY$fH9@645`|TxXs9F^9tLH+%|qm-aLTHQhHfpO@vtO
z**zow4oa(ckrH-?_EAV~x%PR3dVb3ifo<W}{p)>m-t(aZ<0|&kNJ{oqfvFLfmzR3z
z!(|%!VLbgL$%gKfZAcILcbRa=19gUTi+{p_nvv<$vQPpavx7Nu_;{On)A(>QoOv?r
zn6N)mlAXp0b=26Fl2fNinw*AtcVsF8Gb;hT0RE;k?R@&oJLSb)DEW>D%!kRp<KKWE
zFa=Llhx29OnJe|nGD>V#WW)H3OA$BAf}x$>KU(@XlL9I;!1;4#3o7ztulo=g<H~$=
zNr@)rOe;ZAQj$Eq`6A{?IDsD7CO|ZMp|5X@Th@$@R9=DA9@xZw>m8*VJ7KF{&EZf7
z?sz;t!pqBrf&JB{z_5oTF(7kVNZI+*C)W59Rq!RM-~&b#C@#L7zybr@$BZoy$Ho*E
zloSIzIr?!j46P@ic(m<r2637xn+MOtdY@-gIB^HPNp>COgWkJ1&-hk8*x<IBj`88X
zx5292jZR76`BokLRyjP+mrmjxgV)-pjp-deQ>$wtQ|ddbot3hS)CparJc~!P-Ne2q
z<iq1Qe}wj~uw>bWrl)M6kl&a9;4K<doG-jk=nZ~yoM)T|zZTFl47XRG!tUSj48>#h
zwA&SC9~bciq*Cn0`KUZzSk18zZSHRgbQagiq<`s@l{AD}EML>z_IhS4l#;*$hwiKP
zESc!FQr%u}z9}VKk&bjuBa5wW5Lh$pccO05M52jgbf4{^Jte!jy%b4$F_bPvEJIt?
zyd@Q?@|C9Au2fGtd6WV*nq@qt<Y44`!4bHlw+*Lo+&+tLk`Ztcsz4M^$r|8uWJu#&
zXb@p&ZM%&ibmTFuSrB1hx$U*!eiv8d{uUm0NP0$M0=%0f9v9zCwzo$~nlQFhr)|Et
z^!r#q^J8ywca)(ysC60?cI^=;slQ{}8mg$IbK~?DmiKR)Ay)vnts1V&kT$Gt3FvXl
zm(#D59S{~4t8)ZV5sap=x#6dbkP4z<mR=O-hKFlTNDfArUPDn>n<cM=>jjYcc_vkq
zu&r3lnn?{c;(NnowW-2hrAktAN2f<#8cvyd75O!!2RZZQP{GCw8u0Nf_CgrZp=fo9
zHx`H^YfU(BWt9{f0Pz%YJ_!w|yY_llGWFg~kGjX>ilifQ{czEuo1FR1Qg^SF58Sp(
zeR`;^ij$*vTOOHv#IAbA)3&B%PMi)Ce95mu^SicZrOiRK)fkT3)`GQgd+0tkHu`8N
z-rWtp8Aj8ze+le8=suPj798Z47G-V+-{8j<v0`JZkMc*i{E?oFf@D4_eRNy?NI=2`
z7cbdE1t01~60b}G!w(Wns{wmqBw8xzjvm7C2M^<PFf37@6?@y=SJhI?)3HPtZX?64
z7FOp>umH^wJt#b_%v-#0#ZK4bKl<C|z}j+ZrsFD!bD_m^Ef)ur=N@c37pPAUetLfR
zBL&6w{_*7H^CwTkuqu*rx2Y804$5<Xu+PV<qI^wvZG!@|zScHT(Y9RDl*GwL0rTm$
za3p)cS-)ELW$YP{&ecxU4J@MOyHAFi?>-i4zMCI4pv6VN9zPjwWH9qb!jQ3hJh<|?
zwOZIyr!rC0mS1^F1}Lm7L77110hERAj=z&v%j%>Hoow#ZtAXT@Sj{eR&TnRcxSa*!
z)td#hNb0&uk$hF#?$x3SD9Jmk<i}VVvG5QDVG9oN-g$&3GAw-j?|gD@T^`bhwg}rQ
zYh5~nxWJ+P_f5+j0jLKjX*A};c#@G>fW$vbS+<qF5@|R?cl)#DXm-s)J#fpf2ikFO
zbLxY3P+TJopoBe-U`(E6T~{Sw$Fz1JmUT%QDfeaQq@mrVdiknOVXZc8G*Mi-n_CY`
z()mo41zVMslvjQb@2WwBCX7`}3RY^*9LmnS+BFp~UpX&tKObGFLP>d5!tN(gpWZD)
zeNs_z3use@?&R04-p$ah-Zjy!wmyJvRr=~(`Kx!!*1(J>@5&#&t5MUC+VsO$>fy0|
zc>GZnx-FkVN2j~t@59G-y^^=pRLNuDa};`=A-yc1GH!i%mC-(NZklL~zJ|AD-(0_5
zHJ|>lbw{=HBdS-LOLM<oZOf~1m7RqvHLs72y^pMbIXJWk>@N6xe^+b3JS6|%lP=nK
zAJN!iF4uQewwK=($$$4rN&dS}L-KdF@@y@|%JWH4Zykb7mZ1itjm$&$KaP25>tmRQ
zs%%idEUS|(wn2R+MyPra?`DDc<+HJ4{ZiZRm%H1)Rw+EzJKaiqPj~BY?L{m(b#}0K
zUb~bqD9vY4T6B1|Mv2<7nue#UvHtS$##(JaaR>fR4g8nZovl>&x8|`?$<LQ>kNTyx
zQ)#Yn4Lt3V{YF3eML=s{-dfzA6%uKzIjgcvW0%){a{td_KUum3q{eP>zqtXVsnNn=
z_pMp|e;T{V(m?&PhzaKYhcLmc%r9f#zl)6hDI{vm<7P?M7I!KM+kP3^qqBJ9@$7il
z{XLLZ@7;5vV1d2e2Lzq|cy!Wx_T==(7ms`J+l%K<U-iC!@v`^g(f@mLcnS}`?;Z3`
zj-MPJeGeU+9R2d-ZZ#X`F7X*7Ve|xhHseaH7aW4NYlRb;0-9aGioh5_XY<za^i6Ex
z+MXX`f=-3ENhyS@t!K!6uIXGEAEVma#zt`Z;_-_xAHgwP#A$0g`2OhUXHVcqmIrBe
zjeeO)fqt34PLU~FQ8tIiKT%&2)ClG=?0hLB_oa;7pS_HnQ40=e_<4qMEs!yW#AAtr
zaS|6G;KUSZ29$huYecOTt#YA_d#XkPisWX(3H=El?M=_=IV*cu4ejC{wFNvxHU};3
z)s_PZj8Y%K3>T;XqFNZ)0jw$@e*sxHIbT^>c~6ml=^(%;p<;|td?fs=h)?h}^V-9o
zf9`gBr%#@rz=14&9KLw|&lgXBIz4*vy!YeESM~qs1X<_*KKw5J+kZXiKkN_wcD^x9
zi|Kfr<&$`@L7i_<UmHc<-{1w=kV!y_$ur`vmlb_%!oP<P9?(zt=ltB-y#L+(ue$d)
zzrDZtpxb@0^%ba_-#zGl6>NU>5&liFR$UO>8IC)--^o8bW$_d}PvD4vpW`T>;4wx^
z-o^md<l=I29pit4!?iVC)oqxZ<xzf%b!#v)z%WT;Kx8z$h~aq$9<NY>tnqD5ZNtM>
zu(jEJ7`%*%(X<GjP17sTAn;@lcI6rW-xpbSF^qwej`jrv8r?}Y&HETKKZx=4AK^%g
z>DjNq5z%1_85Bq}aCC{&|5^4f84e?Ce+-9T(hr`(CQIRje%lGQI-8x~=-d$)o*h38
zuA>433k?0h5TUav<_G~0q^wUQmfFFqY#Q{jPS4pS0u?q(;~<&@my^kOdt+n7U3aql
zVgq1qKyMqJ6|BRPV(eetwmmuRJ$v!^$y3<1f8okJO+R087zL)p+gM!<{--|#?(Q;B
zU_|gLNiXtjIt~PIPHY4lc{)VDw^jb=EKgw22P8_d3>A=k14thxqhvy;1C8b^&KZ#u
ztij)$i)@@&swll6c$_}T2x{)gt~RIu(jg7Ez+*1G5EUK@qDKt~)~y5vEeR$8Ags8U
zBqNNDHpWLMRf>n&1Q<Wcy{PojAJ{314)|J}yFvf9Ka7jciit%&sZmB96J?2Wv?Qt~
zH#Gi&Gc06dnn{5J<rNy|IG9fG<SqOW1qJ1$S<$j;a26pFVQy%M8%0BqiTVk&!sutf
zb2h!W#DrZuIUvnE8^Mt_ftIww0O#NWgPFe1F8*2s6O1-4g6nKL9ANDDSv&!RVPsPp
zT<h>AJomHfj=y@NN;aheew)`(5)z?`lP_r@=hGC$8<|kTV~=1|_5cNgX@Ano#=A-c
zP_f(lm}jF9RY#XT2_gG7o<wQv#dKth`$G>`$kt21`M;;xBmn{OQy?8!(%p~_+i6au
zpX7Zf+uCWAUvMIph30S=6_X_`1zrT=bkrD)q1jXOspu<?!qE!0ng8{W8ODQNxY-gM
zvBw;dpD0a)I3hj^etX}f1EGAHeY6AXp=7*DH8y+Fd9&<Mnt!#lI%E&r{LA>ror}{W
zPV6Jhtfa#^#t_v^B-)|SrH_COJ%Zrx&dXrK9841(bUP>^w`{jEM~{}sO|(RY@){~B
zKoCkMam${i&j4o5a5f-;2_GP5+BKk@>CI~#^UzS}tte=%x8z(4^ud#^>qC&sLVeiS
z@b}`-+d}<J&=^Nn5qaMrULb;mwM0Cj>DZ4<tBQAs3JN-Hn}`qA40!6Lz?g02u}o~M
z{#mn>_80^~k@*}e9%H^3{2H{1bkZS-4s(x?G>r~2T;<blz!n<@SH%CGLdzsVIgkuF
z2yGaHV0ashqT-5eIy@jMB3g8+>v0cYaL=eK>-e&<vM1x9Y<!5va-Ob){FpTykZx`1
z9oIJC1w&md=4_4ENfCDzGCBu+Z2K%pLn}*<`N_Q?zbtH0@a#<YD&0at8$opJjVxG;
zX9M~{3B;M8<bJh5IsC~1=Fl30bGK%Voz*_k?4!)#cTLtey7?&U8{I5(eWRPDt*-{P
zg!M&WpJILIUlwf^UfY|wGCr=dw&y{)kIz^iN`%S_5DQiEQx{nlu2#{~vy_Ri?nxoB
za+0ewAm6OaH|{jdcXt@|d7hw=+1gBM(d3$0Z-<wvdP(If!Z!lX)&Pf>#e8Rd-RM@U
zM_0x;4rujb4M++iI27wkV?a3%Xy^<mztjsT`YI-a?d^=+VKiOs?uO2bidC8P9epz^
z$noa2!}iGgX3}@|nj_$2!?w%i?3Qx#d0Up4nrvH=8u9x#-+1n;JWQKJDGvuWpO=I$
z%<t?uv5sVF=pYKl*)Zwfa^|1@WtNeQl?LBXZT4vRjb-#in<oN<bMAE)nRHJWLf^on
zVuGoM;5)jd=70$_W6~AzI)8|NXXoc6<P`vZNM`3OzY3<HXMlj_F;d91?51&2-Nlsb
zlP4UW%w|HiKq@OIlIJ87X`+?U(~}JLiu*jIFX*O|Akg2ipi~!;jQ+yX*#P3nB+4h#
z@u4J&F5RAZQ_)^QkDLvt(buOS>x$%V5BRGUkqHX7LQ!G4_ADWw2JiOIrCHT9MW?0_
zTHF)F!9{{bT~U^U?wZ-E3`2u0%uvXyCF!G8t+R+p35G6GBRQ-`R7PMYCLdS8ar9su
zJsLt=y2{E4w`>WCeFRu+Bk{q3Z^DvkMX_juz$aY^1B7*FsnH+0w4iIjX6L^wp6G__
z%0aemk*M329kwi)bRua06?@W?#!(L23pNP8qP6Mpr~kxkn@9=W&L(JA5l~2iiDi8)
z_+#Lu>SJaU_yt3|$fI-$j3t@eS}9wTv=74+Hz=cIl3d`XmR%(8NFfWdw{cEPWp2ul
z3bL5@-AKbZJerXJuB+U79u~gPY*fcfnU3I4xYk08K97MX$Knp<=@#hO@MkA@_<(Lj
zZpR7b@WqF8&8f(SV0F9rm24Ftmrmj2fMxtKDz4+<uw(cO4JbG8RhnI+8jlw)<kp7}
zfb%@XR7hK>w?Dn#2_6CSxQ<DMr-XE53XTg|xPZ?aPH8BPk$7P0^>|N|j1T~(FGXHC
zE!Ml6poWEX#^Ci2yjQ@GMl(Ks=pb^avJF$qFGM5!8nrhXQz7p6m^&sktFyQtA=eZW
z80XnpbT+&NPD*?ex`^IJ$uOez6}%!E;?LWbLfApdb`QGWB{Mg74x8&DCR7!1Jd%5h
zuCYcD-fI<|;4z}Lh?N7_c`_lE4I|C~(e`uzT|lD0@eUdDHN%wPz~VtcM;eN3E}>;9
z%!NVS$4t=0bVSp;P7oS`8nIs(um(aoUYKIdJ3I)+9RKh0Mnji7eYyd2*+42>1tRl!
zbOA?&PU=2i@Zi_-qTXSKSD3x6I)*jQ7=POuUK5X{I_biwn)9BfRpl8o%QeQhH2><$
zzoq#9fLNQ2UW_Z<e`fjrJluNtt?U2uaO=A-{y%?2|DOlJqd3iyNpLcW6Le45J)^%n
z1^?Ya51{|P7{S@_=kx;l>E!gqv)+s2W==rH*NuFDc(eYL!os~6&+P}qFM~b-2?(xo
z;9(~oJqCGk4Z|o`IWvaw{WR^jOXX5jK{Ih5?iu}V35aYP*tN>arC3U3Pij7eGc3A@
z|B=ACZT!Aa>96vK&q<sM_=oq?93<4YaW9Y0LH0}g@h*QWYrgb!(~4qhy1I`*rNk2S
zZhqhep-OR;+VYzntL*b4>x;_te5>1L^4{Klj9b}WYOCB`GjR8)@!cnq(n8#cQ0Hr_
zhs_d^?Jq31pTEjrgK1TAjbK|?mc+|Ppw7;_f|!l_1&GjGog&K%MufIlmG*)5gQ?~S
z`%QMXAReIoQwAhrCbqX<qPCC!X~Gv4sph2TP)#gyW7n{Rrl^>Mb2S8M@wGWGM=V%w
z<vcW^Nfq89n|lGeMw#{V@Z%8wLq+KDZAhQgIPTOy_SKuh=R3g+XddJVW(^wUZWXT=
z>wAyNr0XJm<uTw}F$T~VCm`d*gKbqt#Cwd0Muwv3%ves1tB<0l1l$+1;pbFqW)VhV
zJdp<JCZ$N5GK!koqYc@laQkaU_PY3vbA;DaDy;!hI$-Zs1hhnGG@O!`rR0K=mDI4a
zSrDt(odkW?8g{b_*nNnCbQKj)jS{3z^`_7Wmb7|Nh$aike$W+;b(B?duG*$*JALe$
zrqvwVt#c~K%dB%+UF0+yhuPRuB@~}<@kB1Br(#^4Qk2=a!`#^n!-R(>)122mVM%2N
zXLVCuh|r;byCR{dpdgj&n!A>uDq3!)4syHRHzW1$Qf^S!8|2Z-{u@^toP|zr{Xwqo
zBWC(Fm+YHWZ_pW+fhfD`;0Fj*w=Ys;&vytfuL>of)%BcF-?E5&aOcqPy9Itfxj`?;
zK$;qZgX1G)eR5Khf^FF{<fl{V`y`0hS+^F2-ELOK5ZBDxyQO{1L{y3w_hE)WSK)an
zwv(@RDbMIxG!|ld`CJteg-p>Q5?}0Nfjs8DI#mzAZCkuf)k4)QIOZCn4*P8jU{*0f
zfvB1=TI(>OZuHY~SZq+nu)-yau~kN{N6F`5<>GNT((}{HqAWnaR1fRSUvgf%W$-3c
z16Y+k%wPt2HUg3)-x<&od-%z&XRuPVoG0~+{O6gbA*sSXX(p&PAF8`6mNn+GER2J1
zA_~`!U97d9f~~|iAHNInWd%fu_8G<jIchos8jP8>nw2GtmARJC4yw(m=AsK#7#`JF
zP+pcBysT|dvcf9($`Jc>J^(LE0IU%-HRf7~pWmKqHE&|ly%RGl*?Q+^W%C(-=ZD|K
z8hj@lcr1T+X6SbWc6USaVi>7DvS*r@7a^P_+nnVPK7!G?={S~dlQv-A$tZ2&YFSok
z1Lob$(h``kbZ8p7XS%$<G#=;ru}g!}Vr^JM07wt5*6@=pAmZ;_{5`i3+wJW`Td~VW
ztYOH8Sv{$>WsB)AYcMbC29wVkOl}RP%C_#2DfB#6DrH0UX>NY%U%5rC@r!WVyO`-0
zVYajE_6r<S;+JNe!jT~tGvwDuR(CVx&*6yh#X|qNSm;f<{Cth{>VW!`MtbK!Xyb<t
zU;bjdZ*IF6Tw)b1vR@4KUkvqM4E4WPL%pz`y7qa)Fh8d~zQk$iY3@F~gP~p=C`&NZ
z-!ZGD;p+OuZT~a!|7TLD_Wqyc|G)X|W_Qc=|KEDB`NjYLi~s+h&;MT>FHM~P)u)X-
z|9P9veHnUO#M41$MmfWo_%<1c=w~DKGey8s{F?}nj_JIQ@{1{IJp?pMIVt8i?kDGo
z3Rgs6f8~SZuR-|3@lP_GHyDHb(GEJDPD>5!$xVcs)HeNrYF9@nFN`T@-Gb2^fGqTA
zM>RUl2HCbA?dl!{;%)48MD9^fe7!zFy#^FEN|Z4i*hb~RdKsFhC+lglu!P;+Jp<z{
z($&funsFCze*BxL1yMF1y}-!K$8ny#jR*THW@(7BFendriLyO0bQ1==#lN&TZ(Di}
zD6ja7@ve)>GNk>0Kb9gt|0Ob$hvrn<qEGpymXn1(d=vMl+K$D5+UA;VZ|BoAjKm8|
zBJ$-^QMEOei_f*W5bEvHV?HV`!XYL{Qr5PrgrvesBA2Ggf}Ko9vPbw8`0%K9o#rA9
z05}gtK;OT6dRYHF=mErVaYMZzSv*z%WdHg}prJ31Ry6o|NJfw3qf?G^<4`^kOr0ty
zoeSnX<gM*p^eQDqe9?uz=zojre}~gSRHXyXi2wff+wP{L|9!g&KfdUHU-Z8}p8glG
zUN;cx%1<XxSY^3)FZkMiRl}{RF5Zu==1L+r@M?<X%vrdXPx>I1^e@FN9##MqInxjU
z+$$wSqf_%70h0a)QxR$qqJ|QoaQZAF1{j1W_*eu}P=s?JnaQQBuW&+djub#Eh1uIA
zpG>1+@W0Mip;UO}2Rx5HPJzDcP3{G%o+Q5%iD}C%c<9G@HkNwXF;!`pBt|yly+EZB
zI}mpkGLMi@Z=*iJ6=_wjsj33kvQW8sSWyP?!59ZxlUD<E4?<=CCS0Bn6Wdjcp40T|
z>l21@HbwcyGi0{UV!;Vs;4vuK{4s6;x+Lvhl?J6h4r^h?qiB4Y<#7k-B-Vd&dZPrB
z5>wh7`i)YI195Cp%?*)SE$Uy&5~-XiD51&}An%N!*M#br0keU>V@~IT0D>vJlxdCu
zxhwM46}U)Bn-j60q;I1<i6}Q&#Dz<&0Y<~?=(Y&rf8(?j*z&mWyr7hA5=C%Q@&z??
zwyI&;m_k8K?lg<iR+YS`X>CV-D(Xn-_eTcq_T){X{F+EXV^{YfZsYJS?&}I@Mslo!
z3G(;}<tzejIpy?CV@m3D8NE#a{{p2{upQHAakV>@$w@*2^<l_`T###Gg##r0BEj#Y
zG|r~Q@V3J&MHMCimisoL6;O$(R9Z%uz%_0o{1lT*Q5MV*FzA~$5b*?70*GTWO(RZr
zjW0OkmKqSy2p)9A%hR|0J}8eF!UTeXmN;Wj#zslMAp4LoLY<?YP{>q{Dm&G4kmH$_
zl5>F$N~$%bT#>S`oIe?4ae=A7gisaq*VejS=vku*-wJdq0#%F2OoM8)(2$DgJnqnO
z((Cod!)by4$3*pT#CA9DKV8vf<rzyntz9IjeXgMw;zh5_*awtB4?FQq{}Qung+yB|
zB#Xbq?3|zg5>?XY<Vh^Aixm*9ugZ@Ep^5@$vO;q3ulTQmHS&jIa&%#cJC5jPoF$wh
zQKl9|j>t4zNL35B)RR2N`WZCHDS9X1!yn>_kWaH|ItlL?ZMEVD;}(s+=2j<Da?EHM
zMN|oi{W#AlJL&fzo-p^KoMcueNjl|3AKSq<9zS@1AjU>xhI0xwi5No5|2=hYmqM21
zsKs>ck=q4W0}qeEtkUtd!;D@V_)$gfk>R3$O^^^PgCD!r8U$-=iH$1X0vIRhz4B;Y
zCvVz8VwNFpvMANdqPd{C9>HI}@rETp+obBnH~ux8e-*3NvEfSUwS2!aYvTvzo);-I
zd&I>!aWIQ3y(|Y2te3|_%t+aCwrBOJV>pP|?kl*XBRH^S%09B{7@L|FMJ@||Fjng?
z_xJTBvgg`UYUbSMntjy&_cX~%vj-T%I39;xZz<#Vj?`>Qo3a9PPgFMFGu}StS8!sp
z#CvGGA^KE)@3pcp;Q6P5F(^8dWz>ZL+Mnh*o|gl8=i_!nb$_@`FT7`uxYd^8UXQ)M
z+FGU&PsEB_`y0XF&z?#G>ytxd#_gF?&{ss&lStGQ>!%|IrteM~&ad$l^kBKqjwu^V
z{z|!qSCm8)=USdkhL|B%3tCz<Wo)1p1zJL&gjx@3NR3TZ8b?%$)fcRG3`Flhb5qd&
zO1+@;TNGJ8p>labm6k+wF`mkfe2Y-t0jmNmXBHNt+6&gZ+VxPb)!ml}9q+h@-C$Fn
z4Yj1{u&o9%1s*WX1qz|rKL_3Zaqpi8FOLo$J$(`a42b~2@CmA)+lw#M)O_8DNx{HO
zd-403q-$!^9>$;%E7w4y)zCB5OUECtf&Q(WtyTpc>`?PL^>D{iuOv|j^t_S#bxt)~
zv$!%5oG8k&32Vl}&sugKne>?=S54LBGa7JsPkM6c@$NK&E#5nhn0EaE)$#i|?F*j(
zSF+?6n=b76=+3{-<5+yt2v64Ls5E_)cL>6=JzMn~O==~>+M4vyHBAVumbsC|+DLGv
zLC!Kh7-Ll&v^-kS`EoT?$NQMx?T02Fu;{xx8OfE*Zea9e9~{Fh7??F&1+-PqbDzbd
zBAy%+w`t#)1^I#c@Yhy+WqBOgUJNw`PlEI6^N_yQM<V?>no?m_o7Sv16${*xCM62u
zQ5D5$MafJXmt;thvOBv5Xxpa2A}P3$Yh_C&g=>L8t4RD7!~!;Oa`&Q+Z+tTb0W~p3
zBZLEIAQB%fpCEua{?9Nrqv(Z4&rf^L4u19)4HOL|x0<)_wmS8>vzcQtKb<`s=^1?{
zZZ%rlY~@O{%^Z_P92_6D(fU+el1YooVcm?g0zId6F)UvoME&_{5i5VSzAry=e5A`h
z{EZJsw+GeCKAdVhYj(7#E{v}YwmxGzl`Oo3BICnaxXrk33B3MpXMxZP&|CZCTY;qk
z)=%Tk#3b`RvqM}jJr3z{*yAgpgO`Ut_IfL|ycRlPUQ2xt+_Y`H6ut@>|7@D*p+tvw
zmjk3>(%Ok+YQ)>LZ%z&6QV7ywqn*WEp<EW{Av<1*jySQj@}tu9jcFCAr(#~4AG2a2
z@iON+c~?Yd+;6rrX8-LEf$?T&kOYFm`4ng{`~UE0Xu5Yngt0grA3@97v`Rpdz!@oj
z*j=YjCRqik6r1$@(a+DGn2h-T)=mSEbQSRwgq-0a7IYEebhBVT_}eO9lF#m77|n+N
zuQ<4-QZbYKR#hj725+Nu5?#d7TVW`R&Zr_OSRLgIM2-u2sVc2Txsd;Y;eF8tuS{WK
z<yD0hisi+@B6N==d6{RaJxS9If@%Elu-CJFAS~K%)hU=n*(XLvRYZ?k5VotjP_Qo(
zs3#aEJL}<<r{TZgyA<$W2pCF6{Y}7Z6+~aeeX3oD9P1*UJfDtG`+J7hzQ<h5Cd-Q%
z(oflt(-~O+$f9PKWGwd%GC6iFHvMs&Qy}*OT}L+#jJbQZr+>#G*uZ9ur2w5g9st{4
z904T|v*-V-boTpaoZHzonos%H*^?V3>D@a*7(25_8CHO%US(9&`kz(k*z#ro_6Q$H
z7XV@rInXhcsfzn}$uyucfy7M&pJq7Z0X4iu2MJ+^*Jk!4r9xwL)1F<~tW!${$TfYO
zF|&E&|2K{E+e)q|CL4-&4sZO=f1Eee&GT&Xl1g>$7);3yZ?~99$v#q{mMas9g?cn}
zNKdz?2XNl}$Y;Ypp1gei<Y~xUtQ}nB$$)FYaZCD@$sEM5wu-dDXd;Un{||I3pkVRQ
z-5`{pTJ0cI!0-<R4*w9B6&<C1@9*yiaB%$-kF6h0;Mm#`&;`F1$HP+)00@oHq{QmU
z%Z|j0?chYQaKY2j{36|u_>QOF@sLDikv&0Hd^E=sGT})+aG0g%$pwwkd4_8SKUy`C
zX{7@O@8gqrQtF3c?yOV^qxvR&#{IaYKtCAKkEF-<@)vKivw3QX*`AeucK>g2we58Q
zpArB2;NiD!{O`Ble*5r?|MwUF?>}w)@8c)mAN=(6be=e1-NYXdyb(OM6$Gg?jv_2b
z9S5PPbUM66S6-?s8uZ67$t;gs@}@z+Fm4-~YlHI|mUTo{#3`V9BfbT7?aOniq>T*D
z<*v`2tHk#o!FWr~65FDwVeDFb0C%)dE^{b%v=T0DD0A55)OuhW)6h1rZKTe6Rc-u3
zSNmz2Qn7gUlDfoXI)}$UnY*TI%H(#F$aIEuEn`0Izu`RUO=fqU(6DP6p&CuDt+H8R
zJjMtu3l!f#LZ}SaP!)(fUQ6B`5K?R6YlIKMe^p)QI{e(9A3qI@eTz(grk!AEbH~cY
ztR0D2ppK1M1If27PSK`Ces{sq$w8n*+2HJ!_4K>%8EXo`2G-ZMkT$zkaTE1IpX#5t
z{C{%WA1)UE+1&!6zZC!Z-GeXk{~tj9e->X3lHiAIm~e*7K@5#=L6Se8Tw`7Mr;`DG
z8oZ2#<ICVU!{Y6A-#wjP!H&L;(&8$q`|dDHCsBX09qbZm|94+}DRqjnV)Afa#pKTr
znR@c-@adePsn}$$&{S^V(^-Vdfvm;yZrTuOci<R5gA+zPE8w%>-_v*+8{SiWOl@(C
z|D`v<s7F6{WE+YHh{haQg|5;RFUUb$0}desg43?q7=8w)(gd&1hPQ-TmIDPqjFO{(
zB_J4b!Wt&$IIC1Hqi|SEvui;27|wx09&hHCddeLEnCKzR(seu&*Qs;S58dlgj#80m
zHX_5Ab8c!cqfwMjNy*cSkZ?DWNRwFE)uMQ?+UCbBLG{Lyd>5bZ?~56MYBrWwB@E&5
zDHKgOb2*r^h=-K7amANC1wQVbh!6G+420&3fDD4$(&ov;_2lQnC&#BpFP@*^BzT2G
zXXV499lYLqV?2&;`tcY|_`M-)r(h`W0V&*xXC6%_*}VY6GKNMD7LXjBxG<0iF7xcV
zr$B}}3&?xeis)47>p6&h(M*_rcy~95JJ&#QVXGo+$cRU2wUqQyIr~;i%O<}G7d7d>
zM+@@52F77j26bTabr1;c)Tc(GtkeCHVtpE{j%~FGQHjC4dj`F9fI$qxv2i<_kZZE>
z1q?ZCX(X9%@~DWRfhT{9rLvDL2lc=mDo@UFHk*U|x3`mtNGg_0_KfkJ#TQ8$iq1t?
zQ!@E_uX=KXqVZlbS>Gp-d}r}V)5zK5mQ>q)K!a{mJx(Sn<1ME1&5f&ny}dJHRK#>6
zQBi}oP9&OloO?16;hAITA<Pq(UcfWhqKGRt27rw39Mrb~63<0EDMW<5>@m;au${D#
zdf;SiGf0Xkl^w$$AjJ;`1*-_SeSsk<9u@H%!~obP28<r%prk*=bV`z+Thy;N7>@-g
z1`q#({<DFRDh{@WxN>p7&+^^VcF=8$7G-cCmBMtlw=ag-Sv2hN_%N#!JniA9Jqf5}
zOf$h<M!+>-V)yzNm(sQZyE6a^_S)P+1)h7{J>3rtz}RekU!eg%=3(s4H;)j8W8?rY
zp9UIpyz$0SlJ73KGus^X3^ZWQEL6yVHtnF^BFX0$CBoe^CMj_QE4ja;na2_ai-ox?
zPMD<P5=_Ex0ygBS`QC*X{fiSa&Wao(zjwRFinm(Qzw`B*ko&?6#n0jqpAo}&5?j#%
zqPVF1Xa`Rx!CXSmcg6(atV8PR0JQ*DY@xgCqz7yQh=x)%r_<pOKX77JCM6;Yc5C;a
zxn`?@L+<6hG<(Zf!4m#%_CuHWXkj#(JH#Gw&d|X?;K1U#;=n>Ow6l%$$J6<2o~w=(
zem>j#-^^hJMTUM3!ruXho=hm0H1SZ>K~NqYkB1nGh)!tGihLT9LYNJv3CW!0*f5GR
zzKf4EP^xWC4ptX4vsdw_yBXh4t?`}K4vr)P6yuTE?%5luF~w1Dsh`qdw*@|??my+W
zw?P5zNBN+4odesd<N7pc$@66<b4IkB4-TOx&REP<ZX5HF^g((e1y{$Us+1s>AkyY|
zVqg}i6M^zf5wO>E*M)L|P)7tVW#SP`#B+ujC~h!*H{)2A<t(^$#WDASd!=P)G>5#r
z5aA?ur4Ce4=Oqg3LZ^x}1X;)kUR7(u^hEwfM?woeN#Y}G<KoGP3_s*n0~%0vi+17o
z9P<E4+4h^(;w3PQJEdUt&UZ>nGrgO}ZreV6XCD}0!ugU-@z3l3!}plSaJH9k0cPZX
zyx-mII`;pETU*^P_Wv*bKYxz=kEcJrd~)#kWKKgnx9T0gcrl;7-D+3mDkL+ToRFlY
zH#eG2M4Ukf(|3XFQ~+WyR~Ih;YLzL7WaKpq%E}5EcvLR`07)nBIF3+s)o=uOvPS1k
z%Ada7m&&*r)G2<4)uFQVk?7WWxr^;~R%Bvn?}r9Jc^f?M09<VO`YvJA?v{O15o?d4
zJWtrXPgKTaj(Qa;V%G&x9BjN%maD{jxC+;pf(bRU;R+|IOU`eDOQ7kq7?zhpz6WUa
zQaNID>N3>g3Ym^m_HU&g5)Z9_drZiePB?-ZEvg~?jb`gE$ZRPN0J`akjB>u{>CkKv
z>|>Qaa`Pp)QVjZE*|#4A_r1T*O*f5n4k(8)8dOtj_-{qRgSgi@k1_lncuKe>%qDKK
zC`^~t3f6-yErh>C8N6$hezT|cHdqU`U~dQi3*US#ho)8S9hSioN|wcVxRe|y&&Few
zB@@;f*H+y4z-#;qx0trv(vYWV^6zQfLjvE`84~w{VK|z~lnB-szgBA`-m1T?Y+wDi
zy$-#(6*mGFhB5P3L;PlW*sVjaMlB#uQNw0SB5b~?%9LVyk~-}L%m|Sf4jpmdh*!{H
z1Aw`+qHbw~t)oX-Md_~bt$r=iEt+`P7f9gsh8-N7adK0!sOQ7zLYha^Saoq&qTr;j
z5<&WTLWCEWIWc3w3CJd&QYtos7sFc|8P=N>oe9VcEafB*M2K0RwC;hr(sz=5(jiD)
z$<$rQC#~V~K-<_F({!q<Q;~mSG{uYt@Vnlo0u(pw$eUmb=(fqBtXGuXOmYj{nC~?j
zU&4~${GuGNkMfJYDBz5rf=3pmv5cdF<LKPg==jcw?*&wS8PMr;f`j~`Ksun=$WQ*2
zN3hHDe*p(D!BXv9GqWoe8!$gu#Q@*$)%{AFpkptfW~->i^DyG4#;!{B4oKJc<vh(D
zfY@5vq{K>i=Of;&24{;+<;=JnoNFo=FUn=;tza3CI*+8?qw}<f^nM%4rmI1!^Xe*`
z0!En??#NVubeK<*A)dOXC?u>DNM6~PueD&`-GJ1I(gl4&QYq~+wFk7gP|o;*sa^8o
zG8s#8*6c2iF&%Ot<yg7<(Awp~4D|yeM2l-N$-F7(6;5eW@NqB1|F+noEk?{aQ^aS#
zEjQ&zW6$tAXC7(d2tGR_!iVwu{$yU>AKj;WM$aF@g}ZEBtIRbZh|;9}<s=&LJ3ucd
z!vHxa$y@-G_Z_uq_K!Y`LF30;b2yUOEh={s1Xw=D!A9NOmteU30Pen}J404_^a)3Q
ztK+Q#-}Z4Fr|fJafq*(Mj<S3A^kHUB%Q?<PPG(z{;lqr$?E3x*ARWu}@jdVv-U!^A
zAxG@$If+x13l~v-5E!34RPBq^bXh3k8o7*_9-ckDGa;E7XMe*SLi(SD;<;PbElD+^
zRX6vlg`8?+pOullZ1g_QEXrw4hShWS4q26aKz(DFU0~3d6g8;+v|=EeBvcFV&rSZh
z9&`h8Mw3Z#2Yg3IMh#PV!4z~1p`&_T?{aQwtYy}z;n?E3kb6C<$o?fpcj$g=PInyT
zIL6`kpiIS77Yw+~Ig3)SH}UjPbuM|l;(MAt3XlcrVJoTz0oSSM1mBY<6V_UYK-U_c
zuV)~hvdr-oaQd2#h&J)_7JGsQWVlicyMLKw1vEK}42Ms4C86X5==zCc;M|@hSBjyD
z!WQR1E(!3@TQY`{)via9jx;g|w~Mx@#39xSQ}mNp)^w~B8l0)2b);3mK%6Lfo~j#A
zqulpB8mV_96D>*|ZI@s$q}IMU+y-rNK-{z)NC@XPNjSCgU`$wGeYooi$oqkMXQ)-d
z?jBI}fBs`j3VZxejZd}Q-Zs7?v~apAeMO5_gU|%lVsJ)*g>bX2ZbbvjKqAT%q7gnC
zq9+^h7`;N8Vo*b_pqg;2WztKDVlYb)7F6LB3BvstiDr^zK`|onAqz&yB)NdzB?Jjk
z=9J9d#`z`AZ%4Hq;!UyHHkd?-IRvzOFl(8>p2K<}!wk94=qwsWsmPRY4L^oh-y>q_
z?Tp;C2}E{o4;sxK=n|d0O$KDliTZg~6k7YDhJ$R1@0jdJ);h=HNS)1K!(7%$R9ul{
z)a6AL%fs}q8{=sYEaPSZ@@SGKNCN8b=q6!W-$x*3hw_|evSpYmTZVwEXssdqtzgyL
z2Wi-Pk7=?KTRFbAx;6J>s9z}Rw2Y2U<NdwB)1n?(76i+K)kHO$RQGUgr(<kaYGZ8?
z#kw`22NAIEJnBndlSwuKf&-b7^KDV60JC?+;6<6IV%ww}TqXnRo>ehJ?s@)pKQsmp
z0=D`zq-ii`Fg|HpJf{WgY5l#UYt~%LlwwJW&JKme6p))|E;+&MhZ%ax!qKKG>BxFE
zgIGZE;H{(m>3KoMdHl9#uZr>4PWk;li0Un~zJ$!8%2!P{6LaufX9ssM9L{_|rdZI2
zIS~e3Xcpiyfy>CQoS>)Qu}Hq3MLCeKP4<eiO5+D9$0{@9-As+PQ@LZKGRTaLRom7(
zqefdzS0urydEo3jlw?l%45;R6N63<>N@8HjjVVF|CzEX4aeAW9`*oXDwlMjlJ+=}<
zsKJkgSs$3)(xR)j9%}bM*s;bdIEDPq%Psu;!UNjm8#d?geB6kG*h_7UZY|jCY<7{@
zY$j(eId0$Rii^5+G$ELDS6+!7=feh=<Uu;jcGrVbJNU-s&YlLoT0FL>1;jbci{x!A
zg^AeUL?Ge6jSIe;<)UL)<jQ3=n5Y9<7A%PY01~td5QR9vOMNbDmtoxF#T0e|s12$p
zl>}vCZ5T&yW%Z9iCd(9>#}$u2-m6GO-6S{bM$0;+TE@Lxt<23K2`|ytoSPUA0PU~$
z+QB{J5&R_v0>7QV5l4y8jQ9`Ik90i`Sbcl~+?Rt+b}iY$rpObHHD|tqZY*(8w55&*
zOPGVCIlR^6dLEiOnL(XA#EyLh;mYI|BRd%nlZizcD6|&SGZ1>EI|VAI_6dY!uqP(A
zCU}@NT{OpSSyGNA-6`(m==?6)-%u2o@gRhr<+OwMt;OL81Ohk%(EAc8n3|2mu6eV<
zI_F`Qg6N)5dGTvD$B@KfYV4{2AL$pY-SWZz!SvJyx~AEs(*BgT&DYML8inWD@?u;|
zSKiR56neHvmhL}iyL&eA_Ou;{zPM2S+5JC32TJj@dYTlA<bUfv{O-F_{<nt@zW9Is
zA^ktU{gVH!KL6X%Gx+7`#q-|NqmzdD-`pl%pU&0Q;D6+0Hk=Tv`2XO4_|Czzk@+<U
zM<9KU1JNyg)_a!WL9bz)!O?sU(oEb(JDjWuJfV-peE>z+%ajWAhrC)G$WCe1BH0Xn
z2YL1?pkN%)PZ5cM;-4E{uMTnIa=;_sg@P<`rMy$DDb{AMJ9zr9gI6cwGGWD5Co#`3
zhHAckBNGMi^Q08Smh1=*6uly<_{(p42F)h`a!_<ja@?*pmtE12Ki&@vUmG5h0QNQ-
zPEp9l??@=P(-0W+at5MD@))<M62fMW%Ul8;9EL4bkLq5_jD`Wp{Qa`~o`o0B2-r8A
zY468_liuNr=O?F6em;IV51C6JA=-4kt2Db#dyITc_|Zep<Jn|W-Vx^d#W&-eeH77s
zOH5QDtz}=WN#D`;CZ#QD9t~;lL(OK9>A>K7FeR+E@G%x=$%`?2^dDeJmc0aW0>;pJ
zE|aV$4dS2wQZ%2%?_)_|lN(cs|J`r}n(yQwk=nfRjE*2@HVNLZ(u=$EEzgub0@J$7
zRF^W>H2w$&%^b7*`a>*eI^X{KI1^jS%q}LM*YfRufYp<8{Jgf~{)br3;b*p-yB}aV
zho9wgx-+egDf~3zyhqRdLkRXuo7d+i;@3{>cPH#GJeyA}_E*-{`IyVIbOm7fv-%{e
zz?|cExj~t?J2Moutv;P*@JZ(Ran*tkHpLITl4<-WS;@v4!tZud&Z!^#ewT9ihhNI!
z?_eqGD$=PdyBgn(X3iV#E#cDd|BJ=yKiq$~_0Y}#^lkUy{V(_bU-CcwiSPeU=DYhp
zsjT}p$Hjj!|E+&a)I!kEwN+g4b3iWpGfp@^QTeK$0qYC?Q_45>4#4o>WK?gYsgjl$
zlQ5J2zsg%=Ko&_n2w=g$vOEW*V7`|0f(D0Px2g#c0l6knadaFk)>mg7772P4QrsKC
z!$_kXJ$p&l!qrCKS==}n2hvfKcx=uB;}s@FFt~-a0SYV<AjI@yyLtxDfZ!82rw>Y>
zG7$mW1$q7!umXcAmKqmct4UVpG=UL>8Gy@q6C;LaIVYHht$8V~$VRya3@+k{=f>Y?
zFR%UK!(D)-L2PdeYZ*BzHu6e%J{=vg9&A6y5Fb35UuM_OFs@t)2bAKKL!8*nq{rd{
zvn~;i*1L>{W09@IBY^0=;>icAXD+f%vwaXn1ac0L5{DkS@F2J+HTt?(W=Di6?io~r
zRWU3HLpdYtN;Cw24^j%d-3~%M0C<wN_CwKqD_Bzl7&L6$HgO`Bk*H5NOwWPzGJe|J
zZCfuUg;UG%woMe2MdQZgb9Z|24SmxX_D80tc9~jAB(gat!qS<DiEkz%Ozs2iBFo!~
zMtJA_V#QHnb=18eWT&;gy&zrhwt)1J>^FD(+Y3I~4-LS;bP3Gb%pFj|mRwFEc}?i}
zXdW`3f0<;*oBJPd!RA`3Dq^KTfqF%W3#fB8Z<x9xb3J+X)6;{~CqkHd_T&_ccY7wx
zbrvQ$=vC*v38qbuJ4}_Oq9b_Q$Pk24b9vIIdP2K4fVOOo86jw9GSqy5=8qy)9?Pe!
zYQQn7GOWr-Vw7L>#(74u2?8G0oNrv-((VLZH2p~xaEMgODAn~DX6V+nJ)PooM7Ei#
zOi<pbRi>h$<qJ78m7nC5@{YMT;+ruj5FiUyK@t_Ch%K9z00R^I_kxX8adNG0v~+5l
zNjAu~u|A0?b&uKKaNxE}AeFR#d~^u1Kd2Ny#3$Z?Bdr=^uN<)c1~Db@Fmsx;E=ihT
z%so8Febl-S(;PONps;*Y74M)v?<t2{<jo=|2T@5mK&*bO9)fdFBC&E?a;{`%l(nfD
zirF~Y7{h%{f?7otZ{BjgHnEodgUw?!VS7+*R<q`u+vO7`xmhO&&8t;cETx`kJoCmq
z&2yqwFU}9MAxS$8H-i#77s7>C3ajYM`Gd@Ft|Yl?6-S^EVWZXkkT9oGpnVX`NjdRz
z#~fbqErz&CFZlTm--R|LOKWsE@c4+Y>>6jP?U7x2y}hl^Q}ot;FTFX8(fdW^6fg{I
zGUKDzfAHgzEzp}x5FiEa)2BPOCEO2qce7fk7!G@EGWq}6`_`^Dk|fdndifRAbI!6Q
z5JE5BSbZ?y9_|?93)s_hd%Ze^RDjw-(x@b3Pmk|!zY&q|tgM$LFmBU3+l{g^BO@at
zBO@X+BLuJnoZr`8R858deLYsvh?O*BB^xiQ(j&l>Bpo$5$qGlzu}L_x%|zBPhxi^<
zV*IALAgmho%o>f%8qLfa8@z@m{_EDq7&XLubxR>ibdY4|ZTkpEIZKM>B~Mn^=26uc
z>;fv9H45J8tQy(F>0eJ&Em??K=uiSJ>X-^)ruxU&pp{GOW64V6?B$$O`xNbKiawwB
zByK(!j|KJH^^@GQQrYuK2ko9@Trb9V>JELFr<Cw}`q8P!o)%Vjt;h<Rd_jLql`VN)
zy2!S*jJCCuw)Kp*^^~@ajJA!Gw#|&TO?O+Kq+{0|BZw4WOgHiu=IX)_Y}fta{a<2n
zD~wn(n*8wBYIcgOXOkkP8IMEibOabY+ZBcFiu}~eVO*H1uC3_WYOby1+A3}#Lxi_V
zutE?SL;yR*jsUpJ8gW!}S|v<6o5y_Vv{~#STfUREA`0e`H^VuamsPR&*;(x9i^97(
z&dv(9Hx3V^`2F4v%NBD&_RaYsA2U>8GL5;cDK?Ol%{ZmNHF_j(1^mnTGDH`is4GFA
zN4-0B25}?~A>x8vR5gt6lxj{RR%Um?>{_hmB8^T7vXHk`(<pVD)0NDecfivJv4cF_
z9F+n&pARcPiqe)<AuvhT1v*x}J$fA^2Jfw|_LPq6Ge7Nby$@Jb*RWFUrK5BsSj_UW
z7qHwoQRoL&R*r1A`ZK7iGYgkSZ=oKF0abB8y=rk2*05lycTB#La1QC340|xzFent6
zJa#PWBV-wHwQs6xM1-vMBTaI1s{ZkZ4KoJFY6{T4eihUd_#a#XH4ZK=j?XVCBg*g^
zs`;@_SG_vYnv=ck{p0Li`}E-GxP8!mdw%xI`N8Sw2@vA+1R(zHxRhaiCr`HH(Oa*U
z#R2tX4p4<PDy${vx-E#5tFWGhX`>*fs<2Ll^@l~@RoKWHQ*#kxQiTmFY<z=(x(b_F
zqu+S2(W}BH6*l)W-6BVc{=zg#O^aQO)7YQ%oe5x3#l7S+Owcn9w70GIlt$$$#OR@S
zQw%^FPB8x*010Q=gYz2dYGnDW%wndc6oG;XND-dvpl?=6WZK-{f)eu`+$}u;i}DlI
z`{s5fq5LWAz&h6bVc(fqnMKT}X<W{N^*1mv(^=&4EX?$K-8sz6<dAR7C*?FGvtzzV
zv(Np;s{l5O!(-961QSUYyIVkrrd{O|=sIpC@Kaki9Sb4%;AmN`^@jv0h|Y1{Lj~f?
zxb4B;BBWi!6n8QV`lu%hDZ^ao5Cq~0XE_nekT(cFVeodI;Iqrlbof};YKX2KiDbdP
z@eJOF9snr7fu>kHd!ahprLft94&{*tF|cr2z?fz|y0F>N8jEvPsf4@k;hSfd+6l&z
z`xbgsGhZM@b(Q5){&go62YM&fz6&tz_*K{+-nT(bNq^}pF0sqkz0~V+nf10@^u=-p
z&gq!$^VYo4fNq7&f<0<y5)k`Uw?^W}-!w!l^Sa|hy;Sm+RsNzpxUbddUtDeE_%K+7
z=~swqRead6$T>IToP15y!SatNjul~})M_@&%n6N|k9lZhJH<tW;ix7SXG)#AsN<g7
zuTB~j$vdDwa(xcj|HR_|AH&pN4wK&g=ZOE`+-z2@_y0}!zvuV=&+q?#y!Zdd?+;$L
zFVAv4{#SJqAO7pl{DEYhu#3yJ@H2n*5wpnN1|xiG&T<orYV2)!PM6@XqwyuXUu|YS
zZt(~6r@_bY?T|pUl>=yuNp$yhiE}pEm!OO`9`o}c*Wm5!(Rg?_q>HzdXbJNRu~dI}
zyY3#n-ZHF!<edJkj3nTVZ`nh8c`di%y3Ni}+1<SWs|(fv`_G<Ul?{PcY*7m;+Dn2*
zUjZTuO7OQHAf)}B)lBnA@1<h=CX6suI{S~a>NvzqMD1}%1n7kOtgg8K-d0Uxw+EPy
zMZ`9Ep(AK~ayEV)d>KwBW-C@kw-6`+Jk!ix^@AG(UtQv<r0lfXmo4gmm`URnby=Cx
zDBXqK9%Y*q6c~E<!|}&}<vKpScz*~oCz|q1ZK9UWefyU!{rBabQBzV>ud=&Ke0oXm
zGYEaR=X9op6XLs)T+61+M91zfdDHkyV2-z*2`dB4vg4u2oMRXed*vO4u3G0Gy?D%F
zjyqpdHQ=l(KXX$=m*)a4Z`T{K?TwYANu8Hi=V8!28}z>%bUI-K4I(yv25;9T3pkT#
zr&BJ`+)5LK7AVXM&tR~+&=%0@BX0ea)(QhIo`^~J<MQor7+znq)bS*Fxzw_XoqZT9
zI}V4|tysoMFk%FAva`S{MGEJqe1Q!kH&A0c$$dI%g7Gz4c~?{1PsQmzpRHxQturoh
z&%&;E*M^@ym6hQKYvZ&3fVs;PZ@O**iJ$_>{1Wv0`$@YYpAP2Y!0usnz&7Q_GW-yN
zdtMSEUVOwZ80-rN0}9IoRNAn+EB6lPhRwRBI(jb>8b7&>i=2b;&87Lmjb*hjCY9CH
zPVrL1zH^U|U0HI6b(kt_-Ax4*oXpId<dHo#_OdsQ8Ek_}Z86o9WKAkd<$4T}^)~2y
z3@7;67FgQY2pX$tf{UqOtkQ-39&tJt(T6q{7dEUGb355;+DUZKXX#^K<d^|jn}rTx
zeXyM>S9`qy10fa4?8dArYj!vsj=QpBa`~3|3pYGx!2pvXa0nWTSp7V>mzB|1$fK4N
zmzji{ZwbiZNZp7+T$jBr{Z|*7h><@WRdmT!7WGh&vOj%MqNhO?LKd0}fkjP*BPgQ3
zG^kb|bha&T9lgV7aOfKdxOU;G_yxdA-o-x-U&HDi!2Z<bqdpBrySWJ!BaHhNFrqH*
z^yG&9|Lj>1@88nbHD3+|WFaY@LhNc^lGF`0>AcdOb3AE*k6gPCa!W<8vRCA;GkK{|
z(Y}~o9Rn{!C)aO6pikE~Q1v1b#^y!WcW>TZ93Qd!zb{`O`@GIx{B9rnLYH0Ny?$NV
zGbflv;KV_&bI}{zlogmDQpdi`@W;>>VA(xwjz9-z%<mPvKqh-);>zwx@z#h!DNf>I
z-4?!OkIT)L9KN@TnJYM+lHHgL-!CAEy`KeakTdTFf#|1Knu)-jei#g<BVUn~5mFQv
zglSR6V-uo<3**dh-@s9`iIqOYf~4$|Gh!HYPlw&Gl^Y#<UO2sTAf!!tAG)3F$ER4L
zW6UwZEJ!sy!8N6!ejAR7l0GeMc997-^R6jfLBRh##FyREyO5>WR!oeU)YalaKP|h9
zfE(s09ABgJmPP4g@8cs(DvvuTdEH>Upf@Mil)F15oP=lYe%u4bMXz1OAp#8Sb;MOP
zHhr|1Vw~Ns_bt{b(XpX|P}UToH2Rt%uNzE)bameoI%|fJp}y{k?R6c#{&U(J168lS
zc+r<mANN#HRG?#wa>}+^BAVELWggS46~9b6P10V$5MO734fqxx>z`UO-|ek1eZ1@O
z@QwvN!0Jr<kVu*8jOhFi*)=QQvlF3b3`O7^>qY{EFXpr<3(B)~453S~Vlj8d{D@TR
z^PG4p&;Ivu{zs%wJOWHV|J$rJ8nO7_)!K9Z@8|QsAN2h1-N7aNmG=Zt)XsGVC>Lw-
zBS4}q9Rr2}h=}`b#PA04ig4g~Y{p`5W7z9ZZs;5Unt>vyH-{|6s0Q-v=!`{?;R1=k
z+)-3<ab~C5MR-qzxVYDuqVA!6B5<Huia_9fy0j8`euVD>2RCJPz*+XfNeAO1s<7F=
zgaf=4sz&edtQ|p(PpB<ep0y;W*^)Cd7Kf7UPm&e0{atp|hj%3<qmbZPV@?C}zz|m3
zT8$?BQ!wrY$V7O7kFiUXaNtPpn#k~1al4$O(6SC6y_Q$?eol!`>zS5V1~dv5ob32g
z)0^tw@(X+0U>LQM<>-=~CHu1!#efXuhx1@@R6vQ!c3_d_OqJN{q-0l%ngpj8MII>;
zo&38B)5)1j3vZ}lk>>hvdB=o9?QT)-EsGvq_D8z9UBo4p-}Td_7w8M|_E;if1%?$(
zq!;7b^Y!1Jq;lmhyz7j<_&z!WR!Uf@%o#u$XBXUcw8Am2Xz%`I#3K7bF7v@SE^W^2
zqId{bgzFtE8@=u0!ZO=Ikq;L}zGa5Pd1#?A7Xoe%N$$s0kLDpk+~fWcb=-yM<56i?
z41eMfa!I_6AufcRjo7mHxs);*?&M>0=JM?+G_7%!z0Zqrn5Dd1WgA5cc?ti>%fz_+
z^~a_#-j>gHB)VZ9`zBr!mz2OT`H%~O_~T<j>jM`69zJx*1wnhXYe1NDr4=FNGZE+V
zM{>jr|6ZDN&-CvtWovyowXn1`xEM7i(XWW*dHsXRKS!T*2i-Wh_eQMa*_?CQ{e=aM
zn2!xzbxP0X>PxC;&sIITGtdke=L|x9*~*s&aOu*+9q14{3I^hZ)a44AtY~=}uQFD6
zA-;-WDkL|5KTj|uY$xczFHj#3-yI!rT1R+EmA&a34I?t7DSQrd4s`u?8!NXD&JX|E
zZkO;Opi$zrUU-d-svbf*=eVw6SQFRS=!1KF?RLL=<BKom)5yI2c8626^Bf(sFAvu<
zzYKL7^simrYoFaXXcc_?7ctxJF07~a6u7#!iTRo7E5mk~16gZ*E|p5`WhT9T`^v+E
zu^^tB!v)QaF>_li!2PgqtPc0BoM*4ZvsdD&dnF#(7=LU(h;&-a>9hC_T?nZCYA%GI
z)Lr4MOLbCzm@rm6XUfoth+WRy&#+tSeI&aj$Q>cv5^*mC&o-%i6+ehu!q_EQh1CQj
z`<uH6+T-C&UV^7(O2<;*p-`UHEd1F^HvtnjpItBA@+-<D&z9k=b`lTBFpGWo?QU-@
z!a#Hl{2|Olri!mQ{_zg)dr|mi+6N52oQ<4yT=(x~i?Ww_CqLJ9dlQd+4L%2(dgDI3
zwYqeltUuyL=F}Lqxbf2gt}3xgWJgcrsy0<lKIw|zHJasSHm3nzoECpF{Vf>ZobX%Q
z?19j6(%tR*aWER;wQQx?&Hf}I$Gg?U1uCAhKBD=y@b?Led>XQl%h;R`9^yW>tzMT@
zzodp1y|kP|2c7*!uHbUHWXfJH#qtqu=St-4o75mZ;|eCvmy$e=;<^uQSbrw%$7%S*
zR|j7G|Hk)U`SMg(U}r%>sSI)ti1)-l(Rhs;udJqmp{nM)4l9NrL7CYC0jS{bn1oha
zwps*2OG;Ie-VKVWNWA_+6(kOGU&79<93{5QqZmEzJ2!7RS>fX7e;?}Be9Bg`swMqA
z`4p~a9qU=gDxzbtSz1vKnnYjD+VQt+${#rALn_g4p0`x9YiVL(Y5lQBaMbEa=xQ9r
zicrxsT<wQ&$`@$k|DgUU{cuxg1Z{fs^dgTi$KI@~T52*1zlVbV=_t(d2<oY6b6x>;
zZYH0Y=?@dS&g{CKfH1RWM;ckGLiVy(P$Lr!GzIK+cs+@$W#{<ZV}5N-^tZ1%Hu}@M
z$=2WsL7PRZGoU%%%`V4Um~@&^8FY7RDTFzDHd`&F+I*HU5>zdwTDp<{lc<+{7@W{A
z`?rzXn<s128!1tJoKRivZLTa)@iU+z<aVDF<zyTI;@|&H(_hWo2>X~JEHh!1BM?9=
z9<Cy_q0g0ds+z~;np*O#(?6U}@8-8f>-BO`;449TDmINmkG(lW`2d3h<1Xmp{Q<31
z0isuG9|yzx0lwes4DUwx6mkOJtrzA_HY=zh7IMxmnUW;agwPrOvCR#Mq+xHEI6amr
zqHxh{5mO#Gtdyr_q{vZZSt;ho9@3^M<aeIQs?ih)hgqXdY^Ggf^IonN-`*Hnv@PTk
z!pj~KvYliT2`WDXI!+B@nF;1ZgybyFhmkaSavElFyG%kzHOyQj*U9Q=;v|bPax&>s
zExJowQH4x8*}`<Q*DT8oit6#A#+>$@?0jECL-npY6@6tJdBs7_=)bQOF!NNed8;?(
zJszkPGY1_QX18c8&UloQRrp|{_NP6Ws0mvT|IV--p-Nq=UeB)H(5g2c!qiNhn|yYr
zgTEfq#!Mcl>Z%vDBM@^b57EV#%>cZ~2k2fZsCpcf$g)yOhIw&$c7E{7ar<v4Z;slh
z?=CK3;FpJgEj`5%HlFqf*+tzsXOG?e$UiC=)bX!RcUX;u4J(&lN65jLUq?th)2&07
z`HMkmJUfTJ-GOiIn|OwhcQ8C5&c5yi>QNw~FS)%zPPTdkZ;<Ra+Z!a8Nv^7mZ*@#v
zbNh`X_O36D$3EMxIUPz0$M}cF_u^=hqsw$OWxPey=&=w`jX!}yOE+%Z2lcf=9xaXI
zayMvvS01p3xDHUW?*m8^%Y7%H_E7$>n5|8y)KgHYKOicNI4X@4R2nDp0c^}Qro2yC
zmlkS{TMM-$$5@PBI#Jy98;<&o9M>W)!alHe0r%O-b^MmY<!v~Kmmc?{aIPB>0=Ipw
zPopcXhg3gVAwa3QHj;GQ{EjrE-(1cygN`%$$K1VUIK12)7x8(SnzMYh_SYbK*$;&g
zo#^Y*X6w6icEQ)K@Hh3YGn`W(xdx&#{+&@BBNK`0`1eEjvXW_^&za?BI1VB26^L(x
zx|{6_qMl{t3;u}N=Ykg;_IWl`XXCQQGK;tRe^gW6#bu9U%i|<dssRj8W?%LoYyK9W
zs0_Fm%wyI&x$zOqdf9Kbtu9$RyQ*aN$2RNbaAL;0#6I>#BKxpry+Tbh%_$OsLQVg&
zYoMlmnZcT$!E#q?mg#P=;MAIKc@Y&d65_380Sa@W?!kZdH3_iit*-V|s1w$-!u}%?
z*|?~)+ABFCmuR}p6{;{k1C(yrYeb&UMWD^elW`EZSS+8Bu5q5bfvjHX;7Rvs@Yy%G
z(%ov#7$%Q>S0%oYco-dJ&`rD<A_)Bu*tqnzW!iEQ(Gk=55a_scv1Qu%644R-5i{9y
z5MHCLqk)Sy`uH79T=?raE-Gz`NQdwr&;LA9{+G{TXPTD&B_sb!y;-eQt^6;|&Bo?)
z{+H+cFF#=ZmtT($-{nsKqAKUh{=%2)T*+TJ43^|Y&-{W}-l#M9!_NBB52l08Z9?)F
zm7)cxNWdt_)F|xqu6wX|qPaGmVA2*In~x=BiKkenL94K-Fnz$NH(z$`UI0Ok1wf~H
z4ZH~81G1gb6l#ybhO9VTzRJJkNS;i(;d`!qVNg&jUArJYD2r+N)nykkkAo{D5jXgI
z16+U18XHdsB4DkA#qw&qU3C<XlLhTkzOvZ%t&Lpl-ZCt$&wz7(n;xf*XfgnEGitLW
zf6$q1mT8q@$Va^~Q-g*5qDR7un1h1??X^eaVb@o^7>*thmeFl1MR6<1v+-s5KzkC>
zSIsn1RV^Eql$$hua`TEsc4BLps~c?f){|f+4K0JGwDSC{r8WA#IV`~kXM5ff-D=5X
z5ho8$cO7?<{nGms^5mK1>l&kon~aAbL~ju%*e|Dp@vz^gO8wz*RPkO;@pyaiUx;<o
zBjcAt1DVNcQ);ZMuoIyBjXBxa`LGbHpGQauyYJ|BIPG_FS5+{r?s4<vVw$#S*c*rs
zp~s42I^I$1u*<1R5skcc&u_x+p&d(OU{<pzT>$7{v1Ijy*Id!yX<|VG@H+$WI{;|H
ze^yuZdbR-JnL+?IG+43E&~=%j{;QAty|nMGvFm8&7_mLneG4)lzI*WQAz3mp)@8hd
zhZec04xGZu>2)w)oT(77=&4+TV480a`51Hp%t9Lmps>h7#@r~C;WPYH3IhN}y-ulh
z?=Ftpmw!D!J~+B?<rR=CTvH)<R9J@w!4HSA(RnbqVR<;XoR$Jwf?QGLX!izXgKw^T
zW4yni7r~ArGcrjSTD18W-ii*-aq@x?Fv9==FFD<i^oh+i*0$z4b*5$F5I$fAl#))r
z><>F1DfY4w&n43jCnO?cg(5Q%C;PrumroY$zxs3(75EZliadYvlWotl+*PUa8Y{&{
zE;(XAXEp!RARdTSx#-m>+MV0!;A4AueI12(TF$$2%lSX)UPz0KR@I9*O`g!sl`>xm
zJ5<J?=Gc$EE}5M?mOO$9rW-2W6;@)JBql;<snY4Eo%URE7&?q$)qBC!@Do3)q&!fR
zSmA|N+cYP_PSeCHu3BpYh+ivt%ijO<9p(B_kHDNKOtos?6C?Kq9Wa-HOSREmec=gO
zZ-aqX-}q1CkX7L1!pDJ2viQ~%<eav*p;TXzng14A3X{qJ;LU7W&16Q|*eu7cub4Nv
z+=hY;rKLpuUXtYhu52sJeDsq@eWdiQi<8s0uZ7yxetUNIS{#{imk}s(0cgAZS1?}e
zuB<DL`th0018HZFX{v$2N4vYGfJlM)+W>(;e!sFr>Vt1=<nb+hezcG$RflEK6?L+g
z+{ag(g^*nuqhn+kQ+whKrg;8?+=sWyIMtz$ab)C6ki-^RWlRsFjmd<=U-!ZU;fZd4
zKgQn}vsCtBAYEXV^-g0pjE(P$7be1UP`t`AoT@I(RD0$kF_cOju=n#kI;OYl)zoaW
zu1Jzc|MKvPSa)}~6ZZQ1JdCk^XN>ZIwP-n&)^!rbg!`UzM64S&AZQ+bp6u@41)qDO
zF9oCQ`2wdk_2+*YmVU|I2>@pGaWu?Omi_XlpZL#95I#N7&aT*hec4O{OPT{JjGDth
z){EJ&QuKs_e@??`*hWWlTfn#0*D^0Jd&|Z6$(ANMd-Y(umSY7HqT*I7>4gr(O)@;E
znpT@z>*gQ}vVRJ?&OWUbqT|U)R{1-yyss@=eo~Cei`XcB%HAomy$KrJxS`Kd#J_#F
zn|;R7!viW@I~ZvYw5kBxRg9c|3O;-)J&5-6%V_#AYEMpmNA|z0G!Jsw#B&=S9=6-1
z#ko?@=sXQ4!QaC1AnYRy-HLDXtRU^SGew~1FvOQ>zRO<1jxGL<CgG^9FQ6<(3E4Zb
zR*XrL&6C-k9dW|saeF)>!4kK_xqK`(WOae6%{M3J%*gVvCczM+@U+RanhRq~<O5~R
z!9tweCUS}ig4o8wTvOtRm0U<UxXQAgzq&B6X|($7xVPQD=?|}hKJXuM8vJI5&Fw3&
zros4V`?YGlSz5B#+czztvS-@uOJ3Zc))x7;Xm>j4ey)5LTh&qT4t}iiqhtECXMxc9
zKx?z&-Hdx(`g>3HYTPp<CGSO?m*sO6?TdU2LuY)sR`jR%F3l*S4Z6(LX*Hjit#oO9
zg$aR`^%=G(-9DaSqtf(=qB{-oVwxE(o1`8=&Tk~gOk_jQp{hxoBG^WxCJa6wPP+lO
zdcdY3+tN2ocWevw+EVxBZEwN}=AMJ*bS(0AJY@F{@uvMsnqisLw+RNItZO*O1!KWu
zRJ=k2Jpo$<9w=l02JhK0PZQpt97LEl`5t{y=&S9FgXos>CG+blpl02BAZPj!zrVv$
zw4zo<m{cnk&CraIG&776dLbiAdIMgfhzz{Z`!pS~VzyiqCGfdSO~jgceemwh;a^#n
z=D!`Ezd3%b^kUyywEOw^ROFe_To!65IwE9scKL$Rv~Mj%OM)8wbxBu&m1^czj6rcF
zotP4@iiPcshM-t}iJzBYtp`zaFTQjV-Fr&S&FQfNb48OY3bePZ3l<^-@|<{ci4?dK
z_o{^8!PZ>Z+H!^Mf;<(p>q@M+?=<&)S#^Kx`6lf1{oh&d|Bh1b{$}6*Z8SHUo7Vl`
z#@5!x^ZnoF`@cWp{ol8F@BY3maPN0C!<}Do-&gK_1Tr-4{D#gPqWm6kFdFrHA^g|l
zdBwcpC>*0&5s}|(paHL_OT_R$*#3LCgMPthp5!Ehp<&aI?Y+TZl8lDPp23h9huB33
z4a^QvOB|zGIUVSj5kMdG<cdcxtuFpKo|F(JHkhs(aTgjgSZq!_uBbji63B2wf^L`Y
zAjR<T`nM4~I`+o5kqfDWThjS*(u()X!a*cM+N5WV70tXBj{#skhc?1eSl_5I70nx1
zO;&wF$-TsH_RHCVr;i_p*DxW2PPlJ05wEV>F+j2FKV}rl7ivsafJ$d$83RIV#mem1
z8n61n4T7(-=nbede)ZCn#igS2uFDW>h3|4FG5`89h{D{AQTEQkcsv{QzZ~#aZxLe(
zrI5Qk&nVt@cVn;YUA?v(9FhzYCgswK%K-@X5zBsRd&y(=Yh=em|NL2(<yufIwg7Ud
zD4U-4_hNt>2LW2VihM6^O>lOA5zXL|b(ZO#6|lZ)RZ>r6@xeB<^5Op};)96ehW`Z%
z?P~&tnj^pw(t!&H*pv2?L=2Y@ED%gQrC|JhCVb=puhP-6#+@g0xtU|#OBRa^3s+})
zASZU0Hs@9n&)u25k;F1==GsJfFbIEi+wYZjy@PHSm~b?muti&DKbWty;(3R+VdtYK
z#XIUC3`S=<h7yrWew{VI4K@np#XcM{p8Q4(C`O$)n@-@bihC=Gru|8Kcr6CxCv9OR
zv9CC`ve~xAKhb`t9|jcRFLYn!;;yogbVq0wn3_p;{#XHJncB>bE|V2G5w2-~a*xyF
zvzb4`t!$}=R$RGA8G1~$5L$bNVv$0b97@F^1jb@S#%!CeO8WSK>P>R6qC7|^GP4jU
z^J+C34Hm7_D9{VgXoTcX)@OJPsm+KMQkR)ulQFu;r8oe4qx*n;jfj!il<GDs*p*LC
zhniXAab(*heJ7{7<4TNq8}VNCyqK~uMGtc2snmdJFg&FKQ6Qo5@NP&Kjz9?A27_*&
zKRdvi+04a2{xPU4{bRy=fPar~BL)rQUw3=gB=bm8Ib<fFv<|2UsmXsnh@utc*0lJV
z5rp#{vmwN0><X@9lJYBPueC`@*U{J{h3$w5t2TZRc*{z0sLpf?cK!2&`gl$rPHg#@
zh#H;t=SR#)Cy<Jg$QeMiOb()_PZnRWP?Vy@p2r)Ur#6VGy%N4ud0!U;a2^9-VDN<e
zLkpE&w)UA{b`?^)^Mg&-!{-=jT&F2A)-;X1h`i5G;R}G8N2i~Kj4@?j`o=S~{UT(Z
zP5qx~D_hWJHk*>FrAYjai~tSmg;)VP6&7s<Xs{Px2S|m4NjAm5%cy_}Ti{$P9;;%}
zSlS2@n&t2WP-H5=e_29M{(H=S7dIWut`}xIL<=+?;>v<yKUitE9hoi}k#ad<bE0&f
zj?y>_rLlohl1<~#lGJFH5;a;`b*xuS&TU!53`$BS8Q&Q&roXeak!>F(rg)S94d^=1
z1sF`h_$G=QNZB@t#MO6^CebQ!k}O~CMqw*PLnzCPakNO4!ubGXh1WJH#yJ_wQZ#jg
zY^q5;y-CB-q*25kP*_QWAOlesh<Y4Ey&#B297N+gu}dC8oY>0u9KxiK1v5uK=_buX
z5Gk&@%|)g(<7X~mo?#8m1A89xXrWUQvzt_woeaZit~q<4+ZlQ)tnbKj(a~Or>0;1f
z(YA{Yd;!Lb(p;ES(^!Zl(GrG=#!f7ymG+^be`u8Lj8W3IQJq=F4R%D<4E`Y#?}?JS
z%(y2?8Z(+a7D`Xj=5eS&hJ|4#jCSWYTeR8Z(6nr<>X}&89{{UH23CzstQtA&F_&(t
z5qc?Enb#}V-IW{8$_?9qb7o*GV!@qU$G0O4^d2w8SnjE|nwDlvwl+ZN5bP^HT1D%w
zsAWIs71OSS#%s@VPLt7lu7uc078?htQ)NMiIO$zvWrI$|)+9jA^Bu_$lAG~WWTI80
z^&uNzSxOUagt7^iJ)H|o^DQ(@i!QXx@|rA#fTr6g^9|Hc#R?8J!R#m~my&IaVt{fR
z6bmDvZaK)rmdQ3lLFz8vKoAH#(#35pcTuXSH^h&fjxD#CcM4HMOj)40xL<?`qY-vq
z<3*pN1)41y*c=9n{(_b_<fe&aT`)5!c@4dpan6#6xF;tUn_%*ql-k;SVQmc5rC-yM
z9r{}5GL50nz{>=Lz7F7zJQ}YyIx~r;J1G)%pG%cCs9>kubf-AV1=paLotx90t0+Us
zj;jv(Sk1aFy@4aYl)MaOMZ#tw8Hv~{y+8B&-&e0L)1Uuly#L+UXjU8c``_kP?fL!h
zpYi?ghWEdF!2sZShtr<7Ke?TDe!u^L-}}Pq$@Scq)sv*@AWe93Ip%Z<u6q66<O>p}
zHvo|s^eL_|##tc(iFdOI0;79Cvo195O!{9a#B~?npz@qP7=o?c?u`1=2>%}g4a3yc
zs*Ts)zyB?eJUe~cKD~H<NMVR~s?~f?kzpLOzC?zmv7|$o3YPwosRf7_hr?k%{Ni0s
zKf=Go71x(IXnbXkObT@;w|CSOuK{scN1S|z5p7r?C-maUP<~YT<PNqc9@2Xhj-mct
zFn|#c2N=bVJ-OB5m{7<PjQaC#GzM@jkJ~n<!N>4$h-tY$)2Wyv^2uc@Axq1pd>;%O
zDOY|jmvK^K`zn;}#e!<Nkl0sY-HM>X!h>=Um#zGWSq*)PH&rd=M3!=vKLoZ`jRHhP
zPrx~y`!kb|WBnGy>DhHMri+_Q*3&+kKKN&=V4rzMUV&xN_e<G`oZJOB4~j?+I#7tD
z`Z%>-L5RekBbCXK1E16k%A%v**zMzlZ(uMJJPF1E!JB(6WqR9>`euI%uZ&+~INSr3
zW)R0f88laQTOMC<8PDvEF7*dh&K6Q-5BmECy)d$}U|-y4Vjl%dm9_DdO&o=p%Pv{w
z#Jj-F6|Gs;eHGiPC~|khy0`JIU6*sNpO8{wxEz?d*ANF``7pgCA7)XwE;r_42W>cI
ziVr)%pIH=;bZL5Vmu6vDdA*tolfsrBnd;HZFP&ZP%&Z{DsSYjbx|JtJNg+4pBBi#%
z{F_Q+z1-#PEa>YLgtr`>vrb=uZ6O~-+r3%jHU`14-;(Ol2%*BVcYQsv-;Rdz@#yu$
z$M5_mgiAp-Avy0pTu=7AjP~=r{M4>ZC<OHMG4lN;OKPt93bUVl$^r7Bkn}<a9tHsq
zB5zEVakj*giOge3VDl-Gc1+KNy-RkJB)LS8+Ei(t0Yy+NU5RnWdbjlu)Iij%vmmb+
zSTs4yrJWKUwsxtrzz>=a9kU~hb{*~W(8avhmS-wD^Tdt%W-_@5Wtfuf9&}6y6$l_w
zOq2g!cM<kf3qSAcZ8b_$e|Et5z06d9iXe7C`m_Xp+SO<oy&HqPqmzRjNEzY2Ur-2s
z_JIOF%W|!S)a<jlidALJ4u`{W*9Gr9xHpU0-R**#m_)<g$c7_#YU&bgW4IR$M-|$V
zXe;XhcVLOWRmcdsSfLwC0v2VPez6q9W{h5IuPU<omSLiSYua5V@EATu^lB{;rj;*-
zODsF<oSq0#0spP{!E0$L5bxTm6a4nVGwO;s`YKDDmaK=OQnR@(;lhi3I~?{;gWrea
zl0xgQjUMrC82who&T2-UJ~6Ob+Q4)egs}D#69awlR;*D`sg<|SOpByIh)st8CE=$k
z)e+McO~u+;UL;qYNUp|_T;hP3DA4OyE{7A4ZI~ReBovpN>xM51jrDXkbE`*PV_PWD
z89A$+0qb=!x#HpK64c|&pC=cb-i(D9zZ|}tUbFH&GjAcum_7IGV|`4@VTt;cl33{#
z&@+b}(BCKu`|#SXS)%zg@(L+z7<?Na6uDLo$b2-VSmH8>Iv)j6gZHD4iy_|)CHfXF
za_C6dJ%j6L60DR(DL5J_TI@X&xC&FG@nj$P?<7V8Y<{B(A7qcFNBs3L=wjlwR&HGE
zjS}wzuzVz0_-J=}Ft^eqw&#`=I-Ihwj2XOw3x#bKXCur@M^=TFS<cAZ5q6!Z>g|(w
zl;ZSP%NnS%opCh*V||&rY&a|@BM!}{-Y1x>-D2#l7E7{R&~jtkk9(7_JsGx{Mxy4a
z7quhUdo3l**8QXj;F+IRysPOoz%#)AjAB8rRHAv_jTNWJ3ay?YDVCDc#<M!s-UsNh
z?L<GiijUq1K&F`teB>>^<csx94Av`^jE$Z!;z<|{lt5pqL~PSB5o74XHbMIoRFk@c
zrHNn_+%LUl+IQ40ZzpB$4Z?Om99a9_Z}6uZuxx;5#}b$|y;O<K@$9oFdXk-Jj0rO^
z8s3NF_VBtrxgR2LMsKm;?E3P4=qEH~L%tafyKOY|h7qtIH8p|#3}oB&u0g*10#y%1
z&2*^!;SHx_9NbMfc|E+TD*ymql$qF+WL`syqJa}I?k01skXY}|&ab{@=kT@td;2Pg
zLWTfYSPy`W;%rQ{XyiQo$?8tmKk25-9ms*uo80%h_!j*-K)E03RbY2*Q+A=xBR~2l
zETc8b<~YJUkAwfId+UnZ^=2p_uvrz45h|5RSyo%CRrHFlsA9{jV?v!NPenEkS)h|n
z$a|l(|A2;HsXzL={uTP4v}^xUm%!rWW)_4&FTYmSY#W|6UJ3>?$*Nc;N<PA{+rv*l
zO{mTDCB8k;Nw61dZ1k?-7(;C7e~BEj%$*bYT4$v)D~@^tquA)sK>#vQRJmT;8hPE$
z@tcl*JNl;q*;`5IMl*~s?|Yr)eb+e_H;Uim8HOFB`Q03iD(6;R&dbrs5Z32|Q2$_%
z0cd1W;_H&@Z<W1Xw<MWt#+z}ypf8ia!9Hsi2{k}tLZqirsg#i1mvCfx?g2E2bv(*C
zqu;N_!#k?v8;~&D)Llu-U<(Tv+L025I@VC`pUw`Mfos^h`fe@!2dvTydokXHcb(A}
zU-VG+mQmT!2Jt}&nH8t3)HAx;S&xNUA?cw3)@RY+=Bmlf#K~m=f-(`}SZv-FBVhcD
ze{lTw68J+W_lRXZWB2b6l0m!_aqo$yM=)8hp&`4kM%UFeqKdJUg<jxHgBv5B1zTQw
zb3!(^PuWx13(DSA*|E9|9CT`s9u8C0`Dl=VoiZQ~;T6VLXXk%EI6p##GK;yASjrf{
z!!egj=5~9Z+H7BjcJ^jgscH+~+}RewsycU~O6(+T36vO}r+1BCE7}P5G6Z0<S3*F{
zt=+hQ1R8l<->)*hr!eMM(+%qXq+5szuL72w=P_3Y(w$##GY&G)Mq}1qEPK3jY?5{b
zok{%kfg3NHsltXKcNfFf`mDK-xewqcC|vg7Uu!~1B_!x_F}i<*Rf8G1!!c26gh^-y
zH)WQPr#DcyY5yyt16BP_ZK2VK1go~8#M8HnF{zoVv$5RY;+1tDC#{YOFbxKCp354-
zF|;=mmAReAEiCry&cp_UW&MtLN5=?kW-@Ep=hamM87k8$eu#+z&HKuNlJbIfneFNz
zD{dyY-rW=p*w7=s$E)4-V(y!iy(hr}FI#qg*)&(3Z09zmIUh!yad3Cl5Aj;NV_TA$
zOvxl_*N)mRr;|xIzV3h7=cm@0+oVC?L}o2}^r|4BeK5}XH<WvH3>$nf@O;H)d>I66
z>*s5O&(B<wp*HJ+Q(^Dp#srmPj(}LnRoS)UxeRA>37|zNH!`gpmxYOw(}l+!+G2F6
zPPTz1l|i&8?14&dbD=H7_livE3WNRKv5B}uxTQeEKsuqg4u`B^4fu{@4U5A!wTr4!
zD84S5MpvxE9K(pwf;TNp)Kq%F3bTiCl}Qr^JPyc3<N3&Px3(z4ASAcmVn|3>u6G$S
zQH-EPm32`V2X|Ej+?5Mlg6`zvWY~_c?6c{@gF8Qde|&y%oG`fc^&|EgECR+rK^$D8
z(C%GdPl{r@4s7#4%S@?|3ll{#kU2$Bab<}1Dr4Flf?Vu6ITQk^39uHwtJ8an;5CEb
ze`prR@1WNjIy&>p-3KE&QU2}swc3X=ClUM^qovxX2B5I1%L7V_14R-IRqW_a@91}L
zO|l8;_)OumJNO-KxjReIH_u1uPGBK@;YzHFcHK1v0!`Cyo!pJYjwP-w@E@YM`T>Mx
zyaE)%7@+yy;_+`5!vD7>#|!_i>Ugi3c;ol9apCv6{9c#s8z0n7g|vl??Hi8Gr~=#3
zTCGy0Qvd;bO~5WW?=qyp$^>Q1(hEVCRX#v_mN{8gd7s+ny4HFv-nyZ+Uej7Hm>aeW
zaYMWI+xCaOgl~2FCMFxIX0Qv?$Y2<#F(=Ca1R|tkI$@Gydd*rbkzoo#su}FU+BfA`
znsO`~#%MS68f&f^o3h5Hgt1kK9$RV@o3fLwS?Ey&Qk))jxqcX{tjZbR)9TbzotmoC
zFzVC|IBQNg4Fk@a6HZgrX{x?9p$^!y7ysw{vVL^(z7BQh5U^faugeGI>T9*KRo$vp
zH@CKHwe8J~=GJEJxv8`H_a!f^N<znJDtm-kl7Gw1mVw*Uz%|vVH!~(%G~Y6@SxdoY
zEe#z9^UONb#f)@81C5O{zBMo|UzH5z*d7>@uS&8wwg|@Kt8%Q4ZGy4*svMA3rD@=@
z=E7yez-7&a%cg<LnhTdL1D7=emj!QIV5!`N<D1ZTUpp|gV)=)1l*^M|SlDOd-c1kR
zEn{p4nXg1U@V-a1an*~{=hODvlXc-Nv+9medG^+1UG!ZU^<(Gh0z7)a2T-$(X9a`y
z>vxyG=@@@VYEC`6WJ}gT?$6NWU2iarsN;G{*IAHuy|uicxPst1<+&gEM2j+x$)J{9
zuZ=-KUceu~@QGI&lAE^c)R>^G@INx!G-R80J%6jFYSnPI;;dOGb7)<7&@EVa$OQwd
zR7-nl#xdQq2D_oD8BLa&2IQ@>9UJdb59qZ1ULE<wYu8lmnrb13lGe1Yn${gn+x4`@
z4b`~eYTQtb?~%spOH-ZA8v|9Y!!<^Sy(P<>N0W<vhykcsK-l$k2B@|duP%Yr9Kh;v
zV08hk9tT#J!0HZQjX1D|0M>{DYe--X(?XtCHoJUC^GfF&ehk#B@b1pG;(4bxi_R`s
z$F$DMMu&8`u$QYnlA8m_fD_-X3@pr+rmDn71@e4bzt$>kHU%f4S<}_5nc1xFYF3Xo
zE9T0Q2YRl%HQMcR!3o2=9`xPC3$5lYZuXgFd10r23wv>s@N$K`U@aY^*b^3AjEjpx
z;%<-mq~!zfi*Zd?1FPM8(E;|lpXuf{{=FUpuU#6V8f!18!AeR+PP|o7Q%BGCqiJ=;
z^HcHB(C6K|kwG)-RR4#h87PYH@nSii9J4^t9enj?4hauMDJEKo5!QP6B9S7lVFb2G
zIMPqFSyy{Au0{F-mmyn_zPb+QScYZe?oR5Wq>GTN;!=%HEMu5^$s;kImUCw@yBuP^
zl;<zWJ?)iN>=72dgpNI0mN;RRtOtnvkq=h2A3ljU9TG3`h*DfW*^{?BVE*oV{l1KM
z7I=5T=ic4)j*i9Y_99*=_@73dmQxsXZq<u32E<_>&=G{cR!1|{uug#S={M{ze!biS
zLH9pQwTzh$4yX;%CfMw%n&=%ZmF+_4J6=|;?3n*J>(qDZ+dGYVZF{@cXl^u|b*fvN
zwR*L=xz*g>-l{dK?n<?tdab!t+u7LJ+}hmPbXTf2w`&{qtwy81wOwu0Y6-QlyNzaJ
zYp1zght3_f8qi@4aIQk7jjgSnaIM;ORNJa-)OWUPo7L@&jpoKq9ja|Os?{pn+jT&G
ztFg1ay|L9ysMf4(RX1xp)n>i1UEkh7Xq!%GwXMw!Kz(Psx>K!gB~+_dcAB-uR;{)L
zwAgM|q1u*n2#t+Kv%U?~1>82U+O~TLjqS$9R+Uh!Z6;J}RyKAvws-2AFq42>1FG$~
zhtS;K*=p3b>M#wBdP22&Wg9EhcN(>g>c%#7T61uR2V;P_s&3bzX0=|&F*v8bR)NW^
zZvmuw9VVlhP_0=3N&tzQ8ylP3KtrfjcaNbCeAKMf8as8Ei-c-0hU!MGR;_P0x2s#*
z8&Iv`oCe^^t*wnM;LX|&RcpG3u+gaPK#$wN?9^$~IfMqX2k;0odlOis=H|;PaCdXF
zUfrm|a;Ya&t5<+=8=D&&HK6U*HWI|eaGMqA5Ls@!R^Qxd!kVdVxrb1z*Q$*s$OBjr
zTM5;g6&PY2CcTF9-vmyoZI|T3coLO~&Il5jaraD2U?vF5M7TwSrdyH%#f^W$)oRtv
z4TgIe)`we4ZNo|f;?!YwV7Vn!t5tUJYXjJ|2BLO5q1p~`YHh35go-;Gn^j!mb?3zF
z0A?VqLC7|1+tq|>^$LhsSRo+C>N~Z1BcWQYf@^Vu1RhBDorG$em0DwCd%L;W+^KCe
zn>d7qTWoK_8VB_1JDVFjN!4nVjRuI~Di9Zz<<?e0wXMqL<_^%Uw!PH^vC&M}1Yi~#
z&7BRX1tVzeY$jBLSpb={wSi&}q-Z0d+D>I-8&(DM3tQdRRwH?@fF-%Tv$I)kQ~}L|
zYV`^rwA}<r0b4+AJ9&cwnbWM-n>!#0Hg~oXs%^lYU$22c*{E+e>$oP9b{;@$W3yi0
zY&4pin~D1vqE!bTZ`3w-s<nh_TTrbET)DBi0dqw=&z5r*wwfTF03PTZjqPSawHj_I
z8;y-xv(|)-Y$KuCPNlI~-`v9eu2J1Vk(aRDZGw^j5^n<*Ah2daHJF7aOcL#juxK_D
z_t+Zn1qjB?28z)}LbZ*`_C{@Us}9n$y1BjCOx!$oKzuefYa4)Flkc%AE2Jx|oEdvv
z1z-Q`!A1M<?9Ig`MqFQVR7ei0{2xE^KMRQe-uoPO@lASS6!`S`@4)EQrWOCaxz(&b
z$AAAb#(!^mFT=sGhq<gmXfgKoujqFrV!tal!{N=391s5F*W;u1#mWCY&KDM5ubwXe
zyI8Kp!>|K=uCX=w3A)~=aEx97LAnWx(r!m+w;Ex{`cFYW93Yob4IXffb-jPx9|n`M
z*JybYzftx!%H9^~z$-A>@#=Cgq8<I+C~uUvS^#0sipQ?P(6Jq;tq1Uv4x2U$*>^UY
z@Lwte2|94%36>-lY4J!Urm}0JN7b9^#7j=AGzZX&uz$Vh05&(W-b?x3ICtPqL?%~i
zqF7Wr-m+LGq9iC*bVECtY&$(0)y!$AkYKUoPw^0T?9Q|ud5%v4SbJhRcug&yoEcFL
z-;MF7@}~#CwhvyP{PL##@*kJS7cF7oX_4auRBzckAKueJh4-`ibJr8c7iD?6rU#Ex
zLF&xtaahj0(S)vy>!|q4ONm;|4Q2{<0%V671Bs4cRU5oeJsq&D<Ax8HXN_V^vM*!)
z$%c}znP--&F(}#sEaKjF{4)*)AN^8dk|`ObM~!WdyY1)fSUPciorD8NcpyG<Bc9!F
zgAc{<h{r9n@rW!k<icY?#9&czvS&ddA>!y1>7F}l;{UAh$V>_UxM)BPW|;y95{ca>
zPl4=SoFI`tb{Bkvwa5BCd2@1k0ulujk;Aj&SFbMGC$HL<e?7QtpS`_2IfDup?c+BG
zFJB)Y2^jao@yD`v6?Os^YXr0>_#>jD8^6S&9i4~Qfax0LaJdh??r`v56Yonn5isyb
zB)mgyB7j-F>)}}0)_mIwK|Sr<eqm>`gFao41aZI2J8;u{1vNZDXx@rbQg3UKO17fZ
zIm#k^e{g<saCCAA>KbT)2Ow$LMqukBl}bfzrBQ3fSzb0Gw_W$f(d0xs&?eZ}(-73P
zF&8g<C@c}(l6=gMY8!t8$&Lw0%+jMr(6j?4%mb<OVYj_b59dL=P6b5|wm`0yqu4X`
zL=kvKjOpMKsB~~~aeRKsgX;jMzKJPbe|hmjlZ~K+pS^><?C7|C(0+S<_RIOf>FEg&
z=kx?1{_VJA2JvImvCrZ-tkxfY=oHg?228$wm3I!9XCW;CUfy|PWl8uf3$nYWh}}pL
z>WsLW(zy*g9|_WVFt`bQ0l<!CXs_(wnDK@RI=c-a8k*0x<{NE1eqW9evBPl@gG|I(
zVbVBrR4O##f`;J2+r_}TW<qh7GT1b0AXPPpM;9;#1yCFpk}f}vTtU)&8_^P*p}cbC
zdy*&#Y{<mPlNKZ~%uC-=etcW~M5X(BR#Fm#32KsSEsap~3l$cwefG^SHfz1AxQW%P
zx#k9MLULPrSvU-m?%j!SYCXK#+^h1dS`HJm_Cdc2m%UIOv7em%R<Yk&a=+E${c5-E
z9)#xdulg+1cas4xgC|gc?!5anL5XvN4q7AR)r?b=JV%MslMgxVmjBb6M)_A`VautJ
zOppd6*+OQ#^l*!Ez6sAB9}xTd;0gIxb79=kOr}kfQSY(%=R0PgY$Cyo&CX-O8dwyw
z&FyX|XcO>o<^1bH%<%Zqc{q21M4#seSS0d~<VfINi-ltU+lMUDbItr}OxX%#Lp-e?
z4*Fl{wgAg&5S3+^=}|BS#r!smu$F8Apu?bB$xWf!Y!tF4`CGYIX0?uf2)pPTTSWP6
zA!8<HRK$ODJE*7yv<R9n*!cNTZJ*h#o@aaK&t&f`!qR!RaegElXKwzQ<IH0gvj+Y6
zb6PeTHcc|3g^d`F?Cf@o%Yu0n+l7ux7KF5AxIC=MvOtUFAtuzi=IQH6!}+>3W_a!n
z^eZ%^n7ewU4;A^_Lcp}j&fg!res|nHKYn$5e*ET;8)bzus!QYuty%HXC0P8P@fkKf
z3dg-qVYjsq8fuqyb>Ngbu6qW|%5GW$r?!kC$*5<>rQd@n>fH>^M)t*D5e?kH-($7K
zLS?_afd1pt6)0|XRl2N&CXdw3TmI1fbJ&@x)aMKqK6bwjru~Ubx;vf@{J{0iFPVY-
zz9r@Vj`<~iII_=2Qze&Q806I}#Hol$%1(YgJ>K<bw`P|HQ5Gw;1U(hh(z^>Lom<CO
zB3VU2d^Z5`KA8@HeL&RH3y)re_Y6lvAi-5X#0!Vs{cSj4?vUGvQU(k>m43RCKFcUz
z#MD~D%s;yiKc}8~o$;}jSDY1e;D1&=m~DxFD<-h-(ifR-N>^iR*ID}d`U+RCSJquV
z=4pg+-W3rDP&*in`ta{Tw>=KK(~gXt<F5U$-Y7?1o&uyzF46YC!tt;@93@rm4{v%M
zJZ=E6DYZgW<f%eZmTAtCjJtv5h>?-&gf|y-PY*Mjpc$ZrMO<GISt^_a**6g^d(P3d
zq-g^NMm}_|d-y2DT4X9^hJ)TQR3ueo_vHk;v$VIeKHDawa;NBb)8;rB%W_PlzGkRU
zXJ^-<p=D7z+yJ@2iFY}1aGYrP@@DC4*4ygzm(%*Y!~b~x=SP13Jsfw!E(WejdH|mO
z{=3%P*s$M!Z&WuopWlB!zyJOL-+!N-AA)2(et+;f-ve-?e!dsra={jV0?v+%LEF6z
zMyS-7uf5rG>vbTOisDK&TKrMgY49<8J0wl0P0Dk(%MPVsXZ;k8nGSzDj8NyMT4G*V
z96M23p*~I~3Pg7rimI62k{JZZmcTNiF~(DcnHzhTD<ZrhrewSiyByv%J~6uwpa;T9
z`1vAwz$^mudDqiHhrPUo+PA@{(8M1N5q!l9h1j<k$0h6Il35k_kPU$Y-EVHGo<SK#
zRqy2&K%I1S!h;?Mb0tf!zJJ+5kT5FIM>q~)?7*W}Up$_hjuDXo!=b=OK>scnfEE#(
z0U7{4xr24u9fpinSD-3>gwik6N%$F72XO$dMp!YuNo01waH?o5DKuc<%D6%K>uAj4
z9p+%2))K7DDNZwcc5U-Dm|7B?(T=~UM<U@6!KAqx;#?vDZehTEHsc)(9(j*sl;n^?
zrGFa{X@m)gia4lC*C9HCD@%@7-W-+PUH%qRJqedp*)Vv;7PX+FJr_KycIq4W?8es9
z_hjtZ^L*`<eyr*^yc>QBLBA(rfQrxRiue6()im~)oyEiy#`z0H0q9r%<ZLXTtZS`U
z85vmGd-zxV;0D20S&$*mW;PF&5@#)qZ&Zx_6a0A>c6-=Fj(<o`Sp!(iFaf6*?+=0X
zqiI+%9{t-ZR{MRK5FS9U0(L&qkOc&g5t0_Pj6zs5`4>@HL~}U8y_BsefxF@3$_BMe
z4Hr5V9$2ElXlh!7k7AVYIBT0U$lqKI-WBRfh)Xcm<|dAN|34e0MJ&lfyZE7l6|y8d
z5=f#9Z>3303tYYs94YZYxa=bS-H!`DD<+2!X|Yfp1wx-V2W$mGnUi`2_Ju-#jXh(j
z;IW$qihEu-m6yqpsvp3?PoK)XKT=&i3{3K@f||FVw4rgLGe4K|<ob@t81Qk==?hKV
zELj`oaEUKr8RA(5AtFt+n833GM<<G4*`2k9xpO#6hPj)IAv>9hH_5Y(v%+7)Zlm-A
zQy)lHS#s+(nIU)_jo;E;0sOpgRo9TF^~`FT;9^P)t3*+xUT7RV9gOHho3BS3R*S`Q
zu@xOX?2n4X!y3y_JigXv%M<Zdr8bp4C9Eos>llym^EE*@Sb%LQfX-kujyW^nN=yF>
zEsx=dOh>xTj22$>rPJ4B21W$&hog$Vl*<AFdX$GF`!f}@8opNXYKmID|K{E6*C>?!
zNQ!SCk9A`NxrvDp#4r}sCbtx&(-c%wE~Zz<Fo@{n`b`L&+4b$P@WKymB>0)$QSg79
zrPzT8NVTr-z>hnS1x1i^Vtok$R?wvGfavm7S1TU76mGXFsnk^$x5H^4?9D+BlOG^M
zqJ4D-;(PoBZC+-Ph5v=dR>+e4l0Rkuw%gmYBW9viWEu{o4#YR^a|}T@4o^f^0UCg%
zj4TpzFtHpGgYe!Ll)*1nhrToOwJBzCC0q_c#Aa?}Og-TXQHYn8#{jU7*x$q<m&41u
zkwlL1zLAuQc*zn9s?U??JzaSM5?%GfeX%$314fAh8T&j5PWquFL#>=SK0|tz4FvfS
zrW~DGxg{f&gE&e1S}=J~KMV%bkuN4#ay2p?=W9XF_^LojQ7{jNz44@kO8H~SI00h4
zYS$`_%yyJK>Gd$^o({WV{tQa??&7h{CkH}mDo1_jb`smD$_gDxLVR%`ud#hCs!5)W
z4$)KCfq%UX$3#h=IQkQ3+pue<2;r#et^EYaRK^R7@IM8rtnJWg_`0ljWt0$6dHk)|
z#=wZecLUt&#!OUo(T|7Q97@9BxXaw4;b%|@I?Q259|LZK$%ye;8~8GX&q8K5HzyYU
z3K5`9*46pdwAb&dt{LXB{3Co%<wN~LcEYoge8nUyy-&cdyTuZq7$S(oT3YTC&pjvc
zoq<E+)3A_ZQo}p{r#=6>560c25Nwn&ZKjcR5SVfPS8q1$^S`ZXb>sQ`@A>@ihdlqo
zq^Yo_9KJp{Kfut57kLi?T@9Tlg6oENoB0-mNfksyk^v5iJs?H81YNY2JO+rD@uMvS
zHyo&wJ3JiWzq#_{VSX=FA;RoxzyW{_Ui$+Bfljwpbbu7^5pUYD0l&LAZeRT4@U_IU
zH(}zQJRkwzf7ldZN~=+ut7}8tt%!j{mMk6Dia0~;-v?hJn9Y7aw2`Nr+oQ09wn9YW
z4aXm+AFhE8-lh_#ZQ902h#?>Yvx>tdlzjdmO@<YI2;laazwN`koza?LyQdz?8g$fm
zpresvAeWN~8fZU74JP+b1K4#_B5U(w9e$0f9mAyR3apKhd~t<Rf!W_)!7$!)cPKBY
z?q$>2Y<V7DyV}SL<A@luXI_V&!oJy<MbUONe=`M%GX8Ql(#DaB#^HT03S}!7M!*dg
zKzRlk1H|jPz`X3m;Jgm6C+dhol%C^OklclU<LcuQ%?qm&&;V=kPN1~zENvwBETOx)
zKDmyAy9D7BFFo!@p|g{Vun$w@25$@rGXut_V}`}**(}+e(zOLQ4o;-mBAJf*p}P^m
z40E7wf+dKo?br?Y;BzmUhnI!}wA2Sd@_2PIF;72af+hBH8H{hji3KkS6i=s60N<0r
zXzD=tZ@|l8Ke3x(f0~T(t5=sU?n8kEpN?SvGz-8wrSV&m^VOvr{9C+=e}suV)5$^g
z3=m34V+i`FtVwMQM4!oYCwIY3m<kp|=^!)#&(za<An=`|D<v(*!=Q^<fU_XUil9;?
zVGYDQ>1J}*ZErNPfv9~Yymzvf6V$o5`c*)Vuw#OY`(Wg5bO9FFL`DTs#jy2e;&TCN
zeIhpJAug)0$mpm+67$kks{GJ+gM>lff@;{$6b!6dG8C3THWLa~O9A4h|719Fl@{tT
zb$)WfIvHH|Q)Kxs%#Ik2v*5^zZa787zj)S@xu?4c1hyl#bu>PlMw8(k>07U+gHGn^
zR@L3eunJ7MvH-rmjT}Rkg|3d2k~Zh9lr-Z)9Et2bU?2MFwsSDY@{G`VOdluE5%)Nz
zsQwlVY3^CxPr!$NFkWfx$wU8JFg3exhr_=0;g`|WD!BkVR1_`I5ixAf=alN23?MWv
z@tVwA^!%Wegz2tfx8%QT`1|E>a*Mz5fT&KErvM~10CB_a)unJn2w<%6{P_Lx`Nc6p
zWS&*$z0f5!r}0Y>@p+M#Ak4EW!o@gYXXgTREj50yt>9^A+xn@KX>5D@`7zm-^HJ6-
z&w-!>XyK?o)pGrl7pA;9kYmvCn=2X$G$LbGUU&&diW+73d7^n^O`IcUUX;tu7@6g+
zclpqs-~Xig|M5Qja2zG3f6U7N*ladycK*lBjpnoe|Jncl1N#5py*a!*JJ0F!SCt)p
ze)k5;-UCUIslV;v6MM}TG<y_sH=0iPWgL7FH1w`Q;0AyXBd1$K<f;$6h&-{CC42zn
z*faP6xY#6F?KXgH4@a~a#t+hsI=&f<wA=gCi}0l_-uU7_d|CL1`__lZ0mbejs#r#z
z1w4oy%?I9ps(#q%xRG5;eZxI7d-$Bf;!dA2QgK%RpQ`X1eIDI&cp)f)$=>nOgGga_
z9rPo45vOe5=b^9O*FCz8x2Valv$>Co&v)IN@Z;0Bm;b=q9q$e=+sMnlS(R>D%rTW^
zD~j!vb8fHp*sJI{X0HCwQ<s~_*RfTVNMv3$fyl{0wV-+|^jZ#*>z8nVI^pNZ?k=L>
zLz^ppr)G{>xV9G%^=QsRtx+Zw@jUGGM&n@zqujDrc);qo3A}oHete2YfPW35`>@}y
zD2+B@c9vWVEC0%hdN+4?^=x=8r6_A21>CmOPhZO30WLh2IFvV~NL}t_cxf{SZ<3Qg
zRP9p!yIFD8xLo!=n@qsVY10IA$FG^U_po@~^YK5cl2gLZBY*94sT8~G=IvW|vRnC3
zT@q8uZ@J0$2&^;#Y2JikK8nIgjbC+Z`8W=9AR}1tIE*rM(e3iTa3G^Wc!QS-VLT<p
z`RP;W=6W?p_^M3j`6ZT=mH1v=h`j`HbI|Ql`HF|(AI)#4(?0*U>AW;<UPs3kuu=dZ
z#v*inJL-K(uuYxS-w<IEp@FHWp1&oYNsZ%<7C}W8eMC?(fm}7`&}qf1-tl6<1hk)E
z++|S74Z>$Ba?ho3_gqZxIHpT(S3pjx?>QTvM1a^Nyq@+2)&-vF&_!5AC9!_JNg3}Q
z1f9<G&Ri4f+NWU0=^bZ9_7__YE7TPCK`QjwZ<wT#xTKrBm}fzpHMN%WNNljqk#L+!
zMC@)?E^2}5sPH9K?KZogK{Y(&L9)SzGI9w%#x?{A7WmagQj5zmg(Ih#bGJ6-oIb8n
zv0K)cU_1?2Q^8w~xe%%uep&pG1qMzg`IL;#r$ytVh=Kl9Nsx$hTsC4kS0BfpuRQ^N
zgiGbK&*5flLV4xsz7lMJg}S?2U{`XAUhT2frRJn%LeQ}xIEC(M7z`F7b#Zt4$wdAn
z!nff!;W&pxW{(r{vI*4F5ckpMWwU*VY0ldq70Mh@S&;lzX(oZcKvC}*xUa5$Q}JI!
z@OuW01>(<}!Q*ZQU=`^6^K|wGQuIFwjxh+N^G&JrXWc%~Uel)r><WG0U{1JSvhw$<
zO1ZM0R%=&X?3p({>YeS)zl~`U_sQ&IWwDi<2A{>VUQxt1A6_g+b=n(1WnhoVkiB0_
z^0l%J%gtq1)VxhTD)};wGes=?0ulgYDn%1N)uVHnj3ZKp4UW0T7LD_o6Y1XI8b#Tc
zxzHwr8TPG5A@yzV=5}6MbEF@J*m-H4rMd;5=hxf-$Tzo_uRVX_wLGD7{Gs%v2|HML
za>npo@Hx>KUV!?;`6NXOI9W)FJoww>zh@iy*+x!2ZvbxYSvpJ`$`)+~D|z)u>H~C`
z+YIh4zzPN+PeYdOe^;`!H~3bvl<)G-!t_~~E>4)j#KeSYZ?JG-+IwVM3_8p!Oy7=&
z-33U~Y5&Qc<X|))7$J8E;4kSB&_x<6f9fYXchmk;JkhB=3)p7?JL@VGGs#z=Er+N5
zMayAL))f}S{8~X#K|Jkif|z4<FeoN*x!8PA@k-Dp%pK<xMp8B9e=IM9l3Hk?Q5RXN
zU-7tC1XeC#`ju<_CU8?a2dSyc;+|YLFhnnBV-D5(ei`E5|9%jLkYjzK93iJ}F$}T3
zjt~7o2VKYi1fNBB8P{>3GFVLd!}O;kTJqV!l_-R=uGeBU)>`a$l2yDYQ}fcVmNiMQ
zNDCVnA<&+%*jnODU~cVVc`jzmzuCoEVxh)|&A6HD*&94*_FT@%pZiagKjlHncg3J4
zQ#?9@+5vUS6zH`r(m6!p72hwxG>Uq`Kt8}mq*RRfynO7Olo)Iyo!ezu;<D`Bg~1>y
zr@oR@|0F_*CS43I80mq^39?WBF3HH46iJp7L<=R$Az6WB@7k05AzkJTqThNS@cs<%
z2Y;<tzv?PEXM+8DnVlS+kEb&nj=Md~dKsBK3tP_Jh^}W>`4yN#BLWPruffWMdRiBV
z|C*o}{1DM`v8&Y=qREQ*W8Ef<o|V+{M$rXvfFsR%Ex2TC9t8?}2_IFbxo#6Au)q7}
zc8q1QZ);xCZgOJ<Adz2(f4MJqCtd1ZO<;ca)$)H%hm&47V8$nTF<KZZJ(`qEK<?Hs
z9pX|tYvmL(d1zribXGFyYsypLIpcP!hjq{S|1#eH%9%@i0GyTouTgE-?|-+p>d)_g
z|BUZ{H=f`BroR7$^5gRZkUhB`0PD4L<^Ge4b=K5>H&TUjrT)vA>n$x}k2AHK7P)6%
zYb>MGv{W7=E5VtO)#SlavT2FOiX`39n*GVsu(^Yk=HUY^BUc6&<jqT#0VX{sSss*)
z1W%brC!`<CWclROV_8k-NjsK0{)Av#DM`jMsk7(=V_JugkY8-B1VibG!_0xqmp9BJ
z%EHoynQ-UH8m2*VB@GitYqHw1+Nhif#V~J(tl_J=xe|@JdU54*kRSKF?>c>%W8}u@
zr0X4{OJM-<G2I@@DLs<1pyf(%mLNYwcAB*0W*JRW6PxAEFg8b8vxLpr%+bRVH=)~M
zU$xeg^30<8Jc0CR38~E3B{pX+v%)FUw3KHr`@_yh)7v;#F0@1RcUkagr^U=EA!S_&
zVY1M$uv4dLZr6e$a)x|rN-!!pHXKznp{Bf<ndw<l6R(L}NnKsLj3+`{k_QHi`{^1D
z-l1vvjJ_rB#S1U??QqyX4Somlt@~3W7F_hVDmqhZdwO~=TunVHP4QpqFvuaR{q)wn
z8v0@N0r65R-Io_)l#n=9Vlfk-B>YsRI%3))kL%i6UL;qYNUp|_EOrg5Xudo3bwP>l
zn8;f3)>km0!^(PT-a8PE++Pow!^`Q2XHN2Hk8k7@SiG${&ix*%&#IYWM)dWoeg8%o
z!PJg5nlq=RQkgC;4=zs*+XojH$LE*6Ke7^6djB|k*FHTsI&L4d-=3fSa(-}ndIEzu
zJ%NUQJ7$uOoSbd2P{Q$qQFB$nS*w8{(D}G>Ih+9Ri7Rm$E(fnqet84E{hFD`E+K0$
zZO+kes`C%p@rqdc{A@*4vcwYyLW`bp92-)vq#u#Qrf(eeC&pw@y0-Wf_zb?uXV`d-
zRBaSC%}Um09yy_>@9gNzAK$?OkHSG|mwEo+f4@)h2+IRr?u56)em5LfFg7mw$7nFu
zXuHDxdKY{Q1uVHLA}Jeg!`_&%s3fL@W0~LdB6+YPtUte$c$5;4xhi1rLl^}k?f$8d
zaqe7klxdKkacQc2-gp8e?_4rQTh-;%^TH^8#6Rzjov`!K9^G<me0o~bUiULGHU7PB
zY>9iaKKr`%f-0{hRAPf;s|xCw`=tC#Dn(By{jpu%wT_JeS$f7d90^Bjt!9r%aHeLS
z7(4k=(l#cs!73^C;(P~my5svlp8xr#=YRd-RnVs>z>eI1+2?;%NB+OfTC@6m{`Y4*
z|J(GArzi=$m*enLcj)ayZT#a$KJt5g_VVC0yK|5`-JhdT@&VvzRI7Ra&2)zdBljvB
zT~L8k$X*Apd#qu+NeuN0W9<0<9e5z#;okxM`>G`9e$)E|eN0FOP#{<gC(C$=%K{PS
zk|p>Xhc{DDX)u>A>@@hPA5K_5ss*`mZpJ~s#34GvS{}sHDZGpq{mP44O9kv&&DUWj
z2HTH4&EOQmOD?fKg?p`)a@~w)v6YCk-!HA-Y7`VypU;ZfR*Sm&QWjm=5GbG}Y8{5s
zYmIVAs2DR1)*jQ6KGMXI`Mes2fh{Aq&DJ|9)L-Y;EaD66dH+w_|4+`6Bmd{x|2Ob&
zbN_ENn$P)vpX2}kfXcwhS+4D$s<}3NchG;Nz!)u3MkBh(Ly3OtUGFY{QGRw_aHWbY
z+246E_-LKT5k&LN6?+&k9iTS=1oIH;hr!*lH;l^06^vz$gs_5Rr$-46H48N<`nh9G
zon<8+#l?>7O!Iq!PCq(Na~!kbOG}?2ZX8>1CmFqSQ0-e@RT=quAJ@HcG(lS$x`>^h
zvz~6k$$qt!(wF0Suy3AKsk=9<{%J7cC#MuaTtuMvj=lhOdL8W#1_&=zG1g$p26NtU
zx0FJS+*75p8dO=M;kqo9Xb&TAf1d-^T@B(0+0CY(C~HtMkGeT?Uodd3^~|I>m^70`
zYRDASO)j2L{aDjaAo9;XkBUF*>{k~u`wVkFMCNd>s)yBIlVi6Vo$xmq2r8q0c1g;0
z0g7FDDS7cL4emP<dY(J+fo?`<iRD_JmU_;0&qsVsys)rD9(P`kr8dy4OkleK9o{W@
zX;{&DA22nxtl-})0*&mQ)8EHW)(AL<^9gdE8=}{|BrsL1Hl|#7t{WF)*z}5Bw@_70
zUX&v!6HsUZEa>B$b1oBT9kNr%iW-VMm)ujP+b~*yr9(Qz0`z~GSvScJNZ0>2HmX~7
zOaI@fZfrj5|IhmW52*j29{h6LetU3v_}6@ffKfl!8vwOn3qJy&R^sFk<$5#<JM8^!
zm$;yZu<=k5H!A>4?*rKE0Rq!I@&yJ=?)ET!xpzxjJjghZxee_E-W!g);TXWe_v>N5
zKfK>1SY9ofsA{`j=QbD&!hYoWFomSkbW5ya9R#CRaXh@&Dvf!i#@MLT!Q|ZotzL)M
zySxQtXuhvtp1a*14j=X^4dG}Cv;k}<P~`bYjZeL9I4r?PyelFc781CtY5$`}B`>%}
zLkfvP8nf46bUO_CnEIJfi&2)~bZ`X_Khp0u4Gr_Kv7ayqIx-LcIqku|6AmT>4HFM(
z0J%SwgVg3!hdu^=0@Na}Th{t26Fk*fnL4i#2S5)5-btfMo@XL6Bw%+w8qrg}?=wiO
z`OMEovJ`&nZe?8Ke|}w(=XD8QQJe3D$s8=CJ;BIipu8_mG66Z?oXjx@<E223VI;gk
z$+3cxU!i0-&if8!E{eE!zSKu4IS97sXDND@-}k!|Yb5hMOtHzbu$L(o;CY^==%Czh
zQ;akPau1Cc(^i@@JcaWg4I*?QF5(zHK_9YwL0$C*?WEq|(*tyfz4ZpyC=wFKXn&ul
z>$j-O3~u=KSNfa6#*{?@Qc*~5fW-GkYVLjYr}hP>-&cQ@%Gw=ojljP4H?`I%>}!7;
z?aevz&7TCtoGRb^8N<aQukR?6xY>0bWd?jfO&}Zy)853VHxZmrochBN<vmg*ENrn=
z*blEKRuLJaSmx*u=hWAK7%f>uWOJ(dbFpV}@hmQ!U2FVWc9k=(U372A_Rj8z`RD5G
zS>Qc}z{3|TF?px`?UH=vPbYj5hx!kPWHf#yOUbtfM@RU;;{CzvlcT(1GU@%RQ{cH?
z5~;6y{33HsfW&Eu0P$;XfI3$>AZnj;gVfp30aE*%8>Gel4yb00B7if{HER^2tG#d%
z=xbFgMCTk`II5+0K`+%}i`dC-W}O0DtmHrnT=-lVBo`kkP+5(_fJxe<fW_~HfifYL
z0+lMHgH2NBQz?taa4J;S&X@*~9U1;v0t0AMWr*qYDkON5s_4=wsg5palImE3E~Q!?
zXfFAahFwZs5*rSokW4eR%CqY5HJeq9)?ij8wxqOL`U0|+h*pa)2%F<Y>hz>o5e0mF
zb489wbn@ki9g*31rsxqahxrK}lBI=xm?_kReq|vg&V<oFcgl;=9O0YPDE9ozjA8L`
zCn)nx?NXWmc0h^0J;m<BT!j7Wv<_qO4^DuApIR7=*xbII^F{V7p*gY{Q*W^Fz#C5`
z#71g-4EcE8`P^Ns$kq$5nf=(EKUSB5mnD9E0Z3LX?S$8~)Qlp6WSO1f;}y?{qE95n
z^uW6yLFYEa!~W~=-uo1cd%;ydj4BHZ`9ZiGg`>$WakoJ1x2k*>%j=*}Fssz;Dkw3`
zD)kS0?wB4)-Rk0Vt(J|0QNNyzkWs%OLnAN6JSc4P4TDewWYKgUR?D#N)Z|e|o~Vh(
z{UD4$RqTD@7dh71JpvRHOD-1-gKpcUd#Y{>6Wi3>TS|1tr~$_8JAI(eR!*yMPlhA7
z4ZXj}q5g$mT9Fqn*iX7}5sX9aT8pfK-vlsr&4YEuuVLJ=n->2H$3wmppx&$D7*zHE
zU&!Iz3a|dT!LLlS+Aaz}U!u8dsKaZoj_>i{&+GB<ZdcUxyaVs0cc^|IdB@%>{!^4r
zhJXY|pci=!@3XhDYt=zO6#!MyfmOu+>F_JiLk0R+(wLj9g*MO`LpkXSWX-FU=#9H;
z%&Q`9j(~#1wxH0mkx*Qu(V!^Hg+n2kskx+9J>M?ku>Bar;|CM4hMt7NT~j(>USt5)
zR|f+u<)$XrmGeOtiy%amyb;mTJ7KTy`!Vvb>e~c!HyB0IU9c7>7Z9>?jhWzeX?@)v
z1|Y8ud8A%32;{hMb0a6oW?;ho3r3l653I#Wo{pUE`qhvFoQ1J79GP1v(MHt-0KDdF
zz{{-4Fm-MobVU*$$)Y&m^=W~!+l;X7isv4%@bLzjjsf{Bj6f0rfHqvSyPBq3j1d+y
zzs2WxF;gd1j(%s_7x&RQ;SAL58P@XzGvlU(0bvkPje3SuQgS3lv9*|d!(r4d^2B&2
z6N$J`iz8uB)Fg5oiK}rKeB5(0(#65)ab7-(vC$%#CB`d{!V!y|`iz-h>=dWWaF@NE
z(C5{Nk3~ijW~uwjbO5^wIRyo|{!)s^kF%N^B*RBWd&zZZq^yom6STX4c&C)T&_EqV
zT~l40-9R#!m8z2JE|3T`dR0=})lR2WaeY@iV^YP9UF6*>Q1<|9st{^lo1_|Eg<pn)
z?k@cOWpayxpSA{H+#{wt!=;McVzelJJOGi?z|pEvR3A%UaaOE<h^r*diVYTHP@CKp
zW63(h7&f!QEb8$It&#QCGD0c>H31K6_GDP2*er@Kb@yzF&kgsiiqFl|*%h@m=A30K
zI8(M=>1bQe%K~!bd09cyViuN=Bua^EnZ^~d2$n)}TyP<Yz1<BziOcTHIPDF{?=YN3
z)}8c4IUs;s3;`mEu@V&x2^X!JYP1qtl`HnT*A!<M*H{CigI%##ZGz__5nshVGvKYU
z(n=CX&omCja(gM$J_tVbZiIn79!_s=HM^WYs#C56T1i^>%9*2i4w5Soiqc#)%)Pvg
z)GxMNq!$36#Ee7q7KQ_4KYA5&4Xk@jckd^*6YZ{j4NcArdfE>XPX-`1KK0-)p;#I-
zAPrW8^wya0Jdm`xGawyme3l1}WGw^A2{YV*W`%&cuIf0z&VXYESU#i2Wbo$!X{m6E
z72epJ4T>^k>ib?7*hwxCgAa5iJr^6u2HIQk`DLQQMY%|`kuii`DM5VA4O+>g(%zpc
z6f$?jr#*vqMogY3WU}nlc(MGDft2xKC*RoF{hl$%$`NQ=B>RVa=Lc&89kr}dB^FJA
z%WC{n54>p!K<N;Eg^v#qFwaoPAC87BqPurLW{(^D!wyMo@twWv<FyiyI1~92eIDUc
z$8E*j8cPyMZ2>5OL7RYgAAE_B4h$T<T}{M*HUU8fdE)c}Pn^7;y{fls(MH>5_T0m%
zA0>j*#<i<PtD&%BDh4I@lq|)J`VOVUW(=e4`onSW2Gg_}+rN%5?ePp*1NZfqFx_*t
zl{2T$<GQ{1#hZ#<pJF)o)zD3IUv;qJIh?_^8zJ5e4*x`T<tLnmyg+Zm&k8A+8ejfE
zB^|%2lJ$iBvjh#R+7==k>;oJ9``?}pGUn3x_rIkD;3weu6XChMtU-n%V~h!|{*tya
zanZ*FsBa#YmTdJ>F&HQlCWA@VitCs96p*IrV6ZiF;)f}G1|Xb#wq`L%CxhPrIzW&J
z#eG?-qZQ9_s$sdvQrM04<R={2>FkCzCA;AdlijdnJwXd}vKuzg*^PsYxhE}lbKz;R
z8$#0A4Sy$`PtC?|>aqs8F4{fLHnVABTz2c>r(^TgmC6(2NWbTheYC-pY>P}o@q=-j
zK;t*T_pF;$C+T!sY1O%wts3T5VcT0)b_AxC@F4jDqi>la!FkK$?ShgTye*d00M|@x
zl_S&R;)!iH>O<06VPBI4x3wY|vHsqh+`b(~Q4g%y{~L~nY$&dUA*tHygxqoY2M<`!
z*mhOF<OKZwbuUxpBT&^3$$B5sNNe<yYE^T#nr)Obsbn_pF}=*-5FMm*5NC!j;L(&`
zF_XSFqQdHLu9r=|Ht51Ve09jU!n&>+UtxteHlJqaFRalOgkN%uZXt@Xm|bJSHJ+8K
zW7*`1nxo3s6ty2mUQjQSem9O{9yOfJUc%9T*~<rlCdh95HLf*NO-pko1e7|1B{7Mk
zWq5)9YK{yqZe7Hbxh5uf?DWFZF>^<_oKe|LP<NBKXU&|E6;1Pv^U<0~d%f~k(Yvv-
zwC<s={TDhu(p@wc8@g3<G1@%(xxt9Bv+J^#4+iH%qpetFrb->-MJh9Rk=%#cG0tMR
zQSzgW59_;%>bZqv0zNQbgJJC4|8aEg2h+QqiDX==w?R0k>TN9c*;Vhj`Yq$eF;!^)
z9dHBP9dFq>dF)s(uMRb5DkURiPxAHWWtQpC_3{y405DKegsCN!@S0IGq-oT0>(sV|
zgjb5X&qr&k#Hy>ECUj$M4ilcqYKOU6n<>aiX@%!{3#;16?L60mSJm!hklLf`QLElK
zwOiN2Rc${bTPA(5&R3(F9+d$i;o<4Ra;@<&${4IKmUiv$A(e#VPXS(Z?tk&ZXwthA
z7o`vu9j>w;T{=;}_3kMu9xJ0%AMy?|_mSZ~7@^!S_d<$gr`=f2esZ9{?7$F=cs)ti
z=DNcv2n2yfoA7<%H8A*~k%aB2_lc6Q@zbC@dE7Q*Ktv|D0zkGifw+AZ+7#-uXLRVu
zUZ>Nctj&8_GmfRqES^Iy$>$Lh&Y3=tMI}>wn<ts#+b5e$F)_`W6d;EAwx<=3OaoDS
zd>Svb9>O}G(Q4)Kr?bhgWiuYgroM?a0PFtQz1SCVP!OYlvyFL8k~+#fA$I#w@;0-~
z^}NlDGorkWY)|+MZD;IP-E1%>&(tQZX;1hP*B|XUmzQyjQeie4h4KQ8Tr-A=X_m$+
z$Bs2L?2YraB5Y(JP^^_Y)3x7Hzh{qkmvW@#=4vSmu|H?Pa3j|6bMD0F{7;OBfpK5=
zqDf*JsPy>noApNBj{n}=+Gsq-e}9hu{sYE;e{*#FYy0)d#bv(m?|OBD&ny4+;G%ta
z_U7X9_}91R=pIwW?+?yT4vtO^0S2IUaCv;OWc7r#6y)Tn@-jrP3=?le+ZBSLaG3cE
zcEm)$!U%2mE>3VbAj>oG_z@I?p&f9pbLVSC0=%RvK!!)Q4;YC+?jHxk`vFcH+Do_s
zD0VwWF`r<L82nZ74!pbGXM%=7;aNllyx8AmX>lZcgb8Np5CMZpg~8vuD)C8$aAE#m
z0>;Cv_xsQpE-J>u!AA_8{wWwt&_w23DSDvL`S{LQ;LQPPxC4yg2>!yuPid>ea^u;T
zP1<O=I)i?1@&!zje@=U2dZgc3U9Hs!cE1PMh+f9DXuiNolqCcVKv7b5{6r&0Okm<z
zt{+;GY93&%2VI58MfeGD>&qw4G(<%~Hf^2HpMh-{{g~skjWb#4j7GE|6}bP70?ryN
z+Ps#TM~fvhP(kBCU#c^I#UFOe46VT@9!6oX=d_ZkKE`%XNq$u0xVLA(oI`Oqbg>c`
z+&8-xwyJSPLCJCeBbA3p%1X}Qv~);v5u~?4Z;UGvC+8-Vle$K%Lm1*zkDyjmMo{9B
ziqUvq5e#eY*UI%)*|Q28<z}npmnv@JUmIb4BF9u9kksj5XUi|V>%EjpQ3ypCorEd+
zYL4P`m$N3vsv+F+O@|quM0ZsfT$JXImNTrm>un&*X>(rTV~4@J-qm3ZG4K}>71IA*
zQI-ZRk5gX-6Ib)F51-8lNP2*#Frbh(Mo5@L%Iu4$4pkm|g$b3DL2p8-Lf91nGdv5(
zgG1)BtFe-Uu2sO@Wmu3ByM;4CZ-D9u;pc^;sMn{!XL9jXlNO(&d3SnuhgsFaPLIb=
zn+eP<C_!N~(VGtnH17}ZU9FJ}smV0VWMm1vI30D-9m9gcToUUU>vIs^w<SBW1YNuc
zvrr{T*F2pWe3spD1y_wIOi8*{WSq|BH_MP?GOZW^NNS?!Xca;%PLjk#bhO5MQJV|>
zHVXZ=6#BNTB{9iP4fw`K>($IrVGy9#{hZm2E?Ka?*UHf`d!7LO&fcvGar6y3x4K>a
zrh_ind$IjH2MH9PFMte%@vuqqZLx8${wGD2qU@qn3?Y0fm5L^R2&Ky@Xb5|by56M<
z=7{ai9PKbE<{5lsc%>=gs*R7A#BV{#z>sTnjuO#c`ZYfUemMYyvk3KuNO(gD`I7=Y
zgf9%dp>)c99Tjh|E>V~9Rw-oG@EV%<XQZoL5<KaDW`kxUX7G)$gx+6yWl<7qL?;<m
zZXtbV@H_{NXQb||G}Wk>k8UQRY-*UCNI@-4#&R*_F^esM8Bi13=3!6?XCE4B%<2m7
z=xnuC<BfKA-v@m<Y2^P+*LoGjc=cs8@=!DI9m~fsF?53_1swqn6%WRXX@*V=EjHgV
z<TN|z0Z~-<{58eseJc%fVX38(u<X5GHq|mO6UiusWZ6?n91-+uPWk9qy{gY@5(8d7
z7DwR-#VY;!cwAz4`*>3?de$ulX@6_mb+%wh6Qbz?M9sRCAj_W2Ebku91b?$Zq^&d^
zw3r#z)HICd8=e)Bbe2J;u*-snnrCrp0h?W#OkBqzt)M-T8kVEg?k??ymKs<Pn?y3r
zkCVyonnHTg{j6~Yon@>2m?+$)OE@gUd2D2S|IRD60Jx_ZLbsvHUVQYNLrUsqoDwRM
zx%OcMEq_AM8GGntgWnj5P{SRotkoFj_|TK6{1dBe+ys&^yq_I??8d>C1<^F{DZ5-Y
z!7P`gNhAk(%1THB8q1zY^P=7iNUDe{R)*UnHp(&w;SBVUHvWi@-vVGi?VF8zl#SD-
z6PGrt5%l|CW@9HY;TnCG)6K&~`A3WB(hz?ls(VR))|y7VgrN~X?Jb&wQF}USPlid9
z%%<AH>Etah=O<Gl`VUAd)eG+y%6fy&I1HljX^;(<`vb#|OT?C024WUV%~rYRGnK{@
z?eN5<LMCoqsCo0R54?-5uk=>dbM9pNF%g&1)zTh)yXkbi>!8=Ccp>6Mnbo9Qsl9>1
zXUVxIR&f^ASLB{pEv>A7yFJO=lH3DdO4*mH_OA5xsq7{TS>Dy;#b?mxo9+M)JJKg5
zwXLD5fJ`8HSaNJvVr-X(;1!+?n`v=K{YNhY7DuMXif#k!Y8O*A)HG14x#f8^p9Q4Z
zbQ*Juv-aOUcn;2Y9<^(EoHZ%y%!pHPEP@|!i_lKmq-wo_cfLxd|6-<5yN!@P!Dv$@
zwfi1T>6?!?(#C7b`8@t>TsFRZHCh0nW#+>#6<%{bZ<o`-P6$e1%j~+01#@OpI5281
zk2>-qHA8>$Odzd?V%zIeS$4@Lm}@1bj2xP^`8E`T_M`@zs0HW)&OO&r`gZ#yN1lqY
zP6hXYYMw3LvG7vRz6(Bv+MP-zH;~3Pr_?}Ri{d`i^p1Q6P60sSVP0C9S&@-f8y`iP
zabM1PvU-gZ-3Q<F-9&oS{XdfkUU5~x#RgALA4tUVdH*fG|0dqEoJnqpa}+k5(RkR^
z&BAmMx=f9^<mGU9dJKLm!u9akK7PvB52T5#zmn#?th`_t@EH7G)ds*S7}qk3QN?{!
zmW6BPF_ru5Eyw8fgS9zFl6!t1dOQoA8u=n%p5a83u2}B`S^fTYbM!<8=De@3pQ`5=
zXHMsYbvqzS2nBhL&lB6L^Yp+tOMlE^7Ld4$9c;$^Y|_H*{fORhC)~($dnCuxL2TEx
zlrDasJLP;MzY3m$b^$3q&)~eC>!MhOJyNP9?f<1mAWYFs;c1d5ouAPy!5{vmaCP!H
z9*}1<QF|cHooOOc-8+v)kcZ^hha^j;%%w4>`DJF4zz@N_@)lQav>%6$>1jAO$MVUQ
zEZ^NXpwuHW_(Rgm$?u=7O-Tb5{&P0|R{Kl&G4+f%=8vXi<UG)tb9eRBT^2tdDDvb{
zTvFYC^>FllT;1jSqk0}KUnA=v{9YH}Abehc8Y}$t&<k)7J}<yLR4=#yZzkXC0_2!K
zFF?i$tp{Fzg51yRFO}#;*I)m;U4Q-O^{2DK{sXVS{`30FL-c$mAinc&(`~Nr*cwb7
zM8Siq$K#T0lnQZ5E^z983V={iq#j_wrq9VNCvDF&Q*2eU2w>m)vQ8Yu^SVyt9u@oT
zp%!-XtURymS?7j7^Du*R%cmZ$k1}#MtUOnY*7OTYGo3#3iAl;4aq#-@2miP*vX(F8
z9D2s%#DZwffmX2-q;G*<vcG>eY!<XB<aCKXRUCD%(No2-|7371ZhbsKeAD%+$3u9Y
z4Fgj>Eal!ZlGrVEq$>6}jncib`0@KG@+NTdwHbWEja2KUU#BX9UfQ<e8;E7!kr3!D
z4!b<Ni*J)J=YjWwU~6wSzNxUeRW)Pg9jc|Ac6CMnp)ekVb74FPA06YH$y^u<r}iUa
z+@BBQ{v%<Wv_}<3KHVkqDC2PLT{8|O*#vZtWY*0pUt%IX3&wL!pU2MsgPFv7!@-RC
zf9lo7#%3)4&sOs}|IeQ>|Idc^x6u1XFu3vdzrepghCg(!pI5IBE>F(h<V^LWYA0so
zG4t>6g;_ZN4o|Gp698By9+_c<rBz9_71Vykvie2ZYmQ$><RDXFlUU9P3+c1bzHBO5
zcr9<&8?ou-tt7BNT;X;3*WF#7K#%=Lj~Hx$p%zrMmjsW#0>sI19NdI|gFVsH{?2N~
zBOv0x_fj!_6Gp&{9{Z28D$kBO4zI&;IOv4?tgc9m+Ez`CS4tF_lTwq5jH54f1dUJ5
z#w-WA*@~4Vy)et>fe=)MJRDFK*JepiF=t&*-<=9%nVQUw?0^WH58avuC+{FVK85D+
z@-$TAO4YZDjoK+w&PHk}vQIC;rp!kZ%=U6N=zn49oQLCx4U|FKb+J01bnSg`g3M^8
zEp-c;N_WHRLR-qUg8xcsEmx?Okl3?+@8udu5-*8WoU;B45v72%DgxIKX9W|LYmoR6
z0VnOUhlf7mV23l%vd7Z}m-s2nu6NglYChI2mEi~f?u&ZfAM|G3fs~dv+=lMT%Wmdk
zPluPB#TtyH{UkBZXPX@`;ecRWuw+9qWZ)x5#7NuiC2t|DO?=GS$VBQe%p;5WzQ$y^
z<q{Jz;jAp-nP-Kxv@_IZifIn9C3D#36@#Lopg{WawRYl!Ju))#duy+Sl|i1FyI$?5
zdh9uwPYd&rNBi9U%X@8k<|aIgX0s51#d4YBXFCDAHb9b8cGhzzQB?+ZgRav7%OBt7
zq9&F>eRmg$P9nxgHg*8!$sojF>+37o6Cvxbtn1mwSZ{1M0!Z0#^ac#X%l@$QQ9{sQ
zOJyk=Dyzzx9S(=%t_6>F^a-VxL`O>q{DI~Q<xdOl&B`e$(vk(TM9vn+!;v~=M`V}1
zF8x;*#fXwW994AbRTlMh@eFmOtVIB9D)p@QFbD>(qRM<8t`mjS=Mkpg41><C%Ka&f
zLY`Z`tG-mIohvE^Uu#B`3lo#ydLN*=Zx^k3H4Nvij|7WXSpW`C>=S_NIn2`CRX8Tx
zcxA!`vwQ=NpL(;Qh|Rg&V>o7J7Wd^0K!cKjr?qnx5opUE<y}Ymq)r>g1ZTpa-2v1w
zd`rt`3{THzV&4vj{nOz0;kcxan&oX5<F0-XAy3%X-XQU#LE2gxY2)1xd1HlZwGY4n
zCSyR^o(9IiheI*T7!XB41H@M&6guOk;XM&0fgpTD2sN27?n(xM#Ll<o)sa*!;>NYL
z8K|`CpwenlDy_z-v>KyQa<nFwxZ|=+<6@m-<2sl6x>uwda=)X|cQL&Jp*4<9uHR5D
zV4v;QE3j#GgGsQmUXoIf){nEzn|H5Yn|T9Nl-Rr&QP8eVKMV%bk#BV>i)H95^55M7
z*(vpK$fkfD0(dvQPa#Xp%ch(aS3I_7M}!-$dR2n~WUafyFdF>V1otXDpa9t&c(+5A
z#J2!#Z^Fq5;>2?7@-qykN6`<?HT*Q4rd4h@M#@x%)xN<7mCNA-rUBFrzr^XjqQH@e
zUYKZ0Sbij_T8`DHU9-bffF%DZ7`S6ys>Mc$8KCx0FR0Oad>>iyj9o(OF@6m}Uw<}n
zR#gv?3Ef)m8gkTdjWt|J8HebRMnjE$(uc2)sA`U=YK+J=9s%1Pi-fK~i6+)sj3uu~
z+11$$Tieg`)aII6Z5z`^rivFf)!JlzP}xNsLO8eVvh6SmbEk_hTULorZsB7}K`#H6
zHOVJQAvZK8@Tps^o{N8Z6SiUbRXe{59@m^5o%!r6r?jglpVzX>d(n_3ufGrJ9Hkqg
z=fXoZ9>d>FJ0XJw+riqu!tu~!=3t~nXQzx(uZKbRbl44BxoK@%k{ogxwFQ`i%ag<Q
z!NtY#`K9k`hHDG7|FZXwvv=*&gQMg2LHq6b*)Qh@r>7^-?db_L{M&IUVUJl=aMo(+
z8;*v{!RwP>-av1^rnr~>KYQ=q*2a-64)5Q5ica=fRsta;fpNk`doXrlKgTva1}A5K
zIX-#@X@H4^MwyYgti$`+->T|+PtRy{!Nv}pvoXx{R99D5S65fpg~s5X(dF5kr;qqe
zcP=>DC!BO6M>Yu{q@zjR!68Leb6I2vo}`qUP$N9-!z^Zx_!SP{R&ns-DOVRiz8nsz
zT6h=!a}*{-mI}{M8DhpR7+ud!LCJsb#pBa{NChxJ?df!R#c?SDR#;^N{$uulhoLL{
zWmgr7Up4EHO!~>f6NS!agxAWz6P45zyy{MNyj;0Xxmm_@h%;H!^EsF2`g{Fp_Cu6d
zR(mWpVeh3T{uP@al;|?1)tpg_Ru&vyWwqigkg7Ua5HjoLP-?P(z3<25keb0%HDmo0
zC6SdU0M!4pM-J5W`BB3%@)~z`)}drcRLRL05uL5JsG+%gQ*qS^Rsb;-J4*#tW!c+A
z<AjBYvCYf7tYTNzNzOs~-u%(u=RW*@qEQs|f!~uH51?ZIpT?8c#-`){^JHVA_2~ce
zE&Bg#vS;BSj!;~L@I}IQPwDS^%KxsP$MN}J+9&AM$t%$J{@;stCB1^o_Sk`F{Br-e
z^UMCbSNqRFi#>S(NbH}yIK~!(ZhzdP60K_CQriD^S+^qf+0>3jFT?&2jp34qVAeDi
z0zS|;i7$EfTo*;|gS1{gwC#DAL|3>g(~X`{F+jm(=H;Td(_Q;|)%bQ-fq#p~L%I)-
zMi>&<X7V>u<js<1=o{;9t_sfc@GKbjM`Dj%21;Q=pBf@=>G=k$kQ4W30-n5}uVtTG
zZa(c588P7NNm?ZMc&ONCs%m|zvx7cQ1Y?-iE4FPlWUR6x`l$X^j17?TEnR7>uFAuD
zA<dDXf=+4}?8-W+=0x~N1Uzh^yJ@ydut@#{2;Y*wCO?qm4inm0`q9U*e+%*!^*dO1
z*TF5W|DLc6z<&lT8ey<32$cFl*{SKU6Q?3KfV>OOG$?3xwJ5%6*nBgIL%uQwDa9Ca
zwp@BQ6ruBTb-9w8NTJtVhr?V1VVLI*)H|aSXriURxh57$cDXE(bhDvo3d?0JEfHNK
zP9rx)Bku&Jk6^iahud(nHuHEo`#R2lc$m)({wNXu@ud0GjsIwEK6yO<J)Zymis!$R
z=dXS#dGr%a^UrNcdaHAoeUihKJM|&%r$AJ>I)~|#E==t7mPpA<@~Th~b{vL8*kOPN
zKRVg{oCJgZc+jKkpEvPfbRJ)Z$t`hFcmvEe;NfKSJt%sbs=b?fFOG<)%|;z$#gp}?
zPd7H7Zq%Dke`sy~Sk<Veffyt|Fy@>X_0vz{HtTIX3b$pgBbe*)fX_n@4cF)$i}q@7
z(crv_!Jf$fCFZ-xfxl1tVJAA<6}IY=>fYXF4Sfd0HBL7k4@RAS81!JeI(Si*3RgX<
zcg?-&fX9%<I-P^}&-Xj~?+#veI;IJ|)9LpAgO@F^;r~7CD}%wo;hUq*o8w=&H#3NT
z?pfK72j~2rj4Q^SXVC~W*1@~*DooO_vMeqfIY6VIjV5T`;zORJChu}h4A<@w|3m&0
zKTN!9+p&jtI*$5qhM-}Z&KF~#<VLGAqMN2Fjh+%A<d%WHpBeq(erB}aFPc-fye!ei
z&u@O%>TGRJHPKs}`BS~MVNG=oOTp^G-~X9rLE}n8ksDlr+((*`FxY6Ff-=i+K6`21
z#uiTn{TQ7y%|}(F!E)o~O#H|i@8(w*QT8kRtli#dRkPpXCj)tPRk?`v_SALN003>Y
z*s5<9cg+skm3}P1sfEBlpZw)TWYc=^<yI50n7f{~Hm6=s;>`SB;KxI7<U!FrEMt&)
zgu6k|*K0_OL-x;qqm?aI)kwIXa~J3dh~Q0BFHDN@u(72WcdI8~`~c&X@;J&X?1#x1
zl}j5VN6Rk9{qcnm7wTakUA0fxX_X5PCbqDOB|)n8JdXj|MO1;$Ppc-2i3T|{AK=KK
zpLQEo!a^mgn%>MX>ES%X+}gATYJGVOXV1f*P`7Hfevn_oXV5P`7_lp6yYw0F1UYiw
zp4w|GXDuNuw>G_Vz%`opuL$C<_#T4_8*RND1QsRRo>r@MIOl~)rC<RQI~x%`p5-Gf
z`=89UFeN*SEA=;^c}2s16ehZ$D#)+%4yc-X(tEsHZ4dN`!nY_XHd-Y`Uhc$jr6Fep
zG^=cPmsQ@b3KXkVwl1-QU9s@1w^fW8FfG+n2rN$V-L1_Uyq%Tsw3cQi{ND>#4yqjA
z6)edK>!6amj0DBEHZ79pHdhEcli`60%T7$l-;I6Rkv!=|bVkKi-@ks19|<HR583O^
z-lJ$_x?F2sQ){uZc7=MZu?7ya<g%87p2l?uf7bFlXtuMMHEY^=r}WQi@gS@&v5!g1
zlaH8F)f8fCN!{h-&8=}Sa-bC^K*9sOh@Ns{7qz<zE;AjbJYBG>grWWJ)q2wxCN$Me
z=kJ#MTe@7n83RLJ1Xm$zkPjgjHoL6#e;ds@x0S&ECt*Lh0R?ar^RL{tptWCLgabBC
zLH7jSUjuDh8kiZJ=#gHahF_-qf<~AFIV?QVo4X<swLUvZa-_PgOj(K=sbFt+vTl|p
zcT7^>4B<dd+2XZo%{?eDqVtOoJ<--%RZoYKq;1xaUYlqVLnBFl&~nCMY5IQs-x`61
zio(`7nh5EGPKvO2EkNoD4#l)U64)YO!}uD0MT^%pS<MCt3cPNz%Qy+;bsR8YJnr|{
zX;@d@dsC8Fh-=ZsTC<voiYPx#Qr1XdYl7g`QnL5Qx41oSZ)f*7wX;d!&I*(?X#+|A
z1^?T&CB0gib~`mpXrI4k_t;>3$@2o&*<og>_2PkAZ|+hP>kkhf3Wl5|kX;lW+Q&#x
znn92Vt+nP9D^pNItw_~}?JYaUk~1JqwdmH@txomeF+Q!j!=)0JeCU`Mb)ykpH&bhn
zf~U7cNi<Wl;55DpIp>(&GMaaO+|V3VGib2m`39{C`AgOq?Ha(RlM-h!>nu=#u2<Pe
zzivt1879#c>|5Tn7+Hx(d|_gMU@Uy~!0?sSy){bKlNNWtMDzEdaP03fwArA=7fm$6
z#<0<1eDffqaEjJ{d4bDqzAw=!Hu+}?Pw7H|T3`+kb{!`lc}Q%JZw4vF$#!w_&XX`0
zQSfX)26kf?chiz6hf;xmjRG^;i`}q;b5O4By;*O~&MI%=Di@-~+jXWJNqMy=`PQ1+
zZT6U47~P=XeS@e~Oi5PgF>fCPa38Ujy~Jdo7yTp~WJOsJh)Om5R`Rl?Pb)pzJxFU+
zjv%-vY5jU&AEWbZyxb<?V4=^q#=6pRjwB-ZG{eF@MOLO843~Xb8uGZ7W`kH4_sRi>
z)C-5B3-=x)I}bb(Q_Y;3d%<hs%pq-fuu*T2R(SJ+xL@31Vsme;tZa~g*R2E0hd{Yq
z3<&UuLs?}NXn@2KnzjCJfZRPohZz)B{|I`jm;KW;?jwPCaeL4m23_hDd1r`r(B_U`
z+lP<QQ=!g=Hj;ldB}VJe?@qQoHGt6#*Z&-J|Fl0&)W?A6AI%jBL48aOlJK&E0BQ^&
zRGt1?y8qse2VZKy`|r8kx6JNaD+ZxN_bt2op4)pb?2iH^N_w#}V<0VuI+FmCWcb6P
zFLd;N*h4|+p%N|){xr$?D3>}y!-MxAUx27p3_RzbR!Fpgasj+9hoMh?gvK9DLR|HR
zI#-Z@Y#T6i0#jW`1QR45PMwU!6D0qml{41Nj36ZbFv<BQZzkkOTQt(19B7G=BEUad
z<wr^gv~Z+7HBdSIun^baCC&96oW}4HzVZhJhEa<@|D-=bU*>-=FB9cZ;SpyIxhE-m
zf3?Dq<7->)QA6lwL+KQT2^l}nZ2LR;6%;dP=85TOT?JDX#9%;ap=B;>6|3f?aZ~&H
zNzLqZS!xSbep>OYE;ch$Z@XLa#*rBAP4mT=h^zR&p#KjKbw9oihIu|f#r{9dt=7|)
z8~@SV+IaN;dG!DJ>-qn@-v9UE`;*S`ulq-(oPg~1r2;tgf}Ps+2S4J+Xr9)J&UW3n
zzjlcUEpQT&_?rJwWAH%zML^)SYFi`kpJ)JwNp9kZlb$e2!5>BdsY2mdcpW6+w)39B
zGZ6peM<5RQFhhKgLFF5#7*K^t9gMgobcJDpsxY|<b-!o;&2(L(4+kNcP;rYt#i>B}
z1V#11IU|6rb9{EjbC?LCg*O2v>e{BiIAM3s`f&iK(^ieiJRofzfm(q_o3v|t+RDs7
zZJBOmkAGB8VMaQ9HY(y{>%7RD>tJ5;z1~wD6}N3tTYk>uLsIdafK^+Lm|p{MfJNxl
zWqa}O6!^BLz;{#QTRKZ|VNPPbU=D}7kGY8NITx`*g{d+US27>VS9@x?RoTDPa)K$8
zkMq(27i22-@<rJU#omvLGZPo)A<jW2C;xB`OXueu_J2?|-*Dk*{CiVjg5jLO{+dLi
za8X%>%|1RuTH&089uu>8$|W{`_F-qh^Q9klskgBF!w%qi5)hl9d>M$nnTAVc75<D_
zfb$S3H|%A79N$yNabGJeYsu*$EmgLB*iX^?0IM<kbuCx!wHg0R+Pe5(cxMCuR8~;c
zs)?>;6E~H_i<_`J)@g+~1ULGNhdl+pcPbx>G9pm7R({N^?4l~4DhlvECc*4sG32*!
z{u&5BWqxaUQ8h7#^;{XNt(~V#rfTm8-&}_3{JhLlWMy7hHfF64W|A+KO~X7fb)#2_
zoI82*Wb{<)2bSVlrB6HKSDCXUTJ~g~(i#2Mq7P2;xvw%Q`qz`B))^YlRpk^VElOBR
z&F?cm?LB2%^k+$w2@);W=+Bpw&_+rbVflx<3I!L@drg^FE+^Z}NzkM;pqGND&8;|J
zN-mR(&4cJktTsimv)mkPuKv-VUjI-o-Y!0+bPwhpm=~nM%mRktVSQ&>W$00hDApo!
zm59%#2yD~DR<Mz=?@!u}OB))$g?-zByv$VE6H~mqf*sF3Yi)DS!7of{?gr$b86_m4
zUk)(<K-=pz8p)0_8;+cHcjU{=K-R)ip<_VY)mX_)#1jAcGdbFKU5~jt9>)Fuo1mM-
z>FoJG8m-NZO#Ro^WBu1}wf^hl{eRK@|C<*l`<*xY2k#D#OWpt5?fGs0I2GyFFL7yt
zcxNd<2||{Jec`tX&*Vh*RF?L132#kTycW-s8h@jH)Ei2Or!yVEq&y1-@PFiw&fP_M
z5-+*8QUVQx)#m@uu5=d1mFlNEg~4eEvmb)y(=I|P9>%nL+qfP7^y=;Yi_VL;`_Ep#
z=)C%=;=`$0gu^;%@*EEA(0`1h1aP?uk|^lmeXqbIRTIknq7M!_oyjRUNsAM(Wg!&w
zf~RG4-tD5}_-6env=dI(Mlyw;-@fOTW!(!t>g=C%UccBsKB;Whevl>`n|Tl!-YNfH
zx<YHu?1J{9wf&F7omr}PIx2j>(_vNizqTFMmnVZ|zsfR?o90v%ym!}&e9l!HG+PNr
z^qh8TWb&8C{lCoqKP=k(C+`0ntyaVJ|J!)-<mu!7|LFhs7u^3#`u)8r@9+0!21mbr
zkuTG@h6Zh7I7CM|7;|_7w4vYex(SJgR=T=&yIMy>g*vVhHf>%-p1lb^hDR|8{|?2Y
z_IlF8PJ07hpe1t1TgkcQ1_$}HPJO%@X%M|}D4U)I-4PF^MAihhksqU@KFnUiPF7UT
zp?*KU4toOL8J4I)XF-k=SEWG7A;$!1Gtn9owx)0|3;%$70N_LHTaM#|_aWo6doj5a
z!jzm}P&<n<M65b{c8e(%@Y<B{AX7)g3$K~YL9s!QFe=$cn1nEP;8RS@*2Ab^K|~IW
zl%N`-e+hcgd3FY90OX@NA}xhzb()01M|gTmohUBo;);Y-g9&u34meJAj}i2!4~yoI
z)-Jcd;3khThCtL#tqkwTXNt)S{Dt7{Zl*`wn+?|W8*+`9Qvo{;`)51;oI(za?QKL@
z8BV8g-@!hz4m*WDMzYSXjY6@b&TIf3+uw{>xpqMhm~wtso)3}YCG3f)x35eXfSIl;
zKP?7cs#NqxQQF~m1)hP^^J;z>8bK1aQqV+{XAunrqxhQ(zNG<!6!(tS5<802L^Jbq
z>to}E{9TyLMF4-sw?V8Y(D&ua^0nm51vCFjq^;P?t}6Yu>^d~AcpM)G>wdz%p{U@D
zRwT03W$~uEw<mv-Q>5+{G98;0pxp}q3}=>aOpgJ4&acIHW^)GvaaOVCgf{b|QBtK_
zf4YB@PioesS4Cgu^F2pI&5}Z(i8xNx_9E`L0;yhHicmW^V#87uyLS={K1zj5qz*4g
z@>FTyC5yoWfBtE4#2$No3-cUxOBQXrTPkye_n0H<^*U!rq+!}#;6#XWrl5Ex=}56;
z&FBsLCVwr>u#;!c1?K<&m=9upc@jOqrji-qDr+u!^4;W$_cwpjPSAsxjD^$tekctD
zX&S$`CexaK!P^$)lxvkFz{489@%zR!w5W+MiYT)2)l<ayeRF#1)Wmm1G}`>8Xk`5U
zWLA3B#Mebsd$M4vN%Jn7&R3#{a9j^{iz*5f+osQ+=}9?(T-Fz#TsL&HNQ(B`^|QoW
zqw@P6&OMH)k+juhS;{4YhuFQiznF`Sd}-*elhDdtPMrC1<1F^Wtn(&2Q?FOGx<p(f
z|9eo_*>8gn(|0tVkLfIfN~siVA=S*1u~YYAo-~D9q1g*OnUzW&$k|Z{_%n6D&(b;=
zq*AR+k}j%lm2?(mrjE!m*`vA_VfP~edKV1N!;1V~M|qsFO6FK94z<hDRV}w%y{JuY
z%_hA%Jd@w5E_dAO?3{YP5fEJw-YP~uMSs$lZrc9LxSs_~pl@cpnToA#tx;}6bBEIS
zu(M$Fy$;EGB%HUTCCe&wPFeXdUk(3jS1f!t=EpM$U*P;bVf=Iq6U$FXu@EhN<|F$Q
z{wjZ6=BMmeCtUUQDsxU7W%=U%mvNp<`t-@9mPIFXY%tclyU!*AJ}0tsm=Pu|!-izP
zsOjUZg-m*cIlQ6<Td|dk=7$B&6@XvGj+{OkAEqPmy%AUa)>;Ue{WQQNUDohhAAleg
zqQ66WtWn=E_;0!y$e#j-`bFKhst;~%)NdR2w-`2DYHV;SO+9(lMK7Q7bC-uL(Cr~l
zs`_p7{-$@+CCnx#*y5)B0nU5g@PN?OsW&-gPnm)-<~Zj(o^Za@3CDM~dElp-3Fnz_
z@+5N)XBdl4B9=`ielL4&DX`pr|5n{%$=Vb03M=4u-0J>{!lI&X6b?qE@3OvE^TgK`
zd8?;XK-$QkP%x04FLtD&EDK-7dFtLUo#QZ7?igh;_o7t$Z}kW@?cwQOh&#u*>03KC
z&3<5dJSsh?qtYS{Nsq^)uj_c^XN0hSrrBUmoQuTM(mBAey}ZLE-{5aJ&00>RJIf*E
ztZCIul75AW)KsYxR?3f8Tx3%F!h9$5#q+;;jN|_n;{U=?P#gd_A^(4?_4J7w|MzsW
z@fiR282|qljsGtl`0tMZR2wN}@4|C<TPBt|fIrwLe|qT?GgtIUt6i>o*pf&J)m&!X
zi#Sfh3ih!})z?(!S9nRr0rjA@Y!z;ymsF%>(P*`7w0L<}11|$3VY9Fh<By6;$m+~Q
z#_#`bFO|cR>Mf2{_F<;d@cf(I+K3oQq)ytE$_*%~w^*qQivr(PE!x?2qh~?nL?d!C
z7b3#aHc$+B8QgRRohCuFLU}|>{0Efz0qGY;xj*N7EI&K}L{q0^6Z(?KmPgZ+`l!XV
zp!~$JI)4<~HuN|SwllJ#&)BEpEZ^RbL=c}&s}9J6q+^&|!ouFH+#9f;hhvVZRg#-e
z^ZVl$ou7~1Q#o$C8aHOEupS#36|l(PPe;wIs#S6^Lx$ap0QF+kzddSv*%E{Cgm{U0
zA`7b#R`oJS_j^734d$>WA03ZR-A8Z6efQDZ@ElP!AMKx}Zma#|oIh%qty<2bHz3sA
z9)lbA5lEHaoez)0(QCdLnopngQBUTN<g@3|Rc4%j!2WALdY95O^U;f&q5I+M_}ufT
zIka)ZM-BVYQG9KDct?DvGJJU1>G<CVud7QFs?-}y0#fq$w%hly$jL<ZXcn+XBg-C*
zEPFJv?9s@wM`Kp@Xk^)=F%NrWCV}*y@euU0%jh!dru+_L7(@vJ<wO)vjR!2OpV!&o
zs@tvm=H|U2)_zgql*6@(q`sHCF=M}|{m+;(@xC*1vh2ym;)TkWr?aq>J-icA6$~t(
zo-oNU2VIfb_veGmdF(ytByyAc1vY=(-<j?(wR<etgJE}2N%i^j<RX=3H?+|$!{2xt
zhH7Qib9^U!uzui0T355e1ajiJoFbnAU@Fup2ND+SsbrDmI}H}pyBTe7V<UkHHW#X$
zLb7VS<Ji><OpSDa6WMfWwuvUaS53j5IR(4W!7uG)L%%Vf+Nw_FrY<I6kKKx2VY|Gk
zOZwwuHK@DKG@kEf9?y56tRc>#93mdi0!%)INiLL3AyLvD_X3eJ!>cI0>fI<2LRQ!#
zlt!PXSebSOVg<uk^$ooW8?*}N2M%cCn#X^pUhaH#$efu7ZkMQV74+pIk*gIRc~>hw
z*Q=HKA&g{G!b=`Fc6gdAeOfdUa(RndE%_{{>Rw;m6p&{@+MYv%T6e$t-uzWsZ3Ek+
zqgdYZxAw7RMPabaqTY=trVa;)KAf&4m{)i!+;&onf5P^^y-GhI&UbDORL>9>Hmb&?
zk@!a8qVGU7(w1X_9yGpUA|0MTr-B9)x<gnuqNcEx8rmfSfsMH7pu7w@WT?6R4a#r)
z9?4~RYL(yshfKZ?h))#4=$PYU8lzi&Z2EL3X}shOmV0I%><mpcN;{`P8g}9{%5!Ta
ziRfN{o>p1D4b!T+z<1sa;_7d$4?DRrde5wWpSML83+no1?D#dRLNL#st;pb!9rxYy
z=PMPl$KLJa-JP7Y&2$+s07kBHmFFupb|QCZ{&EnDch6QR4^P#XYJ+Ejr=&j8Uxqu|
zRd5iy&uY)xsHkBQjB1tVz2=Q>g=-q!7igtcR1C|zIGAvAz>Qyz=KF_W|BsX51!#x)
zcL0U<|7N50bknu}Z*FZo=Kp)l|Myq4{~x|PdWk-<rS1T9<5HOb--YQ#fI0l8&Iia}
z!lo?%i1c)_*;KVw--g%EjUv0{YOpHQ4~M%BfX9lg_gs<Hs3~TV-mDdtUz61s9)Ffs
z-n1-V7-dXLa7EdAtR+AdS7$AYzEyLx)Btj7xRzQH`4wHud_GTo*NOD-N~XfHucfH9
z&?2x#kELqBe)%e{HpcT6adk$&uo|v5+<8j48jySyT%}RWVirnrk$Q`}iliZwyd*7G
z>NN|xIDbYuAwR@dP(aoodVc*_yU~>OOO20fc~bf>Ts(Hd1k6`E)<gSmy>x8OYL*MO
zd}-OdJ#Vg(vjw|eneuHVKKGTQwJGrx)OsyjlkG2Dqt(fY1<l-iSbR=Je#sFgV&eDk
z;&{x@e;A!tkWo^9CV`2sqrnhn%1kn8h3>jVjA0S?DxQ+CWL)$%b_Zk>?0l?E_gGW*
zv8L=_zNRew$(<KP!*<20Gh5AN^e<m})}x$EUww9Z6=gzI+Nso&k~M0lf?2v`?KDtx
z3fMj>Q-2L*>aou4W1ZVM>fC0pUXDTJ&2jOP^3UhFb~zpe$tdd&f{9j3-@=GdU_hh=
zO#pekiTQjtG3Ep;_#$RTPBE?~O7cnByP1;UitlG;9F=8UFFmfDE1S~mC;!@}EQooo
zZl)!bN6;1C<CG@De3v(>oA$l$xW#ueCL;6Q$XG*K*nNx%b)MT81*hmP#+-7M*-gDf
z4p=7RQT`2$_(ml@!)NqGg@0G--o{2FHU9D*-b4kd!qgtwA^1Sii-&g!bRYK=iXRq#
zOQAr2wRaQ?CTF(EycBh1!^0=)JU2h4JTMl#yRg*)Ru-3^qMOSQV+qd3Nk~51XzLDd
zcnMOpn8*E|p`?HE22}7fgll_|mX|l-Wp{Xcl0@g{Ve*rj6eHro6wo=XE8gQSh*yPW
zr_zFx2Y*o?j4w~%MbIOY`==a>s}-8URN^9vig4r&TBh1YXVMs^l9TXHjRrCVmREVb
z3X@wDXs8bJimN2GiQJboWcCr*E$m|i%g;yex3RkENESkchkbHZ#Aug@7ls8@;2e#&
z!zhG9l@)Cv!e(>lYSPq5FLfg{cBa`<-!Ct44mMYsjTZmU4nMI9x9q!e*PJEw!?|uf
zC+LV}ZazzbFb#G&ConvFxi?Py6o#Pm@yoX60V&Bqho&!d75yjOm7R}+1nUcf5DG@J
zATb-kF%NWxL6?}b7Xrb06p955EP<gB0I%ZC(r}bgZC(;Z^^ECoePs&c;mSHU*gZfS
zV#ZoXk00WtD&L-RuUUmMNN>da(PK;N)aECYO%*eeC&Iws9r$P0&xRhKOt!ijRh|5{
z;-b}ar}|s;0rT8yRPn(o%isu)tCkMqXsVOv+`N3G3t{rHpp2s#tah?$HSmk^jmCFJ
z#nhS!Du7t+3g<E&lH$ITDNU=IRJiv;7ssO{>V=d<P6VN=nL)V_(NmfcAtD@#X8=VX
z_#mxkYdjU6q60nAdE}O4qU+}|N{?s|jiR8B7xI^#GZ@4nQNuu0Nsw#$AcQ>=We(4z
zi$%^QYO(v}%!9NciBUQa`7n*Sv&PB1qajGH2Kj0*dLCUx@SBLbt7H+VI(hi)<blo%
zh;bo;c@&Uaz#3~IE!wmc*Vd+<>($I$ug*T#2HrEy`NBqCh5=Ty46ypy7~rqr1fs$5
zgNk#@_*oo8?%PA8$Tj5c``53HZQwgy4}Ow&wv~K`%EtdOj*{?|T!8`RO{GLRfC~O=
zdkpAu;KW4$a|CC>c${`<Cr-`fwTKPMS)`3(0nI!Jt?V&p^EZmi5RR^H@SS&m#<ga3
z#n>sF7(bfO?v~u&GTB^xGH0!sD{&s+h;&7Y_o+IJLa)*2pD8<!@u<$P!coeBcphB@
zGJ6wut2-Z~SP8k_1gO-lr1cZdnDT%*;teWNAGK;;$d?(~S)3)kXVRpLrX}%b)nfRa
zLO%35=2>2O*lu+%JBc>o`C{VAGDaLHUG4=GnT3#)>RsnFuT=2rccn^xVz3Ts_}wZ8
zQuDEm3{vI;H52^axv75X?BUDvEN`|GegQXykJN*8P1Dq-FTSLcnm&s2w#PgzlxDN>
zz)q#+_R;32sGX(Un&t-JHfxrwT<*B80_Lk4&RVn7N|1r}a`>Rdh?CZDsQY|s)L7$g
z$7*7{6@CXiE5xcvo;B9ITik&tM<wI}+9RK><lMcR2F(`78_pT)IlA6X-uF%8`(~N%
zEjmJvAZsdw$ErSklbm14s_i*YnhHvDS||#QwLMOa6_3iC{h%p7Xcm5Ow^T4@<4*(U
z#s|Yb?-`@<r|&Cg_byOHIFC5$$Rl$+Xy(v|!daLk^Yxt)LN1iG3|1hGY!Rl}CbMlM
zX@8_mMy}`bdl0&0+H6;xPTcDwgkxfAv9`=5_3G!hup9i(#30X{kWQ{@{HDjI=106~
ze9%LYp5cD1Gobk4J^x!{8hu6`6HsQQiq7a1ZaA#3Mpc);@}|c~cG?G3A-gkIgyL5?
zD-Xrz95~&CU%|~;RX;K^50$L+;B`}9GIj7IOvfgho7y)nDgNF)ekzT*@g>ik*r!!P
zW116}7{cnFB}|KDsfFV2$b757(!bkC{_ghVLG<?zqy%OfDmFno!;bUv&iUIng5NzO
z_}v-7-MyTpm9pkPO+SFSTBEs=_<e)Z<On#vvR4HHJK4je#6A-)Qck*v3V-qxS8%+t
zj~-M=r}1Ep4qUl|-^=TDcogEw*wnfhe9E<|sgx?&p*V%zyT<%g(<Dbh6ilx_SM~;P
zr9l^y9o2B+ZGi<OL%p4X9!)CkOc%FhUoT6O6~4(0=gRR)5_Fs*-0QwvHD+|uC>j5?
z0Oyaz0L}>m`_i5zkIo*SE$Hhn685!_+rveMgz={n4bfjNMr^T$>A#i<SWfwTw!U&u
z3M_sSfUOZJew@MjH#0<!B~Zbd&K@}Fv(i>UcP!Hwj&r}gXY=tRV||5;HH*=CB8&x@
zo~Miiw&2Q`;8NCQcf-#&8yjZT6#LF@_IZX@jS$NX#I`NZylMfrQcv+^)BMshSI=yY
z=Dpex<Je+P8>)S7UvyBKH$-|A4}DK*0hA7%#^!Kl$UQokohaCk5!K&xMD>h}WkwXw
z#a5*Qu;=P!wz%vWX=4Z8&PkoTFzz|mazS+We2_~Aec#_SSh3*lXo_OO=cMVRQ1QIb
zXSdt?j}^}g68B&-DOh}(K}{jGrvN)wxVQ_iVjQeX#EzSxDK#sUQ?Uv*GWbTmC+@*_
z^h<7_xfT0j{vqBf$Q9%FYR&L!7d=#DPxla-ZoV%SpYA}{gYW`ZuV;bH2k(W;4e4Kf
zF60ak7sO_Jnob*|?SiNkZ_}SQX4|*|eNgF=OU==o=?L;B=F6oen141F6-#ePD8HO`
zM2*#mB!wX}WK{UfVag>{n)$+)XHsWMMK#X_d9Eud4(!2&JLg|%8E>BT<L*b_iy5!5
zzRs{@xg}Fzuh^Zfyw5Lz&M-f{5RQc?BE_x?Xr}D+mYpYXkT@In`?r{i?vhFY;mp8J
z9AD!f<~6p6dHGM6#0361T`h^bb#=9FN{0i48TM+XS|Xn97?IKU6BM25xWn%NG6s5m
z-w;X+=AVcL03_;tZ-muYU$q-mZLac+O1M(u9~EC<HTe_fcwOxnfL+RD;2IiTNw9Zl
zn$*azuTaj;$P~NKe7Aut!0E-yP|h*dwPqexWPqJ(z$IbdEon7f<Hw=<$MiWv-{p#=
zB8$ZM-bIP452fmO6pHv-nVg+cwG$Nbel@-p5^G1Tsa(g-Xmzz}`<Kd|iAKJeSu3O+
ztr`sj*V&(cvWj}N+o<B|tJr*3tyay5W7}8JaCHD`K$X8`_9$gYmWRc&KonBz7Sm_^
z3-!+`(<Au@%L<Txu_nmrhQZ5@#IaF8_s6O)z)Y$@cJ2e)Vd!4dALya!TLANJFK4~z
z#+-Fvm%57LaSE(;l*Bz$p=QiF;DNNVmzPGtAD!<5Va=iWx#8KGCmP`EEJ!!H|1Xfr
z0Y>=t@Z`ldGNTYguy^WZS;MSMDH<l5s2?EBP+xTY^|*9JH5-3Vg7jh$xdII7z!&Et
zvqxy=(T}sM#rnj@O4*O6UOi><HcVbZs}FMtJ?K0Z-x~Leh~!f|JawqB3+dLK%K#mb
zu`Kt^XnPy}imZ*t$X(Q~G!2XB<R1~w-3KtQfjIqx02ClRI8>@^eF`|9!0ylI##biM
zg!uk#JUEb5z8|DJpin-(k#r(cW)k@;#rA(KF?I_GvHJ@}+3ek8n2n4Jaa995%4Ft%
zlJdTS_ZBpB-y@t5{vs^2`OL?SAS&PI7(_e&<1!kkUxOQ`!j`pfQ%m&(&LtIiTq9tH
zaQM~(aZJ;s*_(a4isl%uu39Q=5db@?C%2i5?BV6|C@e7=uz;QQgApi_*r71z<X(#)
zyXcgWex)sQdD4F<mnVz8HYpotbZ@sb_u5}xRMJDe8T#UvHFmtw<Zq4kk8w$l2b#|x
z;Y4oBcUc!ca@SlE>ALYXM^Y4$dhz!7@SXhU&EdPFmzcKd<<j5sf5;-e#{awZH2mSm
z&F)sW_ruxxc#w{V!#Dwrc%6D(r%u)Z-*wK-;-90SheD@*k=sWDezrC@>0kJ9{@vKx
zYP9~=+-N-6Xlyo{o2|b!TAQ1##@|@uZ=d03j2k&)bHi~5_m%umsoyANXLu_Q{|+&u
zetgCmtp*AR2=0r~b%_6E2dk?(@U9=7CP6|e^(mKrGzfu^LH|62=XH3DZw}(&Z9?Bx
z4yvrxXl}A+;UJDsfeGP@gzcWv-}RLLT|bZG^Zz=(3?f`9mu&%ua6FF3Nf$*$FJwQ(
z$tB{Oj!%CN(brzo9u5=Or9{2qH}QX>em}tPkGL-I8f3%(jucmQ)~Yw^?9~~JunRb<
zPmW%*Yfy^@@rd;zRIN_OShf&ATt}k|TFFGK8vA!VMlS~zpN;}h)#5>j4h|Qi(Qtcx
z{gQWGkCXFtfVmF6t=E^Z#K1t;81Svyqy6X4`J(+hGz5y2YP;k}2oOKn5_Dpr&RT+Q
z$Uiqq(6N0{d?6;64=k{tS`oDZzNrQ9lJo*TVM40Zr><Zp2H-7US{QpFJ_Lox1L85n
z;+{h+3<RxL3yQ6nhARvo{!%<qHa?;hu;MU?X@+6>jh7rqPAEgkrI8=8P!7O6)#yr5
z#v|wo!wpJO?Oho34neMQ<v&i+IZM*1ZcBw32v|CkwbHi%uRbEz|1t&b%wfz|(9R;r
z!V21%Q0IwIP;iP0+GQAMS?+>#L#Uz-_;kxx+`yI>d0+yln15Jr2}Z?+SSrFGA7$UI
zjM;y26o7AxMG%Eo-opw4nAz<)Lsge{rdu<oluL?CVxL^+<|3&$m*;Y!WGrL)Dh0q-
zC4Hg=V_LIRrhhZ6Q}&u;lC|}euQaDihG0i?rYgy|>){pY!%8cpG5Q*OwWa)pHcAwm
zFMc6es^3cCYI$#;T!iWb77tl34pa0U>s|zd^N^Ps2nHYJVV66o)PdC#kax!6Enq3<
z>g#yi@3B58L9z!pX9wbvx5ln7qV9$2<udq4zAb482Wy}Pd_||?^n##ZRli`!u@K;a
z0W~<;10dIAVLzlZ<yn*fS<%N8J{ur{_53{UQW~^bV{#IA?O=N#yl3@I&64XOT2!8@
z{I#U(@JV?m1<m2_Iv?z+{V)xIwR&b6iO*1S>)z{VEz)ao&MI5eIX>ERXcg-!bWihv
zG(6WFz^)HqDKA94@-HN^fyx)GbjAE$bv|diR+i$clPTWt0n5_3@nNPM({s8s&F|49
zYqHGg$n;hDF=dNwfJ89qUO&NWZtf;n-TBHg+ZN<u!t_O{P-FM`IDU!~=nV5h)mT)u
zGa$PMtY4(E>n0}lq=sGdY(@)D5L+-Fu75YI5fzT7Fzc*2nuZ+Bz31c2%C&E1E<J*G
z@@%So>>g%S?~(TG<~lxVh+ndv(ISM87#;6Sz84qB9xCv5@8WB>-Tk|LW<E2^Rrq~C
z?@31cG;_s~<6#e&;DBSK_Vx;*2n(}@0CaFxk^IQ@zMQTr1B`D%v2nrt`5j*^r&qW1
zdL9$(zy5Xq-^Z3xlC$ESAVHCr6mL;c=;(z2bo)C@F2CIDRd_cq_py)XFaD?V8jfdG
zbF7u`w3&k7fB=Y+4lVf(=YpW$zwLy7c+5Q&)jRGiY<bxVAdrfLmMN;al9_-0sXocU
zgdfo6L51VdwnO?1Tr9C?pvZf!V)b@z)6hhla%ecZ1z<rn<R^#~FGsx&&qjh|2RuW-
z383)*!X&QfL>P0k(!v<Yq&V;O{oD7)FP?XP*?;$H|Jmyoy1A`>iyDG+Q0g5zQ+y#(
z&BW=8q?JtDoFhZc&1RViiHd@`#;GUx-rS$~@4a|@itaB1W^XL~dAm{=J#T^FR!ac1
zI;Cj`_dEI+_Ze%O_w<s47#W8}X{oPuM%UxKSxRwys)dFwM*m4w7EP)0i6`Eo1^S=)
zczb)l*TY|AAX$~IdLywkUMsB9oCu}r-%#%ZGwUY{<;}Q1_4sCnV*O;P^gO!qQz;v$
zGOwPhRL25+)oW=zT2RHzBfDZ|!HIq;bM)c?)beN_$vdK=ga34gcC$CWeywio1{klX
zj)JS^J<FOlwz(gm><oLj`7y64xadfo!Hxr?{ekUeD>|+DWKXqp7aCpJga3qR91XuK
z+}9)2fjHTA9b%Yg#r1W}Ekl<Ebc&0*5#9>~35n~Gao_Quco5dzGp3m<IUF@TCFtx^
zXeKuZNhUFxqP>?7hff>mP?;I=jg{U!$iY@hOx+}UR4K8CqKL!qnjS8xswF9M8jh~R
zkdvil5hktAYj*oG)-k7)8+ieQ$t_O_$3P<q-5KX%{MTr7aTKR%1iAqyz9Abk5#38O
zh^`5uu%uj*cDNjvMFOnx!L{hAW5@K8O3A6oQmRo#O3k)x=UcQ!M)?xbsWHEFdH{p0
zl$feXbXb5?nolN8Ypr%|nI($dI#$3uIWnaTxtm+vR2Im;fV#Of{nf&Q)He;Y|NUv4
zIh~ZyIUhC7M~(BV)i~|z!7p`-_pNK4?mgiQ+Na^DlXC+(hc;^Z`jpg4^|k6Zp`F^p
z`L?xGd#q*Y^6<6RoZI+sNPErnF;R=v#l0rQ(K`-N(xqEewmNcafcf-F-N;cZRe3DT
znX4nsp@~W6%%GFBKeBwphy6I{?dFvhv+r*3V@36XBFDZQFRRUhyfO<-2U8HQL891=
z(fGv((U><nU&ro#+4%28#Q%@F7X{J(lj8qdn_Ewxy7B+bR_ihT|1tjmuNeP-a`3Wb
z^gnM|GWH)9>FkmJ@Uap1KSSU?001#tK3*T%Q~a3x?lDOJ3kB)J(X6?!K>dkaU<Ib<
z@j&^Y{5%MgBJn5U=>$XPW11UvpU>b$q4v37%@<#vKi-TCnGjT;Pl?4x(o<hwdmOzS
z#Jo}TiWm!wp_ee{i=Zbc#qsl;TbB7o!^7P31LI|riqX1c(0jHYe{i~d`wc~=^P$_L
zn<m&iBO>OEF?R(;xxn(guRcpO`AiX)GU8nBqmNPNvqYWGl1}oA#hn`{c>>QLQrx+d
zpBgC4#Y33t5Yf*NZJaNj;VX)G*W&uKM2C3F&u8leX{@Xp7?X%AWGdBBuy}o7eTGuO
zQ;y1i^{{8c-dQRaW=+kfj<WU=U;%N~7B&Ke{fvXCI4~T#;S)dQzC<mY2_fO-!wo00
z9gma#JdD>U$#4?)RuSX*8~NOE*Ea4q@_aG}u4XxKHN%0cB{{HsahkQol%|;wE&r7U
zyR-Eb3}#zd|7(@+;IBlrA)e=*N6O$0^{o5fzf2b-tWUQi@7aEec%Bv3kGlbdIS8vL
zzj%~6S;RwH4s`=hdSE$7d7K_RgZYp}KxDudJ;{!*lhI`KG8i3lSQ4c*6VH_?@cSb9
z1X^YC3Ge}tuZc0X^{lkhg3xHUwx<EC6n$fM$tTF@Q&73sui9)0-D)5!)3{?RY*#ge
zLbsT^>BIh8g<<~>o8Z=tHx0>){E=oQ`cf_|;rL@9XbbHJ7`y+FoKrvN-k?ZA^>Imw
z`=mm=eg0c_JDz>P{+Rv(C9~WXF`<H-gx4$!FN@%zsOFPFaLpb-<3q_^qddw^56!YY
z+*zz3mxw`T44S-qSbLPf%q6Kuc9tp{=n~EY=PoOR)zoT9Ea!Ei09TV{eA~?Vwpfyz
zq&U>IbbVWBxdMt7q^2Lzop)=H)X=zj**s@2m-NtIH}k&EY%g*u%sqzwws4u~^=bfX
zI>4GXu;zy^qbocz{tS!{8p*;~A7IVL{CRp+S@t*MV;}qiSSrTRc;=E)nrL%TGM>3)
ztQs!RE`YTRI<RfoYKLBfdG3tm-B3j*t{<!PY_!~dWv{N=r+eVf^XF7fnj&?fNAY#h
z$$*kIsPn#U>Ti99P!mu_o=tKg)%LFixsm3O`~LG)S!p)9JTYuX<gJnte}z?wGd7gT
z`=Ed3&mmEi*&>s3V`r*ztV=nFihLX<g=z*-YX(167m}&UxcHvZpSZL)@mka8Pn_S<
z-1SIqm3BSmeme8H4|C_g2jKtvE*SLU%OVHh3I4w=boO=pe_LCR{=eU(|L@kL|8JrH
z@4NlC&kx^}asXCMJs!Yb$6Vtn{E?N$V`#gz*%`6(aMVd?S$EPf?9u%PppKY#ItTBc
z?|1g!f%xexk=6d!5H?Iq051dv+vu^6&?l%yER6=x+coT^<0J_Om}H*HgpFYmnwZy}
zgJ`KQ(0nh<Vxx;B40@ol*8ii^FUkyvSRiz3)7T%W+ATIl(<c1^lDdwjDGldFwLS-_
z&EH#%VQW`#Zp~M>7MxquQ~w_Jbw)MT`SsP?jTRrx`;=$77h`54*rU+BDqyEU8sb91
zQ6B&M_~gZ#lUHwEFdmfnGwheQunaCS0IaesVyGes0Btn&>thNu#JfijGJqjIdjqT%
z@d!lPvH*oQ(G9O3%%>4Aa!+Bg!7AZd`o{^P2>c3D1G5dhenvjTFo`hXDWi;Q$n?-G
zxyA2*ufhb^J#T;|n-Snez~jbfLNLdF+#loJOkMHD0NR6MA`szfp5P1oP{0KVLX4rh
z0ZDNhjeuw)4k_SNsNe!iafPM5LH#o7CUF{{jZjvs!xlD9*M(eK-}v!|;AZQ`+j@`=
zsU#;-G8y!mK*{ZGM5m$%43AsRW3?FI9hy|>%RhO`-CaPh%Knc~GPp89M@yG6I{QaQ
zuV2W;a*R$i!4MRhpnHK#fwJ>D8f>&`EWGK4!x5hsQLDk2J4GlC2;|UdIKnxoz+aLo
zz3?jR$3u{#&__Qcvh>6ApnI$6(u=!eyeOk9wDspf0==)llP=`zZ-T*5a2}@Xm%(6T
zoug3Ss9%gO`+r|wC#dLfj$p9Kx@=ReXNkt?8DCbAR+li~PH;*L5_XVsm8y?+6yFC!
z+8>|OtzTv(ksuzX;kXz3Nd)6slEo@O+)w89TsG%m%J4d3mJY)%<8cGjqyxRtI1gSQ
z9{hLb-HZL#xZG=Od6fEi%p%t)t&grcuvv9oB8iNMpch-^;U}GlK%wJKb4Le1cmC)2
zW#`>-NAXleqOwAk;E<pGaN|9Eez?uv52CYMxdXzU0xCeLct}Rp4&YZ3m>50Eg(Vxp
zVas9+Ig@*Ag*p=Q>OEz2<iO>IMei}rgnq%Jr}%b-ZEb9AHaE6ft&JZxx1RpE(P%t@
zvjRxP#!WBS2seK0p8mLb`s0(+-YK3TcppE@sWsLXKNzseIORx(NS!sH&f2?UmUb^-
zlvPcB(?~AK*{)P1UjN6`ukAK#wW?M8x6*0>V&<EzCMETodIBX4jR_>usL3Oy<x_{h
z;g*5_{EPpyo!!?B)}kiUQ}#|q94Otwh%sP>&uqKgD3?wcKTw>+cy>a@15MA*o{FRS
z^6~ODZ@%N7!;?0Obv_ja(q&N<{___>|ICN<uJOL@AAt!0<jI&DJ4{|}HYBUkZiu@D
zm0S%5qbN;-8K{80)Nh<jr-)s1t1}{kuw|I-jkB|HRbX=*axom<3_;Buj7&IxI@gUF
zQu9fbcFKkX^G;U7Mwbm-;T(t|-so?(X40k7tT)bPo}#mU41(V`IrJuHTBJYpc!1)K
zTZXXVMhj(ia4n*+3HI5L>>EmK36+A2F#nvkHVWs|oLp`ZnQ4jvPiQNr^XVD>j3-Vg
ze&w4|96C&C>nY7?Yh%het)C6iKnqhzGdcZ43X_~~CS{i1siu-|yK*W8^j^8Sv<TdA
zXZJ~K(uyTdCv;glH+LjBJb{wYYS3rSx^5QFI?b>(!6#a?4u#LuH9+5J`JlI^<f#mE
zATddDG>JPlT2-Mb&&+NdVzU8^h7g~$rscPu;I|psEgki?x6!)U4{wC1@baFoT@ELz
z{U`H9r9eJ$02403CVo%yb51IVWj2`%vf&hTkW08|d6%LDn+&1qu!O0i@Nh@*9S8NZ
zI-dgaI$F_+aiJ>(C;Wu_oGODr$`8BIRXAYZqptNmo;yHgUi(j&#5JDX<Qh)#Xc|fN
z!5y2{OW%6<F_($qEj1BEm-cIwDt1P-B#m;qic!T#aEkfcEORns(W1w7@psk%Oidxb
z<tWMBjGZt%Did~9>Gx&d2Pu-F&uL%bBQaZL5#K;gr0%lk<v>t0mkkKw5=M{DcxUHP
zQF~6#fH~6{L3mSAfPXKpK@W$1BIrP_;WU9<Dtya#HF_d16^HI+Xjzi0Tg>0NQ^xOX
z*T4_&Fn`Y^!TJ5eR1<Vpa+>_No7SDCQ*Y&RJPJ_bP=i&xZN{V<!bwCd%izkh@c{4I
zB*Fd5NrpMIHqpU|pu>$c9-m*ZFdapg*b`EdwhTN64`V>921xct(XbzZusFSC-7xA`
za6;p=%1M>2Gx0lpRjt-VBph9pHt{0T6K*YTBPo4^t~?^4@@7?-Vhc3C*?0T%#{3o2
z!H)RjHDGx%pT!+m-$M66Ki8ZhO~YiR!b^uS$CzD)ql>s_S)jO43pL0-Xm7kO>wv&>
zSn7TPO;Dg?@ktDC0^C!P`eA})IzVOWMnl+Bx);F^?O0Fi8}%QhUMfI^qafnO_Y*d*
zv8x*U1G%;rOF5XoZp;O9g`tyhVtd+rvV~qImBMdeVoxfK`je^^2YABpa5@$*H#dL$
z;c3%paLYrJ!kf|dc5s?ju8?Q2^~MidPkwwNTK@s9ZdULH;Fj*};eGSRt?J6H0Nr}B
z`LtDC5gx@@%ofO2`2$u9y?y%R$ER+|Ami;1n_HBX?P}$Uo~>=-fF)M__`EX42ji8o
zaM=~I;%a?-PdX`Qy?ane1>jjVQs3wIrI7k)upoU@fSz3)^#aIPHnQp{|L!Z)M!kqv
zK9d&e@7z*Cy-lXmK_9*fD!A@Z1AU+xXyN>PDFt+T4b;1q3Kh`90Sm4|Oi)w9`!~8K
zHr1ct6jL#5*jIe1{JQxB)zcdLfd#!@jWz2}fH>!=sf2F8_B*AXjM*}<_>?Ktp{GJ!
zb`@|aZeT+5AeGVw9YDVm4>P!|vzAxYEhKoZrh8YA9BMv~H06!*x5uBa=w|``f3hfA
zt_xtH|6jAwYGwWZo<7}r^#6Oz|4IITJ}1IA;YBZEKga#3M*e_+(k(lV&qmjn(E9bL
z$99RQ|Enur{xzKmUp#+*aPsQ#Z8?8JqjjmIqH5*OnpjlVQ?!$dt}po=7=8NZ;m}M$
z7G0*<G=?>;%}_t`VD%>W7#_t0zr)&ejLPOBRf9Yn?qXlO3klhR(MBNK#d%3*PBvO9
z;n$bSOgejd(KTjdSR;3RkuYuQq@xCRh<U~%w3t$3nzRP`3je$edl6ld%F)K`I!-<Y
zz{)_qH^;vmfMiI=qE>)xWllTj?&jYoJ620YJ+sO7HU-DXglTtqsYuH(H!H#2z67h&
zFm~CLtJA<NPQtqIkz08;ryF}t64&V@Lw;J<so-du`OavE<j%&UUc|WJk5{}H0<5sa
z5~9qsT8QtAp%PwUk3;iiz!b}qQspG$K}8083Q%VHH=mu1H|Oy+XtS#M(=IA_QZfMN
zBpfv#R84{vnH_-;Fo&}u0m$s3j*$UGl<GAv8)OD88TL35-MUF)(9M{7Eh<W@gNqe>
zCmIh!BL((YgW8DFtE^a~`pxL=1C7GS2c|<uS67X8DxA-)3RGD!Xj%Qu7-1o3qn$4g
zm3`Nh!?L?PQ>HW`lw@8Ju!U3AAqTLJ^YgHWV+ruGj}HabD>XZUj6S$W{+p7UT%L(?
zrc=+CPwW^=t0o7)Wt)D)w`nDgK~3*O15o%omvJgmM1JyGvU&Q{Jy{PPa99imm=7~d
zu4YFA6>z~*FZYi-2ZwKuPhR}b(L1ciqaRb5`{%C?K+6W*egEXe@$~o?QEk`y)ETx+
z;?eexX>>Vk^2AseBHZB(f`0$D^KlSg4?3e5dg~@3X81OD1MWENh1ZgawoGW@ANJ=M
zJem(;qYMaRC792}T3Bn=*rXOY2*`OR+AQ-S(LWor@!rgHC#z*Xyt%UwwKaGTrSEm3
z(KjHA&@6L1-{j^AldoZQo{vheP7eXU2<0Bg+USoyE3c9O645{AK8(aAoryFIePI*!
z&x}70hl=KOUl4WS8;)jT?s-$>$K<+$(3!BiWtQ5V;zMazpfrsF$-|*k;*(5kCu_zE
z3%cF$Wwj^=n;bo+p_-h5E&uiov9Dit5<PjpNzfmh`#dKvwmDGJpV*!;V73b2R~+y%
z_loX8fX+<Mu55z)8lP4w%pmB`X->V?G*vm(9QDn2uUDQ;b{j_bUlGrgoa)B?&2i&{
zPHaLWs2g)v&t`3@-p~A5{PS5RFq(JnO3({4HJbFg%XOl3Nmb*r_7{=3gl@$5SVay1
zCc?UPU4~EWtJSI^_AA`J-oWlPk5nzD%p7aS+h8!hq>_f|Nz5^}=1ES#1e}QM7-%Hs
zDN-l*iYaKl2pG#TgNw5iC$fRP5P=_--34U$v~XAyK!S+Q!a7A0zbn^RjU2Dq-v%R0
z?S$j>m0R9pf~fsPr4<rARx<dqbpt1KyENQzd`z6p{c3Qqntjd6y#2%hO~vu{u<+UH
zo;3<T>+?>n1yGR9EW0X(){402#ho4FmtE=4;?V_P?KL*^F8H##wAeYW7QAyDiPO;H
zG;;rvrv+Y{v@BNStXS!a$pVH(uJ6-}c--%?L5K*Sh73<dVek972wO~+T@7Mx8GDcF
zXoisYd4`!JXdcF)rg%E%KG@8@XKQ2{Uo}(B$_roM)HB^4wzvb4f7(snTlBntS?RmJ
z&Yp*70Xl`Bjr)B$3nh<6PGP?Vo|S$AWISgi!}|3yNDqgj=n_?jS9Z1;bp=y2Mb3Sm
zxdyEzW|dEyd@|+tnYKclr%`uETfzG~ZWbISanE9TGsC}NYjMj8-xQxJ%t>&?B}g&d
zyAqoQSIAUuoNDinUvy4hzI(C%{22M=4aWTO<UL`};+nJ^+Zb1&_rWj67eUgSb$NRE
zVUkSt-0jYAh$;bQ0=Lb_8>)LgXmii;Af9EKLe`ko*5aFBV<ack*AaXrC6M*;ETZE(
zWU^Q3F@`$82qspml#%l@o@{x*RX-RHx)<csDG0`fluwLOWCepB-`d7~m7I#_u*h{!
zm>|wvy2-kb^YdWFsm)vfhMR|(_I*ZCh<+~Sr$(TdxfU|E9N$sZ$trCg6ix3nBU`kZ
zdB*GmsF5hyWJWVpvL{cnwvjloAEfObQ1)aKTuxY_lT_I>$snmXL+RS52p4PO^mkp3
zfKK59{xq#vB?RTZ!BaYO#;F^P-T6_S?wR@5)dU#>xtA@iXjVD@QA)bhT+NIJd!F^<
zIFX65%>*<eQdM_}(dWikt6nDO{Q&WGhj5Z(|1uZAy75v~*Rj+)qj7uB0K1~Ulu7cR
z!D5O~_2d6mG?M{lpc9UGnHZI+j!|6~1yWAro8SgHiqn6$SRS-liu-S>q10Hxf}<me
zI5}u{B=xQ1izIk09Q-j3$6*Iu<U4X~%i@<BTXvO!0v;L<%2baKr%aQ?HS^bk9o+p{
z=Hx-gyeX_~B)e!V*3!z(JSU!FN>!djeLt^qAc^7a-CTrhEvJnE%3h{!Nfi_4eDflw
zpP2mmuOKrklKrP+F4w9k=Lr}}qhfVhu*W!Ga1BXa%p0dJ@@bgw*kvDH7jkOKs>}iu
z0ddp}aaNt~pp<v)^*x<%%6%J;biPq}y!BM}bk4glc?+ZbA;Nm3d=cfz7gD4=uIPj1
z&qwbug@$^=vxgrXbUIbXH2h5X2$`6gzUZcFZ+ofJIq%1(LBG@Cs<GM=kmCPMnZzsa
zB<TRoD-*1bXyzWdQq3Ha$-fixp8s^z-12f$d0Z8q=;JIAzu`!~4Ekq3q4wcokC`xA
zh}b<-tq_5gTs+<$oy>-U7HTDMkZ4p5X?u@&c)BlQqUz4Vu+ai%9&99ucz)iTpl^Q|
zNtxiAa?v;5c)`OVXumvriFox4UGZj#&3Ok3`Qe~{%S}dcl5(?vPL1L@w2D|JtuN;V
z{IxxCclq!<DF6UF%)bQ6LecjcktDo~udtjB%=2zG?#|W5c}$U8W@@M8sPiJCBv>8Z
zH!ltI!x86%Y#APmtF_5#M_868#ojBZy^PsPwS|~sL3p&rHoNTLy9)lNTH{5C(h9vK
z$e)A;dGEN$Czi8D5qkKSXebRO8s#Z*_FxDr5U}?ZhR3%%Q<KFVZ%)bibXLd)-F%?M
zD^becszJr_qhiHiodHNpt=^43xE6&OHm*Qq1Sw0%0eH&DNL6vFglx{&cilQxlOsG;
zzm9a4fXfCw`r$|t7Y8NhL@x`F?BF8m_i~|3D&2)e-c-!845l&)kNE<iONVd?IE9hV
z0MBqK&x1HIfEvK)07+icPNTWz^x~zIWo7C`IE6uf*MhsZhd1dw{tYO|f72Dz%pg^{
z*Vxp|pXA&y_=WQ<9hCg8`Ps{>y%G#$elUTu3sVC!eQ+f3^{&(0z?7i5cN!C~N+xFG
z^52gbNuge)`~1#do|(Ogk8&>%vp0zRdzCa`@*MG|HIYRsPI{_Wj*qM=a~hf_<0YQy
z@7iw6<NCb}L<|`X&P`jLyQZ@NF)9+qh<6n+jd@F+rvl`j4u$>YM&Zob>AUe@06khq
zOuYVe1x0md+M1qf{#+d?9y-PnVNtFM6X3R}zgUTfbyHiBw;Yys1Z|XNAYR~|Tm@@V
za;1vCsTiLENUmzZw_IDno(nv_EaBVUbEI!sA22WI@BA$GC;Kjs8~&uZvGy2yd#Dy9
zyPSxqOu6xF47)Jhah^t_%V5Z3w0PFx=!^ve;bNpN1Tig-@G4?)f~j=sv&1<o*CZa{
zEG%-`*qZ~++zf4imd#8}I6`mBQgy}6Y;@HJ8d0<L2bIE;qwI4(nMQ>%!`gM#VDlDM
zB`pvF!imYZv=+2mnql&43^lfFR%oR^4c9Y+FMumWEm>KTDdj$7b#)_Rt1fNxIv8J@
z7kG5h#idjg**Nn~MqXCU{0VNsZYsvy)aeX-Ws@zWGIf))Fe6^1*?EXzfV^b5CYE@r
zyT8lJ&ESBmpTZ6Z$C-TGZAwxB`-s?fhpny6Hh@*{09O42Fqzn%d%pO6Q*Q`LbLQRZ
zygDm_;P6ye@?9LKSfI=DNuFrR0yL5^s7Gr=Uo$hqS}EqRz?5PQ^E^<*8Iy6ATw!9J
z%M)^f^75w?WKfxUH{g6=?g=i_JC3a0R4RJtJI1%(d@7!a!Cs`+rTX#aRmtnelUyZ#
zksS}iE(mW1oGp_5EB`H2&C(n(YrEZeFm8PCoJGmMN6&h7rV8%LcB1wybAR$0g5(j>
z?8XL~?V7Yz%<6PJK79cyT>9$lZ3u^^Ud5NWtFxOue35wpE12t?(X^d<fs+2H_dQt;
z!1RcPSs2Eko1XSVEdncEg&NL0;`!m=__o`}jP!Xe4Ur&c%kOrl^o4>;O#_@l?Qrn(
z(R)OxBDLfCC5lcC@0TzU#zBxL8&$+qbMtp5h^irLX?8F*FzQfnHnep&PMYsb+RNoD
zGGUwY+`S@cYdnKKW>&^T<GjX%)3mBR$QaEu?SVQqw(y>^(A4UhUX+JJWo}d_jjAEu
z%1sNZTPv75(uj$WBdKvlv^}Ft%uJ=1BvN!%v02Ct1JFKvJeM@)PT;-ft$SC+^F!lq
zBnVB4lze!4WmgO5UChja=A2h^@E~e3gTZRp`-~HAx^~Pq<2COT)$&H*@JB@r<2z6R
zxP-E|kNpu^lqoYRmhQ$(rBX8JO{8ve6`N5kSSvpfr&wW@5XTjBX-$1_Ta*sSyvQx@
zSx3#n{eR;3jxyGS|3$g_G#^)ao+>BYnMgM0p!w&YejAoWp~VW2g^Q3JA>w)-Tkx@(
zX=@Xi**l<i9$xt$G+RH|y?+rgRv^ku4pYn9M%eKP%6_GPD-?*+vhc#iVwMjiQQSFF
z<nM)Q0#w^vy1o^~sC{(x645I)3PrZ}P(`wIs0@D(QkweFn@S#LiB$q<d5LPe0nQ^)
zy+}O@&M5^a7MXpNn`{<FeUzJ%<Yp$3g!XnVZNDH-zHUDZ2IFDHF%784+TlM-u~M}1
zQfW#{iyM!?(`*>|n}oGR-Da5v34UrMjnHRz(9ikV9#?dBsDjEXdUjSt&*pixU=h%H
z!++)FN6Za(FOs&})G_Nu_pcn&*|zUFY;#A>GwUiKQ}d}P<U<E}h!rm)KVch{nX^j7
zwG{mL1w~=($J|M0%(HVg$(gON_%lo75<^QwE2~VI+)>85!y+Sx&krl7qN+)C8{NmQ
zU@BR_F5@I*FyyEkg#*LIK9FZ<<?}^Zi{qe&L457f%Q>6eh^Jd9SkT@(WW%`w&MA|x
zqqDIDk;o8XKti=`m#?;pSLLumL95bK>a!>0iV=S`G&4_Au9)#wiksx2ljDygD!v`L
z<>hi^)mJQ&OoWZ(AbZ&rW*s#$;af*m`8?~OF+l|q)Q;@?h*?A<=h;XofEn2~pSYYt
zYAQuUo78fgH~frKZXUEV7ac~V=MVbo%+6U2;3D^ySzS4zaD4WBn}s(c3`(FXNK|eK
z{x{HA7}q`UDi~prK*^VN<A7E%6N3H2xq>!_^r4y&MO9$A!VgNb-=qX{?xr?>k~xnF
zIg{FE`mK;I()0Gf++9#4^s!rKlyIMQp<uz=YuZTmiK{bTZFk~Kr|Yv!kLK|pGtWs%
zy7=TR$SBRJ$(kCaRw?5Ky3@(bJ}fxHm`3llVzoY{7oCyoKqWM=GTMbby9BGT1843a
zb9Z7Z6++HIo5K6y|8>W;Y@!2aR`26(iq~ak!WE^%vq1QvuarpW;OLFM1QI5x{hN(d
zJJgd`!bElw1@q5@{)=uE#H8-G{V@`B2}<DCfn}5Mk8uRZv(sCa;=vHB*(E(pcM_w-
zPXa8kc0mG>0=Aay931)nYs;i5`*6VL8iSxm>1klyXZoivd5JglKpdZ)!Sk~uzNEjY
zIX`kMq-;-8+4(2!bU@E{O7xwCYj*d#_#Lij2LIwy4v;_)PUF+Hj}-s6WQW7R>VIxU
z#3+=c8|n_$6=!D6Un@FMhkxkb+CYp5_hf-)!6|FUTZ+d7iqa$>phq#=DTCrPaMeEO
zN7jnSJT${$WU8Y4Zj3AXo&0=SWqO^WCy^pT#C1*|aqW<Iy69nvAeps1=q)Ppx*+a)
zA~{Q8JYW3vXQE;0IBXZ6vN6XwsLUFVEqL-XU^7Eh_JXlmY;^W?l4*kAQzD7$kGw~I
zx{}r(`h)w~j(8PTQLUB^0HW*&owv|yDCN;E9NakNCEBI8uDy!|(s7fURM_|Q&-djO
zQ|r`<2i2^>%G5=REhvk*mQ)5ML}bNpFv&gcO#XP1RmD7M$WycQyCLe6H|O<5n1tr(
zSWI#D9L<l-oUB;(n$y{+QMdr26?&SJH+0+MPAsc3<Ns5Yok+5(;3>|lb?=>K)Vq<@
zdDyzgV|~ZS`=tiGx<}u6=f(VlgI&|D^ProT#uM!{j-fQiv35^X2l66cSg+C3b|U|l
zJwAIT)j35!&Su$_{#oXD8Zje;mWFNGBc9C@vt^dev?DdtseFfbV}x3Zy-Bc*{A(B{
z>axUWZ25pMIk6=@8CIgsRvBJ&gGtOlWrtG)_~Y?u8WLU-xIod`U}cAcc(5kvVhd#|
zMh)HabF)CqZsH5&&dll_y9{nRgHF>9$Tli%xMOrdv^@+Y)rsjwTV|yGHZG1<A!0W+
zL(T+li55Dp`8Y@vPK!l^hstx1%6y^7z>(4MrU-YEsYBW{1kgjt@2GqQ!(soHpmWB?
zRcI+<*nyo|f<yVq6ZA3588SE4mtV}8mbWPFq@cF$%JjV=wad@LvGPTk96~G{43y*l
z-^0Ea>*IePysj=y29rfE|9&s%_jj$Zcr^wD<_=;aW%LLrJiKiCsfZ)bVbcV!=van~
z;Y*pg9Q%`DXQI?4ci9|ML@Al6ixvmX)xrP#PqW!MdGYr6@SXhU-HYe%4^Cbk!ij(B
zZ;wBp@rM+bHU8hNr{NDjZg#i2y&ulj$Afe{9L5Rkoa;1`>ojreY0_Qi@@w76u5mE#
z1@#M09}W1~+T5gn;m7&+=|*#F^KZ?K#*>Z4X0y53`dg#*WUKM?Z>;e-2rx!fhq1Zg
zxP$vjex#~?7WG5a+lHX`;XX=JUSk34N9Px#>k$9T4pvvSwW1%LCP8wGIg4;N>Vr52
zLI(Zw5T4iJF}^v7hqnoRTREt*R-?Jeo`r)r8nI*U8N+r@>F>Jml&PP`@%ew9Uj`9g
zmRz<49K!K99w%MQbJ7cOH@if9)A8x=VV43^=nfgqyh<&36aOdb_XGU?h$owS4U%jC
zO7vBowd##Jdj;w%h_&IZ`sC;}yAD#+SwR;-HRlv{M+AZSi1}qT(yGS(9gpGsJz(+a
zC_qF1AVd?$#b`9#USGfDUDxB}d>vq}LvQPKN-8<f*3Du`NOsScOMmA(-87#pIWic|
z4B5<o@Nkx_ov28dMImVJ;e#6M#e;v2^c@tv7{u3<)Jldn(q$PbGN_Tk26fs8E%Fiz
z3k>n@4J3$6iw_f;24^AYI!1E*fM1>9u8m-Yq8#*ks9eEx!sZyHyf`o8Y01e-GENfo
zwBoic`$%yWBoSp?$2|N9GvJX<WcV5ndvrxVl9LZQe0a^_5c7yr?PVAYMxZ$Yj2Qk-
z%z+?G4u|KxfK7CS$k=FzA@Ht%yrTrheGYt8_vL3a&6n>meI(zbJ00b^^y~h=k1a=3
z&@(TiE}w_WiOlO?V<$|MLwje{`$NXy)85&CckmJ*V|Z4hu_TN{{3`c^tU{YQ&53Eu
zXK}xW7h8L5B?*b^Km{Ig<B?_bZ&yT-ft@T2AUB7>f$jK8f{FoTIynKz%N&4}v|fOe
zFx^XQ)8o{_1!ofj=y`+d3zV3ZihPR24T#1E`^S}veXAjf%{#0yi4^)EyGSUi+1)We
zDdr)@By!jUG0g<{Kjo8T@q!9wQUvJ7a6qB|l;H=}I+rUnyobUY8l#ywL=XAf<rCgi
zn8zBFUHaEJ=}}I5+-jAoDXUk>Ba-#WF%`VRT}J!Fz+Pf#s^a?mdwsuhNBhsOq4YN6
zGw|Z3dqF!4|ByQ-@Bk7Ntlm;bbYL=NZ8SighjI5V1@-_G%J{M(3bb*tUgM1v)r65v
ze^gNovCR%*tugR*Dx6g^-SX!~$D*O?+QYRdqT~miAnoAxzQ;<eQ6as>r!;e7WStJa
z$NwQfjzc9oT+v$%+VeK}Kz>;v7UaYzmyFvpXEO2SJ0B6V8za9~RhuLIsW!-6{Il(p
z#hri0o3!6~OXuY+Up@e52^TG2y>#!E3iiAE+3j*<SP8*C3mK*r@pnsio;2g-7>P>A
zXhxzxB_kb=l`Ar@G_yQG<KmF_d%cQ;Cmk&pDJFd~kL*PvqpZ3Yr-C`fY3;?OI5!w6
zAdJ{Sd+jMZv#RW)n%ae@%<CbpRd=GEthDp0cT>NSHAtREmv!{k<gMt{&QjK2P*+Ma
zzFJ}DNz|j?@f*iMUG=N#jAZb~H>-PAr69@ZG#KtyDmePip*cB|ATq~PYDHT3!-N-J
za!ckec}jS_%M=dHnJcf;jyR$@wZyP9S%KA!AT8$#U#`NK;|Rpk@G=-)#7Q_|ZIBsX
zdQjU3J4~PK@rdVDkzDRkq=(;4x@UO_g@|GltLzoNvAUR0v&>)a#!;FOsDXh4)DqY<
zAWJeUD%MzITw~3#rP7J^7C@rtui-zF&~OUj4bh2lJg6BS>ppsmylk#wB4@d<JR5^q
z=3_kz2i?&0bT^a-V^FPe)xEHFE=h4T0i3Gkb&NBhg0a+17?`I?k(H6d1^TmsNKJ&d
z48Maxp%Nq^3g2$fy%4!!Gc#rA<qT9b*XccP4qIbu$Yvm{*r4&_-7Hy@^KYk<-VVAW
zS<8)Q;5F~AZO+E%$rXhFt~vs}G<OUiUypcM8KF-*2~AMKx3@2$rL4(jm76jaR?S+W
zD{ARhJz`t<EXbbVuhl*MfmO)Wa5s_xGacpqzmpf;W|idHDO2u?PcX?;yRf|(nByGR
zXy-JdF;FKSCc*h-0F0Hy{r-~Ss}Tb#Aqtx7AQ|E~*IPUCAvxcO-`8O}wH|2o2YhmI
z)(5TpdLWf7{!vW<t-1(_a2!)uEwhSjPT@9HOWIf(lcn0Y6-YB;w+*}rx=x;8gcbWb
z<ZBYtf4SaqyA}pS$<hWMuLfOnt?9m2g@?vsPo>iq7@B+2FX{I>gBl>bQCZqef>HN^
zPlB~n3@uAJycCEXeQMDX;2*4o2M9IA*e9P{?^@Ddu1Wuz6@edo(CNVES7F*B%qs|{
zU~95GZwx4tW4Q>th}gQbPxbdTvkRNh2cUwx-|#@@RD|WInwqVW>N@{AUrVou^S?O3
zR=K`D?JO^uIvWuQxK<`I$zf)3J}{Z~L&5Ipu+kSkI?TswrOps0Kb8<TmI9Bo-s3sB
z7jhhEtyr8r@eG38fY&-fihG2fO7c6x#-8QtJ!T*e<sC032+LW-<s1St<E%Kd(3A>n
zMsF1>&9Kl-4clae37TlGDvFhXTY35@zALAbqClEKD{Zah>!qudXd$DUa)NnOP(?E?
zCU@UsWG^0A+68*4FvERj?Ns#o`E^vm#raCA?|3!cLZ)>(x*XD)>&Mr$;3{(c_zwrL
zIHU6mrzMOWz)00c*&g6okuq{PEKn~Fc-A6U>RT1)r<Bx=0?%bn2^vgE0cZSozWR}+
zD#LQ0T5b{L#Z}C5Ge!|Ra&15b2}!6bYgxPJ>tH<SUf3mNiQq+bV3qGAK32%A6c>2S
zly>E37C5H#Z|zeHzlKSL#X3c&2tFPB@9(Qtv0af=Jo*p!PNlTs+|mu7aZn2-a4{Ws
zX{<WmnF%Ue70BcC$^B6}(b;Y$%aC*xrQw;NrT3>!XPy0sQ^NNZM}=5~`<J#SF@q@3
zZLK+%Gnh=}W-y3al0H81R29!ujxw7YIp=fQxi$~8{-t(dCzXpTm%&15J=t>Klhz(4
zLQluC1+5=^ZA2aNJ=Tk^#=~7%=UyFWjDA)epfbTwj`i6}t0t8!(Op|xewp=!NZY0!
zRqXUaulL`-J$T8F`~Uso-P;$hD;1Agy{oaVIY`$GF6SCbeiko{ut^8X9w6C7F_J3&
zt|%wDMw{b}g6Gp4FqpN`6fR~xLg8_ZVyMRboZB;tOc6dii?BpL1uZMnXwW5HKOwit
zK`itpmDj)&mG?&u%uBdfnPG3>3zQZQa_uXXSScw^22pvFEY~z|Gz@wbD>qr5Sm3@V
z{#%A%z&!#9CW6ElDdvc?18aHy2MqHZV3cP!5Vl6^?jZ?vkJqti5wHYZK2@Q{6dsPQ
zLx7`PE(Lix?()jRC!tE24|mJUCB~XH{Qq)t$ku<S6Qmlu2PIL}h$b>o;dJ<wa?C?B
z8|}0B8U7GDY`vsj$Fo}qpJ3FkvSQ8>Zb~54?5~Ixcn>!KC37?$K*ylE4z8kJ6o94~
zl5ItLb6to1J_~PRl!xTb1#e(s2PuV2^~0MQ2bRWS*kT#RL+u@`svBJK0tNC#7oE?A
zvw2FX8z~4o<q)CK1P!`4zeP6Ary!~H9{UapUcO3?;X!yd?&~zum4YBAuYxAf&1$o3
zdPtlC9#Au2Wo&i|;Ii7rbkZe*bDUDE)oM||)okP~|Fgh}DE~9>lugP`VVq;`^|0b-
zWQF<Fr#REXDK&GX%}bJwH}N`s7!IfIDV8WeVCXV!y&yW%3%OyVD}_LYq6k#Hv`)W4
z+DW7Cv*g;|8MccW{WD2jG*7`(EE|y~oKK*dJULf#lW@$7T-$YSChqMj!k-;x`YQ-C
z180GATKRQY&f{r<V^hh4wJgo-ziz$Fxo%Yr{xjh|_IAR1_5G5mmvf_U2SZ|T1vR&J
zCfv~2A9ycocsaRl+;=tpjI)S2pVr-?*=f~xzsUPg@H1Vm_}%`N7I3{e!=38Ho2L|o
z2|T<7Da3t}=D2;5LTHh0k+x%Gv0GgJ9gu^Ork+O0oLGU58lCUDC5f+b{@Hql2Ml!`
z8~I1<&&R8P5a@q+K<9iDo<{@9M=HNx$>(bilThwR%1z4i6?CM}wU6A{?99LO8>`=W
z<`>lwJ^?bCD>tu(3EmdsIOP~<JgfE?DgezE18Y`d6ePHU)86xnZ##LcF@y<y|Gq;l
zcD+FPyH#@?yG1QT{>nn%E@YM!ZwG)4^xzb~@Vb8)^H<F)hj9vX^RciH21alKAz_bn
zjyC-`9&#0B5Cfb5HHlOto#itV4bJP?o4rhW-;bPQJ_1F}8O1a~AN{l|HBH1KABzb!
z3*}2u)~KoECBR(Qn-d)H4D93;XCBhJj_!2VoSCc6W#N$b8<6jVDvKa+$jMW_a38sX
zv+|czGUsx5S-7^DagZtUzx$dMLgx#sNxqZMg7rrDvsFfSwE8HiNY)7SUrCwNTATE$
zLQPWZnWU>JN0uDLvbEw>E%}mw0b?K;ABghfW^6?{`$3E9%zjB!cx$2-_@K4i7H+LR
zRk!Nsy7?1vKl&~qHiOcuwOH=#WNcl{87A5TrZpv)how}b2%AT-E~!(ynxRmn9k;X@
z6m8|g)||qJZ7%&=ySX)Oy{qYb<{;Rb0zt%ZwOL&Rk}Vs_R_<W|=RW`B<5C~f=!9WE
zZ^Xor&4rwPJoD^vKy6NpCR@l%(zsgwam_Gk9v1P8tu@!!N;AN>e_FJKT`tiUCPs?6
zYOcQJCS08Yf!4z~&U!p{O)B{vcXQ}C4dm8D$gS+rnHIeeJEjAw7oP%0RI5GoYJDd=
zY2|m)nzECY=%nQ+F5D*N2pLXx(Ry~9fmg`}KHc$K6%3ub#wMB)zL=Y?edzc3>1H$9
zQ#T9q++B!2?@0MKOmIp-TTH;thhxJE<1K#srTu>W6NB!$<=lQJJN?e!@;j}=Z=Soa
zkAv>p-Pg2v<TlmEJs88RK7XE@b?+{=;_31}{DynCg9Z2wYIqpUqv3IH!Siqgr#sAx
z`Ud^&j*^)BhGN;Kl_CqD_#R;5$NX%V!YR=PQ^DvXjbovAq!raf1JwxE3}Vdb)R;aP
zAI5SsZSz@;s?>W*V}7a0MRc;*%sR0pojZm5xhY2{J9C;&k%+a=xzIYwOn~K1jFk+q
z!2i={uiyPLzMiIREgX`$*Q%7?i}kV3**Hr(*-xe26gb&`M%Vis_2nMPmT|_Pdq<zj
z8UJ(a<#JoF!Fb}0L4?0G6FaiokRpb!V1kwZES!2USuVQN4ujE!T`^Nmn_BMotVgV`
zfF`lBo)czZsT1of&}wCUYx7a5cvLDLm5TC8#r*|}EK*<4SbnvN!&G4!p6$*bl=0xh
zFjN$V-_B8gjK26wMPGcG@QW`KfAN)Bj~-(>KIN;sqT^Sif2JVU>Gln)QKVc#j?;0_
zCkg0-Zm7V!WcVuV$=pFE9(F!cC`s}5=MAgAR~JBkz27hF`By&1<cqkIJ%%R8nwihz
zY<a%?%u^W*fyyP^6*G}7#iVdDPkDy#^`I1fT>cms|5x0_jNHroj$^hXSE>A|Wx^O-
zY;eRJ7xQRS(MYa3^26&8v$N!J<5DoVJD2DQ{#aer%rt9C*xz$pMbVW}PD|c3abD2C
z-BP}{w*p$|^L>|>FFyZhp@rzTakQvEDzsB6v|dG4hWZz4wlmzdf9Z>MCU3;cU#%}t
zlXr?wJOH!tVwBm9)?{7wAzU_ZJ(`6-k6Boh-^*Bp;Y5OZxHJs&%7hoe70LDMI7x0b
z^xzbreRJmEujZCnc-MSA7tFrX;MaV?EDgn;0?u;VUs!QxQJ?g&>hd>P{~0Th&rtt)
zV`F18TmN~p`B?w?8>s*MCcNlH?B}>2p$Njt&G7LpJC4sr*Fh4p*P~vY3IqrJ;RQR2
zN2neaw0k}N7_wK_!65w@6}3Bv;oQ|7ZL?is?f>eEmfv7a>GhR)f0jVw<ZBt1@KGP+
z`^&iB3zHW&!=!$(#Hhvpo+eRvwvE4X(nY+waw?pGQ-01BgB~K33xWa%b=;ZgjC~3x
zX)@qL8-xT0@`QhfX9(<R?$NC4GTZOp9=|_2I(&EX;(5p1pN`)h<nB?&|2}x#d3kiC
z3UAK^q;x56xvZSpd*+#1z2Qp7Tllrjgn1=}iTgPmgh|wWHa<I3rj}10z`Vi9p>;MC
z4_^(?Vq+)t((PB+>G+JkQ_Yw0(JOfv6&HrcM#iDEMwcO-5$lJ8^U;O=GluhX6*YaQ
z1XeJHrFRBu=Wp^0YFbm>FJq@4D&79}HjW&xC<ncMm@Ko4D(LJZbm?|Ohg91^uag9W
z^KiG>X2~c0Tt57ij(VW!1~(O}_v&3a(|j(#Xy5|8v!jF7VHsY>%8iXGiQz%Hw@s2j
zh@(>?J@;>k@T3w^<zX__IT)_c1$Y38BOYxK0CmhR!^=3itr2W0gFC{+#l()6VJ|}I
zj<s;>3>K3KkLD$K#r86U!1>UzOK@!I@KNk)YVZd@2N1PD8?;AXC($TuS1z0=Dc;Q?
z=4<zL1TWUw)AR*Gw;Lq6-MOE|$!=!tHEV40NG9?}76j-XFo5lCnwNIwdy#~*lI5ui
zXQ3Lq(D6H(i%<Xu2^y#R9o(Fw!4)(oIorL6Q;<VKcwYrc6bwe{78BM64CK;fBLsFC
zj_42~y)U_;GztG0M<CrnvL~=V0MlZBT{vUI4myA_W|-)rZlbnp2(wCN{LtTT0j>Dp
zl}bsfqyb+|rWg?D<I0YDZt$QWDVII`ijS_eq8^^J#id`R;c$ob6?JTWL=SSlfr;4f
z_nEwH>&7r<P?WFGu1&^-vn0M0h=?VLYfvoGx{sOo)Cy9I@`E5IbT9v8uMTl;;c?u*
z!mG6KEKWi{tneo6jyY3+x)crYTCRfnNp}P&!oD7S1ldwE;S-}o13$a~nd2NJ?{%Df
zTsu$V@sO=qj=J4=ElzsTfU}}7viN~%7<iIJdn-M`=}oH{rqTgyYGQW<fB4xzt*zh0
zoZ>ZK@ai}D`3K*O-ue3M)x~^Nvdf#wSaxW)7i*mV;jUTDFvDU0)-h^n9Z6cHX!VU@
zi46ox`bmgIiA9cS+&{S1#bTTg?kyp7wJXZBy9k<+9gC7Th^-F4AOLPak-tI3{v|$T
z+x*Fn7XaO@iN&TuPZ1(k`6?54nnhl&*<G1BDYvzUCsy~|XKYo#`LL4%<}MGJ89AkV
zA42ql+6oc{M8LR@k{wnrI>q81kL3-74ge!*<V?9uH#g+d{(&VU<tMIH5QVsqTsrRL
z&QC=!%I==vljUVYHt#j6`IChkgI=$DW&6A<P0;MKJ3E}(?gBL3`eo;jaX1d^aD=>!
zMwPQ_eiM+W@pUIa#{u|l5RcP}Ef!E5jQHtr7{kh{v0FSI(ghTO*xtSlBG7(d*CSw+
zs#BD9$rkrXB9+Q+FvjydY~9_D{o6Ww8DFDkevS8bO<nY(kGNAsm%xS&BoIijP%i`2
z1P0-Gz&ACAo5XV8lfSIXb#J5_5LIhMi#AUjwS>3JJkoR>&*ZRiTmuliKSEoLR94v8
zxZjt?QEJPbZ&Fli`>?2ZncmY72AiUGYo0^txRM9~dmR!{&STW1QOky{Ze$2xIJ*ac
zKyWsoYbxvtKDkD{30nsrO=;bQAt|{FB701K+8vTst|YxV4)|jrq(10qqdKMcrFVcJ
zeiDVGxDgV*EkX!+j)jo#+V=!r<GGU#4m~<kSUXLG9<vNI>k(}K7kI~xn4&@Alx{%c
zYry?FxaGq`DuF}^QnX}n0=*>y-A`)xoD*g?QGEI2xz67^)8FX}Yx=Bdu=CE#h3;H>
zcpQj3NSt~a-pe>0p$TV)A-9YSn~h=*HTM==4N{whwzstxK$oW8F%<EYwJSSqZ*LP4
zPAe`C`2inU9VOCUPB-DrC;{2?lW50XHPItOd1s=&0Ym7V1=xb^;q@#HR$K&`tpDmv
zxODu7v{8}}AS4GSfa{Q}ntXsMKX#Mm%hhqAnUI$ZWkuLCaI)M->nHIj=pP&INaEWB
zWXzt1u&#SY%(=l4KgAoJ(=g#{n2rl#Oem96Q6o)`M=W%LYIPP~2Jo5P<v)xO%fdz#
zLTk@*OO<W*n&MK-8pjAEHz$F)si7jk@2!9Y%sQk;gR|H;qT`=%G^e&?ZRwzX&$S4;
z18Y~R>n*FxpWor(cTbsTq@q~GOhAM4s&+Pjg+YO4;&4TWW|Y-K3OEC5wMf&1F^Urq
zeaPXjt?d|pu|N0ZZ*AD0@?W*`Uv1{U`k|n&tsRjc{N{(P&emq9gVoqOo#C*n{`mFP
z+pW!92pcWsNUoc#u39uj$#SBmQq8J#eqI-Tny?)Ns6NVLs=0pfHHTfp1%z5TZ{2~`
zoIi!>O05anK<M@!Afa~^tuz_WMd}B7*N7d4PZb<^XzSL_HgqnH@l)+PB1XS!j;m_^
zvBz5|?`~kt5iSs}G@~zoFtG9PF?JcFyysF=;3FEKEKSB*!!QrJgHYfdq*zx>3%wXK
z^zrX$;aE!qSB7}2w@$G~Xt4;3I=%`!FnZTcC9_I!Ntq{(oV#wEWEvT1Kbff}b579j
zj{CfY<;cpvrrQ9VK(2d{Nl{okgzZ2%ZHkh(=pS$O0xA}F)DV6eV&O_l2Tc3VNAK(G
zP^&L#faVPz_NOorTtX%mu3#BD(ty5%Uy>g8K{8weuP@@h;3AqPoEGT@E~iV&E-^(}
zD`DGbspAYMx2%k6QZ&CHp8D93Pbh4k&!bC>O%M~!@J|jFe`7G?=^nG&^4Et$x!r1)
zS^mazYWZ#1V_Ij+-SZ2}FNZG&(ELVY^pJr$)s|~AZN0$B4xrB^!>&t>JMmdZH{N4~
zdd3NUad@^b+gAV)?hIio1F{1>bFvE*Y?%plwx`ne>RN#r*Xp2+i0UI9{9#)@9nGj+
z@!wGtuvE=+*-$YBAGG*wFV8><|Gs)|p6oNM>7SJYzPX;nTxB(MKdz6u8Xz>&nxC2`
zzVmnROqQzv^7+qnBLLP|hJsS1ta;eXIEvecClDYi!N3p}GoEPx!%NiXN!`Me51f}l
zY$gJ2p<vK{q;{q{8D9Y9G#2)C>lAqbGH!GoLq8zlB7kL~LkB^u39R2E402)P0*@ps
z9YvEkrjutsK&NLC+2hkSzI$>c#D$NL$p#R8Xfuwqq!|S%KUkVbNx2Un2x}6wj7H=o
zfSaQ1@>e>>Q&zx$Zt(FSX_2rOj$k*ytszTG<5+%<SC|1ABgv2zCzl+W+<I|~tvmTp
zO?E=5Ks+T`N_nRo4k<_pg(A9E3At);VIBWdWwkeOp8uKF@li~y*v_=7k)*9jk?L@k
zc{v_R5lH)}i)3B34Q~Q&bcb_hKS-4SId@}>V49JAPSN;_GbMbE#{*?A-~<Cz?+oK<
zaF<Lo{3VB>a6&5pO$^<aqD}_P9G?%OlpY9Mg775tE?+(r1#nO9W}NtH1K4#5u1HW-
zQKrwsvjA93gac$6`rBD4G1s}~4=jPD8(GlnNlqUGpcAk(?(^#oyz<2B5gOhf<Deg%
zp_b80;vu(^*<+P9#yu`FjoYFQ_j>6>J<E0n=PCKkE*?zG@U!ghMDT{3*D6)RqDcXH
zZ$O#?(@J}YpOBKyRc<Qb`CD+)qUa89)0%O@TakU54~30_m%%(7E-UG3l3axL&N4c)
z%BHKwRvHdD)j{^Vy@qKbh9fJh+UZKU2@|c}zyLz`Ov$H-hiD3Qj)bB*G%K?JIq3ks
zdb9$#k|+pLpKYM4pK%`X7*Byaf?6f!P@1&)za<RRrihc;zNvQ`S~b>i=L$bw=nsHf
z$ascxK_}@vua%5uZKxDzThoWAnn$s#kqJ$nNUejQ2(_3a4^SDC<QF$WP8UHkr4ceG
zN~BbriS$r@G-MiQmoH&e^O)8AGo>q^OVrxp@S$+EEGm>h8U!|f)|PG5HJ<46<_+qd
z%2@Re$MD2^<c)Y<Dba9sPb0$CIGCyvHvjo7I*+Ck+jR1fY#ZOEaf6Fs)3D(;A}UX|
zl0I?%#a|<2>@X6KAp^zrqO2FAN7Kcl>Eh9J@o2iZ7t@6l6N|D^fPjF1Q2y;annxy?
zM|>WIyUa8LaOE&>H>%oRXhs$RF$dYR6-dzWC7zHtw|kO=@E>sqHO}V-(6?jp6jRQm
zYt5SO5zkW-&*$F9b&{B`(DtB+QLW{j)@y8;hGHQt2Ss`=4k_EjQ5SM;F`Q64+43M$
z8VB;@Oz&7>d?~)n3ofOsAoB#_JVtrmZ~W((H|n!={AcS)qv^zdZamp)JjQ>11M!~+
z@$fdGZz~5?)@n32*|TsEgGP5e3gL@{?Vi%#^_2fzKab<{|2n@6qW(7+t0@B$i-P5U
z>bz93hm+cQ!V4$F7n<33lz)h6y91>+NZL!?K5Er`g})B|bNehvL#40aK0G9fZZEOC
zMwKetW|y5LJnP2e!Ki}1WwfzeMac-0zWjgrAAaH)7_ymv<hLTtj~c-;Gre8JYpg3)
z0tz4U3WdDc-<JNE@oZhsk-230GfE>=5)lq%nS%j=>VYH?A_voG&_%g>K!indt*&ZH
z?{F$(JJ2ibc57>HJMKcMns9_a$z9xT4YQs^SHMNvOCrNNEuI?M$H**(hP9mnmJpTF
z{co3fXiBvJz_RdkCSAHLG%-kHD5Q~;UK0Gk?j)_st`ODpyD;b-q8I75SR{EU?s~g(
z&j!4Em043PP<si&T;!QRaslbijbZOBO%F&c0saq>yMQQS(h$Tl;|CWjWtY&Fx$Gbd
zm^iia$2EA`8(&`DR(N+HRphAvUZ9<Dk9m^0?uZuj0(J1?@hPH^zB+pw!eD!qx9?xS
z7S}^w36&j)?w9B5>xaQ$JT#I&&MCb<nc6fToWyS?iRQrLG@=HjF)k(SeEsk*c(cia
zD3XTjD+Fa_U6S9b39b{h@nQh5;)P9t2do0Ts2hnZ$bcneQg>3a2?irHcSz*L+yJEa
zx}2pTMvGq>i%U<PAfM_W=NfV$Lg?2d;-(Rg5X$hfC5Y@HSz<;?hS@-&?DNVOd{I)2
z$YD((Mi9C!M2K@Ht#j3rY(DP>ZmkQ{BiGvI>cEf81#+Lvp_#%kSj*&RL4OzTkL^s8
zeOf8I!Zyy6Iu?h!7mzfcN2UhpWjoCqn)&3dl9!)7WdRpWj{By~!U?sA)q4M{RIN^J
zWAfOSC`k_cUvoXca$CYs#{Z25JWyR6vgl>!P23yz@j_vRH=@)xY~S2ZV!RvV+1~Xn
zz>*BnH!cQb0Io3og+0~};Ba`$Dsaxc6lrURF*JthuhwO!>z>RlM}0VH9@2rA+@zQ@
zB3IZWQFqnb-OT6hB_y6+53Ypwex$A48P;&xZf_$3ij&IR4-di=D8z3R1tY$tM>zoW
zmm(3pi!N{~ut{(p{x=9$X8b-oKr6lJ%vjm$#|!5FXYXCR8p)AG(fxJ!73#TbGifje
zLsj)zJlsPAUCn6<xIpzx->y1diJd@p8Qa?rD5je8+pi+@3MrK@Kd47jdr#xcltQ6U
zC=?1sgpS9z7oxsY`ZqWEH106?Pi93iMGH6@nYv14lm|!20_21Gy*s!T<%Bm0by^ZM
z;vL{W_+$pPJi#31Yf*Me*|G>dusgoh=L3hsU2$u+@{Ur%lio|<eme>uz*)qDsraJG
z?e%(twgW<V9>NJJi<Ns;)Rq;_!h5^83a7)rLa8{vI6FCh-8?)#Iy^ow(@z>v^s#~1
zr^tTFER`ND!JIfZ3bbLQw5TIwwcBzxx+je7k;RFnOF)CA#X7o*<a%SDQ<=jZ(&$RV
zwsdUGvfn;qLXrpyt8teLf3oguC9#D(sq-b$R#6gZMRN%>O^a;*mois&?_>>^Qz!+|
z#^P`azO0cqrX6dyX7z&Dpc-tgcu!p%(@jJa;ZMZP3~jpK?XX4XTl$#0qn_w6agZ+C
z;X;G^m^a9Yol5F-5R!Tjc0c&6e*_aNoNy?PP$eQ|ThpNAH45G0E&twqU%u61_LkhX
zC+)5vR&nCe(D4cT>bC&-n-(^p@|<JYgw&)9DnZgFgdm*E?G_HMeF*s&f}hPrZNKiG
z9q#TQ!g24L)1%#sgL7*U&}%@gRug20rF+<zS%2htJzg1L;6rOT`=uu1>#%@}QR~wC
z75p1wtr%e#d+&;a3T#lzm=DGB7lM1Mwj~w>N`uKCpFUAMDL4+^<QtS@pvjKyQ8(Hd
zk~Lu(DMin(iIUQUV-ZtaemC2hwWjWmit9dpv%0pfJET3U#pCa5kf<qMb6?l-b-ggB
z%%fNO*LrUMHm2;KdcBUX*T0Z%>t7qW1b8tY0;ty;_<G}uql^CaB9}rhpOixC^$UFc
zVh)5$5vZ0}cp8D=6L>T43lRpNRSe9ov*m@rmpJ{Oc<{Yb|0|mGe&s;j@=C24RKGDc
zvWYcWA#yxg4}ra?dXTQaI=iPK-YN)(SjlsC#>f^xIlrjgcEY#@agAWnTV!Xw`#0eq
zJ!=atU@Y&D;WImBnk^KrIjoE84T)iqEicEm+pt1P{9VGnz$K5%x`p_?#}QTpwq@XD
z8eOToD1!S5;77`=1F8+64O>~64T>fASF>ii#B46ga?i~64Kka3oXw`RE(rdem0@8v
zs}#a4hJcqC#Fft9FnaL0CP>S2={2!B21MHh(2l}LvqvUV{<fn|#KnZy#A)-yqx82+
zn84Fr-{k4mk_{ex^dM%yibE1EJ44Um=zhcr8nr!s5Hg70_1$r7!`l)-L!X7?>1g6q
zN)}6;1Ev>Dob}}v^&HhDPsi{m3qB_kk!jyNJV6yDbHHg7(hu3l5nIXuTl(>uK3q%E
z+df|_dAjC1TYF6L+)Y0X@_VRKBp2DO<Gi5Kjz}lx;1*t_Ixf$`EX`Y~x%TM7jbTm*
zuj`|<6}uiv^k&jxO`yQievsv8;T~3}NqpD65NMM0E-c(^rf^Sf%k|arHkW6D$I&FV
zcaNVTWk69QO)Pe1Cl)gVqU&GsEK4VrrM9C@2|`MDnkmKcti@&#fyvt1(Tjg-mll^L
z=Bje9Or5chBG3Gk_gen+^<QQ=i}*iCClF=C0A|Pkt-pA&@x2rO_u|F(&+&iXX#C%C
zc-M}iKKtKc3~2+lyX`;l*FPchuM?@LKc8RgbLe1p=-{shXXgh6!v|GaPXwXISr(^w
zX@^<ykqc$4uF}5^h1)9eboGWTCRG`UhSvrD-+gpV)LM;nytME?>2D`N<akvpxQr-2
z61HyAtJ=t~SsIHC25aD}tu%P}HPfRtt6H9pr7;#Bx5V)_QB<aJ((cgPqO4Mqnx?Ad
z6$+-Q<MA!gigqZ1)+cXey^AEZ#5+5hW-^`umAU}O=z1jQ2A2#rAJ}?a0bq+*br#)_
zKGdZ;Qn%-UhFz8<Dty2uMi~Hx3YjS*CZ&-_tex~T{pKeY9X>HqWf15Q>2zPN2Y$YX
zykD?2itLOetYU8E>EfY)UwL6i|715DpZ{qHVnkz!kxKuUCu)mhW=Z0bdoB$puue?m
z=-6R436@<{Yca+oQVSNZ7084`ux&Dh0;`7Fv85T%^V;<rLQDf&MZkOFEJ$9#wDnB#
z)}n|O(4N;mj4;(`>4O4ie6k#oO%ZfCwoVajp}C9MCb{@=s#g6gOoNFE$~rHeWX{R}
z{e-I|%wFE1WZCKu@K#W5iVzT7|8$XuH}riYZtAJ-$w}dT0yrt#!R!f7!Owu|7Peml
z$t~j|OV{}X?`KVAcf!h>`Y<0RM6!tknv%=2j3k=$>?T96kdRXhktF~qQf04=j*gw8
z{SNWGRq+6xkYjS*Wcw<*l(xDLuHi#+qz(`pc}W}!>_=v@(obhfO%O~JlDAVCnvJ`C
z{LoYZb`GqDaAugyPU<q7=1z@2t>8W1%BuXuopx#6q1UfYtSNZ|dx?U9xjcIP8eMs<
zv>r-B;AOP3fHq>4*I7&sNTeLHp<v>82w5Jqp^`mBqZ&ILnJdIX*6pqI`Yh}|+=mY}
zYYExCINWRQo}V9_T?B!em=KCnWxt)gZNAyvKWOeYPtQ(XpY6VRbI7Uq=8)I=<$zo?
zx7WMvk4JF3?jUYQLz9TaY;aT4k-!N&&3?$ayAcTHz$5s?x4m9RxUA4vIoYysYt!y)
zmM4huRKc9euA7R15x+-#_2WwYJzKK+gYWDpB*$CDRG00o)YCCjR9|7~E-LBhEql;g
zPM}xnKyZPTK9x0J=W`xgC~J-9*mlxW+p<Y*GJ?v)G{%P@>8xd+=vskt!Wn6srG-Vw
zJ}Y2+3Le_-U$dB7DV&P4so`N%iA~#hQ>|LeyXE+Et?XdorL;ompd)F>zc_AN*o@8l
z?>Tnxe#Oi-dWkim)3UE==0+FV7_7awUF9gSz(QmV5fObPfj1tvHVYvWP@f`NtN~fG
zgqj!^N3eb417hTB(Sacnc}4?7MUN?6Qd;<ZKhgz8Mv5s*<cLZ}cIiC{nv#2A8*Yzi
zlvo;EEt3f))bE)1DP7qhN-f-Zo~v)92*_mvXQiGkEtS4<ak76Bs1vu!Cd2gqTuZu)
zF!?$32XCt2ax(6UOXMMY`%u-#E8Lmbl#NwBbJ54$qr=z7ychq@u+JcEQf@7s%0?&L
zUw$_Pk@MCkGE5;yVn`sUW)PnuUWEEgCzxO`?2N3<Nx#E+E4d{J1blnFhRP+Ui%{ZE
z`g=V{<<0+d8V!)70qPWY@;daWZ7;Cx-Wc+MwS>mngo<QcTVlaF<gOvGH#c|NZTKO!
znhmZTEEgN3B&0Xv&P0%GDEsl91*MWc$bMxvON3j5;o>fz92v4u3nel-bYQIR(^i7D
zDqE{s$jE!Z*y<7)Teh@XC7e9DBO0iX9tSixfgLd|xE}q&{Gb=EEl-*}cN0u9<z*O{
z61FAb<>t+cHdE`;M0O>y(Jka3R|IbM{2=N13c8X_*=AZ2zi-uo_Ls05eeq@^bQa{`
zO-{JSg=ga?HEzzws-gTRC$w!$;HhaR!W3@{Df0}qc{rPmP@OMK5wv@t{(G9x;?+5i
zOZi2n@&D2kd+;u6fqNS`@x>i*%xR08%Eat_(;M_}xNcv&@#^gEY)Dh6iIzJAnkVN$
zn%-MwA1f0m<$@J#CtmozW7PF6YUo4HUZ3koDbHTz9rt|yN3Q=bpX}Z23qW%Ge>XPP
zU%K~ye82JH+5h)j^#6Uu4zGFXAH&P>uyykx?sECfU;Oh@zNp3BzntUl`QGkvA+KFo
zaH?BBz=@%l*DOqIdG(xe@3Xf#t_$mz?g{(NxwmGHCUM^MjAc>pRZrDyU)4;UEWbZ5
zRs~#9y_}iuc<L*zJMft0WSa1VKNnZ#PUHqZ1j%7s-&c#zP8WldzR%h~yjH!0RVl!Y
z^C_RN6T6;@CcbXJQ{9K@oK6iXN{FL^Ag6H^6+jC3i6Zz#7JdX*HE}TgvDF#FZP3QL
zAU)sEy^g=-fGgC$BeFb50aO705l?WtAl$|Z$zl8WN;eSbUut(>Zc8f&gzb3_=i&WU
z@x$;1FPYLEMI-4cS{D3CpHZ6l5I3L8HDC{)A8Gj!_w<(4H^VP5l8%9t>XN>+TQ=tU
z3t~l^PP^Yi+^R{;x_BLDy6F~p{3slETQ_*=ce*2Dnvk8!^^nNfgh3eeuZ)%GNv)!(
zS2-oEjPvk4OgRTk(-4(F{xYq2Ma=D)d?7ASn8d&wl(rEHGZe!5vD0gPD3F?C8WVKF
z*bMGS--?eS@kkS|0?!vCc@d5l!&V`CJ*-(vfmIMQM|&Iye}`JQ<h{4c;}Kj?w_b`%
zfc4D*%3`>yTFvsPoSnyH!qTa`fNp<qX_tAU&u>#*En)MH3N_SN$jlrzR|ia=EmQX+
z)zy;tF`ATH#eJU=&S<2?6z|C*EtYdbnB)>W??9ApQ>v?y*;ASlU#Eg^rEPbh<N3=3
zut9(Js(v<nM~N!!SDMuSZ1_YGo1hbU3kBr%>qdhtE|{oYTXLt~l!#xQ21+{LqO(Lv
z4_vB>ZmZkj&9dLoTB?#gv<;@etB@i5>V);^OVxEG6;;Paa599f>CpqZp5G4W)YF$@
ziuHln^3G9=mKuQ~Y%T177uYBYNPA06q+XZwt0?Ghr&MccPRr%I^oslA**xO7YK;=w
zQ`IyaXaX&>iBe3tl(hVC781^kqOqBHP-eI#I?er#h2GE95zR|^%hgG;jDvgy9iY}K
zQzjo<F#cY0{Jz^^^!6m9$JN@A=4ppPt@(kjQpDFQ#YsN%!q5y)C|kx+IhMAEW*Iwf
zwL@1cm44RiVuCRZ^GUy}z0<eGl_Z4BsY>lyP`;$`Wq4T;M>P<d(}`9_H-jkTqS386
zyiq|p{cynP9d*EutqPFB5#uz3E<UiqxZ4Gdly_(pa{$=f5Q@NA%@Fo)BQC{ivo;<e
zS0EFS^xOcDD*`rHc$wUIE>US-R2~E5nj@5zzWc9;UBw@<*TPjh?DRNYkP1rig(I*v
z7Ixc=<9r3jW1*^!W%ye-%m`y7DIbc#Lk;gIj7QK%7#0|~ekVpVLXVB^d(uR2Jbrt9
zY$rtGghF~=bHMHRF2)-U`KWz}qCPaiccZNjz$8)Fx&fE=I%3Cvtk=xhzjn;8OQjip
zZ4-LQSIav{`<mADmgKYWez28e4Rl=tUFSe80OIZPGS!fQ&%;h@jJ$-@9fbqj6!osI
zhS3P2E2=xO1+v7BL>HmU5p;GfX4v#73UjM&O>%fk;nJ4S7ZN8Seg;G-z$C>i6|=^N
ze-wuuaifr(Em%`+LsM--Q%zn$1n+E`ss6Xd1HAuAkYQQWUUF!@K%*pJ%LQNQ*4P5f
zaDDt8Aiwwb<|^)rzTj)pA-sh!S$=?GKr}1$QW-4(jqv4WF$pLU3Nb_2Q}ysMHSMlS
zYX{dRiIe1ZAQsBE=LgN#r*A8;$5vmO&Aqq#yL)@hW(7t4>r$eg3*J_A)9bXk7;tJy
zK{Rv)c#MFQ=&!O%zNE$^Qd34Ca6vO3LPUZpsb<Y}ZxD}eZb6&?pkAM^0w7s$qubu#
z0k}VcgSEn|LSv!XyzcZaxx{G-xeu$sFI+}P9hCc0?7N~oZy}&t2QvTV6bXS4IL~2i
z#dub{HCD6&mTSm#4h!gE`(y1RpHi1#wi6Fa;(>3uYah*$u4NGTS+P}n@>Sf~Xp2w|
zA1eK~XU4)z)$=-_bs%IY0hUVdiiz2sk@%aX=(EM1mU<qcM`d2Byi1C^Vv=rR888DW
zl#p&Sk!%z8h{`X<n$6)uw<V?k+J_b%7s^a(k7g(L;#wPg-5*o;c~|F?iRTMzezvV(
zD%FnwfGf#xwxivjfLnFu@2iapfA{KzRU2!n=JmIuUA}qx83ljymxHt8gQFlYLeC~s
zqf#X}R0%}fo(b%w8K$b}M1-WaUf`l6oi2^nrCgF=MKUW<5x>oB$4%)V^<U5bp3wjE
zW_)$kLC@mJ{6E)Utk>QAuit-PfA;_UHuJweCvna61^xN$Pd^>O(ODsfP*S>3CRaUK
zCw5{}0GuSQr4qRcETY1WsH)AA{<g0AoS60N_FT>90qXFX#Q}vKS+js-yR_ybYx@;u
zqg$`~y;`%`Jj1^=vtrSNswMmFl`5I3RCD?_T_#il{44iKwK_7TW2#M**?Fg0aHn!n
zHIRY~2UW=~i*osiQZtvz?|~{OT}rd_iMr5Fxe_rJ<}9E8q%8IIrz$1HPfu2A7b=-J
zlT@yz$>P6o%F>(xm@a9lhxUJ+w54)Nli8{-PRdPGnq2gz^AYwfdqvWcW>)+wxD7w2
zES<zN*ltCuHI6s%x00`PI-g$c596%XW%zwjBtC2Ad7Jum4LD8c*L;dc+1y5M>bV_)
z*YC0c{{FqSs`+#IV8wHl&Sz%6vJ|Y7uT;67oKKz2)M~_#LBE0HD@Yqkh$G%@C0wS@
z?YyJTO9jZLy9LVY$!noQJb5$ekyfsBHq~76EPKeaJPCQ0z2sR=l4qebqvv$O!|ZIH
z_j1lF11wubr=q3RuQ?U$<gSS%t>bXe+9Q{I&MKwj*PhUEHi;LXlPBgUPZYnOFooi*
z6DgKRr}(X;Ff5a*S__y|*YPha*EIF<brT%BtMgV)!eM7M&B%(J2Kmw6#nWaakVHFc
zYH6M>#q(EB2ddaP%E|;OI4#7o3fdZe2{(1uiqe^*6{VE@q;UGpiS04dfHSu?Kihbo
zZ9Gq8<N55?7dL%p*&Dx|kxEG@>Fyk|QcC7-BvR5z&MJB2jJFX_u?zUpGd{|MkJIFP
zq;53N{(oit|HM)9#P`3~*VpPBiTmGQJm-J>CjI|5*xt<`9**L$%g!U-=zwjv)SG|C
z_u+?j4J<fb-@z6BpO?=Lgjo)R=NBhuyRQpf0&j`}SOZlFMfrJ;|81Tgy?uRn%;l{O
z_3-%O;Ouz!D5Fj;Q1A{Hx=Cr+Qn;tPK5FkK?aXMoEeXk0+7%MDgCPz#7$jimL(Vu*
zLl+hKn6a^8viMc}F>0?!Px%LqecZw)#wXG(e3RgY2#YgE$eHQ^_FG_830oLQ)b9<4
z@nt8f8T^K!?3$fmkSO_d_eFB_!`gh(hl8hKv%x$>lH0!1@gO4DW8Z*;-j*m(((X@C
zo$MtRJk4$M>Q1?;K@c`){Xu-k8{BlY^Wms16yYWp5kfzA0C9cC1ez|0Fs)q~{|)i2
zxH-b;H>j>-lVN^xA<>7d<lPYt1d6ZwZ~)P@=n9*P$(y(<k+}^&HoMLGHazO>KvIdj
zfIUA1)X19e5mTopj$L4ZxX<}qQ8FA+1l6U@->;#k0T7YTP?emB@HZ)G>r%kTEwu)g
zJY<m|OT%<%HvM5vN^9tSDy@lD<`^GrYR%tLN-V@j!u*mFq12?phJ`vVH~st#U#wuZ
zq|7h6VHY)NEoZtB)-t<mUDcQ${Put(c*ib~u!~H9rb(>k+h4kjzA1SsSVPA_iz(=J
zL8LJ-=B~llzlo~W)}Ie{I|fX+WC_J?zS)iLo8frW<J|}pnv46r?g+R0m7SfnMo{-&
zBb0}~)vhp~@#P9MjNW+E9CsB4!q6kmn5b8Tzvsl=Q)zjFs2_!+Z9)YZ65HU_(Y>+=
zx@KCa&9Q5k)B{TAXGNP}HO|u$93xcSo+sP<R6@mh6j9fr!q2<s&0lxV4tMwY&i>+n
zD{s3O2j`X#p^0Ox)oMg=Gzt;Zl!w}}Ae>}qI+pmJ7qMJKX2rJNm_~{~MW6dAC9O!`
zXz(R*c$_H%cu~j3YIX-y+YYaIidtTo2LAAt@1~ltJusDcAEaNUd|EDwPgRn@I#BhT
zl?mcWZ_=GKl2RFmo09K#*rNDO&Q~xHestRpY)?>Sq#53a{eXY2q?~{x-Ds2evj3aV
zL2~%B9d)8nlsnFf*hN@1m72oRhy6^F5F%uwk0U<N@Lje?$Cd+jix_>@GsNkx+*3HJ
z_*9$JX0W?_y8@a?DXHDP+)tSg^o$O|!@2f+{-3n4Xqz0GM^S16i)sK`PLpH088VG&
z$G!j-R}XUXJwuT?REOZ=AD)Y*Y9KRh8FY}pJudtWoAh;V&7kG1iu-(3Jn$`w!KcTH
zBNUvA(J9B|$cPCx9wjpx7TS1}^eHI43$S*z93|O!)>*KK>zcy&ZLC1g!SK)V{}b8&
z2XQ+q{6EM3zfpfxf8pByUu|qW+y9^A|NnyV{}*S6`-Q{*P2pnE|8lBM9Q-fqPZj$w
zoQ?{|{>vu8KStGj4*mbiL;rJGfJVh<FhJqR{|p#8f&aOv+QHkI$fm{pXR>%k*nd{N
zsiOYVJMEJu*+KuAotrY|U&H>&L;h($ri}QvNHV*Ce+qW0cz+EjGu&TrinzfcHP#<K
z23Vq4l)nNh@2OAOnhn2!xPDFKjIe%Mrfg)nu3JqJvw!87{!HRe8PcC1{9h=dU(8xH
zNlS$DCswoR!udT5TA^tEjC#+G-hV+y@71OJfzaibR@42$^TE&x&*A@`@PEfLof^YG
zO*DQhiaS?Upx9NX7mgZ%Qxwv<LP`LorNArCl`nJ6RWOT?w(OApL^#-_@&3i4(N{dN
z=qq+mdorlpeYcW`jL!st#dn;sT&X9b#rzRrzS`!K9Xy9q@T_-_Ly(<8V5Ocl4E{o;
zlMECalV#7CESDXV<;0j|f>`#A$#U+PtR-u$J?9p9&Moi_=N7Qzo#FKPcbI>lAh`Q+
z%PK;?$^fWX2E7v8S6Kb=i;0(B6M@2rH=o<t(ZWvMI;mTKUog^GwIsrg*Ye_xNu|1~
zRL>1kR*kHXMzT=T4Xls`@?x8PJu*+Jao+x3nWx+Uao-v@WwiJ+7d#agxQa_@Zb)c8
zT9UYcRuZ^zkz&gs!xCxfNaO;c&pL(Z-hQk6Y8!#mh->yxnMo9;R+57`2W;wGENkyg
zS5!(CKh_CQLIX^%TQ_xm$~py<n1y+nVhoqfC|gX*sLVfWj6YvYKtvQ=q!Y|CW6Tp~
ziy^?WTQ$o*C6sXwB-G(rHs4k-yeSH`rHcAK#JlOFj36`1m}>Br8ctXs@ud324{VKX
z+9OmVDGpp`;tFSp6pEobrJ24RN#DZ<P6q@ST;2!v6?sz@FVF@jgI>24;Wt5{+Ct$R
zJO=jHkatIG#m|{{im%5KezBZ8b5N<iCUW?7ibU?7t#2eB&-BM2Wez}!r{j5|^AtZ0
zvY?x%2WQ3?d=x{mD{vh1d<gL+Wx-K7a0;{6^I4icODi^Hf*zkST~H2!Urqv{Vzxp-
zjAx4>wG}D0ncQNO4<X%dq?xw7^{7Bqj}55^vK~uPUbUk3Brnp%ZHt-JpVGqk%r&2#
zYs%Xq3xS!DMbcW0^eRIZiVP#9$+uR(=qOc(bR%LS@0+MmB;6ZJSyWx~qe<yzrgDLr
zbz0N1Sx#(|ESz&LdEff9x#vs_$5$Hv_0Z~MB!A0||5|(Ts{YD}|JwNe)pP#0Zz2Bc
zO?1<a+3Q{>7CX0g#4A6r^WN3yJ{&~sXw-(He3RDc->}o(h|8R;a!2D25j(sOyTcE0
zR=K@icNDfpn?kw$XG<1@kg{(pqKt!i9d)BY*x>@Z)fw}S+(coUQ<1lS{@dOW<LrGM
zjc|-)7?+IkQo(3%kI&zpo}Qdt9PBr#oH{?-b3%nVI1-muww|434sHO457>q4PVbWU
zpx?tq2S`7RfLvD?%QWI-kaR;;Kf=z0A_|RM4ljdKwEgN$_#t|YkWVFGZ&X+dGIcz_
ze+3%N{%Ej$!FQb!o#u#L=xCa`B74+p6~-0vm1U#q2?;cBjO?~vlh8fhRXIx}hWyuf
z7`3rK!+z9?c`@Tw$r&8nvyrE}J?Z+6#?k;PL?1)2%Sdl@7V_nC7<KqOJ2|b0%xT!h
zaqr}m24dmy+Tm~ylwxA2ylAKjuZ4ZW6ZvH*;mM=r*58(9q>F}SX(ZHq#Q%fwAC3EX
zs3-F-@ji0SmUHj#hOQS-takHvtkdd^yCaC4?2d0Q#pLWn;T6npnMjU}`1aZh=!h5G
zgZc{C#Z4sl%cmVaXg~KlFyBX<fkc);F8Idw*Z{c)AOTxe5dbDq5_aK)Ny?*$VL^6-
zaJfhu!MKZhL!cf9yV4s#o1}@+Ac~MWgYYgI3}K|P26hv>4|xnlH9lpo<GTpIi|F7`
zSm-2m`1dGm-2gb?DyRrWO>15{5n_P*nkM}q5+`hsrm-tq;v6Z4@oLb!h3t*^phV{7
z80PZO+seU5-ouu(WA=0_D8|(c;rO?1!tS-`Vc2d1+DyzaG?zh#ge<-d`#7OqpT6}r
z$2p&qb9B)Y_O=5}g0zMsfgiSfHSWTRUuH*sl4Vjm0_2M%x`1s@42CdfdTXee;S8En
z(~nbsuxApQNt~P}gg8OuVMa+p4x)C-WIR(}gr+LG`GJeM2<8X`ya00JAppA?r?S`C
zSkPQ5^-AgQB^ESNpzIR)wd(&FT$-Q%x&G>vqyKNz*Pin~JnR3n(EqoS8-Q;tr^M>g
zwEq_LEmJvjd4XYBkrZMRelk%S#XBygH{iuXRAV9R)f=o5cZYFXDh5Jnl!P)%MGab-
zx?igc6Yg>LPL6*)IJ;<GoT!tWgMXi%9h{#Zo*XxSK0Tdq3zQ3X78?6Ix%Q?GqbMcw
zdU(`(v4TTg!M$9qR9h0Mhv67UHb6O7l3Om24QAD+V+`|6tBA+I2neYKQ#bD7(te{k
zrzUU(C#Z3xTDua8PB`-DS-mT%IcN~<VhQW(jarX%2Q*}ZW^Eyvs*?c;^>!_lyjB_s
z^+V)Ksnt|!?c-keq`$|%T=xbKRp#I2Kz7gF!SztLDQf8n=iMNx{fvfYHDx-JivKn?
zxw#2Y5M%k9ZY)zeSY^)kR&}>jx9vU)^dz>u!TkGNP4cjh2Ebc#6C4lq{sK7Y-GJkk
z+LW4Y_cxK?K9GQdQ*mRUl54yP`&!Bf5%OmDR0@z_@7MpA@7^l!d|WE5^i4NWG<jNY
zCUQjk5=l2xhm(keX5(G%jMx_FnvO|FqHT?;g^9u0pkmTI1M*h;oVh-|BU~|$#vE=y
zE_jD*kQf@={8W<fD+~Y67*`_q%luhs?>-(z$78NK3?3kJ0D1ein|$|U?0I<sYYct+
z<DeI|;lLeTur#ag?Oq(bJ~{g>i&Gt$;mSrMA52z;Q(GzyWq?`<=F`#uV5#~-zIpk_
zDqlc?Xq2?%WRy`{x=1#0Uu!OLJ}k_WOXxVC3!9u7(DQ?%gT0FrNAy(LSv2&^pTnyI
z%;aKhZM<RU)j?;77?u_u3cYRBiUF=+G7fdqT`RZk`iOs(WkLDGN6qbgORiLdaa2is
zly{u!A#}daa|BJc4n7%;cDl<o%h$(+NABq7b7!2+#GBH?q|=sYkP1@P;HX-1a1wQ4
zL>BBmSp#B!a)-nEI?us~Vo0$fDZy3TR@4UP!IM7ICou4Q(Id0$mxHt8gQFkMPWTVc
zcvVsN1QppoawP{z0o#^C!eC4sMQd9XA_#~P@Zn0vq3Y>*#apSWd^d|$@U)^RS*Qh5
zSn`CY&Wt(3S&FqOGJEXL4))*fUGQCDrlx1DO#AUId6KKqd^9-H30xhqY29LR<;)AG
z_)?ptuM-U|le}^n&McF~ExMFSv*$^%=YBBz_{1mMZ6w`*yzr&je-n2568<0SuU@=d
zcl<xTU;F;q{`+kIeYXF82K#SfE_<xLhGhoPiaa6YXeZD5@HkbP{?zd=Lg2I=55@Ut
zZxCO{V8N6pd65dW_{>&dFwTO__Wa`PEnaLex#iajYD#ObJ{J{Bx-#EhJ}{-{SO@NA
zlsY#<I#TyHaqEUxM}VO=mWqRMa$e4hih*3|bvqC8jNGS}oQ`BnmAw1z*xtOKhJ*0d
z82ALTT-hf)hrTZo45ErR<ar9pV4U$N?!=>q%}qSU=XwGB*%l4q9)EFE*+f7=uox@_
zZu#qIw0+zNMwJ@>)sz$nD)0|diz<7cGOar^gj&uAQL)o+us0)2UO^tP+oe%5B(3R=
zaa;itmT#%L0-q?WyEcQVah0fY&VXqqJOuE-1?nD-0G}wnlinVBW>P;75i)4J3zV4a
z;${H_CQuatg3E>EAHIjMZR{>kLB5rklP~O!Vo4JO<G7hPX9`~3m}Bm2X3@ZBzw@!9
zJ+}FlUY8lNb^*erD&4+2Bg3wI+$d`2eYa3xn~*b|-=z?6`w0xPyaTck;sP{NtMi6y
zb>_7jRc0iNwaa-58?@Tuh#x{ozFY8JF~`8eXFnQY@P7l!I71ZB0&MZBt>Vv$h*f|m
zMJpguR=)BbdfUoBh!WSattX7?9&7pnS($gwepB|-!Ukx?@@NsyF+rnrIZLyam75-t
zRjg)lH%!{#%#d2l6d~hcbEDg&g}weks^2~|M~b`TZ#K9Y9%vr3ck@Z4G)ZJ_u~To%
zk!RtlJlo1n1z5c%pE&xQEs>+;o0`Z?E=s|F4UgK)Ci&EUsd?3UJ+E~|o!;o_@Z;e`
z^TC-G0&J0ak+dTFXNxMwF`2~#vJuoXNMmD9Czf6-VRkM#vvmC<|IF${S0gjMrkbSa
zI_rUz-Z?rkH6qH6Mn4+RiMdOw!d^m)1*@7RFusuK-m#{8hfMc8LC~=TLB3dUCY_WE
z&RZ=Q?DzAM4E8xLQ9jR#3JB(t5RRab4HS_SyMybxM^y0F>-gGzyWz?U4**@+dbKgJ
zSaG_e<%((ALfJyqEZaUP27;+gYNp8?pPaBj)qRp8<nNP$A_dmEi&g^GdSO@_cE>!h
zHqr%{k8D&(>0bFHm_{?@<3_4<(_mRB$w|Jf>o4ohmks@8W6pxFtlU%Gkm4;g(}v48
z)f4k^-TJs;f81~+qXH&{p{i~kH*!R!D}S`Ef94mK&WE{5%T#ZO>~lHJ`Ux)xc<J$k
zKs$x>IiQ>K5>6D;Y{#`f1;yD$m10OGT2Xh5lIoL!r2g9SBEBl9IvGUFFK36t-xmaC
zC~bb(z#!5-3CvF`Du-l$d?HHY=liJJ^T{s>l*Lz6LTPxw^<DZ!1H*&?Uz*Y;dLac+
zd}YLs>W2I@GBX5ZlkAE77@4`E1eX9M1(T{QA60RDG@?q+ekr5$bHe9m_EL$aOjC|B
zj-yYmP%67Mn~iD1aS1B0E6Z~tDyJPywjrD0e$3yiDYjzY_Fjxlz8Mo!Rb+RQb3#s4
z^~ir&{$Mkor)eHu2fywfU7X}jQFo1>Zqg|%whd^H^_=<ZOfc2tBI#Qo`Mc!I@Z>?v
zMR#*k{z69Ppv6IC&a~5@tyNSZmG4Q!tn%{GPZIR;F6Dr7+eQ$R@)F93PhZl!bbIET
zg#zn9Pe}1B=c5b2SwrPKkke8=>Ge}Q&r7GL9)@~i{&aG-cd!o)mz|vZ(VciR*}<5_
zpA@z1*_waWWx<k2CJwBM1x+{;)Cbu=M`f<4cRH1A@v7F1!#H6IV4<!hZD570dHl^D
zmk~ymvx~gZGt9hOAZ(72SiFyIPH@G}|7M!Ae`7jd#;=eL3rfZ~D?hn;K4p!wXvlW{
zZD-$5a1GcLs45u;V8F{{w((n`aI7){Bg0h}e0j9XC;0SC>YqDjY2x*)ol>fJVP;Dg
z-=LY3?;Us43p7+fV(jd20RUHR&jcF7+o6YB69~)lEfb+>$mElNnPc%R36s-QZP*hZ
z%+B5YBomJ)s<ai!(fKGts7&sSB<St;P724`h0pbe7o#wN!iRYbR~o!)tq!yqq^tA<
zTLUAIEskWCU71gLVkldXUkQ>arh3b-rg_+}`rPOzPsHMN*Zi+<^53ld|DB{rCH((4
z);3;kB>eyR=V$-_XaE0a|Nqb6|L@G@r2oHDM5dl2p>TbYm(EE;ryYcWJdnx|0Lp}G
zkTWincdyjgTg=F1lFgy(uS_oofsqat0A~=j#)Dye7j+&u9Xp*Ni$}w#bA=qrf3nt1
z+-VP@ZZf4Df`=?45dhN3X;i^ee~-kiO*eojT+<XP2W_~XiI<56cl^@==8%r0FTUgo
znWPWX@5weSvPhcL*pCnV`(5vY^!Wy0D#x4l%~uZ5RC@=FocixWqyQ=qZC(6$bYrNW
zz(Nj+%)CP3z;OvUopVrP#3Z0dT)dHhq<z5o?C8p%g^*8~`$$nKvIzuKS-^XJy`v;K
zQe}j}#Ds~>UxG-t(decpI!|)Is63T8NGdm9>VSXtkd%wF-Mx$E?%v+P`T3L~DIQQW
z2uU$P64vb8R4<s88hcjUik-Sp1fyv77D*cAjKFXEQ6IEBJ||>c1&*DZTW0oh4g+}#
z32;L$9l|z=XW@&L48aK7P0X;aKYD|#Rag><6|lO<cOLSCdPCIUPxL}z>YLP1+vK<l
zODIE4pW}-fdtmat-f-dBiQrO`>l<{a!2k|W;%yb)LJ1PvG;kXw!9iWD=cw!jYbEbS
zww$pXN34mvR>#)r!UqX|ue%*}H#lJ*_`%ozy0<HEIf|$7y0`FpMPDQ$DfZOOtGl)f
z(cN)=Yy54xVPJF|#KQ(YY|LR$ii1d_m6xy^-ohJ|c?|56cOk(XBu%uCXq%h2APsk0
zZ*1O3o1v=R((=dc`z_ZDoA^7x@--2%6;#aK7GT+dCYqt{EJst?+l1tWrD$_AaXT(o
z@T_@|xa3OSPSprR(<%2CRk2H=2;Ota|7G$7=~Sscea;?j>H8{jP35+3U&=GZO*NJH
zCKFEKPJn)N(n-xSSSRV<vLD^0t>N4LYXY<4ZU7pyPAN*sSxSY{&HARGNBQsxjv8FB
z61c2hvhrNGDutKQ*yu~=pnTq0*qk97&U!EsQ&>kB%`k$fl<(OD^S4->&m3oR2?VB@
z)94H)=B9ijyuhZK>p4KR449J$_`3M545t;I_ho>7dXhEX#1R!6mC2%nCJpeJKtfCq
zqBBQvF)23F$PS3)IHa>2VmaIw8;8?Kl2T2~UAOp#cz9y&Ei8g2ZsBR<k&3P`X~IK&
zN0Bf!-$Vm6G7lnQwCAHVZTiwF1y1n@nz>4vl4wjLVtA8nTa&=36zdH}qfDi*lx<~x
zTjQ^kLCdpd2{BR@^jtSII+CVLI@}pd`>fcOxli`o#=pWH$h;JVr(}?#tWk2Z<pZ4S
z6!Rv!BEGX+W!T#T)282v_Ouhm-4pc35HT8d_A86f?}}17;lY_`MYTZY<GaZwq<=n^
z#{ehgyCSoaTA|GWf*^8(9sy#PvL|CvSm;73+%N4xmN}dK2NGcS4%-!{N_J^+QkzTw
zWqda0!NpeSNJ<9o8LSIdXv0&j2dRw~UI)Bfk~N4n8&1ZBMd8nLA2~<1$YY3-t3W!S
zxk`T>2RXkv;|45FH(&Cni?_WZXQA>OczO;;2}oJITK3!~=YaD|!gQX~cTU`y#g`?g
ztslr-rz{P8Di_r>qY3SeZh5N3Ft_pNMX^le+><d-QPwFr|MCIqxtJX)WV<S7yBCsR
zA7tl~D^+or!`T|hBKoXW7P5Q;O^XQu40SU-1as=nIFx6KKpu~i>wi7}n^XLcxl6pv
z{U7V!^KWka&x@DO@juV;KhN<$PappixpUcb|HoU$V?XS4=#>CNc+qQNtO=9~TQ@>c
zNL=*1u%rVzK7UljPj8PQ+Vgw{Q56=b8AeseQIYZ%!Yb14a3;YNdRWZaDU{~r)aA|c
z7UUJUn7azKISwD-^)0&C4I+70%PmC5kUK7nw&>o|VBK~hLN@-z8wv05o`-f%T-(fN
z1qUdIV)M@+!ubM9e$>HRoqIBvgTIzx8}3~Zw>ho$G2X<~X`A-sR5um9WJNhAFRNyO
zHr`f{Na8B<%ZRrIVJV&XLzF0NtT2^Ywi3uv2HblAvaYtdNm3V>pP=mv1(AHdZMHKD
zEHdg|EI)rz3ZYdSz0X}tgA|n%!v1OP0AAan{BY??k<E$Q<b0f!tL43w*}R-h-5yWp
zubc}_BO@!RL~^|D$tljBt@M$f3^mFoDia)Xh%^X^EVAO2oW6m+rf25-Qv3#G;E^w$
zI|}(C&y2@DJ<SxJhlA+Ehr9)eSK1*t7w(ZWRXb*?evWBgyn}SYcj$yCr~W2GKYp=J
z|En4D;PY}H`vTd|GcK79%9Hc+bS}y>7?~{pPWIUxJg1Qw(>W>UHex`@S~~xI!P@I?
z?e#LXH{9AA(+Z&p=|v@y&Yh0}#yBw3%=zlmI{ma^ecCuIpuNmhefctH!-P_dP2uo7
z@45Cw(#_*g+I5=*^HOg-g89R@&+&-wgmJ5T8L?s14+pRZuFc8GG(l!IQ$Oh3QB;6r
z9hnTjNKMnUtg92|7pZN?B5UiU`sJlpIm%G6C9~$EoFV?uG;Kb9Bc9PTYd(IXKGIJi
zp=QO=PsPx)aGRdzESb9;Ql6!A5kYg>c$M<M{#=``3B*KRkBaTGSo$_=&Jy73;-%uQ
z+B0TLoYeE3)yn0lI**Z<;T+bkQ8PDfPB8sakjXP<&UsQPtUN(SpL-;8#;~~MA(t6|
zzR9#n=*VMoQlE@R{qcyu)CVv8w{W-O?3Ap2*oCqQFEsuV+_Ctd0879T%8>D4mZv80
zxFv?%i&C=nZA5fUFm(nVWif#d@QS95sidL?(tzSQwD)rI(wZWdh*NHfThU}w-}1`p
zS0zRo5$C3E*;0cZoy{D&Ip^B)p3u1oe_AT-dDz(|$9I$!MJn@s7@Og?@2n5=DOXv&
zQO!FFlxc&$kR!pF6F>E6(V9m3N{KXj3tu*v8Ob9_EvmoF*)h$$C@X6xoPF|m&gUix
ztt)>?7nLHul2Yb5o-3XMX!WzQ(n$819KI9J$Bbfufd>wckLaN2b!uekj*!fkx_oMf
zN6Y_Y{{DYQQFyi28+S*J=U);3zxw+6%QeUU@8!#vub%z?p8fxx{r{fc|BuXNkLTZo
zO0V03BGTLJCL(FNon*}_V>c#NmWjN3DDe=U0E2s&@$IF^{;bpY4wDI>Kd-0Y(ZTLd
z&ApSi#}`xh2s%J!;T?zr#PrB%CqSewCJyI3976>pE2cT;-$(F5ksTpX(o3he%LlQf
zn|#=?!TLGs^u<te3bal3ZiJFZ!_0><OfUxLY(5+NAb3Vx)vBdMVQDQ$_s_RSf!k8J
z4l>kRD0Om^sNpZM-B?JwXiCs@XKgEGcf^~uZ*4(57QC%MS@;ZBpX9V>`YRJMi}%Y(
zhzL!7FLWQ4;gnKw{1e3I=WD&m;oWxHO%INDv`v<NS(v10eU{{TdcsUZlFeBVR-Ptg
zI7yartyEq!)eN@{;ZwIax!)yTbR_7iri^?4@u@v=mGvl%B`Fx{@`f!nHl3MCKZ@;-
zmv9C0of>jwQ0ns&DVsb=M$9h%X+DBX9imCgcJ#3oA=6KLY?vd*ik#g6jT|1zD*Y+n
z<F%9|Qz*`X5@l$xIUNNH9g$4HHxs1-%3+H11D~WcQAKo#+075nIh;E?_`YT&Z@Urq
ztw1M30DM4$zv;C+ElKi;k+z6s)tQt0D3bFdXV{a;IHczC)Vj-reM#@dW~m;}XMpmu
zI^+zFopK>xrDXMlN-po=oU1-QU6g$e4bsEjKNF+Qytn!N+?;?mpJv_f33%Dh#hV4W
z9eXgtOE@RCWII-p=Scl8IsV@<4wZ`kufJSN#Q(qgetqNF{{L+Mf42WWz5PEim%XO|
zSvN1@c>qAUN!*ut84fY*T-{`;uV2J<@ADe>Pmgx@4t_p4+CMlGH$YEr;`f4@($H_^
z0U(1!PXOqpl_VFyAj07=I*7IadJb6dkRFpVz%L3>-fcQeUS^uOE;Vsur$>L0*WGI`
zEUnAXHeKZ#0IM2@T=_nx?luCVxZz%_pqyk0um^T`CG}z}fr3cDlyw&<o}#!TVP6hw
zBmt#}yk_+NCT`te17<uVL2p8_DZh-Ot{T3!5ErDuT6*HTETWp;)8h<PBl9Su(;7oz
z^gPP0jMvNgD{dd7yGX;E%fO2#9-HvaY;MBXDqj;i^!^L)Wx)n^17A63IDWhn7g=WZ
zL*7%FdYl6vjneNN>#Uo7Vi{uaHjmkg4VqA=?vf`T;<`=W^}+<Mi<1LYXIfgjmsb`c
zy@<j}->Jjb)DhUABk=U&M-ZF&c)BfQjc-204VyzYCgqT-uAR;+b^yQUjDpGI3*7q4
z2*uN*#G$J=rk)luFLl9EU~ij<NT}c^!i78mp%TxEEQA95yIHv=z_%cl$Gr*hOyHPf
z=<qU<W9=xNX2W;JRV{H;rWLvq%ox76dvWmk<m@+@jNZ;cpLSlx+#$#~EIS`xIt%i<
z%oSN>hPSvw0)fGSIf!}N_3()D8i7piB&A$wO4``m)YDSUcN}y~@4ny=4n<|feXH}c
z+wbSZGy3344et5D(ZSvYxn;nAxj?z7z>*_aCS>ne-Y-yaP;Jm^N@omiM&sFoO$aFm
zn!M42Y~Ia&fc3GP8r$9T^TXH2xnr9V^EJ^xX2oVrG<*tRf#FLunLTKPLUPc^8$D>|
z-RuV~@6r$dkcsVE({+_0PwVpWcznx=6SpGk4iKal(4kq--M%Ozu*7v8^5?cj*}}-+
zZcu7eX(t>f`4k2s*d%WZnK=f8gFJV5xVLmncmqdiR<e#L6;J(ia<+G{fAH(>Q6@u8
zG2MOlC6pbd6c~igq#u&V<c+4Vc{h9V>H{W4T4?6tWL{)-^Vz46)G3@I8X3q&BM-%n
zf>2j>MN(EiX{w6nh1rvoo>gjv<hQ+}vxEJ&dl!c%$C<=+k1~ge{OA)A-g9a>)S1lv
zDrG8Pdtv$b+wQeeAgq^0hoG<XGphSXMbIPnuvD1#{2<b72pA{HchYy!(6Ry8XZ?lu
z(^4NJKv!7ZTFV#2Ni3Y4I*~+kj34qEuAfgypU;1@&wtOxU2o)9k@MfRS8MC*&iU_)
z7yQrj`S0`j@5JZ7<J(&}soJ|4@J^lH>~onuZby9U5RUNJ=505IsHAv=hcgH5xO+Js
zT;Dt&9#408jDjF}V0_-X;Y)ZYYJTABSk&PQ+Fg$iBuy+5wHXeG(7t3jv0mdr8-Q`N
zG~RQ1p3K_eC^Ql3aKZ*RrLfEK)m1e3yl2a(-Ml@X;z*f7D1MTRS}7;ZU`)cS;fxjw
z3Hop~hf{v!JGTWc4iy3MJQC^cloDR(La7>h!GM4;`PBvIhU!Wo$Tos447cnA^;ku6
z0t7hlsdWoyw0d$O`u{VI#!;>46<&9}xDC_@;3Hd%R0<fj$Z-=_-pE`HiNtOuTa1qT
z4n%#ZYVsvq_Ie%GjiUCj8KRAc>x;TZ4Sq>m>+taEm=}xM!SUOpBZR<}wt!ZxNN-D}
zRhr(AR1)dhtkO<Vt8_|iAz+N3*J<!~2v|3oo(8)M4^W{PkGSxG92IPblc9Fo6ytFn
zjhbS#gG!CjyQ-RT_o_!Os)EPOO;L7p^EUk0yzdP@G_MD}alZ*|G=j?Ny1<sUAh;p;
zeer$rQY5z6y1oJ0VJ*~y<#B$<@W2CPjAB48KL@>AlP5sdLCRp^X78$L3hyLH?_Sch
zt*=}%G$WU9DI$ty1kYe&gRpze$(T5s;Y8bBYXnJLDoAFm7F2#;d#_;1k$HUvUv}`j
z`UDK7Myw$URP+$;h!?6XBmm?U0`az?O3Ay)NJAgrzzVn{8)W~j0dDD%(5@)<-40u;
zsG`d|1|7Zw!V;P^k7o2hei><?NXOS}NKw@3M4cuVG20G@Hv)2#Y_}~4wOwxze$lTa
zDd^p^`kU@J|NQ$7Wj9Ey$a$L=V(|BHU(MINIlCuh?nO_e$4Hs6{G*wvnkU6nX`(00
zj?0S)coIgHC5WSDebvL|m6SFW`|`m~5tEwt37R!b&r(%Y_M@wC+!@KtAc=zha^jwE
z!cBtUI0LRbz00uE6e?&lZU+^%EoKuP50>bK5XNXg4aA<m35SZ?oT!a#_!Jq%rjPQS
z`b&k?=w{Hn=Q91F+v2~vz40)RckuE57<^itDC~~=fj6JX9Jl>cxp>|aKkxs`?*DNG
zOtk>y-T(G#<CUBL1wKFT|DX5&&;H*}xP>3|$Xxb#eZPsj@omU=@0<f7zRMoz!;;~s
zEwxo~uNU;;G7_$|aTjhC<1~RqMk;Wh`NFr0x2H!3Q|!e}eX|5hrQO<!3yRn`xSgY(
zad<@?k70+d-g~Zc6RHVUb01jirq>f!u?a6?%b#9uo<&|kx+8`iDfM#U+8gYHbs`ki
z_7+7d<48>7s!zdmvltaH7)QSS_=P@zQ2@4~XqA7kJkBogm*v5YwU9F{hi!1MzKTCa
z?G@OGa4b6D5+l*QiTKRz@!217b&G+H6^UcIcmEl|w&9jtfH&9QR{o~Qsb_g4_<Jb0
z89(Dt4!M#R4TWd?!EK)pH>b}?$PBqyQo3u0u?yoveIa;4_br#$qJnT=(DWAQK(1Df
zA6FWO;!Zpe7!r4Z<U7?<wrQ9~LcfX!9Cuk<IqoB1r!IB@KJvbg5+t%J%EEH~M?j(k
zLD&UxrE@8>%|BiUH%DLzaj`IGh^j(kSm9l{iNba;f^7C-+zCeT1rs##3ixeHRo#}b
zpqyr?cZ%;1cpqS+l?<e?zewuSDGB3s^}t-t5g`DEG${Hsc`ULj^NE%Es#sNlj^P7*
zCS(?%)de04-rg4GKQAfF@wb&eDiwX8UA-tH*e!0aRTHd{BQKt=PnYraHKf71;S68L
z90`M{H69G(y9lH;m)X4DXc%>P<2?k|=2A(THq~Ur`%!)K@XMLP6xc5+^6QsT3fe9w
zvJcuvyU>V#$QyCuooQEuQ}n7m^;Pa$n@9Mz-kR^i*g5o26^(-qU4PkGgQy%%Ou0(9
zl_Z4~Ma@jb`%6;@D+mv2tUJ#BkrI_f-H{p;(a!x1s0k3TB@op2!HPOEw`nRJ*HUP>
zD1R&v@4mA)hsV1Y2hF{c<Nb4~8ofTaP);uFT{=5RE`TE12T*ASAK1<g<9N8flqE6c
zu=!92%729xT^#W>gT2%K9$<2h2BQFw>|OC9!Nsuh9pqsCJ077#|HGv}+!~9jggg83
zJOY#uI`n_9Y+(bE-lG~WPW;s)_}HaUPW#)X6(GKMDMfO5oX%Yjhg)+!?4soox+=Tg
zWxuag37udCtW;c;Lvm!`7Gx{IFU$2xrGU)KpAk<rC4R}95^6@A9qhe5J3svOpxj(=
zMPtgO_^!yL@X602^Fo(4Q$v!)cO-$rwh@eKpj=f1SNl3I+1d9=GqUHSkkhx}2Tmnd
z@!ykYjruePH|$m6D2DkiVc5d~M^kW(VR5OfAv9iKj9g@MF8Ed_7*3ZJc{S+~rN9B2
zpzId{l>hJ@2<r{lE#Ehk0U^n*L#S|AJbNk=qG=Z;bPMD=M5sbjTiaEFQ2apKvv7*!
z9BM@u4|;boqy`no1j;_*)n4_`z!pXBOflEz3gr3}y$|`+C*ia+9Rt}#G08Fsmlj<N
ze*%Z9ix&NiT!R9JZfd`ZPO3-oaO7uQ+&c1}#puhbIn=w>TeLY8!|4O?`x@>}G-84*
z3S^~{kUs$F&?sUWWwJw+pYVX}$8qd#mG;D@duU?Sxr)mr!Vt}Zu>FPze?BJ#ns_WT
zWWV!utIFQ&{=0&(_9HI)29QQLz6}Qt#>xmVNJVx@5s>uYpmZ_<KIutgsU$q6>YE~0
zAm}u#tPlzw4l6$5%Oyz-f$S!lyP)A=k6nd>q_oFgn?P9a7*ImnI~uU|@WmkNZ?g0B
z%?+)uH#G=`x4yH(Y0jJL3bSJ?l-4BCE&e8#gNjj}YC#VD3bm3&xeR)33$}v)92zTv
z+BRtJ-j*NjJpunL-HeABG`7e~c8<s2?eKOHP)w<NA0fBUec^C?iJAwH>lhL&qoKFk
zW?{Px@A#vH1El1H5~G961__-zVkD3MDzkv^eEY)Ko*e6AvE`G0)Sx@2033MXaex**
zA;3Wh8#zS9%(R5%@N^Y-PmoOxUegxFi1ntA$$3es%jxcheZIQ2n;<7>_s89S*!rOK
zBE7&!tm1BeJdz9O2-y%_37~k$y3sx9A7@p`qDplSnKY2{c5y8lS^Ed2dt_{A9iz&~
z*+jTC0U_1+br1W5w^ebxIN#aGc%+Y%a+GVcmQN+`)Q-a7*;DyD`24;HO$J=$ko+vP
zwjrU5_Rxd3I%+Zq`*bw0lxOHN)WmPiDn~m+gF9grSIYroD+{YcTEc5f^>OzDMBRj~
z!6DxkRIsJcs70bh%9p|=(0dIGu4Dv9F>k>a<yIYr$UUwQIcE7Yv*wFr#IKH%(wWE4
zH<^4wxsDePtBi~n7;_w*x`A##kD1af^=>;(_yil_Qf&yW&dgez3|;;|5qBiUf88BK
z!y5G(`#Bom+Ad@(yX?V9E_~(0qfDtZ3$^|LbECmmK)f{q7yY4ocpH^$=R%&Qs=Q0k
zh;5SwAaw*WT2BbxdP#<_y8zZp0$8_YZM~$htvlM~;d)5`>yEgt+ZLdVi6*J1Yp&Zh
zaTsFXbc3VUdM#z5)p5QkN@h*CdNS+Mx+TKv*3&wjluj!tlD#gbsJ4!q@}u=C)r{*R
z%>;R~URwNvS*VT+06wlaU~DYgLUJ)Ex)7)t49cf?k7J_c9UpbBrYMf|Y;~Mf0$VZD
z7vKsE=~`n{+b?F8eG@#M&&NI%PI7SZMF+4@44<;<)M44lF0!syCgF-H*iZbH%}?%)
zY1~V`2LHc{K^TvQ8R37q{(l=UUH`uq8|(FF|G#g*|8K9?f52<RS+G}O>udEF?8m6v
zi@^mb;xz_r`x2jPL-Aa@?)9$!d3_tkoo~=rPr0j68DP!-=Ed3W;l+9LXOoX1zi4JS
z7|kH+xw}dw;Uq~`8Vkaz1qOTG`KE{!YhHE2>mlBe6?MnA(yu-qHro#zP~5^$ohCR)
zg&4$AuP&$(TqD2k@^xefUj*85J_uxsi~C%|-Gx$>ot?D`TU_L&IIU#o-|YUod492b
zak$sqJvw}S-2Cyki-YsV8u9Gsz`jetrNRYqhwm6@;Wa6ot(Si~O{TY%(o%jy74f%H
z#lNs|h(<E#qInrZazXiSghEv&cWG>H{u)83M*N@fl221k9zxY0*eTW#;YUAmD4Pt>
zeZ(*=VK>{v_QbfsDrp)bjX!RsU2tn9=CSh3IG0)_Gx2C+mh4gatTjwr%x33!ZOXqi
z<H9~p?Q<~}zwmv+mTUS&fJ`PYb<#Y@u94f2cPMh+@<-tV+?~Fo5oe3+tatw={DTX`
zAI#59I3&TJTlVyk^#}yP89%{X<8OUa(9llR;rsz(`X>uGTOf9-4F4id?0;Zu9E&4-
z;4D%R{K1&lOH;YWkrY8r7~;heQ`sL2a1BjoqAy{dkaK<lZ%+uuGGw4ozcdaC2ZQhd
zg#@{QT8+xr&r?s$QZu|%&*G|bcj4hZ<MKd4f^jO_43|=d{m3^YLVNZmjxnD%H)Uq)
zB+L3s8xBt6W^X-_RjD|4)4JBTY|f_D#LJQ_Oab7#B=1R|nheH6vmEG)Q9DLkouW*S
zK>%_%=#4q~-kkrs$5-1SgqTxyr7W?{n<38^TUJR;X<Ece^rJaGm*M@~dlp?Knd_NL
zpN>muQTetI!=^~NCRNJQ)UE6_6GBRruBE~GbS)=AppLxd3!%*(^wOXbAQ#CQ`}}%3
zA#zXN-@+<U;n6Lo4^7nT2|zzx@25ocv(X?+gDIlG_;?^od$E+4!rd+o@hN*d8HS&^
z&%OckE!aiT^wpgLp*+yDLJMqoC;KO?8x2QMd%MBfy-1kZqK|O07j_@eM>g&<X>I0D
z;$CCO_j5BIVziq$mKa30y*u<WVpri^4}DJu(N%MZ8L@lhUX*R>5dqBC-Jusw5RBcJ
zNP*~rH|m78#g)HAfJt6%;|WbDl6F&@P^_h`1enDq>*AysK5<8<TgR;7oM540b6>XR
z*5@g4onOy=U7y(^W(|1p<%Z2`&6;B$Cqwcx^wd8%DLd+E*W}`8<UyYT8F5Gr4gsE9
zTxITzyIyAuqzyZ)6Fx)(zVLDiV^~L5`HPASf{^^-CgiJMuwP|oRkl}UKUS&`@&YgR
zs+inEyhBGPgnbvmqAs!kvSEY=j#qJq<G{yKxG%uTTG$!laxYz!!K)4e_`wfOG!9Mt
zg8%=f0u{j}>y7eiFkvYgw2oXAZx&>Xi$D#XvNl$9{z!<gGJOOuUe#PhB_04EH8_eC
zkoX|SEyw`@-YLW3_*Obqz+>k=e8BK=HvFG4xUNW~SRnd~vE^fQH4#o@!e50`JiZOD
zqap9#P1Kcl_>Jy+>=zRtBCynm*BHbhBk^F=pq!xERdkQ;yK-dF4Fqx|3^(EX`Dg%+
zCc$}S69ZOTh7CX`iZRW(xccwv3I^>m8r{S7be&!+6qXP%0{4hM{TKH8qF<`l9K~JE
zSpx_ul(283uuJkSNI$syxYOYcxd1uWj$$O%@D|d)4KV4;sq{?_PGuMR^2L3mQ0P&Q
zXF<2H)02+TXUGS8ZexbBfVc8CRHLOb0)Pt?8;%E&>4U^F1D-l|mc6|VN39zOPa6+#
zN_IywE(+CWKIl5tfa)1=QtyhFL_9V{Q|G&H4m1Hww5l}xx4|vnOXr*LE>b~X8riee
zb3vBX^O|`7>+aFv{wCMmH5TYyH5kO;D>Oz2aP}|hrPbD@s^VTy;Zh{BaVQ5%`O+{F
zbW0K#fI$?UCWAqm&=EWk2*Xx|m)LcD7l9b*$W|1;v+$DdA5b=-7a7#=bb9w<A|UFD
zmv$sI39V2T@Gb{Ga*6j3F690}<c}cH*+1Z*C5k)2Ri_t@g0l)+W#Z>K`Y1v&B&W<?
zqKueMP(;l<_zhSo4>{yVho8}j1L=tZc1;S+E@m2)6^v|@MF+g|Qb2dcw{iqD!-PZ0
zS*MWrF9>+|R#?3PiGA5Jm#QlaJGd?jwXo1$qR^f$q*E;z!NRv?_mN;h5jFwR^a@xC
z1>xC0c3^e+#|~d$P6s=ut)1YPm3n0recJS-6cmFI1LPx_*>i=C9_}Kc0a%4`DCG-5
zME<$vVFJnX9xfn6N*R0J{x6&Y^&&iwS{7Y=%a5E~BCxZ31%kec)d1cKI&tcn9+w`I
zm0y<YVqC;8J~nGOvG@qB)fSY3qRr-~yZifx$FG}6P~j$GULry}oJAfL>BRwwS7)(v
z9zsH%d{3F@e1Eg||9gAb9o5(W*Tnh%>#ttENaX+LFQ5JYzD57Pm(Tuxnf`xo505YS
zY%1gcXv&uN0A$-^{{GdACf{=E&8N(Ra))J|vr%1K=#x9F3&)`Xx&4D*5BCn5KfOKP
zlW%s9{<ZttdGql2=<xVJ-tx|wL;(pZ66z=Av)$bixp0dCG>8U}r~+&H6V6QlQEsLe
z*`3NBjN(zHhq;5qG+mj!H_c}B^Wp2CA(OZ4FENT8o&1Y@Fv-uTEXBP&g1F&x2U>z!
zTtVHU9VK^0UHp7-)`SZ5CQ^LKD;6fg0<+Nws98b2+oo-qCKg$bPoDwiX5%Oa_*8?R
zTruW|%NP8YqN!+KxpkhOADmstL@|MBqQbU!r00Er&wSdQyluYO-9KpVHc!t^UZ3s0
zc>}7$n?nxjmjju4ITtv`nZkz0LZBa?bO093%;6gK1f%galbRtX@s1l_60bQE>9;s1
z;H4BJOCX>#2I`{BSVSL%LB~HU7THJ5?rW0EKYAsGm0Mh+qElvuooz{*iwek=|5YV&
z1Ac9+H%Ic2sR2!Fqi3q{C(&Y@7XVbE;mBksx3Kug8jyKk6K67!fKut9iUP^>|AF+s
zj2zeO&B2*9fexq~;K#xsrKF4oGlGX5=9+LCK*g6CYrzo}|McPKGy3JMu~2MG@PZK|
z8B#<8G<$T!WkKf#?ymNg;LY<^7un!uSXm$eQ~|G26SHpzJ_<q37R3jgf8ssJa}4Qw
z`fh(W&u)K5yZxu>_IIDX+l?7^8*WkXciEmYsz^hZP*aGM0P-|6c|H?$wm%<nw)2Ry
z{p7T<yJ)W|xMUc@Ezgc{`K=D3;kYw0)Lqu3t7>X3lZhq<L)5ZKX$Awo31~a4?hJwW
z{3X(T<#W<~#Y6X%EV?&TF(2JmGU+}g;pJ8|jsE8sXD3{JEKmQX+0h@4(73)h5(kOA
z%nAfmsl=K5Z;!oT8|(D#u&V63T{tL#gFl3G%c&NF?kSH$E+5zpriW4o6`P_Q1N`I>
z8)KJ@wHRxwPXqNSV(f~sYpzeLPwL-_`R9HLZ`kDx;~i-yYF%El)-`KiyYE|9tbOIa
zY|Ac*4648mXtjYn>S&!_XRlt^6hUT!-tvCd7WA!Z+WsIm&}$#1p>6Fa`S0jxD_L0X
z!`D8lQmPh|=EZfvHZP^-z#_D<MZe;ShWRY^ON|B}PSL6^cqzePCz5bN1xJ2&0MVkb
z^BJTf1ZNV#pa4Qw%q(3VIFW*riVzy{@H9cB;o$9UtTR4jXlCP&R{vT+sWzWhH&!!Z
z%LZRH3cw{4Fd|?>uCd*A8w_;eVDP}Ay9g2kLiR|=Z7>M?OvGq#U;w$nwU0#iMODrK
zTMnA*5}##0wh~(qcKBr*z9<og5SEvfWD`BaV$0akYf@FzTu-SAMEG+@7TiW$E1JZJ
z=T&cT9}e1v9*P)Hh0Bk)%QuWF*^xNop{eNvC})HKj=&qb90*E@#zC_c1TyF_N;0><
zmkz12`<tNh_2g8(D%`4>K*D-Jm`)05$l)duh^BKNJl9H)&BZxXl(L9$sfb{eZK?iQ
zWuey?8-%DyzFD`MRLMB<*1JHi@w3^s%xf)Povrc?>mT2y+C`0-cYCc-C01X!!mklP
zQff=R&nIoiRiT81MO~hV;$A4Mr9rPzP&#O>6?R(Vj*<eHf}B&N4<--b+;tR@LtOsB
z%i|wI*1=@Gkfa=PPyG*Oj_v`v2r_?!5cWDJ&Q`$+Z6JQ((xclx5^06CWu5yQIE})W
zT&TCrC4RgppCIsGURd40*TyAh*}M?=LX5lku`GYc>-j`p0s(5$@hzclui2fFIDJ=~
zGuMDK*D8hzc!wuP-=<9pb(E40dVu(0lf$}Z^ZX{h8ZkVub7`4RDN$6K3L4-%pkCqL
zp#feEe=@5Wmu9av%w3A@LhZDzCfYXiwg<2rac5J4CcA}&#9fNo7c%9NMVi;A$l2Vt
ze*XU}_?<+j;JTLL1CV|G|LXg-7i;eQe;aGh`G3Cd^Zx>8-M^k3y?t}gJl(z6`?=78
zw^PA$>J7Kyv3D}~+htEa<@v|Bi&slC*?`D`@TLdX(?bDKNG808TI!kz^n;ZNCjY}X
zuh8lKpa<zwFlX}>yThhN1FjswRrECkh13<^z{I^(JDUN8;rcH2!g328w4*`PwmcI7
z7#Hj<zK(afMxdVgV00t>1m0k}q;LpRk3p|5PMa#!379lG9P`eCGaPKOASWL79*(9&
zPxiu2N97O}UFF;YMc{Hp;U0m{O@UHXJ^@O?&LN&TS^_6d7p{!Ne2I%hp1KxkU=Ur8
zA(p)F4gqaU`Ew$;b~U4U)3iXPj>D%sLk&FyB;)T3&#Qi43VpbUk#Qm@{ZuwhUY%k4
z<i#E$p*8A%_Xp{b{e*c>F#Z9d3y=bis?HjTXnzga8^dNUjuz2$GRcRFmkR1vL2jPd
ze!DfSn4T^`e=QgPPpUcL|K6gA_%Vi3F!5*0AIFD1A4FGtwsl+4wkWIm+B8*jlME09
z?N7_XDw>{WfYelJ2CO*F*|HajLM_f*MT0<#L-a<h&bFskKu?*z(g4wk$fYjZu?bZg
zO3{#=S<^3N^*ret2_rs2R*MPfdlrT5lWyl>S6wzQ=wvdLC2x5X+-F2qx{z!X$wDIG
z87B%Y<2Y^|K(LchTML-IUUYSJhPw*P=HfhW73xYln<_DsA;3x!ri8#P2icOz8LkzU
zZOF%B(gGw&yolEWSxjp$@re%um&wV(zrd#WM?pg4P2nYPH^y7{zUR&B_p`zx?l#jA
z;UCj*;h*M#j=&}EUV$V^B~I3Nz{>qT6?P<qK*b<6M;JFMC4_p39<4GpBbRZvP0n-y
za9*X#A1KEK^nrW@whPQp@&!0AFh47%OSks;1=bTnAVV0C-}sG;VL*Oc@Ld)O+!qMR
z=9R#HR3&J0_M@jq&L{@nx4MUJY0o>|W4Em5osEwiVdcpeT!o!sB!@?GM6cic4?6If
zcbC1<sCS!q&5L>qIxjoX)yR2;7BzV1!3|h4pbu6}<juh5RJiH*TNJWigpB>NMC=zW
zVJ!|Iv#IL)qUO{bTFhu~)~&(Z_BMwze5%%-7p1@IqQv*BzBS2u@~=<si^Ot8e)|?K
z`_YRfu4eSXv#?oD>5H5d`&o3A^Q?0ViOESOLn5-so-AAPxT_#goI|M~;cH2NIQ3Ei
z!q?&et!d<gNedJPOXr;jDEV9%B%P=pkmPe=kO?v3f$Nqj13ZTcc8M~8b7*0gD9H=1
zBueE8r(h}GDA0(fSSem9(1fR0b`$6*o)Ep>nP`_OF~O<y&VY;sN`uuxKLfHVGYQT)
z7*B|AKDMO+<BF647QUATDi*Q~s8}EyY?{F#ld@na%!KOG3$q}K)bcEI@f4NQ&w|2E
zXFKgcMVciSMTD8YzT1Zq|0ogZ4|=1hHG&j1DmS+GutwA&IFg}q7V_*uxHM-M!NoSU
zh_!H}m7;4vS{W+N)5;`Ps*Hj~+H_Wt>`uC3G^?{k%lgDKDT~6`BWlwJj7rYDQd$ZY
z09O!LZZQ-(d4+@wA*HCqaAX20B_)76#YMm&cp*W+emG<Ud_Sx6pxwc>$g*cLK1O$O
zD{?~Cq46>JZ*G@636$U)RSkBM!!iX$36kssn*y2R55X~GKfZ-CT_YCBH6A9E*y+Be
zm+a5xX2Baj#Cfs4Z$`B*503TSB#_i{xQ)nHRh<U2v)Rw_lz%3tU*e`B_`K~DCuBnm
zL`fT5f^wn|5XKrUaV4v{MahN5+km0OlPZ$W|8TggE~;W^C$7p>-Q2FE{S&i>omLPa
ze(dyGA2b94TRQ>dLls3uwY^?%(DuPQ3-9gM`W~n<#$t_zYbXX&n-Xt;)*@@;Uuvd;
z98p29U!$5qm6S_QNf4-s(AZ%wi{uDcuxt#!%Dg1JZAVv<B^{-w%=}#x4qMH(n5Uwb
zHRC6w+rUtc7Pu5)w#B<}5W|&SLveWsIPTJ10GwqTff!hf{7-ySSNss?Us&8hL`413
z4RXE0>-V+yTXq>CFzrHhw-88EyU4m*1VpV}WJ4CQeABaVqmpf<UIbCA#(F;1R{4$m
zE?DJXyth1s9raH#m2Ga=k|l{cCdyfs167J>W1^I01~ANnC^1zM(~@3Yam6{llRks1
zeeIRE;5VoW_(}W7mMNdW*)@2OvI)#x8^+c`hMAR;F0repB=pwvnj0udy|ug+1Vdto
zMEtv^>fcqCPMHhvYJ|a>P#atk!H-jT8$u3BIgKSx$&E;p!-5bYo0nzk!!)wU#ofSy
z{fYw#d0fNs5h(!$wloeD<a`aqM>GP3Zm=XA6sMMbbs1bq-Iub3z{8L5$!>sjRzs4O
zRtj9cvOrVTO#&wU*HEOzl>(NpEWnh{1z;_*$%<kOM^qw<l#p7Aag`$aINIFA#EyZ7
zTbAh}rOZXvOJeL<@r?@~v)UCTiKI&gCX8$Zj`T9&6$())JpE|uv2l^~qi7*W>Sm!w
zk8%>oNlEQTPLHpJi8KgV@02}fCCMAv$k9_If*g`mZP$8zThEX~XiD=QW|XBRd2_Pl
z5$}avRP@Wc((0&O5K<})FD5%x7m3S)LoU4ddO>+un51an5N7kVPG>gBNoINeo*m1U
zXWQ9uV|k984Wp$o+nQa#<uB)Rf~l<UmxCz$u;m|ok?i<W;nDXD{%i+0F}|}L#l#?%
z9V)S<5N3t;e{g4DQEMroN-q1R248QGe_UFoHuXT7LaHq}n&@(J7TArb*Qrf>Buyag
zmK-&9Imz&L6Y6yyZ*#(}z<*ZRdN!ByZm+<95(N|d&aq#C|2QQb&VO7mSt_o=ac4xO
zo|QWd_tJ3h^54f@t^%MXTZ!GjSL{H_$7Jd?Yp5D*&6l3dqIFIg$z7t9(rfqaZ0IuB
zvv);;XJ)qx#q~$LHE-57IU)Jh&%CK`T8H!I!}=yUU^lNfHtirG`TE7CmG>Hx<eRTA
zai4S<J@mTmO}I#EbOWm)u9txdYQkWqN=>i3X7A4T&FTASQmp>orwx0HuD?%ew%&pp
z?~|IUx8RHSe&OPX0K-BG3a~-|wz8N5c)7*@Y%Am87F%A9sTyFl7&}yn$+rD5MI}*}
z6Pq`(sg-(yHPn~&G!{_>HvFujKELp@jQadClXX<7S5vZ(?&kY~W)Dx@_2T@cNmG>1
zG_i{En<n2(_)e1<_z$vBZn2e>60GMko|bv%axH?^NN#>{6T&QQZk)FAev}<oDp<O_
zzYrhVB?^RWD_|f<B?Dq9At{zzu|>8{kPMZ8p<tI>Ns?l^OYNdz&;ng0tT}C$3DYj6
zO}B|iV3>O=gP*$LU3@L9K%<*MZ+v}|xI3msO<y?C2?E9xO6*+eg%C+%K_X_ZB9-)1
z^ngIpln0`<J`3bRa#JAn_DnHh^pu|<+IbMkOPyqfGcKIl@fC()j$EtxZS#s(IkCpq
z9QFt^{$ZEP*t;0+`9u8`w06P~+DrtHPAvv30*UaF1!<4Wf+BD<t6Nac$iyoGMb9Y<
z%Ap*<vY-;KstKSI^fDhPGD*2X@H*`JupJ(t1+WNEd#7SSJF&ODg*c8>Nrk14F3Q1!
zYSbt>{hDSFGY2;LI9V%DC1ov{8nlt{I)_V6+G_hCQ=U?i13Y?Im-JLJU$@FMgMKU!
zBYZx-Jowlj3=gl4@t&nXoS-kU)g?GWSXxztmq&-*I>&Ei3O1dq20X*J%Jm#Bv1P4J
z6n4j8vb9>(uS>#okNR8i94MDo{$EK^eY}g8v4Pz&mOh;WF75k}@Elgtp1h80I21V`
zu~2QnH9?@%6+d?*pw*-ZHw(+TJjjvP)4{2Yy#5Fw)kG=|bBi$F6?caqOxhwl=f=KH
zu?6SrahHD@Cc*geJQ0oVfYJ*uS19^RwmwqP{UCG1^y5b;S!L0vm268c<$@;qz<dV|
z%-IiYjcq!FBCF<>AMuU-RB)sRo2Fj`g(X|>s$`GPo=#jQGLuMDC2@sb)f>duvAKFN
zNdQ!b1ilvft0|Ip%LhRZ79F}(cTrDPRcaAj+NJPFn#D{5OU!e$<eICJ%>WS&O;bV2
zS?3NMU;SW^;$sx(2h8=)znV9z-ZdRk_$ylzEGxSBHs_{4|6~+k!d&|2pLzoqq+t0S
zV!60TAoEvN4^z%1D`c^f;!p_Hz;^klxJta?Gp^p)Os3f@sm0R)Kt)|n3X98D>~T-5
z&t5j%^1(Oqg7V;s!#qY|)R52Y9aVxo&oI@h?m^NpY#Lu;o+DMq?3%%yqNO0^{GM_r
zl+@gLCKAOH?u0k1UNzRsooAQ<+;FD^m^dkLxYLKF!<}a$#lO|Vl(Y94a3?CX6Y;0q
zZ~8eju?-76Jc~-`vSt%?s*z~KD=AaCR3xAxUj{kMiZ}vv(;?rH2}Nazd0h+=2MZaD
zt6B|GomMFGiqe{W#iPx@;@xV>{>ID1q6%eW4cK$0=CHCKYloQyJj%}EO{!DMWGrpG
zG10)=j3rx6wm*?djM79_lxvE0G^Mu`g5iVMqXD2!RdFpp1jYB3S0Ld@HLpZ7%bM^$
zsX&b?o|0sB%PHv$O-gBx=0!?wS!phn%8#nL>6qshE6hgK3)^pc?Wj?BPvAP$GU0QZ
zwJY#CeG(sD$l;mm_L)2t!?u<UoRvDCS9H5qyXcMhc8h-sDs&5-Sp8rI!F)_;ZRSJT
zDk8q7X+U%Wd&HLkkTtWNxhd%VFTOoF?G1<VWheUoqCrnk!#73sz{IUc9#MUYg*UmF
zK+Dvhn$Nt)t=ugcf?9j;*ZUQjNu}>ht-80?WWAhdm*nD}*eY2}MnQTzabj5;wXrjn
z_x8$7sk~+~>O{9->AW9|WLEtaP2|q5;sNiBES0(AR3jOX9+lpE7Rp-?kWQCw;n5>)
zOK(3-|4Q#c6XR(&zT@<+B>ak}cQf3msxek6nd#^g2G)DJs;^+(Q}8imbFTQiq*|&^
z4QzaiY?bA>x_jelDG+rZh{xpK7E3z&TSUMpU+^X&i;NAk&)cgf<4OX*zGO>d8x!8@
zcRP$?HX}j*WP4a=v;&ttSDaDK+t*gTfa(sQdOn~>&$K64b5yo>Xg9R2d6v@wNNN+P
z$ktp>2Wu~0Ed`<Of$+Gskl0HnW&^&nW`p31`OXH_s+tWFvl-2Xr`|E=i0sTcL3h#m
z>%g5JrDlf>vhINNO%$syVyalL^knhx>D~Sp-rsA^ceeA@2_V(IkP>r1BJ9ip`PP~T
z@?>w$Gr?+k4L0-%G*j?_>k4|xFL8l7b<TT#KBpae5_IMko!J5`bCb^Or9X3<?)C9l
za`}Xo&)uq93?%pJI<>x#aocY>Phmi6!`;BUcUt5M#nK4_Px^a3+`6~;pVMf7M}PsI
zciu&<5nlrT6SeINamDX;Z_Jxlmn$`)I@{5dsjopwe9{6$S>C}9bqow~9P9N&sW;<}
zQ%b}9Ux~uI?Y32TD|zpZC>pF)*;>^?wo3X}B|G9QGOEK~IO<KFF2k{|JXNM4^68PM
ze8NKztuH$ee=k9iCCA9N)QmsGE{gkQt;1#4wT}WwO<TuKNzg+Nxkz2Q*!pVH@5y>+
zDH*wlt5KrPUP{s};<iZYjQIjc%(n$IYni%Q0WFk1%hc1`vO+nsOufTy4z~V0SQJ1e
z%D!Azv~niizI;}eN`{uMJW6{rJ&}D*4v^G*RAj5310p5g)PkMaW-8NZLVV|%Qg@FI
zUmrK$?EX79;e+GBp|UJsfc)@xBjx0pYkBXk39oTe$Z;H(K+tPfnVZ2P8(a3dM5hNs
zyp)P4Pn}-a_9#yRzAeO|vKGiB^1<@tkr)1*>U%<z&x!Ov0dWtROvL6bHS<qOvLA1L
zAM(EEY@hA-%H(6e6&}<sGWMYsLC62WbuS>Fe0he_QwybK6G{-vlj$cXX;5aKpaqjh
zp)L2=>J}P<5_PV%-X<xomBPeJO>f>F8gN-lv}(Da`bJdyv_k$JZ-{=MbJvQDB!{bm
zd_8btz3gu^M9cm{BddJ5vHahB@azCCb46y3TfL@;Z8Nu^CSO~p!Gr_I@s+1>*T`bA
zo}*KfmQudhI{ud17xy?V*KN^%cu=&OW1}uw&2mvHTFG)8DO$-r%Pw7a@>VD6By|BU
zQ_p9OpQLX2ddB(BP4o4u$QY%sc2D0hsGWhx2=<t~EJj{kC~q#zytvT0W9`45|9#W{
zrnvv{{NWaEyL(vOf8d-Jx72gu7Rc=TAJ<>jU)G)bA78zE_2T*d$FKVS$JNyZ3=-}?
zU@8t4>{ZzMT784<-3;R4C=R>qJmQTG*mg_3`Dc6|erVUg4MkK()Xsb3K@0NBwIlXZ
zZ*U7}4#$^&M6Hq1<hj7T9je3gH@*MFolXe#PsMdrM_fpDhkUzJW9zlG8aup#Tje=w
zy2|MhyAOw~+Z(ZV42tOGm@__rz~nz8MWQPEtvBXMVwd$UM`6tWbR!my*v)9v-&|e2
z6;0QAgX>iebCtKYnt2uG`EPID99*3J*4+Q?c=yfWo_@}{4l|>Y?@CN@pO>1Xui^4b
z=fW*mvtay~N^fJA4l0=0tqZ}dcn}uUF?+l8PZ$RH#9I>=d8_A6?=tNC)CsSL!5ZH!
zk_xr{czDyi<U76KItpH^iqaalR;g4}N?eJTjgAcNpzju0FM(vQMjvgA;hTDJv4?Nw
zG0_+NY@VTK8MxY5mPK=V)P|$>=H~D|><0#Hm08`-qV=M_o_!~k?4PXuK!CB;qb<@Q
z`B6P?S-q1V&5JF&d+NJ+v!%=dOE5u;_QNwIxm_qk##wg;dxP;jTp>hr43{=lJj)r~
z_r#AY2~1~#&~AO&B*f=evq34NLBbd4y!ySlc>)cHpBK=C_$j!_p|66Xmi+{R3NpS!
zgxAtw>~=?S2{^~mHEyZ_uQZT9^Ccl3fiNoqaj)0?W1J5Z@GJ+IqRRgCwv#e!Cw-E7
z_v}~lAalS9o{aw!HAn2&e6H1MkQR4n4f)xxEbR9?4{{P?RZMED6UxG=L!1BLCF5=k
zfmE1XceuC+0K)-@6J>*B+V*Fi86S(IQZkVsW6B;Zmfg;SK!6S9Ff(xrGhe`axRlGw
zk#E8~Tu{ZO$l@m8{~5=48S-s!+(qR?`eW2g>J>CtQi2e}<q#b|`S0cB%7Quk!#?s-
zC;iRM!MGdTG`WPh<;#8$R|P3NMpfu7q!1d!t!5_{$D=?zVNVSzmiYcXeqY1+Sv4d@
z967f4;h?>Z!xSKGIgu+{?y)k~vCjJ@V3X2(A<n<#UUhn|;Pq=_2)-%ynD<D_0Rq87
zmw3<7gtdlPVvF`z4nd-}*2xqdQaMdtm2PtsatRnCY`!A*hKgEhTt!aa<}e!Z0^1s-
zwI(<8VjWZhS!7XK0IO~S-=<3IjXtxI=M7eXqdJU7{mR3x(%nJwE6|~L#p?xjy=7Jh
z-w566vF76{`}?uZF{6CN#`2&c6#1A#iN81@;j0I@|K5%v7k&iSphL)`*vkyU?gzCm
zkyg*Yy9`6<qsT%;dJ2WAKHM9hK2gHaE^`t*K2~zXj|BNtgFk66#i%c;(eZIX!kK3h
z<n?f%5?9sjnUk>oAim=i6x%MDNgwMTNS|=Sl|~kW6ONNJLve&w0@Vxj(TInK?SS@@
zkz_kr($H?C<`s_8R|#D80s*07SL5QaV!4z{+F}|tlYh+#awiFXwk+x>9a?ums$G->
zT-i#c2vPy^kt+r@J5C9ysWrGJQLCG<SL0tnooiN5p$ni0|B2rNRqB&QdqIz!B(1fL
zSa8Rc4t#?MV^2wia>LqYNatpdCDuBSHeLOyBd-r~vfD^39*&O4%qmDh+~33yv7W12
zl3TA6I#icq(8!pVKCquY0o>oa3B}?E*2QhMUY`-e_ce!2yYJsazaJlSgq%=?62!3-
zrEJo8%7YH!U5pYXMJhkD1i(`b`Z2hRh<asfJK>nGCm#w>p+e>(LN>-3Anu3v*;<sn
zPX-ZUFmmLE7k>QSUEFAQZDq)klhksg#QU|6qE@XGT3SL+k=C-yI91fX`?_X{ngoeH
z2+{fhoi-_@K5)tqnG6#30|ugy+Lh}X7d=#z`Vl7Pv>NO}jflm(ENQTzXero$j19X)
zbw1034;3d_X4arv<90jl1oTDK`YeZiE0tNWy5^Sadez~j^$f`abvd(LgVB>0%w<d$
zRneSjJhv@g9NJs26l}}xSBZvHr_vk4wU8?@vOT+Bkq^ERK2c;zAD7#s$l(>>&?r?n
zY0P;&G5P)vi|ongu+&Q3>Ji#-#{D*z?61j&O0D+$@zvGI)j1dUnt~N^pt`!s+Cs@o
z>`q}z3VmG(+wGy$*tHL@_BxVbBS<UZZ?uqDV_+TP4L5w&`kw~9+Y4C%Hj<pI!ggQG
zsGkW7ZTpV3-x^QeGq?^eAbK%JSuETfVOX_;isk85sM+ieN^y?6ktBlh^{1J&w>ZnD
z6jgFe=NZ+YJ*({IrUAvJdts}05!8xRm<%g-yW_P@*KLCWe{&pt9C;1aS&S$|A5=v}
z(&b^0l69gZ<v>$#0>4<yMaV+n5ygmafHpTRf$DtoFC_kT3tAFpH-(O}XI$57-c22d
z+k}Rb0%E-|h=o*rtE8@#wMlh!PNPYIFuwz>%tiS31QodA`Q8)vbSF}KC}-1bmF7+c
z1thh?x?ZC=DteJ}be%`d6v$7`=;xgIRLGF47<HAHWXt`+tEPPEi5ww9Yg`a3v!y2X
zR8kVMz54S{Ta}dHtud4%LPxM0-5=9KHi57>H~9fi(l_D3Tu7sJ39i|L8j+$;;I}l!
zvKlSBm9Z*2yBTk*MBTO4MCW+XhO7LTiR$*roFL(7;||qUzJSfR;wFY|b%_o8Q437g
zc%K9GPb3l0wgR@ax}fr2I7VNY@J7&+ZxTBZ3%-kL!iYpuMtTnL&5i)`%5ss#5x-7`
zwkfR*lGe)#No7nHA_)}4(LfvX1Y*zbBi4@F-G9ilfVj=^iAN74sfXJ+NGqayU`!`;
zXJa{+>+yiO2KNq71z2<!(^|+RK!Q5SzV!V-Dbuxj!)Vm(4cgHF+0V7bFJ#i(%fofS
zxCAm`TQ4veGV3Ix%4BF_2DQ8$;d22)08G=oSi2`FWHtuZIFE@RgE=^oEJX0ppx+y|
zVkyI-GmbzcC*5y=L!_KH;NtTTK}4ib;z?eXOzxh9-Wh<?sZJ8AN;b3mlFn)N4HRZf
zmJ@^WsAjn-w4LO?O5PGxZa1|cR|I(@QCcyRHv*>nn!1xCaYY9PyNW#hH({SEkWqWV
zd(j8Nq37@xjOZ@n`#HrOG2g9?{xM{|`z{yKP=(8wodzdYucMA*pb*>!M>vN%cKo@+
z1Vtc*2VQMupyl`j6vV70Yk5)QZFKTbJB!4tEYyM=*M+iF%tSiA4I!n%;2{t5LtZcg
zbG{B9pIjVh+?Igh;%+N)#spS8n-si<b_-RQexQ_SK}CVK!tpQ~l0!O~R2Ha@mr1~v
zF5g8U2=PsKvML-3@8eE~!W{9v5?{gjf_DuB6E=gv0Q?+<%BTyks(r<z-riX*gsV#n
z9GwraYhws-1>(6-6WwCs=gNKD9^J6@Ep{7!jN0n^)s-%A3*MTo;pm|Q$21(_XIJZQ
z*8lq)&(Blczw`XR>o325<@$fW*m(8q|NZR${nzpT7FiL?xqm0>_`JW(D`nMzw@FJJ
zn5`P>$X&TU2*;S=!!4@H@_>_9+aeIu;c>UIP_Xv4=kyoNj15!~&2hcF@ZCDChSVk6
zL#Nvndl8RcO2Y>!d9;0j$J*fFl{me=kSEvDbf9Cer4{GXCzUd;+kKC|GGdSSxaA|S
zENi(J<Pl7&Q%Dp51T3WBs2^MYZW_m6(bQH7TCZV}L-FH+$Taczf?||iep?uRReP@g
zWD|U<Ih<xcj0e)05)HY=6&XE|rGur&0)#J7FXWF61n%^&P>+=9V8&gtT|x!MYiWoY
z{)jL0@a0>^QQYr5G;g8~pGyK`XBNhBn8o->Jg+1EBMz&Gz~(0UIKfn8qLCn3gO?Vr
z8rEzm4q4qk;itncaq#=GC`H7{mvz#hMpDDw+|*gz;&97f=to-UlH|8aOs=btPc06U
zoIj=%yyOLcBn6!_I@|Y<qqe!(LO)i>G8$jXrz-l%!s?{DY>pyzYz3}V<`$){>yn6W
zgm%XZ@Hy<fTOIUnF2<MJV!S2)@E!$~N=2pb0>b?tpb9=`Pl-~jfG(6^2mJJfkAL|g
zhwMb*Aew5#6Ud27PB14S2gk5E<o!=fSFCK!E55%*U1)gI8+YKwM6`s!PMNDw4-C*0
zVzmV>&|ED<JC@lcFUZX<Y<;0N*Bo2IPnJLQ#{;o}u8N;}zl?zHxITlgjqOG5gVWc)
zX6v@!ghf!XoiZ#hu_(rT4;pQtG%^uv&UsT{XYPTSxdmVEZz5G(Kvo^O6_>U22qXzb
z8UYXznyPsxTQ^bbLpJWxeY1&l#eX|;oE8_YPVHDWt*vZmXqL!^=Gb>~0K(Yh0L(KW
z3GO1Y&^wO83e;5Uj7kow`t*Zq#(`Y>>`zh*G5x_Y?j(yLy?=1cL?Uy)C7&#9l=y%r
zJUKE2mZfjV4K!L+)sedtjXCNt{NYrAvizNF0#fv($ka^#&p6gU|M;V^P(XV&j-hEf
z78I}@Bi)+0o}qCJ^vEvr#~<R<|I-4|&(S$FjzKxckd5E}_(S`9K4wrylLN*u(R153
zvv|J~-k(K!Eqavd6KeHQ`mxfn9fbp~hBYOZ5Xa24<0E;Ft=%`-HibcsHnMy<lgHe?
z{aMalv)yZAM39$_@o3YYa$R&9DF$=f=dvMTcBSV>!`83U<E<zO&tLl_f_K)XNDdXD
zWi#H{ul_BnnnfniAwOPA?_+H}gq+MbcASnpMUYlP8Osg2GYGFZ_|hkp>TD3*hMXkb
z>t;;7@sojGg=VxJABe1@bw`NWPPB%hfwElrNHG;DF#%g^eujV+X}aUx+y*`d_O)mg
zNU<al11uC0j|;P&4wJBb$=F2A>lqP=6v?NGNSq7Tef11xsVu_od2xEwJUwlmpC0Tr
zf8O2urMZ9bQ=lkNWxVj{p)!@&Pig1v`9UC|f2}J)%FL+HTRPi4b%?&;k4OCyr~QmN
z2JGfJJylGykn5yP)%Jx?{w<7uq^-P{Zk!=s>eQ;CHUoM;PFG5@Kq8xS+0z?5Y;H!l
zRzgNa%bYXZHt}&z%P9t(YS9YH_@XL!*6<=W*zLrs?ZH4#+~SQ;)Acr5yb}!pk)=t>
zCR&PeVi%Gs#x)L*e@81>o3c!x0|&E<iFx4LBQ6$%;H@hv&o%i(Vp>n1gQ8f-ZrTEH
z9i^>O#n!0V=V8bvQF)aoKM?~Mk9?tRpNpK?HV$LfK%ZA>AWgGursqCz*oHuIt`YNq
zOXgwFITT!3KrL3EDV>6E!arwK_-kcVD9;05UUCI*AM$c6r08{JyyB9c72m2;+)Vtx
zQa!(l_<wEGH#S~6{$DTGU#&g+e?9wu{e}F$%6Wce`+UI;5Jj(b0%!j1u(gkCb=X`v
zXL447b(EwUmUGUQgyo#D*|1CjeYR#5*ucheoYjh$O}H>>cy2FsEJSMA=?|i2$eVc>
z#(b+chlXs`nKU5|Qu#U=@(nP5QOqcWjpn#DU2QF-YZPgOXsHVirQSLL2(IbH*WISH
zRCtXiKJ-0ikHUr-(8^DA`kssXp7E(ui;_G*qq->6fS|_<O!xD5Su*D8C900C-W``p
zAO_-YYvkB&y=T-Xc%3|eO)bLPVxq;=A}TNFn#Y+frIR#o=GWIz+kc^GpGp4BudmAq
zCyO#8{VnyJ{plJ0)uko&&X=wlJZ;{^U2y_qYoE<Gz4o{RiD;HYu}j|PD^=e)z0svE
z_@e*cRvS#TtK)fLbLmU>(T(^H6!ZkX!~D>N2&Zs_Ai+%wl&De6WgR=jC@8*Ny&88^
zAklpYV7ba7HER|QF5?j>QD8e+fr?D5{c!aR0v(!B#QRXN>wHb%>IsXniLf*5(RHK^
zttC+B1qI|bf(=I&x}s<I%B4%WM)i8E1Bd_mDi&D>V*pn`sK1>CQ8?^%Iie2;Q3gpx
zEEo?VBv0`)_E#@1LXmP?=dUn)jH9PE@f8G$&4wep?G~b`+k<bSk3bVF9?QOf_ttIr
zA!0++{zg3nI{yFJ`}V#zj%3mMH=m-N{T)ZbLOhJ^T^>4%F}AtE-~%|>oNTUsgEWF!
zq#0#q1ZExYXTSC6=S+`YK;k&$oK4Wobai!gb#--hJ>oR!_hs4jWJrU<b0=bLnGpcZ
z_-2r#RI~?<Y96*!9IH1b&4W`Uak(>|*0wdQSSWhXjnUCz_C{otvKSwz&FJAwh|Wa>
z)Z_<A&qbNI*a~y=0p+msUc_b@g9%JR7WR9Rc(+P-;|d1n(q(oUo=E#*l~~Z!nJ5V0
zOS%3*OMy?5{uK}b*|Z<#p<EUPRtJi9LyYiiV4+sM=#eJ<{DuM{<IX5eC7>YH#mW}g
z*)T?v7IQH2t319a^qP7gTW77UsI|NE;dKA#aARvZYHcl_we;&(bUTJ~%PW$H+>J2S
zuz$nnIa>Mdz_TU^PPG&zP9(ih)+cmage{8vA+zwDB6jg_nE{DbIbW2M189zmGoZ%}
z0!Dwy&k~d(B%BgML+ZT?bo?OQN*&>iVlWI?#DNsNfukDAT%16j_H$<IQ=C6;0(%yo
z<1*;)96n${$Z?sAl-g+!kTWYf0)%}!a$XdYxVW85Usz2!mX0i-LKp{0L@8LCqP8D%
zpBQr6ux1v?N-nAB_Ik$Ce+DuSrBl$SyUr=rmAzOB%V_QebT`6h-154<NHQzzHY2av
zpbtAvm&vL2-SVD`WCMOv)T4l0xiSl%GsAVmA=(3RDKU$NpdSrr8>RaOxxkoj3@Ng}
zBZuH&No$#6ikOFVO0Q}YseM-6n&<kM!fE(-nk0D}F)Aeu`z>#<$6Voh2W_8<%R><_
zgDf<eLwM&0wWu4UT|4<2)(PjcDg~=6i+Co<VB6fS`X>myR>}nCUeUT2tR|-zwe_bG
zQ#Z7QyV)7qR_7b5z~>Xebh3BRi?Mmds{5)!efn<HjDeJuc`vlKs&<Yu%t$~BW*R%u
z943d!k%uZ+Vs+x7ZL-P6w@{Q|-C|&L`fFfo@=f@(V8vmK#gmV1b_sXuZHtbH^@)Dv
ze6Xtog2+t)<RT5pJAM499r3Ury?0P|NA-l@4FdEypfnDYSAwHrRL>(g`1H{>y}Ayd
zCLsl|fCMmr_x(}Ew5lOdL<<LviKvb!5f%NQOjJfXPQ@R!QV%Vr1i~)8+u(Qi#8b9?
z$j?{8>^yTbr}*@F^X_rCtQ~(e)C<O%dq#i!A#~h^I*0oAJ`I<oBCKPg5?ZNR$p-za
zJKp;81BOMF)IL7ggTMz#7k45!Mxp-J#T=>w+6%lI9GzdZ&WU9(WpErH@KLF^5Dhx{
zh?Mzm!g@C#D_=7-IF$wTF>FAgGC$B!jdV=MfW*-87>(AMem}X+Hu&__?YHC~c?*tW
zhl#Dt)?}U+J^uOz9?rIwySZxBZG9k$>Mxz+i#|ir*e~eb3eJ!z9NXZ8*dZ#wKo>Zx
zjBt*UeC$JoS5qHz*!KJAdjgb)cEENL^nh;epos=`CeL879XA2tt~1PcNlK=qSgc#)
z^$c#;vIi;a4Pb%CNSat_+UiG_A)ZD9k1fg>oU%y)c4#@nKZhym9hgc-8Hysx`4v!r
zAzDE@d}=6-;sg13<Dnk7us@>1!G!VbD6Z13m!!+^z)ZO?Hc$cUQy8GQNUrfje}m6b
zGa#5Lx}l_}bo3c$x~@J~v>C;cI>e#o+-Q@A%v-g9EIr+XTuAt6%G@L=$0i7z(K_9%
zV)gVhyy?sm4T;ycTGi5|$miVxu7J!S0o`-;xixUL>Ku54xXajwNgf)DAI&#rFyuoa
z-ox7Spn<_$0<q<2WKgWOPVxZ|FOV~Ps+Z*orC-?XhS>WUpzv@_llH12z~@h%EUh=6
z<8cw@V;_WW47vwSQ1oo+>H3Qmjh5gBK!`WdOL4sO4yFQ`!U%#miI-qmj^KoXMhclU
zDjS`#v4yoD3tlh4;|pjTlzVH@dISIb#iWnS%+sZ#6|r=F-aRYaQZ5~>xy_eu|BnlB
zwRAJO(tIWWwPpBo?b(WW2UOw5>(4TN`jno!aMs4cd2YkO#};%K&QlMZ=g%aZr>i!c
z=WCh$)P=KNg0t3KQ~3PV!e_PlLgMq{g$K^F0+dx4ykI!+=Zb=}y2g*;>8gCH=brBv
zI(_uJpjAUz*f=E~u4GyloS02J$8iLUy^c8ES?;AEY(fw>H^hy*??0$u7T!|XfQ8aV
zFE2EvjWUdJrjQxyB0@vdo3k>WqMN$fJlVctBG;|ufwv@mG%Ukd)<wb_G;FeADn071
zBW1tzhLe-X>yA<+hups#xY?61W%PpnIV`CBVjvUe$r}xU<++QdYrh?&6niqkaM7v`
z{A~$s%9t_an94MkB9J`^W~Q;Kv>V)59y=!=r<4bRQ~Yv$bB2~ydC=A8o0Ai<M7=G`
zp_H&?PtvVzKSK3}7&A(l>trIv!z81vQMn>>Gk(o!h>CVC0N0K1jx*?U^hE*0wphVr
z%I?$FwvRR(3NbNTyFIRko2W*iOJ+YIbYNr!)6M6gf}KxHywjV^SCYglHaAcT(m9sL
z^muJyEvgc)WW*#J70tH1Uw`t)-)qO-DST+$ltdN&+4S$tu^*mv0#Ky{|7RkTJ0@O_
zx3A3oTl9^eIy^Z~gE&K-D(tE=u1s&PUXW8jTgNcwAj1cP8|`{*{FSAZlg9O!?JsQ;
z1SW?~k8ASfrcM*>>rFf^<*1PE%gVNc>ix$Y!v^AiG0e#zono~GcuTfXoW0WN3i>$g
z)jWi%t7IEa&?b>*Tu!A)Ojq#znvL(iaAvakh;CNnTs@kq2T%`m47cGeXSz+_DA1h-
zredAlK(j_|yi-iQUM!X8jG&aUc9V?gAgz}!*!Tn|lx`H9C$Y{5tkX)Dk*Q$)34QBK
z!r1pb-TIctgg$j*Wj$WyCev{{{=85HYl`J!saM)2)^Hv5)E(ZqV208K+te#-CYgR0
z3sGP8j2llmY(!H>#et}Hjq!`C7&am&A9KSqXc_21m39@(nl_r@Qx*Ge)?1-a%amaz
zW`1dI-TN&jwM3SMz7yNhO?GJ4b~Xgz-g5cNe`->y5%YJ3pk~}wMaW8e+Wxb}Vl^7c
ziNpCz)HA;7?00Vb&3E7K?S9w(e&^uB-icK#S>Ar=EKLUQrf~--hTxCih1BeD;-6M+
zU<AAc-3iIFIKoQXk7CEh?r}kK*{S-ScuBR{V*)8Tx1_O80b5DriLlWa>4nW<O1f;t
z?BujH$uYECX=vta2%CYYh>yQ*kkv!Z@vkHPYm`U*Wpxmxlj$khcvL*t`1r49E6<;=
zy76C6SDwCDdx-yfi2wTY#DAUc!S_3-dvA`9KD<ADyLU8gD44(D^qIiTQ#*?YzA0TR
zi+>WAupD3+FlOx=k3VXq9W8Apk~x(0t*vV!*4Ej~y>~Uw@tQ5UCjJZD`z&SM#2N0P
zk7B4?ST(3!J`nV8*mV6W8bopOZRQ$<HELnE!Nr`VVwcg{$oPH+I$`G}X~(3CMji{6
z@}SG^kk(aA+WKyUzWJ$JMsLDSkUt7vhm(81x~!JG7>X=eLZ8&I3&+i~D94f`3h9RS
zgJ(Bb4F3S6=?)Q22Y9=O39De&9%X!KDKC(N?iC;toQIk6E=3Nwi+>jEdk_BSo1;r{
z839Sj9FG=!{sZfk<r#ZXM;-LTY<}ZuZmzAw&LHeZfq0+9+F9(IR|#H}i61VKZ$vnZ
zGLRhOfB-tkRxhJ-@iK_du|xd4%n<l=yY!Fz96_)bfV_Y4F;onE@XH8u-oTvUIe0_1
z-j9c)d_$KrQ>@e=9&;~78bamU;fv&kH8z>L2T6w&b@>znf8i%bqn!HC5S{C3iN|V_
ztF0CAz*lQcOly~zqH7eDf-b*h9P#_73<?*<5N~bdT6BxjaS2^mhu`LWD?>x^b-&&e
z=eQ04>+=hOs*~*5xAPV;f+i~HrZ0+?AunRZDGG6m2qg|{Ik}|t`?IFGgz_|tnsfvl
zgmkebZ4crv6FQUGXq#b^m$~JFtU##i;kvI=>{ZcRHPAw-#^Q^;8p6u%u`@;gx(EXl
z9+In+i#P4)RqeHqQV3X+ofwA?FZz*T88f+5qb$>$qCGmgRT!Mzp!-L6ggN>o_2Tf_
z!h=xc0F^2jC?F9Hnq%iyG6ZoBGPxI|dg+SZDDLnz4=rlxKI3j5=p__zScBXP(hx|5
z;uxV6wnR=VXbVF4yiaR6yNJ*iT?-wg6L0A~+{SS;^zUW%Wh^$XN)*-<J1}xw8|->t
z7{pbOMnM-Hr58mx9T(kxAoZM#o!Wv$ZsRJ&Zp#_C8q>GC0r|3gYte*a>6GUd+B87q
zOY2x>Xb}|#3~Cyjvf(TWh+lyKB9Yir4#l7t%7+xzLKMNT=`*yC5CNp5K;H{_$&M?+
zk)5=I(hk4Cx&Rvk-~^s@I3!OpTvNO&$mbpvJS|CLZa_UT19C21N3l?ybF_r)Qr&GY
zihmp(tFiJs`D2)dS5Y#`gqiI<>_^4{hVE_$a*vFLy!Z+|!*2aA3N2NZn{HXeA`m5I
zixR2fVs{2Z)i?RtXAM{hVSrfBu~3%!^DP^I7|6H9a1FggcoPiBqY&hHm)ED6?I=@j
zN0HPi`7D5nA;M4rkRUcYX*5)GXGLe8g>Xm;V_nit*;harBlJ;?lYlFLT^NVa23fU@
zgPtS-JhMJ(YSTF_w~jI2GviRn01nRU2v8CcgkokQIbTANqZ_lKsU&b6++;Gy6CoLi
zt){+@kxn=pPBVG2LR17jH>3bV3cLuolJSCc2_Z`0qC^xlYci8_^eEkh;c*XUiTtvt
z9AB`YgQ=`qgN&tQANUsz3I9k&Ds)tnh9*<q2o=3&d??V;HkJJpPuY%Yc)-ECn?cyQ
z2;wLs$@9yIP8_&x0QC_e3`ruPp0Km&B1cx9g*T|V%HG&(5fv-d)Bd~nN5`i-ho>7t
zZB+$12V!P4kTIG%MJ@Wm(5Z(GyBG+EjD!)e41*%}<bfivq8h(N5yLGTt~B74sTKrZ
zSfXDpngSAZkHbzbqTC$BQVs?rhj?Q)=!V1mqTW<Mc5q?DhMegQ5kE1piF)^8-o|J^
zm<qIjDi|CuNZz5!R}gj16eI&4v}8Cl#RDlMNs7RJq(I29MXKsp3U#(486}^XNJdQ!
zj%8d-$v1b=ieIb&Bcj&cg+&Et$S}ObjMcFHJgQqWB0E%(<0ySnbhjGt_JuC3{n0tD
zVB-$Dt{U5igHhba)M%z2N6bNPY%R4mmF9vv<W+l*qbWpZ>5RgH$G9{|Oyf7c3&e`a
zi`0MoP%@yb`P;FB#?RZNf*Ua5`Cu`m>p1I-Aj1>w81;ltA|4%yBYEkFW8*@N^>*n*
z0amF{#n`{%C8N1qY{`JgJ(WY%ii+!6KMr){L2*v(OH5y~-Z6bG2&XsuXKw#O4;mPh
z_8=&tuw}4UNK3sBeq>fU{fQ1z3eXn8(UFU;C)4!ZXDTzbP9|%24O-Zi^>M3l+b&uq
z@2biw9-i7JrLjBjf1Lv-Cl#f8-F+)gfnU}6xp%Du-Fa4@gKNg*<OYBqUgBo0nW(*D
z<s7LJe_fIhZIUf*fM^!DEOsU47Kq2{@MnY?E7{#{X4y+fT`Ss5632K^JUw^?ChE(X
zJoB1^C&mvXbmaE*=*rCTz0$Uo<;gHZH7&|nCzJbyPp*sL9CX6zglS{UJQM`&bX@(;
zN^pHg;_}@2rF>1470-TWJ9LtdB^+q;Hq1f0(29d%x8dm7&qA|`CiCQm_BfgRF-zTd
zPR8#&C!IGBRPYF`ai}(1#led6u>#jR{ppok0O3k(Ef#0#8qijo2i0EuG>7o}BG_bx
ziQm!)3s<t9H%eXEhwyY1x*}C8WpnLbrLJfy%3IN#l6P*2yHheo-<5Dh-9NLSVPKCI
zcm_18m&$m_pWE7L(H1Rlo3tTH-Z9zO2NvRdSPo32B_Hm{Kx0+0$7`&}&h?~ZF3_fz
zQ^aF5pN{+cttoTsFvVK&>!DbzbfsZ|Qu-T1(9gY|*4bq=+*<Xu#@Z-W7{aDkZ7Foo
zvf#AQ*y)58+@U}(^gHIE!4;OW{Ejvyl&mIrqC^-w;$S9Oq8iR8G_YJ5T^ih8X1r0+
zqt5kiLK)VSZH>CBkPK$I2Quf_I3VJT&zh<L2bw~P_sV4Ep*v-D6J%~&;_zCgqC;Rh
z#ST#UDlA4<vZPX~1`9(@=%v7tm5D8Z9t95LL%o2M%BPIh47+f6KTo8YM+Fos8Wym&
z(}?L<P0R9T+~GQ%a7anpP@R=WO46@Fcr*-B;Ky7Ac~j)6%v+Xu!^55Qa!k$|?vr&D
zhemQ$x_II>1C|P65niLEQJ+z@CH;>sy4t-YZ37i<FJ85L8l4rpwO<3*$mIgPYOPdJ
zfM4;Y7^$O3hLWK5EIF^7C9lPHGHl~LVw>&}Ev?1H#Dlkcs=lpCHxGTL7`Zhd59Hz?
ziCZR}Xs4BYelEJJ^C`L6bi4XamOVAAlp=j;oLP&gu)3A~S#382hO>xSI+(t)vixLZ
z)Hz-3T2}#RHjPX$t4uAxH&Ued(D9n_0WCEfCb1b0s8CT%%RgdSG)$9D7<N?-%f324
zTLSl^tvfoH&mjPy3&|RWvR{IRHDQ5xA6h19`*CVC7JMGDM}vfp{tl%DR_5t1;v_d0
zO^P<v^SdWb<sr}~i+pEsI4n|yo>s%br+`U?Jy)O1ou=nTAGTzltujjQKnkTIG}`Aq
z%RE?+nR+`A?M~G9DwVW{Y3jhX^$0(;(@d3cuNLSYmD)f3qP&?3gN0=lc02HC_@0@a
zTC=Z0uG$pi;#0EUNY2PiRB?e-MUheQWYAVe10S{9a~}?PA`7<hw*SbmK(*eD(x|Jt
ztBUNSVVCrD%Z-415@o)z1+G>fn%hsZmV%7WELEPf7??Wp(utZIm-eBic&RcK6Tk~l
zieN`pxe)Y5$<5`aR&ZRC$v346&iQ0amSzV4Vcx+q_4;g&yUOQ3k`$f#zJW`2etAN5
za+T2;r0rbhxs}<pNS`Wl4|*gyDq6avbmey3^{R|+<aYy0*zyW>EO%DMEmYXFQizg5
zS^p|$T=!E)mp8i1510s1E;Fn2qPUw`lcD?*EeT*Resn18b~P3r{WJRl&WvA*!dPCW
z>M1UhVPdXN@YboV?~#@N=9rrkFMC&71yy&Vcm}nW>*jB>eY?M0dX3$YO>IJ|cgUM_
z&{_9e?dXlAu-T<htAq`HCn*JVSARXJ#-UniLr{e|-FD!&wjH$Wa*&=$D9-flr@`*r
zKNV%h74`_s19zPFsx<lZBh;ICvTyAu@W&J)Q^|%oH-FnFtAK=-VRv=+Hsj{*+FRz?
z)%Rldx3j8m+~_^s-M*W4zA|m5Et_U}1J&t1wLa{`Z|I;YPX0-e5*WCr)MN6_w{>Jn
zYiVWJC)b4TsKcrQpqeZt!>`CxGhtQGg_8FU+PU%}L2ObJiYa$h!&jg9;8n8w(}73&
zZu{n!-5zDXe16$(l`dZl*IAJ%_AQ8`JhG1@MUGgv4wm6i!=%iWD(9-DmdYd3%=YXu
zZA)5&8Gy6T`W)rl6T7Ej%e!e;i4e10-}_E#u*QS|7SXk3S6w<)Y_LT#6dBs%N@DI)
z9cxBmtb~yYv+`r(Y}&;8CPO*y-=cOFvfuD{1;x5gV<_qlYY1FP1&^%F|Ni&>FJ+g%
zy}%kTiYj++-%c&L$+K?FMh%m1BqMGrzwzd+1LN9-T)21cg;*71$vdZ;()$2&4No|R
z6J!QR$Kh;5RlLa$k<)RqX>H&H3eK@PJNnI+<)TvNXWx8I=LuHH4dYx~d=2Jtb{Pk!
z<n}VXrN8KZQnK~?_a?L4XWx_xM>su070xi$J;JO9w$jGz8Htas(+_Hfjk4%=<ZP3L
z87v=sdFb?6lvj>kzn^{AvS{rdS^0(kILq&>Ucba0Z=5igd97Fbd{)nV%UV^x&Dpa`
zYPaJg>dw7+y=8Z2cjvMyK)8W|3I{f{SQcffgLp9k7RADmcTf?K&l&zy6=pc2XmL=}
zbH1xlsqJ}lEJh_^UKxyi<yfh1RZJ9o(@SK<0GXg(rv6tb>bwtsR8LN2r;!e_?IO>a
z^)a5cK27C1ImIGs3DeHIFPhiRX6p2F&obXe!MmS%zbuM+R&c#KrWi9^OarB#33r>g
z7n7z;ap>WXU#-nsQe(;;M2fOyG$EmCkG_F1CdVlhBh*Dtf4@ajwK}F;8I>&WY!ki?
zs(jU=?riZ_KG?U|N^W7wv}?AwDV*%gQ5Dya8lP-J`=(|8)VF{pVUPx>4WgN;^6}4{
zo@(|HG7vnhO!&iljqQWy$um)z6RD7XqUj)^_{AXM<|8U1K@T&PWMFC>aGeogF300R
zB<kAVpH@T<>(A{11w!oT6Y7MNT#mu<OW7&gzMzLOOGB3$8!r@OzEOAADegOF=b1hv
zw|q!c5N2xV2s&(y!l)uosh!U5eiNBiq~QBQ<TRZx|K|nXw!BZ0{_@@xV7ZI-O_lA_
zpBl3||L5Auix*Gb{GV&<&(<IEe}0wuKc9<Ng9vkxe19{#Os>RM_lo}b`+1U__rqo<
z8T=WNf4)85+j;f==;)xm_x;}C>F&{o!_)TLwm;!#)yKXhpD3yHl2jRofKb-~96`ys
z3KK@+#Nl;Emr{$7vVt*-XZ@seiD~Yr7_Uygi8B#jQU)jOl0iwps7xHDn<VHOp6Dha
zlQRw}M+d??!_-qr4)X?&@Uj?`6HpPcl6!C-Zr~?B02DsRqIe-F8<H0|)wb@T&d+p`
zbvX9c>YMz~41Z$-A%^K=aSi(BHT|9aG`PX+*Wt&X<@aHy`92NtZSC>30^;@tgq8il
z$GcbzuK$Dky%&?ux`W+h09<dk!B>Y#9`$Z2M{MZHK{p0pZpZ;iE-*@IbP)zadAlmB
z^2N*mrYo3KIp}4BoAO>ma0En`EM(W#@MGA)l_5)g>3m`sBY~g|!m_|&kdbr<VC{63
zMA#v|y2uA28RaN6Hh8ToR8dEy8v?~)*@gwUHV9&17vi8%hGnMdA|XeG273@kTKDYq
zlPn~qE(-?;N4yFzFEJ6&h-@U9!3hCe!x=2%Fm7H)mk}IpB3{Y(YnfaA*V>D$ZPKHa
zIhOJiGBQ@7$IRg0D9<^zK~QFlc1t-j@oE%uCZb>3pbUs!7<YKt>bzs<k&JM(@}0>H
zSLY+penHj2m551$WI7~X4(uJAg`qQfL3~cID3KOjbdqFfU;?DLH~<Yn4xF3IBwX-n
z6jNh_hy23*5-7~r#m>+X-lVZxHu^&Ixq%XTNp@BDt<Vh~OK6}p4DKycxp_IRS!E~d
z!hf;vrmWLpNk%xsT6LZ2Eei{>QkV~G?}Cpf(Z9p*Lrf{V#ox3ZD`|oi+U1|^ntsNm
z6r6<r@?8_}nrn5jC2Ff;YfD@*z?V@D&6dQfOui4p+vnpkUQLI?1f92G`V%-9Qh7gO
zZhVwng=r6SAme6;)iTwCrYsmibFwWiPx7F1`7X#V#jm1vxrZR>*OK5L`*8=I#a%QP
z#af+9rNF=bV^Kwo5w{yq6glmnD{Ml}W?Y~{O>%TU3AzB~G2mijCh38{>QoWIyfuj6
z-p;}cxfas|vfr>2aZUbYhr-!AfvSs&o48EV$!Y_$?|=h2msn&e4mHbxfl-D%I^Xl^
zF`ED~vL!+>0hKbwVuCTrRiH7T%7Um-IS#es@LQm}BIxx}i>G^At^Hn@cP`LOSd+wo
znUa-NxK?93NxLDsnDzrWL)7g`A$c;0I&B_ln}kNK+)xS?hrZT`I`n5|Q<sKd?P<PI
z5cV(%ez&o436k$5I>#i8UCre0k}gI+%dCYoZWS%eEW{i%f}`5=^>!{l+16DxYMNY+
zh2iPCcw)Y&Ip0|9C0Z@@`{W|(!I>Y|BLU&xn{MA~KsE!9C#E2-tZi<(b)Y0bxfBYR
zFYy>rLadl(M66SHx0M3?LTRpHC+gDPj8TfP>0<6bnm{al+2@c(4U5wOO<-mc((IH)
z#?GdOAj26*F>U|2zjyGeW>rO*hf=bF5pbBQZl=mN5xzf!-(o&o>E%baDn+cOCv6K!
z%lg`;>Ec)=exqcdBpWE6b0|d(Iq`(4a^%N7OCCVvOsJsG3Yk}Azejet7tLjHc<qo6
z<+EP);)xN~s-@MSrvH8{{$mMFl=#;G=&{<Vep>li_l#sS2wII?;j1SJRukSBG&9%^
z!&*G*_lJ2pSsrl>unH&!u`(h*PXikVA-PQ^j%|k@Sv6>1hruPkY-nON7Q}S_c)Jd<
zR<@zY8Su*Grd4q4eUM((u3dR*mRN`W%&;bJXl1C@&Sz?C!zF5jtvPg*gzX<lI=b`r
zARVWr)E`DV&MW_>UBKgI;_a3-(YsiJ(5!2<l;uJDnsPp&1qh8Qpcxhq*!=Z7R4>m$
z<@3xrmS|x5%HB#7?r5HR@yp{rhcx&5it*=E@n&AML5E6!S1npR0tDF&-Is2Zl>ccr
z81eYA@bd|kMf1*zH4@n;Zh~GQ1!-2f?b`fkky<YTbP4w<1f2hc9@}bwyFL4s-4k^f
zbIxD*+|Wj^cvDBSaMUz1kgKT#YK=xQqPs+h$t5Ju<Cu=#hb8~lT2#(zE1=@^{yrH(
z$5QZ`x*$|)CWa;LcB!{9=yUA;zgccVX*agD5;3(1wWgZ@lu67p&mBC%dcUOJ)Kvq1
z;c8NwHVrV+e8WVUqn>C8iL=ubJ1=)AH+4T82zeC6`B>_uY(wc>x?7QD7EzAon#vRr
zvosuK7qqvj^!Nxw2CMft>5+zqx!(evNXq36p$de6CX!R?>^|9E04(hd4xG4?(#_#4
ztd7(xZePEl>ZSyly=t)gM<~7@TQ(gjWN4srxd44Nv3PAcAg0iu3!-bT6`-)K&##6H
z@qrejR8W0V^|QO@+14>hxn3IKb!pM6rqA^|eK_eO->k0vRh-?xfX))US*INIr0P<+
z01AW1E`pRaHoR+Lqn=|k(acKblq<5sG1IZ0wR5ol2DGiC_RFIWhp%>yf2={Lb$fS|
zR=dTP#ehsGynzsdk@ZO5aI}^ES#+*8J$kl$7GssoP@16m8D6n#4ztZ3ighog*;xN}
zKln@s2ed<4#bSY|Cv3~>2RF$mm(P|2+WweV(1^&wUkZSp-WJ)0qg0HhYD?10GC|Vq
z0hvdww!DQHDN$%m3tTlG*!CPtC6r!3Tya~|(L#G$tUlkI@FA%QAX`>}+37T*o0I8u
zOml056FRbVgy}w2NExh(7Tqc3m1(Lql(8M-h5{ItdmYo#%+<Ddx;E)UMTN;<mWiNy
zVj2_<kCWgP##U@-7Pe~Fk}52|A8)X4lYO6UuqXvfr)dH|ilbjfVH*YimXQ=K+Nklw
zJQzvg2|Ff8h?=%{KC%p+YE_%<#%C6lvS8DE(z$AqZ5TVN7tSAqxD?<TbypWukEO~Y
zw+2gjxAz^{c2it$(r;{J@SRW3^2tPDZMm~i*gi5V`fb{Fq!LA4E3<P@-XZ##D=f}c
z4hj(}S(bs@QA}_jFHTooP85S`!!fW{oGR<KgqhqtcnrFM=!X4}heAl1g5zVqW@jfH
zOO>xu$yuq&ec(>~e-4xQrSuvwPYbR<6a0VHo~{=Cf1W>Id-mY}^WgvUC-DC{JUV=N
zaJ2i~{^1*CGNk8kC;0+ZH}pCN>3K70^!MVcC{1F@xG!(Wy~~-(`x+Ng0=cna-tX($
zgU+pb$<^QL51OOSa4L`9>eJ&<{6C`*3)aJnhFD!$TW<+^LfdvVb~Q>uGa5)kSk|yR
z1C9I&6V#h0HErkHz?bbB{HtzGmJ&$2W<j)G|AGirz6I7wqe1n1?JnSdEl->6+KTls
z8%8mOV>nMSu%j*04V$N7M|&c)^?j`T-AqHe*|)nU%F0U$S;tS00hfsNZBu`#-s62g
z6=LGNU1<<^lL2(sza&Qlnf01oIAFn?<RN+V>pOLr(LFSs(H!riTNb*>D;FMEcV|Jy
zW_z~?BkXh6oOfSC<6cpk+b%ie>Lqe@fiK5rjS2Nrlqj+;?}D~(Pys(o{#DDx6Gf4_
z_%D~uRBFIZX}GciodTGK!!VE7F{j3NKU{$2Jj`=Wn}VrvN8MyeQV|paP>M{UNMm&{
z_|=<6ucL?%)SEZqKU>>2p77+!6QkE@4K2VpjFR4VCfvqGe*)S0X7^PsXoxd1Z>%=h
zTPYy=o)J4J-cfp#RFTiv;BV=Hy1#rdgdPK<)gfAE<|pEV-~5+(Xj_j0!o-HGmtYf3
zW9>6ICf1^Q*}PF8+zJtH4Bh0yx1eGc5#n8tD4TP`Rr1CHj8dt&ONH)O)YX2%eW)R#
zpV9e7{E(!VVTw`rI5P3MUQ|E-Ge%gdo=Jdc??P?Dyrxo%7Av#CSd~<~hOQgz70A9{
zxa36m(bAp)47zn$b^RnH+&tAq7P+MYEXySo^XxSg1nF}IYZI#oBBi-!Wxz(3(<PWe
z|F{AHF`~k?y-<1{b=n1ANd^1=CS+7%q{5FFKojM%8WO4F-H?MSXz}4O2ZT{JQd~{b
zi;}eM%&?wEcAt{7%&NP7`BU`s=G;Ie2TB;`NO;n@2r;;ck7vryX$#-g>MhZW)s)k=
zi-$U!!Qh!Z*MStP!N-!B-iIl(&Y~D%n>S&uAF1;WdCKNXts&k{!9qkXbuNdPQ<Xec
z?mhT<Lm<B!yET9EZN{Ti@`IMwqWA^+SEI-N<c(L<)#g#e@Rz05dw}^G_W9fzQm&VW
zdvAe9LV<?&C17IC#3=Esp+6a8j)fXH{D`ow+i0sT&%m36`!Rf9dyFH%mbjlNslxN^
z8_r9V2)@xQIM)tmAsPd5Ura_a+pD~A90ZqSVg=?7qCQzQ<>J#bV>b5_(11I*7_wnV
z?PNt)T)WD%635-SS%R&9iF%;hizes=Ak(`tmzRbod6TS*d<{ysL;8}6cXUv|b)`<d
zI6;J<Ucs0ufF4$&3eIp|VHah&V_t(gpN2FPS(2B3DL7PIJ4F9?+`Ksz>0l_gT<ubd
zxS(gZz6)_gL(d=;Wliw|dPxsMrKV&!4;h?4f;39#c*Uk~c3>W+$$1)3wQwRp!-j=l
zUxq{U;ipXbw2KDujU!5jFz+$CiT3f@pEN8<?519vkMd?uX}gv{WVKORtc`09iG@9O
z^d`BG*ZCd<`B1uYD>8?gzD*^JaLYR!4Om2Eu+gwFqlbC*<*Jj8@@)TWcLEF@g#$>f
zAoq;>48q*yj64i?zv~6ePYN>2cAv;{?yh=gTtqZLel4|T7j<%6j%@M@9r@wXs=H%b
z#{TgqC-}j53H}f5YBB^+<80G}0&%U;;86~SP&@o^Z~)Q_<q)1LghsozgzA(OFJq`h
zU?h74Q~{dBI=YGDPTV7Z@pKUMT_K~*z1TIkKW3fRB!km;4a@+Bq@GQMm3+LYKvAG>
zPl`uCk9AmbN=t9L<xg0Ix0IAFf1uRZmQg#)AC-erT9!q+VAR$C)mgCk1$u(C%%RZ#
znv}au#}J9@e^a`#yh9=D>;RyBaVkX8C~O7OroUqNX><d#SZJ-7hdA%#YXC@J1la|f
zfVq#2ji}qI)ob!ml(p4!%*>Daj0IhzMO`?n&a?Lxb%ye+C~LZNC0lG(a)HrG74Fx`
zkhKrTw~2(Xbwa_9=`d`2hWS3ExjG1`aCoep*45O@Od~3zC7x~L&P~&8eR}ljs3y&9
z^$oGQva%wg+(e03his7VslbD!NRvJ+<a`u|j@O2~-OUoN4o>y0@kX2r;DKI{qn(LN
za~Teg6@2P4CxZdkqRw1^@_4WE@LAsZytltiuEVR4v)hAGf(O@J?#}oK1Az(vOF+~y
zB8RD>F&D~W7y5U>F&0|633%L0HLh_Up_mQ2Hlb)X^%_P00`se>TpajZ;!e2#^hh*B
zAiyAn<KtMSvQNteW0K0{X@C5=ID!&C%ICOVUMDGviJFS(d7RhFSQXtKioKY>KYI@M
zh3<XoiQj916j|gzohRO6NYX!9rFWHJ4ynrWrLYH+kb^23$9#oNYg|%y1`)_gi9F>I
zgKZZS{(>U`ik`w5q3}gSx1)Nq`6-1ngt2%yrwHBV3N-p?fT_9RWBkODWCpO|Hu}Wn
zG9%=qG-a4SlgOhJiX-s~6DO6PlKYQD><Q)r>DcY%i`F*bVuO-zmbR^dxK<t688L|C
zhHWt|Z?2j2|5ern@}JOWq#6Ut$UstOhu<ED)iFGh1Lz+05gzKVWdS(!92UutOVLpv
zB1edl4B}R-MdJyB;lZNkGXo``0TNnHXecEa1R4Fv8`NHe)tgy^D6uGVZ)DmdV6&cy
zJNv;n5_hAnvlvjaV`k<|#&;@%F4ZBCkyyx5jsZv8gz)U4!k!qfx?^+d(y21+v(rtX
zFQW)*PA;2{Z9QFn2IO0^OoNDn<x20MA;jWz+^twy8_moyd^9*?gMsoH(_$rKGkK5+
z&Tp+;WTdx|K78+!VViOcU5wZ;+)b`IUnY4OyW&dq<JjiRF*3-%%@jP;2e})iRz7)R
z=*6Z$*k9C3KH#`VvMK3EDgxzF=FD>#L@Xo0O{5*uxoMg>wgVa~f5M^1<X>As1pENl
z55Eb!1|Z)%vH?W9D<VPl5QDRX3u9hmt{#sW$N*#YA(IA684Z|eTL8*YR!ArYn5bEA
z%Go4ePzx`Y1m+Hbq(~ohJ>?osoq-A_<F0Mb1RwVox!v_hr9eljO_B_G1C;dTY;uHX
zD5<1L)UZJrW2>9G$T4tmE}fz+NGV7Qu98M{KoGbQnI#XX0FpymKOsA^s?>KD;!Ss$
z*ZvJ1%_NS6THAF{?^RjIZBV=qfF7$S-2iTilLXPD@}E4525qVv<v^m2LJE-03||*8
zq-mFl0m;V^ca>)8LU3tmv$UJ0-f)m(43TF48?y9S0>?U4`^@7GbW1n<tR=V=_35no
zB9;XVjd94ww|_=;ij?TmW<#{8)jgUMfAU+ZPi&z2ykMO5*c0L8^!VuTO?&_FVE=H>
zo;7#EWS34b)WOm2&Vgm58iuI~UGd*IJT85q&1};kr{{uLK$)Euc^Q=A=VaP%=yuU^
zGlK>7fs4d(#db~_#USW|Mdg8}+AoUdPoJ-^K7GEn_Vlmo&tLrQ>B`Ep4+jV0iCFpA
z4W5Qi|JFJC+xpqxo}G2i;JNPSja_wE-U7p}K7{?VHqv3`?|0uU{o~~A((wsM-U|?m
z^(wonwj|YR3b+3!^q;MkSX--8Y)ox!4I@#kH_un=$^=VUbEQSH+8DTXlK1(M?N;J%
za2p@CYPXmF^YG7apZ`mF>{Ms}nw<aZ>5KJZ{;##w=MVmW5B`6D0{_3`4~PHrVeiA<
z6xqM*PpguDsrAT#AU(_~xWqN5aAx_Y&`$LQIzK2OXxaEdslAz-NGT)3Fq*U#!ANW2
z^TZjSD><PT?oj>E+@N2>ZdPv;k`e*1GZ~#?V9CVS3Y1J+8X6@&iRIpeca3%;sr<rC
zB(=jeD-z>yp*n}LN;@<6Nm$T9J4`vzj+2*6bD*of=dwu9bz+X6RK<c7f(=2VX5W-J
zl&CD2OY0h53W)$uMs>QhHhHMrp6KM$>5PV0+b9jswHFvc8v#qDeez)s(lnsFU;X4f
z>d*vB4;#!TP(#vQrvc19MqtUqI2uEt6-|xmLuCR5w2o*DOg9R0rm<kem%4Ljgc>fX
z%}jG68Uh`ipedZ3ZsN`b@KN+{K8rZ1s}wa6MonL9lfFM142=&Q$Q{sZ^l=dBjF%5$
zG<4si9=%|y4b^p(U1Tpzua2Ly;-&+=U`12TTiK?`5m#hiAl@RNNWtueEOk^KJ|jqQ
zybV)@B@*Q-=yPr|S@QQ(adJP11`Bo>BrX8E)FQCDh5QY(1Uek8oVtU0Aa-|7oTbIw
zgc*l3N)ct6iW<UNY8fL6|E7-W+Bl>I9>{mJLt<nh8LTdJT>@mY@MEVx;-W3Y?6hjA
zqN&s#c_y1CFCE(-I+U6Vof;!q_?QYGVTf6f*+p~Fp%6_X^u5X=h@djGFyf3A<_%@{
zsNY{kxkmTS60o2m9TE`C&lP<tqe#&{smd_(7imc0p<;kp6FbWkk2}=s0O?5k9M-*S
zNZ*l<+y!$j@uKwH=!VL-F_DCH9kXinR=w%-$^JpQxXI?1Csh5HowK!UcigPiQ@YnD
zIZa-|B;W8alc`G$aF|?&=~B{LLMeu0>fw?Z1R3RiET?ZEBsf(U{(%8qGAw_y{55~m
z!o*?rjSpEvZX~5|U>ofwM6Ca=g0QtMYR}g({%dumE`3n?bPpQFRAh+vK15#z8je4L
z6I4EfFQ^35AOg{quE|yr4yC=^<5fOB&WT+tEz*B-$8f^=)0jvmYngIQlky@%k1Q7T
zXCP4NR?gWNWvnHrEWfBtom?E%O~ZA=GRmokphN_)fi97)1dWp+-{vDQ+T@Lg9Q`zj
zWE9zWx8IiBG+o@+9xUKRfydy-lCTWrv9N=f@vQw<G~J*FqKw^|4nBorp@owtNc{S%
zwrD)kkV+BcK!7AZuYp*(d<inIF)k|HwSHYolMa)Bd4osvM66QU24+*>&-&!JYrybY
zIXsXL-?e$+aNjY?<i?o1Lyi@ME`Hp^c+&lVHzC}-Xh)IPSQoNzgOTU4kJ*fpgnBV=
z8`E=_c7V7awvw|zVWoG^TtxqSXD%~ul4ve-Ah%TT$2jR^@rNRH%|op<N(iP_=YQn0
zDt4dD+xCL7CEZI>u(6PwmU3E!ym$FVS9Y#=n;dsaf47SUVmc^n^L{!v(-M$-;@RI+
zAWCY~$|GXFqQ^zHV%jlR;oo1Dz$8Ykl9%>zBlF2iyyZu~SWrktDQ0kEcUUD0`R-V<
zAkLXK@q1*!v@9yei$zWHq-_O?)2#yi_JO}smX;I!Ab`F6R^<4(v<9zaWtMsy^m|^O
zy}%a}nf6$I9czyT{WSyaa$;t*9t8a?@o~83CJQB==qs6f0SMb##01<RuE#Bbm3!m<
zm0L5u$!5T}jVHca#sN8?B2<f=`5XlD(Y!?t$kZ81EQ_@QoS5tB6|*R$xp~gcr)Bqf
z>|}{LspGBR3YHY<E-jgxzw!#kChh{h%@XEO$9(1~WLT<L+A}WiX)`}fP5@;=DyCHw
zn&5+|J1*TB8#dxZySDH-AJ6nj8uXP&x(<sL6Pc^s0;LNF8b3$-Fqu&%DWYDzcmd3d
zYpG=-2Mbcn<X@dnkO>hhKcUYHW|kjWS@~>P$HcpZu#0VBN9~nlQ}!iR6>%9^JFrQy
zh&EguCWDG6{gN&fdR55-I{OotT;|%BW7x++-JLYb(WZ*-(CnvYc@%5+Ht7>`o*);T
zV@REXCmrYDF*)9(3HoWvyjQ~nt#c^8Yh)B8vpk=*c!cWcWykmy5-;us!yJ>Z2Q-i<
zL$j&K;$WCvBxJ596@EDf;oU4o`A&BlbCW;LC`l_DQ=h9d9tD~880|;0>1w^SfYHlD
z%Icb9f>^^`L^WG&S+iZ%UPm7b$@48aPWfi(uP57L4a+*6Sk4o8T|zOUKyfDp78;3R
zVJ@S~6-K|M=#56gVt__#bI$H3$)y2CHpZ08HE<D!?uc@vnR^hYpR6hNn!tzH4eZzk
zka6fD+@+@%8EX>&lMNI0cB6pC3(5;98=&zlx4iRC64?v`<kW5`uWd#{91$<}Cm0jn
z15%pS)@E4RwWv|Ot|rW(s&Z)1k8Iy=KJ#G3gA9u_axDm1TAK1A($bk-K#*Il)K8Rj
zP?%Uoz0G3CmYdE>n{Q=iENwX59RKMWGOZ;wO%sal!Y#a<{B=w2*)2Qa5-YVblbn;2
zP6~eh+w80IAf*~O`SDQc+GJ85B-eP+Bqxpp{aM61Ca-0_UqGp#=7VBMDBBW+X;H`|
zUK*2V;?rOn4l|Vyf$uR)-FX79K=~5PDPL^5pTqp877xQ$6u4+EAZ5smtc*0G$bcG9
z8gG7~yHS)~Fg$fVWo}K#G@)H~L2h#v3btuuUQUZJgf;s@QveCy#5Ck(mj#4dQ}CvR
z0^V!XH$@F(7T+~5$cWWyDc@dZs^dOE9Axe-wC@l@@e;9F8HL8s$3&@Nw-jiG>?w6i
zBE!!VY^JT`%<<~yG)b_$-`*+(mVJl*;Qe!Z|DO}8kzJYkYYP9L)s?5~#r$8-o<Do~
z;Q#YA`v0tpmtmZMQgf1rv{i4N(Qi0D(C;St0sZ}afEw$c&KKxpAHLn2Ee)8rO@(t1
zmB>{!?E~AX!Yp74IxoAT=p+tUfhr+_q$72uByY7<#W6VNTv4?JAj|9h*GDpe3f0mk
zCwGQLDnX`Q56Uh?<bz`eaZA7%C3~V|Egec=Vq9@3KqO17!iVGufJK+d2?y0N^6Jkr
z96hCOS1BW!Q|3r3io8O4w267qWULFTbOE0wxll4IIhpvzN)_~t9XhWv)%bC^lO$*r
z$<O?0%Tzk(JIHo)t7Jzqv`+@}VT^_3F$|4DO&HHM>>7hXSmdYy%t+BY$&JyBFulJ{
zwM7*FkDt<NG<KdFU13v(c#t(9%&owyNhZ$PVAR$}QkggF5e6x9%y2n2fF8AFy4aF!
zNCv!^3{N()mmwV6RNV@zTm#KzFv9dOR&6`G(`)HD<Rp%gFK~f1d&RV~Cb4ZfU#bZ`
zSSS)wt!lu9=gu;O5QaHmsJO{ssPZk9c|;4bUJNrxF-JOIjLL|QtI=WscB;|>4$c=0
zO4q`a?PQLq*o38X%}mXQ%^6ZP*;!3yO4u|tds#IkJdCAT&SzDu%BAE<E8K3LIgyk4
z@h};-3$Of4C0WY3oXP8Mvg9#2c)z%GG^9ZMKG|ht?cI%O6MRg~OSj4dGA4nCtO6kK
zop~KNS+^f40p~_ljT4x*cu_L?jB4;`cfEmCrL3uVvXCdiFtT1CkL`L23PB0LO^R^c
z)EU05CZC<$AlJ>@sY=4XeUjB%i6!b>qE${(f=23qH;YLVtH(Y&7Dh%2z-sNeIZY;0
zT5nErdBRpx^9*-Pt_R?-<|w9nKda%G3=o1M<~@54LUQ4Jo-g&1H)-wcS(=<$8k5RS
zFym`(AOj_Fl`a!;o8(XaoKIe*XYO=Q#r#d~Kex%|<X9=E%I_qj(=~<_1p^kX^h~Pw
zhAF90DDy>FP#gv2EpF+RvuTZyJ$C^>VNq61Kd+umX?>R|?a`iDTy)B4ZE1T_S}lk%
z(V6zm*OW1u&OO~@_qiJ;=;fb^2*KjFx3TVd55o^ezI(L)l_u0<032`sTUlFOUn%DQ
zUwN_eVE=ot|NY7Ae^PGk9e%%me02D359@SHvj2J8`0RgDN$?3_+6Lr%Qa3Ekq<la%
zdIy{5<UXO@ae)~qHjrIGOdhtzg1nP<Kl@h_*#+5~y5g7tP3RZ{nlP`b4$*bC47BZ2
zGODMn-u%b3N1X>hY$6&p6{}%ugpv+AS8dwfv~?1-y*lQrO04u<GA5iVj6z|#NlF;a
zL-Dvj79@W5$%gcqA<=2i)|h$g$3GuaPAF8HdFZ#gTJv(^!w&mr*#8fbPSCGR0We|z
zUtM2cEyn*pUt4|H|G&=o|A+m5(*92;u&H-`^)-=+3v2@6$SaFb#v7)TGeP<8{$%D~
zbaGT@&kXTYy&k=L-+puWVHb15@gw}z-Z^c*J32Y7tvAGrdI3}CASu0_+VZRj`$*pf
zzI(89_+~~3SvW-lX>y>k$?a=~ZA_8%LH<u6|DD^;(mpWh{I^od|ML9l+WLe1e>ngB
zIpsh7y?=PRcl>&1cW>&Apwz6=3xpCBAVl<-U=Im$Qy?z{v!Y*xSC|SM2T3_LfX1VY
zd^I|-YLbEN4WDsJ65xUSqA49ECVE1^0W=@s#xv!;C<Vc|0Zce)CWy((;;b%nn^r#+
z*MT0efy$=h8l{y_=n5Avvq51QG1yqx$5?x~4u~(DbmT5}y2qte6Y5&Tx$3%6A+Emt
zkB8zlI`!Vg!LGz#52vSx{h$B*F$(+LxzB&)`mgKHpFa41KAivlr2GGm`+Enkrr!3|
z*HfJTq|@XLuGYIb<5B0Z#gLm*n*i79<4$P0L8-cnX*}aeO%dQhnEzJtf0Rdk{`oG<
zgK?=pr;z{8o~}Q8R*?Ts;Z*n_|G!50|5WT=q!{M}lJg{lPGJY=s3(7qu7k^Nvm1W>
zV*lOV>5gB-%Qw&{?(|16<}FUA&@xZH*&9$D;~k8(YF>OpvR^*qFwKu2S68*$YS51`
zR+2K^g&eNVS??*FljfkCgjxJ87A&MSPtltI^ZO_pQLgJzH%f5&p6M@x?iFS{%&`7b
z7WR7>uFTz1YCM?D9N4FiWhu@t;2TQd$IvsC!cEm(!lDa7%0;AMvI#{tLbltTVSkk2
z|1g)r-~RSub@f2}`m4Qjy*v4S7gHXr{%vK&BHK4aO>`~d)R!3%%5x+(k92ZxJSZ<N
zihrsHaK?gPv_@D879}8kU>1`0<fnRez<~q!N|t*ZP#`ZJki$%R3!|ejvR*a}I~1A6
z<zjF;$t96cZ5H|nKMsLxaPA%^F)WTWWTY4$j)cJ6OKL`uwy=I-epD}_2Zwcx@*v2|
zrbN7serPtEBF|gu>KGrl`w@l28s9Q$ZN+mc#S#imET8nCbuGv8k>nXvge`zQCiSz0
z-{kPNHB*lmmAYx%^El;M8-G{fa@r85h(sJ~B;wHkXRdO*q`=5R1Z6DZxIV5g5s&Y1
z3qFER;<#$;6MJVGl-iw2VsK{6Z4on;%ZsXeLF2XV^0Ni82>&lIcO<k5<4zQ28~8-K
z7|PrHnf~Ft6$*?d4raY@2oa$edDa455yO&7|CGO7X{<KZ8c!SRt%<<fSd2Bz!fr(`
zW#H=ocnvyOnF4rrjTvXV?O_n5vI9g-b>hPuK~IbM5IqCBs$~IrA8`492z<GMFKw<u
z4mG)FB)djYOZkZuQS#36MtMS!vzJyI3|jsNn3bMI5%W8+WW~CXtu!*;v@c%#u;Ufo
zt?EfjiQ=d7g9$?8HU!h$Hznl^w-!i9s5+FqClWds+&?V?{MXs6RJE7g82Pg?VZkaI
z0}^L?q&17ilm{XZ|1Hh{R;~2r4BiCz#(q(shQ$lRo^Pg#V_7MT<i6(Yd$X^N;Ubn@
z)A78Nb80@mr~g+QV>K0RM5g9$)7OgAO(m7&2Zl<pk&-YR1aUM(eH_&94NBXBrCCuu
zcZ4tBlfcaIA7`3q++wprCkrxHcRI@o4Y!O66=aBFLj_3a=V$a>6X%l?&R{hKuF2d%
zR;y2l*Qr8C6i_PCP9au&F1s5?_6o%O#2RYa3=~z%BM4d(Qi<6Bww$jGn!z|41!l@{
zo+tItus%`@B%gko4BussPS*0l2d-lR+VC|D^`-ud^$YI4g3rT`c^W`3863LdaO1E#
z#U@+uKWd!tZgI2v!@wO(kU0&v<foEj6<b+@vfj2Vu-q6$N^z|?BX!Z5ic899nPui(
zXRRsuMh)N7-AV-&rtPjZO**}b?xr7|Uv$U`=bf)&ic1o}Gts4Q{L}51L;guGq1hTg
z<u2@&UrZD}nj}~qviRp>40Yz~wlx(C+-|hvnZeO{WWN{f>@pg*FTy^?9!bEP6PPM0
z>x^DF^2i-&)n{YOchenKqO$Hb34;|Ig8*efn!hpHSy@4wnP;m<cYC%S&p*|ppPPjW
zn}#$fP41jPRqs7z8&x$J68YL*wc-7Owwrtm-{h5JsP1>g?$;-Q&D-*vO~0-A#^ax_
zyV`1eW1XSL;vg3suoP5Mt;)ihT@XH-Bi?fu1{o_#BBM45{-WkTW?4ZTGefS)c#%ru
z?aZUl%$%3;x0ouQ4s#zg6Kqng_34$ylclG+8+Q5xgS!D9+}M!F1+{v;UN!&f;H!+r
z{=QIg#n2(#;fkM8cM?o6jXDg3Nu%&8R9TIj8pibflp6ARn2)7ZrHWWQ@#dB4GM~Ql
z3XOR3nZw_4x&M{Q{=Mg@88k*QvG4JEq(gxUC2^A7W&$o=a*Rdklh1^&jQ4@IIRyP{
zOjnHAh7c)CsCgcw-M*|&)*TH7H=E)-N%)Qp%V-TkOhGWjsCK$ZqI_7Dmf=%|VQ-C^
zJEN`=PmV|)ryzKM*^~i@+LrBwh+__a8S%xYDk9zxy?$_hj^&xdbj)xHCvnwh*2d!7
z?eiY+f&8Hk-2S_#^L}y`^xN2J#(A&QYz&9}n|4B3am&kG#p|OKX!pY&NE5t5$B=!W
z7&pkaq?ITcHWsYAfuPH&0{|78SNkRMqpVP7ZGQ?sDWa#nvhk`G3X5d8#T_+D2uV>{
zp@958W8T6fLk$F5h9z59t5epPs~UU9JdOwMgbcm`FMO&}4kV^N1;N_Oi-fb-b`tff
zR0s0~Ca>@*5V!8sqhZ`Rqlv)KTO+J7^{XuEyD|JXg+B8e4uzd}#*c+$*Q|p9Ihlna
zI-&%zU0yPZ$?3==HK!Od<^y-eMSDX#I7ep;_ss?IVneM&teDc~uT^4xl}N>qMkN^U
zwxgJuZ{9zKV|w2rU<C+jV3bC!MXSHGylKQri{Nx?pYL@jK~X>l+05hesErMtAZY`f
z5r~9<pKTSsLhYW&Bh~vmCCq}(xAXp>{r-LX<o(`m`|Zx|ckNeuuXk$(EEj*c1)VCo
z`EatwHD~LFfS)EX31<|Va^a%y5g|p&CaXz;)ui8QO{0D$>%<uaZJNcd8e5;J_71!;
z(*i2w=I=DhD(uH&J6?{s9SGjIiXBWZXv9>?bP@jVAfWD@z+Qn?ptjsvOXT)RylFb}
z_cB%nR=>9A><f^Tzu|;UBUz`N$tgweG^@txQcG*)gPMljkxXM-62prA$M{W{7FWY5
z&`TYZ2~4;-9zgN)E&04cPDq0!4k;bQHOs0f+ih&@cd;aVe&f~a$Dl8Es&HwDo$=`G
z5HdR=u<!|H?|(8eEsPrQ8rSGp#3=c(#7#C3c~%!oEu)`n!r^y8qtw%>JT2kt9RIHC
z^qoc0PY=Ol0<`$<EQ*8l#wx)=?58ZQqXAk<EGvW8EWcyfY+kmcSa`Mf{r>J=`}K#z
z-6=`+Qk=O|3d9q9WSv6k$)-#WYM1IE;K-qh6FK}A1}lfc<-Kw<oZosVi>09~PCt}Y
z-eTogWb|_zR5>2cr^zFk8}G-D32_T=e6oqTZ4Gr5MoB6Oe4iNo`fCv*e4;Q?D3Jxy
z=HB~634ID(l<-M`NO6+49jxvXqemr-9+fdd6LgqbM7mFmwn__NOD=#iMutrHicyJ1
zTV)zi$v~S(_lZ%fM59)jMhYWMq<h7vM5C5PBYP(qjoC<$^_`8Rnv&Du>Z185s4IvP
z%}2qe%D`<c!0?<<E9_}^g~d_{izOcx?)3dTOq0qvoCgb<pq0{;t>`JkIkU7)4iCgi
zeK=ps%d#FBbpUWB4AyrsIEjiQW@3MLSFAp5a;^>dV-@QsVx7V;1w}fenwuBF73C$l
zz<VqXGRu>5E(fSe(xG-#BS0#4jWWM;a5Pmb!<~DX|70a+nT~+*&2@0o5ND&Dx<}v$
z0p9sNO;6z%Z11l`Mol%TJ{c{+)u3vNwc2U6SYN5vNtFWB&Iz%a*6&Csl+>C4jrB3u
z(id)lD(hxbpa~z2#nK1J!jQOyA`5svvWkm43U{R2U-InsjDWtt`vW<b1hmFa;aS5I
z7!&h7goX5{Puv{uXRVkjOz{$9nwM4$-s9*$fA$l&=ox?L!IZkd>)ykbaIr1CaZoFc
zjK(w}c<NAJi7>ToflAPd2Sd#76W}sQLo8~jqceFj;c#XW1_fuy6_yq41o$Q9oauj?
ziCr00ijWpT6!rTzi+b(#qe~1KRSWoEm;yEpb!Ivl5IXAhu(gbyAcu)VO0k2ofR=BU
zDk&#oMR!kejX^*9H)prQ;)|H?2K6M_7Ar$JG5|z`LRy8w5@Hd^Q1}i<njjF$l7uyA
zaw^q2ta~P1pNlHlpb#%1BwqHVoh5_l_%O%mpuj)mh$z+kF(*W~Z|jiPK)jjn<w+1>
zYKRSC7E8=4gUU)(pCs?HCoA*(9P?2@_##Rxd>Eg8Do$x0I7O2+8;|95l*nVowZh|?
zaahxg<8B7A(t7OhQ*k8f#ck(h9GW)2+rzCUcm@BDn{_V-R*2#$TdS>3%TurLon|wc
zjXJnF`$<#K>g?g@9UDL9f!Gz2PFuB=x=WAQn@_!Y&Ng1snPwiaqexX^y*h5tQ-i1&
zc<vi1KRG!)K018U-akCpKim^}d89-!&J(ef<6zmAM|oY^m}(%+Yk@3f1qZ$r5;1JD
zV%v`_t0swV)$voUHpP=C5gJ0N&EmEQeu{o(*HcxS3SKBC|93RRO_N#N&(}x8hVY*y
zOWWocjgO~T&*{;Sm)=ed_KTP3a?pTnP_0$KN^XxkygkZ2W$PvVJey8GoTkf9pM-ku
z?pnod?2PniQK?E!O{IBhGaY5Tq7NEv9SvJG4knvX4J|SWDt1!w$*KM~<_D-?o$?lP
zN6ds$#2Ou$KjrQ+AKy3vr?^#UB#j3pwT&BN=FIXe<5#orS>1O!`#tfg;QCukaMiTV
zJgI(V!3AJwhxh@Umn`>ZTo6^O>+X9}OOJfF_?(4@s2N4|U5IB_G7~<<>A0ORo*;*3
z6T9<^T3gC`M(Le;`c8?~yPI4~#Lg>M=ai-FDp<Af#u;YrP=HP?tMFub$HH>vzULN<
zGYG@+v*MS3#Wp3mbT}i^Rxg0N?8Vp_F~@@lq_sH~c{B(!QNyKyR%f$Vk`d*>izr9Y
zC=1R*ne>Q>L>;U?Z-JuBFt^}GZG8<D>vB|&bwcN_{a)E3*Gp=A{2AkrQ>-Q#Q^l2U
z3CZZP;wk;pr%LXHCO}q2w<|3q#iV$x+-5ZgJsK}!Xn9d6GEnpu4&Z9yWX9HH3Z6{#
z$_e0bjcGLkl|lon$nP#?`At7Gds-pk#y&JWr8pVew#-io5{_JPj@jJmMmxRFtXZ{C
zX`kJ+JIu$M3%BTb#=f=;XbtgDyB)nZP9>6))aQJ~!z~tB7SqRQ*w0F=GgI$q%5yQ$
zZ;gcn6O}cnGkOnCep>n2X94@22+019zd!x-<M-AjibX#po2KJ@^t%Ist=t{Q)iCpa
z9bP+SM-`CT7ckO0VvYM8=oo3HTnk$Z8ejkDogoyq>zS64?AY)wZ_A%C{w;(JxSWX7
z`%>2}z`ED)_7L-5VrEO?y*^gsMzuE_$Us~%NV?%dIeV&=zVn-fV%ljp>$J?t`jG#*
znE%;HuR34;=hc;!XQldY>#Gm>{~z){f5rKqe>gh+Zs+*u!{MtFe;R1_15|f0+40Wl
z{>%M?{nH=C?%TcH?@sVYQp>0=Pf7$(*Vq~7d&bPdd6I~v-=*mA3@ZcRl{^x>Bf>Pr
zGOS^S=NsITKv%}J%ZyI9)~0$d2b5=d|6uR9j-HvNr(#R2K3!eG)cpjHuGF;`0FVf+
z5i0d_iiu?5R7cFDG7w{i1ejtKw~gL5lz$rLLslgkBy`JxCJn3%pThUi5K9_>0Lplh
z$i)Ypb*?YM7zuF}#Zi1tiKlVcs<Q@87(E##c#NTT*D!DFjL-|BzH|<XO-5+GLjmMu
z0Xst>DwWVC$~-bpVQY@h%a8r!WDr2cb@Vee54N}A349_SyxTuKI;QWQ!?78L^#(`?
zjhfH#*dv|)c_bLr5XEr99i~YKrk5+$hcIkfOUTL@sF$!I#~8_V9U=z_Ie)OI@GOiy
zM7iV<I1V>)_3=^w4$2Y`53(IqGfIQ=L7>{gd;{kpe!&?{d%&I@lzo^%O7K}2W=-)S
z1{so%FhgP=69eFs<1){2Ft{o)Ex1Il3+&Uum@RhA3ecr9$QVALIUHeAn)M4zd@Xa0
z<6B%M;v&HEPMwPYIS$apU^eQ{Nr<jkF=C;R0pmrTh_ZJu>NJ{Be0gIe;vo4~B1zI*
z_=_aSo84W&I5>|JYsG!D^g}PmgZ>iz%)U`82d0_Ug<d<g^CwU0bF3iXiKbR*P<Bu_
zUU`_Bc)+g_h)|@5Oc+io7060hdnCY-3}>N536wp`_r?J_fI~NaZJc@{46{jb+WI$z
zS-TFug(<#_lj|k|JmT6|R0<2GD(*JKm6L>?Swea75ZWcI?Y#5fVghTElOEUspQ}vs
z!%;d+sNj~Ad^J1c{MNQgInOJ#m~?)Na?aNaBJ(x^VP>|W8J65byQSQgIfuR>b08^W
z2%)Bnw!<6P)o>nx-IfW{h8SYQ5a>mvV{lTI;54z_(O_t<G1RCjYU<U>M}dV!`(3ky
zvZkr&GDG%#KDywZ1%sjmV4E^&H0upwvm5+I9s$_+W==@<`Db6HSv_r)9SBYzOJ7}W
zj;+~u>ZdLJ-VO%(X`imZ=jrb8gzg^SX?OhSab<V<y{9{zj1yd5zzxVJ<Z~Msu_tvX
zSW*LAYTk5k@__vhw2q1$IL;xUYh*)QyUKA79RqPHvk?|Wz;Z<@(R>-`$*OydKRGnL
zY`spBE&Sl*P-Q{f*w{eV<N~zT6lr=Gnw{OyhDK|Sw%jU*Leei-=n)&^fKmAfKjtJF
zV_bA7up2DH5h^|*B@-JAf*i=9aL#0jK~_=ROg@>a>tR(yl+6%oke-iF9A|1*3t&I~
z2%wOKNR5Q!eAErCo!a%O#2Bz%m|_h8(5nEVq;$+wCNhxv#Dl6gec~L)MqdV<PB@fm
zLDKIh*8p_Pd{ZtJcZQwGrKSN@v@}^LMD0<%Gzc!bK{({7D%w;AWGI3YsLb1fcnK5d
zol}@`Dukkj5uo{8>R$xX8X&{WZsI)n*g&YR`+A2Q_L1fFd78E;RJNKPt=zxGl=c$0
z0GB6-wl+Y^I>i(W_$F_PM!Tj}V>0YX05N7ZuIrKu=v+U!#ja`6kZpV=$L7b%8s>BZ
zm=bBd6V7e)@>r)zM@3|?OJJxCL5-KNcxjB{XH)Q>OV?2sq{rGO=yo6g)camfe}FbK
z(V=S;zR`LpmG)5o&9VQvHK^vV|Atki){6DtUaUWVu>U>O|Ngq`f9}2eaIkZ_*WNik
z-uba)1uVW@kj1;zwKi+}>TncX0mbN~Mj<n3?;(V#BwJG#hr=^32F_p%9T{MHbWjYt
zO_-PK00Zj=*;S`-%J$WI1G+gU!AA)0U6O{$75ZuHet5N0sw#&RJ~=(mDL2wgg#qE&
zQ{jepN@37#jysQ=N~NXKIGWImPK0>ulCESu=9W9(AML+t13paRy6<?Zx!v_omk~TF
zjR{YPQ`)@b$*rHczT9x`<apL3{pn}pe~s0~t)m#HxBH16OQ-8;J7E9tvEA?|)t${Y
z9E!QDm62bW7jXs9kLJo8DB=~N2g7)2bzU4LL_dlOt~dTh^5Ve{J3pRCu;wP&KHy8|
zeGNR8AWwBSKX1-L2fo`yfS+e>rN@1xa)zjHcTV?r+dC&Gd&j4>czYYp;6EOHXusQe
z1%j;o{`lz4@y@$<`vC9VK79P$UY%O&xfI*tiZ8n7M+y2gKh`E7xHboZtDE+-ReH8|
zw^-Xho=zCT)4Lpkezr!>p5EiI8VlpBFXqGb3FBYC@A2zrPwCnEebU4p_p}6hHi1CT
zeh&iaXY2Ir*%wDNyVdgv6ny@rP*6X6M$evq*;KWEyqG}Z7hf`o^|R;n?8SU!Uno**
zR_aehXZy!rClLLwe>9@&XD{g4UwI<vq`*UbyK~b1e&=|9=M@a}bPvv6aE3aWd8$b9
z!gznamq>N(nZBZ^6I$6<p_SX)BCa!{%A5xZ+yG8wZ9HbehiBn=6l3&MzZu3|{DVg%
z{rjYSkh7>PixYD-!iNkzYGXndQU|@q6X#GOtLog{(ZoA^@*~6}**CkCID(E2a6;6G
zGBV%P9S=vYN-*O}8grb{>i0b$IOh@Q%OF+Z0QI{fWu3mh4C1&axBFood77K*@Hh6%
zw=&~dSDo=T7$JGun@e0yAsDS*#*4@mSJ*9}Ki3uBSI}`*(vEU}Et4xukVYB0704w|
zcCe(IY*-#9kG30P3;rMegT+oUo~N~4^pu_^wE($DtDd&E3z-BkkwE_<Q+uraIK9Yt
z;2cPA^<zcTAF42V-XeOU2GD+re#X>!DgCA1X;$L=uk`G*WO7w7fit%+R5(#ga1)q<
z78MKFI1$+6mnjEq=YfUK_k@IAy#|%?g!mDv*fUk}15TL%=xk18o^qQl)4iZg*U7r6
z%!Vql?;-K{Z-dU`F9M(99DR{UO;4(Y&tl9iuJ5HCx}eSZ+NnT$_;om%fpT4i-=-m5
zN8~{Yd`_1A&Zrw~2u{ugqm$9S8H1O<O}qoMLCfcSRbk$CNhY)PQ77A0kDOR#vqJjZ
z!h?&X-x6Lh0fY`ZIb!}V<J$NFpj`)PHzQxp2(vEY{lrCxsXZGCz*PUR>ohrU_&RnS
z*_h<*cQWb)#>=j#{Trt(HVpcG-3&YF1*vS^#*ztH+tU==u%iqjV9hU;^-9qB64?VU
zdB~lpdZ2VmrMxiDLPK|-E6KtB%iVwcLwb`~zhSppTUlLOdAhcyZ>CO8U$x)to#KTF
zxivVw22G1JAZ?4D(g^td`}X&H$0z$ohxMs}N{UH{mY2uahD15d-xyxKuha4vH;fs#
zr|JC$ZFy4m<g1-NR{kr+K}_M`Hw{*gCt>y2i<Ls_ez2<WLbNsnj~il5HLH8ETKLO|
zu2hTf7Jh?Gqlo>Nu$|3|)BJmx6E|mElZyA(556{ErT>o=MLDPc&)V90DgF=seDMEy
z@c;R9`2WxnYRed7zY~!2$-+#56pP}YT8;})8UIOB&5OBWA=7t;EjW@oM4ulZcNXT_
z<$n}+*v}ekNDR~DD(Z$^3`PTB42r^~gp9zk0u#IBJB^CJat<O-CF%MP6L8h(G_j${
z@J>F|WN!lHz}?WSO_8cLn{uq){F3HxV6$xlSMR5~(ht}0khKp_#oI9Go>Iwp>nS?C
z<G<7k2engi0d2~Uu@Uf1zx2j8mR0eDT5N346Q@!~JnHwkTJEQ+0j~lG{h2-bwW=3S
zofqcr&wR07)%01_i{}$zzNl*YSNny|)_`$_ZU0Q^rtUXer)zKqcnbf6gXifp_}_E*
z-wXKPUsbVEoD<GHNX|6n>aV95ccMR%)m`1R44RD!y|)}3Ep|3AEY;AlAfnM(YuX1k
ztAZBY=Jr-w{t&rUHv*J3<y0HD8aIDV9}3QAp-{y)o`Du{R9#K(Hkz(mD<hC<h_$)t
zQ#D<Na?6V<1ClNu>NjtkTKSmp`PvnUN_(B>{-vxm=D09fUgtpU=zD9L+@wY_0aDoA
ztgN^_MPoF<07brUO~w6cu-FQklxcW7MAqg+q#^DZv8Q*0*u5jVeou(rJL1oNBgF50
zAkXh>APbXik-ziMU;JT)K0CxaC)QuTVq#6fY161Fheb7|T63^91XK<zjhqw$v=pL6
zu;OB#ujDcApw~~Xzp1kix0)EtuI1m*I~Q<ZReSok|6KWYYW1dwa8aAFj}*V}5Grt9
znFi+)YvYFwFBGt$Hwqj6n0{3wRi!6RtC`+8YBOuJGb^)vkW(kgsd1*REcMsIzC>Af
z>8V0eWkayWi5irUvb%=)V5qWZ4kstLTSYMHFzjGya4aFVWwTxhNlekBFj*}s-kvn1
zmB~jJgp-(5X6Td;o4u&tuU(O2ik({8&BN=x`6zS8lw}+|ylZYoygc>TacPj3S{2-9
zi2u9bZ#e9(Mp41$vtsM#^-@I*`pm^t^Q*HGs;YSusy*GJxQE5H+N))%+8y!qMLX`c
z9Erww1u$M-3>KFiLau!66;LLoXqm^^s+f;vE{CgIdst0}MWMX&RF#n36xZa3=avmE
z)@c6hFTQ-|oh|ClD#|0@d5>KG*dCu-{`k&2<nqSO7)|ZhyY7q&7u%zA>K5O5e~(K2
znX1Hh-q}_~U-vh>t?km>Srvuvyt`I$faPMrciv@rgtFXkMOWj%*WD7n4jWq4dv6DM
zDHum1U6J=zeW;?lpR0_~GZ%}H$Wls=9W24H6L*hIG0F<8>@9ys)><1EcXWt&Pma4~
zmuK#Ui$QI>g(P(W4NMr{Fm+yjTK%~$TNiiqHJ=x@sDRH;ZNQNR{MmN)SNBD;)I4-&
zVjSs9wJ%m%VutBhwWmWO>`?ADc$YZHMy8_3NHx1i?53fCF0pto6{PIR_}^$a_bPD1
z1YYvfv)_7cG{jkXh2X<`Tqg+r-d2mVkyK8WuVU6}Yu1%hn!;6k3O(ydQv0sPH^mAo
z)HIuw5$#nsiGF(7!6;So{_%;-D*Xn>cnmtuE)OnxU#S1zyWryqaOL-O6daoXY`p*9
z%8Rw<D@Fgm)wPHG{}29uU%CI^>-~T1y*k<d-@Whmc2AFvOU{4g_ddTr^6+Epj<Y1$
z*)NLE(_}ORqNO1|;iqS5gnlkKV7mz$iJJO5{NTUbFQk*7>kDYvgj}CM7u0zY^l{E$
zt|S<77Gx^Tk!;VQmlQWcdHgB*h(Oq1sRXtd6iPK-YGL!diMglWy#FBJ162cH81*rF
z$v}DiZfAMt`+qdh$&h9f=a7?_hY>~Ja4RkygV}5;y*l>U@5A|!ohUKPCFQ!^`kT5L
zE$33(kl|hO+a%4X9s?r2^M0TDfnG3Yo8+w!v!L~ggOzTO-k^a6&&0dY{<H{Vb>jj@
z+-vu1%;}qj=@k~k3ptpUIAAI>AsSde4xuMl&<<3kOvZJoI&P?L+O@bYouq0jb#pu2
z4f3E}bBbjzwrVDc0`Sv$9Cq7-px;kAwHRpxK$2eVv=0ABg>0fB9LHaKOr24%?dhhB
zs-1~cTuGCnzk!+KrfLj=1f>YZN6R}aEVkJ1&qY~3L9O&2*v(k_#$Nz^YlNjm<!Oii
zA9wr{FrX)e<BGzf`JdC8a}p_!x|fDwjlQqzsC~M1=UF!F><ev=(FCW+UvZ+6_&LOq
zXqH*hLbV7do@2Q5F}p1qPKN_F>E+P5Vop(mjRF`}4u$iA*Joi_BH3`1U9>5!g633z
z+7QNTt4KN?2fcjq1k=3ao3rxH*rbPJl09hB7P2`=%Fc{OB?iAu-_@BmnDpuztk4<1
zToS84&rkSCX*KQ(0p`GD*mlN_RpO3L_cmCM#I&TW;e{aV6PDR5u4R1`Tp9S^>)=MU
zw>IQ&GaZZ!%8%6<0&W83F~H#>zy-!ijDmq6zt;+k>^<x@CzFSMFD=bX4mXcTAw8uO
zA~OLby94=aRKTPB9y0<xU8IS(Go274Y|h1e8dw3nc|e|Gi5p7TbZa=2?Wq7oJeg|}
z^gtDEcQ8^7lkxOHbg{K^wn#2ha(vTeJo+tXrEMopsLF0{EJEcI(1XS1ceDTOjrw7_
z6QB3PY<APg82iuK>gw}?{b%LH^9TFSS84xwCQgICcpG$sE*xZoe*2>P_lqR|bHsao
zIM_ShIec@lchY`$^a_5J<2`+!IJIlp6q}^sH>Ia<!ekKU>CHFg257a#Mr^u$6=j%3
z<Rl*rqwWtuie(Zrs)FmcFR{@G5M^7EB?#K@k}mwRA7Qy&_~ja!BK6o9kN`XdYlR~!
zM8|E@Bqzg*F{SL;uhD!$b)oo}GQ=Gx&c)MLo`(A{M=7Rn$yJUQg9Oaq4W^s#0Bbdb
zq7{9C;hJH(!RX_A`pYEK87miC+zhrc)sh~ZgrWFnihN*^2^Y-(rUbgj(pfI{%L@yi
z?ODY6vl{c?CpQ_aMx3_^AM)Q^=6Eb)9EV))&BGbR$}?V0eb`UG;{E^i>(lwS#_Ik5
z>C@Gx>+b%)`eF@UKkWa1;Qe1zC;%&e6YqlbQoMwnGEBQ@n;+os08W<rNgU>1^}WyQ
z)AC&pJ+qRwQK8Q8W6OHP)(88^AQ;*|jdIXh@NL}KiQKSXV^Pr8K-dU&$kQ~4Gu(u%
zy?cfY5WrS!eBO=U={i?ktJ-{2co~nMz36<DQbilIt^j1dcu^TY3%fnuw5STm?l;IS
zXF(Qr@pkAk%CcTBr?R#YU1FiVyCKm2O9}S~7?(GOzHdaR>;6jUPM4^T!2dd<G=)#F
zxZ`zF>|SEOq1!D9z~EZcB+Fc1UpMgq@(kus?E!hv>*5Wt<kAu<Cxv?=*35wQ4)7DC
zT|=Bj=jS2oGj!%|%AVeRcc428<K}gA84bg3#0-kRmfr>WMf<xjjl+KXfS0t!BFpi%
zVK4|&OhS#=U{blWa2!$!Cqt)yW+7jc0iV1Iv*<h){2yoXbO}hbi{UrjuoH3qylbTM
zAn4LvS|=o>L}Z<SZuZoiT?GB!5>|T5)6sw=5|%l~K3yh3+kc$PYsg;EiTY8_8RA$S
z1vWiTgTX+;7E}ax5S`<tB&J?e3xaYIf-KAX!P#=s$%89N^dCr(j4w&9D3&}6FM_Km
zNwsLjMS}&ZNBu5fafNrUTp&3PF?Y8d6d}dgRWLRXD_LTxrX#HLK1wr8l7TmjSP(Q-
z&|rGFVs9e8Nrw4SGRnVUY*C02Sd9%1kg^M6X>f#%Gn>+oxsvD&7io<wwqLD_|2kpf
zvzC0Maux#bM6kYVFti^U`@njQ@Agkl;8$IUZbGtk7^Wm=<m^%Tr<{sW(c;c{>0=7P
zT%4nL1$=OEbw$v_DyOp0EM|Xv9K~pb4UGuN1i@qNAG$?EY9H%$u`Ry)xGB>jlKnU4
zi#akD2ZH75(%H=t8-WKfW6`jG(-gwUmgJgiqee1;4QzBt3Y<TXzp-*BpsCAT<63aI
zgJE&r8=3&Fvzp;WE}m$RwT<kcslW}_5dLj)ZSf8$<2@Muh9ENVN>pnC_i-jPUg9K7
z48hb8NRE$9p~0@z#j#u|?jrI|E*0v+T%PzTPY2c`597{_(5tUT)w+<%;YXMq9ytsY
z#wN>}hT`z%ml3AvIE&&sJq+}MZ8$Y!p&O#PBp|TIzeeY<VwbX;0Umc@&8l=nRI(OZ
zkP#lW1fVjt9jN^tIiAYz60kuCC$Jm(x;VvpS#cq4U>BM)Sxc7VG4x;vm?^poW6E&)
z<Q^`rG8)^&sU$f#>h}#LqfA<0VIt)k)(e1p)!eVfY-E}_V{4_&mTqUjb7xl(%Qc>&
z5bFu&VWDDfX2j`LrUnJw$>P3p9lF>w3S453DCp!PpzRGZQ&*KN1~D}p$(Yy3qSx|*
zApOZvpMJmstveqMPEkR_-*>caF8oDkYrqz6%1@gYE&Cyzuc5=`FlyubHZE8^ALBi4
z$#x?6-p8NrQ3VD|e*<$Tt>Fr_ePCDgWRqo>zQ97E!@Z16DOm&q`#U>`_<F@;?w>x>
zwbN%eg{!1L2zHzNr$?`j1l1D6BFQN8E(GfnLh2KDpiNZGK{UARt5J=ghFcTJ?#dxh
z7OK(nhf>F+4>yFXVUE)*8}LUOU8$8W!QesYFW#}+{qWdl8v24;=6o6w*gYCHF?sww
z)<fb9U$Tw%N!kHkX;@EQ_wxN-M^6g&)VuCtkmtvn&G}(~d{QtbB+8!#vd$cp22q}E
zxma%Lh6a<WGyw9;jg9wxP;`#?ID1UO{!}H}xHV>a?01csH|S#Age>@Gb-{2M!+&}6
zJ*r?^Gg9>xp6>T4hX(zda<@4!lrM^e^jN?`(%{5n{J6nwMGSxA5opkcZ))(rdSlRz
zdYu|RZ@^>xpvGT)qB~X}CE$pYLq{$4NC-i*JWXo)@ng}=RKXfTmb*@q>#ThFN{BT1
z9XnF-*PWza`8B`8uVMH(OkhYSP?riYZu*)oXbmlBw41sc9`NRZb>Zuf*7&%nY8me9
zdS$|$DcMCc@mO}5fz>P7MZ<Vtm%C;cO<F&@nAi1EQWKkDKq(JWZ}!OcK77Aj+{U(4
z8{`3YmWAz;xWV|yTB-jC3LV4RDksZ0zOmnGRCMc&4end;e<pvvD-R6vzckfCgmAJD
zulvEdcJ+J3qugF;;DyY2f^JA@LaxL3&xaUKf?;=iRZ;3K=;0?)1>as%PqDxloQ3F&
zcY??NqhTHmq}IQ^F2ABTH``CwUOa!nUWF1bI#tRfEouzF5BT@CegGWTVB~x7uWVbQ
z4#&`q0nJbi|AlJ?-`D6PS*D0xHPNta!rk5m83L;@#1c2x7z|*6uC4VRl^g19mVwbS
zI|*|K*e4;I|GF*PSGC;jhe7IZ`xx5(NA_OT9E4*{O)?q<ST)9xNQPM?fn!S;YWztv
zKGUv=fUq@wZTt2@_brw=(t%8s8`1={IfAK#$*Ag9wy@){*N6A&BkG4hD&Nu>hh`q$
z@z^9L%ZnC5!og^*HVsCUOlYM+|5<gBGAONSoZwUV%yzWJYugHiyS*R7@wQb>GCQw^
z#H%G9+t}J)NN|ESrn+!&rD;AK4Nh4Pl%H;F>}P7c=%R-?Cj7i$cmO(YN(X3xJDW^|
z(rT^NpTNjbe&_=}**u=uW*rHJ4oDt|2A*b7B*bI*1j8sF!+ZR1>9fx+jSY!uoUix@
zWra}6LCY6oW&n1$H2U~5$il5c8+RV8@9Zw+WY~<oy;NYMlI(Xo()$JUW^F&7c6P_Q
z74+`3@&p*Sr6b+Z;lYnJ8%TZ1HZH>BTAl2?+fz-^g|E=G;q*=(wp2_nSDC_>U@Ho%
z;n%F#g(z#I8FfpMt)b|uax~T5EQl0cjjo05SNpGD?;Y<Qp0*D^yxTj*B6vT(-#cly
z@sq<n_~P`a{o!P<efoB<y}NUAieag5k6yKpUbo-voF4E0<K+&%J>r)xkiCDrKiJ>h
zKRx(y44rToHFr8+(;oO3UljR6zIsK)gLXQdFw0&>xjVYkxBDjwP529^t^FD%0nj<#
zJHgsJC+#2hPv64ek70V?(|1S5Kel&vcj0aO<iqY;Kn+Isz737uAMETO@=)P*eZi73
z$l+V7Mq1!+jC9Zoc5}fmku;-<xuue{l+@}*K4JGtGF6G5QZu=q*?1!UQ5=It1k1dO
z`pYBC!2av6*2pl514mo8b}beQTWV~L)yAtESgR47KJvlP-GvRLEqO4oW)Qw^W?}xn
zVVcye{norZ>nEMd+Dc=^5{`{kGVDw3od5ZZ<1auieWU*(8K|W%e2V|&QL4`i0NoZ+
zUKd)Z-Ej(;T@uZ%5uj4;Hc%1OdVYgdcvSZ_j<=TEsubgN2#JOtp&IwIh&Eh@PE&1Z
zUh<ej?|kHe_IF2~X)&6YKE+*|kiU(pDYZI+GhWj)9PKF4`lKa%Dm1+!b&*F}TWu~A
zG^s|@rib6K3kz&_A`9`BZQ`KyDdFoJb9o^g(e2QD${h{W;zb8mwQg6|0q;t?E9z;b
zb`FM>N+Dw`lfVRcHvuP4v&*W_OM7g6TK&*L<F{83-R|#3L)5$TA5B9nu*4rrMHHW4
z_JUEK3@GmoxxV03c0=^ri(;}jxn>H<(PQ*MVU0+to@<MiX0^m8)F8`7u%yDPkoIrB
z=2h(+1<|X)x>}N7^;BDxv8r`0!p<e#n$tU3@kn0U_M%+go8yHyMn&03Ny{#QZqp~*
zQ;|at<G+PehL_hs6&6dj+G@Vq5E5_q{j{pEjh~|h<4q_V1Wcxej9Pcets@rKA-Pnb
zM+JFgMC8O7h#~sPbxHOo<XAe7VqJ}x4oQot*KJv%8@R7-F^#2Bizg1Sk`}I1^yykm
z%haEobqdrR23-}ETDGkiY%px0MnjXw0Q<>i@pFC-D@1G)QZ@S-Z_*(~C3Hf1y+pno
z{_abM3`1zdpb@JoSJpDa7npSS33a)=wzNuJra^oj3h~p*#xSb8Ef5Gsu$%9NX;E7m
zMjI>j3Yy@*U7FC75>1-eM~!+?pIcAd&v7>Gsoq*)8JAt8QGAL1G)qgOLpdLdGsk&H
zU>my%)8IT*zDwvhN+vXt6}_a7dJC+JC@<7+pqWx*q9rO9JF*}Cve)yH;CH}S^+mOS
zYRzYb;qV;NtbZH^aaRr?8}_BBLUimg5Ne3>MbN*Jx?2T`gdqP|6xAqjD_U19y)^&>
zt$_4`@9){pyrdcy&%DApbQb`+E##TE(88z6+NNOM%f~3Ij-^+AU2O}tti`CIj!{s@
zSZy4OvFvgN;|%ON#myJCnDRzyTa;Z(ftRrEb%Sl?mySmp?5w%e1x+x{DD4oc7n%Pi
z0Q$JAn*ajky9t=-v}B)`;Gk5CR~7+Kga2c07Sz3h#Vo}OHw-P;H@f61h7I){Y!xq_
zwqJq%;b%ogjlTfGd~B%Wi(;{liZWPHd{x=-F<}W3?s9u&^KvWxxcTJCr5W+!$0tuP
zcY3xdU8gLX)oQ~$MQ6x%UjJB!fvrBXG#04_Fg^_-bpTArxUumr7}h>E03iw5quott
zzQc)NJWz<)s#+^e#6f`v$D!=wj`GC|Hp7JIQ(Jw6M+MXDEjZbffQiCvmgqvpc;Xs&
z*+G=1?0}A~`eBd1tvh;We1V(Lwq;D+wC9ySy4>c^HZP?e6cf>0T2qDS|8$9|Td?~Y
z{lxE<E`KgqbTk)Brd8>A<Ynqyo+D%3E0j#&Yo7^TMjIRB95EJnKFIxnE-P7}kc))!
z`>4<3DK*;w#`(R>FaT{3@(n<{_$@Drt7rg{J<KT_3NhU742J>>xn6Iti9CaU1<?k|
ztcU2zuPXj?Kc~!mSK&$0N00A2%m1*x`l68kVQuBbL;i=aG5^ELUzaEb^i2@<#hXzW
z$KhEp>WZ!N<~cn2J9<esd+Ar524eqo?|A2Q|NFi6$<e|0d&iZ@AjW)H3i>>OiOVqT
zvmEtVT(>F&R6iFPzpyhm*pZt-G;or3UIl5Cj50WzDgbKoF`--%tDg+kij?92x&7P<
zm{d`r97+ZURv7>WCy|)KsigWM&eNca-ty+Vw}3-<0Y_8l<ITI{y^})Z<qQ$5MG}?6
zS&<$8{;4>XFvn|TJuPHlD8f?@l6W_X^EAQ0qH^AbQmb7<3HbA0Z-4~UO1j%m>NDn0
zojg4F;C9w*F2n!x(Eo4HNzS6Gw-${jVoP%{#i(BC50{rGe0p+nbo@RIfeX43Qg14%
zz1u&O@Nv#Qms3}aSyvDHfA#+VT?!M>xeU|yTx;pB_W#xArTpJ(EAaWl{{M&H|G(Nj
z{JZ0$!@b?__KvYU$Ey!`Mdshb$9z~JayhqC#mi~)I90c1MS)=OpC16y^tqpkLO>}K
z#plIL61F}8;vDIYVeV*f4%3MC?7jh_%DGjhd+=@RektYK$`?NS(+@iPN)eEGa0-l=
zsK*n?Jy&j}F--G2%KrnrLp=-A^LIh|%P2^@`Q6BWPyWBPr|T~s<p0-r{`(trmkdBm
z?*_vZM0&p;iLE64w><k@;3aUHq452x&wvMe-|rp1+&g~rZs+)aKI|O7QcJwBeN=yk
zc$HHFRU`u{K4qo9{QQ58?Pp~Xa5x%ZK7pYBD(dxGqUsomU_5(AS%9eBL%&(luck>w
zR4JG$(h*7NU(A7Lw#07=)AWAl_+;-@`{j@AS4aQ&5uOw`iE`J=2k(AK@#I@Rsdzu`
zBygOb^4lSQS#bdLH7PxsvL|5aEDwj&TWNc$X37V9>Q{XJd-opm#GLTcyUYK#`uyqh
zXNCPAe?9E~zvuoRh<_z`4;S`ebH{{5paZ~};Ff9urC5WV_VqC~LO9r#+%$Wrh{IM+
zFC`EsN7_ZwPtFohx-S#V6udkPuEPFuy(-c0$`br*O}xji#9Px=Y*E9%7r?drK*V=4
z8x?Hx4v<Uzd|F6llQ%qQp6dsHkK!~6zINj`2$8*=gOd;M-ya=U9-1pYnyB0ihLkqw
zqUAfx3gJ;n9GRxH5oAH+OZnJIXk2;jbRf-=4pE9(&IgRnEbvOEz7xHE!s(fYv=}+V
z6z6ceiUOLwtw3C)VQ>4JiA!ZL?4{qd)Z*J(4j4<d)~+w2&V`8bEaZ$^XQQZ}i_s9!
z@b!VvXsQ_`f?F<rcaE+JXY#8z@807_)*1eq&%jz9V6T1V)~*=EU77Fg&HDq>NO2%=
zOV{FK4T9X~0M2EcT*tUV{y%$P{-4H?Jo<C^D>}}5GRPw&knCi2y+wx|eOH$JB-u$e
zcK8f1h_QhoGlPzG^#1l&M|V%p1p~rIyz*`k%=A%RU0qdOU0v03%tTu*hpaM%8wMs?
zWy&GHJI6QTbYbK{R~jlPei?^xWHN|I!1Z9+IQAf|Q76bE`TF?vZi#Y}8P*8I>{jY2
zmES6nZ-k9WAL#TRo#LI)pcoi&{pmB&axtAx#~b6Idv?IlxSp=l3v|@r0dH<X?WT&E
zfUT6FHu-2k%Lh)oZ&T<+bbcY$EuDJt*gs(I!2VYe00BD*VF@GRhV-A|UX}lD<i{)D
z|H|&XlPh8RWB$jfRH#zPXhJl^PV{SY^9%s@2Ibn?ng##S;B-yVg0lLTS(aU)DMCX3
z$`Os5ofc=26UVP(E!;p0jcR{fu2kv3tK6b4$V0E~jYF}$Egtz!pHIphbi&ybkML=^
zv3w&|M5|IA0%tE*#Hwhiuke~$02V@CG19kd4c#%4d}X!tof%a`2S18D`jDhHHyM%a
z?6oOgaBR5zLLZ~x6Uy`GC^!OHjUm%^#G0iao|Eqsdh|xmwBuk@@XYyh?E22p&<&&D
za%L@9DxG~;O7QIG@uh=b)vHE_-(<XcIqoN4#llo5ax%w9ArH6mU!QnGIZ`gfNqAyu
zgDGfY4`jT9eV6+lkj5*$40dAeZEjvB*IC8L1*6K^H&=GQtQ(QhTQWQDd)L(O{W@Nm
z(6Mr}+?Se8kO#^um*-&~NQ^i%RF=+un<F;{OU!LI0{~7Dut)$XCqW8^TmX)}%lcw5
z(1#Y&ILHNH-wuK64i^YOn_Eo4AgLk)0%sJseQz-k4D5Cf8g9xqi!BHZ06X;Jvn6p;
z5rzIu0pNxggp5MT+G+FW#+$?D!EYyxmq+`}^NRz0wd?rk<<Z4?2An9nJMJa$TG$TU
z4^bo=hHQ;%_T2&UZyfUO=BDalD%#@U+zV|*z+ogR(^bwacE*FjZBuI#<%I7wV2EBe
zLviicALu(eua~Fc4*k)W#Xr_i#)5F)ieFsLo^{=W;fc*MB{Pa(Tm;KO#e|Aa%0*nx
z(g<V8<oPw1El1fX=V_bg)O=>-f{#CLrQoo|6H#uh*oe4n$Te(cPI@<ZId1pP2NXQ_
zq6aKw7AUeG!;Ivh$!-N@fN*=q_}xN?p-K*TjS+gwep0C1;PB6}-G4s*&wp;qw$-w*
z=tOW%{bMHxJ|Vhn#8C^c?iB$sO>D%i<bHzgiJM(o#@lSFqpJd%UEEYd@A!a<mzA!O
z?lQNx**coq4*{P&row}PvJ-khPMhw4ZrV?8AMf}^eW*@wW1@mI!xs5)@0Iw#L3eSR
z#vwr2q`>^4JoNb5B|aND4o6=fdYtYWcHK6H91p^Amu>_o^OM~VJd7od93A(T9aly-
z$Kcom_G3)iG!9{y6t-RlRUg>HTYZ%i<A(ErK6KH#l_14$`tz=Y1N2=wtx+>fZ%Ql?
z|3#N+<eyzbP?&-Ja@GI9jLDbS=BVu*Y*CN`<+YvY3iWX>c!dX*A?7DGH!!e;nefWV
zeRV>Hozw1_bLnCj>mdGm=*NE#62B214D5}m15(f!FVqFKWZ(gfjc8=~Ma~KoaS}!J
zek?!GNyoV{;yU0po0phD)wh^Y?kh)7fH3(8AlfY%p^S-scs0c7C-#H8G&-um|6<*w
zJNP(>rudJ(K`j$+o}OGpNjCM>9~A<#T#+0gf@29U5l4(2+guhD@~4scbk7o{Z1pS(
z5~}VJb(~Syvy6D4@fJbQ^hZ70n%ExS=5Fk))U4Q(J>!Yj?FLTR!1$ErgS{PnOV_wj
z*)1c8J3DJAoe(Jbm4MyZiCyevo~UuG>{vqB?RKraMl;+*^^ep(#@g>|@5ow-*IGs)
zpuAE^j}Whk7O!sr0N^W(;C)Ml9$0P!|5m!-o>_1&Di~d#0=i(dxjUHNd>iq@$-Z?N
z^NMR!{ymm|zl})5^6yR5#qVgF(=L_~TplbaeeSXv!xJ@;TNCQ;sk(bm{{6BVma4lK
zuN(V;lSV1rV^5>u;1xpgVVm(IK0J96zvKNeF^e=wgE8G^=NY5vvP9$e4~Z$X#9qd;
zto?(J>2On*m8qiL>6CE@vA$9dlE4)1<t`s_dZZJ&kA_8ydfu_>kpfpLyVm<kth@L_
zqst613U+yyx~F|t(o*Y=Llbl|wqT_UPw$+Aq=#ewxz7K|r2c$7q-;*XqR#&}>d)5d
z$@BjoYtJ6e|G!QDzyHA_Msea|>Q=G+9)JCFV28b++Q&>gRl8jsf5<*WuZ#U2(CI51
zHf%pTP`T6l>7Rq(*?JWXH(?e2`?e3yDV`K%algPAP03^P<ZCmXARQ%}2atV$wy?t>
zo>L~D(I*AZ3NqdO68u7+uJdVPS3RGSFF5b=*}w^VUMJ8gm~b0AmMQlr5c;w@x70lu
zNv|$aIe~%JJ2ItGu{&dl;iZrKRoEV6Ftny9<#50>i6l|YbPV7r4P>S#l@u=uOs7U(
zRX5=%voMbp^BgwYULYPt<<X)alq-)cLKUM4Tg3JyhM)ikCdg~Amuj##xy2rAko=a1
zOK5WA&dn&^HawWzG!%fPGsGjryhi)pWxP>%Fu74EVACDNye3E9b-YP<Fu6%6;P`pX
z@mEEe@s<ZM!pi7GW(7>qcV1_J|9B69JDd#rq#1*Ey;O6>$zfvCf5JkEHw{mwfQmk=
z4(xKh(9H7~trN=7$rS((Ksf#_yVC?L1N6!>*u(!$F)3_BrNOcTxiEpFu#*t65?JnU
zyZ=jc@l&TWq#j-0(M^f6bWuU!svmko&BXA~93-s7lJaM9t{ngNnlg^6LYy{^Rzj=q
zR5**>5IuOrg?-!ie3{ctM%M)9)csv_%WD`Ys{0uK-oUrFUEgkh>^{Z^cOUC_;^vRK
z`Iw)te~&NL@kP7iK@V@c?^HTIfb7yCWYX|4#~^&EVgh!;ZresH-L*r)PYSGP0y&wc
z!b9seN{YykZyKP##CTA#5xmSA!OOf6yv!Iu$|oFsq7%Gb{yvO2Puh(-yNdc<wEQ_y
z^BE9x(Hh5!a}}-d3i%Q{c7x0orMDvtLl&(<O4-=Nho!f$)+UJZQc0(FO;pbiS9<kG
z7pz2+N((4q4n*I<wc2rXv>F1j<F&_l6>Pd_EU_<}xn?K!T1CU;ThI*C!y##!5~iin
zKJg+aEk&U%)*sA(vZ_PQTEsbvbYNK{=B#=qyOCj9v{Hd{<=cbb-Qha{q>t%~nfoyW
z)zVwGj|BpyBwBrOFusFdk!}H5xJ2J#<1cxYBEnHaRSS@LtW-bZ0~4<S!wuiJZw*Xj
zFlJ)0E7xw|L@2rFImx`pc!Cg26^toi$ZGX@*g1!th@MCZ4I)3VvjrdvJx(>M-T9`>
zd{3m+p3J%L-L~TT&f>_R_etrXCkY`%2MeE_-lXa2(fssgn;_I3Q>tU;^cXs0=ENAB
zz4+Wvchjau0&v|!9l-!YCZKMqjf>MUXDxr;vIm8H@+}}kuC$y2Fy$}l(~Y=s>$Zfl
z^(_vD#4F%>(ERveXYK9}VE^6lRtnOWe|)(6_~HAzDhbY<UQoirkSMP!jE?*vm1V&5
zJMIt-28Q0!7Y-0E(+CTGuRoSsID+){G5T`e(s#L^c1kWg5DFa@f{UU)up@4pYif*_
zE(ZOj5#sBRh9?Iqi{&E>aijp{gq-$h)W6kC4rouW1B_qW?`!6V=bcjCs0Eo|olH6|
zZ3#KUHlD4rMWSzqA^IQ_U~P14ltR$l;i76ujsq_A;Pe?m^))dB4TrQa(Eh0JwnNqp
z3b%|ehOC=m+cIA<)ap1Rpx=OeYWlZQ87-kFs_I*fV*<7m9ZN4+noZ6}y<!hLE5xDT
zM~P)L7H<&+(zL#^J4=q;hWcB^z)*1#Mvr7l@nN~adZ3_+GNiDa5R=$LQX}}tIjXH-
zEa^UmA>T>S?QqjFi}rX(xxzs!g++)F_c|pt4hF2_T>6d<;+i=MPB!001~3%CxZNv}
zPERpl&5^QeF!{jaaV->*0C@n#ir4Lys6Z8yN8@h5;tBy}(Y4#48;!{>*BSd7{}NU)
z>H*oo>>W55%vRgGbYvd2aJ$$S(C(%<0R_tI!%pXaphNevXAdm#2u55|*r7z)s(m@9
z$`+GBAii_u1}-4bdQ{qW2D|4+jhEQA+G#b0z?wy4gsuqH?NumYiS5>I{n_?vYj?9&
z`|%%3!;T&FwraJ1tXrk}+S*eX&!Ke6=giq!Ba~ULISe<4)YV*0yv5Lm@V(_+y2Fn)
zF3P))?RHz8Hmr#yW_h)Zw<8yR0Hb&-->oEG#9pqcNAw#df@~cG9$GOm9^$mlx&8WE
z-1pX@t(D{hqrc><2}KoXwLH)iSEn`N!y15@<E-a45TG`zvDy&f0Dod!CYyH1NIWH+
z|NpGmH*SEER_#a98>seZRN^?Zo8m2yb1T27Hx8kxp$Ei`LXPe;u<W!IsUc`>zAj_o
zH+B)CuZC|DNPe|@-e!9!l~9kQUy%i(%6x1(H;HP#I)YZGl+h_u9nsdNOl3sx=!&Z%
z)YUXqgz~iWHJF%IqM=!UP7<|+2AOQNBr20sO7wK%?_cQ?873P+NQ#sA{uKo|S7M6@
zF6)ki1QO(FeWF38;*t_eppa-dEEXVA6A@YcAdl_YS5Aqpy8u~eO(l>y*|jpD5N0QI
zzsOo%!j@n=WM7g(#Ahy2VW87YC`P@OS74D@@ZY<%t`eKINtz0h<@}loQ`Qts<w%K6
zQUTH0W6VM{!$Hp*_dA^NU6Ufnk&+v#cZ8XtXJSi4n|bPhg3LLWe4VB|vOyt0gTJzh
zMG@o7Hn6sh;=$=uq-82Hq%p{DSfWc(=q0DtK?G-^jdC<6kq&g;?v4GGSGM1G!mt!c
zT;O4PL(Fb*lWf4GZIogw2PC!~dIQ*TlJ`YS1=Fo*UechVmb8q3f*wd;zL;8x0=%};
zx?QOen(O$r&h1O@zF~=Q5@aiPhDL!+wcJa*E5!A!NP#zOwpl=%rr#Nq7I7|la;;W<
zR;@$%%~FOML+VFl0rKQMGf<RCjqqSW;z;)+w6Ho-B9#S0SONi^@12N${P@$#+R9J=
z`02;mvz3i!D?iSmkl^CX)d|EGRtgH}E1AlYm`Pg_pH(N}qc%5?%$7-6QkFD1@b;D2
zVo4btqu#O(u=vm_t?)#~*(vHyRPY|k&UP5?qOwxr2ivP*Lcy6$=P4OF51MUKrWR`=
z%aU%1?9!Xfjk4E!o_>W#aH$ixO#RBYU1@=^0ns5{ZzkD6XL6ZJ7g@!ND_qz&UF>C5
zVl-9x!V$zb(+rGdNX4`n&uC*Lv_6y7PFy*lM=W{&7$r?Ub`YsJBVs}2mlZMsqIf8#
zPw}Q1xmUVeB&MR(mQYY|f>>Nk&_znpMPsxAVuJ?&Z9tO0nKVHY2~5>aNC_P&0of-<
z<46sp@bgkpH70|cL-ecCNK|9^3{ZIA3Jk9fY9oOz55O|;TE)2mzTx>(RHK38gi<w;
zrY*hrhb7y;BvynFWo#sjVX}L?%Z3L#2pySBMk{F<LnjSmSPG{qIZRt=1xK4cCMr25
zJK&Qj5|x^?(L)E*&XhR&4c@`Uqi#}>k>rrihn(jFpILGTKqo@+SI|@`TdA&DqE?CQ
zV011F^5T-JN@I{TGzH_4<FDwKZeZMi(0e3_`j-Sepk<7>C1nRzv@>vtRzLzdzNy$!
zPjbdeBQJ1Kp|v8Eimpv}?Gfe+wT6ArPdYGQX*PR-7S5ogq#@{7Zk3K6K)1Na7vD;H
zUo>kVfiIeMQ!I;#CJYqGl1mU^(1HZb(3SZhn%+n3M_XeVx<5vR;DcgI^!MXR?M^;O
z0h|ePH4d|mHu2mb6C!1bcBA%cJZnR5xRRK;k}-2G`+HqG{r-yNs#Wm|(HUQJA>}*>
z5Fe;jgZeu5osxZtQ7>_;hvqsT_b8y%z=qNiPW7@Xl2IiJ_#9=0ph(7upsJXl1nX{e
z1bK5eFv7r^h5xTxVqH7L^P>$*JWV`$W{DpYkAAYmKT@F%8dYS<Yt{81(?JZ>#Ue-4
z0KgBxR}7@CC9sbEO^s^XbGqF#r(0g5MY|(vDlP+W$7pBZDhIif)8@h9-s>0Ab}Of*
zIEZ<H7Vc#@r7iBU`<`M)xI;@kYNVNFV&~LN5GnO+vq}HPdL#;(P)pmb-Sv8Ty~0-1
zwWrUN6?N@Ln7#TMY?bENv`n#Kw!NT6+Z14iwZ!&BK2l|7*^d12G1Bty{+71XteuKt
z-8jKDxp66COJ(6XWn008XiJgOoN0{!i(2V|>IKw)Yn&{E#NbgC4ZHy;%B~N)AdCWm
z$4MNShYbm+cS+p^{V7w@U5m?V$$W~dCO}h<JaGXYh~sCu*Kts^l0fuewFH<1?{QFS
zIjGMi{0)>hs9YrK0D}C7^qG~S?@Sany;0~6rW>T=h5xp@G_vPe8ERPmzAJI)KWWbw
z?Z)h+cjAH^A`(673xiRrUr=`kP1WmmMge)VG-{JKaqxxo;qG*Ii?kxH`YXJh>xvBD
zz%8N}Oe0oN?X->E?GQFXB}zZb#!u+Ji%!PH96m-^7=;7xjHO*Z?!G9M*re%N?#{?K
zOE>~B8EI94TbrTWP*I`=jUJ2ojAKMo5h+?AVM320MG<L;k_;Loch%i;>^G(5tB7b`
zq#D{5@oGExND6#jCvt*OPtcH~*3CDgQTMX+T9r$TS5<|T^i&TeqnUV{YHjk?LMjui
z;7Eo*(_4y=LdW%gd@K(uchYsz(z7kHXPbqKymrmT+_+ZKae}t*wrJnCS$frT`|d~_
z*jKK+q@8uTTQbxVGFB&4l8EUh{f-|&<yp>jpOQAR@oYmXBO5<GMO|cVT^cb!^%=v&
zTzfuQwbc=P+JBGP*rLyY#b>Z>{<j?k$IPxIot3fG4orKih}m09v)Ws=yX_QVuFH`e
zKk-7z0BPf|hzk}g^wS-hwllR5@YqO3V-&~?ZY<6)9>TJ|?8R(6ZP;r;3*MCsXNXKE
zz%wWiRCWiCu2e_tBu#vQ;w})sCFG<X>Y&1=6sV1ALMSFnZ^xLS(ihl+%@Rv{l`TWo
z<*EVbM47rokZFEhp#LLv@~PY(vGEz)U^AUy8Ni87u%FEZmS8ctVPbWhBOp^&29=>z
zF^Ld*B7OfO$1gT^@k3L^J?NqLNeSXJ`NQ%!CeKx`z+Cm@T;)v_UZ_bQvShE?=ukFu
z@kUn@W|%JOZeGU}1EK}ErkJf|`lTcbn4T#~qx~NrYpeA;B9s`cvM8uc=-Z*LXl>CZ
zmYY0BpOUCLIUbU0%lRL5<!+LDZW7ubV!nM5v_PJv+>?|o+MJtQi{~Zt;G~pv>j0Ha
zdv>7a_P6lvu)~xmN(ndVb_{Vc=CO=(#K$Il0AOe<#sxFeS0Q#%0{fsmQcCz`(76J;
zrX~n9^h!`Ia97}$El&};=!2{LCKy8*MiSm^aHl0j5m+JBp1RS&zNJHRK}Dt-@--pP
z<7_wMr`pY!gC_UAQDl0FU@XO8<ee1MS!bnjeR3o60#K=JXjG=BO6by_9#niZl(rp?
z9sL1e9C&mfWb^g260}3nt%Az^9AJl5>CwKH@qCPC`mxY40tW&jB*U%<H$}(n{odev
z@g>|1slCuoKdt`skIjv>r)nRxjz;c({PZIqDTOh|XIO>mkciJ5Tv!Cb?O@=b^NL=V
zLxW~ZivBo?SAOl}T|GWIY+-;|AWGyN=T5)d=uk=kI!@qRi<_G&j?p<Y6ksCCVBv-_
z4r2OR&B#QyN@_VGe({QZTkSz&b7-ytUV6s5<2IuCR*V|9mMQ-{=2K*bm4+Va43c!t
zle?^uYl-OlyEq4@aN_~)XJPt>?Ov%p4z+WC%%NXzjcm=n4UanKrtOSEy-m>^F@qgQ
z%PyID*atq&W`tczu>?k5i2hGbh#7u_VgL%Ps!)rE1l@46wA4VKudNS`*n7X^jR*ZL
zzx{#M`8n=|ezpe#+vh-Y%C?a36fY&E*rc^1jI~pGT3>y-zFDjPs17tXc<rpuWDUrW
zq?3*~?vjzV=q8=yXE5~@b?Om&rnla@R<guWs-e8ixka1vXkShuB)&7pede<@c{2e|
zgoD)Ub^UsC-r+vW81A#F!+n+>?z2h5t;)08gx(unlAWd@M<v-}AK23L&CWYOk;)dX
zEqunaWFvh3S&SC~9n!txWJID=jE=ug{2yQqIxZp-)1vaf)@sT4KlS?hL;m-N{I6ee
zB%af=!#9Wf7pG@O|8C$y&P`>LUoa>1PO&_!sl`Z7WPT3yD2l9uIZsnT4w6lvdr40l
zDKayYP8iNZEVXYu{@aN+WQFV$55Z@U0aNn7t=H?x{NM2HA^+<`{?~6;ew>_ANZ`iV
z>8p#QmvLU2kRTI_mcDQb*eubM$tP6wH+ZQ70vNB0WP2^hk!H!gmN<_=O1Sp$DfEIy
zQEe}v9r;khPA!`7lLe*a0h!=#J&-4P!@xyXDO&`$=#_%)M{SsK=}x;cn9|s1n<bJr
zn?mpuoQ;xnz(|MU7z>KLPtk%KlKCPQsfwe|(J6ZWR(4kvd*hJHvtc6eEYp1paMH3{
zP+a<1q1DGjKQIoZCxiHqZLRBuAzK$1P>w@SBacGboeoX&8d+z_gwKiFFuq6VJA*MH
zg)Vt&ER}UeT52fD2&|i{XfJK)a)K++FnUlQj0E<&)_!5K2XuJ??DcM2zS|LxI^Jd9
zxqKu?SxLm@Bi9t#MN^(Wboz0Jpr3uv=_#FI_1gX5j+~Cm>mGltzC1d4)!6?RHz!`o
zI@AzNVAmdaV;~i;-?3nW-U3NP<07|W-{A~)SjWD^_*p(;9n%5u$S^IZZ<Fa4=RZ!#
z(uCE*l$~uLi0&8(Asi1WWCEu4?J!Lt##D2F%qxm3*Kyi5tPT?m#*v2Xn9+g^(;kN&
zhA?Oowc)#<Z%2V3y7l%HL_M#!D>$xwONKl{s3|?D3e3>%J2poviGb86BhY`(^JP55
z3%dD%K{JN(zKutIiBKPaL8B(ZM1dGFEMt^gag)(10qtNfL;JRhLJ2-rQ7{FoZ8`JR
zUO4FMl+xlkBBp;!tDYH@w5B48fF7kVZTk*qb0q76bj4}BI+AO@$HO6ELp7@67aaV^
z3j&v12ch`@UblNp=2VCy&BbQlk_!r#D_|lnnMIie1rVMJ5Mm)N@U#Xaegp>jA9x5Y
zG79MV+M)f0f2$~pti5PUlj1iB1A@>AK?q3O8kVP~$h0&hTOc30fG{5WhT8>MKG_wN
zEsSJ}H?l>l<IM}|h(4nkxX=(_RZvjckxL1wHmQt{T6UlfsTylZy1t?)_~4@N3(^ra
zzA5R6&zHP`!^d*cmj!cX4m*8yIYSeRYWG%L0wV*tg3Mc7I#~lxoF5tjsLWnuxKJP8
zJht?z4Wm#l0H?f=q&$jEhZbH>6RWUX<7)3oL4bwsb}9N`#X!<23I;vph_6mZ=JvRy
z{GrD5M^mp_rY1n%v@y;J!W2=F!UJiPIw|}>i-73b5s)CR7f=d2c4%{(KpjSgQa9{Z
zhoHz+FTJbPj(hdfKYm&TwXU-|><9Q?6VI5c0WRgLICqC^CWrApzLGFTtdk(5Wt~N?
zgaTFuaq_Vd&@|kxt1scpCEjtW;uY|s0N0vwI=V)$CsI4@{<VD@XvzSx0`-=90+x(^
z$BIC1p$1a`=%`wvdmo5E;Vla9TKy-=0xCU_S+c0haXfVqHuu@D^D#RIN$S$4qt7-t
zp(yETz)^4mfG3kcls7*;<BQ>g&a({*=N*R^N@6)eRE``U3lsKe@lfBGDpay^QZrH#
z42UIz0%#!AcDY?qa#;2dh&P94zp-LOIbVIHrFf`Sk$MI^fiK^*`I<K<FMLD9U?lN7
zzetMym<c#Sog1i_ohZUg(LsCZYB;_D`R?{_NzCC{=1q0f8~vXUsM6U<a4*f<Kp4<}
zn`EcIa-C~ZH)K>pU49QoVz36>i;U?Ei`S*OddpJT?I;&Y^!cv{?XV<i6HmKh(TKS8
z09NxAFt)PRCYff~GDYq%u?>M#^DB`sRvStj7Oh!do>@e9Gzs>{v34G72L?VbBAXab
zyL034>5f(twT4&)f<TqriM3U(h>D2Muu^9;n&=NeWv`VhtMJ9`@)zJ5@P+7w(*l#;
zs3iy=08~+%e2*3IONT1p1q#%TyRZS(0jtT8!({TTVJli@g^yVS>gQT9mx}ty=>;Kd
zJPh4Fl^~Jlu`**zr2}1EDatNtXd{fi1iGf4J|&r0D!oU|Q|>u!@tCh)J*F8WK_+uG
z2zfdh>5C`;$(6^!oJGbeYV>2GivUz)$PiqBaD@&)q%xT3QY%km)qPGRRDmbe3GZgD
zxd!6#GHf%>LuXX3JSKfd>OHtDF=eUJ>WD)EZ)G<EN=?TaGVC5g-|I)-nV=evg2^Be
zy4WAgR`gkxn?a{cON$c$S1OW)Ub}rc%RL(v1R0mb8*S3~#1r}<XB759A^eDf+qo3%
zTq*i7Ty|xUUk<}NbWt9f(_m(<w#@aECj8A!wRreyP+YGxqaG`yMtk&gV3p&n)wE64
zWY50<;bQ4)7P_P7Y<|YKLM+{=WJGj-5!$X$;~({G$Zi7uvm;bHm{7P3wW-dQE)JqD
zs;lx%#wse}N!Pu@nS+TcbDf=?GnN2G5uN%H3-IH+bPV`7f7gnlJfco$upl{Ibh;o)
z_4uL{-n#0D9-uu;#jGbUjawuhato8H3FThXJsC<c^^>m%n?@9^6t5;NSFajp=Z6Q)
zz2BM#r@#INPm;$9neDCq>EoJ6$0*J&?bpVmPFTG3V08Ou;nLT$wB7B1B&`)K-AARP
zXkoTG6fNEMuDI!9rF$PbGRm{cxLTxIj$kVWVTLfJDAUO^q=^R2hZC8v_x#6Hs%AYJ
znQ;EUzW(%SGXGEg$A|O(hx4Cr`22@+g*T25j-EfyIrqsbaxb5Otesl)(99=sk0Y7*
z@lfVb%(>%;i3VZaNHYPGZsHdi%wsclR)Sk2@@O7*=_r7sc_{OAa)8njsZ(_rCS|E5
zljeoqm}sxK9%`*?D6zarJ789|p%(+Kk==5C>|>J@RvA7X0rg&kY<xtGKiWrseY^ii
z1Z~)COILP7>y}zSZ_(jZ3p7N%mDC1u3fTlC%R4eUqjCCn%k#+$kJ_ZC4w#hxGFylv
z-|j%}1rz<t-5Q!sl+_NFtrOZZN*=CR=(QWdm>jh+V?%-O{5f`g2i>xVAC#XWo2#80
znKfPV5Lk`mYD(rpP=arXqu|yS>fphLzB(-TI}8<#A>KK~iyHfMr5PFo2twRw1G+eX
z54OiPH@0xen-7@z9kdtI_8vb2!tG;^%(k?bLmr_LQeFF$wv}y`l%_>oqM;uxtW3q!
zOmcN8xOt;b(`S)2Ptr2X9oljNAlwg%0VzsgDgd?<SX>=S;f~v<9V(jU$;u1bJqD`Q
zBWd)}l#eK4!_A~~S{VhC#y%7jbO?RHB@NKP(K0H!Fw!y3G8x=Lx8?R-v~Q~`3);vL
znd`U_Hgi;?6fAeZvR423F{P-Q3?=@H)Jv#ym!)K8anA>J?>_q9>Bt$rI6mj6i_`x$
zp4OhF@;|PxJ>-A<7WKc{ns{M5eeq)K42Mq39(Tm{B|NBJ>IeUHhdw9+UH_X_08d{X
zp1e5D*8Jk{QYzmI$HQwox8&;6Raeu?&1U1(tK-Aw{(iHGmb0iDFWlA3ezms?jIvKs
zBzkSUVoO0GWm0m3*TfB!)Zm9g@`$7cndyQf#i!xGLJ45d7Y4%vz}(12<7w{Luu7(C
zx65{2X*C<6=RN`c7^p#0h)1okar9@0uWFKUbYN3#pvYLK^<0>EoM;q<q$G4Y(0JHu
zhF+5xrpY6*0Ofa7XvLD%gu0x;M|V|E)hd@}ljwRELkiq(Ne>vjh5(OzFWia%z3jhn
zpzE9Rt#~4C-~qg@81Mft2Brix8I+Abw`7eoya(cx|1`@PohMcGRV9*R^a-?y_ZON6
zhx^BkGcw<l@j*o_^55H1I>)p4C_eE&yu6}V+_uosgMJ;c4f_-JYUH9<z3PBANjOx@
zB5Q%YqSgHjowRnMUhsD##H%sD_AcDPA`p=7b>Hj9dd1(Lw%3;D^~F)};&@+bt-xt^
z=xI5M6D9p3Jbzn4w!69cyl-CyORP3G+UUTr=Z2Mny{7*yEpexM=-mDR-NY@>{@lGr
zJst(jabgG;fBhwrKmjehwBLI^0<^QUR@u$y6@=y(*e$b({l-N(`gYlGTErx*CNk~H
z)~8rGpX*<YOQe}ZVgXxvZCRS$cjxSN1`I2*l?QJY0f2p5T^n?O-Ga=NG3*Xe!K2eI
z(50?Cw}Va*Wj?Tz9W7xT?2n6`*Fpo;_=X8_hcYTh6Q4HQUVl8mNAF|nIOIP#gb|S9
zozd&v*5{|DyDvn;U!vFay3sL2B#{wy;RH+ak^yWFP<@XD7%5C}IMysmikN+hrlKGw
zL?qP?|1MXGbFQ}~vP@*^PUo)<_m3LK`t%Ec!ROZbegkwW>+JNGb8G+f_}n^w{rvgS
zua#M`{PpQhyL0g_ArT@7BZ5#Opq+?)&CH*$pC5i;yyC>#p-r=TH|tox2&cdp#l>jq
z&EbBGrp}N4eK;REeXBG@7!kDuLH*a!)F;GnO(FTe8^gS6b9c|OTv_etE2XImTNSlj
z473XW>Q>#onx$gU^Dut`Rt#pUC>@HiVt1yBDgrW8>|D(=#&v6bzA-|@`Njwp=Nltb
zoNtU9*2a8ego^Wx5h~6%MyQBmEJD{8&dt8(`JF=DsKRs5wyrumb?d6L)3>fVJB6pl
zTAapHV=YeQsj(KP^VC?2Q+jHw#c4e?*5cHj8f$TSPmOg3iieH1=NIaQO3p#+TyJ(l
z=X$eKIoF$=#N*>DPT%qI6({ca_=;0@e0;^pIzGPQG#wvbae|JIuQ)Zw$2S8hpLqVj
z?icEVi_S6Gvf}KsEi2AG-Lhh0zDuSJ;*#>aIj}&aN=m&f3vq=SseIclU;V!J&JtI@
z*Wj~O?V^F;F0-AEYKJ>rj<wTCw6m7m&iV}ST>V~uhgx!GJ2Swe+bKdYRKL(9P{}!X
z5!aiY7sdN4PAu{M6s@F9St`?jiDrr8*FzUCshxA$RF#upNd&MN*GMn(S)Y7sty{HS
zJP#S-Er75bR5>RYY<zew;*)xzu3qO7tS%8JlC^B2joQm26kVc|Xm4#|d+Rg8mqWx+
zdo#i}6EW*Him42$^gL>$t~-Ywnd)~2l`hq9VFK4xCH~cKOuZs$xDGTPN*a%fhgH6Y
z&U^TUbhKWiqari@a@-Gj9Zo^tOR|AP#Z94Xb!V+?NM@^31o=^26Di1KBL30nqM=Y?
zhW~wxR}E$QfTA-|q}Nxzhkh6CRn+rm=#|GxN4o8hJ5KbPkDzi`i9=L`P>wyjGXtSS
z#pfUtqw>sz64jlVP>iaxPFay&a|q=V_nJc}M%7seB`%bthYk0ACd01Q4`f!aC34cC
zw2Z1~w+xS}_V5y<e`UG$6A0r~Dl84HuZ}7tq2&iDn5Dady~<=C8QpeL-L9>w4oQN`
z4^l+YfH~pZZW0jYfgQ1EhJ=zqMp#a^sU2KmBqBn;tel+U+{8ELgd!a#4@h1%{&m>g
zKRr3WXfk}9lVDQ0X2v<o;hV;B`PYj0HRoKfmU4=+8fPzR7Pa^-_(y$tU#TzeH}&Ox
zsJ^^E)tC3J`tp8OU*6~H%llt_d5@AW@0a!EeY6VuYoRa_wiTCWS;Fs!AEd~d!?)}6
z`EJ?PDDw#T%bfFmd7rMYZ8gr~>oq%RPWKDDx8D~K*gT$Jg?zwjxlUk_%J{eT2mA8A
zVYBi@F*jq3xu+1-q#aZwSDNU=Rm=6_N)yH3(xL0iJ9mARMDevKO`pvH>|qu%1Vz3r
zAu>9?a6;?Fu`?VGtY)P&U82Nm#4L}`@%z;rjhGh51Nx4q1*@y#vM-OhD6TEuPsA&a
z(DIwZvvX{$-dsys%w&yv_p0f*FvJ(fsEqf$wu%QP4~ecKCigK`t&C-B)>Xz4kt*co
zwGBUGEyvq-<85a*zCIt|>qP)xpAYc$LV$0~2lz%2z&GXte4`NHZ1tOyV02~K5pA?4
z4MknQyhm^Av-Y-epWZeydn?9WWJ4D9Yf+ce_yZpzW>nO?hX2t)D2!_U_LN(RZqL16
zdvW;az;uD|k@L#};UiC<1;R(3O4ME<0Z?dyEpusB7St{v(lQs}qS{62WiGdh87wa?
zxN4KRkpyl&ZZtcGSeHnij~~s>|LPJ+bB=FgW)66t@omh(-|sWN^?Am(exLDW)62pZ
zvRZKU1uwaE%_7zp%p~f1G1qT47R)9T%o_`5Y#Uab`W6TCx)rChiOg+1Un?K8B9|u9
z@i1^Nhm>YPQtQWfwF27he2uRcYJ6k9#y66U&n<1uZpf8ud{1>C-XKWD#Wo1yP&JrA
zDvh(f=H+PI40?9Q@tfVzvyB{z>Ex-dhI3xVUaM1tVCHLky-3>|^R>OvSrECXJ+;=Q
zI`^C0czfj0x4`zueVp1;q#`4+-KAr(#p4-0#9aHE#(#U|^>2sXzy*C{_Rwe(@;}z<
z8>#qjYtJ6y|32jZ{d&W<y*fSq?d0?&FwXP5%)gmM^5XX<A7kW8e6>0b-Tvx<8(@B}
z^Kd+JJ2LcbfDzkL_2`rHbnKEr_C%){VyJ#TIsiBh8*hGV?w=kUQg~aKfR>};$YC-a
z6}Sz<%azr0u7CLJ#o^gW^XTOKqH%I@L_*`dLhdaS3heIGIV7El!vrY!K8C=I_bh6D
zaQac)yZplA34JM{m5ys?a5)v#&Uj>mv=eVzz6%{>MzyoW|2@Mnt%y%uo6~~$)zWt|
z9$qS;gM~rnW{9?IK$eKL{N1uj2SIu6aFI}Wx_}oVu7fDQgBfqtXwvxp8ptq6Cju}F
zle-s2o10uL`W|H$plSH-?FjSUj0|L(i0A1J5^(}M4&A2V6>d~5eMd>DCeOlKsF^e8
zrP6m7lw=HZgmpMi8pRey5LJFCWG8+Q#*(b5Fu<Hr4b$D}n9HT_3;>Pi0^{^j3}#`s
zym1&q0S;s6+;AruqbYr-h6CeKX+}ozd5f~cga(|j>JfVxsCx^XSvBQ*UKn7+!+|$+
z+pmm%FN{$`9i}=ywackf7Roi)3)ul~N%&Wmf&L`|L{y+XO~^7$6(<2|drr6Owq3&h
z+wT7ocGIQ5UKbGnUq<Ddir9hYYvKufeyrWyz?W*Z%0GuI@Zb;ngHaRz7-DUx$~k_h
z0s4d`$q6{a9+1dok79t+z=T_Ms>0~1e6!*=YZdxauRu@qZCJrFJS`DAad=InjvV}5
zL?slq4B}+;sBOu`lDLVZN|K$!xk@#`10%4-rF%uWByn#e!)J#!Mr-~L(9_RL-gwa8
z@`uR#DY$r5t^iK?LrE*4i%m)K`E#Puc6qXI3{|bAI?;m(840GIOnRuZ8WEbeO5b5N
zJ_`^pqH%G5)Va9|EIA~5`k>w_&XizoHB5N}9PxA<LhGC2Ei9^5Zdq8SiJ9HE$L-#&
zXkpgMAZK>Z97>am1)Br=rb&g<^PIH`49NmDT}+%ywMCiI_i^W-$ZR6j?UzRt_*2oz
z9_<!z3C#7zIV3&Qoe;}vVia-ci9s6Y_4F3nu={=C%Y~+@84Cc=Flc`Ku(Nje2k_kH
z2Uu1g@V&(?`~_A25^q5~B3$wEA3JyWrW*<<(e>^2N2q@XANZUD2meJYln~l?><*Kn
zrnqRyZmP)XEr+r!Q{rEE4sD6IL+4Mi#_O@j+_=Me=LR!NO`Mx{k-5?E&ofCh*89yB
z2)VYLE1aBip($H4r;NT-{`iP|mN;z56i^_@QUaN?;+DSKkMkDHMQ|fIFX6_h*t#{O
zC}qry)L(@)Xwz%Eln9}%G!)K!dlN;e2Y7UPIY6lt{;JOmu~HK^^tTT9tFvTTzk~1V
zBvdw7G|8lKai9p#@Ie^-9llU{)Q`6B+qZWg+wkwM^xc|R68Il|V4rW_mcFaSAJ*t0
zey>Fj>-r&;tkJ`DCRW*pnxuCON$I<vy=zQcV5KmDLJ)UTGHb02gG|bVf9Y_Z(@>|&
zRoOhv;Sz!=fhp;?<3NfP-|2c1J8jO7kwFj8d%S0rcmVf(K6>uU1gxA{LxGc^85ko-
zXA4BL1)>@Lc5rSe`5*2Qfy2u<)vjDm@fGD-?*)hq@q2Bn^UnHz;_Y9&$o}GWMnB)v
zPZsvS;E31jEs$DeGRar?JclUscI3#M3YgTf?+lP-T?YbG&@QISpgbX{mg_08`;>5%
z6T@11+&YCRH03)sC4{E*@GO2Y<29zCCm!=`1Au`x%WS|)!9!V42RK$^3OdduX7(s=
z*VigJQj^tp7Mp;yqoL$x-j$=W&u!w1l^;+nQ%(_1DlawZtocTC47aEbHKvBC(3QDh
z$!r0XKnY7W;~auBz%UYlaNmzF(R3m8xFAwFqzyuE4LH{#mm^w{ZA7%}FzIVLjzQG5
z-F|$vY{mvu`c7hjQk4^%##DZGXDf~`8PdG%`mp1;!et^ACBlY6*e2nXrx!J_AOtyy
zc&3@gH8)kQQIm;!O>9q?X^%K_oME0pB4PW@w1TWIiz|1qiK)VACyIo)19T+dcW}K^
z7hwP&w!4Y@(+3;vS7QG?|8R>&zWGd}6Y{^+p4Hb9_TRPIkM#%p?}PpKtF`~0|LZr*
zVVP(9O}#d)Y?0yjQTlNq!>?Iydti@tji2ox?7-9{9%0gL^N|)+yV3@_EDRf{T<OaG
zDSv9)=d$_#lmSteR)2BBoVExi3bCsK-r<>A^gIN`?9wUN<kVtEL!joibNqU0$yYuM
zza1u0u+AbLAEm`(icr30{tvt%a1L)m-(D#HuWvlvNS*)HAI|^2$^4%`txyusbP}j4
zrh%dypx=7xr*lf7jR$3Cja&heS;pWy)#@tz-&~lF@?Os_QaEDL?gE8O^so!%96R<X
zP>+<mk#tcxJ$-R}_;cg@=jQqASFcXbE@Ek`E<Mbjz1qQ<ZT9S-*JL1bPxcJ{r6pVR
zXi~B&*u>#v2b^EixfwaWiw8iqk^c~LS`ItP5EW3vvVL`94%HNMO=nGAEv1BPRDAW|
zc4!aWHb2T0dq#!5zSsV6+MT)HnLX@ygL5YgovcjcX3>|n|G^25PGzyK_+F<e`{n3l
zy<Rb<zvH3-h!T)ZXb-`YtwQ%0I)Z>rB{$Ef{8_|u=JNY*-~WxAwrlsFs}sb9<^NvW
zc$Sd=^|kfdgZ%#%^FKcOFBkpfY>A$i<QJ2j|1qyrOdf~5xC2m(@ufDG^XLPG(m*De
zLL^z~Ii9ri45xz$LRL#syVG&8G@i#r2YoBoE(+9mbM}?(_7Qx}2?_Hh&z~{(@=dt8
zDWXBB^R|G4LtQz3KuN&KQ6w-@ey>77Bt~j8>I>+lfd?ouCAUiSxTy!TA{@UPeQV3F
z{Y!f=u;mXo{X>=O#NMF|+x-0>Csbcs4w7Gc#>DK&iOSM4IN9S}TCm5frXCytOUP0k
zrc`@nw?Ul0BvPKa^3Qg^J4dzWoJBmCqbkjQ<JFPYY2qGeZw~h_PS1}1ec0T8eb6vf
z)$HbNzujBKqO1Gkj-7mMDmUqcNUqj*rB9&uWOJSeJNbkCA1D}Qd@!GCG|B$|bmPaQ
z{U0=i2mS9`wExuBR$y*x;%9sO!4W?@ZVR-*?H>NB_UP9?F9#sht8H)ajjMua_&v`z
z03}|hq(JForaDGzG}6Mza8F14{I%<bboO5)HMc&79*p~;yV60MN5G1iY>ixYFc-Jf
zz`SnP@wxdRB;UWb)}ZXZ8`5bufPhVN3+UECK8b;YC)Km+G&I^QRVG7;p|bMt^XpEQ
z24EJM_Wn6zGDnUkX75|z|A>OdIP?zOZdZT#AG__fTzgpU&CUOx)oRK8|HelB#|QrZ
zE%N^?(?2|@Ug`(`bceocf9sZj#_NmIgQMrqoBz|;KixZOoTMZ`R*9q#02z=GV^QvL
zb|rgGE>*E9MvM|!5L)2}xR*_qE?1zuEC}B}-er|xFIib?Np#IRU8)4Nm{6{5zr$X&
z7*z~*VziZ#w{#lk%{Ps+qsGC}e)HlGkk`04JkN^hivIi;yZNA8ETPN)8}dRlf#E0c
zih%mT=?6|`$k>auNsX^9346_K7Jm_DvRX3Y&R*1}Hmkk+Yw)WU@5G`f#X2&7)MxBf
z|Ej~UdKw0co)Q9ihsv?Hk-C#axVd>W9F0Q^$0qc*t%((Aj;MnEmRUi+RZ+cVSl=y#
z2LqLfj}%vqx{U|G7CR9zSrNLa9@9#)p3!Tn9tDvzu@TSalhvY?1>UYulaxDnvjF0_
z<J<`PoYa3tSC**la!^(qL+mH}Q$)ipY{v`kiU!5yH_)Ew*nFSgJ(r@lThrReM4@hj
z-foqOuoU1%=Xjde=7#)0biCnXa;<enp}4X^2zBtbitPiwLo8r-;SUH60M16YWBtnF
z_RxRW64&T_i!Hqe)k0kRF38H}d%L}}CP1Fx>!I{#_O2Yi@1d*ah)=Z2l}#P6_Wa`P
z^yEeJ=ma%McuF^`qJly$UrokK;>L=IlY$j+iUT2+fgl04vKzHgh8kCvSfhLjcX@2~
z6^=5X`JM+1D@LlzgbFJwG_kyCp?h13I#)wc{CXik?5su25*Jg5B4xM<lGE|S^40e{
zh#wA6*%jsQv1)~Wek?*}_)l}T6zX|+D2}%D;3n0eViH*V=R4upA8ygFI|kAm=#l_n
zfoClv&~gyQViWcayu7Jum9A|EVQjtamMdFEEkdcVugcI|)d%iJ={6*8${EXhXGb0~
zS0uowDVCXAz28~e67RR8rr_6;ClyT^_SNt2-*NC|;y6&E>N|n^AE$hi=-g=4#a3Nv
zwe9t(RX1wYzWLovv<Vo9bG6OQ*F!hdeOPX#7(>aPH5;aF$<CeX300!Go-js*IXy<T
z*2~LR6&NdCS-HY7(}fH*n?lICWw)YB#IT}B3jafqmJ!t>OFw<Er+&ls-^~3#^NC5j
zf8HAZ`1EPc{{Pvt^&gY*KQ?L`5B`4-`~Uxj{XYlF&fop(_gUxhSpnR$ECLN<H@LT3
zk`cWJ_6JO|f!COZAqDUz@sSF^y<~~KCED+yC;)V@7zXqK<_C7z?ui!8hg~J<jG1cq
zSe-9F1rA<nn+g>57XwMEb&Vwq|IOd`mRg*~EDig1!`f3>8>&>giTX@ar*E)CEAx3?
zdb1|z1su$rQW_o7A*v3CZUX~sf<Wvx13tQKdLtB+C?FjS+K`%>Nqr=3HF`^xu5#jB
zt4b=6Sz?!Flz>gt2PghoVl|^ZwU1untH=uHQ30_8NUIQ$u;w|T=Q6i}6$rnp%V^cl
z7^E3XrJ=kt5IX3CeDl%MQeNbPiUT4}L_AGB<(PxDIHGI+;caw+K)W-IU|Qkq*UF8L
z1KxxMAs&=NSA2tz${DH<dV4q&b;AUWdPx2Q)^Y9h`$C2@7>)-m4jf`M$5#UBf-_y3
z?cnxtKwbo0-y1NG!5*raUBF1(Hu}Z%Jm6-5@<FkLQ@w<(=i9d6xEdQ>;>jCk*Oq`M
zy2P*D%nsb{1lZ+uV2+DnhidCALfVA0=Zt04&+H-HCLr}PXS`ALB0Z1DcI|#ZA&YDv
z5h4RvW#t44e(eIw0&HLV_DI;7tvfe#0r*nU1uRq#hLNDpY{;Iwu%a;*M>b%?Tgjx`
zQ4%!g25Afr(5mrsH(nK*1nJ;SIw{eHi_X_X=-cfN2rWe-agggE3S-#hC>Yf9<G|>U
z`qGkn;Iw=8&<zF=97my$>K+qs6!xPJIKoN0?0c=GKi4LsW(5{eXLHkT0gchO;7?5A
zPG3jfwfqLq$Zvh`QWbQE@>|;rEY(_+orju?esa6Yw~LxKQp#cb(NF5csER#BKeM~T
z8^UHO-@(u#(QkKPUq<}{aP<g-aiSq=Xh|+38G^~$Fp|MgB`{(`7bp{`%~bW}Z)%qO
z*>J4^2jaCA&1%Ms@>O^buNem_095FWq%BSKq2WF^%tU1p5Aq`&TZNT2``W(MLtLFG
zz)Cs1T^}|Wvzj3~V-}(9Nc4VhU_WT$_3Nnb=oPFDSQS}sd}rWYIZarry3r_wm^3QR
zkSHsXn&ZJmZm0JG2fkn|68J-_f0xvq=zJ!!kN*7-`US8j@KWJc3~w_vEacCrO-QJt
z+KDrX%<2&yV$cixkhU#m={SnWNc3bY_tmnjhU(UVv{M_WZKOwGj4_l@?$8Z^Sls`R
z5>>?&PPV?dx~PutxXpdl76y(GHj5z1*+_#y>Gal?8f0Y58+M}ZWNa5GHEaRCVoWF)
zw|mU-+>pav1S4<Q3DhR<Et=o`6V^{wKo*gq9>r@m^sK@r^jh>cp|1I>Zcv#CH^rdI
z1~dAEox`AMn#Z)h{BOti%G6RNZhniEuaX<{nbR#tJ&7F&RJm4(*>5G1rF$~tRO<sY
z8hc1v-X!{~vg>8iSd)w$LoGlh3@W52dUY9O39uD6Twcw_8`7b~S{`m<!*;vn_{e!f
zxVsjabrbKMkZ_oZgW`ln^koj2$z&Fq;D>T%m#}tGYS2a1I!Q~(Q;Zd&C1oy1iQQDJ
z){^k>XEF{c-cveg?)}z0IQ{iEctVX^SqzGufSYc2J1qg!>Ds*9I0k8&BQuI|gTJiU
z>W)}5k22D@rqL?1bqeSFHfFMp&p{XVlRy)!TzY)LP%P;~BeA_L%87S@B7*odED1YK
z<Z|m9U>kPaur50<Fu8I%7e4MHw3lf$h;Xx0oEno%j~C;Phz(*Zizn(r8OIJ9#ml%I
zj$w8#(7$qEivzi}AU2x=^XjTFz}8D84rN)Mnu?FA5eXy)zt3sGsGC*GL{<iGp4g*N
zHCd@xS0=|dP6bmv8m|-m=CqJs*Q`vYMTMwskx3E~nACv8wvcoY2Pod=&WvsDOmA~{
zMmTp1z_~jkoVx|!+?lb>odRusKVzHU8*Q4BsnGn}c_OK0(3g#L(9#S{!ak{+ojJSN
znW>xIIY+ZQ+h}&@9L?@*quH6Wo1NLZ`F_rBzE5;hL>ZV&gOkO7Ya&-ljd(6dcJ2+y
z&SD_hy*EyF7lf1Dd*ftxK{(mDHzYd?g5>*qLz208Vp>Al50XUwt_I@C1gVg<d&q0B
ztYCV=O(Zq|@qI>|kIV__1O_t`;5LA)v504_@m)%emN_HPY><c{SGM<2eOUmCWlIsT
z_d+zoJPFOS4`C{@qahU7i99K=6;WFClf+&mclR^HPBast`*`7h&mG$S?diy{S+EVL
zi2<p-k+PdwF=gIL9B~)6SqKIzIfF%Z!Wm0F>Nf7y5F<A$YipU@r+pQuDI1&1*aFgY
zn7#Yd#c~+J{md5e37gw?Q6>VWbPn>{lY(?U8&g<GthCmBWW<VS-B&&kDZS34ByTkG
z0ylI_a*FSplS^i0q0(hh(5ZjGH&;x)xzd_MlvB9o$_!j{<tyQu%5SDU0cnfko-2P(
z?)iyPl;)x<#klCoXXBzPb8}ImXS{Gge^(a3K^M=TiZk2=^c~IiikxoCe3Hw#C!s{;
zS$lQX+&emHoc(tCN_mN%9bUXXJ8540_Ucd_Qd;tCXT;%aD+zoo&$w^N@F!Gsyqu{{
ztH(1-rcXc2+0J`1My^kPqOseF+a;;d7uf#j)jOF0DJsLLPXtIl50;3y4Y{Q4s@{@l
zxW_hXaln<Mt|Dfr5NW<EXA_jU%VAIPf|T`H@jHRYzA(zu)Ej@BN6495%TK?AR~N^r
zR@Ky`D66cUGFBy#eObP>`bqc}wE|eMBTYCx-3k>e649$7;&;pXTcz@qu(s8&p0%wO
zVQpn>F;0G88<Si8J2AO5sZVBd<(0$)B>CNaH)sXfgVqw~*{w+Dp#9UAFB_%g4eNXI
zrv)@U^R_9?Bnz_5FTVB6V7{dszL4Jk0ei?+eKhlhEmoFG5R{XRS0ay)m~e}`87;?K
zoqt!k07GtRk<>b8YYQOIoc+h&f*_ak2yz-VKAB35v)I>`3)t7<28ZRv8`-7;cTXG1
z@@zJeJaATL#?|WQ!`147a8(RsSc9R{##DgxOuFGKA252Ru586Uvr2Tmwdj@F=;F(S
z3!%on3n=7BRcg~LtBgrz(J9vhVk%6=zl2xswC3VBO;UFhirtM!RhxbvV%FObG-y@n
z>%^eEZNjbR+}y~2w=?18%A|LbDlSiYw?nr@G8bnvTA2E#bkZwwnXih|ed_JWtt{`B
z=~#^&J&N>|-Aj@7aZC$yFHp>A+L@dNTw7JNdqMOnEt-jCeLD976SZ{WR%|c@d^zq6
zEa&%`I+!&vtij2^UF^I}f~$+k{!8=N8d~nKQy+GaUr5d&m9|I;wP;O?tgUb%Q|e?a
ze*&<S3YW&Cw(KPf<XfqWQe^%(YC)Ji4r0A(k2*7Yr&G9HnH%?W!>n*`=i%I*hU>zM
zC|@DwF;CDevscjF)*>{UuH6r$@(>HmwyXrkR`ShC?wcpkn{413O?6iZZ|*gk7=uxL
z-}n0Z`{{_%D5#$=?HjWs{N%!gqM~f8NQAm?)vxN)=+Ed&_XA|Jw<|to#5SC}MbX&G
z4Ap{yV2_=HRa~|*x2<EbiNF4m-9+Kp#L^O)cXj-h#C1knMP6&kB6?eCr?2qkQ&ueE
zOKc+djEUScCUP%6k$drp+|%foFnQ3a#CFuB>P~fKbYpZ8H7_z}?;h>#6`#Gm^z7|r
z&0Z^a_F8K8?BwjVW}3Z)F1=FvR`_QliTkhT$sq$i5xw1)TYM4n{p`#~2#l`yC6~^O
zF`&}B);)K4I?@Epv!o2Da{rmFy6;-aR&`7U+2+TI!F}0l<&2aC-q~lfg1JD>1ubtZ
z;vO(_;5ajojmsCwLmUm=h3`9|4wWNg8nrMqLPmfUi$eEeSc1oxCc{VbhTHyd`#tqD
z1Ad_NJhM6tL(r@|VB@&g82W*NO6`amhNO;}PV#hOJuR>LHZ)7oXyS;4W~i}=gRZ4$
zu)ZKX)bbEfFN}%R)d}HGR5zM1Qv*rfX`dChwW-L>3j=s?xPRO@J8Zr<KHX~^H(%r7
zMH%ySS^K94hZXTQGbX%V9&eL-Wf^)X0WX!C^^in1m3dZ@I3^MKm6CA=inkiYoe{?Q
z3yuWwQ=T;5XpeP+2B)`1Sd~Py#Vz@gk@NcG_4(mJ+JgBhMx`vK5&)cT{(d)sGwaZR
zlZ;6)Y>aqSo>B%-6B*-f(7<-@ZZWXdr|33M#0ZSsHKoG}$$iz~T6JQNW_E?VFpP6d
zdV)Qa2kx*k6;C=_!;-kIhzY{-{s1)&&bOlAkL4S@VR<Y54?Zv7V4ev4a?24lzc}vf
zo39&pyI(QxBD%XUu#}6(&rf%M2eSJ*QMWrg>b@2ZHw}q;klGdtbfUliA8`ZYz5Q}S
zorNLIK2&5UnPV-Lis+IUYO|!29~IBjpd1~mGVKN53L-Enmbghf)#=yUYXI6x4PdKP
zL}jZ|7`A!@oEd+!D1h~85Xz$U%5qe&Fnko3G>$xcguqGt1Ak(&4g70>QV4viYvsed
zR(*=zxm2t;%rXM10Wv(<N({ms7J?9JPQ&jCv}Kf;0k`hZ%7iKn`E*RiV8}ped?1Bk
z#L%;Gh~SAa`|t}VAO6fb_gFInPJCP#I*f!HhYp?;>K)2W#+Ttym_S?6EkgFbth5kE
zy~O5o-r#n5yG_IQbmUzZ+GmO-&EOXdjaa(n!1&p_*ZBMcxP|5)ho$G$y3JnMxA2iD
zxKgXgsQUSI&`4QV_9pKhrPcbLcx$AlQCfnn8v1P7p*^fm6w-~|d}j+i5OTKom=LGf
zbf(|af<}_zT9u8$jDZVMRiv({z9WH1I?;;7j$jnorgw<R(oC5*wtc75?A?yM5Gxud
z<hJ1@(=wj7%s6~9t6Eo-V@;uzJ{G3m8=`S{JnTqQ9|i-B2Ql}ZVB8NeUcRu!iG8vu
zN0rP3RZbt)N6@XrMv(N0Aqi|iNuE`5g%KX_ig9beZh~9GlR4!b)GW-uO|2>+&}Wc@
zc80i?nvl5^iYgZNREcejfOM<cN^Bs>K>$$|8b(?>${I$|d9V`-QvNVj3XY>FDfQAi
zUT-T!UcI8!)SH`}dh-nd!0e^fzus2%sER0ny{Q^tZyps8>n#w_1Q;2BCWb<Q{xHtU
zFq8%FBn3^K%tX$vdD_e_#rKcO6%6n`VaFVI{LS%5F3^%1%zodoK_pC<iIX6Yx0L|W
zO1@2JlTDVu6X=Y5vy=X<St%&wxqKoOYDTG4cPQ`Kr+WIi>WMcz0(4(GexcG%5XP}R
z7haEDm|&QH`FBg^^W{VNGw08->pMrW;PrUC$JuwMWh>PAv>tN*Eh7IPC)=5x|L@tx
zMo#{}dhO}j(^USyXZ45te-HWp{tNQ|y*b>!IGvFHPruK~|F`-@a;n`kr5dN|l4-iu
zyOmFr4C^7e)kAWtS(00&6=I!1+&DTk(`zSlUNJ?d@-?llPPh2zt_mg&D$)Sn^~!1_
zTLF$rTjm(E^jR_(nQ6ik?Hg}I0RS>_OIq@;BYziIzSHeaNWm1Z#Tl~cb1U=aEKib=
z)hPQt+xIk9WmbV8m9HZxn=hig4eupX85wTajYo}2Uw!!fozCxVTz=E#RPdVMclVuT
zcF^|vWw)XuC^O(wnor#0o2io=@ByQr68u>63j|-MX8HuMr`x_a;M@|sBIjhqP}x9q
z9nfE%AN~6BP@oIExJJ0f{f-VDKQx)HRE2`^z`Q&SKaK8ImszMeKBdul1aHOL;oIS(
zt>x9^OJgX$ca4{>%9ScNhgEeo2~0drQ@|yy39ab+o-?jvWxzL?$C7|v@)x)XMp>7f
zl6NOdgwTt)9GM1x{NZ-8>2!;E>4gF_@JApM4CztvJN+|}TBtaHpQ86qgy8(#LG$4{
zk+~;`=NAgh1Vd`1x=1wiS;IjT8_O32G-ny+GqFSpmaLkYb<&-tmbe5x-Lra^=sUN8
zoT|mhwIH=-(qt)daS9E{#iHy)THdY|!4d~eUTw7&(~CJWD?L`@>TiDB_3WjXzY?cM
zc>Q(}nF{9<#KW`)@d(3uZ9hq^?LtwY?#+KE$8Vo?=CfJblLEZ$$sEk>Yg^>mp{qXu
zd1iMs&r)Cc%uD?<BJ?w&v}_z8X{ukzUG_1;U-=pq{>rCW_$vh#{>pu|nxgA=wn*Z|
zm+5Tn7Fm)DSyhV}?k5_sxnNXecJod;K2-Ej`Giw%hBIB{)MvJvfr;!Wg^}8^NH)6|
zBzzJy<Z#eLB$ou^m^rZh-<C?|38t^MoL80q5~{h<LMDrDt_<;#n-t()`sAeO+X~G(
z>fID&WHN581X#O;+fBxLHKOp{cS`QtSQ?`g#5E_t|2@^!sX?(*6ZYw*>^_sW;Avan
z#Fdi~9<@!m&GzGX^x7a17+glXP4fE4a*45SlXAjTp{J$bOIqxfi)RCe{u0@wOhE6M
zXlmxY>(l`)M5CW4Xl^19tnM?i7t(XHbJLPnn(9iXT{HfB{J-|z{0#p!`j?*XhP^>B
z+3~B8|JQo0{%kGb|5bnbbmPJQ>)Z7II_lrru*2|yS#7&5{QJ*=8&;jp_?ve4f>m?;
z;`Hq3;^&v=DQ~aD%e1SP$O}vz@8nhE?ELVcx%XT1;Pltu;7Nv83oK#7WP{GVen6~w
zw?t``UG=2dbNax{)t>Op&tfU`9YBUrM=ZI~4>Qcqo1c|0hVOY{CEJnPC?!2M@(R82
z`s0Cf6tZj)@Dso1m*-2@x;*^k@=!t66<2%CrHf};@&O(+UmXrR<`e8d))k&AFCSoe
zkIU;P)BzoUvtQgc4Qm$&w&Rs+7UO~nk%v<GdwfyFfK<1rQ?Rr{Bv&t;a36|vd}mmu
zLM^-ffj}fM7*tCF#em4|1eG3nqbApoUNEpk%Uf#9-|tZME%D?DKdFFpyTnE1d*jQV
zXk*Xfif|sN6a?L}G1XQ5ooA-3fR;a(WxGcH35IJN^cXNzA_kwQ;qxSXUOs&6QZ@}f
zriAjcY$$?9Y7H2deM`u=P?D$v+f|N?Bk||h@o!5U-%Zw`h^Kg?OREORLh_bimS-7Z
zsb-o^3C#49dfr-<moyBp<$A5rTFZDY7qV;@SME7FYIaf!8pA@64`-*W%2X_~#WI%p
zhzN;u3_1Dou$RXumVl0ov(gDfB&Q0>Te5%YS`+U@7Bfc!y#O^#>xIk8>QFw>i$V-P
z#Kn}y=(b8D@+@>VvoW1Mh|eli$&0SX*r-pWg*3esqmFtGb<~si{4CT_FGL*-ldK*{
zsIXo^>Zs379jX_sXHrM){?t(~LLGHQ9TQ_ot}ZVN@xr}w@LDU$bsyAr(rt)qLB+N`
z3dTrY9nrcKZ|&IF*-03~B${I(AKn4z#khmIayw4KX?oxcT2OdXylsPW#{6``OdIl>
zW*hz@JLjngi2vdpUw#v{VGJXyi`X<9BA=$(!CU$1F37VDl+<=l1U>SBLk-d$qBiRe
z22RI?Z+-0UEnr*@F4Hr3f(}LDQ1{;GXT{a0O0+cDAv>vvarL;uCB^O;C>Eq^l;y>H
zgEYcx54(RHq{suktb2~@i+#}godE3zGN{3qrJN-F)C>ZuDMK?nuN%+nSu?Ii<0<+I
z3KLjNBo%=~(_cN0_85c}CQuFAY!sh?8r|_ss96+185qg~XgY=#wmNI^y-0ob)mr1_
z8XE#Xs!v$LFs_T!gVRkB+OQf2qT>W@x@yydh3IlHYS_o(zBG4BREzD2YI&QSdaPJ1
zvP1?CiS&-`L|;rMRuTlBEGyZidTe4*{LC#E1);v3Xfz9idNv5Pi6GRs@^PBIg{)1e
zk-dqWG$Gl2)F;%)?xSw*RBLkER*QC~HMLodKQp$gHMy6nMcdPw+Ofu;ZACrRWl!~}
z)w=2_^=yCa$DbL!*JbbZsHeK>J@rgB6g^ArqWQ9w?aKXh&5|^amVULhrZN=k9Sf4I
ztR*BPi_3aaUKZMr*YbAc)voIYq1u<%<g}=rc}-$m?afvHnJob=d$_5WlOXY~nyO3m
z72f98)IPr!ZS-rX1?TVeQBcx&2-1?7Ix%xKkwS;$q)`;#|JSrD4FaY}MgU1bw!e;V
zOVW^pvQALqlj$%rrc5BG87L%N2(Cc}8d~bsFb<^56$$cM*3+%(s)5Sm6&|soZBLEZ
zQzQ1&h&?&td;>?`fF;)zQafCxmSXy$1}ef%zGewy;a@l=IeR8TSY8)0l|-TV@6y(t
zni_sBvhU=PrGbGIS$=wok}@Siu$uFEQCv4s%w%&P1b;3<n`~N(8`y9~c=sQgkx=1N
zS-xMoLwDf*#{n%c{Y+n?9LOiR#&BIlEUB^+4B=V*$NL%&a*XBa@)J@iPO_{=(o}^_
zmsaZkJ9fs-VV^FC<6QBQbNB%r4*4#g``!Ni&Gb^gPy=fIi9_Eu5n#nM5cAEhe0AwN
zKwgP=QX@h11p~X^cLKs(bXyt6re#Y~tjPOs6?sxH=TQOllF}v$92k@rW{a1Ei~-en
zfEeUgKD6|b@~iUE=`d-eFL-G2ar&{vxsDwG&}8&H<@jPNI$gjfwxczg(i4(%d4<>O
z6Aj&|oNwyl5VuzRC}Ck|2?qC5d6LwV*g3*XY$(-4{*i;pHnT0ty0P*=nv>&fQqSGQ
z<A&TR&pm8N_EO}yA(y0QIBdugBr#KR3cG1Ta=OTZ4;iMDP5u64GvO@a6Om250NG4A
zg(yHa^;yZL$SK6kWK*9`HlObFAsR7Tkl$)6Ak<P~gS@j06LU1gc?`^o6y*f1u3{Ao
zbK<*g9k1jX!;pZSZaLJ9?69-4g4}rMw4ETZeYby0{hh~So!}!>2+P2}-B9%G0R2Ox
zgqTQtSw|z;)utYI%zS83O#R(^qn;Jhd|7ujkJ)H9l+>cR0aIU2pJP~3d5}}p>_b^I
zdRz+FU6j5zmHAFU+C@%49;o*o$1{^!obQ-r(o&i4oMcjq^Br{D`?zBMbvp3`BY*MU
zd*ivt^NW;1vzXJ12&t3LE>ePVF(((P?(38H=acN;_uaHVoQ(XfPDZ}0;Udl%_5MB~
zq7&kHtC+>(9tSw{p5T~<l0}^3#M+zdG$+#rvq+2iCp8q$Y4^#sAZMPbd?A~EBqg^B
zj(4*9n>q`yv~9^4cj^q~t2)~I!Y5seo|23cu*^yMY{Xd1cv<LZGj>dsW6GRn%$#}#
znj+ko*R)~m%rY#{O^5VttHjlahlt%@{QlR!I`CQO;#a}=-_JIlCGLOKYW1~;`(F?D
zzrKn4U;jFQ_0s&gd33UW{QBVVK;QjJy_n(NSCq(HATyR>vvDG%L1$%!!ZW)d;2pa|
z5=QMJMaya1@RkBK!~70$1CU{uP}@)FNgTKXZSv=>MH3#AASiv58B$(0{&m=F93Q<n
zIXq~dzB)W>;Pu#(!(W<5>ay(7zZ-kUht20lze17oqkkWkRVUCv#n=qSQ&r(xOVoB{
z&~T0h45bs0D4V`61WF8p@8V$rsXyf_+-s1qExRfTuu+2wZLuS^EXTHRGQ9<kUH}0p
z3rvV0bfLUVNa6Sb<S)Hwd1H`f!%O&b0~H(>uPuwN?+t(yr4bP6%~iWxSPhpjFTP9~
zqv|)IJ}0jl58bW{^LlOHX3Q9C(#LLK+8$UFNIcy0Zg5Yj=M*4r5|C0MqKy-I$3_u&
zV}}{op0Yj%mTC_NDo>b=Cv@^0fdSRV;U0E|GtN6S^`r^}>-b?={|0q{l8otgxjm_B
zkl<<p>Bs8G__#YeO0kqrsM1}|f!Gw><zi~vtZ3re#a;D`Xmp36{3wVnjFRtPIpvIj
zvW80359;IE7W~;3_`4D>j7qAjv_Rc(<oae3q3Y*%_nnE&iRVOQiI-^#NOvfa?ObOk
zaA78lr6Mna^jyG~-|pRtmW_<e8;S<<DyIYRl`v>KHo^%Bno?0lb33L$^82@xaw%=;
zjxWeq8^B_yfsPC8Fcu71jt314?H)affa_y)`B*#B_=`<R$NG?yC4j)7(j4~QXrwAI
zMFAZQ^z17~COQb5fjtb}b^xyrnQ_EsLUvT4iUdL8UkfAPM;Y;=c+U|Yx0{N=;_!Eo
z@g!IlK?N--W~F6sg5_1!lo&}IWF1Q3z>{3Y(*+FD#SM9*kWVTqg5D2w@(Gko6G*}a
z!tyS?ae%GZKvgSrfJ6YO6Uo5&bBy8iu&ro)g(?d`AP;`x<CV5@T<Ag6u%rYOvkv*M
zQAdc>))=;N0iS;=rNNO2w8p-y2otX$6yg`W2|%e1c)VJHp^h+3LFjoHk{gNe3V$K`
zL2W^<Z~F+XMcun(p93-89rsD_E6U><AYVLMgNT)^V~3_G-kT&~qjxH4wYh3PmZGc%
zn2rgw6I?SpymanNQ?3Gq|D=rD(l)8g;WpDFrt$LHcJu~qk`b4TwV$8yGNNrmci5He
zG!GJG#!o}SZxE7*At%lX(jIpDR-ago+r78s0iKfgoR{J~9SQPm(FLVwi9HUajuWJs
zmndG2+sR2SqWRJ(4v_uIMKKgfP2&pK%3E7C<~uS5Wqct>m9nanZv{-%F@B`jk>WEF
zVO6BQtZ*}=en?PRQDFH_-$9n3$pjfpSw44AiCBaGqbme^lHvmJK}EY9A``(wTC5<X
zO3$aE=9sbXxiLcPJ*WIsR-#T>b8vBrgEqA@6FH|Qc1{&~EG}d!T!y-piM+H9B^FB{
z78SqtK`H<i@KhRez2Gdd$io~gV;@rt^(pfTo{=w{qDb4KvCm9ZpN}@=;O~jT8ObTa
z%v>ZDDW6h(Z1)}8SJ6^wT=7S3c{mb1ja65gzWk{aeLXL-6<p`&&l{j>&~CQvAjHUE
zyJ?lTB+Z*c&~w#UzUg}2#pD;HVF9HLJWTyGR7#qK?>gS_F*;0;#&jF)w}&2RG2FbN
zmP18Sl^M)8N3UQ9s$T4~p0K4da7d*iYg5>9Mh^c5y@wG_d`ga8+N%riI(V*Om!Wl@
z<g&Lk<i?Chfc1W27@Jo8RIG?va(30rIuJA#sGDb4iS5TQzS)TSjkX(%$g+YFb{bXI
zx{x|tJ<p_hm7^AD6%V7a@seh73o>HtlQyNyTlmygQLMN@V?P!SMj8R|y&7EGBZbfW
ztyOAla-)Q2D1*`O5vVvMwk*8cQVzkiM>5@sT=I>OprSXp`W|V)wEATjHg=_mAB-k5
z9MImcW$ut+HnZ3sUG|pU+eG~q5~p+H0)fFx*d)GTr)=bV@&WqT$Jrh>;<g>F;Yu}L
z5srF1RL)MzMsugh_D?g~m;0-%1{pbQmpZ>VJ3V>PJUTf(Iyq!6Xdd0l>^%6XmXh{5
zSkgnyesxINc(H#8hlV9RN-OgfNpTf+_aiAhFw@8<*e)s4)TztT9&i!S_jo@cH&${l
zMP~dl?$w+??cds*&&x0A3qFX1ZCErq=3X?*J8FUrclhr$-<C)18V%9x?T969Fj#x1
zPlwcO?ifX%h@??sbrs$M(}Z=}S1(sqlsDTgU#Zp`C-z&z^l+M`#|1z<hI5EMbi^mJ
zB#XI5v|Pc(-noI|jH+c(-%`j^1o%4^cp_@=kQ>PM`Q%TY?r~LgtLR_bw}EK6mzQ!5
z;`m;QwHZU*8;!a*3Uzf8v&NL2>M7#`Ij0{uNqFCh-JJxmAShd2*o)Q`Zc^emJSNaa
z_VmZQT+uyFqY62+JBD3CM{bjJmd(egh)~5{kYjKt0meA$W^sa;AJ4VoV`MTYKF(U=
zf=Egpm|8=O(Sa_b`L!t#O#gPV6rBhg?9SjXB_kwOi{&nsPy)*ECr^9HC9Lp?jW>z&
z+>JNA&D?m?8`^kl&1!450|KEkne#(TVQ4dhWrfVfZHL@mgMfop2A+@hKBw(=U1+#(
z$<A@XdbpV+1;TBQ(fFVZg&Kd!epo{o;?aiK#PVhN<6|tr5QWu9*@wAj(*agLsi#Ey
zRgL|Zeuqww`&`wH-%&mzJm(s0aiVM_veiA*qKKAE&4@T^%L%U?XQ&k!Jhga7%S$~8
zy?3IFJ8gH^?vGi8jb2r%)C1WAeIRulDPa$VQlR9wREcnRLp{K*#j8kchcRnx2WMDo
za=_`M%@cJ*fM0tc3Hn#4{?V4K(@}OloNZ+Luw<%2aF7mLwC#4!S>d4VeNe1K-@XM{
zmC>G+OhdI_G8In5C&P^2LeGk=IbB|=qZ0ekHY#>@0b+TD4~BNNh7FuLdc3nE>XB@2
zC2aIsTQ;;znlPnkRtJuM>2z`n62DS~BS#Lc<VRh}(isN{ygA^h@EucX7zLQPct<ry
zAC)Y57Q@ndge64zz(rOo8`Of5-4?@MQ(9q-jYO4ppk<8k%#)k>3YyJV{4#4GeWVx!
z5(J@4uM9Q8sqGB|gZ&zN9Bcrmt|I<7ouF<RBL^vj{#)5ve-0f<)(<r&QVc**j99Rx
zN$+uXCeTuHu5%|gB7urnji&h%%<$i6Zd2V*2MBB7PU;IFApNL*KU%>SlJ{s$U4qVA
zRXxPJxi7ro4A4qLtKYCFGafvty|eU=l@)WeNlJA{4P9#RXrosITZ&nQTFNA3*b`*Q
z+&gti*SC~o$XDb4Mc+7j`f9@m;6I!@{cfZ49#1dm<+%sq*N?e}P4@r#v0h)RC;Y#D
ze7e5=;Q#dv`hV5x;=sNF&i%9P4?wBg?)2!(KQ9Ni+pmIFuAIQw0SX~QeIf9=;Wez_
zEpaP7v3w_T*}}6>Sk0@Rk2MD#tfE_dM!o=PSFsl-uf?&0dY5?N3_<PgOP4XMt-QwU
zfRnffJw92&iq8?0a|w!gjyp0kv201t9;^?<PwMKr>P<F?8w^o~%^D2ACj*T)tdN-*
zvFWRN3UQ4-?XE&nb*)8>IzC)utk}ZL!_{N0Lm$|}{KM6=ZTR77KM7B{Pr%@=-F_dB
zxB1-L0wEWQUyd$*K7D;58YjPDTfa2U&Kf5dzirWhJZSU6xss+L^fF-YDc`q;A$bD6
zJUrX~8Ok^Ij*gEmenYsQA6=Xro}Xi5&ri=pL%eF7T^#MdK5m?eSFg`rot__7#ku25
zrY4PEcFM!nCd>h@A=-X+oDh~mpfH1;htkAHM;EXIa)2m0317xyG(F6!mPZt{2SXI<
z2_hfLAx5zYVDQ`0{qg$xx_UVrllx<zD+jB)sQ~zmvsaD%f5ErIv&Qk!zYh=2509TW
z4*uu#=;VS*oSz=QIXp|n#xGtEI;4##fW{K*0Abmk!L2guFilw)^n)|U?#nAq%D`Zg
zC~RK)_Gl#I-K5%Kg9VF6^L{5iP9#!PeUfEh&kfFB9q7JJUy%oWmJ3^ETV}sI=d=Ra
zuH8RAzSxe+@5cJtp#bN*CHDScIWMUM58AUue#}1v_JFBtqy`+Dn^D&W)LYu}#m-^o
zRb^TqSW+D*;KR2gqqaH@&F&aW6uH1^mW~Bbii3tk5OXj?_~PK8rn*cYEMnpR<mvIY
ztp*0l-Vf?4dNM$hWv6;swXjUL?*Ys6d~QKks#w~@X*#x1WhK0GdML>=XLPl?Vem2$
zb}}AZ_;&0j9}PDm{3JFm6b4uoM;M$)Q$$(Hb}E)F#;bBA3r)zQK%pM0=E$)MHZi_y
z%>54$I8{|0v=V1{35C^(6W4dy+=MDTp=Tt9E#FXOC*UHoa>813hfM&ii7&U!wPtXH
z%}oLhpCo*h4RjqiU3=UQBMX-2$I(EZd&wfC40#x;AYdoL$RBZxd>=s_qP@%GW`GlQ
z(Q&@x;FM!xOftM(xwi6wi(wy!UvR0H>9A*044Np4+B&r?{f=jw9W>qGNV^Gk%EVdB
z@;C`>f<66^d<(O*@4;)@My|!4^H~`V2wu<u*<Aw3`w`~0BB#I0XlyYO1+uuL<4@u-
zH%xNAgSKGusI!NZa!S57XN&zle|LxPKv6(^t=Nn(-Biy5acXw9D$H?{PQz`{l{|v9
zQx$u+iV7^AWRxuenzV<PWPqmS&OU%0)`U<+L~p?!o|ATTo9faT>x5E&C$JrM5KN3?
zFLNB~azQo<(c81tcLwR!;%kkldpO2O7fCjeKnq}9BBQED@>05^_CeL*-MP82`OF#5
z3>jonLZOV~`<`U6%!SfGLeD`naA6xF&Y>!3KCtret_OaV>h7cCmyay*=$t-}8;_{@
zN9Xv3)FcI@BpdWM7&tbzK!X9DhWR$k-6dYH9@v+#GEiRug-YGI)(<V+o+>Z*R&JX*
z);08F6PeZI4gwF2b&EV>NG)=dENyVC<cBB=!1I8SfsDk8cn-24(rHzL<}|kZK<|OP
zr2|Okwv^)D#cf0y|8k2Eh<zsEANG0b4^4_F%k@yf@&8~D)#*?=l*QT%L818t_YLp`
zC>DKz&1o=U4!x@8$^i13#RJvj!+8u~ZrVLM#sDSU<?TyCc$w@P50C=QYWZj&k{V0u
zv>^8Ah`kUP>+&W*BwZ@pQ?xKr<B*F$u2>LsjT9$?hkI38g|XE}<}t~8Ky6US)brx}
zIR@sOIWKbxk<{=-BLgR5udsuK5Yi1M&T-_$RY}RwLf@hFJgsqD4H5@C8Dlr62kC5Q
zt-8jp5NYw4B_AgX$SD#sS48H>$dVyo%7E-WD}|~EJ(e0anvDCExTn%25j#Pwe&cfU
zLDD@)x(7-3S;X2WlwkK1VGq*k6G^LCMO9K(P1yk^cye+>$QN>nFmI&2kS&wyAS6e&
zfp8<~mM`35`*X5Y$SYq`prxDK%WM+&XE}p=5LFMN>c3J{X%|X?ch}@~9ohx8IbV2E
ztZBDu^P1=+t5NPAOL7cm5>;WyE6G#2*Qi6r@mSuC1;BxCUD1pV@P=@=pe&Ol8tQwd
zNf`+hy+}}!7X){$x<n(bv=sw2OEIL|nAd2yl&>HxVB_}*W$L|zE6SoALG<?&J`aN9
zL2x_>j-rAiOISR}g&E|+Hz5L2XUEaCj!A4kn;sidRQe`W8oOg=?tFmC6PxTOE-aux
zYAa2iCN1>-*YwbGfNR7KwfFgq_HFlni4XLtkD}#KuLOv%;27;gqdsy~k1n+6&A)M$
zBqd7J^u!ZnT0`7Sil~Rc>ZIZpdOWW3;key*JHP`f^qLn9#UmLy=;ERl3IHx|7-Ez#
zwEzR8+6bre*h`;|1(CSrc9hWqM$y1eD_Vl`l{vEb-9A3JPPOxfK3!uS$Hh4J^+xQ7
z11>AM>GQl3JUIv}6DoCj4%3}#eUIBWsN(R(M*o6M@m4wGWH0-esi4{(jkx+L-T8ys
zP^2|W3w&!slXYNcFjeGACX<P758V#!0ZL-|Wp^pT+7cI~Vplg3HW9J3BpwyyeTZAW
zK%=WyA7zEMN?KN^lk8>7p)bs(8+w-xsXRJBjj39w{#2AsHUnju`_p`Dx{+V8tz51Z
z_w+E-v?QGQOpBo?#Iur>kVW{;$e|<;b{iNIU1MU{T;I^f2B1v1<U1AKN>LhkBD(2f
zqe3yfP5Hfe*E-;nL{Ab3ZVcN9$lV%?1LgaUO^O15<iy-VW?W&6y?ZP$ZXj=csTE?$
zHMIxI<mTo)Yp<qPovil-7M-~W2#V}uFGIs)rhDbAkkg_Y!YqO~@#A;)CoGH^+nBZr
zP;E=<6m~`jMr*}aP6Dd@b|YyQ6Jfc<S!a>WOC44>QH?9u+|)QQHS8?$w*#UH#NQtu
zS88|kD1N>mm#{G+TlV?K$J%QBj_b?sQ7dyTM0L0G{7*leZzaZBNH0b8`Uq|7frF)_
z&7Q-;0kx?cv(F_OgXT-GGwwU$QR2Da=a2NNohB=V3Eb?f_U|ScQ5Dndq1wK76~3!>
zaNydP-q7v?7M4}{s?-L{m8rbM<>{M)gQ%K+4^CpLxN|M!$dgr_7G|MNE@)cDE6H`N
zoK0m@J;xrY3tuv{0JW|(5wl$)mIToQwv7F?8wMI8Ds9R}`6BN6Ix{aZmiqlzirG*;
zXOS!=dmx{k)5Q4Pe<8y;(j;_W{&VKw9lrphsSsSi39eiWtjQpA1R31^2IDqHUg?he
znM?>>&bb8ooM%_23p-Ho#8ZdB6rL0N#Zg3F*!xlSqhZ&BIq7;|RvknLH2Tm$-K0Rk
zC&qa;HTfo`x<+l7hu>QnEg1*kdG(_rTO5_c3xVF8&SsItW9;nmRq%xBo1BRV6Lei9
zfp+70e2r`wgH0eJ@xmA(-Iks6qD%bhOsOC##GLx$&B>OQh%6S?TxoN2;-OfkCDSlg
zX4^C^(@dE(Npg}mVe};S%4^n|it0AMcOV^f=`}+}a=xkb-#dR<hnk;mQezykQEhwu
zfTN%{s%VCg!82l|_KZ?{6H4(LEVU;7`b#tlRV%AgiKYK1$}reBPWz=Dd=P)(l|%TA
zKPud4%m5F}JE|(8iWSVO6a7rqdE_E8DbzX%9_^_4h+}vWMY+Kq1uVn32C-pPa9lBb
ztW{)?9I~6X0o7>wp>SlnX*BwTd<n<()tM|(Ggai;4UYA-CX<aS_0lHi)z#IK3bx=;
zD8W)G3VBJ97Zk1)Ft72hk3`R=Jh|j!OLdk>pL>UoRP*f~P|0<#-TQqVgArl@y<VZg
zbB=c;)$pL_c0-imE+4%;!zv%ex+OL&@l@PdBrG|%M6yuL5_O8rK;0T7%15y<ys8{v
zSCk{tuJWZV!20eEGyqfS26lhcvyCzsKQ8A@?Oi4E7ZV~@2V*aK<N%y#Yb#y4mQ%X+
zE=qL;=sFrE(za5(D7(+csMpS8*d|<h(uznbGUtNOHuk_pwFi$8<mm^uOPA>JY}hxB
zl3k3z4)jGIT{{SMv^c2YR-LMaxO{y6@-gK{ku%w`l5pVl!2T^xn|OTu@-Z9w<@u{_
zPzU#mO-?Xw0W$&-QK$8XM5bc>eKeLJ(H9{+hUl$iky@fi?VXu43jf1MFY)<|SPD55
zvKCKcr8xhv%|XN@d~>8)ck6^gN3!_-QG6;f<?UlG7wBADB<b`S4PC7p*td8P>fEpa
zxPKe95kzpX^8!*jpJJiGi9usc|0<9H+>;nr%LBGHT@&@~CStylgc*wgF}QsWXs=Xo
zSX}a;o9|H49qNoZzLSjP(a|G$63&NY8JD?$kGxQYj6<UsV`%{g{Jl<$<%syo1p3)w
z`axFveR<|ZP8|As&B3AI_bi8X@JWJ&i16`?$BJQ|c-Iu%7y0!7aY8l_44nrPG!7h-
z3)5p@yt*N3siFe(eX^mM3=BgU#8)paTjI+r_OP=edCGvovQiQPJD_NFx8w~GwPJdw
zWO!361BKdDC#h26Vtvb?nG3lhRy3>0_WoH-yZxAG9y(V_yv6k(HD?Wu1E`X3Aa9hy
z6;EJ8Po8S~PT+RNwicvP+$_^`=waww%V)n2-)VZ#03X}K_J2Gh1|Vg-2w?vabZ-gB
zZ82=aC$yw!DU`7&=%ALu1=*pBUXC?@MGkpduE{3{d5ehw(jOADVT*yZp+y*KUr}UI
zPGP9h@!gc`%g_mk*|iD?VyzS4v|M}s2c(ywxS`t0X^PxBB9sA%=%ub4arBPep(x)m
zR9+>}xD0K-Q~8nl9eS<6_(g(1la_1<;Y-ZP`tmSp3g2U%LueoSc=8i|Vi+gwCWZ-A
z96OM>XQko)v-hUiZ5&I3X#Z`jA}#mFTObP{0ZOvX>3$GJNw7Ix60{}RzBomoNS266
zRR9Oe`cCiRt)lnv&Urt2CvO993onKoYXCrs^7VB~$0mTP%*f2h%!tUy7*Y-N+y)rv
zuUe>DWO;?lz=NM_%zmU8LzJUNZPp~!yGHA@v08k}rCJM07-m|R{F@BX0ZSBwxoB(v
z6M58gxVi#_nifAj>V#1b=XF^ZfPwopnNHD`uy9ANtNG^rUn9OhhV>T2!2hlc<*sK9
z!eQa8($h3CF`{KxbSHH3h(o3hurT_Q0j+Wz7%XQDGjlEPJ!6z?F@n<AUc7+j!8jH!
znuTm+hs^gdA-cMLRdxmgflz&=iIH3s_Q#5~(E<zt-5vH-DF&(tf#vbf-d%Vl%bweE
zyYXZ(fsnV&N`=m?Iy6PX^HJ}eM5jn%Rq`&WCyOToM~p`;iIU@$^a~$INoy$m3Jzgw
z_!#fF5A-)x;aL?qvxgz=_)v&?J`PO2ysQdwTDRe~SC=+UXT9MtW{S}po_kboNiC;k
z__rsPH@OXdf-Az4N|<O9qnU5WpXFz$5)R{I8Iw6$TH@}?5a_}yh?@b6PDitj-~fq{
z+oAfHLZpuk43am;RhpZl@C=9QbdC6qee|aAC(MYMKLJ{i=c*rdo0DentV3}hR@J%v
znN*AFXp#o-Y8sZ5)94(NCj(nx8>w8VTC7dikxXR`cs^TI4k+ORh3upKptJRH>nb4=
zbisjQ{lKo~jrxOgVzv}O4PY^o<QU;UvwE)6+zJ&mm=}jqCphu$%|9C&;3GUc!<miz
zs4Oa*JPNl3=ZB3$3y{;PkJfh!t=TaajuQ4wLsTS67sa#nRJFVGLY`aXs{G1Ymn84W
zc_49BzpH27L5ot&4HAS@rI+n!r5%i9wXq={F#$abkT4;oW=8)A-%&vfWlQ9zJJjpf
zgfVeL6=@Z)6T&13CZn#m5Os$>iPXMW82EI2!*9q&7RJM#&!o~9_p#Arr42GVf#5)p
z@FlCmPv9Ot2{c{)==t|=gJl3{1JP@;guoU-jmxH}5dDxpYK(HVij-&ptO-^OOE62^
zKNJ?MIGCy=PExvLO_sDMiE0jKp4P-yn<iz6y@oBg^$g&;(}ncbx5;9yoU`}y^#2%{
zX@r>+>nLABi?fPi`htP8FH$@5POUwI&3N|~Z3GNvVCkbh4H@)&C`~7#Daw6th=inw
z4$m=eIqb$ghHZ$TgYdi_?$Wh)13y|H3%Q0S5eJ7GuD09OgRA~Y4LA^#(WrNOaOfQz
z`l#|Ap#0`zZ2O$P+Pu)2fO^3MIJ<H_tTLX&J+WF!<L()o3^YnPNSl9xfx463us2_S
za)pPiY~mtJ_1J4veo2^JJrf>(1yL6x2%_A;htp@~NwaK&z@r83!f3#GD&jEc-nwo9
zbeQboaSRKcY1)~GB8S)^oz0O;K0g{WMCl$)&QF?Bv**Mr*Vw54QJiKvH<%w0ciTx0
zzUOb`uJoT|FI0MBa#6%A@X_zSE9*-JK91=T^Yt~f<Cq|<+rq!9v6`+i>4!0@*_uf*
z$*F>=bKDwDzK&|53tP7QSDUIrfr9&Fa5)%W50vUbC3?V<(pPMs;3&#r>pmZn&PiHD
zxwcy|p4|UlvUVq)R%0|T*|cxla-3R7x0&-?CpQz18cIq0sL|^^@=0O&==c$7|1O4P
z+bNT3II1r%qo7BM!UL8%UD;XEG+zv_`-5AlYi>MZjRN0<oXMB0hJ_v^&ZU;?9Hvpk
z$tzUmS*P_hrWTX1hn^rtr@?wK)q@w7T1{JZD*%@H4vNfhmL?&(X5|*Ti*z26bdGc9
z%VA?c)*fZCvzqD#;{Z*zlze@E5n5hA&v6tWiT>UV&2<u(Ceh2iTd6%Rn}8JsJHB{B
zPT9j2VY3wjI2T!ltPHT^oU3EAk4JoqHPHc^bVq!=hPPVNk}2~J%e>ndz#q4SZq9p$
zC+$sf^>eg1SbFy>Dvy*x#gTAa9SR;#?W+}ev+@7};wT=X6BP-F0~)-qQNwH0TySM$
zh~S-UJP9-6d&*h}^KvO&>w0dy5n+lv{&l^So@sgHcJhREVN;F6pMRCROJ9WO=U40Y
zFNU6a1Pj=CLTfqmq~|=L%|u;H+qEfVl=AFs@Gd_ZocAo3RtzG|F5}|S_9ISelhE<L
z?Xjj7&-g@L7kPWr(}8+M*NMKNf;4#?nRb!(z5Fh>KbKJ?K)$<@&v|DXExd2(ik%8_
zyIa3{3WTK$;L4H$WXBkqF*2p(0X2FGmZF%lnXc1?-*lgU(<LMoH2i1^#tgP>sM9cJ
zuw_O!6=N>8()deZOjZoz7_mjN!sK+;Lg{|$Lg{iNJZ|+Pcd4kC^RE?ELoOEk=bcwe
zcmCC)+UdHzaMp`@_z4SUF3ee2n|IB)Sj!-C>Z-{PnO=H`i0Nd$#%UN;QZ(Miq=>xZ
z`m<M{WrM`HbZ&^#tI)djoO>OnE;xPnIx-iZxC+U%mjI|7rHxi|yWKcG-Z^S{U*&}0
zZYJ9tx!V`}Cwn_bP`dTzaHqX@a@=Y^+i4%|G<M4-FHRPhp~mI(C9Ve0O%Kpn<Thq`
zq3g^&cUtLU{yDN{$DZivE8d-*oGB|_^ZIP{@Kc1Q%{rpwM!~MwAbKtd{nlsOBlEKJ
zdoc9*S$b+~P;D)!n0ttUv~ZGQq<!>IcL}o(5T;3D2M@4iTp3~xDdN6+<?e#SJw=H*
zg$Zk}n1wQ)2t%4>`r<d7mfw8g93<~Im-?VkFsIC{=HFw?{uF2Ye7kIZS?E5}&|T!A
zysg&#x7lhb0c<W2?7qA0UQ*b7<*<1rF>B7IZ8<ePfB2RwJ$&k$l1_aiomX-ekh+F)
zj71hu=a6>75DN{)=fJnr?!K*9m)e<}W|T4`+l+*WAD5P>5_XdEx<iXT1uqsV=1{?)
zuAq>jJuJdXS_^I6IEsegJ4YcrG`ZN_{b$S3tdimsy&*WL4!jn8tq~3EVt2Z&VRMKR
zuY8}YqC2$&?4vK8;|`<26**nb3BV|hTNlPZR1Ur&q#y3x$KSYj7vv1cF5J5VDYV=T
zC-<D|-MQylRxXpi?BVveS^UTMTF1&olRJqZiBw<SaYYw`nSt6F&EI927lI;;|7)nA
zOlI~+lEEUM70%<w7JjFpvUsgl$>I2OEXn(Jq##kP()-Mn#n#E+SDdu(zkT`lFZ_4c
z{J)c47&q{aAz9wu3oa?=aZ(-MjAx|AEzbYDvQmHYG?oAN@#;7AFZqA}h<`7dyF1>?
z-GgVmecRRqw~vpukHw?K>cyjSsnpy*ZZ&pyJ*;pHwB>(%$ghAF8P9i)Th0AOt9h^w
z<zyD?wuI1r-aPs^dwGfNgT2Fp{hj@mcn)L=&GE}$XvLpt|6AXkbl-Q}|BqLmJY7xi
z|9b7|m;L`&%>VsFw1S>^6?B8H*cb=B_C@zUFNWiE_HXLb@o*A@T(ONd3NO&@3SFHh
z9FbK<k90Cz?+t&8dc6Sar*auzRYkqJLg|a${KtETyA+@X4S}rG(+EOF=#jYbDG@UU
zL>`_}i0$Ek9MWVC<UTh|IgkP7GPJgw$$-3bvb%HC*nbHGbbGgPeB6G8BRzP2um-2#
z8{<;+C{w)s>XDEEw?GR2dn!Y67H%fxF^PlAi&^3O0DYq@iyf3D>DkfnI@zReYiszu
zF~++Mz(gzRhj|sYhdo^ZzdIGSNVFxHJ&fAngd%to&j8spIv=#r3TvH{?@)9iPCaaq
z7;15m|6-7=^LRKJ*#%^jDYu*saukP2I2e=r*iZ&0f=LX-DolGH)<}e5lccw4fJS4O
z`<$Q)?r7Bk%x?@R1e}C2!Wvf9uk;fJBZgPx4vtUGq8l`$y+MC-o=}~7o!}qeCNN<#
zwrYS)6eJ;o4#|&K=r9Dmxfqg>S{%X1I?gyqdzY-q@}k1wD17cPyNF_sM8b{B?qYQC
z(M15S$m^A1@iPMnedcEaIt|AR$75ghYl@OO4y=st93cY;^Lbb{tzpHxHg+EL`?Fdb
zUk|x0!0;*5w3?*q?ur_q2QX*Aq9rQ1B+!H2Z4t`J<x*zemeFHz5J~7xG)@}E!48rL
zVBr{mB8HL%0f_KMLFZwn*5ub;e=UyGvWwM{=bseKoh+devX+9fPTeL@GQbQ4D}(dq
zRoL~gcRxj!(J1Uj$OZ<N@VYm=F0-#Q9vyGZGS{KvWwV?`4@U_r+C~N^+~c^p6rus+
zVQ^l)RG%powwS<4_|n=>DhGI3t^5{(!We_TxRoTDM>=L-O-}i)>r?V<Vjg|fLysfn
zk=JXw87ra!d2(ZxhOT&|$|a9zeAGlYL`qi9BNTESOp=KT!_mbVwRRzOc@V+i#l@r_
zWF`>Eh-%Xpp$s`fD~=E40cWUqdD5&JpyyHl6sa1o78-gyJROdsPGSm$XJ@c1vEgvg
z86p-u3@n14!gvFi(8?~`9iYWH2**fCaJ7tw7@@`;_p9M`NE#Uj0TkpI)(|N?gII<)
zcs~hHtw6&xN#6A~QT~q{V`nlL1mj5z!-9er7FR@1_wfbgYowqawBa~(0AW<Gm!Z40
zIGKjr&A9d>5I0~C-ikhyL&IQRM>2Be1^kN8#qKsWMXI|}GT~V)D9b(=S5XpHa7zX*
z?i#(G2|Y3hIqW=oloOW;&O$XdlE+XpWwy4)8YhX@0~^Cx9ae7~>Ol247KM{Ww@Wz0
zCFP3*QA6Z34tqo??Rb6wvWvn`#A<7l8^^#xGqmVycj}SfbK3G09dZfLPpl57{Y#+)
zurGSy0UlghV&ZR8K?<~_sKEIIm?Z3tP|wUonAfMAo-10ddTqMN9pn@qsZtyo%3A5V
z12U<O<Tq4W{9HL~`eNy@`Kz8XAaxi&k@{~_Fd7;=vFAu69FPbnI!Hp_Y5^X#+mnHm
zEZS}1^+^)PKsG2*n5;X{72J8}F~&icK~R_Ki-%23WHA(|C2Kj+&M+A0#`7=+`qSN7
zC{L}S_>*177puCOJqaq>&h$x0mAcy`5UR;R!9k{a{cJewg@F}Gj9ARyVj1HWW0|SA
znFwh_!R?)h-VzYIUnp{45pBGgYoqWr#1oEb5oDZe1pV`h&ol6=sYB}9sg?AS${_P=
zXRq2A{aUk83WtjW8qjDclto@Gq%8JuhkE@cxoL7GBS*R;<7Z&<9($d<fK{$mhF8w%
zd^{Ocs0&5CKT-u$A5}gZt;D$dO6EIg`|my1b|D~3aN9QgpU6Ivb=bLCNzE##%OGlP
z)maGg?wFpk5f9FoG%nyUSDNI|8Hhrf;F;fm_|W+H;n~Lz+aEu0g^#)h%_T+owTi!f
z^A?7he){&!+cTHAVrC~^2x@;IoB6PG<1gL%OTT@T^i2yaFpF{-rU$L}>lv*@;*x{#
zJTO2jH-6>Tul)9rf@D#;FpOn_F{X*3c8J<hZFIAeaCPo4;-Pq0>ax76893|^_^BGr
zir_Mmb7tanU}sxzCL=c&g>JTssw-K$EyK4l!W&7j8Oi=k(X8uWKzft&5Q9gd_}vWy
zJo11%jz<q0_jF3c)xn}|>ZIZhdXHcYXk)X8O(1PnwMKf9WNmpl98|B>p@Os}_={DE
z?I7o%KPEaN+A7xNg<8TiVZMxkN5?o6grr(0@5pXI@d(q$Y>u;~<o(#F4D{jB5`iUX
zyw)`)W?HGzR#%TKd&w422a-{1h&3@{U6n&VtP&d<nu3EwMOy)J?KKs=YjH7+BNr#!
zXG(4^7ra6(f26#G6ukQ*Dz8C?!yi`USX0EkUS9QAx8$SMb?DMp8|>8geuA!UxFy|V
z+YOI9({LJUZA<(}Zbk)AE!~icE0hoTANl~JY=Nyucy*i)-#Yi?!=(k#0AjAe>L<le
zX`U;jsBxiMQ|janprUeoa;>le#v}bs*<e?LlDE_ndvIWZa@Z?=;FT>WIOdJA<bs%B
zIDh`Jwsrz1H;<RW+L{K^I8D%t+47K+u1>0CGoi8vBjO^V_`9e~U>{A=PV;vda00vm
zn52g~ra1=u42pf~comE}hKofXa<p(<0Ji3lc59B&zQl5$I$RB8j`332DFbH02LcxC
zA4U08XZw<kNb{yFc~prZ?F9D7^pR)j81QM@WlMW7{ErsAZ3TxVV-rT+;eg&RBUYFC
zn643*8GYd?C}@ar%Ugdz)pj2;AorP^)-}teQuaZD;7YRChn)2@-;h)&wAGu{3;~9r
zNh!gwU|WIOFo!giN;Kzm3?~@{#L&jf6^0Vn3o)1{+>1U8ot-B!u->}(Y-sSU!K>6E
z?6PXS<4L<405@f|Gl#Z(KM505z7R(8kqN?G>ZFNOAI_Fm09zBOlY!^sTc0<rhWK6w
zQ#1H?fTRzh1@q5RSf2uo<l&6`^EoPErj{D)5jI6k08zjL<_ij|PdFwXl0A@Ukl)4J
zCXn!&7U<^UVH+SyXIkLviu*Njza5ey<oR30EMc<B&d$QL0*J$D?_Wwflnue77{YeY
z>)3r>WVLPI&+AD}<9Y2(;huNf_ImlkUWe}wLiBv43S^ivgQ|TPv}sE7EWePCx-m)*
z;v6{gm4gmM?<7$Q$1|O*hcvB3sPL%Qq1zG$b;kO~@azn4=21T!CQ;0>WIvU57b;b7
z%r*&!pX(1kn_p;m9__}}_Jw-?390wg&GP3Y;z%;bL)CF+KNuqAAt-y^iKwMCAu1lQ
z3}!}(P<cwqlannGJ$2*>)Tw}<TM3)wK}3Aa>qvt%c9rWGk2smExS!a?Z0B;jRfHfN
zc-iOzsFKx!R2p$G`@*qf0*Lz$e~u0;=pGccJ=YlJ@xNe@h~z0_tUbUOr*>dL^`NNj
zxyHzc{(>RW3Ys#;`U8w{S_c+%4~p6@8l!umYizqXdbwxvVZc5@Y8tRsMBNpX9Tx9Z
z<i4u##;VAjl_t`*Jf*WB>YQzE6evgb3KLuHkyO<nL4c_k%|%KvV%54U9`uz+AtGe=
zKOGM$jfAKaqC$>GW9-3%@Xvep7GAq8&AXfn(RRE1fBLa}{0B&O<IgVIjX$q!mnYr1
zvee-$VeD{*Fm{;fx+zd3NJ=fQy@#EP_^HaXESby3NNMjiT1U;-?c>9p?dA)3eB6Ay
z;~BZsH&JXYfzi3hUeCyp`noM8P;OX}>T=MTgTXwcsoStE4<@sp)lr9^OK<RnQhoMA
z=oeabuXE_X4W*i{Dk>62Y(N|LCLCK{{z0}_#IXjmKs3SJ7|ZdvSI7(EeQwBa_8)1g
zAhu-dLPkB5ED$XsiEfy?=ESbu=yZm0SK8r9UzY3Z>uNaZ^w2zHKpqk3nX`=cqRH}k
zIP8bx_;y)aKrdVNoXe(}){sWHY_}#;VwrFUNUB&G578<|Zr@JW>m|~F)mVd)X}oRz
zD9A+p?VHyklqNT9pKiCfJ&EB!GKOZX$jE3R=rlzn*>t^HvOyf%=^cm4fC-wl3dvK{
zEv9IAU%e$4dHP(p3RwqJm9{4s2ZOp-U-aHruf6i(`|58V=9+t7y`f(KpzI5;CMvLf
zJ%Em{ep3a@<$Ulp13rlcp0L1U&ng-QuFrfB_2M4h$R6J6ZkQgbesh`Ix&{7Yo*BY%
zIL3V7tinz6MV=wHMITLKG@=||3@N?2nmyu^Dz6_}H^E7Bsb(ue?xIaw&_|ffSr>;7
zFZIm2>&S{CGVp8qT&O5QgZ!$L<A?OUVjK@FTVhF0Jw|18MhEz@v;r&NZOR@D@3WNt
zpQc=#iAs0S3Zci+(I-k8qftD((N`n6M{=NYwk1(kNU@M5O`AlvtJBZX%of`~n8q6}
zv)Ez`lygR(t4Y`%nmo0Ru<CecSY6hKeYf2iJ+?%2$B@aBN<2SnNmRCEzjQ6uiuoC9
zoU$V^rfW$>lb<zfDPq>_#KH7%2s<eZ`ew~;Y<TXAMA>85hZAMOCoT9En3M6#EL%@7
zmy=bZs?@mEaf~c^Xe&?5a&Qvqu9xz>MBCDz^tg$i6rJ+<&I_;cdgmBAx82xn9MMDl
z<M80P^`iN@EPhd*nZ>S`uso=kGGB8LV8gbo3BUdt+upLv@}sZ6c6%^=JaQn%-pnz~
zM#k$mZ|A_pJaAq=44k)b9th4H{h1Hco3{@$v^Nhlv^THcJ`kKY4~3$)uODV;Zy#uA
zZy#tizMZu@N~LN0Q-!oGPc5Z`a+Z-x<-{LS)G{k)6x&{F5A-pI?VW04Ew-E_57UgL
zKdJ<h7&s{%{V2yb|1TB)LplU}Li~^V)74rw{>RfV@jt%A|M)A$|2Teg+}ha#GLR$9
z=y&<yJC+d(*O=$I8+Ljs^T{W>pdk60PG$8OX!|1UT%y63$_q!HX6PcW(y@<4<FM(F
zX0d@GLiQ(p8Pm0yDD@d;o`UCMORSU~0^K-G9O@empwtI>olU8@nCME{o8Md-;s$j=
z1_PV|z2bt;0gh(|2YNI45wis0^J$RrJ*64K7<||x0<^Zq?>9EZsxz#QJhUlNFLM+(
zF8%hw{&Rot_+?r1EDCWf&x#xg5SC5*>#w2E&s4np_u5*e_A56>GJYy-iVi^#$e1Ol
zTEII89aP~avb+%UoW$mh*zzNrypXY>b}h2vB(#)AgWSNX3u^+zn>Xla`?l(qCN&7F
zk-AYFc1%<j%dtG@4N(UwftS~`e%)ItllQK?NB1hO>!uZaf_<<lu3~tD9ZyKnsGXud
zEWP%Z-uO#z{mN^<^2V>c{pf&t;krel{sJ9pmw3iM;FFIWH{i_b5F-N%R_ub7e;O@9
z#yYQ0nbUwuzs9uWzVRb^&G6(opUb>~GIguWo18Ll1+HFG=B-sm?a_2xA6_O87T1SY
zW-2gGlCu<vxAentP~py;4}l{^r0XzFo<<WQ9U}FOnv6t%QIOJ(w{AO%VQ}mU6N!O6
zFfr;3JRO|dv3Q<MqFz^<@ye!sqY57(fM{G43`oaEaJqwl|1rU6Efh$=ap}hVThd=M
z)v9va{z*ExQ%nuud}DDM-v)#7eZwIQtguH?g`f5tE*DG`*}<`LB_N&t_+rR;(FcV9
zaW`k&W>F86Xpd*@Zk%?VWqa)|EBz@CoZ}Lh8Z^Zm!k%@3L+cPfrf)g>p95DYG22=P
z6vF_C1`;(Nb`z{Vy7}c^4C5}eMMI@44|YlBr|9g3g~|q++ePgY7A02R#nT~PIbWs2
z7vS(UsnO)qURhMptP83!icveZOtY4bg~XH?8yQLmuT;dG;|LANY==`7Vu_lX)iv#?
zjvm_m;Fd!6@cz?yKuIrR<t<{C)!`)nSaZ;3x%DDIhw(Ntfp#xE(+{kIK=e^!>hy-4
zOT1d;!se$lynPJ66RH`e&?=|VKyslt<jLbLe@)B;WYY6<#Gkqb)jV9y!Z8Yba9%TO
z4e8F`DALCOCHK=|jNSmaoj<(xU;A%93JT*zZB^~vb{mD*B+w?k@bL%Dq>CLh<!!lO
z-5%gr6rN4nZ+yk|2*{L$(w6L7S$}pd8$!1wbUk=K33__wh0x`tT_yS?sJjijGp|2p
zXhTzn@VZK@h+ESs#X(;1FFT5X*=TMoFujpv90TQ}k36HGaWl+!aCVj4C1ey>eNekg
zk_Bv*=S7*BnoZoSN)g3t_|JSer=4P?1pK`XzZ+%WswBL?KlM9SnGXC8%GJ*l(L#~{
zj%eWxbK^(2_$f~bZo$xWa0@>j&4S;*e)G0^pHK^bNFW4#Lt@!RTcVFXgUqNXH^x0`
z@)pdk=!QeUqmrr1VanU;u15aUcQC1wsg@11K@Q+B+C*7IMNA~pj%E(T5QZciq0H;+
zz+p`iv@TU`8NUw2Vj0!#d=i=%L5kaG-U0<r!#FW<3Q&+Wq{%%5NvT+pYQ6}fGo!}g
zriGOCEKoTy^FdYEM4;+>4osf1`9SI!L74i7JO?AvoD;6&w7+Skh^6DLda2I1)*&Jf
zcbcs0Yew5gJ`G*H%K@H?!z^^iB|Qz^tjsL@JbY%s%a3%lf=wW^dNE+uaPfCh6Ki5c
zHgFvVB#+nuHxsiOHTYCm00663!xX4G`eqSO*Y_TvnguZhtckOk4_AXf=wcFE&$Aa!
z`9D@M2eFrqi0~h)*xU*<S4%aQJ)MC#-|gDt*ufBIo3h3Ep-^Nrjzi0ES|H~<Bgq!K
zt!BCGEc~zFm40KdDvhq$4O=3#9K}a#Cp*Cotek{1)j?)aTS}^&ovHTo?%}IOyYaet
z+{XX*e{AeFpO>AWFrU<LX49&TzSH`g&1N^VA_0xPy@zRdo~2<e@EnA1H})w@<jy}%
z8oTY*L0co`6FX$@tHPz4orz3;46@2a`~aD=FTVH14<z=|Su(mp?_@YMp{QmhlhKhV
zQ<gD4GidnHkqUTMFJT$xT!gpO4h2+=2DX|-S`7l@uI5w-dK4<s7(9_F6CJVXG^lkh
z5y_fKbZXr3pgs{mASW<@x&q%s4R0!jjY8`84%L@0#S;Bb6bfjaD*Ho0**bixvKjiI
zHS{mTvG`9wouw3t1}p><Rs&2alc;ELAr8q3Xq9u3;Mi?RX@Szxq-?G*g15>{l*-}b
z7|^Y)shf#a%*(WZmj?dS=mRocpWN0IIgtVzEiXofIxjAlorTE3A&m*+diFVd%X!)v
zCaD-$N{^mKi5f{Na#nt{sk(_aRZqc>l}RYuTEcj}@BJU()P>)EfNy&H%l0~T9c71$
zH~pq8-t@mOm#YJqZet)6op~O!!G#6cnCz9eQZ9?%e+Rzy6|XE)V#}1U4ekJz?CzGZ
zV0^Hwt>pX4rYgYn>ab~9EPd49LQF}NQ+;W^>antSW!zO52Z+2%r=GFr!p7#PYSok&
zPikln8x7>(GZTWZVd11-$I6tJn@DPlJf+QYrNiI9r<v0~zDM(1Xlzl={r76_KbHRR
z{fY6>?z}rWM(O9?p8v{?+0+}eS$?=JmN5bRvu!lB5z?e5g^(2Kv36NS?t!`@RL%0V
z{rTbHTAtdlkqv&r`~G0lTk&iD3P8S=u}7oLm38rMv$ifSH|tUrc7}YF-3cOjZ^#E)
z)|^uG6$3ft^}td^OhMe)@oH#C1*e2`A{uMYVpK}(Ri$2BUbABWv;IUI+Cn3zp=~@L
z#KQ@|y3xbX5zv-R6=WnpZK{CU4*kvks|h%};Qv-Fd;k;={#BBiaat2%5C3dZanV@?
z(5qIh3c!c9I@DGtudNj*vx+tIy4t#!Bx^$SYfu@z4f=KHF_B%UvVcL(YIXId&TpXk
z&q2jCvHqc4(G>lMIjOj&E?L7hmD+Fy9rv;Z^xC@EQk_ZC?S!n*P0gx{*&E3uo#?wu
zxzpVt20>haXutb0W!$B?oXV!Vl+AD``|F_g-hOHS)(g-7jB8{n=1Ep^;xs4&eBbVu
zwZe=9gqlZrQLtfYk8+PN`H~Z0VNAXhWi9DMXTr<q5v6jg>~>U!Lji_s{ajBw95Fyw
zftU+VMjnyi_*Vh6l;$uOoQm=x;mM!t8JC-5%JDH)ye?7pekp%vtNrZyKuOE9af15G
zd~6vZsE*bQX+fx{8S?B^tw>B|8<8*W*#`Ueszwz4vaP5$Q=7@517beYY3TzauV~OF
zacwPSLs;Z;BCQ&=uZb_7`C|M29wpqZ4F>-8*8op_eP+IHZ>79U;4M6rrbtt|M|afa
z*=1hOEc4cXGe;SMGriawRcuC?H#5t;o;fru^JaFJ-l}3V%DkOfW;Q0@@{AN2*K4EM
zFUdSVzy1G<7p>(mYM-5ruPJ5Ubi@Do?EmZVHf8@`fBNLf7yJKzXZwFq-~h1lo!ATF
zOYv+NpNAZIppU-;5Y+UOK{)<v`2nQN`NaoeKT|>=h@;ZX|3&L3ywkw`3@R)`2l4p?
z13V;LBpKq#zOO8RCg;6d7PiS9n2g@oibmTY$LC}^ieO9faIFxWa60(Ykh6;c9>^RT
zKDkh>ij84A*t0ffciwBXb~JpL9|4PEFo4P98q$w7WQ7Hdr#tK<cr#D>iO1E|YF#`!
z#7bc-Ui5<V<Ple`R|Sz5RG0L^8$njp{Sll?PAOyz%!bqi(tF(=jRd5VNuNR{ni3nW
zEzt{ZDZV@2-oyb-dxDX9>I^UrgPtGtgY$f63II?-b0S>h_^kz!755QvEq&Hoi(-U~
z?1RxBl2@*ol#SNnmIsB_MM-L&3`A$dHgI`@a>m1#H(NrrSlFNq72A$OT52OG$psjA
zs@SM33j26AcPGiw&-77D?c~{<y*!%$cR+~0vzxuSyGaW@jnL_x?akTQUSVeomzWCE
zfL+E7B|fkRim8xWOd`9K^;hKlPn->g4wq?VxI3xx(<#X%s5gE3uwT#vICZS?D{uc~
zch|}(@LzVB4|ulWa%Bm%3un1_c*`Z^t97lE<vEo<NF{N0-SzA~!i;s3v7dSbf{Kr(
zOao}dRhgR}Y;JN_`4^l=%`n^DvvIrD^{~2+u&@cHd@l};erg;&_gy;pJN-I5+Bx1i
z`VsToAD*<VL$X3)coTMjsUUbyw(AQCKcfoIjy)P&{Y%IGtg-zA4qp6AMqe=V*2;dM
z!N>JZ1_7TqUWBQ~e76aA$M<RTU2@g!MF{BB;|E6oz??Se4hlqS+UuUU!?IuDkJ^2w
zzYZf`@dq_`=xX2pK?`4`mcgF^1#0_$P8=v_v03a+^6MOa=VUo7WO!5g-P)RAzYomt
z{%rj2GqSrsIk)?)%<j+4>mHib{hM*Rhh}tl;dB3DHuw3s+`kEv`;0tpehkdT;p}5+
z4x6)H&*F8xnTNaHXW@0dKO3{_eMWBA`;)V~-e=`^y+1d@>pe8b>;0Rtyxv3eyxzYN
z)9ZacuGjlFVSByL$oG18Vti9$6>X&$A>J>R<_p1Q-a5)%l{I;rf3Fwwo=z_eFtKgM
z`jiKhRAqrY!%^8<Wjw4XR(mGnGyw*u5<S&3J?p4D*3X>npXu3oIA+#Lo0+Zdh-${2
zautCa#W&7`Zz;mDh>z7o?tt(%<iSQGXDdz2GQ;`g0SOvW?tto*Zq6)C8%y1D$0AZw
zL!%MymYg~y+H~15)n^Y1YvzthnFrrtWXN@<ExEg`Im$MmM}x5Mz2q)}-kDx+=!8wH
zX6iW1%F5X~vsT+@8^|4(8Zm$eSZb+-mKo4J)>#R?PKRSkAi9>PM5BX>BC&Xr`#cy2
z8RrZl$o*i1;Q*dKZjXIclI!dRBV9%QRx~%CT&wD0!3<hWv`@yKa$op_V0(4Q2RPoG
zy(;Shzz+F6L%q#SEW4gA6!h5B8w#OSUg`o6@0Hu*Ekf%;?MYeuKBW-O1r;iz!Q_hF
zl*E^~3~#w59O1^ssvwARkQ*Cy`IGv+u~EaHs)J>WBVAi}oIYf3HNx=E0Q=VhHT*1e
ztF`#o#<}o3Pd+RURtTS~E9Kw4LG3;>Auuh%{PV#g(?=vLXnUCxpfT?y2aGH&FN&+E
zzeZuW&~OF!<7Q_x67V~@-mHsp0)K*N4c*PZ*uj0V|GSI*-({2}K#k@!0i0p~_sus?
z)A4^-SHJn@i~ZkUF#his^S?RF|7g^((?q1QI<Zq9_OQz?t{ksGZbNQ8AvB#K8moZ8
zWf0yFMai-(#jwqUz+|je@@Aw|ns!E)Cv*hb?u>5G6quqD7az~(JDLlA&-slv<l`;i
zjN|L5+m$W`b`d}uzJ~349v%hV=w@nxupeLuig-#xNhijT6VE1RScs&Kep_<)bz@^z
z(!zT`xD0jo@`cc1uV*XHfNq6?5Xtt`D+RwAhbX{!&LLOgdUO#DI_P<+gpt43*DCe$
zqQF)_hT&pmQ307mEK@JFo*$D*W7pBmHQJz8OG{DNQh=x){2ZYZWOy^u2YnBOz(s1j
zENn-0K1BzJVC=Geo>T#)k?M|B89)kJXFmR0BGV4m^|w|3iT{oNonNhTgbISSX$=7%
zj5Z(hkM2<8`uYMXnl}*tAx5^gEP)N(xe(sSRiROKug7a*gu!mt#c3P{m+R^Gb$-8^
z^Bxv>FzJo4z)~KBk0o1KH(DHR4Q!q5XmhZRNBX`FHiG%s(ET`&ND^Bjc^{8GT}?q8
zh~-T&G7H9Ri0WvPT(nPv&LxaCT|jLJtI)vmX9v-D1Wi5>_DV22M6E`&W<_gcesMZE
z(`g4~N;e>pr)Ry}Hjq7_TpeB`G&kO0i_e=M3)O;gXRDMtk<ik*9>$j*%{-08H)rJ=
zSV!svjrH+Sg!)L5{lyuQ_HO9QpV6Sr58Q1^OA6-M#^ulSRSV}>q;C~b*i02O9eV*C
zYoCVaPzU>1Pyi))E&vCG(PH0m0PJr$YTxeAUcaQa>rCPV9m>)Ad(D0M9N396dhRcd
zFb)p}g~7<Lw*ZO_x2^Ic7$OF<?Qo#nqz@p&7{Me4jxXFw96tv*;ut6+#sKbyov0t6
zyPnJts2DDsk|txTGR-LAZmH%o0(_o1Ic19@-^`Z1@t_dyI8{s+vbG*CH{mf}H!*|T
zSDOamM(Kg^2Z|89NVPZN)5N8?Q~OIx1R=j=`H3Yq)lyk^RI(f=R)$n^=c6=*vMIWS
zo3Kt8@YN>hmorp~O0RP1)%i+4=F*SnEB%B^Kbcut(^H#zq-y!2biI#;2c>BSBOtZm
z6sbs&2j1Q;QP7O?5*X#>MjyE3rHyC>7}p4b{<*(&`74G1F#Pj!Q8w_a8K)^KtHHO5
z7^ezcQk;(QoWp!P?=YRGsqrcZlC=N))S-PU!e2W&vxFBT$SjIVT`bicDMPkXfIrpF
ziowKEe+~qydNr$FeX+3SVm+sB-Kx7frS57@-6!tMneMEY^5)Qfp9Zx`SvL(HmaGnD
zOFvGPuFNj2#5U_`6}?`(@?zeE`l4bL#9JXKm^=05%wTyAa#6GT>-sdzWMAVt*t8M8
zWDOuwm?qFEUdbswV+*Z5#1_&EzQ68-Lr|qLqD_6VpOFQ|*EJvh)Yl8P;**Sg5sso&
z^$fq$UepU%E#JOblhDK_w6M8g6<aZ{ZPsU2>?RW{+#cA+ynQ5#)Mc5vwJyy3ZW@qF
z)7I>YU9yr_5_(}VU;BC8X3F(6y>&JNKB{c-@`kk{z2~b>%8RDUa`mHm43*|xQ&?KQ
zF6WI&EN;>q$wH>c;tMIC6z+t6-RZa*jOyHy)r#tZuM6oeTZLrBauz>JRy&mpSbr*v
zrKJ~^h%3dPO6Af%BIa$nClATD2%9TDebo4)&L6A%@p#?crjw%WR$I-H=hd!(`AF}f
z%x+FUx()o>Q}ysR01&xbRlYyXdtbLVGmUi9#yJg|o~$hO?T2;w&>}m868U)5ef(HH
zer%mY#@Cy+Mn-dYF~K-l6JYEx{}WNj+Ie1b=t7Oy1<w~D_~*$Kc3(H8>s-3NIIaAu
zDZk3)SErVLY|208@{gVJnF`<7Os7`FqjZ6lRPoBx;@{e(>o)K_k>tBnX_xi7^kxNj
zx;2RPX|KP{8~Brq)S?K7W0U5YHk2rF8))JIwLH^`dc1ghs!RBV(4+*Tp{0ST_X?J4
zD_71L(o>tv=C@m0u^Z-ZPQ%~X4a@iL7%C1hgA)a;;s8_cHL$`_$SqVL0zFMn@#7-d
z>S^9=*J|nMw)&Bavizw(EtstI(n#Sff96kT%zThL^Z9dMs~1jwC3p5uXUu+(Kl{0p
ze>!9GgZU=EzG+R@k~Ol_)UVp*Do(i+GhbQf&zk(K%g<H$`8Y?eUS3pMCQ@3QPR3*S
zDHC|0mWcE{oK~(+=_F#*w9(B6g+R}f5wt2GbMTfTgV8tSi4G;}reE~kpCkP%s}7v_
zm;U>j|NavMfw6sl-XF#h>H_T)?s#9}Sqgtru$owvmO+rwwo;=;Eo;m0v$*o*=EN)q
zF8sc>xgyr#V`C(+!uy(VwdAJdpWmAUx40DIS6e4`Nm@2r`Ta?)BEk?WH=B;JhHBd)
zNT(77mW?Vl@qV+m{(fTwYxR8!v|zv_b*HC7t{GrM!cq0JHkRsbZPG-R8A^qJxn;+j
zHqw5zR?N>-=~uV78I-N?ac>av5bW+gv>Tc85>H*@KgcCsDQiy4*v>Vh?YEJxwbXjI
z-sK(<?qe{k<~ql=v|J^n^NP<_cVuN(^J~)%DQvMmUl&|kJ+tOGa@EQH$??wf_K%ID
zX5-oJj%V9S7nO1CqVmfOR%REOIz+Ze|4R*VrbRPecPK`6J;aB6#}Rip9SBC{#UI0N
zmrK*lq};}wHqz40W2#7LmOxgm6y`Uc^abaIvLKw{RawNTa{<TiCg-?aliPkj1j^H)
z>UY@((@d>D*f)Fld|_2Mv=BOl0e&QV6@9d}AV%Ho+IPYI5wMwB?q&3hUACZW1BuS;
zHx52Zv}JVbnTAfo^Jw6e{mhdkYGiAxR$i*1scYdi4k4n0W)ggD3{}>%FYEfPr7SW?
zj>p$f0GE-u@aKBA@Fj6L9F3zspr>oFNb3lz8{UVrNR+1_gv_=I9wXT1nEP4I_O6Rp
z{{YES#w-c-AgyFll?J!(Czw(#U9(bHlkPs!y!#S3>m{ZxmPkbYeVO91s>!p_rEKof
zVS48M;F2OR58ffM_?bXfE`KfO7Or7g1i3^NbIa6KnMz@q@%1K^qbdIRvQl%p#+@b0
z3T$u*ilaDSlz!>Ls?S)327=WpP-`)-j$!GHb3{*Py_5lnFXV*f$=Nep0dh$Eot*&k
z`6!y1&%s8g+4tvl?#`C|HgRLxR(4!?rc*8TzFzR&kR@yy{&z@EhA`QhQ0vkV_Q{4-
zkFLK;w!@wxjsv}dXaSiQ&Q3r)X^L7$it5tW)EH8?m(t9~>49GB2I{3E3k9SVCf3k8
zb;FNM^(Vb?1oMS#^zk1^iHoPalRxJ*-gV|;EbcgD6o2y;l+XNBkxI5m{sO>6+v~#6
zO0kSqLa9qoi*{iJ1YJNY2bUeSLUacTX=O-skVqxe%O!*T5f`Zx7C9Rt6fC&N?J9!l
zv}rF_gcwq*!^`|K7P(|<8B!01m$^jJ5T8z#`eeAM=AY8xB_)5@qK8FH9hVn%=&_9*
zb39)1om~nXT{2EC!mX8Wow$`cEKReQ<U!6XQBT`6;xfhF{Se+7V@BzsktlmcsaE-*
zJQt)_3OPGKCC`*x&m1tOy6%W#vq_oUO7w=X1aU{HUUJ2+bS;w9jU-K$!4hFvca~9?
zXdOFVxF9vWUtNQ^ZZ)HxAr}%y;K5RkVW<Og-7aTLBvY=7j&sPUn}1%jXre<64JDn+
z`ZRZuED+)_FYab)<?)`a|BB{{9e6RuD5c>wVhF9LF?pIDmYt|ySpriy`%24;HsVwv
zn`GC^IUGXOhGn;&P7^bVQ1oKES1EF#)oU?N-eg#8j#6^<LeLwca9Ok03beDR3~N28
zxh$#rk|iACy++ZbE-3g)UByb9Kit{pj{YA_=nGkY91nZApqICi9K-=;;)Sp8UdBN;
z;}A04|D#rayps0+c>2xLFY%xMlKvmxikCsy6E7#>U=W@Lldjk}hX>Vj{op^NK^z6u
zvv`Zv_5ML?hqEnD1{6D3`gy26lacgkh-#W{Fd8e@2&DZl_x7N}vY(XHb$NO7f&LYZ
zuZ}Ji--tI?Ko*a}K@fLsR~CikK;!x4PV2=EbW@gk8II4wKJ*$3$6+Xv;o0~)h{JVC
z508!~acDeLFrYK=yk!)*hu!FmW&?vfqb+jQGv?*~iP#Mj%*Or_^*li@?I9B+2!YaV
zCl_oSOaYv+E61`csG}Zq$t7i7Oa1j~O|>FnRqcU;SapuM#G}OauTjdci{U8b^pwzd
zWk5bj!m~-wM-Wh0{M2l{Iyh;G#{L`eQ{(8UvEO>LPTo0aHADIpGAxGG83E*LjPpDg
zqlXVd*xNbUeg#z;&zif<)*DRR`J&m{-#I=OFAk1GLmW1aTFvc~-NunPJUKc%INqrO
zPvQ90G%+(#N9P{Q0N)MALDWld{NybpqifSbjl)hz{yVP!Q6Y4Mq34F8J{f-`iPLq-
zdVO&nW8xL5dvvA@!I)X|MRU-p`r=6qN(F;U*t+5vBLPKc0Q&`qS+Osk4U;hz-D?Q3
zQm@rkDnO!ZG#w|$4LP|Fwwp>{M}tmp(hbF<YIRxvXBf{mV0L*jj(W@c!@=I+ZoPW(
z2y2}UC@%=9fwvF#k6TBL=Kjv}?U(IWsbnPSx2DMrZU&p!x{I@xle{F&8D$2O66QwR
zHkseVC<!Osq1M>Q1%PqhllK$cU7Q40Y>Q+<`EN;O3=N(A53$|c{}KI?w-M$)TjhU#
zsMJ2<cY1poU>00V{$|O*=Xj@z32yr#?36A#(JT@?rbm*d;=WWP>)moX-BTG*o|${Q
zb53=a^X`?l_f+Cu{RDc-54TFB*WqC+gHahXUDgu|+rCu;e!}-MT}WvSLlU4<%o^^R
z2)CH%5o;QRKJ0@KM6S-HhmR38(B(k_^tf8e$q^~r%*hd1T(pVFiv|5%ml67#ulIIH
z*$c}8=aB{IMEt!}lxXt#EdY%=m~OJwAN?41=={Z?<KZ=4Aa7Uk3o02p!yZ4tFCOcd
zxt8d9SP065>PyeVQt)M@p1fCIV*Y}?mg9{thCcpj*L+SfkMFuwkR0EJ@lZCUM$n2;
zi!;^{=UeGhWorwtO=Whqa<U|4Y#tiQ<10X;#0HHLRUVOX1naU>S*<E>cA;bT&(Q7{
zy+;)(e;x7HlI84$wYQ*pSXup6o_){?A!j!LkyPmzL&qdUXOM?_02Ov1=4MC6Qo%d{
z!)x5Jq(QY7TG^`CqiLfzqhW%lokioF^&VfsgPwXoGl2Rm=sED+YIivH-s>|%NY+<e
z#)1;#<BEE$Z4ziw8J6`39}w7BgGJ0Cu9m3=br8-iv6E>wZx|u@diC-!f;(0vqUOC3
zbdkxa>!e4ccrpl=LH+?E^9WB<aLUu_!qXu@>N1nVyEfiZL+#?r4@&y<#w+U`CFb1d
z$x|h+taoMW=-K=E-LD(47HUt*rT=cNQp@A5<wgEGzvsOxQ?j-HZm-D|UW9mV!_nbN
zd;yZB+!BdrLsYj^_GYNUNsY`mFqX<)c!j39ISW^lZ^=@4Ci>AJ>QDMO5y%K?%XQCd
z+?1;~sGmH&p@&8!1gO-&n0X4khP^{IT>@_<C&<!w`NSNU^()TN`r1!Fd6s$dLh=Te
z&8U1D+`@ur=!iJ#32+P9$bz^@+lL9){ztaGAWnVf$%_SX=BtSq4z>{|LvpEF03j-7
z?qg>oejkr>nM7-Te$p!7Co}nl%`glZXF{ooc`xXnb^~8fBDi(UT5gWfRc}+=sBL06
z3GfSgOAJGQtE;bem=gVZ*z2}`)4zVpDccYG!yEZL%`Q;Kw<Qux?iY4F#qOxs$R+hS
z3HapbBH72pzU2FQN`&kbH!BhvJU9~LZ6_n|@Yu&WK1B+EyPp8WvNo&fsw?FTT)-f+
zyknv<?i3siulIs?L$q)K3g8*`toSnb+!bx4EW=RQ_MJ4<mh-dZJ?zyFn-(=*-HE&$
zCJe)b{U(iM5S=)Gh5??cufPTx=xQb3VJ%6;NtsI2n{rX2CLV9&*V8oA;?8PxnnN4S
zp<Hu8Psu1=RHmsQ0&@n)2+z*onXN5_!oZ3vfZrXWz}W=X!uv|D1R%KPCd#ol)9>->
z0kCLaBRIozUuN=FpQ05#k#Vc{zuC$~=nfRxqE+I0wGuyD!wg9BBnL7wa6OdF*8u0p
zi=pPC#!SZiEKi^Ww#dW>3PfXAG*Az|5c0>adFOA8-=h#vJg%af@eNss!a_J*geO-)
zZxVXblifRrw3dS8_AttBm~>Dw>P9i=Xe#B(6t+b1o3ee-`wfm;q&Gn60=g)DGu<+S
zVU&csaA3!)GQc<Hwlq^&me;Le9An6&N88&k*R0pS3?8j(aMl9&{dXZd5GP~A4pBkF
z#by3;_0*GBE;nUw1xYALjuqZ|a%ia1W%<a6n=si7Vo?Px!`TYup2M>WoHn~rA0!#*
z6rK|0^=P0GiV|SM2y;xdwE*~@8duKxciyK3d+I~}Zm_q)h&D3RnY|-)mn9OTLD^Hh
z$1}8`k|Gp{&(qV)l^c0I;~tV=HD;LZRV3m2h*H!&mE-BGk!eN1m-tVgp#OjVa_?y8
zczO(|>H7c2^{2HbDgFQBm2bZ2|Nn~m|5{x<|KI;BiQ@nLzr_9o(-{VppMuMvFE)Sy
z0qdy__-XqZpIUbTf2n{_uHKUOP)1>?A7?a`^q1IvlCQ87!kpsF`J>I^M+-BL+j5kC
ze~5;xGJSc7lIHU$ILA!p?u(5)i0S~eCALbOC7S!cUZg#p(ZRxQY3p@7GLDlCZf3)-
zo2~&}a;)1;QDMH?C$Tgp+@I@oRZA!O(>Q|S`Y;}#A#q<x_I&8EBKaglPDO%A%m*Dl
z@Bacj4wgTZrCTmjCk}@(#^<q1A#`ybUCDEQ3Lffyhm$dMr7HF?Gw3N|96BBJ$U=)&
z?n<>fsMKtrgU)0O8)Gb36OF$(ya+%DIsRCJOB&xIgOyu^drE+QY8>r1_h0hv#OtjA
zYIslzy5dgyn6K1i8J)1Ax_2w6I+71+k;BSJRVr(#(eC2}b}C(O;l<8BCQ;||c+`u=
zc!@(I$Y6ZYmK{aJorm04JqX?@qELO!8iCZRBe@HoNhJji<0nAip*6vtskwmN_D!t?
znyWl`YNIR=s#(!M{I?z81<=?7K$dX{g0WqJGFpWUV5wLTDu~%xFF0RMX=N`33zU~2
zP+~XW+tQLbqGFZ8Gvhn1`VOj^u2yRBgh8Z?&7q8kgFq!4SCK!LYG@&mkx^OQavh9R
zE#DMhHx?SQi}tziqb&yGdvyZgpn7_9(Pf9)JhaKQFv@Z!?(LGuQ`qe`k;|pkcA5}0
z#8F6*%>XcwCEjx|Up-ne)snh-VG^kW{2}S!CEn3m0lvaUAPKdHbAa`wdQ>vpgc95*
zRT|)Fknj#XokYDcH?DUM)_n;ZzH;2T=to^35{)s3Y>K(l$nMX=YY~h_@$g0lZla~B
zw<IdlV<<jCYB)Go?AFi=!<htLX3<WELf9WTI3s|OOImSZ5#0%I#-elE>4jBwW$ctI
zNkXA<Nlf>YY@Sa^TS}u12EfD72q{(HCji(`bHH}U<8b$+MIcqAE5>1}T_k5CzEegB
zKnGc#krO0u>ridYx>f8<pzhS99V|`iRY<c>t7XiT*JeZ{wcr;CSkAJ9k4jJ;ZXgxq
zn0nbQRCPe7I9M9IJIbnf&heJ8)A>xFR_^TxhSGysMaRuGYOe!Q7=Cf|XX?JFv5v2#
zvbHO3d~v!?7+79@f!F*ELfomK=zfb*V%W$9wIz1}LFBKf$7{>A>#eS^Qt%k4zkG%Z
zl-8nKg{3F2X3Eq0oAgfsPk<hK4a3sq1!u#MH%DHhOr1gPoPv%Y-YGx-1(>D6ju{=z
zBhb~MW`q&m*zy!s$mRnT^spOzoj?0*h#Dsx&M&a*FQw3eWtkG~+yuCyf$8!mwu}W4
zso3&ScoxPX#d4!b!+-<Nt789TXTNpa0QUVuV~=LyMO7R&woi6<eX$4U@Ay{ioB>Iq
zE3;R(K;ybtT{Kw82SHj=LfS^VOpuP5Ac;mCk@}p>aZo~{8=q6AmuO{ho%l*}iW6?W
zH;K9la#57HlEkN?Qk3rARJ+57B<b2pwYE}wy8Mo&;tXq5@oZ6DS*@<rSLqHCMp^$=
z{7@CoCUK0*6&v`zDt6+Fu;+^dHW!h8Uja!^XRuo0XgEoDr=vL$3NX%AQFI+IEXLt^
z6he2Oc(w1o`?l;0=v1XvuhCk@h!du{R6m{po`)CkJk2g>z^Mi?K%XLdK7iad%467z
z2(G~diYmj}xCr~`$_hPu`W<%dDRgahWwo+e`?e}dayc%4f&gyI_hOCBg*xe5t!F_J
z-a$Awl1(w3ErCCn-SgcH{zgR}LeF{L4&!(j=e>y3%}<j1c5pV#eV#OpJe^lyJnR8n
zSWP6UFk;p+<=R9i$nAFB8G7-_BuDVep1qu$I8)1~ZlGpC2l5R|mn+#n=BRrV20ht3
z^EEZ<8K*6~Z2ldY{M-UIHc*^H%8IFqrVj;`vA{F{+1oj)#yoC)&L(SCW3py$V~)4A
zfwScc1xeJj`?c<ZvckX<s^aT>$2!3hRW?@OErsd*XjPJrrNvC)*8n)7N8e)p=m$6Z
z`0NS(h!TL^YXE2JprGinyQh9(Z4~(mGY86EnV0e$OY$v?t1R&R5`b`kT;*&MlQ{QF
zM>K9WPpK^IB#s8WDC7sr)MkuqWC<tEc#P*+C1iH9*pSRM>`F*LZ37CRYT1Z*<y3RO
zTnF)h<X<DpCjg980I}k>N;+0DoCp$e9Rg(dOoshX0wZ!y?R3>qm24rq;fRjgAW4o%
zG>tL3H%6yCWAtFt7s23M_ucBvDZ&i#paUR*J&J7{k{%CsZ8GWxY(gqkS}25HOHfAc
z)V8ITimV~=v1~=|-AHZ!=NzdVBlw0lC<8-7a?SBYJjr21-Q5ZcqM;XMqbdWQWBe5H
z$1KqQ(mR7vUCJf<i`HEgjQZ#z@%4}A(9bOu4MCE7#B%!_rZ*wVFR=jC$T$IOEQlNF
zV1p-dxr@~)K8Sjh|HM6%lP80;ggI5#V#X5Yg9$ON86Ln;Z1YANKqQ_OA)2_RDQ0ms
zS)`QG9Af9!*0zVe*P~e6yz_4ScN$#xybrVH@q)t3bLu9Z>!GBt+i(vg+I>{TM?V|V
z&<O{B7yFYwniU1TW|B-o`?hpVcYh<N|4{XK(g8U*aqH|4w}*Ire;(>VrkK5@YS2{d
z)0affI^ey)LJ!t(*Mz(;(>4GFOY79Xu{Mq}W5Pa{Xm`JqhaS0|)G0}AX<Sokr<IMb
zm)?Hs!rB-N@i-5n@Ve$$30syI4mH6#HS7GwSg=~ghINaZvs<2ZfWxD^t?kJ6MIH=I
zPmsPiK}_%gx~GOJ$_sfM!>^?px#&syQ#M~^>7^Pgp@!0U02z|W%PXzC5;;$~Q$?y#
zd3ZNEkYsYonst250}^rqvCuKA5kHs-v=Qjy4?0XnWYZhUAZ;(W$>2?+;e&$+`Av5r
zm0k>^l=G1y=$#K^Skiqd6y<I#k=a?YQ<C6tcZ?{yfIQ8&Hd~B9B-vHQVXo-8ppIg4
z2kLwNLm4gvFMCL5MXns!_ks?aF4ELc54W*mK{uy&$Lh8ve8PxUd`rj%auSXM1<HOt
zfW8w7p)>I7v^h@H2!$G=vRUt<w<Jmn(#63niS%%|!gKW=-7N1pI(|(jj0KQSeCN!u
zAh)GEBX|V1&dJ#0z+6!jtfAC77m>;ue|KaATaU}oYaUQ@;Om+T;$Sj%QRxWhBI#UA
z;vb1Lc}`%1V|=k;9bbU(Q{my+D`%{;;PE`q2@DX2JdP*Xncehu!eRymaw5pK+!-xi
zji8~v)i#iTABp(=5a0HOS5il9E_sS7y`oC5&?{yDg#BEJq|>T|$+{X7@NIFCseB!g
zyU<JEj35XB{gEP3pfo5JY(DN<Ob`8ta)_iHmrWrH`gcMwew3~PB;=d$c!U<69sxB`
z0VcRm1bzzGJ%%fXI2=i{GNrbLtpH6659ilt*^}}Vh^=+Be{XG`{Ij*VM_wk2jl~mX
z<gV%*qK-A_9Y$BXC(ddE2)#**I<<SE6T#?<4S?^!anuXUWU=mk5krSD!~iqR&>fh>
zhLGJMDVxMGYA}t2AVq?%4cXU|3fRhlrMZ{23}!2hEijfQ1n@)VPWuRrZjmN>?&1Bp
zdzC^bE(erKPUN0&){Dzz)K+r)H<Xf7>N9UeahH?+6>H38?g=}{2aA4~1cefKX|yEc
zaFnpjliFRfd$1@w>{svtzY(Ab`$ZXIo;T5@k3e!*Ow!kHiY+JM75UKdLYEe{*=sp{
zGlsWfLf(q;MG^krkT*Tq3n5b5yZ27+72hZ7SLYLWh&tyeSTpd}*6t+|mGbQArwd5Z
zXG^a(#Y#w+CJAt?trt-RWrP=yT`>QU5?)Cr;d|y13f`|&Q~*tmF;9&CK}Q^-|Cw^x
zQa|bl1HJ{Vyl!`0n`EmyV^p(4$M6+OPbEHu$<fPupBsV^uaG%sA}TmsydR=5SX&9j
zGAy%F9guN)`n?p-7>p`YvJd76VObVqoSk|4+$*dDZ`nOS%fl~K(e}amZRQlrzHV9{
zj{>_qJFDzkG))_>dSPKfgG|A+{Jty+m?T0i`+-c0Vl0cUT2N&fbzRuB%W<p35ESV-
zQ(8a!z{e-HyLX#xpYvz-W9g$PX_En$65#<sHLni}&*e!$Ti_tH7Tq3)`nrv|xm8GF
zDCdsaDs-cf;auXw`ke4q)FojstqW}*l*XM^kXCDpd{pDUq4l!3)?6k-VDxr9wTd%U
ziKt$XqgT`ovkmg>w45WonWqOQtxQaj=-h>t{uvK%xzsmJXU{!{=;1y(O3jE8M8W_A
zD6>~GEGFRENcYYyM;+RWsDee>!f{Wq+hs|$e<V#Gy^U9v@r`_uOK?QWS?K$qGdzus
zq>QfPE^VF1o<-7e0BLbvGLdM^90W=jSuz?B6F2)lpFO3pHomF>dxC{^RVOwDFk7&a
zh7?)CoMj|gETAJuVxUhOh9lvVJYrOK^uPLe>u!4w=3SYiXwsG=@TJ08=bmZRTt#CV
zq+A%!oztMe(Px+*7q+yuny-AAQp;+On>tCt_-_eFZavZnWhk5m5AE?x;eE$dotsu@
zI*7*?Lo!}LQQp4U0C-n)wJ%Mi2G*gAuVx)-P}xY8EUyWq&T^zCYo_+g*i_y-%JEpI
z{GlL9&~0*r&;vJty?I3Vx5A|tg(;;i-axwXA|LQYvC?!;q}Y@9>Z8I6I=Y$5Hpv`I
zsm@uVh}pUk(g-fFOpr^uD<ptfZK7$k)g64D>5=OpynPXNIG)!9Fsoa()$0zg2X5gL
zzNUJWbC!p>&Dr{?>8YirN2D%BUa5sx#RK#0!2meVZg@8Kq6!SbKaYQgFTYlU?z<rd
zI4e6>6FE`{1{4c*hqyHaGX9m~f^mmE#vx(QHwfkJ;BI;?-AIU~S=}D{1WUG=?)Q07
zd6BJPO@x$LIeOS>;+^x_(2=|gB~u!S<I^Px-iCMte67cl<&lynaIM0NjSX{;v%%HT
zrb#N@$4g7=3<kfLPBH;36oc(}a;Wg8h#0W==;oM+bl1wr>j=#9xs`12qou6RE&@xE
zOYllr-dY#)RoXo?FapRsL~RjtYQw=<m&xez001XQn5xyP_zf7c=t|4I6!RLyiNi7B
zH9VroZm+82;Ru((_Ao}4LW`kX)ke}|)Iw1!KSts8#>r>|GLTo6RQ$Yp5e8jPuUZ)!
zL#eb75k~SXVo&B!AW1vWo*wn6hjau34?u6_H0Lres7IVxEm(4w#RRG;Mpf50VVXzf
z7tQa2J;s+qA~FF_HVO6|d(cvvr}e3DM!Vy|gJ035lXR5Cd0gU0WrCxwTI^F*o+92o
zWJPo=CMYy0c|XLbZ3<X$e+m1}YKd8@WfE)q?GtrukI8b-44se0d&Tcr=4=_&fP9k$
zBDHeTZ&b>vu98h%1QVL>k0p-dXsLAOLYS-kpRdlRTsaWx<>FAUs!V}eKm`=N@i@xG
zzb7{4yfAS*h~}}WW9Syl1<|5RC22I8SPgVwX<0`QcZ6u!5{<3$?1^E{9q5t~jVm=n
zQjzAi(09Urhu$RejJ8sC86pD4_oB$&3}FBS-#kJXS_FNk*>J%TVD1J~Ukz-k%2Md`
z%1}mCH-ZWG&0*Mx5{;;P58dnwvIY3W)qh9b!@uIDRfXevfx?InGKutfpsmB#SW<70
zmDFFAEL$OEG=tcdT?%>|a6oUdKcB=q?59L7f4`Vw`32ioKz(v|P}|M8LrETeVp;S!
zmtXYp4=D*T58y|r_h@!OqKn42ygl_^mM}3skQu$l3h6un%U3Xc-5o7m6z+LtfNIT<
zWl%~^QnDnQTc9;yWy>abnxq-!?h$(#(!_yWkJ4JGtPo*7d3O1?LX<ID#WVAyw<Ei#
zUCb<#G(L2}2Gb?YX2W9yZ$GZfo^BXCX>g2Jo+w!QD?aVhb=K0zg_Glp=cxvK>3BC#
zTwQMYuQYcrTLwLb;aedg6eu9kH`^4>fX-;Dx;Cd;-X2Z{%Ipa*2Fz_B-%z?2jKZT6
zYu4<itS>#{9hx~7J+zLUo^J4nEXq#Elha<|Qh=$lO7sfCq)W3Td`+t<B8G&%v?=OR
znN7=LM_#ew*wiAB*VfY=z>cEvZ>jYnH`XyNe%T<;_;Kt8{V}V@xEOMI?Oe!%KaQmh
zVV0qufLwy_6k^_0YQ_RCbwF_krtjWaBQ9wdD~GIme$pAG#`c<q^E&_Za!h97eENy?
z7&6+b*CIF%&=rkt>u2bs=D<`c`+#)zq&x9TT|&Gzh}2&z*wYjGb;;p7)T?rN(Y(>?
z#XSOpDE?XoIobPX1yi}c)&fgkV#r81a%-l9?6Q5@bI%8T>*`QqfA6)|Hw!CAbKAe$
z>atagYUC_K>{;l_QG?p}kZLP9cR<;nPyV)dBH#}!l-Kwb{=EKG9!pMP(3iM2=&Di;
zL=T8&#tp=`_u5?g#Wp!pz*ZzE3tpK}e_|FVR?UZ->DG$r6xPb=5uLvr(C(HyMhE!U
zoEZz3Hy)O0p+p0!JIC}0wes)?>@3*z`D*W_s_)tL$!ZAW*(YJf+%jv8Lo-M5w^SR3
zxTu)^H1%@8y-ADDmy?$E9z-0wvVoeivf<BCQ~C<kKvDtZn$uhdk|KA;X#a-yjotv{
zHKiM3Dps~mviU~MmImNs&L!EW7%miA+1*v{oo0t87H*6go>;iGrB<69+|=gS2Yj!4
zD|JJe(ir*Qk{uT1#7ZN88WPZ(S~-K&;t)kLLQ(18F#gXR;eTet|EWJ-t<_WUe^$SF
zy80#l&tEbAPwg)8|8oNf`S|y-_+i+47l>!b|2IzI?|(+;qkc8M9$!>~An06#gK9VY
zONR~0j~b+d24%tmO$lROT2yi8i7o75JXv_DZ)F<VAYTU=$GG^5q7e^<y>4V#?CX#g
z$};ZNaFT=<PzsJlF%WN#E|^qH*y<r#e1zx#z6QT+dl-2UBW+sG+;A6k#(F=ISAM1<
z+M7sQom?xExdGj1vp00V8vw&z(+30cv++%^oi%gGVE`y=9g#L4MB72DTtv?AZbSWy
zb*Xh*04r5Gts|7oKdV={)n#3$56T!p<X7Q~4rP03s*Q&|dDmvT9;*vIZ6KuY;AVA~
zYrwE!^G4wn_7JN4k_5&8ZI5p3lC`zdsB;e8IrYj|sD!D%W8-%TlQZg(B$tHY8ofUb
zf-da2DhZx=?CXtx^Tl3OysF~b$33$KN0vD^0b=A>-stTEigd<SHv|HLK5jPl#P`+X
zPE<V}2jeQ9lB(hxU(~+)_9^reeti4nDM$?8KK-uhbQQiD%R4!8ZkdCA<g_*HMB$k8
ztHXI~g7n9Y5a31!IJ}5@K{xD;E}{Ub>dGqj;HRK>i4jv?RmBVHhT77Bm48LWrcrqX
zR7mRT;l*u&!|YeZlT{k$cRbG38d^B4)>r0f#chO#FAv2wL204f#))+$Q7ORqchr+_
zxhJ(Jb?8a$DUUZV;&raLn>E6<ex^lX(lzFr-3muHi!{r!ycLDsNW$oC>5#@pY6@4K
zIGS9splx4M9-5Ys>4Mq6n1m}o+O?fB>@;yw0~AAc-TVv67e>pLXZj8K%Jy^@TUMy9
z`ReE|AiT>^&M)l13a;TTU<8ju3NPjsM`|zFUlwo93F#_MH5)3I(xp=dWhxW3q$ap4
zQ-MCLFwkb4V&qtzF4EnS^_q-QU0uXThO|!G4nkQnudq#zRRY<>WpBXH;Vj&}73V|P
zci~_-IluTjs~i*%7_+4^(Y!$3Na?eW>ovPfZt@rg1&LJlrU0>G?{B<FhkDkWiB|$+
zm%XrW-bjPk_cbG15`6r!&~z=Rnt38?O{p>tCUaEDj*9IT9g`%>tPxhJYuKolt-v5h
zFFo5>Jqvs+tqvp7uiCx!`ek2S(O=AoNpc?jx@ix>Ytn%+ILgU@5<J_L*yz>{b4TIX
zh5^1MUahn-sS%*#m9*)s;%~2ybd<W9vTtWgc(Qo18t^0>ACYUqTCbGZ;F|zl=a`e0
zL?(WnNvEp)vt&Zs4_0L6M8;{pN=@VmEOuNmm$LuLP>F1#ibPiNs{Nt%k<4x2+a>1S
z0cq^2WaX$_Bj=UChHz}6J!V2WQ}bw_wna*-N+mK05w8q}BWdJiK{fVU(h$s=kcbAA
z!f|A;iz@Cb%aqL>zTAs@G`clL9&1CdWPlT(9v~$JiC0d)y2@%s?}A=JszmUoqgO`E
zzLbW4|1!*?lG|$ZvMi}DCc@Owwe6sZ_PWkSNVNAb+8^caVE&ic$CvItzO)8+srPaQ
zO;9YelP~jj@}=3yc6X=EJ59Sa)!C+}%urz`3MRzK^{a5bC2H;z6f7g&tlGpkH6=Vo
z=wQ~nG&Q!g0j3Y&ftmQ?<<C*pU^B1F8+|keyTZhyu!Lk9(y~~|*g1CbY~4rg0C|3<
zdM3-g{F#R4*rx()xz@22Go5;%DNr;<V&B)Lf^iZ+Ai2J2Fgw=kd(<-YD;i(~RoD8G
zRPYltTvUXOjT(DQGUPs})XHqkc!?zKN`05(w#6H~w!TK3V1fXWg{+htz^W;`(&gr6
zACnT8iW?+d$~*-Licd<Mw6iEseJFH^1snokO)Ak<IV5ACDK#3U{Ed&w>}xYBE|j+Z
z1SDS;8h?tE^=ET%*h3NH)>#}#pI;HQN-w%3`wKYKQ7k3O(S~r9y2K?`B&MYEea*u$
zGn<hG9w7Kjo8s0<$T0^giii{u2p8ZV#x(STmYET#b&X??z;TgeN1c+l3sQlY@|T+{
zE|G@i7FwYIY-d|afU)9m{nUxX@{r3vjOb=MJycuFE&8jKnpqe~D{>wpmfd7_S*AHf
z@KxiukK`(Ql`p@#NiS%I<&4RIP8@moPY}2Lj|toUFN)g!u_zTAtJ884vgy#y=~&Dv
z`mSrqpST$|8KvX^PjUd$Zmiv?QXHpLIA;Y#s$OIUJ*krHvr3YnYej-oz*zyV6LBNs
z>dKhGVMHPHbl_la(&PgDZ1IRcZ$M&=XBoW6F~PDF*c6;uhXCV{3OabS%=^|AS5d$N
zrI<!`U^HrlZW6+AOThsm$e5R*w`HU&4tt%pAds`FJ(8z&SGSg95*dz?s5cxy+X|8%
zS2b_l$x%|gf`D@daDFGXjHy@%LwFU&GBfqq3GysM3a{~8ayVo7raUHVc-Og3k4%(j
z@{~M9KS_F#9~s&k_^JTjAX?F&>De5SpgI0>VE|@~*UUD(lQDT4eQj{OMg)}HjAX^g
zn(vIuQ5q6(Mim_$WxF*85<LPS^e+nuWh6EYI`w%`5k(-|uj!*P@ucNMKfBcdEy+y9
ztaw&X(e>NS<_b=IFi0u*)V7wg7;n~t0q+dCEw}{1GG=hpDYU+PiE#8bU;t+B{@^&*
zijpOPT>8h93A_UX-UbdHCYQH<0U_~)vg*wem`{(AJV%h^JF~(<ASOU^et6`9;K<WL
zBj*Q3o*EXpG>W4uK-rqH`_n;`;H3_VJS8OZ{7iqoaLD%yQ9Q3;p~o~c=x!$Ft{c<z
zbA-tKV<Y5#36T3q0dgNOKCX&pYX!5FOY3DOVx}!DosCI41N`bse=-bGh6C2fcypgy
zQ9O&Hh02zXAEM=Z*)2rAaAJ{KqbLYPDl@as2uAuZiw-<HGO&{+`;UkXe3z)e(V6#^
zap<^Dh+yC8xV89w)yuL;Td8qm)FXF}EqssA!hh<>!eU`sWMP<~n<^h|yH*b6vfSPr
zSdZM(QPq;Rx4LU^;*u2$me1*Pr}I<>Z~yJfzkh-KuboY)$P9S8{qM@-Z&p`Q_P<Xa
zKUw)=|NF)M_pfWb+uYwifR{V1o%ZfY#)volHZR9#UM9n~)D+3Hr@cAN5tNlG*3r=}
zw$V1~;1Agt-ReX%FN~suJ$adQ+?+5|GTIboV#!OwlFITpCH0M?Y?8o_hS$n$EHIX5
zsnkAm=B7#6oe5`pw18fGUNE7lor<}R->!tGU5<jWdu1wRT(VG;=3<uo-ka!Pogb~@
zEJ)@SS2E_kMzWy$ImeK&Ikr8~5+(X);c=PG?qpZwOX>ZC0V_N#WM40Gq$m`VqYZpl
zaK*33U{(SyCXhs#DFXP*Hd+-y&wTsR5t~@2cX(-!pVbHv>6JGkGFpNsMpFG?iI38C
z)2Xv=y{G4NBF4L@vRK~a5nkFCUvu^R9rm0$vVOOUm+$nL#idi3+?PFiPvd@P4}9~o
zj3dWqb{#|&oKNtn;J<d}`43J+S`bhjn46pSud-A2Ln@Z-yA<Lon-(s~1?=*j^?N;4
zU3M|^^fLSSqw|YguG1agaQdAOhAPhZm+h5l+<e2<bC%-fW>n_I6P1OF4FhpnFHGH9
z`ue%}z+>H8FCu$&h|?X@J`^8(GKXigAa2MT5GYq+#4G6c?(o`<dUdKXGP|}LFYCsp
zzA4UXwitn2zvC$=EiwFI^1nKPZ>yWMICBh`F8|l+t54GYf8W$<wJ-AjUr_#E`L+Uo
zzPrmIptQUw_77S+93h$I+U%mEFn*^Yh{lw+6Q~U39d+)-C_g62Lj*;8nC!u@P;t?0
z?aEO_VN#_)!!U-;hxwPqM(w-rzCmo%@Ks|nz8J>InrIBVad<6uC;g-w;E7<P2Vehl
znDnaAq#AZ7TSA4}q<_0nSoy~U&LX=Z)8-98M`)L~k5TDqri94a%c!O1c300W*2L3}
zOx{k3?BlQ#qV^_reo_al#(5laGIn{XP;!EZryGGKGlYYIR<S3;v+;EhgBYgn1xtKE
z3^khDJNw5wRUu^0FaS)HB+~v+YT-E#b2pUt{?=K}#M7YnQGzhe;{cr%F*@GxD(Z$^
zd6>MB2m)-u@`IKUZ@{c@Qo6V^;G7sy#)4psaCEbHhSUv_V@0Po=_+?f4zx1_L~I`%
zzBy{XeAQC$p}*E)GpB%zS}V~Y&!U`qIvCrpG{&HL>nLkLJBU!5l`9_{iM_@TJIBzd
zINCWp+Bx3YZ#7!YgZ*Ox?|y0=9X0k_&7EUk?7YU;^tcJkr@8YSLE*jhb#TjZaOk4e
z7tlPNt9wyDVuMj%t`}G$syVDXwMB98LhS7vZNGw_jc3i>X6p?Wc+qU_V>e#F_s0Gk
zS{RKZz}U%d<47Ey93372w%}#!Rig!ecEkz(enDR)#-5AggBLAmYDXX=J=n+8$Os9l
zJwMn!*;8E-KQ(uEaY96M|HV;r|K$$02@STehHoYfzPFAV&v*73M?WY?2hie?;Mdg$
zq%98zYO{!N*O63xBi>wvi8ulQEbivfl|WOLmpiQ&J3v`wsh8pSEbIfJ#lm>`Z?^fZ
zRHnqCwNpVtg@wJW@753oJev$uuqsE}^K$<L=wFh+JL$ri*7^jr*Xh)rTyRoltoQ=E
zaxA+dUJL;~t<H6U(hbs;dbO4!a8-NYKs3(DkJsI3a;IJlM@%6260z5l#t>(do{u1K
zTl~~)y*fB)(JGQl{mnY<R5UjUuR;bz<_x%ngBS@GZZCw8-VT5;XFKHV1R$TshOo{?
z-5E_njCaFm0hv_qz!M6g6TMM$V>1AQ({;SZ1)|}L>o^+g%VKBBN)*mX%^GG7?^J#9
zqz0ve!6nEf;uz|`h|U1^i{5Y;`{LO!8Dr7Ch7c?DT5Y8QvS*E^<K(y@C-=d2Q|W6-
z{>7teby@#sB<gLz?DAwB^_KUC10bUH>cyjsvaq#tM8v1PeX!p;I@sOaIm+q`^9$vw
z4)aYQ^Nb%01ukQ9!}gK#O_DZ6QZaNi|7S~vTr<fzWH=+c70Yc5c)3*EcSo}+_yES{
zWq6Hvk-+}fX2Z)HnFI+k1B@VjQ(Gp7!Y;0xI(xxQwaC6t$dpopVU&dHMwX+BwCLeE
zh5WUz$H~SNF9y4*{)P!Ozixq00R$|E(o}g6uQ8*NyfqEFQU$BWGeF&o+bul<Jg5xJ
zRQVnTxz=VIPETa&C*0NJaIkKU@ZMs-jfN$sLFclpuHm!Ow?M~esm)$1nfhqq=B4$7
z1C<7XdZ|MD>gL^hbK&9#wHZ54R9!%mtgg$d-Ws=8rCH5(+mqPB+w)p=WvBA3&vukU
zax(HY_Nl}C!^7xxC3F9bYtw_uW#241uP=kh)3Wj4uQCOJ0v>vIOT=isEFnwhiCbDi
z4JcbU3^=xW7CQz5OPgxGKjs5NNFJ$k$_Pug$)*Zuc<&mC(Dcy!C^w!(v?y6lq^Zaw
z9M$arc*rB+0pU0uCeiq|Y}qd&HBqVQBtDaH7Z_(RPfIQLoYi+|Y2Q>K2of2z4L8NW
zDGp-Fq!aGjjeL;N{q(*qtJ;-{9+%oH!HtS2k=H%7nce>FS(Vf((k3ZT9{n-W4aL)2
zvGL7DQY!aaTp_+fS=!Y4_URk4cQVqUy<sSVM84|nwyfmIO+B8vm(%;z+EQa@OUdn_
zo6ViwXq?8KZq!o%^ioGh{`C}iW%v?HTi{F+n$%~e$TFC5o03(W=F}`*B+|MfJ4wWc
z+*Xcc@zf%KlFky4fo^3|gMt-+W-PMqVrJ!~AjtLxu{&Go_z)Hem7a1?!^%|Rl2#dV
zH5!z?Jg!tuX^5@y*cLolHL>mYmKq1+!E&2lxKjCAGOb*><}c+mDI3s9q6jri5i~cm
z7Ph2~>rO$#@)s7wSDOB2uc1u;2C=1@w<&g>3z!_8$LoG;(^{3k|Ly|K#jeOjZw@-_
zyF0oWwr3-Q?GzR>IA^5aQ5Bvq2!FBl`5gNHz2^SR0D#l=|Eslc>S_C*$F-HmU-bWf
zMf;z+co_w7EEtNzq1ZSd4gYfj$26s<&aoG&P_)1QQ;mjFiU8hi?N{y8+21_Ri~FaI
zYjQn06$de%vW@}j?(n+OLpfJ)BPP_JxoFNLvQ)<LIYnZb60#sb-)T;j8*v~5%*iyY
z?8l^Bm#k2HQVFk9QX1c;G)xtruOP|qX-M9e$k;LAw2CIsyR{rRgMke<_0Oc=l)=o%
zGsBC+^C0f_qyyGzIj3hFA3%>-x(nv164C=VMYPGhr9jBTf(cLHv}cT(>80HctY7K<
zW(8@}QU9p8-O#czpZN?i))Z{s1kZunx10OB&HWt{Yy;(Hk}H$x88T+3Kb|8AvTqV;
zU`V!H0EJ=G?09UYe7lvL0BtVdyToOiSu@7~FI{7XA)aOGW_jV8w11Sd3~*k`V%QE^
z#z&sIhslsr5_#GnNR`#m$%;*Ygt*BIAE1GmxNey#%*Hyi|4IA!DONyfj6~WtCI=I4
z=s7Rqh#{$ZCNv#TD=Jb5Y2P!}apiT{gS5Z0N<KVoUd9)&sW3P~-P0IZG~d)zaC*;<
zmDEgE6dsaloKNhw(pPY;sR*-t<5D5?c7baBHgrdCo%XKU(5$!5eNy)^k!6|`4Tu@_
zhQm=I3@;lJZoe!DrZpp1#eKl&B!Hd@1<%OTb0Uv$C>OIAY#e!495H3emaqfhed4gM
zsoQRB59)wS;A#zliaJIL+3;|8?MbRWwxAGz2`a8-Ph6uplz-aj;2w`XU5g^eROy`k
z=<sL<&WO*Ocv{>!63t_}*gZTrh9^Jni03;mn)`5^e3Sy5(-{vbvS}pUmCWv;lCWfT
zl~e^98X9z}_wMWK>95^AG9>R%%3%NkO;R+J4k?z`g|=l#P{33ddAX6JvM^|a5uBnT
ziML`mbB{(6YUEx}?6{ik;Y?yMbw`Y-8#%NfvmvPD=1L(Tj|ya~rK(scfLYJMav{w$
z+{^^}$+8KVa7dQkFid3@C&ly9Y&tBJ&HKY~s0~))knXh}3z=afk)g#E0CU<P2j7H>
zXUDuKx?$3Rvlq|WVE{Yd7u!|*&#Qg+O}$)|;fVvzH)geWmIu!6g3J@EJfw-L$bcSj
zAlz$+@8Ot<Fj30qILmn;ozTm*6>qidi`8#d|KI=lf31F7;}{~HNgSiyROE&LRY0o0
zLN*zf5|P_kKFW45;&sET``?t6{lO?0VJwyAm?JURfS^(yEECB6X%)`h0Ow6rhNR9$
zqyGQ?pZ}Zx%Y>!Q11%#}bEIi8s@A?)ty19bO!$|sw!T0Musm3es(2ZP;Q-G5eZMG9
zwv5p22Fmt1Ks2n1Z@(#5b6_u*@`e5s@h&3VOuSLL*%VjnCfvsw9`-r#6da#0tVTHU
zSv`QErV?Fg1&)|9NDUqlG4FDB7$$?i;oZa-Mw;TRriU3p=ZZ-R^0ecVqaD$FA-0cp
z8m;F3OWEAck2^<igw~iM2i$q_qPgAN*>7csg+n4{V}_L}bd7=L(uL;e@wMdP#XY{h
zV{b_~NChfgycQM0=^~9^%BGySDl2*APJYlET?7gfEPUiH0c6mXO)kF3D`5!Bnr&I~
z+hVSsj-YmB@L~X5=K?T7j?+H_K#f%u%s}{TGLzjsyxw-jrTIFS6^e4ON_(H8U0Q~}
zxri<XbUiPiJXSbO{>VS!!gM8zv{05hizGrVY9Sd27Rful=x~Z@+PBCwlQGIHWFi9#
z<|-Oak|{mSwz>+f{!wk!+Q9HPJ3Pn=4>4I(fud*DhMwYjVco}Q2*)P^=-10)$vjk(
ziH~zuUq=$y&bkV!9nLewiL92~ty*W3`?ws}s3K>DM>#dy!lvrET{yRB-g=Xn=B!NX
z?mrfEk*pBOn}--n+1L_Gb1AZfd?&IzlIR>|Cun4-^`>`SiR+70xDu}}a!X~Q5JXg`
zvD<7Mi=&-?oHUPip8q|o;6Ymzb<kSuEF#&@X*N$tl{lrN1)s0=*}LjUuxQ`SHg6^<
zm@b)t+)}ARb@aa98#k5H-~U+$=5&kakA&`9dba`={yCi5a>Nlw2_P0>WiBRjj0*3W
zQ*}2iB~Ce0;Y04(;uQLSnQiXl><$3>H!I1aA~rI*PH08>bdD=%J4EN9PR!l}X$v&r
z;Wm_d>Cq&{^leFbZoS*0&REG>>zUIki|ILZyUiLwp3ddpPk*0tLTzQD*6e!jG%X9G
zOv~};vu^8MgNTBNWoY5lJ{j!}H_5ThFU<%hnjamFweM_V$u~pmcfDwuyQ9;htZ5?Z
zL>J{AxiQrUQch&G{PjLN0$EPk8F9k&+^rW*-kpMz3MW3Pj1FpOOHQa#ovFx*R;t5x
zJy6V2x)#m7D_q1A4IR!o4^u$q{0SY=pOkrPrQ(c={sdUlsoixCHhL1B!j?kcRkR=(
zhw3D^c)u{HHVQspFjN;jE=LAzLU_~dJCPvES-_|sI!1+&(5YTFYwhFAO(eOXQjjt#
z^*jB>bN%&2)M00!vq?{<JBF=}R@AzE6b&Mnq_FFRWF=VHF@1&@ukPyn{E=S8A06yf
z+&|uH!OnfqsIS5f{3>!|zltg0U&R~|U`2iqSk{^`Auk97+mChqeylU=HVjnq(2moN
z+w(<+?azn`E9MOh+n*5>R?HC>cIs%X$guX3c<A`B;sHX$+WnJE;tviK>(F`~E_Qo3
zcsDr*^uLUQE}DAy;#su)^0?J_=08j<S)*HgFuBK;zNUwmy{kNwC#_8hKFi?kXg`^V
zKRO0&&N~x{Ht(eyk2bf2j7ppPE*ogbj@^^4kcvZlH(Bs*F=^+hGsAl0u5zad<N>@$
zd>{#Alb2drBu(SUu?{PaZ_EF9IapzCF}6`Iq=$-fYj5j^jdv?%2ffWU{(#|c#lyzH
z6%QN;SKJ{Q?z|>pR$yGYTvd2nxv=gRAGdH3qP<hjD*i9Ej6ZxV6&OcO9ZYVFPRlIq
zet}=7omF*fOv@N8Kg9uw{LZiSdFnShV>m`yG%x)iWo|)4C8f-v1g)r<X_9pzkQ*Vq
zubjT9j-q34xime-(ZT{$C*8%SXPsxtbJhFGCaujx#de#oZjN-`OtvX_*-c96Gy}ON
z?5!r!L2tuFl&^no@Ont2e#_51;w7>G9iuas8eZ;<v&G~whBVl2H;#{Yj#{4do}fSu
z$6nzS`FZH~_ZqFE=Ii#0qn#aeyZHX(WuvvzetFb*j#}V$^SF&+6`;9iyE|=kd2gQ_
z9_}2qPxjGKWB1@E_`Y}We5ZY|zx$?qFAV1N%P)yL|4ZTRJ|I%`Oe0a-&Tr#P9f!TM
z2B$K^U{Ob8H;5BSBY|ne?{{ilO}zzK&%OibE+L=i2>d(+eHdJ@4ctKocTT}o@%M_u
zR2QqG?>>q<jU-ozJy&pRCxV|`snQ5JJ2jrfP&(}O?3tf8E;bt#&ux)9JzSeE!(p@M
z3yGZ{2s=lO{drjIU4vogEx1B!;klz+=MFd(1<t61@8t5P{69*iKidBbnITHy7x(u6
zs;$<Ztfu|H9zXr&i~rYO)Bg*$TNH<4{{-VDHN;Pi9~ygNqf1Y#g9+yB4BFShWzg47
zU4Q97(5;=L<L$<7bN^*-NEh=i$FmCqxo{Acxq`XyO_3Ffg*UHd^oPz)?<Wz>EzB{7
z|7A;%VQ<nOP~a0hDe`%*6DB6<Q?dRzz{4xhIb;NMCPsf@Xea>I(Bp)Re7Rv<1cr7m
zrRH6v+BypZ5SVDu%8*J3lmd*g8a}!jj%jrL;RGa%?j$B_z?0?X?uj^~aGTL@%6Nk>
z7c@p-(xG4Hx7hy?+RI|t6+!l@^fSH+|9|%0y|0ZcNgw}v^eQ^p9j}Ci=!>r0W&|b)
zJJ@(I&Uk)FmQRCPAP^Ffgbm~M_qG3?s&ns)BrwS&S!Z_=)ZOP+r>ah!I(42Bj&*q~
zb)RznbB&BzxX;HMAj|D_Vga?u4KqddF`qGr5=3iuPei9(v><ua8n*IkG80}U_Z&kg
z>0(l|JunYjZVEQ>ka$Vz_rgqUc)j7ND%62G4RDzB#dSR){)({jXs%{o69j!Mrt6Ue
ziBo-%dJQF&YoE}1yA{(Q^-eUG%|Ef?ozj^)m8Qh3av<eO?d-!~n!KJsF3x7;Mc1{x
z8h@xtw>zKw+kKMiP2|`WGZ2r!9`ED9kgo{}*Oi$p-8;sz$i0_n(!Eh98DyxSmv^d{
zb`Cgi>D-O-2$FbFVs`OaB}XyH?3LoMQs4WLl^-cr6<KgEYTnW9;9oqcJ2mg^SqxjV
z>%CDD@LC84&2m>O6j~ZY&pbJ{st03bzl*x3py=5Jx<O`!cXYJcYBWM?WYgIDaY7zv
z3ncv5OQmJYbD?ir&`+{e0r`{z+Fsz(^-7O%=+6B<13aJVk=5I>3)-WtLi>~4>x}q2
z1w$b!$pXRCmmBHmga4QALn<;sotw-N^qxEy(MtqJ5NJ6SmrSML|MiCcQkAOzezX7X
z<(|;-W?@q2(HgjNddsKRQl08n6A?Hs;j&-|u*Atr1?sDsmu{1I&CwE`D~`_j4R0+}
zu<9~k_4zO<i~%Om>J`qu^=gz0Wbef1man%tCij}|JB}(MJa?GTJ%DpsISHe5<gLne
zMG*aKE0+^ZU5&y;{soqBa9U{a>dh!!0vn;jQQ~BU5(RQz$eDJK`t=I+0t)2C8U8nc
zGN3<?5)oxd7_nmphdr3PjK~UKOzcs-ILr2&TW~+J9+>YWn9JS`c?ShfSCxKYpD%vd
zlQ)>jJ2kqnLTm9f;w{Xe(QGi3yf1=P*o<O$RKmfb<=!%}i0k#;@nm@K1$GK+mAXwK
zyonG87-TtB>2e^C*}D>Xa(1$uW}Wj5kR#@5Q_WlrD=}08QHh;0SNWHdxp;xrv(p7|
zt^iRp=L_%%uj%=7I{`u#=ie)12;~7w3Hk9i8Cl2x)K|Q;3)lI4EB*OKvF}4Pc1B;5
zD`vUI?9b;mmWRi$2|p0LG@y%DA_aw)OKKWh#fcW=Cr(4urPMcP?l+%ou6h$olShe{
zu#T<UMi1_#P!-gRwJp)*4d*S3vwD5zymq^0EIhRr5w;X@__W}@vXFL%=Ti@U4}9-r
zsMzuhPpzfoVZ~*^O6D$T_w4NU`gW*xT4EQcup3<UqeAC$<$}oiKsj_vJVF#@1rLx$
zx5TmnX|*ochj4c9neyde)p=B@QI%FjdbtpvK{@Y^lYZa+L~x*6=h=cf#iAoV%c~RX
z&-3RR=u6$+EX&?(vnQ6&y<}C}9zS6-p-gMnrUC(*T1+#~`};{7f^!T&#X*kQ=d9N&
zSxb<dIP|dvQEraAWrg;(Fk`&w^XfuN4RYj_mr0Z4S$UE?hP2tzhD@3cYCV?dHO`n>
zVL1xQb2_KYpl7b->2%2?SHdS7dBMW-dXTFni#m~G@Me0!Go0HEmD$xe|4q=U<JU=n
z0jE7s+39oQFr|G)Y@C9PoUyRr-y3#Jwb*32KK2E8v7(|NSq*&KjJ*@qUZYq}EErjf
zIM(*kWg_fM*l}RpOWvn56&cRt*-*J|5R>Q3oMH}A^lUA=Hgft~F@^N8{5$V^jq7XZ
zDG-YV9k}>QX@cel;>rk`PaCCA^0eX$0N)<~|L<>;@hCaFN+!}XWgZ-JE(iJ8Jr0_G
z{u4&6W;31tr_qSMod5hW^MC$rJiLs(9a@PtPx0@+4$g0`Ym*PNi`5vL*kn}eB?VZ3
zA~{Yx^<GmCM&2f@jsf>(^w;z2yTXDYfliH9U&qjL-zDDWW$&W>*XwCBs$HPhvl{;Q
z2TeBmx1+;Xd%J(zJ32Y$)GjBlj}BkF-95IBn&y0xf8z92$v$vOYXp7Zv{EuhPUP%A
zCAXAD?-%!E*j+EK$>@4$xn>=rs*nzD8Q!Pv<;k;kcD0IJO<9+)4kQ%Krn^d~|0Zl&
z#NX`Pe^2{_O<h!^3K+a62#3;wXe4?0OqKOSz*vE6SiH=*k&AP#W=bAhOg;Q>LH$Sg
z>WIEdxtlSOR_=yO1Y{nuq_6XUVko)}a^8*&Qr|6i8G(MMEFY@?%E5@V#Dc7&UFi&E
zQ0kpj22!VnqC})!=`)ozq>_>{16wh|bnAaI=y6&bivuXO4Erq#kBx7qi4@4&(HY-B
z9q6r$#)kBg(fIcKqUKnz3KogCZhG8RGl0CLgw8;yAnnoDENT$xhw_O;a<k))g?YMT
z75;Ok@Sp9%4$S($n?8d)lH|lIo_xQzdkl3&0fS{Ja%sqLL&5=*t6O7#Z^f{Nc=u=f
zU2%>^EnAR)_4QZDl(wfRx#7M&a^CKURF03*n?@<lM5-*Q8@-EUc%v?I@$xJ5sX3PC
zTC6j9sWl~CG`!4JsuyJw7ga3@&?5P>QuE$U<?S0a%afQR@@E&~K9IVCt6QZS#Lcte
zJ)iiC4c$$0Ht64TD1b61ZYHGT6T0`E*88O8ykqM$lXg3+`lPq2?+Q_v_$1+Ma5cMv
zqjUwZ4Mxk<khi29IQXQfx@d7R3rn6R-Aq5+q>8CJZ_bjlR87S>x4U@{cRqJvSZYxa
z!T41#wIEnc?GaBZ4`mVG*D_bA+~_~Ctxr(Z@whBP=X=wcR4BiS9NL$DjUbZwH}mLZ
zZpQYTWJJ!ILSS6tZi;iM*svlCB0eA4w>CsBLdY`3ON_*riF_2DLz0%cef(I3pl?}l
zH}tPB>fmAOK%%2POeu&$Pp2A0eZRJX5Uu{s)WlL#T~K|At1r~>>+1zA^Xv-3khg7T
zI6k{NA}8{eFe&Ko?u`6}*cToTt`j##oWI|t6m0T)`rG+_qvT=%?@ZXWtu4dUQOeGl
zd~HGy_`FNnX<O<XP@C-HK(M3_#>9srv|oOh6W-7L<|yo{9&%L=X1fdmRR=3Olj`Nl
z&RG@z-8o}4H)GpXFQq|8-o{^G>QZtze**JKn@3C(R1pZIBv-f9x8j#&oSQFSjYo8b
zIV}t9&k~>S1r+093x-)(`raskf?xx7&dznj*6ImW!o!qU-s(aL9&uLpIW<g@YwQ$G
z;vrWM*d=$d@QuXfTdf2FZ-{@DF^QFp<-2y2?b^|Ux^`rB?I^En^pN0osB1eF9j65%
zhaj?9)d)uu3mKhZ4CEp2)anJVlc_`CO^X)WL<8sT#-h?kcIl&>(xRK{5X-pbN8`~d
zL9I_cf&4L|=v8r~MvfTXX^DLn`M(Zd9IE4j0T~J8pn@=98H^_btlOu9bGeiI%05~c
zc%#w3x?pkjTcY8LYcl4to%a`ei@$E^1W9a5aa?uzOuj2sNBTs*s}u+qNz)V4(k&B@
zRj;~y-rf}l?p<?WOg+x)AU+ZY*61JoEl!fnQEO)0J4=px#~$}i@wj(*+&et(oy@p*
zoN*sz$Gt=7;u!Z1k9$Xqd&e2~PX4%eta0z8#%-NuZ9=v*+`sxSlr>kh&2%M_FlAQw
zXKkkLGpYk@snUoS%Z7F@<C`)Uul3?tOuA6~l3Q~w%SOQi+2rHnX#DZBIP9YvW%7ub
zvLiy|s>_ieia%Fqoq291E_-S0Qhv{KN#CEz@A-ip3qcby7;#)7FQ}rNjG7?W7Zj4e
zvavd!^s(nBT7q-&zw%;s-Ie)-mu45qD`Q4+nG?$%&S-v1-_ys|Qnaz?^CR>5QNicJ
zWwkRH#gqHP8+j+#^$f{`7%7jCzBg=cdRoE5nRZ~yUXN$V34vVQozUZSE8wB{JYl}b
zv}L{1w?eIFY|~p8wxyZLX$#}o$(gVjPY9`aQ%;1SWYz{%6LpmTew)_cnP5z5oGxx9
zW4WeU$-K8)p=6oM4o7}FudTdKQq3(rF8S?(<_bnv61M`2(yy@D%oej}7_4YB^9aF{
zMsT6{iQdp{Sp4sA;M$Epu59w<j1wiHQz2L>8FeOJ${vx$YTB82=aVV7eIoa>C*zxk
zjKUNA;#oER`!x(I{AL)Rlbmqf{6k34k$H7wz0!)wk%eDJOFg{Fr%+iBZ}sW)*Q45g
zyfTy1yT==8ZG&#Hl_i+rnc<6P8dYG2Qe6wiV9n6m5n);XubissFP;x8)-A^66sMI<
zOaH4|s-P*t#On=a&>&p*GrJ`_*z|w7^E(W;Lx(+s(K$X*ER^*7M@ih<9ZxBI@Vndd
zg#OH^aOYZ`5^BHdNK#gx=Ufa|cGg23+%k}JTI}K`FX@MI{sEK5Ioj`g_v2gdLp+*^
zTuxYT&JyEEJ-z4K5}8B(l8sl%fe!?H<f%yM-q`!`l(DCKWA=)ym&{$q6_56t8HaVZ
zx+5T~PjAn^`syo>k0*N!;6&Kc)LTqWBHA|h=?K?wI!h?g1s!LSyFq{-eHc%!ycne9
z(DS9cP!lz5q*^JN<a@E_y)Z<*^J44WY3ki6zmv{-Z|-Ep3P3T}VcoU#_|A4IV_l=`
z+t~qSlKOVHpLwzJ;#$vl@cgvGc7g0{P77$b92;;1!o9GO5<dN6Jgm64nm&!8S_B>{
zMf$Kze&O(^<M1{2Xe*D=Q=dGX->U*(O`=EYIkZC}<Ry{E$b~e1=ShcwBXfkqJYFmt
zRx1kxWEp6XIe}e|j{iJJ-Y28eWODv8p3ueN-fT@r+r8X>^}4(Jx7zG(_7MCJzaEBR
zD*m(C3cuig{3`#v9E@)0g1U*Bd^MgyM-nD?r&ICXjs~2IX~vO@yU>yG`MBnJ-QiH&
z_n3Opi`OfajuIWpNaw*Mm!xSRky8;-$&$*uGjfShGA7oD<4KABODMYHe(#ygZ>uix
zQfB)e3H|#)&q7fs85~#*fFy4IUMI8lQYol;ZkO2OE&D?1N}!8F8e^F-D?Wv(S31+g
z7%?$qo~Geo${NRROWsG*9Vwx?vtbN7cTy{r!fXf7kVOI20in;5C+~ww1lAj$-I|1A
zD6vMV6Zp8Eu9;2-@o-ww^woE8?5<-tk$`Q0hFQhDPExfFMrXrYyLf5hjGZwl<I#Op
z3|fAk>N<FFH2yG5dgoN}I@|1DbJ&7X`LY~AWcFttXtH~ho_c&EmOyHkV9)dTwn}gw
zhd0vwO2Cwp^YI84z$=*+j!GVY{`|o`M%*2;Sxpyl<pj7L<SY*a;}vUCt8SHG4kW|A
z95tvy=plq|DsQe2CcRaPw!TkQIW5usym&ZN6{8#Vn)i?H(NXu+@jty6`)_s+y8AEp
zjxZbF?Rjqwzd6RsJ<5sl=Iv`j?|b2W-F<`KUzZ4J@9^!hDSh~j*M0R*?{E9BUWnqm
zhp+#6wEyjQ$KH2`2QT)Hs`Rw<mQpyq@F?JJ?+riloJZb^z1Q8N<ClA{j;YGMS9?d_
z{<DgzUzG6e{*kx$^7X;tKlk?DtV5Gjbf>v-*_nN+dGFopqrEq8yu+iCKpuQS_z=4X
zZ(mS?nH_YUY@>tym)!XAAy**3v??m06QIcMclg=e*+1AnMsL2^KYm4Ze{*;QD~9^J
zzx(!}d*r=-d-VG7%^t?Je|)f4!s}OuuU7Y8eRG6H_h?X6uN<P+hunJi&6~F`_qgSQ
zy>Gh*=#FfIilY0k4`0367f-k`B3E~J_w7-47u_J>!-GAn!RSks2Kx5R-ZyU#xNGRP
zJ%?yE(#;<25ow6R$KQ32J(R-dC1=Y2arpM&1%IqM@4Y_S{~j}RfT{k+E6|AN7VVbn
zm6EJjl1)_W*0+Cf;C=J<)h^}nAtrcrymxf;_VqDM3xB?U%;|lOjt-Aj3GC71;o=2R
zhabP;T1q>_T1Q8FyC@3|Zq&piiVQ&*wx(eXeMh$_ixB$6J#hH4Wbvi<Fup7)z7%uf
zvL!l5eK^<?{6TE>>cu{FxLN`S?!Q1C-2<AwS8w+IPH0v8^z+T@y<Pl4RlcUCIp<mT
zW%t`Xn$40DE_(l}d;FH>bS^(195Ne}V4Q(pNf6W{RC^4wRQ2v?ulok1`v-{q=8#e@
zRq^*8hILeW^LF<;Lr|I^5h9BgP=biG(|xngbz(HfU`sIg@!`>Nscic8YVX?vin871
zIel|{w7+|7Dy=XP52?9->_c=)G+mSoin@k+-*oq12oVDvzJvDJ*~5D<74e|p@)BfD
ziwPCsUh`Ow6bWmkFW1C=%lE%>?Eiz8XXE!+r7j+c|L516K?eV?6@9t?^+&k>B~jRW
zI<YdW8UZ)Xxdi2w5w_8ov*rxW2Xs~(-(oW`VM~79z0mdF<H<SQw}RETnz7z2)6Ffl
zbBjTJ8&6KhA0ATdZ^_8}ZZI6eghgK$6@2{dF}VdBiaTRePdWB>-n>{H4z3dQxZ<Wb
z772f^Z{oo@g#hPm!g)?Jc!LKocMres9%Zlw&GYQ7FK^L_zPFh=f|G%CtQpECFM#lK
zd&<$RLPyje-nRG50$jG!622Qe^9DEXXXdTEIwG!X8=#YUS#$_3;$x`@k!fwkI~bp-
z+|ZOMU4@a#(2J^fS@o`}-VioD{kYsPceJU#4f{$9k!C}MuX^9880npH?_O1Fhhh@i
z)5O7g5^v2YC*pcLB?~&8mM3Mu>Z{nHLb?W^=!aP28#&-nL`@l2v$aKEsJ`;PR3J3a
z7K-Df_3nmqMcp#8v&dUD6z_G&EzQPiV8t|D3B;HyZ_^v4kW>ibLk#AsdWG59=5bb1
zKq%rZ!!J=PJpPpdk@m`$qMb{EBECWoIK(suHJT=dh%IVe!5^2I3PrP*qFKL|^OECd
zt}3;QB<__jpJR)&DkrdfX+K?&j8KfdOa!~KBWg19P(bt46qV_%46>@M=tP5SD@#vZ
zYPz<(p_s39AX6@-OIh+bgm}J{N3<;Y-H)#Lqb+CFizq~h16c~O3K}pXh4d|F2tzIk
z=;mzs32)pfS6b{6$xuM>w8LnZ9CHQf`r+~?GcHy*TPU*a4b!k&8dC}HC8$ys1B)uC
z<Z!7B)=G2fV4!+2_-P|k8QyNAqq+5=LatF%om+OZpzP&kbuh>)Ygqn6{KMttPyETH
z{=`hFO7r)xq+s?i8-`;iwb}TFKreR3lTk9^jAAvtD4E21gs;E(hNpRW{n8HFdm
zd^%KdQd7WzT<Akw<bpScu**Ytn|1ehYyXcqMEj<22e1JDyWVQ~Df=IPezE`mi1-hF
z2hEMI<EghB-_Tj?a0qQOp8P8r{*}_VUBq-8HYGTN?Jrx1-|QCR@YUX%<Gt58n+WqJ
z4?H4!?#hO-$U44V%4(N4RGxy(;@Ryq=l!N#o|9}0uCaC;j*>UfNab&=oaU6_fvx|n
z?QcDyJp?_nGOtmmtNibvO8*WS5y{R^Pm`H`ctc+Ns&^;;)2IIgkf*cRB<WS@8f1@B
zb=*uS+K3u-Z1X$YFy~tx7l}fVh5#fJbu$CcV+J_~bxPW{^=mLExMlgH#A_>xo*5Lt
z%lK@3iUm@mL-U#{b|bz12?Y?+g{8hAaIM!b-w~)oALV#mBvU|qx9L~hYxjlo_!gt<
zPOrp>O$FP!0t|I@d!0-;7b(`se%1Ss=(|ll_r9s@B}Jx9H7eJrpS&kyQ@mu#4k@nS
zWOSLlr@IW278Zcr&l498UP+arkR*+UinOdhHBpuge0}mE4$GI~iW*T%*}|v8+k~=s
z3$x<yM*^WAt)ua>L)bDus}T+uERL_XR2mg!7V7LPuN1BJr&9&ew`dObuWu%FKaBHC
zz1H_^#mx#&AbVXUDY%;cV1mu4pprQr1h<aQ20SqWYFVM_U!P3mw_9R-HNj*@V(ix^
z*5?$9C%&QG?`!jdV;#h7N&IHf3zG+)t@FN@+)0mwLNb%WK~XVP35OS-osB0wddYK%
zI;9?Uz%WyQc`74niV=u$MUEZ9T@H+~PA|T?K0&*CvP%+Auabrgl{X_Z+lW#+R}opB
zKd`-AX2!zbTU&mu@)z0-uY2m5&hJ!6tZGX96$Y__eZ?H!nPfX_D{|}Ore}ms^-eiX
z2t7-Nv)D?Qr}T~Ja~i+CHf6Ls!~b%4REdxj!7nSKo(+ql`YY096$eE&U0h!eQk%o_
z&-SJmpS@4c1nLk43t4m;u||L;OW5I992J*bpZq-Q%TW@s>!LMOvtd24dHnC?N-Y){
zo+Jbff9>(-sEjK>F|olNRq`}a&g7iSytKM6=UTsSXrAO{h7^V&)Uxa*Dj;rhq!UW0
zM<H|N3b^biw`wyFsxM_;L$BkzN?AUvvbV}8gp6aL)*BNxE>T%OTazQHFlftyJ{c`B
zOQs^RXAb0~A|I31kz>*KO`4T+hDS~5SgCuLQ?9#Xm=d?s<MH?n9pT&jI7x&i;w!o0
zKIOgDk-2(9$W%(WUdDG~#smR+V|9yj1$G{phh&!tgap~!)oayV??x$pEy3o#><~8L
zBos=yDx0hd=B+BBqd#t9PB!)-4lM7X5i9}qKzjOyMefdf?%mW1)X<IY2Ukc=ME+yL
ztf_cLRalR2=BOOeuaw#;El`n=TMA9Fb-&ShhEgg0U&<;ZDF}m6MX9eUch>5Ib75QC
zH1SN(NnkQ-Qc-BB#kmq4SzBj<55#2D2_@M#oqCW>=0Pn88%la`*>2S;E1#OS=91bf
z=G>HMo92Q<R`pvL!0aQI4l5dou$jv>_6W*0>0+kP0(vOAFxI|RHWcq>Y{Z9US!hic
zn%lh_icBJ0C(}bFxN!|yA8S4)lQd!QjluiG;n#>Mq*Ppy14=udR)~LGg~o1NdZpw^
zxvGiy&PcgEXC|Cct8!tw9G{M`&{7A4=%fpZ?Fbz-aLdnSO>3T-poY4=%lj9a#<KHi
z0ohr;lq#0IA?+_S6^8g(wh<K=FDEgL>}+aDQeR45S<_oARk=)N?AkMqT!|{MBB)V^
zjf(A&<zov`{dRUelzT9<6*N>!72aH~tU%2na${F7|9YPNMpT+=$^Oa~uI()~^8^si
z&2D<`$GJT-uQ#lGjWPnCtm~Kiuk4hKMk8_sD|w1h^J1#~4XYM#EoBNYXDOe=wvJa$
z!PSBh2;5xGpHp88Q)thj=~!OtjZEV?lfp03{%dW6OoiD%Y02~glfz}(PQ^j8T6elh
z3u<mfl8eQb9QI{Zg@FzxVoT$svc8iVJhX=;mehQ6-TsbkX1M~G=f>PeJ-ODsr&(-E
zvYmNDs|<}Mk9^a#4d;-J%t^`W!f5N$p(tiussrblGhbS)1LhLN0bJltoUGoN^+wM@
zz9G1HT|G*Myin{>e$?)$cche>Z5*nTktZ9OOvhp1%s;R53p>3e6`zEN={{tcMp(VZ
zIY1|Sqw|EX>_`atY08Xaf6=x;_`k#7MFh;vU%ROL=<&;VbgxeTC3qG7S}^zXf*4$m
zSoCEfpFGL*d;_vB!5MQqsrcn-828=W9_hW_+$fPJ>BzS^9iiQzC-ZcuMat3qY^5xe
ziQCbLqfNNN2OTvU@BCAq|J}@u{#$td7c?8KO#a^>_;UXDCI0Wr;oUEAc=tLt`tNl?
z>|a6XALBvgg#W2y4eL6kIfL6asaK?r$%=H-2O`gYmN9^K{GU9+V^>ts%b#bbRTqib
zyw*7DYS*#w!)N5)vY@`qA)>~U$~ia84=v0<Z>Yf%L_%Kr(M6?Fpz&at1>akE0$G@j
z#j%f<PHbcU#`y_CVgG@D%Y{ycOKS7pIY-53x6b2p<L{SGde#wu!llbOlQP~Fc`#Me
zUrV)vZ}DH9GE`=OrV~_o?ng;`(^-1tlug;KZ=YK&5zDCt@FAY`wHHtxXr6FtYRll@
zO|TQ}X+lBmmYt|j=a_i3wZ)q^2e>@_1*F1!km2VKaZvSmW0>~am{Ss{n>*n|k8h{b
z)lu?cMyIdJEmkArie4KBjy_u^nBsVJwMxo{QQ)g8jYTXk;EVjjQ=Jr<&2u%6Daue)
zf;gk`h#(DLQ;u)O6D1qyE@ot5k-Vs^d@K1C&|k6~T~UoO0QBNwJnWT3lp6WnK)Ma}
zvxZ&|%cE5)SV?ElTw7B~YZH-?d^DDcq%l$f6kvfvyKX_<03UyF^%T&YicF*DRnhh$
z_hOA(F4Mf5D%ITIiQ#9CT36VqZUQ5nb3b$9dPb?#twV!(eryIJ<t1G@C&)Eo$`L^*
z1^mq{w$Et_SWNBN*y-;1YFfyHQS86yDJKQFDBR<B4@!*6iZ52xvS7T6cx^m@zHpw&
zkBoyxc!qp;Pmtc84%}!0bvn(wXvqbihKO++BC&^3-peJ7a1o=*E=!?tt2)mb$pcZ*
z^<k?CsfpXZL?z0oChoF|qg>b|R94fNs!h3Csjn1^q6aSJBZrY}U$&PsH*{XL)^iSd
zrwgJm3pxIPsG&K2fApFpt!(_~+HFbVm7ae+P|Wn2_6DtqB5Ss*(Wpj!9)oEVk<?}?
zZnIP2PQd&CVnbMQ+&|4fDb-R<PssDn0RjaRAx}wBiwY1p*8&mb`0QvDczLeR22wI1
z-rFv3Swbv$YkA&XOM<L5bkMhQK#~*<2jK7V?TirEP51?*9omH(9#*NXO4SyQM3j|A
z_1SPtm(sE6y`jT*o1fjIX)^4nMl)*n#7@4ytvDmk4BVoW;%C*f(*#<NVPgoOWR&9p
zmR<nLt!bo+&_9!j9m_FSxt`rlCgj`F;Qh1_H*VkDe&>*bCr`~&4I6SkAL8++eEZ#Z
zrgotL_)v+ClvbD#b=NHuV@fZq6x+jVVRx0M)m#Nq6*dVO^W@&<;uT%-B9*^D>1&P9
z*LN}s)&VmI16PoFJ4ap}34>gft60vI*x@v4dg9@9Y)%QNOmauX#g6hUo<a^7DnO_w
zy9zWcG>a&gn|SdeCO{6#qt^2RKgA8?W$fQn3Kupy3n~k(sLoQvGRQ>3FL_V|cA`)9
z3AlCIe2T`eZ0b2-En;~)8&A^f`qC>(?t&uM6FtY2M2&*EZihY4`<gJ=bc!M(ZPh!y
zoyiyl5!g?(RoVyJPlhaE&H7U*_u@;=wGXMe*S2Iy`|^ibhMdPXnm*kPc^(Kv{gDNv
zZz>=BM0bp)F6g~|yXe!Bi{9CSMbAuLFS#KfBIz-CRc3HLO0R5Hbr{8*Gv@u^ZHn{I
z+;&<E4KrGqb$n&REw`C2cbn1Hx3XN8>qz%fyd!0bBaA)*J7KcBsxohh;LbcJa#hlf
zWX1TtyBa8q$(Vtkp6-^&>tE)M$W!=#g+4EK0Wa|XHo~x;zW))_zu-T8x&M)y%JW4s
z#kSCUbxWa|vA5ad-?fqWeexl`im(4V7-6SS>rcL1BKht5q5ppL=J4SA99Oh{@f0`F
zs}!-%*w~y@hu~-}-!vH-0%Wx$-O|!HEqTT8+RqrbSooDJOGSR4&Wm80@SY9mhbio~
zpb7-r#?QUzJL&Q+{@`x@YhhiNK0f3Ee9kp@^UX+YUA4!uuyOll`hT-?<Y+7?seGi~
z`2fq!bb;#?eCl9`c^@DvFjz>$FXGX;41%?8_ufjcHETL%41yFR_%W+KWg3!ahV1-|
zy*NoE;$++hA7T^L)BO~F$D4b3gV>EiKPBnAk*Ilc3!DnJpV|&K5uD5Y9gzD0=IJzk
zpSb-=h6#g*>C5_ex{5oV5_5%B`Oa(yXGyu*r;oW&tNbuIe#`tDwQ^Jmxx`0md#lGH
zZ~FG+QA#Y?z?(?+SmG|W9YCcl$4PP%%g|)XiAuR|XeLe*(~n*id~zmkR9PNvx1u!7
zWjAlcQj&L}$ce==dP7JInZc|V9EXh?#>X3q;ZNBs5PHL<ubJ}(N&2cUGmdKgiEuK>
zdN=auC5~7yadeSF?k+(tcg{2+b1B6qFZV(eW4-}DOf=O9vLGcFxs{wI_hW@{D6VQ$
z4OXvN%^eq}rJ%s#H<zjV@fzsh(!r%jUAkYh!DCHCeTE%nZ!}@1Y1YY97T?entO?zm
z(&SKUsdOMH!q(CiiN<T!@m2CI|5;w0GB|>CmHcq^Q^lIkOxyN;+?<T1Sz3c1%o~PN
zg*~mQd9C`D%rK@3)I*NvjCc3)>2vi|Bym>b+Q^GYb;^H;@mab!bAVe`F~zF$VXE|5
zQEBnrmb+Wdo945#+iP>xfk>idNzJ#M=V^vO8X4k>y#kw~TwsVILQKnpLPDnTAdF5J
z{FI?dx}vj16^ZY+=E9zsY(e&FAP&q@C%e=8DQaSB{7WA+%7=q#YLz4nBNj+bm%7YJ
zqh+}m2p_ao=AV=!ivQRgk^4&wn;-o2m45g9c|~Wc{17YM%dax}H?us_S5;4byv&)0
zd&+0dJmhsUu1KW@WLrj>DJ4D4x#qys#kjEI0>nK91;?T9$bo79<(<-g0gHnB+%-LT
zZDZ-$@a*)ASxcP_^pivOh1y1Xk%69<JV6&%Cig>Z_20OKUSyxoOD3T*U?_Tg6D!C3
zMlQ6~7vugHH|~$jA&_H(o2l`R%2OR#;O#xmN?v%__|bUFUq8Y&Hl-hK5SI+}Rk7)-
z4BzswLWZw8v8YtM)M<nIz0mf}vwZa^9%S#DVh^-&O~D7*wRYyotZBPpO~wwTm{fNw
zIY`Fc8)5*L%Hv~$fr-_&G}l1R<TW%f9<hm^OD8$JDjzurUtBrmJG>J44zDU7Q}>YZ
ztj0;H%5<0UinHs2?`{*@py2yIsojq;G1k_)GRT1sgBeERqG;C)c_X{iD(t;@h`nzY
z<Qs0w+xMk8c0G|nW?YoUefi417G*cP`!KUU-Q#7=c(W^2eLG6}<%}I~^*q-GE!+H%
zZ^)Ok=BqHpP2Y>G_;S`@O<D2co3xQ?6Q!HA-KSaOJ-LQ~NxJTeCUi}}M-c6fTyAGJ
zcUV3*#ivN_sd?Y#_B<jjCiB3lLp{roEVrJs!g?}u?~*Z_6Vw=^Zx-A5`)M0rm3^3X
zzreB=YBO!yOZ(Z&825S%^9+0uu*gO)8@uv*%P^;Fgt<EawE<Yl=wDlrX8+1sp5d=N
zdD7ND%Gmm};6L1`FEZ$#?1WRh{lB1c|E}==GLO<H!Sm05qoCDDpZ~UkmjC7a_sjY3
z6Q2Jj)HQ5CW)~7!hYo~?@pSs-MEKuzBK$c$8~+Q#kE#6Y+f#JJI}n}m-q*aa=Cc{$
zUKe@!`T$Foh`rWniIN^wApy;#7*xG~j&CspZDLT4z<B0e%w{+1Yirk{X=2ee)VYS%
z*0QJDlpbSe@95i?-J`$1?H;{2&YpAUeUf(p%RDK#5-M#-PPQwM5-qFp)p&+yq{hkY
z3k`m{yhh4UEaqteIvklxR?(u{;RMZ+{mi+%jZeaYPx3Cw<MYUTo`P4x%er}4c-b1C
zHOy!Bon^dfnm6y<H!bt#TlY=dywT`6c-#>N??!=_^Tc*@zIwN%QQG<UKh<xYhnw>v
zN_<Nv)AmW~_Lq}qnI|rj&G*$id6kXK_Uzk#io~=<sl{{ZBp)9?R<6YSk~!^BNq+qB
zk~#g@mfpp9l&)@fJbXQP|K0d@n$WeLWb*f;P5awzeLZeNyK!kI=)yNOL&zGu%MmV0
zdp5A?S=Zl{b9z-Vh08euky{F<$^rWbM8g9WK|FOD692L5Rv(Gu2UA74A_w$Q-6mI+
zZ19H6G?4eD9JNF)TceN1GHF`5Qdak=<lK`3+sx-PkI+KUA%!J5z2GR^)5H_XkCzCU
z%u=ksjNPdl>h*IQuqO^}5LD&^+n~J3^Mukok;e^E3WZaBIWg34#J*Eruq@(m%UiyL
zBw1hQN>6SkX~=-^{>w9zLSYf5QxZbCuMWB|UmOzT5N(t(LED}yHw!h7>h~P=BR^3(
zNT2NdrbUDMIbUmhw(tlKyv5Wy>>8~2nL;zKZpJ7ald(G{hu0Y+@h0TA94Pla$y075
z-ox*N+xNk9u7Vd{124P+{-<T{fG=_X`%gI+z*RW-+R6(4veHz)8jlWm8$^eY*Yd1G
z=c=YJPbULN+v{Y)=}*Z+uQpC&tB5zI!N>G^Rle52_FRuT@)5}qX3E6X_MxW@M6>H5
z->vBHE`v#b7+eh?{DYXcZ`b0YuXb_r*UIEgMm7C4+r9M5Z2bSc7yh~J|G5+CR!?-R
zR83yK<r4?^TWrL#<}F0rz*3IJbgLP=3UW}ND4}ANe8YQyb?4T+-GU7>UU0YG`eJ=u
zGoY>N78SC^Eu(023frG#ZuBov!U7vNe@eIGvQPjz49#t^aOtrC)!h<HoUL1*sPgu_
zWoN%^k@uO>P4e__-2%T<Z>j>#sqFJ#m+CLZ9xMAb{&6mTRH_EvXS%P?W_#s+Jj9N+
zDo}r5+Ci5OdaPF6LsL58wf09grg0X=b&G42Q)4-k7vSA{3eMiavDXS{+0rz;&#NI1
zPJii2hS#tj0+j?Tg~(r3hc4-CSIg)0;tEt0%Z!(+CAu`|2W2NZ$<`LnBu7=<FH&81
zUJ7wqA^vk;eCEK^KelHd>yGy(nTa?<2w!|Vv;ci|P!zAL89sGkw$cbD`Y23uv&L}`
ze7~j--xOLJ?ZAECdfb;obVq0OlQTjiGk1?Gb}owfHP7k-KJ1^3<m1d4lubJg3a9U4
zI>6iVB*}Zv^KMfWG$m(&1+VGd3G8Sy7W&V|Bbyd2>P0`*^f#%d`?_gK{r+b(t%0)T
z?}R)v`6T!@)sJ)C4>>RAbNgYT$*Yd&1MBI<pg;4{<7QTLMjneL;iJu{5(?h?q?^sQ
zEk<=SNzMjSn&Na*dxF_)098-$m1s=!nE0c3kPwr>HMU6RvaY^IoEQgHs`ZyFTk>Y8
zf+hTD$(;Bj)9TwP-+0vd5aja1&QJz@7<+W#oZt$kQ(<3nfQTmhO#DqJzbSgS3tE|T
z9`ZhiJC<GYJoJ*^XFB<#*i1b_0#&C{QfKyQdYhIjdk<(hC4V^$_wyU>e`dq;q%Wrz
z=lQ)jU)YPKWiKOad4pC0H>bh0z-6m9h>+>}3uMfx|3PwQ3sz2h1=6M4i?OQmcK9q&
zhZCB3snjv5May=0rbNs%$j}LMhnIWT|3+*h4Rp2v-V$F>BbJwguqBpkMG<dWNWGDu
zomKy6F45fjHGK-JzsyvBxv+XENE(D;u@sL4fV#!Y4CJv?GsW_nBdD2%&KEb7vme%0
zUQQcB2BeO!%cQg&(h*MeL%6w+g2Kki<7x6-xrF4TCl6>;#<L3S3p-TqzuiPtd^E_h
zxlV6fXP4qHw@Y~Xpe|W4O0w-E6Vv+lql~c2Oj@#E-jCDR>d2hlYN$7>vy%d?%9#09
z_9RMjq}+GtQcjqyY%;a|wkvTOww`Ai*CP{ttfPbiL>RAxz@@}Aian!@l#Hhqez`8K
zNdDuQLe*6_I|@ppa8;F7|Ddp&{)*~|t@L3xlkw>=x%T48fIw9w`kx71wyx?p&7vH2
zNx(TJcjUTC?9v5Pb~<K*MdyyK2fK<BxXJHCoO!lq%dc%QMaXNR@a*o`ZVVTC$%xcI
zeB-c;2rA|3_Xz}9CM4ta(%DA3n8?C;Vzyp4oHHX?&v3q$$-FmO0A%>a<AhN*QOa!b
zwfkrzt;{r{zR7sQ>$AN&*|#FDomY1D6#aykuC%_^iwy?&%34zjlyA29xhno@;fj>g
zwx+JrRb~4y<mM7=ms2qtpKEK!+R&BSX^w%Nt>sSElGF2Z`N~+*vQoPNF>FPQY$eL3
zObuVyNHigH4X$i`7E&7Tjq>(H8@YPe)wns#$Z%Lzf}cp{`C%eMJB=v}OHy>2?U_ze
zA^YX{J9(_~JfKL4^Im~)rB0J%B=S%8a@B*Bd(Sy*{iRcV=I8ia?3SF0|7^K#(mM<D
zO?<Jn_0UX;6&pL=CN@P%Pj1u614J{nqlz<m$zWnnIOopBP}ZB9-q+skd;DvYGEZ8!
z1^LyFqpz(?-s#~fAFe*WQV?_thbaSvl-q6Ypi*P{^y~3K#>;K-0y%1QrcRNx$;oZ1
z&U@uuG#0tqB;f*-fK}u;Nj-?H2fA2YJ-pTrX)Q@VRd})DoZH)t@vn8SJLC&ii6;(K
z)y7<{XsSL+DXQwLdLt8fZr;)5v$CMYy8&gT!-5@;X5~?}Q4yfGqWy0MqaJLg(_}`*
zM?7S8Me$grCDU2T>@g`yI1t1Ibr67@ENyb#QXu}<|Dfpp+#+Q8ClV@E=E1$Dd~FgS
zl@CN=!+8HCD*msp^(0VwL93H<uN=ypAR5<Itl#3da|cl8<kc{i$^*}qBdtJgwviEG
z>G?czx>O}dz&=|7ITJtc+@I&}Pw4^F)hMs$CIdrca#q-e-?E7-=H!RfSg0bK#yrMY
zKFg`h6(VouiGg^|7aueECg%)q*|c|ahiR^|IB+HNz&I(4obN{ReY4cX^aaWMG{*%S
zdCC2BlBdth>}RquNu`vZ%}>D1=-aAulShiHt%c~&Ds`5^-cU&sJq0zrXapvB^;cO-
z+@s1lCw}QeJ^kX5D-IzRK7}?>%3^sQDHMa!oKtRn&_tGa$vMHZp6Jtx!1q_mqvtwH
zsM;nf3yw?@%<iu-z0R@k{3GAO$<ICNUC0sU2EEG{a_2$kR=+Ra(9V|^gMQzkH0>Da
zp$^Q@*Ul1bs(L*bjoCH}jP}hr&1`$a>#}T1wlTb}sRL3vjI<xV%X=tK+Cs&lis*}W
z4{`xo@L3s+SX^%HEr@x{M67sH|Koe%S9h`v3vbG&2i>O2yIk-D_rFAJk20I4^7r1I
zxG0DvBM$|1!TqmR7XPzZ55h0^zy6T-zp8YL%jbVb-haasO1xJCEHltpm-Oqe*YWIP
zS{o)wZ(74{=JqQ8B3K}+*u`#P^>qxx>$?P1^e)<ey`DCs+6DSEtKom!f8={cFT2M_
z`|nQn-n~A2wfE|Hzk86nWt8&)s$fWz#QXXUW>X1s?c(eCw~uVNO+GmhhX87GrUEi+
zB?>cL5%iWp^!SU<DNgUbUBd2uI|An%-Vn(}QuJ84Yw^X8{r~jB|M|E&{X~CKb!&N1
z#a3{Tiq+3(BiXmZ8;PHzJIy;{D#QI1fw6Ql!7J+zgV9goJ~_%xJ@!7+;Ll;ZQs<`p
zZ}w_pV7>9#Ek&e?de$_UPhK}TlwH>>BG~oMvh{+(v`cQ6-?)Tg6mXF{BzFYW@$Kv<
z?@si7s_YVq`4(xdtN8P5d`&ZF5&TY)T#jTgBC(dnjpbD|5$#?%{qmW2XO0Lmd9sGE
zc5a;2ji1mSXVs!bMWAO}Hx#5Bj1FV&?7%SgLgJYUoc)av&FNhZ-JJIiC7n~aED2eg
zBy`n3MbE0Db8T=<(Fdv5hThI$jkwvMKigo>t|LYUb)+0sqY5-}?^zO`;o*jWLAgVF
zBduH$%Tkbf!t3(MS8_L)Rhg>g@1MV|l)qnnTLF24PeRzliMUle7qwe}`lT;_q)~P&
z{C7Oz2p9UZOWV-7`UFzjwQdC9=UeyZ_wuTO;H49^gY9iEiOL!RtozJE6;$$>_m=<o
z{+aiE#kn=)w70(gdI-&bC>9_$|68VjiHp>aX5u$hwXH(Pw?$;Q9TTo^s#qd?`UAlr
zeGEdsc2M=2{O=&-f7cE^ZKRORco)>A)EiyUy_@@b#t0>+KSV##C!zNoe+Trp`38H6
z9?kyjzTB-5P!Ta=S0air55BF9#!!~=lz~O4v6;XY`JU?zt-Ae()%~fMa~svh|3%Ji
z{3bY87JMZCMt)cqyxWkh97Tee>++}nIXvC1)9a9#ZZ6X|m@(_jn7^a|)U9@Y8DVHu
zTmKh@q4ft4hM*O8T49?xzTK?j{{{1RI*qpYGi<f63FE&S@(&Um`fsE8iwa7&Ngvb~
zOHzxuy2)IfCsk~i{1T$ov>JXQF`Fl6ooeU*qMUX95OUV6M`74#3JGer!=M>J?u20Z
ztwysI$#+<vTC_wtKk#=3INAJz%JBa}HUqyc#5wASCmsLaE2Z5web8Ji#vPWLHcQRF
zO{CjaCw>9(o+I5sR1N-5Zij;C4<O*pIwZW+2pdsYZ<9NhMKFxoVbp4d%_a%7c-U&S
zf>z6q{HWb&37MZG+Ck86MUAM{Y1YHG)E7b6@tc0oX@t$XtTgZ&jZVGgH%JeN{aMSe
z*TY6jDw#P=L{TFQp^Vhpk6wrMpd*aox(_=*whM(KbxP2Tgpvu`;jgM)gpdZ2C7D5B
ziKfsYp~})M@2cpKx2d?vT=KH|hc(pq%r^M_d>OmqY4wj%gF$Ey&bQia|77Uk*3z`q
z*z>JlUw5^t!S7`cz-7e=h7QQUf8H}LFM~1<`riHccH#}kqjQ-X++(*b{k19|fUkbO
zjeC>Z8v-UHmG~bhckp88;N)fZ)$z%T{cpW(uUUUmP5DRIa<n7gkLppU-e~*HupKp8
z*oI3rRu39}&<e06>G)Al4_a-qm=@|t-){zf7(z!jgQyARs!L(7g6&IGZ$lA>u#BJ-
z{Us`Fwd(Cw+pk9fbZk)9Wl$Ye)M`epsMGF9T`v@H6PmJ#9@N_*`chxmNTUO_9tA<G
z6GpVzlC~gBL68JyU)?MjRCz!jxAn8R`n(eoeS>z?lsh|Y=K{ap2-<3U*J#umt%fgt
zM)<TD$<1B8Np6t^P4Gv;K5hOMtV{l&ZY>Zk%Zv*ewnB|8Whj@9M=cla%yRK>wqN!7
z(X_{2x5pmZWA}?z6RL<RuvU|YSjBp14_;nGNJ6PVW6EkQMQQ1lt<O_k`aIB5xH9e0
z+wZd{fnw!*&}yfSjy#nfJK@Vr(A@a<_Z$_6Kg<#0^!AJne&gA^RWWwgM6SJPRiu-r
zAFofxcOP{lpFX|JER`?SQdyPv>cm6*Th@;`9IK2~@{9}}k0<9fJ_73{=aWR=uP7o>
zEYle1Jmk7hY7}VsjDS=}79})LKGh;_mGGU70UezSJR*FjvLs<dtK)uOd?ROo4%;N+
zhrqhH9hK86<rxj8>O6Ss{`{Un#h!Wg*<%4%oW*kUSZveg@BVNc&$h)vo~^MvITzIx
zjYNL%m|^6dA@(Qw4AFXeGyWi+i8i^*-Y}5SqQniHal3m8*(MRE9|T5zdyRiz$9KD&
zky>G}Xs*{0)D-a~tK0PIwXlI*knac0P6&%3NLKN8-?}xCJ^*PzmcL))p<}q;>#GZ#
z`@yL-s9`@v>wAk<dTi2?=U7^cYi$(O`q2TT5v0HnB`u;_hjwB07Wy5vn@zv3)5sHL
zRe#o0k*fF1J=4~gOEi@zq&JUf&N9`N^KeUn6MJ_zlX7v8!KPrja_V$9hX}25*Cl~S
zS^ad(FRbg|7x&8@!7s)BL9GsZt&OdHtJ7$Qosif+{7mi-YmKM_8#aPi!S;)MmZ^s`
zzf)<a29pv-d#BS3njy^N`IWYo?5W>rwd*a^2|@CkZI;u*N;}Jmshl$-r;S?FBBw{x
z2!eVWdf_)HrcyYTkkcS45>(H7Dp9qz$NxTgrA@b&+IDYYHY1s6N=&mx5h<l;HCG9(
zuTwuYJ5@b*#$m65uT<W=hbj%*x$tC~BgY4^x#czMGM4&o1`8Xz9Ge_@%WcqUw(Ho0
zM`74(VxQY=@sb-IeD;!y9cViUnyskb?2vEN?})|x;4?K(qt*yoe$+<w&=Ad*PaEh3
zHMf@RbJS{d+Rb*m1tknc)QH*<tDu9=)j>837)4aC`C+~3gX}(tPtIQ`=!4&&HnQda
zu_}pVD34_WziG*@^tt77Yg7wcm=+j@<n?L!VYAZ-JM~t*-46VA7(~PcjnC!WX02Wa
zSA`9~>HDpa{Pyj70QSZ2Rx=VSUG%wJ+^n@bFiJxRUz6QK^;RRmzXMpk5Su!WB|P}-
z1+^7LU@X+#Zunss1!w^#Mw?t^kWGJq+;o<e8!V=faUWv|>X?uYCaBr0V`9*1tGz&m
zg6MOE2wc%<b?SA$-D%W){J-CBfTW#fGlE)!uJ|JeRO=xE6@)BM+Gi~nfr2$4s5Qwv
ztdn20-R=bKCT0(F*n|lRNq}DRpG2-`YX|GI)e6F(*$AOxu|vXcuU(Jo^*R}+ohOm3
zMh(Uzws}ycXr~1(go151AXt6`Lp2KO;S<RgmLdGp0?#*tpwS3GFN_eZ4{3vRH=-w!
zE{rE?w6PYUdT0i}+3?!|#Jb%MX}+F7zW#ucVazqoVn_*n=pwyI2XGOOdP4z^BuzsJ
z!{l60XK-G=dA@UdPTyv_{&j`XTa-&vUbdoE9m&+Y=hB+x7qnb!mJ?APU5UtZA{}27
zo}AyR?2FkVm=ry-n;~Bq;nSarRna7w5*VyX5KN;H*(&7=&Ynx*>=HmL|Lk9y@s4wr
zid%T?S*tsD3t-B5&qNmr&a`B);$yA6^Egt2l;Jv_B*S~m_6;hV=G9*8GS11~kmGxv
zcQTeKw(Ly>DQJU#CF8z@Gg_g6)s>{nc@xrg?cBaA8)$S&m3X2KKxtAoowl}}`-CFY
zU-Gge#Ux`JG6#YQ`X|<3u*sUcx#E0-aW8Rv#t_!>+vnd`po+??^oQFMM)AU^-+IqQ
zm#lZ>vnH!xRln3k;#W1g!v=YIJ$Qd`t8ylmxkjb~Llba92+Hy}*tAAEc>cU%lYM1N
z{<R^L%<ZTrZY<G0@bn%pZxTw4tcN@Amgf4p$&1Jb5Grjz>QbeZFu+L~0NUMaP>>w;
zK#uqEwSHB`J9M5lh82VT|Km(3iWf3HipM0C4t}kFTu8pu*@vj;*d$P+^g7ceD*y>+
zgP}KI4iIqu=mlM3mf#9}YR4_`V8)C|G)dwqq47?~ZZlFL(p9_DL?#5+Xn5BN7>%;+
z)V%$f3OkTDXGmDNk+A~1-@($czD{dog%32Y$5V}=MYV_pM%KcC1s~#jISx8(tQBQC
zkd)r`iY}=gahEs?NNHU{;(OxZ&OJZgoL6R3H?S?QnrAeU{DM+5J9%kr`29JT<Me5l
zxhv&OCuiQDUuHRtX75@d<=N|5F+W*=bMwY2*B3dKXVe3Efo^hE<{uvoUPo?V%be!Z
z!;|t~ENMU9FbPz?yBvy%5#?T7G;682WX1Qzcv&SWI3AbHC(mS8@=*5#L3F)Vri=s&
zLhpnsQ`|ve2hh?pdFEYavKfm{7tLAM@p)DC!a;!%+$0Wvv#D}WzdRULFp}bXaYUT|
zy?D2wZlaB6umZrhrMaV>B>j_VGGw#{Eevv~T&+0Io13jBE3V3M>X;78Nno-3*~X2i
zKo~cLIpcJ1>~qaUy)X*RB8Sjyf{Cb;jtMO1aBIu6ViV-dEY)UNrrT1XWy>QK<Xlkx
zl9l&2oBuZ-|G``#FN8)Y#(!uuq9{t?KTs6e7yO4mB>uyyh25|;{KFp|zu}v=uXd03
z4`1bjH|V$XkQ=hl4PI$6c!R=trGYY(szebcNWc#S_sN<;AQH)&@U(;spziU}*Js1=
zH2Ip5hvb2v8#ckVgx;4<6Ed!3MpnNT^f#8?2zft>t%mzWcQzJz1NC^mONKYtFNlwc
zErk(72C1~;46;4uJLU?+tFvTkb~oQ)d{xS&VSjNaqO`I=#3D(w&H;dB5}lcfuqUDr
zQ7(zkF2rsowNofKw=Ev8=cGK#xA^Tqy=5|sISvHl?qD0rX0?sdoXtYYh-MfKukWQP
zPt=gMWv=<}j<#$!@AZYr-!J#)oBQb|Q*(Wn+tqHa2To_+vmA?983`%ZFaqruW5tar
zlyk3N?pCVyULuX7ob(im;5m$_QL2wNZFsltI^67I^{&9Laxffh`mGS{mI`m17l4^%
zF69|ZOT6sq$@Oh+XY;LP_HCl*@Pr<@UM6NpHwRsAx6Iar!fMq%BroLn6+XxfYo#((
zuA)L2);WIGAi`1JQafqnA@*Ecgj;!4)8sWw^8FzBu=$oaXIput_D9$eoF(PNHn<Zz
zXuu8<__T1=ta0VZl1s~ZY$T#&vFIMYI9%tvLL9T%Wp77{`Jd2rnw>)Jt0<gNsovuE
z=X>D#A-~0@?TlcN*W_Evy(%WDlJY5bIq40f8Fw+;y0Vppyg?P&c-yy}o~cj_R72?w
zd?Vl?%c|unow_X7x>d_->c4Cu-2hC7iFSVqYEBu7)0>JTC^X09eeMN59zK6gDwdp{
zJm15?wQzRz<H2Nj4^4SGn6UxKV2M<FW(dM2I5~@_GtM1c2I0}lYl(7B(dfEc9(O)F
zT>;FdGIy#X3R7IV9-=<|Nx;Gi!TdPBA7YT$+0rVkGA#5gFM>5;-<-%p=5K7V10m+d
z>SU{-ap<b($3v>4g8-|BO>bUeKoCH5+MEVj9bq7P`;^XlCsQg6SV{tYW^Nc_BA|LF
z=mD_;{$eiBcSmeSzq1>0Y8i-H2D%pQR+r~{RJ6>;Bp^hke0^6HhbA$IMI5B&w>+qN
zg9_$;9!ou+fH5kjBSg!I_tln-l<V0;U)l0(bzl`$w6ppq69uEoYgQe)%qPbcXOZW%
z;)T}_;RA_=e9Mvf)Rz@i(b6nI`OxzipECeVlK9GwAk=HlH|Yz3j0}#w#*{AfVQ=%}
zjrzqE(|(6z;mAiX;{Ho1bLuL&7pbku-ovJ*S8}YP{h5HmVrx*gV^xgyP(hVW27Wx#
zP1ll952F?7somaPpQw+x9a+^>R#a?rGs{{hod?(n9rkx+U6vu_zF1!$r~#z4J&jd$
zMoI(^#jW6(9+WYI%<<t6-)@SW?N&>dT7NI@6$zmx+i7w>7?ERjK!V{?lOVYVO2{u&
zdQuuhd9b=IOF|A;xB2rb4aQPimsF#AS#dGO2OBPzr5aAL+-o|v6K1is=^U71WRxd^
z|H6xD-oA?GJ<WwEz~AGaJyWaw6+^!1CZu<sfxLBpxLv2PJOWFskET#e)BZr7`1i-O
zk>-Pt4`WZ@=+5wRolE|Q*B#wkb<!wf?+l;BX1u)HoQZ>|;;l3tSJ6onr^1Ox;=Ih%
zToSo}<T=HZSVmS~lRe>kK5F}~w*A!U)GX*6@P=6Ept+|ruZuO$(-a%f!8Gl$geO1{
zYU*`W#&C*QRsFq*Z$uK77x8=EQ9~(xH#onDuZsnm7m8^renAR|8@!;qsweTp8nd*E
zO?A&CFI6oORrTgcvt!wsYBHAX+;8)fe7k0xJjWKj(`PQ0R8zhOTc2BccCb3TkU~}U
z;$BbWl^-P^3>QM#CX^6M*?KveN|C|RKR*w03}Ht#VsY0*Gx~(erB#$ML1aVTP_384
z+Rg3s;^Z_wyDAU#DyoZ{muAiW*-?#B6^U1?t3MB%BQ6T@8IP}++ETOx^OEWTxs=m!
znuw!0JMF>N;5m%z%j~>dn(@$x$?6f`lrL$kDgPc|*qe^2QY>M%rSt5WrNMu;Rrt?F
zhhOqxkA7g=cuP+2<)Z1m%uKJI;>&qcET(96+f3c1RX!Efras0>Pv=<H2!Fn`M}DaW
zYUg(_J($EW%6@j*aSS|p-y#<>fdY7Vg`kB<361dM#AbOQxysj9?h0<{EV2COYb>>+
zl<l1`qP$R8MP`P&fUzkpo)`CMWpGyY0;9d&pc<jTL|qcJ{`Oi`$L4{tz(p6!nveq6
zUN~AzjRds*|0R=g*{Y~w_Vn^g@k8z@<{LTOxf9Kcis*R;k9wDD_Q)rS_pC1eV0Y>#
zQ}gP!joq(n(7L>kNoMj~g}LN(I6k|Qw@Iu<3mC+>nHDeuS9`)uNnWrsbXpBnoflk;
z(`<;TH@{WN9D6Q>8e;;O56gZff97l#nb{l$Vsn_SR%{jJbXe67sCxVNYCzjUs{(#B
zZw9X8n;V9y?z?tZj~xH@CPY1YfOSzQ!b?vbdN;@3SR~l?Y;u|7vHa}e)`(kPae%Xi
z{T5|6-~CW*5~&n#YN6Eft=A0wL@p>O`qftIsl95sg!^G&X$?(WcSU)K0<lxRiilo5
zGMnlR(O~3Q_u%Mr;SI~@V`zWFHILy~JAJH-%(APr)rw-^)CVrXTrRe1FJR%UX3;cs
z+97#P?p`H#v%S%5a_{I7j)5~RIm~W}fMNPMT$cXcQDu&>pkRhq1anw@5&Yx!T`fvK
zsnuuBYw!{FT+a5ID^I*PXBW5RCZvfRPsQF9%=4BvSI1s_b~c{yHUK)&=DJ0TQ`O5o
zlE~k1iN2gnsW0V%iJyCc^!VXykw1CP{EINCRbHUdvv;{+6b%`_1Ci1!TOQACuPNyn
zTUdnY4Q-P0#-&2C@``MvRoxZ(ak+}#UU}QxqpQ_b)3QjYdo&ic%ss#JCSyARsi%L}
zuH&oZTe4=S<>T^I)w_BgKpEoC=YjYEa)}qlxIV67u^rwYj<DZ9NcuBbu4-zHdzX|Z
zt(;%xh$2QZh4iOym;aO%{PY7>S9_i~4Zu;-FJECO`7cB+phdR7Fy!>XzY3m79rLDw
zrlGfIB0)g*hKK<%!w3h7-d`3p^y3XAwk*~IeLBr8r4Q*aP#L`<D<cUPyA(>fabsyk
z0bg_Dm}l&{$sVe6Q*6@eu8E#fY`7R+Pu4F^$3;cset*>&%Tx}gf6PG>*e}q)|Ajrn
zGQkp-<`L(dk6qJ*0WEr(b1ZsXwd#K+N2C_F`Ac7+A#T^-C#6P?vi`B&G97DhrDGd6
zY8flrFSfERLkMkgcgv2sUG-8w9aB5=TlpWh)HJKJ<tM2;*RFp<=jU!T&YOEY;uSXe
zStdpAzQZ=*-3~go>zO+cZp?=ze%%c#$b;{6BrFX%N8WBVJ5Dt_kF93csb*JKa}mEM
zeKDC(oEzCF)`bprpu%pb@$i<%{my)_-xtUB*Q=-ZtJmC*9R^vV-y@Dofesi$H|+Au
zcPxVZ^{6H?p_B*J%L+LP*=8775&fFvLK7&m52S~^|ASXj$>@Ka-d>-4yxJOk`k!mB
zd-8E`^~rmO2bZ5PMe1en>3=TiDZShgFEMraefg;-ef#~NtotYNh)6t1R^!P;T>gOG
zxF00L-g>qN@3<aR_+x((pM4BIeWY#-sC~3`dHLy+_v6msd{~w(>Vm7L^Us59_gC%i
zf4n3}FLwx3>qK0_(f!|7H(+kBsn;U9LQD>;KnEv_FE5E>9ZX#E9$ZAq(JQl0l8zMJ
z+0Zk=P4&i!2xnwAioM&>;OASzEOb9+B-*0;lgtsQP*tI+Y=V`fcOG_PGEuYgpWa~W
zy87d=H=a$afdC?tC}fvgfqF4gsd5$vz*(^i8OJBd{)v&Jlc^ebUNB45%Rb!6d$?Q4
zM#ig16EeUY<i6BbgDFS$9P2*ieYRs&wo_Qy&T^IQIz6*CDXFjIeG6_<fAzrNn}@^~
zmh7CCcA$>QfVpEmwWj%<%4eN5)~}^ePG&CFwbj)@&c;8RXqcj1#mdC7$iisDny@uj
zpk)y`uI@YgY@SaixHnY!pSUi6L**;h-swL{U#qC28dQUAR3!DfBny_d+AJOn%h#&`
zjb{Z~8<PcV-+sIy8&Rdpe$}_a53<AK)o#8*atHWs#~vX2<{6ObPh8WIgL}852lq}5
z?jnhMw0Bt%%uWN7k2YlGs#N}<-toFa@ff?}uT`46Rg~guqg_1Y=#ZWACEiwY0~_8I
zAsrXk>Rx<0exJ6t=NRzAL|O2{<cZaRF<Y}$0k9|z)fSleDYHPKF~Y*<`-@_wn3^@A
zcuN2)`Xa{KGQ<Y6cMsK_==<-znm)D0^Wpddw>7)4qL(p1T3a~PS$vBL=S@aV+}{ID
z^QEnDo`Dn9pL{#~>1}SM;>^9W?4@x}1mHYWBR)*mI)U_bP5B7AvO6Qb9SoCWx{A6b
zJF>o>d6!$mE|!3z8Zz&4YdE|SrhpqOE1%jUbX9{nWYT}gGS%#`s@Z;SQ#l(2pp<cc
zu=68-mvd#@F!{cpY$0fuE64Z4s#S8P$MI;-uPTDs<a?)w421C{o1Tr_>Pcy?=GFiF
zD%Ao%aO%u`QqayW<)ePeogpjF#r0+TIIqvU-s;xV)oS&$|BH~=VnVWw1{Bn~!3nzQ
zPw?Hf8*RBu0kTWX!u5jDU03Y*fZTbwUNp4pc>`N|CLY?;{*!lA**^WJXPd2u>{GBS
z!5(Bc*T=Zbbtq}fEOF1N=HNcq_BJc+4EBS2S6|<L(VOASKgrz<U(=TN!(uv4pm4I;
zKpR7RuTG|5<t{6uA=NqKUXz`hJ2dN74^LZqsQXiig2K>^yl)k2`|zGK5A&UI<5s~a
z2Jvw4uQ<nND|)R%*M!B#Cn>A1;|Yww8Qpx80c;EALnN0anoVx6C3S3*kCIai-Vb^b
zxJd9Dfn@aE+jwLM4mr?Q#zKk%znb0S9>#bcOA}QF#yVzdp6{72CMA=5X<{;grZ?k}
zIu#QB0UZTMkx&YYh^tiC8ZaEK31tV^RP<z;NI-$9br~$bFQSVdF2+OkQV@{Gm(z^x
zF^k!at&pF*>*V?r7Obw@ou`7<V?hPW;WmxE;>&qe=iR~hJZ7FSHx(Be%w%pE&fUtp
zfHluXwXS68TGA-6X_Ffku(t$ohJBuqhnk^1s$Kz@Ro+#)SUE`L<mT2XrF@xtow-WE
zsgVPICBTP>)m24l?__Vwh_kBLdFdKhNAw5A1$H(MD&<X*IN&lV_$J0mfrQd#pK{fB
z7vm2S#>~j5u54hbAAvSAA7v!>B*cOq5fi=`KEIoP=#c~LMHoq*$q=n?94}bUJO|`R
zKsTk7u|eti#+KcQzH)fxL}d@u-G`aN{LxnvSy{=A`<;ANU95ogW#+0%b}&-64D4o@
zr6^=kG-I{9oXc~*lscfs{<aj_M^61yPP~R%f8ZWEXJ4##r8#$&bN9vbw!&gRxoE4K
zQ}N6AE>*XDlB)hC-QF3dzRPLE8LER0SBMOk*-$%Qh|RMYA6TKPdaS#oYZRqr7o2C_
zZdY8DRQ$sMoi1#gigQdAl#pmP`gVp_ogxw*ZkLp)WTRaNP)Gx~$;UE>{f0(V{FO9{
zqZp)8c}l4bilNnizcM?=mu28Nm5R-aKEhmdty`6v2l1uT3yu~#?03suT5Hm`;*Tjd
zS$-jr5|0%F+Zc!0&)$|R<|gS)!7U^cA?n;ik`Ne~n@FA&KA*nP<!$S#q;q|_3n|An
z?G&vz!S!h(=Y^zObjXPSx3{%(1v?dSu0v4m0CLkIvzw-PHg=tV5J7Epvq{9Q6l^m1
zVv35G<Cx~Dw_prbV4pX43{If5?HRU0p|e!Ij&D2$AVk;mf;}%8aZql|ta~+Vx{bU`
zdq4|D;#NO@LMU`NpEIiIVBqYS9v@bkGlDrQF~(#yHGe>M<eTl)PsfYeF`}0I7Kb_J
zi<%RP=!74>#gKAnCNyYTuk?=@=yc8@&+!sGDPxb0%|hv%YuLQnKd#KDW9l5`c$vEN
zLi`phXt{s95XS<ay-h{)i4|KTz(EgXVThg%0o69te;N4Gf-t@%u}^jXmI6MtKu@`)
zUj+D61@2wX+ndrpXOav>$24ZrFN2Jl=T2D~6*Z?o8j#V(IV}cfTm~np2uU^P(*ktn
zMc_&))XO>5&cnTwKVLBrsHoS*i+UK?EBj!&-lOqeGkznx-o8L6a=mg{+3AUEtBgDr
zJC)@@N6mqag^8SpDAi?*rX`T29^wzPJx%$(C7G3x--)Q(E{A``(|<3t>>RdR04^(7
zZjTz108I<o_?p(KcsSg2%jVa(JA`&iSDV?q<dkuDF;8*Mo?FX<&)N%}1B<|va@_`t
z90%D0dH?Z8@%dAbi#+8N-nMqBs@wKSfiQv~j1}EX2BA2!&(522;dg*^8++>EK*rq%
z!5DYbAjTFHo1=<Ef2qVq2B*zi(Ku~S0b9Jp68TunVzT4ZKbEr_HpuK^49m#8T`Mh*
za6Grex$wp*`(@!OTzr;d>U3A?{%4>k&l`_{PKF&`c@Plv!^t$IyNwST$HVO!N8@C5
z)T0Pfl<ETG$)>vaMJaeTp8laIi1GZ&6vUie{Lz^3;@9WGi$5A0=F{-ONb(vj`3t!F
zuZ91v!yNJf;OFDN*Mn9ojsM>C8_h5H?|(r2_lHBj=TzbCUVvL)$D?!aJLuNS-bMSb
z*VATHyFediHT>`Y0%rWd;kVtR{p0Um=7YwY*GnVAd$}u?)W)e}&f)m{<a#jT1brvH
z<b0AO0+&3qAr?R@TV7O%J3Xg`_%7SRBf*9XJI0%CzF{{wJ7JjHshzfMeZ4n0r)OJU
zSpSRXHP-nwfWUh)xu^^NS-yovgce%s#o4YuyM-o=p}k(5Li+RD2>j+>yue@2&H4L8
z{{+ECZU$5ugAN#QJ$FO3N|DrtcVQODzqZ6_zkPLuSX7<1pu+qEdHd(w6o`+%K~e}3
zXeBI<Pp7FTXUSlgdNjC>&r^>uMs~u;`th^t`;Z#^QSS1+EgOZKUPD2v*FiOqw++R}
z`FrsewV2d>_>iAVpgX_fyb4qiOuE$-!g3K!(8Vx)g^Vt--EVE_s!XVZ?gLlY6@RQ*
zMajRTv$HZTpB$mLY2)zuwMJz{yyXE_y#M`Q&-r}Y3!M)Z^kKy-OTw*6(onzd?JnsG
z^g(2-kXyj`?+4@CsfTrUbao+*o8z;y+X-<oh9ZgdqsYG~$1DBAGw*ivVZf*EP@|`e
zPLsh{CQd1ri8-?UT(v19R-o7W73#nBJX|qvoRh{ZAxOd8&6w|koAUjdBI$F@992`>
zN~-e}i*6iEuXMn-$qZ}$V8%&N<zhlsi<_5{o%vo_hYHCvIZM2@L~4@?DGKRaCmLG%
z;5?bxOIBH=LLqGCW2~3A(|sFXPvuFudxiI~O{hCRP%Obm7#fWr2pX-Z6V)4yhTo_s
ztAV$+qI!=76`k;^lwqE82e)86x7&Wb6XC<EqiGwpD5!T@VW-n<1wktce5&h{$%nvW
z7>ua-7pnT=g?^K}4TaAuXsVX@uCNw&G0)aQ<@M;C?r6x14fVn1mfIBF_Z6=yCIPlZ
zby>isv#e>=ZR#g=p+m#-$bD{4_T(4hE3rU6fU%@`P{3IxhkBN^f?6~5n_;`&>NNbI
z(G0;_VIFINZ`!S>5rxeth}z(s03W)n71V;L5w`s>Y_?EkvmFrk;c>f3Km8_N*MqR#
z=Eq^JSr0o+za4?MgL>5XJU)~;q(7Gtv(-EiC;l!O(aO&K0B*EOrCE|PH+!kv&0A#Z
zY9`7zcEw!U6)(@t2TQZ_!4mv@P{7a!`5b*fEd3wJFq5&G&2M|uxd!-><8IXI?Xb~o
zcKoQ(YPHcf=ymA+C_8rQCaof!pc#S$9V`OPRutw8zfo(~n|{>6XPs75Z`JFacB94L
zfdqcL-o|jFR-@Sp8$k<HiWGU8?Fe5sTkUobcIx$dvsv%(JEwm1BW%$>%?7IR(Msrd
z>I-Ow{X~XS%!Xn~a<yv>49Ukz8ezhs4j3i$!v^NAS&y)AcCaQka!BWNzXLM*t$GA*
zt2d)Yr`f23Sopiq0Kc?a4Gg({Kx=Z?B>w8un|0763|g3?$ZwK=t3gxc(>$d)FAN%W
z^a@n>!*&2#qb01{&1SRF^ih+T25?*u_^=1+olZMK>k$_9$Zxm9db`oUjMcN#TCX*m
zm@ag^8G;63tKJFgksq|dKT*fWbc(uLn4U(v5!C4)FuEUNsvGTA=%XJI@lKw^`1r0#
zbVWZJfe#y`9r&FHQ{03N<J%$u+j`zV1sl4hC%ImO71ZfK7TX|x1S+8~tQFee_;#bs
z60&fbAqDM56f~hzAnbLbKJhBJwo`BWK_|qNh=~qsoiJzyp&#LqA4EYT>IAKJ2h@VL
z2uPRYOm!Pl2Gb7R!7PADqB_xzWD@N_t}r!iE(PL3{-9i%J_!|ZTC<7yAu|sQjJXLr
zIZ_;9y4%!In0(Dfgi`1-<~6K`=zN{TT26Qa{qOi4a9syX2Zm^ZsXE|lV(2;)VMOL+
znCU-#2FCM2R_Z)>2#n)5+I|Q02*L7NXOK+_{Yb)w0XEP)m>jA)z@KdltR5QnkQwP8
zWeZrm@!1AJL0!K;t3bIvvQ_|jK4ln_@ryj(`$5zzXFVNxC`-pxi;$H?9=#=!&bcm+
zV&;kF^FzQ1k>m-QB>r}4o(WvPV1k94Ba;VcH0~wSAAb75`Bo?^5pYF`Fd;F104*Q2
zI#9pRURc?ED1)XCp=sgYkR-p>@E84q&v=CtJ&Yu$_|Rrh1XwGe{jmf<uR(>hNF_v_
z4lIW{N_wBFrb9mT9M;5o9YPB;pcO##!z6<E;vXz~jfguGLBV46Zc%3&E$9x2H>@Ol
z?$_(skhWTGhw8dRuv$7;c0%e!9eOfCAA$(WDilKlMoK6efCeYcja3aRcL;^r!qSHh
zg|s|1>(K4gZ_{n)IjoE==xQGd(1%j?u@cbg2_+iAPU2d_$ZxS$MAw5x8~UhDt8ECi
z4VwkpujzKXp}W1*4EQxH6JbO)0Y*rQ9+p>tQNRp97yJOk(fSl)Y(UvVYeG|ZA}j_S
zEXc5-NT<@;5B=?qF^b4`0@0v=LwwrAO54VA9%8A+e1t9TLeL}=2V`i05YWb$&L*vg
zn9zF1$8r|AV{GdFL~YEzU&p^d8lO5(%V50|pg%Bp>UA;3CM~a>Mu*PqFzX;9);=r&
z=nslTAh|WhHabF+0}BF{cZhY1o`Cl{tp-+%hU_#f9ng(f3GKFMIqksQZeT)NVVypY
zTvCLXIn*1GK}GTc6AN9(%7S{EVD!2mMM$#&vn>QefIi?0@CFD2hQolcXt|_l>28Df
z(RQ8IXc$-l1P4nOW~2q$qF<oE(sHTm1RrCru=Ya;n^@c1u*)Gw5w<X}FkoE>QnXvd
zqgWzaU<fSZY}Bv?26L56&d_F|7C4M7T3DQ<LLfg)OisN`@(WuY4DHY@K)awSj8X_T
z)@RIM8!f<^fIOh65R-sZ5|IhkGK2^<LaZT~egu^i(pJEwTiehrYGI;jJ5~?c5jEv^
zh=Q0}7#85)HaAfBF>z2T?G`BlFaj|!>}IkHFkaY^))=vOL5Hzt2}$38hlwLvU{=^o
zm{Dvar0Bu;K<{84!8C!0!@40tLt+{*#>2>_TbpzuTeaW_a04prG|0$nwlM^-dnniv
zmK4=Q1~&R0kj{XO2ga+@t}{wYDC7YYZc`7*p2GOsVTfLW>UFZ0np`VPCx{JgBVam$
z{4n_&^gr0kka<W+N(*&#w*%4>Xd1Q?7?ccB@KCdk`oTst`yHvBL0PEOHdH?(6?&Vv
zD<TUGG)EOJo4m_uHjp+5*@p-w@<|6b$Qp)5p{c<*BAdLVguzlx+DE|PgRX*>zyJe?
z5Q%Fb$cy<4Xk7t|MPxg-$XAIa5Ef~K{btM2mQXJEJRn|a6H7rcwP??YDT0y(Nm6rx
zPheJKTNKd$VZCeOQwUW=Gu@KP4BDWMzS9y++}h$<L67khb%za>2x~3^-%JKs=HOpg
z*O-b{3%t{&sliCFd^Vbq;2}H$Z{z<v5eWd6t|l!{&<4ayf#2#_gG4zP_fV0L2`o&I
z_6BVhXw4-0)s;Pf7C=O-)!=b#acD~zVvLw9Vuz-ycQ80;MKDa0#DLatT56!;Ncgc0
z7n1>dA1u{EryxNAjP{7OvEW%y0qsR3x@Iz9^V2Q~h6Tw-$a4rP;3xPUByC9ffe?V7
z>$J7R|Ix_7tWd{IQYc~5Pt6%57CTa(7z@3_VnUql^XQ46gw~}oHX*rS1`Gxx0!e9q
zgP-6UpE$vtv%t(*gx-?r17;>hB2@)SV9ywlNJ4jsIm0ZGk_V-lP#&aXB2utm3)pL<
zQ!HB0ZWnV&+Z?P%(5w;2fOQ9w5n_ddMey*o0$ZyV2+*SCEWi@jZlY-{3y@%pn>L9;
z=VAO9ag!W3(61dZ6M3q@Ky|bok(Fc#B{u40QDPlzk%l5&30h+(+9WB&&P~i27%c*I
zpvOQ3Ff1t4B4KD_Rw2Kn#4Mrgkn0Po7U~5=fR#Wp-eE>0P1V8#(+)4sKo+4Ti%ewU
zG@A2|Tv8VPQ)D79(SXgQSc)(Sbv9*M;8XGx!lr~?g3>2e6=Ve0WBwa#e}y0p*ayr9
z^8<p@Ci+B07V=Uhm+FVpZK#tbi6^-@J7lzxwFToNBq_t1*^(AAmi#)*BP_HKZ(?KE
zCv_;H7Oi|iCn&Oz$uYKY0+wsQi0k4`JNkp1S|KmK9r9&?isZf`O99p?G?X-Npd;|C
zO~$lOwlS7VEGF2)U=z_MS5c9L9MalAW+1s-p_;>pR4^2C3swsxwxPBei$61CT&6cm
zjm;rNvrgtKRuW7Lv?9bEV(s&qjLy{qv5;U{Y{TyBG<e}7n+T>`E5e?yLDm&?b&-V(
zBLHR+xl!?d(2-D5P<=2B+q8(1xg<z|9S(GChZbns)^^C3hD{M!l<Y#X&4wlGT&B~y
z&Ls%p(5>N<-h!=1Mg=*vp%}4UZjw627BLXBK^}kFZ?v$6k@$g~$<^E9l_jEGaFK;f
z4oR##*z`e|v8vX|2Bdmn$#!U4Eq8IW55!bsjU_9o(;>$IgoHQ%OJS$gc35cf$4fSC
ziQW<;T#8=qKHYCd<Qxx(<*_fr0#DXC7B2QX2BN1vITgv=qjQLmmMhvj(ViaGQ9ved
zp@j_j@yQGz;Xo%MvJucCxmmGY4w`jEih$08z(-K17yz^?Z)4exhRyDfmweffW^7b^
znAjL4>?Ro7Xr@Ul)$$)v=W_jAh7g9;7ZW$-Pk<VxBMRtM?2<9d7$l?wTPSiiH^gQF
zyJ_een90OY*nGhvf?fj=u!qHe>+M#tg^bMyd6NP#Bbd^sy(~6aSfC)MEdElS$><hm
zDlj|1WaN6Frx-?^JmWUupg%EmY+oBKGN&VQJkVhaIfv*>OjujXd}dpP3p=pnaLi;Z
zL$;+R!F7DJ$hf4fbsGbM?jy4wTT^ng!&<0I3mJRijvr9pvHQhNAOBCoAh#0jp!}fF
zLMG;`(^iyJ2RZa;!G$47o<80$NM?sn@P-EhPi`F8)Qt|Y7@ssWSXEo}1$jc~4`_9C
z1$#Z>>pJOUEb1Yel6*iL6wm>h92%WYR6sLW1x<2XV(|<fQRhSYx#ZZFp{}WO@r-E;
z*@m={hXY&QvA62t?DUe+Y%sfb^21m;03?i3UKW{jIy3TxUkOZP0#fej<BBVVMVqnK
z@wGDB#pkQ;8O0A*GDTU+5sq@gA3nd7yfOV;wL)QG%MRXqE?QdgbOb`dS>r-ziyFhX
zmQ_6}tJ>n&(sW!YWs~GEaK`1?_Cl4)L}CANm^x!)#@>Js<)(=@1=*6-c+^`}31k$$
z=;uVKo(|4`)hN~FLbI)K)0^=JmIC6+5y$i$_TMl8j>hBJROOhoU+>+`CUL<xJ$*cv
zLW;tWr$0|-$GWix91rTDq)r(vZg<b}ywq6{1$zG_KPvBFz2IE@A@Sz=pLDCOzhnbe
z=C=Bf81%wc+kcr8_pzSS>cisFi`otRzbqM?uwD>a^0>(KC6By|<JZyhhYPxoQ=Uz3
zlN>E<9~^Ia<%cW|mD(Al3+kTY2i;VsAEfGTukr6|LJv&({j!yh-8rOA@wEXf@8yV1
zIz-G43)?W)`eoU0>B6bdVuS6?QQP|1TvbZ}L?S}FSYgm76;IE&m6?&!Y!;QXb2rMa
zlTNV!zX^1S)r{;TW7N^_g^Q0$@tLCo#X?l_3eVxy**m8nk!RC-RbAQgpkvDp1*>_g
zwzBacK=K8!$7;d+E*Vl5kvTxV&QdCoeAO~)Aeg>OCJBMDI$tn$cAUM}LGaO2E5!-K
zV?w~$w5%%dYzw|`d8}s}-Is1WRF(paoJXpUB?DHJo5gxQjqXahfXq2q%(@C2j5(d+
zg+XazHHl8TIR@c&`a$3|0-}ZiYODoUL2&0-9+kpdVFkIsa>?cbV9#rgjhgiJ7N@iF
zVnSEzPA9n*=@>gFSWt0qGpOq=_T3nLrNk~6R&2eIzDd+|fJ)>A5U<FhbHA1WK?<ey
z)31lc=CH%>1&oa=Sq6};2#k#@coIOij5>J`-dah*gg+c}O-s|#plf+0Tu2$C#vXvU
zwj|zK$pJ}wB8W5#AZ;Ez+Dg8LeJnPb78y%(q6+k6Fq<<w8B{ZaV#erY439e>y)4~w
zDGObuQSuMcdR`-!bp@Q^63F;Gw1t>u4@4~cSo%D)GWp1bD_ah%ELF6iMhmX25Ueb(
zh>KSC6j)_XfmHT%IAy;UO4&S!va}Jg0HZ8}P-Y=qFNP<}o#grWWS>kf&E>GkmPaN-
zw=HC{LR_*!05T)VInXx5YgrJoMN3AZFfT+R^SouS$Q}trHox1s;5=gKel%Fv+!hU%
z&LiP*3@*ge;lI2df55KkVjeU~G5$-V(efK9{1-a3{DS}TN5p>#CH~772p7oW>%)KS
z9p%Hfh*w$YmQs#QZ!cQ5S<hlAurNyA5<5Kc)tvp^4ON)h+tpM#xtS}<FL%nR#p6_|
zxtq6XYWF6ZceZYyyK{SrjoXS%?)CNYjY!qLEQU5!uq2E&7OpkW?C`~*la+2NKIas7
zr}uVms+6!`Fta>7!XBnJIi-d3?Qn3FxMQ(apqlp`p1r5<uJ21SksQQ9CQX4Lp!N6^
z`v%MqcSUOoW{iZg2MDrK>Hc^)9DktAQ7^$TCK}BmYi=lBnalV*l{r=i55#p3S%cc%
zDCKXHi`;cw-RpP)=AkThQ>!koV8gt)q?6=i3B>ZHpkE%6s6}C2mlS+;5j42@!t8z_
zm3sTjgTy?HCu-YEVqeQE#aj|-?$d|Z^g@wl_Qx#qssbD8dk#Iz1ylcwYOC4@DK}!<
zOb&7Uj{(4M$~P4mY4NF;Fqz%7)6Y^N40tII!25IXyB+XuF_fC}f8sTdNMR8#+DuT{
zhd>>Rwu-=x3nzgKZdicG_POcF&4$8ndhnVxCmSkH&-7ty)q=dHRMCRers*Y){)lfF
zq5&nX)RV^HPKmG9vT3MRJSerP=u0dYLb*n<nxr%{KlcCAD>L{;h5puq#nJ}T>2^Y*
zLYYsOS~+Uo;fR>~UMv+;SVSuIJWA<>B(sXtQsZSW(QM2rKRz1_2ecscZ^1zXQ}I3-
zj&HPAEOTsQ=dgXanil(Y2)N}98jr~4qZtrt)@ImbbbGB2H1ixRj^52UBV@t6RB@@Y
zd7p@*#3dB>GFd7fxIj4G91GEHp5JD<>|g5Z9ZEc7XF^kc9r@pFOY_O)D0_ILk<!!p
zf*-Pul$wvurpxANIqw$oKA$BY+quzfeY%mUlTKw)-=>$6f|5If^YXk_q+^cyd!Ecp
z1cL<~C3{<0P{6`pS+YbLwk5Ziivui2_2w3K?u$XJ;)$G(o>jg4rw+lKMyxlP7R)=l
zhD>cKyXmn8&9SE!fqBMA%9>eZ!`R2HgORQyqqCh}=|snDQ*nIQB<Vw?j?VbZ`Z#6a
zySo@wOPSNmV&vodq*uWQSRxa(Sz<(>!x1kG*#lrzg>nlVa}u{C?iHG3LhF`u@BQb@
z{?l4m9uu_tKR5heaYWJO4c84H_NPxNR5jToD{^4A92W|)eSkgh{Mw?AyCZoF`8;BB
zI`KI+W^2aTcx)XwAJ;60qx02`x9zP4gl%x+ZGsaW?D9GBg$qVCpKR%acxe+(KMZDP
z7hc)8wpEbP8Ofu+Udn{%@QXmbKnv;^p3Hip5dD;fa6#!%m9~_<v7WPqu%^WrQ>qjx
zdK#d{AAB+ze*S{S<fa-1AWXPo;vz|Ewus(0s|r7F%`1m1gVm7m7-hArTsd5Lb#<`e
z;s5d9iovv`Dsn^6WO*G-p?*tV^qzAc7luQX{v@Mu%Y&`0!2650iVl-$_l$m&>DTID
zEv&3j&uLRvcAj7?)?g|%(tTwO`APkMzU4ip{~|_8`A~Jzzv=Y$I+2+msrHL_Di1|r
zg`dqtAaHq_V3oN&_nJYy@^rkitf7(?z$`$l6iHNo;U@k5QPNL|mUu1&OfJF9CBhOM
zYz5#I3LCTzpPoOr8OPK-s1C9<)5EG>EGUM?dWUrr|6}FmmZ%tAVAW5ba|^Vd<+ZXJ
z$aZwVjzDqEHc5c6sW;RtMHyy_@Qcjns-pQ_E$hcz&#LuP_hijWwYHkmj2YM5_A-`N
znnu~G&Fth=xw4fm@(XCkVgox9?FFv9abbW2X!Y_i;LC-3%UU<LJO&M~*P@8-b~0$-
z0syo}1bVOA?Gc1{SZlRAQKL<lT?jD8FKYL3A>K}V1SlTX>UFwvR<8$5x@p#F)LUU*
zn~x3dwp$eN;*3wyu1D>FKr`qvaVH1~C85O#cDnUPgmq^^(mQ-P%#T`x5>Rgt&R*Dv
z2%CV=1KM;op5V?lit+0#Pm^A#IS{zYF~KAw%R~gHV!-v{YzI=Spx>}M?*hf?cs!(K
zDHYR90LL-3L{^jg`I=nl72BF5i;H#nS6dEfwgdvtiENj{@Cue-V+vNS<_UTFSSTKg
z6sSY5&<~GuSCuRg=k9K!vs-7+3TrR?7Uqc^yPg}mPHRebo?s9;LF&0PNi_bq#;ALa
zLq@Tu764%@8|b{edSGYf#ipx0LCRcb{Z6J|)ypkrRw@jV)~hAb8{@OoXi~caj;5Kv
zm{}OQ3zoD+9-w8x!wC<>_vwaBKOZ#QEJ%IfcXZGVqg+J8dJ&4D7Mx-f!)O`g!e54p
zIsUDsk+KOLtLZlwKQj#41wh%&njg`%UII>QbQ&$f%p~wjx@k%<Tg}LC2fQLGH*ruS
z1iX5q(QE~6!U~f3*x7Y^>Dg)hK4+)M1vw8ndM=P-Yva*#d$8=}9^}vN!My1`s212|
z^U$K_j<mh8JTz{z21A9QHwc-jLl9VXfkK=&<gnI^I(2ll9?>Ou^t9P(+BmSyT0IO1
zd(V$R6^0xLI~ww}<<pI8Lj4Mx1SRRW6iRdpL?M`@C<qCckkEn@Wr{4wxwz2t5k387
zIl=e+R?x(Nf>yi5NXJb=4D&n9HsM+l)^84(^jt429x)~~fkw4315Ll(ZZ!y-sYB>u
ztvbOn6TGuP{EY~9i_peF3dUR{Bs4+?C5yMyu&06bJmE(~grX9(2>s7*G61*VsCRrZ
zXK1?93>yr37IYd>9gNoq$!={1gpAo}EA(fYXUrx#N;lTq1T{m@V^KQ_+bOv8<qU9z
z`ursG)Tj}RA3=?@2yZ#86XGK%9z=}>dH^15=Yl(XhHJ4XHhlpq3bGTFW1E09Lc-Lm
z6E=W=b&hIb*kYhBA4~@NhXhpGY5AR2*!J5rT{fz7hu|<99Yz9gM}&k$U~HJ|Hlf|r
zo6O}Ab2(x4VG7#ld_<5mKByK1VZBaxg`lDXupQM1*{w-%fCSn}{UCfYOk}H5CoI&q
z;06o~g4ROMf;tH<K7&dlbT;&YAkTxSZP&Zxe#QZ1USa`S3h?;<XYbwn+qSWU;pgAS
zpMvean~`iumgJ<J+Lm{c)NQ?++t|)bXWOiPv_#o-BvK_MU)s&{+23;kKmY_ON_MVQ
zer;lt00<l$oO5ulACwtc?+Q7RVW!%srgbPcfO&&?lDMs4RItZV$=QkuM{OpdxyZ+^
zX-}{cDM{3($qpK#{a-|;;mk-7&NX@k6BCVaj&&(`<Vyr`MIc$LM^*-+k2K{-E7S(c
z&*~a;Mi?c5X#P!_-!}hk#Ikx@<_NWR!z(Noq@k8M$R^tIYF5oB)EkOgO|;T2wNiXV
zWo_~OHR<FA3=09r`$pJoW=mtMZfGSO)e&(qW-f(>dghR2H&WZ1uGkL=eMVu1KVvF_
zyjx`#{#1TNPG_RblZf1BBhOFH!Wp`9Q+r`&^_~ZQM7o*=*_ZvRKzhGYNEm$QWN<N0
zaExJq--y$YeK<!rXcR|H^t(Av<d9?p39f<&77c&FHV@|H8r$O41PR%t8#usiJom1B
z4&NNe&}aB;h<df!0Y=Z7JHRfb?5v#n$~n0quyUsC=)({VFymW<*@VpCqfs~rDOB{r
zC*Z@(#@I=Z#zcETT|)lw#z&Cu3Pc)==R@Pb_T%XcNaDCPm`<_H%lQmeu<QML6#%G9
z+Q;+RDvnOoU7S9&XHg~B)BEt*cwEhAqpDxU<NNWeFkA_&s7~5BXNbBd&B`3l*-*bh
z;~||f=3ce7_Ac?Gw>703&7Mc|F+6EYmsUeltB0eju6xC9ug4<vzoJuyaEy^oF%1GT
zPy_b*&D>}Nkw0Yz8WB9JJ90FXCr6kr@4}&jxT+I&=EGkznIS{b2-m7OW^EHaC&^lN
zZ;r(jd^{D%RSx7Teln{-cd-w1nF~#^OVBc5kIMu;7E#1id?2gGu3AQ?az|3*wIjk+
ze2-gY&imOZ)d-nR6b*z@`{$z2dI+dJ{3&dBOwt09PX9b;nNDCN^-TJ+$u!^h1c|Ln
ziZDe8HKMUn81xiX?ypV9F>~xu;*%h!gbrMH|Fm$#(P>%q15;aJm*Alu+eeY(g2D)z
zii&z5=CsLAn=l_JiA)19kD>qsuoQtOgeM-uSUpm^kjuEz?!~j&IDn3C&4Xt+!CIWb
zffT?V<O1Xfd}x?;xM51YFggtdq`_^`0K{Vo;dlaUZw6F~JjFAS5FHB9F;Cwyc(#zz
zk%>z4wRDm=qowr)kbk3d+>S6Ra=$djf+<rs8q1i!Jy-Ig3|acHKN@0WVf1^D#3WCn
zjHzP{3759tVTzE`7|;W6suRoya%%Pbe!pU@bDb`dlkIcTGCr)&ON_l7oj;0ZEZ;1m
z_bZJ315ACD!DZTnhDv_GKfRpN&95OW7dKg(=V&G#rw6*5GR-8xIJg<Xwzgpsb(<UT
zX#6$xCG{2g1)jee%Fr6|xaa^{{{HpHTCw4g{Xv(z*zvX;wQL`wO8%=0^dpuZt8`Yl
zqYrD&(Qc7Tsy0VKC1f9_D$|BdJ)~LO>4PEtFEkfo&Hk>AJAo$~H*;+ur$g)bQse2W
z{$S=_RVJ18c8$hyT#|#7Xg3x8hRXoYb?P;H8B90G!>Ekh!tz5w|7YH9Fq0AiN(#XL
zkXeX$u9OGEcBB*nR>Pt{Lh!2`mnVm9S;yTX*cf_$5mRP6yh@;Gl+L7rD8DY6N%oku
zyVJV-H2R0zs!(zkKk#au=CrQ-DXj_O?n=yWHmA~EM8n5B@4LQm&8Dx2Fl>i$u}ksE
z%h6*zQ?#4^=?~(w@Hn2)joqZ)2XOzG<yF#jhZd<{$p?RQ+5%jLIXnyB25MvwJJFLy
ze6YpR_i0VMd!2vxTE4UQxkn<b%w7nhf}bS*E&fa1O_?{7x4occ7%9IeMv~|8Rp6mi
zqOD4W-J|FkPdv`wFo=_gu~Bym^JE&6CVKiFpqH5D7|#B>C|{zvr<?h@;>ApN={?n)
zOi>$Q`i&{?EA7jdy>JUYc}Xs5B*6k^U1m)^Z9xKn{E5rPrJ_t8ZZN>_ZSZ4w6-22v
zlS_~aFM;4?qiDUbnLnM5Z(l|r6dogdsDpRez!3{uJ_|-h!<Ik1jNv$F6xGY7if*(t
z4f1g|(9Yc!ChS?T)0@7oH0O8X-)2+48N5?DM_JlO_bI-cH?<oCZzeU6J7Kv7p0*gN
z==)CzrDTtW9J}5MQ_5PNrS1l5^Ys(QHP(o!LJ1`#5|?nw4&O_INtlLscx@zaeJxf@
z$5%`WSEME}3diG`?Z8-xWZi?%QBi<i?885r#6!z-bUKCILxLb=WFqnVZIS<$uI|Dm
zRpcVCWmzoMEm3P$<kpGf;^%8q;KkiK7E|ri4N-#!Q7(2KPHh_qMMfH?9Fde4cSP5o
zR5g64jj4M3pq=5cS3_{kN4}n6qf_)+9Gz00vvxxqDfw?cVk^fZTuq4XZe=G{9@)B9
zW<WO^8uL7$!#aznNVC*IHD-nEUF8}OtEd-{0Xk(cntAT!aH1)b>C$g54ir-9{W)Gp
z%nOiAj9oSZEg~qp?ht}CJl-ClqV+6l%D~_1*ms9}nVt}$5LI)RwR^gnmfh0|?r0l-
zkeh83gN){rslj`VmXp&ep0}@c{^))B@V(PT_<n#}TMfGP78(tNhWx#zi?UMz8+LP*
z2)H<Ll(YYrltoIjH@}bkQ6*^`b9Ut=lN%ML0$z~L$eG_!`+09cw`J_m1^I;UXnJ-f
z|81<2L_jm$-61PdS)O25`XjP3zxUKfE}J8!5+;0xA2>qaPw&t2xNP<$o#1QucR&g_
z4jHbmoLuStOqSM`s<bHgn@nj<(Z4K;E~JMg341}W4?*AEIi?*{zgnK6Pn_~gFTJ}E
z|ANhO@Bp`4^84ckPlACD7|UcHPd0zKVXy>rKS5p1t#)349l$H~RdxdXmFjEMof&gJ
zN4}G#;Gk>j;}#{5uD5WtpBj5HRpE-mIFp1I-q#328D3`2&dM%T*$7C1iu?^#tHRvj
zp4)_|C}{EYim2i8`O8<DLlwfj;#A(1&L-zhS3NH}vOrr%d7$MZaro7hr$ATUbQR~j
z5NHdgUF0NIyog0#bgYFkx>wES&S|3WkpKH$^1o*x*3KmG<@w)tuzJ0f|NWb8_nUS8
z_b)O3dq?Dd|4*Q0f%hT=@*a8nm-yS?K!UhPTjL-YrY(Qan!o)s<bXf<`|HQYhyCN1
zFHhZ>;PnRieDGco?}a&oM7ryFr!CTfqiwa(ZFK7thatsHAX=C6bKFkN{!4w~I<hPB
zKv?||?p%^K!8{X~70_x@Ue2xI6`No56czA|G}IDRU}3brv~BnKi_ScGB~b^VnIDbw
zUeFZ=(d^~-FL%Y&6Q%cBw|REk?6QSMl<+MD9KmLq#Ap5)NU;~;a0pw3*G-Z-`6IIK
zC2!yf8YvP&#cLM$G4s@{fxivt0*ao3c;{1rE2AWyP+HxVtM-+V{9Nd(bAcT~UoCXi
zuw%jsg#~~z)iFKV$G2pc$Bx>Z_Z@>ActOe*7iw)~5wXT2p%TR2AZ;-SsY+K$x@q-{
zz*-YUOHW`t^kqoDR}Kz(TiHJ|TPn4-$kN6Kf|trt6{aDUX)V+F;DS@O!Tc@&lR#|0
z>s(8+bJO7tjSaPVxFZ55P!)o!L<%$xMsynRUU3$T<7+VvRhi_(Z`R4;_jYnPTJ(E5
zLa>~EJR04@F)Dd`S$N1+Ls#*D4bC;jD!A>`!g?zW&QW1g3%Afug0jJF3cQ0GPVLgw
zqTV71*EAf}lF*S4MJ;nkxkMKS_%wwRYOUE<WlJA`JTMfp!Cf|_X{!)606xH8A}<+-
zYxVcG(cRB)qfL_1L6VYOq%Nw|VJdQ|?9H&XKz?L3PUzI^S=`G*4Dit5#l#cwev4gU
z=q0)<6wt}XDm#*f1)GkRn)4)$^%zr37E?q-ofZHoN|6QBis`3Xx!xKJnzu&t@%UAE
z^=z(6*8?2%R-XuJc0WfNWN&b%5H<0g12wzlsEKuv82h?;)a)wM#H>P9QIq_^?g2F=
z^Dk&2R$GjM8QS3!=5@hu93lzd{uzkq&)yU0|5Rw_kiV*EW#pbLhb<}+ycqLn0)2me
zgLMd!+rFGdO~<;QUD<UXyB<DqJ$!n9SoW~-@W7+J6k6t<qrvVj`^s?&71cJEu~l~=
z2Z%q>el6_UX>>}u;@iTmbW_N>(nuA*y3^Px>00{LHcvQf+b(O{H5b<@14LUNcR|~o
z%Hwv;y(yh<m&dKNwjFch7PRf=*XF=Qrg-H1bfG5%#?DWzeKlHi%od$|i>}_H;~Hbf
zY*8{rQT1BYuI(ACE+SCS{^#70;SuD-X;MGuaLs7cu^V+9BX#qQ@+0khGR(UB`7>*S
zIhL_FN5?1LBC)NY<I|Gcbb)a2^p2uIq15^_1s1Ht8i5@G^8;4*bm5(zgn*yT%QoGt
z>1$1@Ez#7f{NP2IZ_jkqCJ$|4MHC7-O67r(71_lZA4_D+-NJnlUaaY-ls?eW>GVAv
z8Wn(1P{zTD>U#d1-#9#GnPNNqDSk-l<q`XYO)wv{A+q^mL-=WHOZV9h(N&m+Xma{l
zb=Sc=5(j9svq%fG-~?ZY4zPs^IJ2+VlxiHG*7RFcT~Ad@&gSw9%PIu{#-W+hmcowo
zNQPxlv=rMcO{fdWT9(#FdUWKJsP73Ib@IIwM!p~PyTput9J#~jcbTCdBVcxc@hJA?
zE5bKap2FKxj9S^{SG!V6rz>hV{x8gB7Dh5o#=`tdic}&Dg@2n(^V3L-+Zb+=VWt;4
z<^a!j`PFV%VGbhSc1XPXC<UBFC6hdXc+M!Iu4bRKuk>?7LtL6eTow*-NeT^X2wI{x
z0(8&@6cHemj+{d#;A3}bpFCAe!TRu5D}A}M%9p>o`<gqet@*2aSafHJMJwv2>VgAh
z$s35sC2+AnVBXP3uxE>OG4agR??VH{CXXWA0iBuAenk(Q^M>Jprv-Xx-)hic&OZy1
zsx8W>rq3~>hQ=IOJeRqM#O)$X#Rn;m)KHA1<*a}O38O*j%8ZVN^HKrGxlq?n0kGZZ
zEP#(l!!1KI1HD<9M%*Br3@EGrRj{98k<tm0+R`_Qr{US9#oSFZXwIiYbo^?_EHUiP
z3Ex1Q>@1!SE&$4&cM=4KcO^*^b)xmdVCIKotZeCB#Mh$SB`sUj7*0Kk2*k}0JI26I
zeb07hXyl&!9YJ%d*GJP_Fqh!l-eSg`<JH~+Iwu-_K48E(Dt{sHv+Q2@n-;n+Zkiml
ztTk=e4YRaz)#P28pP17U{S67l90N$%{nDy6mHOaRN&|Tjl`1Si$USPDHBqK=l(<r_
zL_Uyo&_}*(^07MMxZ}-p*~F4dkSdYMLL6udxe#edg+Q6V&@DbHMmSY|Te=g?Q(2(R
z#R@GVfrbe5_Q6O1p3G1=WVDtcsP1cc*1@=p>k2{p=FH<7cjtG7_pEFZ#%-(e&4_P3
zq79v`$P;#T@?IDR(fRBGE>0LnL00;<=xLhnC74QV_iD{mjQ6s>QxL_IFv5h5Sg{&+
ziF8RY=Qs(^L%fd*`b0kFdTCP!kl6!32e9D-V5n(@6M*qdcmWvVc#)IVa-8j_V2tQ3
z9D<z*iEYgnoU<VDGkaQVF{iURoaWsBfw){DLCq-VLekoB5co>u7UO}BQsKA@nDAmg
zIc24@YMzNT#p-CZvdU4E54TRQ1tDSQv(2)Q3JvNTS+IkfgNrWL7wAZ)Q#B<6NCMW{
zD2|x0$YI8<vya~^3l+f?j-AnYo{^Aj9xcJ<_9$-!I=7=PZd3j7UIBXuG3FBUo?;#$
zU!};SSWHFzvpyS(?9jz~tye(z+8jDnzUB?@4y%UFjwCGQ?ABnou5?z+cyj@Vg^8A+
zF@nFKrh9RUBv<R=q)9S&WudTeYNse=Rjirrj4bIT+`C*+V;53eFe3>?Wv6^CjXP#=
zzQ}E|6z=A7zTZFkcD@kwU7RZHlg8k^B`W)^f{?Xpr2y@09SUca9`X)KNL>VSh9z4I
zDG5%tZu%-b`P3!*m{G>|Jvh^@6~!qR51@jTX%I9Y+$mc5Uc!~FcxA!Qt<dB}amh4{
zh)pgJPWJTR<V=dwf}}PILX%6n<l>T(ct)b=?lM8a&Ze2nmjt7J8jQ)^TniH;K#E^E
zfwrr<L2-n!jA69|yEGZN!;C$dR@x!2tC8HeTU?dE2~|6Q7^cLb!tg_le$hjOs|m7&
zizYc#38KY^9Ar`ijUKe?jM}xscD=jA?IK&!LwAd`A6hvM!h}8!CvgRMXWPP;L;M;B
zU@jgo2_`XmYK+x*vU9<hPsecj<-pcEywuicA2So7Q)Me90N2=<5eXPOWy#WXzh1S#
zW%%||q089b3SKtOxPhxGkv;S(2~@5ee7gkcSvvL>r}xe&ojGG!7fiw#$tq}iRvO)q
ztTL&}FcoOgtmWU#Vp&;l?_mAq!K^>Le)075=;e!z|Fiz{q5j-4{`UmXRsQCT{aqaY
zi}faVtoYwYJC8c+_}?!v{<po3{}n{<<n-`)M)J(iU-R8EQNLQD7A=DestJ@2K#h`p
z&r*-*HQdgxg0$W$(2NRY=7vz@13R7c<w$)@P$FL3km9tcTA<(}*#1V<;mZWY6jy$K
z(;CF7IV~^B7ubx`e9=NJ*pO0sYI*!4@87+LY!MMj{5`Fdyd08NXJo3?6Z>=9ru<q#
z<8Hoj@yI*7_AHzOzsvSO<|@@o=iD#&aR%=n{FT<7hyDV1c9+?yor*hE4*j11sL5H3
z1ZwaePQpc%y|{1|@HBUW^hf%BbW8VxK3#P~sA^{gxc%P(z@_(&!5<53Zb%g4aV4;o
zYvHQP=_XrR3(tQ-`9DsAe~{)&m>cj$aG~x4?km6o`Tx<kJGT7)P5a@uYx(~Rl>gh`
zdB^cZIP-o)gY|wgJ#X!F9{%lo;)i2Qt$I+<=}8d9(2J_Iz;yxtX`RutR>$o9<>m`|
zd3^Yf!>6Y&kB|QQF%n=@oBSlH@k#$DJAKf?_RHQY)B{nnRa!d+X-I~(Uwwd2jhtYb
zn6IVowg9~H2XBMffmF8&10g>J{6l$5{NmR$%^%mdz?VL2{s1}|aO6S{`zI%dT0Xb~
z@&5alFHZYMFP<H}I22!cI5NHkzuqv5?~5LXPoH7lrKbR^)4ps}yYR=OhKD7P_7=9C
zgwZ_hbB|`VE<4=1LyZ@;?*DN7@;R*Pw1&o`{=`q;!qjg5Do9X;5o*)0;qA0K4?B;(
z>wfd-VTBn^!+v}fB>k?@akE`%#x;a^r{O(pbvxhebiVtp{qV1c&2ELyVn>W{Y(4D!
zwX@Udv>$#~z2gpk@zKL?+wJb7@4j2&iw_=cY0cQf9^w)2ycYK6<_znE|_EGm?
zw|m$W5z7kMe5~z;MOZ+5+ulBp*$ojM#A7s?a1J2718wnHIO}=+$)pdWZhs1$8{QP~
zKy5#c72M3QxMRSj#qgB}-Hfq*?=*hzLep!YI$jiT(}_2nCn(D5t~zKnM!y4UInXRR
zv+EcnPCpu4fH;cuA6E?x(-;U|lFq0x+G~gqB32tS;I1@hUeI7NJ)hx{d|ndvz||;I
zW9+fbO;kta0f><MZ!tXdEQasUG|vKhhTIG5i=zAA_*PgZQq^P<B<Dn`DOoR%hU*X|
zunqvBH-wRg+YG3P8eA>lZE!R7BPvr(O@8n6rW1cStl`YHQ|~vjrBK6+)<6%^#{b^2
zU#0qB;=I6ydGgn8u0vOJNUfGTxt2S!Ww)U>e3)zaP&V8#8a}ccBB6z*^KmVG*ub@H
zZzD5A-zk7OruZHXJz$FX;~e0A>-|nnqSEg+0&FEVEZu9liwRZyNaLmh|B27z8+?Iy
ztUm+xPEgBelzRQH@@8gx)b06V>@KhIrrWsbR@$#_<^Aeb-LG!d{pwcUukQWsSJ&7t
z4OXP^kmpxVbtcTbl+w<mD$)yr@}rXp_xBE_i_?goMne$q>07^dwMMYAd^?^~Mha#7
zIKj3=8S9iXU&In*RFq~Rq!H|KBf2ycZD!S`eQUO4lY6?0FBb3K$Nld`96cjJA8Xo7
z<Q?#*+p{Dbs&^u(;YGFk-@|Vnee=+~|LwGQy6gMjhr0hYH=A%EF}P;eo6kngzvBJn
zDxB=XiJj5)C5ZTak6&+m_)FvIMGWWHd+JZopK&}6z5O`(JsAIOlmx*A9_A#)>(du~
z1N`Y<PmYhifANy8F^`W=UOxNB;c@QXSJZ03ZO?gzPw3Sj%ou}cKJiXKFr24ana$&Q
z6rt4E^O~KkWb@Hvn#9u}If_7He1(oZ0e~76m+h?4F&4rIuY$uQiIdYfKAB+Vub!uk
z15cm((XCW49V(^(&ANKCvDSYld+{QkJ@?<j{9YyTIeJcUy;mRmKP+i>1hW;)r{_J&
z8omaZ;c@yFB<7y?4f8gs&u7O%>CNaQl#Y;&AHry;lAoXxrkCELGMeJHMc`{Bj!9N9
z1n?&UNi_+vq;HCB8u&0A(NQ=sLM$-v_;^giT+sf4*gLz3hhFXLVSGLg&cCkfzKj97
z?j^waL4Jq*y9$TM;Ni%?l4giuEPD_9@nDX}ITgNFo7vP_G>n2WV1E#j^P(7Uh?AX(
zFE!BJ(VxPrn{a|2kHUQr@ySO5aqTC=CM;_dBtU}#F?%&-mv(_rB~Gl+Io8PchB2J$
zKo&-EqWs1m8?I_5-y<E!mkx0bQPbw&Jz%p!oMfd5cRlf_rryCBi$=j!FS>-k!Wk9)
zZSQ&BC<3|#KeqM|V23v>C*W$39FP2Q8te(tR(L9Dhb=p#*+Z>DqL_+LGZ95jJMDI>
zZZ>Ivgm(jT!*^}_)y*shQBp<@*0NX`6$BGZ({LO|SY*!X8=duIxVz5JWH@J9Yj%nn
z0WB$O245Ksq}j2CYw-Sfr`ia63@<e+iObu`tJ`DOX!s?Z0GW%p@-iGFWj4<QQhQtk
zf<0<}aYZfkmF?L>^SbsVrO`1`q<QjW#X(tE)a!T$-Vdy#&wL6O_7XopUwL@5&L9kP
zZfp=hNmP`l87Y?*D~XYq{yChxIc2e?EY-^YvibzbsXUA1;&~JwH)2w|yMi+zoxAMR
z4E)fb;du{k8pca@s*{0=@mN{n79zAUOuhOPDc9Cf6wYea#Kd)}>JTrZ;A9p|?>LI1
z`!cW|eg?3Hk>RQjgMJUJ<Q@QTjCVh~U<_HbB^*Q(fLN{(bbN^XW2#G8I>DEIwHjy0
z+VU)o?~SekIF7%~MhyzI=N39q@J5&ZfFC=Ks&~Anx$z=bGk(7`e@9-&ze^&G98XxZ
zG2v03?d^FRM!_P3+X{wie9{;{o$I5csA0TDfx>t&48}8G>v4&Ez!-jz&Id*pk{pdE
zQ(P2qOa6zJ*fGXP$MJQi!T)y6(MNtbHU<MOmY^5;Gy(ED%x~be4<lXsUHoeY|9V(<
zTpoBRr(^7NPx$$}o7LHWdSgkE>tJK?32-;r*N0UU!6xs8<i;j7>IGYQJP)q`H!TX-
zSFvN8-ryn_yd?oO#H)*bs_?=Z;8i7UNv4J=o+57_1T@T(hXidOWVMV~32jjs2nudW
zVn+617gQyiRg1+JyfwAko3&`GlW%gvlvYV8c&b}4F2$hu(X1AswL<jEpEvB9$V_I^
z8b_Q%`0{;Rj40-&#TyMrL?&y9fIh-uF@N1Le|@O^%9#^@MB2}QVCI?uom9`@bl0=O
z_Fv`UoS_YEkTsU!s_*QND9Bq&OYQX{$_V%MKQUc{qd96rf*Geu!73y9n;%d*U^sul
zLI3i#c1YxP+<OuaZ{=Bt7!BJzU8M8;o9sk~@(!oVMrb@~)^l+d96ea{r#CI{e$J<y
zVcAAn={VY)jw9Ohwze)!kp@RXIRA3_hG%KMz87L5HbKT0(mtr`jZB0x!>D4wjwyz6
z%AF8B(VnI*|Kw_ioUCPGKqrHZy6E0C=*-k{*OmgmQn-i{g&%}(&;f+G!fpNqJkcpQ
z2Qh^+3W*L#!0<)xvpMS7MMxvGFmR3U++yy;(=}u!CanEV?ArRhtD$>^g$cdcr$RuB
zq*y^iH77^^EY-q^4YG8jk)A!gPG{#`5E#zJgwK7#GMK3*>|E{%O$L01%lYiH+<6?c
zJzCxj;F5UNKM${hD6L_^)$8mRu>&NgDbA(1yFy-^)1}l>i9}#J-I18v8jc8gq3xZK
zcWaBKU%`S9VTeNv;xW3*)atFX@jQpmF~{(-RXxLIa(0F5xF9`LI95<~gfPb7Hs~P`
z^1&cZD0x<PoQ3`ykCW1W!#w7bNc}1fkphzeA3iTV%sQH>nHDlOMH3fLL&*e@8uq*z
zzzTlq^?)jR{{f<kx9jmE0i{w%HLP7H3L^7MBYcM5ja-=+h+vzM5T=X46fOXB%m<7&
zf|lOpMo=Z<zErB}RT2;912_?Ni%O~0;UMG#V)Lj?9&XQos%V~P429zuW^Y7&&3WTD
zsSA@iBLp~dJN;7(yNQAALGt(Gef<u20E567H98n{)}z>)hF38j>s{@uQ&#tCS|YJd
z@y*;^fbGOyqNZ>Ri=$78bP0by)(;1u_I#4<ZJf+5Voty;z7j@!#^iALiyHAlTb@2D
zMpk>Z-#k@2r@n~{N6$7Ek-R5Ig`eeN)~oVt5}(S(1(A*pB<yGu41zTElUw6dEtCok
zBzx}P;L%V*?FE|SYDn?)LkZs<B4SKXfzR3V(Dsg?o*G!3MgFpFKoZ;9F)IS3P5?0u
zua%v;dHfhM))xq7H{KS0%Y6>bNx-mZN`im(v}Z^vaS-G2wD8p7++5PZ0y^OjFH!#0
zX<v5iO;8yW(0L$PD-J-f^WkmKbgh{biedz@Me<fo$`Y6UhZd(=javzhrSvyS{K5^w
zJ5e!!by_1xES$zUz%vPOgD_ZBUj%D3$b{H3>7MxDK7YB6gFjU%&$<8}DmSi=WsC=`
zqKUeE;F2V`bmf5CV!(y-p29#OdDFS44tGWP&hf$oDVvDcTX=%91!5n9<xT;UU=5E)
z_8$W*)N@IosKN3;q2U&TgwBb|lEND*<cLLJ;;#9Hh)Esf_?qk>wsKo(yY$KF9y-Mb
zm4}iRhgz#s*U6(p+}E&Q)4tgUTn_gL>Aa>TB(`f7Fpys{ZxQ-ulnOCF@dM*iaINLp
z9@vD(L45ohNqhvc+}4Jw=Rb)4sQl3;DunPNS`0Kp`pAuL4xX9f-WeM>!wjvI<z_xW
zemI(sGlE>Okr_NXLGxj5TF8i;g9&62H&J#D1<VN(7J^&$dAXsZ*)<o!tjHzjne9ki
z=&|nA7*y~q)Hu1%ym8Cb6vY{~WGeGKKuu0R72XU=qIZH8@Q?1Z=E)QXPs6@;yJOj5
z?0c~6OP8;@jpUkcyZ9r{Pv}=`8*>IRR(m~1_kbM2FnaFya=IfSW8-NOu+B@?RqpUE
z2bTOiS-Hg<lU?4>StvT^G*NIh_9M*BX|I4(DhIv1l56R?7JAZ{JCf}KCI3=n#CWdL
z6rvJkraKAj6=ponU}^wG;6%@RwDh2b4ndAjbV$5w-~AhzH<86$6FiYw!bvIXAs^4(
z#njXU8|lfaQSuK5fZ|nb9Qao#!$u$ljN_OE$@5sHO(QSfcs`S_6<siAXNc>7pl``(
z&G0i4>|be8^7m*#cJYSfy!K>5f1auqi1ypeZtC_8!EygG2yihEG2qCTIfL}`_$Xh)
z&16SRpjc2Ux?hXz<OO@1rFiN<!<l~lB9orfcxCD?-OTp@HFO$C^x-Dc+uzi1Xoz5i
zRtsk1W17qS$W;dF&_H+%E*(b=Bvo?kwWJk$`uwz|MQYM2)_(q{r+I<gXm%lf-RrHO
z<<F@md8|EMIZ$PI0qblybIMXvEN5+f?A@nfBOb18#10#Aw~~$cUX8@2W6P&wF<$Hw
zREz_3bIo9v_x0Ov*S*pPJuhl%b}~m>-7T<SIK7D&@LsIj)h*dBJNVkV{lTr<4g@T0
zW8l@=PQJF2ukGY3vyJLTb6HoX#FGB0+t5D)#<!G(KfBkAhS0846KAlIsR)tlHOkl1
zSTsMQ0lV|V>Aut9Ci<~un3ziExV2!4TV97*`r0vJ?U=B3OjtW6tQ`~9jtOhWgtcSB
z+A(46n6P$CSUV=H9TV1$32VoMwPV8CF=6ePuy#!NY}C{D=B1Fgm403>3r?kdvHwE#
zBz%Rrwzjt^ftnwBv$XdR2FJgHnU`Mo9&UWb`QP37|DFYKq|QLtdG04zB5XJ_62X_}
z|9iC4`Q}?I|NFP!eEV>n|8JfDZ=L^7&;R%A@E?aSo*W+k`26wl-(NpI{{FO|%l_wX
zvmp0h{``u>e<gYUK$bi~8#t@Sv|N1mkPz@#s%qRx0gLhmb_<$3j)txgwKRcLf@+>>
zFCSLRL8zq${9$G!A^awL^RxNp+w9Gc=9|B|6AgZ+B^ta#s=-%5G9s&51XC9$FV?zv
zm@^sh8(uf9jDrm$F&`x!RJCR^?C})(;E4tyb9OPw(hEK%JN9+>Hk<|^KGXT1u*m|a
z#n*pMRmf)|h+3*O9Sr;}<2C<&*!}mvw*URR9re||`szI-2h5)dAw7wD1u~rwR!3D=
zN~0>Q)BK;w8w*oj>5{0ClQ2KE)iVwPdI@JHJL@3)d@lYiO+P3VJ;j+4m5alBPf_?V
zpP_I>848VEawjn?ZE}uHwCsYPQu@J>k$%vblTbW7cRjRH41SCpg9TV^UP;E2VUUz0
zo2!y*E`M0<lyZw^F$p6in5GA~%Jx>Zch28*=@DG6M_HmcL8B^wG#uF=b!1a(R0cZZ
z^d?f453w8L36PY#CLp{&q#y=7pPX<S%z})7s|{9?wQ#ioi_;d~G1cIE%QXn{%S6;A
z5fnxe7V+<aH_e*dJqclh`QM^kgnGa6q)*FLkP^%u9wKOrw1vy$E)=X{iClj&@gUM)
zmGV%_aoC{0JN!?#DC6N$2@M-6GvP2yhM3*iFpcKXG@p9V(MIGQTvoTX;7~Z;yR~XI
z*SW7)5`03IG?8YA%g)?al*t6$*Gj`p3gL6tSd)>JY_KE`;esrNx^_;nT@gU?(eS~^
z|25s}B4Vr*lZGhZQl~DvL#4XZVXTSTxjEP>(Ra4)wQw#{@mi!x{n*=7J+NoNfm|l0
zMs!edH4{g)%)}8`Gx-8iL^|S!K5ATzqJ%byas=2E1>T(BDc$6Kk=~CHL$l&kKh*%5
z!sXmKdS)y~ZcdaAOyL8&ZQJoRYBkVKb3n@iY&NRQJX6Dd67+V3G?F<r*?s1ftQ---
z2WL3y1Hd&dpk;r|kd0gIuZ~~YAP3qY=&CLal^zy70eL=9hiGV?A~93A(FLg*K?a8K
zOTiS--w)~P*4EzLeC`(phOj1m{3^Vv>YktXeAj*H>2PE>I@Jcgo@$rDFFq!v5>YRX
zOj!q}tQMHEIM^bCT|`GJj)G`L)tNHf(W5ayCKOyHlM(&tbbR|VI=u*<1*2Kb75~xl
zhnL7Cl}$i80zGU2y&D=rmVUAw`*xoA!w@E^rlhI(Hw@o)?<*5@%ZV3=Y}(Onh0!n^
zpg0XE_{JVu{*iOhy!*~s!GQk1it^Mh>%8tuxF)uSF#ru<te$L3=X0-g5_Jt|E}u;a
zI0-RR0>L%%bWJBt6cwwp1<UOCq2h7qGFa@KTPn*Gc!rI!*l$;PU=bF(f^$e8wT{sF
z93yn93OAobc#h!}Y{lw)z=#~$29r~29TcvkycdveIJ^oRQ9SHFx*(7T-J%QRBcOah
z*4Xou51$_CgZpAE+>ZOvK%n>Q+Wp9+4JRC@A_Fh%jw;xPzq}<zaz|*e9vSzyW;DNR
zD3G}$D+LBQSlInX24&F)-AB225;IJ}#ZN^TS5fSXHo$(h{^()PTe6KB1o~;qe9r?q
zzT`TQ2O^S$G}RnARupdGm`}(0iV<!0{OE-pHzUa_4YQf2vbkWPjvb1GH}&nFRwGIa
zsUoh7*w3heYBf8)>s#1Fdl#z)DzTEl05uww*5OMZCVVN2mMIJCK`~Vv081_;l_Q{p
zpdRj*#_lQ-&>>t}6fv|M%U<XnxA!5|wc4B)%=JA2>Jtd0Dx7G=C@O0wOT(j#ZC%vz
ze)d@Rp)A<el_R<eKl*dwN7W6ldTGi<AN$exm}zaZ_yv>UihAI?#G%}ut=#m+S=-7#
zs;%7eMp`lT;=Nl47P~clO7_|9ZSQ3?zV(jD7K&<$VV1h(@Axpr*Qi0pdxpG<{N=aV
z=r0XXFFj1<P=D8>05_8hz)95QU5Debcb0|Wu}Opj)x1RH?P|zDLO48uKg3H8P6(R<
z=1*{1e{ab~ScQE$Pw-1vi5aFQ=ZPNPH`4ym&_|oVV=UR+@X}b0GrRC-XokSZg$rP6
zWBf4<Qdk-mcb*&^MBeTfS?q9a$zEHs*Ou&$X374<7}b3kw%<?D#IPZAK57-MS26So
zmY^Z9Lfj#Rt0nO@;=E5*=Z274Qvec(=1~yO)A6m=8O>)n+Y$?!U|m9;o^`dYX;wzF
z)(H3|Mp<YsROTaw49qt=!?-Zf4#`T$1SN!1H9>+B+9p}k@=sp1G!CD`Rg~dcocgs9
zKhNnU=@VolLD>8fzNzbabo#a)31k$J1XzXECIYfhMg)us9H8Nfthr-0oueF(fjJ3c
zQsE(0lICnJGXh(=)jj~W==+dzjGq=WUywi1hA+&tcG-aE4%=*MXA&2>AvS8~4S+6N
z@jKgZ0JE4M4Pg!8?AFLqO^FG#=~Z%Ykn+{b?_YZV3_vEjLVJ`qk5I}&wc7y0=oqs@
zgewRaQ-5~RK(iYjV@gwTtB(alG(?pe?%lvog3){oCkY1&hFXZ?(vB4iohve{8E0J(
z2|;UCf9*w<q3~%Oj7F)E6s(%7h7H7B&!=Dm1lO)E0|*fy410KB!+o_g6lf5JTU+%E
zCBhJJAvmAFHp9<b9g#=^jt{hMhG7%+&b&@|-+<;sYo(!;Gv%fm8w-+wI&|<ELbFZ4
zk&P;yPb^D<@foB^mrRo)rb)SWa+EE(1pFR37XZQ7E7FuVvCFonW_Y*n^T+Hdn96W7
zr<Oz@ao!Qv$OXl0`EH$2cI~(Go)XI5zYEOT`DN|=;&Ohmq&g?)*B&uzkC?Sb%>8-9
z#8dQ{d>SWFkkq0^RIlVDV;)WA7~`!yW7eKAYtNXqXUy6&X6+fX_KaD3#;iSK)}Ar%
z)uwcBIRqR&6(2jJfYW&P<>VKzGbfd2O)7MRa;8gK=p7|e!{pp!M3$J(&r{0C&#MFc
z0IyM$%2=kYQ(ig2<SZBtfdo4e^|=qxe(`4-65I#gcv2JJ6yF+8S|u)9=G&Xnx2`h0
zl@2^+NQ7<+n$_TITAmb*07M4U+>~Ai&^}X2h#kaFEtv$lAm!+1lLGlq{pVitKMyY6
z!iCq!`TTz>=YMYRJnFQq{Lhbex{ucRpFhMOnLU`dus@E^JM92S`@@}w-R~Yg{A=gC
zHta>`uibCId9-0Q7G@kw$hj!PuvC_#?mf@5C3hw}Fb`g{*kNfVN{4gtg7$PFui7O`
zTls*L`>NlfNljOnmDUHn$tLkIsQf|T-Z^2RauHhX8DN3ttyG-Ybt5OPfb1_)DptvD
zYj?${b`!TYOD`DiQDFAjI0B(JJ@}ypFYCEXc*Zg`5sEzKNV;ki)quy%L8VsLDr-p3
zNk5^RPN%k)>q1{gOVK2>d<f%BvI^e(a{0!gP~>){nok(RQWH(t0F6TmWpdt5ukwkw
zR@*gJ3#f>GH~$?ZG5S(_cUkb=mfEr0E?{Qp!yt5>nqAdiuQl6J&#{v4kvW+?yD|kR
zopj>~E>b;hDqk>EfY8a<R4wpv*~W^d&e681K@))>ey{1n!p`gSu(iD0t6+^ZfrEwW
z5TVV+qnKNR8o%Z`z1XvkbEhLN)}lyNek#2MAJOd>62sksujF1OQt_W#{)?~tV)1XW
z{P*pSE&p{vBwWjXAK=gOVw+baw;>YL7K<vHG?m!$5$j2Q4!sBA^lH|yQ<aN^%~F;r
z7csmt-|Pi44REz*F{a_7QU_<_AT{%Zq0e>0d)x5F4KKuB@*dc3DWfQOM3+8L*hr+G
z<AzHkl(3FL5c;InZnPz-E|n0keRqTn0$cY&H&Y<7vWV>DXg;RW)$C$19(!{%tP5-V
znBOZoU`&1pnse}5W}$B-!+%@Pso;~^+s0eiyaQUN&0_g2i&op>x4kl#yPo$JX5f-7
zG@dC&-Gm=5a|7~cFZr`B7f#x$+FQ*n%WH}aG@P|6_(~$c-sTK#`aTrE+}fgAqPyHD
znlEJ4t+TeP=Z$&4G$0m3dLs`W0}xw17tr$HHygd(Vi#V0*NY%DraHqmgor|gXl%-d
zItaj^VQMgQ^cSHo5u70tNTovTCBDVeWw^ZxFZWB-Hd;4<OvogcT!)mY9M>IK8nT<M
zjiq5IXJPvalj~Io^SHcpjt2wMh&;5j?|wnw%S&{!cJzIxx$^vSNtcd9^G?pTG`*cn
z0@#D!gM!_I-N-ODS;igcgtmEntK5Qm1^u&VO`#<<ec`YDz0X`^ychXz7{e(!UseD2
z&CbJ4PXD*FvzGrpFZqv=ptihH&3nM<;+E5uktgkPU4EOL<wQz$aH_1)TP@!29;8h}
zpiJkJhN$A;kEa)Y!#m3y>1C}Sb{_vN_aZ&lYQ&byqnS=^GPv-1m348M9GgV})m(yS
z)a${f2+^~k!A<#VEeO1d2#j8yAOus<3?rtS9GLA0igSOSrp+k0o?*&e>71Gu03sB9
zfS+TeQnP7@07Ay(;4Cu;$5LPwYWooQn;9t05P*q)L!nr;l@)<7yo)<iy>z$^G%cU(
z8&(v;zTpqsmQai8L09vdXx|j;NaAZ;$IK7MwWxX76u}HP`jdLy82+6ZvT%J7z!oJX
z`-A~P{$LPH$?YwRAYCqo1>x|Ubax?gVVk`r5~+YTiu}W%KN+ILv3%ayt)Q|hS};pf
zUA_z)#IY2Nl?`rkXlq5fSeUEwWmsXf3#D1?Be#d7c;{AfT-uo-@p9l7dWecpr(Dd=
zqQE7!7ILDLPMlhh7dD*+1B^4k1QjHzQ}zwlAOO+dgCx#{0O1Pr7?9c#LPT^ufenD8
z8j;PMqpWjx9?xPbi`WNQaIawDoI$_`f_m<pceY;4%AuC`+uR=}RPig!$~_IWw8Nd2
zpCtY*I;{6NW5Om)vM!(RSr{Cnn4MuM2enGPPG*f=*`D1~84)Tt->aCD#n_e)nk)!b
zwCJV?lavHE6rVNjKJW6u#i~dWzZ2xq?KP_q;#~M9Q|oO@(UYT6g~zghP)Rh?1JlH(
z4|1fa0KXP~x0tewLXiB%w~Ylj%5vX|sCgy;Nm4)#&SDeJ1eAH{n_f*m_>F_zHWgU5
z1yqdg^iR_~tmp(|1ZbH3y)Cw38f{$TV9Q#k)ifHKEogpr6YG0Yba4_E?r+E9*dr+P
zlQ^(z>PGx2dSJo9oCWNueS^^<XVNQI2K?z9DBIEuV}uX;0v4k|AD2$cs+*AoJpYEi
zUpH9@0!FTF32<zBH+z3R!_4P&{~rx6{Xu8d`~So4H@W+Nx7%6Y|3A(9e^pmo?Xt`G
zVXFoVpV=R5pV{gTKTEm#Ebru_tz$B77{AUN?qa%2``+A5OCY{|p?d+NdCIu<!^|4m
zWlF3-Nj=EZ!_iol;N;ho*{b1<gXnyA@qsi3rc1MN+t*xj*i2RRwqLEKrVr5En6!9&
z-rNQ+PO|I?%c+ww@T<uj{?c-J(K`^;BFjIB!!i})9pZ*FgT4tT$e?q1D5|F<5p4Iy
zxPRfF3&T5voGJ1egNq_{!G+Ulh(%}m<d%0~4>zVr{2#J>W@dz~G9pmJT#&Wyl$;@^
z<I{V&d?P|j!ud2hO-iG-eUUS)z03PDSvzwE;tqyfSK1oQx=H8CHkcWE_;!!cW4~SZ
z{sSHzMD<<xXCHHbUE=)(U%G6T>cW!W;ruRhV~0gk-)r~YYCsmB%<b^I54hiV2b{5(
z_Y62aTZi<5a|}lGReCf4iP<MIcu;FME}N6NqXSc7Ga*9M>@>W~t;raDqj*F`go@|H
znUV|wPv?bIAkNoKroYa2#_=+7Lw6n&``cd2^Iw$wPYF;~_5bR2zUh9O_y1bU|DT!t
zkC5O!`G0*xi=cS=0Q2;`KQrOI_@j-@8ursYs}lTxvJH)O2|tVMH)_K}vw7Z{XjX|A
zxx1FQ6%4QaWTcpvsCC&N10B)t2gKA0E(b}eDKT#fyo<51+U=sj85``}V_A*BF!F9n
zZHXd*XQphkQ+YPERG#tK%!d(u;JZFRCdDv7K!!9P&kg090GaSKili4FCxikhER(S_
z(b9FI2e%lNhBZaFNO?9jbqRz*5b4!^zK<{sWm@icmT8sTDZNXVZ8bA1C6;Mbsqf8%
zgLVqoOV5$-kc|c8#FtNUD6S5RQo-H)BD}udXtT(C&5VS5#rc%Wl3L;KaT1<~Q6sa7
z8QFKGT3^rSR&LK08JpXC8&xuo4E59^cAC@?f?u={I&&?%-c-xr)8@oCLh+t$u1F@m
zmov7SmP1GWTQq6dIdI9WXsfj;&ydIfpW>k+w?9fe#H6yp*(;zgnhOz%ro~ZiNXv`S
z%7akCH1ljI;~a{!*fcfbu$E1S&&B~CsA3Pxf(!rH<zi@@EV8+d-=O{-?=@qz+Vj>p
z-SV)+!0a!n$PqQZ#e~m>L9Yem<_fqJhyw!(kD6V?^YaUj2-9^CCPRI!YyWn4$t7iO
zE)H+T+!-!NSt{&+>I~a9Km$ZI@l(^I+v9eQpuBUC7H4i?vA?6KG|kxN!6L&q;4XXQ
zX81SH<DZFj<&LNuxn(9rDJ0LbC%AgT7)ikd;VQS0<7_p9sCD4^t*KcOJy<B%&L^GH
z$U{As?ph@;@Wvq=-FQArv5JFog$6<fgp)ozZs3;yOSN+!a9@%BY%q?2|Kn|wY@1d(
zdrbj|G9Zqat=YVai}&-ov2w@4bx7Tq?pS95kaWy>fr~+UmP<}0a%W(YZ2jQKG)<LK
zBGR(wNrI+3N)kcIBmE?ogvT*|CunFJ<({?L?T*O9HOD;5M*6YGKj%|as{DAHgnHT{
zN(HbK8yn%2za>bUR4z{FKo<#?)KJ>TY+Te$&y-gs$5tt~)9AuT(?>d)puRt$d7!CE
z3Ff~6Tttf65G>rWU1p*_xq1FLjWh2U>=oSL0Gv@#FWf?8${a*E#dxA8$meHpDqv`Z
zf~j}mgG?IugNtD3b=uA}U!!TgfV*SrkQ&oR6$eR@0D&%x4Mf!%G*vl4Ek)rOOyF&C
zo<qUq2XSJHHy@FwR{Lxw&!&c-)B>s2^o`l-l&A786>pbib%Rn?XQtVmV%`v>Ya-Ml
zJdbRS){r^#2uotRwa#-`Ho`bl2Spm^s5Q}k%j0r1btH8Z%*dqWbJ5#{X-Jtdr_B`{
z1UPSO8yNA7Th_rUBz`UiLsQ;D(Aw#3qhTRS&|M={rZs00^R~^0q<AHz>vt73Jzd+J
z>nc#NNC|X+TA<&O&sov4tf}{onx4xqUdQuWqSdLmPkqm4tI*kiTa*PTy5173qBugX
z4M#7YQDF?Uo80&q-Q*49Ibcvt5<B+*_><_z)!}9rFatcoE|rny1B@MW<}Gvg%#bSV
z(D1n1B41H?$9zS1O#CWcP^h!q%Nt%muExkFd6}-YEo*IyOWT5TrE-CPSeFnNDj5mF
zoyqaL_3fcm#-i*DP-R3tuCVYVJVWrfPJ&a-`T6N0a&*3`UZP;1M2!U)Wv#LJ#55Ka
zY=Nnb5qoQ0$8x%kwBePXjI|bItp!<*;W_!;c23QKN`4F~n2(15`$?QqO)h53Tu=CH
zJ`=TUIC!4wCJPGq8!G7{q49X55BlNw=!a3yWO>AvP|~DpCC&R+(kxWOWFTKwnevHy
zx!sHZ7jvSO^FMUI&G~;l+*#-U`o#Rdtl&`I^DgN$f(ah622p*78W3rO#$=w$>@BMB
zJgV9kY90B5%Q={(4M(|A)^g-0M9U)GCSPm4Nt^_`pOS~#y~V_^>?d`X_m&LJk@d{X
zSDzaCyhXY=CGB#Z+vAwGOG>=nd3XggUJ#c8`%cU&9*^T|<ZRa=NS1LlzV!wdASZ~j
ze_Fz)xlpCf5H(X2P6p#SGDUqnDak132V1;J0#X-(L^Ogk1UH7EQE`0`Pj5w1DYHQK
z8Q$@x&>QZ9^44fo=}cQzk$DiGaK0hl-T{s&)+9FO6O4n2EaFW0+iuhlH?%Ew>ECN#
zD*ZU%d6nc!Qt+Y=EK07VFDF2CaU`#DY7rvq(@?0@eX;*uBJiR@UM^io$jYt(>_myQ
z_?qHza0||YY#u<`#&7~=vR=7)W6%mu>cAG$oO+`qo;VAz8zP9)<Vf{hgr(7OGZl9L
z<?%xDy2``giabcOQnO;*Jr_`LwzNGMIxH?d026a#6ttY+;4;^I(0odHHVzbMFh`w*
z_0B=gg4<XG9CLxms3Bs@!&IIK7~fF&X(R{`czWqdBJh@z2xy52^EJKnN@TJ^Xb$JZ
zY!qNp8$WV`>2cT+93uIgyxU5^<_d)I$Kg`+Hu#Z#tTj{0h9_?Gzr1~;OTxKt>lZ}Y
zpO9)UT6n9n-IXoF7((AxYhN4fdTtr1%;G54GoZ_+c)BH@ru<Z_%X!l>`d|Cd%<fJY
zVD^<r(KH%by$40Txn0k`JLR`|FqluWqzMQTb(Z9Qi+#=)f>Xx)w{%b3cPF|i0tDU|
zivZ~=7B1i9U}6t6STqo~@0I4cv}=A-N4%K}I4KUC-hzBMAz1vbjB4Uxm|7eV=?gPI
zXcevECapZ8*&P$Tjp>K|2z4^NXnOGGEA<X5(EssHCe)FXmtN(lZ;l0GtA-~US5agX
znZ{T}&I8r6Ttq?u*-%^TWb~<-(D26O#9S&>=3ksl<48&ozk6ps77~iB=Qj(vu;m&}
z{B9i$_F2b&C7sFi_P15<|K0Yt`TUQaZ`b$#Pxbzfw|&;`!trN81TXn`eMY-%@Rt+|
zqF#_I0gAvtiACwCMR$Bgq?(>%!N|A}sX|?wenVWKNojXQ*Yf>>i{gRz;Hu9|Y`I?7
zqF250O<9O|Ae3f8D;n>BG+Lq~LC;DioYlpMnq58UrK}*uzPlWSb`6Jya{WMV1t0f+
z3AZMj<nyk-OtqI7TU@soz5d1;H#NuID*R~ixQ?LycW%(8!2quYtH2f+(tec7R=p}{
zuZuyuWxQMxw9V@>*k0ccY}cH3&HwL#|Ieqx*~<F=cBh-;|Luos{{MN!e=`!Ste;<`
z4TsiRx}_zV{*wK!bNx!^bycz5b=Hmwg=}&j|3#uj2AzT#9}VI570zzu3n26EmbFf!
zU0<lxw=1EgnBg@oa@RFg`k6(b*D0_DR8r<|3v}`>{2|DZXG!c2g9J@C=)S=Y1=pB+
zGGbqYa7La6E$=b9t4-v+7eg5*NW}bThM6qUO~H>;N?z#J%KHsZvgA_6^SR1BHF3SZ
z%B7|<USB&4tO!vWQ{F~^IaipzR+=8jE`jO`0PuW5W_VP3f@E`@#e)3;?4Ht>QXUxO
z{n_9o4e{`d^4=Bq_r1w{JPW5;X^f~jyWo7UqNPz8LrC-2K8jxc4E4Fw@NK|K*crKo
z1VejjeE;uRf~@X!X~um^efJeU{>qgRn;aDGtctzFrD__>t14>ND}r;wyW*@s_L!Mi
zYHH0<4878P$Jx~{d!3(M6byni^^;rdnglZMZcu3KkbkZuCKxKbD^A>GDDb$YDIsc-
zx*B4+Wu~;u;AR%c455r7k%kgQiF_wZ>NhE?Z_`}qwNoW|&3a8stx5eYs9YDQG^y<Z
zm1aKcxd~63K--SD{!j;*vKNt8bw_&JnCBito5hxIdh_hen<g3>P<7fA=@~{oOt&Bd
zgriy#9?I(vd?@piFSUVStagJ4N_`cqO#-A~9|UW+mcZKP`$1b9K5o)fNQm;Aw}>=d
zJBiC3PLFOWRDPc1wVAhg3tLM(pP?l^&H1UNT6PnAFXe$j_hWZ8RLR1xSshL+(80Hz
zGssvrDiH)x-dHrR{G8r>W-e#zKVRJa|N1WXf6Vrr^Z)Ct@Bg3M{r`ID`@bUtT-gQM
zxIvdU%t!lj4*F_({3X(kCE{{?pl*GYR1D1Di~>aE$(=c77ru45z~8NEeQg|y_|B!}
z0+fkM(<HbG<9X_G1v1@rIFmrb3w!NGr_pM)8kd-r)aQsiDi4!O!jP+~eOXpjTL=W9
zRCbHp1zXw+7Lyo6Mp9W`X*1U{Kg6JD2w7UuR-bRIDhB6Zl4<jG(KZpHElx=nZ7GnE
z)|K}P1MHoJ5l9Ef=`%qJFY7{*D%fx7f0P%K<uuM}_&07V9VcS}GEJOk+a<({Ezh=Q
zm~b{XTJ=8DZ?Kvr1iznkmW4B#cDr2nfvfwOolW{rV;EVM|8qdUKmI-(jm+OxuK)e*
zH{U$8`G32+w*P;SKY+x|%_h(cba3AF=Ce`ruW0JO3Mac{TpfkuAc**Vk6)830Gj{t
zohM2{;WdZwq=9Z%;rYevI>5iZr(0W|k{!n3Sps)T908fYm}E&TTNJ?a7Cgo`Pvhxr
zLVasb>t46r{tiADPyEz->Q4dRg7FymMV$N|jQ=)Dg5UzKUO|$!;^h253_;U7iRZ}x
zBNB#z_X9>BVqoF?>=Jmh45UShoyr^9^Z560Jod5wtNGa&M3iUnG0e?>)$+QncFQ{&
z!Q#kuTXuQ%%)9nE2!0r1_29EPoERM9I-Fgwmjr^T;r%O~lk!>krp3r7X5PhYHr?Ic
zp75t}3EME{HhgWnwXyMU;V42m<?!go!x#M*ub&?tA3c5itpEMd4?pyO+W0pNxrcEI
zUT?UByC9lRJmcU#NBA()1t_7XItx!vffpn|>fT`jknPP)c(}=c9l-TAg<Tti=;PhO
zx=^P)L`D51m|es}Z#V)f3lHI4!f|79B0YD>|DMJ|#eF1a0^l2s0^~oTVmn$Km!5*3
zVNXL0q4O2AzY0{Wr}$jUdpW|G#01R+z%Fr`7w8y|g8>D<ozYj3&Cy7*dp2T&`lJ^W
z*O>TJ#w!FoW+fFZ@$8nCm<Hn!8WI>p&pAs-#z5nUo{OyxqSkfzHUt4Q<o(9qGE&;N
z;Gl+m%G`M@e5$_|d-XN3<FCbLeeI=#3z(pXF70Zv!H{OR?Wp0>mxWgxI5C00dg3z&
z8@%a8D{t4E@<EZqfPBqQb`3dOzLv!S%v3mbL7ClMA@uS;iy9wcj09DYQ9mu}@(+w2
zrY`EIMP2?=q`aVB`Y%E%GXPT)3+QvnkRGaVc2#8r5TW>aO}r;B()=JsnVbjSNI5^b
zhaI`cm4>hpZzNxCk~wxsRiQ2uDNfr5N;Gm!>9qa8^V|Bj;Wzd1wpxyBWUW%rX{X9(
zq{r522Y&0AA8p{1RX(i$(p`QQ>bEZZ*3HkUflsY@@!b)rZ+5{UgiRmMFl7KRiPI3~
z)3Y$!e4X8E33H44I$pD6tL~4RVyI17mcfj77x@%KB@{tbxr<C-!(>q2mc`k7;Pr1B
z#tHErT>67LgN!<60C1+wDoUMIeo@X>LXvZgM!{82OKrmwRtTVGjsTZe{=Qj<$W`G;
zRS3$L2r4Uk<f&?ZY8MGe0B*n=_`ADj;s?FGp*ih2!Wpjv$)kob%NxWWq{BQ0jL9;O
z<0SBhx8B<*zK+yL_}x|#&}tR-P<n;5rq2{-R@TnQA1s5_BnuN}36O~f7d7N7_BEes
zwhCUeo3m%YkWv;H)o$|P@@hWu6q7U{jm7#ixU~8n?f=$R-b!MHPpN4LUh13%z!*Sz
zA)^5o3<s>lpO0txk<7YQXphs=Mw-Ah)I3%ybHMI}X$#ECHaGdYHbzaGKdO~}GHd^o
zZ0zFARmFL>dUl1O#rxZ>yL%MvxmF3N@2;SPf6N+E_?af*(ssf899zE_o9?>q#cOQ#
zyzb`GLwBkSy&CvxFrE5RAS^sV3r~?aNqK}7+zyv0l|56E`9-j|QOeIH-97Lm%zA?P
znXh3Ic!UczL~uK#>$M+xv$XdRxE%fsckT4L_i*FiFayU3`xag6KBE5TRXo0p;tA09
zbQ>iLKSG1=#bVV+vHqv?ZKrL=e>`k=ch>r!52XM3kgA+c+k5Uu^I+`#>m2{ypU`iu
zTm0>B=Mz611Lc@}(Q2HhFQ319{Nm{4i<|~We`6}ylHj*_m<acoIp%(Ym2+Z(09$VI
zhQXOkjNi>OdxC+3N$bL`8FHKb%(=gMe0*~FegDb7`rp6&`CstFX%{>_*f{mDGW1QY
z{)eQu-97K!g4SKFIX&1vJ*d?;vmcEi_EGvd0de$TgD<qeECvZFJo@3NKMHSxVIRd+
zGWuyZFZcrTCG3h)kFXqKIYDWf+@*1w9b_O#%9$o>pU!R%eQAKi+4R2;&ONCr8A$p_
z-`CY549n+_|Ld^-_}S5qF94!1UmYGlK7D!Ie{uNF{?QNpe;huA-;VzK@snqV{U45g
zh9)OR|9x1KpTjTpdUC${%<)a#aofbH_2_LQd(!Xp3V^l`Z{gYkurjWVN~0sV<bpq?
z8XU(h={Nwa<3ISLtGh7#D!VVNhityqpJSvtPssykPpCkY)mJzP<xI#35Alz80OIy+
z7Edr_S9Vx55$v6tiffpK3qs&q<B+Xd@m%hdS6ta^>dWD1N$L+y=gBx8ye;o-K_tJV
z3S&6>8#DL;W8{wGYYQq)(|rCAotz%OeDP!d=*6?67l-1CE^$+nY(>)J__`s?`XHWq
zE|o+$s(F+C8LDCZb79+unNxGoME8uWd2P6NVIY4-xm5RGpx2u{P4YxXw?DEXnCgxE
z*LXkaPvR&X`~rikLEhOw$Sb-yK@lPbtF_2CBMoL57pNJjj96teg|}jXa(TKF#E@tQ
zHWFHqrYi3xrir%K&NjCA1sbDMDz&E2ba<fTa^R5)x-cv0+gHO+0KKhe!)mQwMnPJm
z26e-AcJb;J4K7}^T(1Qqez}(idC@uKGS2*a&4pSvKs_uQ&Cs)SO;*c3CTpbS?`@RR
z0&Ts}zM%tNz47+<6++gsV<<cHjrwf+`}u>qyGwZuI-+%B!z&^TySr+CY5EP`!_91S
z{q*}R{Xo41vQU^_1jAZ3hlYfaT!=UxYCQ@gXrq#N20I)GHN*DiruS1Y#!SX4yQnnu
zoB@r929!x?d>$tt=1p2cz3KhRA4$o}QmWGMl>5TDon1Q{Xi>ru;8)DM$j-#^^$RvC
z(NH8+f^^y-lMH_p6AICFHA`fzJ`F=Xk>fm}K1nde6rNXsaMKX68)1;7xER?no0gg-
z&KPCXWXJ-e_&UDJ)+*Ge6p<vSSCf}uu`p$M5Q{SdnI3>d{zNT|TRxzLWWUh>W<Z(0
zYBF!9^|q|CEywS5Uvwg;tOJ|gll+Eoh{BVW_oC&&jo{hg=_xJlY0Eo4JbH2R^e3;@
z`R<$V>T-;)wLxSaFWhjMaa<S~NDaX%WsWAH--%+UU#p!P`cpXIDit>B*y52!-)Q_n
zkSb6<mY<}%Tm((XS6m{FdLXt5lgSIH$q^{Ozc&aHy7N&cW0}?)0~d4@qz62b>`i9p
z6xkE?q`}5xFKijs6k^ok!y+S^Ma>MM)|PiIDw&r$wU&bV^4+HLttO1{j=w<|si3{D
zUr0cJUygz-hoy9nLYYfuRVMd%dC33wf6eFOLz-YlF7qTwiHL(g#Kqt=hFiASzl{D2
z;$S2y8l(+HZSXFNDAH|+&Zq&v-l_NN=>PM6O~dnvPd`%B9LCE@3oJhD{?8v>Zqw|%
zW6|*ir&X8h_!X}ES*tB*1^u+tgHiD`6%i#0!xQRk!(cp?F_K@lUNum5B*f(Hk!WXn
z+A{dEE(1<g(C8RjiT&vqp`^0dqnB;vmK;y;3y}q0z9y(Wv87e`!e-C?c~6ZjTXgLv
zLZY;mvzZ^yHKBuE(Tuhi&S-nVi~u`P3xr1xw_%<#>GO$Pgi$&_OEJ0?14q?Xsvwe@
zT*#LEUftWzl9GS-hysfQdo!n>9ZEA|Ou<NPCgbiCOFq-O@ZE(jH)fmcK)%!KPTeOR
zI{uspQO0ynf|&>lhK@o>>5w7>4?GXzNq|hxn*_dMJFzJriSAJ*1nJ@4^3MEQb_}=G
zA(VIBufp^(`C-+(-x{(DVnE%xI|>vb^SC{nryl_Wk+8)RW&12az?CtqDvHtJT7+9Q
z#)G`4g49erK0SKce|&Osczjx8^_x;QB9e6AD3W&OUoT(xpFjTou>ZLK>iFf4$B&;s
zKYH<F|M?LN@n44wuWFaal8fVpkR>nt^l6Ma{?CJ9Pl1pK8f>usmiCn4X*||r&#bd$
zU`+PzNkiM&?QOb9vNDDfBni|EZbYJ8Bk!x|)LZJ$hE8I8Wcbe1GHq!9ml!D<u)?MN
zCd;DtRCpa^Trw`Yb!4X`lYtb?k;KM;WJ3@aCMwnu(x;1TODD0}Oo^cpYz>z2JRA<k
zG|5yu`{}(y7}F)noDV%pRY_P!fgIL)$b~~uphnqLigzy5;(IYi1Z+Zu(d=zjqcuBk
zFfKvgeDe#-ZvTBCfUaQFSB@0hL<8eJMS|<dpd$+P_U!j&D$ea^zC4wB^y<B#OfIr2
zI2#iYQ;I%Av&f1%QX}na*PaIHk`FvoEI!Az)#M@@Ub`WPUM6*leOIkpev<gNwR#Kp
zyxvOZs#b=HBpPI4ns2;Mfg-3ycycpMnqW-Wq6>G5Xvw)G=yu9=2<@LK{#W<^$lH}y
z^8eU**nZT`#s7BK^*`4BA77^b$E%mm{`KPJ^P|Vl`X?`+{p0XB=Lh0y;dI$RMf$J6
zgOYeY^|Z6ue;yC#W4Kvvdfu<>jbNYFCq8MnH?hsHXGusVSDb^aeh@|%K@y^-_3W0c
zVceUObtmaA=G>CiJU2*%08A_*?GZ--$t<w+)@x$Lv@tNHc!qh^=*c-e!u%Z#uJ;y@
z)PX@OH)gx6CvS(NA24l9?j3Mj(jk|8B^ywJ;AqBqP*Raxl+)nTCw(kdD$-&JMKjDW
zkX^Tz^9ph8!$)%vS;m`VRacH8KMuMv3t;YNjSMe=W7vQ~9|!r8mfEoVF7Rclh3AyJ
z6_|Q<$9}#<gHX10-hl{!AhleviQVdEo17<o#zEm#Z`jversriR=EAj#Q*&6u(L%c?
zuM1^gsQ>ZKVNcSmcOsj-I_HuMYg9&sn9yf<%4$y^a5rgX_cWggdf@;aPT=X7I21^@
zK+4CdP=8a}9`qna+ff?)HV@DiC&+%F)1KTcrKT-x1+A>Hp1;prCMD04L5ShRKMx~J
zutXgwI8xcTBsJqQm$D!xWnjYi#k2HC_CA%>x?5W3{9AA4zZJx7IG;>z`_m*CgejFs
zP1AUQ^+=StOD(>yWuF*}l7E18Gma3#7B&LN*ii8)_TJ`Y$O$TIE)DieJfFSs&cZ%`
z9*9%fp48GAYJ4NC<yp5z2_v}Tx>wGFPW+p2GM|V#(X#BWxd$=J1+-;yd>Ii%dxx>F
zfN{X`sIn7P0Ug^W+q_rG$!y%mMfKI9`f^cuLV`mQrq{~)JrE$X^QUBl^g#hQdM;vZ
z;RGIahX;#0D2pf38sO>5>>H>WlA0k-r7eh^&{k?HBHN`(T)?S0X63+Lq8aTMEUB)C
zoQUa6=jjCj&ln1UE9qEH2~xxc)IM?@ks;pOpU@Ou!EfOJ+8ns46%(s87{jnn8L#kh
z9}n0uNOSP`G^UE3C1XFpL@Yz+h6uN|AkD(tG6W#i7?$+K80(uE?O8X2M*EFtY?i6V
ztF~J<&QS`H7p0#Kwa@Z)<LYWz&xV+H-P~4OWVpxgda7~WhtA{#zd*M)dv4!3&D>JR
znoN0<zTUE8Ae~z-d*BnNGZ-BrZk1#?aQS!3&QS?{L|4x;9#&qW5{)Y_QT8ZbBD7Fs
z#=~D$SseB=R#x?-B4)k<%3k5LYIKw5bS&twIibgAsT?#*byfZ~KEJkW&V`>omZbg@
zAmaLTY<pWdm7(L^&a~zREqU*hUv<}M1?gOk73n^WR&=Dfa5L4TAUs%_&#I^Zizel@
z%~>|{&-9*p*rb*A@((+SQ=YOuUh%6ooyBn)yoS1GM$_8Cn_{7|mmQ$z1kSsS;=*6K
zh1>g9jsi7<uxBspe>nHd@p+3+NG63q>nFjerjMVIoB}d)|MVqKT>hL?1#S#~I-&w=
zcAf6~qaTY#x9J3h11}^IJZ)_MXQd8{<w`%(BUmD(y6VGZqi{AK2Abb$)(LK&L!q7(
zcWXn};5$S*HL|+p%7LkulK8#=l>%z?qu2!3h%Wl1PXN*wz#5HHi8_^l3<&J<t=y_K
zu<AbC4pBJ|VD3oOpK;%*C<0YEFVlS()7n+Dl|JsiYgzta5GQPvk3}*p)>gQ8YJiIT
z1a_v_!Clj%6su9>&oMip76Aa}Gz3%_X4AU92q=G6oNy)*wH#f6vR%$L;cGw2Yn1?3
z?NecYOJC;}-!5eLhPkRDo@I(Em1s~Nk=Hb6O@r1nsGJ6!NWI4=NrS5CTPjc=?0avd
z$1tQ=0+4x#*7RmgZ`Sli5sC9JT;O?s(vE}VC?sB!oHfZ=lbj;$<QFU7+)cR2S)G(S
zN-h$YolrP)&;rN4Cy!4K`$sQ+Iy^o)J^a3Z`ma}qCp8bB0t~!gHyplJ_)?`vF7VP4
zzm@dYRD*`!ROYR<puaE#+G%&B$JWB{yP>E1ZB_7Z6uGerd>pg)?BMt~@5h*>*Ji)|
z0`DY29kT9Ox^F(w8No&OLGdFxr;&3|gS}L7wRu-3J<iw|5#0uv76Ws1^lXqL>RBMz
z5_<5?Fbgm{4FL@hK7TY<1IjPrSx5;7{F#;Yw_sjKLvTJ@oowM+DtMJ?9JLHN7!%Q#
zuA-%WqA;rECrVE$_=#FQm+=#A6@SI>6BWz;=i}oSM=yTdWvAf$VnYnGngSa@c;F`K
z{$QF{y>R255GH5iTe;qS@9)Q2;sq@NSSG~9VfDlbn&+9858;OS?52%;+CV^(wYBp0
z(rnWp%}_-Gp5`59!7t<~n*@~E09~lVXfWILg4Q{C#w9XIF%5l&fvs5$T5-xSR4}@n
zpQE(GkI=6iIXlMA@TC5U+q%;|3uj(!G>-jQ-J^eqMZ@t*#jYgr86YHn^BN#F^+pN#
zO#wuCoS)+B;0mQpe>{m1EymaWZHnPH0QkWS8UH--2e*QE4lV%Z&><4#1XIpC!882k
zAxwuxtqtQjd^a~IDgcW(r9)nv>e_y{UANQoui{9YCF6HFWtuUpAKl@2@f5N3tCK#y
z*wdz1m4k||UcD^W8aWR49l6##Z(*V}I=wvkeMjSLM6IFnN6vYsE?_=UaIN#6oTuR;
ze_$2Q3?EtG0Bnd{MuJ@}s{JcY2KI~Lv8*|Enf<g@-hrnj`3grh^PSgq*>FXK8heX#
zM7Dt`T9Pl3wM!FAWsX`cvq~nY1=-%TH1NniW=1cgCvQ&A8bhPQ*@yJ(ZpM<m8s!J*
zy5c?!$M8=6)9Aj}^P02^?KhlwEi3B0C|lf${^J#W#}y=v9BRd*N`G-vyk?Jr{2y_$
zcXySX?KowgbB{&Bn_@CsNL&?d>Ks(0vz<JVtwj7f_e%LbPeP*;1w0@{ge6aqbRtyY
zr2JV<NO?MPJG|EwezuO3UB~|x#{a&ah2wP9`hT6y&bQy#@xR?i>-xXz_}?!+{`d9i
z(X*4X@L$<r)#zWt9AZ{<vJ?TZh#SFCeu|SQNR0fO944IUu8D7PV3i&jt0|1~U!Z)K
zXsX!sFr=98zbOaL5myeWNG(eySt>dhNL-aeR|FmmMoI?Cb?0q)J2(H+rC$(BFrs&8
z1n}aUUoa<iyW#D4Z}9h)wYD3%FjAudh6%U@d5}rS?1u;YX9q9h8J8ghl5!S~!`bcb
z{`T3y{&sjk^V}Z}4`>4ad-K1P=C4D)?Qg?#Bam22j<0R2Ub9SPK|7@h^>K>*3)(j+
zf@nQi{7^;99>5YgchxB_vgf$Up?HKow_3Pt*wdSI_q^}`J#m}OP|7vz2nv61-oQqP
zEoCoDp`E|N<md0+ALa_c{|=kJM&f0V_^!CO4APYZkDto8ar}!kCFt~P29n*Nw{MSH
zQ0h5V(*Y4Ie;9ZYH!|eUNVun=e_PxLPC*90>SMLFQ_M&wDz1_Hxp5XKNKQKt!U0_h
z_}Nk5;f&+3;qWk?#@Dr*M(wn2MS){%#}!u|c(?4y@vWd&nbgyYF2;*rbjzl~9B%#z
zjt$dhqTsKR0MK<9BBgQAZv6f@@P~G`R2fZ-)%x^R6Qm{}VY0?sq#3SA@6|DLoD)ZL
zX%UnsXyDHW7q@-DfIjVh{)DQFxI2qs?wIDkrR9XEoDo8u>{WD&M#fk?Sz<T(v>Qe)
z$pWUvusM^lCHaHhwhnf)V;t?l8J26qSObB!o4IM!4VH%;B(d6C;RJ_K<rA!Bwk|r#
zU-CKrFy|?ZKGE}JVR4q!(?VzO!2>ftE#>wIk&_~#GxAa#kkJboh8DAxSj{XgcQz4J
z1?Ms}&LXR~?W1_v3%B6xUD{msvfwCQ@=;V@HIJc~>-ONR>LL6wyb`&#HT|ez9?$Hr
zrl~w)SD{UB5{tHHej2Ld#Pw3Nk=We{H(8ib;iS{KS5`tgx^Qbn?+8weh)2Vjm8hXD
zM;3;Kg~jYAz#TR?JFt<-NT&?R70?AWI&3Unb!7@jCp7dC$lL1EBrZEZT&mEP9ilCr
z1+*pGjfUqWIGoP|;nknToTW0KBPbi?0RX<?CMZkN57ZUYsR?FOs|5xYCumEU--OVM
zn(^ccrC+5#x6-s1h`!l~S?J{-PFY!=;{&=Z;_3Coc@-svez`>c-h>g6or-rdIM1A?
ze9zK)QA*3jYHg`jS?hn7)c>Bmy+s}>|2>M}Obmi&uS-jSmg|4J-*k3t{qMt_Z`bud
zzeN3S`>)=Qb5xPeF)flmp8~VoKbKEhDLwhyAn;qG<cn4UpZwRquwvcOi&xJcKRtZ*
zI;RA7G+3+z7RNG^Vm318Nc+f|xgW)2@7ZhgDF(qHPD810^SzY&N>?%zP%__hZxr|-
zV^J2!Obqa}s${nFqw+}OtY=ul9$m*+qLOk3%w*&vTU1r#t!#^m5o%sH8}79NYqC?u
z*xWQ5#379@Xm8{b#MX<FB$j+tm+KWfmnQ1Wj?cb|ur;u_n2!M`de4N?g#DgDj5Ps?
zaQ{`6VWNwQs&h2<&r^I!KPdCR4bECN?FV8Uc2MK~J+IBy{au*rq|Xg%{EsN9*bv>Y
zOgQ{nZ)|RRlRkB?(cjpj52SfEPvM$^zl`T%h`Qk@lSM=HG?>-etxjE5!Oc0CYw0Gv
z1J85`8qZrt>2ZAh+`j~>_8$_W@=2`@tCHsXT(Tvs+abe7u&mWppn3E2U0u3wPLnwt
zC{cGx6N`uQL?v6-5278~0AHGMJuMzrCV-+a6B-tFKNyntBhQyMzd>kn8wO)|g>_^w
z5g}g$W-5-rL>D4{qu@G7^io;blv~~j`R`I?5V*9=B)+jjC}AN8sbduv10Wd9$6UcU
z#L6}33xyMzVZBZS8oSckOD6k@azqVo;3HJ<L?vNo{w2a(Cd77z0h}~gstlzV%;s6%
zMobR}Z(G1Ek{~93PzswEkL4@j0gJ&FME#l4<-$x--_xTP^om}XNK?Ji6Py&I@l_73
z%;a6{9Grd8H67!#YDU`k_4$e{nj2Yy43R@kH7Z7QK`CCvX&Ro5g+_{YepiL1G`*(+
z!+tO+x_W;U4HmR`9!%oo_E2J-_6E7HU@UZlM060I<yxy?HfXh6kYb{f+q56(F%dO$
zrzDLVRof+Qu7Yl6>U)Jirv}F0a!xPB@}AS`XmS(x<R(UcY#5t;m-@c$Ikuey&4KPk
zX7gzSkQSxXpJfzvuK<MO7-<fWd_Ov$BhCtz^-O+Zd)tU&%5$Y>t`xE2)n4J&!HIW%
z_jTp(zApam>%HRYwNJi>ExN)o5bDv-;?Y4?4U&jsP5(4G5X#O6-h;D-{z470>D@FO
zNrDhNW{DrA=;=Qs?#*wU`=V9eG`BsS4+g++U<5P5jOQ$0)({0skWV#K2xv!LWh5*|
zpXfm7_&WivheU~AusE@oK{EF_V<$lrV9z<jadO*B0t5d+{x24tu|e*xyg@uL<MJG7
zX;`WkiMiX``S-$uRH&WoukLHz*^`>irPu@B4^-&%XIwU@$FdNMYkT=dD7eW+EW`<P
zG3+`#RF7Lfc;J2gPgrnFT())_%<780V$_8^ZUCaM#V3uyzOtn=954s<VI1%s?mAB1
zvhb3~4kv)G{(11VoGV^jv)bT@`g`;PRTxa^=dG=JF5xMMd3e+Ciqd-t7=T2CwKj3h
zS>iG(tV4865Rs>%6w`b<^k;!JFinMO&T$dKOfaRH7F<4}AfZnj&A8sVH$Ixxa8guG
zroM;2qm~Z-sax5*;8BY>_Hi%*WZ3TUqV}}JSlU8f6JeAW2U>vFJRk*ecs`HkDezt7
z(ilDyq^at;;=Ikpi*4e4!oRn?&L021Z{R9DGq<gL60|HeiTTm&6~aKwgm-hrf#ewZ
zK@3Df+<`@@&8X@4L!Q23PH6C`(UO}U|J|{hZf}c?VJ=4coJyOd0KK&(KevY;##5#q
z)N|S2XmUCZHG8VLTn$x+2u9XS@eh4#&;9t7iHUaZIFB)UIaI|9%zdKk&S!wH1*o=D
zfNKhYSo|Uc0?1VZowfRqNot4$)xcVKmS3^DfD<hRY$9+IzBdE}Lxf^Ej@<#5cJ>_k
z2o*fAxR!a6R>7F={bJ1LN8-9G(YQh!VU~+uuZ$J6*Ij$B%@VXusjIqRrKyWPBLT}L
zU;Qhd|0M}P?!OgjF#PFse4B|>mdG1<K8iT}PLSWPR+qObAFNDt5aO5XXt7*{Z4sh)
zknjn8oPNxcBrKwwq1)k2n9c-ILYjnU*hRz)%qOyu6B7CO=Xx$LDdo@IY6n<Ms9-G&
zzj!plWG*bJ5Cq|LBKwC))SSJ!UpkAY&`Web@Y<wT9Yg1Apk47qAI=_adt2H_WH9m}
znpIHAo;|QTJ@=>kY+wI*2;YXW|KX2oO4fQc!$Z2|)r{G-VLnZMQg6`w9YD55hWX>j
z^YW3!FS}kl8#upGS-~N=ekwd@GW@X+kjW%`xitie&Xa&K|IJDdoZ)je9T51pPu_-8
z*6Au=GJws7s{#}XgIjVN^WdQP*4%((k((EcIOY+XMMEwD1_!!mC;u(enzwM+5$1Rs
zdh0s5xw7=L31Y6Cz@D>%Ha%?K)CGCG>g!^?tG^}3R*;Y13Fn55O7fG!lx!j1HqoUb
ziD;21?Yg$QZmpHc(7f`^Z=MkdDH=hS6-1&!f_fj~khzMb>1Z_)Dug7RhKfh}SfvE=
z{Se3dBaBsv0qujhDJ(@4S0;!h#BfPMSVjbwS@Yh;9f{vkgl`$5w^5PeIjEhYbcIAN
zxAm@BzuV9$8cJXV1nsT@*a|l%Q(d^4yj4sju&W5A-=)67Yp<9t+&@jYH+oRzSSTHA
zB^t2O`7b#8OPu?nGhb<8PWD*kkXJt7t4cJEb8VgJ+<A6f=5g=rnW^9>Wr*3tAg$N`
z)9iT9@UJsj$d#O1ls!vJKzVX?s-B`P;Og=4G7h5|CugI83U)J+hMUYNw(WCMFTG7?
z!G!Ao(meFkll1);wgZ96jb?l%{|%4*vdu45u>~z@M<mNvxf0_}7DlI4(X{y8lQLvA
zmBLN)K%$#OwKGOQ>u_af@gEe;Cp*@vQv7m`VbN}(n^39=^_rqMyjsQrXk|G;S4^|1
z%~NPJHci7qH4VB0O8f;g^KMSLLc2){Ui|$4cLRQCHj8#ZaBO|fK!g`)OAI=8)8{W6
zLuiffD!dC~pRvwnN9Vj^d**rDn!ZfMv`I_JQ!b&S+uNrvzkdn54^B}M59foxYc}I4
z&(y_=a89<Y!`C@|7HtF6Z3m!L$t7Ws=gLJ(x!DWHRq|L}``3Tj{{MWWN}c}~`u~6X
z@X=1k_Wy5pAFciWzexZ8Cj9l?ItV}~mCqla9v}T&6ak>#Lg#2Op2OO|7ROJxv$QV0
z&cDmVGkkq7p#wam88>iB?)<QPD@1@gil#-0f~_u@OVm}6+<J3#37w#08LtvYN(UFv
z+ssK&o_RSJdthc;&IWloMX@~fUPnSY#~iclPKgHJBp#3D^u&bZY_x2><bxW)4^wi7
zr5rFDR_<lGb*7#)vb6|=(g_S+v_((^dN$*?(OsxHp{BfUx)NmjDbr(u^ik*&Cep`>
zH6CmLUe5<4pHo1wzP&0DVb&bPxIc`DhJHza#0kJN4Wy^le}&Odnk}RGM3azU{pn*W
z{0oQa6w_AsWXjmgN%1L&`U@L9_iri<2JI@1_k-}E6<~P+c<{dp82a2us`7_VF@bQe
zAV8bma;JCXP3SZ-7i^R3^r`HVxpQ7^AM;s%1)b@`&xglHPy4T4KY0fK9KAR_JpSSF
z)5DrEZr#yS<e=<7J~=u1@x}AQ7pE$Fu-Ul*vg#cNX!b-(UnUov=rFTc?->rGCT1Go
zk<h8pc0c%kasb%wI_!7N2m;Ywl0pg~sdNcLV_bpkd?lLnV_+W130xcYjK?8fKlrh+
zA@m-L`NFY30l8$R#T2P`aF4LxnfyT2N0{`7{>-=IhlF3Qx-*wJqZ>fbRs%pV7Ge0B
zYoqg3z2fL<JR04CoNIK|q0F1T!YtLN%EqZMAM&_+5l0wuUFzYFyV9c8CqfxpG$i?8
zRUVRBFB;OErHIik8QDO^UoO!p38S@hFWyvL#o|vI7_NWPdg1(}t{m9(`e^241#`jY
z9thwK<^4B5q+_sEhOuTw8&PR$)N;|>wBc47$eJDPS*6)g%SE%(hFfVM%`BqUn#3rG
zSBIeJQ1FG~vF|dfId6u-67$wOESPtG?7NIAt45QiNU@4WM%VLGH!N33EkP(UNIa@#
z$<1fa*@q~#-3JwAyX;hu*;#a-HU84RPQ*23n$eU$u&?tCT~SD;Af9)R+tjVH(X0}K
z+f4I7Kkd}X9w}Re%cqQ|a=^2$laFcUWKmje+l1L9j9%gp{F|6Ik;*)R@{;!4K0&nZ
zgPu8H-RYC!f&&v4n<vm0-vIgM;hS>ZW%H}o-JV5dQ&H)R$~=5Cu<vJJA8Vl*%YmIV
z$P(;weXjE*vBL#+fd)Nu!0KRUifQvj`XYi|Lc6wje(w$J4xwi;p4h1Rj%};?b0qLq
zbhyJg@^2jHXluoDwBIuayti{yGXeBIkBX$L3bLyn75~O@RJKZwO37Ja(7uPWqDYfC
z5AOlA?+W?s15ep#+`M+pWmn;{Y=R{7<wxbP4p7>xmVE7R<2gFolb1cl)sSu)-7qNW
zfpF^&&<R~6nUku>uN@!#ux$FB6ZrcQ_#ctaI*`+5xE}`i?~hr(`@9&Xi?Uc8^OHCZ
z<=pnuVwn}N-e#Sj1lE_c-737(%e)r^ha2AHFQ+CvhBp;?88tLH*+NEz-(Pg4X3zXD
z`d(eq^echW)w%h7GKFO>ULTf@J0;A#bXiXA?~{@&BX{5X;jajdsuY6##Meh|#Upno
zA$S=T(}$H?mqoG)C+<%2(K1SedlzZ$?ZjpGS(`6^f*d)UY#CQx6NmT3k3aYY?ZYwk
z_s*a{xR`&hT;4@MC_KTAwqGB~4|^oVF1ruo2SjgU9yVex$B4?rzcnJgFTwAFYd$}m
z^!VOI!4Iw`yH|O#0&QO($&Ya)#V)%K;|D}<V;(kQJMhTx>(rel*-JJb(Y0s4HRACE
z?D;i)xUe(5b#+Du`r}{{U*Vw}k5zi?)JlOW^Y>CM7BgWl!D=uL{KQB@9!#LQee9?x
zO-*;hq-1ED6eJmsL?B+>HI5;gEjLb4mfUDQISZ()k25Q{1p>yBsH!I;UfYC6=9a@@
z(h?yYE{{0fS4jZ75)UX-{;xhJ#1E3IfL2_(RidFJO<6ZdqQs-uaz@tkXL3@Gy_RDe
zqvprUja0lDxUf->1kr#BefqGrt6=Qh6^>yU#BnkVBLx<qpEvjeAX{t0&X9fWCzN2F
zqlD(sfTEW%?&S<~u+NfU5{z%X6gEQU1$N?t91v5&BSQ)!q=*zhO$_TIpmf=krrJ)^
z{dj~l0yD%@C(1S)q$#DdNA4%eTn+FHQet9Q+?WwX&R7n)t$Ck+|MJCY|L6sJpi52R
z1M!Kq89sL$?YP>-=J%XEYzdpAo7c7THm6nCVVUcWrJlr4TvbL_r53&oghZ7asVt=w
zPjQ+AS7AI)#iS^zeTk`Rq{Pud0|2eZm0Qf7(3u|%kD@`sNearQ1EX5AcW15I-cB#V
z5y+k}jw-S?;_4JYMWWyuqvaM(Or$fhg-;GDc(YfbKL#`A2Vm*72sC>7Qo3sn5v@3<
zhI(+&<I%H6W{7*)j@8sCTPE!57VuTWg1lkxmRW$vnxHL#f2_U&2N0px8w@`;ma)`i
zx3`%KBTnGT-T2O83|~xiNB1q>1r}&bJZ_pI4b%5}(TS?=rRdlTr>J}<|Hl>mWZ!L+
zK-}pC@=5r=?ps#a4@h$>%~U(Uj`rH}`9Q5qab1IdB#LXQaydwBDT?dT1%C{R%ev_b
z6xVEc9~4(6Yt|?(^W@4aFoF2-D6ST~vkJvsOcu*52zMj6@7F{A-RJCvr_aZ<G=FTN
zcWIgTo(S+KB=X)f0hajDd9(MYl6uv;wN)6`XY-xACc*s?<Ryh$=LeB&*~bYManBT4
z3aSMYD3MF=MLsV0?Be*BiX!l*;iRia0-D}+pPdM7YaKp+5%`{!4iY~;3I+a%Qm|V0
z{ZQb$Dj}8>cAdNUVJNVaTni>pBB<Vrz+CXz#bI*qs(A3D0ps}`4$WCWYI}87g@&IB
z+hX2U-Mj<<O~N2GRr$KzM$`NMv4oyl<UlzPI<opk!6`}tN&v~N8lL=DRWx>!3`UIt
ziH<C96`U~58OSf*;|#3ylZAc=0#-e#P$UloQ41;w;^f?q!W4Na6@wYyQWd_G?M~?&
zjX%t3zs_a6ESZ7KA^|k~agzAAoQzaF{-tEq4jD>Y_M5$uZ!#f6s52dFmo+X=LQACz
zTqV9r7i}|^=!}ekhvQl5z9AH)NrB%=SL%L}&W2dcXg>4y_r0&RkAIC1^vUe+bK@Wy
zju(9gFl8{kt*MRNk`Qi46rV5wcJJJqhVkV^gw^qV$Cv<3F^NUJB{l%F(WbF?mpvtu
zui8@+k&Qjgy?S#Wux9ncmUH}uF}+n_ogZ^WSgUC)fOW~3?@HcnZ_nak?CpZ29sOk{
zYT5}P0jFmUKMb*i7ErWZ&mRua8qMwC=#P?kVkb!~{8$k|D!TPIJ4NhXzAxR0BI;Nq
z;u@bb?sxM2GL(B5U`_8|-dLx5Z>cUcizPm4QIekG8+%nc@sV(KD%9$&C_vb3+}?kS
zeHU`M)dh$}Gtii2!*!k=GjRF+vH#O=^B}NfvrJ&yv;AaL^L!=rX)=!jL54Elmxf*%
zI+&*67(@^tH}lElHY*gpuQfXGYDW1s&bn=jsmBB|_IVPSO3BNHJ!ewoI+Mz`Z~=gY
zhqI8DmU<y2r$q@?FFVgWVJ1bZE59Gy4COFp$h*=(38AKC&~NCO@{1f*q*s;9)6NLX
zjwdO5o}~IlWEuJLjw)IEjY2`uC`@=_W_z6o=^ysllnM>OUCsF4y)LW@&#Za`m-=`(
zEc8#rf*4tUx`=IiT}#q6kA-aAC|qqL=j4;JmrvSo-mSW@+i&UrE&9Jxv6&;v(^`6P
z6+|0u^^Xk&)s5EwahBh#w*01AzC+o-#TX4DgqUR!EnCRe_=zK{rlXZ3zoscb*mL09
z2>(oU*}7a<Lz?XghIaeRGc(&g4I?yH!kL3~DtFR_BUbS@l?z(g(NwN*k%y^9*{ocV
zX`w5nq=N47K3yUcWYkD4S_5IGq0_lqJAaE7&y0m(?F8L*mF59!ZC98_A<=fsuYHfx
z;}6o8j7jFR5x)dhm#BI%g$of0BXenpjqVS;7=L|li`yjSZjZmx8uC|b6}G`Kd(WCI
zu7;(ek2h%+ov!_9%{^5^d=YM~Pkxk%m22iL-WXzcm&W}y(p53ko<5hO1w2{naY0v$
z3dpV|opyIspv}6eM+IHf?qprmBZpv#on$+FvKUzq+M@VMuM}~#-6(<lg3;t)urqt6
zzb;KWOppJ;HDhc}r>%^J?GJqTOQF}I*d;+HcmR1bki*fv#u6S_zfcWPmJD1l#;<}+
ztBlY0`<|yGgNxq_1v#DCrAepSz!mVSn4IPWd<w0Kf?#+WXI0L6BD*<!$4GDf6}-@g
z-Q9IHGm+04JFm)AKdMWV`=XtPW+6sGN1P?eyHV_=Q@F(WV}RK#oP@voT&~TLs-QeG
z{=x2U26c-B7hPmWNr=Ud=upS$ED8LHws~;GYu2k&)<Ls3d}37Y%2VtD0>WB}Ko-x~
z^}a69yXsarRz!#BVA*nH9rwbmEmK!veADbx3+ptIUuR$Mx##GZjB5>2pLVw;2&L32
zD^dOmcU}<dm&hQCs>%<zBC|Zfq?I#_zCmHDm2;$cv&5P=N>_b%RMbPas&1gLPv6td
z-5C{ib?hoD`Seh6`4#0+(Z)G}iZ+I>u;zlw;PJJz(aE22MVjf&=~!UaE>t6kpK$~k
zGr^DCt_dmcs;Uf0_7(2nSYX}~QDi^Mi63b$0`U8Q>-fMrOk?=}vu?iHv=?hb$~7VX
zKYMT9*G8@^fZo6TDLT2o%eBE6NHUo@Y;y@X3GXDp12W5*$@8zV8|cK_xa|Ow$@%QB
z7A+;Ux@`lQ?Sz?t-7S@*QmIrb)gqcd{E&>ggSeA)2$jIX;7>0`H$>GPrDx41J?-#G
zDR2S_aAwYbvzg0k6HMm#(M@@hFA)~Jd-1p~o%-ytmGzTV_+t-*%HD!?T5wG1R81DC
zt*uBdig?^8u&7v9FAMCG*9>%8+IT+dm`6Y`bGOJBiY2Xb0g|6T2D1(1aBbzFr6te1
zIqfVhZA_P=XM(+@l|g9b<u`-Zg!E$e))aB-tj2}@$UO56&p;8Tp)B8p$ZMSgY0qYg
zm>0LB>O!@orCMq9DryTuN6*sM!gf^JQdzbg)MLz*Du^zw18wPTPZguw-!zNMn$T;}
zdf*C66KJk5nvfNCobs;D)NMUPqo*p(Y&0h>);0JLP^AaVrX(Sn0hb}@=A443#A5i2
z)=lOtWZA<{Fi3>Lw1F;yG1x<>&rMZ-Wj<#oF_xDUs}m~7=8l0E31*i2T)T;zx;eC)
zI3@0~n^XjC&$*)nH;whO?dGmKO5ujFK5fe>-%(tA-|vo62>Z{mqiBQ6&%dLHmOk^2
zB3t_0JBo9Vy8j&of9_*%nQ0rCcW;?#!@AYp62$6%-rh1t+H={zzfS7&`^f*zNrrEm
z|9kE0wbfky@29IzzvTb^lK=ZZDgXDgos(vH4scZ>pX>Y0IPKnZmTz3sZl-U1q5`4;
zuHycUo4*?a4+0tJXUX+&#37n8H(R&5a?*LM^TNBn>WT__6`2gkG)B7LF?BpiU8nvv
zz;&|h^>vl|TGw1(FUe^=Q+1p0+D(2v1rTlFo1zB5qI~5_={9Tb?2@kKid!ozAdXE+
zKyIVCk-1KcHYrqod7NU7b6S7!|2s6>bC(P|DZ-}xA?+l>9&a%^<BaG($9hr;0czE8
zwUn3I`~Kjn@3&f)uu@8njxIrJr6W{M-OZbK+#fe&1vIj>RI|RWZ?xqzZTFhX3to><
z`R(EusI@dPdC3gq;g`(-=Y0Q<n}j@>7EFR^Apd#`9n|zRkl-7`VTB8{a5$T{TPO3(
zEj$CEx0ah?p1b6iN!-=wlXGrE<<+CK0TbjDkVvH!ii62=K!|Ya$cUi@F*y{Ok2?S}
zErA6my+IQfn2<c(%T@6@z7Dab61+l<&{7Jl7g^;t^B(G3c|XiNno|4OcL`p%OdTNV
zPS&E|ehcx5EolYokcM7YBV3Z_QIeu<f;Yu9_k&R}acJfN@Pbo|EnwtEO@?a?+G&zb
zoudO|aafklvcF0wY-1;hyXcuof>5klTK+&N0sNm^V-52B8&LXVEK|cCV=W1k%;(E%
zWAeC$Lt$*EQp#(oNC?B^6P^jl1Vd<^N9P2Re_vgu?MsknZm`4n2if8oZr9Ma6r)uF
zH$~%JN4$~9@AH)$-ie`85G^_wN+UW+66Tn;&4*o#$vjYBA4<mQY&8_XJnrlcvhlQS
zMiea|GDQ320tT7H*#z(n&0h{^Kp+?c9L3{-GDZMMO`;7TildSvrU6=P6pTSAEw-Y7
ztx@zXESL3Yl{EkxzMOotb9~a=Yd!n1wRiZ#kMP9YLF!Q<W*=g>*C-?T?ICS)8#*E>
za6%>Ck!Xcz=;H9uXBF!3LNXyrt4ZUcfq1UN%nV`-N*Z+rAXg?egw{uJ!i_|4$P`%{
zg}XSj4dOm)+&)Y%QYxWtyFma33Gj5AVY`5#)Ttj06<IUO#s`z$DbaxAA(9g|O);gh
z!>ta7ZJQGDj0r(Xr~oFBf`owc?zn0AEqTgxb#&SPSF`nc=ZESn+eocm=<{!^q~-${
zZAT~6XYSl7l0zUfyIMR8kud=)(qJuubTdxdton2+qqY^-K~O&yXrLVQTtGv1o-++V
zv?4XS{$b1X!gRyxy^uE+t<Py8qj4K!A)`?G3kHx@o^}NzG$_!vA2SB9R(mFQNQ<aB
zk_z=lIl)iukFq40xj#}E<V`8nv6=zSB7<u<{$!pjUrt791QA-U6;|k8jOOcqf6&jz
z12@9}F=IH2+dztmsHevQ)m88oS-Ww|aN>3bY}xWrAVM_%Q+X1A>*3DT0VMV*=(O%D
zis|(*Es;(vHs2TmbI&??2!_Nulz^_%%#gmgJ3J|!Clr3!)VJo_`hKhOIjE^zS6^qD
zP>Bi-`(aRkzRwgy{6ZgPG_ELscPWNwtn;CVe2@yzBLZCC0X=FVuRGXs&_fp1*7&~o
zB1M;RY8#H-i%0KwEEMO`Q9TA33C{o<g~aSY&hcz@A7r|E9(P@P?UhBc(T>`eN&CH$
zMHNbe+f2Yr-3%vZj?VZq<@H8`&bC~3#$nCjmP9@>AP=P%{TB%kL)uoolr;if72Cxe
zt`D=u`Y5qAG=_#yN!S>M4GgX4hp{oNb2Rz~TQ)0SEhs<P5vs8Ew(?dK1X^UOXd(GN
z+qd_HC?>D@y&&?w+xZT?x`f@-d^^6(Zr7`$(VzoIyiEdkjK9<i+-2P|(9<D*(%zW&
z<Vb)xQN9G8A*#2U3JpkSnJ;eGn$<XCJ0_?oSL-!icR{SL<4Xdh$G503Sv0ZHf|^)m
zENb^9ed>)SnZ0w~G6#{sEdmg{GZ^9$2^?om2UXF&QxK%|Y@BE9@UUUL@u!(mo3Otn
zJm4J{e<zB+^NXv7t7$!Yhs#w8lelE*xih8SF>ID&5*#`Qgn=1EsB&RJoeW`&N~Ocs
zVyrSjv`=CR>lIcQn8WSCK*@!%$ySBxD7tcrV2<HguB9rTMB=`HT2uFQI$a5!&sA?t
zA`&%V5kLZO@lbrB01v#YAiZ@l8VoAiLrFwVPLB@{Ug$I;-iooh8pMEIC;ORNOZu`0
zUl&io6nF|q)%Rk`n^KSrBPEnO*f`+o`Rmv@eEInt_8)31C^P<;YX9-*tF<Rz`Su@c
zUw^Ux_+tO@A7cM;bbPq?X7{wn_`|*q+IP$#=dvZVTCA@Dd)pPhrYIEY)E=Z2yoK9Z
zKgj?)E!vH=JRG=4)!Q5tIiMOF^Mj{xyeb8MY{5J5^MX4UtJ@lN2R#w1i^JCCqPj$~
zepZC)+`Y~Gz1mjO0>p?hURXS098p)Y^=Z~7Bg$6Tio0Fgp2l9QV0OyE9Ou9sPXS}w
z$r;^*j9s9#e+4EX6x?|o2WlNptp#xP4Wn$syHY$BrH*r@O09Gx)RODy(4Dy3{X%<^
zeG#)u-ka<nw2qFO&zr}s=lef2_o^-qbLKCywOmJQ^{IFM68!vE@7{1jMMM{xbf^<I
zj8!v33wBCf#h;kXB%5@{Mk0smV$I=oec|a|&NVHF4nRh#_;<DH_37JaHClHykH0}T
zy{5U8`i6xhqBY5pGSiIeH$#o6+I$HonNA7<%?<TrLH5g@E`KA8JxgHQPIgFb1_v`J
z5-fmoz_07YchWB>7YTke_qS8{2QYl-CY;QWue(TaF9HigIn2W~U66N!6=^ny0C<P6
zo3c?-e7uKk!!dtUZ-6@bgBGhg(fhuoH6e9n_ZZ!NZ&_a_>q^gpl357+YaZ|KwvOIB
zdj)^?4^ErM&v$m4RkxiQ9yrqvFzh1x%V77QY0ECAZ$pJJjx7u+3Ah%avuXuHr+c?a
zrOSf)3sLvd=;@B}`{+`{^MaRE_{UM9;4{7y8=Mn15y~l*GqN5isnpRjnwz&l;*aXm
zeg^0Iw3FK+14c;7GR%SU+i&H3zoZ^ArP4NQoM;qEP1#u_;GI#z7FYg`9Ja(-SaA9u
zEadSQnDY~g(y4b#*d+J%cXv*kt&^SCXm5a)*R+R<@yY$s^3_ipLLH4Kh(w=Cy2vIc
zFv05;Hz>}cx(MsQaYJc}vD|v}VXXnrsvjQJ3TluNaG9>d{EBUSTCFX9c;v;IIXT@q
zh2^_*a?(6Lt$H*AllY)25Dv&R*a?_t>>qr$^J;&u^<8uK^zc~!(Px#`{`1z4hi_Wn
z?*OaVYwfi54o_R~e)q8X{CVs9{gb9Cip5^PIXP{;frqCro2}*#N5EwEPg~F59PFO%
zA0C|K6{E9`$IdSd=Ybtv$$-?17mnmsL%8h-O#kZz{i{i(Y=~#gVmCo7Dr7TJ46xkr
zf%sg==%H9=Y8MxA(<z;u73i@AI=Y<J;UcZ6-J3+)$KoN?MNt4_w32>d%Q!+T@)LY@
zKpX)z+hpl@mEcltV>)cUd!notcsZ1ytHhYCBW&9Ev!os4?Z=DV-3TO(42XI%w723j
zY68R!NjBcD<TGJ&#6jgJ0yNR0%Xu=QZP8F#!<qKKI<=r~R6d5ca`!kY5271WD;$do
z$AuM?u)xq~J8e3~ip2E8@zJD%G>OL!jFY5$zSDU(NGZLM2S&WqklaTW81Mv)GXFds
zW#gA|_Z)!v@J(6ts;qiVRVkpx=_rl+7ZVsZT3h}Wkx#VqutvRD6{iF^pm?zIpvaT+
zfrGSC2!VDHf(xLQ8~sUwCx(LI)36`-c&a&Pk(HG<nYp9F__uiUU}fj#a<tz=#|h5Z
zvradRZ0?e3r}05wt!w2ZTPeTO7tAkzuU~9GAeeACUkF|Y${TzZ7C{c!!~E=MEsfp*
zC}HNW^L04895y&|)Y!}kVWo11o;r%2U_6EHJ_))Me_nJkkcJCNHrO3{i9Tmuu=qLW
zppwt<@?N&$T0dx$;odNx|K;iu73S6V(I#~@WVnI#mbMD*TCq_bDqyj&6wc=RrKON@
z2L3E90hqGzQZC6XZDFt6@&EvzFNe^pHrP*rMa04UrjQLxvY8h3c{A#Ync;`i>Bo%<
zG|}|K)yqj}V#WnDgkQ*KKWxt<x;xU{m4q}B-K_*-uIroxO8FRVsoHp7&wZ^c7~Wi3
zS`74XMtZ}kCQ?x?lo8)#R^p>^gy?QkPInVURg)l9DWdxHS!5O%`xlwzi_G#xX8GI5
zEVtZ!?kcntY&uhgmWdErCZA1cnfwt#%jAB9mcZswnNw_;X@j|)uzPP(OM%ooi`3GU
zQcL$9rIybkv~+(Lp{09gp=FA&TrLXVR%qc1F0?3c{K>J73j1(KMu%JO#!uVNg4nFL
zMVMT&D9{3rl5JXOsfgOmjobIoSSu947FN$<Tt=qYE|Qwm(8{H#0&$1^)>M9&Nq?#g
zXFCSF_b72LTV_#g1;>aE49x!NS7IIWKK%js-W_!RTKFy#%5Q=1fX%ADd?hVxaaft~
z+uUqLRM7eYI2PmD>b4vIdczm!b9e}lxpm$@0C1-AB+P8S<DNZKZ4m~ch@@r(T(5X(
zIj33z6;-q-LwZsjYLJ@0t})OU@mFcZ3}7^<yfr$!_(YTKtJ+I~3RTM89wL3XEVb&s
zogYNrl))CboqvSh&V4KKTlLRckJqDoQjAY?nTh*$i(kyF|EBieN`ASM{rB3}PoCuB
z|E)cJ^u_-Bi~aY1hW$6}HiySMFAAgoiT8yO{!Tc!O>8zp3?>9)qVrP5u2iEWQAdXf
zmR-lFd>DDm=$3R&7yZIbI;5Yv`Cp0y`Ke$dt|6P^7)4_1ETKvq%Wk08Ea8Kw3Yh1Z
zd%3NO9FRC>Ow^|WUvt1>`<+ALd+&8PT(4OEu}-k*m-L?51UO-cq2l3JAbt(nfc%bn
z?grZSkl=`_>R~QT<9`Hxa=92ciqTf=5A2~ZpLtV>Q+m)fo5Ic?jeX+}M!ymHyYwTG
zN6F2|%GvX<T??1L*(nf7@_2m(VNk1Hw*x(z4H)&;8);xHbM#E%DU8>8fNG|svK9Tz
zfp(wfC#no|VF3ADmd|3El8G<#Q(KTBGc<n%MMJYw2DKFTWo?vVwxFWTP%Q^tk{Z+y
zr`NxdDux8To|RN_*o&>^Ja+Bi{_Sr};S^0cx?pA|8pMW|8JoB32V0sdYegBZ>1{Dw
zH3z{0a*~4Cn<;y}jV+`TVB|TS(zWOKoG70wc;Y33R$+jWs~w4Iw7%mcpau#m(KP@+
zuccRc+utv0UXDrqvdZJjk1RhXF&dwi91ifyBuRdCsez|9&UMi+@Flr>(l^^gQP1T{
z3hhFn{l{5M`8APG;;pDaWR<^XO3>l?6xHFQ=P^YDz)RsNINXc`27`1`h&?-^x#k{V
zb1vw7e-~}3njn$V8n&Nqj$=KlV4inDDO<KpS->{F6<RWZy>u&wh8#S%xc{g)6NZ(U
ziVb(&QHG#M38fH>hl~&$DV!7=C%ZT+?2nUUaZ0yl!c{(V<CIR$1l5(COF%jW3p1Ic
z+v>;*dnG7#wJa+Q9Z6KUm0A%c{FJ^8!H_qdKBQ90DuAaU@j0lhQ&eK6UU{&}Nn%RB
z_`FuuFDfxpzdTswL{$KhYA-mHKOJkTU}+~y0nfCm%6se_Y^UOLjvj{r&$MEF@an$K
zJFmV5(UjmWMy{Z)9=bxfPBX<v3jIx;8}wIoHYkwprb=sG2Vql_N2AO;J9ztwUsQJA
z+f)3aI3~3n1{>5Z1)I>Y{g<&PaoVPY2pVjzc=r{}7Vp41G-r|(a#<7?#x9pldiY2m
z8CT~~0dPe{3agGFI9pXkHmi=<I7h37NQ=51I>U>m=c_LT+2Lr?PpaBL#s-QKL7J0c
zmUahyq_0V@chee<l6HzQs5Wh(?P{$!R-JG*3QVLCIzP8C?HC71ZU7_0i0Az1C*6U!
z8@a-IEs4^mxj=Efx<ql)8BsjI!c-+rG3a9;U)AFjwyKY$x5CF(o97L0_Uw}8*nGXT
zU#rUVQ$Pn{tG^LTUetHaQX!Ruh44!70|7xCgTfjLuMe&a?sp}0)DzE&J5~rbbb|Ds
zNv~!MY63ugJ)Bfn*4+u2K|!dvmSJD+jLZN)@h-!}+zFYr2L-CgdU$rGD<{j8koEA~
z1b|i?<g=0|8gwx_<>dVQF8Zj?rE9VWhTMK$xR)31<1>mIGm08{iC~(3;l7ULpKOQ&
z$zSDc*=GVd_Mt$G;+dh@F|YCn(0LQxv$%fh4M;!shT)XXolTueg>St>=$zpVcMo45
z9XC%fl)b%i{bK)7V*kO3=VmdDD7ODteY*C@xBqzj<mr<y_8)&o`;RZCAC76qo74SQ
zMP?uBUC`tMfFZ1dz2>twFTCn|&t7N{)gNlW<+@o?S|s{GvBFA)^$)Q2(+dpUi)QLJ
z92F|)uGe4^H*ObnFbah<+Nh8>+fGLZgi_C&DrE4WPjR?D7Ff`h1mCW65a|4iPpZw2
zGU%5bCT<!9hB*5NFPq2vrx>qe|HZ-U=D}&}@Thsb1G`1l8dnZ$(a%#1O}C*$$kNmX
ztJe|14~NRyfF^%^eb&(EKUY4&Kcg<-RC^L?&d3vrf11Bret7-a+U3@!q4{KY=he>f
z9k6{1G*i6=l}2iY;i!T$jVCaW6Frg|HFFDc+q!klrQ26sF0DGtye-}t!pc_7BE03o
zDGXR<YU51qB0u78XoE8Im;<~y?+k6Jr8sjk*-|Jon7rM(7X>+KXM{H;H7!A5MrxxI
zPVW?l$>c0=!PPQt?s&rHp728csk>II+#(xsXe(^#2>iGKwlro|gD`GUUqxWMD;QP(
zJLNJqcm3Bf9Mn2qFT%kPXAu{+i@e%;u>Ido;IL7c9AM;@!_F#X9cx-H@|y_ycb#fH
zP;lbYx((@HDR;>_wd|oKO<HqN{S3-QMS3K>#O1-?i*~HCL6})&aNC?1ak>-L-0mJq
zT>auw@wYzzlf8BG8c1^GS$EKWe|WCh5xrre^Hc&;{6AJ7!8`Z-|MjCskH4J%|IYp&
zang-mOp<;-Ig2NqX!8OdG%n18|4aL$^zW|(pv0N2XRi)-|M~pzc<;x9o!9%jD!^k-
zD^OmzNKFu}l8@xU;b}8^8Gq3EpGQfGd1ujuBf;cp=s7Y>+9{^?qEj~_gjV=Il-rB~
zt6Xd?heNCaJ8+9c!|i(1Xf)V`7V{o+ErQm#<oKnSttZAFRLFnj-hL{JW*ixh1Dgx<
zzH*O^u2iOqPX<HwJ)w$Uqq^ES$|#qXellX&>?9d)&WnBqh^`zeF0`!4v9Ir~naG!T
z=7SQ+1ie<G5i*gRK02RAAuy6{jQ(pX>4-YcLdwUo4!~g>P@Wiiqoa8k#M<7gpdY#f
z)ztEwl+zp|BT+$nl?1x65M2+A;SJ>jmBy*-#hs`^y+*I4Gi&lUPAED?PQRp};Uyw{
zs(R2re#-Y*#n!RbfLc2uQ2wn3Xi{p@X%fSc6hP#NIb{$XIOL^7Em(+N+*Wxr#3h~h
zO4?*DfN#eQ8_~L8l9S4utcb$8pQTmf4*SQIs{F?ynJQN8NAZUIg<MdYhMYLzk*L1C
zwOZpiTK3tJ1+uxNB0z~mvb89fSNit2m~YhHfTxTVUq9GVT*X|5htOd#LEko~k9&?R
zqu~HcK(xPgnez$r`D{=->x>nx(*>PB*n)|awF04FYFk_Kn@0XeC;oVXSz!ivdhBPz
z0R|=Bj-a>aC^*H@8Scg=F(2)dUA$bSe0}gby&#7q12M$Z6;HRF|5o&BeRVsEC)c^G
zuiD`b-Zk80xVns9&UA4R7@6vHvO<EJN&<*?|NJo&BkA21rd!+;M5iMOZA42;?*zXS
ze3i$_lbs(cFeEWrAbdIAMr!G#`mW~0MbZnT5lY_PXm#RoT&?{i7tPYUw+(VNLY%1r
zxzI%RZ=svnEhsrxDo?+vIh*n~xFI`qp!yTe;_U#}d&7i65}J5sx0zZ7vb1F<pb*zi
z>?buos_t~(Gn0mSq`*_baRkD>>zMh62d{n%3m-vNXL=v#yzct;s^9zaT7a-*Ccw*U
zb_R90n!MvTjb+<Lq`80osX%|Ufo5EySC8k6i(e+kNWv1koQWv-X|90z1uYrIppuaX
ziJAuGTa9`lYNSAD!qoSak~Sw7(G+h8I9A5~5W_kAtedpb^HztqfK7F9Vc}nNQ5oFm
zyW}}Eh~-DyFIu`kul}z@?KhkLZ*A@AS6@H&^nYJHdGgg4{omhE|F`xSBP^#QjP!xg
zL*i(&L*E;H{@%KZ-^V>i{TBlAMSp1dCb7*@rJ-9Qq$yN6)-Ohb$x!XVyMs~EdOhe&
zx+sKWyiF*=nvmSew*}5dDRgfKMDY>o-+-txDyIXmjuOn#4O7jBjw{N9lEm#x9Z&}6
zM4e6yRGK_)8iO9EKr*m%PsT9L3@5G^_tW8osFII0vduP>$uK$&nlnW)0ep6HyxdLS
zCti0N7~cl^6T`pp6~aw9tQrN&QJq<9L_6IsH<U$}q*BE^Ex7IWC%rQ`hX5_0SU|~K
zU@xEaP~8P$Vi!+W+(Fnb37X1r&WA6U03fM86A7k0ujAp6zS(Msp!fa3RUfIP&55m|
zvC3=(0LMgBXGwGcB2%ALyJpd1VwqloY8f29np0Q~j1_-cO)B}vGUDVJptX925j0^`
zXBgR$V>1FM*w+m-%tctI!fHyXMs%&J1ZIlJNRkA_piSGUA&bY?k8?q-7jY3H4qX(T
zr`L$>$r#ohPM5*Q7-L#me9ba^b@rVN3HYXwC1VDMQZsd|m1#E$7f<xy5m8>0QOIyk
zc#lKC6!%L0S`HF+!(f^b#)U3n&N2NX(ntpe+DCV!jGGwryEo_`hV^L$LOYa_2&mIy
zG1IgeQ|xT?KOZ)T(;+d}qU>@o>2}JAUsW2M>m^WAvo+>6C$NT-UnZ$qn1<$YLBj%X
z1LtJm-S`I9x@NG7kDT4mf`S!8KsCN!QCSQfO<yTkX_r58^V|g(Sl_U0-~b=e!9=e(
z(TG||z^=Qj7g0@ObI(VE-svbEc9Sv4>O8a^7!ik+cA1`n1CtAEam;F&IJ5wWV|;*_
zW&SA<HaMeB((Ea$`G{hG3SDQEz4H<QijF5Ko}V0)DaURkHd-^ww1O0SgNH6^U7{34
zEp~q%6PM8!AVhXLDmezibjlEDH;dn?Ka|0cnKK|_9KT1FFFB?PaYkU(LWPr)U<QU+
zs&!8#V<cBNRkk(RuIEM{=CW9OW$JZLepp+EiOBn)EzMEn#gMYNqGQ$;l-8CeIR+Ip
zOv#lxEv)}tnq1AEz;xOOr`&XzP{-=pCeLnl10mS?Ya_mO(sxcRFOac#<3xw^L&v#x
zaUi8V-~ZutGy0x3p%nQj=2qxL-+n8qa({mqjw1L+H7kz&PADO=p8iTverKmZK~`nI
z%v3mLg=7BA09q?Dv#`c-^jm~FgJ(C#$+=3sL2Jz9`0&&JhD{hnZtW9&l-y}!!i%GL
zjC6)=l0N7dxYVHMysOYX&8Lmo2z(l8A)#-E7KTeiFy2i?izTgT^R0UDox%8&*O*H<
zPpq@{z#50%y%#dJqRwLYe9~v}U|>#18B8FGlDy}E{FsdclcO&lK`0swy3u}SL?%5)
zYSTn2bda9|q0IAIWOAx%A+89p)*Du9N^z(PEJU#mP;Q=_p$_;Xh|*Qtotmp2tyW`|
zvSVv2I#VAP02rq=h&JINeX3E07+rW%m(}Wcab+POGYo>v9|^J#LP7Q$B*+x?ywqL~
zj&NxEczuI&^#SNz%EYdWfs%AsPzmmAq4id>p_JGhgdlw^uwulOP`U*88S|Sc6dQCl
z2*OLhFv@5t6mNU6wF0Fhnb_Jf_|!Z&)PmmnR8h^PXDS@pMS={)FQ);7wwMrrTsaM(
zV4DiT2o)7zW*tkYXvHLGmc$k$V8u}KkQZRJ(1L>1qWA}@=<UO*|Nf(M(IxJo12;sP
zU*s=)k#npW`v-Mpq0K7XB)js;tecg!Bm3Q;54#1B<Q>UukdB>9!XhgXCzUZ^>$b18
zsgSv@mnA9>9ajBa8o=toHh}ehKZ-f+)Iep~g5xxt)9C1Ij?F=9A<GjB`OvJkLm`u)
zAQ_XHMv2YPY&}6Mj)p{2-G!woT$!W01~|nFNkTNKlilsh?osN7KQ@X%ebGP5UM0>U
zeC^0-SHuR!j}c|L&B$TVQkE{4xk^~4?L!I63^6)cv5;BoT|^dkTn7C8QQG#*Ci7Nv
zjc;wO)xHG=wH^tnvzi-RpcqJ>cinDt+}zIQOnh7mixy)GNL@A%c9^@@do3tcH6=eP
zF5z3WO<|l+zmaF)4v=ihelZL8b%Mh=O!zD<HoCxzvGFnG&<i)hu_*z>hEosNr&=ZD
z)t1(3(Xwe}BZQk?+Uax?8jvnm*YIZ^q2___{?tRII%1UX%s}hWni%UXS*$H9DV>_$
zD9B)C3kn{plrK5<$=*`vIsHd7a_&hvEB^~{W}hd^ikcVaTg#8k)ic~HlVwcL$YwtW
zgZ)gr(Tb3;;QxM%Q4Y`Y#=kOGyc=6AV2HrSo+%zR%9KM4-F30Xgs&l$U+GK^u6d=2
zAIxG7|JiB$(Mc|tSBv^BZ-(G$FrJo_>N$GzX{zy=cut`Le;!Wtr&Pil*XgkKJm=67
zP?O=7Bx)J$c&SS8euz?f5vfPMGA~C>+QYgY=@f{0MIRfe=p=DDZ=4eFwTiF2@k>le
zS&7;c90Mho#P5!>CKLo98*i%+{Xm|{PQSC$>6{L#DqB4KuKO04$Cw3J+Xlzo;bk1!
zsB*h0p^n=d_lEcZqnP3luoc5-MV3cGKq%FHSS>*L9T{e=o;x#{l$G1M*<NczB~@?g
zYAg=xO+Ns-)~8KM-q^RSaIHuzz5O+$yjLw$Vb7z!F1@j|U2q$Yign)kOU!#nk>#`=
z$u9*B$mE{qHfw0{=%e!CNB;O#6cMcp&x_|)!`<@K*_xGlkBvrQtMHO@h7DncEn&7z
zVYY1n#HEZ^o?mR$7SQ)q@HD3wj=qgnfn3)@M+nb?DLg}ghoZ%AjkyyvViP=zdzWC7
z@@n$Fo1J<=V`&#HsZ>O<&5${Uwa0MpQT3W45*3eRI1t%%vDKK+hD{q@J#v@GQ8!Ne
zhjjJopT8(8@p(Myj(Iyl0Z%E6b6pYn?#LhMnNEwQjP}KMdVhF+j$yNH0sEk$eV6)!
z{;%k*!wPRM%N98NnVuhwM;dGg?bI9y1ewnOwg&aYV9cKRm^-X2s(kOp!gN=UolK@U
z;fyMx{;U+>L>+y^?ZBD29?0R!nK-P2rbSr1E@HC^T)O3<AeM(}PWfrL4k1}op9kCX
z<GTpm!<%KF-imQtaQt%I$8xyVX|wDjk5F|1`^XumhcsQ<eJrt?E8>82QEQQ=E1h}B
zs~G=8WgU5yQ1+B7vJXc?wX<Y=l_dS3`5cjP-u{lvjsh-VXZ9{)OI%DWEJP2Z-JJsr
z_YmzK9{m_SYo31JY#yLlQ*&?mWdC2yXy;%r`flfVf9Kh&CO;wj@RNr&8aGAp#%{Zl
z!X#=FoQa1^a^9BK>MofZxit@1(7pvM9xloGX29ZY31D6(7AAjRva?+`Ysn5gJ$I1%
zz--wyH}RX(lBN{fmVUD2lV4pBRyn$9B}-J(gCIclZW@x6+gARWgW|fwF<gJmbKnJN
zplY%e23GiaI>J*z;0;?hyQ{@!(W83K>aVLBE=$-btSQG}Y6@PlQQoGeLATKmndMu}
z)h#4Zr>;Mhrn<+orb7tSEvcoKhN)wgK~W7$;--!u<6^ujDi$~mkZK;Eid0>6N`eg4
z6!Mna$$XZ&(9UGeruidb!0CBqg|<ZC+RNApa;)g27d;Am6VnRmCh_~E6QMUPyK#UV
z(AT}kj+3vdT9CfKZr*vD1V3m0{dbYTr#mTP>01nL<X;L(-zcbB_*Ttm;Eg7bz$>OV
zGdx%52ncy-n?4%!C`gg(FCrv9Eso?>2G$$aiRe%yx5!Wz*${6dNK`47n~HtBt3}&M
zJvx24f6{v0Jbih%*Md*z1>Smoc-*3$x`q3)-5_+#1nP}T6?v@fWEj1fe5pc6(|%~m
zYa>(Hm<FRxGV&3MGk`7&VL%v3DsNxdu>HA;M|~80o{=wtaYA4pIAvj4kH+NR&`;3%
zHy+WwA(k{?*k{3a@u)>r<K9Ih3Q1Kp14KA7C=0A=(Bq-jxG!SLtGn`aRUGrADmOhO
zoK+O=<bFmE-5izb2c^K%5s94WXhP(`E+@D#@Mv`i7sRyA*IDDu`(ZflK(#8J%9R4f
zwNry02@6~5$M>TF`pZZnv+!M0e06+A^WJ)(sd~`4-H9QkY{%^(J<7j`c%s#ls<;)b
zN5Txw^wDmp9Dl-!`OjI>LFW*d6k1W%1xi(t(mfPNM-e5c!135f7F9<>clM4N!oQo&
zgkw01vv<K*FVE0MPx*jx)Fob+?0{(d$J}%>N?d7{Z{Z2^+fn~l?lY>J94f4z$K4Dz
zQfl!ScGwY(DN3%>Y)t1HYXgTr63-2~!uu|1S78JjGD3?xcpN*`JG6&ZtPvqH@-LRw
z%Hp>a5RA0TOUEr*iB<YtkKQ?vTCjT?rVO^EKg)IZ(OHA;T&8rt11+8oh(S~}fQAZV
zRU@YpWu(U!jP1#2gv{F^A*T&Er96<+^wU}T*5w2rJ-&c}__VWLY8ykoTi8kz`E5`v
z+DWqE6>}y!?IX8%7PsF&yHRrm!++DKrJr<Q(&}V&6b41!-j-g+gK^x=uc|Dnru}wz
z(jk|&>tQ!-r{jp<q<xR~=M@AP1IE%4yJ)_{eDi0<{E;7~HMf7CDxuHj4o**;Sb<(-
z9tTtBX&<94QluHnRdh0HHC5^kI3A7S8@;s5My6MDRl>E<a;vNVE3+2(yD9L2gx&wO
zX5_M;`oH2-vmUc*L&@9=t2$5$5gMOy-TOV*f#s9fv}BX)ph|v{{E1`({=D2HZJw4j
zYJV#Jl6J0n2GHngkfmc|r(j&hm4mq+Vs4fH9{dVwycp;R3j|(uj(xqUx7z*xEa`&8
zfNuQ`h^tG?lM4OYwr7r?<gw<bvv+To03UB_h{n$K1{2#D`2%`De@Mu6Rk0y|uL@!I
zySTf7Jx|8eZ;n7Q)L2${rG<%@U{a<w<am6(?Wyi^+NA>K&Bre#hTG@^?Bt7QPOl1g
zA0|_r!@}qaa9}#Z0zVsYqrlPAj_@!JHw8Sk`Yu`yEmy-@U4FY1`NRd)ox>>ZImH~<
z@wO_&+>Vc}B1~0%^k4@&*VuVEY0kF*^6mv&vcaV&w+R0yuh<A9h?Xku$Clex+&xio
zeC`%<ebd$lHz=R8(`gpNT%}U@-q{2FmvWn&s{F_wb5GQ|bl6Tb<W10x7LYsEzTi+0
z-g#Cz)AJFv0C3&Fa89upL;zqZ-*mHW4+r#7hjKtv+vD-hIgY>JP>k^j7F7?_>WQjo
zuZJr~#S?%PVt~;R7zRX9o%vLU=V)#32Q{H$!$-v5<fv|k!BGXIIHdi&)f@v|;4vq`
z0MyK(Gk@NdPQyDYiAJ(GKSKGCdp)lnfwk&6(Jrq#aD+Y2>@_p-MzoYUl4o1}u<f34
zF$B)&gICdpWpw0IAwsz;4tEUPE+#|8Y(^ARLob2A_3uxsZ)Hr!;@wehVs4#3qljB@
z7?;Ubjp_j8V8WX?o6$1KX%J*(ico;@DxhW@UB$ExS?5gaRf+@hd{3by`oFT1SLfPt
z_L&4(hdPK4x#xHkWLbm_i{G~4#&D8dw$5S_FRfr%$?svT<R6U2RZ;saxk&p+(&8EH
zE0Rw2Nuy4&TZ|;kxJ86l6W9R86X0DpHOr;PIc(?X!ARWrVN-TN1r25Cvml_Xgruaa
zttcdxk@RZhFDvpoVPv%!%;k`tZFhe1WxE+=PqET0*xbYAHI$CL<FeOySnAtw4`io0
z&PHFM9@+?pqCue)gQ{UzW02n;d~2@8CBaBNhniM>?&v4ip`&@^<9})GZD3MhxcRwu
zPWLlNDuvO(T(VPJhMNeUnk=(j`B-u6OP(nc{2~jMjY{5pC0E=UuuPN2GBrjKcgIHD
z3C^gB?vuZA;7C*`AY!blATbmp%GrYzio&-w&T5D}k$(DQA|~w&IzuvMiMH~oBP?oU
z(TIQ?XsZYTw8G-az!Nx?kqL-_6raZ6N}Y8aIg1)<_)?Myk_A(I;F2bKIb(n1LNWDB
zL@lSDrev%dP&TmGq%RukrLt0JK9!&)UD$p|7a}Lnm;1^F{&J?>FUE_7$K#Q|M38~D
zQ2>WmQ4o$qn1NXk+gYyNE!jE6bLdiK_hobUpV8^dW_0l8^|R)2boe|vKKwp9JdSn`
zU%h#KAS`-<t;i+6CA)sNRy<ykhr40rNcQ_dncL{2Eu(KwdKk_Tl{VR=msJ9mbGX|j
zSI(CDlWT+Y*6UVTJ!xVj>PcOMD|k9Ey>}$FU?9~r?zW)+)dew+P_`a94`i=lV?`UM
z1-I$7x3yx}0?Q0rK9D1`uw~Z?c4aQ@d%?}v-6ZHx%~aJ)siwse)l6%ZBIUls0Pfw3
zav`x&Y1$onu@)XNk(U_74Ie8wU4czB0748!pUv-T>NeIp{9@=Oxw#4j0yO!&ARM0u
z%bZh{#&DA5PWMHJY1bA>aD4ZG!r>(gFKkkSiHP(@L}e8CG1tYS2!p``__X~)A8nak
zn}h?>y6Wzi@bFa7;=|M9ANLPlM5l+*ar2~k{9QBpSM&IgO)V)3?{4#?9_{YDdKI{N
zueis&6W->}M@fQ4L~(bN#GM;5EJgb41di1+akv6|ZmjD(*qGHY@?*dfHwO(SuUe=~
z6=wr`;E=3kT3!&*Fola%xGE#IaEk%-x#JL<#HWgP9GkPg)=21Tq>&I6-cGfWdX7h2
z1%yJjDpi*@+L004@+z|?%AUGNI!<~NKGsAhwSklSDq6H3e^(2*=#&k3HW~x=uTo;F
z3fs()I~BvpIEhqUWqzX2r>t|nJJVEb&S7x*vGWq!8_>ZrZnu-+IO<IBIvJK$Hcf%&
z4A<4+Il*By>k*|FB2r>#q1Ahr_*SyTOzFW^h$V3nq=(3Ll#Eu7j$So)PMRLqVKhm$
zv#8BlDBK@O0W#p9N#dh$$C0^N9X;u5#c&E|5qs?hTTNR99=7*?da+#El!O^Yh!jmw
zAlL-(6AyuYkWQg73frCb9h;%`^;|$+3uYzfGASYuZaS!!jyl_V;K)8)qf^vJsriGc
zs>^~7CU+^}OzFvA6LG~1TFDW`rbKwaVl{?|?OgH4s*ff1xKVYDs7X;GPoKQpp|}sh
z9A2ufF?9h1=SIl1j`6TjnT+}|15iBl8&!rUDkWXMVIhS4n#YTi21>x34V(`L&LdMK
z5~sKMQR#;BZ&2cOnP#detA^TCSjh=SrIk8WyWu%ZAstg-I9Sr#N=mU$HdUlJVd|y}
zFz1z3bWloB#WRTW*%8)Kd|O>M(8*OiRdJ9oFI%{<6as=4jS<DhEYM#{weJjCPN|S}
zM`FnJCxg7V2x1jmFM63DktYt4NO*8~0EgydkOz+r5B5+xL}~COdJczaJX8B8>3rwR
zviZqOlkHs5%ls_N+qr_1zO%X=jO6hl>x4TR=x69Qz^q_LyJD$!-4cBbkAVSVTP$tL
zWjiypvb5xjfzpDjmk?WVY`01-xn_xBubcDkysYr<vZ6_^n!DuFrNy#AMLdW%1I*_V
zVNO1KUuW1S!%aH@_IWxmYDghEG8~YnMy%vPGRD2iBI+a=Plk%uvJ%Lgt8o`}J5qXa
zSjZcq+|}XTH5DIkadRN)%DDwGXvKT|eC3S-&Su<~!+fpS=m+}f-RSsDa)(2GwRr}m
z5wyx1;aFNs+1N_=HTRN?h{MyT2G`<77ywI~0RX%=u3KOmmHf?`nH`RK`Bq!U6*OiO
zeMBMdvslJ^ycOM2%V*16i&b@JexlC2l^src?cq&zp(?Jo6@a&JzLL#yZoj(h>LOha
z49=^3_LMvQoZG3ODi3pdX%xLT*NN1n5cBr|3eGE7FPu}mB<7Ru`06t03u8}n0X))&
z4Up~n1CuEgI2aytjh0U{Af027saoH<DPDn}0^EYG<#a`xrchrZQ;-Mv+hThK^^qAT
zaW=Qj*;Z1%Ib}Bqa#v@Qx<uE?ear)7&*mMzAAL7CgCkrQ4siHYWkG^nVXUoUGXYjM
zfktgO9rB68RN(d6+b!?WTUp_g_?)kbd->XiQ$#ONi?+90d`)Rf-F{8VmJG$(rgFqA
zdg0Nrg(RvwxXQ0xDhcbheAzDM7OmHov2ypiVsXtjwLpw6{VuN&nKb3p<7F=Sdh|O%
zGV>{5uKE8YJ{9V^*fcN;9H)cI-@?$<x+!I9n2QQ%197`shB0%%O+#jsF1|1hcNvMS
zq|1Szv7s*SI@?FE%QD>fx*R|-^|#nA3qzEb2(Kn;pS-t{ItL@lp!QZ5k6aDAs}7Ee
zyICGdeU@&@D=<m?h20biwt{Y|`7a~Yh!X5^E-9@M572--sTT)|cP87x<9ZZue4pAu
zz0~LE<L;CQ$9^9L65Ze*wJCG0R8YD6x$eIEsequ1Ys))DyM$c~8S60s#|{h1ipY2>
z1QA4qv>Szt!X7}EbJS<(n+}nqI`{C+Y4bSRKiF&j5W#1>j(>Le=3q|-?wBEdg`OI2
zX2rdMVT<u`azX0F4{0V&Y_6!=(xSOJPw{r*E-h8;ZM3<iM-RdgKlGTFhPaG-eF4E1
z%!hI*DW4C=twlVVIv)&u)8UPuVLrH-+s}tS>E!0aaj4*LG#_g8I3LcT#>@vp6!YP4
zab{l7vgaIT0u}wr+^(9|;Y6$4P&0dd#L+`vJ9hazTNw4&N$TQ9t1UHqbEb^!+%_Xm
z<N1)ZSfp<&n6(fn7Vsszv!JAdq|%fXQGzpDqby`?=XoDu*r@fK38l?T;lXSd!t`qE
z?08;NL7>6obHIvl`k_5%OE{A`EOQT+qE*SwKuMI$={n3ejeVfW-7GtPV~>%vQgary
zUbT<@s(l>~9h#MLyg9ftv>G18Y2xcVSG5`MwfUyaa=y+W;rznncwN^XK6qcCf5NKz
zhbpl;P(Q||*grgo_D?8O)Q`>6=;Y1O(c$rFbMN5{hOKgcq~U6ZoB5YLqEN;QlZ(G-
z<xgYTzt{TzvgZE^YyKYBdVo;xzU1w(S+ykKudmaUVGxMFybD*djUdDk%~e<<G|kR*
zQih?4_8hWxFcHP4#Q4oei4|eQbv%?sYsSkSO&E)~_z-c+=5%-^1nvtkMMog-sy!q|
zYPYE3GlgeBRdnzwFqJcIX&A@Y#5hpiDuuxgO1oOt$qnz!6^wWqEfRerVb`LM3Py%G
z)7!0foQ*dNYHe3-XfdtiaT5{<z@l?1Jd~HhKyr>nTB`Kf?UfBk<<@2dl*3R+ngYp%
zVY!jt4-s)|T2~e$AB^7>7nmg|wgBU31-&6m$XVJ=$2ZY%G8zst?Qe<X<^(ij+box6
zD6M9ip;gQ%0glqib%O$Sd@2_P@>bm>xTJY^f-=1aJ42VHC{kT2*i21gifBFWVx7*Y
zvMU(MJ5-d+HWZW>oT0p%YbdT?xU+@ZP#A@J6CcD3T_{wWed}q=@LF>{g7$+JoSzCh
zBYHl=)RuO}3lcz`NeentG5&zYdrUiz74%U$58bkh{<%9vF(nY*zzZ!`jzBBa-EA^`
zB@NyRFXyMYpl?NXsKPT?S)iWX$lHB)wFI&xZV3NkKP1b1t@8O}O56Elhq40<Lc?<6
zpOO{eARCX8xEI+lZ_(yvRQ28&zobBTxSp%PKqj7#ezVH4Zh8$z4eCU>v4U!TL~trR
ztM=TuGum1AjPDK4!gskD4(DO@;pQe3(MgHnNj-XqfAOKR6@!PhpZgCt{K{BF-Cx1i
zhfa}Co*YaS2zP+suBVC0=H%ByClZdH%FVR1T@ba!r4F&+a5L6dwzs`$5VwTE%Z0Qv
zON5b?6?yvut?YNMt7)e`E%d#ORcTE|>Q2fTm0?>xq(E=<R0YD9cSdfqB}b?em{UE}
z$O|^9G0HG<_gp^e`1PmTG-(GkrA?ZQXJRUz*@@8Y>-~hyo+#i33hc^s*@j%vhEOqn
zB$ur=Wx>-C0Li0SnI81w_X%oI2ZNppAry~|cEmYuvLRKGVq`<+8+NZZ96MKI?wdK~
z<z-5sytV_#9po3?7_wyCm6M*xN?1mJs&XVs$7!z?1TI6+mH?MAy)<y?_fm5P>z~tT
zO6g3dy-3vAs@fe~B{U~}&%N5kSr~i>us-9-?8S@r+_SRs9lLWV_X{UVS$v)gT&Blr
zLO%zQXyWm5v@pjxo-9fz0E}zSo!aAAJ#t%7$RR48bh{;v+Idn@=c<*JeL)U3TC_VF
z(a_;Pi^)TsjwE&H2mm1ybtkMsfLdyzJw2tCD@*Hlz0S|W?kN=B`6>m5zr6Oxq%^u^
zqMl%4YMHvoB<}H(*lUO)GjvEeDNYk167%NrSEEdd5%pA-NVZet_cq4}+Ns8FIB{(@
zXA5D_kZ>uGhox{9P?0zTu`=M&66yf3{iUT1H=LCRMaT6C>J+uuR4Cks>yavwN2C;z
z{ME=Z*s<DMk>(;&CNgy=B9L?b$n8J`8n5MQMLqj`eKIOc<3*%od+(~3!^!zt6z+zN
z9*AN^=&;2!Rb*_iVf$jX(W0KP+{|!HM^yK6a13J#f?{dg9g^y(fW((YUN{FG15yQQ
zw7QEzV<|gmGkXEsV?Ii1tlD#}8lXP%78Xz^Ob@;oO+LItXecJ&QK2E#{KE;@2jL;P
z<aNPd9P%<pMO3y0JEyGi&;@W~p3J5*^LU5FFK~xtdT2)!+;FAIj{6-=Pyb>&TJdJa
z1Dkz!nrUBpM*E9n3B)yub}l-uBlyC4)*XWLMQNiM!&8OIPxnGQRecnO-RO&TR%FIa
z2Fij@3(Xy#>hmGHuo9iWsh%bjvC+-~Z)q*>Y~XR{!cEr?fz*+=eQIHCZ>g1)Q=E_<
zWaF=g9rHI7I5U6cpt6xpl`e$lros2{H0WhLjJhGe;#|`UQ{)8G29>x)Gr_H1SoU>h
z+1kT9`@H*Z;g)auqqqF&yFmf2ECOcR4`$=+ffvs<D#fZqwnx(sFr&@n;?nBavJg;v
zYR^TewkI(+T&}K5R6te5U5t_Y0#RngPKC9!lpre=Hb#yVrA?_Aa!Mf_&c006E|toE
z!&BfQXhzRTVUBi=Pnu*jLJ>fIXuYib*Pr2sEC`nQ_tUSFZ~pP5{j}Zr=6q$+&nCkm
zdC9EMpjNU`dxc4?c|9J*D@tsVRM)r+wXq66PoF%YzwqPzeZ2Pd*H8az?eXeYk5`|p
ztvz}4U#pLv;_GPj_dvh|=R1n-496ebU*v~kFk+AlbQ6NVsHa3~rzx*Tan!}I=T`{@
zKi^$iGPWV`;tc&oQ7wv^59DZo$hdov!1D$?u249s;SC2)ukO~ON2{yfEW@vVL@(l`
z8@-q$u*A;d3CO$`@St&F9{gY0AEj~Qe6+0qQ72CZlM$@3-9d+ff%LG$*@V)hYDE_C
zre3C(>+8X<X}23={Ugrl{0dHfXo&Wq5j|?GHllr0-%uD<UFGOibQQCGT_??esLwEV
z41!Qs=?Yo(=*Pi?LSGEd#xZaebgPQT(dBqNTwht~anp^#=wbz6u0U%m4d4suc^@<0
zFjr9ac#1n1#_-=z0{4f~LKTnaz+O|SkWD}*8IBSP4#Xa9?$tksa+&no=AJ+50X{1n
z7X_-b$Q@YRwup+6<%Q4Yik$@04FS}1oi*VaQxGM+v#v2qu#h;!l+Jriw>at}Ur98Q
z22U&6ljwW{C@Q4wR4Gsy(wja7260V+c$1-SsQ@%+s@$8jFQZE!O0=m?E-<nZKtAs#
z*UEYV$f7S4_UJ&#nLMevXcYH5gI<Keg;uUcDJ%sH9g}4oey2OQsE#fNY#2hWS2bm&
z{&PEm7f7=dopdxfi<PwoYWs-}&A>Qb)}k>AqHz2MqK1+8-AHBF^T=$mDv3otw#Y~i
z%4jJY2BrJz`lNePr&^TAj(jIsD^0%MBEB&9!%&rqo<UJuK+pkl=*4{%+gPEHu$M(W
zWDwM3Cusv(k!TAnvpCEL?aa%n)wIubc-BU78>S6N=L%-^j6B}4E2Ep}Vle2)j!=VR
zbYG6o0JYKD>Z)mFg<;nX#zZ}s_#UkH4E}P6!mff%u)fMe$t+5L&JveKD|G89*0PB;
z_i^f}dg|d>B}g7~w)nt`7RTtqWbJfFE>2KCTb6k@(U*Sx&$Qo>Z@E)H3`Y_C(=SD0
z_^El<r>0z=C3*WSsoQ-;ck~-&vwwC2hZ#)F{#*2l&NlE__l#T_;?C|M!&BhFq<6~G
zViJz)w_M>w3tv0LTU6L>Y?IFS$FzW)cNJ2#<NuYrlaAt=1>3;7(waD={0!FMoro-)
z53UI8c7$*Eu)@&{@jG39W5&W(s%cLuc%(4iI(qZ$73{0~2dB;B=R3R2YM_;xqgP!A
zPJTly5O_i+4sbRs--kHuQpjCxV&okGpt+aL<NZ_254r#1;C1uhv~_sYJl;7yJPviB
z9xZLv?g!Aa0%Vul5I%%b*AsbTQoUgwzx$a^`ImoZ5*}`tt08lg)hi`<J0#AuP8UPu
zr21H;PA>dgCLsK+YJ2ysc(J}(^Aw*EtiFDRvWSC#(Dk{s!ti40E87BVFsWM5J@jZC
z;NcF+<CQmPGbrKhuyTIAvX=$wi)jD_2-ml{*rQVV-sb!%3;U9D+qG4lWm{!bsi46n
z-8j$|&dOja&?5x&)4R7OJioeyyW3nYL@oBn_gm6k<!;<pBf&uxJFH4MlAUuvyu6^4
zH^wyTXEZjss^edv5I0UaFJn$}^<mH@p*bYho~3;Z7?i=euYK7F8pA4iK|1pYla&Hj
zp4`PR`IQKQWnUEGiXv-R_Z3lt9Z`fIic24SfzybUvuBCFK)}=Sk3lycUH#UzXPC{$
zgC1hh4W=d^UvkDH5-MysCX@#-Gi;Xp0*7ymDjl6o&d*WE&a~8<bjl`o!S~bjCiu@X
zwco?>eP0hdG|wT6RPo|C0S1*N-g_tPCr9_oTXib#w1vi;n+|_<k`-EJ)$YP1SLbnC
zA%_s>XzZFZ`o17eC)K<qejx9edyZ9?w({K8osp4D`E3!W1CyCv;5QXQ9u8%zHP7Ty
zxvq0Zwpa7FIXOT_$%x~aVVXB$%DwoyRuv^y1jssHy*#P8HnTG$!@Vrh7}28)b*u1V
zM@;>mTHBW(Zmnx|MknC<L24>T<r7M)pwsd4m{yq4EUv(YHY3*y(Ht_q0+xq5ph=H|
zj>qG5!=aX)RBOhyRSVR_!2H6=m8UA9$FNApq=ZvyBIdx%GL$YNn97S;VLb{bt}S4B
zw(ty0&)flp8jvC@0D<#Mw=NjmxA1-4x@yYFF?~mnQ~T+*X&G=`^48jvs1%#~0r8AN
zhhptbDM+G|EUHyl+jEf=W)*k&G{fA&5XKzxQ@)hKp<7{jt+vS{$j1-Go?LMna&PmK
znJb*l0uDy~P8Gv)iEW}_XW@&|f=-3pbKJT4!#7Q54<Np<-;w)(2w5gw{#Z&4j=N@K
zM);Ujk*Cwn(xdvCb(B_7$}mnD-qNdHaLCvY37|Fpw)XA~#bpdtSz6-=i$QQ31(kVr
z!<F?rQF(dU-tK>@Oz({oc%!W4)fs4z%5Hq#jl(9bc6JI>SK*n4lG7SpElg|sl2yh%
zc%f~mf0o<H1n@CZ4zXSJ3<@N}EbYQBqPjqB4@Z-Jq5_^-2z8Lm;;wqCt4R%;eZ9TD
z-kJ1zH!Zv%p;#)_+I@&?+%elD)r<UG925CV6+I#y+2oMr0-b}BbnR7Fl}}kf2DIsV
z2`};~!cdJM_(?W}p_t@fE>ls7k1{VIjw<aQ*?sn+k{#KQwYlimD9Hu)lLuS2S4E3k
za}rxH7_>rQW~blzXOax}@JRhRk(A@Tp?U&Ao|%%av28vD)2NS<-rz$*%gqt`X%SP^
z-N9hg+1@f>WbM<#y+iSxMB;WbA{VxYD1QmB7Cb&WzFQ!ZjjNAxp$xeHVj}Nz9&z#D
zMN;AABrXuX_#DU3XSFcY4Qlh~xVgJ?+T7EuEC9fy3~*FxRn#1EPphl7DKVl>wCsZW
zmf#yG&20S6qbZ~C{1`)7WVtTYx{uE1uOtrf%5g6lft=pi4iC_TJ17~cvTgIP3>@eS
ze7`xv@I6I#mFhJ*45uJ;VsWnmZ*pD17OO;Mg)>5CeV9<q?M$dIe7d`eht+9qe>9$j
z3hmEcsH+7&BDZl{LVn<8+92RR3(=W#VqlA61j_BDAs&h3Z?cM6C$>tS7ihkMRdgwQ
zGV63$ThXl<XU0)CP>X4H{2kjw=6RXu7=b4dx#d&Mlt@pS6A~Dmwpj!9QZVrF4N2&$
zh3Q72RxC{TdU)U;asM4;j!N2dE-pN46Pv42r7KgSi;`LBE?sFq_%&={BTiaL5~0=m
zXmx<x*&2mh&?+=A82OB9q?|Mo(9Z<U+@5WXI>Oo9<@^8@CV??ny>J`%W{{-O`oY(e
z5SCc&0@ACHlnXQ{(VEhWgfzic18mDbqVj3}u7G5Ri%}AfwT*@7sB%lydKEs0+SfJ{
z)PA3dGP;6yu!ERx|J+uL%ri?-x@itN-J3vnO~)~S*i8_XWNTuOXcao`D8*mb8l4#$
z`;dJNy@JCf<GaDz5BcS}Vs_ex(^T3CDT|VX@=2rh3g@j-dT&o0&rzs8@t+I&{BtPD
zg&vEwK}Gi;(+K|+T3^xlYSGz5(ULOr@kuGO#DaHW@rZq(iMSFdnzjXM+4gACPk))9
z&0#MctGkknr}auLjKsj4z7`sD%ok1eoH{;~o$5AdTL~WRaoTiWp4S`E(H!UXDJ(MS
zcb@YEedHaY^53+<<?In1OT(jAZ(i&lP|-+5q)KKhPA1TmDC{6KpE*jQtf{X~4Tw^Q
zAK!3=weR!6@Jv<gfNl-Z7_v-rMGDF+VP_;GSh72s&=rxSDP514j6YRo^kncX2be#t
zE4kV8Y%KPp2tspU*dW%obkQtwmGImk{#f!1T`Jy^N<*OTg;;}q4AE?;;1|;Uyrdhn
z9KAQw(F7ICIE|%-eFc;gZIh{>fzdjfW+7M%Xf`9%+rXBEF>0ZN=dBl8$mifWqk$P9
z1eaCV$N;u#$C|F#nzU75PO1%7^WC5~Y&*)$=;^8&e`Y)Yt}3a)zRv4iDCpw?`oGGW
z{D3OM|Hfp}uejhEQ*mWX&6Gb*^QtnC!W~)2udqc8mH||vnaz{JJ;O>y($gSabBks+
zPN723R!#%5rWtDO7hu+QpH@B;o!$Sez=`Be{$H;#J<@ppFzoa-v;WuH)2C~XJ^!!A
zU#)%h#sBMX=>N60HlOQP9>62iT&1HVI(XAOI6c{kzTf%h&g*EiLr)uhv|`KR)>ZsI
z?)_(Yktuud*E`4mY@W9E4|5)5fdT+R9pyGt6|HbP>{1pGFnBlCbl?lkxa-T1vM-V`
zbBb5^h;_?|k^Tj@yFjF)c<w^<+I%`NpHlPbUEOsrM-h<sL~3w*8+AO}(L4M_1^5J>
zE-&+kcltwmUQM_8rskdcn{K|QubT(Kqdi5xjf5Y%P~xg06lM<LfBGX4q_p>VgQpK5
z+c|Wpoe!QK*YWrMMllwT)!+SlKqec=ZUsA8wY~98ZstRN82iEwpyJ+_;1uv-sM!TZ
zBaF9s9`p=Y2!v0IO)8tpNkc0&tW?EcYBcJR!sZ5v3-fn&#=bfT7SOz&3)=}n3(>7d
zWod<Bz(;?!MQ8jOS29%5dE7_EaH3YuOAvP9Oy!{AlAhJ^yp!GZ?1=gKR`l?H{)dGt
z24TEdt;YMIrEX)qpJuHLI;0QH?#Rx&gLn+bmGyNd^WT1pW~|v6z|NA`s_vrUE~AC*
z!${S5sPIeOgYjYP(UY5AFM&a{Yfg<!KYr+hAH|z3T`}pN?{wZ_v^=-!OV|g~aT@pC
zs<H@aY~$|3jnLu@4FKq<d-vX#cfnaz)2QeOsR-vD7F5-)g^3DML<a)=K$e*t-}hW|
zW;lsk%2F4mlzU_o`t=ZgQ}z)0f`6DS@Vm08_;lNThWO__^*Rc7yMYs@;{D3UaK5=v
zkIkSe^DSb-&ph<c&}|o^(VQrC+Ac%stUVzl6`;uNq<eT9W>UhcRHS@xk+@tg!17U)
zL@+ev3%F+LR2TN)OpVAsU}NYSNpZCHFvv6_M!kUtW-sL}K#d!KS9ME`b}Pe71$8bl
z$WZ%5JRDZ^1Nc}8ZOs}Y_nUSqYMMlIa|x%`c8b=7SMd#BRGz0J7&T!<g>a!Gi-rhD
zqKzqqLkn+CzJX2yowU^@y4w#my7S&x>}`Hu;e-I6e)@`HIIE(t)v3p;ZzKC<O@4VK
zzdV*-p1f7)G9?>rz7DHCqyW_qYseQ@YjS2t5OS(L7S;6TU@NL8!J1D*P0MC=T_z2~
zmB|;|>>GIicB}9J*u6zzUW#-T+VKO+)l+_KS7<4~{@UTSP|fLkWYEyy;;1=V#1hS1
zp?<McABbun<M8gU8(=guFP-6JT)298c*xg#@%1xu)KV#+9<4t41`D+>q3i6z^^;TP
ztpY)aMDGfGnAycH#(6qQfoR5BRM9T8W?;&t@pRh+GTxBNuh-S(wOTzoP=5~oRaKVo
zTCFzekF`m&%0Y~SZs;}4y|24NtXYPOL7LvDLllLm@hFPa^Mk|FCM5wM4o8C_oc+gm
zmPdl%RM%%o)FBrU<{*mEz@h&_nXG7W<3>w<3X`jEx3>QGfBv^-@7u>|U!N|mu<Y;?
zF_xMrh?8v4@5*^vj(&~54(QjxzeLh1cplN|>cD(B_!oYxMFDFfJHvYEyY`;izKcq&
z^M3W3WBk+S1Nj-+5j(sxLW;-QeDihvpq6_<_B2p4XVU4)h>`1Pz*O_Dx@_a!NvS|e
zjzYS24$)OU0>Mccj>vE}epgq=-zb2nr6s0FVc@EWx-pz&m#s52jyT1<c6A04wC0Me
zEtq3WyU*4nG#v`Zsle+yE4rv3A;U5vvt;~yH0Y@{nN>~iC}uYrNSG(+v{KWYga-9s
z3-Mpc^^4aaw*6o$G{Gb%;hJoB2buEZ^Z0fpm?aGOLXcnctEw%rfKFL6@asJDwJ-v#
zdb+?8-HXJ6EVxhB!FV~i%JM>IfWJRdaB#_>EdJ*cQ$A_O7%Yl125hXzh3r7f59j)5
zj+~^AztCieE_q~|{cU0;@NS#!lwo4@x>g86FFPYA=2oNj-U*M8oOpodqdqLZkLo&9
zEL$sp^Qz@Vrqu^YyP7U7p$Cj@?-O-Nk|qW0e6ZE8eRN43Df&&H;&A|z$2CY6^LoKV
z=OfIwfW4<hplQWrbs^>6^tDhR!DR7=L`g6O_f}Ko7(Ul2$^G_1jdP^ILrj@~f(IVG
zR0bvT3A}5Z<Beif*QHxu7zEj3rRQ+G-%#bO6VB5omZa4}<E>dQHAO9+zQ^bdm8ysI
zJR7PTIEQL~cT&rU*R)M>ybXI|!-pznhWk)>Pz&<lBeoD|FuM*uRPIA2j&II|2n)`9
z!JJZD;rhE8Chp<9J+A2B?_w5dj@lp{>%guU)r`~nEUnRJGIGGrdajkIrI2My3%uBB
z?n9gQN35eyYToa*rPu|eT)J&C$@zAcIrbHxnLpoZ`Qy}*W&stbP&vDjK6xtboYs68
zxo;L2q)qVvXmty0Qp(TBDBIQ~!I3K;?OeDLaRahd;5%$8{$}K8gJ|cesnwu&of7hm
z=DUy&+{yjb;=jsi(~K%;3o!Nm|Iw-+|8?!@qc8XWzsFDI_cj3da)0mN-#<G({GM3B
z>GA$<^CWkDpMRgfsIOFvdDzL(9?BxuYpbhh`*kqr8yy}!U#)!FP@!Tg(PH#}9L&`Q
z^&U`GF&)+w19p;>97ov@cA)c=Y|*<QS7Ggn4q^oVPcpXS$3q{9*~YIz0O6_)RZ+aR
z8k)4P6I81$fg*2?vBo|{`(S(j4%`1bji9SJ;)a5g!c*Fxd*f@Yi;gww5pdr<e4G!L
zYEgy7IhM(XB%=AiNUv4EkkNG3^dOkPFR@CN0#&NGpVS@12KVPG>2~X?Nfp_op&;Cd
z;rsn>oncoQV+R9NXz5}5&EhJIY5whia?s#4L)6oEGJp|NCUEJNpAEVb?Pn3|poSD;
zm4oVnm_l)JOZt87v=XB<0E-Mm4Wplq`aoY`Z*;zoIV`Xe!cN_&z*uCZQes`f*4Uxl
zDJrsu4u|9HiraDD*4d3&s--6#By??CNR#R)8wzfz>*>tD+mSFtjaW#Ot%e4bzYmHQ
zc$G<aS2!9eU*<F;jZ~EcQF;Oswx7KsIu%bZ<Lvk{a~}YG{2-oiGgaA|O~JuWIa$U4
z<Ef;&q?VXPB!H?YZzC+cNX8g(pZcq;Nu5j(^y}Y2q~yIgWeX7rao8jI)G;t|Q()y=
zC`~^iFe-1;_@%;Q@C9Jpe_o7)scAo^q#H7{upx%bVJcXz#f9gF_#TW#u<>AiQS?I`
zIJuZef^dCFJ+%gVlDb-7*iz9{S@2<J&IBfFj_IsMf#ZeD<j-JQ)SoRwiP2K@aOENV
zrs}lp;R*7nhcX01^gvuPQPH{tSPN*7U5)Se@rV*BS}-u5P_u(!!29%=ydfWI&4TGG
z9bWixE*0(+s)~ZR`BXrV>cHY48udvJwj6OGFjSQ2E|d0q91A;nCMJg|PzDAD$4wsd
zLGsAfO;urO)JfxuRv$$-llTT7VZz#Kt${D9;x<84q(q!tQ&%(_0kjtPFEC*HcWH99
z`3j3}mxF@?&aB|xV9^jb;9a&?+je9N3j+MsmU$756Nh@1e)on0nNhY;lEYJ-0#p<g
z7UARGkV(|bH3Uy^mU}}xoGj^{$9xUzpeG-B&Rp|WZU5bI+40UbE&}(t$ED|KYN{k&
zC^?Sru|HqLDrJB@<%L<%p3p2^jxO!#3J(-I1QQ;vbS{BQlAu}BVO;55UMdw_P(H^<
z5!Wx0pk(MQblznIGX-EJ%oN@dcQRX!`aYGjvXuF7LmPJtIZQc@(~)fR+uw5l&+gw<
z-CAuUG=@M-8K|J|E_gu)LMNDpa@SK-zaHgUl`(D}6gN*Zz*8vZHaHgsmb1a(KEAwd
zcKJvS3`B;hBcnoDGUo8`RrehYnHSNbD(hiIi6!oJvA~ynky}nkMrF$>bh;2LB^~?g
zgN|V8gyye9Rn(kJD%STK<BY;Epaf3<rf1OWN`wR0SyZDvGwYSpbnH;OXFANgQAnyx
z5J+IAhCPp6ad=&|knIxJeQ|mlO<mt8X9pxD-h0#Y!lRmQ*k3IOP%e2f_klIv_11iM
z<~4t<)_g$N6|1^ixT>k2p=CV__1m^W3me|lqi)g47WU+Rd#m~QkA%OX(}Hv5ppcVu
zZo`Kgfy(M?&n*uRoU(KgqRwYEk;IN4>7u%nA||I84?vVlv*$Fg-_hAnv`$9{_mpIM
z;yJ0xaS%{3<&wN;0^DdU&aFMZ#|+N+H*i)2wy8>}yYeV#{^H)Y6xnOo8XBh1@8?|*
z0){L$&RpQ67UdFC@`=lr-(hXPgyl?WYgBtm(WsKnajl-o;Y0?IN)6|R(CoE99?AIJ
zjW^!@DVO519lUw<s^+Y)f{wY20U%4h5a|bD#-RGKyxnOLxyyZWFU{KVi1<5m{@5Lk
z<_Q7@DSx)MqP^4B-u`jT8esuEPJ2w6$X<Ly-1aOH3>UZ~1}uJdvInax1L-kCU9KG>
zc{^ixQ~1?7o1DYqcHYP6YQbZ*h9nDI7Dz>u#crRp_D`UTRes-qKbT^xR`cw}1ovf}
zB&fH>)kDfsVWB3lq)32Neg3X!fT<;|vKSdF&@FFj@qRLAUCg7b6W6g->&9+4Gzz?n
z)bE3@Va*y);giB2d7bGQ^+ka*5m}hrVFcXinYyfcIe#C6MD@ZA#)S(pxU`M!%U0RS
zwY&Y6snK9-U<{XR9>JfB@nyBP{6tEzvR}_rE8PkG`_Z1Tq=VhJh3fx_&5%pb&!KMh
zA(VaS9>k+6m3^=!2DZN59}Y4dN;s(HsJV%H*=+e}&wvhcNmocd15W$ijP#SEiW}=4
z9L>U%d8xi=5*jaY{#9pF6!@H@rRs{66t&nbi}ErI6r?R*9mev~Bi&5!W?jiC)U3n;
z_0{Fffu<y4HddDy$%o=}v*EHhcj_-OnVgFGewHXT8QwH$Mu?Op0c%l5kH_PLVj#AH
z!NATe(zA;I-kf;Cfwj{K%Iv8+JrM08u;1b|ESgoMFv3pzQw#(uf1&$-K4hLGc&smt
z0Z@Mb|LE)0wI|;F|C1+Ao_@Li|6AsN{gV2XF7=Oh4)zXTpEOTTo5#5ufA3{ss@LO$
z!iM+9CKG2b9=gdU<s#)|lZC!<nt`2+;p{x*`%uTa6t}W}CO3YR9NqAydRN!*bU_ih
zW<NXO91L60T197n<Fo=Mi;gmx2WVB&hG<hh1%82o$zaX@ma*&J92_3+HIJKnt=Bt8
zt&=xLM~BC!1z4K&2P5FzNv9?8bvoeChU4f4rdQu;oDE>=o!JSt#L6laQ-LEEiZ-+y
zuXeFTY+;0|YuNwZvv=7@DU#fJ6iOM}?&w1w48U$4HnZq+%7hA-q=@)LuWcg`j53vV
z{i5Bb!6EG?DP7<sY#t3udzI2Z^gDPgt1d%3TWx$;!?Q+bu&%NLt6ag64InWdt*tn{
z-sS?+k;W-*4RPbpKGHLtR*+^3;CoZHL4`hXkBsra-O2uqZe)J?cltNyN?5(?O7M*f
z!^qa_rM1^QIBB+C?wq`&_b*%dWpXhXTy&H5^$xO3l$|bH_rY3T5ZFBg`VwHyh30UV
zJ{PKCVGezCNAJl84PT*>y#=Ek-?ZLCmG`6-Gc#dPt;{@U!ZlyD-#4;ZV!JiAy-xix
zl&un*t2}HTCS*!-JPOG7tTMS8#dP&JU~z#`d8_L?=*r4u*x?Hs*VaKsu;m2bwl!^_
z<cH|;Q<ZxV6S`EO_Mm+Sl$jjp2PbS+0rp3lRCH<(+7@(r29AYGmUlWnGt$LiM41A)
z^^D`)H56y{e)h+-#l(}Y&R*C_&{`+cPLixIYdF27R8mKA`+YKgmX7rvl?9TElf|6M
zPF{n)_n{}z*$rYF2Gr%@@Wh1s$3K*7d`K7Lh}x?GrUn49t3S3|;Hk~_u5pldZ!7|{
z+!QN<;Jqd`!G{AFHZ*fiN9)DNh8K27gwvgKW)#|V%>h<qMmIfT*pz=!FEHIFnSHUs
zk;Ms&L*2AyIEZ^)DS-1-hnAT{;hhLw8BtK}sMzn+dfEVBK%c)$8jL%52*oaLsJ(1*
zae<z*B+Fz((J;dVr8{W9M=MP36I4vD$+~adE)XLoIM!}j|JLK=5+=9AqS^N4@HT^W
zx1+0SPt_v5M`zIg-!XNu$C0N=4nmV6UcvBbg%ZuXGZo0*nXN0R=$f;!Q8NU2RaGIs
z`44)d;U_7E;2{C80IyhN>n3Bo{WGb2aNWaMBh`}tjrVjtGEAlWN>YD}Nepd-g`1NU
zbrv46D7#lUwxHq7jY@M?cUgyHmBZ^vtw00q3Qai`v3(eHF}+N5mR?W?+k<SZKvNHy
zXL^LKfa3@oC{7Px6~OBxQeodS45bn0I5<xpFd+1BHQlL4N!-442l43t^Er=VEhwWB
zyMf82_&vrrLvg=|#IuGL^hX4HFvRGLUA@v+cBZ~EC$z?`ap&EHh(8BEhA|?1CmScF
zzoNEq-@%TpsE$f@m7sxfmZJDg?PU%Rr$aWeThy7!4PuliLS2S^&pEEG3j-CRaD6sw
zL{$?8U5ysc1vg}rToce<^e2w2kb~H2UGyid_VxApx*?pZylCmF$mB0}cOAe5qF=6o
zu$&iF&yXAGwc5Tx@{Y3spI%fd3FL>99>sahHysd!3fuOi9@Tih73>7eQ~Du6D}%wH
zBT1>|Ots!)1DlJt;VNWOkoUT=I~bD65<}$^CBtfBA}AbOJvWyOj6%*scO~7p=SVRI
zW=^?rE8J)5R_ZdnxTKwy9iBUUa;9B_@^MylSbpy8|7MowxTqkmp-!Z<cv%Ih*F~>R
zr`O9EGl+l;FyN5Bb~V^TDbM7JOGMlD+zn)LmYk!xA}co;f$E&D;W;PR7l(&0UNuRQ
zm&e!rzREsI8*;4DNl-uZsk^8%bXlqF_Mu}9SK?RNBxL^$27tj<R)#D5WtnLdp{*1i
z2t4_;ZZA3l_VcIPkv5&ztY!-jGDZUtI5=7ft?+2~-$_@((6wb%k4~|Qu*Qg7jg&eP
zn3ojc&EFQbqNmysz}am|<Dr+jOV3tIgqDuvmyL?NB4DWB^RdJ+5OCg51ZH-nT{TG?
zJpX&XX)5TTLE5GRcYA$ZowMZ0Z?5`OSx=e)pbmL9NR615*Kzh9(=U^crZ-(8VW>Dq
z!bq>;p~506453tUkMyZi)A(sTx=6+*L_4%x5c@h>h;%9FOD*~q8d{H5H5?`!0Y`QU
z$}W2it$Qu&2?`}o+ST%VX3YgN7VXtBz#Jc3nf_-uBvLE6BsH#w5IQYkN_Q4#fEAgF
zBr>KwzThEcIC9pjxIbo7?&VCR4D!!S)T^uwr-k;V%HD&uDR&QMsZ33xN)@(?PgqdU
zfHe7+KrHRkNhF+OUasCi%qJd<aC8~A_)&F1zM7WP(VbJS*$_GX0J3^0efg&(d<+5=
z^(|1*lJy1|*fm1A-%NM6xBd0A5z39e;4MCxxYU8Yv?a6k;)Vjd6E=y7w#6F%$*uQR
z*e>B20A2|p_zbx)>d;vhV9x+f1e5~*ZkwD<jJJ4lt+V0M7@Y*d?~@xG<>C1`5|_w@
zt8AX2axD<n$E#>B>$x%f4X9l6t-~~HOqpG6ZdN^P`+%)&n#dme9-YqLZ@MVMzzt#^
zikX?KA+wY6)BCse^u5iS45;UfLq^$8$Jwkfo2^+N%iG0?ZK>Kkp(z#(*g%8|{;86W
zQmyJT$8X=Qr(=~8xmNc$=ePFN`a4RNYywM^WwX=jHFeO=@lM766r1_C!(d!)p(#Pl
zw%NfpH+>3NUUSS7#mcYI3BQ}cc|VE!nbqKB>X@E4cC}NKOH${&?ZxktU32&@D5S%1
z+2|#r9FqnB_;9GwjM;(AP0i92D#vHd+C}W;cK^lby>#?2q$ilqIuME?xI|e4iABMY
zuOtkYhQol;aSo1(sf9HBU<-ie3HhUMkCjW+R!sr#yzc>j@ku3Gk{H=2gr?KDSzrs2
z5Z#JMb!HkLY&qF<Lf!UQgQ>c-w()M0t8X|gAw-;*aq+vPU42*E@XASwQn_m6>ra&e
zkTx6Lvm2^UoJM7))9KzBB^jJG`<;dmf_>PZLd(JCTbn5*CmT@xkSb4bn4W5wg9;Rw
zqK&}v1!geBL&@5Gtg^rOI+=;GmzE4ZRTijevyCVo!D;h4$|gOJ)j5%x?zBhB;c@M8
zZwQcWZ{ImR^?_2IBeWxif|N|%2|*Y+fI{ziC`;rHw^2^?Gi?-pufkXebj96lpwxlh
zYN%j<_#b`QVs5!~^rmn>Tg+Sw5reMswJvjM<Gp3y3vZ}IDd`4L2jeJ^LKvR+Fke+(
zi9hKp(DE{-?t|~a5dv=%posy9bD_HclzlnsM*1FAvS`1f<v-MZ!Vp+P9p}c@!-ys4
zqnK~S`2MzMMuAY&tqNZAv=i-sp{@lp(qp0lX*=mmSV8YI2w2f4xMw0o6tBBHlQWS~
zapHY$h_MmHVaf9ap@g@(U1b9;x7)tcbow^h*1K%E^dNxH4y@`gt-W;zCttqZC<c=2
zNe)uJuRgft<#c8-BiI@}-l)tx&iTX;{g>|se|jspLOx5ZP5>?4{POpeGv$+3=Nz53
zM2Cc^)}mWxBK8zwGiD>s)yicg&b6f#ZrpG{>%qrgD$nK?LwdKu#xr4<X<xux@D-FB
zQ4S^#K*e8Im+;(<(}p-J5-Dlt4mJfCw+N=Tp4qNeZEy0Y$>_I8rePL-hh`tK2zvVy
zRDLTdFhoeMVFtQF<r1vt^Ii9mpYQk=o)Z|!sC&bKHijz&H#v2Yjv38nm69*&kufl^
z(_bcdC7)cw_L%_<d)hxYi=GZ$;?Sll@~af*=9DL(^3s;lT2qd}O%8SLYFt#zKz}y+
zzZ!JAu8{Ophy#ncrS7X#7A>RM5X1NSPH$kVOfT>vh2p&DR9v(87+5<&&(8S2zWn^h
z{mkwEk+jpe`&=#OEcajs<96;G?MsJ{De)g4eYLvg`G0(k#lQG}{0;LzKK+mN|Hx%+
zZ0_#wyn6oTVE1(Y@ZhAib9}t>BdnZ2`o`jtp-7NyD?1#ntqDQ0k6SG&0PN{T?*7YH
zDY`yf#Jyfz1<5fc`SH4@am!*R1?9~=i%YY$t4pA)0Cu09XMu9473bbhnLUue;R01m
zDC1)rdC`(h3Y!DUxPWAi*j`$KVWYP-nUZMxkpa0n$ij7M5$Kjct|)LRyNO#Ub#4DR
zA{Rx#hVl{g#2unLMqj&nT0gCEFvk9kYLqQtP(VD@$4SjyrP)W!v$(#lLDlN|NtfrQ
zR%Gk3$JvqV=0Q^#$f_6HI;RxNs|xBa&doD8IgD-&hnu=_BfWCxTil!0V3=!88%k_z
z18&_7-At|%JPfB3v}Zh{<lw5<0uo?B86V^VkSW=9D7owp8d;UZv$$chRI>zpZ6Cn*
z8bvw3>jG_mo)yaLlSq1W^JTr>V!i&UmTUVS*K7MvwqV<TnH4)6bZ^YA+Mj97?vEpR
zY{z=hGC3hax5(I3x7JZ)H_Xynl%NWL%=}@={oICe{B0JhMdCdyRgE;YG(oz9*H<cN
z=QEhXc$Az^x;Fj}+uIrylQPK8)&aw|V|`4v7bU+;V2Zj4TTzL(bs%|e2|8ky#`?M~
zQY&T>2X+6+4|ib?rp?eI`k`nQ9eCfN3%2v++~EM0e7}%A7<hN$3->iBW(xh!%M{KM
zJT}laeoZWaH}kXGZ|0+fLY1b!(*2Ctxv?QzgTm8XjN)$X@5352DuFTFoi%7+vv1=&
zql+E0&B&+Ef=_>#zPq0yU6+!&>ZL>I0(Oqpoj#gJ&+?CK?=&I`maKqSy7P->pxjwm
zxgTbc4*m?dDUce@X5knW_i|?@Uhyl8$TEsw=93+c)1(*o`vk<UZ{<N5k1i&h1`WZ}
z3da_N7au!&k_#b1LzheR#G=6eWb6woYMX3XOyOU2;p{R!_e-wc7u%V}XdDj0T%eST
z5RwB7WG3bzwsV3rP3He0x1ImLciGOd&AB$cAH46-I`w;o{dw9Z{{KRuUv9sN-@{O<
z{^H}+_)B0l&XcUESOfP(|L;7Y4}(!QhyMTT$4?%+`v1qPt6!~t(f|KF_5aNY`q@X%
zM#+cHAlih|_~HL9dU4uC54Y_v_XZ_#qjIc>J{=yPOuse|B}3N+_L1hx_>@H!{ltm2
zp$Xxfmpr0V`++DI$gl!YUo(K*cwKXLQg+yL_KG@x)f%=LL!bEC-Do{JBY^OkQY`9V
zn>uF@?pTbr=If)=A6qA<$8UB|TZpf!53FX&?lr&L-)$0NHPTOy4-a0n_77g|A2ik8
zr2O@y<&AtH1Ze%>+N5gXJAd+?>VvR$J=TcW&>O5orp`{A#}C^OY|q(quBFUf4u@6w
zRCHTE@Vc!#X!9%P`}%34crq($G8KKbI4sI$PIIs4ew*4A4ulx}bOPcvI{A;;xO*23
z5VwG#psQRy>C@{;mr5)}YXv2CIvp&*QUwouhvP|XDLm<Znkp~jZ2yHK-cM|zar;#@
zqmtg}wr{h@+P7I{?R#Bj?LW&pYkLc={ij-Km1xRZYv1Q$YyYKITl+qio3BgL8Z@JK
z0`#8AB{uuAv8sw_hGk((-}|z-_LoI&ot%}e6V)@bPBdl!EF$N3UMkoAQpv5Iv*~Lm
zJcQdXAEA}H#qxQwI(7XN7P;L5dItU7@0To~nb%K02He7O=?*TmxE!BR$Ysj*X#CP?
z#o~w9sBUw+)f;cKvbxzhR$bSKgflF<Zn?<k=xnC%K4puj>k4&hXt7;9erSe*$s>Ig
z&+|rEpQr3Mxm1B-aoE1yX-#zV@>^x*C9yVjMSOLi>*19rzTIXu46{_l19Pr%84Z&1
z8#9o;xWk--_B-i3#NSDOH|p<rXBs`cKPCN(&fRGNbke`j3fN2UVF}0^iGP*eNX)VZ
zw61-Q1)%iH3b|A0Ulp#2vbQJyoQ(C0yB_ju0-HArg2oCkesR*@-f~ggdRgRTt6$32
ziRkm4)``9pa2JUeDJZy9?tQt;wPcXOb?~3NtQce2AT98wt`2EfGS3o`b`-O%6ItVa
z7E6E1IX%BvOdrgOk+wJL0Q^TSn|`sLEU;|+j^AnRI94%#wFPvfJAWUm=tVq%=919}
z%}>O(6C;e7j~vP9e9rX*HD;8zxa4P-wV)ZZNN_lRN*POa>`t`L`1}jVR?X4*-Q}uX
zM=#tYI@g!72xh*cHQ=s2KcTm0r}XykMCxrx>+L@+vA6Go+S`8_au=GVfDwkF{<~Oc
zh|A6|PM*J`{m09=d+sL#E4Tkxee(FxSHAz}+S=nU_8)%(`;Y$&8;qCmWxD-^DiyH3
zuupjFLc<FTyv%@J?wquC|M8F3>z&h=c6-<kUXcVYQrOn+64s?wD?2uP_<}a5ywM)M
zs;bYfqPi+$CU9y~*uiRKS9hl#cGA0dQN#FLeast6atD3uNPhautlcKCfj(c?>QUhF
zFZVH&0uK)Ec|SOw>siNRVF!BO4-e}4J_dz{hr7@Z*8g0zL$cl-JJ<efy(?LKccX=}
z75VuHA?Uk!kK<|o-UAA{Wt0u*>~6!<?ae;Sv+~jFbY7pLykyUf24A^Ce~Hwuw%9LX
z{U0X(?*n<yCI7EILdicT|HGdz^8eq^{`;$$?7#o+M~9YRS>a=YQ=;_9Ag#5b&(Tp#
zK2+!2aZDdW5i!vmv!hcC?Iy#^1QWf$_+Ifm>KO~Yi;k0Sf~jpDHNIJoqW@>o2V^_~
zD(4Djj@SP2&67I*{u+P(;}I9abblvfgxZf@HDHRe%QPA3=8iBQJiEZ5w<jfkf1ma{
z-5ZmR9RaW-c6x#Fo1=I#z8s9GIuHTc=#4PlpH_wP0J6dP_=>Vz^cAHr?hqr0jCI|E
zD0)5`^wv!uZ3iij$Y9+iS@rycsc;=|vn~e#ZDGF2C`&LgST|~OZ}go5dCc;DW}1JQ
zbh`u9NZmd@y^Q<sDX|8wFHB*V_AeUtJ?HGem4_6Licee|X)h(mwL$dkWUsEml#VdV
z*;RbQ(OET~6&z@$(?!4GjKi2x*wnbwp=`;3)Shx@Cl|o;b6R=`27AW%)%eGxmJBmA
z4={^EizjUjMpG$9^#ZeKbtmojy!vQ_KnRHIx<>sNa4Q#u!ZF1@!ZS_sp&M0u0}vKc
zl^BySjtcFH&N10k;S%T!)F1RxQn#D`qmv&G4v$XuPuA6UOlNbjL2p<o4adEkI{9n<
zbg;CB;Yg|64F!S5E^Ky^b{Dm{bc8$DuJQ>nIj?OvE$uZ=c8~XuaIx#gbmubBJH%Kg
zxfmr$WXgqp9@&ov+#%>uv>QcB@atOrj+PpY27MG@R`J(b2nZJ16|o{|-YKUC<)t1W
zt!FAyg6<n(W)%aaLfiES+SAxl&l=tc4g!N&)dR&8YUbW<xCZF0)}lDeKxoXE94+j%
z#ww@d&gkZ&myR(ihDoJEKC=zZd<s+9rR<2t7ZyfFwa}@Q9B|2*74`6OrY5$E{_}eF
zP7P!6vbv?2-Yd~7AKJszYLrJV8z$&mO!(87;*5+etWyWoO$T}i1N9`0i-y4~mQhZz
zG-w?2M;LS+cer}=7=Hn}KPfV^cQ}!MsZeLKHg^kZ?R*T~bl4=mp@WNB6r)MsxuNk5
zUsQ){j%mTk>CWl?Zfob{q<MT=tsbDKFpT-f!#AzhJA2L6PV4CS@Wt`Y>(~3x;Ol)T
z|IemQf@1Icamndn9bi!8?e%r+k6v|vJ0yQaC&s3?7&{(X4Di+Md&E#Fe$?R9QnFnR
z9g>&{(hcJM{UE{ZyL4@g7BTtZFOzfx3vd^MH>~Xb;~!Dk4??@gpOsmrPMMkF+Pud}
zE2Uq1wU=fs<Q#@@%rzL>J#TGA=W#bnYV}GH%1&SIpR`VYJZiS!@4?}zPCeM%Qzk8n
zLKt#&wKfk~E8GBx4>4m$--@1C?{~hYo1%@5HbV$kr~ljFoxT4N?fi*U7TAdgDv@Bd
z)!Kctx6|4=-hJ6>acFb%3M8M_$>E#h-DWNN?YGE0KlyIoyr3!q+Ro9@t0q8F$;W_8
zrx$$~i-TsZ|2}VxkgY7JGPMnnvd}MDy{>K8q=o+61o5VG9p3_RYF|LisgHj$)Xk2$
z0OCYC_|MJb)Gb0p!L{o4_@t!TKNFABmc+9x<P|Nq99h9qMGm7QSklFe#X1l1QI-O2
zFnA|ZAA!*ABF(mX@y%I624}qMW;P-j2Ly?;o8E9dK*B|^AlSrxPLVzsv4B0Cm~9Yq
z)uM1mxfM{9iq8fgl4xxej_|5=IR1z?AUh=^I+c$vVMn?gbUS7g94bYiqE6AzsnJYP
z9Z6`1j?gDWZw&2a!mH7_B<T+(7nfRkGjcID3C&NsW6bS3ZeQX6RLi{>zfvaOcA8l{
zmWyb)S@y8~AGWK7SZ4ud?q&O7{P6coBS=2nVewmyb@@qS?P>1k>*n&~bB&J2jek7-
z=FwM=o!?H0<!>5~13$7zQHe+ABHw7Fx~KxxW0J5e+Qj2g^38e8^!ZmlGMz^&_<j+F
z=|#H#6{(!q9^LApC`^FisG&scU+dA1wy~v5TDmH%*Yl0CZLetV?q>^h&Pv|&si*Gz
z72?&*&7;QJ>OUTR{g0>e_nIj2b>pkAbN_t<lUJWgBU4FaI)zLn5Qjb#4gv(S_H~Xx
z=AjP;T#udx2&AIZtqCHTM{JcOGmSn3r_%>gJ+2j%=|cZu<K*CmSD;LiwKWi-HgcY8
zVT_bUAX1oAQl<ncj^G6xQvG+jh^r~eb>94R6qVSQ2pC3=nxN(7sN0Wzi>f+ls$coq
z1FZsjbq5#m2)4c+pfw+gG#?{4(&^;MXF5SuK(IZ|#Id|q{L&5thtbCrfXmG{uTY~E
zMcYxdw(`C)OYQv6`>i!^upm`Xyt96CLF22d6!+s1NGqsF<$SiW%Fl+9hf*CWEwu(T
zO3u3pQx8^~bYq@{wdL!Yn(1rI)HO_jAC0T<=ep*g4Yk3jKsF6s*OmH8wWrg^UyKq;
zK0m&U`ye4{7$)a3bcM6Swi87NOKVOS)>f`f%crOTcXL|_!OaS7D0mHr-4qj^lu)Ac
zllIR+$cmm#FBvT>UFLf^^Mp7cL;SD|y?%5}<+dsL6aCB%S;8*<0s5elk-sLcKu}~U
z@fo~DZRj(Qjs}x%H&uCdvW8b>XLt9_@y_m#5g%!5>!SJNq|2*`9Tcj`HSp81ah0Z=
zW6^JbgWr%n+p4@E6Py}k<~@{u@#nvRUO{eE$CP0bzc)h~^)k`$ezVzBn%4T`RbA#W
z-e9WFYmLXr^4eGG^Aq^|G+2N6@oHVydE(Un%G7_<cqHoQ5yMQ<!OV_E0b!<&dyYbt
zz{xhIZ2t^6S$_Pu9zx1DIF7IN`0#TPN+0XmYpb^U*ND=u_2;jX<!?NUG>?xDkE7Sk
zlarkn%@c`^sF!3JCKMO>{5q5nj=!=9u8%Zg@kGcCC`9b|h9cw{w!FEey0w4seE(qo
z^v8-$#3T}jMZ<dZOFintJ`P8-E7-+&i#i(&x`tOqO7d62g`?N(#n)(yz7?_~2fl!b
z>TmR(=vHhEG4Y)`3h-8cv|kIgK`E`a+*5;B1g~sOiM3D(GPJQk8egeGlxPZDP%Wdd
zbPdR(XI54cxBzLz<b?YLzF+Gvm?u=5*Z?mWjH<uXWd7(Oo}qrJ3EwQ80F036XRcjY
z1pd*(Loo6q><(Z5#{DWG&m>0&N`O5oo1Bd)&7s;I@Y)`4B{V(IJ;}AHvQja*vY=DI
zK8Lca@btkJ;Y@Z{z1hyaz_BlF;Vj5xd3n$>fgZi3fvgy;nXxUx>;Ip<FYj*~HyVEa
zef%j<x9?W6C0UfO#8>OsPS(#lw(Dbe+fAw;MNu|mNmNM6M^b<HcV=)9;3dj-j@>%#
zCL#$827|$1Fc{2art;Mq)ke-Nhgsl}A?k8yROU2{9hzJ^^HxqtqA(uf8>2yyjS)3#
z$#~6bOqCl!5$IkhF$@hV3A9i_%-b%hxx`H`>v0X`xC`rMi0fvBIDCQ8!yWv0W1gf`
zBnwyB-{?EgZ;j9QdwE6aOB>(Zpf~mD!Z$L$;We}8(}f(uJnhkfB=r-YE5vqZXn@T1
z`Q{3R!jqfm4J=|LQa+Cck)Cue&g9HQ=K)g@fPI6B=seHFT*^H8RCJzhF6#Ew)Ld{T
zPf6z~=cHjBOk?v+rsjheQsJnM&Lr}inIVGPZB$~2^g&l=9A!=aCthlwNCwC_g{4ly
z78p#ly)|LFY>T5WG;H)kA2mR-^eC?$y7yB82NT@GWf2}1xuXg)pdDhy^$H0QJbRtx
z-^gWEd1;H|F0@qvX0z3G-MmUV@5Auuco*47z=yAffj7L)MFPG}hewVeF$V6@axES{
zU4F;AUk)E+3T!6&{tyZ-uollhjkfl>0Dy}JJd0cF55hzYpu7c+TN`K;w0u_KEr}=I
zQdLJ+9t%xCTivRED1KsiC5jxO8Dr<msabfIWDeltSA$7IC|dZkV<>RZ2QeNTXs-yJ
z^~W}u7#D9S{5*R&L}iH*j~RKPWUZCuCHun;ee4=b9{+sLDjj!;DWD27s6s^HY&QyA
z^g1TD?D*R^A@IT+BcK#WL^p5#?|z>8QU08*<Z&AYKc*8mfk@Vw8~;Q_QwMKFCvJvE
zGORViABuJEvQZTc*)=?e!-F_xfrP-9^se+N3LT@AC{}pzCH=e^`w#q&)g#92yd$_x
zdA=gbGlu5p2(9Z;;klb?AivOmSjDbR#8v*e&8F=BkgxU$2ked?U22ZqG7XVDPt%XQ
zVh%JseL8o{^(ea#p7-5gmLWIrKo3TbC0-eJ3z0V*lYhC*c|jeL)qp2PxJejZ$cs1f
z^g>oAX%h12Jm|81TlUnbkX+_*($_pKT5Y^=>nr^Ca^q!uGE(-;M-7S_id2>X(8V9n
z{LJGvJLZrlAjqRA(`+o)EFwo!{EO#aP2)fe$YL#t->Y~ymJO(B)c{%%AUus*6u(zA
zKr_1OUivvqxY*TN6Zg9C;+CwT-|Sk}Z+7L}>EhLPVXufQ$`E&rI9*^H1?=)Vz-U6J
zQQ*2!jqTWjPT+MfpvAY_tl6@b_{p`vI_qGknR}pPWqcXh!|dZGz{-u<hxTEms-5NF
zVU9S<`437Zb+i*(>S28qyg_VaI>A75hP7V71PnPye6V$JT=am3=v+xRMt2yF>0lWc
z{{}QrAQksT1d7U&zL0xN$|D_ZonKo+3jn4sgbgfSZDAfzI<QHEk$Zqq_ypE(>2rXP
zcEn{Ede3hBalgy=!Z&_!RU=CP`B&m)8#|2iQFwh9x2t!w3+uQsKpWR!k$FvYN>?)o
z1AbE{ffk*p1GE^_U>uDBy*qv)6QcZP{E8?raF<vNu%s;3m_ZV+v3d@kcS046fF|Lf
z7KHv7P|v%9Hm*kzXbAi`Uw{_g4e0(rmJoo@fyH6m1$FqMf7eJIjmd)(ewL*5CFVIA
z7fQuYHMm4njX!fGa0W<*4mK`|ss&?=X<Wku9qHCJN2mug4k(&d5G69i^8j<(5foL4
zM5TfvJyA$-%&}vg_ts{Fq_RamKYF-jTbq;zFn*$_jf7D;rh%zOQs?va$j3_p3@y9~
z8?&raX}&*8Y#r6G>D$iqfi?!UHxJk{q|GJBPgu(;)hr-rLdk8MwtU%@(Bx56kM?aD
zRY7%}d<3r_`-37=V>aHP0yP>B0%|NGg<7}_O78#=52LGrMSr(y?Bf;whtKeL%Q!KI
zof9-xjUyWeKHs{8Q5Uyeh@unNAud&Dg*@NgR;RK%BH$srotv|P)j-p#pgYw9lOeg)
zthDiP7t83wLw-Z(G}O<Uqqv>1tFbGjjRL#Q#~E!;WIP#m-2gPPn}i3XI_$lVTUQ}l
z089H;5*C1@bw+hs%SDLIVd1$o-dMYX|I}fyW1KA`xtVrU{E<K3s;`!Sp_jmS=(8zG
z!5HJ2Hojk=&jM3%ZjEjX@uKA<&hPIM&!6xaCY<_?_}-zSth^ttWNUL!y3V@*TQODO
zBNq4=7x<VgfP({t{*?5+fZ-s})Q#AcAkxOd?u3;JcDT+0<eQ)xm*aE@l$Su#k04@3
zPw!QKh~y(8oO)%^tGG*4=A%|dj6;klIZbG2j;^X8Vqr4t9puE?6f$64e{W^I|Cm$$
zV`lk#4V4HLATIxO1UdmI!De}LDI@B_(tm5%GrsLfNzvH|X2t$JoQ6zs{!#j@?@_k6
zr#Nf<gPP|aK&X-?wf`l1QFWpv?a7R0`8KmJXfi8d0(zj`tI+DDY;g~@c}=!>O=ItB
z{x@5@a?C>C-rQxFU>3>jvLu|FU*_3zbQ8{^UjL<G3qnWfrAhX(@74_1n!*hDd$mld
zok-v9MJ%#Cipr9kg)?`Q#7zjiVGw__K`Pv8Hxyz~cgw|(an&L7wnIHf{#}ueHwvHm
zt9h%MH5gyr;6__PuD~0+s8SZWhZ*CxxG@edyyL?cGsZbnS>v2%bKDm<$2niZ9_OSD
za^>uC&J+f@vi7)J-5zHc=l(>@ZXw%quK#{4b6?s1mS@=t*1r!EQ+&?w1a_u5Q;8Eu
zqlLtdElJ9r!HpmEm~Z?E62=|}O$fIO%4C8-poC$laTf6HBW(#le*0V56S9p5GLPM-
z!aR0q1Hw~V3QCy;D21A46eutYlmtfiPXuTEx4K*BQsfc7gLI!xT~DB}&f-@`{R(SF
z0E%1l(VS)&jmL|{GR^9W2SVp1R|2&=TR3wq%^FJ2B7B3GubQv{*+K?@dTarZ_m(>A
z+C{L6VS4Q>gjHx<i2bTDK7!=&tvPucYvF`{0j+>kryCy7v<rerA1iV7aGG&?go~gW
zC=P>0O8)G`Kh;>p_y3LO5~Ib$c*RQa%SN-x%8uXfe)uNJAwy`G16ULXLg_QcP0NM3
z|K)!We8ZX1@0O{b|65M_XIIYtnKa6!zQ+voksrWO`qTBC(sy)91SO~f%PW^tJ8i-%
zSux*Lt=M9QJ72S$$R7Gp*y3>GTe+J5SS^ND^cD8NIau@U23L}C`}2_kK)~k7j<55|
z+p#{^bd6q72+%R?y6SIjr*Dq~*p`K3(Eg0*4**l%Dd6#=br2L=S6b4_5qDVdt)5BM
zG)*Uww1A*0bVdQ5R{b_72*fMgvCwteMxz1$wmziMs0`;nRx@n3vU{x$MwNNZG(ttx
zZxJR$H=QJnC<f-Ybt_?XPB5$f2~eN63b~TCw4Qv}l&~w%=RfgI7V;$zGVviN$OL1D
zek3;YmC$u*ke!B8%@}yPyG-B3?d8T)lUv##^CVOvhy{A^V$gxU7V%84`Y1bOrY*9y
zY(!fSzgNiqC|%~7$`+Zz{{RHHtDDem3PIi-J8rxmLz4?Eo;@_{c(|x}&ZQ1SSz=@q
zI36L}cAPN=C~xG^ZtwNp$=>14ULJAZ1a>#PjNH3njqNwsX`@D>3VyzAu#>w_m)=#4
zouP$??Cbl&!XMU*HHOq4Uh|lGhFg?T1L(E7xX!j6yX#_jpJPBiB%^9G2VoH$LMbir
z`Vz;B0L>gU3{ZxyC}>1fQ#Z|1n|U(`mky|6wLsgcWqqlywwih07Mrc5+>dpu)qJ_q
z%>BN!`m))yRtoAjUc78AEv_!VXf9eW7FS<F;T88Ku*AZg)mUt<uB<LCuDo!q*22n5
zOBtpNAghg~)z(sL>E()R<+amlyj*Tt0Ivm2ys%c5TCF7jD%xqTtTtC#trttJ7tqwo
z!s3dxn%$PwfVwNIFIq3GrN!kYRD^b2Yl#3YV?eX!%cYjJYOTV*(87y_rNtL3Il$^m
zjpdiCFIQiz0yfQ6=*5cFTz0Kxg7%`(Y`s`sU0G?ZynNAmX|>!|bHQ3&TCuD=++JAA
zEr77pv|a*sfamHWh9BddTwb;;_}W}rTwH9ve6jrU#gf}vTzIj(v}`TrAh?S0)vT4}
z)-q6Jc?HO1Spf1yl_0LXTzt`5vKCiX7cCs7m6uD)tru=<b)mJmvbeI;OpVxm9@|b6
z)FjtnhS9o3&=GVVmew*|bAN=*>@ik)xdk9gJ))JV@ynnji7Fe38lSW3Dm%TlVUQp`
zN_&4U5GeH#QA@^BG12^-vI1mAQ%Qy$k%3xn%>qFxcFU-`p1=wtxcq681=YNk^SZq-
z$1_PpvEKQS?ELcF71c^uiz2VjKyLu0X8t8hsG^wj1J|JNk`4?yuFCJwxdVkZ9hS_f
zzu#jYH+b}X3WH`3rRGb)#{$X`V~c}b%65^|Vl&g#v!solhf6VbqaTyIQ5)T(%<Zcl
z#f8j&csM};`jxs3i?Xf4*vg$5KkEoF^1kEsV1*OuEXZVnXDmuXF=E)H3<jnlmln5>
zyd}gwzM9SEVdiMx#HQGI^F4R7P`Rz+F_r$M&SUIg03VY+MkJl96op`jE6m$$t}}1L
zYQf+0^SQIhCSxwBg;yp&ci6%vuH5=NbcM6$It$ysdGF0HIIPeU7NslbmV{^_Oih~#
zl!}cDsd;Nyi?e3yRYqu`>AIDMa6gkZlwU0lPI@wxipB9Q;@~J$OB$LV;eQr4Ro47d
z&JAstKH~Z+*630e={&3S)J|#BQ?E3oGBleEZAQWv^~bab=-$bXRaY?O7#`tsi9a-#
zFf%@xv4{_=xMB~hrPLIyvHwFiD{1<K1w}@US1S+hg0Tm&iGaL{34_=HBD2-5mv@AC
zTg^e|T+dgNa1gU$`E$I{ory@+o(mWjGt6V)bMtgD_e_(<vYL?Sl6cx8j|*Fkk36Ph
ziQAcr&AAzn<w1Ph<L}w8SY*vK9E<VY@7S+cbm>bw7RzS7glBQ*59(w5<TR@>^Iy&R
zShUrX=OS4*`VCH>&i|MnU~;vZdmzgOv%QejS6SyFfwTGPRLJrst4dTCpXn=?S%o-;
z%%oKh_7%4cmh0$jhm-s^4gcnPo>wz@@^z}3FS9Ld)*a2CS+jCWFJ|;$u2}V`n)GC@
zYYpoD%iLiZ{-)M)y_TC9p5DTwOKI!K79xnz#Em_LPPH%#8+T_`!NwDd7Hvg4WJSd&
zrGe)}{0v2f#pX`M+5XKumMIpu$eatiVSe@!CzTBE1_L(=?&7%Jd~2H?C=AyiGa|<v
zM`tPOb%$)8c<xbcWRN7+@!BqQt!`-1$!&}PE%G`_4ig?|QRu=RQF-L`0c;NggWIyY
z3Surd@5uE5x!Kwj)qLKXHvT~Bp8m-9(0C5J;|_Y#Q?e8}iXK;zJXp0Ieg2Tl#f632
z9e|oQm<E*FAyOzk&VM<_DcU$n(EXY%!`*w;tuITrX3MRw<W*Ph-j;D;AF3~jGa>Z$
zUe<YI9J;-6Uv`Kmk}(cc(7DnZc!FaR9%6G+P9=|Akc&X;0_MW`kc`i7hS#1*4LyoA
zkLJBB9obj%qG0DyV}&T75jGhYoX%qcju}6V@$nReNCO&W0Ad2YX+2-NQP3%Uqu^=X
z;FnqM(UXXMzC`Z*D>?OFNC2(nB!yLkhgI!*K*sfgW;z<jGR1Ch30&~^VnO|8s{NN#
z#}Y367Q!m3-%R%Z1>xKj{eOY5Wc{OYG#Z1P2nQg$q(3zBFF^$v*&PGF#>Z|dtCNbr
zkH$#%*6EMCT5?Y`eB#m(!my?y@aKkAe^5|$5S<{Xbk1S78?fWO-O^5vVdt1AA9pG1
zPh4J{uzk-uD!I$%uUhQS7R}XM&8k9!-|oQ5;atKpdWO!XJk@Lw)Yf!RUlRdJCfVYH
zWVfB3`vo^3HSRv9Bl^HSi44cbhx<SOnJI`Gl?OvbY$G6#VU@?nHh6?Xc=@mhPaRf5
zcPqc8NC?Hz5VYb~@bU_ZTrI+XOYq+^{I>%Ct-^mV@Y~9G)CHDZPx!8{D~G|M(II*>
z9<)JC2++D#mUl4NJ1)~}xPWAJmgFznRLn6AU@@v|>U18BKm4W9uV6}qY1q~@uXV~`
zl`*Rr5L!QNxoVN?{(O9#a<yoDUlQLn!{pVndERohf?H776X@?&;oa4edVC?kXaNz_
zi0aT1u?8}bO*Zr}bj>cWX4IO1aa}8WOQ9te<VBlX^Cpc5ix8hjeN2R^5J3`>fhn?X
zB+$yp=ch<prbu;*hhFu{Eilv-28_6B;a!ai&<B1mRntKy=4HT%+pBcIiFp}pBL4XJ
z@D4hr9MNU`YlS1aioaicPl!$#ULzT|LPgiuFi!k}Djg7`6fy1HLca*IOxnqi95e9*
zF;D7Xj=+nAw4`Zbh9iNQl6bM4gtUUEt!YTB$%-%33yp3DB|cuEmt7*J$2Se?kwR}n
z6z1OL$?zlUYYw)VsAl4siDcsHkGyj<KR@KW!xyncqIa?ODGl^T^Qb>e-$K_c)a8^w
zX;PITymPJ-n8ox<AEX!u$rBin($+->iaz-oR<<bO@`THH`H_=RTMccI%EFd+MOdd(
zeAYPWkGNN&?BoJIU6>=YS48$%m^6c1Ee%xQQgq;M+irIfsi3H+Ofw2g#cg~^4SiNU
zk#x_<WQRjHaN&n=8lgMG=%ih-ZIqlgEoPmn#+(TZ^aM=kEYVR*AAYsf@`=s~x3}wN
zRv_86#L{dVH9H2>$gC+=$W(5jolD;jT{8hP^GA^fiVG@T(1#1kIHYyFgU;!gOf~Lo
zQtozeHK2=fFngMfMI-T26r0OUenm$$r!xwLH;aDGh7ITxtr8S5gi-2LI?BAs$0azf
zdKDTksM?}#;0R(CKYY+GL=Hs)QVn|!Lfe@s)MGCk%#8@6O$?=P|K=8|`sj2Lw>6_`
z_8^yQkw>AQ&}5|}dzsfZCsc&XNycr`dE+Ljfd|sFs{82YmaHZ&r0O3#0uMq!Zyc#0
z?_HW6Ja`ry>38`jv5C$i)ZYgMiWi)!j){b{t7(}-G<gL=YD|Yn;d&}*bIvwXk|o|{
zs=#cSBa=6_7QZuGH_0god58K%!AQ+d94U&qR?HRaGiuOCMVEqCKnS^ehPc}|X8KJ(
zO}XwMs`?#3E{ZmJ<q21LGp-Phap8~9iwzmeNOm?}H<bWgvW-z^5*~J@GN3c$9S#ET
z=)A~LoO=^cKQt|QTQiwCs(}A=PD}c)$e}XaR}zbCq|Ge4@W-j0bVlKWQTUl>DCFOH
z2-+<^B2{x8xsAs(k+|C^Vd*9Gfn8W&19T4!DzoxXcDBZLkKS(YA11R!Cu4Igq|T-2
z<_L;ZSZVb|(B@}`>9wI?WdrrdrmHT^USffZ@@91>b286sD%B*3Tv&j2pk4x8CrLUI
z$@2*@oV|pJ8WSjEkmSD1jCr!;8bX-IF~?-qiJG2!DV?u9M|3MAW<qw$-0wB99$eSZ
zc`dSP>=S6So+xE|D#f`S%S4eLrBjV#Wukp?%ykzUNx3-U7Wz6~D95qQq9tA5gv^aY
zdy1aS5gJ;|1zq%Aqv-n-^E;t6R7hb;%$T3&RKpz6NFq9{8nRYx4Tr`Z;rkg?BC^)w
z2SufE=8RdFVNK7Dc8}IXn2f5tLfNH=OZ~feFGTsLcy5P=9tMi!m6$>327C*UFlI?&
zCD>rea2ZH&5uAh9hF+^+9dqJSr>fBaU6ki&Aws7#(B$l_NhBl>g4buL;<VfuZ<f&K
zdTyx>3ZAhxw|TlwLwgD{G_bO3T*>*GPEBZQGgF(I*>f|w*r_h2R=WpI={SI{UYj@t
z7<tudtl+h*m6yvai_O+*YuOY6`sVf4Nu@C^*uT>1BbQ=aNjq}kttKTdE^mb7g&+BN
zItZ&`Ml7mZRiD;nxiBZ!Ww|h`RkzYqLPsxR=|wE9h_*8G{Y>d~negn;t`bI@ue=mP
zNTB|5xjJFBQ3EKqm^c8q;3Q1NH$wQDhJh149I^9Ak&&SB<*fv_PRdMhGqs6;fZP<=
z2a>xf3RXsgV2nX}2r_o~Df%Zgpn@>Jks1>Hm?kGEGA|e<)qm42Fv0!-sN<k&<_|Tb
zcNqo*N@OhT2b%fdj!Yj-J-M>Ek2ghgInzO+&*ebo+6I6rZ=EDb-j4f`H-c62M!KbO
z*XWsKijxlf*EWoG{HZ;o-Bqtxb;>a$y#!$zKz-elugV&lkHKBRx}Ie<xpguZ{V%iR
z-^ELQ@>8T~14c9yM4DE15b~X>p&%s|e0;m0oupBj8{?_GYLTTqkRS{FAVU8-K$LqO
z;k|vu-KcLRcKwghF^ZH7&3>Q{=uLs0=S~YKcCUK1K=E7F@+?4~zlMal&oC{OxD{~E
zQw+kIg0}x~WIL{Mj+mK=-*F-UB1I<#X`{V2?0Z93w2Up{+xE%c+0WYt;sS;8te%O7
z%ZP*ZgWUUw&Lx2DzdcTYiZ6!TKPAHA4vLAfcsVu7Nnl?HXS_)gBTTU`5}4MQ`Ql?D
zvtkzV22_E04Qf6y^6GY0+ynJ0(T!oU8@DC8k%lks*>J_+N&DgkJ(F=Tv-z}4qXYRu
z48wDrClGZWf~oVRQO%l;nX}Q(VU7a)GiRs$1^7Y)LCxxg^Hkk%zCu5Qx^4DDUIm!Y
zkIwWROEza{oV;&YYa3t3X0kt$PL{D`o*wV*>~9|s5%AW`Y5T{S|9SrN$NS?V1JwE7
zE358{mrKry(|yrf7!OrG?FHhJg)ndyPBDOj-G42;EmALzZRg63Fp_)YQnrB};VVl^
z^e_BL{=3|2E&q?T*j!$0E?L%6>wlVy&DKime^~Qt5MUfecEH%9;UqivJNbiKIE)$6
zL(?<-i;=qVIt2}R4Lwyb%KnW@vG#Z7=a~|peIBTP1|Z-D9_1vuruHvfc;A56_$7|h
zU)ia$R?}KxZ)~^E-i+Ph(CygcF59?(7mW+`;_u!t@a#qp<S~aq_?`OW0OZw<-*wq*
zKNwInk8$S%hVvJ}JV1pXgz_59TmO^S@7q}a7`8y3!wx*h9m3Ayx`76S2ER+?0Dh{<
z@d3NB<sKIAnsvr_NeMvEEv87)smA{4kC|f+#bq+`5w;_C8AYSDg@plcy5R>G3jlKg
zT3cw)z1SgMm^B1!EMJeecYfMCLx~Tf_RsvnV5zb*32BZymh{dB-!9C}v3KglyFlE0
z#J=~3mH%)x;++j!!T(elEMRW0SHJN>x2n5M#j7Tt>RZa5ymPR9vZq#z_V&TS(J%Xl
zZ`yCq56<?F5B8wQ>;1!h>cR}K%tprUhrj#*U06P|G2Uc~x2aTaU9OLPxeH%MD@sAf
zx-JE7$6Hh|%yQE-3O(M@uhmU+%T&-$QE)vG1fWDui(`>~t&VEjE!j)8-I6aprn`;R
z*5fGW6s{0AR}=c2j*s%S`kCC6$-WJfoxM$WR@GHKj-l~=*o1E5KI*HoZ@h6UU|lGr
zI)oMMGW~GAdPkg6J#qA|I3@a$n*>Jg&#<3027lj{>|neL$v8$=J<1j=CAcslXr!De
z!+*S|<k%YAP3*bzWxMV?xbIG)^Ul-uUhV`{?>k?B2Ih7%mj<2MM4AXl>xsj>c!SAk
zgIQoZy`QwdOl~jpG@9pa+3#lmX9v#Z0w(EQ*co^2<Y&GIEZ+ZF&6SmAa{srywEVpP
z`y=lEme?zI=zExL&4n5P+vw19BjnGG3*W!^`^CWa`VA1Qf9O3RPMfqSOz!^jN=>ov
zTVQYPD-4oJ;lwd_WXKa~+1Eg4N(**_`SK#Xl|u0_ZT9B)d<}Cobp7F9BcRC;X^fx%
zvIvR|OG*!@3Y!ZVv4h{hAJHPyj>ri}3;q(4CkK)lh!qk|-M{wx82Lip)w#q(e4z&e
zTsb}6tFi5$f32}bqfw=>QX-5_rA<WH*=e^anVWpg>2-d(*9PRPv88vT?4nBUkxOz@
z%Dp9*wkn!G?-J^ZGn%1x0EBJxPvS@p6yL^ewZ`J7JLBn-@pMx)t<t4&<tMu>g{*j%
z*osGmZsu+}(G5|jA~)AsH>ej6#8gsMY-Nkdn!Ov&rnlM~6Qc3co$>U^czQg&WjCfG
zcuq$o#q)deNSfnTI)~t9-k>Gime26x!;G!@j1f0E9s<5qZ{TK~P#4d4_<R>Xf5PWa
z@$(HnKYl2vcX{KGNVU_NvV(GHihKCT;ZghkoG}_E$LKb@3$aBZZ=f^_1NP;X`l7$Q
z!<To7m!I(Er^L$}eEImni~F88g2U7ZPTzewjAi@($N_bO-qgzm)RAk3_<0xq{1pFu
zJjoHesl*nVbaF%%S$HJUPa&$%&N9u~Kt`HeiaIfJPE1iJrl|8&6m_O1sZ&7H6y6-e
zhm*B<(an)BnU<n#T1!tawSbAqm3%MdOY*%GG0FE&BxNQo)_EeP3<cz{rTU)g^&_W-
zFi$<fAUb76(a9S|CvO~`yn%F{dL*5v8A_*kES>aVI!`v5!mw6$G&`k+S6c51hgX^O
zuydW{7{^~R@@jIR<FB9aYhtwHuQ&MhiH2NKs{D|5$}vF>wImbd&?$#PyqDa>`D7N-
zyj}k{xBsD?Clvq2E*=w2WBK;Kr6p@+$+Z8qT1(5%{vUrt`(M*))>}=>Vn5pBE0_J~
zdL6gVHZJk0aY;}ACC0zgW0Z9nz8>N=6dzJ>OE$|V+v}Td<8n4-_M4H>xmB3UcOdBk
ze>`GhvbNv)-ErSlpPjxPhT@}o*$F&AZJY6rfjbJ^5QD~o&>Z09lzP`2xO82|?z52U
zGG`oIyJ1$G;{=qDy{hv%A*bZfAJ)f153hW)D<t=|i`QMk2HUfpONxaY_%eDp4=F4M
zNkXF34q@m!9>q`=afUl~h+`zqA5`yppKajd;oN;3^K%C;lm5Uq*}HH&Xn*!LoA>|z
zJiI@IpS96Fd^~P@asB&5C*MUdQS1S0i{L^Oz!xKX_V18nT|2UQz@ZBK?O}Z8J8{Dw
zrizF}n}W6L4LJ3Xu1pb}Roun}@GMqc%&6sF1g?vTqtFm)!(g${r8%*C2fX4r>p)MD
z3QBySc;-%o5mviy=ml|5)5M=HhphtCc%S$U{n0oW`5}yE<%U4>WQTak2c)TU1yZ2z
zJ9-%HBw!fK5O0hC1}IQ`lqBrh4m`?WsA6Z+b(0K--L?z~h>U@BlfMYxEvS@Wa-1Y1
z5(8^SS{gZi{GWB0Mt|C%G0<83ub4SQ#gJ0>(p=2__L=RfaoXfVm<K}*C_=!bayk%B
z=uI`8xj^W%gdnHBoWEz}6N986=8NL`o;j&iCnO<aUuY^Xv#rp>*4o-J0zE^R>KDg9
z3xk;ZWy0Gq!f;sTOrH*cpP=r~RF$wWAF~OOhX&0X7jGLr5SL>KKtm|w-Sh-Pu}Cp3
zO))k#g0HZ;hFs!#I&vWs@fiD31e@4tE_M$?bXV1nku~3yp`@{7#f%SAMyw_xGX1nD
z?(o{g_8LjEUz>=|7;Ebe-CM@sZ+shL52GrNV9GIVh~l8!$d#L~YF#eI#ni89*=1>|
z;qu}JG!B&TB4O*C=p+YJsdAjw*r1J*FsxWLR^gInb5kr6>~FBdtg+S{e?Q+6c@%3<
z$%%q~Sjq15+S<RM6BQ)}YZ-to04%^-coo~Ytx0E!*egG{<2sguN*=0L7rtDs=m-%O
zaUd9|ny|scj5%J!`+b3)FtCw}?zCI)V^Ac<Lm+yW4mcYPnTJq_+_&CPN!<*Tn+oNs
zl+{d<uc4Nt(7ixMpbZ6V-rA}(G0*aL5ZHGzbgfm>3kZr;r-Q0JOoV>~%u}f*ZW-tD
zqgWy+hPtgwX*#b>Odkn^YafKTwobux)9NP*6Bn6<4d7-iIZe~6{}Bc>h#9cP%r8V4
z{II1@0d5vZRQ-W>KpG9q>OXLVtJ<`ozpL?#5CuNx(inFa1RFzg$fVeOawmu*27zY@
z83A(}QXxeQBp0C@t2~LW@&^?>Ii8<Jy&pt7*vlD>87rcB{!rEWEI#muYP$1r?@u(R
zInF3z0ns#Ny_c$+yaVPMFNO+48UOlOS#CJO>*79e30ycIPR2!n<1J>3Kfxtgc+rP-
zR*F7i_<0<63<Sn5VDSTe+Jn)4*Za#D{>$2Fsa05bC_a44Q@r2dOgfJB96}2Bn(Vjc
zy7eBO5N*nbp3pm0q8pWUM7UGx`mO3X*5zH7TZm-$1(U=osXu~^1Pn)|Dx>a?#b~VO
zk$#WkBs}E>4w@o^Z^xrLEIu3;KY)*qrT;t3X7IWDZ}0wdcMN~$j7O>*o3JV6`4BJx
z&jqrvnLZ~I^9K*7cAX-7WI14Jl<32MjYqhS$Ix_2V`?XKp&e^*G!?BtH8YjO8M$Ru
zpcOZO^h3+fK1}snj!`~;ph2kXJEF<8H4UCDa~?_^Q^{iteju-1T<Xd({2rhd?e=<}
z<H4|?d(+TShL?Ev>ys^u(?jf^d5S<HKW(egNeg$)%83g$;h_iGO@_=emx`EFN7((X
ziR7&%YdnhFz~c#clP2g6=Ljww^Y%riyg*TzWOS)WU)sR2NYg$OW^I}rwop7|`WO%9
z(Eu~S#5vm}q|P04Z*7We0+VAjZ{ZX%btC)$6BIw(P_Aq&h)*7MLqbwhiFWcSnWA%#
zqf;SnHisT!o2E+zxr}IgUJ|rYq_X`eUvhnpMOQ3=YV6%*mvkn4HxC%`_fNTY<c@dB
zn)Gntk=PT<b1ZzT+?Pu)C)wO820DMcEaYO@B}Q^X;YbxM9`Vp+7AhJ`z7vht7@9Cm
zfj?g#D6_izWD8CX52QF?(o#}(Nh%m64Q%)TCd-tY2BYmh@tTzf{J(qwd(zREeo4zv
z_sul`I#0i}rg(u$3N$N(Z4YU_igb(mh8|vVmLDVadoWCB3{K5bQp|DZfoq7a1ll5<
zDIT@q>X?`g8s=E`Ve%7eUg#Yd1^%^%S>8(bM4%(0T54DTRY0o0og!8fCT-@JOXC~g
z<V|I)7CYDKdfEM{vX121gUhTEq~LtDwOw(HA;eSqT7iHThD@^R@}HA-=xHNVs%Vxy
zD#;$1*7CFtP{0m(<NatsJvC__PZ`CdI(4#4^jYF^M&(b8Glz{Om68%P<9%tej;KwS
zALb>_hLk~w&d`3+Iw?*6IX3;+WJ5J~(&q+w7D<ynTRQ!&eXpBKFiakA5-lNFHq=Qc
zKjoAsoSKv}s;6k2_jOcb4{J9$DCe>XlLkM42;uRGX}lhEhHJ>}3DiKjUZ4|90KifN
zO*7?8F0snDk<HOF@+<^>RXq2i&wgNM9;QXDv00qFhvz4+PT4EpznZPF?N-BLl~IGO
zyr`=1i(?G3$PJ_zLs$lnpqA6w{LrO>qF_hewKdodyFt6-gM{Gr?FJsIRjTv6b{#Vm
z>aY1j=zRjUq#l<UCxTTI_qQE8j5cCgZdG`j=v`xj5@|Gu{`eurxwKeSB?@^E<CJFR
ztW994I`Q(5#dKw7?KDhQenuvkD(9TDG*NO+kJAzb7%I(hS^<o)FBd+Vudp&mfp4ss
z+0y@<2@V&fl4$nbUQ_{<rMk|4(kYygwKiRcT+>7){yR}i?lO6kyyh#|+FWfUK-C2K
zD%f@n^rk9h3aA@(vO1<Jp4Ys2t3HC74G~OLp5T>fmav!YPS+;Zsbl7VOcEVdpa2qx
zz8)F`l5cUBP|-&E!=|;)K5T4`*4h002QxOWm;s@E4GMi~fYPvn%_fY^#s~OoeqLyz
zT69=5N$CKY@URk87dSyG0bo8)?C_x;RJG(*hrb2yQ34Vg^DHhs2SDOt5={j{|11Mt
zA67n8C9L&yXxhWa39~6yL36jLBmgcD(ywT9Q@Vv!%`n~a5Sj~i49m-vp@!qr=~i9l
zz{T*qW>o?Et93JLYpFUb(^Y2*8eC-LiRM>YV&Y?}NhR}-u~g@T>%x&RKxFSe>Faic
zdup%mo_T=eeV)*!9sMSmW9nN(*Ku`yl6*IlI3xuI?P=P$=cCj-=4v@MW&DW?NTZ!7
zE!Wg>wY5bjtZnXt99A^=CLWr&%U19;pxh+H0gobakPqC^tRWFV3dcob;V1Je=;**+
zO9BD8?cZPkqe`_Ajt7;P!xrXP{Y(B4Ta&EFrkq1eFNySSg*GYO#J)s#Bj8Ig-#OM;
z2KXGaiP-2R_XcP!L#9@n%?w}44CW?u*Vf`vrW0ksv=BA3r-tE&X_)j3BOmYP4v53-
z$yLUoOyKbVr?Rrp%h^5iz3fu$WR#>1gwctA^Xn)`7T|B!W34Guk_byqmOx31CsA0*
zr|iO5vO?R7fj6vHV28`i<}wM48-QX(%Ue9Thcvn6=Ig|b0uOYrNe2S)v4?P<7Ka{<
zM#5zzi}JDjYIk{#cF{dGee<kB8_A)?h0W{%qJVZkyqJp4^%UEY-^_6QLB|`~+z==1
zM_ImINd*iVYE$=65mOHv&@O6s>V>K0Ws3JCj}!>`B$ZIKR^qmp(&U~%_L~k(#Y+HZ
zBT$KSWHzT;5@e>eCsIAiTOTrxJ|=6FQ=Cj0Th3ez)0m()>-tRMFprj?IaEw2Z<|<1
zprpANi>i;H=A>h$2a&s$T1mbe`j<fM+YZP>+4~{JoZ|ETyiG;I1XR4e_!`O|Ejo{%
z0h2VusZ3*;X6{EBxr<EPxd!g1F>hxWw^IhT2^<=$XiuWFPOEa77oDc9J<%p;s6A;7
z&e$l3Ejf(P`9qbt!?x7rKo0>>u@aPJ76e#{*TMxqUf44&gA?je#kV%-jJMLwn#M|M
z7{l(kLT9LPY2Er&qnWpkYTN7mU*GQGIbG<o8}uAgNm9eewY#XU@tO_xi%o~b=wkt5
z$qq&SwCK`~{>IzDW}7v#(@}2Eq^bFXX0$7^)FI`5`+!!masB)^9LRaA{^7k!8;BI*
z-^JGt@9Qf6s5G246$VA7z4`edZQ29DdEnomvq<3C!wVpXIKGpe{7@t1iz5=uTkq#N
z%JtxVY{5esV@fM1AJjh7t?GiTx~{i$j;A2GEkRAP@M(t{B4<d2IyxWYAY5Xq*)~K%
zP85mFh%<Vhid!--b?`C7Y-ccvTS(4&{X<or>>aj&4+tEGJkmA_!s8<8jhi9tm`!sm
zo+^nEj1j@X<eZe)4e0W~s)a4ZB$zfAkPchOt27hrKE+A^%s_Jv(aXce`xwlY$raPA
zo$Ak39dAvkPULXk6|(--#%A70)v$^^xk~e;n8RH?pKp>bSGCbq?SobO(2BnYSIDgO
zt93TQUnBT6qdrm&wXIFnoZcYzI&AT7!!HYd5fhKDU|fPLjnx=oB}&f`=OQLriTgy8
zS6iDfC)fvzMnHX|9BkqbeX9p4Z0i;tIs$a|0mtvEo>Lz|w5Y>@211CR4_y8Lonir$
z*?Z#S3U&WOwa)(q%neEa0FxH;o3gSX#{7q3asdAp4Z#&6!5hDlqJ+1d)9{sOIJZ@p
zqgPyvWlj06Rtcwh2-KoU19OPanh!K<PH()(xuhwRNRgMyFd1D2{`lfj=9zT-0JAlU
ze3PM;wy?7IvB7?8HD122vZ6ntMnF<%=Fv0l^NhhCreGs#!M}4bk<>awX&rGgeZ#GF
zo@?P$`;Xlx-0VFFWfYp4I+Ul$$GgAOwx`CplBR8%1kB&X4*k0u#wY5;cRH|7$GJ1?
z!JE^sy)pf=o*q+PwtpC3)a13<e4e<^3i>ZX+dN0z{~gEw*W-V^_96-<a`~wEU(IGK
z5&yrnxcZ#`?K%F}e?$DQ*ZXH@n6)S;4p{s-jt8~@H1;Wt_?Grk9#q<W{VfJO3_M4F
z#eJc-@&k;;MftoxINwUW`^U*vb|f<4?Vi5bLC)qjhJ3mZx2Osul?f<kDs+H;b%5F7
zOvWwiImX8ACcjZl_ZZN0)kf=2lf8SVN_-yFNB7B6p9lB212)Qin3v_~x}0_Yd2}zY
zYJa%@H-4{M_wPD(@OgaS_Sn0RW4lX#p$A+|+@%=SM>G2EiMh<nq0+G*4{zb^fM4{a
zi*VyX2NXM^0<SOFRh~Ulc*&6ulQMBdLXHI81^CSyzK^eghFYMB_+^9bjspauf%G|d
z;}mx>HXB9Uq4VNK3cPkt!MNZgZiGNjDYQ_RH^RfZXbqVF{c-eOyTAaCh-~uCg6v~x
zvjgBV?mmq~{KdY*a2mpmI`*RO`2H2Ew4j9BZGc*9y?nV^V=o)%RsE*#ckDjfq4;b2
z1`R;O(1|kJh5pYJ#U?YAG<9}@ueXW)q3@M!7uR?sGB2A<Q2)69o-m7RgUUVnggZC6
zx@s=1Ij)<j>cZ38u+s3Y!3O~uZ5Vnr9>b>cp(Z{z2ChA<NE4)RHWY=LwXPQ0#_Xps
zxyR^d)X(-wFM>%CatxT_u&#{w;@MKPSO`4Ci%plnQBa9cRDz)oeKJhaWp#e-(FVqG
zE{7NxP)q^PpH(;t9^k15LSpEkunQv35mJ%gz2pJ><SIa395&v@;|BvC-O<CmWZbj^
zE7}bdjND;qv70Y4v`6Enmnkt`)#LDzM(jTd*F<GQTDaBaeY3v2a$~lbf<`w@Ly*Wk
zURejvMDWgB*LCn{gv6oyag1av6jtV?DpQ`yt~9tBG8kj*sdUFKD4G(T!we~^S5||P
zUtp?%E+S<x|4pM6j(m7T+31^;P7_j69!U_mIn3BFn$XCVf;ScU5~3Nn*ctcfwy!Rp
z<$0EfOvZPJw0LuCz8MrUr{N44zun+s9t7g0?FB&Z6Z?bRsj;8^{-w=M2X^3hKY2rY
zXg5&hd?AqMV%=mk9$?MGdBQ`R>NA<vJiPDjx&aTh7J{zPU>w|@_JzO*St0ne-14CL
zQf5RD)V`8*rqEQ`E6Oi`fo{~0>HbnMja6obmH5L!!F~`r&(#gXhEcA1jQ<bmiBJyE
zwD(*xI-fcQPabH2vUMEKJ99y!)nz@ShDLYtkxukZ)l|;-i8>q1GHu_cuAplL$@?<8
zAHH!is2{n(1~;OgDdgQ@SAEFn*uDr{Zg$YbUc1$dT}$=;x8{4ibkH<=7~@1IfZ}bv
zp53T^sX|>p$OL8SMYHiq5WmhWLeL!fdU8l4rC7S8e(C9feV7P6zJ779j&RSE@UN))
zb)6jZ&<PoMts^u98R<soGQeLF94gH%;Y$Gez88c%Mo!-jjXgpJpKxAve;V@LR30{q
z>si-zZLM-80ZeeXUWE8b!v~(_%mTqQbZ@X!g0qrJuE&qt<g9?_jTmcGaXqQx8ZFPy
z>)_vK>${$uq_M&~6`<czj41n;=o>$7<)P3r0@A9u95Jwp2^!fWb421fqXs2|p{x0{
zZ4yaSoPj(X6Ib$n2BLh74U&1vr6a4eihN>{n>|%>k^&>AnoG!zj6D(W(g)t97_PAy
zO8E!GA(xEd{h;}#pn!^tZKb-5$qnQ08;Q?2fbX*ptCRHjEvMJ-=>%D;m^BzVb?4^|
z#??=^&3~tWnI3lPr0GF~qZkj#X6}gASu1CRQU}x=+7*yEclFkNK}=#BqbLdffe^=q
zCA|V&y-(;n`Y<N|{0T6ds&vjemPVT;=Xr)$${0FrW6u^%87KdAK#9N7U)|^Mh8D3i
zA%axHTmtDlA#W(oC-D~_q@u;02i@SpHLJ_Z<{2*mW=2aX?KIi0gqZz47ts`4v(uuw
z=CgIkErGC2<%@d7FwF5tcDQ24%~jc`KMtiaPK(TCuHSOuPOc9LRcT%-BlA*L8YU+R
z$Taa>x^}lBSHvFd1uJGZLDm~49ne$yMdjmbGFWoM+NxBOAS8#XfY~Mh!6rI;|HSW=
zPci~Imq0?+hdm)|_q6>v-;ARs7QnWZjIwc{-)tM_R3;d)&yG#6VH~?mg>McoWtNAY
z7iH|%3Hdfhp2LfSh{qkno_#0gRhH+{P9;^CW^I$4V|v}e-W~9c%Z;%C;s+#g+P-Q~
ztYD*=v@K@sF*6;Up;d9cI-cZKLmqPo|E&IaFs$PpA~y7c0r&4RFjFR91x;S3(@Sxk
zs=1z39}np8USnlgCu1prBEQQ@wSjJ{@y9yTznL{*H6Ukb_yBl_Yfp%@-oKXFKRj>r
zC@QDu8i{T3wvBhDOkAa`&3ZdO%zm%6zStltQXqMLh$8qcF+CPG%@@?a4BAPG837S8
zg^0NP)Z<6y3?#>;-z{?x#zQi(N%wn^+j*FAW*p8>iUYz~c0ziMiS<jT#K;MM_8K#V
z+rxzIRnSZv?{Y)2CIoYWOiFPnBxM}JE(KpmPHCZ>Ix<=A$(xRp2*Jj-Gu8uj3P#Bd
z`gd`tJF|!RvG7enW<;Kf$43*kZjk^?ojFziiv~=OnbuC6dXQOLdnc_V>81UTGV(<+
z*fi=)%OGJfgP>|5;pk~)<hvx}@Jdh2JF&uhrk%1Ug2U%=6P}70T#=Iii+1V8NT(5y
z$3)xm5(FLO@YZ-Ixz5ke<>LdOj@$Q#7pNohL#qN5L;@0isKtX%B5$ESf#)=Tw=;*~
zYskgFE1sak6%!L4_}dTNJi+p)qoOB2ESh{=G{sEMJrtTS{c}!;lG1?aS?)k4<CrH2
zP8+ia!+e6HqjKk2UqG1sQKZ;^r>s&Vl<NbLI<-&xLgYxOXYpr>zh~+9jiukidtK~g
zP#gu;+`yJnbF~3BcDm+`^)IdXK4f2=Gz~EpN4qbAGF`YR-MvqOHE*el2!Qh6E%8~~
zp(pGCSJFP3^i4=mB`2U~f!Qm*LQf5C!{_)>xc&`m!@R>r^j7pXhV`t2#nQ0|5}o+X
zd!N$}dF(C|^r7cdUm9LRf8bvFH?%hS{jMJ7pC=3eq3!S(4lMEpa*+Wcjj@fqsxn(s
z_#xT8?!%}fo(MrwrVH_j%eHZen!f=W)`T%DzFPgGKMtW&eL(*oCmZp=Y_X3)Wm~hW
z{pbeJ(hYY<KqoF*#B41)M0ZS@5{v*ji!(0_$AShq?9J1(Z9-Fx(C=TnmAHK}P{PtP
z3^5IHrCP1(PXa9?XbaVn$^Iy7U(wyQ?@lVk;wCXO4QnLb1y0y1IE8f!IJIEfwv9Vn
zI*y}Ao73q!_cP@TlMi3kHL|LwNtFG<YyB5o^1tZH{{<9)sq}#Fcf~APD`mJ`^&?}o
z%s$-Vs<{ZCJ8x`G^wqCmy-hE;N!HF<degj;k$_|YFEaHRKd>pe3bUam@y|orgN0h6
zdcF(zIQE<?SnfvtXv{Y=l(Klpl!WGoqae~%#ulyZPDKz7gBjpS&oD!aAm^<U|7JZM
zMqT#(6pO^@?q%)8r*K%@*1|JVutBTFT3eV$85Yuw4Td4`l>?C(>iP~()c9SA`1~AC
z(&Bji3kfH}4uu$`#rSS6!8ls1Y~2GAr~##0>#4kaM2MtoWVRP2hhgQzXij#oOtZOi
zu{38y%EdB=#6xuC0XD#YbW-OMIT6QNBX#0SJRGfR?jUqxd|TP$k=<iM;Y!8LAwrkn
zV(FWf;_E!uJBz#CrTmt2obO8O$mP`INh~29i#C~;_R89B(6Cp4I5mz(UA5DyX}%}A
zM^k}ToGm8_eJi&Ia@YZh`T43L#z9A4&nuPB6T(-nnhYARg;Cc@fO`QB)z}Ycq?nox
zVWq^zz;wy#2ti}^UkaYo#IO<t(-VC?vF}j34*bd|x21iMQu`G3etLL4?)Q(q>w|M+
z7t$ZoE~ID^p}oe<M1P##Omrr0B{VIRZ|N#=-tZW_zQD>fC{+4utIE%s@|V?=V^y}$
z8Ch6N%*_|4Wzs&DwfRcvdZMaMxGZH)PVb}%t{^5ye9inhBF1z4kMi+9Fk(Qt&=~pg
zKdk1WwUmtiVKuGi_#e;lKmPFXKTg}Pf7#6m{vp4mf^Y2jftwS4BU{1H0&%H0TrwX}
zY}Ro4b{Go9VLh4ZkY%P6$q2|6Lu6Z~Ln4ny#XjF>9JA;9KPLRJf%<i{o&&kX_^1&V
z&S$(E$dVF|WR~4LI!%sP#<F@;B1oGgh<Wiz*qho36Bi|))M98jcozFt;C7MZ3aT~Y
z0JRyJQ?U75D9T!cFl6=<E&qw&3Yjk|7YwJVfMEj32w1HEZf;6q{7sQ%O@5rWa@$KD
zp%4>fcHwY5IO=J7QxcI3>{O#eb;CkGZRc~dtGxaIr-1Tci@*Rn5=nAOC~w_?AKV?-
zcm5a+DQdVjH!YyViGTCf{s7?DfXVy(r$S2emcX$GVhjiv>l3uZ7+`W}F<;c30j$PD
zuYjZsBTVO=n_4*LEXkjwSW9!LRZG)vD<?%@X=IS7*LHs#&*l8&E0YjRoNd-5X={ai
zk>9JV964&3r|6%`x~mN*@AJeSsXQdAZ<%bR6)f!Q7EAUiyEoinhq|MkWx}uou841f
z_S@3PTU$)!LVFk)wcrvZ_*AWHNoV%y8jbH?PTz8-2Yk7n>ohj`$4RFpo`!k03bRao
zwY9nD-I6<+zB23S%p<NW@Cx4W5;zt;>iT^hxVp@4C>YRW$(5g#T>8~RBG&4|HG(uO
z5d9;rr%o4pI1xwScd!5_`<{!DQA$Vct1Z(=>~FO~Kgpul{#~z%2Gs11(R7%wE>f8(
zm&AlHl>KS#vR>bZS@5c^JaN%0(v&yKo6*b+Lp5tHnF^-W(~aG68;g%`V^OxT*vssL
zNH6k`26$Di7v0=mJie8<AJ9s!`BH9_pRIXc$o_Y-x4rvzuMyow4>pbE+y9!2%dJJz
z{%5Vi&u9DJ*Z4!WHSW}n3zi5aD6~vev$$UeDq7ce;Q3>U;fY~@K(yb9V29%7+N_Pi
z4QFdK(QyH_hj&bHQG*>~Qge{`eK!niGXp=2#JV;hD4H#=gP!MP@xntF=`T3saf!y!
zH~@`qW`8JD5~u(ep<|)OvTQ&~Dg*h?F70b_J?#3-^)O=I8i>R?v%6h!139{EM}C{;
zfX?<BS)evR3M5)ohhOjT*HC;VEp%K({?WB5Ljkr1>H-JXze`t2+u9QVX7+p36(kfn
z<-1|Ti4%_#zkh(1NI_N%+Ri)gW3OS=9rt=O2<C<yR{P!+(IvdJ(Rs7yx_uaEm);Q{
zz-WxawHaa6B>FU>@of*}cmIy8n;ss#HAa`CzeQfucQ@Yx@3fEY3-{0P3*`1iyYGe3
z%*-}0!W9%o)J)F89u3BL(`5u39>>Edf;HyiRT^LyycX&9y@5AG7gFdO@ZFF84RR9b
z<k1>ZmM(-qDf}+X*6KBI(xrEC3E!Xq1t^5R4Pfx7h;ZB`5~24lgSu{D!`5mXke`y^
zxsc4p6u{F%{TC|D(Dl6t_h)o_v*S7l04>739)<PfTr1eH0wV#qP&5)cNJK|iJ}S_u
zDraY0^P;?AXrR%X@`2ey2R9@=&XK^yNJf~%M6Sq(@f-mi0f`x)F(M4zyoqlw0q4*i
z44cf}V+yt?jQ!-LC3`5sL3$3Vw*wzgoJq(E4>tuD_R#ypgF5#7e&4@=$yLnG)`ah^
zjPH#`V<!7Rir2Ij`0>DN(C4hiW$Ci5iDf6X+?t~0))Xzb3R+Hn>H5w>IP^v%HwqUT
z&S(V8xlxB<1lc0rt6dQkPE~eqZP*gKdS&^lu-@4i7y%}LAXcO6-%cpg$}E%Y5bW(E
ziHrOLIE{xM#_xi8jfXHHG9=+OMmHILM+tb6PhyG^9>urB$Sy&_S@G4tR_lsUpH?;2
zxQqsU-Ui917@{{^W1X?r@3vtY5b9Xe(3NIp{&@Od#~nJC{O@*n-Nli4Lj7-bxoIWN
zf33yVv;Ox-JpZ*8*&BNtU3#$cy$Z%dw$XvV8yDi8dG?!C`_OazJ}fpnb`W5G1OC`>
z@bT|x=^WS>jb4yf{r_B$06VVZ*s!{8e1NBh!=Fl2J_Zb-(SP})kr%oBzQ;C3^s4bm
zy-I-jqa0t)Ae2`q0JvH*7}>&t<D$Of!56~g^yl3;FnOpSU3;ML2d-`8tl)Bmz@il<
zXk6-F(~BQKM`f5REs5jr_0;Py>aG*svAK|6m$td;3D0yJIS!=WaeLA*H44z=D=3SL
zhdRDL**o6bK2s<5{E=BrVVp>`h|854RK60SgPsI-c>ea)-bwp(8wkCpU>a{(OF@=%
zFFvx?oFQi@)|C>{!j~^D@B^@m?$%$aR>@^R^SvJ={Qgg1i{lR}w(`Zd^#D`xxDkPZ
z|Fa#x<7It$wY!6_wNe<f%8mFH2w5b4H-o-+puBEuuvvCA?BA)7QKvA(V>|`_nT&+9
zPy}exM=&Zjq)azxP*KHi6kZatP4dC6=+_Gogohaiy$wFL+-;f}hIXh5%xL(ccDhbm
z#J?IC&fi*F{>ZrXp%dhRi16A;Tt3j-0(4ZP93y#+MbP||bU%jF#h{ot3TymHa67#+
zCexZkMV90TJn%zrk4(E2^5UNj+3dR6Ec?%Yc->i~uLPvXm#8d;UgH2G5Ou2B)@Bwe
zRb^jRX#8{pO(OyxZ-r%cVIdnaUKr%89jbHyyymQdDJ4ivwtVzcqU9%QI8CE_V~PnE
z^5h>KV+zZLHJ}3hmLEz_YP~@fQ8Dmg0Rp|9bL#gxoof<d^g=)XkM{8McQ(_f!t_}P
zJPU!Gd3_JuFoSG~J7wZeJGgN03a}l_;qU9;-gEUK2Y~ry_Gk#&M<sccvdSteHMUX}
zN(c7VkS$7?5iBo{g;MbgwkpL1cr#TIFufFFi>(@4Y<&gPL_=KdSy(8AoYi_!WAIll
zdJ79rhoEY#T7Foql_u0#X?_Lzsirc9m(A9n;LmJw%~OA1p8PkQ|DmC4=dFF^9>BIT
zocbJag8ipyt+bN%AFH|gZ2$Qhe?U2FE0cbE6!;@Ih&(r}9G@H=@130O@13%}Uw019
zclX+_PmbQUw+{~3*~$4{b!NuycH7~&qpDY=(YTuaB<;}E=TrLM=lnmqc7tory+H&1
z)1LoZORFo1`QKb^KIi}aO7`Cc>9KRDEN8+CfI|3SfTO!X$M0+Imj90*od^qXBY6`A
zwk$K`353ey$i9#j{u*W+(TCz(QKUiLKHg6p>EI5jsD6Xafk2TK&mvRo1os6trTfCn
z%ue5PuJDW*(R_y%THah_cjQSEY^1oUj|}M-bcB21Mr1|j?hZ)K>^|)J!@q*M?S-5U
z8#cQP+}`GFqajY&@uX>Xi;kB)b-rwnYGc9P0!VXct(}{hdDnG&+;jIlZm}(?be=sD
zZWek|l62-F9wGb$Uh@c<$@d8Pf7vb})N$lzPh=}9<m|qGQ||)?eFij&(6hM=gDcRV
z?|=Ww-!$@{ih%6DYYV5=+d;qK4IeN6tyXJkF?s*9x%zzn^Q-*X_#2EP`i{_Lq*<%c
zoc-I@%mxlLj5i-@^D}y!Z~UoVhgo&)U5I0MiX8%ykoyb#c=q-{CTy<PslpHnV<GOZ
zzJWp$4>2}pQFhF-anRqK)#M!G{zS7<>zHK=shUDK&9ZAR^g3Aj4xdC#UB11QB|W_P
zusJ&mz^DsJh+@6U0SVPacZ^zeM*#(VUbRWh5`ck=M@C@@7?Pw5F-_%=2*-6gM)K6^
zX^U$Vv?Phj^hR`rgk;wZoxmGWC56J!pOthM@;8h>{^j(?qhH$lhhlTEf3SC|N_Sn%
z&f|7<s4*2KM7q_1{!ABvJ-XB&?;LEOp0?kdY#;xqiaUP4Z+HBFI4)J;=-^=c)zQiJ
z+5XXCq9Uxhg1CKecxBR8mf;1uuG8NV`}>U{`PdPi^j;?zLYC%tH*h?|+^=CD(=4UG
zVtfg^pI=Yp3dx5{fo~_3((cU^11c^U_z^71Za259vOr#MRe=J86^uzelLgD+T&91v
za|P2Rw|D$=0Y<#QM8tWqGN_ar6tQj3_=vugIRk+=9`d`&Kj#l4eJ&asUVq{cQn3mB
z(?AuL?13m7X11doK&3zh^)g+G%K_4w!)!^hkQ5cB0O=`E-Vj}4r2I%XC2l-q4%u)#
z$a>EMZjb32VpiwnCEgO<kH4T2Z+H4n#aJ<c1SvM~)(>~4ODrAdr+d4Jbp!o?e98@P
ztV3!5HWtLfHEG3*HFkZ~Q*S4(e5Do#1#a^CP<9=809O<Zd(q%NfY(c{>tAImJgC!Y
z-ki*D8p}-LIr5>?*$rPz#c9(iS)z1O7m|fdElBR4J9jzdciXT2nWz(z)7n&8Q&~Qr
zV(M_GsPK8Rd@tmfFpb2-;mkj2&Ts>jK56L9G&_?rWG0|V#(#5ibbicP{&cdi&l>xz
z;qijWg%Zo*<L5}(Y4@begYU%+<)@oylQu&Ww-pl<*1XRJYI1=Nl&^0-p(cs5qw722
z!nctT&%>NI%n#8ggm{}I7vpPIx@=T$r_w4^t%Lxau#~4IS)l-X#5%aKpmhu$bmQaJ
z^MUdAVgHwR#9saUV|$N3l)MNu;rw^0xtfgs*jjzI|9_o7$(zfkQJ2RAD!z#UOt6)C
zxg+m#hKk<Nr(n9TeC5!^T3ZYrF^)vWK{DgtSjQhzj<t5jzZF*-IG*tBhwCc)ECc9L
z@yd0LU02uHJzH4NvrS7V@Z*+_V;N%G7qPC;>rq7Wq2NgL*ISUw1Mia?04PlFBd)(7
z#PH+Trlfivzm*%ZjvL*$?odwvh;q%wHbQcj%m7@evNg&igt-9lTrrM@AtGpKnZ(7L
z0ahLlj)p&DesGunE$G4t_CsLFR95mu3&}J;Pfe{$dIeTP^keTfas$r~c13q8P+^^%
zM6(+`qT|^_qq5D!;G|m3ZS*>)P2QT(QLT++SD&&?qn}!vRz_blNnE+MSe?N)iNGPx
ziO$mxc_NQgxITjcXOz<|cdwP4K>mk30@;Rauf}XXo7zy_$4hMSw=K5C)LcI5$=B$;
znDH7g?H-Hp(jCzB_;2Bc&)w50`vc_`u$oQbpZ*!<gvelwNf-mVF2q8YTpa?H%b4Z?
zf)&pq-nD44F&c-L?T+nSRU|s<=8?=~ALBH+G!Zpx{to1?bz`WJ3r;^$ItBm*x-RgB
z)5f3!Q7B~8*4o;@zO6_X-A;hBA1LFHher<s8#sMGbSV%)x6Zq2dqFn+@VOdypXX=L
zax=&(A9$4yvWiB8xXuTe1R&KDcu`R=YjN)z+3c+lq7(sfTfcpH&*`e%YRS|5l|}<8
z|Da`ZSPwr2QQS25Os{bsmqlW1`0JGO_`(8TNMNCvNaG|hk*DORE>w<JnHr1d20%Cq
zTsQ7?qvKxSoU3Vk3Ag}DaavGmZ2(kp?Hw<_PD_rk({ryNs2k3hBlPNL$(I#@OWck&
zRs_v<qOsj4N{A2q&>cIQ9v3L3#v@J+@nqv7L|SwG58?gt{AYUoe|ksoN&cU$#l@8V
z-&%gw|G&l`JW;5;IXHTyB7U?_Pj*h(>>O_HW~(zZ+q=8%o%7SPqqps|?UOfqXBCY#
z+Of;`j68SP-9OpeIXgPp-#e`|8VmgYx>sS=ON#jcKwcl6>}~J-C|$|hVc@X8d^T{q
z$0ZN&pwp1U?YDdFoujwMM~8cdXR=yc1p(YMD)q}AR>bb?{kpevezyGzx-N^U3ipY+
zSjX#CM7+7P?Ze&elil{g{;QL1>|y)t2p$f9f`9o+4BT|#Mr}MaX><QU^rp{jrUXoC
zo^F3vB=TY`pwDliVuc#z1H;m-@FXQir)TX~=lch{JRb?KnO7P>owBXJeBODx{S%P(
z{Nw}(+kUlwxP9_Z9NYVaxIzP#9jGKx+ub|fJKWtn+`*yG;*yLut20m~W7MB73Vf&X
ze{bA(gKgT;K8^inab>ZY&j0c3|NoWkKWQV6Hqz4>dy<>jg$1^aR+9lnXhWbZybB|D
zz&dwOY=}<d1CQLy(LO^;^Nk<$yER^y-CVjNEJx1^(2<_UA;%Z?C)mqGmlgU=rX1zO
zJ%qkovbul~2jJkvcsRF^qNDLZdV`T2QkGIIZVNm@7nJ#~Y|-tz*Q9)BwRh<U-Y0*U
zgb-bNfdUZt{k~RL+KaU65vckdzk4Uq=9Y2P_a(ql-v(e8oL1pD=mF(Lqd&Rs2vl@#
zfu}HU?q35}M0pGBu4fN<%OOLO{z)@1(72q^ux!-#&JXrZ+W)FC{^OtW$FE!L?cH`T
zxZ?%all3{Xs6CfnR@UT2#8plG6}xsXJ*V&TC%&#NHms$Ex#Ru01-5`YxP>{u+wO{3
zhprt)H4JxyLF#Bc>E;C|t%orz$LOtJcQCEsjTgEy*lfMoSR`N<K7#7{H$y&*HXC3a
zjEN(PHX9ah!sb{=fsingAPgZ7B#48-Lj+Ob8IiIMSd25dptK6*&bMuZD2DFi<2Vi}
zKDkD|fBo}cUH(y!qS;t|v9!8W17wz4t5yy1U$z!kUoKU%=%`iQ9~xgaje=`yuc6N;
zzPLOB8&-@0Y4=0<&sIePmAYL^+ea~GqN^1SzPd)cPTEi!gUXT@X-#09AOaLnLBNrq
zG-Rd$l*TEaKuNHCNDo0m^f87WMyn8(WYGlcdr^cF*^5$GY(DtUyWwoQQ`9)JD{p$}
z-~1`F+@qU?|Nmob<24r#MISXsTmoFb$%s<hw_ez-;51rNc9lgy;(X{u|HOC-b2^a)
zP<{M14Y2Fx*C*jx0Xmy)hwcr_;kGGREd@!j+w`pVEo>{(>wm9bpG_G6aMJnTYHKAO
z|L<A<`%3peVfV-*D3iM5(e@d}n9(nFw-BS6v5Wb6R`)3$Y_Auw`hKIq>c8Nf#`@7B
z6O6$q0-6pKKKxP|-0u15UYly9A=mp*?q*T`+mk=~lw1w95LsDMSE&R=mJ?Cb;!oG9
zlTOqE`*@=!ex<_089H!;5kbwmSrP8^gl$EtHsGA0PE!QCJRLhDff>L!1BLpcRXJmJ
zRV0KHQpKLLQLYr$5~7VruyMAPf-%H%E4xbJVG059b20tQ3ZVUf8FVDXGrS+R+0=p{
z;h9Do!#y?EuW)4KA%_U`#Q&h!o!{h*K$+jJ-ka%D;rm0dOLqKwKsF{EIwm2r)dl7{
zN$pztU581;<F^itTQ>e39ebwGM}%2@lXc)<18=7YYYalj&*NTuu-7T#f%25()B{c6
ztU{-Zl`5+)oK;S?4|m&dw|~VP(~ac^Vl`pxAAn>MVx<u6TH)9H$=1eDPc`ki7q}lq
zS9>?zwl}<%cn?7a%KpgxnyPp>9*A5+0>s+di3^)h>2#p;5w*ERL8dz6UWMTD5*l&E
zSD8Co)g|B<+WaGYT^G5N@J0{Hcag6OzV~Ey`(D(5epEz-S{_&frB+-Q`!C<t1Q1!d
z@6)!Aj(~$fh$ojcOGSC0ysU|#w)9WcM7T2Ag-3xmA~-_pBarZ~ilvn~emtL}7_sQa
zZ{tE3($TLzU}l&;kpxmiO%vn1A%WFQ){{-JEq$mAfTC;?U^bkI;GwJ38|(=>CA8^s
zS$>!BC50KJ&oRl8Xi`?2S@XN17|(f=+eJRixs}BUe_m~kQ*uFQWpqj;j9@HD71o+t
zoLib(o?B(2;dFtQbJogSYdNog)od*>b5WUc_b|Qw&%<%;-H!YrPk%6RH!vyw)ACZ%
z|8Hq=x%sUBe{1_MAL-Z9!~1bIV6LVJnLG02GHE*?^6FNj$wf17^4Bigt}NCTiL<wH
z+BIurw;Op2Xa=jvHCH~<L;RY*kgwC5_A^QWfGq)ZFI;;!q`6Hk&AuCEnpLzqZp{1W
z=wyIsz()~`Q2_eB{Ujbb|0P$HF$GMT|9IG+IR9CBp8vo1`ETe7Vs*r+(;HSYKiyK6
zO)f0NTE-1zFrjWUSJZDQQofc<(W6ar0W&a0r9u>axWIYn{6Fw7Y_f`!5C9X`|CQC`
z`rovk{eS*g>pw3>{EvQ!>qRrOu{=k<imfU?>x!SW38}wkd4+2F^f8{SCszyla!RC?
z0{Enqg6~Tuc<B5;_HWQh^eOy5n#;>eDgTe<=lHMxb^5=Q2#X*QlrX^W7Rmu=hJM7e
zTfR|c;M4!&JZEB}<Btc;a{@!Bfm+!SvOL@VcLi01sxiOelX#mv*8AT5r5`o_+x{r>
z1`{m+ljc7v`-$~`wfTJi?~gVAIj|)M=C;Z*1W)nhop}BeZ*8a|TWlNW*LL+mT6a8^
zxBtD^u=GL*6<$^I3NNM$FXk3rO&4CxE&MWF_~o~e9S@!Vr{1va_20?+|BU;;E6@7>
zA94Qws!#u4<Jm9ue{*(Y(JUog2M4WVvF!$G0~s*z^>#G|MCEf}fgZtOp+b=X3t#K7
zsJyW`;0|M&REpzcgm&bGy*otU-z@|)kM;S_O}1YXb$M21bj9E&Jqkc0t7{pMw0_P}
zXX)$qToPF>Qyww@FP~`sTTLrH|CgWhfBqroe@~zPy+_ag9`gC65&)Mu65uj9H7;}E
z!8Ey4Lf|q-3S1_|z~%IE0Bi9%TPKVjBMI_g=?KnoG<g9M4VT-$pL2@mKi`@DN0AIp
zd_S7J3QXMprSktQSu4->pa1gn-&U<vHLO4*Z(&gyg2Zp#5+q*5MW5Wj%cFO&0GK{)
z;w9^IbrK{2LgWMP+hHVuXTX|Eg8c$gdiwc)?Dy}6{=l>Qr+)w14Xnrde=IGh?*A>d
zRu`Y=|F@q1@kkqUKkYR=SsG;L=FW+AV<!Atu31~+ipg@9goJ#cnTYp8<O|c7q$VD&
zQlWIs!{3c4u!rHu4@KTgX>cKA*VfGTSFrt&A_W;{5J)@n+XH{-IWd_k{7X$M&bVAl
zP8!uY03G6+N}`U2{*G|b3$$+VN)38dsb=>_3LxPrDbLwqp!!4+x^*Mq$aj_aGZPlM
z(S{`N77W(t<fOuWP=$VLzE5CFzN0#-R3{fhSx*jug0tW6!_?&29;Cd4H3Vn}W8_fU
zCg{#UrnYz4KThII?xN`w(8nf%%x4zzL9DGJuUy(ecgGIhw#hUZuL0Z)-9~)5yo$H`
zUbd{4`0wJfwL;Gz8tDXdsoCN`mY16^=tpzWYAqXm`w`$X2O7{hi3bAh;ze7+P!384
zGI^6C#k#z!E0S}2mCUY{<>6j26KB(z=A?UV_f?E4rsYsxp<qg^`k5E`%~MX<b|>T$
zt&LwY5w9ij!s=BfB7~_9VF<5K@+ATriyX+}l|~7X98B*z;g!|l%(*dp9(u~d^sgbX
z`2Yki{BvX9!G034<(Hx5oIs!TaDcaGFgm#FhF@qtq~=7Pm{ZKM(#<4#1Nb&luDI3G
z%(K#+#65Yup;Ct%zDV4Y7-tczf+*lBsfK_ZNL*s@r>t+@z`l?@U=L%!xOpZ3eG}Ge
zi~`s3F!58xYMLW*CY4vxg}oscF4z`jW#a>xJ<Wexe$TbpD03Xe=fq{p@}sj{FgnZT
z=okc*#Or#!o*Uo_Bhc4|E!~m2IaZp>{~ve%AJ6gg$m@sJqxb)owYr$v|F@pcf4^7#
z|B1AHu2n82wfLnkqs1>jT8r0EiOa6>i!;epW=UxvvQ7*OR|5pnxcxWv#iOTr0Az@H
zt-_M3uw<;Zih}VXP>76Lk$73YENP38z6KEqNcBl5K6|Qu`SpgE*T?JG-28i!|1yF1
z)5rg7Su0lh{O|ev|Ev6o<NwJ4o-Y1hoY!52?h~&>=sp8%QRqHVFs_mc-FFIYqb}%S
zgg8-#hudTFwW@rdSAk=d4dx#Ej1`G0(3{;PHtK;gBefmBOQDx`jt*b%zd1kQ8Ja48
z`FwnG^nZIhXYJFY^OK!D9yZUIMF|A3;f6Ofy#47<aU8yk&fRAOIn3?vQ?>W_0ExLM
zeu+4V#q`Jtmsnh>Ow>flnq{LVN@mG#8bO3HJ5pATcAUwiGW}?#Il}k@e0{ukat4^P
zqgTY?9Q)J6L@}Z|Y9F4XI{qHle~itFDIcbc`%`ZHw^rc)=K61~u3FFgzpwI#yi~l9
z=PvXxM|41ko3x3lQGm0H%jm|XFn2ri^Gt;@;IjJ;Bl)1c5ff4YA?^N!3-2578o%uL
zqdOiVud-8Rt!DEj+wb4m?3E3B0oY1*@ZY}=yr|)J$6KNjA#_S24FJHF^EDc1FjnK3
zV%5vody%Ud!ruCyynf%t`p1+(iXC{4I}BZR-C(Uov%&T;--iPTsVc_@?8Xi;f*{3A
z=V@iT2tu4H2*T9ZKm9Rt0As&H!2vOvBkXl9qiD3YurT0FH~ipY0bnjbYYPo6&Jg7l
zZ(PpKluEY3KfQ7Nfg52yq={)*GP0(qfD>`N2_9dA?pU&2Z?L6jVv*nIeMY7gv6a?}
z)fjB2jl3&4;6!c>yuBGCC2^41R$gHw`_D6cou>tB`(Xdg;ofc=SDWp#qm%aG-Y@O_
z*X^HqWa)pQS_8V)uTbQ4|6h9*8F3PzR%ddL5*6m0>Ss`htex0?MWkM#-+EpTEX96s
zYARj`rE769vSh`<gaLNf8;6yE>Z@Hj)#9bJbQ8QRCHhDaBP%sd-hd#I8}#vqPIV@0
z40H&UPJ?68u!F$9tDu)yr@?WlvDC+T^^p%9Pd2A=(fs2?P@COq!+*=>*ew+=9A!$C
za2kz!J+`nw^Gl?r5BzHn6Q64N<b2k_j6E<BZL}%_iXoAf0-@S*s)8CX#Pg<g@YI7B
zpkyQT8$U%izR1MzIX}V9RE!fBj)wz(CBNp4E4(l`Fs6->5va!Fn5x`BiK1y%^@hh-
ze^kLAwH!FuXvg>aF=3NAggCIAOgfHFQBuRoNZ5x=SY~P<K@;Rvap)oe(gW!1ibH`j
zM~O5!Y=d}C@uUp61Te5aM(`5~04XwXMBZ@h%6u?cUCrsRo;FFm%FOEXr%GTo6i7TJ
zfpXP`7aopLnq-kBJ8zO`L^W%WYl<vJl7=aY<&Tg_g1F%{3BEWgc`RdP(4|$)V<Ri(
zqfSdA(HKfbg6|v=6ml(z)I^37CvXoKOnqKnPnb&A51>wVJCi5b^`j@5oLSkq!77jA
zpHX2#u1cwi!lO(K?AvOk*{HEX5mQg}z9{Y4BgMwk<y)IhZeRx!BncXB{=|({glhSW
zY*UMnFNM{V(xePt6NF1$6E1a4xIAV^OGu9=NNG1&oRlMvAzfms7=$qO<I%dA|4uw`
ziG#`T@|8(Uly&BYQGw&&1f?yVQ4dSx=hVZm7#%Wx!b6bqYM|bp{GQdC{Dy7G-^rHB
zsFXacAdqNZE+tkac6-qwv(CDacuvbzw;bpCQ<lMwmV-}%sl-%?k8depJ?sl;EgRbX
zwKbk0kod)FRAqKQZge!$wb9tcq=+d0nwEA=W?SR&9Ce<G)KRjD&ARG$tUdG5@~p}z
z0-#<C{}mwf68Ck|3HyL;HBC^=t<G=WdkO2+&uR=R5oLJuS_<{Oll|BKtN;Y!d$zd=
zANlV%ouL{_nKkZP3}w^$y^f_6f);?QOiJ@uMVtmL3<+vIgcr0{K<ZX216Xrlbdx#^
z@rOi^n$vOuW;029|Fpe*e0*~BD*+v}G8jZLT5AG_m|~1w=XZ7aD~N!cyqAkcic1k@
zH<bd^Y*I1eV>mIVs=g9jQ%=b1xc0Tl$JyM8qxul8%1;Y=os|tVPb_K72MA+s&s-68
zefA`h>U-fs%dM}%t`GiMdFIDVe*CIZ@<IGuPPC?ZyjaZUFnXS}CB0O!c^k$=IJ9N-
z|DqkZBiD}LX?`9VSEMQ6mhQ8?)3f&R_Q_e>;-}L>r`>365o{66dT0CKpnZD&3X50h
zeIu#XRSQdMN<+0A$W~d9ykMMJA+vWavv-TUcZ)d<ESB!wV%gp;mhRo+^u1d&d$+`U
zx0KVsQt93;mF?Y9>E12n_l`LG`EdUE{6F{ne|o>aa_9f8RyzN$wb*)&|M7MFzZr}?
zrWbKpIO=;52`@^r=LXy<z#sPSnAc+--rjckY5AQSG4~dDVdl?x-@^~EbmX&9;9k3U
z=*9xKGxqxEn(PL5m^*<v1K=N?mT%beF2;e|o$-2=!=p2{_v`-Y*=eEg86nrE75CEZ
zkKCYfiMjz|So!5-|7>sC+H7{V$`@_db}lO(JJ~*bv&Ss<^5sjebr5XO0QSiFYbxME
zYnjbiGXC<}yT_PdvJ}6iXUJ<RQ+a)UxKlOdIK9#<yZz8D56q~ElAlK>!xzGZPEw>g
zBmL=S(8Wx6mEDoE==Op11pBAxoFojiD10B;?0GCgfYx@m+jd~K`2*p-R(XXSq7|y*
zSu^=Mw0YJ`pIifZu4v-Ryj;@c_mJtjRy1%iU}zbL)@_eKiMKCYRyn%_B6P#V`^p>N
zzv#O^+95C>kmk+N(VK(4AGc3`Y`;D_KiowMVmE0BJUYg%%+_}~g|xz_=f}rKCuhWf
zSv~&CXRWYEtId69U9XJ|sKJOiYj3yxYJ2A=^2KAbF`_jF%^;6tVw(-`?f&WM{^6S<
zfW#VUh$gwVyMEsxN^S2R9qh!-W_OwwdO&|?sUbp4Rl#Ac059LzK^I=YcbM@^Hlln;
z?*(xZ2$rOa)-b1t0S1I4xi}E(hORpxr-mVlM0ROkWBONj$ilJXxMA2E_d(RM+ZU@f
z7WzU?h8_r3o@`Vc6M|@ceOwRRKBZ&qj4v9{3c!YzkYm@_?9d<9g)<g~=kz{_yrYE0
zcrdVoyV)6?N{9aNfGQkm<pF)84Brac{avrus~Vr|apdE*Igt(~`y<Zn9wTWlCgiWb
zlJoce!QKgbyM4Cv;~vn{>D$ALckkxm|9_b^A;CsV6G{3P9zmn6g;1|%eV48!nI9>*
zYvvbz!qUb&Es3siBRn_hFD7t${rW8NM$7P;oF{LOU!Sm6W3kb!CgjcAW12o#fO8c?
zS7ae_1@)vjL6TZl{`4XsYId>4!eE3~C8wBXgy+uuyqIaQEXaw0plPL16N`xk<Cz9;
zpg+dcGbSrXr!8bVo2|n}f76+tx2#z<E4{!oTDqTQViHd?bB(@avQ;>;VM7IM6$Lh~
zm$RfYm;;2l25E%w$0YHQq9>*%6tv75G9Va%jmJ#*+T+^;Bmh@b5`6ay;3^j#s=vGU
zdi(s~jLDZEu<64A(Uppr!~~F%oTZ@`z!JordWaNx)bOA{^M@Ka+lNQ!BY61cfX(iD
zp^XNtqmes=XURt(Gsi=r*oL$0==JNW(mONYaII1sVh%3#`pv=lS;ot~U&I)3XxgO3
z3dZA}#xQwvRzMLPKxzw4BLtOtxA%+wj<27kS&{crFrz-T-w>n8M$OrbO&V_my2cbw
zun?dDGEfc<%_=#!dCgo7oX9UmA5vWRAUK(VA-?cPH^LLkM*MY*_oJ%`nSx7m;^oC4
zNWL0)!_>Pz?yWj*aM6Y(92cc-VnMnJVEuEh+yJ}S9XnC7ZPx*;)H)cFhH`ead$dN8
z3VVLP@83{vbyO9|tC(T5Dv--P?-tYz6>G)0y|pZc9@E2%8b;r8Tn3sBvMnCv-;w6y
z@+rr<F3__J)j<g%ao*q_-d;HTE59#FQA_OM-2k>8sQlc(u5*k|<DmBh+<h5}4N%p2
zz5namy)||OTT7sPDB~WyJ^guy^=z-tRVBLD!j&()qE!<>Ka4~2^d>O@5JQ0?z`eoH
z1^nLfwHJ=<KJ;$f_523=1yH*5$Iu{cU<uz#AT#6G0u8uFI^Ze5ihhIg34n(#MvYJ6
z+9=S5+WWig^z3BsALp>mn#IxyO^B!uKZl_ofV>#MkCa+tlo{Bgg0FTMf+AB;0O@H5
zUPvsHTMD}CxLvwRT!>0^S=iJtj0(Pc!>H?B7Xh%ts0d4UFpBO9zK7$BqzY?%4}>#z
zQ5${x&L2nZ@TQRLo=@x0tlXiZAN$ek_RmNAyX|8Tf@1t%kTm-<pE5Kt1NY+?7KxCx
z+hd@I)q2s6fPX#b5_b(`uCcFSDiAxuoLgDy!15Q~g%RvP5*s7~BLOXzdP@4K`+c-l
z#(Jjl`t+YW2Pt{q4)2_Pg2eE~(C*4q(zxqk06Kwhcab|DR=w+9dyf0Cy0C*Alf=!g
z&Z(S1$Un3FNXniLh#I@+qM#9Re}eH)mtoAN7(fA;*g_vfW)CDWo=TDkBx!^o{XW|2
zgoq5Fij8(Xl*DpPCfplBzmLU}yno4X>@(SjY=8W0c(y*`8&X={Igqm-@kT8!@u#4u
z?4RAIw(A@WbnN5U9lMVL0=wXLr|&ygcyWS5fjyqzFAX#k>cEZcYS9$p`O~W)_Y2D0
z4d`l5At2By-P?BGLupu0Y=G-B76#@F3rnK?Ah5*-xVYT4kE+Xra<JPD3JZ|R(8jae
z!eV}p3;j06iI~s_Doe3|CKcnVNinoOwOCk02@L>NOtnjIG@4k*A38StwkHzT8}|B>
zOMwD1x}4PL2wqPv6FHZ~2n}F+Q=;58xwV!ksAOVcDLc48UxUIKcRB9$`jdKy{4kLW
zLqGC*pum#3>@mVez<Y?;DI{61_&YhA6c!O@J$~`b6;^tGRHhUOs$xOVE0$s2qynL1
zlQ~jqBd83;>)z0HyYTn5<CvzIo%7vowu&Y-sD%$H0~%Lm;0s@D@q7~&!NAq`5V+;R
z&^M^o*GF#G9!8#1WA$G^4Rm|>1MRKXUJ${aq27(eDvP(y>b;>~r>#OAHUSJ~^hGH4
zN&xMS0ZU$7&=TyAL#RNT1KuuY!5cdmq7);D9<BC_dIc*a9;wm$ZP<i@N;vSZ0YMvf
z0(?ow{nd~Z_sHvrelVqX@CfoKA}c3*#|PUxd&r=ocWCF*wk59*w%?q_OsigTZcRbq
zgn1;4BuyU&&KQTsfkFaL0Rce3COSlR1oi3c9Ll{WHW-J}s?XWX8Fy{c(=cjM8Wcg&
zHw%gVB5W7p9pet#&^0>_@&4h@J3A89x&T%{slVfNfSnX91qs0HZ14QIhrsp^->_Mn
zRh)5W+3TZ|ojo(mMq*ol?d%+EAHJcsP3<w4N-WlfS-p;Qs9%i6bvtk_H^V_&+J-oA
z!1MfUuf2V;^W(i?<uR<$RQ6==U>mOo?;IVTp6&g5e1g#~L1~HT@B~zoI;kC~#&82H
z?8gB+torlb$?5*l;k`yvkRvI}Sbj6vEoivHzbt8i<hv?%bixGL1R6~I+dkMkJ!P1J
zDiaZ^PBcw#Fk%xJ=>ND$xbZ@6slc&imo~-}#l`84A4!22#=|Aov4KzJZF#$cBBo$Z
zFafEo@k+90Yzs_OkHf%ZQytg{IfmxbIC2ZvhSh&XCWEciI~b|17mi@N-gbik6@c-e
z;|A|&2AY_0Zo`W0EJN*V!7(#m@DbeE(b2(A`)BQwqoXr^0xe)APE#-_46%fYlC<~h
z+1?>B!>lvT+ni|D8qkp2h-|e~LF5Kl?Ze4c;+^coqFi+=fdy{KDYB=vGPyLW0-_YH
zemn85CX|K|Mx3zNXfi)wm)-^NyPDs*T=51}v9rAs)_R#XS!WmmCKQtWD6t<aQx-Nf
zDGJ3CiPRy+wLivLSUUBf7D2mzWit}ASw`Aiz?8>}|K+K(xHJJ%jzt<vOaMGA#o@8l
z>Df8&yYm0j&abD812iH2&(d-$8UN2(T7Ay{@<)vSV=c2YyU%{KyLOjtM0US@+5P*a
z9}R4;-+%?_4<7=kFr1Gz#R?MZq+|O~y;Cyq*H|Lr&lbO{aP0<O5A+-?iBkH)?Pdn|
zS*%%GKQPy>$v^}nKlCE++C9NK+x^j{UCa3J%8fGLzp)1c+sy8vby7tN(mJrE7f9Zr
zP2#$^B~wNhl~PxLjZ*&@<Kj1jB~p~!(q5%vxF9;)w_d2qsyDKzmf+s*gdzzXFAcK-
z=tcxN1!)u2v=@8B?s{CX19a5Osb{m;1(H=S7rzh%#hRWTz4$g(0x7mBpjVpW{(ymG
z{JF*E#J=TtUqz?KJw^fKx~OZ^%WWYAJ*zjbge{&oX%a5o7oCvPbR`8hrzhF)OLaud
z5!qs5Y|~va#(J8*q#@^YCZ~n0-f$TruRUq=cx{c?4QI%92SXxQ>qhYp$>KPkxuvg?
zrHN>X;wCz;Z3-5ZsJ9;yT1O)q0+;=b)h)J`LN2c|A~~V%RbE{}IkU2=FT@nvm$hRt
zsL_*`lHt+ydjFSIPZBo2+D}>46ovJA7+&2h$lQu+|JHn8gGJ*vi~rMlFAEFl%mx*=
z8iy=jghCxJOt)GY)G`;GjI}0X3N>s3q%cMOgFH3JBFdYDm77+-8FimEDXca&c~MMl
z^urJFZ+LHrE8ZUtBOxUe2<!i|_wM~|+{VIi|MsWY<oq5VQleH1V6hkLa_ZP_lk+4_
z-`ed-nv+*Qid;#wDN-RRSxV~nv%fQg%VL*%qb|0a+HFK&F&GSh!C(fLfrEk(3Rh0J
z7|LVQqTmh+PACea)6})luyifc&+@kIO$OeZf%n#FnQ!nZCGD;~n`T!-ytZ+j(E$O%
zfpy+GFym-1|IX<9o3-zpzQ4`CtAbt)SC@3ZgcgyA9HYIB<_layu{}~fV`2S!IZgXs
zpjW{1Z`<`!uyWpTQL+k@Y%`R^elAX4fs;4ld{lAmlrj!<{{9}c<=;t*Q~V6du?Ke9
z<XoBg6ZHhp5;Z?iXp7z7hvd?L1f0Es4%0K7KJdPww+AZ{aeV)GT_T6X|J!$%+Py(L
z@Tf?K0bkSZ4ONAnM8tC|mJ`vUn9XsJt7a~+$QM;i^3Q*+ONq6QN>1c+?d}$u1m7GS
z{NDj>*^mf3I0y#nDI0a30_eD$O@6t=qiJem-1oM2J%fN2U$XPz;;lElxWKzov;0uo
zJHqhI7p8!JQVIX0<Nee5lD3*ICSWdDjI=a7`QqR$5>L=y90Yx0uE~M=`bB<w9p=A_
zi^cr9hcDC+bbnBqt#^5UKnGLob3R6g?e+V8uWugF)Pe}G>`;H^Zqp9D+WbmiP))Im
z^#@vxaBn?ZE{~vZXZfA6cXMbV?AO5cCJ?|tu_6}gMkP&_-(8N*Uf~N?+2S=|ad=bL
zj_IrV?=3&&$~&@pQI+?8(eqE;yz|Sw%{v>8VtT^~i7}#aNQVl+n6$(B(ffO_`5KKq
zWuR4<i2JT#1~nNd=WoRys`f21uNIe^=fzC7Em!5;tXQYKjf*Yhy-l&Vs;rw;7y7>5
zA@HH{*g@N)=!EL%YPcz{xFr1W<3Es!HEN46KeWUA<Hrs#Ii3cvAOF!_+>afK`|*c=
zbb$G>3rYX@p}n|&bSUm09a{1qwny5zfHE6lR^?gkUKU=SqMzXc5Pj*sICH(x%8auL
z1A@Qo_{Gj;Yo_pE&FrOicF6F4A?xiI^h#1xh=#^Bi^%=J&~!SSPJYddOBbt)VdNZ;
z{KD3>`87>G7%=@Xc5%#VHmJ!F)aP&wC0g^mxCAxq#%w6%^?gEh^+)sU<OEg91Gg=n
zJb8xSC!<L=TP<s9U99IvshqBro@|XZ==~HtxtRQX{nO6Hq!92;sXfJ`dBwbK&9*%t
z07&R>mDy!Wrw6_SB3UDmWHftd^PwKpz^1{Vk}DhV1{yEBXS{?jXa@4pkwB04{bjcL
zN4A*vT9CHFy?0&jyl^d?&MtV$o>7ii&lP<7mC5`G&jKtj!j@hv{TFnh(tZE$Pk(y-
zAn$*`KN+vR{}FuN|NAihKeW3D8c=k-(}bMOEBsE+fqmYS|M%VhH%xHf<Dc_@mG}RH
zKm?!1|L^ntpYL#`?5p|WG+W?_d{3>MdB^#eS+@H3c?oF!6aWmg!`xK!a$D{7q4%ip
zAU1qZ>gcnJ>z?x%O9`mw^<MXTsPOgryAb^A)0fYWzI*zEx8wN|?3|`l_5N%1i@tRS
z6dc+;hxkW^qUqeeW`F46(=X@qRn~JV<vuJ^m8sWrV*P9L;&<t*dh1)!-(0kMy?nz`
z32`&tig$AE2eI?@R@iCLR=vi!&Srmq%?3_{9}=Kj<jKkE2nJl%Gy90InfDmK9$%ge
z%qI??@RkC6Q~Myf+g2CXYQ7py$@|JduvZB&o}4tMU!b->9L<--n(i>K=*-pY;bpX=
zmG?FPRBwZQ9q%i`joZ=f?YDvJV;28a_Qr5DG8Rz&uRz!tjB>OcU9x&R-0yF_4YrW(
zUDHCQ@B*#^%?xQ~mgZ7EAexnlrUMp0<Mu($IHtUx-h%mt8#&qaMX#D|g27w1MOEl~
zCBllYMCLt+MWN5w>lK}W;XNq6l(n)YukjsfGZv{cetH}H44m4Y6_|s1e+1&{x>%Iq
zn>DH+p+wd0?kHO<@VV-@0ljklzbksY8u2<&Ak>L1e_5=0YlsU^k(Zmxl=3Bxo+mjL
zL+WncmKAu6bg#=%a5Q|Yfel#K1~&8ta|Sg;`TdV)&E+#3E#%sliq+gbz%q5Xc1+{&
z*q`A*D3608e-;O0vHz&hpWfog*e<dRQUD21{4Vd;vsn>&83Uo-@eHnh6^C^0;|#Ea
z;I~f*K2!d|@HsS^hf80*_j{$+G@J};tHJt_jlwf|^=rp#(EAlsnVl(jRaw7=VeqV|
zp!XY=RaM){(u%9o&u;+lWQ5Dl>*ad2E%f@(S%6Xw&(y8T$%s~-W^|cRF`y~fW(Lg;
z)EitXB}wM#-@RsNy?UL$QC9)$w8vT_Jb8j(I==$k1dArppwUr-hY%@BQ2|hh-J(1c
z=RIl`y`JVG@J<%lFJE1r*jFpa2e=u{h!6*~9UW?F`1ycTR?q0Hdb|}CS4|pd@aOhk
z=VawQ|Nh0Rm;d?NJH<l<bohm=latHo)I$e3z+SjIynIKOMkqhbFz>SeyuxD@Z!`6n
z;Ms6`vSYv%=_f1yRkp(0eK>{rUjxJ|gHP8Szn{Dt_e`Y+SRI{acHfOkq9BaWJ2)_c
z4ysL!d3ebB+sukXo!xugv$|QK5PaaPw2A*3mn}WokUm|^mrJ$W`p8oHYzp?=UF+ku
zSIA+Fd#(kZ6+!HeS-&J-@rgihJ{}|IE<Joe9$Det0;M;S<xTK()RM;FqiUAsc^d$5
zPkr+SrY+WLWT#eAk_mc~`WR-sDxDqp+YE&evL)W+TLG&tz3k)!=&UAN>{YU>L1SOP
zajPW?)#^S{1<rR{2T*zD@9J(g6>;b83E-Gm&DJX_<NCz_s5+^Dmn+hrG&u*PLg~m)
zlb<OKI*+RaMTGghgHG1+1pEEq(4&t%3%+9kNDI?ySZjxhO&PS*pLWF2S(#<cNOZdD
z12W+IniYw)*|0PVR?L%C+5wbm<-qQ4Ri?^mKF{|4(Z;j4x&Qb;)AxN$rWF(BWY##J
z!T>oo(`hlq8T@U7m&%}dR--7cAXUz{gLaU$c|#HF4OD^swIOC_0R=cvVBMLvFg~;(
zQ>gm&!<h;h+{1A0AfDHHC~Ota<vN9KojUa{W~z|PZOsvWROwdkz5DFE4zf+liYnn&
z5+iNWtujxmaNFIgBym9WEavZ)Dm0o;mlb65aj#aWQPEGHG|INJey{UrgGz%A6dj5N
zZ@Cx^;6FYXFIV`Vd1MH_;ql8>1m(Bhc6U|IPFcShqZ{beO|MQ}LAbrQ`FVi0N}-W>
ze)0X~%2|9cLQExKWqm-~ds|qdz?z%S2L&AOt0}|cx>3eVQ@2s3sQ}DX#Ocq~#3@X?
z2ax`FxEl8QR8JkG@>L5Jhb!-4*BdY=^LmclPFRd~FqiXMbJgfAX9MIrSdaE>?;maL
z7G7>rrohqhjdDFky)eyYW~8X$_`?ZIPJ87KOh2kT!viatQ8BM2Pl1k3P++kE{NedX
zQU^?&3BJvCEKHh&a5B>cA!g`1lW4!&_CfF+gsN|#@_`qqo<@n17ZH?^0>lm!)d^$a
z#18HuYf)5q)S|jW1lKg}?iQhYCF!OL9E5}9(8E%3)2bzhen!pB(xo70eXpFiqC#fH
zTDOO)PZOY7d)JViuyPr7MQcF2QM;9NCRc-Y!YjABw}XZ4))HG{E9~4B7Pvd^Ex8eP
z4ZaEft$4!{bcRROg(w@Uo&PjkC-y7i3}r{0q3n!P1|4x~z%Do&Mo9^2^B^f<ZR@o%
z?qa9WsmH<_LDn^YGt(3{>a3XTnnBlalOogr-ynw>xtow>M&zb>w&d+{sculcP~Uf*
z+qw!ql$*gTj_wZM*p-zTN>$g-|9yV?@ALV;mh*p=uLN~E5ZHMBkHv8$Wa<2$hywZf
z{NL~M{NL-d`T20^JsVyuCZlXRop}56#jn})k0*;PI~(Hr6-zuocj)@cS+4LchO@&>
z&(c*gEnqR#){jAGkLj1o3@@&F|DzqRqW66DS_58~*G^Se>M@Je9DTBn@%$fFKY!>8
zFEaZMQHl^SZFir|7uha83bL!8s(H1#yqJtlX;eKZXf>w`1|x-dbe6px(K$Qi*=>$5
zF>2p#%SYz;_P5_1z4-gnZ;zgR|F@T~k6wTOtrzYJd`@rWos4EjqqDbTdk0h-r(^H&
z^}vaoTxFwy-Xy2^o8f4nTxsCqr7??lxAcjP%i{&uhFCb1ph?A2yzF|2b1q(K*86aX
zLpyv~F0;j|$0Y6mdRV01>FF2J1tnh>)64;r^9we>znuOG`m5JXVJxpEBYk*9!{7YG
z<j)7cF+a?97(Viy&@cD7{+0SSrC*`H3-#SDmI#|3)34NcVqeg2>PH&C2oW0uLtmut
z&6XE#Mp8p!)q?VdBpxFxfqik{@qY2F3vQHolYJ%&ms#wV8#D}F@!4j3<?pygz@nYW
z&U%tiOFyLqBEvg8@Idd&fq3bB8gw6Sk{v)B)2><Xnw&8ZzFmm0pSP&+ZSVRgPGPZr
z>IGfYI5klf&F}Brt+K^aURxgd`9y}K2XbH-LlB?jyIv9RWapW7J}{Y1VP;Unj5xW3
zNV~2840%5NR0BFvEj6x}4~F%o=eKBjWO+O*Ro%vWlzxDJw(`MYTNnm*Z!!pQ(Qk+O
zm5EHR2r@#xTZ@FdaH0hF{!#unXY=>&BHS}2n3QnIedwGYFgfsM2O{?w10w0VH|r0G
z>w4!9xV})43yML{=65Oqm<T){%pA8fKTN#I#g$rhGh%F2c=$-YLEyf*aXDGhJ1WQ%
z#04fJZ5_~KNq_;$njcSwGf%x4s2PS%3#MA0FfAt}$swX4zl^8l^R=qbz*Dl<RuFPi
z1*tBMJTJksmUO<Bo)_R^*nlo>=z65Y!G*VTKmparyZWi5)!Cf<P}#0T5OioyT}K&h
z4Djjjo<@pMXiCjjz_Ki1Q4kF<81gidl7jF_gyA3ui$lqyl+uzgh-65?lrxz`4p^Fm
zEQAK$;eNom6fp(EC`sekf^k3Lv5cU;JHiiRArlOSekkIIaSImtK^R8_1|1)PZi|Rn
z;-@SLB2%G@QjKCi=3x}U1QMjG0KtJ^9R;#Pn1skafhmalf{7@R3It~w!Gbu^QHga5
zQiVY0fO#qsJYgEdOMEF<80#oBWRl1fQxm5QsHUV(IR8n!sl#<t`T`<Sx)~Cvv8qL)
zP_i_p65;_(pe#+&6r*&PS9NpV3V{#GKNBee2Y!&Ip^z-nkthzs*!C?YCjoq-WT?4d
z2ns=<F$`r6$bbbB7Saf^vpK7b63ec+@6e2|*|6HhhZ#erYik^VEEbl#)xtQ8k3Xv&
zK#v|}n=dL(J{}n-i6|If{%~LpE~@yCCVu!xAC3Hz#^fG-JX$+LsE&_P1AH^PxLhrt
zuAV7h^sn);?gJ=EHP7EkB(A0Yh}T`~@BUZd#7#8%-0ak`qCD~=fS)(dSwY94_upDt
zTmJyg#B(&E90Lfc$$$WKoA&zN6ARL-z}?UD^(x2#CFZ?@jr3w&9ESA^IBps0$3<&^
zetWN{5RC$8Q`NASK@1Xg;Uh1g3R2fqZTtaS-pnt}ENCu>(yJ26wGrt4qbaEB-V<bD
z<vRl3R2-@{(sqXJ?8hD&8m0rcysJT>3%0zYH|XMsa;bB;nw6*LF#8<op#jaJ8_q1J
zYKx=*)k9+XYoI+bU{W`~YKxrScC$#+Ryy@wV~;q8+Uu+l7(jQnNA75o6xHqGm3hM^
zO7XU!8!F&n4@`__#x=FKH#s~=`v0Y%pHvti_hVZWHBV<}8*4m1sp|Qn0lZPqyO!vu
z+_qhe2c_1xn-33dJyi7|@~^cZzQxhcv;{$jU=9R|)04TH&#m!b)c&HzkE?$xNA<s0
z%&#Wn%o}<Oe1g<FnXHU9k9+;kkQ;`xRd0r)s&BO1vGHe!uTPN8_^2e8%mj~h_|`n+
z`ki^lb-w_62w8hNtQGQ;IkeRD+@Gr2UiXXowQeD*LqYYR=SzB+-zfLxG(b8#$v`AW
zwaITVS(b{#z5gmL2QtvI&M}bklFkPVNM8fxT&7V8$#tQJNbOe!+qKpWF>As@7RUq}
zoCy)2eh#W>D56wW>RB*|uX7!%$SBR0rkYKWbL<#D!FmoL!_~=fwD<dCsPoAT*G02N
zmRf#To}xBa<ds;dTKiUA^wF5iEerT$xR~JLEpZt#4W6Yz3b*L*Ztr|&R<^P+6Ac_P
zuOR@s$+_-#Uq9|hH<@wS`upeKqlP%Y(hnTt<2{~v(;1hCr{|B{g%H55>a5p7blf$(
za2p1!K_2jdj`Jh}0}N(2Z-=6hjL@oea^w)o9t)Eo6%wO6)DvjySv#zNqy(Z7BypU?
z985z}DfJC@b=n`BWlJ|E-k8KRdnDhjr>-6z6I9?^x>5br{Nmf}1V-Be%&mQBio+Ua
z!i)pcM*2*Uek1$c*^Pa(8rvO5d^Yy}%X^}rDsdqZwG?=6N$;)Ndv<fYc+;x(7NC^c
zyAIn+2dS|YZlU+q=sjSg4(KmaKhEN05$BU9-{A9rbg%Pz?T)iv?00qn4i0AJ`<-P}
zZ<Q|f7Idi@{4^+0Kp1K{!Us)>`&-v|=aT;Jt-+zT75MAxuy6Y|88Ys$imbdYD=y&A
zc39s+0fy}|22yk1qMA$O?&#u(V3lv%bC+oAm7<Nb+ETyXrqht8@f>Hl)7*4C?RmeQ
zTABNe%Kb*=e&gmYD4cHxZ~7IzrPJsTtnhchM5^Et%?$>2hgGhg7dzenZ!2K#cU5k4
zrR8fJ<ZMlAnH{8MeWkEUe($QuPMdC0lv$2$f;E_o?LO?Q`Sc3pUjh6O9V^4vDiuYg
z_-Cpe?osd6h?Sngnqf#;VI37<ooP)82E`)fk)Y*Kx(L@@L3v~*Q5oYhwwf~`8YL{^
zi7?SPV6ot4(xnd}XPlOM4S<MJn>%F*OvyQT6v;T{CK(i|K!i3LvM6MnZK81#^L{eC
z`yrWE^T2pAJgrpnEyU$AdvUGwKwQPd3LcQx-t{3sKc7dyCqDv8irl!9A~)`=$ld??
z<sG%TJF0UBpOv|m6)qG)<KhZeI=D~S={{?4>&bt-nfx4YIQfqsZt~~BJ<GL?)*f7$
z<_a`NniT|@Bg7mD<_a%Ya@~I79&a{re;keTUYSmJ@5OL2JkM5m3UPLfPp>X7k8vWO
z!yJuIN3dqt3v^;w4SoOWz{|^X+&vynpHEIs9!@t1KjHw%SCBTD`Zt~X?@|4=dzTRC
zuy^S|)z1BQ+q^7I`*?t<;_xO>!U~5s@4nG_@TqKb+H<D|2kVDU-GiqG2hOR}ik-aT
z3C!Qy-e~h8QXY9T933s@?*?nCA$*)2mpM+i?X}ek#d^bQj27g2y^C6QT`gRhCft1`
zx^^7RwyKL7x~EIm+<seCS6;d5clT6H0{Pmn+fX$?{O5+<)RsL|9Qzd$erZ?i!A<q{
z9WYm@w(Wp9u#H=)RQb<as%p2(+iXu2+OC~%io2<Mze_eU8>w{B<nsI#KnzbmZ>!#6
zTh-O2(7{WGb6`2t1L;zg3n|Y1mhwDB)6}NQlGc^9s-!g~tte?dNvla(Gb(9Iq$O!3
zN$W^jMM4cJ3jh~`zo;W+L4}soI8hgtcbF`%@7#}qByk3AxT^B8?y7V-jWjvA#Z0QN
zH8A-xgrPgQ9AWqzedg;GKM*^PxT`*Il4Ci1@?@$SLVp<G3DC!g<^E<_y?7JV2s<qX
z^wov>bKCblt43DVMUfSDVcbR96}jj>k1BnfNw$Ws3X2vc#=%>Tw3gvcvK8#sh=XQn
zjp&0OcdBK^&j*@5jRQ@*^*|FZ9%zb-r<LON15I)9KvVkpK+_6X>D^>;x#F!VTp6%I
zi<6<%;;J;N^P}sZWcc%51HerV(4q$(x2>sZ4X~^$9d}lwWLQ+Bj2<P!%G<Bz=?{Dk
z5h^<QxlY-^=N)#ALDSi~?(g=FLCe`bM)l^p4=NBgft&SK>q5tEhl@J3g?pN)&=yt4
ziA1@r@`btEwU?Pd>E(LVKmB4`RYGT_=5077f4!A$DdhFX<#!B76C^L8TZ3&>Qw#iG
z9%@gX=plobQ$T*nAb1hr>=BqiEGZyNK$<|{C$Yo;SS$vH8yIe2G_b|PO(4wiL|iHu
zZeX~9(E&r3Fm&Z)Swb~PWWLy>XbmPuqd#!-AuJbQQ{;LIuWK5y4l8T%Z;(=3#k?MI
zvB7k^cVUAY%LXY}orKsM$k=B(D1a%X3NmO(>U?jI_yIM8H;9BEX;2O(tqwBk!$5(C
zu?0;GXk>#d@<U<;5A0WFu!1S8(?jA)QvkDAnZ}w>|0+}h5a=4DwlY9Vq`wNMihdaC
zbZGLZ{84aQDqu}@sj<v!5aa-XALhl%oY*Kf#7d1I%FzM>u7j~3YXXR=Ok*cZBZWa@
zG6@^R-Y{61Z7mjCfW(hOOBK&sTiA5dEJ@zPAq?-eW`p!N$IoK>)u7^uX;*rB1k?%r
zM%ue~@BGPX>dz+2m47<F+8s}>k~G-`U17XCn=W_JSLm+cOMiK`I-lmI(aQ-s4Nz%H
zJGkYdHHyV8?@yBj<XbInC`xt?sn3kmZ}pe&HCQ=os!j81Vj29dH#2T)sj#4V2{AF6
zVs6L$zr1N}dWjXx#>SAAU~~kaLGv_ZFSY|-M0Z_jfWS10VhXomL9Lb|ez6m<YNZyJ
zYp4(a<0%8x9biQrEGN7i<wOt%DZ=t18@$SZfyPyQAc+O=LOb$9u-9nt`il&V<iFTM
z0ZJuD^Cv$iaI%_DNyT^(ZGjd0dNG`=>_fs}23#yB)A_6_=k*3TKM)(m&0G}y8*mZ2
zGtp+w`DD2mvSF*0Q-Z5CaySUwRwIYC89D5}jGQ$^p0}+IhxLj&t5(!=%Z0S+9`kp!
zj?I4FQmT7DH(k6BZoBXn+l95+E`0MQaV&}I_NX^1den*vZmTI{ce|m)yvl4~T3?J~
zKO$ulG=-D`psFznq+lhGVk%OV#FNxlx+^GFlq6zM3`x%wq52X6LO}|(h}0LE3WYF&
z>=Xb!$*)jsE9ktjbb@*o#EL#YrUF2TNkc`SpVAke`Vkc%Ktd>LP#K^AkVgRtW(`$X
zf>A)G3+P>eq7QUfsvanaA(e$HQ$mDptPSbPR4GH>siYcRh~HTk0&(XL^paZB>{k|k
z&3aG?`lG2qY4Zf8_qT38gTaImO36f$0C$;*sLKq-eF0hU{2K6{nfhvHFc~lzg@Ppv
z3JE*uHegkmew9uA`hj{vHg`n$FWL{zU4Y@)jJ4kdaG`1ygBXmys8urnn0uP}>r@LY
zf#j(#Q-($#F07!i^XOn73QG!Mqhkn-RzV>&ITc*&(<cgn;G=ITNX)>55|x)kHa9^Q
zDLi2DMog+8+R-)`#)%203AaIw7VUQ)sbJ`blG|XQO9Ppo1|}$F9^{D$hH0vT5vZxm
z1VfRSph#j9l%a)9Of4WN0u!|4Krpb`(nx=WR*C`>WSpCzV9W-kG{GoJOfZgO6HIj5
z(^r`Xb68<ZOQijp#1;eS$SRolK^WO!B2ADRHb{V+*aSmvVWsKK1o|y7!6df9IMv8t
zcGq?Dg{}i6Dny`4q?!_FEup!L>0T0kl&TEUVupU<O!Hf)n-BDmFf?2gaLZ8CYl>gu
z#4<@>8WLD14lTb#IqO80bwt7}lO)2jPO9-nz{%1wNoaeUC6<d)W;-Y_-3DLdysu>5
zS3%xarZb_@6P5=f)22acIWCAS9|{rY<WRwO=o1@T{7j}c$SeWO)ErsU@>m+DdC>NH
z5aisH3d<JU2BRp=IV3i8i<sG<9yGYcW<osFB*HhH-49u!8H4L)1IFM+5J^oE4(>=Z
z29JzLiZmAj6Y4Pm%%ba#q1XzazzP&*2EOp6X%GR-#4OvSmTgkg>8Kl81`(#k;Hw$T
z5*XYH6i}-zSHyPUGhum!C%I(E$F=1_9PPFzm@{l+%PU+&c`&Z(A0FpjWyflen32gf
zjR90rn6BWd>57n<*+DYhDX}IR_W{hfjr81s%%$=sAR+@h6UbcV3mZ%#+t#751DBbB
zi}-D;vdGHuNZQdFhgJ%6W`kiUbAc>#R<}LC13MPDv|?HaI{@OG)mh$@p%Dnsz>#hC
zD75_&2li{KIhT>u<9XD=LW_|AxX8}GdIp2GwxNUx)54Kq?8LMiv}PC>eojpTaJ&FP
z!AH=X!pI<;H*<++@`biHu{pWr?MPULP7LdUJYl);6Jb7YTFDgVLL}fh#|OFWae6^m
z01;XM8Ryb5v?9?8Kje7ZvUbXcUSO$hWFahX$F}LYumD^OAA}8Z;iNSf1Tb0!w6*SN
z+90qpE#*0~$Yv9|#t5q`#L2~_{i@|Q1!Yc-mGNnu18C6=!B~$*2qwaklWH#D%nl^_
zssWg-JO&d70E6z)LG?bXQ8q`z=`<VTvc|;%;wQZ?@k>buem7a2dCIx?!T%W#Os4-c
z{@>Y`1NGf}M0aV%G4{a$>+8?k+f94!am(@)>d>`|F_74}lYF4oPzK)h^}+QP9-hGf
zgz9$UX(?u#mS3**zB2v_{JrjbySTOk@lYj8ogC@pO*ZTAcf~7iS@9~iV%3sY{NSzZ
z?BDC%`-Zlx-=}9j?Wfl-r(M^l)6V>L1AtHfl@N;Voi-lXQjqGWvXHQaO4&wbR83{~
z<-x)bSBrzzO5`wQ=t*BKpn0^ErsaAleJ$i9%qL22MWCK(wLS2q5@0b*(@OLvsjn4=
zl=*soN!79&hG25g0v5NP&AbL0icuI~PBAJQ=xovkjT#|gGB(;O*aBvvVZOHZc)(4u
z9Pnxk97Saw)H4Pyw`hA2guM*{GChxAP@64++afTi6(G=iO=$v%)H5;$wK6F}kn$#o
zYK;oUpqerp)GB6(c8=}=m@;%DgN@{CEf<U=U+Z8P)J0%Ww@w)1i2@yrO~-~IXf0~;
z2xIPRWju@{Qwy0R!=RqaB(jFkogW#zU^JUfm(c5ax`esi*IIYPrEPWGt5c!G*ZMxU
z{2?rX0;X$SCYlWj4K%mrx5#h@G*zh0L^S%fssu`gSxbQCF-m^uTN6!Ko`ay#JaUd@
zF(?C(o=};uH6@N3q9z&D3C&UwHw7cdXdMHCy3vHN)Pe!$bHfm5BI(|WO~pcvwzmoj
zX!b-myX8cQhdOj4z*ntRaVbnAapWUmgGyh01O|t%?RWtlXDAEp*b|}c4k4MZZL&}X
zCL0E|<`GKt&aQ*d>Z+ln1XWgdnH2d~X+~rux#8MKLX%jflv$0(bUq$W(5>X_l?Z4q
z-8n#MVtCNkYJ6n#q6CPI0MgB#L<UO=0xc;CAci`rSu{wktO;Y^ntafghI5!f#Zqnt
zEZ7ykR#bVA8u7?cA=iw-(K=B<5gX1%NugC;0YzwaP{g@-z{=>aK(8^1ah*yJ*d(;0
zKb3aCq6wurhgkswO0xw3X>Nj1U}_O@TQC|ju^FYQVOU?U8sYwmFtdU-h(HABRWyl4
zo9Pr?J*bCn)<l}|pW<4X?ts*`FO*{JlF;@&wz^%h3aqeC6WgeGC`oq!?gwcJBNIDx
zQKZIp;As*BvbU~X#Em!;F|$%TwOxo3QwzBek(u^oR!;HIaHzz=q+1=vinirtY&$FB
zB9|||w&i$e1Om<#h5$3BpsDqs#62u-xLM~&Eq^gwr!&GY(X#Ck?(dmp4H=oyi*uwk
zO#&mWIS|Jq8#JbgU}muKNZOe*N^Kv+%t}X;%8{;0Xl5d8_}ECB)W}Tad|fdS$3~$L
zP_@|1WvOK{5UQp*lGyYuGPcxB%<Y65;WDz8HX^Zt2iid|IYc-$NDW}?7Dd9aeguM2
z8-#I~m}xBr(#;a3v@?AK(jo|Lw#2{|Fz7yw!`MhtLKa&Lx}gw2w<H#!C;bFCNsFlj
z)P~dqV^jGAkm<3Cuo@YD)x4OXZli}GR!H1#lao}(sx8j1F`;sU)0-W{ci!P-xDmF)
z;1Bd0gy`TKqkzP3Ivlj^fEKOW=KaqC>_^-EWG%a&!JT*Aq~M`Qcmk}n9RwSk-wEhh
zKo)}>G8wCP-Js(@N~b)9|5(#mA3VPk;zFeW=Eu#4hO<6`@P?p*MParRMCzuBdJrMQ
zcY&UGCLMH8hFgkJHKq_SNj%G$reTl<tf^E<`;B2BK~a|y<c-=xtb~nJ%>@fVfnp$#
zc7nLmzOPT*%!sjN&e$rX_}yXnW{&^Nr^KgW;PQuJk2Myq-!|T%XKSKVh;=k;++EMx
zIM0U2-bn?XGUL~>z2WOP{;+&c2N5{&e$3bDFAs~_LE-69A%l!YL{)<p290~qT%Af8
zcRav1zWV{bmg9T>+K2blIu?A;%W*n}i<uwg-qA-myQlkcoXu!?F>i$WMMw+c?P9+A
z#YlDS2oLu40B$F5-(86A7P4-7x^J8>;C;ep`y49&)lc?~>reKLKarDt<Bd-CjUQaM
z70o(B%fY_*&L`8s;)7{f@<ix^;N3I^u&iSdrEw6Gs?m6!OyGNjF=!@Xn07wb7v|^s
z#3!J;h0XmS{zd!2IrTps&PKn^m+fc!%7C?MLp0!RY6EXp8~B5%4N=>sYJ=Lq!;({&
z3Z)@Gq^@*@ZIAZB^k?CI4((@YKCh`352{c^ZCj&l3Psf3Rt=W{N=?;pwK!K5y5!*w
za?u1RG;&@<-k2wJkjo}OsgZ|8<m-95D>vl<h}sRe7oG#>9~x}>qHq3f2fazK@;9b$
z6PLd63OHT?Co14X1zcDG7goT@3OHE-7gfMT6>xC{TwDQ{RKO(_aA^fxS^>u@`a@ki
zE`MVcaI6I9wX?#FWd-OaCc;AY0q*vN0{+E#Q%rrsOJ+%k-y=Nmr@x5^W~3zGj0cJN
z#<+n0g-I-8uD;QcpCpX|GEi64@XBu#r$QtW40WEUZ*=-Hj->>nHHg$ViRU#FAq9&=
z&|2vm?z^NS5)q5>Kq-C0oe7zQagu~W2Id<}W049leg)U~a4#reB8Y*!(0&7Y@nvP6
z211v?{1m<<sSG1Lc%;9@Ff|4t<55b>0rV|Qq5WhO1$fF%e-n6XF5p3g2gvj{CKASg
zkT42uUTE@=NxW1pQ}qqE<pdAXAPHqi!w}mFzR55ID_+%&V_T(h6s1rJp(bkj#<8{>
zY>b$xZ@1CX4ZBpS>EG(K8a+L_`}(xc>98MZJ2aYUsM0yr#`~yc#iYg2VF~=fG<V)|
zr?gFV=QTQ`b+O}LEV9vL39Zl*YAvteeWy~D#Dp*ZclKpobgu>u4fzm8OA^H~uE}xP
zS?R=s7D}q{*lMdaN~a?LS<@0YF8w8bgo6t~Mbi?v__;9Q@Fu(=(I90kQ~Z}S!OEWt
z6OW;{%LQw?UmergZ1VDff9QJ1hp=7voovYFJMy2|=g#btFU*4d61IDfDnK>3j!IjM
ztF+O!N*~I!YV#45)>{i{hxJ%}#1!>tGyrnFy}!wd?1yy)mEPVHRNAy4YkW7A8lvfF
zC@o@VtyWg1;hMn7z}i;e`2u-Gp6;eif%Va@NwotX>+#6?4&v6O6HlzOqujYsY@H=p
zpF(k9LmHpgQvQu8A5Zxhhd7CL7lVf;l#3W{F7x1-HFZ!z;w=~B!6sIz5xJewK7iDv
zpW7&LK;v8BtG73mPG$azHFz+OGK|(*8LiAUF18jWU9-kkfc?FCk^vr-k>ngLqmA}v
zV;}4a&P9xuLAINHfZ(atB#6<R<88BL1Z*i&be<G-_aqZbpX`5HnT<bzb=eLs#DDL-
zma7-v{rPJaw5k^cs3!m7QQMwx)V${#eJl-wH7rQ7sB!UDsTf;!Rz;WE6Xig$LcjQ^
z{v}y-d#!>s-d8C&kj(49*QSDKH;xVZN7k|rtldFo4PGOuAQyN&Pj`KzFcxT+Sl{t5
zdOVKZeC1KaqC#3!NQ(<;aUm@!q$P#4w2+n-(pa9x^VLHWV}&$UNaKYxZqvM$cDS+a
zfVVOdm5bgaN+RaOL?msD`DrAaSR`U6CSvZym<(+!_2W>-1hxt{B?#qIA+RxQcWGnN
zXJKq(p&#-*Cj5j4PD})O8b{9sI(94l>-_W#w7{y3ZfwK{Q@yy4`}_FMr)xNYdf>5c
z{w>Z0%?eln_955Hyhg9%N$Ays&~(njRD;}QN{`b}CQ`DP*_pY^l#aK#fyJXRZa#F$
zXqsjT6Jdn^M@DO!(giy1-~wT}-oov&;)G*5wA|o-2wkk@tODaL+8ATeo%OL#zw?|u
z^{?jFUw!|)RU<2cH5|ih)5spuQO?azkvW=_q74{2e^ayw@jee;i3oiZtG7O|*Py#~
zP~A$d_G;FJKI-|r==R6+qArU;ZTsSSA!sM-awhLqT2+ysQLQqpL)IgX8XS>0zmwyJ
zTTR2q^Xubz<$*I*{DYl0W3&R&ByUT@zIpO30<X=!AH(ZvsZ47Ps9EMAEPFGFc>A7U
z>a??9#nd;HirWmB%H<tT%CiX6+9Zji$cp%Ge$<#qf;5DzX}ce_Fy<l-6B%;7AJIOs
zGkuydmiOkP_g0MZ0X_3JYK2U;DTOS&vrhQ2*rB*|6L(s=iCgsqIS`^nQ4peW>Bc7C
z`_heUnf*n>qJ<D0mTtDy$@BGtc7+3YVci-22h|K@i)J81i)tX-8>iu*otm_&XZ$W_
z-rNIqVf%qP(jTfiqy9jh$+O;3w_V(cT`dIIx^PI&9>;PPTX-QR3bC-XC=}E#3dN<R
zo}hNAr@XjRT+S&j;D|yjEW~6X78PP~A(j+ksT1P`T6lpLUZ8~+XyFA~c!3sPpoJG`
z;RRZFffiAqMHFZe4lQ0wuiw~SPv`{<=Kx>g#sn|KA~&WN&MNXH^pZvSprDDd#64`7
zgoUGr%#FE_k)O!IVa7rlFU0inm_8nv6c0-#ZcG$noEMH-M($zEv~V;tPTW`&7mjTf
zVqpPN6l2Vd@vsmpl*bBbtdK9K^VSP@aoxgQTw^cDd$^y127^?$v+e#VZ(Y5MH(0%k
zH?toeL?bL_Iv-x6K95#I5Oh=+Z*grRjKU-hqcp~YoZXKg;bSx*P$oHJZ4WCjbQ70A
z8H~qm+jDrUGzz06W`Z=ypy><}LvMGn6p$lmItLY!qRHuUZ5l<~!rpcXc6<Bu2zmc#
z3fwep-e1L{s2B(}?zDzd140M@cXq1Fa4bE*2!rO^?~Y#l{pq(y&%XcL%hyM*zyG#u
z7AnIJu+{)~I-cj@nOyw4!_es@K7vAj`E9JM4|OCVd0?;Z|JK8&V0#5=7uzd`s<pks
z0b`A}7rrvVn)l7wLm6LT+rBw#*fC~pFA^499PTwfhjM+reNCo8f7gWh&*UNSqur_w
z+jiSoi)|J@P}^1R$rszKewo*4Y3*;Z>Fq|f?Ar*Z_TbmL5s+Wo0-8$_wHRu1Z)Ec2
zgY?N0HP}e|5B1VBqowB;Fb$BUVs7cUiY+TB{J3PQ^?4!k3siBgsdCcGO_oZF#3ky+
z(!|gOw^M8yy0KVDCl(bfi?CRBoX0|X@z2BrJ>QqdxKjh}3+cwf$gRO`wCf<P(XNBE
zM!OF0zAcXX2qHzdZ+I+Cq-}b&BkEiA>KwL3ufC18doRnQVH&ipkBT_D^8%?rSN{QU
zmPn*^=hH<~2z-`;$r{H&l9s)gpqIoXh*A;Cq~0eC(^wdfLM~)1D$p>VaKV#Uw7HgH
zFaV?o(iEL!l+j>XB$1$cG(w>f5uiz>h~+vONQ*d#3Uz0!lX}YOKqYc9`f1dWe`U?|
zuKWCV=C_31ncH|I@62jY;@$a-^HwIqGY9t2K{lIZtCeg&c2ELrItwaW9RSG|4?r^f
zXvaZilWrzE+~)T#=I_)?+N7s}nX*eKB0sSMp?7(4oHai=j)x0s8sS!_J+%Uu5sKX$
zZNtukzGyO?pE}?wKmNJZL50!?pcA0x(dZboteD={ptan9%Oq0aP;#j~=J+vV^ezj%
z?7^ZqCLcYxN*psORTu;YT@|5~WeL-U{vO7hMUe`_#3mA*H;N=zE`IQEB-rOng%eQp
zQm(@(iXs&Tt5$I3!U&f%QhL)tMUq4^6JcRUuT!W<8Vd`?!#Ij<B$YBU5fL)|C^$xv
zFtw48M+vu)C^i>#@X2)<g*FoM#1<;!BuY&rVnSHj!0L}JIZ>Q4TTaYCjM_*Tq<JI`
zLR(G@?O`cMc$C`uBr*&v>Ldw5o00|*v*h6QYFlk+g4mXm#;NW;d|(OMm)QuHabP21
zoF+B`1jIIyGG@UTV>uW@Z!9(<d6=gFZwMO!eZ>~aSs?NVPZC=WbYox%1$8Me2PQ4k
zrw~cg#1<-;?M8?QA@fLxKJQE<it;Lmga>&9Z>yU~i1+Vp1WL6Cfc3b|6UsQXm4i``
zw?T+YIwk_<Nn%k;7UnGq6KiN|3M>{_s-={|R$E31y?HixL=U{PybWX;hBi+W#7vJL
z^b4CtmYhg9HH{#PZTLt=+?Eqz8`=o4Vx9sr!Il$4$JuJfER+^o42)!Hi$#=J1aTNA
zwx+R6;yfivrEMzsoKp~|L2S!O0>&)rgt45(63%k&P4Klp8wpv?6bbr%w2_GAg@TCV
zwo8EIoKOhm%?qL`=g|}fuuVys(~T*QBTHK<qP(Ul@{5gt=(Xgek;sQ;ipMK#1Zv|%
z(!8cXjTz{;;TgrDjWB8C4!#;73~l&26N~daA~n(l{b?j-V4%O`h~+8dK*L00^rT@T
zNf59+5+`{Ibg9h)<DBOaW+fj+aEs7Hpf<urgm6+oTCzNna7!C>qRb<a710oZhH@ft
zULWWy2SEy=D~}|3goC)YU{Go1MA95Jhw0QdJWP;LkVjGnTPRc83taLjPoZm2CKAcm
z_8gBn$sMnKy1&V%`yF)XCqROd18s~E9f654^dZ7j41Hh&Bc~>o0F`-K4BeKefiMwy
zn$%RQ7)ZM$PYcmGicJ%dWO*72jyw%o%PE1Q0~MPFjOwH@kuv4enqmQn0*f<<P*mBN
zT5d8ioMSX3zBkFZ!2+?6bCZvw(j!cp;Y%4Hhw>N<fgDQ`GFt*Z<`u^|mN<p}vyh-g
zi5xOki)1Dygqx2=VV)LgB|yjG#DRnf%!R}iG+Q3nda)B@+^K_v`7N)zOwi|yjYXi(
z<T03t97vcLoiyNGCkC=9kHI|a;0%*k=2$@1IkbQ#80B<^=tj0K#-k*!ZHUHb9t+&k
zMarE%g1OVd0<*r;O1SuvV-aEE&?!JBI*>eZ+8SESX+eRykcH%^V+46DaN3jOq?$v<
z!YLiJ8|gqwmrhWFsI4EVm`I|$1(9<dEFdRx$~az2%CQ8(X+hN4ax5%ysb-0AdIbif
zQvw48z{wZ6b%5fW7G#bLA<-S=^<#oLlz~R>)ScAHJdM;#8w;dEX8?9Wo)0udhfXjQ
zoE8jFJ<HQ@CEC^jCQKI#YS4K;897t~CkZDF-40sHpm7}@46s>pNFF=mA>gho2yo`h
zA<<t`4jF__AK?^h(s0t377OVE(v6AOiGf&hW8B4pF07rHu(Z%pe(c7WizSSh8w*@o
zL>xN#L_We%C6b8)$@6&yb*Gp)kc<f@7Ubg(V_fF>5~xI;mZUIn^Rxtp9Lr;Fn<c2s
z=V_?)=V|DJGRFebiUS#Oht9;A|1cK0CCI?(l_U&fH^veNi%8r$2;uY*s4}{QM-=6;
z(`BIYx@5D&>9T}zm+T;Q8wM-{Cm*^Qv#}VhDVE2iQ;AqBo+hS+@~!SWXyv8y#)D3)
zdtBn~w-0x?x@xAqvL=n!AIb}mEf*k4;2&|TrO}_4Y+3Y<!|=}MF<^*=cpWwt8?Acp
zpn?Iy2&6^Wcx8?KtWNQft3UKsx83~E1fGioy&sjH+@(^>cE%1<9+^X_SVexq8yP$h
zEjt-0|9G8>&-I|c!DM|WVy(v;`kQ=4!~x7tYnvjQ@B%K8feDv@)J)e9RWM3^z-D!J
zxqva>x*<`5tKE-?2O@4eWFz8+?TDkBJpHcQ5g+Zv*jqmsD;i!h6>;-JrXTpV*rShf
z7O3s5SP|d;sn~mI;Li`melicmmSh&4IRe?Dmz2<2!c{2{*J;8`h}#h3I;7Xac4#HU
zgaVUF%C!m!p+lj?CnEDraFqj>&Y2E{YTXhaj8S`27-9xauU+oo_66-k?SPnPQioy|
z>X3{~stiqPsPhIeNu@f(oDflkCv5{2U@TPu5yvW^S9Jq4rC#W(IO=f@0G28sW3B>8
zNL4~hGFAaz>BDWkM{aD7B)A7fbxQCWg4(x>aWz4QSW3m>yC3S^3<yO*qC<(=4NL>|
zRyu^}K_3c9+-OoeMgeq~sz!i2LCQ5<fI9<J6#@BUPOvhHeJMiar%vK#47sPn1vrH)
zmb8_q%E!ABYP%3IB&vae)V4Yc66`%i1wQQ>g{oOuocKbj9S$Dh6T~_k3*1*x;RFx3
zDLjJXt9dG~koXp(&@ZK+uWtQn3J1+bwG6`tclGWi3q#=RNc9=070MwVg%jXtsbONU
zJT<(G{fOW4*%95>RW{LUl~i-^>^`2`Bd6H(H&lP{M0q?w7stT$UX$8e)`*%dq!M)J
zJL2K3?})eO{6Gr2XmMUEqI-CGgU3O_MH+8(^%pl)6DCqJ>03-LbR~{Uz8tsyTRe(@
z91&b3B1+=8=}NI6FLN?v5$;Q-tVw+je^$o|cq#*)L}AmJeM#s0Q<#Q$B5^WmfQ`tU
z1jv}Du~5#+yWMgZG<YQb#XsI4mUu4%FK6H=3e_DA_;zo%;C~<EfwznYo_rn;pWJvT
zI)E0P96*aMS50K=p}dwec^~bjiEKKF)_Ne%Qz!DO-9YP;Y2rg4&uhIwBJXg6MBdpA
zw1$Ot%kUbBX{37V(6sPC<!NvkP3c?ud|oLVl+$pgZ>NxzZvS`9FYK8>g+u+t(}CoD
zdaIuN&D>Fo;^Dw`M@FG`W)x|MMv0HYWuV?;X5=l1E}jVTmqmAheXpgYo4lk5^nZHt
zJV6(Tc*&dIhUbCuyCw0`1CJ;!lE_bDeC8Wfe0+!yU!uocxOs$6PJnR2c$$Jir`{(_
z@mItYk6G-8>~2n}MV(V>@nBa#^rdQZv%1+gQ&9r9JEj(&lxL<7rDen|T1IepKYlRn
z24RXPGdIyPB7D3jWReb^wL0A4ag1-~1YEY=JcR)OMr#Us22b1Gj=+_al!u8(WZL<O
zkXUV>Y2Qxd&Eq~*TX}1ylb;IHr4Q|Z{rAOk?v)RyLAID?VXJ0Y25h+UA=;En(X3nw
z`LXVMh$c5l?aJjYs!`enIIeeYkqd0ntNLzO-C9t$S3j|vAVS{$B8X^IHIErLk2~X^
zMwj4?jR%ODZjN}n-a39)=j=5gMx7Bl(A+R#yj}ryv1D7e`-_>nD~#pwMP8@0CKtjC
zKnuZ=ewd0e72_(#RZOUu&@p-?Efk50r4$Pp#X_!Pp^8Zri&ZS4m{iClQ!$}pp^8N+
z7OPlFv520t3L~y!Ld6s<u|gKBSfXNyilr)+QVh=k1ntdpW1B}k;4EWl-f{*lRR?jS
z+q?zL0<ME$%3>W%Vx|FD9I`|O(JzGlO3(eNAlT&^1H*HAD#%4FG=>l#`xEMY3p!}0
z?(#*@wuF}lVAwMP;3;H`BHGb_<cQN(5v2*xrh-u-MMyywL@|#whJ?vb?TAFs$rKDC
z7R8YY3i6h&h)lyk;SF#VUocgR7_#Zg)04iY9THRH0FTVu<_m43$|N#D77882!%er+
z`b~dU>HIHr+QYp?W0Zc`;tG9jc5K%A%itgFMVg0p|AKpiLMBNZCb9L2-EFB)N)SgJ
z?8T({q!Dl=s5dN@VUs2VvIqtQ!viE?XAkHk&ZMsd+$Zo7O^eAC{f;{eR8c$hk6zAa
z-%e)PaPj@c3R|1Hq1V?6C{X+wpqAd{axy!m-pa0r=NHp#>0K=5SCerz_Kt77-lz|~
zbuk>hrEJfhe*5jwtG|8q`o*i)yIV(WYqy7mJv)0l&OoWunnVrMx90%FIziO|aSinK
z&GF(5>jc#WXxs_axDH5I(m^b<PEhSY<H-r@1VuomQ(u1b>NWNMa^R82FK>S00no{0
zbU9t!!1(*}TU>_wvI=de(UP~2$#OAd!#WCF&}V1wCX34z-;|K2S73~sW|gE!2mjZ2
zXseEpXpQFDZi-z{ay3{3m^gs;IbN!qK4jLdeqT>c&uUsabxKe##UXl)y*x0265T=V
z5MmCb_P*YLmlyE{X)Rvi;x%WS>wsO>Fq{8%u(H$+q88HaXRFfmmC(Z(YRh{zoZ$(`
z%Vl<QIYn#A8)wItr>7v_eBTG8<|Lq+M#%Zy6A#b*nFD|HT~8!L#(EC;`^L#v%DbFT
zfv5FEWFl{BkyRD%peW?R0DnM$zexn)eZfx&96yvGSQ&zqA$S>rmmx$MLKGorPCSPB
zSwBGQ0&Ch!Af2<D8Ml!27QJ9q0X^xj6Ht6Z7inOkNJ9Z){mtiV{6k-|*?78F^ICLT
zkrBJGHm`zUtq{BwLX;suUwib2hyRyNPP5sL`WwYrl7^#bG)_)-FK5fki;MYUm5q0C
z9PhroJkJ)B(Qx`4gr+~bxaeF4T1sFC{z1R*TmF?`jQ@d=f1wbJEB=Q7z3hrVc)<s#
z{~s=wtKq`)?u@5gxqp#=s8^m%mfp!^ntAYhF<h*0W_*DhG4O`o6eh*hyA1z&&$hQ=
z3I(NexXK{mcrgUq0x+QLo|5kB4X39WeD~oqraYTp+$<<>?^)l2ZcU-M`T1}O(jP|n
zD4R|v-u`^?Yc~Dk$s)_nhB#F${rTeb@X@3Hn9N4g%W>xI!@!u#{WJI5m*>OD%s>0m
zfpU44Do|wFLz<|NtUqTn{A>)`N2XHytDB2#HayR~qgRk=xEPSB{ql!{=Qr?qGScxM
zP5jWTB<pox($*2lI6E;oJoSTvKAT;h>-E8JW?*V9pRS&P%>`}$dUB4-e=mO+Q0m{&
zqFPV=5fbT*AX9`||J>KaR^S}4Lsisk$WfoOqCCt=pEu80LC2x@-^0=Tcru(Bbb1-%
z{AfbZ0|=2FxzvfeMM>Tppk4*$ex9>eL5yCEH`vt5i7F7oDr2Dx_2Z&7K!nLIzYuU=
zFV6gCY+-|Dk5)hPBB~&DeeqdsTt(8Xbbiyi9g%~mRibLwQ+6MsWhdc!Z6`sIT3Ssv
z$Ar%>*Z}`>`YZOz0Co=^Oz&)moG>Qz%YCkYrT$InSLp9TeJ`vz3kws|uhh6=U(j#r
zN4g9_FDojq>FIlI*ywe7PxU(xg-i9`zU`CJrbcn;9%~9&CfjVU{2dqbS>){ivK?<d
zS>Ou!<nSQr|CfS(LdfcFd*BWTp6b6=4{;#qF0C$XQRZ#ZATTWY{f!=JAC6X+!|74Q
z-pTda-pM9?PJhF_PriwfO0`*0)s}lE9e=ipH7akc?6CSR6)cyq@7ChrE}SUAHExqQ
zD@4%B?}oUlp{0Pc@N4#^Y@cGWOb*t*YmO)-J0i!f@lpcm&FBtGy|E#b_tSjUV37^M
zke+28eb9ORob7<k@)E6OThM*;^nwOlQin}}YWb@$ty5Cr%YI&;Ll350{8m_r_RLpb
z(O64dnJP`l+7chYm1<g8+Z$!G)nYjPSMIsncEI47s{1A#ajP~i5MW!{U6$w-M^VMX
zMX%UfRKy>C*Z;cPe?zbYX4&|PSXXRf0JhkFp@_q>{l|m&v;Ft`wEzA*%%<L-FL71!
zcz8MX_D}uOp*H{glf|LVNmRX>UoJ)&U;*pyPxHk&Qn<W4ev{$lXtigx$`<EKq!=UL
z&3~Otr$fyD7f@Wl!uu9%%-J&Yu6(=z=X>9rK%GWFmCf>(Z@qV5kIv>RZ#)4Nd~$pV
zJdY^oy79`i&cOR|e(8;dGjD#p0@IU(=Wt~<e$Q3WXu0kJ&Rr;N*Wc7!eX7QonjQpL
zyJ(9pFNV+t1OVDog{}H}V1v&=1YTw1!$)eZ`)H8?D114}77Nh*l@?emW&ib|_jowa
z)`^;Kz8WsGeS^z;e5|CY)=sJr5B<?RJ2?S$&$kS6mJP@I;t)6Cj+L-}Uq!vLBM+4%
zFA5ng#kCo|Bu#M!9X=aQpFCL%XXE*~HbW29i8}+yTOq5BS%F6HU$c{DlUEGHA$}6V
zg(<bFF3tPXaA|FCjTu^!^>p~jlfM#mFxd>e1+V@rsy@4F<|3@lC@VfAA%-aLg!L8-
zAWwL2!SPr076O06-Xg?b*;}aXa;mR)&UzFm;j7y9JYj=H&Dp$@aIgr~JYSKvjXA^Z
z`pi27uu~5J7<Nv$Lb9{qwO|J)0)Y+|qPD1m6NzOV=vJr!Q6wW&J=qAfvt(OH7=mHA
zmNo?5U2+#*M;E-d#m@ma9Khd2SVyc6ip^|!F<)lA{?<7XUHVero!lSKSG^P9j)e*?
zv^81<I$F$^O9QMTP2e-NS-&u$I^Iy}AS8ax#yPx+*W`k1(3<^XR$zDRs&am+yIYKs
z7K^nn6|@cVu2H&YKFT7HM|LFsBU{XSTKUXDVgNC*BFuVG(cr{*N+8t4==H$)U?TXP
z11%?eybr9>kPilr;v~K9MP?9Ak+W0yn+?ys<&q8H51sjgFxng^Ohq_G6j}OFDgf|l
zEB)XAl6|wQUeMwzCjfiWGGARdg&F7~8?FjxE(@xBX)f76na+nRkp7;o(eGWezjyur
zay1`sy7kjR|3^1<tfc?rp})`i|7ZRG-%kIBF{XB^8WiOh-~9Q-%cJMtzkGf4&C74U
zdHKSqy(jaz*6`Jb$KJJ3>&=H7B~aBOPHtR)VAsoETF1ZMR<eFF>)nvNRE0i$_1)34
z@4x%*#mm>dFVAm|v(w4!OLL+QFAkEBrJa>N9!oo)gXqAX!Zfv(JkV>({oJ6q-r~5#
z>uu{B5l17K2d%nJ_Yt?R2fUB3QS+_kFN;<0X6t4<z{>-nx8)&~!S*#JLv%D>_H-(q
zrq^>F9;)weU2k1)n>$#xs(kBa>w0f@m*}8+W0_bYeC$E$?lp$+s=^kpZH#O-{!-0m
z>s_aef~u}7fjoJ#)|V>bv3JogY%bPi%r_f#{dyNW3dFVqDEvCe{C>Z$buMVV-i1G0
zEQU8dyaiyxeSdkW(l1EwyQr(>rhBG%pb_`2*yml-j=QD}d!~?N+w-3GOmlp9-8Frf
zJyXxCSPQ{Teo`m%4NtG<u5IkUuIX_SmlmeFZ@0Xot9+;IH)n;uHp^oVoo;R&UFmi0
zE8OHeAW3ztuH@@&KN~aGcdmY-_wI!Lm_y>btj;(4?&V3(wD2J>-i4w<w-0R2{Mi>O
z1XA*E?+r533G2vs6<zA5h{JOz@*Y3-$|yCmt-O5d*P0HrLz2464zn%qYET#`&`WR1
zz0}*nnY@pi3|D500qO{Vg}S+D9(!-7=ib=O<Tt%QZ3l*Kx8>KZ-t?GC7q(-ZoXRzv
zZ6gKFa&SV2e3AFB&0lOv(|Gx2D&nV2M##J8d}ZpZ-MxHo@%7gG!bQi66@edK=wSm@
zq`wI9zLNNAK3FSz>!84=wlw>_kih#b9JvADd&|OV2NqPe6FujD{q)t*-=F^Vo2Spe
zd3N;r1t@w?!PtDXyG6p%vs-cSA1ariFf-<zfSr1|$d*1{!m#@Dx?-oNMO?T%CC+yb
z71hlo9`5gzW~&xhXMR$<dRwaETi!Q#q2N55sXHL3jxAP8?<`wnz6RMfXA6oX7W7W<
z6jWWD9F8#1Q?dLgN@dQq?~lEc9L0%$RKQXwjgoX#Dq8n7o?NgIt`33K2$<K*drXWv
z07!3Zez~G>p}6s?`ltkCc#QXDbS{TKfjhpj)g!wFc?J37>2u#SDgK+Qsevd^(m(}?
zQOD@eQP(xk!14?P<p8`lz3Y8Ho8EY{EW<k@hBb)>U3Pn`xe;-jCVUM;?v0kM|B4N2
zAyAf?iqxHIfyzFnslxy|`Hislf`VLj5=f#}5oaX2${JK8P}Wi|Bh5&Ztt(78+zwS|
z>Yec;L-o<<;({^VuLy@@vLuWOc%D45qNTzfY0p&DoNM8&tgDlH7T(y?W>(*)+CF*W
z&hqOjwwL3jN2YDe?s~Xi=I=_%^ImR?5Q>|DceBV&)aOFUZwB5QN?8|b<5krbmO0&L
z6d26yFSC`qTzpgUjm}m)#uWH@Lq9O;?4b^I<Rx1yaLM*dO|PQuiT5Ra{NLG^dsWak
z?*4&>tc3-9Eg)}7AOut0iykgWzUiIlDykS&6YJPmLD1`ZSCgS<mNfhKS<5$T^slY0
z>lUtC-`8wGX7+z%GoWz8fm=UhD@PIXc<Gzv^b_AGZvDL~EDoCktWoRws)Fv9%G|}#
z;D8UDa#d^8B;H~msB!REH?h8J)v3Fyq2ClB6gg|Olt0{tryq*iyYMG1)qel{dv7>h
z&h@2Qbw<k@<M{zPB(VI#QF}qC9l9GndJG;d)smFrR<&ZqPrM7@O%usCoeTT!x+Rt1
z+&937pbH?_)92psx_9AU1K;F`{x+Mee$ZTr5H|pEvj*{F4gu;rP;xS#?>~Kh*c)CC
zhBwvwfT-saqvE*-T*XsG6MDVdb$@N9_O|Z5%}Un8)8_+D>skb+Kup*hVrdsXE!kqz
zP&q@YwC~iW*DnyI+h4=pw8H+Vr@TT>7#g5IF1mI`zqoi=-?Ubis>UsBE%w$Wu3anB
zb2PLVhsNl8ii2*EUEC$RxLfJsPNkC<B%Qm}bZ$kjwa_&K(H6!6Q<j=5afG$^?^(87
z=H1&kFn$=<4-DG3dGf?8b(cm-BU)fwKwFKs32|ZkJ@>AA>ezuks$YFd$|{b7n!`~p
zfM^2<TXa(ohzj~<<Q*)T`ox@1TsxA0Y0<lc0q!YLwzT$XE)Tr%WC4j2&}qCL?oUpN
zg3|2dkxroT^bHLKv4XDQ@B|0XX)LA}Ch7Hb_@GTTs!Stp)`Gj74j&GfZWML2!K<pP
zX`ab!mMuzrRciMdk@~|}iBwu;D|bwL9@cky4k^(C$vY?BMQi+5sHS^rV|+~>vc}i@
zoPJy<Srv^85M??FgitHyPOvJPasv$S)vWT|Lk|GBk@)m^Zz=j)NSC+mVJyTLt2<qE
zQ{8ls9h9iGZw9z}x~jv)DsZt|;tFVlZ|PeO%ifZ0EyebdZ$Ujbr)n8)hH0%m3Lge~
z`xX1a#kQrDfpHHh7XvI+bS#6dmC>n?kO~R=M(ud+{fdkCKr%5yWzoO3R?ilJn*rUB
zuzDo8*PzrH<7Dpg^oViJdun`{Sw&AJrAbJC)#p|16jyxjX>`So1o^uR@<dR=MrRZE
zJ1DetP67{sfVdvhX^MCA#oK{*oQ;N;%gnA6ZtJbQC7^kQXN+m5<#6D=JDZH~xJ5R@
zLl%hO7(Eirt};9zfpW#kaOe^<&Svv-AVv|jt*LglzzM)`aYcg(-C*ii1aZf?T0*-;
z6W+EB*Q!I`RpXuvCsXwAquX*)T&9Ql56Ja~E7W>cbl!rH(QZPal|5Ybuxa!11ftgs
z{+bP^dW)v7%Eyy1cq3{8bHD^oL1H_dAGs4#PxT)#!4R7LBwJ*&5l&NzF~B?^T>k-$
zi9DiILe%(<Uj60ivlnjtapeo^Q-<++s%3Un%h9VheW$%KD{sx)Q_a4m`=#%oG3VP2
zr{GmgUSJ!#H+8ze8E>u*!qbGjoS!S_S(B;ik;!s-32j0Lh?W=Gh}vwZR2b+j)!JW8
zX2ZqJ_ZP~CtkO{xLq>sSNTm;kWAACM^Fi#2jMA+uLz6Zk*GW(m%r4Kf>lN_y@VdzO
z5i3cG5`U}W`M9%P=o*Gs1*V3w|2{;|xmWlQ<ukl2pfsTzVluN3lbHiyw*Co&(>E$Z
z{jiS0=GqxKT9kZ1$uf`SHT4ATon5h}L}b6-5@-`X?qF(Y<0Xwd3PAbTt2RsSaNzZb
z^(t3X_L)8OTFStCmCFhB8APwfNUqNk_NwhJCkLru`P8+S$AlZhLE*%hZo|(<ct7y@
zU(Hq<90P8<{})M7zW*2U@bmfK-{JXRCcI~7i^*~|zc};02F>nQ^dai)kIq)V{t>-F
zEx?SNy!9_<lbz!Q>NueNem)Ycjsz>WY2VCp0=%W(E!2t_>KDU>+T7QB{iFE;S5Iaj
zc74pQ7M{mr?|iNSmtQ=>J@fh#$7+*_l#HX{YB=MiMYGZ&6Gn3(-5z8Oi(m@COulMu
z_qtbmYj21AFXOyA(721M_(k<SyS{Ml11_{<?Rw;lUIDb2PzliM0oPF82#pJ{aIcHI
z0jez)Tg!pB*c#Q|oGL6rUpHIr2wO7uu8SXxs#+jmJ0KFagAU8!_d&t?j@)tvX^Z1I
zj5_5G02#FB9zi#j_}O0Z6SQ{u6V&x=@1D6%#c0FC@RYk<f?Diws&{bKnJ#BAB=_h7
z_fakNDwo4BxGsxzz;k!CP0#XvvX$1D8uAnu8j)Df9`Fw6wO~WwmLaw8?x%3Y>&>L`
z5^0p7g*0A8nxViUK&S!MOn}G<$WhdRR+L*owX$xXk@-YA{m`C0_8*l_WGu<s?$N~@
z6?1!qj$!laFnW~1!>eS);L4$sV^Aftx^qQ+4+nmd(t~U=jnK7wm`jL`-P*`KG74gr
zL~+8?RHQlXjy={yw-Bie11V!JBPN4>olR8S8NRJ)<V;_stAOiPkqc|LHR>#)dP&z@
zPS}{WRP2LYSM09;kFKv7J3_bbtI_;&w(4#GwCMkFB!aU3FB$)=|NkEK|HylOK3Oep
zyuaUEzMWrr`{OGL{PA=?Kb>a&XnuZJlkux;HlKhvUS$*9``SOIxW80!9})cV6jbV|
zLGpXD84ByBe|z=f==Ilseev}9t43==SrE^D_<^zA*Jq1tIR49gK0Ty0!@p0Kc;IBY
zx*SjD-gurZXaBj%SC(-V7c`+GJlSic5>6(#rn_1qg7rmc%mCoH1U$vUZlKoq3sdJw
z_6UWxLYa|&rP-<nGKQ{b<7V=fhgXI0ya27YQ$3%3|J`4XzI*leXGgDI|Mi<^ukF>l
zzrBPec&?5UeD&>%-qKwtRAk_Bfax+TeunBS3kL=C6HNJ}b39yv9L}Yjo^`$(UcZ|B
znyDqj{TG=043nQc`Jdqgc;^s0e?qT~sl6c`Gn>NdW1h{+Mm-HuN-+o@d+%U^(=hsO
zLN@YW&R3I@o8HL~s@B)v{<6pruJ`y|zX-KjTw0)K^K%$?|5Jop&-rW!Jv`09)cW$D
zhHo?P7|6o|<kT$LVh36ZV!vEwmznS0==*>03r)hLa1B@U^T~*gVW4Joq<kMc4__f)
zyroRo!np4d*xPLMnQpUT)}Fab_W!wQzpcF;fv`SIz3At`1CvOvFaqnta0Zpds0jL^
z87?{79BT6?1LOGSXubgTg*@F{XJd1K5Ir64?d2QTy6`{0z+$PmV#2pNA+Mr^z@y|;
z{(+ln_?7y&1bT#`4McQylwFU`hO^V`=v_8^t4csr+h_;X*P&nbG>9y+=UjKO?KHpm
z6q_lyTkGuHQ|r7qzgXQ6>*Wuiv^EUUzo)Mh<*aX6`N@;K?&{@&r?_)-3{xG*){Dh_
zd^rMH;4MLTdFR=3IXqQ&tA`#Q#Q+#Yicav%BOxY%rf3|(G+39u_X6N>@#hTYoym%X
zKTTYt#c+8xaPW-hI4O)lhEM0X!!)A_EvM=jKZ6v21n+WYVoK^S@kGez%wLzW=sRS&
zTo<yAJb0~9XoFf|eDdUMxEQ~~4hGQC<pTE@Z+iV|kwP*RF_2dh7BvW-WiEBP3wlFi
zmAo&c)r<XHT&kY=Nt2^4w^7aYmJEe2XA^)sx>)JBYL&yooH_O$Rr_K!^b{;4o69Ga
zZj`~~H+tKfW$)ZZ@BQ>R8x6e2io&0J`H*&JZ*Kt{a8S6v?<OCrad8IhwO{Da$rwzW
zp8hmh9+~f_+3ILAHcJZy7qf3vwccR0R3Ag6`0id_^cdG<N?ONbptbM)AJg}Hg{wWf
zBOv3?zM3qG5BgQj!R8;Z)oL=k%&I)Q>J4d9+Kztw=Eb+qdrsarJ3FPzhy8qOtUDjr
zfx8hO*jyX%0R%K754?%X6m6PJ{?y~|pM7u+?N@Mx)7w>QnEFQ(4W8$~gK7ChRH(}@
zsxH4^T{Jb`FXbmFk*qH?UBCS7>$$7^Or<HQFt2J#piSSppWavSH<dz2*2YU&r|dRh
zDs|TC0kXNo!?!CB-^?d7GYIV%D5bnF`SC5Sfm9t1uQ4OmPg2*-g&)-G?568K=h<qw
zsRyX$^`9^dW!e8Lz6AVv{pa_*|BEMvUu96}0yls4mp@M44d0Ic9d>-3MH=$5b6B%#
zvs{dJFISVPihhTK4<A8M`n^7WnD1)B47ePggE*~8$33y5Db%kPVy2vEHv>WR=vII*
zi(Y|@&BonxEZ+?;F0%2_#c;CNq#WE2XebBmhhaV8=Pl_gu%>Dgmv1K*^@+fbSwltY
z%bbi&9h*Q_bcwl{KaNgMYC)j2r(n5W9GxuY=SS1*WYrOrMtM_v<8XnFoXnbeW(~5D
z+QsP+PU?*SFtOKn$K^THpf*`4467y(8oq|HTcDaKQ4lSaQAME+(*pGC<f0woNGY}*
z&{0=+f$4%$Z`Rc^soI8}!1z5nzBw8l5&Hajobl!PIh5o5_I|JU>3IFqX#LZ8{ZmGt
zP}oykA63UiZMtm($`7rkVErnNJv?=MV|;b!6)+>;*c@wV`?@LvQ-MS8H+ycuRDcFF
zx-V<Rb8PSo*YS*RgJ-mkXSe~LQA54Px4~0Uuj38y3>)e-z73uVda~QJ^YPvAj2i0Y
z;MtIM>2UjRYN;%{zF3&GGqOwRh=jEk*MZO&JDtvthts2z>F{)^?6QW+U}+W2ORiN~
zXdv6z>qG<VN`u=q_e=xtN`u?2_e?{m7($o4nw)1#4*Z1Nv(6)j2GLV*aXITf^{(87
zZ}bZW*j#*yY82yS9Ao(-TF}fBXiH~=SM1246OapP(ZtQ#BNw6w#57PbP5T%{0A3H^
zK#3M82Q=&g>cFdq)>I3Wmb%NDsky4J9AfvhLYB978GfHi3J#U-KJdzEhmfK!>NtmR
zI^$)vc(Keoq2^7fvE(~pmo3=I)0r4};T_v2=voetH1MK<7Z1E-;H8CPZ_(d?kv~+Q
zH{zzmyLEgke)vz^vHLrf0sle2ga{nnl@B&95dH&R2!R>G6bJ?clHHmstlj)j;-FN(
zgUE$a8BNW}?ys08NfFZ6<@Z`%qsT}}I>aO?WK;QfVU7op9pzd+m=Gy*Nn1hU9t&#~
zE0G@E;XN1JDqkd1G6ZD_$&|R2nvKYDSJ^BZ;na5jjI>@9Ybok3gLAz!-d@@r)RT$=
zWv2p-C<yPfVS~+zzAq)Fnw{+2$UhKMbtv#oW4MyHIu?2t9jKbeHYnJU8__+p+byZM
zZ*57f{#(|25QWJtgEf_LH|4@DhPM=Re<jKGlI;KQs||w<%4{N`>R7MZ&uO#171X}n
zCR<;TQVV7(@z4_6kfCGStd)G&hV{WpZmjcdC|M6YByG;>^#-&RbKQluyOLBxNe5DD
z1VXjYyAK&sQ|@6C-UlqVw!Ym;e`p#|&26W_K;4ao+Y+!=dQ<V|wL_qu%ez+Xe3RkT
zWmG*(!Rz1dFvRbRs(X+&w;?5O>U(XKF0|BWTz6K-&Nm%lhxcSwZeDQJ*NsQ}eNyzn
ziodtoSLuy(T&K7G@>)l2BIjvmGjBC@{S&lrEF5~1Y4bS1<4F6EZ8fbJW?h?@kgysg
z8;Wnj#hY<$tPK)U(Gls!f}gO5n_|CRG4j^MY%H#+yy))bZ7!S*Q%<ILw!RH(HlW%-
zsvG8P#<qd9HjLSda07v7*s%*vT;ki1{W_xEU_H_>T6fGS+KR}V@Z+h<S{Zj|Ms6#Z
zbu0MZ%4?RL_g0MPR?IE2H%rr7B5jtPo8gMOOqpGn@-9r@%6N^)Z;!c60^SmBn;g6i
z&Nex?F~T+pC^u!Ju9I9NF4Q%Vo7P8Vwf@tUMhEYt0q^8icv%&_EyD5e7A15U1+D7*
zX}0>n%6v$BX$%;S0h@5OPur@{1D&P0&gjZrm)cjQDQolmydwYc2KmJu^M@7rM;qjq
zZTSmMtCZ)69vwT>txp`MprO}FPOi%EtOTF5!1EG3lPwuUIYZQvAuMI!q9ubYSDDw0
zqRIH^ESp|ri=H_Yo{t(YKZWj%)KKeT&i-)E29NXLsOU=&Nb!B&n@kQXOVz{64B;Xz
zw)1m9H3TOUm;Di}*N0MS+!$i{t5tRRDsNli7~Ws48Vaqzt<mBg1oF8s8cwIWuju{N
zf%oQex$=ffyrzmy8_>J{60hAtSo!_VmI^^{zJeU@CQAi30YnHe9g;J4<?>-P#6nIc
zvoYO&Gai2WPR&dkG7@5ue%QrriiQ=uUR_1)4W!au+!uMu`hsHRp#P#FOQoE1c>ar)
z;@tDn_;QrJ>c0`DVLhg*wlVjqy3SC?YtztL)8rCy{a)9;xklj3cn#Bdwl@2X4_i8>
zu5`L8cH82|)mlW&rf=~o*=<|4P`Nd%`?|X2ic~gDYwqJa<sLQUZp}QtOXjScxtZ{M
zxL{Y^uIZ|A9Nz<ujhe9<#rW<h>YKDYeQ(&jC85L`6oU;Ly@Z+H9rLIfb9i^m<7&*o
z-7#nNm@5VU;#A$4bp0cj3D7rLdkmij9E5%HSG`vQh}TFRNFG+ZXf-$kUma4E02}+V
z9+>dD(9*B3lyfiExfZIcN6AV*iw?cvZ@*O;O7GwAt$`i`C|3Xoh#BzM7=oIIh(KLJ
z9|5zueRODlU%k^p4(Slxz+2ACEk~-7!a>fklv3r4+d`FkL-Oc#TpZd(kYfTF)+izN
zPB5UN?o8U?jyu3n7ZJiZWJ!_)t-!}c;QG!qSB7aezlVM-FN{@zjR<23kTLuRzZ?$=
z##NwW!U%<nK%FH_kxy7q1)CA9I`kSTiUZ9;#jb=p@2gAVhdmIVsz*mvosN+CeQ^hR
zTm?;SxNooC9co_58hNFB#adG~@EknU^}NnH?2d=HLNszu@d~+9c^F-8lOfZFv6QLc
z8=x~Zt+grAl_ZUvdOlf>))`a)gNomNW6D{46<Z9W-+trfjJW>4f-}9R&McBOj((|_
z8sVbY#TqEmKwJSudJ4x2k>lZN_>=tEJMexht`B?fD_f}gQv7~?Ls8#J3=VsUy6=zb
zXR@xin2U?u4HtKD@w?#?E-rC5T%n6Ayc;g*;*w===DGW%TBlL#mqlxwMw!#Bbv~uk
zp@o`MS_7>~BcEwCNsi@S1CkrC^>mjFvN}{Hm8cf1PRVG@P@2dZVU;NPpgy@i&G(-a
z1JCBOBl7b(nVo8Pdq)@Z`LxL=Sd0JnC}6VW|4RxHefIzL+5h`z|L=G4|NdpQ|MxGe
zmdX}*6XR$M;l&&bSajq_<2qlL{%>4=R&eC+SHn{6Ndl<0ruF7`xUhVAL2xT%i}C#3
zjFzmkg|X7eJ)`4`7_ByZy9&%U{d}fbub$3NCnLYc(IboYyzT9Yaq;+aKKn|Gr@Y4A
z>Y_Kv{W-2zAGm%von|m4o#z!s@4VhSAPA?I9sEP@up-BhXJc=81<@fsIJp`w-|FW+
zCu5B3+SNHVMxwEZ5pv{lHl4pi9~i)nOI2gy7J5qc{BpXQTmVYov+z+v{R%FJ%R0>J
z!hd;rt|8DXdK>gJa<05mP~VUv^&O=G=6wJ|Mt1{-peKTWx&mURH`YABFD_>@AiZb&
zf3E?IVK~&MUYre=dGap4ymJN<J~WSQT-6oGc$wqn8rbW`!z~r$FinL2sNhVs(s`)E
zSst{qZ_4Za^w{-!>vprA6esA=wyGs_XQ#PObeqTZjPCkIcK~cnceGNjU6!j#ZExqF
z-kqUP?@^;+|G4z#j#jb7^IY%yxx;gdLF4wmILH}C<5F-209#ObTBS14<BsX6m-BZ6
zjgKS-C~cGZrRlHidIB2s()8wVMtfKly=u^{ckaD)Y{{c>m!{>_O$&7X558xOjHhdm
zQ#tU?hSL+8Kb$7`<q{@f<y@UUySR+IT-)5p<>d_PjmS*_L-fY(;Cp7AsIR6s-nrTy
zRPS|Y6ibVvTuK2C$^BD$1^RN?(03?Is|KZuYpolNb78mpyxmfZd-&9c1;f{!9ni4o
zk+`Ba*ZNZwv%Gi0#o=PhcEPKBO><<qH)XHOO%kPn+IEV|R98{%4)=P;&JQXc_+IN!
z#SYv+4!4h0@o-|$R7>o+*$^}4aoN(%SLtKA&d#b`JZRA1YV6yB((}7Da6kXMSNk8v
zfO_(010Uec_CJ$SuD$=pV)oho|7`z%w*R}^|NA2_m!MM)Z}%kJ=QrWZn{egF;Ec%A
zQ}16dXY|&FKHq!j{j%piBdZ?Lz!iqf&$onrvF~nF8Qj4oDE>aaJ5&dN>3^AD{An?_
zZ%Me%!jN&iT+OH1tR6q)|7tjT`!!e{y~hf;o*b3#FLRn`Ff7lAUr&&Zfp;;#$fJHO
z-H=NW&Z^5}ZPjNL)o*P>LoX4B!+d^~El#HMcO3}jw<NTLMpQ%b4bg<vP+~(gvKlIE
zLlYIzM7KmEJ9J`emBJ24TANpNK+@XP{5Cw<Wn8=C3?E|3hZ)8inHPt5FTU*{+e}EX
zN+D5(iVe_D!Oz{ANaGw$Gko&dZ4j(}=W$F;k=%6{6PEO@qnNO;;I5-sp<%ZievB9K
z@Q!$}^@0DW@j+WT8&0p(<LK4x(V2gEC;maFP|Xa)n1x_Jck8Hi7W*J$A#JV7Cqiga
zCbXT7`_eE-OY(-NsJGs;_SB|@bW;Jk&G5`=jh6YN0Yub+Y}O#^4PVu=sMgv&CjIQB
zIxD&IM$_Ts9G~x;d&im9W8dNOwc3lC;G?TxFafA<5Q0gvT;d6>i);bV05w`%jwWD3
z7&}sH`?wJ0ne{AMUp~(Uh-Ep)RpHY)mU@aySYCDl<|HX>cCC1lm*IQ=li6KJO+Ok~
zh>LldF;vvOmYH{f6G#5NmrZDGTIqyfwUSsHDkEfm-lvQ(-4?O6<?_M6)T9ij?}j%^
zPakwyA`lFk<*I;i;DK%E<+pUyTeVC4ov$W%^9QQ=R4=3J2BiI^A+6L?A3mZz)Q;Wg
zJ2s7480=)z6bH>-=|yLJd_eEl<@aNp*BQO@%N3rz?fvxlmx1>f0rRC=Yvrkj9|LE<
zuiB)k+5DxaU6aBie`>p+=FvvpFK@23NC|q1P}z(v+0?Y!FFaekITcl=j-kO>w_bZ+
z**-v7DTML-AuI0?tV{JyZ$5UF=EZ#Pon>-b5YHdARBP>evHS~6G?Z#-_eD|MsS}h-
z^;o!(b9;}9JF1vGup6ZLMymSG^<jW+?XMZxH_t*Uf%=MGZdAMUdgN+>Km(0r<o7dk
z7QpMx-_3Ai9EbPPvwXH|${Kj?cwedufQGMVDNA>3@%d&Dp8CR|db`=Y)jS|khB%hP
z*#u<s2v;X?eQ&vR#esf*vLp{$UsF!@)}%(FrN-K({c64%PW7gJ_qS|yJJ6j&eVU@>
zd0Lb*RMyXhr^}a@=edNz4>%`e<9u_PzRl5@ZT`*d87WZvRIK;Ue;QExttfZOIov^~
z$EOuB3bwBwHg^Xn(!713DRqaT`cl|`d{q1;B5Fz0uesB}$+^7vhV2z@R#gzRbKqSS
zXrWT-X;FWvN3b&V_?dmUwMv@vf*iknuUGVYX;YRpKLDI}6bbb=4PEr+fgXTfZ#dg@
z_ue#(({6LiRkT{o^iyiS=H_T_N<BCWQ;zreu}fHKV$?jZb5L^?O4A9z(MRiv`4sW;
z``3D!xGEmhvFuP7Aa9z+X;mev#%T@B4HS2vR~O#Xg1|8P6;PnYepB}p8FXizLEoG4
zjkP)|3~|G`s=j)>>yM?Umm5r-9u@6R=R@3EAm*w(ZC{$z+%51U)~j6NU)w7`;=M|}
zc^y#ns%O+sdTV{t{ofTBW%C7|&p#htciaSS-v8w+ilWl~uVC@#^WVSA{a@+5xCG3Z
z_thf18qd9bfW|<1|F`1bzxDoa(+kptJzW}~<tL)m*V2=#`d!r`;N}0%-n+K9jVlSl
z^L6@H@SJ%jFXhMz0g$@HiD%MIx+goiJ#jMIXM3}HXo-p$>(-K-%VhKZ_C?`Bf+PTf
zvYa@HdggRnV4+Yb6sih9p|G4L%3F~hI0W9qcoRQ~KP@hGc~REv(07>5$H~Xx2I|Wz
z%)Hfm6RM|qCL{%q#}$bWSCpf6PvjPXqRu+mykzSrUn!*~#20_vmVk0k1a2g^TGSl8
z_4y4u(J<@Gy@xPxkAb?j$4EJ@&{CjXrAYk%D^}iav8${_X72Kqyg4^{bMR^p$U$__
zgBZ5CH1K?3WuosEGyG~Rw5!Qv;?IlA*=*7JzB5Tza1YH4uuf>3aN}LFj^oRN;v0;(
zO4)Fn&VG>LZam&%&gh3-_D(oDR~UR4cJIy|KW%&O+8$lmPj@8-+X?`;{+v8<<8Jly
z&H36EgJO%EQEWN?Fr88ObTNMdnN-xe;W0HIvo#N0Ven;DAZ|DcCmev;{ZsLJ_Jh^i
zr}0Nd(@Q=uMgqNKzg6go7_<`EbfnXq5AbVtIrH_|NZ>2n6%nNSK}$#+2j>@ORZ56t
z%TV?@TW#`vVGp)32+W^xO9lON440=FGi%2IX~zNX@&YEm*HXAGOYK}&2-z|7QHA*m
zMWr!Ns8F;L@NUX^H>JE|hG$ybQ7sHu#)oM>a7tM|fZOSbbiUUUH&jog^PTF6bgt?N
z`pcpx=+~cIPh64tsKR`Wo)BZO3q6tJzXm-amJr;n_@)X%*6@Qi`@a@!c?O+89RYQa
z0v%nVBdkuLBp({tRqgC3)S+{O@7?9JL1L${-38jPq-{6th_#BiTDbKf!WP~Q8^puy
zMlqe2xtq?5+~wPqcJ5Lb>nI-ThVcpwU1b=BZ@>Mru5y~dl+tIO-zI8%y)Q)_L_Z4+
zPXD;aFMH8HvtuBwNNi>LwPR1LGZ-pWtr0c8=|^)^0xsQG)7=LomHU7h)CGsSpimJ&
zU9|MK3ysP<J3{#QX8_hOJ%T?G{P~XkiQ&%+_9uZqr^Um6c8~JopL7u3h~t3jX^gn8
z$@YlX2_MA!V3k<d4bTW@&naK8wmmw0QdUwn-`Fx%DA(eZNCEXRSG~3iit?yYkqMI$
z1fddgVAX_T&EEb+w+Xr2tj6hNGTiublHHNbjHR>2T9RjVY`C&o)g6LjqlJwE9udd#
z0>DB*MD(z0x{1e@`rP-Bdyvu*%n12KAFokt7;+Z*tv}~)@*^U_&YwWGwL{1B=B{NP
z=Q5A2GLQE%Z<6_GymOH^3&2tFf^PF=@7{zm_i1lJui1ni+HL6J_C{3Wg1RY`bp8z#
zO4g`Zp@1IQy%D7i?~Ul!Hll}iD|(3cQg4#_otV+0&^w#aa)?fG55kn(LUYD^@8#md
z)A-$Db!^Uqsi}3cnAX%PpKdxdRH;(mitY%_p(SlKx7NxV@s6!ks@1nv*v@kwCjs9G
ze>!i5pTawjjoa8Oz|P~~j#&T~G4C>(>`L)p!_nn9?u;(482=TLuoV9l1}NCafBlH@
zU*S(3##PGW!NNwLJs&=K{_OXM&tL!g<dF{Gx7W{3yu$2*X{SjE^81U|5EeG^_?NLa
z9G<*>_~7JZNbUG=2qDs?lH>bcFNlIb%_2f^4<}31?VKOWq!_k@BC4Kn4@4_1r{z*w
z@eY6t%el~aK}R%UYtQ_o)>k9jO6P}u54R)I9*FDlnq@4?QPF=5p-Ce?bS$rWIUuqT
zmLmd3uDC*{Qgr~wi}`XK1wkhe-{9sM{J&V^1;XE@A9|F4h`P#yF5g29<mm$2SCB#3
zaG<wI_yML2qCa5QsumNUvp3(25S5@ivX<acrE1<g?lN;!2OcE-W@p;jcPJBB3?Fz6
zkcFpoRj?TO?A5q~gO+~_(6!oGH(C}z(FTRi=&uE5$W?F&pUg(fPd>B`jSC+*r=zVv
z{l{Xxd9`?$yr)`+V-R9+LP6mASUlq&y*2xBltcW!ycD0_E-!~;_9tAY0I&ioE`Cqr
z<-J0IW`4B22UC9M@|YooWew|v`=qI9Q$GjF?aH~8jf;hXe0J=vY1VJ?mqXLEZH4NN
z7>ayUV2W{eDu=C}+FWg&>|`%quanir&n4)=`qO;mXKf2vQ#hANs490{a$YS!{@RKF
zb-4V%Jrj7u1cdN%G)mTdHrN_h1YP0X^)${W#wl(4e6^T^a&beVIT>|P;{kj91|LM^
zV9?7}tXfPpdYoYja#iu$DoNNB%an~Mk@`qi%%H8?%_SRoJ?FLX0sh;83y1uM5pEro
zi%7L7suuNH-)^^B8%YF=UZ2Oyq_qF6i6qKEc^r`s-x#g`=E>-yyW+3g=*<>e<bKwI
zndJRm`OQ3THn>xT$nV@QViQtLY_9RCa=EB=t2`bcmitv22Z|RvyH7TcsOk-A{2{pu
z*vKbx@roMwMYXPiRxvhrj>2O<KaZ0;whg=#Vr`$6af{IQDhfh;cX36OeQSexS0t`r
z;dg4?cV*vg8`9RTcRtlz_T6(WYrEE=H29SxHufh?-*>piu5{+ltOTxSdhX&>tl5!R
zCJ@DyLU~c(5>l;M6zo0%WxpucFA9FXMZw{IRj^+b{5Y$EVl>N7y(*|%4(!z3xR%hf
z{c7Nbtp*Nn_;TP!oc%xTa=;Kv2jG^BSJjhsYb=tbnl0W*rp+pyQP`U@)UmcheBH9g
z-j1Qp^+j3zIPX76;a)@j-reN?!o84`^MB!BAOE$F|JtYVa!BJPHQkZsU1KyZ?*Fhv
zz3A9`pNx>-RpXcVm$sMbO@P4yhJmv{qwvV%AK0%T`-tFYUw&l2Q1%hCpVVb!AK5RQ
zeIzaM9y0sLev#~B$Y6>17;*~a$B6yJ;v@SNi7ZYVI;hj(3_9{>_sx+gTP;2m%4W%o
zx_N#E$~FbM2V!YWYl!sE94xDC0LzzmC1x{L61ADGt%5oRtZJw>-Q`{nV5I#8J_vq<
z{3q51LNEg0W_YNLZeX7g{oFTy#`H6a%-;!o@0!1da4(&hzehkFZkfmrvm5k=&p<vh
ze+Kfg`7=;Y%%4LW>q16*lno4mZOZ?ZtQNkUJh)PV_-IK<$NXn#>OHa#9<mP}su?^~
zGkB<G@KD|0Vf)}=``}^w;9>jVVf*0WZ1BEl^e5KoPYh|9HVF31D;gB5vzJ0)0R?IV
z>WFL<OmT{WTv>{OTtTt|#H$dc0A-3$6yRL>u|`YH;Bqs#+zc)^gUij}Vly{M$6`>F
zltiM|$5JK?b&6`1)(%x7MQz@7(6qp4!HJOn#D)xLlnh|MN!|s3S>gqNS>_o4VoeYm
z05ZzG0KmE0(m;fD-3aTt5!Q7htm{Tt*Nw2)4H%`Ou7!cBlpsEWnXL1lp@|cyjuWVk
z6R3_8sE!k;juWVMKyh6+a9uZWT{m!DH*j4yu+j~kMkYFqOmdAZ&MymJbvilE__^`k
z$5W{+>vv6E_Dpp<roGCJDI+;;ziVHBTBSrBsh_an6aAq)Q?6XDtSDvJ-tK7>b6&1h
z&g`?Bg4ToT6)wq?bV<#^L5ZcBMo*WtJV#Kks=v`NYn3k_+cwao&D^gjI6bIM40fOe
zZw{}JJFjq4Ug;FfI-VuN&8KCeUpX&U>07Z0=10{9^szSvBn1GaN;!Z*0j^&D*&Ohy
zu!#)!Z4@+oUpWI~XatywG6z#ii>S20?v<9>IVD=fU2U|T6J>nwB`Qdf1*e}%3bNl~
z7d+$tZ|TQm3{OGvfq=i(_aaakte(hqUJpIyH*W=WUROJE+Aozc<4~osP)AD(zzPc}
zj6<c?cN(c;g`Er(Lz$10{Fx6^%AU?k-~JiF!QN*ib$3W@{TFGZ0F?mfQngOkv~6!d
zr&aK!W`~6}I*XT#A>4bg!ZVry9{!N<m64+WwsF&ZrbS9+<A~)ooyJQA-^FPCnZ9Xz
z>1~uBB~^16U2Yb`Rl-lrbc&GAM08~O0?TsaN21rA{9`DC!%AQ0$%mq1%?pZiFw3Ke
zvfvq<{YA3Bz&XfKSW+t@<<t(mTcus~gLeEx^H;_|u8bjUfI%l#26(Hm3H~v?pbTHy
zG9;*%qSFkXG=vWg@N6bjr^0A4TP`l=V{z!EpAK+}l$k3O=%jlwnNTq;!Kn2rmPD;k
z5pnI@VGl;-7?td)x9=D6nT8n!4kOAKSm-jXaJ$q)Rr}XbcX;j89bSRDgKMYm;0n~;
zxpwOAn5fe_&>IL{NH=LbrN(2mmX)AgNKU}>75vA8)8T(Tc=`Cj!^bDXSC5{)c=F)Y
zqtmKb$HI)nApidrPcM_T|2bgCB>0O~+r^g6=Gd~+9B13i{3+_k8#w5juKBs*VtF`C
zCL3=(g^-joLGVmD4s7%w8$md5qUhv2ri`7x?P%B}&20>pu0$Inj{b`TH6+?;v=K>6
zU$GrEw&TVM^6D=9nV)U)o~b>~<fH9<mL8_Q<7X_1f>B)9XS2kx&*uHGGk8qayc@<o
zoA<)lXJtdLULuP;5F!aI2<m!|YPyeutMxzF1p|sfvvN;q1h1rHiDOh?fs_LZ2z&@x
zR4%M5zI>?!gmQ)<poqdFRsn@@Y6d4`^ndzF7&riJU^pOyN|;u{v=XM3Fs-C#{V2=c
zVUj%wVHQ6LVIBbqVLl*Z5Mr1@D+FrEWs(U3=;&G@fR3I40%iO_ffa-?1X)4YaVNHd
zuoF*W1`#|#p%ny~&s8M|HXp4@5JroMFF4~14y+(-q9Q8@lN4+PA?A}_1|s-^Ln{cQ
zC9;ArTDn#cMhozTry=6unl|sjl3!4D-K(kAAWp`Rg$<iO5%VWy7txg6O<iCcU@9vQ
zQ)?!dt3Hfz%lN;u)gm5`Vrcu*Xj&5oXpjF3!d`^S`QLi`{BJ*I{9hMJpC;$y)cbug
zO~HynJRe2*$va(4HXq_u;yu}n0fF~2o-WV57mLj%S=r#8TwWyJ;}7wCeUaMWP8Rb`
zJlfp#?lFP?_mP175hD$^NHj+QPM%X7kN$8=LswJ+r8*BE{r2GXlUMY=&rTl=A3uBe
z=#Sy6f4q1E`@EXmb*KM0dGgB+6V2%!H$A0P^2-WG4sn&)?y)Ygve<Q>Kj~O$#=8Yy
z<xPyw!j1p+<?{y*Pad4UVyNzB?_DvM7Or*9hpTx0Hlb7FKDd~9)_1lGUIfkvSu4QQ
zc7{4&xW2(qA2CAi=S>|bh|I5dXZ4la6lsfj#o4Dn(>DrY_@&~ezcr6pQPOCQ<>khg
z_0;{d90=~eWeq78h^v%JJXJz9nVppV%v9WHl*B_LhV2waa23DkM~m;*P^lS0$9X4?
zX*s7Feo@Bon>qYYL0946*jFT|!|q^G-RtwVGqTrytMdw;L_cNJ92D-BiXC00Md)DT
zCKrtw)SbR$<9O`J+muTG+n#3Z^|j?yNVPA@u<?CWr4O^pwW)H$jGO#`rG~fVzACxi
z<mD@M&gZwUvN_=Av%x^AdZCK*U7Y<CI&2-0Dwzf;4r@>H2M#Cs)$4$|%YeAAxeb`l
z#|JN7ynOzLFDB5x;`Y0LCGCrUjoQ<H?eS^J&-dHYmU6iNMRTt2dCL`Td*c87SIKKC
zQ1o9f;Lh3jf6xD3;ruwsBwdk>IxsmHJXp!F_9)p3-u=<^?p^qK_b%XBCsUSd!CNc2
z4i!71kl?Qki-1xysuWt50ax|^VLP(!7LeWlM;Ld5vj0!QXz%~;@Bi%Y|F~pNc6pXj
zov=H7oRNE&*(((#r<Co@=iVQ?^^Hn}NLPq;g+vtA5kFt#{bhjy%I`cChruFiHl)=0
zrBihMZknM2GVa3~Dq!QLtD%D2VZGM4JvVFLvl{#2nWHf4?ZQa8T6>L%VV#H(XAujJ
z>LjcnURRPWlVBMRMGR6kaE3OElu4d&|GRJ>ixO3F9~+Am#DX-?vpiWAb-Ks(x>lR`
z$?t=%ZK7#Oou+l42BAi&lCLfDhGUZ5$UF|rQViEmcOb?%s10b&#IrBOvnraoV3xF;
zV6-?iK&VDuu4Rl2$4A#cMi_tfF`^u9I7kh3hS+?$7JX5QwK;kI?DW;6KVH1d*Ox3?
z$`k73%6wFCUWHl8lY96$OLC=@5^X7L$*6khUy|uM$(vEOAa|><#N#A;y|vd{d%g8l
z_0|NgPYl_vyGD1>weqth8N-r19*^k)`_DJtVwtRB*x(ZDbl9czM$>q`X1!(0YWnHJ
zVs*h3S5TSNsH6jtjG*Orom2rT-|-L__$?1~TtdWj4!ixd!FkqD$m2C>X}qE`9W0(4
zLEcfUh-p>#p<@U!65&9N!ZXFb=!S<m!-c38Yfc7hf6?u>JNBM-AoMBaa)+7HC`h#T
zI?8$-UURP_W3Pq&2idLLdCi5>_&T5p>;48|p;rBqeuhPYLwly04zC|poeY=$X=>HC
zWl#(b?U`y8+x$6la(xH+zt}kg(+#Lw`wC=j{7<hO{}a*w-|zqbknul1_x}G^h{~x<
z&8r3Gd{2)`ma^beXYPHnVqQA;FLw3(9>1(Qznl$|X)<FO#C4|<=$MCHtbGZZhhnTN
z_qnpf-8@v|zQ@n*4_Znp)a>v%YrB+4TzxC=Jf2PfAN&{4sl|>+l`2XY2_OaDZcpPc
zuKBo8zT+TLVe{}0_SqLJ8Nsr)S738Z14`G-&A`{Z6%N~^k(yor6La+NtAJjFmYAbh
zvW^Rpl3u*Tp$h_$-7=>NX3!lL5u`BbIDgj=2sKw{$kmvqmVRo8faT)cs7|1CUWTdg
zq!2G7&?=dWOTu9Y;4x!lBq;cb<77v-R#F<9_Xs2|`%*|U4Aw)OE$S^nrBNkFMfAQ*
zRsn*|ZW{n}m`AuM8g@bf;3bc$meuLTtEooo%=O?8h<+564T5W!n4VB%42ec;w;wLi
zKNu}G&kL7~O{ts9w%fRBtr$4dx=J@p%1Y`A{{PxB{coNBA9bT%Isb1!x_keBAOEwD
z|EZ5t7m@D9$RD;dr#n@`R0qp2pP(%+X-=}+<`cPXFElZA<KX%6iK5I$=kbc+fj=s!
z@<ROv?LGG14cu{>CLe{?<=CL0iY_m$7>K+ng>a_K;ft^U3W73}h7!+a0dP<sNE4KH
z^((jBa-OB_cl_3U7K+Wuo+#h*jr$5Bg<Vxl`mX6Pe=(=PP<9`y;v}zQr<EZ~Gj6A4
zVY<WKr9WyUlm;=9Po2}M<4V=BL1HEq(n_{Jt6B!(&0V*2q{ab-=hz%2V{AksJJC=M
z7<-1B#gG!QW5ojJEe!`1@-Ufj>GB1nNI&}-N2{rPX$4$qs8HcL#~<xN0AvSDSjNqU
z{|qv)Er#nEbt>8IR*ekk2hOuiN4xxZ6#*LSXY93%hH&P0K9d9w7o4YhR=TjGkxuj~
z^JnrkHQSDOX<0k11q00ocAU!`r}dQ04yaLtCQ#F)F~OT8<9d$2wq5_7r^}_w-H-b9
zA3>yY|AR#P```QZ-+uj9cmKor`UlKct#OHL*FM@J&x$<*ojyVyT10#qNdL$~xia~I
z@_WItkMx0DtXe6E%aE6(1Zl}sQ-Z|id^9CUB9qP-3?r$vi_umIk#%wK2sy;TvkoX1
zD`eKew)>@u+Tq2fQAK=I^;BV}8#HA|`=DayLB+a3{klo6uQaB(I;LqG?QTqoX-x64
zZyqV)Jf_6yI#e0bKBnY5A5)?m(|2vWt1%6I?TC_NKWZ778afYY=scvM(><gz<X0Ke
z-Q8*99M_KM1vRQW8PP~TqT#XMGmmKGJfe~Fh(^vM8fk7Rem?^mnFe&)?m1i}bNAQA
zpXTul*26c(VPljaW17O7>89<yG@X|whjQ<nefQ288%pfzY3+To8|#y?fsBCZdspnc
zcE#90_Tb|F-UIudJuqkt@;@aXmn#v`X7#{WzA9Aqz<8<soGa8I!hRJEZsy3XlJ(_u
z!;<hZ8Y(+(0o<hGm~X77#Dk#e7$l=jGR`{V2bpV>^&u0CBZ~-erYJHbQ8!fqL^EiF
z4q9=@h<)UVZp5&4$f{dxIAa0Xei(ZW!^j7yIF6R#!TjAqNt?KLYJRy*^gesMtuiuK
zo67NeIDHQ(;a!$f?vr^}v)O5VmoZpb+-Ww_B^bV`iampyakyC~U9%z}<<*2jbbV8h
z?$Fk4W4CSFw(Z_+8@p}Wwryj#ZDY4>+qUuDKL5E7|9!YsGqbW*DtSn)RFW~r7}L|Q
zQyzn+gu%rkc)d|g8{Sw?Wfoo~k#|(7j?MHk>-M|w!wcs`coJU+DL>NIAqA;;9P&-b
z_;4#G1!>k>UqoE^JJ&`?e89%!7lHDQLSTqMc(jgxbbJ;iiMNa67c(+cQt7y4Q+0H_
zk-(%e>AR4zF*GyswkTEtP2NRnf-TVo3B$UYs4%B?;-4)7QBhMu;yGE=gc|ztEi^IF
z6Rek}T!$H5sa0|)uNiUQD=HI##0~6CQLZyhMjq+N>p6jzd95J=4face=YdX(v6LfF
z3s+yTx%dNP;#<BzRXkF(4h}hS<>%E{Vw3zp$tc%P1Llac_eA|O-sc9b$@Tzs_w?aH
z&@8|5eX4>xZ4NaxU{Py>n4~zyiBl<DFNj^LooC2T2O&WCDFK;H!G;UUiXLE+si)k!
z7b(nUQJiGtnTM7H167<}A&kC^cpf3ArcuasO#Ovgcy?VFQc+f2p@2s63c7;W@dkR`
zN8Mgl-M)ZE;|jXwKNH(N>XNeRk_9xPSJ37EnJB-J>UPwn<6lj&N@b?vkrP*G6jvGC
z&!Jk$7jFj_Kf+eIYol4&mTM`A(C#q6IO0wje-fr%aT9JYRadSl7J}^4Be`Omf3hg1
z`>g5Px`D2aRZpr3mK$Gk_K@5(E1y6fg>;&RAI-mLJT7O_haZL6eOg_5kb$@avzNGB
zLyDsL?g})5=_m<7=`y5yc>{HG2Usy_4_7*Or}G#CmyZF2I=UUd;}9i2J_$ZP^nU{W
zO`sTjsRT%~YIrPipytWqD6U(RR<6^lgA(i+PrWyf4qf@#qodwOhL&Jw-LhxOYQ9#`
z$RiBtw4&;d#w?7W@(mskP`p$#ZD$@@&lFJHLj&xs28i#G`%r^2$)IYP#9(Ly0MUoW
z>|t3&{&DGsXW6gP?t8Vlqvy@Y=RyDcdV<1FcX+}1!Ga?2l2%1u9cXp?F1w=6gSXo&
z?QhecB#`W)0d5t8g7|f*<QShgfh@~0{ZtQ5bu;^Wj6*MDmP395Q8ixqhW%cXp&j?*
zS-CUgo1V(knWu&i4vsZ8Iwr@ov9@uA$L9Xdsgr)&%{7`=zlB31n{%_jF9g4zzT8B0
z2f6{RGk~rcK-I+UB*ji#N|lH2@3-c6mFTu*)9|29bNk#e_>zRSluI4pDCj23wdI94
zw7f(j)SIY>Vg|E3%DKx&?1+`#38>gOsi)$Grlm#18bpOpptm%tSdtBOozdkkM5cni
zv%j>Bl{cUO1bD4)y3lA7D%DYI&3*?Id-JX(we~cTM(a-gqWq~>;+I?K)eY#nOd!oL
zh}Jbu_MyhX%iSl70;^(`JX&mHz6rFc*AQJL1-mtLwT*#To1?fPjrt(L!_z&C5kQnT
z27X4P^x(*40>12y_q@<v@czXkdC1CIJsidLKeG!%6}KGn3<?6Nj=%F>{(I-(u3ak^
z#?5z5z}`UZ1wMjBhpDCqmdToIbS|B!Acdbr$9bm0P!ka6pCE+`6LXZB4_M~pP&A}>
zcZ#Mr&5A0d5i|YDWC1NKsqf7aWK>u<)5&=FoTJJ3z|%<noc*Ly+ES6W`46qCiS|Be
zRJB!*X;JCNyz>m<8FM=N)1j5d-dyX&)|x_It=eM4rCO<TUd^w4SLzlq<Bm|o=Pj+>
zr$FBmJW6FeS~58o%YVaVPkKnpatbiSH=X50qG&ju$rm5~=zNY^n?6S=ZWPciw}G7L
z)E~zVcZCPU+Jq9@HDXmX?468Tw`~eu>dGRo1{vwUm<L^IOl(6d_@$UOY~vu`V&rm-
zc7l1bDa&G&OLRcYMi+t(uoP3mn8OVmhOz<`&yqx_+J~W#^Pf&3P>3Nhn`wr4!rNh>
z?y@XXiMBZXuJY>wQgc2TI}NM}I@N+em!V+L^K!27a6Z~Q$_)Q*Z`YZ^`uBr_?T2cg
zVCd$4MvLZGN!IOD@CFrh$;2b%AH4<cyQiz3IW_~GznnjME>YEa)XKlSKYBiu6PtL2
zcwRv-HSEc@+xQKTZag!IIh}6m;8e-Q-H-^>*Z=?`9de;jN9(T03NPQ%lH(r$UEGAP
zZ-6hruNPpq2AHAPaq~rvCv*1KnCH@HRUcVu#g+C!hX89=_rAMyd`>R}fk0fb=|Ac7
zoViW0efdgpIQdHHxTtv(zB+2%^j-1y>-@S_*S>Nw16_8X`B+l>*jm}Z$4eRjcdzf3
zsgic@ZmUD_Fb?K<Kv0}L8+dTfv2U1~_RQw;zhoS0amrodI)EcvJb01&lgY0ixwQ;o
zB#DG_3%9>8!SX$_FhK=4FnGsrk&L<aOeoUhUDp{_?&TC-OT7WH&6)*|+pYC_0p|Mj
z_-NUDr(=;W&D#6uu&OwgILWQ09~3o<{Tfn=7%a#`Hb1>)IH%{plc9;~-bi%tc#TLD
zrzqW-xo0qQj?~31hf}&{v%ak>?HBP1a!$r(cUM^04d@o){sV{8rM$3<ClbVKKVlC(
zP*UX%@Ay=N^)i^Dn-g*>FVZlk%!xKfOFm>QUFnh0qjRJ&XmM1A?e)QuPIh~#NVp+f
zuSMepIHu5Iu<icN8Z_<W4MlU^g7eROt@Zy^h6_O3v+w=h7y;ZI0BYaArBm;nI16aI
zb;zuy4q{52gBOfJ=H3*dsb}_6lraW&-$j8#*N;Z2!Bx2=^JRQ|k^Bm;u}r?kCnR;f
z?x^oHimgVrJ%!-<U3AQO7-YPc8Rh8)aJ(sT?o7`-(UtWXG-*1^7zlXzeMDn+&d!R`
zYQ5DT%j{_!cyQZN=3aM6&>D>5GR>vvQcSsjckPagCgLG_g#Tk32S-43kmw)|>);TD
zGdv{^YVRB9fzVMmQMPzJpKDw=VF;s`$;;u*_Y-h`dVOo$JEM8uN!lA6kv~(Xe7)*F
zps3a7-v_;=#>$ylK+=`=GsGvj0``)PI|sQDA&xZ<_Dj9lY=ot;y5aSmc{$~xXNu%O
zHLgb@kpv6*WqC?_$4{$<v3L}t9yRfno;cWy#<`^3M2|ZFzl3>m$9xB~6nC>dMLy4c
z;4Qt(glLO*_2ZP!%yITnEGh(+l}fIYfA(-PBe;~tps&?+#jwGfS<-OCy-2<-WpgbV
z&DvT9Bd5!x>atoEN@mhr0B&uOf*gfY8?qSYw$ya-*X9P#zF-PM_S?GrVnCQCy%pJZ
z4VvIOxu)5nxtzgpaAa<g6_0Dv{jftFuboEsZEbH<h$gsoeyahE$?=f`kJm9JVNl=T
zFmr~>TSR=inw>gY(uDPoG_dvlL)gsM(HpDHmWg&;{zAE*TpIkMJIe#ES0!n#=bKY4
z{=#cA6V5cLppg$)w>fs#%z76ebvzY9v$s{IuKwaH_bt>(UPUYV)Y+biPr?UpGN<UJ
zAQl3(B;BvC23j9n&oH-l>alY~99c}QjN+Vc=5er2e|4bVm=tLEm)i<8-87S;2Kx%q
zR{KiTY5M|c48z+5T`ksS7Yp@+6VB%HN_7i_RKPMuY80q}<s@g6;PQ(B-i|fEuG_)$
zbBzc9aUOc~k_Xx_1E}8tY}^3OS^?2C%kJHBJC<n`i!i$_{Jpa?FtF1>$VC|;8jP$X
zdak=Ph@&LTJ(*XkX+0Sbg-vujWeKA_9@G8<Fii2iGPji$h0y%J28V(#BnudO=i~F=
znSg1#JR{O^VOk6nFQ`#f>k(PdlcFL~0|Yho%&Q(E9w?lcD|D$5OF*bp+Sp&zAkim?
z6%%PD&Lw=%{yNYZRvzsW)^fN++!mKIW!Ghz_VXjZgxvwyE8UwMq=1y1zBfvI_1`|z
zg1rECHvk8KeAuDpr*Af?^o`%YGt>96P7Z=zBfl>ml|1r~Spz8h`|N%9$1tNmOgWQe
zqhWErUbpXKKg-?hZ(L|x9-sH)$nBDV9xuqr!3R7KJ$dt#?M>x~B*VTl7{&0d{zkbM
znOX*oM2j6%tvD>vzqYBO<doi%OFLV0F_rzJ;ZC=ij}Mwi7)jIb;hlo;gFgftRl9{c
zj59D=RsIFVn_2jGMZ!cOP-y#~W!+Q8X)T?_vU$pPWJ0@m?v$`y!FJ(F@dzhpr`d8?
zv1TnAWcKG=f9rt8>2ahGLv4Mzc#4#>WTRD)<5ui3J=k@l=}&)UInmaK5#KzPzkK)+
zqxRYSl_CzA2i`LseU2ex@c^*BVNfr<hSnQ@Zg%7ABTlwdjn-&eP{S}qt5?1*@Db?{
zx-n8(>7(94tcX1y6<k{@JWgcRg|18nZj%xFk~Pp<#h440(_uT0dn7psmD9K6z&$N_
zvt`d086sKr=%m`o;SB6>QM<FM8HCd5>9PK^q{J;NA2{80N%n1Z95)WdmJ21N(8eua
zZ2#Mdglbw$eTq77C$*0A<<0d?h7gVB#CxJ-&=6jtqY1`3Dr01mc>xttzCJANX>1nb
zWRf(YB)ju)Y+~=n-XC2*FML?+ai3z_n(>?vDP}vJOp_cCwO>ZfPiD(~qu^khUikL`
zbS!Mwp3jtlj*{j+k+>kl!&<<2He2vICAz@*%vVEY%mT)-tniTW$Gz&aDeCN#SurjF
zI0G~2Gt;NTw@#-9onQ%l{Rn&=q#Ms~?r_4M^_<Jb{nQhdhj-S)bV?cAB@Li}%`lV+
z8vo)u)j#g6+7(DU>WwVF5<#vEx8hhW6N8V%@0eooIo?bX_vU(AxLo9pUHja#;C8?R
z=7}+irLf(-@*&=Khy%tOE$EOki*Q7Jg;&&Vq;i63QF`yvCWtZoBrVG#L}EtKTlgyC
z!KCU|ydTp<Sj7E+-lg68E9v|A=jP_bM$RZ6=qBSufcyS)I}lIiFbZXHu;{WEmShwl
z{->Dso$!&Ug(~Gd3HC$pX!MInLi35?r7zVvh+?B&n~#gqeGW=+o+~Ce!q$E<>Ur?B
zpU-?0DZewZ>IXTZWI;aEyLg}BXcEZd6XrvWgFWr-qBj7*ul15#i75tq38MKQP#OZR
zJ!}|+C1LQ1U<rs~dHX(hh(6r@F{qqO-(Qbfc&QVg6bi!s4MX{NTAz5}0ZKT4<Gf)q
z-?Yos{F8I&6f1d<#1#$3%6X8%<K4Kz`ALDFp!aRiHN?Pus5H*9T-hsbwwqW|^}3kE
zsMF!SEUYpcw~6kzPH;{G#<}W{f^5H`V8KznxmLR@6l;ZoRyi=?F9uCT46D>&BWYJ)
z;GXC&r^JRWpN~62J)OLq_>9=RoP2f)X|H7At5l<=t`DGk_TG92uWmXX{3^1%w#Gl~
zh}QD}H#2~jmtGi&m5{#*aWZa$Zf-TYJ|P!;y1N#6yVN!`;U{g8FPhQ$t9Z$Mx=x#y
za13Rp9VAB9l*MpJntZ|M02)oY&vz0XNT$!v_B!>qqlttGnvN=gHFmJa;2T8^{dLpF
zQ38C}c(Ax{Hr=wt0Kaan*Q!SEjUcy_5ZCaF`TZ96<;|_meb<Mtk5@;jzyBUq2LB@9
z^JDX4_PD=&zN8`K^80*VzU(cGG=2N=&u_~b*_>#f`(DiK#+>16qoG4-=whp|hYIn;
z$GP|CLCCD`GTg8;h-p$K5+#z$NQOaKFcI;PR}BU?r`N%uD0zi7b&J?`i&);ox44Z~
zyGdT*%|FApKfzaa53X+;v~C+TyJ>9jTrTreJ;Pghg++BcDX<}l(5T2R3N8hMsSZvH
z)s$HdtiLQi=*!ssfuU|}>5uG)$X<fmPUz`Z4GGc>n9G{{JS8QqSmAtsQ%xo>`X%nk
z7u@ukfG*H*pzi$BSyje3M@R_IS7j&;mK#RbLeFK%W66fGw&Td7xf_bYo7vgsArkc%
z&449XLm}e?P4QTD#X~OvZ;EE#ST8~Rp`tQNU6iL*(t`buVx$dw;m@#`RmAC2GKygB
z*6J8-Zkk3Z6E61gKf7A?g*ob2Uq79(ZUy1C0)f!}9())Ff6i<%yhAif#cK#EDU_){
zp8yx`&uCLLjghtHYTi%JRYZqclM7#*uMt&Nk!Qnl_y0>)DbQ|{OIGce!U;gk0iE_&
z$(A$^)n@LFPpF@(n<~4O)RYW)W;6Lxf={DYgF02{&77cN_2}=6MN}dbt+_aF;$DUX
z_Tp!VBQm&{p?R0%(JE=Dn;&J5Vr^Iu!?vxHbP5@AW$9xz?ME!5m+CZzhu&#ZYBn)V
zVX$)QD)Vps0}j1Y1p`j3U~wjf?m35c`jZv;x+Q1yjrSNjK>QLydPhTg|M##gw^uSn
z-#9_v8SG6A{Y`A@5TpD)iuOLLhOl>(xVJ3hvygrM0YmdOs;2Kh7jfKU_+P_X;$AtP
zV(>J7b2rY&Z^5!Vh(c^Xpfff1m}R#Cl>G0KbGSl7Mk>xQI(2zM+VtD-Ec>n{`bglE
z4SFfJN?p{Z4_(v;kSJ-7bh=t%{hS1Y7V8M_4eLpEBZNa&F<>#hSj%N;PsjKG+(S8d
z=J*9~50{=Ft6MR(+AObA6=TnLr{uC<(>l(OxdD74^OUXWol&~b3*_;lO0AdFNNFQB
z)Yo_5H1#<q7Rg9OA%`9bIF#W+GjOP#K?A>Ggl+5U<RinmK*dzD0*D<Y{gG9X^Nkgh
z;n`9Yl+mODe(<-9;4HK~RutK>lVK3>@`jA&b=RyR9cuZ~m!b&+(-#vPHP=pbP--JV
zStesvLO8fGBsfVCP)kwJ<yn86Ie#|RUEVlJe1_6*)ks387437F;3u;oL$8AEgi$y2
zp1=+zIpr$7(bz?_<t%EZAfP~)U}mz{+6~8iG(|K0ssRV>eC1*sv^grJs0Pt2^7r@v
ze&b>U8xaELc?uu3Kin>2T_0j$*OI=%=M=OkE5}CRDNLd}wy;O`tVfFZCyMb8Wn-Ac
zS<!5N5wOmbhwBo4<iDUE6$P|611XR2%jb9yg8R^ARXOKNm~=!biMP-^yW<gR-tC(1
zi4LQs!<dJhIWj=u?qMF)(D*vTDAev%(TWcP4k}#e=Gt5CJC<@<n}RxjiX0oyEV?vH
z2*`#I1f~u&xs?E;JcKt~Q;16hmd7mU?EX{|G$H?H*quxYM3LV$e;Tj8WGX*NA1TuM
zf!{b!38(vi!-^+iq^sF0g*o6*A>Hk`R*$5uSjKSksaOv7zbCJ+N~O)NS6+l$lZ;yo
zv4;_ActHl%G9CegeVOw5)!p=Tf^(e08`OUxCM`l}E`f3Eu5Zb#?1C4=u~ro5<81zc
zFPh)8qi?^(<Y@MdTH@4PTJJLr^ZMuA=2nu#XU(~0b>4VTI5VxpbuBmwe8$i+=s!@}
zjoqrg*}!pwWic%XZ4S*d>Kai=gpL&<CL1m$ZkWJRzlq7|h>pb-m1+NzwL4k#MQE9k
zqcRbrqW=|A*EjQQ9Ff_a#c#}k?|yx*&cxj~`m*W@)GR!aXT<SC8F`$*_wF<4ky_bx
zr;|Odd{%SFG9asg`Mx5+``UcvsqcybqySF4e6IqvTD_c_>+xt3Pdtwq7o%#!3EqAd
z(oQ&*lQzjjZqVljFlusA$$TmlY+<eGm9;N)<g_6X7@o<baf@fPD$AM%PrTP<8T&9a
zzQ8*Zl`Gzqo%j6#_P~&6izN2DyjZ%%@_p4wpGU$SxL9Npa2xz)HCW;BM>5kC353LE
ztX(@awPNVcA7X{+`vrnp?SE+d7NbUm&WIZfn4`pw^QdxkkW$Rsl&Us=y*=R3NyOJt
zE^RYBCsJK;+XXQn9qofw%ktd8dG?hTAEtK3yG=QZtn1pz94u`VFE7D2bEQL~4Tpc%
z%Buo9a#o;lX<~BnG!7C@1t`n*@TSsLxZs>f1)5Qf7`YPIH)fElBvPOTSB!psiEMAK
zf7GP|{BYdk>{)dc>Q`yI0CH+ka?7NmJjYy`ugV~0C^C!BT(A@+u*KPvcJ04GpT7Sv
zWnLoK4TqH`dq9c-b1D$YI}#DS*(WbRE0(d7enYox76AV<WC?1fR-i)voxpDA)6<q6
z8!)7@RD5$_!EDAsXoO(<Ykn@HK9vvsq&>To<k@vvRZUHFE??$uCN(X~dvu21<H8uL
zc2|#bb#lmkixwg-jR|Ox9wP~K%;sRuJeO87LiDst%X<IJf{lxC1?RiED|HsuuB&(J
z#deB%NLW$jQ5H>TgJsdSOyjjJLLxEyxlXl2i9nmKIfA5Wkzj)|x|Ufi#dqVx40W_I
zw{lXJxXwGrf~w_?)ysVcKw2eTmQl}p<SAJtYRUYla-1}(_9sJLlqb)VmlphK(}}e3
z(jw{V80s@^T+c8kIg@AejUgv4+b`vwRcD?6`2F*Z1@Zx73yNcoCJ-A6IlOoO(yYjt
z5PXaukN~^YKJYk0E*9qM+oPv+5=!QVYEx0N^V3LuX8+82R4GQug)RFZqs6kUt~NQp
zEU&iGqxF#So3D?u)RY~}s50<XtYL?+apfK@S`{f-2{<TcZWZsCll*O05)Ad#=-B=J
zui5MdQX`+#H%p+dfgr}N1i8Y4@nVnM;mD;2cX-%S6kh4WdQtWUGw0H11__)R+!iH~
z;Siw>9K;_FtAADMDxd?|fSj`mE9PDOAsqB&ZP)`B-04uIy-QOgx|42Gx8DNQfd?~r
zz25D){^0xgE7<JeR%|_()dj8WuBsw!BCM6A2y%DTneZk^wSdnVfDS<)wY=LJ%s<9f
z!N^qzd8ks>UkdD-8QZW6T&gZ|Z?5z0m79N^$jAfKXr%eRuHV1&iHhoWXS)yq!cy{x
z9mE6th{@G2*@czQOvnPiql4do2j_>ap8|wa$Hp^*6+&owZK^%c>$8g*!0YghTz_u>
zTfl3_rRh>T^o{ME-yQXD!%L#=dQKkS%7vxUrd();{$a$}1FG}1E(O91LyuO?X`?{k
z*jh~yS~;k}_2+C-s6JqXSo~5*^HA`w9noJUaO~3$cAJG>Cu@#MomF?fPPon3>LjIU
z8%uGP#Ra1Q-7)*I+`8}W(U)OxO8}r5%nzk})^2U{11|n2D%nRW>8Pw*Mju_%r&G%9
z@=h_*OPmIyjM?N%do+CNGmqHFAuP~IXjAt!NKI1g`mk<3)8g@Vx+nA!Cl;D~zj4DG
zo7TO)yHIpay5+s)zFpfjtIN(=0H`fXJqCelT|vL9*Wi#e>>QztV6=z?tu4l)5|m)A
z1X_?LcZOlA(h~x)&X`D0Os$8a(Iz$Gu(BA#mOE*=IO210Rwv<%hoVs@CF9mc;SB$K
zd~GyFatrm@)lG4vqOnfT?C8U=b6M-hjW{(6O~(SqwqY=(XaH_G-`xrg&9*}8aWr+I
zx*As9prt86x)5th#wLqg3uj3en?+`mIL2KYJ0U{+Qm)9MFw=7@T-<t}#CAa9AerN-
zgD7TH)F>ows`NG5)BU}RLXu}BCkHuk#&%ZDSt&v%CJ;>_7)A^;GMsY!3ES8sdXc%<
za6%`a7N^+cn&e=UVet<_jZk=q7RSd!`kv<S0n-!gk>Xj=8ky1S54G<8wE#lq$@_dm
zNj=PS-Dq71o?K`7&ammaO5*Gu<w5IghUZ~o?S3IgmHD^Yn0NAK!67N49_z6_^V0&?
z&SWVaXEQqLtsG;b*`3&cu*hq7!uw<19V_0)XMDt04i!)22F9zl_>{Vf6lUT)5%#=1
z&Ne%bNm0=T^~2p`|ANF$Zj5B2oyoxujxD^oH&e-sWS{IV`N(Sw$AUjm2gL_D-fj(I
z0Fh*=6M`GQqbJ~Te+4mMZ`QzySa)#sz=M8!q6z_Gzi7;jOlu4`_|nIT2>VNOBr@R7
zg$5I1Vq6MTFd6s0MrH=6Mkog}q&ZF0?&*!LVjPCmZB0MAz}^gqnSh7Sz77G#g^?b?
z%HO#OBL}HzGKpgv){I}x(2=!>a|*Iq`_fU>nwp3vx{5a)x7=G*@U}}7Vs{fM$;%Mh
zO|kan9v8n$H%-4RDKa|wJ`8KJC&u7{oH!NtI{J<v#RRWsaBGYJ07qOm@w990gW)ef
zQOL6&iIxC-k$o?qcLxC2`*B>VrDnHTQ%;-7Mabjzd;S<asrB=@9|iiqpwab(U2R^D
zdrG-xptsMv`RkAJ!G4u-Z1ZUE_V8&Cf`UTC@*`B_X1UsN?9QhaLsTdqSFsL)>%YPW
zz&*oQwkL5WOAM}_D)IFQ&QV(G$)Z<rqUk+=P<CHuxbF<F7x&jk4Dn9-1cPk}^ml{V
zPDYX_Q{Ah^jlY;X13AByq6hkZ9}WW5rs5q(DnhnE%-@9}NnQuv*?T)C^U=Xl)};nv
zBNA4>>0ASAFH6>3=o)@~gqh`Dt=l39JnYTqMGamF)^>oWu#g<b4@P+xw_80=5E=II
zaj)d<+1Z5~`$jZM2d^fWGJ=E-T|tr?vc;e887=k2GzgnW?9JAnH}HI!Qg4LjmAfL&
zx`l){@d5cEdT-=f<nOF7nx+g=UX6cURprrm^edf$z>)$gr2qbv{=cdOiI@LTC4gqi
zApU}pz3-EFTmm^9Nc>>CQ4K@jbXnOI<|X9r#*ImAmJ~57hQy#8(2po2{;ks^PbGFz
zIfUC3#DcCX%0~ZBULBpvnspCcfu4qXvGJt#Uq=+2#rSjJUN14GzRcZ@+V6oUQTB^Q
zVjay~e_^>&DBV6A{^E;Z3F<Rd7WJ`^B`a4`*=l=N{EH}=rp)tK>ZP;$7NI9PT9E2}
zWBBprIwcsM<v>Y6O)KEahbvk6=dAs5HZe_tiz|g?CY&Etcc5wa9_j|1n)m=(m8$rL
zEQ{(i`M7Rk?YiTdw@-DdL60fj0>L#M3<=c3fT>RRMnf3;!El{%YVce_#Kj7ewf;(^
zaWT)iw57oPHMga^R@haBfyAyUdWFI-=+a^HGrPdf3>wGF7keJ!D<OF^*YK<*$9`c~
z{~;4e8Vwy<BGd#NBU~yvDY|<&S!$cFtSk*FP*Ad+X11DL!YX-act;XC?tm<-`Cy*)
z>v2@;P1I++n5U&IVn^1RPAUbx*jT~x^5JiwoX0{Zo)=hKf6&>%C%aL^Fss%ey`xl{
zp)C6*PKyuSkHVp)-=OXi6vE~9yOzJjz-(OGY(5cZ2V!f+9Gf+2Y^=<2QHjl_(W5L{
z22ysY)x0R4+or`qs60`i^j$51Ysb{KE>#KWOOsjlQjqsL>IRs*vsGo^9hI4NXAV3j
z?7nm+N`DOKvtLWG?a`hWz1j7DMg94OQ>8D14O}y-#VqYuO-QGQV*#qixPpLcIr|Zc
zJunoteGy&}3(4kdZ1QhQ!5~J0YzU+@evH~M93DB4Gg_e33ZJYv^_G#O_|0GRGqH$a
zIb%+%aUw%sbm9H?7t^(6`M1VV#v93olixoo0%$ansVF}W60#>+5JEBfLL+?;M+SNm
zDy3~9W>h13)Sy90pW1z=XE>c&nGo#8%4BCNHa=0`4g{*~2N)R}bQD1cS_Y17kFIA)
zptBG~Om{;Ls)_TI8m`<mL$HkAeXbC&S)ed3ND+?S>>M&0e0xDKvq|QF`mAj$J0f8X
zBLuPSGv%8XfGFkd?&&risCJrJhjn^D^)y#(EY0tA>|z4fzHK^kN?V&GiF$jb8B0^O
z-^BZ&<<q$l;K}QSLKIe%oz-d>>S%bE#D5>lG)NoV&?ZD@OV^McRqW+z83#?d#YCz?
z2Ldvwqv7==&RU1B9hphFJWvCKb3RNJQn72Y`KtxrWm!&-9>x9TWHz$}Qm8INdFM$^
zP;?B^={NS5w00YLF+!_gq;ST`HfZ>2kGW33=#5VsNx`=id-(FgtG4K-V)nh4{*LWf
zj&Ev>cDNV&tRrXTwUA=%RAT{m4-jU4H9A^~i?uECgpK3UnSfb)RrsT(gp4UGfBxOn
zGe^OwskDIJGYR?a-LhOv5j=8T@dib-2}c&TNBR<yu1C;62>WvF;g?r(q1jd7>YZ)f
z=7nna4{f;@RB$~1`ko+DZLjhNi|w_d^fz2KWG0X=0g!sn{^r%CKyQ_#{3vpyS~*=Z
ztZB~jlx0Ts2_L4{H1KHhVz5ThsBJ<z<JuXAyx1YtgxoBM%wj9wjzt^OTtmaeYN`#E
z8k5Fvifp?|SUQWKF7!6hV`WGIt83%hJEXRs)KvA@I>g4<nwTgg=+CllLe_5pYnivm
z^6R@(c5kj!*X?02$QXIcX4p$hi}8W<2ok&1-B<ekg^vr#Kfye#Qkvx?V}#b69?exb
z0FK%Gui^t?s9n3oe|;3l)beL(hw+6Bz`b>eR1Ce(v7-;H$ABSo<317|w)hBoW?q4k
z{WbF~k*T`hz__y5Tk=5DSlirHlBL;%*FdXSt4OQeWuf+Q?dYvp94UEk3(rmFPXf6M
z)iz4WfmN1Uz26?bZ!oM1fp*zsSBr7x&ZDR#B0VOcD;0UhPa3i}dntEp<nv#%s9WP|
z+cG?{pVT8kJKNT{iFrb>sdCy|$vEGKbz)yIDK;V5niWr*J3`mWse-F=s%xDVzlc6>
z{c<zQu3ckQ9{ij(%q%zXbwgW_fqUSIvM3FG+lu>>sUC&UT*}qiM=RTyo>s8Uda~=f
zkUdC;T5N8Y`BM5e!ZFo;d!arTLUnANHK9@4;ZJ|?^G+W3w)vAYw#|)G{h-b^ecFLB
zt+40%M~Aa<#{46Mr7|uC#_Ty`^Up=sR13%UR!b{CfYgFx^)+_=>#OHwTkqzjd&tl;
z%f{}q;YxO$XY+-AS_fhx=BTDslBlzFOZ(bO0)DyEFR7*b3;<nem1@{Lw#{C#D;VRj
z3k|=C3GoP)@*dc73Pb&564m{U{NJgx_rTUu82TqLyEzP<bC~TyhT=tx1~Glfat7n5
zo_HmF-(P+Sr(Z*oCk))8zbn{lVjeksWo=DxRwS(HH^+9-tFjd9Uw$~PC$e_}2Fo|w
z5HXo=`8JFH;K*Wa&2jYE^psD+UAgMvdnm_z=AL~i&W7EKbYnO&g{C<VVF@!fxM@I^
zd1|gS`_VgjY9IX?3qdbVxh=C-k?EVz=&Oc8S_jCo*QE&NQhh`-`3vbj!dZPpwV#o!
z-osljNtPdx?A9cz&PcW!;zSoD8cYZxO5=<t_>pA^#!`KLwM(SDg}3C!l`#{{Ek7Ws
z9+Rwl@)wSKi)znEsAR;MU%Z7=KOoug5h$AQ7gnE;R7^@VJ$Z}%e@-HMST(NCWKgfQ
z+N<Dg0%}z{zQr7+@(Su`aLwt7F>72sInEjE+Yfse3NfJt47idnc<gz4u!$wR0r?8`
zbfBEZHmydsE@7mFoP0^a)O+u#YX&()cR>TPY6oJWdoR#+7EhdY*fY{(>V(I6S8<vf
zYwUIDO@0kI19b%~?3V-<xp!_RtxVu8L*fy6%BFz|9Lm~Fz**#42C~37%GT|tKJ_?X
z#j17dVg5CAcGd!Npv%viCl-o1%TfsY1KS>W=sjaqpyaYy#ElSX$X){IWsS3v)P
zkrBbToXdy<s=nU-O1_akT)b|95X=23z@RP<L)ESgQ(U|X4FVGTh4AW>YYEnjBSb;C
zRVRS?IGVo_=efmLq1G4Tdr%goP`hyz_Uoy#&vL{e4E8-877<^R^!!N?O~+&;KS)t<
z!2i)4Zxey7i3Oc0AX|SrQI#88Zb7p{Gm447$BQFJe%_<w?NFV@4M%mG25_P9T~_g#
zb?Q^R@NrEous|)4IURR-re?C9Kj1v-=$NJ;-myxmOiau-Y{du($g#Fw{_~n3cSEu;
z(i=_Cu4m7Pu3J=}FMBZwMz@Ru`z~b0Y+gTRJ}Yn9C?rr38g!Fz&)|m!b@2z;wE3G_
zA?(4hisHQ%#D$cBUST<4#pT|P+8`{QXd$!KhRrjfUpM$S1F=CbPaRG@5a%4ihoT^y
z$&Brwd79+x&2u3>QTHKZLsnhBNJqD?9b#ycoN=9g`&EoE>MoFkk4*odpl7XTC(Gxl
zwMQsH+&GyZTch}39Q`xo?e%~yoIr~QgszP`+Mqt{pgtU<x7CLPMa`v`8x78AY5Jx2
zOND9Eg|+w@CzB1W#cNG<`E(ta$sl?e!}D9np_h(+6Kg4!1J63$s^Tb8!VmNECZS^}
zt-mdES;u)mEis^>puJ*Gm%cRbeA!SX*~|38t6I9bz_*^AmTu+Vu!@2H^ixl-#?q~@
zuQC(&<-yUVds*GuVeWUICq=-i?l0W(WxW#C%STv#kn_hUc#!jQ7ZpdS<|sCp$@~Sl
zL15jY{`Xq_))rDw99sHn5r`n6)hqbZ@n-GDD%^PZf*KhP=-<_R@Pm|>FYjM^7%o@O
z>D-h=x<yJ(1h634xZr$)$Vvk*Jnn)Y;4be#0&V;ke7)p98_{xBwNBW>&4TAkOY`<a
z)q}nRl1zJcrSn=EfgQ>tHH;y}Mn{xbwM`NVffti8)g`tZJ^wmCvo4u{`cx^Wc|_QS
z08!A&iS<H#j`na%2c-2$<x)#wqe1<OT3QL}qjXJ0SKP{?zrk#0GtSP|e2*I}mgr9Z
zA&_3N-67T5NEL3!?X~RK1M*f8{GYA;r`ieDHQ2nNnb&hwmBR!0$+yOd*N2SfX1G^p
zz`NmZ&8xf2MS7HIdqlA>{%Z-vS_G4By|kK;598T5g+-V;I0P_)N2JShSEMfpH<C!h
z*#dE{K*iH}KYNTSoqxHuZiQ6;y3GY|2_tX)k;AZZgz_E-_;t$<zJUt36N#m~2@r$>
zK~7;+opT_K-Z`2tg#7$Le}%5+6azg`38T976NO6>)DPG$!tH(O&tE7v3H63nlx>vo
z6;6E87j(mb-@@`8Ar=6F`F{G%1|A|lZIu`5;6J_to2X$tb+$1~yg&MF>COJ-tt_(W
z`#v)i9qE+lq*NfBhY^lUGK9v7&c4=HL^O<yf;SHyB|P;Dh%k3_H<YlWpyy^MX!ec$
z;agn)&gP<UvExv?@a+=TU80*`F9}RSX{C~HagHyGySwAzT8Qyb?}XzZZFnc}aBHop
zwKZ?MvG|w6`V!{LYgX2-8*CG~j{`bySYlVkWv!ZLRDK=F2vgBzvro3*qVy6CQ@Qvo
zHMP_mhn&`3AIIY~Bkl#bJb9cEuLv@|ojS*3y<ypZR?dYbTof?~hYApmh9X|<hE`Zb
zD3>>QnQH?TDf<xqffeuzO7W!4faOAj?Xe*x@Y)j4p6oFdC`e$-i-h<f#Lt0T%2RG!
z91!LPeCZU4u7J+L7OSxZ!#b`eN%{?HB_;|fEd^1YM5*}08T)>|Z#^)6OBU*(iZ6~t
zm<wE(L-%9Blx_U(*cFCLe>ji<z0L<VfpI}R1B5N^T$t6^Au9Yu01kUcqG>`F=3cBX
zsw)&oro66e7!&d7781?MX9;tS7H#%+CDMIi7>^ZNEV7wPfVT)zf$jC?dZ(V&u3j+(
zes_PmD(~KT!h6*%>OL@W>k`*rd%*UwTA|UD^{vVO^P!~-h0u8&a?T&T%9jgU>`WkV
zi7X()_4E`NW?@wHl@j<9b_EFHULm^`h~oP4B-vzHoH=oqM6{3+$Aqb?-*1!m8DQd=
zz32xNztSC~%g$24tKa-}t7o((_0R(8sMm6_$X)%pocui){s=1Vtzk_F1%34Tj-d~$
zSI7j?4|nUeN@JYcd(OKuY?8c<CBTBqem_Jx>a6ta`_+U;<;!7^$VyZaY(_pW^)8&_
zr$Er7+#>|kDfMg-a3J8Yh?u#Up3<CSzEJrngB2o?*ahl+M>f_agfH`Oc(rd%9O=>b
zTD2R4pORqa59SXV&aPrn@Q|1|7&TR`Va)BLvo-58Puoh!zDb8(L-VQh7@yXRvptLb
z*@zQG$(9M+3Hr4`Op*;C8(oK@=NrgJc?Rc;p3UF6NVJvwYr9+m3DLH40|A*tA3^9{
z7!4t;NEDq2Td2nX8yK(p^UMU<rv0LXL^_LT02boo6-3oV5Jqom+s!jaga-kspo>f0
z1;k(H>(k>fJ{btA`a94$;R8Xco<2J;oGKsh*`hvm>d=XFfakjiO4~20s_^{7%(q1}
z#GQ+E^%8UDgjA3mKop^ssGSH(A)3^9lgK#i?$0NfGxb8ui-Mvj4kBEM-yifDzvPF4
z$tn340$lG<8C;QTC)Z|KjO+qJ?Y~8s-%XTU`=#d?US90Xqmt^0`Qp`7q>JmCBqB=K
z!5I!hZ4PQ_?YZYKNGsLKZQU)`E4>!E*Mo+yK=fB~@9Ig;pLjgNwpBt5-UZl#5LAJ9
zd>ucsqlp)Ku#OsB*V4ye{86DV(1Flfj6;tvMALXSq&*m-L(`otz-1m~lITqF4Hwp4
z@V!4@o`UL%J;w?e_<ENS+7r`OPXbhQ0J1HRS_2$!GwPi$cmO};9se2_Jv)wM7!g)9
zs)Mp{^z=liHZV9=Fb@O{GDipzAx;F-HhV$17qLRAV~f5J+f?xP6?zRQP_sW#F?Jb;
zU=T2wYyKpM3+;f=D$D&$+en-}Q^>CkM0rhrysw!A+9(grs??mjhQ_KY=kicrFNPkU
z_L>g|hmym~Oxww!SbH;$LsQ*J#((O4_QQ<XJ{%q<$ypxv8ip{W4#Pb(gj^GxZFI;-
zjLbX&^|_+sN%U(muHw?qKY#RQA+YO}xMHe3C+hrVi~kgk!V-sW%P%^s39QDaCx4@-
zkW9xBAYt34L)JgYIZS)p?~Wzyon7eb^KtvIiTe8D{>Z$Pm_81jh-3bYv1X7xOoch;
zz^<@V^kA!$f$G+vPq^UCQe)}I?&^Z#5kAAn6I!b=fp(|yF>DkoJzwV3Am2U3(dE~7
zGf8(>ue0#gcEg44)8t=c-lqBtYd>dLui;a*zD)EgR5RknrY=mdea2C1oJ%UsTtkU@
zoWxQTA+N9L7qwu_{BSn+=#IC;9*k3F1xoX}+<E1O5z$yXd~gb4Uj$HY5~;(dJeUv4
z_M0)h>@XG_m|xV!g4ME>VOllae+o_9q5fddH21l+eB3By$;#1Yd2Jr2?zQ<$6{pCt
zHzNBn`&@S3E?i;jv-npp*eY8P+rE=$cj8fN%sJ$hav7=x`qQ1`56=W5iA52itv?p3
zvrW)Cy5>3}Dh_2VdOEqXD9dQA;tkNVDW<CVG%;4U)Ggd9huE;;&{xdZ&|MP>sQq}u
zKOMVMys5h_;ZVAaAR3^NAk0qn1-y6rIqb0G=>Kq&6*<eQ0zIIXWq4)XCXZXGAI4r{
zu^r2U6443rNR~n05ayEEQ>@{tj@4pqh2o9Uh{Af06m3NF6?uM(`jY8aqe8*?;x$<p
z!h5CG4I=r$->id>K01m{3+mpn8%DKx<qPxds{M8lv@PnoP!|TLE568Ay#ypqj+_O*
zp97iz5?>U+6esaN-htBZLH3JyF&{}hX%A0Fj_n$AVvBfxu5#uFOuIEF%V2oq_+7;(
zN}AzTp)NxA1R`YX@BIex&%aTKg+v1p3lIUy{)32OfI~3-AmApFyYYvb=)aroH-`ij
z%lbKgt?Y!mRpP$guxi=24F_`c&z-y$9<SbxML+=0a<)6q4V*g`o-KtJ%leeyrQZu4
zAS=ax?NGF~3x;lwjO#y}3|+pGQo}$|J$lI>#5(TeIH0QQ@2_*}cXHSOTOOSnU0*M)
zjhrDId!{2G2t{|!4()|KA7h5u?!9zsEctFe9?!8gkNpJ@0=`LArdIg`zP!4hZc;1D
z3IMRBUuUCUiLtv}G&epYgE{#Je7XLA<t!#Ey4?4D@})!H3=d)!o=j76b?32wDEW)C
zoS;eEm!7}t+YV_0SZ3Lsg#^35W88YyXm7ULSq@~~&?{TrD5rUvntAXvo+l+1q@})h
zEFeyD9(qs6UffS_>y?<J(nh54diY$z_(olHsQBLw5Q497t7)_@9CN03(B0Z%q2q*-
zcbJM7{)liT+ZoR;huvk$JG6uetymufOAR0F5b)uA|HnVz%<hL=>8|pk$F_pWZ#t7L
z#@vn0K(~FJKa_oG=4^jJ<>frtdBevm3*gb*(Y94n36c+;9k^2j_ZnbL2sW7zlWK>~
z`5Ln<zbz7D&d9El#d}`K>&Iz{OTi$d?2`fx4OE~a)I>|Md`{>tMQ%ypZ!g&e3OgLt
zvZ>XoDD7Na@y)<QG$w#XuPZ>E<+{E5KneaEeeJavSz>o$9p;Y`3@e-;y@bG>cyq9n
zY}r&RklbNlHMu+k@;HIUX*Ey}dOSe2_8!`>{J@z^$u+&WJ%wy?fbMy1@%~wsZ<xEN
zJ4Qn1-|$rigsPGS{2NI;>@RP9Q#iFCw~QFo{619xYMpgsTYDy;S7VtQ@bvA60a1Ji
zNZQ^80GPf2_BQ~GPXOJQyVM%l)zbl2_Wr=sKemPF0sJz%XT#?oGNUBMLvvfWF00_{
zEBa20RYaQZ+gSxZh5+T>NcpGTHEUE@fr)f{*}Ff_G#2iBe*wtmu_K#$<{z`>%&3wX
z298U%^D6(m00I$IQjLT|AKqrSW;*-Lz>S<s4v3R*TaSC#ycDITY_d;-Lu3g-$>Fg~
zH=)m6-u@xx;^aFup3fu(LM`S<!Z<=iyRb&?dm+=SHd=C&bKJg#$$Ktg8u$AIYDZ}L
zQ-zT$#O7`(CQCf2uEa+w1?o&2>0r7M`lFSXH3LurYso;ByQ<)l_H1Qckt^A|WqbCj
zc^l%Z(PIYH3X>Z=X@rpGXYVr_mWdS}o-WG(7iI%_I!aPYkpMPzS<estmaTj7TYa$M
z#fnKX63pc7g50A{o;FNm18=&1NwD?U$o$#MG@Ya`Y&-E2=eg%cN6UHRVzneBuI_~F
znb4aywWS1X)i7vKyIH3;#zQc|DzJg-+7hTOP1Q-qmYDTl`+ulZYy+q%)*dh%I_^&y
zUT+dAtUQol1_EF9-#A{Q;zC}pi*%ZtAIb}7D2tXbOJpidXz_oPn_jQ0^0L^lE~toD
z-TP5UQvxP4n>0!@aBX@sIVJ3362*lgmq(haD+P|Fs3Nj5F2=PAC@W>Zl@yaGSCT-)
zv~&7|4VHUjo?XC1iat#^!{?c%@}=0Z#8VDRCf2=D&|`_lgAFmj2_@`}7cbh9yGVlC
zcNsC53%<oBQpI48zdBpognmzTjKMT%{yWFm%@93@&yXhXaO6F~5%r!ByZFwt^ZBMI
z6p=`)kv4jG&-?Q(Z}Ao9#n+?{xLE-RY`*7d@vKYNQ%!<k<o>^2U%<PETtj!K2wN*Z
zWj~%fR^`TDAGdRM2tPGIF~G;=<J|3CV_4ibX3zUbl!QIK5{Qp1u&>`;=7{+{gOMF)
z@NQmG5)m8gV1e6(TKnN^&k9T-p~5qZ2JNtML`TkpQTv<~50ENb?1!FYfr4$A5<@7|
zj<?84i41Q)QcpAyHX4Ye#DsMFEvLtw)&SE*>KCf5)191PKTl~-$PrD8cSxVZ4ShbQ
z(gu8`5Hy0{KDxFyf{!oG5wYe}(ivh*82_}N;p>_jG$ez$*#u(=z86!pvjQg<wVxqE
z@SdC<F*_#eqtJ!|0zdJ0!b+8ju|^R5@{-Yxr}L1$b;2#Wtj+yQshWw2PiI5TyyF>7
zA{+tnm)YE{!7S1Hx}^8@nbpM19I4-&BwBUY7-{LYfdkx;8I4LiCay^ubEpJ1trl>#
zA#ehZp`5;rO;u%#m`#@XAX7{g@PN(+e&G}60^xV}ra66mbGUsdWERq1Jq`pXCld*=
z5fk!+>U(bHzfL(LW?o0b@7uD_iNW)fs_Z=K1sR1`jic~(w&qT$B(QoqNecZvEi^_<
z%mRNu2I}3NYUnmkX&MUU$|NhAT~jn2HaFi8OUj#BxDwYE6Ugb27~+12{bw=~A-{yE
z8pORK=H2AEPa>$&-y1SUzFZYelXhswOY35&Da$dfu+1EU*`5)a41>qr|K9Z)J}yyQ
zx7u1pG|rgN`QU52M}nW)cDhE2*NseTjYl$E62Ht^!W#>w=;ZM@!Uy1p+vavMalF;{
z0P3X;;*V>1++v$gGK?yHvNVVyAr(YZHT{{gciI@rI<*zpS|^^W$JTWhDcRehokRv0
zrc=+Gc)x{s`ctcU;5h#R+sT5eng4kQ;UEh-G>)$xBZS5hO(dc!f8akKnmvqPoY=T6
zYyaaw5SW6W+?8xdw2YSOjW}5HY%o>rIwa;HhVgZ0?t~@3H+>sf0d}jv4s%vZ+VO8#
zxDt&S=YcrVY;;=Z*JuC*|82MO-N9}86r)E&e2SD0C`@QoJWb?=C9r7*Ryr1?Z+`sS
z7&RMgnE;ci*|KxsyG4<4*GJDQTaJZ`2G7a@Ra;<tTXtO74+FsN#Gb-)rKpIg7Pw_&
zI><)_TDl5F(xG5|R+mZW)dLzPTW|R696u_#1i+nIPjYEZCZLH#w2U!kPX({>I!7KP
zhz(g&;Y-yzx*qTv@!+2EYB2j;xSNlGN;uSC^-K1|XuOeaBZre&xE_&95EI(CBq2X3
zn{DiS3poH5#3gIRmizTHyNuI(4J+Xzy`bLHW}V;K^6&2eHq&Fo6s{oGh_&&$a1k~K
zbKU(NdQ9<;S3ThA9IzBCTbV)qG>feOu~e>w_1DS<IJ+VC`{q?~aMu>3e)iRpkB(>(
zvf=V|Cl9J+T5>yZ1?7lv5dIiz|5Z3d&q|<%5jjK{4#Auh&fLGHz`mx>;3j*@O<3Sj
zQFU$5+N(#YUH5CtP#f@jnzFpz`cBg0VQ0Ytu$Ba6;UH-&*ZJS^+FeF-J%2VBB|oP=
z===`pLkzRn;>_1@^KmvTc4jX^x0c%_1)c>fx98|Ru~WUqSCqYBD57!`wji0;IsGTq
z{@rS7ps8x61jur&vL2kOfK8zE2Xu~Up6WJrQ{BR)zsh~`j#qfUmAvkZ=SxQ0%=J4a
zlI2J@qUa&ic#t0@l0w^&=)r2+&{gW}*)48e=g;d_{?elatp=e^Je@|mX?ZGFd6$p9
zV261PwHYU7*4s{6ruFl8K+daXQM3{owQ!WAYIm71(hoizyDyvseOY)jQ9`qHb6AdJ
zvEZCGzWs`G`eO8w4P8^UG-ngS*|<%8&DT&GVei!)J9y<JvM%b1X8J(gvex`8<gT*o
z?H=@u`L4(;MRlcpP$?-(EKkl;!&S93C_#6v0{cj5K=%TlRG``U8!Q@dTK0;A;k#lY
zaAX!)829|IR0CIl`<Q*ovuMh0eA6$vBx_RAJTVWI#S<0R{WP~=(S#<)Y)UloPh)yV
zet+U|Wxir!AUcL!ulkBcpl8M>N-G)o$dlUBt6UNxD~^eBqQVu|s(gF$$M4s-X0-^f
zyUl)($73dk-j*#xn~U7pc30YsdEZgKnPoI39uASGr0Ut1pl6h7fw-OlwZ1qSg_YKV
zyPR?5#;Y*9o}Bup)S$D>d{i>02CvluM)CkZnx5-gP+y4`K<I^hk%bR<e&(kDW{4se
zfa3h=`32zn@54`*(kC-J?x$`##0s_q^72MGF28+d*zkr}i+nR?3JcklcIWJI-K}BG
zJ~j^ia(GXChwm4DauKx%4)MX{Jms>|Ir8g!M*_srZ{Zf211<x(ZQW&X!7rQl?a0y%
z)Ty#qLA6WBQA%2r@|5m1lW*}s^ZGruENgG8^W_B~SqfwKBT+B=HJR$HA(uMQxt|l>
zOK(B4IJcWpevmLQfpE(X=8aK8dm0+MqkD5R>ABr`LPh6lDiV3beAXq)p0Ra#$O03&
z@Mi2_$J*_MalX|JBXF~QROfaB@@~{CHg|u*bbN4zlrC3kag4jyJ%uy~e#o7n)L-i4
zUVZ6Dy{RPmvJ%?)Wr;)W^g{kM(ga`@7=gbBgb1}J0%|v4UY`B@0DgIZnRGyHGQas}
z1a^os>mfyQ>aHwV2zoK%`I*zrPbr=ZaaJH{i8nX0Y>J3Y{w2^zKw@zpczdco9~r#?
z4P9qop2cWvkGmE<%iVLl4NM(xEI_$gAc$jmnmYb8rdb4<)ORrAyAW&T_d64om3RsQ
zHI9&r%u7%PLTPLc%cxRG4#AMj9-|`vl#f20&7SKW2i4bg`N>Gg{{g5#SHA%QI&}RI
zuz61efwb*5s|1PrR+owKQUs?HWmDO7CxQZD0BSX)0<~FHO2#Cft`h@SSMmH}_}|HD
zp_5VCd-jl~aUA*Q;uxY<9XPb<XBh>xkY)LT$K8BhT{KbQmy$_4pj@k=eB6-qv4-|9
z<{h;xTb(KKR#E92Jabip7(m3VFnm=;ckj*b{eLE#)Ct^ta`F8g{D0IVy|VuwM0oH2
z@8AE~`~OvA%ch=~ho)@EYsRS~ZKCA28wk?0LtvkOZ<zoH3s|H7h18i8WAP`}y9u1~
z+#cyV@f&;vQ!3lzb~%7>yE4r-SVK2!s%ZLMQ^SQ3CIip-A$qT|yTyU#=W!+|4O880
z^|t!|<8tMy{%iC<4hSymf86bM_xgXo|FhTsjdC7VG%nnz)>sI1?7dG$$nR?OJ6-W*
zkIDcPW>An%9BQykcwr|v^7yxaC*5O_eGGslyX)rw>^D*XFe``vf&pNj)Gr4h44|6<
z5S|n;2MAdWk^zL0CJ7L+0-OP`-_T+fr|&`Z4f(VCChtE)dY8Gx7(jdmEOYxq2}K7D
zb=H5aHayvOjy0^qs_>WL4;lVY!5?z`nBu>TS4p{rsA&rkYavovh-3?+%d^BUZvn~_
z($IH!)Bw6U@k9A+upSU)fPFh4&Hzzlha@><*A5wGkRTT}$SCg*=8y)^kaaz~uA&EF
z?>N{&pt~3XeGb@vERfJ$!Xb$r61p35$j}Z6-HkY8l-;J$fkyS+jq1A_mAhNl>!{r8
zy3R-C&e!oED)XSg3S~BXx7n09vQCANhiqB-#OFevLn1pQunR>T6x%_8Q7{hxl-NOm
zMVN5V&<+aB2{8v1Y|**ySQ?UC4WSw}gsJkG#T&$w4`LasNJ*37Iur#>Mv-BL3}?t_
zUDkxi;3^D}C{j(2C{m3LD?&2otXhN=gCdHsYC;$xL6#L(!eQCwAu5!yx`$XPlNn`6
zp^VoJ3uU~nY`C%#qfjK<);04E;KB?JDp9P^vkqKal!YNL#IjH)Gps@Ax=mLIAiK5@
zKvG{=fDmhr)BzzeJ*_}k=mK;1LYMA^F5L@Vx)-{1FLdc%SnQr|7L&2*RS_oU`5{8-
zdr6eAdUD8TGk@knCQ#M|4CC5(VY1cB7mRWd?t35TI8r}h!zZBkc22M+wp+&{qOeGr
zMO1adf9e?*7VFCim#6pAHyahJMRM|{^Y_VW4Y5Ph#arKD;JQUbT{+>Giw{1F53Zx6
zS47a0l#D*4>*Q(t(P!ido8L)3E?ubV7m@ZMhWilc5*RE*l`sv8m<E!+w_OpSpojql
z62+l23RFUYB#Ju{#V_7BoG*IKC@<Ntc>({ng^DvtR%?g>s;LuYb*Q*fpPqL94zyHL
zt(#TjemhigY_XSBrGhl^eUX^%W6Ng=ZWWrO%+?XpFJe4wK+K@TWS2Tr9@R>RZk14>
z(w$1B8s(?69P(2BdOHJBG8=W;^=6xu8uoF!lW9ho%^)8H)&I&AXRkwYjd|k~1kNPN
zMF5F?+Oe*-t|pIXi^VjF=S69hsSD}CMr>_~sNUtL;ha-(mb^{p{^x-0lJGaSL+UcY
zi{RZhR228J_#nW>BW=z5zyG(Bw*pPGX5bIR7mt*YP&ZFq(V}(OhvVXhP#YY;FtN}g
z5$s_CftVs0h+%@k4lu+4CUk&_-l*{@l3llM!6j+f)r4)zQM_h5%8O+JnLZdl4BW(=
zn;odazCe)kty?+{n;D52Bkh!9#z`x)F=J#62&o4N9f*k(7|utCJ5Q(lxdagS^<9O(
z3hR!&_-&j*kUPJ%#rD2A$5$R)*pFA~dNGHM&{aA~leM=wk2kUn_8L1RqJH&h$#(zO
z3EN3$@gUywrA$)|c_aZbw8*r`Yk0wp1-<{u9&_UepD)*@ktK$aW&H=nC_vSrRv@)8
za+h-mU*FrNhPEkRKd`6!9(`P{{9ExlomcIY0zk*)d~&mm6zc<XFsb*WaI58PxL7BB
zL)9(p70jR^2tU<`MKR^mWNRrMFD}pEG)lWbt-J}!R(s{wJ`4T`%ukN<*l*pewTUYB
z{|4Om?!qpO>2h}7EqtT9#bsq!AW@NK<9nSI8ROS@OiEB*@~IKAD=A9x*0Z82Wvd!N
zbP!H2zX0xMmz46ZsE>$8)TQhQ4ghi{F4+9S_E#J0?EPzz#UdLAA<4%c-+B4Bf_&dq
zH?Nvr#fIPvW6UTV2^#`EvH24*e^Qo)28hgAH$|vs_S*nZ)Sd<)DgOHy)vpl$Ihkyn
z!++}De+_zJ<^9)i|NiTb8vpY@(s)ksc_)`t4{y#dNAEuTMDadbUH8f}`h)#>7;oYy
z@u$V*=2){9novFdaQJpH9Vc^f+qZHss9@la+56eyEdLq6-2`y$3q}0B&_WSApCow^
z5*5Qj5vv##ide-iGtc}j0OjTdKK5ugn3p!$@JB}i`HMX;g4EMcvByI>lzilUJg#(-
z{)@KDQh(C_b~1iaj559BuZzw33o88U>^;EWetR|i?fJ|9`QYWlW1m7QsYr{>3G{;L
zaHb8bVJaghizz!8po{TwvZ9rXfcbMOMSMQaUac=yK&A?Z1APT-tU%!KvPW<bX{r%Z
zH80S&6WZE2G&dBP>IGgqF!OsfTudggSD~*=mj>);Ry?9G@%&sCAit3u7U{R?(vC6t
zW-sxGw%XOf%RdSa&4dWq5TOQ$uniGzfQZ--kp_rD8=|lQqR56QsvrU<IGIw_UN?lc
z5o|DkHnukyJ{zwZ44zGz8VsFH1R4ySZQLrurrp4s@a&>=4iA}z*4i(vqNkctPG@P$
zBGr(!Ogchd)DsrT1c&8ySYankaHMEJi3u)jd!Ygsn&6_g7q#@}5oyh16FyOY?L)j`
zndgAnXVml!!SP#X77`4jVi7f_ut9UHgzYmnYwqPEDsZQd6w6zQay52nmBdrlLZN||
zv=kb!cSnc5)BzXtS0O*0#c!+lAJfvEkne)NN`Cp|!YD(X{N8H7vgJYxYMJ&R#3&;!
z=<3PKhHNL)HNsX;Ts9y(A+8ZNgM%dr<Z5`x_pqooKQh2e5~}cg`%Qz)==?%BZ1JD=
z?P{6a#MNLnd&yB}5C(%N><0+-dILh0?Cqdb7FsY{-02QM7!L+G45B_<Ut(@=x8Lgp
zK`$CWK06XwFdIGYDweiaZOwu?H2d=9p0~X!hq#RB$JaQ75Y1Q`LLRiN8ACm4WT6`M
z`)Cjhx&bCp)bCNMsk2`yw4gQ*V$dO^7li?eC^iy>40X`yc5xUD`e7fTZWs{@YGOfM
z!opFP+OwbzEdo+u$n7eZvYqhqMtD@%2(LJKMm6uJkQ72Ia_JkB%V^TdR%9IjlNTUs
zac1hzjuXHH&M(_>#%Qrv!}?J)bH-$0tjKCOV=@eOoCVI9ymdyO*;UthAqMAM#L6pf
zI95xr5}vQP)}dMws=@htcX{tF@7?9SyZoKH%gy~|&qI;im2K(};|{pMq!SjLzO6i9
zjw<N%RnN4ymCMUe9X3D})}YdKa66|~^0}eY3dz@5kGebYQaLWP6StHPnmewr7FSp2
zc-nUH)%|)IT(`=1_>sK0y;I+{u-R<#5*>E0Egr2$tFLO?5I&p&yM?3c*c5l|%1vE^
z|6Aa$$<uV;uEi@ZcDcqwcHpkXS$1s7#9fHgF`eMHz3h#4aEMM>ez-Odv`yD1e8M~i
zQv?=i1J1@v)*wXeW<?LvRl=T<;w9X-c^S_yj%&U5WS*?veq!FcEg7DIvXE`k^D-cB
z16lA`4jOrKtP8x#F%`s1Nr#z&ih)pZO4Yx5IDlK!e9)8@4(=DL$TK0H@!Q@3x<v5q
zmy>3~IE<4?J|21YK#Fsm0(;}>z2cz2X(Ggbw~GocA4=-mD?jeuWs%gZ>ZwRAnERqi
zu}ZJzxISC@Q1NDo{b-N!byX$6v$7;onIYFk?n4F3C^klErac^)SCC%dUG?m}2_K)c
zsd}2_XcT-@i_MJW^E^M2)&~ca=9@a#6lcEy^UB^RZ(Q*+Q;qMLDjO9!8$3RDhl^GE
zHl4FjY;hUA3KLK0c);A3KnAMw8CIaW+|up)-u!ZweB9i<8=tMQ&)4B;aX+OY@|?sn
zc5%?CqE9EjIu;;H$}-Prvr7GFSKGd$Q0a8gT#xdgiqf3)lUDF0`88*wM@1mbmJAza
zPZJn)FI(+a(Bv@Lwbm-4iUG+Gwmq?=7J5-L#g&1A-5Fa)D#&HH`?VmKl(VZi*G76H
z?AOJ1e4T8^*UEN$t!&5F%64*{Y$w;sc5<z3C)djM@H*KZUMt(fYh`=r#CB;2u0^-!
z$ge@SS1I46Zm*KQQ{BET+wpa>9bYTk@wKuYUn|?mb+VmYE8EGnvYlKj+r#T*dw8vE
z53iN&9DChWsBQ26Wch|2t^wKa|DY}k%J+Z5fJFQIKl}W@`~1I+^8Z$MdxNn{c6F9q
zo$VK)+QjF_YQ8I83G0qo1-_bhS|rrk`gls^`4-esp0R4>kGY7iJ_LmH_+FSGkrg2#
zw!UO04VgS+eI8Gziw{f`IxgH|GIL*LT2Aw`=dVh=lW%=9^jU%#&1O<YKZg}yU$HTF
zNg+8!N*?5<q7!CVu!2P|CB!I8g$@xvOBL2L<^mS2=b(sW*?=qk^KR<}z1nO~Bz-03
z9E`|mMs|;R=Aiod*gY;M?AE^l#uHZaSAb+DXgUoHfa>)6GkDXa21H75@E<2>_?7fU
zS5X*pNOcpKv(Yenb^GaE`vEQMRw`$AwtMc~AX;3ffhOa6vq_<(RObi`Q-qYKNEwL1
zNIitCbAmLUMZ_?R_Nm!5so6PbV8a<IT}O7nyzAHwnD?C60YkUp36*|FS-1Cj$=&jj
zi{<DnUVas;`qR9DVPl}%s~DNs?plAj39q8Y+(cN>W^RJ4C^a{6R&?9WlI<JD3A1!e
z@PA4MewqKhKL5TAt2!?VIr%wyon7V0o}^vH2l`h?8I-iF*lZ^mT**$lgQHgKCw@Ah
zMqD5-*^v`)IkEZH?+WCjM5-cU9#W#00JjRN4D4Sx_>o^Yu!t~StV{iP<O*MY2f<O@
z8J-p|C_HlQ3eOE3x5T5{r97v>5BIc#lWdT(Vsl)88YCtxt3=y!w4xJ*95vL5jWnVg
zcrGMa64>CzJ!O!;7U2ArfwoG0w%4i}VjI?5YN8uF>#lcd!7C&La$z3bWU-=DPIw-B
zMr$KMF~xi;FSHx7b_VABo0f#0>gsr>IX{Jb?^A95wD~_5s}Ysigvxw<06@+A?;(oH
z@BfCV8}85l_vimV<N5z$l^i#U3V0e*GC%$?IWlAqttJXj;W~k^HL|uhLp~8ZM0pv`
z%4XGk-PKg`=C>f_R@5v%Rw9LW3(b|VPg^QhGQNdKs&5<?Zg-Vm_`Q4gq4a8B#OcbT
zzu><|`CT}Ec}kSCY!cKDW;fs9zx=we`09(aorU%ZWGUaQVxE_rx<F8^)d9^}^T?tH
zt7giugBh0aY~^m<ZSat;5~`_$b(L_-y3wE`5825>TJn&cJftNL*~!CN^01vetR)ZI
z$-`RmFegvDIAB*4%d=19Hfa;nJw64tcwsHS!q!<WKEQiYe#<+Qx7kLEDKF~G;*adb
z7a?^8@s=+qjg#f(d`Kmf$+!HWq~I;jI%WQ>nPlU!oEmt{>mj9p-H<0o+UDsLK7$<Z
z>@^tpgI6c{Z<L|qw>&F~gBPpngNIUbr^Of7>M~8@Y2QuF()sdQ_dQ=l;vE)n0$0=7
z<;>5RB}D+PW}e5(#OGaab<S3ccntmUS#MhAtAnKR!p4-FGi`hCTJ2(oc%fLg)V?L4
zIC{p#iL@uS2Lkqsg&)=bAFe-*?2CXJ|DXE(q~!lc7zKO(|3~!yyHNTxIUlFq?~7>)
zItcuEit-cn?<boN@hb72Y{r1Vdl^rc=iZCOhPw4OxF?qviTC(JJYQd=Hn<b&iBU)F
zu6K_K{J)O`<d3-0px!PW5%1CC-yc02f-)XHdp>;f{MqjhpTGX~$s+^6$#aV1(I2|6
zzrB8T;#rkIsn)|szdd;U<Q4tzv(rbz$Il)<`eXR&A1@w3a)1)(YYK4skCP|%7ZX&K
z)Zq05CHF;%V4ph-qQWUHd6>MX1Lxg>ufs9>AyQ+21%@p;>hHj#k0=lc=(19HRt;B5
zNnToYdC0N1k!b?nLe`AWpJfy63~lxo`SL|UQ+a$Tog-ht*0RH5XISbylwh+>>I3iX
z@^VNU9)eTlvCKgEV^$32;MiAq*@3Xh5!D{lp(zHLc3#09`BSzBdH@|>OninwJb+bf
z5O32U-nv1`0^Z<#&%MEgn!=&BK_)eb`Sk2ika_*yZT2&NRx~@ZrM+RWxCo1@u>448
zv`S)lJt_B@y~%1Z6XI7MeQ>D_;&3d7+*gOb?OCPC0U!3&k-aKDkXd!$s`%6)n<cZ+
z|6}j%-`lp4Md9=7&0oQjecm2QrX2yKq{Mb~HcirQ-kT<Q>U3}Swte)_5@plY*NRdS
zw<rJm#o&VkNdN@(VJD8V+e9P>17I*S7yyHr`OQCO$@2Y-DS$%fD6_I<gOPEI<7!<I
zPAkG|SM&&m`KcQooHv^1<eB&Xh}G@pG}=?qLBch>1!CQj$(XwJtljTt&<n=nd_9$;
zw~_l*tyissgf6UH4hdChaoi(xY4xekjIW<O=epsgV5L7WPAMD{!t@b{nR`~}<eV=^
zBS-E-K6cF2{9TwN(eU@_!x9$_qg2PpY+4P6t>DBMg6{Zivo2&)DWm0Je0&U#$H#yw
z&DoboA}md_QY+j<mmIEGk<7^ZHRT<x|1RKmYDjZ1R%d~$I{)!e@&2dZ>EiwR?>?^o
z+WYgLTK7Lm6IP^qKMHw6FQ}fCo<Z~Sm|N6jrFqsN!DxdoZ`guEg!Y5}znTuCPq5+p
z9qIgRF^s2VK$8aFR%LS{PyH=@|E<NN?0v46k`9`v$VrQ+AGzC6g=6kziC-ZVIM+6j
zo9niI1GQrexh68QLZCG%L}QWCxIk&V0j048rF~*7<f#pw?x^q<tv3c(JqGO9Y!QEg
zd_!IJZg@Hp3{o)|*f<#NO7pF)-ObJq7~9PDn=vI!^QyFmU4tMO;VMaDNJ>k|0`QC3
zM+mX*W3hxE%*OB{czluuQIzNwg)9Ij{lU&ED27MW9X9(yPc0~_>`M_@vK)d*lkkAi
z^fy2LI6}cF_nP9vN8J%`CXciDBkxc;DgwQd9Uaksn8{t*P%o`Qf4~Zy0-f||hidYR
zT&12}0DUFLK<!0ipNix~pN}GS(}LfR=p~{<C#E?kA2}|iD3L1qFIec1fBB?<g%3T&
z0>1^8D}e<DfRu%QG&p!~36|*7eB>VTrX6zZ=~}?d?E;PMc};4V=@US|kfZ<UoXTFv
z;s12LV>Xzu4JOe`3w$YP9_xM?aI5xzTqgl^Mf?{(Xm^VFpF6=m{>wi9^S#dh%!9hh
zzFq?|WnT7no+BE)ps}W6`q%VBxQJoHm>yb0|6WDYLDU~d(~IS0E~k$OKZWvfl(|zu
zu_P#O?joTMM=e}V>~r(52J|#7$O7)3Y*?NWhIx$@zL4heKCPWW+EG`RV$R;zsG~al
zObK~D*@<fOs!*^wpbC#}Yc~&Q4jxfDa_PCkl?5)MC_dA1r=kCqt%h<^hO1iZD4pmK
zLbO=;eew$_B?l`dhZUrS?1W~6^ysMw;0;9x??tG<`Y?=Ea+AAuB@O&K@@8FWv9MIL
zMnVB~<vMK0PiLfc*rP|BV^&M&^wfFy+e7##roaDS4_EAg{&pTZ&RY>y9*!N39r`}?
zRU@bY+IhbqYgE#PNi<||5?D#Rc`4eGXgVC9==|gis)gWKQ@KFNl7n3hCF5Hvne-P{
zsvEyT-FU31ozn3wln#5MnTv3>3i3BpLH=F^-L?wKT>+KY=dM&&{lZeSx2*S6hs{HN
zCT%`wHtHbndVMVfWz}cS_4E;0bJbNy=`UZYngT1;<S_(mTSYjhpez;^fwf99cI4|&
zLRkXNZ$bx2<@|=kSzo2@s$W>@^Ia*v6mZM{#|)6V;*4d$`a04ZYqX?6H$oZd_V(vm
zSGMCUPmgNzT%j(dt?fX+>uxTHI_sA7@!EB^aL|Om_z|CKhrgn`UZ6i5Fc*F7>AQ?8
ziRI~gjV*fZ8GG$l_gdd?-=g1!*8A0654^|gmd5Rl$N$szUTqQ-yRRoeWqD2kud_Z1
zMg<SywJ~%8PyGMBo-wU^{xb<LqW(Nw4lXym|BqTIz(MK$f6(2Z|LpJo@BP23uC%bb
zmrN3sE<QoJyuzOEwY{!(Jyj|x2e0CCznFdOPvA|qen~!t^ZuAFfLw!VS}>uD_;RMI
z9U5dtgDufuGa6)x#y6u8fqe-hoskhG!ZwdTMPr8!+_59(oYEk)drm^(`xKrHH1!86
zz0HOs`a}Z`$XYr|*zJ2|c>NNp!eHV0#ZY6d+VFbCeCik0u3s?7qTOjmMu<RpWOCUI
zN|j_>pdKxSC`pX7@d#>|P<Z*Ri2&82vqmg8?1aB5f`D_hE=w&}YxJs0Zb3}vvs6wY
z39Xar0eT4r(jx!oQ%A*d;(NBv%^#Ee6eklZ#KUMw=Cf!BlSvHjdX98};v72<<*O*^
z35e7H3ilyl#Y{~pASxV|d{+9-n(%q0wq!9`B`K2WCXbJqf-1;cq-Bv-R_)t>sw#Bq
zOGtV`mNMPvXRhL~)j(n70%a>L2Q<wJL0IPHbQv#*O$r*&W}1vBysR++TlsC(H0cv;
zO7&7kWi?G2i=i~`X02j03U2*krfJexFh5PIM#gTMELW1BCQaY%r^y;4SaX`RM1X41
zSvyT`gut)CS?f%ber>mY_)M#NHH5p%2Y&PFq_~H?V8s`rf`xJedBC6sqLM*=_c(<c
zGPt1#7COwsNs#9Z@>~Q2JugBEx9;fEoJ}cNY*IZ?j(Jt!FcU5g(LwC=2!Qk5ch2At
zA6%tR>>Hs)_K9@4+%`vUmpD!HSaFXP^qlj?iME(g%-r)72gf)pq552SF4{`@K9^aL
zHkVZ62^N9&f+-+csOq%!v>!i}C;qFak{gP%U{4G7G+<8y_B3QqL-usep3ar7B|1V>
zOP{jqUe@uGGHG+>R26CS1DVz_ptTNx$cibu%osZu0QIfvHAh8@Y>PuxK}y3laUD?=
zgZun48eBWeOF}#$LQXgxri1Q;7mFx5s%kv1%d4(cro(6Bff$?r@e3P!Z256E%<Qo2
zsmlJsIzuW$VP$tI!BIVMTo2r2UnvXZ?W&BfcGgseGH*qx-rHMG7WA)aHB}TuR#ekk
zNozIMU$<at6|o`vK1_(3n@#EF@gN)zR^xC<;D`!L28;Nde2k}j4Bs!pc$pksg5nqs
zVO#eyoX^Q8*t|ZEh=Gt@MC=O??q<iYBdT7*CiO5%-G1-HR>M}d8veR%H9j)4(I|<)
z%VyE!q(2};K9omIRTM@qEjq}GUXh~G$ez@)qonqv24q%GYDH#H*&c@^euZKR7~HV8
zIn&BK38~8Z{H*9uCVk(GL0Hk{oFQ1t5?q@(G^$p^Cc0A-1H)9WHYCQ88A?F|GZ$6c
zzPDrP-Z5uY*{xr{WlC&Q(!hU4i+B`AiSy5AuV1}L=UI0;6Tchm95<d07SRL@eDe82
zI9^4`k#wrch{lWb$@h3D1qxsRfN8{E5uiAGEuskI-}3w9g}3L-<PzTV)QJwx9jNar
z*Zqq{l+0(-VIq#We})Qx>dyfu%!(#+`m}dcv=cb$x6|>BlZ2B<e4(f5+~woQaE>f>
z-;+A+NnQ2w9TbU$UQOd=Oo;g>=`#<fTpo?-YH}VelB3$kGo{^oVz@x02$A+UKb4l&
zD{0&U<LZQtkLU1z80E5m5&i$WXD?p7dh=tSGNIawmQ_};o+ZLI*cj2@HpRh)rfuQ{
zucO5@8q;73HpQ{JI1HstaegF6i?ZPZe+o`A8w5e4#4!bA{5?0jwd<q3&%zFT77R;J
zHj?WsGuII$OF1ewD#vJcXpk9=Z;1w*(O^q7z8MX&L=%|NNG>XG()ki*g+aJj#IVd=
zE$c2(6-wNV57;Ol(6K)x>yJ}u8UZbT@RTzG1YCml(9c4fl~=Itl@=w76mz9R$u*p6
z$(uFFy!NG4iuEr=&9=GH*6NnSX~99hpwuSa3R#*d#9BEwUzk_TIZZ28(6l^#cPDG<
zbr#XptLHVBL6Rj`T=QgHwSyJ$mR3lcoXqg(L*L5c+tA&$Phg5F_8VMzT9M$}TXn+n
zjjp)pinC(D*$2ozh19M+k#g<XR<1qUy0s@Ru08#`UwfL?Y8$Ua`O-684P-IX(`?uK
z)!}wmhrxbzxL+OaSBLx6;X13sr#o04KHaYm_v^#``f$HK+^-M!>%-exAD%t?>7|%D
z^{YQv7u7s%43sNA{5736?sUa>=Vy;QSx*WB7})AtSxDmjLUO;5+}c92AVK>T<Snfr
zx9B#@5VrP9&;8Q#D|YkUFGTkX(fvYnzYyKwLbSNtoX+~xWa^U{S{XUW&qofjjvM6P
zk#UT-V;rOH7{}`d{0a5}e`*E%StD{!@O4J+*)Z@9+B9y?RwDO&#esJ=i?*{5v9k}c
z^Hqn~3HBj&YK7QYBUVoEb;iosFw_m&G}H~=G}Mj%ErhzM-YcoKQ?jklHI;#DN-Hh9
ztp{_!n*6#@x6@!5s-|MY;a1nhnw4<AziY&#(eE<o_82OI(O5;IvDtd~8VPKogE@>$
z<*vY*I}vN|Lu@HzOz@3|jIoXZ6MWe*VAc%fg4T`5vT+QUeI%FLj^wg!Q)JyEo3#&Y
zOa{YU#$>5+tYb8YDnhY*^#+frZ)+Jk+l{q?YQ<V<85p6$Mz%bgGciYnL3d9KIJ^FO
zUz>53I~JlQz%*K>RscH`g6tdQGn*G7^#;t5H}jaxk>q(giZ50RI#VXj3Z~HW8w&bg
z0^NU+8F4zeoP>+(gp4G^2x&5s<42D!;^pP)95Q`8N@lAGd3r=}ejvpTvr^UGf#ZHW
zg{=&PAB(>_lnn5}d|K6p(!hF88L(#m9yajRo8Po_(FU@Dq&xC<NU=RA4aR{VH_3li
z@ZhyWsa4_gmUGCWNOH=BQCdFJTa%Pp!;%lFeIaB+3H?z=-8TB`6bd-EJW%_?piVBr
zoysI6y~`PeYO|0-UCGL~Xc_{ou1<`GA3`R}PqHH!;6fY3PcjmHrIBdq-NUw_+PQ70
zc5WN0eZZ-deWZ^SZZTA)(5<L8+lp$dV<^}2TA_^a%{Z;-d%L#JN<os=h-m!n#WA*_
znQiFe4MPV@qCqE&MMRH#QO9#S0sGI#+L_y8qJH8zPpqeD(~MG>N1j+u(@(5t=}yq>
z1lDu3k5zN@1{1NFRQmlLyv8(K(N*mv{A7(e_{kbmurPpBbFhyq=ip9YF$FUq;|xs5
zR51ZRv7CROSWmyq#Mo^94RZ5ukeh#l14g0j8$4Ohz5$zkgZ=FL^~}Eala$t8qPv+R
zeGF=f{kPd1+0_MVS1x6-^&Hu?o+G>VE7?xda+co79BC%icIU{7u4?DV?izDscb#P_
z-e`_w^Gww;wNt%H?NrT?UCTMLYrjOro6nJcZjSVGbEMDKsGaN^JXz0?KAR)`{T#WU
zBfsf664sB%Xa8+BNBXv#xGlQ6v7RG+>p9Z5pCkPy^6zerG?Qw}bELVeHQfWvbELn<
z97#E0gE`W-nIrw`InuA1BYn#`(zl)?+46R?IWq4~h!pOG$j`pPll2_Q9SYHYj@-|Y
z_kE7c9qSH0eL|?u&;Df>Pj<Zf{MqZ*{j*=bfB*9AeIL2BND@!x;}ncGBL{|E?LERW
zu<z<qYLuY^*D4-rxnSGXhSDhmcU;YhHv#?p;6J{hKT%95aLE4dbfd>lf<b36d^~!z
znx^4-9s#i)<@pFCt3QsD<<Ve1uc)I%{&a$X{!9K8{tbeli~j@pt+w9^5DM^rv~bYs
zp#N}Mw?TkavJ4lFvoW4x=f09Z;N5W<C(bAy)0j;2aIu6yI4^-CJSTL<@x|rxV+4OY
z&kqiWP9eG>Tt?)>`6680K!7VS2gdO<B7_Xb7ZG`XL|#9jlj!_rLCd<&8xC%v!1+F!
z&f=wWwv0##qIl2g^HIW{k1l4ji@#q?!gvf`Aw7<Vw()GXS_~qBa~L^4%oY<CL+bpB
zC`%EpY8fpi3A7&`ewzImkH;aDf44dx#{=heJRtlVIUkN3eAGH}UX4f(2c(T@k$111
zk743WXT-LN!5Q#;MI-=F@CeSFR-W^(*~%G&Q)hO*3}f=oG=eDy99xbbJ({qpk7kRD
zM+E01Qrn}W2M_)hPY2@_Y5qgf!RgWEL+RBiCHbT0zy6BQBj$S1gU_R3M26$lk1yZ!
z|MBds|LV<)m%sL3zdC#W-~nOXCFxbB>)8uQI+Yi@gZ#yAudBV<O<vRB$Ye$$L%SkG
zUPQyy0K9$r;inj4iA}=mNUpF&W3#Gldtxz0i)lDMJ`NX)@TPweEl*oLH;<D{fGz9N
zNKqPDm&S_Hnyb%@u!AsJp1uGUdIdc$7XPjFhqm>W7|<X3Tefx>@E@69Gh>^x_s`zH
zdftC_cJ}h;_paqAI8TY?O)&lI?JxbGp1pY4f7XBZ^V=VPe)iK(5HaZ|!hp~IHw&6%
zT?<myg0@@>R@Q=TYAJ~dDo1KdR+A$B6OFevkC3Fe(QH@~sg``Wu;NQ$t{Kd=SgBnr
zJ-5dUYAoEt$9a1?ZN@?BHhkq>dt)z_EYPx@DRbzvcZ8t7iYO&<(TJrcX<1z@Z{`uM
zP*&}tvhui(l{yp!5AAoL2=3lu!QBCf)JwH0Xx!stDo3wL15WvitX)Jfdd`b@l29l8
zTID>a{1$O+XiL~sZ!xa}@3aopTc6kL-$Bi)w*hZYa6;!%r&X;*NI_yEBzz(b$K#v+
z^>p@e+F#E4G*V=kM5mM=dM;Hf%f%|%ivF^$8QodU_|9tf@2uvaX3c+#2<DO1+zVzC
zv3UEF8Bv8~@tiQmqV|*6eCiz^(-rSlJFlx6?6y0q8q=^rcT%-aZ`9pM)dBUY-lXbH
z`7HJ5zI{Hsld5q(Jk{;<*_~7!<h}RT<1=bVF<Ypx3T&OImYB>IY?%z_R^D=+Ff(<M
zx{psvXC;y)+bvkKDx0{iw}~a|x_q8uacH5k_i^F$Mr&Tj9y72fn!7q$)4Jt~g34`T
z&4Fn)bf_?N=tLFvjn#!rLx=8Q=*&Z6J+iwTxa^l^0FkR5KJB34!l2>upkXm+*feN(
ztAl15jx7u0Z48<CS2J+fRSuhCxctI!`6v2q1>ZDWzV&cf>?Yiv=DfYZGVVh7?xeg8
zksGL>Fi=5xpju*}0@FY-_YdtTe3=v6?GDrD2ik!O-26CIY!%JJcyagJMej%})Y}=*
zzSEY6DtC@-P$;%F=to4lrTvIKlx_m`!>v@GmXLRzt?ECwt-n?k`Ti@5iUthm7&5zc
zNC$%x#u-xTq%Y3l1#K?;3!DOBJ55!%&<q~ul~)F32OP(M;%HUiFn)wlXs`q96rp9K
zs1F8#r-@DS<b%_)0pxlaEJZ3(2|upD55wC~=Nkcv{?_{AXcRA!rS2p|03{I-wjr4e
z4PYq6g^WNcE&~`&abY7+iVJHyd2Z9mbGuHS+jR2Wu9N3BojkYf<iMtr1G`QRY&tox
z>*T<ulLJ*JmCI)s_9H3<d<!t!6LeeK*mX2jfOcI0Zfs*ai7Lb$)9Bnxqft0cc2&dH
zh%_)0X$yTA?3$AXW+GX4L01KzT5?<Q(WgGO>Znl5d-Bo0#bHYebkG)hxPJBM?)p#U
z`)(;98n7KvT)PEGwBW8tKs{UTlmu9`;I2pjJLq>Q3Fmh$3Fmh%3Fnd|++_>!-L~Lv
zMdAGJMPVR`!rhF*;Lc@XaM!cK;Lb&1Ac?|Vw%`_Ifi39tCfd%Yg{lUiJETEsq}?4A
z(kAR)C79cQ(l*=~84%rfxLY#dv<-Jg255qRr&4i#_fm2G1xUrYEERX#27JeDxNEUE
z{}RMvAdAHvjm6*#kc+|H&lZC(Kr9BbSln$JYRZN3ym)vs4JTW^3Pmrx#}n|2!YA1?
zW)b_-XFts_ZFge|P<j**{o7X`$k^VU!gqLgs<rXmDf|W8ooekb0`2x9P)S%P@uz6G
z3sEhKCV27qHAJ%|<dTVIu$gFndDj|MrF}(d|K41RTpNS`M;57DxU*!JnhLj>2wgp=
zaBGRY>p4FQq_5Zo6lhTKoy3-L232|~d_fpp)SuJPJnLNq<*0UY7ZmF%!rQvwLxi|^
zh<RG1b#FVz*K5>^cTsQNs8V!xOD89F>#e<2Z+Tsh)F^nzhUQzbZEId4+q4zgg|bSm
zQ@A`?&_9KXnrXWLQ%()R3rU}h`S(p20Z`sL_pwU1bvjV1(rq10)vDANTkv-xWULK$
za7%NO7G%Puar;J<=Dwz&_8x|q>I`MImjXZ(d3?D_UvsTnw~d2%O;>Y+)4JJ#yyEJT
z;TBDCVWGXXVF~ElIH8jzKz{()oA?67;%++<mLIJ0sT(i(SD0?hAgzf7b1XQIh4sr#
z!x1L^3CnMm?k+TVw*=GcLWJd)VWt2y`KVg1E(A>}{+6qb{!j@gx&CWlRogxq(DI^F
zhcg@XOl91cC}C^I{~EFMm=@u`D&l{&f>x(pi2vnxy1_pF*L{rt)pq`O<oqj~UO1;W
z<nQZfAL}cP^|eaiss`8o>@At>1;I~YrhO4E!`Cz%Z%+~Zi-m1WWmCP4=E+EGx6~Ym
zuG~-O%LUPXqdwb@W64qzYLW2YzqS4ldv4+lD!sv_H~t@K+FN6Dr?WmQ0)^9u%wA%B
zk6eydy<eA>vq}oBg4RcJRzbs#M%x-%gy;W4jk!{0%)CM8Wuub5tG=L~V;)>$o}k1$
zxWqh)Tebz>;k+?q`@!sYP0kU;tkOLyfc2TA6u`I+FpUHLq_IKb=qDCz^Tdj6&=UpU
zK-k~d`TrbDHxr%jzAJ>vy&~TN&9`eqxiR_Xvkl6^y;U96GF~<2JQIxlh%Dx>9uZ!;
zhF{}Hej|VB)rrPrmN8e|@|;rb>!xW{+hK}mzOHgoI8DRNXbs*{ku-A1Hy9vyAOBZv
z|1XE}2OHzR(*8%SmXC_|Kib>>_cZ?RAzcLQg8<iA7(9?|{EKNc?ECmUUS<ipU&J54
z3_ibs7NjvQN?te&;j8Hq;K5KLpL`6)JUxzp?WGU>^P4_F;-b7Q0bn^tgf?OFoW;QE
z6riJ!?woueC9a%D65<mDa5A2Klz`F61JxudP<knR7Sw~#;A}}&E8$`Yb#Y=~hqQh&
zTSUc<#MFdj_O$3oobSk5#B*atwK{aGrre1Vu0XW-<bm4u(2y;kPv`A?8BgLrqdTA;
z?A3p*#QU6+O)_PcrIYq$C6f+qN;-i}MYUBevaM|Cw%RG-IN3Tufo!DGPOvSiaby}+
zqxIOb8U?msH43*|Rx8nLwN;|63{+dyBHPNAZmaDRE~DFg_Ay!{LO>mv)8O=HHf2O(
zKgp_D{607k{Skt|8s;L%2SY&`1+Ts7YSI@d2}!a?mI*3-&QNE_!6sey2}oK+3f^!I
z9bz({6iP(Xp$`5a2Tzva8TM1MYJQL<)X6Gs6$81^O4zp)j&_3J;C0hSxi^meG|=Po
z&ccyXx7!zYvr(6+lDce?mOYX<tq9Sd%?S{1!V&w|C1oU(MXpN=dK`x`j-QJ8xX7xR
zuaz-h$5$~Q6%E0BtMPap{sYW9{$rw-<3C<Yfdx+rWn>O4mpG71tjUA4n%(eVs^fqG
z%FIM-HZqe0aXZYE%%ed(iU*vLhz<9QF5cq7YE0H)WZ85cPwDmzS+9^a2`p&li|9i<
zTP5QgqJ?9!V!L=y<SE>G(nW2|6h@xrKK38CTDbH0@pvqqTdzY{6G>|jQ%y!ME6Ps1
zwR>l<hha>EC0ldaG+0HmPn!ZOKV@rkUlr$eeXgtAWTwq>l~+4uu&!HRsQn(_e*dp(
z|8Hiw_h)hb-|<_;^MBX?-0%PG^MBp{t-oyNFCn!>A?dO7iqt>7=v8hH+PeslxAWDq
z-x}t91L%E)mdZ0Aa|i&>N(W(|pY{)rk6H6rvz-~er7%mt?h{4xfT??(`;ee{sJV=h
zg1fVXFa@c$?3h=(zev?zr1XSnYcFxNb1P_ZJP1^YPqKsTjp)+)eTsgc;@_wK@6%vi
z3T37l@9l6#xIglcCwG^SaCyVkxyx?nlpD{0SQo@^y5NuspA>sTzp;m^4hWoCH>ASr
z4mMT|HdYU|IJ>VNZyQgG)EC0JqHH0OFn`__dpHjK|Mw~R8#xZ9Kd|Ej^oQ>_9r`11
zoX58+UXUKpR=^OKuu4E&QxuUTi%yDzzU$VKPvjhk;)bzsy1!4I$K<~!L}o#5(XEo4
zW6Tj<s;ZLVGbgX-BI32&(ubqj>>&^Gn@hCJ17_!wAaBmuq&gUB_Sf&z^&f9_5a@>G
zEzK+p|4-rkl-HE*GMDNTHTEarocm`T4as+x`=MD57Zl(1T$SI|E-W#$J}s#gW8ut_
z!REbK46vvMUQ`1wu7MZVzzgcv9y5iu#?4009y^D(#!tD}9z6r!`y<`;`hQGSO+Wb<
z&Z{>9D%Sr#^1H?S-?+VB|L@oTd)FV$odBKdKggrf>t9Z;DEs^ygJ_gcq<UA_$I?Yb
zQZv^IuF@6|c1X@~nxn}%J=>K=%F@WPG**^wHFoWG-!1u1o80NMM!MJJId14qg%tb%
z!x1L^F%9N^n=IjW2@Wjbz<J(CS!T~9x6C9OJ6oE{tDMLgjLvJkz;b)b>^|FnEN##>
z{J&eM)9w`gzj15t|Gl^W_V%CJ{>!}-Gw_MZ4j?3)&sNhRQOw{}KqfJDa5cZ@Ou{9S
z3;@-4T=CsczY_q!SG+_DI27-E8hUMs(#He(jPG!w<-s9nr^Gx<Da0O}14a+_RC~&m
zt+Mh=svNW(O{Z(>|B21I-}3%XyVdcF=l?;cv)BK7{lC}$we){U{j1OUdrmS4$Ke8<
zlsy2l^&Er2Csl_JEjVysz`n~7^Wz3-qR^C;D->7)js<5};1GggcGL4b4#eCNeC2g*
z(V9g#y>|6MC^;01&dLHRYQbonpsoQA&1Q>ycD7L(5Va#h&-oAyklRrmjAry9Fqpzr
zody9Z39#=1*7W43Q#l$aKN_TT04Q({1;IfZ3z6KM;KX6gqEFOklzxPWYtCcwkv^gH
zBc_k-6ZVll;q;?VKX&*>`sAk{0}AW&k3mC(2B06?^bzxq^r@{JNJ<sK66&a;6h!bt
zM-P(UsOU6w_4)LBphM)-or=f$&#)DN+WPPG_J2qJJ%H#IzW#d~T8FjG4`}mSy7B>S
zKGJ{2+Ta@Wv#$-IKtBg2(gn2bZH>68T|qKiw<K^_76D=LX7;Sx@-j~6FEZ}tFEWm%
zFAyu@OD|B$x%>srxRq8~s0J6S!NqEDu^L>g2Is3W=>ZJVPc2_+!wc!hVw8+!^E!ke
z2wZCQ7qBQs6KYK5Qq3+D#4MK-#3rsw0T4Az4GXFp7F0JZux(gi-7uf>icUB=fL=T~
z05(#I@|I3SIRMfMDF?ur^MR_vRow`xx)D@$BdF>|P}Pk<>V`_&_$qCqU0JiQDt#kX
z(6C%3D@CCQZgU|lFLj?nBTnQ`So4WI92}geL;RSOhObvR!q_qvx6wIiP$i?<@vGGi
zZu-scPE=1NAa0m?5mfggu<OM}da4po>ct&#w^BukT^`aV!%Z#9Txw7)_bX56MvK`*
zxZm!zhPBpE=nJ)eFlvUqCfI9&yU+wFqy7NFXV1t8Fg}G}$te|L&Zq6Wq)I8)BN4d4
zY_=FCSs&Rmi`W;JUemCBSEbi9cHebQE3{qqZ6mc{CMP9m2L?=UfOe`yreh~E9b1v<
z*osW&OBb2W1~VemH*%RZr`Pu1aVLlLvNIjy(-yzSVx4&eM(~ue2f=!LOb=qVo!*32
z@`BZpZjsintFULQN<YO@zbsdaDgW+j)+UhNrgULIkhkFdbGJrB&bei4wt{ek7uU}n
zLOuxYKO+1)mtSJ~G6h!c^R6JGI)JLTf$8VR9b~+GJ~4RPz^=W<$p+~^Vf`n6D0b1y
z-*R@SrhLjgd!L@Y{|{&1<)!rjaFzd0w-o;ocl^Eo_q{y--G_d>&GX-7IK5Q*`{a)t
z*>RWA)t423$&msx#9IHHPR=$cdDYMpI>-$PXh!rXjXX9C86uPTVnQBv(w7wX#@#_j
zt&#J{ap}JY{D$PGxqF!QFzce;Fj!t{Edxe&ZP;esk2<sTy82%vPob#>D<*;p{g1o2
z82=+^qrLyn-v4K>|Fu_r)S6Jf(ouFPDMRtBQnF`><lJ%8%DZ<Q6&*Y>0CqYp0dOY|
zfRNlgBmnN{kpeJR4+(%fd!zt?Xj8%ABL!eCA4e@IK^hzY`M-7kQ0@>bh{pY9Xfbh)
z3vbe?;@=B?y(z;V*yCs9=AJdQtW-(BD#4_ZRk(=U;^H%#Y?wC*v}NQHf;7~5%~HP-
z2Kd+vh*LlqG!vwsL3YfLK?(`OXo75K{lQ1#hK&l8d!mr}Rf~;#qnJVHNiodQP5O@k
zk~*g_NZ$-e{ZtraV1}eFD-5#DAWa5=Jk?g*(5b&dUw8hWE#{ZR2(?-Rn&*GN)9wVt
z`ycI~y`TU0^Z$PSugD1oxe=$~B+_RCTSUoaI9J;X?JOt{?ENCI>mljpX4N{%&n|uH
z?uB3h;=3I)0MW@FM(S#gk6)x|EolPH(nZzmCNeq_JqAE!jYV`gZ5wZ^8X2}i=8Jv^
zY=N$6@{|9iTq<Sf_bu<*ldj$tuesDI?7Q&+ZmwON9<d(yP5mVs8(H9#CbKAyRYmFW
zQ%L0d)2}0kF;uhN>^2b_y6h9NO!)^7u@kyU`C3!yQd3r*-fco;rk`kB=~G$EgmEJK
zK+(i}9@4luluBcFk~uz!vZCom=AA|#$x~9Ig-xL{l|;7k;?|2)`dt_=9P$MJW+?{w
z3%X4+m`#_9aIox$<Jt7$ufO;=$a>pqeEw|IH#lvHR;Wdf8%LrdS6uimfPlT2ZwwPy
z7zr`f^oWil@MYgKq>nLIE!?Om-Kqq}AS7@>jruM$isA3;NJvG@fOR7Uq~n5znu(yS
z<;zNbD6334l0dp3O9l(bdi)!l&?Df-KkTHYJ#=|dCUV88u$`~^Xc&DUri9Dq<rmQ?
zT#c76*vm$-Wp}RB3DJcT_VVYel{&dSR4C<&Ymc=vmL_9LH<g8A%2q0rq7?@gFwrWh
zFFyO9oQazs-An#2ZO9(U7GTJO+5@j$SpwVul0prvTImp<_MGGPSP}K9GHY+k<NwRX
zB5ROsx9QygQc7Hau;?}`QlsC+I#|V;j$__)yn&v?7V_d($_u9tkT@S*gZ2L)_JV^e
zuYFL%9C-@t-cw!8o`M8Otk0Zam?Ar8E>wR7__M+NIXlqkITQC>xX1Hhc>!x3npU2I
z%tj+3A}q`w`^mhwT3H1@M<aa&w5epG7`YQZd>jp#crp*A#BDV6988P1#BHN`x`LL^
zTX4e2U{kQ*`4=n+y!=BFfuEM2sD-xt1a@DS&oK0nSTUZhjVH=5`|jv`JR4ktUBZ0L
z{RXx8`0JK<jT%N9Fdws1X%BX_4)I1D1*_0enu97`>l2coTQulQx&^1|h~1ewsQ{)r
z1*KzZftTH@vciiq!j$aGoN_~4xv8v7jTSW6n#)gWGt(Eczh3fRw3(5~eDdCh*O3!W
z$*`ZtOV}G?Dg*Xbq_^?-hW2wPrh-cKre^8DAQsZzdzqy>=|}~sQCA*k)N{MdZliGo
z2e=JBQc5sTXUY$LyP+SFw)sq%H<cbMlNJb%w1D^cxA+g~EV-@{tqcsN3fl~Gc~`$X
zWQ7Hnu;4YY_-+6Ope%fAU<oJ|IZ|1$29~yqT4bT=AsJD-I&PYbiF<AaVAGnFk1v#(
zOh^;-C|;yS2D2NP$0$_Dd0GMynhIwt7*yGwh{)=YSAo@4$S_$Qwu>lio)rX5WUep)
zFE8W8FeM3AwO0b+&Gob9_;$^a*~>UWv(;jGDYd7q7$OOVHz>#&6y#T#TASEr1zBx-
z>(f^i;?be%C8j8MfD4iA4C_mE8w33ehZ!x}7VCp{<@!L`Q#v6Y;&O`%wX|(l?aEpL
zQOjXlO&J@mY+GuXQ9P2EQlmFU1)gkYJlQq?o$PcB*Q9QHdwUveofCfSv#DA!!v@M-
z4Vyg;?U)7W`c7;Jn8%pFN1sR~%XEiAVdTFABOzQu3Xy0qhHq|e`JS%-I4TJ4o>SvJ
z=L`T-Jakd8TF$b4ko@-w^abp$VWEj~E7GI)vt>9wlLEZxi!uAE^+$1og>SDw^1^KJ
z;5vu!?hri3TT41;pgwF*r&O1Tg118y-PVSIEV!bei7?gWP*x7h<vu&nIEiYJjxj9$
z7D>m-wHsOHb6b+(l-n|b`8yOqzmi<KVth%*6f3=Z!t`ddmj>Uh7DC?i3bM2$RcT8~
zUm~SU%wVV77Z}LdDW-naF8%GcPW4UTYtI9}dG&{M7=RD1WNyw)p0aq`4GQD&R;@u{
zo6YD;8WoI-!@{o9k$+AdL^oW<W>W^+ZC9t`GCn}4d0kKz2iM+MYcFn3j^KZF<p|83
zvKDXebi~@G5ksvFC8=GKBs&vT%_O+MBWbc>N9=_Uqfc=C+BtB1r|Gy?<VPERT$3MN
z@?$KozDzV>3?!lbBY)QCjSqr6uFnsOzcv*5Y?)ziLP-Ymm%&2aZfjGiFLQ?-r`ayv
z)jTwY4BbfPos^s6Z7|URVr*o~L_$N=(Lapzb+ivET11^4T0|WK-{c%_bfvjrl*{N<
zJ9QcOG!CO-?%+{<v{N`bW*^%ld6}4}aJrF~9Xqyr4*k{EpCDmXHMm|im{)^O*7G2+
zI0%Ep7KXLCW<Q(HvCG9O5;7<{=!h*-pNj<Rfx`wUaSyR~i1MHKLDAVVTr6F&jZ^+Q
zo4eU$%#L85X3b;ce<q(iQG}k1z!S|VYO~#(xV!o7zxE+Gt8@>q4Tg(}rY#StX^K2E
zMFTxW+k9hoI}|N+0#46BI-S8doDiOOh7sZMXaRoKG`d9iVVtOXDouZ-t{S=LkSt?}
zzJ!XC%h_r?bk3u}Y!W$yb}bzZaEl;Cs!|Af&LP?{!*wkfj!m((Iqr(|x-Pr5M`|x3
z$4V{cMkDnb2v~H%+DKq<QUuwG&<01wFi!S=nbl=?rM1R#O<yJ(+84ZxIM=pe^MH{R
z70H?cFp^<LFA;rL`d&d54N($Cjl{J_5rd2Xj^b(~+vxk+i=Z!jv99zLh)A!h%e
zJQU|7|4bUgfd0R$15C%VMaMF*7^qw=UJh3}aQ@dZasinx+1TZ#Ow-tnLb5=UCZ41;
z;}H9I{gK5vdHF)eweGo&Y&iEnF5;!xeBQEMYiFf0gTSk2vOdicvMG&I;l5ciZ8g@F
z%H9yWtV+wOD}NeC(~IS0lGUy97-gf&K^sr$x115Kov7q_O6X@%o*hgwNq^cpiBG#H
z@xehOeR)M*UY!yX<BGif>w!5nTmCp(j{IQHb2Jl75-%b08{@UyO)w2BOsF>`8GMJ2
zK6_CbKIUYlJpbj_9>nyQbC0(0igBJkNCW)Qnl;{27&dVnwF4FIxvc6&@iK-wELSmq
zU-{S%*(at%_6Y`&eKtX)chO^ZtHK{F$5wO#V3~+ugt18XXO1Ys@9HY<4)>9(xGQ){
zJ(d-5Mg(}_I+XyaKvusseCl^5l`yx6GLA7lw6cIH{j?~M^S>@bVYlpcY;K6JDq(C%
z;HsSR%(ZCB80%@f3eN;{HR>YyT8d>lu5O|<*QF{~zXY$zNC;(3=1z6pq9!&sO#%NU
zrz>Cc{{M8gj7RYxg#J~=0I16U9R#KLKLPo?zyE(P<G=kgz6x)ge}>6w7<~w*&gqB6
zhrh$E|8aC5Z~PNGpHPMej{W8AW&iy@et!Av#hHBbKUoe5HIFWPiU0u>@d5r8k2vnP
zXZ`>4>W%Y1|C4*}1QLdi#Q2?kOrRoaD<tD6no~=JdFUq0{5Yep`Ku-(+)UNh-{4SB
z!nT?cat&##BVtsKhy?=oosxMtfU`eV#s3N8<+JJV*>EWQeBVf_qv=d(Oq?bek`=5v
zSp%krJt)T(DNX)~*h=Zo1IUC!OrFp5-a%TcB3hEHsQM5~Aq$;!xroByyV-0^#u$F~
z=j6AGvH-YQ#VyJFmD}%3>D@Mn^EO&d2UOdCpfT8b4!j^m>Bn<Ic=x-E7M}AR>G*e|
z({$yMwc+S&a2X9(q@{p^U!Q{^;ccVR5%Ch)Ml50D7{attaRk=RDQSg`%ByEtR~X)3
zru4=VlProKhU19#`NkD%!F*f(87;_1T0~Ink_gm!6io>k2a5<2SVlur19+@lg6wsC
z9T`T>A;RfI(|@m`Rg^fB)p!{btB*G|AvytY5Q``tf{I#R68Q|r<MjJQwB%HY2*qw3
z(XXQl#t{{LjjXe~uNfm$@K&CMEDHw4_{s9{XoDgtSx6H-Q8V86J>v12_TOwoE!=VR
z$sz(&h5grVw@UV3E7<4%-p7C1$A7Yj|C`>OgaqK_c`O_b=_)kI-$bBu3m0LwmDi&e
zbYYnf!5L5NK(Xsv-a2K7<A#{m@(A77a}Hy^z0VMvK)ci}=_DWdU)#`RH6f^I1Bm5F
zvD>v-5!#<#$W%59&S0EQGq)pG9*y%Yjl*z8I65j11T9zG=cfSGL6J%0kRa3%ZK+f{
zt5CpU4u><iI^^V+T%kfrP}()XB)FvPfR`1)<svvAK3W-U;c9geE~DL2zmV0J#u2SK
zFzltT8!cUnp@Z4rN!_*S0)S^DsxB=01Ydm)>lb0y-~rn3Y((L)nhR)y6@_qBA)gjv
z;}V+!j*AQUmYOr@Im`Yy8ZBMF<vDHg-(%15QKcG%2z-4yLAta6y~&PAaqfy-U6x&D
z6Sc~v!G5Q0DjoZ=8cQseMbi*Lt<)74<7@PKP~_Lah($hwU(SS4gEv69E~0h|VqpZ2
z8#z!$0MV>8a#lb{f`V2{il}DUAi&88!)oMM#M+t6;nMg@E9mIotC*}F$$AwGiqKg^
z7ppO`YS?*;Sa0%G!g?_=_^e$Bje&a*@ruS~fqz+G77-i+i&Yf;gyc_Y*ayK0a4ODd
zo&}p=boFR)b{?1kvvGo8B@9X@#R9+fe2sv?QK>GRL0VjdFYwY<NFfa>&$6<0hj!5?
z89bV$9-XpusctfbriYL;!`Hq(++_U+=V$cvsUibF#riL3cRGdjUmKzQ{{Mdcw_pEl
znExeAlK4XGk7Xh2kuGF?KI~49C!wW9r2Ow0BFqtK()Z;NVP>MqDd4M<t+3|CQkF?p
zFMV?u7ngupxXDE_Uee$t^aspzA$wLrVgZR6BtFq1^{a3Nl_=W9ytRez9VtoecA`g?
z<2eCUxSLYxrY%`xlZ7&FUS-aNN(5Ms;4pR$Eim|-HX+!!3GGdr(6MiVAs_U7yh?U2
zs9%(bLZ(D|9KcdBLKg_Zndqp&F$Wh?%D~Ys1qB+%2RKbTp##Kaz={#?lNCoezQ^P4
z@Ns&&^MMi>#&z^Jttc`}R==#^Q>kWEz{J<^QH=B#wqRJ_{-+#-I{m?F7#<B)YoGtL
zamz3J|8()*{=bj*f9Kn_|8w3z)L-z;+y3jfZ+?97_LuKpztjLce@k$@{8jb!hhN@2
zcTA#LDD>jx56^yi{hs{$=Imwv)teVDf9=2j*SnYCwV-fT`tjW_%*Tj*89M#`^Iu*(
zd;Yvn^l!iKeD_`PP4B7GZM9o1X<-A+=#S>8Bl!xM9Gzs<vsqFUN#{cwrmIQxX~|;;
z^Wx9RhqL7(3McFaH{50*57JiMkMkA(bTMD`dC<b|7+9J)nxO7S;|LC#Pzz-d>6^}w
z4F~z)vNw$k%~3cW&jzsDl$LMOFln<917<VI`D``@M2jftUxwom;G*v=Q2}fjQ`)R`
zP!+EOfN7z;AAowEfP6-D3E`n0r6c|9^2waj$V<sszHd;j0Ggu5FKCrjA>M4emO@X`
zSGjt~=d{-%@MT7EIO@zkFZiZBSvzDesgV*+mu~B*&5WB|{Z%+uQ=+S1LgZ}TjVx(>
zG6=`vq8}&IaN3{E#oDEjs{{yFRR>-9aP$CAaMY<cUov;<B?iHPkNx=~8o0SJEP{<z
z<FNuvV)02d8O(2(T)&_FW0ox46nA<8q2iflNv6zL)20-#7FfiHHo2J&E*G;Y+i2I>
z&;nJ?l;-CptEF>#>O6dY84a#McQQ@rJcPF~{KazBPKRUlmEDY>aDJAJLM&a>l^Q&Z
zQWe5QyV#{1grIUIkU$0YJeif~9XdK!{o~mgord~9yh9zEsep*nhiE}>=G?|~;5b{I
z5F}9nd}2<JUt1!TiZrsdBOnC|t>4BkV1AIgrmrSHMbw`jDtZA#%+SNr#<PNuku^24
zC=PAW7pKVOoh*&ZNIZ>~F<I8Cv~dPSOvfUOS!r^#lFHPmOGzdw)G#zb!j~V4rh<K~
zRcvV|LJ$R1CF}Zj)MTZKgiaOGYG@T8otVEqQ#i#W+rnExwqeu00EJgk!L}@7i)y|K
zlQ{KU+tkLTh-|c_*?N`q<?hW_d&ODLepTb$z5z?smyP|(b@G>Iz*ehhphYxYevFgo
zZWJ}zigKkt@LuU_l|CwHeY915%*bD;0)I0z3#!oQsAO0(0#y{lrD7HHj+@o!97uQ|
zQCRlLZaQ)$WU3h{SAhy3U#SveKcC&|d<Jl!=%p#`;V~tg1C1p}=?8flqcF~ggbqUv
zN}k?qCP&$GA;$)Cbp;QH99(WbwVY6qakAvXar=|*lF0Rd(27jFa9q2eb_FVbiyKyC
zRM1v;tyK8axqUYk;2n1W@45s2jyvFQvjf|*w03D??ZmcXvL|r+JJ<qEh0&D+b652+
z`%RY6-PDAo9^oDK2+KXfyX=uK^~m30k9@gD{?@E(dFKXcwr*-#-m;l%MCn(c^i{?@
zvf4#LJU^&mr#)kg%ASp97nQhtD_lO{LfV$e-L%;DLKSVQLXv?{YRUCuoBDAjeJktd
zAe6##{k~28e&v`{*6*kFqq+VxWxgVFlxi_mklxJ1w*yZNc~r4fqfNl|nt<yy0oyde
zuh#^>UK9L!hfy#p#j_~1!5Wkixrh;OiqS7(^f$nmF<5?~j5G`Il0j&>3@@SAEW~T#
z&zLihA8)z&CG=ZvzMseMZ@Kv;^lLPKhnpc}wvabOm}(Ha99&z9jS#w5QrsrVKJ&Fs
zet!g6Zjj{gdHM2<6GL9wwR%nFh>BigbFZ<v*I3=_c;u!*71CM`(b1@J+B#|xn+h&Z
zL)*sT;5u=_!C*y`yo2BG`Hh2gj`_J}9Y~x+EHyNe$*BeqIiVo>oJ|!4gxepVIZWae
zBjlS$$TyFWUzacae7}9U-@Dy~V-ro)9vrgTsY7;0ZP*GKwnOIIg7G>(TY8sCcf@zF
zhq#!1gbY{MKS=}5jq@+0T}A+I0>Br%*I3>@+VSqxoor6wn#X`JavmMq8aqk2=0RY+
zN^&+eUgJP;N7d;M26GjDIhz-Ux#evEr9P`f{n#Ri#TIcz%n$_OL~6Uspe&$B)@~kC
zq+)fv_6~!pN)=oh+C;PT!}3vOf&Vn{*~wP{ew&Au>ssx0N0*!1?9lbfi<xv{$=}V?
z7*Jv~bEXsheN{Ny^qi}P^jSJy6kiIK5lgejyxN%sTf}x)#L{qj^0eTh=nxY^DHDg4
zornN~S3;>tANDn#gD#@siW@LAKs4bL<T>IeW<LSw6OimdY1Ds)@-XUj1Zi<LqUSf+
zE1b(K%D(dqBH?&O3KO|z(6pxfz&j6ic{EE~OhpN&Mi&iN)Q5LqH?Sr>ed-|Rzj+6b
zS+5%R)X<U%up?W`riz`JTb#+o81Bm8s#}fi$&KFR%80tv`V2N#uEsTut|na49j$9$
ziPA;8NCYyZ;@bxW^VIq4FZ%TPbA77_VuGfqc4rCYtI=9IIj5}RuvhJhmkMlR>qxkE
zWA4<fz|!K$kQ%mxUjF*-<@5J1U+8?Yeti3WJx8pKy_COvV}ems^@%5iLJ-r?7a<QV
z0Fg)&r-k&}p5u2cPzpc`lmrT(t5I@5WJ@c`10{lO*UsT44DvOG4;83;YQ&&jnJ(yV
z3>NnHj?ESON^-MHtG2PS)hcxM$?dy{uVq&3)|${85iQAxIMKtTlhvlC!cNSCwM+uT
zgq)j?8|w_|B>WUlb~>zS4d3*Ld~L=Nu|o&ZupfO0$85I$%5UVNtzyIb3%+%)VWIN0
zTJU$SUC%GmP*oEy*M#rv%9nDLKb`f@K~>TuRQxxKu8zLY8^~z=E6AWi$~Teo=<)jK
z&lj_B2%lX+NnMr3%VJe7h!uW?+qzbU6f+;%hQl1*GU6}v#u}Tp9AE(HKu38&P3?50
z_=GpxCrRD0?2F(n-vCEeP`HK#ktWyF2QZc6{TZ@+OEK`|1l)!|N(MYx0Hw<G-&wp7
zJ5cG~yIq0YojrXK?5aI<^d)@Pu(P92&aK+V(HCmFYFkh$@i*@LmKVi;z?KIc`@{(s
zG~cI_MoxrtHX5yxj}k<<ges1m<T{?uL4sMp%w+knAab1J?seY7{hv+a|3Yxo=|!@6
z{>LtEcS`vmyZ%1@?>_&>KL1C({Ey}YAXLROu{B4TmjDe>=8Nbml7X<-D%5L5pQngs
zTnW+b4ktm(_Dh(jNq9bBc4^Z;DF&=h5Mj85%Z)`9F3dk+ii;RX99O#(>OR34u2MS6
ztgDq7NmTI0yd<guz8&7c5?wh{i3)0$`AS&)Q8`@+AI1aRy!u0tjv*_6r2^QdfM2(U
zpl%KAx;1q4HGDRvyCiD~MczVb;0QfaQPzQA*DR9t2i73AWx;h0oU0}lUnv8CD_>E3
zjRLF^VT#w!5aSFnmJxrhlIS$$9`%le<=YgNaBym-&Td{zqUW|C(V-nEBA}g}3K>h8
z$bB0#g6kPHD{$bGB7%_B44RPHI7-Y6N_RUj>+=`DezZAi22A;c&2fBUegTOQlhgTf
zK~__v{xb7JhMk&B_^Jzy;sv1@yAdT^4ysEf5rG+|#r%dz*)K)%*dsbLZdRq2wb`sI
z@(85b^B8u<zS#g-F>eZ9lXXoAwFNy4DTE6kYbDU8EP7&R2F2x^+Q`CQ*(7bYqD@RY
zvZk=#c~O%(!!63_)127#(U4Ta3djPdOUx|_nNmUX=)CctG>NNO1%;WEhLA4zx!CWQ
z0Nsb9+3aCMucVshq~c+g$GD8JDs>SjGVcXhrF<WK3>VQcS-_C>2wl{W1q=EAIKGa?
zH?T~KK9E({$JyeVELq6v=f`)y9Icb^NSGL!&qBfLQm|GgX)O-Z*Pdwge(nFvVF0HW
z7MnlS`#)`jgTnrg@1tP9|Fhr!+3){Uq&kAR<^`FVUxzod)v~8Bv}4GYE#x&n+wlX$
zz>`t%`!{6b^S!p$<?gNUs+2FDowU=LYaYf6+5Aj@_6Yu-!>gzH&Bbqa3QtQ<!fR+^
zyvWSk6bdPz(&beOg%wafsX7nk>uao{3gC!_0vk0;I915AD{3hEEU_7$l_)Oy4qD1(
zUkMnNeg1)O2aE}D`=DzF40dn#ptXrj-YMARDa<v4xh61(qPdoD)}>%$1~w*OU4R<P
zQ0!9Q6{HlykZBAmY{QRhc;&NSr_SSi8C}O=BgMP+#$fC-7AG$bv3Gswd*f`GK&^io
zY@#mKTzb&#7NGp=bAEAQFUG90eguh>_A;!OVbgke5ha)5Jc0`}v=j0(cL~C!_q6Gc
z163~PJjvN!2rtifg%k?nh+}u<TE1WjloHO0%P}-jDk+uq9xg#XpN(l6a_7JEcRC%h
zNd#M$-t_^(O;W|Nczt!yX~y38pxeC8$<B5pUvBZ1#6FL*zRYXZa`_2bv7XA!<EMh!
zJ^cbKUqEDu*B6jfj0JibI*)t5O8X|(f6Kw8-RiGu{fBV7;}_R|<n4a_w_pG5*MD1E
z{}FlJqU9(IN6EsXCL*bY+gH?P6OXCKb?zlny~L`Qe&(>9Z+$!8`ss}IlzV&A-<cs6
zeWEUZ>~)m5Sq01h1Diu_)adqZMID8;9nx%5E5K1ba9j`EuLmBaYojB2M8)@@WwXeV
z0%a^;CpA!ZN}@8)bKNxV@^eV$!TnqOhv#H9ig#IEMajV`$?;W^(^g4N_oPa2K+6a<
zL4A9q=UobrT#xcp+`0(eCaG71ZkE(5Lbpun6_MD;BuEBIp!$@+^(o=kr$oCxCAzy4
zsC=!}LZzIJuu7<~W;&`6s^!PoFcYclX_o?()nds+$^s@9QWn5<3;1=L1k8h*xEVCN
z)xxBuf{dXCEzr(sf$P$uVrHt|IKmbiN5T3#w*-W7^LREWZtR#@TKq|ilbXO62RlvJ
z6XHLv$&Y~iu-U_rF^x&Nu8bf>88<XLIa04bv0i@?^!rq7bP9RHAa4ZZO%bvVDf9{u
zcC!vXdDjOwY-c-V7q{ho@@~AAmv%niJuR`;sVl9+<FzX4dZ2qUZ<%eRxP6u4ZdWVr
z%S9B)s#|*Asq&WUu~pydB1Q$SE@H9)G(}7{fToC0<9^e(P~&=f$T(_I1=Xhtu1^)e
zK2_Scq0oIRh3?;#Lig8H=)R0#_X^$DDD)KePX_xZ99sbFpDKl3PyO2{Wm1oqQ&xNu
z_|r5A7^Z>s>MTwMe=6(T()3_DQOUNvOy?s6s&w8_1z$W(0na;CAvhlCCzq?yXiN`x
zzP#g+HTVBnGz7~7fC~Sw0QUR~`~RrZ-tYhK_y70%|D_H6VKfTi;9Rwrk9y9BXyCF7
z6XaLP+k<rDhxpSZI(9xrPI5V0jfc(v0y>b6k;VfBNE@O>2#(j&)#N-{knYaL<Jm_-
zsMRzkBRyewQtH6n@_0&yFB~2fZ5O1vxtGl;Z{EJ=^U4Pvmx2C!h$QFd5&@>92@rFn
zR{upn6Vk9~F%8GZ$8RVa_Vq{BXMw2K+X}fZ!c~&Q;q-RtMF_T75rO#WChZl)BRxZq
zDLQ@1@0g>9^yWJT#ajP!v|KH~U7RU?hxvdIp*xS9RT2%49Ooeq+t8<REJ!aOf_xGh
zF%06ZkTe3hp#JD&j0ior^ulO*4p9#vS<9PwJP60*8-{^Y6<?6ik0{-MWV5;GkccYw
z0>t~(bQpbdip@y_3`sDLnyV=^zxfCZwg&fhVPs#hF3UNuNu50xOw1zg%ycL1JPRj8
z6(ml0KKl?kWW-Y@A?gae@56D*EQE_fXGzupyi1%p=i%TwT0&flc$v^@kB=dKn8QB2
zor`ct<u37}>-L)cX2w1sI%APS%}ry`LTP(w5RqrEU;X%||I@QyDI&@%$!{6HKTh&%
z7-^N00p&!%5lGmDTh)-bg^rUoKrG+pNe5Kw3K8~U8kfz2{5_X7LrB|+^6n+EC_v%L
z`bk8i(rG72?0T;44d|k6E_8z5j5BqhF^)wvD~-2NI>~6fe}oR!-3{R+*n0!rSN|Wz
zKQv~|(O|Wq{oiSo<A0!i{Exl;fB)=%qF6~U`7}wRKfQU|fBp8&k1yW-^8M?V8i40-
z367V)s=og4%bVwpPBaSzUcCI_*)Ol(lYifwz3jhw^Wx>N{rCTR_Y!8l0_ZOU;Kz5r
z(BNPW`!aO;G&0!p=Y2Bs^!wn1TX@rZ>U3M}R!dscu&7pZ)H%BBiNI3jRZztus1#0~
zb&;<}luH5Jb7rfh+r{1!cG^dtV8Z>s(PGADSa#)-m>T^heC?m#fQ9yyLDB>0VieGP
z<WvGjFJhAx{Y0a;xh}M`*=)Gq(XV7KPVwg4(>D(5S|irC!vbjnF=(SaN6>^vpo8ve
zLqfvGTomgtNutHlrJbgs3ZFq_L6;&)qZ86<(yn_>1}z6R^m6eDY1fF!0Q;Z-f3_M7
zqJ)`RQl*S+XHqmnJ0yHHM-<hREY8Ss?usb>bTLQ9eE?czIE#|$f06f#Fu6QpW!}Gi
z@%ET}CKlpq&a^-X<|yY#&>W0HQ&Lm$x~{~nhIQ+nS#6lL))F4pTrOD&XNgQ~`J5E|
zx>^0RG9V1>UVl>DcIRI>+3x2al6e)R3!CZcCmij%r!4N;>+^g-?s$!~uKh3KTG|`|
znC*W*Xtle={r@gPd;5QH|KI=p|IG3o$CG%O^rSZss&^3&dYj!5aH=l}kZyj6Lq*mh
zrfl=GbW3$%fbFxXZwe^x3ba`IHCk6c{1mhM0u?nC?^TMFXc-ob(*?h3m|gspB0j5n
zh?G|%gwx=bJBha1$%2=RXtQcFv0K<-)dlyG?n^g{@vc<V!<q{vsZY$735a(|Kx_*v
zyPsFP@7M*ns*2E-I%DWk&wj3@1-OSpNeS597jk^+Mpx^~5H1cdzki*3CHednzlduP
z4>slrypw2s?F}N=wTs?d^a?hM$<^X%F*oz_Qn~E{{)49yXI&HK@|>~c!ISsGQAFC$
zGbzDnqcsY;o|HoQ@zO{BqUpJ=LA~=J0P4$W`2i13_?b}dCF8Z0`dfBgt^6&!CQ$z7
zr=F<QoyI064fHg?Wg3*m1xV$x5yA2lsFb&sR^G5O#=&AXTwkPody(o|M$1!)R97id
z&~zBs3YoGCq0m;y6s@3{3KfwkA{t|mj<tmfP_7|WPz)6YO~X?Z^u7odbOsxlH87CD
zMz$!qTs|^emoAal?dudU(s~G&A;PxsRZX9*clf^3@$q=}kr*LaNZ;+sVAckg$By~b
zaQRD5A(`uoCP%u4bdr*&Jmwn41mOE3o}@3hirE7_9c<(i$mO!p*+e>#Qaa=5X42W-
zhIF>qmCksYlMIb`Qi0imaH_5r6q`VviEy%)vT%;6d~PJ14Cy+;8BbAp$aPFffbWfP
z##3Cy>w%sQR>Dch<+9P)zHmn2^edIj@>&my4Q8w9GUJDB%VK^3C<&c>h2!9Wwqlbt
zq4S9KWF1i?Ly8I`tYbg`y*H9bI)jaz9T>=9C5S*SDv3})yG<&*8Ogtii&}CLm3Rh0
z)yezxh*Ww2mrVMUld-fp4Eqo9r)UTcpr_}v8DTya$q!D3!!b_>NPaMv)C3Z@=EvUs
zPx`7ML(&F;a|$hz<?L)U6y-&6yCzL<ctz8jtfE3ah*mVcDK!*cHOl2ri{+KYv(oH{
zv&KeWw!_ya^C62Vd9QZ*ppi^<r_%w~*%?tGaG!Iy-x0k19YG*Hp?q;;uqG21(km)C
zxZ)^cUEVD-sMa(|XN3pbIo#4PSwXd{IAcrsQ;OVhl^a9-*tgQHV1<#E{CgEHBDbOH
zHTk&U+X8l-zol+F%{8b1L?GQ6!Cq`bxa!dBI=>{*;%Df2#IEaberMM)VA^rs(Mt)>
zsFx3E=_nj0^wNb?l6ihPv(bp~Gz3{?zxu9t^A|LZ$`GY;<jUb)>1vBRjD>>%X`%C%
z)1<$k^RPl|!rv#fi>yqVr2!n!_&pMF(V`^-j^Zf;lWXL)6v|Qk@W^p_&7u>H6Kaag
z7x4$8A&wv9W7Be=6M8A5h8kHB>h-yU>w4;>?o^0u)d|xYGGi?GFrzf=Y^}3oKIZwl
zJb|_Ds>ougLq|qR<btwk%4Qjqmb4W;Cs!F`Bk@L)7Fs&xE+>zv<3hpwq9);7@D3%-
zbiP_9zd<kpxTDl2|8><V7pYBMu($+^0allxVmT5e`%9B%u?k(Ef|ZaeA^_C;(`{5{
zfV68GZ(zFcRdhZTIViQgSsdwR@tpZddtp9iX3bL1<CR!&4h^Hm+Q@w8sYrFFB(oav
z)(lnn%xC3Wuu++`)U#7n-W8dBs)NM#puX-nyrzo=yZHz)hHRE>@f3W@oi>q~CtWf<
zAYU<Q6gY-EqC4CZJ)RaEfZ;;Oe@eOH-@Z9}|MJ&&KRc<=F+SXexQVpq+_jC~UVR-W
zOW{5acQSDdZ%<J#z3D2w^+B~1+re+4FgJU;k)Szq(91x#S_qz~H}#|m>JUZ9ylQLP
zi3Ax}g-}Q}NoQW-Eo{ulQc`C*2Q0$bIX7pn$J6n28Z9O>@@pKz)zcmGwhUZiZhF%l
ztRcva2L7OF<DAq4N{+NLu%-x@){R00gb(a;5no(@M?JKT7_Z>C%C0LXls7~qn<G21
zX(`19?MQ5%bSuS%^YX384U^*fVl$><Q~M}tS7O6bb>7CD(Eg&t23yP=wqhd!I$4Sh
z*333{HQBVcAT&%kcRJZzQNg*($%d_U`;&M$97o@v%xJAGymOgRTWI*sWac(!nkU^&
z47BkMW*Xg%I6+@77Uhd;<+~yl*@6^>suL2G{L$^R9@VO+d>m#`k)lv_O@%WHi)xA`
zs_W2fXj;^zD_LEqwE1Pj3=*^j7i3FboAH;Rt8puq9u{?|O4X?=v)5`>CKa>f>D)YL
z%hUNn`_x|h_&>(`zcwL&s`7tzf_5?fPut($|Gk&@f7|=`Kf8|qlM~G^C9aRGPSi1e
z(wt^#K)(0n2X!E;?+LQq@FiJL#OZ3%U(7xxxsZq~;sXmsh=}qci@qlOE1iXjvV+;!
zx@@+>WQ4$_MX6|wm*Q5hH4BIet|uw|KG9>{^ogy!_}#lg2pVGZiU>3SQwTr<)zFsc
zqSj=U=*z;7CLIz|*?hSu1b(ESdvZt=8rFs|kSiM3DByt>)IyX6#7U#baF7WN@dBY;
z-p^iSL1kD=foCQEM>(cW=}c5Pv@p1Rf-i?tGuRr8ts+L~i$KW<!J5QBsAJ=>c1TQM
z$4F2}NijJnj+=*1DJHcrACoADFOlNnq=9Lk=i|^kfA@<+q`)B>FWY2Dgr5mOB%o)w
zlY`1{Cu<_KEZ<<^CMq>*7^S?E4PCM21)yv&K5wD~#p<B!!~$jE0|>P5pGa?R#Ey`7
z@h44!{kS~Reo6OGj2=V7{`pg<e7K&K_N$l=jJ#7-JPt95M?pJ@(is;J%}opxM(*`y
z0#a6*0vsog7#Q_^Bh@WrnBf)d<IookQaV#2lu%7dC;IoFO<B`PDhe2(0?(fuPYFXs
zfGy=9_PBC&dHdNYs>aj`J62(bohZ>S1!6|lchLgED)Dg6gmcMj=(<A5rcKRhSU!oA
z$x?{>Mg9>1zskI$hCa}PSF$Ko5~V(8J<BP>vL4LnCK{&eCZODjK2&Y3<{P7l%Ei?u
zZZh2o?o}B{V(DT5G+k1Ne0<YhNuLBcHdK^i&&I4LsS>PEl)itrdJ=yPdh!mG<fpG+
zNecYml9J5zd9Nh5p(HC$d3fl<(os(zx$5%=e%2}PC6_jnwS|YX)j8~(m`h`G)$)$C
zePK~`sdN#6FAyl~X}jJVG1!qHuxLb~CxnmRu9m__oU!a|<goe@CtO4j*EZdQVc~CY
zqUDj}{4e;!P7(o4jixB6`k>gGHb<eDc{LQ>RcA~su^X9AbzjU3+OcRt0mDvDT{hnT
zPi_X|iXfmB`~RrjD#rf|x_)QB|9>y{|KaUV(d969ew>YCkW<*tCn#^6v)O3*kqEc*
zdO19z4l?0*e(AiMEkPF;;9jq;Bj?q}aGG4lCb;Lb=`tKFkDXJ>@P8k0$o)yfh9?c6
z1U5@HaQ3g~uODo9f>3!hK-<Z0pmFQ`jJ#nc=68+*pVQ6HzqC95K^tp#_Pd?-Ny#^h
z(TlA0E+CiEUw@UiazB4o-EF1|NU5;G>#SAWzc+klyRz!Fr+(X?-w5RUnSAXfZ=f@P
zo-0vL-iyyJkCBD;(fipl9G~S(bQZ3j&6QGR*-DQ=G#kna@<`O1>~ehh!qz%lJE&<~
z#?Yb|id*U2;C_%}Aj1(sadBoMQjtn0<sJJR*A}<zRc)%X3D4SEZo_Uj+u8QIyx}f3
z+vVPrciYotkpV2fM6%jQuh1K1wUOIO3fh%hNu4gsIFM-#zpOPB8xNp1*4tlKIFAw_
zRh4pGCH>-Cs3?yAldGu308V*8T|}+IXerHZ*WD{I``RR7w64^kv;2*<9@c6-&(RLh
z7p4kxn^F}ju}T}(9-uAmG%+vcr{KeUs-QiQaPozi=?&rc%siH%z_qdfA_cC~0?HfG
zLKv^SYq3Q|f8UZuRxFi`3fcM2tdQHFZ7URVaGMGl*Hg%%%DqC~E9AXGUQ;3O^|6&c
zwz(?8lrcL*7>tjP;qmww5GBz#8bAW-MAC|qqL$j=CS|8|SHFgr`KqBUsWs)TifiYc
zV(G8hFMjR)-;qt`C#(J6cB_jE`@cBowD<eJ`}|M$KmXHOxt($^#c9#W18qu|F-=<z
zLaR<oPQ&Z)lz;TM#(((dXmpw{3Z~)6XK{oilSKm<P!nJdsVJ4PrbfQxuP{&ZENUWC
zjRGC?TIthSM+CAg*^#Q^GTdjoN@*VlvuHFTBqC*jASXO%T>U1C7Lc|y?6FYF<=b!=
zJ59C>*|Z^taT7~XNwrCaY&a6hEQyK{o>V2UTmrAf#fV0!h2k~%IV)7)>?}KFnY)pi
zy>mr^`b7%0;QF=Ttx6IC78B&L6)R;S4NFakH6G8?>->uRy1r0xzSs7;b<|vnfR4AM
z=M>GarMoN>bc~eoW6f~5k;X?`FO5ej(5&L(dpTHyqET+_u{8?U4<28oC^MBQbj27J
z2C4yVOTk>}n0g3lV=!h*j~6U*vlJDs4V*C~)g&(R09W9(Wp6%Z30O-4JpZ3sPoym+
z@X!w_+Ub}%>CtN0*@`;2D+6qK{C`=-x2&nw-NUrKHTuU2V5o_$7Y&`jycc+b=*HNB
z>HJEj%Sa|zFC`i2{Xy&n2UlMEK*NQ0r1YgBu_C2UxisSy6DfU8N?*?rsh;8GQ$?c`
zA|i(U*{D3z?oKsUK#JCo|7w8qU_a?24e347*^wNomPSs?*eT{RxKTfs!6h}VhsW!5
z0oUvUuGtCvMNT$?uZ(k!v0KKXm9rHfO2g@|Eoo+XpauG}xT&h)aV)cX_>N_bidYFQ
zr|?r#)p;ByQbq8nI0g{7gt<(AA5iF%J7=7<qpH#8tu(QbY)SL@q1Jc`lTRUAL7!^4
zFM&Sm_DLxybm^&`kZPHBio&x+d;zh>)A`jndj)0lY;YOR-7fi*&DGgzxkWF%!v7mm
z&d34~l7J4L!cLQVqT>A&)<l=*Mo?<s)QrhsMNg%dWJoZ>prrhi{1pxRvw6~o-7M`G
z1Y`_Ki9XbnFNJr`Q1qOMyG-Y=0y=iwN$-cx!+0T^P3g}bMKceF!+3f@HHG-i&P{pH
ztm~)`DCpQUa!^PCMcRS#P*?%wi}6pPd{d3w_7j#M>@m_%XtV>#<!Uq%%hMK((w?TE
zDDh>Ph%iW#Bhn66w53s#f;{>b9YyxjOg{mHY=E#0kdb&L-h+P9uxMZ~aq77;6&sWs
z9Jx|l7>Z(^mV=fZFiqxt;M)O%1=K$1+5!7uE*-QsiLTwT(CY}V9ZNeNIqnri{J;<h
zgia*z;~HN1?ANLDIA2EBaW*x0V_2)e??dceANpP{03?|v90)vE7h#$Lp%@jf7#}a2
ziL?iqLJMl+$;lh&6_MwvAq)FeI(%&NLmR3qTh$prQhQVbBTdB`2<4Qq4+XZ5|MekS
zB$3@2a8>*->=)yI`F;oQ&wuad`R|+X!!U~G&i^0A%khnKIvrloUw?;_-!Yic6Y#>J
zxA)3__NT|&Jw2vYWWAJ0X^d5>0@Mo#P2#m|r$Dn&&x~4@W|3He%=_j8z39w-GRrkh
zekI;gQ+2sm<z8adOJ6Eq)o$KJ;h@E~QgW~4<VuVjydK@gUaqud9zk6hWZ2u&mBEJn
zJzW{!yvJ8RotiR|^4b=wW~rQ+iiMt$FZl4v3l6V6Ln!ieQzplFMQ%b$Zp6yB1<w1X
zIqANz5;_#$Zkv%9f;}JIFFxuVR&r9!JgKn$OXq5uNb>bRVTt;MPumqtZ)~@Sd0*VZ
zgslzUc|KYKvLq|g?r~v*LW<h2l`}5z6}AP>R4j=5K%JL7Sq#`-0Bje~&1U(9bb%!Q
z9;I2u^Y3w*1tR~RlDB(O*$7xIE%U|cE9q)s@e9&^!P+l=;i6)`IYY5O0!@dV#UhQ&
zg=iJIV!T&YSuM+4lpV0qJ)1&BUcR{v1+Ph(Bj*!PXrPzxs7UFM-#)9;l(lPC^1M(}
zicvn`Jm(ZgLH_vp!448``*xtFNvHr?VGXA$MQ(ECbfhSS8WvNOq8x5mO<RhRe#2ra
zlMe*xRF-Pn8kSI*3DPWRs*~z!p;Suc7R53z9TljA$3S6Udu_?oO*HjdV<mojj$VVK
zjb{t;CW#?m4F#){;D!~b&IBUce-eJMq^yc$HCPwPYN1wK&1%vz`y+-gc>jO4n$ojq
zn-c)@{(lR1Tb<(me;4obf85Lc|IU5~zt#@EB0vo+CsP4bZolK6GZKsUmgh6Chf%se
zG!pg%H?f?igIcYlkvarb>XeoigQ{J_esHUL_oY>^1oU%qg9gcpJ}r-rS;eDlGo{fe
zrnS>hi~K!X+zqv8)ptQHR`EL28btB<Ziodn-vzB`y+%^$>^m_Ja$>1c_!Z=pVb;N=
zv^Rrg485VDwC4Pu5Y1<O0lZ@VZ+HCC{qN2`{^xz2|6g4Y=<mbxRWi7|CKKmrOkVu`
zoWD4Vr}uu&&!+rwJc*Y{PkI9yX%P>4Yn|+C&jmaumnPvUSHbi+;N>S$PZEEK#IC#I
z5CILT#B(JFxZ#o##8N6JYeC9G1c0x0<5prwX9+P$tR@gZrsZW#Zy5Zm3wwu(4Miy;
zkrNIGda)a&nuDMt<k!K0ydRu3q;lRNO`vt8dkU<^1JOM<R`2WTz^rRZci%)8(TRNJ
zAS3R4F&h@Cd#ki)+R>u9DJ?$N6c~kI1X{r;adn`T3wGsC{@ZlcW<tult3|3Fa4Jn%
zpsY1ypv0VcFL#3#mcq#c^jY6d=DWXn3rV%HO$#MB%NFA8wGh+Ft6M1GEL-SrwS~Tj
zK2gy^3YTx;=UOM(Qr>7OV-LNNl>CNUe<&}duy75<tScERc9AY2Eo@0b2VZo8r@~ku
zG%YR(7H?`L#S8130=-#tD+H=cVa>{gsjYI7cUPY9)2&=jTuFgVU}a{5s)Ee4uqY{Z
zf@@b<Drl%uR^Ar7?m?l7-KRpot<CQZ)_*pspRLw^sI>m`gZAG4d%yp?&&seyR)$nG
zH%rLyWoC(1E_=aLC#@{(FQVS@@e8radl$wFhdjZ*(mJ_hbV!pWG@Z}k05$0|OPU^6
zOTVY|H?#lg4aWgN<eZ*558o3s&{{gvKZI8d2mR>L7uUrX)9giqzMv=x;l|~gl7&ol
z%RalDEgC8s$s1@<p4$+cBA~HJwMf9m>Z?PH$l0PfOqww|EyL19XZfO8MAws2ao(1!
zyl}rL<eL`dvoOKKKCkQ~|BfMG3Bd_-{6u4>x-PIxv)ORJOUuD^N4fbd54f%fkFVer
z+T6%0q5*N;6n%<43j`zA=1GO*2*0^dLEFfRUXk!n%_Kd(PzR)fW2K&zBWL?P9WU8r
zU9@QgXZvg{YB%FS9+f(kw$$VNVpbHBxLo?4X98Kq(&3j|o@XMN7vzY_*^W+jS6yC+
z6r`oJ$1@7gle8@ms_if@{c)oKw~(X5V3h3w4Q5lK51Fs8aG;?ZMvT~m%+X>Rj>)g&
z*?hj3eUfYNiWPVqy%~RABTwRF5-taqWKLT$C*BYikkEOmY-0*_%pRo5ALQ?#0g&?{
zVPM5}m6SGF$0bR$SaQdf)YS?`=7&WTadpth)W?bDYZ{5g;Bb`A{tB0)K5~mwo3utj
z__61Rzm%yJONFx~OR4bQdFNjH|FCEb#Jbts{|5yKm+t>|y1{<^w~zn4_y5`D4c=6U
zw|IwFbAN;dZ=o@K#Bce=h<Jsv+bQA|&Mvr!S3bYnQq|<?jCYZm1lf}mdNChdCQ+Xw
zW+koeAt=H1cM+7}`uhk<@UQ)bZq}(9w+Ql#gv*TNml+9_8L4$r#YYkot;s=Z_E8V8
zns>UEuvp7yYOcXwG1my%lPB^n-o?eqU8u;~I&z1sEv`BMVEJaFnz?PRHA37z-@}b!
z7Omoi#=rhzp>ew`ah48}URL?*FCHmR1)IC`^4DL8)o+F@W3{?#wB|jYTKDS|l&^iY
zuC>n9y0-n*x~~1zI^X_k9o}E9+ve4}*2Y)sHe~lABiFgKR>jeKCRy{gTAj6mm;w7?
z(x<`g?q;=s%c}*yJnf<KeA!CpOB*9knz>ROWhTxCm)%=?U!b+8;Vs>oP*RlA%WOR=
zz06jm(o4S<mthr~VFRC~pRy_~)wgOSyKCiIP>m$E7|FEH<+0qQ%8L4{si<eG#yd?M
zIh!Ss6FTgki!+ZFlNB^$Ha)UXYN}BSa-)`NulNr+MU)9~Kc3R7w00G+IKwF>ehLEd
z*~KXg>snS>&y{=;i<SPCH5d^6k+xbK%YF>!uGA^dDHO1bno?^iUV0;1TW67uYp89%
zhT3k|P}|*mptd)^OJ7`zYcI#AfcE$4vm%x^>g&`|C{SHP+2*q8Nx6`1O_Hj#u1GF(
zH?E8OKX-WkN5#hS2B7);591DQ70>@rx4S?8yN~C8KCzt^ak7lVsdE;QN*B)QK)m^T
z{4u;HE8=K>?6*<c*Va&xRkKB`r$IOl7pRy$v*kId2InyNL|59kC>=0&J_diZh`1L~
z`Yw>(r7w}&5FzMz8|Vw<pGdffElz_xB`)gb5Rwo7H(JbGjcLbffNSa3;4!7n10kST
z(^#GmJ(#Osr;Fwj6utq%rw}F_foQC5G}@vyrlz*5n%d^BWMmP{09CO8&`_PK^+Ple
ziCPOcWHPoOFjxgczxY)}89M=_;2VYtKFM#`E9BDLJY>1Us-;iFf`#(zp?xAC5j;K?
zkn{<0NW_o`NKC)APZ%V9!W@!5_$j1MAv+w>XCE1)-w-<o6f&TXK8Fl=t0`pA5E~B^
zvQ58X4%ud%V36&G*tekMFw21p*tirlL?RqD5EB9d)G+|1%anvN4~N!)Gl)Qs?>7Vh
zj|~7A08rZi;Zq1hqkjtk0{aGtHf<-?cLlSqw6r)d>k2Xe(5_$u0PTry00_+F#H=4}
zeK&m84Lv!1){VdbfX%J+SvRl&ATU$PXWeM&yAf24M!>pZ0>D)OeicAa1t52$tOeVY
z_4J)=!$oz|P_&84GD=UIXezVxw26i?OHZ3<CbRUkiAJiB9%;HU_ej81sD%`+N;{;0
zRZ1cStkM-JV5Txub0=2Kow(;r!X<J=hTYsZc?u*@m_rdTS_X2@3cb%?pmfUMFK{~B
z&=-gm@ue5p9LHZ^K0UHGxL6G?R)dSx;9@nnSPjlsQz9y~B%7f$dV>O0%}^AmJVRky
zXe2b71{~Wo6WcTs*J>uN*G!a?ww9^)lC=5NaM+-?WjNm;<uY7QIXaT4wN2w<*I}IN
zFsg>bmE$6H*sq2Q%!I+E4y#m)uTm|3&xz(qJcj*a&*%DuzmZME^d$&36HD;|dtBDV
z3rGguN`*fI;{u-{!o_l2M;s#ckfrSsf`Zj+l@zcnEPjmwsBQsl60oe>peyzKCc`01
zIBW%nEbB#<^<qnW*s6WVk}k-SF37UK$g;mQ$-NaEw!~*g7i>uvY{>`MvcK4}zu1xw
zuq9ovOqVJ~#fHUcUO!dCA<+?3O&AJyh^jds*#IIjbcaaxgH$Am^=ZnCgr~cZy*}w<
z=J81%ssBN6QdarIgrnI}OP^$8k{DndNY!@Ptiqb9sg6~pwW%y%v6vo}z}TXpHS8*_
zRb>GyH&k;*E8U;eoY7uo5b879fczuFI56I^=UjQtHJz4QEvHH9J0QQvk1O)yIy-A!
zVq4gP6OD41B>Rf|Y3avPGO(c1DnQIIqDnxYVXOuW7)D$P*k%~30Xw|cs0y&ldtC|m
znD@F8@ComAB_Lu$gR23V?P~$ZhXz*yvbA-q8Zh7#uLPt+gDe2q(BNvoF6(tQARijP
z67UJ@btNEXBZH~|*~nM`vXQ~nfNW&2uUDy-f}LME8Ds$$1ly#2Wx;k#sy8RtsIrai
z3hBZH!A6yB>~g@$Ha@OUJ~%JfdU?e~!LDp06Kp+K;Jje#bu%ssc6A#Avj)Hg!ERNz
zvBPAcx{XY*tH+uNHmV+LF4)y$%>)}&k2S7Pxi~M_sCukr!B%Y%a37~IN#cv?SGHSF
zY$ukRi1V#0TjDq~|M|Kj!dYdNWsnwa<thbNFHo?>ngd%bH*jI4p<)4X-fkFsdC)mH
zIK!&<VoDn&24BfCtln3$42!jA*ZF-TQhU>Ha!*O--AX#xbY|mWALJw(b}lxG{`!R0
zb^7bq=r7(`e+^i9fEYmoE&|0)w|Ca*_SQS?H``6_p`ddUrP@yGBMQDT8plSY|Ah6Q
z{9%=IsZhKcC?T77;ySsm|KD;E4=uxgRK|boqLTk#+uz53+{gde$N#8^^KT#OgUjpg
z@9sfG{OTV2$#yf5{05cY;L;m$xz8rZ2Q4BlWYGudRtSA|Ia+;y5Bl9YzDE(-9Nwb{
zZI13yguV~KJ=h@Bi0vWs4leOdP~sh2;vI|2cq!kk!!$~cQQPQBcCiFCT=7&fimzuB
zRLLl4Yxe?udq%;{*bACh9&TQHev@7qs>Lnx<v<I4$Q2=6k{RF9b!Lr9T$`;eV{rJe
zCjk2^p4eCtSNZeYoxjZdRjL?~OufKL>t7MhhAexMCNk+IQoY2gm)Ui7i_yc{;ltYT
z6CbiV8(;g7;<{sMx<8n=f%JC)Ib4m~gQA4Rg}ajev8kq9Xj@nA)?NRtF2aRv0=SCx
zpWhBj>%SJ>-~YOmKev4~iLB|!5;IO_Q^rF)1qrOL$ogr#h{E9wWc)~$P6nDd=Mh<h
zMpLfn5|2Rt*CKML{M|T%%V=;74Om`=ONI>=bF_kJ=)j$CII1VwK5~XL21q_bb|<o+
zn_ehfDclY*x>Gm9QrH|)!z8?hDhNu_^2_i8UH*-ukhC1uf>cbMr$h-tdF~&^d}*`l
zAOV}#>N)2pS#AItZ1?@kH)n5u7JFdGb0XYuekTRb2@03IX#i@MylotXW5}7wb>i=-
zq3E6y)lfkD_MJPMHxyNnHqYTE7yWm6ME6g_=@NX-X>)nur%#!434i?0{Nri<u~+z-
z|45$xwwltg{p?llQEnX7>19KX-dti#)DMTl{$w@g%v6Z|-E*EvqpA4(eeUzK;#hxQ
ziU7g3mwB?R@%$1tms>}HKRPLVBy7vRVO08d5l$xTBMMacQAxvoCZC^WgF#C)oCD{X
z)0E%SCcyje$@}lK_fWgK$7g_;0JFxkg0!s%tXaxJ?;(geZS(;zNLoRD(}JX8t*y~%
zcANR2B-!l$?A^Pc-~Q_UOe1=qxoKDz4a9o^q0V2MV~S*T-XF#vzGkM#e;h_*Ued?4
zIOdRy)1xxSaP#S~#4`l=dnlB>rd8{Ci8Y&5!}@;6y2%jH(pzK?V_qvZt7uHT^7NxK
zxs>X~1~_HAuObw}y7u4AV7#ULzsScyx0L_4)85;E_cH$zy!|P<9LCO%vvCa44*U58
z<&AST8!bPAjrMvu1O(2{pi7*0v*j{cnBZQoh$;5!V>nH&V-wu-*>o8WmdDO1W%$1j
zIHWjf_@>V$ZYMbv(HooPr8@i9^Vbiym_&+)PtUzOBduiVQcf(EJ8ABj6M0gCp7sax
zmCI)E5@zQ+3L)~t{PjS}ANwMnAm*iel7EYpZ~g3TE@KT}EfFi_0`|85`b*&UQhezz
zpM}aT&F=9Oxe}U&1_;{#`PD$P)v^FYyNYRKGJ4@IwZI5I`qr1;rYUTjG~EtqJoEh^
zfXr6x56JqA)z|E~(g+;U)J#OL5$h!Hhko>awhYH-^oYKhOB&UXa^{K(@j4aa4J!24
zsnB1$LNY+YoyNYx*=kv%LRA{qltX%AU|o5DJx9%1CJfk>M{FGozAFzweGI_tGEIvZ
z=sp86sGp)EJmNW~D0i~-QaSzcH2s>|>`;ouNRh^5QpyFz7@m6y<)p?l%0hAEOlgEc
z=~Qkw3!P-K?<|y7Y5TdfasW=Do62zFCJ0j8G<KZ%g48f_A2M1ykh+hULP-)0f1f^-
zhvK{`4F=_F(_j#x0dR{!m`Ul}a&qPwrRVDy1YmSN2!&Ky(w}UkIMphLij4&utas(X
z8axe~=Ss@UY4HkLfGT4)4o1bSBD^h2@ESMZif@uB^iT|v2bgVC@C{`P6Vn`=TaV?z
zek|7-%Y*HW<$-Z5n_+FaMEkMau3RG9v2@6AX$CWF`miGkyUk5})UBR?%fp%(Mif;e
z6W&ZEXE0DnqaG)G^Niq7!s=iTB(Wy2DS-_$d|}Xaf<AoexM8n#)c$Yh{CLo?o$~#3
z%J-!y-{0PpFP11yugrs_wWCJkD2c|=U@2@6VSoFLy$(vRD|-$%;`#06o<nJ8);fwm
z|IPW2p{ZT`m~LQ*;uV}*;K(xL=0|rJrAuXb0agu%E#Z7CIKe^m;6iu2LW=VfwZZuK
z7#@$0p)liQGU|zxmRZ{FlInF6-8qWbGn3_LA5r>?JpY+YqUGXd^ZOsD<Ku3(l>Zg&
z&wuvupTF5_AEvKoT=aS}l1Vt1OC{0L%2Vs?$Hi<lFT8wlGYu#4K>6*AZuzP|@pctn
zKbtON&8L5mQHu$>c&RFs`<nm!Cc2=xR(=?V7lo3~XVa^d_Un)1*?BmI8^hX<Ka)98
z`|;HzykH-JH7=v^obc~xH61{Gz>JVso-~^<Ma(!091bX#6T^VmeQ`*$&hxEvKte=k
z$O!HTrRrP&c{8W><;^@I5Yf@mkwZw?djM`@L<-12(SdN~Q8^_L1hB_EN4|50tI6bs
zxkJ!Y@(ucexf3r}i>X7?($fo-MYK6au*SDstQfkCLW40eeW;emdFnAQyj-W5kKSOm
zSP<!cKm>6`gq#+7HT*=ajuKUzXN$#q^1JsyoJbH_kLeH)^K&?R4yF4phjJp1z8{fM
zOxRD_0gvaYiI%4;^0E{3m%Z;ulULIr|3pMA{X`3<Bo-Oiz^G(KE$RA-=$m0Qh$rEg
zP-eb}l4!~@1cB0_-hmWUpcD>+4y2+w$;@rmEMhtLnSG9Unmc|YmW3AL#;8ES{%t~V
zp96v~{)4WRgqP(<!iT`CnFAvo4Dw|ZPlrwv4lW&H%#roOl9mcr1UOzo(?rfX0CZ_B
zId?7BC1C$4!9`QJ99wefVm^oQ^01dggocE|bT}Fn%_J{p)A0>+wv?MmUJs%ZQI?G1
z@o`M_J(<ZEPbQQpDLpC6(Mr>)4qnZ}cp=Jj$rZR^URZ#keB&^rAi$(J%oVA_>SeTu
zGGfgMttjgpu@XvB&5u|+8lu}=eo}ej7omzAy;w~dQ+%NI_9^AWTj6TaKp|3*Co1HX
zal~oKv=^z+%G~^>XYQYLmi^|-?%`*Zr!!G(Or`bNC+?G0>{_?y#6!YoWSWCiv=Dp3
z_X4l&b$CKr!jWh6nt`BIVVZE9ocgEzkSE<!g0#nWaHJ`fyv0EO9>c=&FqA0C`g<b2
zDM+bGAp9G|A_>}^G^1Yn0kzT(`8iyI!Kfu*N|U*ayuD{wQ(f0Jnh?5l1f(O~28t9D
zTBL>Eq(}z=K|oMCNC|?{q!&>-h=6oKkq)BLdljkDt5oT5*3SJr@B4h;x!!Y~@BBD_
z$j;h(&%H8N8Dow)6G&GZQ_gR3@X_0zYEvkT@m>*F&<cA`eF1fCeqEB#^==nwV?4}Z
zvfW%<^`byZPm5UY15T?74}z&)7M82s*N59&wTms%BT_nU(7tiAF5+U!_$lYT;`(k`
z3C7-~-)4O-!D3wVG`n$zNu~Aa_IqNRpYeB^FO#=&Mww+FFg|;cX}{*Z_Lt_<>%47a
zbY<K0XUBQhUfoWjdAz)Y1ay%Mb;+va-?Yt8ZV@%J&UeRxrMH!?r+9uy`^55*ihiEH
zuR;ITcr~@}6RB^R%~r8rTkl1?(`c*99)`AFzvG@|NyR3<RYsq2-ZEKJ+47WSIc-Nw
zTE45Z+z^Y`)UF}|i~U-5y^U=>#;S<F$9uRXJQpt9`u+*CYaqk%QjL>BZNp0O$_1Eq
z{92#pZQjZ5&X8Jpd4);NsXbcnk*KAO^uEr~lf~}B?qsi2wfb$@XZsrK>g>O|$4b`K
zr<i#cRIA<i%2n+jr!bul@g|#wV07;#<ex0r-PvQd+B&yiL%BJSpSJm&^@i=w9e&?i
zE_=vPkN@qULYM6PQRuV3wV&G3y6WM-JlN>_mk)L;P%J+OEu6vcgkK*Za%N{OIk>Oq
z$6wyKeOHhuQP2lj>a*PuAWw9n&Hl4{BdxxCKB;bE=$3e(o3MCb;zMK!Z%IeT1HHwe
zIX5qZ&gxoiV|~Hzh3!yNPST!NQNe3)^r_f1%pjHFt&_az<4$LAoS@HKE>$wLXJ-jc
zR)fas+TlIWcD2Oqbm&K6kG)CV@ZReO!}CK0`uc$l$#@!DMZ%uG9yyO4NhY=I3p9&9
zN;H%nSIx$WmFwnl=~p)ow0&2#?AB8~4om!W*k?7^w{|_%AQF?yRr5PX=0;DfzeQ2F
zyqm0l^zY({5#EM;hF`+cPBsFOO`>keraBnw&7JP!O6jw+aP^ToHNK1Xe<LODIZTSX
z>zBQn@{>rpF=%QULGh6N@jV-l3qs<Z8BN`%Q>5nEk99A3ZEz<>!_4`g+I+#&CB@4p
z4AP;c!5?F>B;6#6S10MkdoYs~P0atj@CqX1V&+TfjGMgNq@Altv~CyqjqzXkW<>_!
zqv@Q3NHym<qV3VGBA4%)>$R{<Q|397{SP;<Bssss;49Y<-#1e54vL2HGjiv_c=+Jg
zOx^)w0irEBq&#QXtGTcn90n?0ig@u4hU4LyVVceC(S;)NJU3sbutc*2^bg?i+QZu}
zbK@z;@#MjB*6|BLJaSj@V$#U1uk4!b7c&X&zrb^Q6`!eNWDS!mXgEN`?p5dEZ*)nw
zFd-RAHHs9vLqC3&?N+j6<Ur!_){4Z+#(xrwot>UEelksYGZRa1ZelXP6(ba{o)rl{
zyVA&R9zm3V#w=d6ltr}!3=IePlQO1eN5dN5#dm9kC>ba$^0#yIXS<NOx>B6LhuwSe
zYHfs2!9wxw?Mo~NElg*y5_^S3SC{KN%7a=NPpEJH4Mow=z;tanb+8{u@YKkO)zlQJ
zl7`AiHi>l9An)CyrGe)piCDwpIR`h1F8&CT3#jUT#3{+CIVRsYCe+UbeBZ`#mw+Eh
zA}rF_PLrP<_;de0<h>6{a6|OR#O-MO{~kIH1N$&#a{;6p+8JM2<3ds)0e3JMTD8Os
zy!g+wrIQzS2`+1yo3;@9qp#cxqBo}_9lWLeQu}cNoT}*TH>Kk6Y8Fk>YRUt8#cqPN
zdj&BrS%vG-F8%ij6(g}EP7!ld*6ZPOcfN=*upA&%`7;Ra>yPXzz9Y+t%d^D#gXwgC
z_AXCv8T@S!`E;Xe4nph}jVM+IuW^uH2R4vP;bWyT3L_=KV`YY&!3Ju3@qBy~M`7`q
zw!1;(u=D%P?9)tQ76?}t;Twvq1cgpQZrts=^lsb{zWF4n*;L1$X;2ugzZ2Z9>gtlI
z2!HIJa0TpjOHg+vnf!Aa6wz7POX<dEcQRe7k%{$TyCLXD3~>7n#V9Ho7*8M>UB(=@
zc~1CXOf*ODLXuias&s~(GnPKuMBGS`@IR)XNuYQ_qp@?lK1UlhzvK<`kMz+jGMpL0
z1}{l9$J86W1$!C55P7k3oxpQcsSVM8wdBAAY`XaQ^*=6bmFmBf&{%i4c%N$#?{EH4
zD^hFNy%qWBI)WS17*wg?9P;t6yfe#q@&zz~crW(ieT?#2iBV*9Al7C@oVZ}6IgY>N
zp<bf)9JYBM#EIWz?g;2{;uNg{UhEXigFOsJZZgV#<JDKBS{j|(Bue^Dc%w05AVf`_
zvG7+}+f2IM0b+bSd~|MtrOPxs67T)MZQ#X1z7<8Lax5npjLwIlqp&ccXWS1EbED<(
zY06S+JVPwyOKN7UP!fkFqEUKiljvZRrVL(<96LZq5hs!Hcn9$h4DS*@Qk~Vtz~jp0
zSVM(QF|4e(HNYFwi+3^`zQodQPKXBvp$ZJbcVw1}wuW=qn#zC7LVPd_Gyi)QDuG#u
z56!~o^wAR^?y=HZ-_%{ThH;Nnb+boLY`a_yI8@DDy4hBR^|cdmR*zq`Z~IRtI;RS>
zJ-s=CBsqi8lG?(5KGxh*%pw!LnFp3JvJqHN*JYLE!z3q(1Ry^mk~5K;dr6Hf2A8SL
z<HYh{qTw#n*I2+AnJlzTB$+?9Yd?eF;GOhM%zH9k)y^248+{%yC2@JUR2Xo%UA!57
z^ByM4n6s=c;m3jnJlXiNXbb7+T$pqIcix^%o496n^S?Y5uyiYY2j_P4tSB{#a~&|9
zUp{H%{j*@JcU6+poJ%tc6S6Hx#LS`9LPpo9kPqCrU2=h$<3$=d%pV(J(~CErM*@~Z
zla!VN>|hb>CG4IEQS^$EIwP_VN!>=hxkV24Lim~`mekx#{+(cma{)mr?K3ykO`_3#
z#Er%TemB+lCRKK@HMx-cWBr%lsRM1ds=vmjT#y?vj&;cz_lr%5`;qXL8^clU9ydod
zFPg~R(o|P9-N;U5Liqu751HxBtwnfyP^X3XoMncxRmw?+)shcmCR1!s?mXSmEMl)0
z=A7E`Hlg2Bw)9~Rl@s-F;l+9HHzL;%sUp`75I&d7A9g8gIbuFOlxI|}Dt}0DesP*|
zlc3?PNP%PIBu{6|ZMTb?y8J7Oiuebdnq6Q-CjC2O=;pJDUFMAU7h1vG`>F(*wH)_^
zz+i}8yoGrQZ7c_PvRy1y$=Tr)qFz}Te4k+zu&&`!%kLubq%z+{${pGUtEH`^sZsc2
zH3sb*oaGPg@I;Z;rI%@wS)d6qP5YK9Sz?bnw^d9OE$56s-mc(GtOOHtUcLYqGhe=t
z<wGApPKER37Yju&GP;xwuy*r%)X-3X;bGPk5k>Q1#Ng2`aP~>CFZxIYc9SToza*VT
z@B&on0jgo<#xmFzeK??T7GZLwur3Xy1uFvim3^8t8TCUwve=c7{8*3B`1n$9|HJ0`
zgkXH^HC`>au`FZn0}I^yfQ|Zu1BCJGQ2LC7&o6ziB{U7!G5sz2COy1KWV`;sV(LR&
zc1gYPnvoU;ej^9>+JE4!0Ns0VRUW)x4oT&j>vbz5KVR)Fb>bPjltEzoaH`ZPM`%={
z#$rnO#1hPmg?7-53DnqlvBgvv8+Gqs5%hXFq9+|ZD=!<%D+|sK_TBXW*Tbb`Lg+Kz
z0HX=*=I*}W<scVK&7TY463MnJi6+hUbg^6<_sT4$NZT$){IUn_yDzY>N<i<Az5)|r
zC<R*pH5?r|;PKWs#j~JZ+ylLE2zcpR;1$W#1s)2#O0EX(1RJG~kL_rsj5GebHD=Jx
zLNwuf&6`^$nX>}+s2$i`xx|k^ALWA%qiACBQ7D_}J|soqXNr@1txGH~@UbHJIxu@`
zA^~Fvw>f(IkN}i^-Mz4x)&{mH&9N)lF4OGx2Z-8Y#?*nXvUq+iOu$$9#!;|AD#COu
zs50?%0y%Ex!7Vgtv$6?yP!xD4@F5ODTliDW{~f58re!dTWdAEtTZ=X1XQSJc%ry~(
z*JDtqB;Tk&m_J%X5kNK5U=TO@%0o3qSPrqs3H%Wiao?@*$T>2+ST0R(!$zwc9LOL!
z^rMgXbQu+?;NNlHZzekW+Eg1!eTLP3Pkf{C>cu?EcVvNYg<!uqz#jeXF1K9EUEl!C
z{|RWA@MT=bt%_X;ssGs|hlfNYX|6PLXcA&YZUgq>sY&AMfW7;b@QHbKe?amj5@z&e
zxVhcMtJvjxL{cAl9AGc-10tc#>5-`|TuO=?tO!kE@2`Yd|H2u{ClSe|03+VggC#gD
z_69{`?H!Ja{ZT&{)UDOZL45E!z86W_*Oy-09U$H4&18t}u#{QUe0djqegOiKRD>>v
z98oCBzBQ+0hzl>r1i$bGm!ym$ES^kA2OAp)eSQN@H`kj+t2dvzpJDBy5f#|Av7a(R
z85hgnlVP=kn9i_kOyl|Z3$gd=@suLb%a7o3pRjXaSIB7tCi>m+0-4Oo*_SR5?VuD6
z5Xx^jfr0Fhl}lFwuQ&)II4GRS{n3A8egF$Db6H*7{{vb>PCT)f@)wpbU<>VCAilwI
zoRrHJ8sGGiW9U|+G|>U#V^sJD%e;F%yzFug&O{F_k(-BV5J1C{4hiTQ*KQI?S%!?x
zL9_>KMX>VG41zCDnabL(1iFcfB5^Ar`5AR`ZF2Tc`$4TnSMT?ca&PwJSd}d4HNO#R
z0#|-{0nqqjR<@@J#G9Be1E?~;R0ih{BoPvY;S5j;QZr{HXGc)(hG5$uoDq7g78U(N
zSO6dwQe`kE@2*-11%<mCL^=MC@%qvrcFs`Scjo^mz$V9Yu_6+cb(vnh1pzc?Bd?Pt
z+<|_PV+3ZFLfjcc?h2vAs1KzfR^tfaV%2W`^&1Q0Sw7$$eD4F0&SIEk5kOPQi6P>{
zLz8U$$bpT%w2e+E1Wzg(U^bYOqs0(T{Co`vXe@Op7>Q7+e~kq5S2#2h2Km5|5$I61
z%k2#;4p=$X+d44j92#J-LBIif(HpoOE}C}t!jo)XbA%@0v0NE^IvwmPKGWo*uzWtG
z^_<+I9N>E`Vt{_*mqx*_Vtwgi>N(nr=v~#<hHf>nn@>?z!1S{QZvnLR?L}_(fF0p1
zlgL$_uVsl;#aV#L#6_t))cIJ+S)EDWE|w)=X-o~k%&+^gEJXVY&k_D;SIIoky~UuF
z7&*pH&3k~5x&#68SgYmsDwV~Tr0QW;dw?M2NTI=%b~gcgJ>^;Qrnd<)wet7Nwc}T<
zhYQJEhYJBTzvz4`a!tO9we2mP_09S2w{$1>u+ohTCw9Yy)kyExH!<B{QtD^sab2dK
z4o8c)SiSH(zlG6@kK*i%ahXWw0xahOA83}Z8tPzqBQ&ZB4F(9LrZ13*xXd|53ZO>k
zLfNP{4FtjJ#y@x$hax=)ibc`PXS0fWeRu}^4wup@C~KpeFCZD61ItR)!5N3R8n$*0
z+k<f_oKvGPKz^QFV}{7jcx#Rz{x~*AFl5^RUlfh=<@C$TU}W?qOTm&*lP?8BfjEIX
zqK8IdsVo!t=xzcZPfT~{_8}RHj|0hg6-$|<NTDUQcy}?VI@blbARq)da>TY58Q;XK
z4#K!aVt?#4hm}j>$RmYq4wwBV>>qySUXhH>2_i}o*pvf3U86@T`O*Wmg^j>q{D6%K
zqAj9>vkF^ybwX^_afZ++YYez_D+&y0^<qrbac^6nhJww8lqLqw>e4;#n-TeLYoO*$
z4JF&}#mBcj*d^L~`X6pcxvtvUn1i=+%!mqDs#e#yuO{P~&Li*;Tws}c)H&|EG~Cy1
z0<P@^Up3NuQ+136t6bv(XaKfT7djx@6xAN=-`J;X&aw!WRE@#i?dG|oLZA&I_Dk^w
z3mdW;&Tp}E#0U9!+`;_P1qq<f^`FZXL5Bcvt5aG7^9+!cJj)<ad0RLtFb?;r^4~z8
zN&|c<)DZZbH+!`B)PP+8-K@-Q<G$!_rP<uePy-7*Rj(B^IJ3C^0X?N2197@-kF?8v
z0E)03)~o((VFTgDEtia{^DQ7uooy@%AWY5XpURbA<d681o#k@zcTM&Zi~M0XAF2iI
zG4<i@A{uhZI)9c6f8hF`%zqHP0u$W5ys%U-_6Cpw{tTX#&U!i!E0)wg{97dPZAAI@
zu^!v}YQ`Ji0!P-zr$XibB168n${{ioX8!PBWJqsK2uFtA8a%|2q1gM->*a`&T`L?J
z(t7~T|C<asmH}qA_o@>jL)WOllX3nqyxh8G1h{2q5_l+xl;2N-Gu*+#Qstd%Mtux;
zZx5~i76BjvRA%K;rW&;&c0mJNk<j!|;@>9T#(<~29sgA+(@kJ?Yy&YO8BTPf!$ZKT
zsH1b<`~;MU#dW`EDTerQSTCMc39y)FnL!3KPy!}a6hj2i?vJq#$lCrNz}z;x+8zNU
zj<*bk*eFU85by~D-QkO#`Y243K&E15VhBm@cCobnQ687p`^r47M{6o-fK!n@$NDQl
z0k{eK`U)Vpg}IpoUnF<|zr8dV51-bK?nQ#zT^Zd>TYePU{-F-&7XU&>X<|5t7?syS
z$d`go_%G!`wB<)`(hqh`^0%-X8GZTBY5HNxx9r<t{PKA-l(w$hjd=jhEz2*z#%Vul
z<Tr1TC{jF9PM0O%f6ND1+3;pS{gay|mO;`P@mFLn*t#AytpYrE61vX_d)f>16JgY4
z3mv@ug>=h^lnbM|dR&WgA_39FH{rpg=3kAtkaFaBm#bwYI?A@bEB*nlq|GD{MitBe
zS`_G%><I$!WWd(4cUVkYG;`C`Oy58L*v6U^G2gf-Mh#*<xcLHu4y?SH<>1xj2XWWr
zFbdVs{`Ig7Ai=EggSdNytL|X;g7~r6KBuUc7^Zv#(COvfmw+7&(}LM)fTji{E0ANF
zUKwC}NU@8@W7q-0AqY!99cb~5FoTS@briM<tud)uIR~s`q$sr1d1V0-eaI&vfzbj9
zfY%0SN+NkAee*jwZpBSwh|azz)ep8WhQ1t^=_#BaNVrquykjj3R0WWDIk^OHnWBvm
z%Fc>$YqDBq#6S!BzoUBZl5p$@z$0l@i+{!8y-VL>wMaT`P@ld3I18Ak8%Cti&L0^9
zNa}#KQQw>?iebqz&Q)<{>2y<nF5MV}2Rc%IZ;~ri=wOSg1SmqZ(_mEV+gC54kYnE*
zsFJwEOBvrH%%Vs@$VUSg^$H|4FTX=n|CKt43wZNOwTl&SMm^DuM!wSkq>%MZiwBWp
zxImhApb?7xx|s5_u?9ip{{yneJbUqfjO@%n4N&G;QH=LD8HHWh1>qjTK_Za8cM1F+
z8?1dgn7|6r*pLs+P9Y5_7>_KnRQZ2_?O26eL=M=l*uTbh2l(G>X763X&-_0{cV!#K
zIt-2VrlmDd?9O?=0wcBABe&e0cX{RjY7^B<7?(fH;UB32ARi%yZ?8wXMQ;JZ@8Z`6
z<wCs%<zP%0gzTXR-JRQBFTN>W++J@}1AYcG+bl3BUs+f(`N~q28Hoq}8zC!6D9jB1
z!yB$^-;Se@AG3TeT11Wy)**#X;Pp?mZv((Ls_Tro5=br&sUsjLs&CZ1joDaO$_i9V
zTcQr&h52Z!05JM~Az1>1RCi$q!XW&?NmK`NML&4;<JFtmd2`A91{J}Q!L72mk00Ca
zX*fGHnBpSxx3DK%Odx{vCmV&u)Kgu;$0q0Oz6iv{AIvWYWB5~t6UfnwC&am6jKtt)
zSOq?BYlN$_D=iGkusOqu<ZEBNy_ABT6V`<LG|D*xF@qAtH;7XTDdl#FwNOUaQ)L-X
zG(P}+BpC>x**h)$$W)e7=V^`^=@#r1jW7p;FUdKQjRxiyps53EP_jj*&Cr~=NXV(k
zOP(WtQ*Md}XA!hE7=HDSY&U1?1E+9-d8?(tSATUr%syx0X1XjZ!5q!?8R=jU3rU*r
zoGf~V9KTmS=-J}m7f;wsA1KN`3<qzi9|Q4suII{(G{*WSr7C#rZzwFCy9`=#jW9bx
zvA|RmajT%>SAPmg8K(WYn15_P;(~0WmX3O;{jpPX7jK}|SwTm12*qkP)43q|4edw+
z8o=70k~(t%V0?I4@u26fv*K(T0<<q(fKRMx9Q0%gvbdfuYPNvVpk6%JPzk6bo6Fas
zk2C^!f}#6Y|85D)M1eZEx0}HA4{}DD2+0>Qr2{oM<GaS)0S#?Wd-p&$<!oJo=rdFb
zz^53z1)@vG*Ssf-niv_^TNxhvnRgfOY|^rk1MSu?m?kw_*w3B{fJwt*aJN?c)Qs%n
zrKL$<K)!l^z;2*DW+4{lSpF}Jcl|{MqVzr|EDw(1m@F#)!=8+kLm~FGA4UVQr$1!i
zG+<92zThp6JxzR9!m+2ns=xomo+cbY3t&&`A$<R^r-9Tj|F9<u&;YTgSoQy4Pq|Oz
zF7yVBf14mmXySW3?n`xU+{~WARSX_0*>-F7c?~6i`R-`m4Ji)fyRCcapdI2))}Z0P
zxf2F(Cs@)Kzz7c>T!IJ~<XA<M-y5=V-Z<Wr`alKZO<?Jz3PDG}t)$nV?!$pw>p<uJ
zaVr7?jN{y@PKa}Z%$!_FrBgx~oaKvK8VC&M7tKoH@jo<t%n|(GdThbp_8v2H#HFPT
zBWnkkttFN|!v%yT@lEQS=0M&BO48Chm?&BdGgXIK6du$<8*MG6yadzLBdHHbx+Q7h
z8kyzOz>qpHmj;FIHoQPd;h)0DFKQD0I+R<)S+KRCfd*#em=>r#A=u$r=#}`94}oNv
zlRPUy#n;r)$}B)dkM+Q+VT`wh8>*_U8Re7tH9~~P35e7c5^qK;!FHPZ%@U$Gy+>vb
zZ~{`vC=bMy;6-`3*MD)Q$3XtMG0FY6NQbcD&R3v8i2;Z-q)=Kjnjge*rV6|mpc+B0
zETRIJfob104f>=Q8MskTCrOhD8Bn7KEjUj;bPL6irW8Z>$J;^%acUcl4%dmK8bYBs
z9d^4R122hnq0xp60{v?D(NAqNlsCi44bj7A#X#SkR!R5>ly<OOKmNs9$9cF2KZ`(-
zCaF6C-c-fQ4O5+vCAeAlr}0L72zdro75@4d3<Rvi@EaWVVuqqH>&@@_KVi6t&p>?m
z<A3Yy*4{yR@1gr^!ShqBR}=>`c;zTmYk-Cbi&M0PM>vKlgF$GxbU77q{vPD%`W|a9
zr;Nw37_mF!k`=v!*uvCjuUY!7%v87Qss7<i1~tc%3=|7)ogmpKWDQxV@MgVjy~%~A
zM~EIafX@PGTO9(#>o%*AhsVFOX8lOSYZqYWoyWg{-j%+Ir=`hw5kNGQ`4O)6UzLo#
z63A}5i6Z=GKlX5~i_DD6R_i4Ef7aYx$-jtWK)Al}3}ipLjcg-T8GTgr0QLjT8(Vmr
z@$G-B?xmTLa#eDGYejT5<AEdyp;n`f4kW)j_x-|+$}u26!^>1ZLF6d*#yYMUT4PqJ
z95HH>U44Z*-g1o!8LClK^JWyH==YK2P!tbkm7pk|<-ekMTUQq&Xjm>}e1!kQo~kEq
zFmvp4x9Gjjap8>Rnx18jl~p7hZGq7>)`uyYQRNJX_9M&mTR`T(SGpIEY2xN<kcINI
z!hoSt1F}-K(GmnL+1IYORiRzGc#wR`i+;u+KvGtV<)SjFQF0FH|H<*1g=j})j_hBd
zyz>4`9Y)A{6QS^5VBcN){{`%Gb~Fsjv&j51WC7au0H7?qt!?-pXpe;X#Ds$7UK0W2
zmD+Paz1lI<3$1GZ8O$$NBrdeZH$~;y0s;myy{)Le97tMWV0GA5jUpC?PsbU02MG(9
zj?TS7k&@vx=7xe0sf9Nfh%j~{qE%UQN_T<$zPUOJR>~Cq0fNcPC|WJ;S*&Hv(nmOO
zX)iu{xJ~pYOnH_a<T&05;uFH7-Hz075j~VR0TI128@m+{SEOb5fb6A)mClIKC;s`r
zg!KJLiRJ$Q{C)$;<rK|*C^IWX)yE~*Cj9~gDVqrYe&nVT=LtN*=f2`1APm7#fgzOc
zh>gQO>||2df%$$k>`88P7Ty;RMTGaKE58FLWL?Kg7XwXB;2Rk23Ya9-#jU1dp@Zp{
zl<v5A&)b>rcVJyycEcOmttNF=g5CFO--yHky>E~K4I7uw996#&)nE>jZ&I_bO`wi2
zSW5T4_W<4~W958G3&^C5XMu4@15Y~$IWYe-+-EumUUM<{yDt@hC4l$-dz`-mQ&FT#
zx?!NND@z%xxbCFh1Kq(=;ku*6<Wm~%UpzzE8;1v;18^zgIi)){23=ptB{dqu(*K2W
z4cvPqgPtq|4a*mZOxV{3Z{1C%t$-mtBM;2=8GRnX*w=nTJK0>&o!^1JY1tC^7@txg
zC~xmEZ#6}NY?1GV0%$<uZKpvEb9a`GKtE|JkasUc%l?GA2I=t|THpcsT$h~%26oa1
zGF(^=iYB0#exER`TZ?IbDHJ5nkEi88^8D033nb4^7loHtZ0}*|ITj;<EI<AJ)|m@P
zn4G|!Uxz?J{TT(<nG1-VPN)?iBGLbas43)}h4N{-KQ-WI^JgcVXD-!lyO%GXBeT@t
zhS-!g`IE&wGRL)9X$;P<<K>_MP=d)paODy;{7^a^P#%opEL#U?0G1`fzy+EZA`=|1
zOCAEe&P(?j;C0hPTmR-!8yVnQz@xyAcH(%HTIfGK>P!VPdI9dm2^#**qwpWA145U=
z^&8?*IjEDx5NK$k5uqXHDZGDA7CC@V$Emi`15;>YVcV~vqdDT%ox9)sDPm_cR;7d4
zlzJENZ=kzjPzRyN-sEu<FdrGTbUATyBKQ@^sB0JlK$kyMb98{^Z2cMqx%92$0g%nz
zuIU?v5dy^q=)o0N&=-2Y8$+?nJ-ZDlD=46G9ioHe;VE5t9Nv2<^?^X2J7Ve#jL5e?
zU_>s?d)(?ns^RiiClyeYgK|N|xG%`40KFzc(v?CdfJ!oKpazf<;sc~8$Okv%(*yO1
z6+};d!x2Eu^Jw7oU`HX3Qgd4;i26jqm<X`RZ%E~lZB)Fhq5KWSq)S&d<{<6KfvtIh
z2uxF(NiIk%>Oc6o1MUISG`AHrKr#u$ckce;2K4DZD<nM~^dkX%f}*hAnW@%^V3ue9
z(5LDM9DP!PrMf{!fIfj#@qf`LZrfzwPa-5Rit{I{$}5QB1ua>;o2=LU%Ih&*J|Co3
z%rE^SSglytFO~wpir-)IWMeJAV)Ep1pgdi*h?7kQ$`k8{uTbc6lHyws1S!XnS1bVP
z@46_A!hm4Xi{x;?Li|Z%1INI|a15-^93h-<1LT(JYXl&%qE+4Z4LlS?_^{N^jfB{%
zR9To0-Z=79a<`XD?(ho2RUL;=X1N(4omnr5;>*SHFMVOC>|tB9wO$wGPYAK@9(|DB
zvinZY2_jCLbhJRln>c$grR=*-6!fz)OAx;TK?S`+dQqO`%g=4ScoqC?KP$BvPu?;D
zMg|Rr?TqM0r-TQ<1d^c_6c>Y6K5j&XIwK@B;aB{=^xuLL+2c5%-%$&E1k4U$4YO6c
z0v(a`$y!e!w>fTN+|6$b6V<cP0wCdrJ8TouH9mYw;q!v)CPpvBSjm0yUur#iI90L>
zM37F;?Et0#Ivwp+q5&zyZ6ljjabiGaUi1gm8TdCXsjp%UA}D+m=zwaUWi$DY#$qOO
zp|#qAKRHF{Ah8INh#{_E6!NjH2FYud5KkVxObxFTTc(zwX(Z%>VtT-z;@w|ZBj$Q>
zRSsKZhG?!o=^uec>~y08q_Civyj3>O0SgFJ%`J}9IKUa%r$DiXP@@!K9_+6!_tA6y
ziP89Or8?BZ1H{^K0DW((N<M@r_URC!020gOC6v&;JeO3_x8W=hT`{`>3KcdYt5f0>
zlDB~FqExdjfRtPI%U6jEeCE;=AkFdUeDYZk|DnzvTU&gSV7@mi?MX7Ig5YW>Fl`xZ
z>w6syqIH&o?I&Gt6S7?O(Epok-%V@V)sD95A_WnAYoP%U@;7?zy26AGGVzNHnsB+O
zjh}X1ACj{@NL=P#qDbdpim0C%$dCP=l|Zu(*=Gkj{qk<${|@#)N^oGW`ERhVrYrnk
zz&_d)Pgc|mH#^fcwJ}+)%HVDBe`xnSPPHFF8ijo?8WcY8HUh_5mN;xfj%B>U%yAis
zrv|f{h)g8U5I(zQj^I9$F{gqVh~-%$-kGw3Or+x_V02F8K19OoM<Ei1V+{0(<h^aT
zmZ!sA)q#lr7QeN+<3=iNbPC~BbpI7b2>Ge9a>XEejD5Ew+>Z=kyZjDrWy16tC@yh8
zd72|KK;_0|OaB%juAP0<XtpCXE$HtV@4zv+wJY409ONM($&G>UPyEsP80Xnij@=MX
z1JO!lliUSG6!SOnw5x&4Mzyn7pwtq_@(I-+Pj}~6lTNj>N~{FqDi__jx$#a6gtXu%
zzCwtN#qZfxLtBwHm67#jZQ^NJK;%XDEAdWl=`fB9&#`L!!y9^~Hla%dLV=Me;-!g$
zM)5QdHl3XQ1qUD!6D%W|+<FJ%razux(v4=uv^UUEai9qp37Q8+dO+0UjTQ$+z&83i
z$wMNB1zvZL)w6{b2qz5;;2^G^s<)#jz(WGS{e*`^zU6aKK3ljq8FDi+w|%;$!?Wcc
zt~i8kjl6ai*`uXlOgOnE*G;f%XFbiTF_$7NbmRrBQTK{R10yjY{SHjU;J|Iy=K;6`
z<WCW34F+%*sN`$C>(xW_EU^3OblQDp5H<?)H9>C!NT(+aaqpeL`+)(Fga2YS0uh4L
zp=RiUOfr}x*URpJt2CU!7bO8J(P}*X0p@^k;8&uWl_gnxsZq&qQ&psZoAGyj+-#aG
z%4QF5B&vZ52{MAy@T5j{3DZB!{EI}fae)f~iE6p!Q#!zf=e@%>9heFf{DwN(O&mQ7
zQ*Q+H%(Zu~N#G2&pLop>%a%xj<3GxwfYRZ-dcr*&SjUHS|AEiG24efkOW*<w5KWwX
z@qtwaJnv6jz-s*yU__mj7UDm>vh;BFwX1>b`+)4>>UAVPQ-Qt)rBS-i8Ka9^5P*fb
z$k3oE8lWQ}#B5SPHd$PKK{rXpQZfiy_uQbKy^4o(R1N@RgE3T+3llr(aUi%qxN-D~
zJcGZ8dRm&#5`iYzO#aIVaKsSP=&!y3;zy!`)D!4})YR|(>OUZ0slt8-z(SMN!t9OX
zI8%qYk#djZ5$8z8R0Lh)DV`M=qKcJ(^OSMikPrpD0uqe+(Qj$KY!|@d1o!~5$jj`O
zdiMa$iF3N1WQjVfV*B|y%gQL4;K?}w5aEitr=`J~I_4sSG%O&Ww_Y)CglLnB1ZaSw
zQgq}h9*#EI3~b^!j<j(j;5f2O`vdeq!ZIUnoizjvzB3T%LGj{1TD=wAqC+U{7je#l
zQwQewuYr#poOaFvJn3i&&Vqah4Z7P9Is%?FBY~(Gq+s;|Px1mX`YRR-`Uy==0a<Vn
z$X7<7_cvrAnCIi51rbTb!<LC)0N-?t8cMvNK<JT4R1IZ5RJt|2cm=v(&L*sek^}$@
z%+;4YndO6G2;05o3BVZe{7mCeG&yt%8&C-ZU_9?{TOa^KiXQh>l!Q|9(x-1q;TL&I
z?t=y(w7@e~zk-yCyv#??0|ap7qVh6uBu)DS5#-3t@?6M~iO&0%_aM&er!11`axsU&
z{>w2)r9gLwF0tfs;`)yO!Dp^HklE+%zN(>IvG+Loila)E^LYPFm1KWef!rgS9#WdV
z6bNldg+Snzp`sp8LwRKvNK}h>sBQKy$5=o;;V4_^XpVSk&1ZT80jTG*{Q$PMUQtFv
zY-9=%kiuFN8DXdmR-lftl03o(3Qi0tpfjrr1Co8t@r^Yj`Gr;>u)eq^1T^7|#!@G`
zXOL>|bK^SHa8h69ME8d49w9c;R0#9(Ds@&%1YkK}eUFR*b)-51)Fyr74JMsleG0S{
zt?DxHe(fiyV?1a#0W}lxwS(Y(P#b|jW9mxVGL?g@Kt&WCaH0Kkpx{=Ed;>W?hYY-f
zYA8Mw3m>fv!wrC$#VbIA`!8sN?g8ct>+Pcf14RQ|=<yFnQUx5zYu9q>YL*MQ`zoLB
z-T2zGGN+*eOQ1uH$mVltc`t|g7KA`M(_`ywmyGrxVLXto1GDd!NC0?Ei6-miRWip(
zRrMI?^=a=!Wfn%LLhU7e#`^O{&@ojob~#7T5%)Xlr9uO8kV9vYTkn4aB>8Hm_afJF
z`Uum28lROaQDksM*`vSWp>+l*k@H6nrlo^+9_d%Oz`aZj3fw{2&vmm1C~(g!lIx0L
zUg3}C;#hW1S!h+xURUUfq092cfWJA+u5Y=-+?+raBYKb`XQ4GLc0>{4S!T@xAQKOY
z&!XoIv>~1~wjHmQwhW|tyjaJrHr}4WvH()5PtsSDCGKHlV$o3W&SA%=3s%BzRx%`W
zjSv7qr6Ck#)x7uAM7=hYBG)_G2^AG1(X?;_Hc@K?e}w2$YY5I8*Y%E`M_&h(ztfZ<
zWBUo@(O~_Nb95x&w0iJK6iNo#2$$xPT)nhWe4LATYIs(Mh=lG?MRf!n$MX9eS9LjO
z;x;^1P}Ym?PgPw`0cE@Wu#YTJ%8QK+N>C3}Gys-cM)&t6ds95Gh?*R{7yq|!?|oXo
z;;m&h$rp;U;II6n5)-Et#--YPB7bzWz(BHhSpTVZT|S1<r`gnLrAe9q6o_wRtf<vF
zMry}wH@pS4TBO)K6aPaqU8a03@Hb>P|D@!qG9O~jU&Lc|iB$hgAIjIF`ybywG|M6L
zt3{vb<I*2WVh`v2KP6}DRBf#AddqPTp$C`oCx+ApZ^;r6_a9dmBm)n4qsyM@Gn`>F
zsAW9d=y)_fRTro*o?HsrSow-0YqdXDiXDLl8X<-np^jk*V1X%#1-li?J`%pOY>WY-
z$Xl3VwMn51E0CH1OYHT0@4^}b%HH5g`tfeXh%>0rb>2;X5rsl3u#(1*Q$@Q3u)vk}
zQ>+pA+<dYGA*j2Y%p9PsY{{U#7}Rm02MuU{!T?R;M#Ku}KqNR4sAY%5iR4t?Q`r%x
zwX;7#?EVfGf*ScMo<@!$x{8ke4wwnh?k_vbjz4QD2dQ{aiHCi6Jc+xOAHlK0P5#GI
z6l_@dfV6010yuuX0JH;Ci%?k&1I`iLT0g_85!J86I=Qtr+(!<$*F`9S9qX98gJUGV
z5F-I4CmhR!;(%eTF1@Gw<Jr%JhlFLRU)YM|bL-TEKVQ3vqZ@JoIEqI6an!SAAbSq^
z)<Z&<P&yG%wck+5hrwiyS4%k`^5HqSFP1AB!LiIM{m1h|$;5rYz(9ccr)LBZFS9#K
z^?<|ugW=`tBH#+L4I!+D#CK>=cB#C(#M9I!p0&_|$msS2IL)~j#EoTZDgK8TJsW|V
zkPt`f{|CjF-YkKh<b&com}H;v4(KyC_XPm*HV!Z%?<`sX+9;cslz(55!|2darVjvg
z*z|2Sr9FeHh`d#4!1?V4EOZ{t2=h*(W`;q~1KT(>&yB?MrlCq50I3vCH02gRiL66H
zsnOeDIJAHuBnkE@sF)z_oh%3prTv0|4WQVgqiUfVc)JgQl<wYgXu+r<NLo_NUZw&X
zO*VFN`vW9ev4ReUgKMq9*?s=HkFO}*6Plt(egjSgvU6Cr1aQ@497}T4Odu<ugDMyi
zoTg;E4MZ^DN+%-7DhM%sOYgPdKaR^FSZYgUg6L4qCLn5%APkdyaBntHas=?Z)+%h;
z{d`DJ0^*1F=(`_8;;~#AmCAOIv_w%2#3?{AAseAlzy|nA0wFeSBoFE)1VLvmG@@0A
zD*YA!5Z%6jt$?tcAiCKZfcUZl|Cx(+57@fUk&U2;D5sKiGd&KJbRp2ipUT=Y!-AIX
zZU7ds!q4H+4MppKw9m-_9(DQc`7AJm;O8a<Bm%%Xl)~ch0JsfQ&t~WuAav&-svCt(
zuzcDD)B<?Y)Uprx2Jp&6zJF42!E~u{;OiL0=P5zm0iQ#G{8N7s$}o-!fF47lJrZAj
zaV~9c^ynV}$t%AQQhlNo!mmKO5&qnM*^mDsLJw_|$!?%GXzU>&>uj6=B;p#tmqc3O
z77^zuI{P>Q$vH}Y7RthLBj)0Z!L6X~R@y}bVnmRRwX28=_2<%xz$qN39I*u_|ILJ~
zBG=HxPeG8bEEW6ZnCcA|@L@S4gcs}}t@LdYfCUWs)FQHwzmE0mL~tu8=7P=9zd#D<
zgo2$N<fA}hGYpYj3lu4CEQ`50sDD^}I_?XVHY(u($%Z<a1xIP=@PY9AWJC*h3VeVn
zJg5-j27xo+8PvTI{tj5xsAz?b7myZ%S1lVw9u0iBF%cX>4#Q1>6(k4)pD^e{_XC!@
zBm@e!0rV8<L&^?yFS(_wx{R}8_IEWX)rMDH9^S14(!`($Yb7c3u}}rf#0|<+`TGIc
zi5mKrh(J`93ktb%WZ5-A1mIkZ$sS@v?_%!Vgpzl7)m-;o{!22l!VQA{DiF>zTjUsF
z)x&yF)e=oVF_2R5?DZ&IivmEkT$>G$QNTeSteU?OMCx-RuY3!7jX=H~O|%Jx<&)bM
zQ&&qt2|Ntd4@&BA9t3P55)Y*Ss@QQJq!0v2^^lEXK%gMugamqki<Pa*bNUR+S}4<A
z;|%D^O^_!+6Is3i8Ii$Hj36;a4No2Z#@hpeM%PbJH8g+5P86iUqd>Ngz`+^ErdtT1
z>e8ANaKAaAw;6$+qjOatw5bdRRY@3hZgVM|2Jmqc{0CEEAYAmi1}YrErnq4QzL)@M
zJxfV6P$a&XF<MEA8-i&{sP>)J<nE#z@L;Z1FOnLhf>U<OoWL(_E%^`uNC&GJh*|&4
zK-cSd&@$M%3Y929J|XW#5_rCpmij9lx4e~&v1#t$8=2i%=3rN?{DAkjpl`LKug}%B
zP=}8}f9NG&>Z@~5IITCk68FK+_)MZ~mindCnP*&B#XJA{S|-*`zgOqm41E$l_fu0S
z($zl94CP&Y!Bod$QujuRJzcusN1WaE0{GebIwp4A9*lnWv+nJwhJ%fcsneDpp4*4X
zJ5y#R#%7KZ<^Ez~>>gQ^^*-L4-_K85G#a$$KGV5c4rn+Hj)=YYarb_DN8&U!p^ey%
z=9;som)FrkTi;G!AEg;vgWjyet0>Ikz}9Y8ccSj}-0!0kPkX&}5ihR~bU8;`lTrH{
zYVVsL<!^U{Z#@-ZReMh~(04fZd&S%p$(U}M;^Z)SY@~EFc<p>{Aa?(r$UrjRp5Exx
z2k*NU*Eaqb$ub@PN$rc%UgOlrF|tVd+<u;LqC)!oGhg=dxp;N;=%zN^3OQ$O<mY>}
z21kN-Lc?V(e$oYUGv=L%GF(#GlDT5a|2^C*O#Ll;`~p$VVP^8fD%M{Q9>1zIbvP|G
z7;~Pz`)Mo7TQfqe;CSc<B|+U2VS%OKu9eJ$6!o$N$Md?fnWD2g1vA^tpFLj^^u*jJ
zOWrfjXWO5fMM$oTx>WLL3=!t$kUu{)>5Cs$&9&oI*=}zCyjD@k<K5@fY1&e!+47=#
z#O0`G#p#22fp?kiX{m0i;o@uh)1=R*nHd8H*`K?{oF+Z;Oi$Sg^rtlIcrxEQx_+<|
z`lK_`^~>&ApfLICZ_A&Z4qnq2Xn)_7l2<vI)UH)do~jf*A00cX|8(eY0B24g9oc@j
zdAP*Yv*lV&`-~56>=@zr>|&iSiZQ0gFS+NeC%GpsJ-R1wCDm?s&Zc8|{mXD=>u++i
z^R48a!+kT=q?w-wigp>NjUDOh;QP<Vdrt9`&s=mpcW)gx-`Sb+N~yZrTl{jdr8zs>
z?AwzodOjrl?EOoHqhI5$>e$Q<3~YV(5@Ah`;58cBJ$zzV$5fwh8u39pug^;Rxk8rk
zYSpE4o8|{g4IA!LRbxBjzXr~)NiRf<iB{>lT9zM47xvju3l7VllnVTI)s?$4`TcM?
z+3Ss<>XlUXsE(fPN+!LYAA*?0q3=UmODm=tBb8ywv^o0CDt{b~2DJAEe@Y}=Kn#?L
z`%|8i&-)2%VoJ<{d&`D$*m(BSeUFCpi8Rg+e*J0iJUV@uD?s8SB!=vLU7lI%sO))N
z#5XuTZkvQftwmJHD9-(<fQO3^`&CDEp<3Cu?!8sWAyvzGtDFG#$p(GYT=0zRZr)pC
ziYNuju<6%PpM0pJt~C*r{Ad|i)b%m0-yg}I6+N4?N}OK%?WqIixsy7E=g%?apVNWh
zbH6eF?5*;?L&T~0DC7FZe0Edc)<#?N>Q39<{QUPJSJ#u?>OG4;HAzwpOyp6N0)hhS
zl~DqMPTkF}>l};zvBO0SXJZMuyBZ5EBaT@wS<dq#KcDSt=$@(wH@Y1<mV8*@+c78{
z_m4SmkKR4q*?Na!OpkPkSabHYPB-3rq4Z6UF-E<mCr!gFVr^=rJW5&swY|1o;q#s6
z_okia-a=YY72Sj{N{T(<B}<wVbHe62jl->U6XW_S=dp+$(bI(GmCEd$@U04O6f0d=
z!Ae`^`DEJC{@?QOaGM4%cTpCGrs_Mtf}>t<Eq~t|>dV`_t~~9*pl?(E-oy7GYpK=F
z+kzK;Wn$>(w{|D#`uYR=GUk^G=j(ejk8au5U*%f&VstTc)PGttSzIr1MMUDNbxMrS
zvF@$wyhDfID^P42VdPIeP3mEmTXx2-0~L=8idLiriVLzW7z-(Ln}yuXQW%V^3?<Xm
z1=%HiJl_{q3KpkHUYB4`9A|T!s@X4T$&S<RSP2*OKmRU9<nMFzy=nja?*v1--`TEd
zD04b{s_E+wrz-g;hktkc-vmoAn#E_mdYOY<u$Yvkls+Cvm+|Chwm+Y&TMaF8^B&B`
zklx_cwQ&`%kxGp`^uASU8o|f)L(Jh*+(3Bd@a~VrriX+^;uBSO#ZZ$MeV<CE%1_;{
z;ZMrhH(|Ua&|N`)|I%)t@_i$3!MRHLW}N|*axw+|#pVG_RI$W5t=aWY7~khVGI>7q
zU<6g1^jX#1o~+$vW>*vRE!%teGH3G_)%Bg51Nl0+_Ze~st&LTFT~04FX}Hcc*1{g;
zWYBPOFv}oTVzzPrExTTcncvZR<<#kn2J`T=Va@n<F|FQ}U+fp%CqB$G8C;qAol_*l
zfUpqqb9Z#!x%+TZ>ETB7#;k^)+k+tr=3%R$#pdPBf|=y6?*2tzQZ-6mj@DoMcHPPC
zjm|*eKwq0{Gv(yy!f<42>HW)QU7T;C)R>hUbk|20(vj2kw9B^Iq!JzFD)ughf_+&_
zu`{s;i8J3ZiRzyU+IGTM2WD5E=tP|Dc%L|X+WmOKxbQMz`C!03%*d<UB(d+YtyN-M
za)%b94ShTNUv|l(TGjL(KAxe<<}F=azn@dx!8dEGnd-e{JoCH#7!f_Z%>KvBcjuc6
zS2z*t8gVCG?Ede5SRL<eZnuq?R*2dU=Tw_d#vUtd=F(n!5xAo2tAt>C9sfC>?(Nv;
zFqy<L+qe(sV-Hh#1lq<UeAIr6zy5LmgISxV#PFk}6LmZ89@IpIU76rj@MC@yQ#x^4
zx0*WZukLg&GPKYtMYK#68rqiX1&t~E+?LvP{_J)wbf|ScJBOqQUCbhotKTvk7k4JQ
zRtOt=u%_1j6MbrHs`w{JI_g~ebcb&UW24fPDiR(yGc#vp`S%K)?qBms_-vP&5#A+9
zer4<TDTVH{JF9)O&s<n7Q<k1rJ!jL9s;jN7_MDinAF{3|SWWgimTA|Dd9~6O_aG^P
zZ$4Yx+mtoxeLcH9eY%>Ong00$k{@X+F@CN>8#jpb)AGELS|1(XtW_@FK}k#L-fDj$
zUi~&P5taIC;aRmu?|W8V?M>ZVMU;^*<NB;}P!R<~`Qr7aIb^d;l7?SrH%`Ai<D(fG
zlP*uReShb%wVgn(b(mgf1ks+m%Y$qRYJFQx=4WBPzWrLIoUgT)`YAK?JDCXA#f0LP
z7=xXpP8Zk>%!rm(tE!9DOwJEx1vAoWvYg8uH#%Cy^B3e}4D#Q~`tVJxS)Eh0d)2&?
zNH+#G6@?{vQ(E`3mCw9pPmUaRz3oujKA!beWrbB@BP&ah?&$?96c(S(+p^mI80ZXd
zEcL2?E=aJEZhB5(k|{5KEM6qa=oNRE%sgr*two)0{@dtrQ1R&0-Hux6HWYi^*ny(Z
zr~D|Fp%Lauby9y5nF9~HN<F9UmpNWKe$ufAelNeh;jcOCI?sDPF3Ojm+9q+hZ5}U8
zd^Tm!bH+e@yh(X|cP6m)JUjG6!*CaMNveh^f|uy?@{^3iK$^#SX+KKU{HA{&w!Ixv
z^1vGrcyPV&^WSy*%GD8}pG9Jo^b2JL=c{z9j@<UQ$#bR)l<P}Ra^L(g6ez2*>>FX=
zULQ~RF0OO_GVOwn|ImZqV>#adIuEU6hpOG>>uHgXNxZVWXRA%VKHb?i^xZ)B*u&TT
zsmKR`5(Ug`=t6d}oAlYcmDO?cs~(9(-vz;cwKyT89qYz>mrm!T&ocF(PrCk({5fOu
zFZwXPqO2`hkJQZUl7nNr^WmYz4tD{GH>+izAM>SjCh=5z5m)E16#nU|V*i3NiQCc>
z8*vN}bz)QC6^r1Kf7j9Y#PgQ#q~pNnEMx8wM^7F3@9Ak{Ri%AiZax+@m41tsvrk-w
zYtvqhGNq{&=mWyAVs%IAF(bQK)Y^qI>J>X5VTn|Kk|9r_GF4N?0ri(@BMS~+>{>nx
z*}46CciYa*aPGZ@{i6|Ug36+VPI`sZ8ZvRdk=)!<wZx%j<uBI87P5{3S0~loejQ~{
zDx7B3HgD28nS~6FZlyUN(d8{4Nc|06Ig`>_+MqqPv|B$~>>b-<DmsnIPxa>&IUa~#
zGo-62`>9(b>Sm;qze;CU!gJ}yRFgS7D+7JYNHLnvR`a~um@84+Mb+`69_z{yb~Uei
z@g_r11VvosIoX1Rk#2gQ_T3Tz(;`7@{_o>T{ZD*9W`_^d#7xvOTofG>a_Aau4<b<s
z+xa2b87#6IH{$5n>GxB$rnK5mg_mfcLn@u-Nrz8;a(JqLc}FU$#F3py{BF^KPR;|i
z4H*&2G!@-|fz^T?_m2E0m(o6o)BM`kYdxqf<ejUFt>k-J^Hl;RUhOlRRkQF)X+Yd^
zySTqjVO3Ch|5w2wPuWJ|)`C}=(jsiDFmd53Z@hxWuIutenpHAE3}ztFvB554n_@=0
zsq)coRQy5DCj&Q&&r+5Z`|Ii9apa3*#Sw+5Da&Pg^?pIND8W5Xt<l(@;nwy~_YCD~
zI2V$?UlaecaQexSuSgr-<n(+rOxbF$=Cl;WPP^)?k?6kPQ1#_1BB*+HKYOQd=*c9>
zF*D6hrLoT&i3TYol67LaA_n6alv?!A-sS?e{&Z}$ca0!#F*_x%2it|CBma-TxvI+C
zAIsifml-8RJ>iOT67|?Aka$<Ia<cC9^qNCX(GjNMSfevcASa<Ud%m>l?<=Fxs=a8V
z9rfUGJyR!=#I@apW8U3*_GiCPp9B-zPc&uJD}Q)=arn6LS;=G0tp15Jm8hfC*BV!L
zOS<pvfhl(9Y>&pjJo4Rcvth_Jqck%%UN33?OF#BUz|_r_A#~t}_>EVS##tWL)jESh
zWd2*;uDXT#x1&maFpR{t_YB!a`%C#CKTu%4C)L$f-?^STTe<vWQOeznf=|kJUpZ0C
zQKHzz<&z^<eN|4Tsquv`_Yzg)#B7Ef#%(30Qsy6j6SI*VU3!D+SR-P#$25rZEF3pf
zQHb^Kg`dyXos;hScV?@E)Br%QIz7|4IaPUK-(Okt;K}*oi066k;UC{~o8`!!#|+-e
zCnO^;q>h_Mrr3N=cc-L{z9{T=u~XEaEF5o(Odjj+OZFD8vR0TaH&wjNY~ktiDx29b
zNH{Zp9aB==vv@&_VbO1`NM|#Qn*4?)-AZ$r(Z_5XdM{Ij&#$uw#Pk|nIvmSxp4KS3
zJk!@bo_%ea(Xy(&EV|y*F|%yh$+LEo(L$1!^<4YLSV?=StE9xl+Yz4)xwftv+1o|F
zWu0W`oJpxVPm$`Y12Y;@`TS@`Oz!QoV`fQ=stbM3;&o-0i3s^m4QAvOv`*p1=c)#0
z`{%{@>HB;Wn4Vz${kpjB$0XBnD?bAqFK7f~q#f83{Y6D9quP54CQo*4UESuHe@z+f
z+b!12&m2#DR@NuI$34nLN#VCYuW!b}Qi%FkdpE44{^b3;&C)J^1NASS&ksvOdVcss
z6j`27MEyEl`|PERj4akui&|J``23;nr~l659At3g+U2G94L$5KPI$L8m?!?kpYBx*
z`3ho*4tP?s3l6QO-QO@fy;52J@l+I%bYZHOBI`P75N6%!{<3lnokgBh@o>(Y+w1x>
z1Vg@k`5WP-!UJz|bK3>!3r-%$C;ZV{XJ8go{$%zojXB<Hh|($VC|q2}H(<*|+Bc4K
z2T@wBSzc1=^Mz+<W_F%2U0AIELvLI<(UyQ_Op$8cCptW1oIh?#ojY#o;hwS<-ML#Y
z-MZhyKehjddurdG_fU$h;e=y~eeT#M{!g$+?4Mxi*Jo@Rkzac_W;|Yx`B`@H`D(13
z)=v+!R;{b7KgK(x{k9sJ^UNaWS=mYf%Gj*_#jfd>H|EoprKzd{JZp+AFAEyfx_H@3
ze&QQ*aofvrc54Jq@Kc)v5T_8>SL&v0?*4wWtSNf`K&zhDL#6T0_f%HC>oc>neF@bc
zPPgXbhM)c_92m>XNi+JXZSO6Ue(-~%C(J6+cZ8c4BktpTTRQ($>7W~GPWQuhOG*7l
z*SWTZJ(Izfwd?(3y3uLx^p2U+a#`E1?o}tNdaPC@e)T!K@sLg;Im)T~eM@tQsM7(x
zUP)FSwdj=A^e>T8H|D3i@14G$t{zwoZ6`VrNS<2wcn$pix%K_<+$)%;BFvI4%Am=p
zE_|_o$6oB0PqEj)j}h`@q~l`Ylt&+bnV*L2RZEx86$i6Kl@1N==T3rx>Loo2M6W+<
zdad&I-Z+-t>o&LPJ2xs$raYEz_~!4_z*RQ(eKdCJ{^M+hyIzv0gp@)F!-3Aofw>zl
z$Ai-hc0*6!(!J(<_$*g<)ouItP}3Vwota$AWXhIuw49jGa!z<W<ZN+zu|s<)WqIjS
zx1Wb}P~W#3nSVtme<xLRI{FcNG)Jl|R`Oj){u6KHBldCE(W)xCN~~bJVPU;3S*E`H
z<81Y5L+1m53kBjXFA6&YCpCyX4)$ui7jthC|LIM;##k3|sn9Cg?`q)5Jx1Z=+y2T0
zkx`$nVpZf*<E#pz+}ujW3TfJ3h>I1xalPP8Tcf+|Xf<9#Tp?P}t@kTdFVSNyO*1=j
zk%%q|^ZMR^YnA_`k^Gr`Xh;F)WPC`))vpw0Vy7XpuFKsYk{!$8O&w)J3%{O=-SDjA
zk@63oc_4qy(EZwTC)S(8!;bTqGR|Ov@4qAdQdGRZ$KN5Y*}}M&?2)0KES)p#WxAb3
zX=G7*PJ8FA`Xl+(g}XKc>1TN^md`6Mx%+)gDr5~^nCh)@Ci#<#7>?BZ6R6?+3~nNB
zS~qX=(H&5}q55qRf?uB(5Jl{652iXFOcjRsN{0Iujheaw`axb2sgfUQ62WdnIG-LI
zPFk1TD<tu`wq06`x1u)c#;@%tzO~5K*ZxD%#%EHG>}?wKBzT@2zI^BOP-$wkBte7J
zxSn@w)!_HY--7dpjbB4{b8ZkO#`=uMj77vwwES4`(@>@Vm`z`&eB!p{``p0pYJWjX
z^7p~i-N79FgprPA@r%J(cdlPNV`TVp)i?O}{fjLxzJ4!h9J=`8$F7OTX~mCQ?Oi0#
zRvxpq)nXL*_yk_@hWo;AOl*z(dQ=oMb-mYHzrHGUnCWB>Z{40DP`AQeQ6*<Wkgl8|
zDBgd6WcBaJ+Y8^a(v^Br+EnEhQXXoTRzLgTI8s4Bkm%f-E_P(;-oG%*j2yqi{g|cu
z!1YU_M5Mm+=~MYp){1utb(`16&73}^=hOXP09!z$zkKD!)?0Dz*JQ@}wP)tl*jmvL
zD{@<`S^szU*N^Y`+Q<sCe8ux1D;8GPyu`86+V7t+zx(>%@w2n5(d2F?zTam3ua0S_
zUReLDo94s$?_cHl?@;}II6AF<e?2-s9~};_%SiNW`&kM4&B@hhw0&~5zmhDL;c8;<
zI(>6(PeKi=C;Gj8uHOf*hCd9?eltG58V?JTU+byy?d^x<|NV2El%AT54kj0;FGpAX
z*CMoRJGnY~_~q2DMvh;Oo_ey$!_R;J^><%9-G08`xgTlg#r-_+_xqid0)6fW-?wke
zIpLG1o-1Bkzuz|(55s2{=fl%)uSR8XJs#VQoY7T@s`e{b99^8cN<BC(X&>Lf>r0-#
z?ej9lxy<X!8yTJ3KHTYOI{NC5->WS1{}wroPp_wPf3Swz)2Dy<ZdLVGF`H;Ld_fsk
z{>JY?+hzZGJ-kxS6;-~ms+Be?dsViG&rf0W<)#;Ld2DzqF4jC9Mh_ppJ1c9P8LR$5
zD^GdDoyS%hpB!J5Z*p|gt;1PT{J@zYyl&~Lzsn`7U&a1kYb6hlFHVo%$_%(={NG7s
z%<qr?G1T$H`2Q;$|LfhmjQ_vt8()j#^ygN-{rTHa-~93G-+uAk!RKFo_r+)5fBA=R
zo>pPdA?kjYV(_VbkiXN{xpG|pclS}Te@{nPKL5iv-yeMWja}lY7Lq)A^z>+WT6*#1
z;$lN{Wkq~cT}n5%cgxlh>d)iZiI!^P@_~|UC~D(D*xUk!?aj^2?eo&F8~&W`<+ghn
z*x1~j{HI+R+BSzj=83cuF}6nz<zvNW>f6T`(~ZjohYz#D9VjFB2339Yr%z^X<^A`6
zw7>W7m2ST-B4u}^es_I-6nb?%SgPCT{Q8VZ`IqOH*VD<T)6aD9=likW&HCm~av}Eb
z?$Kx|_CIVM__*hk->nUbyMpId*jHBH7mD?iEAlf+4}N>f73J8k{;%QD#o>5(E_Qf-
zJ3APAT6<wbUYuAtr#(w&J(?xjSiyI5PPwt7D4W&ae(dq{Z9cLzEYe;s`qO3HCi>ft
zZMjLh$->azTB&WBJoYL3JcCs!sV`2BR~XeL*e#ulOZh1&O!&B3BXdoUe`Jdkvox#w
z_I`Enl0ubyB$Kz0d{o(q%=-uJnZuWVuK!}YdgViVZIC}7&wqu#JSJtZl!?H3%6hx$
zhPCO(CmU@G??<^zY?rb;(~m!yQO3VKp2Y-@nTNH{ylk|cu*@v3DwqB&J{2=Bk*&2Y
zvV__FdH&w7YdUcDNqSe!KC}DP%dOsrQhMpdVlQpnx1nWoqY%DqeHWEF9=zz+t;^;k
zcYAXG;Kl5;s2x47&9O@Ec3YS==8KJ>_%!R74rQ>Ic^fHtm?w93CKvPmozeH3vTeNi
zN~Q6So4y9zJnn=Bg@*n+VJo}0&o1^WVxGEtJ=~`1m%pFl$CZU&EeA_^-~Xg1r?cE<
z;k}5sd5c88MZ(_d;p0^!ooPj8?{1$TwAT6y#0P=3bRShG8qBMzNB`*2Oq_i$Vj=9&
zBevF0w%ev_eO&dq%szwaO-b7}Y*EB-u0|)_kd@S3c`xlVPrlX3cdL5Q=e5-<l=HO|
z-G{|3?9Ga>lkx?AWD#2*8A&_uUbsK~rso{pu0H>-^Z#}JFOOHJe$h*z;w+VAF`#S5
zwXN_(9Xq_9UX->Vp?(#;@zI;|zZXAOiI;rKy-^n~9Z@0rg?eRiZr__XGQ}SrPwkHX
zX!_0d+2`Y{jU}6RcA{tTK+m&-tI_3XIPEzI?k!1dtos6H)&q+HTjkrc<$$V{#CK+s
z-UplXeoB+xOKHq`Uz&08A#mo4c`~*om0}f{L)oLTKV#)xJ?UB>>f+(7jfxriy1kHd
zvGJPHtxVjS>8*KQeY%mi^5$bclk(l1o(zv)Tolpx_wne}ldsHcL#gz(FJrVSp*;ol
zVU6egtpB|8_RsD6Ke1Wd|M_tK`@{ZEfB)xyUYx!dR=>3u^qw4+e}6N6c6qja^=kV3
z!EiV{dOkYeJ|69V*!VeL+Vr`7ucyD&^A0;bYc_h`VV`H!HqX24^1RPYo}a$Q^S-ut
z-pdZp``h5T^Zw3l+dFsP-TAP&^Y<G6f4C}N#nBI=tAop{3-i5rXItRr@jr$^SUmrc
z%0Tg9{Qq^1|2GcgopF2>?WgC*SEDlCerqK-ubvExw)WDT|K{1*>$QZ@{IH)t_;z^y
ztopYyPQEyPzWbZA$slb%FQuMtm;e3hEQijeW!InIe)?Tew_Te;I(?M|q0#XRJBq%)
z6!||}J@Bp6z)JH%Bde|9;BQ;Q$ov3wcL7{Wo5RDc&iPC|2G9xcHvxPAS-R_exNWJ;
zdAj{wc`m!8F$R`~#|3&1<=j^8CEd5O{I?cwa9i2dCOE%1x1}OaRHemaEWkLU(aDLg
z32#*o>g`;Ioix}Pq&n>uEqgnio#n6U?NB#^-9gGJ?WMSrnp^3j^4p0`6YD|P%T1Fb
zPcYv^m&k9g-W@b4m9pl%bBA(f(WR!)<z}!GQyDtKtz?agS?`q*CdE+irh2yy=@z=+
z%~`U`p~+#V33<29gPomQ$%3+rX|NaL&Q9r!Sf;kSx6(yrx6{;xtO+UPX1BSudcj-M
zdOhy!?xawMy`3BfdxKl)vgpzv?uNJ*1~HaKt#7YiqjHuW-?PT7cXOfNxlLMP+njZi
z2hHwY9R{Tr)86jjR>HPHi)fZUEKQQb-fpvdD;*75@9pk{uv^F7W@j($+<~xd(VeD9
z<sc>dpmyqbD;+Ib=bfFjlk43gZ_REYzm>3U(o`3Fs2S9Ir6*!@8(nHY_}wC6dnPR@
z4vJu>Tj}`VV`ysol;Un`imZlP>1fer*LorC=8|pP$+zc6gN9PTqFqW`)_Xf)=l1%w
zA{zHnX~*52Seg{)LpPDpoTX}mqL_B}LabA&2e;*yTM^yOO^5^k<OeCn=2ki+w0sDA
z)^)pkVR!FVO_5vuIVh%YwwhT{n{<12=+C058`IT!Czv|CovziNxgMAW-V{Z#TgrMX
z9SXE4_}JK2m}BX{a9e&&+nhy5mk&0UDwePJ_Wo<y<}6LIyIZ;~mJcz9!R>UkXKBXr
z@f0o7?4|NG<(*pz+XgNDmv(o{XTLMpD}(2)bTnw4OV5`=nuL~`?hI}vq(w^~><xCy
zAUz1pZr$v>t%bq_{XH2fOC$G^r4s79m@4UhysPG_^PPmjhmBju`2jp{SMWQn!E;N>
z!PYJ*Z>!Pn)?TaQmfl-h?jChTvHCy%+Ah%fq~Xjv>v8qeg4D5u*{TzZR_vA7TUu$q
zP#+AfSiN<vc3I?UiS)6|Q<NXu<8yzsn6m>$tipdcI%xw<iRB`Uxh_M|&TcVW!ZLYS
zMIInGF;0VA#@RBe)Ol~2Jgy@5K>q~y2F*^)&0Z6Vrdi58tSWq%6~3rYBM-$A+A;HF
znLMl_59B_`gQC|4O&K?QI9(=>tH>j{r()RDp)N*EX}X<VtNX0${&d{CMpRfF`ha><
z|8-K7AItrBxYU1@H9=8F#ZcNQGk~3XZ*Q4AtRipwub8d7W*|4aq0}SAMe?vJ_f7|-
z5Q^ZGZrsbEm<h||VHJ7X0ZlPri_YE|#J$~8kDW#GxGMKJ%RNLhOkx={iYBg8vrHaV
zk<W53N`J3pT<q^$41nzGy(>CrYkn8JaJSoI!wnOYq5=4QXul6<kH@&9u#)S^`1R=c
zsU9f%VPQtSVWm2~IQn7h^RG9TzxaXsAP|p(;o%wv!|mzC<yWJV>1X~R%oFWWZ+$*1
zOmCTY-86TW%Ze*z>uBD#G@A>X{h!p2$4_P%GbTMY*B$LmVZ8uV(!J=U)5!GLo`Skp
zt$9*+Wzkd$wD)++UQ3&C_1{aK=bf{<-FsbDzVtS=@e1YJ(%97r-FEO~ur=6s^M74;
zwmo<Ej$4<*Qhn}Rll8*(_NDvjBGr?gwl+^!*5+wjo3-QP;cBwCuG!Oh&2Ci}!^8MK
zbB#ZxL7%=|gBJ04ss^>3`zM(c)ES~aNjuy1EM~0;J$>hf_S&x~L|k8r_&$pff3he)
zeH&5kw*1w|eH#^hn%=eXpT3XAryNcD9O65*|I@!m`#*h`_SYgkw)y37YMyah3z4R<
z$@0wVAgp7=wduZTr%Ux>>gAY4+t=)zWve>1_S<5VElXir&!SGo=fhKZ$0|Xq?Mqwv
zTw^~y;B>D?^?CK=)9kvROFY+3eP>eI>D1W{gl)v&Nw=W483dzO{_)}{AAJ?z@zRjF
zR?|e+w{cyy=6fY#vlZ``uj_u^Y%Pf4?5it=#oo-Wh;sqloh5yLr3m%?ytQ|Kce53k
zTMuc@&JJn}^hxe`YK}XWUVXx`bbAb-%;1ZpZ#Ntk^nIb>4Rm_gcdMy81V?X6aCP{$
zIlK))&nS>fsoA@M)#THStKrs>%^oJd-MBj3Ix+_slh4+YcHAF+7P}`Z|8!Cv&zT+1
znH_)D7JNJv!wveQ#C=*a`*ib$1i!U{kw9-ueYQ%jCZBH2k{z#1QL;Mz?Ye`+Z51{q
zpKWd(e|DGrx@Vnl&%2lU=PXC}=i8NO1?BrF-`kbxeUu~qyaE#MnB(y;p5vs$@0ZCj
z-oL$F<NdeK)m_%KF4NbXEQtP~pdSr?qa|s1K)0hIL)JBCg{c^!w`QxG<)_Y{*m;Qa
z(dc+|e3!ekE6yU^zSue&o`>uV9<ncW<PTjhI*~kF3^-cVuEVXv)$77XntodrTgqnW
z*3kV!tq#+!|M=&>PDd|C=ZB-KX9s7)D?0~xeAg3TH=h3rO-{?_zfw#e&VT(H@Bi<W
z>@LoRlj^hK<<<CTbb4A0#*3@Jasd4K&_1e(9cTLB2x#e4m+|@Cmw)>Di$|qKqluoK
zn3Og@MdeyYJTG77gQHT(V=gsr+kxwn#+`;<4bMhXdwilOG<#a3^mgGfJ}W(1TI2b}
z_33eSII3j5mRZE{_Pwr0_{sEmeBthcZc@(6J_^2SmB-hu?%2OK>zL9~!(#83{4dT%
z)%9dl@TB^GhF9m~^Jf!#R>NMsHIb^moLaA$m3?j%EZtTLdu)woRrluQDUQ=O+bdln
zeRgp*dU!n@pFaFt**=|KUyhI4+`MjbaqmZr%@e_6FY5H-bX1z<NLiKk{^7_{onN1o
zdYJ%>b?*M+d@}Rp@YUA`U;O*0Umbk*hd+Mv{lWKt_^Qee)4kT+!0Fj>8Jte8&&@wV
z^{8@B5WmyUb`Hcv&0f+C8{;-tr?(>=4cFy0J(=HH?zwa*<KIViXL<JH<mKpSen+``
z|CM>$^z1Th*}u{MP4jE4xS1AvQos3gXzWt<-@<ktJusi?+q?cZZb$w${N3}viSVTG
zo^!o4OS9vD8+h$(C;z**Js^*ijbnj%JMI10xs$ufJNWFXSUXQ$y!q?*Q%kk&^`no&
z<Hc})OKMO(@P`<eqm#enwExNO=KoOYN6xID&YJztV{555_UA#h!M$rs`f$T81>bk#
zkNuXh;k3Epk*}G@jk>+?K9hjslT|%GGlM=lc`<yup7($A<A3<JwNhHT)s6VLjkiDd
z`!(`fm1fx(v(gq;#`zkU<?GxZuH_NC>C!^F^*#9G<BmQ1m8pP$4Qq{k{z)?D7bh1k
z<+Hwht@W<^J|LZ3oSt61k~S;4-hevBOmKVQsSIFO7uV08mr-fDUEP0JNMD=5``LwG
z@0@O&+gCMPLp#<uX%{c9r>^YoEhwjozgCB3t^cH#cE6Brfb|e|sZD9A)3fbU)%Nu)
zXAN+)kk?_w@e`w8_n<&wdVVr`x&D}~C3v9(%|f>3qh^(HUr6lx-PkNf{`Ke^_hhr>
z2My~%YW(D7m(A;5KONW=_H!?aW4nQEXI5ekb+c>D9#X2)jw!U1(_pVzIl(BOvXd{|
zxp8q>^6h{2NoclAavsk-pqTv3zVs8TshyF2c{23r(63Ca&c^3sGiz=XtjRAgS>Em{
z;*vnE>~%Wo?<Q*9yo$68i@9A)BbPN3ol+Gq;eG%2;l|m6b9$m(A9W&Y8J%qU&Kjru
zD#pqgB2XG(T;|3{@)cp(egB8g|6neDE?$mg>lIUbD9K<&|Mcu$cZOdwQPsDUJ-<u!
z(Yd7&Yzmq}tjpM@-*I<fM^?(u2maFz0S)3-hhm;Wr;77#y_XvM>4(efz1_XNrX}bx
zSx5qd-JPA?owy@A6Tb`>*Hh~y8$j;Ursb!{72Egp@nSuxo^97C`TcI&E*!ro?XkZ`
zu#9@EZ76O8dc!}eUOj(PtZ#eKPK|Fmpcvu*qk5!ND|uN-tP(3rP<3<qv1=HQPws3O
z&y&h9u6{Yo*fr?mmFALLOLsJrKWU9*Y-CJiEMyF1>|@MhEaC+#*{x%YV{BtgW3!A$
zb>S7tzh)bc>aKM>ocNC1YhKFv<df7fF^RNhn8k-?$wB@nX8An{%}=uU8ngITT6;3@
z?k&1xADhed-OWy__)8hsi{{z*BrFZMOG9l>FBhG><ptab?ZEKM4zQiitTVv+=ksNm
zVZW|yVCxK_tB2k>m;FbNPSp?uxTE!tjLW-Pa(+d6G1?fG!D_H(G({b}R^zT2K~0?B
zv)MM74W)y@Y%JB;#Ty6H9~TAbeHw0F*nbq!`sVs<bTvL2o_;<)Ia!b$TT8Lvp6#|m
z;%?;O#vhE{zwx(H7(4dgT8aE%-~EHxckx#HF81xaoxbt5bGv=F)3@*TKG=8IcSp}(
zjjyh!@opyGV(Qzhye7}Ayj2$0*}>~SHyPi0s}sDLC%DB<JY2F9S6hgSw$$OB4K42e
z%?z#9&T4Yc1=@lMYX1AIqT|1;GloX5FE<_>7E9><>&=H7hs6r|_|VLs*Y@PMH9?nj
zy$y838sp*88skFS+^H>0v~&b~p)I-zboRT?pnIPDo!hG&U575F#XjBcx7EtjqlM<Z
zVOjC@{n33fU%$TipiOd9%DBL8DeaABY)}3dzqdz^_+)G~-|j}#^%u2`DRm2W8V9Kb
zO$+Mn-JQljnbEXqVw)zmX-GWd6I+nyd<L;mn%Jg^Z5lvZuGE%mUaVO`Nit|dW>JHs
zG;=lt#5N!=W-yCsmSf_&rb)|bo3;GkB4%wB=k?g>HyG`H7cqD<6N9~I*NX~Swbkrw
z2Vg2f(FlgplHU2%*6w!g%}{L(()NI)u}pb^!TYdQ(!4WEy4xll%wQO7XBUO4%x^=>
zwM=!1&n}g124TiEz;)OASFBQDKn?b0l__Nz5Sltw+|HT!(&U51z!Yb>7Os2by3@>S
zP|p+C{rp+B&AhOKowl$I3<h(y(Qetpc6)~cxKOo?y&GU!KAM&&&7k#0%QeheE9}e?
z>~8PmS*hZ@wbLxVZI<166X)`IFEv|<-<k6dJG0Q1Rordc)tsQ4NyM)At5>hK$J5j8
z^YLW5{p{l9!{hPG-M!t1#cDWycz!y0XiwZeY~^x$@_c%BI-fUvQ>Z>8PA0<Ysrd4Q
z;_2n^X!PjO(_#x;Pu5J+nL(M#ykGrp{QtA}rQL1Z$fEN(=gfaVJm)&3L{hykvZQ<E
zC5h&Fd$v52S#ot+WK-&xWGie^mNJv~+g}y7M(@o<vYd$$OO0-zP$(1%RRy3h2bhcb
zElpAbi7MPWRoK~Y<4KKqR8WBGz1#Whn>x5RGI1(S{wh9hPpFlt3G}~4{qhM5EH)CR
zxDED-23oh_Tl$M#9&aIQ`xI;!m}^m%IgaguDsG$Zi@sKIpdEcav=Yc>_&TWjkk_or
zYf!i}LW1UMKIS#D{P3Q}jVogaoAQF@F~-oI)SAq=aXVbRm3uUp1`qGy?YE0~($NPm
zoE@-!Pv=3jNQBA@2E$bnP0*lH3jEN%z<qL$+r&K1e+2We%ekpBG>wvZpa<=2p<L#A
zQ^)|Z+-M5vO{S2(EmNqV?i)=$W~*AA*UQ!U)V`s*Hw=J2*TnT7cbJ0TO${7!?-q{H
zVBzRZ7LKtxNfdjdoF3JtM~708<=d$%*xtIxaWFG3bgosp?!cicDd-bEeE@|{rw=~Z
z5>$OPefG)l`~lN&LH(kiOc&HgstubKdt(7Wld9dO!`|4w$pAV~oIZdyH7HSkOYppv
zc<LBhgyA=EX$7F__!h%&;L|sTKX7OTCddkg4XOq70nnolHW)v&!8Vu<)W8AlOk?<g
zrc0X#YNSQ$LdD9Xh_0+N@y3*?Ot09d3XRFAU%Y6c6@lOz^td0cZu-lM!Fr7-m-|i?
zqIzw}Z<^5fO4%ke8cZOTZR@7z0gs7^sm+XLU=$4u#O4DB?pDRE5;K~u>9%8;x~D@W
zRx1?;EHKf#c3tn@TYuP1-C=#Py^;D)FN4dO-cDi2WUW^hTre1&MtuSDY)K!eQaG>%
zDrE<@ucN_;P8k$|gl-l^5lWws{P=_huSuWKTxI#(0ewZEKpfI{KFCo`JbA1v7379^
z)Ex!~tYt^H*(2KE^3ifV@#uSoeB@x!_BR~%XbmjekdGR#&BjQF;?cI#N5d13mhZDi
z2h><yJX)qF9!<{`kG3WAdZHEZXlmk7qT^sZ9Qo+*w?HLF6OX!Kh(}Y`<)dwjN5}ER
zqwBch(c`qIZ|(Fk&1=fqJX^l?TnPbmX!ht0G|Q2Xo-G~?!3G|n<BCVika=y<n;!I=
zCLTRkKDs^!=3@vl`ZzFo8{i`Y1PX@FF#&3soXd3HOXk4wSpi!@hJG=0&TkecA803*
zCAdg4Bty|&WBlTJl1Vf{NMIe;lKkSNtm8=5F+E)}iDydI@i|-vIN6p=V#(gtJ;_DB
zE;~pQ-3D*nw6E;6ue7wUL}z*!*Dif@1Wh$xa-8N!J~T}?MTb@_hdyOv317E;`KU_-
zbkTBTP03@v>!**h*EKEWCf}57VaP|v@ly_Q1>UBs%SS$FaJ5c685Tzo-tpoCOZPZq
z7@Tat7={o;Hb)5ux5F93aD+&5I2QsF@-YF-!rP8dt`t6+6ezkF_~yVC1Tle`bjdcp
zWE)>}I%<rPK}<n0cq<084Gb;?3Mku>D_lA7byM<+;iZxx9oLcvakR^x(3N59N?tKc
zCw+9w`p0n7u99O_^TfzBIF1@L(iB}`_@XN;UCa))&O62BNaGrU7`G0eJCM0pSr0^{
zCT9XWmHDQ8^c+cR%aj9G7XugJORDOQl;e&qN2_Z|DQxKS(X#ARAloUc%N{T^ITj3C
zifPl71HesLT~A4A34s6vb|l#yOZJPa$+te|T%FoNgis2bT#R7YQ#t?g84PGGpFG_Z
z6m|q-dxCBl&`%Tm>~jKqh2C>tJex3Eo9V=viab%!lHG~)G$eC7rsQZ(@Gb}xJ(Yf@
zl}?;eG+C(>(TtSkwN&^h-C)WLrX@4jZYmxvDHEmiLzb7Mm2*CHgYHWzo2F!LSJK`v
zWd?&wAIz(z(uu1um_eu&Ky2RAbag=rHQz{qIkK3^Ta0;mgA5FN=*qWTaMMRSMMnyG
z-%T@c*$s~_ACB<onGzkJvw@+@kwo9}47zMQK6*+9d{p-(@~g^ZEQiS?9HF=4Y7Xgf
z=Mlcisl)W39f`)1;NtMVMm<sJzefL@J?gRd;w2tV!7YR|@1fr1uXlIr7VO;yua&{W
zbC>#P;A8DF^dWPt^vKQ4;mr>2$H519s#?aU9-3Y1mAT=I()icJzc*d7hb|vTTkNyV
zKRf*MDl7flb%ePMj<75h%RFK3ol{v^{O3#mHMFc=<ktsuF}~W%c2oVGF6-aaWH4EV
z(i{x$eiM)Eu$=5qX(d(zmUa`CPC1tCy@r_~&U}W_Of+E1;IlsSs}bs9<8s}y2VBV6
zFrP5F<$!+1^>z$2$(~@IW}?^g2V7(D^#Pw>eCCbACzu?#fW>udE*^1xR%i?`rwE-5
zlr~#F3O&Mx*;uHnU<`<fMjvo<&(I7}tpRX31Or87`p9Pt^uKU}5rn;b1Ymp~!AEYl
zn1-wZAGrnuim#~61SI*4jE`KKG%b+w;t};4_UM9cs>?^NV_InF@E(9EgA*ByqyblR
z!Acr%B@7>V75K=hV_CQzfj_#UV=W8R7B+cUt})=+xaB&c6?O^?ANgEnBWswv^Bn;f
ztY*>aHuO55E@5sTaJAbpY)N%o!(&h0fGhll<PTFKXu@>O>qNVOfdO)JesctO0I4i)
zC!*EQbtO<TgfjuiBeZ;LAZ;{L@*F%0%_HSl-34X9;S;Jp;F^+wDk4W2)d|j0jv*>W
zj^R26K60X&ro<L3xPc)U0&OJTTdruBWuWoR9!(&-$H^`^(Z(GioCxriYgLAA3L+WE
zN2Yva3hP&3aSXU2Z$ifj%q%(fOiOl$Y3l=Slv%bWis2*IJS@n-4SO(v2HY^SZO~<T
zm)U0e))phuu?@ksjtwM{N~tb29?|)@w}De>z+DkQF5WqSsV8`F!1Z`XmPHBR3IW8)
z?l}UA4+7202_PMHQsFi5rL3{sfwcLcF9ql50+j9>V!(o7G2n`-q4`2Q8mN$S#xT%4
zVUMOOI2$DeS9MLOf~$k3n~Dc)41Wvw3ac0$ReZpDmK^=QEe9;xP@HpgDPTZpmKi{r
zi$_Njt#AxkF<LUN7^S{o*a3GN;j)V<W(95$fe7GkG#jlp(J8!n&<;*k&lcm~M`s!D
z0AJD<Y7uscCHo$!E?rid6!yL+iHbX#cn9Dz5tlHwCx<SIR96l>j)IBo&0FUfLY$ed
zE~U0FyAUNN7jhN?6ZF-koH8uIp*9XCPIVY7+?cmr*;$TZrt)RLjX5Z9N>W<_0Wqe4
z)O=9l(v~5(nV0Y-f9dF`6T;8qvh6Ee_7i>$+YzG|=SXRqXhK>WfSlpTM`4<nx)^MR
zW6PP-@ns*lx|EJ6l^xy`OUy(_cvnapU&u`4eBLnAb%jD<LepF^m-&*(K&T3Gc&_MM
zWNe$0*pL&hgMMT#ZA?!J9-ssFIXE~q*gS)5o8y>*^&JqD+#qx<PfTkrpe}rrww&o5
zkQSOHi+Lh%1_JL>*K&m<rNCSXffEWda7tnoeA4%Tlen1jKyC2FqbnNk0WdyRF)s&(
zZ#ge|sN3*ihz&B=Tl=J{W7*2*_xPc7-IL4?-HjJMbzB+i>+lQu4W8((Ih{V(_>Mno
zO$(!0>$ZMbG?Vu;E{f`ni=x^Vmf`3fGM?c9L+xtX3fp`%*L6T`a6uZ0g5}F_(0w4Q
zeZ$8a1nRZ~;Wi%&{goyR0bGNq*%D+@xE3g4jurOtvMYM4HjTX#VmzCDBEEwrs^c1?
zQx3@jNH%T<^?gh8HNCD@n=U|Fnhol@ZG+V5SKYXA112qPxv5*AM(H4y`t&lW=H=i4
z<!PO=x1F*#dtzL#uzfw>-|L>nFMwe!O=T3uCk0-vUQlLtA~-(AiOz#5uz59;<Fsob
zpL->6@*|*<{{@4)?)?q`x>3QPgTOV4_~*w*<>(-Mc-UG<hejDeGo$A#W%ZuzT^X&i
z;cv914Sx;W{l4tYe$2rIFSwtMr_js!Sxz&3hP{4igk|b?Oo>qKos?O~9+s>2QnJHL
zxDgNo0{2t;?m==dQ*_ncfTPs2_ki~TDp-E$I{`=4I{`-zWGCQgrJaDI&#&}0ZUl6<
zu(KTuLYSuULM^_LyqyLChPP`vzN@)Z^{Cx>1}3Qi7O`Pjmfw0Gpq1_eG#|u1K&z=Y
z%rCY#l(PeqU^e_EPFn5-%;&CGCY+wpq)Zs~%7k%e%7oLj23w;{7*;MoO^GUzZgpoG
z#4e%BX8g6OzP99RTf8nP9CxZ_I87_5O=^bIOskqB1vygH9Ai!uh&E*m1-Mz4(d595
zEb!VgmIB<a%V=}pRu*`%Ona3lKMGOjG34QDP5D8Cjvp#c-}Nz&g`R(>^OMWDcScD*
zqa>eMlFuy3XO-l$O7hty`RtN>PDwteB%fQ7&n?O4mE`kE^7$qC{E~cnNq?{-&iQwG
zNj^Q7k2JHwtz-r0Cf&3$#e29oXqoWi;>k1i4iBw)7QT0IgCKurIxshRnxPw-C*J9X
z3IADmA*R9J(H%mb??O(E9eKm!#g6Nnre}j8Z+Pq--L2`mwhczD=CF4*?(g(W+tghP
zbXocimuY;{F&*8-4XE@TE@arA<$9iF+M0N$`>yGmU>utUhlfi?nr>?J!fg2t@bw+2
z*Vjy5hd%Iqyfxgm9NeeG-?=bJYIswlPu&Ido#g}lY{$`XR~~<dcbRG$y|7%u(@js;
z0TIiwWLY4&rQ3Lb-S*i#T*WgD&DT83w&*ZKTERQpvcSq`Z8MNozUw$XG{R(4HhpJc
zYYnhGT%EmJM|T(OQl`AW<lic^cjrz|KI;R6ZXBdK5gqAt7}GtkwMSm_>PthA&s_F9
z)AcI}^)8$M?$_aq`8=MV#_{`UFqt%|qHeQgVALw3RVpYsKqKFRG2=O|i=I2fZmopk
z?hGbd3|DHe6-uZg2K73RIXVcvfrCQ~vzm1tbMwc{v}4r4wI~j@?b<&6cy)R0A2ZXB
zz_-n5ZF*SSr8~N5ToL?550%`9MYCO=^EW%A|2xNRf#Xt-FBm>n%V1ZTtWA-w{095X
zuQkv7yRy!jLR$IN28Z9m(aSfrqAHFyM5_9CS8?^;Z{#X}{lHazou^lXn#vKPZZj(N
zj)#qgTFCR&Wv4dUF3X;J-b>1~w~jLWkEW5&;|<9GO)}JxAtO*3o{Y_u#$s}1Y$q84
z$<^c&hesOfzZLc4z9JF7$K%m$aGM5;Xk&=;L%2(hZOyRpV2ud4<Fc;~jgrGd7O`_i
z>*#@i<454DkGG{UaNc1HK1%=`ojQ1R>Ikg$@Mh&<z0#M-`}s7!cOhQ$1?ab(f$t+s
z4zRuL*oJ}j+Da?R?f|SE!!TVu(Xv|~=>)gtdYX$yrqMJNJHY9-kKv=7_F-~ddqBMs
z)PL+g2wiLC-fk85|F_@Ssmk-}yYF?aoywER+8Dy!X<FcR>KC}3&!zR~HGWpzsr5-S
zwP#1p^cb}~VlgO7be_)|qEmO)*L3vSQ#+X$oN4=?G^s^A5@wA~6gDh+^N5;ZP2f#h
zk(#+!ro9f784z?yoS$?NRtApZ)S=2!PUf4F`Q~Q6xtVWX=9`!K=4Zb7nQwaf%}AZ1
zBBf`(>6vdv=9?kEkp?>4N;=@#6pxjn*L``8uB1%ImMMMUJEoFyOjk*nuA!uK+mb1N
z;95LoB2^4ggUMn`XflO#w`Iy6=$0!}*1$5-lsWJWO-Y$r`pv)q4m`D%PB^`|1kJQ;
zRUe7?S=2#efEyj~XFv}s!QZ;cJv(F6D}|Z7ZIwboc6Tq%=+&@zm+9omw)KO4KMx-7
zeGHQQnnmo5l>*ndY|plJSC?~p8>Ijb!^6;UEVq8Ms!k_*-P29W!T*s#>y!c$<DM8M
z<~8KncBO*Ws!!DL`kqCaG%dsONkw<0&MgLz98icm#d{jNx$HlQ>;Swxdm9HMN<2%B
zhrt@E?w#{+GshMQ`=8CjE3&X|G};O}SE5=mFu+@dfdO_!c4b@1)A;7;o9FGc_iWy#
z{i#jb`(4JCy8VYE_U!rSr?8aYK6)+i1RNeBvIaN>uf3DCM$d1d=I3HoR`@kP>&8`Q
z{e7!W8)s(I))4N<?1ybOtFEPiXPFvRaKSp%Re>S`#SiL&G}g&lD%*K{a~_xO#AMaq
z*+xvAdg<zBpPT@wSH`lL3)<M-$1mnJP>ZX>{cbH>MQa`5&>b*5JkNC;Ig7Ll{imCr
z=37wIZw~!uxrXUlo^2U&J!_*Kl9>s^)Y*9jQ?F2QZ1&x-Tj(@8-_z^b8-})Cg)7K?
zMzkg?Pw)<D4hgz~hN`bNzuT3$zeL8*IuzaZ4n?<}Mq>9&vq2>>&02?|{JeFCqTQfp
zm^H36(`?~T++~M9io04fyIrlSV>EZFgV+r^h-o$`A$BuyYIe1<>9Jh5XtTjo-Mwlx
z?_Q;vN?B)A?_L#UiXFAC%h6T!9boIsj#Rn#QE>qpnUtAHSvgmwR_UsAa}G$Y(gB%w
zIcA-VS+}B@Nm-ecok=;Fl$%L;nUt@jj0`SD1{WiPi;=;_$lzjRa4|Bt7#UoQ3@%0n
z7c+y4nZd<Wa3Kx7ek*(3qgUG~yJbB!Wg3~3qo%k!x1^j$uc*oI$`vWyQ+M`yR%V~C
zuBOzS&cL%XJBKshj7*9<ZTVhgFS|3@Q&VOpWf+-#&5pYB+0X2Qc0D!axS2iDnUs~u
zX=YQpnlh|RDpQ}H`KD*erFgFGPIjx@$!>*ZVsGJy7@8(Fi<}^L6BvztV|N9=vD?N{
zSw$UfnfFz2gDM^N(j8hTHbxBge9Q4&+#KD0cOG6QWdU-wq3cbTpXeB=(Stg4!);n6
z#*^HR<#?`cQq@tb+n%Rmz)9COp@deqMbM&}r9lt0`;KGCykGI4ktI5QX$%0S=HpRt
zDD~~h53gR34@pRnzW51L>cd0wemNY5NzyIX&tYH_W-&RMQLdCbGK+25?Y5#4+=Y(6
z`#OeY#oL4EZ(|)h>;3|Nc=vD{!hbZ2XAC0EZ*35zS?diVtEcO=1`*!YqStS+(C^A3
zvYNJ7=rx-Q^rrhiGciG*)LtuebF+Ix_4V$acs72dAvHf+v;nJWV}{;fFj;p*yVC6&
zS(;UQc@;|Y0Ht*|r82K?Z9cYLqtJ~2{6<z#Uz1r0p|bQ!c33)6_xIUgqbj+@J<mGz
zJimZxftnBEoSLd_>KQE@l0wNg&GZPNo2rinRhC7OQZqqC>Q>^UW6*k~THjJru4yYN
zC!_9L*}B~{WzqXJMara?Wu_@ZX@M~?Z8c>%Y75p;c{RU6<<<NOmDk!><uz~_!Spds
zB5r{+{U&8s5&DhFZko43*}Wg7)WoYPH0W2;vt`_=HZ!(1(0#DtTvzk_e3Tsw>gj2Y
zZ(6oj9kWR1P;|qw4AXYa5-_^q8K&X6W>e@R9oWw{HQ&e3EqO5b&9M=oYA_}P117U=
z`nKuXMKJKqF<_uNqETyktV{G%u%fvVYWZ5Zo@z-w)pVU!da6Np?IQbPL{pyl(pA*W
zub|wR;XQq0b~hY*V_t_EH)nLiXykI-dmuL-gtJ+=SlBJMALMd3?jN=rBV^ePA+l`i
zvuz=^>y$LRMPLhZ6@OwE*i(fKESEjHi}rzSBDSp!b`jfF(<XxoBCNGTm}`O|1+kZc
ztyve~2c{<Rg_50>$6sr0SjY_nx@%b6)$W42<<hHZsHaskY|mlOmTlNPq}0IGb$X={
zy%a}xT$cv1LLaWH+ctX!(Ls+qQNOPZGZ=qwxrXjI>>1`*)8l0w$2M3DEZk5FCcDm_
zJ<#}kgFpKyl-M&Ex~9Qme4($yr?;W7gy-41Na&VD?{#4b-!)}6!*U&0CVbm=M8dS#
zb@43WS-wnIhT|DB;ke?UnKnQXZpnmYc(PL4^&DR$9Njb}ZVo_@=s2#g%X(ZL1gcC}
znx7_I&64%FKo5z5XE?rWk7rw&1nqg6CBOKZqf2x!(tvC>kieDo_^!{p5AP-e`sy-a
z*sdlMmg{>m0SLG<;p@80rt5l|O~(*LGGQB5`UQBylnGFsWTl3#nQ6lCJXsHPqb3mr
z%`L45rZ3T_knnv^R%+_98zEtucABs-JfuiCPTB<1Gqf~;fg41^!cY`40kuj1z<P!(
zW7)1R8waBxrGbTB9+3b$#gm|I-AXA6Q>-Oh3M{5ctZmyiWwUL^qt^xYzQTm3r!=sA
z%aUar&DHt%!HCMfBhhh8rKJuOk%V__$B^|nNJE(bR!qNuOpx`s&~dWau5Q^9nhT61
zadS<_lK@=H^<+z3+jG+|j&Dm+!Rr(Q*VkNGkEiLn1nud1%3_|Or`+q|m6<YO=_ylq
z7$#LF96hZRM4TbJ1W-;9g{PFfAgWRx^<e<ZFTSpHqi?v5#LYLIw52}si%fv%mFW16
znGQ`Kw?W7Rv_?tzX-fecG0<^2(s3=B&}|`i@V*UG;D#5Q>26xa^o4Z6&@!GF7#L~X
z(bF$9mW)Wa7*s|iJWbQngzKeWpi5;L80SWs(52+V1TNr-1hmGK3DZ=*fV9-pgl9<H
zpcCyh;Ybk;2_Te`aMSicUnu~55M61)OA`i&Yncritt$yX1#Q4|DhUr0q@$$?Ux8-X
zzU&3VHk|YeJwzuGj_t~xGhBnp9nw7A-=@=jA0u9xAVF#9c}!6}G?CIVoQ}>?7(E^s
z*%v7fuuQ+X&~52A5GH2&&E{CM6i7QS{bpgDAo<O7Y(4#kf+PI~)KY3NFqV+~28^nF
z(@kGz(co!H15qGhY7UAjnPPq?k-|BK=fta8bwfablw}yAoPkP@DQJdw%zzw9Q!oTl
zD4wOu8t@)G*G-|gKJ=f=30jny=5(32Or%UxEvGwH`pw}=08hD|k`pE{H7B~#WPM=u
zxk^emls4Efzol)rJq*7jQx51eX$mGHB_~Xb$~WL$B?Yo6O~E{?z_dKqPN9ITQ*Z%I
z&`I&MFyeAm%5c22wHBJCX-ZRTH+@6tBbYlCC@||QR6;*c3dOWM1y2)XqLS0_6j}qt
z6bhQC3(1@Y>KIy@(iD0cIH{&NT~nzYv>RK=X{&gG8boRR6-$|(lTr{lSAha@BE`(W
zqiiV@%~U9eI$H`w_f)KP&s2H^2BT7g4hn!$&QaR{)hQIz6&XTBx0bd~H+2Ow(8!gx
zQ*|=^M%7E1(rg7!4eW%p9B7IPo?s{_6x2{XOTVE@TDAcuOce@h&}li_QLqM1GL>%_
zi&J6-#8r4uL$ahf4ObZtnxV=94QIYICx$Ldb842-M>xfbZ#e1O5(?D^Y&B)NN(#i1
znle-<80}q2nGzT3(|6UBu0pXKT}^2!E~aZK<;--1qe^6ZN=_r4S5S9ybtR{+n@UPc
z#~-E)J1yryBhqi44+A&-=E0ED)09dx50&}!8!G+jHw?_0LV;;T$>|sho}M!QVaic!
zur;MuJj-&`l<p}|OiyitX)1jLstm8;E1EQ1rOQC&RngWxrOQ0sP|?<Wl`vo-DCIEX
zo=mxDP3dXMRvO`Q#Z#oXP+sfY16N*3Pd@N(RoAkZ8?U$Y4R!#J2Q2n#k3r;JkqrkS
za@n`H=CU2vHyB-wk?1UI3;Q!*d|7yY+BH{le)mzk07(R*!m2%_r~k+{*wK)e7`1V?
z{$>{3vIt5)YCLI#u!gmbzRu#6TQ=hp`$4a27zJV#mo<Fjb9OD>b%oJ4z2dS&q2FtI
zodz9oHJG9bD<rFM1bWH9it|8l`UOBG?2>M~SzIpXFznl{PUJGzFHX2U({0*%W4bkK
z6KA*S_f6L(KHJ61Yu~$U)?5f|y7l(~-}9x*XP;$<P}7ymrn~-)%UdDv@AoY~nERG<
zf{X4%0@=d-OK3WwiwXp{@9E;nkWae7pSWYJPu(KoNfTeGrp=yAlRsG!p6Q5prol>}
zA6@597IR|aMN4d<3ZHb5k2^B^xRyberTQT7b(=rAy2YPtM|=efmOoj%tOnDS&!2SV
ziDHHOaW(d!>ppuhU6(y@7q5nP)eC-VhFxU?nQiu9y9RskEZQVAD_!<rlm;d&_Q<X5
z5f2xnXq!AdF~An_Ty!n)C*7yj;w?GsmKk_*G><=dY-!NP*0BXTdJz_VvT@akEgxyn
zVXPS%E(@`EiyAHy(59GB-Zf}m+i?fBX|dpoHm=Ojn2YF*W58USt}C+o@mK>}J46T`
zBar4xs_UAEy~nV?8<HK1kyUrSfoZel4#UBly!o?h;vx%s_Hf%9gJT+a`6MfA+IXd@
z$?p}y820>*Ap>+AMj0LNt>#Oax@7@hJFL$@!C+Ay>9YqMZSy=Xn4vts?G7Ac%}aWC
zUzZt3r|eV3cC>BWT1kV&)87{Rh5OFk9)|Y@t|xVBa9%g6H<xm`8xD?Js~#M0QQ+Rx
zbF(oXv*~Q*J{rT-Jj3+emCoqms;Z}(KDFr^%q|S8k3L@m*Zv#gsDSJ+4bw9n&von0
z{+cvMp>637E=>A*o$_w|$o3T&zO5OaW7X~Bx9OgLAEq9|vvC5d$?H&)5^}nR@0u)d
zf2*iI=s<x1W8-i9MS8A{CpPfd296wyo!)?V8=jE=(#L?&I0g*w_c8DQ$3QlYx!FFB
zx!L;2iQTv-uVG8xXFG9X*KI^=+>b}thP?7<=6qwCdEdM98c&kg8=NGuH;!h`^8(#M
zJWe8rRBaCm3inu<n#kw$`wripmn#PK)NJYNXNuCJ`EARaa!VkC!G7cBKpI4SryBj$
zqN<zOy@ADeMi!50<nXvg-T=i+k6mQ0)1X200E$V&X3>MX1JY2_ZCaB98b4mp2?7!7
zz!TU29*h)?h2OIA@PpydwCOnm&joA@RO<1fPMASyDO@|k`%*wmVcPe>gk$%2`uNv$
zY1-A@fu(O2TivXRt#01gQ4oIOSsZ(=57CsB#mzC)&CeL1>Hc&Kw^7H?wh)Nhg-KVl
zeB6w=ijLvn)m4^m)4gVmv2YC6#T%nE!)`i%3QP~CsSjF);Wu4?g8mZUusqYV{nl6G
zxNOCYM|z^+H8xjf?Nu4gen5=o-$!u&-xiyxXFQ+<;d~NW?R3d}?xr&yW|KN;)~l1I
z{kaZ!m~~N$n$^iYRGfI6c=T!R&^YN-lj^F^?3e;OxB9@2ewg<9CqK+u<?>uN#C7NR
zt2S<YE%5+Q-BA&;M~d)OEwfVt7_~&`Kyjo*H>y=m6-u}xyFXi~y^5G0WHbmA*WE0T
z|J2!Q(?2>(=`3Zil)+LaOPM@HZvnJSkEMK?vUHlV43@H3%4R8-r97Il88BOCDU+ov
zmU39iWhtMg9D0eH<rpkwvJ}I`Wx!mP@>t4aDW9c$n!<H|t(iQxl0586bKAAW?dG7D
z@<&|O7WZQ7n!z6}Uw8SV=juFz?pnIX9x*lqe@m~rWRGBza|k+a%wvy+>6#pZg%@3#
z>=qTevyL6(bAYxKuAzZZuhR^M4+R~Eu3Ern$DnUb$M*m?_UL%FY0*brb6msa5Io(s
z*s6yEolGA!M|WI@J(@J6K10&CGzM2gPra$LR=7}%H=bT>E$HBh7HhaiUXss}M72Ff
zJnELoA8|L+T6(_d&oV9l1^-qTHGlH}tZWlfeH8G*JXx>j=d<70bx98+5DcyYnzrY;
zmgma&-|d`zwhcnb06WpE-(v)P3F?mS+E$%r1QH2`g^rs>#D(rH9LFe}>;V&V_i^h{
z^Zu}=K68H4T?PuO6azFOD;-^DQ!%_cU9OJ;zNe5UA<HD1UC<s2Z-VL7BuvQFJid-b
z;fS2ylFqOTJ$4lgKhk2)j$gh!d;j6->5KQL9jxT#bQI2YZn{)`ul*d2S#iEwrz8tV
zy%t#-Dxsf6(_f#wKczHEdW6QUCGnU*rg1b}P8PSosgGWwqv}yPKpIgaO6GwcRM(1`
zo?U*5=F5e#YURh*ps-(rHPwHQ+m`ApXBdteGSOIgwOQn~%(M}_58Ack`{?4bmWu46
zzeWtT`Yb%wcpZzRK~U-;iM_kExx@l?Z6@YttIZ-O=`J0*fB5}pt)DQu2xoov+i^qB
zw}#Ge<c;^1vt)U770(ypXb)+=H>HN&8JI`$ESxQ(U@{n9UA3tL^)%2z|G+QE&HdFi
z%hms&)5t0osCLF5K<zU$^AAM34+1QcMKC91V>tQ7eIY+Mmt95)8Ao8`z~8H2zCc;@
z0y&~b0x|(HwD=U_Z}Mz+7lal}bipEo59jj$bQ*vFb~q$wArLZ{T!ip`0I%`Ov-s+E
zPRn+lbqSaTKKXOBNJ8>D3X^a~4t~Pa{{U}YCWA>Bj*<b``pb_;ygbG2eY~6xLx433
z$+w_LAfCzc{HGAN<1&qD5zeOx;*E({@h{P25@7kaV7x^`@-iBttB70=aHG|LoQ$Dm
zL%>ZIdHWLYy#bX5EIyDf^XPmDoR1*rw$_E9QIGsBUXo!jBk}nnh~UpGB*8)$A5&KK
zAf8|B0n9z9ZEx_^SAU9T!^slRf5b--RmwrVO9q#ZzWPddD56<Br49|CJpTE}G!C(}
z0_w#fp%<$n!Bx-4N5wV7vf=L?(wX%??nb*h-II!cu`pK_AqUlYWQQIsZm+^wFb&D+
z(O0MF2N};gr{sp>!Z6`Qlf`I%AN?dZ-5L;5nKDN2Vy4Y@#Add5!T>Vw(ZOlY=ov@k
zxJRD$$TLLplmxSjNl0h@V}h}UKnKJfeLd#F$W!3Z&?F6*mPX41C76%zY#1bqgVUo9
zT}}gF5s<_lkUlZQH1iazp-mgcNrx5(jg`ItJx{*q7vAK`aBbxo`e^8zo#XC+_4~KH
z+dGuFUEzeJ#g_}L|527;`+z6t0{A7OpV1wP7r~<?I2E7f;RIfGo&otdOdj0n*&s@e
zuRtfg={)V_DS@5PEyou7wiG*AJ>)2@p62@J?9u9IP^VN2Uiuzh&(C8R)=gZ+QAfob
zSnR04W4Z4nng$miX3?Uf_1H+^OxOeS1kk>uq*|NtI#FNOa3F$A;ItiH(rk=-E+zX=
zvdd4vY1vO=II!LlNCRW$UuX(nvlghiGpXUF!UxBza%-_m?v-zkA27eZ4U%CnDR8Ld
zBJ_(N>QCt317sz9P`OD;$mh2JJQ|9>z{wrV+rwWVgq~(n&y*DB8eB^IdpM5;q4JE3
zdHZ^1jo}#Qj9%0&GP9UVfV<kj%|MkIe!>ht9UMmRTYP@niF!YkRLJK>_|{#YQ(0&1
zQyL3qpwIsi9GWsH<Ec|fa!#EGvr#-nrv_i*Npn0V(;msQ@7@m6h>)nDM}ftiP&H~C
zPbTpvbc&#^l<cz}Kbc{MaB%q*cYl>2vcG>a8-+In*MWcvQ@$Ydq&_z;3entbb?Awd
z{g{bN^$tSm(QGg%rbad$jnIyv)^-Lof30SweTj&S!f}dFx*jcG0bhsn88F%Yf_+hF
zy}$n+*e{qL;U$d0gy1jsM$9a1+yFi}a>xO>IGcx8VX(-oYfbeW!tZGB70`v8xZ*X{
z%LfUnhyAI(f@+d-$$s@v5ky6DXNQdVlCtj9$ntJVklYv&4WXk*BEHs`%HUTzw${Uw
z54O~UShSb915tHnoEe?0!Y7xpbr8lT1Dq+K=L?J{aj-b*7-8S;vN-nCl7aGe4;vla
zgd-|*hZ9f`@FEe=LBLvD-oW1*&*?i>z78*6$0UZ5dCW*vTZ-yHrd3u=t4fOU_UWvB
zRj8H}EFfK0vC~N>2Bj*eB&)4vRpFN51)?UEscf(sBkBRK=vTK`fLGEp;lFqBr&qyG
zF)&;W46_Ks8rIfo6-)du>Uce`ri@<Fv@x&5Dy3T?zd5WcP*Q<am)|LA#5iq4SrNa4
z+6uoEl&y~2b5)Z3W*m+y`~QUJqOJ3<+5d;v`{ew8hUHqn`~QA}{y$y&kN5vYGxYh%
zHwWrKqg3B}l~Kr&Bf4<(mtY<RGuk)#B~9|_G8$fzWfCSNI8WjUpw1Pb2$cCS7(pq7
zG3Qr*vyX$X{sh`AbXx)7+40l&ouEg~yFw?T8jaA0&Y%+Zxm#vkpi-*$NG5F@eHG1+
zMaX#^Pe>6QGdpO}$0X>HG;KVFid64orxWzgyMtgp4{kf~7fNIBatiZm9uA`fXa6iZ
zOyd?hTNg@ACLO*h5eV~+LI&y)CgoVCQ>jST)DuviKv0BpP^E%-yqt}wKa#*40u0q)
zVJQ{~n{?QVzd{v=)QbqJ5Co{Pb*L>*0JK0$zv)o0LyDQ{?<Y-p89j)wK^k(W6wFvI
z8Ix-i1s6H%g!%@kH-boa1a$#0OciT~c?x!SBVp+vG_d&z^+2%tI%(CN)Yf8jJ%ATS
z9j#}8Le6~+yQGjwG68PQD8R(sN<y8_wCuRt=*l(h`xr*yWjq;$^Z)x!wf)HdeHV|9
zJIAUAp1vAfDn)>uBW5g9^e8oVO7m6u-P0=B62eLHj5#;~{q@aN3FRm~3YO*MlWB0l
zIGH&J)*DlFK<)1JNi_IM?#@-m&vMpspgj4C?+SYh6B^iFB)I~~5KN+9sBu6!aE?Ye
zMQ#`-4THF`*#|1+HMVtmlLkl!;3}hi)$zX0*d;_Axby|gKcGEy#28LD;Buc1CECdh
zqhl0?Bh7Nu!G)dlh9V3Oqm+V*$iM%MH69Y?rd0R%W~4-$;ZDbRx<?iSj!67U!U?De
zDQr%$U~+XCq%ugN)@><Zay3efcj<;;3O)aISLZG2=RQYd_mIdfzP~D)v{#l^ZRORL
zBo^Ox+%PKng3R3vO#O=)nCnf&Vy7zqsH|wo(Y4NqZ#iGDK4WJ_e-}AOpqviR^{4PJ
zZD6!<)uWorUcOK~i$$;fafPnfX2!462va4Z+L@ge%Pg@fDmdZ#gyQ}#Y}Dk7CcWY+
zPud31AEweJfxgy*4B(?~R#9q2*O&1H*jkIr=>wx`Rv$$#WH>rTXo@tL1~(}AP`thw
zzX#z8(s9Zbpi;#PSranBpW&w{36If$0utXmi^K7l2QGvOxE$8eA<2w2|0S^Xi<9qO
zygqw-!u4*B8`cXSYqFEQa*PoLoQd<$#I9$#a`OY@kpUi;Mv?Oa*yEF1f@3SeMx0DL
zlVEy23eZBlil`vN$aSh33KsPbS=s_jtd3Wlt}`fEpu!g{7iMPlmu@%a8G8cSN(`BL
zwWT+pnkjKxX$Z>nB1Tt0-V<;h4g-7*t-XqqB*OK)VK5tplL?!9KV6111|=AcqS?h>
zvOEW7hFWJAj3TI<<BH1hproPHXK*~AyA@!}iqVVV(E4IW3EllrN+w>KZi7%u@0OHq
zPrmQsSroUR+=fQ--H61r1ouy8zW1`TSo|qDUSI}L#80Cs%w?H#V<bBZ?(aW~C;0;E
z=Qmf(^CWO$-{|PeU{TyMll{E-6nBgsKV+PNMUID5EJ+iVF05sRcSl_5oGl5=a2Dan
z>xkMxq#O^G^}u5ODJ#&gDp4o|e9SS#onGA*c$Ts>1q1xdaqbIb=!QI?j@*LNEk^?e
z1V;n+m#vKk1(Tw&tD<47F-6u4rBd~TX-j2OZ2F~4)}>!_L-}|Or153h_68|EUvZ0r
zc`8!xL^@fJ2|80jz~WYY_=cfT(JOt0{%bxp%}CaNJGi8RGnmsDet2jk94{wyCPLgm
zx(-(I>B-Y*sZEmGFp=)SY1m1ibZEU!JP5CG`I_szO0jB(rm6u^P%TQA8f_p=(Kt&%
zl&`Z3C9rIfovkh_T)-tNUGMGbJsn-bWox*l#pvQ?>FR4~cD@u@y7*dJ0N05tm!FG`
z;479k{HH7uS1xrI0MHfU(go@A=8QVJ*~3-P-_KqAC$Ik|gUdf`W&KyPG}p+l|C*-p
z`}*&1^882N&~*LxMd0-ofxj*BkK#t~Td~GM5uAnfOOkiHGITYM0juN`m#Z(9kc$;m
z{jO}!DNO2$9;O-~kHz}0@{0OhQwQ?5`SqI~Jbes-B|y)bVSn&WRV{tg!DitFT>~Gq
z^D9@hMs+G@7kkRuhFfUW<Y`I!$cNOkT&mxREv^NJ>|wP#mIm1ut0rK5=(vn>3ZT%@
zBldQKj+h3O;UC|H@LlZu27473*=0F+FBdbUmnO54!rZRtc=`aA)+zk^FaS21C08-}
zLsO6EXI#Mqw!p|f;i=Sen((P9x*|W!!(=&G^xCgAumUZ8*UYJKxtwSG)(O7pFhIqn
zfK%%4f*jn}dMb9-HV*)<po=!0_A-Dzbh>T(yCr?~&r9FRhCC1FAXi38t#3d8&H3x2
zuksl)%Q$>@vMLsy21$4zN<YSfONFnQ3LcZE#Z!98&+|p6gPw<{sk_&}zzoZ2hk7As
zwrMo$qW>+Qp+}w;U;eK7{|D>;GzjSmn?GCZ|8#99um5Yt@B2T0llwmn48f_J<k=-K
z+*>@5@IA=TpGKG7|4b9d99#mJ#Q^^OXPp2q*+p4?fST`%eBAyBHB1{YdsQB!X2sv)
zs6NPo0$_Mp`nX$t?p0AX(^Ic7onX1sj;`TSjyU#u+OY>hZyYM;UbDwul~b>o59|;t
zGz3qsLX*_PbyyoMG#Q~opS4<Nb*3wmn;f>LLa%r_j~&kY=AW}~Y7Xbo141>2@fuGz
zS9W>R`DW$#a^{?I=d{ZfYYGRT;iJ}5jvdpS-IRCm6ftCiKA^wx)Nm;Wm14F5Ke3L@
zpF!<Y2J7q1r>Ubv`6jT+HkD=X=8jC~8k^<04C8KiEc(7U%xon}Oi;4Pbv_P5Q<q&!
z{h5Q1Dl{k3^#DVL;dZZ3yjn?Y@YWw=nd6%%!A|5Xel5;1;^qcHn@mQ%r{+ofubF=3
zYs=I|nUtTGt=UPI>ii1fH{N$W)$u$d{4?R71^?{a#*mvHxh?C&%+iq3m$fDsxA<tK
zeBCKyQL3_l8J|<6_f!||i<`2YnwcH?4BTzYeC&KE)*&v2o0{Uytvp0($xOSqBAX2?
z2@Dw|bo&Uv*@2=NZkt-lK~P(AOCID-3X%dEh`Lt+)sfB?dQTi4a#g`l&~l_Gd7*Q=
ztubGX&)-MP>@Q>VGx~0Ic1N8GJ#lQa%2kjg7%p!f(k0a)`gBM<ABA)Jb&iqv7EDaC
zrCGjO`~EM__<o+~Vg{fvt|vtzA%npnGd><uUlc`?O&`jqqYiIJS6N%V1IN>o`h8;<
zBQip{983;mnI1{~E`N!_PpTW_!xhXfM<t#_sB}SHI<hFf&YYw#ua+qF%B=-CH&U;{
zyCf>5uAiK6I$cHZ(2(!hX@mMcJ6d-D>No!7BZhgISgg#jGv%2p9a$wF*Id>QEBm~r
z)ZO)=mGBd2BZ8*`>X*q$2h64o?3c<6hVIi&wj)6&qyE%j^ivGv1ksxnBu(&*yRJar
zq=wam)64kc6b#81pd9{(h^N)Is)@;e>ob30;=idqdy!?60h_2(7U|sJ<i$2ocEzsW
z97AOH!j+wj&uw%=O_zOnk}s{E^ac<|CEMi8u(nho7!ThZfnv6}H-#$qUrh7|zd8ED
z%gWs{_Z$7eUnTm(OD@5(RcoEBS=8JGdcH(Gj=GW0UqR1uf(koBjUgd^s$)qMOQyr2
zK7vHCBo86cP^VQ=QihVCKft59QQ7;sc8+uHv?_N*<(fGhXOXokgNEc<*``LPtdZl6
z(KAF@C`e<Y;Dz#GNY0lR#L+8;SPo9egG{7kE*oZ?Ve_5aqKEV{LYpnfrI&G{8Ro03
zVPa`Stj@)`qN}iRuIMU!oGZEtBj?-No|E%UZO_X2mbT~Rd_&8axtx3RC0AnKLa(;x
z-+a;b44f~y3I{8i=IT6LD7*?67YeV!$A!YHaB{KD?RmM_<o4WLY;k*jE;hJ5M;FVk
z#M33c+@7n8h1>IWv1~bKm#x{ay#C8BbU2&G@nSpizcoWMj6(cxUH^Uk_qVwI`}@|<
zR;#}?$5`H@<E8Q#i+a=<<wYB7{y1mNtEWt%vSW4(O8R>WM;uwE9hSLAj`Me^6eUL|
z7X`QJpi1Rw4oZx|v5cB?97=&7U>?<t9!`K|rOfhDR#h!ldH#cPkAZSl!^i6+OF`Nd
zAdBZ#S{BT#fHJc+1LoJ?_jUcg|Ep^Lk7n~QS#9T6tNGv5o&5Z-*^d4D{QsNW|3$uw
z0+2v4c^i|1i>vs5mP42?!_o4nLSOhU2q)ybC0;Ic9xO*tXmA0q{%1T7!@+p|+tL)$
z6Cp#?iH=m&VKCT(|LIsvpqrBLEQu%A;XHeigq<EaS>UW6U5D>!7Cb&uwl9PzlQbRP
zXQv&x1|s5M;~IxtCJg7OAe_8@_u~D*dy0kF@p056^xm(YEciB44kxK2!mAm^lPS)A
zXA>-PcJibKDqZMY4eAn$AXL_p=TUHhE1WC0L=?={iCd+rT7q$2S~O-Az^pM%fGNiR
zUJLvrmeBcad>A5w-cCk`_WQTb`T3%p0oq~r_MHiKyc$uKjAj>|ihk~rf)%?05Z<jE
z2qBxIgRSn8Eugd&K)f3S9MKu4Z^-jE`y{!YPD4EEM3QhC^=ahJ2*j}Ln6j+VET1o@
znNuXHk4#IcAXi1iJ#i%pcA8nLnW0dBf$`WCc)#nHU#$Ljh3D<~pZ14;oAf`^a$M8R
z>3_PdyT9vy_oDyp{E_WldHd}<)bIAmtL0=7^<^MkLd`^s0dy79tI3wr^N=nlCv0|r
zLIVZ^k0gyi&0)Z}ez=ccgwAs{o}<F^$FEp{xAT~)W|#3ayb3NtH7{P{d>K#T^LQSQ
zKE}AuXYVSw4kr`}EB`E>EdnSX_NPHK*=PPvn(2RXMMaH@emSJ)kv%+*Y?HTGh<u+G
zN(;=GiWkq5efH}4G=f*Rpp%{lv-rA4{tr510#Ht7Duf<+ikn`@cavZ=4Q4%7`1^1&
zT`tI<gL$w_aEI}?@yCxbc^ZZvFXJ)j2a`F*a(=a3;Gwhc!)W#kxdtfjgJ2d4AaBDt
zD!H`Qqz6gR_Gqr}V}y~@n0w?j2_o|BG79Liiwh3rcoN=(5&64l#-D@bWfacH+bCFo
zD*Q3(k-tRr*6F9=`~uaAKgX9d@>dWBpW=8Vaaqg})W;;4jphjF%^cb0Z4BDxrv#xq
zyA0-dH}3xl=Ckm&M~>$~a!Er##-BKpcku{50SZ$*$@^#Ek&wI$qeT!8L{Biy92k*z
zL+7|Fxfb>O<qWsveE0eT;R}^{mA>=nySFbNbtS;g(=K@(&-!Rc0JqXi3CxqXV8kRb
zIS<JdSQD3&smJqpO4a{hxwwph6v+9@1n<51bQ%9Kjolej!{8G}>W1vm9QTr4%!4Vt
z8F&HA&aMVVP=JlZh3>1mq$ihi_~4o4w6et|SP-9ZJ3Ei^l~Q1qLXtI)mP63tfO{#J
z<?Iuf9Q4ZKm~JtLrt!Q<JYIZ)JShxDPL3z(un^;);^kxnMSosKG@MO1RY4`p`of4f
zDI6O625$u-v|aO18co0@-3$TVFIg0GXeXLYg8Bdgu$e?K?7~qW8_kK6@0CFcN=Sw!
zlTp(c&>e+fhD{P#WizW#!;gsz(DJ5Hf&ww2Cr`sESR<ShceeJ!;89bS^9?=GkAxFF
zv`=oiS*ef`O!oy+dXKhs6pe2gBzT+lW5)8WX*?LWSNJBmq`b-?VVQ3ckM|r85L^J7
zZfNNd7^#;if}rbg?3<z%4zfpLF0nhnzJULn#5BTKm&PtYt5g=2-IJ$+Vr=6MJOqCk
zUxjQ)L^vwIXn{e?;wy2vf~^17lhg0td^jb?um48=di?I)@$1vSJ)y=r9@oI_dlXm%
z=5YkDKVe|`*#ZrIJ~h60_w0Kpbo}(><;m&ape^LvlhfBP-oGc`zIjKE$=l<1rzg)o
zygYtK-hO!Z_Raej17PNm4v<Dxt7K2~9we7^z+J-Z1l>Ih@tkkYA5jgOfeGktqtrFQ
zDGC^#j8Rtg=suMLW!AlzEvZ*Rh@ibAz-ea>@%mFVMvH=+oxOVd?#1!Tv+vK&{wS{z
z$iDrf;*`Lin_V2Kuc8=UhQT!RGAVnv7=<^(@M@8HJ7nJLKNdVDnKvc%QaO@jv+>9P
z>bqca5z`1|N7xducz8&>w6&tG$A5WohMPUlp1*kd;k&<_y!dN2>>iYTF$*x<;`8u)
zc>xnCOsmO~o`RXv?ter-JOXBoXxPdb-8?!duj={VULU_Yd3N^x^!eHA55Sx7{PN`0
z$?1Ews_<qJ&hQ#~YKno*7J^!F#@c>>3e^5SQ{<0wA(#~z+V%%GyN1Dx&MP<;AC005
zsOC|RJfdOU&yvOMBz#0?f&?(2aY{qfBH8T58aYC_BbUW`bQ&FU#VT13D;$E&?GF|#
zPRMK|h7|RM%5fRZ#&~FHA!HK(e#ul@A+ib`^_Lg#-k-dAefH}3&mbVo>hCA7;k#4$
z{q6DTv+rNN_{)oz#Hh^l{`B3+>+i@TbKne&M;ycFA?g}5Rtz)-wFm5;B)XUdizVI-
zOs7sh=z$ObdcdadG3s7;0C$fLchNhYvonx1&(6-etSbaEC&|fdjHgvPU@gqXU3@I*
z3Y1cyrR<%|7NDD+JbRX+2J^*B+>Z*?g%h>HSMQ%;BfGVX7Y*kzPK|s_VI@@vetpJV
zA&}_O%7ln4W~E$H(IC;9J=D57eA12S{3qIkmka&`h9#SzsFCFR$jy|KX2TGxpTRWE
zd+V4yqq}p*A?+PDdg)Axp$1XApCW#eWIP`Zi&P?ms&vLbWSf=wgw4@%)pKUy_CC8B
zq0izS%1!SH=}cpog_3ZJmd>y{AW!4@JpLq<W9oNi2$4><%Hhq;*_^^VOW}1f6wN>7
zxK`Nghab*f{O}e8+p8Ba>ApOB_QMa_55Ra>;OuO8HCZP34<ixd0)b|}%z)|?8R+q2
zA`)0icQg%IAWNo<Jbwef-hX)e_RYIfkq-#<0q5%|<U$(7$Qh80#w@{ZgU+JvJ`B5q
z3Q2(3q6HPMKqlM~x*((Fbb6b<r*b8W2!$IZXG&SggdL51eMlhLKRToNM%|LaP|w+N
zRw&<5%i)SU={9phcmrzDFrva3<Q?#0Qr*iU^BK!b)#Y4AciQwcb{La4R79SgVYk_B
zIiL*-cugz-h`l^{c5-_5?9FQ^bPCc|C01JG+jnOl-oH3Id42lo_=mI8%#80?{qu;P
zCJS-iXQw~<LajSb=e~24yR7#Hmt515W1?EqpzSJctb9rHc1zotg#`_L{(_Q^sv~&Z
z40vP_zo$A>r~9N0jOWLv$3&ioX6jc8?@6iHt;VKpjhT*-DjXx`k)URWb`_sB$pKN5
zs7ZCqp4OvGK`T|2$k8M^KdMzVFz_}%Xffw!*K$MaWtb#rW5Bqg*LYQ^lh5CLc>40i
z*;CNx&Oivg`}XCVzmmS5sq7R?(hJm{@Z#Mun$}D~#tCDzq$E$rADu$j^}Rp@ru3|(
zO@YaYI@y@=b1k~S^2u6N9AZns&cm^zw2lhZgpTR}gI95iejC6HI}fiW!H`<xOl1@D
z?-#tt;-*J-_3l2+-U+(wiZlS(pM>b|qers1D%vAcYUpDd&)HlHvKh%nZKoNG1|iW<
zPekrvV9&c$uR;?Q%R=2fjG(RxY5E<6&XcseGkBhjp(#{Gt0EeNDHKAZ3K$i{(b?Z$
zynEBl3x|wemSZLmt*=hrzdvE(>>a?8ngzAn#8|8^rZO_F8z)ylaM}R!FMP*b!Hl!;
z&DA`*28*6MNBAr`y?TOqsbgWdugt06;he}imQZs6*>{Qy_>YNnLUV5hyFrcz{+eE$
z&B3@>&?9S$GY$jab?9XIFXeiUj>z2YP<%24c%^4pX@?%7VmIqlREkJpttt$BM{89p
zE!4zvJ*gW6zbcfW4qntP@L(Z`bb7v4nOFsVk?vAF^Ca6J$P$mqvf}F)+EMlG7yQ;M
zVwh3@|Ip*_SZ;s6*wS9wN3=uB^U@BhK;q}mj|7*UC0N`e63U6O947aO#B!FfldKdQ
z`U@9?mc4tYgj)CKr-X{g=RhJ)h<opj0s{ApL}~qZj6_jNiy4qw!+G0qAoC}e0p2M9
zn)Mv)Yq~O$ppF*MT^XY=yrS;i9!X;6PUB$Fd-l!p8&;mWsS@17MOB!`Y;BgAW@U07
zaX}K%%U2}^;{fLWv@w}NL&&s)(YDqmC-%xYF5iISqix#7iaGIEVs&rmFogb}GSyNY
zjGhgz^ctjXcnsP;ZpVw~haLXrF}cpQQ+}Wma;kB0ws}J#q|-iX%kpWrNBTXo(<6JJ
z?_vT{2M0ZJbR@Iv9Kt8~*N1<5hducobM?p}ygh(_M~6z4hgEIJDi^tilx0LcqeZq~
z+GYE>O*VDLY|Pv?Us3L_SfqWq6|-k1gLc4M<!qIS%ZjFK*)c3~mju$q;%AGaA|Rqd
z62};L?#JYpj9lI=M`pJ*`|h^n-`$oByxWq4cgt9K*_gX^#uDYFeVKOISAZ;YwE-#N
zuH4LKjQXqA)L*xzomFevS+}OWRcqR7Q_~-na7tV`k<{;W*@vp$*(K6a%>c;cfSNKx
zx!=$MjYGNLa)|a@4$=NfL$u#^i1ybQqW#r|Xutgs?H7h<Ww-Zp-9Da2xSU#%x2lcj
zJ(BIupt}w{4vOTlT@jv&mi*ZOULMq@@HjJNK~}&kbZx(B7-Ek6Fx+X7PL0EGr{yr*
zX*mpcR#NtM+G=|{Yp8oWt0`hT?KQETf+{9EWUoPpH+INg%MRIV*&%x?b;w@Z4%u6y
zL-tndkiGUDvRA4b*A3adEsMOpEsMOpEsMOpEsMOp6-AyHAO{DBY>|j=X~&gmcGK<o
zEiwLgS>C%)X7x$9q)^NJtH91FV0(K<N4$N{qieQTzpiDFd-ax;6l$4&71)KA5gL5p
zS-OZR%5d)o^<!m!7V%U0vQxFZ(v=*;aiUn}0lkzjJ9#(Jcdy=_y?g!q?*%|vnsPuN
zB&7C$QiWT^XbnImDm-E&AhicXcUtCY^sRhZ1?Y7F5Ffu;K+omN_JGvsC8d#s)DBQ)
z8I^Pz_I~hf0dhBtV8}976xuNPmskp9%vrYf_iGhgYN$2A#o(!oUzLT+dblMVUxqJ(
zxdOf=d|L{)0&pc<$;NeXMWzlPb15~N;ifp3;rn52d{csy!z~9~3Abbvmcgataus|_
z24xvsHn>*6w`7c#@<RpSUrT)8xK&^te3FYD`6UuO6oGrHqIo!6Om1<v2R$l)W#}Tv
zPP)OE0mWUw@cwEsms=I!S=vVYb21-dj`VY09+l){&x&2FE*RlHI)U^eCVZO|+h;;I
zlSE_CRc07q0C!Sj8|c!-xdbv}du94ZVq2cr;g1J>Z=&gPN`mVkn$TTn49XwB%J0jP
zd%NhlJ_?bqldQZ*l9Q_0y;h*66gFEKJuBU8Rf<%J)0l0<PAkiD2+hh*6@ZqpTwE;|
zo!rZ$q(fc>K)G+Tj8UeJCO^+f-m+3?gujtabsBAw#%|gIh!S&XMPS48jq#!3oC+X{
zsC`@!?p6FLPwgOYEuC)_CYr$8gjj0sZ4LT1fEF>_CW&jHZ%&#qUVPDxbE|>0hfxJ#
z6iv6lq<oKA2J&<{IWv~gmWEbk4M|B2n*tt1*LRFMtzmQ27i2iSDl$RP%k^MA7foW&
zA2;M9fMIL_#`zXt=-P%baNjD~9EPz47#p_*>2b9=9#ef|S%o*{ZKyd!vkf6Ef=wYL
z;c!C;!(?L!@ftc03JW0M8FqsxhI_{O<1jDv*O-g|^v2^R030_50{y%dAcY}(n<vxF
zs!*Zxwm_tCxH%AD#MVG+hxO&gxZnkb8v}v<+5#rwaC0C)*)0Hx7nfV;WbB?TjLFMw
z45!O2G-q+SjnQ+tjnQ+t`RKX4y^0rkFgJN2bGtI!aWFhvMx;|}VdwUpuBr_%$L_{W
zzX+z&fRN3Hy#TbSrZnS#Hic7-4^nOiJLGCbn7@RJfNX(Q;e1TC1ZTJnIOk!Kgp;4N
zEy2;Z1ZPG%biB;kDn<41ZnxCF+by;4c5Ce$vk_e61HAy*D7-OOt}MB~%7TmO3YD#<
zVl2x*gZgs*j~}c3OEW4y9@BGt%cP~ge=#fWq(P4{TfIoo10T=HuFemn(pX%^z#3@e
zJqA@7N5f<-f4G8!{6Qt+jA@HxBMYQb@J2?u0#pE1{<9(j#-oY~%BJkqE5JD!-Qyd#
zAYB?bK7D!o^qWM6y&xF+>^hi)vqe&e$aGDm1S@?9bwxC4S1`uFSR=l>qq;sM?E3Y+
zibqR~rjV~qJyYL?g)`^p%blfcZm;Z{)kI9xz^f}kTe4&Oq;rAA;F$(_JL3`~EGHi4
z5)4Tvieb<Vu{PG};_4p05kN$);Uo;^>@$5_huC^Ou`fPzgBtP5Y$!^?a8!Ek>BB6-
zRlmt?CjJ|T!DEr~J5F$)P&6a|(0clhEZ|W(j>CC^;V}EBp8!fo!_>3zUbtn7w{DP$
z^Kuo;gDDNHi7T`q@UJM6JrWKs2Kyq)gKR~25lzF+>kltqicZ0$UK$N%DI!9oF{RkS
zcIwn#%;U+RMwGC!rNol;yuX^@GBGd0BKJVX7$62icDXY>KPcjmV37YLZSFfhohiBL
zH2^eV*CNJnQEEvf@@!BmjmBk)7JHl(PfrF?n8KS;Jo|=*a<~X-%zhe+oGz(j7{s$-
zFuV*oDZ}e<j-fOG@Q8f6#Gt4c8<B>(On4NGWYHftFYU6ccc~Lz?rB=z#ADE|B01S@
z0b016uweIFfi?^l!33RrEuqQY5t`3#G#is>Jf^!9jt(^z0P7R-bRRx7)el)virjXX
zTlHd=KSQ3W^bK9g-y<h8_>%#hTiusiCJW2{1?4D5BOdT82k7Xill>4W3RiY7_6Z*#
z??ODxHfpn<V2+L7)XNn{6Vj<~TUH61#PNKU0yI3vDg_pEX!=Su03FU~LpC;uDJa^+
z?SP_<-wr5BmW@H7h^|)BSJak;nyGG>UDK6TQ_0WEnB$Aa*be!*M>g`;(WrmPFr0t@
z3uDk*TMV^Xc(Yi<pMrS{J-Mzq2q!qlICup(O~uYb3}sYWOt$MT^)9Z3oL4D?3=v_)
z9#A{hsJCMeD(qOJPA_F>7<A%EqF+Lck`VpE^wIF<3X<@<R24j-ssdi`gKAYc&-ORn
zhuevgSupFCjgDOZi0l~RafR$Gf@HTed$9mu!<-duphpRYf@+<aI|s@US>Ise(B~n$
zDYpcAp<mvIi#^<$8HxV6n&Skzh{7ZjLP?38#G)ths7dNhN}iR*!jPU1k+o4(A%~<S
zeuGrTgV{xx#*|_^CNm+Yltvv-q5y-1p`MxI+Dnn?l0J-U8TC!X3yUUrV-f|OIi)V5
z&C;E#<6s!(NW%y$+WS@@NfjiSQkwjZ<Nd0#*_jG5uA~dKf|3o9(4ori)ZM*4#=z~7
z*TL%^rnKyFj<lCfKqqAPIlgBB&vy0v*^e~yYr4i<{r&Ga7}^4(S!CuF$t2qS;^r!z
zg|mgIgo`6A15cflqBO`GeV58C^((vcXHMCTVYO(@tV$ir^BL}U#Q>hGLs6t=coPg4
zYD7R9p@!}!&+rHx`eQI8K|Mdi7=9hTefx$jy<{ynG3qZS@j1O&D?8?SytK?f=3_Y%
znWd=qw=9h<t<e(Fcx|kMST|0=bnPJ2Tt)e8!A+}dK&Q44F>6bx!OdBFJS(B5BR-KW
z&rh*6*%4|!yX^ZHV^9eq47r$m3a+kDd}8SAPjSWtkz^$#h{M$FlAXJ0KO?A4W?zV+
zkkziC+;q0AwT`EStx_u}soD64K(6nX(_qFcn%uH*s3_F0F~lnL3lClWez6=yaq5<N
zyIjya16fpZdhNw5Oe9#DPYq{&cp!tV9@U52d_vw|f<nriMe%HM+X%0RAyxmO{WxIZ
zDd`-%JVHTHfL93JsG!T^t)GV%(QL+8M}GT9h7?TW13jO*_P&iLV66Q3&u8&;8qeH+
zz72*SgNrcvrwFV6&tSQTfmH`1A=v67#>zS$y`Kj2#Un6PSOD~dj6w{{#SV)On6{bF
zn9f4MQKLHZ$i1U*5-RcYDMvHqJUwHA?C~pCv$yp_RrVJm{A(hjai-G#Oomb2XV-cR
znPQTLjQy7qmNS1E<J09WXbAGW&ZDSqHj$S^J$i!YckZOA*`Sz8P--5(ngtRarn&aX
z5EyxZH~U-(hcWYoiaty*ST`s-8uVR89}|NBWBL9|iDrsLjZS(Ts8<tsEY(>G@m^B^
zsf4SJg^av|xymuj$NOwRScWF0NG=DNBTEIGsh3ce&E@X3h@95ks>Wz)<;|8s)2RC~
z*Xm*vbd};_tz#N|3_;g|A^eB}c|<N_4DddS#W^z?oo~26xrEX7)Ed~7mGS|FFxW!J
zK9(&l2_0A3*e#)=#IN1K`(hY&sm)i<er16MQ$(Y2N4}yFdeb1$ROU>>>G0|{(+U8E
z-hp_`-Yz6>igSlDnz#b2a*$@2UV^G-br##SAx`11+Zwwp8gIkMO%>)YEs|*u@-w=3
z_E9x~iFt^A$8K&nME{7NN~QCBYL$0$LuZ^B^xGNc^#tG{hbzr|s$w;XV%3aDg>Io&
zGseQEi2r<Ni2r<-h^GOCTVmdXx^z5M$A&*5eVuX|2!o8R30TyWV_vGfbRShw2S#No
z85Wmw3?g6Dh&o;MLV{+fS<lyIw9iF!UYT`BGw)1BcgZ|$qd{+{Ez6(ovMu-0gyg6D
z>yEqZiRfOM@x`{_&e`ZrvHxNbm+`9esEfGIZ5q+074S{X1T}K#TWm}|+Vy?6X)vVk
zx*N8QIzdDO^Ue@*!_aLNM?2!mXyH~Y<(3`UBvun$SuLaV|0@{vtU%UO`jxMSQVS4w
zM5Q;MC_YUYym%U1L__s@6jAP9x$DE;e|z%!_yxN)7khY!SM#DrG(<0KETepLXA?A)
z`e@KWM{DF)8M3RS(r{ULvnO6aqqtkAF&_4z8@Kr8IJzpuuH}xxg!V1Ht~<N|iX`l^
zAl@Kgx=93vvAESu)br-~n|<;;PUFvyc?hI1P=a&~DnmOT&o7pER}aSbI^yB+krHT4
z(ECmP^{eEww0K!gxuTZ&`4@gdeiG|fkbLs%la>H~!Kl;Q1o-oI0V+!4(N+NFVQmbs
zpoSc63m}KJ9U7{@HU*f6wH*xc*U_DH3Wr<?P)NRAay%ME?0UkIk^JrwFVW2eXOQ27
z@Y~;4(ZCY>`}oMPPJ>2gy_b_qxm^#6JSOwYr2DV*QY9+9Ay1d=9^$tr(g{<7<;#_^
zeA$5IOGHUQQD9w$<(y+VQL)5;*5v*2e8G3y-Z6Fi>r%IWpVaNILETa;F-~zV-sc??
zcxPP#@7yPWch+U!og(}0$$+DGOxnG5NxOHSq}^MWclQdsyVa#KJPP&4uNs4gRmBZ!
zx)i4haBVQMb%EXqk|OT(7U6CHlJ;3}p5&OaGE`M2bVeKCTV<}R_*0Gw@TM^mT2-X4
zmB?LF(Ott=+2$_m*`SKbN-~|$DoWX!tywLY+?ti#OCxwU++{u6tfIN=Ya;P&dFa$s
zSsU|j`_<iZ!?rISYHGYcIngd+<EAaVU)H=ouDln9yjNblb2eNzXz11^t-E5s`w@tD
zE(h;T{N0b#yFX657bd%N{<=%nx(}|pANiHn?(TAk<m<Y7aamd8eN=XDty;H^)++z@
z`Qct!;l8-wJ{VwoeSZfby_+iCUX1T`YT7X9_u5r%y>Z(=osle}DcdA4yu?O?S4lL%
zlM}du;4&l^(RDaePs}JWS(0fmnS{_hC`6Lw@RHqlq7O8(OW%Ech>6@T2T|1-ud1`c
zRq4_f*?_`Liogx&%rWi-v?JfhZWCsu=i%|P7XX&vWhUe?!NY6(NSUD790PITHTIHh
zJz5O1LQdQZ@l6tp!*9qm4CpB%oyd=)IK=(^3nDgxvXa3~O-c4qmRw@s1XeWHJX*!7
z^v;v$W(^p$5#KzI=8M~JFicrMPa@N^s6^i<{|@@+|0V~B;J^MI{JZz>^MC(JcCedd
z_5AGj3oQHX@|Rz3TU>3BeF4y#P%-LI>15w^H$W}L<7u?Oz%+SG=trdWhshNlRSf8{
zpmTAHP(VW+o!`nF=g};f-|{0w$)FT9%-DixD4ICf?j0SHqIUwNXWd}{eS_PqzrmeH
zBX~Py@!0V29ya?r8ioY-M+XB&7ee^KzC#*gro-~?OyY}w7(aG&8a9CC<4_KHXqj`7
zP1k<x&`1ypdkcnUWemFU8;LPe20?lf6+as+t1=kQ?}TK)cRlQroNCa4SKU@2LvqEU
zS-WCeZ^#M88^A4AeC>Oi;J1$K8?j>y#@}&*dcu?*z;8SYm5)oIoG~IDJhz0?(&Ovw
zL8Fw17FSf9s^@`7_TH{^lcXqfT`Y4wD3p==6`=}1;rgm84<?muGps$ZbbVDgW2Ele
z%;wFV$|Eh#>$Exy_G{IduoZe~OfL;Cp3WO*o6rjBHBPUsfaxz4nEvwMF#Wj#)1Mz0
zrrE2Yi}>bFseFEfv$(E*py_(k15MYxhO>v;I^)z(k$UXuE(u?DyLZWn-spkxOYVa4
zFA3x9xu;Uxe|m`Gq!#7mL84rV?L%XZF9uP5;wMFwpRk|@-<7}+l~H%`=mWi0Rf$#t
z&mtmNG0=+Czsk-Xr9r1#9@VJ<nmhlqD$F$S42H!01h#CFET<uBEZ*!&O}<YdjW?J!
z9plUdH0<tmFpu!&*n#LrezHG}5~gUZi~Py@VlWA*6l>)0FnjK3L61<e%Vw+Csat-+
zKd%BWS{&AItMaM`gBIP*nKj#^zw_)D&$H<fxi6P@OLj#g3TKO>X1*qBw~HW$y!2!8
zOUmJD#n6$z4A|kj^q|RB9SZ89m4+tl)_4(U*94<cF@8vDV6SFz-_>lmdsr1egi(A~
zxQ#o@dqU%vk}xJf%B%^UCEc?3xoP)v1=2r1K%_}o2#L?u?<5yYBx5$Oa_Nnv`DksL
z|0y+yOR2Cz0FO;ssw5sm343Kt!-q8}vY{BhFyw2go;+a0zeJpK;ZwTlq1B3!Scgs)
z6Ry&%H|i9W?hBKCIZYIzD46u`1(P;3xl>GbR-wtxy<pOYCU=U--YPWNyBAE_&}1`A
zDz+%)2Xm*(z6&B`Pfk*Z-6z%Xs*aVHY!TpMR@|l10ZWcu_(E5Gvle2Y#!}3ikLT(~
zE_9_0F&wB?M36Td&R4V}I5~!GrSxv6!WaWVsONyxcg!vgL#|FvG~bQrlbq(IXQ!5%
zmI(LzRS5T2M7ZBy0pWf%!dtdcbEp5?%JC{QHCdils|MGr1Z%x3MWNrYzPdcGGY+pm
zJJLT_ApP^hMY>|uH9y;5-wQIOE2=A-hDh<z+7vIChB+eKy=mBAVVx72T4!B-ZtkjI
z@_gXP*HS%sz=%IUoa?7wuE^!5-(`0Cr3lZ$3lWBVjVR*^mdiQ}t777O7S#~v?fX#;
zYiYoRb8#<kuz|SdYL=EKMut)u1{hI#++^r+UP6+#d~lPM`4Xk`Rw&)z^bnLmWV1wT
z>Xx3oDW&tegj+^;K)Kc1_uFgQo%LAqS)27`wP?$xS1~kLqsNFem|xT7<bj)CJN4!l
z9ua$Z=GRWW`GrXT{BV)3HNST5$NbvauK9%&-=g`oQ*M6UIWd}zFKp-ov|n~w8D9?_
z`C6(c4;b+Wh;!*Qzw;1G8=MpG*tFTQVxWe!Y`b7mr4fvx>j$n8?A2=oc&_^4X#{)q
z8UZ5x^TS2DRwLNEAB|vdyBYyfe2W^v-u-F>d+jv>Z0G}=RQFnG1P>kgTB;`x81V;)
zbE!tKcPAQw+$&w76r_8p6}<pw#?R>T+`Aj6T=~73%ZN7jp1nMKTldR7#Ma$qza^iw
zWxvTreU2rDaypFefDC1;3|h-ohP~!hhAkU9?=PeAB1y06A^n_7bWa#U82jWZ{uIvf
zkd4teFCHAU-})>9VLh}RttD+(3;hIxZw$|dL9!^h5vQ~4e(vVg_e}DTduA=H)eiam
z{cv>DeyE7BY7ebLO4_g%`VVxdq|K}EndBk&%vxA~o}p5zKO7tsL;F>Yn~#M(qe4nw
z_da&FR9jO;4b1$pyCEV?txdMk+9vqr4AQb~M@O4<sgNdZ8(CdjQzgkZDrw-Gv^Cq|
zn`%Uw8rs;U7$A*t`JF^IEwOK|GkUv#pV8d&uJn&R39oWDf#Bn9MoVpneX=q3cMx^8
z5p@Cou2A1Tu?5(TS!J7M*E<>=HJ&M>HQ@~T*W-7uPhNkwj|uwX?IgsQ2eWt)vOufj
zcruAUVI)!-w~_^iU^aFCb?}-5^N?IDgLyDpgwWxOn0$)oAK8_$(TtqDc=3WvgT*Bc
zxRBNpC9`1GIq#A~LOSOdtnR#<|L`?@IK&T-g~wh1h(nB`dmaK(T?y?e^lq@^SJ#fo
zZM>wpF)~0jK^yXlefxA74KK+#blpe7@4ux8(hvesFdK%X6Ams0q(2^mIO<cxq0aA~
zJ?pa2lo&6AIwvWf^q$->7?IAi-CbSp4it8ykvRpu$q^xqo>1k&O+5C*nSy&jA<rTN
zj>@|jdU=;8(o!ScC+dZuzpjo|iJMZz>TA~i7boAnczyQz&FR_k*{kDc@7|D3&?S!_
zbF7Cjf?)g>Q0_v_c^o%Jl;M~s-@`#vA!J!Uw?{O&V?=W}K3qgI7aFXAs1Pq1q$i_#
zq}RYRhvIhd1fkP}XBDQ0TaiUHpz-Pg>DSe<x@%c<Sjv&V@+;Wa#C271#3sJ(rYNt9
z?_Eo!ts#2XQfa#sy(g(8kzNDO`;<y5o)wrLY^Whk!X#N!J*uJK!DrKu4py5c4?dfQ
zv^_-cSQc#$(L0t!Tc+s!$fAQr4e8)PXh;VwG^B&W&1KP=>QNp2KAVPgu-fE!aQN9|
z(bf>XYgx24MDJP_ZI`0=B#RpLqJsya7ag?Fiw-xGMeEC<x_&<VY_jNZHHJL=Y_e#3
zh~BX*+8&~JEQ_{G(fg4_hmEr6@IlC;!xpmW>$PRk*PVc&@}!y$U#~AywgkOG5Nrm$
zY4mOey=lm80r#45vKjOy?#@Eb!e{<68n7-0KECRzk=q#GL*BD1nmN9znfQJ!R1(d`
ztt)}N>s2zwz~EuGT}$W{a8S=ml|;!e;XH2H4$Mj$vSJnd^0aoOT7(yLJ*%mTwmWpq
zp2l|QW^tN_7t2X7Z-W!hT82~i$**dZrY&!aJ-xjO#kG0iJa;8x_DAf4TEtJ;$4++S
z6H*$!kf{Bdi#Jat|C8KIN(X*b)i2q{PBxf+ZPgd_5|3QfJytNsOj%i<tNH`0d)2C!
z?W!J6;$TtL>TlV{RjVFP7Hz5)+tf>|e$GCwTJ?yuYPBlli+#=NUbX7UsD0I0UQ?=G
zudkDz)?hRGOJy@!_sVS6P_?Xle_!km$+S7$G?HP2f2C1KpC~Qh`B`06KopEu1|nKf
z4Me@hVK@nrgq)_=Hz=2KkVX8SK6Fm;wh6X_B2LcuZ6ckWakrCs`M8_zwV@wk1_*wK
z!IZ>F!ne-A6FE2_WBj$qZl{4y1A0w{*jBURQIFDwq&&hKRB{2yIG#^~1t*(q23~wZ
zcJLaC*T7y+l(uou@p7SF{XxEFa0Wo(#Vzzg@iq_0^)+L-Ah`k<i*e_1(&P1~mxScx
z{Q;3DS#t0wL6-|aQNgQ|WG`)$o1Mn<Fr-x^>Lnq+=B@$}ok1JNx<SsPZRm?i#jIDj
z44n<gEQN6@ee?XyK3>B?!kY!u1LK=fgCtksFdFl7CZsd&k+3JPuHoQ<ghZ&8(u-gb
z<treD&}3O1o$ELnb<dxuDJ=YCEe5M#Z-#Qp46{Xg(QRi&m(#lBzsZd36P@hluBvTw
zvka#RjG0IOHF$)7IedKc?)Al^C%NvE-NAV)Nv|B`N02g1-XlA+Jf}y$gwSuvB!u3a
z;h*%9)9~gBKEdziQIRUz@YpRK60Fe|c<*@k2nfZS@M!mBHh!dVD&n2faYWX>lJ(bo
z?M#iL)yo2!$SpEagM;OfnO+C4Q^F=hz0noviNxxW$3O;pE{q=I5)%k3Lup!9MmadE
z^=92H1c_ks^&yr+ufU8y7YT_yM5rW~!&7#Q)9y>Z%`zXM0{Z>n5Pw$-ni?6x=yM45
z<X^ISRrXP2j_lOhnr3-%l_o%dyb3;s<Z?L;W+c1~`{Q{8Qe<+=N6Z2_5(fDd%q<b9
zMQW=_2YTU1L#nD<ni=08;m7VlJw>o<{-OPNc=%2Kn{M_~E+<F!T{w-eLxK#MV5M`4
z-*_HR$vnEagdsH>6$`Q(xNy3M9^6Hg`udY%wojK4>YO`}5irue!G^)}QL>9o!N-2T
zFfs6E&@wz@$%N8_54(D|q|)4&tS-Cz&ET8e%!gh2p@(>hM3+@kMmqty45$h(kL0h}
zWP1Gaq4rc?Z{1gzwSwX;`l_<GD*8(4sg?Sv*h?jSqzrIG^IzBpkn=zz|Nb{V93e5k
z%+c4fKP&puXza=IQYC$8h#oZB_n(pLJ>!cWU#)tgydRpRUwN65UXT)R1!<R;bb9Rw
zQfJ0-gvSV2!NTzA_i->bb{FKNGbJO;?jd;;?mpVZNjg>g4}?}oC(a&i@+cg(AR2|Y
zCb(ZK^nw~yC1c*C@}`jCQF=W#3Fq^8{<WfCX3pGi-Mx6S_a<v<#mvl5PH$36r<45{
zCwcp-&QeKz4Z@D!-ycsG<4JIl5cc~JKo00*r_2)RbXc~>3WVe`{&a<i{5KdXyN|%$
zdGue8x>D699e^nB<Lt1Fh6)3wz=7TIPcA#Pc6J^Wc&Y*$>K+_N;bcUerpkxt1`N&r
zCf|&|A^SxRMMH4~Xvq+C&IQ`42r{e!`OU>Qf)ZINsTg<fcEZ%+D0M*8yAY_ECmji;
zXvhJAPNRxLx(y{$3pLHBI4P9D6jKYe;GAd}r@!XhLb-!sIkr#FsOHB*ey#JmaH{co
zz=u+tbjxaq^0gctb<_{!ag_AR;7ivAJJ+;a?C)QOH_Ts+chj;vd>NB1Vv+zlSM>VY
zqCvfw3Nt%O$kCCK&fGJ5orzNdnc7}Dn{~pahpo3P)$r#jx-9wxBzZKYhx(`wW6saJ
z$!ze0E)s&~>@bWb9hPxt5?}no__3o?PD}GjR|}9go_EegR$cqCV`LnI={>`&jDdNn
z$7+;zU1qKG<(uz}GhGAV=D;*`-|#fw@;ujc3@h(*k<b~7P|V4Q>_{MmleyGMmT?9N
z{LZWK(X}%J@X-dQrfY`p`?lklzNhPc9w7kFP+^22=L{DLpnF%B>b?&bd*6i%!qz>>
z)f|V;A_|jCtI^$4SmG-jBNvXL!unoAbORU@?;|KjYk~*G5BZLJ5MSkvhq94`nd`3c
z?O+DFCH23F4zrsE?=ld#5ZwTgRz~YvW&X=f?@BK>c@Z0kmDYik%TznEQI4gK*$m|r
zDBGq`NefP9W7*rqZbb)V205DTZZLU`r-F(Y;9(&ps?*)n{dX{@a%5O>F+PJcT4$wX
z%S5pfb5JHs2zo<Pu9OoFX9RYbIwgY`uPOZP-LE7hsyYGEQBpCS>V{GdI4&lr<QGM<
zm)X~zO0OJ0v~2#|c`YS!&1OD&wbk&3bh?%8h0pcPRRvQb;H9epDP+ueBRt=U3Ys&=
z%2J><HUKD)06#mM-1H<3b_n&hXI7%fVU0qR4;paWxuHziawO@wk;KqL9Bx$=?Is`F
znfJ3A>J}+{nC+q4;!KzshDsg=SIfu8<s`kWBk5#RB<Xc8v-DFHTS4l-a;*;3)>*tD
z(ex^wF9Ir^@Z|odkOm|8X-Rj*4FN_%)!oYw!N)VOsYvG#o`a!S)Pvci+Y~B;<OwUf
ze+#h+lVLD9pS7h6hsbDoP4Ot7i^((?UPAILyh@@;Jj;m?#j4n$<Viqzo`iq`<R{5;
zc*#OP;L;+#`T7tZclNTQyifxTo~-v+4K4A?jj^_f-K2=!NW^f({Dzv11*?(cJPU6Y
z!FhZgcKSD60C7|Au}QN7W?*I9q}HU2=Mv4X1KedszDa^HSnAU-m?6{YM1CB_A>&to
zGzZ1`Q-LTnUtJ$T4j8ujo&ox{@xLKU8vom&Pko|y3%!%=4YZjvf*Z<px_w=ZOcUJH
zw1j<>ErD@Z(G;4owkZJ$@{SH@Ll1U&g8~q**r8B8XA#0#B;T?;qXC>eQ!xfCwmG=z
z<k^z%5qyD1BtA^8lE%%=Y##!E!rX<C8Js7b05D=FJMwsS=&~%$gRzI<?>Q*bWJiFj
z!R_1SFTdQjKrZ_tY(z2KLal`=GNl8$EL~0t9aXw$R;0<D*^)F$3)MQwcsZk2;3k7S
z6A$C7TWYrzGyuL0O1!x((ZyC$1i+Ln25Y^kzUYeElXGFdR!vYm>h8ttJX#bc_#UZ<
zJ_KOPm_fj)l+_vInrFVLx39Btuj0|TYzkzLvjWw|>*)ncsSPB-H017tj9L{8NoTB5
zcD(9!88rK14y@Ig)kJ{G5q?~w7t^3iXY*io5z_0Varaz}0igDUQdCuz7a1#{QXgNQ
z&(H-tiGGn4*VKy{=2Amf%2wCBin=~;Wscx%$TnnE+az@6YqCQqpjsU)RgV#1sUlX2
zA<TY_7?#XaMGO&8TMPkUdkmx5SxY*emC>>WYMZvRe4W<RY=<k$w+hsD{glOZ%X8JM
z0A$q^_Yr~2`&P_W+pXqLAjOapd>npwef;ikXRnS=zd!r-!|P|KXoEg|^Zaj}akp&g
z=AY`q3%6{mrZdHRSOEYs{pnBRtgTs`+UA+#fB!5f?YB=VZLOc@g`+7qsxWPux>I>o
zNhhqMxY;|8#^N5(ShPT+G7NMEpQ8IBqmuv}JpY>OHri3`-2-LV&>K$LOm?(zLlD#J
zZsn|3g-CieOf3@V#JN5?)VdoXl;4Dtwd1-2;Rk+#3eW4{b-N~ff!NREcu^c34e>-*
z^uD!_Q3{7sj2fiU#})Z~6ir)LbK7fR{k0T;%<YQz;xx4uyJ$GQLW3^f&L;Cw98;!7
zVfL<!L8Chvbo{enoOA?uWifqNSiN7{e9Kxqti~jRYWAL?>jL@O$|5;qWo~dQ8`9!X
zk~)gzjb_Sd%^rfPuCS>tvSNk*zr8Q*Zrex}ozFVw{fAEGJU>vTDC~=5Yp!A|j?VRV
zC3(iVd3ih#36gL~f+0wYOs@a?tLg=4tOTjWNk)hxl8x@_>gw8ht1hpycP&-9y*VUJ
ztRbzVw&4Tdwfn|`R#A%u%~6X*i_|@LgT*)+lKZ9wtz#1w`U7C0Z^S}V?;i_It;GTt
zZSI>Ew2n<!=nsH}z7dOi?-5!@Ef%2f@4ZK89rfxOhQ9ZW1+AkNi-mvxSkOA^X%Y19
zpB7k0JzvE4j|Ht`19vVSfIAnPtQlqCd$y@qM?Dslf$td$tfL+a%E0%G1=g{_+X7p2
zeOo-lA~YILx^S}sXRjn!WgQ+qWgn%+9dC>WZMLEve$Gz~mBxq4@dgU?-C%)?xFZ%+
zpzj6?WW-&uh!&%}>ygp!Ju<qx9vSW4Bcr?Pkp@^)dKse!@G?dly^PTVcp0OOUdHGF
zyo}LCFJtroUdCvnmoa((FJrXP%Xk3pT#Pn*8KVdAGDaJ{jL`#l8KaF}#^?dOjL~K<
zLz_Ls9D>$h%sSB?V(g5y)MHW}8ttwzVYSp^GV~`C|K2g-wbbi$p6TmjGM=(r_sp@Z
zmRd}H3l{$U^9iq|o@3_&@&FvmYT1NI??Eu>ZNlUl!@7?@srQS?CzX(fm}vKmiPjX8
z^*z&M-Iylpd#1@cN0Vw>tmip)onL`k-F5?^uc&P)<j=a3J>P{ZmD!Z+${)otx5&26
zuT`z;7O&);rFCmrqxCE*+xAc(&{w4g#WF{aLi5*mL63Pb@k882{fDPXg6Tui<=16I
z6kP7yC%={m%pZ<AfNz#Gzup&d27ZqQGA-_gV6zj_o_ngJ%IUz)s@0sxFFmBsX}lYe
zSGXgAZp)Wx$C{J7Ds|)4s1+VusRQ?!ZP(vNa^b-z^|st1UH3}2#8SYS$ZD9-5-ns{
z<&G244b`*4rLcATX)1v|S!@Qj4H>yxF=YaXVHa<)Q7uC`wkxF|Ofo-wn&!5lOw0HG
zHQ#2#Jpt1{EWm=V)1394H)l}7;aXu=i(W`EZmlHKNR2|r;Q1S<X=WrGHq!v+XRVW-
zhvlVa7+v7AUh=}g7e@94d*yeM<F7O0WTyXverW%g8Amg%b)?L6`b)=OX(^^wKuAy@
zw*eRYtJA**{@qFGPU1(RHz0a^mqho6lV(J}9}@3K-Xaw>q~c6Fn&~-uK?51R2qPF_
z6eE?2yOvz>GDj}5@UMURx%%=97EhPOsa(c8o@Gv>M(?9>od!2@;!o3`<rq(8UJD5c
zjXWQcXJiT=PoGNhe`Y_0Kf`NmNceMBZAzqMRym0pi>*mdp-gg?H#v*4-<nZ%oTYR>
z7zc}h{PZ)KJ;=4}^sG10J7z~EmpnLeynB#pzWs63r7K?#x&e~ciCZAsJu3Nm!LOby
z4)t5@hR>WMs-Aia_11=A$2|_j4)+PUfUHo9^0ED63wK@rNN>4lb4teKBRL>bvJdGu
zqhKBoJmJd@l6^e<vDL{RgB?Ka3ZS@dxR(?%++zdTQ5jJtJ)5_m2r9y%7_VvT*SXfO
zu=V?EsqHMn(CToVgvq`d1)xaA<LCy@@!^R+pie-q*Mvsao8s{p9w_aASEjb`OyDY;
zxT1+_HWAQ5CAC7l)-<3NLy?U9I6X5+4~{~@x+9RnZb_VfcE^gcPQya4(jaRqjPbTk
z-l%ych9p8V@P?B^%)HOfv(S8<B&rGgzizSJ^b-uq7%zbu^B{(<DM$Fnd}OJ5kLO;Q
zrPB7UK`F1&(bO7^%DQNJX$Xte2Ftsiovdd6SkI2wxwnj`67{+1UD3bmTNqwMTsm)Y
zd&OEDXYy$VUeBsjyB2K~i_d`!QekE8$Cw!%br)lnzq%)PR^lNRXLv2G<Jz4eA$fo`
z5kJgb2Ag(fIg#)Fm^?PmpDyC6rGNO*Wjnwm*SUVaY1ff9n>)%#Lt1~%!JBUrdxb%n
zg<rD`v+(vU2$o}<M*ez@pa|T)9X)^HkNxS_Yzar<_H7BL^hUQeNrI<g{FcllZEe$<
zpe(}BWs#Kw(cSa#<(ysNMlLYiCXDN(Gs>lu!SI!p@lz`dA1#y^y=^*z`BB@}$Gxvz
zDr6DYwyltr+NnY|JlM8ER%*8v@=YxFZUNJZ>g_S+qb%|e2(gM9AVhiSArPV!H9&~+
z*h3&hD{6oc<-vzQh*ngO5DokC5KNs`lp=KZUU(Syf-%^zAX>&H8kt6zOfJ*8@n=Ml
zP9Ud;a`Wd3xygY7Z0vMRRYx^dS%9ZaW610soU4=dKUPQU#QCiRjUl791pTZ4x^jtS
z^?*ZPj5!DuQj$`Yyxx78BAhlr{7x~48t(ve470b0Zv1(JNr#f<`&GuerN9_!>!OL<
zhzyPSSJVcL>Spl*{T*t(4#v^Vj<wRVTi3dr`_q0jxhBdGxdMLR1bWcr>jFX1m5}Qd
zrL+DgD_G%E+`(i3ueA#0ThF_cv@q{rvwE6Lzm&K6S;cI=Ue#uX1{-UOOZ4DlQ}on~
zm&rin`U-(aw<lFK0nEyWX^0m>jMt=(7ZrrpOLX{oU+}=}!(hros?s7pPIxl{#zBBy
z!LtK`gQ5`Qg0V{&pO8T~hlVx<#Pv4-Laxsii}U5MRyv1UM^z;gZiW_NH;C*O>6(R2
zAvxw`Jae4M!s09nas+I)KajVQq%VsBO+^KmN3gV#kv%yVHVb#!FxFKwclF5_4E%8%
zlw5k4P_k7GmGyDq0t37iM95KspA?a}0G(Y_fh80HvH*}rnV?x|&eoyA!d0{`PT#%+
z=j&rO==WCIN6W3NWp~d%p#B+_>K}IDV<Rn8+k`sK|F>Y}t@(JE=i~dl5?<VTF&szN
z&}k*NUKE1GXQq&<LhvS2N15DWZ(eOMdYN@M#h{O?g7hm@%axRiM0JC35qHCBmtLpB
zBhjqbX1-}Q=w!${nP<w~0DT_gJr1iDUi!y(c}9Iu15C}(K-%I5jj9empqeKk!R=c3
z=gET5%bC=UTD!hF1OIEAfKIzD3i%n&59?ed{`d9fMWG!~|E;^Y7sR{2$*A8o-o>pZ
z_tFNwdG)><QH6hDI|c&t+Z#f+T{nz`SA*I8QZ>F;s@{T_cTYpEw6?&9yFPb-zysD@
z6vz8lj>|CBkN#aK$BZ~O!oQS^8>G{3<ygJ}X4_FU8~f4cu^(@RWAQFv2^i;^T^jaR
zY?CbayK~Vk5V+sE!E=Q?MTx(KifCrc8!CjvX8X4`Elp6qN7GFX53}vMEz%9i3xlZZ
z6DD7GiF;1oHX5ERh}*XP-?Z6x=!D!YV)uwsUPEo8oNlF;b~Z-0l=M5$Oq=n2K+Sa<
zrS*_)l0|!~dhc4icGPr_wAx8Mer24$BD!3&Hg)-K@98mTe_K|)-ohepbbAGWEfm?U
z+{N9Ryp?qBM3Y_Q|5h~Hl0^#34qr>l-K(anxy-PZh2_b&>>Y>OcFSXr?ARj>CK~Uw
zqng_`c}nG8=u>iYNR~ErfSOv%9mg1dmd112+raGp8(Q2)W4k-K9_{QOz|KCJ*gw9B
z{kOKRe|f_yb?8njJUdM#)Z@DQa?>qlF7I{}T#49@I6cHDw6tgMhK5<yvnxV(K)a^e
zafju`uOI^t!Tw}hUi!+ce`|cdCfk2qj{ky*XVX532J{`7`ja4@`MrSKi?h{i9QMKm
ziSdYjy4xM{v)#>j><Xieh|?cz|2i#uHCo2FuXftU^DzE+7|p}QXcCi~a6BeUFloVH
z0Kfwb`Ur&tTzZnLzYNGCBL5i$({Knq6#cTqa{+kgW;8umjOHwK3eEiD=~EK+!)S6u
zMvKMl=<qO@c5cF7!fPlNccS_55Pu!MiRMA~#sAj7gMZy~ED#^c(CC@4|FxIU0Gr6k
z4)yY@*L{rI0{jTb1*=5>w9Dj&#w|uUYQ-d)j@PXB7QtkOM~um{6S46#-Jwd)q{;ij
z`N2IoVO?~EGdp`J42-7J%5NDKRg0Q?V?U0u$AUSq&N!Tei};yb*-56R88B+tJ4&nN
zTc#P`vW?SmH)(m$XJI?}{^|NcOvJ_0tWe00Y$t`}ebSYsf|cJ}5Yn&|J-a*i`{61R
zLi9OF!>X0nH1gy0;C$0YVWD4_VX)}F_1|t&2<Znj2M)%9xjpOE1PpqX<>jd^egelo
zC+BBpXT-704Un#JS9IEe&H4|t(sCS;ZTfF9_h)ggCpK2K2&e055CfJ0D5^j%R8ed&
zj(pssH;v|#ZoF87enu*@1T4;9`PVV%t_&P5%^aL8Ozf;DJ(`={&raiQ@<4AQb7>p}
zJiN+9U1ScYpF#g8P;e6FOG9M*)m<awuXaQR6>7Y=8)Ob@kx9`3_P*c&yW$DxbVMKk
z=8BiS5m}7<1<}Y;n)1za_^ME4iHQ5X5w-)~gkWe}jT*=^-zBXn7F_{YZs(TGT&2xi
z5oM0?FT&5k?b<1Pos1!qn^E41RiT$^>Y^txy1{%N&3C11TF>v<2-3Nu@>O8y?%j;|
z^NFcB9<zA$=6(0#?aQ~V@)KWb$7&O)-?Bim+cinmu`#WqRsg9XW3hLLFjb0t9df3^
zY%CZhJ@((Yj>NL1mHb}XDEW@{E$he_LZx*tb#Ud6C&pX(cChpOH8%)Ka`}E7_|VDI
z2oJvnt64NfM|kc}hXDrlgLE<oS9BCbkM`BLC{SxRfxr&}p1nDL+kJWV{pp9-S9R_U
zXv&hZryono@~9B+&o17aUtXU7^Q?RJ@%=?TI+_YVF>+9gA6S&}BKt(`7^<e~p66M%
zZFsJxwRWjdYD7tmX=^HLq-QEYutk+WoI+(Ue|&rT=KMwX^6DiXs64xX@2}6_oL^n$
z`ttiEmg6K~2YBOAv>f-zH6Qr-IH}m{MRPnnhGPdYE^3qO<pP5ou1U|IV*bzmJoIr)
zSm<{=N*Ccc1n@*2g;EoYh&T|TF$jj7?8pT&g|9}ySMU$`co58K9XI}(4&Uftu5Kuh
z8`5MdA>Tn0V2p^+r@d%0gDey{9SVs7U`Z7;IOQZ9juw>NZ=(4xtWX3^o0D;PJqKb0
zF~~<}*g<zLUSw;&3<AQU4PYyzx<eT21QTF~Ji1x=X09HyCCt;4zAN`+A!2AF-pV*{
zWz%zY^iVwrD{>@M48v~`b*BLgN8`nDb)o+*6vAK<T@375Q|d!`DWUpnO1>;FISqzY
zr8%8)<?xD*MKA|7lAh0%v}gkphdQAj4)MfgtL}<<Q7`0IHo6P-mO`qs!u`osnUA-B
zTzO$dqjqZQoqItUyNRb)b>6;h9gYl^<;2+@iK_bw8sLJ~;=ZYtz^@{f6gYtEvX`_7
z)rqn~!bzvM@2(O%5@TfHDe7BLcR`HCgyEUhjOPO~i(tqC$`D*c=4;@bTWTy40m;<}
zDa^q12@L^LXHWuwsBqG>z;un5fDvL07{$T-Gr*7)b2!HN5US#XRTwYO7&g%IFGkcW
zV)jm0(eWCazK)i2;?HoFvS^rcHhK<~`e+y0G;VXWqy*6|n;>K1++)fD%Opw&m|<GU
zY#t5g{-lj)Oph|4870%nA<Hg#t*QLA<7D2-+=y@8$`+i?0EbmL!RbRpZaMZTb^Czy
zghiJ{G{X3ZNB~(FK)hm7Rdfu)%-*#+%y>#7#o|U!aFPTRupf@&PURd4J7->JJu%ct
zwXG_+a5)-U#^%fkMKx=ahbz%Tv!!?%R2YxbxEee<?LO&tg(KMQ(#Y6<+w=tCFqj^&
zzqS*&p4qc|eRpuUoW{%9ESfKZ{^0_saLB~9%TqhO+3ar(9~B<kH0dWi`A^eQoxf>@
zY8k4jX{P=+Ro4wm{TosL_5jZkC_g9U)^PHT`;$C>2j6$R><8pqykx91`gSi#d3J_e
zaR?j*{eH>1{iN{v^@m~_{may<{4$Ogz(SLgy}f0OIy7B^K*M!hNTpzo_x9*21M*-u
z%Y#a5?{~J!0mEWR3xa}8)T4-9fkD}EJp)9PzG&@{>d+lfx^)a07VB9+M_2x09<H9T
z{B4TX(b3B_U>f#vKTmk(kCRym1gd|~tc(Kz?xZ2S9s=Cs;dy$&d`_M{Bj55#C{PgM
z`4-dPE+<!WpACm-B}YeMx~fIX{om=g#|2>h<z%w%$__;puw2eMpd%P4;r!iZ40+Bv
zV0_Lsg;fcR<*aROs$4g~O1vTo+8(`Ag$5Uv?ob(a=fN!S7wjq(@_P~_1@A=R9g8bK
z^C|Kji)QnTE_!NhO;st=k(7hTDb8((Uq?q5Jp4^NF68{Kan4J~dD`*&pCTMTplLnL
zXZe^(=piD#6n?L+0?ho~HkmIm^Jy=OP_K~fBTw2BgLfb2wd2KdFo<c|KhG}CzyGm&
ze%U>J|Ni3LN9DA`=FHmUl-J6#D209qW9Fkk5r01S*GjA7U&lI5w=E|WKqvln3BEx8
zE%-0=0>Y{)MfqSgqc_coiBb5OylhZhRF4-9(E6npN`vxoIF+l%pW9U5i(<Jr{>&8L
zOFZ0Y9K^q@>F<xwkC&Y|7!P<?{xb}2p1lS+oSy*9@pyrVMdQzb@?8}QN;QD}k3iMW
zv_roWD11cTx$HHpv{zkgucq2wTCad0+CTH>81Ra`{*X}Mv@>2dXy<8JbI_2jCzAk}
zzDI)8hSHapfZ!=ZkTUQk)-ZpT2<-`$P%aBKp8eN{)7J{T>x9!mgxz|%?Dc{;&I$#o
zAw5VWmS-3Ry+t%fChR0)Q+$~zLEC`58@~+wVKikz*~F^sYk|=s6wxnxl<dFf6{O;}
zjn*+rXt1~tF$x7W(Ptphaxq&j=-g_6?mV5!9KqihoPya52c=U?h=I@hL2ECKfd`d9
zF6y%o_HoSL(j>42rwp|(dw=`0KF$6A2uL*?@8JG#@c(twQSH3{uj`Ng|ARbYf_)H<
z=>iU_G@MhQI;Ks0GRBGSn}AMt<CdjlD-fFjS>xs=9Dw6=6%f)N4+BW=Kr*|>WVW8u
zvdW7V(N%D_Uirb8yjlj+X>jc?`{daW5<0^q;XlIZJoGz*`3Z+X@w<$ca~Kl5z^Fvu
z<A#P1mxHfAfyP09AMabj$yYibf<N9wzlGzmkLBOfEkNXT*bAm{Kt6Yf-cdU^)#QW7
z&(=wi_pix~&(^S{Agc(1&iyO|o!aEbXh|17qU!~WnqXiN5Pu=IaZOm&bO#r}JcQZ~
zJA2YNh>f)5kx>|lp;4Ol>h<Ap9Nc6xvhmeB5XD!oONL~$%yNNcss>Z={GG4Fa3k^N
z^y=dLqw?b2%d_^y+4s2O{eP*R8;)N`e*e6WH+!+wQ<jc7#dkiip>ry26aIS<jo<j6
zqWKA#9J3p<{u{64N(v9huY!g0>UH-GKzaSo^Rs_(1lnYhWC{n$Hwxm?p?d=<bhPJ*
z$(giLH1m%A*=hz{5%iD1QO5+O1XAM3)6ZeF;9n;^9esQb{tN#s8u237v<+VhWd88>
z{QGwoZ<Naq-+vFSeS7-mtbO(4`?Itm_>0X(9<z^(QIj+E{e_><YKnV7(G#Mi<$;1<
zM{H(MnazjYu-_(s#|jfD-0CY(@SEocT8lh+LcZm*hinlSC&9jzdX2-y5^aiH^6u>X
z)!Ezbi_<GbMxf2w*%Am~IYhgIf9SeI+SRPFN#|mYE&7+1uINt$wMCawP;FW&=|}K8
z%6l?YYWom61b-x>AK~xiSepJ~883VO*hh!!kK5OBwc%&EZ$YF1yZzRmwU6l}(+uyd
zI0rxZbu@$qEs@?4H8STf*<=%sc?wy^(eO2RM2k_I^hW+X{?|NQEKtQ_rdO{qjqHaE
z)^SR__iREQh$jHIK^dOE{r+9|^}AQy*Jp2EUHyO&P~|GPOG^X!O3CDesLWygU0OJz
zHS+6V86H1nO8tDC2KALN63tG~%D3`~+|0J9sC6tIZX=4E=Y%6y*1?w}zaPvg5OPF#
zPYT@OE6p8D`{NWmyB+qYpGk(O=zZ@|-XlFQp^Jb)<|}rH&OZ6PNG>v934{5Nbtfyx
z*qBPy3krjigbgKF9)OX}!|Q?00~WhF``Eqw;q>L%g;Zpa2b&i8{^Io2o3po9#X`|9
z$84xG9JB1q^HEs+ZM?(k32hIXEdtQdWCl(Nht0C_SJ1gE8Muv?hq_<9d-MMM^;!4w
z>h$WvW&26=3(bx~3SW~!R<^&OI9(45$80P8z=t~fd}8fODaHe?jSyq_UWGBsR)R4v
zzbnR2yaC3ii-q4Caa4r<%n-3OM+Rx=w_gOu%yO+s#eJ4E=k@v9|J<%E;9UQL?p&b_
z8HFX)O{@P92PJ5UlNdDR7^NqjU(by3`1CqGSvVi|=qx9NyM!9Uh4KVb(_Uq%FT)sy
z2kSQ&X!_Ob4_Don?>>C@`i%S!p}#I(eJ8%YJbQojL#FJ%LNJs!@1r=R9j0Jd|Kj`0
z@V7wGRNyO>O`HJfFb5McmPh%Py1U=D@U927imGS04fBY6n9@x}Fp6LbgrY|npbn=s
zPT@h5wN{3Cf*Z6`^N{Y?83Tc!bFVHwoE;0Sf*VSy7VcAh^%`8=cmG+1%)k8k6uS6`
zd{6rr(UIb*W8tLR;m<$mOp7rfpN#<~GB+H4&!Yt+*>}M(oT93E|L*+l)n&>R{|sPE
zzfzzeCoWIW_aW(NAsGPfi$t1!Mt29XUobQ>S0<TY?L9hC#Sk$EWHK<#WBNBgHIp57
zut5eZreJn-qVc0F>zn0xQIVBb+$WPht+>Bk#eD{?-=N}LRz`;ZTKYJh415c}(;t2E
zYhO79nF>x<Vcb^=lmBdaiVhukpY~Iau~mx8MTFt+m0$Z%2IL#9b@(|j`k2z0FeF<7
zwG@>B`E`(@GH8OzpeZVYEGh#`DWC#WGG3A^s4YhK5mJ)xeEev%PY38|qk-^yixcAa
z2F5Y{n?T#5Q?XngF`AY|t){OVLa1p9v|(VdGMlh1scI`+o0KobG%a&NW=QbaZuFiN
zF_>-Bf=1;tEnqBL#y5S+H~k&?rf=kBi>7VCH;o|FwFMf{H&~%f=$6#A6|zmrm!q2D
z93=T{H;T`SD9pBLLZk9b1ud`@V}8O>(O=SiWX8+XEHmAwgJ~V;$M`2Z3zJ&jfo{<E
z1|6Ud^fvR4iilqED#3{#=y?vmO7VeT2(FdGJ3*;WKKBu&emP3?wL#xAl=_WO>g##Z
z$td+PLkUU}UX&J;6h?6srj8cICR$XIg0_`5b-OByY?5zlRa2#WLT{ds1vVT0blAdI
zk>%@<Srps9C}_Kq+J*2z^+*EzyIcK5Vk`<W^Cgj+xJQqu+_lJP<!ItmSgE*NGBoa9
zQ0fsW3Q2;<or;HOw^7~hyAcY}E~C0#Rn6kle>@|f$t24%mok}duR{V#=39$NP3q<@
zD^y<RT5qVm;74Opl^F1O(}eYN(@eN$ie1~}!{`gY;T-31+)YNcI2+VuW2<BGB^h3d
z`8_&H6xX5Y-Q4sZKkdWjoJ5vAu}dQ}zlE55o6Mah^Sz)cbn;mr97D~eZw)%6ta@vb
z&t}Ejkb@4ZUnA_A*9g4gHS~r~CRJ#|TZ2rD>bEdN&7)n~OW)7^;frV-;rSRuNC8Uy
zr2dSfV)|j?N9!qn6|z_C!wl0`6Iu$BCr0wk<lkCyw<ld+n2O9yX5xe&&RL2={3Exr
zH2B;GeA|gSPO_KeGY07>uP&dN<TI=Cg-D8|*04I4k>oOJa+ygkGpxaEf-Pn4Uu{b#
zMv||#r4uvBC$*Gqt>zQW;)rYMibXPi!{*R%jX7CO#<{g*+OAU8wav1wRm=L{MKBUg
zS1Uzm1!AJt$ceE6DKl!sOvl+C-K>!`gkaVQDnc-7Apnx5v6HxB;}XeBg-|zBDU?K0
zN;U3YICEQnnbWAF-^RI?&D>RHHQKt!iak;f)l8t8^;yjns;O^6Ac1OZgqk$VOq*3B
zyQxs>^-^i1Vs6ySxt<EJZq`eB3e()6J5rd&2AFA!Oyh3!hqjgeFgE)NMxC#a>JO`2
ze=NCuUsjmtB{%SO@6p2N-$%;@^DO8>_XeS-YrTnWUUCb}KTZ01e<CbL2m_hZPK3>a
zCN(Hwq$P}ICCs#hnU<(D!pqbOFYC;3n$M`qXQuf~Dc|laZEP@!3C99kpI&^|9nO~B
zn08QiFtg1J+n2nrV*e^oyNN-ci$VXm@-<39&1)2b{+-D{pNRnbF#Go-0Li#od^s-7
zVDytO<r&O=@}=HL^wE{=C+ys>QoWeZ=%;HZb@|LBpV_|+H&^jr6>tCjK0tD;>A>_h
z+mD;?|D8qS^)#A<{+MoVpEutET)+QUvox)+|JTtT_y7Jy_y4}}r^{eWejMW8XA}CX
zv&LWlF$6s~?)0L`Uv=wm$stQw&e-nW_o<+5+9RBVrz|;B399CJZ!fn27|dguB4*4W
zF^RH>CIWX@oa{ku%N}lZ14Dru9J=A4OGEo~V|Y)uh6K~)gd5y>fXDKbOM3bHbHYzA
zw#YxkS8_z*KE)yIm*YMBhkH-5HNOmoZE7+QN%H>P>mT2~dvkvJT1vcp_ZoEj$sUc8
z=Py@X+*QdA!zkRKr{$gy&{}bu7lL0@brH6)MVO|N`i_pyr~P0>=-=Z_dD*^A!anGh
zltOlfz|qk~6fNQ_NJW|@KT1xDGGJ{a2>5`>a|Vr-itPy>hN0UJ(1*b$cd(C?)`3Rz
zpd{f@m_&AIA?#id+`&AWK!P0A6w{#?Lz+;Uby*RjAau6xh7l9Rbq2zs(0bWn9fnha
z=Myr=uZk$ae({4nnHG`YGxQ=hgTBKPmTbS}b39=Aj27fgJ0?$`hNQ)~4~qR1{tRuW
z_7xAwAvSBu8w%$ES{QR-#cUT1<sr;-72zgzC=Y--VN7?dvmH^%X{2Y#?pz`^Y>{uI
zvgv9_YThs2y?sUNJz{4|)BTJ|o>bU*9@=|!k#ZKr0XuS9QjU_0?cC)B8F4n3JiU(j
zc3Q6dQp1j#3{xm5fiqBKBT$j@B{`wYm`D(fLVSavGed>Qy-4`po!PS-Bb=aYE96O9
z5Ioy<Nm<B7V>V!V=9FY|;y1><Y!U{YYq~Y6)t1u&+||S&Qm*LJI3GR!`Z*1~DL4|V
z?;=xbf+;x=j@OeXx#FBx6AnS%DLH;B;RAfy&ip*kDC3IJO%{F<hLrk)U-r^deG-HI
z&;O8ba>bY&C$V2LC{yK3pGF$`gzX~Stc;r|-)e=LiVHvg$n_-RMNYC2i1K+8m09+J
z>n6@ol2Lg^TP&NRvf?K~%Lof2=$8<WG_-FQ@7`Tq%D*w32Tdde8ecAl{``9$8mHR&
zadnI^xGGN465#LxBP-Ciyo;Da!9?iS`QrGG-D@aQchYl`oE{#YFPOJEL+#h6u}8(>
zQmvKhN~VZu2%%my2a}QH(&bzuA1V-XMrbJEkRAPPLfHHx2whq}?b*um7}g^>c})ES
zWC5yz)BUMjkJHT1RRF2iO|r4Ng;f`+N-h`4oixtA6-pDFMujQXR?3Nzrk7S*d3$Ct
z2GT;KkH_R|%{48Y)!o#gC`BdJ*6C~#wF#_~g|6jqWT>AI>Zfw3U=+Tg01*5qB2Gs9
zE?*5)Y80~2m|;i=^wX!G+kz01d0>b!Y*~ElFZ|7y%>0PLbG5`-3Yi;*bb=^eV7-|R
ztTJ6W1KgSTs~6~3f~l48V%{0!8MzAVDY-3K))aAocSk0iZa_w^q5>HayZnN8$FYne
zz<MRx!{6@-IinF5c|9fh6d7n<UgTFOB}MT#Au4H+^pH>%+V8n8FQr%tv$78<vF2H_
z<aq|NTnaMEG{`nTpXCqS{FG}Lugr6NQ6)1KjUWCAI$rMbd0k`U4Luz{d^c~~RH=Il
zP333HQWq{Uu3h23pikEYH-md-`6*Hw9tcLWS(jJBt$c3f83ieu>X+f9WR5_f0dJ3v
zz>Ck!6cqibikSuhrJJ8?ke{-7hvkBu<P)KCF{o;s8Rur}X72y<TxxQ}0O<6Tov)Ex
zIEqxZz)3pOAfxd^ISRaWWU?GD!r3@r{6z|z#k%R?+rp#-a4~aao@ZYi9lhYuEfM*)
zTm?I6g&8rB;3S%cy|fid0<kSkn(WkSCWkl-$h10(f9i=98j#X{1I7uAa+EV85yrm?
z*FMy{-V<R)g;SJHMfuQzXM1H#mx-3PR~czG5z92BjOa~hAk<i0^rZCFN$MkHoY>4!
zxq_@SA>SAjEE!z9IDP%Pd->tJtFz0iuBOnLwG2eQw?aEnv{s_`GNtsgQha}~exm3V
z#f+L_#))E76f>)FG*1+>qLfurnRTLA6~*l8%Ip)xt|;YHmvT-Nr=pZwgQ9z)xUJLy
z%#`!0YZKA?l<Xx7rJ>)vwq~?4`6}?#RHYKn`>9Y<Ha0Nf%|CS*q<`CIm-XM}0)y-}
zj{vnH{;TcSh4o*{(jM1;AJ=~$*MG~_fB&RQz0Jo_iPu%)igr;M{gnq+C4(kuY?Slt
zZ|l`(J^DY>a%^|gdTe$vwrUx(YPF2H`;{zvLN+sMOmp&aLBVg|$m0qB$ew(`k8ch}
z_UsFOeRD9ft6y;Rn}d;E|AOP+9E>~`D2xP}gOLXZg`q)nFdD$ooMgCA7%Vi0ROZfC
z1devwy4~U)d~VHgqoRG`FMFf)cen-+KkC$b(W&Va)EnG%O9|e^%N^+Tqe@A>iIgv#
zR_A-;SL}R{W^glb!)aCiWE%8XR>NrJ-;fiG<-MZ28k7mf)4|)~G8(sR@5_a>g@jSG
zd^ZZ?y(6MX;TPMi*c<wzRkd^GO*m}>Vo6bGom?1*Q!JiW_6^m%EG8S&x}3F{1}`m9
ztE@{))avEZ616J0v_!48EiF;2SW8Q6(xIh=c2}2W6MA%fyY+lt179FLl6>5_h&V#c
zr)P?sm<L*xMFx_cr)fU^hk?XA4!Xe#+^DU>KX2OqO)q=Nj{h~sb{@z7f5Gv;>K;@C
zf$m@!=>HhZgW#_h`WfMb6BH(LP4BJ102;G-)W=ab3Rlc;#KPgt!VmglBz;L`j`==G
z6j~N<_w753pm%Zh;`Hk5W%pmF7jMtszLI7dxVX~#P+Y$)zT+6=-trjJtm-qA_LDLT
zc^NDgYzI|4NqdP@rb*JyXUX$?H=|*9_~vb@b(wDN#uS<&w^wWu6NR3Tcp~<#B^BYe
zwLx3H-HUHn5~tCj*fg5r<&iI<Xx@*RR_6iu<nYw|k_FKya-+nP)^TAYR00<&#>pI<
zh<To^f)%<M0lO&QB)K|3K@6gu-B%<`OS!R9+w5AfE)5b)lTvtW+}%3RlCw-V>020B
zcUDTPok_sj(E|(y9lGDULH6Th9V~r8K?p3a{oXH1F<WOA&AReCu`!izHU-ir3-f?a
zl^fQS$*!tdszj2WUEL`&<=JQlcBx|)DeCauFbd#HyZ<5>+!w<%VwWmr*?~=@&ZUdx
z%x#xF79I~cpObbu>qhw?#XP!+dBgFmBAser8G39h1Eb8CXDFWf$kkTZBP-@r$@gP_
zhyaTv*qblZ8RmDH^?_mWMoQc<iM$5Gy_Vv34i&_8AMt6miP}-MR?Wc@e%n}b%@~ye
zmS=M*jpt|Vjrn=Qf*H0#3l40GaK1TYLUC+U`cdqpYLS}o4a!T2M@Lk!%1W`y@wkd6
z%CMwlu#Tl#$9t7BnH9qFJOx?BsI07A$sSYQA#L7*HUZtDw#k#UApFQ`4j#FdC}|oO
z)aTDt+^e6-p1x}BrPsBJvpw{h8!DjGu@J(|MlVE>W^CyN;{~vE?pBKCl|)Q`^Wnu0
z-OD#H1UtLvUj1<TRv}z>NN5XHkWE2d(5CDl$1YDdWp!f^kY{=RXqPzvR02-O33LvP
zMV~$4dvSVxd3M=7y}UfTxJvLt-6kQV7En|Z?PjGH3>9L<Pehlp{*r)ax<|k#g;ZV#
zYY7auSpa5)cn6>wU<};B>;z%v5Is@sXZk@h_j8D}6y)1?S7l&%KGs#tLKi!dFcCbZ
zS53Y5G1K$}Jd{ZaU~4<c%T*goo*`-8X0w^MlGI2hrEd;SwziZ>*ETYiNmUyyW-({R
zZ1UG%*cbk>i96D}Axb=_XRHS+JNR<Zf8)>ChAWUxfX%gER?!jR6YzixZ1nQs?YoPY
zXBRMXd2{-{d->u0`*#;tcYx7{DO4KFgDp`@g8*<WVQlsG@~r#A>E#a;!ymf0MebD;
z4aWhsQa||d=(hM>zCXRV4+L5It{BP&Nv;l)_nCDmx&#`t?%Z#g=+aLHJntiDH7)`-
z`Aqif4YdZ937=Su$N1flwWJ~^T$w5py(Jw3VV0Jk0hwr>XbE{P6ASw(-Eu`U*LaEN
z(`hh2htfVeIH5Ap+MfVMm>BA0ha|%z?1bo*qS^R_YAEbbODT`N%9H^TfVJSu{vq^T
zW;!(EJU5&R-R{CuOD)}Vreyv0j7K1*I^8yLtM^1!m(T1d6~QA<6yTe<Y71Nwbkg#r
zn=LZ{%EqHbI82@A040#<q)sD9w+5Bm=hYrZV3TRrNx;iDmuBaeo|Qpoy9h~oTNu5R
zt7^AubAT7oO#$HJ&cwUe{}<1K9<ECa_%83}I{@qF{|(2{%-sCHp=$c${QqBZ|DXMM
z|6_3z;J2O5p_p~2LMBd9#NU=pcYTy*K7ZspIX(yTaIo&8Z3Aqki|$Pr2Yk}&;}eqH
zfA!-NvQDizo6O>q1FQ1s02sI{)8z!L7mnKp=v4(RWvzs*R&FSsx)x*wuiFGO37w8X
zuyU}b{<IWOA<v5DInKxD%3pF^sI_9-;VEu@kmd>LW{7S<zbS=d@cuQO0cvtEKB=Q*
z&aaR;J-CuNw?a>jO2_HNx#uw=Fc918kXG}s*BytG3<Su6eYIv@ATJB#+*Iz@Wf&~F
zZ~eClv+tQ1(Sk2W-=a5XFmEn1J(@(HIemYg@KZt=@V@cF?37T_tetTGVYtIbj4;ya
zzVJK@-~B_CKie4({mH~fKbU_52fykLZphvbll<aqeSu(3j|$?)#*96lv&XZtArd7=
zX^U|0R%LUcP;V6Uej%JoNy4Q>L7DgQs%qj7L9>96L)U^@3(?J`S5sle7N2kYri9tf
z;}^^)W>TLS5pC67g(CXKBD{<UQS=#F5-np~9i7dCMHo<x&N_!Y;a_?k0#+dag@O?m
zV8}nAw`BEGSS&ws>u~M^FW!uTd62#)0+8aj$qI)|uW0YHUx(sXt5_l_Fx*gp*2vQ7
ztSrNbMTmewD4nFBW+xWsCzsQ*mVgr$%qfI1UdN7`v}s>~1!f=<PQXzuvIj=7AFn%w
z&cL9Nz?yW+7kNtbVt3(wvWZ@o<2SZ1SL)KT>G^CKw2qk~$*IY-SX<0%Y8|yz0>kf8
z?(@k3f-AnKbng1yl{D<k^zc6M6YbCmRx_nl4P>H~!s+6GnSts`x~2-+unMIuFdEii
zv=O0{hZ5#iX`GE5vljGMk$~9kpv{2v<#1QXE*bgQTua(zPg7d83q+}`5D$qXKWIn*
z{^CI6xUBvk{=hc~<Y2WKi*_EGAUgo-%%U3s4}q0nHmt&In8ECzIcCE$%rXpdP=lEi
z&g1tK%NNv!1S`L{U@^dC?TR{v_jzJTeDr!jH{q~gO+%3^*)=pmy>P$6@5Vpjy!G!~
zjMY{u(bCtBmHsf(uWASPfl2w`CuIF{oULSaPnD$f8S|TzNw{x6{lsGV#Nbf(V;p2)
zBQg96;_>O{90eHV5W}BrisbK5VH+K;vOizi)rXSV8U`(`c5Fk}bk(w4S9dMTaWTzt
zY}fW&!*eaoa#Vd2EVbjgn(dl~t-+()7C`Yjp66Le5gQ)e)`J7pY7<bujZrgo$MOu_
zvn<^M9CeDW>Kcye>V~7co~AXx%z|dPx@Q`iip^>hr(@|ld^b%?bv+m|0%p#3n5miz
zKsC#D9b2;uSEGrxWt+NbnTF#6Xw}_}oMqe4HrMfN1KO#2ZDMwSUIsj-qw5;<NpPUK
z+aU*Z_Z(LR(%G78>#j(1;eXpvU7%nC)I9pwx(0mUKw~)+&2Ur`zydIH%kC`uW2vSN
z@SvA1&G1zBOI{?ozL43zg5w*qy~ZjwYzN40nwsWlrl}hSChEGO8NjfrX1XTy%SOg0
zj28^x1jB{LF@QF@AUICbm`Heofma)2XF0m5LSLCC^cPT{V(e))P|2||cCqWYjc_xe
zEt;bOqk7mjyPfo`ZaNO6S(ddOY9Q$V5@cP|4cD|;V-3@=9LuzTT`XvE6Vyy(aTUbc
zu#qV}%1hgzqIWb8hy=n1|4a}hH_<Sv3euz@y<CeXZp6pvfW$z4-Za3CDFv$2L5MsH
zssV)#Z8WS#lyBIIvOho^&vKwjU;z{A1vX9PBHeM3E|S=6Mn#JCm97@CT%Sa)PjW6c
zU}JlH_)JJ*Wx}7q1hulc)A<v*NHUiqyhhoGwOZ2s?cg!6X__*il4nv6u}yMFW-8d*
z+`8T^n%BGg_Vub7*sEw^{@WL617!Uqz@Fr!;n486V3CgR5}z#Yg|l^sPXu%@Ps!ZM
zN^t<uU+G`)Vfg0&J{hv~bPTvkZTN2DUE25+T_TmG5rbabp4uB$>{l$NW4e~+V=@i7
z4|EEpqZ?-ODfapeScNv|CC4&MaA2rPhi|59fd;ghjW<A?9m_EcCpfTVO91Ii6<VgI
zg12G0IwYB@2j%#T7&S4)6(pr%a+GfNq`L~s)}1iq4bEY$D20_@Ay*ZcxB(anWYumC
z<_qTrJ~36<#nqkLW_<*`dywm16_YX@bAU$80rOznrl*3>q|@#J?QeoB;TYhtSm2Ve
zUbVoFbZm4e(f858^nt?%E+F`F;Gt^hT<V%?m@0UIhNruxCv_43vTHZh^iwjMce|bv
zU{)C2u5n*gPu=CaZK`NE3R$ra>^}ZwfL|D1Q_+D`#6(ZRHwXV3G#Ncdc;K&z*H@Y-
zKaMIf0HlI&>K2npEN^x&lh#3x)xqxuNFaQ-FvSAjwoPm^!x!Mun<n@VDt0&bbF#lx
z1H2Wm?;bFY%{@KcGdf_SEgPI|DzxCso8S|nJ8n}oVFI6ktgD`?GfRHpc61MbV!DfI
zz%vKvhM?D?J2*(8o|o-7COEyShC`tPbnie5pjQLll5;Dj3I{a)Xf#mLY4_I2a~H2%
zp@K-)!_%=M<cdKnqbUdAIL{jN=_=TfSO}u4s-4AhHSj0lcug0Cu75j>N6}69dO7U$
zhT%WL{&UcXx;)WX&Ea@NjWOqR3nnjzCc$cN!RqogDe}z19iZJA1%6*~nJkDO+cslY
z=(2g<D9q5_^XY84h}q2UoW@XH%$JK1fgZ<gJHQHb&5+t^@;UU6_7XzjJnVGV8An4J
zn?pG;sGwRnI|PGWBh7i;IOmGz1P6iTR6A5_XqY?ArVN(jao2AX`gzSi>3lYnW_vMl
z4RGvFuKPZLjF1P9*z#|!ma-2S!&Ns;Nue{~7$BE^=?uoW?{liOI*Vu=#u!wU{GH&&
zUn(_O98xf!afK@5<6H;0%-XBMzN8`!O#poxEieX(Sk=D4uyf-v;c!`3`Saltr8vfn
z=zY<hghatPNoZ@UlhfnAqUjdro;3BkG<8*)`nfcfS_It^YRPA6xN?Zz{Lh{={JCXu
zLq(+wS{VjqIRDUYp7;pn5lp58xDQ)=>}96F+PD0;EwrS(0V5k-%e1Uf(j$PsJOJPm
z#xmP{NdWFt`i}rlL`>ggz8+K;Z6o#%!#TV4&>zu8H&(I_B&9ns?^c}p2QiQjt^OVf
z`$*WoJYl){TJgzIx`Ip>)98Gb%g5G@-$)eOx@_qi3E*xR<=;pEZ_&>5O#<%@s7K!<
z0Plc$|5klcXea0G&&b<#tM!flBsJVCG__+oo}+7;=Q_4wn;_gt(=A=q9oNNRQnq2?
zdVUccZZ^TK5r$_uj;X39E$%t4XWNFVsitN)Jk2vSNB1-U=ooYgyA&EemS=jZfl4r4
zyJwGKdu~3>qbjBnSSDA~v@Lo8m&<KecT^XMsN0ssSK(A#u0eb~NYO1jqu_CgTeDSD
z*DYu+F4M^Xbyd|&!!&i=T;SMjgOa6Yo(v;9p66OnwS)U`bVJXA!!0Z(K-66zyK9)4
zCIf<|YSdFL2W&I_8baI1tXf9}mb6tB+GGHQT_-rufphE*@Rp<Nt^>`rb<++GGy^vh
zsg??i=YfH8OwYjG5q4P@Ivv*r0_(WoqI<yj02Zf#JE~>@MQqyv7SwD6oAv)y)wO`s
zj%L~L-ZD*7#@z<Xje_K8D%~l98!Ft42Fv!LBN5N=Y?L&86S=o$?=92<b#WZvT+h>0
z!&N;Ogcyhbl88%^x@oD7j!VRrrx~V&)X~Z%Po?XXj^#R<W;!4dj_x2FovwB2xK#yy
z*qR04;DwhH0Ey_(CoW|643IC}7^j=a0y^RYX$B}9+qN~(KOP`zt7YgQ<_N<CJ>__s
z=K<oLotG)oGo4BbmaUPpwGuInE~f5}i(6o77J)vujx!6>h3)#mTx{3p6*WKdVd#Zv
z@QR}YWjT$eIjeV~qaG^mK+jw^$U@uH9-V<K;W(Nqg^F=eM1i*Nqugh9drK<^G>=}J
zhi=d}#QnBnbwlb`O>973?eF_VK1ez|1xe59XLtjsl;w^zGawc8aB=Io*m^8wE9V`A
z-%bir9?IjbaehR+HF7`{{IhE9?_clI0*d~1gBBn+KxOP>)uQdWzKizwx%P<kYr2x8
zi9O%j_slil-JS&~8nC&!chR<|X>S!#JD_}rd(o3FB!{?teV^u~8C%U-k8!b5E<=!&
z9hDa92h(Um&xt_(^dte2Ed6nS2tCCWJ&}|<f5DEd_4`!h7ZI6{*h23ZKI4d90!tFg
z9U$dcU-hFPp8g-aMm1iJ@j3+=9%yEI3~5TPqs53s%LQ9Np0h(Wai_lbN=S_qI^<1$
zinty(U?Rww3P`>fZph<%Ao%_VjQ&h_K;$<-G-sl{n$36FZ4^|U1}y%>EjCfqqn7Ri
z5Gbab0a8>er245=#raRmIsdvb=l?-i?p9)VeM_-x#`#Zo<9sMCn7_#RjrF0lOC%RA
zrTp}8E|^HgQR8P6?T>>Kes(7p(WJgqpOxAz#>F=AA8>P1yj|$$2Js(s)zY+F{0G%B
zALBp#HSd4I_|F>o!C(FokRO8Z8ZUtw(a(RQ>z``xf4XFXc2ahu6CrHyH4Z?L_am?k
z6RIqsnIWowI+_W6b}}3d$YQjNSrO<Y0EEp&Ef=z5d`JP@3B`mvJxko_pm9hq=^Xnp
z^aRZ$0t4axQV~7$cw=Tm&fEmF-T)}>aBY^&Y?jTcLykunYSe6=w^^&x%z7h`Mx$J2
zqg=GbV9Bcc7D`4jUiLzB=zVV3E}|P2wDDt~`~rf5o7Sl9qCe=WKbzJl|LC~3qv15G
zr+S9#@u?@RW7{~J>RFay>ZX{H(mS?lsjlf7c(hP6OqOhPOxM&b({gRYF-@K4Haohd
z+n#4Su4mc0W8X<K?LQW43R>J-%*V0fwpo+uFTPS_II4%6l%a7Jpx0)z_Xf`1X)1s;
zG>hYTfay5f>DsF88M@=J%+M0uv^3XJp;|*11T#!Lz^Uu5W#}&30**&Pao!ft)WBw1
zMnYWO^*mL_6H|t(>MXCO+7?jJ)os(UJWJ#?RY!MJ&BlWk2A+;12U<tPjn}%T>zbqE
zMsI<z>AEK5(+tZs!C<#(qT#8k=UT3A(H-4sX3w-OM+2Y2v`wA2P6eu{(2b_+IRGrl
z+=Olkt<$T#&ZqCkxZ#yK>^y9AH8U%eJKGEj3nzWJPkQuLb}$O_U=YlMDPF0^6ypBj
zUbQ0QnT#5d(3*%uN7L|-h3#sV<r=^SBH7mQq=jK=KuAMNI?Qto8>p{&uCAF$pJV5G
zuBsZIV}MvB$&Tl!x((1R=wVC9S$1|;-7dLY7L2Sc@D&%&F%8dhEzMS4&a`+k#I+n9
zH}z|{Rzv5TUFc-Tfi?ibDGhYpz{LtYb)abm=OM?^ATJ)FRV~}%0Ca3WxRRg`Jdj)g
zz=CWR<kf83(o8N2ATp|BJGyRLHa&Dqg$7iSYJk||F%~ZY08P+6C~1MCN+;1NR1*)&
zfP{E92pGpeGhACYQ4#5&-4Xzr2>?u0wJgh1Ek|GrO$CWD@jR3vFaTKufrEU|!*<fM
zkPAR+potvK5|Z!IMG_EZ)plKt1JF&w(oN5H9r){}O#n#-`37y{;!1?*1w8ud*iacr
zq{o}!!Ec~|iU&xA`b7*hJeCY(aX>$F3~XE#0&QU9@lJySFdfiNAlE=l-O;@rCDzy|
zu{7pQ5Yt<nmcrlR2jrl{&6E#`V-#UJOv9YroWx%8pF-T9OCn9tlSO)|Xe`FjlI>{&
zxkO^Aj%ouRBpmA+D)zsQ>x#fQ2_GVVn<{Xz1%xq^WYcp%%Rv!%V<sBG)6hBsSs~rk
zl4MZ+E@)c~l(-5ePb51aOfZl_5TMMpG}&<Q*gEK5*Gv^YFsunS)3iMYIX%Gy^g37!
zGzS*w{j7}7P?@P*#AjTYbsP{P9TyV;jwg5!SVgz+@ITsb&EjmKIi`z8?^PY7KVeK@
zCNOq79?S=9IAelT0rTQ&i|$w!XErcVKyM4r9)Y)@rRo87p$07h3u<uTG(n_H%hvF)
zP=W!t2q+e=r@M~nr4)rKARnIahwc=j0ip?sDq1bjo*V$KzgZYN03=IIs0m8NLPthZ
zO^_u^Gy&KGL=DBju}lFKPdo#6fYt)uNP0;J9|koH9#gjk5y77U-EV<1b6{A^+hu#;
zDgY0dXnYglgu;exsWzUg)LmByrvX1u^ILYR9t<$?j*1JcU{+EU35WvuKzSN?080P>
z&H-)U+7=MaN&skvhfxtsT}^c?(AmI16b7=H#dRzw6<p@E=}N9p(%9SJ)o7qZz$~O%
z-UO^bn7|)|R)}5z0Fa7j?ZIIYz2K+-*1=U`OVvEij#b(vAOC(?5%HdixPfe*r~qY&
zLFeQMUwGU5f*jELdeInPcQ9fT-X@C2aWqqkI#;EmV93>r=F?yfqsTa@&`#ZGdOiOo
z-bqO%uRF{IX_W}FmKEf?vD9B5fN6kmfWB1G`!zYk0K=+|X@g^g$5uIOIjBBioZ)yF
z=O7gX%Z0oyssux|6S>!H7^ct@tswi{`hp4t6%7JzKvlZrOM^;OsWwko7Ak;L2fSDf
zKp8^392!0a9bORfR1h#As!A=0EgX07=RjIP?;FrIO=uGsBY^h+{s5>;Em4R-0&v|m
zuq7_|8Ug^QX!J}B^j17i_zJ4#U}y(05VmRF+SGr20Do9zkV<&@So)IxOeMUKugn|K
zC2u6@f8-0Wl`8IXN{x&GCf0W7Br6!PlwZMc+qwo^<zQ@(M4y|O0xS=v5;#2(P*BdO
z<v~0wRcyr3I^c|0j$@-o3=X*?hU_4OCT_pL{Sc-Y;2U5sf#Ed_-vV|}FunmmOz@Kp
z9ZZmCT52)~H*gFDelsR(W}~q=RdG5WnsXJm?Z6x?GMFw02O2OUP{qI;Ja7wdEOe-|
zkz|G+AnxF+;jU9x7;6nZ5F8WG;07Wy^neDV5A^tKM<^-q7HsG;8z;*3bm)Pz;b2kG
zGX&2?6Rv>@lz<U9Xg|r7G8}rO8hjWJ_x13+AT%&&0EGzXN&O4%f`xIV0E(x&ZQf)D
zcV&U@06L`#1(h*4Lf}dO>*zc$j8JfD0yqSNJz3Hq-ZQ~PF)+Y`OOe1tXt3phV+m<6
zoJ*$%aCQP32AT=HBn}z)7J37%ybBbPrw2d>0@c8Qc08BsHSioYoVx&0c+l#t=MwHp
zS?G7JENHOL?r<DkgVLZuS7#BMu}zeQh$1VL$I;RI2y)W3ujJctX~zEq&wyc0b7y~+
z7kLp)7xQRL0ZT>8liAI!<t1kL8+X!_zfZRB=xt3^SzqMP?Tw;&zdH`5!^J2*#;I-e
zba3Uup9DZDobTo#3}0Z@*DR;PYHnxCC)ea;h)k#bqvfIm^6&D_pn~3JDc^^SMK~Qk
z6Ynv^Y63nGh!}Y4o-H#|ZsR!9n=LYdnOS~u(O7y1gjh*W`(5V3Y&)e$%yIz?^YEOc
z6+pJ*Y%FHZx)=hWGodP0DQ8}BhQ5lNWEg%9ruR$HcCK&#iuB#ABDzTKZa<iZpM4Al
z8$W=~NbbigQhB<Zyca^RscL$a-f|j;!)ef`7l!o130}?;VSH-6Do-$+2gno#t&lB$
zqs15CMM`>IyMWqG(I{0abg*>1L37Ydt!&*AnYTbdSdI(;AXnH6VkL)Z74xP6#*zY=
z3u20Z!xr7anBEjAt_ulUQV;GXgZ~1adzsRLZEO*QlZezV;??FIOXU@dHEMLZ)H0cs
zBo@D=Ld(X)iFwWlDx3L#&kK~uY#~q>&C|}6Er3qjn~u#Tf+RS#iPoa^Y1P=hlwnCg
z>hVl7w#z`}d=I{#g)Ty;H~*08tA8@2&jxA9O}duHT`HgZsL5Lx9hK*;ccQi}Q>s-T
zFPM3J9-qhO@p*h6pU3C%d3+w9$LH~Rd>)_2=ka-b9-qhO@p*h6pU3C%d3+w9$LH~R
Yd>)_2=ka-bzUuS;0e@BqS^z-F030$Gh5!Hn


From e6d09fc7e3b760676699bd972163857ae0416f17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 23 Feb 2024 11:04:53 +0000
Subject: [PATCH 298/314] Refactorisations

---
 niftyreg_build_version.txt  |  2 +-
 reg-lib/cpu/_reg_mind.cpp   | 23 ++++++------------
 reg-lib/cpu/_reg_mind.h     |  9 +++----
 reg-lib/cuda/CudaCompute.cu | 48 ++++++++++++++-----------------------
 third-party/CMakeLists.txt  |  2 +-
 5 files changed, 32 insertions(+), 52 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 1c105f1a..53c86ff4 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-416
+417
diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp
index 0877e2ed..298b8a87 100644
--- a/reg-lib/cpu/_reg_mind.cpp
+++ b/reg-lib/cpu/_reg_mind.cpp
@@ -399,7 +399,7 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage,
     double mind = 0;
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     unique_ptr<int[]> combinedMask(new int[voxelNumber]);
-    auto GetMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor;
+    auto getMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor;
 
     for (int currentTimePoint = 0; currentTimePoint < referenceTimePoints; ++currentTimePoint) {
         if (timePointWeights[currentTimePoint] > 0) {
@@ -407,8 +407,8 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage,
             reg_tools_removeNanFromMask(referenceImage, combinedMask.get());
             reg_tools_removeNanFromMask(warpedImage, combinedMask.get());
 
-            GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.get(), descriptorOffset, currentTimePoint);
-            GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.get(), descriptorOffset, currentTimePoint);
+            getMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.get(), descriptorOffset, currentTimePoint);
+            getMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.get(), descriptorOffset, currentTimePoint);
 
             std::visit([&](auto&& refImgDataType) {
                 using RefImgDataType = std::decay_t<decltype(refImgDataType)>;
@@ -469,11 +469,11 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage,
     reg_tools_removeNanFromMask(referenceImage, combinedMask.data());
     reg_tools_removeNanFromMask(warpedImage, combinedMask.data());
 
-    auto GetMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor;
+    auto getMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor;
     // Compute the reference image descriptors
-    GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.data(), descriptorOffset, currentTimePoint);
+    getMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.data(), descriptorOffset, currentTimePoint);
     // Compute the warped floating image descriptors
-    GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.data(), descriptorOffset, currentTimePoint);
+    getMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.data(), descriptorOffset, currentTimePoint);
 
     for (int descIndex = 0; descIndex < descriptorNumber; ++descIndex) {
         // Compute the warped image descriptors gradient
@@ -526,13 +526,4 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) {
                                              this->descriptorNumber,
                                              currentTimePoint);
 }
-/* *************************************************************** */
-reg_mindssc::reg_mindssc(): reg_mind() {
-    this->mindType = MINDSSC_TYPE;
-    NR_FUNC_CALLED();
-}
-/* *************************************************************** */
-reg_mindssc::~reg_mindssc() {
-    NR_FUNC_CALLED();
-}
-/* *************************************************************** */
+/* *************************************************************** */
\ No newline at end of file
diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h
index 7fb44cf7..2cc081c3 100644
--- a/reg-lib/cpu/_reg_mind.h
+++ b/reg-lib/cpu/_reg_mind.h
@@ -16,7 +16,7 @@
 #include "_reg_globalTrans.h"
 #include "_reg_resampling.h"
 
-#define MIND_TYPE 0
+#define MIND_TYPE    0
 #define MINDSSC_TYPE 1
 
 /* *************************************************************** */
@@ -66,9 +66,10 @@ class reg_mind: public reg_ssd {
 class reg_mindssc: public reg_mind {
 public:
     /// @brief reg_mind class constructor
-    reg_mindssc();
-    /// @brief Measure class destructor
-    virtual ~reg_mindssc();
+    reg_mindssc() {
+        this->mindType = MINDSSC_TYPE;
+        NR_FUNC_CALLED();
+    }
 };
 /* *************************************************************** */
 void GetMindImageDescriptor(const nifti_image *inputImage,
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index d4b5a277..08493d4a 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -138,31 +138,6 @@ inline void UpdateControlPointPosition(float4 *currentDofCuda,
     });
 }
 /* *************************************************************** */
-template<bool optimiseX, bool optimiseY>
-static inline void UpdateControlPointPosition(float4 *currentDofCuda,
-                                              cudaTextureObject_t bestDofTexture,
-                                              cudaTextureObject_t gradientTexture,
-                                              const size_t nVoxels,
-                                              const float scale,
-                                              const bool optimiseZ) {
-    auto updateControlPointPosition = UpdateControlPointPosition<optimiseX, optimiseY, true>;
-    if (!optimiseZ) updateControlPointPosition = UpdateControlPointPosition<optimiseX, optimiseY, false>;
-    updateControlPointPosition(currentDofCuda, bestDofTexture, gradientTexture, nVoxels, scale);
-}
-/* *************************************************************** */
-template<bool optimiseX>
-static inline void UpdateControlPointPosition(float4 *currentDofCuda,
-                                              cudaTextureObject_t bestDofTexture,
-                                              cudaTextureObject_t gradientTexture,
-                                              const size_t nVoxels,
-                                              const float scale,
-                                              const bool optimiseY,
-                                              const bool optimiseZ) {
-    auto updateControlPointPosition = UpdateControlPointPosition<optimiseX, true>;
-    if (!optimiseY) updateControlPointPosition = UpdateControlPointPosition<optimiseX, false>;
-    updateControlPointPosition(currentDofCuda, bestDofTexture, gradientTexture, nVoxels, scale, optimiseZ);
-}
-/* *************************************************************** */
 void CudaCompute::UpdateControlPointPosition(float *currentDof,
                                              const float *bestDof,
                                              const float *gradient,
@@ -171,15 +146,28 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
                                              const bool optimiseY,
                                              const bool optimiseZ) {
     const nifti_image *controlPointGrid = dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid();
-    const bool is3d = controlPointGrid->nz > 1;
+    const bool optZ = optimiseZ && controlPointGrid->nz > 1;
     const size_t nVoxels = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
     auto bestDofTexturePtr = Cuda::CreateTextureObject(reinterpret_cast<const float4*>(bestDof), nVoxels, cudaChannelFormatKindFloat, 4);
     auto gradientTexturePtr = Cuda::CreateTextureObject(reinterpret_cast<const float4*>(gradient), nVoxels, cudaChannelFormatKindFloat, 4);
 
-    auto updateControlPointPosition = ::UpdateControlPointPosition<true>;
-    if (!optimiseX) updateControlPointPosition = ::UpdateControlPointPosition<false>;
-    updateControlPointPosition(reinterpret_cast<float4*>(currentDof), *bestDofTexturePtr, *gradientTexturePtr,
-                               nVoxels, scale, optimiseY, is3d ? optimiseZ : false);
+    decltype(::UpdateControlPointPosition<true, true, true>) *updateControlPointPosition;
+    if (optimiseX && optimiseY && optZ)
+        updateControlPointPosition = ::UpdateControlPointPosition<true, true, true>;
+    else if (optimiseX && optimiseY)
+        updateControlPointPosition = ::UpdateControlPointPosition<true, true, false>;
+    else if (optimiseX && optZ)
+        updateControlPointPosition = ::UpdateControlPointPosition<true, false, true>;
+    else if (optimiseY && optZ)
+        updateControlPointPosition = ::UpdateControlPointPosition<false, true, true>;
+    else if (optimiseX)
+        updateControlPointPosition = ::UpdateControlPointPosition<true, false, false>;
+    else if (optimiseY)
+        updateControlPointPosition = ::UpdateControlPointPosition<false, true, false>;
+    else if (optZ)
+        updateControlPointPosition = ::UpdateControlPointPosition<false, false, true>;
+    else return;
+    updateControlPointPosition(reinterpret_cast<float4*>(currentDof), *bestDofTexturePtr, *gradientTexturePtr, nVoxels, scale);
 }
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) {
diff --git a/third-party/CMakeLists.txt b/third-party/CMakeLists.txt
index 1357e060..a2cbbd99 100644
--- a/third-party/CMakeLists.txt
+++ b/third-party/CMakeLists.txt
@@ -10,7 +10,7 @@ if(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen)
   if(result)
     message(FATAL_ERROR "Failed to clone Eigen!")
   endif(result)
-  message(STATUS "Eigen is cloned into ${CMAKE_BINARY_DIR}/third-party/eigen")
+  message(STATUS "Eigen has been cloned into ${CMAKE_BINARY_DIR}/third-party/eigen")
 endif(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen)
 #-----------------------------------------------------------------------------
 if(OPENMP_FOUND)

From cee3df58cebb180df28b71c7ead1792398eab31e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 23 Feb 2024 11:34:14 +0000
Subject: [PATCH 299/314] Rename Measure as MeasureCreator

---
 niftyreg_build_version.txt                    |  2 +-
 reg-lib/CMakeLists.txt                        |  2 +-
 reg-lib/{Measure.cpp => MeasureCreator.cpp}   |  6 ++--
 reg-lib/{Measure.h => MeasureCreator.hpp}     |  2 +-
 reg-lib/MeasureCreatorFactory.hpp             |  8 +++++
 reg-lib/MeasureFactory.h                      |  8 -----
 reg-lib/Platform.cpp                          | 12 +++----
 reg-lib/Platform.h                            |  8 ++---
 reg-lib/_reg_base.cpp                         | 36 +++++++++----------
 reg-lib/_reg_base.h                           |  4 +--
 reg-lib/_reg_f3d2.cpp                         | 14 ++++----
 reg-lib/cuda/CMakeLists.txt                   |  2 +-
 ...CudaMeasure.cpp => CudaMeasureCreator.cpp} |  6 ++--
 .../{CudaMeasure.h => CudaMeasureCreator.hpp} |  4 +--
 reg-lib/cuda/CudaMeasureCreatorFactory.hpp    |  8 +++++
 reg-lib/cuda/CudaMeasureFactory.h             |  8 -----
 reg-test/reg_test_lncc.cpp                    |  8 ++---
 reg-test/reg_test_nmi.cpp                     |  8 ++---
 reg-test/reg_test_nmi_gradient.cpp            |  8 ++---
 reg-test/reg_test_regr_measure.cpp            |  6 ++--
 20 files changed, 80 insertions(+), 80 deletions(-)
 rename reg-lib/{Measure.cpp => MeasureCreator.cpp} (89%)
 rename reg-lib/{Measure.h => MeasureCreator.hpp} (93%)
 create mode 100644 reg-lib/MeasureCreatorFactory.hpp
 delete mode 100644 reg-lib/MeasureFactory.h
 rename reg-lib/cuda/{CudaMeasure.cpp => CudaMeasureCreator.cpp} (93%)
 rename reg-lib/cuda/{CudaMeasure.h => CudaMeasureCreator.hpp} (71%)
 create mode 100644 reg-lib/cuda/CudaMeasureCreatorFactory.hpp
 delete mode 100644 reg-lib/cuda/CudaMeasureFactory.h

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 53c86ff4..29aae8ee 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-417
+418
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index e319f92a..ac7a34b1 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -103,7 +103,7 @@ add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE}
   F3dContent.cpp
   Optimiser.cpp
   Platform.cpp
-  Measure.cpp
+  MeasureCreator.cpp
 )
 target_link_libraries(_reg_compute _reg_measure)
 install(TARGETS _reg_compute
diff --git a/reg-lib/Measure.cpp b/reg-lib/MeasureCreator.cpp
similarity index 89%
rename from reg-lib/Measure.cpp
rename to reg-lib/MeasureCreator.cpp
index bd586b8b..6ff56f71 100644
--- a/reg-lib/Measure.cpp
+++ b/reg-lib/MeasureCreator.cpp
@@ -1,4 +1,4 @@
-#include "Measure.h"
+#include "MeasureCreator.hpp"
 #include "_reg_nmi.h"
 #include "_reg_ssd.h"
 #include "_reg_dti.h"
@@ -7,7 +7,7 @@
 #include "_reg_mind.h"
 
 /* *************************************************************** */
-reg_measure* Measure::Create(const MeasureType measureType) {
+reg_measure* MeasureCreator::Create(const MeasureType measureType) {
     switch (measureType) {
     case MeasureType::Nmi:
         return new reg_nmi();
@@ -29,7 +29,7 @@ reg_measure* Measure::Create(const MeasureType measureType) {
     }
 }
 /* *************************************************************** */
-void Measure::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) {
+void MeasureCreator::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) {
     measure.InitialiseMeasure(con.GetReference(),
                               con.GetFloating(),
                               con.GetReferenceMask(),
diff --git a/reg-lib/Measure.h b/reg-lib/MeasureCreator.hpp
similarity index 93%
rename from reg-lib/Measure.h
rename to reg-lib/MeasureCreator.hpp
index c20989d7..c0dfde67 100644
--- a/reg-lib/Measure.h
+++ b/reg-lib/MeasureCreator.hpp
@@ -5,7 +5,7 @@
 
 enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, MindSsc };
 
-class Measure {
+class MeasureCreator {
 public:
     virtual reg_measure* Create(const MeasureType measureType);
     virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr);
diff --git a/reg-lib/MeasureCreatorFactory.hpp b/reg-lib/MeasureCreatorFactory.hpp
new file mode 100644
index 00000000..d51b6db1
--- /dev/null
+++ b/reg-lib/MeasureCreatorFactory.hpp
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "MeasureCreator.hpp"
+
+class MeasureCreatorFactory {
+public:
+    virtual MeasureCreator* Produce() { return new MeasureCreator(); }
+};
diff --git a/reg-lib/MeasureFactory.h b/reg-lib/MeasureFactory.h
deleted file mode 100644
index 9c1927a9..00000000
--- a/reg-lib/MeasureFactory.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#pragma once
-
-#include "Measure.h"
-
-class MeasureFactory {
-public:
-    virtual Measure* Produce() { return new Measure(); }
-};
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index e9b6d4ed..482089fa 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -6,7 +6,7 @@
 #include "CudaComputeFactory.h"
 #include "CudaContentCreatorFactory.h"
 #include "CudaKernelFactory.h"
-#include "CudaMeasureFactory.h"
+#include "CudaMeasureCreatorFactory.hpp"
 #include "CudaOptimiser.hpp"
 #endif
 #ifdef USE_OPENCL
@@ -24,7 +24,7 @@ Platform::Platform(const PlatformType platformTypeIn) {
         computeFactory = new ComputeFactory();
         contentCreatorFactory = new ContentCreatorFactory();
         kernelFactory = new CpuKernelFactory();
-        measureFactory = new MeasureFactory();
+        measureCreatorFactory = new MeasureCreatorFactory();
     }
 #ifdef USE_CUDA
     else if (platformType == PlatformType::Cuda) {
@@ -33,7 +33,7 @@ Platform::Platform(const PlatformType platformTypeIn) {
         computeFactory = new CudaComputeFactory();
         contentCreatorFactory = new CudaContentCreatorFactory();
         kernelFactory = new CudaKernelFactory();
-        measureFactory = new CudaMeasureFactory();
+        measureCreatorFactory = new CudaMeasureCreatorFactory();
     }
 #endif
 #ifdef USE_OPENCL
@@ -52,7 +52,7 @@ Platform::~Platform() {
     delete computeFactory;
     delete contentCreatorFactory;
     delete kernelFactory;
-    delete measureFactory;
+    delete measureCreatorFactory;
 }
 /* *************************************************************** */
 std::string Platform::GetName() const {
@@ -104,8 +104,8 @@ Kernel* Platform::CreateKernel(const std::string& name, Content *con) const {
     return kernelFactory->Produce(name, con);
 }
 /* *************************************************************** */
-Measure* Platform::CreateMeasure() const {
-    return measureFactory->Produce();
+MeasureCreator* Platform::CreateMeasureCreator() const {
+    return measureCreatorFactory->Produce();
 }
 /* *************************************************************** */
 template<typename Type>
diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h
index ee82a04e..9e2ca5ad 100755
--- a/reg-lib/Platform.h
+++ b/reg-lib/Platform.h
@@ -4,7 +4,7 @@
 #include "ComputeFactory.h"
 #include "ContentCreatorFactory.h"
 #include "KernelFactory.h"
-#include "MeasureFactory.h"
+#include "MeasureCreatorFactory.hpp"
 #include "Optimiser.hpp"
 
 enum class PlatformType { Cpu, Cuda, OpenCl };
@@ -34,7 +34,7 @@ class Platform {
     Compute* CreateCompute(Content& con) const;
     ContentCreator* CreateContentCreator(const ContentType conType = ContentType::Base) const;
     Kernel* CreateKernel(const std::string& name, Content *con) const;
-    Measure* CreateMeasure() const;
+    MeasureCreator* CreateMeasureCreator() const;
     template<typename Type>
     Optimiser<Type>* CreateOptimiser(F3dContent& con,
                                      InterfaceOptimiser& opt,
@@ -62,8 +62,8 @@ class Platform {
     ComputeFactory *computeFactory = nullptr;
     ContentCreatorFactory *contentCreatorFactory = nullptr;
     KernelFactory *kernelFactory = nullptr;
-    MeasureFactory *measureFactory = nullptr;
+    MeasureCreatorFactory *measureCreatorFactory = nullptr;
     std::string platformName;
     PlatformType platformType;
-    unsigned gpuIdx;
+    unsigned gpuIdx = 0;
 };
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 2190241f..cc5e8be5 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -267,7 +267,7 @@ void reg_base<T>::CheckParameters() {
     // Set the default similarity measure if none has been set
     if (!measure_nmi && !measure_ssd && !measure_dti && !measure_lncc &&
         !measure_kld && !measure_mind && !measure_mindssc) {
-        measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
+        measure_nmi.reset(dynamic_cast<reg_nmi*>(measureCreator->Create(MeasureType::Nmi)));
         for (int i = 0; i < inputReference->nt; ++i)
             measure_nmi->SetTimePointWeight(i, 1.0);
     }
@@ -360,25 +360,25 @@ void reg_base<T>::InitialiseSimilarity() {
     DefContent& con = dynamic_cast<DefContent&>(*this->con);
 
     if (measure_nmi)
-        measure->Initialise(*measure_nmi, con);
+        measureCreator->Initialise(*measure_nmi, con);
 
     if (measure_ssd)
-        measure->Initialise(*measure_ssd, con);
+        measureCreator->Initialise(*measure_ssd, con);
 
     if (measure_kld)
-        measure->Initialise(*measure_kld, con);
+        measureCreator->Initialise(*measure_kld, con);
 
     if (measure_lncc)
-        measure->Initialise(*measure_lncc, con);
+        measureCreator->Initialise(*measure_lncc, con);
 
     if (measure_dti)
-        measure->Initialise(*measure_dti, con);
+        measureCreator->Initialise(*measure_dti, con);
 
     if (measure_mind)
-        measure->Initialise(*measure_mind, con);
+        measureCreator->Initialise(*measure_mind, con);
 
     if (measure_mindssc)
-        measure->Initialise(*measure_mindssc, con);
+        measureCreator->Initialise(*measure_mindssc, con);
 
     NR_FUNC_CALLED();
 }
@@ -551,7 +551,7 @@ void reg_base<T>::GetVoxelBasedGradient() {
 //void reg_base<T>::ApproximateParzenWindow()
 //{
 //    if(!measure_nmi)
-//        measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
+//        measure_nmi.reset(dynamic_cast<reg_nmi*>(measureCreator->Create(MeasureType::Nmi)));
 //    measure_nmi=approxParzenWindow = true;
 //}
 ///* *************************************************************** */
@@ -559,14 +559,14 @@ void reg_base<T>::GetVoxelBasedGradient() {
 //void reg_base<T>::DoNotApproximateParzenWindow()
 //{
 //    if(!measure_nmi)
-//        measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
+//        measure_nmi.reset(dynamic_cast<reg_nmi*>(measureCreator->Create(MeasureType::Nmi)));
 //    measure_nmi=approxParzenWindow = false;
 //}
 /* *************************************************************** */
 template<class T>
 void reg_base<T>::UseNMISetReferenceBinNumber(int timePoint, int refBinNumber) {
     if (!measure_nmi)
-        measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
+        measure_nmi.reset(dynamic_cast<reg_nmi*>(measureCreator->Create(MeasureType::Nmi)));
     measure_nmi->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
@@ -577,7 +577,7 @@ void reg_base<T>::UseNMISetReferenceBinNumber(int timePoint, int refBinNumber) {
 template<class T>
 void reg_base<T>::UseNMISetFloatingBinNumber(int timePoint, int floBinNumber) {
     if (!measure_nmi)
-        measure_nmi.reset(dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)));
+        measure_nmi.reset(dynamic_cast<reg_nmi*>(measureCreator->Create(MeasureType::Nmi)));
     measure_nmi->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0
     // I am here adding 4 to the specified bin number to accommodate for
     // the spline support
@@ -588,7 +588,7 @@ void reg_base<T>::UseNMISetFloatingBinNumber(int timePoint, int floBinNumber) {
 template<class T>
 void reg_base<T>::UseSSD(int timePoint, bool normalise) {
     if (!measure_ssd)
-        measure_ssd.reset(dynamic_cast<reg_ssd*>(measure->Create(MeasureType::Ssd)));
+        measure_ssd.reset(dynamic_cast<reg_ssd*>(measureCreator->Create(MeasureType::Ssd)));
     measure_ssd->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0
     measure_ssd->SetNormaliseTimePoint(timePoint, normalise);
     NR_FUNC_CALLED();
@@ -597,7 +597,7 @@ void reg_base<T>::UseSSD(int timePoint, bool normalise) {
 template<class T>
 void reg_base<T>::UseMIND(int timePoint, int offset) {
     if (!measure_mind)
-        measure_mind.reset(dynamic_cast<reg_mind*>(measure->Create(MeasureType::Mind)));
+        measure_mind.reset(dynamic_cast<reg_mind*>(measureCreator->Create(MeasureType::Mind)));
     measure_mind->SetTimePointWeight(timePoint, 1.0);//weight set to 1.0 to indicate time point is active
     measure_mind->SetDescriptorOffset(offset);
     NR_FUNC_CALLED();
@@ -606,7 +606,7 @@ void reg_base<T>::UseMIND(int timePoint, int offset) {
 template<class T>
 void reg_base<T>::UseMINDSSC(int timePoint, int offset) {
     if (!measure_mindssc)
-        measure_mindssc.reset(dynamic_cast<reg_mindssc*>(measure->Create(MeasureType::MindSsc)));
+        measure_mindssc.reset(dynamic_cast<reg_mindssc*>(measureCreator->Create(MeasureType::MindSsc)));
     measure_mindssc->SetTimePointWeight(timePoint, 1.0);//weight set to 1.0 to indicate time point is active
     measure_mindssc->SetDescriptorOffset(offset);
     NR_FUNC_CALLED();
@@ -615,7 +615,7 @@ void reg_base<T>::UseMINDSSC(int timePoint, int offset) {
 template<class T>
 void reg_base<T>::UseKLDivergence(int timePoint) {
     if (!measure_kld)
-        measure_kld.reset(dynamic_cast<reg_kld*>(measure->Create(MeasureType::Kld)));
+        measure_kld.reset(dynamic_cast<reg_kld*>(measureCreator->Create(MeasureType::Kld)));
     measure_kld->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0
     NR_FUNC_CALLED();
 }
@@ -623,7 +623,7 @@ void reg_base<T>::UseKLDivergence(int timePoint) {
 template<class T>
 void reg_base<T>::UseLNCC(int timePoint, float stddev) {
     if (!measure_lncc)
-        measure_lncc.reset(dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc)));
+        measure_lncc.reset(dynamic_cast<reg_lncc*>(measureCreator->Create(MeasureType::Lncc)));
     measure_lncc->SetKernelStandardDeviation(timePoint, stddev);
     measure_lncc->SetTimePointWeight(timePoint, 1.0); // weight initially set to default value of 1.0
     NR_FUNC_CALLED();
@@ -642,7 +642,7 @@ void reg_base<T>::UseDTI(bool *timePoint) {
     NR_FATAL_ERROR("The use of DTI has been deactivated as it requires some refactoring");
 
     if (!measure_dti)
-        measure_dti.reset(dynamic_cast<reg_dti*>(measure->Create(MeasureType::Dti)));
+        measure_dti.reset(dynamic_cast<reg_dti*>(measureCreator->Create(MeasureType::Dti)));
     for (int i = 0; i < inputReference->nt; ++i) {
         if (timePoint[i])
             measure_dti->SetTimePointWeight(i, 1.0);  // weight set to 1.0 to indicate time point is active
diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h
index 3b4b91c3..26995020 100644
--- a/reg-lib/_reg_base.h
+++ b/reg-lib/_reg_base.h
@@ -42,7 +42,7 @@ class reg_base: public InterfaceOptimiser {
     unique_ptr<Compute> compute;
 
     // Measure
-    unique_ptr<Measure> measure;
+    unique_ptr<MeasureCreator> measureCreator;
 
     // Optimiser-related variables
     unique_ptr<Optimiser<T>> optimiser;
@@ -143,7 +143,7 @@ class reg_base: public InterfaceOptimiser {
     // Platform
     virtual void SetPlatformType(const PlatformType platformType) {
         platform.reset(new Platform(platformType));
-        measure.reset(platform->CreateMeasure());
+        measureCreator.reset(platform->CreateMeasureCreator());
     }
     virtual void SetGpuIdx(const unsigned gpuIdx) { platform->SetGpuIdx(gpuIdx); }
 
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index c994a471..eaa7a6f0 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -489,25 +489,25 @@ void reg_f3d2<T>::InitialiseSimilarity() {
     F3dContent& con = dynamic_cast<F3dContent&>(*this->con);
 
     if (this->measure_nmi)
-        this->measure->Initialise(*this->measure_nmi, con, conBw.get());
+        this->measureCreator->Initialise(*this->measure_nmi, con, conBw.get());
 
     if (this->measure_ssd)
-        this->measure->Initialise(*this->measure_ssd, con, conBw.get());
+        this->measureCreator->Initialise(*this->measure_ssd, con, conBw.get());
 
     if (this->measure_kld)
-        this->measure->Initialise(*this->measure_kld, con, conBw.get());
+        this->measureCreator->Initialise(*this->measure_kld, con, conBw.get());
 
     if (this->measure_lncc)
-        this->measure->Initialise(*this->measure_lncc, con, conBw.get());
+        this->measureCreator->Initialise(*this->measure_lncc, con, conBw.get());
 
     if (this->measure_dti)
-        this->measure->Initialise(*this->measure_dti, con, conBw.get());
+        this->measureCreator->Initialise(*this->measure_dti, con, conBw.get());
 
     if (this->measure_mind)
-        this->measure->Initialise(*this->measure_mind, con, conBw.get());
+        this->measureCreator->Initialise(*this->measure_mind, con, conBw.get());
 
     if (this->measure_mindssc)
-        this->measure->Initialise(*this->measure_mindssc, con, conBw.get());
+        this->measureCreator->Initialise(*this->measure_mindssc, con, conBw.get());
 
     NR_FUNC_CALLED();
 }
diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt
index 9685b0b6..ad3e7c46 100755
--- a/reg-lib/cuda/CMakeLists.txt
+++ b/reg-lib/cuda/CMakeLists.txt
@@ -76,7 +76,7 @@ add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
     CudaKernelFactory.cpp
     CudaLocalTransformation.cu
     CudaLtsKernel.cpp
-    CudaMeasure.cpp
+    CudaMeasureCreator.cpp
     CudaNormaliseGradient.cu
     CudaOptimiser.cu
     CudaResampleImageKernel.cpp
diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasureCreator.cpp
similarity index 93%
rename from reg-lib/cuda/CudaMeasure.cpp
rename to reg-lib/cuda/CudaMeasureCreator.cpp
index 793aa61a..3795297d 100644
--- a/reg-lib/cuda/CudaMeasure.cpp
+++ b/reg-lib/cuda/CudaMeasureCreator.cpp
@@ -1,10 +1,10 @@
-#include "CudaMeasure.h"
+#include "CudaMeasureCreator.hpp"
 #include "CudaDefContent.h"
 #include "_reg_nmi_gpu.h"
 #include "_reg_ssd_gpu.h"
 
 /* *************************************************************** */
-reg_measure* CudaMeasure::Create(const MeasureType measureType) {
+reg_measure* CudaMeasureCreator::Create(const MeasureType measureType) {
     switch (measureType) {
     case MeasureType::Nmi:
         return new reg_nmi_gpu();
@@ -26,7 +26,7 @@ reg_measure* CudaMeasure::Create(const MeasureType measureType) {
     }
 }
 /* *************************************************************** */
-void CudaMeasure::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) {
+void CudaMeasureCreator::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) {
     reg_measure_gpu& measureGpu = dynamic_cast<reg_measure_gpu&>(measure);
     CudaDefContent& cudaCon = dynamic_cast<CudaDefContent&>(con);
     CudaDefContent *cudaConBw = dynamic_cast<CudaDefContent*>(conBw);
diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasureCreator.hpp
similarity index 71%
rename from reg-lib/cuda/CudaMeasure.h
rename to reg-lib/cuda/CudaMeasureCreator.hpp
index 76f73900..368914ef 100644
--- a/reg-lib/cuda/CudaMeasure.h
+++ b/reg-lib/cuda/CudaMeasureCreator.hpp
@@ -1,8 +1,8 @@
 #pragma once
 
-#include "Measure.h"
+#include "MeasureCreator.hpp"
 
-class CudaMeasure: public Measure {
+class CudaMeasureCreator: public MeasureCreator {
 public:
     virtual reg_measure* Create(const MeasureType measureType) override;
     virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr) override;
diff --git a/reg-lib/cuda/CudaMeasureCreatorFactory.hpp b/reg-lib/cuda/CudaMeasureCreatorFactory.hpp
new file mode 100644
index 00000000..d971855b
--- /dev/null
+++ b/reg-lib/cuda/CudaMeasureCreatorFactory.hpp
@@ -0,0 +1,8 @@
+#pragma once
+
+#include "CudaMeasureCreator.hpp"
+
+class CudaMeasureCreatorFactory: public MeasureCreatorFactory {
+public:
+    virtual MeasureCreator* Produce() override { return new CudaMeasureCreator(); }
+};
diff --git a/reg-lib/cuda/CudaMeasureFactory.h b/reg-lib/cuda/CudaMeasureFactory.h
deleted file mode 100644
index 58061a23..00000000
--- a/reg-lib/cuda/CudaMeasureFactory.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#pragma once
-
-#include "CudaMeasure.h"
-
-class CudaMeasureFactory: public MeasureFactory {
-public:
-    virtual Measure* Produce() override { return new CudaMeasure(); }
-};
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index 528a1642..e1bcd0ad 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -151,13 +151,13 @@ class LnccTest {
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->ResampleImage(0, 0);
                 content->SetWarped(floating.disown());
-                // Create the measure
-                unique_ptr<Measure> measure{ platform->CreateMeasure() };
+                // Create the measure creator
+                unique_ptr<MeasureCreator> measureCreator{ platform->CreateMeasureCreator() };
                 // Use LNCC as a measure
-                unique_ptr<reg_lncc> measure_lncc{ dynamic_cast<reg_lncc*>(measure->Create(MeasureType::Lncc)) };
+                unique_ptr<reg_lncc> measure_lncc{ dynamic_cast<reg_lncc*>(measureCreator->Create(MeasureType::Lncc)) };
                 measure_lncc->SetKernelStandardDeviation(0, sigma);
                 measure_lncc->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0
-                measure->Initialise(*measure_lncc, *content);
+                measureCreator->Initialise(*measure_lncc, *content);
                 const double lncc = measure_lncc->GetSimilarityMeasureValue();
                 // Save for testing
                 testCases.push_back({ testName, lncc, expLncc });
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index 12941952..d3a2770e 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -82,12 +82,12 @@ class NmiTest {
                 unique_ptr<DefContent> content{ contentCreator->Create(reference, floating) };
                 // Initialise the warped image using floating image
                 content->SetWarped(floating.disown());
-                // Create the measure
-                unique_ptr<Measure> measure{ platform->CreateMeasure() };
+                // Create the measure creator
+                unique_ptr<MeasureCreator> measureCreator{ platform->CreateMeasureCreator() };
                 // Use NMI as a measure
-                unique_ptr<reg_nmi> measure_nmi{ dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)) };
+                unique_ptr<reg_nmi> measure_nmi{ dynamic_cast<reg_nmi*>(measureCreator->Create(MeasureType::Nmi)) };
                 measure_nmi->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0
-                measure->Initialise(*measure_nmi, *content);
+                measureCreator->Initialise(*measure_nmi, *content);
                 const double nmi = measure_nmi->GetSimilarityMeasureValue();
 
                 testCases.push_back({ testName + " " + platform->GetName(), nmi, expected });
diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp
index fdb769ba..5342e1b1 100644
--- a/reg-test/reg_test_nmi_gradient.cpp
+++ b/reg-test/reg_test_nmi_gradient.cpp
@@ -90,14 +90,14 @@ class NmiGradientTest {
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->ResampleImage(1, padding);
                 compute->GetImageGradient(1, padding, 0);
-                // Create the measure
-                unique_ptr<Measure> measure{ platform->CreateMeasure() };
+                // Create the measure creator
+                unique_ptr<MeasureCreator> measureCreator{ platform->CreateMeasureCreator() };
                 // Use NMI as a measure
-                unique_ptr<reg_nmi> measure_nmi{ dynamic_cast<reg_nmi*>(measure->Create(MeasureType::Nmi)) };
+                unique_ptr<reg_nmi> measure_nmi{ dynamic_cast<reg_nmi*>(measureCreator->Create(MeasureType::Nmi)) };
                 measure_nmi->DoNotApproximatePw();
                 measure_nmi->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0
                 measure_nmi->SetRefAndFloatBinNumbers(binNumber, binNumber, 0);
-                measure->Initialise(*measure_nmi, *content);
+                measureCreator->Initialise(*measure_nmi, *content);
                 // Compute the NMI gradient
                 measure_nmi->GetVoxelBasedSimilarityMeasureGradient(0);
                 // Create an image to store the gradient values
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 6bcdf88e..08b25515 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -1,7 +1,7 @@
 #include "reg_test_common.h"
 #include "_reg_nmi.h"
 #include "CudaF3dContent.h"
-#include "CudaMeasure.h"
+#include "CudaMeasureCreator.hpp"
 
 /**
  *  Measure regression tests to ensure the CPU and CUDA versions yield the same output
@@ -94,8 +94,8 @@ class MeasureTest {
         Platform platformCuda(PlatformType::Cuda);
 
         // Create the measures
-        unique_ptr<Measure> measureCreatorCpu{ new Measure() };
-        unique_ptr<Measure> measureCreatorCuda{ new CudaMeasure() };
+        unique_ptr<MeasureCreator> measureCreatorCpu{ new MeasureCreator() };
+        unique_ptr<MeasureCreator> measureCreatorCuda{ new CudaMeasureCreator() };
 
         // Create the content creators
         unique_ptr<F3d2ContentCreator> contentCreatorCpu{ dynamic_cast<F3d2ContentCreator*>(platformCpu.CreateContentCreator(ContentType::F3d2)) };

From e568f9574fc5da690ec5616b2167c4f81b05a4c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 5 Mar 2024 13:52:49 +0000
Subject: [PATCH 300/314] Refactor _reg_maths* and _reg_common_cuda_kernels

---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_average.cpp                      |  21 +-
 reg-apps/reg_benchmark.cpp                    |   2 +-
 reg-apps/reg_gpuinfo.cpp                      |   2 +-
 reg-apps/reg_jacobian.cpp                     |   2 +-
 reg-apps/reg_resample.cpp                     |   2 +-
 reg-apps/reg_tools.cpp                        |   4 +-
 reg-apps/reg_transform.cpp                    |  35 +-
 reg-io/_reg_ReadWriteMatrix.cpp               |  10 +-
 reg-io/nrrd/reg_nrrd.cpp                      |   4 +-
 reg-lib/CMakeLists.txt                        |   3 +-
 reg-lib/Debug.hpp                             |   4 +-
 reg-lib/_reg_aladin.cpp                       |  10 +-
 reg-lib/_reg_aladin_sym.cpp                   |  11 +-
 reg-lib/_reg_base.cpp                         |   4 +-
 reg-lib/cl/ClAffineDeformationFieldKernel.cpp |   2 +-
 reg-lib/cpu/{_reg_maths.cpp => Maths.cpp}     | 559 +++++++-----------
 reg-lib/cpu/Maths.hpp                         | 363 ++++++++++++
 reg-lib/cpu/_reg_blockMatching.cpp            |  14 +-
 reg-lib/cpu/_reg_blockMatching.h              |   2 +-
 reg-lib/cpu/_reg_globalTrans.cpp              | 206 ++++---
 reg-lib/cpu/_reg_localTrans.cpp               |  77 ++-
 reg-lib/cpu/_reg_localTrans_jac.cpp           |  72 ++-
 reg-lib/cpu/_reg_localTrans_regul.cpp         |  23 +-
 reg-lib/cpu/_reg_maths.h                      | 200 -------
 reg-lib/cpu/_reg_maths_eigen.cpp              | 205 -------
 reg-lib/cpu/_reg_maths_eigen.h                |  36 --
 reg-lib/cpu/_reg_resampling.cpp               |  45 +-
 reg-lib/cpu/_reg_ssd.cpp                      |   4 +-
 reg-lib/cpu/_reg_tools.cpp                    |   8 +-
 reg-lib/cpu/_reg_tools.h                      |   2 +-
 reg-lib/cuda/CudaCommon.hpp                   |  16 +
 reg-lib/cuda/CudaGlobalTransformation.cu      |   7 +-
 reg-lib/cuda/CudaLocalTransformation.cu       |  20 +-
 .../cuda/CudaLocalTransformationKernels.cu    | 147 ++++-
 reg-lib/cuda/CudaOptimiser.cu                 |   1 -
 reg-lib/cuda/CudaResampling.cu                |  15 +-
 reg-lib/cuda/CudaTools.cu                     |  10 +-
 reg-lib/cuda/CudaToolsKernels.cu              |   8 +-
 reg-lib/cuda/_reg_common_cuda_kernels.cu      | 157 -----
 reg-lib/cuda/_reg_nmi_gpu.cu                  |   1 -
 reg-lib/cuda/affineDeformationKernel.cu       |   2 +-
 reg-lib/cuda/blockMatchingKernel.cu           |   2 +-
 reg-lib/cuda/resampleKernel.cu                |  14 +-
 reg-test/reg_test_affineDeformationField.cpp  |   6 +-
 reg-test/reg_test_be.cpp                      |   4 +-
 reg-test/reg_test_blockMatching.cpp           |   2 +-
 reg-test/reg_test_regr_lts.cpp                |   4 +-
 .../reg_test_voxelCentricToNodeCentric.cpp    |  12 +-
 49 files changed, 1026 insertions(+), 1336 deletions(-)
 rename reg-lib/cpu/{_reg_maths.cpp => Maths.cpp} (50%)
 create mode 100644 reg-lib/cpu/Maths.hpp
 delete mode 100644 reg-lib/cpu/_reg_maths.h
 delete mode 100644 reg-lib/cpu/_reg_maths_eigen.cpp
 delete mode 100644 reg-lib/cpu/_reg_maths_eigen.h
 delete mode 100644 reg-lib/cuda/_reg_common_cuda_kernels.cu

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 29aae8ee..7b53aa00 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-418
+419
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index 0b57a922..07446e4d 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -18,7 +18,6 @@
 #include "_reg_resampling.h"
 #include "_reg_globalTrans.h"
 #include "_reg_localTrans.h"
-#include "_reg_maths_eigen.h"
 
 using PrecisionType = float;
 
@@ -112,7 +111,7 @@ mat44 compute_average_matrices(size_t matrixNumber,
    // Input matrices are logged in place
    for(size_t m=0; m<matrixNumber; ++m)
    {
-      matrices[m] = reg_mat44_logm(&matrices[m]);
+      matrices[m] = Mat44Logm(&matrices[m]);
       matrixWeight[m]=1.;
    }
    // The number of iteration to perform is defined based on the use of lts
@@ -196,7 +195,7 @@ mat44 compute_average_matrices(size_t matrixNumber,
             matrixIndexSorted[m]=m;
          }
          // Sort the computed distances
-         reg_heapSort(matrixWeight, matrixIndexSorted, matrixNumber);
+         HeapSort(matrixWeight, matrixIndexSorted, matrixNumber);
          // Re-assign the weights for the next iteration
          memset(matrixWeight, 0, matrixNumber*sizeof(float));
          for(size_t m=matrixNumber-1; m>lts_inlier * matrixNumber; --m)
@@ -205,7 +204,7 @@ mat44 compute_average_matrices(size_t matrixNumber,
          }
       }
       // The average matrix is exponentiated
-      average_matrix = reg_mat44_expm(&average_matrix);
+      average_matrix = Mat44Expm(&average_matrix);
    } // iteration number
    // Free the allocated array
    free(matrixWeight);
@@ -230,15 +229,15 @@ mat44 compute_affine_demean(size_t matrixNumber,
       tempMatrix=nifti_quatern_to_mat44(qb,qc,qd,qx,qy,qz,1.f,1.f,1.f,qfac);
       // remove the rigid componenent from the affine matrix
       tempMatrix=nifti_mat44_inverse(tempMatrix);
-      tempMatrix=reg_mat44_mul(&tempMatrix,&current_affine);
+      tempMatrix=tempMatrix * current_affine;
       // sum up all the affine matrices
-      tempMatrix = reg_mat44_logm(&tempMatrix);
+      tempMatrix = Mat44Logm(&tempMatrix);
       demeanMatrix = demeanMatrix + tempMatrix;
    }
    // The average matrix is normalised
-   demeanMatrix = reg_mat44_mul(&demeanMatrix,1.f/(float)matrixNumber);
+   demeanMatrix = demeanMatrix * (1.f / (float)matrixNumber);
    // The average matrix is exponentiated
-   demeanMatrix = reg_mat44_expm(&demeanMatrix);
+   demeanMatrix = Mat44Expm(&demeanMatrix);
    // The average matrix is inverted
    demeanMatrix = nifti_mat44_inverse(demeanMatrix);
    return demeanMatrix;
@@ -293,11 +292,7 @@ int compute_nrr_demean(nifti_image *demean_field,
             affineTransformation=*reinterpret_cast<mat44 *>(transformation->ext_list[0].edata);
             // Note that if the transformation is a flow field, only half-of the affine has be used
             if(transformation->num_ext>1 && deformationField->intent_p1!=DEF_VEL_FIELD)
-            {
-               affineTransformation=reg_mat44_mul(
-                                       reinterpret_cast<mat44 *>(transformation->ext_list[1].edata),
-                                       &affineTransformation);
-            }
+               affineTransformation=reinterpret_cast<mat44&>(*transformation->ext_list[1].edata) * affineTransformation;
          }
          else reg_tool_ReadAffineFile(&affineTransformation,inputAffName[t]);
          // The affine component is substracted
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index 828b050e..dd439f62 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -153,7 +153,7 @@ int main(int argc, char **argv)
    originIndex[0] = -1.0f;
    originIndex[1] = -1.0f;
    originIndex[2] = -1.0f;
-   reg_mat44_mul(&(controlPointImage->qto_xyz), originIndex, originReal);
+   Mat44Mul(controlPointImage->qto_xyz, originIndex, originReal);
    controlPointImage->qto_xyz.m[0][3] = controlPointImage->qoffset_x = originReal[0];
    controlPointImage->qto_xyz.m[1][3] = controlPointImage->qoffset_y = originReal[1];
    controlPointImage->qto_xyz.m[2][3] = controlPointImage->qoffset_z = originReal[2];
diff --git a/reg-apps/reg_gpuinfo.cpp b/reg-apps/reg_gpuinfo.cpp
index d4858ead..3f051047 100644
--- a/reg-apps/reg_gpuinfo.cpp
+++ b/reg-apps/reg_gpuinfo.cpp
@@ -1,4 +1,4 @@
-#include "_reg_maths.h"
+#include "Maths.hpp"
 #include "Platform.h"
 
 #ifdef USE_CUDA
diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp
index 27b517bf..06507407 100644
--- a/reg-apps/reg_jacobian.cpp
+++ b/reg-apps/reg_jacobian.cpp
@@ -233,7 +233,7 @@ int main(int argc, char **argv)
       if(!reg_isAnImageFileName(param->inputTransName)){
          mat44 *affineTransformation=(mat44 *)malloc(sizeof(mat44));
          reg_tool_ReadAffineFile(affineTransformation,param->inputTransName);
-         NR_COUT << reg_mat44_det<double>(affineTransformation) << std::endl;
+         NR_COUT << Mat44Det<double>(affineTransformation) << std::endl;
          return EXIT_SUCCESS;
       }
 
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index e2fe543d..9ab79df2 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -307,7 +307,7 @@ int main(int argc, char **argv)
    else
    {
       // No transformation is specified, an identity transformation is used
-      reg_mat44_eye(&inputAffineTransformation);
+      Mat44Eye(&inputAffineTransformation);
    }
 
    // Create a deformation field
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 70ff5741..76b55ba5 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -498,7 +498,7 @@ int main(int argc, char **argv)
     {
         reg_tools_changeDatatype<float>(image);
         nifti_image *normImage = nifti_dup(*image);
-        reg_heapSort(static_cast<float *>(normImage->data), normImage->nvox);
+        HeapSort(static_cast<float *>(normImage->data), normImage->nvox);
         float minValue = static_cast<float *>(normImage->data)[Floor(03*(int)normImage->nvox/100)];
         float maxValue = static_cast<float *>(normImage->data)[Floor(97*(int)normImage->nvox/100)];
         reg_tools_subtractValueFromImage(image,normImage,minValue);
@@ -892,7 +892,7 @@ int main(int argc, char **argv)
         const size_t jacobianVoxelNumber = NiftiImage::calcVoxelNumber(def, 3);
         mat33 *jacobian = (mat33 *)malloc(jacobianVoxelNumber * sizeof(mat33));
         for (size_t i = 0; i < jacobianVoxelNumber; ++i)
-            reg_mat33_eye(&jacobian[i]);
+            Mat33Eye(&jacobian[i]);
         // resample the original image into the space of the new image
         if(flag->interpFlag == 0){
             param->interpOrder = 3;
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 4cf0bfe5..ba427d31 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -16,7 +16,6 @@
 #include "_reg_globalTrans.h"
 #include "_reg_localTrans.h"
 #include "_reg_tools.h"
-#include "_reg_maths_eigen.h"
 
 #include "reg_transform.h"
 
@@ -532,7 +531,7 @@ int main(int argc, char **argv) {
         if (affine1Trans != nullptr && affine2Trans != nullptr) {
             NR_INFO("Transformation 2 is an affine parametrisation:");
             NR_INFO(param->input2TransName);
-            *affine1Trans = reg_mat44_mul(affine2Trans, affine1Trans);
+            *affine1Trans = *affine2Trans * *affine1Trans;
             reg_tool_WriteAffineFile(affine1Trans, param->outputTransName);
         } else {
             // Check if the reference image is required
@@ -955,10 +954,10 @@ int main(int argc, char **argv) {
 
         // Update the sform
         if (image->sform_code > 0) {
-            image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->sto_xyz));
+            image->sto_xyz = *affineTransformation * image->sto_xyz;
         } else {
             image->sform_code = 1;
-            image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->qto_xyz));
+            image->sto_xyz = *affineTransformation * image->qto_xyz;
         }
         image->sto_ijk = nifti_mat44_inverse(image->sto_xyz);
 
@@ -980,9 +979,9 @@ int main(int argc, char **argv) {
             affineTrans = (mat44 *)malloc(sizeof(mat44));
             reg_tool_ReadAffineFile(affineTrans, param->inputTransName);
             // The affine transformation is halfed
-            *affineTrans = reg_mat44_logm(affineTrans);
-            *affineTrans = reg_mat44_mul(affineTrans, 0.5);
-            *affineTrans = reg_mat44_expm(affineTrans);
+            *affineTrans = Mat44Logm(affineTrans);
+            *affineTrans = *affineTrans * 0.5;
+            *affineTrans = Mat44Expm(affineTrans);
             // The affine transformation is saved
             reg_tool_WriteAffineFile(affineTrans, param->outputTransName);
         } else {
@@ -1183,17 +1182,17 @@ int main(int argc, char **argv) {
     if (flag->makeAffFlag) {
         // Create all the required matrices
         mat44 rotationX;
-        reg_mat44_eye(&rotationX);
+        Mat44Eye(&rotationX);
         mat44 translation;
-        reg_mat44_eye(&translation);
+        Mat44Eye(&translation);
         mat44 rotationY;
-        reg_mat44_eye(&rotationY);
+        Mat44Eye(&rotationY);
         mat44 rotationZ;
-        reg_mat44_eye(&rotationZ);
+        Mat44Eye(&rotationZ);
         mat44 scaling;
-        reg_mat44_eye(&scaling);
+        Mat44Eye(&scaling);
         mat44 shearing;
-        reg_mat44_eye(&shearing);
+        Mat44Eye(&shearing);
         // Set up the rotation matrix along the YZ plane
         rotationX.m[1][1] = cosf(param->affTransParam[0]);
         rotationX.m[1][2] = -sinf(param->affTransParam[0]);
@@ -1222,11 +1221,11 @@ int main(int argc, char **argv) {
         shearing.m[2][0] = param->affTransParam[10];
         shearing.m[2][1] = param->affTransParam[11];
         // Combine all the transformations
-        mat44 affine = reg_mat44_mul(&rotationY, &rotationZ);
-        affine = reg_mat44_mul(&rotationX, &affine);
-        affine = reg_mat44_mul(&scaling, &affine);
-        affine = reg_mat44_mul(&shearing, &affine);
-        affine = reg_mat44_mul(&translation, &affine);
+        mat44 affine = rotationY * rotationZ;
+        affine = rotationX * affine;
+        affine = scaling * affine;
+        affine = shearing * affine;
+        affine = translation * affine;
         // Save the new matrix
         reg_tool_WriteAffineFile(&affine, param->outputTransName);
     }
diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp
index 8b399680..e37fc34c 100644
--- a/reg-io/_reg_ReadWriteMatrix.cpp
+++ b/reg-io/_reg_ReadWriteMatrix.cpp
@@ -70,11 +70,11 @@ void reg_tool_ReadAffineFile(mat44 *mat,
         absoluteFloating = nifti_mat44_inverse(absoluteFloating);
         *mat = nifti_mat44_inverse(*mat);
 
-        *mat = reg_mat44_mul(&absoluteFloating, mat);
-        *mat = reg_mat44_mul(mat, &absoluteReference);
-        *mat = reg_mat44_mul(floatingMatrix, mat);
+        *mat = absoluteFloating * *mat;
+        *mat = *mat * absoluteReference;
+        *mat = *floatingMatrix * *mat;
         mat44 tmp = nifti_mat44_inverse(*referenceMatrix);
-        *mat = reg_mat44_mul(mat, &tmp);
+        *mat = *mat * tmp;
     }
 
     NR_MAT44_DEBUG(*mat, "Affine matrix");
@@ -168,7 +168,7 @@ template<class T>
 T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) {
     //THEN CONSTRUCT THE MATRIX
     // Allocate the matrices
-    T** mat = reg_matrix2DAllocate<T>(nbLine, nbColumn);
+    T** mat = Matrix2dAlloc<T>(nbLine, nbColumn);
     //STORE THE VALUES
     std::string line;
     std::ifstream matrixFile(filename);
diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp
index 225d6f11..9462a634 100644
--- a/reg-io/nrrd/reg_nrrd.cpp
+++ b/reg-io/nrrd/reg_nrrd.cpp
@@ -167,7 +167,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
 
    // The space orientation is extracted and converted into a matrix
    mat44 qform_orientation_matrix;
-   reg_mat44_eye(&qform_orientation_matrix);
+   Mat44Eye(&qform_orientation_matrix);
    if(nrrdImage->space==nrrdSpaceRightAnteriorSuperior ||
          nrrdImage->space==nrrdSpaceRightAnteriorSuperiorTime ||
          nrrdImage->space==nrrdSpace3DRightHanded ||
@@ -251,7 +251,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage)
    if(nrrdImage->axis[1].spaceDirection[0]!=std::numeric_limits<double>::quiet_NaN())
    {
       niiImage->sform_code=1;
-      reg_mat44_eye(&niiImage->sto_xyz);
+      Mat44Eye(&niiImage->sto_xyz);
       for(int i=0; i<(niiImage->ndim<3?niiImage->ndim:3); ++i)
       {
          for(int j=0; j<(niiImage->ndim<3?niiImage->ndim:3); ++j)
diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt
index ac7a34b1..039e26ee 100755
--- a/reg-lib/CMakeLists.txt
+++ b/reg-lib/CMakeLists.txt
@@ -12,8 +12,7 @@ endif(USE_OPENCL)
 ##BUILD THE CPU LIBRARIES
 #-----------------------------------------------------------------------------
 add_library(_reg_maths ${NIFTYREG_LIBRARY_TYPE}
-  cpu/_reg_maths.cpp
-  cpu/_reg_maths_eigen.cpp
+  cpu/Maths.cpp
 )
 install(TARGETS _reg_maths
   RUNTIME DESTINATION bin
diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp
index 93e452eb..826b13ed 100644
--- a/reg-lib/Debug.hpp
+++ b/reg-lib/Debug.hpp
@@ -68,11 +68,11 @@ inline std::string StripFunctionName(const std::string& funcName) {
 #define NR_INFO(msg)        NR_COUT << "[NiftyReg INFO] " << msg << std::endl
 /* *************************************************************** */
 #ifndef NDEBUG
-#define NR_MAT44(mat, title)          reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title))
+#define NR_MAT44(mat, title)          Mat44Disp(mat, "[NiftyReg DEBUG] "s + (title))
 #define NR_MAT44_DEBUG(mat, title)    NR_MAT44(mat, title)
 #define NR_MAT44_VERBOSE(mat, title)  NR_MAT44(mat, title)
 #else
-#define NR_MAT44(mat, title)          reg_mat44_disp(mat, title)
+#define NR_MAT44(mat, title)          Mat44Disp(mat, title)
 #define NR_MAT44_DEBUG(mat, title)
 #define NR_MAT44_VERBOSE(mat, title)  if (this->verbose) NR_MAT44(mat, "[NiftyReg INFO] "s + (title))
 #endif
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 35b5a2dd..032aeb97 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -177,9 +177,9 @@ void reg_aladin<T>::InitialiseRegistration() {
             referenceCenter[2] = (float)(this->inputReference->nz) / 2.0f;
             //From pixel coordinates to real coordinates
             float floatingRealPosition[3];
-            reg_mat44_mul(floatingMatrix, floatingCenter, floatingRealPosition);
+            Mat44Mul(*floatingMatrix, floatingCenter, floatingRealPosition);
             float referenceRealPosition[3];
-            reg_mat44_mul(referenceMatrix, referenceCenter, referenceRealPosition);
+            Mat44Mul(*referenceMatrix, referenceCenter, referenceRealPosition);
             //Set translation to the transformation matrix
             this->affineTransformation->m[0][3] = floatingRealPosition[0] - referenceRealPosition[0];
             this->affineTransformation->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1];
@@ -207,7 +207,7 @@ void reg_aladin<T>::InitialiseRegistration() {
             referenceCentre[2] /= referenceCount;
             float refCOM[3]{};
             if (this->inputReference->sform_code > 0)
-                reg_mat44_mul(&this->inputReference->sto_xyz, referenceCentre, refCOM);
+                Mat44Mul(this->inputReference->sto_xyz, referenceCentre, refCOM);
 
             float floatingCentre[3] = { 0, 0, 0 };
             float floatingCount = 0;
@@ -231,8 +231,8 @@ void reg_aladin<T>::InitialiseRegistration() {
             floatingCentre[2] /= floatingCount;
             float floCOM[3]{};
             if (this->inputFloating->sform_code > 0)
-                reg_mat44_mul(&this->inputFloating->sto_xyz, floatingCentre, floCOM);
-            reg_mat44_eye(this->affineTransformation.get());
+                Mat44Mul(this->inputFloating->sto_xyz, floatingCentre, floCOM);
+            Mat44Eye(this->affineTransformation.get());
             this->affineTransformation->m[0][3] = floCOM[0] - refCOM[0];
             this->affineTransformation->m[1][3] = floCOM[1] - refCOM[1];
             this->affineTransformation->m[2][3] = floCOM[2] - refCOM[2];
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index 610405bd..62cdd753 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -1,5 +1,4 @@
 #include "_reg_aladin_sym.h"
-#include "_reg_maths_eigen.h"
 
 /* *************************************************************** */
 template <class T>
@@ -81,7 +80,7 @@ void reg_aladin_sym<T>::InitialiseRegistration() {
         referenceCentre[2] /= referenceCount;
         float refCOG[3]{};
         if (this->inputReference->sform_code > 0)
-            reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOG);
+            Mat44Mul(this->inputReference->sto_xyz, referenceCentre, refCOG);
 
         float floatingCentre[3] = { 0, 0, 0 };
         float floatingCount = 0;
@@ -106,8 +105,8 @@ void reg_aladin_sym<T>::InitialiseRegistration() {
         floatingCentre[2] /= floatingCount;
         float floCOG[3]{};
         if (this->inputFloating->sform_code > 0)
-            reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOG);
-        reg_mat44_eye(this->affineTransformation.get());
+            Mat44Mul(this->inputFloating->sto_xyz, floatingCentre, floCOG);
+        Mat44Eye(this->affineTransformation.get());
         this->affineTransformation->m[0][3] = floCOG[0] - refCOG[0];
         this->affineTransformation->m[1][3] = floCOG[1] - refCOG[1];
         this->affineTransformation->m[2][3] = floCOG[2] - refCOG[2];
@@ -143,9 +142,9 @@ void reg_aladin_sym<T>::UpdateTransformationMatrix(int type) {
     mat44 bInverted = nifti_mat44_inverse(*this->affineTransformationBw);
 
     // We average the forward and inverted backward matrix
-    *this->affineTransformation = reg_mat44_avg2(this->affineTransformation.get(), &bInverted);
+    *this->affineTransformation = Mat44Avg2(this->affineTransformation.get(), &bInverted);
     // We average the inverted forward and backward matrix
-    *this->affineTransformationBw = reg_mat44_avg2(&fInverted, this->affineTransformationBw.get());
+    *this->affineTransformationBw = Mat44Avg2(&fInverted, this->affineTransformationBw.get());
     for (int i = 0; i < 3; ++i) {
         this->affineTransformation->m[3][i] = 0.f;
         this->affineTransformationBw->m[3][i] = 0.f;
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index cc5e8be5..4eb441ef 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -402,7 +402,7 @@ void reg_base<T>::Initialise() {
         reg_tools_changeDatatype<T>(tmpReference);
         // Extract the robust range of the reference image
         T *refDataPtr = static_cast<T *>(tmpReference->data);
-        reg_heapSort(refDataPtr, tmpReference->nvox);
+        HeapSort(refDataPtr, tmpReference->nvox);
         // Update the reference threshold values if no value has been setup by the user
         if (referenceThresholdLow[0] == std::numeric_limits<T>::lowest())
             referenceThresholdLow[0] = refDataPtr[Round((float)tmpReference->nvox * 0.02f)];
@@ -414,7 +414,7 @@ void reg_base<T>::Initialise() {
         reg_tools_changeDatatype<T>(tmpFloating);
         // Extract the robust range of the floating image
         T *floDataPtr = static_cast<T *>(tmpFloating->data);
-        reg_heapSort(floDataPtr, tmpFloating->nvox);
+        HeapSort(floDataPtr, tmpFloating->nvox);
         // Update the floating threshold values if no value has been setup by the user
         if (floatingThresholdLow[0] == std::numeric_limits<T>::lowest())
             floatingThresholdLow[0] = floDataPtr[Round((float)tmpFloating->nvox * 0.02f)];
diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
index 073fcaa6..8314a51f 100644
--- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
+++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp
@@ -89,7 +89,7 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) {
     const size_t globalWorkSize[dims] = {xBlocks * xThreads, yBlocks * yThreads, zBlocks * zThreads};
     const size_t localWorkSize[dims] = {xThreads, yThreads, zThreads};
 
-    mat44 transformationMatrix = compose ? *affineTransformation : reg_mat44_mul(affineTransformation, referenceMatrix);
+    mat44 transformationMatrix = compose ? *affineTransformation : *affineTransformation * *referenceMatrix;
 
     float* trans = (float *)malloc(16 * sizeof(float));
     mat44ToCptr(transformationMatrix, trans);
diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/Maths.cpp
similarity index 50%
rename from reg-lib/cpu/_reg_maths.cpp
rename to reg-lib/cpu/Maths.cpp
index 19ed9210..6abb2f72 100644
--- a/reg-lib/cpu/_reg_maths.cpp
+++ b/reg-lib/cpu/Maths.cpp
@@ -1,86 +1,29 @@
-#include "_reg_tools.h"
+#define USE_EIGEN
 
-#define mat(i,j,dim) mat[i*dim+j]
+#include "_reg_tools.h"
+#include "Eigen/Core"
+#include "unsupported/Eigen/MatrixFunctions"
 
 /* *************************************************************** */
-template<class T>
-T* reg_matrix1DAllocate(size_t arraySize) {
-    T* res = (T*)malloc(arraySize * sizeof(T));
-    return res;
-}
-template bool* reg_matrix1DAllocate<bool>(size_t arraySize);
-template float* reg_matrix1DAllocate<float>(size_t arraySize);
-template double* reg_matrix1DAllocate<double>(size_t arraySize);
-/* *************************************************************** */
-template<class T>
-void reg_matrix1DDeallocate(T* mat) {
-    free(mat);
-}
-template void reg_matrix1DDeallocate<bool>(bool* mat);
-template void reg_matrix1DDeallocate<float>(float* mat);
-template void reg_matrix1DDeallocate<double>(double* mat);
-/* *************************************************************** */
-template<class T>
-T** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY) {
-    T** res;
-    res = (T**)malloc(arraySizeX * sizeof(T*));
-    for (size_t i = 0; i < arraySizeX; i++) {
-        res[i] = (T*)malloc(arraySizeY * sizeof(T));
-    }
-    return res;
-}
-template float** reg_matrix2DAllocate<float>(size_t arraySizeX, size_t arraySizeY);
-template double** reg_matrix2DAllocate<double>(size_t arraySizeX, size_t arraySizeY);
+namespace NiftyReg {
 /* *************************************************************** */
 template<class T>
-void reg_matrix2DDeallocate(size_t arraySizeX, T** mat) {
-    for (size_t i = 0; i < arraySizeX; i++) {
-        free(mat[i]);
-    }
-    free(mat);
-}
-template void reg_matrix2DDeallocate<float>(size_t arraySizeX, float** mat);
-template void reg_matrix2DDeallocate<double>(size_t arraySizeX, double** mat);
-/* *************************************************************** */
-template<class T>
-T** reg_matrix2DTranspose(T** mat, size_t arraySizeX, size_t arraySizeY) {
-    T** res;
-    res = (T**)malloc(arraySizeY * sizeof(T*));
-    for (size_t i = 0; i < arraySizeY; i++) {
-        res[i] = (T*)malloc(arraySizeX * sizeof(T));
-    }
-    for (size_t i = 0; i < arraySizeX; i++) {
-        for (size_t j = 0; j < arraySizeY; j++) {
-            res[j][i] = mat[i][j];
-        }
-    }
-    return res;
-}
-template float** reg_matrix2DTranspose<float>(float** mat, size_t arraySizeX, size_t arraySizeY);
-template double** reg_matrix2DTranspose<double>(double** mat, size_t arraySizeX, size_t arraySizeY);
-/* *************************************************************** */
-template<class T>
-T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2) {
+T** Matrix2dMultiply(T **mat1, const size_t mat1X, const size_t mat1Y, T **mat2, const size_t mat2X, const size_t mat2Y, const bool transposeMat2) {
     if (transposeMat2 == false) {
         // First check that the dimension are appropriate
         if (mat1Y != mat2X)
             NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " +
                            std::to_string(mat1Y) + "] [" + std::to_string(mat2X) + " " + std::to_string(mat2Y) + "]");
 
-        size_t nbElement = mat1Y;
-        double resTemp = 0;
-        T** res = reg_matrix2DAllocate<T>(mat1X, mat2Y);
-
+        T **res = Matrix2dAlloc<T>(mat1X, mat2Y);
         for (size_t i = 0; i < mat1X; i++) {
             for (size_t j = 0; j < mat2Y; j++) {
-                resTemp = 0;
-                for (size_t k = 0; k < nbElement; k++) {
+                double resTemp = 0;
+                for (size_t k = 0; k < mat1Y; k++)
                     resTemp += static_cast<double>(mat1[i][k]) * static_cast<double>(mat2[k][j]);
-                }
                 res[i][j] = static_cast<T>(resTemp);
             }
         }
-        //Output
         return res;
     } else {
         // First check that the dimension are appropriate
@@ -88,43 +31,34 @@ T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t
             NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " +
                            std::to_string(mat1Y) + "] [" + std::to_string(mat2Y) + " " + std::to_string(mat2X) + "]");
 
-        size_t nbElement = mat1Y;
-        double resTemp = 0;
-        T** res = reg_matrix2DAllocate<T>(mat1X, mat2X);
-
+        T **res = Matrix2dAlloc<T>(mat1X, mat2X);
         for (size_t i = 0; i < mat1X; i++) {
             for (size_t j = 0; j < mat2X; j++) {
-                resTemp = 0;
-                for (size_t k = 0; k < nbElement; k++) {
+                double resTemp = 0;
+                for (size_t k = 0; k < mat1Y; k++)
                     resTemp += static_cast<double>(mat1[i][k]) * static_cast<double>(mat2[j][k]);
-                }
                 res[i][j] = static_cast<T>(resTemp);
             }
         }
-        //Output
         return res;
     }
 }
-template float** reg_matrix2DMultiply<float>(float** mat1, size_t mat1X, size_t mat1Y, float** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2);
-template double** reg_matrix2DMultiply<double>(double** mat1, size_t mat1X, size_t mat1Y, double** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2);
+template float** Matrix2dMultiply<float>(float** mat1, const size_t mat1X, const size_t mat1Y, float** mat2, const size_t mat2X, const size_t mat2Y, const bool transposeMat2);
+template double** Matrix2dMultiply<double>(double** mat1, const size_t mat1X, const size_t mat1Y, double** mat2, const size_t mat2X, const size_t mat2Y, const bool transposeMat2);
 /* *************************************************************** */
 template<class T>
-void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, T** resT, bool transposeMat2) {
+void Matrix2dMultiply(T **mat1, const size_t mat1X, const size_t mat1Y, T **mat2, const size_t mat2X, const size_t mat2Y, T **resT, const bool transposeMat2) {
     if (transposeMat2 == false) {
         // First check that the dimension are appropriate
         if (mat1Y != mat2X)
             NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " +
                            std::to_string(mat1Y) + "] [" + std::to_string(mat2X) + " " + std::to_string(mat2Y) + "]");
 
-        size_t nbElement = mat1Y;
-        double resTemp;
-
         for (size_t i = 0; i < mat1X; i++) {
             for (size_t j = 0; j < mat2Y; j++) {
-                resTemp = 0;
-                for (size_t k = 0; k < nbElement; k++) {
+                double resTemp = 0;
+                for (size_t k = 0; k < mat1Y; k++)
                     resTemp += static_cast<double>(mat1[i][k]) * static_cast<double>(mat2[k][j]);
-                }
                 resT[i][j] = static_cast<T>(resTemp);
             }
         }
@@ -134,27 +68,23 @@ void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t
             NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " +
                            std::to_string(mat1Y) + "] [" + std::to_string(mat2Y) + " " + std::to_string(mat2X) + "]");
 
-        size_t nbElement = mat1Y;
-        double resTemp;
-
         for (size_t i = 0; i < mat1X; i++) {
             for (size_t j = 0; j < mat2X; j++) {
-                resTemp = 0;
-                for (size_t k = 0; k < nbElement; k++) {
+                double resTemp = 0;
+                for (size_t k = 0; k < mat1Y; k++)
                     resTemp += static_cast<double>(mat1[i][k]) * static_cast<double>(mat2[j][k]);
-                }
                 resT[i][j] = static_cast<T>(resTemp);
             }
         }
     }
 }
-template void reg_matrix2DMultiply<float>(float** mat1, size_t mat1X, size_t mat1Y, float** mat2, size_t mat2X, size_t mat2Y, float** resT, bool transposeMat2);
-template void reg_matrix2DMultiply<double>(double** mat1, size_t mat1X, size_t mat1Y, double** mat2, size_t mat2X, size_t mat2Y, double** resT, bool transposeMat2);
+template void Matrix2dMultiply<float>(float** mat1, const size_t mat1X, const size_t mat1Y, float** mat2, const size_t mat2X, const size_t mat2Y, float** resT, const bool transposeMat2);
+template void Matrix2dMultiply<double>(double** mat1, const size_t mat1X, const size_t mat1Y, double** mat2, const size_t mat2X, const size_t mat2Y, double** resT, const bool transposeMat2);
 /* *************************************************************** */
 // Multiply a matrix with a vector - we assume correct dimension
 template<class T>
-T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect) {
-    T* res = reg_matrix1DAllocate<T>(m);
+T* Matrix2dVectorMultiply(T **mat, const size_t m, const size_t n, T* vect) {
+    T* res = Matrix1dAlloc<T>(m);
     for (size_t i = 0; i < m; i++) {
         double resTemp = 0;
         for (size_t k = 0; k < n; k++) {
@@ -164,11 +94,11 @@ T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect) {
     }
     return res;
 }
-template float* reg_matrix2DVectorMultiply<float>(float** mat, size_t m, size_t n, float* vect);
-template double* reg_matrix2DVectorMultiply<double>(double** mat, size_t m, size_t n, double* vect);
+template float* Matrix2dVectorMultiply<float>(float** mat, const size_t m, const size_t n, float* vect);
+template double* Matrix2dVectorMultiply<double>(double** mat, const size_t m, const size_t n, double* vect);
 /* *************************************************************** */
 template<class T>
-void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res) {
+void Matrix2dVectorMultiply(T **mat, const size_t m, const size_t n, T* vect, T* res) {
     for (size_t i = 0; i < m; i++) {
         double resTemp = 0;
         for (size_t k = 0; k < n; k++) {
@@ -177,11 +107,11 @@ void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res) {
         res[i] = static_cast<T>(resTemp);
     }
 }
-template void reg_matrix2DVectorMultiply<float>(float** mat, size_t m, size_t n, float* vect, float* res);
-template void reg_matrix2DVectorMultiply<double>(double** mat, size_t m, size_t n, double* vect, double* res);
+template void Matrix2dVectorMultiply<float>(float** mat, const size_t m, const size_t n, float* vect, float* res);
+template void Matrix2dVectorMultiply<double>(double** mat, const size_t m, const size_t n, double* vect, double* res);
 /* *************************************************************** */
 // Heap sort
-void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum) {
+void HeapSort(float *array_tmp, int *index_tmp, int blockNum) {
     float *array = &array_tmp[-1];
     int *index = &index_tmp[-1];
     int l = (blockNum >> 1) + 1;
@@ -223,7 +153,7 @@ void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum) {
 /* *************************************************************** */
 // Heap sort
 template<class DataType>
-void reg_heapSort(DataType *array_tmp, int blockNum) {
+void HeapSort(DataType *array_tmp, int blockNum) {
     DataType *array = &array_tmp[-1];
     int l = (blockNum >> 1) + 1;
     int ir = blockNum;
@@ -254,31 +184,11 @@ void reg_heapSort(DataType *array_tmp, int blockNum) {
         array[i] = val;
     }
 }
-template void reg_heapSort<float>(float *array_tmp, int blockNum);
-template void reg_heapSort<double>(double *array_tmp, int blockNum);
-/* *************************************************************** */
-bool operator==(mat44 A, mat44 B) {
-    for (unsigned i = 0; i < 4; ++i) {
-        for (unsigned j = 0; j < 4; ++j) {
-            if (A.m[i][j] != B.m[i][j])
-                return false;
-        }
-    }
-    return true;
-}
-/* *************************************************************** */
-bool operator!=(mat44 A, mat44 B) {
-    for (unsigned i = 0; i < 4; ++i) {
-        for (unsigned j = 0; j < 4; ++j) {
-            if (A.m[i][j] != B.m[i][j])
-                return true;
-        }
-    }
-    return false;
-}
+template void HeapSort<float>(float *array_tmp, int blockNum);
+template void HeapSort<double>(double *array_tmp, int blockNum);
 /* *************************************************************** */
 template<class T>
-T reg_mat44_det(mat44 const* A) {
+T Mat44Det(const mat44 *A) {
     double D =
         static_cast<double>(A->m[0][0]) * static_cast<double>(A->m[1][1]) * static_cast<double>(A->m[2][2]) * static_cast<double>(A->m[3][3])
         - static_cast<double>(A->m[0][0]) * static_cast<double>(A->m[1][1]) * static_cast<double>(A->m[3][2]) * static_cast<double>(A->m[2][3])
@@ -306,129 +216,10 @@ T reg_mat44_det(mat44 const* A) {
         + static_cast<double>(A->m[3][0]) * static_cast<double>(A->m[2][1]) * static_cast<double>(A->m[1][2]) * static_cast<double>(A->m[0][3]);
     return static_cast<T>(D);
 }
-template float reg_mat44_det<float>(mat44 const* A);
-template double reg_mat44_det<double>(mat44 const* A);
-/* *************************************************************** */
-void reg_mat33_to_nan(mat33 *A) {
-    for (int i = 0; i < 3; ++i)
-        for (int j = 0; j < 3; ++j)
-            A->m[i][j] = std::numeric_limits<float>::quiet_NaN();
-}
-/* *************************************************************** */
-mat33 reg_mat44_to_mat33(mat44 const* A) {
-    mat33 out;
-    out.m[0][0] = A->m[0][0];
-    out.m[0][1] = A->m[0][1];
-    out.m[0][2] = A->m[0][2];
-    out.m[1][0] = A->m[1][0];
-    out.m[1][1] = A->m[1][1];
-    out.m[1][2] = A->m[1][2];
-    out.m[2][0] = A->m[2][0];
-    out.m[2][1] = A->m[2][1];
-    out.m[2][2] = A->m[2][2];
-    return out;
-}
-/* *************************************************************** */
-mat44 reg_mat44_mul(mat44 const* A, mat44 const* B) {
-    mat44 R;
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][0]) * static_cast<double>(B->m[0][j]) +
-                                           static_cast<double>(A->m[i][1]) * static_cast<double>(B->m[1][j]) +
-                                           static_cast<double>(A->m[i][2]) * static_cast<double>(B->m[2][j]) +
-                                           static_cast<double>(A->m[i][3]) * static_cast<double>(B->m[3][j]));
-        }
-    }
-    return R;
-}
-/* *************************************************************** */
-mat44 operator*(mat44 A, mat44 B) {
-    return reg_mat44_mul(&A, &B);
-}
-/* *************************************************************** */
-void reg_mat33_mul(mat44 const* mat, float const* in, float *out) {
-    out[0] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat->m[0][0]) +
-                                static_cast<double>(in[1]) * static_cast<double>(mat->m[0][1]) +
-                                static_cast<double>(mat->m[0][3]));
-    out[1] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat->m[1][0]) +
-                                static_cast<double>(in[1]) * static_cast<double>(mat->m[1][1]) +
-                                static_cast<double>(mat->m[1][3]));
-}
-/* *************************************************************** */
-void reg_mat33_mul(mat33 const* mat, float const* in, float *out) {
-    out[0] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat->m[0][0]) +
-                                static_cast<double>(in[1]) * static_cast<double>(mat->m[0][1]) +
-                                static_cast<double>(mat->m[0][2]));
-    out[1] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat->m[1][0]) +
-                                static_cast<double>(in[1]) * static_cast<double>(mat->m[1][1]) +
-                                static_cast<double>(mat->m[1][2]));
-}
-/* *************************************************************** */
-mat33 reg_mat33_mul(mat33 const* A, mat33 const* B) {
-    mat33 R;
-    for (int i = 0; i < 3; i++) {
-        for (int j = 0; j < 3; j++) {
-            R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][0]) * static_cast<double>(B->m[0][j]) +
-                                           static_cast<double>(A->m[i][1]) * static_cast<double>(B->m[1][j]) +
-                                           static_cast<double>(A->m[i][2]) * static_cast<double>(B->m[2][j]));
-        }
-    }
-    return R;
-}
-/* *************************************************************** */
-mat33 operator*(mat33 A, mat33 B) {
-    return reg_mat33_mul(&A, &B);
-}
-/* *************************************************************** */
-mat33 reg_mat33_add(mat33 const* A, mat33 const* B) {
-    mat33 R;
-    for (int i = 0; i < 3; i++) {
-        for (int j = 0; j < 3; j++) {
-            R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][j]) + static_cast<double>(B->m[i][j]));
-        }
-    }
-    return R;
-}
+template float Mat44Det<float>(const mat44 *A);
+template double Mat44Det<double>(const mat44 *A);
 /* *************************************************************** */
-mat33 reg_mat33_trans(mat33 A) {
-    mat33 R;
-    for (int i = 0; i < 3; i++) {
-        for (int j = 0; j < 3; j++) {
-            R.m[j][i] = A.m[i][j];
-        }
-    }
-    return R;
-}
-/* *************************************************************** */
-mat33 operator+(mat33 A, mat33 B) {
-    return reg_mat33_add(&A, &B);
-}
-/* *************************************************************** */
-mat44 reg_mat44_add(mat44 const* A, mat44 const* B) {
-    mat44 R;
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][j]) + static_cast<double>(B->m[i][j]));
-        }
-    }
-    return R;
-}
-/* *************************************************************** */
-mat44 operator+(mat44 A, mat44 B) {
-    return reg_mat44_add(&A, &B);
-}
-/* *************************************************************** */
-mat33 reg_mat33_minus(mat33 const* A, mat33 const* B) {
-    mat33 R;
-    for (int i = 0; i < 3; i++) {
-        for (int j = 0; j < 3; j++) {
-            R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][j]) - static_cast<double>(B->m[i][j]));
-        }
-    }
-    return R;
-}
-/* *************************************************************** */
-void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) {
+void Mat33Diagonalize(const mat33 *A, mat33 *Q, mat33 *D) {
     // A must be a symmetric matrix.
     // returns Q and D such that
     // Diagonal matrix D = QT * A * Q;  and  A = Q*D*QT
@@ -522,126 +313,188 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) {
         q[3] /= mq;
     }
 }
-
-/* *************************************************************** */
-mat33 operator-(mat33 A, mat33 B) {
-    return reg_mat33_minus(&A, &B);
-}
 /* *************************************************************** */
-void reg_mat33_eye(mat33 *mat) {
-    mat->m[0][0] = 1.f;
-    mat->m[0][1] = mat->m[0][2] = 0.f;
-    mat->m[1][1] = 1.f;
-    mat->m[1][0] = mat->m[1][2] = 0.f;
-    mat->m[2][2] = 1.f;
-    mat->m[2][0] = mat->m[2][1] = 0.f;
+void Mat44Disp(const mat44 mat, const std::string& title) {
+    NR_COUT << title << ":\n"
+        << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\t" << mat.m[0][3] << "\n"
+        << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\t" << mat.m[1][3] << "\n"
+        << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << "\t" << mat.m[2][3] << "\n"
+        << mat.m[3][0] << "\t" << mat.m[3][1] << "\t" << mat.m[3][2] << "\t" << mat.m[3][3] << std::endl;
 }
 /* *************************************************************** */
-mat44 reg_mat44_minus(mat44 const* A, mat44 const* B) {
-    mat44 R;
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            R.m[i][j] = static_cast<float>(static_cast<double>(A->m[i][j]) - static_cast<double>(B->m[i][j]));
-        }
+/** @brief SVD
+* @param in input matrix to decompose - in place
+* @param size_m row
+* @param size_n colomn
+* @param w diagonal term
+* @param v rotation part
+*/
+template<class T>
+void Svd(T **in, const size_t size_m, const size_t size_n, T * w, T **v) {
+    if (size_m == 0 || size_n == 0)
+        NR_FATAL_ERROR("The specified matrix is empty");
+
+#ifdef _WIN32
+    long sm, sn, sn2;
+    long size__m = (long)size_m, size__n = (long)size_n;
+#else
+    size_t sm, sn, sn2;
+    size_t size__m = size_m, size__n = size_n;
+#endif
+    Eigen::MatrixXd m(size_m, size_n);
+
+    //Convert to Eigen matrix
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+   shared(in,m, size__m, size__n) \
+   private(sn)
+#endif
+    for (sm = 0; sm < size__m; sm++)
+        for (sn = 0; sn < size__n; sn++)
+            m(sm, sn) = static_cast<double>(in[sm][sn]);
+
+    Eigen::JacobiSVD<Eigen::MatrixXd> Svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
+
+#ifdef _OPENMP
+#pragma omp parallel for default(none) \
+   shared(in,Svd,v,w, size__n,size__m) \
+   private(sn2, sm)
+#endif
+    for (sn = 0; sn < size__n; sn++) {
+        w[sn] = static_cast<T>(Svd.singularValues()(sn));
+        for (sn2 = 0; sn2 < size__n; sn2++)
+            v[sn2][sn] = static_cast<T>(Svd.matrixV()(sn2, sn));
+        for (sm = 0; sm < size__m; sm++)
+            in[sm][sn] = static_cast<T>(Svd.matrixU()(sm, sn));
     }
-    return R;
-}
-/* *************************************************************** */
-mat44 operator-(mat44 A, mat44 B) {
-    return reg_mat44_minus(&A, &B);
-}
-/* *************************************************************** */
-void reg_mat44_eye(mat44 *mat) {
-    mat->m[0][0] = 1.f;
-    mat->m[0][1] = mat->m[0][2] = mat->m[0][3] = 0.f;
-    mat->m[1][1] = 1.f;
-    mat->m[1][0] = mat->m[1][2] = mat->m[1][3] = 0.f;
-    mat->m[2][2] = 1.f;
-    mat->m[2][0] = mat->m[2][1] = mat->m[2][3] = 0.f;
-    mat->m[3][3] = 1.f;
-    mat->m[3][0] = mat->m[3][1] = mat->m[3][2] = 0.f;
 }
+template void Svd<float>(float **in, const size_t m, const size_t n, float * w, float **v);
+template void Svd<double>(double **in, const size_t m, const size_t n, double * w, double **v);
 /* *************************************************************** */
-void reg_mat44_mul(mat44 const* mat,
-                   float const* in,
-                   float *out) {
-    out[0] = static_cast<float>(static_cast<double>(mat->m[0][0]) * static_cast<double>(in[0]) +
-                                static_cast<double>(mat->m[0][1]) * static_cast<double>(in[1]) +
-                                static_cast<double>(mat->m[0][2]) * static_cast<double>(in[2]) +
-                                static_cast<double>(mat->m[0][3]));
-    out[1] = static_cast<float>(static_cast<double>(mat->m[1][0]) * static_cast<double>(in[0]) +
-                                static_cast<double>(mat->m[1][1]) * static_cast<double>(in[1]) +
-                                static_cast<double>(mat->m[1][2]) * static_cast<double>(in[2]) +
-                                static_cast<double>(mat->m[1][3]));
-    out[2] = static_cast<float>(static_cast<double>(mat->m[2][0]) * static_cast<double>(in[0]) +
-                                static_cast<double>(mat->m[2][1]) * static_cast<double>(in[1]) +
-                                static_cast<double>(mat->m[2][2]) * static_cast<double>(in[2]) +
-                                static_cast<double>(mat->m[2][3]));
+template<class T>
+T Matrix2dDet(T **mat, const size_t m, const size_t n) {
+    if (m != n)
+        NR_FATAL_ERROR("The matrix have to be square: [" + std::to_string(m) + " " + std::to_string(n) + "]");
+
+    double res;
+    if (m == 2) {
+        res = static_cast<double>(mat[0][0]) * static_cast<double>(mat[1][1]) - static_cast<double>(mat[1][0]) * static_cast<double>(mat[0][1]);
+    } else if (m == 3) {
+        res = (static_cast<double>(mat[0][0]) * (static_cast<double>(mat[1][1]) * static_cast<double>(mat[2][2]) - static_cast<double>(mat[1][2]) * static_cast<double>(mat[2][1]))) -
+            (static_cast<double>(mat[0][1]) * (static_cast<double>(mat[1][0]) * static_cast<double>(mat[2][2]) - static_cast<double>(mat[1][2]) * static_cast<double>(mat[2][0]))) +
+            (static_cast<double>(mat[0][2]) * (static_cast<double>(mat[1][0]) * static_cast<double>(mat[2][1]) - static_cast<double>(mat[1][1]) * static_cast<double>(mat[2][0])));
+    } else {
+        // Convert to Eigen format
+        Eigen::MatrixXd eigenRes(m, n);
+        for (size_t i = 0; i < m; i++)
+            for (size_t j = 0; j < n; j++)
+                eigenRes(i, j) = static_cast<double>(mat[i][j]);
+        res = eigenRes.determinant();
+    }
+    return static_cast<T>(res);
 }
+template float Matrix2dDet<float>(float **mat, const size_t m, const size_t n);
+template double Matrix2dDet<double>(double **mat, const size_t m, const size_t n);
 /* *************************************************************** */
-void reg_mat44_mul(mat44 const* mat,
-                   double const* in,
-                   double *out) {
-    double matD[4][4];
-    for (int i = 0; i < 4; ++i)
-        for (int j = 0; j < 4; ++j)
-            matD[i][j] = static_cast<double>(mat->m[i][j]);
+void Mat33Expm(mat33 *tensorIn) {
+    int sm, sn;
+    Eigen::Matrix3d tensor;
+
+    // Convert to Eigen format
+    for (sm = 0; sm < 3; sm++) {
+        for (sn = 0; sn < 3; sn++) {
+            float val = tensorIn->m[sm][sn];
+            if (val != val) return;
+            tensor(sm, sn) = static_cast<double>(val);
+        }
+    }
+
+    // Compute exp(E)
+    tensor = tensor.exp();
+
+    // Convert the result to mat33 format
+    for (sm = 0; sm < 3; sm++)
+        for (sn = 0; sn < 3; sn++)
+            tensorIn->m[sm][sn] = static_cast<float>(tensor(sm, sn));
+}
+/* *************************************************************** */
+mat44 Mat44Expm(const mat44 *mat) {
+    mat44 X;
+    Eigen::Matrix4d m;
+    for (size_t i = 0; i < 4; ++i)
+        for (size_t j = 0; j < 4; ++j)
+            m(i, j) = static_cast<double>(mat->m[i][j]);
 
-    out[0] = matD[0][0] * in[0] +
-        matD[0][1] * in[1] +
-        matD[0][2] * in[2] +
-        matD[0][3];
-    out[1] = matD[1][0] * in[0] +
-        matD[1][1] * in[1] +
-        matD[1][2] * in[2] +
-        matD[1][3];
-    out[2] = matD[2][0] * in[0] +
-        matD[2][1] * in[1] +
-        matD[2][2] * in[2] +
-        matD[2][3];
-    return;
+    m = m.exp();
+
+    for (size_t i = 0; i < 4; ++i)
+        for (size_t j = 0; j < 4; ++j)
+            X.m[i][j] = static_cast<float>(m(i, j));
+
+    return X;
 }
 /* *************************************************************** */
-mat44 reg_mat44_mul(mat44 const* A, double scalar) {
+void Mat33Logm(mat33 *tensorIn) {
+    int sm, sn;
+    Eigen::Matrix3d tensor;
+
+    // Convert to Eigen format
+    bool all_zeros = true;
+    double det = 0;
+    for (sm = 0; sm < 3; sm++) {
+        for (sn = 0; sn < 3; sn++) {
+            float val = tensorIn->m[sm][sn];
+            if (val != 0.f) all_zeros = false;
+            if (val != val) return;
+            tensor(sm, sn) = static_cast<double>(val);
+        }
+    }
+    // Actually R case requires invertible and no negative real ev,
+    // but the only observed case so far was non-invertible.
+    // determinant is not a perfect check for invertibility and
+    // identity with zero not great either, but the alternative
+    // is a general eigensolver and the logarithm function should
+    // suceed unless convergence just isn't happening.
+    det = tensor.determinant();
+    if (all_zeros || det == 0) {
+        Mat33ToNan(tensorIn);
+        return;
+    }
+
+    // Compute the actual matrix log
+    tensor = tensor.log();
+
+    // Convert the result to mat33 format
+    for (sm = 0; sm < 3; sm++)
+        for (sn = 0; sn < 3; sn++)
+            tensorIn->m[sm][sn] = static_cast<float>(tensor(sm, sn));
+}
+/* *************************************************************** */
+mat44 Mat44Logm(const mat44 *mat) {
+    mat44 X;
+    Eigen::Matrix4d m;
+    for (char i = 0; i < 4; ++i)
+        for (char j = 0; j < 4; ++j)
+            m(i, j) = static_cast<double>(mat->m[i][j]);
+    m = m.log();
+    for (char i = 0; i < 4; ++i)
+        for (char j = 0; j < 4; ++j)
+            X.m[i][j] = static_cast<float>(m(i, j));
+    return X;
+}
+/* *************************************************************** */
+mat44 Mat44Avg2(const mat44 *A, const mat44 *B) {
     mat44 out;
-    out.m[0][0] = A->m[0][0] * scalar;
-    out.m[0][1] = A->m[0][1] * scalar;
-    out.m[0][2] = A->m[0][2] * scalar;
-    out.m[0][3] = A->m[0][3] * scalar;
-    out.m[1][0] = A->m[1][0] * scalar;
-    out.m[1][1] = A->m[1][1] * scalar;
-    out.m[1][2] = A->m[1][2] * scalar;
-    out.m[1][3] = A->m[1][3] * scalar;
-    out.m[2][0] = A->m[2][0] * scalar;
-    out.m[2][1] = A->m[2][1] * scalar;
-    out.m[2][2] = A->m[2][2] * scalar;
-    out.m[2][3] = A->m[2][3] * scalar;
-    out.m[3][0] = A->m[3][0] * scalar;
-    out.m[3][1] = A->m[3][1] * scalar;
-    out.m[3][2] = A->m[3][2] * scalar;
-    out.m[3][3] = A->m[3][3] * scalar;
-    return out;
-}
-/* *************************************************************** */
-void reg_mat44_disp(const mat44& mat, const std::string& title) {
-    NR_COUT << title << ":\n"
-        << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\t" << mat.m[0][3] << "\n"
-        << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\t" << mat.m[1][3] << "\n"
-        << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << "\t" << mat.m[2][3] << "\n"
-        << mat.m[3][0] << "\t" << mat.m[3][1] << "\t" << mat.m[3][2] << "\t" << mat.m[3][3] << std::endl;
-}
-/* *************************************************************** */
-//is it square distance or just distance?
-// Helper function: Get the square of the Euclidean distance
-double get_square_distance3D(float * first_point3D, float * second_point3D) {
-    return sqrt(Square(first_point3D[0] - second_point3D[0]) +
-                Square(first_point3D[1] - second_point3D[1]) +
-                Square(first_point3D[2] - second_point3D[2]));
+    mat44 logA = Mat44Logm(A);
+    mat44 logB = Mat44Logm(B);
+    for (int i = 0; i < 4; ++i) {
+        logA.m[3][i] = 0.f;
+        logB.m[3][i] = 0.f;
+    }
+    logA = logA + logB;
+    out = logA * 0.5;
+    return Mat44Expm(&out);
 }
 /* *************************************************************** */
-//is it square distance or just distance?
-double get_square_distance2D(float * first_point2D, float * second_point2D) {
-    return sqrt(Square(first_point2D[0] - second_point2D[0]) +
-                Square(first_point2D[1] - second_point2D[1]));
-}
+} // namespace NiftyReg
 /* *************************************************************** */
diff --git a/reg-lib/cpu/Maths.hpp b/reg-lib/cpu/Maths.hpp
new file mode 100644
index 00000000..56782eda
--- /dev/null
+++ b/reg-lib/cpu/Maths.hpp
@@ -0,0 +1,363 @@
+/**
+ * @file Maths.hpp
+ * @brief Library that contains small math routines
+ * @author Marc Modat
+ * @date 25/03/2009
+ *
+ *  Created by Marc Modat on 25/03/2009.
+ *  Copyright (c) 2009-2018, University College London
+ *  Copyright (c) 2018, NiftyReg Developers.
+ *  All rights reserved.
+ *  See the LICENSE.txt file in the nifty_reg root folder
+ *
+ */
+
+#pragma once
+
+#include "RNifti.h"
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#if USE_SSE
+#include <emmintrin.h>
+#include <xmmintrin.h>
+#ifdef __SSE3__
+#include <pmmintrin.h>
+#endif
+#endif
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+
+#ifdef __CUDACC__
+#define DEVICE  __host__ __device__
+#else
+#define DEVICE
+#endif
+
+typedef enum {
+    DEF_FIELD,
+    DISP_FIELD,
+    CUB_SPLINE_GRID,
+    DEF_VEL_FIELD,
+    DISP_VEL_FIELD,
+    SPLINE_VEL_GRID,
+    LIN_SPLINE_GRID
+} NREG_TRANS_TYPE;
+
+/* *************************************************************** */
+namespace NiftyReg {
+/* *************************************************************** */
+// The functions in the standard library are slower; so, these are implemented
+template<typename T>
+DEVICE inline T Square(const T& x) {
+    return x * x;
+}
+template<typename T>
+DEVICE inline T Cube(const T& x) {
+    return x * x * x;
+}
+template<typename T>
+DEVICE inline int Floor(const T& x) {
+    const int i = static_cast<int>(x);
+    return i - (x < i);
+}
+template<typename T>
+DEVICE inline int Ceil(const T& x) {
+    const int i = static_cast<int>(x);
+    return i + (x > i);
+}
+template<typename T>
+DEVICE inline int Round(const T& x) {
+    return static_cast<int>(x + (x >= 0 ? 0.5 : -0.5));
+}
+/* *************************************************************** */
+DEVICE inline void Divide(const int num, const int denom, int& quot, int& rem) {
+    // This will be optimised by the compiler into a single div instruction
+    quot = num / denom;
+    rem = num % denom;
+}
+/* *************************************************************** */
+template<class T>
+DEVICE inline T* Matrix1dAlloc(const size_t arraySize) {
+    return static_cast<T*>(malloc(arraySize * sizeof(T)));
+}
+/* *************************************************************** */
+template<class T>
+DEVICE inline void Matrix1dDealloc(T *mat) {
+    free(mat);
+}
+/* *************************************************************** */
+template<class T>
+DEVICE inline T** Matrix2dAlloc(const size_t arraySizeX, const size_t arraySizeY) {
+    T **res;
+    res = static_cast<T**>(malloc(arraySizeX * sizeof(T*)));
+    for (size_t i = 0; i < arraySizeX; i++)
+        res[i] = static_cast<T*>(malloc(arraySizeY * sizeof(T)));
+    return res;
+}
+/* *************************************************************** */
+template<class T>
+DEVICE inline void Matrix2dDealloc(const size_t arraySizeX, T **mat) {
+    for (size_t i = 0; i < arraySizeX; i++)
+        free(mat[i]);
+    free(mat);
+}
+/* *************************************************************** */
+template<class T>
+DEVICE inline T** Matrix2dTranspose(T **mat, const size_t arraySizeX, const size_t arraySizeY) {
+    T **res;
+    res = static_cast<T**>(malloc(arraySizeY * sizeof(T*)));
+    for (size_t i = 0; i < arraySizeY; i++)
+        res[i] = static_cast<T*>(malloc(arraySizeX * sizeof(T)));
+    for (size_t i = 0; i < arraySizeX; i++)
+        for (size_t j = 0; j < arraySizeY; j++)
+            res[j][i] = mat[i][j];
+    return res;
+}
+/* *************************************************************** */
+template<class T>
+T** Matrix2dMultiply(T **mat1, const size_t mat1X, const size_t mat1Y, T **mat2, const size_t mat2X, const size_t mat2Y, const bool transposeMat2);
+template<class T>
+void Matrix2dMultiply(T **mat1, const size_t mat1X, const size_t mat1Y, T **mat2, const size_t mat2X, const size_t mat2Y, T **res, const bool transposeMat2);
+/* *************************************************************** */
+template<class T>
+T* Matrix2dVectorMultiply(T **mat, const size_t m, const size_t n, T *vect);
+template<class T>
+void Matrix2dVectorMultiply(T **mat, const size_t m, const size_t n, T *vect, T *res);
+/* *************************************************************** */
+/// @brief Subtract two 3-by-3 matrices
+DEVICE inline mat33 operator-(const mat33 A, const mat33 B) {
+    mat33 R;
+    for (int i = 0; i < 3; i++)
+        for (int j = 0; j < 3; j++)
+            R.m[i][j] = static_cast<float>(static_cast<double>(A.m[i][j]) - static_cast<double>(B.m[i][j]));
+    return R;
+}
+/* *************************************************************** */
+/// @brief Multiply two 3-by-3 matrices
+DEVICE inline mat33 operator*(const mat33 A, const mat33 B) {
+    mat33 R;
+    for (int i = 0; i < 3; i++)
+        for (int j = 0; j < 3; j++)
+            R.m[i][j] = static_cast<float>(static_cast<double>(A.m[i][0]) * static_cast<double>(B.m[0][j]) +
+                                           static_cast<double>(A.m[i][1]) * static_cast<double>(B.m[1][j]) +
+                                           static_cast<double>(A.m[i][2]) * static_cast<double>(B.m[2][j]));
+    return R;
+}
+/* *************************************************************** */
+/// @brief Multiply a vector with a 3-by-3 matrix
+DEVICE inline void Mat33Mul(const mat33 mat, const float(&in)[2], float(&out)[2]) {
+    out[0] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat.m[0][0]) +
+                                static_cast<double>(in[1]) * static_cast<double>(mat.m[0][1]) +
+                                static_cast<double>(mat.m[0][2]));
+    out[1] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat.m[1][0]) +
+                                static_cast<double>(in[1]) * static_cast<double>(mat.m[1][1]) +
+                                static_cast<double>(mat.m[1][2]));
+}
+/* *************************************************************** */
+/// @brief Multiply a vector with a 3-by-3 matrix
+DEVICE inline void Mat33Mul(const mat44 mat, const float (&in)[2], float (&out)[2]) {
+    out[0] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat.m[0][0]) +
+                                static_cast<double>(in[1]) * static_cast<double>(mat.m[0][1]) +
+                                static_cast<double>(mat.m[0][3]));
+    out[1] = static_cast<float>(static_cast<double>(in[0]) * static_cast<double>(mat.m[1][0]) +
+                                static_cast<double>(in[1]) * static_cast<double>(mat.m[1][1]) +
+                                static_cast<double>(mat.m[1][3]));
+}
+/* *************************************************************** */
+/// @brief Multiply a scalar with a 3-by-3 matrix multiplied by a vector
+template<bool is3d>
+DEVICE inline void Mat33Mul(const mat33 mat, const float (&in)[3], const float weight, float (&out)[3]) {
+    out[0] = weight * (mat.m[0][0] * in[0] + mat.m[1][0] * in[1] + mat.m[2][0] * in[2]);
+    out[1] = weight * (mat.m[0][1] * in[0] + mat.m[1][1] * in[1] + mat.m[2][1] * in[2]);
+    if constexpr (is3d)
+        out[2] = weight * (mat.m[0][2] * in[0] + mat.m[1][2] * in[1] + mat.m[2][2] * in[2]);
+}
+/* *************************************************************** */
+/// @brief Transpose a 3-by-3 matrix
+DEVICE inline mat33 Mat33Trans(const mat33 A) {
+    mat33 R;
+    for (int i = 0; i < 3; i++)
+        for (int j = 0; j < 3; j++)
+            R.m[j][i] = A.m[i][j];
+    return R;
+}
+/* *************************************************************** */
+/// @brief Diagonalize a 3-by-3 matrix
+void Mat33Diagonalize(const mat33 *A, mat33 *Q, mat33 *D);
+/* *************************************************************** */
+/// @brief Set up a 3-by-3 matrix with an identity
+DEVICE inline void Mat33Eye(mat33 *mat) {
+    mat->m[0][0] = 1.f;
+    mat->m[0][1] = mat->m[0][2] = 0.f;
+    mat->m[1][1] = 1.f;
+    mat->m[1][0] = mat->m[1][2] = 0.f;
+    mat->m[2][2] = 1.f;
+    mat->m[2][0] = mat->m[2][1] = 0.f;
+}
+/* *************************************************************** */
+DEVICE inline void Mat33ToNan(mat33 *A) {
+    for (int i = 0; i < 3; ++i)
+        for (int j = 0; j < 3; ++j)
+            A->m[i][j] = std::numeric_limits<float>::quiet_NaN();
+}
+/* *************************************************************** */
+/// @brief Transform a mat44 to a mat33 matrix
+DEVICE inline mat33 Mat44ToMat33(const mat44 *A) {
+    mat33 out;
+    out.m[0][0] = A->m[0][0];
+    out.m[0][1] = A->m[0][1];
+    out.m[0][2] = A->m[0][2];
+    out.m[1][0] = A->m[1][0];
+    out.m[1][1] = A->m[1][1];
+    out.m[1][2] = A->m[1][2];
+    out.m[2][0] = A->m[2][0];
+    out.m[2][1] = A->m[2][1];
+    out.m[2][2] = A->m[2][2];
+    return out;
+}
+/* *************************************************************** */
+template<class T>
+void HeapSort(T *array_tmp, int blockNum);
+void HeapSort(float *array_tmp, int *index_tmp, int blockNum);
+/* *************************************************************** */
+DEVICE inline bool operator==(const mat44 A, const mat44 B) {
+    for (char i = 0; i < 4; ++i)
+        for (char j = 0; j < 4; ++j)
+            if (A.m[i][j] != B.m[i][j])
+                return false;
+    return true;
+}
+/* *************************************************************** */
+DEVICE inline bool operator!=(const mat44 A, const mat44 B) {
+    return !(A == B);
+}
+/* *************************************************************** */
+/// @brief Multiply two 4-by-4 matrices
+DEVICE inline mat44 operator*(const mat44 A, const mat44 B) {
+    mat44 R;
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+            R.m[i][j] = static_cast<float>(static_cast<double>(A.m[i][0]) * static_cast<double>(B.m[0][j]) +
+                                           static_cast<double>(A.m[i][1]) * static_cast<double>(B.m[1][j]) +
+                                           static_cast<double>(A.m[i][2]) * static_cast<double>(B.m[2][j]) +
+                                           static_cast<double>(A.m[i][3]) * static_cast<double>(B.m[3][j]));
+    return R;
+}
+/* *************************************************************** */
+/// @brief Multiply a 4-by-4 matrix with a scalar
+DEVICE inline mat44 operator*(const mat44 mat, const double scalar) {
+    mat44 out;
+    out.m[0][0] = mat.m[0][0] * scalar;
+    out.m[0][1] = mat.m[0][1] * scalar;
+    out.m[0][2] = mat.m[0][2] * scalar;
+    out.m[0][3] = mat.m[0][3] * scalar;
+    out.m[1][0] = mat.m[1][0] * scalar;
+    out.m[1][1] = mat.m[1][1] * scalar;
+    out.m[1][2] = mat.m[1][2] * scalar;
+    out.m[1][3] = mat.m[1][3] * scalar;
+    out.m[2][0] = mat.m[2][0] * scalar;
+    out.m[2][1] = mat.m[2][1] * scalar;
+    out.m[2][2] = mat.m[2][2] * scalar;
+    out.m[2][3] = mat.m[2][3] * scalar;
+    out.m[3][0] = mat.m[3][0] * scalar;
+    out.m[3][1] = mat.m[3][1] * scalar;
+    out.m[3][2] = mat.m[3][2] * scalar;
+    out.m[3][3] = mat.m[3][3] * scalar;
+    return out;
+}
+/* *************************************************************** */
+/// @brief Multiply a vector with a 4-by-4 matrix
+template<class T, bool is3d=true>
+DEVICE inline void Mat44Mul(const mat44 mat, const T(&in)[3], T(&out)[3]) {
+    out[0] = static_cast<T>(static_cast<double>(mat.m[0][0]) * static_cast<double>(in[0]) +
+                            static_cast<double>(mat.m[0][1]) * static_cast<double>(in[1]) +
+                            static_cast<double>(mat.m[0][2]) * static_cast<double>(in[2]) +
+                            static_cast<double>(mat.m[0][3]));
+    out[1] = static_cast<T>(static_cast<double>(mat.m[1][0]) * static_cast<double>(in[0]) +
+                            static_cast<double>(mat.m[1][1]) * static_cast<double>(in[1]) +
+                            static_cast<double>(mat.m[1][2]) * static_cast<double>(in[2]) +
+                            static_cast<double>(mat.m[1][3]));
+    if constexpr (is3d)
+        out[2] = static_cast<T>(static_cast<double>(mat.m[2][0]) * static_cast<double>(in[0]) +
+                                static_cast<double>(mat.m[2][1]) * static_cast<double>(in[1]) +
+                                static_cast<double>(mat.m[2][2]) * static_cast<double>(in[2]) +
+                                static_cast<double>(mat.m[2][3]));
+}
+/* *************************************************************** */
+/// @brief Add two 4-by-4 matrices
+DEVICE inline mat44 operator+(const mat44 A, const mat44 B) {
+    mat44 R;
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+            R.m[i][j] = static_cast<float>(static_cast<double>(A.m[i][j]) + static_cast<double>(B.m[i][j]));
+    return R;
+}
+/* *************************************************************** */
+/// @brief Subtract two 4-by-4 matrices
+DEVICE inline mat44 operator-(const mat44 A, const mat44 B) {
+    mat44 R;
+    for (int i = 0; i < 4; i++)
+        for (int j = 0; j < 4; j++)
+            R.m[i][j] = static_cast<float>(static_cast<double>(A.m[i][j]) - static_cast<double>(B.m[i][j]));
+    return R;
+}
+/* *************************************************************** */
+/// @brief Set up a 4-by-4 matrix with an identity
+DEVICE inline void Mat44Eye(mat44 *mat) {
+    mat->m[0][0] = 1.f;
+    mat->m[0][1] = mat->m[0][2] = mat->m[0][3] = 0.f;
+    mat->m[1][1] = 1.f;
+    mat->m[1][0] = mat->m[1][2] = mat->m[1][3] = 0.f;
+    mat->m[2][2] = 1.f;
+    mat->m[2][0] = mat->m[2][1] = mat->m[2][3] = 0.f;
+    mat->m[3][3] = 1.f;
+    mat->m[3][0] = mat->m[3][1] = mat->m[3][2] = 0.f;
+}
+/* *************************************************************** */
+/// @brief Compute the determinant of a 4-by-4 matrix
+template<class T>
+T Mat44Det(const mat44 *A);
+/* *************************************************************** */
+/// @brief Display a mat44 matrix
+void Mat44Disp(const mat44 mat, const std::string& title);
+/* *************************************************************** */
+//is it square distance or just distance?
+DEVICE inline double SquareDistance2d(const float *first_point2D, const float *second_point2D) {
+    return sqrt(Square(first_point2D[0] - second_point2D[0]) +
+                Square(first_point2D[1] - second_point2D[1]));
+}
+/* *************************************************************** */
+//is it square distance or just distance?
+DEVICE inline double SquareDistance3d(const float *first_point3D, const float *second_point3D) {
+    return sqrt(Square(first_point3D[0] - second_point3D[0]) +
+                Square(first_point3D[1] - second_point3D[1]) +
+                Square(first_point3D[2] - second_point3D[2]));
+}
+/* *************************************************************** */
+template<class T>
+void Svd(T **in, const size_t m, const size_t n, T *w, T **v);
+/* *************************************************************** */
+template<class T>
+T Matrix2dDet(T **mat, const size_t m, const size_t n);
+/* *************************************************************** */
+/// @brief Compute the log of a 3-by-3 matrix
+void Mat33Expm(mat33 *tensorIn);
+/* *************************************************************** */
+/// @brief Compute the exp of a 4-by-4 matrix
+mat44 Mat44Expm(const mat44 *mat);
+/* *************************************************************** */
+/// @brief Compute the log of a 3-by-3 matrix
+void Mat33Logm(mat33 *tensorIn);
+/* *************************************************************** */
+/// @brief Compute the log of a 4-by-4 matrix
+mat44 Mat44Logm(const mat44 *mat);
+/* *************************************************************** */
+/// @brief Compute the average of two matrices using a log-euclidean framework
+mat44 Mat44Avg2(const mat44 *A, const mat44 *b);
+/* *************************************************************** */
+} // namespace NiftyReg
+/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index e91ef03a..fce081f5 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -185,7 +185,7 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam
    params->activeBlockNumber = params->activeBlockNumber < ((int)params->totalBlockNumber - unusableBlock) ? params->activeBlockNumber : (params->totalBlockNumber - unusableBlock);
    //params->activeBlockNumber = params->totalBlockNumber - unusableBlock;
 
-   reg_heapSort(varianceArray, indexArray, params->totalBlockNumber);
+   HeapSort(varianceArray, indexArray, params->totalBlockNumber);
    int *indexArrayPtr = &indexArray[params->totalBlockNumber - 1];
    int count = 0;
    for (int i = 0; i < params->activeBlockNumber; i++) {
@@ -432,13 +432,13 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg
             bestDisplacement[1] += referencePosition_temp[1];
             bestDisplacement[2] = 0.0f;
 
-            reg_mat44_mul(referenceMatrix_xyz, referencePosition_temp, tempPosition);
+            Mat44Mul(*referenceMatrix_xyz, referencePosition_temp, tempPosition);
             z = 2 * params->totalBlock[blockIndex];
 
             params->referencePosition[z] = tempPosition[0];
             params->referencePosition[z + 1] = tempPosition[1];
 
-            reg_mat44_mul(referenceMatrix_xyz, bestDisplacement, tempPosition);
+            Mat44Mul(*referenceMatrix_xyz, bestDisplacement, tempPosition);
 
             params->warpedPosition[z] = tempPosition[0];
             params->warpedPosition[z + 1] = tempPosition[1];
@@ -664,13 +664,13 @@ void block_matching_method3D(nifti_image * reference,
                bestDisplacement[1] += referencePosition_temp[1];
                bestDisplacement[2] += referencePosition_temp[2];
 
-               reg_mat44_mul(referenceMatrix_xyz, referencePosition_temp, tempPosition);
+               Mat44Mul(*referenceMatrix_xyz, referencePosition_temp, tempPosition);
                z = 3 * params->totalBlock[blockIndex];
                params->referencePosition[z] = tempPosition[0];
                params->referencePosition[z+1] = tempPosition[1];
                params->referencePosition[z+2] = tempPosition[2];
 
-               reg_mat44_mul(referenceMatrix_xyz, bestDisplacement, tempPosition);
+               Mat44Mul(*referenceMatrix_xyz, bestDisplacement, tempPosition);
                params->warpedPosition[z] = tempPosition[0];
                params->warpedPosition[z + 1] = tempPosition[1];
                params->warpedPosition[z + 2] = tempPosition[2];
@@ -757,7 +757,7 @@ void optimize(_reg_blockMatchingParam *params,
          //Can have undefined = NaN in the warped image now -
          //to not loose the correspondence - so check that:
          if(in[0] == in[0]){
-            reg_mat33_mul(transformation_matrix, in, out);
+            Mat33Mul(*transformation_matrix, in, out);
 
             referencePositionVect.push_back(params->referencePosition[index]);
             referencePositionVect.push_back(params->referencePosition[index+1]);
@@ -802,7 +802,7 @@ void optimize(_reg_blockMatchingParam *params,
          //Can have undefined = NaN in the warped image now -
          //to not loose the correspondence - so check that:
          if(in[0] == in[0]){
-            reg_mat44_mul(transformation_matrix, in, out);
+            Mat44Mul(*transformation_matrix, in, out);
 
             referencePositionVect.push_back(params->referencePosition[index]);
             referencePositionVect.push_back(params->referencePosition[index+1]);
diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h
index f370df90..bf8095a0 100755
--- a/reg-lib/cpu/_reg_blockMatching.h
+++ b/reg-lib/cpu/_reg_blockMatching.h
@@ -14,7 +14,7 @@
 
 #pragma once
 
-#include "_reg_maths.h"
+#include "Maths.hpp"
 
 #define TOLERANCE 0.001
 #define MAX_ITERATIONS 30
diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp
index a2e8ef60..dcf59de7 100755
--- a/reg-lib/cpu/_reg_globalTrans.cpp
+++ b/reg-lib/cpu/_reg_globalTrans.cpp
@@ -11,8 +11,7 @@
  */
 
 #include "_reg_globalTrans.h"
-#include "_reg_maths.h"
-#include "_reg_maths_eigen.h"
+#include "Maths.hpp"
 
 /* *************************************************************** */
 /* *************************************************************** */
@@ -28,15 +27,13 @@ void reg_affine_deformationField2D(mat44 *affineTransformation,
 
    mat44 *referenceMatrix;
    if(deformationFieldImage->sform_code>0)
-   {
-      referenceMatrix=&(deformationFieldImage->sto_xyz);
-   }
-   else referenceMatrix=&(deformationFieldImage->qto_xyz);
+      referenceMatrix=&deformationFieldImage->sto_xyz;
+   else referenceMatrix=&deformationFieldImage->qto_xyz;
 
    mat44 transformationMatrix;
    if(composition)
       transformationMatrix = *affineTransformation;
-   else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix);
+   else transformationMatrix = *affineTransformation * *referenceMatrix;
 
    double voxel[3]={0,0,0}, position[3]={0,0,0};
    int x=0, y=0;
@@ -61,9 +58,9 @@ void reg_affine_deformationField2D(mat44 *affineTransformation,
             {
                voxel[0] = (double) deformationFieldPtrX[index];
                voxel[1] = (double) deformationFieldPtrY[index];
-               reg_mat44_mul(&transformationMatrix, voxel, position);
+               Mat44Mul(transformationMatrix, voxel, position);
             }
-            else reg_mat44_mul(&transformationMatrix, voxel, position);
+            else Mat44Mul(transformationMatrix, voxel, position);
 
             /* the deformation field (real coordinates) is stored */
             deformationFieldPtrX[index] = (FieldTYPE) position[0];
@@ -87,15 +84,13 @@ void reg_affine_deformationField3D(mat44 *affineTransformation,
 
    mat44 *referenceMatrix;
    if(deformationFieldImage->sform_code>0)
-   {
-      referenceMatrix=&(deformationFieldImage->sto_xyz);
-   }
-   else referenceMatrix=&(deformationFieldImage->qto_xyz);
+      referenceMatrix=&deformationFieldImage->sto_xyz;
+   else referenceMatrix=&deformationFieldImage->qto_xyz;
 
    mat44 transformationMatrix;
    if(composition)
       transformationMatrix = *affineTransformation;
-   else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix);
+   else transformationMatrix = *affineTransformation * *referenceMatrix;
 
    double voxel[3]={0,0,0}, position[3]={0,0,0};
    int x=0, y=0, z=0;
@@ -124,7 +119,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation,
                   voxel[1]= (double) deformationFieldPtrY[index];
                   voxel[2]= (double) deformationFieldPtrZ[index];
                }
-               reg_mat44_mul(&transformationMatrix, voxel, position);
+               Mat44Mul(transformationMatrix, voxel, position);
 
                /* the deformation field (real coordinates) is stored */
                deformationFieldPtrX[index] = (FieldTYPE) position[0];
@@ -207,9 +202,9 @@ void estimate_rigid_transformation2D(float** points1, float** points2, int num_p
    centroid_warpedFloat[0] = static_cast<float>(centroid_warped[0]);
    centroid_warpedFloat[1] = static_cast<float>(centroid_warped[1]);
 
-   float * w = reg_matrix1DAllocate<float>(2);
-   float **v = reg_matrix2DAllocate<float>(2, 2);
-   float **r = reg_matrix2DAllocate<float>(2, 2);
+   float * w = Matrix1dAlloc<float>(2);
+   float **v = Matrix2dAlloc<float>(2, 2);
+   float **r = Matrix2dAlloc<float>(2, 2);
 
    // Demean the input points
    for (int j = 0; j < num_points; ++j) {
@@ -220,24 +215,24 @@ void estimate_rigid_transformation2D(float** points1, float** points2, int num_p
       points2[j][1] = static_cast<float>(static_cast<double>(points2[j][1]) - static_cast<double>(centroid_warpedFloat[1]));
    }
 
-   float **p1t = reg_matrix2DTranspose<float>(points1, num_points, 2);
-   float **u = reg_matrix2DMultiply<float>(p1t,2, num_points, points2, num_points, 2, false);
+   float **p1t = Matrix2dTranspose<float>(points1, num_points, 2);
+   float **u = Matrix2dMultiply<float>(p1t,2, num_points, points2, num_points, 2, false);
 
-   svd(u, 2, 2, w, v);
+   Svd(u, 2, 2, w, v);
 
    // Calculate transpose
-   float **ut = reg_matrix2DTranspose<float>(u, 2, 2);
+   float **ut = Matrix2dTranspose<float>(u, 2, 2);
 
    // Calculate the rotation matrix
-   reg_matrix2DMultiply<float>(v, 2, 2, ut, 2, 2, r, false);
+   Matrix2dMultiply<float>(v, 2, 2, ut, 2, 2, r, false);
 
-   float det = reg_matrix2DDet<float>(r, 2, 2);
+   float det = Matrix2dDet<float>(r, 2, 2);
 
    // Take care of possible reflection
    if (det < 0) {
       v[0][1] = -v[0][1];
       v[1][1] = -v[1][1];
-      reg_matrix2DMultiply<float>(v, 2, 2, ut, 2, 2, r, false);
+      Matrix2dMultiply<float>(v, 2, 2, ut, 2, 2, r, false);
    }
 
    // Calculate the translation
@@ -271,12 +266,12 @@ void estimate_rigid_transformation2D(float** points1, float** points2, int num_p
    transformation->m[3][3] = 1.0f;
 
    // Do the deletion here
-   reg_matrix2DDeallocate(2, u);
-   reg_matrix1DDeallocate(w);
-   reg_matrix2DDeallocate(2, v);
-   reg_matrix2DDeallocate(2, ut);
-   reg_matrix2DDeallocate(2, r);
-   //    reg_matrix2DDeallocate(2, p1t);
+   Matrix2dDealloc(2, u);
+   Matrix1dDealloc(w);
+   Matrix2dDealloc(2, v);
+   Matrix2dDealloc(2, ut);
+   Matrix2dDealloc(2, r);
+   //    Matrix2dDealloc(2, p1t);
    for(size_t dance=0;dance<2;++dance) free(p1t[dance]); free(p1t);
 }
 /* *************************************************************** */
@@ -284,8 +279,8 @@ void estimate_rigid_transformation2D(std::vector<_reg_sorted_point2D> &points, m
 {
 
    unsigned num_points = points.size();
-   float** points1 = reg_matrix2DAllocate<float>(num_points, 2);
-   float** points2 = reg_matrix2DAllocate<float>(num_points, 2);
+   float** points1 = Matrix2dAlloc<float>(num_points, 2);
+   float** points2 = Matrix2dAlloc<float>(num_points, 2);
    for (unsigned i = 0; i < num_points; i++) {
       points1[i][0] = points[i].reference[0];
       points1[i][1] = points[i].reference[1];
@@ -294,8 +289,8 @@ void estimate_rigid_transformation2D(std::vector<_reg_sorted_point2D> &points, m
    }
    estimate_rigid_transformation2D(points1, points2, num_points, transformation);
    //FREE MEMORY
-   reg_matrix2DDeallocate(num_points, points1);
-   reg_matrix2DDeallocate(num_points, points2);
+   Matrix2dDealloc(num_points, points1);
+   Matrix2dDealloc(num_points, points2);
 }
 /* *************************************************************** */
 void estimate_rigid_transformation3D(float** points1, float** points2, int num_points, mat44 * transformation)
@@ -335,9 +330,9 @@ void estimate_rigid_transformation3D(float** points1, float** points2, int num_p
    centroid_warpedFloat[1] = static_cast<float>(centroid_warped[1]);
    centroid_warpedFloat[2] = static_cast<float>(centroid_warped[2]);
 
-   float * w = reg_matrix1DAllocate<float>(3);
-   float **v  = reg_matrix2DAllocate<float>(3, 3);
-   float **r  = reg_matrix2DAllocate<float>(3, 3);
+   float * w = Matrix1dAlloc<float>(3);
+   float **v  = Matrix2dAlloc<float>(3, 3);
+   float **r  = Matrix2dAlloc<float>(3, 3);
 
    // Demean the input points
    for (int j = 0; j < num_points; ++j) {
@@ -349,27 +344,27 @@ void estimate_rigid_transformation3D(float** points1, float** points2, int num_p
       points2[j][1] = static_cast<float>(static_cast<double>(points2[j][1]) - static_cast<double>(centroid_warpedFloat[1]));
       points2[j][2] = static_cast<float>(static_cast<double>(points2[j][2]) - static_cast<double>(centroid_warpedFloat[2]));
    }
-   //T** reg_matrix2DTranspose(T** mat, size_t arraySizeX, size_t arraySizeY);
-   //T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2);
-   float **p1t = reg_matrix2DTranspose<float>(points1, num_points, 3);
-   float **u = reg_matrix2DMultiply<float>(p1t,3, num_points, points2, num_points, 3, false);
+   //T** Matrix2dTranspose(T** mat, size_t arraySizeX, size_t arraySizeY);
+   //T** Matrix2dMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2);
+   float **p1t = Matrix2dTranspose<float>(points1, num_points, 3);
+   float **u = Matrix2dMultiply<float>(p1t,3, num_points, points2, num_points, 3, false);
 
-   svd(u, 3, 3, w, v);
+   Svd(u, 3, 3, w, v);
 
    // Calculate transpose
-   float **ut = reg_matrix2DTranspose<float>(u, 3, 3);
+   float **ut = Matrix2dTranspose<float>(u, 3, 3);
 
    // Calculate the rotation matrix
-   reg_matrix2DMultiply<float>(v, 3, 3, ut, 3, 3, r, false);
+   Matrix2dMultiply<float>(v, 3, 3, ut, 3, 3, r, false);
 
-   float det = reg_matrix2DDet<float>(r, 3, 3);
+   float det = Matrix2dDet<float>(r, 3, 3);
 
    // Take care of possible reflection
    if (det < 0) {
       v[0][2] = -v[0][2];
       v[1][2] = -v[1][2];
       v[2][2] = -v[2][2];
-      reg_matrix2DMultiply<float>(v, 3, 3, ut, 3, 3, r, false);
+      Matrix2dMultiply<float>(v, 3, 3, ut, 3, 3, r, false);
    }
 
    // Calculate the translation
@@ -407,19 +402,19 @@ void estimate_rigid_transformation3D(float** points1, float** points2, int num_p
    transformation->m[3][3] = 1.0f;
 
    // Do the deletion here
-   reg_matrix2DDeallocate(3, u);
-   reg_matrix1DDeallocate(w);
-   reg_matrix2DDeallocate(3, v);
-   reg_matrix2DDeallocate(3, ut);
-   reg_matrix2DDeallocate(3, r);
-   reg_matrix2DDeallocate(3, p1t);
+   Matrix2dDealloc(3, u);
+   Matrix1dDealloc(w);
+   Matrix2dDealloc(3, v);
+   Matrix2dDealloc(3, ut);
+   Matrix2dDealloc(3, r);
+   Matrix2dDealloc(3, p1t);
 }
 /* *************************************************************** */
 void estimate_rigid_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44 * transformation)
 {
    unsigned num_points = points.size();
-   float** points1 = reg_matrix2DAllocate<float>(num_points, 3);
-   float** points2 = reg_matrix2DAllocate<float>(num_points, 3);
+   float** points1 = Matrix2dAlloc<float>(num_points, 3);
+   float** points2 = Matrix2dAlloc<float>(num_points, 3);
    for (unsigned i = 0; i < num_points; i++) {
       points1[i][0] = points[i].reference[0];
       points1[i][1] = points[i].reference[1];
@@ -430,8 +425,8 @@ void estimate_rigid_transformation3D(std::vector<_reg_sorted_point3D> &points, m
    }
    estimate_rigid_transformation3D(points1, points2, num_points, transformation);
    //FREE MEMORY
-   reg_matrix2DDeallocate(num_points, points1);
-   reg_matrix2DDeallocate(num_points, points2);
+   Matrix2dDealloc(num_points, points1);
+   Matrix2dDealloc(num_points, points2);
 }
 /* *************************************************************** */
 void estimate_affine_transformation2D(float** points1, float** points2, int num_points, mat44 * transformation)
@@ -439,7 +434,7 @@ void estimate_affine_transformation2D(float** points1, float** points2, int num_
    //We assume same number of points in both arrays
    int num_equations = num_points * 2;
    unsigned c = 0;
-   float** A = reg_matrix2DAllocate<float>(num_equations, 6);
+   float** A = Matrix2dAlloc<float>(num_equations, 6);
 
    for (int k = 0; k < num_points; ++k) {
       c = k * 2;
@@ -455,10 +450,10 @@ void estimate_affine_transformation2D(float** points1, float** points2, int num_
       A[c + 1][5] = 1.0f;
    }
 
-   float* w  = reg_matrix1DAllocate<float>(6);
-   float** v = reg_matrix2DAllocate<float>(6, 6);
+   float* w  = Matrix1dAlloc<float>(6);
+   float** v = Matrix2dAlloc<float>(6, 6);
 
-   svd(A, num_equations, 6, w, v);
+   Svd(A, num_equations, 6, w, v);
 
    for (unsigned k = 0; k < 6; ++k) {
       if (w[k] < 0.0001) {
@@ -479,19 +474,19 @@ void estimate_affine_transformation2D(float** points1, float** points2, int num_
       }
    }
 
-   float** r = reg_matrix2DAllocate<float>(6, num_equations);
-   reg_matrix2DMultiply<float>(v, 6, 6, A, num_equations, 6, r, true);
+   float** r = Matrix2dAlloc<float>(6, num_equations);
+   Matrix2dMultiply<float>(v, 6, 6, A, num_equations, 6, r, true);
    // Now r contains the pseudoinverse
    // Create vector b and then multiple r*b to get the affine paramsA
-   float* b = reg_matrix1DAllocate<float>(num_equations);
+   float* b = Matrix1dAlloc<float>(num_equations);
    for (int k = 0; k < num_points; ++k) {
       c = k * 2;
       b[c] = points2[k][0];
       b[c + 1] = points2[k][1];
    }
 
-   float* transform = reg_matrix1DAllocate<float>(6);
-   reg_matrix2DVectorMultiply<float>(r, 6, num_equations, b, transform);
+   float* transform = Matrix1dAlloc<float>(6);
+   Matrix2dVectorMultiply<float>(r, 6, num_equations, b, transform);
 
    transformation->m[0][0] = transform[0];
    transformation->m[0][1] = transform[1];
@@ -514,19 +509,19 @@ void estimate_affine_transformation2D(float** points1, float** points2, int num_
    transformation->m[3][3] = 1.0f;
 
    // Do the deletion here
-   reg_matrix1DDeallocate(transform);
-   reg_matrix1DDeallocate(b);
-   reg_matrix2DDeallocate(6, r);
-   reg_matrix2DDeallocate(6, v);
-   reg_matrix1DDeallocate(w);
-   reg_matrix2DDeallocate(num_equations, A);
+   Matrix1dDealloc(transform);
+   Matrix1dDealloc(b);
+   Matrix2dDealloc(6, r);
+   Matrix2dDealloc(6, v);
+   Matrix1dDealloc(w);
+   Matrix2dDealloc(num_equations, A);
 }
 /* *************************************************************** */
 void estimate_affine_transformation2D(std::vector<_reg_sorted_point2D> &points, mat44 * transformation)
 {
    unsigned num_points = points.size();
-   float** points1 = reg_matrix2DAllocate<float>(num_points, 2);
-   float** points2 = reg_matrix2DAllocate<float>(num_points, 2);
+   float** points1 = Matrix2dAlloc<float>(num_points, 2);
+   float** points2 = Matrix2dAlloc<float>(num_points, 2);
    for (unsigned i = 0; i < num_points; i++) {
       points1[i][0] = points[i].reference[0];
       points1[i][1] = points[i].reference[1];
@@ -535,8 +530,8 @@ void estimate_affine_transformation2D(std::vector<_reg_sorted_point2D> &points,
    }
    estimate_affine_transformation2D(points1, points2, num_points, transformation);
    //FREE MEMORY
-   reg_matrix2DDeallocate(num_points, points1);
-   reg_matrix2DDeallocate(num_points, points2);
+   Matrix2dDealloc(num_points, points1);
+   Matrix2dDealloc(num_points, points2);
 }
 /* *************************************************************** */
 // estimate an affine transformation using least square
@@ -548,7 +543,7 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_
    // we need at least 4 points. Assuming we have that here.
    int num_equations = num_points * 3;
    unsigned c = 0;
-   float** A = reg_matrix2DAllocate<float>(num_equations, 12);
+   float** A = Matrix2dAlloc<float>(num_equations, 12);
 
    for (int k = 0; k < num_points; ++k) {
       c = k * 3;
@@ -571,10 +566,10 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_
       A[c + 2][11] = 1.0f;
    }
 
-   float* w = reg_matrix1DAllocate<float>(12);
-   float** v = reg_matrix2DAllocate<float>(12, 12);
-   // Now we can compute our svd
-   svd(A, num_equations, 12, w, v);
+   float* w = Matrix1dAlloc<float>(12);
+   float** v = Matrix2dAlloc<float>(12, 12);
+   // Now we can compute our Svd
+   Svd(A, num_equations, 12, w, v);
 
    // First we make sure that the really small singular values
    // are set to 0. and compute the inverse by taking the reciprocal
@@ -600,11 +595,11 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_
 
    // Now multiply the matrices together
    // Pseudoinverse = v * w * A(transpose)
-   float** r = reg_matrix2DAllocate<float>(12, num_equations);
-   reg_matrix2DMultiply<float>(v, 12, 12, A, num_equations, 12, r, true);
+   float** r = Matrix2dAlloc<float>(12, num_equations);
+   Matrix2dMultiply<float>(v, 12, 12, A, num_equations, 12, r, true);
    // Now r contains the pseudoinverse
    // Create vector b and then multiple rb to get the affine paramsA
-   float* b = reg_matrix1DAllocate<float>(num_equations);
+   float* b = Matrix1dAlloc<float>(num_equations);
    for (int k = 0; k < num_points; ++k) {
       c = k * 3;
       b[c] = points2[k][0];
@@ -612,9 +607,9 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_
       b[c + 2] = points2[k][2];
    }
 
-   float * transform = reg_matrix1DAllocate<float>(12);
+   float * transform = Matrix1dAlloc<float>(12);
    //mul_matvec(r, 12, num_equations, b, transform);
-   reg_matrix2DVectorMultiply<float>(r, 12, num_equations, b, transform);
+   Matrix2dVectorMultiply<float>(r, 12, num_equations, b, transform);
 
    transformation->m[0][0] = transform[0];
    transformation->m[0][1] = transform[1];
@@ -637,20 +632,20 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_
    transformation->m[3][3] = 1.0f;
 
    // Do the deletion here
-   reg_matrix1DDeallocate(transform);
-   reg_matrix1DDeallocate(b);
-   reg_matrix2DDeallocate(12, r);
-   reg_matrix2DDeallocate(12, v);
-   reg_matrix1DDeallocate(w);
-   reg_matrix2DDeallocate(num_equations, A);
+   Matrix1dDealloc(transform);
+   Matrix1dDealloc(b);
+   Matrix2dDealloc(12, r);
+   Matrix2dDealloc(12, v);
+   Matrix1dDealloc(w);
+   Matrix2dDealloc(num_equations, A);
 }
 /* *************************************************************** */
 // estimate an affine transformation using least square
 void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44 * transformation)
 {
    unsigned num_points = points.size();
-   float** points1 = reg_matrix2DAllocate<float>(num_points, 3);
-   float** points2 = reg_matrix2DAllocate<float>(num_points, 3);
+   float** points1 = Matrix2dAlloc<float>(num_points, 3);
+   float** points2 = Matrix2dAlloc<float>(num_points, 3);
    for (unsigned i = 0; i < num_points; i++) {
       points1[i][0] = points[i].reference[0];
       points1[i][1] = points[i].reference[1];
@@ -661,8 +656,8 @@ void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points,
    }
    estimate_affine_transformation3D(points1, points2, num_points, transformation);
    //FREE MEMORY
-   reg_matrix2DDeallocate(num_points, points1);
-   reg_matrix2DDeallocate(num_points, points2);
+   Matrix2dDealloc(num_points, points1);
+   Matrix2dDealloc(num_points, points2);
 }
 /* *************************************************************** */
 ///LTS 2D
@@ -671,7 +666,7 @@ void optimize_2D(float* referencePosition, float* warpedPosition,
                  mat44 * final, bool affine) {
 
    // Set the current transformation to identity
-   reg_mat44_eye(final);
+   Mat44Eye(final);
 
    const unsigned num_points = activeBlockNumber;
    unsigned long num_equations = num_points * 2;
@@ -705,13 +700,11 @@ void optimize_2D(float* referencePosition, float* warpedPosition,
    {
       // Transform the points in the reference
       for (unsigned j = 0; j < num_points * 2; j += 2)
-      {
-         reg_mat33_mul(final, &referencePosition[j], &newWarpedPosition[j]);
-      }
+         Mat33Mul(*final, reinterpret_cast<float(&)[2]>(referencePosition[j]), reinterpret_cast<float(&)[2]>(newWarpedPosition[j]));
       queue = std::multimap<double, _reg_sorted_point2D>();
       for (unsigned j = 0; j < num_points * 2; j += 2)
       {
-         distance = get_square_distance2D(&newWarpedPosition[j], &warpedPosition[j]);
+         distance = SquareDistance2d(&newWarpedPosition[j], &warpedPosition[j]);
          queue.insert(std::pair<double, _reg_sorted_point2D>(distance,
                                                              _reg_sorted_point2D(&referencePosition[j], &warpedPosition[j], distance)));
       }
@@ -754,7 +747,7 @@ void optimize_3D(float *referencePosition, float *warpedPosition,
                  mat44 *final, bool affine) {
 
    // Set the current transformation to identity
-   reg_mat44_eye(final);
+   Mat44Eye(final);
 
    const unsigned num_points = activeBlockNumber;
    unsigned long num_equations = num_points * 3;
@@ -785,13 +778,12 @@ void optimize_3D(float *referencePosition, float *warpedPosition,
    for (int count = 0; count < max_iter; ++count)
    {
       // Transform the points in the reference
-      for (unsigned j = 0; j < num_points * 3; j+=3) {
-         reg_mat44_mul(final, &referencePosition[j], &newWarpedPosition[j]);
-      }
+      for (unsigned j = 0; j < num_points * 3; j+=3)
+         Mat44Mul(*final, reinterpret_cast<float(&)[3]>(referencePosition[j]), reinterpret_cast<float(&)[3]>(newWarpedPosition[j]));
       queue = std::multimap<double, _reg_sorted_point3D>();
       for (unsigned j = 0; j < num_points * 3; j+= 3)
       {
-         distance = get_square_distance3D(&newWarpedPosition[j], &warpedPosition[j]);
+         distance = SquareDistance3d(&newWarpedPosition[j], &warpedPosition[j]);
          queue.insert(std::pair<double,
                       _reg_sorted_point3D>(distance,
                                            _reg_sorted_point3D(&referencePosition[j],
@@ -823,6 +815,6 @@ void optimize_3D(float *referencePosition, float *warpedPosition,
          estimate_rigid_transformation3D(top_points, final);
       }
    }
-   delete [] newWarpedPosition;
+   delete[] newWarpedPosition;
 }
 /* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index c3e17149..d070bee1 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -11,7 +11,6 @@
  */
 
 #include "_reg_localTrans.h"
-#include "_reg_maths_eigen.h"
 
 // Due to SSE usage creates incorrect test results
 #if defined(BUILD_TESTS) && !defined(NDEBUG)
@@ -82,7 +81,7 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
     originIndex[1] = -1.0f;
     originIndex[2] = 0.0f;
     if (referenceImage->nz > 1) originIndex[2] = -1.0f;
-    reg_mat44_mul(&controlPointGridImage->qto_xyz, originIndex, originReal);
+    Mat44Mul(controlPointGridImage->qto_xyz, originIndex, originReal);
     controlPointGridImage->qto_xyz.m[0][3] = controlPointGridImage->qoffset_x = originReal[0];
     controlPointGridImage->qto_xyz.m[1][3] = controlPointGridImage->qoffset_y = originReal[1];
     controlPointGridImage->qto_xyz.m[2][3] = controlPointGridImage->qoffset_z = originReal[2];
@@ -114,7 +113,7 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
         controlPointGridImage->sto_xyz.m[3][3] = referenceImage->sto_xyz.m[3][3];
 
         // Origin is shifted from 1 control point in the sform
-        reg_mat44_mul(&controlPointGridImage->sto_xyz, originIndex, originReal);
+        Mat44Mul(controlPointGridImage->sto_xyz, originIndex, originReal);
         controlPointGridImage->sto_xyz.m[0][3] = originReal[0];
         controlPointGridImage->sto_xyz.m[1][3] = originReal[1];
         controlPointGridImage->sto_xyz.m[2][3] = originReal[2];
@@ -152,24 +151,24 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
     mat44 halfForwardAffine, halfBackwardAffine;
     if (forwardAffineTrans != nullptr) {
         // Compute half of the affine transformation - ref to flo
-        halfForwardAffine = reg_mat44_logm(forwardAffineTrans);
-        halfForwardAffine = reg_mat44_mul(&halfForwardAffine, .5f);
-        halfForwardAffine = reg_mat44_expm(&halfForwardAffine);
+        halfForwardAffine = Mat44Logm(forwardAffineTrans);
+        halfForwardAffine = halfForwardAffine * 0.5f;
+        halfForwardAffine = Mat44Expm(&halfForwardAffine);
         // Compute half of the affine transformation - flo to ref
         // Note that this is done twice for symmetry consideration
         halfBackwardAffine = nifti_mat44_inverse(*forwardAffineTrans);
-        halfBackwardAffine = reg_mat44_logm(&halfBackwardAffine);
-        halfBackwardAffine = reg_mat44_mul(&halfBackwardAffine, .5f);
-        halfBackwardAffine = reg_mat44_expm(&halfBackwardAffine);
+        halfBackwardAffine = Mat44Logm(&halfBackwardAffine);
+        halfBackwardAffine = halfBackwardAffine * 0.5f;
+        halfBackwardAffine = Mat44Expm(&halfBackwardAffine);
         NR_WARN("Note that the symmetry of the registration is affected by the input affine transformation");
     } else {
-        reg_mat44_eye(&halfForwardAffine);
-        reg_mat44_eye(&halfBackwardAffine);
+        Mat44Eye(&halfForwardAffine);
+        Mat44Eye(&halfBackwardAffine);
     }
 
     // Update the reference and floating transformation to propagate to a mid space
-    referenceImageSpace = reg_mat44_mul(&halfForwardAffine, &referenceImageSpace);
-    floatingImageSpace = reg_mat44_mul(&halfBackwardAffine, &floatingImageSpace);
+    referenceImageSpace = halfForwardAffine * referenceImageSpace;
+    floatingImageSpace = halfBackwardAffine * floatingImageSpace;
 
     // Define the largest field of view in the mid space
     float minPosition[3] = { 0, 0, 0 }, maxPosition[3] = { 0, 0, 0 };
@@ -197,11 +196,11 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
         };
         float out[3];
         for (int c = 0; c < 8; ++c) {
-            reg_mat44_mul(&referenceImageSpace, referenceImageCorners[c], out);
+            Mat44Mul(referenceImageSpace, referenceImageCorners[c], out);
             referenceImageCorners[c][0] = out[0];
             referenceImageCorners[c][1] = out[1];
             referenceImageCorners[c][2] = out[2];
-            reg_mat44_mul(&floatingImageSpace, floatingImageCorners[c], out);
+            Mat44Mul(floatingImageSpace, floatingImageCorners[c], out);
             floatingImageCorners[c][0] = out[0];
             floatingImageCorners[c][1] = out[1];
             floatingImageCorners[c][2] = out[2];
@@ -299,10 +298,10 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
     // Set the control point grid image orientation
     forwardGridImage->qform_code = backwardGridImage->qform_code = 0;
     forwardGridImage->sform_code = backwardGridImage->sform_code = 1;
-    reg_mat44_eye(&forwardGridImage->sto_xyz);
-    reg_mat44_eye(&backwardGridImage->sto_xyz);
-    reg_mat44_eye(&forwardGridImage->sto_ijk);
-    reg_mat44_eye(&backwardGridImage->sto_ijk);
+    Mat44Eye(&forwardGridImage->sto_xyz);
+    Mat44Eye(&backwardGridImage->sto_xyz);
+    Mat44Eye(&forwardGridImage->sto_ijk);
+    Mat44Eye(&backwardGridImage->sto_ijk);
     for (unsigned i = 0; i < 3; ++i) {
         if (referenceImage->nz > 1 || i < 2) {
             forwardGridImage->sto_xyz.m[i][i] = backwardGridImage->sto_xyz.m[i][i] = spacing[i];
@@ -320,7 +319,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
     forwardGridImage->intent_p1 = backwardGridImage->intent_p1 = CUB_SPLINE_GRID;
     // Set the affine matrices
     mat44 identity;
-    reg_mat44_eye(&identity);
+    Mat44Eye(&identity);
     if (forwardGridImage->ext_list != nullptr)
         free(forwardGridImage->ext_list);
     if (backwardGridImage->ext_list != nullptr)
@@ -1570,28 +1569,28 @@ void reg_voxelCentricToNodeCentric(nifti_image *nodeImage,
         if (nodeImage->ext_list[0].edata != nullptr) {
             mat44 temp = *(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
             temp = nifti_mat44_inverse(temp);
-            transformation = reg_mat44_mul(&temp, &transformation);
+            transformation = temp * transformation;
         }
     }
     // millimetre to voxel in the reference image
     if (voxelImage->sform_code > 0)
-        transformation = reg_mat44_mul(&voxelImage->sto_ijk, &transformation);
-    else transformation = reg_mat44_mul(&voxelImage->qto_ijk, &transformation);
+        transformation = voxelImage->sto_ijk * transformation;
+    else transformation = voxelImage->qto_ijk * transformation;
 
     // The information has to be reoriented
     mat33 reorientation;
     // Voxel to millimetre contains the orientation of the image that is used
     // to compute the spatial gradient (floating image)
     if (voxelToMillimetre != nullptr) {
-        reorientation = reg_mat44_to_mat33(voxelToMillimetre);
+        reorientation = Mat44ToMat33(voxelToMillimetre);
         if (nodeImage->num_ext > 0) {
             if (nodeImage->ext_list[0].edata != nullptr) {
-                mat33 temp = reg_mat44_to_mat33(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
+                mat33 temp = Mat44ToMat33(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
                 temp = nifti_mat33_inverse(temp);
                 reorientation = nifti_mat33_mul(temp, reorientation);
             }
         }
-    } else reg_mat33_eye(&reorientation);
+    } else Mat33Eye(&reorientation);
     // The information has to be weighted
     float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz };
     for (int i = 0; i < (nodeImage->nz > 1 ? 3 : 2); ++i) {
@@ -1611,7 +1610,7 @@ void reg_voxelCentricToNodeCentric(nifti_image *nodeImage,
             nodeCoord[1] = static_cast<float>(y);
             for (int x = 0; x < nodeImage->nx; x++) {
                 nodeCoord[0] = static_cast<float>(x);
-                reg_mat44_mul(&transformation, nodeCoord, voxelCoord);
+                Mat44Mul(transformation, nodeCoord, voxelCoord);
                 // linear interpolation is performed
                 DataType basisX[2], basisY[2], basisZ[2] = { 0, 0 };
                 int pre[3] = {
@@ -2173,7 +2172,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
         originIndex[1] = -1.0f;
         originIndex[2] = 0.0f;
         if (referenceImage->nz > 1) originIndex[2] = -1.0f;
-        reg_mat44_mul(&(controlPointGrid->qto_xyz), originIndex, originReal);
+        Mat44Mul(controlPointGrid->qto_xyz, originIndex, originReal);
         if (controlPointGrid->qform_code == 0 && controlPointGrid->sform_code == 0)
             controlPointGrid->qform_code = 1;
         controlPointGrid->qto_xyz.m[0][3] = controlPointGrid->qoffset_x = originReal[0];
@@ -2211,7 +2210,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
             float originIndex[3];
             originIndex[0] = originIndex[1] = originIndex[2] = -1;
             if (referenceImage->nz <= 1) originIndex[2] = 0;
-            reg_mat44_mul(&(controlPointGrid->sto_xyz), originIndex, originReal);
+            Mat44Mul(controlPointGrid->sto_xyz, originIndex, originReal);
             controlPointGrid->sto_xyz.m[0][3] = originReal[0];
             controlPointGrid->sto_xyz.m[1][3] = originReal[1];
             controlPointGrid->sto_xyz.m[2][3] = originReal[2];
@@ -2228,7 +2227,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid,
         // The origin is shifted by one node when compared to the previous origin
         float nodeCoord[3] = { 1, 1, 1 };
         float newOrigin[3];
-        reg_mat44_mul(&controlPointGrid->sto_xyz, nodeCoord, newOrigin);
+        Mat44Mul(controlPointGrid->sto_xyz, nodeCoord, newOrigin);
         controlPointGrid->sto_xyz.m[0][3] = newOrigin[0];
         controlPointGrid->sto_xyz.m[1][3] = newOrigin[1];
         if (controlPointGrid->nz > 1)
@@ -2398,7 +2397,6 @@ void reg_defField_compose3D(const nifti_image *deformationField,
                 df_real2Voxel.m[2][1] * realDef[1] +
                 df_real2Voxel.m[2][2] * realDef[2] +
                 df_real2Voxel.m[2][3];
-            //reg_mat44_mul(df_real2Voxel, realDef, voxel);
 
             // Linear interpolation to compute the new deformation
             pre[0] = Floor(voxel[0]);
@@ -2501,7 +2499,7 @@ inline static int FastWarp(double x, double y, double z, nifti_image *deformatio
     FieldTYPE *wpz;
     int   xw, yw, zw, dxw, dyw, dxyw, dxyzw;
     double wxf, wyf, wzf, wyzf;
-    double world[4], position[4];
+    double world[3], position[3];
 
     FieldTYPE *warpdata = static_cast<FieldTYPE*>(deformationField->data);
 
@@ -2527,8 +2525,7 @@ inline static int FastWarp(double x, double y, double z, nifti_image *deformatio
     world[0] = x;
     world[1] = y;
     world[2] = z;
-    world[3] = 1;
-    reg_mat44_mul(deformationFieldIJKMatrix, world, position);
+    Mat44Mul(*deformationFieldIJKMatrix, world, position);
     x = position[0];
     y = position[1];
     z = position[2];
@@ -2947,13 +2944,12 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField,
     if (inputDeformationField->sform_code > 0)
         InXYZMatrix = &inputDeformationField->sto_xyz;
     else InXYZMatrix = &inputDeformationField->qto_xyz;
-    float center[4], center2[4];
-    double centerout[4], delta[4];
+    float center[3], center2[3];
+    double centerout[3], delta[3];
     center[0] = static_cast<float>(inputDeformationField->nx / 2);
     center[1] = static_cast<float>(inputDeformationField->ny / 2);
     center[2] = static_cast<float>(inputDeformationField->nz / 2);
-    center[3] = 1;
-    reg_mat44_mul(InXYZMatrix, center, center2);
+    Mat44Mul(*InXYZMatrix, center, center2);
     FastWarp<float>(center2[0], center2[1], center2[2], inputDeformationField, &centerout[0], &centerout[1], &centerout[2]);
     delta[0] = center2[0] - centerout[0];
     delta[1] = center2[1] - centerout[1];
@@ -2962,7 +2958,7 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField,
 
 
     int i, x, y, z;
-    double position[4], pars[4], arrayy[4][3];
+    double position[3], pars[3], arrayy[4][3];
     struct ddata dat;
     DataType *outData;
 #ifdef _OPENMP
@@ -2986,8 +2982,7 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField,
                 position[0] = x;
                 position[1] = y;
                 position[2] = z;
-                position[3] = 1;
-                reg_mat44_mul(OutXYZMatrix, position, pars);
+                Mat44Mul(*OutXYZMatrix, position, pars);
                 dat.gx = pars[0];
                 dat.gy = pars[1];
                 dat.gz = pars[2];
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 75c0b6ee..303057cb 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -74,8 +74,8 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
    // Define a matrix to reorient the Jacobian matrices and normalise them by the grid spacing
    mat33 reorientation,jacobianMatrix;
    if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+      reorientation = Mat44ToMat33(&splineControlPoint->sto_ijk);
+   else reorientation = Mat44ToMat33(&splineControlPoint->qto_ijk);
 
    // Useful variables
    int x, y, z;
@@ -143,13 +143,11 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
          else transformation=referenceImage->qto_xyz;
          // affine: mm to mm
          if(splineControlPoint->num_ext>0)
-            transformation=reg_mat44_mul(
-                     reinterpret_cast<mat44 *>(splineControlPoint->ext_list[0].edata),
-                  &transformation);
+            transformation=reinterpret_cast<mat44&>(*splineControlPoint->ext_list[0].edata) * transformation;
          // grid: mm to voxel
          if(splineControlPoint->sform_code>0)
-            transformation=reg_mat44_mul(&(splineControlPoint->sto_ijk), &transformation);
-         else transformation=reg_mat44_mul(&(splineControlPoint->qto_ijk), &transformation);
+            transformation=splineControlPoint->sto_ijk * transformation;
+         else transformation=splineControlPoint->qto_ijk * transformation;
 
          float imageCoord[3], gridCoord[3];
          for(z=0; z<referenceImage->nz; z++)
@@ -163,7 +161,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                {
                   imageCoord[0]=x;
                   // Compute the position in the grid
-                  reg_mat44_mul(&transformation,imageCoord,gridCoord);
+                  Mat44Mul(transformation,imageCoord,gridCoord);
                   // Compute the anterior node coord
                   pre[0]=Floor(gridCoord[0]);
                   pre[1]=Floor(gridCoord[1]);
@@ -265,8 +263,8 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
    // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing
    mat33 reorientation,jacobianMatrix;
    if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+      reorientation = Mat44ToMat33(&splineControlPoint->sto_ijk);
+   else reorientation = Mat44ToMat33(&splineControlPoint->qto_ijk);
 
    // Useful variables
    int x, y, incr0;
@@ -361,13 +359,11 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
          else transformation=referenceImage->qto_xyz;
          // affine: mm to mm
          if(splineControlPoint->num_ext>0)
-            transformation=reg_mat44_mul(
-                     reinterpret_cast<mat44 *>(splineControlPoint->ext_list[0].edata),
-                  &transformation);
+            transformation=reinterpret_cast<mat44&>(*splineControlPoint->ext_list[0].edata) * transformation;
          // grid: mm to voxel
          if(splineControlPoint->sform_code>0)
-            transformation=reg_mat44_mul(&(splineControlPoint->sto_ijk), &transformation);
-         else transformation=reg_mat44_mul(&(splineControlPoint->qto_ijk), &transformation);
+            transformation=splineControlPoint->sto_ijk * transformation;
+         else transformation=splineControlPoint->qto_ijk * transformation;
 
          float imageCoord[3], gridCoord[3], basis;
          imageCoord[2]=0;
@@ -380,7 +376,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
             {
                imageCoord[0]=x;
                // Compute the position in the grid
-               reg_mat44_mul(&transformation,imageCoord,gridCoord);
+               Mat44Mul(transformation,imageCoord,gridCoord);
                // Compute the anterior node coord
                pre[0]=Floor(gridCoord[0]);
                pre[1]=Floor(gridCoord[1]);
@@ -539,8 +535,8 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
    // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing
    mat33 reorientation,jacobianMatrix;
    if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+      reorientation = Mat44ToMat33(&splineControlPoint->sto_ijk);
+   else reorientation = Mat44ToMat33(&splineControlPoint->qto_ijk);
 
    // Useful variables
    int x, y, z, incr0;
@@ -771,13 +767,11 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
          else transformation=referenceImage->qto_xyz;
          // affine: mm to mm
          if(splineControlPoint->num_ext>0)
-            transformation=reg_mat44_mul(
-                     reinterpret_cast<mat44 *>(splineControlPoint->ext_list[0].edata),
-                  &transformation);
+            transformation=reinterpret_cast<mat44&>(*splineControlPoint->ext_list[0].edata) * transformation;
          // grid: mm to voxel
          if(splineControlPoint->sform_code>0)
-            transformation=reg_mat44_mul(&(splineControlPoint->sto_ijk), &transformation);
-         else transformation=reg_mat44_mul(&(splineControlPoint->qto_ijk), &transformation);
+            transformation=splineControlPoint->sto_ijk * transformation;
+         else transformation=splineControlPoint->qto_ijk * transformation;
 
          float imageCoord[3], gridCoord[3], basis;
          for(z=0; z<referenceImage->nz; z++)
@@ -792,7 +786,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                {
                   imageCoord[0]=x;
                   // Compute the position in the grid
-                  reg_mat44_mul(&transformation,imageCoord,gridCoord);
+                  Mat44Mul(transformation,imageCoord,gridCoord);
                   // Compute the anterior node coord
                   pre[0]=Floor(gridCoord[0]);
                   pre[1]=Floor(gridCoord[1]);
@@ -1342,8 +1336,8 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
    // Matrices to be used to convert the gradient from voxel to mm
    mat33 jacobianMatrix, reorientation;
    if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz);
+      reorientation = Mat44ToMat33(&splineControlPoint->sto_xyz);
+   else reorientation = Mat44ToMat33(&splineControlPoint->qto_xyz);
 
    // Ratio to be used for normalisation
    size_t jacobianNumber;
@@ -1580,8 +1574,8 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
    // Matrices to be used to convert the gradient from voxel to mm
    mat33 jacobianMatrix, reorientation;
    if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz);
+      reorientation = Mat44ToMat33(&splineControlPoint->sto_xyz);
+   else reorientation = Mat44ToMat33(&splineControlPoint->qto_xyz);
 
    // Ratio to be used for normalisation
    size_t jacobianNumber;
@@ -1946,8 +1940,8 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
 
    mat33 jacobianMatrix, reorientation;
    if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz);
+      reorientation = Mat44ToMat33(&splineControlPoint->sto_xyz);
+   else reorientation = Mat44ToMat33(&splineControlPoint->qto_xyz);
 
    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
    DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
@@ -2195,8 +2189,8 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
 
    mat33 jacobianMatrix, reorientation;
    if(splineControlPoint->sform_code>0)
-      reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz);
-   else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz);
+      reorientation = Mat44ToMat33(&splineControlPoint->sto_xyz);
+   else reorientation = Mat44ToMat33(&splineControlPoint->qto_xyz);
 
    const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3);
    DataType *controlPointPtrX = static_cast<DataType *>(splineControlPoint->data);
@@ -2629,13 +2623,13 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField,
    if(deformationField->sform_code>0)
    {
       reg_getRealImageSpacing(deformationField,spacing);
-      reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
+      reorientation=nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->sto_xyz)));
    }
    else
    {
       spacing[0]=deformationField->dx;
       spacing[1]=deformationField->dy;
-      reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->qto_xyz)));
+      reorientation=nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->qto_xyz)));
    }
 
    DataType *deformationPtrX = static_cast<DataType *>(deformationField->data);
@@ -2738,14 +2732,14 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField,
    if(deformationField->sform_code>0)
    {
       reg_getRealImageSpacing(deformationField,spacing);
-      reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
+      reorientation=nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->sto_xyz)));
    }
    else
    {
       spacing[0]=deformationField->dx;
       spacing[1]=deformationField->dy;
       spacing[2]=deformationField->dz;
-      reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->qto_xyz)));
+      reorientation=nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->qto_xyz)));
    }
 
    DataType *deformationPtrX = static_cast<DataType *>(deformationField->data);
@@ -2943,11 +2937,11 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
 
    // The Jacobian matrices are initialised with identity or the initial affine
    mat33 affineMatrix;
-   reg_mat33_eye(&affineMatrix);
+   Mat33Eye(&affineMatrix);
    if(flowFieldImage->num_ext>0)
    {
       if(flowFieldImage->ext_list[0].edata!=nullptr)
-         affineMatrix = reg_mat44_to_mat33(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata));
+         affineMatrix = Mat44ToMat33(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[0].edata));
       else NR_FATAL_ERROR("The affine matrix is expected to be stored in the flow field");
    }
    const size_t voxelNumber = NiftiImage::calcVoxelNumber(flowFieldImage, 3);
@@ -2982,7 +2976,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices,
    if(flowFieldImage->num_ext>1)
    {
       if(flowFieldImage->ext_list[1].edata!=nullptr)
-         affineMatrix = reg_mat44_to_mat33(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[1].edata));
+         affineMatrix = Mat44ToMat33(reinterpret_cast<mat44 *>(flowFieldImage->ext_list[1].edata));
       else NR_FATAL_ERROR("The affine matrix is expected to be stored in the flow field");
       for(size_t i=0; i<voxelNumber; ++i)
          jacobianMatrices[i]=nifti_mat33_mul(affineMatrix,jacobianMatrices[i]);
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 722add4e..907b3f3b 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -506,8 +506,8 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
     if (splineControlPoint->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+        reorientation = Mat44ToMat33(&splineControlPoint->sto_ijk);
+    else reorientation = Mat44ToMat33(&splineControlPoint->qto_ijk);
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -583,8 +583,8 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin
     // Matrix to use to convert the gradient from mm to voxel
     mat33 reorientation;
     if (splineControlPoint->sform_code > 0)
-        reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk);
-    else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk);
+        reorientation = Mat44ToMat33(&splineControlPoint->sto_ijk);
+    else reorientation = Mat44ToMat33(&splineControlPoint->qto_ijk);
 
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
@@ -688,7 +688,7 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi
     set_first_order_basis_values(basisX, basisY);
 
     // Matrix to use to convert the gradient from mm to voxel
-    const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk);
+    const mat33 reorientation = Mat44ToMat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk);
     const mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
     const DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
@@ -756,7 +756,7 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi
     set_first_order_basis_values(basisX, basisY, basisZ);
 
     // Matrix to use to convert the gradient from mm to voxel
-    const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk);
+    const mat33 reorientation = Mat44ToMat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk);
     const mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
     const DataType approxRatio = weight / static_cast<DataType>(nodeNumber);
@@ -863,9 +863,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     double constraintValue = 0;
     size_t l, index;
-    float refPosition[4];
-    float defPosition[4];
-    float floPosition[4];
+    float refPosition[3];
+    float defPosition[3];
+    float floPosition[3];
     int previous[3], a, b, c;
     DataType basisX[4], basisY[4], basisZ[4], basis;
     const mat44 *gridRealToVox = &(controlPointImage->qto_ijk);
@@ -888,9 +888,8 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
             refPosition[2] = landmarkReference[l * imageDim + 2];
             floPosition[2] = landmarkFloating[l * imageDim + 2];
         } else refPosition[2] = floPosition[2] = 0;
-        refPosition[3] = floPosition[3] = 1;
         // Convert the reference position to voxel in the control point grid space
-        reg_mat44_mul(gridRealToVox, refPosition, defPosition);
+        Mat44Mul(*gridRealToVox, refPosition, defPosition);
 
         // Extract the corresponding nodes
         previous[0] = Floor(defPosition[0]) - 1;
@@ -1003,7 +1002,7 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
             floPosition[2] = landmarkFloating[l * imageDim + 2];
         } else refPosition[2] = floPosition[2] = 0;
         // Convert the reference position to voxel in the control point grid space
-        reg_mat44_mul(gridRealToVox, refPosition, defPosition);
+        Mat44Mul(*gridRealToVox, refPosition, defPosition);
         if (imageDim == 2) defPosition[2] = 0;
         // Extract the corresponding nodes
         previous[0] = Floor(defPosition[0]) - 1;
diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h
deleted file mode 100644
index 42c0cddd..00000000
--- a/reg-lib/cpu/_reg_maths.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/**
- * @file _reg_maths.h
- * @brief Library that contains small math routines
- * @author Marc Modat
- * @date 25/03/2009
- *
- *  Created by Marc Modat on 25/03/2009.
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- *
- */
-
-#pragma once
-
-#include "RNifti.h"
-
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#if USE_SSE
-#include <emmintrin.h>
-#include <xmmintrin.h>
-#ifdef __SSE3__
-#include <pmmintrin.h>
-#endif
-#endif
-
-#define _USE_MATH_DEFINES
-#include <math.h>
-
-#ifdef __CUDACC__
-#define DEVICE  __host__ __device__
-#else
-#define DEVICE
-#endif
-
-typedef enum {
-    DEF_FIELD,
-    DISP_FIELD,
-    CUB_SPLINE_GRID,
-    DEF_VEL_FIELD,
-    DISP_VEL_FIELD,
-    SPLINE_VEL_GRID,
-    LIN_SPLINE_GRID
-} NREG_TRANS_TYPE;
-
-/* *************************************************************** */
-namespace NiftyReg {
-/* *************************************************************** */
-// The functions in the standard library are slower; so, these are implemented
-template<typename T>
-DEVICE inline T Square(const T& x) {
-    return x * x;
-}
-template<typename T>
-DEVICE inline T Cube(const T& x) {
-    return x * x * x;
-}
-template<typename T>
-DEVICE inline int Floor(const T& x) {
-    const int i = static_cast<int>(x);
-    return i - (x < i);
-}
-template<typename T>
-DEVICE inline int Ceil(const T& x) {
-    const int i = static_cast<int>(x);
-    return i + (x > i);
-}
-template<typename T>
-DEVICE inline int Round(const T& x) {
-    return static_cast<int>(x + (x >= 0 ? 0.5 : -0.5));
-}
-/* *************************************************************** */
-} // namespace NiftyReg
-/* *************************************************************** */
-template<class T>
-T* reg_matrix1DAllocate(size_t arraySize);
-/* *************************************************************** */
-template<class T>
-void reg_matrix1DDeallocate(T* mat);
-/* *************************************************************** */
-template<class T>
-T** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY);
-/* *************************************************************** */
-template<class T>
-void reg_matrix2DDeallocate(size_t arraySizeX, T** mat);
-/* *************************************************************** */
-template<class T>
-T** reg_matrix2DTranspose(T** mat, size_t arraySizeX, size_t arraySizeY);
-/* *************************************************************** */
-template<class T>
-T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2);
-template<class T>
-void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, T** res, bool transposeMat2);
-/* *************************************************************** */
-template<class T>
-T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect);
-template<class T>
-void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res);
-/* *************************************************************** */
-/** @brief Add two 3-by-3 matrices
-*/
-mat33 reg_mat33_add(mat33 const* A, mat33 const* B);
-mat33 operator+(mat33 A, mat33 B);
-/* *************************************************************** */
-/** @brief Multiply two 3-by-3 matrices
-*/
-mat33 reg_mat33_mul(mat33 const* A,
-    mat33 const* B);
-mat33 operator*(mat33 A,
-    mat33 B);
-/* *************************************************************** */
-//The mat33 represent a 3x3 matrix
-void reg_mat33_mul(mat44 const* mat, float const* in, float *out);
-void reg_mat33_mul(mat33 const* mat, float const* in, float *out);
-/* *************************************************************** */
-/** @brief Subtract two 3-by-3 matrices
-*/
-mat33 reg_mat33_minus(mat33 const* A, mat33 const* B);
-mat33 operator-(mat33 A, mat33 B);
-/* *************************************************************** */
-/** @brief Transpose a 3-by-3 matrix
-*/
-mat33 reg_mat33_trans(mat33 A);
-/* *************************************************************** */
-/** @brief Diagonalize a 3-by-3 matrix
-*/
-void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D);
-/* *************************************************************** */
-/** @brief Set up a 3-by-3 matrix with an identity
-*/
-void reg_mat33_eye(mat33 *mat);
-/* *************************************************************** */
-/** @brief Compute the determinant of a 3-by-3 matrix
-*/
-void reg_mat33_to_nan(mat33 *A);
-/* *************************************************************** */
-/** @brief Transform a mat44 to a mat33 matrix
-*/
-mat33 reg_mat44_to_mat33(mat44 const* A);
-void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum);
-/* *************************************************************** */
-template <class T>
-void reg_heapSort(T *array_tmp,int blockNum);
-/* *************************************************************** */
-bool operator==(mat44 A,mat44 B);
-/* *************************************************************** */
-bool operator!=(mat44 A,mat44 B);
-/* *************************************************************** */
-/** @brief Multiply two 4-by-4 matrices
- */
-mat44 reg_mat44_mul(mat44 const* A,
-                    mat44 const* B);
-mat44 operator*(mat44 A,
-                mat44 B);
-/* *************************************************************** */
-/** @brief Multiply a vector with a 4-by-4 matrix
- */
-void reg_mat44_mul(mat44 const* mat,
-                   float const* in,
-                   float *out);
-
-void reg_mat44_mul(mat44 const* mat,
-                   double const* in,
-                   double *out);
-/* *************************************************************** */
-/** @brief Multiply a 4-by-4 matrix with a scalar
- */
-mat44 reg_mat44_mul(mat44 const* mat,
-                    double scalar);
-/* *************************************************************** */
-/** @brief Add two 4-by-4 matrices
- */
-mat44 reg_mat44_add(mat44 const* A, mat44 const* B);
-mat44 operator+(mat44 A,mat44 B);
-/* *************************************************************** */
-/** @brief Subtract two 4-by-4 matrices
- */
-mat44 reg_mat44_minus(mat44 const* A, mat44 const* B);
-mat44 operator-(mat44 A,mat44 B);
-/* *************************************************************** */
-/** @brief Set up a 4-by-4 matrix with an identity
- */
-void reg_mat44_eye(mat44 *mat);
-/* *************************************************************** */
-/** @brief Compute the determinant of a 4-by-4 matrix
- */
-template<class T> T reg_mat44_det(mat44 const* A);
-/* *************************************************************** */
-/** @brief Display a mat44 matrix
- */
-void reg_mat44_disp(const mat44& mat, const std::string& title);
-/* *************************************************************** */
-double get_square_distance3D(float * first_point3D, float * second_point3D);
-/* *************************************************************** */
-double get_square_distance2D(float * first_point2D, float * second_point2D);
-/* *************************************************************** */
diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp
deleted file mode 100644
index 444a1721..00000000
--- a/reg-lib/cpu/_reg_maths_eigen.cpp
+++ /dev/null
@@ -1,205 +0,0 @@
-#define USE_EIGEN
-
-#include "_reg_maths_eigen.h"
-#include "_reg_maths.h"
-#include "Debug.hpp"
-
-// Eigen headers are in there because of the nvcc preprocessing step
-#include "Eigen/Core"
-#include "Eigen/SVD"
-#include "unsupported/Eigen/MatrixFunctions"
-
-//_reg_maths_eigen.cpp
-/* *************************************************************** */
-/** @brief SVD
-* @param in input matrix to decompose - in place
-* @param size_m row
-* @param size_n colomn
-* @param w diagonal term
-* @param v rotation part
-*/
-template<class T>
-void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) {
-   if (size_m == 0 || size_n == 0)
-      NR_FATAL_ERROR("The specified matrix is empty");
-
-#ifdef _WIN32
-   long sm, sn, sn2;
-   long size__m = (long)size_m, size__n = (long)size_n;
-#else
-   size_t sm, sn, sn2;
-   size_t size__m = size_m, size__n = size_n;
-#endif
-   Eigen::MatrixXd m(size_m, size_n);
-
-   //Convert to Eigen matrix
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   shared(in,m, size__m, size__n) \
-   private(sn)
-#endif
-   for (sm = 0; sm < size__m; sm++)
-   {
-      for (sn = 0; sn < size__n; sn++)
-      {
-         m(sm, sn) = static_cast<double>(in[sm][sn]);
-      }
-   }
-
-   Eigen::JacobiSVD<Eigen::MatrixXd> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
-
-#ifdef _OPENMP
-#pragma omp parallel for default(none) \
-   shared(in,svd,v,w, size__n,size__m) \
-   private(sn2, sm)
-#endif
-   for (sn = 0; sn < size__n; sn++) {
-      w[sn] = static_cast<T>(svd.singularValues()(sn));
-      for (sn2 = 0; sn2 < size__n; sn2++) {
-         v[sn2][sn] = static_cast<T>(svd.matrixV()(sn2, sn));
-      }
-      for (sm = 0; sm < size__m; sm++) {
-         in[sm][sn] = static_cast<T>(svd.matrixU()(sm, sn));
-      }
-   }
-}
-template void svd<float>(float **in, size_t m, size_t n, float * w, float **v);
-template void svd<double>(double **in, size_t m, size_t n, double * w, double **v);
-/* *************************************************************** */
-template<class T>
-T reg_matrix2DDet(T** mat, size_t m, size_t n) {
-   if (m != n)
-      NR_FATAL_ERROR("The matrix have to be square: [" + std::to_string(m) + " " + std::to_string(n) + "]");
-
-   double res;
-   if (m == 2) {
-      res = static_cast<double>(mat[0][0]) * static_cast<double>(mat[1][1]) - static_cast<double>(mat[1][0]) * static_cast<double>(mat[0][1]);
-   }
-   else if (m == 3) {
-      res = (static_cast<double>(mat[0][0]) * (static_cast<double>(mat[1][1]) * static_cast<double>(mat[2][2]) - static_cast<double>(mat[1][2]) * static_cast<double>(mat[2][1]))) -
-            (static_cast<double>(mat[0][1]) * (static_cast<double>(mat[1][0]) * static_cast<double>(mat[2][2]) - static_cast<double>(mat[1][2]) * static_cast<double>(mat[2][0]))) +
-            (static_cast<double>(mat[0][2]) * (static_cast<double>(mat[1][0]) * static_cast<double>(mat[2][1]) - static_cast<double>(mat[1][1]) * static_cast<double>(mat[2][0])));
-   }
-   else {
-      // Convert to Eigen format
-      Eigen::MatrixXd eigenRes(m, n);
-      for (size_t i = 0; i < m; i++) {
-         for (size_t j = 0; j < n; j++) {
-            eigenRes(i, j) = static_cast<double>(mat[i][j]);
-         }
-      }
-      res = eigenRes.determinant();
-   }
-   return static_cast<T>(res);
-}
-template float reg_matrix2DDet<float>(float** mat, size_t m, size_t n);
-template double reg_matrix2DDet<double>(double** mat, size_t m, size_t n);
-/* *************************************************************** */
-void reg_mat33_expm(mat33 *in_tensor)
-{
-   int sm, sn;
-   Eigen::Matrix3d tensor;
-
-   // Convert to Eigen format
-   for (sm = 0; sm < 3; sm++){
-      for (sn = 0; sn < 3; sn++){
-         float val=in_tensor->m[sm][sn];
-         if(val!=val) return;
-         tensor(sm, sn) = static_cast<double>(val);
-      }
-   }
-
-   // Compute exp(E)
-   tensor = tensor.exp();
-
-   // Convert the result to mat33 format
-   for (sm = 0; sm < 3; sm++)
-      for (sn = 0; sn < 3; sn++)
-         in_tensor->m[sm][sn] = static_cast<float>(tensor(sm, sn));
-}
-/* *************************************************************** */
-mat44 reg_mat44_expm(mat44 const* mat)
-{
-   mat44 X;
-   Eigen::Matrix4d m;
-   for (size_t i = 0; i < 4; ++i) {
-      for (size_t j = 0; j < 4; ++j) {
-         m(i, j) = static_cast<double>(mat->m[i][j]);
-      }
-   }
-   m = m.exp();
-   //
-   for (size_t i = 0; i < 4; ++i)
-      for (size_t j = 0; j < 4; ++j)
-         X.m[i][j] = static_cast<float>(m(i, j));
-
-   return X;
-}
-/* *************************************************************** */
-void reg_mat33_logm(mat33 *in_tensor)
-{
-   int sm, sn;
-   Eigen::Matrix3d tensor;
-
-   // Convert to Eigen format
-   bool all_zeros = true;
-   double det = 0;
-   for (sm = 0; sm < 3; sm++){
-      for (sn = 0; sn < 3; sn++){
-         float val=in_tensor->m[sm][sn];
-         if(val!=0.f) all_zeros=false;
-         if(val!=val) return;
-         tensor(sm, sn) = static_cast<double>(val);
-      }
-   }
-   // Actually R case requires invertible and no negative real ev,
-   // but the only observed case so far was non-invertible.
-   // determinant is not a perfect check for invertibility and
-   // identity with zero not great either, but the alternative
-   // is a general eigensolver and the logarithm function should
-   // suceed unless convergence just isn't happening.
-   det = tensor.determinant();
-   if(all_zeros || det == 0){
-      reg_mat33_to_nan(in_tensor);
-      return;
-   }
-
-   // Compute the actual matrix log
-   tensor = tensor.log();
-
-   // Convert the result to mat33 format
-   for (sm = 0; sm < 3; sm++)
-      for (sn = 0; sn < 3; sn++)
-         in_tensor->m[sm][sn] = static_cast<float>(tensor(sm, sn));
-}
-/* *************************************************************** */
-mat44 reg_mat44_logm(mat44 const* mat)
-{
-   mat44 X;
-   Eigen::Matrix4d m;
-   for (size_t i = 0; i < 4; ++i) {
-      for (size_t j = 0; j < 4; ++j) {
-         m(i, j) = static_cast<double>(mat->m[i][j]);
-      }
-   }
-   m = m.log();
-   for (size_t i = 0; i < 4; ++i)
-      for (size_t j = 0; j < 4; ++j)
-         X.m[i][j] = static_cast<float>(m(i, j));
-   return X;
-}
-/* *************************************************************** */
-mat44 reg_mat44_avg2(mat44 const* A, mat44 const* B)
-{
-   mat44 out;
-   mat44 logA = reg_mat44_logm(A);
-   mat44 logB = reg_mat44_logm(B);
-   for (int i = 0; i < 4; ++i) {
-      logA.m[3][i] = 0.f;
-      logB.m[3][i] = 0.f;
-   }
-   logA = reg_mat44_add(&logA, &logB);
-   out = reg_mat44_mul(&logA, 0.5);
-   return reg_mat44_expm(&out);
-
-}
diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h
deleted file mode 100644
index 20867b69..00000000
--- a/reg-lib/cpu/_reg_maths_eigen.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#pragma once
-
-#include "RNifti.h"
-
-/* *************************************************************** */
-/* Functions calling the Eigen library                             */
-/* See http://eigen.tuxfamily.org/index.php?title=Main_Page        */
-/* *************************************************************** */
-
-/* *************************************************************** */
-template <class T>
-void svd(T **in, size_t m, size_t n, T * w, T **v);
-/* *************************************************************** */
-template<class T>
-T reg_matrix2DDet(T** mat, size_t m, size_t n);
-/* *************************************************************** */
-/** @brief Compute the log of a 3-by-3 matrix
-*/
-void reg_mat33_expm(mat33 *in_tensor);
-/* *************************************************************** */
-/** @brief Compute the exp of a 4-by-4 matrix
-*/
-mat44 reg_mat44_expm(const mat44 *mat);
-/* *************************************************************** */
-/** @brief Compute the log of a 3-by-3 matrix
-*/
-void reg_mat33_logm(mat33 *in_tensor);
-/* *************************************************************** */
-/** @brief Compute the log of a 4-by-4 matrix
-*/
-mat44 reg_mat44_logm(const mat44 *mat);
-/* *************************************************************** */
-/** @brief Compute the average of two matrices using a log-euclidean
-* framework
-*/
-mat44 reg_mat44_avg2(mat44 const* A, mat44 const* b);
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 0d9d1785..6fe684c5 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -11,8 +11,7 @@
  */
 
 #include "_reg_resampling.h"
-#include "_reg_maths.h"
-#include "_reg_maths_eigen.h"
+#include "Maths.hpp"
 #include "_reg_tools.h"
 
 #define SINC_KERNEL_RADIUS 3
@@ -165,7 +164,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage,
             diffTensor[tid].m[2][2] = static_cast<float>(floatingIntensityZZ[floatingIndex]);
 
             // Compute the log of the diffusion tensor.
-            reg_mat33_logm(&diffTensor[tid]);
+            Mat33Logm(&diffTensor[tid]);
 
             // Write this out as a new image
             floatingIntensityXX[floatingIndex] = static_cast<DataType>(diffTensor[tid].m[0][0]);
@@ -257,10 +256,10 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage,
                     inputTensor[tid].m[2][2] = static_cast<float>(inputIntensityZZ[warpedIndex]);
                     // Exponentiate the warped tensor
                     if (warpedImage == nullptr) {
-                        reg_mat33_expm(&inputTensor[tid]);
+                        Mat33Expm(&inputTensor[tid]);
                         testSum = 0;
                     } else {
-                        reg_mat33_eye(&warpedTensor[tid]);
+                        Mat33Eye(&warpedTensor[tid]);
                         warpedTensor[tid].m[0][0] = static_cast<float>(warpedXX[warpedIndex]);
                         warpedTensor[tid].m[0][1] = static_cast<float>(warpedXY[warpedIndex]);
                         warpedTensor[tid].m[1][0] = warpedTensor[tid].m[0][1];
@@ -396,7 +395,7 @@ void ResampleImage3D(const nifti_image *floatingImage,
                 world[2] = static_cast<float>(deformationFieldPtrZ[index]);
 
                 // real -> voxel; floating space
-                reg_mat44_mul(floatingIJKMatrix, world, position);
+                Mat44Mul(*floatingIJKMatrix, world, position);
 
                 previous[0] = Floor(position[0]);
                 previous[1] = Floor(position[1]);
@@ -576,7 +575,7 @@ void ResampleImage2D(const nifti_image *floatingImage,
                 world[2] = 0;
 
                 // real -> voxel; floating space
-                reg_mat44_mul(floatingIJKMatrix, world, position);
+                Mat44Mul(*floatingIJKMatrix, world, position);
 
                 previous[0] = Floor(position[0]);
                 previous[1] = Floor(position[1]);
@@ -921,7 +920,7 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
                                     psfWorld[2] /= resamplingWeightSum;
 
                                     // real -> voxel; floating space
-                                    reg_mat44_mul(floatingIJKMatrix, psfWorld, position);
+                                    Mat44Mul(*floatingIJKMatrix, psfWorld, position);
 
                                     previous[0] = Floor(position[0]);
                                     previous[1] = Floor(position[1]);
@@ -1128,11 +1127,11 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
                     // T=P+A*S*At
                     A = nifti_mat33_inverse(jacMat[index]);
 
-                    ASAt = A * S * reg_mat33_trans(A);
+                    ASAt = A * S * Mat33Trans(A);
 
                     TmS = T - ASAt;
 
-                    reg_mat33_diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal);
+                    Mat33Diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal);
 
                     // If eigen values are less than 0, set them to 0.
                     // Also, invert the eigenvalues to estimate the inverse.
@@ -1148,7 +1147,7 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
                         }
                     }
 
-                    TmS_EigVec_trans = reg_mat33_trans(TmS_EigVec);
+                    TmS_EigVec_trans = Mat33Trans(TmS_EigVec);
                     P = TmS_EigVec * TmS_EigVal * TmS_EigVec_trans;
                     invP = TmS_EigVec * TmS_EigVal_inv * TmS_EigVec_trans;
                     currentDeterminant = TmS_EigVal.m[0][0] * TmS_EigVal.m[1][1] * TmS_EigVal.m[2][2];
@@ -1157,16 +1156,16 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
 
                     A = nifti_mat33_inverse(jacMat[index]);
 
-                    ASAt = A * S * reg_mat33_trans(A);
+                    ASAt = A * S * Mat33Trans(A);
 
                     mat33 S_EigVec, S_EigVal;
 
                     //                % rotate S
                     //                [ZS, DS] = eig(S);
-                    reg_mat33_diagonalize(&ASAt, &S_EigVec, &S_EigVal);
+                    Mat33Diagonalize(&ASAt, &S_EigVec, &S_EigVal);
 
                     //                T1 = ZS'*T*ZS;
-                    mat33 T1 = reg_mat33_trans(S_EigVec) * T * S_EigVec;
+                    mat33 T1 = Mat33Trans(S_EigVec) * T * S_EigVec;
 
                     //                % Volume-preserving scale of S to make it isotropic
                     //                detS = prod(diag(DS));
@@ -1191,12 +1190,12 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
                     }
 
                     //                T2 = LambdaN*T1*LambdaN';
-                    mat33 T2 = LambdaN * T1 * reg_mat33_trans(LambdaN);
+                    mat33 T2 = LambdaN * T1 * Mat33Trans(LambdaN);
 
                     //                % Rotate to make thing axis-aligned
                     //                [ZT2, DT2] = eig(T2);
                     mat33 T2_EigVec, T2_EigVal;
-                    reg_mat33_diagonalize(&T2, &T2_EigVec, &T2_EigVal);
+                    Mat33Diagonalize(&T2, &T2_EigVec, &T2_EigVal);
 
                     //                % Optimal solution in the transformed axis-aligned space
                     //                DP2 = diag(max(sqrt(detS),diag(DT2)));
@@ -1213,11 +1212,11 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
 
                     //                % Roll back the transforms
                     //                Q = ZS*invLambdaN*ZT2*DQ2*ZT2'*invLambdaN*ZS'
-                    mat33 Q = S_EigVec * invLambdaN * T2_EigVec * DP2 * reg_mat33_trans(T2_EigVec) * invLambdaN * reg_mat33_trans(S_EigVec);
+                    mat33 Q = S_EigVec * invLambdaN * T2_EigVec * DP2 * Mat33Trans(T2_EigVec) * invLambdaN * Mat33Trans(S_EigVec);
                     //                P=Q-S
                     TmS = Q - S;
                     invP = nifti_mat33_inverse(TmS);
-                    reg_mat33_diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal);
+                    Mat33Diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal);
 
                     currentDeterminant = TmS_EigVal.m[0][0] * TmS_EigVal.m[1][1] * TmS_EigVal.m[2][2];
                     currentDeterminant = currentDeterminant < 0.000001f ? 0.000001f : currentDeterminant;
@@ -1322,7 +1321,7 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
                                         psfWorld[2] /= resamplingWeightSum;
 
                                         // real -> voxel; floating space
-                                        reg_mat44_mul(floatingIJKMatrix, psfWorld, position);
+                                        Mat44Mul(*floatingIJKMatrix, psfWorld, position);
 
                                         previous[0] = Floor(position[0]);
                                         previous[1] = Floor(position[1]);
@@ -1531,7 +1530,7 @@ void reg_bilinearResampleGradient(const nifti_image *floatingImage,
     }
 
     // Reorientation matrix is assessed in order to remove the rigid component
-    mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
+    mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->sto_xyz)));
 
     // Some useful variables
     mat33 jacMat;
@@ -1701,7 +1700,7 @@ void reg_trilinearResampleGradient(const nifti_image *floatingImage,
     }
 
     // Reorientation matrix is assessed in order to remove the rigid component
-    mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
+    mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->sto_xyz)));
 
     // Some useful variables
     mat33 jacMat;
@@ -1980,7 +1979,7 @@ void TrilinearImageGradient(const nifti_image *floatingImage,
             world[2] = (FieldType)deformationFieldPtrZ[index];
 
             /* real -> voxel; floating space */
-            reg_mat44_mul(floatingIJKMatrix, world, position);
+            Mat44Mul(*floatingIJKMatrix, world, position);
 
             previous[0] = Floor(position[0]);
             previous[1] = Floor(position[1]);
@@ -2257,7 +2256,7 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage,
             world[2] = (FieldType)deformationFieldPtrZ[index];
 
             /* real -> voxel; floating space */
-            reg_mat44_mul(floatingIJKMatrix, world, position);
+            Mat44Mul(*floatingIJKMatrix, world, position);
 
             previous[0] = Floor(position[0]);
             previous[1] = Floor(position[1]);
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index b000fbd4..2a4bddfb 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -349,7 +349,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
     mat44 *image_mm2vox = &refImage->qto_ijk;
     if (refImage->sform_code > 0)
         image_mm2vox = &refImage->sto_ijk;
-    mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm);
+    mat44 grid2img_vox = *image_mm2vox * *grid_vox2mm;
 
     // Compute the block size
     const int blockSize[3] = {
@@ -404,7 +404,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
                     controlPointGridImage->nx * cpy + cpx;
 
                 // Compute the corresponding image voxel position
-                reg_mat44_mul(&grid2img_vox, gridVox, imageVox);
+                Mat44Mul(grid2img_vox, gridVox, imageVox);
                 imageVox[0] = static_cast<float>(Round(imageVox[0]));
                 imageVox[1] = static_cast<float>(Round(imageVox[1]));
                 imageVox[2] = static_cast<float>(Round(imageVox[2]));
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 017d6029..21aa5869 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -253,22 +253,22 @@ void reg_tools_removeSCLInfo(nifti_image *image) {
 void reg_getRealImageSpacing(nifti_image *image, float *spacingValues) {
     float indexVoxel1[3] = { 0, 0, 0 };
     float indexVoxel2[3], realVoxel1[3], realVoxel2[3];
-    reg_mat44_mul(&(image->sto_xyz), indexVoxel1, realVoxel1);
+    Mat44Mul(image->sto_xyz, indexVoxel1, realVoxel1);
 
     indexVoxel2[1] = indexVoxel2[2] = 0;
     indexVoxel2[0] = 1;
-    reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
+    Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2);
     spacingValues[0] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
 
     indexVoxel2[0] = indexVoxel2[2] = 0;
     indexVoxel2[1] = 1;
-    reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
+    Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2);
     spacingValues[1] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
 
     if (image->nz > 1) {
         indexVoxel2[0] = indexVoxel2[1] = 0;
         indexVoxel2[2] = 1;
-        reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2);
+        Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2);
         spacingValues[2] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
     }
 }
diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h
index 650e6c71..f0475e5d 100755
--- a/reg-lib/cpu/_reg_tools.h
+++ b/reg-lib/cpu/_reg_tools.h
@@ -20,7 +20,7 @@
 #include <cmath>
 #include <algorithm>
 #include <functional>
-#include "_reg_maths.h"
+#include "Maths.hpp"
 #include "Debug.hpp"
 
 using namespace NiftyReg;
diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp
index b5872e56..9bd528c3 100644
--- a/reg-lib/cuda/CudaCommon.hpp
+++ b/reg-lib/cuda/CudaCommon.hpp
@@ -118,5 +118,21 @@ UniqueTextureObjectPtr CreateTextureObject(const DataType *devPtr,
                                            const cudaChannelFormatKind channelFormat,
                                            const unsigned channelCount);
 /* *************************************************************** */
+template<bool is3d>
+__device__ __inline__ int3 IndexToDims(const int index, const int3 dims) {
+    int quot = 0, rem;
+    if constexpr (is3d)
+        Divide(index, dims.x * dims.y, quot, rem);
+    else rem = index;
+    const int z = quot;
+    Divide(rem, dims.x, quot, rem);
+    const int y = quot, x = rem;
+    return { x, y, z };
+}
+/* *************************************************************** */
+__device__ __inline__ int3 IndexToDims(const int index, const int3 dims) {
+    return dims.z > 1 ? IndexToDims<true>(index, dims) : IndexToDims<false>(index, dims);
+}
+/* *************************************************************** */
 } // namespace NiftyReg::Cuda
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaGlobalTransformation.cu b/reg-lib/cuda/CudaGlobalTransformation.cu
index a5c0b82f..076f91b2 100644
--- a/reg-lib/cuda/CudaGlobalTransformation.cu
+++ b/reg-lib/cuda/CudaGlobalTransformation.cu
@@ -11,7 +11,6 @@
  */
 
 #include "CudaGlobalTransformation.hpp"
-#include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
 template<bool is3d, bool compose>
@@ -21,7 +20,7 @@ void GetAffineDeformationField(const mat44 *affineMatrix,
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, is3d ? 3 : 2);
     const int3 imageDims = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
     const mat44 *targetMatrix = deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz;
-    const mat44 transMatrix = compose ? *affineMatrix : reg_mat44_mul(affineMatrix, targetMatrix);
+    const mat44 transMatrix = compose ? *affineMatrix : *affineMatrix * *targetMatrix;
     Cuda::UniqueTextureObjectPtr deformationFieldTexturePtr; cudaTextureObject_t deformationFieldTexture = 0;
     if constexpr (compose) {
         deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
@@ -36,7 +35,7 @@ void GetAffineDeformationField(const mat44 *affineMatrix,
             float4 defVal = tex1Dfetch<float4>(deformationFieldTexture, index);
             voxel[0] = defVal.x; voxel[1] = defVal.y; voxel[2] = defVal.z;
         } else {
-            auto dims = reg_indexToDims_cuda<is3d>(index, imageDims);
+            auto dims = Cuda::IndexToDims<is3d>(index, imageDims);
             voxel[0] = static_cast<float>(dims.x);
             voxel[1] = static_cast<float>(dims.y);
             voxel[2] = static_cast<float>(dims.z);
@@ -44,7 +43,7 @@ void GetAffineDeformationField(const mat44 *affineMatrix,
 
         // The transformation is applied
         float position[3];
-        reg_mat44_mul_cuda<is3d>(transMatrix, voxel, position);
+        Mat44Mul<float, is3d>(transMatrix, voxel, position);
 
         // The deformation field (real coordinates) is stored
         deformationFieldCuda[index] = make_float4(position[0], position[1], position[2], 0);
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index 2c98a8ca..ce733da6 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -84,7 +84,7 @@ __device__ SecondDerivative<is3d> GetApproxSecondDerivative(const int index,
                                                             cudaTextureObject_t controlPointTexture,
                                                             const int3 controlPointImageDim,
                                                             const Basis2nd<is3d> basis) {
-    const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, controlPointImageDim);
+    const auto [x, y, z] = IndexToDims<is3d>(index, controlPointImageDim);
     if (!isGradient && (x < 1 || x >= controlPointImageDim.x - 1 ||
                         y < 1 || y >= controlPointImageDim.y - 1 ||
                         (is3d && (z < 1 || z >= controlPointImageDim.z - 1)))) return {};
@@ -206,7 +206,7 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage,
     const float approxRatio = bendingEnergyWeight / (float)controlPointNumber;
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber,
                        [controlPointImageDim, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const int index) {
-        const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, controlPointImageDim);
+        const auto [x, y, z] = IndexToDims<is3d>(index, controlPointImageDim);
         typename SecondDerivative<is3d>::Type gradientValue{};
         if constexpr (is3d) {
             for (int c = z - 1, basInd = 0; c < z + 2; c++) {
@@ -270,7 +270,7 @@ void ComputeApproxJacobianValues(const nifti_image *controlPointImage,
     auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
 
     // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
-    const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz);
+    const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz);
 
     // The Jacobian matrix is computed for every control point
     if (controlPointImage->nz > 1) {
@@ -306,7 +306,7 @@ void ComputeJacobianValues(const nifti_image *controlPointImage,
     auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
 
     // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
-    const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz);
+    const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz);
 
     // The Jacobian matrix is computed for every voxel
     if (controlPointImage->nz > 1) {
@@ -398,7 +398,7 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
     }
 
     // Need to disorient the Jacobian matrix using the header information - voxel to real conversion
-    const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk);
+    const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk);
 
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
@@ -511,7 +511,7 @@ double CorrectFolding(const nifti_image *referenceImage,
     }
 
     // Need to disorient the Jacobian matrix using the header information - voxel to real conversion
-    const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk);
+    const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk);
 
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
     const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
@@ -555,7 +555,7 @@ void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda) {
     const int3 imageDim{ image->nx, image->ny, image->nz };
 
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const int index) {
-        const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, imageDim);
+        const auto [x, y, z] = IndexToDims<is3d>(index, imageDim);
 
         const float4 initialPosition{
             float(x) * affineMatrix.m[0][0] + float(y) * affineMatrix.m[0][1] + (is3d ? float(z) * affineMatrix.m[0][2] : 0.f) + affineMatrix.m[0][3],
@@ -866,7 +866,7 @@ double ApproxLinearEnergy(const nifti_image *controlPointGrid,
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
 
     // Matrix to use to convert the gradient from mm to voxel
-    const mat33 reorientation = reg_mat44_to_mat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk);
+    const mat33 reorientation = Mat44ToMat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk);
 
     // Store the basis values since they are constant as the value is approximated at the control point positions only
     Basis1st<is3d> basis;
@@ -903,7 +903,7 @@ void ApproxLinearEnergyGradient(const nifti_image *controlPointGrid,
     const float approxRatio = weight / static_cast<float>(voxelNumber);
 
     // Matrix to use to convert the gradient from mm to voxel
-    const mat33 reorientation = reg_mat44_to_mat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk);
+    const mat33 reorientation = Mat44ToMat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk);
     const mat33 invReorientation = nifti_mat33_inverse(reorientation);
 
     // Store the basis values since they are constant as the value is approximated at the control point positions only
@@ -932,7 +932,7 @@ void ApproxLinearEnergyGradient(const nifti_image *controlPointGrid,
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [
         transGradCuda, dispMatricesTexture, cppDims, approxRatio, basis, invReorientation
     ]__device__(const int index) {
-        const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, cppDims);
+        const auto [x, y, z] = IndexToDims<is3d>(index, cppDims);
         auto gradVal = transGradCuda[index];
 
         if constexpr (is3d) {
diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu
index b7639f76..ebb95539 100644
--- a/reg-lib/cuda/CudaLocalTransformationKernels.cu
+++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu
@@ -10,11 +10,112 @@
  *
  */
 
-#include "_reg_common_cuda_kernels.cu"
+#include "CudaCommon.hpp"
 
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
 /* *************************************************************** */
+__device__ __inline__ mat33 Mat33Inverse(const mat33 r) {
+    /*  INPUT MATRIX:  */
+    const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2];  /* [ r11 r12 r13 ] */
+    const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2];  /* [ r21 r22 r23 ] */
+    const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2];  /* [ r31 r32 r33 ] */
+
+    double deti = (r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 +
+                   r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13);
+
+    if (deti != 0.0) deti = 1.0 / deti;
+
+    mat33 q;
+    q.m[0][0] = float(deti * (r22 * r33 - r32 * r23));
+    q.m[0][1] = float(deti * (-r12 * r33 + r32 * r13));
+    q.m[0][2] = float(deti * (r12 * r23 - r22 * r13));
+
+    q.m[1][0] = float(deti * (-r21 * r33 + r31 * r23));
+    q.m[1][1] = float(deti * (r11 * r33 - r31 * r13));
+    q.m[1][2] = float(deti * (-r11 * r23 + r21 * r13));
+
+    q.m[2][0] = float(deti * (r21 * r32 - r31 * r22));
+    q.m[2][1] = float(deti * (-r11 * r32 + r31 * r12));
+    q.m[2][2] = float(deti * (r11 * r22 - r21 * r12));
+
+    return q;
+}
+/* *************************************************************** */
+__device__ __inline__ float Mat33Determ(const mat33 r) {
+    /*  INPUT MATRIX:  */
+    const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2];  /* [ r11 r12 r13 ] */
+    const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2];  /* [ r21 r22 r23 ] */
+    const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2];  /* [ r31 r32 r33 ] */
+
+    return float(r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 +
+                 r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13);
+}
+/* *************************************************************** */
+__device__ __inline__ float Mat33RowNorm(const mat33 a) {
+    float r1 = fabs(a.m[0][0]) + fabs(a.m[0][1]) + fabs(a.m[0][2]);
+    const float r2 = fabs(a.m[1][0]) + fabs(a.m[1][1]) + fabs(a.m[1][2]);
+    const float r3 = fabs(a.m[2][0]) + fabs(a.m[2][1]) + fabs(a.m[2][2]);
+    if (r1 < r2) r1 = r2;
+    if (r1 < r3) r1 = r3;
+    return r1;
+}
+/* *************************************************************** */
+__device__ __inline__ float Mat33ColNorm(const mat33 a) {
+    float r1 = fabs(a.m[0][0]) + fabs(a.m[1][0]) + fabs(a.m[2][0]);
+    const float r2 = fabs(a.m[0][1]) + fabs(a.m[1][1]) + fabs(a.m[2][1]);
+    const float r3 = fabs(a.m[0][2]) + fabs(a.m[1][2]) + fabs(a.m[2][2]);
+    if (r1 < r2) r1 = r2;
+    if (r1 < r3) r1 = r3;
+    return r1;
+}
+/* *************************************************************** */
+__device__ __inline__ mat33 Mat33Polar(mat33 x) {
+    // Force matrix to be nonsingular
+    float gam = Mat33Determ(x);
+    while (gam == 0.0) {        // Perturb matrix
+        gam = 0.00001f * (0.001f + Mat33RowNorm(x));
+        x.m[0][0] += gam; x.m[1][1] += gam; x.m[2][2] += gam;
+        gam = Mat33Determ(x);
+    }
+
+    mat33 z;
+    float gmi, dif = 1.0f;
+    int k = 0;
+    while (1) {
+        const mat33 y = Mat33Inverse(x);
+        if (dif > 0.3) {     // Far from convergence
+            const float alp = sqrt(Mat33RowNorm(x) * Mat33ColNorm(x));
+            const float bet = sqrt(Mat33RowNorm(y) * Mat33ColNorm(y));
+            gam = sqrt(bet / alp);
+            gmi = 1.f / gam;
+        } else {
+            gam = gmi = 1.0f;  // Close to convergence
+        }
+        z.m[0][0] = 0.5f * (gam * x.m[0][0] + gmi * y.m[0][0]);
+        z.m[0][1] = 0.5f * (gam * x.m[0][1] + gmi * y.m[1][0]);
+        z.m[0][2] = 0.5f * (gam * x.m[0][2] + gmi * y.m[2][0]);
+        z.m[1][0] = 0.5f * (gam * x.m[1][0] + gmi * y.m[0][1]);
+        z.m[1][1] = 0.5f * (gam * x.m[1][1] + gmi * y.m[1][1]);
+        z.m[1][2] = 0.5f * (gam * x.m[1][2] + gmi * y.m[2][1]);
+        z.m[2][0] = 0.5f * (gam * x.m[2][0] + gmi * y.m[0][2]);
+        z.m[2][1] = 0.5f * (gam * x.m[2][1] + gmi * y.m[1][2]);
+        z.m[2][2] = 0.5f * (gam * x.m[2][2] + gmi * y.m[2][2]);
+
+        dif = (fabs(z.m[0][0] - x.m[0][0]) + fabs(z.m[0][1] - x.m[0][1]) +
+               fabs(z.m[0][2] - x.m[0][2]) + fabs(z.m[1][0] - x.m[1][0]) +
+               fabs(z.m[1][1] - x.m[1][1]) + fabs(z.m[1][2] - x.m[1][2]) +
+               fabs(z.m[2][0] - x.m[2][0]) + fabs(z.m[2][1] - x.m[2][1]) +
+               fabs(z.m[2][2] - x.m[2][2]));
+
+        k = k + 1;
+        if (k > 100 || dif < 3.e-6) break;  // Convergence or exhaustion
+        x = z;
+    }
+
+    return z;
+}
+/* *************************************************************** */
 template<bool bspline>
 __device__ __inline__ void GetBasisSplineValues(const float basis, float *values) {
     const float ff = Square(basis);
@@ -206,7 +307,7 @@ __device__ void GetDeformationField3d(float4 *deformationField,
         nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) };
     } else { // starting deformation field is blank - !composition
-        const auto [x, y, z] = reg_indexToDims_cuda<true>(index, referenceImageDim);
+        const auto [x, y, z] = IndexToDims<true>(index, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -269,7 +370,7 @@ __device__ void GetDeformationField2d(float4 *deformationField,
         nodePre = { Floor(xVoxel), Floor(yVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) };
     } else { // starting deformation field is blank - !composition
-        const auto [x, y, z] = reg_indexToDims_cuda<false>(index, referenceImageDim);
+        const auto [x, y, z] = IndexToDims<false>(index, referenceImageDim);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -312,7 +413,7 @@ __global__ void GetApproxJacobianValues2d(float *jacobianMatrices,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x, quot, rem);
+        Divide(tid, controlPointImageDim.x, quot, rem);
         const int y = quot, x = rem;
 
         if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1) {
@@ -374,9 +475,9 @@ __global__ void GetApproxJacobianValues3d(float *jacobianMatrices,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
         const int z = quot;
-        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        Divide(rem, controlPointImageDim.x, quot, rem);
         const int y = quot, x = rem;
 
         if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1 && 0 < z && z < controlPointImageDim.z - 1) {
@@ -459,7 +560,7 @@ __global__ void GetJacobianValues2d(float *jacobianMatrices,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         int quot, rem;
-        reg_div_cuda(tid, referenceImageDim.x, quot, rem);
+        Divide(tid, referenceImageDim.x, quot, rem);
         const int y = quot, x = rem;
 
         // the "nearest previous" node is determined [0,0,0]
@@ -528,9 +629,9 @@ __global__ void GetJacobianValues3d(float *jacobianMatrices,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
         int quot, rem;
-        reg_div_cuda(tid, referenceImageDim.x * referenceImageDim.y, quot, rem);
+        Divide(tid, referenceImageDim.x * referenceImageDim.y, quot, rem);
         const int z = quot;
-        reg_div_cuda(rem, referenceImageDim.x, quot, rem);
+        Divide(rem, referenceImageDim.x, quot, rem);
         const int y = quot, x = rem;
 
         // the "nearest previous" node is determined [0,0,0]
@@ -677,7 +778,7 @@ __global__ void ComputeApproxJacGradient2d(float4 *gradient,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x, quot, rem);
+        Divide(tid, controlPointImageDim.x, quot, rem);
         const int y = quot, x = rem;
 
         float2 jacobianGradient{};
@@ -729,9 +830,9 @@ __global__ void ComputeApproxJacGradient3d(float4 *gradient,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
         const int z = quot;
-        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        Divide(rem, controlPointImageDim.x, quot, rem);
         const int y = quot, x = rem;
 
         float3 jacobianGradient{};
@@ -787,7 +888,7 @@ __global__ void ComputeJacGradient2d(float4 *gradient,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x, quot, rem);
+        Divide(tid, controlPointImageDim.x, quot, rem);
         const int y = quot, x = rem;
 
         float2 jacobianGradient{};
@@ -842,9 +943,9 @@ __global__ void ComputeJacGradient3d(float4 *gradient,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
         const int z = quot;
-        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        Divide(rem, controlPointImageDim.x, quot, rem);
         const int y = quot, x = rem;
 
         float3 jacobianGradient{};
@@ -917,9 +1018,9 @@ __global__ void ApproxCorrectFolding3d(float4 *controlPointGrid,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
         const int z = quot;
-        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        Divide(rem, controlPointImageDim.x, quot, rem);
         const int y = quot, x = rem;
 
         float3 foldingCorrection{};
@@ -988,9 +1089,9 @@ __global__ void CorrectFolding3d(float4 *controlPointGrid,
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
         int quot, rem;
-        reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
+        Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
         const int z = quot;
-        reg_div_cuda(rem, controlPointImageDim.x, quot, rem);
+        Divide(rem, controlPointImageDim.x, quot, rem);
         const int y = quot, x = rem;
 
         float3 foldingCorrection{};
@@ -1144,7 +1245,7 @@ __device__ static mat33 CreateDisplacementMatrix(const int index,
                                                  const int3& cppDims,
                                                  const Basis1st<is3d>& basis,
                                                  const mat33& reorientation) {
-    const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, cppDims);
+    const auto [x, y, z] = IndexToDims<is3d>(index, cppDims);
     if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 ||
         (is3d && (z < 1 || z >= cppDims.z - 1))) return {};
 
@@ -1189,10 +1290,10 @@ __device__ static mat33 CreateDisplacementMatrix(const int index,
         }
     }
     // Convert from mm to voxel
-    matrix = reg_mat33_mul_cuda(reorientation, matrix);
+    matrix = reorientation * matrix;
     // Removing the rotation component
-    const mat33 r = reg_mat33_inverse_cuda(reg_mat33_polar_cuda(matrix));
-    matrix = reg_mat33_mul_cuda(r, matrix);
+    const mat33 r = Mat33Inverse(Mat33Polar(matrix));
+    matrix = r * matrix;
     // Convert to displacement
     matrix.m[0][0]--; matrix.m[1][1]--;
     if constexpr (is3d) matrix.m[2][2]--;
diff --git a/reg-lib/cuda/CudaOptimiser.cu b/reg-lib/cuda/CudaOptimiser.cu
index fb662d23..7a4e13a7 100644
--- a/reg-lib/cuda/CudaOptimiser.cu
+++ b/reg-lib/cuda/CudaOptimiser.cu
@@ -1,5 +1,4 @@
 #include "CudaOptimiser.hpp"
-#include "_reg_common_cuda_kernels.cu"
 #include <curand_kernel.h>
 
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu
index 5c21bee8..58c33998 100644
--- a/reg-lib/cuda/CudaResampling.cu
+++ b/reg-lib/cuda/CudaResampling.cu
@@ -11,7 +11,6 @@
  */
 
 #include "CudaResampling.hpp"
-#include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
@@ -237,19 +236,19 @@ template<bool is3d>
 static float3 GetRealImageSpacing(const nifti_image *image) {
     float3 spacing{};
     float indexVoxel1[3]{}, indexVoxel2[3], realVoxel1[3], realVoxel2[3];
-    reg_mat44_mul(&image->sto_xyz, indexVoxel1, realVoxel1);
+    Mat44Mul(image->sto_xyz, indexVoxel1, realVoxel1);
 
     indexVoxel2[1] = indexVoxel2[2] = 0; indexVoxel2[0] = 1;
-    reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2);
+    Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2);
     spacing.x = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
 
     indexVoxel2[0] = indexVoxel2[2] = 0; indexVoxel2[1] = 1;
-    reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2);
+    Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2);
     spacing.y = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
 
     if constexpr (is3d) {
         indexVoxel2[0] = indexVoxel2[1] = 0; indexVoxel2[2] = 1;
-        reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2);
+        Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2);
         spacing.z = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2]));
     }
 
@@ -290,7 +289,7 @@ void ResampleGradient(const nifti_image *floatingImage,
                                                              make_float3(warpedImage->dx, warpedImage->dy, warpedImage->dz);
 
     // Reorientation matrix is assessed in order to remove the rigid component
-    const mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz)));
+    const mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->sto_xyz)));
 
     thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [
         warpedImageCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDims, defFieldDims, realSpacing, reorient, paddingValue
@@ -344,7 +343,7 @@ void ResampleGradient(const nifti_image *floatingImage,
         // Compute the Jacobian matrix
         constexpr float basis[] = { 1.f, 0.f };
         constexpr float deriv[] = { -1.f, 1.f };
-        auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, defFieldDims);
+        auto [x, y, z] = IndexToDims<is3d>(index, defFieldDims);
         mat33 jacMat{};
         for (char c = 0; c < (is3d ? 2 : 1); c++) {
             if constexpr (is3d) {
@@ -407,7 +406,7 @@ void ResampleGradient(const nifti_image *floatingImage,
             }
         }
         // reorient and scale the Jacobian matrix
-        jacMat = reg_mat33_mul_cuda(reorient, jacMat);
+        jacMat = reorient * jacMat;
         jacMat.m[0][0] /= realSpacing.x;
         jacMat.m[0][1] /= realSpacing.y;
         jacMat.m[1][0] /= realSpacing.x;
diff --git a/reg-lib/cuda/CudaTools.cu b/reg-lib/cuda/CudaTools.cu
index 91455a2c..6a3f39dd 100644
--- a/reg-lib/cuda/CudaTools.cu
+++ b/reg-lib/cuda/CudaTools.cu
@@ -41,23 +41,23 @@ void VoxelCentricToNodeCentric(const nifti_image *nodeImage,
     if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) {
         mat44 temp = *(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
         temp = nifti_mat44_inverse(temp);
-        transformation = reg_mat44_mul(&temp, &transformation);
+        transformation = temp * transformation;
     }
     // Millimetre to voxel in the reference image
-    transformation = reg_mat44_mul(voxelImage->sform_code > 0 ? &voxelImage->sto_ijk : &voxelImage->qto_ijk, &transformation);
+    transformation = (voxelImage->sform_code > 0 ? voxelImage->sto_ijk : voxelImage->qto_ijk) * transformation;
 
     // The information has to be reoriented
     // Voxel to millimetre contains the orientation of the image that is used
     // to compute the spatial gradient (floating image)
     mat33 reorientation;
     if (voxelToMillimetre) {
-        reorientation = reg_mat44_to_mat33(voxelToMillimetre);
+        reorientation = Mat44ToMat33(voxelToMillimetre);
         if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) {
-            mat33 temp = reg_mat44_to_mat33(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
+            mat33 temp = Mat44ToMat33(reinterpret_cast<mat44*>(nodeImage->ext_list[0].edata));
             temp = nifti_mat33_inverse(temp);
             reorientation = nifti_mat33_mul(temp, reorientation);
         }
-    } else reg_mat33_eye(&reorientation);
+    } else Mat33Eye(&reorientation);
     // The information has to be weighted
     float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz };
     for (int i = 0; i < (is3d ? 3 : 2); ++i) {
diff --git a/reg-lib/cuda/CudaToolsKernels.cu b/reg-lib/cuda/CudaToolsKernels.cu
index 361bbdac..f502ac4f 100644
--- a/reg-lib/cuda/CudaToolsKernels.cu
+++ b/reg-lib/cuda/CudaToolsKernels.cu
@@ -8,7 +8,7 @@
  *  See the LICENSE.txt file in the nifty_reg root folder
  */
 
-#include "_reg_common_cuda_kernels.cu"
+#include "CudaCommon.hpp"
 
 /* *************************************************************** */
 namespace NiftyReg::Cuda {
@@ -23,10 +23,10 @@ __device__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda,
                                                 const mat33 reorientation,
                                                 const int index) {
     // Calculate the node coordinates
-    const auto [x, y, z] = reg_indexToDims_cuda<is3d>(index, nodeImageDims);
+    const auto [x, y, z] = IndexToDims<is3d>(index, nodeImageDims);
     // Transform into voxel coordinates
     float voxelCoord[3], nodeCoord[3] = { static_cast<float>(x), static_cast<float>(y), static_cast<float>(z) };
-    reg_mat44_mul_cuda<is3d>(transformation, nodeCoord, voxelCoord);
+    Mat44Mul<float, is3d>(transformation, nodeCoord, voxelCoord);
 
     // Linear interpolation
     float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{};
@@ -64,7 +64,7 @@ __device__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda,
     }
 
     float reorientedValue[3];
-    reg_mat33_mul_cuda<is3d>(reorientation, interpolatedValue, weight, reorientedValue);
+    Mat33Mul<is3d>(reorientation, interpolatedValue, weight, reorientedValue);
     nodeImageCuda[index] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 };
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu
deleted file mode 100644
index 4206931d..00000000
--- a/reg-lib/cuda/_reg_common_cuda_kernels.cu
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- *  Copyright (c) 2009-2018, University College London
- *  Copyright (c) 2018, NiftyReg Developers.
- *  All rights reserved.
- *  See the LICENSE.txt file in the nifty_reg root folder
- */
-
-#pragma once
-
-/* *************************************************************** */
-template<bool is3d>
-__device__ __inline__ void reg_mat33_mul_cuda(const mat33 mat, const float (&in)[3], const double weight, float (&out)[3]) {
-    out[0] = weight * (mat.m[0][0] * in[0] + mat.m[1][0] * in[1] + mat.m[2][0] * in[2]);
-    out[1] = weight * (mat.m[0][1] * in[0] + mat.m[1][1] * in[1] + mat.m[2][1] * in[2]);
-    if constexpr (is3d)
-        out[2] = weight * (mat.m[0][2] * in[0] + mat.m[1][2] * in[1] + mat.m[2][2] * in[2]);
-}
-/* *************************************************************** */
-template<bool is3d>
-__device__ __inline__ void reg_mat44_mul_cuda(const mat44 mat, const float (&in)[3], float (&out)[3]) {
-    out[0] = double(mat.m[0][0]) * double(in[0]) + double(mat.m[0][1]) * double(in[1]) + double(mat.m[0][2]) * double(in[2]) + double(mat.m[0][3]);
-    out[1] = double(mat.m[1][0]) * double(in[0]) + double(mat.m[1][1]) * double(in[1]) + double(mat.m[1][2]) * double(in[2]) + double(mat.m[1][3]);
-    if constexpr (is3d)
-        out[2] = double(mat.m[2][0]) * double(in[0]) + double(mat.m[2][1]) * double(in[1]) + double(mat.m[2][2]) * double(in[2]) + double(mat.m[2][3]);
-}
-/* *************************************************************** */
-__device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33 a, const mat33 b) {
-    mat33 c;
-    for (int i = 0; i < 3; i++)
-        for (int j = 0; j < 3; j++)
-            c.m[i][j] = a.m[i][0] * b.m[0][j] + a.m[i][1] * b.m[1][j] + a.m[i][2] * b.m[2][j];
-    return c;
-}
-/* *************************************************************** */
-__device__ __inline__ mat33 reg_mat33_inverse_cuda(const mat33 r) {
-    /*  INPUT MATRIX:  */
-    const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2];  /* [ r11 r12 r13 ] */
-    const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2];  /* [ r21 r22 r23 ] */
-    const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2];  /* [ r31 r32 r33 ] */
-
-    double deti = (r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 +
-                   r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13);
-
-    if (deti != 0.0) deti = 1.0 / deti;
-
-    mat33 q;
-    q.m[0][0] = float(deti * (r22 * r33 - r32 * r23));
-    q.m[0][1] = float(deti * (-r12 * r33 + r32 * r13));
-    q.m[0][2] = float(deti * (r12 * r23 - r22 * r13));
-
-    q.m[1][0] = float(deti * (-r21 * r33 + r31 * r23));
-    q.m[1][1] = float(deti * (r11 * r33 - r31 * r13));
-    q.m[1][2] = float(deti * (-r11 * r23 + r21 * r13));
-
-    q.m[2][0] = float(deti * (r21 * r32 - r31 * r22));
-    q.m[2][1] = float(deti * (-r11 * r32 + r31 * r12));
-    q.m[2][2] = float(deti * (r11 * r22 - r21 * r12));
-
-    return q;
-}
-/* *************************************************************** */
-__device__ __inline__ float reg_mat33_determ_cuda(const mat33 r) {
-    /*  INPUT MATRIX:  */
-    const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2];  /* [ r11 r12 r13 ] */
-    const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2];  /* [ r21 r22 r23 ] */
-    const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2];  /* [ r31 r32 r33 ] */
-
-    return float(r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 +
-                 r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13);
-}
-/* *************************************************************** */
-__device__ __inline__ float reg_mat33_rownorm_cuda(const mat33 a) {
-    float r1 = fabs(a.m[0][0]) + fabs(a.m[0][1]) + fabs(a.m[0][2]);
-    const float r2 = fabs(a.m[1][0]) + fabs(a.m[1][1]) + fabs(a.m[1][2]);
-    const float r3 = fabs(a.m[2][0]) + fabs(a.m[2][1]) + fabs(a.m[2][2]);
-    if (r1 < r2) r1 = r2;
-    if (r1 < r3) r1 = r3;
-    return r1;
-}
-/* *************************************************************** */
-__device__ __inline__ float reg_mat33_colnorm_cuda(const mat33 a) {
-    float r1 = fabs(a.m[0][0]) + fabs(a.m[1][0]) + fabs(a.m[2][0]);
-    const float r2 = fabs(a.m[0][1]) + fabs(a.m[1][1]) + fabs(a.m[2][1]);
-    const float r3 = fabs(a.m[0][2]) + fabs(a.m[1][2]) + fabs(a.m[2][2]);
-    if (r1 < r2) r1 = r2;
-    if (r1 < r3) r1 = r3;
-    return r1;
-}
-/* *************************************************************** */
-__device__ __inline__ mat33 reg_mat33_polar_cuda(mat33 x) {
-    // Force matrix to be nonsingular
-    float gam = reg_mat33_determ_cuda(x);
-    while (gam == 0.0) {        // Perturb matrix
-        gam = 0.00001f * (0.001f + reg_mat33_rownorm_cuda(x));
-        x.m[0][0] += gam; x.m[1][1] += gam; x.m[2][2] += gam;
-        gam = reg_mat33_determ_cuda(x);
-    }
-
-    mat33 z;
-    float gmi, dif = 1.0f;
-    int k = 0;
-    while (1) {
-        const mat33 y = reg_mat33_inverse_cuda(x);
-        if (dif > 0.3) {     // Far from convergence
-            const float alp = sqrt(reg_mat33_rownorm_cuda(x) * reg_mat33_colnorm_cuda(x));
-            const float bet = sqrt(reg_mat33_rownorm_cuda(y) * reg_mat33_colnorm_cuda(y));
-            gam = sqrt(bet / alp);
-            gmi = 1.f / gam;
-        } else {
-            gam = gmi = 1.0f;  // Close to convergence
-        }
-        z.m[0][0] = 0.5f * (gam * x.m[0][0] + gmi * y.m[0][0]);
-        z.m[0][1] = 0.5f * (gam * x.m[0][1] + gmi * y.m[1][0]);
-        z.m[0][2] = 0.5f * (gam * x.m[0][2] + gmi * y.m[2][0]);
-        z.m[1][0] = 0.5f * (gam * x.m[1][0] + gmi * y.m[0][1]);
-        z.m[1][1] = 0.5f * (gam * x.m[1][1] + gmi * y.m[1][1]);
-        z.m[1][2] = 0.5f * (gam * x.m[1][2] + gmi * y.m[2][1]);
-        z.m[2][0] = 0.5f * (gam * x.m[2][0] + gmi * y.m[0][2]);
-        z.m[2][1] = 0.5f * (gam * x.m[2][1] + gmi * y.m[1][2]);
-        z.m[2][2] = 0.5f * (gam * x.m[2][2] + gmi * y.m[2][2]);
-
-        dif = (fabs(z.m[0][0] - x.m[0][0]) + fabs(z.m[0][1] - x.m[0][1]) +
-               fabs(z.m[0][2] - x.m[0][2]) + fabs(z.m[1][0] - x.m[1][0]) +
-               fabs(z.m[1][1] - x.m[1][1]) + fabs(z.m[1][2] - x.m[1][2]) +
-               fabs(z.m[2][0] - x.m[2][0]) + fabs(z.m[2][1] - x.m[2][1]) +
-               fabs(z.m[2][2] - x.m[2][2]));
-
-        k = k + 1;
-        if (k > 100 || dif < 3.e-6) break;  // Convergence or exhaustion
-        x = z;
-    }
-
-    return z;
-}
-/* *************************************************************** */
-__device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quot, int& rem) {
-    // This will be optimised by the compiler into a single div instruction
-    quot = num / denom;
-    rem = num % denom;
-}
-/* *************************************************************** */
-template<bool is3d>
-__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3 dims) {
-    int quot = 0, rem;
-    if constexpr (is3d)
-        reg_div_cuda(index, dims.x * dims.y, quot, rem);
-    else rem = index;
-    const int z = quot;
-    reg_div_cuda(rem, dims.x, quot, rem);
-    const int y = quot, x = rem;
-    return { x, y, z };
-}
-/* *************************************************************** */
-__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3 dims) {
-    return dims.z > 1 ? reg_indexToDims_cuda<true>(index, dims) : reg_indexToDims_cuda<false>(index, dims);
-}
-/* *************************************************************** */
diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu
index b117e568..d97b569f 100755
--- a/reg-lib/cuda/_reg_nmi_gpu.cu
+++ b/reg-lib/cuda/_reg_nmi_gpu.cu
@@ -11,7 +11,6 @@
  */
 
 #include "_reg_nmi_gpu.h"
-#include "_reg_common_cuda_kernels.cu"
 
 /* *************************************************************** */
 reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() {
diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu
index a9ec43a1..e9bfa38d 100644
--- a/reg-lib/cuda/affineDeformationKernel.cu
+++ b/reg-lib/cuda/affineDeformationKernel.cu
@@ -61,7 +61,7 @@ void launchAffine(mat44 *affineTransformation,
 
    float* trans = (float *)malloc(16 * sizeof(float));
    const mat44 *targetMatrix = (deformationField->sform_code > 0) ? &(deformationField->sto_xyz) : &(deformationField->qto_xyz);
-   mat44 transformationMatrix = compose ? *affineTransformation : reg_mat44_mul(affineTransformation, targetMatrix);
+   mat44 transformationMatrix = compose ? *affineTransformation : *affineTransformation * *targetMatrix;
    mat44ToCptr(transformationMatrix, trans);
    NR_CUDA_SAFE_CALL(cudaMemcpy(trans_d, trans, 16 * sizeof(float), cudaMemcpyHostToDevice));
    free(trans);
diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu
index f70f277f..55781c4f 100644
--- a/reg-lib/cuda/blockMatchingKernel.cu
+++ b/reg-lib/cuda/blockMatchingKernel.cu
@@ -15,7 +15,7 @@
 #include "_reg_tools.h"
 
 #include <vector>
-#include "_reg_maths.h"
+#include "Maths.hpp"
 
 ////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index cfbe514f..50bcb91c 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -3,7 +3,7 @@
 #include <cuda_runtime.h>
 #include <cuda.h>
 #include"_reg_resampling.h"
-#include"_reg_maths.h"
+#include"Maths.hpp"
 #include "resampleKernel.h"
 #include "CudaCommon.hpp"
 #include"_reg_tools.h"
@@ -59,18 +59,6 @@ __device__ __inline__ void interpolantCubicSpline(FieldTYPE ratio, FieldTYPE *ba
     basis[3] = (FieldTYPE) ((ratio - (double)1.0) * FF / (double)2.0);
 }
 /* *************************************************************** */
-__device__ __inline__
-void reg_mat44_eye(float *mat) {
-	mat[0 * 4 + 0] = 1.f;
-	mat[0 * 4 + 1] = mat[0 * 4 + 2] = mat[0 * 4 + 3] = 0.f;
-	mat[1 * 4 + 1] = 1.f;
-	mat[1 * 4 + 0] = mat[1 * 4 + 2] = mat[1 * 4 + 3] = 0.f;
-	mat[2 * 4 + 2] = 1.f;
-	mat[2 * 4 + 0] = mat[2 * 4 + 1] = mat[2 * 4 + 3] = 0.f;
-	mat[3 * 4 + 3] = 1.f;
-	mat[3 * 4 + 0] = mat[3 * 4 + 1] = mat[3 * 4 + 2] = 0.f;
-}
-/* *************************************************************** */
 __inline__ __device__ void interpWindowedSincKernel(double relative, double *basis)
 {
 	if (relative < 0)
diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp
index 858b541b..dc551cf8 100644
--- a/reg-test/reg_test_affineDeformationField.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -39,7 +39,7 @@ class AffineDeformationFieldTest {
 
         // Identity use case - 2D
         mat44 identity;
-        reg_mat44_eye(&identity);
+        Mat44Eye(&identity);
         // Test order [0,0] [1,0] [0,1] [1,1]
         vector<float3> identityResult2d{ { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 1, 1, 0 } };
         testData.emplace_back(TestData(
@@ -63,7 +63,7 @@ class AffineDeformationFieldTest {
 
         // Translation - 2D
         mat44 translation;
-        reg_mat44_eye(&translation);
+        Mat44Eye(&translation);
         translation.m[0][3] = -0.5;
         translation.m[1][3] = 1.5;
         translation.m[2][3] = 0.75;
@@ -109,7 +109,7 @@ class AffineDeformationFieldTest {
         // Full affine - 2D
         // Test order [0,0] [1,0] [0,1] [1,1]
         mat44 affine;
-        reg_mat44_eye(&affine);
+        Mat44Eye(&affine);
         affine.m[0][3] = -0.5;
         affine.m[1][3] = 1.5;
         affine.m[2][3] = 0.75;
diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp
index cdc57493..7e8e8611 100644
--- a/reg-test/reg_test_be.cpp
+++ b/reg-test/reg_test_be.cpp
@@ -75,8 +75,8 @@ class BendingEnergyTest {
 
         // Set some scaling transformation in the transformations
         mat44 affine2d, affine3d;
-        reg_mat44_eye(&affine2d);
-        reg_mat44_eye(&affine3d);
+        Mat44Eye(&affine2d);
+        Mat44Eye(&affine3d);
         affine3d.m[0][0] = affine2d.m[0][0] = 0.8f;
         affine3d.m[1][1] = affine2d.m[1][1] = 1.2f;
         affine3d.m[2][2] = 1.1f;
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index aa66259a..2243ec2d 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -46,7 +46,7 @@ class BMTest {
 
         // Create a translation matrix to apply OFFSET voxels along each axis
         mat44 translationMatrix;
-        reg_mat44_eye(&translationMatrix);
+        Mat44Eye(&translationMatrix);
         translationMatrix.m[0][3] = -OFFSET;
         translationMatrix.m[1][3] = -OFFSET;
         translationMatrix.m[2][3] = -OFFSET;
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
index 0cc60f7f..681a8ffc 100644
--- a/reg-test/reg_test_regr_lts.cpp
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -79,8 +79,8 @@ class LtsTest {
             auto&& [testName, reference, floating, ttype, inlier] = data;
 
             // Create identity transformations
-            unique_ptr<mat44> matCpu{ new mat44 }; reg_mat44_eye(matCpu.get());
-            unique_ptr<mat44> matCuda{ new mat44 }; reg_mat44_eye(matCuda.get());
+            unique_ptr<mat44> matCpu{ new mat44 }; Mat44Eye(matCpu.get());
+            unique_ptr<mat44> matCuda{ new mat44 }; Mat44Eye(matCuda.get());
 
             // Create images
             NiftiImage referenceCpu(reference), referenceCuda(reference);
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index 551fe96d..aa42def3 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -154,19 +154,19 @@ class VoxelCentricToNodeCentricTest {
         if (nodeGrad->num_ext > 0 && nodeGrad->ext_list[0].edata) {
             mat44 temp = *(reinterpret_cast<mat44*>(nodeGrad->ext_list[0].edata));
             temp = nifti_mat44_inverse(temp);
-            transformation = reg_mat44_mul(&temp, &transformation);
+            transformation = temp * transformation;
         }
         // Millimetre to voxel in the reference image
         if (voxelGrad->sform_code > 0)
-            transformation = reg_mat44_mul(&voxelGrad->sto_ijk, &transformation);
-        else transformation = reg_mat44_mul(&voxelGrad->qto_ijk, &transformation);
+            transformation = voxelGrad->sto_ijk * transformation;
+        else transformation = voxelGrad->qto_ijk * transformation;
 
         // The information has to be reoriented
         // Voxel to millimetre contains the orientation of the image that is used
         // to compute the spatial gradient (floating image)
-        mat33 reorientation = reg_mat44_to_mat33(voxelToMillimetre);
+        mat33 reorientation = Mat44ToMat33(voxelToMillimetre);
         if (nodeGrad->num_ext > 0 && nodeGrad->ext_list[0].edata) {
-            mat33 temp = reg_mat44_to_mat33(reinterpret_cast<mat44*>(nodeGrad->ext_list[0].edata));
+            mat33 temp = Mat44ToMat33(reinterpret_cast<mat44*>(nodeGrad->ext_list[0].edata));
             temp = nifti_mat33_inverse(temp);
             reorientation = nifti_mat33_mul(temp, reorientation);
         }
@@ -189,7 +189,7 @@ class VoxelCentricToNodeCentricTest {
                 nodeCoord[1] = static_cast<float>(y);
                 for (int x = 0; x < nodeGrad->nx; x++) {
                     nodeCoord[0] = static_cast<float>(x);
-                    reg_mat44_mul(&transformation, nodeCoord, voxelCoord);
+                    Mat44Mul(transformation, nodeCoord, voxelCoord);
                     // Linear interpolation
                     DataType basisX[2], basisY[2], basisZ[2];
                     const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) };

From e9f51fee07b26d71260fa3917658cb081dea9f0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 6 Mar 2024 10:56:20 +0000
Subject: [PATCH 301/314] Embed Eigen library

---
 .github/workflows/analysis.yml                |     2 +-
 CMakeLists.txt                                |     6 +-
 niftyreg_build_version.txt                    |     2 +-
 reg-lib/cpu/Maths.cpp                         |     2 +-
 third-party/CMakeLists.txt                    |    21 -
 third-party/Eigen/Cholesky                    |    46 +
 third-party/Eigen/Core                        |   542 +
 third-party/Eigen/Eigenvalues                 |    61 +
 third-party/Eigen/Geometry                    |    62 +
 third-party/Eigen/Householder                 |    30 +
 third-party/Eigen/Jacobi                      |    33 +
 third-party/Eigen/LU                          |    50 +
 third-party/Eigen/MatrixFunctions             |   500 +
 third-party/Eigen/QR                          |    51 +
 third-party/Eigen/SVD                         |    51 +
 third-party/Eigen/src/Cholesky/LDLT.h         |   673 +
 third-party/Eigen/src/Cholesky/LLT.h          |   542 +
 third-party/Eigen/src/Cholesky/LLT_LAPACKE.h  |    99 +
 third-party/Eigen/src/Core/Array.h            |   329 +
 third-party/Eigen/src/Core/ArrayBase.h        |   226 +
 third-party/Eigen/src/Core/ArrayWrapper.h     |   209 +
 third-party/Eigen/src/Core/Assign.h           |    90 +
 third-party/Eigen/src/Core/AssignEvaluator.h  |   935 +
 third-party/Eigen/src/Core/Assign_MKL.h       |   178 +
 third-party/Eigen/src/Core/BandMatrix.h       |   353 +
 third-party/Eigen/src/Core/Block.h            |   452 +
 third-party/Eigen/src/Core/BooleanRedux.h     |   164 +
 third-party/Eigen/src/Core/CommaInitializer.h |   160 +
 .../Eigen/src/Core/ConditionEstimator.h       |   175 +
 third-party/Eigen/src/Core/CoreEvaluators.h   |  1688 ++
 third-party/Eigen/src/Core/CoreIterators.h    |   127 +
 third-party/Eigen/src/Core/CwiseBinaryOp.h    |   184 +
 third-party/Eigen/src/Core/CwiseNullaryOp.h   |   866 +
 third-party/Eigen/src/Core/CwiseTernaryOp.h   |   197 +
 third-party/Eigen/src/Core/CwiseUnaryOp.h     |   103 +
 third-party/Eigen/src/Core/CwiseUnaryView.h   |   130 +
 third-party/Eigen/src/Core/DenseBase.h        |   612 +
 third-party/Eigen/src/Core/DenseCoeffsBase.h  |   681 +
 third-party/Eigen/src/Core/DenseStorage.h     |   570 +
 third-party/Eigen/src/Core/Diagonal.h         |   260 +
 third-party/Eigen/src/Core/DiagonalMatrix.h   |   343 +
 third-party/Eigen/src/Core/DiagonalProduct.h  |    28 +
 third-party/Eigen/src/Core/Dot.h              |   318 +
 third-party/Eigen/src/Core/EigenBase.h        |   159 +
 .../Eigen/src/Core/ForceAlignedAccess.h       |   146 +
 third-party/Eigen/src/Core/Fuzzy.h            |   155 +
 third-party/Eigen/src/Core/GeneralProduct.h   |   455 +
 .../Eigen/src/Core/GenericPacketMath.h        |   590 +
 third-party/Eigen/src/Core/GlobalFunctions.h  |   187 +
 third-party/Eigen/src/Core/IO.h               |   225 +
 third-party/Eigen/src/Core/Inverse.h          |   118 +
 third-party/Eigen/src/Core/Map.h              |   171 +
 third-party/Eigen/src/Core/MapBase.h          |   308 +
 third-party/Eigen/src/Core/MathFunctions.h    |  1490 ++
 .../Eigen/src/Core/MathFunctionsImpl.h        |   101 +
 third-party/Eigen/src/Core/Matrix.h           |   459 +
 third-party/Eigen/src/Core/MatrixBase.h       |   530 +
 third-party/Eigen/src/Core/NestByValue.h      |   110 +
 third-party/Eigen/src/Core/NoAlias.h          |   108 +
 third-party/Eigen/src/Core/NumTraits.h        |   248 +
 .../Eigen/src/Core/PermutationMatrix.h        |   605 +
 third-party/Eigen/src/Core/PlainObjectBase.h  |  1037 +
 third-party/Eigen/src/Core/Product.h          |   186 +
 .../Eigen/src/Core/ProductEvaluators.h        |  1138 ++
 third-party/Eigen/src/Core/Random.h           |   182 +
 third-party/Eigen/src/Core/Redux.h            |   505 +
 third-party/Eigen/src/Core/Ref.h              |   284 +
 third-party/Eigen/src/Core/Replicate.h        |   142 +
 third-party/Eigen/src/Core/ReturnByValue.h    |   117 +
 third-party/Eigen/src/Core/Reverse.h          |   211 +
 third-party/Eigen/src/Core/Select.h           |   162 +
 third-party/Eigen/src/Core/SelfAdjointView.h  |   352 +
 .../Eigen/src/Core/SelfCwiseBinaryOp.h        |    47 +
 third-party/Eigen/src/Core/Solve.h            |   188 +
 third-party/Eigen/src/Core/SolveTriangular.h  |   235 +
 third-party/Eigen/src/Core/SolverBase.h       |   130 +
 third-party/Eigen/src/Core/StableNorm.h       |   221 +
 third-party/Eigen/src/Core/Stride.h           |   111 +
 third-party/Eigen/src/Core/Swap.h             |    67 +
 third-party/Eigen/src/Core/Transpose.h        |   405 +
 third-party/Eigen/src/Core/Transpositions.h   |   368 +
 third-party/Eigen/src/Core/TriangularMatrix.h |   985 +
 third-party/Eigen/src/Core/VectorBlock.h      |    96 +
 third-party/Eigen/src/Core/VectorwiseOp.h     |   695 +
 third-party/Eigen/src/Core/Visitor.h          |   273 +
 third-party/Eigen/src/Core/arch/AVX/Complex.h |   451 +
 .../Eigen/src/Core/arch/AVX/MathFunctions.h   |   439 +
 .../Eigen/src/Core/arch/AVX/PacketMath.h      |   637 +
 .../Eigen/src/Core/arch/AVX/TypeCasting.h     |    51 +
 .../src/Core/arch/AVX512/MathFunctions.h      |   389 +
 .../Eigen/src/Core/arch/AVX512/PacketMath.h   |  1305 ++
 .../Eigen/src/Core/arch/AltiVec/Complex.h     |   430 +
 .../src/Core/arch/AltiVec/MathFunctions.h     |   322 +
 .../Eigen/src/Core/arch/AltiVec/PacketMath.h  |  1061 +
 .../Eigen/src/Core/arch/CUDA/Complex.h        |   103 +
 third-party/Eigen/src/Core/arch/CUDA/Half.h   |   675 +
 .../Eigen/src/Core/arch/CUDA/MathFunctions.h  |    91 +
 .../Eigen/src/Core/arch/CUDA/PacketMath.h     |   333 +
 .../Eigen/src/Core/arch/CUDA/PacketMathHalf.h |  1124 ++
 .../Eigen/src/Core/arch/CUDA/TypeCasting.h    |   212 +
 .../Eigen/src/Core/arch/Default/ConjHelper.h  |    29 +
 .../Eigen/src/Core/arch/Default/Settings.h    |    49 +
 .../Eigen/src/Core/arch/NEON/Complex.h        |   490 +
 .../Eigen/src/Core/arch/NEON/MathFunctions.h  |    91 +
 .../Eigen/src/Core/arch/NEON/PacketMath.h     |   760 +
 third-party/Eigen/src/Core/arch/SSE/Complex.h |   471 +
 .../Eigen/src/Core/arch/SSE/MathFunctions.h   |   562 +
 .../Eigen/src/Core/arch/SSE/PacketMath.h      |   895 +
 .../Eigen/src/Core/arch/SSE/TypeCasting.h     |    77 +
 .../Eigen/src/Core/arch/ZVector/Complex.h     |   397 +
 .../src/Core/arch/ZVector/MathFunctions.h     |   137 +
 .../Eigen/src/Core/arch/ZVector/PacketMath.h  |   943 +
 .../src/Core/functors/AssignmentFunctors.h    |   168 +
 .../Eigen/src/Core/functors/BinaryFunctors.h  |   475 +
 .../Eigen/src/Core/functors/NullaryFunctors.h |   188 +
 .../Eigen/src/Core/functors/StlFunctors.h     |   136 +
 .../Eigen/src/Core/functors/TernaryFunctors.h |    25 +
 .../Eigen/src/Core/functors/UnaryFunctors.h   |   792 +
 .../Core/products/GeneralBlockPanelKernel.h   |  2157 ++
 .../src/Core/products/GeneralMatrixMatrix.h   |   495 +
 .../products/GeneralMatrixMatrixTriangular.h  |   317 +
 .../GeneralMatrixMatrixTriangular_BLAS.h      |   145 +
 .../Core/products/GeneralMatrixMatrix_BLAS.h  |   124 +
 .../src/Core/products/GeneralMatrixVector.h   |   619 +
 .../Core/products/GeneralMatrixVector_BLAS.h  |   136 +
 .../Eigen/src/Core/products/Parallelizer.h    |   166 +
 .../Core/products/SelfadjointMatrixMatrix.h   |   527 +
 .../products/SelfadjointMatrixMatrix_BLAS.h   |   295 +
 .../Core/products/SelfadjointMatrixVector.h   |   260 +
 .../products/SelfadjointMatrixVector_BLAS.h   |   118 +
 .../src/Core/products/SelfadjointProduct.h    |   133 +
 .../Core/products/SelfadjointRank2Update.h    |    93 +
 .../Core/products/TriangularMatrixMatrix.h    |   472 +
 .../products/TriangularMatrixMatrix_BLAS.h    |   317 +
 .../Core/products/TriangularMatrixVector.h    |   350 +
 .../products/TriangularMatrixVector_BLAS.h    |   255 +
 .../Core/products/TriangularSolverMatrix.h    |   335 +
 .../products/TriangularSolverMatrix_BLAS.h    |   167 +
 .../Core/products/TriangularSolverVector.h    |   145 +
 third-party/Eigen/src/Core/util/BlasUtil.h    |   499 +
 third-party/Eigen/src/Core/util/Constants.h   |   547 +
 .../src/Core/util/DisableStupidWarnings.h     |    94 +
 .../Eigen/src/Core/util/ForwardDeclarations.h |   298 +
 third-party/Eigen/src/Core/util/MKL_support.h |   130 +
 third-party/Eigen/src/Core/util/Macros.h      |  1053 +
 third-party/Eigen/src/Core/util/Memory.h      |   993 +
 third-party/Eigen/src/Core/util/Meta.h        |   575 +
 third-party/Eigen/src/Core/util/NonMPL2.h     |     3 +
 .../src/Core/util/ReenableStupidWarnings.h    |    31 +
 .../Eigen/src/Core/util/StaticAssert.h        |   218 +
 third-party/Eigen/src/Core/util/XprHelper.h   |   838 +
 .../src/Eigenvalues/ComplexEigenSolver.h      |   346 +
 .../Eigen/src/Eigenvalues/ComplexSchur.h      |   462 +
 .../src/Eigenvalues/ComplexSchur_LAPACKE.h    |    91 +
 .../Eigen/src/Eigenvalues/EigenSolver.h       |   622 +
 .../src/Eigenvalues/GeneralizedEigenSolver.h  |   418 +
 .../GeneralizedSelfAdjointEigenSolver.h       |   226 +
 .../src/Eigenvalues/HessenbergDecomposition.h |   374 +
 .../src/Eigenvalues/MatrixBaseEigenvalues.h   |   158 +
 third-party/Eigen/src/Eigenvalues/RealQZ.h    |   654 +
 third-party/Eigen/src/Eigenvalues/RealSchur.h |   553 +
 .../Eigen/src/Eigenvalues/RealSchur_LAPACKE.h |    77 +
 .../src/Eigenvalues/SelfAdjointEigenSolver.h  |   871 +
 .../SelfAdjointEigenSolver_LAPACKE.h          |    87 +
 .../src/Eigenvalues/Tridiagonalization.h      |   556 +
 third-party/Eigen/src/Geometry/AlignedBox.h   |   392 +
 third-party/Eigen/src/Geometry/AngleAxis.h    |   247 +
 third-party/Eigen/src/Geometry/EulerAngles.h  |   114 +
 third-party/Eigen/src/Geometry/Homogeneous.h  |   497 +
 third-party/Eigen/src/Geometry/Hyperplane.h   |   282 +
 third-party/Eigen/src/Geometry/OrthoMethods.h |   234 +
 .../Eigen/src/Geometry/ParametrizedLine.h     |   195 +
 third-party/Eigen/src/Geometry/Quaternion.h   |   832 +
 third-party/Eigen/src/Geometry/Rotation2D.h   |   199 +
 third-party/Eigen/src/Geometry/RotationBase.h |   206 +
 third-party/Eigen/src/Geometry/Scaling.h      |   170 +
 third-party/Eigen/src/Geometry/Transform.h    |  1542 ++
 third-party/Eigen/src/Geometry/Translation.h  |   202 +
 third-party/Eigen/src/Geometry/Umeyama.h      |   166 +
 .../Eigen/src/Geometry/arch/Geometry_SSE.h    |   161 +
 .../Eigen/src/Householder/BlockHouseholder.h  |   103 +
 .../Eigen/src/Householder/Householder.h       |   172 +
 .../src/Householder/HouseholderSequence.h     |   470 +
 third-party/Eigen/src/Jacobi/Jacobi.h         |   462 +
 third-party/Eigen/src/LU/Determinant.h        |   101 +
 third-party/Eigen/src/LU/FullPivLU.h          |   891 +
 third-party/Eigen/src/LU/InverseImpl.h        |   415 +
 third-party/Eigen/src/LU/PartialPivLU.h       |   614 +
 .../Eigen/src/LU/PartialPivLU_LAPACKE.h       |    83 +
 third-party/Eigen/src/LU/arch/Inverse_SSE.h   |   338 +
 .../src/MatrixFunctions/MatrixExponential.h   |   442 +
 .../src/MatrixFunctions/MatrixFunction.h      |   580 +
 .../src/MatrixFunctions/MatrixLogarithm.h     |   373 +
 .../Eigen/src/MatrixFunctions/MatrixPower.h   |   709 +
 .../src/MatrixFunctions/MatrixSquareRoot.h    |   368 +
 .../Eigen/src/MatrixFunctions/StemFunction.h  |   117 +
 .../Eigen/src/QR/ColPivHouseholderQR.h        |   653 +
 .../src/QR/ColPivHouseholderQR_LAPACKE.h      |    97 +
 .../src/QR/CompleteOrthogonalDecomposition.h  |   562 +
 .../Eigen/src/QR/FullPivHouseholderQR.h       |   676 +
 third-party/Eigen/src/QR/HouseholderQR.h      |   409 +
 .../Eigen/src/QR/HouseholderQR_LAPACKE.h      |    68 +
 third-party/Eigen/src/SVD/BDCSVD.h            |  1277 ++
 third-party/Eigen/src/SVD/JacobiSVD.h         |   804 +
 third-party/Eigen/src/SVD/JacobiSVD_LAPACKE.h |    91 +
 third-party/Eigen/src/SVD/SVDBase.h           |   315 +
 .../Eigen/src/SVD/UpperBidiagonalization.h    |   414 +
 third-party/Eigen/src/misc/Image.h            |    82 +
 third-party/Eigen/src/misc/Kernel.h           |    79 +
 third-party/Eigen/src/misc/RealSvd2x2.h       |    55 +
 third-party/Eigen/src/misc/blas.h             |   440 +
 third-party/Eigen/src/misc/lapack.h           |   152 +
 third-party/Eigen/src/misc/lapacke.h          | 16291 ++++++++++++++++
 third-party/Eigen/src/misc/lapacke_mangling.h |    17 +
 .../Eigen/src/plugins/ArrayCwiseBinaryOps.h   |   332 +
 .../Eigen/src/plugins/ArrayCwiseUnaryOps.h    |   552 +
 third-party/Eigen/src/plugins/BlockMethods.h  |  1058 +
 .../Eigen/src/plugins/CommonCwiseBinaryOps.h  |   115 +
 .../Eigen/src/plugins/CommonCwiseUnaryOps.h   |   163 +
 .../Eigen/src/plugins/MatrixCwiseBinaryOps.h  |   152 +
 .../Eigen/src/plugins/MatrixCwiseUnaryOps.h   |    85 +
 221 files changed, 95749 insertions(+), 28 deletions(-)
 delete mode 100644 third-party/CMakeLists.txt
 create mode 100644 third-party/Eigen/Cholesky
 create mode 100644 third-party/Eigen/Core
 create mode 100644 third-party/Eigen/Eigenvalues
 create mode 100644 third-party/Eigen/Geometry
 create mode 100644 third-party/Eigen/Householder
 create mode 100644 third-party/Eigen/Jacobi
 create mode 100644 third-party/Eigen/LU
 create mode 100644 third-party/Eigen/MatrixFunctions
 create mode 100644 third-party/Eigen/QR
 create mode 100644 third-party/Eigen/SVD
 create mode 100644 third-party/Eigen/src/Cholesky/LDLT.h
 create mode 100644 third-party/Eigen/src/Cholesky/LLT.h
 create mode 100644 third-party/Eigen/src/Cholesky/LLT_LAPACKE.h
 create mode 100644 third-party/Eigen/src/Core/Array.h
 create mode 100644 third-party/Eigen/src/Core/ArrayBase.h
 create mode 100644 third-party/Eigen/src/Core/ArrayWrapper.h
 create mode 100644 third-party/Eigen/src/Core/Assign.h
 create mode 100644 third-party/Eigen/src/Core/AssignEvaluator.h
 create mode 100644 third-party/Eigen/src/Core/Assign_MKL.h
 create mode 100644 third-party/Eigen/src/Core/BandMatrix.h
 create mode 100644 third-party/Eigen/src/Core/Block.h
 create mode 100644 third-party/Eigen/src/Core/BooleanRedux.h
 create mode 100644 third-party/Eigen/src/Core/CommaInitializer.h
 create mode 100644 third-party/Eigen/src/Core/ConditionEstimator.h
 create mode 100644 third-party/Eigen/src/Core/CoreEvaluators.h
 create mode 100644 third-party/Eigen/src/Core/CoreIterators.h
 create mode 100644 third-party/Eigen/src/Core/CwiseBinaryOp.h
 create mode 100644 third-party/Eigen/src/Core/CwiseNullaryOp.h
 create mode 100644 third-party/Eigen/src/Core/CwiseTernaryOp.h
 create mode 100644 third-party/Eigen/src/Core/CwiseUnaryOp.h
 create mode 100644 third-party/Eigen/src/Core/CwiseUnaryView.h
 create mode 100644 third-party/Eigen/src/Core/DenseBase.h
 create mode 100644 third-party/Eigen/src/Core/DenseCoeffsBase.h
 create mode 100644 third-party/Eigen/src/Core/DenseStorage.h
 create mode 100644 third-party/Eigen/src/Core/Diagonal.h
 create mode 100644 third-party/Eigen/src/Core/DiagonalMatrix.h
 create mode 100644 third-party/Eigen/src/Core/DiagonalProduct.h
 create mode 100644 third-party/Eigen/src/Core/Dot.h
 create mode 100644 third-party/Eigen/src/Core/EigenBase.h
 create mode 100644 third-party/Eigen/src/Core/ForceAlignedAccess.h
 create mode 100644 third-party/Eigen/src/Core/Fuzzy.h
 create mode 100644 third-party/Eigen/src/Core/GeneralProduct.h
 create mode 100644 third-party/Eigen/src/Core/GenericPacketMath.h
 create mode 100644 third-party/Eigen/src/Core/GlobalFunctions.h
 create mode 100644 third-party/Eigen/src/Core/IO.h
 create mode 100644 third-party/Eigen/src/Core/Inverse.h
 create mode 100644 third-party/Eigen/src/Core/Map.h
 create mode 100644 third-party/Eigen/src/Core/MapBase.h
 create mode 100644 third-party/Eigen/src/Core/MathFunctions.h
 create mode 100644 third-party/Eigen/src/Core/MathFunctionsImpl.h
 create mode 100644 third-party/Eigen/src/Core/Matrix.h
 create mode 100644 third-party/Eigen/src/Core/MatrixBase.h
 create mode 100644 third-party/Eigen/src/Core/NestByValue.h
 create mode 100644 third-party/Eigen/src/Core/NoAlias.h
 create mode 100644 third-party/Eigen/src/Core/NumTraits.h
 create mode 100644 third-party/Eigen/src/Core/PermutationMatrix.h
 create mode 100644 third-party/Eigen/src/Core/PlainObjectBase.h
 create mode 100644 third-party/Eigen/src/Core/Product.h
 create mode 100644 third-party/Eigen/src/Core/ProductEvaluators.h
 create mode 100644 third-party/Eigen/src/Core/Random.h
 create mode 100644 third-party/Eigen/src/Core/Redux.h
 create mode 100644 third-party/Eigen/src/Core/Ref.h
 create mode 100644 third-party/Eigen/src/Core/Replicate.h
 create mode 100644 third-party/Eigen/src/Core/ReturnByValue.h
 create mode 100644 third-party/Eigen/src/Core/Reverse.h
 create mode 100644 third-party/Eigen/src/Core/Select.h
 create mode 100644 third-party/Eigen/src/Core/SelfAdjointView.h
 create mode 100644 third-party/Eigen/src/Core/SelfCwiseBinaryOp.h
 create mode 100644 third-party/Eigen/src/Core/Solve.h
 create mode 100644 third-party/Eigen/src/Core/SolveTriangular.h
 create mode 100644 third-party/Eigen/src/Core/SolverBase.h
 create mode 100644 third-party/Eigen/src/Core/StableNorm.h
 create mode 100644 third-party/Eigen/src/Core/Stride.h
 create mode 100644 third-party/Eigen/src/Core/Swap.h
 create mode 100644 third-party/Eigen/src/Core/Transpose.h
 create mode 100644 third-party/Eigen/src/Core/Transpositions.h
 create mode 100644 third-party/Eigen/src/Core/TriangularMatrix.h
 create mode 100644 third-party/Eigen/src/Core/VectorBlock.h
 create mode 100644 third-party/Eigen/src/Core/VectorwiseOp.h
 create mode 100644 third-party/Eigen/src/Core/Visitor.h
 create mode 100644 third-party/Eigen/src/Core/arch/AVX/Complex.h
 create mode 100644 third-party/Eigen/src/Core/arch/AVX/MathFunctions.h
 create mode 100644 third-party/Eigen/src/Core/arch/AVX/PacketMath.h
 create mode 100644 third-party/Eigen/src/Core/arch/AVX/TypeCasting.h
 create mode 100644 third-party/Eigen/src/Core/arch/AVX512/MathFunctions.h
 create mode 100644 third-party/Eigen/src/Core/arch/AVX512/PacketMath.h
 create mode 100644 third-party/Eigen/src/Core/arch/AltiVec/Complex.h
 create mode 100644 third-party/Eigen/src/Core/arch/AltiVec/MathFunctions.h
 create mode 100644 third-party/Eigen/src/Core/arch/AltiVec/PacketMath.h
 create mode 100644 third-party/Eigen/src/Core/arch/CUDA/Complex.h
 create mode 100644 third-party/Eigen/src/Core/arch/CUDA/Half.h
 create mode 100644 third-party/Eigen/src/Core/arch/CUDA/MathFunctions.h
 create mode 100644 third-party/Eigen/src/Core/arch/CUDA/PacketMath.h
 create mode 100644 third-party/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
 create mode 100644 third-party/Eigen/src/Core/arch/CUDA/TypeCasting.h
 create mode 100644 third-party/Eigen/src/Core/arch/Default/ConjHelper.h
 create mode 100644 third-party/Eigen/src/Core/arch/Default/Settings.h
 create mode 100644 third-party/Eigen/src/Core/arch/NEON/Complex.h
 create mode 100644 third-party/Eigen/src/Core/arch/NEON/MathFunctions.h
 create mode 100644 third-party/Eigen/src/Core/arch/NEON/PacketMath.h
 create mode 100644 third-party/Eigen/src/Core/arch/SSE/Complex.h
 create mode 100644 third-party/Eigen/src/Core/arch/SSE/MathFunctions.h
 create mode 100644 third-party/Eigen/src/Core/arch/SSE/PacketMath.h
 create mode 100644 third-party/Eigen/src/Core/arch/SSE/TypeCasting.h
 create mode 100644 third-party/Eigen/src/Core/arch/ZVector/Complex.h
 create mode 100644 third-party/Eigen/src/Core/arch/ZVector/MathFunctions.h
 create mode 100644 third-party/Eigen/src/Core/arch/ZVector/PacketMath.h
 create mode 100644 third-party/Eigen/src/Core/functors/AssignmentFunctors.h
 create mode 100644 third-party/Eigen/src/Core/functors/BinaryFunctors.h
 create mode 100644 third-party/Eigen/src/Core/functors/NullaryFunctors.h
 create mode 100644 third-party/Eigen/src/Core/functors/StlFunctors.h
 create mode 100644 third-party/Eigen/src/Core/functors/TernaryFunctors.h
 create mode 100644 third-party/Eigen/src/Core/functors/UnaryFunctors.h
 create mode 100644 third-party/Eigen/src/Core/products/GeneralBlockPanelKernel.h
 create mode 100644 third-party/Eigen/src/Core/products/GeneralMatrixMatrix.h
 create mode 100644 third-party/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
 create mode 100644 third-party/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
 create mode 100644 third-party/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
 create mode 100644 third-party/Eigen/src/Core/products/GeneralMatrixVector.h
 create mode 100644 third-party/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
 create mode 100644 third-party/Eigen/src/Core/products/Parallelizer.h
 create mode 100644 third-party/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
 create mode 100644 third-party/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
 create mode 100644 third-party/Eigen/src/Core/products/SelfadjointMatrixVector.h
 create mode 100644 third-party/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
 create mode 100644 third-party/Eigen/src/Core/products/SelfadjointProduct.h
 create mode 100644 third-party/Eigen/src/Core/products/SelfadjointRank2Update.h
 create mode 100644 third-party/Eigen/src/Core/products/TriangularMatrixMatrix.h
 create mode 100644 third-party/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
 create mode 100644 third-party/Eigen/src/Core/products/TriangularMatrixVector.h
 create mode 100644 third-party/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
 create mode 100644 third-party/Eigen/src/Core/products/TriangularSolverMatrix.h
 create mode 100644 third-party/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
 create mode 100644 third-party/Eigen/src/Core/products/TriangularSolverVector.h
 create mode 100644 third-party/Eigen/src/Core/util/BlasUtil.h
 create mode 100644 third-party/Eigen/src/Core/util/Constants.h
 create mode 100644 third-party/Eigen/src/Core/util/DisableStupidWarnings.h
 create mode 100644 third-party/Eigen/src/Core/util/ForwardDeclarations.h
 create mode 100644 third-party/Eigen/src/Core/util/MKL_support.h
 create mode 100644 third-party/Eigen/src/Core/util/Macros.h
 create mode 100644 third-party/Eigen/src/Core/util/Memory.h
 create mode 100644 third-party/Eigen/src/Core/util/Meta.h
 create mode 100644 third-party/Eigen/src/Core/util/NonMPL2.h
 create mode 100644 third-party/Eigen/src/Core/util/ReenableStupidWarnings.h
 create mode 100644 third-party/Eigen/src/Core/util/StaticAssert.h
 create mode 100644 third-party/Eigen/src/Core/util/XprHelper.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/ComplexEigenSolver.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/ComplexSchur.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/EigenSolver.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/HessenbergDecomposition.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/RealQZ.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/RealSchur.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h
 create mode 100644 third-party/Eigen/src/Eigenvalues/Tridiagonalization.h
 create mode 100644 third-party/Eigen/src/Geometry/AlignedBox.h
 create mode 100644 third-party/Eigen/src/Geometry/AngleAxis.h
 create mode 100644 third-party/Eigen/src/Geometry/EulerAngles.h
 create mode 100644 third-party/Eigen/src/Geometry/Homogeneous.h
 create mode 100644 third-party/Eigen/src/Geometry/Hyperplane.h
 create mode 100644 third-party/Eigen/src/Geometry/OrthoMethods.h
 create mode 100644 third-party/Eigen/src/Geometry/ParametrizedLine.h
 create mode 100644 third-party/Eigen/src/Geometry/Quaternion.h
 create mode 100644 third-party/Eigen/src/Geometry/Rotation2D.h
 create mode 100644 third-party/Eigen/src/Geometry/RotationBase.h
 create mode 100644 third-party/Eigen/src/Geometry/Scaling.h
 create mode 100644 third-party/Eigen/src/Geometry/Transform.h
 create mode 100644 third-party/Eigen/src/Geometry/Translation.h
 create mode 100644 third-party/Eigen/src/Geometry/Umeyama.h
 create mode 100644 third-party/Eigen/src/Geometry/arch/Geometry_SSE.h
 create mode 100644 third-party/Eigen/src/Householder/BlockHouseholder.h
 create mode 100644 third-party/Eigen/src/Householder/Householder.h
 create mode 100644 third-party/Eigen/src/Householder/HouseholderSequence.h
 create mode 100644 third-party/Eigen/src/Jacobi/Jacobi.h
 create mode 100644 third-party/Eigen/src/LU/Determinant.h
 create mode 100644 third-party/Eigen/src/LU/FullPivLU.h
 create mode 100644 third-party/Eigen/src/LU/InverseImpl.h
 create mode 100644 third-party/Eigen/src/LU/PartialPivLU.h
 create mode 100644 third-party/Eigen/src/LU/PartialPivLU_LAPACKE.h
 create mode 100644 third-party/Eigen/src/LU/arch/Inverse_SSE.h
 create mode 100644 third-party/Eigen/src/MatrixFunctions/MatrixExponential.h
 create mode 100644 third-party/Eigen/src/MatrixFunctions/MatrixFunction.h
 create mode 100644 third-party/Eigen/src/MatrixFunctions/MatrixLogarithm.h
 create mode 100644 third-party/Eigen/src/MatrixFunctions/MatrixPower.h
 create mode 100644 third-party/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
 create mode 100644 third-party/Eigen/src/MatrixFunctions/StemFunction.h
 create mode 100644 third-party/Eigen/src/QR/ColPivHouseholderQR.h
 create mode 100644 third-party/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h
 create mode 100644 third-party/Eigen/src/QR/CompleteOrthogonalDecomposition.h
 create mode 100644 third-party/Eigen/src/QR/FullPivHouseholderQR.h
 create mode 100644 third-party/Eigen/src/QR/HouseholderQR.h
 create mode 100644 third-party/Eigen/src/QR/HouseholderQR_LAPACKE.h
 create mode 100644 third-party/Eigen/src/SVD/BDCSVD.h
 create mode 100644 third-party/Eigen/src/SVD/JacobiSVD.h
 create mode 100644 third-party/Eigen/src/SVD/JacobiSVD_LAPACKE.h
 create mode 100644 third-party/Eigen/src/SVD/SVDBase.h
 create mode 100644 third-party/Eigen/src/SVD/UpperBidiagonalization.h
 create mode 100644 third-party/Eigen/src/misc/Image.h
 create mode 100644 third-party/Eigen/src/misc/Kernel.h
 create mode 100644 third-party/Eigen/src/misc/RealSvd2x2.h
 create mode 100644 third-party/Eigen/src/misc/blas.h
 create mode 100644 third-party/Eigen/src/misc/lapack.h
 create mode 100644 third-party/Eigen/src/misc/lapacke.h
 create mode 100644 third-party/Eigen/src/misc/lapacke_mangling.h
 create mode 100644 third-party/Eigen/src/plugins/ArrayCwiseBinaryOps.h
 create mode 100644 third-party/Eigen/src/plugins/ArrayCwiseUnaryOps.h
 create mode 100644 third-party/Eigen/src/plugins/BlockMethods.h
 create mode 100644 third-party/Eigen/src/plugins/CommonCwiseBinaryOps.h
 create mode 100644 third-party/Eigen/src/plugins/CommonCwiseUnaryOps.h
 create mode 100644 third-party/Eigen/src/plugins/MatrixCwiseBinaryOps.h
 create mode 100644 third-party/Eigen/src/plugins/MatrixCwiseUnaryOps.h

diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml
index a411c00d..3151d789 100644
--- a/.github/workflows/analysis.yml
+++ b/.github/workflows/analysis.yml
@@ -53,7 +53,7 @@ jobs:
             REPORT_PR_CHANGES_ONLY: false
         run: |
             analysis_file="analysis.txt"
-            cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError --suppress=*:*third-party/eigen/*"
+            cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError --suppress=*:*third-party/Eigen/*"
             cppcheck -j4 $cppcheck_params --project=$(pwd)/build/compile_commands.json --output-file=$analysis_file
             # Since cppcheck does not support OpenCL and CUDA, we need to check these files separately
             find $(pwd)/reg-lib/cl/. -name "*.cl" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 57d91902..72ee7d5a 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -129,12 +129,11 @@ endif(NOT BUILD_ALL_DEP)
 include_directories(${CMAKE_SOURCE_DIR}/reg-io/png)
 include_directories(${PNG_INCLUDE_DIR})
 #-----------------------------------------------------------------------------
+include_directories(${CMAKE_BINARY_DIR})
+include_directories(${CMAKE_SOURCE_DIR}/reg-io)
 include_directories(${CMAKE_SOURCE_DIR}/reg-lib)
 include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cpu)
-include_directories(${CMAKE_SOURCE_DIR}/reg-io)
 include_directories(${CMAKE_SOURCE_DIR}/third-party)
-include_directories(${CMAKE_BINARY_DIR})
-include_directories(${CMAKE_BINARY_DIR}/third-party/eigen)
 #-----------------------------------------------------------------------------
 if(USE_OPENCL)
   # Find the OpenCL package
@@ -203,7 +202,6 @@ if(BUILD_TESTING)
   add_definitions(-DBUILD_TESTS)
 endif(BUILD_TESTING)
 #-----------------------------------------------------------------------------
-add_subdirectory(third-party)
 add_subdirectory(reg-io)
 add_subdirectory(reg-lib)
 add_subdirectory(reg-apps)
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 7b53aa00..816d01be 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-419
+420
diff --git a/reg-lib/cpu/Maths.cpp b/reg-lib/cpu/Maths.cpp
index 6abb2f72..b1c4c89f 100644
--- a/reg-lib/cpu/Maths.cpp
+++ b/reg-lib/cpu/Maths.cpp
@@ -2,7 +2,7 @@
 
 #include "_reg_tools.h"
 #include "Eigen/Core"
-#include "unsupported/Eigen/MatrixFunctions"
+#include "Eigen/MatrixFunctions"
 
 /* *************************************************************** */
 namespace NiftyReg {
diff --git a/third-party/CMakeLists.txt b/third-party/CMakeLists.txt
deleted file mode 100644
index a2cbbd99..00000000
--- a/third-party/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-#-----------------------------------------------------------------------------
-# Eigen version 3.3.*
-if(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen)
-  message(STATUS "Cloning Eigen...")
-  execute_process(
-    COMMAND git clone -q -b 3.3 https://gitlab.com/libeigen/eigen.git
-    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/third-party
-    RESULT_VARIABLE result
-  )
-  if(result)
-    message(FATAL_ERROR "Failed to clone Eigen!")
-  endif(result)
-  message(STATUS "Eigen has been cloned into ${CMAKE_BINARY_DIR}/third-party/eigen")
-endif(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen)
-#-----------------------------------------------------------------------------
-if(OPENMP_FOUND)
-	if(USE_OPENMP)
-		add_definitions(-DEIGEN_HAS_OPENMP)
-	endif(USE_OPENMP)
-endif(OPENMP_FOUND)
-#-----------------------------------------------------------------------------
diff --git a/third-party/Eigen/Cholesky b/third-party/Eigen/Cholesky
new file mode 100644
index 00000000..1332b540
--- /dev/null
+++ b/third-party/Eigen/Cholesky
@@ -0,0 +1,46 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CHOLESKY_MODULE_H
+#define EIGEN_CHOLESKY_MODULE_H
+
+#include "Core"
+#include "Jacobi"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+/** \defgroup Cholesky_Module Cholesky module
+  *
+  *
+  *
+  * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices.
+  * Those decompositions are also accessible via the following methods:
+  *  - MatrixBase::llt()
+  *  - MatrixBase::ldlt()
+  *  - SelfAdjointView::llt()
+  *  - SelfAdjointView::ldlt()
+  *
+  * \code
+  * #include <Eigen/Cholesky>
+  * \endcode
+  */
+
+#include "src/Cholesky/LLT.h"
+#include "src/Cholesky/LDLT.h"
+#ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
+#include "src/misc/lapacke.h"
+#endif
+#include "src/Cholesky/LLT_LAPACKE.h"
+#endif
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_CHOLESKY_MODULE_H
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/third-party/Eigen/Core b/third-party/Eigen/Core
new file mode 100644
index 00000000..524c2f51
--- /dev/null
+++ b/third-party/Eigen/Core
@@ -0,0 +1,542 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2011 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CORE_H
+#define EIGEN_CORE_H
+
+// first thing Eigen does: stop the compiler from committing suicide
+#include "src/Core/util/DisableStupidWarnings.h"
+
+#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
+  #define EIGEN_CUDACC __CUDACC__
+#endif
+
+#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
+  #define EIGEN_CUDA_ARCH __CUDA_ARCH__
+#endif
+
+#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
+#define EIGEN_CUDACC_VER  ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
+#elif defined(__CUDACC_VER__)
+#define EIGEN_CUDACC_VER __CUDACC_VER__
+#else
+#define EIGEN_CUDACC_VER 0
+#endif
+
+// Handle NVCC/CUDA/SYCL
+#if defined(EIGEN_CUDACC) || defined(__SYCL_DEVICE_ONLY__)
+  // Do not try asserts on CUDA and SYCL!
+  #ifndef EIGEN_NO_DEBUG
+  #define EIGEN_NO_DEBUG
+  #endif
+
+  #ifdef EIGEN_INTERNAL_DEBUGGING
+  #undef EIGEN_INTERNAL_DEBUGGING
+  #endif
+
+  #ifdef EIGEN_EXCEPTIONS
+  #undef EIGEN_EXCEPTIONS
+  #endif
+
+  // All functions callable from CUDA code must be qualified with __device__
+  #ifdef EIGEN_CUDACC
+    // Do not try to vectorize on CUDA and SYCL!
+    #ifndef EIGEN_DONT_VECTORIZE
+    #define EIGEN_DONT_VECTORIZE
+    #endif
+
+    #define EIGEN_DEVICE_FUNC __host__ __device__
+    // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro
+    // works properly on the device side
+    #include <cuda_runtime.h>
+  #else
+    #define EIGEN_DEVICE_FUNC
+  #endif
+
+#else
+  #define EIGEN_DEVICE_FUNC
+
+#endif
+
+// When compiling CUDA device code with NVCC, pull in math functions from the
+// global namespace.  In host mode, and when device doee with clang, use the
+// std versions.
+#if defined(__CUDA_ARCH__) && defined(__NVCC__)
+  #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
+#else
+  #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
+#endif
+
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL)
+  #define EIGEN_EXCEPTIONS
+#endif
+
+#ifdef EIGEN_EXCEPTIONS
+  #include <new>
+#endif
+
+// then include this file where all our macros are defined. It's really important to do it first because
+// it's where we do all the alignment settings (platform detection and honoring the user's will if he
+// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
+#include "src/Core/util/Macros.h"
+
+// Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3)
+// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
+#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6)
+  #pragma GCC optimize ("-fno-ipa-cp-clone")
+#endif
+
+#include <complex>
+
+// this include file manages BLAS and MKL related macros
+// and inclusion of their respective header files
+#include "src/Core/util/MKL_support.h"
+
+// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
+// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
+#if EIGEN_MAX_ALIGN_BYTES==0
+  #ifndef EIGEN_DONT_VECTORIZE
+    #define EIGEN_DONT_VECTORIZE
+  #endif
+#endif
+
+#if EIGEN_COMP_MSVC
+  #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
+  #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
+    // Remember that usage of defined() in a #define is undefined by the standard.
+    // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
+    #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
+      #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
+    #endif
+  #endif
+#else
+  // Remember that usage of defined() in a #define is undefined by the standard
+  #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
+    #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
+  #endif
+#endif
+
+#ifndef EIGEN_DONT_VECTORIZE
+
+  #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
+
+    // Defines symbols for compile-time detection of which instructions are
+    // used.
+    // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
+    #define EIGEN_VECTORIZE
+    #define EIGEN_VECTORIZE_SSE
+    #define EIGEN_VECTORIZE_SSE2
+
+    // Detect sse3/ssse3/sse4:
+    // gcc and icc defines __SSE3__, ...
+    // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
+    // want to force the use of those instructions with msvc.
+    #ifdef __SSE3__
+      #define EIGEN_VECTORIZE_SSE3
+    #endif
+    #ifdef __SSSE3__
+      #define EIGEN_VECTORIZE_SSSE3
+    #endif
+    #ifdef __SSE4_1__
+      #define EIGEN_VECTORIZE_SSE4_1
+    #endif
+    #ifdef __SSE4_2__
+      #define EIGEN_VECTORIZE_SSE4_2
+    #endif
+    #ifdef __AVX__
+      #define EIGEN_VECTORIZE_AVX
+      #define EIGEN_VECTORIZE_SSE3
+      #define EIGEN_VECTORIZE_SSSE3
+      #define EIGEN_VECTORIZE_SSE4_1
+      #define EIGEN_VECTORIZE_SSE4_2
+    #endif
+    #ifdef __AVX2__
+      #define EIGEN_VECTORIZE_AVX2
+    #endif
+    #ifdef __FMA__
+      #define EIGEN_VECTORIZE_FMA
+    #endif
+    #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512)
+      #define EIGEN_VECTORIZE_AVX512
+      #define EIGEN_VECTORIZE_AVX2
+      #define EIGEN_VECTORIZE_AVX
+      #define EIGEN_VECTORIZE_FMA
+      #ifdef __AVX512DQ__
+        #define EIGEN_VECTORIZE_AVX512DQ
+      #endif
+      #ifdef __AVX512ER__
+        #define EIGEN_VECTORIZE_AVX512ER
+      #endif
+    #endif
+
+    // include files
+
+    // This extern "C" works around a MINGW-w64 compilation issue
+    // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
+    // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
+    // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
+    // with conflicting linkage.  The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
+    // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
+    // notice that since these are C headers, the extern "C" is theoretically needed anyways.
+    extern "C" {
+      // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
+      // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
+      #if EIGEN_COMP_ICC >= 1110
+        #include <immintrin.h>
+      #else
+        #include <mmintrin.h>
+        #include <emmintrin.h>
+        #include <xmmintrin.h>
+        #ifdef  EIGEN_VECTORIZE_SSE3
+        #include <pmmintrin.h>
+        #endif
+        #ifdef EIGEN_VECTORIZE_SSSE3
+        #include <tmmintrin.h>
+        #endif
+        #ifdef EIGEN_VECTORIZE_SSE4_1
+        #include <smmintrin.h>
+        #endif
+        #ifdef EIGEN_VECTORIZE_SSE4_2
+        #include <nmmintrin.h>
+        #endif
+        #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
+        #include <immintrin.h>
+        #endif
+      #endif
+    } // end extern "C"
+  #elif defined __VSX__
+    #define EIGEN_VECTORIZE
+    #define EIGEN_VECTORIZE_VSX
+    #include <altivec.h>
+    // We need to #undef all these ugly tokens defined in <altivec.h>
+    // => use __vector instead of vector
+    #undef bool
+    #undef vector
+    #undef pixel
+  #elif defined __ALTIVEC__
+    #define EIGEN_VECTORIZE
+    #define EIGEN_VECTORIZE_ALTIVEC
+    #include <altivec.h>
+    // We need to #undef all these ugly tokens defined in <altivec.h>
+    // => use __vector instead of vector
+    #undef bool
+    #undef vector
+    #undef pixel
+  #elif (defined  __ARM_NEON) || (defined __ARM_NEON__)
+    #define EIGEN_VECTORIZE
+    #define EIGEN_VECTORIZE_NEON
+    #include <arm_neon.h>
+  #elif (defined __s390x__ && defined __VEC__)
+    #define EIGEN_VECTORIZE
+    #define EIGEN_VECTORIZE_ZVECTOR
+    #include <vecintrin.h>
+  #endif
+#endif
+
+#if defined(__F16C__) && !defined(EIGEN_COMP_CLANG)
+  // We can use the optimized fp16 to float and float to fp16 conversion routines
+  #define EIGEN_HAS_FP16_C
+#endif
+
+#if defined EIGEN_CUDACC
+  #define EIGEN_VECTORIZE_CUDA
+  #include <vector_types.h>
+  #if EIGEN_CUDACC_VER >= 70500
+    #define EIGEN_HAS_CUDA_FP16
+  #endif
+#endif
+
+#if defined EIGEN_HAS_CUDA_FP16
+  #include <host_defines.h>
+  #include <cuda_fp16.h>
+#endif
+
+#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE)
+  #define EIGEN_HAS_OPENMP
+#endif
+
+#ifdef EIGEN_HAS_OPENMP
+#include <omp.h>
+#endif
+
+// MSVC for windows mobile does not have the errno.h file
+#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM
+#define EIGEN_HAS_ERRNO
+#endif
+
+#ifdef EIGEN_HAS_ERRNO
+#include <cerrno>
+#endif
+#include <cstddef>
+#include <cstdlib>
+#include <cmath>
+#include <cassert>
+#include <functional>
+#include <sstream>
+#ifndef EIGEN_NO_IO
+  #include <iosfwd>
+#endif
+#include <cstring>
+#include <string>
+#include <limits>
+#include <climits> // for CHAR_BIT
+// for min/max:
+#include <algorithm>
+
+// for std::is_nothrow_move_assignable
+#ifdef EIGEN_INCLUDE_TYPE_TRAITS
+#include <type_traits>
+#endif
+
+// for outputting debug info
+#ifdef EIGEN_DEBUG_ASSIGN
+#include <iostream>
+#endif
+
+// required for __cpuid, needs to be included after cmath
+#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE
+  #include <intrin.h>
+#endif
+
+/** \brief Namespace containing all symbols from the %Eigen library. */
+namespace Eigen {
+
+inline static const char *SimdInstructionSetsInUse(void) {
+#if defined(EIGEN_VECTORIZE_AVX512)
+  return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_AVX)
+  return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_2)
+  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_1)
+  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
+#elif defined(EIGEN_VECTORIZE_SSSE3)
+  return "SSE, SSE2, SSE3, SSSE3";
+#elif defined(EIGEN_VECTORIZE_SSE3)
+  return "SSE, SSE2, SSE3";
+#elif defined(EIGEN_VECTORIZE_SSE2)
+  return "SSE, SSE2";
+#elif defined(EIGEN_VECTORIZE_ALTIVEC)
+  return "AltiVec";
+#elif defined(EIGEN_VECTORIZE_VSX)
+  return "VSX";
+#elif defined(EIGEN_VECTORIZE_NEON)
+  return "ARM NEON";
+#elif defined(EIGEN_VECTORIZE_ZVECTOR)
+  return "S390X ZVECTOR";
+#else
+  return "None";
+#endif
+}
+
+} // end namespace Eigen
+
+#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT
+// This will generate an error message:
+#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
+#endif
+
+namespace Eigen {
+
+// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
+// ensure QNX/QCC support
+using std::size_t;
+// gcc 4.6.0 wants std:: for ptrdiff_t
+using std::ptrdiff_t;
+
+}
+
+/** \defgroup Core_Module Core module
+  * This is the main module of Eigen providing dense matrix and vector support
+  * (both fixed and dynamic size) with all the features corresponding to a BLAS library
+  * and much more...
+  *
+  * \code
+  * #include <Eigen/Core>
+  * \endcode
+  */
+
+#include "src/Core/util/Constants.h"
+#include "src/Core/util/Meta.h"
+#include "src/Core/util/ForwardDeclarations.h"
+#include "src/Core/util/StaticAssert.h"
+#include "src/Core/util/XprHelper.h"
+#include "src/Core/util/Memory.h"
+
+#include "src/Core/NumTraits.h"
+#include "src/Core/MathFunctions.h"
+#include "src/Core/GenericPacketMath.h"
+#include "src/Core/MathFunctionsImpl.h"
+#include "src/Core/arch/Default/ConjHelper.h"
+
+#if defined EIGEN_VECTORIZE_AVX512
+  #include "src/Core/arch/SSE/PacketMath.h"
+  #include "src/Core/arch/SSE/MathFunctions.h"
+  #include "src/Core/arch/AVX/PacketMath.h"
+  #include "src/Core/arch/AVX/MathFunctions.h"
+  #include "src/Core/arch/AVX512/PacketMath.h"
+  #include "src/Core/arch/AVX512/MathFunctions.h"
+#elif defined EIGEN_VECTORIZE_AVX
+  // Use AVX for floats and doubles, SSE for integers
+  #include "src/Core/arch/SSE/PacketMath.h"
+  #include "src/Core/arch/SSE/Complex.h"
+  #include "src/Core/arch/SSE/MathFunctions.h"
+  #include "src/Core/arch/AVX/PacketMath.h"
+  #include "src/Core/arch/AVX/MathFunctions.h"
+  #include "src/Core/arch/AVX/Complex.h"
+  #include "src/Core/arch/AVX/TypeCasting.h"
+  #include "src/Core/arch/SSE/TypeCasting.h"
+#elif defined EIGEN_VECTORIZE_SSE
+  #include "src/Core/arch/SSE/PacketMath.h"
+  #include "src/Core/arch/SSE/MathFunctions.h"
+  #include "src/Core/arch/SSE/Complex.h"
+  #include "src/Core/arch/SSE/TypeCasting.h"
+#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
+  #include "src/Core/arch/AltiVec/PacketMath.h"
+  #include "src/Core/arch/AltiVec/MathFunctions.h"
+  #include "src/Core/arch/AltiVec/Complex.h"
+#elif defined EIGEN_VECTORIZE_NEON
+  #include "src/Core/arch/NEON/PacketMath.h"
+  #include "src/Core/arch/NEON/MathFunctions.h"
+  #include "src/Core/arch/NEON/Complex.h"
+#elif defined EIGEN_VECTORIZE_ZVECTOR
+  #include "src/Core/arch/ZVector/PacketMath.h"
+  #include "src/Core/arch/ZVector/MathFunctions.h"
+  #include "src/Core/arch/ZVector/Complex.h"
+#endif
+
+// Half float support
+#include "src/Core/arch/CUDA/Half.h"
+#include "src/Core/arch/CUDA/PacketMathHalf.h"
+#include "src/Core/arch/CUDA/TypeCasting.h"
+
+#if defined EIGEN_VECTORIZE_CUDA
+  #include "src/Core/arch/CUDA/PacketMath.h"
+  #include "src/Core/arch/CUDA/MathFunctions.h"
+#endif
+
+#include "src/Core/arch/Default/Settings.h"
+
+#include "src/Core/functors/TernaryFunctors.h"
+#include "src/Core/functors/BinaryFunctors.h"
+#include "src/Core/functors/UnaryFunctors.h"
+#include "src/Core/functors/NullaryFunctors.h"
+#include "src/Core/functors/StlFunctors.h"
+#include "src/Core/functors/AssignmentFunctors.h"
+
+// Specialized functors to enable the processing of complex numbers
+// on CUDA devices
+#include "src/Core/arch/CUDA/Complex.h"
+
+#include "src/Core/IO.h"
+#include "src/Core/DenseCoeffsBase.h"
+#include "src/Core/DenseBase.h"
+#include "src/Core/MatrixBase.h"
+#include "src/Core/EigenBase.h"
+
+#include "src/Core/Product.h"
+#include "src/Core/CoreEvaluators.h"
+#include "src/Core/AssignEvaluator.h"
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
+                                // at least confirmed with Doxygen 1.5.5 and 1.5.6
+  #include "src/Core/Assign.h"
+#endif
+
+#include "src/Core/ArrayBase.h"
+#include "src/Core/util/BlasUtil.h"
+#include "src/Core/DenseStorage.h"
+#include "src/Core/NestByValue.h"
+
+// #include "src/Core/ForceAlignedAccess.h"
+
+#include "src/Core/ReturnByValue.h"
+#include "src/Core/NoAlias.h"
+#include "src/Core/PlainObjectBase.h"
+#include "src/Core/Matrix.h"
+#include "src/Core/Array.h"
+#include "src/Core/CwiseTernaryOp.h"
+#include "src/Core/CwiseBinaryOp.h"
+#include "src/Core/CwiseUnaryOp.h"
+#include "src/Core/CwiseNullaryOp.h"
+#include "src/Core/CwiseUnaryView.h"
+#include "src/Core/SelfCwiseBinaryOp.h"
+#include "src/Core/Dot.h"
+#include "src/Core/StableNorm.h"
+#include "src/Core/Stride.h"
+#include "src/Core/MapBase.h"
+#include "src/Core/Map.h"
+#include "src/Core/Ref.h"
+#include "src/Core/Block.h"
+#include "src/Core/VectorBlock.h"
+#include "src/Core/Transpose.h"
+#include "src/Core/DiagonalMatrix.h"
+#include "src/Core/Diagonal.h"
+#include "src/Core/DiagonalProduct.h"
+#include "src/Core/Redux.h"
+#include "src/Core/Visitor.h"
+#include "src/Core/Fuzzy.h"
+#include "src/Core/Swap.h"
+#include "src/Core/CommaInitializer.h"
+#include "src/Core/GeneralProduct.h"
+#include "src/Core/Solve.h"
+#include "src/Core/Inverse.h"
+#include "src/Core/SolverBase.h"
+#include "src/Core/PermutationMatrix.h"
+#include "src/Core/Transpositions.h"
+#include "src/Core/TriangularMatrix.h"
+#include "src/Core/SelfAdjointView.h"
+#include "src/Core/products/GeneralBlockPanelKernel.h"
+#include "src/Core/products/Parallelizer.h"
+#include "src/Core/ProductEvaluators.h"
+#include "src/Core/products/GeneralMatrixVector.h"
+#include "src/Core/products/GeneralMatrixMatrix.h"
+#include "src/Core/SolveTriangular.h"
+#include "src/Core/products/GeneralMatrixMatrixTriangular.h"
+#include "src/Core/products/SelfadjointMatrixVector.h"
+#include "src/Core/products/SelfadjointMatrixMatrix.h"
+#include "src/Core/products/SelfadjointProduct.h"
+#include "src/Core/products/SelfadjointRank2Update.h"
+#include "src/Core/products/TriangularMatrixVector.h"
+#include "src/Core/products/TriangularMatrixMatrix.h"
+#include "src/Core/products/TriangularSolverMatrix.h"
+#include "src/Core/products/TriangularSolverVector.h"
+#include "src/Core/BandMatrix.h"
+#include "src/Core/CoreIterators.h"
+#include "src/Core/ConditionEstimator.h"
+
+#include "src/Core/BooleanRedux.h"
+#include "src/Core/Select.h"
+#include "src/Core/VectorwiseOp.h"
+#include "src/Core/Random.h"
+#include "src/Core/Replicate.h"
+#include "src/Core/Reverse.h"
+#include "src/Core/ArrayWrapper.h"
+
+#ifdef EIGEN_USE_BLAS
+#include "src/Core/products/GeneralMatrixMatrix_BLAS.h"
+#include "src/Core/products/GeneralMatrixVector_BLAS.h"
+#include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h"
+#include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h"
+#include "src/Core/products/SelfadjointMatrixVector_BLAS.h"
+#include "src/Core/products/TriangularMatrixMatrix_BLAS.h"
+#include "src/Core/products/TriangularMatrixVector_BLAS.h"
+#include "src/Core/products/TriangularSolverMatrix_BLAS.h"
+#endif // EIGEN_USE_BLAS
+
+#ifdef EIGEN_USE_MKL_VML
+#include "src/Core/Assign_MKL.h"
+#endif
+
+#include "src/Core/GlobalFunctions.h"
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_CORE_H
diff --git a/third-party/Eigen/Eigenvalues b/third-party/Eigen/Eigenvalues
new file mode 100644
index 00000000..7d6ac787
--- /dev/null
+++ b/third-party/Eigen/Eigenvalues
@@ -0,0 +1,61 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_EIGENVALUES_MODULE_H
+#define EIGEN_EIGENVALUES_MODULE_H
+
+#include "Core"
+
+#include "Cholesky"
+#include "Jacobi"
+#include "Householder"
+#include "LU"
+#include "Geometry"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+/** \defgroup Eigenvalues_Module Eigenvalues module
+  *
+  *
+  *
+  * This module mainly provides various eigenvalue solvers.
+  * This module also provides some MatrixBase methods, including:
+  *  - MatrixBase::eigenvalues(),
+  *  - MatrixBase::operatorNorm()
+  *
+  * \code
+  * #include <Eigen/Eigenvalues>
+  * \endcode
+  */
+
+#include "src/misc/RealSvd2x2.h"
+#include "src/Eigenvalues/Tridiagonalization.h"
+#include "src/Eigenvalues/RealSchur.h"
+#include "src/Eigenvalues/EigenSolver.h"
+#include "src/Eigenvalues/SelfAdjointEigenSolver.h"
+#include "src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h"
+#include "src/Eigenvalues/HessenbergDecomposition.h"
+#include "src/Eigenvalues/ComplexSchur.h"
+#include "src/Eigenvalues/ComplexEigenSolver.h"
+#include "src/Eigenvalues/RealQZ.h"
+#include "src/Eigenvalues/GeneralizedEigenSolver.h"
+#include "src/Eigenvalues/MatrixBaseEigenvalues.h"
+#ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
+#include "src/misc/lapacke.h"
+#endif
+#include "src/Eigenvalues/RealSchur_LAPACKE.h"
+#include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
+#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"
+#endif
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_EIGENVALUES_MODULE_H
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/third-party/Eigen/Geometry b/third-party/Eigen/Geometry
new file mode 100644
index 00000000..da88c03b
--- /dev/null
+++ b/third-party/Eigen/Geometry
@@ -0,0 +1,62 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GEOMETRY_MODULE_H
+#define EIGEN_GEOMETRY_MODULE_H
+
+#include "Core"
+
+#include "SVD"
+#include "LU"
+#include <limits>
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+/** \defgroup Geometry_Module Geometry module
+  *
+  * This module provides support for:
+  *  - fixed-size homogeneous transformations
+  *  - translation, scaling, 2D and 3D rotations
+  *  - \link Quaternion quaternions \endlink
+  *  - cross products (\ref MatrixBase::cross, \ref MatrixBase::cross3)
+  *  - orthognal vector generation (\ref MatrixBase::unitOrthogonal)
+  *  - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink
+  *  - \link AlignedBox axis aligned bounding boxes \endlink
+  *  - \link umeyama least-square transformation fitting \endlink
+  *
+  * \code
+  * #include <Eigen/Geometry>
+  * \endcode
+  */
+
+#include "src/Geometry/OrthoMethods.h"
+#include "src/Geometry/EulerAngles.h"
+
+#include "src/Geometry/Homogeneous.h"
+#include "src/Geometry/RotationBase.h"
+#include "src/Geometry/Rotation2D.h"
+#include "src/Geometry/Quaternion.h"
+#include "src/Geometry/AngleAxis.h"
+#include "src/Geometry/Transform.h"
+#include "src/Geometry/Translation.h"
+#include "src/Geometry/Scaling.h"
+#include "src/Geometry/Hyperplane.h"
+#include "src/Geometry/ParametrizedLine.h"
+#include "src/Geometry/AlignedBox.h"
+#include "src/Geometry/Umeyama.h"
+
+// Use the SSE optimized version whenever possible. At the moment the
+// SSE version doesn't compile when AVX is enabled
+#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
+#include "src/Geometry/arch/Geometry_SSE.h"
+#endif
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_GEOMETRY_MODULE_H
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
+
diff --git a/third-party/Eigen/Householder b/third-party/Eigen/Householder
new file mode 100644
index 00000000..89cd81b1
--- /dev/null
+++ b/third-party/Eigen/Householder
@@ -0,0 +1,30 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_HOUSEHOLDER_MODULE_H
+#define EIGEN_HOUSEHOLDER_MODULE_H
+
+#include "Core"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+/** \defgroup Householder_Module Householder module
+  * This module provides Householder transformations.
+  *
+  * \code
+  * #include <Eigen/Householder>
+  * \endcode
+  */
+
+#include "src/Householder/Householder.h"
+#include "src/Householder/HouseholderSequence.h"
+#include "src/Householder/BlockHouseholder.h"
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_HOUSEHOLDER_MODULE_H
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/third-party/Eigen/Jacobi b/third-party/Eigen/Jacobi
new file mode 100644
index 00000000..17c1d785
--- /dev/null
+++ b/third-party/Eigen/Jacobi
@@ -0,0 +1,33 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_JACOBI_MODULE_H
+#define EIGEN_JACOBI_MODULE_H
+
+#include "Core"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+/** \defgroup Jacobi_Module Jacobi module
+  * This module provides Jacobi and Givens rotations.
+  *
+  * \code
+  * #include <Eigen/Jacobi>
+  * \endcode
+  *
+  * In addition to listed classes, it defines the two following MatrixBase methods to apply a Jacobi or Givens rotation:
+  *  - MatrixBase::applyOnTheLeft()
+  *  - MatrixBase::applyOnTheRight().
+  */
+
+#include "src/Jacobi/Jacobi.h"
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_JACOBI_MODULE_H
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
+
diff --git a/third-party/Eigen/LU b/third-party/Eigen/LU
new file mode 100644
index 00000000..6418a86e
--- /dev/null
+++ b/third-party/Eigen/LU
@@ -0,0 +1,50 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_LU_MODULE_H
+#define EIGEN_LU_MODULE_H
+
+#include "Core"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+/** \defgroup LU_Module LU module
+  * This module includes %LU decomposition and related notions such as matrix inversion and determinant.
+  * This module defines the following MatrixBase methods:
+  *  - MatrixBase::inverse()
+  *  - MatrixBase::determinant()
+  *
+  * \code
+  * #include <Eigen/LU>
+  * \endcode
+  */
+
+#include "src/misc/Kernel.h"
+#include "src/misc/Image.h"
+#include "src/LU/FullPivLU.h"
+#include "src/LU/PartialPivLU.h"
+#ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
+#include "src/misc/lapacke.h"
+#endif
+#include "src/LU/PartialPivLU_LAPACKE.h"
+#endif
+#include "src/LU/Determinant.h"
+#include "src/LU/InverseImpl.h"
+
+// Use the SSE optimized version whenever possible. At the moment the
+// SSE version doesn't compile when AVX is enabled
+#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX
+  #include "src/LU/arch/Inverse_SSE.h"
+#endif
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_LU_MODULE_H
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/third-party/Eigen/MatrixFunctions b/third-party/Eigen/MatrixFunctions
new file mode 100644
index 00000000..60dc0a69
--- /dev/null
+++ b/third-party/Eigen/MatrixFunctions
@@ -0,0 +1,500 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2012 Chen-Pang He <jdh8@ms63.hinet.net>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIX_FUNCTIONS
+#define EIGEN_MATRIX_FUNCTIONS
+
+#include <cfloat>
+#include <list>
+
+#include <Eigen/Core>
+#include <Eigen/LU>
+#include <Eigen/Eigenvalues>
+
+/**
+  * \defgroup MatrixFunctions_Module Matrix functions module
+  * \brief This module aims to provide various methods for the computation of
+  * matrix functions. 
+  *
+  * To use this module, add 
+  * \code
+  * #include <unsupported/Eigen/MatrixFunctions>
+  * \endcode
+  * at the start of your source file.
+  *
+  * This module defines the following MatrixBase methods.
+  *  - \ref matrixbase_cos "MatrixBase::cos()", for computing the matrix cosine
+  *  - \ref matrixbase_cosh "MatrixBase::cosh()", for computing the matrix hyperbolic cosine
+  *  - \ref matrixbase_exp "MatrixBase::exp()", for computing the matrix exponential
+  *  - \ref matrixbase_log "MatrixBase::log()", for computing the matrix logarithm
+  *  - \ref matrixbase_pow "MatrixBase::pow()", for computing the matrix power
+  *  - \ref matrixbase_matrixfunction "MatrixBase::matrixFunction()", for computing general matrix functions
+  *  - \ref matrixbase_sin "MatrixBase::sin()", for computing the matrix sine
+  *  - \ref matrixbase_sinh "MatrixBase::sinh()", for computing the matrix hyperbolic sine
+  *  - \ref matrixbase_sqrt "MatrixBase::sqrt()", for computing the matrix square root
+  *
+  * These methods are the main entry points to this module. 
+  *
+  * %Matrix functions are defined as follows.  Suppose that \f$ f \f$
+  * is an entire function (that is, a function on the complex plane
+  * that is everywhere complex differentiable).  Then its Taylor
+  * series
+  * \f[ f(0) + f'(0) x + \frac{f''(0)}{2} x^2 + \frac{f'''(0)}{3!} x^3 + \cdots \f]
+  * converges to \f$ f(x) \f$. In this case, we can define the matrix
+  * function by the same series:
+  * \f[ f(M) = f(0) + f'(0) M + \frac{f''(0)}{2} M^2 + \frac{f'''(0)}{3!} M^3 + \cdots \f]
+  *
+  */
+
+#include "src/MatrixFunctions/MatrixExponential.h"
+#include "src/MatrixFunctions/MatrixFunction.h"
+#include "src/MatrixFunctions/MatrixSquareRoot.h"
+#include "src/MatrixFunctions/MatrixLogarithm.h"
+#include "src/MatrixFunctions/MatrixPower.h"
+
+
+/** 
+\page matrixbaseextra_page
+\ingroup MatrixFunctions_Module
+
+\section matrixbaseextra MatrixBase methods defined in the MatrixFunctions module
+
+The remainder of the page documents the following MatrixBase methods
+which are defined in the MatrixFunctions module.
+
+
+
+\subsection matrixbase_cos MatrixBase::cos()
+
+Compute the matrix cosine.
+
+\code
+const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::cos() const
+\endcode
+
+\param[in]  M  a square matrix.
+\returns  expression representing \f$ \cos(M) \f$.
+
+This function computes the matrix cosine. Use ArrayBase::cos() for computing the entry-wise cosine.
+
+The implementation calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::cos().
+
+\sa \ref matrixbase_sin "sin()" for an example.
+
+
+
+\subsection matrixbase_cosh MatrixBase::cosh()
+
+Compute the matrix hyberbolic cosine.
+
+\code
+const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::cosh() const
+\endcode
+
+\param[in]  M  a square matrix.
+\returns  expression representing \f$ \cosh(M) \f$
+
+This function calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::cosh().
+
+\sa \ref matrixbase_sinh "sinh()" for an example.
+
+
+
+\subsection matrixbase_exp MatrixBase::exp()
+
+Compute the matrix exponential.
+
+\code
+const MatrixExponentialReturnValue<Derived> MatrixBase<Derived>::exp() const
+\endcode
+
+\param[in]  M  matrix whose exponential is to be computed.
+\returns    expression representing the matrix exponential of \p M.
+
+The matrix exponential of \f$ M \f$ is defined by
+\f[ \exp(M) = \sum_{k=0}^\infty \frac{M^k}{k!}. \f]
+The matrix exponential can be used to solve linear ordinary
+differential equations: the solution of \f$ y' = My \f$ with the
+initial condition \f$ y(0) = y_0 \f$ is given by
+\f$ y(t) = \exp(M) y_0 \f$.
+
+The matrix exponential is different from applying the exp function to all the entries in the matrix.
+Use ArrayBase::exp() if you want to do the latter.
+
+The cost of the computation is approximately \f$ 20 n^3 \f$ for
+matrices of size \f$ n \f$. The number 20 depends weakly on the
+norm of the matrix.
+
+The matrix exponential is computed using the scaling-and-squaring
+method combined with Pad&eacute; approximation. The matrix is first
+rescaled, then the exponential of the reduced matrix is computed
+approximant, and then the rescaling is undone by repeated
+squaring. The degree of the Pad&eacute; approximant is chosen such
+that the approximation error is less than the round-off
+error. However, errors may accumulate during the squaring phase.
+
+Details of the algorithm can be found in: Nicholas J. Higham, "The
+scaling and squaring method for the matrix exponential revisited,"
+<em>SIAM J. %Matrix Anal. Applic.</em>, <b>26</b>:1179&ndash;1193,
+2005.
+
+Example: The following program checks that
+\f[ \exp \left[ \begin{array}{ccc}
+      0 & \frac14\pi & 0 \\
+      -\frac14\pi & 0 & 0 \\
+      0 & 0 & 0
+    \end{array} \right] = \left[ \begin{array}{ccc}
+      \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\
+      \frac12\sqrt2 & \frac12\sqrt2 & 0 \\
+      0 & 0 & 1
+    \end{array} \right]. \f]
+This corresponds to a rotation of \f$ \frac14\pi \f$ radians around
+the z-axis.
+
+\include MatrixExponential.cpp
+Output: \verbinclude MatrixExponential.out
+
+\note \p M has to be a matrix of \c float, \c double, `long double`
+\c complex<float>, \c complex<double>, or `complex<long double>` .
+
+
+\subsection matrixbase_log MatrixBase::log()
+
+Compute the matrix logarithm.
+
+\code
+const MatrixLogarithmReturnValue<Derived> MatrixBase<Derived>::log() const
+\endcode
+
+\param[in]  M  invertible matrix whose logarithm is to be computed.
+\returns    expression representing the matrix logarithm root of \p M.
+
+The matrix logarithm of \f$ M \f$ is a matrix \f$ X \f$ such that 
+\f$ \exp(X) = M \f$ where exp denotes the matrix exponential. As for
+the scalar logarithm, the equation \f$ \exp(X) = M \f$ may have
+multiple solutions; this function returns a matrix whose eigenvalues
+have imaginary part in the interval \f$ (-\pi,\pi] \f$.
+
+The matrix logarithm is different from applying the log function to all the entries in the matrix.
+Use ArrayBase::log() if you want to do the latter.
+
+In the real case, the matrix \f$ M \f$ should be invertible and
+it should have no eigenvalues which are real and negative (pairs of
+complex conjugate eigenvalues are allowed). In the complex case, it
+only needs to be invertible.
+
+This function computes the matrix logarithm using the Schur-Parlett
+algorithm as implemented by MatrixBase::matrixFunction(). The
+logarithm of an atomic block is computed by MatrixLogarithmAtomic,
+which uses direct computation for 1-by-1 and 2-by-2 blocks and an
+inverse scaling-and-squaring algorithm for bigger blocks, with the
+square roots computed by MatrixBase::sqrt().
+
+Details of the algorithm can be found in Section 11.6.2 of:
+Nicholas J. Higham,
+<em>Functions of Matrices: Theory and Computation</em>,
+SIAM 2008. ISBN 978-0-898716-46-7.
+
+Example: The following program checks that
+\f[ \log \left[ \begin{array}{ccc} 
+      \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\
+      \frac12\sqrt2 & \frac12\sqrt2 & 0 \\
+      0 & 0 & 1
+    \end{array} \right] = \left[ \begin{array}{ccc}
+      0 & \frac14\pi & 0 \\ 
+      -\frac14\pi & 0 & 0 \\
+      0 & 0 & 0 
+    \end{array} \right]. \f]
+This corresponds to a rotation of \f$ \frac14\pi \f$ radians around
+the z-axis. This is the inverse of the example used in the
+documentation of \ref matrixbase_exp "exp()".
+
+\include MatrixLogarithm.cpp
+Output: \verbinclude MatrixLogarithm.out
+
+\note \p M has to be a matrix of \c float, \c double, `long
+double`, \c complex<float>, \c complex<double>, or `complex<long double>`.
+
+\sa MatrixBase::exp(), MatrixBase::matrixFunction(), 
+    class MatrixLogarithmAtomic, MatrixBase::sqrt().
+
+
+\subsection matrixbase_pow MatrixBase::pow()
+
+Compute the matrix raised to arbitrary real power.
+
+\code
+const MatrixPowerReturnValue<Derived> MatrixBase<Derived>::pow(RealScalar p) const
+\endcode
+
+\param[in]  M  base of the matrix power, should be a square matrix.
+\param[in]  p  exponent of the matrix power.
+
+The matrix power \f$ M^p \f$ is defined as \f$ \exp(p \log(M)) \f$,
+where exp denotes the matrix exponential, and log denotes the matrix
+logarithm. This is different from raising all the entries in the matrix
+to the p-th power. Use ArrayBase::pow() if you want to do the latter.
+
+If \p p is complex, the scalar type of \p M should be the type of \p
+p . \f$ M^p \f$ simply evaluates into \f$ \exp(p \log(M)) \f$.
+Therefore, the matrix \f$ M \f$ should meet the conditions to be an
+argument of matrix logarithm.
+
+If \p p is real, it is casted into the real scalar type of \p M. Then
+this function computes the matrix power using the Schur-Pad&eacute;
+algorithm as implemented by class MatrixPower. The exponent is split
+into integral part and fractional part, where the fractional part is
+in the interval \f$ (-1, 1) \f$. The main diagonal and the first
+super-diagonal is directly computed.
+
+If \p M is singular with a semisimple zero eigenvalue and \p p is
+positive, the Schur factor \f$ T \f$ is reordered with Givens
+rotations, i.e.
+
+\f[ T = \left[ \begin{array}{cc}
+      T_1 & T_2 \\
+      0   & 0
+    \end{array} \right] \f]
+
+where \f$ T_1 \f$ is invertible. Then \f$ T^p \f$ is given by
+
+\f[ T^p = \left[ \begin{array}{cc}
+      T_1^p & T_1^{-1} T_1^p T_2 \\
+      0     & 0
+    \end{array}. \right] \f]
+
+\warning Fractional power of a matrix with a non-semisimple zero
+eigenvalue is not well-defined. We introduce an assertion failure
+against inaccurate result, e.g. \code
+#include <unsupported/Eigen/MatrixFunctions>
+#include <iostream>
+
+int main()
+{
+  Eigen::Matrix4d A;
+  A << 0, 0, 2, 3,
+       0, 0, 4, 5,
+       0, 0, 6, 7,
+       0, 0, 8, 9;
+  std::cout << A.pow(0.37) << std::endl;
+  
+  // The 1 makes eigenvalue 0 non-semisimple.
+  A.coeffRef(0, 1) = 1;
+
+  // This fails if EIGEN_NO_DEBUG is undefined.
+  std::cout << A.pow(0.37) << std::endl;
+
+  return 0;
+}
+\endcode
+
+Details of the algorithm can be found in: Nicholas J. Higham and
+Lijing Lin, "A Schur-Pad&eacute; algorithm for fractional powers of a
+matrix," <em>SIAM J. %Matrix Anal. Applic.</em>,
+<b>32(3)</b>:1056&ndash;1078, 2011.
+
+Example: The following program checks that
+\f[ \left[ \begin{array}{ccc}
+      \cos1 & -\sin1 & 0 \\
+      \sin1 & \cos1 & 0 \\
+      0 & 0 & 1
+    \end{array} \right]^{\frac14\pi} = \left[ \begin{array}{ccc}
+      \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\
+      \frac12\sqrt2 & \frac12\sqrt2 & 0 \\
+      0 & 0 & 1
+    \end{array} \right]. \f]
+This corresponds to \f$ \frac14\pi \f$ rotations of 1 radian around
+the z-axis.
+
+\include MatrixPower.cpp
+Output: \verbinclude MatrixPower.out
+
+MatrixBase::pow() is user-friendly. However, there are some
+circumstances under which you should use class MatrixPower directly.
+MatrixPower can save the result of Schur decomposition, so it's
+better for computing various powers for the same matrix.
+
+Example:
+\include MatrixPower_optimal.cpp
+Output: \verbinclude MatrixPower_optimal.out
+
+\note \p M has to be a matrix of \c float, \c double, `long
+double`, \c complex<float>, \c complex<double>, or
+\c complex<long double> .
+
+\sa MatrixBase::exp(), MatrixBase::log(), class MatrixPower.
+
+
+\subsection matrixbase_matrixfunction MatrixBase::matrixFunction()
+
+Compute a matrix function.
+
+\code
+const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::matrixFunction(typename internal::stem_function<typename internal::traits<Derived>::Scalar>::type f) const
+\endcode
+
+\param[in]  M  argument of matrix function, should be a square matrix.
+\param[in]  f  an entire function; \c f(x,n) should compute the n-th
+derivative of f at x.
+\returns  expression representing \p f applied to \p M.
+
+Suppose that \p M is a matrix whose entries have type \c Scalar. 
+Then, the second argument, \p f, should be a function with prototype
+\code 
+ComplexScalar f(ComplexScalar, int) 
+\endcode
+where \c ComplexScalar = \c std::complex<Scalar> if \c Scalar is
+real (e.g., \c float or \c double) and \c ComplexScalar =
+\c Scalar if \c Scalar is complex. The return value of \c f(x,n)
+should be \f$ f^{(n)}(x) \f$, the n-th derivative of f at x.
+
+This routine uses the algorithm described in:
+Philip Davies and Nicholas J. Higham, 
+"A Schur-Parlett algorithm for computing matrix functions", 
+<em>SIAM J. %Matrix Anal. Applic.</em>, <b>25</b>:464&ndash;485, 2003.
+
+The actual work is done by the MatrixFunction class.
+
+Example: The following program checks that
+\f[ \exp \left[ \begin{array}{ccc} 
+      0 & \frac14\pi & 0 \\ 
+      -\frac14\pi & 0 & 0 \\
+      0 & 0 & 0 
+    \end{array} \right] = \left[ \begin{array}{ccc}
+      \frac12\sqrt2 & -\frac12\sqrt2 & 0 \\
+      \frac12\sqrt2 & \frac12\sqrt2 & 0 \\
+      0 & 0 & 1
+    \end{array} \right]. \f]
+This corresponds to a rotation of \f$ \frac14\pi \f$ radians around
+the z-axis. This is the same example as used in the documentation
+of \ref matrixbase_exp "exp()".
+
+\include MatrixFunction.cpp
+Output: \verbinclude MatrixFunction.out
+
+Note that the function \c expfn is defined for complex numbers 
+\c x, even though the matrix \c A is over the reals. Instead of
+\c expfn, we could also have used StdStemFunctions::exp:
+\code
+A.matrixFunction(StdStemFunctions<std::complex<double> >::exp, &B);
+\endcode
+
+
+
+\subsection matrixbase_sin MatrixBase::sin()
+
+Compute the matrix sine.
+
+\code
+const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::sin() const
+\endcode
+
+\param[in]  M  a square matrix.
+\returns  expression representing \f$ \sin(M) \f$.
+
+This function computes the matrix sine. Use ArrayBase::sin() for computing the entry-wise sine.
+
+The implementation calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::sin().
+
+Example: \include MatrixSine.cpp
+Output: \verbinclude MatrixSine.out
+
+
+
+\subsection matrixbase_sinh MatrixBase::sinh()
+
+Compute the matrix hyperbolic sine.
+
+\code
+MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::sinh() const
+\endcode
+
+\param[in]  M  a square matrix.
+\returns  expression representing \f$ \sinh(M) \f$
+
+This function calls \ref matrixbase_matrixfunction "matrixFunction()" with StdStemFunctions::sinh().
+
+Example: \include MatrixSinh.cpp
+Output: \verbinclude MatrixSinh.out
+
+
+\subsection matrixbase_sqrt MatrixBase::sqrt()
+
+Compute the matrix square root.
+
+\code
+const MatrixSquareRootReturnValue<Derived> MatrixBase<Derived>::sqrt() const
+\endcode
+
+\param[in]  M  invertible matrix whose square root is to be computed.
+\returns    expression representing the matrix square root of \p M.
+
+The matrix square root of \f$ M \f$ is the matrix \f$ M^{1/2} \f$
+whose square is the original matrix; so if \f$ S = M^{1/2} \f$ then
+\f$ S^2 = M \f$. This is different from taking the square root of all
+the entries in the matrix; use ArrayBase::sqrt() if you want to do the
+latter.
+
+In the <b>real case</b>, the matrix \f$ M \f$ should be invertible and
+it should have no eigenvalues which are real and negative (pairs of
+complex conjugate eigenvalues are allowed). In that case, the matrix
+has a square root which is also real, and this is the square root
+computed by this function. 
+
+The matrix square root is computed by first reducing the matrix to
+quasi-triangular form with the real Schur decomposition. The square
+root of the quasi-triangular matrix can then be computed directly. The
+cost is approximately \f$ 25 n^3 \f$ real flops for the real Schur
+decomposition and \f$ 3\frac13 n^3 \f$ real flops for the remainder
+(though the computation time in practice is likely more than this
+indicates).
+
+Details of the algorithm can be found in: Nicholas J. Highan,
+"Computing real square roots of a real matrix", <em>Linear Algebra
+Appl.</em>, 88/89:405&ndash;430, 1987.
+
+If the matrix is <b>positive-definite symmetric</b>, then the square
+root is also positive-definite symmetric. In this case, it is best to
+use SelfAdjointEigenSolver::operatorSqrt() to compute it.
+
+In the <b>complex case</b>, the matrix \f$ M \f$ should be invertible;
+this is a restriction of the algorithm. The square root computed by
+this algorithm is the one whose eigenvalues have an argument in the
+interval \f$ (-\frac12\pi, \frac12\pi] \f$. This is the usual branch
+cut.
+
+The computation is the same as in the real case, except that the
+complex Schur decomposition is used to reduce the matrix to a
+triangular matrix. The theoretical cost is the same. Details are in:
+&Aring;ke Bj&ouml;rck and Sven Hammarling, "A Schur method for the
+square root of a matrix", <em>Linear Algebra Appl.</em>,
+52/53:127&ndash;140, 1983.
+
+Example: The following program checks that the square root of
+\f[ \left[ \begin{array}{cc} 
+              \cos(\frac13\pi) & -\sin(\frac13\pi) \\
+              \sin(\frac13\pi) & \cos(\frac13\pi)
+    \end{array} \right], \f]
+corresponding to a rotation over 60 degrees, is a rotation over 30 degrees:
+\f[ \left[ \begin{array}{cc} 
+              \cos(\frac16\pi) & -\sin(\frac16\pi) \\
+              \sin(\frac16\pi) & \cos(\frac16\pi)
+    \end{array} \right]. \f]
+
+\include MatrixSquareRoot.cpp
+Output: \verbinclude MatrixSquareRoot.out
+
+\sa class RealSchur, class ComplexSchur, class MatrixSquareRoot,
+    SelfAdjointEigenSolver::operatorSqrt().
+
+*/
+
+#endif // EIGEN_MATRIX_FUNCTIONS
+
diff --git a/third-party/Eigen/QR b/third-party/Eigen/QR
new file mode 100644
index 00000000..1be1863a
--- /dev/null
+++ b/third-party/Eigen/QR
@@ -0,0 +1,51 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_QR_MODULE_H
+#define EIGEN_QR_MODULE_H
+
+#include "Core"
+
+#include "Cholesky"
+#include "Jacobi"
+#include "Householder"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+/** \defgroup QR_Module QR module
+  *
+  *
+  *
+  * This module provides various QR decompositions
+  * This module also provides some MatrixBase methods, including:
+  *  - MatrixBase::householderQr()
+  *  - MatrixBase::colPivHouseholderQr()
+  *  - MatrixBase::fullPivHouseholderQr()
+  *
+  * \code
+  * #include <Eigen/QR>
+  * \endcode
+  */
+
+#include "src/QR/HouseholderQR.h"
+#include "src/QR/FullPivHouseholderQR.h"
+#include "src/QR/ColPivHouseholderQR.h"
+#include "src/QR/CompleteOrthogonalDecomposition.h"
+#ifdef EIGEN_USE_LAPACKE
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
+#include "src/misc/lapacke.h"
+#endif
+#include "src/QR/HouseholderQR_LAPACKE.h"
+#include "src/QR/ColPivHouseholderQR_LAPACKE.h"
+#endif
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_QR_MODULE_H
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/third-party/Eigen/SVD b/third-party/Eigen/SVD
new file mode 100644
index 00000000..5d0e75f7
--- /dev/null
+++ b/third-party/Eigen/SVD
@@ -0,0 +1,51 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SVD_MODULE_H
+#define EIGEN_SVD_MODULE_H
+
+#include "QR"
+#include "Householder"
+#include "Jacobi"
+
+#include "src/Core/util/DisableStupidWarnings.h"
+
+/** \defgroup SVD_Module SVD module
+  *
+  *
+  *
+  * This module provides SVD decomposition for matrices (both real and complex).
+  * Two decomposition algorithms are provided:
+  *  - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones.
+  *  - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems.
+  * These decompositions are accessible via the respective classes and following MatrixBase methods:
+  *  - MatrixBase::jacobiSvd()
+  *  - MatrixBase::bdcSvd()
+  *
+  * \code
+  * #include <Eigen/SVD>
+  * \endcode
+  */
+
+#include "src/misc/RealSvd2x2.h"
+#include "src/SVD/UpperBidiagonalization.h"
+#include "src/SVD/SVDBase.h"
+#include "src/SVD/JacobiSVD.h"
+#include "src/SVD/BDCSVD.h"
+#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
+#ifdef EIGEN_USE_MKL
+#include "mkl_lapacke.h"
+#else
+#include "src/misc/lapacke.h"
+#endif
+#include "src/SVD/JacobiSVD_LAPACKE.h"
+#endif
+
+#include "src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_SVD_MODULE_H
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/third-party/Eigen/src/Cholesky/LDLT.h b/third-party/Eigen/src/Cholesky/LDLT.h
new file mode 100644
index 00000000..15ccf24f
--- /dev/null
+++ b/third-party/Eigen/src/Cholesky/LDLT.h
@@ -0,0 +1,673 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009 Keir Mierle <mierle@gmail.com>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2011 Timothy E. Holy <tim.holy@gmail.com >
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_LDLT_H
+#define EIGEN_LDLT_H
+
+namespace Eigen {
+
+namespace internal {
+  template<typename MatrixType, int UpLo> struct LDLT_Traits;
+
+  // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef
+  enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite };
+}
+
+/** \ingroup Cholesky_Module
+  *
+  * \class LDLT
+  *
+  * \brief Robust Cholesky decomposition of a matrix with pivoting
+  *
+  * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition
+  * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+  *             The other triangular part won't be read.
+  *
+  * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite
+  * matrix \f$ A \f$ such that \f$ A =  P^TLDL^*P \f$, where P is a permutation matrix, L
+  * is lower triangular with a unit diagonal and D is a diagonal matrix.
+  *
+  * The decomposition uses pivoting to ensure stability, so that L will have
+  * zeros in the bottom right rank(A) - n submatrix. Avoiding the square root
+  * on D also stabilizes the computation.
+  *
+  * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky
+  * decomposition to determine whether a system of equations has a solution.
+  *
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+  * 
+  * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
+  */
+template<typename _MatrixType, int _UpLo> class LDLT
+{
+  public:
+    typedef _MatrixType MatrixType;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+      UpLo = _UpLo
+    };
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+    typedef typename MatrixType::StorageIndex StorageIndex;
+    typedef Matrix<Scalar, RowsAtCompileTime, 1, 0, MaxRowsAtCompileTime, 1> TmpMatrixType;
+
+    typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
+    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
+
+    typedef internal::LDLT_Traits<MatrixType,UpLo> Traits;
+
+    /** \brief Default Constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via LDLT::compute(const MatrixType&).
+      */
+    LDLT()
+      : m_matrix(),
+        m_transpositions(),
+        m_sign(internal::ZeroSign),
+        m_isInitialized(false)
+    {}
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa LDLT()
+      */
+    explicit LDLT(Index size)
+      : m_matrix(size, size),
+        m_transpositions(size),
+        m_temporary(size),
+        m_sign(internal::ZeroSign),
+        m_isInitialized(false)
+    {}
+
+    /** \brief Constructor with decomposition
+      *
+      * This calculates the decomposition for the input \a matrix.
+      *
+      * \sa LDLT(Index size)
+      */
+    template<typename InputType>
+    explicit LDLT(const EigenBase<InputType>& matrix)
+      : m_matrix(matrix.rows(), matrix.cols()),
+        m_transpositions(matrix.rows()),
+        m_temporary(matrix.rows()),
+        m_sign(internal::ZeroSign),
+        m_isInitialized(false)
+    {
+      compute(matrix.derived());
+    }
+
+    /** \brief Constructs a LDLT factorization from a given matrix
+      *
+      * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
+      *
+      * \sa LDLT(const EigenBase&)
+      */
+    template<typename InputType>
+    explicit LDLT(EigenBase<InputType>& matrix)
+      : m_matrix(matrix.derived()),
+        m_transpositions(matrix.rows()),
+        m_temporary(matrix.rows()),
+        m_sign(internal::ZeroSign),
+        m_isInitialized(false)
+    {
+      compute(matrix.derived());
+    }
+
+    /** Clear any existing decomposition
+     * \sa rankUpdate(w,sigma)
+     */
+    void setZero()
+    {
+      m_isInitialized = false;
+    }
+
+    /** \returns a view of the upper triangular matrix U */
+    inline typename Traits::MatrixU matrixU() const
+    {
+      eigen_assert(m_isInitialized && "LDLT is not initialized.");
+      return Traits::getU(m_matrix);
+    }
+
+    /** \returns a view of the lower triangular matrix L */
+    inline typename Traits::MatrixL matrixL() const
+    {
+      eigen_assert(m_isInitialized && "LDLT is not initialized.");
+      return Traits::getL(m_matrix);
+    }
+
+    /** \returns the permutation matrix P as a transposition sequence.
+      */
+    inline const TranspositionType& transpositionsP() const
+    {
+      eigen_assert(m_isInitialized && "LDLT is not initialized.");
+      return m_transpositions;
+    }
+
+    /** \returns the coefficients of the diagonal matrix D */
+    inline Diagonal<const MatrixType> vectorD() const
+    {
+      eigen_assert(m_isInitialized && "LDLT is not initialized.");
+      return m_matrix.diagonal();
+    }
+
+    /** \returns true if the matrix is positive (semidefinite) */
+    inline bool isPositive() const
+    {
+      eigen_assert(m_isInitialized && "LDLT is not initialized.");
+      return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;
+    }
+
+    /** \returns true if the matrix is negative (semidefinite) */
+    inline bool isNegative(void) const
+    {
+      eigen_assert(m_isInitialized && "LDLT is not initialized.");
+      return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign;
+    }
+
+    /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
+      *
+      * This function also supports in-place solves using the syntax <tt>x = decompositionObject.solve(x)</tt> .
+      *
+      * \note_about_checking_solutions
+      *
+      * More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$
+      * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$,
+      * \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then
+      * \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the
+      * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function
+      * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular.
+      *
+      * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt()
+      */
+    template<typename Rhs>
+    inline const Solve<LDLT, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "LDLT is not initialized.");
+      eigen_assert(m_matrix.rows()==b.rows()
+                && "LDLT::solve(): invalid number of rows of the right hand side matrix b");
+      return Solve<LDLT, Rhs>(*this, b.derived());
+    }
+
+    template<typename Derived>
+    bool solveInPlace(MatrixBase<Derived> &bAndX) const;
+
+    template<typename InputType>
+    LDLT& compute(const EigenBase<InputType>& matrix);
+
+    /** \returns an estimate of the reciprocal condition number of the matrix of
+     *  which \c *this is the LDLT decomposition.
+     */
+    RealScalar rcond() const
+    {
+      eigen_assert(m_isInitialized && "LDLT is not initialized.");
+      return internal::rcond_estimate_helper(m_l1_norm, *this);
+    }
+
+    template <typename Derived>
+    LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);
+
+    /** \returns the internal LDLT decomposition matrix
+      *
+      * TODO: document the storage layout
+      */
+    inline const MatrixType& matrixLDLT() const
+    {
+      eigen_assert(m_isInitialized && "LDLT is not initialized.");
+      return m_matrix;
+    }
+
+    MatrixType reconstructedMatrix() const;
+
+    /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.
+      *
+      * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
+      * \code x = decomposition.adjoint().solve(b) \endcode
+      */
+    const LDLT& adjoint() const { return *this; };
+
+    inline Index rows() const { return m_matrix.rows(); }
+    inline Index cols() const { return m_matrix.cols(); }
+
+    /** \brief Reports whether previous computation was successful.
+      *
+      * \returns \c Success if computation was succesful,
+      *          \c NumericalIssue if the factorization failed because of a zero pivot.
+      */
+    ComputationInfo info() const
+    {
+      eigen_assert(m_isInitialized && "LDLT is not initialized.");
+      return m_info;
+    }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
+
+  protected:
+
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+
+    /** \internal
+      * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U.
+      * The strict upper part is used during the decomposition, the strict lower
+      * part correspond to the coefficients of L (its diagonal is equal to 1 and
+      * is not stored), and the diagonal entries correspond to D.
+      */
+    MatrixType m_matrix;
+    RealScalar m_l1_norm;
+    TranspositionType m_transpositions;
+    TmpMatrixType m_temporary;
+    internal::SignMatrix m_sign;
+    bool m_isInitialized;
+    ComputationInfo m_info;
+};
+
+namespace internal {
+
+template<int UpLo> struct ldlt_inplace;
+
+template<> struct ldlt_inplace<Lower>
+{
+  template<typename MatrixType, typename TranspositionType, typename Workspace>
+  static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
+  {
+    using std::abs;
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename MatrixType::RealScalar RealScalar;
+    typedef typename TranspositionType::StorageIndex IndexType;
+    eigen_assert(mat.rows()==mat.cols());
+    const Index size = mat.rows();
+    bool found_zero_pivot = false;
+    bool ret = true;
+
+    if (size <= 1)
+    {
+      transpositions.setIdentity();
+      if(size==0) sign = ZeroSign;
+      else if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
+      else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
+      else sign = ZeroSign;
+      return true;
+    }
+
+    for (Index k = 0; k < size; ++k)
+    {
+      // Find largest diagonal element
+      Index index_of_biggest_in_corner;
+      mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner);
+      index_of_biggest_in_corner += k;
+
+      transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner);
+      if(k != index_of_biggest_in_corner)
+      {
+        // apply the transposition while taking care to consider only
+        // the lower triangular part
+        Index s = size-index_of_biggest_in_corner-1; // trailing size after the biggest element
+        mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k));
+        mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s));
+        std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner));
+        for(Index i=k+1;i<index_of_biggest_in_corner;++i)
+        {
+          Scalar tmp = mat.coeffRef(i,k);
+          mat.coeffRef(i,k) = numext::conj(mat.coeffRef(index_of_biggest_in_corner,i));
+          mat.coeffRef(index_of_biggest_in_corner,i) = numext::conj(tmp);
+        }
+        if(NumTraits<Scalar>::IsComplex)
+          mat.coeffRef(index_of_biggest_in_corner,k) = numext::conj(mat.coeff(index_of_biggest_in_corner,k));
+      }
+
+      // partition the matrix:
+      //       A00 |  -  |  -
+      // lu  = A10 | A11 |  -
+      //       A20 | A21 | A22
+      Index rs = size - k - 1;
+      Block<MatrixType,Dynamic,1> A21(mat,k+1,k,rs,1);
+      Block<MatrixType,1,Dynamic> A10(mat,k,0,1,k);
+      Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);
+
+      if(k>0)
+      {
+        temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint();
+        mat.coeffRef(k,k) -= (A10 * temp.head(k)).value();
+        if(rs>0)
+          A21.noalias() -= A20 * temp.head(k);
+      }
+
+      // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot
+      // was smaller than the cutoff value. However, since LDLT is not rank-revealing
+      // we should only make sure that we do not introduce INF or NaN values.
+      // Remark that LAPACK also uses 0 as the cutoff value.
+      RealScalar realAkk = numext::real(mat.coeffRef(k,k));
+      bool pivot_is_valid = (abs(realAkk) > RealScalar(0));
+
+      if(k==0 && !pivot_is_valid)
+      {
+        // The entire diagonal is zero, there is nothing more to do
+        // except filling the transpositions, and checking whether the matrix is zero.
+        sign = ZeroSign;
+        for(Index j = 0; j<size; ++j)
+        {
+          transpositions.coeffRef(j) = IndexType(j);
+          ret = ret && (mat.col(j).tail(size-j-1).array()==Scalar(0)).all();
+        }
+        return ret;
+      }
+
+      if((rs>0) && pivot_is_valid)
+        A21 /= realAkk;
+      else if(rs>0)
+        ret = ret && (A21.array()==Scalar(0)).all();
+
+      if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
+      else if(!pivot_is_valid) found_zero_pivot = true;
+
+      if (sign == PositiveSemiDef) {
+        if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite;
+      } else if (sign == NegativeSemiDef) {
+        if (realAkk > static_cast<RealScalar>(0)) sign = Indefinite;
+      } else if (sign == ZeroSign) {
+        if (realAkk > static_cast<RealScalar>(0)) sign = PositiveSemiDef;
+        else if (realAkk < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
+      }
+    }
+
+    return ret;
+  }
+
+  // Reference for the algorithm: Davis and Hager, "Multiple Rank
+  // Modifications of a Sparse Cholesky Factorization" (Algorithm 1)
+  // Trivial rearrangements of their computations (Timothy E. Holy)
+  // allow their algorithm to work for rank-1 updates even if the
+  // original matrix is not of full rank.
+  // Here only rank-1 updates are implemented, to reduce the
+  // requirement for intermediate storage and improve accuracy
+  template<typename MatrixType, typename WDerived>
+  static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, const typename MatrixType::RealScalar& sigma=1)
+  {
+    using numext::isfinite;
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename MatrixType::RealScalar RealScalar;
+
+    const Index size = mat.rows();
+    eigen_assert(mat.cols() == size && w.size()==size);
+
+    RealScalar alpha = 1;
+
+    // Apply the update
+    for (Index j = 0; j < size; j++)
+    {
+      // Check for termination due to an original decomposition of low-rank
+      if (!(isfinite)(alpha))
+        break;
+
+      // Update the diagonal terms
+      RealScalar dj = numext::real(mat.coeff(j,j));
+      Scalar wj = w.coeff(j);
+      RealScalar swj2 = sigma*numext::abs2(wj);
+      RealScalar gamma = dj*alpha + swj2;
+
+      mat.coeffRef(j,j) += swj2/alpha;
+      alpha += swj2/dj;
+
+
+      // Update the terms of L
+      Index rs = size-j-1;
+      w.tail(rs) -= wj * mat.col(j).tail(rs);
+      if(gamma != 0)
+        mat.col(j).tail(rs) += (sigma*numext::conj(wj)/gamma)*w.tail(rs);
+    }
+    return true;
+  }
+
+  template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
+  static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, const typename MatrixType::RealScalar& sigma=1)
+  {
+    // Apply the permutation to the input w
+    tmp = transpositions * w;
+
+    return ldlt_inplace<Lower>::updateInPlace(mat,tmp,sigma);
+  }
+};
+
+template<> struct ldlt_inplace<Upper>
+{
+  template<typename MatrixType, typename TranspositionType, typename Workspace>
+  static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
+  {
+    Transpose<MatrixType> matt(mat);
+    return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
+  }
+
+  template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
+  static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, const typename MatrixType::RealScalar& sigma=1)
+  {
+    Transpose<MatrixType> matt(mat);
+    return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);
+  }
+};
+
+template<typename MatrixType> struct LDLT_Traits<MatrixType,Lower>
+{
+  typedef const TriangularView<const MatrixType, UnitLower> MatrixL;
+  typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitUpper> MatrixU;
+  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
+  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
+};
+
+template<typename MatrixType> struct LDLT_Traits<MatrixType,Upper>
+{
+  typedef const TriangularView<const typename MatrixType::AdjointReturnType, UnitLower> MatrixL;
+  typedef const TriangularView<const MatrixType, UnitUpper> MatrixU;
+  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }
+  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }
+};
+
+} // end namespace internal
+
+/** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix
+  */
+template<typename MatrixType, int _UpLo>
+template<typename InputType>
+LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
+{
+  check_template_parameters();
+
+  eigen_assert(a.rows()==a.cols());
+  const Index size = a.rows();
+
+  m_matrix = a.derived();
+
+  // Compute matrix L1 norm = max abs column sum.
+  m_l1_norm = RealScalar(0);
+  // TODO move this code to SelfAdjointView
+  for (Index col = 0; col < size; ++col) {
+    RealScalar abs_col_sum;
+    if (_UpLo == Lower)
+      abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
+    else
+      abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
+    if (abs_col_sum > m_l1_norm)
+      m_l1_norm = abs_col_sum;
+  }
+
+  m_transpositions.resize(size);
+  m_isInitialized = false;
+  m_temporary.resize(size);
+  m_sign = internal::ZeroSign;
+
+  m_info = internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue;
+
+  m_isInitialized = true;
+  return *this;
+}
+
+/** Update the LDLT decomposition:  given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T.
+ * \param w a vector to be incorporated into the decomposition.
+ * \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1.
+ * \sa setZero()
+  */
+template<typename MatrixType, int _UpLo>
+template<typename Derived>
+LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename LDLT<MatrixType,_UpLo>::RealScalar& sigma)
+{
+  typedef typename TranspositionType::StorageIndex IndexType;
+  const Index size = w.rows();
+  if (m_isInitialized)
+  {
+    eigen_assert(m_matrix.rows()==size);
+  }
+  else
+  {
+    m_matrix.resize(size,size);
+    m_matrix.setZero();
+    m_transpositions.resize(size);
+    for (Index i = 0; i < size; i++)
+      m_transpositions.coeffRef(i) = IndexType(i);
+    m_temporary.resize(size);
+    m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;
+    m_isInitialized = true;
+  }
+
+  internal::ldlt_inplace<UpLo>::update(m_matrix, m_transpositions, m_temporary, w, sigma);
+
+  return *this;
+}
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType, int _UpLo>
+template<typename RhsType, typename DstType>
+void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+  eigen_assert(rhs.rows() == rows());
+  // dst = P b
+  dst = m_transpositions * rhs;
+
+  // dst = L^-1 (P b)
+  matrixL().solveInPlace(dst);
+
+  // dst = D^-1 (L^-1 P b)
+  // more precisely, use pseudo-inverse of D (see bug 241)
+  using std::abs;
+  const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
+  // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min())
+  // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS:
+  // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
+  // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
+  // diagonal element is not well justified and leads to numerical issues in some cases.
+  // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
+  // Using numeric_limits::min() gives us more robustness to denormals.
+  RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();
+
+  for (Index i = 0; i < vecD.size(); ++i)
+  {
+    if(abs(vecD(i)) > tolerance)
+      dst.row(i) /= vecD(i);
+    else
+      dst.row(i).setZero();
+  }
+
+  // dst = L^-T (D^-1 L^-1 P b)
+  matrixU().solveInPlace(dst);
+
+  // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b
+  dst = m_transpositions.transpose() * dst;
+}
+#endif
+
+/** \internal use x = ldlt_object.solve(x);
+  *
+  * This is the \em in-place version of solve().
+  *
+  * \param bAndX represents both the right-hand side matrix b and result x.
+  *
+  * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD.
+  *
+  * This version avoids a copy when the right hand side matrix b is not
+  * needed anymore.
+  *
+  * \sa LDLT::solve(), MatrixBase::ldlt()
+  */
+template<typename MatrixType,int _UpLo>
+template<typename Derived>
+bool LDLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
+{
+  eigen_assert(m_isInitialized && "LDLT is not initialized.");
+  eigen_assert(m_matrix.rows() == bAndX.rows());
+
+  bAndX = this->solve(bAndX);
+
+  return true;
+}
+
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: P^T L D L^* P.
+ * This function is provided for debug purpose. */
+template<typename MatrixType, int _UpLo>
+MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
+{
+  eigen_assert(m_isInitialized && "LDLT is not initialized.");
+  const Index size = m_matrix.rows();
+  MatrixType res(size,size);
+
+  // P
+  res.setIdentity();
+  res = transpositionsP() * res;
+  // L^* P
+  res = matrixU() * res;
+  // D(L^*P)
+  res = vectorD().real().asDiagonal() * res;
+  // L(DL^*P)
+  res = matrixL() * res;
+  // P^T (LDL^*P)
+  res = transpositionsP().transpose() * res;
+
+  return res;
+}
+
+/** \cholesky_module
+  * \returns the Cholesky decomposition with full pivoting without square root of \c *this
+  * \sa MatrixBase::ldlt()
+  */
+template<typename MatrixType, unsigned int UpLo>
+inline const LDLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
+SelfAdjointView<MatrixType, UpLo>::ldlt() const
+{
+  return LDLT<PlainObject,UpLo>(m_matrix);
+}
+
+/** \cholesky_module
+  * \returns the Cholesky decomposition with full pivoting without square root of \c *this
+  * \sa SelfAdjointView::ldlt()
+  */
+template<typename Derived>
+inline const LDLT<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::ldlt() const
+{
+  return LDLT<PlainObject>(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_LDLT_H
diff --git a/third-party/Eigen/src/Cholesky/LLT.h b/third-party/Eigen/src/Cholesky/LLT.h
new file mode 100644
index 00000000..e1624d21
--- /dev/null
+++ b/third-party/Eigen/src/Cholesky/LLT.h
@@ -0,0 +1,542 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_LLT_H
+#define EIGEN_LLT_H
+
+namespace Eigen {
+
+namespace internal{
+template<typename MatrixType, int UpLo> struct LLT_Traits;
+}
+
+/** \ingroup Cholesky_Module
+  *
+  * \class LLT
+  *
+  * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
+  * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
+  *               The other triangular part won't be read.
+  *
+  * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
+  * matrix A such that A = LL^* = U^*U, where L is lower triangular.
+  *
+  * While the Cholesky decomposition is particularly useful to solve selfadjoint problems like  D^*D x = b,
+  * for that purpose, we recommend the Cholesky decomposition without square root which is more stable
+  * and even faster. Nevertheless, this standard Cholesky decomposition remains useful in many other
+  * situations like generalised eigen problems with hermitian matrices.
+  *
+  * Remember that Cholesky decompositions are not rank-revealing. This LLT decomposition is only stable on positive definite matrices,
+  * use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations
+  * has a solution.
+  *
+  * Example: \include LLT_example.cpp
+  * Output: \verbinclude LLT_example.out
+  *
+  * \b Performance: for best performance, it is recommended to use a column-major storage format
+  * with the Lower triangular part (the default), or, equivalently, a row-major storage format
+  * with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization
+  * step, and rank-updates can be up to 3 times slower.
+  *
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+  *
+  * Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.
+  * Therefore, the strict lower part does not have to store correct values.
+  *
+  * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
+  */
+template<typename _MatrixType, int _UpLo> class LLT
+{
+  public:
+    typedef _MatrixType MatrixType;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+    typedef typename MatrixType::StorageIndex StorageIndex;
+
+    enum {
+      PacketSize = internal::packet_traits<Scalar>::size,
+      AlignmentMask = int(PacketSize)-1,
+      UpLo = _UpLo
+    };
+
+    typedef internal::LLT_Traits<MatrixType,UpLo> Traits;
+
+    /**
+      * \brief Default Constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via LLT::compute(const MatrixType&).
+      */
+    LLT() : m_matrix(), m_isInitialized(false) {}
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa LLT()
+      */
+    explicit LLT(Index size) : m_matrix(size, size),
+                    m_isInitialized(false) {}
+
+    template<typename InputType>
+    explicit LLT(const EigenBase<InputType>& matrix)
+      : m_matrix(matrix.rows(), matrix.cols()),
+        m_isInitialized(false)
+    {
+      compute(matrix.derived());
+    }
+
+    /** \brief Constructs a LDLT factorization from a given matrix
+      *
+      * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when
+      * \c MatrixType is a Eigen::Ref.
+      *
+      * \sa LLT(const EigenBase&)
+      */
+    template<typename InputType>
+    explicit LLT(EigenBase<InputType>& matrix)
+      : m_matrix(matrix.derived()),
+        m_isInitialized(false)
+    {
+      compute(matrix.derived());
+    }
+
+    /** \returns a view of the upper triangular matrix U */
+    inline typename Traits::MatrixU matrixU() const
+    {
+      eigen_assert(m_isInitialized && "LLT is not initialized.");
+      return Traits::getU(m_matrix);
+    }
+
+    /** \returns a view of the lower triangular matrix L */
+    inline typename Traits::MatrixL matrixL() const
+    {
+      eigen_assert(m_isInitialized && "LLT is not initialized.");
+      return Traits::getL(m_matrix);
+    }
+
+    /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A.
+      *
+      * Since this LLT class assumes anyway that the matrix A is invertible, the solution
+      * theoretically exists and is unique regardless of b.
+      *
+      * Example: \include LLT_solve.cpp
+      * Output: \verbinclude LLT_solve.out
+      *
+      * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt()
+      */
+    template<typename Rhs>
+    inline const Solve<LLT, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "LLT is not initialized.");
+      eigen_assert(m_matrix.rows()==b.rows()
+                && "LLT::solve(): invalid number of rows of the right hand side matrix b");
+      return Solve<LLT, Rhs>(*this, b.derived());
+    }
+
+    template<typename Derived>
+    void solveInPlace(const MatrixBase<Derived> &bAndX) const;
+
+    template<typename InputType>
+    LLT& compute(const EigenBase<InputType>& matrix);
+
+    /** \returns an estimate of the reciprocal condition number of the matrix of
+      *  which \c *this is the Cholesky decomposition.
+      */
+    RealScalar rcond() const
+    {
+      eigen_assert(m_isInitialized && "LLT is not initialized.");
+      eigen_assert(m_info == Success && "LLT failed because matrix appears to be negative");
+      return internal::rcond_estimate_helper(m_l1_norm, *this);
+    }
+
+    /** \returns the LLT decomposition matrix
+      *
+      * TODO: document the storage layout
+      */
+    inline const MatrixType& matrixLLT() const
+    {
+      eigen_assert(m_isInitialized && "LLT is not initialized.");
+      return m_matrix;
+    }
+
+    MatrixType reconstructedMatrix() const;
+
+
+    /** \brief Reports whether previous computation was successful.
+      *
+      * \returns \c Success if computation was succesful,
+      *          \c NumericalIssue if the matrix.appears not to be positive definite.
+      */
+    ComputationInfo info() const
+    {
+      eigen_assert(m_isInitialized && "LLT is not initialized.");
+      return m_info;
+    }
+
+    /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint.
+      *
+      * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as:
+      * \code x = decomposition.adjoint().solve(b) \endcode
+      */
+    const LLT& adjoint() const { return *this; };
+
+    inline Index rows() const { return m_matrix.rows(); }
+    inline Index cols() const { return m_matrix.cols(); }
+
+    template<typename VectorType>
+    LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1);
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
+
+  protected:
+
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+
+    /** \internal
+      * Used to compute and store L
+      * The strict upper part is not used and even not initialized.
+      */
+    MatrixType m_matrix;
+    RealScalar m_l1_norm;
+    bool m_isInitialized;
+    ComputationInfo m_info;
+};
+
+namespace internal {
+
+template<typename Scalar, int UpLo> struct llt_inplace;
+
+template<typename MatrixType, typename VectorType>
+static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma)
+{
+  using std::sqrt;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename MatrixType::ColXpr ColXpr;
+  typedef typename internal::remove_all<ColXpr>::type ColXprCleaned;
+  typedef typename ColXprCleaned::SegmentReturnType ColXprSegment;
+  typedef Matrix<Scalar,Dynamic,1> TempVectorType;
+  typedef typename TempVectorType::SegmentReturnType TempVecSegment;
+
+  Index n = mat.cols();
+  eigen_assert(mat.rows()==n && vec.size()==n);
+
+  TempVectorType temp;
+
+  if(sigma>0)
+  {
+    // This version is based on Givens rotations.
+    // It is faster than the other one below, but only works for updates,
+    // i.e., for sigma > 0
+    temp = sqrt(sigma) * vec;
+
+    for(Index i=0; i<n; ++i)
+    {
+      JacobiRotation<Scalar> g;
+      g.makeGivens(mat(i,i), -temp(i), &mat(i,i));
+
+      Index rs = n-i-1;
+      if(rs>0)
+      {
+        ColXprSegment x(mat.col(i).tail(rs));
+        TempVecSegment y(temp.tail(rs));
+        apply_rotation_in_the_plane(x, y, g);
+      }
+    }
+  }
+  else
+  {
+    temp = vec;
+    RealScalar beta = 1;
+    for(Index j=0; j<n; ++j)
+    {
+      RealScalar Ljj = numext::real(mat.coeff(j,j));
+      RealScalar dj = numext::abs2(Ljj);
+      Scalar wj = temp.coeff(j);
+      RealScalar swj2 = sigma*numext::abs2(wj);
+      RealScalar gamma = dj*beta + swj2;
+
+      RealScalar x = dj + swj2/beta;
+      if (x<=RealScalar(0))
+        return j;
+      RealScalar nLjj = sqrt(x);
+      mat.coeffRef(j,j) = nLjj;
+      beta += swj2/dj;
+
+      // Update the terms of L
+      Index rs = n-j-1;
+      if(rs)
+      {
+        temp.tail(rs) -= (wj/Ljj) * mat.col(j).tail(rs);
+        if(gamma != 0)
+          mat.col(j).tail(rs) = (nLjj/Ljj) * mat.col(j).tail(rs) + (nLjj * sigma*numext::conj(wj)/gamma)*temp.tail(rs);
+      }
+    }
+  }
+  return -1;
+}
+
+template<typename Scalar> struct llt_inplace<Scalar, Lower>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  template<typename MatrixType>
+  static Index unblocked(MatrixType& mat)
+  {
+    using std::sqrt;
+
+    eigen_assert(mat.rows()==mat.cols());
+    const Index size = mat.rows();
+    for(Index k = 0; k < size; ++k)
+    {
+      Index rs = size-k-1; // remaining size
+
+      Block<MatrixType,Dynamic,1> A21(mat,k+1,k,rs,1);
+      Block<MatrixType,1,Dynamic> A10(mat,k,0,1,k);
+      Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);
+
+      RealScalar x = numext::real(mat.coeff(k,k));
+      if (k>0) x -= A10.squaredNorm();
+      if (x<=RealScalar(0))
+        return k;
+      mat.coeffRef(k,k) = x = sqrt(x);
+      if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint();
+      if (rs>0) A21 /= x;
+    }
+    return -1;
+  }
+
+  template<typename MatrixType>
+  static Index blocked(MatrixType& m)
+  {
+    eigen_assert(m.rows()==m.cols());
+    Index size = m.rows();
+    if(size<32)
+      return unblocked(m);
+
+    Index blockSize = size/8;
+    blockSize = (blockSize/16)*16;
+    blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128));
+
+    for (Index k=0; k<size; k+=blockSize)
+    {
+      // partition the matrix:
+      //       A00 |  -  |  -
+      // lu  = A10 | A11 |  -
+      //       A20 | A21 | A22
+      Index bs = (std::min)(blockSize, size-k);
+      Index rs = size - k - bs;
+      Block<MatrixType,Dynamic,Dynamic> A11(m,k,   k,   bs,bs);
+      Block<MatrixType,Dynamic,Dynamic> A21(m,k+bs,k,   rs,bs);
+      Block<MatrixType,Dynamic,Dynamic> A22(m,k+bs,k+bs,rs,rs);
+
+      Index ret;
+      if((ret=unblocked(A11))>=0) return k+ret;
+      if(rs>0) A11.adjoint().template triangularView<Upper>().template solveInPlace<OnTheRight>(A21);
+      if(rs>0) A22.template selfadjointView<Lower>().rankUpdate(A21,typename NumTraits<RealScalar>::Literal(-1)); // bottleneck
+    }
+    return -1;
+  }
+
+  template<typename MatrixType, typename VectorType>
+  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
+  {
+    return Eigen::internal::llt_rank_update_lower(mat, vec, sigma);
+  }
+};
+
+template<typename Scalar> struct llt_inplace<Scalar, Upper>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+
+  template<typename MatrixType>
+  static EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat)
+  {
+    Transpose<MatrixType> matt(mat);
+    return llt_inplace<Scalar, Lower>::unblocked(matt);
+  }
+  template<typename MatrixType>
+  static EIGEN_STRONG_INLINE Index blocked(MatrixType& mat)
+  {
+    Transpose<MatrixType> matt(mat);
+    return llt_inplace<Scalar, Lower>::blocked(matt);
+  }
+  template<typename MatrixType, typename VectorType>
+  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma)
+  {
+    Transpose<MatrixType> matt(mat);
+    return llt_inplace<Scalar, Lower>::rankUpdate(matt, vec.conjugate(), sigma);
+  }
+};
+
+template<typename MatrixType> struct LLT_Traits<MatrixType,Lower>
+{
+  typedef const TriangularView<const MatrixType, Lower> MatrixL;
+  typedef const TriangularView<const typename MatrixType::AdjointReturnType, Upper> MatrixU;
+  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); }
+  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); }
+  static bool inplace_decomposition(MatrixType& m)
+  { return llt_inplace<typename MatrixType::Scalar, Lower>::blocked(m)==-1; }
+};
+
+template<typename MatrixType> struct LLT_Traits<MatrixType,Upper>
+{
+  typedef const TriangularView<const typename MatrixType::AdjointReturnType, Lower> MatrixL;
+  typedef const TriangularView<const MatrixType, Upper> MatrixU;
+  static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); }
+  static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); }
+  static bool inplace_decomposition(MatrixType& m)
+  { return llt_inplace<typename MatrixType::Scalar, Upper>::blocked(m)==-1; }
+};
+
+} // end namespace internal
+
+/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix
+  *
+  * \returns a reference to *this
+  *
+  * Example: \include TutorialLinAlgComputeTwice.cpp
+  * Output: \verbinclude TutorialLinAlgComputeTwice.out
+  */
+template<typename MatrixType, int _UpLo>
+template<typename InputType>
+LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>& a)
+{
+  check_template_parameters();
+
+  eigen_assert(a.rows()==a.cols());
+  const Index size = a.rows();
+  m_matrix.resize(size, size);
+  if (!internal::is_same_dense(m_matrix, a.derived()))
+    m_matrix = a.derived();
+
+  // Compute matrix L1 norm = max abs column sum.
+  m_l1_norm = RealScalar(0);
+  // TODO move this code to SelfAdjointView
+  for (Index col = 0; col < size; ++col) {
+    RealScalar abs_col_sum;
+    if (_UpLo == Lower)
+      abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>();
+    else
+      abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>();
+    if (abs_col_sum > m_l1_norm)
+      m_l1_norm = abs_col_sum;
+  }
+
+  m_isInitialized = true;
+  bool ok = Traits::inplace_decomposition(m_matrix);
+  m_info = ok ? Success : NumericalIssue;
+
+  return *this;
+}
+
+/** Performs a rank one update (or dowdate) of the current decomposition.
+  * If A = LL^* before the rank one update,
+  * then after it we have LL^* = A + sigma * v v^* where \a v must be a vector
+  * of same dimension.
+  */
+template<typename _MatrixType, int _UpLo>
+template<typename VectorType>
+LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType);
+  eigen_assert(v.size()==m_matrix.cols());
+  eigen_assert(m_isInitialized);
+  if(internal::llt_inplace<typename MatrixType::Scalar, UpLo>::rankUpdate(m_matrix,v,sigma)>=0)
+    m_info = NumericalIssue;
+  else
+    m_info = Success;
+
+  return *this;
+}
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType,int _UpLo>
+template<typename RhsType, typename DstType>
+void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+  dst = rhs;
+  solveInPlace(dst);
+}
+#endif
+
+/** \internal use x = llt_object.solve(x);
+  *
+  * This is the \em in-place version of solve().
+  *
+  * \param bAndX represents both the right-hand side matrix b and result x.
+  *
+  * This version avoids a copy when the right hand side matrix b is not needed anymore.
+  *
+  * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
+  * This function will const_cast it, so constness isn't honored here.
+  *
+  * \sa LLT::solve(), MatrixBase::llt()
+  */
+template<typename MatrixType, int _UpLo>
+template<typename Derived>
+void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
+{
+  eigen_assert(m_isInitialized && "LLT is not initialized.");
+  eigen_assert(m_matrix.rows()==bAndX.rows());
+  matrixL().solveInPlace(bAndX);
+  matrixU().solveInPlace(bAndX);
+}
+
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: L L^*.
+ * This function is provided for debug purpose. */
+template<typename MatrixType, int _UpLo>
+MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
+{
+  eigen_assert(m_isInitialized && "LLT is not initialized.");
+  return matrixL() * matrixL().adjoint().toDenseMatrix();
+}
+
+/** \cholesky_module
+  * \returns the LLT decomposition of \c *this
+  * \sa SelfAdjointView::llt()
+  */
+template<typename Derived>
+inline const LLT<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::llt() const
+{
+  return LLT<PlainObject>(derived());
+}
+
+/** \cholesky_module
+  * \returns the LLT decomposition of \c *this
+  * \sa SelfAdjointView::llt()
+  */
+template<typename MatrixType, unsigned int UpLo>
+inline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
+SelfAdjointView<MatrixType, UpLo>::llt() const
+{
+  return LLT<PlainObject,UpLo>(m_matrix);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_LLT_H
diff --git a/third-party/Eigen/src/Cholesky/LLT_LAPACKE.h b/third-party/Eigen/src/Cholesky/LLT_LAPACKE.h
new file mode 100644
index 00000000..bc6489e6
--- /dev/null
+++ b/third-party/Eigen/src/Cholesky/LLT_LAPACKE.h
@@ -0,0 +1,99 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to LAPACKe
+ *     LLt decomposition based on LAPACKE_?potrf function.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_LLT_LAPACKE_H
+#define EIGEN_LLT_LAPACKE_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename Scalar> struct lapacke_llt;
+
+#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \
+template<> struct lapacke_llt<EIGTYPE> \
+{ \
+  template<typename MatrixType> \
+  static inline Index potrf(MatrixType& m, char uplo) \
+  { \
+    lapack_int matrix_order; \
+    lapack_int size, lda, info, StorageOrder; \
+    EIGTYPE* a; \
+    eigen_assert(m.rows()==m.cols()); \
+    /* Set up parameters for ?potrf */ \
+    size = convert_index<lapack_int>(m.rows()); \
+    StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \
+    matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
+    a = &(m.coeffRef(0,0)); \
+    lda = convert_index<lapack_int>(m.outerStride()); \
+\
+    info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \
+    info = (info==0) ? -1 : info>0 ? info-1 : size; \
+    return info; \
+  } \
+}; \
+template<> struct llt_inplace<EIGTYPE, Lower> \
+{ \
+  template<typename MatrixType> \
+  static Index blocked(MatrixType& m) \
+  { \
+    return lapacke_llt<EIGTYPE>::potrf(m, 'L'); \
+  } \
+  template<typename MatrixType, typename VectorType> \
+  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
+  { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \
+}; \
+template<> struct llt_inplace<EIGTYPE, Upper> \
+{ \
+  template<typename MatrixType> \
+  static Index blocked(MatrixType& m) \
+  { \
+    return lapacke_llt<EIGTYPE>::potrf(m, 'U'); \
+  } \
+  template<typename MatrixType, typename VectorType> \
+  static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \
+  { \
+    Transpose<MatrixType> matt(mat); \
+    return llt_inplace<EIGTYPE, Lower>::rankUpdate(matt, vec.conjugate(), sigma); \
+  } \
+};
+
+EIGEN_LAPACKE_LLT(double, double, d)
+EIGEN_LAPACKE_LLT(float, float, s)
+EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z)
+EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_LLT_LAPACKE_H
diff --git a/third-party/Eigen/src/Core/Array.h b/third-party/Eigen/src/Core/Array.h
new file mode 100644
index 00000000..16770fc7
--- /dev/null
+++ b/third-party/Eigen/src/Core/Array.h
@@ -0,0 +1,329 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ARRAY_H
+#define EIGEN_ARRAY_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > : traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+  typedef ArrayXpr XprKind;
+  typedef ArrayBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> > XprBase;
+};
+}
+
+/** \class Array
+  * \ingroup Core_Module
+  *
+  * \brief General-purpose arrays with easy API for coefficient-wise operations
+  *
+  * The %Array class is very similar to the Matrix class. It provides
+  * general-purpose one- and two-dimensional arrays. The difference between the
+  * %Array and the %Matrix class is primarily in the API: the API for the
+  * %Array class provides easy access to coefficient-wise operations, while the
+  * API for the %Matrix class provides easy access to linear-algebra
+  * operations.
+  *
+  * See documentation of class Matrix for detailed information on the template parameters
+  * storage layout.
+  *
+  * This class can be extended with the help of the plugin mechanism described on the page
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN.
+  *
+  * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+class Array
+  : public PlainObjectBase<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+  public:
+
+    typedef PlainObjectBase<Array> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Array)
+
+    enum { Options = _Options };
+    typedef typename Base::PlainObject PlainObject;
+
+  protected:
+    template <typename Derived, typename OtherDerived, bool IsVector>
+    friend struct internal::conservative_resize_like_impl;
+
+    using Base::m_storage;
+
+  public:
+
+    using Base::base;
+    using Base::coeff;
+    using Base::coeffRef;
+
+    /**
+      * The usage of
+      *   using Base::operator=;
+      * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped
+      * the usage of 'using'. This should be done only for operator=.
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
+    {
+      return Base::operator=(other);
+    }
+
+    /** Set all the entries to \a value.
+      * \sa DenseBase::setConstant(), DenseBase::fill()
+      */
+    /* This overload is needed because the usage of
+      *   using Base::operator=;
+      * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped
+      * the usage of 'using'. This should be done only for operator=.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array& operator=(const Scalar &value)
+    {
+      Base::setConstant(value);
+      return *this;
+    }
+
+    /** Copies the value of the expression \a other into \c *this with automatic resizing.
+      *
+      * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
+      * it will be initialized.
+      *
+      * Note that copying a row-vector into a vector (and conversely) is allowed.
+      * The resizing, if any, is then done in the appropriate way so that row-vectors
+      * remain row-vectors and vectors remain vectors.
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array& operator=(const DenseBase<OtherDerived>& other)
+    {
+      return Base::_set(other);
+    }
+
+    /** This is a special case of the templated operator=. Its purpose is to
+      * prevent a default operator= from hiding the templated operator=.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array& operator=(const Array& other)
+    {
+      return Base::_set(other);
+    }
+    
+    /** Default constructor.
+      *
+      * For fixed-size matrices, does nothing.
+      *
+      * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix
+      * is called a null matrix. This constructor is the unique way to create null matrices: resizing
+      * a matrix to 0 is not supported.
+      *
+      * \sa resize(Index,Index)
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array() : Base()
+    {
+      Base::_check_template_params();
+      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+    }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    // FIXME is it still needed ??
+    /** \internal */
+    EIGEN_DEVICE_FUNC
+    Array(internal::constructor_without_unaligned_array_assert)
+      : Base(internal::constructor_without_unaligned_array_assert())
+    {
+      Base::_check_template_params();
+      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+    }
+#endif
+
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
+      : Base(std::move(other))
+    {
+      Base::_check_template_params();
+    }
+    EIGEN_DEVICE_FUNC
+    Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
+    {
+      other.swap(*this);
+      return *this;
+    }
+#endif
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE explicit Array(const T& x)
+    {
+      Base::_check_template_params();
+      Base::template _init1<T>(x);
+    }
+
+    template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
+    {
+      Base::_check_template_params();
+      this->template _init2<T0,T1>(val0, val1);
+    }
+    #else
+    /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */
+    EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
+    /** Constructs a vector or row-vector with given dimension. \only_for_vectors
+      *
+      * Note that this is only useful for dynamic-size vectors. For fixed-size vectors,
+      * it is redundant to pass the dimension here, so it makes more sense to use the default
+      * constructor Array() instead.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE explicit Array(Index dim);
+    /** constructs an initialized 1x1 Array with the given coefficient */
+    Array(const Scalar& value);
+    /** constructs an uninitialized array with \a rows rows and \a cols columns.
+      *
+      * This is useful for dynamic-size arrays. For fixed-size arrays,
+      * it is redundant to pass these parameters, so one should use the default constructor
+      * Array() instead. */
+    Array(Index rows, Index cols);
+    /** constructs an initialized 2D vector with given coefficients */
+    Array(const Scalar& val0, const Scalar& val1);
+    #endif
+
+    /** constructs an initialized 3D vector with given coefficients */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
+    {
+      Base::_check_template_params();
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3)
+      m_storage.data()[0] = val0;
+      m_storage.data()[1] = val1;
+      m_storage.data()[2] = val2;
+    }
+    /** constructs an initialized 4D vector with given coefficients */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
+    {
+      Base::_check_template_params();
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4)
+      m_storage.data()[0] = val0;
+      m_storage.data()[1] = val1;
+      m_storage.data()[2] = val2;
+      m_storage.data()[3] = val3;
+    }
+
+    /** Copy constructor */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array(const Array& other)
+            : Base(other)
+    { }
+
+  private:
+    struct PrivateType {};
+  public:
+
+    /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
+                              typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
+                                                           PrivateType>::type = PrivateType())
+      : Base(other.derived())
+    { }
+
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
+
+    #ifdef EIGEN_ARRAY_PLUGIN
+    #include EIGEN_ARRAY_PLUGIN
+    #endif
+
+  private:
+
+    template<typename MatrixType, typename OtherDerived, bool SwapPointers>
+    friend struct internal::matrix_swap_impl;
+};
+
+/** \defgroup arraytypedefs Global array typedefs
+  * \ingroup Core_Module
+  *
+  * Eigen defines several typedef shortcuts for most common 1D and 2D array types.
+  *
+  * The general patterns are the following:
+  *
+  * \c ArrayRowsColsType where \c Rows and \c Cols can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size,
+  * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd
+  * for complex double.
+  *
+  * For example, \c Array33d is a fixed-size 3x3 array type of doubles, and \c ArrayXXf is a dynamic-size matrix of floats.
+  *
+  * There are also \c ArraySizeType which are self-explanatory. For example, \c Array4cf is
+  * a fixed-size 1D array of 4 complex floats.
+  *
+  * \sa class Array
+  */
+
+#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix)   \
+/** \ingroup arraytypedefs */                                    \
+typedef Array<Type, Size, Size> Array##SizeSuffix##SizeSuffix##TypeSuffix;  \
+/** \ingroup arraytypedefs */                                    \
+typedef Array<Type, Size, 1>    Array##SizeSuffix##TypeSuffix;
+
+#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size)         \
+/** \ingroup arraytypedefs */                                    \
+typedef Array<Type, Size, Dynamic> Array##Size##X##TypeSuffix;  \
+/** \ingroup arraytypedefs */                                    \
+typedef Array<Type, Dynamic, Size> Array##X##Size##TypeSuffix;
+
+#define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \
+EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
+EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
+
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int,                  i)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float,                f)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double,               d)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<float>,  cf)
+EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
+
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS
+
+#undef EIGEN_MAKE_ARRAY_TYPEDEFS_LARGE
+
+#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \
+using Eigen::Matrix##SizeSuffix##TypeSuffix; \
+using Eigen::Vector##SizeSuffix##TypeSuffix; \
+using Eigen::RowVector##SizeSuffix##TypeSuffix;
+
+#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \
+
+#define EIGEN_USING_ARRAY_TYPEDEFS \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \
+EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd)
+
+} // end namespace Eigen
+
+#endif // EIGEN_ARRAY_H
diff --git a/third-party/Eigen/src/Core/ArrayBase.h b/third-party/Eigen/src/Core/ArrayBase.h
new file mode 100644
index 00000000..33f644e2
--- /dev/null
+++ b/third-party/Eigen/src/Core/ArrayBase.h
@@ -0,0 +1,226 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ARRAYBASE_H
+#define EIGEN_ARRAYBASE_H
+
+namespace Eigen { 
+
+template<typename ExpressionType> class MatrixWrapper;
+
+/** \class ArrayBase
+  * \ingroup Core_Module
+  *
+  * \brief Base class for all 1D and 2D array, and related expressions
+  *
+  * An array is similar to a dense vector or matrix. While matrices are mathematical
+  * objects with well defined linear algebra operators, an array is just a collection
+  * of scalar values arranged in a one or two dimensionnal fashion. As the main consequence,
+  * all operations applied to an array are performed coefficient wise. Furthermore,
+  * arrays support scalar math functions of the c++ standard library (e.g., std::sin(x)), and convenient
+  * constructors allowing to easily write generic code working for both scalar values
+  * and arrays.
+  *
+  * This class is the base that is inherited by all array expression types.
+  *
+  * \tparam Derived is the derived type, e.g., an array or an expression type.
+  *
+  * This class can be extended with the help of the plugin mechanism described on the page
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN.
+  *
+  * \sa class MatrixBase, \ref TopicClassHierarchy
+  */
+template<typename Derived> class ArrayBase
+  : public DenseBase<Derived>
+{
+  public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** The base class for a given storage type. */
+    typedef ArrayBase StorageBaseType;
+
+    typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
+
+    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+
+    typedef DenseBase<Derived> Base;
+    using Base::RowsAtCompileTime;
+    using Base::ColsAtCompileTime;
+    using Base::SizeAtCompileTime;
+    using Base::MaxRowsAtCompileTime;
+    using Base::MaxColsAtCompileTime;
+    using Base::MaxSizeAtCompileTime;
+    using Base::IsVectorAtCompileTime;
+    using Base::Flags;
+    
+    using Base::derived;
+    using Base::const_cast_derived;
+    using Base::rows;
+    using Base::cols;
+    using Base::size;
+    using Base::coeff;
+    using Base::coeffRef;
+    using Base::lazyAssign;
+    using Base::operator=;
+    using Base::operator+=;
+    using Base::operator-=;
+    using Base::operator*=;
+    using Base::operator/=;
+
+    typedef typename Base::CoeffReturnType CoeffReturnType;
+
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef typename Base::PlainObject PlainObject;
+
+    /** \internal Represents a matrix with all coefficients equal to one another*/
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
+#define EIGEN_DOC_UNARY_ADDONS(X,Y)
+#   include "../plugins/CommonCwiseUnaryOps.h"
+#   include "../plugins/MatrixCwiseUnaryOps.h"
+#   include "../plugins/ArrayCwiseUnaryOps.h"
+#   include "../plugins/CommonCwiseBinaryOps.h"
+#   include "../plugins/MatrixCwiseBinaryOps.h"
+#   include "../plugins/ArrayCwiseBinaryOps.h"
+#   ifdef EIGEN_ARRAYBASE_PLUGIN
+#     include EIGEN_ARRAYBASE_PLUGIN
+#   endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_UNARY_ADDONS
+
+    /** Special case of the template operator=, in order to prevent the compiler
+      * from generating a default operator= (issue hit with g++ 4.1)
+      */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator=(const ArrayBase& other)
+    {
+      internal::call_assignment(derived(), other.derived());
+      return derived();
+    }
+    
+    /** Set all the entries to \a value.
+      * \sa DenseBase::setConstant(), DenseBase::fill() */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator=(const Scalar &value)
+    { Base::setConstant(value); return derived(); }
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator+=(const Scalar& scalar);
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator-=(const Scalar& scalar);
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator+=(const ArrayBase<OtherDerived>& other);
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator-=(const ArrayBase<OtherDerived>& other);
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator*=(const ArrayBase<OtherDerived>& other);
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator/=(const ArrayBase<OtherDerived>& other);
+
+  public:
+    EIGEN_DEVICE_FUNC
+    ArrayBase<Derived>& array() { return *this; }
+    EIGEN_DEVICE_FUNC
+    const ArrayBase<Derived>& array() const { return *this; }
+
+    /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
+      * \sa MatrixBase::array() */
+    EIGEN_DEVICE_FUNC
+    MatrixWrapper<Derived> matrix() { return MatrixWrapper<Derived>(derived()); }
+    EIGEN_DEVICE_FUNC
+    const MatrixWrapper<const Derived> matrix() const { return MatrixWrapper<const Derived>(derived()); }
+
+//     template<typename Dest>
+//     inline void evalTo(Dest& dst) const { dst = matrix(); }
+
+  protected:
+    EIGEN_DEFAULT_COPY_CONSTRUCTOR(ArrayBase)
+    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(ArrayBase)
+
+  private:
+    explicit ArrayBase(Index);
+    ArrayBase(Index,Index);
+    template<typename OtherDerived> explicit ArrayBase(const ArrayBase<OtherDerived>&);
+  protected:
+    // mixing arrays and matrices is not legal
+    template<typename OtherDerived> Derived& operator+=(const MatrixBase<OtherDerived>& )
+    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+    // mixing arrays and matrices is not legal
+    template<typename OtherDerived> Derived& operator-=(const MatrixBase<OtherDerived>& )
+    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+};
+
+/** replaces \c *this by \c *this - \a other.
+  *
+  * \returns a reference to \c *this
+  */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
+{
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+
+/** replaces \c *this by \c *this + \a other.
+  *
+  * \returns a reference to \c *this
+  */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
+{
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+
+/** replaces \c *this by \c *this * \a other coefficient wise.
+  *
+  * \returns a reference to \c *this
+  */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
+{
+  call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+
+/** replaces \c *this by \c *this / \a other coefficient wise.
+  *
+  * \returns a reference to \c *this
+  */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
+ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
+{
+  call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_ARRAYBASE_H
diff --git a/third-party/Eigen/src/Core/ArrayWrapper.h b/third-party/Eigen/src/Core/ArrayWrapper.h
new file mode 100644
index 00000000..688aadd6
--- /dev/null
+++ b/third-party/Eigen/src/Core/ArrayWrapper.h
@@ -0,0 +1,209 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ARRAYWRAPPER_H
+#define EIGEN_ARRAYWRAPPER_H
+
+namespace Eigen { 
+
+/** \class ArrayWrapper
+  * \ingroup Core_Module
+  *
+  * \brief Expression of a mathematical vector or matrix as an array object
+  *
+  * This class is the return type of MatrixBase::array(), and most of the time
+  * this is the only way it is use.
+  *
+  * \sa MatrixBase::array(), class MatrixWrapper
+  */
+
+namespace internal {
+template<typename ExpressionType>
+struct traits<ArrayWrapper<ExpressionType> >
+  : public traits<typename remove_all<typename ExpressionType::Nested>::type >
+{
+  typedef ArrayXpr XprKind;
+  // Let's remove NestByRefBit
+  enum {
+    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
+    LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
+    Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
+  };
+};
+}
+
+template<typename ExpressionType>
+class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
+{
+  public:
+    typedef ArrayBase<ArrayWrapper> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper)
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper)
+    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
+
+    typedef typename internal::conditional<
+                       internal::is_lvalue<ExpressionType>::value,
+                       Scalar,
+                       const Scalar
+                     >::type ScalarWithConstIfNotLvalue;
+
+    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
+
+    using Base::coeffRef;
+
+    EIGEN_DEVICE_FUNC
+    explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+
+    EIGEN_DEVICE_FUNC
+    inline Index rows() const { return m_expression.rows(); }
+    EIGEN_DEVICE_FUNC
+    inline Index cols() const { return m_expression.cols(); }
+    EIGEN_DEVICE_FUNC
+    inline Index outerStride() const { return m_expression.outerStride(); }
+    EIGEN_DEVICE_FUNC
+    inline Index innerStride() const { return m_expression.innerStride(); }
+
+    EIGEN_DEVICE_FUNC
+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
+    EIGEN_DEVICE_FUNC
+    inline const Scalar* data() const { return m_expression.data(); }
+
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index rowId, Index colId) const
+    {
+      return m_expression.coeffRef(rowId, colId);
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index index) const
+    {
+      return m_expression.coeffRef(index);
+    }
+
+    template<typename Dest>
+    EIGEN_DEVICE_FUNC
+    inline void evalTo(Dest& dst) const { dst = m_expression; }
+
+    const typename internal::remove_all<NestedExpressionType>::type& 
+    EIGEN_DEVICE_FUNC
+    nestedExpression() const 
+    {
+      return m_expression;
+    }
+
+    /** Forwards the resizing request to the nested expression
+      * \sa DenseBase::resize(Index)  */
+    EIGEN_DEVICE_FUNC
+    void resize(Index newSize) { m_expression.resize(newSize); }
+    /** Forwards the resizing request to the nested expression
+      * \sa DenseBase::resize(Index,Index)*/
+    EIGEN_DEVICE_FUNC
+    void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
+
+  protected:
+    NestedExpressionType m_expression;
+};
+
+/** \class MatrixWrapper
+  * \ingroup Core_Module
+  *
+  * \brief Expression of an array as a mathematical vector or matrix
+  *
+  * This class is the return type of ArrayBase::matrix(), and most of the time
+  * this is the only way it is use.
+  *
+  * \sa MatrixBase::matrix(), class ArrayWrapper
+  */
+
+namespace internal {
+template<typename ExpressionType>
+struct traits<MatrixWrapper<ExpressionType> >
+ : public traits<typename remove_all<typename ExpressionType::Nested>::type >
+{
+  typedef MatrixXpr XprKind;
+  // Let's remove NestByRefBit
+  enum {
+    Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
+    LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
+    Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
+  };
+};
+}
+
+template<typename ExpressionType>
+class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
+{
+  public:
+    typedef MatrixBase<MatrixWrapper<ExpressionType> > Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper)
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper)
+    typedef typename internal::remove_all<ExpressionType>::type NestedExpression;
+
+    typedef typename internal::conditional<
+                       internal::is_lvalue<ExpressionType>::value,
+                       Scalar,
+                       const Scalar
+                     >::type ScalarWithConstIfNotLvalue;
+
+    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
+
+    using Base::coeffRef;
+
+    EIGEN_DEVICE_FUNC
+    explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
+
+    EIGEN_DEVICE_FUNC
+    inline Index rows() const { return m_expression.rows(); }
+    EIGEN_DEVICE_FUNC
+    inline Index cols() const { return m_expression.cols(); }
+    EIGEN_DEVICE_FUNC
+    inline Index outerStride() const { return m_expression.outerStride(); }
+    EIGEN_DEVICE_FUNC
+    inline Index innerStride() const { return m_expression.innerStride(); }
+
+    EIGEN_DEVICE_FUNC
+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
+    EIGEN_DEVICE_FUNC
+    inline const Scalar* data() const { return m_expression.data(); }
+
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index rowId, Index colId) const
+    {
+      return m_expression.derived().coeffRef(rowId, colId);
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index index) const
+    {
+      return m_expression.coeffRef(index);
+    }
+
+    EIGEN_DEVICE_FUNC
+    const typename internal::remove_all<NestedExpressionType>::type& 
+    nestedExpression() const 
+    {
+      return m_expression;
+    }
+
+    /** Forwards the resizing request to the nested expression
+      * \sa DenseBase::resize(Index)  */
+    EIGEN_DEVICE_FUNC
+    void resize(Index newSize) { m_expression.resize(newSize); }
+    /** Forwards the resizing request to the nested expression
+      * \sa DenseBase::resize(Index,Index)*/
+    EIGEN_DEVICE_FUNC
+    void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
+
+  protected:
+    NestedExpressionType m_expression;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_ARRAYWRAPPER_H
diff --git a/third-party/Eigen/src/Core/Assign.h b/third-party/Eigen/src/Core/Assign.h
new file mode 100644
index 00000000..53806ba3
--- /dev/null
+++ b/third-party/Eigen/src/Core/Assign.h
@@ -0,0 +1,90 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007 Michael Olbrich <michael.olbrich@gmx.net>
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ASSIGN_H
+#define EIGEN_ASSIGN_H
+
+namespace Eigen {
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
+  ::lazyAssign(const DenseBase<OtherDerived>& other)
+{
+  enum{
+    SameType = internal::is_same<typename Derived::Scalar,typename OtherDerived::Scalar>::value
+  };
+
+  EIGEN_STATIC_ASSERT_LVALUE(Derived)
+  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
+  EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+  eigen_assert(rows() == other.rows() && cols() == other.cols());
+  internal::call_assignment_no_alias(derived(),other.derived());
+  
+  return derived();
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+{
+  internal::call_assignment(derived(), other.derived());
+  return derived();
+}
+
+template<typename Derived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
+{
+  internal::call_assignment(derived(), other.derived());
+  return derived();
+}
+
+template<typename Derived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
+{
+  internal::call_assignment(derived(), other.derived());
+  return derived();
+}
+
+template<typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+{
+  internal::call_assignment(derived(), other.derived());
+  return derived();
+}
+
+template<typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
+{
+  internal::call_assignment(derived(), other.derived());
+  return derived();
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
+{
+  other.derived().evalTo(derived());
+  return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_ASSIGN_H
diff --git a/third-party/Eigen/src/Core/AssignEvaluator.h b/third-party/Eigen/src/Core/AssignEvaluator.h
new file mode 100644
index 00000000..dbe435d8
--- /dev/null
+++ b/third-party/Eigen/src/Core/AssignEvaluator.h
@@ -0,0 +1,935 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ASSIGN_EVALUATOR_H
+#define EIGEN_ASSIGN_EVALUATOR_H
+
+namespace Eigen {
+
+// This implementation is based on Assign.h
+
+namespace internal {
+  
+/***************************************************************************
+* Part 1 : the logic deciding a strategy for traversal and unrolling       *
+***************************************************************************/
+
+// copy_using_evaluator_traits is based on assign_traits
+
+template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
+struct copy_using_evaluator_traits
+{
+  typedef typename DstEvaluator::XprType Dst;
+  typedef typename Dst::Scalar DstScalar;
+  
+  enum {
+    DstFlags = DstEvaluator::Flags,
+    SrcFlags = SrcEvaluator::Flags
+  };
+  
+public:
+  enum {
+    DstAlignment = DstEvaluator::Alignment,
+    SrcAlignment = SrcEvaluator::Alignment,
+    DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
+    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
+  };
+
+private:
+  enum {
+    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
+              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
+              : int(Dst::RowsAtCompileTime),
+    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
+              : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
+              : int(Dst::MaxRowsAtCompileTime),
+    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
+    MaxSizeAtCompileTime = Dst::SizeAtCompileTime
+  };
+
+  // TODO distinguish between linear traversal and inner-traversals
+  typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
+  typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
+
+  enum {
+    LinearPacketSize = unpacket_traits<LinearPacketType>::size,
+    InnerPacketSize = unpacket_traits<InnerPacketType>::size
+  };
+
+public:
+  enum {
+    LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
+    InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
+  };
+
+private:
+  enum {
+    DstIsRowMajor = DstFlags&RowMajorBit,
+    SrcIsRowMajor = SrcFlags&RowMajorBit,
+    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
+    MightVectorize = bool(StorageOrdersAgree)
+                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
+                  && bool(functor_traits<AssignFunc>::PacketAccess),
+    MayInnerVectorize  = MightVectorize
+                       && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
+                       && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
+                       && (EIGEN_UNALIGNED_VECTORIZE  || int(JointAlignment)>=int(InnerRequiredAlignment)),
+    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
+    MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
+                       && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
+      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
+         so it's only good for large enough sizes. */
+    MaySliceVectorize  = bool(MightVectorize) && bool(DstHasDirectAccess)
+                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
+      /* slice vectorization can be slow, so we only want it if the slices are big, which is
+         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
+         in a fixed-size matrix
+         However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
+  };
+
+public:
+  enum {
+    Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
+              : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
+              : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
+              : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
+              : int(MayLinearize)        ? int(LinearTraversal)
+                                         : int(DefaultTraversal),
+    Vectorized = int(Traversal) == InnerVectorizedTraversal
+              || int(Traversal) == LinearVectorizedTraversal
+              || int(Traversal) == SliceVectorizedTraversal
+  };
+
+  typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
+
+private:
+  enum {
+    ActualPacketSize    = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
+                        : Vectorized ? InnerPacketSize
+                        : 1,
+    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
+    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
+                       && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
+    MayUnrollInner      = int(InnerSize) != Dynamic
+                       && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
+  };
+
+public:
+  enum {
+    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
+                ? (
+                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
+                  : int(MayUnrollInner)      ? int(InnerUnrolling)
+                                             : int(NoUnrolling)
+                  )
+              : int(Traversal) == int(LinearVectorizedTraversal)
+                ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
+                          ? int(CompleteUnrolling)
+                          : int(NoUnrolling) )
+              : int(Traversal) == int(LinearTraversal)
+                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) 
+                                              : int(NoUnrolling) )
+#if EIGEN_UNALIGNED_VECTORIZE
+              : int(Traversal) == int(SliceVectorizedTraversal)
+                ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
+                                         : int(NoUnrolling) )
+#endif
+              : int(NoUnrolling)
+  };
+
+#ifdef EIGEN_DEBUG_ASSIGN
+  static void debug()
+  {
+    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
+    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
+    std::cerr.setf(std::ios::hex, std::ios::basefield);
+    std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
+    std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
+    std::cerr.unsetf(std::ios::hex);
+    EIGEN_DEBUG_VAR(DstAlignment)
+    EIGEN_DEBUG_VAR(SrcAlignment)
+    EIGEN_DEBUG_VAR(LinearRequiredAlignment)
+    EIGEN_DEBUG_VAR(InnerRequiredAlignment)
+    EIGEN_DEBUG_VAR(JointAlignment)
+    EIGEN_DEBUG_VAR(InnerSize)
+    EIGEN_DEBUG_VAR(InnerMaxSize)
+    EIGEN_DEBUG_VAR(LinearPacketSize)
+    EIGEN_DEBUG_VAR(InnerPacketSize)
+    EIGEN_DEBUG_VAR(ActualPacketSize)
+    EIGEN_DEBUG_VAR(StorageOrdersAgree)
+    EIGEN_DEBUG_VAR(MightVectorize)
+    EIGEN_DEBUG_VAR(MayLinearize)
+    EIGEN_DEBUG_VAR(MayInnerVectorize)
+    EIGEN_DEBUG_VAR(MayLinearVectorize)
+    EIGEN_DEBUG_VAR(MaySliceVectorize)
+    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
+    EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
+    EIGEN_DEBUG_VAR(UnrollingLimit)
+    EIGEN_DEBUG_VAR(MayUnrollCompletely)
+    EIGEN_DEBUG_VAR(MayUnrollInner)
+    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
+    std::cerr << std::endl;
+  }
+#endif
+};
+
+/***************************************************************************
+* Part 2 : meta-unrollers
+***************************************************************************/
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
+{
+  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
+  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+  typedef typename DstEvaluatorType::XprType DstXprType;
+  
+  enum {
+    outer = Index / DstXprType::InnerSizeAtCompileTime,
+    inner = Index % DstXprType::InnerSizeAtCompileTime
+  };
+
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    kernel.assignCoeffByOuterInner(outer, inner);
+    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
+  }
+};
+
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+
+template<typename Kernel, int Index_, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
+  {
+    kernel.assignCoeffByOuterInner(outer, Index_);
+    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
+  }
+};
+
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
+  {
+    kernel.assignCoeff(Index);
+    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
+  }
+};
+
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename Kernel, int Index, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling
+{
+  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
+  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+  typedef typename DstEvaluatorType::XprType DstXprType;
+  typedef typename Kernel::PacketType PacketType;
+  
+  enum {
+    outer = Index / DstXprType::InnerSizeAtCompileTime,
+    inner = Index % DstXprType::InnerSizeAtCompileTime,
+    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
+    DstAlignment = Kernel::AssignmentTraits::DstAlignment
+  };
+
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
+    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
+    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
+  }
+};
+
+template<typename Kernel, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
+};
+
+template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
+struct copy_using_evaluator_innervec_InnerUnrolling
+{
+  typedef typename Kernel::PacketType PacketType;
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
+  {
+    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
+    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
+    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
+  }
+};
+
+template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
+struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
+};
+
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+
+// dense_assignment_loop is based on assign_impl
+
+template<typename Kernel,
+         int Traversal = Kernel::AssignmentTraits::Traversal,
+         int Unrolling = Kernel::AssignmentTraits::Unrolling>
+struct dense_assignment_loop;
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
+{
+  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
+  {
+    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
+      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
+        kernel.assignCoeffByOuterInner(outer, inner);
+      }
+    }
+  }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+  }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+
+    const Index outerSize = kernel.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
+  }
+};
+
+/***************************
+*** Linear vectorization ***
+***************************/
+
+
+// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
+// of the non vectorizable beginning and ending parts
+
+template <bool IsAligned = false>
+struct unaligned_dense_assignment_loop
+{
+  // if IsAligned = true, then do nothing
+  template <typename Kernel>
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
+};
+
+template <>
+struct unaligned_dense_assignment_loop<false>
+{
+  // MSVC must not inline this functions. If it does, it fails to optimize the
+  // packet access path.
+  // FIXME check which version exhibits this issue
+#if EIGEN_COMP_MSVC
+  template <typename Kernel>
+  static EIGEN_DONT_INLINE void run(Kernel &kernel,
+                                    Index start,
+                                    Index end)
+#else
+  template <typename Kernel>
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
+                                      Index start,
+                                      Index end)
+#endif
+  {
+    for (Index index = start; index < end; ++index)
+      kernel.assignCoeff(index);
+  }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    const Index size = kernel.size();
+    typedef typename Kernel::Scalar Scalar;
+    typedef typename Kernel::PacketType PacketType;
+    enum {
+      requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
+      packetSize = unpacket_traits<PacketType>::size,
+      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
+      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
+                                                            : int(Kernel::AssignmentTraits::DstAlignment),
+      srcAlignment = Kernel::AssignmentTraits::JointAlignment
+    };
+    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
+    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
+
+    unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
+
+    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
+      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
+
+    unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
+  }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+    typedef typename Kernel::PacketType PacketType;
+    
+    enum { size = DstXprType::SizeAtCompileTime,
+           packetSize =unpacket_traits<PacketType>::size,
+           alignedSize = (size/packetSize)*packetSize };
+
+    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
+    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
+  }
+};
+
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
+{
+  typedef typename Kernel::PacketType PacketType;
+  enum {
+    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
+    DstAlignment = Kernel::AssignmentTraits::DstAlignment
+  };
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    const Index innerSize = kernel.innerSize();
+    const Index outerSize = kernel.outerSize();
+    const Index packetSize = unpacket_traits<PacketType>::size;
+    for(Index outer = 0; outer < outerSize; ++outer)
+      for(Index inner = 0; inner < innerSize; inner+=packetSize)
+        kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
+  }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+  }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+    typedef typename Kernel::AssignmentTraits Traits;
+    const Index outerSize = kernel.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
+                                                   Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
+  }
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    const Index size = kernel.size();
+    for(Index i = 0; i < size; ++i)
+      kernel.assignCoeff(i);
+  }
+};
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+  }
+};
+
+/**************************
+*** Slice vectorization ***
+***************************/
+
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    typedef typename Kernel::Scalar Scalar;
+    typedef typename Kernel::PacketType PacketType;
+    enum {
+      packetSize = unpacket_traits<PacketType>::size,
+      requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
+      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
+      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
+      dstAlignment = alignable ? int(requestedAlignment)
+                               : int(Kernel::AssignmentTraits::DstAlignment)
+    };
+    const Scalar *dst_ptr = kernel.dstDataPtr();
+    if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
+    {
+      // the pointer is not aligend-on scalar, so alignment is not possible
+      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
+    }
+    const Index packetAlignedMask = packetSize - 1;
+    const Index innerSize = kernel.innerSize();
+    const Index outerSize = kernel.outerSize();
+    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
+    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
+
+    for(Index outer = 0; outer < outerSize; ++outer)
+    {
+      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+      // do the non-vectorizable part of the assignment
+      for(Index inner = 0; inner<alignedStart ; ++inner)
+        kernel.assignCoeffByOuterInner(outer, inner);
+
+      // do the vectorizable part of the assignment
+      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
+        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
+
+      // do the non-vectorizable part of the assignment
+      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
+        kernel.assignCoeffByOuterInner(outer, inner);
+
+      alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
+    }
+  }
+};
+
+#if EIGEN_UNALIGNED_VECTORIZE
+template<typename Kernel>
+struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
+  {
+    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+    typedef typename Kernel::PacketType PacketType;
+
+    enum { size = DstXprType::InnerSizeAtCompileTime,
+           packetSize =unpacket_traits<PacketType>::size,
+           vectorizableSize = (size/packetSize)*packetSize };
+
+    for(Index outer = 0; outer < kernel.outerSize(); ++outer)
+    {
+      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
+      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
+    }
+  }
+};
+#endif
+
+
+/***************************************************************************
+* Part 4 : Generic dense assignment kernel
+***************************************************************************/
+
+// This class generalize the assignment of a coefficient (or packet) from one dense evaluator
+// to another dense writable evaluator.
+// It is parametrized by the two evaluators, and the actual assignment functor.
+// This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
+// One can customize the assignment using this generic dense_assignment_kernel with different
+// functors, or by completely overloading it, by-passing a functor.
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
+class generic_dense_assignment_kernel
+{
+protected:
+  typedef typename DstEvaluatorTypeT::XprType DstXprType;
+  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
+public:
+  
+  typedef DstEvaluatorTypeT DstEvaluatorType;
+  typedef SrcEvaluatorTypeT SrcEvaluatorType;
+  typedef typename DstEvaluatorType::Scalar Scalar;
+  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
+  typedef typename AssignmentTraits::PacketType PacketType;
+  
+  
+  EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+    : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
+  {
+    #ifdef EIGEN_DEBUG_ASSIGN
+    AssignmentTraits::debug();
+    #endif
+  }
+  
+  EIGEN_DEVICE_FUNC Index size() const        { return m_dstExpr.size(); }
+  EIGEN_DEVICE_FUNC Index innerSize() const   { return m_dstExpr.innerSize(); }
+  EIGEN_DEVICE_FUNC Index outerSize() const   { return m_dstExpr.outerSize(); }
+  EIGEN_DEVICE_FUNC Index rows() const        { return m_dstExpr.rows(); }
+  EIGEN_DEVICE_FUNC Index cols() const        { return m_dstExpr.cols(); }
+  EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
+  
+  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
+  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
+  
+  /// Assign src(row,col) to dst(row,col) through the assignment functor.
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
+  {
+    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
+  }
+  
+  /// \sa assignCoeff(Index,Index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
+  {
+    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
+  }
+  
+  /// \sa assignCoeff(Index,Index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
+  {
+    Index row = rowIndexByOuterInner(outer, inner); 
+    Index col = colIndexByOuterInner(outer, inner); 
+    assignCoeff(row, col);
+  }
+  
+  
+  template<int StoreMode, int LoadMode, typename PacketType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
+  {
+    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
+  }
+  
+  template<int StoreMode, int LoadMode, typename PacketType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
+  {
+    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
+  }
+  
+  template<int StoreMode, int LoadMode, typename PacketType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
+  {
+    Index row = rowIndexByOuterInner(outer, inner); 
+    Index col = colIndexByOuterInner(outer, inner);
+    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
+  }
+  
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
+  {
+    typedef typename DstEvaluatorType::ExpressionTraits Traits;
+    return int(Traits::RowsAtCompileTime) == 1 ? 0
+      : int(Traits::ColsAtCompileTime) == 1 ? inner
+      : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
+      : inner;
+  }
+
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
+  {
+    typedef typename DstEvaluatorType::ExpressionTraits Traits;
+    return int(Traits::ColsAtCompileTime) == 1 ? 0
+      : int(Traits::RowsAtCompileTime) == 1 ? inner
+      : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
+      : outer;
+  }
+
+  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
+  {
+    return m_dstExpr.data();
+  }
+  
+protected:
+  DstEvaluatorType& m_dst;
+  const SrcEvaluatorType& m_src;
+  const Functor &m_functor;
+  // TODO find a way to avoid the needs of the original expression
+  DstXprType& m_dstExpr;
+};
+
+/***************************************************************************
+* Part 5 : Entry point for dense rectangular assignment
+***************************************************************************/
+
+template<typename DstXprType,typename SrcXprType, typename Functor>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
+{
+  EIGEN_ONLY_USED_FOR_DEBUG(dst);
+  EIGEN_ONLY_USED_FOR_DEBUG(src);
+  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+}
+
+template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
+{
+  Index dstRows = src.rows();
+  Index dstCols = src.cols();
+  if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
+    dst.resize(dstRows, dstCols);
+  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
+}
+
+template<typename DstXprType, typename SrcXprType, typename Functor>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
+{
+  typedef evaluator<DstXprType> DstEvaluatorType;
+  typedef evaluator<SrcXprType> SrcEvaluatorType;
+
+  SrcEvaluatorType srcEvaluator(src);
+
+  // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
+  // we need to resize the destination after the source evaluator has been created.
+  resize_if_allowed(dst, src, func);
+
+  DstEvaluatorType dstEvaluator(dst);
+    
+  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
+  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
+
+  dense_assignment_loop<Kernel>::run(kernel);
+}
+
+template<typename DstXprType, typename SrcXprType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
+{
+  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
+}
+
+/***************************************************************************
+* Part 6 : Generic assignment
+***************************************************************************/
+
+// Based on the respective shapes of the destination and source,
+// the class AssignmentKind determine the kind of assignment mechanism.
+// AssignmentKind must define a Kind typedef.
+template<typename DstShape, typename SrcShape> struct AssignmentKind;
+
+// Assignement kind defined in this file:
+struct Dense2Dense {};
+struct EigenBase2EigenBase {};
+
+template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
+template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
+    
+// This is the main assignment class
+template< typename DstXprType, typename SrcXprType, typename Functor,
+          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
+          typename EnableIf = void>
+struct Assignment;
+
+
+// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
+// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated.
+// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
+// does not has to bother about these annoying details.
+
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src)
+{
+  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(const Dst& dst, const Src& src)
+{
+  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+                     
+// Deal with "assume-aliasing"
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
+{
+  typename plain_matrix_type<Src>::type tmp(src);
+  call_assignment_no_alias(dst, tmp, func);
+}
+
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
+{
+  call_assignment_no_alias(dst, src, func);
+}
+
+// by-pass "assume-aliasing"
+// When there is no aliasing, we require that 'dst' has been properly resized
+template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
+{
+  call_assignment_no_alias(dst.expression(), src, func);
+}
+
+
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
+{
+  enum {
+    NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
+                        || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
+                      ) && int(Dst::SizeAtCompileTime) != 1
+  };
+
+  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
+  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
+  ActualDstType actualDst(dst);
+  
+  // TODO check whether this is the right place to perform these checks:
+  EIGEN_STATIC_ASSERT_LVALUE(Dst)
+  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
+  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
+  
+  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
+}
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src)
+{
+  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
+{
+  // TODO check whether this is the right place to perform these checks:
+  EIGEN_STATIC_ASSERT_LVALUE(Dst)
+  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
+  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
+
+  Assignment<Dst,Src,Func>::run(dst, src, func);
+}
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
+{
+  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
+}
+
+// forward declaration
+template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
+
+// Generic Dense to Dense assignment
+// Note that the last template argument "Weak" is needed to make it possible to perform
+// both partial specialization+SFINAE without ambiguous specialization
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
+struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  {
+#ifndef EIGEN_NO_DEBUG
+    internal::check_for_aliasing(dst, src);
+#endif
+    
+    call_dense_assignment_loop(dst, src, func);
+  }
+};
+
+// Generic assignment through evalTo.
+// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
+// Note that the last template argument "Weak" is needed to make it possible to perform
+// both partial specialization+SFINAE without ambiguous specialization
+template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
+struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+
+    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+    src.evalTo(dst);
+  }
+
+  // NOTE The following two functions are templated to avoid their instanciation if not needed
+  //      This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
+  template<typename SrcScalarType>
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+
+    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+    src.addTo(dst);
+  }
+
+  template<typename SrcScalarType>
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+
+    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+    src.subTo(dst);
+  }
+};
+
+} // namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_ASSIGN_EVALUATOR_H
diff --git a/third-party/Eigen/src/Core/Assign_MKL.h b/third-party/Eigen/src/Core/Assign_MKL.h
new file mode 100644
index 00000000..6866095b
--- /dev/null
+++ b/third-party/Eigen/src/Core/Assign_MKL.h
@@ -0,0 +1,178 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+ Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
+ 
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_ASSIGN_VML_H
+#define EIGEN_ASSIGN_VML_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename Dst, typename Src>
+class vml_assign_traits
+{
+  private:
+    enum {
+      DstHasDirectAccess = Dst::Flags & DirectAccessBit,
+      SrcHasDirectAccess = Src::Flags & DirectAccessBit,
+      StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
+      InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
+                : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
+                : int(Dst::RowsAtCompileTime),
+      InnerMaxSize  = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
+                    : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
+                    : int(Dst::MaxRowsAtCompileTime),
+      MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
+
+      MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
+      MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
+      VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
+      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
+    };
+  public:
+    enum {
+      EnableVml = MightEnableVml && LargeEnough,
+      Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
+    };
+};
+
+#define EIGEN_PP_EXPAND(ARG) ARG
+#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
+#define EIGEN_VMLMODE_EXPAND_LA , VML_HA
+#else
+#define EIGEN_VMLMODE_EXPAND_LA , VML_LA
+#endif
+
+#define EIGEN_VMLMODE_EXPAND__ 
+
+#define EIGEN_VMLMODE_PREFIX_LA vm
+#define EIGEN_VMLMODE_PREFIX__  v
+#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)                                           \
+  template< typename DstXprType, typename SrcXprNested>                                                                         \
+  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>,   \
+                   Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> {              \
+    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType;                                            \
+    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) {                       \
+      resize_if_allowed(dst, src, func);                                                                                        \
+      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                                       \
+      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) {                                              \
+        VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(),                                                        \
+              (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) );                                           \
+      } else {                                                                                                                  \
+        const Index outerSize = dst.outerSize();                                                                                \
+        for(Index outer = 0; outer < outerSize; ++outer) {                                                                      \
+          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :                             \
+                                                      &(src.nestedExpression().coeffRef(0, outer));                             \
+          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));                           \
+          VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr,                                                                      \
+                (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE));                                             \
+        }                                                                                                                       \
+      }                                                                                                                         \
+    }                                                                                                                           \
+  };                                                                                                                            \
+
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)                                                         \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE)           \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)                                                         \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)
+  
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE)                                                              \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)                                                               \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
+
+  
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin,   Sin,   LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin,  Asin,  LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh,  Sinh,  LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos,   Cos,   LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos,  Acos,  LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh,  Cosh,  LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan,   Tan,   LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan,  Atan,  LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh,  Tanh,  LA)
+// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,   Abs,    _)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp,   Exp,   LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log,   Ln,    LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt,  Sqrt,  _)
+
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr,   _)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg,      _)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round,  _)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor,  _)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil,  Ceil,   _)
+
+#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)                                           \
+  template< typename DstXprType, typename SrcXprNested, typename Plain>                                                       \
+  struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested,                       \
+                    const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>,    \
+                   Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> {            \
+    typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested,                                           \
+                    const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType;                         \
+    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) {                     \
+      resize_if_allowed(dst, src, func);                                                                                      \
+      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                                     \
+      VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other);                                       \
+      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal)                                              \
+      {                                                                                                                       \
+        VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent,                                                        \
+              (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) );                                         \
+      } else {                                                                                                                \
+        const Index outerSize = dst.outerSize();                                                                              \
+        for(Index outer = 0; outer < outerSize; ++outer) {                                                                    \
+          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) :                                        \
+                                                      &(src.lhs().coeffRef(0, outer));                                        \
+          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));                         \
+          VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent,                                                          \
+                 (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE));                                          \
+        }                                                                                                                     \
+      }                                                                                                                       \
+    }                                                                                                                         \
+  };
+  
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float,    float,         LA)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double,   double,        LA)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8,  LA)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_ASSIGN_VML_H
diff --git a/third-party/Eigen/src/Core/BandMatrix.h b/third-party/Eigen/src/Core/BandMatrix.h
new file mode 100644
index 00000000..4978c914
--- /dev/null
+++ b/third-party/Eigen/src/Core/BandMatrix.h
@@ -0,0 +1,353 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BANDMATRIX_H
+#define EIGEN_BANDMATRIX_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename Derived>
+class BandMatrixBase : public EigenBase<Derived>
+{
+  public:
+
+    enum {
+      Flags = internal::traits<Derived>::Flags,
+      CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
+      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+      Supers = internal::traits<Derived>::Supers,
+      Subs   = internal::traits<Derived>::Subs,
+      Options = internal::traits<Derived>::Options
+    };
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> DenseMatrixType;
+    typedef typename DenseMatrixType::StorageIndex StorageIndex;
+    typedef typename internal::traits<Derived>::CoefficientsType CoefficientsType;
+    typedef EigenBase<Derived> Base;
+
+  protected:
+    enum {
+      DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic))
+                            ? 1 + Supers + Subs
+                            : Dynamic,
+      SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime)
+    };
+
+  public:
+    
+    using Base::derived;
+    using Base::rows;
+    using Base::cols;
+
+    /** \returns the number of super diagonals */
+    inline Index supers() const { return derived().supers(); }
+
+    /** \returns the number of sub diagonals */
+    inline Index subs() const { return derived().subs(); }
+    
+    /** \returns an expression of the underlying coefficient matrix */
+    inline const CoefficientsType& coeffs() const { return derived().coeffs(); }
+    
+    /** \returns an expression of the underlying coefficient matrix */
+    inline CoefficientsType& coeffs() { return derived().coeffs(); }
+
+    /** \returns a vector expression of the \a i -th column,
+      * only the meaningful part is returned.
+      * \warning the internal storage must be column major. */
+    inline Block<CoefficientsType,Dynamic,1> col(Index i)
+    {
+      EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES);
+      Index start = 0;
+      Index len = coeffs().rows();
+      if (i<=supers())
+      {
+        start = supers()-i;
+        len = (std::min)(rows(),std::max<Index>(0,coeffs().rows() - (supers()-i)));
+      }
+      else if (i>=rows()-subs())
+        len = std::max<Index>(0,coeffs().rows() - (i + 1 - rows() + subs()));
+      return Block<CoefficientsType,Dynamic,1>(coeffs(), start, i, len, 1);
+    }
+
+    /** \returns a vector expression of the main diagonal */
+    inline Block<CoefficientsType,1,SizeAtCompileTime> diagonal()
+    { return Block<CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
+
+    /** \returns a vector expression of the main diagonal (const version) */
+    inline const Block<const CoefficientsType,1,SizeAtCompileTime> diagonal() const
+    { return Block<const CoefficientsType,1,SizeAtCompileTime>(coeffs(),supers(),0,1,(std::min)(rows(),cols())); }
+
+    template<int Index> struct DiagonalIntReturnType {
+      enum {
+        ReturnOpposite = (Options&SelfAdjoint) && (((Index)>0 && Supers==0) || ((Index)<0 && Subs==0)),
+        Conjugate = ReturnOpposite && NumTraits<Scalar>::IsComplex,
+        ActualIndex = ReturnOpposite ? -Index : Index,
+        DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic)
+                     ? Dynamic
+                     : (ActualIndex<0
+                     ? EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex)
+                     : EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex))
+      };
+      typedef Block<CoefficientsType,1, DiagonalSize> BuildType;
+      typedef typename internal::conditional<Conjugate,
+                 CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>,BuildType >,
+                 BuildType>::type Type;
+    };
+
+    /** \returns a vector expression of the \a N -th sub or super diagonal */
+    template<int N> inline typename DiagonalIntReturnType<N>::Type diagonal()
+    {
+      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
+    }
+
+    /** \returns a vector expression of the \a N -th sub or super diagonal */
+    template<int N> inline const typename DiagonalIntReturnType<N>::Type diagonal() const
+    {
+      return typename DiagonalIntReturnType<N>::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N));
+    }
+
+    /** \returns a vector expression of the \a i -th sub or super diagonal */
+    inline Block<CoefficientsType,1,Dynamic> diagonal(Index i)
+    {
+      eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
+      return Block<CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
+    }
+
+    /** \returns a vector expression of the \a i -th sub or super diagonal */
+    inline const Block<const CoefficientsType,1,Dynamic> diagonal(Index i) const
+    {
+      eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers()));
+      return Block<const CoefficientsType,1,Dynamic>(coeffs(), supers()-i, std::max<Index>(0,i), 1, diagonalLength(i));
+    }
+    
+    template<typename Dest> inline void evalTo(Dest& dst) const
+    {
+      dst.resize(rows(),cols());
+      dst.setZero();
+      dst.diagonal() = diagonal();
+      for (Index i=1; i<=supers();++i)
+        dst.diagonal(i) = diagonal(i);
+      for (Index i=1; i<=subs();++i)
+        dst.diagonal(-i) = diagonal(-i);
+    }
+
+    DenseMatrixType toDenseMatrix() const
+    {
+      DenseMatrixType res(rows(),cols());
+      evalTo(res);
+      return res;
+    }
+
+  protected:
+
+    inline Index diagonalLength(Index i) const
+    { return i<0 ? (std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); }
+};
+
+/**
+  * \class BandMatrix
+  * \ingroup Core_Module
+  *
+  * \brief Represents a rectangular matrix with a banded storage
+  *
+  * \tparam _Scalar Numeric type, i.e. float, double, int
+  * \tparam _Rows Number of rows, or \b Dynamic
+  * \tparam _Cols Number of columns, or \b Dynamic
+  * \tparam _Supers Number of super diagonal
+  * \tparam _Subs Number of sub diagonal
+  * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint
+  *                  The former controls \ref TopicStorageOrders "storage order", and defaults to
+  *                  column-major. The latter controls whether the matrix represents a selfadjoint
+  *                  matrix in which case either Supers of Subs have to be null.
+  *
+  * \sa class TridiagonalMatrix
+  */
+
+template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
+struct traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+  typedef _Scalar Scalar;
+  typedef Dense StorageKind;
+  typedef Eigen::Index StorageIndex;
+  enum {
+    CoeffReadCost = NumTraits<Scalar>::ReadCost,
+    RowsAtCompileTime = _Rows,
+    ColsAtCompileTime = _Cols,
+    MaxRowsAtCompileTime = _Rows,
+    MaxColsAtCompileTime = _Cols,
+    Flags = LvalueBit,
+    Supers = _Supers,
+    Subs = _Subs,
+    Options = _Options,
+    DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
+  };
+  typedef Matrix<Scalar,DataRowsAtCompileTime,ColsAtCompileTime,Options&RowMajor?RowMajor:ColMajor> CoefficientsType;
+};
+
+template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
+class BandMatrix : public BandMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
+{
+  public:
+
+    typedef typename internal::traits<BandMatrix>::Scalar Scalar;
+    typedef typename internal::traits<BandMatrix>::StorageIndex StorageIndex;
+    typedef typename internal::traits<BandMatrix>::CoefficientsType CoefficientsType;
+
+    explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs)
+      : m_coeffs(1+supers+subs,cols),
+        m_rows(rows), m_supers(supers), m_subs(subs)
+    {
+    }
+
+    /** \returns the number of columns */
+    inline Index rows() const { return m_rows.value(); }
+
+    /** \returns the number of rows */
+    inline Index cols() const { return m_coeffs.cols(); }
+
+    /** \returns the number of super diagonals */
+    inline Index supers() const { return m_supers.value(); }
+
+    /** \returns the number of sub diagonals */
+    inline Index subs() const { return m_subs.value(); }
+
+    inline const CoefficientsType& coeffs() const { return m_coeffs; }
+    inline CoefficientsType& coeffs() { return m_coeffs; }
+
+  protected:
+
+    CoefficientsType m_coeffs;
+    internal::variable_if_dynamic<Index, Rows>   m_rows;
+    internal::variable_if_dynamic<Index, Supers> m_supers;
+    internal::variable_if_dynamic<Index, Subs>   m_subs;
+};
+
+template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
+class BandMatrixWrapper;
+
+template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
+struct traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+  typedef typename _CoefficientsType::Scalar Scalar;
+  typedef typename _CoefficientsType::StorageKind StorageKind;
+  typedef typename _CoefficientsType::StorageIndex StorageIndex;
+  enum {
+    CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost,
+    RowsAtCompileTime = _Rows,
+    ColsAtCompileTime = _Cols,
+    MaxRowsAtCompileTime = _Rows,
+    MaxColsAtCompileTime = _Cols,
+    Flags = LvalueBit,
+    Supers = _Supers,
+    Subs = _Subs,
+    Options = _Options,
+    DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 1 + Supers + Subs : Dynamic
+  };
+  typedef _CoefficientsType CoefficientsType;
+};
+
+template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
+class BandMatrixWrapper : public BandMatrixBase<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+  public:
+
+    typedef typename internal::traits<BandMatrixWrapper>::Scalar Scalar;
+    typedef typename internal::traits<BandMatrixWrapper>::CoefficientsType CoefficientsType;
+    typedef typename internal::traits<BandMatrixWrapper>::StorageIndex StorageIndex;
+
+    explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs)
+      : m_coeffs(coeffs),
+        m_rows(rows), m_supers(supers), m_subs(subs)
+    {
+      EIGEN_UNUSED_VARIABLE(cols);
+      //internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows());
+    }
+
+    /** \returns the number of columns */
+    inline Index rows() const { return m_rows.value(); }
+
+    /** \returns the number of rows */
+    inline Index cols() const { return m_coeffs.cols(); }
+
+    /** \returns the number of super diagonals */
+    inline Index supers() const { return m_supers.value(); }
+
+    /** \returns the number of sub diagonals */
+    inline Index subs() const { return m_subs.value(); }
+
+    inline const CoefficientsType& coeffs() const { return m_coeffs; }
+
+  protected:
+
+    const CoefficientsType& m_coeffs;
+    internal::variable_if_dynamic<Index, _Rows>   m_rows;
+    internal::variable_if_dynamic<Index, _Supers> m_supers;
+    internal::variable_if_dynamic<Index, _Subs>   m_subs;
+};
+
+/**
+  * \class TridiagonalMatrix
+  * \ingroup Core_Module
+  *
+  * \brief Represents a tridiagonal matrix with a compact banded storage
+  *
+  * \tparam Scalar Numeric type, i.e. float, double, int
+  * \tparam Size Number of rows and cols, or \b Dynamic
+  * \tparam Options Can be 0 or \b SelfAdjoint
+  *
+  * \sa class BandMatrix
+  */
+template<typename Scalar, int Size, int Options>
+class TridiagonalMatrix : public BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor>
+{
+    typedef BandMatrix<Scalar,Size,Size,Options&SelfAdjoint?0:1,1,Options|RowMajor> Base;
+    typedef typename Base::StorageIndex StorageIndex;
+  public:
+    explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {}
+
+    inline typename Base::template DiagonalIntReturnType<1>::Type super()
+    { return Base::template diagonal<1>(); }
+    inline const typename Base::template DiagonalIntReturnType<1>::Type super() const
+    { return Base::template diagonal<1>(); }
+    inline typename Base::template DiagonalIntReturnType<-1>::Type sub()
+    { return Base::template diagonal<-1>(); }
+    inline const typename Base::template DiagonalIntReturnType<-1>::Type sub() const
+    { return Base::template diagonal<-1>(); }
+  protected:
+};
+
+
+struct BandShape {};
+
+template<typename _Scalar, int _Rows, int _Cols, int _Supers, int _Subs, int _Options>
+struct evaluator_traits<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
+  : public evaluator_traits_base<BandMatrix<_Scalar,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+  typedef BandShape Shape;
+};
+
+template<typename _CoefficientsType,int _Rows, int _Cols, int _Supers, int _Subs,int _Options>
+struct evaluator_traits<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
+  : public evaluator_traits_base<BandMatrixWrapper<_CoefficientsType,_Rows,_Cols,_Supers,_Subs,_Options> >
+{
+  typedef BandShape Shape;
+};
+
+template<> struct AssignmentKind<DenseShape,BandShape> { typedef EigenBase2EigenBase Kind; };
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BANDMATRIX_H
diff --git a/third-party/Eigen/src/Core/Block.h b/third-party/Eigen/src/Core/Block.h
new file mode 100644
index 00000000..11de45c2
--- /dev/null
+++ b/third-party/Eigen/src/Core/Block.h
@@ -0,0 +1,452 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BLOCK_H
+#define EIGEN_BLOCK_H
+
+namespace Eigen { 
+
+namespace internal {
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprType>
+{
+  typedef typename traits<XprType>::Scalar Scalar;
+  typedef typename traits<XprType>::StorageKind StorageKind;
+  typedef typename traits<XprType>::XprKind XprKind;
+  typedef typename ref_selector<XprType>::type XprTypeNested;
+  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+  enum{
+    MatrixRows = traits<XprType>::RowsAtCompileTime,
+    MatrixCols = traits<XprType>::ColsAtCompileTime,
+    RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows,
+    ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols,
+    MaxRowsAtCompileTime = BlockRows==0 ? 0
+                         : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime)
+                         : int(traits<XprType>::MaxRowsAtCompileTime),
+    MaxColsAtCompileTime = BlockCols==0 ? 0
+                         : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
+                         : int(traits<XprType>::MaxColsAtCompileTime),
+
+    XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
+    IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+               : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+               : XprTypeIsRowMajor,
+    HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
+    InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+    InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
+                             ? int(inner_stride_at_compile_time<XprType>::ret)
+                             : int(outer_stride_at_compile_time<XprType>::ret),
+    OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
+                             ? int(outer_stride_at_compile_time<XprType>::ret)
+                             : int(inner_stride_at_compile_time<XprType>::ret),
+
+    // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further
+    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
+    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
+    Flags = (traits<XprType>::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit,
+    // FIXME DirectAccessBit should not be handled by expressions
+    // 
+    // Alignment is needed by MapBase's assertions
+    // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
+    Alignment = 0
+  };
+};
+
+template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
+         bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class BlockImpl_dense;
+         
+} // end namespace internal
+
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, typename StorageKind> class BlockImpl;
+
+/** \class Block
+  * \ingroup Core_Module
+  *
+  * \brief Expression of a fixed-size or dynamic-size block
+  *
+  * \tparam XprType the type of the expression in which we are taking a block
+  * \tparam BlockRows the number of rows of the block we are taking at compile time (optional)
+  * \tparam BlockCols the number of columns of the block we are taking at compile time (optional)
+  * \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or
+  *         to set of columns of a column major matrix (optional). The parameter allows to determine
+  *         at compile time whether aligned access is possible on the block expression.
+  *
+  * This class represents an expression of either a fixed-size or dynamic-size block. It is the return
+  * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
+  * most of the time this is the only way it is used.
+  *
+  * However, if you want to directly maniputate block expressions,
+  * for instance if you want to write a function returning such an expression, you
+  * will need to use this class.
+  *
+  * Here is an example illustrating the dynamic case:
+  * \include class_Block.cpp
+  * Output: \verbinclude class_Block.out
+  *
+  * \note Even though this expression has dynamic size, in the case where \a XprType
+  * has fixed size, this expression inherits a fixed maximal size which means that evaluating
+  * it does not cause a dynamic memory allocation.
+  *
+  * Here is an example illustrating the fixed-size case:
+  * \include class_FixedBlock.cpp
+  * Output: \verbinclude class_FixedBlock.out
+  *
+  * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
+  */
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class Block
+  : public BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind>
+{
+    typedef BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, typename internal::traits<XprType>::StorageKind> Impl;
+  public:
+    //typedef typename Impl::Base Base;
+    typedef Impl Base;
+    EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
+    
+    typedef typename internal::remove_all<XprType>::type NestedExpression;
+  
+    /** Column or Row constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline Block(XprType& xpr, Index i) : Impl(xpr,i)
+    {
+      eigen_assert( (i>=0) && (
+          ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i<xpr.rows())
+        ||((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && i<xpr.cols())));
+    }
+
+    /** Fixed-size constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline Block(XprType& xpr, Index startRow, Index startCol)
+      : Impl(xpr, startRow, startCol)
+    {
+      EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
+      eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
+             && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
+    }
+
+    /** Dynamic-size constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline Block(XprType& xpr,
+          Index startRow, Index startCol,
+          Index blockRows, Index blockCols)
+      : Impl(xpr, startRow, startCol, blockRows, blockCols)
+    {
+      eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
+          && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
+      eigen_assert(startRow >= 0 && blockRows >= 0 && startRow  <= xpr.rows() - blockRows
+          && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols);
+    }
+};
+         
+// The generic default implementation for dense block simplu forward to the internal::BlockImpl_dense
+// that must be specialized for direct and non-direct access...
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
+  : public internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel>
+{
+    typedef internal::BlockImpl_dense<XprType, BlockRows, BlockCols, InnerPanel> Impl;
+    typedef typename XprType::StorageIndex StorageIndex;
+  public:
+    typedef Impl Base;
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
+    EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
+    EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {}
+    EIGEN_DEVICE_FUNC
+    inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
+      : Impl(xpr, startRow, startCol, blockRows, blockCols) {}
+};
+
+namespace internal {
+
+/** \internal Internal implementation of dense Blocks in the general case. */
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class BlockImpl_dense
+  : public internal::dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel> >::type
+{
+    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+    typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
+  public:
+
+    typedef typename internal::dense_xpr_base<BlockType>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
+
+    // class InnerIterator; // FIXME apparently never used
+
+    /** Column or Row constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline BlockImpl_dense(XprType& xpr, Index i)
+      : m_xpr(xpr),
+        // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
+        // and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1,
+        // all other cases are invalid.
+        // The case a 1x1 matrix seems ambiguous, but the result is the same anyway.
+        m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
+        m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
+        m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
+        m_blockCols(BlockCols==1 ? 1 : xpr.cols())
+    {}
+
+    /** Fixed-size constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
+      : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
+                    m_blockRows(BlockRows), m_blockCols(BlockCols)
+    {}
+
+    /** Dynamic-size constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline BlockImpl_dense(XprType& xpr,
+          Index startRow, Index startCol,
+          Index blockRows, Index blockCols)
+      : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol),
+                    m_blockRows(blockRows), m_blockCols(blockCols)
+    {}
+
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
+
+    EIGEN_DEVICE_FUNC
+    inline Scalar& coeffRef(Index rowId, Index colId)
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(XprType)
+      return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index rowId, Index colId) const
+    {
+      return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+    }
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
+    {
+      return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline Scalar& coeffRef(Index index)
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(XprType)
+      return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                            m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index index) const
+    {
+      return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                            m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline const CoeffReturnType coeff(Index index) const
+    {
+      return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                         m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+    }
+
+    template<int LoadMode>
+    inline PacketScalar packet(Index rowId, Index colId) const
+    {
+      return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
+    {
+      m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
+    }
+
+    template<int LoadMode>
+    inline PacketScalar packet(Index index) const
+    {
+      return m_xpr.template packet<Unaligned>
+              (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+               m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index index, const PacketScalar& val)
+    {
+      m_xpr.template writePacket<Unaligned>
+         (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+          m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
+    }
+
+    #ifdef EIGEN_PARSED_BY_DOXYGEN
+    /** \sa MapBase::data() */
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const;
+    EIGEN_DEVICE_FUNC inline Index innerStride() const;
+    EIGEN_DEVICE_FUNC inline Index outerStride() const;
+    #endif
+
+    EIGEN_DEVICE_FUNC
+    const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
+    { 
+      return m_xpr; 
+    }
+
+    EIGEN_DEVICE_FUNC
+    XprType& nestedExpression() { return m_xpr; }
+      
+    EIGEN_DEVICE_FUNC
+    StorageIndex startRow() const
+    { 
+      return m_startRow.value(); 
+    }
+      
+    EIGEN_DEVICE_FUNC
+    StorageIndex startCol() const
+    { 
+      return m_startCol.value(); 
+    }
+
+  protected:
+
+    XprTypeNested m_xpr;
+    const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+    const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+    const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
+    const internal::variable_if_dynamic<StorageIndex, ColsAtCompileTime> m_blockCols;
+};
+
+/** \internal Internal implementation of dense Blocks in the direct access case.*/
+template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
+class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
+  : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel> >
+{
+    typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
+    typedef typename internal::ref_selector<XprType>::non_const_type XprTypeNested;
+    enum {
+      XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0
+    };
+  public:
+
+    typedef MapBase<BlockType> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(BlockType)
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense)
+
+    /** Column or Row constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline BlockImpl_dense(XprType& xpr, Index i)
+      : Base(xpr.data() + i * (    ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) 
+                                || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()),
+             BlockRows==1 ? 1 : xpr.rows(),
+             BlockCols==1 ? 1 : xpr.cols()),
+        m_xpr(xpr),
+        m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
+        m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)
+    {
+      init();
+    }
+
+    /** Fixed-size constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
+      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)),
+        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
+    {
+      init();
+    }
+
+    /** Dynamic-size constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline BlockImpl_dense(XprType& xpr,
+          Index startRow, Index startCol,
+          Index blockRows, Index blockCols)
+      : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols),
+        m_xpr(xpr), m_startRow(startRow), m_startCol(startCol)
+    {
+      init();
+    }
+
+    EIGEN_DEVICE_FUNC
+    const typename internal::remove_all<XprTypeNested>::type& nestedExpression() const
+    { 
+      return m_xpr; 
+    }
+
+    EIGEN_DEVICE_FUNC
+    XprType& nestedExpression() { return m_xpr; }
+      
+    /** \sa MapBase::innerStride() */
+    EIGEN_DEVICE_FUNC
+    inline Index innerStride() const
+    {
+      return internal::traits<BlockType>::HasSameStorageOrderAsXprType
+             ? m_xpr.innerStride()
+             : m_xpr.outerStride();
+    }
+
+    /** \sa MapBase::outerStride() */
+    EIGEN_DEVICE_FUNC
+    inline Index outerStride() const
+    {
+      return m_outerStride;
+    }
+
+    EIGEN_DEVICE_FUNC
+    StorageIndex startRow() const
+    {
+      return m_startRow.value();
+    }
+
+    EIGEN_DEVICE_FUNC
+    StorageIndex startCol() const
+    {
+      return m_startCol.value();
+    }
+
+  #ifndef __SUNPRO_CC
+  // FIXME sunstudio is not friendly with the above friend...
+  // META-FIXME there is no 'friend' keyword around here. Is this obsolete?
+  protected:
+  #endif
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** \internal used by allowAligned() */
+    EIGEN_DEVICE_FUNC
+    inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
+      : Base(data, blockRows, blockCols), m_xpr(xpr)
+    {
+      init();
+    }
+    #endif
+
+  protected:
+    EIGEN_DEVICE_FUNC
+    void init()
+    {
+      m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
+                    ? m_xpr.outerStride()
+                    : m_xpr.innerStride();
+    }
+
+    XprTypeNested m_xpr;
+    const internal::variable_if_dynamic<StorageIndex, (XprType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+    const internal::variable_if_dynamic<StorageIndex, (XprType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+    Index m_outerStride;
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BLOCK_H
diff --git a/third-party/Eigen/src/Core/BooleanRedux.h b/third-party/Eigen/src/Core/BooleanRedux.h
new file mode 100644
index 00000000..6fd50f96
--- /dev/null
+++ b/third-party/Eigen/src/Core/BooleanRedux.h
@@ -0,0 +1,164 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ALLANDANY_H
+#define EIGEN_ALLANDANY_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename Derived, int UnrollCount>
+struct all_unroller
+{
+  typedef typename Derived::ExpressionTraits Traits;
+  enum {
+    col = (UnrollCount-1) / Traits::RowsAtCompileTime,
+    row = (UnrollCount-1) % Traits::RowsAtCompileTime
+  };
+
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)
+  {
+    return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col);
+  }
+};
+
+template<typename Derived>
+struct all_unroller<Derived, 0>
+{
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived &/*mat*/) { return true; }
+};
+
+template<typename Derived>
+struct all_unroller<Derived, Dynamic>
+{
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }
+};
+
+template<typename Derived, int UnrollCount>
+struct any_unroller
+{
+  typedef typename Derived::ExpressionTraits Traits;
+  enum {
+    col = (UnrollCount-1) / Traits::RowsAtCompileTime,
+    row = (UnrollCount-1) % Traits::RowsAtCompileTime
+  };
+  
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived &mat)
+  {
+    return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col);
+  }
+};
+
+template<typename Derived>
+struct any_unroller<Derived, 0>
+{
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived & /*mat*/) { return false; }
+};
+
+template<typename Derived>
+struct any_unroller<Derived, Dynamic>
+{
+  EIGEN_DEVICE_FUNC static inline bool run(const Derived &) { return false; }
+};
+
+} // end namespace internal
+
+/** \returns true if all coefficients are true
+  *
+  * Example: \include MatrixBase_all.cpp
+  * Output: \verbinclude MatrixBase_all.out
+  *
+  * \sa any(), Cwise::operator<()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const
+{
+  typedef internal::evaluator<Derived> Evaluator;
+  enum {
+    unroll = SizeAtCompileTime != Dynamic
+          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+  };
+  Evaluator evaluator(derived());
+  if(unroll)
+    return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
+  else
+  {
+    for(Index j = 0; j < cols(); ++j)
+      for(Index i = 0; i < rows(); ++i)
+        if (!evaluator.coeff(i, j)) return false;
+    return true;
+  }
+}
+
+/** \returns true if at least one coefficient is true
+  *
+  * \sa all()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const
+{
+  typedef internal::evaluator<Derived> Evaluator;
+  enum {
+    unroll = SizeAtCompileTime != Dynamic
+          && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
+  };
+  Evaluator evaluator(derived());
+  if(unroll)
+    return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator);
+  else
+  {
+    for(Index j = 0; j < cols(); ++j)
+      for(Index i = 0; i < rows(); ++i)
+        if (evaluator.coeff(i, j)) return true;
+    return false;
+  }
+}
+
+/** \returns the number of coefficients which evaluate to true
+  *
+  * \sa all(), any()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC inline Eigen::Index DenseBase<Derived>::count() const
+{
+  return derived().template cast<bool>().template cast<Index>().sum();
+}
+
+/** \returns true is \c *this contains at least one Not A Number (NaN).
+  *
+  * \sa allFinite()
+  */
+template<typename Derived>
+inline bool DenseBase<Derived>::hasNaN() const
+{
+#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
+  return derived().array().isNaN().any();
+#else
+  return !((derived().array()==derived().array()).all());
+#endif
+}
+
+/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
+  *
+  * \sa hasNaN()
+  */
+template<typename Derived>
+inline bool DenseBase<Derived>::allFinite() const
+{
+#if EIGEN_COMP_MSVC || (defined __FAST_MATH__)
+  return derived().array().isFinite().all();
+#else
+  return !((derived()-derived()).hasNaN());
+#endif
+}
+    
+} // end namespace Eigen
+
+#endif // EIGEN_ALLANDANY_H
diff --git a/third-party/Eigen/src/Core/CommaInitializer.h b/third-party/Eigen/src/Core/CommaInitializer.h
new file mode 100644
index 00000000..d218e981
--- /dev/null
+++ b/third-party/Eigen/src/Core/CommaInitializer.h
@@ -0,0 +1,160 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMMAINITIALIZER_H
+#define EIGEN_COMMAINITIALIZER_H
+
+namespace Eigen { 
+
+/** \class CommaInitializer
+  * \ingroup Core_Module
+  *
+  * \brief Helper class used by the comma initializer operator
+  *
+  * This class is internally used to implement the comma initializer feature. It is
+  * the return type of MatrixBase::operator<<, and most of the time this is the only
+  * way it is used.
+  *
+  * \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished()
+  */
+template<typename XprType>
+struct CommaInitializer
+{
+  typedef typename XprType::Scalar Scalar;
+
+  EIGEN_DEVICE_FUNC
+  inline CommaInitializer(XprType& xpr, const Scalar& s)
+    : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
+  {
+    m_xpr.coeffRef(0,0) = s;
+  }
+
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC
+  inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
+    : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
+  {
+    m_xpr.block(0, 0, other.rows(), other.cols()) = other;
+  }
+
+  /* Copy/Move constructor which transfers ownership. This is crucial in 
+   * absence of return value optimization to avoid assertions during destruction. */
+  // FIXME in C++11 mode this could be replaced by a proper RValue constructor
+  EIGEN_DEVICE_FUNC
+  inline CommaInitializer(const CommaInitializer& o)
+  : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) {
+    // Mark original object as finished. In absence of R-value references we need to const_cast:
+    const_cast<CommaInitializer&>(o).m_row = m_xpr.rows();
+    const_cast<CommaInitializer&>(o).m_col = m_xpr.cols();
+    const_cast<CommaInitializer&>(o).m_currentBlockRows = 0;
+  }
+
+  /* inserts a scalar value in the target matrix */
+  EIGEN_DEVICE_FUNC
+  CommaInitializer& operator,(const Scalar& s)
+  {
+    if (m_col==m_xpr.cols())
+    {
+      m_row+=m_currentBlockRows;
+      m_col = 0;
+      m_currentBlockRows = 1;
+      eigen_assert(m_row<m_xpr.rows()
+        && "Too many rows passed to comma initializer (operator<<)");
+    }
+    eigen_assert(m_col<m_xpr.cols()
+      && "Too many coefficients passed to comma initializer (operator<<)");
+    eigen_assert(m_currentBlockRows==1);
+    m_xpr.coeffRef(m_row, m_col++) = s;
+    return *this;
+  }
+
+  /* inserts a matrix expression in the target matrix */
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC
+  CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
+  {
+    if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows))
+    {
+      m_row+=m_currentBlockRows;
+      m_col = 0;
+      m_currentBlockRows = other.rows();
+      eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
+        && "Too many rows passed to comma initializer (operator<<)");
+    }
+    eigen_assert((m_col + other.cols() <= m_xpr.cols())
+      && "Too many coefficients passed to comma initializer (operator<<)");
+    eigen_assert(m_currentBlockRows==other.rows());
+    m_xpr.template block<OtherDerived::RowsAtCompileTime, OtherDerived::ColsAtCompileTime>
+                    (m_row, m_col, other.rows(), other.cols()) = other;
+    m_col += other.cols();
+    return *this;
+  }
+
+  EIGEN_DEVICE_FUNC
+  inline ~CommaInitializer()
+#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS
+  EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception)
+#endif
+  {
+      finished();
+  }
+
+  /** \returns the built matrix once all its coefficients have been set.
+    * Calling finished is 100% optional. Its purpose is to write expressions
+    * like this:
+    * \code
+    * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
+    * \endcode
+    */
+  EIGEN_DEVICE_FUNC
+  inline XprType& finished() {
+      eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0)
+           && m_col == m_xpr.cols()
+           && "Too few coefficients passed to comma initializer (operator<<)");
+      return m_xpr;
+  }
+
+  XprType& m_xpr;           // target expression
+  Index m_row;              // current row id
+  Index m_col;              // current col id
+  Index m_currentBlockRows; // current block height
+};
+
+/** \anchor MatrixBaseCommaInitRef
+  * Convenient operator to set the coefficients of a matrix.
+  *
+  * The coefficients must be provided in a row major order and exactly match
+  * the size of the matrix. Otherwise an assertion is raised.
+  *
+  * Example: \include MatrixBase_set.cpp
+  * Output: \verbinclude MatrixBase_set.out
+  * 
+  * \note According the c++ standard, the argument expressions of this comma initializer are evaluated in arbitrary order.
+  *
+  * \sa CommaInitializer::finished(), class CommaInitializer
+  */
+template<typename Derived>
+inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s)
+{
+  return CommaInitializer<Derived>(*static_cast<Derived*>(this), s);
+}
+
+/** \sa operator<<(const Scalar&) */
+template<typename Derived>
+template<typename OtherDerived>
+inline CommaInitializer<Derived>
+DenseBase<Derived>::operator<<(const DenseBase<OtherDerived>& other)
+{
+  return CommaInitializer<Derived>(*static_cast<Derived *>(this), other);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMMAINITIALIZER_H
diff --git a/third-party/Eigen/src/Core/ConditionEstimator.h b/third-party/Eigen/src/Core/ConditionEstimator.h
new file mode 100644
index 00000000..51a2e5f1
--- /dev/null
+++ b/third-party/Eigen/src/Core/ConditionEstimator.h
@@ -0,0 +1,175 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com)
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CONDITIONESTIMATOR_H
+#define EIGEN_CONDITIONESTIMATOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+template <typename Vector, typename RealVector, bool IsComplex>
+struct rcond_compute_sign {
+  static inline Vector run(const Vector& v) {
+    const RealVector v_abs = v.cwiseAbs();
+    return (v_abs.array() == static_cast<typename Vector::RealScalar>(0))
+            .select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs));
+  }
+};
+
+// Partial specialization to avoid elementwise division for real vectors.
+template <typename Vector>
+struct rcond_compute_sign<Vector, Vector, false> {
+  static inline Vector run(const Vector& v) {
+    return (v.array() < static_cast<typename Vector::RealScalar>(0))
+           .select(-Vector::Ones(v.size()), Vector::Ones(v.size()));
+  }
+};
+
+/**
+  * \returns an estimate of ||inv(matrix)||_1 given a decomposition of
+  * \a matrix that implements .solve() and .adjoint().solve() methods.
+  *
+  * This function implements Algorithms 4.1 and 5.1 from
+  *   http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf
+  * which also forms the basis for the condition number estimators in
+  * LAPACK. Since at most 10 calls to the solve method of dec are
+  * performed, the total cost is O(dims^2), as opposed to O(dims^3)
+  * needed to compute the inverse matrix explicitly.
+  *
+  * The most common usage is in estimating the condition number
+  * ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be
+  * computed directly in O(n^2) operations.
+  *
+  * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and
+  * LLT.
+  *
+  * \sa FullPivLU, PartialPivLU, LDLT, LLT.
+  */
+template <typename Decomposition>
+typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec)
+{
+  typedef typename Decomposition::MatrixType MatrixType;
+  typedef typename Decomposition::Scalar Scalar;
+  typedef typename Decomposition::RealScalar RealScalar;
+  typedef typename internal::plain_col_type<MatrixType>::type Vector;
+  typedef typename internal::plain_col_type<MatrixType, RealScalar>::type RealVector;
+  const bool is_complex = (NumTraits<Scalar>::IsComplex != 0);
+
+  eigen_assert(dec.rows() == dec.cols());
+  const Index n = dec.rows();
+  if (n == 0)
+    return 0;
+
+  // Disable Index to float conversion warning
+#ifdef __INTEL_COMPILER
+  #pragma warning push
+  #pragma warning ( disable : 2259 )
+#endif
+  Vector v = dec.solve(Vector::Ones(n) / Scalar(n));
+#ifdef __INTEL_COMPILER
+  #pragma warning pop
+#endif
+
+  // lower_bound is a lower bound on
+  //   ||inv(matrix)||_1  = sup_v ||inv(matrix) v||_1 / ||v||_1
+  // and is the objective maximized by the ("super-") gradient ascent
+  // algorithm below.
+  RealScalar lower_bound = v.template lpNorm<1>();
+  if (n == 1)
+    return lower_bound;
+
+  // Gradient ascent algorithm follows: We know that the optimum is achieved at
+  // one of the simplices v = e_i, so in each iteration we follow a
+  // super-gradient to move towards the optimal one.
+  RealScalar old_lower_bound = lower_bound;
+  Vector sign_vector(n);
+  Vector old_sign_vector;
+  Index v_max_abs_index = -1;
+  Index old_v_max_abs_index = v_max_abs_index;
+  for (int k = 0; k < 4; ++k)
+  {
+    sign_vector = internal::rcond_compute_sign<Vector, RealVector, is_complex>::run(v);
+    if (k > 0 && !is_complex && sign_vector == old_sign_vector) {
+      // Break if the solution stagnated.
+      break;
+    }
+    // v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )|
+    v = dec.adjoint().solve(sign_vector);
+    v.real().cwiseAbs().maxCoeff(&v_max_abs_index);
+    if (v_max_abs_index == old_v_max_abs_index) {
+      // Break if the solution stagnated.
+      break;
+    }
+    // Move to the new simplex e_j, where j = v_max_abs_index.
+    v = dec.solve(Vector::Unit(n, v_max_abs_index));  // v = inv(matrix) * e_j.
+    lower_bound = v.template lpNorm<1>();
+    if (lower_bound <= old_lower_bound) {
+      // Break if the gradient step did not increase the lower_bound.
+      break;
+    }
+    if (!is_complex) {
+      old_sign_vector = sign_vector;
+    }
+    old_v_max_abs_index = v_max_abs_index;
+    old_lower_bound = lower_bound;
+  }
+  // The following calculates an independent estimate of ||matrix||_1 by
+  // multiplying matrix by a vector with entries of slowly increasing
+  // magnitude and alternating sign:
+  //   v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1.
+  // This improvement to Hager's algorithm above is due to Higham. It was
+  // added to make the algorithm more robust in certain corner cases where
+  // large elements in the matrix might otherwise escape detection due to
+  // exact cancellation (especially when op and op_adjoint correspond to a
+  // sequence of backsubstitutions and permutations), which could cause
+  // Hager's algorithm to vastly underestimate ||matrix||_1.
+  Scalar alternating_sign(RealScalar(1));
+  for (Index i = 0; i < n; ++i) {
+    // The static_cast is needed when Scalar is a complex and RealScalar implements expression templates
+    v[i] = alternating_sign * static_cast<RealScalar>(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1))));
+    alternating_sign = -alternating_sign;
+  }
+  v = dec.solve(v);
+  const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n));
+  return numext::maxi(lower_bound, alternate_lower_bound);
+}
+
+/** \brief Reciprocal condition number estimator.
+  *
+  * Computing a decomposition of a dense matrix takes O(n^3) operations, while
+  * this method estimates the condition number quickly and reliably in O(n^2)
+  * operations.
+  *
+  * \returns an estimate of the reciprocal condition number
+  * (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and
+  * its decomposition. Supports the following decompositions: FullPivLU,
+  * PartialPivLU, LDLT, and LLT.
+  *
+  * \sa FullPivLU, PartialPivLU, LDLT, LLT.
+  */
+template <typename Decomposition>
+typename Decomposition::RealScalar
+rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec)
+{
+  typedef typename Decomposition::RealScalar RealScalar;
+  eigen_assert(dec.rows() == dec.cols());
+  if (dec.rows() == 0)              return NumTraits<RealScalar>::infinity();
+  if (matrix_norm == RealScalar(0)) return RealScalar(0);
+  if (dec.rows() == 1)              return RealScalar(1);
+  const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
+  return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0)
+                                               : (RealScalar(1) / inverse_matrix_norm) / matrix_norm);
+}
+
+}  // namespace internal
+
+}  // namespace Eigen
+
+#endif
diff --git a/third-party/Eigen/src/Core/CoreEvaluators.h b/third-party/Eigen/src/Core/CoreEvaluators.h
new file mode 100644
index 00000000..910889ef
--- /dev/null
+++ b/third-party/Eigen/src/Core/CoreEvaluators.h
@@ -0,0 +1,1688 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_COREEVALUATORS_H
+#define EIGEN_COREEVALUATORS_H
+
+namespace Eigen {
+  
+namespace internal {
+
+// This class returns the evaluator kind from the expression storage kind.
+// Default assumes index based accessors
+template<typename StorageKind>
+struct storage_kind_to_evaluator_kind {
+  typedef IndexBased Kind;
+};
+
+// This class returns the evaluator shape from the expression storage kind.
+// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc.
+template<typename StorageKind> struct storage_kind_to_shape;
+
+template<> struct storage_kind_to_shape<Dense>                  { typedef DenseShape Shape;           };
+template<> struct storage_kind_to_shape<SolverStorage>          { typedef SolverShape Shape;           };
+template<> struct storage_kind_to_shape<PermutationStorage>     { typedef PermutationShape Shape;     };
+template<> struct storage_kind_to_shape<TranspositionsStorage>  { typedef TranspositionsShape Shape;  };
+
+// Evaluators have to be specialized with respect to various criteria such as:
+//  - storage/structure/shape
+//  - scalar type
+//  - etc.
+// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators.
+// We currently distinguish the following kind of evaluators:
+// - unary_evaluator    for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate)
+// - binary_evaluator   for expression taking two arguments (CwiseBinaryOp)
+// - ternary_evaluator   for expression taking three arguments (CwiseTernaryOp)
+// - product_evaluator  for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching.
+// - mapbase_evaluator  for Map, Block, Ref
+// - block_evaluator    for Block (special dispatching to a mapbase_evaluator or unary_evaluator)
+
+template< typename T,
+          typename Arg1Kind   = typename evaluator_traits<typename T::Arg1>::Kind,
+          typename Arg2Kind   = typename evaluator_traits<typename T::Arg2>::Kind,
+          typename Arg3Kind   = typename evaluator_traits<typename T::Arg3>::Kind,
+          typename Arg1Scalar = typename traits<typename T::Arg1>::Scalar,
+          typename Arg2Scalar = typename traits<typename T::Arg2>::Scalar,
+          typename Arg3Scalar = typename traits<typename T::Arg3>::Scalar> struct ternary_evaluator;
+
+template< typename T,
+          typename LhsKind   = typename evaluator_traits<typename T::Lhs>::Kind,
+          typename RhsKind   = typename evaluator_traits<typename T::Rhs>::Kind,
+          typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
+          typename RhsScalar = typename traits<typename T::Rhs>::Scalar> struct binary_evaluator;
+
+template< typename T,
+          typename Kind   = typename evaluator_traits<typename T::NestedExpression>::Kind,
+          typename Scalar = typename T::Scalar> struct unary_evaluator;
+          
+// evaluator_traits<T> contains traits for evaluator<T> 
+
+template<typename T>
+struct evaluator_traits_base
+{
+  // by default, get evaluator kind and shape from storage
+  typedef typename storage_kind_to_evaluator_kind<typename traits<T>::StorageKind>::Kind Kind;
+  typedef typename storage_kind_to_shape<typename traits<T>::StorageKind>::Shape Shape;
+};
+
+// Default evaluator traits
+template<typename T>
+struct evaluator_traits : public evaluator_traits_base<T>
+{
+};
+
+template<typename T, typename Shape = typename evaluator_traits<T>::Shape >
+struct evaluator_assume_aliasing {
+  static const bool value = false;
+};
+
+// By default, we assume a unary expression:
+template<typename T>
+struct evaluator : public unary_evaluator<T>
+{
+  typedef unary_evaluator<T> Base;
+  EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {}
+};
+
+
+// TODO: Think about const-correctness
+template<typename T>
+struct evaluator<const T>
+  : evaluator<T>
+{
+  EIGEN_DEVICE_FUNC
+  explicit evaluator(const T& xpr) : evaluator<T>(xpr) {}
+};
+
+// ---------- base class for all evaluators ----------
+
+template<typename ExpressionType>
+struct evaluator_base : public noncopyable
+{
+  // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices.
+  typedef traits<ExpressionType> ExpressionTraits;
+  
+  enum {
+    Alignment = 0
+  };
+};
+
+// -------------------- Matrix and Array --------------------
+//
+// evaluator<PlainObjectBase> is a common base class for the
+// Matrix and Array evaluators.
+// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense,
+// so no need for more sophisticated dispatching.
+
+template<typename Derived>
+struct evaluator<PlainObjectBase<Derived> >
+  : evaluator_base<Derived>
+{
+  typedef PlainObjectBase<Derived> PlainObjectType;
+  typedef typename PlainObjectType::Scalar Scalar;
+  typedef typename PlainObjectType::CoeffReturnType CoeffReturnType;
+
+  enum {
+    IsRowMajor = PlainObjectType::IsRowMajor,
+    IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime,
+    RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,
+    ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
+    
+    CoeffReadCost = NumTraits<Scalar>::ReadCost,
+    Flags = traits<Derived>::EvaluatorFlags,
+    Alignment = traits<Derived>::Alignment
+  };
+  
+  EIGEN_DEVICE_FUNC evaluator()
+    : m_data(0),
+      m_outerStride(IsVectorAtCompileTime  ? 0 
+                                           : int(IsRowMajor) ? ColsAtCompileTime 
+                                           : RowsAtCompileTime)
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+  
+  EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m)
+    : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) 
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    if (IsRowMajor)
+      return m_data[row * m_outerStride.value() + col];
+    else
+      return m_data[row + col * m_outerStride.value()];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_data[index];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
+  {
+    if (IsRowMajor)
+      return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
+    else
+      return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
+  {
+    return const_cast<Scalar*>(m_data)[index];
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    if (IsRowMajor)
+      return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col);
+    else
+      return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value());
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return ploadt<PacketType, LoadMode>(m_data + index);
+  }
+
+  template<int StoreMode,typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index row, Index col, const PacketType& x)
+  {
+    if (IsRowMajor)
+      return pstoret<Scalar, PacketType, StoreMode>
+	            (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x);
+    else
+      return pstoret<Scalar, PacketType, StoreMode>
+                    (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x);
+  }
+
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index index, const PacketType& x)
+  {
+    return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
+  }
+
+protected:
+  const Scalar *m_data;
+
+  // We do not need to know the outer stride for vectors
+  variable_if_dynamic<Index, IsVectorAtCompileTime  ? 0 
+                                                    : int(IsRowMajor) ? ColsAtCompileTime 
+                                                    : RowsAtCompileTime> m_outerStride;
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+  : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+{
+  typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
+  
+  EIGEN_DEVICE_FUNC evaluator() {}
+
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
+    : evaluator<PlainObjectBase<XprType> >(m) 
+  { }
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+  : evaluator<PlainObjectBase<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > >
+{
+  typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType;
+
+  EIGEN_DEVICE_FUNC evaluator() {}
+  
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m)
+    : evaluator<PlainObjectBase<XprType> >(m) 
+  { }
+};
+
+// -------------------- Transpose --------------------
+
+template<typename ArgType>
+struct unary_evaluator<Transpose<ArgType>, IndexBased>
+  : evaluator_base<Transpose<ArgType> >
+{
+  typedef Transpose<ArgType> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,    
+    Flags = evaluator<ArgType>::Flags ^ RowMajorBit,
+    Alignment = evaluator<ArgType>::Alignment
+  };
+
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
+
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_argImpl.coeff(col, row);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_argImpl.coeff(index);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
+  {
+    return m_argImpl.coeffRef(col, row);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  typename XprType::Scalar& coeffRef(Index index)
+  {
+    return m_argImpl.coeffRef(index);
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    return m_argImpl.template packet<LoadMode,PacketType>(col, row);
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return m_argImpl.template packet<LoadMode,PacketType>(index);
+  }
+
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index row, Index col, const PacketType& x)
+  {
+    m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
+  }
+
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index index, const PacketType& x)
+  {
+    m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
+  }
+
+protected:
+  evaluator<ArgType> m_argImpl;
+};
+
+// -------------------- CwiseNullaryOp --------------------
+// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator.
+// Likewise, there is not need to more sophisticated dispatching here.
+
+template<typename Scalar,typename NullaryOp,
+         bool has_nullary = has_nullary_operator<NullaryOp>::value,
+         bool has_unary   = has_unary_operator<NullaryOp>::value,
+         bool has_binary  = has_binary_operator<NullaryOp>::value>
+struct nullary_wrapper
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
+
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp<T>(i,j); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
+};
+
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,true,false,false>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp<T>(); }
+};
+
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,false,true>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp<T>(i,j); }
+};
+
+// We need the following specialization for vector-only functors assigned to a runtime vector,
+// for instance, using linspace and assigning a RowVectorXd to a MatrixXd or even a row of a MatrixXd.
+// In this case, i==0 and j is used for the actual iteration.
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,true,false>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
+    eigen_assert(i==0 || j==0);
+    return op(i+j);
+  }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
+    eigen_assert(i==0 || j==0);
+    return op.template packetOp<T>(i+j);
+  }
+
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
+};
+
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,false,false,false> {};
+
+#if 0 && EIGEN_COMP_MSVC>0
+// Disable this ugly workaround. This is now handled in traits<Ref>::match,
+// but this piece of code might still become handly if some other weird compilation
+// erros pop up again.
+
+// MSVC exhibits a weird compilation error when
+// compiling:
+//    Eigen::MatrixXf A = MatrixXf::Random(3,3);
+//    Ref<const MatrixXf> R = 2.f*A;
+// and that has_*ary_operator<scalar_constant_op<float>> have not been instantiated yet.
+// The "problem" is that evaluator<2.f*A> is instantiated by traits<Ref>::match<2.f*A>
+// and at that time has_*ary_operator<T> returns true regardless of T.
+// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>.
+// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(),
+// and packet() are really instantiated as implemented below:
+
+// This is a simple wrapper around Index to enforce the re-instantiation of
+// has_*ary_operator when needed.
+template<typename T> struct nullary_wrapper_workaround_msvc {
+  nullary_wrapper_workaround_msvc(const T&);
+  operator T()const;
+};
+
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,true,true,true>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i,j);
+  }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i);
+  }
+
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i,j);
+  }
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i);
+  }
+};
+#endif // MSVC workaround
+
+template<typename NullaryOp, typename PlainObjectType>
+struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+  : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
+{
+  typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
+  typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;
+  
+  enum {
+    CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
+    
+    Flags = (evaluator<PlainObjectTypeCleaned>::Flags
+          &  (  HereditaryBits
+              | (functor_has_linear_access<NullaryOp>::ret  ? LinearAccessBit : 0)
+              | (functor_traits<NullaryOp>::PacketAccess    ? PacketAccessBit : 0)))
+          | (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
+    Alignment = AlignedMax
+  };
+
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n)
+    : m_functor(n.functor()), m_wrapper()
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(IndexType row, IndexType col) const
+  {
+    return m_wrapper(m_functor, row, col);
+  }
+
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(IndexType index) const
+  {
+    return m_wrapper(m_functor,index);
+  }
+
+  template<int LoadMode, typename PacketType, typename IndexType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(IndexType row, IndexType col) const
+  {
+    return m_wrapper.template packetOp<PacketType>(m_functor, row, col);
+  }
+
+  template<int LoadMode, typename PacketType, typename IndexType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(IndexType index) const
+  {
+    return m_wrapper.template packetOp<PacketType>(m_functor, index);
+  }
+
+protected:
+  const NullaryOp m_functor;
+  const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
+};
+
+// -------------------- CwiseUnaryOp --------------------
+
+template<typename UnaryOp, typename ArgType>
+struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
+  : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> >
+{
+  typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+    
+    Flags = evaluator<ArgType>::Flags
+          & (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
+    Alignment = evaluator<ArgType>::Alignment
+  };
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit unary_evaluator(const XprType& op)
+    : m_functor(op.functor()), 
+      m_argImpl(op.nestedExpression()) 
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_functor(m_argImpl.coeff(row, col));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_functor(m_argImpl.coeff(index));
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
+  }
+
+protected:
+  const UnaryOp m_functor;
+  evaluator<ArgType> m_argImpl;
+};
+
+// -------------------- CwiseTernaryOp --------------------
+
+// this is a ternary expression
+template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
+struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
+  : public ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
+{
+  typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
+  typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base;
+  
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
+
+template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
+struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased, IndexBased>
+  : evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >
+{
+  typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<Arg1>::CoeffReadCost + evaluator<Arg2>::CoeffReadCost + evaluator<Arg3>::CoeffReadCost + functor_traits<TernaryOp>::Cost,
+    
+    Arg1Flags = evaluator<Arg1>::Flags,
+    Arg2Flags = evaluator<Arg2>::Flags,
+    Arg3Flags = evaluator<Arg3>::Flags,
+    SameType = is_same<typename Arg1::Scalar,typename Arg2::Scalar>::value && is_same<typename Arg1::Scalar,typename Arg3::Scalar>::value,
+    StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit),
+    Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & (
+        HereditaryBits
+        | (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) &
+           ( (StorageOrdersAgree ? LinearAccessBit : 0)
+           | (functor_traits<TernaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
+           )
+        )
+     ),
+    Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit),
+    Alignment = EIGEN_PLAIN_ENUM_MIN(
+        EIGEN_PLAIN_ENUM_MIN(evaluator<Arg1>::Alignment, evaluator<Arg2>::Alignment),
+        evaluator<Arg3>::Alignment)
+  };
+
+  EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr)
+    : m_functor(xpr.functor()),
+      m_arg1Impl(xpr.arg1()), 
+      m_arg2Impl(xpr.arg2()), 
+      m_arg3Impl(xpr.arg3())  
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col),
+                              m_arg2Impl.template packet<LoadMode,PacketType>(row, col),
+                              m_arg3Impl.template packet<LoadMode,PacketType>(row, col));
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index),
+                              m_arg2Impl.template packet<LoadMode,PacketType>(index),
+                              m_arg3Impl.template packet<LoadMode,PacketType>(index));
+  }
+
+protected:
+  const TernaryOp m_functor;
+  evaluator<Arg1> m_arg1Impl;
+  evaluator<Arg2> m_arg2Impl;
+  evaluator<Arg3> m_arg3Impl;
+};
+
+// -------------------- CwiseBinaryOp --------------------
+
+// this is a binary expression
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+  : public binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+  typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base;
+  
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
+
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBased>
+  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
+    
+    LhsFlags = evaluator<Lhs>::Flags,
+    RhsFlags = evaluator<Rhs>::Flags,
+    SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value,
+    StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit),
+    Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
+        HereditaryBits
+      | (int(LhsFlags) & int(RhsFlags) &
+           ( (StorageOrdersAgree ? LinearAccessBit : 0)
+           | (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
+           )
+        )
+     ),
+    Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
+    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment)
+  };
+
+  EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr)
+    : m_functor(xpr.functor()),
+      m_lhsImpl(xpr.lhs()), 
+      m_rhsImpl(xpr.rhs())  
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
+                              m_rhsImpl.template packet<LoadMode,PacketType>(row, col));
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
+                              m_rhsImpl.template packet<LoadMode,PacketType>(index));
+  }
+
+protected:
+  const BinaryOp m_functor;
+  evaluator<Lhs> m_lhsImpl;
+  evaluator<Rhs> m_rhsImpl;
+};
+
+// -------------------- CwiseUnaryView --------------------
+
+template<typename UnaryOp, typename ArgType>
+struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
+  : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> >
+{
+  typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
+    
+    Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)),
+    
+    Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost...
+  };
+
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
+    : m_unaryOp(op.functor()), 
+      m_argImpl(op.nestedExpression()) 
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_unaryOp(m_argImpl.coeff(row, col));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_unaryOp(m_argImpl.coeff(index));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
+  {
+    return m_unaryOp(m_argImpl.coeffRef(row, col));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
+  {
+    return m_unaryOp(m_argImpl.coeffRef(index));
+  }
+
+protected:
+  const UnaryOp m_unaryOp;
+  evaluator<ArgType> m_argImpl;
+};
+
+// -------------------- Map --------------------
+
+// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ?
+// but that might complicate template specialization
+template<typename Derived, typename PlainObjectType>
+struct mapbase_evaluator;
+
+template<typename Derived, typename PlainObjectType>
+struct mapbase_evaluator : evaluator_base<Derived>
+{
+  typedef Derived  XprType;
+  typedef typename XprType::PointerType PointerType;
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  
+  enum {
+    IsRowMajor = XprType::RowsAtCompileTime,
+    ColsAtCompileTime = XprType::ColsAtCompileTime,
+    CoeffReadCost = NumTraits<Scalar>::ReadCost
+  };
+
+  EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map)
+    : m_data(const_cast<PointerType>(map.data())),
+      m_innerStride(map.innerStride()),
+      m_outerStride(map.outerStride())
+  {
+    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
+                        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_data[col * colStride() + row * rowStride()];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_data[index * m_innerStride.value()];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
+  {
+    return m_data[col * colStride() + row * rowStride()];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
+  {
+    return m_data[index * m_innerStride.value()];
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    PointerType ptr = m_data + row * rowStride() + col * colStride();
+    return internal::ploadt<PacketType, LoadMode>(ptr);
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
+  }
+
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index row, Index col, const PacketType& x)
+  {
+    PointerType ptr = m_data + row * rowStride() + col * colStride();
+    return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
+  }
+
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index index, const PacketType& x)
+  {
+    internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
+  }
+protected:
+  EIGEN_DEVICE_FUNC
+  inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); }
+  EIGEN_DEVICE_FUNC
+  inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); }
+
+  PointerType m_data;
+  const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
+  const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
+};
+
+template<typename PlainObjectType, int MapOptions, typename StrideType> 
+struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
+  : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
+{
+  typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
+  typedef typename XprType::Scalar Scalar;
+  // TODO: should check for smaller packet types once we can handle multi-sized packet types
+  typedef typename packet_traits<Scalar>::type PacketScalar;
+  
+  enum {
+    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+                             ? int(PlainObjectType::InnerStrideAtCompileTime)
+                             : int(StrideType::InnerStrideAtCompileTime),
+    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+                             ? int(PlainObjectType::OuterStrideAtCompileTime)
+                             : int(StrideType::OuterStrideAtCompileTime),
+    HasNoInnerStride = InnerStrideAtCompileTime == 1,
+    HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
+    HasNoStride = HasNoInnerStride && HasNoOuterStride,
+    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
+    
+    PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit),
+    LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit),
+    Flags = int( evaluator<PlainObjectType>::Flags) & (LinearAccessMask&PacketAccessMask),
+    
+    Alignment = int(MapOptions)&int(AlignedMask)
+  };
+
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map)
+    : mapbase_evaluator<XprType, PlainObjectType>(map) 
+  { }
+};
+
+// -------------------- Ref --------------------
+
+template<typename PlainObjectType, int RefOptions, typename StrideType> 
+struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
+  : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType>
+{
+  typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
+  
+  enum {
+    Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags,
+    Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment
+  };
+
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref)
+    : mapbase_evaluator<XprType, PlainObjectType>(ref) 
+  { }
+};
+
+// -------------------- Block --------------------
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
+         bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator;
+         
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> 
+struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+  : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
+{
+  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+  typedef typename XprType::Scalar Scalar;
+  // TODO: should check for smaller packet types once we can handle multi-sized packet types
+  typedef typename packet_traits<Scalar>::type PacketScalar;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    
+    RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,
+    ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,
+    MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime,
+    
+    ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0,
+    IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1
+               : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
+               : ArgTypeIsRowMajor,
+    HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor),
+    InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+    InnerStrideAtCompileTime = HasSameStorageOrderAsArgType
+                             ? int(inner_stride_at_compile_time<ArgType>::ret)
+                             : int(outer_stride_at_compile_time<ArgType>::ret),
+    OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
+                             ? int(outer_stride_at_compile_time<ArgType>::ret)
+                             : int(inner_stride_at_compile_time<ArgType>::ret),
+    MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,
+    
+    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,    
+    FlagsRowMajorBit = XprType::Flags&RowMajorBit,
+    Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
+                                           DirectAccessBit |
+                                           MaskPacketAccessBit),
+    Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
+    
+    PacketAlignment = unpacket_traits<PacketScalar>::alignment,
+    Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
+                             && (OuterStrideAtCompileTime!=0)
+                             && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
+    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
+  };
+  typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block)
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+};
+
+// no direct-access => dispatch to a unary evaluator
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAccess*/ false>
+  : unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+{
+  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+
+  EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
+    : unary_evaluator<XprType>(block) 
+  {}
+};
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
+struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBased>
+  : evaluator_base<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
+{
+  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
+    : m_argImpl(block.nestedExpression()), 
+      m_startRow(block.startRow()), 
+      m_startCol(block.startCol()),
+      m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0)
+  { }
+ 
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  enum {
+    RowsAtCompileTime = XprType::RowsAtCompileTime,
+    ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit)
+  };
+ 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  { 
+    return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); 
+  }
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  { 
+    if (ForwardLinearAccess)
+      return m_argImpl.coeff(m_linear_offset.value() + index); 
+    else
+      return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
+  { 
+    return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); 
+  }
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
+  { 
+    if (ForwardLinearAccess)
+      return m_argImpl.coeffRef(m_linear_offset.value() + index); 
+    else
+      return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+  }
+ 
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const 
+  { 
+    return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col); 
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const 
+  { 
+    if (ForwardLinearAccess)
+      return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index);
+    else
+      return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
+                                         RowsAtCompileTime == 1 ? index : 0);
+  }
+  
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index row, Index col, const PacketType& x) 
+  {
+    return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x); 
+  }
+  
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index index, const PacketType& x) 
+  {
+    if (ForwardLinearAccess)
+      return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x);
+    else
+      return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
+                                              RowsAtCompileTime == 1 ? index : 0,
+                                              x);
+  }
+ 
+protected:
+  evaluator<ArgType> m_argImpl;
+  const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
+  const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+  const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
+};
+
+// TODO: This evaluator does not actually use the child evaluator; 
+// all action is via the data() as returned by the Block expression.
+
+template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> 
+struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true>
+  : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
+                      typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
+{
+  typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+  typedef typename XprType::Scalar Scalar;
+
+  EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
+    : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) 
+  {
+    // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime
+    eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned");
+  }
+};
+
+
+// -------------------- Select --------------------
+// NOTE shall we introduce a ternary_evaluator?
+
+// TODO enable vectorization for Select
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
+struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+  : evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+{
+  typedef Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> XprType;
+  enum {
+    CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost
+                  + EIGEN_PLAIN_ENUM_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
+                                         evaluator<ElseMatrixType>::CoeffReadCost),
+
+    Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits,
+    
+    Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
+  };
+
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
+    : m_conditionImpl(select.conditionMatrix()),
+      m_thenImpl(select.thenMatrix()),
+      m_elseImpl(select.elseMatrix())
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+ 
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    if (m_conditionImpl.coeff(row, col))
+      return m_thenImpl.coeff(row, col);
+    else
+      return m_elseImpl.coeff(row, col);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    if (m_conditionImpl.coeff(index))
+      return m_thenImpl.coeff(index);
+    else
+      return m_elseImpl.coeff(index);
+  }
+ 
+protected:
+  evaluator<ConditionMatrixType> m_conditionImpl;
+  evaluator<ThenMatrixType> m_thenImpl;
+  evaluator<ElseMatrixType> m_elseImpl;
+};
+
+
+// -------------------- Replicate --------------------
+
+template<typename ArgType, int RowFactor, int ColFactor> 
+struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
+  : evaluator_base<Replicate<ArgType, RowFactor, ColFactor> >
+{
+  typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  enum {
+    Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor
+  };
+  typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested;
+  typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
+    LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0,
+    Flags = (evaluator<ArgTypeNestedCleaned>::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit),
+    
+    Alignment = evaluator<ArgTypeNestedCleaned>::Alignment
+  };
+
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate)
+    : m_arg(replicate.nestedExpression()),
+      m_argImpl(m_arg),
+      m_rows(replicate.nestedExpression().rows()),
+      m_cols(replicate.nestedExpression().cols())
+  {}
+ 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    // try to avoid using modulo; this is a pure optimization strategy
+    const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
+                           : RowFactor==1 ? row
+                           : row % m_rows.value();
+    const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
+                           : ColFactor==1 ? col
+                           : col % m_cols.value();
+    
+    return m_argImpl.coeff(actual_row, actual_col);
+  }
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    // try to avoid using modulo; this is a pure optimization strategy
+    const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
+                                  ? (ColFactor==1 ?  index : index%m_cols.value())
+                                  : (RowFactor==1 ?  index : index%m_rows.value());
+    
+    return m_argImpl.coeff(actual_index);
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
+                           : RowFactor==1 ? row
+                           : row % m_rows.value();
+    const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0
+                           : ColFactor==1 ? col
+                           : col % m_cols.value();
+
+    return m_argImpl.template packet<LoadMode,PacketType>(actual_row, actual_col);
+  }
+  
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
+                                  ? (ColFactor==1 ?  index : index%m_cols.value())
+                                  : (RowFactor==1 ?  index : index%m_rows.value());
+
+    return m_argImpl.template packet<LoadMode,PacketType>(actual_index);
+  }
+ 
+protected:
+  const ArgTypeNested m_arg;
+  evaluator<ArgTypeNestedCleaned> m_argImpl;
+  const variable_if_dynamic<Index, ArgType::RowsAtCompileTime> m_rows;
+  const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols;
+};
+
+
+// -------------------- PartialReduxExpr --------------------
+
+template< typename ArgType, typename MemberOp, int Direction>
+struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
+  : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
+{
+  typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
+  typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
+  typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+  typedef typename ArgType::Scalar InputScalar;
+  typedef typename XprType::Scalar Scalar;
+  enum {
+    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) :  int(ArgType::ColsAtCompileTime)
+  };
+  typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
+  enum {
+    CoeffReadCost = TraversalSize==Dynamic ? HugeCost
+                  : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
+    
+    Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit,
+    
+    Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
+  };
+
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
+    : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value));
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const Scalar coeff(Index i, Index j) const
+  {
+    if (Direction==Vertical)
+      return m_functor(m_arg.col(j));
+    else
+      return m_functor(m_arg.row(i));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const Scalar coeff(Index index) const
+  {
+    if (Direction==Vertical)
+      return m_functor(m_arg.col(index));
+    else
+      return m_functor(m_arg.row(index));
+  }
+
+protected:
+  typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg;
+  const MemberOp m_functor;
+};
+
+
+// -------------------- MatrixWrapper and ArrayWrapper --------------------
+//
+// evaluator_wrapper_base<T> is a common base class for the
+// MatrixWrapper and ArrayWrapper evaluators.
+
+template<typename XprType>
+struct evaluator_wrapper_base
+  : evaluator_base<XprType>
+{
+  typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    Flags = evaluator<ArgType>::Flags,
+    Alignment = evaluator<ArgType>::Alignment
+  };
+
+  EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
+
+  typedef typename ArgType::Scalar Scalar;
+  typedef typename ArgType::CoeffReturnType CoeffReturnType;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_argImpl.coeff(row, col);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_argImpl.coeff(index);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
+  {
+    return m_argImpl.coeffRef(row, col);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
+  {
+    return m_argImpl.coeffRef(index);
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    return m_argImpl.template packet<LoadMode,PacketType>(row, col);
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    return m_argImpl.template packet<LoadMode,PacketType>(index);
+  }
+
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index row, Index col, const PacketType& x)
+  {
+    m_argImpl.template writePacket<StoreMode>(row, col, x);
+  }
+
+  template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index index, const PacketType& x)
+  {
+    m_argImpl.template writePacket<StoreMode>(index, x);
+  }
+
+protected:
+  evaluator<ArgType> m_argImpl;
+};
+
+template<typename TArgType>
+struct unary_evaluator<MatrixWrapper<TArgType> >
+  : evaluator_wrapper_base<MatrixWrapper<TArgType> >
+{
+  typedef MatrixWrapper<TArgType> XprType;
+
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper)
+    : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression())
+  { }
+};
+
+template<typename TArgType>
+struct unary_evaluator<ArrayWrapper<TArgType> >
+  : evaluator_wrapper_base<ArrayWrapper<TArgType> >
+{
+  typedef ArrayWrapper<TArgType> XprType;
+
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper)
+    : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression())
+  { }
+};
+
+
+// -------------------- Reverse --------------------
+
+// defined in Reverse.h:
+template<typename PacketType, bool ReversePacket> struct reverse_packet_cond;
+
+template<typename ArgType, int Direction>
+struct unary_evaluator<Reverse<ArgType, Direction> >
+  : evaluator_base<Reverse<ArgType, Direction> >
+{
+  typedef Reverse<ArgType, Direction> XprType;
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  enum {
+    IsRowMajor = XprType::IsRowMajor,
+    IsColMajor = !IsRowMajor,
+    ReverseRow = (Direction == Vertical)   || (Direction == BothDirections),
+    ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
+    ReversePacket = (Direction == BothDirections)
+                    || ((Direction == Vertical)   && IsColMajor)
+                    || ((Direction == Horizontal) && IsRowMajor),
+                    
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    
+    // let's enable LinearAccess only with vectorization because of the product overhead
+    // FIXME enable DirectAccess with negative strides?
+    Flags0 = evaluator<ArgType>::Flags,
+    LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
+                  || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1))
+                 ? LinearAccessBit : 0,
+
+    Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess),
+    
+    Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f.
+  };
+
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse)
+    : m_argImpl(reverse.nestedExpression()),
+      m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1),
+      m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
+  { }
+ 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
+                           ReverseCol ? m_cols.value() - col - 1 : col);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
+  {
+    return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
+                              ReverseCol ? m_cols.value() - col - 1 : col);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
+  {
+    return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index row, Index col) const
+  {
+    enum {
+      PacketSize = unpacket_traits<PacketType>::size,
+      OffsetRow  = ReverseRow && IsColMajor ? PacketSize : 1,
+      OffsetCol  = ReverseCol && IsRowMajor ? PacketSize : 1
+    };
+    typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
+    return reverse_packet::run(m_argImpl.template packet<LoadMode,PacketType>(
+                                  ReverseRow ? m_rows.value() - row - OffsetRow : row,
+                                  ReverseCol ? m_cols.value() - col - OffsetCol : col));
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  PacketType packet(Index index) const
+  {
+    enum { PacketSize = unpacket_traits<PacketType>::size };
+    return preverse(m_argImpl.template packet<LoadMode,PacketType>(m_rows.value() * m_cols.value() - index - PacketSize));
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index row, Index col, const PacketType& x)
+  {
+    // FIXME we could factorize some code with packet(i,j)
+    enum {
+      PacketSize = unpacket_traits<PacketType>::size,
+      OffsetRow  = ReverseRow && IsColMajor ? PacketSize : 1,
+      OffsetCol  = ReverseCol && IsRowMajor ? PacketSize : 1
+    };
+    typedef internal::reverse_packet_cond<PacketType,ReversePacket> reverse_packet;
+    m_argImpl.template writePacket<LoadMode>(
+                                  ReverseRow ? m_rows.value() - row - OffsetRow : row,
+                                  ReverseCol ? m_cols.value() - col - OffsetCol : col,
+                                  reverse_packet::run(x));
+  }
+
+  template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
+  void writePacket(Index index, const PacketType& x)
+  {
+    enum { PacketSize = unpacket_traits<PacketType>::size };
+    m_argImpl.template writePacket<LoadMode>
+      (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
+  }
+ 
+protected:
+  evaluator<ArgType> m_argImpl;
+
+  // If we do not reverse rows, then we do not need to know the number of rows; same for columns
+  // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors.
+  const variable_if_dynamic<Index, ReverseRow ? ArgType::RowsAtCompileTime : 1> m_rows;
+  const variable_if_dynamic<Index, ReverseCol ? ArgType::ColsAtCompileTime : 1> m_cols;
+};
+
+
+// -------------------- Diagonal --------------------
+
+template<typename ArgType, int DiagIndex>
+struct evaluator<Diagonal<ArgType, DiagIndex> >
+  : evaluator_base<Diagonal<ArgType, DiagIndex> >
+{
+  typedef Diagonal<ArgType, DiagIndex> XprType;
+  
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
+    
+    Flags = (unsigned int)(evaluator<ArgType>::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit,
+    
+    Alignment = 0
+  };
+
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal)
+    : m_argImpl(diagonal.nestedExpression()),
+      m_index(diagonal.index())
+  { }
+ 
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index) const
+  {
+    return m_argImpl.coeff(row + rowOffset(), row + colOffset());
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
+  {
+    return m_argImpl.coeff(index + rowOffset(), index + colOffset());
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index)
+  {
+    return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
+  {
+    return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
+  }
+
+protected:
+  evaluator<ArgType> m_argImpl;
+  const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
+
+private:
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; }
+};
+
+
+//----------------------------------------------------------------------
+// deprecated code
+//----------------------------------------------------------------------
+
+// -------------------- EvalToTemp --------------------
+
+// expression class for evaluating nested expression to a temporary
+
+template<typename ArgType> class EvalToTemp;
+
+template<typename ArgType>
+struct traits<EvalToTemp<ArgType> >
+  : public traits<ArgType>
+{ };
+
+template<typename ArgType>
+class EvalToTemp
+  : public dense_xpr_base<EvalToTemp<ArgType> >::type
+{
+ public:
+ 
+  typedef typename dense_xpr_base<EvalToTemp>::type Base;
+  EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp)
+ 
+  explicit EvalToTemp(const ArgType& arg)
+    : m_arg(arg)
+  { }
+ 
+  const ArgType& arg() const
+  {
+    return m_arg;
+  }
+
+  Index rows() const 
+  {
+    return m_arg.rows();
+  }
+
+  Index cols() const 
+  {
+    return m_arg.cols();
+  }
+
+ private:
+  const ArgType& m_arg;
+};
+ 
+template<typename ArgType>
+struct evaluator<EvalToTemp<ArgType> >
+  : public evaluator<typename ArgType::PlainObject>
+{
+  typedef EvalToTemp<ArgType>                   XprType;
+  typedef typename ArgType::PlainObject         PlainObject;
+  typedef evaluator<PlainObject> Base;
+  
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+    : m_result(xpr.arg())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+  }
+
+  // This constructor is used when nesting an EvalTo evaluator in another evaluator
+  EIGEN_DEVICE_FUNC evaluator(const ArgType& arg)
+    : m_result(arg)
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+  }
+
+protected:
+  PlainObject m_result;
+};
+
+} // namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COREEVALUATORS_H
diff --git a/third-party/Eigen/src/Core/CoreIterators.h b/third-party/Eigen/src/Core/CoreIterators.h
new file mode 100644
index 00000000..4eb42b93
--- /dev/null
+++ b/third-party/Eigen/src/Core/CoreIterators.h
@@ -0,0 +1,127 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COREITERATORS_H
+#define EIGEN_COREITERATORS_H
+
+namespace Eigen { 
+
+/* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core
+ */
+
+namespace internal {
+
+template<typename XprType, typename EvaluatorKind>
+class inner_iterator_selector;
+
+}
+
+/** \class InnerIterator
+  * \brief An InnerIterator allows to loop over the element of any matrix expression.
+  * 
+  * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed.
+  * 
+  * TODO: add a usage example
+  */
+template<typename XprType>
+class InnerIterator
+{
+protected:
+  typedef internal::inner_iterator_selector<XprType, typename internal::evaluator_traits<XprType>::Kind> IteratorType;
+  typedef internal::evaluator<XprType> EvaluatorType;
+  typedef typename internal::traits<XprType>::Scalar Scalar;
+public:
+  /** Construct an iterator over the \a outerId -th row or column of \a xpr */
+  InnerIterator(const XprType &xpr, const Index &outerId)
+    : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize())
+  {}
+  
+  /// \returns the value of the current coefficient.
+  EIGEN_STRONG_INLINE Scalar value() const          { return m_iter.value(); }
+  /** Increment the iterator \c *this to the next non-zero coefficient.
+    * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView
+    */
+  EIGEN_STRONG_INLINE InnerIterator& operator++()   { m_iter.operator++(); return *this; }
+  /// \returns the column or row index of the current coefficient.
+  EIGEN_STRONG_INLINE Index index() const           { return m_iter.index(); }
+  /// \returns the row index of the current coefficient.
+  EIGEN_STRONG_INLINE Index row() const             { return m_iter.row(); }
+  /// \returns the column index of the current coefficient.
+  EIGEN_STRONG_INLINE Index col() const             { return m_iter.col(); }
+  /// \returns \c true if the iterator \c *this still references a valid coefficient.
+  EIGEN_STRONG_INLINE operator bool() const         { return m_iter; }
+  
+protected:
+  EvaluatorType m_eval;
+  IteratorType m_iter;
+private:
+  // If you get here, then you're not using the right InnerIterator type, e.g.:
+  //   SparseMatrix<double,RowMajor> A;
+  //   SparseMatrix<double>::InnerIterator it(A,0);
+  template<typename T> InnerIterator(const EigenBase<T>&,Index outer);
+};
+
+namespace internal {
+
+// Generic inner iterator implementation for dense objects
+template<typename XprType>
+class inner_iterator_selector<XprType, IndexBased>
+{
+protected:
+  typedef evaluator<XprType> EvaluatorType;
+  typedef typename traits<XprType>::Scalar Scalar;
+  enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit };
+  
+public:
+  EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize)
+    : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize)
+  {}
+
+  EIGEN_STRONG_INLINE Scalar value() const
+  {
+    return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner)
+                        : m_eval.coeff(m_inner, m_outer);
+  }
+
+  EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; }
+
+  EIGEN_STRONG_INLINE Index index() const { return m_inner; }
+  inline Index row() const { return IsRowMajor ? m_outer : index(); }
+  inline Index col() const { return IsRowMajor ? index() : m_outer; }
+
+  EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; }
+
+protected:
+  const EvaluatorType& m_eval;
+  Index m_inner;
+  const Index m_outer;
+  const Index m_end;
+};
+
+// For iterator-based evaluator, inner-iterator is already implemented as
+// evaluator<>::InnerIterator
+template<typename XprType>
+class inner_iterator_selector<XprType, IteratorBased>
+ : public evaluator<XprType>::InnerIterator
+{
+protected:
+  typedef typename evaluator<XprType>::InnerIterator Base;
+  typedef evaluator<XprType> EvaluatorType;
+  
+public:
+  EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/)
+    : Base(eval, outerId)
+  {}  
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COREITERATORS_H
diff --git a/third-party/Eigen/src/Core/CwiseBinaryOp.h b/third-party/Eigen/src/Core/CwiseBinaryOp.h
new file mode 100644
index 00000000..a36765e3
--- /dev/null
+++ b/third-party/Eigen/src/Core/CwiseBinaryOp.h
@@ -0,0 +1,184 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CWISE_BINARY_OP_H
+#define EIGEN_CWISE_BINARY_OP_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename BinaryOp, typename Lhs, typename Rhs>
+struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
+{
+  // we must not inherit from traits<Lhs> since it has
+  // the potential to cause problems with MSVC
+  typedef typename remove_all<Lhs>::type Ancestor;
+  typedef typename traits<Ancestor>::XprKind XprKind;
+  enum {
+    RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
+    ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
+    MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
+  };
+
+  // even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor),
+  // we still want to handle the case when the result type is different.
+  typedef typename result_of<
+                     BinaryOp(
+                       const typename Lhs::Scalar&,
+                       const typename Rhs::Scalar&
+                     )
+                   >::type Scalar;
+  typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
+                                              typename traits<Rhs>::StorageKind,
+                                              BinaryOp>::ret StorageKind;
+  typedef typename promote_index_type<typename traits<Lhs>::StorageIndex,
+                                      typename traits<Rhs>::StorageIndex>::type StorageIndex;
+  typedef typename Lhs::Nested LhsNested;
+  typedef typename Rhs::Nested RhsNested;
+  typedef typename remove_reference<LhsNested>::type _LhsNested;
+  typedef typename remove_reference<RhsNested>::type _RhsNested;
+  enum {
+    Flags = cwise_promote_storage_order<typename traits<Lhs>::StorageKind,typename traits<Rhs>::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value
+  };
+};
+} // end namespace internal
+
+template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
+class CwiseBinaryOpImpl;
+
+/** \class CwiseBinaryOp
+  * \ingroup Core_Module
+  *
+  * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions
+  *
+  * \tparam BinaryOp template functor implementing the operator
+  * \tparam LhsType the type of the left-hand side
+  * \tparam RhsType the type of the right-hand side
+  *
+  * This class represents an expression  where a coefficient-wise binary operator is applied to two expressions.
+  * It is the return type of binary operators, by which we mean only those binary operators where
+  * both the left-hand side and the right-hand side are Eigen expressions.
+  * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp.
+  *
+  * Most of the time, this is the only way that it is used, so you typically don't have to name
+  * CwiseBinaryOp types explicitly.
+  *
+  * \sa MatrixBase::binaryExpr(const MatrixBase<OtherDerived> &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp
+  */
+template<typename BinaryOp, typename LhsType, typename RhsType>
+class CwiseBinaryOp : 
+  public CwiseBinaryOpImpl<
+          BinaryOp, LhsType, RhsType,
+          typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
+                                                        typename internal::traits<RhsType>::StorageKind,
+                                                        BinaryOp>::ret>,
+  internal::no_assignment_operator
+{
+  public:
+    
+    typedef typename internal::remove_all<BinaryOp>::type Functor;
+    typedef typename internal::remove_all<LhsType>::type Lhs;
+    typedef typename internal::remove_all<RhsType>::type Rhs;
+
+    typedef typename CwiseBinaryOpImpl<
+        BinaryOp, LhsType, RhsType,
+        typename internal::cwise_promote_storage_type<typename internal::traits<LhsType>::StorageKind,
+                                                      typename internal::traits<Rhs>::StorageKind,
+                                                      BinaryOp>::ret>::Base Base;
+    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp)
+
+    typedef typename internal::ref_selector<LhsType>::type LhsNested;
+    typedef typename internal::ref_selector<RhsType>::type RhsNested;
+    typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
+    typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
+      : m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
+    {
+      EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar);
+      // require the sizes to match
+      EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs)
+      eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
+    }
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index rows() const {
+      // return the fixed size type if available to enable compile time optimizations
+      if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
+        return m_rhs.rows();
+      else
+        return m_lhs.rows();
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index cols() const {
+      // return the fixed size type if available to enable compile time optimizations
+      if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
+        return m_rhs.cols();
+      else
+        return m_lhs.cols();
+    }
+
+    /** \returns the left hand side nested expression */
+    EIGEN_DEVICE_FUNC
+    const _LhsNested& lhs() const { return m_lhs; }
+    /** \returns the right hand side nested expression */
+    EIGEN_DEVICE_FUNC
+    const _RhsNested& rhs() const { return m_rhs; }
+    /** \returns the functor representing the binary operation */
+    EIGEN_DEVICE_FUNC
+    const BinaryOp& functor() const { return m_functor; }
+
+  protected:
+    LhsNested m_lhs;
+    RhsNested m_rhs;
+    const BinaryOp m_functor;
+};
+
+// Generic API dispatcher
+template<typename BinaryOp, typename Lhs, typename Rhs, typename StorageKind>
+class CwiseBinaryOpImpl
+  : public internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type
+{
+public:
+  typedef typename internal::generic_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
+};
+
+/** replaces \c *this by \c *this - \a other.
+  *
+  * \returns a reference to \c *this
+  */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
+{
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+
+/** replaces \c *this by \c *this + \a other.
+  *
+  * \returns a reference to \c *this
+  */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived &
+MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
+{
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_CWISE_BINARY_OP_H
+
diff --git a/third-party/Eigen/src/Core/CwiseNullaryOp.h b/third-party/Eigen/src/Core/CwiseNullaryOp.h
new file mode 100644
index 00000000..ddd607e3
--- /dev/null
+++ b/third-party/Eigen/src/Core/CwiseNullaryOp.h
@@ -0,0 +1,866 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CWISE_NULLARY_OP_H
+#define EIGEN_CWISE_NULLARY_OP_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename NullaryOp, typename PlainObjectType>
+struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
+{
+  enum {
+    Flags = traits<PlainObjectType>::Flags & RowMajorBit
+  };
+};
+
+} // namespace internal
+
+/** \class CwiseNullaryOp
+  * \ingroup Core_Module
+  *
+  * \brief Generic expression of a matrix where all coefficients are defined by a functor
+  *
+  * \tparam NullaryOp template functor implementing the operator
+  * \tparam PlainObjectType the underlying plain matrix/array type
+  *
+  * This class represents an expression of a generic nullary operator.
+  * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods,
+  * and most of the time this is the only way it is used.
+  *
+  * However, if you want to write a function returning such an expression, you
+  * will need to use this class.
+  *
+  * The functor NullaryOp must expose one of the following method:
+    <table class="manual">
+    <tr            ><td>\c operator()() </td><td>if the procedural generation does not depend on the coefficient entries (e.g., random numbers)</td></tr>
+    <tr class="alt"><td>\c operator()(Index i)</td><td>if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace) </td></tr>
+    <tr            ><td>\c operator()(Index i,Index j)</td><td>if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1)</td></tr>
+    </table>
+  * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors.
+  *
+  * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding
+  * C++11 random number generators.
+  *
+  * A nullary expression can also be used to implement custom sophisticated matrix manipulations
+  * that cannot be covered by the existing set of natively supported matrix manipulations.
+  * See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations
+  * on the behavior of CwiseNullaryOp.
+  *
+  * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr
+  */
+template<typename NullaryOp, typename PlainObjectType>
+class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp, PlainObjectType> >::type, internal::no_assignment_operator
+{
+  public:
+
+    typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
+
+    EIGEN_DEVICE_FUNC
+    CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp())
+      : m_rows(rows), m_cols(cols), m_functor(func)
+    {
+      eigen_assert(rows >= 0
+            && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
+            &&  cols >= 0
+            && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
+    }
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
+
+    /** \returns the functor representing the nullary operation */
+    EIGEN_DEVICE_FUNC
+    const NullaryOp& functor() const { return m_functor; }
+
+  protected:
+    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
+    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
+    const NullaryOp m_functor;
+};
+
+
+/** \returns an expression of a matrix defined by a custom functor \a func
+  *
+  * The parameters \a rows and \a cols are the number of rows and of columns of
+  * the returned matrix. Must be compatible with this MatrixBase type.
+  *
+  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+  * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used
+  * instead.
+  *
+  * The template parameter \a CustomNullaryOp is the type of the functor.
+  *
+  * \sa class CwiseNullaryOp
+  */
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
+{
+  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
+}
+
+/** \returns an expression of a matrix defined by a custom functor \a func
+  *
+  * The parameter \a size is the size of the returned vector.
+  * Must be compatible with this MatrixBase type.
+  *
+  * \only_for_vectors
+  *
+  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+  * it is redundant to pass \a size as argument, so Zero() should be used
+  * instead.
+  *
+  * The template parameter \a CustomNullaryOp is the type of the functor.
+  *
+  * Here is an example with C++11 random generators: \include random_cpp11.cpp
+  * Output: \verbinclude random_cpp11.out
+  * 
+  * \sa class CwiseNullaryOp
+  */
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
+  else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
+}
+
+/** \returns an expression of a matrix defined by a custom functor \a func
+  *
+  * This variant is only for fixed-size DenseBase types. For dynamic-size types, you
+  * need to use the variants taking size arguments.
+  *
+  * The template parameter \a CustomNullaryOp is the type of the functor.
+  *
+  * \sa class CwiseNullaryOp
+  */
+template<typename Derived>
+template<typename CustomNullaryOp>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
+DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
+{
+  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
+}
+
+/** \returns an expression of a constant matrix of value \a value
+  *
+  * The parameters \a rows and \a cols are the number of rows and of columns of
+  * the returned matrix. Must be compatible with this DenseBase type.
+  *
+  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+  * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used
+  * instead.
+  *
+  * The template parameter \a CustomNullaryOp is the type of the functor.
+  *
+  * \sa class CwiseNullaryOp
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
+{
+  return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value));
+}
+
+/** \returns an expression of a constant matrix of value \a value
+  *
+  * The parameter \a size is the size of the returned vector.
+  * Must be compatible with this DenseBase type.
+  *
+  * \only_for_vectors
+  *
+  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+  * it is redundant to pass \a size as argument, so Zero() should be used
+  * instead.
+  *
+  * The template parameter \a CustomNullaryOp is the type of the functor.
+  *
+  * \sa class CwiseNullaryOp
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(Index size, const Scalar& value)
+{
+  return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
+}
+
+/** \returns an expression of a constant matrix of value \a value
+  *
+  * This variant is only for fixed-size DenseBase types. For dynamic-size types, you
+  * need to use the variants taking size arguments.
+  *
+  * The template parameter \a CustomNullaryOp is the type of the functor.
+  *
+  * \sa class CwiseNullaryOp
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Constant(const Scalar& value)
+{
+  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+  return DenseBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op<Scalar>(value));
+}
+
+/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&)
+  *
+  * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
+}
+
+/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&)
+  *
+  * \sa LinSpaced(Scalar,Scalar)
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
+}
+
+/**
+  * \brief Sets a linearly spaced vector.
+  *
+  * The function generates 'size' equally spaced values in the closed interval [low,high].
+  * When size is set to 1, a vector of length 1 containing 'high' is returned.
+  *
+  * \only_for_vectors
+  *
+  * Example: \include DenseBase_LinSpaced.cpp
+  * Output: \verbinclude DenseBase_LinSpaced.out
+  *
+  * For integer scalar types, an even spacing is possible if and only if the length of the range,
+  * i.e., \c high-low is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the
+  * number of values \c high-low+1 (meaning each value can be repeated the same number of time).
+  * If one of these two considions is not satisfied, then \c high is lowered to the largest value
+  * satisfying one of this constraint.
+  * Here are some examples:
+  *
+  * Example: \include DenseBase_LinSpacedInt.cpp
+  * Output: \verbinclude DenseBase_LinSpacedInt.out
+  *
+  * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return DenseBase<Derived>::NullaryExpr(size, internal::linspaced_op<Scalar,PacketScalar>(low,high,size));
+}
+
+/**
+  * \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&)
+  * Special version for fixed size types which does not require the size parameter.
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
+DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+  return DenseBase<Derived>::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op<Scalar,PacketScalar>(low,high,Derived::SizeAtCompileTime));
+}
+
+/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
+(const Scalar& val, const RealScalar& prec) const
+{
+  typename internal::nested_eval<Derived,1>::type self(derived());
+  for(Index j = 0; j < cols(); ++j)
+    for(Index i = 0; i < rows(); ++i)
+      if(!internal::isApprox(self.coeff(i, j), val, prec))
+        return false;
+  return true;
+}
+
+/** This is just an alias for isApproxToConstant().
+  *
+  * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
+(const Scalar& val, const RealScalar& prec) const
+{
+  return isApproxToConstant(val, prec);
+}
+
+/** Alias for setConstant(): sets all coefficients in this expression to \a val.
+  *
+  * \sa setConstant(), Constant(), class CwiseNullaryOp
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
+{
+  setConstant(val);
+}
+
+/** Sets all coefficients in this expression to value \a val.
+  *
+  * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
+{
+  return derived() = Constant(rows(), cols(), val);
+}
+
+/** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val.
+  *
+  * \only_for_vectors
+  *
+  * Example: \include Matrix_setConstant_int.cpp
+  * Output: \verbinclude Matrix_setConstant_int.out
+  *
+  * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
+{
+  resize(size);
+  return setConstant(val);
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to the given value \a val.
+  *
+  * \param rows the new number of rows
+  * \param cols the new number of columns
+  * \param val the value to which all coefficients are set
+  *
+  * Example: \include Matrix_setConstant_int_int.cpp
+  * Output: \verbinclude Matrix_setConstant_int_int.out
+  *
+  * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
+{
+  resize(rows, cols);
+  return setConstant(val);
+}
+
+/**
+  * \brief Sets a linearly spaced vector.
+  *
+  * The function generates 'size' equally spaced values in the closed interval [low,high].
+  * When size is set to 1, a vector of length 1 containing 'high' is returned.
+  *
+  * \only_for_vectors
+  *
+  * Example: \include DenseBase_setLinSpaced.cpp
+  * Output: \verbinclude DenseBase_setLinSpaced.out
+  *
+  * For integer scalar types, do not miss the explanations on the definition
+  * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
+  *
+  * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
+}
+
+/**
+  * \brief Sets a linearly spaced vector.
+  *
+  * The function fills \c *this with equally spaced values in the closed interval [low,high].
+  * When size is set to 1, a vector of length 1 containing 'high' is returned.
+  *
+  * \only_for_vectors
+  *
+  * For integer scalar types, do not miss the explanations on the definition
+  * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink.
+  *
+  * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return setLinSpaced(size(), low, high);
+}
+
+// zero:
+
+/** \returns an expression of a zero matrix.
+  *
+  * The parameters \a rows and \a cols are the number of rows and of columns of
+  * the returned matrix. Must be compatible with this MatrixBase type.
+  *
+  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+  * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used
+  * instead.
+  *
+  * Example: \include MatrixBase_zero_int_int.cpp
+  * Output: \verbinclude MatrixBase_zero_int_int.out
+  *
+  * \sa Zero(), Zero(Index)
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero(Index rows, Index cols)
+{
+  return Constant(rows, cols, Scalar(0));
+}
+
+/** \returns an expression of a zero vector.
+  *
+  * The parameter \a size is the size of the returned vector.
+  * Must be compatible with this MatrixBase type.
+  *
+  * \only_for_vectors
+  *
+  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+  * it is redundant to pass \a size as argument, so Zero() should be used
+  * instead.
+  *
+  * Example: \include MatrixBase_zero_int.cpp
+  * Output: \verbinclude MatrixBase_zero_int.out
+  *
+  * \sa Zero(), Zero(Index,Index)
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero(Index size)
+{
+  return Constant(size, Scalar(0));
+}
+
+/** \returns an expression of a fixed-size zero matrix or vector.
+  *
+  * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
+  * need to use the variants taking size arguments.
+  *
+  * Example: \include MatrixBase_zero.cpp
+  * Output: \verbinclude MatrixBase_zero.out
+  *
+  * \sa Zero(Index), Zero(Index,Index)
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Zero()
+{
+  return Constant(Scalar(0));
+}
+
+/** \returns true if *this is approximately equal to the zero matrix,
+  *          within the precision given by \a prec.
+  *
+  * Example: \include MatrixBase_isZero.cpp
+  * Output: \verbinclude MatrixBase_isZero.out
+  *
+  * \sa class CwiseNullaryOp, Zero()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
+{
+  typename internal::nested_eval<Derived,1>::type self(derived());
+  for(Index j = 0; j < cols(); ++j)
+    for(Index i = 0; i < rows(); ++i)
+      if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<Scalar>(1), prec))
+        return false;
+  return true;
+}
+
+/** Sets all coefficients in this expression to zero.
+  *
+  * Example: \include MatrixBase_setZero.cpp
+  * Output: \verbinclude MatrixBase_setZero.out
+  *
+  * \sa class CwiseNullaryOp, Zero()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
+{
+  return setConstant(Scalar(0));
+}
+
+/** Resizes to the given \a size, and sets all coefficients in this expression to zero.
+  *
+  * \only_for_vectors
+  *
+  * Example: \include Matrix_setZero_int.cpp
+  * Output: \verbinclude Matrix_setZero_int.out
+  *
+  * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setZero(Index newSize)
+{
+  resize(newSize);
+  return setConstant(Scalar(0));
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to zero.
+  *
+  * \param rows the new number of rows
+  * \param cols the new number of columns
+  *
+  * Example: \include Matrix_setZero_int_int.cpp
+  * Output: \verbinclude Matrix_setZero_int_int.out
+  *
+  * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setZero(Index rows, Index cols)
+{
+  resize(rows, cols);
+  return setConstant(Scalar(0));
+}
+
+// ones:
+
+/** \returns an expression of a matrix where all coefficients equal one.
+  *
+  * The parameters \a rows and \a cols are the number of rows and of columns of
+  * the returned matrix. Must be compatible with this MatrixBase type.
+  *
+  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+  * it is redundant to pass \a rows and \a cols as arguments, so Ones() should be used
+  * instead.
+  *
+  * Example: \include MatrixBase_ones_int_int.cpp
+  * Output: \verbinclude MatrixBase_ones_int_int.out
+  *
+  * \sa Ones(), Ones(Index), isOnes(), class Ones
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones(Index rows, Index cols)
+{
+  return Constant(rows, cols, Scalar(1));
+}
+
+/** \returns an expression of a vector where all coefficients equal one.
+  *
+  * The parameter \a newSize is the size of the returned vector.
+  * Must be compatible with this MatrixBase type.
+  *
+  * \only_for_vectors
+  *
+  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+  * it is redundant to pass \a size as argument, so Ones() should be used
+  * instead.
+  *
+  * Example: \include MatrixBase_ones_int.cpp
+  * Output: \verbinclude MatrixBase_ones_int.out
+  *
+  * \sa Ones(), Ones(Index,Index), isOnes(), class Ones
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones(Index newSize)
+{
+  return Constant(newSize, Scalar(1));
+}
+
+/** \returns an expression of a fixed-size matrix or vector where all coefficients equal one.
+  *
+  * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
+  * need to use the variants taking size arguments.
+  *
+  * Example: \include MatrixBase_ones.cpp
+  * Output: \verbinclude MatrixBase_ones.out
+  *
+  * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
+DenseBase<Derived>::Ones()
+{
+  return Constant(Scalar(1));
+}
+
+/** \returns true if *this is approximately equal to the matrix where all coefficients
+  *          are equal to 1, within the precision given by \a prec.
+  *
+  * Example: \include MatrixBase_isOnes.cpp
+  * Output: \verbinclude MatrixBase_isOnes.out
+  *
+  * \sa class CwiseNullaryOp, Ones()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
+(const RealScalar& prec) const
+{
+  return isApproxToConstant(Scalar(1), prec);
+}
+
+/** Sets all coefficients in this expression to one.
+  *
+  * Example: \include MatrixBase_setOnes.cpp
+  * Output: \verbinclude MatrixBase_setOnes.out
+  *
+  * \sa class CwiseNullaryOp, Ones()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
+{
+  return setConstant(Scalar(1));
+}
+
+/** Resizes to the given \a newSize, and sets all coefficients in this expression to one.
+  *
+  * \only_for_vectors
+  *
+  * Example: \include Matrix_setOnes_int.cpp
+  * Output: \verbinclude Matrix_setOnes_int.out
+  *
+  * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setOnes(Index newSize)
+{
+  resize(newSize);
+  return setConstant(Scalar(1));
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to one.
+  *
+  * \param rows the new number of rows
+  * \param cols the new number of columns
+  *
+  * Example: \include Matrix_setOnes_int_int.cpp
+  * Output: \verbinclude Matrix_setOnes_int_int.out
+  *
+  * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
+{
+  resize(rows, cols);
+  return setConstant(Scalar(1));
+}
+
+// Identity:
+
+/** \returns an expression of the identity matrix (not necessarily square).
+  *
+  * The parameters \a rows and \a cols are the number of rows and of columns of
+  * the returned matrix. Must be compatible with this MatrixBase type.
+  *
+  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+  * it is redundant to pass \a rows and \a cols as arguments, so Identity() should be used
+  * instead.
+  *
+  * Example: \include MatrixBase_identity_int_int.cpp
+  * Output: \verbinclude MatrixBase_identity_int_int.out
+  *
+  * \sa Identity(), setIdentity(), isIdentity()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+MatrixBase<Derived>::Identity(Index rows, Index cols)
+{
+  return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
+}
+
+/** \returns an expression of the identity matrix (not necessarily square).
+  *
+  * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
+  * need to use the variant taking size arguments.
+  *
+  * Example: \include MatrixBase_identity.cpp
+  * Output: \verbinclude MatrixBase_identity.out
+  *
+  * \sa Identity(Index,Index), setIdentity(), isIdentity()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
+MatrixBase<Derived>::Identity()
+{
+  EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+  return MatrixBase<Derived>::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op<Scalar>());
+}
+
+/** \returns true if *this is approximately equal to the identity matrix
+  *          (not necessarily square),
+  *          within the precision given by \a prec.
+  *
+  * Example: \include MatrixBase_isIdentity.cpp
+  * Output: \verbinclude MatrixBase_isIdentity.out
+  *
+  * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), setIdentity()
+  */
+template<typename Derived>
+bool MatrixBase<Derived>::isIdentity
+(const RealScalar& prec) const
+{
+  typename internal::nested_eval<Derived,1>::type self(derived());
+  for(Index j = 0; j < cols(); ++j)
+  {
+    for(Index i = 0; i < rows(); ++i)
+    {
+      if(i == j)
+      {
+        if(!internal::isApprox(self.coeff(i, j), static_cast<Scalar>(1), prec))
+          return false;
+      }
+      else
+      {
+        if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast<RealScalar>(1), prec))
+          return false;
+      }
+    }
+  }
+  return true;
+}
+
+namespace internal {
+
+template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
+struct setIdentity_impl
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Derived& run(Derived& m)
+  {
+    return m = Derived::Identity(m.rows(), m.cols());
+  }
+};
+
+template<typename Derived>
+struct setIdentity_impl<Derived, true>
+{
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Derived& run(Derived& m)
+  {
+    m.setZero();
+    const Index size = numext::mini(m.rows(), m.cols());
+    for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
+    return m;
+  }
+};
+
+} // end namespace internal
+
+/** Writes the identity expression (not necessarily square) into *this.
+  *
+  * Example: \include MatrixBase_setIdentity.cpp
+  * Output: \verbinclude MatrixBase_setIdentity.out
+  *
+  * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
+{
+  return internal::setIdentity_impl<Derived>::run(derived());
+}
+
+/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this.
+  *
+  * \param rows the new number of rows
+  * \param cols the new number of columns
+  *
+  * Example: \include Matrix_setIdentity_int_int.cpp
+  * Output: \verbinclude Matrix_setIdentity_int_int.out
+  *
+  * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
+{
+  derived().resize(rows, cols);
+  return setIdentity();
+}
+
+/** \returns an expression of the i-th unit (basis) vector.
+  *
+  * \only_for_vectors
+  *
+  * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
+}
+
+/** \returns an expression of the i-th unit (basis) vector.
+  *
+  * \only_for_vectors
+  *
+  * This variant is for fixed-size vector only.
+  *
+  * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return BasisReturnType(SquareMatrixType::Identity(),i);
+}
+
+/** \returns an expression of the X axis unit vector (1{,0}^*)
+  *
+  * \only_for_vectors
+  *
+  * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
+{ return Derived::Unit(0); }
+
+/** \returns an expression of the Y axis unit vector (0,1{,0}^*)
+  *
+  * \only_for_vectors
+  *
+  * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
+{ return Derived::Unit(1); }
+
+/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
+  *
+  * \only_for_vectors
+  *
+  * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
+{ return Derived::Unit(2); }
+
+/** \returns an expression of the W axis unit vector (0,0,0,1)
+  *
+  * \only_for_vectors
+  *
+  * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
+{ return Derived::Unit(3); }
+
+} // end namespace Eigen
+
+#endif // EIGEN_CWISE_NULLARY_OP_H
diff --git a/third-party/Eigen/src/Core/CwiseTernaryOp.h b/third-party/Eigen/src/Core/CwiseTernaryOp.h
new file mode 100644
index 00000000..9f3576fe
--- /dev/null
+++ b/third-party/Eigen/src/Core/CwiseTernaryOp.h
@@ -0,0 +1,197 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CWISE_TERNARY_OP_H
+#define EIGEN_CWISE_TERNARY_OP_H
+
+namespace Eigen {
+
+namespace internal {
+template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
+struct traits<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
+  // we must not inherit from traits<Arg1> since it has
+  // the potential to cause problems with MSVC
+  typedef typename remove_all<Arg1>::type Ancestor;
+  typedef typename traits<Ancestor>::XprKind XprKind;
+  enum {
+    RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
+    ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
+    MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
+  };
+
+  // even though we require Arg1, Arg2, and Arg3 to have the same scalar type
+  // (see CwiseTernaryOp constructor),
+  // we still want to handle the case when the result type is different.
+  typedef typename result_of<TernaryOp(
+      const typename Arg1::Scalar&, const typename Arg2::Scalar&,
+      const typename Arg3::Scalar&)>::type Scalar;
+
+  typedef typename internal::traits<Arg1>::StorageKind StorageKind;
+  typedef typename internal::traits<Arg1>::StorageIndex StorageIndex;
+
+  typedef typename Arg1::Nested Arg1Nested;
+  typedef typename Arg2::Nested Arg2Nested;
+  typedef typename Arg3::Nested Arg3Nested;
+  typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
+  typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
+  typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
+  enum { Flags = _Arg1Nested::Flags & RowMajorBit };
+};
+}  // end namespace internal
+
+template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
+          typename StorageKind>
+class CwiseTernaryOpImpl;
+
+/** \class CwiseTernaryOp
+  * \ingroup Core_Module
+  *
+  * \brief Generic expression where a coefficient-wise ternary operator is
+ * applied to two expressions
+  *
+  * \tparam TernaryOp template functor implementing the operator
+  * \tparam Arg1Type the type of the first argument
+  * \tparam Arg2Type the type of the second argument
+  * \tparam Arg3Type the type of the third argument
+  *
+  * This class represents an expression where a coefficient-wise ternary
+ * operator is applied to three expressions.
+  * It is the return type of ternary operators, by which we mean only those
+ * ternary operators where
+  * all three arguments are Eigen expressions.
+  * For example, the return type of betainc(matrix1, matrix2, matrix3) is a
+ * CwiseTernaryOp.
+  *
+  * Most of the time, this is the only way that it is used, so you typically
+ * don't have to name
+  * CwiseTernaryOp types explicitly.
+  *
+  * \sa MatrixBase::ternaryExpr(const MatrixBase<Argument2> &, const
+ * MatrixBase<Argument3> &, const CustomTernaryOp &) const, class CwiseBinaryOp,
+ * class CwiseUnaryOp, class CwiseNullaryOp
+  */
+template <typename TernaryOp, typename Arg1Type, typename Arg2Type,
+          typename Arg3Type>
+class CwiseTernaryOp : public CwiseTernaryOpImpl<
+                           TernaryOp, Arg1Type, Arg2Type, Arg3Type,
+                           typename internal::traits<Arg1Type>::StorageKind>,
+                       internal::no_assignment_operator
+{
+ public:
+  typedef typename internal::remove_all<Arg1Type>::type Arg1;
+  typedef typename internal::remove_all<Arg2Type>::type Arg2;
+  typedef typename internal::remove_all<Arg3Type>::type Arg3;
+
+  typedef typename CwiseTernaryOpImpl<
+      TernaryOp, Arg1Type, Arg2Type, Arg3Type,
+      typename internal::traits<Arg1Type>::StorageKind>::Base Base;
+  EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp)
+
+  typedef typename internal::ref_selector<Arg1Type>::type Arg1Nested;
+  typedef typename internal::ref_selector<Arg2Type>::type Arg2Nested;
+  typedef typename internal::ref_selector<Arg3Type>::type Arg3Nested;
+  typedef typename internal::remove_reference<Arg1Nested>::type _Arg1Nested;
+  typedef typename internal::remove_reference<Arg2Nested>::type _Arg2Nested;
+  typedef typename internal::remove_reference<Arg3Nested>::type _Arg3Nested;
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2,
+                                     const Arg3& a3,
+                                     const TernaryOp& func = TernaryOp())
+      : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) {
+    // require the sizes to match
+    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)
+    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)
+
+    // The index types should match
+    EIGEN_STATIC_ASSERT((internal::is_same<
+                         typename internal::traits<Arg1Type>::StorageKind,
+                         typename internal::traits<Arg2Type>::StorageKind>::value),
+                        STORAGE_KIND_MUST_MATCH)
+    EIGEN_STATIC_ASSERT((internal::is_same<
+                         typename internal::traits<Arg1Type>::StorageKind,
+                         typename internal::traits<Arg3Type>::StorageKind>::value),
+                        STORAGE_KIND_MUST_MATCH)
+
+    eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() &&
+                 a1.rows() == a3.rows() && a1.cols() == a3.cols());
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE Index rows() const {
+    // return the fixed size type if available to enable compile time
+    // optimizations
+    if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+                RowsAtCompileTime == Dynamic &&
+        internal::traits<typename internal::remove_all<Arg2Nested>::type>::
+                RowsAtCompileTime == Dynamic)
+      return m_arg3.rows();
+    else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+                     RowsAtCompileTime == Dynamic &&
+             internal::traits<typename internal::remove_all<Arg3Nested>::type>::
+                     RowsAtCompileTime == Dynamic)
+      return m_arg2.rows();
+    else
+      return m_arg1.rows();
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE Index cols() const {
+    // return the fixed size type if available to enable compile time
+    // optimizations
+    if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+                ColsAtCompileTime == Dynamic &&
+        internal::traits<typename internal::remove_all<Arg2Nested>::type>::
+                ColsAtCompileTime == Dynamic)
+      return m_arg3.cols();
+    else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+                     ColsAtCompileTime == Dynamic &&
+             internal::traits<typename internal::remove_all<Arg3Nested>::type>::
+                     ColsAtCompileTime == Dynamic)
+      return m_arg2.cols();
+    else
+      return m_arg1.cols();
+  }
+
+  /** \returns the first argument nested expression */
+  EIGEN_DEVICE_FUNC
+  const _Arg1Nested& arg1() const { return m_arg1; }
+  /** \returns the first argument nested expression */
+  EIGEN_DEVICE_FUNC
+  const _Arg2Nested& arg2() const { return m_arg2; }
+  /** \returns the third argument nested expression */
+  EIGEN_DEVICE_FUNC
+  const _Arg3Nested& arg3() const { return m_arg3; }
+  /** \returns the functor representing the ternary operation */
+  EIGEN_DEVICE_FUNC
+  const TernaryOp& functor() const { return m_functor; }
+
+ protected:
+  Arg1Nested m_arg1;
+  Arg2Nested m_arg2;
+  Arg3Nested m_arg3;
+  const TernaryOp m_functor;
+};
+
+// Generic API dispatcher
+template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
+          typename StorageKind>
+class CwiseTernaryOpImpl
+    : public internal::generic_xpr_base<
+          CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type {
+ public:
+  typedef typename internal::generic_xpr_base<
+      CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type Base;
+};
+
+}  // end namespace Eigen
+
+#endif  // EIGEN_CWISE_TERNARY_OP_H
diff --git a/third-party/Eigen/src/Core/CwiseUnaryOp.h b/third-party/Eigen/src/Core/CwiseUnaryOp.h
new file mode 100644
index 00000000..1d2dd19f
--- /dev/null
+++ b/third-party/Eigen/src/Core/CwiseUnaryOp.h
@@ -0,0 +1,103 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CWISE_UNARY_OP_H
+#define EIGEN_CWISE_UNARY_OP_H
+
+namespace Eigen { 
+
+namespace internal {
+template<typename UnaryOp, typename XprType>
+struct traits<CwiseUnaryOp<UnaryOp, XprType> >
+ : traits<XprType>
+{
+  typedef typename result_of<
+                     UnaryOp(const typename XprType::Scalar&)
+                   >::type Scalar;
+  typedef typename XprType::Nested XprTypeNested;
+  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
+  enum {
+    Flags = _XprTypeNested::Flags & RowMajorBit 
+  };
+};
+}
+
+template<typename UnaryOp, typename XprType, typename StorageKind>
+class CwiseUnaryOpImpl;
+
+/** \class CwiseUnaryOp
+  * \ingroup Core_Module
+  *
+  * \brief Generic expression where a coefficient-wise unary operator is applied to an expression
+  *
+  * \tparam UnaryOp template functor implementing the operator
+  * \tparam XprType the type of the expression to which we are applying the unary operator
+  *
+  * This class represents an expression where a unary operator is applied to an expression.
+  * It is the return type of all operations taking exactly 1 input expression, regardless of the
+  * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
+  * is considered unary, because only the right-hand side is an expression, and its
+  * return type is a specialization of CwiseUnaryOp.
+  *
+  * Most of the time, this is the only way that it is used, so you typically don't have to name
+  * CwiseUnaryOp types explicitly.
+  *
+  * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
+  */
+template<typename UnaryOp, typename XprType>
+class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
+{
+  public:
+
+    typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
+    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
+    typedef typename internal::ref_selector<XprType>::type XprTypeNested;
+    typedef typename internal::remove_all<XprType>::type NestedExpression;
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+      : m_xpr(xpr), m_functor(func) {}
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Index rows() const { return m_xpr.rows(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Index cols() const { return m_xpr.cols(); }
+
+    /** \returns the functor representing the unary operation */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const UnaryOp& functor() const { return m_functor; }
+
+    /** \returns the nested expression */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const typename internal::remove_all<XprTypeNested>::type&
+    nestedExpression() const { return m_xpr; }
+
+    /** \returns the nested expression */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    typename internal::remove_all<XprTypeNested>::type&
+    nestedExpression() { return m_xpr; }
+
+  protected:
+    XprTypeNested m_xpr;
+    const UnaryOp m_functor;
+};
+
+// Generic API dispatcher
+template<typename UnaryOp, typename XprType, typename StorageKind>
+class CwiseUnaryOpImpl
+  : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
+{
+public:
+  typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_CWISE_UNARY_OP_H
diff --git a/third-party/Eigen/src/Core/CwiseUnaryView.h b/third-party/Eigen/src/Core/CwiseUnaryView.h
new file mode 100644
index 00000000..5a30fa8d
--- /dev/null
+++ b/third-party/Eigen/src/Core/CwiseUnaryView.h
@@ -0,0 +1,130 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CWISE_UNARY_VIEW_H
+#define EIGEN_CWISE_UNARY_VIEW_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename ViewOp, typename MatrixType>
+struct traits<CwiseUnaryView<ViewOp, MatrixType> >
+ : traits<MatrixType>
+{
+  typedef typename result_of<
+                     ViewOp(const typename traits<MatrixType>::Scalar&)
+                   >::type Scalar;
+  typedef typename MatrixType::Nested MatrixTypeNested;
+  typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
+  enum {
+    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+    Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
+    MatrixTypeInnerStride =  inner_stride_at_compile_time<MatrixType>::ret,
+    // need to cast the sizeof's from size_t to int explicitly, otherwise:
+    // "error: no integral type can represent all of the enumerator values
+    InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
+                             ? int(Dynamic)
+                             : int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
+    OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret == Dynamic
+                             ? int(Dynamic)
+                             : outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar))
+  };
+};
+}
+
+template<typename ViewOp, typename MatrixType, typename StorageKind>
+class CwiseUnaryViewImpl;
+
+/** \class CwiseUnaryView
+  * \ingroup Core_Module
+  *
+  * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
+  *
+  * \tparam ViewOp template functor implementing the view
+  * \tparam MatrixType the type of the matrix we are applying the unary operator
+  *
+  * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
+  * It is the return type of real() and imag(), and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
+  */
+template<typename ViewOp, typename MatrixType>
+class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
+{
+  public:
+
+    typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
+    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
+    typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
+    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+
+    explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
+      : m_matrix(mat), m_functor(func) {}
+
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
+
+    EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); }
+    EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); }
+
+    /** \returns the functor representing unary operation */
+    const ViewOp& functor() const { return m_functor; }
+
+    /** \returns the nested expression */
+    const typename internal::remove_all<MatrixTypeNested>::type&
+    nestedExpression() const { return m_matrix; }
+
+    /** \returns the nested expression */
+    typename internal::remove_reference<MatrixTypeNested>::type&
+    nestedExpression() { return m_matrix.const_cast_derived(); }
+
+  protected:
+    MatrixTypeNested m_matrix;
+    ViewOp m_functor;
+};
+
+// Generic API dispatcher
+template<typename ViewOp, typename XprType, typename StorageKind>
+class CwiseUnaryViewImpl
+  : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type
+{
+public:
+  typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base;
+};
+
+template<typename ViewOp, typename MatrixType>
+class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
+  : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
+{
+  public:
+
+    typedef CwiseUnaryView<ViewOp, MatrixType> Derived;
+    typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;
+
+    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
+    
+    EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }
+
+    EIGEN_DEVICE_FUNC inline Index innerStride() const
+    {
+      return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
+    }
+
+    EIGEN_DEVICE_FUNC inline Index outerStride() const
+    {
+      return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
+    }
+  protected:
+    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl)
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_CWISE_UNARY_VIEW_H
diff --git a/third-party/Eigen/src/Core/DenseBase.h b/third-party/Eigen/src/Core/DenseBase.h
new file mode 100644
index 00000000..c55a6823
--- /dev/null
+++ b/third-party/Eigen/src/Core/DenseBase.h
@@ -0,0 +1,612 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DENSEBASE_H
+#define EIGEN_DENSEBASE_H
+
+namespace Eigen {
+
+namespace internal {
+  
+// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type.
+// This dummy function simply aims at checking that at compile time.
+static inline void check_DenseIndex_is_signed() {
+  EIGEN_STATIC_ASSERT(NumTraits<DenseIndex>::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE); 
+}
+
+} // end namespace internal
+  
+/** \class DenseBase
+  * \ingroup Core_Module
+  *
+  * \brief Base class for all dense matrices, vectors, and arrays
+  *
+  * This class is the base that is inherited by all dense objects (matrix, vector, arrays,
+  * and related expression types). The common Eigen API for dense objects is contained in this class.
+  *
+  * \tparam Derived is the derived type, e.g., a matrix type or an expression.
+  *
+  * This class can be extended with the help of the plugin mechanism described on the page
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
+  *
+  * \sa \blank \ref TopicClassHierarchy
+  */
+template<typename Derived> class DenseBase
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  : public DenseCoeffsBase<Derived, internal::accessors_level<Derived>::value>
+#else
+  : public DenseCoeffsBase<Derived,DirectWriteAccessors>
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+{
+  public:
+
+    /** Inner iterator type to iterate over the coefficients of a row or column.
+      * \sa class InnerIterator
+      */
+    typedef Eigen::InnerIterator<Derived> InnerIterator;
+
+    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+
+    /**
+      * \brief The type used to store indices
+      * \details This typedef is relevant for types that store multiple indices such as
+      *          PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index
+      * \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.
+     */
+    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+
+    /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. */
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    
+    /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc.
+      *
+      * It is an alias for the Scalar type */
+    typedef Scalar value_type;
+    
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef DenseCoeffsBase<Derived, internal::accessors_level<Derived>::value> Base;
+
+    using Base::derived;
+    using Base::const_cast_derived;
+    using Base::rows;
+    using Base::cols;
+    using Base::size;
+    using Base::rowIndexByOuterInner;
+    using Base::colIndexByOuterInner;
+    using Base::coeff;
+    using Base::coeffByOuterInner;
+    using Base::operator();
+    using Base::operator[];
+    using Base::x;
+    using Base::y;
+    using Base::z;
+    using Base::w;
+    using Base::stride;
+    using Base::innerStride;
+    using Base::outerStride;
+    using Base::rowStride;
+    using Base::colStride;
+    typedef typename Base::CoeffReturnType CoeffReturnType;
+
+    enum {
+
+      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+        /**< The number of rows at compile-time. This is just a copy of the value provided
+          * by the \a Derived type. If a value is not known at compile-time,
+          * it is set to the \a Dynamic constant.
+          * \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */
+
+      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+        /**< The number of columns at compile-time. This is just a copy of the value provided
+          * by the \a Derived type. If a value is not known at compile-time,
+          * it is set to the \a Dynamic constant.
+          * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */
+
+
+      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
+                                                   internal::traits<Derived>::ColsAtCompileTime>::ret),
+        /**< This is equal to the number of coefficients, i.e. the number of
+          * rows times the number of columns, or to \a Dynamic if this is not
+          * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
+
+      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+        /**< This value is equal to the maximum possible number of rows that this expression
+          * might have. If this expression might have an arbitrarily high number of rows,
+          * this value is set to \a Dynamic.
+          *
+          * This value is useful to know when evaluating an expression, in order to determine
+          * whether it is possible to avoid doing a dynamic memory allocation.
+          *
+          * \sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime
+          */
+
+      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+        /**< This value is equal to the maximum possible number of columns that this expression
+          * might have. If this expression might have an arbitrarily high number of columns,
+          * this value is set to \a Dynamic.
+          *
+          * This value is useful to know when evaluating an expression, in order to determine
+          * whether it is possible to avoid doing a dynamic memory allocation.
+          *
+          * \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime
+          */
+
+      MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
+                                                      internal::traits<Derived>::MaxColsAtCompileTime>::ret),
+        /**< This value is equal to the maximum possible number of coefficients that this expression
+          * might have. If this expression might have an arbitrarily high number of coefficients,
+          * this value is set to \a Dynamic.
+          *
+          * This value is useful to know when evaluating an expression, in order to determine
+          * whether it is possible to avoid doing a dynamic memory allocation.
+          *
+          * \sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime
+          */
+
+      IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
+                           || internal::traits<Derived>::MaxColsAtCompileTime == 1,
+        /**< This is set to true if either the number of rows or the number of
+          * columns is known at compile-time to be equal to 1. Indeed, in that case,
+          * we are dealing with a column-vector (if there is only one column) or with
+          * a row-vector (if there is only one row). */
+
+      Flags = internal::traits<Derived>::Flags,
+        /**< This stores expression \ref flags flags which may or may not be inherited by new expressions
+          * constructed from this one. See the \ref flags "list of flags".
+          */
+
+      IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression has row-major storage order. */
+
+      InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
+                             : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
+
+      InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
+      OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
+    };
+    
+    typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;
+
+    enum { IsPlainObjectBase = 0 };
+    
+    /** The plain matrix type corresponding to this expression.
+      * \sa PlainObject */
+    typedef Matrix<typename internal::traits<Derived>::Scalar,
+                internal::traits<Derived>::RowsAtCompileTime,
+                internal::traits<Derived>::ColsAtCompileTime,
+                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                internal::traits<Derived>::MaxRowsAtCompileTime,
+                internal::traits<Derived>::MaxColsAtCompileTime
+          > PlainMatrix;
+    
+    /** The plain array type corresponding to this expression.
+      * \sa PlainObject */
+    typedef Array<typename internal::traits<Derived>::Scalar,
+                internal::traits<Derived>::RowsAtCompileTime,
+                internal::traits<Derived>::ColsAtCompileTime,
+                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                internal::traits<Derived>::MaxRowsAtCompileTime,
+                internal::traits<Derived>::MaxColsAtCompileTime
+          > PlainArray;
+
+    /** \brief The plain matrix or array type corresponding to this expression.
+      *
+      * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
+      * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
+      * that the return type of eval() is either PlainObject or const PlainObject&.
+      */
+    typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
+                                 PlainMatrix, PlainArray>::type PlainObject;
+
+    /** \returns the number of nonzero coefficients which is in practice the number
+      * of stored coefficients. */
+    EIGEN_DEVICE_FUNC
+    inline Index nonZeros() const { return size(); }
+
+    /** \returns the outer size.
+      *
+      * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
+      * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
+      * column-major matrix, and the number of rows for a row-major matrix. */
+    EIGEN_DEVICE_FUNC
+    Index outerSize() const
+    {
+      return IsVectorAtCompileTime ? 1
+           : int(IsRowMajor) ? this->rows() : this->cols();
+    }
+
+    /** \returns the inner size.
+      *
+      * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
+      * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a 
+      * column-major matrix, and the number of columns for a row-major matrix. */
+    EIGEN_DEVICE_FUNC
+    Index innerSize() const
+    {
+      return IsVectorAtCompileTime ? this->size()
+           : int(IsRowMajor) ? this->cols() : this->rows();
+    }
+
+    /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
+      * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
+      * nothing else.
+      */
+    EIGEN_DEVICE_FUNC
+    void resize(Index newSize)
+    {
+      EIGEN_ONLY_USED_FOR_DEBUG(newSize);
+      eigen_assert(newSize == this->size()
+                && "DenseBase::resize() does not actually allow to resize.");
+    }
+    /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are
+      * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
+      * nothing else.
+      */
+    EIGEN_DEVICE_FUNC
+    void resize(Index rows, Index cols)
+    {
+      EIGEN_ONLY_USED_FOR_DEBUG(rows);
+      EIGEN_ONLY_USED_FOR_DEBUG(cols);
+      eigen_assert(rows == this->rows() && cols == this->cols()
+                && "DenseBase::resize() does not actually allow to resize.");
+    }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** \internal Represents a matrix with all coefficients equal to one another*/
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+    /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */
+    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
+    /** \internal Represents a vector with linearly spaced coefficients that allows random access. */
+    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
+    /** \internal the return type of MatrixBase::eigenvalues() */
+    typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
+
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+    /** Copies \a other into *this. \returns a reference to *this. */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator=(const DenseBase<OtherDerived>& other);
+
+    /** Special case of the template operator=, in order to prevent the compiler
+      * from generating a default operator= (issue hit with g++ 4.1)
+      */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator=(const DenseBase& other);
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    Derived& operator=(const EigenBase<OtherDerived> &other);
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    Derived& operator+=(const EigenBase<OtherDerived> &other);
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    Derived& operator-=(const EigenBase<OtherDerived> &other);
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    Derived& operator=(const ReturnByValue<OtherDerived>& func);
+
+    /** \internal
+      * Copies \a other into *this without evaluating other. \returns a reference to *this.
+      * \deprecated */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    Derived& lazyAssign(const DenseBase<OtherDerived>& other);
+
+    EIGEN_DEVICE_FUNC
+    CommaInitializer<Derived> operator<< (const Scalar& s);
+
+    /** \deprecated it now returns \c *this */
+    template<unsigned int Added,unsigned int Removed>
+    EIGEN_DEPRECATED
+    const Derived& flagged() const
+    { return derived(); }
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
+
+    typedef Transpose<Derived> TransposeReturnType;
+    EIGEN_DEVICE_FUNC
+    TransposeReturnType transpose();
+    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+    EIGEN_DEVICE_FUNC
+    ConstTransposeReturnType transpose() const;
+    EIGEN_DEVICE_FUNC
+    void transposeInPlace();
+
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
+    Constant(Index rows, Index cols, const Scalar& value);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
+    Constant(Index size, const Scalar& value);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
+    Constant(const Scalar& value);
+
+    EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
+    LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+    LinSpaced(Index size, const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
+    LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
+    LinSpaced(const Scalar& low, const Scalar& high);
+
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+    NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+    NullaryExpr(Index size, const CustomNullaryOp& func);
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
+    NullaryExpr(const CustomNullaryOp& func);
+
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
+
+    EIGEN_DEVICE_FUNC void fill(const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC Derived& setZero();
+    EIGEN_DEVICE_FUNC Derived& setOnes();
+    EIGEN_DEVICE_FUNC Derived& setRandom();
+
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC
+    bool isApprox(const DenseBase<OtherDerived>& other,
+                  const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC 
+    bool isMuchSmallerThan(const RealScalar& other,
+                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC
+    bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
+                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+    EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    
+    inline bool hasNaN() const;
+    inline bool allFinite() const;
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator*=(const Scalar& other);
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator/=(const Scalar& other);
+
+    typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
+    /** \returns the matrix or vector obtained by evaluating this expression.
+      *
+      * Notice that in the case of a plain matrix or vector (not an expression) this function just returns
+      * a const reference, in order to avoid a useless copy.
+      * 
+      * \warning Be carefull with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE EvalReturnType eval() const
+    {
+      // Even though MSVC does not honor strong inlining when the return type
+      // is a dynamic matrix, we desperately need strong inlining for fixed
+      // size types on MSVC.
+      return typename internal::eval<Derived>::type(derived());
+    }
+    
+    /** swaps *this with the expression \a other.
+      *
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void swap(const DenseBase<OtherDerived>& other)
+    {
+      EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
+      eigen_assert(rows()==other.rows() && cols()==other.cols());
+      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
+    }
+
+    /** swaps *this with the matrix or array \a other.
+      *
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void swap(PlainObjectBase<OtherDerived>& other)
+    {
+      eigen_assert(rows()==other.rows() && cols()==other.cols());
+      call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
+    }
+
+    EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
+    EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+    EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
+    template<bool Enable> EIGEN_DEVICE_FUNC
+    inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
+    template<bool Enable> EIGEN_DEVICE_FUNC
+    inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+
+    EIGEN_DEVICE_FUNC Scalar sum() const;
+    EIGEN_DEVICE_FUNC Scalar mean() const;
+    EIGEN_DEVICE_FUNC Scalar trace() const;
+
+    EIGEN_DEVICE_FUNC Scalar prod() const;
+
+    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
+    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
+
+    template<typename IndexType> EIGEN_DEVICE_FUNC
+    typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
+    template<typename IndexType> EIGEN_DEVICE_FUNC
+    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
+    template<typename IndexType> EIGEN_DEVICE_FUNC
+    typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
+    template<typename IndexType> EIGEN_DEVICE_FUNC
+    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
+
+    template<typename BinaryOp>
+    EIGEN_DEVICE_FUNC
+    Scalar redux(const BinaryOp& func) const;
+
+    template<typename Visitor>
+    EIGEN_DEVICE_FUNC
+    void visit(Visitor& func) const;
+
+    /** \returns a WithFormat proxy object allowing to print a matrix the with given
+      * format \a fmt.
+      *
+      * See class IOFormat for some examples.
+      *
+      * \sa class IOFormat, class WithFormat
+      */
+    inline const WithFormat<Derived> format(const IOFormat& fmt) const
+    {
+      return WithFormat<Derived>(derived(), fmt);
+    }
+
+    /** \returns the unique coefficient of a 1x1 expression */
+    EIGEN_DEVICE_FUNC
+    CoeffReturnType value() const
+    {
+      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
+      eigen_assert(this->rows() == 1 && this->cols() == 1);
+      return derived().coeff(0,0);
+    }
+
+    EIGEN_DEVICE_FUNC bool all() const;
+    EIGEN_DEVICE_FUNC bool any() const;
+    EIGEN_DEVICE_FUNC Index count() const;
+
+    typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
+    typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
+    typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
+    typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;
+
+    /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
+    *
+    * Example: \include MatrixBase_rowwise.cpp
+    * Output: \verbinclude MatrixBase_rowwise.out
+    *
+    * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+    */
+    //Code moved here due to a CUDA compiler bug
+    EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
+      return ConstRowwiseReturnType(derived());
+    }
+    EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
+
+    /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
+    *
+    * Example: \include MatrixBase_colwise.cpp
+    * Output: \verbinclude MatrixBase_colwise.out
+    *
+    * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+    */
+    EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
+      return ConstColwiseReturnType(derived());
+    }
+    EIGEN_DEVICE_FUNC ColwiseReturnType colwise();
+
+    typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
+    static const RandomReturnType Random(Index rows, Index cols);
+    static const RandomReturnType Random(Index size);
+    static const RandomReturnType Random();
+
+    template<typename ThenDerived,typename ElseDerived>
+    const Select<Derived,ThenDerived,ElseDerived>
+    select(const DenseBase<ThenDerived>& thenMatrix,
+           const DenseBase<ElseDerived>& elseMatrix) const;
+
+    template<typename ThenDerived>
+    inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
+    select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;
+
+    template<typename ElseDerived>
+    inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
+    select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
+
+    template<int p> RealScalar lpNorm() const;
+
+    template<int RowFactor, int ColFactor>
+    EIGEN_DEVICE_FUNC
+    const Replicate<Derived,RowFactor,ColFactor> replicate() const;
+    /**
+    * \return an expression of the replication of \c *this
+    *
+    * Example: \include MatrixBase_replicate_int_int.cpp
+    * Output: \verbinclude MatrixBase_replicate_int_int.out
+    *
+    * \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
+    */
+    //Code moved here due to a CUDA compiler bug
+    EIGEN_DEVICE_FUNC
+    const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const
+    {
+      return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);
+    }
+
+    typedef Reverse<Derived, BothDirections> ReverseReturnType;
+    typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
+    EIGEN_DEVICE_FUNC ReverseReturnType reverse();
+    /** This is the const version of reverse(). */
+    //Code moved here due to a CUDA compiler bug
+    EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const
+    {
+      return ConstReverseReturnType(derived());
+    }
+    EIGEN_DEVICE_FUNC void reverseInPlace();
+
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
+#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
+#   include "../plugins/BlockMethods.h"
+#   ifdef EIGEN_DENSEBASE_PLUGIN
+#     include EIGEN_DENSEBASE_PLUGIN
+#   endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF
+
+    // disable the use of evalTo for dense objects with a nice compilation error
+    template<typename Dest>
+    EIGEN_DEVICE_FUNC
+    inline void evalTo(Dest& ) const
+    {
+      EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
+    }
+
+  protected:
+    EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase)
+    /** Default constructor. Do nothing. */
+    EIGEN_DEVICE_FUNC DenseBase()
+    {
+      /* Just checks for self-consistency of the flags.
+       * Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down
+       */
+#ifdef EIGEN_INTERNAL_DEBUGGING
+      EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor))
+                        && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))),
+                          INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION)
+#endif
+    }
+
+  private:
+    EIGEN_DEVICE_FUNC explicit DenseBase(int);
+    EIGEN_DEVICE_FUNC DenseBase(int,int);
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_DENSEBASE_H
diff --git a/third-party/Eigen/src/Core/DenseCoeffsBase.h b/third-party/Eigen/src/Core/DenseCoeffsBase.h
new file mode 100644
index 00000000..c4af48ab
--- /dev/null
+++ b/third-party/Eigen/src/Core/DenseCoeffsBase.h
@@ -0,0 +1,681 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DENSECOEFFSBASE_H
+#define EIGEN_DENSECOEFFSBASE_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename T> struct add_const_on_value_type_if_arithmetic
+{
+  typedef typename conditional<is_arithmetic<T>::value, T, typename add_const_on_value_type<T>::type>::type type;
+};
+}
+
+/** \brief Base class providing read-only coefficient access to matrices and arrays.
+  * \ingroup Core_Module
+  * \tparam Derived Type of the derived class
+  * \tparam #ReadOnlyAccessors Constant indicating read-only access
+  *
+  * This class defines the \c operator() \c const function and friends, which can be used to read specific
+  * entries of a matrix or array.
+  * 
+  * \sa DenseCoeffsBase<Derived, WriteAccessors>, DenseCoeffsBase<Derived, DirectAccessors>,
+  *     \ref TopicClassHierarchy
+  */
+template<typename Derived>
+class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
+{
+  public:
+    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+
+    // Explanation for this CoeffReturnType typedef.
+    // - This is the return type of the coeff() method.
+    // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references
+    // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value).
+    // - The is_artihmetic check is required since "const int", "const double", etc. will cause warnings on some systems
+    // while the declaration of "const T", where T is a non arithmetic type does not. Always returning "const Scalar&" is
+    // not possible, since the underlying expressions might not offer a valid address the reference could be referring to.
+    typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
+                         const Scalar&,
+                         typename internal::conditional<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>::type
+                     >::type CoeffReturnType;
+
+    typedef typename internal::add_const_on_value_type_if_arithmetic<
+                         typename internal::packet_traits<Scalar>::type
+                     >::type PacketReturnType;
+
+    typedef EigenBase<Derived> Base;
+    using Base::rows;
+    using Base::cols;
+    using Base::size;
+    using Base::derived;
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
+    {
+      return int(Derived::RowsAtCompileTime) == 1 ? 0
+          : int(Derived::ColsAtCompileTime) == 1 ? inner
+          : int(Derived::Flags)&RowMajorBit ? outer
+          : inner;
+    }
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
+    {
+      return int(Derived::ColsAtCompileTime) == 1 ? 0
+          : int(Derived::RowsAtCompileTime) == 1 ? inner
+          : int(Derived::Flags)&RowMajorBit ? inner
+          : outer;
+    }
+
+    /** Short version: don't use this function, use
+      * \link operator()(Index,Index) const \endlink instead.
+      *
+      * Long version: this function is similar to
+      * \link operator()(Index,Index) const \endlink, but without the assertion.
+      * Use this for limiting the performance cost of debugging code when doing
+      * repeated coefficient access. Only use this when it is guaranteed that the
+      * parameters \a row and \a col are in range.
+      *
+      * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
+      * function equivalent to \link operator()(Index,Index) const \endlink.
+      *
+      * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
+    {
+      eigen_internal_assert(row >= 0 && row < rows()
+                         && col >= 0 && col < cols());
+      return internal::evaluator<Derived>(derived()).coeff(row,col);
+    }
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
+    {
+      return coeff(rowIndexByOuterInner(outer, inner),
+                   colIndexByOuterInner(outer, inner));
+    }
+
+    /** \returns the coefficient at given the given row and column.
+      *
+      * \sa operator()(Index,Index), operator[](Index)
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
+    {
+      eigen_assert(row >= 0 && row < rows()
+          && col >= 0 && col < cols());
+      return coeff(row, col);
+    }
+
+    /** Short version: don't use this function, use
+      * \link operator[](Index) const \endlink instead.
+      *
+      * Long version: this function is similar to
+      * \link operator[](Index) const \endlink, but without the assertion.
+      * Use this for limiting the performance cost of debugging code when doing
+      * repeated coefficient access. Only use this when it is guaranteed that the
+      * parameter \a index is in range.
+      *
+      * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
+      * function equivalent to \link operator[](Index) const \endlink.
+      *
+      * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
+      */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CoeffReturnType
+    coeff(Index index) const
+    {
+      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
+      eigen_internal_assert(index >= 0 && index < size());
+      return internal::evaluator<Derived>(derived()).coeff(index);
+    }
+
+
+    /** \returns the coefficient at given index.
+      *
+      * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
+      *
+      * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const,
+      * z() const, w() const
+      */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CoeffReturnType
+    operator[](Index index) const
+    {
+      EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
+                          THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
+      eigen_assert(index >= 0 && index < size());
+      return coeff(index);
+    }
+
+    /** \returns the coefficient at given index.
+      *
+      * This is synonymous to operator[](Index) const.
+      *
+      * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
+      *
+      * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const,
+      * z() const, w() const
+      */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CoeffReturnType
+    operator()(Index index) const
+    {
+      eigen_assert(index >= 0 && index < size());
+      return coeff(index);
+    }
+
+    /** equivalent to operator[](0).  */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CoeffReturnType
+    x() const { return (*this)[0]; }
+
+    /** equivalent to operator[](1).  */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CoeffReturnType
+    y() const
+    {
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+      return (*this)[1];
+    }
+
+    /** equivalent to operator[](2).  */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CoeffReturnType
+    z() const
+    {
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+      return (*this)[2];
+    }
+
+    /** equivalent to operator[](3).  */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE CoeffReturnType
+    w() const
+    {
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+      return (*this)[3];
+    }
+
+    /** \internal
+      * \returns the packet of coefficients starting at the given row and column. It is your responsibility
+      * to ensure that a packet really starts there. This method is only available on expressions having the
+      * PacketAccessBit.
+      *
+      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
+      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
+      * starting at an address which is a multiple of the packet size.
+      */
+
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
+    {
+      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
+      eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
+      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
+    }
+
+
+    /** \internal */
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const
+    {
+      return packet<LoadMode>(rowIndexByOuterInner(outer, inner),
+                              colIndexByOuterInner(outer, inner));
+    }
+
+    /** \internal
+      * \returns the packet of coefficients starting at the given index. It is your responsibility
+      * to ensure that a packet really starts there. This method is only available on expressions having the
+      * PacketAccessBit and the LinearAccessBit.
+      *
+      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
+      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
+      * starting at an address which is a multiple of the packet size.
+      */
+
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
+    {
+      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
+      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
+      eigen_internal_assert(index >= 0 && index < size());
+      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
+    }
+
+  protected:
+    // explanation: DenseBase is doing "using ..." on the methods from DenseCoeffsBase.
+    // But some methods are only available in the DirectAccess case.
+    // So we add dummy methods here with these names, so that "using... " doesn't fail.
+    // It's not private so that the child class DenseBase can access them, and it's not public
+    // either since it's an implementation detail, so has to be protected.
+    void coeffRef();
+    void coeffRefByOuterInner();
+    void writePacket();
+    void writePacketByOuterInner();
+    void copyCoeff();
+    void copyCoeffByOuterInner();
+    void copyPacket();
+    void copyPacketByOuterInner();
+    void stride();
+    void innerStride();
+    void outerStride();
+    void rowStride();
+    void colStride();
+};
+
+/** \brief Base class providing read/write coefficient access to matrices and arrays.
+  * \ingroup Core_Module
+  * \tparam Derived Type of the derived class
+  * \tparam #WriteAccessors Constant indicating read/write access
+  *
+  * This class defines the non-const \c operator() function and friends, which can be used to write specific
+  * entries of a matrix or array. This class inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which
+  * defines the const variant for reading specific entries.
+  * 
+  * \sa DenseCoeffsBase<Derived, DirectAccessors>, \ref TopicClassHierarchy
+  */
+template<typename Derived>
+class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
+{
+  public:
+
+    typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
+
+    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+
+    using Base::coeff;
+    using Base::rows;
+    using Base::cols;
+    using Base::size;
+    using Base::derived;
+    using Base::rowIndexByOuterInner;
+    using Base::colIndexByOuterInner;
+    using Base::operator[];
+    using Base::operator();
+    using Base::x;
+    using Base::y;
+    using Base::z;
+    using Base::w;
+
+    /** Short version: don't use this function, use
+      * \link operator()(Index,Index) \endlink instead.
+      *
+      * Long version: this function is similar to
+      * \link operator()(Index,Index) \endlink, but without the assertion.
+      * Use this for limiting the performance cost of debugging code when doing
+      * repeated coefficient access. Only use this when it is guaranteed that the
+      * parameters \a row and \a col are in range.
+      *
+      * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
+      * function equivalent to \link operator()(Index,Index) \endlink.
+      *
+      * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
+    {
+      eigen_internal_assert(row >= 0 && row < rows()
+                         && col >= 0 && col < cols());
+      return internal::evaluator<Derived>(derived()).coeffRef(row,col);
+    }
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    coeffRefByOuterInner(Index outer, Index inner)
+    {
+      return coeffRef(rowIndexByOuterInner(outer, inner),
+                      colIndexByOuterInner(outer, inner));
+    }
+
+    /** \returns a reference to the coefficient at given the given row and column.
+      *
+      * \sa operator[](Index)
+      */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    operator()(Index row, Index col)
+    {
+      eigen_assert(row >= 0 && row < rows()
+          && col >= 0 && col < cols());
+      return coeffRef(row, col);
+    }
+
+
+    /** Short version: don't use this function, use
+      * \link operator[](Index) \endlink instead.
+      *
+      * Long version: this function is similar to
+      * \link operator[](Index) \endlink, but without the assertion.
+      * Use this for limiting the performance cost of debugging code when doing
+      * repeated coefficient access. Only use this when it is guaranteed that the
+      * parameters \a row and \a col are in range.
+      *
+      * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this
+      * function equivalent to \link operator[](Index) \endlink.
+      *
+      * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
+      */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    coeffRef(Index index)
+    {
+      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
+      eigen_internal_assert(index >= 0 && index < size());
+      return internal::evaluator<Derived>(derived()).coeffRef(index);
+    }
+
+    /** \returns a reference to the coefficient at given index.
+      *
+      * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
+      *
+      * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
+      */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    operator[](Index index)
+    {
+      EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
+                          THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
+      eigen_assert(index >= 0 && index < size());
+      return coeffRef(index);
+    }
+
+    /** \returns a reference to the coefficient at given index.
+      *
+      * This is synonymous to operator[](Index).
+      *
+      * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit.
+      *
+      * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
+      */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    operator()(Index index)
+    {
+      eigen_assert(index >= 0 && index < size());
+      return coeffRef(index);
+    }
+
+    /** equivalent to operator[](0).  */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    x() { return (*this)[0]; }
+
+    /** equivalent to operator[](1).  */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    y()
+    {
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+      return (*this)[1];
+    }
+
+    /** equivalent to operator[](2).  */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    z()
+    {
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+      return (*this)[2];
+    }
+
+    /** equivalent to operator[](3).  */
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    w()
+    {
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+      return (*this)[3];
+    }
+};
+
+/** \brief Base class providing direct read-only coefficient access to matrices and arrays.
+  * \ingroup Core_Module
+  * \tparam Derived Type of the derived class
+  * \tparam #DirectAccessors Constant indicating direct access
+  *
+  * This class defines functions to work with strides which can be used to access entries directly. This class
+  * inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
+  * \c operator() .
+  *
+  * \sa \blank \ref TopicClassHierarchy
+  */
+template<typename Derived>
+class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
+{
+  public:
+
+    typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+
+    using Base::rows;
+    using Base::cols;
+    using Base::size;
+    using Base::derived;
+
+    /** \returns the pointer increment between two consecutive elements within a slice in the inner direction.
+      *
+      * \sa outerStride(), rowStride(), colStride()
+      */
+    EIGEN_DEVICE_FUNC
+    inline Index innerStride() const
+    {
+      return derived().innerStride();
+    }
+
+    /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns
+      *          in a column-major matrix).
+      *
+      * \sa innerStride(), rowStride(), colStride()
+      */
+    EIGEN_DEVICE_FUNC
+    inline Index outerStride() const
+    {
+      return derived().outerStride();
+    }
+
+    // FIXME shall we remove it ?
+    inline Index stride() const
+    {
+      return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
+    }
+
+    /** \returns the pointer increment between two consecutive rows.
+      *
+      * \sa innerStride(), outerStride(), colStride()
+      */
+    EIGEN_DEVICE_FUNC
+    inline Index rowStride() const
+    {
+      return Derived::IsRowMajor ? outerStride() : innerStride();
+    }
+
+    /** \returns the pointer increment between two consecutive columns.
+      *
+      * \sa innerStride(), outerStride(), rowStride()
+      */
+    EIGEN_DEVICE_FUNC
+    inline Index colStride() const
+    {
+      return Derived::IsRowMajor ? innerStride() : outerStride();
+    }
+};
+
+/** \brief Base class providing direct read/write coefficient access to matrices and arrays.
+  * \ingroup Core_Module
+  * \tparam Derived Type of the derived class
+  * \tparam #DirectWriteAccessors Constant indicating direct access
+  *
+  * This class defines functions to work with strides which can be used to access entries directly. This class
+  * inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
+  * \c operator().
+  *
+  * \sa \blank \ref TopicClassHierarchy
+  */
+template<typename Derived>
+class DenseCoeffsBase<Derived, DirectWriteAccessors>
+  : public DenseCoeffsBase<Derived, WriteAccessors>
+{
+  public:
+
+    typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+
+    using Base::rows;
+    using Base::cols;
+    using Base::size;
+    using Base::derived;
+
+    /** \returns the pointer increment between two consecutive elements within a slice in the inner direction.
+      *
+      * \sa outerStride(), rowStride(), colStride()
+      */
+    EIGEN_DEVICE_FUNC
+    inline Index innerStride() const
+    {
+      return derived().innerStride();
+    }
+
+    /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns
+      *          in a column-major matrix).
+      *
+      * \sa innerStride(), rowStride(), colStride()
+      */
+    EIGEN_DEVICE_FUNC
+    inline Index outerStride() const
+    {
+      return derived().outerStride();
+    }
+
+    // FIXME shall we remove it ?
+    inline Index stride() const
+    {
+      return Derived::IsVectorAtCompileTime ? innerStride() : outerStride();
+    }
+
+    /** \returns the pointer increment between two consecutive rows.
+      *
+      * \sa innerStride(), outerStride(), colStride()
+      */
+    EIGEN_DEVICE_FUNC
+    inline Index rowStride() const
+    {
+      return Derived::IsRowMajor ? outerStride() : innerStride();
+    }
+
+    /** \returns the pointer increment between two consecutive columns.
+      *
+      * \sa innerStride(), outerStride(), rowStride()
+      */
+    EIGEN_DEVICE_FUNC
+    inline Index colStride() const
+    {
+      return Derived::IsRowMajor ? innerStride() : outerStride();
+    }
+};
+
+namespace internal {
+
+template<int Alignment, typename Derived, bool JustReturnZero>
+struct first_aligned_impl
+{
+  static inline Index run(const Derived&)
+  { return 0; }
+};
+
+template<int Alignment, typename Derived>
+struct first_aligned_impl<Alignment, Derived, false>
+{
+  static inline Index run(const Derived& m)
+  {
+    return internal::first_aligned<Alignment>(m.data(), m.size());
+  }
+};
+
+/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization.
+  *
+  * \tparam Alignment requested alignment in Bytes.
+  *
+  * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
+  * documentation.
+  */
+template<int Alignment, typename Derived>
+static inline Index first_aligned(const DenseBase<Derived>& m)
+{
+  enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
+  return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
+}
+
+template<typename Derived>
+static inline Index first_default_aligned(const DenseBase<Derived>& m)
+{
+  typedef typename Derived::Scalar Scalar;
+  typedef typename packet_traits<Scalar>::type DefaultPacketType;
+  return internal::first_aligned<int(unpacket_traits<DefaultPacketType>::alignment),Derived>(m);
+}
+
+template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
+struct inner_stride_at_compile_time
+{
+  enum { ret = traits<Derived>::InnerStrideAtCompileTime };
+};
+
+template<typename Derived>
+struct inner_stride_at_compile_time<Derived, false>
+{
+  enum { ret = 0 };
+};
+
+template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>
+struct outer_stride_at_compile_time
+{
+  enum { ret = traits<Derived>::OuterStrideAtCompileTime };
+};
+
+template<typename Derived>
+struct outer_stride_at_compile_time<Derived, false>
+{
+  enum { ret = 0 };
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_DENSECOEFFSBASE_H
diff --git a/third-party/Eigen/src/Core/DenseStorage.h b/third-party/Eigen/src/Core/DenseStorage.h
new file mode 100644
index 00000000..7d6d4e66
--- /dev/null
+++ b/third-party/Eigen/src/Core/DenseStorage.h
@@ -0,0 +1,570 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2010-2013 Hauke Heibel <hauke.heibel@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIXSTORAGE_H
+#define EIGEN_MATRIXSTORAGE_H
+
+#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+  #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
+#else
+  #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X)
+#endif
+
+namespace Eigen {
+
+namespace internal {
+
+struct constructor_without_unaligned_array_assert {};
+
+template<typename T, int Size>
+EIGEN_DEVICE_FUNC
+void check_static_allocation_size()
+{
+  // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
+  #if EIGEN_STACK_ALLOCATION_LIMIT
+  EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG);
+  #endif
+}
+
+/** \internal
+  * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
+  * to 16 bytes boundary if the total size is a multiple of 16 bytes.
+  */
+template <typename T, int Size, int MatrixOrArrayOptions,
+          int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
+                        : compute_default_alignment<T,Size>::value >
+struct plain_array
+{
+  T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array()
+  { 
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert)
+  { 
+    check_static_allocation_size<T,Size>();
+  }
+};
+
+#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
+  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
+#elif EIGEN_GNUC_AT_LEAST(4,7) 
+  // GCC 4.7 is too aggressive in its optimizations and remove the alignement test based on the fact the array is declared to be aligned.
+  // See this bug report: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53900
+  // Hiding the origin of the array pointer behind a function argument seems to do the trick even if the function is inlined:
+  template<typename PtrType>
+  EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
+  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
+    eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
+              && "this assertion is explained here: " \
+              "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
+              " **** READ THIS WEB PAGE !!! ****");
+#else
+  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
+    eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \
+              && "this assertion is explained here: " \
+              "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
+              " **** READ THIS WEB PAGE !!! ****");
+#endif
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 8>
+{
+  EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array() 
+  {
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert) 
+  { 
+    check_static_allocation_size<T,Size>();
+  }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 16>
+{
+  EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array() 
+  { 
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert) 
+  { 
+    check_static_allocation_size<T,Size>();
+  }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 32>
+{
+  EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array() 
+  {
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert) 
+  { 
+    check_static_allocation_size<T,Size>();
+  }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 64>
+{
+  EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array() 
+  { 
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert) 
+  { 
+    check_static_allocation_size<T,Size>();
+  }
+};
+
+template <typename T, int MatrixOrArrayOptions, int Alignment>
+struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
+{
+  T array[1];
+  EIGEN_DEVICE_FUNC plain_array() {}
+  EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
+};
+
+} // end namespace internal
+
+/** \internal
+  *
+  * \class DenseStorage
+  * \ingroup Core_Module
+  *
+  * \brief Stores the data of a matrix
+  *
+  * This class stores the data of fixed-size, dynamic-size or mixed matrices
+  * in a way as compact as possible.
+  *
+  * \sa Matrix
+  */
+template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage;
+
+// purely fixed-size matrix
+template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseStorage
+{
+    internal::plain_array<T,Size,_Options> m_data;
+  public:
+    EIGEN_DEVICE_FUNC DenseStorage() {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
+    }
+    EIGEN_DEVICE_FUNC
+    explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+      : m_data(internal::constructor_without_unaligned_array_assert()) {}
+    EIGEN_DEVICE_FUNC 
+    DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
+    }
+    EIGEN_DEVICE_FUNC 
+    DenseStorage& operator=(const DenseStorage& other)
+    { 
+      if (this != &other) m_data = other.m_data;
+      return *this; 
+    }
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
+      EIGEN_UNUSED_VARIABLE(size);
+      EIGEN_UNUSED_VARIABLE(rows);
+      EIGEN_UNUSED_VARIABLE(cols);
+    }
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+
+// null matrix
+template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
+{
+  public:
+    EIGEN_DEVICE_FUNC DenseStorage() {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
+    EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
+    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC const T *data() const { return 0; }
+    EIGEN_DEVICE_FUNC T *data() { return 0; }
+};
+
+// more specializations for null matrices; these are necessary to resolve ambiguities
+template<typename T, int _Options> class DenseStorage<T, 0, Dynamic, Dynamic, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+
+template<typename T, int _Rows, int _Options> class DenseStorage<T, 0, _Rows, Dynamic, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+
+template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic, _Cols, _Options>
+: public DenseStorage<T, 0, 0, 0, _Options> { };
+
+// dynamic-size matrix with fixed-size storage
+template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
+{
+    internal::plain_array<T,Size,_Options> m_data;
+    Index m_rows;
+    Index m_cols;
+  public:
+    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) 
+    { 
+      if (this != &other)
+      {
+        m_data = other.m_data;
+        m_rows = other.m_rows;
+        m_cols = other.m_cols;
+      }
+      return *this; 
+    }
+    EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
+    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
+    EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
+    EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
+    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+
+// dynamic-size matrix with fixed-size storage and fixed width
+template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
+{
+    internal::plain_array<T,Size,_Options> m_data;
+    Index m_rows;
+  public:
+    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) 
+    {
+      if (this != &other)
+      {
+        m_data = other.m_data;
+        m_rows = other.m_rows;
+      }
+      return *this; 
+    }
+    EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+    EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
+    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+
+// dynamic-size matrix with fixed-size storage and fixed height
+template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
+{
+    internal::plain_array<T,Size,_Options> m_data;
+    Index m_cols;
+  public:
+    EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+      : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other)
+      {
+        m_data = other.m_data;
+        m_cols = other.m_cols;
+      }
+      return *this;
+    }
+    EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+    EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
+    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+    void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
+    void resize(Index, Index, Index cols) { m_cols = cols; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
+};
+
+// purely dynamic matrix.
+template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
+{
+    T *m_data;
+    Index m_rows;
+    Index m_cols;
+  public:
+    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
+       : m_data(0), m_rows(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
+      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
+      , m_rows(other.m_rows)
+      , m_cols(other.m_cols)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols)
+      internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other)
+      {
+        DenseStorage tmp(other);
+        this->swap(tmp);
+      }
+      return *this;
+    }
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
+      : m_data(std::move(other.m_data))
+      , m_rows(std::move(other.m_rows))
+      , m_cols(std::move(other.m_cols))
+    {
+      other.m_data = nullptr;
+      other.m_rows = 0;
+      other.m_cols = 0;
+    }
+    EIGEN_DEVICE_FUNC
+    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
+    {
+      using std::swap;
+      swap(m_data, other.m_data);
+      swap(m_rows, other.m_rows);
+      swap(m_cols, other.m_cols);
+      return *this;
+    }
+#endif
+    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
+    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
+    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+    void conservativeResize(Index size, Index rows, Index cols)
+    {
+      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
+      m_rows = rows;
+      m_cols = cols;
+    }
+    EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)
+    {
+      if(size != m_rows*m_cols)
+      {
+        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
+        if (size>0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative
+          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+        else
+          m_data = 0;
+        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      }
+      m_rows = rows;
+      m_cols = cols;
+    }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+
+// matrix with dynamic width and fixed height (so that matrix has dynamic size).
+template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
+{
+    T *m_data;
+    Index m_cols;
+  public:
+    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
+    explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
+      EIGEN_UNUSED_VARIABLE(rows);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
+      , m_cols(other.m_cols)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows)
+      internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other)
+      {
+        DenseStorage tmp(other);
+        this->swap(tmp);
+      }
+      return *this;
+    }    
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
+      : m_data(std::move(other.m_data))
+      , m_cols(std::move(other.m_cols))
+    {
+      other.m_data = nullptr;
+      other.m_cols = 0;
+    }
+    EIGEN_DEVICE_FUNC
+    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
+    {
+      using std::swap;
+      swap(m_data, other.m_data);
+      swap(m_cols, other.m_cols);
+      return *this;
+    }
+#endif
+    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
+    {
+      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
+      m_cols = cols;
+    }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
+    {
+      if(size != _Rows*m_cols)
+      {
+        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
+        if (size>0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative
+          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+        else
+          m_data = 0;
+        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      }
+      m_cols = cols;
+    }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+
+// matrix with dynamic height and fixed width (so that matrix has dynamic size).
+template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
+{
+    T *m_data;
+    Index m_rows;
+  public:
+    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
+    explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
+      EIGEN_UNUSED_VARIABLE(cols);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
+      , m_rows(other.m_rows)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols)
+      internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other)
+      {
+        DenseStorage tmp(other);
+        this->swap(tmp);
+      }
+      return *this;
+    }    
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
+      : m_data(std::move(other.m_data))
+      , m_rows(std::move(other.m_rows))
+    {
+      other.m_data = nullptr;
+      other.m_rows = 0;
+    }
+    EIGEN_DEVICE_FUNC
+    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
+    {
+      using std::swap;
+      swap(m_data, other.m_data);
+      swap(m_rows, other.m_rows);
+      return *this;
+    }
+#endif
+    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+    void conservativeResize(Index size, Index rows, Index)
+    {
+      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
+      m_rows = rows;
+    }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
+    {
+      if(size != m_rows*_Cols)
+      {
+        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
+        if (size>0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative
+          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
+        else
+          m_data = 0;
+        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      }
+      m_rows = rows;
+    }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data; }
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATRIX_H
diff --git a/third-party/Eigen/src/Core/Diagonal.h b/third-party/Eigen/src/Core/Diagonal.h
new file mode 100644
index 00000000..afcaf357
--- /dev/null
+++ b/third-party/Eigen/src/Core/Diagonal.h
@@ -0,0 +1,260 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DIAGONAL_H
+#define EIGEN_DIAGONAL_H
+
+namespace Eigen { 
+
+/** \class Diagonal
+  * \ingroup Core_Module
+  *
+  * \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix
+  *
+  * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal
+  * \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
+  *              A positive value means a superdiagonal, a negative value means a subdiagonal.
+  *              You can also use DynamicIndex so the index can be set at runtime.
+  *
+  * The matrix is not required to be square.
+  *
+  * This class represents an expression of the main diagonal, or any sub/super diagonal
+  * of a square matrix. It is the return type of MatrixBase::diagonal() and MatrixBase::diagonal(Index) and most of the
+  * time this is the only way it is used.
+  *
+  * \sa MatrixBase::diagonal(), MatrixBase::diagonal(Index)
+  */
+
+namespace internal {
+template<typename MatrixType, int DiagIndex>
+struct traits<Diagonal<MatrixType,DiagIndex> >
+ : traits<MatrixType>
+{
+  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+  typedef typename MatrixType::StorageKind StorageKind;
+  enum {
+    RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic
+                      : (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
+                                              MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
+    ColsAtCompileTime = 1,
+    MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic
+                         : DiagIndex == DynamicIndex ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime,
+                                                                              MatrixType::MaxColsAtCompileTime)
+                         : (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
+                                                 MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
+    MaxColsAtCompileTime = 1,
+    MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+    Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
+    MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
+    InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
+    OuterStrideAtCompileTime = 0
+  };
+};
+}
+
+template<typename MatrixType, int _DiagIndex> class Diagonal
+   : public internal::dense_xpr_base< Diagonal<MatrixType,_DiagIndex> >::type
+{
+  public:
+
+    enum { DiagIndex = _DiagIndex };
+    typedef typename internal::dense_xpr_base<Diagonal>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
+
+    EIGEN_DEVICE_FUNC
+    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)
+    {
+      eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );
+    }
+
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
+
+    EIGEN_DEVICE_FUNC
+    inline Index rows() const
+    {
+      return m_index.value()<0 ? numext::mini<Index>(m_matrix.cols(),m_matrix.rows()+m_index.value())
+                               : numext::mini<Index>(m_matrix.rows(),m_matrix.cols()-m_index.value());
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline Index cols() const { return 1; }
+
+    EIGEN_DEVICE_FUNC
+    inline Index innerStride() const
+    {
+      return m_matrix.outerStride() + 1;
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline Index outerStride() const
+    {
+      return 0;
+    }
+
+    typedef typename internal::conditional<
+                       internal::is_lvalue<MatrixType>::value,
+                       Scalar,
+                       const Scalar
+                     >::type ScalarWithConstIfNotLvalue;
+
+    EIGEN_DEVICE_FUNC
+    inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
+    EIGEN_DEVICE_FUNC
+    inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
+
+    EIGEN_DEVICE_FUNC
+    inline Scalar& coeffRef(Index row, Index)
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+      return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index row, Index) const
+    {
+      return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline CoeffReturnType coeff(Index row, Index) const
+    {
+      return m_matrix.coeff(row+rowOffset(), row+colOffset());
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline Scalar& coeffRef(Index idx)
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+      return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index idx) const
+    {
+      return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline CoeffReturnType coeff(Index idx) const
+    {
+      return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline const typename internal::remove_all<typename MatrixType::Nested>::type& 
+    nestedExpression() const 
+    {
+      return m_matrix;
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline Index index() const
+    {
+      return m_index.value();
+    }
+
+  protected:
+    typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
+    const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;
+
+  private:
+    // some compilers may fail to optimize std::max etc in case of compile-time constants...
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
+    // trigger a compile-time error if someone try to call packet
+    template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
+    template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
+};
+
+/** \returns an expression of the main diagonal of the matrix \c *this
+  *
+  * \c *this is not required to be square.
+  *
+  * Example: \include MatrixBase_diagonal.cpp
+  * Output: \verbinclude MatrixBase_diagonal.out
+  *
+  * \sa class Diagonal */
+template<typename Derived>
+inline typename MatrixBase<Derived>::DiagonalReturnType
+MatrixBase<Derived>::diagonal()
+{
+  return DiagonalReturnType(derived());
+}
+
+/** This is the const version of diagonal(). */
+template<typename Derived>
+inline typename MatrixBase<Derived>::ConstDiagonalReturnType
+MatrixBase<Derived>::diagonal() const
+{
+  return ConstDiagonalReturnType(derived());
+}
+
+/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
+  *
+  * \c *this is not required to be square.
+  *
+  * The template parameter \a DiagIndex represent a super diagonal if \a DiagIndex > 0
+  * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal.
+  *
+  * Example: \include MatrixBase_diagonal_int.cpp
+  * Output: \verbinclude MatrixBase_diagonal_int.out
+  *
+  * \sa MatrixBase::diagonal(), class Diagonal */
+template<typename Derived>
+inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType
+MatrixBase<Derived>::diagonal(Index index)
+{
+  return DiagonalDynamicIndexReturnType(derived(), index);
+}
+
+/** This is the const version of diagonal(Index). */
+template<typename Derived>
+inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType
+MatrixBase<Derived>::diagonal(Index index) const
+{
+  return ConstDiagonalDynamicIndexReturnType(derived(), index);
+}
+
+/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this
+  *
+  * \c *this is not required to be square.
+  *
+  * The template parameter \a DiagIndex represent a super diagonal if \a DiagIndex > 0
+  * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal.
+  *
+  * Example: \include MatrixBase_diagonal_template_int.cpp
+  * Output: \verbinclude MatrixBase_diagonal_template_int.out
+  *
+  * \sa MatrixBase::diagonal(), class Diagonal */
+template<typename Derived>
+template<int Index_>
+inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type
+MatrixBase<Derived>::diagonal()
+{
+  return typename DiagonalIndexReturnType<Index_>::Type(derived());
+}
+
+/** This is the const version of diagonal<int>(). */
+template<typename Derived>
+template<int Index_>
+inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type
+MatrixBase<Derived>::diagonal() const
+{
+  return typename ConstDiagonalIndexReturnType<Index_>::Type(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_DIAGONAL_H
diff --git a/third-party/Eigen/src/Core/DiagonalMatrix.h b/third-party/Eigen/src/Core/DiagonalMatrix.h
new file mode 100644
index 00000000..ecfdce8e
--- /dev/null
+++ b/third-party/Eigen/src/Core/DiagonalMatrix.h
@@ -0,0 +1,343 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DIAGONALMATRIX_H
+#define EIGEN_DIAGONALMATRIX_H
+
+namespace Eigen { 
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename Derived>
+class DiagonalBase : public EigenBase<Derived>
+{
+  public:
+    typedef typename internal::traits<Derived>::DiagonalVectorType DiagonalVectorType;
+    typedef typename DiagonalVectorType::Scalar Scalar;
+    typedef typename DiagonalVectorType::RealScalar RealScalar;
+    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+
+    enum {
+      RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+      ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+      MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+      MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+      IsVectorAtCompileTime = 0,
+      Flags = NoPreferredStorageOrderBit
+    };
+
+    typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
+    typedef DenseMatrixType DenseType;
+    typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;
+
+    EIGEN_DEVICE_FUNC
+    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+    EIGEN_DEVICE_FUNC
+    inline Derived& derived() { return *static_cast<Derived*>(this); }
+
+    EIGEN_DEVICE_FUNC
+    DenseMatrixType toDenseMatrix() const { return derived(); }
+    
+    EIGEN_DEVICE_FUNC
+    inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
+    EIGEN_DEVICE_FUNC
+    inline DiagonalVectorType& diagonal() { return derived().diagonal(); }
+
+    EIGEN_DEVICE_FUNC
+    inline Index rows() const { return diagonal().size(); }
+    EIGEN_DEVICE_FUNC
+    inline Index cols() const { return diagonal().size(); }
+
+    template<typename MatrixDerived>
+    EIGEN_DEVICE_FUNC
+    const Product<Derived,MatrixDerived,LazyProduct>
+    operator*(const MatrixBase<MatrixDerived> &matrix) const
+    {
+      return Product<Derived, MatrixDerived, LazyProduct>(derived(),matrix.derived());
+    }
+
+    typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> > InverseReturnType;
+    EIGEN_DEVICE_FUNC
+    inline const InverseReturnType
+    inverse() const
+    {
+      return InverseReturnType(diagonal().cwiseInverse());
+    }
+    
+    EIGEN_DEVICE_FUNC
+    inline const DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >
+    operator*(const Scalar& scalar) const
+    {
+      return DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >(diagonal() * scalar);
+    }
+    EIGEN_DEVICE_FUNC
+    friend inline const DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >
+    operator*(const Scalar& scalar, const DiagonalBase& other)
+    {
+      return DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >(scalar * other.diagonal());
+    }
+};
+
+#endif
+
+/** \class DiagonalMatrix
+  * \ingroup Core_Module
+  *
+  * \brief Represents a diagonal matrix with its storage
+  *
+  * \param _Scalar the type of coefficients
+  * \param SizeAtCompileTime the dimension of the matrix, or Dynamic
+  * \param MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults
+  *        to SizeAtCompileTime. Most of the time, you do not need to specify it.
+  *
+  * \sa class DiagonalWrapper
+  */
+
+namespace internal {
+template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
+struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
+ : traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+{
+  typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
+  typedef DiagonalShape StorageKind;
+  enum {
+    Flags = LvalueBit | NoPreferredStorageOrderBit
+  };
+};
+}
+template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime>
+class DiagonalMatrix
+  : public DiagonalBase<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
+{
+  public:
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef typename internal::traits<DiagonalMatrix>::DiagonalVectorType DiagonalVectorType;
+    typedef const DiagonalMatrix& Nested;
+    typedef _Scalar Scalar;
+    typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;
+    typedef typename internal::traits<DiagonalMatrix>::StorageIndex StorageIndex;
+    #endif
+
+  protected:
+
+    DiagonalVectorType m_diagonal;
+
+  public:
+
+    /** const version of diagonal(). */
+    EIGEN_DEVICE_FUNC
+    inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
+    /** \returns a reference to the stored vector of diagonal coefficients. */
+    EIGEN_DEVICE_FUNC
+    inline DiagonalVectorType& diagonal() { return m_diagonal; }
+
+    /** Default constructor without initialization */
+    EIGEN_DEVICE_FUNC
+    inline DiagonalMatrix() {}
+
+    /** Constructs a diagonal matrix with given dimension  */
+    EIGEN_DEVICE_FUNC
+    explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
+
+    /** 2D constructor. */
+    EIGEN_DEVICE_FUNC
+    inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}
+
+    /** 3D constructor. */
+    EIGEN_DEVICE_FUNC
+    inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}
+
+    /** Copy constructor. */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */
+    inline DiagonalMatrix(const DiagonalMatrix& other) : m_diagonal(other.diagonal()) {}
+    #endif
+
+    /** generic constructor from expression of the diagonal coefficients */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)
+    {}
+
+    /** Copy operator. */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)
+    {
+      m_diagonal = other.diagonal();
+      return *this;
+    }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** This is a special case of the templated operator=. Its purpose is to
+      * prevent a default operator= from hiding the templated operator=.
+      */
+    EIGEN_DEVICE_FUNC
+    DiagonalMatrix& operator=(const DiagonalMatrix& other)
+    {
+      m_diagonal = other.diagonal();
+      return *this;
+    }
+    #endif
+
+    /** Resizes to given size. */
+    EIGEN_DEVICE_FUNC
+    inline void resize(Index size) { m_diagonal.resize(size); }
+    /** Sets all coefficients to zero. */
+    EIGEN_DEVICE_FUNC
+    inline void setZero() { m_diagonal.setZero(); }
+    /** Resizes and sets all coefficients to zero. */
+    EIGEN_DEVICE_FUNC
+    inline void setZero(Index size) { m_diagonal.setZero(size); }
+    /** Sets this matrix to be the identity matrix of the current size. */
+    EIGEN_DEVICE_FUNC
+    inline void setIdentity() { m_diagonal.setOnes(); }
+    /** Sets this matrix to be the identity matrix of the given size. */
+    EIGEN_DEVICE_FUNC
+    inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
+};
+
+/** \class DiagonalWrapper
+  * \ingroup Core_Module
+  *
+  * \brief Expression of a diagonal matrix
+  *
+  * \param _DiagonalVectorType the type of the vector of diagonal coefficients
+  *
+  * This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients,
+  * instead wrapping an existing vector expression. It is the return type of MatrixBase::asDiagonal()
+  * and most of the time this is the only way that it is used.
+  *
+  * \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal()
+  */
+
+namespace internal {
+template<typename _DiagonalVectorType>
+struct traits<DiagonalWrapper<_DiagonalVectorType> >
+{
+  typedef _DiagonalVectorType DiagonalVectorType;
+  typedef typename DiagonalVectorType::Scalar Scalar;
+  typedef typename DiagonalVectorType::StorageIndex StorageIndex;
+  typedef DiagonalShape StorageKind;
+  typedef typename traits<DiagonalVectorType>::XprKind XprKind;
+  enum {
+    RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+    ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+    MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+    MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+    Flags =  (traits<DiagonalVectorType>::Flags & LvalueBit) | NoPreferredStorageOrderBit
+  };
+};
+}
+
+template<typename _DiagonalVectorType>
+class DiagonalWrapper
+  : public DiagonalBase<DiagonalWrapper<_DiagonalVectorType> >, internal::no_assignment_operator
+{
+  public:
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef _DiagonalVectorType DiagonalVectorType;
+    typedef DiagonalWrapper Nested;
+    #endif
+
+    /** Constructor from expression of diagonal coefficients to wrap. */
+    EIGEN_DEVICE_FUNC
+    explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
+
+    /** \returns a const reference to the wrapped expression of diagonal coefficients. */
+    EIGEN_DEVICE_FUNC
+    const DiagonalVectorType& diagonal() const { return m_diagonal; }
+
+  protected:
+    typename DiagonalVectorType::Nested m_diagonal;
+};
+
+/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients
+  *
+  * \only_for_vectors
+  *
+  * Example: \include MatrixBase_asDiagonal.cpp
+  * Output: \verbinclude MatrixBase_asDiagonal.out
+  *
+  * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal()
+  **/
+template<typename Derived>
+inline const DiagonalWrapper<const Derived>
+MatrixBase<Derived>::asDiagonal() const
+{
+  return DiagonalWrapper<const Derived>(derived());
+}
+
+/** \returns true if *this is approximately equal to a diagonal matrix,
+  *          within the precision given by \a prec.
+  *
+  * Example: \include MatrixBase_isDiagonal.cpp
+  * Output: \verbinclude MatrixBase_isDiagonal.out
+  *
+  * \sa asDiagonal()
+  */
+template<typename Derived>
+bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
+{
+  if(cols() != rows()) return false;
+  RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
+  for(Index j = 0; j < cols(); ++j)
+  {
+    RealScalar absOnDiagonal = numext::abs(coeff(j,j));
+    if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
+  }
+  for(Index j = 0; j < cols(); ++j)
+    for(Index i = 0; i < j; ++i)
+    {
+      if(!internal::isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false;
+      if(!internal::isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false;
+    }
+  return true;
+}
+
+namespace internal {
+
+template<> struct storage_kind_to_shape<DiagonalShape> { typedef DiagonalShape Shape; };
+
+struct Diagonal2Dense {};
+
+template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; };
+
+// Diagonal matrix to Dense assignment
+template< typename DstXprType, typename SrcXprType, typename Functor>
+struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
+{
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+    
+    dst.setZero();
+    dst.diagonal() = src.diagonal();
+  }
+  
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
+  { dst.diagonal() += src.diagonal(); }
+  
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
+  { dst.diagonal() -= src.diagonal(); }
+};
+
+} // namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_DIAGONALMATRIX_H
diff --git a/third-party/Eigen/src/Core/DiagonalProduct.h b/third-party/Eigen/src/Core/DiagonalProduct.h
new file mode 100644
index 00000000..7911d1cd
--- /dev/null
+++ b/third-party/Eigen/src/Core/DiagonalProduct.h
@@ -0,0 +1,28 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DIAGONALPRODUCT_H
+#define EIGEN_DIAGONALPRODUCT_H
+
+namespace Eigen { 
+
+/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
+  */
+template<typename Derived>
+template<typename DiagonalDerived>
+EIGEN_DEVICE_FUNC inline const Product<Derived, DiagonalDerived, LazyProduct>
+MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const
+{
+  return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_DIAGONALPRODUCT_H
diff --git a/third-party/Eigen/src/Core/Dot.h b/third-party/Eigen/src/Core/Dot.h
new file mode 100644
index 00000000..24d7bb80
--- /dev/null
+++ b/third-party/Eigen/src/Core/Dot.h
@@ -0,0 +1,318 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008, 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DOT_H
+#define EIGEN_DOT_H
+
+namespace Eigen { 
+
+namespace internal {
+
+// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot
+// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE
+// looking at the static assertions. Thus this is a trick to get better compile errors.
+template<typename T, typename U,
+// the NeedToTranspose condition here is taken straight from Assign.h
+         bool NeedToTranspose = T::IsVectorAtCompileTime
+                && U::IsVectorAtCompileTime
+                && ((int(T::RowsAtCompileTime) == 1 && int(U::ColsAtCompileTime) == 1)
+                      |  // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+                         // revert to || as soon as not needed anymore.
+                    (int(T::ColsAtCompileTime) == 1 && int(U::RowsAtCompileTime) == 1))
+>
+struct dot_nocheck
+{
+  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
+  typedef typename conj_prod::result_type ResScalar;
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE
+  static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+  {
+    return a.template binaryExpr<conj_prod>(b).sum();
+  }
+};
+
+template<typename T, typename U>
+struct dot_nocheck<T, U, true>
+{
+  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
+  typedef typename conj_prod::result_type ResScalar;
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE
+  static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+  {
+    return a.transpose().template binaryExpr<conj_prod>(b).sum();
+  }
+};
+
+} // end namespace internal
+
+/** \fn MatrixBase::dot
+  * \returns the dot product of *this with other.
+  *
+  * \only_for_vectors
+  *
+  * \note If the scalar type is complex numbers, then this function returns the hermitian
+  * (sesquilinear) dot product, conjugate-linear in the first variable and linear in the
+  * second variable.
+  *
+  * \sa squaredNorm(), norm()
+  */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE
+typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
+MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
+#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
+  typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
+  EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
+#endif
+  
+  eigen_assert(size() == other.size());
+
+  return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
+}
+
+//---------- implementation of L2 norm and related functions ----------
+
+/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm.
+  * In both cases, it consists in the sum of the square of all the matrix entries.
+  * For vectors, this is also equals to the dot product of \c *this with itself.
+  *
+  * \sa dot(), norm(), lpNorm()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
+{
+  return numext::real((*this).cwiseAbs2().sum());
+}
+
+/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
+  * In both cases, it consists in the square root of the sum of the square of all the matrix entries.
+  * For vectors, this is also equals to the square root of the dot product of \c *this with itself.
+  *
+  * \sa lpNorm(), dot(), squaredNorm()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
+{
+  return numext::sqrt(squaredNorm());
+}
+
+/** \returns an expression of the quotient of \c *this by its own norm.
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0),
+  *          then this function returns a copy of the input.
+  *
+  * \only_for_vectors
+  *
+  * \sa norm(), normalize()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
+MatrixBase<Derived>::normalized() const
+{
+  typedef typename internal::nested_eval<Derived,2>::type _Nested;
+  _Nested n(derived());
+  RealScalar z = n.squaredNorm();
+  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
+  if(z>RealScalar(0))
+    return n / numext::sqrt(z);
+  else
+    return n;
+}
+
+/** Normalizes the vector, i.e. divides it by its own norm.
+  *
+  * \only_for_vectors
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+  *
+  * \sa norm(), normalized()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
+{
+  RealScalar z = squaredNorm();
+  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
+  if(z>RealScalar(0))
+    derived() /= numext::sqrt(z);
+}
+
+/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow.
+  *
+  * \only_for_vectors
+  *
+  * This method is analogue to the normalized() method, but it reduces the risk of
+  * underflow and overflow when computing the norm.
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0),
+  *          then this function returns a copy of the input.
+  *
+  * \sa stableNorm(), stableNormalize(), normalized()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
+MatrixBase<Derived>::stableNormalized() const
+{
+  typedef typename internal::nested_eval<Derived,3>::type _Nested;
+  _Nested n(derived());
+  RealScalar w = n.cwiseAbs().maxCoeff();
+  RealScalar z = (n/w).squaredNorm();
+  if(z>RealScalar(0))
+    return n / (numext::sqrt(z)*w);
+  else
+    return n;
+}
+
+/** Normalizes the vector while avoid underflow and overflow
+  *
+  * \only_for_vectors
+  *
+  * This method is analogue to the normalize() method, but it reduces the risk of
+  * underflow and overflow when computing the norm.
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+  *
+  * \sa stableNorm(), stableNormalized(), normalize()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
+{
+  RealScalar w = cwiseAbs().maxCoeff();
+  RealScalar z = (derived()/w).squaredNorm();
+  if(z>RealScalar(0))
+    derived() /= numext::sqrt(z)*w;
+}
+
+//---------- implementation of other norms ----------
+
+namespace internal {
+
+template<typename Derived, int p>
+struct lpNorm_selector
+{
+  typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const MatrixBase<Derived>& m)
+  {
+    EIGEN_USING_STD_MATH(pow)
+    return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
+  }
+};
+
+template<typename Derived>
+struct lpNorm_selector<Derived, 1>
+{
+  EIGEN_DEVICE_FUNC
+  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+  {
+    return m.cwiseAbs().sum();
+  }
+};
+
+template<typename Derived>
+struct lpNorm_selector<Derived, 2>
+{
+  EIGEN_DEVICE_FUNC
+  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+  {
+    return m.norm();
+  }
+};
+
+template<typename Derived>
+struct lpNorm_selector<Derived, Infinity>
+{
+  typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const MatrixBase<Derived>& m)
+  {
+    if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0))
+      return RealScalar(0);
+    return m.cwiseAbs().maxCoeff();
+  }
+};
+
+} // end namespace internal
+
+/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
+  *          of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
+  *          norm, that is the maximum of the absolute values of the coefficients of \c *this.
+  *
+  * In all cases, if \c *this is empty, then the value 0 is returned.
+  *
+  * \note For matrices, this function does not compute the <a href="https://en.wikipedia.org/wiki/Operator_norm">operator-norm</a>. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink.
+  *
+  * \sa norm()
+  */
+template<typename Derived>
+template<int p>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+#else
+MatrixBase<Derived>::RealScalar
+#endif
+MatrixBase<Derived>::lpNorm() const
+{
+  return internal::lpNorm_selector<Derived, p>::run(*this);
+}
+
+//---------- implementation of isOrthogonal / isUnitary ----------
+
+/** \returns true if *this is approximately orthogonal to \a other,
+  *          within the precision given by \a prec.
+  *
+  * Example: \include MatrixBase_isOrthogonal.cpp
+  * Output: \verbinclude MatrixBase_isOrthogonal.out
+  */
+template<typename Derived>
+template<typename OtherDerived>
+bool MatrixBase<Derived>::isOrthogonal
+(const MatrixBase<OtherDerived>& other, const RealScalar& prec) const
+{
+  typename internal::nested_eval<Derived,2>::type nested(derived());
+  typename internal::nested_eval<OtherDerived,2>::type otherNested(other.derived());
+  return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
+}
+
+/** \returns true if *this is approximately an unitary matrix,
+  *          within the precision given by \a prec. In the case where the \a Scalar
+  *          type is real numbers, a unitary matrix is an orthogonal matrix, whence the name.
+  *
+  * \note This can be used to check whether a family of vectors forms an orthonormal basis.
+  *       Indeed, \c m.isUnitary() returns true if and only if the columns (equivalently, the rows) of m form an
+  *       orthonormal basis.
+  *
+  * Example: \include MatrixBase_isUnitary.cpp
+  * Output: \verbinclude MatrixBase_isUnitary.out
+  */
+template<typename Derived>
+bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
+{
+  typename internal::nested_eval<Derived,1>::type self(derived());
+  for(Index i = 0; i < cols(); ++i)
+  {
+    if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
+      return false;
+    for(Index j = 0; j < i; ++j)
+      if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))
+        return false;
+  }
+  return true;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_DOT_H
diff --git a/third-party/Eigen/src/Core/EigenBase.h b/third-party/Eigen/src/Core/EigenBase.h
new file mode 100644
index 00000000..b195506a
--- /dev/null
+++ b/third-party/Eigen/src/Core/EigenBase.h
@@ -0,0 +1,159 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_EIGENBASE_H
+#define EIGEN_EIGENBASE_H
+
+namespace Eigen {
+
+/** \class EigenBase
+  * \ingroup Core_Module
+  * 
+  * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
+  *
+  * In other words, an EigenBase object is an object that can be copied into a MatrixBase.
+  *
+  * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc.
+  *
+  * Notice that this class is trivial, it is only used to disambiguate overloaded functions.
+  *
+  * \sa \blank \ref TopicClassHierarchy
+  */
+template<typename Derived> struct EigenBase
+{
+//   typedef typename internal::plain_matrix_type<Derived>::type PlainObject;
+  
+  /** \brief The interface type of indices
+    * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
+    * \deprecated Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead.
+    * \sa StorageIndex, \ref TopicPreprocessorDirectives.
+    */
+  typedef Eigen::Index Index;
+
+  // FIXME is it needed?
+  typedef typename internal::traits<Derived>::StorageKind StorageKind;
+
+  /** \returns a reference to the derived object */
+  EIGEN_DEVICE_FUNC
+  Derived& derived() { return *static_cast<Derived*>(this); }
+  /** \returns a const reference to the derived object */
+  EIGEN_DEVICE_FUNC
+  const Derived& derived() const { return *static_cast<const Derived*>(this); }
+
+  EIGEN_DEVICE_FUNC
+  inline Derived& const_cast_derived() const
+  { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
+  EIGEN_DEVICE_FUNC
+  inline const Derived& const_derived() const
+  { return *static_cast<const Derived*>(this); }
+
+  /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
+  EIGEN_DEVICE_FUNC
+  inline Index rows() const { return derived().rows(); }
+  /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
+  EIGEN_DEVICE_FUNC
+  inline Index cols() const { return derived().cols(); }
+  /** \returns the number of coefficients, which is rows()*cols().
+    * \sa rows(), cols(), SizeAtCompileTime. */
+  EIGEN_DEVICE_FUNC
+  inline Index size() const { return rows() * cols(); }
+
+  /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void evalTo(Dest& dst) const
+  { derived().evalTo(dst); }
+
+  /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void addTo(Dest& dst) const
+  {
+    // This is the default implementation,
+    // derived class can reimplement it in a more optimized way.
+    typename Dest::PlainObject res(rows(),cols());
+    evalTo(res);
+    dst += res;
+  }
+
+  /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void subTo(Dest& dst) const
+  {
+    // This is the default implementation,
+    // derived class can reimplement it in a more optimized way.
+    typename Dest::PlainObject res(rows(),cols());
+    evalTo(res);
+    dst -= res;
+  }
+
+  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
+  {
+    // This is the default implementation,
+    // derived class can reimplement it in a more optimized way.
+    dst = dst * this->derived();
+  }
+
+  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
+  {
+    // This is the default implementation,
+    // derived class can reimplement it in a more optimized way.
+    dst = this->derived() * dst;
+  }
+
+};
+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+/** \brief Copies the generic expression \a other into *this.
+  *
+  * \details The expression must provide a (templated) evalTo(Derived& dst) const
+  * function which does the actual job. In practice, this allows any user to write
+  * its own special matrix without having to modify MatrixBase
+  *
+  * \returns a reference to *this.
+  */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
+{
+  call_assignment(derived(), other.derived());
+  return derived();
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
+{
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
+{
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_EIGENBASE_H
diff --git a/third-party/Eigen/src/Core/ForceAlignedAccess.h b/third-party/Eigen/src/Core/ForceAlignedAccess.h
new file mode 100644
index 00000000..7b08b45e
--- /dev/null
+++ b/third-party/Eigen/src/Core/ForceAlignedAccess.h
@@ -0,0 +1,146 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FORCEALIGNEDACCESS_H
+#define EIGEN_FORCEALIGNEDACCESS_H
+
+namespace Eigen {
+
+/** \class ForceAlignedAccess
+  * \ingroup Core_Module
+  *
+  * \brief Enforce aligned packet loads and stores regardless of what is requested
+  *
+  * \param ExpressionType the type of the object of which we are forcing aligned packet access
+  *
+  * This class is the return type of MatrixBase::forceAlignedAccess()
+  * and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::forceAlignedAccess()
+  */
+
+namespace internal {
+template<typename ExpressionType>
+struct traits<ForceAlignedAccess<ExpressionType> > : public traits<ExpressionType>
+{};
+}
+
+template<typename ExpressionType> class ForceAlignedAccess
+  : public internal::dense_xpr_base< ForceAlignedAccess<ExpressionType> >::type
+{
+  public:
+
+    typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)
+
+    EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
+
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
+
+    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
+    {
+      return m_expression.coeff(row, col);
+    }
+
+    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
+    {
+      return m_expression.const_cast_derived().coeffRef(row, col);
+    }
+
+    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
+    {
+      return m_expression.coeff(index);
+    }
+
+    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
+    {
+      return m_expression.const_cast_derived().coeffRef(index);
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(Index row, Index col) const
+    {
+      return m_expression.template packet<Aligned>(row, col);
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index row, Index col, const PacketScalar& x)
+    {
+      m_expression.const_cast_derived().template writePacket<Aligned>(row, col, x);
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(Index index) const
+    {
+      return m_expression.template packet<Aligned>(index);
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index index, const PacketScalar& x)
+    {
+      m_expression.const_cast_derived().template writePacket<Aligned>(index, x);
+    }
+
+    EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
+
+  protected:
+    const ExpressionType& m_expression;
+
+  private:
+    ForceAlignedAccess& operator=(const ForceAlignedAccess&);
+};
+
+/** \returns an expression of *this with forced aligned access
+  * \sa forceAlignedAccessIf(),class ForceAlignedAccess
+  */
+template<typename Derived>
+inline const ForceAlignedAccess<Derived>
+MatrixBase<Derived>::forceAlignedAccess() const
+{
+  return ForceAlignedAccess<Derived>(derived());
+}
+
+/** \returns an expression of *this with forced aligned access
+  * \sa forceAlignedAccessIf(), class ForceAlignedAccess
+  */
+template<typename Derived>
+inline ForceAlignedAccess<Derived>
+MatrixBase<Derived>::forceAlignedAccess()
+{
+  return ForceAlignedAccess<Derived>(derived());
+}
+
+/** \returns an expression of *this with forced aligned access if \a Enable is true.
+  * \sa forceAlignedAccess(), class ForceAlignedAccess
+  */
+template<typename Derived>
+template<bool Enable>
+inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type
+MatrixBase<Derived>::forceAlignedAccessIf() const
+{
+  return derived();  // FIXME This should not work but apparently is never used
+}
+
+/** \returns an expression of *this with forced aligned access if \a Enable is true.
+  * \sa forceAlignedAccess(), class ForceAlignedAccess
+  */
+template<typename Derived>
+template<bool Enable>
+inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type
+MatrixBase<Derived>::forceAlignedAccessIf()
+{
+  return derived();  // FIXME This should not work but apparently is never used
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_FORCEALIGNEDACCESS_H
diff --git a/third-party/Eigen/src/Core/Fuzzy.h b/third-party/Eigen/src/Core/Fuzzy.h
new file mode 100644
index 00000000..3e403a09
--- /dev/null
+++ b/third-party/Eigen/src/Core/Fuzzy.h
@@ -0,0 +1,155 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FUZZY_H
+#define EIGEN_FUZZY_H
+
+namespace Eigen { 
+
+namespace internal
+{
+
+template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
+struct isApprox_selector
+{
+  EIGEN_DEVICE_FUNC
+  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
+  {
+    typename internal::nested_eval<Derived,2>::type nested(x);
+    typename internal::nested_eval<OtherDerived,2>::type otherNested(y);
+    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
+  }
+};
+
+template<typename Derived, typename OtherDerived>
+struct isApprox_selector<Derived, OtherDerived, true>
+{
+  EIGEN_DEVICE_FUNC
+  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)
+  {
+    return x.matrix() == y.matrix();
+  }
+};
+
+template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
+struct isMuchSmallerThan_object_selector
+{
+  EIGEN_DEVICE_FUNC
+  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
+  {
+    return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
+  }
+};
+
+template<typename Derived, typename OtherDerived>
+struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
+{
+  EIGEN_DEVICE_FUNC
+  static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)
+  {
+    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
+  }
+};
+
+template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
+struct isMuchSmallerThan_scalar_selector
+{
+  EIGEN_DEVICE_FUNC
+  static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)
+  {
+    return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
+  }
+};
+
+template<typename Derived>
+struct isMuchSmallerThan_scalar_selector<Derived, true>
+{
+  EIGEN_DEVICE_FUNC
+  static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)
+  {
+    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
+  }
+};
+
+} // end namespace internal
+
+
+/** \returns \c true if \c *this is approximately equal to \a other, within the precision
+  * determined by \a prec.
+  *
+  * \note The fuzzy compares are done multiplicatively. Two vectors \f$ v \f$ and \f$ w \f$
+  * are considered to be approximately equal within precision \f$ p \f$ if
+  * \f[ \Vert v - w \Vert \leqslant p\,\min(\Vert v\Vert, \Vert w\Vert). \f]
+  * For matrices, the comparison is done using the Hilbert-Schmidt norm (aka Frobenius norm
+  * L2 norm).
+  *
+  * \note Because of the multiplicativeness of this comparison, one can't use this function
+  * to check whether \c *this is approximately equal to the zero matrix or vector.
+  * Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix
+  * or vector. If you want to test whether \c *this is zero, use internal::isMuchSmallerThan(const
+  * RealScalar&, RealScalar) instead.
+  *
+  * \sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const
+  */
+template<typename Derived>
+template<typename OtherDerived>
+bool DenseBase<Derived>::isApprox(
+  const DenseBase<OtherDerived>& other,
+  const RealScalar& prec
+) const
+{
+  return internal::isApprox_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
+}
+
+/** \returns \c true if the norm of \c *this is much smaller than \a other,
+  * within the precision determined by \a prec.
+  *
+  * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is
+  * considered to be much smaller than \f$ x \f$ within precision \f$ p \f$ if
+  * \f[ \Vert v \Vert \leqslant p\,\vert x\vert. \f]
+  *
+  * For matrices, the comparison is done using the Hilbert-Schmidt norm. For this reason,
+  * the value of the reference scalar \a other should come from the Hilbert-Schmidt norm
+  * of a reference matrix of same dimensions.
+  *
+  * \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const
+  */
+template<typename Derived>
+bool DenseBase<Derived>::isMuchSmallerThan(
+  const typename NumTraits<Scalar>::Real& other,
+  const RealScalar& prec
+) const
+{
+  return internal::isMuchSmallerThan_scalar_selector<Derived>::run(derived(), other, prec);
+}
+
+/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other,
+  * within the precision determined by \a prec.
+  *
+  * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is
+  * considered to be much smaller than a vector \f$ w \f$ within precision \f$ p \f$ if
+  * \f[ \Vert v \Vert \leqslant p\,\Vert w\Vert. \f]
+  * For matrices, the comparison is done using the Hilbert-Schmidt norm.
+  *
+  * \sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const
+  */
+template<typename Derived>
+template<typename OtherDerived>
+bool DenseBase<Derived>::isMuchSmallerThan(
+  const DenseBase<OtherDerived>& other,
+  const RealScalar& prec
+) const
+{
+  return internal::isMuchSmallerThan_object_selector<Derived, OtherDerived>::run(derived(), other.derived(), prec);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_FUZZY_H
diff --git a/third-party/Eigen/src/Core/GeneralProduct.h b/third-party/Eigen/src/Core/GeneralProduct.h
new file mode 100644
index 00000000..6f0cc80e
--- /dev/null
+++ b/third-party/Eigen/src/Core/GeneralProduct.h
@@ -0,0 +1,455 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERAL_PRODUCT_H
+#define EIGEN_GENERAL_PRODUCT_H
+
+namespace Eigen {
+
+enum {
+  Large = 2,
+  Small = 3
+};
+
+namespace internal {
+
+template<int Rows, int Cols, int Depth> struct product_type_selector;
+
+template<int Size, int MaxSize> struct product_size_category
+{
+  enum {
+    #ifndef EIGEN_CUDA_ARCH
+    is_large = MaxSize == Dynamic ||
+               Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
+               (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
+    #else
+    is_large = 0,
+    #endif
+    value = is_large  ? Large
+          : Size == 1 ? 1
+                      : Small
+  };
+};
+
+template<typename Lhs, typename Rhs> struct product_type
+{
+  typedef typename remove_all<Lhs>::type _Lhs;
+  typedef typename remove_all<Rhs>::type _Rhs;
+  enum {
+    MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
+    Rows    = traits<_Lhs>::RowsAtCompileTime,
+    MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
+    Cols    = traits<_Rhs>::ColsAtCompileTime,
+    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
+                                           traits<_Rhs>::MaxRowsAtCompileTime),
+    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
+                                        traits<_Rhs>::RowsAtCompileTime)
+  };
+
+  // the splitting into different lines of code here, introducing the _select enums and the typedef below,
+  // is to work around an internal compiler error with gcc 4.1 and 4.2.
+private:
+  enum {
+    rows_select = product_size_category<Rows,MaxRows>::value,
+    cols_select = product_size_category<Cols,MaxCols>::value,
+    depth_select = product_size_category<Depth,MaxDepth>::value
+  };
+  typedef product_type_selector<rows_select, cols_select, depth_select> selector;
+
+public:
+  enum {
+    value = selector::ret,
+    ret = selector::ret
+  };
+#ifdef EIGEN_DEBUG_PRODUCT
+  static void debug()
+  {
+      EIGEN_DEBUG_VAR(Rows);
+      EIGEN_DEBUG_VAR(Cols);
+      EIGEN_DEBUG_VAR(Depth);
+      EIGEN_DEBUG_VAR(rows_select);
+      EIGEN_DEBUG_VAR(cols_select);
+      EIGEN_DEBUG_VAR(depth_select);
+      EIGEN_DEBUG_VAR(value);
+  }
+#endif
+};
+
+/* The following allows to select the kind of product at compile time
+ * based on the three dimensions of the product.
+ * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
+// FIXME I'm not sure the current mapping is the ideal one.
+template<int M, int N>  struct product_type_selector<M,N,1>              { enum { ret = OuterProduct }; };
+template<int M>         struct product_type_selector<M, 1, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
+template<int N>         struct product_type_selector<1, N, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
+template<int Depth>     struct product_type_selector<1,    1,    Depth>  { enum { ret = InnerProduct }; };
+template<>              struct product_type_selector<1,    1,    1>      { enum { ret = InnerProduct }; };
+template<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<1,    Small,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small, Large, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
+template<>              struct product_type_selector<Large, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
+template<>              struct product_type_selector<1,    Large,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<1,    Large,Large>  { enum { ret = GemvProduct }; };
+template<>              struct product_type_selector<1,    Small,Large>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Large,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Large,1,    Large>  { enum { ret = GemvProduct }; };
+template<>              struct product_type_selector<Small,1,    Large>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small,Small,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Large,Large>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Large,Large,Small>  { enum { ret = GemmProduct }; };
+
+} // end namespace internal
+
+/***********************************************************************
+*  Implementation of Inner Vector Vector Product
+***********************************************************************/
+
+// FIXME : maybe the "inner product" could return a Scalar
+// instead of a 1x1 matrix ??
+// Pro: more natural for the user
+// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
+// product ends up to a row-vector times col-vector product... To tackle this use
+// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
+
+/***********************************************************************
+*  Implementation of Outer Vector Vector Product
+***********************************************************************/
+
+/***********************************************************************
+*  Implementation of General Matrix Vector Product
+***********************************************************************/
+
+/*  According to the shape/flags of the matrix we have to distinghish 3 different cases:
+ *   1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
+ *   2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
+ *   3 - all other cases are handled using a simple loop along the outer-storage direction.
+ *  Therefore we need a lower level meta selector.
+ *  Furthermore, if the matrix is the rhs, then the product has to be transposed.
+ */
+namespace internal {
+
+template<int Side, int StorageOrder, bool BlasCompatible>
+struct gemv_dense_selector;
+
+} // end namespace internal
+
+namespace internal {
+
+template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;
+
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
+{
+  EIGEN_STRONG_INLINE  Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
+};
+
+template<typename Scalar,int Size>
+struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
+{
+  EIGEN_STRONG_INLINE Scalar* data() { return 0; }
+};
+
+template<typename Scalar,int Size,int MaxSize>
+struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
+{
+  enum {
+    ForceAlignment  = internal::packet_traits<Scalar>::Vectorizable,
+    PacketSize      = internal::packet_traits<Scalar>::size
+  };
+  #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
+  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
+  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
+  #else
+  // Some architectures cannot align on the stack,
+  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
+  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
+  EIGEN_STRONG_INLINE Scalar* data() {
+    return ForceAlignment
+            ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
+            : m_data.array;
+  }
+  #endif
+};
+
+// The vector is on the left => transposition
+template<int StorageOrder, bool BlasCompatible>
+struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>
+{
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  {
+    Transpose<Dest> destT(dest);
+    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
+    gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+      ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
+  }
+};
+
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
+{
+  template<typename Lhs, typename Rhs, typename Dest>
+  static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  {
+    typedef typename Lhs::Scalar   LhsScalar;
+    typedef typename Rhs::Scalar   RhsScalar;
+    typedef typename Dest::Scalar  ResScalar;
+    typedef typename Dest::RealScalar  RealScalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+  
+    typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
+
+    ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
+    ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
+
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+                                  * RhsBlasTraits::extractScalarFactor(rhs);
+
+    // make sure Dest is a compile-time vector type (bug 1166)
+    typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
+
+    enum {
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on, the other hand it is good for the cache to pack the vector anyways...
+      EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
+      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
+      MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
+    };
+
+    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
+    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
+
+    if(!MightCannotUseDest)
+    {
+      // shortcut if we are sure to be able to use dest directly,
+      // this ease the compiler to generate cleaner and more optimzized code for most common cases
+      general_matrix_vector_product
+          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+          actualLhs.rows(), actualLhs.cols(),
+          LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+          RhsMapper(actualRhs.data(), actualRhs.innerStride()),
+          dest.data(), 1,
+          compatibleAlpha);
+    }
+    else
+    {
+      gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+
+      const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
+      const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+
+      ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+                                                    evalToDest ? dest.data() : static_dest.data());
+
+      if(!evalToDest)
+      {
+        #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+        Index size = dest.size();
+        EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+        #endif
+        if(!alphaIsCompatible)
+        {
+          MappedDest(actualDestPtr, dest.size()).setZero();
+          compatibleAlpha = RhsScalar(1);
+        }
+        else
+          MappedDest(actualDestPtr, dest.size()) = dest;
+      }
+
+      general_matrix_vector_product
+          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+          actualLhs.rows(), actualLhs.cols(),
+          LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+          RhsMapper(actualRhs.data(), actualRhs.innerStride()),
+          actualDestPtr, 1,
+          compatibleAlpha);
+
+      if (!evalToDest)
+      {
+        if(!alphaIsCompatible)
+          dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
+        else
+          dest = MappedDest(actualDestPtr, dest.size());
+      }
+    }
+  }
+};
+
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
+{
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  {
+    typedef typename Lhs::Scalar   LhsScalar;
+    typedef typename Rhs::Scalar   RhsScalar;
+    typedef typename Dest::Scalar  ResScalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+                                  * RhsBlasTraits::extractScalarFactor(rhs);
+
+    enum {
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on, the other hand it is good for the cache to pack the vector anyways...
+      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
+    };
+
+    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
+        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+
+    if(!DirectlyUseRhs)
+    {
+      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      Index size = actualRhs.size();
+      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      #endif
+      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+    }
+
+    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
+    general_matrix_vector_product
+        <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+        actualLhs.rows(), actualLhs.cols(),
+        LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+        RhsMapper(actualRhsPtr, 1),
+        dest.data(), dest.col(0).innerStride(), //NOTE  if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
+        actualAlpha);
+  }
+};
+
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
+{
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  {
+    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
+    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
+    typename nested_eval<Rhs,1>::type actual_rhs(rhs);
+    const Index size = rhs.rows();
+    for(Index k=0; k<size; ++k)
+      dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
+  }
+};
+
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
+{
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  {
+    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
+    typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
+    const Index rows = dest.rows();
+    for(Index i=0; i<rows; ++i)
+      dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
+  }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+/** \returns the matrix product of \c *this and \a other.
+  *
+  * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
+  *
+  * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline const Product<Derived, OtherDerived>
+MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
+{
+  // A note regarding the function declaration: In MSVC, this function will sometimes
+  // not be inlined since DenseStorage is an unwindable object for dynamic
+  // matrices and product types are holding a member to store the result.
+  // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
+  enum {
+    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
+                   || OtherDerived::RowsAtCompileTime==Dynamic
+                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+  };
+  // note to the lost user:
+  //    * for a dot product use: v1.dot(v2)
+  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+#ifdef EIGEN_DEBUG_PRODUCT
+  internal::product_type<Derived,OtherDerived>::debug();
+#endif
+
+  return Product<Derived, OtherDerived>(derived(), other.derived());
+}
+
+/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
+  *
+  * The returned product will behave like any other expressions: the coefficients of the product will be
+  * computed once at a time as requested. This might be useful in some extremely rare cases when only
+  * a small and no coherent fraction of the result's coefficients have to be computed.
+  *
+  * \warning This version of the matrix product can be much much slower. So use it only if you know
+  * what you are doing and that you measured a true speed improvement.
+  *
+  * \sa operator*(const MatrixBase&)
+  */
+template<typename Derived>
+template<typename OtherDerived>
+const Product<Derived,OtherDerived,LazyProduct>
+MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
+{
+  enum {
+    ProductIsValid =  Derived::ColsAtCompileTime==Dynamic
+                   || OtherDerived::RowsAtCompileTime==Dynamic
+                   || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime),
+    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived)
+  };
+  // note to the lost user:
+  //    * for a dot product use: v1.dot(v2)
+  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes),
+    INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+  EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors),
+    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
+
+  return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_PRODUCT_H
diff --git a/third-party/Eigen/src/Core/GenericPacketMath.h b/third-party/Eigen/src/Core/GenericPacketMath.h
new file mode 100644
index 00000000..4cf4120a
--- /dev/null
+++ b/third-party/Eigen/src/Core/GenericPacketMath.h
@@ -0,0 +1,590 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERIC_PACKET_MATH_H
+#define EIGEN_GENERIC_PACKET_MATH_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal
+  * \file GenericPacketMath.h
+  *
+  * Default implementation for types not supported by the vectorization.
+  * In practice these functions are provided to make easier the writing
+  * of generic vectorized code.
+  */
+
+#ifndef EIGEN_DEBUG_ALIGNED_LOAD
+#define EIGEN_DEBUG_ALIGNED_LOAD
+#endif
+
+#ifndef EIGEN_DEBUG_UNALIGNED_LOAD
+#define EIGEN_DEBUG_UNALIGNED_LOAD
+#endif
+
+#ifndef EIGEN_DEBUG_ALIGNED_STORE
+#define EIGEN_DEBUG_ALIGNED_STORE
+#endif
+
+#ifndef EIGEN_DEBUG_UNALIGNED_STORE
+#define EIGEN_DEBUG_UNALIGNED_STORE
+#endif
+
+struct default_packet_traits
+{
+  enum {
+    HasHalfPacket = 0,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasNegate = 1,
+    HasAbs    = 1,
+    HasArg    = 0,
+    HasAbs2   = 1,
+    HasMin    = 1,
+    HasMax    = 1,
+    HasConj   = 1,
+    HasSetLinear = 1,
+    HasBlend  = 0,
+
+    HasDiv    = 0,
+    HasSqrt   = 0,
+    HasRsqrt  = 0,
+    HasExp    = 0,
+    HasLog    = 0,
+    HasLog1p  = 0,
+    HasLog10  = 0,
+    HasPow    = 0,
+
+    HasSin    = 0,
+    HasCos    = 0,
+    HasTan    = 0,
+    HasASin   = 0,
+    HasACos   = 0,
+    HasATan   = 0,
+    HasSinh   = 0,
+    HasCosh   = 0,
+    HasTanh   = 0,
+    HasLGamma = 0,
+    HasDiGamma = 0,
+    HasZeta = 0,
+    HasPolygamma = 0,
+    HasErf = 0,
+    HasErfc = 0,
+    HasIGamma = 0,
+    HasIGammac = 0,
+    HasBetaInc = 0,
+
+    HasRound  = 0,
+    HasFloor  = 0,
+    HasCeil   = 0,
+
+    HasSign   = 0
+  };
+};
+
+template<typename T> struct packet_traits : default_packet_traits
+{
+  typedef T type;
+  typedef T half;
+  enum {
+    Vectorizable = 0,
+    size = 1,
+    AlignedOnScalar = 0,
+    HasHalfPacket = 0
+  };
+  enum {
+    HasAdd    = 0,
+    HasSub    = 0,
+    HasMul    = 0,
+    HasNegate = 0,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasConj   = 0,
+    HasSetLinear = 0
+  };
+};
+
+template<typename T> struct packet_traits<const T> : packet_traits<T> { };
+
+template <typename Src, typename Tgt> struct type_casting_traits {
+  enum {
+    VectorizedCast = 0,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+
+/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a) {
+  return static_cast<TgtPacket>(a);
+}
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
+  return static_cast<TgtPacket>(a);
+}
+
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
+  return static_cast<TgtPacket>(a);
+}
+
+/** \internal \returns a + b (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+padd(const Packet& a,
+        const Packet& b) { return a+b; }
+
+/** \internal \returns a - b (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+psub(const Packet& a,
+        const Packet& b) { return a-b; }
+
+/** \internal \returns -a (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pnegate(const Packet& a) { return -a; }
+
+/** \internal \returns conj(a) (coeff-wise) */
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pconj(const Packet& a) { return numext::conj(a); }
+
+/** \internal \returns a * b (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmul(const Packet& a,
+        const Packet& b) { return a*b; }
+
+/** \internal \returns a / b (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pdiv(const Packet& a,
+        const Packet& b) { return a/b; }
+
+/** \internal \returns the min of \a a and \a b  (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmin(const Packet& a,
+        const Packet& b) { return numext::mini(a, b); }
+
+/** \internal \returns the max of \a a and \a b  (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmax(const Packet& a,
+        const Packet& b) { return numext::maxi(a, b); }
+
+/** \internal \returns the absolute value of \a a */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pabs(const Packet& a) { using std::abs; return abs(a); }
+
+/** \internal \returns the phase angle of \a a */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+parg(const Packet& a) { using numext::arg; return arg(a); }
+
+/** \internal \returns the bitwise and of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pand(const Packet& a, const Packet& b) { return a & b; }
+
+/** \internal \returns the bitwise or of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+por(const Packet& a, const Packet& b) { return a | b; }
+
+/** \internal \returns the bitwise xor of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pxor(const Packet& a, const Packet& b) { return a ^ b; }
+
+/** \internal \returns the bitwise andnot of \a a and \a b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pandnot(const Packet& a, const Packet& b) { return a & (!b); }
+
+/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
+
+/** \internal \returns a packet version of \a *from, (un-aligned load) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
+
+/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
+
+/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pload1(const typename unpacket_traits<Packet>::type  *a) { return pset1<Packet>(*a); }
+
+/** \internal \returns a packet with elements of \a *from duplicated.
+  * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
+  * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
+  * Currently, this function is only used for scalar * complex products.
+  */
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
+ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
+
+/** \internal \returns a packet with elements of \a *from quadrupled.
+  * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
+  * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
+  * Currently, this function is only used in matrix products.
+  * For packet-size smaller or equal to 4, this function is equivalent to pload1 
+  */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploadquad(const typename unpacket_traits<Packet>::type* from)
+{ return pload1<Packet>(from); }
+
+/** \internal equivalent to
+  * \code
+  * a0 = pload1(a+0);
+  * a1 = pload1(a+1);
+  * a2 = pload1(a+2);
+  * a3 = pload1(a+3);
+  * \endcode
+  * \sa pset1, pload1, ploaddup, pbroadcast2
+  */
+template<typename Packet> EIGEN_DEVICE_FUNC
+inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
+                        Packet& a0, Packet& a1, Packet& a2, Packet& a3)
+{
+  a0 = pload1<Packet>(a+0);
+  a1 = pload1<Packet>(a+1);
+  a2 = pload1<Packet>(a+2);
+  a3 = pload1<Packet>(a+3);
+}
+
+/** \internal equivalent to
+  * \code
+  * a0 = pload1(a+0);
+  * a1 = pload1(a+1);
+  * \endcode
+  * \sa pset1, pload1, ploaddup, pbroadcast4
+  */
+template<typename Packet> EIGEN_DEVICE_FUNC
+inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
+                        Packet& a0, Packet& a1)
+{
+  a0 = pload1<Packet>(a+0);
+  a1 = pload1<Packet>(a+1);
+}
+
+/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
+plset(const typename unpacket_traits<Packet>::type& a) { return a; }
+
+/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
+{ (*to) = from; }
+
+/** \internal copy the packet \a from to \a *to, (un-aligned store) */
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
+{  (*to) = from; }
+
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
+ { return ploadu<Packet>(from); }
+
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
+ { pstore(to, from); }
+
+/** \internal tries to do cache prefetching of \a addr */
+template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
+{
+#ifdef __CUDA_ARCH__
+#if defined(__LP64__)
+  // 64-bit pointer operand constraint for inlined asm
+  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
+#else
+  // 32-bit pointer operand constraint for inlined asm
+  asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
+#endif
+#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
+  __builtin_prefetch(addr);
+#endif
+}
+
+/** \internal \returns the first element of a packet */
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
+{ return a; }
+
+/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+preduxp(const Packet* vecs) { return vecs[0]; }
+
+/** \internal \returns the sum of the elements of \a a*/
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
+{ return a; }
+
+/** \internal \returns the sum of the elements of \a a by block of 4 elements.
+  * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
+  * For packet-size smaller or equal to 4, this boils down to a noop.
+  */
+template<typename Packet> EIGEN_DEVICE_FUNC inline
+typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
+predux_downto4(const Packet& a)
+{ return a; }
+
+/** \internal \returns the product of the elements of \a a*/
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
+{ return a; }
+
+/** \internal \returns the min of the elements of \a a*/
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
+{ return a; }
+
+/** \internal \returns the max of the elements of \a a*/
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
+{ return a; }
+
+/** \internal \returns the reversed elements of \a a*/
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
+{ return a; }
+
+/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
+{
+  return Packet(a.imag(),a.real());
+}
+
+/**************************
+* Special math functions
+***************************/
+
+/** \internal \returns the sine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psin(const Packet& a) { using std::sin; return sin(a); }
+
+/** \internal \returns the cosine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pcos(const Packet& a) { using std::cos; return cos(a); }
+
+/** \internal \returns the tan of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ptan(const Packet& a) { using std::tan; return tan(a); }
+
+/** \internal \returns the arc sine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pasin(const Packet& a) { using std::asin; return asin(a); }
+
+/** \internal \returns the arc cosine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pacos(const Packet& a) { using std::acos; return acos(a); }
+
+/** \internal \returns the arc tangent of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patan(const Packet& a) { using std::atan; return atan(a); }
+
+/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
+
+/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
+
+/** \internal \returns the exp of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pexp(const Packet& a) { using std::exp; return exp(a); }
+
+/** \internal \returns the log of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog(const Packet& a) { using std::log; return log(a); }
+
+/** \internal \returns the log1p of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog1p(const Packet& a) { return numext::log1p(a); }
+
+/** \internal \returns the log10 of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog10(const Packet& a) { using std::log10; return log10(a); }
+
+/** \internal \returns the square-root of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
+
+/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet prsqrt(const Packet& a) {
+  return pdiv(pset1<Packet>(1), psqrt(a));
+}
+
+/** \internal \returns the rounded value of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pround(const Packet& a) { using numext::round; return round(a); }
+
+/** \internal \returns the floor of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
+
+/** \internal \returns the ceil of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
+
+/***************************************************************************
+* The following functions might not have to be overwritten for vectorized types
+***************************************************************************/
+
+/** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
+// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
+template<typename Packet>
+inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
+{
+  pstore(to, pset1<Packet>(a));
+}
+
+/** \internal \returns a * b + c (coeff-wise) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pmadd(const Packet&  a,
+         const Packet&  b,
+         const Packet&  c)
+{ return padd(pmul(a, b),c); }
+
+/** \internal \returns a packet version of \a *from.
+  * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
+template<typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
+{
+  if(Alignment >= unpacket_traits<Packet>::alignment)
+    return pload<Packet>(from);
+  else
+    return ploadu<Packet>(from);
+}
+
+/** \internal copy the packet \a from to \a *to.
+  * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
+template<typename Scalar, typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
+{
+  if(Alignment >= unpacket_traits<Packet>::alignment)
+    pstore(to, from);
+  else
+    pstoreu(to, from);
+}
+
+/** \internal \returns a packet version of \a *from.
+  * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
+  * hardware if available to speedup the loading of data that won't be modified
+  * by the current computation.
+  */
+template<typename Packet, int LoadMode>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
+{
+  return ploadt<Packet, LoadMode>(from);
+}
+
+/** \internal default implementation of palign() allowing partial specialization */
+template<int Offset,typename PacketType>
+struct palign_impl
+{
+  // by default data are aligned, so there is nothing to be done :)
+  static inline void run(PacketType&, const PacketType&) {}
+};
+
+/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
+  * of \a first and \a Offset first elements of \a second.
+  * 
+  * This function is currently only used to optimize matrix-vector products on unligned matrices.
+  * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
+  * at the position \a Offset. For instance, for packets of 4 elements, we have:
+  *  Input:
+  *  - first = {f0,f1,f2,f3}
+  *  - second = {s0,s1,s2,s3}
+  * Output: 
+  *   - if Offset==0 then {f0,f1,f2,f3}
+  *   - if Offset==1 then {f1,f2,f3,s0}
+  *   - if Offset==2 then {f2,f3,s0,s1}
+  *   - if Offset==3 then {f3,s0,s1,s3}
+  */
+template<int Offset,typename PacketType>
+inline void palign(PacketType& first, const PacketType& second)
+{
+  palign_impl<Offset,PacketType>::run(first,second);
+}
+
+/***************************************************************************
+* Fast complex products (GCC generates a function call which is very slow)
+***************************************************************************/
+
+// Eigen+CUDA does not support complexes.
+#ifndef EIGEN_CUDACC
+
+template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
+{ return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
+
+template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
+{ return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
+
+#endif
+
+
+/***************************************************************************
+ * PacketBlock, that is a collection of N packets where the number of words
+ * in the packet is a multiple of N.
+***************************************************************************/
+template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
+  Packet packet[N];
+};
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
+  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
+}
+
+/***************************************************************************
+ * Selector, i.e. vector of N boolean values used to select (i.e. blend)
+ * words from 2 packets.
+***************************************************************************/
+template <size_t N> struct Selector {
+  bool select[N];
+};
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
+  return ifPacket.select[0] ? thenPacket : elsePacket;
+}
+
+/** \internal \returns \a a with the first coefficient replaced by the scalar b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+  // Default implementation based on pblend.
+  // It must be specialized for higher performance.
+  Selector<unpacket_traits<Packet>::size> mask;
+  mask.select[0] = true;
+  // This for loop should be optimized away by the compiler.
+  for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
+    mask.select[i] = false;
+  return pblend(mask, pset1<Packet>(b), a);
+}
+
+/** \internal \returns \a a with the last coefficient replaced by the scalar b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+  // Default implementation based on pblend.
+  // It must be specialized for higher performance.
+  Selector<unpacket_traits<Packet>::size> mask;
+  // This for loop should be optimized away by the compiler.
+  for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
+    mask.select[i] = false;
+  mask.select[unpacket_traits<Packet>::size-1] = true;
+  return pblend(mask, pset1<Packet>(b), a);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERIC_PACKET_MATH_H
diff --git a/third-party/Eigen/src/Core/GlobalFunctions.h b/third-party/Eigen/src/Core/GlobalFunctions.h
new file mode 100644
index 00000000..769dc255
--- /dev/null
+++ b/third-party/Eigen/src/Core/GlobalFunctions.h
@@ -0,0 +1,187 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GLOBAL_FUNCTIONS_H
+#define EIGEN_GLOBAL_FUNCTIONS_H
+
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+
+#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
+  /** \returns an expression of the coefficient-wise DOC_OP of \a x
+
+    DOC_DETAILS
+
+    \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_##NAME">Math functions</a>, class CwiseUnaryOp
+    */ \
+  template<typename Derived> \
+  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
+  NAME(const Eigen::ArrayBase<Derived>& x);
+
+#else
+
+#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
+  template<typename Derived> \
+  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
+  (NAME)(const Eigen::ArrayBase<Derived>& x) { \
+    return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
+  }
+
+#endif // EIGEN_PARSED_BY_DOXYGEN
+
+#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \
+  \
+  template<typename Derived> \
+  struct NAME##_retval<ArrayBase<Derived> > \
+  { \
+    typedef const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> type; \
+  }; \
+  template<typename Derived> \
+  struct NAME##_impl<ArrayBase<Derived> > \
+  { \
+    static inline typename NAME##_retval<ArrayBase<Derived> >::type run(const Eigen::ArrayBase<Derived>& x) \
+    { \
+      return typename NAME##_retval<ArrayBase<Derived> >::type(x.derived()); \
+    } \
+  };
+
+namespace Eigen
+{
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\sa ArrayBase::imag)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\sa ArrayBase::conjugate)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\sa ArrayBase::inverse)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\sa ArrayBase::sin)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\sa ArrayBase::cos)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\sa ArrayBase::tan)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\sa ArrayBase::atan)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\sa ArrayBase::asin)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-consine,\sa ArrayBase::acos)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign)
+  
+  /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
+    *
+    * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar).
+    *
+    * \sa ArrayBase::pow()
+    *
+    * \relates ArrayBase
+    */
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+  template<typename Derived,typename ScalarExponent>
+  inline const CwiseBinaryOp<internal::scalar_pow_op<Derived::Scalar,ScalarExponent>,Derived,Constant<ScalarExponent> >
+  pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);
+#else
+  template<typename Derived,typename ScalarExponent>
+  inline typename internal::enable_if<   !(internal::is_same<typename Derived::Scalar,ScalarExponent>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent),
+          const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type
+  pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent) {
+    return x.derived().pow(exponent);
+  }
+
+  template<typename Derived>
+  inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow)
+  pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) {
+    return x.derived().pow(exponent);
+  }
+#endif
+
+  /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
+    *
+    * This function computes the coefficient-wise power.
+    *
+    * Example: \include Cwise_array_power_array.cpp
+    * Output: \verbinclude Cwise_array_power_array.out
+    * 
+    * \sa ArrayBase::pow()
+    *
+    * \relates ArrayBase
+    */
+  template<typename Derived,typename ExponentDerived>
+  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
+  pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents) 
+  {
+    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
+      x.derived(),
+      exponents.derived()
+    );
+  }
+  
+  /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
+    *
+    * This function computes the coefficient-wise power between a scalar and an array of exponents.
+    *
+    * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
+    *
+    * Example: \include Cwise_scalar_power_array.cpp
+    * Output: \verbinclude Cwise_scalar_power_array.out
+    * 
+    * \sa ArrayBase::pow()
+    *
+    * \relates ArrayBase
+    */
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+  template<typename Scalar,typename Derived>
+  inline const CwiseBinaryOp<internal::scalar_pow_op<Scalar,Derived::Scalar>,Constant<Scalar>,Derived>
+  pow(const Scalar& x,const Eigen::ArrayBase<Derived>& x);
+#else
+  template<typename Scalar, typename Derived>
+  inline typename internal::enable_if<   !(internal::is_same<typename Derived::Scalar,Scalar>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar),
+          const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type
+  pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
+  {
+    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)(
+            typename internal::plain_constant_type<Derived,Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
+  }
+
+  template<typename Derived>
+  inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)
+  pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
+  {
+    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)(
+      typename internal::plain_constant_type<Derived,typename Derived::Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
+  }
+#endif
+
+
+  namespace internal
+  {
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op)
+    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op)
+  }
+}
+
+// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, internal::isApprox...)
+
+#endif // EIGEN_GLOBAL_FUNCTIONS_H
diff --git a/third-party/Eigen/src/Core/IO.h b/third-party/Eigen/src/Core/IO.h
new file mode 100644
index 00000000..da7fd6cc
--- /dev/null
+++ b/third-party/Eigen/src/Core/IO.h
@@ -0,0 +1,225 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_IO_H
+#define EIGEN_IO_H
+
+namespace Eigen { 
+
+enum { DontAlignCols = 1 };
+enum { StreamPrecision = -1,
+       FullPrecision = -2 };
+
+namespace internal {
+template<typename Derived>
+std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt);
+}
+
+/** \class IOFormat
+  * \ingroup Core_Module
+  *
+  * \brief Stores a set of parameters controlling the way matrices are printed
+  *
+  * List of available parameters:
+  *  - \b precision number of digits for floating point values, or one of the special constants \c StreamPrecision and \c FullPrecision.
+  *                 The default is the special value \c StreamPrecision which means to use the
+  *                 stream's own precision setting, as set for instance using \c cout.precision(3). The other special value
+  *                 \c FullPrecision means that the number of digits will be computed to match the full precision of each floating-point
+  *                 type.
+  *  - \b flags an OR-ed combination of flags, the default value is 0, the only currently available flag is \c DontAlignCols which
+  *             allows to disable the alignment of columns, resulting in faster code.
+  *  - \b coeffSeparator string printed between two coefficients of the same row
+  *  - \b rowSeparator string printed between two rows
+  *  - \b rowPrefix string printed at the beginning of each row
+  *  - \b rowSuffix string printed at the end of each row
+  *  - \b matPrefix string printed at the beginning of the matrix
+  *  - \b matSuffix string printed at the end of the matrix
+  *
+  * Example: \include IOFormat.cpp
+  * Output: \verbinclude IOFormat.out
+  *
+  * \sa DenseBase::format(), class WithFormat
+  */
+struct IOFormat
+{
+  /** Default constructor, see class IOFormat for the meaning of the parameters */
+  IOFormat(int _precision = StreamPrecision, int _flags = 0,
+    const std::string& _coeffSeparator = " ",
+    const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="",
+    const std::string& _matPrefix="", const std::string& _matSuffix="")
+  : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
+    rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
+  {
+    // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
+    // don't add rowSpacer if columns are not to be aligned
+    if((flags & DontAlignCols))
+      return;
+    int i = int(matSuffix.length())-1;
+    while (i>=0 && matSuffix[i]!='\n')
+    {
+      rowSpacer += ' ';
+      i--;
+    }
+  }
+  std::string matPrefix, matSuffix;
+  std::string rowPrefix, rowSuffix, rowSeparator, rowSpacer;
+  std::string coeffSeparator;
+  int precision;
+  int flags;
+};
+
+/** \class WithFormat
+  * \ingroup Core_Module
+  *
+  * \brief Pseudo expression providing matrix output with given format
+  *
+  * \tparam ExpressionType the type of the object on which IO stream operations are performed
+  *
+  * This class represents an expression with stream operators controlled by a given IOFormat.
+  * It is the return type of DenseBase::format()
+  * and most of the time this is the only way it is used.
+  *
+  * See class IOFormat for some examples.
+  *
+  * \sa DenseBase::format(), class IOFormat
+  */
+template<typename ExpressionType>
+class WithFormat
+{
+  public:
+
+    WithFormat(const ExpressionType& matrix, const IOFormat& format)
+      : m_matrix(matrix), m_format(format)
+    {}
+
+    friend std::ostream & operator << (std::ostream & s, const WithFormat& wf)
+    {
+      return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format);
+    }
+
+  protected:
+    typename ExpressionType::Nested m_matrix;
+    IOFormat m_format;
+};
+
+namespace internal {
+
+// NOTE: This helper is kept for backward compatibility with previous code specializing
+//       this internal::significant_decimals_impl structure. In the future we should directly
+//       call digits10() which has been introduced in July 2016 in 3.3.
+template<typename Scalar>
+struct significant_decimals_impl
+{
+  static inline int run()
+  {
+    return NumTraits<Scalar>::digits10();
+  }
+};
+
+/** \internal
+  * print the matrix \a _m to the output stream \a s using the output format \a fmt */
+template<typename Derived>
+std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt)
+{
+  if(_m.size() == 0)
+  {
+    s << fmt.matPrefix << fmt.matSuffix;
+    return s;
+  }
+  
+  typename Derived::Nested m = _m;
+  typedef typename Derived::Scalar Scalar;
+
+  Index width = 0;
+
+  std::streamsize explicit_precision;
+  if(fmt.precision == StreamPrecision)
+  {
+    explicit_precision = 0;
+  }
+  else if(fmt.precision == FullPrecision)
+  {
+    if (NumTraits<Scalar>::IsInteger)
+    {
+      explicit_precision = 0;
+    }
+    else
+    {
+      explicit_precision = significant_decimals_impl<Scalar>::run();
+    }
+  }
+  else
+  {
+    explicit_precision = fmt.precision;
+  }
+
+  std::streamsize old_precision = 0;
+  if(explicit_precision) old_precision = s.precision(explicit_precision);
+
+  bool align_cols = !(fmt.flags & DontAlignCols);
+  if(align_cols)
+  {
+    // compute the largest width
+    for(Index j = 0; j < m.cols(); ++j)
+      for(Index i = 0; i < m.rows(); ++i)
+      {
+        std::stringstream sstr;
+        sstr.copyfmt(s);
+        sstr << m.coeff(i,j);
+        width = std::max<Index>(width, Index(sstr.str().length()));
+      }
+  }
+  s << fmt.matPrefix;
+  for(Index i = 0; i < m.rows(); ++i)
+  {
+    if (i)
+      s << fmt.rowSpacer;
+    s << fmt.rowPrefix;
+    if(width) s.width(width);
+    s << m.coeff(i, 0);
+    for(Index j = 1; j < m.cols(); ++j)
+    {
+      s << fmt.coeffSeparator;
+      if (width) s.width(width);
+      s << m.coeff(i, j);
+    }
+    s << fmt.rowSuffix;
+    if( i < m.rows() - 1)
+      s << fmt.rowSeparator;
+  }
+  s << fmt.matSuffix;
+  if(explicit_precision) s.precision(old_precision);
+  return s;
+}
+
+} // end namespace internal
+
+/** \relates DenseBase
+  *
+  * Outputs the matrix, to the given stream.
+  *
+  * If you wish to print the matrix with a format different than the default, use DenseBase::format().
+  *
+  * It is also possible to change the default format by defining EIGEN_DEFAULT_IO_FORMAT before including Eigen headers.
+  * If not defined, this will automatically be defined to Eigen::IOFormat(), that is the Eigen::IOFormat with default parameters.
+  *
+  * \sa DenseBase::format()
+  */
+template<typename Derived>
+std::ostream & operator <<
+(std::ostream & s,
+ const DenseBase<Derived> & m)
+{
+  return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_IO_H
diff --git a/third-party/Eigen/src/Core/Inverse.h b/third-party/Eigen/src/Core/Inverse.h
new file mode 100644
index 00000000..b76f0439
--- /dev/null
+++ b/third-party/Eigen/src/Core/Inverse.h
@@ -0,0 +1,118 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_INVERSE_H
+#define EIGEN_INVERSE_H
+
+namespace Eigen { 
+
+template<typename XprType,typename StorageKind> class InverseImpl;
+
+namespace internal {
+
+template<typename XprType>
+struct traits<Inverse<XprType> >
+  : traits<typename XprType::PlainObject>
+{
+  typedef typename XprType::PlainObject PlainObject;
+  typedef traits<PlainObject> BaseTraits;
+  enum {
+    Flags = BaseTraits::Flags & RowMajorBit
+  };
+};
+
+} // end namespace internal
+
+/** \class Inverse
+  *
+  * \brief Expression of the inverse of another expression
+  *
+  * \tparam XprType the type of the expression we are taking the inverse
+  *
+  * This class represents an abstract expression of A.inverse()
+  * and most of the time this is the only way it is used.
+  *
+  */
+template<typename XprType>
+class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::StorageKind>
+{
+public:
+  typedef typename XprType::StorageIndex StorageIndex;
+  typedef typename XprType::PlainObject                       PlainObject;
+  typedef typename XprType::Scalar                            Scalar;
+  typedef typename internal::ref_selector<XprType>::type      XprTypeNested;
+  typedef typename internal::remove_all<XprTypeNested>::type  XprTypeNestedCleaned;
+  typedef typename internal::ref_selector<Inverse>::type Nested;
+  typedef typename internal::remove_all<XprType>::type NestedExpression;
+  
+  explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr)
+    : m_xpr(xpr)
+  {}
+
+  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
+
+  EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
+
+protected:
+  XprTypeNested m_xpr;
+};
+
+// Generic API dispatcher
+template<typename XprType, typename StorageKind>
+class InverseImpl
+  : public internal::generic_xpr_base<Inverse<XprType> >::type
+{
+public:
+  typedef typename internal::generic_xpr_base<Inverse<XprType> >::type Base;
+  typedef typename XprType::Scalar Scalar;
+private:
+
+  Scalar coeff(Index row, Index col) const;
+  Scalar coeff(Index i) const;
+};
+
+namespace internal {
+
+/** \internal
+  * \brief Default evaluator for Inverse expression.
+  * 
+  * This default evaluator for Inverse expression simply evaluate the inverse into a temporary
+  * by a call to internal::call_assignment_no_alias.
+  * Therefore, inverse implementers only have to specialize Assignment<Dst,Inverse<...>, ...> for
+  * there own nested expression.
+  *
+  * \sa class Inverse
+  */
+template<typename ArgType>
+struct unary_evaluator<Inverse<ArgType> >
+  : public evaluator<typename Inverse<ArgType>::PlainObject>
+{
+  typedef Inverse<ArgType> InverseType;
+  typedef typename InverseType::PlainObject PlainObject;
+  typedef evaluator<PlainObject> Base;
+  
+  enum { Flags = Base::Flags | EvalBeforeNestingBit };
+
+  unary_evaluator(const InverseType& inv_xpr)
+    : m_result(inv_xpr.rows(), inv_xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    internal::call_assignment_no_alias(m_result, inv_xpr);
+  }
+  
+protected:
+  PlainObject m_result;
+};
+  
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_INVERSE_H
diff --git a/third-party/Eigen/src/Core/Map.h b/third-party/Eigen/src/Core/Map.h
new file mode 100644
index 00000000..548bf9a2
--- /dev/null
+++ b/third-party/Eigen/src/Core/Map.h
@@ -0,0 +1,171 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MAP_H
+#define EIGEN_MAP_H
+
+namespace Eigen { 
+
+namespace internal {
+template<typename PlainObjectType, int MapOptions, typename StrideType>
+struct traits<Map<PlainObjectType, MapOptions, StrideType> >
+  : public traits<PlainObjectType>
+{
+  typedef traits<PlainObjectType> TraitsBase;
+  enum {
+    PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)
+                             ? PlainObjectType::ColsAtCompileTime
+                             : PlainObjectType::RowsAtCompileTime,
+
+    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+                             ? int(PlainObjectType::InnerStrideAtCompileTime)
+                             : int(StrideType::InnerStrideAtCompileTime),
+    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+                             ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic
+                                ? Dynamic
+                                : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
+                             : int(StrideType::OuterStrideAtCompileTime),
+    Alignment = int(MapOptions)&int(AlignedMask),
+    Flags0 = TraitsBase::Flags & (~NestByRefBit),
+    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
+  };
+private:
+  enum { Options }; // Expressions don't have Options
+};
+}
+
+/** \class Map
+  * \ingroup Core_Module
+  *
+  * \brief A matrix or vector expression mapping an existing array of data.
+  *
+  * \tparam PlainObjectType the equivalent matrix type of the mapped data
+  * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
+  *                The default is \c #Unaligned.
+  * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout
+  *                   of an ordinary, contiguous array. This can be overridden by specifying strides.
+  *                   The type passed here must be a specialization of the Stride template, see examples below.
+  *
+  * This class represents a matrix or vector expression mapping an existing array of data.
+  * It can be used to let Eigen interface without any overhead with non-Eigen data structures,
+  * such as plain C arrays or structures from other libraries. By default, it assumes that the
+  * data is laid out contiguously in memory. You can however override this by explicitly specifying
+  * inner and outer strides.
+  *
+  * Here's an example of simply mapping a contiguous array as a \ref TopicStorageOrders "column-major" matrix:
+  * \include Map_simple.cpp
+  * Output: \verbinclude Map_simple.out
+  *
+  * If you need to map non-contiguous arrays, you can do so by specifying strides:
+  *
+  * Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer
+  * increment between two consecutive coefficients. Here, we're specifying the inner stride as a compile-time
+  * fixed value.
+  * \include Map_inner_stride.cpp
+  * Output: \verbinclude Map_inner_stride.out
+  *
+  * Here's an example of mapping an array while specifying an outer stride. Here, since we're mapping
+  * as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns.
+  * Here, we're specifying the outer stride as a runtime parameter. Note that here \c OuterStride<> is
+  * a short version of \c OuterStride<Dynamic> because the default template parameter of OuterStride
+  * is  \c Dynamic
+  * \include Map_outer_stride.cpp
+  * Output: \verbinclude Map_outer_stride.out
+  *
+  * For more details and for an example of specifying both an inner and an outer stride, see class Stride.
+  *
+  * \b Tip: to change the array of data mapped by a Map object, you can use the C++
+  * placement new syntax:
+  *
+  * Example: \include Map_placement_new.cpp
+  * Output: \verbinclude Map_placement_new.out
+  *
+  * This class is the return type of PlainObjectBase::Map() but can also be used directly.
+  *
+  * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
+  */
+template<typename PlainObjectType, int MapOptions, typename StrideType> class Map
+  : public MapBase<Map<PlainObjectType, MapOptions, StrideType> >
+{
+  public:
+
+    typedef MapBase<Map> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Map)
+
+    typedef typename Base::PointerType PointerType;
+    typedef PointerType PointerArgType;
+    EIGEN_DEVICE_FUNC
+    inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
+
+    EIGEN_DEVICE_FUNC
+    inline Index innerStride() const
+    {
+      return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline Index outerStride() const
+    {
+      return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer()
+           : int(internal::traits<Map>::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime)
+           : IsVectorAtCompileTime ? (this->size() * innerStride())
+           : (int(Flags)&RowMajorBit) ? (this->cols() * innerStride())
+           : (this->rows() * innerStride());
+    }
+
+    /** Constructor in the fixed-size case.
+      *
+      * \param dataPtr pointer to the array to map
+      * \param stride optional Stride object, passing the strides.
+      */
+    EIGEN_DEVICE_FUNC
+    explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
+      : Base(cast_to_pointer_type(dataPtr)), m_stride(stride)
+    {
+      PlainObjectType::Base::_check_template_params();
+    }
+
+    /** Constructor in the dynamic-size vector case.
+      *
+      * \param dataPtr pointer to the array to map
+      * \param size the size of the vector expression
+      * \param stride optional Stride object, passing the strides.
+      */
+    EIGEN_DEVICE_FUNC
+    inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
+      : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride)
+    {
+      PlainObjectType::Base::_check_template_params();
+    }
+
+    /** Constructor in the dynamic-size matrix case.
+      *
+      * \param dataPtr pointer to the array to map
+      * \param rows the number of rows of the matrix expression
+      * \param cols the number of columns of the matrix expression
+      * \param stride optional Stride object, passing the strides.
+      */
+    EIGEN_DEVICE_FUNC
+    inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
+      : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride)
+    {
+      PlainObjectType::Base::_check_template_params();
+    }
+
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
+
+  protected:
+    StrideType m_stride;
+};
+
+
+} // end namespace Eigen
+
+#endif // EIGEN_MAP_H
diff --git a/third-party/Eigen/src/Core/MapBase.h b/third-party/Eigen/src/Core/MapBase.h
new file mode 100644
index 00000000..92c3b281
--- /dev/null
+++ b/third-party/Eigen/src/Core/MapBase.h
@@ -0,0 +1,308 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MAPBASE_H
+#define EIGEN_MAPBASE_H
+
+#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \
+      EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
+                          YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
+
+namespace Eigen { 
+
+/** \ingroup Core_Module
+  *
+  * \brief Base class for dense Map and Block expression with direct access
+  *
+  * This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense
+  * Map and Block objects with direct access.
+  * Typical users do not have to directly deal with this class.
+  *
+  * This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN.
+  * See \link TopicCustomizing_Plugins customizing Eigen \endlink for details.
+  *
+  * The \c Derived class has to provide the following two methods describing the memory layout:
+  *  \code Index innerStride() const; \endcode
+  *  \code Index outerStride() const; \endcode
+  *
+  * \sa class Map, class Block
+  */
+template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
+  : public internal::dense_xpr_base<Derived>::type
+{
+  public:
+
+    typedef typename internal::dense_xpr_base<Derived>::type Base;
+    enum {
+      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+      InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,
+      SizeAtCompileTime = Base::SizeAtCompileTime
+    };
+
+    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef typename internal::conditional<
+                         bool(internal::is_lvalue<Derived>::value),
+                         Scalar *,
+                         const Scalar *>::type
+                     PointerType;
+
+    using Base::derived;
+//    using Base::RowsAtCompileTime;
+//    using Base::ColsAtCompileTime;
+//    using Base::SizeAtCompileTime;
+    using Base::MaxRowsAtCompileTime;
+    using Base::MaxColsAtCompileTime;
+    using Base::MaxSizeAtCompileTime;
+    using Base::IsVectorAtCompileTime;
+    using Base::Flags;
+    using Base::IsRowMajor;
+
+    using Base::rows;
+    using Base::cols;
+    using Base::size;
+    using Base::coeff;
+    using Base::coeffRef;
+    using Base::lazyAssign;
+    using Base::eval;
+
+    using Base::innerStride;
+    using Base::outerStride;
+    using Base::rowStride;
+    using Base::colStride;
+
+    // bug 217 - compile error on ICC 11.1
+    using Base::operator=;
+
+    typedef typename Base::CoeffReturnType CoeffReturnType;
+
+    /** \copydoc DenseBase::rows() */
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
+    /** \copydoc DenseBase::cols() */
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
+
+    /** Returns a pointer to the first coefficient of the matrix or vector.
+      *
+      * \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride().
+      *
+      * \sa innerStride(), outerStride()
+      */
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; }
+
+    /** \copydoc PlainObjectBase::coeff(Index,Index) const */
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeff(Index rowId, Index colId) const
+    {
+      return m_data[colId * colStride() + rowId * rowStride()];
+    }
+
+    /** \copydoc PlainObjectBase::coeff(Index) const */
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeff(Index index) const
+    {
+      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+      return m_data[index * innerStride()];
+    }
+
+    /** \copydoc PlainObjectBase::coeffRef(Index,Index) const */
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index rowId, Index colId) const
+    {
+      return this->m_data[colId * colStride() + rowId * rowStride()];
+    }
+
+    /** \copydoc PlainObjectBase::coeffRef(Index) const */
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index index) const
+    {
+      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+      return this->m_data[index * innerStride()];
+    }
+
+    /** \internal */
+    template<int LoadMode>
+    inline PacketScalar packet(Index rowId, Index colId) const
+    {
+      return internal::ploadt<PacketScalar, LoadMode>
+               (m_data + (colId * colStride() + rowId * rowStride()));
+    }
+
+    /** \internal */
+    template<int LoadMode>
+    inline PacketScalar packet(Index index) const
+    {
+      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+      return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
+    }
+
+    /** \internal Constructor for fixed size matrices or vectors */
+    EIGEN_DEVICE_FUNC
+    explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
+    {
+      EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+      checkSanity<Derived>();
+    }
+
+    /** \internal Constructor for dynamically sized vectors */
+    EIGEN_DEVICE_FUNC
+    inline MapBase(PointerType dataPtr, Index vecSize)
+            : m_data(dataPtr),
+              m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
+              m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime))
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+      eigen_assert(vecSize >= 0);
+      eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
+      checkSanity<Derived>();
+    }
+
+    /** \internal Constructor for dynamically sized matrices */
+    EIGEN_DEVICE_FUNC
+    inline MapBase(PointerType dataPtr, Index rows, Index cols)
+            : m_data(dataPtr), m_rows(rows), m_cols(cols)
+    {
+      eigen_assert( (dataPtr == 0)
+              || (   rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
+                  && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
+      checkSanity<Derived>();
+    }
+
+    #ifdef EIGEN_MAPBASE_PLUGIN
+    #include EIGEN_MAPBASE_PLUGIN
+    #endif
+
+  protected:
+    EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase)
+    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase)
+
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
+    {
+#if EIGEN_MAX_ALIGN_BYTES>0
+      // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
+      const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
+      EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
+      eigen_assert((   ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
+                    || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
+#endif
+    }
+
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    void checkSanity(typename internal::enable_if<internal::traits<T>::Alignment==0,void*>::type = 0) const
+    {}
+
+    PointerType m_data;
+    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
+    const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
+};
+
+/** \ingroup Core_Module
+  *
+  * \brief Base class for non-const dense Map and Block expression with direct access
+  *
+  * This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of
+  * dense Map and Block objects with direct access.
+  * It inherits MapBase<Derived, ReadOnlyAccessors> which defines the const variant for reading specific entries.
+  *
+  * \sa class Map, class Block
+  */
+template<typename Derived> class MapBase<Derived, WriteAccessors>
+  : public MapBase<Derived, ReadOnlyAccessors>
+{
+    typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
+  public:
+
+    typedef MapBase<Derived, ReadOnlyAccessors> Base;
+
+    typedef typename Base::Scalar Scalar;
+    typedef typename Base::PacketScalar PacketScalar;
+    typedef typename Base::StorageIndex StorageIndex;
+    typedef typename Base::PointerType PointerType;
+
+    using Base::derived;
+    using Base::rows;
+    using Base::cols;
+    using Base::size;
+    using Base::coeff;
+    using Base::coeffRef;
+
+    using Base::innerStride;
+    using Base::outerStride;
+    using Base::rowStride;
+    using Base::colStride;
+
+    typedef typename internal::conditional<
+                    internal::is_lvalue<Derived>::value,
+                    Scalar,
+                    const Scalar
+                  >::type ScalarWithConstIfNotLvalue;
+
+    EIGEN_DEVICE_FUNC
+    inline const Scalar* data() const { return this->m_data; }
+    EIGEN_DEVICE_FUNC
+    inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
+
+    EIGEN_DEVICE_FUNC
+    inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
+    {
+      return this->m_data[col * colStride() + row * rowStride()];
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
+    {
+      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+      return this->m_data[index * innerStride()];
+    }
+
+    template<int StoreMode>
+    inline void writePacket(Index row, Index col, const PacketScalar& val)
+    {
+      internal::pstoret<Scalar, PacketScalar, StoreMode>
+               (this->m_data + (col * colStride() + row * rowStride()), val);
+    }
+
+    template<int StoreMode>
+    inline void writePacket(Index index, const PacketScalar& val)
+    {
+      EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+      internal::pstoret<Scalar, PacketScalar, StoreMode>
+                (this->m_data + index * innerStride(), val);
+    }
+
+    EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
+    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
+    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
+
+    EIGEN_DEVICE_FUNC
+    Derived& operator=(const MapBase& other)
+    {
+      ReadOnlyMapBase::Base::operator=(other);
+      return derived();
+    }
+
+    // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base,
+    // see bugs 821 and 920.
+    using ReadOnlyMapBase::Base::operator=;
+  protected:
+    EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase)
+    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase)
+};
+
+#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
+
+} // end namespace Eigen
+
+#endif // EIGEN_MAPBASE_H
diff --git a/third-party/Eigen/src/Core/MathFunctions.h b/third-party/Eigen/src/Core/MathFunctions.h
new file mode 100644
index 00000000..cceffb9a
--- /dev/null
+++ b/third-party/Eigen/src/Core/MathFunctions.h
@@ -0,0 +1,1490 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATHFUNCTIONS_H
+#define EIGEN_MATHFUNCTIONS_H
+
+// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html
+// TODO this should better be moved to NumTraits
+#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
+
+
+namespace Eigen {
+
+// On WINCE, std::abs is defined for int only, so let's defined our own overloads:
+// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too.
+#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500
+long        abs(long        x) { return (labs(x));  }
+double      abs(double      x) { return (fabs(x));  }
+float       abs(float       x) { return (fabsf(x)); }
+long double abs(long double x) { return (fabsl(x)); }
+#endif
+
+namespace internal {
+
+/** \internal \class global_math_functions_filtering_base
+  *
+  * What it does:
+  * Defines a typedef 'type' as follows:
+  * - if type T has a member typedef Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl, then
+  *   global_math_functions_filtering_base<T>::type is a typedef for it.
+  * - otherwise, global_math_functions_filtering_base<T>::type is a typedef for T.
+  *
+  * How it's used:
+  * To allow to defined the global math functions (like sin...) in certain cases, like the Array expressions.
+  * When you do sin(array1+array2), the object array1+array2 has a complicated expression type, all what you want to know
+  * is that it inherits ArrayBase. So we implement a partial specialization of sin_impl for ArrayBase<Derived>.
+  * So we must make sure to use sin_impl<ArrayBase<Derived> > and not sin_impl<Derived>, otherwise our partial specialization
+  * won't be used. How does sin know that? That's exactly what global_math_functions_filtering_base tells it.
+  *
+  * How it's implemented:
+  * SFINAE in the style of enable_if. Highly susceptible of breaking compilers. With GCC, it sure does work, but if you replace
+  * the typename dummy by an integer template parameter, it doesn't work anymore!
+  */
+
+template<typename T, typename dummy = void>
+struct global_math_functions_filtering_base
+{
+  typedef T type;
+};
+
+template<typename T> struct always_void { typedef void type; };
+
+template<typename T>
+struct global_math_functions_filtering_base
+  <T,
+   typename always_void<typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl>::type
+  >
+{
+  typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type;
+};
+
+#define EIGEN_MATHFUNC_IMPL(func, scalar) Eigen::internal::func##_impl<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>
+#define EIGEN_MATHFUNC_RETVAL(func, scalar) typename Eigen::internal::func##_retval<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>::type
+
+/****************************************************************************
+* Implementation of real                                                 *
+****************************************************************************/
+
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct real_default_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    return x;
+  }
+};
+
+template<typename Scalar>
+struct real_default_impl<Scalar,true>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    using std::real;
+    return real(x);
+  }
+};
+
+template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
+
+#ifdef __CUDA_ARCH__
+template<typename T>
+struct real_impl<std::complex<T> >
+{
+  typedef T RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline T run(const std::complex<T>& x)
+  {
+    return x.real();
+  }
+};
+#endif
+
+template<typename Scalar>
+struct real_retval
+{
+  typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of imag                                                 *
+****************************************************************************/
+
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct imag_default_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar&)
+  {
+    return RealScalar(0);
+  }
+};
+
+template<typename Scalar>
+struct imag_default_impl<Scalar,true>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    using std::imag;
+    return imag(x);
+  }
+};
+
+template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
+
+#ifdef __CUDA_ARCH__
+template<typename T>
+struct imag_impl<std::complex<T> >
+{
+  typedef T RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline T run(const std::complex<T>& x)
+  {
+    return x.imag();
+  }
+};
+#endif
+
+template<typename Scalar>
+struct imag_retval
+{
+  typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of real_ref                                             *
+****************************************************************************/
+
+template<typename Scalar>
+struct real_ref_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar& run(Scalar& x)
+  {
+    return reinterpret_cast<RealScalar*>(&x)[0];
+  }
+  EIGEN_DEVICE_FUNC
+  static inline const RealScalar& run(const Scalar& x)
+  {
+    return reinterpret_cast<const RealScalar*>(&x)[0];
+  }
+};
+
+template<typename Scalar>
+struct real_ref_retval
+{
+  typedef typename NumTraits<Scalar>::Real & type;
+};
+
+/****************************************************************************
+* Implementation of imag_ref                                             *
+****************************************************************************/
+
+template<typename Scalar, bool IsComplex>
+struct imag_ref_default_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar& run(Scalar& x)
+  {
+    return reinterpret_cast<RealScalar*>(&x)[1];
+  }
+  EIGEN_DEVICE_FUNC
+  static inline const RealScalar& run(const Scalar& x)
+  {
+    return reinterpret_cast<RealScalar*>(&x)[1];
+  }
+};
+
+template<typename Scalar>
+struct imag_ref_default_impl<Scalar, false>
+{
+  EIGEN_DEVICE_FUNC
+  static inline Scalar run(Scalar&)
+  {
+    return Scalar(0);
+  }
+  EIGEN_DEVICE_FUNC
+  static inline const Scalar run(const Scalar&)
+  {
+    return Scalar(0);
+  }
+};
+
+template<typename Scalar>
+struct imag_ref_impl : imag_ref_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
+
+template<typename Scalar>
+struct imag_ref_retval
+{
+  typedef typename NumTraits<Scalar>::Real & type;
+};
+
+/****************************************************************************
+* Implementation of conj                                                 *
+****************************************************************************/
+
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct conj_impl
+{
+  EIGEN_DEVICE_FUNC
+  static inline Scalar run(const Scalar& x)
+  {
+    return x;
+  }
+};
+
+template<typename Scalar>
+struct conj_impl<Scalar,true>
+{
+  EIGEN_DEVICE_FUNC
+  static inline Scalar run(const Scalar& x)
+  {
+    using std::conj;
+    return conj(x);
+  }
+};
+
+template<typename Scalar>
+struct conj_retval
+{
+  typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of abs2                                                 *
+****************************************************************************/
+
+template<typename Scalar,bool IsComplex>
+struct abs2_impl_default
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    return x*x;
+  }
+};
+
+template<typename Scalar>
+struct abs2_impl_default<Scalar, true> // IsComplex
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    return x.real()*x.real() + x.imag()*x.imag();
+  }
+};
+
+template<typename Scalar>
+struct abs2_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    return abs2_impl_default<Scalar,NumTraits<Scalar>::IsComplex>::run(x);
+  }
+};
+
+template<typename Scalar>
+struct abs2_retval
+{
+  typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of norm1                                                *
+****************************************************************************/
+
+template<typename Scalar, bool IsComplex>
+struct norm1_default_impl;
+
+template<typename Scalar>
+struct norm1_default_impl<Scalar,true>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const Scalar& x)
+  {
+    EIGEN_USING_STD_MATH(abs);
+    return abs(x.real()) + abs(x.imag());
+  }
+};
+
+template<typename Scalar>
+struct norm1_default_impl<Scalar, false>
+{
+  EIGEN_DEVICE_FUNC
+  static inline Scalar run(const Scalar& x)
+  {
+    EIGEN_USING_STD_MATH(abs);
+    return abs(x);
+  }
+};
+
+template<typename Scalar>
+struct norm1_impl : norm1_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
+
+template<typename Scalar>
+struct norm1_retval
+{
+  typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of hypot                                                *
+****************************************************************************/
+
+template<typename Scalar> struct hypot_impl;
+
+template<typename Scalar>
+struct hypot_retval
+{
+  typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of cast                                                 *
+****************************************************************************/
+
+template<typename OldType, typename NewType>
+struct cast_impl
+{
+  EIGEN_DEVICE_FUNC
+  static inline NewType run(const OldType& x)
+  {
+    return static_cast<NewType>(x);
+  }
+};
+
+// here, for once, we're plainly returning NewType: we don't want cast to do weird things.
+
+template<typename OldType, typename NewType>
+EIGEN_DEVICE_FUNC
+inline NewType cast(const OldType& x)
+{
+  return cast_impl<OldType, NewType>::run(x);
+}
+
+/****************************************************************************
+* Implementation of round                                                   *
+****************************************************************************/
+
+#if EIGEN_HAS_CXX11_MATH
+  template<typename Scalar>
+  struct round_impl {
+    static inline Scalar run(const Scalar& x)
+    {
+      EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
+      using std::round;
+      return round(x);
+    }
+  };
+#else
+  template<typename Scalar>
+  struct round_impl
+  {
+    static inline Scalar run(const Scalar& x)
+    {
+      EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
+      EIGEN_USING_STD_MATH(floor);
+      EIGEN_USING_STD_MATH(ceil);
+      return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5));
+    }
+  };
+#endif
+
+template<typename Scalar>
+struct round_retval
+{
+  typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of arg                                                     *
+****************************************************************************/
+
+#if EIGEN_HAS_CXX11_MATH
+  template<typename Scalar>
+  struct arg_impl {
+    static inline Scalar run(const Scalar& x)
+    {
+      EIGEN_USING_STD_MATH(arg);
+      return arg(x);
+    }
+  };
+#else
+  template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+  struct arg_default_impl
+  {
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    EIGEN_DEVICE_FUNC
+    static inline RealScalar run(const Scalar& x)
+    {
+      return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); }
+  };
+
+  template<typename Scalar>
+  struct arg_default_impl<Scalar,true>
+  {
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    EIGEN_DEVICE_FUNC
+    static inline RealScalar run(const Scalar& x)
+    {
+      EIGEN_USING_STD_MATH(arg);
+      return arg(x);
+    }
+  };
+
+  template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
+#endif
+
+template<typename Scalar>
+struct arg_retval
+{
+  typedef typename NumTraits<Scalar>::Real type;
+};
+
+/****************************************************************************
+* Implementation of expm1                                                   *
+****************************************************************************/
+
+// This implementation is based on GSL Math's expm1.
+namespace std_fallback {
+  // fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar,
+  // or that there is no suitable std::expm1 function available. Implementation
+  // attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php.
+  template<typename Scalar>
+  EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) {
+    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+
+    EIGEN_USING_STD_MATH(exp);
+    Scalar u = exp(x);
+    if (numext::equal_strict(u, Scalar(1))) {
+      return x;
+    }
+    Scalar um1 = u - RealScalar(1);
+    if (numext::equal_strict(um1, Scalar(-1))) {
+      return RealScalar(-1);
+    }
+
+    EIGEN_USING_STD_MATH(log);
+    return (u - RealScalar(1)) * x / log(u);
+  }
+}
+
+template<typename Scalar>
+struct expm1_impl {
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
+  {
+    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+    #if EIGEN_HAS_CXX11_MATH
+    using std::expm1;
+    #else
+    using std_fallback::expm1;
+    #endif
+    return expm1(x);
+  }
+};
+
+// Specialization for complex types that are not supported by std::expm1.
+template <typename RealScalar>
+struct expm1_impl<std::complex<RealScalar> > {
+  EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
+      const std::complex<RealScalar>& x) {
+    EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
+    return std_fallback::expm1(x);
+  }
+};
+
+template<typename Scalar>
+struct expm1_retval
+{
+  typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of log1p                                                   *
+****************************************************************************/
+
+namespace std_fallback {
+  // fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar,
+  // or that there is no suitable std::log1p function available
+  template<typename Scalar>
+  EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
+    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    EIGEN_USING_STD_MATH(log);
+    Scalar x1p = RealScalar(1) + x;
+    return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
+  }
+}
+
+template<typename Scalar>
+struct log1p_impl {
+  static inline Scalar run(const Scalar& x)
+  {
+    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+    #if EIGEN_HAS_CXX11_MATH
+    using std::log1p;
+    #else
+    using std_fallback::log1p;
+    #endif
+    return log1p(x);
+  }
+};
+
+// Specialization for complex types that are not supported by std::log1p.
+template <typename RealScalar>
+struct log1p_impl<std::complex<RealScalar> > {
+  EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
+      const std::complex<RealScalar>& x) {
+    EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
+    return std_fallback::log1p(x);
+  }
+};
+
+template<typename Scalar>
+struct log1p_retval
+{
+  typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of pow                                                  *
+****************************************************************************/
+
+template<typename ScalarX,typename ScalarY, bool IsInteger = NumTraits<ScalarX>::IsInteger&&NumTraits<ScalarY>::IsInteger>
+struct pow_impl
+{
+  //typedef Scalar retval;
+  typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;
+  static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)
+  {
+    EIGEN_USING_STD_MATH(pow);
+    return pow(x, y);
+  }
+};
+
+template<typename ScalarX,typename ScalarY>
+struct pow_impl<ScalarX,ScalarY, true>
+{
+  typedef ScalarX result_type;
+  static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y)
+  {
+    ScalarX res(1);
+    eigen_assert(!NumTraits<ScalarY>::IsSigned || y >= 0);
+    if(y & 1) res *= x;
+    y >>= 1;
+    while(y)
+    {
+      x *= x;
+      if(y&1) res *= x;
+      y >>= 1;
+    }
+    return res;
+  }
+};
+
+/****************************************************************************
+* Implementation of random                                               *
+****************************************************************************/
+
+template<typename Scalar,
+         bool IsComplex,
+         bool IsInteger>
+struct random_default_impl {};
+
+template<typename Scalar>
+struct random_impl : random_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+
+template<typename Scalar>
+struct random_retval
+{
+  typedef Scalar type;
+};
+
+template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y);
+template<typename Scalar> inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random();
+
+template<typename Scalar>
+struct random_default_impl<Scalar, false, false>
+{
+  static inline Scalar run(const Scalar& x, const Scalar& y)
+  {
+    return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX);
+  }
+  static inline Scalar run()
+  {
+    return run(Scalar(NumTraits<Scalar>::IsSigned ? -1 : 0), Scalar(1));
+  }
+};
+
+enum {
+  meta_floor_log2_terminate,
+  meta_floor_log2_move_up,
+  meta_floor_log2_move_down,
+  meta_floor_log2_bogus
+};
+
+template<unsigned int n, int lower, int upper> struct meta_floor_log2_selector
+{
+  enum { middle = (lower + upper) / 2,
+         value = (upper <= lower + 1) ? int(meta_floor_log2_terminate)
+               : (n < (1 << middle)) ? int(meta_floor_log2_move_down)
+               : (n==0) ? int(meta_floor_log2_bogus)
+               : int(meta_floor_log2_move_up)
+  };
+};
+
+template<unsigned int n,
+         int lower = 0,
+         int upper = sizeof(unsigned int) * CHAR_BIT - 1,
+         int selector = meta_floor_log2_selector<n, lower, upper>::value>
+struct meta_floor_log2 {};
+
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down>
+{
+  enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
+};
+
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up>
+{
+  enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
+};
+
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate>
+{
+  enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower };
+};
+
+template<unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
+{
+  // no value, error at compile time
+};
+
+template<typename Scalar>
+struct random_default_impl<Scalar, false, true>
+{
+  static inline Scalar run(const Scalar& x, const Scalar& y)
+  {
+    if (y <= x)
+      return x;
+    // ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself.
+    typedef typename make_unsigned<Scalar>::type ScalarU;
+    // ScalarX is the widest of ScalarU and unsigned int.
+    // We'll deal only with ScalarX and unsigned int below thus avoiding signed
+    // types and arithmetic and signed overflows (which are undefined behavior).
+    typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX;
+    // The following difference doesn't overflow, provided our integer types are two's
+    // complement and have the same number of padding bits in signed and unsigned variants.
+    // This is the case in most modern implementations of C++.
+    ScalarX range = ScalarX(y) - ScalarX(x);
+    ScalarX offset = 0;
+    ScalarX divisor = 1;
+    ScalarX multiplier = 1;
+    const unsigned rand_max = RAND_MAX;
+    if (range <= rand_max) divisor = (rand_max + 1) / (range + 1);
+    else                   multiplier = 1 + range / (rand_max + 1);
+    // Rejection sampling.
+    do {
+      offset = (unsigned(std::rand()) * multiplier) / divisor;
+    } while (offset > range);
+    return Scalar(ScalarX(x) + offset);
+  }
+
+  static inline Scalar run()
+  {
+#ifdef EIGEN_MAKING_DOCS
+    return run(Scalar(NumTraits<Scalar>::IsSigned ? -10 : 0), Scalar(10));
+#else
+    enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value,
+           scalar_bits = sizeof(Scalar) * CHAR_BIT,
+           shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)),
+           offset = NumTraits<Scalar>::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0
+    };
+    return Scalar((std::rand() >> shift) - offset);
+#endif
+  }
+};
+
+template<typename Scalar>
+struct random_default_impl<Scalar, true, false>
+{
+  static inline Scalar run(const Scalar& x, const Scalar& y)
+  {
+    return Scalar(random(x.real(), y.real()),
+                  random(x.imag(), y.imag()));
+  }
+  static inline Scalar run()
+  {
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    return Scalar(random<RealScalar>(), random<RealScalar>());
+  }
+};
+
+template<typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y)
+{
+  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y);
+}
+
+template<typename Scalar>
+inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
+{
+  return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
+}
+
+// Implementatin of is* functions
+
+// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang.
+#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG)
+#define EIGEN_USE_STD_FPCLASSIFY 1
+#else
+#define EIGEN_USE_STD_FPCLASSIFY 0
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isnan_impl(const T&) { return false; }
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isinf_impl(const T&) { return false; }
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<internal::is_integral<T>::value,bool>::type
+isfinite_impl(const T&) { return true; }
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isfinite_impl(const T& x)
+{
+  #ifdef __CUDA_ARCH__
+    return (::isfinite)(x);
+  #elif EIGEN_USE_STD_FPCLASSIFY
+    using std::isfinite;
+    return isfinite EIGEN_NOT_A_MACRO (x);
+  #else
+    return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
+  #endif
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isinf_impl(const T& x)
+{
+  #ifdef __CUDA_ARCH__
+    return (::isinf)(x);
+  #elif EIGEN_USE_STD_FPCLASSIFY
+    using std::isinf;
+    return isinf EIGEN_NOT_A_MACRO (x);
+  #else
+    return x>NumTraits<T>::highest() || x<NumTraits<T>::lowest();
+  #endif
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
+isnan_impl(const T& x)
+{
+  #ifdef __CUDA_ARCH__
+    return (::isnan)(x);
+  #elif EIGEN_USE_STD_FPCLASSIFY
+    using std::isnan;
+    return isnan EIGEN_NOT_A_MACRO (x);
+  #else
+    return x != x;
+  #endif
+}
+
+#if (!EIGEN_USE_STD_FPCLASSIFY)
+
+#if EIGEN_COMP_MSVC
+
+template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
+{
+  return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF;
+}
+
+//MSVC defines a _isnan builtin function, but for double only
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x)      { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x)       { return _isnan(x)!=0; }
+
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x)      { return isinf_msvc_helper(x); }
+EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x)       { return isinf_msvc_helper(x); }
+
+#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC)
+
+#if EIGEN_GNUC_AT_LEAST(5,0)
+  #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only")))
+#else
+  // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol),
+  //      while the second prevent too aggressive optimizations in fast-math mode:
+  #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only")))
+#endif
+
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x)      { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x)       { return __builtin_isnan(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x)      { return __builtin_isinf(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x)       { return __builtin_isinf(x); }
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); }
+
+#undef EIGEN_TMP_NOOPT_ATTRIB
+
+#endif
+
+#endif
+
+// The following overload are defined at the end of this file
+template<typename T> EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x);
+template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
+template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
+
+template<typename T> T generic_fast_tanh_float(const T& a_x);
+
+} // end namespace internal
+
+/****************************************************************************
+* Generic math functions                                                    *
+****************************************************************************/
+
+namespace numext {
+
+#ifndef __CUDA_ARCH__
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
+{
+  EIGEN_USING_STD_MATH(min);
+  return min EIGEN_NOT_A_MACRO (x,y);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
+{
+  EIGEN_USING_STD_MATH(max);
+  return max EIGEN_NOT_A_MACRO (x,y);
+}
+#else
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
+{
+  return y < x ? y : x;
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
+{
+  return fminf(x, y);
+}
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
+{
+  return x < y ? y : x;
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
+{
+  return fmaxf(x, y);
+}
+#endif
+
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
+{
+  return internal::real_ref_impl<Scalar>::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
+{
+  return internal::imag_ref_impl<Scalar>::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
+}
+
+EIGEN_DEVICE_FUNC
+inline bool abs2(bool x) { return x; }
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)
+{
+  return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
+}
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float log1p(const float &x) { return ::log1pf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double log1p(const double &x) { return ::log1p(x); }
+#endif
+
+template<typename ScalarX,typename ScalarY>
+EIGEN_DEVICE_FUNC
+inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const ScalarX& x, const ScalarY& y)
+{
+  return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
+}
+
+template<typename T> EIGEN_DEVICE_FUNC bool (isnan)   (const T &x) { return internal::isnan_impl(x); }
+template<typename T> EIGEN_DEVICE_FUNC bool (isinf)   (const T &x) { return internal::isinf_impl(x); }
+template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
+{
+  return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+T (floor)(const T& x)
+{
+  EIGEN_USING_STD_MATH(floor);
+  return floor(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float floor(const float &x) { return ::floorf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double floor(const double &x) { return ::floor(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+T (ceil)(const T& x)
+{
+  EIGEN_USING_STD_MATH(ceil);
+  return ceil(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float ceil(const float &x) { return ::ceilf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double ceil(const double &x) { return ::ceil(x); }
+#endif
+
+
+/** Log base 2 for 32 bits positive integers.
+  * Conveniently returns 0 for x==0. */
+inline int log2(int x)
+{
+  eigen_assert(x>=0);
+  unsigned int v(x);
+  static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+  v |= v >> 1;
+  v |= v >> 2;
+  v |= v >> 4;
+  v |= v >> 8;
+  v |= v >> 16;
+  return table[(v * 0x07C4ACDDU) >> 27];
+}
+
+/** \returns the square root of \a x.
+  *
+  * It is essentially equivalent to
+  * \code using std::sqrt; return sqrt(x); \endcode
+  * but slightly faster for float/double and some compilers (e.g., gcc), thanks to
+  * specializations when SSE is enabled.
+  *
+  * It's usage is justified in performance critical functions, like norm/normalize.
+  */
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sqrt(const T &x)
+{
+  EIGEN_USING_STD_MATH(sqrt);
+  return sqrt(x);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T log(const T &x) {
+  EIGEN_USING_STD_MATH(log);
+  return log(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float log(const float &x) { return ::logf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double log(const double &x) { return ::log(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
+abs(const T &x) {
+  EIGEN_USING_STD_MATH(abs);
+  return abs(x);
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type
+abs(const T &x) {
+  return x;
+}
+
+#if defined(__SYCL_DEVICE_ONLY__)
+EIGEN_ALWAYS_INLINE float   abs(float x) { return cl::sycl::fabs(x); }
+EIGEN_ALWAYS_INLINE double  abs(double x) { return cl::sycl::fabs(x); }
+#endif // defined(__SYCL_DEVICE_ONLY__)
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float abs(const float &x) { return ::fabsf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double abs(const double &x) { return ::fabs(x); }
+
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float abs(const std::complex<float>& x) {
+  return ::hypotf(x.real(), x.imag());
+}
+
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double abs(const std::complex<double>& x) {
+  return ::hypot(x.real(), x.imag());
+}
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T exp(const T &x) {
+  EIGEN_USING_STD_MATH(exp);
+  return exp(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float exp(const float &x) { return ::expf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double exp(const double &x) { return ::exp(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T cos(const T &x) {
+  EIGEN_USING_STD_MATH(cos);
+  return cos(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float cos(const float &x) { return ::cosf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double cos(const double &x) { return ::cos(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sin(const T &x) {
+  EIGEN_USING_STD_MATH(sin);
+  return sin(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sin(const float &x) { return ::sinf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sin(const double &x) { return ::sin(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T tan(const T &x) {
+  EIGEN_USING_STD_MATH(tan);
+  return tan(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tan(const float &x) { return ::tanf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double tan(const double &x) { return ::tan(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T acos(const T &x) {
+  EIGEN_USING_STD_MATH(acos);
+  return acos(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float acos(const float &x) { return ::acosf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double acos(const double &x) { return ::acos(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T asin(const T &x) {
+  EIGEN_USING_STD_MATH(asin);
+  return asin(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float asin(const float &x) { return ::asinf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double asin(const double &x) { return ::asin(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T atan(const T &x) {
+  EIGEN_USING_STD_MATH(atan);
+  return atan(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float atan(const float &x) { return ::atanf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double atan(const double &x) { return ::atan(x); }
+#endif
+
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T cosh(const T &x) {
+  EIGEN_USING_STD_MATH(cosh);
+  return cosh(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float cosh(const float &x) { return ::coshf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double cosh(const double &x) { return ::cosh(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sinh(const T &x) {
+  EIGEN_USING_STD_MATH(sinh);
+  return sinh(x);
+}
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sinh(const float &x) { return ::sinhf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sinh(const double &x) { return ::sinh(x); }
+#endif
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T tanh(const T &x) {
+  EIGEN_USING_STD_MATH(tanh);
+  return tanh(x);
+}
+
+#if (!defined(EIGEN_CUDACC)) && EIGEN_FAST_MATH
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tanh(float x) { return internal::generic_fast_tanh_float(x); }
+#endif
+
+#ifdef EIGEN_CUDACC
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float tanh(const float &x) { return ::tanhf(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double tanh(const double &x) { return ::tanh(x); }
+#endif
+
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T fmod(const T& a, const T& b) {
+  EIGEN_USING_STD_MATH(fmod);
+  return fmod(a, b);
+}
+
+#ifdef EIGEN_CUDACC
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float fmod(const float& a, const float& b) {
+  return ::fmodf(a, b);
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double fmod(const double& a, const double& b) {
+  return ::fmod(a, b);
+}
+#endif
+
+} // end namespace numext
+
+namespace internal {
+
+template<typename T>
+EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x)
+{
+  return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x));
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
+{
+  return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x));
+}
+
+template<typename T>
+EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x)
+{
+  return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x));
+}
+
+/****************************************************************************
+* Implementation of fuzzy comparisons                                       *
+****************************************************************************/
+
+template<typename Scalar,
+         bool IsComplex,
+         bool IsInteger>
+struct scalar_fuzzy_default_impl {};
+
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, false, false>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  template<typename OtherScalar> EIGEN_DEVICE_FUNC
+  static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
+  {
+    return numext::abs(x) <= numext::abs(y) * prec;
+  }
+  EIGEN_DEVICE_FUNC
+  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
+  {
+    return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec;
+  }
+  EIGEN_DEVICE_FUNC
+  static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
+  {
+    return x <= y || isApprox(x, y, prec);
+  }
+};
+
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, false, true>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  template<typename OtherScalar> EIGEN_DEVICE_FUNC
+  static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&)
+  {
+    return x == Scalar(0);
+  }
+  EIGEN_DEVICE_FUNC
+  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&)
+  {
+    return x == y;
+  }
+  EIGEN_DEVICE_FUNC
+  static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&)
+  {
+    return x <= y;
+  }
+};
+
+template<typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, true, false>
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  template<typename OtherScalar> EIGEN_DEVICE_FUNC
+  static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
+  {
+    return numext::abs2(x) <= numext::abs2(y) * prec * prec;
+  }
+  EIGEN_DEVICE_FUNC
+  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
+  {
+    return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;
+  }
+};
+
+template<typename Scalar>
+struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+
+template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
+inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
+                              const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
+{
+  return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
+}
+
+template<typename Scalar> EIGEN_DEVICE_FUNC
+inline bool isApprox(const Scalar& x, const Scalar& y,
+                     const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
+{
+  return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
+}
+
+template<typename Scalar> EIGEN_DEVICE_FUNC
+inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
+                               const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
+{
+  return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
+}
+
+/******************************************
+***  The special case of the  bool type ***
+******************************************/
+
+template<> struct random_impl<bool>
+{
+  static inline bool run()
+  {
+    return random<int>(0,1)==0 ? false : true;
+  }
+};
+
+template<> struct scalar_fuzzy_impl<bool>
+{
+  typedef bool RealScalar;
+  
+  template<typename OtherScalar> EIGEN_DEVICE_FUNC
+  static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
+  {
+    return !x;
+  }
+  
+  EIGEN_DEVICE_FUNC
+  static inline bool isApprox(bool x, bool y, bool)
+  {
+    return x == y;
+  }
+
+  EIGEN_DEVICE_FUNC
+  static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)
+  {
+    return (!x) || y;
+  }
+  
+};
+
+  
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATHFUNCTIONS_H
diff --git a/third-party/Eigen/src/Core/MathFunctionsImpl.h b/third-party/Eigen/src/Core/MathFunctionsImpl.h
new file mode 100644
index 00000000..9c1ceb0e
--- /dev/null
+++ b/third-party/Eigen/src/Core/MathFunctionsImpl.h
@@ -0,0 +1,101 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATHFUNCTIONSIMPL_H
+#define EIGEN_MATHFUNCTIONSIMPL_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
+    Doesn't do anything fancy, just a 13/6-degree rational interpolant which
+    is accurate up to a couple of ulp in the range [-9, 9], outside of which
+    the tanh(x) = +/-1.
+
+    This implementation works on both scalars and packets.
+*/
+template<typename T>
+T generic_fast_tanh_float(const T& a_x)
+{
+  // Clamp the inputs to the range [-9, 9] since anything outside
+  // this range is +/-1.0f in single-precision.
+  const T plus_9 = pset1<T>(9.f);
+  const T minus_9 = pset1<T>(-9.f);
+  // NOTE GCC prior to 6.3 might improperly optimize this max/min
+  //      step such that if a_x is nan, x will be either 9 or -9,
+  //      and tanh will return 1 or -1 instead of nan.
+  //      This is supposed to be fixed in gcc6.3,
+  //      see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
+  const T x = pmax(minus_9,pmin(plus_9,a_x));
+  // The monomial coefficients of the numerator polynomial (odd).
+  const T alpha_1 = pset1<T>(4.89352455891786e-03f);
+  const T alpha_3 = pset1<T>(6.37261928875436e-04f);
+  const T alpha_5 = pset1<T>(1.48572235717979e-05f);
+  const T alpha_7 = pset1<T>(5.12229709037114e-08f);
+  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
+  const T alpha_11 = pset1<T>(2.00018790482477e-13f);
+  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
+
+  // The monomial coefficients of the denominator polynomial (even).
+  const T beta_0 = pset1<T>(4.89352518554385e-03f);
+  const T beta_2 = pset1<T>(2.26843463243900e-03f);
+  const T beta_4 = pset1<T>(1.18534705686654e-04f);
+  const T beta_6 = pset1<T>(1.19825839466702e-06f);
+
+  // Since the polynomials are odd/even, we need x^2.
+  const T x2 = pmul(x, x);
+
+  // Evaluate the numerator polynomial p.
+  T p = pmadd(x2, alpha_13, alpha_11);
+  p = pmadd(x2, p, alpha_9);
+  p = pmadd(x2, p, alpha_7);
+  p = pmadd(x2, p, alpha_5);
+  p = pmadd(x2, p, alpha_3);
+  p = pmadd(x2, p, alpha_1);
+  p = pmul(x, p);
+
+  // Evaluate the denominator polynomial p.
+  T q = pmadd(x2, beta_6, beta_4);
+  q = pmadd(x2, q, beta_2);
+  q = pmadd(x2, q, beta_0);
+
+  // Divide the numerator by the denominator.
+  return pdiv(p, q);
+}
+
+template<typename RealScalar>
+EIGEN_STRONG_INLINE
+RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
+{
+  EIGEN_USING_STD_MATH(sqrt);
+  RealScalar p, qp;
+  p = numext::maxi(x,y);
+  if(p==RealScalar(0)) return RealScalar(0);
+  qp = numext::mini(y,x) / p;    
+  return p * sqrt(RealScalar(1) + qp*qp);
+}
+
+template<typename Scalar>
+struct hypot_impl
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  static inline RealScalar run(const Scalar& x, const Scalar& y)
+  {
+    EIGEN_USING_STD_MATH(abs);
+    return positive_real_hypot<RealScalar>(abs(x), abs(y));
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATHFUNCTIONSIMPL_H
diff --git a/third-party/Eigen/src/Core/Matrix.h b/third-party/Eigen/src/Core/Matrix.h
new file mode 100644
index 00000000..7f4a7af9
--- /dev/null
+++ b/third-party/Eigen/src/Core/Matrix.h
@@ -0,0 +1,459 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIX_H
+#define EIGEN_MATRIX_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct traits<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+private:
+  enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret };
+  typedef typename find_best_packet<_Scalar,size>::type PacketScalar;
+  enum {
+      row_major_bit = _Options&RowMajor ? RowMajorBit : 0,
+      is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic,
+      max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols,
+      default_alignment = compute_default_alignment<_Scalar,max_size>::value,
+      actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
+      required_alignment = unpacket_traits<PacketScalar>::alignment,
+      packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
+    };
+    
+public:
+  typedef _Scalar Scalar;
+  typedef Dense StorageKind;
+  typedef Eigen::Index StorageIndex;
+  typedef MatrixXpr XprKind;
+  enum {
+    RowsAtCompileTime = _Rows,
+    ColsAtCompileTime = _Cols,
+    MaxRowsAtCompileTime = _MaxRows,
+    MaxColsAtCompileTime = _MaxCols,
+    Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret,
+    Options = _Options,
+    InnerStrideAtCompileTime = 1,
+    OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime,
+    
+    // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase
+    EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
+    Alignment = actual_alignment
+  };
+};
+}
+
+/** \class Matrix
+  * \ingroup Core_Module
+  *
+  * \brief The matrix class, also used for vectors and row-vectors
+  *
+  * The %Matrix class is the work-horse for all \em dense (\ref dense "note") matrices and vectors within Eigen.
+  * Vectors are matrices with one column, and row-vectors are matrices with one row.
+  *
+  * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note").
+  *
+  * The first three template parameters are required:
+  * \tparam _Scalar Numeric type, e.g. float, double, int or std::complex<float>.
+  *                 User defined scalar types are supported as well (see \ref user_defined_scalars "here").
+  * \tparam _Rows Number of rows, or \b Dynamic
+  * \tparam _Cols Number of columns, or \b Dynamic
+  *
+  * The remaining template parameters are optional -- in most cases you don't have to worry about them.
+  * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either
+  *                 \b #AutoAlign or \b #DontAlign.
+  *                 The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required
+  *                 for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size.
+  * \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note").
+  * \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note").
+  *
+  * Eigen provides a number of typedefs covering the usual cases. Here are some examples:
+  *
+  * \li \c Matrix2d is a 2x2 square matrix of doubles (\c Matrix<double, 2, 2>)
+  * \li \c Vector4f is a vector of 4 floats (\c Matrix<float, 4, 1>)
+  * \li \c RowVector3i is a row-vector of 3 ints (\c Matrix<int, 1, 3>)
+  *
+  * \li \c MatrixXf is a dynamic-size matrix of floats (\c Matrix<float, Dynamic, Dynamic>)
+  * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix<float, Dynamic, 1>)
+  *
+  * \li \c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\c Matrix<float, 2, Dynamic>)
+  * \li \c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\c Matrix<double, Dynamic, 3>)
+  *
+  * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs.
+  *
+  * You can access elements of vectors and matrices using normal subscripting:
+  *
+  * \code
+  * Eigen::VectorXd v(10);
+  * v[0] = 0.1;
+  * v[1] = 0.2;
+  * v(0) = 0.3;
+  * v(1) = 0.4;
+  *
+  * Eigen::MatrixXi m(10, 10);
+  * m(0, 1) = 1;
+  * m(0, 2) = 2;
+  * m(0, 3) = 3;
+  * \endcode
+  *
+  * This class can be extended with the help of the plugin mechanism described on the page
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
+  *
+  * <i><b>Some notes:</b></i>
+  *
+  * <dl>
+  * <dt><b>\anchor dense Dense versus sparse:</b></dt>
+  * <dd>This %Matrix class handles dense, not sparse matrices and vectors. For sparse matrices and vectors, see the Sparse module.
+  *
+  * Dense matrices and vectors are plain usual arrays of coefficients. All the coefficients are stored, in an ordinary contiguous array.
+  * This is unlike Sparse matrices and vectors where the coefficients are stored as a list of nonzero coefficients.</dd>
+  *
+  * <dt><b>\anchor fixedsize Fixed-size versus dynamic-size:</b></dt>
+  * <dd>Fixed-size means that the numbers of rows and columns are known are compile-time. In this case, Eigen allocates the array
+  * of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices, typically up to 4x4, sometimes up
+  * to 16x16. Larger matrices should be declared as dynamic-size even if one happens to know their size at compile-time.
+  *
+  * Dynamic-size means that the numbers of rows or columns are not necessarily known at compile-time. In this case they are runtime
+  * variables, and the array of coefficients is allocated dynamically on the heap.
+  *
+  * Note that \em dense matrices, be they Fixed-size or Dynamic-size, <em>do not</em> expand dynamically in the sense of a std::map.
+  * If you want this behavior, see the Sparse module.</dd>
+  *
+  * <dt><b>\anchor maxrows _MaxRows and _MaxCols:</b></dt>
+  * <dd>In most cases, one just leaves these parameters to the default values.
+  * These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases
+  * when the exact numbers of rows and columns are not known are compile-time, but it is known at compile-time that they cannot
+  * exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case _MaxRows and _MaxCols
+  * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.</dd>
+  * </dl>
+  *
+  * <i><b>ABI and storage layout</b></i>
+  *
+  * The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3.
+  * <table  class="manual">
+  * <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
+  * <tr><td>\code Matrix<T,Dynamic,Dynamic> \endcode</td><td>\code
+  * struct {
+  *   T *data;                  // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
+  *   Eigen::Index rows, cols;
+  *  };
+  * \endcode</td></tr>
+  * <tr class="alt"><td>\code
+  * Matrix<T,Dynamic,1>
+  * Matrix<T,1,Dynamic> \endcode</td><td>\code
+  * struct {
+  *   T *data;                  // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
+  *   Eigen::Index size;
+  *  };
+  * \endcode</td></tr>
+  * <tr><td>\code Matrix<T,Rows,Cols> \endcode</td><td>\code
+  * struct {
+  *   T data[Rows*Cols];        // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0
+  *  };
+  * \endcode</td></tr>
+  * <tr class="alt"><td>\code Matrix<T,Dynamic,Dynamic,0,MaxRows,MaxCols> \endcode</td><td>\code
+  * struct {
+  *   T data[MaxRows*MaxCols];  // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0
+  *   Eigen::Index rows, cols;
+  *  };
+  * \endcode</td></tr>
+  * </table>
+  * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two
+  * smaller to EIGEN_MAX_STATIC_ALIGN_BYTES.
+  *
+  * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
+  * \ref TopicStorageOrders
+  */
+
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+class Matrix
+  : public PlainObjectBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+{
+  public:
+
+    /** \brief Base class typedef.
+      * \sa PlainObjectBase
+      */
+    typedef PlainObjectBase<Matrix> Base;
+
+    enum { Options = _Options };
+
+    EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)
+
+    typedef typename Base::PlainObject PlainObject;
+
+    using Base::base;
+    using Base::coeffRef;
+
+    /**
+      * \brief Assigns matrices to each other.
+      *
+      * \note This is a special case of the templated operator=. Its purpose is
+      * to prevent a default operator= from hiding the templated operator=.
+      *
+      * \callgraph
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
+    {
+      return Base::_set(other);
+    }
+
+    /** \internal
+      * \brief Copies the value of the expression \a other into \c *this with automatic resizing.
+      *
+      * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
+      * it will be initialized.
+      *
+      * Note that copying a row-vector into a vector (and conversely) is allowed.
+      * The resizing, if any, is then done in the appropriate way so that row-vectors
+      * remain row-vectors and vectors remain vectors.
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other)
+    {
+      return Base::_set(other);
+    }
+
+    /* Here, doxygen failed to copy the brief information when using \copydoc */
+
+    /**
+      * \brief Copies the generic expression \a other into *this.
+      * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
+    {
+      return Base::operator=(other);
+    }
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
+    {
+      return Base::operator=(func);
+    }
+
+    /** \brief Default constructor.
+      *
+      * For fixed-size matrices, does nothing.
+      *
+      * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix
+      * is called a null matrix. This constructor is the unique way to create null matrices: resizing
+      * a matrix to 0 is not supported.
+      *
+      * \sa resize(Index,Index)
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix() : Base()
+    {
+      Base::_check_template_params();
+      EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+    }
+
+    // FIXME is it still needed
+    EIGEN_DEVICE_FUNC
+    explicit Matrix(internal::constructor_without_unaligned_array_assert)
+      : Base(internal::constructor_without_unaligned_array_assert())
+    { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
+
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
+      : Base(std::move(other))
+    {
+      Base::_check_template_params();
+    }
+    EIGEN_DEVICE_FUNC
+    Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
+    {
+      other.swap(*this);
+      return *this;
+    }
+#endif
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+
+    // This constructor is for both 1x1 matrices and dynamic vectors
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE explicit Matrix(const T& x)
+    {
+      Base::_check_template_params();
+      Base::template _init1<T>(x);
+    }
+
+    template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
+    {
+      Base::_check_template_params();
+      Base::template _init2<T0,T1>(x, y);
+    }
+    #else
+    /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */
+    EIGEN_DEVICE_FUNC
+    explicit Matrix(const Scalar *data);
+
+    /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
+      *
+      * This is useful for dynamic-size vectors. For fixed-size vectors,
+      * it is redundant to pass these parameters, so one should use the default constructor
+      * Matrix() instead.
+      * 
+      * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
+      * calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
+      * For fixed-size \c 1x1 matrices it is therefore recommended to use the default
+      * constructor Matrix() instead, especially when using one of the non standard
+      * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
+      */
+    EIGEN_STRONG_INLINE explicit Matrix(Index dim);
+    /** \brief Constructs an initialized 1x1 matrix with the given coefficient */
+    Matrix(const Scalar& x);
+    /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns.
+      *
+      * This is useful for dynamic-size matrices. For fixed-size matrices,
+      * it is redundant to pass these parameters, so one should use the default constructor
+      * Matrix() instead.
+      * 
+      * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
+      * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
+      * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default
+      * constructor Matrix() instead, especially when using one of the non standard
+      * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
+      */
+    EIGEN_DEVICE_FUNC
+    Matrix(Index rows, Index cols);
+    
+    /** \brief Constructs an initialized 2D vector with given coefficients */
+    Matrix(const Scalar& x, const Scalar& y);
+    #endif
+
+    /** \brief Constructs an initialized 3D vector with given coefficients */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
+    {
+      Base::_check_template_params();
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)
+      m_storage.data()[0] = x;
+      m_storage.data()[1] = y;
+      m_storage.data()[2] = z;
+    }
+    /** \brief Constructs an initialized 4D vector with given coefficients */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
+    {
+      Base::_check_template_params();
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)
+      m_storage.data()[0] = x;
+      m_storage.data()[1] = y;
+      m_storage.data()[2] = z;
+      m_storage.data()[3] = w;
+    }
+
+
+    /** \brief Copy constructor */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other)
+    { }
+
+    /** \brief Copy constructor for generic expressions.
+      * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
+      : Base(other.derived())
+    { }
+
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
+
+    /////////// Geometry module ///////////
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
+
+    // allow to extend Matrix outside Eigen
+    #ifdef EIGEN_MATRIX_PLUGIN
+    #include EIGEN_MATRIX_PLUGIN
+    #endif
+
+  protected:
+    template <typename Derived, typename OtherDerived, bool IsVector>
+    friend struct internal::conservative_resize_like_impl;
+
+    using Base::m_storage;
+};
+
+/** \defgroup matrixtypedefs Global matrix typedefs
+  *
+  * \ingroup Core_Module
+  *
+  * Eigen defines several typedef shortcuts for most common matrix and vector types.
+  *
+  * The general patterns are the following:
+  *
+  * \c MatrixSizeType where \c Size can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size,
+  * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd
+  * for complex double.
+  *
+  * For example, \c Matrix3d is a fixed-size 3x3 matrix type of doubles, and \c MatrixXf is a dynamic-size matrix of floats.
+  *
+  * There are also \c VectorSizeType and \c RowVectorSizeType which are self-explanatory. For example, \c Vector4cf is
+  * a fixed-size vector of 4 complex floats.
+  *
+  * \sa class Matrix
+  */
+
+#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix)   \
+/** \ingroup matrixtypedefs */                                    \
+typedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix;  \
+/** \ingroup matrixtypedefs */                                    \
+typedef Matrix<Type, Size, 1>    Vector##SizeSuffix##TypeSuffix;  \
+/** \ingroup matrixtypedefs */                                    \
+typedef Matrix<Type, 1, Size>    RowVector##SizeSuffix##TypeSuffix;
+
+#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size)         \
+/** \ingroup matrixtypedefs */                                    \
+typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix;  \
+/** \ingroup matrixtypedefs */                                    \
+typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;
+
+#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \
+EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
+
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int,                  i)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float,                f)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double,               d)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<float>,  cf)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)
+
+#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
+#undef EIGEN_MAKE_TYPEDEFS
+#undef EIGEN_MAKE_FIXED_TYPEDEFS
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATRIX_H
diff --git a/third-party/Eigen/src/Core/MatrixBase.h b/third-party/Eigen/src/Core/MatrixBase.h
new file mode 100644
index 00000000..f8bcc8c6
--- /dev/null
+++ b/third-party/Eigen/src/Core/MatrixBase.h
@@ -0,0 +1,530 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIXBASE_H
+#define EIGEN_MATRIXBASE_H
+
+namespace Eigen {
+
+/** \class MatrixBase
+  * \ingroup Core_Module
+  *
+  * \brief Base class for all dense matrices, vectors, and expressions
+  *
+  * This class is the base that is inherited by all matrix, vector, and related expression
+  * types. Most of the Eigen API is contained in this class, and its base classes. Other important
+  * classes for the Eigen API are Matrix, and VectorwiseOp.
+  *
+  * Note that some methods are defined in other modules such as the \ref LU_Module LU module
+  * for all functions related to matrix inversions.
+  *
+  * \tparam Derived is the derived type, e.g. a matrix type, or an expression, etc.
+  *
+  * When writing a function taking Eigen objects as argument, if you want your function
+  * to take as argument any matrix, vector, or expression, just let it take a
+  * MatrixBase argument. As an example, here is a function printFirstRow which, given
+  * a matrix, vector, or expression \a x, prints the first row of \a x.
+  *
+  * \code
+    template<typename Derived>
+    void printFirstRow(const Eigen::MatrixBase<Derived>& x)
+    {
+      cout << x.row(0) << endl;
+    }
+  * \endcode
+  *
+  * This class can be extended with the help of the plugin mechanism described on the page
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN.
+  *
+  * \sa \blank \ref TopicClassHierarchy
+  */
+template<typename Derived> class MatrixBase
+  : public DenseBase<Derived>
+{
+  public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef MatrixBase StorageBaseType;
+    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+
+    typedef DenseBase<Derived> Base;
+    using Base::RowsAtCompileTime;
+    using Base::ColsAtCompileTime;
+    using Base::SizeAtCompileTime;
+    using Base::MaxRowsAtCompileTime;
+    using Base::MaxColsAtCompileTime;
+    using Base::MaxSizeAtCompileTime;
+    using Base::IsVectorAtCompileTime;
+    using Base::Flags;
+
+    using Base::derived;
+    using Base::const_cast_derived;
+    using Base::rows;
+    using Base::cols;
+    using Base::size;
+    using Base::coeff;
+    using Base::coeffRef;
+    using Base::lazyAssign;
+    using Base::eval;
+    using Base::operator+=;
+    using Base::operator-=;
+    using Base::operator*=;
+    using Base::operator/=;
+
+    typedef typename Base::CoeffReturnType CoeffReturnType;
+    typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
+    typedef typename Base::RowXpr RowXpr;
+    typedef typename Base::ColXpr ColXpr;
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** type of the equivalent square matrix */
+    typedef Matrix<Scalar,EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime),
+                          EIGEN_SIZE_MAX(RowsAtCompileTime,ColsAtCompileTime)> SquareMatrixType;
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+    /** \returns the size of the main diagonal, which is min(rows(),cols()).
+      * \sa rows(), cols(), SizeAtCompileTime. */
+    EIGEN_DEVICE_FUNC
+    inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); }
+
+    typedef typename Base::PlainObject PlainObject;
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** \internal Represents a matrix with all coefficients equal to one another*/
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+    /** \internal the return type of MatrixBase::adjoint() */
+    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
+                        ConstTransposeReturnType
+                     >::type AdjointReturnType;
+    /** \internal Return type of eigenvalues() */
+    typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
+    /** \internal the return type of identity */
+    typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
+    /** \internal the return type of unit vectors */
+    typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
+                  internal::traits<Derived>::RowsAtCompileTime,
+                  internal::traits<Derived>::ColsAtCompileTime> BasisReturnType;
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase
+#define EIGEN_DOC_UNARY_ADDONS(X,Y)
+#   include "../plugins/CommonCwiseUnaryOps.h"
+#   include "../plugins/CommonCwiseBinaryOps.h"
+#   include "../plugins/MatrixCwiseUnaryOps.h"
+#   include "../plugins/MatrixCwiseBinaryOps.h"
+#   ifdef EIGEN_MATRIXBASE_PLUGIN
+#     include EIGEN_MATRIXBASE_PLUGIN
+#   endif
+#undef EIGEN_CURRENT_STORAGE_BASE_CLASS
+#undef EIGEN_DOC_UNARY_ADDONS
+
+    /** Special case of the template operator=, in order to prevent the compiler
+      * from generating a default operator= (issue hit with g++ 4.1)
+      */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator=(const MatrixBase& other);
+
+    // We cannot inherit here via Base::operator= since it is causing
+    // trouble with MSVC.
+
+    template <typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator=(const DenseBase<OtherDerived>& other);
+
+    template <typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    Derived& operator=(const EigenBase<OtherDerived>& other);
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    Derived& operator=(const ReturnByValue<OtherDerived>& other);
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator+=(const MatrixBase<OtherDerived>& other);
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator-=(const MatrixBase<OtherDerived>& other);
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    const Product<Derived,OtherDerived>
+    operator*(const MatrixBase<OtherDerived> &other) const;
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    const Product<Derived,OtherDerived,LazyProduct>
+    lazyProduct(const MatrixBase<OtherDerived> &other) const;
+
+    template<typename OtherDerived>
+    Derived& operator*=(const EigenBase<OtherDerived>& other);
+
+    template<typename OtherDerived>
+    void applyOnTheLeft(const EigenBase<OtherDerived>& other);
+
+    template<typename OtherDerived>
+    void applyOnTheRight(const EigenBase<OtherDerived>& other);
+
+    template<typename DiagonalDerived>
+    EIGEN_DEVICE_FUNC
+    const Product<Derived, DiagonalDerived, LazyProduct>
+    operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
+    dot(const MatrixBase<OtherDerived>& other) const;
+
+    EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
+    EIGEN_DEVICE_FUNC RealScalar norm() const;
+    RealScalar stableNorm() const;
+    RealScalar blueNorm() const;
+    RealScalar hypotNorm() const;
+    EIGEN_DEVICE_FUNC const PlainObject normalized() const;
+    EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
+    EIGEN_DEVICE_FUNC void normalize();
+    EIGEN_DEVICE_FUNC void stableNormalize();
+
+    EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
+    EIGEN_DEVICE_FUNC void adjointInPlace();
+
+    typedef Diagonal<Derived> DiagonalReturnType;
+    EIGEN_DEVICE_FUNC
+    DiagonalReturnType diagonal();
+
+    typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
+    EIGEN_DEVICE_FUNC
+    ConstDiagonalReturnType diagonal() const;
+
+    template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
+    template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
+
+    template<int Index>
+    EIGEN_DEVICE_FUNC
+    typename DiagonalIndexReturnType<Index>::Type diagonal();
+
+    template<int Index>
+    EIGEN_DEVICE_FUNC
+    typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
+
+    typedef Diagonal<Derived,DynamicIndex> DiagonalDynamicIndexReturnType;
+    typedef typename internal::add_const<Diagonal<const Derived,DynamicIndex> >::type ConstDiagonalDynamicIndexReturnType;
+
+    EIGEN_DEVICE_FUNC
+    DiagonalDynamicIndexReturnType diagonal(Index index);
+    EIGEN_DEVICE_FUNC
+    ConstDiagonalDynamicIndexReturnType diagonal(Index index) const;
+
+    template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
+    template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
+
+    template<unsigned int Mode>
+    EIGEN_DEVICE_FUNC
+    typename TriangularViewReturnType<Mode>::Type triangularView();
+    template<unsigned int Mode>
+    EIGEN_DEVICE_FUNC
+    typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
+
+    template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
+    template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
+
+    template<unsigned int UpLo>
+    EIGEN_DEVICE_FUNC
+    typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
+    template<unsigned int UpLo>
+    EIGEN_DEVICE_FUNC
+    typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
+
+    const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
+                                         const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
+    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
+    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
+
+    EIGEN_DEVICE_FUNC
+    const DiagonalWrapper<const Derived> asDiagonal() const;
+    const PermutationWrapper<const Derived> asPermutation() const;
+
+    EIGEN_DEVICE_FUNC
+    Derived& setIdentity();
+    EIGEN_DEVICE_FUNC
+    Derived& setIdentity(Index rows, Index cols);
+
+    bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isDiagonal(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+    bool isUpperTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isLowerTriangular(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+    template<typename OtherDerived>
+    bool isOrthogonal(const MatrixBase<OtherDerived>& other,
+                      const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    bool isUnitary(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+
+    /** \returns true if each coefficients of \c *this and \a other are all exactly equal.
+      * \warning When using floating point scalar values you probably should rather use a
+      *          fuzzy comparison such as isApprox()
+      * \sa isApprox(), operator!= */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
+    { return cwiseEqual(other).all(); }
+
+    /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other.
+      * \warning When using floating point scalar values you probably should rather use a
+      *          fuzzy comparison such as isApprox()
+      * \sa isApprox(), operator== */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
+    { return cwiseNotEqual(other).any(); }
+
+    NoAlias<Derived,Eigen::MatrixBase > noalias();
+
+    // TODO forceAlignedAccess is temporarily disabled
+    // Need to find a nicer workaround.
+    inline const Derived& forceAlignedAccess() const { return derived(); }
+    inline Derived& forceAlignedAccess() { return derived(); }
+    template<bool Enable> inline const Derived& forceAlignedAccessIf() const { return derived(); }
+    template<bool Enable> inline Derived& forceAlignedAccessIf() { return derived(); }
+
+    EIGEN_DEVICE_FUNC Scalar trace() const;
+
+    template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
+
+    EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
+    EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
+
+    /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
+      * \sa ArrayBase::matrix() */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper<Derived> array() { return ArrayWrapper<Derived>(derived()); }
+    /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix
+      * \sa ArrayBase::matrix() */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper<const Derived> array() const { return ArrayWrapper<const Derived>(derived()); }
+
+/////////// LU module ///////////
+
+    inline const FullPivLU<PlainObject> fullPivLu() const;
+    inline const PartialPivLU<PlainObject> partialPivLu() const;
+
+    inline const PartialPivLU<PlainObject> lu() const;
+
+    inline const Inverse<Derived> inverse() const;
+
+    template<typename ResultType>
+    inline void computeInverseAndDetWithCheck(
+      ResultType& inverse,
+      typename ResultType::Scalar& determinant,
+      bool& invertible,
+      const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
+    ) const;
+    template<typename ResultType>
+    inline void computeInverseWithCheck(
+      ResultType& inverse,
+      bool& invertible,
+      const RealScalar& absDeterminantThreshold = NumTraits<Scalar>::dummy_precision()
+    ) const;
+    Scalar determinant() const;
+
+/////////// Cholesky module ///////////
+
+    inline const LLT<PlainObject>  llt() const;
+    inline const LDLT<PlainObject> ldlt() const;
+
+/////////// QR module ///////////
+
+    inline const HouseholderQR<PlainObject> householderQr() const;
+    inline const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
+    inline const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
+    inline const CompleteOrthogonalDecomposition<PlainObject> completeOrthogonalDecomposition() const;
+
+/////////// Eigenvalues module ///////////
+
+    inline EigenvaluesReturnType eigenvalues() const;
+    inline RealScalar operatorNorm() const;
+
+/////////// SVD module ///////////
+
+    inline JacobiSVD<PlainObject> jacobiSvd(unsigned int computationOptions = 0) const;
+    inline BDCSVD<PlainObject>    bdcSvd(unsigned int computationOptions = 0) const;
+
+/////////// Geometry module ///////////
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    /// \internal helper struct to form the return type of the cross product
+    template<typename OtherDerived> struct cross_product_return_type {
+      typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
+      typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
+    };
+    #endif // EIGEN_PARSED_BY_DOXYGEN
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    inline typename cross_product_return_type<OtherDerived>::type
+#else
+    inline PlainObject
+#endif
+    cross(const MatrixBase<OtherDerived>& other) const;
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    inline PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
+
+    EIGEN_DEVICE_FUNC
+    inline PlainObject unitOrthogonal(void) const;
+
+    EIGEN_DEVICE_FUNC
+    inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
+
+    // put this as separate enum value to work around possible GCC 4.3 bug (?)
+    enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
+                                          : ColsAtCompileTime==1 ? Vertical : Horizontal };
+    typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
+    EIGEN_DEVICE_FUNC
+    inline HomogeneousReturnType homogeneous() const;
+
+    enum {
+      SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1
+    };
+    typedef Block<const Derived,
+                  internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
+                  internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
+    typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
+    EIGEN_DEVICE_FUNC
+    inline const HNormalizedReturnType hnormalized() const;
+
+////////// Householder module ///////////
+
+    void makeHouseholderInPlace(Scalar& tau, RealScalar& beta);
+    template<typename EssentialPart>
+    void makeHouseholder(EssentialPart& essential,
+                         Scalar& tau, RealScalar& beta) const;
+    template<typename EssentialPart>
+    void applyHouseholderOnTheLeft(const EssentialPart& essential,
+                                   const Scalar& tau,
+                                   Scalar* workspace);
+    template<typename EssentialPart>
+    void applyHouseholderOnTheRight(const EssentialPart& essential,
+                                    const Scalar& tau,
+                                    Scalar* workspace);
+
+///////// Jacobi module /////////
+
+    template<typename OtherScalar>
+    void applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j);
+    template<typename OtherScalar>
+    void applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j);
+
+///////// SparseCore module /////////
+
+    template<typename OtherDerived>
+    EIGEN_STRONG_INLINE const typename SparseMatrixBase<OtherDerived>::template CwiseProductDenseReturnType<Derived>::Type
+    cwiseProduct(const SparseMatrixBase<OtherDerived> &other) const
+    {
+      return other.cwiseProduct(derived());
+    }
+
+///////// MatrixFunctions module /////////
+
+    typedef typename internal::stem_function<Scalar>::type StemFunction;
+#define EIGEN_MATRIX_FUNCTION(ReturnType, Name, Description) \
+    /** \returns an expression of the matrix Description of \c *this. \brief This function requires the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \
+    const ReturnType<Derived> Name() const;
+#define EIGEN_MATRIX_FUNCTION_1(ReturnType, Name, Description, Argument) \
+    /** \returns an expression of the matrix Description of \c *this. \brief This function requires the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \
+    const ReturnType<Derived> Name(Argument) const;
+
+    EIGEN_MATRIX_FUNCTION(MatrixExponentialReturnValue, exp, exponential)
+    /** \brief Helper function for the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>.*/
+    const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
+    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine)
+    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine)
+    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine)
+    EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine)
+    EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root)
+    EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm)
+    EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue,        pow, power to \c p, const RealScalar& p)
+    EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p)
+
+  protected:
+    EIGEN_DEFAULT_COPY_CONSTRUCTOR(MatrixBase)
+    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MatrixBase)
+
+  private:
+    EIGEN_DEVICE_FUNC explicit MatrixBase(int);
+    EIGEN_DEVICE_FUNC MatrixBase(int,int);
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
+  protected:
+    // mixing arrays and matrices is not legal
+    template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
+    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+    // mixing arrays and matrices is not legal
+    template<typename OtherDerived> Derived& operator-=(const ArrayBase<OtherDerived>& )
+    {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
+};
+
+
+/***************************************************************************
+* Implementation of matrix base methods
+***************************************************************************/
+
+/** replaces \c *this by \c *this * \a other.
+  *
+  * \returns a reference to \c *this
+  *
+  * Example: \include MatrixBase_applyOnTheRight.cpp
+  * Output: \verbinclude MatrixBase_applyOnTheRight.out
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline Derived&
+MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheRight(derived());
+  return derived();
+}
+
+/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
+  *
+  * Example: \include MatrixBase_applyOnTheRight.cpp
+  * Output: \verbinclude MatrixBase_applyOnTheRight.out
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheRight(derived());
+}
+
+/** replaces \c *this by \a other * \c *this.
+  *
+  * Example: \include MatrixBase_applyOnTheLeft.cpp
+  * Output: \verbinclude MatrixBase_applyOnTheLeft.out
+  */
+template<typename Derived>
+template<typename OtherDerived>
+inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
+{
+  other.derived().applyThisOnTheLeft(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATRIXBASE_H
diff --git a/third-party/Eigen/src/Core/NestByValue.h b/third-party/Eigen/src/Core/NestByValue.h
new file mode 100644
index 00000000..13adf070
--- /dev/null
+++ b/third-party/Eigen/src/Core/NestByValue.h
@@ -0,0 +1,110 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NESTBYVALUE_H
+#define EIGEN_NESTBYVALUE_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename ExpressionType>
+struct traits<NestByValue<ExpressionType> > : public traits<ExpressionType>
+{};
+}
+
+/** \class NestByValue
+  * \ingroup Core_Module
+  *
+  * \brief Expression which must be nested by value
+  *
+  * \tparam ExpressionType the type of the object of which we are requiring nesting-by-value
+  *
+  * This class is the return type of MatrixBase::nestByValue()
+  * and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::nestByValue()
+  */
+template<typename ExpressionType> class NestByValue
+  : public internal::dense_xpr_base< NestByValue<ExpressionType> >::type
+{
+  public:
+
+    typedef typename internal::dense_xpr_base<NestByValue>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue)
+
+    EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {}
+
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }
+
+    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
+    {
+      return m_expression.coeff(row, col);
+    }
+
+    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
+    {
+      return m_expression.const_cast_derived().coeffRef(row, col);
+    }
+
+    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
+    {
+      return m_expression.coeff(index);
+    }
+
+    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
+    {
+      return m_expression.const_cast_derived().coeffRef(index);
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(Index row, Index col) const
+    {
+      return m_expression.template packet<LoadMode>(row, col);
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index row, Index col, const PacketScalar& x)
+    {
+      m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x);
+    }
+
+    template<int LoadMode>
+    inline const PacketScalar packet(Index index) const
+    {
+      return m_expression.template packet<LoadMode>(index);
+    }
+
+    template<int LoadMode>
+    inline void writePacket(Index index, const PacketScalar& x)
+    {
+      m_expression.const_cast_derived().template writePacket<LoadMode>(index, x);
+    }
+
+    EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
+
+  protected:
+    const ExpressionType m_expression;
+};
+
+/** \returns an expression of the temporary version of *this.
+  */
+template<typename Derived>
+inline const NestByValue<Derived>
+DenseBase<Derived>::nestByValue() const
+{
+  return NestByValue<Derived>(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_NESTBYVALUE_H
diff --git a/third-party/Eigen/src/Core/NoAlias.h b/third-party/Eigen/src/Core/NoAlias.h
new file mode 100644
index 00000000..33908010
--- /dev/null
+++ b/third-party/Eigen/src/Core/NoAlias.h
@@ -0,0 +1,108 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NOALIAS_H
+#define EIGEN_NOALIAS_H
+
+namespace Eigen {
+
+/** \class NoAlias
+  * \ingroup Core_Module
+  *
+  * \brief Pseudo expression providing an operator = assuming no aliasing
+  *
+  * \tparam ExpressionType the type of the object on which to do the lazy assignment
+  *
+  * This class represents an expression with special assignment operators
+  * assuming no aliasing between the target expression and the source expression.
+  * More precisely it alloas to bypass the EvalBeforeAssignBit flag of the source expression.
+  * It is the return type of MatrixBase::noalias()
+  * and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::noalias()
+  */
+template<typename ExpressionType, template <typename> class StorageBase>
+class NoAlias
+{
+  public:
+    typedef typename ExpressionType::Scalar Scalar;
+    
+    explicit NoAlias(ExpressionType& expression) : m_expression(expression) {}
+    
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
+    {
+      call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
+      return m_expression;
+    }
+    
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
+    {
+      call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
+      return m_expression;
+    }
+    
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
+    {
+      call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
+      return m_expression;
+    }
+
+    EIGEN_DEVICE_FUNC
+    ExpressionType& expression() const
+    {
+      return m_expression;
+    }
+
+  protected:
+    ExpressionType& m_expression;
+};
+
+/** \returns a pseudo expression of \c *this with an operator= assuming
+  * no aliasing between \c *this and the source expression.
+  *
+  * More precisely, noalias() allows to bypass the EvalBeforeAssignBit flag.
+  * Currently, even though several expressions may alias, only product
+  * expressions have this flag. Therefore, noalias() is only usefull when
+  * the source expression contains a matrix product.
+  *
+  * Here are some examples where noalias is usefull:
+  * \code
+  * D.noalias()  = A * B;
+  * D.noalias() += A.transpose() * B;
+  * D.noalias() -= 2 * A * B.adjoint();
+  * \endcode
+  *
+  * On the other hand the following example will lead to a \b wrong result:
+  * \code
+  * A.noalias() = A * B;
+  * \endcode
+  * because the result matrix A is also an operand of the matrix product. Therefore,
+  * there is no alternative than evaluating A * B in a temporary, that is the default
+  * behavior when you write:
+  * \code
+  * A = A * B;
+  * \endcode
+  *
+  * \sa class NoAlias
+  */
+template<typename Derived>
+NoAlias<Derived,MatrixBase> MatrixBase<Derived>::noalias()
+{
+  return NoAlias<Derived, Eigen::MatrixBase >(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_NOALIAS_H
diff --git a/third-party/Eigen/src/Core/NumTraits.h b/third-party/Eigen/src/Core/NumTraits.h
new file mode 100644
index 00000000..daf48987
--- /dev/null
+++ b/third-party/Eigen/src/Core/NumTraits.h
@@ -0,0 +1,248 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NUMTRAITS_H
+#define EIGEN_NUMTRAITS_H
+
+namespace Eigen {
+
+namespace internal {
+
+// default implementation of digits10(), based on numeric_limits if specialized,
+// 0 for integer types, and log10(epsilon()) otherwise.
+template< typename T,
+          bool use_numeric_limits = std::numeric_limits<T>::is_specialized,
+          bool is_integer = NumTraits<T>::IsInteger>
+struct default_digits10_impl
+{
+  static int run() { return std::numeric_limits<T>::digits10; }
+};
+
+template<typename T>
+struct default_digits10_impl<T,false,false> // Floating point
+{
+  static int run() {
+    using std::log10;
+    using std::ceil;
+    typedef typename NumTraits<T>::Real Real;
+    return int(ceil(-log10(NumTraits<Real>::epsilon())));
+  }
+};
+
+template<typename T>
+struct default_digits10_impl<T,false,true> // Integer
+{
+  static int run() { return 0; }
+};
+
+} // end namespace internal
+
+/** \class NumTraits
+  * \ingroup Core_Module
+  *
+  * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
+  *
+  * \tparam T the numeric type at hand
+  *
+  * This class stores enums, typedefs and static methods giving information about a numeric type.
+  *
+  * The provided data consists of:
+  * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real,
+  *     then \c Real is just a typedef to \a T. If \a T is \c std::complex<U> then \c Real
+  *     is a typedef to \a U.
+  * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values,
+  *     such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
+  *     \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
+  *     take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
+  *     only intended as a helper for code that needs to explicitly promote types.
+  * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex<U>, Literal is defined as \c U.
+  *     Of course, this type must be fully compatible with \a T. In doubt, just use \a T here.
+  * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
+  *     this means, just use \a T here.
+  * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex
+  *     type, and to 0 otherwise.
+  * \li An enum value \a IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int,
+  *     and to \c 0 otherwise.
+  * \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed
+  *     to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers.
+  *     Stay vague here. No need to do architecture-specific stuff.
+  * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned.
+  * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must
+  *     be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise.
+  * \li An epsilon() function which, unlike <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/epsilon">std::numeric_limits::epsilon()</a>,
+  *     it returns a \a Real instead of a \a T.
+  * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default
+  *     value by the fuzzy comparison operators.
+  * \li highest() and lowest() functions returning the highest and lowest possible values respectively.
+  * \li digits10() function returning the number of decimal digits that can be represented without change. This is
+  *     the analogue of <a href="http://en.cppreference.com/w/cpp/types/numeric_limits/digits10">std::numeric_limits<T>::digits10</a>
+  *     which is used as the default implementation if specialized.
+  */
+
+template<typename T> struct GenericNumTraits
+{
+  enum {
+    IsInteger = std::numeric_limits<T>::is_integer,
+    IsSigned = std::numeric_limits<T>::is_signed,
+    IsComplex = 0,
+    RequireInitialization = internal::is_arithmetic<T>::value ? 0 : 1,
+    ReadCost = 1,
+    AddCost = 1,
+    MulCost = 1
+  };
+
+  typedef T Real;
+  typedef typename internal::conditional<
+                     IsInteger,
+                     typename internal::conditional<sizeof(T)<=2, float, double>::type,
+                     T
+                   >::type NonInteger;
+  typedef T Nested;
+  typedef T Literal;
+
+  EIGEN_DEVICE_FUNC
+  static inline Real epsilon()
+  {
+    return numext::numeric_limits<T>::epsilon();
+  }
+
+  EIGEN_DEVICE_FUNC
+  static inline int digits10()
+  {
+    return internal::default_digits10_impl<T>::run();
+  }
+
+  EIGEN_DEVICE_FUNC
+  static inline Real dummy_precision()
+  {
+    // make sure to override this for floating-point types
+    return Real(0);
+  }
+
+
+  EIGEN_DEVICE_FUNC
+  static inline T highest() {
+    return (numext::numeric_limits<T>::max)();
+  }
+
+  EIGEN_DEVICE_FUNC
+  static inline T lowest()  {
+    return IsInteger ? (numext::numeric_limits<T>::min)() : (-(numext::numeric_limits<T>::max)());
+  }
+
+  EIGEN_DEVICE_FUNC
+  static inline T infinity() {
+    return numext::numeric_limits<T>::infinity();
+  }
+
+  EIGEN_DEVICE_FUNC
+  static inline T quiet_NaN() {
+    return numext::numeric_limits<T>::quiet_NaN();
+  }
+};
+
+template<typename T> struct NumTraits : GenericNumTraits<T>
+{};
+
+template<> struct NumTraits<float>
+  : GenericNumTraits<float>
+{
+  EIGEN_DEVICE_FUNC
+  static inline float dummy_precision() { return 1e-5f; }
+};
+
+template<> struct NumTraits<double> : GenericNumTraits<double>
+{
+  EIGEN_DEVICE_FUNC
+  static inline double dummy_precision() { return 1e-12; }
+};
+
+template<> struct NumTraits<long double>
+  : GenericNumTraits<long double>
+{
+  static inline long double dummy_precision() { return 1e-15l; }
+};
+
+template<typename _Real> struct NumTraits<std::complex<_Real> >
+  : GenericNumTraits<std::complex<_Real> >
+{
+  typedef _Real Real;
+  typedef typename NumTraits<_Real>::Literal Literal;
+  enum {
+    IsComplex = 1,
+    RequireInitialization = NumTraits<_Real>::RequireInitialization,
+    ReadCost = 2 * NumTraits<_Real>::ReadCost,
+    AddCost = 2 * NumTraits<Real>::AddCost,
+    MulCost = 4 * NumTraits<Real>::MulCost + 2 * NumTraits<Real>::AddCost
+  };
+
+  EIGEN_DEVICE_FUNC
+  static inline Real epsilon() { return NumTraits<Real>::epsilon(); }
+  EIGEN_DEVICE_FUNC
+  static inline Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
+  EIGEN_DEVICE_FUNC
+  static inline int digits10() { return NumTraits<Real>::digits10(); }
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
+{
+  typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> ArrayType;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef Array<RealScalar, Rows, Cols, Options, MaxRows, MaxCols> Real;
+  typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
+  typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
+  typedef ArrayType & Nested;
+  typedef typename NumTraits<Scalar>::Literal Literal;
+
+  enum {
+    IsComplex = NumTraits<Scalar>::IsComplex,
+    IsInteger = NumTraits<Scalar>::IsInteger,
+    IsSigned  = NumTraits<Scalar>::IsSigned,
+    RequireInitialization = 1,
+    ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::ReadCost,
+    AddCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
+    MulCost  = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
+  };
+
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
+
+  static inline int digits10() { return NumTraits<Scalar>::digits10(); }
+};
+
+template<> struct NumTraits<std::string>
+  : GenericNumTraits<std::string>
+{
+  enum {
+    RequireInitialization = 1,
+    ReadCost = HugeCost,
+    AddCost  = HugeCost,
+    MulCost  = HugeCost
+  };
+
+  static inline int digits10() { return 0; }
+
+private:
+  static inline std::string epsilon();
+  static inline std::string dummy_precision();
+  static inline std::string lowest();
+  static inline std::string highest();
+  static inline std::string infinity();
+  static inline std::string quiet_NaN();
+};
+
+// Empty specialization for void to allow template specialization based on NumTraits<T>::Real with T==void and SFINAE.
+template<> struct NumTraits<void> {};
+
+} // end namespace Eigen
+
+#endif // EIGEN_NUMTRAITS_H
diff --git a/third-party/Eigen/src/Core/PermutationMatrix.h b/third-party/Eigen/src/Core/PermutationMatrix.h
new file mode 100644
index 00000000..47c06ba7
--- /dev/null
+++ b/third-party/Eigen/src/Core/PermutationMatrix.h
@@ -0,0 +1,605 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PERMUTATIONMATRIX_H
+#define EIGEN_PERMUTATIONMATRIX_H
+
+namespace Eigen { 
+
+namespace internal {
+
+enum PermPermProduct_t {PermPermProduct};
+
+} // end namespace internal
+
+/** \class PermutationBase
+  * \ingroup Core_Module
+  *
+  * \brief Base class for permutations
+  *
+  * \tparam Derived the derived class
+  *
+  * This class is the base class for all expressions representing a permutation matrix,
+  * internally stored as a vector of integers.
+  * The convention followed here is that if \f$ \sigma \f$ is a permutation, the corresponding permutation matrix
+  * \f$ P_\sigma \f$ is such that if \f$ (e_1,\ldots,e_p) \f$ is the canonical basis, we have:
+  *  \f[ P_\sigma(e_i) = e_{\sigma(i)}. \f]
+  * This convention ensures that for any two permutations \f$ \sigma, \tau \f$, we have:
+  *  \f[ P_{\sigma\circ\tau} = P_\sigma P_\tau. \f]
+  *
+  * Permutation matrices are square and invertible.
+  *
+  * Notice that in addition to the member functions and operators listed here, there also are non-member
+  * operator* to multiply any kind of permutation object with any kind of matrix expression (MatrixBase)
+  * on either side.
+  *
+  * \sa class PermutationMatrix, class PermutationWrapper
+  */
+template<typename Derived>
+class PermutationBase : public EigenBase<Derived>
+{
+    typedef internal::traits<Derived> Traits;
+    typedef EigenBase<Derived> Base;
+  public:
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef typename Traits::IndicesType IndicesType;
+    enum {
+      Flags = Traits::Flags,
+      RowsAtCompileTime = Traits::RowsAtCompileTime,
+      ColsAtCompileTime = Traits::ColsAtCompileTime,
+      MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
+    };
+    typedef typename Traits::StorageIndex StorageIndex;
+    typedef Matrix<StorageIndex,RowsAtCompileTime,ColsAtCompileTime,0,MaxRowsAtCompileTime,MaxColsAtCompileTime>
+            DenseMatrixType;
+    typedef PermutationMatrix<IndicesType::SizeAtCompileTime,IndicesType::MaxSizeAtCompileTime,StorageIndex>
+            PlainPermutationType;
+    typedef PlainPermutationType PlainObject;
+    using Base::derived;
+    typedef Inverse<Derived> InverseReturnType;
+    typedef void Scalar;
+    #endif
+
+    /** Copies the other permutation into *this */
+    template<typename OtherDerived>
+    Derived& operator=(const PermutationBase<OtherDerived>& other)
+    {
+      indices() = other.indices();
+      return derived();
+    }
+
+    /** Assignment from the Transpositions \a tr */
+    template<typename OtherDerived>
+    Derived& operator=(const TranspositionsBase<OtherDerived>& tr)
+    {
+      setIdentity(tr.size());
+      for(Index k=size()-1; k>=0; --k)
+        applyTranspositionOnTheRight(k,tr.coeff(k));
+      return derived();
+    }
+
+    /** \returns the number of rows */
+    inline Index rows() const { return Index(indices().size()); }
+
+    /** \returns the number of columns */
+    inline Index cols() const { return Index(indices().size()); }
+
+    /** \returns the size of a side of the respective square matrix, i.e., the number of indices */
+    inline Index size() const { return Index(indices().size()); }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename DenseDerived>
+    void evalTo(MatrixBase<DenseDerived>& other) const
+    {
+      other.setZero();
+      for (Index i=0; i<rows(); ++i)
+        other.coeffRef(indices().coeff(i),i) = typename DenseDerived::Scalar(1);
+    }
+    #endif
+
+    /** \returns a Matrix object initialized from this permutation matrix. Notice that it
+      * is inefficient to return this Matrix object by value. For efficiency, favor using
+      * the Matrix constructor taking EigenBase objects.
+      */
+    DenseMatrixType toDenseMatrix() const
+    {
+      return derived();
+    }
+
+    /** const version of indices(). */
+    const IndicesType& indices() const { return derived().indices(); }
+    /** \returns a reference to the stored array representing the permutation. */
+    IndicesType& indices() { return derived().indices(); }
+
+    /** Resizes to given size.
+      */
+    inline void resize(Index newSize)
+    {
+      indices().resize(newSize);
+    }
+
+    /** Sets *this to be the identity permutation matrix */
+    void setIdentity()
+    {
+      StorageIndex n = StorageIndex(size());
+      for(StorageIndex i = 0; i < n; ++i)
+        indices().coeffRef(i) = i;
+    }
+
+    /** Sets *this to be the identity permutation matrix of given size.
+      */
+    void setIdentity(Index newSize)
+    {
+      resize(newSize);
+      setIdentity();
+    }
+
+    /** Multiplies *this by the transposition \f$(ij)\f$ on the left.
+      *
+      * \returns a reference to *this.
+      *
+      * \warning This is much slower than applyTranspositionOnTheRight(Index,Index):
+      * this has linear complexity and requires a lot of branching.
+      *
+      * \sa applyTranspositionOnTheRight(Index,Index)
+      */
+    Derived& applyTranspositionOnTheLeft(Index i, Index j)
+    {
+      eigen_assert(i>=0 && j>=0 && i<size() && j<size());
+      for(Index k = 0; k < size(); ++k)
+      {
+        if(indices().coeff(k) == i) indices().coeffRef(k) = StorageIndex(j);
+        else if(indices().coeff(k) == j) indices().coeffRef(k) = StorageIndex(i);
+      }
+      return derived();
+    }
+
+    /** Multiplies *this by the transposition \f$(ij)\f$ on the right.
+      *
+      * \returns a reference to *this.
+      *
+      * This is a fast operation, it only consists in swapping two indices.
+      *
+      * \sa applyTranspositionOnTheLeft(Index,Index)
+      */
+    Derived& applyTranspositionOnTheRight(Index i, Index j)
+    {
+      eigen_assert(i>=0 && j>=0 && i<size() && j<size());
+      std::swap(indices().coeffRef(i), indices().coeffRef(j));
+      return derived();
+    }
+
+    /** \returns the inverse permutation matrix.
+      *
+      * \note \blank \note_try_to_help_rvo
+      */
+    inline InverseReturnType inverse() const
+    { return InverseReturnType(derived()); }
+    /** \returns the tranpose permutation matrix.
+      *
+      * \note \blank \note_try_to_help_rvo
+      */
+    inline InverseReturnType transpose() const
+    { return InverseReturnType(derived()); }
+
+    /**** multiplication helpers to hopefully get RVO ****/
+
+  
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  protected:
+    template<typename OtherDerived>
+    void assignTranspose(const PermutationBase<OtherDerived>& other)
+    {
+      for (Index i=0; i<rows();++i) indices().coeffRef(other.indices().coeff(i)) = i;
+    }
+    template<typename Lhs,typename Rhs>
+    void assignProduct(const Lhs& lhs, const Rhs& rhs)
+    {
+      eigen_assert(lhs.cols() == rhs.rows());
+      for (Index i=0; i<rows();++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i));
+    }
+#endif
+
+  public:
+
+    /** \returns the product permutation matrix.
+      *
+      * \note \blank \note_try_to_help_rvo
+      */
+    template<typename Other>
+    inline PlainPermutationType operator*(const PermutationBase<Other>& other) const
+    { return PlainPermutationType(internal::PermPermProduct, derived(), other.derived()); }
+
+    /** \returns the product of a permutation with another inverse permutation.
+      *
+      * \note \blank \note_try_to_help_rvo
+      */
+    template<typename Other>
+    inline PlainPermutationType operator*(const InverseImpl<Other,PermutationStorage>& other) const
+    { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); }
+
+    /** \returns the product of an inverse permutation with another permutation.
+      *
+      * \note \blank \note_try_to_help_rvo
+      */
+    template<typename Other> friend
+    inline PlainPermutationType operator*(const InverseImpl<Other, PermutationStorage>& other, const PermutationBase& perm)
+    { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); }
+    
+    /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation.
+      *
+      * This function is O(\c n) procedure allocating a buffer of \c n booleans.
+      */
+    Index determinant() const
+    {
+      Index res = 1;
+      Index n = size();
+      Matrix<bool,RowsAtCompileTime,1,0,MaxRowsAtCompileTime> mask(n);
+      mask.fill(false);
+      Index r = 0;
+      while(r < n)
+      {
+        // search for the next seed
+        while(r<n && mask[r]) r++;
+        if(r>=n)
+          break;
+        // we got one, let's follow it until we are back to the seed
+        Index k0 = r++;
+        mask.coeffRef(k0) = true;
+        for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k))
+        {
+          mask.coeffRef(k) = true;
+          res = -res;
+        }
+      }
+      return res;
+    }
+
+  protected:
+
+};
+
+namespace internal {
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
+struct traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
+ : traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+{
+  typedef PermutationStorage StorageKind;
+  typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
+  typedef _StorageIndex StorageIndex;
+  typedef void Scalar;
+};
+}
+
+/** \class PermutationMatrix
+  * \ingroup Core_Module
+  *
+  * \brief Permutation matrix
+  *
+  * \tparam SizeAtCompileTime the number of rows/cols, or Dynamic
+  * \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
+  * \tparam _StorageIndex the integer type of the indices
+  *
+  * This class represents a permutation matrix, internally stored as a vector of integers.
+  *
+  * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix
+  */
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
+class PermutationMatrix : public PermutationBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex> >
+{
+    typedef PermutationBase<PermutationMatrix> Base;
+    typedef internal::traits<PermutationMatrix> Traits;
+  public:
+
+    typedef const PermutationMatrix& Nested;
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef typename Traits::IndicesType IndicesType;
+    typedef typename Traits::StorageIndex StorageIndex;
+    #endif
+
+    inline PermutationMatrix()
+    {}
+
+    /** Constructs an uninitialized permutation matrix of given size.
+      */
+    explicit inline PermutationMatrix(Index size) : m_indices(size)
+    {
+      eigen_internal_assert(size <= NumTraits<StorageIndex>::highest());
+    }
+
+    /** Copy constructor. */
+    template<typename OtherDerived>
+    inline PermutationMatrix(const PermutationBase<OtherDerived>& other)
+      : m_indices(other.indices()) {}
+
+    /** Generic constructor from expression of the indices. The indices
+      * array has the meaning that the permutations sends each integer i to indices[i].
+      *
+      * \warning It is your responsibility to check that the indices array that you passes actually
+      * describes a permutation, i.e., each value between 0 and n-1 occurs exactly once, where n is the
+      * array's size.
+      */
+    template<typename Other>
+    explicit inline PermutationMatrix(const MatrixBase<Other>& indices) : m_indices(indices)
+    {}
+
+    /** Convert the Transpositions \a tr to a permutation matrix */
+    template<typename Other>
+    explicit PermutationMatrix(const TranspositionsBase<Other>& tr)
+      : m_indices(tr.size())
+    {
+      *this = tr;
+    }
+
+    /** Copies the other permutation into *this */
+    template<typename Other>
+    PermutationMatrix& operator=(const PermutationBase<Other>& other)
+    {
+      m_indices = other.indices();
+      return *this;
+    }
+
+    /** Assignment from the Transpositions \a tr */
+    template<typename Other>
+    PermutationMatrix& operator=(const TranspositionsBase<Other>& tr)
+    {
+      return Base::operator=(tr.derived());
+    }
+
+    /** const version of indices(). */
+    const IndicesType& indices() const { return m_indices; }
+    /** \returns a reference to the stored array representing the permutation. */
+    IndicesType& indices() { return m_indices; }
+
+
+    /**** multiplication helpers to hopefully get RVO ****/
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename Other>
+    PermutationMatrix(const InverseImpl<Other,PermutationStorage>& other)
+      : m_indices(other.derived().nestedExpression().size())
+    {
+      eigen_internal_assert(m_indices.size() <= NumTraits<StorageIndex>::highest());
+      StorageIndex end = StorageIndex(m_indices.size());
+      for (StorageIndex i=0; i<end;++i)
+        m_indices.coeffRef(other.derived().nestedExpression().indices().coeff(i)) = i;
+    }
+    template<typename Lhs,typename Rhs>
+    PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs)
+      : m_indices(lhs.indices().size())
+    {
+      Base::assignProduct(lhs,rhs);
+    }
+#endif
+
+  protected:
+
+    IndicesType m_indices;
+};
+
+
+namespace internal {
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
+struct traits<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >
+ : traits<Matrix<_StorageIndex,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+{
+  typedef PermutationStorage StorageKind;
+  typedef Map<const Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>, _PacketAccess> IndicesType;
+  typedef _StorageIndex StorageIndex;
+  typedef void Scalar;
+};
+}
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
+class Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess>
+  : public PermutationBase<Map<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime, _StorageIndex>,_PacketAccess> >
+{
+    typedef PermutationBase<Map> Base;
+    typedef internal::traits<Map> Traits;
+  public:
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef typename Traits::IndicesType IndicesType;
+    typedef typename IndicesType::Scalar StorageIndex;
+    #endif
+
+    inline Map(const StorageIndex* indicesPtr)
+      : m_indices(indicesPtr)
+    {}
+
+    inline Map(const StorageIndex* indicesPtr, Index size)
+      : m_indices(indicesPtr,size)
+    {}
+
+    /** Copies the other permutation into *this */
+    template<typename Other>
+    Map& operator=(const PermutationBase<Other>& other)
+    { return Base::operator=(other.derived()); }
+
+    /** Assignment from the Transpositions \a tr */
+    template<typename Other>
+    Map& operator=(const TranspositionsBase<Other>& tr)
+    { return Base::operator=(tr.derived()); }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** This is a special case of the templated operator=. Its purpose is to
+      * prevent a default operator= from hiding the templated operator=.
+      */
+    Map& operator=(const Map& other)
+    {
+      m_indices = other.m_indices;
+      return *this;
+    }
+    #endif
+
+    /** const version of indices(). */
+    const IndicesType& indices() const { return m_indices; }
+    /** \returns a reference to the stored array representing the permutation. */
+    IndicesType& indices() { return m_indices; }
+
+  protected:
+
+    IndicesType m_indices;
+};
+
+template<typename _IndicesType> class TranspositionsWrapper;
+namespace internal {
+template<typename _IndicesType>
+struct traits<PermutationWrapper<_IndicesType> >
+{
+  typedef PermutationStorage StorageKind;
+  typedef void Scalar;
+  typedef typename _IndicesType::Scalar StorageIndex;
+  typedef _IndicesType IndicesType;
+  enum {
+    RowsAtCompileTime = _IndicesType::SizeAtCompileTime,
+    ColsAtCompileTime = _IndicesType::SizeAtCompileTime,
+    MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
+    MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime,
+    Flags = 0
+  };
+};
+}
+
+/** \class PermutationWrapper
+  * \ingroup Core_Module
+  *
+  * \brief Class to view a vector of integers as a permutation matrix
+  *
+  * \tparam _IndicesType the type of the vector of integer (can be any compatible expression)
+  *
+  * This class allows to view any vector expression of integers as a permutation matrix.
+  *
+  * \sa class PermutationBase, class PermutationMatrix
+  */
+template<typename _IndicesType>
+class PermutationWrapper : public PermutationBase<PermutationWrapper<_IndicesType> >
+{
+    typedef PermutationBase<PermutationWrapper> Base;
+    typedef internal::traits<PermutationWrapper> Traits;
+  public:
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef typename Traits::IndicesType IndicesType;
+    #endif
+
+    inline PermutationWrapper(const IndicesType& indices)
+      : m_indices(indices)
+    {}
+
+    /** const version of indices(). */
+    const typename internal::remove_all<typename IndicesType::Nested>::type&
+    indices() const { return m_indices; }
+
+  protected:
+
+    typename IndicesType::Nested m_indices;
+};
+
+
+/** \returns the matrix with the permutation applied to the columns.
+  */
+template<typename MatrixDerived, typename PermutationDerived>
+EIGEN_DEVICE_FUNC
+const Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
+operator*(const MatrixBase<MatrixDerived> &matrix,
+          const PermutationBase<PermutationDerived>& permutation)
+{
+  return Product<MatrixDerived, PermutationDerived, AliasFreeProduct>
+            (matrix.derived(), permutation.derived());
+}
+
+/** \returns the matrix with the permutation applied to the rows.
+  */
+template<typename PermutationDerived, typename MatrixDerived>
+EIGEN_DEVICE_FUNC
+const Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
+operator*(const PermutationBase<PermutationDerived> &permutation,
+          const MatrixBase<MatrixDerived>& matrix)
+{
+  return Product<PermutationDerived, MatrixDerived, AliasFreeProduct>
+            (permutation.derived(), matrix.derived());
+}
+
+
+template<typename PermutationType>
+class InverseImpl<PermutationType, PermutationStorage>
+  : public EigenBase<Inverse<PermutationType> >
+{
+    typedef typename PermutationType::PlainPermutationType PlainPermutationType;
+    typedef internal::traits<PermutationType> PermTraits;
+  protected:
+    InverseImpl() {}
+  public:
+    typedef Inverse<PermutationType> InverseType;
+    using EigenBase<Inverse<PermutationType> >::derived;
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef typename PermutationType::DenseMatrixType DenseMatrixType;
+    enum {
+      RowsAtCompileTime = PermTraits::RowsAtCompileTime,
+      ColsAtCompileTime = PermTraits::ColsAtCompileTime,
+      MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime
+    };
+    #endif
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename DenseDerived>
+    void evalTo(MatrixBase<DenseDerived>& other) const
+    {
+      other.setZero();
+      for (Index i=0; i<derived().rows();++i)
+        other.coeffRef(i, derived().nestedExpression().indices().coeff(i)) = typename DenseDerived::Scalar(1);
+    }
+    #endif
+
+    /** \return the equivalent permutation matrix */
+    PlainPermutationType eval() const { return derived(); }
+
+    DenseMatrixType toDenseMatrix() const { return derived(); }
+
+    /** \returns the matrix with the inverse permutation applied to the columns.
+      */
+    template<typename OtherDerived> friend
+    const Product<OtherDerived, InverseType, AliasFreeProduct>
+    operator*(const MatrixBase<OtherDerived>& matrix, const InverseType& trPerm)
+    {
+      return Product<OtherDerived, InverseType, AliasFreeProduct>(matrix.derived(), trPerm.derived());
+    }
+
+    /** \returns the matrix with the inverse permutation applied to the rows.
+      */
+    template<typename OtherDerived>
+    const Product<InverseType, OtherDerived, AliasFreeProduct>
+    operator*(const MatrixBase<OtherDerived>& matrix) const
+    {
+      return Product<InverseType, OtherDerived, AliasFreeProduct>(derived(), matrix.derived());
+    }
+};
+
+template<typename Derived>
+const PermutationWrapper<const Derived> MatrixBase<Derived>::asPermutation() const
+{
+  return derived();
+}
+
+namespace internal {
+
+template<> struct AssignmentKind<DenseShape,PermutationShape> { typedef EigenBase2EigenBase Kind; };
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PERMUTATIONMATRIX_H
diff --git a/third-party/Eigen/src/Core/PlainObjectBase.h b/third-party/Eigen/src/Core/PlainObjectBase.h
new file mode 100644
index 00000000..0f3632cf
--- /dev/null
+++ b/third-party/Eigen/src/Core/PlainObjectBase.h
@@ -0,0 +1,1037 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DENSESTORAGEBASE_H
+#define EIGEN_DENSESTORAGEBASE_H
+
+#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
+#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
+# define EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
+#else
+# undef EIGEN_INITIALIZE_COEFFS
+# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+#endif
+
+namespace Eigen {
+
+namespace internal {
+
+template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
+  template<typename Index>
+  EIGEN_DEVICE_FUNC
+  static EIGEN_ALWAYS_INLINE void run(Index, Index)
+  {
+  }
+};
+
+template<> struct check_rows_cols_for_overflow<Dynamic> {
+  template<typename Index>
+  EIGEN_DEVICE_FUNC
+  static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
+  {
+    // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
+    // we assume Index is signed
+    Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed
+    bool error = (rows == 0 || cols == 0) ? false
+               : (rows > max_index / cols);
+    if (error)
+      throw_std_bad_alloc();
+  }
+};
+
+template <typename Derived,
+          typename OtherDerived = Derived,
+          bool IsVector = bool(Derived::IsVectorAtCompileTime) && bool(OtherDerived::IsVectorAtCompileTime)>
+struct conservative_resize_like_impl;
+
+template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers> struct matrix_swap_impl;
+
+} // end namespace internal
+
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+namespace doxygen {
+
+// This is a workaround to doxygen not being able to understand the inheritance logic
+// when it is hidden by the dense_xpr_base helper struct.
+// Moreover, doxygen fails to include members that are not documented in the declaration body of
+// MatrixBase if we inherits MatrixBase<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >,
+// this is why we simply inherits MatrixBase, though this does not make sense.
+
+/** This class is just a workaround for Doxygen and it does not not actually exist. */
+template<typename Derived> struct dense_xpr_base_dispatcher;
+/** This class is just a workaround for Doxygen and it does not not actually exist. */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct dense_xpr_base_dispatcher<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+    : public MatrixBase {};
+/** This class is just a workaround for Doxygen and it does not not actually exist. */
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct dense_xpr_base_dispatcher<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> >
+    : public ArrayBase {};
+
+} // namespace doxygen
+
+/** \class PlainObjectBase
+  * \ingroup Core_Module
+  * \brief %Dense storage base class for matrices and arrays.
+  *
+  * This class can be extended with the help of the plugin mechanism described on the page
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN.
+  *
+  * \tparam Derived is the derived type, e.g., a Matrix or Array
+  *
+  * \sa \ref TopicClassHierarchy
+  */
+template<typename Derived>
+class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher<Derived>
+#else
+template<typename Derived>
+class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
+#endif
+{
+  public:
+    enum { Options = internal::traits<Derived>::Options };
+    typedef typename internal::dense_xpr_base<Derived>::type Base;
+
+    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    
+    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef Derived DenseType;
+
+    using Base::RowsAtCompileTime;
+    using Base::ColsAtCompileTime;
+    using Base::SizeAtCompileTime;
+    using Base::MaxRowsAtCompileTime;
+    using Base::MaxColsAtCompileTime;
+    using Base::MaxSizeAtCompileTime;
+    using Base::IsVectorAtCompileTime;
+    using Base::Flags;
+
+    template<typename PlainObjectType, int MapOptions, typename StrideType> friend class Eigen::Map;
+    friend  class Eigen::Map<Derived, Unaligned>;
+    typedef Eigen::Map<Derived, Unaligned>  MapType;
+    friend  class Eigen::Map<const Derived, Unaligned>;
+    typedef const Eigen::Map<const Derived, Unaligned> ConstMapType;
+#if EIGEN_MAX_ALIGN_BYTES>0
+    // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice.
+    friend  class Eigen::Map<Derived, AlignedMax>;
+    friend  class Eigen::Map<const Derived, AlignedMax>;
+#endif
+    typedef Eigen::Map<Derived, AlignedMax> AlignedMapType;
+    typedef const Eigen::Map<const Derived, AlignedMax> ConstAlignedMapType;
+    template<typename StrideType> struct StridedMapType { typedef Eigen::Map<Derived, Unaligned, StrideType> type; };
+    template<typename StrideType> struct StridedConstMapType { typedef Eigen::Map<const Derived, Unaligned, StrideType> type; };
+    template<typename StrideType> struct StridedAlignedMapType { typedef Eigen::Map<Derived, AlignedMax, StrideType> type; };
+    template<typename StrideType> struct StridedConstAlignedMapType { typedef Eigen::Map<const Derived, AlignedMax, StrideType> type; };
+
+  protected:
+    DenseStorage<Scalar, Base::MaxSizeAtCompileTime, Base::RowsAtCompileTime, Base::ColsAtCompileTime, Options> m_storage;
+
+  public:
+    enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits<Derived>::Alignment>0) };
+    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+
+    EIGEN_DEVICE_FUNC
+    Base& base() { return *static_cast<Base*>(this); }
+    EIGEN_DEVICE_FUNC
+    const Base& base() const { return *static_cast<const Base*>(this); }
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
+
+    /** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index,Index) const
+      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
+      *
+      * See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
+    {
+      if(Flags & RowMajorBit)
+        return m_storage.data()[colId + rowId * m_storage.cols()];
+      else // column-major
+        return m_storage.data()[rowId + colId * m_storage.rows()];
+    }
+
+    /** This is an overloaded version of DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const
+      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
+      *
+      * See DenseCoeffsBase<Derived,ReadOnlyAccessors>::coeff(Index) const for details. */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
+    {
+      return m_storage.data()[index];
+    }
+
+    /** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const
+      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
+      *
+      * See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index,Index) const for details. */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
+    {
+      if(Flags & RowMajorBit)
+        return m_storage.data()[colId + rowId * m_storage.cols()];
+      else // column-major
+        return m_storage.data()[rowId + colId * m_storage.rows()];
+    }
+
+    /** This is an overloaded version of DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const
+      * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts.
+      *
+      * See DenseCoeffsBase<Derived,WriteAccessors>::coeffRef(Index) const for details. */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
+    {
+      return m_storage.data()[index];
+    }
+
+    /** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index).
+      * It is provided for convenience. */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
+    {
+      if(Flags & RowMajorBit)
+        return m_storage.data()[colId + rowId * m_storage.cols()];
+      else // column-major
+        return m_storage.data()[rowId + colId * m_storage.rows()];
+    }
+
+    /** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index).
+      * It is provided for convenience. */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
+    {
+      return m_storage.data()[index];
+    }
+
+    /** \internal */
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
+    {
+      return internal::ploadt<PacketScalar, LoadMode>
+               (m_storage.data() + (Flags & RowMajorBit
+                                   ? colId + rowId * m_storage.cols()
+                                   : rowId + colId * m_storage.rows()));
+    }
+
+    /** \internal */
+    template<int LoadMode>
+    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
+    {
+      return internal::ploadt<PacketScalar, LoadMode>(m_storage.data() + index);
+    }
+
+    /** \internal */
+    template<int StoreMode>
+    EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val)
+    {
+      internal::pstoret<Scalar, PacketScalar, StoreMode>
+              (m_storage.data() + (Flags & RowMajorBit
+                                   ? colId + rowId * m_storage.cols()
+                                   : rowId + colId * m_storage.rows()), val);
+    }
+
+    /** \internal */
+    template<int StoreMode>
+    EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val)
+    {
+      internal::pstoret<Scalar, PacketScalar, StoreMode>(m_storage.data() + index, val);
+    }
+
+    /** \returns a const pointer to the data array of this matrix */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const
+    { return m_storage.data(); }
+
+    /** \returns a pointer to the data array of this matrix */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data()
+    { return m_storage.data(); }
+
+    /** Resizes \c *this to a \a rows x \a cols matrix.
+      *
+      * This method is intended for dynamic-size matrices, although it is legal to call it on any
+      * matrix as long as fixed dimensions are left unchanged. If you only want to change the number
+      * of rows and/or of columns, you can use resize(NoChange_t, Index), resize(Index, NoChange_t).
+      *
+      * If the current number of coefficients of \c *this exactly matches the
+      * product \a rows * \a cols, then no memory allocation is performed and
+      * the current values are left unchanged. In all other cases, including
+      * shrinking, the data is reallocated and all previous values are lost.
+      *
+      * Example: \include Matrix_resize_int_int.cpp
+      * Output: \verbinclude Matrix_resize_int_int.out
+      *
+      * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void resize(Index rows, Index cols)
+    {
+      eigen_assert(   EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime)
+                   && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime)
+                   && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime)
+                   && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime)
+                   && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array.");
+      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(rows, cols);
+      #ifdef EIGEN_INITIALIZE_COEFFS
+        Index size = rows*cols;
+        bool size_changed = size != this->size();
+        m_storage.resize(size, rows, cols);
+        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+      #else
+        m_storage.resize(rows*cols, rows, cols);
+      #endif
+    }
+
+    /** Resizes \c *this to a vector of length \a size
+      *
+      * \only_for_vectors. This method does not work for
+      * partially dynamic matrices when the static dimension is anything other
+      * than 1. For example it will not work with Matrix<double, 2, Dynamic>.
+      *
+      * Example: \include Matrix_resize_int.cpp
+      * Output: \verbinclude Matrix_resize_int.out
+      *
+      * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)
+      */
+    EIGEN_DEVICE_FUNC
+    inline void resize(Index size)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
+      eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
+      #ifdef EIGEN_INITIALIZE_COEFFS
+        bool size_changed = size != this->size();
+      #endif
+      if(RowsAtCompileTime == 1)
+        m_storage.resize(size, 1, size);
+      else
+        m_storage.resize(size, size, 1);
+      #ifdef EIGEN_INITIALIZE_COEFFS
+        if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+      #endif
+    }
+
+    /** Resizes the matrix, changing only the number of columns. For the parameter of type NoChange_t, just pass the special value \c NoChange
+      * as in the example below.
+      *
+      * Example: \include Matrix_resize_NoChange_int.cpp
+      * Output: \verbinclude Matrix_resize_NoChange_int.out
+      *
+      * \sa resize(Index,Index)
+      */
+    EIGEN_DEVICE_FUNC
+    inline void resize(NoChange_t, Index cols)
+    {
+      resize(rows(), cols);
+    }
+
+    /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange
+      * as in the example below.
+      *
+      * Example: \include Matrix_resize_int_NoChange.cpp
+      * Output: \verbinclude Matrix_resize_int_NoChange.out
+      *
+      * \sa resize(Index,Index)
+      */
+    EIGEN_DEVICE_FUNC
+    inline void resize(Index rows, NoChange_t)
+    {
+      resize(rows, cols());
+    }
+
+    /** Resizes \c *this to have the same dimensions as \a other.
+      * Takes care of doing all the checking that's needed.
+      *
+      * Note that copying a row-vector into a vector (and conversely) is allowed.
+      * The resizing, if any, is then done in the appropriate way so that row-vectors
+      * remain row-vectors and vectors remain vectors.
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
+    EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
+    {
+      const OtherDerived& other = _other.derived();
+      internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(other.rows(), other.cols());
+      const Index othersize = other.rows()*other.cols();
+      if(RowsAtCompileTime == 1)
+      {
+        eigen_assert(other.rows() == 1 || other.cols() == 1);
+        resize(1, othersize);
+      }
+      else if(ColsAtCompileTime == 1)
+      {
+        eigen_assert(other.rows() == 1 || other.cols() == 1);
+        resize(othersize, 1);
+      }
+      else resize(other.rows(), other.cols());
+    }
+
+    /** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
+      *
+      * The method is intended for matrices of dynamic size. If you only want to change the number
+      * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or
+      * conservativeResize(Index, NoChange_t).
+      *
+      * Matrices are resized relative to the top-left element. In case values need to be 
+      * appended to the matrix they will be uninitialized.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols)
+    {
+      internal::conservative_resize_like_impl<Derived>::run(*this, rows, cols);
+    }
+
+    /** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
+      *
+      * As opposed to conservativeResize(Index rows, Index cols), this version leaves
+      * the number of columns unchanged.
+      *
+      * In case the matrix is growing, new rows will be uninitialized.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t)
+    {
+      // Note: see the comment in conservativeResize(Index,Index)
+      conservativeResize(rows, cols());
+    }
+
+    /** Resizes the matrix to \a rows x \a cols while leaving old values untouched.
+      *
+      * As opposed to conservativeResize(Index rows, Index cols), this version leaves
+      * the number of rows unchanged.
+      *
+      * In case the matrix is growing, new columns will be uninitialized.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols)
+    {
+      // Note: see the comment in conservativeResize(Index,Index)
+      conservativeResize(rows(), cols);
+    }
+
+    /** Resizes the vector to \a size while retaining old values.
+      *
+      * \only_for_vectors. This method does not work for
+      * partially dynamic matrices when the static dimension is anything other
+      * than 1. For example it will not work with Matrix<double, 2, Dynamic>.
+      *
+      * When values are appended, they will be uninitialized.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void conservativeResize(Index size)
+    {
+      internal::conservative_resize_like_impl<Derived>::run(*this, size);
+    }
+
+    /** Resizes the matrix to \a rows x \a cols of \c other, while leaving old values untouched.
+      *
+      * The method is intended for matrices of dynamic size. If you only want to change the number
+      * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or
+      * conservativeResize(Index, NoChange_t).
+      *
+      * Matrices are resized relative to the top-left element. In case values need to be 
+      * appended to the matrix they will copied from \c other.
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
+    {
+      internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
+    }
+
+    /** This is a special case of the templated operator=. Its purpose is to
+      * prevent a default operator= from hiding the templated operator=.
+      */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
+    {
+      return _set(other);
+    }
+
+    /** \sa MatrixBase::lazyAssign() */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
+    {
+      _resize_to_match(other);
+      return Base::lazyAssign(other.derived());
+    }
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
+    {
+      resize(func.rows(), func.cols());
+      return Base::operator=(func);
+    }
+
+    // Prevent user from trying to instantiate PlainObjectBase objects
+    // by making all its constructor protected. See bug 1074.
+  protected:
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
+    {
+//       _check_template_params();
+//       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+    }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    // FIXME is it still needed ?
+    /** \internal */
+    EIGEN_DEVICE_FUNC
+    explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert)
+      : m_storage(internal::constructor_without_unaligned_array_assert())
+    {
+//       _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+    }
+#endif
+
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT
+      : m_storage( std::move(other.m_storage) )
+    {
+    }
+
+    EIGEN_DEVICE_FUNC
+    PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT
+    {
+      using std::swap;
+      swap(m_storage, other.m_storage);
+      return *this;
+    }
+#endif
+
+    /** Copy constructor */
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other)
+      : Base(), m_storage(other.m_storage) { }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols)
+      : m_storage(size, rows, cols)
+    {
+//       _check_template_params();
+//       EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
+    }
+
+    /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase<OtherDerived> &other)
+      : m_storage()
+    {
+      _check_template_params();
+      resizeLike(other);
+      _set_noalias(other);
+    }
+
+    /** \sa PlainObjectBase::operator=(const EigenBase<OtherDerived>&) */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
+      : m_storage()
+    {
+      _check_template_params();
+      resizeLike(other);
+      *this = other.derived();
+    }
+    /** \brief Copy constructor with in-place evaluation */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue<OtherDerived>& other)
+    {
+      _check_template_params();
+      // FIXME this does not automatically transpose vectors if necessary
+      resize(other.rows(), other.cols());
+      other.evalTo(this->derived());
+    }
+
+  public:
+
+    /** \brief Copies the generic expression \a other into *this.
+      * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
+    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
+    {
+      _resize_to_match(other);
+      Base::operator=(other.derived());
+      return this->derived();
+    }
+
+    /** \name Map
+      * These are convenience functions returning Map objects. The Map() static functions return unaligned Map objects,
+      * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
+      * \a data pointers.
+      *
+      * Here is an example using strides:
+      * \include Matrix_Map_stride.cpp
+      * Output: \verbinclude Matrix_Map_stride.out
+      *
+      * \see class Map
+      */
+    //@{
+    static inline ConstMapType Map(const Scalar* data)
+    { return ConstMapType(data); }
+    static inline MapType Map(Scalar* data)
+    { return MapType(data); }
+    static inline ConstMapType Map(const Scalar* data, Index size)
+    { return ConstMapType(data, size); }
+    static inline MapType Map(Scalar* data, Index size)
+    { return MapType(data, size); }
+    static inline ConstMapType Map(const Scalar* data, Index rows, Index cols)
+    { return ConstMapType(data, rows, cols); }
+    static inline MapType Map(Scalar* data, Index rows, Index cols)
+    { return MapType(data, rows, cols); }
+
+    static inline ConstAlignedMapType MapAligned(const Scalar* data)
+    { return ConstAlignedMapType(data); }
+    static inline AlignedMapType MapAligned(Scalar* data)
+    { return AlignedMapType(data); }
+    static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size)
+    { return ConstAlignedMapType(data, size); }
+    static inline AlignedMapType MapAligned(Scalar* data, Index size)
+    { return AlignedMapType(data, size); }
+    static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols)
+    { return ConstAlignedMapType(data, rows, cols); }
+    static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols)
+    { return AlignedMapType(data, rows, cols); }
+
+    template<int Outer, int Inner>
+    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, const Stride<Outer, Inner>& stride)
+    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedConstMapType<Stride<Outer, Inner> >::type Map(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedMapType<Stride<Outer, Inner> >::type Map(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    { return typename StridedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+
+    template<int Outer, int Inner>
+    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, const Stride<Outer, Inner>& stride)
+    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index size, const Stride<Outer, Inner>& stride)
+    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, size, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    { return typename StridedConstAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+    template<int Outer, int Inner>
+    static inline typename StridedAlignedMapType<Stride<Outer, Inner> >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride<Outer, Inner>& stride)
+    { return typename StridedAlignedMapType<Stride<Outer, Inner> >::type(data, rows, cols, stride); }
+    //@}
+
+    using Base::setConstant;
+    EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val);
+    EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val);
+
+    using Base::setZero;
+    EIGEN_DEVICE_FUNC Derived& setZero(Index size);
+    EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
+
+    using Base::setOnes;
+    EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
+    EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
+
+    using Base::setRandom;
+    Derived& setRandom(Index size);
+    Derived& setRandom(Index rows, Index cols);
+
+    #ifdef EIGEN_PLAINOBJECTBASE_PLUGIN
+    #include EIGEN_PLAINOBJECTBASE_PLUGIN
+    #endif
+
+  protected:
+    /** \internal Resizes *this in preparation for assigning \a other to it.
+      * Takes care of doing all the checking that's needed.
+      *
+      * Note that copying a row-vector into a vector (and conversely) is allowed.
+      * The resizing, if any, is then done in the appropriate way so that row-vectors
+      * remain row-vectors and vectors remain vectors.
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
+    EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
+    {
+      #ifdef EIGEN_NO_AUTOMATIC_RESIZING
+      eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size())
+                 : (rows() == other.rows() && cols() == other.cols())))
+        && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined");
+      EIGEN_ONLY_USED_FOR_DEBUG(other);
+      #else
+      resizeLike(other);
+      #endif
+    }
+
+    /**
+      * \brief Copies the value of the expression \a other into \c *this with automatic resizing.
+      *
+      * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
+      * it will be initialized.
+      *
+      * Note that copying a row-vector into a vector (and conversely) is allowed.
+      * The resizing, if any, is then done in the appropriate way so that row-vectors
+      * remain row-vectors and vectors remain vectors.
+      *
+      * \sa operator=(const MatrixBase<OtherDerived>&), _set_noalias()
+      *
+      * \internal
+      */
+    // aliasing is dealt once in internall::call_assignment
+    // so at this stage we have to assume aliasing... and resising has to be done later.
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
+    EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
+    {
+      internal::call_assignment(this->derived(), other.derived());
+      return this->derived();
+    }
+
+    /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
+      * is the case when creating a new matrix) so one can enforce lazy evaluation.
+      *
+      * \sa operator=(const MatrixBase<OtherDerived>&), _set()
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
+    EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
+    {
+      // I don't think we need this resize call since the lazyAssign will anyways resize
+      // and lazyAssign will be called by the assign selector.
+      //_resize_to_match(other);
+      // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
+      // it wouldn't allow to copy a row-vector into a column-vector.
+      internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
+      return this->derived();
+    }
+
+    template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
+    {
+      const bool t0_is_integer_alike = internal::is_valid_index_type<T0>::value;
+      const bool t1_is_integer_alike = internal::is_valid_index_type<T1>::value;
+      EIGEN_STATIC_ASSERT(t0_is_integer_alike &&
+                          t1_is_integer_alike,
+                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
+      resize(rows,cols);
+    }
+    
+    template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC 
+    EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
+      m_storage.data()[0] = Scalar(val0);
+      m_storage.data()[1] = Scalar(val1);
+    }
+    
+    template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC 
+    EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1,
+                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
+                                                                  && (internal::is_same<T0,Index>::value)
+                                                                  && (internal::is_same<T1,Index>::value)
+                                                                  && Base::SizeAtCompileTime==2,T1>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
+      m_storage.data()[0] = Scalar(val0);
+      m_storage.data()[1] = Scalar(val1);
+    }
+
+    // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array,
+    // then the argument is meant to be the size of the object.
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if<    (Base::SizeAtCompileTime!=1 || !internal::is_convertible<T, Scalar>::value)
+                                                                              && ((!internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0)
+    {
+      // NOTE MSVC 2008 complains if we directly put bool(NumTraits<T>::IsInteger) as the EIGEN_STATIC_ASSERT argument.
+      const bool is_integer_alike = internal::is_valid_index_type<T>::value;
+      EIGEN_UNUSED_VARIABLE(is_integer_alike);
+      EIGEN_STATIC_ASSERT(is_integer_alike,
+                          FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED)
+      resize(size);
+    }
+    
+    // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted)
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if<Base::SizeAtCompileTime==1 && internal::is_convertible<T, Scalar>::value,T>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
+      m_storage.data()[0] = val0;
+    }
+    
+    // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type)
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Index& val0,
+                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
+                                                                  && (internal::is_same<Index,T>::value)
+                                                                  && Base::SizeAtCompileTime==1
+                                                                  && internal::is_convertible<T, Scalar>::value,T*>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1)
+      m_storage.data()[0] = Scalar(val0);
+    }
+
+    // Initialize a fixed size matrix from a pointer to raw data
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Scalar* data){
+      this->_set_noalias(ConstMapType(data));
+    }
+
+    // Initialize an arbitrary matrix from a dense expression
+    template<typename T, typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const DenseBase<OtherDerived>& other){
+      this->_set_noalias(other);
+    }
+
+    // Initialize an arbitrary matrix from an object convertible to the Derived type.
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Derived& other){
+      this->_set_noalias(other);
+    }
+
+    // Initialize an arbitrary matrix from a generic Eigen expression
+    template<typename T, typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const EigenBase<OtherDerived>& other){
+      this->derived() = other;
+    }
+
+    template<typename T, typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const ReturnByValue<OtherDerived>& other)
+    {
+      resize(other.rows(), other.cols());
+      other.evalTo(this->derived());
+    }
+
+    template<typename T, typename OtherDerived, int ColsAtCompileTime>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
+    {
+      this->derived() = r;
+    }
+    
+    // For fixed-size Array<Scalar,...>
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Scalar& val0,
+                                    typename internal::enable_if<    Base::SizeAtCompileTime!=Dynamic
+                                                                  && Base::SizeAtCompileTime!=1
+                                                                  && internal::is_convertible<T, Scalar>::value
+                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T>::type* = 0)
+    {
+      Base::setConstant(val0);
+    }
+    
+    // For fixed-size Array<Index,...>
+    template<typename T>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _init1(const Index& val0,
+                                    typename internal::enable_if<    (!internal::is_same<Index,Scalar>::value)
+                                                                  && (internal::is_same<Index,T>::value)
+                                                                  && Base::SizeAtCompileTime!=Dynamic
+                                                                  && Base::SizeAtCompileTime!=1
+                                                                  && internal::is_convertible<T, Scalar>::value
+                                                                  && internal::is_same<typename internal::traits<Derived>::XprKind,ArrayXpr>::value,T*>::type* = 0)
+    {
+      Base::setConstant(val0);
+    }
+    
+    template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
+    friend struct internal::matrix_swap_impl;
+
+  public:
+    
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** \internal
+      * \brief Override DenseBase::swap() since for dynamic-sized matrices
+      * of same type it is enough to swap the data pointers.
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void swap(DenseBase<OtherDerived> & other)
+    {
+      enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
+      internal::matrix_swap_impl<Derived, OtherDerived, bool(SwapPointers)>::run(this->derived(), other.derived());
+    }
+    
+    /** \internal
+      * \brief const version forwarded to DenseBase::swap
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void swap(DenseBase<OtherDerived> const & other)
+    { Base::swap(other.derived()); }
+    
+    EIGEN_DEVICE_FUNC 
+    static EIGEN_STRONG_INLINE void _check_template_params()
+    {
+      EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
+                        && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0)
+                        && ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0))
+                        && ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0))
+                        && ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0))
+                        && ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0))
+                        && (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic)
+                        && (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic)
+                        && (Options & (DontAlign|RowMajor)) == Options),
+        INVALID_MATRIX_TEMPLATE_PARAMETERS)
+    }
+
+    enum { IsPlainObjectBase = 1 };
+#endif
+};
+
+namespace internal {
+
+template <typename Derived, typename OtherDerived, bool IsVector>
+struct conservative_resize_like_impl
+{
+  static void run(DenseBase<Derived>& _this, Index rows, Index cols)
+  {
+    if (_this.rows() == rows && _this.cols() == cols) return;
+    EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
+
+    if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows
+         (!Derived::IsRowMajor && _this.rows() == rows) )  // column-major and we change only the number of columns
+    {
+      internal::check_rows_cols_for_overflow<Derived::MaxSizeAtCompileTime>::run(rows, cols);
+      _this.derived().m_storage.conservativeResize(rows*cols,rows,cols);
+    }
+    else
+    {
+      // The storage order does not allow us to use reallocation.
+      typename Derived::PlainObject tmp(rows,cols);
+      const Index common_rows = numext::mini(rows, _this.rows());
+      const Index common_cols = numext::mini(cols, _this.cols());
+      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
+      _this.derived().swap(tmp);
+    }
+  }
+
+  static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
+  {
+    if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
+
+    // Note: Here is space for improvement. Basically, for conservativeResize(Index,Index),
+    // neither RowsAtCompileTime or ColsAtCompileTime must be Dynamic. If only one of the
+    // dimensions is dynamic, one could use either conservativeResize(Index rows, NoChange_t) or
+    // conservativeResize(NoChange_t, Index cols). For these methods new static asserts like
+    // EIGEN_STATIC_ASSERT_DYNAMIC_ROWS and EIGEN_STATIC_ASSERT_DYNAMIC_COLS would be good.
+    EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
+    EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived)
+
+    if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) || // row-major and we change only the number of rows
+         (!Derived::IsRowMajor && _this.rows() == other.rows()) )  // column-major and we change only the number of columns
+    {
+      const Index new_rows = other.rows() - _this.rows();
+      const Index new_cols = other.cols() - _this.cols();
+      _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols());
+      if (new_rows>0)
+        _this.bottomRightCorner(new_rows, other.cols()) = other.bottomRows(new_rows);
+      else if (new_cols>0)
+        _this.bottomRightCorner(other.rows(), new_cols) = other.rightCols(new_cols);
+    }
+    else
+    {
+      // The storage order does not allow us to use reallocation.
+      typename Derived::PlainObject tmp(other);
+      const Index common_rows = numext::mini(tmp.rows(), _this.rows());
+      const Index common_cols = numext::mini(tmp.cols(), _this.cols());
+      tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
+      _this.derived().swap(tmp);
+    }
+  }
+};
+
+// Here, the specialization for vectors inherits from the general matrix case
+// to allow calling .conservativeResize(rows,cols) on vectors.
+template <typename Derived, typename OtherDerived>
+struct conservative_resize_like_impl<Derived,OtherDerived,true>
+  : conservative_resize_like_impl<Derived,OtherDerived,false>
+{
+  using conservative_resize_like_impl<Derived,OtherDerived,false>::run;
+  
+  static void run(DenseBase<Derived>& _this, Index size)
+  {
+    const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size;
+    const Index new_cols = Derived::RowsAtCompileTime==1 ? size : 1;
+    _this.derived().m_storage.conservativeResize(size,new_rows,new_cols);
+  }
+
+  static void run(DenseBase<Derived>& _this, const DenseBase<OtherDerived>& other)
+  {
+    if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
+
+    const Index num_new_elements = other.size() - _this.size();
+
+    const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows();
+    const Index new_cols = Derived::RowsAtCompileTime==1 ? other.cols() : 1;
+    _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols);
+
+    if (num_new_elements > 0)
+      _this.tail(num_new_elements) = other.tail(num_new_elements);
+  }
+};
+
+template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
+struct matrix_swap_impl
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(MatrixTypeA& a, MatrixTypeB& b)
+  {
+    a.base().swap(b);
+  }
+};
+
+template<typename MatrixTypeA, typename MatrixTypeB>
+struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(MatrixTypeA& a, MatrixTypeB& b)
+  {
+    static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_DENSESTORAGEBASE_H
diff --git a/third-party/Eigen/src/Core/Product.h b/third-party/Eigen/src/Core/Product.h
new file mode 100644
index 00000000..676c4802
--- /dev/null
+++ b/third-party/Eigen/src/Core/Product.h
@@ -0,0 +1,186 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PRODUCT_H
+#define EIGEN_PRODUCT_H
+
+namespace Eigen {
+
+template<typename Lhs, typename Rhs, int Option, typename StorageKind> class ProductImpl;
+
+namespace internal {
+
+template<typename Lhs, typename Rhs, int Option>
+struct traits<Product<Lhs, Rhs, Option> >
+{
+  typedef typename remove_all<Lhs>::type LhsCleaned;
+  typedef typename remove_all<Rhs>::type RhsCleaned;
+  typedef traits<LhsCleaned> LhsTraits;
+  typedef traits<RhsCleaned> RhsTraits;
+  
+  typedef MatrixXpr XprKind;
+  
+  typedef typename ScalarBinaryOpTraits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
+  typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
+                                                typename RhsTraits::StorageKind,
+                                                internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;
+  typedef typename promote_index_type<typename LhsTraits::StorageIndex,
+                                      typename RhsTraits::StorageIndex>::type StorageIndex;
+  
+  enum {
+    RowsAtCompileTime    = LhsTraits::RowsAtCompileTime,
+    ColsAtCompileTime    = RhsTraits::ColsAtCompileTime,
+    MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime,
+    
+    // FIXME: only needed by GeneralMatrixMatrixTriangular
+    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime),
+    
+    // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
+    Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit
+          : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
+          : (   ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit))
+             || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit
+          : NoPreferredStorageOrderBit
+  };
+};
+
+} // end namespace internal
+
+/** \class Product
+  * \ingroup Core_Module
+  *
+  * \brief Expression of the product of two arbitrary matrices or vectors
+  *
+  * \tparam _Lhs the type of the left-hand side expression
+  * \tparam _Rhs the type of the right-hand side expression
+  *
+  * This class represents an expression of the product of two arbitrary matrices.
+  *
+  * The other template parameters are:
+  * \tparam Option     can be DefaultProduct, AliasFreeProduct, or LazyProduct
+  *
+  */
+template<typename _Lhs, typename _Rhs, int Option>
+class Product : public ProductImpl<_Lhs,_Rhs,Option,
+                                   typename internal::product_promote_storage_type<typename internal::traits<_Lhs>::StorageKind,
+                                                                                   typename internal::traits<_Rhs>::StorageKind,
+                                                                                   internal::product_type<_Lhs,_Rhs>::ret>::ret>
+{
+  public:
+    
+    typedef _Lhs Lhs;
+    typedef _Rhs Rhs;
+    
+    typedef typename ProductImpl<
+        Lhs, Rhs, Option,
+        typename internal::product_promote_storage_type<typename internal::traits<Lhs>::StorageKind,
+                                                        typename internal::traits<Rhs>::StorageKind,
+                                                        internal::product_type<Lhs,Rhs>::ret>::ret>::Base Base;
+    EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
+
+    typedef typename internal::ref_selector<Lhs>::type LhsNested;
+    typedef typename internal::ref_selector<Rhs>::type RhsNested;
+    typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+    typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+
+    EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs)
+    {
+      eigen_assert(lhs.cols() == rhs.rows()
+        && "invalid matrix product"
+        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
+    }
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
+
+    EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
+    EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
+
+  protected:
+
+    LhsNested m_lhs;
+    RhsNested m_rhs;
+};
+
+namespace internal {
+  
+template<typename Lhs, typename Rhs, int Option, int ProductTag = internal::product_type<Lhs,Rhs>::ret>
+class dense_product_base
+ : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
+{};
+
+/** Convertion to scalar for inner-products */
+template<typename Lhs, typename Rhs, int Option>
+class dense_product_base<Lhs, Rhs, Option, InnerProduct>
+ : public internal::dense_xpr_base<Product<Lhs,Rhs,Option> >::type
+{
+  typedef Product<Lhs,Rhs,Option> ProductXpr;
+  typedef typename internal::dense_xpr_base<ProductXpr>::type Base;
+public:
+  using Base::derived;
+  typedef typename Base::Scalar Scalar;
+  
+  EIGEN_STRONG_INLINE operator const Scalar() const
+  {
+    return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
+  }
+};
+
+} // namespace internal
+
+// Generic API dispatcher
+template<typename Lhs, typename Rhs, int Option, typename StorageKind>
+class ProductImpl : public internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type
+{
+  public:
+    typedef typename internal::generic_xpr_base<Product<Lhs,Rhs,Option>, MatrixXpr, StorageKind>::type Base;
+};
+
+template<typename Lhs, typename Rhs, int Option>
+class ProductImpl<Lhs,Rhs,Option,Dense>
+  : public internal::dense_product_base<Lhs,Rhs,Option>
+{
+    typedef Product<Lhs, Rhs, Option> Derived;
+    
+  public:
+    
+    typedef typename internal::dense_product_base<Lhs, Rhs, Option> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+  protected:
+    enum {
+      IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && 
+                   (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic),
+      EnableCoeff = IsOneByOne || Option==LazyProduct
+    };
+    
+  public:
+  
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const
+    {
+      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
+      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
+      
+      return internal::evaluator<Derived>(derived()).coeff(row,col);
+    }
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const
+    {
+      EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
+      eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
+      
+      return internal::evaluator<Derived>(derived()).coeff(i);
+    }
+    
+  
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_PRODUCT_H
diff --git a/third-party/Eigen/src/Core/ProductEvaluators.h b/third-party/Eigen/src/Core/ProductEvaluators.h
new file mode 100644
index 00000000..bce1310c
--- /dev/null
+++ b/third-party/Eigen/src/Core/ProductEvaluators.h
@@ -0,0 +1,1138 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_PRODUCTEVALUATORS_H
+#define EIGEN_PRODUCTEVALUATORS_H
+
+namespace Eigen {
+  
+namespace internal {
+
+/** \internal
+  * Evaluator of a product expression.
+  * Since products require special treatments to handle all possible cases,
+  * we simply deffer the evaluation logic to a product_evaluator class
+  * which offers more partial specialization possibilities.
+  * 
+  * \sa class product_evaluator
+  */
+template<typename Lhs, typename Rhs, int Options>
+struct evaluator<Product<Lhs, Rhs, Options> > 
+ : public product_evaluator<Product<Lhs, Rhs, Options> >
+{
+  typedef Product<Lhs, Rhs, Options> XprType;
+  typedef product_evaluator<XprType> Base;
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
+};
+ 
+// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
+// TODO we should apply that rule only if that's really helpful
+template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
+                                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
+                                               const Product<Lhs, Rhs, DefaultProduct> > >
+{
+  static const bool value = true;
+};
+template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
+struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
+                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
+                               const Product<Lhs, Rhs, DefaultProduct> > >
+ : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
+{
+  typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
+                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
+                               const Product<Lhs, Rhs, DefaultProduct> > XprType;
+  typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
+    : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
+  {}
+};
+
+
+template<typename Lhs, typename Rhs, int DiagIndex>
+struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> > 
+ : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
+{
+  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
+  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
+    : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
+        Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
+        xpr.index() ))
+  {}
+};
+
+
+// Helper class to perform a matrix product with the destination at hand.
+// Depending on the sizes of the factors, there are different evaluation strategies
+// as controlled by internal::product_type.
+template< typename Lhs, typename Rhs,
+          typename LhsShape = typename evaluator_traits<Lhs>::Shape,
+          typename RhsShape = typename evaluator_traits<Rhs>::Shape,
+          int ProductType = internal::product_type<Lhs,Rhs>::value>
+struct generic_product_impl;
+
+template<typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
+  static const bool value = true;
+};
+
+// This is the default evaluator implementation for products:
+// It creates a temporary and call generic_product_impl
+template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
+struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
+  : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
+{
+  typedef Product<Lhs, Rhs, Options> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  typedef evaluator<PlainObject> Base;
+  enum {
+    Flags = Base::Flags | EvalBeforeNestingBit
+  };
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit product_evaluator(const XprType& xpr)
+    : m_result(xpr.rows(), xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    
+// FIXME shall we handle nested_eval here?,
+// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
+//     typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+//     typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+//     typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+//     typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+//     
+//     const LhsNested lhs(xpr.lhs());
+//     const RhsNested rhs(xpr.rhs());
+//   
+//     generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
+
+    generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
+  }
+  
+protected:  
+  PlainObject m_result;
+};
+
+// The following three shortcuts are enabled only if the scalar types match excatly.
+// TODO: we could enable them for different scalar types when the product is not vectorized.
+
+// Dense = Product
+template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
+  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
+{
+  typedef Product<Lhs,Rhs,Options> SrcXprType;
+  static EIGEN_STRONG_INLINE
+  void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+    // FIXME shall we handle nested_eval here?
+    generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
+  }
+};
+
+// Dense += Product
+template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
+  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
+{
+  typedef Product<Lhs,Rhs,Options> SrcXprType;
+  static EIGEN_STRONG_INLINE
+  void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
+  {
+    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+    // FIXME shall we handle nested_eval here?
+    generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
+  }
+};
+
+// Dense -= Product
+template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
+  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
+{
+  typedef Product<Lhs,Rhs,Options> SrcXprType;
+  static EIGEN_STRONG_INLINE
+  void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
+  {
+    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
+    // FIXME shall we handle nested_eval here?
+    generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
+  }
+};
+
+
+// Dense ?= scalar * Product
+// TODO we should apply that rule if that's really helpful
+// for instance, this is not good for inner products
+template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
+struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
+                                           const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
+{
+  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
+                        const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
+                        const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
+  static EIGEN_STRONG_INLINE
+  void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
+  {
+    call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
+  }
+};
+
+//----------------------------------------
+// Catch "Dense ?= xpr + Product<>" expression to save one temporary
+// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
+
+template<typename OtherXpr, typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
+                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
+  static const bool value = true;
+};
+
+template<typename OtherXpr, typename Lhs, typename Rhs>
+struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
+                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
+  static const bool value = true;
+};
+
+template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
+struct assignment_from_xpr_op_product
+{
+  template<typename SrcXprType, typename InitialFunc>
+  static EIGEN_STRONG_INLINE
+  void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
+  {
+    call_assignment_no_alias(dst, src.lhs(), Func1());
+    call_assignment_no_alias(dst, src.rhs(), Func2());
+  }
+};
+
+#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
+  template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
+  struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
+                                            const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
+    : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
+  {}
+
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op,    scalar_sum_op,add_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);
+
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op,    scalar_difference_op,sub_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);
+
+//----------------------------------------
+
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
+{
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
+  }
+  
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
+  }
+  
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
+};
+
+
+/***********************************************************************
+*  Implementation of outer dense * dense vector product
+***********************************************************************/
+
+// Column major result
+template<typename Dst, typename Lhs, typename Rhs, typename Func>
+void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
+{
+  evaluator<Rhs> rhsEval(rhs);
+  typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
+  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
+  // FIXME not very good if rhs is real and lhs complex while alpha is real too
+  const Index cols = dst.cols();
+  for (Index j=0; j<cols; ++j)
+    func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
+}
+
+// Row major result
+template<typename Dst, typename Lhs, typename Rhs, typename Func>
+void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
+{
+  evaluator<Lhs> lhsEval(lhs);
+  typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
+  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
+  // FIXME not very good if lhs is real and rhs complex while alpha is real too
+  const Index rows = dst.rows();
+  for (Index i=0; i<rows; ++i)
+    func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
+}
+
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
+{
+  template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
+  struct set  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
+  struct add  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+  struct sub  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+  struct adds {
+    Scalar m_scale;
+    explicit adds(const Scalar& s) : m_scale(s) {}
+    template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+      dst.const_cast_derived() += m_scale * src;
+    }
+  };
+  
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
+  }
+  
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
+  }
+  
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
+  }
+  
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  {
+    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
+  }
+  
+};
+
+
+// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
+template<typename Lhs, typename Rhs, typename Derived>
+struct generic_product_impl_base
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
+
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
+
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
+  
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
+
+};
+
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
+  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
+{
+  typedef typename nested_eval<Lhs,1>::type LhsNested;
+  typedef typename nested_eval<Rhs,1>::type RhsNested;
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
+  typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
+
+  template<typename Dest>
+  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  {
+    LhsNested actual_lhs(lhs);
+    RhsNested actual_rhs(rhs);
+    internal::gemv_dense_selector<Side,
+                            (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
+                            bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
+                           >::run(actual_lhs, actual_rhs, dst, alpha);
+  }
+};
+
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> 
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    // Same as: dst.noalias() = lhs.lazyProduct(rhs);
+    // but easier on the compiler side
+    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
+  }
+
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    // dst.noalias() += lhs.lazyProduct(rhs);
+    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
+  }
+  
+  template<typename Dst>
+  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    // dst.noalias() -= lhs.lazyProduct(rhs);
+    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
+  }
+
+  // Catch "dst {,+,-}= (s*A)*B" and evaluate it lazily by moving out the scalar factor:
+  //    dst {,+,-}= s * (A.lazyProduct(B))
+  // This is a huge benefit for heap-allocated matrix types as it save one costly allocation.
+  // For them, this strategy is also faster than simply by-passing the heap allocation through
+  // stack allocation.
+  // For fixed sizes matrices, this is less obvious, it is sometimes x2 faster, but sometimes x3 slower,
+  // and the behavior depends also a lot on the compiler... so let's be conservative and enable them for dynamic-size only,
+  // that is when coming from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h
+  template<typename Dst, typename Scalar1, typename Scalar2, typename Plain1, typename Xpr2, typename Func>
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  void eval_dynamic(Dst& dst, const CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
+                                           const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>, Xpr2>& lhs, const Rhs& rhs, const Func &func)
+  {
+    call_assignment_no_alias(dst, lhs.lhs().functor().m_other * lhs.rhs().lazyProduct(rhs), func);
+  }
+
+  // Here, we we always have LhsT==Lhs, but we need to make it a template type to make the above
+  // overload more specialized.
+  template<typename Dst, typename LhsT, typename Func>
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  void eval_dynamic(Dst& dst, const LhsT& lhs, const Rhs& rhs, const Func &func)
+  {
+    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), func);
+  }
+  
+  
+//   template<typename Dst>
+//   static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+//   { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
+};
+
+// This specialization enforces the use of a coefficient-based evaluation strategy
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
+  : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};
+
+// Case 2: Evaluate coeff by coeff
+//
+// This is mostly taken from CoeffBasedProduct.h
+// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
+// for the inner dimension of the product, because evaluator object do not know their size.
+
+template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
+struct etor_product_coeff_impl;
+
+template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl;
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
+    : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
+{
+  typedef Product<Lhs, Rhs, LazyProduct> XprType;
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit product_evaluator(const XprType& xpr)
+    : m_lhs(xpr.lhs()),
+      m_rhs(xpr.rhs()),
+      m_lhsImpl(m_lhs),     // FIXME the creation of the evaluator objects should result in a no-op, but check that!
+      m_rhsImpl(m_rhs),     //       Moreover, they are only useful for the packet path, so we could completely disable them when not needed,
+                            //       or perhaps declare them on the fly on the packet method... We have experiment to check what's best.
+      m_innerDim(xpr.lhs().cols())
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+#if 0
+    std::cerr << "LhsOuterStrideBytes=  " << LhsOuterStrideBytes << "\n";
+    std::cerr << "RhsOuterStrideBytes=  " << RhsOuterStrideBytes << "\n";
+    std::cerr << "LhsAlignment=         " << LhsAlignment << "\n";
+    std::cerr << "RhsAlignment=         " << RhsAlignment << "\n";
+    std::cerr << "CanVectorizeLhs=      " << CanVectorizeLhs << "\n";
+    std::cerr << "CanVectorizeRhs=      " << CanVectorizeRhs << "\n";
+    std::cerr << "CanVectorizeInner=    " << CanVectorizeInner << "\n";
+    std::cerr << "EvalToRowMajor=       " << EvalToRowMajor << "\n";
+    std::cerr << "Alignment=            " << Alignment << "\n";
+    std::cerr << "Flags=                " << Flags << "\n";
+#endif
+  }
+
+  // Everything below here is taken from CoeffBasedProduct.h
+
+  typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
+  typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
+  
+  typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
+  typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
+
+  typedef evaluator<LhsNestedCleaned> LhsEtorType;
+  typedef evaluator<RhsNestedCleaned> RhsEtorType;
+
+  enum {
+    RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
+    ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
+    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
+    MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
+  };
+
+  typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
+  typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
+
+  enum {
+      
+    LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
+    RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
+    CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
+                  : InnerSize == Dynamic ? HugeCost
+                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
+
+    Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
+    
+    LhsFlags = LhsEtorType::Flags,
+    RhsFlags = RhsEtorType::Flags,
+    
+    LhsRowMajor = LhsFlags & RowMajorBit,
+    RhsRowMajor = RhsFlags & RowMajorBit,
+
+    LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
+    RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
+
+    // Here, we don't care about alignment larger than the usable packet size.
+    LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
+    RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
+      
+    SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
+
+    CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
+    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),
+
+    EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
+                    : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
+                    : (bool(RhsRowMajor) && !CanVectorizeLhs),
+
+    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
+          | (EvalToRowMajor ? RowMajorBit : 0)
+          // TODO enable vectorization for mixed types
+          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
+          | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
+          
+    LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
+    RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
+
+    Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
+              : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
+              : 0,
+
+    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
+     * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
+     * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
+     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
+     */
+    CanVectorizeInner =    SameType
+                        && LhsRowMajor
+                        && (!RhsRowMajor)
+                        && (LhsFlags & RhsFlags & ActualPacketAccessBit)
+                        && (InnerSize % packet_traits<Scalar>::size == 0)
+  };
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
+  {
+    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
+  }
+
+  /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
+   * which is why we don't set the LinearAccessBit.
+   * TODO: this seems possible when the result is a vector
+   */
+  EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
+  {
+    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
+    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
+    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
+  }
+
+  template<int LoadMode, typename PacketType>
+  const PacketType packet(Index row, Index col) const
+  {
+    PacketType res;
+    typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
+                                     Unroll ? int(InnerSize) : Dynamic,
+                                     LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
+    PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
+    return res;
+  }
+
+  template<int LoadMode, typename PacketType>
+  const PacketType packet(Index index) const
+  {
+    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
+    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
+    return packet<LoadMode,PacketType>(row,col);
+  }
+
+protected:
+  typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
+  typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
+  
+  LhsEtorType m_lhsImpl;
+  RhsEtorType m_rhsImpl;
+
+  // TODO: Get rid of m_innerDim if known at compile time
+  Index m_innerDim;
+};
+
+template<typename Lhs, typename Rhs>
+struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
+  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
+{
+  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
+  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
+  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
+  enum {
+    Flags = Base::Flags | EvalBeforeNestingBit
+  };
+  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+    : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
+  {}
+};
+
+/****************************************
+*** Coeff based product, Packet path  ***
+****************************************/
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+{
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+  {
+    etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+    res =  pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
+  }
+};
+
+template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
+{
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
+  {
+    etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
+    res =  pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
+  }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
+{
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+  {
+    res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
+  }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
+{
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
+  {
+    res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
+  }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
+  {
+    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+  }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
+{
+  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
+  {
+    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+  }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+{
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+  {
+    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+    for(Index i = 0; i < innerDim; ++i)
+      res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
+  }
+};
+
+template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
+struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
+{
+  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
+  {
+    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
+    for(Index i = 0; i < innerDim; ++i)
+      res =  pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
+  }
+};
+
+
+/***************************************************************************
+* Triangular products
+***************************************************************************/
+template<int Mode, bool LhsIsTriangular,
+         typename Lhs, bool LhsIsVector,
+         typename Rhs, bool RhsIsVector>
+struct triangular_product_impl;
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
+  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  {
+    triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
+        ::run(dst, lhs.nestedExpression(), rhs, alpha);
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
+: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  {
+    triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
+  }
+};
+
+
+/***************************************************************************
+* SelfAdjoint products
+***************************************************************************/
+template <typename Lhs, int LhsMode, bool LhsIsVector,
+          typename Rhs, int RhsMode, bool RhsIsVector>
+struct selfadjoint_product_impl;
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
+  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  {
+    selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
+: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  {
+    selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
+  }
+};
+
+
+/***************************************************************************
+* Diagonal products
+***************************************************************************/
+  
+template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
+struct diagonal_product_evaluator_base
+  : evaluator_base<Derived>
+{
+   typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
+public:
+  enum {
+    CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
+    
+    MatrixFlags = evaluator<MatrixType>::Flags,
+    DiagFlags = evaluator<DiagonalType>::Flags,
+    _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
+    _ScalarAccessOnDiag =  !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
+                           ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
+    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
+    // FIXME currently we need same types, but in the future the next rule should be the one
+    //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
+    _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
+    _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
+    Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
+    Alignment = evaluator<MatrixType>::Alignment,
+
+    AsScalarProduct =     (DiagonalType::SizeAtCompileTime==1)
+                      ||  (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
+                      ||  (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
+  };
+  
+  diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
+    : m_diagImpl(diag), m_matImpl(mat)
+  {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
+  {
+    if(AsScalarProduct)
+      return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
+    else
+      return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
+  }
+  
+protected:
+  template<int LoadMode,typename PacketType>
+  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
+  {
+    return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
+                          internal::pset1<PacketType>(m_diagImpl.coeff(id)));
+  }
+  
+  template<int LoadMode,typename PacketType>
+  EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
+  {
+    enum {
+      InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
+      DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
+    };
+    return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
+                          m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
+  }
+  
+  evaluator<DiagonalType> m_diagImpl;
+  evaluator<MatrixType>   m_matImpl;
+};
+
+// diagonal * dense
+template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
+  : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
+{
+  typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
+  using Base::m_diagImpl;
+  using Base::m_matImpl;
+  using Base::coeff;
+  typedef typename Base::Scalar Scalar;
+  
+  typedef Product<Lhs, Rhs, ProductKind> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  
+  enum {
+    StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
+  };
+
+  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+    : Base(xpr.rhs(), xpr.lhs().diagonal())
+  {
+  }
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+  {
+    return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
+  }
+  
+#ifndef __CUDACC__
+  template<int LoadMode,typename PacketType>
+  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
+  {
+    // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
+    // See also similar calls below.
+    return this->template packet_impl<LoadMode,PacketType>(row,col, row,
+                                 typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
+  }
+  
+  template<int LoadMode,typename PacketType>
+  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
+  {
+    return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+  }
+#endif
+};
+
+// dense * diagonal
+template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
+  : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
+{
+  typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
+  using Base::m_diagImpl;
+  using Base::m_matImpl;
+  using Base::coeff;
+  typedef typename Base::Scalar Scalar;
+  
+  typedef Product<Lhs, Rhs, ProductKind> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  
+  enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
+
+  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+    : Base(xpr.lhs(), xpr.rhs().diagonal())
+  {
+  }
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
+  {
+    return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
+  }
+  
+#ifndef __CUDACC__
+  template<int LoadMode,typename PacketType>
+  EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
+  {
+    return this->template packet_impl<LoadMode,PacketType>(row,col, col,
+                                 typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
+  }
+  
+  template<int LoadMode,typename PacketType>
+  EIGEN_STRONG_INLINE PacketType packet(Index idx) const
+  {
+    return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
+  }
+#endif
+};
+
+/***************************************************************************
+* Products with permutation matrices
+***************************************************************************/
+
+/** \internal
+  * \class permutation_matrix_product
+  * Internal helper class implementing the product between a permutation matrix and a matrix.
+  * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h
+  */
+template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
+struct permutation_matrix_product;
+
+template<typename ExpressionType, int Side, bool Transposed>
+struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
+{
+    typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
+    typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
+
+    template<typename Dest, typename PermutationType>
+    static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
+    {
+      MatrixType mat(xpr);
+      const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
+      // FIXME we need an is_same for expression that is not sensitive to constness. For instance
+      // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
+      //if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
+      if(is_same_dense(dst, mat))
+      {
+        // apply the permutation inplace
+        Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
+        mask.fill(false);
+        Index r = 0;
+        while(r < perm.size())
+        {
+          // search for the next seed
+          while(r<perm.size() && mask[r]) r++;
+          if(r>=perm.size())
+            break;
+          // we got one, let's follow it until we are back to the seed
+          Index k0 = r++;
+          Index kPrev = k0;
+          mask.coeffRef(k0) = true;
+          for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
+          {
+                  Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
+            .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+                       (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
+
+            mask.coeffRef(k) = true;
+            kPrev = k;
+          }
+        }
+      }
+      else
+      {
+        for(Index i = 0; i < n; ++i)
+        {
+          Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+               (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
+
+          =
+
+          Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
+               (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
+        }
+      }
+    }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
+  {
+    permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
+  {
+    permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
+  }
+};
+
+
+/***************************************************************************
+* Products with transpositions matrices
+***************************************************************************/
+
+// FIXME could we unify Transpositions and Permutation into a single "shape"??
+
+/** \internal
+  * \class transposition_matrix_product
+  * Internal helper class implementing the product between a permutation matrix and a matrix.
+  */
+template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
+struct transposition_matrix_product
+{
+  typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
+  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
+  
+  template<typename Dest, typename TranspositionType>
+  static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
+  {
+    MatrixType mat(xpr);
+    typedef typename TranspositionType::StorageIndex StorageIndex;
+    const Index size = tr.size();
+    StorageIndex j = 0;
+
+    if(!is_same_dense(dst,mat))
+      dst = mat;
+
+    for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
+      if(Index(j=tr.coeff(k))!=k)
+      {
+        if(Side==OnTheLeft)        dst.row(k).swap(dst.row(j));
+        else if(Side==OnTheRight)  dst.col(k).swap(dst.col(j));
+      }
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
+  }
+};
+
+
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
+  {
+    transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
+  }
+};
+
+template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
+struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
+  {
+    transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PRODUCT_EVALUATORS_H
diff --git a/third-party/Eigen/src/Core/Random.h b/third-party/Eigen/src/Core/Random.h
new file mode 100644
index 00000000..6faf789c
--- /dev/null
+++ b/third-party/Eigen/src/Core/Random.h
@@ -0,0 +1,182 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_RANDOM_H
+#define EIGEN_RANDOM_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename Scalar> struct scalar_random_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op)
+  inline const Scalar operator() () const { return random<Scalar>(); }
+};
+
+template<typename Scalar>
+struct functor_traits<scalar_random_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false, IsRepeatable = false }; };
+
+} // end namespace internal
+
+/** \returns a random matrix expression
+  *
+  * Numbers are uniformly spread through their whole definition range for integer types,
+  * and in the [-1:1] range for floating point scalar types.
+  * 
+  * The parameters \a rows and \a cols are the number of rows and of columns of
+  * the returned matrix. Must be compatible with this MatrixBase type.
+  *
+  * \not_reentrant
+  * 
+  * This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
+  * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
+  * instead.
+  * 
+  *
+  * Example: \include MatrixBase_random_int_int.cpp
+  * Output: \verbinclude MatrixBase_random_int_int.out
+  *
+  * This expression has the "evaluate before nesting" flag so that it will be evaluated into
+  * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
+  * behavior with expressions involving random matrices.
+  * 
+  * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
+  *
+  * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
+  */
+template<typename Derived>
+inline const typename DenseBase<Derived>::RandomReturnType
+DenseBase<Derived>::Random(Index rows, Index cols)
+{
+  return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
+}
+
+/** \returns a random vector expression
+  *
+  * Numbers are uniformly spread through their whole definition range for integer types,
+  * and in the [-1:1] range for floating point scalar types.
+  *
+  * The parameter \a size is the size of the returned vector.
+  * Must be compatible with this MatrixBase type.
+  *
+  * \only_for_vectors
+  * \not_reentrant
+  *
+  * This variant is meant to be used for dynamic-size vector types. For fixed-size types,
+  * it is redundant to pass \a size as argument, so Random() should be used
+  * instead.
+  *
+  * Example: \include MatrixBase_random_int.cpp
+  * Output: \verbinclude MatrixBase_random_int.out
+  *
+  * This expression has the "evaluate before nesting" flag so that it will be evaluated into
+  * a temporary vector whenever it is nested in a larger expression. This prevents unexpected
+  * behavior with expressions involving random matrices.
+  *
+  * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
+  */
+template<typename Derived>
+inline const typename DenseBase<Derived>::RandomReturnType
+DenseBase<Derived>::Random(Index size)
+{
+  return NullaryExpr(size, internal::scalar_random_op<Scalar>());
+}
+
+/** \returns a fixed-size random matrix or vector expression
+  *
+  * Numbers are uniformly spread through their whole definition range for integer types,
+  * and in the [-1:1] range for floating point scalar types.
+  * 
+  * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you
+  * need to use the variants taking size arguments.
+  *
+  * Example: \include MatrixBase_random.cpp
+  * Output: \verbinclude MatrixBase_random.out
+  *
+  * This expression has the "evaluate before nesting" flag so that it will be evaluated into
+  * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
+  * behavior with expressions involving random matrices.
+  * 
+  * \not_reentrant
+  *
+  * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
+  */
+template<typename Derived>
+inline const typename DenseBase<Derived>::RandomReturnType
+DenseBase<Derived>::Random()
+{
+  return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
+}
+
+/** Sets all coefficients in this expression to random values.
+  *
+  * Numbers are uniformly spread through their whole definition range for integer types,
+  * and in the [-1:1] range for floating point scalar types.
+  * 
+  * \not_reentrant
+  * 
+  * Example: \include MatrixBase_setRandom.cpp
+  * Output: \verbinclude MatrixBase_setRandom.out
+  *
+  * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index)
+  */
+template<typename Derived>
+inline Derived& DenseBase<Derived>::setRandom()
+{
+  return *this = Random(rows(), cols());
+}
+
+/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values.
+  *
+  * Numbers are uniformly spread through their whole definition range for integer types,
+  * and in the [-1:1] range for floating point scalar types.
+  * 
+  * \only_for_vectors
+  * \not_reentrant
+  *
+  * Example: \include Matrix_setRandom_int.cpp
+  * Output: \verbinclude Matrix_setRandom_int.out
+  *
+  * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setRandom(Index newSize)
+{
+  resize(newSize);
+  return setRandom();
+}
+
+/** Resizes to the given size, and sets all coefficients in this expression to random values.
+  *
+  * Numbers are uniformly spread through their whole definition range for integer types,
+  * and in the [-1:1] range for floating point scalar types.
+  *
+  * \not_reentrant
+  * 
+  * \param rows the new number of rows
+  * \param cols the new number of columns
+  *
+  * Example: \include Matrix_setRandom_int_int.cpp
+  * Output: \verbinclude Matrix_setRandom_int_int.out
+  *
+  * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived&
+PlainObjectBase<Derived>::setRandom(Index rows, Index cols)
+{
+  resize(rows, cols);
+  return setRandom();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_RANDOM_H
diff --git a/third-party/Eigen/src/Core/Redux.h b/third-party/Eigen/src/Core/Redux.h
new file mode 100644
index 00000000..760e9f86
--- /dev/null
+++ b/third-party/Eigen/src/Core/Redux.h
@@ -0,0 +1,505 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REDUX_H
+#define EIGEN_REDUX_H
+
+namespace Eigen { 
+
+namespace internal {
+
+// TODO
+//  * implement other kind of vectorization
+//  * factorize code
+
+/***************************************************************************
+* Part 1 : the logic deciding a strategy for vectorization and unrolling
+***************************************************************************/
+
+template<typename Func, typename Derived>
+struct redux_traits
+{
+public:
+    typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
+  enum {
+    PacketSize = unpacket_traits<PacketType>::size,
+    InnerMaxSize = int(Derived::IsRowMajor)
+                 ? Derived::MaxColsAtCompileTime
+                 : Derived::MaxRowsAtCompileTime
+  };
+
+  enum {
+    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
+                  && (functor_traits<Func>::PacketAccess),
+    MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
+    MaySliceVectorize  = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
+  };
+
+public:
+  enum {
+    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
+                                        : int(DefaultTraversal)
+  };
+
+public:
+  enum {
+    Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
+         : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
+    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
+  };
+
+public:
+  enum {
+    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
+  };
+  
+#ifdef EIGEN_DEBUG_ASSIGN
+  static void debug()
+  {
+    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
+    std::cerr.setf(std::ios::hex, std::ios::basefield);
+    EIGEN_DEBUG_VAR(Derived::Flags)
+    std::cerr.unsetf(std::ios::hex);
+    EIGEN_DEBUG_VAR(InnerMaxSize)
+    EIGEN_DEBUG_VAR(PacketSize)
+    EIGEN_DEBUG_VAR(MightVectorize)
+    EIGEN_DEBUG_VAR(MayLinearVectorize)
+    EIGEN_DEBUG_VAR(MaySliceVectorize)
+    EIGEN_DEBUG_VAR(Traversal)
+    EIGEN_DEBUG_VAR(UnrollingLimit)
+    EIGEN_DEBUG_VAR(Unrolling)
+    std::cerr << std::endl;
+  }
+#endif
+};
+
+/***************************************************************************
+* Part 2 : unrollers
+***************************************************************************/
+
+/*** no vectorization ***/
+
+template<typename Func, typename Derived, int Start, int Length>
+struct redux_novec_unroller
+{
+  enum {
+    HalfLength = Length/2
+  };
+
+  typedef typename Derived::Scalar Scalar;
+
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+  {
+    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+                redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
+  }
+};
+
+template<typename Func, typename Derived, int Start>
+struct redux_novec_unroller<Func, Derived, Start, 1>
+{
+  enum {
+    outer = Start / Derived::InnerSizeAtCompileTime,
+    inner = Start % Derived::InnerSizeAtCompileTime
+  };
+
+  typedef typename Derived::Scalar Scalar;
+
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
+  {
+    return mat.coeffByOuterInner(outer, inner);
+  }
+};
+
+// This is actually dead code and will never be called. It is required
+// to prevent false warnings regarding failed inlining though
+// for 0 length run() will never be called at all.
+template<typename Func, typename Derived, int Start>
+struct redux_novec_unroller<Func, Derived, Start, 0>
+{
+  typedef typename Derived::Scalar Scalar;
+  EIGEN_DEVICE_FUNC 
+  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
+};
+
+/*** vectorization ***/
+
+template<typename Func, typename Derived, int Start, int Length>
+struct redux_vec_unroller
+{
+  enum {
+    PacketSize = redux_traits<Func, Derived>::PacketSize,
+    HalfLength = Length/2
+  };
+
+  typedef typename Derived::Scalar Scalar;
+  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+
+  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
+  {
+    return func.packetOp(
+            redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
+            redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
+  }
+};
+
+template<typename Func, typename Derived, int Start>
+struct redux_vec_unroller<Func, Derived, Start, 1>
+{
+  enum {
+    index = Start * redux_traits<Func, Derived>::PacketSize,
+    outer = index / int(Derived::InnerSizeAtCompileTime),
+    inner = index % int(Derived::InnerSizeAtCompileTime),
+    alignment = Derived::Alignment
+  };
+
+  typedef typename Derived::Scalar Scalar;
+  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+
+  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
+  {
+    return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
+  }
+};
+
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+
+template<typename Func, typename Derived,
+         int Traversal = redux_traits<Func, Derived>::Traversal,
+         int Unrolling = redux_traits<Func, Derived>::Unrolling
+>
+struct redux_impl;
+
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
+{
+  typedef typename Derived::Scalar Scalar;
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+  {
+    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+    Scalar res;
+    res = mat.coeffByOuterInner(0, 0);
+    for(Index i = 1; i < mat.innerSize(); ++i)
+      res = func(res, mat.coeffByOuterInner(0, i));
+    for(Index i = 1; i < mat.outerSize(); ++i)
+      for(Index j = 0; j < mat.innerSize(); ++j)
+        res = func(res, mat.coeffByOuterInner(i, j));
+    return res;
+  }
+};
+
+template<typename Func, typename Derived>
+struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
+  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
+{};
+
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
+{
+  typedef typename Derived::Scalar Scalar;
+  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+
+  static Scalar run(const Derived &mat, const Func& func)
+  {
+    const Index size = mat.size();
+    
+    const Index packetSize = redux_traits<Func, Derived>::PacketSize;
+    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
+    enum {
+      alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
+      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
+    };
+    const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
+    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
+    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
+    const Index alignedEnd2 = alignedStart + alignedSize2;
+    const Index alignedEnd  = alignedStart + alignedSize;
+    Scalar res;
+    if(alignedSize)
+    {
+      PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
+      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
+      {
+        PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
+        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
+        {
+          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
+          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
+        }
+
+        packet_res0 = func.packetOp(packet_res0,packet_res1);
+        if(alignedEnd>alignedEnd2)
+          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
+      }
+      res = func.predux(packet_res0);
+
+      for(Index index = 0; index < alignedStart; ++index)
+        res = func(res,mat.coeff(index));
+
+      for(Index index = alignedEnd; index < size; ++index)
+        res = func(res,mat.coeff(index));
+    }
+    else // too small to vectorize anything.
+         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
+    {
+      res = mat.coeff(0);
+      for(Index index = 1; index < size; ++index)
+        res = func(res,mat.coeff(index));
+    }
+
+    return res;
+  }
+};
+
+// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
+template<typename Func, typename Derived, int Unrolling>
+struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
+{
+  typedef typename Derived::Scalar Scalar;
+  typedef typename redux_traits<Func, Derived>::PacketType PacketType;
+
+  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
+  {
+    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+    const Index innerSize = mat.innerSize();
+    const Index outerSize = mat.outerSize();
+    enum {
+      packetSize = redux_traits<Func, Derived>::PacketSize
+    };
+    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
+    Scalar res;
+    if(packetedInnerSize)
+    {
+      PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
+      for(Index j=0; j<outerSize; ++j)
+        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
+          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
+
+      res = func.predux(packet_res);
+      for(Index j=0; j<outerSize; ++j)
+        for(Index i=packetedInnerSize; i<innerSize; ++i)
+          res = func(res, mat.coeffByOuterInner(j,i));
+    }
+    else // too small to vectorize anything.
+         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
+    {
+      res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
+    }
+
+    return res;
+  }
+};
+
+template<typename Func, typename Derived>
+struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
+{
+  typedef typename Derived::Scalar Scalar;
+
+  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
+  enum {
+    PacketSize = redux_traits<Func, Derived>::PacketSize,
+    Size = Derived::SizeAtCompileTime,
+    VectorizedSize = (Size / PacketSize) * PacketSize
+  };
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
+  {
+    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
+    if (VectorizedSize > 0) {
+      Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
+      if (VectorizedSize != Size)
+        res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
+      return res;
+    }
+    else {
+      return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
+    }
+  }
+};
+
+// evaluator adaptor
+template<typename _XprType>
+class redux_evaluator
+{
+public:
+  typedef _XprType XprType;
+  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
+  
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename XprType::PacketScalar PacketScalar;
+  typedef typename XprType::PacketReturnType PacketReturnType;
+  
+  enum {
+    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
+    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
+    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
+    IsRowMajor = XprType::IsRowMajor,
+    SizeAtCompileTime = XprType::SizeAtCompileTime,
+    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
+    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
+    Alignment = evaluator<XprType>::Alignment
+  };
+  
+  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
+  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
+  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
+  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
+
+  EIGEN_DEVICE_FUNC
+  CoeffReturnType coeff(Index row, Index col) const
+  { return m_evaluator.coeff(row, col); }
+
+  EIGEN_DEVICE_FUNC
+  CoeffReturnType coeff(Index index) const
+  { return m_evaluator.coeff(index); }
+
+  template<int LoadMode, typename PacketType>
+  PacketType packet(Index row, Index col) const
+  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
+
+  template<int LoadMode, typename PacketType>
+  PacketType packet(Index index) const
+  { return m_evaluator.template packet<LoadMode,PacketType>(index); }
+  
+  EIGEN_DEVICE_FUNC
+  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
+  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+  
+  template<int LoadMode, typename PacketType>
+  PacketType packetByOuterInner(Index outer, Index inner) const
+  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
+  
+  const XprType & nestedExpression() const { return m_xpr; }
+  
+protected:
+  internal::evaluator<XprType> m_evaluator;
+  const XprType &m_xpr;
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Part 4 : public API
+***************************************************************************/
+
+
+/** \returns the result of a full redux operation on the whole matrix or vector using \a func
+  *
+  * The template parameter \a BinaryOp is the type of the functor \a func which must be
+  * an associative operator. Both current C++98 and C++11 functor styles are handled.
+  *
+  * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
+  */
+template<typename Derived>
+template<typename Func>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::redux(const Func& func) const
+{
+  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
+
+  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
+  ThisEvaluator thisEval(derived());
+  
+  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
+}
+
+/** \returns the minimum of all coefficients of \c *this.
+  * \warning the result is undefined if \c *this contains NaN.
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::minCoeff() const
+{
+  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
+}
+
+/** \returns the maximum of all coefficients of \c *this.
+  * \warning the result is undefined if \c *this contains NaN.
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::maxCoeff() const
+{
+  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
+}
+
+/** \returns the sum of all coefficients of \c *this
+  *
+  * If \c *this is empty, then the value 0 is returned.
+  *
+  * \sa trace(), prod(), mean()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::sum() const
+{
+  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
+    return Scalar(0);
+  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
+}
+
+/** \returns the mean of all coefficients of *this
+*
+* \sa trace(), prod(), sum()
+*/
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::mean() const
+{
+#ifdef __INTEL_COMPILER
+  #pragma warning push
+  #pragma warning ( disable : 2259 )
+#endif
+  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
+#ifdef __INTEL_COMPILER
+  #pragma warning pop
+#endif
+}
+
+/** \returns the product of all coefficients of *this
+  *
+  * Example: \include MatrixBase_prod.cpp
+  * Output: \verbinclude MatrixBase_prod.out
+  *
+  * \sa sum(), mean(), trace()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::prod() const
+{
+  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
+    return Scalar(1);
+  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
+}
+
+/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
+  *
+  * \c *this can be any matrix, not necessarily square.
+  *
+  * \sa diagonal(), sum()
+  */
+template<typename Derived>
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
+MatrixBase<Derived>::trace() const
+{
+  return derived().diagonal().sum();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_REDUX_H
diff --git a/third-party/Eigen/src/Core/Ref.h b/third-party/Eigen/src/Core/Ref.h
new file mode 100644
index 00000000..17a1496b
--- /dev/null
+++ b/third-party/Eigen/src/Core/Ref.h
@@ -0,0 +1,284 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REF_H
+#define EIGEN_REF_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename _PlainObjectType, int _Options, typename _StrideType>
+struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
+  : public traits<Map<_PlainObjectType, _Options, _StrideType> >
+{
+  typedef _PlainObjectType PlainObjectType;
+  typedef _StrideType StrideType;
+  enum {
+    Options = _Options,
+    Flags = traits<Map<_PlainObjectType, _Options, _StrideType> >::Flags | NestByRefBit,
+    Alignment = traits<Map<_PlainObjectType, _Options, _StrideType> >::Alignment
+  };
+
+  template<typename Derived> struct match {
+    enum {
+      IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime,
+      HasDirectAccess = internal::has_direct_access<Derived>::ret,
+      StorageOrderMatch = IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)),
+      InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic)
+                      || int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime)
+                      || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
+      OuterStrideMatch = IsVectorAtCompileTime
+                      || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
+      // NOTE, this indirection of evaluator<Derived>::Alignment is needed
+      // to workaround a very strange bug in MSVC related to the instantiation
+      // of has_*ary_operator in evaluator<CwiseNullaryOp>.
+      // This line is surprisingly very sensitive. For instance, simply adding parenthesis
+      // as "DerivedAlignment = (int(evaluator<Derived>::Alignment))," will make MSVC fail...
+      DerivedAlignment = int(evaluator<Derived>::Alignment),
+      AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
+      ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
+      MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
+    };
+    typedef typename internal::conditional<MatchAtCompileTime,internal::true_type,internal::false_type>::type type;
+  };
+  
+};
+
+template<typename Derived>
+struct traits<RefBase<Derived> > : public traits<Derived> {};
+
+}
+
+template<typename Derived> class RefBase
+ : public MapBase<Derived>
+{
+  typedef typename internal::traits<Derived>::PlainObjectType PlainObjectType;
+  typedef typename internal::traits<Derived>::StrideType StrideType;
+
+public:
+
+  typedef MapBase<Derived> Base;
+  EIGEN_DENSE_PUBLIC_INTERFACE(RefBase)
+
+  EIGEN_DEVICE_FUNC inline Index innerStride() const
+  {
+    return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
+  }
+
+  EIGEN_DEVICE_FUNC inline Index outerStride() const
+  {
+    return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
+         : IsVectorAtCompileTime ? this->size()
+         : int(Flags)&RowMajorBit ? this->cols()
+         : this->rows();
+  }
+
+  EIGEN_DEVICE_FUNC RefBase()
+    : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime),
+      // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values:
+      m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
+               StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime)
+  {}
+  
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase)
+
+protected:
+
+  typedef Stride<StrideType::OuterStrideAtCompileTime,StrideType::InnerStrideAtCompileTime> StrideBase;
+
+  template<typename Expression>
+  EIGEN_DEVICE_FUNC void construct(Expression& expr)
+  {
+    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression);
+
+    if(PlainObjectType::RowsAtCompileTime==1)
+    {
+      eigen_assert(expr.rows()==1 || expr.cols()==1);
+      ::new (static_cast<Base*>(this)) Base(expr.data(), 1, expr.size());
+    }
+    else if(PlainObjectType::ColsAtCompileTime==1)
+    {
+      eigen_assert(expr.rows()==1 || expr.cols()==1);
+      ::new (static_cast<Base*>(this)) Base(expr.data(), expr.size(), 1);
+    }
+    else
+      ::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
+    
+    if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit)))
+      ::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1);
+    else
+      ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
+                                   StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());    
+  }
+
+  StrideBase m_stride;
+};
+
+/** \class Ref
+  * \ingroup Core_Module
+  *
+  * \brief A matrix or vector expression mapping an existing expression
+  *
+  * \tparam PlainObjectType the equivalent matrix type of the mapped data
+  * \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned.
+  *                 The default is \c #Unaligned.
+  * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1),
+  *                   but accepts a variable outer stride (leading dimension).
+  *                   This can be overridden by specifying strides.
+  *                   The type passed here must be a specialization of the Stride template, see examples below.
+  *
+  * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies.
+  * A Ref<> object can represent either a const expression or a l-value:
+  * \code
+  * // in-out argument:
+  * void foo1(Ref<VectorXf> x);
+  *
+  * // read-only const argument:
+  * void foo2(const Ref<const VectorXf>& x);
+  * \endcode
+  *
+  * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered.
+  * By default, a Ref<VectorXf> can reference any dense vector expression of float having a contiguous memory layout.
+  * Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with
+  * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension)
+  * can be greater than the number of rows.
+  *
+  * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function.
+  * Here are some examples:
+  * \code
+  * MatrixXf A;
+  * VectorXf a;
+  * foo1(a.head());             // OK
+  * foo1(A.col());              // OK
+  * foo1(A.row());              // Compilation error because here innerstride!=1
+  * foo2(A.row());              // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object
+  * foo2(A.row().transpose());  // The row is copied into a contiguous temporary
+  * foo2(2*a);                  // The expression is evaluated into a temporary
+  * foo2(A.col().segment(2,4)); // No temporary
+  * \endcode
+  *
+  * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters.
+  * Here is an example accepting an innerstride!=1:
+  * \code
+  * // in-out argument:
+  * void foo3(Ref<VectorXf,0,InnerStride<> > x);
+  * foo3(A.row());              // OK
+  * \endcode
+  * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more
+  * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a
+  * template function, e.g.:
+  * \code
+  * // in the .h:
+  * void foo(const Ref<MatrixXf>& A);
+  * void foo(const Ref<MatrixXf,0,Stride<> >& A);
+  *
+  * // in the .cpp:
+  * template<typename TypeOfA> void foo_impl(const TypeOfA& A) {
+  *     ... // crazy code goes here
+  * }
+  * void foo(const Ref<MatrixXf>& A) { foo_impl(A); }
+  * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); }
+  * \endcode
+  *
+  *
+  * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
+  */
+template<typename PlainObjectType, int Options, typename StrideType> class Ref
+  : public RefBase<Ref<PlainObjectType, Options, StrideType> >
+{
+  private:
+    typedef internal::traits<Ref> Traits;
+    template<typename Derived>
+    EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0);
+  public:
+
+    typedef RefBase<Ref> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
+
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename Derived>
+    EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
+    {
+      EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
+      Base::construct(expr.derived());
+    }
+    template<typename Derived>
+    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::MatchAtCompileTime),Derived>::type* = 0)
+    #else
+    /** Implicit constructor from any dense expression */
+    template<typename Derived>
+    inline Ref(DenseBase<Derived>& expr)
+    #endif
+    {
+      EIGEN_STATIC_ASSERT(bool(internal::is_lvalue<Derived>::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
+      EIGEN_STATIC_ASSERT(bool(Traits::template match<Derived>::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH);
+      EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
+      Base::construct(expr.const_cast_derived());
+    }
+
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref)
+
+};
+
+// this is the const ref version
+template<typename TPlainObjectType, int Options, typename StrideType> class Ref<const TPlainObjectType, Options, StrideType>
+  : public RefBase<Ref<const TPlainObjectType, Options, StrideType> >
+{
+    typedef internal::traits<Ref> Traits;
+  public:
+
+    typedef RefBase<Ref> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Ref)
+
+    template<typename Derived>
+    EIGEN_DEVICE_FUNC inline Ref(const DenseBase<Derived>& expr,
+                                 typename internal::enable_if<bool(Traits::template match<Derived>::ScalarTypeMatch),Derived>::type* = 0)
+    {
+//      std::cout << match_helper<Derived>::HasDirectAccess << "," << match_helper<Derived>::OuterStrideMatch << "," << match_helper<Derived>::InnerStrideMatch << "\n";
+//      std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n";
+//      std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n";
+      construct(expr.derived(), typename Traits::template match<Derived>::type());
+    }
+
+    EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) {
+      // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy
+    }
+
+    template<typename OtherRef>
+    EIGEN_DEVICE_FUNC inline Ref(const RefBase<OtherRef>& other) {
+      construct(other.derived(), typename Traits::template match<OtherRef>::type());
+    }
+
+  protected:
+
+    template<typename Expression>
+    EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type)
+    {
+      Base::construct(expr);
+    }
+
+    template<typename Expression>
+    EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type)
+    {
+      internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar,Scalar>());
+      Base::construct(m_object);
+    }
+
+  protected:
+    TPlainObjectType m_object;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_REF_H
diff --git a/third-party/Eigen/src/Core/Replicate.h b/third-party/Eigen/src/Core/Replicate.h
new file mode 100644
index 00000000..9960ef88
--- /dev/null
+++ b/third-party/Eigen/src/Core/Replicate.h
@@ -0,0 +1,142 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REPLICATE_H
+#define EIGEN_REPLICATE_H
+
+namespace Eigen { 
+
+namespace internal {
+template<typename MatrixType,int RowFactor,int ColFactor>
+struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
+ : traits<MatrixType>
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename traits<MatrixType>::StorageKind StorageKind;
+  typedef typename traits<MatrixType>::XprKind XprKind;
+  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+  enum {
+    RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic
+                      ? Dynamic
+                      : RowFactor * MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic
+                      ? Dynamic
+                      : ColFactor * MatrixType::ColsAtCompileTime,
+   //FIXME we don't propagate the max sizes !!!
+    MaxRowsAtCompileTime = RowsAtCompileTime,
+    MaxColsAtCompileTime = ColsAtCompileTime,
+    IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
+               : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
+               : (MatrixType::Flags & RowMajorBit) ? 1 : 0,
+    
+    // FIXME enable DirectAccess with negative strides?
+    Flags = IsRowMajor ? RowMajorBit : 0
+  };
+};
+}
+
+/**
+  * \class Replicate
+  * \ingroup Core_Module
+  *
+  * \brief Expression of the multiple replication of a matrix or vector
+  *
+  * \tparam MatrixType the type of the object we are replicating
+  * \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic.
+  * \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic.
+  *
+  * This class represents an expression of the multiple replication of a matrix or vector.
+  * It is the return type of DenseBase::replicate() and most of the time
+  * this is the only way it is used.
+  *
+  * \sa DenseBase::replicate()
+  */
+template<typename MatrixType,int RowFactor,int ColFactor> class Replicate
+  : public internal::dense_xpr_base< Replicate<MatrixType,RowFactor,ColFactor> >::type
+{
+    typedef typename internal::traits<Replicate>::MatrixTypeNested MatrixTypeNested;
+    typedef typename internal::traits<Replicate>::_MatrixTypeNested _MatrixTypeNested;
+  public:
+
+    typedef typename internal::dense_xpr_base<Replicate>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Replicate)
+    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+
+    template<typename OriginalMatrixType>
+    EIGEN_DEVICE_FUNC
+    inline explicit Replicate(const OriginalMatrixType& matrix)
+      : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor)
+    {
+      EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
+                          THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
+      eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic);
+    }
+
+    template<typename OriginalMatrixType>
+    EIGEN_DEVICE_FUNC
+    inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor)
+      : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor)
+    {
+      EIGEN_STATIC_ASSERT((internal::is_same<typename internal::remove_const<MatrixType>::type,OriginalMatrixType>::value),
+                          THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE)
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); }
+    EIGEN_DEVICE_FUNC
+    inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); }
+
+    EIGEN_DEVICE_FUNC
+    const _MatrixTypeNested& nestedExpression() const
+    { 
+      return m_matrix; 
+    }
+
+  protected:
+    MatrixTypeNested m_matrix;
+    const internal::variable_if_dynamic<Index, RowFactor> m_rowFactor;
+    const internal::variable_if_dynamic<Index, ColFactor> m_colFactor;
+};
+
+/**
+  * \return an expression of the replication of \c *this
+  *
+  * Example: \include MatrixBase_replicate.cpp
+  * Output: \verbinclude MatrixBase_replicate.out
+  *
+  * \sa VectorwiseOp::replicate(), DenseBase::replicate(Index,Index), class Replicate
+  */
+template<typename Derived>
+template<int RowFactor, int ColFactor>
+const Replicate<Derived,RowFactor,ColFactor>
+DenseBase<Derived>::replicate() const
+{
+  return Replicate<Derived,RowFactor,ColFactor>(derived());
+}
+
+/**
+  * \return an expression of the replication of each column (or row) of \c *this
+  *
+  * Example: \include DirectionWise_replicate_int.cpp
+  * Output: \verbinclude DirectionWise_replicate_int.out
+  *
+  * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate
+  */
+template<typename ExpressionType, int Direction>
+const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
+VectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const
+{
+  return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType
+          (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_REPLICATE_H
diff --git a/third-party/Eigen/src/Core/ReturnByValue.h b/third-party/Eigen/src/Core/ReturnByValue.h
new file mode 100644
index 00000000..c44b7673
--- /dev/null
+++ b/third-party/Eigen/src/Core/ReturnByValue.h
@@ -0,0 +1,117 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_RETURNBYVALUE_H
+#define EIGEN_RETURNBYVALUE_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Derived>
+struct traits<ReturnByValue<Derived> >
+  : public traits<typename traits<Derived>::ReturnType>
+{
+  enum {
+    // We're disabling the DirectAccess because e.g. the constructor of
+    // the Block-with-DirectAccess expression requires to have a coeffRef method.
+    // Also, we don't want to have to implement the stride stuff.
+    Flags = (traits<typename traits<Derived>::ReturnType>::Flags
+             | EvalBeforeNestingBit) & ~DirectAccessBit
+  };
+};
+
+/* The ReturnByValue object doesn't even have a coeff() method.
+ * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix.
+ * So internal::nested always gives the plain return matrix type.
+ *
+ * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ??
+ * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators
+ */
+template<typename Derived,int n,typename PlainObject>
+struct nested_eval<ReturnByValue<Derived>, n, PlainObject>
+{
+  typedef typename traits<Derived>::ReturnType type;
+};
+
+} // end namespace internal
+
+/** \class ReturnByValue
+  * \ingroup Core_Module
+  *
+  */
+template<typename Derived> class ReturnByValue
+  : public internal::dense_xpr_base< ReturnByValue<Derived> >::type, internal::no_assignment_operator
+{
+  public:
+    typedef typename internal::traits<Derived>::ReturnType ReturnType;
+
+    typedef typename internal::dense_xpr_base<ReturnByValue>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
+
+    template<typename Dest>
+    EIGEN_DEVICE_FUNC
+    inline void evalTo(Dest& dst) const
+    { static_cast<const Derived*>(this)->evalTo(dst); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
+    class Unusable{
+      Unusable(const Unusable&) {}
+      Unusable& operator=(const Unusable&) {return *this;}
+    };
+    const Unusable& coeff(Index) const { return *reinterpret_cast<const Unusable*>(this); }
+    const Unusable& coeff(Index,Index) const { return *reinterpret_cast<const Unusable*>(this); }
+    Unusable& coeffRef(Index) { return *reinterpret_cast<Unusable*>(this); }
+    Unusable& coeffRef(Index,Index) { return *reinterpret_cast<Unusable*>(this); }
+#undef Unusable
+#endif
+};
+
+template<typename Derived>
+template<typename OtherDerived>
+Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
+{
+  other.evalTo(derived());
+  return derived();
+}
+
+namespace internal {
+
+// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that
+// when a ReturnByValue expression is assigned, the evaluator is not constructed.
+// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world
+  
+template<typename Derived>
+struct evaluator<ReturnByValue<Derived> >
+  : public evaluator<typename internal::traits<Derived>::ReturnType>
+{
+  typedef ReturnByValue<Derived> XprType;
+  typedef typename internal::traits<Derived>::ReturnType PlainObject;
+  typedef evaluator<PlainObject> Base;
+  
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+    : m_result(xpr.rows(), xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    xpr.evalTo(m_result);
+  }
+
+protected:
+  PlainObject m_result;
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_RETURNBYVALUE_H
diff --git a/third-party/Eigen/src/Core/Reverse.h b/third-party/Eigen/src/Core/Reverse.h
new file mode 100644
index 00000000..0640cda2
--- /dev/null
+++ b/third-party/Eigen/src/Core/Reverse.h
@@ -0,0 +1,211 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Ricard Marxer <email@ricardmarxer.com>
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REVERSE_H
+#define EIGEN_REVERSE_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename MatrixType, int Direction>
+struct traits<Reverse<MatrixType, Direction> >
+ : traits<MatrixType>
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename traits<MatrixType>::StorageKind StorageKind;
+  typedef typename traits<MatrixType>::XprKind XprKind;
+  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+    Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
+  };
+};
+
+template<typename PacketType, bool ReversePacket> struct reverse_packet_cond
+{
+  static inline PacketType run(const PacketType& x) { return preverse(x); }
+};
+
+template<typename PacketType> struct reverse_packet_cond<PacketType,false>
+{
+  static inline PacketType run(const PacketType& x) { return x; }
+};
+
+} // end namespace internal 
+
+/** \class Reverse
+  * \ingroup Core_Module
+  *
+  * \brief Expression of the reverse of a vector or matrix
+  *
+  * \tparam MatrixType the type of the object of which we are taking the reverse
+  * \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections
+  *
+  * This class represents an expression of the reverse of a vector.
+  * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
+  * and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::reverse(), VectorwiseOp::reverse()
+  */
+template<typename MatrixType, int Direction> class Reverse
+  : public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
+{
+  public:
+
+    typedef typename internal::dense_xpr_base<Reverse>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
+    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+    using Base::IsRowMajor;
+
+  protected:
+    enum {
+      PacketSize = internal::packet_traits<Scalar>::size,
+      IsColMajor = !IsRowMajor,
+      ReverseRow = (Direction == Vertical)   || (Direction == BothDirections),
+      ReverseCol = (Direction == Horizontal) || (Direction == BothDirections),
+      OffsetRow  = ReverseRow && IsColMajor ? PacketSize : 1,
+      OffsetCol  = ReverseCol && IsRowMajor ? PacketSize : 1,
+      ReversePacket = (Direction == BothDirections)
+                    || ((Direction == Vertical)   && IsColMajor)
+                    || ((Direction == Horizontal) && IsRowMajor)
+    };
+    typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
+  public:
+
+    EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
+
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)
+
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
+
+    EIGEN_DEVICE_FUNC inline Index innerStride() const
+    {
+      return -m_matrix.innerStride();
+    }
+
+    EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
+    nestedExpression() const 
+    {
+      return m_matrix;
+    }
+
+  protected:
+    typename MatrixType::Nested m_matrix;
+};
+
+/** \returns an expression of the reverse of *this.
+  *
+  * Example: \include MatrixBase_reverse.cpp
+  * Output: \verbinclude MatrixBase_reverse.out
+  *
+  */
+template<typename Derived>
+inline typename DenseBase<Derived>::ReverseReturnType
+DenseBase<Derived>::reverse()
+{
+  return ReverseReturnType(derived());
+}
+
+
+//reverse const overload moved DenseBase.h due to a CUDA compiler bug
+
+/** This is the "in place" version of reverse: it reverses \c *this.
+  *
+  * In most cases it is probably better to simply use the reversed expression
+  * of a matrix. However, when reversing the matrix data itself is really needed,
+  * then this "in-place" version is probably the right choice because it provides
+  * the following additional benefits:
+  *  - less error prone: doing the same operation with .reverse() requires special care:
+  *    \code m = m.reverse().eval(); \endcode
+  *  - this API enables reverse operations without the need for a temporary
+  *  - it allows future optimizations (cache friendliness, etc.)
+  *
+  * \sa VectorwiseOp::reverseInPlace(), reverse() */
+template<typename Derived>
+inline void DenseBase<Derived>::reverseInPlace()
+{
+  if(cols()>rows())
+  {
+    Index half = cols()/2;
+    leftCols(half).swap(rightCols(half).reverse());
+    if((cols()%2)==1)
+    {
+      Index half2 = rows()/2;
+      col(half).head(half2).swap(col(half).tail(half2).reverse());
+    }
+  }
+  else
+  {
+    Index half = rows()/2;
+    topRows(half).swap(bottomRows(half).reverse());
+    if((rows()%2)==1)
+    {
+      Index half2 = cols()/2;
+      row(half).head(half2).swap(row(half).tail(half2).reverse());
+    }
+  }
+}
+
+namespace internal {
+  
+template<int Direction>
+struct vectorwise_reverse_inplace_impl;
+
+template<>
+struct vectorwise_reverse_inplace_impl<Vertical>
+{
+  template<typename ExpressionType>
+  static void run(ExpressionType &xpr)
+  {
+    Index half = xpr.rows()/2;
+    xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse());
+  }
+};
+
+template<>
+struct vectorwise_reverse_inplace_impl<Horizontal>
+{
+  template<typename ExpressionType>
+  static void run(ExpressionType &xpr)
+  {
+    Index half = xpr.cols()/2;
+    xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse());
+  }
+};
+
+} // end namespace internal
+
+/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this.
+  *
+  * In most cases it is probably better to simply use the reversed expression
+  * of a matrix. However, when reversing the matrix data itself is really needed,
+  * then this "in-place" version is probably the right choice because it provides
+  * the following additional benefits:
+  *  - less error prone: doing the same operation with .reverse() requires special care:
+  *    \code m = m.reverse().eval(); \endcode
+  *  - this API enables reverse operations without the need for a temporary
+  *
+  * \sa DenseBase::reverseInPlace(), reverse() */
+template<typename ExpressionType, int Direction>
+void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
+{
+  internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_REVERSE_H
diff --git a/third-party/Eigen/src/Core/Select.h b/third-party/Eigen/src/Core/Select.h
new file mode 100644
index 00000000..79eec1b5
--- /dev/null
+++ b/third-party/Eigen/src/Core/Select.h
@@ -0,0 +1,162 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELECT_H
+#define EIGEN_SELECT_H
+
+namespace Eigen { 
+
+/** \class Select
+  * \ingroup Core_Module
+  *
+  * \brief Expression of a coefficient wise version of the C++ ternary operator ?:
+  *
+  * \param ConditionMatrixType the type of the \em condition expression which must be a boolean matrix
+  * \param ThenMatrixType the type of the \em then expression
+  * \param ElseMatrixType the type of the \em else expression
+  *
+  * This class represents an expression of a coefficient wise version of the C++ ternary operator ?:.
+  * It is the return type of DenseBase::select() and most of the time this is the only way it is used.
+  *
+  * \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const
+  */
+
+namespace internal {
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
+struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
+ : traits<ThenMatrixType>
+{
+  typedef typename traits<ThenMatrixType>::Scalar Scalar;
+  typedef Dense StorageKind;
+  typedef typename traits<ThenMatrixType>::XprKind XprKind;
+  typedef typename ConditionMatrixType::Nested ConditionMatrixNested;
+  typedef typename ThenMatrixType::Nested ThenMatrixNested;
+  typedef typename ElseMatrixType::Nested ElseMatrixNested;
+  enum {
+    RowsAtCompileTime = ConditionMatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
+    Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit
+  };
+};
+}
+
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
+class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type,
+               internal::no_assignment_operator
+{
+  public:
+
+    typedef typename internal::dense_xpr_base<Select>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Select)
+
+    inline EIGEN_DEVICE_FUNC
+    Select(const ConditionMatrixType& a_conditionMatrix,
+           const ThenMatrixType& a_thenMatrix,
+           const ElseMatrixType& a_elseMatrix)
+      : m_condition(a_conditionMatrix), m_then(a_thenMatrix), m_else(a_elseMatrix)
+    {
+      eigen_assert(m_condition.rows() == m_then.rows() && m_condition.rows() == m_else.rows());
+      eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
+    }
+
+    inline EIGEN_DEVICE_FUNC Index rows() const { return m_condition.rows(); }
+    inline EIGEN_DEVICE_FUNC Index cols() const { return m_condition.cols(); }
+
+    inline EIGEN_DEVICE_FUNC
+    const Scalar coeff(Index i, Index j) const
+    {
+      if (m_condition.coeff(i,j))
+        return m_then.coeff(i,j);
+      else
+        return m_else.coeff(i,j);
+    }
+
+    inline EIGEN_DEVICE_FUNC
+    const Scalar coeff(Index i) const
+    {
+      if (m_condition.coeff(i))
+        return m_then.coeff(i);
+      else
+        return m_else.coeff(i);
+    }
+
+    inline EIGEN_DEVICE_FUNC const ConditionMatrixType& conditionMatrix() const
+    {
+      return m_condition;
+    }
+
+    inline EIGEN_DEVICE_FUNC const ThenMatrixType& thenMatrix() const
+    {
+      return m_then;
+    }
+
+    inline EIGEN_DEVICE_FUNC const ElseMatrixType& elseMatrix() const
+    {
+      return m_else;
+    }
+
+  protected:
+    typename ConditionMatrixType::Nested m_condition;
+    typename ThenMatrixType::Nested m_then;
+    typename ElseMatrixType::Nested m_else;
+};
+
+
+/** \returns a matrix where each coefficient (i,j) is equal to \a thenMatrix(i,j)
+  * if \c *this(i,j), and \a elseMatrix(i,j) otherwise.
+  *
+  * Example: \include MatrixBase_select.cpp
+  * Output: \verbinclude MatrixBase_select.out
+  *
+  * \sa class Select
+  */
+template<typename Derived>
+template<typename ThenDerived,typename ElseDerived>
+inline const Select<Derived,ThenDerived,ElseDerived>
+DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
+                            const DenseBase<ElseDerived>& elseMatrix) const
+{
+  return Select<Derived,ThenDerived,ElseDerived>(derived(), thenMatrix.derived(), elseMatrix.derived());
+}
+
+/** Version of DenseBase::select(const DenseBase&, const DenseBase&) with
+  * the \em else expression being a scalar value.
+  *
+  * \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const, class Select
+  */
+template<typename Derived>
+template<typename ThenDerived>
+inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
+DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
+                           const typename ThenDerived::Scalar& elseScalar) const
+{
+  return Select<Derived,ThenDerived,typename ThenDerived::ConstantReturnType>(
+    derived(), thenMatrix.derived(), ThenDerived::Constant(rows(),cols(),elseScalar));
+}
+
+/** Version of DenseBase::select(const DenseBase&, const DenseBase&) with
+  * the \em then expression being a scalar value.
+  *
+  * \sa DenseBase::select(const DenseBase<ThenDerived>&, const DenseBase<ElseDerived>&) const, class Select
+  */
+template<typename Derived>
+template<typename ElseDerived>
+inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
+DenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,
+                           const DenseBase<ElseDerived>& elseMatrix) const
+{
+  return Select<Derived,typename ElseDerived::ConstantReturnType,ElseDerived>(
+    derived(), ElseDerived::Constant(rows(),cols(),thenScalar), elseMatrix.derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELECT_H
diff --git a/third-party/Eigen/src/Core/SelfAdjointView.h b/third-party/Eigen/src/Core/SelfAdjointView.h
new file mode 100644
index 00000000..b2e51f37
--- /dev/null
+++ b/third-party/Eigen/src/Core/SelfAdjointView.h
@@ -0,0 +1,352 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINTMATRIX_H
+#define EIGEN_SELFADJOINTMATRIX_H
+
+namespace Eigen { 
+
+/** \class SelfAdjointView
+  * \ingroup Core_Module
+  *
+  *
+  * \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix
+  *
+  * \param MatrixType the type of the dense matrix storing the coefficients
+  * \param TriangularPart can be either \c #Lower or \c #Upper
+  *
+  * This class is an expression of a sefladjoint matrix from a triangular part of a matrix
+  * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView()
+  * and most of the time this is the only way that it is used.
+  *
+  * \sa class TriangularBase, MatrixBase::selfadjointView()
+  */
+
+namespace internal {
+template<typename MatrixType, unsigned int UpLo>
+struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
+{
+  typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
+  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
+  typedef MatrixType ExpressionType;
+  typedef typename MatrixType::PlainObject FullMatrixType;
+  enum {
+    Mode = UpLo | SelfAdjoint,
+    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+    Flags =  MatrixTypeNestedCleaned::Flags & (HereditaryBits|FlagsLvalueBit)
+           & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)) // FIXME these flags should be preserved
+  };
+};
+}
+
+
+template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
+  : public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
+{
+  public:
+
+    typedef _MatrixType MatrixType;
+    typedef TriangularBase<SelfAdjointView> Base;
+    typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
+    typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
+    typedef MatrixTypeNestedCleaned NestedExpression;
+
+    /** \brief The type of coefficients in this matrix */
+    typedef typename internal::traits<SelfAdjointView>::Scalar Scalar; 
+    typedef typename MatrixType::StorageIndex StorageIndex;
+    typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
+
+    enum {
+      Mode = internal::traits<SelfAdjointView>::Mode,
+      Flags = internal::traits<SelfAdjointView>::Flags,
+      TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0)
+    };
+    typedef typename MatrixType::PlainObject PlainObject;
+
+    EIGEN_DEVICE_FUNC
+    explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
+    {
+      EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline Index rows() const { return m_matrix.rows(); }
+    EIGEN_DEVICE_FUNC
+    inline Index cols() const { return m_matrix.cols(); }
+    EIGEN_DEVICE_FUNC
+    inline Index outerStride() const { return m_matrix.outerStride(); }
+    EIGEN_DEVICE_FUNC
+    inline Index innerStride() const { return m_matrix.innerStride(); }
+
+    /** \sa MatrixBase::coeff()
+      * \warning the coordinates must fit into the referenced triangular part
+      */
+    EIGEN_DEVICE_FUNC
+    inline Scalar coeff(Index row, Index col) const
+    {
+      Base::check_coordinates_internal(row, col);
+      return m_matrix.coeff(row, col);
+    }
+
+    /** \sa MatrixBase::coeffRef()
+      * \warning the coordinates must fit into the referenced triangular part
+      */
+    EIGEN_DEVICE_FUNC
+    inline Scalar& coeffRef(Index row, Index col)
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
+      Base::check_coordinates_internal(row, col);
+      return m_matrix.coeffRef(row, col);
+    }
+
+    /** \internal */
+    EIGEN_DEVICE_FUNC
+    const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
+
+    EIGEN_DEVICE_FUNC
+    const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
+    EIGEN_DEVICE_FUNC
+    MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; }
+
+    /** Efficient triangular matrix times vector/matrix product */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    const Product<SelfAdjointView,OtherDerived>
+    operator*(const MatrixBase<OtherDerived>& rhs) const
+    {
+      return Product<SelfAdjointView,OtherDerived>(*this, rhs.derived());
+    }
+
+    /** Efficient vector/matrix times triangular matrix product */
+    template<typename OtherDerived> friend
+    EIGEN_DEVICE_FUNC
+    const Product<OtherDerived,SelfAdjointView>
+    operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
+    {
+      return Product<OtherDerived,SelfAdjointView>(lhs.derived(),rhs);
+    }
+    
+    friend EIGEN_DEVICE_FUNC
+    const SelfAdjointView<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,MatrixType,product),UpLo>
+    operator*(const Scalar& s, const SelfAdjointView& mat)
+    {
+      return (s*mat.nestedExpression()).template selfadjointView<UpLo>();
+    }
+
+    /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this:
+      * \f$ this = this + \alpha u v^* + conj(\alpha) v u^* \f$
+      * \returns a reference to \c *this
+      *
+      * The vectors \a u and \c v \b must be column vectors, however they can be
+      * a adjoint expression without any overhead. Only the meaningful triangular
+      * part of the matrix is updated, the rest is left unchanged.
+      *
+      * \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
+      */
+    template<typename DerivedU, typename DerivedV>
+    EIGEN_DEVICE_FUNC
+    SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
+
+    /** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
+      * \f$ this = this + \alpha ( u u^* ) \f$ where \a u is a vector or matrix.
+      *
+      * \returns a reference to \c *this
+      *
+      * Note that to perform \f$ this = this + \alpha ( u^* u ) \f$ you can simply
+      * call this function with u.adjoint().
+      *
+      * \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
+      */
+    template<typename DerivedU>
+    EIGEN_DEVICE_FUNC
+    SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
+
+    /** \returns an expression of a triangular view extracted from the current selfadjoint view of a given triangular part
+      *
+      * The parameter \a TriMode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
+      * \c #Lower, \c #StrictlyLower, \c #UnitLower.
+      *
+      * If \c TriMode references the same triangular part than \c *this, then this method simply return a \c TriangularView of the nested expression,
+      * otherwise, the nested expression is first transposed, thus returning a \c TriangularView<Transpose<MatrixType>> object.
+      *
+      * \sa MatrixBase::triangularView(), class TriangularView
+      */
+    template<unsigned int TriMode>
+    EIGEN_DEVICE_FUNC
+    typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
+                                   TriangularView<MatrixType,TriMode>,
+                                   TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type
+    triangularView() const
+    {
+      typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::ConstTransposeReturnType>::type tmp1(m_matrix);
+      typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)), MatrixType&, typename MatrixType::AdjointReturnType>::type tmp2(tmp1);
+      return typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
+                                   TriangularView<MatrixType,TriMode>,
+                                   TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
+    }
+
+    typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;
+    /** \sa MatrixBase::conjugate() const */
+    EIGEN_DEVICE_FUNC
+    inline const ConjugateReturnType conjugate() const
+    { return ConjugateReturnType(m_matrix.conjugate()); }
+
+    typedef SelfAdjointView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
+    /** \sa MatrixBase::adjoint() const */
+    EIGEN_DEVICE_FUNC
+    inline const AdjointReturnType adjoint() const
+    { return AdjointReturnType(m_matrix.adjoint()); }
+
+    typedef SelfAdjointView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
+     /** \sa MatrixBase::transpose() */
+    EIGEN_DEVICE_FUNC
+    inline TransposeReturnType transpose()
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+      typename MatrixType::TransposeReturnType tmp(m_matrix);
+      return TransposeReturnType(tmp);
+    }
+
+    typedef SelfAdjointView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
+    /** \sa MatrixBase::transpose() const */
+    EIGEN_DEVICE_FUNC
+    inline const ConstTransposeReturnType transpose() const
+    {
+      return ConstTransposeReturnType(m_matrix.transpose());
+    }
+
+    /** \returns a const expression of the main diagonal of the matrix \c *this
+      *
+      * This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator.
+      *
+      * \sa MatrixBase::diagonal(), class Diagonal */
+    EIGEN_DEVICE_FUNC
+    typename MatrixType::ConstDiagonalReturnType diagonal() const
+    {
+      return typename MatrixType::ConstDiagonalReturnType(m_matrix);
+    }
+
+/////////// Cholesky module ///////////
+
+    const LLT<PlainObject, UpLo> llt() const;
+    const LDLT<PlainObject, UpLo> ldlt() const;
+
+/////////// Eigenvalue module ///////////
+
+    /** Real part of #Scalar */
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    /** Return type of eigenvalues() */
+    typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
+
+    EIGEN_DEVICE_FUNC
+    EigenvaluesReturnType eigenvalues() const;
+    EIGEN_DEVICE_FUNC
+    RealScalar operatorNorm() const;
+
+  protected:
+    MatrixTypeNested m_matrix;
+};
+
+
+// template<typename OtherDerived, typename MatrixType, unsigned int UpLo>
+// internal::selfadjoint_matrix_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >
+// operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView<MatrixType,UpLo>& rhs)
+// {
+//   return internal::matrix_selfadjoint_product_returntype<OtherDerived,SelfAdjointView<MatrixType,UpLo> >(lhs.derived(),rhs);
+// }
+
+// selfadjoint to dense matrix
+
+namespace internal {
+
+// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
+//      in the future selfadjoint-ness should be defined by the expression traits
+//      such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
+template<typename MatrixType, unsigned int Mode>
+struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
+{
+  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
+  typedef SelfAdjointShape Shape;
+};
+
+template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
+class triangular_dense_assignment_kernel<UpLo,SelfAdjoint,SetOpposite,DstEvaluatorTypeT,SrcEvaluatorTypeT,Functor,Version>
+  : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
+{
+protected:
+  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
+  typedef typename Base::DstXprType DstXprType;
+  typedef typename Base::SrcXprType SrcXprType;
+  using Base::m_dst;
+  using Base::m_src;
+  using Base::m_functor;
+public:
+  
+  typedef typename Base::DstEvaluatorType DstEvaluatorType;
+  typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::AssignmentTraits AssignmentTraits;
+  
+  
+  EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+    : Base(dst, src, func, dstExpr)
+  {}
+  
+  EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
+  {
+    eigen_internal_assert(row!=col);
+    Scalar tmp = m_src.coeff(row,col);
+    m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp);
+    m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp));
+  }
+  
+  EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
+  {
+    Base::assignCoeff(id,id);
+  }
+  
+  EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index)
+  { eigen_internal_assert(false && "should never be called"); }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Implementation of MatrixBase methods
+***************************************************************************/
+
+/** This is the const version of MatrixBase::selfadjointView() */
+template<typename Derived>
+template<unsigned int UpLo>
+typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
+MatrixBase<Derived>::selfadjointView() const
+{
+  return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
+}
+
+/** \returns an expression of a symmetric/self-adjoint view extracted from the upper or lower triangular part of the current matrix
+  *
+  * The parameter \a UpLo can be either \c #Upper or \c #Lower
+  *
+  * Example: \include MatrixBase_selfadjointView.cpp
+  * Output: \verbinclude MatrixBase_selfadjointView.out
+  *
+  * \sa class SelfAdjointView
+  */
+template<typename Derived>
+template<unsigned int UpLo>
+typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
+MatrixBase<Derived>::selfadjointView()
+{
+  return typename SelfAdjointViewReturnType<UpLo>::Type(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINTMATRIX_H
diff --git a/third-party/Eigen/src/Core/SelfCwiseBinaryOp.h b/third-party/Eigen/src/Core/SelfCwiseBinaryOp.h
new file mode 100644
index 00000000..7c89c2e2
--- /dev/null
+++ b/third-party/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -0,0 +1,47 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFCWISEBINARYOP_H
+#define EIGEN_SELFCWISEBINARYOP_H
+
+namespace Eigen { 
+
+// TODO generalize the scalar type of 'other'
+
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
+{
+  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
+  return derived();
+}
+
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
+{
+  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
+  return derived();
+}
+
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
+{
+  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
+  return derived();
+}
+
+template<typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
+{
+  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
+  return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFCWISEBINARYOP_H
diff --git a/third-party/Eigen/src/Core/Solve.h b/third-party/Eigen/src/Core/Solve.h
new file mode 100644
index 00000000..a8daea51
--- /dev/null
+++ b/third-party/Eigen/src/Core/Solve.h
@@ -0,0 +1,188 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SOLVE_H
+#define EIGEN_SOLVE_H
+
+namespace Eigen {
+
+template<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl;
+  
+/** \class Solve
+  * \ingroup Core_Module
+  *
+  * \brief Pseudo expression representing a solving operation
+  *
+  * \tparam Decomposition the type of the matrix or decomposion object
+  * \tparam Rhstype the type of the right-hand side
+  *
+  * This class represents an expression of A.solve(B)
+  * and most of the time this is the only way it is used.
+  *
+  */
+namespace internal {
+
+// this solve_traits class permits to determine the evaluation type with respect to storage kind (Dense vs Sparse)
+template<typename Decomposition, typename RhsType,typename StorageKind> struct solve_traits;
+
+template<typename Decomposition, typename RhsType>
+struct solve_traits<Decomposition,RhsType,Dense>
+{
+  typedef typename make_proper_matrix_type<typename RhsType::Scalar,
+                 Decomposition::ColsAtCompileTime,
+                 RhsType::ColsAtCompileTime,
+                 RhsType::PlainObject::Options,
+                 Decomposition::MaxColsAtCompileTime,
+                 RhsType::MaxColsAtCompileTime>::type PlainObject;
+};
+
+template<typename Decomposition, typename RhsType>
+struct traits<Solve<Decomposition, RhsType> >
+  : traits<typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject>
+{
+  typedef typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject PlainObject;
+  typedef typename promote_index_type<typename Decomposition::StorageIndex, typename RhsType::StorageIndex>::type StorageIndex;
+  typedef traits<PlainObject> BaseTraits;
+  enum {
+    Flags = BaseTraits::Flags & RowMajorBit,
+    CoeffReadCost = HugeCost
+  };
+};
+
+}
+
+
+template<typename Decomposition, typename RhsType>
+class Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>
+{
+public:
+  typedef typename internal::traits<Solve>::PlainObject PlainObject;
+  typedef typename internal::traits<Solve>::StorageIndex StorageIndex;
+  
+  Solve(const Decomposition &dec, const RhsType &rhs)
+    : m_dec(dec), m_rhs(rhs)
+  {}
+  
+  EIGEN_DEVICE_FUNC Index rows() const { return m_dec.cols(); }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_rhs.cols(); }
+
+  EIGEN_DEVICE_FUNC const Decomposition& dec() const { return m_dec; }
+  EIGEN_DEVICE_FUNC const RhsType&       rhs() const { return m_rhs; }
+
+protected:
+  const Decomposition &m_dec;
+  const RhsType       &m_rhs;
+};
+
+
+// Specialization of the Solve expression for dense results
+template<typename Decomposition, typename RhsType>
+class SolveImpl<Decomposition,RhsType,Dense>
+  : public MatrixBase<Solve<Decomposition,RhsType> >
+{
+  typedef Solve<Decomposition,RhsType> Derived;
+  
+public:
+  
+  typedef MatrixBase<Solve<Decomposition,RhsType> > Base;
+  EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
+
+private:
+  
+  Scalar coeff(Index row, Index col) const;
+  Scalar coeff(Index i) const;
+};
+
+// Generic API dispatcher
+template<typename Decomposition, typename RhsType, typename StorageKind>
+class SolveImpl : public internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type
+{
+  public:
+    typedef typename internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type Base;
+};
+
+namespace internal {
+
+// Evaluator of Solve -> eval into a temporary
+template<typename Decomposition, typename RhsType>
+struct evaluator<Solve<Decomposition,RhsType> >
+  : public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>
+{
+  typedef Solve<Decomposition,RhsType> SolveType;
+  typedef typename SolveType::PlainObject PlainObject;
+  typedef evaluator<PlainObject> Base;
+
+  enum { Flags = Base::Flags | EvalBeforeNestingBit };
+  
+  EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
+    : m_result(solve.rows(), solve.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    solve.dec()._solve_impl(solve.rhs(), m_result);
+  }
+  
+protected:  
+  PlainObject m_result;
+};
+
+// Specialization for "dst = dec.solve(rhs)"
+// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere
+template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
+struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
+{
+  typedef Solve<DecType,RhsType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+
+    src.dec()._solve_impl(src.rhs(), dst);
+  }
+};
+
+// Specialization for "dst = dec.transpose().solve(rhs)"
+template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
+struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
+{
+  typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+
+    src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
+  }
+};
+
+// Specialization for "dst = dec.adjoint().solve(rhs)"
+template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
+struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>,
+                  internal::assign_op<Scalar,Scalar>, Dense2Dense>
+{
+  typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+    
+    src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
+  }
+};
+
+} // end namepsace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SOLVE_H
diff --git a/third-party/Eigen/src/Core/SolveTriangular.h b/third-party/Eigen/src/Core/SolveTriangular.h
new file mode 100644
index 00000000..fd0acb1a
--- /dev/null
+++ b/third-party/Eigen/src/Core/SolveTriangular.h
@@ -0,0 +1,235 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SOLVETRIANGULAR_H
+#define EIGEN_SOLVETRIANGULAR_H
+
+namespace Eigen { 
+
+namespace internal {
+
+// Forward declarations:
+// The following two routines are implemented in the products/TriangularSolver*.h files
+template<typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>
+struct triangular_solve_vector;
+
+template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder, int OtherInnerStride>
+struct triangular_solve_matrix;
+
+// small helper struct extracting some traits on the underlying solver operation
+template<typename Lhs, typename Rhs, int Side>
+class trsolve_traits
+{
+  private:
+    enum {
+      RhsIsVectorAtCompileTime = (Side==OnTheLeft ? Rhs::ColsAtCompileTime : Rhs::RowsAtCompileTime)==1
+    };
+  public:
+    enum {
+      Unrolling   = (RhsIsVectorAtCompileTime && Rhs::SizeAtCompileTime != Dynamic && Rhs::SizeAtCompileTime <= 8)
+                  ? CompleteUnrolling : NoUnrolling,
+      RhsVectors  = RhsIsVectorAtCompileTime ? 1 : Dynamic
+    };
+};
+
+template<typename Lhs, typename Rhs,
+  int Side, // can be OnTheLeft/OnTheRight
+  int Mode, // can be Upper/Lower | UnitDiag
+  int Unrolling = trsolve_traits<Lhs,Rhs,Side>::Unrolling,
+  int RhsVectors = trsolve_traits<Lhs,Rhs,Side>::RhsVectors
+  >
+struct triangular_solver_selector;
+
+template<typename Lhs, typename Rhs, int Side, int Mode>
+struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
+{
+  typedef typename Lhs::Scalar LhsScalar;
+  typedef typename Rhs::Scalar RhsScalar;
+  typedef blas_traits<Lhs> LhsProductTraits;
+  typedef typename LhsProductTraits::ExtractType ActualLhsType;
+  typedef Map<Matrix<RhsScalar,Dynamic,1>, Aligned> MappedRhs;
+  static void run(const Lhs& lhs, Rhs& rhs)
+  {
+    ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
+
+    // FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1
+
+    bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
+
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
+                                                  (useRhsDirectly ? rhs.data() : 0));
+                                                  
+    if(!useRhsDirectly)
+      MappedRhs(actualRhs,rhs.size()) = rhs;
+
+    triangular_solve_vector<LhsScalar, RhsScalar, Index, Side, Mode, LhsProductTraits::NeedToConjugate,
+                            (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
+      ::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);
+
+    if(!useRhsDirectly)
+      rhs = MappedRhs(actualRhs, rhs.size());
+  }
+};
+
+// the rhs is a matrix
+template<typename Lhs, typename Rhs, int Side, int Mode>
+struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
+{
+  typedef typename Rhs::Scalar Scalar;
+  typedef blas_traits<Lhs> LhsProductTraits;
+  typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
+
+  static void run(const Lhs& lhs, Rhs& rhs)
+  {
+    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
+
+    const Index size = lhs.rows();
+    const Index othersize = Side==OnTheLeft? rhs.cols() : rhs.rows();
+
+    typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+              Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType;
+
+    BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false);
+
+    triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
+                               (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor, Rhs::InnerStrideAtCompileTime>
+      ::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.innerStride(), rhs.outerStride(), blocking);
+  }
+};
+
+/***************************************************************************
+* meta-unrolling implementation
+***************************************************************************/
+
+template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size,
+         bool Stop = LoopIndex==Size>
+struct triangular_solver_unroller;
+
+template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
+struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,false> {
+  enum {
+    IsLower = ((Mode&Lower)==Lower),
+    DiagIndex  = IsLower ? LoopIndex : Size - LoopIndex - 1,
+    StartIndex = IsLower ? 0         : DiagIndex+1
+  };
+  static void run(const Lhs& lhs, Rhs& rhs)
+  {
+    if (LoopIndex>0)
+      rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose()
+                                .cwiseProduct(rhs.template segment<LoopIndex>(StartIndex)).sum();
+
+    if(!(Mode & UnitDiag))
+      rhs.coeffRef(DiagIndex) /= lhs.coeff(DiagIndex,DiagIndex);
+
+    triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex+1,Size>::run(lhs,rhs);
+  }
+};
+
+template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
+struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> {
+  static void run(const Lhs&, Rhs&) {}
+};
+
+template<typename Lhs, typename Rhs, int Mode>
+struct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> {
+  static void run(const Lhs& lhs, Rhs& rhs)
+  { triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }
+};
+
+template<typename Lhs, typename Rhs, int Mode>
+struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
+  static void run(const Lhs& lhs, Rhs& rhs)
+  {
+    Transpose<const Lhs> trLhs(lhs);
+    Transpose<Rhs> trRhs(rhs);
+    
+    triangular_solver_unroller<Transpose<const Lhs>,Transpose<Rhs>,
+                              ((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),
+                              0,Rhs::SizeAtCompileTime>::run(trLhs,trRhs);
+  }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* TriangularView methods
+***************************************************************************/
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename MatrixType, unsigned int Mode>
+template<int Side, typename OtherDerived>
+void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
+{
+  OtherDerived& other = _other.const_cast_derived();
+  eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
+  eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
+  // If solving for a 0x0 matrix, nothing to do, simply return.
+  if (derived().cols() == 0)
+    return;
+
+  enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit)  && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};
+  typedef typename internal::conditional<copy,
+    typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
+  OtherCopy otherCopy(other);
+
+  internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
+    Side, Mode>::run(derived().nestedExpression(), otherCopy);
+
+  if (copy)
+    other = otherCopy;
+}
+
+template<typename Derived, unsigned int Mode>
+template<int Side, typename Other>
+const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
+TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) const
+{
+  return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());
+}
+#endif
+
+namespace internal {
+
+
+template<int Side, typename TriangularType, typename Rhs>
+struct traits<triangular_solve_retval<Side, TriangularType, Rhs> >
+{
+  typedef typename internal::plain_matrix_type_column_major<Rhs>::type ReturnType;
+};
+
+template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval
+ : public ReturnByValue<triangular_solve_retval<Side, TriangularType, Rhs> >
+{
+  typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
+  typedef ReturnByValue<triangular_solve_retval> Base;
+
+  triangular_solve_retval(const TriangularType& tri, const Rhs& rhs)
+    : m_triangularMatrix(tri), m_rhs(rhs)
+  {}
+
+  inline Index rows() const { return m_rhs.rows(); }
+  inline Index cols() const { return m_rhs.cols(); }
+
+  template<typename Dest> inline void evalTo(Dest& dst) const
+  {
+    if(!is_same_dense(dst,m_rhs))
+      dst = m_rhs;
+    m_triangularMatrix.template solveInPlace<Side>(dst);
+  }
+
+  protected:
+    const TriangularType& m_triangularMatrix;
+    typename Rhs::Nested m_rhs;
+};
+
+} // namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SOLVETRIANGULAR_H
diff --git a/third-party/Eigen/src/Core/SolverBase.h b/third-party/Eigen/src/Core/SolverBase.h
new file mode 100644
index 00000000..8a4adc22
--- /dev/null
+++ b/third-party/Eigen/src/Core/SolverBase.h
@@ -0,0 +1,130 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SOLVERBASE_H
+#define EIGEN_SOLVERBASE_H
+
+namespace Eigen {
+
+namespace internal {
+
+
+
+} // end namespace internal
+
+/** \class SolverBase
+  * \brief A base class for matrix decomposition and solvers
+  *
+  * \tparam Derived the actual type of the decomposition/solver.
+  *
+  * Any matrix decomposition inheriting this base class provide the following API:
+  *
+  * \code
+  * MatrixType A, b, x;
+  * DecompositionType dec(A);
+  * x = dec.solve(b);             // solve A   * x = b
+  * x = dec.transpose().solve(b); // solve A^T * x = b
+  * x = dec.adjoint().solve(b);   // solve A'  * x = b
+  * \endcode
+  *
+  * \warning Currently, any other usage of transpose() and adjoint() are not supported and will produce compilation errors.
+  *
+  * \sa class PartialPivLU, class FullPivLU
+  */
+template<typename Derived>
+class SolverBase : public EigenBase<Derived>
+{
+  public:
+
+    typedef EigenBase<Derived> Base;
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef Scalar CoeffReturnType;
+
+    enum {
+      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
+                                                          internal::traits<Derived>::ColsAtCompileTime>::ret),
+      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+      MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
+                                                             internal::traits<Derived>::MaxColsAtCompileTime>::ret),
+      IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
+                           || internal::traits<Derived>::MaxColsAtCompileTime == 1
+    };
+
+    /** Default constructor */
+    SolverBase()
+    {}
+
+    ~SolverBase()
+    {}
+
+    using Base::derived;
+
+    /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A.
+      */
+    template<typename Rhs>
+    inline const Solve<Derived, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(derived().rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
+      return Solve<Derived, Rhs>(derived(), b.derived());
+    }
+
+    /** \internal the return type of transpose() */
+    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+    /** \returns an expression of the transposed of the factored matrix.
+      *
+      * A typical usage is to solve for the transposed problem A^T x = b:
+      * \code x = dec.transpose().solve(b); \endcode
+      *
+      * \sa adjoint(), solve()
+      */
+    inline ConstTransposeReturnType transpose() const
+    {
+      return ConstTransposeReturnType(derived());
+    }
+
+    /** \internal the return type of adjoint() */
+    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
+                        ConstTransposeReturnType
+                     >::type AdjointReturnType;
+    /** \returns an expression of the adjoint of the factored matrix
+      *
+      * A typical usage is to solve for the adjoint problem A' x = b:
+      * \code x = dec.adjoint().solve(b); \endcode
+      *
+      * For real scalar types, this function is equivalent to transpose().
+      *
+      * \sa transpose(), solve()
+      */
+    inline AdjointReturnType adjoint() const
+    {
+      return AdjointReturnType(derived().transpose());
+    }
+
+  protected:
+};
+
+namespace internal {
+
+template<typename Derived>
+struct generic_xpr_base<Derived, MatrixXpr, SolverStorage>
+{
+  typedef SolverBase<Derived> type;
+
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SOLVERBASE_H
diff --git a/third-party/Eigen/src/Core/StableNorm.h b/third-party/Eigen/src/Core/StableNorm.h
new file mode 100644
index 00000000..88c8d989
--- /dev/null
+++ b/third-party/Eigen/src/Core/StableNorm.h
@@ -0,0 +1,221 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_STABLENORM_H
+#define EIGEN_STABLENORM_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename ExpressionType, typename Scalar>
+inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
+{
+  Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
+  
+  if(maxCoeff>scale)
+  {
+    ssq = ssq * numext::abs2(scale/maxCoeff);
+    Scalar tmp = Scalar(1)/maxCoeff;
+    if(tmp > NumTraits<Scalar>::highest())
+    {
+      invScale = NumTraits<Scalar>::highest();
+      scale = Scalar(1)/invScale;
+    }
+    else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF
+    {
+      invScale = Scalar(1);
+      scale = maxCoeff;
+    }
+    else
+    {
+      scale = maxCoeff;
+      invScale = tmp;
+    }
+  }
+  else if(maxCoeff!=maxCoeff) // we got a NaN
+  {
+    scale = maxCoeff;
+  }
+  
+  // TODO if the maxCoeff is much much smaller than the current scale,
+  // then we can neglect this sub vector
+  if(scale>Scalar(0)) // if scale==0, then bl is 0 
+    ssq += (bl*invScale).squaredNorm();
+}
+
+template<typename Derived>
+inline typename NumTraits<typename traits<Derived>::Scalar>::Real
+blueNorm_impl(const EigenBase<Derived>& _vec)
+{
+  typedef typename Derived::RealScalar RealScalar;  
+  using std::pow;
+  using std::sqrt;
+  using std::abs;
+  const Derived& vec(_vec.derived());
+  static bool initialized = false;
+  static RealScalar b1, b2, s1m, s2m, rbig, relerr;
+  if(!initialized)
+  {
+    int ibeta, it, iemin, iemax, iexp;
+    RealScalar eps;
+    // This program calculates the machine-dependent constants
+    // bl, b2, slm, s2m, relerr overfl
+    // from the "basic" machine-dependent numbers
+    // nbig, ibeta, it, iemin, iemax, rbig.
+    // The following define the basic machine-dependent constants.
+    // For portability, the PORT subprograms "ilmaeh" and "rlmach"
+    // are used. For any specific computer, each of the assignment
+    // statements can be replaced
+    ibeta = std::numeric_limits<RealScalar>::radix;                 // base for floating-point numbers
+    it    = std::numeric_limits<RealScalar>::digits;                // number of base-beta digits in mantissa
+    iemin = std::numeric_limits<RealScalar>::min_exponent;          // minimum exponent
+    iemax = std::numeric_limits<RealScalar>::max_exponent;          // maximum exponent
+    rbig  = (std::numeric_limits<RealScalar>::max)();               // largest floating-point number
+
+    iexp  = -((1-iemin)/2);
+    b1    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // lower boundary of midrange
+    iexp  = (iemax + 1 - it)/2;
+    b2    = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // upper boundary of midrange
+
+    iexp  = (2-iemin)/2;
+    s1m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // scaling factor for lower range
+    iexp  = - ((iemax+it)/2);
+    s2m   = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp)));    // scaling factor for upper range
+
+    eps     = RealScalar(pow(double(ibeta), 1-it));
+    relerr  = sqrt(eps);                                            // tolerance for neglecting asml
+    initialized = true;
+  }
+  Index n = vec.size();
+  RealScalar ab2 = b2 / RealScalar(n);
+  RealScalar asml = RealScalar(0);
+  RealScalar amed = RealScalar(0);
+  RealScalar abig = RealScalar(0);
+  for(typename Derived::InnerIterator it(vec, 0); it; ++it)
+  {
+    RealScalar ax = abs(it.value());
+    if(ax > ab2)     abig += numext::abs2(ax*s2m);
+    else if(ax < b1) asml += numext::abs2(ax*s1m);
+    else             amed += numext::abs2(ax);
+  }
+  if(amed!=amed)
+    return amed;  // we got a NaN
+  if(abig > RealScalar(0))
+  {
+    abig = sqrt(abig);
+    if(abig > rbig) // overflow, or *this contains INF values
+      return abig;  // return INF
+    if(amed > RealScalar(0))
+    {
+      abig = abig/s2m;
+      amed = sqrt(amed);
+    }
+    else
+      return abig/s2m;
+  }
+  else if(asml > RealScalar(0))
+  {
+    if (amed > RealScalar(0))
+    {
+      abig = sqrt(amed);
+      amed = sqrt(asml) / s1m;
+    }
+    else
+      return sqrt(asml)/s1m;
+  }
+  else
+    return sqrt(amed);
+  asml = numext::mini(abig, amed);
+  abig = numext::maxi(abig, amed);
+  if(asml <= abig*relerr)
+    return abig;
+  else
+    return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig));
+}
+
+} // end namespace internal
+
+/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
+  * This version use a blockwise two passes algorithm:
+  *  1 - find the absolute largest coefficient \c s
+  *  2 - compute \f$ s \Vert \frac{*this}{s} \Vert \f$ in a standard way
+  *
+  * For architecture/scalar types supporting vectorization, this version
+  * is faster than blueNorm(). Otherwise the blueNorm() is much faster.
+  *
+  * \sa norm(), blueNorm(), hypotNorm()
+  */
+template<typename Derived>
+inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+MatrixBase<Derived>::stableNorm() const
+{
+  using std::sqrt;
+  using std::abs;
+  const Index blockSize = 4096;
+  RealScalar scale(0);
+  RealScalar invScale(1);
+  RealScalar ssq(0); // sum of square
+  
+  typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
+  typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
+  const DerivedCopy copy(derived());
+  
+  enum {
+    CanAlign = (   (int(DerivedCopyClean::Flags)&DirectAccessBit)
+                || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
+               ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
+                 && (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
+  };
+  typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
+                                                   typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
+  Index n = size();
+  
+  if(n==1)
+    return abs(this->coeff(0));
+  
+  Index bi = internal::first_default_aligned(copy);
+  if (bi>0)
+    internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
+  for (; bi<n; bi+=blockSize)
+    internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
+  return scale * sqrt(ssq);
+}
+
+/** \returns the \em l2 norm of \c *this using the Blue's algorithm.
+  * A Portable Fortran Program to Find the Euclidean Norm of a Vector,
+  * ACM TOMS, Vol 4, Issue 1, 1978.
+  *
+  * For architecture/scalar types without vectorization, this version
+  * is much faster than stableNorm(). Otherwise the stableNorm() is faster.
+  *
+  * \sa norm(), stableNorm(), hypotNorm()
+  */
+template<typename Derived>
+inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+MatrixBase<Derived>::blueNorm() const
+{
+  return internal::blueNorm_impl(*this);
+}
+
+/** \returns the \em l2 norm of \c *this avoiding undeflow and overflow.
+  * This version use a concatenation of hypot() calls, and it is very slow.
+  *
+  * \sa norm(), stableNorm()
+  */
+template<typename Derived>
+inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+MatrixBase<Derived>::hypotNorm() const
+{
+  return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_STABLENORM_H
diff --git a/third-party/Eigen/src/Core/Stride.h b/third-party/Eigen/src/Core/Stride.h
new file mode 100644
index 00000000..513742f3
--- /dev/null
+++ b/third-party/Eigen/src/Core/Stride.h
@@ -0,0 +1,111 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_STRIDE_H
+#define EIGEN_STRIDE_H
+
+namespace Eigen { 
+
+/** \class Stride
+  * \ingroup Core_Module
+  *
+  * \brief Holds strides information for Map
+  *
+  * This class holds the strides information for mapping arrays with strides with class Map.
+  *
+  * It holds two values: the inner stride and the outer stride.
+  *
+  * The inner stride is the pointer increment between two consecutive entries within a given row of a
+  * row-major matrix or within a given column of a column-major matrix.
+  *
+  * The outer stride is the pointer increment between two consecutive rows of a row-major matrix or
+  * between two consecutive columns of a column-major matrix.
+  *
+  * These two values can be passed either at compile-time as template parameters, or at runtime as
+  * arguments to the constructor.
+  *
+  * Indeed, this class takes two template parameters:
+  *  \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
+  *  \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
+  *
+  * Here is an example:
+  * \include Map_general_stride.cpp
+  * Output: \verbinclude Map_general_stride.out
+  *
+  * \sa class InnerStride, class OuterStride, \ref TopicStorageOrders
+  */
+template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
+class Stride
+{
+  public:
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+    enum {
+      InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
+      OuterStrideAtCompileTime = _OuterStrideAtCompileTime
+    };
+
+    /** Default constructor, for use when strides are fixed at compile time */
+    EIGEN_DEVICE_FUNC
+    Stride()
+      : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
+    {
+      eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic);
+    }
+
+    /** Constructor allowing to pass the strides at runtime */
+    EIGEN_DEVICE_FUNC
+    Stride(Index outerStride, Index innerStride)
+      : m_outer(outerStride), m_inner(innerStride)
+    {
+      eigen_assert(innerStride>=0 && outerStride>=0);
+    }
+
+    /** Copy constructor */
+    EIGEN_DEVICE_FUNC
+    Stride(const Stride& other)
+      : m_outer(other.outer()), m_inner(other.inner())
+    {}
+
+    /** \returns the outer stride */
+    EIGEN_DEVICE_FUNC
+    inline Index outer() const { return m_outer.value(); }
+    /** \returns the inner stride */
+    EIGEN_DEVICE_FUNC
+    inline Index inner() const { return m_inner.value(); }
+
+  protected:
+    internal::variable_if_dynamic<Index, OuterStrideAtCompileTime> m_outer;
+    internal::variable_if_dynamic<Index, InnerStrideAtCompileTime> m_inner;
+};
+
+/** \brief Convenience specialization of Stride to specify only an inner stride
+  * See class Map for some examples */
+template<int Value>
+class InnerStride : public Stride<0, Value>
+{
+    typedef Stride<0, Value> Base;
+  public:
+    EIGEN_DEVICE_FUNC InnerStride() : Base() {}
+    EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {} // FIXME making this explicit could break valid code
+};
+
+/** \brief Convenience specialization of Stride to specify only an outer stride
+  * See class Map for some examples */
+template<int Value>
+class OuterStride : public Stride<Value, 0>
+{
+    typedef Stride<Value, 0> Base;
+  public:
+    EIGEN_DEVICE_FUNC OuterStride() : Base() {}
+    EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {} // FIXME making this explicit could break valid code
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_STRIDE_H
diff --git a/third-party/Eigen/src/Core/Swap.h b/third-party/Eigen/src/Core/Swap.h
new file mode 100644
index 00000000..d7020091
--- /dev/null
+++ b/third-party/Eigen/src/Core/Swap.h
@@ -0,0 +1,67 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SWAP_H
+#define EIGEN_SWAP_H
+
+namespace Eigen { 
+
+namespace internal {
+
+// Overload default assignPacket behavior for swapping them
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
+class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, Specialized>
+ : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn>
+{
+protected:
+  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base;
+  using Base::m_dst;
+  using Base::m_src;
+  using Base::m_functor;
+  
+public:
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::DstXprType DstXprType;
+  typedef swap_assign_op<Scalar> Functor;
+  
+  EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
+    : Base(dst, src, func, dstExpr)
+  {}
+  
+  template<int StoreMode, int LoadMode, typename PacketType>
+  void assignPacket(Index row, Index col)
+  {
+    PacketType tmp = m_src.template packet<LoadMode,PacketType>(row,col);
+    const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode,PacketType>(row,col));
+    m_dst.template writePacket<StoreMode>(row,col,tmp);
+  }
+  
+  template<int StoreMode, int LoadMode, typename PacketType>
+  void assignPacket(Index index)
+  {
+    PacketType tmp = m_src.template packet<LoadMode,PacketType>(index);
+    const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode,PacketType>(index));
+    m_dst.template writePacket<StoreMode>(index,tmp);
+  }
+  
+  // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
+  template<int StoreMode, int LoadMode, typename PacketType>
+  void assignPacketByOuterInner(Index outer, Index inner)
+  {
+    Index row = Base::rowIndexByOuterInner(outer, inner); 
+    Index col = Base::colIndexByOuterInner(outer, inner);
+    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
+  }
+};
+
+} // namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SWAP_H
diff --git a/third-party/Eigen/src/Core/Transpose.h b/third-party/Eigen/src/Core/Transpose.h
new file mode 100644
index 00000000..960dc451
--- /dev/null
+++ b/third-party/Eigen/src/Core/Transpose.h
@@ -0,0 +1,405 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRANSPOSE_H
+#define EIGEN_TRANSPOSE_H
+
+namespace Eigen { 
+
+namespace internal {
+template<typename MatrixType>
+struct traits<Transpose<MatrixType> > : public traits<MatrixType>
+{
+  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
+  enum {
+    RowsAtCompileTime = MatrixType::ColsAtCompileTime,
+    ColsAtCompileTime = MatrixType::RowsAtCompileTime,
+    MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+    MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+    Flags0 = traits<MatrixTypeNestedPlain>::Flags & ~(LvalueBit | NestByRefBit),
+    Flags1 = Flags0 | FlagsLvalueBit,
+    Flags = Flags1 ^ RowMajorBit,
+    InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
+    OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
+  };
+};
+}
+
+template<typename MatrixType, typename StorageKind> class TransposeImpl;
+
+/** \class Transpose
+  * \ingroup Core_Module
+  *
+  * \brief Expression of the transpose of a matrix
+  *
+  * \tparam MatrixType the type of the object of which we are taking the transpose
+  *
+  * This class represents an expression of the transpose of a matrix.
+  * It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
+  * and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::transpose(), MatrixBase::adjoint()
+  */
+template<typename MatrixType> class Transpose
+  : public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
+{
+  public:
+
+    typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
+
+    typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
+    EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
+    typedef typename internal::remove_all<MatrixType>::type NestedExpression;
+
+    EIGEN_DEVICE_FUNC
+    explicit inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
+
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
+
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
+
+    /** \returns the nested expression */
+    EIGEN_DEVICE_FUNC
+    const typename internal::remove_all<MatrixTypeNested>::type&
+    nestedExpression() const { return m_matrix; }
+
+    /** \returns the nested expression */
+    EIGEN_DEVICE_FUNC
+    typename internal::remove_reference<MatrixTypeNested>::type&
+    nestedExpression() { return m_matrix; }
+
+    /** \internal */
+    void resize(Index nrows, Index ncols) {
+      m_matrix.resize(ncols,nrows);
+    }
+
+  protected:
+    typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
+};
+
+namespace internal {
+
+template<typename MatrixType, bool HasDirectAccess = has_direct_access<MatrixType>::ret>
+struct TransposeImpl_base
+{
+  typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
+};
+
+template<typename MatrixType>
+struct TransposeImpl_base<MatrixType, false>
+{
+  typedef typename dense_xpr_base<Transpose<MatrixType> >::type type;
+};
+
+} // end namespace internal
+
+// Generic API dispatcher
+template<typename XprType, typename StorageKind>
+class TransposeImpl
+  : public internal::generic_xpr_base<Transpose<XprType> >::type
+{
+public:
+  typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base;
+};
+
+template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
+  : public internal::TransposeImpl_base<MatrixType>::type
+{
+  public:
+
+    typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
+    using Base::coeffRef;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
+
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
+
+    typedef typename internal::conditional<
+                       internal::is_lvalue<MatrixType>::value,
+                       Scalar,
+                       const Scalar
+                     >::type ScalarWithConstIfNotLvalue;
+
+    EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); }
+
+    // FIXME: shall we keep the const version of coeffRef?
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index rowId, Index colId) const
+    {
+      return derived().nestedExpression().coeffRef(colId, rowId);
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline const Scalar& coeffRef(Index index) const
+    {
+      return derived().nestedExpression().coeffRef(index);
+    }
+  protected:
+    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TransposeImpl)
+};
+
+/** \returns an expression of the transpose of *this.
+  *
+  * Example: \include MatrixBase_transpose.cpp
+  * Output: \verbinclude MatrixBase_transpose.out
+  *
+  * \warning If you want to replace a matrix by its own transpose, do \b NOT do this:
+  * \code
+  * m = m.transpose(); // bug!!! caused by aliasing effect
+  * \endcode
+  * Instead, use the transposeInPlace() method:
+  * \code
+  * m.transposeInPlace();
+  * \endcode
+  * which gives Eigen good opportunities for optimization, or alternatively you can also do:
+  * \code
+  * m = m.transpose().eval();
+  * \endcode
+  *
+  * \sa transposeInPlace(), adjoint() */
+template<typename Derived>
+inline Transpose<Derived>
+DenseBase<Derived>::transpose()
+{
+  return TransposeReturnType(derived());
+}
+
+/** This is the const version of transpose().
+  *
+  * Make sure you read the warning for transpose() !
+  *
+  * \sa transposeInPlace(), adjoint() */
+template<typename Derived>
+inline typename DenseBase<Derived>::ConstTransposeReturnType
+DenseBase<Derived>::transpose() const
+{
+  return ConstTransposeReturnType(derived());
+}
+
+/** \returns an expression of the adjoint (i.e. conjugate transpose) of *this.
+  *
+  * Example: \include MatrixBase_adjoint.cpp
+  * Output: \verbinclude MatrixBase_adjoint.out
+  *
+  * \warning If you want to replace a matrix by its own adjoint, do \b NOT do this:
+  * \code
+  * m = m.adjoint(); // bug!!! caused by aliasing effect
+  * \endcode
+  * Instead, use the adjointInPlace() method:
+  * \code
+  * m.adjointInPlace();
+  * \endcode
+  * which gives Eigen good opportunities for optimization, or alternatively you can also do:
+  * \code
+  * m = m.adjoint().eval();
+  * \endcode
+  *
+  * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */
+template<typename Derived>
+inline const typename MatrixBase<Derived>::AdjointReturnType
+MatrixBase<Derived>::adjoint() const
+{
+  return AdjointReturnType(this->transpose());
+}
+
+/***************************************************************************
+* "in place" transpose implementation
+***************************************************************************/
+
+namespace internal {
+
+template<typename MatrixType,
+  bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,
+  bool MatchPacketSize =
+        (int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))
+    &&  (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >
+struct inplace_transpose_selector;
+
+template<typename MatrixType>
+struct inplace_transpose_selector<MatrixType,true,false> { // square matrix
+  static void run(MatrixType& m) {
+    m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
+  }
+};
+
+// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only.
+template<typename MatrixType>
+struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
+  static void run(MatrixType& m) {
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
+    const Index PacketSize = internal::packet_traits<Scalar>::size;
+    const Index Alignment = internal::evaluator<MatrixType>::Alignment;
+    PacketBlock<Packet> A;
+    for (Index i=0; i<PacketSize; ++i)
+      A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
+    internal::ptranspose(A);
+    for (Index i=0; i<PacketSize; ++i)
+      m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);
+  }
+};
+
+template<typename MatrixType,bool MatchPacketSize>
+struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix
+  static void run(MatrixType& m) {
+    if (m.rows()==m.cols())
+      m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
+    else
+      m = m.transpose().eval();
+  }
+};
+
+} // end namespace internal
+
+/** This is the "in place" version of transpose(): it replaces \c *this by its own transpose.
+  * Thus, doing
+  * \code
+  * m.transposeInPlace();
+  * \endcode
+  * has the same effect on m as doing
+  * \code
+  * m = m.transpose().eval();
+  * \endcode
+  * and is faster and also safer because in the latter line of code, forgetting the eval() results
+  * in a bug caused by \ref TopicAliasing "aliasing".
+  *
+  * Notice however that this method is only useful if you want to replace a matrix by its own transpose.
+  * If you just need the transpose of a matrix, use transpose().
+  *
+  * \note if the matrix is not square, then \c *this must be a resizable matrix. 
+  * This excludes (non-square) fixed-size matrices, block-expressions and maps.
+  *
+  * \sa transpose(), adjoint(), adjointInPlace() */
+template<typename Derived>
+inline void DenseBase<Derived>::transposeInPlace()
+{
+  eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
+               && "transposeInPlace() called on a non-square non-resizable matrix");
+  internal::inplace_transpose_selector<Derived>::run(derived());
+}
+
+/***************************************************************************
+* "in place" adjoint implementation
+***************************************************************************/
+
+/** This is the "in place" version of adjoint(): it replaces \c *this by its own transpose.
+  * Thus, doing
+  * \code
+  * m.adjointInPlace();
+  * \endcode
+  * has the same effect on m as doing
+  * \code
+  * m = m.adjoint().eval();
+  * \endcode
+  * and is faster and also safer because in the latter line of code, forgetting the eval() results
+  * in a bug caused by aliasing.
+  *
+  * Notice however that this method is only useful if you want to replace a matrix by its own adjoint.
+  * If you just need the adjoint of a matrix, use adjoint().
+  *
+  * \note if the matrix is not square, then \c *this must be a resizable matrix.
+  * This excludes (non-square) fixed-size matrices, block-expressions and maps.
+  *
+  * \sa transpose(), adjoint(), transposeInPlace() */
+template<typename Derived>
+inline void MatrixBase<Derived>::adjointInPlace()
+{
+  derived() = adjoint().eval();
+}
+
+#ifndef EIGEN_NO_DEBUG
+
+// The following is to detect aliasing problems in most common cases.
+
+namespace internal {
+
+template<bool DestIsTransposed, typename OtherDerived>
+struct check_transpose_aliasing_compile_time_selector
+{
+  enum { ret = bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed };
+};
+
+template<bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
+struct check_transpose_aliasing_compile_time_selector<DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
+{
+  enum { ret =    bool(blas_traits<DerivedA>::IsTransposed) != DestIsTransposed
+               || bool(blas_traits<DerivedB>::IsTransposed) != DestIsTransposed
+  };
+};
+
+template<typename Scalar, bool DestIsTransposed, typename OtherDerived>
+struct check_transpose_aliasing_run_time_selector
+{
+  static bool run(const Scalar* dest, const OtherDerived& src)
+  {
+    return (bool(blas_traits<OtherDerived>::IsTransposed) != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src));
+  }
+};
+
+template<typename Scalar, bool DestIsTransposed, typename BinOp, typename DerivedA, typename DerivedB>
+struct check_transpose_aliasing_run_time_selector<Scalar,DestIsTransposed,CwiseBinaryOp<BinOp,DerivedA,DerivedB> >
+{
+  static bool run(const Scalar* dest, const CwiseBinaryOp<BinOp,DerivedA,DerivedB>& src)
+  {
+    return ((blas_traits<DerivedA>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.lhs())))
+        || ((blas_traits<DerivedB>::IsTransposed != DestIsTransposed) && (dest!=0 && dest==(const Scalar*)extract_data(src.rhs())));
+  }
+};
+
+// the following selector, checkTransposeAliasing_impl, based on MightHaveTransposeAliasing,
+// is because when the condition controlling the assert is known at compile time, ICC emits a warning.
+// This is actually a good warning: in expressions that don't have any transposing, the condition is
+// known at compile time to be false, and using that, we can avoid generating the code of the assert again
+// and again for all these expressions that don't need it.
+
+template<typename Derived, typename OtherDerived,
+         bool MightHaveTransposeAliasing
+                 = check_transpose_aliasing_compile_time_selector
+                     <blas_traits<Derived>::IsTransposed,OtherDerived>::ret
+        >
+struct checkTransposeAliasing_impl
+{
+    static void run(const Derived& dst, const OtherDerived& other)
+    {
+        eigen_assert((!check_transpose_aliasing_run_time_selector
+                      <typename Derived::Scalar,blas_traits<Derived>::IsTransposed,OtherDerived>
+                      ::run(extract_data(dst), other))
+          && "aliasing detected during transposition, use transposeInPlace() "
+             "or evaluate the rhs into a temporary using .eval()");
+
+    }
+};
+
+template<typename Derived, typename OtherDerived>
+struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
+{
+    static void run(const Derived&, const OtherDerived&)
+    {
+    }
+};
+
+template<typename Dst, typename Src>
+void check_for_aliasing(const Dst &dst, const Src &src)
+{
+  internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src);
+}
+
+} // end namespace internal
+
+#endif // EIGEN_NO_DEBUG
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRANSPOSE_H
diff --git a/third-party/Eigen/src/Core/Transpositions.h b/third-party/Eigen/src/Core/Transpositions.h
new file mode 100644
index 00000000..7718625e
--- /dev/null
+++ b/third-party/Eigen/src/Core/Transpositions.h
@@ -0,0 +1,368 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRANSPOSITIONS_H
+#define EIGEN_TRANSPOSITIONS_H
+
+namespace Eigen { 
+
+template<typename Derived>
+class TranspositionsBase
+{
+    typedef internal::traits<Derived> Traits;
+    
+  public:
+
+    typedef typename Traits::IndicesType IndicesType;
+    typedef typename IndicesType::Scalar StorageIndex;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+
+    Derived& derived() { return *static_cast<Derived*>(this); }
+    const Derived& derived() const { return *static_cast<const Derived*>(this); }
+
+    /** Copies the \a other transpositions into \c *this */
+    template<typename OtherDerived>
+    Derived& operator=(const TranspositionsBase<OtherDerived>& other)
+    {
+      indices() = other.indices();
+      return derived();
+    }
+
+    /** \returns the number of transpositions */
+    Index size() const { return indices().size(); }
+    /** \returns the number of rows of the equivalent permutation matrix */
+    Index rows() const { return indices().size(); }
+    /** \returns the number of columns of the equivalent permutation matrix */
+    Index cols() const { return indices().size(); }
+
+    /** Direct access to the underlying index vector */
+    inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); }
+    /** Direct access to the underlying index vector */
+    inline StorageIndex& coeffRef(Index i) { return indices().coeffRef(i); }
+    /** Direct access to the underlying index vector */
+    inline const StorageIndex& operator()(Index i) const { return indices()(i); }
+    /** Direct access to the underlying index vector */
+    inline StorageIndex& operator()(Index i) { return indices()(i); }
+    /** Direct access to the underlying index vector */
+    inline const StorageIndex& operator[](Index i) const { return indices()(i); }
+    /** Direct access to the underlying index vector */
+    inline StorageIndex& operator[](Index i) { return indices()(i); }
+
+    /** const version of indices(). */
+    const IndicesType& indices() const { return derived().indices(); }
+    /** \returns a reference to the stored array representing the transpositions. */
+    IndicesType& indices() { return derived().indices(); }
+
+    /** Resizes to given size. */
+    inline void resize(Index newSize)
+    {
+      indices().resize(newSize);
+    }
+
+    /** Sets \c *this to represents an identity transformation */
+    void setIdentity()
+    {
+      for(StorageIndex i = 0; i < indices().size(); ++i)
+        coeffRef(i) = i;
+    }
+
+    // FIXME: do we want such methods ?
+    // might be usefull when the target matrix expression is complex, e.g.:
+    // object.matrix().block(..,..,..,..) = trans * object.matrix().block(..,..,..,..);
+    /*
+    template<typename MatrixType>
+    void applyForwardToRows(MatrixType& mat) const
+    {
+      for(Index k=0 ; k<size() ; ++k)
+        if(m_indices(k)!=k)
+          mat.row(k).swap(mat.row(m_indices(k)));
+    }
+
+    template<typename MatrixType>
+    void applyBackwardToRows(MatrixType& mat) const
+    {
+      for(Index k=size()-1 ; k>=0 ; --k)
+        if(m_indices(k)!=k)
+          mat.row(k).swap(mat.row(m_indices(k)));
+    }
+    */
+
+    /** \returns the inverse transformation */
+    inline Transpose<TranspositionsBase> inverse() const
+    { return Transpose<TranspositionsBase>(derived()); }
+
+    /** \returns the tranpose transformation */
+    inline Transpose<TranspositionsBase> transpose() const
+    { return Transpose<TranspositionsBase>(derived()); }
+
+  protected:
+};
+
+namespace internal {
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
+struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
+ : traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
+{
+  typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
+  typedef TranspositionsStorage StorageKind;
+};
+}
+
+/** \class Transpositions
+  * \ingroup Core_Module
+  *
+  * \brief Represents a sequence of transpositions (row/column interchange)
+  *
+  * \tparam SizeAtCompileTime the number of transpositions, or Dynamic
+  * \tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
+  *
+  * This class represents a permutation transformation as a sequence of \em n transpositions
+  * \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
+  * Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
+  * the rows \c i and \c indices[i] of the matrix \c M.
+  * A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
+  *
+  * Compared to the class PermutationMatrix, such a sequence of transpositions is what is
+  * computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
+  *
+  * To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
+  * \code
+  * Transpositions tr;
+  * MatrixXf mat;
+  * mat = tr * mat;
+  * \endcode
+  * In this example, we detect that the matrix appears on both side, and so the transpositions
+  * are applied in-place without any temporary or extra copy.
+  *
+  * \sa class PermutationMatrix
+  */
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
+class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
+{
+    typedef internal::traits<Transpositions> Traits;
+  public:
+
+    typedef TranspositionsBase<Transpositions> Base;
+    typedef typename Traits::IndicesType IndicesType;
+    typedef typename IndicesType::Scalar StorageIndex;
+
+    inline Transpositions() {}
+
+    /** Copy constructor. */
+    template<typename OtherDerived>
+    inline Transpositions(const TranspositionsBase<OtherDerived>& other)
+      : m_indices(other.indices()) {}
+
+    /** Generic constructor from expression of the transposition indices. */
+    template<typename Other>
+    explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
+    {}
+
+    /** Copies the \a other transpositions into \c *this */
+    template<typename OtherDerived>
+    Transpositions& operator=(const TranspositionsBase<OtherDerived>& other)
+    {
+      return Base::operator=(other);
+    }
+
+    /** Constructs an uninitialized permutation matrix of given size.
+      */
+    inline Transpositions(Index size) : m_indices(size)
+    {}
+
+    /** const version of indices(). */
+    const IndicesType& indices() const { return m_indices; }
+    /** \returns a reference to the stored array representing the transpositions. */
+    IndicesType& indices() { return m_indices; }
+
+  protected:
+
+    IndicesType m_indices;
+};
+
+
+namespace internal {
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
+struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,_PacketAccess> >
+ : traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
+{
+  typedef Map<const Matrix<_StorageIndex,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
+  typedef _StorageIndex StorageIndex;
+  typedef TranspositionsStorage StorageKind;
+};
+}
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int PacketAccess>
+class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess>
+ : public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess> >
+{
+    typedef internal::traits<Map> Traits;
+  public:
+
+    typedef TranspositionsBase<Map> Base;
+    typedef typename Traits::IndicesType IndicesType;
+    typedef typename IndicesType::Scalar StorageIndex;
+
+    explicit inline Map(const StorageIndex* indicesPtr)
+      : m_indices(indicesPtr)
+    {}
+
+    inline Map(const StorageIndex* indicesPtr, Index size)
+      : m_indices(indicesPtr,size)
+    {}
+
+    /** Copies the \a other transpositions into \c *this */
+    template<typename OtherDerived>
+    Map& operator=(const TranspositionsBase<OtherDerived>& other)
+    {
+      return Base::operator=(other);
+    }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    /** This is a special case of the templated operator=. Its purpose is to
+      * prevent a default operator= from hiding the templated operator=.
+      */
+    Map& operator=(const Map& other)
+    {
+      m_indices = other.m_indices;
+      return *this;
+    }
+    #endif
+
+    /** const version of indices(). */
+    const IndicesType& indices() const { return m_indices; }
+    
+    /** \returns a reference to the stored array representing the transpositions. */
+    IndicesType& indices() { return m_indices; }
+
+  protected:
+
+    IndicesType m_indices;
+};
+
+namespace internal {
+template<typename _IndicesType>
+struct traits<TranspositionsWrapper<_IndicesType> >
+ : traits<PermutationWrapper<_IndicesType> >
+{
+  typedef TranspositionsStorage StorageKind;
+};
+}
+
+template<typename _IndicesType>
+class TranspositionsWrapper
+ : public TranspositionsBase<TranspositionsWrapper<_IndicesType> >
+{
+    typedef internal::traits<TranspositionsWrapper> Traits;
+  public:
+
+    typedef TranspositionsBase<TranspositionsWrapper> Base;
+    typedef typename Traits::IndicesType IndicesType;
+    typedef typename IndicesType::Scalar StorageIndex;
+
+    explicit inline TranspositionsWrapper(IndicesType& indices)
+      : m_indices(indices)
+    {}
+
+    /** Copies the \a other transpositions into \c *this */
+    template<typename OtherDerived>
+    TranspositionsWrapper& operator=(const TranspositionsBase<OtherDerived>& other)
+    {
+      return Base::operator=(other);
+    }
+
+    /** const version of indices(). */
+    const IndicesType& indices() const { return m_indices; }
+
+    /** \returns a reference to the stored array representing the transpositions. */
+    IndicesType& indices() { return m_indices; }
+
+  protected:
+
+    typename IndicesType::Nested m_indices;
+};
+
+
+
+/** \returns the \a matrix with the \a transpositions applied to the columns.
+  */
+template<typename MatrixDerived, typename TranspositionsDerived>
+EIGEN_DEVICE_FUNC
+const Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
+operator*(const MatrixBase<MatrixDerived> &matrix,
+          const TranspositionsBase<TranspositionsDerived>& transpositions)
+{
+  return Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
+            (matrix.derived(), transpositions.derived());
+}
+
+/** \returns the \a matrix with the \a transpositions applied to the rows.
+  */
+template<typename TranspositionsDerived, typename MatrixDerived>
+EIGEN_DEVICE_FUNC
+const Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
+operator*(const TranspositionsBase<TranspositionsDerived> &transpositions,
+          const MatrixBase<MatrixDerived>& matrix)
+{
+  return Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
+            (transpositions.derived(), matrix.derived());
+}
+
+// Template partial specialization for transposed/inverse transpositions
+
+namespace internal {
+
+template<typename Derived>
+struct traits<Transpose<TranspositionsBase<Derived> > >
+ : traits<Derived>
+{};
+
+} // end namespace internal
+
+template<typename TranspositionsDerived>
+class Transpose<TranspositionsBase<TranspositionsDerived> >
+{
+    typedef TranspositionsDerived TranspositionType;
+    typedef typename TranspositionType::IndicesType IndicesType;
+  public:
+
+    explicit Transpose(const TranspositionType& t) : m_transpositions(t) {}
+
+    Index size() const { return m_transpositions.size(); }
+    Index rows() const { return m_transpositions.size(); }
+    Index cols() const { return m_transpositions.size(); }
+
+    /** \returns the \a matrix with the inverse transpositions applied to the columns.
+      */
+    template<typename OtherDerived> friend
+    const Product<OtherDerived, Transpose, AliasFreeProduct>
+    operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
+    {
+      return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt);
+    }
+
+    /** \returns the \a matrix with the inverse transpositions applied to the rows.
+      */
+    template<typename OtherDerived>
+    const Product<Transpose, OtherDerived, AliasFreeProduct>
+    operator*(const MatrixBase<OtherDerived>& matrix) const
+    {
+      return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
+    }
+    
+    const TranspositionType& nestedExpression() const { return m_transpositions; }
+
+  protected:
+    const TranspositionType& m_transpositions;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRANSPOSITIONS_H
diff --git a/third-party/Eigen/src/Core/TriangularMatrix.h b/third-party/Eigen/src/Core/TriangularMatrix.h
new file mode 100644
index 00000000..9abb7e31
--- /dev/null
+++ b/third-party/Eigen/src/Core/TriangularMatrix.h
@@ -0,0 +1,985 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIANGULARMATRIX_H
+#define EIGEN_TRIANGULARMATRIX_H
+
+namespace Eigen { 
+
+namespace internal {
+  
+template<int Side, typename TriangularType, typename Rhs> struct triangular_solve_retval;
+  
+}
+
+/** \class TriangularBase
+  * \ingroup Core_Module
+  *
+  * \brief Base class for triangular part in a matrix
+  */
+template<typename Derived> class TriangularBase : public EigenBase<Derived>
+{
+  public:
+
+    enum {
+      Mode = internal::traits<Derived>::Mode,
+      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
+      
+      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
+                                                   internal::traits<Derived>::ColsAtCompileTime>::ret),
+      /**< This is equal to the number of coefficients, i.e. the number of
+          * rows times the number of columns, or to \a Dynamic if this is not
+          * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
+      
+      MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
+                                                   internal::traits<Derived>::MaxColsAtCompileTime>::ret)
+        
+    };
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+    typedef typename internal::traits<Derived>::StorageKind StorageKind;
+    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+    typedef typename internal::traits<Derived>::FullMatrixType DenseMatrixType;
+    typedef DenseMatrixType DenseType;
+    typedef Derived const& Nested;
+
+    EIGEN_DEVICE_FUNC
+    inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
+
+    EIGEN_DEVICE_FUNC
+    inline Index rows() const { return derived().rows(); }
+    EIGEN_DEVICE_FUNC
+    inline Index cols() const { return derived().cols(); }
+    EIGEN_DEVICE_FUNC
+    inline Index outerStride() const { return derived().outerStride(); }
+    EIGEN_DEVICE_FUNC
+    inline Index innerStride() const { return derived().innerStride(); }
+    
+    // dummy resize function
+    void resize(Index rows, Index cols)
+    {
+      EIGEN_UNUSED_VARIABLE(rows);
+      EIGEN_UNUSED_VARIABLE(cols);
+      eigen_assert(rows==this->rows() && cols==this->cols());
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline Scalar coeff(Index row, Index col) const  { return derived().coeff(row,col); }
+    EIGEN_DEVICE_FUNC
+    inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row,col); }
+
+    /** \see MatrixBase::copyCoeff(row,col)
+      */
+    template<typename Other>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, Other& other)
+    {
+      derived().coeffRef(row, col) = other.coeff(row, col);
+    }
+
+    EIGEN_DEVICE_FUNC
+    inline Scalar operator()(Index row, Index col) const
+    {
+      check_coordinates(row, col);
+      return coeff(row,col);
+    }
+    EIGEN_DEVICE_FUNC
+    inline Scalar& operator()(Index row, Index col)
+    {
+      check_coordinates(row, col);
+      return coeffRef(row,col);
+    }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    EIGEN_DEVICE_FUNC
+    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+    EIGEN_DEVICE_FUNC
+    inline Derived& derived() { return *static_cast<Derived*>(this); }
+    #endif // not EIGEN_PARSED_BY_DOXYGEN
+
+    template<typename DenseDerived>
+    EIGEN_DEVICE_FUNC
+    void evalTo(MatrixBase<DenseDerived> &other) const;
+    template<typename DenseDerived>
+    EIGEN_DEVICE_FUNC
+    void evalToLazy(MatrixBase<DenseDerived> &other) const;
+
+    EIGEN_DEVICE_FUNC
+    DenseMatrixType toDenseMatrix() const
+    {
+      DenseMatrixType res(rows(), cols());
+      evalToLazy(res);
+      return res;
+    }
+
+  protected:
+
+    void check_coordinates(Index row, Index col) const
+    {
+      EIGEN_ONLY_USED_FOR_DEBUG(row);
+      EIGEN_ONLY_USED_FOR_DEBUG(col);
+      eigen_assert(col>=0 && col<cols() && row>=0 && row<rows());
+      const int mode = int(Mode) & ~SelfAdjoint;
+      EIGEN_ONLY_USED_FOR_DEBUG(mode);
+      eigen_assert((mode==Upper && col>=row)
+                || (mode==Lower && col<=row)
+                || ((mode==StrictlyUpper || mode==UnitUpper) && col>row)
+                || ((mode==StrictlyLower || mode==UnitLower) && col<row));
+    }
+
+    #ifdef EIGEN_INTERNAL_DEBUGGING
+    void check_coordinates_internal(Index row, Index col) const
+    {
+      check_coordinates(row, col);
+    }
+    #else
+    void check_coordinates_internal(Index , Index ) const {}
+    #endif
+
+};
+
+/** \class TriangularView
+  * \ingroup Core_Module
+  *
+  * \brief Expression of a triangular part in a matrix
+  *
+  * \param MatrixType the type of the object in which we are taking the triangular part
+  * \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
+  *             #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
+  *             This is in fact a bit field; it must have either #Upper or #Lower, 
+  *             and additionally it may have #UnitDiag or #ZeroDiag or neither.
+  *
+  * This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
+  * matrices one should speak of "trapezoid" parts. This class is the return type
+  * of MatrixBase::triangularView() and SparseMatrixBase::triangularView(), and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::triangularView()
+  */
+namespace internal {
+template<typename MatrixType, unsigned int _Mode>
+struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
+{
+  typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
+  typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
+  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
+  typedef typename MatrixType::PlainObject FullMatrixType;
+  typedef MatrixType ExpressionType;
+  enum {
+    Mode = _Mode,
+    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+    Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits | FlagsLvalueBit) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)))
+  };
+};
+}
+
+template<typename _MatrixType, unsigned int _Mode, typename StorageKind> class TriangularViewImpl;
+
+template<typename _MatrixType, unsigned int _Mode> class TriangularView
+  : public TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind >
+{
+  public:
+
+    typedef TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > Base;
+    typedef typename internal::traits<TriangularView>::Scalar Scalar;
+    typedef _MatrixType MatrixType;
+
+  protected:
+    typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested;
+    typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;
+
+    typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
+    
+  public:
+
+    typedef typename internal::traits<TriangularView>::StorageKind StorageKind;
+    typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned NestedExpression;
+
+    enum {
+      Mode = _Mode,
+      Flags = internal::traits<TriangularView>::Flags,
+      TransposeMode = (Mode & Upper ? Lower : 0)
+                    | (Mode & Lower ? Upper : 0)
+                    | (Mode & (UnitDiag))
+                    | (Mode & (ZeroDiag)),
+      IsVectorAtCompileTime = false
+    };
+
+    EIGEN_DEVICE_FUNC
+    explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
+    {}
+    
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TriangularView)
+
+    /** \copydoc EigenBase::rows() */
+    EIGEN_DEVICE_FUNC
+    inline Index rows() const { return m_matrix.rows(); }
+    /** \copydoc EigenBase::cols() */
+    EIGEN_DEVICE_FUNC
+    inline Index cols() const { return m_matrix.cols(); }
+
+    /** \returns a const reference to the nested expression */
+    EIGEN_DEVICE_FUNC
+    const NestedExpression& nestedExpression() const { return m_matrix; }
+
+    /** \returns a reference to the nested expression */
+    EIGEN_DEVICE_FUNC
+    NestedExpression& nestedExpression() { return m_matrix; }
+    
+    typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
+    /** \sa MatrixBase::conjugate() const */
+    EIGEN_DEVICE_FUNC
+    inline const ConjugateReturnType conjugate() const
+    { return ConjugateReturnType(m_matrix.conjugate()); }
+
+    typedef TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
+    /** \sa MatrixBase::adjoint() const */
+    EIGEN_DEVICE_FUNC
+    inline const AdjointReturnType adjoint() const
+    { return AdjointReturnType(m_matrix.adjoint()); }
+
+    typedef TriangularView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
+     /** \sa MatrixBase::transpose() */
+    EIGEN_DEVICE_FUNC
+    inline TransposeReturnType transpose()
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
+      typename MatrixType::TransposeReturnType tmp(m_matrix);
+      return TransposeReturnType(tmp);
+    }
+    
+    typedef TriangularView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
+    /** \sa MatrixBase::transpose() const */
+    EIGEN_DEVICE_FUNC
+    inline const ConstTransposeReturnType transpose() const
+    {
+      return ConstTransposeReturnType(m_matrix.transpose());
+    }
+
+    template<typename Other>
+    EIGEN_DEVICE_FUNC
+    inline const Solve<TriangularView, Other> 
+    solve(const MatrixBase<Other>& other) const
+    { return Solve<TriangularView, Other>(*this, other.derived()); }
+    
+  // workaround MSVC ICE
+  #if EIGEN_COMP_MSVC
+    template<int Side, typename Other>
+    EIGEN_DEVICE_FUNC
+    inline const internal::triangular_solve_retval<Side,TriangularView, Other>
+    solve(const MatrixBase<Other>& other) const
+    { return Base::template solve<Side>(other); }
+  #else
+    using Base::solve;
+  #endif
+
+    /** \returns a selfadjoint view of the referenced triangular part which must be either \c #Upper or \c #Lower.
+      *
+      * This is a shortcut for \code this->nestedExpression().selfadjointView<(*this)::Mode>() \endcode
+      * \sa MatrixBase::selfadjointView() */
+    EIGEN_DEVICE_FUNC
+    SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
+    {
+      EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);
+      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
+    }
+
+    /** This is the const version of selfadjointView() */
+    EIGEN_DEVICE_FUNC
+    const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
+    {
+      EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);
+      return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
+    }
+
+
+    /** \returns the determinant of the triangular matrix
+      * \sa MatrixBase::determinant() */
+    EIGEN_DEVICE_FUNC
+    Scalar determinant() const
+    {
+      if (Mode & UnitDiag)
+        return 1;
+      else if (Mode & ZeroDiag)
+        return 0;
+      else
+        return m_matrix.diagonal().prod();
+    }
+      
+  protected:
+
+    MatrixTypeNested m_matrix;
+};
+
+/** \ingroup Core_Module
+  *
+  * \brief Base class for a triangular part in a \b dense matrix
+  *
+  * This class is an abstract base class of class TriangularView, and objects of type TriangularViewImpl cannot be instantiated.
+  * It extends class TriangularView with additional methods which available for dense expressions only.
+  *
+  * \sa class TriangularView, MatrixBase::triangularView()
+  */
+template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_MatrixType,_Mode,Dense>
+  : public TriangularBase<TriangularView<_MatrixType, _Mode> >
+{
+  public:
+
+    typedef TriangularView<_MatrixType, _Mode> TriangularViewType;
+    typedef TriangularBase<TriangularViewType> Base;
+    typedef typename internal::traits<TriangularViewType>::Scalar Scalar;
+
+    typedef _MatrixType MatrixType;
+    typedef typename MatrixType::PlainObject DenseMatrixType;
+    typedef DenseMatrixType PlainObject;
+
+  public:
+    using Base::evalToLazy;
+    using Base::derived;
+
+    typedef typename internal::traits<TriangularViewType>::StorageKind StorageKind;
+
+    enum {
+      Mode = _Mode,
+      Flags = internal::traits<TriangularViewType>::Flags
+    };
+
+    /** \returns the outer-stride of the underlying dense matrix
+      * \sa DenseCoeffsBase::outerStride() */
+    EIGEN_DEVICE_FUNC
+    inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
+    /** \returns the inner-stride of the underlying dense matrix
+      * \sa DenseCoeffsBase::innerStride() */
+    EIGEN_DEVICE_FUNC
+    inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
+
+    /** \sa MatrixBase::operator+=() */
+    template<typename Other>
+    EIGEN_DEVICE_FUNC
+    TriangularViewType&  operator+=(const DenseBase<Other>& other) {
+      internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar,typename Other::Scalar>());
+      return derived();
+    }
+    /** \sa MatrixBase::operator-=() */
+    template<typename Other>
+    EIGEN_DEVICE_FUNC
+    TriangularViewType&  operator-=(const DenseBase<Other>& other) {
+      internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar,typename Other::Scalar>());
+      return derived();
+    }
+    
+    /** \sa MatrixBase::operator*=() */
+    EIGEN_DEVICE_FUNC
+    TriangularViewType&  operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() * other; }
+    /** \sa DenseBase::operator/=() */
+    EIGEN_DEVICE_FUNC
+    TriangularViewType&  operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() / other; }
+
+    /** \sa MatrixBase::fill() */
+    EIGEN_DEVICE_FUNC
+    void fill(const Scalar& value) { setConstant(value); }
+    /** \sa MatrixBase::setConstant() */
+    EIGEN_DEVICE_FUNC
+    TriangularViewType& setConstant(const Scalar& value)
+    { return *this = MatrixType::Constant(derived().rows(), derived().cols(), value); }
+    /** \sa MatrixBase::setZero() */
+    EIGEN_DEVICE_FUNC
+    TriangularViewType& setZero() { return setConstant(Scalar(0)); }
+    /** \sa MatrixBase::setOnes() */
+    EIGEN_DEVICE_FUNC
+    TriangularViewType& setOnes() { return setConstant(Scalar(1)); }
+
+    /** \sa MatrixBase::coeff()
+      * \warning the coordinates must fit into the referenced triangular part
+      */
+    EIGEN_DEVICE_FUNC
+    inline Scalar coeff(Index row, Index col) const
+    {
+      Base::check_coordinates_internal(row, col);
+      return derived().nestedExpression().coeff(row, col);
+    }
+
+    /** \sa MatrixBase::coeffRef()
+      * \warning the coordinates must fit into the referenced triangular part
+      */
+    EIGEN_DEVICE_FUNC
+    inline Scalar& coeffRef(Index row, Index col)
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType);
+      Base::check_coordinates_internal(row, col);
+      return derived().nestedExpression().coeffRef(row, col);
+    }
+
+    /** Assigns a triangular matrix to a triangular part of a dense matrix */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    TriangularViewType& operator=(const TriangularBase<OtherDerived>& other);
+
+    /** Shortcut for\code *this = other.other.triangularView<(*this)::Mode>() \endcode */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    TriangularViewType& operator=(const MatrixBase<OtherDerived>& other);
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    EIGEN_DEVICE_FUNC
+    TriangularViewType& operator=(const TriangularViewImpl& other)
+    { return *this = other.derived().nestedExpression(); }
+
+    /** \deprecated */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void lazyAssign(const TriangularBase<OtherDerived>& other);
+
+    /** \deprecated */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void lazyAssign(const MatrixBase<OtherDerived>& other);
+#endif
+
+    /** Efficient triangular matrix times vector/matrix product */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    const Product<TriangularViewType,OtherDerived>
+    operator*(const MatrixBase<OtherDerived>& rhs) const
+    {
+      return Product<TriangularViewType,OtherDerived>(derived(), rhs.derived());
+    }
+
+    /** Efficient vector/matrix times triangular matrix product */
+    template<typename OtherDerived> friend
+    EIGEN_DEVICE_FUNC
+    const Product<OtherDerived,TriangularViewType>
+    operator*(const MatrixBase<OtherDerived>& lhs, const TriangularViewImpl& rhs)
+    {
+      return Product<OtherDerived,TriangularViewType>(lhs.derived(),rhs.derived());
+    }
+
+    /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
+      *
+      * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
+      * \a Side==OnTheLeft (the default), or the right-inverse-multiply  \a other * inverse(\c *this) if
+      * \a Side==OnTheRight.
+      *
+      * Note that the template parameter \c Side can be ommitted, in which case \c Side==OnTheLeft
+      *
+      * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
+      * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
+      * is an upper (resp. lower) triangular matrix.
+      *
+      * Example: \include Triangular_solve.cpp
+      * Output: \verbinclude Triangular_solve.out
+      *
+      * This function returns an expression of the inverse-multiply and can works in-place if it is assigned
+      * to the same matrix or vector \a other.
+      *
+      * For users coming from BLAS, this function (and more specifically solveInPlace()) offer
+      * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
+      *
+      * \sa TriangularView::solveInPlace()
+      */
+    template<int Side, typename Other>
+    EIGEN_DEVICE_FUNC
+    inline const internal::triangular_solve_retval<Side,TriangularViewType, Other>
+    solve(const MatrixBase<Other>& other) const;
+
+    /** "in-place" version of TriangularView::solve() where the result is written in \a other
+      *
+      * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
+      * This function will const_cast it, so constness isn't honored here.
+      *
+      * Note that the template parameter \c Side can be ommitted, in which case \c Side==OnTheLeft
+      *
+      * See TriangularView:solve() for the details.
+      */
+    template<int Side, typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void solveInPlace(const MatrixBase<OtherDerived>& other) const;
+
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void solveInPlace(const MatrixBase<OtherDerived>& other) const
+    { return solveInPlace<OnTheLeft>(other); }
+
+    /** Swaps the coefficients of the common triangular parts of two matrices */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+    void swap(TriangularBase<OtherDerived> &other)
+#else
+    void swap(TriangularBase<OtherDerived> const & other)
+#endif
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
+      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
+    }
+
+    /** \deprecated
+      * Shortcut for \code (*this).swap(other.triangularView<(*this)::Mode>()) \endcode */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    void swap(MatrixBase<OtherDerived> const & other)
+    {
+      EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
+      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
+    }
+
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {
+      if(!internal::is_same_dense(dst,rhs))
+        dst = rhs;
+      this->solveInPlace(dst);
+    }
+
+    template<typename ProductType>
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, bool beta);
+  protected:
+    EIGEN_DEFAULT_COPY_CONSTRUCTOR(TriangularViewImpl)
+    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TriangularViewImpl)
+
+};
+
+/***************************************************************************
+* Implementation of triangular evaluation/assignment
+***************************************************************************/
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+// FIXME should we keep that possibility
+template<typename MatrixType, unsigned int Mode>
+template<typename OtherDerived>
+inline TriangularView<MatrixType, Mode>&
+TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
+{
+  internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
+  return derived();
+}
+
+// FIXME should we keep that possibility
+template<typename MatrixType, unsigned int Mode>
+template<typename OtherDerived>
+void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)
+{
+  internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>());
+}
+
+
+
+template<typename MatrixType, unsigned int Mode>
+template<typename OtherDerived>
+inline TriangularView<MatrixType, Mode>&
+TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other)
+{
+  eigen_assert(Mode == int(OtherDerived::Mode));
+  internal::call_assignment(derived(), other.derived());
+  return derived();
+}
+
+template<typename MatrixType, unsigned int Mode>
+template<typename OtherDerived>
+void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other)
+{
+  eigen_assert(Mode == int(OtherDerived::Mode));
+  internal::call_assignment_no_alias(derived(), other.derived());
+}
+#endif
+
+/***************************************************************************
+* Implementation of TriangularBase methods
+***************************************************************************/
+
+/** Assigns a triangular or selfadjoint matrix to a dense matrix.
+  * If the matrix is triangular, the opposite part is set to zero. */
+template<typename Derived>
+template<typename DenseDerived>
+void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
+{
+  evalToLazy(other.derived());
+}
+
+/***************************************************************************
+* Implementation of TriangularView methods
+***************************************************************************/
+
+/***************************************************************************
+* Implementation of MatrixBase methods
+***************************************************************************/
+
+/**
+  * \returns an expression of a triangular view extracted from the current matrix
+  *
+  * The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
+  * \c #Lower, \c #StrictlyLower, \c #UnitLower.
+  *
+  * Example: \include MatrixBase_triangularView.cpp
+  * Output: \verbinclude MatrixBase_triangularView.out
+  *
+  * \sa class TriangularView
+  */
+template<typename Derived>
+template<unsigned int Mode>
+typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type
+MatrixBase<Derived>::triangularView()
+{
+  return typename TriangularViewReturnType<Mode>::Type(derived());
+}
+
+/** This is the const version of MatrixBase::triangularView() */
+template<typename Derived>
+template<unsigned int Mode>
+typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type
+MatrixBase<Derived>::triangularView() const
+{
+  return typename ConstTriangularViewReturnType<Mode>::Type(derived());
+}
+
+/** \returns true if *this is approximately equal to an upper triangular matrix,
+  *          within the precision given by \a prec.
+  *
+  * \sa isLowerTriangular()
+  */
+template<typename Derived>
+bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
+{
+  RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
+  for(Index j = 0; j < cols(); ++j)
+  {
+    Index maxi = numext::mini(j, rows()-1);
+    for(Index i = 0; i <= maxi; ++i)
+    {
+      RealScalar absValue = numext::abs(coeff(i,j));
+      if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
+    }
+  }
+  RealScalar threshold = maxAbsOnUpperPart * prec;
+  for(Index j = 0; j < cols(); ++j)
+    for(Index i = j+1; i < rows(); ++i)
+      if(numext::abs(coeff(i, j)) > threshold) return false;
+  return true;
+}
+
+/** \returns true if *this is approximately equal to a lower triangular matrix,
+  *          within the precision given by \a prec.
+  *
+  * \sa isUpperTriangular()
+  */
+template<typename Derived>
+bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
+{
+  RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
+  for(Index j = 0; j < cols(); ++j)
+    for(Index i = j; i < rows(); ++i)
+    {
+      RealScalar absValue = numext::abs(coeff(i,j));
+      if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
+    }
+  RealScalar threshold = maxAbsOnLowerPart * prec;
+  for(Index j = 1; j < cols(); ++j)
+  {
+    Index maxi = numext::mini(j, rows()-1);
+    for(Index i = 0; i < maxi; ++i)
+      if(numext::abs(coeff(i, j)) > threshold) return false;
+  }
+  return true;
+}
+
+
+/***************************************************************************
+****************************************************************************
+* Evaluators and Assignment of triangular expressions
+***************************************************************************
+***************************************************************************/
+
+namespace internal {
+
+  
+// TODO currently a triangular expression has the form TriangularView<.,.>
+//      in the future triangular-ness should be defined by the expression traits
+//      such that Transpose<TriangularView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
+template<typename MatrixType, unsigned int Mode>
+struct evaluator_traits<TriangularView<MatrixType,Mode> >
+{
+  typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
+  typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;
+};
+
+template<typename MatrixType, unsigned int Mode>
+struct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased>
+ : evaluator<typename internal::remove_all<MatrixType>::type>
+{
+  typedef TriangularView<MatrixType,Mode> XprType;
+  typedef evaluator<typename internal::remove_all<MatrixType>::type> Base;
+  unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}
+};
+
+// Additional assignment kinds:
+struct Triangular2Triangular    {};
+struct Triangular2Dense         {};
+struct Dense2Triangular         {};
+
+
+template<typename Kernel, unsigned int Mode, int UnrollCount, bool ClearOpposite> struct triangular_assignment_loop;
+
+ 
+/** \internal Specialization of the dense assignment kernel for triangular matrices.
+  * The main difference is that the triangular, diagonal, and opposite parts are processed through three different functions.
+  * \tparam UpLo must be either Lower or Upper
+  * \tparam Mode must be either 0, UnitDiag, ZeroDiag, or SelfAdjoint
+  */
+template<int UpLo, int Mode, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
+class triangular_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
+{
+protected:
+  typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
+  typedef typename Base::DstXprType DstXprType;
+  typedef typename Base::SrcXprType SrcXprType;
+  using Base::m_dst;
+  using Base::m_src;
+  using Base::m_functor;
+public:
+  
+  typedef typename Base::DstEvaluatorType DstEvaluatorType;
+  typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::AssignmentTraits AssignmentTraits;
+  
+  
+  EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+    : Base(dst, src, func, dstExpr)
+  {}
+  
+#ifdef EIGEN_INTERNAL_DEBUGGING
+  EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
+  {
+    eigen_internal_assert(row!=col);
+    Base::assignCoeff(row,col);
+  }
+#else
+  using Base::assignCoeff;
+#endif
+  
+  EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
+  {
+         if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1));
+    else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0));
+    else if(Mode==0)                       Base::assignCoeff(id,id);
+  }
+  
+  EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col)
+  { 
+    eigen_internal_assert(row!=col);
+    if(SetOpposite)
+      m_functor.assignCoeff(m_dst.coeffRef(row,col), Scalar(0));
+  }
+};
+
+template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
+{
+  typedef evaluator<DstXprType> DstEvaluatorType;
+  typedef evaluator<SrcXprType> SrcEvaluatorType;
+
+  SrcEvaluatorType srcEvaluator(src);
+
+  Index dstRows = src.rows();
+  Index dstCols = src.cols();
+  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+    dst.resize(dstRows, dstCols);
+  DstEvaluatorType dstEvaluator(dst);
+    
+  typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
+                                              DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
+  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
+  
+  enum {
+      unroll = DstXprType::SizeAtCompileTime != Dynamic
+            && SrcEvaluatorType::CoeffReadCost < HugeCost
+            && DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT
+    };
+  
+  triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
+}
+
+template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src)
+{
+  call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
+}
+
+template<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; };
+template<> struct AssignmentKind<DenseShape,TriangularShape>      { typedef Triangular2Dense      Kind; };
+template<> struct AssignmentKind<TriangularShape,DenseShape>      { typedef Dense2Triangular      Kind; };
+
+
+template< typename DstXprType, typename SrcXprType, typename Functor>
+struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular>
+{
+  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  {
+    eigen_assert(int(DstXprType::Mode) == int(SrcXprType::Mode));
+    
+    call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);  
+  }
+};
+
+template< typename DstXprType, typename SrcXprType, typename Functor>
+struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense>
+{
+  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  {
+    call_triangular_assignment_loop<SrcXprType::Mode, (SrcXprType::Mode&SelfAdjoint)==0>(dst, src, func);  
+  }
+};
+
+template< typename DstXprType, typename SrcXprType, typename Functor>
+struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular>
+{
+  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  {
+    call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);  
+  }
+};
+
+
+template<typename Kernel, unsigned int Mode, int UnrollCount, bool SetOpposite>
+struct triangular_assignment_loop
+{
+  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
+  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
+  typedef typename DstEvaluatorType::XprType DstXprType;
+  
+  enum {
+    col = (UnrollCount-1) / DstXprType::RowsAtCompileTime,
+    row = (UnrollCount-1) % DstXprType::RowsAtCompileTime
+  };
+  
+  typedef typename Kernel::Scalar Scalar;
+
+  EIGEN_DEVICE_FUNC
+  static inline void run(Kernel &kernel)
+  {
+    triangular_assignment_loop<Kernel, Mode, UnrollCount-1, SetOpposite>::run(kernel);
+    
+    if(row==col)
+      kernel.assignDiagonalCoeff(row);
+    else if( ((Mode&Lower) && row>col) || ((Mode&Upper) && row<col) )
+      kernel.assignCoeff(row,col);
+    else if(SetOpposite)
+      kernel.assignOppositeCoeff(row,col);
+  }
+};
+
+// prevent buggy user code from causing an infinite recursion
+template<typename Kernel, unsigned int Mode, bool SetOpposite>
+struct triangular_assignment_loop<Kernel, Mode, 0, SetOpposite>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(Kernel &) {}
+};
+
+
+
+// TODO: experiment with a recursive assignment procedure splitting the current
+//       triangular part into one rectangular and two triangular parts.
+
+
+template<typename Kernel, unsigned int Mode, bool SetOpposite>
+struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
+{
+  typedef typename Kernel::Scalar Scalar;
+  EIGEN_DEVICE_FUNC
+  static inline void run(Kernel &kernel)
+  {
+    for(Index j = 0; j < kernel.cols(); ++j)
+    {
+      Index maxi = numext::mini(j, kernel.rows());
+      Index i = 0;
+      if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
+      {
+        for(; i < maxi; ++i)
+          if(Mode&Upper) kernel.assignCoeff(i, j);
+          else           kernel.assignOppositeCoeff(i, j);
+      }
+      else
+        i = maxi;
+      
+      if(i<kernel.rows()) // then i==j
+        kernel.assignDiagonalCoeff(i++);
+      
+      if (((Mode&Upper) && SetOpposite) || (Mode&Lower))
+      {
+        for(; i < kernel.rows(); ++i)
+          if(Mode&Lower) kernel.assignCoeff(i, j);
+          else           kernel.assignOppositeCoeff(i, j);
+      }
+    }
+  }
+};
+
+} // end namespace internal
+
+/** Assigns a triangular or selfadjoint matrix to a dense matrix.
+  * If the matrix is triangular, the opposite part is set to zero. */
+template<typename Derived>
+template<typename DenseDerived>
+void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
+{
+  other.derived().resize(this->rows(), this->cols());
+  internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression());
+}
+
+namespace internal {
+  
+// Triangular = Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
+{
+  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+
+    dst._assignProduct(src, 1, 0);
+  }
+};
+
+// Triangular += Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
+{
+  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
+  {
+    dst._assignProduct(src, 1, 1);
+  }
+};
+
+// Triangular -= Product
+template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
+struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
+{
+  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
+  {
+    dst._assignProduct(src, -1, 1);
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULARMATRIX_H
diff --git a/third-party/Eigen/src/Core/VectorBlock.h b/third-party/Eigen/src/Core/VectorBlock.h
new file mode 100644
index 00000000..d72fbf7e
--- /dev/null
+++ b/third-party/Eigen/src/Core/VectorBlock.h
@@ -0,0 +1,96 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_VECTORBLOCK_H
+#define EIGEN_VECTORBLOCK_H
+
+namespace Eigen { 
+
+namespace internal {
+template<typename VectorType, int Size>
+struct traits<VectorBlock<VectorType, Size> >
+  : public traits<Block<VectorType,
+                     traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
+                     traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
+{
+};
+}
+
+/** \class VectorBlock
+  * \ingroup Core_Module
+  *
+  * \brief Expression of a fixed-size or dynamic-size sub-vector
+  *
+  * \tparam VectorType the type of the object in which we are taking a sub-vector
+  * \tparam Size size of the sub-vector we are taking at compile time (optional)
+  *
+  * This class represents an expression of either a fixed-size or dynamic-size sub-vector.
+  * It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and
+  * most of the time this is the only way it is used.
+  *
+  * However, if you want to directly maniputate sub-vector expressions,
+  * for instance if you want to write a function returning such an expression, you
+  * will need to use this class.
+  *
+  * Here is an example illustrating the dynamic case:
+  * \include class_VectorBlock.cpp
+  * Output: \verbinclude class_VectorBlock.out
+  *
+  * \note Even though this expression has dynamic size, in the case where \a VectorType
+  * has fixed size, this expression inherits a fixed maximal size which means that evaluating
+  * it does not cause a dynamic memory allocation.
+  *
+  * Here is an example illustrating the fixed-size case:
+  * \include class_FixedVectorBlock.cpp
+  * Output: \verbinclude class_FixedVectorBlock.out
+  *
+  * \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
+  */
+template<typename VectorType, int Size> class VectorBlock
+  : public Block<VectorType,
+                     internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
+                     internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1>
+{
+    typedef Block<VectorType,
+                     internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
+                     internal::traits<VectorType>::Flags & RowMajorBit ? Size : 1> Base;
+    enum {
+      IsColVector = !(internal::traits<VectorType>::Flags & RowMajorBit)
+    };
+  public:
+    EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock)
+
+    using Base::operator=;
+
+    /** Dynamic-size constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline VectorBlock(VectorType& vector, Index start, Index size)
+      : Base(vector,
+             IsColVector ? start : 0, IsColVector ? 0 : start,
+             IsColVector ? size  : 1, IsColVector ? 1 : size)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
+    }
+
+    /** Fixed-size constructor
+      */
+    EIGEN_DEVICE_FUNC
+    inline VectorBlock(VectorType& vector, Index start)
+      : Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock);
+    }
+};
+
+
+} // end namespace Eigen
+
+#endif // EIGEN_VECTORBLOCK_H
diff --git a/third-party/Eigen/src/Core/VectorwiseOp.h b/third-party/Eigen/src/Core/VectorwiseOp.h
new file mode 100644
index 00000000..4fe267e9
--- /dev/null
+++ b/third-party/Eigen/src/Core/VectorwiseOp.h
@@ -0,0 +1,695 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PARTIAL_REDUX_H
+#define EIGEN_PARTIAL_REDUX_H
+
+namespace Eigen {
+
+/** \class PartialReduxExpr
+  * \ingroup Core_Module
+  *
+  * \brief Generic expression of a partially reduxed matrix
+  *
+  * \tparam MatrixType the type of the matrix we are applying the redux operation
+  * \tparam MemberOp type of the member functor
+  * \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
+  *
+  * This class represents an expression of a partial redux operator of a matrix.
+  * It is the return type of some VectorwiseOp functions,
+  * and most of the time this is the only way it is used.
+  *
+  * \sa class VectorwiseOp
+  */
+
+template< typename MatrixType, typename MemberOp, int Direction>
+class PartialReduxExpr;
+
+namespace internal {
+template<typename MatrixType, typename MemberOp, int Direction>
+struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
+ : traits<MatrixType>
+{
+  typedef typename MemberOp::result_type Scalar;
+  typedef typename traits<MatrixType>::StorageKind StorageKind;
+  typedef typename traits<MatrixType>::XprKind XprKind;
+  typedef typename MatrixType::Scalar InputScalar;
+  enum {
+    RowsAtCompileTime = Direction==Vertical   ? 1 : MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = Direction==Vertical   ? 1 : MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
+    Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,
+    TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime :  MatrixType::ColsAtCompileTime
+  };
+};
+}
+
+template< typename MatrixType, typename MemberOp, int Direction>
+class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type,
+                         internal::no_assignment_operator
+{
+  public:
+
+    typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
+
+    EIGEN_DEVICE_FUNC
+    explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
+      : m_matrix(mat), m_functor(func) {}
+
+    EIGEN_DEVICE_FUNC
+    Index rows() const { return (Direction==Vertical   ? 1 : m_matrix.rows()); }
+    EIGEN_DEVICE_FUNC
+    Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
+
+    EIGEN_DEVICE_FUNC
+    typename MatrixType::Nested nestedExpression() const { return m_matrix; }
+
+    EIGEN_DEVICE_FUNC
+    const MemberOp& functor() const { return m_functor; }
+
+  protected:
+    typename MatrixType::Nested m_matrix;
+    const MemberOp m_functor;
+};
+
+#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST)                               \
+  template <typename ResultType>                                        \
+  struct member_##MEMBER {                                              \
+    EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER)                            \
+    typedef ResultType result_type;                                     \
+    template<typename Scalar, int Size> struct Cost                     \
+    { enum { value = COST }; };                                         \
+    template<typename XprType>                                          \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                               \
+    ResultType operator()(const XprType& mat) const                     \
+    { return mat.MEMBER(); } \
+  }
+
+namespace internal {
+
+EIGEN_MEMBER_FUNCTOR(squaredNorm, Size * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(norm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(stableNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(blueNorm, (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size-1) * functor_traits<scalar_hypot_op<Scalar> >::Cost );
+EIGEN_MEMBER_FUNCTOR(sum, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(mean, (Size-1)*NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost);
+EIGEN_MEMBER_FUNCTOR(minCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(maxCoeff, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(all, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
+EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
+
+template <int p, typename ResultType>
+struct member_lpnorm {
+  typedef ResultType result_type;
+  template<typename Scalar, int Size> struct Cost
+  { enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; };
+  EIGEN_DEVICE_FUNC member_lpnorm() {}
+  template<typename XprType>
+  EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const
+  { return mat.template lpNorm<p>(); }
+};
+
+template <typename BinaryOp, typename Scalar>
+struct member_redux {
+  typedef typename result_of<
+                     BinaryOp(const Scalar&,const Scalar&)
+                   >::type  result_type;
+  template<typename _Scalar, int Size> struct Cost
+  { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
+  EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {}
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase<Derived>& mat) const
+  { return mat.redux(m_functor); }
+  const BinaryOp m_functor;
+};
+}
+
+/** \class VectorwiseOp
+  * \ingroup Core_Module
+  *
+  * \brief Pseudo expression providing partial reduction operations
+  *
+  * \tparam ExpressionType the type of the object on which to do partial reductions
+  * \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
+  *
+  * This class represents a pseudo expression with partial reduction features.
+  * It is the return type of DenseBase::colwise() and DenseBase::rowwise()
+  * and most of the time this is the only way it is used.
+  *
+  * Example: \include MatrixBase_colwise.cpp
+  * Output: \verbinclude MatrixBase_colwise.out
+  *
+  * \sa DenseBase::colwise(), DenseBase::rowwise(), class PartialReduxExpr
+  */
+template<typename ExpressionType, int Direction> class VectorwiseOp
+{
+  public:
+
+    typedef typename ExpressionType::Scalar Scalar;
+    typedef typename ExpressionType::RealScalar RealScalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+    typedef typename internal::ref_selector<ExpressionType>::non_const_type ExpressionTypeNested;
+    typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
+
+    template<template<typename _Scalar> class Functor,
+                      typename Scalar_=Scalar> struct ReturnType
+    {
+      typedef PartialReduxExpr<ExpressionType,
+                               Functor<Scalar_>,
+                               Direction
+                              > Type;
+    };
+
+    template<typename BinaryOp> struct ReduxReturnType
+    {
+      typedef PartialReduxExpr<ExpressionType,
+                               internal::member_redux<BinaryOp,Scalar>,
+                               Direction
+                              > Type;
+    };
+
+    enum {
+      isVertical   = (Direction==Vertical) ? 1 : 0,
+      isHorizontal = (Direction==Horizontal) ? 1 : 0
+    };
+
+  protected:
+
+    typedef typename internal::conditional<isVertical,
+                               typename ExpressionType::ColXpr,
+                               typename ExpressionType::RowXpr>::type SubVector;
+    /** \internal
+      * \returns the i-th subvector according to the \c Direction */
+    EIGEN_DEVICE_FUNC
+    SubVector subVector(Index i)
+    {
+      return SubVector(m_matrix.derived(),i);
+    }
+
+    /** \internal
+      * \returns the number of subvectors in the direction \c Direction */
+    EIGEN_DEVICE_FUNC
+    Index subVectors() const
+    { return isVertical?m_matrix.cols():m_matrix.rows(); }
+
+    template<typename OtherDerived> struct ExtendedType {
+      typedef Replicate<OtherDerived,
+                        isVertical   ? 1 : ExpressionType::RowsAtCompileTime,
+                        isHorizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
+    };
+
+    /** \internal
+      * Replicates a vector to match the size of \c *this */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    typename ExtendedType<OtherDerived>::Type
+    extendedTo(const DenseBase<OtherDerived>& other) const
+    {
+      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1),
+                          YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),
+                          YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+      return typename ExtendedType<OtherDerived>::Type
+                      (other.derived(),
+                       isVertical   ? 1 : m_matrix.rows(),
+                       isHorizontal ? 1 : m_matrix.cols());
+    }
+
+    template<typename OtherDerived> struct OppositeExtendedType {
+      typedef Replicate<OtherDerived,
+                        isHorizontal ? 1 : ExpressionType::RowsAtCompileTime,
+                        isVertical   ? 1 : ExpressionType::ColsAtCompileTime> Type;
+    };
+
+    /** \internal
+      * Replicates a vector in the opposite direction to match the size of \c *this */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    typename OppositeExtendedType<OtherDerived>::Type
+    extendedToOpposite(const DenseBase<OtherDerived>& other) const
+    {
+      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),
+                          YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
+      EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1),
+                          YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
+      return typename OppositeExtendedType<OtherDerived>::Type
+                      (other.derived(),
+                       isHorizontal  ? 1 : m_matrix.rows(),
+                       isVertical    ? 1 : m_matrix.cols());
+    }
+
+  public:
+    EIGEN_DEVICE_FUNC
+    explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
+
+    /** \internal */
+    EIGEN_DEVICE_FUNC
+    inline const ExpressionType& _expression() const { return m_matrix; }
+
+    /** \returns a row or column vector expression of \c *this reduxed by \a func
+      *
+      * The template parameter \a BinaryOp is the type of the functor
+      * of the custom redux operator. Note that func must be an associative operator.
+      *
+      * \sa class VectorwiseOp, DenseBase::colwise(), DenseBase::rowwise()
+      */
+    template<typename BinaryOp>
+    EIGEN_DEVICE_FUNC
+    const typename ReduxReturnType<BinaryOp>::Type
+    redux(const BinaryOp& func = BinaryOp()) const
+    { return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func)); }
+
+    typedef typename ReturnType<internal::member_minCoeff>::Type MinCoeffReturnType;
+    typedef typename ReturnType<internal::member_maxCoeff>::Type MaxCoeffReturnType;
+    typedef typename ReturnType<internal::member_squaredNorm,RealScalar>::Type SquaredNormReturnType;
+    typedef typename ReturnType<internal::member_norm,RealScalar>::Type NormReturnType;
+    typedef typename ReturnType<internal::member_blueNorm,RealScalar>::Type BlueNormReturnType;
+    typedef typename ReturnType<internal::member_stableNorm,RealScalar>::Type StableNormReturnType;
+    typedef typename ReturnType<internal::member_hypotNorm,RealScalar>::Type HypotNormReturnType;
+    typedef typename ReturnType<internal::member_sum>::Type SumReturnType;
+    typedef typename ReturnType<internal::member_mean>::Type MeanReturnType;
+    typedef typename ReturnType<internal::member_all>::Type AllReturnType;
+    typedef typename ReturnType<internal::member_any>::Type AnyReturnType;
+    typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType;
+    typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;
+    typedef Reverse<const ExpressionType, Direction> ConstReverseReturnType;
+    typedef Reverse<ExpressionType, Direction> ReverseReturnType;
+
+    template<int p> struct LpNormReturnType {
+      typedef PartialReduxExpr<ExpressionType, internal::member_lpnorm<p,RealScalar>,Direction> Type;
+    };
+
+    /** \returns a row (or column) vector expression of the smallest coefficient
+      * of each column (or row) of the referenced expression.
+      *
+      * \warning the result is undefined if \c *this contains NaN.
+      *
+      * Example: \include PartialRedux_minCoeff.cpp
+      * Output: \verbinclude PartialRedux_minCoeff.out
+      *
+      * \sa DenseBase::minCoeff() */
+    EIGEN_DEVICE_FUNC
+    const MinCoeffReturnType minCoeff() const
+    { return MinCoeffReturnType(_expression()); }
+
+    /** \returns a row (or column) vector expression of the largest coefficient
+      * of each column (or row) of the referenced expression.
+      *
+      * \warning the result is undefined if \c *this contains NaN.
+      *
+      * Example: \include PartialRedux_maxCoeff.cpp
+      * Output: \verbinclude PartialRedux_maxCoeff.out
+      *
+      * \sa DenseBase::maxCoeff() */
+    EIGEN_DEVICE_FUNC
+    const MaxCoeffReturnType maxCoeff() const
+    { return MaxCoeffReturnType(_expression()); }
+
+    /** \returns a row (or column) vector expression of the squared norm
+      * of each column (or row) of the referenced expression.
+      * This is a vector with real entries, even if the original matrix has complex entries.
+      *
+      * Example: \include PartialRedux_squaredNorm.cpp
+      * Output: \verbinclude PartialRedux_squaredNorm.out
+      *
+      * \sa DenseBase::squaredNorm() */
+    EIGEN_DEVICE_FUNC
+    const SquaredNormReturnType squaredNorm() const
+    { return SquaredNormReturnType(_expression()); }
+
+    /** \returns a row (or column) vector expression of the norm
+      * of each column (or row) of the referenced expression.
+      * This is a vector with real entries, even if the original matrix has complex entries.
+      *
+      * Example: \include PartialRedux_norm.cpp
+      * Output: \verbinclude PartialRedux_norm.out
+      *
+      * \sa DenseBase::norm() */
+    EIGEN_DEVICE_FUNC
+    const NormReturnType norm() const
+    { return NormReturnType(_expression()); }
+
+    /** \returns a row (or column) vector expression of the norm
+      * of each column (or row) of the referenced expression.
+      * This is a vector with real entries, even if the original matrix has complex entries.
+      *
+      * Example: \include PartialRedux_norm.cpp
+      * Output: \verbinclude PartialRedux_norm.out
+      *
+      * \sa DenseBase::norm() */
+    template<int p>
+    EIGEN_DEVICE_FUNC
+    const typename LpNormReturnType<p>::Type lpNorm() const
+    { return typename LpNormReturnType<p>::Type(_expression()); }
+
+
+    /** \returns a row (or column) vector expression of the norm
+      * of each column (or row) of the referenced expression, using
+      * Blue's algorithm.
+      * This is a vector with real entries, even if the original matrix has complex entries.
+      *
+      * \sa DenseBase::blueNorm() */
+    EIGEN_DEVICE_FUNC
+    const BlueNormReturnType blueNorm() const
+    { return BlueNormReturnType(_expression()); }
+
+
+    /** \returns a row (or column) vector expression of the norm
+      * of each column (or row) of the referenced expression, avoiding
+      * underflow and overflow.
+      * This is a vector with real entries, even if the original matrix has complex entries.
+      *
+      * \sa DenseBase::stableNorm() */
+    EIGEN_DEVICE_FUNC
+    const StableNormReturnType stableNorm() const
+    { return StableNormReturnType(_expression()); }
+
+
+    /** \returns a row (or column) vector expression of the norm
+      * of each column (or row) of the referenced expression, avoiding
+      * underflow and overflow using a concatenation of hypot() calls.
+      * This is a vector with real entries, even if the original matrix has complex entries.
+      *
+      * \sa DenseBase::hypotNorm() */
+    EIGEN_DEVICE_FUNC
+    const HypotNormReturnType hypotNorm() const
+    { return HypotNormReturnType(_expression()); }
+
+    /** \returns a row (or column) vector expression of the sum
+      * of each column (or row) of the referenced expression.
+      *
+      * Example: \include PartialRedux_sum.cpp
+      * Output: \verbinclude PartialRedux_sum.out
+      *
+      * \sa DenseBase::sum() */
+    EIGEN_DEVICE_FUNC
+    const SumReturnType sum() const
+    { return SumReturnType(_expression()); }
+
+    /** \returns a row (or column) vector expression of the mean
+    * of each column (or row) of the referenced expression.
+    *
+    * \sa DenseBase::mean() */
+    EIGEN_DEVICE_FUNC
+    const MeanReturnType mean() const
+    { return MeanReturnType(_expression()); }
+
+    /** \returns a row (or column) vector expression representing
+      * whether \b all coefficients of each respective column (or row) are \c true.
+      * This expression can be assigned to a vector with entries of type \c bool.
+      *
+      * \sa DenseBase::all() */
+    EIGEN_DEVICE_FUNC
+    const AllReturnType all() const
+    { return AllReturnType(_expression()); }
+
+    /** \returns a row (or column) vector expression representing
+      * whether \b at \b least one coefficient of each respective column (or row) is \c true.
+      * This expression can be assigned to a vector with entries of type \c bool.
+      *
+      * \sa DenseBase::any() */
+    EIGEN_DEVICE_FUNC
+    const AnyReturnType any() const
+    { return AnyReturnType(_expression()); }
+
+    /** \returns a row (or column) vector expression representing
+      * the number of \c true coefficients of each respective column (or row).
+      * This expression can be assigned to a vector whose entries have the same type as is used to
+      * index entries of the original matrix; for dense matrices, this is \c std::ptrdiff_t .
+      *
+      * Example: \include PartialRedux_count.cpp
+      * Output: \verbinclude PartialRedux_count.out
+      *
+      * \sa DenseBase::count() */
+    EIGEN_DEVICE_FUNC
+    const CountReturnType count() const
+    { return CountReturnType(_expression()); }
+
+    /** \returns a row (or column) vector expression of the product
+      * of each column (or row) of the referenced expression.
+      *
+      * Example: \include PartialRedux_prod.cpp
+      * Output: \verbinclude PartialRedux_prod.out
+      *
+      * \sa DenseBase::prod() */
+    EIGEN_DEVICE_FUNC
+    const ProdReturnType prod() const
+    { return ProdReturnType(_expression()); }
+
+
+    /** \returns a matrix expression
+      * where each column (or row) are reversed.
+      *
+      * Example: \include Vectorwise_reverse.cpp
+      * Output: \verbinclude Vectorwise_reverse.out
+      *
+      * \sa DenseBase::reverse() */
+    EIGEN_DEVICE_FUNC
+    const ConstReverseReturnType reverse() const
+    { return ConstReverseReturnType( _expression() ); }
+
+    /** \returns a writable matrix expression
+      * where each column (or row) are reversed.
+      *
+      * \sa reverse() const */
+    EIGEN_DEVICE_FUNC
+    ReverseReturnType reverse()
+    { return ReverseReturnType( _expression() ); }
+
+    typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;
+    EIGEN_DEVICE_FUNC
+    const ReplicateReturnType replicate(Index factor) const;
+
+    /**
+      * \return an expression of the replication of each column (or row) of \c *this
+      *
+      * Example: \include DirectionWise_replicate.cpp
+      * Output: \verbinclude DirectionWise_replicate.out
+      *
+      * \sa VectorwiseOp::replicate(Index), DenseBase::replicate(), class Replicate
+      */
+    // NOTE implemented here because of sunstudio's compilation errors
+    // isVertical*Factor+isHorizontal instead of (isVertical?Factor:1) to handle CUDA bug with ternary operator
+    template<int Factor> const Replicate<ExpressionType,isVertical*Factor+isHorizontal,isHorizontal*Factor+isVertical>
+    EIGEN_DEVICE_FUNC
+    replicate(Index factor = Factor) const
+    {
+      return Replicate<ExpressionType,(isVertical?Factor:1),(isHorizontal?Factor:1)>
+          (_expression(),isVertical?factor:1,isHorizontal?factor:1);
+    }
+
+/////////// Artithmetic operators ///////////
+
+    /** Copies the vector \a other to each subvector of \c *this */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    ExpressionType& operator=(const DenseBase<OtherDerived>& other)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      //eigen_assert((m_matrix.isNull()) == (other.isNull())); FIXME
+      return const_cast<ExpressionType&>(m_matrix = extendedTo(other.derived()));
+    }
+
+    /** Adds the vector \a other to each subvector of \c *this */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      return const_cast<ExpressionType&>(m_matrix += extendedTo(other.derived()));
+    }
+
+    /** Substracts the vector \a other to each subvector of \c *this */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      return const_cast<ExpressionType&>(m_matrix -= extendedTo(other.derived()));
+    }
+
+    /** Multiples each subvector of \c *this by the vector \a other */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      m_matrix *= extendedTo(other.derived());
+      return const_cast<ExpressionType&>(m_matrix);
+    }
+
+    /** Divides each subvector of \c *this by the vector \a other */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      m_matrix /= extendedTo(other.derived());
+      return const_cast<ExpressionType&>(m_matrix);
+    }
+
+    /** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
+    template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+    CwiseBinaryOp<internal::scalar_sum_op<Scalar,typename OtherDerived::Scalar>,
+                  const ExpressionTypeNestedCleaned,
+                  const typename ExtendedType<OtherDerived>::Type>
+    operator+(const DenseBase<OtherDerived>& other) const
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      return m_matrix + extendedTo(other.derived());
+    }
+
+    /** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    CwiseBinaryOp<internal::scalar_difference_op<Scalar,typename OtherDerived::Scalar>,
+                  const ExpressionTypeNestedCleaned,
+                  const typename ExtendedType<OtherDerived>::Type>
+    operator-(const DenseBase<OtherDerived>& other) const
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      return m_matrix - extendedTo(other.derived());
+    }
+
+    /** Returns the expression where each subvector is the product of the vector \a other
+      * by the corresponding subvector of \c *this */
+    template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+    CwiseBinaryOp<internal::scalar_product_op<Scalar>,
+                  const ExpressionTypeNestedCleaned,
+                  const typename ExtendedType<OtherDerived>::Type>
+    EIGEN_DEVICE_FUNC
+    operator*(const DenseBase<OtherDerived>& other) const
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      return m_matrix * extendedTo(other.derived());
+    }
+
+    /** Returns the expression where each subvector is the quotient of the corresponding
+      * subvector of \c *this by the vector \a other */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+                  const ExpressionTypeNestedCleaned,
+                  const typename ExtendedType<OtherDerived>::Type>
+    operator/(const DenseBase<OtherDerived>& other) const
+    {
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+      EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
+      EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
+      return m_matrix / extendedTo(other.derived());
+    }
+
+    /** \returns an expression where each column (or row) of the referenced matrix are normalized.
+      * The referenced matrix is \b not modified.
+      * \sa MatrixBase::normalized(), normalize()
+      */
+    EIGEN_DEVICE_FUNC
+    CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
+                  const ExpressionTypeNestedCleaned,
+                  const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
+    normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
+
+
+    /** Normalize in-place each row or columns of the referenced matrix.
+      * \sa MatrixBase::normalize(), normalized()
+      */
+    EIGEN_DEVICE_FUNC void normalize() {
+      m_matrix = this->normalized();
+    }
+
+    EIGEN_DEVICE_FUNC inline void reverseInPlace();
+
+/////////// Geometry module ///////////
+
+    typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
+    EIGEN_DEVICE_FUNC
+    HomogeneousReturnType homogeneous() const;
+
+    typedef typename ExpressionType::PlainObject CrossReturnType;
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
+
+    enum {
+      HNormalized_Size = Direction==Vertical ? internal::traits<ExpressionType>::RowsAtCompileTime
+                                             : internal::traits<ExpressionType>::ColsAtCompileTime,
+      HNormalized_SizeMinusOne = HNormalized_Size==Dynamic ? Dynamic : HNormalized_Size-1
+    };
+    typedef Block<const ExpressionType,
+                  Direction==Vertical   ? int(HNormalized_SizeMinusOne)
+                                        : int(internal::traits<ExpressionType>::RowsAtCompileTime),
+                  Direction==Horizontal ? int(HNormalized_SizeMinusOne)
+                                        : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
+            HNormalized_Block;
+    typedef Block<const ExpressionType,
+                  Direction==Vertical   ? 1 : int(internal::traits<ExpressionType>::RowsAtCompileTime),
+                  Direction==Horizontal ? 1 : int(internal::traits<ExpressionType>::ColsAtCompileTime)>
+            HNormalized_Factors;
+    typedef CwiseBinaryOp<internal::scalar_quotient_op<typename internal::traits<ExpressionType>::Scalar>,
+                const HNormalized_Block,
+                const Replicate<HNormalized_Factors,
+                  Direction==Vertical   ? HNormalized_SizeMinusOne : 1,
+                  Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
+            HNormalizedReturnType;
+
+    EIGEN_DEVICE_FUNC
+    const HNormalizedReturnType hnormalized() const;
+
+  protected:
+    ExpressionTypeNested m_matrix;
+};
+
+//const colwise moved to DenseBase.h due to CUDA compiler bug
+
+
+/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
+  *
+  * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+  */
+template<typename Derived>
+inline typename DenseBase<Derived>::ColwiseReturnType
+DenseBase<Derived>::colwise()
+{
+  return ColwiseReturnType(derived());
+}
+
+//const rowwise moved to DenseBase.h due to CUDA compiler bug
+
+
+/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
+  *
+  * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+  */
+template<typename Derived>
+inline typename DenseBase<Derived>::RowwiseReturnType
+DenseBase<Derived>::rowwise()
+{
+  return RowwiseReturnType(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_PARTIAL_REDUX_H
diff --git a/third-party/Eigen/src/Core/Visitor.h b/third-party/Eigen/src/Core/Visitor.h
new file mode 100644
index 00000000..54c1883d
--- /dev/null
+++ b/third-party/Eigen/src/Core/Visitor.h
@@ -0,0 +1,273 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_VISITOR_H
+#define EIGEN_VISITOR_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename Visitor, typename Derived, int UnrollCount>
+struct visitor_impl
+{
+  enum {
+    col = (UnrollCount-1) / Derived::RowsAtCompileTime,
+    row = (UnrollCount-1) % Derived::RowsAtCompileTime
+  };
+
+  EIGEN_DEVICE_FUNC
+  static inline void run(const Derived &mat, Visitor& visitor)
+  {
+    visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
+    visitor(mat.coeff(row, col), row, col);
+  }
+};
+
+template<typename Visitor, typename Derived>
+struct visitor_impl<Visitor, Derived, 1>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(const Derived &mat, Visitor& visitor)
+  {
+    return visitor.init(mat.coeff(0, 0), 0, 0);
+  }
+};
+
+template<typename Visitor, typename Derived>
+struct visitor_impl<Visitor, Derived, Dynamic>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(const Derived& mat, Visitor& visitor)
+  {
+    visitor.init(mat.coeff(0,0), 0, 0);
+    for(Index i = 1; i < mat.rows(); ++i)
+      visitor(mat.coeff(i, 0), i, 0);
+    for(Index j = 1; j < mat.cols(); ++j)
+      for(Index i = 0; i < mat.rows(); ++i)
+        visitor(mat.coeff(i, j), i, j);
+  }
+};
+
+// evaluator adaptor
+template<typename XprType>
+class visitor_evaluator
+{
+public:
+  EIGEN_DEVICE_FUNC
+  explicit visitor_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
+  
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  
+  enum {
+    RowsAtCompileTime = XprType::RowsAtCompileTime,
+    CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
+  };
+  
+  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
+  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
+
+  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  { return m_evaluator.coeff(row, col); }
+  
+protected:
+  internal::evaluator<XprType> m_evaluator;
+  const XprType &m_xpr;
+};
+} // end namespace internal
+
+/** Applies the visitor \a visitor to the whole coefficients of the matrix or vector.
+  *
+  * The template parameter \a Visitor is the type of the visitor and provides the following interface:
+  * \code
+  * struct MyVisitor {
+  *   // called for the first coefficient
+  *   void init(const Scalar& value, Index i, Index j);
+  *   // called for all other coefficients
+  *   void operator() (const Scalar& value, Index i, Index j);
+  * };
+  * \endcode
+  *
+  * \note compared to one or two \em for \em loops, visitors offer automatic
+  * unrolling for small fixed size matrix.
+  *
+  * \sa minCoeff(Index*,Index*), maxCoeff(Index*,Index*), DenseBase::redux()
+  */
+template<typename Derived>
+template<typename Visitor>
+EIGEN_DEVICE_FUNC
+void DenseBase<Derived>::visit(Visitor& visitor) const
+{
+  typedef typename internal::visitor_evaluator<Derived> ThisEvaluator;
+  ThisEvaluator thisEval(derived());
+  
+  enum {
+    unroll =  SizeAtCompileTime != Dynamic
+           && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost <= EIGEN_UNROLLING_LIMIT
+  };
+  return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(thisEval, visitor);
+}
+
+namespace internal {
+
+/** \internal
+  * \brief Base class to implement min and max visitors
+  */
+template <typename Derived>
+struct coeff_visitor
+{
+  typedef typename Derived::Scalar Scalar;
+  Index row, col;
+  Scalar res;
+  EIGEN_DEVICE_FUNC
+  inline void init(const Scalar& value, Index i, Index j)
+  {
+    res = value;
+    row = i;
+    col = j;
+  }
+};
+
+/** \internal
+  * \brief Visitor computing the min coefficient with its value and coordinates
+  *
+  * \sa DenseBase::minCoeff(Index*, Index*)
+  */
+template <typename Derived>
+struct min_coeff_visitor : coeff_visitor<Derived>
+{
+  typedef typename Derived::Scalar Scalar;
+  EIGEN_DEVICE_FUNC
+  void operator() (const Scalar& value, Index i, Index j)
+  {
+    if(value < this->res)
+    {
+      this->res = value;
+      this->row = i;
+      this->col = j;
+    }
+  }
+};
+
+template<typename Scalar>
+struct functor_traits<min_coeff_visitor<Scalar> > {
+  enum {
+    Cost = NumTraits<Scalar>::AddCost
+  };
+};
+
+/** \internal
+  * \brief Visitor computing the max coefficient with its value and coordinates
+  *
+  * \sa DenseBase::maxCoeff(Index*, Index*)
+  */
+template <typename Derived>
+struct max_coeff_visitor : coeff_visitor<Derived>
+{
+  typedef typename Derived::Scalar Scalar; 
+  EIGEN_DEVICE_FUNC
+  void operator() (const Scalar& value, Index i, Index j)
+  {
+    if(value > this->res)
+    {
+      this->res = value;
+      this->row = i;
+      this->col = j;
+    }
+  }
+};
+
+template<typename Scalar>
+struct functor_traits<max_coeff_visitor<Scalar> > {
+  enum {
+    Cost = NumTraits<Scalar>::AddCost
+  };
+};
+
+} // end namespace internal
+
+/** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
+  * \returns the minimum of all coefficients of *this and puts in *row and *col its location.
+  * \warning the result is undefined if \c *this contains NaN.
+  *
+  * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
+  */
+template<typename Derived>
+template<typename IndexType>
+EIGEN_DEVICE_FUNC
+typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
+{
+  internal::min_coeff_visitor<Derived> minVisitor;
+  this->visit(minVisitor);
+  *rowId = minVisitor.row;
+  if (colId) *colId = minVisitor.col;
+  return minVisitor.res;
+}
+
+/** \returns the minimum of all coefficients of *this and puts in *index its location.
+  * \warning the result is undefined if \c *this contains NaN. 
+  *
+  * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
+  */
+template<typename Derived>
+template<typename IndexType>
+EIGEN_DEVICE_FUNC
+typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::minCoeff(IndexType* index) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  internal::min_coeff_visitor<Derived> minVisitor;
+  this->visit(minVisitor);
+  *index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
+  return minVisitor.res;
+}
+
+/** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
+  * \returns the maximum of all coefficients of *this and puts in *row and *col its location.
+  * \warning the result is undefined if \c *this contains NaN. 
+  *
+  * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
+  */
+template<typename Derived>
+template<typename IndexType>
+EIGEN_DEVICE_FUNC
+typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
+{
+  internal::max_coeff_visitor<Derived> maxVisitor;
+  this->visit(maxVisitor);
+  *rowPtr = maxVisitor.row;
+  if (colPtr) *colPtr = maxVisitor.col;
+  return maxVisitor.res;
+}
+
+/** \returns the maximum of all coefficients of *this and puts in *index its location.
+  * \warning the result is undefined if \c *this contains NaN.
+  *
+  * \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
+  */
+template<typename Derived>
+template<typename IndexType>
+EIGEN_DEVICE_FUNC
+typename internal::traits<Derived>::Scalar
+DenseBase<Derived>::maxCoeff(IndexType* index) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  internal::max_coeff_visitor<Derived> maxVisitor;
+  this->visit(maxVisitor);
+  *index = (RowsAtCompileTime==1) ? maxVisitor.col : maxVisitor.row;
+  return maxVisitor.res;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_VISITOR_H
diff --git a/third-party/Eigen/src/Core/arch/AVX/Complex.h b/third-party/Eigen/src/Core/arch/AVX/Complex.h
new file mode 100644
index 00000000..7fa61969
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/AVX/Complex.h
@@ -0,0 +1,451 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_AVX_H
+#define EIGEN_COMPLEX_AVX_H
+
+namespace Eigen {
+
+namespace internal {
+
+//---------- float ----------
+struct Packet4cf
+{
+  EIGEN_STRONG_INLINE Packet4cf() {}
+  EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
+  __m256  v;
+};
+
+template<> struct packet_traits<std::complex<float> >  : default_packet_traits
+{
+  typedef Packet4cf type;
+  typedef Packet2cf half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 4,
+    HasHalfPacket = 1,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0
+  };
+};
+
+template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32}; typedef Packet2cf half; };
+
+template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)
+{
+  return Packet4cf(pnegate(a.v));
+}
+template<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)
+{
+  const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
+  return Packet4cf(_mm256_xor_ps(a.v,mask));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
+{
+  __m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v);
+  __m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1)));
+  __m256 result = _mm256_addsub_ps(tmp1, tmp2);
+  return Packet4cf(result);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf pand   <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf por    <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pxor   <Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_andnot_ps(a.v,b.v)); }
+
+template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload<Packet8f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu<Packet8f>(&numext::real_ref(*from))); }
+
+
+template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
+{
+  return Packet4cf(_mm256_castpd_ps(_mm256_broadcast_sd((const double*)(const void*)&from)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
+{
+  // FIXME The following might be optimized using _mm256_movedup_pd
+  Packet2cf a = ploaddup<Packet2cf>(from);
+  Packet2cf b = ploaddup<Packet2cf>(from+1);
+  return  Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(a.v), b.v, 1));
+}
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, Index stride)
+{
+  return Packet4cf(_mm256_set_ps(std::imag(from[3*stride]), std::real(from[3*stride]),
+                                 std::imag(from[2*stride]), std::real(from[2*stride]),
+                                 std::imag(from[1*stride]), std::real(from[1*stride]),
+                                 std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, Index stride)
+{
+  __m128 low = _mm256_extractf128_ps(from.v, 0);
+  to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
+                                     _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
+  to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),
+                                     _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
+
+  __m128 high = _mm256_extractf128_ps(from.v, 1);
+  to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),
+                                     _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
+  to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),
+                                     _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
+
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet4cf>(const Packet4cf& a)
+{
+  return pfirst(Packet2cf(_mm256_castps256_ps128(a.v)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
+  __m128 low  = _mm256_extractf128_ps(a.v, 0);
+  __m128 high = _mm256_extractf128_ps(a.v, 1);
+  __m128d lowd  = _mm_castps_pd(low);
+  __m128d highd = _mm_castps_pd(high);
+  low  = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
+  high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
+  __m256 result = _mm256_setzero_ps();
+  result = _mm256_insertf128_ps(result, low, 1);
+  result = _mm256_insertf128_ps(result, high, 0);
+  return Packet4cf(result);
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)
+{
+  return predux(padd(Packet2cf(_mm256_extractf128_ps(a.v,0)),
+                     Packet2cf(_mm256_extractf128_ps(a.v,1))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
+{
+  Packet8f t0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
+  Packet8f t1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
+  t0 = _mm256_hadd_ps(t0,t1);
+  Packet8f t2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
+  Packet8f t3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
+  t2 = _mm256_hadd_ps(t2,t3);
+  
+  t1 = _mm256_permute2f128_ps(t0,t2, 0 + (2<<4));
+  t3 = _mm256_permute2f128_ps(t0,t2, 1 + (3<<4));
+
+  return Packet4cf(_mm256_add_ps(t1,t3));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
+{
+  return predux_mul(pmul(Packet2cf(_mm256_extractf128_ps(a.v, 0)),
+                         Packet2cf(_mm256_extractf128_ps(a.v, 1))));
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet4cf>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
+  {
+    if (Offset==0) return;
+    palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
+  }
+};
+
+template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
+{
+  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+  {
+    return internal::pmul(a, pconj(b));
+  }
+};
+
+template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
+{
+  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+  {
+    return internal::pmul(pconj(a), b);
+  }
+};
+
+template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
+{
+  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
+  {
+    return pconj(internal::pmul(a, b));
+  }
+};
+
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
+
+template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
+{
+  Packet4cf num = pmul(a, pconj(b));
+  __m256 tmp = _mm256_mul_ps(b.v, b.v);
+  __m256 tmp2    = _mm256_shuffle_ps(tmp,tmp,0xB1);
+  __m256 denom = _mm256_add_ps(tmp, tmp2);
+  return Packet4cf(_mm256_div_ps(num.v, denom));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
+{
+  return Packet4cf(_mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1)));
+}
+
+//---------- double ----------
+struct Packet2cd
+{
+  EIGEN_STRONG_INLINE Packet2cd() {}
+  EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
+  __m256d  v;
+};
+
+template<> struct packet_traits<std::complex<double> >  : default_packet_traits
+{
+  typedef Packet2cd type;
+  typedef Packet1cd half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 0,
+    size = 2,
+    HasHalfPacket = 1,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0
+  };
+};
+
+template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a) { return Packet2cd(pnegate(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)
+{
+  const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
+  return Packet2cd(_mm256_xor_pd(a.v,mask));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
+{
+  __m256d tmp1 = _mm256_shuffle_pd(a.v,a.v,0x0);
+  __m256d even = _mm256_mul_pd(tmp1, b.v);
+  __m256d tmp2 = _mm256_shuffle_pd(a.v,a.v,0xF);
+  __m256d tmp3 = _mm256_shuffle_pd(b.v,b.v,0x5);
+  __m256d odd  = _mm256_mul_pd(tmp2, tmp3);
+  return Packet2cd(_mm256_addsub_pd(even, odd));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pand   <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_and_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd por    <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_or_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pxor   <Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_xor_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_andnot_pd(a.v,b.v)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload<Packet4d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu<Packet4d>((const double*)from)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
+{
+  // in case casting to a __m128d* is really not safe, then we can still fallback to this version: (much slower though)
+//   return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
+    return Packet2cd(_mm256_broadcast_pd((const __m128d*)(const void*)&from));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from) { return pset1<Packet2cd>(*from); }
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet2cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet2cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, Index stride)
+{
+  return Packet2cd(_mm256_set_pd(std::imag(from[1*stride]), std::real(from[1*stride]),
+				 std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, Index stride)
+{
+  __m128d low = _mm256_extractf128_pd(from.v, 0);
+  to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
+  __m128d high = _mm256_extractf128_pd(from.v, 1);
+  to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)
+{
+  __m128d low = _mm256_extractf128_pd(a.v, 0);
+  EIGEN_ALIGN16 double res[2];
+  _mm_store_pd(res, low);
+  return std::complex<double>(res[0],res[1]);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
+  __m256d result = _mm256_permute2f128_pd(a.v, a.v, 1);
+  return Packet2cd(result);
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)
+{
+  return predux(padd(Packet1cd(_mm256_extractf128_pd(a.v,0)),
+                     Packet1cd(_mm256_extractf128_pd(a.v,1))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
+{
+  Packet4d t0 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
+  Packet4d t1 = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
+
+  return Packet2cd(_mm256_add_pd(t0,t1));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
+{
+  return predux(pmul(Packet1cd(_mm256_extractf128_pd(a.v,0)),
+                     Packet1cd(_mm256_extractf128_pd(a.v,1))));
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2cd>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
+  {
+    if (Offset==0) return;
+    palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
+  }
+};
+
+template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
+{
+  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+  {
+    return internal::pmul(a, pconj(b));
+  }
+};
+
+template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
+{
+  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+  {
+    return internal::pmul(pconj(a), b);
+  }
+};
+
+template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
+{
+  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
+  {
+    return pconj(internal::pmul(a, b));
+  }
+};
+
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
+
+template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
+{
+  Packet2cd num = pmul(a, pconj(b));
+  __m256d tmp = _mm256_mul_pd(b.v, b.v);
+  __m256d denom = _mm256_hadd_pd(tmp, tmp);
+  return Packet2cd(_mm256_div_pd(num.v, denom));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
+{
+  return Packet2cd(_mm256_shuffle_pd(x.v, x.v, 0x5));
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4cf,4>& kernel) {
+  __m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
+  __m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
+  __m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
+  __m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
+
+  __m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
+  __m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
+  __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
+  __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
+
+  kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
+  kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
+  kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
+  kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cd,2>& kernel) {
+  __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
+  kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
+ kernel.packet[0].v = tmp;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
+{
+  return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,1|2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
+{
+  return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,1|2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
+{
+  return Packet4cf(_mm256_blend_ps(a.v,pset1<Packet4cf>(b).v,(1<<7)|(1<<6)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
+{
+  return Packet2cd(_mm256_blend_pd(a.v,pset1<Packet2cd>(b).v,(1<<3)|(1<<2)));
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_AVX_H
diff --git a/third-party/Eigen/src/Core/arch/AVX/MathFunctions.h b/third-party/Eigen/src/Core/arch/AVX/MathFunctions.h
new file mode 100644
index 00000000..6af67ce2
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -0,0 +1,439 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATH_FUNCTIONS_AVX_H
+#define EIGEN_MATH_FUNCTIONS_AVX_H
+
+/* The sin, cos, exp, and log functions of this file are loosely derived from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+namespace Eigen {
+
+namespace internal {
+
+inline Packet8i pshiftleft(Packet8i v, int n)
+{
+#ifdef EIGEN_VECTORIZE_AVX2
+  return _mm256_slli_epi32(v, n);
+#else
+  __m128i lo = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
+  __m128i hi = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
+  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
+#endif
+}
+
+inline Packet8f pshiftright(Packet8f v, int n)
+{
+#ifdef EIGEN_VECTORIZE_AVX2
+  return _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(v), n));
+#else
+  __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 0), n);
+  __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(v), 1), n);
+  return _mm256_cvtepi32_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1));
+#endif
+}
+
+// Sine function
+// Computes sin(x) by wrapping x to the interval [-Pi/4,3*Pi/4] and
+// evaluating interpolants in [-Pi/4,Pi/4] or [Pi/4,3*Pi/4]. The interpolants
+// are (anti-)symmetric and thus have only odd/even coefficients
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+psin<Packet8f>(const Packet8f& _x) {
+  Packet8f x = _x;
+
+  // Some useful values.
+  _EIGEN_DECLARE_CONST_Packet8i(one, 1);
+  _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
+  _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
+  _EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f);
+  _EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f);
+  _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f);
+
+  // Map x from [-Pi/4,3*Pi/4] to z in [-1,3] and subtract the shifted period.
+  Packet8f z = pmul(x, p8f_one_over_pi);
+  Packet8f shift = _mm256_floor_ps(padd(z, p8f_one_over_four));
+  x = pmadd(shift, p8f_neg_pi_first, x);
+  x = pmadd(shift, p8f_neg_pi_second, x);
+  x = pmadd(shift, p8f_neg_pi_third, x);
+  z = pmul(x, p8f_four_over_pi);
+
+  // Make a mask for the entries that need flipping, i.e. wherever the shift
+  // is odd.
+  Packet8i shift_ints = _mm256_cvtps_epi32(shift);
+  Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
+  Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31);
+
+  // Create a mask for which interpolant to use, i.e. if z > 1, then the mask
+  // is set to ones for that entry.
+  Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ);
+
+  // Evaluate the polynomial for the interval [1,3] in z.
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f);
+  Packet8f z_minus_two = psub(z, p8f_two);
+  Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two);
+  Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
+  right = pmadd(right, z_minus_two2, p8f_coeff_right_2);
+  right = pmadd(right, z_minus_two2, p8f_coeff_right_0);
+
+  // Evaluate the polynomial for the interval [-1,1] in z.
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f);
+  Packet8f z2 = pmul(z, z);
+  Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
+  left = pmadd(left, z2, p8f_coeff_left_3);
+  left = pmadd(left, z2, p8f_coeff_left_1);
+  left = pmul(left, z);
+
+  // Assemble the results, i.e. select the left and right polynomials.
+  left = _mm256_andnot_ps(ival_mask, left);
+  right = _mm256_and_ps(ival_mask, right);
+  Packet8f res = _mm256_or_ps(left, right);
+
+  // Flip the sign on the odd intervals and return the result.
+  res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
+  return res;
+}
+
+// Natural logarithm
+// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
+// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
+// be easily approximated by a polynomial centered on m=1 for stability.
+// TODO(gonnet): Further reduce the interval allowing for lower-degree
+//               polynomial interpolants -> ... -> profit!
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+plog<Packet8f>(const Packet8f& _x) {
+  Packet8f x = _x;
+  _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
+  _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet8f(126f, 126.0f);
+
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inv_mant_mask, ~0x7f800000);
+
+  // The smallest non denormalized float number.
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(min_norm_pos, 0x00800000);
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(minus_inf, 0xff800000);
+
+  // Polynomial coefficients.
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_SQRTHF, 0.707106781186547524f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p0, 7.0376836292E-2f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p1, -1.1514610310E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p2, 1.1676998740E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p3, -1.2420140846E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p4, +1.4249322787E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p5, -1.6668057665E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p6, +2.0000714765E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p7, -2.4999993993E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p8, +3.3333331174E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q1, -2.12194440e-4f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q2, 0.693359375f);
+
+  Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ); // not greater equal is true if x is NaN
+  Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);
+
+  // Truncate input values to the minimum positive normal.
+  x = pmax(x, p8f_min_norm_pos);
+
+  Packet8f emm0 = pshiftright(x,23);
+  Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
+
+  // Set the exponents to -1, i.e. x are in the range [0.5,1).
+  x = _mm256_and_ps(x, p8f_inv_mant_mask);
+  x = _mm256_or_ps(x, p8f_half);
+
+  // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
+  // and shift by -1. The values are then centered around 0, which improves
+  // the stability of the polynomial evaluation.
+  //   if( x < SQRTHF ) {
+  //     e -= 1;
+  //     x = x + x - 1.0;
+  //   } else { x = x - 1.0; }
+  Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
+  Packet8f tmp = _mm256_and_ps(x, mask);
+  x = psub(x, p8f_1);
+  e = psub(e, _mm256_and_ps(p8f_1, mask));
+  x = padd(x, tmp);
+
+  Packet8f x2 = pmul(x, x);
+  Packet8f x3 = pmul(x2, x);
+
+  // Evaluate the polynomial approximant of degree 8 in three parts, probably
+  // to improve instruction-level parallelism.
+  Packet8f y, y1, y2;
+  y = pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
+  y1 = pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
+  y2 = pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
+  y = pmadd(y, x, p8f_cephes_log_p2);
+  y1 = pmadd(y1, x, p8f_cephes_log_p5);
+  y2 = pmadd(y2, x, p8f_cephes_log_p8);
+  y = pmadd(y, x3, y1);
+  y = pmadd(y, x3, y2);
+  y = pmul(y, x3);
+
+  // Add the logarithm of the exponent back to the result of the interpolation.
+  y1 = pmul(e, p8f_cephes_log_q1);
+  tmp = pmul(x2, p8f_half);
+  y = padd(y, y1);
+  x = psub(x, tmp);
+  y2 = pmul(e, p8f_cephes_log_q2);
+  x = padd(x, y);
+  x = padd(x, y2);
+
+  // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
+  return _mm256_or_ps(
+      _mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
+      _mm256_and_ps(iszero_mask, p8f_minus_inf));
+}
+
+// Exponential function. Works by writing "x = m*log(2) + r" where
+// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
+// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+pexp<Packet8f>(const Packet8f& _x) {
+  _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
+  _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet8f(127, 127.0f);
+
+  _EIGEN_DECLARE_CONST_Packet8f(exp_hi, 88.3762626647950f);
+  _EIGEN_DECLARE_CONST_Packet8f(exp_lo, -88.3762626647949f);
+
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_LOG2EF, 1.44269504088896341f);
+
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p0, 1.9875691500E-4f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p1, 1.3981999507E-3f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p2, 8.3334519073E-3f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p3, 4.1665795894E-2f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p4, 1.6666665459E-1f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p5, 5.0000001201E-1f);
+
+  // Clamp x.
+  Packet8f x = pmax(pmin(_x, p8f_exp_hi), p8f_exp_lo);
+
+  // Express exp(x) as exp(m*ln(2) + r), start by extracting
+  // m = floor(x/ln(2) + 0.5).
+  Packet8f m = _mm256_floor_ps(pmadd(x, p8f_cephes_LOG2EF, p8f_half));
+
+// Get r = x - m*ln(2). If no FMA instructions are available, m*ln(2) is
+// subtracted out in two parts, m*C1+m*C2 = m*ln(2), to avoid accumulating
+// truncation errors. Note that we don't use the "pmadd" function here to
+// ensure that a precision-preserving FMA instruction is used.
+#ifdef EIGEN_VECTORIZE_FMA
+  _EIGEN_DECLARE_CONST_Packet8f(nln2, -0.6931471805599453f);
+  Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
+#else
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C1, 0.693359375f);
+  _EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C2, -2.12194440e-4f);
+  Packet8f r = psub(x, pmul(m, p8f_cephes_exp_C1));
+  r = psub(r, pmul(m, p8f_cephes_exp_C2));
+#endif
+
+  Packet8f r2 = pmul(r, r);
+
+  // TODO(gonnet): Split into odd/even polynomials and try to exploit
+  //               instruction-level parallelism.
+  Packet8f y = p8f_cephes_exp_p0;
+  y = pmadd(y, r, p8f_cephes_exp_p1);
+  y = pmadd(y, r, p8f_cephes_exp_p2);
+  y = pmadd(y, r, p8f_cephes_exp_p3);
+  y = pmadd(y, r, p8f_cephes_exp_p4);
+  y = pmadd(y, r, p8f_cephes_exp_p5);
+  y = pmadd(y, r2, r);
+  y = padd(y, p8f_1);
+
+  // Build emm0 = 2^m.
+  Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
+  emm0 = pshiftleft(emm0, 23);
+
+  // Return 2^m * exp(r).
+  return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
+}
+
+// Hyperbolic Tangent function.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+ptanh<Packet8f>(const Packet8f& x) {
+  return internal::generic_fast_tanh_float(x);
+}
+
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
+pexp<Packet4d>(const Packet4d& _x) {
+  Packet4d x = _x;
+
+  _EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
+  _EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
+  _EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
+
+  _EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
+  _EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
+
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
+
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
+
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
+
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
+  _EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
+  _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
+
+  Packet4d tmp, fx;
+
+  // clamp x
+  x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
+  // Express exp(x) as exp(g + n*log(2)).
+  fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
+
+  // Get the integer modulus of log(2), i.e. the "n" described above.
+  fx = _mm256_floor_pd(fx);
+
+  // Get the remainder modulo log(2), i.e. the "g" described above. Subtract
+  // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
+  // digits right.
+  tmp = pmul(fx, p4d_cephes_exp_C1);
+  Packet4d z = pmul(fx, p4d_cephes_exp_C2);
+  x = psub(x, tmp);
+  x = psub(x, z);
+
+  Packet4d x2 = pmul(x, x);
+
+  // Evaluate the numerator polynomial of the rational interpolant.
+  Packet4d px = p4d_cephes_exp_p0;
+  px = pmadd(px, x2, p4d_cephes_exp_p1);
+  px = pmadd(px, x2, p4d_cephes_exp_p2);
+  px = pmul(px, x);
+
+  // Evaluate the denominator polynomial of the rational interpolant.
+  Packet4d qx = p4d_cephes_exp_q0;
+  qx = pmadd(qx, x2, p4d_cephes_exp_q1);
+  qx = pmadd(qx, x2, p4d_cephes_exp_q2);
+  qx = pmadd(qx, x2, p4d_cephes_exp_q3);
+
+  // I don't really get this bit, copied from the SSE2 routines, so...
+  // TODO(gonnet): Figure out what is going on here, perhaps find a better
+  // rational interpolant?
+  x = _mm256_div_pd(px, psub(qx, px));
+  x = pmadd(p4d_2, x, p4d_1);
+
+  // Build e=2^n by constructing the exponents in a 128-bit vector and
+  // shifting them to where they belong in double-precision values.
+  __m128i emm0 = _mm256_cvtpd_epi32(fx);
+  emm0 = _mm_add_epi32(emm0, p4i_1023);
+  emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
+  __m128i lo = _mm_slli_epi64(emm0, 52);
+  __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
+  __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
+  e = _mm256_insertf128_si256(e, hi, 1);
+
+  // Construct the result 2^n * exp(g) = e * x. The max is used to catch
+  // non-finite values in the input.
+  return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
+}
+
+// Functions for sqrt.
+// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
+// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
+// exact solution. It does not handle +inf, or denormalized numbers correctly.
+// The main advantage of this approach is not just speed, but also the fact that
+// it can be inlined and pipelined with other computations, further reducing its
+// effective latency. This is similar to Quake3's fast inverse square root.
+// For detail see here: http://www.beyond3d.com/content/articles/8/
+#if EIGEN_FAST_MATH
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
+psqrt<Packet8f>(const Packet8f& _x) {
+  Packet8f half = pmul(_x, pset1<Packet8f>(.5f));
+  Packet8f denormal_mask = _mm256_and_ps(
+      _mm256_cmp_ps(_x, pset1<Packet8f>((std::numeric_limits<float>::min)()),
+                    _CMP_LT_OQ),
+      _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ));
+
+  // Compute approximate reciprocal sqrt.
+  Packet8f x = _mm256_rsqrt_ps(_x);
+  // Do a single step of Newton's iteration.
+  x = pmul(x, psub(pset1<Packet8f>(1.5f), pmul(half, pmul(x,x))));
+  // Flush results for denormals to zero.
+  return _mm256_andnot_ps(denormal_mask, pmul(_x,x));
+}
+#else
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f psqrt<Packet8f>(const Packet8f& x) {
+  return _mm256_sqrt_ps(x);
+}
+#endif
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4d psqrt<Packet4d>(const Packet4d& x) {
+  return _mm256_sqrt_pd(x);
+}
+#if EIGEN_FAST_MATH
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
+  _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
+  _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
+
+  Packet8f neg_half = pmul(_x, p8f_minus_half);
+
+  // select only the inverse sqrt of positive normal inputs (denormals are
+  // flushed to zero and cause infs as well).
+  Packet8f le_zero_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
+  Packet8f x = _mm256_andnot_ps(le_zero_mask, _mm256_rsqrt_ps(_x));
+
+  // Fill in NaNs and Infs for the negative/zero entries.
+  Packet8f neg_mask = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
+  Packet8f zero_mask = _mm256_andnot_ps(neg_mask, le_zero_mask);
+  Packet8f infs_and_nans = _mm256_or_ps(_mm256_and_ps(neg_mask, p8f_nan),
+                                        _mm256_and_ps(zero_mask, p8f_inf));
+
+  // Do a single step of Newton's iteration.
+  x = pmul(x, pmadd(neg_half, pmul(x, x), p8f_one_point_five));
+
+  // Insert NaNs and Infs in all the right places.
+  return _mm256_or_ps(x, infs_and_nans);
+}
+
+#else
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet8f prsqrt<Packet8f>(const Packet8f& x) {
+  _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
+  return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(x));
+}
+#endif
+
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4d prsqrt<Packet4d>(const Packet4d& x) {
+  _EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
+  return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(x));
+}
+
+
+}  // end namespace internal
+
+}  // end namespace Eigen
+
+#endif  // EIGEN_MATH_FUNCTIONS_AVX_H
diff --git a/third-party/Eigen/src/Core/arch/AVX/PacketMath.h b/third-party/Eigen/src/Core/arch/AVX/PacketMath.h
new file mode 100644
index 00000000..923a124b
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -0,0 +1,637 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_AVX_H
+#define EIGEN_PACKET_MATH_AVX_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#endif
+
+typedef __m256  Packet8f;
+typedef __m256i Packet8i;
+typedef __m256d Packet4d;
+
+template<> struct is_arithmetic<__m256>  { enum { value = true }; };
+template<> struct is_arithmetic<__m256i> { enum { value = true }; };
+template<> struct is_arithmetic<__m256d> { enum { value = true }; };
+
+#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
+  const Packet8f p8f_##NAME = pset1<Packet8f>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
+  const Packet4d p4d_##NAME = pset1<Packet4d>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
+  const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
+
+#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
+  const Packet8i p8i_##NAME = pset1<Packet8i>(X)
+
+// Use the packet_traits defined in AVX512/PacketMath.h instead if we're going
+// to leverage AVX512 instructions.
+#ifndef EIGEN_VECTORIZE_AVX512
+template<> struct packet_traits<float>  : default_packet_traits
+{
+  typedef Packet8f type;
+  typedef Packet4f half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=8,
+    HasHalfPacket = 1,
+
+    HasDiv  = 1,
+    HasSin  = EIGEN_FAST_MATH,
+    HasCos  = 0,
+    HasLog  = 1,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasTanh  = EIGEN_FAST_MATH,
+    HasBlend = 1,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
+  };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+  typedef Packet4d type;
+  typedef Packet2d half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=4,
+    HasHalfPacket = 1,
+
+    HasDiv  = 1,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasBlend = 1,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
+  };
+};
+#endif
+
+template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
+
+/* Proper support for integers is only provided by AVX2. In the meantime, we'll
+   use SSE instructions and packets to deal with integers.
+template<> struct packet_traits<int>    : default_packet_traits
+{
+  typedef Packet8i type;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=8
+  };
+};
+*/
+
+template<> struct unpacket_traits<Packet8f> { typedef float  type; typedef Packet4f half; enum {size=8, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet4d> { typedef double type; typedef Packet2d half; enum {size=4, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet8i> { typedef int    type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
+
+template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float&  from) { return _mm256_set1_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int&    from) { return _mm256_set1_epi32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float*  from) { return _mm256_broadcast_ss(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); }
+
+template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); }
+template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); }
+
+template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
+{
+  return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
+{
+  return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_mul_pd(a,b); }
+
+
+template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_div_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)
+{ eigen_assert(false && "packet integer division are not supported by AVX");
+  return pset1<Packet8i>(0);
+}
+
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
+#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
+  // Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
+  //  and even register spilling with clang>=6.0 (bug 1637).
+  // Gcc stupidly generates a vfmadd132ps instruction.
+  // So let's enforce it to generate a vfmadd231ps instruction since the most common use
+  //  case is to accumulate the result of the product.
+  Packet8f res = c;
+  __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+  return res;
+#else
+  return _mm256_fmadd_ps(a,b,c);
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
+#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
+  // see above
+  Packet4d res = c;
+  __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
+  return res;
+#else
+  return _mm256_fmadd_pd(a,b,c);
+#endif
+}
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
+template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_or_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_xor_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_andnot_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float*   from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double*  from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int*     from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); }
+
+template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); }
+
+// Loads 4 floats from memory a returns the packet {a0, a0  a1, a1, a2, a2, a3, a3}
+template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
+{
+  // TODO try to find a way to avoid the need of a temporary register
+//   Packet8f tmp  = _mm256_castps128_ps256(_mm_loadu_ps(from));
+//   tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1);
+//   return _mm256_unpacklo_ps(tmp,tmp);
+  
+  // _mm256_insertf128_ps is very slow on Haswell, thus:
+  Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from);
+  // mimic an "inplace" permutation of the lower 128bits using a blend
+  tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15);
+  // then we can perform a consistent permutation on the global register to get everything in shape:
+  return  _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2));
+}
+// Loads 2 doubles from memory a returns the packet {a0, a0  a1, a1}
+template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
+{
+  Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from);
+  return  _mm256_permute_pd(tmp, 3<<2);
+}
+
+// Loads 2 floats from memory a returns the packet {a0, a0  a0, a0, a1, a1, a1, a1}
+template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
+{
+  Packet8f tmp = _mm256_castps128_ps256(_mm_broadcast_ss(from));
+  return _mm256_insertf128_ps(tmp, _mm_broadcast_ss(from+1), 1);
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
+
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*   to, const Packet8f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*       to, const Packet8i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_si256(reinterpret_cast<__m256i*>(to), from); }
+
+// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available
+// NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4);
+template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride)
+{
+  return _mm256_set_ps(from[7*stride], from[6*stride], from[5*stride], from[4*stride],
+                       from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride)
+{
+  return _mm256_set_pd(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)
+{
+  __m128 low = _mm256_extractf128_ps(from, 0);
+  to[stride*0] = _mm_cvtss_f32(low);
+  to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1));
+  to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 2));
+  to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3));
+
+  __m128 high = _mm256_extractf128_ps(from, 1);
+  to[stride*4] = _mm_cvtss_f32(high);
+  to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1));
+  to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 2));
+  to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride)
+{
+  __m128d low = _mm256_extractf128_pd(from, 0);
+  to[stride*0] = _mm_cvtsd_f64(low);
+  to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1));
+  __m128d high = _mm256_extractf128_pd(from, 1);
+  to[stride*2] = _mm_cvtsd_f64(high);
+  to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1));
+}
+
+template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
+{
+  Packet8f pa = pset1<Packet8f>(a);
+  pstore(to, pa);
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
+{
+  Packet4d pa = pset1<Packet4d>(a);
+  pstore(to, pa);
+}
+template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
+{
+  Packet8i pa = pset1<Packet8i>(a);
+  pstore(to, pa);
+}
+
+#ifndef EIGEN_VECTORIZE_AVX512
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+#endif
+
+template<> EIGEN_STRONG_INLINE float  pfirst<Packet8f>(const Packet8f& a) {
+  return _mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
+template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a) {
+  return _mm_cvtsd_f64(_mm256_castpd256_pd128(a));
+}
+template<> EIGEN_STRONG_INLINE int    pfirst<Packet8i>(const Packet8i& a) {
+  return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
+}
+
+
+template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
+{
+  __m256 tmp = _mm256_shuffle_ps(a,a,0x1b);
+  return _mm256_permute2f128_ps(tmp, tmp, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
+{
+   __m256d tmp = _mm256_shuffle_pd(a,a,5);
+  return _mm256_permute2f128_pd(tmp, tmp, 1);
+  #if 0
+  // This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd
+  // exhibit the same latency/throughput, but it is here for future reference/benchmarking...
+  __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
+    return _mm256_permute_pd(swap_halves,5);
+  #endif
+}
+
+// pabs should be ok
+template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
+{
+  const Packet8f mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+  return _mm256_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
+{
+  const Packet4d mask = _mm256_castsi256_pd(_mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+  return _mm256_and_pd(a,mask);
+}
+
+// preduxp should be ok
+// FIXME: why is this ok? why isn't the simply implementation working as expected?
+template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
+{
+    __m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
+    __m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
+    __m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
+    __m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
+
+    __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+    __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+    __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+    __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+
+    __m256 perm1 =  _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+    __m256 perm2 =  _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+    __m256 perm3 =  _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+    __m256 perm4 =  _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+
+    __m256 sum1 = _mm256_add_ps(perm1, hsum5);
+    __m256 sum2 = _mm256_add_ps(perm2, hsum6);
+    __m256 sum3 = _mm256_add_ps(perm3, hsum7);
+    __m256 sum4 = _mm256_add_ps(perm4, hsum8);
+
+    __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+    __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+
+    __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
+    return final;
+}
+template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
+{
+ Packet4d tmp0, tmp1;
+
+  tmp0 = _mm256_hadd_pd(vecs[0], vecs[1]);
+  tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+
+  tmp1 = _mm256_hadd_pd(vecs[2], vecs[3]);
+  tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+
+  return _mm256_blend_pd(tmp0, tmp1, 0xC);
+}
+
+template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
+{
+  return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1))));
+}
+template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
+{
+  return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a),_mm256_extractf128_pd(a,1))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
+{
+  return _mm_add_ps(_mm256_castps256_ps128(a),_mm256_extractf128_ps(a,1));
+}
+
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
+{
+  Packet8f tmp;
+  tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
+  tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+  return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
+{
+  Packet4d tmp;
+  tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
+  return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp,tmp,1)));
+}
+
+template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
+{
+  Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
+  tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+  return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
+{
+  Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
+  return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
+}
+
+template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
+{
+  Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
+  tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,_MM_SHUFFLE(1,0,3,2)));
+  return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp,tmp,1)));
+}
+
+template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
+{
+  Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
+  return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
+}
+
+
+template<int Offset>
+struct palign_impl<Offset,Packet8f>
+{
+  static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm256_blend_ps(first, second, 1);
+      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
+      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+      first = _mm256_blend_ps(tmp1, tmp2, 0x88);
+    }
+    else if (Offset==2)
+    {
+      first = _mm256_blend_ps(first, second, 3);
+      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
+      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+      first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
+    }
+    else if (Offset==3)
+    {
+      first = _mm256_blend_ps(first, second, 7);
+      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
+      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+      first = _mm256_blend_ps(tmp1, tmp2, 0xee);
+    }
+    else if (Offset==4)
+    {
+      first = _mm256_blend_ps(first, second, 15);
+      Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
+      Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
+      first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
+    }
+    else if (Offset==5)
+    {
+      first = _mm256_blend_ps(first, second, 31);
+      first = _mm256_permute2f128_ps(first, first, 1);
+      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
+      first = _mm256_permute2f128_ps(tmp, tmp, 1);
+      first = _mm256_blend_ps(tmp, first, 0x88);
+    }
+    else if (Offset==6)
+    {
+      first = _mm256_blend_ps(first, second, 63);
+      first = _mm256_permute2f128_ps(first, first, 1);
+      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
+      first = _mm256_permute2f128_ps(tmp, tmp, 1);
+      first = _mm256_blend_ps(tmp, first, 0xcc);
+    }
+    else if (Offset==7)
+    {
+      first = _mm256_blend_ps(first, second, 127);
+      first = _mm256_permute2f128_ps(first, first, 1);
+      Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
+      first = _mm256_permute2f128_ps(tmp, tmp, 1);
+      first = _mm256_blend_ps(tmp, first, 0xee);
+    }
+  }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet4d>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm256_blend_pd(first, second, 1);
+      __m256d tmp = _mm256_permute_pd(first, 5);
+      first = _mm256_permute2f128_pd(tmp, tmp, 1);
+      first = _mm256_blend_pd(tmp, first, 0xA);
+    }
+    else if (Offset==2)
+    {
+      first = _mm256_blend_pd(first, second, 3);
+      first = _mm256_permute2f128_pd(first, first, 1);
+    }
+    else if (Offset==3)
+    {
+      first = _mm256_blend_pd(first, second, 7);
+      __m256d tmp = _mm256_permute_pd(first, 5);
+      first = _mm256_permute2f128_pd(tmp, tmp, 1);
+      first = _mm256_blend_pd(tmp, first, 5);
+    }
+  }
+};
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet8f,8>& kernel) {
+  __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+  __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+  __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+  __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+  __m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
+  __m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
+  __m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
+  __m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
+  __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
+  __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
+  __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
+  __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
+  __m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
+  __m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
+  __m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
+  __m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
+  kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
+  kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
+  kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
+  kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
+  kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
+  kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
+  kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
+  kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet8f,4>& kernel) {
+  __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+  __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+  __m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+  __m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+
+  __m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
+  __m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
+  __m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
+  __m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
+
+  kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
+  kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
+  kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
+  kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4d,4>& kernel) {
+  __m256d T0 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 15);
+  __m256d T1 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
+  __m256d T2 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 15);
+  __m256d T3 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
+
+  kernel.packet[1] = _mm256_permute2f128_pd(T0, T2, 32);
+  kernel.packet[3] = _mm256_permute2f128_pd(T0, T2, 49);
+  kernel.packet[0] = _mm256_permute2f128_pd(T1, T3, 32);
+  kernel.packet[2] = _mm256_permute2f128_pd(T1, T3, 49);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
+  const __m256 zero = _mm256_setzero_ps();
+  const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
+  return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
+  const __m256d zero = _mm256_setzero_pd();
+  const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
+  return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
+{
+  return _mm256_blend_ps(a,pset1<Packet8f>(b),1);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
+{
+  return _mm256_blend_pd(a,pset1<Packet4d>(b),1);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
+{
+  return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
+{
+  return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_AVX_H
diff --git a/third-party/Eigen/src/Core/arch/AVX/TypeCasting.h b/third-party/Eigen/src/Core/arch/AVX/TypeCasting.h
new file mode 100644
index 00000000..83bfdc60
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/AVX/TypeCasting.h
@@ -0,0 +1,51 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TYPE_CASTING_AVX_H
+#define EIGEN_TYPE_CASTING_AVX_H
+
+namespace Eigen {
+
+namespace internal {
+
+// For now we use SSE to handle integers, so we can't use AVX instructions to cast
+// from int to float
+template <>
+struct type_casting_traits<float, int> {
+  enum {
+    VectorizedCast = 0,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template <>
+struct type_casting_traits<int, float> {
+  enum {
+    VectorizedCast = 0,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+
+
+template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
+  return _mm256_cvtps_epi32(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
+  return _mm256_cvtepi32_ps(a);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_AVX_H
diff --git a/third-party/Eigen/src/Core/arch/AVX512/MathFunctions.h b/third-party/Eigen/src/Core/arch/AVX512/MathFunctions.h
new file mode 100644
index 00000000..b259c1e1
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/AVX512/MathFunctions.h
@@ -0,0 +1,389 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@gmail.com)
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
+#define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
+
+namespace Eigen {
+
+namespace internal {
+
+// Disable the code for older versions of gcc that don't support many of the required avx512 instrinsics.
+#if EIGEN_GNUC_AT_LEAST(5, 3)
+
+#define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
+  const Packet16f p16f_##NAME = pset1<Packet16f>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
+  const Packet16f p16f_##NAME = (__m512)pset1<Packet16i>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
+  const Packet8d p8d_##NAME = pset1<Packet8d>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
+  const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
+
+
+// Natural logarithm
+// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
+// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
+// be easily approximated by a polynomial centered on m=1 for stability.
+#if defined(EIGEN_VECTORIZE_AVX512DQ)
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+plog<Packet16f>(const Packet16f& _x) {
+  Packet16f x = _x;
+  _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
+  _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet16f(126f, 126.0f);
+
+  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inv_mant_mask, ~0x7f800000);
+
+  // The smallest non denormalized float number.
+  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
+  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
+  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(pos_inf, 0x7f800000);
+  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
+
+  // Polynomial coefficients.
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_SQRTHF, 0.707106781186547524f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p0, 7.0376836292E-2f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p1, -1.1514610310E-1f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p2, 1.1676998740E-1f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p3, -1.2420140846E-1f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p4, +1.4249322787E-1f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p5, -1.6668057665E-1f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p6, +2.0000714765E-1f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p7, -2.4999993993E-1f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_p8, +3.3333331174E-1f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_q1, -2.12194440e-4f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_log_q2, 0.693359375f);
+
+  // invalid_mask is set to true when x is NaN
+  __mmask16 invalid_mask =  _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_NGE_UQ);
+  __mmask16 iszero_mask  =  _mm512_cmp_ps_mask(x, _mm512_setzero_ps(), _CMP_EQ_OQ);
+      
+  // Truncate input values to the minimum positive normal.
+  x = pmax(x, p16f_min_norm_pos);
+
+  // Extract the shifted exponents.
+  Packet16f emm0 = _mm512_cvtepi32_ps(_mm512_srli_epi32((__m512i)x, 23));
+  Packet16f e = _mm512_sub_ps(emm0, p16f_126f);
+
+  // Set the exponents to -1, i.e. x are in the range [0.5,1).
+  x = _mm512_and_ps(x, p16f_inv_mant_mask);
+  x = _mm512_or_ps(x, p16f_half);
+
+  // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
+  // and shift by -1. The values are then centered around 0, which improves
+  // the stability of the polynomial evaluation.
+  //   if( x < SQRTHF ) {
+  //     e -= 1;
+  //     x = x + x - 1.0;
+  //   } else { x = x - 1.0; }
+  __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
+  Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
+  x = psub(x, p16f_1);
+  e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
+  x = padd(x, tmp);
+
+  Packet16f x2 = pmul(x, x);
+  Packet16f x3 = pmul(x2, x);
+
+  // Evaluate the polynomial approximant of degree 8 in three parts, probably
+  // to improve instruction-level parallelism.
+  Packet16f y, y1, y2;
+  y = pmadd(p16f_cephes_log_p0, x, p16f_cephes_log_p1);
+  y1 = pmadd(p16f_cephes_log_p3, x, p16f_cephes_log_p4);
+  y2 = pmadd(p16f_cephes_log_p6, x, p16f_cephes_log_p7);
+  y = pmadd(y, x, p16f_cephes_log_p2);
+  y1 = pmadd(y1, x, p16f_cephes_log_p5);
+  y2 = pmadd(y2, x, p16f_cephes_log_p8);
+  y = pmadd(y, x3, y1);
+  y = pmadd(y, x3, y2);
+  y = pmul(y, x3);
+
+  // Add the logarithm of the exponent back to the result of the interpolation.
+  y1 = pmul(e, p16f_cephes_log_q1);
+  tmp = pmul(x2, p16f_half);
+  y = padd(y, y1);
+  x = psub(x, tmp);
+  y2 = pmul(e, p16f_cephes_log_q2);
+  x = padd(x, y);
+  x = padd(x, y2);
+
+  __mmask16 pos_inf_mask = _mm512_cmp_ps_mask(_x,p16f_pos_inf,_CMP_EQ_OQ);
+  // Filter out invalid inputs, i.e.:
+  //  - negative arg will be NAN,
+  //  - 0 will be -INF.
+  //  - +INF will be +INF
+  return _mm512_mask_blend_ps(iszero_mask,
+            _mm512_mask_blend_ps(invalid_mask,
+              _mm512_mask_blend_ps(pos_inf_mask,x,p16f_pos_inf),
+              p16f_nan),
+            p16f_minus_inf);
+}
+
+#endif
+
+// Exponential function. Works by writing "x = m*log(2) + r" where
+// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
+// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+pexp<Packet16f>(const Packet16f& _x) {
+  _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
+  _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
+
+  _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
+  _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
+
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
+
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
+  _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
+
+  // Clamp x.
+  Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
+
+  // Express exp(x) as exp(m*ln(2) + r), start by extracting
+  // m = floor(x/ln(2) + 0.5).
+  Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
+
+  // Get r = x - m*ln(2). Note that we can do this without losing more than one
+  // ulp precision due to the FMA instruction.
+  _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
+  Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
+  Packet16f r2 = pmul(r, r);
+
+  // TODO(gonnet): Split into odd/even polynomials and try to exploit
+  //               instruction-level parallelism.
+  Packet16f y = p16f_cephes_exp_p0;
+  y = pmadd(y, r, p16f_cephes_exp_p1);
+  y = pmadd(y, r, p16f_cephes_exp_p2);
+  y = pmadd(y, r, p16f_cephes_exp_p3);
+  y = pmadd(y, r, p16f_cephes_exp_p4);
+  y = pmadd(y, r, p16f_cephes_exp_p5);
+  y = pmadd(y, r2, r);
+  y = padd(y, p16f_1);
+
+  // Build emm0 = 2^m.
+  Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
+  emm0 = _mm512_slli_epi32(emm0, 23);
+
+  // Return 2^m * exp(r).
+  return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
+}
+
+/*template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
+pexp<Packet8d>(const Packet8d& _x) {
+  Packet8d x = _x;
+
+  _EIGEN_DECLARE_CONST_Packet8d(1, 1.0);
+  _EIGEN_DECLARE_CONST_Packet8d(2, 2.0);
+
+  _EIGEN_DECLARE_CONST_Packet8d(exp_hi, 709.437);
+  _EIGEN_DECLARE_CONST_Packet8d(exp_lo, -709.436139303);
+
+  _EIGEN_DECLARE_CONST_Packet8d(cephes_LOG2EF, 1.4426950408889634073599);
+
+  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p0, 1.26177193074810590878e-4);
+  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p1, 3.02994407707441961300e-2);
+  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_p2, 9.99999999999999999910e-1);
+
+  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q0, 3.00198505138664455042e-6);
+  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q1, 2.52448340349684104192e-3);
+  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q2, 2.27265548208155028766e-1);
+  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_q3, 2.00000000000000000009e0);
+
+  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C1, 0.693145751953125);
+  _EIGEN_DECLARE_CONST_Packet8d(cephes_exp_C2, 1.42860682030941723212e-6);
+
+  // clamp x
+  x = pmax(pmin(x, p8d_exp_hi), p8d_exp_lo);
+
+  // Express exp(x) as exp(g + n*log(2)).
+  const Packet8d n =
+      _mm512_mul_round_pd(p8d_cephes_LOG2EF, x, _MM_FROUND_TO_NEAREST_INT);
+
+  // Get the remainder modulo log(2), i.e. the "g" described above. Subtract
+  // n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
+  // digits right.
+  const Packet8d nC1 = pmul(n, p8d_cephes_exp_C1);
+  const Packet8d nC2 = pmul(n, p8d_cephes_exp_C2);
+  x = psub(x, nC1);
+  x = psub(x, nC2);
+
+  const Packet8d x2 = pmul(x, x);
+
+  // Evaluate the numerator polynomial of the rational interpolant.
+  Packet8d px = p8d_cephes_exp_p0;
+  px = pmadd(px, x2, p8d_cephes_exp_p1);
+  px = pmadd(px, x2, p8d_cephes_exp_p2);
+  px = pmul(px, x);
+
+  // Evaluate the denominator polynomial of the rational interpolant.
+  Packet8d qx = p8d_cephes_exp_q0;
+  qx = pmadd(qx, x2, p8d_cephes_exp_q1);
+  qx = pmadd(qx, x2, p8d_cephes_exp_q2);
+  qx = pmadd(qx, x2, p8d_cephes_exp_q3);
+
+  // I don't really get this bit, copied from the SSE2 routines, so...
+  // TODO(gonnet): Figure out what is going on here, perhaps find a better
+  // rational interpolant?
+  x = _mm512_div_pd(px, psub(qx, px));
+  x = pmadd(p8d_2, x, p8d_1);
+
+  // Build e=2^n.
+  const Packet8d e = _mm512_castsi512_pd(_mm512_slli_epi64(
+      _mm512_add_epi64(_mm512_cvtpd_epi64(n), _mm512_set1_epi64(1023)), 52));
+
+  // Construct the result 2^n * exp(g) = e * x. The max is used to catch
+  // non-finite values in the input.
+  return pmax(pmul(x, e), _x);
+  }*/
+
+// Functions for sqrt.
+// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
+// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
+// exact solution. The main advantage of this approach is not just speed, but
+// also the fact that it can be inlined and pipelined with other computations,
+// further reducing its effective latency.
+#if EIGEN_FAST_MATH
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+psqrt<Packet16f>(const Packet16f& _x) {
+  Packet16f neg_half = pmul(_x, pset1<Packet16f>(-.5f));
+  __mmask16 denormal_mask = _mm512_kand(
+      _mm512_cmp_ps_mask(_x, pset1<Packet16f>((std::numeric_limits<float>::min)()),
+                        _CMP_LT_OQ),
+      _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));
+
+  Packet16f x = _mm512_rsqrt14_ps(_x);
+
+  // Do a single step of Newton's iteration.
+  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet16f>(1.5f)));
+
+  // Flush results for denormals to zero.
+  return _mm512_mask_blend_ps(denormal_mask, pmul(_x,x), _mm512_setzero_ps());
+}
+
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
+psqrt<Packet8d>(const Packet8d& _x) {
+  Packet8d neg_half = pmul(_x, pset1<Packet8d>(-.5));
+  __mmask16 denormal_mask = _mm512_kand(
+      _mm512_cmp_pd_mask(_x, pset1<Packet8d>((std::numeric_limits<double>::min)()),
+                        _CMP_LT_OQ),
+      _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));
+
+  Packet8d x = _mm512_rsqrt14_pd(_x);
+
+  // Do a single step of Newton's iteration.
+  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
+
+  // Do a second step of Newton's iteration.
+  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5)));
+
+  return _mm512_mask_blend_pd(denormal_mask, pmul(_x,x), _mm512_setzero_pd());
+}
+#else
+template <>
+EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
+  return _mm512_sqrt_ps(x);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
+  return _mm512_sqrt_pd(x);
+}
+#endif
+
+// Functions for rsqrt.
+// Almost identical to the sqrt routine, just leave out the last multiplication
+// and fill in NaN/Inf where needed. Note that this function only exists as an
+// iterative version for doubles since there is no instruction for diretly
+// computing the reciprocal square root in AVX-512.
+#ifdef EIGEN_FAST_MATH
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
+prsqrt<Packet16f>(const Packet16f& _x) {
+  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
+  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);
+  _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
+  _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
+  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
+
+  Packet16f neg_half = pmul(_x, p16f_minus_half);
+
+  // select only the inverse sqrt of positive normal inputs (denormals are
+  // flushed to zero and cause infs as well).
+  __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
+  Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps());
+
+  // Fill in NaNs and Infs for the negative/zero entries.
+  __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
+  Packet16f infs_and_nans = _mm512_mask_blend_ps(
+      neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan);
+
+  // Do a single step of Newton's iteration.
+  x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
+
+  // Insert NaNs and Infs in all the right places.
+  return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
+}
+
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
+prsqrt<Packet8d>(const Packet8d& _x) {
+  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
+  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(nan, 0x7ff1000000000000LL);
+  _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
+  _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
+  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
+
+  Packet8d neg_half = pmul(_x, p8d_minus_half);
+
+  // select only the inverse sqrt of positive normal inputs (denormals are
+  // flushed to zero and cause infs as well).
+  __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
+  Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd());
+
+  // Fill in NaNs and Infs for the negative/zero entries.
+  __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
+  Packet8d infs_and_nans = _mm512_mask_blend_pd(
+      neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan);
+
+  // Do a first step of Newton's iteration.
+  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+
+  // Do a second step of Newton's iteration.
+  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+
+  // Insert NaNs and Infs in all the right places.
+  return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
+}
+#elif defined(EIGEN_VECTORIZE_AVX512ER)
+template <>
+EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
+  return _mm512_rsqrt28_ps(x);
+}
+#endif
+#endif
+
+}  // end namespace internal
+
+}  // end namespace Eigen
+
+#endif  // THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
diff --git a/third-party/Eigen/src/Core/arch/AVX512/PacketMath.h b/third-party/Eigen/src/Core/arch/AVX512/PacketMath.h
new file mode 100644
index 00000000..000b7762
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -0,0 +1,1305 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner (benoit.steiner.goog@gmail.com)
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_AVX512_H
+#define EIGEN_PACKET_MATH_AVX512_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
+#endif
+
+#ifdef EIGEN_VECTORIZE_FMA
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+#endif
+
+typedef __m512 Packet16f;
+typedef __m512i Packet16i;
+typedef __m512d Packet8d;
+
+template <>
+struct is_arithmetic<__m512> {
+  enum { value = true };
+};
+template <>
+struct is_arithmetic<__m512i> {
+  enum { value = true };
+};
+template <>
+struct is_arithmetic<__m512d> {
+  enum { value = true };
+};
+
+template<> struct packet_traits<float>  : default_packet_traits
+{
+  typedef Packet16f type;
+  typedef Packet8f half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 16,
+    HasHalfPacket = 1,
+    HasBlend = 0,
+#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+    HasLog = 1,
+#endif
+    HasExp = 1,
+    HasSqrt = EIGEN_FAST_MATH,
+    HasRsqrt = EIGEN_FAST_MATH,
+#endif
+    HasDiv = 1
+  };
+ };
+template<> struct packet_traits<double> : default_packet_traits
+{
+  typedef Packet8d type;
+  typedef Packet4d half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 8,
+    HasHalfPacket = 1,
+#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT)
+    HasSqrt = EIGEN_FAST_MATH,
+    HasRsqrt = EIGEN_FAST_MATH,
+#endif
+    HasDiv = 1
+  };
+};
+
+/* TODO Implement AVX512 for integers
+template<> struct packet_traits<int>    : default_packet_traits
+{
+  typedef Packet16i type;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=8
+  };
+};
+*/
+
+template <>
+struct unpacket_traits<Packet16f> {
+  typedef float type;
+  typedef Packet8f half;
+  typedef Packet16i integer_packet;
+  enum { size = 16, alignment=Aligned64 };
+};
+template <>
+struct unpacket_traits<Packet8d> {
+  typedef double type;
+  typedef Packet4d half;
+  enum { size = 8, alignment=Aligned64 };
+};
+template <>
+struct unpacket_traits<Packet16i> {
+  typedef int type;
+  typedef Packet8i half;
+  enum { size = 16, alignment=Aligned64 };
+};
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pset1<Packet16f>(const float& from) {
+  return _mm512_set1_ps(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pset1<Packet8d>(const double& from) {
+  return _mm512_set1_pd(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i pset1<Packet16i>(const int& from) {
+  return _mm512_set1_epi32(from);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pload1<Packet16f>(const float* from) {
+  return _mm512_broadcastss_ps(_mm_load_ps1(from));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pload1<Packet8d>(const double* from) {
+  return _mm512_set1_pd(*from);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f plset<Packet16f>(const float& a) {
+  return _mm512_add_ps(
+      _mm512_set1_ps(a),
+      _mm512_set_ps(15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f,
+                    4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d plset<Packet8d>(const double& a) {
+  return _mm512_add_pd(_mm512_set1_pd(a),
+                       _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f padd<Packet16f>(const Packet16f& a,
+                                              const Packet16f& b) {
+  return _mm512_add_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d padd<Packet8d>(const Packet8d& a,
+                                            const Packet8d& b) {
+  return _mm512_add_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i padd<Packet16i>(const Packet16i& a,
+                                              const Packet16i& b) {
+  return _mm512_add_epi32(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f psub<Packet16f>(const Packet16f& a,
+                                              const Packet16f& b) {
+  return _mm512_sub_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d psub<Packet8d>(const Packet8d& a,
+                                            const Packet8d& b) {
+  return _mm512_sub_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i psub<Packet16i>(const Packet16i& a,
+                                              const Packet16i& b) {
+  return _mm512_sub_epi32(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {
+  return _mm512_sub_ps(_mm512_set1_ps(0.0), a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) {
+  return _mm512_sub_pd(_mm512_set1_pd(0.0), a);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pconj(const Packet16f& a) {
+  return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pconj(const Packet8d& a) {
+  return a;
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i pconj(const Packet16i& a) {
+  return a;
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pmul<Packet16f>(const Packet16f& a,
+                                              const Packet16f& b) {
+  return _mm512_mul_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmul<Packet8d>(const Packet8d& a,
+                                            const Packet8d& b) {
+  return _mm512_mul_pd(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i pmul<Packet16i>(const Packet16i& a,
+                                              const Packet16i& b) {
+  return _mm512_mul_epi32(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pdiv<Packet16f>(const Packet16f& a,
+                                              const Packet16f& b) {
+  return _mm512_div_ps(a, b);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pdiv<Packet8d>(const Packet8d& a,
+                                            const Packet8d& b) {
+  return _mm512_div_pd(a, b);
+}
+
+#ifdef EIGEN_VECTORIZE_FMA
+template <>
+EIGEN_STRONG_INLINE Packet16f pmadd(const Packet16f& a, const Packet16f& b,
+                                    const Packet16f& c) {
+  return _mm512_fmadd_ps(a, b, c);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b,
+                                   const Packet8d& c) {
+  return _mm512_fmadd_pd(a, b, c);
+}
+#endif
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a,
+                                              const Packet16f& b) {
+  // Arguments are reversed to match NaN propagation behavior of std::min.
+  return _mm512_min_ps(b, a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmin<Packet8d>(const Packet8d& a,
+                                            const Packet8d& b) {
+  // Arguments are reversed to match NaN propagation behavior of std::min.
+  return _mm512_min_pd(b, a);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a,
+                                              const Packet16f& b) {
+  // Arguments are reversed to match NaN propagation behavior of std::max.
+  return _mm512_max_ps(b, a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a,
+                                            const Packet8d& b) {
+  // Arguments are reversed to match NaN propagation behavior of std::max.
+  return _mm512_max_pd(b, a);
+}
+
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+template<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) { return _mm512_extractf32x8_ps(x,I_); }
+template<int I_> EIGEN_STRONG_INLINE Packet2d extract128(Packet8d x) { return _mm512_extractf64x2_pd(x,I_); }
+EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) { return _mm512_insertf32x8(_mm512_castps256_ps512(a),b,1); }
+#else
+// AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512
+template<int I_> EIGEN_STRONG_INLINE Packet8f extract256(Packet16f x) {
+  return  _mm256_castsi256_ps(_mm512_extracti64x4_epi64( _mm512_castps_si512(x),I_));
+}
+
+// AVX512F does not define _mm512_extractf64x2_pd to extract _m128 from _m512
+template<int I_> EIGEN_STRONG_INLINE Packet2d extract128(Packet8d x) {
+  return _mm_castsi128_pd(_mm512_extracti32x4_epi32( _mm512_castpd_si512(x),I_));
+}
+
+EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) {
+  return _mm512_castsi512_ps(_mm512_inserti64x4(_mm512_castsi256_si512(_mm256_castps_si256(a)),
+                                                _mm256_castps_si256(b),1));
+}
+#endif
+
+// Helper function for bit packing snippet of low precision comparison.
+// It packs the flags from 32x16 to 16x16.
+EIGEN_STRONG_INLINE __m256i Pack32To16(Packet16f rf) {
+  // Split data into small pieces and handle with AVX instructions
+  // to guarantee internal order of vector.
+  // Operation:
+  //   dst[15:0]    := Saturate16(rf[31:0])
+  //   dst[31:16]   := Saturate16(rf[63:32])
+  //   ...
+  //   dst[255:240] := Saturate16(rf[255:224])
+  __m256i lo = _mm256_castps_si256(extract256<0>(rf));
+  __m256i hi = _mm256_castps_si256(extract256<1>(rf));
+  __m128i result_lo = _mm_packs_epi32(_mm256_extractf128_si256(lo, 0),
+                                      _mm256_extractf128_si256(lo, 1));
+  __m128i result_hi = _mm_packs_epi32(_mm256_extractf128_si256(hi, 0),
+                                      _mm256_extractf128_si256(hi, 1));
+  return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi, 1);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16i pand<Packet16i>(const Packet16i& a,
+                                              const Packet16i& b) {
+  return _mm512_and_si512(a,b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pand<Packet16f>(const Packet16f& a,
+                                              const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  return _mm512_and_ps(a, b);
+#else
+  return _mm512_castsi512_ps(pand(_mm512_castps_si512(a),_mm512_castps_si512(b)));
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pand<Packet8d>(const Packet8d& a,
+                                            const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  return _mm512_and_pd(a, b);
+#else
+  Packet8d res = _mm512_undefined_pd();
+  Packet4d lane0_a = _mm512_extractf64x4_pd(a, 0);
+  Packet4d lane0_b = _mm512_extractf64x4_pd(b, 0);
+  res = _mm512_insertf64x4(res, _mm256_and_pd(lane0_a, lane0_b), 0);
+
+  Packet4d lane1_a = _mm512_extractf64x4_pd(a, 1);
+  Packet4d lane1_b = _mm512_extractf64x4_pd(b, 1);
+  return _mm512_insertf64x4(res, _mm256_and_pd(lane1_a, lane1_b), 1);
+#endif
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16i por<Packet16i>(const Packet16i& a, const Packet16i& b) {
+  return _mm512_or_si512(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f por<Packet16f>(const Packet16f& a, const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  return _mm512_or_ps(a, b);
+#else
+  return _mm512_castsi512_ps(por(_mm512_castps_si512(a),_mm512_castps_si512(b)));
+#endif
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8d por<Packet8d>(const Packet8d& a,
+                                           const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  return _mm512_or_pd(a, b);
+#else
+  return _mm512_castsi512_pd(por(_mm512_castpd_si512(a),_mm512_castpd_si512(b)));
+#endif
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16i pxor<Packet16i>(const Packet16i& a, const Packet16i& b) {
+  return _mm512_xor_si512(a, b);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pxor<Packet16f>(const Packet16f& a, const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  return _mm512_xor_ps(a, b);
+#else
+  return _mm512_castsi512_ps(pxor(_mm512_castps_si512(a),_mm512_castps_si512(b)));
+#endif
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8d pxor<Packet8d>(const Packet8d& a, const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  return _mm512_xor_pd(a, b);
+#else
+  return _mm512_castsi512_pd(pxor(_mm512_castpd_si512(a),_mm512_castpd_si512(b)));
+#endif
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16i pandnot<Packet16i>(const Packet16i& a, const Packet16i& b) {
+  return _mm512_andnot_si512(b, a);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pandnot<Packet16f>(const Packet16f& a, const Packet16f& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  return _mm512_andnot_ps(b, a);
+#else
+  return _mm512_castsi512_ps(pandnot(_mm512_castps_si512(a),_mm512_castps_si512(b)));
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pandnot<Packet8d>(const Packet8d& a,const Packet8d& b) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  return _mm512_andnot_pd(b, a);
+#else
+  return _mm512_castsi512_pd(pandnot(_mm512_castpd_si512(a),_mm512_castpd_si512(b)));
+#endif
+}
+
+template<int N> EIGEN_STRONG_INLINE Packet16i parithmetic_shift_right(Packet16i a) {
+  return _mm512_srai_epi32(a, N);
+}
+
+template<int N> EIGEN_STRONG_INLINE Packet16i plogical_shift_right(Packet16i a) {
+  return _mm512_srli_epi32(a, N);
+}
+
+template<int N> EIGEN_STRONG_INLINE Packet16i plogical_shift_left(Packet16i a) {
+  return _mm512_slli_epi32(a, N);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f pload<Packet16f>(const float* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_ps(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pload<Packet8d>(const double* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_pd(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i pload<Packet16i>(const int* from) {
+  EIGEN_DEBUG_ALIGNED_LOAD return _mm512_load_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet16f ploadu<Packet16f>(const float* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_ps(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d ploadu<Packet8d>(const double* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_pd(from);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_si512(
+      reinterpret_cast<const __m512i*>(from));
+}
+
+// Loads 8 floats from memory a returns the packet
+// {a0, a0  a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7}
+template <>
+EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) {
+  // an unaligned load is required here as there is no requirement
+  // on the alignment of input pointer 'from'
+  __m256i low_half = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
+  __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half));
+  __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0));
+  return pairs;
+}
+
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+// FIXME: this does not look optimal, better load a Packet4d and shuffle...
+// Loads 4 doubles from memory a returns the packet {a0, a0  a1, a1, a2, a2, a3,
+// a3}
+template <>
+EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {
+ __m512d x = _mm512_setzero_pd();
+  x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0);
+  x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1);
+  x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2);
+  x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[3]), 3);
+  return x;
+}
+#else
+template <>
+EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) {
+  __m512d x = _mm512_setzero_pd();
+  x = _mm512_mask_broadcastsd_pd(x, 0x3<<0, _mm_load_sd(from+0));
+  x = _mm512_mask_broadcastsd_pd(x, 0x3<<2, _mm_load_sd(from+1));
+  x = _mm512_mask_broadcastsd_pd(x, 0x3<<4, _mm_load_sd(from+2));
+  x = _mm512_mask_broadcastsd_pd(x, 0x3<<6, _mm_load_sd(from+3));
+  return x;
+}
+#endif
+
+// Loads 4 floats from memory a returns the packet
+// {a0, a0  a0, a0, a1, a1, a1, a1, a2, a2, a2, a2, a3, a3, a3, a3}
+template <>
+EIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) {
+  Packet16f tmp = _mm512_castps128_ps512(ploadu<Packet4f>(from));
+  const Packet16i scatter_mask = _mm512_set_epi32(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0);
+  return _mm512_permutexvar_ps(scatter_mask, tmp);
+}
+
+// Loads 2 doubles from memory a returns the packet
+// {a0, a0  a0, a0, a1, a1, a1, a1}
+template <>
+EIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) {
+  __m256d lane0 = _mm256_set1_pd(*from);
+  __m256d lane1 = _mm256_set1_pd(*(from+1));
+  __m512d tmp = _mm512_undefined_pd();
+  tmp = _mm512_insertf64x4(tmp, lane0, 0);
+  return _mm512_insertf64x4(tmp, lane1, 1);
+}
+
+template <>
+EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet16f& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_ps(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet8d& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_store_pd(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet16i& from) {
+  EIGEN_DEBUG_ALIGNED_STORE _mm512_storeu_si512(reinterpret_cast<__m512i*>(to),
+                                                from);
+}
+
+template <>
+EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet16f& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_ps(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet8d& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_pd(to, from);
+}
+template <>
+EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet16i& from) {
+  EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_si512(
+      reinterpret_cast<__m512i*>(to), from);
+}
+
+template <>
+EIGEN_DEVICE_FUNC inline Packet16f pgather<float, Packet16f>(const float* from,
+                                                             Index stride) {
+  Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride));
+  Packet16i stride_multiplier =
+      _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);
+
+  return _mm512_i32gather_ps(indices, from, 4);
+}
+template <>
+EIGEN_DEVICE_FUNC inline Packet8d pgather<double, Packet8d>(const double* from,
+                                                            Index stride) {
+  Packet8i stride_vector = _mm256_set1_epi32(convert_index<int>(stride));
+  Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+  Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
+
+  return _mm512_i32gather_pd(indices, from, 8);
+}
+
+template <>
+EIGEN_DEVICE_FUNC inline void pscatter<float, Packet16f>(float* to,
+                                                         const Packet16f& from,
+                                                         Index stride) {
+  Packet16i stride_vector = _mm512_set1_epi32(convert_index<int>(stride));
+  Packet16i stride_multiplier =
+      _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  Packet16i indices = _mm512_mullo_epi32(stride_vector, stride_multiplier);
+  _mm512_i32scatter_ps(to, indices, from, 4);
+}
+template <>
+EIGEN_DEVICE_FUNC inline void pscatter<double, Packet8d>(double* to,
+                                                         const Packet8d& from,
+                                                         Index stride) {
+  Packet8i stride_vector = _mm256_set1_epi32(convert_index<int>(stride));
+  Packet8i stride_multiplier = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+  Packet8i indices = _mm256_mullo_epi32(stride_vector, stride_multiplier);
+  _mm512_i32scatter_pd(to, indices, from, 8);
+}
+
+template <>
+EIGEN_STRONG_INLINE void pstore1<Packet16f>(float* to, const float& a) {
+  Packet16f pa = pset1<Packet16f>(a);
+  pstore(to, pa);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore1<Packet8d>(double* to, const double& a) {
+  Packet8d pa = pset1<Packet8d>(a);
+  pstore(to, pa);
+}
+template <>
+EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
+  Packet16i pa = pset1<Packet16i>(a);
+  pstore(to, pa);
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+
+template <>
+EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {
+  return _mm_cvtss_f32(_mm512_extractf32x4_ps(a, 0));
+}
+template <>
+EIGEN_STRONG_INLINE double pfirst<Packet8d>(const Packet8d& a) {
+  return _mm_cvtsd_f64(_mm256_extractf128_pd(_mm512_extractf64x4_pd(a, 0), 0));
+}
+template <>
+EIGEN_STRONG_INLINE int pfirst<Packet16i>(const Packet16i& a) {
+  return _mm_extract_epi32(_mm512_extracti32x4_epi32(a, 0), 0);
+}
+
+template<> EIGEN_STRONG_INLINE Packet16f preverse(const Packet16f& a)
+{
+  return _mm512_permutexvar_ps(_mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a)
+{
+  return _mm512_permutexvar_pd(_mm512_set_epi32(0, 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7), a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
+{
+  // _mm512_abs_ps intrinsic not found, so hack around it
+  return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff)));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
+  // _mm512_abs_ps intrinsic not found, so hack around it
+  return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a),
+                                   _mm512_set1_epi64(0x7fffffffffffffff)));
+}
+
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+// AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512
+#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT)                           \
+  __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0);                    \
+  __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1)
+#else
+#define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT)                \
+  __m256 OUTPUT##_0 = _mm256_insertf128_ps(                     \
+      _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 0)), \
+      _mm512_extractf32x4_ps(INPUT, 1), 1);                     \
+  __m256 OUTPUT##_1 = _mm256_insertf128_ps(                     \
+      _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 2)), \
+      _mm512_extractf32x4_ps(INPUT, 3), 1);
+#endif
+
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB) \
+  OUTPUT = _mm512_insertf32x8(_mm512_castps256_ps512(INPUTA), INPUTB, 1);
+#else
+#define EIGEN_INSERT_8f_INTO_16f(OUTPUT, INPUTA, INPUTB)                    \
+  OUTPUT = _mm512_undefined_ps();                                           \
+  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 0), 0); \
+  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTA, 1), 1); \
+  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 0), 2); \
+  OUTPUT = _mm512_insertf32x4(OUTPUT, _mm256_extractf128_ps(INPUTB, 1), 3);
+#endif
+
+template <>
+EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  __m256 lane0 = _mm512_extractf32x8_ps(a, 0);
+  __m256 lane1 = _mm512_extractf32x8_ps(a, 1);
+  Packet8f x = _mm256_add_ps(lane0, lane1);
+  return predux<Packet8f>(x);
+#else
+  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);
+  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);
+  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);
+  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);
+  __m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3));
+  sum = _mm_hadd_ps(sum, sum);
+  sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1));
+  return _mm_cvtss_f32(sum);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
+  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);
+  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);
+  __m256d sum = _mm256_add_pd(lane0, lane1);
+  __m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1));
+  return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0)));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) {
+#ifdef EIGEN_VECTORIZE_AVX512DQ
+  Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
+  Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
+  return padd(lane0, lane1);
+#else
+  Packet4f lane0 = _mm512_extractf32x4_ps(a, 0);
+  Packet4f lane1 = _mm512_extractf32x4_ps(a, 1);
+  Packet4f lane2 = _mm512_extractf32x4_ps(a, 2);
+  Packet4f lane3 = _mm512_extractf32x4_ps(a, 3);
+  Packet4f sum0 = padd(lane0, lane2);
+  Packet4f sum1 = padd(lane1, lane3);
+  return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1);
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE Packet4d predux_downto4<Packet8d>(const Packet8d& a) {
+  Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
+  Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
+  Packet4d res = padd(lane0, lane1);
+  return res;
+}
+
+template <>
+EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
+//#ifdef EIGEN_VECTORIZE_AVX512DQ
+#if 0
+  Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
+  Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
+  Packet8f res = pmul(lane0, lane1);
+  res = pmul(res, _mm256_permute2f128_ps(res, res, 1));
+  res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
+#else
+  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);
+  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);
+  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);
+  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);
+  __m128 res = pmul(pmul(lane0, lane1), pmul(lane2, lane3));
+  res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
+#endif
+}
+template <>
+EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {
+  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);
+  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);
+  __m256d res = pmul(lane0, lane1);
+  res = pmul(res, _mm256_permute2f128_pd(res, res, 1));
+  return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1)));
+}
+
+template <>
+EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) {
+  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);
+  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);
+  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);
+  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);
+  __m128 res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3));
+  res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
+}
+template <>
+EIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) {
+  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);
+  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);
+  __m256d res = _mm256_min_pd(lane0, lane1);
+  res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1));
+  return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1)));
+}
+
+template <>
+EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) {
+  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);
+  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);
+  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);
+  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);
+  __m128 res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3));
+  res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
+  return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
+}
+
+template <>
+EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
+  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);
+  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);
+  __m256d res = _mm256_max_pd(lane0, lane1);
+  res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1));
+  return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet16f preduxp<Packet16f>(const Packet16f* vecs)
+{
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[0], vecs0);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[1], vecs1);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[2], vecs2);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[3], vecs3);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[4], vecs4);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[5], vecs5);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[6], vecs6);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[7], vecs7);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[8], vecs8);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[9], vecs9);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[10], vecs10);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[11], vecs11);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[12], vecs12);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[13], vecs13);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[14], vecs14);
+  EIGEN_EXTRACT_8f_FROM_16f(vecs[15], vecs15);
+
+  __m256 hsum1 = _mm256_hadd_ps(vecs0_0, vecs1_0);
+  __m256 hsum2 = _mm256_hadd_ps(vecs2_0, vecs3_0);
+  __m256 hsum3 = _mm256_hadd_ps(vecs4_0, vecs5_0);
+  __m256 hsum4 = _mm256_hadd_ps(vecs6_0, vecs7_0);
+
+  __m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+  __m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+  __m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+  __m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+
+  __m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+  __m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+  __m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+  __m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+
+  __m256 sum1 = _mm256_add_ps(perm1, hsum5);
+  __m256 sum2 = _mm256_add_ps(perm2, hsum6);
+  __m256 sum3 = _mm256_add_ps(perm3, hsum7);
+  __m256 sum4 = _mm256_add_ps(perm4, hsum8);
+
+  __m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+  __m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+
+  __m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
+
+  hsum1 = _mm256_hadd_ps(vecs0_1, vecs1_1);
+  hsum2 = _mm256_hadd_ps(vecs2_1, vecs3_1);
+  hsum3 = _mm256_hadd_ps(vecs4_1, vecs5_1);
+  hsum4 = _mm256_hadd_ps(vecs6_1, vecs7_1);
+
+  hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+  hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+  hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+  hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+
+  perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+  perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+  perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+  perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+
+  sum1 = _mm256_add_ps(perm1, hsum5);
+  sum2 = _mm256_add_ps(perm2, hsum6);
+  sum3 = _mm256_add_ps(perm3, hsum7);
+  sum4 = _mm256_add_ps(perm4, hsum8);
+
+  blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+  blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+
+  final = padd(final, _mm256_blend_ps(blend1, blend2, 0xf0));
+
+  hsum1 = _mm256_hadd_ps(vecs8_0, vecs9_0);
+  hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0);
+  hsum3 = _mm256_hadd_ps(vecs12_0, vecs13_0);
+  hsum4 = _mm256_hadd_ps(vecs14_0, vecs15_0);
+
+  hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+  hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+  hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+  hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+
+  perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+  perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+  perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+  perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+
+  sum1 = _mm256_add_ps(perm1, hsum5);
+  sum2 = _mm256_add_ps(perm2, hsum6);
+  sum3 = _mm256_add_ps(perm3, hsum7);
+  sum4 = _mm256_add_ps(perm4, hsum8);
+
+  blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+  blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+
+  __m256 final_1 = _mm256_blend_ps(blend1, blend2, 0xf0);
+
+  hsum1 = _mm256_hadd_ps(vecs8_1, vecs9_1);
+  hsum2 = _mm256_hadd_ps(vecs10_1, vecs11_1);
+  hsum3 = _mm256_hadd_ps(vecs12_1, vecs13_1);
+  hsum4 = _mm256_hadd_ps(vecs14_1, vecs15_1);
+
+  hsum5 = _mm256_hadd_ps(hsum1, hsum1);
+  hsum6 = _mm256_hadd_ps(hsum2, hsum2);
+  hsum7 = _mm256_hadd_ps(hsum3, hsum3);
+  hsum8 = _mm256_hadd_ps(hsum4, hsum4);
+
+  perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
+  perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
+  perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
+  perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
+
+  sum1 = _mm256_add_ps(perm1, hsum5);
+  sum2 = _mm256_add_ps(perm2, hsum6);
+  sum3 = _mm256_add_ps(perm3, hsum7);
+  sum4 = _mm256_add_ps(perm4, hsum8);
+
+  blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
+  blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
+
+  final_1 = padd(final_1, _mm256_blend_ps(blend1, blend2, 0xf0));
+
+  __m512 final_output;
+
+  EIGEN_INSERT_8f_INTO_16f(final_output, final, final_1);
+  return final_output;
+}
+
+template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs)
+{
+  Packet4d vecs0_0 = _mm512_extractf64x4_pd(vecs[0], 0);
+  Packet4d vecs0_1 = _mm512_extractf64x4_pd(vecs[0], 1);
+
+  Packet4d vecs1_0 = _mm512_extractf64x4_pd(vecs[1], 0);
+  Packet4d vecs1_1 = _mm512_extractf64x4_pd(vecs[1], 1);
+
+  Packet4d vecs2_0 = _mm512_extractf64x4_pd(vecs[2], 0);
+  Packet4d vecs2_1 = _mm512_extractf64x4_pd(vecs[2], 1);
+
+  Packet4d vecs3_0 = _mm512_extractf64x4_pd(vecs[3], 0);
+  Packet4d vecs3_1 = _mm512_extractf64x4_pd(vecs[3], 1);
+
+  Packet4d vecs4_0 = _mm512_extractf64x4_pd(vecs[4], 0);
+  Packet4d vecs4_1 = _mm512_extractf64x4_pd(vecs[4], 1);
+
+  Packet4d vecs5_0 = _mm512_extractf64x4_pd(vecs[5], 0);
+  Packet4d vecs5_1 = _mm512_extractf64x4_pd(vecs[5], 1);
+
+  Packet4d vecs6_0 = _mm512_extractf64x4_pd(vecs[6], 0);
+  Packet4d vecs6_1 = _mm512_extractf64x4_pd(vecs[6], 1);
+
+  Packet4d vecs7_0 = _mm512_extractf64x4_pd(vecs[7], 0);
+  Packet4d vecs7_1 = _mm512_extractf64x4_pd(vecs[7], 1);
+
+  Packet4d tmp0, tmp1;
+
+  tmp0 = _mm256_hadd_pd(vecs0_0, vecs1_0);
+  tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+
+  tmp1 = _mm256_hadd_pd(vecs2_0, vecs3_0);
+  tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+
+  __m256d final_0 = _mm256_blend_pd(tmp0, tmp1, 0xC);
+
+  tmp0 = _mm256_hadd_pd(vecs0_1, vecs1_1);
+  tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+
+  tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1);
+  tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+
+  final_0 = padd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC));
+
+  tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0);
+  tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+
+  tmp1 = _mm256_hadd_pd(vecs6_0, vecs7_0);
+  tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+
+  __m256d final_1 = _mm256_blend_pd(tmp0, tmp1, 0xC);
+
+  tmp0 = _mm256_hadd_pd(vecs4_1, vecs5_1);
+  tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1));
+
+  tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1);
+  tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1));
+
+  final_1 = padd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC));
+
+  __m512d final_output = _mm512_insertf64x4(final_output, final_0, 0);
+
+  return _mm512_insertf64x4(final_output, final_1, 1);
+}
+ 
+
+
+#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \
+  EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]);
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16f, 16>& kernel) {
+  __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+  __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+  __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+  __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+  __m512 T4 = _mm512_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
+  __m512 T5 = _mm512_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
+  __m512 T6 = _mm512_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
+  __m512 T7 = _mm512_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
+  __m512 T8 = _mm512_unpacklo_ps(kernel.packet[8], kernel.packet[9]);
+  __m512 T9 = _mm512_unpackhi_ps(kernel.packet[8], kernel.packet[9]);
+  __m512 T10 = _mm512_unpacklo_ps(kernel.packet[10], kernel.packet[11]);
+  __m512 T11 = _mm512_unpackhi_ps(kernel.packet[10], kernel.packet[11]);
+  __m512 T12 = _mm512_unpacklo_ps(kernel.packet[12], kernel.packet[13]);
+  __m512 T13 = _mm512_unpackhi_ps(kernel.packet[12], kernel.packet[13]);
+  __m512 T14 = _mm512_unpacklo_ps(kernel.packet[14], kernel.packet[15]);
+  __m512 T15 = _mm512_unpackhi_ps(kernel.packet[14], kernel.packet[15]);
+  __m512 S0 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));
+  __m512 S1 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));
+  __m512 S2 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));
+  __m512 S3 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));
+  __m512 S4 = _mm512_shuffle_ps(T4, T6, _MM_SHUFFLE(1, 0, 1, 0));
+  __m512 S5 = _mm512_shuffle_ps(T4, T6, _MM_SHUFFLE(3, 2, 3, 2));
+  __m512 S6 = _mm512_shuffle_ps(T5, T7, _MM_SHUFFLE(1, 0, 1, 0));
+  __m512 S7 = _mm512_shuffle_ps(T5, T7, _MM_SHUFFLE(3, 2, 3, 2));
+  __m512 S8 = _mm512_shuffle_ps(T8, T10, _MM_SHUFFLE(1, 0, 1, 0));
+  __m512 S9 = _mm512_shuffle_ps(T8, T10, _MM_SHUFFLE(3, 2, 3, 2));
+  __m512 S10 = _mm512_shuffle_ps(T9, T11, _MM_SHUFFLE(1, 0, 1, 0));
+  __m512 S11 = _mm512_shuffle_ps(T9, T11, _MM_SHUFFLE(3, 2, 3, 2));
+  __m512 S12 = _mm512_shuffle_ps(T12, T14, _MM_SHUFFLE(1, 0, 1, 0));
+  __m512 S13 = _mm512_shuffle_ps(T12, T14, _MM_SHUFFLE(3, 2, 3, 2));
+  __m512 S14 = _mm512_shuffle_ps(T13, T15, _MM_SHUFFLE(1, 0, 1, 0));
+  __m512 S15 = _mm512_shuffle_ps(T13, T15, _MM_SHUFFLE(3, 2, 3, 2));
+
+  EIGEN_EXTRACT_8f_FROM_16f(S0, S0);
+  EIGEN_EXTRACT_8f_FROM_16f(S1, S1);
+  EIGEN_EXTRACT_8f_FROM_16f(S2, S2);
+  EIGEN_EXTRACT_8f_FROM_16f(S3, S3);
+  EIGEN_EXTRACT_8f_FROM_16f(S4, S4);
+  EIGEN_EXTRACT_8f_FROM_16f(S5, S5);
+  EIGEN_EXTRACT_8f_FROM_16f(S6, S6);
+  EIGEN_EXTRACT_8f_FROM_16f(S7, S7);
+  EIGEN_EXTRACT_8f_FROM_16f(S8, S8);
+  EIGEN_EXTRACT_8f_FROM_16f(S9, S9);
+  EIGEN_EXTRACT_8f_FROM_16f(S10, S10);
+  EIGEN_EXTRACT_8f_FROM_16f(S11, S11);
+  EIGEN_EXTRACT_8f_FROM_16f(S12, S12);
+  EIGEN_EXTRACT_8f_FROM_16f(S13, S13);
+  EIGEN_EXTRACT_8f_FROM_16f(S14, S14);
+  EIGEN_EXTRACT_8f_FROM_16f(S15, S15);
+
+  PacketBlock<Packet8f, 32> tmp;
+
+  tmp.packet[0] = _mm256_permute2f128_ps(S0_0, S4_0, 0x20);
+  tmp.packet[1] = _mm256_permute2f128_ps(S1_0, S5_0, 0x20);
+  tmp.packet[2] = _mm256_permute2f128_ps(S2_0, S6_0, 0x20);
+  tmp.packet[3] = _mm256_permute2f128_ps(S3_0, S7_0, 0x20);
+  tmp.packet[4] = _mm256_permute2f128_ps(S0_0, S4_0, 0x31);
+  tmp.packet[5] = _mm256_permute2f128_ps(S1_0, S5_0, 0x31);
+  tmp.packet[6] = _mm256_permute2f128_ps(S2_0, S6_0, 0x31);
+  tmp.packet[7] = _mm256_permute2f128_ps(S3_0, S7_0, 0x31);
+
+  tmp.packet[8] = _mm256_permute2f128_ps(S0_1, S4_1, 0x20);
+  tmp.packet[9] = _mm256_permute2f128_ps(S1_1, S5_1, 0x20);
+  tmp.packet[10] = _mm256_permute2f128_ps(S2_1, S6_1, 0x20);
+  tmp.packet[11] = _mm256_permute2f128_ps(S3_1, S7_1, 0x20);
+  tmp.packet[12] = _mm256_permute2f128_ps(S0_1, S4_1, 0x31);
+  tmp.packet[13] = _mm256_permute2f128_ps(S1_1, S5_1, 0x31);
+  tmp.packet[14] = _mm256_permute2f128_ps(S2_1, S6_1, 0x31);
+  tmp.packet[15] = _mm256_permute2f128_ps(S3_1, S7_1, 0x31);
+
+  // Second set of _m256 outputs
+  tmp.packet[16] = _mm256_permute2f128_ps(S8_0, S12_0, 0x20);
+  tmp.packet[17] = _mm256_permute2f128_ps(S9_0, S13_0, 0x20);
+  tmp.packet[18] = _mm256_permute2f128_ps(S10_0, S14_0, 0x20);
+  tmp.packet[19] = _mm256_permute2f128_ps(S11_0, S15_0, 0x20);
+  tmp.packet[20] = _mm256_permute2f128_ps(S8_0, S12_0, 0x31);
+  tmp.packet[21] = _mm256_permute2f128_ps(S9_0, S13_0, 0x31);
+  tmp.packet[22] = _mm256_permute2f128_ps(S10_0, S14_0, 0x31);
+  tmp.packet[23] = _mm256_permute2f128_ps(S11_0, S15_0, 0x31);
+
+  tmp.packet[24] = _mm256_permute2f128_ps(S8_1, S12_1, 0x20);
+  tmp.packet[25] = _mm256_permute2f128_ps(S9_1, S13_1, 0x20);
+  tmp.packet[26] = _mm256_permute2f128_ps(S10_1, S14_1, 0x20);
+  tmp.packet[27] = _mm256_permute2f128_ps(S11_1, S15_1, 0x20);
+  tmp.packet[28] = _mm256_permute2f128_ps(S8_1, S12_1, 0x31);
+  tmp.packet[29] = _mm256_permute2f128_ps(S9_1, S13_1, 0x31);
+  tmp.packet[30] = _mm256_permute2f128_ps(S10_1, S14_1, 0x31);
+  tmp.packet[31] = _mm256_permute2f128_ps(S11_1, S15_1, 0x31);
+
+  // Pack them into the output
+  PACK_OUTPUT(kernel.packet, tmp.packet, 0, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 1, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 2, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 3, 16);
+
+  PACK_OUTPUT(kernel.packet, tmp.packet, 4, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 5, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 6, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 7, 16);
+
+  PACK_OUTPUT(kernel.packet, tmp.packet, 8, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 9, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 10, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 11, 16);
+
+  PACK_OUTPUT(kernel.packet, tmp.packet, 12, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 13, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 14, 16);
+  PACK_OUTPUT(kernel.packet, tmp.packet, 15, 16);
+}
+#define PACK_OUTPUT_2(OUTPUT, INPUT, INDEX, STRIDE)         \
+  EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[2 * INDEX], \
+                           INPUT[2 * INDEX + STRIDE]);
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet16f, 4>& kernel) {
+  __m512 T0 = _mm512_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
+  __m512 T1 = _mm512_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
+  __m512 T2 = _mm512_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
+  __m512 T3 = _mm512_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
+
+  __m512 S0 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(1, 0, 1, 0));
+  __m512 S1 = _mm512_shuffle_ps(T0, T2, _MM_SHUFFLE(3, 2, 3, 2));
+  __m512 S2 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(1, 0, 1, 0));
+  __m512 S3 = _mm512_shuffle_ps(T1, T3, _MM_SHUFFLE(3, 2, 3, 2));
+
+  EIGEN_EXTRACT_8f_FROM_16f(S0, S0);
+  EIGEN_EXTRACT_8f_FROM_16f(S1, S1);
+  EIGEN_EXTRACT_8f_FROM_16f(S2, S2);
+  EIGEN_EXTRACT_8f_FROM_16f(S3, S3);
+
+  PacketBlock<Packet8f, 8> tmp;
+
+  tmp.packet[0] = _mm256_permute2f128_ps(S0_0, S1_0, 0x20);
+  tmp.packet[1] = _mm256_permute2f128_ps(S2_0, S3_0, 0x20);
+  tmp.packet[2] = _mm256_permute2f128_ps(S0_0, S1_0, 0x31);
+  tmp.packet[3] = _mm256_permute2f128_ps(S2_0, S3_0, 0x31);
+
+  tmp.packet[4] = _mm256_permute2f128_ps(S0_1, S1_1, 0x20);
+  tmp.packet[5] = _mm256_permute2f128_ps(S2_1, S3_1, 0x20);
+  tmp.packet[6] = _mm256_permute2f128_ps(S0_1, S1_1, 0x31);
+  tmp.packet[7] = _mm256_permute2f128_ps(S2_1, S3_1, 0x31);
+
+  PACK_OUTPUT_2(kernel.packet, tmp.packet, 0, 1);
+  PACK_OUTPUT_2(kernel.packet, tmp.packet, 1, 1);
+  PACK_OUTPUT_2(kernel.packet, tmp.packet, 2, 1);
+  PACK_OUTPUT_2(kernel.packet, tmp.packet, 3, 1);
+}
+
+#define PACK_OUTPUT_SQ_D(OUTPUT, INPUT, INDEX, STRIDE)                \
+  OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[INDEX], 0); \
+  OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[INDEX + STRIDE], 1);
+
+#define PACK_OUTPUT_D(OUTPUT, INPUT, INDEX, STRIDE)                         \
+  OUTPUT[INDEX] = _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX)], 0); \
+  OUTPUT[INDEX] =                                                           \
+      _mm512_insertf64x4(OUTPUT[INDEX], INPUT[(2 * INDEX) + STRIDE], 1);
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 4>& kernel) {
+  __m512d T0 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0);
+  __m512d T1 = _mm512_shuffle_pd(kernel.packet[0], kernel.packet[1], 0xff);
+  __m512d T2 = _mm512_shuffle_pd(kernel.packet[2], kernel.packet[3], 0);
+  __m512d T3 = _mm512_shuffle_pd(kernel.packet[2], kernel.packet[3], 0xff);
+
+  PacketBlock<Packet4d, 8> tmp;
+
+  tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
+                                         _mm512_extractf64x4_pd(T2, 0), 0x20);
+  tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
+                                         _mm512_extractf64x4_pd(T3, 0), 0x20);
+  tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
+                                         _mm512_extractf64x4_pd(T2, 0), 0x31);
+  tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
+                                         _mm512_extractf64x4_pd(T3, 0), 0x31);
+
+  tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
+                                         _mm512_extractf64x4_pd(T2, 1), 0x20);
+  tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
+                                         _mm512_extractf64x4_pd(T3, 1), 0x20);
+  tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
+                                         _mm512_extractf64x4_pd(T2, 1), 0x31);
+  tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
+                                         _mm512_extractf64x4_pd(T3, 1), 0x31);
+
+  PACK_OUTPUT_D(kernel.packet, tmp.packet, 0, 1);
+  PACK_OUTPUT_D(kernel.packet, tmp.packet, 1, 1);
+  PACK_OUTPUT_D(kernel.packet, tmp.packet, 2, 1);
+  PACK_OUTPUT_D(kernel.packet, tmp.packet, 3, 1);
+}
+
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8d, 8>& kernel) {
+  __m512d T0 = _mm512_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
+  __m512d T1 = _mm512_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
+  __m512d T2 = _mm512_unpacklo_pd(kernel.packet[2], kernel.packet[3]);
+  __m512d T3 = _mm512_unpackhi_pd(kernel.packet[2], kernel.packet[3]);
+  __m512d T4 = _mm512_unpacklo_pd(kernel.packet[4], kernel.packet[5]);
+  __m512d T5 = _mm512_unpackhi_pd(kernel.packet[4], kernel.packet[5]);
+  __m512d T6 = _mm512_unpacklo_pd(kernel.packet[6], kernel.packet[7]);
+  __m512d T7 = _mm512_unpackhi_pd(kernel.packet[6], kernel.packet[7]);
+
+  PacketBlock<Packet4d, 16> tmp;
+
+  tmp.packet[0] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
+                                         _mm512_extractf64x4_pd(T2, 0), 0x20);
+  tmp.packet[1] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
+                                         _mm512_extractf64x4_pd(T3, 0), 0x20);
+  tmp.packet[2] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 0),
+                                         _mm512_extractf64x4_pd(T2, 0), 0x31);
+  tmp.packet[3] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 0),
+                                         _mm512_extractf64x4_pd(T3, 0), 0x31);
+
+  tmp.packet[4] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
+                                         _mm512_extractf64x4_pd(T2, 1), 0x20);
+  tmp.packet[5] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
+                                         _mm512_extractf64x4_pd(T3, 1), 0x20);
+  tmp.packet[6] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T0, 1),
+                                         _mm512_extractf64x4_pd(T2, 1), 0x31);
+  tmp.packet[7] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T1, 1),
+                                         _mm512_extractf64x4_pd(T3, 1), 0x31);
+
+  tmp.packet[8] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0),
+                                         _mm512_extractf64x4_pd(T6, 0), 0x20);
+  tmp.packet[9] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0),
+                                         _mm512_extractf64x4_pd(T7, 0), 0x20);
+  tmp.packet[10] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 0),
+                                          _mm512_extractf64x4_pd(T6, 0), 0x31);
+  tmp.packet[11] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 0),
+                                          _mm512_extractf64x4_pd(T7, 0), 0x31);
+
+  tmp.packet[12] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1),
+                                          _mm512_extractf64x4_pd(T6, 1), 0x20);
+  tmp.packet[13] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1),
+                                          _mm512_extractf64x4_pd(T7, 1), 0x20);
+  tmp.packet[14] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T4, 1),
+                                          _mm512_extractf64x4_pd(T6, 1), 0x31);
+  tmp.packet[15] = _mm256_permute2f128_pd(_mm512_extractf64x4_pd(T5, 1),
+                                          _mm512_extractf64x4_pd(T7, 1), 0x31);
+
+  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 0, 8);
+  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 1, 8);
+  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 2, 8);
+  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 3, 8);
+
+  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 4, 8);
+  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 5, 8);
+  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 6, 8);
+  PACK_OUTPUT_SQ_D(kernel.packet, tmp.packet, 7, 8);
+}
+template <>
+EIGEN_STRONG_INLINE Packet16f pblend(const Selector<16>& /*ifPacket*/,
+                                     const Packet16f& /*thenPacket*/,
+                                     const Packet16f& /*elsePacket*/) {
+  assert(false && "To be implemented");
+  return Packet16f();
+}
+template <>
+EIGEN_STRONG_INLINE Packet8d pblend(const Selector<8>& ifPacket,
+                                    const Packet8d& thenPacket,
+                                    const Packet8d& elsePacket) {
+  __mmask8 m = (ifPacket.select[0]   )
+             | (ifPacket.select[1]<<1)
+             | (ifPacket.select[2]<<2)
+             | (ifPacket.select[3]<<3)
+             | (ifPacket.select[4]<<4)
+             | (ifPacket.select[5]<<5)
+             | (ifPacket.select[6]<<6)
+             | (ifPacket.select[7]<<7);
+  return _mm512_mask_blend_pd(m, elsePacket, thenPacket);
+}
+
+template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {
+  return _mm512_cvttps_epi32(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16i, Packet16f>(const Packet16i& a) {
+  return _mm512_cvtepi32_ps(a);
+}
+
+template <int Offset>
+struct palign_impl<Offset, Packet16f> {
+  static EIGEN_STRONG_INLINE void run(Packet16f& first,
+                                      const Packet16f& second) {
+    if (Offset != 0) {
+      __m512i first_idx = _mm512_set_epi32(
+          Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
+          Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
+          Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
+
+      __m512i second_idx =
+          _mm512_set_epi32(Offset - 1, Offset - 2, Offset - 3, Offset - 4,
+                           Offset - 5, Offset - 6, Offset - 7, Offset - 8,
+                           Offset - 9, Offset - 10, Offset - 11, Offset - 12,
+                           Offset - 13, Offset - 14, Offset - 15, Offset - 16);
+
+      unsigned short mask = 0xFFFF;
+      mask <<= (16 - Offset);
+
+      first = _mm512_permutexvar_ps(first_idx, first);
+      Packet16f tmp = _mm512_permutexvar_ps(second_idx, second);
+      first = _mm512_mask_blend_ps(mask, first, tmp);
+    }
+  }
+};
+template <int Offset>
+struct palign_impl<Offset, Packet8d> {
+  static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
+    if (Offset != 0) {
+      __m512i first_idx = _mm512_set_epi32(
+          0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
+          Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
+
+      __m512i second_idx = _mm512_set_epi32(
+          0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
+          Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);
+
+      unsigned char mask = 0xFF;
+      mask <<= (8 - Offset);
+
+      first = _mm512_permutexvar_pd(first_idx, first);
+      Packet8d tmp = _mm512_permutexvar_pd(second_idx, second);
+      first = _mm512_mask_blend_pd(mask, first, tmp);
+    }
+  }
+};
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_AVX512_H
diff --git a/third-party/Eigen/src/Core/arch/AltiVec/Complex.h b/third-party/Eigen/src/Core/arch/AltiVec/Complex.h
new file mode 100644
index 00000000..3e665730
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -0,0 +1,430 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010-2016 Konstantinos Margaritis <markos@freevec.org>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX32_ALTIVEC_H
+#define EIGEN_COMPLEX32_ALTIVEC_H
+
+namespace Eigen {
+
+namespace internal {
+
+static Packet4ui  p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
+#ifdef __VSX__
+#if defined(_BIG_ENDIAN)
+static Packet2ul  p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+static Packet2ul  p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO,  (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+#else
+static Packet2ul  p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO,  (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+static Packet2ul  p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+#endif
+#endif
+
+//---------- float ----------
+struct Packet2cf
+{
+  EIGEN_STRONG_INLINE explicit Packet2cf() : v(p4f_ZERO) {}
+  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+  Packet4f  v;
+};
+
+template<> struct packet_traits<std::complex<float> >  : default_packet_traits
+{
+  typedef Packet2cf type;
+  typedef Packet2cf half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 2,
+    HasHalfPacket = 0,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+#ifdef __VSX__
+    HasBlend  = 1,
+#endif
+    HasSetLinear = 0
+  };
+};
+
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
+{
+  Packet2cf res;
+  if((std::ptrdiff_t(&from) % 16) == 0)
+    res.v = pload<Packet4f>((const float *)&from);
+  else
+    res.v = ploadu<Packet4f>((const float *)&from);
+  res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>*        from) { return Packet2cf(pload<Packet4f>((const float *) from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>*       from) { return Packet2cf(ploadu<Packet4f>((const float*) from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>*     from) { return pset1<Packet2cf>(*from); }
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+  std::complex<float> EIGEN_ALIGN16 af[2];
+  af[0] = from[0*stride];
+  af[1] = from[1*stride];
+  return pload<Packet2cf>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+  std::complex<float> EIGEN_ALIGN16 af[2];
+  pstore<std::complex<float> >((std::complex<float> *) af, from);
+  to[0*stride] = af[0];
+  to[1*stride] = af[1];
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v + b.v); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(a.v - b.v); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(pxor<Packet4f>(a.v, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR))); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  Packet4f v1, v2;
+
+  // Permute and multiply the real parts of a and b
+  v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
+  // Get the imaginary parts of a
+  v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
+  // multiply a_re * b 
+  v1 = vec_madd(v1, b.v, p4f_ZERO);
+  // multiply a_im * b and get the conjugate result
+  v2 = vec_madd(v2, b.v, p4f_ZERO);
+  v2 = reinterpret_cast<Packet4f>(pxor(v2, reinterpret_cast<Packet4f>(p4ui_CONJ_XOR)));
+  // permute back to a proper order
+  v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
+  
+  return Packet2cf(padd<Packet4f>(v1, v2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v, b.v)); }
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr)    { EIGEN_PPC_PREFETCH(addr); }
+
+template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
+{
+  std::complex<float> EIGEN_ALIGN16 res[2];
+  pstore((float *)&res, a.v);
+
+  return res[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+  Packet4f rev_a;
+  rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
+  return Packet2cf(rev_a);
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+  Packet4f b;
+  b = vec_sld(a.v, a.v, 8);
+  b = padd<Packet4f>(a.v, b);
+  return pfirst<Packet2cf>(Packet2cf(b));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+  Packet4f b1, b2;
+#ifdef _BIG_ENDIAN  
+  b1 = vec_sld(vecs[0].v, vecs[1].v, 8);
+  b2 = vec_sld(vecs[1].v, vecs[0].v, 8);
+#else
+  b1 = vec_sld(vecs[1].v, vecs[0].v, 8);
+  b2 = vec_sld(vecs[0].v, vecs[1].v, 8);
+#endif
+  b2 = vec_sld(b2, b2, 8);
+  b2 = padd<Packet4f>(b1, b2);
+
+  return Packet2cf(b2);
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+  Packet4f b;
+  Packet2cf prod;
+  b = vec_sld(a.v, a.v, 8);
+  prod = pmul<Packet2cf>(a, Packet2cf(b));
+
+  return pfirst<Packet2cf>(prod);
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+  {
+    if (Offset==1)
+    {
+#ifdef _BIG_ENDIAN
+      first.v = vec_sld(first.v, second.v, 8);
+#else
+      first.v = vec_sld(second.v, first.v, 8);
+#endif
+    }
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return internal::pmul(a, pconj(b));
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return internal::pmul(pconj(a), b);
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return pconj(internal::pmul(a, b));
+  }
+};
+
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
+
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  // TODO optimize it for AltiVec
+  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a, b);
+  Packet4f s = pmul<Packet4f>(b.v, b.v);
+  return Packet2cf(pdiv(res.v, padd<Packet4f>(s, vec_perm(s, s, p16uc_COMPLEX32_REV))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
+{
+  return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
+}
+
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
+{
+  Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+  kernel.packet[0].v = tmp;
+}
+
+#ifdef __VSX__
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+  Packet2cf result;
+  result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
+  return result;
+}
+#endif
+
+//---------- double ----------
+#ifdef __VSX__
+struct Packet1cd
+{
+  EIGEN_STRONG_INLINE Packet1cd() {}
+  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+  Packet2d v;
+};
+
+template<> struct packet_traits<std::complex<double> >  : default_packet_traits
+{
+  typedef Packet1cd type;
+  typedef Packet1cd half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 0,
+    size = 1,
+    HasHalfPacket = 0,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0
+  };
+};
+
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)
+{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
+{
+  std::complex<double> EIGEN_ALIGN16 af[2];
+  af[0] = from[0*stride];
+  af[1] = from[1*stride];
+  return pload<Packet1cd>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
+{
+  std::complex<double> EIGEN_ALIGN16 af[2];
+  pstore<std::complex<double> >(af, from);
+  to[0*stride] = af[0];
+  to[1*stride] = af[1];
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR2))); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  Packet2d a_re, a_im, v1, v2;
+
+  // Permute and multiply the real parts of a and b
+  a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
+  // Get the imaginary parts of a
+  a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
+  // multiply a_re * b
+  v1 = vec_madd(a_re, b.v, p2d_ZERO);
+  // multiply a_im * b and get the conjugate result
+  v2 = vec_madd(a_im, b.v, p2d_ZERO);
+  v2 = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v2), reinterpret_cast<Packet4ui>(v2), 8));
+  v2 = pxor(v2, reinterpret_cast<Packet2d>(p2ul_CONJ_XOR1));
+
+  return Packet1cd(padd<Packet2d>(v1, v2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pand   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pand(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(por(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pxor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(pandnot(a.v, b.v)); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>*     from)  { return pset1<Packet1cd>(*from); }
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr)    { EIGEN_PPC_PREFETCH(addr); }
+
+template<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)
+{
+  std::complex<double> EIGEN_ALIGN16 res[2];
+  pstore<std::complex<double> >(res, a);
+
+  return res[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)        { return vecs[0]; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+  {
+    // FIXME is it sure we never have to align a Packet1cd?
+    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return internal::pmul(a, pconj(b));
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return internal::pmul(pconj(a), b);
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return pconj(internal::pmul(a, b));
+  }
+};
+
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
+
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  // TODO optimize it for AltiVec
+  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+  Packet2d s = pmul<Packet2d>(b.v, b.v);
+  return Packet1cd(pdiv(res.v, padd<Packet2d>(s, vec_perm(s, s, p16uc_REVERSE64))));
+}
+
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
+{
+  return Packet1cd(preverse(Packet2d(x.v)));
+}
+
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+  Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+  kernel.packet[0].v = tmp;
+}
+#endif // __VSX__
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX32_ALTIVEC_H
diff --git a/third-party/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/third-party/Eigen/src/Core/arch/AltiVec/MathFunctions.h
new file mode 100644
index 00000000..c5e4bede
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/AltiVec/MathFunctions.h
@@ -0,0 +1,322 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007 Julien Pommier
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/* The sin, cos, exp, and log functions of this file come from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+
+namespace Eigen {
+
+namespace internal {
+
+static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
+
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+
+/* the smallest non denormalized float number */
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos,  0x00800000);
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf,     0xff800000); // -1.f/0.f
+static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan,     0xffffffff);
+  
+/* natural logarithm computed for 4 simultaneous float
+  return NaN for x <= 0
+*/
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+
+static _EIGEN_DECLARE_CONST_Packet4f(exp_hi,  88.3762626647950f);
+static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+
+#ifdef __VSX__
+static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+
+static _EIGEN_DECLARE_CONST_Packet2d(exp_hi,  709.437);
+static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+
+#ifdef __POWER8_VECTOR__
+static Packet2l p2l_1023 = { 1023, 1023 };
+static Packet2ul p2ul_52 = { 52, 52 };
+#endif
+
+#endif
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f plog<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+
+  Packet4i emm0;
+
+  /* isvalid_mask is 0 if x < 0 or x is NaN. */
+  Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
+  Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
+
+  x = pmax(x, p4f_min_norm_pos);  /* cut off denormalized stuff */
+  emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
+                reinterpret_cast<Packet4ui>(p4i_23));
+
+  /* keep only the fractional part */
+  x = pand(x, p4f_inv_mant_mask);
+  x = por(x, p4f_half);
+
+  emm0 = psub(emm0, p4i_0x7f);
+  Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
+
+  /* part2:
+     if( x < SQRTHF ) {
+       e -= 1;
+       x = x + x - 1.0;
+     } else { x = x - 1.0; }
+  */
+  Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
+  Packet4f tmp = pand(x, mask);
+  x = psub(x, p4f_1);
+  e = psub(e, pand(p4f_1, mask));
+  x = padd(x, tmp);
+
+  Packet4f x2 = pmul(x,x);
+  Packet4f x3 = pmul(x2,x);
+
+  Packet4f y, y1, y2;
+  y  = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
+  y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
+  y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
+  y  = pmadd(y , x, p4f_cephes_log_p2);
+  y1 = pmadd(y1, x, p4f_cephes_log_p5);
+  y2 = pmadd(y2, x, p4f_cephes_log_p8);
+  y = pmadd(y, x3, y1);
+  y = pmadd(y, x3, y2);
+  y = pmul(y, x3);
+
+  y1 = pmul(e, p4f_cephes_log_q1);
+  tmp = pmul(x2, p4f_half);
+  y = padd(y, y1);
+  x = psub(x, tmp);
+  y2 = pmul(e, p4f_cephes_log_q2);
+  x = padd(x, y);
+  x = padd(x, y2);
+  // negative arg will be NAN, 0 will be -INF
+  x = vec_sel(x, p4f_minus_inf, iszero_mask);
+  x = vec_sel(p4f_minus_nan, x, isvalid_mask);
+  return x;
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+
+  Packet4f tmp, fx;
+  Packet4i emm0;
+
+  // clamp x
+  x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
+
+  // express exp(x) as exp(g + n*log(2))
+  fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
+
+  fx = pfloor(fx);
+
+  tmp = pmul(fx, p4f_cephes_exp_C1);
+  Packet4f z = pmul(fx, p4f_cephes_exp_C2);
+  x = psub(x, tmp);
+  x = psub(x, z);
+
+  z = pmul(x,x);
+
+  Packet4f y = p4f_cephes_exp_p0;
+  y = pmadd(y, x, p4f_cephes_exp_p1);
+  y = pmadd(y, x, p4f_cephes_exp_p2);
+  y = pmadd(y, x, p4f_cephes_exp_p3);
+  y = pmadd(y, x, p4f_cephes_exp_p4);
+  y = pmadd(y, x, p4f_cephes_exp_p5);
+  y = pmadd(y, z, x);
+  y = padd(y, p4f_1);
+
+  // build 2^n
+  emm0 = vec_cts(fx, 0);
+  emm0 = vec_add(emm0, p4i_0x7f);
+  emm0 = vec_sl(emm0, reinterpret_cast<Packet4ui>(p4i_23));
+
+  // Altivec's max & min operators just drop silent NaNs. Check NaNs in 
+  // inputs and return them unmodified.
+  Packet4ui isnumber_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(_x, _x));
+  return vec_sel(_x, pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x),
+                 isnumber_mask);
+}
+
+#ifndef EIGEN_COMP_CLANG
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x)
+{
+  return  vec_rsqrt(x);
+}
+#endif
+
+#ifdef __VSX__
+#ifndef EIGEN_COMP_CLANG
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x)
+{
+  return  vec_rsqrt(x);
+}
+#endif
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x)
+{
+  return  vec_sqrt(x);
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x)
+{
+  return  vec_sqrt(x);
+}
+
+// VSX support varies between different compilers and even different
+// versions of the same compiler.  For gcc version >= 4.9.3, we can use
+// vec_cts to efficiently convert Packet2d to Packet2l.  Otherwise, use
+// a slow version that works with older compilers. 
+// Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles
+// are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963
+static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
+#if EIGEN_GNUC_AT_LEAST(5, 4) || \
+    (EIGEN_GNUC_AT(6, 1) && __GNUC_PATCHLEVEL__ >= 1)
+  return vec_cts(x, 0);    // TODO: check clang version.
+#else
+  double tmp[2];
+  memcpy(tmp, &x, sizeof(tmp));
+  Packet2l l = { static_cast<long long>(tmp[0]),
+                 static_cast<long long>(tmp[1]) };
+  return l;
+#endif
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+  Packet2d x = _x;
+
+  Packet2d tmp, fx;
+  Packet2l emm0;
+
+  // clamp x
+  x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+
+  /* express exp(x) as exp(g + n*log(2)) */
+  fx = pmadd(x, p2d_cephes_LOG2EF, p2d_half);
+
+  fx = pfloor(fx);
+
+  tmp = pmul(fx, p2d_cephes_exp_C1);
+  Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+  x = psub(x, tmp);
+  x = psub(x, z);
+
+  Packet2d x2 = pmul(x,x);
+
+  Packet2d px = p2d_cephes_exp_p0;
+  px = pmadd(px, x2, p2d_cephes_exp_p1);
+  px = pmadd(px, x2, p2d_cephes_exp_p2);
+  px = pmul (px, x);
+
+  Packet2d qx = p2d_cephes_exp_q0;
+  qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+  qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+  qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+
+  x = pdiv(px,psub(qx,px));
+  x = pmadd(p2d_2,x,p2d_1);
+
+  // build 2^n
+  emm0 = ConvertToPacket2l(fx);
+
+#ifdef __POWER8_VECTOR__ 
+  emm0 = vec_add(emm0, p2l_1023);
+  emm0 = vec_sl(emm0, p2ul_52);
+#else
+  // Code is a bit complex for POWER7.  There is actually a
+  // vec_xxsldi intrinsic but it is not supported by some gcc versions.
+  // So we shift (52-32) bits and do a word swap with zeros.
+  _EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
+  _EIGEN_DECLARE_CONST_Packet4i(20, 20);    // 52 - 32
+
+  Packet4i emm04i = reinterpret_cast<Packet4i>(emm0);
+  emm04i = vec_add(emm04i, p4i_1023);
+  emm04i = vec_sl(emm04i, reinterpret_cast<Packet4ui>(p4i_20));
+  static const Packet16uc perm = {
+    0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03, 
+    0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
+#ifdef  _BIG_ENDIAN
+  emm0 = reinterpret_cast<Packet2l>(vec_perm(p4i_ZERO, emm04i, perm));
+#else
+  emm0 = reinterpret_cast<Packet2l>(vec_perm(emm04i, p4i_ZERO, perm));
+#endif
+
+#endif
+
+  // Altivec's max & min operators just drop silent NaNs. Check NaNs in 
+  // inputs and return them unmodified.
+  Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
+  return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
+                 isnumber_mask);
+}
+#endif
+
+}  // end namespace internal
+
+}  // end namespace Eigen
+
+#endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
diff --git a/third-party/Eigen/src/Core/arch/AltiVec/PacketMath.h b/third-party/Eigen/src/Core/arch/AltiVec/PacketMath.h
new file mode 100644
index 00000000..08a27d15
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -0,0 +1,1061 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2016 Konstantinos Margaritis <markos@freevec.org>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_ALTIVEC_H
+#define EIGEN_PACKET_MATH_ALTIVEC_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+
+// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS  32
+#endif
+
+typedef __vector float          Packet4f;
+typedef __vector int            Packet4i;
+typedef __vector unsigned int   Packet4ui;
+typedef __vector __bool int     Packet4bi;
+typedef __vector short int      Packet8i;
+typedef __vector unsigned char  Packet16uc;
+
+// We don't want to write the same code all the time, but we need to reuse the constants
+// and it doesn't really work to declare them global, so we define macros instead
+
+#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
+  Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X))
+
+#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
+  Packet4i p4i_##NAME = vec_splat_s32(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+  Packet4f p4f_##NAME = pset1<Packet4f>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+  Packet4i p4i_##NAME = pset1<Packet4i>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+  Packet2d p2d_##NAME = pset1<Packet2d>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+  Packet2l p2l_##NAME = pset1<Packet2l>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+  const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X))
+
+#define DST_CHAN 1
+#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
+
+
+// These constants are endian-agnostic
+static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0}
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1}
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16}
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1}
+static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000}
+#ifndef __VSX__
+static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0}
+#endif
+
+static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
+static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
+
+static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
+static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
+
+// Mask alignment
+#ifdef __PPC64__
+#define _EIGEN_MASK_ALIGNMENT	0xfffffffffffffff0
+#else
+#define _EIGEN_MASK_ALIGNMENT	0xfffffff0
+#endif
+
+#define _EIGEN_ALIGNED_PTR(x)	((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
+
+// Handle endianness properly while loading constants
+// Define global static constants:
+#ifdef _BIG_ENDIAN
+static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
+#ifdef __VSX__
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+#endif
+static Packet16uc p16uc_PSET32_WODD   = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+static Packet16uc p16uc_PSET32_WEVEN  = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
+static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);      //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
+#else
+static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
+static Packet16uc p16uc_HALF64_0_16 = vec_sld(vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 0), (Packet16uc)p4i_ZERO, 8);      //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
+#endif // _BIG_ENDIAN
+
+static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);     //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);     //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
+static Packet16uc p16uc_TRANSPOSE64_HI = p16uc_PSET64_HI + p16uc_HALF64_0_16;                                         //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
+static Packet16uc p16uc_TRANSPOSE64_LO = p16uc_PSET64_LO + p16uc_HALF64_0_16;                                         //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
+
+static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8);                                         //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
+
+#ifdef _BIG_ENDIAN
+static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);                                            //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+#else
+static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_LO, 8);                                            //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+#endif // _BIG_ENDIAN
+
+#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+  #define EIGEN_PPC_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#else
+  #define EIGEN_PPC_PREFETCH(ADDR) asm( "   dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+#endif
+
+template<> struct packet_traits<float>  : default_packet_traits
+{
+  typedef Packet4f type;
+  typedef Packet4f half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=4,
+    HasHalfPacket = 1,
+
+    HasAdd  = 1,
+    HasSub  = 1,
+    HasMul  = 1,
+    HasDiv  = 1,
+    HasMin  = 1,
+    HasMax  = 1,
+    HasAbs  = 1,
+    HasSin  = 0,
+    HasCos  = 0,
+    HasLog  = 0,
+    HasExp  = 1,
+#ifdef __VSX__
+    HasSqrt = 1,
+#if !EIGEN_COMP_CLANG
+    HasRsqrt = 1,
+#else
+    HasRsqrt = 0,
+#endif
+#else
+    HasSqrt = 0,
+    HasRsqrt = 0,
+#endif
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1,
+    HasNegate = 1,
+    HasBlend = 1
+  };
+};
+template<> struct packet_traits<int>    : default_packet_traits
+{
+  typedef Packet4i type;
+  typedef Packet4i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 4,
+    HasHalfPacket = 0,
+
+    HasAdd  = 1,
+    HasSub  = 1,
+    HasMul  = 1,
+    HasDiv  = 0,
+    HasBlend = 1
+  };
+};
+
+
+template<> struct unpacket_traits<Packet4f> { typedef float  type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet4i> { typedef int    type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+
+inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
+{
+  union {
+    Packet16uc   v;
+    unsigned char n[16];
+  } vt;
+  vt.v = v;
+  for (int i=0; i< 16; i++)
+    s << (int)vt.n[i] << ", ";
+  return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
+{
+  union {
+    Packet4f   v;
+    float n[4];
+  } vt;
+  vt.v = v;
+  s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+  return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
+{
+  union {
+    Packet4i   v;
+    int n[4];
+  } vt;
+  vt.v = v;
+  s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+  return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
+{
+  union {
+    Packet4ui   v;
+    unsigned int n[4];
+  } vt;
+  vt.v = v;
+  s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3];
+  return s;
+}
+
+// Need to define them first or we get specialization after instantiation errors
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
+{
+  EIGEN_DEBUG_ALIGNED_LOAD
+#ifdef __VSX__
+  return vec_vsx_ld(0, from);
+#else
+  return vec_ld(0, from);
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from)
+{
+  EIGEN_DEBUG_ALIGNED_LOAD
+#ifdef __VSX__
+  return vec_vsx_ld(0, from);
+#else
+  return vec_ld(0, from);
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet4f& from)
+{
+  EIGEN_DEBUG_ALIGNED_STORE
+#ifdef __VSX__
+  vec_vsx_st(from, 0, to);
+#else
+  vec_st(from, 0, to);
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& from)
+{
+  EIGEN_DEBUG_ALIGNED_STORE
+#ifdef __VSX__
+  vec_vsx_st(from, 0, to);
+#else
+  vec_st(from, 0, to);
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) {
+  Packet4f v = {from, from, from, from};
+  return v;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from)   {
+  Packet4i v = {from, from, from, from};
+  return v;
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+  a3 = pload<Packet4f>(a);
+  a0 = vec_splat(a3, 0);
+  a1 = vec_splat(a3, 1);
+  a2 = vec_splat(a3, 2);
+  a3 = vec_splat(a3, 3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4i>(const int *a,
+                      Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
+{
+  a3 = pload<Packet4i>(a);
+  a0 = vec_splat(a3, 0);
+  a1 = vec_splat(a3, 1);
+  a2 = vec_splat(a3, 2);
+  a3 = vec_splat(a3, 3);
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+  float EIGEN_ALIGN16 af[4];
+  af[0] = from[0*stride];
+  af[1] = from[1*stride];
+  af[2] = from[2*stride];
+  af[3] = from[3*stride];
+ return pload<Packet4f>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+  int EIGEN_ALIGN16 ai[4];
+  ai[0] = from[0*stride];
+  ai[1] = from[1*stride];
+  ai[2] = from[2*stride];
+  ai[3] = from[3*stride];
+ return pload<Packet4i>(ai);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+  float EIGEN_ALIGN16 af[4];
+  pstore<float>(af, from);
+  to[0*stride] = af[0];
+  to[1*stride] = af[1];
+  to[2*stride] = af[2];
+  to[3*stride] = af[3];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+  int EIGEN_ALIGN16 ai[4];
+  pstore<int>((int *)ai, from);
+  to[0*stride] = ai[0];
+  to[1*stride] = ai[1];
+  to[2*stride] = ai[2];
+  to[3*stride] = ai[3];
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return pset1<Packet4f>(a) + p4f_COUNTDOWN; }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a)   { return pset1<Packet4i>(a) + p4i_COUNTDOWN; }
+
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return a + b; }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return a + b; }
+
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return a - b; }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return a - b; }
+
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; }
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; }
+
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_MZERO); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return a * b; }
+
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+#ifndef __VSX__  // VSX actually provides a div instruction
+  Packet4f t, y_0, y_1;
+
+  // Altivec does not offer a divide instruction, we have to do a reciprocal approximation
+  y_0 = vec_re(b);
+
+  // Do one Newton-Raphson iteration to get the needed accuracy
+  t   = vec_nmsub(y_0, b, p4f_ONE);
+  y_1 = vec_madd(y_0, t, y_0);
+
+  return vec_madd(a, y_1, p4f_MZERO);
+#else
+  return vec_div(a, b);
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
+{ eigen_assert(false && "packet integer division are not supported by AltiVec");
+  return pset1<Packet4i>(0);
+}
+
+// for some weird raisons, it has to be overloaded for packet of integers
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  #ifdef __VSX__
+  Packet4f ret;
+  __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
+  return ret;
+  #else
+  return vec_min(a, b);
+  #endif
+}
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  #ifdef __VSX__
+  Packet4f ret;
+  __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
+  return ret;
+  #else
+  return vec_max(a, b);
+  #endif
+}
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const  Packet4f& a) { return vec_ceil(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
+
+#ifdef _BIG_ENDIAN
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+  EIGEN_DEBUG_ALIGNED_LOAD
+  Packet16uc MSQ, LSQ;
+  Packet16uc mask;
+  MSQ = vec_ld(0, (unsigned char *)from);          // most significant quadword
+  LSQ = vec_ld(15, (unsigned char *)from);         // least significant quadword
+  mask = vec_lvsl(0, from);                        // create the permute mask
+  return (Packet4f) vec_perm(MSQ, LSQ, mask);           // align the data
+
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+  EIGEN_DEBUG_ALIGNED_LOAD
+  // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
+  Packet16uc MSQ, LSQ;
+  Packet16uc mask;
+  MSQ = vec_ld(0, (unsigned char *)from);          // most significant quadword
+  LSQ = vec_ld(15, (unsigned char *)from);         // least significant quadword
+  mask = vec_lvsl(0, from);                        // create the permute mask
+  return (Packet4i) vec_perm(MSQ, LSQ, mask);    // align the data
+}
+#else
+// We also need ot redefine little endian loading of Packet4i/Packet4f using VSX
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return (Packet4i) vec_vsx_ld((long)from & 15, (const int*) _EIGEN_ALIGNED_PTR(from));
+}
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return (Packet4f) vec_vsx_ld((long)from & 15, (const float*) _EIGEN_ALIGNED_PTR(from));
+}
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*   from)
+{
+  Packet4f p;
+  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet4f>(from);
+  else                                  p = ploadu<Packet4f>(from);
+  return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int*     from)
+{
+  Packet4i p;
+  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet4i>(from);
+  else                                  p = ploadu<Packet4i>(from);
+  return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+
+#ifdef _BIG_ENDIAN
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*  to, const Packet4f& from)
+{
+  EIGEN_DEBUG_UNALIGNED_STORE
+  // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
+  // Warning: not thread safe!
+  Packet16uc MSQ, LSQ, edges;
+  Packet16uc edgeAlign, align;
+
+  MSQ = vec_ld(0, (unsigned char *)to);                     // most significant quadword
+  LSQ = vec_ld(15, (unsigned char *)to);                    // least significant quadword
+  edgeAlign = vec_lvsl(0, to);                              // permute map to extract edges
+  edges=vec_perm(LSQ,MSQ,edgeAlign);                        // extract the edges
+  align = vec_lvsr( 0, to );                                // permute map to misalign data
+  MSQ = vec_perm(edges,(Packet16uc)from,align);             // misalign the data (MSQ)
+  LSQ = vec_perm((Packet16uc)from,edges,align);             // misalign the data (LSQ)
+  vec_st( LSQ, 15, (unsigned char *)to );                   // Store the LSQ part first
+  vec_st( MSQ, 0, (unsigned char *)to );                    // Store the MSQ part
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*      to, const Packet4i& from)
+{
+  EIGEN_DEBUG_UNALIGNED_STORE
+  // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
+  // Warning: not thread safe!
+  Packet16uc MSQ, LSQ, edges;
+  Packet16uc edgeAlign, align;
+
+  MSQ = vec_ld(0, (unsigned char *)to);                     // most significant quadword
+  LSQ = vec_ld(15, (unsigned char *)to);                    // least significant quadword
+  edgeAlign = vec_lvsl(0, to);                              // permute map to extract edges
+  edges=vec_perm(LSQ, MSQ, edgeAlign);                      // extract the edges
+  align = vec_lvsr( 0, to );                                // permute map to misalign data
+  MSQ = vec_perm(edges, (Packet16uc) from, align);          // misalign the data (MSQ)
+  LSQ = vec_perm((Packet16uc) from, edges, align);          // misalign the data (LSQ)
+  vec_st( LSQ, 15, (unsigned char *)to );                   // Store the LSQ part first
+  vec_st( MSQ, 0, (unsigned char *)to );                    // Store the MSQ part
+}
+#else
+// We also need ot redefine little endian loading of Packet4i/Packet4f using VSX
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*       to, const Packet4i& from)
+{
+  EIGEN_DEBUG_ALIGNED_STORE
+  vec_vsx_st(from, (long)to & 15, (int*) _EIGEN_ALIGNED_PTR(to));
+}
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*   to, const Packet4f& from)
+{
+  EIGEN_DEBUG_ALIGNED_STORE
+  vec_vsx_st(from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
+}
+#endif
+
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr)    { EIGEN_PPC_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*     addr)    { EIGEN_PPC_PREFETCH(addr); }
+
+template<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
+template<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { int   EIGEN_ALIGN16 x; vec_ste(a, 0, &x); return x; }
+
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{
+  return reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
+}
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{
+  return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32)); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
+
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+  Packet4f b, sum;
+  b   = vec_sld(a, a, 8);
+  sum = a + b;
+  b   = vec_sld(sum, sum, 4);
+  sum += b;
+  return pfirst(sum);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+  Packet4f v[4], sum[4];
+
+  // It's easier and faster to transpose then add as columns
+  // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
+  // Do the transpose, first set of moves
+  v[0] = vec_mergeh(vecs[0], vecs[2]);
+  v[1] = vec_mergel(vecs[0], vecs[2]);
+  v[2] = vec_mergeh(vecs[1], vecs[3]);
+  v[3] = vec_mergel(vecs[1], vecs[3]);
+  // Get the resulting vectors
+  sum[0] = vec_mergeh(v[0], v[2]);
+  sum[1] = vec_mergel(v[0], v[2]);
+  sum[2] = vec_mergeh(v[1], v[3]);
+  sum[3] = vec_mergel(v[1], v[3]);
+
+  // Now do the summation:
+  // Lines 0+1
+  sum[0] = sum[0] + sum[1];
+  // Lines 2+3
+  sum[1] = sum[2] + sum[3];
+  // Add the results
+  sum[0] = sum[0] + sum[1];
+
+  return sum[0];
+}
+
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+  Packet4i sum;
+  sum = vec_sums(a, p4i_ZERO);
+#ifdef _BIG_ENDIAN
+  sum = vec_sld(sum, p4i_ZERO, 12);
+#else
+  sum = vec_sld(p4i_ZERO, sum, 4);
+#endif
+  return pfirst(sum);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  Packet4i v[4], sum[4];
+
+  // It's easier and faster to transpose then add as columns
+  // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
+  // Do the transpose, first set of moves
+  v[0] = vec_mergeh(vecs[0], vecs[2]);
+  v[1] = vec_mergel(vecs[0], vecs[2]);
+  v[2] = vec_mergeh(vecs[1], vecs[3]);
+  v[3] = vec_mergel(vecs[1], vecs[3]);
+  // Get the resulting vectors
+  sum[0] = vec_mergeh(v[0], v[2]);
+  sum[1] = vec_mergel(v[0], v[2]);
+  sum[2] = vec_mergeh(v[1], v[3]);
+  sum[3] = vec_mergel(v[1], v[3]);
+
+  // Now do the summation:
+  // Lines 0+1
+  sum[0] = sum[0] + sum[1];
+  // Lines 2+3
+  sum[1] = sum[2] + sum[3];
+  // Add the results
+  sum[0] = sum[0] + sum[1];
+
+  return sum[0];
+}
+
+// Other reduction functions:
+// mul
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+  Packet4f prod;
+  prod = pmul(a, vec_sld(a, a, 8));
+  return pfirst(pmul(prod, vec_sld(prod, prod, 4)));
+}
+
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  return aux[0] * aux[1] * aux[2] * aux[3];
+}
+
+// min
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+  Packet4f b, res;
+  b = vec_min(a, vec_sld(a, a, 8));
+  res = vec_min(b, vec_sld(b, b, 4));
+  return pfirst(res);
+}
+
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+  Packet4i b, res;
+  b = vec_min(a, vec_sld(a, a, 8));
+  res = vec_min(b, vec_sld(b, b, 4));
+  return pfirst(res);
+}
+
+// max
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+  Packet4f b, res;
+  b = vec_max(a, vec_sld(a, a, 8));
+  res = vec_max(b, vec_sld(b, b, 4));
+  return pfirst(res);
+}
+
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+  Packet4i b, res;
+  b = vec_max(a, vec_sld(a, a, 8));
+  res = vec_max(b, vec_sld(b, b, 4));
+  return pfirst(res);
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  {
+#ifdef _BIG_ENDIAN
+    switch (Offset % 4) {
+    case 1:
+      first = vec_sld(first, second, 4); break;
+    case 2:
+      first = vec_sld(first, second, 8); break;
+    case 3:
+      first = vec_sld(first, second, 12); break;
+    }
+#else
+    switch (Offset % 4) {
+    case 1:
+      first = vec_sld(second, first, 12); break;
+    case 2:
+      first = vec_sld(second, first, 8); break;
+    case 3:
+      first = vec_sld(second, first, 4); break;
+    }
+#endif
+  }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  {
+#ifdef _BIG_ENDIAN
+    switch (Offset % 4) {
+    case 1:
+      first = vec_sld(first, second, 4); break;
+    case 2:
+      first = vec_sld(first, second, 8); break;
+    case 3:
+      first = vec_sld(first, second, 12); break;
+    }
+#else
+    switch (Offset % 4) {
+    case 1:
+      first = vec_sld(second, first, 12); break;
+    case 2:
+      first = vec_sld(second, first, 8); break;
+    case 3:
+      first = vec_sld(second, first, 4); break;
+    }
+#endif
+  }
+};
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+  Packet4f t0, t1, t2, t3;
+  t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+  t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+  t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+  t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+  kernel.packet[0] = vec_mergeh(t0, t2);
+  kernel.packet[1] = vec_mergel(t0, t2);
+  kernel.packet[2] = vec_mergeh(t1, t3);
+  kernel.packet[3] = vec_mergel(t1, t3);
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+  Packet4i t0, t1, t2, t3;
+  t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+  t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+  t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+  t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+  kernel.packet[0] = vec_mergeh(t0, t2);
+  kernel.packet[1] = vec_mergel(t0, t2);
+  kernel.packet[2] = vec_mergeh(t1, t3);
+  kernel.packet[3] = vec_mergel(t1, t3);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+  Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
+  Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
+  return vec_sel(elsePacket, thenPacket, mask);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+  Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
+  Packet4ui mask = reinterpret_cast<Packet4ui>(vec_cmpeq(reinterpret_cast<Packet4ui>(select), reinterpret_cast<Packet4ui>(p4i_ONE)));
+  return vec_sel(elsePacket, thenPacket, mask);
+}
+
+
+//---------- double ----------
+#ifdef __VSX__
+typedef __vector double              Packet2d;
+typedef __vector unsigned long long  Packet2ul;
+typedef __vector long long           Packet2l;
+#if EIGEN_COMP_CLANG
+typedef Packet2ul                    Packet2bl;
+#else
+typedef __vector __bool long         Packet2bl;
+#endif
+
+static Packet2l  p2l_ONE  = { 1, 1 };
+static Packet2l  p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
+static Packet2d  p2d_ONE  = { 1.0, 1.0 };
+static Packet2d  p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
+static Packet2d  p2d_MZERO = { -0.0, -0.0 };
+
+#ifdef _BIG_ENDIAN
+static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8));
+#else
+static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ONE), reinterpret_cast<Packet4f>(p2d_ZERO), 8));
+#endif
+
+template<int index> Packet2d vec_splat_dbl(Packet2d& a);
+
+template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<0>(Packet2d& a)
+{
+  return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_HI));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d vec_splat_dbl<1>(Packet2d& a)
+{
+  return reinterpret_cast<Packet2d>(vec_perm(a, a, p16uc_PSET64_LO));
+}
+
+template<> struct packet_traits<double> : default_packet_traits
+{
+  typedef Packet2d type;
+  typedef Packet2d half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=2,
+    HasHalfPacket = 1,
+
+    HasAdd  = 1,
+    HasSub  = 1,
+    HasMul  = 1,
+    HasDiv  = 1,
+    HasMin  = 1,
+    HasMax  = 1,
+    HasAbs  = 1,
+    HasSin  = 0,
+    HasCos  = 0,
+    HasLog  = 0,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1,
+    HasNegate = 1,
+    HasBlend = 1
+  };
+};
+
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+
+inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
+{
+  union {
+    Packet2l   v;
+    int64_t n[2];
+  } vt;
+  vt.v = v;
+  s << vt.n[0] << ", " << vt.n[1];
+  return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
+{
+  union {
+    Packet2d   v;
+    double n[2];
+  } vt;
+  vt.v = v;
+  s << vt.n[0] << ", " << vt.n[1];
+  return s;
+}
+
+// Need to define them first or we get specialization after instantiation errors
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
+{
+  EIGEN_DEBUG_ALIGNED_LOAD
+#ifdef __VSX__
+  return vec_vsx_ld(0, from);
+#else
+  return vec_ld(0, from);
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<double>(double*   to, const Packet2d& from)
+{
+  EIGEN_DEBUG_ALIGNED_STORE
+#ifdef __VSX__
+  vec_vsx_st(from, 0, to);
+#else
+  vec_st(from, 0, to);
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double&  from) {
+  Packet2d v = {from, from};
+  return v;
+}
+
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+                      Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+  a1 = pload<Packet2d>(a);
+  a0 = vec_splat_dbl<0>(a1);
+  a1 = vec_splat_dbl<1>(a1);
+  a3 = pload<Packet2d>(a+2);
+  a2 = vec_splat_dbl<0>(a3);
+  a3 = vec_splat_dbl<1>(a3);
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+  double EIGEN_ALIGN16 af[2];
+  af[0] = from[0*stride];
+  af[1] = from[1*stride];
+ return pload<Packet2d>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+  double EIGEN_ALIGN16 af[2];
+  pstore<double>(af, from);
+  to[0*stride] = af[0];
+  to[1*stride] = af[1];
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return pset1<Packet2d>(a) + p2d_COUNTDOWN; }
+
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return a + b; }
+
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; }
+
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; }
+
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_MZERO); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); }
+
+// for some weird raisons, it has to be overloaded for packet of integers
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+  Packet2d ret;
+  __asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
+  return ret;
+ }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+  Packet2d ret;
+  __asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
+  return ret;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const  Packet2d& a) { return vec_ceil(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
+
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+  EIGEN_DEBUG_ALIGNED_LOAD
+  return (Packet2d) vec_vsx_ld((long)from & 15, (const double*) _EIGEN_ALIGNED_PTR(from));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double*   from)
+{
+  Packet2d p;
+  if((std::ptrdiff_t(from) % 16) == 0)  p = pload<Packet2d>(from);
+  else                                  p = ploadu<Packet2d>(from);
+  return vec_splat_dbl<0>(p);
+}
+
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double*  to, const Packet2d& from)
+{
+  EIGEN_DEBUG_ALIGNED_STORE
+  vec_vsx_st((Packet4f)from, (long)to & 15, (float*) _EIGEN_ALIGNED_PTR(to));
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_PPC_PREFETCH(addr); }
+
+template<> EIGEN_STRONG_INLINE double  pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore<double>(x, a); return x[0]; }
+
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{
+  return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vec_abs(a); }
+
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+  Packet2d b, sum;
+  b   = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(a), reinterpret_cast<Packet4f>(a), 8));
+  sum = a + b;
+  return pfirst<Packet2d>(sum);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+  Packet2d v[2], sum;
+  v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8));
+  v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8));
+
+#ifdef _BIG_ENDIAN
+  sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8));
+#else
+  sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[1]), reinterpret_cast<Packet4f>(v[0]), 8));
+#endif
+
+  return sum;
+}
+// Other reduction functions:
+// mul
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+  return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
+}
+
+// min
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+  return pfirst(pmin(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
+}
+
+// max
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+  return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+  {
+    if (Offset == 1)
+#ifdef _BIG_ENDIAN
+      first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(first), reinterpret_cast<Packet4ui>(second), 8));
+#else
+      first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(second), reinterpret_cast<Packet4ui>(first), 8));
+#endif
+  }
+};
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+  Packet2d t0, t1;
+  t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
+  t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
+  kernel.packet[0] = t0;
+  kernel.packet[1] = t1;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+  Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
+  Packet2bl mask = reinterpret_cast<Packet2bl>( vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)) );
+  return vec_sel(elsePacket, thenPacket, mask);
+}
+#endif // __VSX__
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_ALTIVEC_H
diff --git a/third-party/Eigen/src/Core/arch/CUDA/Complex.h b/third-party/Eigen/src/Core/arch/CUDA/Complex.h
new file mode 100644
index 00000000..526e59fd
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/CUDA/Complex.h
@@ -0,0 +1,103 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_CUDA_H
+#define EIGEN_COMPLEX_CUDA_H
+
+// clang-format off
+
+namespace Eigen {
+
+namespace internal {
+
+#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
+
+// Many std::complex methods such as operator+, operator-, operator* and
+// operator/ are not constexpr. Due to this, clang does not treat them as device
+// functions and thus Eigen functors making use of these operators fail to
+// compile. Here, we manually specialize these functors for complex types when
+// building for CUDA to avoid non-constexpr methods.
+
+// Sum
+template<typename T> struct scalar_sum_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
+  typedef typename std::complex<T> result_type;
+
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+    return std::complex<T>(numext::real(a) + numext::real(b),
+                           numext::imag(a) + numext::imag(b));
+  }
+};
+
+template<typename T> struct scalar_sum_op<std::complex<T>, std::complex<T> > : scalar_sum_op<const std::complex<T>, const std::complex<T> > {};
+
+
+// Difference
+template<typename T> struct scalar_difference_op<const std::complex<T>, const std::complex<T> >  : binary_op_base<const std::complex<T>, const std::complex<T> > {
+  typedef typename std::complex<T> result_type;
+
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+    return std::complex<T>(numext::real(a) - numext::real(b),
+                           numext::imag(a) - numext::imag(b));
+  }
+};
+
+template<typename T> struct scalar_difference_op<std::complex<T>, std::complex<T> > : scalar_difference_op<const std::complex<T>, const std::complex<T> > {};
+
+
+// Product
+template<typename T> struct scalar_product_op<const std::complex<T>, const std::complex<T> >  : binary_op_base<const std::complex<T>, const std::complex<T> > {
+  enum {
+    Vectorizable = packet_traits<std::complex<T>>::HasMul
+  };
+  typedef typename std::complex<T> result_type;
+
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+    const T a_real = numext::real(a);
+    const T a_imag = numext::imag(a);
+    const T b_real = numext::real(b);
+    const T b_imag = numext::imag(b);
+    return std::complex<T>(a_real * b_real - a_imag * b_imag,
+                           a_real * b_imag + a_imag * b_real);
+  }
+};
+
+template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T> > : scalar_product_op<const std::complex<T>, const std::complex<T> > {};
+
+
+// Quotient
+template<typename T> struct scalar_quotient_op<const std::complex<T>, const std::complex<T> > : binary_op_base<const std::complex<T>, const std::complex<T> > {
+  enum {
+    Vectorizable = packet_traits<std::complex<T>>::HasDiv
+  };
+  typedef typename std::complex<T> result_type;
+
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
+    const T a_real = numext::real(a);
+    const T a_imag = numext::imag(a);
+    const T b_real = numext::real(b);
+    const T b_imag = numext::imag(b);
+    const T norm = T(1) / (b_real * b_real + b_imag * b_imag);
+    return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
+                           (a_imag * b_real - a_real * b_imag) * norm);
+  }
+};
+
+template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T> > : scalar_quotient_op<const std::complex<T>, const std::complex<T> > {};
+
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_CUDA_H
diff --git a/third-party/Eigen/src/Core/arch/CUDA/Half.h b/third-party/Eigen/src/Core/arch/CUDA/Half.h
new file mode 100644
index 00000000..59717b4f
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/CUDA/Half.h
@@ -0,0 +1,675 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+//
+// The conversion routines are Copyright (c) Fabian Giesen, 2016.
+// The original license follows:
+//
+// Copyright (c) Fabian Giesen, 2016
+// All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted.
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+// Standard 16-bit float type, mostly useful for GPUs. Defines a new
+// type Eigen::half (inheriting from CUDA's __half struct) with
+// operator overloads such that it behaves basically as an arithmetic
+// type. It will be quite slow on CPUs (so it is recommended to stay
+// in float32_bits for CPUs, except for simple parameter conversions, I/O
+// to disk and the likes), but fast on GPUs.
+
+
+#ifndef EIGEN_HALF_CUDA_H
+#define EIGEN_HALF_CUDA_H
+
+#if __cplusplus > 199711L
+#define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type()
+#else
+#define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type()
+#endif
+
+#include <sstream>
+
+namespace Eigen {
+
+struct half;
+
+namespace half_impl {
+
+#if !defined(EIGEN_HAS_CUDA_FP16)
+// Make our own __half_raw definition that is similar to CUDA's.
+struct __half_raw {
+  EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
+  explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
+  unsigned short x;
+};
+#elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
+typedef __half __half_raw;
+#endif
+
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);
+
+struct half_base : public __half_raw {
+  EIGEN_DEVICE_FUNC half_base() {}
+  EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw(h) {}
+  EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {}
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
+  EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
+#endif
+};
+
+} // namespace half_impl
+
+// Class definition.
+struct half : public half_impl::half_base {
+  #if !defined(EIGEN_HAS_CUDA_FP16) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
+    typedef half_impl::__half_raw __half_raw;
+  #endif
+
+  EIGEN_DEVICE_FUNC half() {}
+
+  EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {}
+  EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
+  EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
+#endif
+
+  explicit EIGEN_DEVICE_FUNC half(bool b)
+      : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}
+  template<class T>
+  explicit EIGEN_DEVICE_FUNC half(const T& val)
+      : half_impl::half_base(half_impl::float_to_half_rtne(static_cast<float>(val))) {}
+  explicit EIGEN_DEVICE_FUNC half(float f)
+      : half_impl::half_base(half_impl::float_to_half_rtne(f)) {}
+
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const {
+    // +0.0 and -0.0 become false, everything else becomes true.
+    return (x & 0x7fff) != 0;
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const {
+    return static_cast<signed char>(half_impl::half_to_float(*this));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const {
+    return static_cast<unsigned char>(half_impl::half_to_float(*this));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const {
+    return static_cast<short>(half_impl::half_to_float(*this));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const {
+    return static_cast<unsigned short>(half_impl::half_to_float(*this));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const {
+    return static_cast<int>(half_impl::half_to_float(*this));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const {
+    return static_cast<unsigned int>(half_impl::half_to_float(*this));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const {
+    return static_cast<long>(half_impl::half_to_float(*this));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const {
+    return static_cast<unsigned long>(half_impl::half_to_float(*this));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const {
+    return static_cast<long long>(half_impl::half_to_float(*this));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
+    return static_cast<unsigned long long>(half_to_float(*this));
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
+    return half_impl::half_to_float(*this);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
+    return static_cast<double>(half_impl::half_to_float(*this));
+  }
+
+  EIGEN_DEVICE_FUNC half& operator=(const half& other) {
+    x = other.x;
+    return *this;
+  }
+};
+
+} // end namespace Eigen
+
+namespace std {
+template<>
+struct numeric_limits<Eigen::half> {
+  static const bool is_specialized = true;
+  static const bool is_signed = true;
+  static const bool is_integer = false;
+  static const bool is_exact = false;
+  static const bool has_infinity = true;
+  static const bool has_quiet_NaN = true;
+  static const bool has_signaling_NaN = true;
+  static const float_denorm_style has_denorm = denorm_present;
+  static const bool has_denorm_loss = false;
+  static const std::float_round_style round_style = std::round_to_nearest;
+  static const bool is_iec559 = false;
+  static const bool is_bounded = false;
+  static const bool is_modulo = false;
+  static const int digits = 11;
+  static const int digits10 = 3;      // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
+  static const int max_digits10 = 5;  // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
+  static const int radix = 2;
+  static const int min_exponent = -13;
+  static const int min_exponent10 = -4;
+  static const int max_exponent = 16;
+  static const int max_exponent10 = 4;
+  static const bool traps = true;
+  static const bool tinyness_before = false;
+
+  static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
+  static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
+  static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
+  static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
+  static Eigen::half round_error() { return Eigen::half(0.5); }
+  static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
+  static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
+  static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
+  static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
+};
+
+// If std::numeric_limits<T> is specialized, should also specialize
+// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
+// std::numeric_limits<const volatile T>
+// https://stackoverflow.com/a/16519653/
+template<>
+struct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {};
+template<>
+struct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {};
+template<>
+struct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {};
+} // end namespace std
+
+namespace Eigen {
+
+namespace half_impl {
+
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
+
+// Intrinsics for native fp16 support. Note that on current hardware,
+// these are no faster than float32_bits arithmetic (you need to use the half2
+// versions to get the ALU speed increased), but you do save the
+// conversion steps back and forth.
+
+EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
+  return __hadd(a, b);
+}
+EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
+  return __hmul(a, b);
+}
+EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
+  return __hsub(a, b);
+}
+EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
+  float num = __half2float(a);
+  float denom = __half2float(b);
+  return __float2half(num / denom);
+}
+EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
+  return __hneg(a);
+}
+EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
+  a = a + b;
+  return a;
+}
+EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
+  a = a * b;
+  return a;
+}
+EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
+  a = a - b;
+  return a;
+}
+EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
+  a = a / b;
+  return a;
+}
+EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
+  return __heq(a, b);
+}
+EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
+  return __hne(a, b);
+}
+EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
+  return __hlt(a, b);
+}
+EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
+  return __hle(a, b);
+}
+EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
+  return __hgt(a, b);
+}
+EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
+  return __hge(a, b);
+}
+
+#else  // Emulate support for half floats
+
+// Definitions for CPUs and older CUDA, mostly working through conversion
+// to/from float32_bits.
+
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
+  return half(float(a) + float(b));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator * (const half& a, const half& b) {
+  return half(float(a) * float(b));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a, const half& b) {
+  return half(float(a) - float(b));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, const half& b) {
+  return half(float(a) / float(b));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (const half& a) {
+  half result;
+  result.x = a.x ^ 0x8000;
+  return result;
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a, const half& b) {
+  a = half(float(a) + float(b));
+  return a;
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a, const half& b) {
+  a = half(float(a) * float(b));
+  return a;
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a, const half& b) {
+  a = half(float(a) - float(b));
+  return a;
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) {
+  a = half(float(a) / float(b));
+  return a;
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
+  return numext::equal_strict(float(a),float(b));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
+  return numext::not_equal_strict(float(a), float(b));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
+  return float(a) < float(b);
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const half& a, const half& b) {
+  return float(a) <= float(b);
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
+  return float(a) > float(b);
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const half& a, const half& b) {
+  return float(a) >= float(b);
+}
+
+#endif  // Emulate support for half floats
+
+// Division by an index. Do it in full float precision to avoid accuracy
+// issues in converting the denominator to half.
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
+  return half(static_cast<float>(a) / static_cast<float>(b));
+}
+
+// Conversion routines, including fallbacks for the host or older CUDA.
+// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
+// these in hardware. If we need more performance on older/other CPUs, they are
+// also possible to vectorize directly.
+
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) {
+  __half_raw h;
+  h.x = x;
+  return h;
+}
+
+union float32_bits {
+  unsigned int u;
+  float f;
+};
+
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
+  __half tmp_ff = __float2half(ff);
+  return *(__half_raw*)&tmp_ff;
+
+#elif defined(EIGEN_HAS_FP16_C)
+  __half_raw h;
+  h.x = _cvtss_sh(ff, 0);
+  return h;
+
+#else
+  float32_bits f; f.f = ff;
+
+  const float32_bits f32infty = { 255 << 23 };
+  const float32_bits f16max = { (127 + 16) << 23 };
+  const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
+  unsigned int sign_mask = 0x80000000u;
+  __half_raw o;
+  o.x = static_cast<unsigned short>(0x0u);
+
+  unsigned int sign = f.u & sign_mask;
+  f.u ^= sign;
+
+  // NOTE all the integer compares in this function can be safely
+  // compiled into signed compares since all operands are below
+  // 0x80000000. Important if you want fast straight SSE2 code
+  // (since there's no unsigned PCMPGTD).
+
+  if (f.u >= f16max.u) {  // result is Inf or NaN (all exponent bits set)
+    o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
+  } else {  // (De)normalized number or zero
+    if (f.u < (113 << 23)) {  // resulting FP16 is subnormal or zero
+      // use a magic value to align our 10 mantissa bits at the bottom of
+      // the float. as long as FP addition is round-to-nearest-even this
+      // just works.
+      f.f += denorm_magic.f;
+
+      // and one integer subtract of the bias later, we have our final float!
+      o.x = static_cast<unsigned short>(f.u - denorm_magic.u);
+    } else {
+      unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
+
+      // update exponent, rounding bias part 1
+      f.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
+      // rounding bias part 2
+      f.u += mant_odd;
+      // take the bits!
+      o.x = static_cast<unsigned short>(f.u >> 13);
+    }
+  }
+
+  o.x |= static_cast<unsigned short>(sign >> 16);
+  return o;
+#endif
+}
+
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
+  return __half2float(h);
+
+#elif defined(EIGEN_HAS_FP16_C)
+  return _cvtsh_ss(h.x);
+
+#else
+  const float32_bits magic = { 113 << 23 };
+  const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
+  float32_bits o;
+
+  o.u = (h.x & 0x7fff) << 13;             // exponent/mantissa bits
+  unsigned int exp = shifted_exp & o.u;   // just the exponent
+  o.u += (127 - 15) << 23;                // exponent adjust
+
+  // handle exponent special cases
+  if (exp == shifted_exp) {     // Inf/NaN?
+    o.u += (128 - 16) << 23;    // extra exp adjust
+  } else if (exp == 0) {        // Zero/Denormal?
+    o.u += 1 << 23;             // extra exp adjust
+    o.f -= magic.f;             // renormalize
+  }
+
+  o.u |= (h.x & 0x8000) << 16;    // sign bit
+  return o.f;
+#endif
+}
+
+// --- standard functions ---
+
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
+  return (a.x & 0x7fff) == 0x7c00;
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
+  return __hisnan(a);
+#else
+  return (a.x & 0x7fff) > 0x7c00;
+#endif
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const half& a) {
+  return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
+}
+
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
+  half result;
+  result.x = a.x & 0x7FFF;
+  return result;
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
+  return half(hexp(a));
+#else
+   return half(::expf(float(a)));
+#endif
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
+#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
+  return half(::hlog(a));
+#else
+  return half(::logf(float(a)));
+#endif
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) {
+  return half(numext::log1p(float(a)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
+  return half(::log10f(float(a)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
+  return half(hsqrt(a));
+#else
+    return half(::sqrtf(float(a)));
+#endif
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
+  return half(::powf(float(a), float(b)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sin(const half& a) {
+  return half(::sinf(float(a)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half cos(const half& a) {
+  return half(::cosf(float(a)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tan(const half& a) {
+  return half(::tanf(float(a)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
+  return half(::tanhf(float(a)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
+  return half(hfloor(a));
+#else
+  return half(::floorf(float(a)));
+#endif
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
+  return half(hceil(a));
+#else
+  return half(::ceilf(float(a)));
+#endif
+}
+
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
+  return __hlt(b, a) ? b : a;
+#else
+  const float f1 = static_cast<float>(a);
+  const float f2 = static_cast<float>(b);
+  return f2 < f1 ? b : a;
+#endif
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
+  return __hlt(a, b) ? b : a;
+#else
+  const float f1 = static_cast<float>(a);
+  const float f2 = static_cast<float>(b);
+  return f1 < f2 ? b : a;
+#endif
+}
+
+EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) {
+  os << static_cast<float>(v);
+  return os;
+}
+
+} // end namespace half_impl
+
+// import Eigen::half_impl::half into Eigen namespace
+// using half_impl::half;
+
+namespace internal {
+
+template<>
+struct random_default_impl<half, false, false>
+{
+  static inline half run(const half& x, const half& y)
+  {
+    return x + (y-x) * half(float(std::rand()) / float(RAND_MAX));
+  }
+  static inline half run()
+  {
+    return run(half(-1.f), half(1.f));
+  }
+};
+
+template<> struct is_arithmetic<half> { enum { value = true }; };
+
+} // end namespace internal
+
+template<> struct NumTraits<Eigen::half>
+    : GenericNumTraits<Eigen::half>
+{
+  enum {
+    IsSigned = true,
+    IsInteger = false,
+    IsComplex = false,
+    RequireInitialization = false
+  };
+
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
+    return half_impl::raw_uint16_to_half(0x0800);
+  }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half(1e-2f); }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() {
+    return half_impl::raw_uint16_to_half(0x7bff);
+  }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() {
+    return half_impl::raw_uint16_to_half(0xfbff);
+  }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() {
+    return half_impl::raw_uint16_to_half(0x7c00);
+  }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half quiet_NaN() {
+    return half_impl::raw_uint16_to_half(0x7c01);
+  }
+};
+
+} // end namespace Eigen
+
+// C-like standard mathematical functions and trancendentals.
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(const Eigen::half& a) {
+  Eigen::half result;
+  result.x = a.x & 0x7FFF;
+  return result;
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
+  return Eigen::half(::expf(float(a)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
+#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
+  return Eigen::half(::hlog(a));
+#else
+  return Eigen::half(::logf(float(a)));
+#endif
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
+  return Eigen::half(::sqrtf(float(a)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
+  return Eigen::half(::powf(float(a), float(b)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
+  return Eigen::half(::floorf(float(a)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(const Eigen::half& a) {
+  return Eigen::half(::ceilf(float(a)));
+}
+
+namespace std {
+
+#if __cplusplus > 199711L
+template <>
+struct hash<Eigen::half> {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::half& a) const {
+    return static_cast<std::size_t>(a.x);
+  }
+};
+#endif
+
+} // end namespace std
+
+
+// Add the missing shfl_xor intrinsic
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
+__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
+  #if EIGEN_CUDACC_VER < 90000
+  return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
+  #else
+  return static_cast<Eigen::half>(__shfl_xor_sync(0xFFFFFFFF, static_cast<float>(var), laneMask, width));
+  #endif
+}
+#endif
+
+// ldg() has an overload for __half_raw, but we also need one for Eigen::half.
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
+  return Eigen::half_impl::raw_uint16_to_half(
+      __ldg(reinterpret_cast<const unsigned short*>(ptr)));
+}
+#endif
+
+
+#if defined(EIGEN_CUDA_ARCH)
+namespace Eigen {
+namespace numext {
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool (isnan)(const Eigen::half& h) {
+  return (half_impl::isnan)(h);
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool (isinf)(const Eigen::half& h) {
+  return (half_impl::isinf)(h);
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool (isfinite)(const Eigen::half& h) {
+  return (half_impl::isfinite)(h);
+}
+
+} // namespace Eigen
+}  // namespace numext
+#endif
+
+#endif // EIGEN_HALF_CUDA_H
diff --git a/third-party/Eigen/src/Core/arch/CUDA/MathFunctions.h b/third-party/Eigen/src/Core/arch/CUDA/MathFunctions.h
new file mode 100644
index 00000000..9dd92c2b
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -0,0 +1,91 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATH_FUNCTIONS_CUDA_H
+#define EIGEN_MATH_FUNCTIONS_CUDA_H
+
+namespace Eigen {
+
+namespace internal {
+
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet_traits definitions and the ones
+// we'll use on the host side (SSE, AVX, ...)
+#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plog<float4>(const float4& a)
+{
+  return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w));
+}
+
+template<>  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plog<double2>(const double2& a)
+{
+  using ::log;
+  return make_double2(log(a.x), log(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plog1p<float4>(const float4& a)
+{
+  return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w));
+}
+
+template<>  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plog1p<double2>(const double2& a)
+{
+  return make_double2(log1p(a.x), log1p(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pexp<float4>(const float4& a)
+{
+  return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pexp<double2>(const double2& a)
+{
+  using ::exp;
+  return make_double2(exp(a.x), exp(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 psqrt<float4>(const float4& a)
+{
+  return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 psqrt<double2>(const double2& a)
+{
+  using ::sqrt;
+  return make_double2(sqrt(a.x), sqrt(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 prsqrt<float4>(const float4& a)
+{
+  return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 prsqrt<double2>(const double2& a)
+{
+  return make_double2(rsqrt(a.x), rsqrt(a.y));
+}
+
+
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_CUDA_H
diff --git a/third-party/Eigen/src/Core/arch/CUDA/PacketMath.h b/third-party/Eigen/src/Core/arch/CUDA/PacketMath.h
new file mode 100644
index 00000000..a567002a
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -0,0 +1,333 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_CUDA_H
+#define EIGEN_PACKET_MATH_CUDA_H
+
+namespace Eigen {
+
+namespace internal {
+
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet_traits definitions and the ones
+// we'll use on the host side (SSE, AVX, ...)
+#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
+template<> struct is_arithmetic<float4>  { enum { value = true }; };
+template<> struct is_arithmetic<double2> { enum { value = true }; };
+
+template<> struct packet_traits<float> : default_packet_traits
+{
+  typedef float4 type;
+  typedef float4 half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=4,
+    HasHalfPacket = 0,
+
+    HasDiv  = 1,
+    HasSin  = 0,
+    HasCos  = 0,
+    HasLog  = 1,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasLGamma = 1,
+    HasDiGamma = 1,
+    HasZeta = 1,
+    HasPolygamma = 1,
+    HasErf = 1,
+    HasErfc = 1,
+    HasIGamma = 1,
+    HasIGammac = 1,
+    HasBetaInc = 1,
+
+    HasBlend = 0,
+  };
+};
+
+template<> struct packet_traits<double> : default_packet_traits
+{
+  typedef double2 type;
+  typedef double2 half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=2,
+    HasHalfPacket = 0,
+
+    HasDiv  = 1,
+    HasLog  = 1,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasLGamma = 1,
+    HasDiGamma = 1,
+    HasZeta = 1,
+    HasPolygamma = 1,
+    HasErf = 1,
+    HasErfc = 1,
+    HasIGamma = 1,
+    HasIGammac = 1,
+    HasBetaInc = 1,
+
+    HasBlend = 0,
+  };
+};
+
+
+template<> struct unpacket_traits<float4>  { typedef float  type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
+template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float&  from) {
+  return make_float4(from, from, from, from);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
+  return make_double2(from, from);
+}
+
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
+  return make_float4(a, a+1, a+2, a+3);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
+  return make_double2(a, a+1);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
+  return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
+  return make_double2(a.x+b.x, a.y+b.y);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
+  return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
+  return make_double2(a.x-b.x, a.y-b.y);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
+  return make_float4(-a.x, -a.y, -a.z, -a.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
+  return make_double2(-a.x, -a.y);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
+  return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
+  return make_double2(a.x*b.x, a.y*b.y);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
+  return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
+  return make_double2(a.x/b.x, a.y/b.y);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
+  return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
+  return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
+  return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
+  return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
+  return *reinterpret_cast<const float4*>(from);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
+  return *reinterpret_cast<const double2*>(from);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
+  return make_float4(from[0], from[1], from[2], from[3]);
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
+  return make_double2(from[0], from[1]);
+}
+
+template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float*   from) {
+  return make_float4(from[0], from[0], from[1], from[1]);
+}
+template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double*  from) {
+  return make_double2(from[0], from[0]);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float*   to, const float4& from) {
+  *reinterpret_cast<float4*>(to) = from;
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
+  *reinterpret_cast<double2*>(to) = from;
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float*  to, const float4& from) {
+  to[0] = from.x;
+  to[1] = from.y;
+  to[2] = from.z;
+  to[3] = from.w;
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
+  to[0] = from.x;
+  to[1] = from.y;
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  return __ldg((const float4*)from);
+#else
+  return make_float4(from[0], from[1], from[2], from[3]);
+#endif
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  return __ldg((const double2*)from);
+#else
+  return make_double2(from[0], from[1]);
+#endif
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
+#else
+  return make_float4(from[0], from[1], from[2], from[3]);
+#endif
+}
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+  return make_double2(__ldg(from+0), __ldg(from+1));
+#else
+  return make_double2(from[0], from[1]);
+#endif
+}
+
+template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
+  return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
+}
+
+template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
+  return make_double2(from[0*stride], from[1*stride]);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
+  to[stride*0] = from.x;
+  to[stride*1] = from.y;
+  to[stride*2] = from.z;
+  to[stride*3] = from.w;
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
+  to[stride*0] = from.x;
+  to[stride*1] = from.y;
+}
+
+template<> EIGEN_DEVICE_FUNC inline float  pfirst<float4>(const float4& a) {
+  return a.x;
+}
+template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
+  return a.x;
+}
+
+template<> EIGEN_DEVICE_FUNC inline float  predux<float4>(const float4& a) {
+  return a.x + a.y + a.z + a.w;
+}
+template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
+  return a.x + a.y;
+}
+
+template<> EIGEN_DEVICE_FUNC inline float  predux_max<float4>(const float4& a) {
+  return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
+  return fmax(a.x, a.y);
+}
+
+template<> EIGEN_DEVICE_FUNC inline float  predux_min<float4>(const float4& a) {
+  return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
+  return fmin(a.x, a.y);
+}
+
+template<> EIGEN_DEVICE_FUNC inline float  predux_mul<float4>(const float4& a) {
+  return a.x * a.y * a.z * a.w;
+}
+template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
+  return a.x * a.y;
+}
+
+template<> EIGEN_DEVICE_FUNC inline float4  pabs<float4>(const float4& a) {
+  return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
+}
+template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
+  return make_double2(fabs(a.x), fabs(a.y));
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<float4,4>& kernel) {
+  float tmp = kernel.packet[0].y;
+  kernel.packet[0].y = kernel.packet[1].x;
+  kernel.packet[1].x = tmp;
+
+  tmp = kernel.packet[0].z;
+  kernel.packet[0].z = kernel.packet[2].x;
+  kernel.packet[2].x = tmp;
+
+  tmp = kernel.packet[0].w;
+  kernel.packet[0].w = kernel.packet[3].x;
+  kernel.packet[3].x = tmp;
+
+  tmp = kernel.packet[1].z;
+  kernel.packet[1].z = kernel.packet[2].y;
+  kernel.packet[2].y = tmp;
+
+  tmp = kernel.packet[1].w;
+  kernel.packet[1].w = kernel.packet[3].y;
+  kernel.packet[3].y = tmp;
+
+  tmp = kernel.packet[2].w;
+  kernel.packet[2].w = kernel.packet[3].z;
+  kernel.packet[3].z = tmp;
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<double2,2>& kernel) {
+  double tmp = kernel.packet[0].y;
+  kernel.packet[0].y = kernel.packet[1].x;
+  kernel.packet[1].x = tmp;
+}
+
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+
+#endif // EIGEN_PACKET_MATH_CUDA_H
diff --git a/third-party/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/third-party/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
new file mode 100644
index 00000000..aa6c11f5
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -0,0 +1,1124 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_HALF_CUDA_H
+#define EIGEN_PACKET_MATH_HALF_CUDA_H
+
+
+namespace Eigen {
+namespace internal {
+
+// Most of the following operations require arch >= 3.0
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+
+template<> struct is_arithmetic<half2> { enum { value = true }; };
+
+template<> struct packet_traits<Eigen::half> : default_packet_traits
+{
+  typedef half2 type;
+  typedef half2 half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=2,
+    HasHalfPacket = 0,
+    HasAdd    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasSqrt   = 1,
+    HasRsqrt  = 1,
+    HasExp    = 1,
+    HasLog    = 1,
+    HasLog1p  = 1
+  };
+};
+
+template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
+  return __half2half2(from);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
+  return *reinterpret_cast<const half2*>(from);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
+  return __halves2half2(from[0], from[1]);
+}
+
+template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const Eigen::half*  from) {
+  return __halves2half2(from[0], from[0]);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
+  *reinterpret_cast<half2*>(to) = from;
+}
+
+template<> __device__ EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
+  to[0] = __low2half(from);
+  to[1] = __high2half(from);
+}
+
+template<>
+ __device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
+#if __CUDA_ARCH__ >= 350
+   return __ldg((const half2*)from);
+#else
+  return __halves2half2(*(from+0), *(from+1));
+#endif
+}
+
+template<>
+__device__ EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
+#if __CUDA_ARCH__ >= 350
+   return __halves2half2(__ldg(from+0), __ldg(from+1));
+#else
+  return __halves2half2(*(from+0), *(from+1));
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
+  return __halves2half2(from[0*stride], from[1*stride]);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
+  to[stride*0] = __low2half(from);
+  to[stride*1] = __high2half(from);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2& a) {
+  return __low2half(a);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
+  half2 result;
+  unsigned temp = *(reinterpret_cast<const unsigned*>(&(a)));
+  *(reinterpret_cast<unsigned*>(&(result))) = temp & 0x7FFF7FFF;
+  return result;
+}
+
+
+__device__ EIGEN_STRONG_INLINE void
+ptranspose(PacketBlock<half2,2>& kernel) {
+  __half a1 = __low2half(kernel.packet[0]);
+  __half a2 = __high2half(kernel.packet[0]);
+  __half b1 = __low2half(kernel.packet[1]);
+  __half b2 = __high2half(kernel.packet[1]);
+  kernel.packet[0] = __halves2half2(a1, b1);
+  kernel.packet[1] = __halves2half2(a2, b2);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
+#if __CUDA_ARCH__ >= 530
+  return __halves2half2(a, __hadd(a, __float2half(1.0f)));
+#else
+  float f = __half2float(a) + 1.0f;
+  return __halves2half2(a, __float2half(f));
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
+#if __CUDA_ARCH__ >= 530
+  return __hadd2(a, b);
+#else
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float b1 = __low2float(b);
+  float b2 = __high2float(b);
+  float r1 = a1 + b1;
+  float r2 = a2 + b2;
+  return __floats2half2_rn(r1, r2);
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 psub<half2>(const half2& a, const half2& b) {
+#if __CUDA_ARCH__ >= 530
+  return __hsub2(a, b);
+#else
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float b1 = __low2float(b);
+  float b2 = __high2float(b);
+  float r1 = a1 - b1;
+  float r2 = a2 - b2;
+  return __floats2half2_rn(r1, r2);
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
+#if __CUDA_ARCH__ >= 530
+  return __hneg2(a);
+#else
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  return __floats2half2_rn(-a1, -a2);
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
+#if __CUDA_ARCH__ >= 530
+  return __hmul2(a, b);
+#else
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float b1 = __low2float(b);
+  float b2 = __high2float(b);
+  float r1 = a1 * b1;
+  float r2 = a2 * b2;
+  return __floats2half2_rn(r1, r2);
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2& a, const half2& b, const half2& c) {
+#if __CUDA_ARCH__ >= 530
+   return __hfma2(a, b, c);
+#else
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float b1 = __low2float(b);
+  float b2 = __high2float(b);
+  float c1 = __low2float(c);
+  float c2 = __high2float(c);
+  float r1 = a1 * b1 + c1;
+  float r2 = a2 * b2 + c2;
+  return __floats2half2_rn(r1, r2);
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float b1 = __low2float(b);
+  float b2 = __high2float(b);
+  float r1 = a1 / b1;
+  float r2 = a2 / b2;
+  return __floats2half2_rn(r1, r2);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float b1 = __low2float(b);
+  float b2 = __high2float(b);
+  __half r1 = a1 < b1 ? __low2half(a) : __low2half(b);
+  __half r2 = a2 < b2 ? __high2half(a) : __high2half(b);
+  return __halves2half2(r1, r2);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float b1 = __low2float(b);
+  float b2 = __high2float(b);
+  __half r1 = a1 > b1 ? __low2half(a) : __low2half(b);
+  __half r2 = a2 > b2 ? __high2half(a) : __high2half(b);
+  return __halves2half2(r1, r2);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux<half2>(const half2& a) {
+#if __CUDA_ARCH__ >= 530
+  return __hadd(__low2half(a), __high2half(a));
+#else
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  return Eigen::half(__float2half_rn(a1 + a2));
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_max<half2>(const half2& a) {
+#if __CUDA_ARCH__ >= 530
+  __half first = __low2half(a);
+  __half second = __high2half(a);
+  return __hgt(first, second) ? first : second;
+#else
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  return a1 > a2 ? __low2half(a) : __high2half(a);
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_min<half2>(const half2& a) {
+#if __CUDA_ARCH__ >= 530
+  __half first = __low2half(a);
+  __half second = __high2half(a);
+  return __hlt(first, second) ? first : second;
+#else
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  return a1 < a2 ? __low2half(a) : __high2half(a);
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(const half2& a) {
+#if __CUDA_ARCH__ >= 530
+  return __hmul(__low2half(a), __high2half(a));
+#else
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  return Eigen::half(__float2half_rn(a1 * a2));
+#endif
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float r1 = log1pf(a1);
+  float r2 = log1pf(a2);
+  return __floats2half2_rn(r1, r2);
+}
+
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
+
+template<>  __device__ EIGEN_STRONG_INLINE
+half2 plog<half2>(const half2& a) {
+  return h2log(a);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE
+half2 pexp<half2>(const half2& a) {
+  return h2exp(a);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE
+half2 psqrt<half2>(const half2& a) {
+  return h2sqrt(a);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE
+half2 prsqrt<half2>(const half2& a) {
+  return h2rsqrt(a);
+}
+
+#else
+
+template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float r1 = logf(a1);
+  float r2 = logf(a2);
+  return __floats2half2_rn(r1, r2);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 pexp<half2>(const half2& a) {
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float r1 = expf(a1);
+  float r2 = expf(a2);
+  return __floats2half2_rn(r1, r2);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 psqrt<half2>(const half2& a) {
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float r1 = sqrtf(a1);
+  float r2 = sqrtf(a2);
+  return __floats2half2_rn(r1, r2);
+}
+
+template<> __device__ EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2& a) {
+  float a1 = __low2float(a);
+  float a2 = __high2float(a);
+  float r1 = rsqrtf(a1);
+  float r2 = rsqrtf(a2);
+  return __floats2half2_rn(r1, r2);
+}
+
+#endif
+
+#elif defined EIGEN_VECTORIZE_AVX512
+
+typedef struct {
+  __m256i x;
+} Packet16h;
+
+
+template<> struct is_arithmetic<Packet16h> { enum { value = true }; };
+
+template <>
+struct packet_traits<half> : default_packet_traits {
+  typedef Packet16h type;
+  // There is no half-size packet for Packet16h.
+  typedef Packet16h half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 16,
+    HasHalfPacket = 0,
+    HasAdd    = 0,
+    HasSub    = 0,
+    HasMul    = 0,
+    HasNegate = 0,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasConj   = 0,
+    HasSetLinear = 0,
+    HasDiv = 0,
+    HasSqrt = 0,
+    HasRsqrt = 0,
+    HasExp = 0,
+    HasLog = 0,
+    HasBlend = 0
+  };
+};
+
+
+template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; enum {size=16, alignment=Aligned32}; typedef Packet16h half; };
+
+template<> EIGEN_STRONG_INLINE Packet16h pset1<Packet16h>(const Eigen::half& from) {
+  Packet16h result;
+  result.x = _mm256_set1_epi16(from.x);
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half pfirst<Packet16h>(const Packet16h& from) {
+  return half_impl::raw_uint16_to_half(static_cast<unsigned short>(_mm256_extract_epi16(from.x, 0)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet16h pload<Packet16h>(const Eigen::half* from) {
+  Packet16h result;
+  result.x = _mm256_load_si256(reinterpret_cast<const __m256i*>(from));
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE Packet16h ploadu<Packet16h>(const Eigen::half* from) {
+  Packet16h result;
+  result.x = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<half>(Eigen::half* to, const Packet16h& from) {
+  _mm256_store_si256((__m256i*)to, from.x);
+}
+
+template<> EIGEN_STRONG_INLINE void pstoreu<half>(Eigen::half* to, const Packet16h& from) {
+  _mm256_storeu_si256((__m256i*)to, from.x);
+}
+
+template<> EIGEN_STRONG_INLINE Packet16h
+ploadquad(const Eigen::half* from) {
+  Packet16h result;
+  unsigned short a = from[0].x;
+  unsigned short b = from[1].x;
+  unsigned short c = from[2].x;
+  unsigned short d = from[3].x;
+  result.x = _mm256_set_epi16(d, d, d, d, c, c, c, c, b, b, b, b, a, a, a, a);
+  return result;
+}
+
+EIGEN_STRONG_INLINE Packet16f half2float(const Packet16h& a) {
+#ifdef EIGEN_HAS_FP16_C
+  return _mm512_cvtph_ps(a.x);
+#else
+  EIGEN_ALIGN64 half aux[16];
+  pstore(aux, a);
+  float f0(aux[0]);
+  float f1(aux[1]);
+  float f2(aux[2]);
+  float f3(aux[3]);
+  float f4(aux[4]);
+  float f5(aux[5]);
+  float f6(aux[6]);
+  float f7(aux[7]);
+  float f8(aux[8]);
+  float f9(aux[9]);
+  float fa(aux[10]);
+  float fb(aux[11]);
+  float fc(aux[12]);
+  float fd(aux[13]);
+  float fe(aux[14]);
+  float ff(aux[15]);
+
+  return _mm512_set_ps(
+      ff, fe, fd, fc, fb, fa, f9, f8, f7, f6, f5, f4, f3, f2, f1, f0);
+#endif
+}
+
+EIGEN_STRONG_INLINE Packet16h float2half(const Packet16f& a) {
+#ifdef EIGEN_HAS_FP16_C
+  Packet16h result;
+  result.x = _mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);
+  return result;
+#else
+  EIGEN_ALIGN64 float aux[16];
+  pstore(aux, a);
+  half h0(aux[0]);
+  half h1(aux[1]);
+  half h2(aux[2]);
+  half h3(aux[3]);
+  half h4(aux[4]);
+  half h5(aux[5]);
+  half h6(aux[6]);
+  half h7(aux[7]);
+  half h8(aux[8]);
+  half h9(aux[9]);
+  half ha(aux[10]);
+  half hb(aux[11]);
+  half hc(aux[12]);
+  half hd(aux[13]);
+  half he(aux[14]);
+  half hf(aux[15]);
+
+  Packet16h result;
+  result.x = _mm256_set_epi16(
+      hf.x, he.x, hd.x, hc.x, hb.x, ha.x, h9.x, h8.x,
+      h7.x, h6.x, h5.x, h4.x, h3.x, h2.x, h1.x, h0.x);
+  return result;
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet16h padd<Packet16h>(const Packet16h& a, const Packet16h& b) {
+  Packet16f af = half2float(a);
+  Packet16f bf = half2float(b);
+  Packet16f rf = padd(af, bf);
+  return float2half(rf);
+}
+
+template<> EIGEN_STRONG_INLINE Packet16h pmul<Packet16h>(const Packet16h& a, const Packet16h& b) {
+  Packet16f af = half2float(a);
+  Packet16f bf = half2float(b);
+  Packet16f rf = pmul(af, bf);
+  return float2half(rf);
+}
+
+template<> EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& from) {
+  Packet16f from_float = half2float(from);
+  return half(predux(from_float));
+}
+
+template<> EIGEN_STRONG_INLINE Packet16h pgather<Eigen::half, Packet16h>(const Eigen::half* from, Index stride)
+{
+  Packet16h result;
+  result.x = _mm256_set_epi16(
+      from[15*stride].x, from[14*stride].x, from[13*stride].x, from[12*stride].x,
+      from[11*stride].x, from[10*stride].x, from[9*stride].x, from[8*stride].x,
+      from[7*stride].x, from[6*stride].x, from[5*stride].x, from[4*stride].x,
+      from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE void pscatter<half, Packet16h>(half* to, const Packet16h& from, Index stride)
+{
+  EIGEN_ALIGN64 half aux[16];
+  pstore(aux, from);
+  to[stride*0].x = aux[0].x;
+  to[stride*1].x = aux[1].x;
+  to[stride*2].x = aux[2].x;
+  to[stride*3].x = aux[3].x;
+  to[stride*4].x = aux[4].x;
+  to[stride*5].x = aux[5].x;
+  to[stride*6].x = aux[6].x;
+  to[stride*7].x = aux[7].x;
+  to[stride*8].x = aux[8].x;
+  to[stride*9].x = aux[9].x;
+  to[stride*10].x = aux[10].x;
+  to[stride*11].x = aux[11].x;
+  to[stride*12].x = aux[12].x;
+  to[stride*13].x = aux[13].x;
+  to[stride*14].x = aux[14].x;
+  to[stride*15].x = aux[15].x;
+}
+
+EIGEN_STRONG_INLINE void
+ptranspose(PacketBlock<Packet16h,16>& kernel) {
+  __m256i a = kernel.packet[0].x;
+  __m256i b = kernel.packet[1].x;
+  __m256i c = kernel.packet[2].x;
+  __m256i d = kernel.packet[3].x;
+  __m256i e = kernel.packet[4].x;
+  __m256i f = kernel.packet[5].x;
+  __m256i g = kernel.packet[6].x;
+  __m256i h = kernel.packet[7].x;
+  __m256i i = kernel.packet[8].x;
+  __m256i j = kernel.packet[9].x;
+  __m256i k = kernel.packet[10].x;
+  __m256i l = kernel.packet[11].x;
+  __m256i m = kernel.packet[12].x;
+  __m256i n = kernel.packet[13].x;
+  __m256i o = kernel.packet[14].x;
+  __m256i p = kernel.packet[15].x;
+
+  __m256i ab_07 = _mm256_unpacklo_epi16(a, b);
+  __m256i cd_07 = _mm256_unpacklo_epi16(c, d);
+  __m256i ef_07 = _mm256_unpacklo_epi16(e, f);
+  __m256i gh_07 = _mm256_unpacklo_epi16(g, h);
+  __m256i ij_07 = _mm256_unpacklo_epi16(i, j);
+  __m256i kl_07 = _mm256_unpacklo_epi16(k, l);
+  __m256i mn_07 = _mm256_unpacklo_epi16(m, n);
+  __m256i op_07 = _mm256_unpacklo_epi16(o, p);
+
+  __m256i ab_8f = _mm256_unpackhi_epi16(a, b);
+  __m256i cd_8f = _mm256_unpackhi_epi16(c, d);
+  __m256i ef_8f = _mm256_unpackhi_epi16(e, f);
+  __m256i gh_8f = _mm256_unpackhi_epi16(g, h);
+  __m256i ij_8f = _mm256_unpackhi_epi16(i, j);
+  __m256i kl_8f = _mm256_unpackhi_epi16(k, l);
+  __m256i mn_8f = _mm256_unpackhi_epi16(m, n);
+  __m256i op_8f = _mm256_unpackhi_epi16(o, p);
+
+  __m256i abcd_03 = _mm256_unpacklo_epi32(ab_07, cd_07);
+  __m256i abcd_47 = _mm256_unpackhi_epi32(ab_07, cd_07);
+  __m256i efgh_03 = _mm256_unpacklo_epi32(ef_07, gh_07);
+  __m256i efgh_47 = _mm256_unpackhi_epi32(ef_07, gh_07);
+  __m256i ijkl_03 = _mm256_unpacklo_epi32(ij_07, kl_07);
+  __m256i ijkl_47 = _mm256_unpackhi_epi32(ij_07, kl_07);
+  __m256i mnop_03 = _mm256_unpacklo_epi32(mn_07, op_07);
+  __m256i mnop_47 = _mm256_unpackhi_epi32(mn_07, op_07);
+
+  __m256i abcd_8b = _mm256_unpacklo_epi32(ab_8f, cd_8f);
+  __m256i abcd_cf = _mm256_unpackhi_epi32(ab_8f, cd_8f);
+  __m256i efgh_8b = _mm256_unpacklo_epi32(ef_8f, gh_8f);
+  __m256i efgh_cf = _mm256_unpackhi_epi32(ef_8f, gh_8f);
+  __m256i ijkl_8b = _mm256_unpacklo_epi32(ij_8f, kl_8f);
+  __m256i ijkl_cf = _mm256_unpackhi_epi32(ij_8f, kl_8f);
+  __m256i mnop_8b = _mm256_unpacklo_epi32(mn_8f, op_8f);
+  __m256i mnop_cf = _mm256_unpackhi_epi32(mn_8f, op_8f);
+
+  __m256i abcdefgh_01 = _mm256_unpacklo_epi64(abcd_03, efgh_03);
+  __m256i abcdefgh_23 = _mm256_unpackhi_epi64(abcd_03, efgh_03);
+  __m256i ijklmnop_01 = _mm256_unpacklo_epi64(ijkl_03, mnop_03);
+  __m256i ijklmnop_23 = _mm256_unpackhi_epi64(ijkl_03, mnop_03);
+  __m256i abcdefgh_45 = _mm256_unpacklo_epi64(abcd_47, efgh_47);
+  __m256i abcdefgh_67 = _mm256_unpackhi_epi64(abcd_47, efgh_47);
+  __m256i ijklmnop_45 = _mm256_unpacklo_epi64(ijkl_47, mnop_47);
+  __m256i ijklmnop_67 = _mm256_unpackhi_epi64(ijkl_47, mnop_47);
+  __m256i abcdefgh_89 = _mm256_unpacklo_epi64(abcd_8b, efgh_8b);
+  __m256i abcdefgh_ab = _mm256_unpackhi_epi64(abcd_8b, efgh_8b);
+  __m256i ijklmnop_89 = _mm256_unpacklo_epi64(ijkl_8b, mnop_8b);
+  __m256i ijklmnop_ab = _mm256_unpackhi_epi64(ijkl_8b, mnop_8b);
+  __m256i abcdefgh_cd = _mm256_unpacklo_epi64(abcd_cf, efgh_cf);
+  __m256i abcdefgh_ef = _mm256_unpackhi_epi64(abcd_cf, efgh_cf);
+  __m256i ijklmnop_cd = _mm256_unpacklo_epi64(ijkl_cf, mnop_cf);
+  __m256i ijklmnop_ef = _mm256_unpackhi_epi64(ijkl_cf, mnop_cf);
+
+  // NOTE: no unpacklo/hi instr in this case, so using permute instr.
+  __m256i a_p_0 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x20);
+  __m256i a_p_1 = _mm256_permute2x128_si256(abcdefgh_01, ijklmnop_01, 0x31);
+  __m256i a_p_2 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x20);
+  __m256i a_p_3 = _mm256_permute2x128_si256(abcdefgh_23, ijklmnop_23, 0x31);
+  __m256i a_p_4 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x20);
+  __m256i a_p_5 = _mm256_permute2x128_si256(abcdefgh_45, ijklmnop_45, 0x31);
+  __m256i a_p_6 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x20);
+  __m256i a_p_7 = _mm256_permute2x128_si256(abcdefgh_67, ijklmnop_67, 0x31);
+  __m256i a_p_8 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x20);
+  __m256i a_p_9 = _mm256_permute2x128_si256(abcdefgh_89, ijklmnop_89, 0x31);
+  __m256i a_p_a = _mm256_permute2x128_si256(abcdefgh_ab, ijklmnop_ab, 0x20);
+  __m256i a_p_b = _mm256_permute2x128_si256(abcdefgh_ab, ijklmnop_ab, 0x31);
+  __m256i a_p_c = _mm256_permute2x128_si256(abcdefgh_cd, ijklmnop_cd, 0x20);
+  __m256i a_p_d = _mm256_permute2x128_si256(abcdefgh_cd, ijklmnop_cd, 0x31);
+  __m256i a_p_e = _mm256_permute2x128_si256(abcdefgh_ef, ijklmnop_ef, 0x20);
+  __m256i a_p_f = _mm256_permute2x128_si256(abcdefgh_ef, ijklmnop_ef, 0x31);
+
+  kernel.packet[0].x = a_p_0;
+  kernel.packet[1].x = a_p_1;
+  kernel.packet[2].x = a_p_2;
+  kernel.packet[3].x = a_p_3;
+  kernel.packet[4].x = a_p_4;
+  kernel.packet[5].x = a_p_5;
+  kernel.packet[6].x = a_p_6;
+  kernel.packet[7].x = a_p_7;
+  kernel.packet[8].x = a_p_8;
+  kernel.packet[9].x = a_p_9;
+  kernel.packet[10].x = a_p_a;
+  kernel.packet[11].x = a_p_b;
+  kernel.packet[12].x = a_p_c;
+  kernel.packet[13].x = a_p_d;
+  kernel.packet[14].x = a_p_e;
+  kernel.packet[15].x = a_p_f;
+}
+
+EIGEN_STRONG_INLINE void
+ptranspose(PacketBlock<Packet16h,8>& kernel) {
+  EIGEN_ALIGN64 half in[8][16];
+  pstore<half>(in[0], kernel.packet[0]);
+  pstore<half>(in[1], kernel.packet[1]);
+  pstore<half>(in[2], kernel.packet[2]);
+  pstore<half>(in[3], kernel.packet[3]);
+  pstore<half>(in[4], kernel.packet[4]);
+  pstore<half>(in[5], kernel.packet[5]);
+  pstore<half>(in[6], kernel.packet[6]);
+  pstore<half>(in[7], kernel.packet[7]);
+
+  EIGEN_ALIGN64 half out[8][16];
+
+  for (int i = 0; i < 8; ++i) {
+    for (int j = 0; j < 8; ++j) {
+      out[i][j] = in[j][2*i];
+    }
+    for (int j = 0; j < 8; ++j) {
+      out[i][j+8] = in[j][2*i+1];
+    }
+  }
+
+  kernel.packet[0] = pload<Packet16h>(out[0]);
+  kernel.packet[1] = pload<Packet16h>(out[1]);
+  kernel.packet[2] = pload<Packet16h>(out[2]);
+  kernel.packet[3] = pload<Packet16h>(out[3]);
+  kernel.packet[4] = pload<Packet16h>(out[4]);
+  kernel.packet[5] = pload<Packet16h>(out[5]);
+  kernel.packet[6] = pload<Packet16h>(out[6]);
+  kernel.packet[7] = pload<Packet16h>(out[7]);
+}
+
+EIGEN_STRONG_INLINE void
+ptranspose(PacketBlock<Packet16h,4>& kernel) {
+  EIGEN_ALIGN64 half in[4][16];
+  pstore<half>(in[0], kernel.packet[0]);
+  pstore<half>(in[1], kernel.packet[1]);
+  pstore<half>(in[2], kernel.packet[2]);
+  pstore<half>(in[3], kernel.packet[3]);
+
+  EIGEN_ALIGN64 half out[4][16];
+
+  for (int i = 0; i < 4; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      out[i][j] = in[j][4*i];
+    }
+    for (int j = 0; j < 4; ++j) {
+      out[i][j+4] = in[j][4*i+1];
+    }
+    for (int j = 0; j < 4; ++j) {
+      out[i][j+8] = in[j][4*i+2];
+    }
+    for (int j = 0; j < 4; ++j) {
+      out[i][j+12] = in[j][4*i+3];
+    }
+  }
+
+  kernel.packet[0] = pload<Packet16h>(out[0]);
+  kernel.packet[1] = pload<Packet16h>(out[1]);
+  kernel.packet[2] = pload<Packet16h>(out[2]);
+  kernel.packet[3] = pload<Packet16h>(out[3]);
+}
+
+
+#elif defined EIGEN_VECTORIZE_AVX
+
+typedef struct {
+  __m128i x;
+} Packet8h;
+
+
+template<> struct is_arithmetic<Packet8h> { enum { value = true }; };
+
+template <>
+struct packet_traits<Eigen::half> : default_packet_traits {
+  typedef Packet8h type;
+  // There is no half-size packet for Packet8h.
+  typedef Packet8h half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 8,
+    HasHalfPacket = 0,
+    HasAdd    = 0,
+    HasSub    = 0,
+    HasMul    = 0,
+    HasNegate = 0,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasConj   = 0,
+    HasSetLinear = 0,
+    HasDiv = 0,
+    HasSqrt = 0,
+    HasRsqrt = 0,
+    HasExp = 0,
+    HasLog = 0,
+    HasBlend = 0
+  };
+};
+
+
+template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16}; typedef Packet8h half; };
+
+template<> EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const Eigen::half& from) {
+  Packet8h result;
+  result.x = _mm_set1_epi16(from.x);
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half pfirst<Packet8h>(const Packet8h& from) {
+  return half_impl::raw_uint16_to_half(static_cast<unsigned short>(_mm_extract_epi16(from.x, 0)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet8h pload<Packet8h>(const Eigen::half* from) {
+  Packet8h result;
+  result.x = _mm_load_si128(reinterpret_cast<const __m128i*>(from));
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE Packet8h ploadu<Packet8h>(const Eigen::half* from) {
+  Packet8h result;
+  result.x = _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet8h& from) {
+  _mm_store_si128(reinterpret_cast<__m128i*>(to), from.x);
+}
+
+template<> EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet8h& from) {
+  _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from.x);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8h
+ploadquad<Packet8h>(const Eigen::half* from) {
+  Packet8h result;
+  unsigned short a = from[0].x;
+  unsigned short b = from[1].x;
+  result.x = _mm_set_epi16(b, b, b, b, a, a, a, a);
+  return result;
+}
+
+EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h& a) {
+#ifdef EIGEN_HAS_FP16_C
+  return _mm256_cvtph_ps(a.x);
+#else
+  EIGEN_ALIGN32 Eigen::half aux[8];
+  pstore(aux, a);
+  float f0(aux[0]);
+  float f1(aux[1]);
+  float f2(aux[2]);
+  float f3(aux[3]);
+  float f4(aux[4]);
+  float f5(aux[5]);
+  float f6(aux[6]);
+  float f7(aux[7]);
+
+  return _mm256_set_ps(f7, f6, f5, f4, f3, f2, f1, f0);
+#endif
+}
+
+EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
+#ifdef EIGEN_HAS_FP16_C
+  Packet8h result;
+  result.x = _mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);
+  return result;
+#else
+  EIGEN_ALIGN32 float aux[8];
+  pstore(aux, a);
+  Eigen::half h0(aux[0]);
+  Eigen::half h1(aux[1]);
+  Eigen::half h2(aux[2]);
+  Eigen::half h3(aux[3]);
+  Eigen::half h4(aux[4]);
+  Eigen::half h5(aux[5]);
+  Eigen::half h6(aux[6]);
+  Eigen::half h7(aux[7]);
+
+  Packet8h result;
+  result.x = _mm_set_epi16(h7.x, h6.x, h5.x, h4.x, h3.x, h2.x, h1.x, h0.x);
+  return result;
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet8h pconj(const Packet8h& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
+  Packet8f af = half2float(a);
+  Packet8f bf = half2float(b);
+  Packet8f rf = padd(af, bf);
+  return float2half(rf);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const Packet8h& a, const Packet8h& b) {
+  Packet8f af = half2float(a);
+  Packet8f bf = half2float(b);
+  Packet8f rf = pmul(af, bf);
+  return float2half(rf);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8h pgather<Eigen::half, Packet8h>(const Eigen::half* from, Index stride)
+{
+  Packet8h result;
+  result.x = _mm_set_epi16(from[7*stride].x, from[6*stride].x, from[5*stride].x, from[4*stride].x, from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet8h>(Eigen::half* to, const Packet8h& from, Index stride)
+{
+  EIGEN_ALIGN32 Eigen::half aux[8];
+  pstore(aux, from);
+  to[stride*0].x = aux[0].x;
+  to[stride*1].x = aux[1].x;
+  to[stride*2].x = aux[2].x;
+  to[stride*3].x = aux[3].x;
+  to[stride*4].x = aux[4].x;
+  to[stride*5].x = aux[5].x;
+  to[stride*6].x = aux[6].x;
+  to[stride*7].x = aux[7].x;
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux<Packet8h>(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux<Packet8f>(af);
+  return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_max<Packet8h>(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux_max<Packet8f>(af);
+  return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_min<Packet8h>(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux_min<Packet8f>(af);
+  return Eigen::half(reduced);
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8h>(const Packet8h& a) {
+  Packet8f af = half2float(a);
+  float reduced = predux_mul<Packet8f>(af);
+  return Eigen::half(reduced);
+}
+
+EIGEN_STRONG_INLINE void
+ptranspose(PacketBlock<Packet8h,8>& kernel) {
+  __m128i a = kernel.packet[0].x;
+  __m128i b = kernel.packet[1].x;
+  __m128i c = kernel.packet[2].x;
+  __m128i d = kernel.packet[3].x;
+  __m128i e = kernel.packet[4].x;
+  __m128i f = kernel.packet[5].x;
+  __m128i g = kernel.packet[6].x;
+  __m128i h = kernel.packet[7].x;
+
+  __m128i a03b03 = _mm_unpacklo_epi16(a, b);
+  __m128i c03d03 = _mm_unpacklo_epi16(c, d);
+  __m128i e03f03 = _mm_unpacklo_epi16(e, f);
+  __m128i g03h03 = _mm_unpacklo_epi16(g, h);
+  __m128i a47b47 = _mm_unpackhi_epi16(a, b);
+  __m128i c47d47 = _mm_unpackhi_epi16(c, d);
+  __m128i e47f47 = _mm_unpackhi_epi16(e, f);
+  __m128i g47h47 = _mm_unpackhi_epi16(g, h);
+
+  __m128i a01b01c01d01 = _mm_unpacklo_epi32(a03b03, c03d03);
+  __m128i a23b23c23d23 = _mm_unpackhi_epi32(a03b03, c03d03);
+  __m128i e01f01g01h01 = _mm_unpacklo_epi32(e03f03, g03h03);
+  __m128i e23f23g23h23 = _mm_unpackhi_epi32(e03f03, g03h03);
+  __m128i a45b45c45d45 = _mm_unpacklo_epi32(a47b47, c47d47);
+  __m128i a67b67c67d67 = _mm_unpackhi_epi32(a47b47, c47d47);
+  __m128i e45f45g45h45 = _mm_unpacklo_epi32(e47f47, g47h47);
+  __m128i e67f67g67h67 = _mm_unpackhi_epi32(e47f47, g47h47);
+
+  __m128i a0b0c0d0e0f0g0h0 = _mm_unpacklo_epi64(a01b01c01d01, e01f01g01h01);
+  __m128i a1b1c1d1e1f1g1h1 = _mm_unpackhi_epi64(a01b01c01d01, e01f01g01h01);
+  __m128i a2b2c2d2e2f2g2h2 = _mm_unpacklo_epi64(a23b23c23d23, e23f23g23h23);
+  __m128i a3b3c3d3e3f3g3h3 = _mm_unpackhi_epi64(a23b23c23d23, e23f23g23h23);
+  __m128i a4b4c4d4e4f4g4h4 = _mm_unpacklo_epi64(a45b45c45d45, e45f45g45h45);
+  __m128i a5b5c5d5e5f5g5h5 = _mm_unpackhi_epi64(a45b45c45d45, e45f45g45h45);
+  __m128i a6b6c6d6e6f6g6h6 = _mm_unpacklo_epi64(a67b67c67d67, e67f67g67h67);
+  __m128i a7b7c7d7e7f7g7h7 = _mm_unpackhi_epi64(a67b67c67d67, e67f67g67h67);
+
+  kernel.packet[0].x = a0b0c0d0e0f0g0h0;
+  kernel.packet[1].x = a1b1c1d1e1f1g1h1;
+  kernel.packet[2].x = a2b2c2d2e2f2g2h2;
+  kernel.packet[3].x = a3b3c3d3e3f3g3h3;
+  kernel.packet[4].x = a4b4c4d4e4f4g4h4;
+  kernel.packet[5].x = a5b5c5d5e5f5g5h5;
+  kernel.packet[6].x = a6b6c6d6e6f6g6h6;
+  kernel.packet[7].x = a7b7c7d7e7f7g7h7;
+}
+
+EIGEN_STRONG_INLINE void
+ptranspose(PacketBlock<Packet8h,4>& kernel) {
+  EIGEN_ALIGN32 Eigen::half in[4][8];
+  pstore<Eigen::half>(in[0], kernel.packet[0]);
+  pstore<Eigen::half>(in[1], kernel.packet[1]);
+  pstore<Eigen::half>(in[2], kernel.packet[2]);
+  pstore<Eigen::half>(in[3], kernel.packet[3]);
+
+  EIGEN_ALIGN32 Eigen::half out[4][8];
+
+  for (int i = 0; i < 4; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      out[i][j] = in[j][2*i];
+    }
+    for (int j = 0; j < 4; ++j) {
+      out[i][j+4] = in[j][2*i+1];
+    }
+  }
+
+  kernel.packet[0] = pload<Packet8h>(out[0]);
+  kernel.packet[1] = pload<Packet8h>(out[1]);
+  kernel.packet[2] = pload<Packet8h>(out[2]);
+  kernel.packet[3] = pload<Packet8h>(out[3]);
+}
+
+
+// Disable the following code since it's broken on too many platforms / compilers.
+//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
+#elif 0
+
+typedef struct {
+  __m64 x;
+} Packet4h;
+
+
+template<> struct is_arithmetic<Packet4h> { enum { value = true }; };
+
+template <>
+struct packet_traits<Eigen::half> : default_packet_traits {
+  typedef Packet4h type;
+  // There is no half-size packet for Packet4h.
+  typedef Packet4h half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 4,
+    HasHalfPacket = 0,
+    HasAdd    = 0,
+    HasSub    = 0,
+    HasMul    = 0,
+    HasNegate = 0,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasConj   = 0,
+    HasSetLinear = 0,
+    HasDiv = 0,
+    HasSqrt = 0,
+    HasRsqrt = 0,
+    HasExp = 0,
+    HasLog = 0,
+    HasBlend = 0
+  };
+};
+
+
+template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16}; typedef Packet4h half; };
+
+template<> EIGEN_STRONG_INLINE Packet4h pset1<Packet4h>(const Eigen::half& from) {
+  Packet4h result;
+  result.x = _mm_set1_pi16(from.x);
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE Eigen::half pfirst<Packet4h>(const Packet4h& from) {
+  return half_impl::raw_uint16_to_half(static_cast<unsigned short>(_mm_cvtsi64_si32(from.x)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4h pconj(const Packet4h& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet4h padd<Packet4h>(const Packet4h& a, const Packet4h& b) {
+  __int64_t a64 = _mm_cvtm64_si64(a.x);
+  __int64_t b64 = _mm_cvtm64_si64(b.x);
+
+  Eigen::half h[4];
+
+  Eigen::half ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64));
+  Eigen::half hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64));
+  h[0] = ha + hb;
+  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
+  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));
+  h[1] = ha + hb;
+  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
+  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 32));
+  h[2] = ha + hb;
+  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
+  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 48));
+  h[3] = ha + hb;
+  Packet4h result;
+  result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4h pmul<Packet4h>(const Packet4h& a, const Packet4h& b) {
+  __int64_t a64 = _mm_cvtm64_si64(a.x);
+  __int64_t b64 = _mm_cvtm64_si64(b.x);
+
+  Eigen::half h[4];
+
+  Eigen::half ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64));
+  Eigen::half hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64));
+  h[0] = ha * hb;
+  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
+  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));
+  h[1] = ha * hb;
+  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
+  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 32));
+  h[2] = ha * hb;
+  ha = half_impl::raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
+  hb = half_impl::raw_uint16_to_half(static_cast<unsigned short>(b64 >> 48));
+  h[3] = ha * hb;
+  Packet4h result;
+  result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4h pload<Packet4h>(const Eigen::half* from) {
+  Packet4h result;
+  result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4h ploadu<Packet4h>(const Eigen::half* from) {
+  Packet4h result;
+  result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet4h& from) {
+  __int64_t r = _mm_cvtm64_si64(from.x);
+  *(reinterpret_cast<__int64_t*>(to)) = r;
+}
+
+template<> EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet4h& from) {
+  __int64_t r = _mm_cvtm64_si64(from.x);
+  *(reinterpret_cast<__int64_t*>(to)) = r;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4h
+ploadquad<Packet4h>(const Eigen::half* from) {
+  return pset1<Packet4h>(*from);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4h pgather<Eigen::half, Packet4h>(const Eigen::half* from, Index stride)
+{
+  Packet4h result;
+  result.x = _mm_set_pi16(from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet4h>(Eigen::half* to, const Packet4h& from, Index stride)
+{
+  __int64_t a = _mm_cvtm64_si64(from.x);
+  to[stride*0].x = static_cast<unsigned short>(a);
+  to[stride*1].x = static_cast<unsigned short>(a >> 16);
+  to[stride*2].x = static_cast<unsigned short>(a >> 32);
+  to[stride*3].x = static_cast<unsigned short>(a >> 48);
+}
+
+EIGEN_STRONG_INLINE void
+ptranspose(PacketBlock<Packet4h,4>& kernel) {
+  __m64 T0 = _mm_unpacklo_pi16(kernel.packet[0].x, kernel.packet[1].x);
+  __m64 T1 = _mm_unpacklo_pi16(kernel.packet[2].x, kernel.packet[3].x);
+  __m64 T2 = _mm_unpackhi_pi16(kernel.packet[0].x, kernel.packet[1].x);
+  __m64 T3 = _mm_unpackhi_pi16(kernel.packet[2].x, kernel.packet[3].x);
+
+  kernel.packet[0].x = _mm_unpacklo_pi32(T0, T1);
+  kernel.packet[1].x = _mm_unpackhi_pi32(T0, T1);
+  kernel.packet[2].x = _mm_unpacklo_pi32(T2, T3);
+  kernel.packet[3].x = _mm_unpackhi_pi32(T2, T3);
+}
+
+#endif
+
+}
+}
+
+#endif // EIGEN_PACKET_MATH_HALF_CUDA_H
diff --git a/third-party/Eigen/src/Core/arch/CUDA/TypeCasting.h b/third-party/Eigen/src/Core/arch/CUDA/TypeCasting.h
new file mode 100644
index 00000000..aa5fbce8
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/CUDA/TypeCasting.h
@@ -0,0 +1,212 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TYPE_CASTING_CUDA_H
+#define EIGEN_TYPE_CASTING_CUDA_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<>
+struct scalar_cast_op<float, Eigen::half> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+  typedef Eigen::half result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
+    #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+      return __float2half(a);
+    #else
+      return Eigen::half(a);
+    #endif
+  }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<float, Eigen::half> >
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
+
+
+template<>
+struct scalar_cast_op<int, Eigen::half> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+  typedef Eigen::half result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
+    #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+      return __float2half(static_cast<float>(a));
+    #else
+      return Eigen::half(static_cast<float>(a));
+    #endif
+  }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<int, Eigen::half> >
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
+
+
+template<>
+struct scalar_cast_op<Eigen::half, float> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+  typedef float result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
+    #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+      return __half2float(a);
+    #else
+      return static_cast<float>(a);
+    #endif
+  }
+};
+
+template<>
+struct functor_traits<scalar_cast_op<Eigen::half, float> >
+{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
+
+
+
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+
+template <>
+struct type_casting_traits<Eigen::half, float> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 2,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
+  float2 r1 = __half22float2(a);
+  float2 r2 = __half22float2(b);
+  return make_float4(r1.x, r1.y, r2.x, r2.y);
+}
+
+template <>
+struct type_casting_traits<float, Eigen::half> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 2
+  };
+};
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
+  // Simply discard the second half of the input
+  return __floats2half2_rn(a.x, a.y);
+}
+
+#elif defined EIGEN_VECTORIZE_AVX512
+template <>
+struct type_casting_traits<half, float> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
+  return half2float(a);
+}
+
+template <>
+struct type_casting_traits<float, half> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
+  return float2half(a);
+}
+
+#elif defined EIGEN_VECTORIZE_AVX
+
+template <>
+struct type_casting_traits<Eigen::half, float> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
+  return half2float(a);
+}
+
+template <>
+struct type_casting_traits<float, Eigen::half> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
+  return float2half(a);
+}
+
+// Disable the following code since it's broken on too many platforms / compilers.
+//#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
+#elif 0
+
+template <>
+struct type_casting_traits<Eigen::half, float> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
+  __int64_t a64 = _mm_cvtm64_si64(a.x);
+  Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));
+  float f1 = static_cast<float>(h);
+  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
+  float f2 = static_cast<float>(h);
+  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
+  float f3 = static_cast<float>(h);
+  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
+  float f4 = static_cast<float>(h);
+  return _mm_set_ps(f4, f3, f2, f1);
+}
+
+template <>
+struct type_casting_traits<float, Eigen::half> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
+  EIGEN_ALIGN16 float aux[4];
+  pstore(aux, a);
+  Eigen::half h0(aux[0]);
+  Eigen::half h1(aux[1]);
+  Eigen::half h2(aux[2]);
+  Eigen::half h3(aux[3]);
+
+  Packet4h result;
+  result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
+  return result;
+}
+
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_CUDA_H
diff --git a/third-party/Eigen/src/Core/arch/Default/ConjHelper.h b/third-party/Eigen/src/Core/arch/Default/ConjHelper.h
new file mode 100644
index 00000000..4cfe34e0
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/Default/ConjHelper.h
@@ -0,0 +1,29 @@
+
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ARCH_CONJ_HELPER_H
+#define EIGEN_ARCH_CONJ_HELPER_H
+
+#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)                                                          \
+  template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false> {                                          \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
+    { return padd(c, pmul(x,y)); }                                                                                \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const                        \
+    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); }                                           \
+  };                                                                                                              \
+                                                                                                                  \
+  template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false> {                                          \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
+    { return padd(c, pmul(x,y)); }                                                                                \
+    EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const                        \
+    { return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); }                                           \
+  };
+
+#endif // EIGEN_ARCH_CONJ_HELPER_H
diff --git a/third-party/Eigen/src/Core/arch/Default/Settings.h b/third-party/Eigen/src/Core/arch/Default/Settings.h
new file mode 100644
index 00000000..097373c8
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/Default/Settings.h
@@ -0,0 +1,49 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+/* All the parameters defined in this file can be specialized in the
+ * architecture specific files, and/or by the user.
+ * More to come... */
+
+#ifndef EIGEN_DEFAULT_SETTINGS_H
+#define EIGEN_DEFAULT_SETTINGS_H
+
+/** Defines the maximal loop size to enable meta unrolling of loops.
+  * Note that the value here is expressed in Eigen's own notion of "number of FLOPS",
+  * it does not correspond to the number of iterations or the number of instructions
+  */
+#ifndef EIGEN_UNROLLING_LIMIT
+#define EIGEN_UNROLLING_LIMIT 100
+#endif
+
+/** Defines the threshold between a "small" and a "large" matrix.
+  * This threshold is mainly used to select the proper product implementation.
+  */
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+
+/** Defines the maximal width of the blocks used in the triangular product and solver
+  * for vectors (level 2 blas xTRMV and xTRSV). The default is 8.
+  */
+#ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
+#define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8
+#endif
+
+
+/** Defines the default number of registers available for that architecture.
+  * Currently it must be 8 or 16. Other values will fail.
+  */
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8
+#endif
+
+#endif // EIGEN_DEFAULT_SETTINGS_H
diff --git a/third-party/Eigen/src/Core/arch/NEON/Complex.h b/third-party/Eigen/src/Core/arch/NEON/Complex.h
new file mode 100644
index 00000000..306a309b
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/NEON/Complex.h
@@ -0,0 +1,490 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_NEON_H
+#define EIGEN_COMPLEX_NEON_H
+
+namespace Eigen {
+
+namespace internal {
+
+inline uint32x4_t p4ui_CONJ_XOR() {
+// See bug 1325, clang fails to call vld1q_u64.
+#if EIGEN_COMP_CLANG
+  uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
+  return ret;
+#else
+  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
+  return vld1q_u32( conj_XOR_DATA );
+#endif
+}
+
+inline uint32x2_t p2ui_CONJ_XOR() {
+  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
+  return vld1_u32( conj_XOR_DATA );
+}
+
+//---------- float ----------
+struct Packet2cf
+{
+  EIGEN_STRONG_INLINE Packet2cf() {}
+  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+  Packet4f  v;
+};
+
+template<> struct packet_traits<std::complex<float> >  : default_packet_traits
+{
+  typedef Packet2cf type;
+  typedef Packet2cf half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 2,
+    HasHalfPacket = 0,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0
+  };
+};
+
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
+{
+  float32x2_t r64;
+  r64 = vld1_f32((const float *)&from);
+
+  return Packet2cf(vcombine_f32(r64, r64));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+  Packet4ui b = vreinterpretq_u32_f32(a.v);
+  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  Packet4f v1, v2;
+
+  // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
+  v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
+  // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |
+  v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
+  // Multiply the real a with b
+  v1 = vmulq_f32(v1, b.v);
+  // Multiply the imag a with b
+  v2 = vmulq_f32(v2, b.v);
+  // Conjugate v2 
+  v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
+  // Swap real/imag elements in v2.
+  v2 = vrev64q_f32(v2);
+  // Add and return the result
+  return Packet2cf(vaddq_f32(v1, v2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+  Packet4f res = pset1<Packet4f>(0.f);
+  res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
+  res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
+  res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
+  res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
+  return Packet2cf(res);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+  to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
+  to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
+
+template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
+{
+  std::complex<float> EIGEN_ALIGN16 x[2];
+  vst1q_f32((float *)x, a.v);
+  return x[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+  float32x2_t a_lo, a_hi;
+  Packet4f a_r128;
+
+  a_lo = vget_low_f32(a.v);
+  a_hi = vget_high_f32(a.v);
+  a_r128 = vcombine_f32(a_hi, a_lo);
+
+  return Packet2cf(a_r128);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
+{
+  return Packet2cf(vrev64q_f32(a.v));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+  float32x2_t a1, a2;
+  std::complex<float> s;
+
+  a1 = vget_low_f32(a.v);
+  a2 = vget_high_f32(a.v);
+  a2 = vadd_f32(a1, a2);
+  vst1_f32((float *)&s, a2);
+
+  return s;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+  Packet4f sum1, sum2, sum;
+
+  // Add the first two 64-bit float32x2_t of vecs[0]
+  sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
+  sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
+  sum = vaddq_f32(sum1, sum2);
+
+  return Packet2cf(sum);
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+  float32x2_t a1, a2, v1, v2, prod;
+  std::complex<float> s;
+
+  a1 = vget_low_f32(a.v);
+  a2 = vget_high_f32(a.v);
+   // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
+  v1 = vdup_lane_f32(a1, 0);
+  // Get the real values of a | a1_im | a1_im | a2_im | a2_im |
+  v2 = vdup_lane_f32(a1, 1);
+  // Multiply the real a with b
+  v1 = vmul_f32(v1, a2);
+  // Multiply the imag a with b
+  v2 = vmul_f32(v2, a2);
+  // Conjugate v2 
+  v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
+  // Swap real/imag elements in v2.
+  v2 = vrev64_f32(v2);
+  // Add v1, v2
+  prod = vadd_f32(v1, v2);
+
+  vst1_f32((float *)&s, prod);
+
+  return s;
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
+  {
+    if (Offset==1)
+    {
+      first.v = vextq_f32(first.v, second.v, 2);
+    }
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return internal::pmul(a, pconj(b));
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return internal::pmul(pconj(a), b);
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return pconj(internal::pmul(a, b));
+  }
+};
+
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
+
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  // TODO optimize it for NEON
+  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+  Packet4f s, rev_s;
+
+  // this computes the norm
+  s = vmulq_f32(b.v, b.v);
+  rev_s = vrev64q_f32(s);
+
+  return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
+  Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
+  kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
+  kernel.packet[1].v = tmp;
+}
+
+//---------- double ----------
+#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+
+// See bug 1325, clang fails to call vld1q_u64.
+#if EIGEN_COMP_CLANG
+  static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
+#else
+  const uint64_t  p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
+  static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
+#endif
+
+struct Packet1cd
+{
+  EIGEN_STRONG_INLINE Packet1cd() {}
+  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+  Packet2d v;
+};
+
+template<> struct packet_traits<std::complex<double> >  : default_packet_traits
+{
+  typedef Packet1cd type;
+  typedef Packet1cd half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 0,
+    size = 1,
+    HasHalfPacket = 0,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0
+  };
+};
+
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)
+{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(padd<Packet2d>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(psub<Packet2d>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate<Packet2d>(a.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  Packet2d v1, v2;
+
+  // Get the real values of a 
+  v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
+  // Get the imag values of a
+  v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
+  // Multiply the real a with b
+  v1 = vmulq_f64(v1, b.v);
+  // Multiply the imag a with b
+  v2 = vmulq_f64(v2, b.v);
+  // Conjugate v2 
+  v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
+  // Swap real/imag elements in v2.
+  v2 = preverse<Packet2d>(v2);
+  // Add and return the result
+  return Packet1cd(vaddq_f64(v1, v2));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pand   <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd por    <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pxor   <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
+{
+  Packet2d res = pset1<Packet2d>(0.0);
+  res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
+  res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
+  return Packet1cd(res);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
+{
+  to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
+}
+
+
+template<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)
+{
+  std::complex<double> EIGEN_ALIGN16 res;
+  pstore<std::complex<double> >(&res, a);
+
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs) { return vecs[0]; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
+
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+  {
+    // FIXME is it sure we never have to align a Packet1cd?
+    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return internal::pmul(a, pconj(b));
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return internal::pmul(pconj(a), b);
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return pconj(internal::pmul(a, b));
+  }
+};
+
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
+
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  // TODO optimize it for NEON
+  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+  Packet2d s = pmul<Packet2d>(b.v, b.v);
+  Packet2d rev_s = preverse<Packet2d>(s);
+
+  return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
+}
+
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
+{
+  return Packet1cd(preverse(Packet2d(x.v)));
+}
+
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+  Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
+  kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
+  kernel.packet[1].v = tmp;
+}
+#endif // EIGEN_ARCH_ARM64
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_NEON_H
diff --git a/third-party/Eigen/src/Core/arch/NEON/MathFunctions.h b/third-party/Eigen/src/Core/arch/NEON/MathFunctions.h
new file mode 100644
index 00000000..6bb05bb9
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/NEON/MathFunctions.h
@@ -0,0 +1,91 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/* The sin, cos, exp, and log functions of this file come from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+#ifndef EIGEN_MATH_FUNCTIONS_NEON_H
+#define EIGEN_MATH_FUNCTIONS_NEON_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+  Packet4f tmp, fx;
+
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+  _EIGEN_DECLARE_CONST_Packet4f(exp_hi,  88.3762626647950f);
+  _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+
+  x = vminq_f32(x, p4f_exp_hi);
+  x = vmaxq_f32(x, p4f_exp_lo);
+
+  /* express exp(x) as exp(g + n*log(2)) */
+  fx = vmlaq_f32(p4f_half, x, p4f_cephes_LOG2EF);
+
+  /* perform a floorf */
+  tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
+
+  /* if greater, substract 1 */
+  Packet4ui mask = vcgtq_f32(tmp, fx);
+  mask = vandq_u32(mask, vreinterpretq_u32_f32(p4f_1));
+
+  fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
+
+  tmp = vmulq_f32(fx, p4f_cephes_exp_C1);
+  Packet4f z = vmulq_f32(fx, p4f_cephes_exp_C2);
+  x = vsubq_f32(x, tmp);
+  x = vsubq_f32(x, z);
+
+  Packet4f y = vmulq_f32(p4f_cephes_exp_p0, x);
+  z = vmulq_f32(x, x);
+  y = vaddq_f32(y, p4f_cephes_exp_p1);
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, p4f_cephes_exp_p2);
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, p4f_cephes_exp_p3);
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, p4f_cephes_exp_p4);
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, p4f_cephes_exp_p5);
+
+  y = vmulq_f32(y, z);
+  y = vaddq_f32(y, x);
+  y = vaddq_f32(y, p4f_1);
+
+  /* build 2^n */
+  int32x4_t mm;
+  mm = vcvtq_s32_f32(fx);
+  mm = vaddq_s32(mm, p4i_0x7f);
+  mm = vshlq_n_s32(mm, 23);
+  Packet4f pow2n = vreinterpretq_f32_s32(mm);
+
+  y = vmulq_f32(y, pow2n);
+  return y;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_NEON_H
diff --git a/third-party/Eigen/src/Core/arch/NEON/PacketMath.h b/third-party/Eigen/src/Core/arch/NEON/PacketMath.h
new file mode 100644
index 00000000..3d5ed0d2
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -0,0 +1,760 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
+// Heavily based on Gael's SSE version.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_NEON_H
+#define EIGEN_PACKET_MATH_NEON_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#if EIGEN_ARCH_ARM64
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
+#else
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 
+#endif
+#endif
+
+#if EIGEN_COMP_MSVC
+
+// In MSVC's arm_neon.h header file, all NEON vector types
+// are aliases to the same underlying type __n128.
+// We thus have to wrap them to make them different C++ types.
+// (See also bug 1428)
+
+template<typename T,int unique_id>
+struct eigen_packet_wrapper
+{
+  operator T&() { return m_val; }
+  operator const T&() const { return m_val; }
+  eigen_packet_wrapper() {}
+  eigen_packet_wrapper(const T &v) : m_val(v) {}
+  eigen_packet_wrapper& operator=(const T &v) {
+    m_val = v;
+    return *this;
+  }
+
+  T m_val;
+};
+typedef eigen_packet_wrapper<float32x2_t,0> Packet2f;
+typedef eigen_packet_wrapper<float32x4_t,1> Packet4f;
+typedef eigen_packet_wrapper<int32x4_t  ,2> Packet4i;
+typedef eigen_packet_wrapper<int32x2_t  ,3> Packet2i;
+typedef eigen_packet_wrapper<uint32x4_t ,4> Packet4ui;
+
+#else
+
+typedef float32x2_t Packet2f;
+typedef float32x4_t Packet4f;
+typedef int32x4_t   Packet4i;
+typedef int32x2_t   Packet2i;
+typedef uint32x4_t  Packet4ui;
+
+#endif // EIGEN_COMP_MSVC
+
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+  const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+  const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int32_t>(X))
+
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+  const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+
+#if EIGEN_ARCH_ARM64
+  // __builtin_prefetch tends to do nothing on ARM64 compilers because the
+  // prefetch instructions there are too detailed for __builtin_prefetch to map
+  // meaningfully to them.
+  #define EIGEN_ARM_PREFETCH(ADDR)  __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
+#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+  #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#elif defined __pld
+  #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
+#elif EIGEN_ARCH_ARM32
+  #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
+#else
+  // by default no explicit prefetching
+  #define EIGEN_ARM_PREFETCH(ADDR)
+#endif
+
+template<> struct packet_traits<float>  : default_packet_traits
+{
+  typedef Packet4f type;
+  typedef Packet4f half; // Packet2f intrinsics not implemented yet
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 4,
+    HasHalfPacket=0, // Packet2f intrinsics not implemented yet
+   
+    HasDiv  = 1,
+    // FIXME check the Has*
+    HasSin  = 0,
+    HasCos  = 0,
+    HasLog  = 0,
+    HasExp  = 1,
+    HasSqrt = 0
+  };
+};
+template<> struct packet_traits<int32_t>    : default_packet_traits
+{
+  typedef Packet4i type;
+  typedef Packet4i half; // Packet2i intrinsics not implemented yet
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=4,
+    HasHalfPacket=0 // Packet2i intrinsics not implemented yet
+    // FIXME check the Has*
+  };
+};
+
+#if EIGEN_GNUC_AT_MOST(4,4) && !EIGEN_COMP_LLVM
+// workaround gcc 4.2, 4.3 and 4.4 compilatin issue
+EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); }
+EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); }
+EIGEN_STRONG_INLINE float32x2_t vld1_dup_f32 (const float* x) { return ::vld1_dup_f32 ((const float32_t*)x); }
+EIGEN_STRONG_INLINE void        vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); }
+EIGEN_STRONG_INLINE void        vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); }
+#endif
+
+template<> struct unpacket_traits<Packet4f> { typedef float   type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet4i> { typedef int32_t type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return vdupq_n_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t&    from)   { return vdupq_n_s32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
+{
+  const float f[] = {0, 1, 2, 3};
+  Packet4f countdown = vld1q_f32(f);
+  return vaddq_f32(pset1<Packet4f>(a), countdown);
+}
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a)
+{
+  const int32_t i[] = {0, 1, 2, 3};
+  Packet4i countdown = vld1q_s32(i);
+  return vaddq_s32(pset1<Packet4i>(a), countdown);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+#if EIGEN_ARCH_ARM64
+  return vdivq_f32(a,b);
+#else
+  Packet4f inv, restep, div;
+
+  // NEON does not offer a divide instruction, we have to do a reciprocal approximation
+  // However NEON in contrast to other SIMD engines (AltiVec/SSE), offers
+  // a reciprocal estimate AND a reciprocal step -which saves a few instructions
+  // vrecpeq_f32() returns an estimate to 1/b, which we will finetune with
+  // Newton-Raphson and vrecpsq_f32()
+  inv = vrecpeq_f32(b);
+
+  // This returns a differential, by which we will have to multiply inv to get a better
+  // approximation of 1/b.
+  restep = vrecpsq_f32(b, inv);
+  inv = vmulq_f32(restep, inv);
+
+  // Finally, multiply a by 1/b and get the wanted result of the division.
+  div = vmulq_f32(a, inv);
+
+  return div;
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, const Packet4i& /*b*/)
+{ eigen_assert(false && "packet integer division are not supported by NEON");
+  return pset1<Packet4i>(0);
+}
+
+// Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available,
+// then implements a slow software scalar fallback calling fmaf()!
+// Filed LLVM bug:
+//     https://llvm.org/bugs/show_bug.cgi?id=27216
+#if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM)
+// See bug 936.
+// FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4.
+// FMA is a true fused multiply-add i.e. only 1 rounding at the end, no intermediate rounding.
+// MLA is not fused i.e. does 2 roundings.
+// In addition to giving better accuracy, FMA also gives better performance here on a Krait (Nexus 4):
+// MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vfmaq_f32(c,a,b); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
+#if EIGEN_COMP_CLANG && EIGEN_ARCH_ARM
+  // Clang/ARM will replace VMLA by VMUL+VADD at least for some values of -mcpu,
+  // at least -mcpu=cortex-a8 and -mcpu=cortex-a7. Since the former is the default on
+  // -march=armv7-a, that is a very common case.
+  // See e.g. this thread:
+  //     http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html
+  // Filed LLVM bug:
+  //     https://llvm.org/bugs/show_bug.cgi?id=27219
+  Packet4f r = c;
+  asm volatile(
+    "vmla.f32 %q[r], %q[a], %q[b]"
+    : [r] "+w" (r)
+    : [a] "w" (a),
+      [b] "w" (b)
+    : );
+  return r;
+#else
+  return vmlaq_f32(c,a,b);
+#endif
+}
+#endif
+
+// No FMA instruction for int, so use MLA unconditionally.
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); }
+
+// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return veorq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b)));
+}
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float*    from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t*  from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float*   from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); }
+
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
+{
+  float32x2_t lo, hi;
+  lo = vld1_dup_f32(from);
+  hi = vld1_dup_f32(from+1);
+  return vcombine_f32(lo, hi);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from)
+{
+  int32x2_t lo, hi;
+  lo = vld1_dup_s32(from);
+  hi = vld1_dup_s32(from+1);
+  return vcombine_s32(lo, hi);
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<float>  (float*    to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t*  to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); }
+
+template<> EIGEN_STRONG_INLINE void pstoreu<float>  (float*   to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+  Packet4f res = pset1<Packet4f>(0.f);
+  res = vsetq_lane_f32(from[0*stride], res, 0);
+  res = vsetq_lane_f32(from[1*stride], res, 1);
+  res = vsetq_lane_f32(from[2*stride], res, 2);
+  res = vsetq_lane_f32(from[3*stride], res, 3);
+  return res;
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride)
+{
+  Packet4i res = pset1<Packet4i>(0);
+  res = vsetq_lane_s32(from[0*stride], res, 0);
+  res = vsetq_lane_s32(from[1*stride], res, 1);
+  res = vsetq_lane_s32(from[2*stride], res, 2);
+  res = vsetq_lane_s32(from[3*stride], res, 3);
+  return res;
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+  to[stride*0] = vgetq_lane_f32(from, 0);
+  to[stride*1] = vgetq_lane_f32(from, 1);
+  to[stride*2] = vgetq_lane_f32(from, 2);
+  to[stride*3] = vgetq_lane_f32(from, 3);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from, Index stride)
+{
+  to[stride*0] = vgetq_lane_s32(from, 0);
+  to[stride*1] = vgetq_lane_s32(from, 1);
+  to[stride*2] = vgetq_lane_s32(from, 2);
+  to[stride*3] = vgetq_lane_s32(from, 3);
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<float>  (const float*    addr) { EIGEN_ARM_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t*  addr) { EIGEN_ARM_PREFETCH(addr); }
+
+// FIXME only store the 2 first elements ?
+template<> EIGEN_STRONG_INLINE float   pfirst<Packet4f>(const Packet4f& a) { float   EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) { int32_t EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; }
+
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
+  float32x2_t a_lo, a_hi;
+  Packet4f a_r64;
+
+  a_r64 = vrev64q_f32(a);
+  a_lo = vget_low_f32(a_r64);
+  a_hi = vget_high_f32(a_r64);
+  return vcombine_f32(a_hi, a_lo);
+}
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
+  int32x2_t a_lo, a_hi;
+  Packet4i a_r64;
+
+  a_r64 = vrev64q_s32(a);
+  a_lo = vget_low_s32(a_r64);
+  a_hi = vget_high_s32(a_r64);
+  return vcombine_s32(a_hi, a_lo);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); }
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); }
+
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+  float32x2_t a_lo, a_hi, sum;
+
+  a_lo = vget_low_f32(a);
+  a_hi = vget_high_f32(a);
+  sum = vpadd_f32(a_lo, a_hi);
+  sum = vpadd_f32(sum, sum);
+  return vget_lane_f32(sum, 0);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+  float32x4x2_t vtrn1, vtrn2, res1, res2;
+  Packet4f sum1, sum2, sum;
+
+  // NEON zip performs interleaving of the supplied vectors.
+  // We perform two interleaves in a row to acquire the transposed vector
+  vtrn1 = vzipq_f32(vecs[0], vecs[2]);
+  vtrn2 = vzipq_f32(vecs[1], vecs[3]);
+  res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]);
+  res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]);
+
+  // Do the addition of the resulting vectors
+  sum1 = vaddq_f32(res1.val[0], res1.val[1]);
+  sum2 = vaddq_f32(res2.val[0], res2.val[1]);
+  sum = vaddq_f32(sum1, sum2);
+
+  return sum;
+}
+
+template<> EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a)
+{
+  int32x2_t a_lo, a_hi, sum;
+
+  a_lo = vget_low_s32(a);
+  a_hi = vget_high_s32(a);
+  sum = vpadd_s32(a_lo, a_hi);
+  sum = vpadd_s32(sum, sum);
+  return vget_lane_s32(sum, 0);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  int32x4x2_t vtrn1, vtrn2, res1, res2;
+  Packet4i sum1, sum2, sum;
+
+  // NEON zip performs interleaving of the supplied vectors.
+  // We perform two interleaves in a row to acquire the transposed vector
+  vtrn1 = vzipq_s32(vecs[0], vecs[2]);
+  vtrn2 = vzipq_s32(vecs[1], vecs[3]);
+  res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]);
+  res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]);
+
+  // Do the addition of the resulting vectors
+  sum1 = vaddq_s32(res1.val[0], res1.val[1]);
+  sum2 = vaddq_s32(res2.val[0], res2.val[1]);
+  sum = vaddq_s32(sum1, sum2);
+
+  return sum;
+}
+
+// Other reduction functions:
+// mul
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+  float32x2_t a_lo, a_hi, prod;
+
+  // Get a_lo = |a1|a2| and a_hi = |a3|a4|
+  a_lo = vget_low_f32(a);
+  a_hi = vget_high_f32(a);
+  // Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
+  prod = vmul_f32(a_lo, a_hi);
+  // Multiply prod with its swapped value |a2*a4|a1*a3|
+  prod = vmul_f32(prod, vrev64_f32(prod));
+
+  return vget_lane_f32(prod, 0);
+}
+template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a)
+{
+  int32x2_t a_lo, a_hi, prod;
+
+  // Get a_lo = |a1|a2| and a_hi = |a3|a4|
+  a_lo = vget_low_s32(a);
+  a_hi = vget_high_s32(a);
+  // Get the product of a_lo * a_hi -> |a1*a3|a2*a4|
+  prod = vmul_s32(a_lo, a_hi);
+  // Multiply prod with its swapped value |a2*a4|a1*a3|
+  prod = vmul_s32(prod, vrev64_s32(prod));
+
+  return vget_lane_s32(prod, 0);
+}
+
+// min
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+  float32x2_t a_lo, a_hi, min;
+
+  a_lo = vget_low_f32(a);
+  a_hi = vget_high_f32(a);
+  min = vpmin_f32(a_lo, a_hi);
+  min = vpmin_f32(min, min);
+
+  return vget_lane_f32(min, 0);
+}
+
+template<> EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a)
+{
+  int32x2_t a_lo, a_hi, min;
+
+  a_lo = vget_low_s32(a);
+  a_hi = vget_high_s32(a);
+  min = vpmin_s32(a_lo, a_hi);
+  min = vpmin_s32(min, min);
+  
+  return vget_lane_s32(min, 0);
+}
+
+// max
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+  float32x2_t a_lo, a_hi, max;
+
+  a_lo = vget_low_f32(a);
+  a_hi = vget_high_f32(a);
+  max = vpmax_f32(a_lo, a_hi);
+  max = vpmax_f32(max, max);
+
+  return vget_lane_f32(max, 0);
+}
+
+template<> EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a)
+{
+  int32x2_t a_lo, a_hi, max;
+
+  a_lo = vget_low_s32(a);
+  a_hi = vget_high_s32(a);
+  max = vpmax_s32(a_lo, a_hi);
+  max = vpmax_s32(max, max);
+
+  return vget_lane_s32(max, 0);
+}
+
+// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
+// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+    EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+    {\
+        if (Offset!=0)\
+            first = Command(first, second, Offset);\
+    }\
+};\
+
+PALIGN_NEON(0,Packet4f,vextq_f32)
+PALIGN_NEON(1,Packet4f,vextq_f32)
+PALIGN_NEON(2,Packet4f,vextq_f32)
+PALIGN_NEON(3,Packet4f,vextq_f32)
+PALIGN_NEON(0,Packet4i,vextq_s32)
+PALIGN_NEON(1,Packet4i,vextq_s32)
+PALIGN_NEON(2,Packet4i,vextq_s32)
+PALIGN_NEON(3,Packet4i,vextq_s32)
+
+#undef PALIGN_NEON
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+  float32x4x2_t tmp1 = vzipq_f32(kernel.packet[0], kernel.packet[1]);
+  float32x4x2_t tmp2 = vzipq_f32(kernel.packet[2], kernel.packet[3]);
+
+  kernel.packet[0] = vcombine_f32(vget_low_f32(tmp1.val[0]), vget_low_f32(tmp2.val[0]));
+  kernel.packet[1] = vcombine_f32(vget_high_f32(tmp1.val[0]), vget_high_f32(tmp2.val[0]));
+  kernel.packet[2] = vcombine_f32(vget_low_f32(tmp1.val[1]), vget_low_f32(tmp2.val[1]));
+  kernel.packet[3] = vcombine_f32(vget_high_f32(tmp1.val[1]), vget_high_f32(tmp2.val[1]));
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+  int32x4x2_t tmp1 = vzipq_s32(kernel.packet[0], kernel.packet[1]);
+  int32x4x2_t tmp2 = vzipq_s32(kernel.packet[2], kernel.packet[3]);
+  kernel.packet[0] = vcombine_s32(vget_low_s32(tmp1.val[0]), vget_low_s32(tmp2.val[0]));
+  kernel.packet[1] = vcombine_s32(vget_high_s32(tmp1.val[0]), vget_high_s32(tmp2.val[0]));
+  kernel.packet[2] = vcombine_s32(vget_low_s32(tmp1.val[1]), vget_low_s32(tmp2.val[1]));
+  kernel.packet[3] = vcombine_s32(vget_high_s32(tmp1.val[1]), vget_high_s32(tmp2.val[1]));
+}
+
+//---------- double ----------
+
+// Clang 3.5 in the iOS toolchain has an ICE triggered by NEON intrisics for double.
+// Confirmed at least with __apple_build_version__ = 6000054.
+#ifdef __apple_build_version__
+// Let's hope that by the time __apple_build_version__ hits the 601* range, the bug will be fixed.
+// https://gist.github.com/yamaya/2924292 suggests that the 3 first digits are only updated with
+// major toolchain updates.
+#define EIGEN_APPLE_DOUBLE_NEON_BUG (__apple_build_version__ < 6010000)
+#else
+#define EIGEN_APPLE_DOUBLE_NEON_BUG 0
+#endif
+
+#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+
+// Bug 907: workaround missing declarations of the following two functions in the ADK
+// Defining these functions as templates ensures that if these intrinsics are
+// already defined in arm_neon.h, then our workaround doesn't cause a conflict
+// and has lower priority in overload resolution.
+template <typename T>
+uint64x2_t vreinterpretq_u64_f64(T a)
+{
+  return (uint64x2_t) a;
+}
+
+template <typename T>
+float64x2_t vreinterpretq_f64_u64(T a)
+{
+  return (float64x2_t) a;
+}
+
+typedef float64x2_t Packet2d;
+typedef float64x1_t Packet1d;
+
+template<> struct packet_traits<double>  : default_packet_traits
+{
+  typedef Packet2d type;
+  typedef Packet2d half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 2,
+    HasHalfPacket=0,
+   
+    HasDiv  = 1,
+    // FIXME check the Has*
+    HasSin  = 0,
+    HasCos  = 0,
+    HasLog  = 0,
+    HasExp  = 0,
+    HasSqrt = 0
+  };
+};
+
+template<> struct unpacket_traits<Packet2d> { typedef double  type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double&  from) { return vdupq_n_f64(from); }
+
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)
+{
+  const double countdown_raw[] = {0.0,1.0};
+  const Packet2d countdown = vld1q_f64(countdown_raw);
+  return vaddq_f64(pset1<Packet2d>(a), countdown);
+}
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return vsubq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return vnegq_f64(a); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmulq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vdivq_f64(a,b); }
+
+#ifdef __ARM_FEATURE_FMA
+// See bug 936. See above comment about FMA for float.
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vfmaq_f64(c,a,b); }
+#else
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vmlaq_f64(c,a,b); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); }
+
+// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+  return vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+  return vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+  return vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+  return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f64(from); }
+
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f64(from); }
+
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double*   from)
+{
+  return vld1q_dup_f64(from);
+}
+template<> EIGEN_STRONG_INLINE void pstore<double>(double*   to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f64(to, from); }
+
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double*  to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f64(to, from); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+  Packet2d res = pset1<Packet2d>(0.0);
+  res = vsetq_lane_f64(from[0*stride], res, 0);
+  res = vsetq_lane_f64(from[1*stride], res, 1);
+  return res;
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+  to[stride*0] = vgetq_lane_f64(from, 0);
+  to[stride*1] = vgetq_lane_f64(from, 1);
+}
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ARM_PREFETCH(addr); }
+
+// FIXME only store the 2 first elements ?
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(a, 0); }
+
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) { return vcombine_f64(vget_high_f64(a), vget_low_f64(a)); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { return vabsq_f64(a); }
+
+#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
+// workaround ICE, see bug 907
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) + vget_high_f64(a))[0]; }
+#else
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) + vget_high_f64(a), 0); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+  float64x2_t trn1, trn2;
+
+  // NEON zip performs interleaving of the supplied vectors.
+  // We perform two interleaves in a row to acquire the transposed vector
+  trn1 = vzip1q_f64(vecs[0], vecs[1]);
+  trn2 = vzip2q_f64(vecs[0], vecs[1]);
+
+  // Do the addition of the resulting vectors
+  return vaddq_f64(trn1, trn2);
+}
+// Other reduction functions:
+// mul
+#if EIGEN_COMP_CLANG && defined(__apple_build_version__)
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return (vget_low_f64(a) * vget_high_f64(a))[0]; }
+#else
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) { return vget_lane_f64(vget_low_f64(a) * vget_high_f64(a), 0); }
+#endif
+
+// min
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpminq_f64(a, a), 0); }
+
+// max
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) { return vgetq_lane_f64(vpmaxq_f64(a, a), 0); }
+
+// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
+// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
+#define PALIGN_NEON(Offset,Type,Command) \
+template<>\
+struct palign_impl<Offset,Type>\
+{\
+    EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
+    {\
+        if (Offset!=0)\
+            first = Command(first, second, Offset);\
+    }\
+};\
+
+PALIGN_NEON(0,Packet2d,vextq_f64)
+PALIGN_NEON(1,Packet2d,vextq_f64)
+#undef PALIGN_NEON
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+  float64x2_t trn1 = vzip1q_f64(kernel.packet[0], kernel.packet[1]);
+  float64x2_t trn2 = vzip2q_f64(kernel.packet[0], kernel.packet[1]);
+
+  kernel.packet[0] = trn1;
+  kernel.packet[1] = trn2;
+}
+#endif // EIGEN_ARCH_ARM64 
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_NEON_H
diff --git a/third-party/Eigen/src/Core/arch/SSE/Complex.h b/third-party/Eigen/src/Core/arch/SSE/Complex.h
new file mode 100644
index 00000000..d075043c
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/SSE/Complex.h
@@ -0,0 +1,471 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_SSE_H
+#define EIGEN_COMPLEX_SSE_H
+
+namespace Eigen {
+
+namespace internal {
+
+//---------- float ----------
+struct Packet2cf
+{
+  EIGEN_STRONG_INLINE Packet2cf() {}
+  EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
+  __m128  v;
+};
+
+// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
+// to leverage AVX instructions.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<float> >  : default_packet_traits
+{
+  typedef Packet2cf type;
+  typedef Packet2cf half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 2,
+    HasHalfPacket = 0,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0,
+    HasBlend = 1
+  };
+};
+#endif
+
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
+{
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+  return Packet2cf(_mm_xor_ps(a.v,mask));
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+  return Packet2cf(_mm_xor_ps(a.v,mask));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  #ifdef EIGEN_VECTORIZE_SSE3
+  return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
+                                 _mm_mul_ps(_mm_movehdup_ps(a.v),
+                                            vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+//   return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+//                                  _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+//                                             vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+  #else
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
+  return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+                              _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                                    vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+  #endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
+{
+  Packet2cf res;
+#if EIGEN_GNUC_AT_MOST(4,2)
+  // Workaround annoying "may be used uninitialized in this function" warning with gcc 4.2
+  res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
+#elif EIGEN_GNUC_AT_LEAST(4,6)
+  // Suppress annoying "may be used uninitialized in this function" warning with gcc >= 4.6
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wuninitialized"
+  res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+  #pragma GCC diagnostic pop
+#else
+  res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+#endif
+  return Packet2cf(_mm_movelh_ps(res.v,res.v));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
+
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *   to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
+
+
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+  return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
+                              std::imag(from[0*stride]), std::real(from[0*stride])));
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+  to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
+                                     _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
+  to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
+                                     _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
+}
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+
+template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
+{
+  #if EIGEN_GNUC_AT_MOST(4,3)
+  // Workaround gcc 4.2 ICE - this is not performance wise ideal, but who cares...
+  // This workaround also fix invalid code generation with gcc 4.3
+  EIGEN_ALIGN16 std::complex<float> res[2];
+  _mm_store_ps((float*)res, a.v);
+  return res[0];
+  #else
+  std::complex<float> res;
+  _mm_storel_pi((__m64*)&res, a.v);
+  return res;
+  #endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+  return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+  return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+  return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+  {
+    if (Offset==1)
+    {
+      first.v = _mm_movehl_ps(first.v, first.v);
+      first.v = _mm_movelh_ps(first.v, second.v);
+    }
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    #ifdef EIGEN_VECTORIZE_SSE3
+    return internal::pmul(a, pconj(b));
+    #else
+    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+    return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+                                _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                           vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+    #endif
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    #ifdef EIGEN_VECTORIZE_SSE3
+    return internal::pmul(pconj(a), b);
+    #else
+    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+    return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
+                                _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                                      vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
+    #endif
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    #ifdef EIGEN_VECTORIZE_SSE3
+    return pconj(internal::pmul(a, b));
+    #else
+    const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+    return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
+                                _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
+                                           vec4f_swizzle1(b.v, 1, 0, 3, 2))));
+    #endif
+  }
+};
+
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
+
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  // TODO optimize it for SSE3 and 4
+  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
+  __m128 s = _mm_mul_ps(b.v,b.v);
+  return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
+}
+
+EIGEN_STRONG_INLINE Packet2cf pcplxflip/* <Packet2cf> */(const Packet2cf& x)
+{
+  return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
+}
+
+
+//---------- double ----------
+struct Packet1cd
+{
+  EIGEN_STRONG_INLINE Packet1cd() {}
+  EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
+  __m128d  v;
+};
+
+// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
+// to leverage AVX instructions.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<std::complex<double> >  : default_packet_traits
+{
+  typedef Packet1cd type;
+  typedef Packet1cd half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 0,
+    size = 1,
+    HasHalfPacket = 0,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0
+  };
+};
+#endif
+
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
+{
+  const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+  return Packet1cd(_mm_xor_pd(a.v,mask));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  #ifdef EIGEN_VECTORIZE_SSE3
+  return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
+                                 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+                                            vec2d_swizzle1(b.v, 1, 0))));
+  #else
+  const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
+  return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+                              _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+                                                    vec2d_swizzle1(b.v, 1, 0)), mask)));
+  #endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pand   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
+
+// FIXME force unaligned load, this is a temporary fix
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
+{ EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)
+{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
+
+// FIXME force unaligned store, this is a temporary fix
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+
+template<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)
+{
+  EIGEN_ALIGN16 double res[2];
+  _mm_store_pd(res, a.v);
+  return std::complex<double>(res[0],res[1]);
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+  return pfirst(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+  return vecs[0];
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+  return pfirst(a);
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+  {
+    // FIXME is it sure we never have to align a Packet1cd?
+    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    #ifdef EIGEN_VECTORIZE_SSE3
+    return internal::pmul(a, pconj(b));
+    #else
+    const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+    return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+                                _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+                                           vec2d_swizzle1(b.v, 1, 0))));
+    #endif
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    #ifdef EIGEN_VECTORIZE_SSE3
+    return internal::pmul(pconj(a), b);
+    #else
+    const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+    return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
+                                _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+                                                      vec2d_swizzle1(b.v, 1, 0)), mask)));
+    #endif
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    #ifdef EIGEN_VECTORIZE_SSE3
+    return pconj(internal::pmul(a, b));
+    #else
+    const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+    return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), mask),
+                                _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
+                                           vec2d_swizzle1(b.v, 1, 0))));
+    #endif
+  }
+};
+
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
+
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  // TODO optimize it for SSE3 and 4
+  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+  __m128d s = _mm_mul_pd(b.v,b.v);
+  return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
+}
+
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/* <Packet1cd> */(const Packet1cd& x)
+{
+  return Packet1cd(preverse(Packet2d(x.v)));
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2cf,2>& kernel) {
+  __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
+  __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
+
+  __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
+  kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
+  kernel.packet[1].v = tmp;
+}
+
+template<>  EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+  __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
+  return Packet2cf(_mm_castpd_ps(result));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertfirst(const Packet2cf& a, std::complex<float> b)
+{
+  return Packet2cf(_mm_loadl_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertfirst(const Packet1cd&, std::complex<double> b)
+{
+  return pset1<Packet1cd>(b);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pinsertlast(const Packet2cf& a, std::complex<float> b)
+{
+  return Packet2cf(_mm_loadh_pi(a.v, reinterpret_cast<const __m64*>(&b)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pinsertlast(const Packet1cd&, std::complex<double> b)
+{
+  return pset1<Packet1cd>(b);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_SSE_H
diff --git a/third-party/Eigen/src/Core/arch/SSE/MathFunctions.h b/third-party/Eigen/src/Core/arch/SSE/MathFunctions.h
new file mode 100644
index 00000000..7b5f948e
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -0,0 +1,562 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007 Julien Pommier
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/* The sin, cos, exp, and log functions of this file come from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
+#define EIGEN_MATH_FUNCTIONS_SSE_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f plog<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
+
+  /* the smallest non denormalized float number */
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos,  0x00800000);
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf,     0xff800000);//-1.f/0.f);
+
+  /* natural logarithm computed for 4 simultaneous float
+    return NaN for x <= 0
+  */
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
+
+
+  Packet4i emm0;
+
+  Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); // not greater equal is true if x is NaN
+  Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
+
+  x = pmax(x, p4f_min_norm_pos);  /* cut off denormalized stuff */
+  emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
+
+  /* keep only the fractional part */
+  x = _mm_and_ps(x, p4f_inv_mant_mask);
+  x = _mm_or_ps(x, p4f_half);
+
+  emm0 = _mm_sub_epi32(emm0, p4i_0x7f);
+  Packet4f e = padd(Packet4f(_mm_cvtepi32_ps(emm0)), p4f_1);
+
+  /* part2:
+     if( x < SQRTHF ) {
+       e -= 1;
+       x = x + x - 1.0;
+     } else { x = x - 1.0; }
+  */
+  Packet4f mask = _mm_cmplt_ps(x, p4f_cephes_SQRTHF);
+  Packet4f tmp = pand(x, mask);
+  x = psub(x, p4f_1);
+  e = psub(e, pand(p4f_1, mask));
+  x = padd(x, tmp);
+
+  Packet4f x2 = pmul(x,x);
+  Packet4f x3 = pmul(x2,x);
+
+  Packet4f y, y1, y2;
+  y  = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
+  y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
+  y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
+  y  = pmadd(y , x, p4f_cephes_log_p2);
+  y1 = pmadd(y1, x, p4f_cephes_log_p5);
+  y2 = pmadd(y2, x, p4f_cephes_log_p8);
+  y = pmadd(y, x3, y1);
+  y = pmadd(y, x3, y2);
+  y = pmul(y, x3);
+
+  y1 = pmul(e, p4f_cephes_log_q1);
+  tmp = pmul(x2, p4f_half);
+  y = padd(y, y1);
+  x = psub(x, tmp);
+  y2 = pmul(e, p4f_cephes_log_q2);
+  x = padd(x, y);
+  x = padd(x, y2);
+  // negative arg will be NAN, 0 will be -INF
+  return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
+                   _mm_and_ps(iszero_mask, p4f_minus_inf));
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+  _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
+
+
+  _EIGEN_DECLARE_CONST_Packet4f(exp_hi,  88.3762626647950f);
+  _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
+
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
+
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
+
+  Packet4f tmp, fx;
+  Packet4i emm0;
+
+  // clamp x
+  x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
+
+  /* express exp(x) as exp(g + n*log(2)) */
+  fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  fx = _mm_floor_ps(fx);
+#else
+  emm0 = _mm_cvttps_epi32(fx);
+  tmp  = _mm_cvtepi32_ps(emm0);
+  /* if greater, substract 1 */
+  Packet4f mask = _mm_cmpgt_ps(tmp, fx);
+  mask = _mm_and_ps(mask, p4f_1);
+  fx = psub(tmp, mask);
+#endif
+
+  tmp = pmul(fx, p4f_cephes_exp_C1);
+  Packet4f z = pmul(fx, p4f_cephes_exp_C2);
+  x = psub(x, tmp);
+  x = psub(x, z);
+
+  z = pmul(x,x);
+
+  Packet4f y = p4f_cephes_exp_p0;
+  y = pmadd(y, x, p4f_cephes_exp_p1);
+  y = pmadd(y, x, p4f_cephes_exp_p2);
+  y = pmadd(y, x, p4f_cephes_exp_p3);
+  y = pmadd(y, x, p4f_cephes_exp_p4);
+  y = pmadd(y, x, p4f_cephes_exp_p5);
+  y = pmadd(y, z, x);
+  y = padd(y, p4f_1);
+
+  // build 2^n
+  emm0 = _mm_cvttps_epi32(fx);
+  emm0 = _mm_add_epi32(emm0, p4i_0x7f);
+  emm0 = _mm_slli_epi32(emm0, 23);
+  return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
+}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+  Packet2d x = _x;
+
+  _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+  _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+  _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+
+  _EIGEN_DECLARE_CONST_Packet2d(exp_hi,  709.437);
+  _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+  _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+  static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
+
+  Packet2d tmp, fx;
+  Packet4i emm0;
+
+  // clamp x
+  x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+  /* express exp(x) as exp(g + n*log(2)) */
+  fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  fx = _mm_floor_pd(fx);
+#else
+  emm0 = _mm_cvttpd_epi32(fx);
+  tmp  = _mm_cvtepi32_pd(emm0);
+  /* if greater, substract 1 */
+  Packet2d mask = _mm_cmpgt_pd(tmp, fx);
+  mask = _mm_and_pd(mask, p2d_1);
+  fx = psub(tmp, mask);
+#endif
+
+  tmp = pmul(fx, p2d_cephes_exp_C1);
+  Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+  x = psub(x, tmp);
+  x = psub(x, z);
+
+  Packet2d x2 = pmul(x,x);
+
+  Packet2d px = p2d_cephes_exp_p0;
+  px = pmadd(px, x2, p2d_cephes_exp_p1);
+  px = pmadd(px, x2, p2d_cephes_exp_p2);
+  px = pmul (px, x);
+
+  Packet2d qx = p2d_cephes_exp_q0;
+  qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+  qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+  qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+
+  x = pdiv(px,psub(qx,px));
+  x = pmadd(p2d_2,x,p2d_1);
+
+  // build 2^n
+  emm0 = _mm_cvttpd_epi32(fx);
+  emm0 = _mm_add_epi32(emm0, p4i_1023_0);
+  emm0 = _mm_slli_epi32(emm0, 20);
+  emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
+  return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
+}
+
+/* evaluation of 4 sines at onces, using SSE2 intrinsics.
+
+   The code is the exact rewriting of the cephes sinf function.
+   Precision is excellent as long as x < 8192 (I did not bother to
+   take into account the special handling they have for greater values
+   -- it does not return garbage for arguments over 8192, though, but
+   the extra precision is missing).
+
+   Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
+   surprising but correct result.
+*/
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psin<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+
+  _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+  _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+  _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+  _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
+
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p1,  8.3321608736E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p0,  2.443315711809948E-005f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p2,  4.166664568298827E-002f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
+
+  Packet4f xmm1, xmm2, xmm3, sign_bit, y;
+
+  Packet4i emm0, emm2;
+  sign_bit = x;
+  /* take the absolute value */
+  x = pabs(x);
+
+  /* take the modulo */
+
+  /* extract the sign bit (upper one) */
+  sign_bit = _mm_and_ps(sign_bit, p4f_sign_mask);
+
+  /* scale by 4/Pi */
+  y = pmul(x, p4f_cephes_FOPI);
+
+  /* store the integer part of y in mm0 */
+  emm2 = _mm_cvttps_epi32(y);
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  emm2 = _mm_add_epi32(emm2, p4i_1);
+  emm2 = _mm_and_si128(emm2, p4i_not1);
+  y = _mm_cvtepi32_ps(emm2);
+  /* get the swap sign flag */
+  emm0 = _mm_and_si128(emm2, p4i_4);
+  emm0 = _mm_slli_epi32(emm0, 29);
+  /* get the polynom selection mask
+     there is one polynom for 0 <= x <= Pi/4
+     and another one for Pi/4<x<=Pi/2
+
+     Both branches will be computed.
+  */
+  emm2 = _mm_and_si128(emm2, p4i_2);
+  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+
+  Packet4f swap_sign_bit = _mm_castsi128_ps(emm0);
+  Packet4f poly_mask = _mm_castsi128_ps(emm2);
+  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+
+  /* The magic pass: "Extended precision modular arithmetic"
+     x = ((x - y * DP1) - y * DP2) - y * DP3; */
+  xmm1 = pmul(y, p4f_minus_cephes_DP1);
+  xmm2 = pmul(y, p4f_minus_cephes_DP2);
+  xmm3 = pmul(y, p4f_minus_cephes_DP3);
+  x = padd(x, xmm1);
+  x = padd(x, xmm2);
+  x = padd(x, xmm3);
+
+  /* Evaluate the first polynom  (0 <= x <= Pi/4) */
+  y = p4f_coscof_p0;
+  Packet4f z = _mm_mul_ps(x,x);
+
+  y = pmadd(y, z, p4f_coscof_p1);
+  y = pmadd(y, z, p4f_coscof_p2);
+  y = pmul(y, z);
+  y = pmul(y, z);
+  Packet4f tmp = pmul(z, p4f_half);
+  y = psub(y, tmp);
+  y = padd(y, p4f_1);
+
+  /* Evaluate the second polynom  (Pi/4 <= x <= 0) */
+
+  Packet4f y2 = p4f_sincof_p0;
+  y2 = pmadd(y2, z, p4f_sincof_p1);
+  y2 = pmadd(y2, z, p4f_sincof_p2);
+  y2 = pmul(y2, z);
+  y2 = pmul(y2, x);
+  y2 = padd(y2, x);
+
+  /* select the correct result from the two polynoms */
+  y2 = _mm_and_ps(poly_mask, y2);
+  y = _mm_andnot_ps(poly_mask, y);
+  y = _mm_or_ps(y,y2);
+  /* update the sign */
+  return _mm_xor_ps(y, sign_bit);
+}
+
+/* almost the same as psin */
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pcos<Packet4f>(const Packet4f& _x)
+{
+  Packet4f x = _x;
+  _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
+  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
+
+  _EIGEN_DECLARE_CONST_Packet4i(1, 1);
+  _EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
+  _EIGEN_DECLARE_CONST_Packet4i(2, 2);
+  _EIGEN_DECLARE_CONST_Packet4i(4, 4);
+
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p1,  8.3321608736E-3f);
+  _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p0,  2.443315711809948E-005f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003f);
+  _EIGEN_DECLARE_CONST_Packet4f(coscof_p2,  4.166664568298827E-002f);
+  _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
+
+  Packet4f xmm1, xmm2, xmm3, y;
+  Packet4i emm0, emm2;
+
+  x = pabs(x);
+
+  /* scale by 4/Pi */
+  y = pmul(x, p4f_cephes_FOPI);
+
+  /* get the integer part of y */
+  emm2 = _mm_cvttps_epi32(y);
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  emm2 = _mm_add_epi32(emm2, p4i_1);
+  emm2 = _mm_and_si128(emm2, p4i_not1);
+  y = _mm_cvtepi32_ps(emm2);
+
+  emm2 = _mm_sub_epi32(emm2, p4i_2);
+
+  /* get the swap sign flag */
+  emm0 = _mm_andnot_si128(emm2, p4i_4);
+  emm0 = _mm_slli_epi32(emm0, 29);
+  /* get the polynom selection mask */
+  emm2 = _mm_and_si128(emm2, p4i_2);
+  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+
+  Packet4f sign_bit = _mm_castsi128_ps(emm0);
+  Packet4f poly_mask = _mm_castsi128_ps(emm2);
+
+  /* The magic pass: "Extended precision modular arithmetic"
+     x = ((x - y * DP1) - y * DP2) - y * DP3; */
+  xmm1 = pmul(y, p4f_minus_cephes_DP1);
+  xmm2 = pmul(y, p4f_minus_cephes_DP2);
+  xmm3 = pmul(y, p4f_minus_cephes_DP3);
+  x = padd(x, xmm1);
+  x = padd(x, xmm2);
+  x = padd(x, xmm3);
+
+  /* Evaluate the first polynom  (0 <= x <= Pi/4) */
+  y = p4f_coscof_p0;
+  Packet4f z = pmul(x,x);
+
+  y = pmadd(y,z,p4f_coscof_p1);
+  y = pmadd(y,z,p4f_coscof_p2);
+  y = pmul(y, z);
+  y = pmul(y, z);
+  Packet4f tmp = _mm_mul_ps(z, p4f_half);
+  y = psub(y, tmp);
+  y = padd(y, p4f_1);
+
+  /* Evaluate the second polynom  (Pi/4 <= x <= 0) */
+  Packet4f y2 = p4f_sincof_p0;
+  y2 = pmadd(y2, z, p4f_sincof_p1);
+  y2 = pmadd(y2, z, p4f_sincof_p2);
+  y2 = pmul(y2, z);
+  y2 = pmadd(y2, x, x);
+
+  /* select the correct result from the two polynoms */
+  y2 = _mm_and_ps(poly_mask, y2);
+  y  = _mm_andnot_ps(poly_mask, y);
+  y  = _mm_or_ps(y,y2);
+
+  /* update the sign */
+  return _mm_xor_ps(y, sign_bit);
+}
+
+#if EIGEN_FAST_MATH
+
+// Functions for sqrt.
+// The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step
+// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
+// exact solution. It does not handle +inf, or denormalized numbers correctly.
+// The main advantage of this approach is not just speed, but also the fact that
+// it can be inlined and pipelined with other computations, further reducing its
+// effective latency. This is similar to Quake3's fast inverse square root.
+// For detail see here: http://www.beyond3d.com/content/articles/8/
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& _x)
+{
+  Packet4f half = pmul(_x, pset1<Packet4f>(.5f));
+  Packet4f denormal_mask = _mm_and_ps(
+      _mm_cmpge_ps(_x, _mm_setzero_ps()),
+      _mm_cmplt_ps(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())));
+
+  // Compute approximate reciprocal sqrt.
+  Packet4f x = _mm_rsqrt_ps(_x);
+  // Do a single step of Newton's iteration.
+  x = pmul(x, psub(pset1<Packet4f>(1.5f), pmul(half, pmul(x,x))));
+  // Flush results for denormals to zero.
+  return _mm_andnot_ps(denormal_mask, pmul(_x,x));
+}
+
+#else
+
+template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
+
+#endif
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
+
+#if EIGEN_FAST_MATH
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000);
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(nan, 0x7fc00000);
+  _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
+  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
+  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000);
+
+  Packet4f neg_half = pmul(_x, p4f_minus_half);
+
+  // select only the inverse sqrt of positive normal inputs (denormals are
+  // flushed to zero and cause infs as well).
+  Packet4f le_zero_mask = _mm_cmple_ps(_x, p4f_flt_min);
+  Packet4f x = _mm_andnot_ps(le_zero_mask, _mm_rsqrt_ps(_x));
+
+  // Fill in NaNs and Infs for the negative/zero entries.
+  Packet4f neg_mask = _mm_cmplt_ps(_x, _mm_setzero_ps());
+  Packet4f zero_mask = _mm_andnot_ps(neg_mask, le_zero_mask);
+  Packet4f infs_and_nans = _mm_or_ps(_mm_and_ps(neg_mask, p4f_nan),
+                                     _mm_and_ps(zero_mask, p4f_inf));
+
+  // Do a single step of Newton's iteration.
+  x = pmul(x, pmadd(neg_half, pmul(x, x), p4f_one_point_five));
+
+  // Insert NaNs and Infs in all the right places.
+  return _mm_or_ps(x, infs_and_nans);
+}
+
+#else
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x) {
+  // Unfortunately we can't use the much faster mm_rqsrt_ps since it only provides an approximation.
+  return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
+}
+
+#endif
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+  // Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation.
+  return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
+}
+
+// Hyperbolic Tangent function.
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
+ptanh<Packet4f>(const Packet4f& x) {
+  return internal::generic_fast_tanh_float(x);
+}
+
+} // end namespace internal
+
+namespace numext {
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sqrt(const float &x)
+{
+  return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sqrt(const double &x)
+{
+#if EIGEN_COMP_GNUC_STRICT
+  // This works around a GCC bug generating poor code for _mm_sqrt_pd
+  // See https://bitbucket.org/eigen/eigen/commits/14f468dba4d350d7c19c9b93072e19f7b3df563b
+  return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
+#else
+  return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
+#endif
+}
+
+} // end namespace numex
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_SSE_H
diff --git a/third-party/Eigen/src/Core/arch/SSE/PacketMath.h b/third-party/Eigen/src/Core/arch/SSE/PacketMath.h
new file mode 100644
index 00000000..60e2517e
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -0,0 +1,895 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_SSE_H
+#define EIGEN_PACKET_MATH_SSE_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
+#endif
+
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
+#endif
+
+#ifdef __FMA__
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD 1
+#endif
+#endif
+
+#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX
+// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
+// have overloads for both types without linking error.
+// One solution is to increase ABI version using -fabi-version=4 (or greater).
+// Otherwise, we workaround this inconvenience by wrapping 128bit types into the following helper
+// structure:
+template<typename T>
+struct eigen_packet_wrapper
+{
+  EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
+  EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
+  EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
+  EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
+  EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
+    m_val = v;
+    return *this;
+  }
+  
+  T m_val;
+};
+typedef eigen_packet_wrapper<__m128>  Packet4f;
+typedef eigen_packet_wrapper<__m128i> Packet4i;
+typedef eigen_packet_wrapper<__m128d> Packet2d;
+#else
+typedef __m128  Packet4f;
+typedef __m128i Packet4i;
+typedef __m128d Packet2d;
+#endif
+
+template<> struct is_arithmetic<__m128>  { enum { value = true }; };
+template<> struct is_arithmetic<__m128i> { enum { value = true }; };
+template<> struct is_arithmetic<__m128d> { enum { value = true }; };
+
+#define vec4f_swizzle1(v,p,q,r,s) \
+  (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
+
+#define vec4i_swizzle1(v,p,q,r,s) \
+  (_mm_shuffle_epi32( v, ((s)<<6|(r)<<4|(q)<<2|(p))))
+
+#define vec2d_swizzle1(v,p,q) \
+  (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2)))))
+  
+#define vec4f_swizzle2(a,b,p,q,r,s) \
+  (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p))))
+
+#define vec4i_swizzle2(a,b,p,q,r,s) \
+  (_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), ((s)<<6|(r)<<4|(q)<<2|(p))))))
+
+#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
+  const Packet4f p4f_##NAME = pset1<Packet4f>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+  const Packet2d p2d_##NAME = pset1<Packet2d>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
+  const Packet4f p4f_##NAME = _mm_castsi128_ps(pset1<Packet4i>(X))
+
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+  const Packet4i p4i_##NAME = pset1<Packet4i>(X)
+
+
+// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
+// to leverage AVX instructions.
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct packet_traits<float>  : default_packet_traits
+{
+  typedef Packet4f type;
+  typedef Packet4f half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=4,
+    HasHalfPacket = 0,
+
+    HasDiv  = 1,
+    HasSin  = EIGEN_FAST_MATH,
+    HasCos  = EIGEN_FAST_MATH,
+    HasLog  = 1,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasTanh  = EIGEN_FAST_MATH,
+    HasBlend = 1
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+    ,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
+#endif
+  };
+};
+template<> struct packet_traits<double> : default_packet_traits
+{
+  typedef Packet2d type;
+  typedef Packet2d half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=2,
+    HasHalfPacket = 0,
+
+    HasDiv  = 1,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasBlend = 1
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+    ,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
+#endif
+  };
+};
+#endif
+template<> struct packet_traits<int>    : default_packet_traits
+{
+  typedef Packet4i type;
+  typedef Packet4i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=4,
+
+    HasBlend = 1
+  };
+};
+
+template<> struct unpacket_traits<Packet4f> { typedef float  type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int    type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
+#endif
+
+#if EIGEN_COMP_MSVC==1500
+// Workaround MSVC 9 internal compiler error.
+// TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
+// TODO: let's check whether there does not exist a better fix, like adding a pset0() function. (it crashed on pset1(0)).
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps(from,from,from,from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set_epi32(from,from,from,from); }
+#else
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps1(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set1_epi32(from); }
+#endif
+
+// GCC generates a shufps instruction for _mm_set1_ps/_mm_load1_ps instead of the more efficient pshufd instruction.
+// However, using inrinsics for pset1 makes gcc to generate crappy code in some cases (see bug 203)
+// Using inline assembly is also not an option because then gcc fails to reorder properly the instructions.
+// Therefore, we introduced the pload1 functions to be used in product kernels for which bug 203 does not apply.
+// Also note that with AVX, we want it to generate a vbroadcastss.
+#if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
+template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) {
+  return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0);
+}
+#endif
+  
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); }
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); }
+
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
+{
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+  return _mm_xor_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
+{
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
+  return _mm_xor_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
+{
+  return psub(Packet4i(_mm_setr_epi32(0,0,0,0)), a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_mul_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_mullo_epi32(a,b);
+#else
+  // this version is slightly faster than 4 scalar products
+  return vec4i_swizzle1(
+            vec4i_swizzle2(
+              _mm_mul_epu32(a,b),
+              _mm_mul_epu32(vec4i_swizzle1(a,1,0,3,2),
+                            vec4i_swizzle1(b,1,0,3,2)),
+              0,2,0,2),
+            0,2,1,3);
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
+
+// for some weird raisons, it has to be overloaded for packet of integers
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_min_epi32(a,b);
+#else
+  // after some bench, this version *is* faster than a scalar implementation
+  Packet4i mask = _mm_cmplt_epi32(a,b);
+  return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_max_epi32(a,b);
+#else
+  // after some bench, this version *is* faster than a scalar implementation
+  Packet4i mask = _mm_cmpgt_epi32(a,b);
+  return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
+#endif
+}
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return _mm_round_ps(a, 0); }
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return _mm_round_pd(a, 0); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float*   from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double*  from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
+
+#if EIGEN_COMP_MSVC
+  template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float*  from) {
+    EIGEN_DEBUG_UNALIGNED_LOAD
+    #if (EIGEN_COMP_MSVC==1600)
+    // NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps
+    // (i.e., it does not generate an unaligned load!!
+    __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (const __m64*)(from));
+    res = _mm_loadh_pi(res, (const __m64*)(from+2));
+    return res;
+    #else
+    return _mm_loadu_ps(from);
+    #endif
+  }
+#else
+// NOTE: with the code below, MSVC's compiler crashes!
+
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return _mm_loadu_ps(from);
+}
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return _mm_loadu_pd(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
+{
+  EIGEN_DEBUG_UNALIGNED_LOAD
+  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
+}
+
+
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*   from)
+{
+  return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(reinterpret_cast<const double*>(from))), 0, 0, 1, 1);
+}
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double*  from)
+{ return pset1<Packet2d>(from[0]); }
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int*     from)
+{
+  Packet4i tmp;
+  tmp = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(from));
+  return vec4i_swizzle1(tmp, 0, 0, 1, 1);
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); }
+
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*   to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*       to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); }
+
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+ return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+ return _mm_set_pd(from[1*stride], from[0*stride]);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+ return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
+ }
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+  to[stride*0] = _mm_cvtss_f32(from);
+  to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
+  to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
+  to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+  to[stride*0] = _mm_cvtsd_f64(from);
+  to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+  to[stride*0] = _mm_cvtsi128_si32(from);
+  to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
+  to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
+  to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
+}
+
+// some compilers might be tempted to perform multiple moves instead of using a vector path.
+template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
+{
+  Packet4f pa = _mm_set_ss(a);
+  pstore(to, Packet4f(vec4f_swizzle1(pa,0,0,0,0)));
+}
+// some compilers might be tempted to perform multiple moves instead of using a vector path.
+template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& a)
+{
+  Packet2d pa = _mm_set_sd(a);
+  pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
+}
+
+#if EIGEN_COMP_PGI
+typedef const void * SsePrefetchPtrType;
+#else
+typedef const char * SsePrefetchPtrType;
+#endif
+
+#ifndef EIGEN_VECTORIZE_AVX
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+#endif
+
+#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
+// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
+// Direct of the struct members fixed bug #62.
+template<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { return a.m128_f32[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return a.m128d_f64[0]; }
+template<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#elif EIGEN_COMP_MSVC_STRICT
+// The temporary variable fixes an internal compilation error in vs <= 2008 and a wrong-result bug in vs 2010
+template<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { float x = _mm_cvtss_f32(a); return x; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double x = _mm_cvtsd_f64(a); return x; }
+template<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { int x = _mm_cvtsi128_si32(a); return x; }
+#else
+template<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
+template<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{ return _mm_shuffle_ps(a,a,0x1B); }
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{ return _mm_shuffle_pd(a,a,0x1); }
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{ return _mm_shuffle_epi32(a,0x1B); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a)
+{
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
+  return _mm_and_ps(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a)
+{
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
+  return _mm_and_pd(a,mask);
+}
+template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
+{
+  #ifdef EIGEN_VECTORIZE_SSSE3
+  return _mm_abs_epi32(a);
+  #else
+  Packet4i aux = _mm_srai_epi32(a,31);
+  return _mm_sub_epi32(_mm_xor_si128(a,aux),aux);
+  #endif
+}
+
+// with AVX, the default implementations based on pload1 are faster
+#ifndef __AVX__
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+  a3 = pload<Packet4f>(a);
+  a0 = vec4f_swizzle1(a3, 0,0,0,0);
+  a1 = vec4f_swizzle1(a3, 1,1,1,1);
+  a2 = vec4f_swizzle1(a3, 2,2,2,2);
+  a3 = vec4f_swizzle1(a3, 3,3,3,3);
+}
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+                      Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+#ifdef EIGEN_VECTORIZE_SSE3
+  a0 = _mm_loaddup_pd(a+0);
+  a1 = _mm_loaddup_pd(a+1);
+  a2 = _mm_loaddup_pd(a+2);
+  a3 = _mm_loaddup_pd(a+3);
+#else
+  a1 = pload<Packet2d>(a);
+  a0 = vec2d_swizzle1(a1, 0,0);
+  a1 = vec2d_swizzle1(a1, 1,1);
+  a3 = pload<Packet2d>(a+2);
+  a2 = vec2d_swizzle1(a3, 0,0);
+  a3 = vec2d_swizzle1(a3, 1,1);
+#endif
+}
+#endif
+
+EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
+{
+  vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
+  vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
+  vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
+  vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
+}
+
+#ifdef EIGEN_VECTORIZE_SSE3
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+  return _mm_hadd_ps(_mm_hadd_ps(vecs[0], vecs[1]),_mm_hadd_ps(vecs[2], vecs[3]));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+  return _mm_hadd_pd(vecs[0], vecs[1]);
+}
+
+#else
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+  Packet4f tmp0, tmp1, tmp2;
+  tmp0 = _mm_unpacklo_ps(vecs[0], vecs[1]);
+  tmp1 = _mm_unpackhi_ps(vecs[0], vecs[1]);
+  tmp2 = _mm_unpackhi_ps(vecs[2], vecs[3]);
+  tmp0 = _mm_add_ps(tmp0, tmp1);
+  tmp1 = _mm_unpacklo_ps(vecs[2], vecs[3]);
+  tmp1 = _mm_add_ps(tmp1, tmp2);
+  tmp2 = _mm_movehl_ps(tmp1, tmp0);
+  tmp0 = _mm_movelh_ps(tmp0, tmp1);
+  return _mm_add_ps(tmp0, tmp2);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+  return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1]));
+}
+#endif  // SSE3
+
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+  // Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
+  // (from Nehalem to Haswell)
+// #ifdef EIGEN_VECTORIZE_SSE3
+//   Packet4f tmp = _mm_add_ps(a, vec4f_swizzle1(a,2,3,2,3));
+//   return pfirst<Packet4f>(_mm_hadd_ps(tmp, tmp));
+// #else
+  Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+// #endif
+}
+
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+  // Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
+  // (from Nehalem to Haswell)
+// #ifdef EIGEN_VECTORIZE_SSE3
+//   return pfirst<Packet2d>(_mm_hadd_pd(a, a));
+// #else
+  return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a,a)));
+// #endif
+}
+
+#ifdef EIGEN_VECTORIZE_SSSE3
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3]));
+}
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+  Packet4i tmp0 = _mm_hadd_epi32(a,a);
+  return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
+}
+#else
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+  Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a));
+  return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  Packet4i tmp0, tmp1, tmp2;
+  tmp0 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
+  tmp1 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
+  tmp2 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
+  tmp0 = _mm_add_epi32(tmp0, tmp1);
+  tmp1 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
+  tmp1 = _mm_add_epi32(tmp1, tmp2);
+  tmp2 = _mm_unpacklo_epi64(tmp0, tmp1);
+  tmp0 = _mm_unpackhi_epi64(tmp0, tmp1);
+  return _mm_add_epi32(tmp0, tmp2);
+}
+#endif
+// Other reduction functions:
+
+// mul
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+  // after some experiments, it is seems this is the fastest way to implement it
+  // for GCC (eg., reusing pmul is very slow !)
+  // TODO try to call _mm_mul_epu32 directly
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  return  (aux[0] * aux[1]) * (aux[2] * aux[3]);;
+}
+
+// min
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+  return pfirst<Packet4i>(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+  // after some experiments, it is seems this is the fastest way to implement it
+  // for GCC (eg., it does not like using std::min after the pstore !!)
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
+  return aux0<aux2 ? aux0 : aux2;
+#endif // EIGEN_VECTORIZE_SSE4_1
+}
+
+// max
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+  Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a,a));
+  return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
+}
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+  return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a,a)));
+}
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
+  return pfirst<Packet4i>(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
+#else
+  // after some experiments, it is seems this is the fastest way to implement it
+  // for GCC (eg., it does not like using std::min after the pstore !!)
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
+  int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
+  return aux0>aux2 ? aux0 : aux2;
+#endif // EIGEN_VECTORIZE_SSE4_1
+}
+
+#if EIGEN_COMP_GNUC
+// template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f&  a, const Packet4f&  b, const Packet4f&  c)
+// {
+//   Packet4f res = b;
+//   asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
+//   return res;
+// }
+// EIGEN_STRONG_INLINE Packet4i _mm_alignr_epi8(const Packet4i&  a, const Packet4i&  b, const int i)
+// {
+//   Packet4i res = a;
+//   asm("palignr %[i], %[a], %[b] " : [b] "+x" (res) : [a] "x" (a), [i] "i" (i));
+//   return res;
+// }
+#endif
+
+#ifdef EIGEN_VECTORIZE_SSSE3
+// SSSE3 versions
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  {
+    if (Offset!=0)
+      first = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(second), _mm_castps_si128(first), Offset*4));
+  }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  {
+    if (Offset!=0)
+      first = _mm_alignr_epi8(second,first, Offset*4);
+  }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+  {
+    if (Offset==1)
+      first = _mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(second), _mm_castpd_si128(first), 8));
+  }
+};
+#else
+// SSE2 versions
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm_move_ss(first,second);
+      first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
+    }
+    else if (Offset==2)
+    {
+      first = _mm_movehl_ps(first,first);
+      first = _mm_movelh_ps(first,second);
+    }
+    else if (Offset==3)
+    {
+      first = _mm_move_ss(first,second);
+      first = _mm_shuffle_ps(first,second,0x93);
+    }
+  }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+      first = _mm_shuffle_epi32(first,0x39);
+    }
+    else if (Offset==2)
+    {
+      first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
+      first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+    }
+    else if (Offset==3)
+    {
+      first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
+      first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
+    }
+  }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+  {
+    if (Offset==1)
+    {
+      first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
+      first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
+    }
+  }
+};
+#endif
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+  _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+  __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
+  kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
+  kernel.packet[1] = tmp;
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+  __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
+  __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
+  __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
+  __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
+
+  kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
+  kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
+  kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
+  kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m128i false_mask = _mm_cmpeq_epi32(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
+#else
+  return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+  const __m128 zero = _mm_setzero_ps();
+  const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m128 false_mask = _mm_cmpeq_ps(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
+#else
+  return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
+#endif
+}
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+  const __m128d zero = _mm_setzero_pd();
+  const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
+  __m128d false_mask = _mm_cmpeq_pd(select, zero);
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
+#else
+  return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pinsertfirst(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_ps(a,pset1<Packet4f>(b),1);
+#else
+  return _mm_move_ss(a, _mm_load_ss(&b));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pinsertfirst(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_pd(a,pset1<Packet2d>(b),1);
+#else
+  return _mm_move_sd(a, _mm_load_sd(&b));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pinsertlast(const Packet4f& a, float b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
+#else
+  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
+  return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
+#endif
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pinsertlast(const Packet2d& a, double b)
+{
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
+#else
+  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
+  return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
+#endif
+}
+
+// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
+#ifdef __FMA__
+template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
+  return ::fmaf(a,b,c);
+}
+template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, const double& c) {
+  return ::fma(a,b,c);
+}
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#if EIGEN_COMP_PGI
+// PGI++ does not define the following intrinsics in C++ mode.
+static inline __m128  _mm_castpd_ps   (__m128d x) { return reinterpret_cast<__m128&>(x);  }
+static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); }
+static inline __m128d _mm_castps_pd   (__m128  x) { return reinterpret_cast<__m128d&>(x); }
+static inline __m128i _mm_castps_si128(__m128  x) { return reinterpret_cast<__m128i&>(x); }
+static inline __m128  _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x);  }
+static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); }
+#endif
+
+#endif // EIGEN_PACKET_MATH_SSE_H
diff --git a/third-party/Eigen/src/Core/arch/SSE/TypeCasting.h b/third-party/Eigen/src/Core/arch/SSE/TypeCasting.h
new file mode 100644
index 00000000..c6ca8c71
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -0,0 +1,77 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TYPE_CASTING_SSE_H
+#define EIGEN_TYPE_CASTING_SSE_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_VECTORIZE_AVX
+template <>
+struct type_casting_traits<float, int> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template <>
+struct type_casting_traits<int, float> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+template <>
+struct type_casting_traits<double, float> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 2,
+    TgtCoeffRatio = 1
+  };
+};
+
+template <>
+struct type_casting_traits<float, double> {
+  enum {
+    VectorizedCast = 1,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 2
+  };
+};
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
+  return _mm_cvttps_epi32(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
+  return _mm_cvtepi32_ps(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
+  return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
+  // Simply discard the second half of the input
+  return _mm_cvtps_pd(a);
+}
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_SSE_H
diff --git a/third-party/Eigen/src/Core/arch/ZVector/Complex.h b/third-party/Eigen/src/Core/arch/ZVector/Complex.h
new file mode 100644
index 00000000..1bfb7339
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/ZVector/Complex.h
@@ -0,0 +1,397 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX32_ALTIVEC_H
+#define EIGEN_COMPLEX32_ALTIVEC_H
+
+namespace Eigen {
+
+namespace internal {
+
+static Packet2ul  p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+static Packet2ul  p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO,  (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
+
+struct Packet1cd
+{
+  EIGEN_STRONG_INLINE Packet1cd() {}
+  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
+  Packet2d v;
+};
+
+struct Packet2cf
+{
+  EIGEN_STRONG_INLINE Packet2cf() {}
+  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
+  union {
+    Packet4f v;
+    Packet1cd cd[2];
+  };
+};
+
+template<> struct packet_traits<std::complex<float> >  : default_packet_traits
+{
+  typedef Packet2cf type;
+  typedef Packet2cf half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 2,
+    HasHalfPacket = 0,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasBlend  = 1,
+    HasSetLinear = 0
+  };
+};
+
+
+template<> struct packet_traits<std::complex<double> >  : default_packet_traits
+{
+  typedef Packet1cd type;
+  typedef Packet1cd half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 1,
+    HasHalfPacket = 0,
+
+    HasAdd    = 1,
+    HasSub    = 1,
+    HasMul    = 1,
+    HasDiv    = 1,
+    HasNegate = 1,
+    HasAbs    = 0,
+    HasAbs2   = 0,
+    HasMin    = 0,
+    HasMax    = 0,
+    HasSetLinear = 0
+  };
+};
+
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float>  type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+
+/* Forward declaration */
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
+
+template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from)  { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)  { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *     to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *     to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *   to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>&  from)
+{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>&  from)
+{
+  Packet2cf res;
+  res.cd[0] = Packet1cd(vec_ld2f((const float *)&from));
+  res.cd[1] = res.cd[0];
+  return res;
+}
+template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
+{
+  std::complex<float> EIGEN_ALIGN16 af[2];
+  af[0] = from[0*stride];
+  af[1] = from[1*stride];
+  return pload<Packet2cf>(af);
+}
+template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride EIGEN_UNUSED)
+{
+  return pload<Packet1cd>(from);
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
+{
+  std::complex<float> EIGEN_ALIGN16 af[2];
+  pstore<std::complex<float> >((std::complex<float> *) af, from);
+  to[0*stride] = af[0];
+  to[1*stride] = af[1];
+}
+template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride EIGEN_UNUSED)
+{
+  pstore<std::complex<double> >(to, from);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v + b.v); }
+template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(a.v - b.v); }
+template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(Packet4f(a.v))); }
+template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
+{
+  Packet2cf res;
+  res.v.v4f[0] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0]))).v;
+  res.v.v4f[1] = pconj(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1]))).v;
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  Packet2d a_re, a_im, v1, v2;
+
+  // Permute and multiply the real parts of a and b
+  a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
+  // Get the imaginary parts of a
+  a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
+  // multiply a_re * b
+  v1 = vec_madd(a_re, b.v, p2d_ZERO);
+  // multiply a_im * b and get the conjugate result
+  v2 = vec_madd(a_im, b.v, p2d_ZERO);
+  v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
+  v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
+
+  return Packet1cd(v1 + v2);
+}
+template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  Packet2cf res;
+  res.v.v4f[0] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[0])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[0]))).v;
+  res.v.v4f[1] = pmul(Packet1cd(reinterpret_cast<Packet2d>(a.v.v4f[1])), Packet1cd(reinterpret_cast<Packet2d>(b.v.v4f[1]))).v;
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd pand   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pand   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pand<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd por    <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf por    <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(por<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pxor   <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf pxor   <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pxor<Packet4f>(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
+template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(pandnot<Packet4f>(a.v,b.v)); }
+
+template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>*     from) {  return pset1<Packet1cd>(*from); }
+template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>*      from) {  return pset1<Packet2cf>(*from); }
+
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *     addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *   addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+
+template<> EIGEN_STRONG_INLINE std::complex<double>  pfirst<Packet1cd>(const Packet1cd& a)
+{
+  std::complex<double> EIGEN_ALIGN16 res;
+  pstore<std::complex<double> >(&res, a);
+
+  return res;
+}
+template<> EIGEN_STRONG_INLINE std::complex<float>  pfirst<Packet2cf>(const Packet2cf& a)
+{
+  std::complex<float> EIGEN_ALIGN16 res[2];
+  pstore<std::complex<float> >(res, a);
+
+  return res[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
+{
+  Packet2cf res;
+  res.cd[0] = a.cd[1];
+  res.cd[1] = a.cd[0];
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
+{
+  return pfirst(a);
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
+{
+  std::complex<float> res;
+  Packet1cd b = padd<Packet1cd>(a.cd[0], a.cd[1]);
+  vec_st2f(b.v, (float*)&res);
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
+{
+  return vecs[0];
+}
+template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+  PacketBlock<Packet2cf,2> transpose;
+  transpose.packet[0] = vecs[0];
+  transpose.packet[1] = vecs[1];
+  ptranspose(transpose);
+
+  return padd<Packet2cf>(transpose.packet[0], transpose.packet[1]);
+} 
+
+template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
+{
+  return pfirst(a);
+}
+template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
+{
+  std::complex<float> res;
+  Packet1cd b = pmul<Packet1cd>(a.cd[0], a.cd[1]);
+  vec_st2f(b.v, (float*)&res);
+  return res;
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet1cd>
+{
+  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
+  {
+    // FIXME is it sure we never have to align a Packet1cd?
+    // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
+  }
+};
+
+template<int Offset>
+struct palign_impl<Offset,Packet2cf>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
+  {
+    if (Offset == 1) {
+      first.cd[0] = first.cd[1];
+      first.cd[1] = second.cd[0];
+    }
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return internal::pmul(a, pconj(b));
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return internal::pmul(pconj(a), b);
+  }
+};
+
+template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
+{
+  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
+  {
+    return pconj(internal::pmul(a, b));
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return internal::pmul(a, pconj(b));
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return internal::pmul(pconj(a), b);
+  }
+};
+
+template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
+{
+  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
+  { return padd(pmul(x,y),c); }
+
+  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
+  {
+    return pconj(internal::pmul(a, b));
+  }
+};
+
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
+
+template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
+{
+  // TODO optimize it for AltiVec
+  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
+  Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
+  return Packet1cd(pdiv(res.v, s + vec_perm(s, s, p16uc_REVERSE64)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+  // TODO optimize it for AltiVec
+  Packet2cf res;
+  res.cd[0] = pdiv<Packet1cd>(a.cd[0], b.cd[0]);
+  res.cd[1] = pdiv<Packet1cd>(a.cd[1], b.cd[1]);
+  return res;
+}
+
+EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
+{
+  return Packet1cd(preverse(Packet2d(x.v)));
+}
+
+EIGEN_STRONG_INLINE Packet2cf pcplxflip/*<Packet2cf>*/(const Packet2cf& x)
+{
+  Packet2cf res;
+  res.cd[0] = pcplxflip(x.cd[0]);
+  res.cd[1] = pcplxflip(x.cd[1]);
+  return res;
+}
+
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
+{
+  Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
+  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
+  kernel.packet[0].v = tmp;
+}
+
+EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
+{
+  Packet1cd tmp = kernel.packet[0].cd[1];
+  kernel.packet[0].cd[1] = kernel.packet[1].cd[0];
+  kernel.packet[1].cd[0] = tmp;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
+  Packet2cf result;
+  const Selector<4> ifPacket4 = { ifPacket.select[0], ifPacket.select[0], ifPacket.select[1], ifPacket.select[1] };
+  result.v = pblend<Packet4f>(ifPacket4, thenPacket.v, elsePacket.v);
+  return result;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX32_ALTIVEC_H
diff --git a/third-party/Eigen/src/Core/arch/ZVector/MathFunctions.h b/third-party/Eigen/src/Core/arch/ZVector/MathFunctions.h
new file mode 100644
index 00000000..5c7aa725
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/ZVector/MathFunctions.h
@@ -0,0 +1,137 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007 Julien Pommier
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/* The sin, cos, exp, and log functions of this file come from
+ * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
+ */
+
+#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
+
+namespace Eigen {
+
+namespace internal {
+
+static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
+static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
+static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
+
+static _EIGEN_DECLARE_CONST_Packet2d(exp_hi,  709.437);
+static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
+
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
+
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
+
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
+
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
+static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d pexp<Packet2d>(const Packet2d& _x)
+{
+  Packet2d x = _x;
+
+  Packet2d tmp, fx;
+  Packet2l emm0;
+
+  // clamp x
+  x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
+  /* express exp(x) as exp(g + n*log(2)) */
+  fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
+
+  fx = vec_floor(fx);
+
+  tmp = pmul(fx, p2d_cephes_exp_C1);
+  Packet2d z = pmul(fx, p2d_cephes_exp_C2);
+  x = psub(x, tmp);
+  x = psub(x, z);
+
+  Packet2d x2 = pmul(x,x);
+
+  Packet2d px = p2d_cephes_exp_p0;
+  px = pmadd(px, x2, p2d_cephes_exp_p1);
+  px = pmadd(px, x2, p2d_cephes_exp_p2);
+  px = pmul (px, x);
+
+  Packet2d qx = p2d_cephes_exp_q0;
+  qx = pmadd(qx, x2, p2d_cephes_exp_q1);
+  qx = pmadd(qx, x2, p2d_cephes_exp_q2);
+  qx = pmadd(qx, x2, p2d_cephes_exp_q3);
+
+  x = pdiv(px,psub(qx,px));
+  x = pmadd(p2d_2,x,p2d_1);
+
+  // build 2^n
+  emm0 = vec_ctsl(fx, 0);
+
+  static const Packet2l p2l_1023 = { 1023, 1023 };
+  static const Packet2ul p2ul_52 = { 52, 52 };
+
+  emm0 = emm0 + p2l_1023;
+  emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52);
+
+  // Altivec's max & min operators just drop silent NaNs. Check NaNs in 
+  // inputs and return them unmodified.
+  Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
+  return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
+                 isnumber_mask);
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f pexp<Packet4f>(const Packet4f& x)
+{
+  Packet4f res;
+  res.v4f[0] = pexp<Packet2d>(x.v4f[0]);
+  res.v4f[1] = pexp<Packet2d>(x.v4f[1]);
+  return res;
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d psqrt<Packet2d>(const Packet2d& x)
+{
+  return  __builtin_s390_vfsqdb(x);
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f psqrt<Packet4f>(const Packet4f& x)
+{
+  Packet4f res;
+  res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
+  res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
+  return res;
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet2d prsqrt<Packet2d>(const Packet2d& x) {
+  // Unfortunately we can't use the much faster mm_rqsrt_pd since it only provides an approximation.
+  return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+Packet4f prsqrt<Packet4f>(const Packet4f& x) {
+  Packet4f res;
+  res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
+  res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
+  return res;
+}
+
+}  // end namespace internal
+
+}  // end namespace Eigen
+
+#endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H
diff --git a/third-party/Eigen/src/Core/arch/ZVector/PacketMath.h b/third-party/Eigen/src/Core/arch/ZVector/PacketMath.h
new file mode 100644
index 00000000..b085efb8
--- /dev/null
+++ b/third-party/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -0,0 +1,943 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PACKET_MATH_ZVECTOR_H
+#define EIGEN_PACKET_MATH_ZVECTOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif
+
+#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+#endif
+
+#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
+#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS  16
+#endif
+
+typedef __vector int                 Packet4i;
+typedef __vector unsigned int        Packet4ui;
+typedef __vector __bool int          Packet4bi;
+typedef __vector short int           Packet8i;
+typedef __vector unsigned char       Packet16uc;
+typedef __vector double              Packet2d;
+typedef __vector unsigned long long  Packet2ul;
+typedef __vector long long           Packet2l;
+
+typedef struct {
+	Packet2d  v4f[2];
+} Packet4f;
+
+typedef union {
+  numext::int32_t   i[4];
+  numext::uint32_t ui[4];
+  numext::int64_t   l[2];
+  numext::uint64_t ul[2];
+  double    d[2];
+  Packet4i  v4i;
+  Packet4ui v4ui;
+  Packet2l  v2l;
+  Packet2ul v2ul;
+  Packet2d  v2d;
+} Packet;
+
+// We don't want to write the same code all the time, but we need to reuse the constants
+// and it doesn't really work to declare them global, so we define macros instead
+
+#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
+  Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X))
+
+#define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
+  Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X))
+
+#define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
+  Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X))
+
+#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
+  Packet4i p4i_##NAME = pset1<Packet4i>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
+  Packet2d p2d_##NAME = pset1<Packet2d>(X)
+
+#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
+  Packet2l p2l_##NAME = pset1<Packet2l>(X)
+
+// These constants are endian-agnostic
+//static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,}
+static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); //{ 1, 1, 1, 1}
+
+static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0);
+static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1);
+
+static Packet2d p2d_ONE = { 1.0, 1.0 }; 
+static Packet2d p2d_ZERO_ = { -0.0, -0.0 };
+
+static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 };
+static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 };
+static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8));
+
+static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };
+static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 };
+
+// Mask alignment
+#define _EIGEN_MASK_ALIGNMENT	0xfffffffffffffff0
+
+#define _EIGEN_ALIGNED_PTR(x)	((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT)
+
+// Handle endianness properly while loading constants
+// Define global static constants:
+
+static Packet16uc p16uc_FORWARD =   { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 };
+static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 };
+static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+
+static Packet16uc p16uc_PSET32_WODD   = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
+static Packet16uc p16uc_PSET32_WEVEN  = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
+/*static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8);      //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
+
+static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);     //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };*/
+static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN);     //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
+/*static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16);                                         //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
+static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16);                                         //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};*/
+static Packet16uc p16uc_TRANSPOSE64_HI = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
+static Packet16uc p16uc_TRANSPOSE64_LO = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};
+
+//static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8);                                         //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
+
+//static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);                                            //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
+
+
+#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
+  #define EIGEN_ZVECTOR_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#else
+  #define EIGEN_ZVECTOR_PREFETCH(ADDR) asm( "   pfd [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+#endif
+
+template<> struct packet_traits<int>    : default_packet_traits
+{
+  typedef Packet4i type;
+  typedef Packet4i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 4,
+    HasHalfPacket = 0,
+
+    HasAdd  = 1,
+    HasSub  = 1,
+    HasMul  = 1,
+    HasDiv  = 1,
+    HasBlend = 1
+  };
+};
+
+template<> struct packet_traits<float> : default_packet_traits
+{
+  typedef Packet4f type;
+  typedef Packet4f half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=4,
+    HasHalfPacket = 0,
+
+    HasAdd  = 1,
+    HasSub  = 1,
+    HasMul  = 1,
+    HasDiv  = 1,
+    HasMin  = 1,
+    HasMax  = 1,
+    HasAbs  = 1,
+    HasSin  = 0,
+    HasCos  = 0,
+    HasLog  = 0,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1,
+    HasNegate = 1,
+    HasBlend = 1
+  };
+};
+
+template<> struct packet_traits<double> : default_packet_traits
+{
+  typedef Packet2d type;
+  typedef Packet2d half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size=2,
+    HasHalfPacket = 1,
+
+    HasAdd  = 1,
+    HasSub  = 1,
+    HasMul  = 1,
+    HasDiv  = 1,
+    HasMin  = 1,
+    HasMax  = 1,
+    HasAbs  = 1,
+    HasSin  = 0,
+    HasCos  = 0,
+    HasLog  = 0,
+    HasExp  = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1,
+    HasNegate = 1,
+    HasBlend = 1
+  };
+};
+
+template<> struct unpacket_traits<Packet4i> { typedef int    type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
+template<> struct unpacket_traits<Packet4f> { typedef float  type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+
+/* Forward declaration */
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
+ 
+inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
+{
+  Packet vt;
+  vt.v4i = v;
+  s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3];
+  return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
+{
+  Packet vt;
+  vt.v4ui = v;
+  s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3];
+  return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
+{
+  Packet vt;
+  vt.v2l = v;
+  s << vt.l[0] << ", " << vt.l[1];
+  return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v)
+{
+  Packet vt;
+  vt.v2ul = v;
+  s << vt.ul[0] << ", " << vt.ul[1] ;
+  return s;
+}
+
+inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
+{
+  Packet vt;
+  vt.v2d = v;
+  s << vt.d[0] << ", " << vt.d[1];
+  return s;
+}
+
+/* Helper function to simulate a vec_splat_packet4f
+ */
+template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f&   from)
+{
+  Packet4f splat;
+  switch (element) {
+  case 0:
+    splat.v4f[0] = vec_splat(from.v4f[0], 0);
+    splat.v4f[1] = splat.v4f[0];
+    break;
+  case 1:
+    splat.v4f[0] = vec_splat(from.v4f[0], 1);
+    splat.v4f[1] = splat.v4f[0];
+    break;
+  case 2:
+    splat.v4f[0] = vec_splat(from.v4f[1], 0);
+    splat.v4f[1] = splat.v4f[0];
+    break;
+  case 3:
+    splat.v4f[0] = vec_splat(from.v4f[1], 1);
+    splat.v4f[1] = splat.v4f[0];
+    break;
+  }
+  return splat;
+}
+
+template<int Offset>
+struct palign_impl<Offset,Packet4i>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
+  {
+    switch (Offset % 4) {
+    case 1:
+      first = vec_sld(first, second, 4); break;
+    case 2:
+      first = vec_sld(first, second, 8); break;
+    case 3:
+      first = vec_sld(first, second, 12); break;
+    }
+  }
+};
+
+/* This is a tricky one, we have to translate float alignment to vector elements of sizeof double
+ */
+template<int Offset>
+struct palign_impl<Offset,Packet4f>
+{
+  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
+  {
+    switch (Offset % 4) {
+    case 1:
+      first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
+      first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
+      break;
+    case 2:
+      first.v4f[0] = first.v4f[1];
+      first.v4f[1] = second.v4f[0];
+      break;
+    case 3:
+      first.v4f[0] = vec_sld(first.v4f[1],  second.v4f[0], 8);
+      first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
+      break;
+    }
+  }
+};
+
+
+template<int Offset>
+struct palign_impl<Offset,Packet2d>
+{
+  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
+  {
+    if (Offset == 1)
+      first = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(first), reinterpret_cast<Packet4i>(second), 8));
+  }
+};
+
+template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from)
+{
+  // FIXME: No intrinsic yet
+  EIGEN_DEBUG_ALIGNED_LOAD
+  Packet *vfrom;
+  vfrom = (Packet *) from;
+  return vfrom->v4i;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float*   from)
+{
+  // FIXME: No intrinsic yet
+  EIGEN_DEBUG_ALIGNED_LOAD
+  Packet4f vfrom;
+  vfrom.v4f[0] = vec_ld2f(&from[0]);
+  vfrom.v4f[1] = vec_ld2f(&from[2]);
+  return vfrom;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
+{
+  // FIXME: No intrinsic yet
+  EIGEN_DEBUG_ALIGNED_LOAD
+  Packet *vfrom;
+  vfrom = (Packet *) from;
+  return vfrom->v2d;
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<int>(int*       to, const Packet4i& from)
+{
+  // FIXME: No intrinsic yet
+  EIGEN_DEBUG_ALIGNED_STORE
+  Packet *vto;
+  vto = (Packet *) to;
+  vto->v4i = from;
+}
+
+template<> EIGEN_STRONG_INLINE void pstore<float>(float*   to, const Packet4f& from)
+{
+  // FIXME: No intrinsic yet
+  EIGEN_DEBUG_ALIGNED_STORE
+  vec_st2f(from.v4f[0], &to[0]);
+  vec_st2f(from.v4f[1], &to[2]);
+}
+
+
+template<> EIGEN_STRONG_INLINE void pstore<double>(double*   to, const Packet2d& from)
+{
+  // FIXME: No intrinsic yet
+  EIGEN_DEBUG_ALIGNED_STORE
+  Packet *vto;
+  vto = (Packet *) to;
+  vto->v2d = from;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from)
+{
+  return vec_splats(from);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
+  return vec_splats(from);
+}
+template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&    from)
+{
+  Packet4f to;
+  to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
+  to.v4f[1] = to.v4f[0];
+  return to;
+}
+
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4i>(const int *a,
+                      Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
+{
+  a3 = pload<Packet4i>(a);
+  a0 = vec_splat(a3, 0);
+  a1 = vec_splat(a3, 1);
+  a2 = vec_splat(a3, 2);
+  a3 = vec_splat(a3, 3);
+}
+
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet4f>(const float *a,
+                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
+{
+  a3 = pload<Packet4f>(a);
+  a0 = vec_splat_packet4f<0>(a3);
+  a1 = vec_splat_packet4f<1>(a3);
+  a2 = vec_splat_packet4f<2>(a3);
+  a3 = vec_splat_packet4f<3>(a3);
+}
+
+template<> EIGEN_STRONG_INLINE void
+pbroadcast4<Packet2d>(const double *a,
+                      Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
+{
+  a1 = pload<Packet2d>(a);
+  a0 = vec_splat(a1, 0);
+  a1 = vec_splat(a1, 1);
+  a3 = pload<Packet2d>(a+2);
+  a2 = vec_splat(a3, 0);
+  a3 = vec_splat(a3, 1);
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
+{
+  int EIGEN_ALIGN16 ai[4];
+  ai[0] = from[0*stride];
+  ai[1] = from[1*stride];
+  ai[2] = from[2*stride];
+  ai[3] = from[3*stride];
+ return pload<Packet4i>(ai);
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
+{
+  float EIGEN_ALIGN16 ai[4];
+  ai[0] = from[0*stride];
+  ai[1] = from[1*stride];
+  ai[2] = from[2*stride];
+  ai[3] = from[3*stride];
+ return pload<Packet4f>(ai);
+}
+
+template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
+{
+  double EIGEN_ALIGN16 af[2];
+  af[0] = from[0*stride];
+  af[1] = from[1*stride];
+ return pload<Packet2d>(af);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
+{
+  int EIGEN_ALIGN16 ai[4];
+  pstore<int>((int *)ai, from);
+  to[0*stride] = ai[0];
+  to[1*stride] = ai[1];
+  to[2*stride] = ai[2];
+  to[3*stride] = ai[3];
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
+{
+  float EIGEN_ALIGN16 ai[4];
+  pstore<float>((float *)ai, from);
+  to[0*stride] = ai[0];
+  to[1*stride] = ai[1];
+  to[2*stride] = ai[2];
+  to[3*stride] = ai[3];
+}
+
+template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
+{
+  double EIGEN_ALIGN16 af[2];
+  pstore<double>(af, from);
+  to[0*stride] = af[0];
+  to[1*stride] = af[1];
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
+template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f c;
+  c.v4f[0] = a.v4f[0] + b.v4f[0];
+  c.v4f[1] = a.v4f[1] + b.v4f[1];
+  return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
+template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f c;
+  c.v4f[0] = a.v4f[0] - b.v4f[0];
+  c.v4f[1] = a.v4f[1] - b.v4f[1];
+  return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f c;
+  c.v4f[0] = a.v4f[0] * b.v4f[0];
+  c.v4f[1] = a.v4f[1] * b.v4f[1];
+  return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
+template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f c;
+  c.v4f[0] = a.v4f[0] / b.v4f[0];
+  c.v4f[1] = a.v4f[1] / b.v4f[1];
+  return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
+template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
+{
+  Packet4f c;
+  c.v4f[0] = -a.v4f[0];
+  c.v4f[1] = -a.v4f[1];
+  return c;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
+template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
+
+template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
+template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
+{
+  Packet4f res;
+  res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
+  res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
+  return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
+
+template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a)    { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
+template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)  { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
+template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
+template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+  Packet4f res;
+  res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
+  res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
+{
+  Packet4f res;
+  res.v4f[0] = vec_round(a.v4f[0]);
+  res.v4f[1] = vec_round(a.v4f[1]);
+  return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const  Packet4f& a)
+{
+  Packet4f res;
+  res.v4f[0] = vec_ceil(a.v4f[0]);
+  res.v4f[1] = vec_ceil(a.v4f[1]);
+  return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const  Packet2d& a) { return vec_ceil(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
+{
+  Packet4f res;
+  res.v4f[0] = vec_floor(a.v4f[0]);
+  res.v4f[1] = vec_floor(a.v4f[1]);
+  return res;
+}
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int*       from) { return pload<Packet4i>(from); }
+template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float*     from) { return pload<Packet4f>(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double*    from) { return pload<Packet2d>(from); }
+
+
+template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int*     from)
+{
+  Packet4i p = pload<Packet4i>(from);
+  return vec_perm(p, p, p16uc_DUPLICATE32_HI);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float*    from)
+{
+  Packet4f p = pload<Packet4f>(from);
+  p.v4f[1] = vec_splat(p.v4f[0], 1);
+  p.v4f[0] = vec_splat(p.v4f[0], 0);
+  return p;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double*   from)
+{
+  Packet2d p = pload<Packet2d>(from);
+  return vec_perm(p, p, p16uc_PSET64_HI);
+}
+
+template<> EIGEN_STRONG_INLINE void pstoreu<int>(int*        to, const Packet4i& from) { pstore<int>(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<float>(float*    to, const Packet4f& from) { pstore<float>(to, from); }
+template<> EIGEN_STRONG_INLINE void pstoreu<double>(double*  to, const Packet2d& from) { pstore<double>(to, from); }
+
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int*       addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float*   addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
+
+template<> EIGEN_STRONG_INLINE int    pfirst<Packet4i>(const Packet4i& a) { int    EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
+template<> EIGEN_STRONG_INLINE float  pfirst<Packet4f>(const Packet4f& a) { float  EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
+template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
+
+template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
+{
+  return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
+{
+  return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
+{
+  Packet4f rev;
+  rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
+  rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
+  return rev;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
+template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
+{
+  Packet4f res;
+  res.v4f[0] = pabs(a.v4f[0]);
+  res.v4f[1] = pabs(a.v4f[1]);
+  return res;
+}
+
+template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
+{
+  Packet4i b, sum;
+  b   = vec_sld(a, a, 8);
+  sum = padd<Packet4i>(a, b);
+  b   = vec_sld(sum, sum, 4);
+  sum = padd<Packet4i>(sum, b);
+  return pfirst(sum);
+}
+
+template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
+{
+  Packet2d b, sum;
+  b   = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));
+  sum = padd<Packet2d>(a, b);
+  return pfirst(sum);
+}
+template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
+{
+  Packet2d sum;
+  sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
+  double first = predux<Packet2d>(sum);
+  return static_cast<float>(first);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
+{
+  Packet4i v[4], sum[4];
+
+  // It's easier and faster to transpose then add as columns
+  // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation
+  // Do the transpose, first set of moves
+  v[0] = vec_mergeh(vecs[0], vecs[2]);
+  v[1] = vec_mergel(vecs[0], vecs[2]);
+  v[2] = vec_mergeh(vecs[1], vecs[3]);
+  v[3] = vec_mergel(vecs[1], vecs[3]);
+  // Get the resulting vectors
+  sum[0] = vec_mergeh(v[0], v[2]);
+  sum[1] = vec_mergel(v[0], v[2]);
+  sum[2] = vec_mergeh(v[1], v[3]);
+  sum[3] = vec_mergel(v[1], v[3]);
+
+  // Now do the summation:
+  // Lines 0+1
+  sum[0] = padd<Packet4i>(sum[0], sum[1]);
+  // Lines 2+3
+  sum[1] = padd<Packet4i>(sum[2], sum[3]);
+  // Add the results
+  sum[0] = padd<Packet4i>(sum[0], sum[1]);
+
+  return sum[0];
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
+{
+  Packet2d v[2], sum;
+  v[0] = padd<Packet2d>(vecs[0], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[0]), reinterpret_cast<Packet4ui>(vecs[0]), 8)));
+  v[1] = padd<Packet2d>(vecs[1], reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(vecs[1]), reinterpret_cast<Packet4ui>(vecs[1]), 8)));
+ 
+  sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(v[0]), reinterpret_cast<Packet4ui>(v[1]), 8));
+
+  return sum;
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
+{
+  PacketBlock<Packet4f,4> transpose;
+  transpose.packet[0] = vecs[0];
+  transpose.packet[1] = vecs[1];
+  transpose.packet[2] = vecs[2];
+  transpose.packet[3] = vecs[3];
+  ptranspose(transpose);
+
+  Packet4f sum = padd(transpose.packet[0], transpose.packet[1]);
+  sum = padd(sum, transpose.packet[2]);
+  sum = padd(sum, transpose.packet[3]);
+  return sum;
+}
+
+// Other reduction functions:
+// mul
+template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
+{
+  EIGEN_ALIGN16 int aux[4];
+  pstore(aux, a);
+  return aux[0] * aux[1] * aux[2] * aux[3];
+}
+
+template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
+{
+  return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+
+template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
+{
+  // Return predux_mul<Packet2d> of the subvectors product
+  return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
+}
+
+// min
+template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
+{
+  Packet4i b, res;
+  b   = pmin<Packet4i>(a, vec_sld(a, a, 8));
+  res = pmin<Packet4i>(b, vec_sld(b, b, 4));
+  return pfirst(res);
+}
+
+template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
+{
+  return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+
+template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
+{
+  Packet2d b, res;
+  b   = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
+  res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
+  return static_cast<float>(pfirst(res));
+}
+
+// max
+template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
+{
+  Packet4i b, res;
+  b = pmax<Packet4i>(a, vec_sld(a, a, 8));
+  res = pmax<Packet4i>(b, vec_sld(b, b, 4));
+  return pfirst(res);
+}
+
+// max
+template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
+{
+  return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
+}
+
+template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
+{
+  Packet2d b, res;
+  b   = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
+  res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
+  return static_cast<float>(pfirst(res));
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4i,4>& kernel) {
+  Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
+  Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
+  Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
+  Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
+  kernel.packet[0] = vec_mergeh(t0, t2);
+  kernel.packet[1] = vec_mergel(t0, t2);
+  kernel.packet[2] = vec_mergeh(t1, t3);
+  kernel.packet[3] = vec_mergel(t1, t3);
+}
+
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet2d,2>& kernel) {
+  Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
+  Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
+  kernel.packet[0] = t0;
+  kernel.packet[1] = t1;
+}
+
+/* Split the Packet4f PacketBlock into 4 Packet2d PacketBlocks and transpose each one
+ */
+EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet4f,4>& kernel) {
+  PacketBlock<Packet2d,2> t0,t1,t2,t3;
+  // copy top-left 2x2 Packet2d block
+  t0.packet[0] = kernel.packet[0].v4f[0];
+  t0.packet[1] = kernel.packet[1].v4f[0];
+
+  // copy top-right 2x2 Packet2d block
+  t1.packet[0] = kernel.packet[0].v4f[1];
+  t1.packet[1] = kernel.packet[1].v4f[1];
+
+  // copy bottom-left 2x2 Packet2d block
+  t2.packet[0] = kernel.packet[2].v4f[0];
+  t2.packet[1] = kernel.packet[3].v4f[0];
+
+  // copy bottom-right 2x2 Packet2d block
+  t3.packet[0] = kernel.packet[2].v4f[1];
+  t3.packet[1] = kernel.packet[3].v4f[1];
+
+  // Transpose all 2x2 blocks
+  ptranspose(t0);
+  ptranspose(t1);
+  ptranspose(t2);
+  ptranspose(t3);
+
+  // Copy back transposed blocks, but exchange t1 and t2 due to transposition
+  kernel.packet[0].v4f[0] = t0.packet[0];
+  kernel.packet[0].v4f[1] = t2.packet[0];
+  kernel.packet[1].v4f[0] = t0.packet[1];
+  kernel.packet[1].v4f[1] = t2.packet[1];
+  kernel.packet[2].v4f[0] = t1.packet[0];
+  kernel.packet[2].v4f[1] = t3.packet[0];
+  kernel.packet[3].v4f[0] = t1.packet[1];
+  kernel.packet[3].v4f[1] = t3.packet[1];
+}
+
+template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
+  Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
+  Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
+  return vec_sel(elsePacket, thenPacket, mask);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
+  Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
+  Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
+  Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
+  Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
+  Packet4f result;
+  result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);
+  result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
+  return result;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
+  Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
+  Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
+  return vec_sel(elsePacket, thenPacket, mask);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_ZVECTOR_H
diff --git a/third-party/Eigen/src/Core/functors/AssignmentFunctors.h b/third-party/Eigen/src/Core/functors/AssignmentFunctors.h
new file mode 100644
index 00000000..4153b877
--- /dev/null
+++ b/third-party/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -0,0 +1,168 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
+#define EIGEN_ASSIGNMENT_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+  
+/** \internal
+  * \brief Template functor for scalar/packet assignment
+  *
+  */
+template<typename DstScalar,typename SrcScalar> struct assign_op {
+
+  EIGEN_EMPTY_STRUCT_CTOR(assign_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
+  
+  template<int Alignment, typename Packet>
+  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+  { internal::pstoret<DstScalar,Packet,Alignment>(a,b); }
+};
+
+// Empty overload for void type (used by PermutationMatrix)
+template<typename DstScalar> struct assign_op<DstScalar,void> {};
+
+template<typename DstScalar,typename SrcScalar>
+struct functor_traits<assign_op<DstScalar,SrcScalar> > {
+  enum {
+    Cost = NumTraits<DstScalar>::ReadCost,
+    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::Vectorizable && packet_traits<SrcScalar>::Vectorizable
+  };
+};
+
+/** \internal
+  * \brief Template functor for scalar/packet assignment with addition
+  *
+  */
+template<typename DstScalar,typename SrcScalar> struct add_assign_op {
+
+  EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }
+  
+  template<int Alignment, typename Packet>
+  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+  { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar,typename SrcScalar>
+struct functor_traits<add_assign_op<DstScalar,SrcScalar> > {
+  enum {
+    Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
+    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasAdd
+  };
+};
+
+/** \internal
+  * \brief Template functor for scalar/packet assignment with subtraction
+  *
+  */
+template<typename DstScalar,typename SrcScalar> struct sub_assign_op {
+
+  EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; }
+  
+  template<int Alignment, typename Packet>
+  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+  { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar,typename SrcScalar>
+struct functor_traits<sub_assign_op<DstScalar,SrcScalar> > {
+  enum {
+    Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
+    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasSub
+  };
+};
+
+/** \internal
+  * \brief Template functor for scalar/packet assignment with multiplication
+  *
+  */
+template<typename DstScalar, typename SrcScalar=DstScalar>
+struct mul_assign_op {
+
+  EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; }
+  
+  template<int Alignment, typename Packet>
+  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+  { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar, typename SrcScalar>
+struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {
+  enum {
+    Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
+    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul
+  };
+};
+
+/** \internal
+  * \brief Template functor for scalar/packet assignment with diviving
+  *
+  */
+template<typename DstScalar, typename SrcScalar=DstScalar> struct div_assign_op {
+
+  EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a /= b; }
+  
+  template<int Alignment, typename Packet>
+  EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
+  { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
+};
+template<typename DstScalar, typename SrcScalar>
+struct functor_traits<div_assign_op<DstScalar,SrcScalar> > {
+  enum {
+    Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
+    PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasDiv
+  };
+};
+
+/** \internal
+  * \brief Template functor for scalar/packet assignment with swapping
+  *
+  * It works as follow. For a non-vectorized evaluation loop, we have:
+  *   for(i) func(A.coeffRef(i), B.coeff(i));
+  * where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behaves like a non-const coeffRef.
+  * Actually, SwapWrapper might not even be needed since even if B is a plain expression, since it has to be writable
+  * B.coeff already returns a const reference to the underlying scalar value.
+  * 
+  * The case of a vectorized loop is more tricky:
+  *   for(i,j) func.assignPacket<A_Align>(&A.coeffRef(i,j), B.packet<B_Align>(i,j));
+  * Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*,
+  * the actual alignment and Packet type.
+  *
+  */
+template<typename Scalar> struct swap_assign_op {
+
+  EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
+  {
+#ifdef __CUDACC__
+    // FIXME is there some kind of cuda::swap?
+    Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
+#else
+    using std::swap;
+    swap(a,const_cast<Scalar&>(b));
+#endif
+  }
+};
+template<typename Scalar>
+struct functor_traits<swap_assign_op<Scalar> > {
+  enum {
+    Cost = 3 * NumTraits<Scalar>::ReadCost,
+    PacketAccess = packet_traits<Scalar>::Vectorizable
+  };
+};
+
+} // namespace internal
+
+} // namespace Eigen
+
+#endif // EIGEN_ASSIGNMENT_FUNCTORS_H
diff --git a/third-party/Eigen/src/Core/functors/BinaryFunctors.h b/third-party/Eigen/src/Core/functors/BinaryFunctors.h
new file mode 100644
index 00000000..3eae6b8c
--- /dev/null
+++ b/third-party/Eigen/src/Core/functors/BinaryFunctors.h
@@ -0,0 +1,475 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BINARY_FUNCTORS_H
+#define EIGEN_BINARY_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+//---------- associative binary functors ----------
+
+template<typename Arg1, typename Arg2>
+struct binary_op_base
+{
+  typedef Arg1 first_argument_type;
+  typedef Arg2 second_argument_type;
+};
+
+/** \internal
+  * \brief Template functor to compute the sum of two scalars
+  *
+  * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum()
+  */
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_sum_op : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_sum_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
+#else
+  scalar_sum_op() {
+    EIGEN_SCALAR_BINARY_OP_PLUGIN
+  }
+#endif
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::padd(a,b); }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+  { return internal::predux(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
+  enum {
+    Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2, // rough estimate!
+    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd
+    // TODO vectorize mixed sum
+  };
+};
+
+/** \internal
+  * \brief Template specialization to deprecate the summation of boolean expressions.
+  * This is required to solve Bug 426.
+  * \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()
+  */
+template<> struct scalar_sum_op<bool,bool> : scalar_sum_op<int,int> {
+  EIGEN_DEPRECATED
+  scalar_sum_op() {}
+};
+
+
+/** \internal
+  * \brief Template functor to compute the product of two scalars
+  *
+  * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
+  */
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_product_op  : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_product_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+#else
+  scalar_product_op() {
+    EIGEN_SCALAR_BINARY_OP_PLUGIN
+  }
+#endif
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pmul(a,b); }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+  { return internal::predux_mul(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
+  enum {
+    Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
+    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
+    // TODO vectorize mixed product
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the conjugate product of two scalars
+  *
+  * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y)
+  */
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_conj_product_op  : binary_op_base<LhsScalar,RhsScalar>
+{
+
+  enum {
+    Conj = NumTraits<LhsScalar>::IsComplex
+  };
+  
+  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;
+  
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
+  { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
+  
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
+  enum {
+    Cost = NumTraits<LhsScalar>::MulCost,
+    PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the min of two scalars
+  *
+  * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
+  */
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_min_op : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_min_op>::ReturnType result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pmin(a,b); }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+  { return internal::predux_min(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_min_op<LhsScalar,RhsScalar> > {
+  enum {
+    Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+    PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMin
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the max of two scalars
+  *
+  * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
+  */
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_max_op  : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_max_op>::ReturnType result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pmax(a,b); }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
+  { return internal::predux_max(a); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_max_op<LhsScalar,RhsScalar> > {
+  enum {
+    Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+    PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMax
+  };
+};
+
+/** \internal
+  * \brief Template functors for comparison of two scalars
+  * \todo Implement packet-comparisons
+  */
+template<typename LhsScalar, typename RhsScalar, ComparisonName cmp> struct scalar_cmp_op;
+
+template<typename LhsScalar, typename RhsScalar, ComparisonName cmp>
+struct functor_traits<scalar_cmp_op<LhsScalar,RhsScalar, cmp> > {
+  enum {
+    Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+    PacketAccess = false
+  };
+};
+
+template<ComparisonName Cmp, typename LhsScalar, typename RhsScalar>
+struct result_of<scalar_cmp_op<LhsScalar, RhsScalar, Cmp>(LhsScalar,RhsScalar)> {
+  typedef bool type;
+};
+
+
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_EQ> : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LT> : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LE> : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GT> : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GE> : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_UNORD> : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);}
+};
+template<typename LhsScalar, typename RhsScalar>
+struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef bool result_type;
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;}
+};
+
+
+/** \internal
+  * \brief Template functor to compute the hypot of two \b positive \b and \b real scalars
+  *
+  * \sa MatrixBase::stableNorm(), class Redux
+  */
+template<typename Scalar>
+struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
+{
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const
+  {
+    // This functor is used by hypotNorm only for which it is faster to first apply abs
+    // on all coefficients prior to reduction through hypot.
+    // This way we avoid calling abs on positive and real entries, and this also permits
+    // to seamlessly handle complexes. Otherwise we would have to handle both real and complexes
+    // through the same functor...
+    return internal::positive_real_hypot(x,y);
+  }
+};
+template<typename Scalar>
+struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
+  enum
+  {
+    Cost = 3 * NumTraits<Scalar>::AddCost +
+           2 * NumTraits<Scalar>::MulCost +
+           2 * scalar_div_cost<Scalar,false>::value,
+    PacketAccess = false
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the pow of two scalars
+  */
+template<typename Scalar, typename Exponent>
+struct scalar_pow_op  : binary_op_base<Scalar,Exponent>
+{
+  typedef typename ScalarBinaryOpTraits<Scalar,Exponent,scalar_pow_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op)
+#else
+  scalar_pow_op() {
+    typedef Scalar LhsScalar;
+    typedef Exponent RhsScalar;
+    EIGEN_SCALAR_BINARY_OP_PLUGIN
+  }
+#endif
+  EIGEN_DEVICE_FUNC
+  inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); }
+};
+template<typename Scalar, typename Exponent>
+struct functor_traits<scalar_pow_op<Scalar,Exponent> > {
+  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
+};
+
+
+
+//---------- non associative binary functors ----------
+
+/** \internal
+  * \brief Template functor to compute the difference of two scalars
+  *
+  * \sa class CwiseBinaryOp, MatrixBase::operator-
+  */
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_difference_op : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_difference_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
+#else
+  scalar_difference_op() {
+    EIGEN_SCALAR_BINARY_OP_PLUGIN
+  }
+#endif
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::psub(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_difference_op<LhsScalar,RhsScalar> > {
+  enum {
+    Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
+    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasSub && packet_traits<RhsScalar>::HasSub
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the quotient of two scalars
+  *
+  * \sa class CwiseBinaryOp, Cwise::operator/()
+  */
+template<typename LhsScalar,typename RhsScalar>
+struct scalar_quotient_op  : binary_op_base<LhsScalar,RhsScalar>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_quotient_op>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
+#else
+  scalar_quotient_op() {
+    EIGEN_SCALAR_BINARY_OP_PLUGIN
+  }
+#endif
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
+  { return internal::pdiv(a,b); }
+};
+template<typename LhsScalar,typename RhsScalar>
+struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
+  typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
+  enum {
+    PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
+    Cost = scalar_div_cost<result_type,PacketAccess>::value
+  };
+};
+
+
+
+/** \internal
+  * \brief Template functor to compute the and of two booleans
+  *
+  * \sa class CwiseBinaryOp, ArrayBase::operator&&
+  */
+struct scalar_boolean_and_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
+};
+template<> struct functor_traits<scalar_boolean_and_op> {
+  enum {
+    Cost = NumTraits<bool>::AddCost,
+    PacketAccess = false
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the or of two booleans
+  *
+  * \sa class CwiseBinaryOp, ArrayBase::operator||
+  */
+struct scalar_boolean_or_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
+};
+template<> struct functor_traits<scalar_boolean_or_op> {
+  enum {
+    Cost = NumTraits<bool>::AddCost,
+    PacketAccess = false
+  };
+};
+
+/** \internal
+ * \brief Template functor to compute the xor of two booleans
+ *
+ * \sa class CwiseBinaryOp, ArrayBase::operator^
+ */
+struct scalar_boolean_xor_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; }
+};
+template<> struct functor_traits<scalar_boolean_xor_op> {
+  enum {
+    Cost = NumTraits<bool>::AddCost,
+    PacketAccess = false
+  };
+};
+
+
+
+//---------- binary functors bound to a constant, thus appearing as a unary functor ----------
+
+// The following two classes permits to turn any binary functor into a unary one with one argument bound to a constant value.
+// They are analogues to std::binder1st/binder2nd but with the following differences:
+//  - they are compatible with packetOp
+//  - they are portable across C++ versions (the std::binder* are deprecated in C++11)
+template<typename BinaryOp> struct bind1st_op : BinaryOp {
+
+  typedef typename BinaryOp::first_argument_type  first_argument_type;
+  typedef typename BinaryOp::second_argument_type second_argument_type;
+  typedef typename BinaryOp::result_type          result_type;
+
+  bind1st_op(const first_argument_type &val) : m_value(val) {}
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); }
+
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const
+  { return BinaryOp::packetOp(internal::pset1<Packet>(m_value), b); }
+
+  first_argument_type m_value;
+};
+template<typename BinaryOp> struct functor_traits<bind1st_op<BinaryOp> > : functor_traits<BinaryOp> {};
+
+
+template<typename BinaryOp> struct bind2nd_op : BinaryOp {
+
+  typedef typename BinaryOp::first_argument_type  first_argument_type;
+  typedef typename BinaryOp::second_argument_type second_argument_type;
+  typedef typename BinaryOp::result_type          result_type;
+
+  bind2nd_op(const second_argument_type &val) : m_value(val) {}
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); }
+
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  { return BinaryOp::packetOp(a,internal::pset1<Packet>(m_value)); }
+
+  second_argument_type m_value;
+};
+template<typename BinaryOp> struct functor_traits<bind2nd_op<BinaryOp> > : functor_traits<BinaryOp> {};
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BINARY_FUNCTORS_H
diff --git a/third-party/Eigen/src/Core/functors/NullaryFunctors.h b/third-party/Eigen/src/Core/functors/NullaryFunctors.h
new file mode 100644
index 00000000..b03be026
--- /dev/null
+++ b/third-party/Eigen/src/Core/functors/NullaryFunctors.h
@@ -0,0 +1,188 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_NULLARY_FUNCTORS_H
+#define EIGEN_NULLARY_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Scalar>
+struct scalar_constant_op {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() () const { return m_other; }
+  template<typename PacketType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const { return internal::pset1<PacketType>(m_other); }
+  const Scalar m_other;
+};
+template<typename Scalar>
+struct functor_traits<scalar_constant_op<Scalar> >
+{ enum { Cost = 0 /* as the constant value should be loaded in register only once for the whole expression */,
+         PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
+
+template<typename Scalar> struct scalar_identity_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_identity_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
+
+template <typename Scalar, typename Packet, bool IsInteger> struct linspaced_op_impl;
+
+template <typename Scalar, typename Packet>
+struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
+{
+  linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+    m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
+    m_flip(numext::abs(high)<numext::abs(low))
+  {}
+
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    if(m_flip)
+      return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step);
+    else
+      return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step);
+  }
+
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
+  {
+    // Principle:
+    // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
+    if(m_flip)
+    {
+      Packet pi = plset<Packet>(Scalar(i-m_size1));
+      Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
+      if(i==0)
+        res = pinsertfirst(res, m_low);
+      return res;
+    }
+    else
+    {
+      Packet pi = plset<Packet>(Scalar(i));
+      Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
+      if(i==m_size1-unpacket_traits<Packet>::size+1)
+        res = pinsertlast(res, m_high);
+      return res;
+    }
+  }
+
+  const Scalar m_low;
+  const Scalar m_high;
+  const Index m_size1;
+  const Scalar m_step;
+  const bool m_flip;
+};
+
+template <typename Scalar, typename Packet>
+struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/true>
+{
+  linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+    m_low(low),
+    m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
+    m_divisor(convert_index<Scalar>((high>=low?num_steps:-num_steps)+(high-low))/((numext::abs(high-low)+1)==0?1:(numext::abs(high-low)+1))),
+    m_use_divisor(num_steps>1 && (numext::abs(high-low)+1)<num_steps)
+  {}
+
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const Scalar operator() (IndexType i) const
+  {
+    if(m_use_divisor) return m_low + convert_index<Scalar>(i)/m_divisor;
+    else              return m_low + convert_index<Scalar>(i)*m_multiplier;
+  }
+
+  const Scalar m_low;
+  const Scalar m_multiplier;
+  const Scalar m_divisor;
+  const bool m_use_divisor;
+};
+
+// ----- Linspace functor ----------------------------------------------------------------
+
+// Forward declaration (we default to random access which does not really give
+// us a speed gain when using packet access but it allows to use the functor in
+// nested expressions).
+template <typename Scalar, typename PacketType> struct linspaced_op;
+template <typename Scalar, typename PacketType> struct functor_traits< linspaced_op<Scalar,PacketType> >
+{
+  enum
+  {
+    Cost = 1,
+    PacketAccess =   (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear && packet_traits<Scalar>::HasBlend,
+                  /*&& ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),*/ // <- vectorization for integer is currently disabled
+    IsRepeatable = true
+  };
+};
+template <typename Scalar, typename PacketType> struct linspaced_op
+{
+  linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
+    : impl((num_steps==1 ? high : low),high,num_steps)
+  {}
+
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return impl(i); }
+
+  template<typename Packet,typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
+
+  // This proxy object handles the actual required temporaries and the different
+  // implementations (integer vs. floating point).
+  const linspaced_op_impl<Scalar,PacketType,NumTraits<Scalar>::IsInteger> impl;
+};
+
+// Linear access is automatically determined from the operator() prototypes available for the given functor.
+// If it exposes an operator()(i,j), then we assume the i and j coefficients are required independently
+// and linear access is not possible. In all other cases, linear access is enabled.
+// Users should not have to deal with this structure.
+template<typename Functor> struct functor_has_linear_access { enum { ret = !has_binary_operator<Functor>::value }; };
+
+// For unreliable compilers, let's specialize the has_*ary_operator
+// helpers so that at least built-in nullary functors work fine.
+#if !( (EIGEN_COMP_MSVC>1600) || (EIGEN_GNUC_AT_LEAST(4,8)) || (EIGEN_COMP_ICC>=1600))
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 1}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_constant_op<Scalar>,IndexType> { enum { value = 0}; };
+
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_identity_op<Scalar>,IndexType> { enum { value = 1}; };
+
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_nullary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_unary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 1}; };
+template<typename Scalar, typename PacketType,typename IndexType>
+struct has_binary_operator<linspaced_op<Scalar,PacketType>,IndexType> { enum { value = 0}; };
+
+template<typename Scalar,typename IndexType>
+struct has_nullary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 1}; };
+template<typename Scalar,typename IndexType>
+struct has_unary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
+template<typename Scalar,typename IndexType>
+struct has_binary_operator<scalar_random_op<Scalar>,IndexType> { enum { value = 0}; };
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_NULLARY_FUNCTORS_H
diff --git a/third-party/Eigen/src/Core/functors/StlFunctors.h b/third-party/Eigen/src/Core/functors/StlFunctors.h
new file mode 100644
index 00000000..9c1d7585
--- /dev/null
+++ b/third-party/Eigen/src/Core/functors/StlFunctors.h
@@ -0,0 +1,136 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_STL_FUNCTORS_H
+#define EIGEN_STL_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+// default functor traits for STL functors:
+
+template<typename T>
+struct functor_traits<std::multiplies<T> >
+{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::divides<T> >
+{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::plus<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::minus<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::negate<T> >
+{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_or<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_and<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::logical_not<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::greater<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::less<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::greater_equal<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::less_equal<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::equal_to<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::not_equal_to<T> >
+{ enum { Cost = 1, PacketAccess = false }; };
+
+#if (__cplusplus < 201103L) && (EIGEN_COMP_MSVC <= 1900)
+// std::binder* are deprecated since c++11 and will be removed in c++17
+template<typename T>
+struct functor_traits<std::binder2nd<T> >
+{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
+
+template<typename T>
+struct functor_traits<std::binder1st<T> >
+{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
+#endif
+
+#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910)
+// std::unary_negate is deprecated since c++17 and will be removed in c++20
+template<typename T>
+struct functor_traits<std::unary_negate<T> >
+{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+
+// std::binary_negate is deprecated since c++17 and will be removed in c++20
+template<typename T>
+struct functor_traits<std::binary_negate<T> >
+{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+#endif
+
+#ifdef EIGEN_STDEXT_SUPPORT
+
+template<typename T0,typename T1>
+struct functor_traits<std::project1st<T0,T1> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::project2nd<T0,T1> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::select2nd<std::pair<T0,T1> > >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::select1st<std::pair<T0,T1> > >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+template<typename T0,typename T1>
+struct functor_traits<std::unary_compose<T0,T1> >
+{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost, PacketAccess = false }; };
+
+template<typename T0,typename T1,typename T2>
+struct functor_traits<std::binary_compose<T0,T1,T2> >
+{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost + functor_traits<T2>::Cost, PacketAccess = false }; };
+
+#endif // EIGEN_STDEXT_SUPPORT
+
+// allow to add new functors and specializations of functor_traits from outside Eigen.
+// this macro is really needed because functor_traits must be specialized after it is declared but before it is used...
+#ifdef EIGEN_FUNCTORS_PLUGIN
+#include EIGEN_FUNCTORS_PLUGIN
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_STL_FUNCTORS_H
diff --git a/third-party/Eigen/src/Core/functors/TernaryFunctors.h b/third-party/Eigen/src/Core/functors/TernaryFunctors.h
new file mode 100644
index 00000000..b254e96c
--- /dev/null
+++ b/third-party/Eigen/src/Core/functors/TernaryFunctors.h
@@ -0,0 +1,25 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TERNARY_FUNCTORS_H
+#define EIGEN_TERNARY_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+//---------- associative ternary functors ----------
+
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TERNARY_FUNCTORS_H
diff --git a/third-party/Eigen/src/Core/functors/UnaryFunctors.h b/third-party/Eigen/src/Core/functors/UnaryFunctors.h
new file mode 100644
index 00000000..b56e7afd
--- /dev/null
+++ b/third-party/Eigen/src/Core/functors/UnaryFunctors.h
@@ -0,0 +1,792 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_UNARY_FUNCTORS_H
+#define EIGEN_UNARY_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal
+  * \brief Template functor to compute the opposite of a scalar
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::operator-
+  */
+template<typename Scalar> struct scalar_opposite_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  { return internal::pnegate(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_opposite_op<Scalar> >
+{ enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasNegate };
+};
+
+/** \internal
+  * \brief Template functor to compute the absolute value of a scalar
+  *
+  * \sa class CwiseUnaryOp, Cwise::abs
+  */
+template<typename Scalar> struct scalar_abs_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs(a); }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  { return internal::pabs(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasAbs
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the score of a scalar, to chose a pivot
+  *
+  * \sa class CwiseUnaryOp
+  */
+template<typename Scalar> struct scalar_score_coeff_op : scalar_abs_op<Scalar>
+{
+  typedef void Score_is_abs;
+};
+template<typename Scalar>
+struct functor_traits<scalar_score_coeff_op<Scalar> > : functor_traits<scalar_abs_op<Scalar> > {};
+
+/* Avoid recomputing abs when we know the score and they are the same. Not a true Eigen functor.  */
+template<typename Scalar, typename=void> struct abs_knowing_score
+{
+  EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  template<typename Score>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a, const Score&) const { return numext::abs(a); }
+};
+template<typename Scalar> struct abs_knowing_score<Scalar, typename scalar_score_coeff_op<Scalar>::Score_is_abs>
+{
+  EIGEN_EMPTY_STRUCT_CTOR(abs_knowing_score)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  template<typename Scal>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scal&, const result_type& a) const { return a; }
+};
+
+/** \internal
+  * \brief Template functor to compute the squared absolute value of a scalar
+  *
+  * \sa class CwiseUnaryOp, Cwise::abs2
+  */
+template<typename Scalar> struct scalar_abs2_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_abs2_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
+
+/** \internal
+  * \brief Template functor to compute the conjugate of a complex value
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::conjugate()
+  */
+template<typename Scalar> struct scalar_conjugate_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_conjugate_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
+    PacketAccess = packet_traits<Scalar>::HasConj
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the phase angle of a complex
+  *
+  * \sa class CwiseUnaryOp, Cwise::arg
+  */
+template<typename Scalar> struct scalar_arg_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using numext::arg; return arg(a); }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
+  { return internal::parg(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_arg_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::IsComplex ? 5 * NumTraits<Scalar>::MulCost : NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasArg
+  };
+};
+/** \internal
+  * \brief Template functor to cast a scalar to another type
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::cast()
+  */
+template<typename Scalar, typename NewType>
+struct scalar_cast_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
+  typedef NewType result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
+};
+template<typename Scalar, typename NewType>
+struct functor_traits<scalar_cast_op<Scalar,NewType> >
+{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to extract the real part of a complex
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::real()
+  */
+template<typename Scalar>
+struct scalar_real_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to extract the imaginary part of a complex
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::imag()
+  */
+template<typename Scalar>
+struct scalar_imag_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to extract the real part of a complex as a reference
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::real()
+  */
+template<typename Scalar>
+struct scalar_real_ref_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_real_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+  * \brief Template functor to extract the imaginary part of a complex as a reference
+  *
+  * \sa class CwiseUnaryOp, MatrixBase::imag()
+  */
+template<typename Scalar>
+struct scalar_imag_ref_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
+  typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_imag_ref_op<Scalar> >
+{ enum { Cost = 0, PacketAccess = false }; };
+
+/** \internal
+  *
+  * \brief Template functor to compute the exponential of a scalar
+  *
+  * \sa class CwiseUnaryOp, Cwise::exp()
+  */
+template<typename Scalar> struct scalar_exp_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::exp(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_exp_op<Scalar> > {
+  enum {
+    PacketAccess = packet_traits<Scalar>::HasExp,
+    // The following numbers are based on the AVX implementation.
+#ifdef EIGEN_VECTORIZE_FMA
+    // Haswell can issue 2 add/mul/madd per cycle.
+    Cost =
+    (sizeof(Scalar) == 4
+     // float: 8 pmadd, 4 pmul, 2 padd/psub, 6 other
+     ? (8 * NumTraits<Scalar>::AddCost + 6 * NumTraits<Scalar>::MulCost)
+     // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div,  13 other
+     : (14 * NumTraits<Scalar>::AddCost +
+        6 * NumTraits<Scalar>::MulCost +
+        scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
+#else
+    Cost =
+    (sizeof(Scalar) == 4
+     // float: 7 pmadd, 6 pmul, 4 padd/psub, 10 other
+     ? (21 * NumTraits<Scalar>::AddCost + 13 * NumTraits<Scalar>::MulCost)
+     // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div,  13 other
+     : (23 * NumTraits<Scalar>::AddCost +
+        12 * NumTraits<Scalar>::MulCost +
+        scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
+#endif
+  };
+};
+
+/** \internal
+  *
+  * \brief Template functor to compute the logarithm of a scalar
+  *
+  * \sa class CwiseUnaryOp, ArrayBase::log()
+  */
+template<typename Scalar> struct scalar_log_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_log_op<Scalar> > {
+  enum {
+    PacketAccess = packet_traits<Scalar>::HasLog,
+    Cost =
+    (PacketAccess
+     // The following numbers are based on the AVX implementation.
+#ifdef EIGEN_VECTORIZE_FMA
+     // 8 pmadd, 6 pmul, 8 padd/psub, 16 other, can issue 2 add/mul/madd per cycle.
+     ? (20 * NumTraits<Scalar>::AddCost + 7 * NumTraits<Scalar>::MulCost)
+#else
+     // 8 pmadd, 6 pmul, 8 padd/psub, 20 other
+     ? (36 * NumTraits<Scalar>::AddCost + 14 * NumTraits<Scalar>::MulCost)
+#endif
+     // Measured cost of std::log.
+     : sizeof(Scalar)==4 ? 40 : 85)
+  };
+};
+
+/** \internal
+  *
+  * \brief Template functor to compute the logarithm of 1 plus a scalar value
+  *
+  * \sa class CwiseUnaryOp, ArrayBase::log1p()
+  */
+template<typename Scalar> struct scalar_log1p_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_log1p_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::log1p(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog1p(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_log1p_op<Scalar> > {
+  enum {
+    PacketAccess = packet_traits<Scalar>::HasLog1p,
+    Cost = functor_traits<scalar_log_op<Scalar> >::Cost // TODO measure cost of log1p
+  };
+};
+
+/** \internal
+  *
+  * \brief Template functor to compute the base-10 logarithm of a scalar
+  *
+  * \sa class CwiseUnaryOp, Cwise::log10()
+  */
+template<typename Scalar> struct scalar_log10_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD_MATH(log10) return log10(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_log10_op<Scalar> >
+{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog10 }; };
+
+/** \internal
+  * \brief Template functor to compute the square root of a scalar
+  * \sa class CwiseUnaryOp, Cwise::sqrt()
+  */
+template<typename Scalar> struct scalar_sqrt_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sqrt(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
+};
+template <typename Scalar>
+struct functor_traits<scalar_sqrt_op<Scalar> > {
+  enum {
+#if EIGEN_FAST_MATH
+    // The following numbers are based on the AVX implementation.
+    Cost = (sizeof(Scalar) == 8 ? 28
+                                // 4 pmul, 1 pmadd, 3 other
+                                : (3 * NumTraits<Scalar>::AddCost +
+                                   5 * NumTraits<Scalar>::MulCost)),
+#else
+    // The following numbers are based on min VSQRT throughput on Haswell.
+    Cost = (sizeof(Scalar) == 8 ? 28 : 14),
+#endif
+    PacketAccess = packet_traits<Scalar>::HasSqrt
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the reciprocal square root of a scalar
+  * \sa class CwiseUnaryOp, Cwise::rsqrt()
+  */
+template<typename Scalar> struct scalar_rsqrt_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return Scalar(1)/numext::sqrt(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::prsqrt(a); }
+};
+
+template<typename Scalar>
+struct functor_traits<scalar_rsqrt_op<Scalar> >
+{ enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasRsqrt
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the cosine of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::cos()
+  */
+template<typename Scalar> struct scalar_cos_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return numext::cos(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cos_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasCos
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the sine of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::sin()
+  */
+template<typename Scalar> struct scalar_sin_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sin(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sin_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasSin
+  };
+};
+
+
+/** \internal
+  * \brief Template functor to compute the tan of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::tan()
+  */
+template<typename Scalar> struct scalar_tan_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tan(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_tan_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasTan
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the arc cosine of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::acos()
+  */
+template<typename Scalar> struct scalar_acos_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::acos(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_acos_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasACos
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the arc sine of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::asin()
+  */
+template<typename Scalar> struct scalar_asin_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::asin(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_asin_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasASin
+  };
+};
+
+
+/** \internal
+  * \brief Template functor to compute the atan of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::atan()
+  */
+template<typename Scalar> struct scalar_atan_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_atan_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::atan(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::patan(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_atan_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasATan
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the tanh of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::tanh()
+  */
+template <typename Scalar>
+struct scalar_tanh_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::tanh(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return ptanh(x); }
+};
+
+template <typename Scalar>
+struct functor_traits<scalar_tanh_op<Scalar> > {
+  enum {
+    PacketAccess = packet_traits<Scalar>::HasTanh,
+    Cost = ( (EIGEN_FAST_MATH && is_same<Scalar,float>::value)
+// The following numbers are based on the AVX implementation,
+#ifdef EIGEN_VECTORIZE_FMA
+                // Haswell can issue 2 add/mul/madd per cycle.
+                // 9 pmadd, 2 pmul, 1 div, 2 other
+                ? (2 * NumTraits<Scalar>::AddCost +
+                   6 * NumTraits<Scalar>::MulCost +
+                   scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
+#else
+                ? (11 * NumTraits<Scalar>::AddCost +
+                   11 * NumTraits<Scalar>::MulCost +
+                   scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
+#endif
+                // This number assumes a naive implementation of tanh
+                : (6 * NumTraits<Scalar>::AddCost +
+                   3 * NumTraits<Scalar>::MulCost +
+                   2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value +
+                   functor_traits<scalar_exp_op<Scalar> >::Cost))
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the sinh of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::sinh()
+  */
+template<typename Scalar> struct scalar_sinh_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sinh_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::sinh(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psinh(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sinh_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasSinh
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the cosh of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::cosh()
+  */
+template<typename Scalar> struct scalar_cosh_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cosh_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::cosh(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pcosh(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cosh_op<Scalar> >
+{
+  enum {
+    Cost = 5 * NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasCosh
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the inverse of a scalar
+  * \sa class CwiseUnaryOp, Cwise::inverse()
+  */
+template<typename Scalar>
+struct scalar_inverse_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+  { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_inverse_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
+
+/** \internal
+  * \brief Template functor to compute the square of a scalar
+  * \sa class CwiseUnaryOp, Cwise::square()
+  */
+template<typename Scalar>
+struct scalar_square_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+  { return internal::pmul(a,a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_square_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+
+/** \internal
+  * \brief Template functor to compute the cube of a scalar
+  * \sa class CwiseUnaryOp, Cwise::cube()
+  */
+template<typename Scalar>
+struct scalar_cube_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }
+  template<typename Packet>
+  EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
+  { return internal::pmul(a,pmul(a,a)); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_cube_op<Scalar> >
+{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
+
+/** \internal
+  * \brief Template functor to compute the rounded value of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::round()
+  */
+template<typename Scalar> struct scalar_round_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_round_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::round(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pround(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_round_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasRound
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the floor of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::floor()
+  */
+template<typename Scalar> struct scalar_floor_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_floor_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::floor(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pfloor(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_floor_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasFloor
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the ceil of a scalar
+  * \sa class CwiseUnaryOp, ArrayBase::ceil()
+  */
+template<typename Scalar> struct scalar_ceil_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_ceil_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::MulCost,
+    PacketAccess = packet_traits<Scalar>::HasCeil
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute whether a scalar is NaN
+  * \sa class CwiseUnaryOp, ArrayBase::isnan()
+  */
+template<typename Scalar> struct scalar_isnan_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op)
+  typedef bool result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_isnan_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::MulCost,
+    PacketAccess = false
+  };
+};
+
+/** \internal
+  * \brief Template functor to check whether a scalar is +/-inf
+  * \sa class CwiseUnaryOp, ArrayBase::isinf()
+  */
+template<typename Scalar> struct scalar_isinf_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op)
+  typedef bool result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_isinf_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::MulCost,
+    PacketAccess = false
+  };
+};
+
+/** \internal
+  * \brief Template functor to check whether a scalar has a finite value
+  * \sa class CwiseUnaryOp, ArrayBase::isfinite()
+  */
+template<typename Scalar> struct scalar_isfinite_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op)
+  typedef bool result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isfinite)(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_isfinite_op<Scalar> >
+{
+  enum {
+    Cost = NumTraits<Scalar>::MulCost,
+    PacketAccess = false
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the logical not of a boolean
+  *
+  * \sa class CwiseUnaryOp, ArrayBase::operator!
+  */
+template<typename Scalar> struct scalar_boolean_not_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_not_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a) const { return !a; }
+};
+template<typename Scalar>
+struct functor_traits<scalar_boolean_not_op<Scalar> > {
+  enum {
+    Cost = NumTraits<bool>::AddCost,
+    PacketAccess = false
+  };
+};
+
+/** \internal
+  * \brief Template functor to compute the signum of a scalar
+  * \sa class CwiseUnaryOp, Cwise::sign()
+  */
+template<typename Scalar,bool iscpx=(NumTraits<Scalar>::IsComplex!=0) > struct scalar_sign_op;
+template<typename Scalar>
+struct scalar_sign_op<Scalar,false> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
+  {
+      return Scalar( (a>Scalar(0)) - (a<Scalar(0)) );
+  }
+  //TODO
+  //template <typename Packet>
+  //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }
+};
+template<typename Scalar>
+struct scalar_sign_op<Scalar,true> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_sign_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const
+  {
+    typedef typename NumTraits<Scalar>::Real real_type;
+    real_type aa = numext::abs(a);
+    if (aa==real_type(0))
+      return Scalar(0);
+    aa = real_type(1)/aa;
+    return Scalar(a.real()*aa, a.imag()*aa );
+  }
+  //TODO
+  //template <typename Packet>
+  //EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::psign(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_sign_op<Scalar> >
+{ enum {
+    Cost = 
+        NumTraits<Scalar>::IsComplex
+        ? ( 8*NumTraits<Scalar>::MulCost  ) // roughly
+        : ( 3*NumTraits<Scalar>::AddCost),
+    PacketAccess = packet_traits<Scalar>::HasSign
+  };
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_FUNCTORS_H
diff --git a/third-party/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/third-party/Eigen/src/Core/products/GeneralBlockPanelKernel.h
new file mode 100644
index 00000000..681451cc
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -0,0 +1,2157 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERAL_BLOCK_PANEL_H
+#define EIGEN_GENERAL_BLOCK_PANEL_H
+
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs=false, bool _ConjRhs=false>
+class gebp_traits;
+
+
+/** \internal \returns b if a<=0, and returns a otherwise. */
+inline std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
+{
+  return a<=0 ? b : a;
+}
+
+#if EIGEN_ARCH_i386_OR_x86_64
+const std::ptrdiff_t defaultL1CacheSize = 32*1024;
+const std::ptrdiff_t defaultL2CacheSize = 256*1024;
+const std::ptrdiff_t defaultL3CacheSize = 2*1024*1024;
+#else
+const std::ptrdiff_t defaultL1CacheSize = 16*1024;
+const std::ptrdiff_t defaultL2CacheSize = 512*1024;
+const std::ptrdiff_t defaultL3CacheSize = 512*1024;
+#endif
+
+/** \internal */
+struct CacheSizes {
+  CacheSizes(): m_l1(-1),m_l2(-1),m_l3(-1) {
+    int l1CacheSize, l2CacheSize, l3CacheSize;
+    queryCacheSizes(l1CacheSize, l2CacheSize, l3CacheSize);
+    m_l1 = manage_caching_sizes_helper(l1CacheSize, defaultL1CacheSize);
+    m_l2 = manage_caching_sizes_helper(l2CacheSize, defaultL2CacheSize);
+    m_l3 = manage_caching_sizes_helper(l3CacheSize, defaultL3CacheSize);
+  }
+
+  std::ptrdiff_t m_l1;
+  std::ptrdiff_t m_l2;
+  std::ptrdiff_t m_l3;
+};
+
+
+/** \internal */
+inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1, std::ptrdiff_t* l2, std::ptrdiff_t* l3)
+{
+  static CacheSizes m_cacheSizes;
+
+  if(action==SetAction)
+  {
+    // set the cpu cache size and cache all block sizes from a global cache size in byte
+    eigen_internal_assert(l1!=0 && l2!=0);
+    m_cacheSizes.m_l1 = *l1;
+    m_cacheSizes.m_l2 = *l2;
+    m_cacheSizes.m_l3 = *l3;
+  }
+  else if(action==GetAction)
+  {
+    eigen_internal_assert(l1!=0 && l2!=0);
+    *l1 = m_cacheSizes.m_l1;
+    *l2 = m_cacheSizes.m_l2;
+    *l3 = m_cacheSizes.m_l3;
+  }
+  else
+  {
+    eigen_internal_assert(false);
+  }
+}
+
+/* Helper for computeProductBlockingSizes.
+ *
+ * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
+ * this function computes the blocking size parameters along the respective dimensions
+ * for matrix products and related algorithms. The blocking sizes depends on various
+ * parameters:
+ * - the L1 and L2 cache sizes,
+ * - the register level blocking sizes defined by gebp_traits,
+ * - the number of scalars that fit into a packet (when vectorization is enabled).
+ *
+ * \sa setCpuCacheSizes */
+
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
+void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+  typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+
+  // Explanations:
+  // Let's recall that the product algorithms form mc x kc vertical panels A' on the lhs and
+  // kc x nc blocks B' on the rhs. B' has to fit into L2/L3 cache. Moreover, A' is processed
+  // per mr x kc horizontal small panels where mr is the blocking size along the m dimension
+  // at the register level. This small horizontal panel has to stay within L1 cache.
+  std::ptrdiff_t l1, l2, l3;
+  manage_caching_sizes(GetAction, &l1, &l2, &l3);
+
+  if (num_threads > 1) {
+    typedef typename Traits::ResScalar ResScalar;
+    enum {
+      kdiv = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
+      ksub = Traits::mr * Traits::nr * sizeof(ResScalar),
+      kr = 8,
+      mr = Traits::mr,
+      nr = Traits::nr
+    };
+    // Increasing k gives us more time to prefetch the content of the "C"
+    // registers. However once the latency is hidden there is no point in
+    // increasing the value of k, so we'll cap it at 320 (value determined
+    // experimentally).
+    // To avoid that k vanishes, we make k_cache at least as big as kr
+    const Index k_cache = numext::maxi<Index>(kr, (numext::mini<Index>)((l1-ksub)/kdiv, 320));
+    if (k_cache < k) {
+      k = k_cache - (k_cache % kr);
+      eigen_internal_assert(k > 0);
+    }
+
+    const Index n_cache = (l2-l1) / (nr * sizeof(RhsScalar) * k);
+    const Index n_per_thread = numext::div_ceil(n, num_threads);
+    if (n_cache <= n_per_thread) {
+      // Don't exceed the capacity of the l2 cache.
+      eigen_internal_assert(n_cache >= static_cast<Index>(nr));
+      n = n_cache - (n_cache % nr);
+      eigen_internal_assert(n > 0);
+    } else {
+      n = (numext::mini<Index>)(n, (n_per_thread + nr - 1) - ((n_per_thread + nr - 1) % nr));
+    }
+
+    if (l3 > l2) {
+      // l3 is shared between all cores, so we'll give each thread its own chunk of l3.
+      const Index m_cache = (l3-l2) / (sizeof(LhsScalar) * k * num_threads);
+      const Index m_per_thread = numext::div_ceil(m, num_threads);
+      if(m_cache < m_per_thread && m_cache >= static_cast<Index>(mr)) {
+        m = m_cache - (m_cache % mr);
+        eigen_internal_assert(m > 0);
+      } else {
+        m = (numext::mini<Index>)(m, (m_per_thread + mr - 1) - ((m_per_thread + mr - 1) % mr));
+      }
+    }
+  }
+  else {
+    // In unit tests we do not want to use extra large matrices,
+    // so we reduce the cache size to check the blocking strategy is not flawed
+#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
+    l1 = 9*1024;
+    l2 = 32*1024;
+    l3 = 512*1024;
+#endif
+
+    // Early return for small problems because the computation below are time consuming for small problems.
+    // Perhaps it would make more sense to consider k*n*m??
+    // Note that for very tiny problem, this function should be bypassed anyway
+    // because we use the coefficient-based implementation for them.
+    if((numext::maxi)(k,(numext::maxi)(m,n))<48)
+      return;
+
+    typedef typename Traits::ResScalar ResScalar;
+    enum {
+      k_peeling = 8,
+      k_div = KcFactor * (Traits::mr * sizeof(LhsScalar) + Traits::nr * sizeof(RhsScalar)),
+      k_sub = Traits::mr * Traits::nr * sizeof(ResScalar)
+    };
+
+    // ---- 1st level of blocking on L1, yields kc ----
+
+    // Blocking on the third dimension (i.e., k) is chosen so that an horizontal panel
+    // of size mr x kc of the lhs plus a vertical panel of kc x nr of the rhs both fits within L1 cache.
+    // We also include a register-level block of the result (mx x nr).
+    // (In an ideal world only the lhs panel would stay in L1)
+    // Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:
+    const Index max_kc = numext::maxi<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
+    const Index old_k = k;
+    if(k>max_kc)
+    {
+      // We are really blocking on the third dimension:
+      // -> reduce blocking size to make sure the last block is as large as possible
+      //    while keeping the same number of sweeps over the result.
+      k = (k%max_kc)==0 ? max_kc
+                        : max_kc - k_peeling * ((max_kc-1-(k%max_kc))/(k_peeling*(k/max_kc+1)));
+
+      eigen_internal_assert(((old_k/k) == (old_k/max_kc)) && "the number of sweeps has to remain the same");
+    }
+
+    // ---- 2nd level of blocking on max(L2,L3), yields nc ----
+
+    // TODO find a reliable way to get the actual amount of cache per core to use for 2nd level blocking, that is:
+    //      actual_l2 = max(l2, l3/nb_core_sharing_l3)
+    // The number below is quite conservative: it is better to underestimate the cache size rather than overestimating it)
+    // For instance, it corresponds to 6MB of L3 shared among 4 cores.
+    #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
+    const Index actual_l2 = l3;
+    #else
+    const Index actual_l2 = 1572864; // == 1.5 MB
+    #endif
+
+    // Here, nc is chosen such that a block of kc x nc of the rhs fit within half of L2.
+    // The second half is implicitly reserved to access the result and lhs coefficients.
+    // When k<max_kc, then nc can arbitrarily growth. In practice, it seems to be fruitful
+    // to limit this growth: we bound nc to growth by a factor x1.5.
+    // However, if the entire lhs block fit within L1, then we are not going to block on the rows at all,
+    // and it becomes fruitful to keep the packed rhs blocks in L1 if there is enough remaining space.
+    Index max_nc;
+    const Index lhs_bytes = m * k * sizeof(LhsScalar);
+    const Index remaining_l1 = l1- k_sub - lhs_bytes;
+    if(remaining_l1 >= Index(Traits::nr*sizeof(RhsScalar))*k)
+    {
+      // L1 blocking
+      max_nc = remaining_l1 / (k*sizeof(RhsScalar));
+    }
+    else
+    {
+      // L2 blocking
+      max_nc = (3*actual_l2)/(2*2*max_kc*sizeof(RhsScalar));
+    }
+    // WARNING Below, we assume that Traits::nr is a power of two.
+    Index nc = numext::mini<Index>(actual_l2/(2*k*sizeof(RhsScalar)), max_nc) & (~(Traits::nr-1));
+    if(n>nc)
+    {
+      // We are really blocking over the columns:
+      // -> reduce blocking size to make sure the last block is as large as possible
+      //    while keeping the same number of sweeps over the packed lhs.
+      //    Here we allow one more sweep if this gives us a perfect match, thus the commented "-1"
+      n = (n%nc)==0 ? nc
+                    : (nc - Traits::nr * ((nc/*-1*/-(n%nc))/(Traits::nr*(n/nc+1))));
+    }
+    else if(old_k==k)
+    {
+      // So far, no blocking at all, i.e., kc==k, and nc==n.
+      // In this case, let's perform a blocking over the rows such that the packed lhs data is kept in cache L1/L2
+      // TODO: part of this blocking strategy is now implemented within the kernel itself, so the L1-based heuristic here should be obsolete.
+      Index problem_size = k*n*sizeof(LhsScalar);
+      Index actual_lm = actual_l2;
+      Index max_mc = m;
+      if(problem_size<=1024)
+      {
+        // problem is small enough to keep in L1
+        // Let's choose m such that lhs's block fit in 1/3 of L1
+        actual_lm = l1;
+      }
+      else if(l3!=0 && problem_size<=32768)
+      {
+        // we have both L2 and L3, and problem is small enough to be kept in L2
+        // Let's choose m such that lhs's block fit in 1/3 of L2
+        actual_lm = l2;
+        max_mc = (numext::mini<Index>)(576,max_mc);
+      }
+      Index mc = (numext::mini<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
+      if (mc > Traits::mr) mc -= mc % Traits::mr;
+      else if (mc==0) return;
+      m = (m%mc)==0 ? mc
+                    : (mc - Traits::mr * ((mc/*-1*/-(m%mc))/(Traits::mr*(m/mc+1))));
+    }
+  }
+}
+
+template <typename Index>
+inline bool useSpecificBlockingSizes(Index& k, Index& m, Index& n)
+{
+#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
+  if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
+    k = numext::mini<Index>(k, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K);
+    m = numext::mini<Index>(m, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M);
+    n = numext::mini<Index>(n, EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N);
+    return true;
+  }
+#else
+  EIGEN_UNUSED_VARIABLE(k)
+  EIGEN_UNUSED_VARIABLE(m)
+  EIGEN_UNUSED_VARIABLE(n)
+#endif
+  return false;
+}
+
+/** \brief Computes the blocking parameters for a m x k times k x n matrix product
+  *
+  * \param[in,out] k Input: the third dimension of the product. Output: the blocking size along the same dimension.
+  * \param[in,out] m Input: the number of rows of the left hand side. Output: the blocking size along the same dimension.
+  * \param[in,out] n Input: the number of columns of the right hand side. Output: the blocking size along the same dimension.
+  *
+  * Given a m x k times k x n matrix product of scalar types \c LhsScalar and \c RhsScalar,
+  * this function computes the blocking size parameters along the respective dimensions
+  * for matrix products and related algorithms.
+  *
+  * The blocking size parameters may be evaluated:
+  *   - either by a heuristic based on cache sizes;
+  *   - or using fixed prescribed values (for testing purposes).
+  *
+  * \sa setCpuCacheSizes */
+
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename Index>
+void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+  if (!useSpecificBlockingSizes(k, m, n)) {
+    evaluateProductBlockingSizesHeuristic<LhsScalar, RhsScalar, KcFactor, Index>(k, m, n, num_threads);
+  }
+}
+
+template<typename LhsScalar, typename RhsScalar, typename Index>
+inline void computeProductBlockingSizes(Index& k, Index& m, Index& n, Index num_threads = 1)
+{
+  computeProductBlockingSizes<LhsScalar,RhsScalar,1,Index>(k, m, n, num_threads);
+}
+
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD
+  #define CJMADD(CJ,A,B,C,T)  C = CJ.pmadd(A,B,C);
+#else
+
+  // FIXME (a bit overkill maybe ?)
+
+  template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
+    EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
+    {
+      c = cj.pmadd(a,b,c);
+    }
+  };
+
+  template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
+    EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
+    {
+      t = b; t = cj.pmul(a,t); c = padd(c,t);
+    }
+  };
+
+  template<typename CJ, typename A, typename B, typename C, typename T>
+  EIGEN_STRONG_INLINE void gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
+  {
+    gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
+  }
+
+  #define CJMADD(CJ,A,B,C,T)  gebp_madd(CJ,A,B,C,T);
+//   #define CJMADD(CJ,A,B,C,T)  T = B; T = CJ.pmul(A,T); C = padd(C,T);
+#endif
+
+/* Vectorization logic
+ *  real*real: unpack rhs to constant packets, ...
+ * 
+ *  cd*cd : unpack rhs to (b_r,b_r), (b_i,b_i), mul to get (a_r b_r,a_i b_r) (a_r b_i,a_i b_i),
+ *          storing each res packet into two packets (2x2),
+ *          at the end combine them: swap the second and addsub them 
+ *  cf*cf : same but with 2x4 blocks
+ *  cplx*real : unpack rhs to constant packets, ...
+ *  real*cplx : load lhs as (a0,a0,a1,a1), and mul as usual
+ */
+template<typename _LhsScalar, typename _RhsScalar, bool _ConjLhs, bool _ConjRhs>
+class gebp_traits
+{
+public:
+  typedef _LhsScalar LhsScalar;
+  typedef _RhsScalar RhsScalar;
+  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+
+  enum {
+    ConjLhs = _ConjLhs,
+    ConjRhs = _ConjRhs,
+    Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
+    LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+    RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+    ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+    
+    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
+
+    // register block size along the N direction must be 1 or 4
+    nr = 4,
+
+    // register block size along the M direction (currently, this one cannot be modified)
+    default_mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
+#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
+    // we assume 16 registers
+    // See bug 992, if the scalar type is not vectorizable but that EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined,
+    // then using 3*LhsPacketSize triggers non-implemented paths in syrk.
+    mr = Vectorizable ? 3*LhsPacketSize : default_mr,
+#else
+    mr = default_mr,
+#endif
+    
+    LhsProgress = LhsPacketSize,
+    RhsProgress = 1
+  };
+
+  typedef typename packet_traits<LhsScalar>::type  _LhsPacket;
+  typedef typename packet_traits<RhsScalar>::type  _RhsPacket;
+  typedef typename packet_traits<ResScalar>::type  _ResPacket;
+
+  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+
+  typedef ResPacket AccPacket;
+  
+  EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
+  {
+    p = pset1<ResPacket>(ResScalar(0));
+  }
+  
+  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+  {
+    pbroadcast4(b, b0, b1, b2, b3);
+  }
+  
+//   EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1)
+//   {
+//     pbroadcast2(b, b0, b1);
+//   }
+  
+  template<typename RhsPacketType>
+  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const
+  {
+    dest = pset1<RhsPacketType>(*b);
+  }
+  
+  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+  {
+    dest = ploadquad<RhsPacket>(b);
+  }
+
+  template<typename LhsPacketType>
+  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacketType& dest) const
+  {
+    dest = pload<LhsPacketType>(a);
+  }
+
+  template<typename LhsPacketType>
+  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const
+  {
+    dest = ploadu<LhsPacketType>(a);
+  }
+
+  template<typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
+  EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, AccPacketType& tmp) const
+  {
+    conj_helper<LhsPacketType,RhsPacketType,ConjLhs,ConjRhs> cj;
+    // It would be a lot cleaner to call pmadd all the time. Unfortunately if we
+    // let gcc allocate the register in which to store the result of the pmul
+    // (in the case where there is no FMA) gcc fails to figure out how to avoid
+    // spilling register.
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+    EIGEN_UNUSED_VARIABLE(tmp);
+    c = cj.pmadd(a,b,c);
+#else
+    tmp = b; tmp = cj.pmul(a,tmp); c = padd(c,tmp);
+#endif
+  }
+
+  EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
+  {
+    r = pmadd(c,alpha,r);
+  }
+  
+  template<typename ResPacketHalf>
+  EIGEN_STRONG_INLINE void acc(const ResPacketHalf& c, const ResPacketHalf& alpha, ResPacketHalf& r) const
+  {
+    r = pmadd(c,alpha,r);
+  }
+
+};
+
+template<typename RealScalar, bool _ConjLhs>
+class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
+{
+public:
+  typedef std::complex<RealScalar> LhsScalar;
+  typedef RealScalar RhsScalar;
+  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+
+  enum {
+    ConjLhs = _ConjLhs,
+    ConjRhs = false,
+    Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable,
+    LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+    RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+    ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+    
+    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
+    nr = 4,
+#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
+    // we assume 16 registers
+    mr = 3*LhsPacketSize,
+#else
+    mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*LhsPacketSize,
+#endif
+
+    LhsProgress = LhsPacketSize,
+    RhsProgress = 1
+  };
+
+  typedef typename packet_traits<LhsScalar>::type  _LhsPacket;
+  typedef typename packet_traits<RhsScalar>::type  _RhsPacket;
+  typedef typename packet_traits<ResScalar>::type  _ResPacket;
+
+  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+
+  typedef ResPacket AccPacket;
+
+  EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
+  {
+    p = pset1<ResPacket>(ResScalar(0));
+  }
+
+  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
+  {
+    dest = pset1<RhsPacket>(*b);
+  }
+  
+  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+  {
+    dest = pset1<RhsPacket>(*b);
+  }
+
+  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
+  {
+    dest = pload<LhsPacket>(a);
+  }
+
+  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
+  {
+    dest = ploadu<LhsPacket>(a);
+  }
+
+  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+  {
+    pbroadcast4(b, b0, b1, b2, b3);
+  }
+  
+//   EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1)
+//   {
+//     pbroadcast2(b, b0, b1);
+//   }
+
+  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
+  {
+    madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
+  }
+
+  EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
+  {
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+    EIGEN_UNUSED_VARIABLE(tmp);
+    c.v = pmadd(a.v,b,c.v);
+#else
+    tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp);
+#endif
+  }
+
+  EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const
+  {
+    c += a * b;
+  }
+
+  EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
+  {
+    r = cj.pmadd(c,alpha,r);
+  }
+
+protected:
+  conj_helper<ResPacket,ResPacket,ConjLhs,false> cj;
+};
+
+template<typename Packet>
+struct DoublePacket
+{
+  Packet first;
+  Packet second;
+};
+
+template<typename Packet>
+DoublePacket<Packet> padd(const DoublePacket<Packet> &a, const DoublePacket<Packet> &b)
+{
+  DoublePacket<Packet> res;
+  res.first  = padd(a.first, b.first);
+  res.second = padd(a.second,b.second);
+  return res;
+}
+
+template<typename Packet>
+const DoublePacket<Packet>& predux_downto4(const DoublePacket<Packet> &a)
+{
+  return a;
+}
+
+template<typename Packet> struct unpacket_traits<DoublePacket<Packet> > { typedef DoublePacket<Packet> half; };
+// template<typename Packet>
+// DoublePacket<Packet> pmadd(const DoublePacket<Packet> &a, const DoublePacket<Packet> &b)
+// {
+//   DoublePacket<Packet> res;
+//   res.first  = padd(a.first, b.first);
+//   res.second = padd(a.second,b.second);
+//   return res;
+// }
+
+template<typename RealScalar, bool _ConjLhs, bool _ConjRhs>
+class gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, _ConjLhs, _ConjRhs >
+{
+public:
+  typedef std::complex<RealScalar>  Scalar;
+  typedef std::complex<RealScalar>  LhsScalar;
+  typedef std::complex<RealScalar>  RhsScalar;
+  typedef std::complex<RealScalar>  ResScalar;
+  
+  enum {
+    ConjLhs = _ConjLhs,
+    ConjRhs = _ConjRhs,
+    Vectorizable = packet_traits<RealScalar>::Vectorizable
+                && packet_traits<Scalar>::Vectorizable,
+    RealPacketSize  = Vectorizable ? packet_traits<RealScalar>::size : 1,
+    ResPacketSize   = Vectorizable ? packet_traits<ResScalar>::size : 1,
+    LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+    RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+
+    // FIXME: should depend on NumberOfRegisters
+    nr = 4,
+    mr = ResPacketSize,
+
+    LhsProgress = ResPacketSize,
+    RhsProgress = 1
+  };
+  
+  typedef typename packet_traits<RealScalar>::type RealPacket;
+  typedef typename packet_traits<Scalar>::type     ScalarPacket;
+  typedef DoublePacket<RealPacket> DoublePacketType;
+
+  typedef typename conditional<Vectorizable,RealPacket,  Scalar>::type LhsPacket;
+  typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type RhsPacket;
+  typedef typename conditional<Vectorizable,ScalarPacket,Scalar>::type ResPacket;
+  typedef typename conditional<Vectorizable,DoublePacketType,Scalar>::type AccPacket;
+  
+  EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }
+
+  EIGEN_STRONG_INLINE void initAcc(DoublePacketType& p)
+  {
+    p.first   = pset1<RealPacket>(RealScalar(0));
+    p.second  = pset1<RealPacket>(RealScalar(0));
+  }
+
+  // Scalar path
+  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ResPacket& dest) const
+  {
+    dest = pset1<ResPacket>(*b);
+  }
+
+  // Vectorized path
+  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacketType& dest) const
+  {
+    dest.first  = pset1<RealPacket>(numext::real(*b));
+    dest.second = pset1<RealPacket>(numext::imag(*b));
+  }
+  
+  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const
+  {
+    loadRhs(b,dest);
+  }
+  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, DoublePacketType& dest) const
+  {
+    eigen_internal_assert(unpacket_traits<ScalarPacket>::size<=4);
+    loadRhs(b,dest);
+  }
+  
+  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+  {
+    // FIXME not sure that's the best way to implement it!
+    loadRhs(b+0, b0);
+    loadRhs(b+1, b1);
+    loadRhs(b+2, b2);
+    loadRhs(b+3, b3);
+  }
+  
+  // Vectorized path
+  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, DoublePacketType& b0, DoublePacketType& b1)
+  {
+    // FIXME not sure that's the best way to implement it!
+    loadRhs(b+0, b0);
+    loadRhs(b+1, b1);
+  }
+  
+  // Scalar path
+  EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsScalar& b0, RhsScalar& b1)
+  {
+    // FIXME not sure that's the best way to implement it!
+    loadRhs(b+0, b0);
+    loadRhs(b+1, b1);
+  }
+
+  // nothing special here
+  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
+  {
+    dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
+  }
+
+  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
+  {
+    dest = ploadu<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
+  }
+
+  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, DoublePacketType& c, RhsPacket& /*tmp*/) const
+  {
+    c.first   = padd(pmul(a,b.first), c.first);
+    c.second  = padd(pmul(a,b.second),c.second);
+  }
+
+  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/) const
+  {
+    c = cj.pmadd(a,b,c);
+  }
+  
+  EIGEN_STRONG_INLINE void acc(const Scalar& c, const Scalar& alpha, Scalar& r) const { r += alpha * c; }
+  
+  EIGEN_STRONG_INLINE void acc(const DoublePacketType& c, const ResPacket& alpha, ResPacket& r) const
+  {
+    // assemble c
+    ResPacket tmp;
+    if((!ConjLhs)&&(!ConjRhs))
+    {
+      tmp = pcplxflip(pconj(ResPacket(c.second)));
+      tmp = padd(ResPacket(c.first),tmp);
+    }
+    else if((!ConjLhs)&&(ConjRhs))
+    {
+      tmp = pconj(pcplxflip(ResPacket(c.second)));
+      tmp = padd(ResPacket(c.first),tmp);
+    }
+    else if((ConjLhs)&&(!ConjRhs))
+    {
+      tmp = pcplxflip(ResPacket(c.second));
+      tmp = padd(pconj(ResPacket(c.first)),tmp);
+    }
+    else if((ConjLhs)&&(ConjRhs))
+    {
+      tmp = pcplxflip(ResPacket(c.second));
+      tmp = psub(pconj(ResPacket(c.first)),tmp);
+    }
+    
+    r = pmadd(tmp,alpha,r);
+  }
+
+protected:
+  conj_helper<LhsScalar,RhsScalar,ConjLhs,ConjRhs> cj;
+};
+
+template<typename RealScalar, bool _ConjRhs>
+class gebp_traits<RealScalar, std::complex<RealScalar>, false, _ConjRhs >
+{
+public:
+  typedef std::complex<RealScalar>  Scalar;
+  typedef RealScalar  LhsScalar;
+  typedef Scalar      RhsScalar;
+  typedef Scalar      ResScalar;
+
+  enum {
+    ConjLhs = false,
+    ConjRhs = _ConjRhs,
+    Vectorizable = packet_traits<RealScalar>::Vectorizable
+                && packet_traits<Scalar>::Vectorizable,
+    LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+    RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+    ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1,
+    
+    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
+    // FIXME: should depend on NumberOfRegisters
+    nr = 4,
+    mr = (EIGEN_PLAIN_ENUM_MIN(16,NumberOfRegisters)/2/nr)*ResPacketSize,
+
+    LhsProgress = ResPacketSize,
+    RhsProgress = 1
+  };
+
+  typedef typename packet_traits<LhsScalar>::type  _LhsPacket;
+  typedef typename packet_traits<RhsScalar>::type  _RhsPacket;
+  typedef typename packet_traits<ResScalar>::type  _ResPacket;
+
+  typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+  typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+  typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+
+  typedef ResPacket AccPacket;
+
+  EIGEN_STRONG_INLINE void initAcc(AccPacket& p)
+  {
+    p = pset1<ResPacket>(ResScalar(0));
+  }
+
+  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
+  {
+    dest = pset1<RhsPacket>(*b);
+  }
+  
+  void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1, RhsPacket& b2, RhsPacket& b3)
+  {
+    pbroadcast4(b, b0, b1, b2, b3);
+  }
+  
+//   EIGEN_STRONG_INLINE void broadcastRhs(const RhsScalar* b, RhsPacket& b0, RhsPacket& b1)
+//   {
+//     // FIXME not sure that's the best way to implement it!
+//     b0 = pload1<RhsPacket>(b+0);
+//     b1 = pload1<RhsPacket>(b+1);
+//   }
+
+  EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const
+  {
+    dest = ploaddup<LhsPacket>(a);
+  }
+  
+  EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const
+  {
+    eigen_internal_assert(unpacket_traits<RhsPacket>::size<=4);
+    loadRhs(b,dest);
+  }
+
+  EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacket& dest) const
+  {
+    dest = ploaddup<LhsPacket>(a);
+  }
+
+  EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp) const
+  {
+    madd_impl(a, b, c, tmp, typename conditional<Vectorizable,true_type,false_type>::type());
+  }
+
+  EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
+  {
+#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+    EIGEN_UNUSED_VARIABLE(tmp);
+    c.v = pmadd(a,b.v,c.v);
+#else
+    tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp);
+#endif
+    
+  }
+
+  EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const
+  {
+    c += a * b;
+  }
+
+  EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
+  {
+    r = cj.pmadd(alpha,c,r);
+  }
+
+protected:
+  conj_helper<ResPacket,ResPacket,false,ConjRhs> cj;
+};
+
+/* optimized GEneral packed Block * packed Panel product kernel
+ *
+ * Mixing type logic: C += A * B
+ *  |  A  |  B  | comments
+ *  |real |cplx | no vectorization yet, would require to pack A with duplication
+ *  |cplx |real | easy vectorization
+ */
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+struct gebp_kernel
+{
+  typedef gebp_traits<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> Traits;
+  typedef typename Traits::ResScalar ResScalar;
+  typedef typename Traits::LhsPacket LhsPacket;
+  typedef typename Traits::RhsPacket RhsPacket;
+  typedef typename Traits::ResPacket ResPacket;
+  typedef typename Traits::AccPacket AccPacket;
+
+  typedef gebp_traits<RhsScalar,LhsScalar,ConjugateRhs,ConjugateLhs> SwappedTraits;
+  typedef typename SwappedTraits::ResScalar SResScalar;
+  typedef typename SwappedTraits::LhsPacket SLhsPacket;
+  typedef typename SwappedTraits::RhsPacket SRhsPacket;
+  typedef typename SwappedTraits::ResPacket SResPacket;
+  typedef typename SwappedTraits::AccPacket SAccPacket;
+
+  typedef typename DataMapper::LinearMapper LinearMapper;
+
+  enum {
+    Vectorizable  = Traits::Vectorizable,
+    LhsProgress   = Traits::LhsProgress,
+    RhsProgress   = Traits::RhsProgress,
+    ResPacketSize = Traits::ResPacketSize
+  };
+
+  EIGEN_DONT_INLINE
+  void operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
+                  Index rows, Index depth, Index cols, ResScalar alpha,
+                  Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0);
+};
+
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE
+void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,ConjugateRhs>
+  ::operator()(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
+               Index rows, Index depth, Index cols, ResScalar alpha,
+               Index strideA, Index strideB, Index offsetA, Index offsetB)
+  {
+    Traits traits;
+    SwappedTraits straits;
+    
+    if(strideA==-1) strideA = depth;
+    if(strideB==-1) strideB = depth;
+    conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+    Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+    const Index peeled_mc3 = mr>=3*Traits::LhsProgress ? (rows/(3*LhsProgress))*(3*LhsProgress) : 0;
+    const Index peeled_mc2 = mr>=2*Traits::LhsProgress ? peeled_mc3+((rows-peeled_mc3)/(2*LhsProgress))*(2*LhsProgress) : 0;
+    const Index peeled_mc1 = mr>=1*Traits::LhsProgress ? (rows/(1*LhsProgress))*(1*LhsProgress) : 0;
+    enum { pk = 8 }; // NOTE Such a large peeling factor is important for large matrices (~ +5% when >1000 on Haswell)
+    const Index peeled_kc  = depth & ~(pk-1);
+    const Index prefetch_res_offset = 32/sizeof(ResScalar);    
+//     const Index depth2     = depth & ~1;
+
+    //---------- Process 3 * LhsProgress rows at once ----------
+    // This corresponds to 3*LhsProgress x nr register blocks.
+    // Usually, make sense only with FMA
+    if(mr>=3*Traits::LhsProgress)
+    {
+      // Here, the general idea is to loop on each largest micro horizontal panel of the lhs (3*Traits::LhsProgress x depth)
+      // and on each largest micro vertical panel of the rhs (depth * nr).
+      // Blocking sizes, i.e., 'depth' has been computed so that the micro horizontal panel of the lhs fit in L1.
+      // However, if depth is too small, we can extend the number of rows of these horizontal panels.
+      // This actual number of rows is computed as follow:
+      const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
+      // The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
+      // suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
+      // or because we are testing specific blocking sizes.
+      const Index actual_panel_rows = (3*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 3*LhsProgress) ));
+      for(Index i1=0; i1<peeled_mc3; i1+=actual_panel_rows)
+      {
+        const Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc3);
+        for(Index j2=0; j2<packet_cols4; j2+=nr)
+        {
+          for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
+          {
+          
+          // We selected a 3*Traits::LhsProgress x nr micro block of res which is entirely
+          // stored into 3 x nr registers.
+          
+          const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*LhsProgress)];
+          prefetch(&blA[0]);
+
+          // gets res block as register
+          AccPacket C0, C1, C2,  C3,
+                    C4, C5, C6,  C7,
+                    C8, C9, C10, C11;
+          traits.initAcc(C0);  traits.initAcc(C1);  traits.initAcc(C2);  traits.initAcc(C3);
+          traits.initAcc(C4);  traits.initAcc(C5);  traits.initAcc(C6);  traits.initAcc(C7);
+          traits.initAcc(C8);  traits.initAcc(C9);  traits.initAcc(C10); traits.initAcc(C11);
+
+          LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+          LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+          LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+          LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+
+          r0.prefetch(0);
+          r1.prefetch(0);
+          r2.prefetch(0);
+          r3.prefetch(0);
+
+          // performs "inner" products
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+          prefetch(&blB[0]);
+          LhsPacket A0, A1;
+
+          for(Index k=0; k<peeled_kc; k+=pk)
+          {
+            EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX4");
+            RhsPacket B_0, T0;
+            LhsPacket A2;
+
+#define EIGEN_GEBP_ONESTEP(K) \
+            do { \
+              EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
+              EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+              internal::prefetch(blA+(3*K+16)*LhsProgress); \
+              if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } /* Bug 953 */ \
+              traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0);  \
+              traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1);  \
+              traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2);  \
+              traits.loadRhs(blB + (0+4*K)*Traits::RhsProgress, B_0); \
+              traits.madd(A0, B_0, C0, T0); \
+              traits.madd(A1, B_0, C4, T0); \
+              traits.madd(A2, B_0, C8, B_0); \
+              traits.loadRhs(blB + (1+4*K)*Traits::RhsProgress, B_0); \
+              traits.madd(A0, B_0, C1, T0); \
+              traits.madd(A1, B_0, C5, T0); \
+              traits.madd(A2, B_0, C9, B_0); \
+              traits.loadRhs(blB + (2+4*K)*Traits::RhsProgress, B_0); \
+              traits.madd(A0, B_0, C2,  T0); \
+              traits.madd(A1, B_0, C6,  T0); \
+              traits.madd(A2, B_0, C10, B_0); \
+              traits.loadRhs(blB + (3+4*K)*Traits::RhsProgress, B_0); \
+              traits.madd(A0, B_0, C3 , T0); \
+              traits.madd(A1, B_0, C7,  T0); \
+              traits.madd(A2, B_0, C11, B_0); \
+              EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
+            } while(false)
+
+            internal::prefetch(blB);
+            EIGEN_GEBP_ONESTEP(0);
+            EIGEN_GEBP_ONESTEP(1);
+            EIGEN_GEBP_ONESTEP(2);
+            EIGEN_GEBP_ONESTEP(3);
+            EIGEN_GEBP_ONESTEP(4);
+            EIGEN_GEBP_ONESTEP(5);
+            EIGEN_GEBP_ONESTEP(6);
+            EIGEN_GEBP_ONESTEP(7);
+
+            blB += pk*4*RhsProgress;
+            blA += pk*3*Traits::LhsProgress;
+
+            EIGEN_ASM_COMMENT("end gebp micro kernel 3pX4");
+          }
+          // process remaining peeled loop
+          for(Index k=peeled_kc; k<depth; k++)
+          {
+            RhsPacket B_0, T0;
+            LhsPacket A2;
+            EIGEN_GEBP_ONESTEP(0);
+            blB += 4*RhsProgress;
+            blA += 3*Traits::LhsProgress;
+          }
+
+#undef EIGEN_GEBP_ONESTEP
+
+          ResPacket R0, R1, R2;
+          ResPacket alphav = pset1<ResPacket>(alpha);
+
+          R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+          R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+          R2 = r0.loadPacket(2 * Traits::ResPacketSize);
+          traits.acc(C0, alphav, R0);
+          traits.acc(C4, alphav, R1);
+          traits.acc(C8, alphav, R2);
+          r0.storePacket(0 * Traits::ResPacketSize, R0);
+          r0.storePacket(1 * Traits::ResPacketSize, R1);
+          r0.storePacket(2 * Traits::ResPacketSize, R2);
+
+          R0 = r1.loadPacket(0 * Traits::ResPacketSize);
+          R1 = r1.loadPacket(1 * Traits::ResPacketSize);
+          R2 = r1.loadPacket(2 * Traits::ResPacketSize);
+          traits.acc(C1, alphav, R0);
+          traits.acc(C5, alphav, R1);
+          traits.acc(C9, alphav, R2);
+          r1.storePacket(0 * Traits::ResPacketSize, R0);
+          r1.storePacket(1 * Traits::ResPacketSize, R1);
+          r1.storePacket(2 * Traits::ResPacketSize, R2);
+
+          R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+          R1 = r2.loadPacket(1 * Traits::ResPacketSize);
+          R2 = r2.loadPacket(2 * Traits::ResPacketSize);
+          traits.acc(C2, alphav, R0);
+          traits.acc(C6, alphav, R1);
+          traits.acc(C10, alphav, R2);
+          r2.storePacket(0 * Traits::ResPacketSize, R0);
+          r2.storePacket(1 * Traits::ResPacketSize, R1);
+          r2.storePacket(2 * Traits::ResPacketSize, R2);
+
+          R0 = r3.loadPacket(0 * Traits::ResPacketSize);
+          R1 = r3.loadPacket(1 * Traits::ResPacketSize);
+          R2 = r3.loadPacket(2 * Traits::ResPacketSize);
+          traits.acc(C3, alphav, R0);
+          traits.acc(C7, alphav, R1);
+          traits.acc(C11, alphav, R2);
+          r3.storePacket(0 * Traits::ResPacketSize, R0);
+          r3.storePacket(1 * Traits::ResPacketSize, R1);
+          r3.storePacket(2 * Traits::ResPacketSize, R2);          
+          }
+        }
+
+        // Deal with remaining columns of the rhs
+        for(Index j2=packet_cols4; j2<cols; j2++)
+        {
+          for(Index i=i1; i<actual_panel_end; i+=3*LhsProgress)
+          {
+          // One column at a time
+          const LhsScalar* blA = &blockA[i*strideA+offsetA*(3*Traits::LhsProgress)];
+          prefetch(&blA[0]);
+
+          // gets res block as register
+          AccPacket C0, C4, C8;
+          traits.initAcc(C0);
+          traits.initAcc(C4);
+          traits.initAcc(C8);
+
+          LinearMapper r0 = res.getLinearMapper(i, j2);
+          r0.prefetch(0);
+
+          // performs "inner" products
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+          LhsPacket A0, A1, A2;
+          
+          for(Index k=0; k<peeled_kc; k+=pk)
+          {
+            EIGEN_ASM_COMMENT("begin gebp micro kernel 3pX1");
+            RhsPacket B_0;
+#define EIGEN_GEBGP_ONESTEP(K) \
+            do { \
+              EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX1"); \
+              EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+              traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0);  \
+              traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1);  \
+              traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2);  \
+              traits.loadRhs(&blB[(0+K)*RhsProgress], B_0);   \
+              traits.madd(A0, B_0, C0, B_0); \
+              traits.madd(A1, B_0, C4, B_0); \
+              traits.madd(A2, B_0, C8, B_0); \
+              EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX1"); \
+            } while(false)
+        
+            EIGEN_GEBGP_ONESTEP(0);
+            EIGEN_GEBGP_ONESTEP(1);
+            EIGEN_GEBGP_ONESTEP(2);
+            EIGEN_GEBGP_ONESTEP(3);
+            EIGEN_GEBGP_ONESTEP(4);
+            EIGEN_GEBGP_ONESTEP(5);
+            EIGEN_GEBGP_ONESTEP(6);
+            EIGEN_GEBGP_ONESTEP(7);
+
+            blB += pk*RhsProgress;
+            blA += pk*3*Traits::LhsProgress;
+
+            EIGEN_ASM_COMMENT("end gebp micro kernel 3pX1");
+          }
+
+          // process remaining peeled loop
+          for(Index k=peeled_kc; k<depth; k++)
+          {
+            RhsPacket B_0;
+            EIGEN_GEBGP_ONESTEP(0);
+            blB += RhsProgress;
+            blA += 3*Traits::LhsProgress;
+          }
+#undef EIGEN_GEBGP_ONESTEP
+          ResPacket R0, R1, R2;
+          ResPacket alphav = pset1<ResPacket>(alpha);
+
+          R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+          R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+          R2 = r0.loadPacket(2 * Traits::ResPacketSize);
+          traits.acc(C0, alphav, R0);
+          traits.acc(C4, alphav, R1);
+          traits.acc(C8, alphav, R2);
+          r0.storePacket(0 * Traits::ResPacketSize, R0);
+          r0.storePacket(1 * Traits::ResPacketSize, R1);
+          r0.storePacket(2 * Traits::ResPacketSize, R2);          
+          }
+        }
+      }
+    }
+
+    //---------- Process 2 * LhsProgress rows at once ----------
+    if(mr>=2*Traits::LhsProgress)
+    {
+      const Index l1 = defaultL1CacheSize; // in Bytes, TODO, l1 should be passed to this function.
+      // The max(1, ...) here is needed because we may be using blocking params larger than what our known l1 cache size
+      // suggests we should be using: either because our known l1 cache size is inaccurate (e.g. on Android, we can only guess),
+      // or because we are testing specific blocking sizes.
+      Index actual_panel_rows = (2*LhsProgress) * std::max<Index>(1,( (l1 - sizeof(ResScalar)*mr*nr - depth*nr*sizeof(RhsScalar)) / (depth * sizeof(LhsScalar) * 2*LhsProgress) ));
+
+      for(Index i1=peeled_mc3; i1<peeled_mc2; i1+=actual_panel_rows)
+      {
+        Index actual_panel_end = (std::min)(i1+actual_panel_rows, peeled_mc2);
+        for(Index j2=0; j2<packet_cols4; j2+=nr)
+        {
+          for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
+          {
+          
+          // We selected a 2*Traits::LhsProgress x nr micro block of res which is entirely
+          // stored into 2 x nr registers.
+          
+          const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
+          prefetch(&blA[0]);
+
+          // gets res block as register
+          AccPacket C0, C1, C2, C3,
+                    C4, C5, C6, C7;
+          traits.initAcc(C0); traits.initAcc(C1); traits.initAcc(C2); traits.initAcc(C3);
+          traits.initAcc(C4); traits.initAcc(C5); traits.initAcc(C6); traits.initAcc(C7);
+
+          LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+          LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+          LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+          LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+
+          r0.prefetch(prefetch_res_offset);
+          r1.prefetch(prefetch_res_offset);
+          r2.prefetch(prefetch_res_offset);
+          r3.prefetch(prefetch_res_offset);
+
+          // performs "inner" products
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+          prefetch(&blB[0]);
+          LhsPacket A0, A1;
+
+          for(Index k=0; k<peeled_kc; k+=pk)
+          {
+            EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
+            RhsPacket B_0, B1, B2, B3, T0;
+
+          // NOTE: the begin/end asm comments below work around bug 935!
+          // but they are not enough for gcc>=6 without FMA (bug 1637)
+          #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE)
+            #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__  ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1));
+          #else
+            #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND
+          #endif
+          #define EIGEN_GEBGP_ONESTEP(K) \
+            do {                                                                \
+              EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4");        \
+              traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0);                    \
+              traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1);                    \
+              traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3);  \
+              traits.madd(A0, B_0, C0, T0);                                     \
+              traits.madd(A1, B_0, C4, B_0);                                    \
+              traits.madd(A0, B1,  C1, T0);                                     \
+              traits.madd(A1, B1,  C5, B1);                                     \
+              traits.madd(A0, B2,  C2, T0);                                     \
+              traits.madd(A1, B2,  C6, B2);                                     \
+              traits.madd(A0, B3,  C3, T0);                                     \
+              traits.madd(A1, B3,  C7, B3);                                     \
+              EIGEN_GEBP_2PX4_SPILLING_WORKAROUND                               \
+              EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4");          \
+            } while(false)
+            
+            internal::prefetch(blB+(48+0));
+            EIGEN_GEBGP_ONESTEP(0);
+            EIGEN_GEBGP_ONESTEP(1);
+            EIGEN_GEBGP_ONESTEP(2);
+            EIGEN_GEBGP_ONESTEP(3);
+            internal::prefetch(blB+(48+16));
+            EIGEN_GEBGP_ONESTEP(4);
+            EIGEN_GEBGP_ONESTEP(5);
+            EIGEN_GEBGP_ONESTEP(6);
+            EIGEN_GEBGP_ONESTEP(7);
+
+            blB += pk*4*RhsProgress;
+            blA += pk*(2*Traits::LhsProgress);
+
+            EIGEN_ASM_COMMENT("end gebp micro kernel 2pX4");
+          }
+          // process remaining peeled loop
+          for(Index k=peeled_kc; k<depth; k++)
+          {
+            RhsPacket B_0, B1, B2, B3, T0;
+            EIGEN_GEBGP_ONESTEP(0);
+            blB += 4*RhsProgress;
+            blA += 2*Traits::LhsProgress;
+          }
+#undef EIGEN_GEBGP_ONESTEP
+
+          ResPacket R0, R1, R2, R3;
+          ResPacket alphav = pset1<ResPacket>(alpha);
+
+          R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+          R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+          R2 = r1.loadPacket(0 * Traits::ResPacketSize);
+          R3 = r1.loadPacket(1 * Traits::ResPacketSize);
+          traits.acc(C0, alphav, R0);
+          traits.acc(C4, alphav, R1);
+          traits.acc(C1, alphav, R2);
+          traits.acc(C5, alphav, R3);
+          r0.storePacket(0 * Traits::ResPacketSize, R0);
+          r0.storePacket(1 * Traits::ResPacketSize, R1);
+          r1.storePacket(0 * Traits::ResPacketSize, R2);
+          r1.storePacket(1 * Traits::ResPacketSize, R3);
+
+          R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+          R1 = r2.loadPacket(1 * Traits::ResPacketSize);
+          R2 = r3.loadPacket(0 * Traits::ResPacketSize);
+          R3 = r3.loadPacket(1 * Traits::ResPacketSize);
+          traits.acc(C2,  alphav, R0);
+          traits.acc(C6,  alphav, R1);
+          traits.acc(C3,  alphav, R2);
+          traits.acc(C7,  alphav, R3);
+          r2.storePacket(0 * Traits::ResPacketSize, R0);
+          r2.storePacket(1 * Traits::ResPacketSize, R1);
+          r3.storePacket(0 * Traits::ResPacketSize, R2);
+          r3.storePacket(1 * Traits::ResPacketSize, R3);
+          }
+        }
+      
+        // Deal with remaining columns of the rhs
+        for(Index j2=packet_cols4; j2<cols; j2++)
+        {
+          for(Index i=i1; i<actual_panel_end; i+=2*LhsProgress)
+          {
+          // One column at a time
+          const LhsScalar* blA = &blockA[i*strideA+offsetA*(2*Traits::LhsProgress)];
+          prefetch(&blA[0]);
+
+          // gets res block as register
+          AccPacket C0, C4;
+          traits.initAcc(C0);
+          traits.initAcc(C4);
+
+          LinearMapper r0 = res.getLinearMapper(i, j2);
+          r0.prefetch(prefetch_res_offset);
+
+          // performs "inner" products
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+          LhsPacket A0, A1;
+
+          for(Index k=0; k<peeled_kc; k+=pk)
+          {
+            EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX1");
+            RhsPacket B_0, B1;
+        
+#define EIGEN_GEBGP_ONESTEP(K) \
+            do {                                                                  \
+              EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1");          \
+              EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+              traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0);                      \
+              traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1);                      \
+              traits.loadRhs(&blB[(0+K)*RhsProgress], B_0);                       \
+              traits.madd(A0, B_0, C0, B1);                                       \
+              traits.madd(A1, B_0, C4, B_0);                                      \
+              EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1");            \
+            } while(false)
+        
+            EIGEN_GEBGP_ONESTEP(0);
+            EIGEN_GEBGP_ONESTEP(1);
+            EIGEN_GEBGP_ONESTEP(2);
+            EIGEN_GEBGP_ONESTEP(3);
+            EIGEN_GEBGP_ONESTEP(4);
+            EIGEN_GEBGP_ONESTEP(5);
+            EIGEN_GEBGP_ONESTEP(6);
+            EIGEN_GEBGP_ONESTEP(7);
+
+            blB += pk*RhsProgress;
+            blA += pk*2*Traits::LhsProgress;
+
+            EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1");
+          }
+
+          // process remaining peeled loop
+          for(Index k=peeled_kc; k<depth; k++)
+          {
+            RhsPacket B_0, B1;
+            EIGEN_GEBGP_ONESTEP(0);
+            blB += RhsProgress;
+            blA += 2*Traits::LhsProgress;
+          }
+#undef EIGEN_GEBGP_ONESTEP
+          ResPacket R0, R1;
+          ResPacket alphav = pset1<ResPacket>(alpha);
+
+          R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+          R1 = r0.loadPacket(1 * Traits::ResPacketSize);
+          traits.acc(C0, alphav, R0);
+          traits.acc(C4, alphav, R1);
+          r0.storePacket(0 * Traits::ResPacketSize, R0);
+          r0.storePacket(1 * Traits::ResPacketSize, R1);
+          }
+        }
+      }
+    }
+    //---------- Process 1 * LhsProgress rows at once ----------
+    if(mr>=1*Traits::LhsProgress)
+    {
+      // loops on each largest micro horizontal panel of lhs (1*LhsProgress x depth)
+      for(Index i=peeled_mc2; i<peeled_mc1; i+=1*LhsProgress)
+      {
+        // loops on each largest micro vertical panel of rhs (depth * nr)
+        for(Index j2=0; j2<packet_cols4; j2+=nr)
+        {
+          // We select a 1*Traits::LhsProgress x nr micro block of res which is entirely
+          // stored into 1 x nr registers.
+          
+          const LhsScalar* blA = &blockA[i*strideA+offsetA*(1*Traits::LhsProgress)];
+          prefetch(&blA[0]);
+
+          // gets res block as register
+          AccPacket C0, C1, C2, C3;
+          traits.initAcc(C0);
+          traits.initAcc(C1);
+          traits.initAcc(C2);
+          traits.initAcc(C3);
+
+          LinearMapper r0 = res.getLinearMapper(i, j2 + 0);
+          LinearMapper r1 = res.getLinearMapper(i, j2 + 1);
+          LinearMapper r2 = res.getLinearMapper(i, j2 + 2);
+          LinearMapper r3 = res.getLinearMapper(i, j2 + 3);
+
+          r0.prefetch(prefetch_res_offset);
+          r1.prefetch(prefetch_res_offset);
+          r2.prefetch(prefetch_res_offset);
+          r3.prefetch(prefetch_res_offset);
+
+          // performs "inner" products
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+          prefetch(&blB[0]);
+          LhsPacket A0;
+
+          for(Index k=0; k<peeled_kc; k+=pk)
+          {
+            EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX4");
+            RhsPacket B_0, B1, B2, B3;
+               
+#define EIGEN_GEBGP_ONESTEP(K) \
+            do {                                                                \
+              EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX4");        \
+              EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+              traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0);                    \
+              traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3);  \
+              traits.madd(A0, B_0, C0, B_0);                                    \
+              traits.madd(A0, B1,  C1, B1);                                     \
+              traits.madd(A0, B2,  C2, B2);                                     \
+              traits.madd(A0, B3,  C3, B3);                                     \
+              EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX4");          \
+            } while(false)
+            
+            internal::prefetch(blB+(48+0));
+            EIGEN_GEBGP_ONESTEP(0);
+            EIGEN_GEBGP_ONESTEP(1);
+            EIGEN_GEBGP_ONESTEP(2);
+            EIGEN_GEBGP_ONESTEP(3);
+            internal::prefetch(blB+(48+16));
+            EIGEN_GEBGP_ONESTEP(4);
+            EIGEN_GEBGP_ONESTEP(5);
+            EIGEN_GEBGP_ONESTEP(6);
+            EIGEN_GEBGP_ONESTEP(7);
+
+            blB += pk*4*RhsProgress;
+            blA += pk*1*LhsProgress;
+
+            EIGEN_ASM_COMMENT("end gebp micro kernel 1pX4");
+          }
+          // process remaining peeled loop
+          for(Index k=peeled_kc; k<depth; k++)
+          {
+            RhsPacket B_0, B1, B2, B3;
+            EIGEN_GEBGP_ONESTEP(0);
+            blB += 4*RhsProgress;
+            blA += 1*LhsProgress;
+          }
+#undef EIGEN_GEBGP_ONESTEP
+
+          ResPacket R0, R1;
+          ResPacket alphav = pset1<ResPacket>(alpha);
+
+          R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+          R1 = r1.loadPacket(0 * Traits::ResPacketSize);
+          traits.acc(C0, alphav, R0);
+          traits.acc(C1,  alphav, R1);
+          r0.storePacket(0 * Traits::ResPacketSize, R0);
+          r1.storePacket(0 * Traits::ResPacketSize, R1);
+
+          R0 = r2.loadPacket(0 * Traits::ResPacketSize);
+          R1 = r3.loadPacket(0 * Traits::ResPacketSize);
+          traits.acc(C2,  alphav, R0);
+          traits.acc(C3,  alphav, R1);
+          r2.storePacket(0 * Traits::ResPacketSize, R0);
+          r3.storePacket(0 * Traits::ResPacketSize, R1);
+        }
+
+        // Deal with remaining columns of the rhs
+        for(Index j2=packet_cols4; j2<cols; j2++)
+        {
+          // One column at a time
+          const LhsScalar* blA = &blockA[i*strideA+offsetA*(1*Traits::LhsProgress)];
+          prefetch(&blA[0]);
+
+          // gets res block as register
+          AccPacket C0;
+          traits.initAcc(C0);
+
+          LinearMapper r0 = res.getLinearMapper(i, j2);
+
+          // performs "inner" products
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+          LhsPacket A0;
+
+          for(Index k=0; k<peeled_kc; k+=pk)
+          {
+            EIGEN_ASM_COMMENT("begin gebp micro kernel 1pX1");
+            RhsPacket B_0;
+        
+#define EIGEN_GEBGP_ONESTEP(K) \
+            do {                                                                \
+              EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1pX1");        \
+              EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
+              traits.loadLhs(&blA[(0+1*K)*LhsProgress], A0);                    \
+              traits.loadRhs(&blB[(0+K)*RhsProgress], B_0);                     \
+              traits.madd(A0, B_0, C0, B_0);                                    \
+              EIGEN_ASM_COMMENT("end step of gebp micro kernel 1pX1");          \
+            } while(false);
+
+            EIGEN_GEBGP_ONESTEP(0);
+            EIGEN_GEBGP_ONESTEP(1);
+            EIGEN_GEBGP_ONESTEP(2);
+            EIGEN_GEBGP_ONESTEP(3);
+            EIGEN_GEBGP_ONESTEP(4);
+            EIGEN_GEBGP_ONESTEP(5);
+            EIGEN_GEBGP_ONESTEP(6);
+            EIGEN_GEBGP_ONESTEP(7);
+
+            blB += pk*RhsProgress;
+            blA += pk*1*Traits::LhsProgress;
+
+            EIGEN_ASM_COMMENT("end gebp micro kernel 1pX1");
+          }
+
+          // process remaining peeled loop
+          for(Index k=peeled_kc; k<depth; k++)
+          {
+            RhsPacket B_0;
+            EIGEN_GEBGP_ONESTEP(0);
+            blB += RhsProgress;
+            blA += 1*Traits::LhsProgress;
+          }
+#undef EIGEN_GEBGP_ONESTEP
+          ResPacket R0;
+          ResPacket alphav = pset1<ResPacket>(alpha);
+          R0 = r0.loadPacket(0 * Traits::ResPacketSize);
+          traits.acc(C0, alphav, R0);
+          r0.storePacket(0 * Traits::ResPacketSize, R0);
+        }
+      }
+    }
+    //---------- Process remaining rows, 1 at once ----------
+    if(peeled_mc1<rows)
+    {
+      // loop on each panel of the rhs
+      for(Index j2=0; j2<packet_cols4; j2+=nr)
+      {
+        // loop on each row of the lhs (1*LhsProgress x depth)
+        for(Index i=peeled_mc1; i<rows; i+=1)
+        {
+          const LhsScalar* blA = &blockA[i*strideA+offsetA];
+          prefetch(&blA[0]);
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
+
+          // The following piece of code wont work for 512 bit registers
+          // Moreover, if LhsProgress==8 it assumes that there is a half packet of the same size
+          // as nr (which is currently 4) for the return type.
+          const int SResPacketHalfSize = unpacket_traits<typename unpacket_traits<SResPacket>::half>::size;
+          if ((SwappedTraits::LhsProgress % 4) == 0 &&
+              (SwappedTraits::LhsProgress <= 8) &&
+              (SwappedTraits::LhsProgress!=8 || SResPacketHalfSize==nr))
+          {
+            SAccPacket C0, C1, C2, C3;
+            straits.initAcc(C0);
+            straits.initAcc(C1);
+            straits.initAcc(C2);
+            straits.initAcc(C3);
+
+            const Index spk   = (std::max)(1,SwappedTraits::LhsProgress/4);
+            const Index endk  = (depth/spk)*spk;
+            const Index endk4 = (depth/(spk*4))*(spk*4);
+
+            Index k=0;
+            for(; k<endk4; k+=4*spk)
+            {
+              SLhsPacket A0,A1;
+              SRhsPacket B_0,B_1;
+
+              straits.loadLhsUnaligned(blB+0*SwappedTraits::LhsProgress, A0);
+              straits.loadLhsUnaligned(blB+1*SwappedTraits::LhsProgress, A1);
+
+              straits.loadRhsQuad(blA+0*spk, B_0);
+              straits.loadRhsQuad(blA+1*spk, B_1);
+              straits.madd(A0,B_0,C0,B_0);
+              straits.madd(A1,B_1,C1,B_1);
+
+              straits.loadLhsUnaligned(blB+2*SwappedTraits::LhsProgress, A0);
+              straits.loadLhsUnaligned(blB+3*SwappedTraits::LhsProgress, A1);
+              straits.loadRhsQuad(blA+2*spk, B_0);
+              straits.loadRhsQuad(blA+3*spk, B_1);
+              straits.madd(A0,B_0,C2,B_0);
+              straits.madd(A1,B_1,C3,B_1);
+
+              blB += 4*SwappedTraits::LhsProgress;
+              blA += 4*spk;
+            }
+            C0 = padd(padd(C0,C1),padd(C2,C3));
+            for(; k<endk; k+=spk)
+            {
+              SLhsPacket A0;
+              SRhsPacket B_0;
+
+              straits.loadLhsUnaligned(blB, A0);
+              straits.loadRhsQuad(blA, B_0);
+              straits.madd(A0,B_0,C0,B_0);
+
+              blB += SwappedTraits::LhsProgress;
+              blA += spk;
+            }
+            if(SwappedTraits::LhsProgress==8)
+            {
+              // Special case where we have to first reduce the accumulation register C0
+              typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SResPacket>::half,SResPacket>::type SResPacketHalf;
+              typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SLhsPacket>::type SLhsPacketHalf;
+              typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SLhsPacket>::half,SRhsPacket>::type SRhsPacketHalf;
+              typedef typename conditional<SwappedTraits::LhsProgress>=8,typename unpacket_traits<SAccPacket>::half,SAccPacket>::type SAccPacketHalf;
+
+              SResPacketHalf R = res.template gatherPacket<SResPacketHalf>(i, j2);
+              SResPacketHalf alphav = pset1<SResPacketHalf>(alpha);
+
+              if(depth-endk>0)
+              {
+                // We have to handle the last row of the rhs which corresponds to a half-packet
+                SLhsPacketHalf a0;
+                SRhsPacketHalf b0;
+                straits.loadLhsUnaligned(blB, a0);
+                straits.loadRhs(blA, b0);
+                SAccPacketHalf c0 = predux_downto4(C0);
+                straits.madd(a0,b0,c0,b0);
+                straits.acc(c0, alphav, R);
+              }
+              else
+              {
+                straits.acc(predux_downto4(C0), alphav, R);
+              }
+              res.scatterPacket(i, j2, R);
+            }
+            else
+            {
+              SResPacket R = res.template gatherPacket<SResPacket>(i, j2);
+              SResPacket alphav = pset1<SResPacket>(alpha);
+              straits.acc(C0, alphav, R);
+              res.scatterPacket(i, j2, R);
+            }
+          }
+          else // scalar path
+          {
+            // get a 1 x 4 res block as registers
+            ResScalar C0(0), C1(0), C2(0), C3(0);
+
+            for(Index k=0; k<depth; k++)
+            {
+              LhsScalar A0;
+              RhsScalar B_0, B_1;
+
+              A0 = blA[k];
+
+              B_0 = blB[0];
+              B_1 = blB[1];
+              CJMADD(cj,A0,B_0,C0,  B_0);
+              CJMADD(cj,A0,B_1,C1,  B_1);
+              
+              B_0 = blB[2];
+              B_1 = blB[3];
+              CJMADD(cj,A0,B_0,C2,  B_0);
+              CJMADD(cj,A0,B_1,C3,  B_1);
+              
+              blB += 4;
+            }
+            res(i, j2 + 0) += alpha * C0;
+            res(i, j2 + 1) += alpha * C1;
+            res(i, j2 + 2) += alpha * C2;
+            res(i, j2 + 3) += alpha * C3;
+          }
+        }
+      }
+      // remaining columns
+      for(Index j2=packet_cols4; j2<cols; j2++)
+      {
+        // loop on each row of the lhs (1*LhsProgress x depth)
+        for(Index i=peeled_mc1; i<rows; i+=1)
+        {
+          const LhsScalar* blA = &blockA[i*strideA+offsetA];
+          prefetch(&blA[0]);
+          // gets a 1 x 1 res block as registers
+          ResScalar C0(0);
+          const RhsScalar* blB = &blockB[j2*strideB+offsetB];
+          for(Index k=0; k<depth; k++)
+          {
+            LhsScalar A0 = blA[k];
+            RhsScalar B_0 = blB[k];
+            CJMADD(cj, A0, B_0, C0, B_0);
+          }
+          res(i, j2) += alpha * C0;
+        }
+      }
+    }
+  }
+
+
+#undef CJMADD
+
+// pack a block of the lhs
+// The traversal is as follow (mr==4):
+//   0  4  8 12 ...
+//   1  5  9 13 ...
+//   2  6 10 14 ...
+//   3  7 11 15 ...
+//
+//  16 20 24 28 ...
+//  17 21 25 29 ...
+//  18 22 26 30 ...
+//  19 23 27 31 ...
+//
+//  32 33 34 35 ...
+//  36 36 38 39 ...
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
+{
+  typedef typename DataMapper::LinearMapper LinearMapper;
+  EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, ColMajor, Conjugate, PanelMode>
+  ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
+{
+  typedef typename packet_traits<Scalar>::type Packet;
+  enum { PacketSize = packet_traits<Scalar>::size };
+
+  EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+  EIGEN_UNUSED_VARIABLE(stride);
+  EIGEN_UNUSED_VARIABLE(offset);
+  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+  eigen_assert( ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) || (Pack1<=4) );
+  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+  Index count = 0;
+
+  const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
+  const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
+  const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
+  const Index peeled_mc0 = Pack2>=1*PacketSize ? peeled_mc1
+                         : Pack2>1             ? (rows/Pack2)*Pack2 : 0;
+
+  Index i=0;
+
+  // Pack 3 packets
+  if(Pack1>=3*PacketSize)
+  {
+    for(; i<peeled_mc3; i+=3*PacketSize)
+    {
+      if(PanelMode) count += (3*PacketSize) * offset;
+
+      for(Index k=0; k<depth; k++)
+      {
+        Packet A, B, C;
+        A = lhs.loadPacket(i+0*PacketSize, k);
+        B = lhs.loadPacket(i+1*PacketSize, k);
+        C = lhs.loadPacket(i+2*PacketSize, k);
+        pstore(blockA+count, cj.pconj(A)); count+=PacketSize;
+        pstore(blockA+count, cj.pconj(B)); count+=PacketSize;
+        pstore(blockA+count, cj.pconj(C)); count+=PacketSize;
+      }
+      if(PanelMode) count += (3*PacketSize) * (stride-offset-depth);
+    }
+  }
+  // Pack 2 packets
+  if(Pack1>=2*PacketSize)
+  {
+    for(; i<peeled_mc2; i+=2*PacketSize)
+    {
+      if(PanelMode) count += (2*PacketSize) * offset;
+
+      for(Index k=0; k<depth; k++)
+      {
+        Packet A, B;
+        A = lhs.loadPacket(i+0*PacketSize, k);
+        B = lhs.loadPacket(i+1*PacketSize, k);
+        pstore(blockA+count, cj.pconj(A)); count+=PacketSize;
+        pstore(blockA+count, cj.pconj(B)); count+=PacketSize;
+      }
+      if(PanelMode) count += (2*PacketSize) * (stride-offset-depth);
+    }
+  }
+  // Pack 1 packets
+  if(Pack1>=1*PacketSize)
+  {
+    for(; i<peeled_mc1; i+=1*PacketSize)
+    {
+      if(PanelMode) count += (1*PacketSize) * offset;
+
+      for(Index k=0; k<depth; k++)
+      {
+        Packet A;
+        A = lhs.loadPacket(i+0*PacketSize, k);
+        pstore(blockA+count, cj.pconj(A));
+        count+=PacketSize;
+      }
+      if(PanelMode) count += (1*PacketSize) * (stride-offset-depth);
+    }
+  }
+  // Pack scalars
+  if(Pack2<PacketSize && Pack2>1)
+  {
+    for(; i<peeled_mc0; i+=Pack2)
+    {
+      if(PanelMode) count += Pack2 * offset;
+
+      for(Index k=0; k<depth; k++)
+        for(Index w=0; w<Pack2; w++)
+          blockA[count++] = cj(lhs(i+w, k));
+
+      if(PanelMode) count += Pack2 * (stride-offset-depth);
+    }
+  }
+  for(; i<rows; i++)
+  {
+    if(PanelMode) count += offset;
+    for(Index k=0; k<depth; k++)
+      blockA[count++] = cj(lhs(i, k));
+    if(PanelMode) count += (stride-offset-depth);
+  }
+}
+
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
+{
+  typedef typename DataMapper::LinearMapper LinearMapper;
+  EIGEN_DONT_INLINE void operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, DataMapper, Pack1, Pack2, RowMajor, Conjugate, PanelMode>
+  ::operator()(Scalar* blockA, const DataMapper& lhs, Index depth, Index rows, Index stride, Index offset)
+{
+  typedef typename packet_traits<Scalar>::type Packet;
+  enum { PacketSize = packet_traits<Scalar>::size };
+
+  EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+  EIGEN_UNUSED_VARIABLE(stride);
+  EIGEN_UNUSED_VARIABLE(offset);
+  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+  Index count = 0;
+
+//   const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
+//   const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
+//   const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
+
+  int pack = Pack1;
+  Index i = 0;
+  while(pack>0)
+  {
+    Index remaining_rows = rows-i;
+    Index peeled_mc = i+(remaining_rows/pack)*pack;
+    for(; i<peeled_mc; i+=pack)
+    {
+      if(PanelMode) count += pack * offset;
+
+      const Index peeled_k = (depth/PacketSize)*PacketSize;
+      Index k=0;
+      if(pack>=PacketSize)
+      {
+        for(; k<peeled_k; k+=PacketSize)
+        {
+          for (Index m = 0; m < pack; m += PacketSize)
+          {
+            PacketBlock<Packet> kernel;
+            for (int p = 0; p < PacketSize; ++p) kernel.packet[p] = lhs.loadPacket(i+p+m, k);
+            ptranspose(kernel);
+            for (int p = 0; p < PacketSize; ++p) pstore(blockA+count+m+(pack)*p, cj.pconj(kernel.packet[p]));
+          }
+          count += PacketSize*pack;
+        }
+      }
+      for(; k<depth; k++)
+      {
+        Index w=0;
+        for(; w<pack-3; w+=4)
+        {
+          Scalar a(cj(lhs(i+w+0, k))),
+                 b(cj(lhs(i+w+1, k))),
+                 c(cj(lhs(i+w+2, k))),
+                 d(cj(lhs(i+w+3, k)));
+          blockA[count++] = a;
+          blockA[count++] = b;
+          blockA[count++] = c;
+          blockA[count++] = d;
+        }
+        if(pack%4)
+          for(;w<pack;++w)
+            blockA[count++] = cj(lhs(i+w, k));
+      }
+
+      if(PanelMode) count += pack * (stride-offset-depth);
+    }
+
+    pack -= PacketSize;
+    if(pack<Pack2 && (pack+PacketSize)!=Pack2)
+      pack = Pack2;
+  }
+
+  for(; i<rows; i++)
+  {
+    if(PanelMode) count += offset;
+    for(Index k=0; k<depth; k++)
+      blockA[count++] = cj(lhs(i, k));
+    if(PanelMode) count += (stride-offset-depth);
+  }
+}
+
+// copy a complete panel of the rhs
+// this version is optimized for column major matrices
+// The traversal order is as follow: (nr==4):
+//  0  1  2  3   12 13 14 15   24 27
+//  4  5  6  7   16 17 18 19   25 28
+//  8  9 10 11   20 21 22 23   26 29
+//  .  .  .  .    .  .  .  .    .  .
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
+{
+  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename DataMapper::LinearMapper LinearMapper;
+  enum { PacketSize = packet_traits<Scalar>::size };
+  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
+  ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
+{
+  EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+  EIGEN_UNUSED_VARIABLE(stride);
+  EIGEN_UNUSED_VARIABLE(offset);
+  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+  Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
+  Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+  Index count = 0;
+  const Index peeled_k = (depth/PacketSize)*PacketSize;
+//   if(nr>=8)
+//   {
+//     for(Index j2=0; j2<packet_cols8; j2+=8)
+//     {
+//       // skip what we have before
+//       if(PanelMode) count += 8 * offset;
+//       const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+//       const Scalar* b1 = &rhs[(j2+1)*rhsStride];
+//       const Scalar* b2 = &rhs[(j2+2)*rhsStride];
+//       const Scalar* b3 = &rhs[(j2+3)*rhsStride];
+//       const Scalar* b4 = &rhs[(j2+4)*rhsStride];
+//       const Scalar* b5 = &rhs[(j2+5)*rhsStride];
+//       const Scalar* b6 = &rhs[(j2+6)*rhsStride];
+//       const Scalar* b7 = &rhs[(j2+7)*rhsStride];
+//       Index k=0;
+//       if(PacketSize==8) // TODO enbale vectorized transposition for PacketSize==4
+//       {
+//         for(; k<peeled_k; k+=PacketSize) {
+//           PacketBlock<Packet> kernel;
+//           for (int p = 0; p < PacketSize; ++p) {
+//             kernel.packet[p] = ploadu<Packet>(&rhs[(j2+p)*rhsStride+k]);
+//           }
+//           ptranspose(kernel);
+//           for (int p = 0; p < PacketSize; ++p) {
+//             pstoreu(blockB+count, cj.pconj(kernel.packet[p]));
+//             count+=PacketSize;
+//           }
+//         }
+//       }
+//       for(; k<depth; k++)
+//       {
+//         blockB[count+0] = cj(b0[k]);
+//         blockB[count+1] = cj(b1[k]);
+//         blockB[count+2] = cj(b2[k]);
+//         blockB[count+3] = cj(b3[k]);
+//         blockB[count+4] = cj(b4[k]);
+//         blockB[count+5] = cj(b5[k]);
+//         blockB[count+6] = cj(b6[k]);
+//         blockB[count+7] = cj(b7[k]);
+//         count += 8;
+//       }
+//       // skip what we have after
+//       if(PanelMode) count += 8 * (stride-offset-depth);
+//     }
+//   }
+
+  if(nr>=4)
+  {
+    for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
+    {
+      // skip what we have before
+      if(PanelMode) count += 4 * offset;
+      const LinearMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
+      const LinearMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
+      const LinearMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
+      const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
+
+      Index k=0;
+      if((PacketSize%4)==0) // TODO enable vectorized transposition for PacketSize==2 ??
+      {
+        for(; k<peeled_k; k+=PacketSize) {
+          PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;
+          kernel.packet[0] = dm0.loadPacket(k);
+          kernel.packet[1%PacketSize] = dm1.loadPacket(k);
+          kernel.packet[2%PacketSize] = dm2.loadPacket(k);
+          kernel.packet[3%PacketSize] = dm3.loadPacket(k);
+          ptranspose(kernel);
+          pstoreu(blockB+count+0*PacketSize, cj.pconj(kernel.packet[0]));
+          pstoreu(blockB+count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize]));
+          pstoreu(blockB+count+2*PacketSize, cj.pconj(kernel.packet[2%PacketSize]));
+          pstoreu(blockB+count+3*PacketSize, cj.pconj(kernel.packet[3%PacketSize]));
+          count+=4*PacketSize;
+        }
+      }
+      for(; k<depth; k++)
+      {
+        blockB[count+0] = cj(dm0(k));
+        blockB[count+1] = cj(dm1(k));
+        blockB[count+2] = cj(dm2(k));
+        blockB[count+3] = cj(dm3(k));
+        count += 4;
+      }
+      // skip what we have after
+      if(PanelMode) count += 4 * (stride-offset-depth);
+    }
+  }
+
+  // copy the remaining columns one at a time (nr==1)
+  for(Index j2=packet_cols4; j2<cols; ++j2)
+  {
+    if(PanelMode) count += offset;
+    const LinearMapper dm0 = rhs.getLinearMapper(0, j2);
+    for(Index k=0; k<depth; k++)
+    {
+      blockB[count] = cj(dm0(k));
+      count += 1;
+    }
+    if(PanelMode) count += (stride-offset-depth);
+  }
+}
+
+// this version is optimized for row major matrices
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
+{
+  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename DataMapper::LinearMapper LinearMapper;
+  enum { PacketSize = packet_traits<Scalar>::size };
+  EIGEN_DONT_INLINE void operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
+  ::operator()(Scalar* blockB, const DataMapper& rhs, Index depth, Index cols, Index stride, Index offset)
+{
+  EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+  EIGEN_UNUSED_VARIABLE(stride);
+  EIGEN_UNUSED_VARIABLE(offset);
+  eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+  conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+  Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
+  Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+  Index count = 0;
+
+//   if(nr>=8)
+//   {
+//     for(Index j2=0; j2<packet_cols8; j2+=8)
+//     {
+//       // skip what we have before
+//       if(PanelMode) count += 8 * offset;
+//       for(Index k=0; k<depth; k++)
+//       {
+//         if (PacketSize==8) {
+//           Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
+//           pstoreu(blockB+count, cj.pconj(A));
+//         } else if (PacketSize==4) {
+//           Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
+//           Packet B = ploadu<Packet>(&rhs[k*rhsStride + j2 + PacketSize]);
+//           pstoreu(blockB+count, cj.pconj(A));
+//           pstoreu(blockB+count+PacketSize, cj.pconj(B));
+//         } else {
+//           const Scalar* b0 = &rhs[k*rhsStride + j2];
+//           blockB[count+0] = cj(b0[0]);
+//           blockB[count+1] = cj(b0[1]);
+//           blockB[count+2] = cj(b0[2]);
+//           blockB[count+3] = cj(b0[3]);
+//           blockB[count+4] = cj(b0[4]);
+//           blockB[count+5] = cj(b0[5]);
+//           blockB[count+6] = cj(b0[6]);
+//           blockB[count+7] = cj(b0[7]);
+//         }
+//         count += 8;
+//       }
+//       // skip what we have after
+//       if(PanelMode) count += 8 * (stride-offset-depth);
+//     }
+//   }
+  if(nr>=4)
+  {
+    for(Index j2=packet_cols8; j2<packet_cols4; j2+=4)
+    {
+      // skip what we have before
+      if(PanelMode) count += 4 * offset;
+      for(Index k=0; k<depth; k++)
+      {
+        if (PacketSize==4) {
+          Packet A = rhs.loadPacket(k, j2);
+          pstoreu(blockB+count, cj.pconj(A));
+          count += PacketSize;
+        } else {
+          const LinearMapper dm0 = rhs.getLinearMapper(k, j2);
+          blockB[count+0] = cj(dm0(0));
+          blockB[count+1] = cj(dm0(1));
+          blockB[count+2] = cj(dm0(2));
+          blockB[count+3] = cj(dm0(3));
+          count += 4;
+        }
+      }
+      // skip what we have after
+      if(PanelMode) count += 4 * (stride-offset-depth);
+    }
+  }
+  // copy the remaining columns one at a time (nr==1)
+  for(Index j2=packet_cols4; j2<cols; ++j2)
+  {
+    if(PanelMode) count += offset;
+    for(Index k=0; k<depth; k++)
+    {
+      blockB[count] = cj(rhs(k, j2));
+      count += 1;
+    }
+    if(PanelMode) count += stride-offset-depth;
+  }
+}
+
+} // end namespace internal
+
+/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
+  * \sa setCpuCacheSize */
+inline std::ptrdiff_t l1CacheSize()
+{
+  std::ptrdiff_t l1, l2, l3;
+  internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+  return l1;
+}
+
+/** \returns the currently set level 2 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
+  * \sa setCpuCacheSize */
+inline std::ptrdiff_t l2CacheSize()
+{
+  std::ptrdiff_t l1, l2, l3;
+  internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+  return l2;
+}
+
+/** \returns the currently set level 3 cpu cache size (in bytes) used to estimate the ideal blocking size paramete\
+rs.                                                                                                                
+* \sa setCpuCacheSize */
+inline std::ptrdiff_t l3CacheSize()
+{
+  std::ptrdiff_t l1, l2, l3;
+  internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+  return l3;
+}
+
+/** Set the cpu L1 and L2 cache sizes (in bytes).
+  * These values are use to adjust the size of the blocks
+  * for the algorithms working per blocks.
+  *
+  * \sa computeProductBlockingSizes */
+inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2, std::ptrdiff_t l3)
+{
+  internal::manage_caching_sizes(SetAction, &l1, &l2, &l3);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_BLOCK_PANEL_H
diff --git a/third-party/Eigen/src/Core/products/GeneralMatrixMatrix.h b/third-party/Eigen/src/Core/products/GeneralMatrixMatrix.h
new file mode 100644
index 00000000..ed6234c3
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -0,0 +1,495 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename _LhsScalar, typename _RhsScalar> class level3_blocking;
+
+/* Specialization for a row-major destination matrix => simple transposition of the product */
+template<
+  typename Index,
+  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
+  int ResInnerStride>
+struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride>
+{
+  typedef gebp_traits<RhsScalar,LhsScalar> Traits;
+
+  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+  static EIGEN_STRONG_INLINE void run(
+    Index rows, Index cols, Index depth,
+    const LhsScalar* lhs, Index lhsStride,
+    const RhsScalar* rhs, Index rhsStride,
+    ResScalar* res, Index resIncr, Index resStride,
+    ResScalar alpha,
+    level3_blocking<RhsScalar,LhsScalar>& blocking,
+    GemmParallelInfo<Index>* info = 0)
+  {
+    // transpose the product such that the result is column major
+    general_matrix_matrix_product<Index,
+      RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
+      LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
+      ColMajor,ResInnerStride>
+    ::run(cols,rows,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking,info);
+  }
+};
+
+/*  Specialization for a col-major destination matrix
+ *    => Blocking algorithm following Goto's paper */
+template<
+  typename Index,
+  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
+  int ResInnerStride>
+struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride>
+{
+
+typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+
+typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+static void run(Index rows, Index cols, Index depth,
+  const LhsScalar* _lhs, Index lhsStride,
+  const RhsScalar* _rhs, Index rhsStride,
+  ResScalar* _res, Index resIncr, Index resStride,
+  ResScalar alpha,
+  level3_blocking<LhsScalar,RhsScalar>& blocking,
+  GemmParallelInfo<Index>* info = 0)
+{
+  typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
+  typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
+  typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor,Unaligned,ResInnerStride> ResMapper;
+  LhsMapper lhs(_lhs, lhsStride);
+  RhsMapper rhs(_rhs, rhsStride);
+  ResMapper res(_res, resStride, resIncr);
+
+  Index kc = blocking.kc();                   // cache block size along the K direction
+  Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+  Index nc = (std::min)(cols,blocking.nc());  // cache block size along the N direction
+
+  gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+  gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
+  gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
+
+#ifdef EIGEN_HAS_OPENMP
+  if(info)
+  {
+    // this is the parallel version!
+    int tid = omp_get_thread_num();
+    int threads = omp_get_num_threads();
+
+    LhsScalar* blockA = blocking.blockA();
+    eigen_internal_assert(blockA!=0);
+
+    std::size_t sizeB = kc*nc;
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
+
+    // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
+    for(Index k=0; k<depth; k+=kc)
+    {
+      const Index actual_kc = (std::min)(k+kc,depth)-k; // => rows of B', and cols of the A'
+
+      // In order to reduce the chance that a thread has to wait for the other,
+      // let's start by packing B'.
+      pack_rhs(blockB, rhs.getSubMapper(k,0), actual_kc, nc);
+
+      // Pack A_k to A' in a parallel fashion:
+      // each thread packs the sub block A_k,i to A'_i where i is the thread id.
+
+      // However, before copying to A'_i, we have to make sure that no other thread is still using it,
+      // i.e., we test that info[tid].users equals 0.
+      // Then, we set info[tid].users to the number of threads to mark that all other threads are going to use it.
+      while(info[tid].users!=0) {}
+      info[tid].users += threads;
+
+      pack_lhs(blockA+info[tid].lhs_start*actual_kc, lhs.getSubMapper(info[tid].lhs_start,k), actual_kc, info[tid].lhs_length);
+
+      // Notify the other threads that the part A'_i is ready to go.
+      info[tid].sync = k;
+
+      // Computes C_i += A' * B' per A'_i
+      for(int shift=0; shift<threads; ++shift)
+      {
+        int i = (tid+shift)%threads;
+
+        // At this point we have to make sure that A'_i has been updated by the thread i,
+        // we use testAndSetOrdered to mimic a volatile access.
+        // However, no need to wait for the B' part which has been updated by the current thread!
+        if (shift>0) {
+          while(info[i].sync!=k) {
+          }
+        }
+
+        gebp(res.getSubMapper(info[i].lhs_start, 0), blockA+info[i].lhs_start*actual_kc, blockB, info[i].lhs_length, actual_kc, nc, alpha);
+      }
+
+      // Then keep going as usual with the remaining B'
+      for(Index j=nc; j<cols; j+=nc)
+      {
+        const Index actual_nc = (std::min)(j+nc,cols)-j;
+
+        // pack B_k,j to B'
+        pack_rhs(blockB, rhs.getSubMapper(k,j), actual_kc, actual_nc);
+
+        // C_j += A' * B'
+        gebp(res.getSubMapper(0, j), blockA, blockB, rows, actual_kc, actual_nc, alpha);
+      }
+
+      // Release all the sub blocks A'_i of A' for the current thread,
+      // i.e., we simply decrement the number of users by 1
+      for(Index i=0; i<threads; ++i)
+        #pragma omp atomic
+        info[i].users -= 1;
+    }
+  }
+  else
+#endif // EIGEN_HAS_OPENMP
+  {
+    EIGEN_UNUSED_VARIABLE(info);
+
+    // this is the sequential version!
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*nc;
+
+    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
+
+    const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
+
+    // For each horizontal panel of the rhs, and corresponding panel of the lhs...
+    for(Index i2=0; i2<rows; i2+=mc)
+    {
+      const Index actual_mc = (std::min)(i2+mc,rows)-i2;
+
+      for(Index k2=0; k2<depth; k2+=kc)
+      {
+        const Index actual_kc = (std::min)(k2+kc,depth)-k2;
+
+        // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
+        // => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
+        // Note that this panel will be read as many times as the number of blocks in the rhs's
+        // horizontal panel which is, in practice, a very low number.
+        pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
+
+        // For each kc x nc block of the rhs's horizontal panel...
+        for(Index j2=0; j2<cols; j2+=nc)
+        {
+          const Index actual_nc = (std::min)(j2+nc,cols)-j2;
+
+          // We pack the rhs's block into a sequential chunk of memory (L2 caching)
+          // Note that this block will be read a very high number of times, which is equal to the number of
+          // micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
+          if((!pack_rhs_once) || i2==0)
+            pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
+
+          // Everything is packed, we can now call the panel * block kernel:
+          gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
+        }
+      }
+    }
+  }
+}
+
+};
+
+/*********************************************************************************
+*  Specialization of generic_product_impl for "large" GEMM, i.e.,
+*  implementation of the high level wrapper to general_matrix_matrix_product
+**********************************************************************************/
+
+template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
+struct gemm_functor
+{
+  gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha, BlockingType& blocking)
+    : m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
+  {}
+
+  void initParallelSession(Index num_threads) const
+  {
+    m_blocking.initParallel(m_lhs.rows(), m_rhs.cols(), m_lhs.cols(), num_threads);
+    m_blocking.allocateA();
+  }
+
+  void operator() (Index row, Index rows, Index col=0, Index cols=-1, GemmParallelInfo<Index>* info=0) const
+  {
+    if(cols==-1)
+      cols = m_rhs.cols();
+
+    Gemm::run(rows, cols, m_lhs.cols(),
+              &m_lhs.coeffRef(row,0), m_lhs.outerStride(),
+              &m_rhs.coeffRef(0,col), m_rhs.outerStride(),
+              (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.innerStride(), m_dest.outerStride(),
+              m_actualAlpha, m_blocking, info);
+  }
+
+  typedef typename Gemm::Traits Traits;
+
+  protected:
+    const Lhs& m_lhs;
+    const Rhs& m_rhs;
+    Dest& m_dest;
+    Scalar m_actualAlpha;
+    BlockingType& m_blocking;
+};
+
+template<int StorageOrder, typename LhsScalar, typename RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor=1,
+bool FiniteAtCompileTime = MaxRows!=Dynamic && MaxCols!=Dynamic && MaxDepth != Dynamic> class gemm_blocking_space;
+
+template<typename _LhsScalar, typename _RhsScalar>
+class level3_blocking
+{
+    typedef _LhsScalar LhsScalar;
+    typedef _RhsScalar RhsScalar;
+
+  protected:
+    LhsScalar* m_blockA;
+    RhsScalar* m_blockB;
+
+    Index m_mc;
+    Index m_nc;
+    Index m_kc;
+
+  public:
+
+    level3_blocking()
+      : m_blockA(0), m_blockB(0), m_mc(0), m_nc(0), m_kc(0)
+    {}
+
+    inline Index mc() const { return m_mc; }
+    inline Index nc() const { return m_nc; }
+    inline Index kc() const { return m_kc; }
+
+    inline LhsScalar* blockA() { return m_blockA; }
+    inline RhsScalar* blockB() { return m_blockB; }
+};
+
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, true /* == FiniteAtCompileTime */>
+  : public level3_blocking<
+      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
+      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
+{
+    enum {
+      Transpose = StorageOrder==RowMajor,
+      ActualRows = Transpose ? MaxCols : MaxRows,
+      ActualCols = Transpose ? MaxRows : MaxCols
+    };
+    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
+    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
+    typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+    enum {
+      SizeA = ActualRows * MaxDepth,
+      SizeB = ActualCols * MaxDepth
+    };
+
+#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
+    EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
+    EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
+#else
+    EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
+    EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
+#endif
+
+  public:
+
+    gemm_blocking_space(Index /*rows*/, Index /*cols*/, Index /*depth*/, Index /*num_threads*/, bool /*full_rows = false*/)
+    {
+      this->m_mc = ActualRows;
+      this->m_nc = ActualCols;
+      this->m_kc = MaxDepth;
+#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
+      this->m_blockA = m_staticA;
+      this->m_blockB = m_staticB;
+#else
+      this->m_blockA = reinterpret_cast<LhsScalar*>((internal::UIntPtr(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
+      this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
+#endif
+    }
+
+    void initParallel(Index, Index, Index, Index)
+    {}
+
+    inline void allocateA() {}
+    inline void allocateB() {}
+    inline void allocateAll() {}
+};
+
+template<int StorageOrder, typename _LhsScalar, typename _RhsScalar, int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, MaxDepth, KcFactor, false>
+  : public level3_blocking<
+      typename conditional<StorageOrder==RowMajor,_RhsScalar,_LhsScalar>::type,
+      typename conditional<StorageOrder==RowMajor,_LhsScalar,_RhsScalar>::type>
+{
+    enum {
+      Transpose = StorageOrder==RowMajor
+    };
+    typedef typename conditional<Transpose,_RhsScalar,_LhsScalar>::type LhsScalar;
+    typedef typename conditional<Transpose,_LhsScalar,_RhsScalar>::type RhsScalar;
+    typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+
+    Index m_sizeA;
+    Index m_sizeB;
+
+  public:
+
+    gemm_blocking_space(Index rows, Index cols, Index depth, Index num_threads, bool l3_blocking)
+    {
+      this->m_mc = Transpose ? cols : rows;
+      this->m_nc = Transpose ? rows : cols;
+      this->m_kc = depth;
+
+      if(l3_blocking)
+      {
+        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, this->m_nc, num_threads);
+      }
+      else  // no l3 blocking
+      {
+        Index n = this->m_nc;
+        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
+      }
+
+      m_sizeA = this->m_mc * this->m_kc;
+      m_sizeB = this->m_kc * this->m_nc;
+    }
+
+    void initParallel(Index rows, Index cols, Index depth, Index num_threads)
+    {
+      this->m_mc = Transpose ? cols : rows;
+      this->m_nc = Transpose ? rows : cols;
+      this->m_kc = depth;
+
+      eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
+      Index m = this->m_mc;
+      computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
+      m_sizeA = this->m_mc * this->m_kc;
+      m_sizeB = this->m_kc * this->m_nc;
+    }
+
+    void allocateA()
+    {
+      if(this->m_blockA==0)
+        this->m_blockA = aligned_new<LhsScalar>(m_sizeA);
+    }
+
+    void allocateB()
+    {
+      if(this->m_blockB==0)
+        this->m_blockB = aligned_new<RhsScalar>(m_sizeB);
+    }
+
+    void allocateAll()
+    {
+      allocateA();
+      allocateB();
+    }
+
+    ~gemm_blocking_space()
+    {
+      aligned_delete(this->m_blockA, m_sizeA);
+      aligned_delete(this->m_blockB, m_sizeB);
+    }
+};
+
+} // end namespace internal
+
+namespace internal {
+
+template<typename Lhs, typename Rhs>
+struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
+  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  typedef typename Lhs::Scalar LhsScalar;
+  typedef typename Rhs::Scalar RhsScalar;
+
+  typedef internal::blas_traits<Lhs> LhsBlasTraits;
+  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+  typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+
+  typedef internal::blas_traits<Rhs> RhsBlasTraits;
+  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+  typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+  enum {
+    MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
+  };
+
+  typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
+
+  template<typename Dst>
+  static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+      lazyproduct::eval_dynamic(dst, lhs, rhs, internal::assign_op<typename Dst::Scalar,Scalar>());
+    else
+    {
+      dst.setZero();
+      scaleAndAddTo(dst, lhs, rhs, Scalar(1));
+    }
+  }
+
+  template<typename Dst>
+  static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+      lazyproduct::eval_dynamic(dst, lhs, rhs, internal::add_assign_op<typename Dst::Scalar,Scalar>());
+    else
+      scaleAndAddTo(dst,lhs, rhs, Scalar(1));
+  }
+
+  template<typename Dst>
+  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  {
+    if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0)
+      lazyproduct::eval_dynamic(dst, lhs, rhs, internal::sub_assign_op<typename Dst::Scalar,Scalar>());
+    else
+      scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
+  }
+
+  template<typename Dest>
+  static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
+  {
+    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
+    if(a_lhs.cols()==0 || a_lhs.rows()==0 || a_rhs.cols()==0)
+      return;
+
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
+
+    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+                               * RhsBlasTraits::extractScalarFactor(a_rhs);
+
+    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,LhsScalar,RhsScalar,
+            Dest::MaxRowsAtCompileTime,Dest::MaxColsAtCompileTime,MaxDepthAtCompileTime> BlockingType;
+
+    typedef internal::gemm_functor<
+      Scalar, Index,
+      internal::general_matrix_matrix_product<
+        Index,
+        LhsScalar, (ActualLhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(LhsBlasTraits::NeedToConjugate),
+        RhsScalar, (ActualRhsTypeCleaned::Flags&RowMajorBit) ? RowMajor : ColMajor, bool(RhsBlasTraits::NeedToConjugate),
+        (Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,
+        Dest::InnerStrideAtCompileTime>,
+      ActualLhsTypeCleaned, ActualRhsTypeCleaned, Dest, BlockingType> GemmFunctor;
+
+    BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
+    internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
+        (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_H
diff --git a/third-party/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/third-party/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
new file mode 100644
index 00000000..d68d2f96
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -0,0 +1,317 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
+
+namespace Eigen { 
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update;
+
+namespace internal {
+
+/**********************************************************************
+* This file implements a general A * B product while
+* evaluating only one triangular part of the product.
+* This is a more general version of self adjoint product (C += A A^T)
+* as the level 3 SYRK Blas routine.
+**********************************************************************/
+
+// forward declarations (defined at the end of this file)
+template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo>
+struct tribb_kernel;
+  
+/* Optimized matrix-matrix product evaluating only one triangular half */
+template <typename Index,
+          typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
+                              int ResStorageOrder, int ResInnerStride, int  UpLo, int Version = Specialized>
+struct general_matrix_matrix_triangular_product;
+
+// as usual if the result is row major => we transpose the product
+template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
+                          int ResInnerStride, int  UpLo, int Version>
+struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,UpLo,Version>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
+                                      const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resIncr, Index resStride,
+                                      const ResScalar& alpha, level3_blocking<RhsScalar,LhsScalar>& blocking)
+  {
+    general_matrix_matrix_triangular_product<Index,
+        RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
+        LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
+        ColMajor, ResInnerStride, UpLo==Lower?Upper:Lower>
+      ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resIncr,resStride,alpha,blocking);
+  }
+};
+
+template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+                          typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
+                          int ResInnerStride, int  UpLo, int Version>
+struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,UpLo,Version>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
+                                      const RhsScalar* _rhs, Index rhsStride,
+                                      ResScalar* _res, Index resIncr, Index resStride,
+                                      const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
+  {
+    typedef gebp_traits<LhsScalar,RhsScalar> Traits;
+
+    typedef const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> RhsMapper;
+    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
+    LhsMapper lhs(_lhs,lhsStride);
+    RhsMapper rhs(_rhs,rhsStride);
+    ResMapper res(_res, resStride, resIncr);
+
+    Index kc = blocking.kc();
+    Index mc = (std::min)(size,blocking.mc());
+
+    // !!! mc must be a multiple of nr:
+    if(mc > Traits::nr)
+      mc = (mc/Traits::nr)*Traits::nr;
+
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*size;
+
+    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
+
+    gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+    gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
+    gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
+    tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, ResInnerStride, UpLo> sybb;
+
+    for(Index k2=0; k2<depth; k2+=kc)
+    {
+      const Index actual_kc = (std::min)(k2+kc,depth)-k2;
+
+      // note that the actual rhs is the transpose/adjoint of mat
+      pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, size);
+
+      for(Index i2=0; i2<size; i2+=mc)
+      {
+        const Index actual_mc = (std::min)(i2+mc,size)-i2;
+
+        pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
+
+        // the selected actual_mc * size panel of res is split into three different part:
+        //  1 - before the diagonal => processed with gebp or skipped
+        //  2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
+        //  3 - after the diagonal => processed with gebp or skipped
+        if (UpLo==Lower)
+          gebp(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc,
+               (std::min)(size,i2), alpha, -1, -1, 0, 0);
+
+        sybb(_res+resStride*i2 + resIncr*i2, resIncr, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha);
+
+        if (UpLo==Upper)
+        {
+          Index j2 = i2+actual_mc;
+          gebp(res.getSubMapper(i2, j2), blockA, blockB+actual_kc*j2, actual_mc,
+               actual_kc, (std::max)(Index(0), size-j2), alpha, -1, -1, 0, 0);
+        }
+      }
+    }
+  }
+};
+
+// Optimized packed Block * packed Block product kernel evaluating only one given triangular part
+// This kernel is built on top of the gebp kernel:
+// - the current destination block is processed per panel of actual_mc x BlockSize
+//   where BlockSize is set to the minimal value allowing gebp to be as fast as possible
+// - then, as usual, each panel is split into three parts along the diagonal,
+//   the sub blocks above and below the diagonal are processed as usual,
+//   while the triangular block overlapping the diagonal is evaluated into a
+//   small temporary buffer which is then accumulated into the result using a
+//   triangular traversal.
+template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int ResInnerStride, int UpLo>
+struct tribb_kernel
+{
+  typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits;
+  typedef typename Traits::ResScalar ResScalar;
+
+  enum {
+    BlockSize  = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret
+  };
+  void operator()(ResScalar* _res, Index resIncr, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
+  {
+    typedef blas_data_mapper<ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
+    typedef blas_data_mapper<ResScalar, Index, ColMajor, Unaligned> BufferMapper;
+    ResMapper res(_res, resStride, resIncr);
+    gebp_kernel<LhsScalar, RhsScalar, Index, ResMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel1;
+    gebp_kernel<LhsScalar, RhsScalar, Index, BufferMapper, mr, nr, ConjLhs, ConjRhs> gebp_kernel2;
+
+    Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer((internal::constructor_without_unaligned_array_assert()));
+
+    // let's process the block per panel of actual_mc x BlockSize,
+    // again, each is split into three parts, etc.
+    for (Index j=0; j<size; j+=BlockSize)
+    {
+      Index actualBlockSize = std::min<Index>(BlockSize,size - j);
+      const RhsScalar* actual_b = blockB+j*depth;
+
+      if(UpLo==Upper)
+        gebp_kernel1(res.getSubMapper(0, j), blockA, actual_b, j, depth, actualBlockSize, alpha,
+                     -1, -1, 0, 0);
+      
+      // selfadjoint micro block
+      {
+        Index i = j;
+        buffer.setZero();
+        // 1 - apply the kernel on the temporary buffer
+        gebp_kernel2(BufferMapper(buffer.data(), BlockSize), blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
+                     -1, -1, 0, 0);
+
+        // 2 - triangular accumulation
+        for(Index j1=0; j1<actualBlockSize; ++j1)
+        {
+          typename ResMapper::LinearMapper r = res.getLinearMapper(i,j+j1);
+          for(Index i1=UpLo==Lower ? j1 : 0;
+              UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1)
+            r(i1) += buffer(i1,j1);
+        }
+      }
+
+      if(UpLo==Lower)
+      {
+        Index i = j+actualBlockSize;
+        gebp_kernel1(res.getSubMapper(i, j), blockA+depth*i, actual_b, size-i, 
+                     depth, actualBlockSize, alpha, -1, -1, 0, 0);
+      }
+    }
+  }
+};
+
+} // end namespace internal
+
+// high level API
+
+template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>
+struct general_product_to_triangular_selector;
+
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
+{
+  static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta)
+  {
+    typedef typename MatrixType::Scalar Scalar;
+    
+    typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+    typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+    typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+    
+    typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+    typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+    typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+    if(!beta)
+      mat.template triangularView<UpLo>().setZero();
+
+    enum {
+      StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+      UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
+      UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
+    };
+    
+    internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
+    ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
+      (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
+    if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
+    
+    internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
+    ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
+      (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
+    if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+    
+    
+    selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
+                              LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
+                              RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>
+          ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha);
+  }
+};
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
+{
+  static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha, bool beta)
+  {
+    typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+    typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+    typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+    
+    typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+    typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+    typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+    typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+    if(!beta)
+      mat.template triangularView<UpLo>().setZero();
+
+    enum {
+      IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
+      LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
+      RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0,
+      SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0
+    };
+
+    Index size = mat.cols();
+    if(SkipDiag)
+      size--;
+    Index depth = actualLhs.cols();
+
+    typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
+          MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType;
+
+    BlockingType blocking(size, size, depth, 1, false);
+
+    internal::general_matrix_matrix_triangular_product<Index,
+      typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+      typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+      IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo&(Lower|Upper)>
+      ::run(size, depth,
+            &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(),
+            &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(),
+            mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? mat.innerStride() : mat.outerStride() ) : 0),
+            mat.innerStride(), mat.outerStride(), actualAlpha, blocking);
+  }
+};
+
+template<typename MatrixType, unsigned int UpLo>
+template<typename ProductType>
+TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
+{
+  EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
+  eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
+  
+  general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);
+  
+  return derived();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
diff --git a/third-party/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/third-party/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
new file mode 100644
index 00000000..691f95d6
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
@@ -0,0 +1,145 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to BLAS F77
+ *   Level 3 BLAS SYRK/HERK implementation.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
+
+namespace Eigen {
+
+namespace internal {
+
+template <typename Index, typename Scalar, int AStorageOrder, bool ConjugateA, int ResStorageOrder, int  UpLo>
+struct general_matrix_matrix_rankupdate :
+       general_matrix_matrix_triangular_product<
+         Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,1,UpLo,BuiltIn> {};
+
+
+// try to go to BLAS specialization
+#define EIGEN_BLAS_RANKUPDATE_SPECIALIZE(Scalar) \
+template <typename Index, int LhsStorageOrder, bool ConjugateLhs, \
+                          int RhsStorageOrder, bool ConjugateRhs, int  UpLo> \
+struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,ConjugateLhs, \
+               Scalar,RhsStorageOrder,ConjugateRhs,ColMajor,1,UpLo,Specialized> { \
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
+                          const Scalar* rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
+  { \
+    if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \
+      general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
+      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
+    } else { \
+      general_matrix_matrix_triangular_product<Index, \
+        Scalar, LhsStorageOrder, ConjugateLhs, \
+        Scalar, RhsStorageOrder, ConjugateRhs, \
+        ColMajor, 1, UpLo, BuiltIn> \
+      ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resIncr,resStride,alpha,blocking); \
+    } \
+  } \
+};
+
+EIGEN_BLAS_RANKUPDATE_SPECIALIZE(double)
+EIGEN_BLAS_RANKUPDATE_SPECIALIZE(float)
+// TODO handle complex cases
+// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(dcomplex)
+// EIGEN_BLAS_RANKUPDATE_SPECIALIZE(scomplex)
+
+// SYRK for float/double
+#define EIGEN_BLAS_RANKUPDATE_R(EIGTYPE, BLASTYPE, BLASFUNC) \
+template <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \
+struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
+  enum { \
+    IsLower = (UpLo&Lower) == Lower, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \
+  }; \
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
+                          const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
+  { \
+  /* typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs;*/ \
+\
+   BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
+   char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \
+   EIGTYPE beta(1); \
+   BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \
+  } \
+};
+
+// HERK for complex data
+#define EIGEN_BLAS_RANKUPDATE_C(EIGTYPE, BLASTYPE, RTYPE, BLASFUNC) \
+template <typename Index, int AStorageOrder, bool ConjugateA, int  UpLo> \
+struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,ColMajor,UpLo> { \
+  enum { \
+    IsLower = (UpLo&Lower) == Lower, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \
+  }; \
+  static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \
+                          const EIGTYPE* /*rhs*/, Index /*rhsStride*/, EIGTYPE* res, Index resStride, EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
+  { \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, AStorageOrder> MatrixType; \
+\
+   BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
+   char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'C':'N'); \
+   RTYPE alpha_, beta_; \
+   const EIGTYPE* a_ptr; \
+\
+   alpha_ = alpha.real(); \
+   beta_ = 1.0; \
+/* Copy with conjugation in some cases*/ \
+   MatrixType a; \
+   if (conjA) { \
+     Map<const MatrixType, 0, OuterStride<> > mapA(lhs,n,k,OuterStride<>(lhsStride)); \
+     a = mapA.conjugate(); \
+     lda = a.outerStride(); \
+     a_ptr = a.data(); \
+   } else a_ptr=lhs; \
+   BLASFUNC(&uplo, &trans, &n, &k, &alpha_, (BLASTYPE*)a_ptr, &lda, &beta_, (BLASTYPE*)res, &ldc); \
+  } \
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk)
+EIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk)
+#else
+EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
+EIGEN_BLAS_RANKUPDATE_R(float,  float,  ssyrk_)
+#endif
+
+// TODO hanlde complex cases
+// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
+// EIGEN_BLAS_RANKUPDATE_C(scomplex, float,  float, cherk_)
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_BLAS_H
diff --git a/third-party/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h b/third-party/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
new file mode 100644
index 00000000..71abf401
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
@@ -0,0 +1,124 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to BLAS F77
+ *   General matrix-matrix product functionality based on ?GEMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
+#define EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/**********************************************************************
+* This file implements general matrix-matrix multiplication using BLAS
+* gemm function via partial specialization of
+* general_matrix_matrix_product::run(..) method for float, double,
+* std::complex<float> and std::complex<double> types
+**********************************************************************/
+
+// gemm specialization
+
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \
+template< \
+  typename Index, \
+  int LhsStorageOrder, bool ConjugateLhs, \
+  int RhsStorageOrder, bool ConjugateRhs> \
+struct general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1> \
+{ \
+typedef gebp_traits<EIGTYPE,EIGTYPE> Traits; \
+\
+static void run(Index rows, Index cols, Index depth, \
+  const EIGTYPE* _lhs, Index lhsStride, \
+  const EIGTYPE* _rhs, Index rhsStride, \
+  EIGTYPE* res, Index resIncr, Index resStride, \
+  EIGTYPE alpha, \
+  level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, \
+  GemmParallelInfo<Index>* /*info = 0*/) \
+{ \
+  using std::conj; \
+\
+  EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
+  eigen_assert(resIncr == 1); \
+  char transa, transb; \
+  BlasIndex m, n, k, lda, ldb, ldc; \
+  const EIGTYPE *a, *b; \
+  EIGTYPE beta(1); \
+  MatrixX##EIGPREFIX a_tmp, b_tmp; \
+\
+/* Set transpose options */ \
+  transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
+  transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set m, n, k */ \
+  m = convert_index<BlasIndex>(rows);  \
+  n = convert_index<BlasIndex>(cols);  \
+  k = convert_index<BlasIndex>(depth); \
+\
+/* Set lda, ldb, ldc */ \
+  lda = convert_index<BlasIndex>(lhsStride); \
+  ldb = convert_index<BlasIndex>(rhsStride); \
+  ldc = convert_index<BlasIndex>(resStride); \
+\
+/* Set a, b, c */ \
+  if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \
+    Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \
+    a_tmp = lhs.conjugate(); \
+    a = a_tmp.data(); \
+    lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
+  } else a = _lhs; \
+\
+  if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \
+    Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \
+    b_tmp = rhs.conjugate(); \
+    b = b_tmp.data(); \
+    ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
+  } else b = _rhs; \
+\
+  BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+}};
+
+#ifdef EIGEN_USE_MKL
+GEMM_SPECIALIZATION(double,   d,  double, dgemm)
+GEMM_SPECIALIZATION(float,    f,  float,  sgemm)
+GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm)
+GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8,  cgemm)
+#else
+GEMM_SPECIALIZATION(double,   d,  double, dgemm_)
+GEMM_SPECIALIZATION(float,    f,  float,  sgemm_)
+GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)
+GEMM_SPECIALIZATION(scomplex, cf, float,  cgemm_)
+#endif
+
+} // end namespase internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_MATRIX_BLAS_H
diff --git a/third-party/Eigen/src/Core/products/GeneralMatrixVector.h b/third-party/Eigen/src/Core/products/GeneralMatrixVector.h
new file mode 100644
index 00000000..a597c1f4
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -0,0 +1,619 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+/* Optimized col-major matrix * vector product:
+ * This algorithm processes 4 columns at onces that allows to both reduce
+ * the number of load/stores of the result by a factor 4 and to reduce
+ * the instruction dependency. Moreover, we know that all bands have the
+ * same alignment pattern.
+ *
+ * Mixing type logic: C += alpha * A * B
+ *  |  A  |  B  |alpha| comments
+ *  |real |cplx |cplx | no vectorization
+ *  |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization
+ *  |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp
+ *  |cplx |real |real | optimal case, vectorization possible via real-cplx mul
+ *
+ * Accesses to the matrix coefficients follow the following logic:
+ *
+ * - if all columns have the same alignment then
+ *   - if the columns have the same alignment as the result vector, then easy! (-> AllAligned case)
+ *   - otherwise perform unaligned loads only (-> NoneAligned case)
+ * - otherwise
+ *   - if even columns have the same alignment then
+ *     // odd columns are guaranteed to have the same alignment too
+ *     - if even or odd columns have the same alignment as the result, then
+ *       // for a register size of 2 scalars, this is guarantee to be the case (e.g., SSE with double)
+ *       - perform half aligned and half unaligned loads (-> EvenAligned case)
+ *     - otherwise perform unaligned loads only (-> NoneAligned case)
+ *   - otherwise, if the register size is 4 scalars (e.g., SSE with float) then
+ *     - one over 4 consecutive columns is guaranteed to be aligned with the result vector,
+ *       perform simple aligned loads for this column and aligned loads plus re-alignment for the other. (-> FirstAligned case)
+ *       // this re-alignment is done by the palign function implemented for SSE in Eigen/src/Core/arch/SSE/PacketMath.h
+ *   - otherwise,
+ *     // if we get here, this means the register size is greater than 4 (e.g., AVX with floats),
+ *     // we currently fall back to the NoneAligned case
+ *
+ * The same reasoning apply for the transposed case.
+ *
+ * The last case (PacketSize>4) could probably be improved by generalizing the FirstAligned case, but since we do not support AVX yet...
+ * One might also wonder why in the EvenAligned case we perform unaligned loads instead of using the aligned-loads plus re-alignment
+ * strategy as in the FirstAligned case. The reason is that we observed that unaligned loads on a 8 byte boundary are not too slow
+ * compared to unaligned loads on a 4 byte boundary.
+ *
+ */
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+
+enum {
+  Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
+              && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
+  LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+  RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+  ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
+};
+
+typedef typename packet_traits<LhsScalar>::type  _LhsPacket;
+typedef typename packet_traits<RhsScalar>::type  _RhsPacket;
+typedef typename packet_traits<ResScalar>::type  _ResPacket;
+
+typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+
+EIGEN_DONT_INLINE static void run(
+  Index rows, Index cols,
+  const LhsMapper& lhs,
+  const RhsMapper& rhs,
+        ResScalar* res, Index resIncr,
+  RhsScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+  Index rows, Index cols,
+  const LhsMapper& lhs,
+  const RhsMapper& rhs,
+        ResScalar* res, Index resIncr,
+  RhsScalar alpha)
+{
+  EIGEN_UNUSED_VARIABLE(resIncr);
+  eigen_internal_assert(resIncr==1);
+  #ifdef _EIGEN_ACCUMULATE_PACKETS
+  #error _EIGEN_ACCUMULATE_PACKETS has already been defined
+  #endif
+  #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) \
+    pstore(&res[j], \
+      padd(pload<ResPacket>(&res[j]), \
+        padd( \
+      padd(pcj.pmul(lhs0.template load<LhsPacket, Alignment0>(j),    ptmp0), \
+      pcj.pmul(lhs1.template load<LhsPacket, Alignment13>(j),   ptmp1)),   \
+      padd(pcj.pmul(lhs2.template load<LhsPacket, Alignment2>(j),    ptmp2), \
+      pcj.pmul(lhs3.template load<LhsPacket, Alignment13>(j),   ptmp3)) )))
+
+  typedef typename LhsMapper::VectorMapper LhsScalars;
+
+  conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+  conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+  if(ConjugateRhs)
+    alpha = numext::conj(alpha);
+
+  enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
+  const Index columnsAtOnce = 4;
+  const Index peels = 2;
+  const Index LhsPacketAlignedMask = LhsPacketSize-1;
+  const Index ResPacketAlignedMask = ResPacketSize-1;
+//  const Index PeelAlignedMask = ResPacketSize*peels-1;
+  const Index size = rows;
+
+  const Index lhsStride = lhs.stride();
+
+  // How many coeffs of the result do we have to skip to be aligned.
+  // Here we assume data are at least aligned on the base scalar type.
+  Index alignedStart = internal::first_default_aligned(res,size);
+  Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
+  const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
+
+  const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
+  Index alignmentPattern = alignmentStep==0 ? AllAligned
+                       : alignmentStep==(LhsPacketSize/2) ? EvenAligned
+                       : FirstAligned;
+
+  // we cannot assume the first element is aligned because of sub-matrices
+  const Index lhsAlignmentOffset = lhs.firstAligned(size);
+
+  // find how many columns do we have to skip to be aligned with the result (if possible)
+  Index skipColumns = 0;
+  // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
+  if( (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == size) || (UIntPtr(res)%sizeof(ResScalar)) )
+  {
+    alignedSize = 0;
+    alignedStart = 0;
+    alignmentPattern = NoneAligned;
+  }
+  else if(LhsPacketSize > 4)
+  {
+    // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
+    // Currently, it seems to be better to perform unaligned loads anyway
+    alignmentPattern = NoneAligned;
+  }
+  else if (LhsPacketSize>1)
+  {
+  //    eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize);
+
+    while (skipColumns<LhsPacketSize &&
+          alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize))
+      ++skipColumns;
+    if (skipColumns==LhsPacketSize)
+    {
+      // nothing can be aligned, no need to skip any column
+      alignmentPattern = NoneAligned;
+      skipColumns = 0;
+    }
+    else
+    {
+      skipColumns = (std::min)(skipColumns,cols);
+      // note that the skiped columns are processed later.
+    }
+
+    /*    eigen_internal_assert(  (alignmentPattern==NoneAligned)
+                      || (skipColumns + columnsAtOnce >= cols)
+                      || LhsPacketSize > size
+                      || (size_t(firstLhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);*/
+  }
+  else if(Vectorizable)
+  {
+    alignedStart = 0;
+    alignedSize = size;
+    alignmentPattern = AllAligned;
+  }
+
+  const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
+  const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
+
+  Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
+  for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
+  {
+    RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(i, 0)),
+              ptmp1 = pset1<RhsPacket>(alpha*rhs(i+offset1, 0)),
+              ptmp2 = pset1<RhsPacket>(alpha*rhs(i+2, 0)),
+              ptmp3 = pset1<RhsPacket>(alpha*rhs(i+offset3, 0));
+
+    // this helps a lot generating better binary code
+    const LhsScalars lhs0 = lhs.getVectorMapper(0, i+0),   lhs1 = lhs.getVectorMapper(0, i+offset1),
+                     lhs2 = lhs.getVectorMapper(0, i+2),   lhs3 = lhs.getVectorMapper(0, i+offset3);
+
+    if (Vectorizable)
+    {
+      /* explicit vectorization */
+      // process initial unaligned coeffs
+      for (Index j=0; j<alignedStart; ++j)
+      {
+        res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]);
+        res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]);
+        res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]);
+        res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]);
+      }
+
+      if (alignedSize>alignedStart)
+      {
+        switch(alignmentPattern)
+        {
+          case AllAligned:
+            for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned);
+            break;
+          case EvenAligned:
+            for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned);
+            break;
+          case FirstAligned:
+          {
+            Index j = alignedStart;
+            if(peels>1)
+            {
+              LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
+              ResPacket T0, T1;
+
+              A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1);
+              A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2);
+              A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3);
+
+              for (; j<peeledSize; j+=peels*ResPacketSize)
+              {
+                A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize);  palign<1>(A01,A11);
+                A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize);  palign<2>(A02,A12);
+                A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize);  palign<3>(A03,A13);
+
+                A00 = lhs0.template load<LhsPacket, Aligned>(j);
+                A10 = lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize);
+                T0  = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j]));
+                T1  = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize]));
+
+                T0  = pcj.pmadd(A01, ptmp1, T0);
+                A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize);  palign<1>(A11,A01);
+                T0  = pcj.pmadd(A02, ptmp2, T0);
+                A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize);  palign<2>(A12,A02);
+                T0  = pcj.pmadd(A03, ptmp3, T0);
+                pstore(&res[j],T0);
+                A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize);  palign<3>(A13,A03);
+                T1  = pcj.pmadd(A11, ptmp1, T1);
+                T1  = pcj.pmadd(A12, ptmp2, T1);
+                T1  = pcj.pmadd(A13, ptmp3, T1);
+                pstore(&res[j+ResPacketSize],T1);
+              }
+            }
+            for (; j<alignedSize; j+=ResPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned);
+            break;
+          }
+          default:
+            for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned);
+            break;
+        }
+      }
+    } // end explicit vectorization
+
+    /* process remaining coeffs (or all if there is no explicit vectorization) */
+    for (Index j=alignedSize; j<size; ++j)
+    {
+      res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]);
+      res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]);
+      res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]);
+      res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]);
+    }
+  }
+
+  // process remaining first and last columns (at most columnsAtOnce-1)
+  Index end = cols;
+  Index start = columnBound;
+  do
+  {
+    for (Index k=start; k<end; ++k)
+    {
+      RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(k, 0));
+      const LhsScalars lhs0 = lhs.getVectorMapper(0, k);
+
+      if (Vectorizable)
+      {
+        /* explicit vectorization */
+        // process first unaligned result's coeffs
+        for (Index j=0; j<alignedStart; ++j)
+          res[j] += cj.pmul(lhs0(j), pfirst(ptmp0));
+        // process aligned result's coeffs
+        if (lhs0.template aligned<LhsPacket>(alignedStart))
+          for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
+            pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(i), ptmp0, pload<ResPacket>(&res[i])));
+        else
+          for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
+            pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(i), ptmp0, pload<ResPacket>(&res[i])));
+      }
+
+      // process remaining scalars (or all if no explicit vectorization)
+      for (Index i=alignedSize; i<size; ++i)
+        res[i] += cj.pmul(lhs0(i), pfirst(ptmp0));
+    }
+    if (skipColumns)
+    {
+      start = 0;
+      end = skipColumns;
+      skipColumns = 0;
+    }
+    else
+      break;
+  } while(Vectorizable);
+  #undef _EIGEN_ACCUMULATE_PACKETS
+}
+
+/* Optimized row-major matrix * vector product:
+ * This algorithm processes 4 rows at onces that allows to both reduce
+ * the number of load/stores of the result by a factor 4 and to reduce
+ * the instruction dependency. Moreover, we know that all bands have the
+ * same alignment pattern.
+ *
+ * Mixing type logic:
+ *  - alpha is always a complex (or converted to a complex)
+ *  - no vectorization
+ */
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
+{
+typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+
+enum {
+  Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
+              && int(packet_traits<LhsScalar>::size)==int(packet_traits<RhsScalar>::size),
+  LhsPacketSize = Vectorizable ? packet_traits<LhsScalar>::size : 1,
+  RhsPacketSize = Vectorizable ? packet_traits<RhsScalar>::size : 1,
+  ResPacketSize = Vectorizable ? packet_traits<ResScalar>::size : 1
+};
+
+typedef typename packet_traits<LhsScalar>::type  _LhsPacket;
+typedef typename packet_traits<RhsScalar>::type  _RhsPacket;
+typedef typename packet_traits<ResScalar>::type  _ResPacket;
+
+typedef typename conditional<Vectorizable,_LhsPacket,LhsScalar>::type LhsPacket;
+typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
+typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
+
+EIGEN_DONT_INLINE static void run(
+  Index rows, Index cols,
+  const LhsMapper& lhs,
+  const RhsMapper& rhs,
+        ResScalar* res, Index resIncr,
+  ResScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+  Index rows, Index cols,
+  const LhsMapper& lhs,
+  const RhsMapper& rhs,
+  ResScalar* res, Index resIncr,
+  ResScalar alpha)
+{
+  eigen_internal_assert(rhs.stride()==1);
+
+  #ifdef _EIGEN_ACCUMULATE_PACKETS
+  #error _EIGEN_ACCUMULATE_PACKETS has already been defined
+  #endif
+
+  #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) {\
+    RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0);  \
+    ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Alignment0>(j), b, ptmp0); \
+    ptmp1 = pcj.pmadd(lhs1.template load<LhsPacket, Alignment13>(j), b, ptmp1); \
+    ptmp2 = pcj.pmadd(lhs2.template load<LhsPacket, Alignment2>(j), b, ptmp2); \
+    ptmp3 = pcj.pmadd(lhs3.template load<LhsPacket, Alignment13>(j), b, ptmp3); }
+
+  conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
+  conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
+
+  typedef typename LhsMapper::VectorMapper LhsScalars;
+
+  enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 };
+  const Index rowsAtOnce = 4;
+  const Index peels = 2;
+  const Index RhsPacketAlignedMask = RhsPacketSize-1;
+  const Index LhsPacketAlignedMask = LhsPacketSize-1;
+  const Index depth = cols;
+  const Index lhsStride = lhs.stride();
+
+  // How many coeffs of the result do we have to skip to be aligned.
+  // Here we assume data are at least aligned on the base scalar type
+  // if that's not the case then vectorization is discarded, see below.
+  Index alignedStart = rhs.firstAligned(depth);
+  Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
+  const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
+
+  const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
+  Index alignmentPattern = alignmentStep==0 ? AllAligned
+                           : alignmentStep==(LhsPacketSize/2) ? EvenAligned
+                           : FirstAligned;
+
+  // we cannot assume the first element is aligned because of sub-matrices
+  const Index lhsAlignmentOffset = lhs.firstAligned(depth);
+  const Index rhsAlignmentOffset = rhs.firstAligned(rows);
+
+  // find how many rows do we have to skip to be aligned with rhs (if possible)
+  Index skipRows = 0;
+  // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
+  if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) ||
+      (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == depth) ||
+      (rhsAlignmentOffset < 0) || (rhsAlignmentOffset == rows) )
+  {
+    alignedSize = 0;
+    alignedStart = 0;
+    alignmentPattern = NoneAligned;
+  }
+  else if(LhsPacketSize > 4)
+  {
+    // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4.
+    alignmentPattern = NoneAligned;
+  }
+  else if (LhsPacketSize>1)
+  {
+  //    eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0  || depth<LhsPacketSize);
+
+    while (skipRows<LhsPacketSize &&
+           alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize))
+      ++skipRows;
+    if (skipRows==LhsPacketSize)
+    {
+      // nothing can be aligned, no need to skip any column
+      alignmentPattern = NoneAligned;
+      skipRows = 0;
+    }
+    else
+    {
+      skipRows = (std::min)(skipRows,Index(rows));
+      // note that the skiped columns are processed later.
+    }
+    /*    eigen_internal_assert(  alignmentPattern==NoneAligned
+                      || LhsPacketSize==1
+                      || (skipRows + rowsAtOnce >= rows)
+                      || LhsPacketSize > depth
+                      || (size_t(firstLhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);*/
+  }
+  else if(Vectorizable)
+  {
+    alignedStart = 0;
+    alignedSize = depth;
+    alignmentPattern = AllAligned;
+  }
+
+  const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
+  const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
+
+  Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
+  for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
+  {
+    // FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ??
+    EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
+    ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
+
+    // this helps the compiler generating good binary code
+    const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0),    lhs1 = lhs.getVectorMapper(i+offset1, 0),
+                     lhs2 = lhs.getVectorMapper(i+2, 0),    lhs3 = lhs.getVectorMapper(i+offset3, 0);
+
+    if (Vectorizable)
+    {
+      /* explicit vectorization */
+      ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)),
+                ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0));
+
+      // process initial unaligned coeffs
+      // FIXME this loop get vectorized by the compiler !
+      for (Index j=0; j<alignedStart; ++j)
+      {
+        RhsScalar b = rhs(j, 0);
+        tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
+        tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
+      }
+
+      if (alignedSize>alignedStart)
+      {
+        switch(alignmentPattern)
+        {
+          case AllAligned:
+            for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned);
+            break;
+          case EvenAligned:
+            for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned);
+            break;
+          case FirstAligned:
+          {
+            Index j = alignedStart;
+            if (peels>1)
+            {
+              /* Here we proccess 4 rows with with two peeled iterations to hide
+               * the overhead of unaligned loads. Moreover unaligned loads are handled
+               * using special shift/move operations between the two aligned packets
+               * overlaping the desired unaligned packet. This is *much* more efficient
+               * than basic unaligned loads.
+               */
+              LhsPacket A01, A02, A03, A11, A12, A13;
+              A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1);
+              A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2);
+              A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3);
+
+              for (; j<peeledSize; j+=peels*RhsPacketSize)
+              {
+                RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0);
+                A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize);  palign<1>(A01,A11);
+                A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize);  palign<2>(A02,A12);
+                A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize);  palign<3>(A03,A13);
+
+                ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), b, ptmp0);
+                ptmp1 = pcj.pmadd(A01, b, ptmp1);
+                A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize);  palign<1>(A11,A01);
+                ptmp2 = pcj.pmadd(A02, b, ptmp2);
+                A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize);  palign<2>(A12,A02);
+                ptmp3 = pcj.pmadd(A03, b, ptmp3);
+                A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize);  palign<3>(A13,A03);
+
+                b = rhs.getVectorMapper(j+RhsPacketSize, 0).template load<RhsPacket, Aligned>(0);
+                ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize), b, ptmp0);
+                ptmp1 = pcj.pmadd(A11, b, ptmp1);
+                ptmp2 = pcj.pmadd(A12, b, ptmp2);
+                ptmp3 = pcj.pmadd(A13, b, ptmp3);
+              }
+            }
+            for (; j<alignedSize; j+=RhsPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned);
+            break;
+          }
+          default:
+            for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
+              _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned);
+            break;
+        }
+        tmp0 += predux(ptmp0);
+        tmp1 += predux(ptmp1);
+        tmp2 += predux(ptmp2);
+        tmp3 += predux(ptmp3);
+      }
+    } // end explicit vectorization
+
+    // process remaining coeffs (or all if no explicit vectorization)
+    // FIXME this loop get vectorized by the compiler !
+    for (Index j=alignedSize; j<depth; ++j)
+    {
+      RhsScalar b = rhs(j, 0);
+      tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b);
+      tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b);
+    }
+    res[i*resIncr]            += alpha*tmp0;
+    res[(i+offset1)*resIncr]  += alpha*tmp1;
+    res[(i+2)*resIncr]        += alpha*tmp2;
+    res[(i+offset3)*resIncr]  += alpha*tmp3;
+  }
+
+  // process remaining first and last rows (at most columnsAtOnce-1)
+  Index end = rows;
+  Index start = rowBound;
+  do
+  {
+    for (Index i=start; i<end; ++i)
+    {
+      EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
+      ResPacket ptmp0 = pset1<ResPacket>(tmp0);
+      const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
+      // process first unaligned result's coeffs
+      // FIXME this loop get vectorized by the compiler !
+      for (Index j=0; j<alignedStart; ++j)
+        tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
+
+      if (alignedSize>alignedStart)
+      {
+        // process aligned rhs coeffs
+        if (lhs0.template aligned<LhsPacket>(alignedStart))
+          for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
+            ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0);
+        else
+          for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize)
+            ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0);
+        tmp0 += predux(ptmp0);
+      }
+
+      // process remaining scalars
+      // FIXME this loop get vectorized by the compiler !
+      for (Index j=alignedSize; j<depth; ++j)
+        tmp0 += cj.pmul(lhs0(j), rhs(j, 0));
+      res[i*resIncr] += alpha*tmp0;
+    }
+    if (skipRows)
+    {
+      start = 0;
+      end = skipRows;
+      skipRows = 0;
+    }
+    else
+      break;
+  } while(Vectorizable);
+
+  #undef _EIGEN_ACCUMULATE_PACKETS
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_VECTOR_H
diff --git a/third-party/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h b/third-party/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
new file mode 100644
index 00000000..6e36c2b3
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
@@ -0,0 +1,136 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to BLAS F77
+ *   General matrix-vector product functionality based on ?GEMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
+#define EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/**********************************************************************
+* This file implements general matrix-vector multiplication using BLAS
+* gemv function via partial specialization of
+* general_matrix_vector_product::run(..) method for float, double,
+* std::complex<float> and std::complex<double> types
+**********************************************************************/
+
+// gemv specialization
+
+template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
+struct general_matrix_vector_product_gemv;
+
+#define EIGEN_BLAS_GEMV_SPECIALIZE(Scalar) \
+template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \
+static void run( \
+  Index rows, Index cols, \
+  const const_blas_data_mapper<Scalar,Index,ColMajor> &lhs, \
+  const const_blas_data_mapper<Scalar,Index,RowMajor> &rhs, \
+  Scalar* res, Index resIncr, Scalar alpha) \
+{ \
+  if (ConjugateLhs) { \
+    general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,BuiltIn>::run( \
+      rows, cols, lhs, rhs, res, resIncr, alpha); \
+  } else { \
+    general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
+      rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
+  } \
+} \
+}; \
+template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ConjugateRhs,Specialized> { \
+static void run( \
+  Index rows, Index cols, \
+  const const_blas_data_mapper<Scalar,Index,RowMajor> &lhs, \
+  const const_blas_data_mapper<Scalar,Index,ColMajor> &rhs, \
+  Scalar* res, Index resIncr, Scalar alpha) \
+{ \
+    general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
+      rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
+} \
+}; \
+
+EIGEN_BLAS_GEMV_SPECIALIZE(double)
+EIGEN_BLAS_GEMV_SPECIALIZE(float)
+EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
+EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
+
+#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
+template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
+struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
+{ \
+typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
+\
+static void run( \
+  Index rows, Index cols, \
+  const EIGTYPE* lhs, Index lhsStride, \
+  const EIGTYPE* rhs, Index rhsIncr, \
+  EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
+{ \
+  BlasIndex m=convert_index<BlasIndex>(rows), n=convert_index<BlasIndex>(cols), \
+            lda=convert_index<BlasIndex>(lhsStride), incx=convert_index<BlasIndex>(rhsIncr), incy=convert_index<BlasIndex>(resIncr); \
+  const EIGTYPE beta(1); \
+  const EIGTYPE *x_ptr; \
+  char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
+  if (LhsStorageOrder==RowMajor) { \
+    m = convert_index<BlasIndex>(cols); \
+    n = convert_index<BlasIndex>(rows); \
+  }\
+  GEMVVector x_tmp; \
+  if (ConjugateRhs) { \
+    Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
+    x_tmp=map_x.conjugate(); \
+    x_ptr=x_tmp.data(); \
+    incx=1; \
+  } else x_ptr=rhs; \
+  BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+}\
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, dgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  sgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
+#else
+EIGEN_BLAS_GEMV_SPECIALIZATION(double,   double, dgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float,    float,  sgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float,  cgemv_)
+#endif
+
+} // end namespase internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERAL_MATRIX_VECTOR_BLAS_H
diff --git a/third-party/Eigen/src/Core/products/Parallelizer.h b/third-party/Eigen/src/Core/products/Parallelizer.h
new file mode 100644
index 00000000..a3cc05b7
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/Parallelizer.h
@@ -0,0 +1,166 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PARALLELIZER_H
+#define EIGEN_PARALLELIZER_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal */
+inline void manage_multi_threading(Action action, int* v)
+{
+  static int m_maxThreads = -1;
+  EIGEN_UNUSED_VARIABLE(m_maxThreads);
+
+  if(action==SetAction)
+  {
+    eigen_internal_assert(v!=0);
+    m_maxThreads = *v;
+  }
+  else if(action==GetAction)
+  {
+    eigen_internal_assert(v!=0);
+    #ifdef EIGEN_HAS_OPENMP
+    if(m_maxThreads>0)
+      *v = m_maxThreads;
+    else
+      *v = omp_get_max_threads();
+    #else
+    *v = 1;
+    #endif
+  }
+  else
+  {
+    eigen_internal_assert(false);
+  }
+}
+
+}
+
+/** Must be call first when calling Eigen from multiple threads */
+inline void initParallel()
+{
+  int nbt;
+  internal::manage_multi_threading(GetAction, &nbt);
+  std::ptrdiff_t l1, l2, l3;
+  internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+}
+
+/** \returns the max number of threads reserved for Eigen
+  * \sa setNbThreads */
+inline int nbThreads()
+{
+  int ret;
+  internal::manage_multi_threading(GetAction, &ret);
+  return ret;
+}
+
+/** Sets the max number of threads reserved for Eigen
+  * \sa nbThreads */
+inline void setNbThreads(int v)
+{
+  internal::manage_multi_threading(SetAction, &v);
+}
+
+namespace internal {
+
+template<typename Index> struct GemmParallelInfo
+{
+  GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
+
+  Index volatile sync;
+  int volatile users;
+
+  Index lhs_start;
+  Index lhs_length;
+};
+
+template<bool Condition, typename Functor, typename Index>
+void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
+{
+  // TODO when EIGEN_USE_BLAS is defined,
+  // we should still enable OMP for other scalar types
+#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
+  // FIXME the transpose variable is only needed to properly split
+  // the matrix product when multithreading is enabled. This is a temporary
+  // fix to support row-major destination matrices. This whole
+  // parallelizer mechanism has to be redisigned anyway.
+  EIGEN_UNUSED_VARIABLE(depth);
+  EIGEN_UNUSED_VARIABLE(transpose);
+  func(0,rows, 0,cols);
+#else
+
+  // Dynamically check whether we should enable or disable OpenMP.
+  // The conditions are:
+  // - the max number of threads we can create is greater than 1
+  // - we are not already in a parallel code
+  // - the sizes are large enough
+
+  // compute the maximal number of threads from the size of the product:
+  // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.
+  Index size = transpose ? rows : cols;
+  Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
+
+  // compute the maximal number of threads from the total amount of work:
+  double work = static_cast<double>(rows) * static_cast<double>(cols) *
+      static_cast<double>(depth);
+  double kMinTaskSize = 50000;  // FIXME improve this heuristic.
+  pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
+
+  // compute the number of threads we are going to use
+  Index threads = std::min<Index>(nbThreads(), pb_max_threads);
+
+  // if multi-threading is explicitely disabled, not useful, or if we already are in a parallel session,
+  // then abort multi-threading
+  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
+  if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
+    return func(0,rows, 0,cols);
+
+  Eigen::initParallel();
+  func.initParallelSession(threads);
+
+  if(transpose)
+    std::swap(rows,cols);
+
+  ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
+
+  #pragma omp parallel num_threads(threads)
+  {
+    Index i = omp_get_thread_num();
+    // Note that the actual number of threads might be lower than the number of request ones.
+    Index actual_threads = omp_get_num_threads();
+
+    Index blockCols = (cols / actual_threads) & ~Index(0x3);
+    Index blockRows = (rows / actual_threads);
+    blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
+
+    Index r0 = i*blockRows;
+    Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
+
+    Index c0 = i*blockCols;
+    Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
+
+    info[i].lhs_start = r0;
+    info[i].lhs_length = actualBlockRows;
+
+    if(transpose)
+      func(c0, actualBlockCols, 0, rows, info);
+    else
+      func(0, rows, c0, actualBlockCols, info);
+  }
+#endif
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PARALLELIZER_H
diff --git a/third-party/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/third-party/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
new file mode 100644
index 00000000..04c93348
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -0,0 +1,527 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
+#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
+
+namespace Eigen { 
+
+namespace internal {
+
+// pack a selfadjoint block diagonal for use with the gebp_kernel
+template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
+struct symm_pack_lhs
+{
+  template<int BlockRows> inline
+  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
+  {
+    // normal copy
+    for(Index k=0; k<i; k++)
+      for(Index w=0; w<BlockRows; w++)
+        blockA[count++] = lhs(i+w,k);           // normal
+    // symmetric copy
+    Index h = 0;
+    for(Index k=i; k<i+BlockRows; k++)
+    {
+      for(Index w=0; w<h; w++)
+        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
+
+      blockA[count++] = numext::real(lhs(k,k));   // real (diagonal)
+
+      for(Index w=h+1; w<BlockRows; w++)
+        blockA[count++] = lhs(i+w, k);          // normal
+      ++h;
+    }
+    // transposed copy
+    for(Index k=i+BlockRows; k<cols; k++)
+      for(Index w=0; w<BlockRows; w++)
+        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
+  }
+  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
+  {
+    enum { PacketSize = packet_traits<Scalar>::size };
+    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
+    Index count = 0;
+    //Index peeled_mc3 = (rows/Pack1)*Pack1;
+    
+    const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
+    const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
+    const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
+    
+    if(Pack1>=3*PacketSize)
+      for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
+        pack<3*PacketSize>(blockA, lhs, cols, i, count);
+    
+    if(Pack1>=2*PacketSize)
+      for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
+        pack<2*PacketSize>(blockA, lhs, cols, i, count);
+    
+    if(Pack1>=1*PacketSize)
+      for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
+        pack<1*PacketSize>(blockA, lhs, cols, i, count);
+
+    // do the same with mr==1
+    for(Index i=peeled_mc1; i<rows; i++)
+    {
+      for(Index k=0; k<i; k++)
+        blockA[count++] = lhs(i, k);                   // normal
+
+      blockA[count++] = numext::real(lhs(i, i));       // real (diagonal)
+
+      for(Index k=i+1; k<cols; k++)
+        blockA[count++] = numext::conj(lhs(k, i));     // transposed
+    }
+  }
+};
+
+template<typename Scalar, typename Index, int nr, int StorageOrder>
+struct symm_pack_rhs
+{
+  enum { PacketSize = packet_traits<Scalar>::size };
+  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
+  {
+    Index end_k = k2 + rows;
+    Index count = 0;
+    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
+    Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
+    Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
+
+    // first part: normal case
+    for(Index j2=0; j2<k2; j2+=nr)
+    {
+      for(Index k=k2; k<end_k; k++)
+      {
+        blockB[count+0] = rhs(k,j2+0);
+        blockB[count+1] = rhs(k,j2+1);
+        if (nr>=4)
+        {
+          blockB[count+2] = rhs(k,j2+2);
+          blockB[count+3] = rhs(k,j2+3);
+        }
+        if (nr>=8)
+        {
+          blockB[count+4] = rhs(k,j2+4);
+          blockB[count+5] = rhs(k,j2+5);
+          blockB[count+6] = rhs(k,j2+6);
+          blockB[count+7] = rhs(k,j2+7);
+        }
+        count += nr;
+      }
+    }
+
+    // second part: diagonal block
+    Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
+    if(nr>=8)
+    {
+      for(Index j2=k2; j2<end8; j2+=8)
+      {
+        // again we can split vertically in three different parts (transpose, symmetric, normal)
+        // transpose
+        for(Index k=k2; k<j2; k++)
+        {
+          blockB[count+0] = numext::conj(rhs(j2+0,k));
+          blockB[count+1] = numext::conj(rhs(j2+1,k));
+          blockB[count+2] = numext::conj(rhs(j2+2,k));
+          blockB[count+3] = numext::conj(rhs(j2+3,k));
+          blockB[count+4] = numext::conj(rhs(j2+4,k));
+          blockB[count+5] = numext::conj(rhs(j2+5,k));
+          blockB[count+6] = numext::conj(rhs(j2+6,k));
+          blockB[count+7] = numext::conj(rhs(j2+7,k));
+          count += 8;
+        }
+        // symmetric
+        Index h = 0;
+        for(Index k=j2; k<j2+8; k++)
+        {
+          // normal
+          for (Index w=0 ; w<h; ++w)
+            blockB[count+w] = rhs(k,j2+w);
+
+          blockB[count+h] = numext::real(rhs(k,k));
+
+          // transpose
+          for (Index w=h+1 ; w<8; ++w)
+            blockB[count+w] = numext::conj(rhs(j2+w,k));
+          count += 8;
+          ++h;
+        }
+        // normal
+        for(Index k=j2+8; k<end_k; k++)
+        {
+          blockB[count+0] = rhs(k,j2+0);
+          blockB[count+1] = rhs(k,j2+1);
+          blockB[count+2] = rhs(k,j2+2);
+          blockB[count+3] = rhs(k,j2+3);
+          blockB[count+4] = rhs(k,j2+4);
+          blockB[count+5] = rhs(k,j2+5);
+          blockB[count+6] = rhs(k,j2+6);
+          blockB[count+7] = rhs(k,j2+7);
+          count += 8;
+        }
+      }
+    }
+    if(nr>=4)
+    {
+      for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
+      {
+        // again we can split vertically in three different parts (transpose, symmetric, normal)
+        // transpose
+        for(Index k=k2; k<j2; k++)
+        {
+          blockB[count+0] = numext::conj(rhs(j2+0,k));
+          blockB[count+1] = numext::conj(rhs(j2+1,k));
+          blockB[count+2] = numext::conj(rhs(j2+2,k));
+          blockB[count+3] = numext::conj(rhs(j2+3,k));
+          count += 4;
+        }
+        // symmetric
+        Index h = 0;
+        for(Index k=j2; k<j2+4; k++)
+        {
+          // normal
+          for (Index w=0 ; w<h; ++w)
+            blockB[count+w] = rhs(k,j2+w);
+
+          blockB[count+h] = numext::real(rhs(k,k));
+
+          // transpose
+          for (Index w=h+1 ; w<4; ++w)
+            blockB[count+w] = numext::conj(rhs(j2+w,k));
+          count += 4;
+          ++h;
+        }
+        // normal
+        for(Index k=j2+4; k<end_k; k++)
+        {
+          blockB[count+0] = rhs(k,j2+0);
+          blockB[count+1] = rhs(k,j2+1);
+          blockB[count+2] = rhs(k,j2+2);
+          blockB[count+3] = rhs(k,j2+3);
+          count += 4;
+        }
+      }
+    }
+
+    // third part: transposed
+    if(nr>=8)
+    {
+      for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
+      {
+        for(Index k=k2; k<end_k; k++)
+        {
+          blockB[count+0] = numext::conj(rhs(j2+0,k));
+          blockB[count+1] = numext::conj(rhs(j2+1,k));
+          blockB[count+2] = numext::conj(rhs(j2+2,k));
+          blockB[count+3] = numext::conj(rhs(j2+3,k));
+          blockB[count+4] = numext::conj(rhs(j2+4,k));
+          blockB[count+5] = numext::conj(rhs(j2+5,k));
+          blockB[count+6] = numext::conj(rhs(j2+6,k));
+          blockB[count+7] = numext::conj(rhs(j2+7,k));
+          count += 8;
+        }
+      }
+    }
+    if(nr>=4)
+    {
+      for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
+      {
+        for(Index k=k2; k<end_k; k++)
+        {
+          blockB[count+0] = numext::conj(rhs(j2+0,k));
+          blockB[count+1] = numext::conj(rhs(j2+1,k));
+          blockB[count+2] = numext::conj(rhs(j2+2,k));
+          blockB[count+3] = numext::conj(rhs(j2+3,k));
+          count += 4;
+        }
+      }
+    }
+
+    // copy the remaining columns one at a time (=> the same with nr==1)
+    for(Index j2=packet_cols4; j2<cols; ++j2)
+    {
+      // transpose
+      Index half = (std::min)(end_k,j2);
+      for(Index k=k2; k<half; k++)
+      {
+        blockB[count] = numext::conj(rhs(j2,k));
+        count += 1;
+      }
+
+      if(half==j2 && half<k2+rows)
+      {
+        blockB[count] = numext::real(rhs(j2,j2));
+        count += 1;
+      }
+      else
+        half--;
+
+      // normal
+      for(Index k=half+1; k<k2+rows; k++)
+      {
+        blockB[count] = rhs(k,j2);
+        count += 1;
+      }
+    }
+  }
+};
+
+/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
+ * the general matrix matrix product.
+ */
+template <typename Scalar, typename Index,
+          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
+          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
+          int ResStorageOrder, int ResInnerStride>
+struct product_selfadjoint_matrix;
+
+template <typename Scalar, typename Index,
+          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
+          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
+          int ResInnerStride>
+struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
+{
+
+  static EIGEN_STRONG_INLINE void run(
+    Index rows, Index cols,
+    const Scalar* lhs, Index lhsStride,
+    const Scalar* rhs, Index rhsStride,
+    Scalar* res,       Index resIncr, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
+  {
+    product_selfadjoint_matrix<Scalar, Index,
+      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
+      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
+      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
+      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
+      ColMajor,ResInnerStride>
+      ::run(cols, rows,  rhs, rhsStride,  lhs, lhsStride,  res, resIncr, resStride,  alpha, blocking);
+  }
+};
+
+template <typename Scalar, typename Index,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResInnerStride>
+struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
+{
+
+  static EIGEN_DONT_INLINE void run(
+    Index rows, Index cols,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* res,        Index resIncr, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResInnerStride>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
+    Index rows, Index cols,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* _res,       Index resIncr, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
+  {
+    Index size = rows;
+
+    typedef gebp_traits<Scalar,Scalar> Traits;
+
+    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
+    typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
+    typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
+    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
+    LhsMapper lhs(_lhs,lhsStride);
+    LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
+    RhsMapper rhs(_rhs,rhsStride);
+    ResMapper res(_res, resStride, resIncr);
+
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+    // kc must be smaller than mc
+    kc = (std::min)(kc,mc);
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*cols;
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+
+    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
+    gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
+
+    for(Index k2=0; k2<size; k2+=kc)
+    {
+      const Index actual_kc = (std::min)(k2+kc,size)-k2;
+
+      // we have selected one row panel of rhs and one column panel of lhs
+      // pack rhs's panel into a sequential chunk of memory
+      // and expand each coeff to a constant packet for further reuse
+      pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
+
+      // the select lhs's panel has to be split in three different parts:
+      //  1 - the transposed panel above the diagonal block => transposed packed copy
+      //  2 - the diagonal block => special packed copy
+      //  3 - the panel below the diagonal block => generic packed copy
+      for(Index i2=0; i2<k2; i2+=mc)
+      {
+        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
+        // transposed packed copy
+        pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
+
+        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
+      }
+      // the block diagonal
+      {
+        const Index actual_mc = (std::min)(k2+kc,size)-k2;
+        // symmetric packed copy
+        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
+
+        gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
+      }
+
+      for(Index i2=k2+kc; i2<size; i2+=mc)
+      {
+        const Index actual_mc = (std::min)(i2+mc,size)-i2;
+        gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
+          (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
+
+        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
+      }
+    }
+  }
+
+// matrix * selfadjoint product
+template <typename Scalar, typename Index,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResInnerStride>
+struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
+{
+
+  static EIGEN_DONT_INLINE void run(
+    Index rows, Index cols,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* res,        Index resIncr, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResInnerStride>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
+    Index rows, Index cols,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* _res,       Index resIncr, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
+  {
+    Index size = cols;
+
+    typedef gebp_traits<Scalar,Scalar> Traits;
+
+    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
+    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
+    LhsMapper lhs(_lhs,lhsStride);
+    ResMapper res(_res,resStride, resIncr);
+
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*cols;
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+
+    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+    symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
+
+    for(Index k2=0; k2<size; k2+=kc)
+    {
+      const Index actual_kc = (std::min)(k2+kc,size)-k2;
+
+      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
+
+      // => GEPP
+      for(Index i2=0; i2<rows; i2+=mc)
+      {
+        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
+        pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
+
+        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
+      }
+    }
+  }
+
+} // end namespace internal
+
+/***************************************************************************
+* Wrapper to product_selfadjoint_matrix
+***************************************************************************/
+
+namespace internal {
+  
+template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
+struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  typedef internal::blas_traits<Lhs> LhsBlasTraits;
+  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+  typedef internal::blas_traits<Rhs> RhsBlasTraits;
+  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+  
+  enum {
+    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
+    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
+    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
+    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
+  };
+  
+  template<typename Dest>
+  static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
+  {
+    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
+
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
+
+    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+                               * RhsBlasTraits::extractScalarFactor(a_rhs);
+
+    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
+
+    BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
+
+    internal::product_selfadjoint_matrix<Scalar, Index,
+      EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
+      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
+      EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
+      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
+      internal::traits<Dest>::Flags&RowMajorBit  ? RowMajor : ColMajor,
+      Dest::InnerStrideAtCompileTime>
+      ::run(
+        lhs.rows(), rhs.cols(),                 // sizes
+        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
+        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
+        &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(),  // result info
+        actualAlpha, blocking                   // alpha
+      );
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
diff --git a/third-party/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/third-party/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
new file mode 100644
index 00000000..61396dbd
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
@@ -0,0 +1,295 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+ ********************************************************************************
+ *   Content : Eigen bindings to BLAS F77
+ *   Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
+#define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
+
+namespace Eigen { 
+
+namespace internal {
+
+
+/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
+
+#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
+{\
+\
+  static void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resIncr, Index resStride, \
+    EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
+  { \
+    EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
+    eigen_assert(resIncr == 1); \
+    char side='L', uplo='L'; \
+    BlasIndex m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    EIGTYPE beta(1); \
+    MatrixX##EIGPREFIX b_tmp; \
+\
+/* Set transpose options */ \
+/* Set m, n, k */ \
+    m = convert_index<BlasIndex>(rows);  \
+    n = convert_index<BlasIndex>(cols);  \
+\
+/* Set lda, ldb, ldc */ \
+    lda = convert_index<BlasIndex>(lhsStride); \
+    ldb = convert_index<BlasIndex>(rhsStride); \
+    ldc = convert_index<BlasIndex>(resStride); \
+\
+/* Set a, b, c */ \
+    if (LhsStorageOrder==RowMajor) uplo='U'; \
+    a = _lhs; \
+\
+    if (RhsStorageOrder==RowMajor) { \
+      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+      b_tmp = rhs.adjoint(); \
+      b = b_tmp.data(); \
+      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
+    } else b = _rhs; \
+\
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+\
+  } \
+};
+
+
+#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
+{\
+  static void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resIncr, Index resStride, \
+    EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
+  { \
+    EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
+    eigen_assert(resIncr == 1); \
+    char side='L', uplo='L'; \
+    BlasIndex m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    EIGTYPE beta(1); \
+    MatrixX##EIGPREFIX b_tmp; \
+    Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
+\
+/* Set transpose options */ \
+/* Set m, n, k */ \
+    m = convert_index<BlasIndex>(rows); \
+    n = convert_index<BlasIndex>(cols); \
+\
+/* Set lda, ldb, ldc */ \
+    lda = convert_index<BlasIndex>(lhsStride); \
+    ldb = convert_index<BlasIndex>(rhsStride); \
+    ldc = convert_index<BlasIndex>(resStride); \
+\
+/* Set a, b, c */ \
+    if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
+      Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
+      a_tmp = lhs.conjugate(); \
+      a = a_tmp.data(); \
+      lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
+    } else a = _lhs; \
+    if (LhsStorageOrder==RowMajor) uplo='U'; \
+\
+    if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
+       b = _rhs; } \
+    else { \
+      if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
+        b_tmp = rhs.conjugate(); \
+      } else \
+      if (ConjugateRhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+        b_tmp = rhs.adjoint(); \
+      } else { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
+        b_tmp = rhs.transpose(); \
+      } \
+      b = b_tmp.data(); \
+      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
+    } \
+\
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+\
+  } \
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMM_L(double, double, d, dsymm)
+EIGEN_BLAS_SYMM_L(float, float, f, ssymm)
+EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
+EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)
+#else
+EIGEN_BLAS_SYMM_L(double, double, d, dsymm_)
+EIGEN_BLAS_SYMM_L(float, float, f, ssymm_)
+EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
+EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
+#endif
+
+/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
+
+#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
+{\
+\
+  static void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resIncr, Index resStride, \
+    EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
+  { \
+    EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
+    eigen_assert(resIncr == 1); \
+    char side='R', uplo='L'; \
+    BlasIndex m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    EIGTYPE beta(1); \
+    MatrixX##EIGPREFIX b_tmp; \
+\
+/* Set m, n, k */ \
+    m = convert_index<BlasIndex>(rows);  \
+    n = convert_index<BlasIndex>(cols);  \
+\
+/* Set lda, ldb, ldc */ \
+    lda = convert_index<BlasIndex>(rhsStride); \
+    ldb = convert_index<BlasIndex>(lhsStride); \
+    ldc = convert_index<BlasIndex>(resStride); \
+\
+/* Set a, b, c */ \
+    if (RhsStorageOrder==RowMajor) uplo='U'; \
+    a = _rhs; \
+\
+    if (LhsStorageOrder==RowMajor) { \
+      Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
+      b_tmp = lhs.adjoint(); \
+      b = b_tmp.data(); \
+      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
+    } else b = _lhs; \
+\
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+\
+  } \
+};
+
+
+#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
+template <typename Index, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
+{\
+  static void run( \
+    Index rows, Index cols, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resIncr, Index resStride, \
+    EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
+  { \
+    EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
+    eigen_assert(resIncr == 1); \
+    char side='R', uplo='L'; \
+    BlasIndex m, n, lda, ldb, ldc; \
+    const EIGTYPE *a, *b; \
+    EIGTYPE beta(1); \
+    MatrixX##EIGPREFIX b_tmp; \
+    Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
+\
+/* Set m, n, k */ \
+    m = convert_index<BlasIndex>(rows); \
+    n = convert_index<BlasIndex>(cols); \
+\
+/* Set lda, ldb, ldc */ \
+    lda = convert_index<BlasIndex>(rhsStride); \
+    ldb = convert_index<BlasIndex>(lhsStride); \
+    ldc = convert_index<BlasIndex>(resStride); \
+\
+/* Set a, b, c */ \
+    if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
+      Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
+      a_tmp = rhs.conjugate(); \
+      a = a_tmp.data(); \
+      lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
+    } else a = _rhs; \
+    if (RhsStorageOrder==RowMajor) uplo='U'; \
+\
+    if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
+       b = _lhs; } \
+    else { \
+      if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
+        b_tmp = lhs.conjugate(); \
+      } else \
+      if (ConjugateLhs) { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
+        b_tmp = lhs.adjoint(); \
+      } else { \
+        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
+        b_tmp = lhs.transpose(); \
+      } \
+      b = b_tmp.data(); \
+      ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
+    } \
+\
+    BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+  } \
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMM_R(double, double, d, dsymm)
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm)
+EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
+EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)
+#else
+EIGEN_BLAS_SYMM_R(double, double, d, dsymm_)
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm_)
+EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
+EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)
+#endif
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
diff --git a/third-party/Eigen/src/Core/products/SelfadjointMatrixVector.h b/third-party/Eigen/src/Core/products/SelfadjointMatrixVector.h
new file mode 100644
index 00000000..3fd180e6
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -0,0 +1,260 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
+#define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/* Optimized selfadjoint matrix * vector product:
+ * This algorithm processes 2 columns at onces that allows to both reduce
+ * the number of load/stores of the result by a factor 2 and to reduce
+ * the instruction dependency.
+ */
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
+struct selfadjoint_matrix_vector_product;
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+struct selfadjoint_matrix_vector_product
+
+{
+static EIGEN_DONT_INLINE void run(
+  Index size,
+  const Scalar*  lhs, Index lhsStride,
+  const Scalar*  rhs,
+  Scalar* res,
+  Scalar alpha);
+};
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
+  Index size,
+  const Scalar*  lhs, Index lhsStride,
+  const Scalar*  rhs,
+  Scalar* res,
+  Scalar alpha)
+{
+  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
+
+  enum {
+    IsRowMajor = StorageOrder==RowMajor ? 1 : 0,
+    IsLower = UpLo == Lower ? 1 : 0,
+    FirstTriangular = IsRowMajor == IsLower
+  };
+
+  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> cj0;
+  conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
+  conj_helper<RealScalar,Scalar,false, ConjugateRhs> cjd;
+
+  conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> pcj0;
+  conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
+
+  Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;
+
+
+  Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
+  if (FirstTriangular)
+    bound = size - bound;
+
+  for (Index j=FirstTriangular ? bound : 0;
+       j<(FirstTriangular ? size : bound);j+=2)
+  {
+    const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+    const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
+
+    Scalar t0 = cjAlpha * rhs[j];
+    Packet ptmp0 = pset1<Packet>(t0);
+    Scalar t1 = cjAlpha * rhs[j+1];
+    Packet ptmp1 = pset1<Packet>(t1);
+
+    Scalar t2(0);
+    Packet ptmp2 = pset1<Packet>(t2);
+    Scalar t3(0);
+    Packet ptmp3 = pset1<Packet>(t3);
+
+    Index starti = FirstTriangular ? 0 : j+2;
+    Index endi   = FirstTriangular ? j : size;
+    Index alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti);
+    Index alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
+
+    res[j]   += cjd.pmul(numext::real(A0[j]), t0);
+    res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1);
+    if(FirstTriangular)
+    {
+      res[j]   += cj0.pmul(A1[j],   t1);
+      t3       += cj1.pmul(A1[j],   rhs[j]);
+    }
+    else
+    {
+      res[j+1] += cj0.pmul(A0[j+1],t0);
+      t2 += cj1.pmul(A0[j+1], rhs[j+1]);
+    }
+
+    for (Index i=starti; i<alignedStart; ++i)
+    {
+      res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
+      t2 += cj1.pmul(A0[i], rhs[i]);
+      t3 += cj1.pmul(A1[i], rhs[i]);
+    }
+    // Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up)
+    // gcc 4.2 does this optimization automatically.
+    const Scalar* EIGEN_RESTRICT a0It  = A0  + alignedStart;
+    const Scalar* EIGEN_RESTRICT a1It  = A1  + alignedStart;
+    const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;
+          Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
+    for (Index i=alignedStart; i<alignedEnd; i+=PacketSize)
+    {
+      Packet A0i = ploadu<Packet>(a0It);  a0It  += PacketSize;
+      Packet A1i = ploadu<Packet>(a1It);  a1It  += PacketSize;
+      Packet Bi  = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
+      Packet Xi  = pload <Packet>(resIt);
+
+      Xi    = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
+      ptmp2 = pcj1.pmadd(A0i,  Bi, ptmp2);
+      ptmp3 = pcj1.pmadd(A1i,  Bi, ptmp3);
+      pstore(resIt,Xi); resIt += PacketSize;
+    }
+    for (Index i=alignedEnd; i<endi; i++)
+    {
+      res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
+      t2 += cj1.pmul(A0[i], rhs[i]);
+      t3 += cj1.pmul(A1[i], rhs[i]);
+    }
+
+    res[j]   += alpha * (t2 + predux(ptmp2));
+    res[j+1] += alpha * (t3 + predux(ptmp3));
+  }
+  for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
+  {
+    const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+
+    Scalar t1 = cjAlpha * rhs[j];
+    Scalar t2(0);
+    res[j] += cjd.pmul(numext::real(A0[j]), t1);
+    for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
+    {
+      res[i] += cj0.pmul(A0[i], t1);
+      t2 += cj1.pmul(A0[i], rhs[i]);
+    }
+    res[j] += alpha * t2;
+  }
+}
+
+} // end namespace internal 
+
+/***************************************************************************
+* Wrapper to product_selfadjoint_vector
+***************************************************************************/
+
+namespace internal {
+
+template<typename Lhs, int LhsMode, typename Rhs>
+struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  
+  typedef internal::blas_traits<Lhs> LhsBlasTraits;
+  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+  typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+  
+  typedef internal::blas_traits<Rhs> RhsBlasTraits;
+  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+  typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+  enum { LhsUpLo = LhsMode&(Upper|Lower) };
+
+  template<typename Dest>
+  static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
+  {
+    typedef typename Dest::Scalar ResScalar;
+    typedef typename Rhs::Scalar RhsScalar;
+    typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
+    
+    eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());
+
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
+
+    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
+                               * RhsBlasTraits::extractScalarFactor(a_rhs);
+
+    enum {
+      EvalToDest = (Dest::InnerStrideAtCompileTime==1),
+      UseRhs = (ActualRhsTypeCleaned::InnerStrideAtCompileTime==1)
+    };
+    
+    internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
+    internal::gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!UseRhs> static_rhs;
+
+    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+                                                  EvalToDest ? dest.data() : static_dest.data());
+                                                  
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
+        UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
+    
+    if(!EvalToDest)
+    {
+      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      Index size = dest.size();
+      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      #endif
+      MappedDest(actualDestPtr, dest.size()) = dest;
+    }
+      
+    if(!UseRhs)
+    {
+      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      Index size = rhs.size();
+      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      #endif
+      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
+    }
+      
+      
+    internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+                                                int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
+      (
+        lhs.rows(),                             // size
+        &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info
+        actualRhsPtr,                           // rhs info
+        actualDestPtr,                          // result info
+        actualAlpha                             // scale factor
+      );
+    
+    if(!EvalToDest)
+      dest = MappedDest(actualDestPtr, dest.size());
+  }
+};
+
+template<typename Lhs, typename Rhs, int RhsMode>
+struct selfadjoint_product_impl<Lhs,0,true,Rhs,RhsMode,false>
+{
+  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
+  enum { RhsUpLo = RhsMode&(Upper|Lower)  };
+
+  template<typename Dest>
+  static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
+  {
+    // let's simply transpose the product
+    Transpose<Dest> destT(dest);
+    selfadjoint_product_impl<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
+                             Transpose<const Lhs>, 0, true>::run(destT, a_rhs.transpose(), a_lhs.transpose(), alpha);
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
diff --git a/third-party/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h b/third-party/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
new file mode 100644
index 00000000..1238345e
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
@@ -0,0 +1,118 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to BLAS F77
+ *   Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
+#define EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/**********************************************************************
+* This file implements selfadjoint matrix-vector multiplication using BLAS
+**********************************************************************/
+
+// symv/hemv specialization
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
+struct selfadjoint_matrix_vector_product_symv :
+  selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
+
+#define EIGEN_BLAS_SYMV_SPECIALIZE(Scalar) \
+template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
+struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
+static void run( \
+  Index size, const Scalar*  lhs, Index lhsStride, \
+  const Scalar* _rhs, Scalar* res, Scalar alpha) { \
+    enum {\
+      IsColMajor = StorageOrder==ColMajor \
+    }; \
+    if (IsColMajor == ConjugateLhs) {\
+      selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
+        size, lhs, lhsStride, _rhs, res, alpha);  \
+    } else {\
+      selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
+        size, lhs, lhsStride, _rhs, res, alpha);  \
+    }\
+  } \
+}; \
+
+EIGEN_BLAS_SYMV_SPECIALIZE(double)
+EIGEN_BLAS_SYMV_SPECIALIZE(float)
+EIGEN_BLAS_SYMV_SPECIALIZE(dcomplex)
+EIGEN_BLAS_SYMV_SPECIALIZE(scomplex)
+
+#define EIGEN_BLAS_SYMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
+template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
+struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
+{ \
+typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
+\
+static void run( \
+Index size, const EIGTYPE*  lhs, Index lhsStride, \
+const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
+{ \
+  enum {\
+    IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
+    IsLower = UpLo == Lower ? 1 : 0 \
+  }; \
+  BlasIndex n=convert_index<BlasIndex>(size), lda=convert_index<BlasIndex>(lhsStride), incx=1, incy=1; \
+  EIGTYPE beta(1); \
+  const EIGTYPE *x_ptr; \
+  char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
+  SYMVVector x_tmp; \
+  if (ConjugateRhs) { \
+    Map<const SYMVVector, 0 > map_x(_rhs,size,1); \
+    x_tmp=map_x.conjugate(); \
+    x_ptr=x_tmp.data(); \
+  } else x_ptr=_rhs; \
+  BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+}\
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMV_SPECIALIZATION(double,   double, dsymv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(float,    float,  ssymv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8,  chemv)
+#else
+EIGEN_BLAS_SYMV_SPECIALIZATION(double,   double, dsymv_)
+EIGEN_BLAS_SYMV_SPECIALIZATION(float,    float,  ssymv_)
+EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
+EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float,  chemv_)
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
diff --git a/third-party/Eigen/src/Core/products/SelfadjointProduct.h b/third-party/Eigen/src/Core/products/SelfadjointProduct.h
new file mode 100644
index 00000000..ef12c98f
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/SelfadjointProduct.h
@@ -0,0 +1,133 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINT_PRODUCT_H
+#define EIGEN_SELFADJOINT_PRODUCT_H
+
+/**********************************************************************
+* This file implements a self adjoint product: C += A A^T updating only
+* half of the selfadjoint matrix C.
+* It corresponds to the level 3 SYRK and level 2 SYR Blas routines.
+**********************************************************************/
+
+namespace Eigen { 
+
+
+template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
+{
+  static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
+  {
+    internal::conj_if<ConjRhs> cj;
+    typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
+    typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType;
+    for (Index i=0; i<size; ++i)
+    {
+      Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))
+          += (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
+    }
+  }
+};
+
+template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
+{
+  static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
+  {
+    selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha);
+  }
+};
+
+template<typename MatrixType, typename OtherType, int UpLo, bool OtherIsVector = OtherType::IsVectorAtCompileTime>
+struct selfadjoint_product_selector;
+
+template<typename MatrixType, typename OtherType, int UpLo>
+struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
+{
+  static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
+  {
+    typedef typename MatrixType::Scalar Scalar;
+    typedef internal::blas_traits<OtherType> OtherBlasTraits;
+    typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
+    typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
+    typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
+
+    Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
+
+    enum {
+      StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+      UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1
+    };
+    internal::gemv_static_vector_if<Scalar,OtherType::SizeAtCompileTime,OtherType::MaxSizeAtCompileTime,!UseOtherDirectly> static_other;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(),
+      (UseOtherDirectly ? const_cast<Scalar*>(actualOther.data()) : static_other.data()));
+      
+    if(!UseOtherDirectly)
+      Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
+    
+    selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
+                              OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,
+                            (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
+          ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);
+  }
+};
+
+template<typename MatrixType, typename OtherType, int UpLo>
+struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
+{
+  static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
+  {
+    typedef typename MatrixType::Scalar Scalar;
+    typedef internal::blas_traits<OtherType> OtherBlasTraits;
+    typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
+    typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
+    typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
+
+    Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
+
+    enum {
+      IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
+      OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0
+    };
+
+    Index size = mat.cols();
+    Index depth = actualOther.cols();
+
+    typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar,
+              MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType;
+
+    BlockingType blocking(size, size, depth, 1, false);
+
+
+    internal::general_matrix_matrix_triangular_product<Index,
+      Scalar, OtherIsRowMajor ? RowMajor : ColMajor,   OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,
+      Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
+      IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo>
+      ::run(size, depth,
+            &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
+            mat.data(), mat.innerStride(), mat.outerStride(), actualAlpha, blocking);
+  }
+};
+
+// high level API
+
+template<typename MatrixType, unsigned int UpLo>
+template<typename DerivedU>
+SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
+::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
+{
+  selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
+
+  return *this;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINT_PRODUCT_H
diff --git a/third-party/Eigen/src/Core/products/SelfadjointRank2Update.h b/third-party/Eigen/src/Core/products/SelfadjointRank2Update.h
new file mode 100644
index 00000000..2ae36411
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -0,0 +1,93 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINTRANK2UPTADE_H
+#define EIGEN_SELFADJOINTRANK2UPTADE_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu'
+ * It corresponds to the Level2 syr2 BLAS routine
+ */
+
+template<typename Scalar, typename Index, typename UType, typename VType, int UpLo>
+struct selfadjoint_rank2_update_selector;
+
+template<typename Scalar, typename Index, typename UType, typename VType>
+struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
+{
+  static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
+  {
+    const Index size = u.size();
+    for (Index i=0; i<size; ++i)
+    {
+      Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) +=
+                        (numext::conj(alpha) * numext::conj(u.coeff(i))) * v.tail(size-i)
+                      + (alpha * numext::conj(v.coeff(i))) * u.tail(size-i);
+    }
+  }
+};
+
+template<typename Scalar, typename Index, typename UType, typename VType>
+struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
+{
+  static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
+  {
+    const Index size = u.size();
+    for (Index i=0; i<size; ++i)
+      Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) +=
+                        (numext::conj(alpha)  * numext::conj(u.coeff(i))) * v.head(i+1)
+                      + (alpha * numext::conj(v.coeff(i))) * u.head(i+1);
+  }
+};
+
+template<bool Cond, typename T> struct conj_expr_if
+  : conditional<!Cond, const T&,
+      CwiseUnaryOp<scalar_conjugate_op<typename traits<T>::Scalar>,T> > {};
+
+} // end namespace internal
+
+template<typename MatrixType, unsigned int UpLo>
+template<typename DerivedU, typename DerivedV>
+SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
+::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
+{
+  typedef internal::blas_traits<DerivedU> UBlasTraits;
+  typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
+  typedef typename internal::remove_all<ActualUType>::type _ActualUType;
+  typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived());
+
+  typedef internal::blas_traits<DerivedV> VBlasTraits;
+  typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
+  typedef typename internal::remove_all<ActualVType>::type _ActualVType;
+  typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived());
+
+  // If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and
+  // vice versa, and take the complex conjugate of all coefficients and vector entries.
+
+  enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
+  Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived())
+                             * numext::conj(VBlasTraits::extractScalarFactor(v.derived()));
+  if (IsRowMajor)
+    actualAlpha = numext::conj(actualAlpha);
+
+  typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type UType;
+  typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type VType;
+  internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType,
+    (IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
+    ::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha);
+
+  return *this;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINTRANK2UPTADE_H
diff --git a/third-party/Eigen/src/Core/products/TriangularMatrixMatrix.h b/third-party/Eigen/src/Core/products/TriangularMatrixMatrix.h
new file mode 100644
index 00000000..2fb408d1
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -0,0 +1,472 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
+#define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
+
+namespace Eigen { 
+
+namespace internal {
+
+// template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode>
+// struct gemm_pack_lhs_triangular
+// {
+//   Matrix<Scalar,mr,mr,
+//   void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* _lhs, int lhsStride, int depth, int rows)
+//   {
+//     conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+//     const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride);
+//     int count = 0;
+//     const int peeled_mc = (rows/mr)*mr;
+//     for(int i=0; i<peeled_mc; i+=mr)
+//     {
+//       for(int k=0; k<depth; k++)
+//         for(int w=0; w<mr; w++)
+//           blockA[count++] = cj(lhs(i+w, k));
+//     }
+//     for(int i=peeled_mc; i<rows; i++)
+//     {
+//       for(int k=0; k<depth; k++)
+//         blockA[count++] = cj(lhs(i, k));
+//     }
+//   }
+// };
+
+/* Optimized triangular matrix * matrix (_TRMM++) product built on top of
+ * the general matrix matrix product.
+ */
+template <typename Scalar, typename Index,
+          int Mode, bool LhsIsTriangular,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResStorageOrder, int ResInnerStride,
+          int Version = Specialized>
+struct product_triangular_matrix_matrix;
+
+template <typename Scalar, typename Index,
+          int Mode, bool LhsIsTriangular,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResInnerStride, int Version>
+struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
+                                           LhsStorageOrder,ConjugateLhs,
+                                           RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,Version>
+{
+  static EIGEN_STRONG_INLINE void run(
+    Index rows, Index cols, Index depth,
+    const Scalar* lhs, Index lhsStride,
+    const Scalar* rhs, Index rhsStride,
+    Scalar* res,       Index resIncr, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
+  {
+    product_triangular_matrix_matrix<Scalar, Index,
+      (Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
+      (!LhsIsTriangular),
+      RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
+      ConjugateRhs,
+      LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
+      ConjugateLhs,
+      ColMajor, ResInnerStride>
+      ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
+  }
+};
+
+// implements col-major += alpha * op(triangular) * op(general)
+template <typename Scalar, typename Index, int Mode,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResInnerStride, int Version>
+struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+                                           LhsStorageOrder,ConjugateLhs,
+                                           RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
+{
+  
+  typedef gebp_traits<Scalar,Scalar> Traits;
+  enum {
+    SmallPanelWidth   = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+    IsLower = (Mode&Lower) == Lower,
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
+  };
+
+  static EIGEN_DONT_INLINE void run(
+    Index _rows, Index _cols, Index _depth,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* res,        Index resIncr, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResInnerStride, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+                                                        LhsStorageOrder,ConjugateLhs,
+                                                        RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
+    Index _rows, Index _cols, Index _depth,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* _res,       Index resIncr, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
+  {
+    // strip zeros
+    Index diagSize  = (std::min)(_rows,_depth);
+    Index rows      = IsLower ? _rows : diagSize;
+    Index depth     = IsLower ? diagSize : _depth;
+    Index cols      = _cols;
+    
+    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
+    typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
+    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
+    LhsMapper lhs(_lhs,lhsStride);
+    RhsMapper rhs(_rhs,rhsStride);
+    ResMapper res(_res, resStride, resIncr);
+
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+    // The small panel size must not be larger than blocking size.
+    // Usually this should never be the case because SmallPanelWidth^2 is very small
+    // compared to L2 cache size, but let's be safe:
+    Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
+
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*cols;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+
+    // To work around an "error: member reference base type 'Matrix<...>
+    // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
+    // not a structure or union" compilation error in nvcc (tested V8.0.61),
+    // create a dummy internal::constructor_without_unaligned_array_assert
+    // object to pass to the Matrix constructor.
+    internal::constructor_without_unaligned_array_assert a;
+    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
+    triangularBuffer.setZero();
+    if((Mode&ZeroDiag)==ZeroDiag)
+      triangularBuffer.diagonal().setZero();
+    else
+      triangularBuffer.diagonal().setOnes();
+
+    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
+
+    for(Index k2=IsLower ? depth : 0;
+        IsLower ? k2>0 : k2<depth;
+        IsLower ? k2-=kc : k2+=kc)
+    {
+      Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
+      Index actual_k2 = IsLower ? k2-actual_kc : k2;
+
+      // align blocks with the end of the triangular part for trapezoidal lhs
+      if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
+      {
+        actual_kc = rows-k2;
+        k2 = k2+actual_kc-kc;
+      }
+
+      pack_rhs(blockB, rhs.getSubMapper(actual_k2,0), actual_kc, cols);
+
+      // the selected lhs's panel has to be split in three different parts:
+      //  1 - the part which is zero => skip it
+      //  2 - the diagonal block => special kernel
+      //  3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP
+
+      // the block diagonal, if any:
+      if(IsLower || actual_k2<rows)
+      {
+        // for each small vertical panels of lhs
+        for (Index k1=0; k1<actual_kc; k1+=panelWidth)
+        {
+          Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
+          Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
+          Index startBlock   = actual_k2+k1;
+          Index blockBOffset = k1;
+
+          // => GEBP with the micro triangular block
+          // The trick is to pack this micro block while filling the opposite triangular part with zeros.
+          // To this end we do an extra triangular copy to a small temporary buffer
+          for (Index k=0;k<actualPanelWidth;++k)
+          {
+            if (SetDiag)
+              triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
+            for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
+              triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
+          }
+          pack_lhs(blockA, LhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()), actualPanelWidth, actualPanelWidth);
+
+          gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,
+                      actualPanelWidth, actualPanelWidth, cols, alpha,
+                      actualPanelWidth, actual_kc, 0, blockBOffset);
+
+          // GEBP with remaining micro panel
+          if (lengthTarget>0)
+          {
+            Index startTarget  = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
+
+            pack_lhs(blockA, lhs.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
+
+            gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB,
+                        lengthTarget, actualPanelWidth, cols, alpha,
+                        actualPanelWidth, actual_kc, 0, blockBOffset);
+          }
+        }
+      }
+      // the part below (lower case) or above (upper case) the diagonal => GEPP
+      {
+        Index start = IsLower ? k2 : 0;
+        Index end   = IsLower ? rows : (std::min)(actual_k2,rows);
+        for(Index i2=start; i2<end; i2+=mc)
+        {
+          const Index actual_mc = (std::min)(i2+mc,end)-i2;
+          gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
+            (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
+
+          gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
+                      actual_kc, cols, alpha, -1, -1, 0, 0);
+        }
+      }
+    }
+  }
+
+// implements col-major += alpha * op(general) * op(triangular)
+template <typename Scalar, typename Index, int Mode,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResInnerStride, int Version>
+struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+                                        LhsStorageOrder,ConjugateLhs,
+                                        RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
+{
+  typedef gebp_traits<Scalar,Scalar> Traits;
+  enum {
+    SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+    IsLower = (Mode&Lower) == Lower,
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
+  };
+
+  static EIGEN_DONT_INLINE void run(
+    Index _rows, Index _cols, Index _depth,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* res,        Index resIncr, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResInnerStride, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+                                                        LhsStorageOrder,ConjugateLhs,
+                                                        RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
+    Index _rows, Index _cols, Index _depth,
+    const Scalar* _lhs, Index lhsStride,
+    const Scalar* _rhs, Index rhsStride,
+    Scalar* _res,       Index resIncr, Index resStride,
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
+  {
+    const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar);
+    // strip zeros
+    Index diagSize  = (std::min)(_cols,_depth);
+    Index rows      = _rows;
+    Index depth     = IsLower ? _depth : diagSize;
+    Index cols      = IsLower ? diagSize : _cols;
+    
+    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
+    typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
+    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
+    LhsMapper lhs(_lhs,lhsStride);
+    RhsMapper rhs(_rhs,rhsStride);
+    ResMapper res(_res, resStride, resIncr);
+
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar);
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+
+    internal::constructor_without_unaligned_array_assert a;
+    Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
+    triangularBuffer.setZero();
+    if((Mode&ZeroDiag)==ZeroDiag)
+      triangularBuffer.diagonal().setZero();
+    else
+      triangularBuffer.diagonal().setOnes();
+
+    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
+    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
+    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
+    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
+
+    for(Index k2=IsLower ? 0 : depth;
+        IsLower ? k2<depth  : k2>0;
+        IsLower ? k2+=kc   : k2-=kc)
+    {
+      Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
+      Index actual_k2 = IsLower ? k2 : k2-actual_kc;
+
+      // align blocks with the end of the triangular part for trapezoidal rhs
+      if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
+      {
+        actual_kc = cols-k2;
+        k2 = actual_k2 + actual_kc - kc;
+      }
+
+      // remaining size
+      Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
+      // size of the triangular part
+      Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
+
+      Scalar* geb = blockB+ts*ts;
+      geb = geb + internal::first_aligned<PacketBytes>(geb,PacketBytes/sizeof(Scalar));
+
+      pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
+
+      // pack the triangular part of the rhs padding the unrolled blocks with zeros
+      if(ts>0)
+      {
+        for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
+        {
+          Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
+          Index actual_j2 = actual_k2 + j2;
+          Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
+          Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
+          // general part
+          pack_rhs_panel(blockB+j2*actual_kc,
+                         rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
+                         panelLength, actualPanelWidth,
+                         actual_kc, panelOffset);
+
+          // append the triangular part via a temporary buffer
+          for (Index j=0;j<actualPanelWidth;++j)
+          {
+            if (SetDiag)
+              triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
+            for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
+              triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
+          }
+
+          pack_rhs_panel(blockB+j2*actual_kc,
+                         RhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()),
+                         actualPanelWidth, actualPanelWidth,
+                         actual_kc, j2);
+        }
+      }
+
+      for (Index i2=0; i2<rows; i2+=mc)
+      {
+        const Index actual_mc = (std::min)(mc,rows-i2);
+        pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
+
+        // triangular kernel
+        if(ts>0)
+        {
+          for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
+          {
+            Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
+            Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
+            Index blockOffset = IsLower ? j2 : 0;
+
+            gebp_kernel(res.getSubMapper(i2, actual_k2 + j2),
+                        blockA, blockB+j2*actual_kc,
+                        actual_mc, panelLength, actualPanelWidth,
+                        alpha,
+                        actual_kc, actual_kc,  // strides
+                        blockOffset, blockOffset);// offsets
+          }
+        }
+        gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2),
+                    blockA, geb, actual_mc, actual_kc, rs,
+                    alpha,
+                    -1, -1, 0, 0);
+      }
+    }
+  }
+
+/***************************************************************************
+* Wrapper to product_triangular_matrix_matrix
+***************************************************************************/
+
+} // end namespace internal
+
+namespace internal {
+template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
+struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
+{
+  template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
+  {
+    typedef typename Lhs::Scalar  LhsScalar;
+    typedef typename Rhs::Scalar  RhsScalar;
+    typedef typename Dest::Scalar Scalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+    
+    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
+
+    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
+    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
+    Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
+
+    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
+
+    enum { IsLower = (Mode&Lower) == Lower };
+    Index stripedRows  = ((!LhsIsTriangular) || (IsLower))  ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols());
+    Index stripedCols  = ((LhsIsTriangular)  || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows());
+    Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
+                                         : ((IsLower)  ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));
+
+    BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1, false);
+
+    internal::product_triangular_matrix_matrix<Scalar, Index,
+      Mode, LhsIsTriangular,
+      (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+      (internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+      (internal::traits<Dest          >::Flags&RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime>
+      ::run(
+        stripedRows, stripedCols, stripedDepth,   // sizes
+        &lhs.coeffRef(0,0), lhs.outerStride(),    // lhs info
+        &rhs.coeffRef(0,0), rhs.outerStride(),    // rhs info
+        &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(),    // result info
+        actualAlpha, blocking
+      );
+
+    // Apply correction if the diagonal is unit and a scalar factor was nested:
+    if ((Mode&UnitDiag)==UnitDiag)
+    {
+      if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
+      {
+        Index diagSize = (std::min)(lhs.rows(),lhs.cols());
+        dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
+      }
+      else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
+      {
+        Index diagSize = (std::min)(rhs.rows(),rhs.cols());
+        dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
+      }
+    }
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
diff --git a/third-party/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/third-party/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
new file mode 100644
index 00000000..a98d12e4
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
@@ -0,0 +1,317 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to BLAS F77
+ *   Triangular matrix * matrix product functionality based on ?TRMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
+#define EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
+
+namespace Eigen { 
+
+namespace internal {
+
+
+template <typename Scalar, typename Index,
+          int Mode, bool LhsIsTriangular,
+          int LhsStorageOrder, bool ConjugateLhs,
+          int RhsStorageOrder, bool ConjugateRhs,
+          int ResStorageOrder>
+struct product_triangular_matrix_matrix_trmm :
+       product_triangular_matrix_matrix<Scalar,Index,Mode,
+          LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
+          RhsStorageOrder, ConjugateRhs, ResStorageOrder, 1, BuiltIn> {};
+
+
+// try to go to BLAS specialization
+#define EIGEN_BLAS_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
+template <typename Index, int Mode, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
+           LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,1,Specialized> { \
+  static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
+    const Scalar* _rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
+      EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
+      eigen_assert(resIncr == 1); \
+      product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
+        LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
+        RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
+          _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
+  } \
+};
+
+EIGEN_BLAS_TRMM_SPECIALIZE(double, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(double, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(float, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(float, false)
+EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
+EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
+
+// implements col-major += alpha * op(triangular) * op(general)
+#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
+template <typename Index, int Mode, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
+         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
+  }; \
+\
+  static void run( \
+    Index _rows, Index _cols, Index _depth, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
+  { \
+   Index diagSize  = (std::min)(_rows,_depth); \
+   Index rows      = IsLower ? _rows : diagSize; \
+   Index depth     = IsLower ? diagSize : _depth; \
+   Index cols      = _cols; \
+\
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
+\
+/* Non-square case - doesn't fit to BLAS ?TRMM. Fall to default triangular product or call BLAS ?GEMM*/ \
+   if (rows != depth) { \
+\
+     /* FIXME handle mkl_domain_get_max_threads */ \
+     /*int nthr = mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS);*/ int nthr = 1;\
+\
+     if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
+     /* Most likely no benefit to call TRMM or GEMM from BLAS */ \
+       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
+       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
+           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
+     /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
+     } else { \
+     /* Make sense to call GEMM */ \
+       Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+       MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
+       BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
+       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
+       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \
+       rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, 1, resStride, alpha, gemm_blocking, 0); \
+\
+     /*std::cout << "TRMM_L: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
+     } \
+     return; \
+   } \
+   char side = 'L', transa, uplo, diag = 'N'; \
+   EIGTYPE *b; \
+   const EIGTYPE *a; \
+   BlasIndex m, n, lda, ldb; \
+\
+/* Set m, n */ \
+   m = convert_index<BlasIndex>(diagSize); \
+   n = convert_index<BlasIndex>(cols); \
+\
+/* Set trans */ \
+   transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set b, ldb */ \
+   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
+   MatrixX##EIGPREFIX b_tmp; \
+\
+   if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
+   b = b_tmp.data(); \
+   ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
+\
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+   MatrixLhs a_tmp; \
+\
+   if ((conjA!=0) || (SetDiag==0)) { \
+     if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
+     if (IsZeroDiag) \
+       a_tmp.diagonal().setZero(); \
+     else if (IsUnitDiag) \
+       a_tmp.diagonal().setOnes();\
+     a = a_tmp.data(); \
+     lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
+   } else { \
+     a = _lhs; \
+     lda = convert_index<BlasIndex>(lhsStride); \
+   } \
+   /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
+/* call ?trmm*/ \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
+\
+/* Add op(a_triangular)*b into res*/ \
+   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
+   res_tmp=res_tmp+b_tmp; \
+  } \
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMM_L(double, double, d, dtrmm)
+EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
+EIGEN_BLAS_TRMM_L(float, float, f, strmm)
+EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)
+#else
+EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)
+EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
+EIGEN_BLAS_TRMM_L(float, float, f, strmm_)
+EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)
+#endif
+
+// implements col-major += alpha * op(general) * op(triangular)
+#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
+template <typename Index, int Mode, \
+          int LhsStorageOrder, bool ConjugateLhs, \
+          int RhsStorageOrder, bool ConjugateRhs> \
+struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
+         LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper, \
+    conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
+  }; \
+\
+  static void run( \
+    Index _rows, Index _cols, Index _depth, \
+    const EIGTYPE* _lhs, Index lhsStride, \
+    const EIGTYPE* _rhs, Index rhsStride, \
+    EIGTYPE* res,        Index resStride, \
+    EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
+  { \
+   Index diagSize  = (std::min)(_cols,_depth); \
+   Index rows      = _rows; \
+   Index depth     = IsLower ? _depth : diagSize; \
+   Index cols      = IsLower ? diagSize : _cols; \
+\
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
+\
+/* Non-square case - doesn't fit to BLAS ?TRMM. Fall to default triangular product or call BLAS ?GEMM*/ \
+   if (cols != depth) { \
+\
+     int nthr = 1 /*mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS)*/; \
+\
+     if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
+     /* Most likely no benefit to call TRMM or GEMM from BLAS*/ \
+       product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
+       LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
+           _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
+       /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
+     } else { \
+     /* Make sense to call GEMM */ \
+       Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
+       MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
+       BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
+       gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
+       general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \
+       rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, 1, resStride, alpha, gemm_blocking, 0); \
+\
+     /*std::cout << "TRMM_R: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
+     } \
+     return; \
+   } \
+   char side = 'R', transa, uplo, diag = 'N'; \
+   EIGTYPE *b; \
+   const EIGTYPE *a; \
+   BlasIndex m, n, lda, ldb; \
+\
+/* Set m, n */ \
+   m = convert_index<BlasIndex>(rows); \
+   n = convert_index<BlasIndex>(diagSize); \
+\
+/* Set trans */ \
+   transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
+\
+/* Set b, ldb */ \
+   Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
+   MatrixX##EIGPREFIX b_tmp; \
+\
+   if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
+   b = b_tmp.data(); \
+   ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
+\
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
+   MatrixRhs a_tmp; \
+\
+   if ((conjA!=0) || (SetDiag==0)) { \
+     if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
+     if (IsZeroDiag) \
+       a_tmp.diagonal().setZero(); \
+     else if (IsUnitDiag) \
+       a_tmp.diagonal().setOnes();\
+     a = a_tmp.data(); \
+     lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
+   } else { \
+     a = _rhs; \
+     lda = convert_index<BlasIndex>(rhsStride); \
+   } \
+   /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
+/* call ?trmm*/ \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
+\
+/* Add op(a_triangular)*b into res*/ \
+   Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
+   res_tmp=res_tmp+b_tmp; \
+  } \
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMM_R(double, double, d, dtrmm)
+EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
+EIGEN_BLAS_TRMM_R(float, float, f, strmm)
+EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)
+#else
+EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)
+EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
+EIGEN_BLAS_TRMM_R(float, float, f, strmm_)
+EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)
+#endif
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
diff --git a/third-party/Eigen/src/Core/products/TriangularMatrixVector.h b/third-party/Eigen/src/Core/products/TriangularMatrixVector.h
new file mode 100644
index 00000000..76bfa159
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -0,0 +1,350 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIANGULARMATRIXVECTOR_H
+#define EIGEN_TRIANGULARMATRIXVECTOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized>
+struct triangular_matrix_vector_product;
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+  enum {
+    IsLower = ((Mode&Lower)==Lower),
+    HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
+    HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
+  };
+  static EIGEN_DONT_INLINE  void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+                                     const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const RhsScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+  ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+        const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const RhsScalar& alpha)
+  {
+    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+    Index size = (std::min)(_rows,_cols);
+    Index rows = IsLower ? _rows : (std::min)(_rows,_cols);
+    Index cols = IsLower ? (std::min)(_rows,_cols) : _cols;
+
+    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
+    const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
+    typename conj_expr_if<ConjLhs,LhsMap>::type cjLhs(lhs);
+
+    typedef Map<const Matrix<RhsScalar,Dynamic,1>, 0, InnerStride<> > RhsMap;
+    const RhsMap rhs(_rhs,cols,InnerStride<>(rhsIncr));
+    typename conj_expr_if<ConjRhs,RhsMap>::type cjRhs(rhs);
+
+    typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;
+    ResMap res(_res,rows);
+
+    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
+
+    for (Index pi=0; pi<size; pi+=PanelWidth)
+    {
+      Index actualPanelWidth = (std::min)(PanelWidth, size-pi);
+      for (Index k=0; k<actualPanelWidth; ++k)
+      {
+        Index i = pi + k;
+        Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi;
+        Index r = IsLower ? actualPanelWidth-k : k+1;
+        if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
+          res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);
+        if (HasUnitDiag)
+          res.coeffRef(i) += alpha * cjRhs.coeff(i);
+      }
+      Index r = IsLower ? rows - pi - actualPanelWidth : pi;
+      if (r>0)
+      {
+        Index s = IsLower ? pi+actualPanelWidth : 0;
+        general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs,BuiltIn>::run(
+            r, actualPanelWidth,
+            LhsMapper(&lhs.coeffRef(s,pi), lhsStride),
+            RhsMapper(&rhs.coeffRef(pi), rhsIncr),
+            &res.coeffRef(s), resIncr, alpha);
+      }
+    }
+    if((!IsLower) && cols>size)
+    {
+      general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs>::run(
+          rows, cols-size,
+          LhsMapper(&lhs.coeffRef(0,size), lhsStride),
+          RhsMapper(&rhs.coeffRef(size), rhsIncr),
+          _res, resIncr, alpha);
+    }
+  }
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
+  enum {
+    IsLower = ((Mode&Lower)==Lower),
+    HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
+    HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
+  };
+  static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+                                    const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+  ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+        const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
+  {
+    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+    Index diagSize = (std::min)(_rows,_cols);
+    Index rows = IsLower ? _rows : diagSize;
+    Index cols = IsLower ? diagSize : _cols;
+
+    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
+    const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
+    typename conj_expr_if<ConjLhs,LhsMap>::type cjLhs(lhs);
+
+    typedef Map<const Matrix<RhsScalar,Dynamic,1> > RhsMap;
+    const RhsMap rhs(_rhs,cols);
+    typename conj_expr_if<ConjRhs,RhsMap>::type cjRhs(rhs);
+
+    typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;
+    ResMap res(_res,rows,InnerStride<>(resIncr));
+
+    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
+
+    for (Index pi=0; pi<diagSize; pi+=PanelWidth)
+    {
+      Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);
+      for (Index k=0; k<actualPanelWidth; ++k)
+      {
+        Index i = pi + k;
+        Index s = IsLower ? pi  : ((HasUnitDiag||HasZeroDiag) ? i+1 : i);
+        Index r = IsLower ? k+1 : actualPanelWidth-k;
+        if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
+          res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum();
+        if (HasUnitDiag)
+          res.coeffRef(i) += alpha * cjRhs.coeff(i);
+      }
+      Index r = IsLower ? pi : cols - pi - actualPanelWidth;
+      if (r>0)
+      {
+        Index s = IsLower ? 0 : pi + actualPanelWidth;
+        general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs,BuiltIn>::run(
+            actualPanelWidth, r,
+            LhsMapper(&lhs.coeffRef(pi,s), lhsStride),
+            RhsMapper(&rhs.coeffRef(s), rhsIncr),
+            &res.coeffRef(pi), resIncr, alpha);
+      }
+    }
+    if(IsLower && rows>diagSize)
+    {
+      general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs>::run(
+            rows-diagSize, cols,
+            LhsMapper(&lhs.coeffRef(diagSize,0), lhsStride),
+            RhsMapper(&rhs.coeffRef(0), rhsIncr),
+            &res.coeffRef(diagSize), resIncr, alpha);
+    }
+  }
+
+/***************************************************************************
+* Wrapper to product_triangular_vector
+***************************************************************************/
+
+template<int Mode,int StorageOrder>
+struct trmv_selector;
+
+} // end namespace internal
+
+namespace internal {
+
+template<int Mode, typename Lhs, typename Rhs>
+struct triangular_product_impl<Mode,true,Lhs,false,Rhs,true>
+{
+  template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha)
+  {
+    eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols());
+  
+    internal::trmv_selector<Mode,(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(lhs, rhs, dst, alpha);
+  }
+};
+
+template<int Mode, typename Lhs, typename Rhs>
+struct triangular_product_impl<Mode,false,Lhs,true,Rhs,false>
+{
+  template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha)
+  {
+    eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols());
+
+    Transpose<Dest> dstT(dst);
+    internal::trmv_selector<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),
+                            (int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>
+            ::run(rhs.transpose(),lhs.transpose(), dstT, alpha);
+  }
+};
+
+} // end namespace internal
+
+namespace internal {
+
+// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.
+  
+template<int Mode> struct trmv_selector<Mode,ColMajor>
+{
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  {
+    typedef typename Lhs::Scalar      LhsScalar;
+    typedef typename Rhs::Scalar      RhsScalar;
+    typedef typename Dest::Scalar     ResScalar;
+    typedef typename Dest::RealScalar RealScalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    
+    typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
+
+    typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+    typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+
+    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
+    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
+    ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
+
+    enum {
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on, the other hand it is good for the cache to pack the vector anyways...
+      EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
+      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
+      MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
+    };
+
+    gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+
+    bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
+    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+
+    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
+
+    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+                                                  evalToDest ? dest.data() : static_dest.data());
+
+    if(!evalToDest)
+    {
+      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      Index size = dest.size();
+      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      #endif
+      if(!alphaIsCompatible)
+      {
+        MappedDest(actualDestPtr, dest.size()).setZero();
+        compatibleAlpha = RhsScalar(1);
+      }
+      else
+        MappedDest(actualDestPtr, dest.size()) = dest;
+    }
+
+    internal::triangular_matrix_vector_product
+      <Index,Mode,
+       LhsScalar, LhsBlasTraits::NeedToConjugate,
+       RhsScalar, RhsBlasTraits::NeedToConjugate,
+       ColMajor>
+      ::run(actualLhs.rows(),actualLhs.cols(),
+            actualLhs.data(),actualLhs.outerStride(),
+            actualRhs.data(),actualRhs.innerStride(),
+            actualDestPtr,1,compatibleAlpha);
+
+    if (!evalToDest)
+    {
+      if(!alphaIsCompatible)
+        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
+      else
+        dest = MappedDest(actualDestPtr, dest.size());
+    }
+
+    if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
+    {
+      Index diagSize = (std::min)(lhs.rows(),lhs.cols());
+      dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
+    }
+  }
+};
+
+template<int Mode> struct trmv_selector<Mode,RowMajor>
+{
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  {
+    typedef typename Lhs::Scalar      LhsScalar;
+    typedef typename Rhs::Scalar      RhsScalar;
+    typedef typename Dest::Scalar     ResScalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+
+    LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
+    RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
+    ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
+
+    enum {
+      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
+    };
+
+    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
+        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+
+    if(!DirectlyUseRhs)
+    {
+      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      Index size = actualRhs.size();
+      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      #endif
+      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+    }
+
+    internal::triangular_matrix_vector_product
+      <Index,Mode,
+       LhsScalar, LhsBlasTraits::NeedToConjugate,
+       RhsScalar, RhsBlasTraits::NeedToConjugate,
+       RowMajor>
+      ::run(actualLhs.rows(),actualLhs.cols(),
+            actualLhs.data(),actualLhs.outerStride(),
+            actualRhsPtr,1,
+            dest.data(),dest.innerStride(),
+            actualAlpha);
+
+    if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
+    {
+      Index diagSize = (std::min)(lhs.rows(),lhs.cols());
+      dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
+    }
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULARMATRIXVECTOR_H
diff --git a/third-party/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h b/third-party/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
new file mode 100644
index 00000000..3d47a2b9
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
@@ -0,0 +1,255 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to BLAS F77
+ *   Triangular matrix-vector product functionality based on ?TRMV.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
+#define EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/**********************************************************************
+* This file implements triangular matrix-vector multiplication using BLAS
+**********************************************************************/
+
+// trmv/hemv specialization
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
+struct triangular_matrix_vector_product_trmv :
+  triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
+
+#define EIGEN_BLAS_TRMV_SPECIALIZE(Scalar) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
+      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
+        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+  } \
+}; \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
+      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
+        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+  } \
+};
+
+EIGEN_BLAS_TRMV_SPECIALIZE(double)
+EIGEN_BLAS_TRMV_SPECIALIZE(float)
+EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
+EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
+
+// implements col-major: res += alpha * op(triangular) * vector
+#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper \
+  }; \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+                 const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ { \
+   if (ConjLhs || IsZeroDiag) { \
+     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
+       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+     return; \
+   }\
+   Index size = (std::min)(_rows,_cols); \
+   Index rows = IsLower ? _rows : size; \
+   Index cols = IsLower ? size : _cols; \
+\
+   typedef VectorX##EIGPREFIX VectorRhs; \
+   EIGTYPE *x, *y;\
+\
+/* Set x*/ \
+   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
+   VectorRhs x_tmp; \
+   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+   x = x_tmp.data(); \
+\
+/* Square part handling */\
+\
+   char trans, uplo, diag; \
+   BlasIndex m, n, lda, incx, incy; \
+   EIGTYPE const *a; \
+   EIGTYPE beta(1); \
+\
+/* Set m, n */ \
+   n = convert_index<BlasIndex>(size); \
+   lda = convert_index<BlasIndex>(lhsStride); \
+   incx = 1; \
+   incy = convert_index<BlasIndex>(resIncr); \
+\
+/* Set uplo, trans and diag*/ \
+   trans = 'N'; \
+   uplo = IsLower ? 'L' : 'U'; \
+   diag = IsUnitDiag ? 'U' : 'N'; \
+\
+/* call ?TRMV*/ \
+   BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
+\
+/* Add op(a_tr)rhs into res*/ \
+   BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
+   if (size<(std::max)(rows,cols)) { \
+     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+     x = x_tmp.data(); \
+     if (size<rows) { \
+       y = _res + size*resIncr; \
+       a = _lhs + size; \
+       m = convert_index<BlasIndex>(rows-size); \
+       n = convert_index<BlasIndex>(size); \
+     } \
+     else { \
+       x += size; \
+       y = _res; \
+       a = _lhs + size*lda; \
+       m = convert_index<BlasIndex>(size); \
+       n = convert_index<BlasIndex>(cols-size); \
+     } \
+     BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
+   } \
+  } \
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMV_CM(double,   double, d,  d,)
+EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)
+EIGEN_BLAS_TRMV_CM(float,    float,  f,  s,)
+EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8,  cf, c,)
+#else
+EIGEN_BLAS_TRMV_CM(double,   double, d,  d, _)
+EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)
+EIGEN_BLAS_TRMV_CM(float,    float,  f,  s, _)
+EIGEN_BLAS_TRMV_CM(scomplex, float,  cf, c, _)
+#endif
+
+// implements row-major: res += alpha * op(triangular) * vector
+#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
+template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
+struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    LowUp = IsLower ? Lower : Upper \
+  }; \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+                 const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
+ { \
+   if (IsZeroDiag) { \
+     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
+       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
+     return; \
+   }\
+   Index size = (std::min)(_rows,_cols); \
+   Index rows = IsLower ? _rows : size; \
+   Index cols = IsLower ? size : _cols; \
+\
+   typedef VectorX##EIGPREFIX VectorRhs; \
+   EIGTYPE *x, *y;\
+\
+/* Set x*/ \
+   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
+   VectorRhs x_tmp; \
+   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+   x = x_tmp.data(); \
+\
+/* Square part handling */\
+\
+   char trans, uplo, diag; \
+   BlasIndex m, n, lda, incx, incy; \
+   EIGTYPE const *a; \
+   EIGTYPE beta(1); \
+\
+/* Set m, n */ \
+   n = convert_index<BlasIndex>(size); \
+   lda = convert_index<BlasIndex>(lhsStride); \
+   incx = 1; \
+   incy = convert_index<BlasIndex>(resIncr); \
+\
+/* Set uplo, trans and diag*/ \
+   trans = ConjLhs ? 'C' : 'T'; \
+   uplo = IsLower ? 'U' : 'L'; \
+   diag = IsUnitDiag ? 'U' : 'N'; \
+\
+/* call ?TRMV*/ \
+   BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
+\
+/* Add op(a_tr)rhs into res*/ \
+   BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
+   if (size<(std::max)(rows,cols)) { \
+     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
+     x = x_tmp.data(); \
+     if (size<rows) { \
+       y = _res + size*resIncr; \
+       a = _lhs + size*lda; \
+       m = convert_index<BlasIndex>(rows-size); \
+       n = convert_index<BlasIndex>(size); \
+     } \
+     else { \
+       x += size; \
+       y = _res; \
+       a = _lhs + size; \
+       m = convert_index<BlasIndex>(size); \
+       n = convert_index<BlasIndex>(cols-size); \
+     } \
+     BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
+   } \
+  } \
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMV_RM(double,   double, d,  d,)
+EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)
+EIGEN_BLAS_TRMV_RM(float,    float,  f,  s,)
+EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8,  cf, c,)
+#else
+EIGEN_BLAS_TRMV_RM(double,   double, d,  d,_)
+EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)
+EIGEN_BLAS_TRMV_RM(float,    float,  f,  s,_)
+EIGEN_BLAS_TRMV_RM(scomplex, float,  cf, c,_)
+#endif
+
+} // end namespase internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
diff --git a/third-party/Eigen/src/Core/products/TriangularSolverMatrix.h b/third-party/Eigen/src/Core/products/TriangularSolverMatrix.h
new file mode 100644
index 00000000..e3ed2cd1
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -0,0 +1,335 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H
+#define EIGEN_TRIANGULAR_SOLVER_MATRIX_H
+
+namespace Eigen { 
+
+namespace internal {
+
+// if the rhs is row major, let's transpose the product
+template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
+struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor,OtherInnerStride>
+{
+  static void run(
+    Index size, Index cols,
+    const Scalar*  tri, Index triStride,
+    Scalar* _other, Index otherIncr, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
+  {
+    triangular_solve_matrix<
+      Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
+      (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
+      NumTraits<Scalar>::IsComplex && Conjugate,
+      TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride>
+      ::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
+  }
+};
+
+/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
+ */
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
+struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
+{
+  static EIGEN_DONT_INLINE void run(
+    Index size, Index otherSize,
+    const Scalar* _tri, Index triStride,
+    Scalar* _other, Index otherIncr, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
+    Index size, Index otherSize,
+    const Scalar* _tri, Index triStride,
+    Scalar* _other, Index otherIncr, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
+  {
+    Index cols = otherSize;
+
+    typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
+    typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
+    TriMapper tri(_tri, triStride);
+    OtherMapper other(_other, otherStride, otherIncr);
+
+    typedef gebp_traits<Scalar,Scalar> Traits;
+
+    enum {
+      SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+      IsLower = (Mode&Lower) == Lower
+    };
+
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(size,blocking.mc());  // cache block size along the M direction
+
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*cols;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+
+    conj_if<Conjugate> conj;
+    gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
+    gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
+    gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
+
+    // the goal here is to subdivise the Rhs panels such that we keep some cache
+    // coherence when accessing the rhs elements
+    std::ptrdiff_t l1, l2, l3;
+    manage_caching_sizes(GetAction, &l1, &l2, &l3);
+    Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * std::max<Index>(otherStride,size)) : 0;
+    subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
+
+    for(Index k2=IsLower ? 0 : size;
+        IsLower ? k2<size : k2>0;
+        IsLower ? k2+=kc : k2-=kc)
+    {
+      const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);
+
+      // We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
+      // and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
+      // A11 (the triangular part) and A21 the remaining rectangular part.
+      // Then the high level algorithm is:
+      //  - B = R1                    => general block copy (done during the next step)
+      //  - R1 = A11^-1 B             => tricky part
+      //  - update B from the new R1  => actually this has to be performed continuously during the above step
+      //  - R2 -= A21 * B             => GEPP
+
+      // The tricky part: compute R1 = A11^-1 B while updating B from R1
+      // The idea is to split A11 into multiple small vertical panels.
+      // Each panel can be split into a small triangular part T1k which is processed without optimization,
+      // and the remaining small part T2k which is processed using gebp with appropriate block strides
+      for(Index j2=0; j2<cols; j2+=subcols)
+      {
+        Index actual_cols = (std::min)(cols-j2,subcols);
+        // for each small vertical panels [T1k^T, T2k^T]^T of lhs
+        for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
+        {
+          Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
+          // tr solve
+          for (Index k=0; k<actualPanelWidth; ++k)
+          {
+            // TODO write a small kernel handling this (can be shared with trsv)
+            Index i  = IsLower ? k2+k1+k : k2-k1-k-1;
+            Index rs = actualPanelWidth - k - 1; // remaining size
+            Index s  = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
+                                                 :  IsLower ? i+1 : i-rs;
+
+            Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
+            for (Index j=j2; j<j2+actual_cols; ++j)
+            {
+              if (TriStorageOrder==RowMajor)
+              {
+                Scalar b(0);
+                const Scalar* l = &tri(i,s);
+                typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
+                for (Index i3=0; i3<k; ++i3)
+                  b += conj(l[i3]) * r(i3);
+
+                other(i,j) = (other(i,j) - b)*a;
+              }
+              else
+              {
+                Scalar b = (other(i,j) *= a);
+                typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
+                typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i);
+                for (Index i3=0;i3<rs;++i3)
+                  r(i3) -= b * conj(l(i3));
+              }
+            }
+          }
+
+          Index lengthTarget = actual_kc-k1-actualPanelWidth;
+          Index startBlock   = IsLower ? k2+k1 : k2-k1-actualPanelWidth;
+          Index blockBOffset = IsLower ? k1 : lengthTarget;
+
+          // update the respective rows of B from other
+          pack_rhs(blockB+actual_kc*j2, other.getSubMapper(startBlock,j2), actualPanelWidth, actual_cols, actual_kc, blockBOffset);
+
+          // GEBP
+          if (lengthTarget>0)
+          {
+            Index startTarget  = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc;
+
+            pack_lhs(blockA, tri.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
+
+            gebp_kernel(other.getSubMapper(startTarget,j2), blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
+                        actualPanelWidth, actual_kc, 0, blockBOffset);
+          }
+        }
+      }
+      
+      // R2 -= A21 * B => GEPP
+      {
+        Index start = IsLower ? k2+kc : 0;
+        Index end   = IsLower ? size : k2-kc;
+        for(Index i2=start; i2<end; i2+=mc)
+        {
+          const Index actual_mc = (std::min)(mc,end-i2);
+          if (actual_mc>0)
+          {
+            pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? k2 : k2-kc), actual_kc, actual_mc);
+
+            gebp_kernel(other.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0);
+          }
+        }
+      }
+    }
+  }
+
+/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
+ */
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
+struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
+{
+  static EIGEN_DONT_INLINE void run(
+    Index size, Index otherSize,
+    const Scalar* _tri, Index triStride,
+    Scalar* _other, Index otherIncr, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
+    Index size, Index otherSize,
+    const Scalar* _tri, Index triStride,
+    Scalar* _other, Index otherIncr, Index otherStride,
+    level3_blocking<Scalar,Scalar>& blocking)
+  {
+    Index rows = otherSize;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+
+    typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
+    typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
+    LhsMapper lhs(_other, otherStride, otherIncr);
+    RhsMapper rhs(_tri, triStride);
+
+    typedef gebp_traits<Scalar,Scalar> Traits;
+    enum {
+      RhsStorageOrder   = TriStorageOrder,
+      SmallPanelWidth   = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
+      IsLower = (Mode&Lower) == Lower
+    };
+
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*size;
+
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
+
+    conj_if<Conjugate> conj;
+    gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
+    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
+    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder,false,true> pack_rhs_panel;
+    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
+
+    for(Index k2=IsLower ? size : 0;
+        IsLower ? k2>0 : k2<size;
+        IsLower ? k2-=kc : k2+=kc)
+    {
+      const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);
+      Index actual_k2 = IsLower ? k2-actual_kc : k2 ;
+
+      Index startPanel = IsLower ? 0 : k2+actual_kc;
+      Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
+      Scalar* geb = blockB+actual_kc*actual_kc;
+
+      if (rs>0) pack_rhs(geb, rhs.getSubMapper(actual_k2,startPanel), actual_kc, rs);
+
+      // triangular packing (we only pack the panels off the diagonal,
+      // neglecting the blocks overlapping the diagonal
+      {
+        for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
+        {
+          Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
+          Index actual_j2 = actual_k2 + j2;
+          Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
+          Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
+
+          if (panelLength>0)
+          pack_rhs_panel(blockB+j2*actual_kc,
+                         rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
+                         panelLength, actualPanelWidth,
+                         actual_kc, panelOffset);
+        }
+      }
+
+      for(Index i2=0; i2<rows; i2+=mc)
+      {
+        const Index actual_mc = (std::min)(mc,rows-i2);
+
+        // triangular solver kernel
+        {
+          // for each small block of the diagonal (=> vertical panels of rhs)
+          for (Index j2 = IsLower
+                      ? (actual_kc - ((actual_kc%SmallPanelWidth) ? Index(actual_kc%SmallPanelWidth)
+                                                                  : Index(SmallPanelWidth)))
+                      : 0;
+               IsLower ? j2>=0 : j2<actual_kc;
+               IsLower ? j2-=SmallPanelWidth : j2+=SmallPanelWidth)
+          {
+            Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
+            Index absolute_j2 = actual_k2 + j2;
+            Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
+            Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
+
+            // GEBP
+            if(panelLength>0)
+            {
+              gebp_kernel(lhs.getSubMapper(i2,absolute_j2),
+                          blockA, blockB+j2*actual_kc,
+                          actual_mc, panelLength, actualPanelWidth,
+                          Scalar(-1),
+                          actual_kc, actual_kc, // strides
+                          panelOffset, panelOffset); // offsets
+            }
+
+            // unblocked triangular solve
+            for (Index k=0; k<actualPanelWidth; ++k)
+            {
+              Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;
+
+              typename LhsMapper::LinearMapper r = lhs.getLinearMapper(i2,j);
+              for (Index k3=0; k3<k; ++k3)
+              {
+                Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
+                typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? j+1+k3 : absolute_j2+k3);
+                for (Index i=0; i<actual_mc; ++i)
+                  r(i) -= a(i) * b;
+              }
+              if((Mode & UnitDiag)==0)
+              {
+                Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
+                for (Index i=0; i<actual_mc; ++i)
+                  r(i) *= inv_rjj;
+              }
+            }
+
+            // pack the just computed part of lhs to A
+            pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2),
+                           actualPanelWidth, actual_mc,
+                           actual_kc, j2);
+          }
+        }
+
+        if (rs>0)
+          gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb,
+                      actual_mc, actual_kc, rs, Scalar(-1),
+                      -1, -1, 0, 0);
+      }
+    }
+  }
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H
diff --git a/third-party/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h b/third-party/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
new file mode 100644
index 00000000..621194ce
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
@@ -0,0 +1,167 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to BLAS F77
+ *   Triangular matrix * matrix product functionality based on ?TRMM.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
+#define EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
+
+namespace Eigen {
+
+namespace internal {
+
+// implements LeftSide op(triangular)^-1 * general
+#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
+template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
+struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
+  }; \
+  static void run( \
+      Index size, Index otherSize, \
+      const EIGTYPE* _tri, Index triStride, \
+      EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
+  { \
+   EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
+   eigen_assert(otherIncr == 1); \
+   BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \
+   char side = 'L', uplo, diag='N', transa; \
+   /* Set alpha_ */ \
+   EIGTYPE alpha(1); \
+   ldb = convert_index<BlasIndex>(otherStride);\
+\
+   const EIGTYPE *a; \
+/* Set trans */ \
+   transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
+   Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
+   MatrixTri a_tmp; \
+\
+   if (conjA) { \
+     a_tmp = tri.conjugate(); \
+     a = a_tmp.data(); \
+     lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
+   } else { \
+     a = _tri; \
+     lda = convert_index<BlasIndex>(triStride); \
+   } \
+   if (IsUnitDiag) diag='U'; \
+/* call ?trsm*/ \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
+ } \
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRSM_L(double,   double, dtrsm)
+EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)
+EIGEN_BLAS_TRSM_L(float,    float,  strsm)
+EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)
+#else
+EIGEN_BLAS_TRSM_L(double,   double, dtrsm_)
+EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)
+EIGEN_BLAS_TRSM_L(float,    float,  strsm_)
+EIGEN_BLAS_TRSM_L(scomplex, float,  ctrsm_)
+#endif
+
+// implements RightSide general * op(triangular)^-1
+#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
+template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
+struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
+{ \
+  enum { \
+    IsLower = (Mode&Lower) == Lower, \
+    IsUnitDiag  = (Mode&UnitDiag) ? 1 : 0, \
+    IsZeroDiag  = (Mode&ZeroDiag) ? 1 : 0, \
+    conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
+  }; \
+  static void run( \
+      Index size, Index otherSize, \
+      const EIGTYPE* _tri, Index triStride, \
+      EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
+  { \
+   EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
+   eigen_assert(otherIncr == 1); \
+   BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \
+   char side = 'R', uplo, diag='N', transa; \
+   /* Set alpha_ */ \
+   EIGTYPE alpha(1); \
+   ldb = convert_index<BlasIndex>(otherStride);\
+\
+   const EIGTYPE *a; \
+/* Set trans */ \
+   transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
+/* Set uplo */ \
+   uplo = IsLower ? 'L' : 'U'; \
+   if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
+/* Set a, lda */ \
+   typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
+   Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
+   MatrixTri a_tmp; \
+\
+   if (conjA) { \
+     a_tmp = tri.conjugate(); \
+     a = a_tmp.data(); \
+     lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
+   } else { \
+     a = _tri; \
+     lda = convert_index<BlasIndex>(triStride); \
+   } \
+   if (IsUnitDiag) diag='U'; \
+/* call ?trsm*/ \
+   BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
+   /*std::cout << "TRMS_L specialization!\n";*/ \
+ } \
+};
+
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRSM_R(double,   double, dtrsm)
+EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)
+EIGEN_BLAS_TRSM_R(float,    float,  strsm)
+EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8,  ctrsm)
+#else
+EIGEN_BLAS_TRSM_R(double,   double, dtrsm_)
+EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)
+EIGEN_BLAS_TRSM_R(float,    float,  strsm_)
+EIGEN_BLAS_TRSM_R(scomplex, float,  ctrsm_)
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
diff --git a/third-party/Eigen/src/Core/products/TriangularSolverVector.h b/third-party/Eigen/src/Core/products/TriangularSolverVector.h
new file mode 100644
index 00000000..b994759b
--- /dev/null
+++ b/third-party/Eigen/src/Core/products/TriangularSolverVector.h
@@ -0,0 +1,145 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIANGULAR_SOLVER_VECTOR_H
+#define EIGEN_TRIANGULAR_SOLVER_VECTOR_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate, int StorageOrder>
+struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheRight, Mode, Conjugate, StorageOrder>
+{
+  static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
+  {
+    triangular_solve_vector<LhsScalar,RhsScalar,Index,OnTheLeft,
+        ((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),
+        Conjugate,StorageOrder==RowMajor?ColMajor:RowMajor
+      >::run(size, _lhs, lhsStride, rhs);
+  }
+};
+
+// forward and backward substitution, row-major, rhs is a vector
+template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate>
+struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Conjugate, RowMajor>
+{
+  enum {
+    IsLower = ((Mode&Lower)==Lower)
+  };
+  static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
+  {
+    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
+    const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride));
+
+    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
+
+    typename internal::conditional<
+                          Conjugate,
+                          const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,
+                          const LhsMap&>
+                        ::type cjLhs(lhs);
+    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+    for(Index pi=IsLower ? 0 : size;
+        IsLower ? pi<size : pi>0;
+        IsLower ? pi+=PanelWidth : pi-=PanelWidth)
+    {
+      Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
+
+      Index r = IsLower ? pi : size - pi; // remaining size
+      if (r > 0)
+      {
+        // let's directly call the low level product function because:
+        // 1 - it is faster to compile
+        // 2 - it is slighlty faster at runtime
+        Index startRow = IsLower ? pi : pi-actualPanelWidth;
+        Index startCol = IsLower ? 0 : pi;
+
+        general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,Conjugate,RhsScalar,RhsMapper,false>::run(
+          actualPanelWidth, r,
+          LhsMapper(&lhs.coeffRef(startRow,startCol), lhsStride),
+          RhsMapper(rhs + startCol, 1),
+          rhs + startRow, 1,
+          RhsScalar(-1));
+      }
+
+      for(Index k=0; k<actualPanelWidth; ++k)
+      {
+        Index i = IsLower ? pi+k : pi-k-1;
+        Index s = IsLower ? pi   : i+1;
+        if (k>0)
+          rhs[i] -= (cjLhs.row(i).segment(s,k).transpose().cwiseProduct(Map<const Matrix<RhsScalar,Dynamic,1> >(rhs+s,k))).sum();
+
+        if(!(Mode & UnitDiag))
+          rhs[i] /= cjLhs(i,i);
+      }
+    }
+  }
+};
+
+// forward and backward substitution, column-major, rhs is a vector
+template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate>
+struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Conjugate, ColMajor>
+{
+  enum {
+    IsLower = ((Mode&Lower)==Lower)
+  };
+  static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
+  {
+    typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
+    const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride));
+    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
+    typename internal::conditional<Conjugate,
+                                   const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,
+                                   const LhsMap&
+                                  >::type cjLhs(lhs);
+    static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
+
+    for(Index pi=IsLower ? 0 : size;
+        IsLower ? pi<size : pi>0;
+        IsLower ? pi+=PanelWidth : pi-=PanelWidth)
+    {
+      Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
+      Index startBlock = IsLower ? pi : pi-actualPanelWidth;
+      Index endBlock = IsLower ? pi + actualPanelWidth : 0;
+
+      for(Index k=0; k<actualPanelWidth; ++k)
+      {
+        Index i = IsLower ? pi+k : pi-k-1;
+        if(!(Mode & UnitDiag))
+          rhs[i] /= cjLhs.coeff(i,i);
+
+        Index r = actualPanelWidth - k - 1; // remaining size
+        Index s = IsLower ? i+1 : i-r;
+        if (r>0)
+          Map<Matrix<RhsScalar,Dynamic,1> >(rhs+s,r) -= rhs[i] * cjLhs.col(i).segment(s,r);
+      }
+      Index r = IsLower ? size - endBlock : startBlock; // remaining size
+      if (r > 0)
+      {
+        // let's directly call the low level product function because:
+        // 1 - it is faster to compile
+        // 2 - it is slighlty faster at runtime
+        general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,Conjugate,RhsScalar,RhsMapper,false>::run(
+            r, actualPanelWidth,
+            LhsMapper(&lhs.coeffRef(endBlock,startBlock), lhsStride),
+            RhsMapper(rhs+startBlock, 1),
+            rhs+endBlock, 1, RhsScalar(-1));
+      }
+    }
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIANGULAR_SOLVER_VECTOR_H
diff --git a/third-party/Eigen/src/Core/util/BlasUtil.h b/third-party/Eigen/src/Core/util/BlasUtil.h
new file mode 100644
index 00000000..3dff9bc9
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/BlasUtil.h
@@ -0,0 +1,499 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BLASUTIL_H
+#define EIGEN_BLASUTIL_H
+
+// This file contains many lightweight helper classes used to
+// implement and control fast level 2 and level 3 BLAS-like routines.
+
+namespace Eigen {
+
+namespace internal {
+
+// forward declarations
+template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
+struct gebp_kernel;
+
+template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
+struct gemm_pack_rhs;
+
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
+struct gemm_pack_lhs;
+
+template<
+  typename Index,
+  typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
+  typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
+  int ResStorageOrder, int ResInnerStride>
+struct general_matrix_matrix_product;
+
+template<typename Index,
+         typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs,
+         typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
+struct general_matrix_vector_product;
+
+
+template<bool Conjugate> struct conj_if;
+
+template<> struct conj_if<true> {
+  template<typename T>
+  inline T operator()(const T& x) const { return numext::conj(x); }
+  template<typename T>
+  inline T pconj(const T& x) const { return internal::pconj(x); }
+};
+
+template<> struct conj_if<false> {
+  template<typename T>
+  inline const T& operator()(const T& x) const { return x; }
+  template<typename T>
+  inline const T& pconj(const T& x) const { return x; }
+};
+
+// Generic implementation for custom complex types.
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
+struct conj_helper
+{
+  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;
+
+  EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
+  { return padd(c, pmul(x,y)); }
+
+  EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
+  { return conj_if<ConjLhs>()(x) *  conj_if<ConjRhs>()(y); }
+};
+
+template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
+{
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
+};
+
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
+{
+  typedef std::complex<RealScalar> Scalar;
+  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+  { return c + pmul(x,y); }
+
+  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+  { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
+};
+
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
+{
+  typedef std::complex<RealScalar> Scalar;
+  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+  { return c + pmul(x,y); }
+
+  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+  { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
+};
+
+template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
+{
+  typedef std::complex<RealScalar> Scalar;
+  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
+  { return c + pmul(x,y); }
+
+  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
+  { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
+};
+
+template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
+{
+  typedef std::complex<RealScalar> Scalar;
+  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
+  { return padd(c, pmul(x,y)); }
+  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
+  { return conj_if<Conj>()(x)*y; }
+};
+
+template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
+{
+  typedef std::complex<RealScalar> Scalar;
+  EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
+  { return padd(c, pmul(x,y)); }
+  EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
+  { return x*conj_if<Conj>()(y); }
+};
+
+template<typename From,typename To> struct get_factor {
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
+};
+
+template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
+};
+
+
+template<typename Scalar, typename Index>
+class BlasVectorMapper {
+  public:
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
+    return m_data[i];
+  }
+  template <typename Packet, int AlignmentType>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
+    return ploadt<Packet, AlignmentType>(m_data + i);
+  }
+
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC bool aligned(Index i) const {
+    return (UIntPtr(m_data+i)%sizeof(Packet))==0;
+  }
+
+  protected:
+  Scalar* m_data;
+};
+
+template<typename Scalar, typename Index, int AlignmentType, int Incr=1>
+class BlasLinearMapper;
+
+template<typename Scalar, typename Index, int AlignmentType>
+class BlasLinearMapper<Scalar,Index,AlignmentType,1> {
+  public:
+  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename packet_traits<Scalar>::half HalfPacket;
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1)
+    : m_data(data)
+  {
+    EIGEN_ONLY_USED_FOR_DEBUG(incr);
+    eigen_assert(incr==1);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
+    internal::prefetch(&operator()(i));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
+    return m_data[i];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
+    return ploadt<Packet, AlignmentType>(m_data + i);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
+    return ploadt<HalfPacket, AlignmentType>(m_data + i);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
+    pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
+  }
+
+  protected:
+  Scalar *m_data;
+};
+
+// Lightweight helper class to access matrix coefficients.
+template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1>
+class blas_data_mapper;
+
+template<typename Scalar, typename Index, int StorageOrder, int AlignmentType>
+class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1>
+{
+public:
+  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename packet_traits<Scalar>::half HalfPacket;
+
+  typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
+  typedef BlasVectorMapper<Scalar, Index> VectorMapper;
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1)
+   : m_data(data), m_stride(stride)
+  {
+    EIGEN_ONLY_USED_FOR_DEBUG(incr);
+    eigen_assert(incr==1);
+  }
+
+  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
+  getSubMapper(Index i, Index j) const {
+    return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
+  }
+
+  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
+    return LinearMapper(&operator()(i, j));
+  }
+
+  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
+    return VectorMapper(&operator()(i, j));
+  }
+
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
+    return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
+    return ploadt<Packet, AlignmentType>(&operator()(i, j));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
+    return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
+  }
+
+  template<typename SubPacket>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
+    pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
+  }
+
+  template<typename SubPacket>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
+    return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
+  }
+
+  EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
+  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
+
+  EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
+    if (UIntPtr(m_data)%sizeof(Scalar)) {
+      return -1;
+    }
+    return internal::first_default_aligned(m_data, size);
+  }
+
+  protected:
+  Scalar* EIGEN_RESTRICT m_data;
+  const Index m_stride;
+};
+
+// Implementation of non-natural increment (i.e. inner-stride != 1)
+// The exposed API is not complete yet compared to the Incr==1 case
+// because some features makes less sense in this case.
+template<typename Scalar, typename Index, int AlignmentType, int Incr>
+class BlasLinearMapper
+{
+public:
+  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename packet_traits<Scalar>::half HalfPacket;
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {}
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
+    internal::prefetch(&operator()(i));
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
+    return m_data[i*m_incr.value()];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
+    return pgather<Scalar,Packet>(m_data + i*m_incr.value(), m_incr.value());
+  }
+
+  template<typename PacketType>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
+    pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value());
+  }
+
+protected:
+  Scalar *m_data;
+  const internal::variable_if_dynamic<Index,Incr> m_incr;
+};
+
+template<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr>
+class blas_data_mapper
+{
+public:
+  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename packet_traits<Scalar>::half HalfPacket;
+
+  typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper;
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {}
+
+  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE blas_data_mapper
+  getSubMapper(Index i, Index j) const {
+    return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value());
+  }
+
+  EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
+    return LinearMapper(&operator()(i, j), m_incr.value());
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
+    return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride];
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
+    return pgather<Scalar,Packet>(&operator()(i, j),m_incr.value());
+  }
+
+  template <typename PacketT, int AlignmentT>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
+    return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value());
+  }
+
+  template<typename SubPacket>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
+    pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
+  }
+
+  template<typename SubPacket>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
+    return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
+  }
+
+protected:
+  Scalar* EIGEN_RESTRICT m_data;
+  const Index m_stride;
+  const internal::variable_if_dynamic<Index,Incr> m_incr;
+};
+
+// lightweight helper class to access matrix coefficients (const version)
+template<typename Scalar, typename Index, int StorageOrder>
+class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
+  public:
+  EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
+
+  EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {
+    return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);
+  }
+};
+
+
+/* Helper class to analyze the factors of a Product expression.
+ * In particular it allows to pop out operator-, scalar multiples,
+ * and conjugate */
+template<typename XprType> struct blas_traits
+{
+  typedef typename traits<XprType>::Scalar Scalar;
+  typedef const XprType& ExtractType;
+  typedef XprType _ExtractType;
+  enum {
+    IsComplex = NumTraits<Scalar>::IsComplex,
+    IsTransposed = false,
+    NeedToConjugate = false,
+    HasUsableDirectAccess = (    (int(XprType::Flags)&DirectAccessBit)
+                              && (   bool(XprType::IsVectorAtCompileTime)
+                                  || int(inner_stride_at_compile_time<XprType>::ret) == 1)
+                             ) ?  1 : 0
+  };
+  typedef typename conditional<bool(HasUsableDirectAccess),
+    ExtractType,
+    typename _ExtractType::PlainObject
+    >::type DirectLinearAccessType;
+  static inline ExtractType extract(const XprType& x) { return x; }
+  static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
+};
+
+// pop conjugate
+template<typename Scalar, typename NestedXpr>
+struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+  typedef blas_traits<NestedXpr> Base;
+  typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;
+  typedef typename Base::ExtractType ExtractType;
+
+  enum {
+    IsComplex = NumTraits<Scalar>::IsComplex,
+    NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
+  };
+  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+  static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
+};
+
+// pop scalar multiple
+template<typename Scalar, typename NestedXpr, typename Plain>
+struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+  typedef blas_traits<NestedXpr> Base;
+  typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
+  typedef typename Base::ExtractType ExtractType;
+  static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
+  static inline Scalar extractScalarFactor(const XprType& x)
+  { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
+};
+template<typename Scalar, typename NestedXpr, typename Plain>
+struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
+ : blas_traits<NestedXpr>
+{
+  typedef blas_traits<NestedXpr> Base;
+  typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
+  typedef typename Base::ExtractType ExtractType;
+  static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }
+  static inline Scalar extractScalarFactor(const XprType& x)
+  { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
+};
+template<typename Scalar, typename Plain1, typename Plain2>
+struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,
+                                                            const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >
+ : blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >
+{};
+
+// pop opposite
+template<typename Scalar, typename NestedXpr>
+struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+  typedef blas_traits<NestedXpr> Base;
+  typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
+  typedef typename Base::ExtractType ExtractType;
+  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
+  static inline Scalar extractScalarFactor(const XprType& x)
+  { return - Base::extractScalarFactor(x.nestedExpression()); }
+};
+
+// pop/push transpose
+template<typename NestedXpr>
+struct blas_traits<Transpose<NestedXpr> >
+ : blas_traits<NestedXpr>
+{
+  typedef typename NestedXpr::Scalar Scalar;
+  typedef blas_traits<NestedXpr> Base;
+  typedef Transpose<NestedXpr> XprType;
+  typedef Transpose<const typename Base::_ExtractType>  ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS
+  typedef Transpose<const typename Base::_ExtractType> _ExtractType;
+  typedef typename conditional<bool(Base::HasUsableDirectAccess),
+    ExtractType,
+    typename ExtractType::PlainObject
+    >::type DirectLinearAccessType;
+  enum {
+    IsTransposed = Base::IsTransposed ? 0 : 1
+  };
+  static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
+  static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
+};
+
+template<typename T>
+struct blas_traits<const T>
+     : blas_traits<T>
+{};
+
+template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
+struct extract_data_selector {
+  static const typename T::Scalar* run(const T& m)
+  {
+    return blas_traits<T>::extract(m).data();
+  }
+};
+
+template<typename T>
+struct extract_data_selector<T,false> {
+  static typename T::Scalar* run(const T&) { return 0; }
+};
+
+template<typename T> const typename T::Scalar* extract_data(const T& m)
+{
+  return extract_data_selector<T>::run(m);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BLASUTIL_H
diff --git a/third-party/Eigen/src/Core/util/Constants.h b/third-party/Eigen/src/Core/util/Constants.h
new file mode 100644
index 00000000..7587d684
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/Constants.h
@@ -0,0 +1,547 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CONSTANTS_H
+#define EIGEN_CONSTANTS_H
+
+namespace Eigen {
+
+/** This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is
+  * stored in some runtime variable.
+  *
+  * Changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix.
+  */
+const int Dynamic = -1;
+
+/** This value means that a signed quantity (e.g., a signed index) is not known at compile-time, and that instead its value
+  * has to be specified at runtime.
+  */
+const int DynamicIndex = 0xffffff;
+
+/** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().
+  * The value Infinity there means the L-infinity norm.
+  */
+const int Infinity = -1;
+
+/** This value means that the cost to evaluate an expression coefficient is either very expensive or
+  * cannot be known at compile time.
+  *
+  * This value has to be positive to (1) simplify cost computation, and (2) allow to distinguish between a very expensive and very very expensive expressions.
+  * It thus must also be large enough to make sure unrolling won't happen and that sub expressions will be evaluated, but not too large to avoid overflow.
+  */
+const int HugeCost = 10000;
+
+/** \defgroup flags Flags
+  * \ingroup Core_Module
+  *
+  * These are the possible bits which can be OR'ed to constitute the flags of a matrix or
+  * expression.
+  *
+  * It is important to note that these flags are a purely compile-time notion. They are a compile-time property of
+  * an expression type, implemented as enum's. They are not stored in memory at runtime, and they do not incur any
+  * runtime overhead.
+  *
+  * \sa MatrixBase::Flags
+  */
+
+/** \ingroup flags
+  *
+  * for a matrix, this means that the storage order is row-major.
+  * If this bit is not set, the storage order is column-major.
+  * For an expression, this determines the storage order of
+  * the matrix created by evaluation of that expression.
+  * \sa \blank  \ref TopicStorageOrders */
+const unsigned int RowMajorBit = 0x1;
+
+/** \ingroup flags
+  * means the expression should be evaluated by the calling expression */
+const unsigned int EvalBeforeNestingBit = 0x2;
+
+/** \ingroup flags
+  * \deprecated
+  * means the expression should be evaluated before any assignment */
+EIGEN_DEPRECATED
+const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated
+
+/** \ingroup flags
+  *
+  * Short version: means the expression might be vectorized
+  *
+  * Long version: means that the coefficients can be handled by packets
+  * and start at a memory location whose alignment meets the requirements
+  * of the present CPU architecture for optimized packet access. In the fixed-size
+  * case, there is the additional condition that it be possible to access all the
+  * coefficients by packets (this implies the requirement that the size be a multiple of 16 bytes,
+  * and that any nontrivial strides don't break the alignment). In the dynamic-size case,
+  * there is no such condition on the total size and strides, so it might not be possible to access
+  * all coeffs by packets.
+  *
+  * \note This bit can be set regardless of whether vectorization is actually enabled.
+  *       To check for actual vectorizability, see \a ActualPacketAccessBit.
+  */
+const unsigned int PacketAccessBit = 0x8;
+
+#ifdef EIGEN_VECTORIZE
+/** \ingroup flags
+  *
+  * If vectorization is enabled (EIGEN_VECTORIZE is defined) this constant
+  * is set to the value \a PacketAccessBit.
+  *
+  * If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant
+  * is set to the value 0.
+  */
+const unsigned int ActualPacketAccessBit = PacketAccessBit;
+#else
+const unsigned int ActualPacketAccessBit = 0x0;
+#endif
+
+/** \ingroup flags
+  *
+  * Short version: means the expression can be seen as 1D vector.
+  *
+  * Long version: means that one can access the coefficients
+  * of this expression by coeff(int), and coeffRef(int) in the case of a lvalue expression. These
+  * index-based access methods are guaranteed
+  * to not have to do any runtime computation of a (row, col)-pair from the index, so that it
+  * is guaranteed that whenever it is available, index-based access is at least as fast as
+  * (row,col)-based access. Expressions for which that isn't possible don't have the LinearAccessBit.
+  *
+  * If both PacketAccessBit and LinearAccessBit are set, then the
+  * packets of this expression can be accessed by packet(int), and writePacket(int) in the case of a
+  * lvalue expression.
+  *
+  * Typically, all vector expressions have the LinearAccessBit, but there is one exception:
+  * Product expressions don't have it, because it would be troublesome for vectorization, even when the
+  * Product is a vector expression. Thus, vector Product expressions allow index-based coefficient access but
+  * not index-based packet access, so they don't have the LinearAccessBit.
+  */
+const unsigned int LinearAccessBit = 0x10;
+
+/** \ingroup flags
+  *
+  * Means the expression has a coeffRef() method, i.e. is writable as its individual coefficients are directly addressable.
+  * This rules out read-only expressions.
+  *
+  * Note that DirectAccessBit and LvalueBit are mutually orthogonal, as there are examples of expression having one but note
+  * the other:
+  *   \li writable expressions that don't have a very simple memory layout as a strided array, have LvalueBit but not DirectAccessBit
+  *   \li Map-to-const expressions, for example Map<const Matrix>, have DirectAccessBit but not LvalueBit
+  *
+  * Expressions having LvalueBit also have their coeff() method returning a const reference instead of returning a new value.
+  */
+const unsigned int LvalueBit = 0x20;
+
+/** \ingroup flags
+  *
+  * Means that the underlying array of coefficients can be directly accessed as a plain strided array. The memory layout
+  * of the array of coefficients must be exactly the natural one suggested by rows(), cols(),
+  * outerStride(), innerStride(), and the RowMajorBit. This rules out expressions such as Diagonal, whose coefficients,
+  * though referencable, do not have such a regular memory layout.
+  *
+  * See the comment on LvalueBit for an explanation of how LvalueBit and DirectAccessBit are mutually orthogonal.
+  */
+const unsigned int DirectAccessBit = 0x40;
+
+/** \deprecated \ingroup flags
+  *
+  * means the first coefficient packet is guaranteed to be aligned.
+  * An expression cannot has the AlignedBit without the PacketAccessBit flag.
+  * In other words, this means we are allow to perform an aligned packet access to the first element regardless
+  * of the expression kind:
+  * \code
+  * expression.packet<Aligned>(0);
+  * \endcode
+  */
+EIGEN_DEPRECATED const unsigned int AlignedBit = 0x80;
+
+const unsigned int NestByRefBit = 0x100;
+
+/** \ingroup flags
+  *
+  * for an expression, this means that the storage order
+  * can be either row-major or column-major.
+  * The precise choice will be decided at evaluation time or when
+  * combined with other expressions.
+  * \sa \blank  \ref RowMajorBit, \ref TopicStorageOrders */
+const unsigned int NoPreferredStorageOrderBit = 0x200;
+
+/** \ingroup flags
+  *
+  * Means that the underlying coefficients can be accessed through pointers to the sparse (un)compressed storage format,
+  * that is, the expression provides:
+  * \code
+    inline const Scalar* valuePtr() const;
+    inline const Index* innerIndexPtr() const;
+    inline const Index* outerIndexPtr() const;
+    inline const Index* innerNonZeroPtr() const;
+    \endcode
+  */
+const unsigned int CompressedAccessBit = 0x400;
+
+
+// list of flags that are inherited by default
+const unsigned int HereditaryBits = RowMajorBit
+                                  | EvalBeforeNestingBit;
+
+/** \defgroup enums Enumerations
+  * \ingroup Core_Module
+  *
+  * Various enumerations used in %Eigen. Many of these are used as template parameters.
+  */
+
+/** \ingroup enums
+  * Enum containing possible values for the \c Mode or \c UpLo parameter of
+  * MatrixBase::selfadjointView() and MatrixBase::triangularView(), and selfadjoint solvers. */
+enum UpLoType {
+  /** View matrix as a lower triangular matrix. */
+  Lower=0x1,                      
+  /** View matrix as an upper triangular matrix. */
+  Upper=0x2,                      
+  /** %Matrix has ones on the diagonal; to be used in combination with #Lower or #Upper. */
+  UnitDiag=0x4, 
+  /** %Matrix has zeros on the diagonal; to be used in combination with #Lower or #Upper. */
+  ZeroDiag=0x8,
+  /** View matrix as a lower triangular matrix with ones on the diagonal. */
+  UnitLower=UnitDiag|Lower, 
+  /** View matrix as an upper triangular matrix with ones on the diagonal. */
+  UnitUpper=UnitDiag|Upper,
+  /** View matrix as a lower triangular matrix with zeros on the diagonal. */
+  StrictlyLower=ZeroDiag|Lower, 
+  /** View matrix as an upper triangular matrix with zeros on the diagonal. */
+  StrictlyUpper=ZeroDiag|Upper,
+  /** Used in BandMatrix and SelfAdjointView to indicate that the matrix is self-adjoint. */
+  SelfAdjoint=0x10,
+  /** Used to support symmetric, non-selfadjoint, complex matrices. */
+  Symmetric=0x20
+};
+
+/** \ingroup enums
+  * Enum for indicating whether a buffer is aligned or not. */
+enum AlignmentType {
+  Unaligned=0,        /**< Data pointer has no specific alignment. */
+  Aligned8=8,         /**< Data pointer is aligned on a 8 bytes boundary. */
+  Aligned16=16,       /**< Data pointer is aligned on a 16 bytes boundary. */
+  Aligned32=32,       /**< Data pointer is aligned on a 32 bytes boundary. */
+  Aligned64=64,       /**< Data pointer is aligned on a 64 bytes boundary. */
+  Aligned128=128,     /**< Data pointer is aligned on a 128 bytes boundary. */
+  AlignedMask=255,
+  Aligned=16,         /**< \deprecated Synonym for Aligned16. */
+#if EIGEN_MAX_ALIGN_BYTES==128
+  AlignedMax = Aligned128
+#elif EIGEN_MAX_ALIGN_BYTES==64
+  AlignedMax = Aligned64
+#elif EIGEN_MAX_ALIGN_BYTES==32
+  AlignedMax = Aligned32
+#elif EIGEN_MAX_ALIGN_BYTES==16
+  AlignedMax = Aligned16
+#elif EIGEN_MAX_ALIGN_BYTES==8
+  AlignedMax = Aligned8
+#elif EIGEN_MAX_ALIGN_BYTES==0
+  AlignedMax = Unaligned
+#else
+#error Invalid value for EIGEN_MAX_ALIGN_BYTES
+#endif
+};
+
+/** \ingroup enums
+ * Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
+// FIXME after the corner() API change, this was not needed anymore, except by AlignedBox
+// TODO: find out what to do with that. Adapt the AlignedBox API ?
+enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
+
+/** \ingroup enums
+  * Enum containing possible values for the \p Direction parameter of
+  * Reverse, PartialReduxExpr and VectorwiseOp. */
+enum DirectionType { 
+  /** For Reverse, all columns are reversed; 
+    * for PartialReduxExpr and VectorwiseOp, act on columns. */
+  Vertical, 
+  /** For Reverse, all rows are reversed; 
+    * for PartialReduxExpr and VectorwiseOp, act on rows. */
+  Horizontal, 
+  /** For Reverse, both rows and columns are reversed; 
+    * not used for PartialReduxExpr and VectorwiseOp. */
+  BothDirections 
+};
+
+/** \internal \ingroup enums
+  * Enum to specify how to traverse the entries of a matrix. */
+enum TraversalType {
+  /** \internal Default traversal, no vectorization, no index-based access */
+  DefaultTraversal,
+  /** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */
+  LinearTraversal,
+  /** \internal Equivalent to a slice vectorization for fixed-size matrices having good alignment
+    * and good size */
+  InnerVectorizedTraversal,
+  /** \internal Vectorization path using a single loop plus scalar loops for the
+    * unaligned boundaries */
+  LinearVectorizedTraversal,
+  /** \internal Generic vectorization path using one vectorized loop per row/column with some
+    * scalar loops to handle the unaligned boundaries */
+  SliceVectorizedTraversal,
+  /** \internal Special case to properly handle incompatible scalar types or other defecting cases*/
+  InvalidTraversal,
+  /** \internal Evaluate all entries at once */
+  AllAtOnceTraversal
+};
+
+/** \internal \ingroup enums
+  * Enum to specify whether to unroll loops when traversing over the entries of a matrix. */
+enum UnrollingType {
+  /** \internal Do not unroll loops. */
+  NoUnrolling,
+  /** \internal Unroll only the inner loop, but not the outer loop. */
+  InnerUnrolling,
+  /** \internal Unroll both the inner and the outer loop. If there is only one loop, 
+    * because linear traversal is used, then unroll that loop. */
+  CompleteUnrolling
+};
+
+/** \internal \ingroup enums
+  * Enum to specify whether to use the default (built-in) implementation or the specialization. */
+enum SpecializedType {
+  Specialized,
+  BuiltIn
+};
+
+/** \ingroup enums
+  * Enum containing possible values for the \p _Options template parameter of
+  * Matrix, Array and BandMatrix. */
+enum StorageOptions {
+  /** Storage order is column major (see \ref TopicStorageOrders). */
+  ColMajor = 0,
+  /** Storage order is row major (see \ref TopicStorageOrders). */
+  RowMajor = 0x1,  // it is only a coincidence that this is equal to RowMajorBit -- don't rely on that
+  /** Align the matrix itself if it is vectorizable fixed-size */
+  AutoAlign = 0,
+  /** Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation
+  DontAlign = 0x2
+};
+
+/** \ingroup enums
+  * Enum for specifying whether to apply or solve on the left or right. */
+enum SideType {
+  /** Apply transformation on the left. */
+  OnTheLeft = 1,  
+  /** Apply transformation on the right. */
+  OnTheRight = 2  
+};
+
+/* the following used to be written as:
+ *
+ *   struct NoChange_t {};
+ *   namespace {
+ *     EIGEN_UNUSED NoChange_t NoChange;
+ *   }
+ *
+ * on the ground that it feels dangerous to disambiguate overloaded functions on enum/integer types.  
+ * However, this leads to "variable declared but never referenced" warnings on Intel Composer XE,
+ * and we do not know how to get rid of them (bug 450).
+ */
+
+enum NoChange_t   { NoChange };
+enum Sequential_t { Sequential };
+enum Default_t    { Default };
+
+/** \internal \ingroup enums
+  * Used in AmbiVector. */
+enum AmbiVectorMode {
+  IsDense         = 0,
+  IsSparse
+};
+
+/** \ingroup enums
+  * Used as template parameter in DenseCoeffBase and MapBase to indicate 
+  * which accessors should be provided. */
+enum AccessorLevels {
+  /** Read-only access via a member function. */
+  ReadOnlyAccessors, 
+  /** Read/write access via member functions. */
+  WriteAccessors, 
+  /** Direct read-only access to the coefficients. */
+  DirectAccessors, 
+  /** Direct read/write access to the coefficients. */
+  DirectWriteAccessors
+};
+
+/** \ingroup enums
+  * Enum with options to give to various decompositions. */
+enum DecompositionOptions {
+  /** \internal Not used (meant for LDLT?). */
+  Pivoting            = 0x01, 
+  /** \internal Not used (meant for LDLT?). */
+  NoPivoting          = 0x02, 
+  /** Used in JacobiSVD to indicate that the square matrix U is to be computed. */
+  ComputeFullU        = 0x04,
+  /** Used in JacobiSVD to indicate that the thin matrix U is to be computed. */
+  ComputeThinU        = 0x08,
+  /** Used in JacobiSVD to indicate that the square matrix V is to be computed. */
+  ComputeFullV        = 0x10,
+  /** Used in JacobiSVD to indicate that the thin matrix V is to be computed. */
+  ComputeThinV        = 0x20,
+  /** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify
+    * that only the eigenvalues are to be computed and not the eigenvectors. */
+  EigenvaluesOnly     = 0x40,
+  /** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify
+    * that both the eigenvalues and the eigenvectors are to be computed. */
+  ComputeEigenvectors = 0x80,
+  /** \internal */
+  EigVecMask = EigenvaluesOnly | ComputeEigenvectors,
+  /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
+    * solve the generalized eigenproblem \f$ Ax = \lambda B x \f$. */
+  Ax_lBx              = 0x100,
+  /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
+    * solve the generalized eigenproblem \f$ ABx = \lambda x \f$. */
+  ABx_lx              = 0x200,
+  /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
+    * solve the generalized eigenproblem \f$ BAx = \lambda x \f$. */
+  BAx_lx              = 0x400,
+  /** \internal */
+  GenEigMask = Ax_lBx | ABx_lx | BAx_lx
+};
+
+/** \ingroup enums
+  * Possible values for the \p QRPreconditioner template parameter of JacobiSVD. */
+enum QRPreconditioners {
+  /** Do not specify what is to be done if the SVD of a non-square matrix is asked for. */
+  NoQRPreconditioner,
+  /** Use a QR decomposition without pivoting as the first step. */
+  HouseholderQRPreconditioner,
+  /** Use a QR decomposition with column pivoting as the first step. */
+  ColPivHouseholderQRPreconditioner,
+  /** Use a QR decomposition with full pivoting as the first step. */
+  FullPivHouseholderQRPreconditioner
+};
+
+#ifdef Success
+#error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h
+#endif
+
+/** \ingroup enums
+  * Enum for reporting the status of a computation. */
+enum ComputationInfo {
+  /** Computation was successful. */
+  Success = 0,        
+  /** The provided data did not satisfy the prerequisites. */
+  NumericalIssue = 1, 
+  /** Iterative procedure did not converge. */
+  NoConvergence = 2,
+  /** The inputs are invalid, or the algorithm has been improperly called.
+    * When assertions are enabled, such errors trigger an assert. */
+  InvalidInput = 3
+};
+
+/** \ingroup enums
+  * Enum used to specify how a particular transformation is stored in a matrix.
+  * \sa Transform, Hyperplane::transform(). */
+enum TransformTraits {
+  /** Transformation is an isometry. */
+  Isometry      = 0x1,
+  /** Transformation is an affine transformation stored as a (Dim+1)^2 matrix whose last row is 
+    * assumed to be [0 ... 0 1]. */
+  Affine        = 0x2,
+  /** Transformation is an affine transformation stored as a (Dim) x (Dim+1) matrix. */
+  AffineCompact = 0x10 | Affine,
+  /** Transformation is a general projective transformation stored as a (Dim+1)^2 matrix. */
+  Projective    = 0x20
+};
+
+/** \internal \ingroup enums
+  * Enum used to choose between implementation depending on the computer architecture. */
+namespace Architecture
+{
+  enum Type {
+    Generic = 0x0,
+    SSE = 0x1,
+    AltiVec = 0x2,
+    VSX = 0x3,
+    NEON = 0x4,
+#if defined EIGEN_VECTORIZE_SSE
+    Target = SSE
+#elif defined EIGEN_VECTORIZE_ALTIVEC
+    Target = AltiVec
+#elif defined EIGEN_VECTORIZE_VSX
+    Target = VSX
+#elif defined EIGEN_VECTORIZE_NEON
+    Target = NEON
+#else
+    Target = Generic
+#endif
+  };
+}
+
+/** \internal \ingroup enums
+  * Enum used as template parameter in Product and product evaluators. */
+enum ProductImplType
+{ DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
+
+/** \internal \ingroup enums
+  * Enum used in experimental parallel implementation. */
+enum Action {GetAction, SetAction};
+
+/** The type used to identify a dense storage. */
+struct Dense {};
+
+/** The type used to identify a general sparse storage. */
+struct Sparse {};
+
+/** The type used to identify a general solver (factored) storage. */
+struct SolverStorage {};
+
+/** The type used to identify a permutation storage. */
+struct PermutationStorage {};
+
+/** The type used to identify a permutation storage. */
+struct TranspositionsStorage {};
+
+/** The type used to identify a matrix expression */
+struct MatrixXpr {};
+
+/** The type used to identify an array expression */
+struct ArrayXpr {};
+
+// An evaluator must define its shape. By default, it can be one of the following:
+struct DenseShape             { static std::string debugName() { return "DenseShape"; } };
+struct SolverShape            { static std::string debugName() { return "SolverShape"; } };
+struct HomogeneousShape       { static std::string debugName() { return "HomogeneousShape"; } };
+struct DiagonalShape          { static std::string debugName() { return "DiagonalShape"; } };
+struct BandShape              { static std::string debugName() { return "BandShape"; } };
+struct TriangularShape        { static std::string debugName() { return "TriangularShape"; } };
+struct SelfAdjointShape       { static std::string debugName() { return "SelfAdjointShape"; } };
+struct PermutationShape       { static std::string debugName() { return "PermutationShape"; } };
+struct TranspositionsShape    { static std::string debugName() { return "TranspositionsShape"; } };
+struct SparseShape            { static std::string debugName() { return "SparseShape"; } };
+
+namespace internal {
+
+  // random access iterators based on coeff*() accessors.
+struct IndexBased {};
+
+// evaluator based on iterators to access coefficients. 
+struct IteratorBased {};
+
+/** \internal
+ * Constants for comparison functors
+ */
+enum ComparisonName {
+  cmp_EQ = 0,
+  cmp_LT = 1,
+  cmp_LE = 2,
+  cmp_UNORD = 3,
+  cmp_NEQ = 4,
+  cmp_GT = 5,
+  cmp_GE = 6
+};
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_CONSTANTS_H
diff --git a/third-party/Eigen/src/Core/util/DisableStupidWarnings.h b/third-party/Eigen/src/Core/util/DisableStupidWarnings.h
new file mode 100644
index 00000000..74f74cc4
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -0,0 +1,94 @@
+#ifndef EIGEN_WARNINGS_DISABLED
+#define EIGEN_WARNINGS_DISABLED
+
+#ifdef _MSC_VER
+  // 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p))
+  // 4101 - unreferenced local variable
+  // 4127 - conditional expression is constant
+  // 4181 - qualifier applied to reference type ignored
+  // 4211 - nonstandard extension used : redefined extern to static
+  // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data
+  // 4273 - QtAlignedMalloc, inconsistent DLL linkage
+  // 4324 - structure was padded due to declspec(align())
+  // 4503 - decorated name length exceeded, name was truncated
+  // 4512 - assignment operator could not be generated
+  // 4522 - 'class' : multiple assignment operators specified
+  // 4700 - uninitialized local variable 'xyz' used
+  // 4714 - function marked as __forceinline not inlined
+  // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
+  // 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
+  #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+    #pragma warning( push )
+  #endif
+  #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
+
+#elif defined __INTEL_COMPILER
+  // 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
+  //        ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
+  //        typedef that may be a reference type.
+  // 279  - controlling expression is constant
+  //        ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.
+  // 1684 - conversion from pointer to same-sized integral type (potential portability problem)
+  // 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits
+  #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+    #pragma warning push
+  #endif
+  #pragma warning disable 2196 279 1684 2259
+
+#elif defined __clang__
+  // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
+  //     this is really a stupid warning as it warns on compile-time expressions involving enums
+  #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+    #pragma clang diagnostic push
+  #endif
+  #pragma clang diagnostic ignored "-Wconstant-logical-operand"
+
+#elif defined __GNUC__
+
+  #if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) &&  (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
+    #pragma GCC diagnostic push
+  #endif
+  // g++ warns about local variables shadowing member functions, which is too strict
+  #pragma GCC diagnostic ignored "-Wshadow"
+  #if __GNUC__ == 4 && __GNUC_MINOR__ < 8
+    // Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions:
+    #pragma GCC diagnostic ignored "-Wtype-limits"
+  #endif
+  #if __GNUC__>=6
+    #pragma GCC diagnostic ignored "-Wignored-attributes"
+  #endif
+  #if __GNUC__==7
+    // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89325
+    #pragma GCC diagnostic ignored "-Wattributes"
+  #endif
+#endif
+
+#if defined __NVCC__
+  // Disable the "statement is unreachable" message
+  #pragma diag_suppress code_is_unreachable
+  // Disable the "dynamic initialization in unreachable code" message
+  #pragma diag_suppress initialization_not_reachable
+  // Disable the "invalid error number" message that we get with older versions of nvcc
+  #pragma diag_suppress 1222
+  // Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are many of them and they seem to change with every version of the compiler)
+  #pragma diag_suppress 2527
+  #pragma diag_suppress 2529
+  #pragma diag_suppress 2651
+  #pragma diag_suppress 2653
+  #pragma diag_suppress 2668
+  #pragma diag_suppress 2669
+  #pragma diag_suppress 2670
+  #pragma diag_suppress 2671
+  #pragma diag_suppress 2735
+  #pragma diag_suppress 2737
+#endif
+
+#else
+// warnings already disabled:
+# ifndef EIGEN_WARNINGS_DISABLED_2
+#  define EIGEN_WARNINGS_DISABLED_2
+# elif defined(EIGEN_INTERNAL_DEBUGGING)
+#  error "Do not include \"DisableStupidWarnings.h\" recursively more than twice!"
+# endif
+
+#endif // not EIGEN_WARNINGS_DISABLED
diff --git a/third-party/Eigen/src/Core/util/ForwardDeclarations.h b/third-party/Eigen/src/Core/util/ForwardDeclarations.h
new file mode 100644
index 00000000..134544f9
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/ForwardDeclarations.h
@@ -0,0 +1,298 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FORWARDDECLARATIONS_H
+#define EIGEN_FORWARDDECLARATIONS_H
+
+namespace Eigen {
+namespace internal {
+
+template<typename T> struct traits;
+
+// here we say once and for all that traits<const T> == traits<T>
+// When constness must affect traits, it has to be constness on template parameters on which T itself depends.
+// For example, traits<Map<const T> > != traits<Map<T> >, but
+//              traits<const Map<T> > == traits<Map<T> >
+template<typename T> struct traits<const T> : traits<T> {};
+
+template<typename Derived> struct has_direct_access
+{
+  enum { ret = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0 };
+};
+
+template<typename Derived> struct accessors_level
+{
+  enum { has_direct_access = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0,
+         has_write_access = (traits<Derived>::Flags & LvalueBit) ? 1 : 0,
+         value = has_direct_access ? (has_write_access ? DirectWriteAccessors : DirectAccessors)
+                                   : (has_write_access ? WriteAccessors       : ReadOnlyAccessors)
+  };
+};
+
+template<typename T> struct evaluator_traits;
+
+template< typename T> struct evaluator;
+
+} // end namespace internal
+
+template<typename T> struct NumTraits;
+
+template<typename Derived> struct EigenBase;
+template<typename Derived> class DenseBase;
+template<typename Derived> class PlainObjectBase;
+template<typename Derived, int Level> class DenseCoeffsBase;
+
+template<typename _Scalar, int _Rows, int _Cols,
+         int _Options = AutoAlign |
+#if EIGEN_GNUC_AT(3,4)
+    // workaround a bug in at least gcc 3.4.6
+    // the innermost ?: ternary operator is misparsed. We write it slightly
+    // differently and this makes gcc 3.4.6 happy, but it's ugly.
+    // The error would only show up with EIGEN_DEFAULT_TO_ROW_MAJOR is defined
+    // (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)
+                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+                          : !(_Cols==1 && _Rows!=1) ?  EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
+                          : Eigen::ColMajor ),
+#else
+                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+                          : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
+                          : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+#endif
+         int _MaxRows = _Rows,
+         int _MaxCols = _Cols
+> class Matrix;
+
+template<typename Derived> class MatrixBase;
+template<typename Derived> class ArrayBase;
+
+template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged;
+template<typename ExpressionType, template <typename> class StorageBase > class NoAlias;
+template<typename ExpressionType> class NestByValue;
+template<typename ExpressionType> class ForceAlignedAccess;
+template<typename ExpressionType> class SwapWrapper;
+
+template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block;
+
+template<typename MatrixType, int Size=Dynamic> class VectorBlock;
+template<typename MatrixType> class Transpose;
+template<typename MatrixType> class Conjugate;
+template<typename NullaryOp, typename MatrixType>         class CwiseNullaryOp;
+template<typename UnaryOp,   typename MatrixType>         class CwiseUnaryOp;
+template<typename ViewOp,    typename MatrixType>         class CwiseUnaryView;
+template<typename BinaryOp,  typename Lhs, typename Rhs>  class CwiseBinaryOp;
+template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>  class CwiseTernaryOp;
+template<typename Decomposition, typename Rhstype>        class Solve;
+template<typename XprType>                                class Inverse;
+
+template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
+
+template<typename Derived> class DiagonalBase;
+template<typename _DiagonalVectorType> class DiagonalWrapper;
+template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
+template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct;
+template<typename MatrixType, int Index = 0> class Diagonal;
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class PermutationMatrix;
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class Transpositions;
+template<typename Derived> class PermutationBase;
+template<typename Derived> class TranspositionsBase;
+template<typename _IndicesType> class PermutationWrapper;
+template<typename _IndicesType> class TranspositionsWrapper;
+
+template<typename Derived,
+         int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
+> class MapBase;
+template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
+template<int Value = Dynamic> class InnerStride;
+template<int Value = Dynamic> class OuterStride;
+template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;
+template<typename Derived> class RefBase;
+template<typename PlainObjectType, int Options = 0,
+         typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
+
+template<typename Derived> class TriangularBase;
+template<typename MatrixType, unsigned int Mode> class TriangularView;
+template<typename MatrixType, unsigned int Mode> class SelfAdjointView;
+template<typename MatrixType> class SparseView;
+template<typename ExpressionType> class WithFormat;
+template<typename MatrixType> struct CommaInitializer;
+template<typename Derived> class ReturnByValue;
+template<typename ExpressionType> class ArrayWrapper;
+template<typename ExpressionType> class MatrixWrapper;
+template<typename Derived> class SolverBase;
+template<typename XprType> class InnerIterator;
+
+namespace internal {
+template<typename DecompositionType> struct kernel_retval_base;
+template<typename DecompositionType> struct kernel_retval;
+template<typename DecompositionType> struct image_retval_base;
+template<typename DecompositionType> struct image_retval;
+} // end namespace internal
+
+namespace internal {
+template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynamic, int Subs=Dynamic, int Options=0> class BandMatrix;
+}
+
+namespace internal {
+template<typename Lhs, typename Rhs> struct product_type;
+
+template<bool> struct EnableIf;
+
+/** \internal
+  * \class product_evaluator
+  * Products need their own evaluator with more template arguments allowing for
+  * easier partial template specializations.
+  */
+template< typename T,
+          int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
+          typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
+          typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
+          typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
+          typename RhsScalar = typename traits<typename T::Rhs>::Scalar
+        > struct product_evaluator;
+}
+
+template<typename Lhs, typename Rhs,
+         int ProductType = internal::product_type<Lhs,Rhs>::value>
+struct ProductReturnType;
+
+// this is a workaround for sun CC
+template<typename Lhs, typename Rhs> struct LazyProductReturnType;
+
+namespace internal {
+
+// Provides scalar/packet-wise product and product with accumulation
+// with optional conjugation of the arguments.
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper;
+
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_min_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_max_op;
+template<typename Scalar> struct scalar_opposite_op;
+template<typename Scalar> struct scalar_conjugate_op;
+template<typename Scalar> struct scalar_real_op;
+template<typename Scalar> struct scalar_imag_op;
+template<typename Scalar> struct scalar_abs_op;
+template<typename Scalar> struct scalar_abs2_op;
+template<typename Scalar> struct scalar_sqrt_op;
+template<typename Scalar> struct scalar_rsqrt_op;
+template<typename Scalar> struct scalar_exp_op;
+template<typename Scalar> struct scalar_log_op;
+template<typename Scalar> struct scalar_cos_op;
+template<typename Scalar> struct scalar_sin_op;
+template<typename Scalar> struct scalar_acos_op;
+template<typename Scalar> struct scalar_asin_op;
+template<typename Scalar> struct scalar_tan_op;
+template<typename Scalar> struct scalar_inverse_op;
+template<typename Scalar> struct scalar_square_op;
+template<typename Scalar> struct scalar_cube_op;
+template<typename Scalar, typename NewType> struct scalar_cast_op;
+template<typename Scalar> struct scalar_random_op;
+template<typename Scalar> struct scalar_constant_op;
+template<typename Scalar> struct scalar_identity_op;
+template<typename Scalar,bool iscpx> struct scalar_sign_op;
+template<typename Scalar,typename ScalarExponent> struct scalar_pow_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
+
+// SpecialFunctions module
+template<typename Scalar> struct scalar_lgamma_op;
+template<typename Scalar> struct scalar_digamma_op;
+template<typename Scalar> struct scalar_erf_op;
+template<typename Scalar> struct scalar_erfc_op;
+template<typename Scalar> struct scalar_igamma_op;
+template<typename Scalar> struct scalar_igammac_op;
+template<typename Scalar> struct scalar_zeta_op;
+template<typename Scalar> struct scalar_betainc_op;
+
+} // end namespace internal
+
+struct IOFormat;
+
+// Array module
+template<typename _Scalar, int _Rows, int _Cols,
+         int _Options = AutoAlign |
+#if EIGEN_GNUC_AT(3,4)
+    // workaround a bug in at least gcc 3.4.6
+    // the innermost ?: ternary operator is misparsed. We write it slightly
+    // differently and this makes gcc 3.4.6 happy, but it's ugly.
+    // The error would only show up with EIGEN_DEFAULT_TO_ROW_MAJOR is defined
+    // (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)
+                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+                          : !(_Cols==1 && _Rows!=1) ?  EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
+                          : Eigen::ColMajor ),
+#else
+                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+                          : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
+                          : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+#endif
+         int _MaxRows = _Rows, int _MaxCols = _Cols> class Array;
+template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> class Select;
+template<typename MatrixType, typename BinaryOp, int Direction> class PartialReduxExpr;
+template<typename ExpressionType, int Direction> class VectorwiseOp;
+template<typename MatrixType,int RowFactor,int ColFactor> class Replicate;
+template<typename MatrixType, int Direction = BothDirections> class Reverse;
+
+template<typename MatrixType> class FullPivLU;
+template<typename MatrixType> class PartialPivLU;
+namespace internal {
+template<typename MatrixType> struct inverse_impl;
+}
+template<typename MatrixType> class HouseholderQR;
+template<typename MatrixType> class ColPivHouseholderQR;
+template<typename MatrixType> class FullPivHouseholderQR;
+template<typename MatrixType> class CompleteOrthogonalDecomposition;
+template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
+template<typename MatrixType> class BDCSVD;
+template<typename MatrixType, int UpLo = Lower> class LLT;
+template<typename MatrixType, int UpLo = Lower> class LDLT;
+template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;
+template<typename Scalar>     class JacobiRotation;
+
+// Geometry module:
+template<typename Derived, int _Dim> class RotationBase;
+template<typename Lhs, typename Rhs> class Cross;
+template<typename Derived> class QuaternionBase;
+template<typename Scalar> class Rotation2D;
+template<typename Scalar> class AngleAxis;
+template<typename Scalar,int Dim> class Translation;
+template<typename Scalar,int Dim> class AlignedBox;
+template<typename Scalar, int Options = AutoAlign> class Quaternion;
+template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;
+template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine;
+template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class Hyperplane;
+template<typename Scalar> class UniformScaling;
+template<typename MatrixType,int Direction> class Homogeneous;
+
+// Sparse module:
+template<typename Derived> class SparseMatrixBase;
+
+// MatrixFunctions module
+template<typename Derived> struct MatrixExponentialReturnValue;
+template<typename Derived> class MatrixFunctionReturnValue;
+template<typename Derived> class MatrixSquareRootReturnValue;
+template<typename Derived> class MatrixLogarithmReturnValue;
+template<typename Derived> class MatrixPowerReturnValue;
+template<typename Derived> class MatrixComplexPowerReturnValue;
+
+namespace internal {
+template <typename Scalar>
+struct stem_function
+{
+  typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+  typedef ComplexScalar type(ComplexScalar, int);
+};
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_FORWARDDECLARATIONS_H
diff --git a/third-party/Eigen/src/Core/util/MKL_support.h b/third-party/Eigen/src/Core/util/MKL_support.h
new file mode 100644
index 00000000..b7d6ecc7
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/MKL_support.h
@@ -0,0 +1,130 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *   Include file with common MKL declarations
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_MKL_SUPPORT_H
+#define EIGEN_MKL_SUPPORT_H
+
+#ifdef EIGEN_USE_MKL_ALL
+  #ifndef EIGEN_USE_BLAS
+    #define EIGEN_USE_BLAS
+  #endif
+  #ifndef EIGEN_USE_LAPACKE
+    #define EIGEN_USE_LAPACKE
+  #endif
+  #ifndef EIGEN_USE_MKL_VML
+    #define EIGEN_USE_MKL_VML
+  #endif
+#endif
+
+#ifdef EIGEN_USE_LAPACKE_STRICT
+  #define EIGEN_USE_LAPACKE
+#endif
+
+#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)
+  #define EIGEN_USE_MKL
+#endif
+
+
+#if defined EIGEN_USE_MKL
+#   include <mkl.h> 
+/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
+#   ifndef INTEL_MKL_VERSION
+#       undef EIGEN_USE_MKL /* INTEL_MKL_VERSION is not even defined on older versions */
+#   elif INTEL_MKL_VERSION < 100305    /* the intel-mkl-103-release-notes say this was when the lapacke.h interface was added*/
+#       undef EIGEN_USE_MKL
+#   endif
+#   ifndef EIGEN_USE_MKL
+    /*If the MKL version is too old, undef everything*/
+#       undef   EIGEN_USE_MKL_ALL
+#       undef   EIGEN_USE_LAPACKE
+#       undef   EIGEN_USE_MKL_VML
+#       undef   EIGEN_USE_LAPACKE_STRICT
+#       undef   EIGEN_USE_LAPACKE
+#   endif
+#endif
+
+#if defined EIGEN_USE_MKL
+
+#define EIGEN_MKL_VML_THRESHOLD 128
+
+/* MKL_DOMAIN_BLAS, etc are defined only in 10.3 update 7 */
+/* MKL_BLAS, etc are not defined in 11.2 */
+#ifdef MKL_DOMAIN_ALL
+#define EIGEN_MKL_DOMAIN_ALL MKL_DOMAIN_ALL
+#else
+#define EIGEN_MKL_DOMAIN_ALL MKL_ALL
+#endif
+
+#ifdef MKL_DOMAIN_BLAS
+#define EIGEN_MKL_DOMAIN_BLAS MKL_DOMAIN_BLAS
+#else
+#define EIGEN_MKL_DOMAIN_BLAS MKL_BLAS
+#endif
+
+#ifdef MKL_DOMAIN_FFT
+#define EIGEN_MKL_DOMAIN_FFT MKL_DOMAIN_FFT
+#else
+#define EIGEN_MKL_DOMAIN_FFT MKL_FFT
+#endif
+
+#ifdef MKL_DOMAIN_VML
+#define EIGEN_MKL_DOMAIN_VML MKL_DOMAIN_VML
+#else
+#define EIGEN_MKL_DOMAIN_VML MKL_VML
+#endif
+
+#ifdef MKL_DOMAIN_PARDISO
+#define EIGEN_MKL_DOMAIN_PARDISO MKL_DOMAIN_PARDISO
+#else
+#define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO
+#endif
+#endif
+
+#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)
+#include "../../misc/blas.h"
+#endif
+
+namespace Eigen {
+
+typedef std::complex<double> dcomplex;
+typedef std::complex<float>  scomplex;
+
+#if defined(EIGEN_USE_MKL)
+typedef MKL_INT BlasIndex;
+#else
+typedef int BlasIndex;
+#endif
+
+} // end namespace Eigen
+
+
+#endif // EIGEN_MKL_SUPPORT_H
diff --git a/third-party/Eigen/src/Core/util/Macros.h b/third-party/Eigen/src/Core/util/Macros.h
new file mode 100644
index 00000000..6b0399eb
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/Macros.h
@@ -0,0 +1,1053 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MACROS_H
+#define EIGEN_MACROS_H
+
+#define EIGEN_WORLD_VERSION 3
+#define EIGEN_MAJOR_VERSION 3
+#define EIGEN_MINOR_VERSION 9
+
+#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
+                                      (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
+                                                                 EIGEN_MINOR_VERSION>=z))))
+
+// Compiler identification, EIGEN_COMP_*
+
+/// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC
+#ifdef __GNUC__
+  #define EIGEN_COMP_GNUC 1
+#else
+  #define EIGEN_COMP_GNUC 0
+#endif
+
+/// \internal EIGEN_COMP_CLANG set to major+minor version (e.g., 307 for clang 3.7) if the compiler is clang
+#if defined(__clang__)
+  #define EIGEN_COMP_CLANG (__clang_major__*100+__clang_minor__)
+#else
+  #define EIGEN_COMP_CLANG 0
+#endif
+
+
+/// \internal EIGEN_COMP_LLVM set to 1 if the compiler backend is llvm
+#if defined(__llvm__)
+  #define EIGEN_COMP_LLVM 1
+#else
+  #define EIGEN_COMP_LLVM 0
+#endif
+
+/// \internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel compiler, 0 otherwise
+#if defined(__INTEL_COMPILER)
+  #define EIGEN_COMP_ICC __INTEL_COMPILER
+#else
+  #define EIGEN_COMP_ICC 0
+#endif
+
+/// \internal EIGEN_COMP_MINGW set to 1 if the compiler is mingw
+#if defined(__MINGW32__)
+  #define EIGEN_COMP_MINGW 1
+#else
+  #define EIGEN_COMP_MINGW 0
+#endif
+
+/// \internal EIGEN_COMP_SUNCC set to 1 if the compiler is Solaris Studio
+#if defined(__SUNPRO_CC)
+  #define EIGEN_COMP_SUNCC 1
+#else
+  #define EIGEN_COMP_SUNCC 0
+#endif
+
+/// \internal EIGEN_COMP_MSVC set to _MSC_VER if the compiler is Microsoft Visual C++, 0 otherwise.
+#if defined(_MSC_VER)
+  #define EIGEN_COMP_MSVC _MSC_VER
+#else
+  #define EIGEN_COMP_MSVC 0
+#endif
+
+// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC:
+//  name  ver   MSC_VER
+//  2008    9      1500
+//  2010   10      1600
+//  2012   11      1700
+//  2013   12      1800
+//  2015   14      1900
+//  "15"   15      1900
+
+/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC or clang-cl
+#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG)
+  #define EIGEN_COMP_MSVC_STRICT _MSC_VER
+#else
+  #define EIGEN_COMP_MSVC_STRICT 0
+#endif
+
+/// \internal EIGEN_COMP_IBM set to 1 if the compiler is IBM XL C++
+#if defined(__IBMCPP__) || defined(__xlc__)
+  #define EIGEN_COMP_IBM 1
+#else
+  #define EIGEN_COMP_IBM 0
+#endif
+
+/// \internal EIGEN_COMP_PGI set to 1 if the compiler is Portland Group Compiler
+#if defined(__PGI)
+  #define EIGEN_COMP_PGI 1
+#else
+  #define EIGEN_COMP_PGI 0
+#endif
+
+/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler
+#if defined(__CC_ARM) || defined(__ARMCC_VERSION)
+  #define EIGEN_COMP_ARM 1
+#else
+  #define EIGEN_COMP_ARM 0
+#endif
+
+/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler
+#if defined(__EMSCRIPTEN__)
+  #define EIGEN_COMP_EMSCRIPTEN 1
+#else
+  #define EIGEN_COMP_EMSCRIPTEN 0
+#endif
+
+
+/// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.)
+#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN)
+  #define EIGEN_COMP_GNUC_STRICT 1
+#else
+  #define EIGEN_COMP_GNUC_STRICT 0
+#endif
+
+
+#if EIGEN_COMP_GNUC
+  #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x)
+  #define EIGEN_GNUC_AT_MOST(x,y)  ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x)
+  #define EIGEN_GNUC_AT(x,y)       ( __GNUC__==x && __GNUC_MINOR__==y )
+#else
+  #define EIGEN_GNUC_AT_LEAST(x,y) 0
+  #define EIGEN_GNUC_AT_MOST(x,y)  0
+  #define EIGEN_GNUC_AT(x,y)       0
+#endif
+
+// FIXME: could probably be removed as we do not support gcc 3.x anymore
+#if EIGEN_COMP_GNUC && (__GNUC__ <= 3)
+#define EIGEN_GCC3_OR_OLDER 1
+#else
+#define EIGEN_GCC3_OR_OLDER 0
+#endif
+
+
+// Architecture identification, EIGEN_ARCH_*
+
+#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
+  #define EIGEN_ARCH_x86_64 1
+#else
+  #define EIGEN_ARCH_x86_64 0
+#endif
+
+#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
+  #define EIGEN_ARCH_i386 1
+#else
+  #define EIGEN_ARCH_i386 0
+#endif
+
+#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386
+  #define EIGEN_ARCH_i386_OR_x86_64 1
+#else
+  #define EIGEN_ARCH_i386_OR_x86_64 0
+#endif
+
+/// \internal EIGEN_ARCH_ARM set to 1 if the architecture is ARM
+#if defined(__arm__)
+  #define EIGEN_ARCH_ARM 1
+#else
+  #define EIGEN_ARCH_ARM 0
+#endif
+
+/// \internal EIGEN_ARCH_ARM64 set to 1 if the architecture is ARM64
+#if defined(__aarch64__)
+  #define EIGEN_ARCH_ARM64 1
+#else
+  #define EIGEN_ARCH_ARM64 0
+#endif
+
+#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64
+  #define EIGEN_ARCH_ARM_OR_ARM64 1
+#else
+  #define EIGEN_ARCH_ARM_OR_ARM64 0
+#endif
+
+/// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS
+#if defined(__mips__) || defined(__mips)
+  #define EIGEN_ARCH_MIPS 1
+#else
+  #define EIGEN_ARCH_MIPS 0
+#endif
+
+/// \internal EIGEN_ARCH_SPARC set to 1 if the architecture is SPARC
+#if defined(__sparc__) || defined(__sparc)
+  #define EIGEN_ARCH_SPARC 1
+#else
+  #define EIGEN_ARCH_SPARC 0
+#endif
+
+/// \internal EIGEN_ARCH_IA64 set to 1 if the architecture is Intel Itanium
+#if defined(__ia64__)
+  #define EIGEN_ARCH_IA64 1
+#else
+  #define EIGEN_ARCH_IA64 0
+#endif
+
+/// \internal EIGEN_ARCH_PPC set to 1 if the architecture is PowerPC
+#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC)
+  #define EIGEN_ARCH_PPC 1
+#else
+  #define EIGEN_ARCH_PPC 0
+#endif
+
+
+
+// Operating system identification, EIGEN_OS_*
+
+/// \internal EIGEN_OS_UNIX set to 1 if the OS is a unix variant
+#if defined(__unix__) || defined(__unix)
+  #define EIGEN_OS_UNIX 1
+#else
+  #define EIGEN_OS_UNIX 0
+#endif
+
+/// \internal EIGEN_OS_LINUX set to 1 if the OS is based on Linux kernel
+#if defined(__linux__)
+  #define EIGEN_OS_LINUX 1
+#else
+  #define EIGEN_OS_LINUX 0
+#endif
+
+/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android
+// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain.
+#if defined(__ANDROID__) || defined(ANDROID)
+  #define EIGEN_OS_ANDROID 1
+#else
+  #define EIGEN_OS_ANDROID 0
+#endif
+
+/// \internal EIGEN_OS_GNULINUX set to 1 if the OS is GNU Linux and not Linux-based OS (e.g., not android)
+#if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID)
+  #define EIGEN_OS_GNULINUX 1
+#else
+  #define EIGEN_OS_GNULINUX 0
+#endif
+
+/// \internal EIGEN_OS_BSD set to 1 if the OS is a BSD variant
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__)
+  #define EIGEN_OS_BSD 1
+#else
+  #define EIGEN_OS_BSD 0
+#endif
+
+/// \internal EIGEN_OS_MAC set to 1 if the OS is MacOS
+#if defined(__APPLE__)
+  #define EIGEN_OS_MAC 1
+#else
+  #define EIGEN_OS_MAC 0
+#endif
+
+/// \internal EIGEN_OS_QNX set to 1 if the OS is QNX
+#if defined(__QNX__)
+  #define EIGEN_OS_QNX 1
+#else
+  #define EIGEN_OS_QNX 0
+#endif
+
+/// \internal EIGEN_OS_WIN set to 1 if the OS is Windows based
+#if defined(_WIN32)
+  #define EIGEN_OS_WIN 1
+#else
+  #define EIGEN_OS_WIN 0
+#endif
+
+/// \internal EIGEN_OS_WIN64 set to 1 if the OS is Windows 64bits
+#if defined(_WIN64)
+  #define EIGEN_OS_WIN64 1
+#else
+  #define EIGEN_OS_WIN64 0
+#endif
+
+/// \internal EIGEN_OS_WINCE set to 1 if the OS is Windows CE
+#if defined(_WIN32_WCE)
+  #define EIGEN_OS_WINCE 1
+#else
+  #define EIGEN_OS_WINCE 0
+#endif
+
+/// \internal EIGEN_OS_CYGWIN set to 1 if the OS is Windows/Cygwin
+#if defined(__CYGWIN__)
+  #define EIGEN_OS_CYGWIN 1
+#else
+  #define EIGEN_OS_CYGWIN 0
+#endif
+
+/// \internal EIGEN_OS_WIN_STRICT set to 1 if the OS is really Windows and not some variants
+#if EIGEN_OS_WIN && !( EIGEN_OS_WINCE || EIGEN_OS_CYGWIN )
+  #define EIGEN_OS_WIN_STRICT 1
+#else
+  #define EIGEN_OS_WIN_STRICT 0
+#endif
+
+/// \internal EIGEN_OS_SUN set to 1 if the OS is SUN
+#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
+  #define EIGEN_OS_SUN 1
+#else
+  #define EIGEN_OS_SUN 0
+#endif
+
+/// \internal EIGEN_OS_SOLARIS set to 1 if the OS is Solaris
+#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__))
+  #define EIGEN_OS_SOLARIS 1
+#else
+  #define EIGEN_OS_SOLARIS 0
+#endif
+
+
+
+#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
+  // see bug 89
+  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
+#else
+  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
+#endif
+
+// This macro can be used to prevent from macro expansion, e.g.:
+//   std::max EIGEN_NOT_A_MACRO(a,b)
+#define EIGEN_NOT_A_MACRO
+
+#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
+#else
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
+#endif
+
+#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
+#endif
+
+// Cross compiler wrapper around LLVM's __has_builtin
+#ifdef __has_builtin
+#  define EIGEN_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#  define EIGEN_HAS_BUILTIN(x) 0
+#endif
+
+// A Clang feature extension to determine compiler features.
+// We use it to determine 'cxx_rvalue_references'
+#ifndef __has_feature
+# define __has_feature(x) 0
+#endif
+
+// Upperbound on the C++ version to use.
+// Expected values are 03, 11, 14, 17, etc.
+// By default, let's use an arbitrarily large C++ version.
+#ifndef EIGEN_MAX_CPP_VER
+#define EIGEN_MAX_CPP_VER 99
+#endif
+
+#if EIGEN_MAX_CPP_VER>=11 && (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900)
+#define EIGEN_HAS_CXX11 1
+#else
+#define EIGEN_HAS_CXX11 0
+#endif
+
+
+// Do we support r-value references?
+#ifndef EIGEN_HAS_RVALUE_REFERENCES
+#if EIGEN_MAX_CPP_VER>=11 && \
+    (__has_feature(cxx_rvalue_references) || \
+    (defined(__cplusplus) && __cplusplus >= 201103L) || \
+    (EIGEN_COMP_MSVC >= 1600))
+  #define EIGEN_HAS_RVALUE_REFERENCES 1
+#else
+  #define EIGEN_HAS_RVALUE_REFERENCES 0
+#endif
+#endif
+
+// Does the compiler support C99?
+#ifndef EIGEN_HAS_C99_MATH
+#if EIGEN_MAX_CPP_VER>=11 && \
+    ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901))       \
+  || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
+  || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \
+  || (EIGEN_COMP_MSVC >= 1900) )
+  #define EIGEN_HAS_C99_MATH 1
+#else
+  #define EIGEN_HAS_C99_MATH 0
+#endif
+#endif
+
+// Does the compiler support result_of?
+#ifndef EIGEN_HAS_STD_RESULT_OF
+#if EIGEN_MAX_CPP_VER>=11 && ((__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)))
+#define EIGEN_HAS_STD_RESULT_OF 1
+#else
+#define EIGEN_HAS_STD_RESULT_OF 0
+#endif
+#endif
+
+// Does the compiler support type_traits?
+// - full support of type traits was added only to GCC 5.1.0.
+// - 20150626 corresponds to the last release of 4.x libstdc++
+#ifndef EIGEN_HAS_TYPE_TRAITS
+#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_HAS_CXX11 || EIGEN_COMP_MSVC >= 1700) \
+  && ((!EIGEN_COMP_GNUC_STRICT) || EIGEN_GNUC_AT_LEAST(5, 1)) \
+  && ((!defined(__GLIBCXX__))   || __GLIBCXX__ > 20150626)
+#define EIGEN_HAS_TYPE_TRAITS 1
+#define EIGEN_INCLUDE_TYPE_TRAITS
+#else
+#define EIGEN_HAS_TYPE_TRAITS 0
+#endif
+#endif
+
+// Does the compiler support variadic templates?
+#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
+#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
+  && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) )
+    // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
+    //    this prevents nvcc from crashing when compiling Eigen on Tegra X1
+#define EIGEN_HAS_VARIADIC_TEMPLATES 1
+#else
+#define EIGEN_HAS_VARIADIC_TEMPLATES 0
+#endif
+#endif
+
+// Does the compiler fully support const expressions? (as in c++14)
+#ifndef EIGEN_HAS_CONSTEXPR
+
+#ifdef __CUDACC__
+// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
+#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
+  #define EIGEN_HAS_CONSTEXPR 1
+#endif
+#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
+  (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)))
+#define EIGEN_HAS_CONSTEXPR 1
+#endif
+
+#ifndef EIGEN_HAS_CONSTEXPR
+#define EIGEN_HAS_CONSTEXPR 0
+#endif
+
+#endif
+
+// Does the compiler support C++11 math?
+// Let's be conservative and enable the default C++11 implementation only if we are sure it exists
+#ifndef EIGEN_HAS_CXX11_MATH
+  #if EIGEN_MAX_CPP_VER>=11 && ((__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC)  \
+      && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC))
+    #define EIGEN_HAS_CXX11_MATH 1
+  #else
+    #define EIGEN_HAS_CXX11_MATH 0
+  #endif
+#endif
+
+// Does the compiler support proper C++11 containers?
+#ifndef EIGEN_HAS_CXX11_CONTAINERS
+  #if    EIGEN_MAX_CPP_VER>=11 && \
+         ((__cplusplus > 201103L) \
+      || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
+      || EIGEN_COMP_MSVC >= 1900)
+    #define EIGEN_HAS_CXX11_CONTAINERS 1
+  #else
+    #define EIGEN_HAS_CXX11_CONTAINERS 0
+  #endif
+#endif
+
+// Does the compiler support C++11 noexcept?
+#ifndef EIGEN_HAS_CXX11_NOEXCEPT
+  #if    EIGEN_MAX_CPP_VER>=11 && \
+         (__has_feature(cxx_noexcept) \
+      || (__cplusplus > 201103L) \
+      || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
+      || EIGEN_COMP_MSVC >= 1900)
+    #define EIGEN_HAS_CXX11_NOEXCEPT 1
+  #else
+    #define EIGEN_HAS_CXX11_NOEXCEPT 0
+  #endif
+#endif
+
+/** Allows to disable some optimizations which might affect the accuracy of the result.
+  * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
+  * They currently include:
+  *   - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization.
+  */
+#ifndef EIGEN_FAST_MATH
+#define EIGEN_FAST_MATH 1
+#endif
+
+#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl;
+
+// concatenate two tokens
+#define EIGEN_CAT2(a,b) a ## b
+#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
+
+#define EIGEN_COMMA ,
+
+// convert a token to a string
+#define EIGEN_MAKESTRING2(a) #a
+#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
+
+// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
+// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
+// but GCC is still doing fine with just inline.
+#ifndef EIGEN_STRONG_INLINE
+#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
+#define EIGEN_STRONG_INLINE __forceinline
+#else
+#define EIGEN_STRONG_INLINE inline
+#endif
+#endif
+
+// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
+// attribute to maximize inlining. This should only be used when really necessary: in particular,
+// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
+// FIXME with the always_inline attribute,
+// gcc 3.4.x and 4.1 reports the following compilation error:
+//   Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
+//    : function body not available
+//   See also bug 1367
+#if EIGEN_GNUC_AT_LEAST(4,2)
+#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
+#else
+#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
+#endif
+
+#if EIGEN_COMP_GNUC
+#define EIGEN_DONT_INLINE __attribute__((noinline))
+#elif EIGEN_COMP_MSVC
+#define EIGEN_DONT_INLINE __declspec(noinline)
+#else
+#define EIGEN_DONT_INLINE
+#endif
+
+#if EIGEN_COMP_GNUC
+#define EIGEN_PERMISSIVE_EXPR __extension__
+#else
+#define EIGEN_PERMISSIVE_EXPR
+#endif
+
+// this macro allows to get rid of linking errors about multiply defined functions.
+//  - static is not very good because it prevents definitions from different object files to be merged.
+//           So static causes the resulting linked executable to be bloated with multiple copies of the same function.
+//  - inline is not perfect either as it unwantedly hints the compiler toward inlining the function.
+#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline
+
+#ifdef NDEBUG
+# ifndef EIGEN_NO_DEBUG
+#  define EIGEN_NO_DEBUG
+# endif
+#endif
+
+// eigen_plain_assert is where we implement the workaround for the assert() bug in GCC <= 4.3, see bug 89
+#ifdef EIGEN_NO_DEBUG
+  #define eigen_plain_assert(x)
+#else
+  #if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO
+    namespace Eigen {
+    namespace internal {
+    inline bool copy_bool(bool b) { return b; }
+    }
+    }
+    #define eigen_plain_assert(x) assert(x)
+  #else
+    // work around bug 89
+    #include <cstdlib>   // for abort
+    #include <iostream>  // for std::cerr
+
+    namespace Eigen {
+    namespace internal {
+    // trivial function copying a bool. Must be EIGEN_DONT_INLINE, so we implement it after including Eigen headers.
+    // see bug 89.
+    namespace {
+    EIGEN_DONT_INLINE bool copy_bool(bool b) { return b; }
+    }
+    inline void assert_fail(const char *condition, const char *function, const char *file, int line)
+    {
+      std::cerr << "assertion failed: " << condition << " in function " << function << " at " << file << ":" << line << std::endl;
+      abort();
+    }
+    }
+    }
+    #define eigen_plain_assert(x) \
+      do { \
+        if(!Eigen::internal::copy_bool(x)) \
+          Eigen::internal::assert_fail(EIGEN_MAKESTRING(x), __PRETTY_FUNCTION__, __FILE__, __LINE__); \
+      } while(false)
+  #endif
+#endif
+
+// eigen_assert can be overridden
+#ifndef eigen_assert
+#define eigen_assert(x) eigen_plain_assert(x)
+#endif
+
+#ifdef EIGEN_INTERNAL_DEBUGGING
+#define eigen_internal_assert(x) eigen_assert(x)
+#else
+#define eigen_internal_assert(x)
+#endif
+
+#ifdef EIGEN_NO_DEBUG
+#define EIGEN_ONLY_USED_FOR_DEBUG(x) EIGEN_UNUSED_VARIABLE(x)
+#else
+#define EIGEN_ONLY_USED_FOR_DEBUG(x)
+#endif
+
+#ifndef EIGEN_NO_DEPRECATED_WARNING
+  #if EIGEN_COMP_GNUC
+    #define EIGEN_DEPRECATED __attribute__((deprecated))
+  #elif EIGEN_COMP_MSVC
+    #define EIGEN_DEPRECATED __declspec(deprecated)
+  #else
+    #define EIGEN_DEPRECATED
+  #endif
+#else
+  #define EIGEN_DEPRECATED
+#endif
+
+#if EIGEN_COMP_GNUC
+#define EIGEN_UNUSED __attribute__((unused))
+#else
+#define EIGEN_UNUSED
+#endif
+
+// Suppresses 'unused variable' warnings.
+namespace Eigen {
+  namespace internal {
+    template<typename T> EIGEN_DEVICE_FUNC void ignore_unused_variable(const T&) {}
+  }
+}
+#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);
+
+#if !defined(EIGEN_ASM_COMMENT)
+  #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
+    #define EIGEN_ASM_COMMENT(X)  __asm__("#" X)
+  #else
+    #define EIGEN_ASM_COMMENT(X)
+  #endif
+#endif
+
+
+//------------------------------------------------------------------------------------------
+// Static and dynamic alignment control
+//
+// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
+// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively.
+// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
+// a default value is automatically computed based on architecture, compiler, and OS.
+//
+// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
+// to be used to declare statically aligned buffers.
+//------------------------------------------------------------------------------------------
+
+
+/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
+ * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
+ * so that vectorization doesn't affect binary compatibility.
+ *
+ * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
+ * vectorized and non-vectorized code.
+ */
+#if (defined __CUDACC__)
+  #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
+#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
+  #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
+#elif EIGEN_COMP_MSVC
+  #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
+#elif EIGEN_COMP_SUNCC
+  // FIXME not sure about this one:
+  #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
+#else
+  #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
+#endif
+
+// If the user explicitly disable vectorization, then we also disable alignment
+#if defined(EIGEN_DONT_VECTORIZE)
+  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
+#elif defined(EIGEN_VECTORIZE_AVX512)
+  // 64 bytes static alignmeent is preferred only if really required
+  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
+#elif defined(__AVX__)
+  // 32 bytes static alignmeent is preferred only if really required
+  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
+#else
+  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
+#endif
+
+
+// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
+#define EIGEN_MIN_ALIGN_BYTES 16
+
+// Defined the boundary (in bytes) on which the data needs to be aligned. Note
+// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
+// aligned at all regardless of the value of this #define.
+
+#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN))  && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
+#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
+#endif
+
+// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprectated
+// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
+#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
+  #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
+    #undef EIGEN_MAX_STATIC_ALIGN_BYTES
+  #endif
+  #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
+#endif
+
+#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
+
+  // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
+
+  // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
+  // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
+  // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
+  // certain common platform (compiler+architecture combinations) to avoid these problems.
+  // Only static alignment is really problematic (relies on nonstandard compiler extensions),
+  // try to keep heap alignment even when we have to disable static alignment.
+  #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
+  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+  #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
+  // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support.
+  // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use.
+  // 4.8 and newer seem definitely unaffected.
+  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+  #else
+  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
+  #endif
+
+  // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
+  #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
+  && !EIGEN_GCC3_OR_OLDER \
+  && !EIGEN_COMP_SUNCC \
+  && !EIGEN_OS_QNX
+    #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
+  #else
+    #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
+  #endif
+
+  #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
+    #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+  #else
+    #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
+  #endif
+
+#endif
+
+// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_ALIGN_BYTES
+#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
+#undef EIGEN_MAX_STATIC_ALIGN_BYTES
+#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
+#endif
+
+#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
+  #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+#endif
+
+// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
+// It takes into account both the user choice to explicitly enable/disable alignment (by settting EIGEN_MAX_STATIC_ALIGN_BYTES)
+// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
+// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
+
+
+// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
+#define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)
+#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
+#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
+#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
+#else
+#define EIGEN_ALIGN_MAX
+#endif
+
+
+// Dynamic alignment control
+
+#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
+#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
+#endif
+
+#ifdef EIGEN_DONT_ALIGN
+  #ifdef EIGEN_MAX_ALIGN_BYTES
+    #undef EIGEN_MAX_ALIGN_BYTES
+  #endif
+  #define EIGEN_MAX_ALIGN_BYTES 0
+#elif !defined(EIGEN_MAX_ALIGN_BYTES)
+  #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+#endif
+
+#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
+#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+#else
+#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
+#endif
+
+
+#ifndef EIGEN_UNALIGNED_VECTORIZE
+#define EIGEN_UNALIGNED_VECTORIZE 1
+#endif
+
+//----------------------------------------------------------------------
+
+
+#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
+  #define EIGEN_RESTRICT
+#endif
+#ifndef EIGEN_RESTRICT
+  #define EIGEN_RESTRICT __restrict
+#endif
+
+#ifndef EIGEN_STACK_ALLOCATION_LIMIT
+// 131072 == 128 KB
+#define EIGEN_STACK_ALLOCATION_LIMIT 131072
+#endif
+
+#ifndef EIGEN_DEFAULT_IO_FORMAT
+#ifdef EIGEN_MAKING_DOCS
+// format used in Eigen's documentation
+// needed to define it here as escaping characters in CMake add_definition's argument seems very problematic.
+#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat(3, 0, " ", "\n", "", "")
+#else
+#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat()
+#endif
+#endif
+
+// just an empty macro !
+#define EIGEN_EMPTY
+
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
+  // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
+  #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+    using Base::operator =;
+#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
+  #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+    using Base::operator =; \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \
+    template <typename OtherDerived> \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }
+#else
+  #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+    using Base::operator =; \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \
+    { \
+      Base::operator=(other); \
+      return *this; \
+    }
+#endif
+
+
+/**
+ * \internal
+ * \brief Macro to explicitly define the default copy constructor.
+ * This is necessary, because the implicit definition is deprecated if the copy-assignment is overridden.
+ */
+#if EIGEN_HAS_CXX11
+#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) EIGEN_DEVICE_FUNC CLASS(const CLASS&) = default;
+#else
+#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS)
+#endif
+
+
+
+/** \internal
+ * \brief Macro to manually inherit assignment operators.
+ * This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined.
+ * With C++11 or later this also default-implements the copy-constructor
+ */
+#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived)  \
+    EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+    EIGEN_DEFAULT_COPY_CONSTRUCTOR(Derived)
+
+/** \internal
+ * \brief Macro to manually define default constructors and destructors.
+ * This is necessary when the copy constructor is re-defined.
+ * For empty helper classes this should usually be protected, to avoid accidentally creating empty objects.
+ *
+ * Hiding the default destructor lead to problems in C++03 mode together with boost::multiprecision
+ */
+#if EIGEN_HAS_CXX11
+#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived)  \
+    EIGEN_DEVICE_FUNC Derived() = default; \
+    EIGEN_DEVICE_FUNC ~Derived() = default;
+#else
+#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived)  \
+    EIGEN_DEVICE_FUNC Derived() {}; \
+    /* EIGEN_DEVICE_FUNC ~Derived() {}; */
+#endif
+
+
+
+
+
+/**
+* Just a side note. Commenting within defines works only by documenting
+* behind the object (via '!<'). Comments cannot be multi-line and thus
+* we have these extra long lines. What is confusing doxygen over here is
+* that we use '\' and basically have a bunch of typedefs with their
+* documentation in a single line.
+**/
+
+#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
+  typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \
+  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T were corresponding to RealScalar. */ \
+  typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \
+  typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \
+  typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
+  typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \
+  enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
+        ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
+        Flags = Eigen::internal::traits<Derived>::Flags, \
+        SizeAtCompileTime = Base::SizeAtCompileTime, \
+        MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
+        IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
+  using Base::derived; \
+  using Base::const_cast_derived;
+
+
+// FIXME Maybe the EIGEN_DENSE_PUBLIC_INTERFACE could be removed as importing PacketScalar is rarely needed
+#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
+  EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
+  typedef typename Base::PacketScalar PacketScalar;
+
+
+#define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
+#define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
+
+// EIGEN_SIZE_MIN_PREFER_DYNAMIC gives the min between compile-time sizes. 0 has absolute priority, followed by 1,
+// followed by Dynamic, followed by other finite values. The reason for giving Dynamic the priority over
+// finite values is that min(3, Dynamic) should be Dynamic, since that could be anything between 0 and 3.
+#define EIGEN_SIZE_MIN_PREFER_DYNAMIC(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
+                           : ((int)a == 1 || (int)b == 1) ? 1 \
+                           : ((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+                           : ((int)a <= (int)b) ? (int)a : (int)b)
+
+// EIGEN_SIZE_MIN_PREFER_FIXED is a variant of EIGEN_SIZE_MIN_PREFER_DYNAMIC comparing MaxSizes. The difference is that finite values
+// now have priority over Dynamic, so that min(3, Dynamic) gives 3. Indeed, whatever the actual value is
+// (between 0 and 3), it is not more than 3.
+#define EIGEN_SIZE_MIN_PREFER_FIXED(a,b)  (((int)a == 0 || (int)b == 0) ? 0 \
+                           : ((int)a == 1 || (int)b == 1) ? 1 \
+                           : ((int)a == Dynamic && (int)b == Dynamic) ? Dynamic \
+                           : ((int)a == Dynamic) ? (int)b \
+                           : ((int)b == Dynamic) ? (int)a \
+                           : ((int)a <= (int)b) ? (int)a : (int)b)
+
+// see EIGEN_SIZE_MIN_PREFER_DYNAMIC. No need for a separate variant for MaxSizes here.
+#define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+                           : ((int)a >= (int)b) ? (int)a : (int)b)
+
+#define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b)))
+
+#define EIGEN_IMPLIES(a,b) (!(a) || (b))
+
+// the expression type of a standard coefficient wise binary operation
+#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \
+    CwiseBinaryOp< \
+      EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \
+          typename internal::traits<LHS>::Scalar, \
+          typename internal::traits<RHS>::Scalar \
+      >, \
+      const LHS, \
+      const RHS \
+    >
+
+#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \
+  template<typename OtherDerived> \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \
+  (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+  { \
+    return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \
+  }
+
+#define EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,TYPEA,TYPEB) \
+  (Eigen::internal::has_ReturnType<Eigen::ScalarBinaryOpTraits<TYPEA,TYPEB,EIGEN_CAT(EIGEN_CAT(Eigen::internal::scalar_,OPNAME),_op)<TYPEA,TYPEB>  > >::value)
+
+#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \
+  CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<typename internal::traits<EXPR>::Scalar,SCALAR>, const EXPR, \
+                const typename internal::plain_constant_type<EXPR,SCALAR>::type>
+
+#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \
+  CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<SCALAR,typename internal::traits<EXPR>::Scalar>, \
+                const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR>
+
+// Workaround for MSVC 2010 (see ML thread "patch with compile for for MSVC 2010")
+#if EIGEN_COMP_MSVC_STRICT<=1600
+#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if<true,X>::type
+#else
+#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X
+#endif
+
+#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \
+  template <typename T> EIGEN_DEVICE_FUNC inline \
+  EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type,OPNAME))\
+  (METHOD)(const T& scalar) const { \
+    typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type PromotedT; \
+    return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \
+           typename internal::plain_constant_type<Derived,PromotedT>::type(derived().rows(), derived().cols(), internal::scalar_constant_op<PromotedT>(scalar))); \
+  }
+
+#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
+  template <typename T> EIGEN_DEVICE_FUNC inline friend \
+  EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type,Derived,OPNAME)) \
+  (METHOD)(const T& scalar, const StorageBaseType& matrix) { \
+    typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type PromotedT; \
+    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \
+           typename internal::plain_constant_type<Derived,PromotedT>::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op<PromotedT>(scalar)), matrix.derived()); \
+  }
+
+#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \
+  EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
+  EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME)
+
+
+#ifdef EIGEN_EXCEPTIONS
+#  define EIGEN_THROW_X(X) throw X
+#  define EIGEN_THROW throw
+#  define EIGEN_TRY try
+#  define EIGEN_CATCH(X) catch (X)
+#else
+#  ifdef __CUDA_ARCH__
+#    define EIGEN_THROW_X(X) asm("trap;")
+#    define EIGEN_THROW asm("trap;")
+#  else
+#    define EIGEN_THROW_X(X) std::abort()
+#    define EIGEN_THROW std::abort()
+#  endif
+#  define EIGEN_TRY if (true)
+#  define EIGEN_CATCH(X) else
+#endif
+
+
+#if EIGEN_HAS_CXX11_NOEXCEPT
+#   define EIGEN_INCLUDE_TYPE_TRAITS
+#   define EIGEN_NOEXCEPT noexcept
+#   define EIGEN_NOEXCEPT_IF(x) noexcept(x)
+#   define EIGEN_NO_THROW noexcept(true)
+#   define EIGEN_EXCEPTION_SPEC(X) noexcept(false)
+#else
+#   define EIGEN_NOEXCEPT
+#   define EIGEN_NOEXCEPT_IF(x)
+#   define EIGEN_NO_THROW throw()
+#   if EIGEN_COMP_MSVC
+      // MSVC does not support exception specifications (warning C4290),
+      // and they are deprecated in c++11 anyway.
+#     define EIGEN_EXCEPTION_SPEC(X) throw()
+#   else
+#     define EIGEN_EXCEPTION_SPEC(X) throw(X)
+#   endif
+#endif
+
+#endif // EIGEN_MACROS_H
diff --git a/third-party/Eigen/src/Core/util/Memory.h b/third-party/Eigen/src/Core/util/Memory.h
new file mode 100644
index 00000000..291383c5
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/Memory.h
@@ -0,0 +1,993 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
+// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
+// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
+// Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+/*****************************************************************************
+*** Platform checks for aligned malloc functions                           ***
+*****************************************************************************/
+
+#ifndef EIGEN_MEMORY_H
+#define EIGEN_MEMORY_H
+
+#ifndef EIGEN_MALLOC_ALREADY_ALIGNED
+
+// Try to determine automatically if malloc is already aligned.
+
+// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
+//   http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
+// This is true at least since glibc 2.8.
+// This leaves the question how to detect 64-bit. According to this document,
+//   http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
+// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
+// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
+#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
+  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
+#else
+  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
+#endif
+
+// FreeBSD 6 seems to have 16-byte aligned malloc
+//   See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
+// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
+//   See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
+#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
+  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
+#else
+  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
+#endif
+
+#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16))     \
+ || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16))   \
+ || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED              \
+ || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
+  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
+#else
+  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
+#endif
+
+#endif
+
+namespace Eigen {
+
+namespace internal {
+
+EIGEN_DEVICE_FUNC 
+inline void throw_std_bad_alloc()
+{
+  #ifdef EIGEN_EXCEPTIONS
+    throw std::bad_alloc();
+  #else
+    std::size_t huge = static_cast<std::size_t>(-1);
+    ::operator new(huge);
+  #endif
+}
+
+/*****************************************************************************
+*** Implementation of handmade aligned functions                           ***
+*****************************************************************************/
+
+/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
+
+/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
+  * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
+  */
+inline void* handmade_aligned_malloc(std::size_t size)
+{
+  void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
+  if (original == 0) return 0;
+  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
+  *(reinterpret_cast<void**>(aligned) - 1) = original;
+  return aligned;
+}
+
+/** \internal Frees memory allocated with handmade_aligned_malloc */
+inline void handmade_aligned_free(void *ptr)
+{
+  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
+}
+
+/** \internal
+  * \brief Reallocates aligned memory.
+  * Since we know that our handmade version is based on std::malloc
+  * we can use std::realloc to implement efficient reallocation.
+  */
+inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
+{
+  if (ptr == 0) return handmade_aligned_malloc(size);
+  void *original = *(reinterpret_cast<void**>(ptr) - 1);
+  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
+  original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
+  if (original == 0) return 0;
+  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
+  void *previous_aligned = static_cast<char *>(original)+previous_offset;
+  if(aligned!=previous_aligned)
+    std::memmove(aligned, previous_aligned, size);
+  
+  *(reinterpret_cast<void**>(aligned) - 1) = original;
+  return aligned;
+}
+
+/*****************************************************************************
+*** Implementation of portable aligned versions of malloc/free/realloc     ***
+*****************************************************************************/
+
+#ifdef EIGEN_NO_MALLOC
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{
+  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
+}
+#elif defined EIGEN_RUNTIME_NO_MALLOC
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
+{
+  static bool value = true;
+  if (update == 1)
+    value = new_value;
+  return value;
+}
+EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
+EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{
+  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
+}
+#else 
+EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
+{}
+#endif
+
+/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
+  * On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
+  */
+EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
+{
+  check_that_malloc_is_allowed();
+
+  void *result;
+  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+    result = std::malloc(size);
+    #if EIGEN_DEFAULT_ALIGN_BYTES==16
+    eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator.");
+    #endif
+  #else
+    result = handmade_aligned_malloc(size);
+  #endif
+
+  if(!result && size)
+    throw_std_bad_alloc();
+
+  return result;
+}
+
+/** \internal Frees memory allocated with aligned_malloc. */
+EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
+{
+  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+    std::free(ptr);
+  #else
+    handmade_aligned_free(ptr);
+  #endif
+}
+
+/**
+  * \internal
+  * \brief Reallocates an aligned block of memory.
+  * \throws std::bad_alloc on allocation failure
+  */
+inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
+{
+  EIGEN_UNUSED_VARIABLE(old_size);
+
+  void *result;
+#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
+  result = std::realloc(ptr,new_size);
+#else
+  result = handmade_aligned_realloc(ptr,new_size,old_size);
+#endif
+
+  if (!result && new_size)
+    throw_std_bad_alloc();
+
+  return result;
+}
+
+/*****************************************************************************
+*** Implementation of conditionally aligned functions                      ***
+*****************************************************************************/
+
+/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
+  * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
+  */
+template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
+{
+  return aligned_malloc(size);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
+{
+  check_that_malloc_is_allowed();
+
+  void *result = std::malloc(size);
+  if(!result && size)
+    throw_std_bad_alloc();
+  return result;
+}
+
+/** \internal Frees memory allocated with conditional_aligned_malloc */
+template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
+{
+  aligned_free(ptr);
+}
+
+template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
+{
+  std::free(ptr);
+}
+
+template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
+{
+  return aligned_realloc(ptr, new_size, old_size);
+}
+
+template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
+{
+  return std::realloc(ptr, new_size);
+}
+
+/*****************************************************************************
+*** Construction/destruction of array elements                             ***
+*****************************************************************************/
+
+/** \internal Destructs the elements of an array.
+  * The \a size parameters tells on how many objects to call the destructor of T.
+  */
+template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
+{
+  // always destruct an array starting from the end.
+  if(ptr)
+    while(size) ptr[--size].~T();
+}
+
+/** \internal Constructs the elements of an array.
+  * The \a size parameter tells on how many objects to call the constructor of T.
+  */
+template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size)
+{
+  std::size_t i;
+  EIGEN_TRY
+  {
+      for (i = 0; i < size; ++i) ::new (ptr + i) T;
+      return ptr;
+  }
+  EIGEN_CATCH(...)
+  {
+    destruct_elements_of_array(ptr, i);
+    EIGEN_THROW;
+  }
+  return NULL;
+}
+
+/*****************************************************************************
+*** Implementation of aligned new/delete-like functions                    ***
+*****************************************************************************/
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
+{
+  if(size > std::size_t(-1) / sizeof(T))
+    throw_std_bad_alloc();
+}
+
+/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
+  * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown.
+  * The default constructor of T is called.
+  */
+template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
+{
+  check_size_for_overflow<T>(size);
+  T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
+  EIGEN_TRY
+  {
+    return construct_elements_of_array(result, size);
+  }
+  EIGEN_CATCH(...)
+  {
+    aligned_free(result);
+    EIGEN_THROW;
+  }
+  return result;
+}
+
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
+{
+  check_size_for_overflow<T>(size);
+  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
+  EIGEN_TRY
+  {
+    return construct_elements_of_array(result, size);
+  }
+  EIGEN_CATCH(...)
+  {
+    conditional_aligned_free<Align>(result);
+    EIGEN_THROW;
+  }
+  return result;
+}
+
+/** \internal Deletes objects constructed with aligned_new
+  * The \a size parameters tells on how many objects to call the destructor of T.
+  */
+template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
+{
+  destruct_elements_of_array<T>(ptr, size);
+  aligned_free(ptr);
+}
+
+/** \internal Deletes objects constructed with conditional_aligned_new
+  * The \a size parameters tells on how many objects to call the destructor of T.
+  */
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
+{
+  destruct_elements_of_array<T>(ptr, size);
+  conditional_aligned_free<Align>(ptr);
+}
+
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
+{
+  check_size_for_overflow<T>(new_size);
+  check_size_for_overflow<T>(old_size);
+  if(new_size < old_size)
+    destruct_elements_of_array(pts+new_size, old_size-new_size);
+  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
+  if(new_size > old_size)
+  {
+    EIGEN_TRY
+    {
+      construct_elements_of_array(result+old_size, new_size-old_size);
+    }
+    EIGEN_CATCH(...)
+    {
+      conditional_aligned_free<Align>(result);
+      EIGEN_THROW;
+    }
+  }
+  return result;
+}
+
+
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
+{
+  if(size==0)
+    return 0; // short-cut. Also fixes Bug 884
+  check_size_for_overflow<T>(size);
+  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
+  if(NumTraits<T>::RequireInitialization)
+  {
+    EIGEN_TRY
+    {
+      construct_elements_of_array(result, size);
+    }
+    EIGEN_CATCH(...)
+    {
+      conditional_aligned_free<Align>(result);
+      EIGEN_THROW;
+    }
+  }
+  return result;
+}
+
+template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
+{
+  check_size_for_overflow<T>(new_size);
+  check_size_for_overflow<T>(old_size);
+  if(NumTraits<T>::RequireInitialization && (new_size < old_size))
+    destruct_elements_of_array(pts+new_size, old_size-new_size);
+  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
+  if(NumTraits<T>::RequireInitialization && (new_size > old_size))
+  {
+    EIGEN_TRY
+    {
+      construct_elements_of_array(result+old_size, new_size-old_size);
+    }
+    EIGEN_CATCH(...)
+    {
+      conditional_aligned_free<Align>(result);
+      EIGEN_THROW;
+    }
+  }
+  return result;
+}
+
+template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
+{
+  if(NumTraits<T>::RequireInitialization)
+    destruct_elements_of_array<T>(ptr, size);
+  conditional_aligned_free<Align>(ptr);
+}
+
+/****************************************************************************/
+
+/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a Alignment.
+  *
+  * \tparam Alignment requested alignment in Bytes.
+  * \param array the address of the start of the array
+  * \param size the size of the array
+  *
+  * \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar,
+  * the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If
+  * packet size for the given scalar type is 1, then everything is considered well-aligned.
+  *
+  * \note Otherwise, if the Alignment is larger that the scalar size, we rely on the assumptions that sizeof(Scalar) is a
+  * power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
+  * example with Scalar=double on certain 32-bit platforms, see bug #79.
+  *
+  * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
+  * \sa first_default_aligned()
+  */
+template<int Alignment, typename Scalar, typename Index>
+EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
+{
+  const Index ScalarSize = sizeof(Scalar);
+  const Index AlignmentSize = Alignment / ScalarSize;
+  const Index AlignmentMask = AlignmentSize-1;
+
+  if(AlignmentSize<=1)
+  {
+    // Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar
+    // so that all elements of the array have the same alignment.
+    return 0;
+  }
+  else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
+  {
+    // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
+    // Consequently, no element of the array is well aligned.
+    return size;
+  }
+  else
+  {
+    Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
+    return (first < size) ? first : size;
+  }
+}
+
+/** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement.
+   * \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
+template<typename Scalar, typename Index>
+EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
+{
+  typedef typename packet_traits<Scalar>::type DefaultPacketType;
+  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
+}
+
+/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
+  */ 
+template<typename Index> 
+inline Index first_multiple(Index size, Index base)
+{
+  return ((size+base-1)/base)*base;
+}
+
+// std::copy is much slower than memcpy, so let's introduce a smart_copy which
+// use memcpy on trivial types, i.e., on types that does not require an initialization ctor.
+template<typename T, bool UseMemcpy> struct smart_copy_helper;
+
+template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
+{
+  smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
+}
+
+template<typename T> struct smart_copy_helper<T,true> {
+  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
+  {
+    IntPtr size = IntPtr(end)-IntPtr(start);
+    if(size==0) return;
+    eigen_internal_assert(start!=0 && end!=0 && target!=0);
+    std::memcpy(target, start, size);
+  }
+};
+
+template<typename T> struct smart_copy_helper<T,false> {
+  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
+  { std::copy(start, end, target); }
+};
+
+// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. 
+template<typename T, bool UseMemmove> struct smart_memmove_helper;
+
+template<typename T> void smart_memmove(const T* start, const T* end, T* target)
+{
+  smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
+}
+
+template<typename T> struct smart_memmove_helper<T,true> {
+  static inline void run(const T* start, const T* end, T* target)
+  {
+    IntPtr size = IntPtr(end)-IntPtr(start);
+    if(size==0) return;
+    eigen_internal_assert(start!=0 && end!=0 && target!=0);
+    std::memmove(target, start, size);
+  }
+};
+
+template<typename T> struct smart_memmove_helper<T,false> {
+  static inline void run(const T* start, const T* end, T* target)
+  { 
+    if (UIntPtr(target) < UIntPtr(start))
+    {
+      std::copy(start, end, target);
+    }
+    else                                 
+    {
+      std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
+      std::copy_backward(start, end, target + count); 
+    }
+  }
+};
+
+
+/*****************************************************************************
+*** Implementation of runtime stack allocation (falling back to malloc)    ***
+*****************************************************************************/
+
+// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
+// to the appropriate stack allocation function
+#ifndef EIGEN_ALLOCA
+  #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
+    #define EIGEN_ALLOCA alloca
+  #elif EIGEN_COMP_MSVC
+    #define EIGEN_ALLOCA _alloca
+  #endif
+#endif
+
+// This helper class construct the allocated memory, and takes care of destructing and freeing the handled data
+// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
+template<typename T> class aligned_stack_memory_handler : noncopyable
+{
+  public:
+    /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
+     * Note that \a ptr can be 0 regardless of the other parameters.
+     * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).
+     * In this case, the buffer elements will also be destructed when this handler will be destructed.
+     * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
+     **/
+    aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
+      : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
+    {
+      if(NumTraits<T>::RequireInitialization && m_ptr)
+        Eigen::internal::construct_elements_of_array(m_ptr, size);
+    }
+    ~aligned_stack_memory_handler()
+    {
+      if(NumTraits<T>::RequireInitialization && m_ptr)
+        Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
+      if(m_deallocate)
+        Eigen::internal::aligned_free(m_ptr);
+    }
+  protected:
+    T* m_ptr;
+    std::size_t m_size;
+    bool m_deallocate;
+};
+
+template<typename T> class scoped_array : noncopyable
+{
+  T* m_ptr;
+public:
+  explicit scoped_array(std::ptrdiff_t size)
+  {
+    m_ptr = new T[size];
+  }
+  ~scoped_array()
+  {
+    delete[] m_ptr;
+  }
+  T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
+  const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
+  T* &ptr() { return m_ptr; }
+  const T* ptr() const { return m_ptr; }
+  operator const T*() const { return m_ptr; }
+};
+
+template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
+{
+  std::swap(a.ptr(),b.ptr());
+}
+    
+} // end namespace internal
+
+/** \internal
+  * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
+  * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
+  * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
+  * The allocated buffer is automatically deleted when exiting the scope of this declaration.
+  * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
+  * Here is an example:
+  * \code
+  * {
+  *   ei_declare_aligned_stack_constructed_variable(float,data,size,0);
+  *   // use data[0] to data[size-1]
+  * }
+  * \endcode
+  * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
+  */
+#ifdef EIGEN_ALLOCA
+  
+  #if EIGEN_DEFAULT_ALIGN_BYTES>0
+    // We always manually re-align the result of EIGEN_ALLOCA.
+    // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
+    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
+  #else
+    #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
+  #endif
+
+  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
+    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
+               : reinterpret_cast<TYPE*>( \
+                      (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
+                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) );  \
+    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
+
+#else
+
+  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
+    TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE));    \
+    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
+    
+#endif
+
+
+/*****************************************************************************
+*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]                ***
+*****************************************************************************/
+
+#if EIGEN_MAX_ALIGN_BYTES!=0
+  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+      void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
+        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
+        EIGEN_CATCH (...) { return 0; } \
+      }
+  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
+      void *operator new(std::size_t size) { \
+        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
+      } \
+      void *operator new[](std::size_t size) { \
+        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
+      } \
+      void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+      void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+      void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+      void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+      /* in-place new and delete. since (at least afaik) there is no actual   */ \
+      /* memory allocated we can safely let the default implementation handle */ \
+      /* this particular case. */ \
+      static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
+      static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
+      void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
+      void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
+      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
+      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+      void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
+        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
+      } \
+      typedef void eigen_aligned_operator_new_marker_type;
+#else
+  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+#endif
+
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
+
+/****************************************************************************/
+
+/** \class aligned_allocator
+* \ingroup Core_Module
+*
+* \brief STL compatible allocator to use with types requiring a non standrad alignment.
+*
+* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
+* By default, it will thus provide at least 16 bytes alignment and more in following cases:
+*  - 32 bytes alignment if AVX is enabled.
+*  - 64 bytes alignment if AVX512 is enabled.
+*
+* This can be controled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
+* \link TopicPreprocessorDirectivesPerformance there \endlink.
+*
+* Example:
+* \code
+* // Matrix4f requires 16 bytes alignment:
+* std::map< int, Matrix4f, std::less<int>, 
+*           aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
+* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
+* std::map< int, Vector3f > my_map_vec3;
+* \endcode
+*
+* \sa \blank \ref TopicStlContainers.
+*/
+template<class T>
+class aligned_allocator : public std::allocator<T>
+{
+public:
+  typedef std::size_t     size_type;
+  typedef std::ptrdiff_t  difference_type;
+  typedef T*              pointer;
+  typedef const T*        const_pointer;
+  typedef T&              reference;
+  typedef const T&        const_reference;
+  typedef T               value_type;
+
+  template<class U>
+  struct rebind
+  {
+    typedef aligned_allocator<U> other;
+  };
+
+  aligned_allocator() : std::allocator<T>() {}
+
+  aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
+
+  template<class U>
+  aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
+
+  ~aligned_allocator() {}
+
+  pointer allocate(size_type num, const void* /*hint*/ = 0)
+  {
+    internal::check_size_for_overflow<T>(num);
+    size_type size = num * sizeof(T);
+#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
+    // workaround gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
+    // It triggered eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
+    if(size>=std::size_t((std::numeric_limits<std::ptrdiff_t>::max)()))
+      return 0;
+    else
+#endif
+      return static_cast<pointer>( internal::aligned_malloc(size) );
+  }
+
+  void deallocate(pointer p, size_type /*num*/)
+  {
+    internal::aligned_free(p);
+  }
+};
+
+//---------- Cache sizes ----------
+
+#if !defined(EIGEN_NO_CPUID)
+#  if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
+#    if defined(__PIC__) && EIGEN_ARCH_i386
+       // Case for x86 with PIC
+#      define EIGEN_CPUID(abcd,func,id) \
+         __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+#    elif defined(__PIC__) && EIGEN_ARCH_x86_64
+       // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
+       // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
+#      define EIGEN_CPUID(abcd,func,id) \
+        __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
+#    else
+       // Case for x86_64 or x86 w/o PIC
+#      define EIGEN_CPUID(abcd,func,id) \
+         __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
+#    endif
+#  elif EIGEN_COMP_MSVC
+#    if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
+#      define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
+#    endif
+#  endif
+#endif
+
+namespace internal {
+
+#ifdef EIGEN_CPUID
+
+inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
+{
+  return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
+}
+
+inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
+{
+  int abcd[4];
+  l1 = l2 = l3 = 0;
+  int cache_id = 0;
+  int cache_type = 0;
+  do {
+    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+    EIGEN_CPUID(abcd,0x4,cache_id);
+    cache_type  = (abcd[0] & 0x0F) >> 0;
+    if(cache_type==1||cache_type==3) // data or unified cache
+    {
+      int cache_level = (abcd[0] & 0xE0) >> 5;  // A[7:5]
+      int ways        = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
+      int partitions  = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
+      int line_size   = (abcd[1] & 0x00000FFF) >>  0; // B[11:0]
+      int sets        = (abcd[2]);                    // C[31:0]
+
+      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);
+
+      switch(cache_level)
+      {
+        case 1: l1 = cache_size; break;
+        case 2: l2 = cache_size; break;
+        case 3: l3 = cache_size; break;
+        default: break;
+      }
+    }
+    cache_id++;
+  } while(cache_type>0 && cache_id<16);
+}
+
+inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
+{
+  int abcd[4];
+  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+  l1 = l2 = l3 = 0;
+  EIGEN_CPUID(abcd,0x00000002,0);
+  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
+  bool check_for_p2_core2 = false;
+  for(int i=0; i<14; ++i)
+  {
+    switch(bytes[i])
+    {
+      case 0x0A: l1 = 8; break;   // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines
+      case 0x0C: l1 = 16; break;  // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines
+      case 0x0E: l1 = 24; break;  // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines
+      case 0x10: l1 = 16; break;  // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
+      case 0x15: l1 = 16; break;  // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
+      case 0x2C: l1 = 32; break;  // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines
+      case 0x30: l1 = 32; break;  // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines
+      case 0x60: l1 = 16; break;  // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
+      case 0x66: l1 = 8; break;   // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
+      case 0x67: l1 = 16; break;  // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
+      case 0x68: l1 = 32; break;  // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
+      case 0x1A: l2 = 96; break;   // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
+      case 0x22: l3 = 512; break;   // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
+      case 0x23: l3 = 1024; break;   // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
+      case 0x25: l3 = 2048; break;   // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
+      case 0x29: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
+      case 0x39: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
+      case 0x3A: l2 = 192; break;   // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
+      case 0x3B: l2 = 128; break;   // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
+      case 0x3C: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
+      case 0x3D: l2 = 384; break;   // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
+      case 0x3E: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
+      case 0x40: l2 = 0; break;   // no integrated L2 cache (P6 core) or L3 cache (P4 core)
+      case 0x41: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
+      case 0x42: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
+      case 0x43: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
+      case 0x44: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
+      case 0x45: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
+      case 0x46: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
+      case 0x47: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
+      case 0x48: l2 = 3072; break;   // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
+      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
+      case 0x4A: l3 = 6144; break;   // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
+      case 0x4B: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
+      case 0x4C: l3 = 12288; break;   // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
+      case 0x4D: l3 = 16384; break;   // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
+      case 0x4E: l2 = 6144; break;   // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
+      case 0x78: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
+      case 0x79: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
+      case 0x7A: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
+      case 0x7B: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
+      case 0x7C: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
+      case 0x7D: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
+      case 0x7E: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
+      case 0x7F: l2 = 512; break;   // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
+      case 0x80: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
+      case 0x81: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
+      case 0x82: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
+      case 0x83: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
+      case 0x84: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
+      case 0x85: l2 = 2048; break;   // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
+      case 0x86: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
+      case 0x87: l2 = 1024; break;   // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
+      case 0x88: l3 = 2048; break;   // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
+      case 0x89: l3 = 4096; break;   // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
+      case 0x8A: l3 = 8192; break;   // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
+      case 0x8D: l3 = 3072; break;   // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
+
+      default: break;
+    }
+  }
+  if(check_for_p2_core2 && l2 == l3)
+    l3 = 0;
+  l1 *= 1024;
+  l2 *= 1024;
+  l3 *= 1024;
+}
+
+inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
+{
+  if(max_std_funcs>=4)
+    queryCacheSizes_intel_direct(l1,l2,l3);
+  else
+    queryCacheSizes_intel_codes(l1,l2,l3);
+}
+
+inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
+{
+  int abcd[4];
+  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+  EIGEN_CPUID(abcd,0x80000005,0);
+  l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
+  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+  EIGEN_CPUID(abcd,0x80000006,0);
+  l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
+  l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
+}
+#endif
+
+/** \internal
+ * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */
+inline void queryCacheSizes(int& l1, int& l2, int& l3)
+{
+  #ifdef EIGEN_CPUID
+  int abcd[4];
+  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
+  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
+  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"
+
+  // identify the CPU vendor
+  EIGEN_CPUID(abcd,0x0,0);
+  int max_std_funcs = abcd[1];
+  if(cpuid_is_vendor(abcd,GenuineIntel))
+    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
+  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
+    queryCacheSizes_amd(l1,l2,l3);
+  else
+    // by default let's use Intel's API
+    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
+
+  // here is the list of other vendors:
+//   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
+//   ||cpuid_is_vendor(abcd,"CyrixInstead")
+//   ||cpuid_is_vendor(abcd,"CentaurHauls")
+//   ||cpuid_is_vendor(abcd,"GenuineTMx86")
+//   ||cpuid_is_vendor(abcd,"TransmetaCPU")
+//   ||cpuid_is_vendor(abcd,"RiseRiseRise")
+//   ||cpuid_is_vendor(abcd,"Geode by NSC")
+//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
+//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
+//   ||cpuid_is_vendor(abcd,"NexGenDriven")
+  #else
+  l1 = l2 = l3 = -1;
+  #endif
+}
+
+/** \internal
+ * \returns the size in Bytes of the L1 data cache */
+inline int queryL1CacheSize()
+{
+  int l1(-1), l2, l3;
+  queryCacheSizes(l1,l2,l3);
+  return l1;
+}
+
+/** \internal
+ * \returns the size in Bytes of the L2 or L3 cache if this later is present */
+inline int queryTopLevelCacheSize()
+{
+  int l1, l2(-1), l3(-1);
+  queryCacheSizes(l1,l2,l3);
+  return (std::max)(l2,l3);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MEMORY_H
diff --git a/third-party/Eigen/src/Core/util/Meta.h b/third-party/Eigen/src/Core/util/Meta.h
new file mode 100644
index 00000000..0a0a863a
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/Meta.h
@@ -0,0 +1,575 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_META_H
+#define EIGEN_META_H
+
+#if defined(__CUDA_ARCH__)
+#include <cfloat>
+#include <math_constants.h>
+#endif
+
+// Recent versions of ICC require <cstdint> for pointer types below.
+#define EIGEN_ICC_NEEDS_CSTDINT (EIGEN_COMP_ICC>=1600 && EIGEN_COMP_CXXVER >= 11)
+
+// Define portable (u)int{32,64} types
+#if EIGEN_HAS_CXX11 || EIGEN_ICC_NEEDS_CSTDINT
+#include <cstdint>
+namespace Eigen {
+namespace numext {
+typedef std::uint8_t  uint8_t;
+typedef std::int8_t   int8_t;
+typedef std::uint16_t uint16_t;
+typedef std::int16_t  int16_t;
+typedef std::uint32_t uint32_t;
+typedef std::int32_t  int32_t;
+typedef std::uint64_t uint64_t;
+typedef std::int64_t  int64_t;
+}
+}
+#else
+// Without c++11, all compilers able to compile Eigen also
+// provide the C99 stdint.h header file.
+#include <stdint.h>
+namespace Eigen {
+namespace numext {
+typedef ::uint8_t  uint8_t;
+typedef ::int8_t   int8_t;
+typedef ::uint16_t uint16_t;
+typedef ::int16_t  int16_t;
+typedef ::uint32_t uint32_t;
+typedef ::int32_t  int32_t;
+typedef ::uint64_t uint64_t;
+typedef ::int64_t  int64_t;
+}
+}
+#endif
+
+namespace Eigen {
+
+typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex;
+
+/**
+ * \brief The Index type as used for the API.
+ * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
+ * \sa \blank \ref TopicPreprocessorDirectives, StorageIndex.
+ */
+
+typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index;
+
+namespace internal {
+
+/** \internal
+  * \file Meta.h
+  * This file contains generic metaprogramming classes which are not specifically related to Eigen.
+  * \note In case you wonder, yes we're aware that Boost already provides all these features,
+  * we however don't want to add a dependency to Boost.
+  */
+
+// Only recent versions of ICC complain about using ptrdiff_t to hold pointers,
+// and older versions do not provide *intptr_t types.
+#if EIGEN_ICC_NEEDS_CSTDINT
+typedef std::intptr_t  IntPtr;
+typedef std::uintptr_t UIntPtr;
+#else
+typedef std::ptrdiff_t IntPtr;
+typedef std::size_t UIntPtr;
+#endif
+#undef EIGEN_ICC_NEEDS_CSTDINT
+
+struct true_type {  enum { value = 1 }; };
+struct false_type { enum { value = 0 }; };
+
+template<bool Condition, typename Then, typename Else>
+struct conditional { typedef Then type; };
+
+template<typename Then, typename Else>
+struct conditional <false, Then, Else> { typedef Else type; };
+
+template<typename T, typename U> struct is_same { enum { value = 0 }; };
+template<typename T> struct is_same<T,T> { enum { value = 1 }; };
+
+template<typename T> struct remove_reference { typedef T type; };
+template<typename T> struct remove_reference<T&> { typedef T type; };
+
+template<typename T> struct remove_pointer { typedef T type; };
+template<typename T> struct remove_pointer<T*> { typedef T type; };
+template<typename T> struct remove_pointer<T*const> { typedef T type; };
+
+template <class T> struct remove_const { typedef T type; };
+template <class T> struct remove_const<const T> { typedef T type; };
+template <class T> struct remove_const<const T[]> { typedef T type[]; };
+template <class T, unsigned int Size> struct remove_const<const T[Size]> { typedef T type[Size]; };
+
+template<typename T> struct remove_all { typedef T type; };
+template<typename T> struct remove_all<const T>   { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T const&>  { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T&>        { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T const*>  { typedef typename remove_all<T>::type type; };
+template<typename T> struct remove_all<T*>        { typedef typename remove_all<T>::type type; };
+
+template<typename T> struct is_arithmetic      { enum { value = false }; };
+template<> struct is_arithmetic<float>         { enum { value = true }; };
+template<> struct is_arithmetic<double>        { enum { value = true }; };
+template<> struct is_arithmetic<long double>   { enum { value = true }; };
+template<> struct is_arithmetic<bool>          { enum { value = true }; };
+template<> struct is_arithmetic<char>          { enum { value = true }; };
+template<> struct is_arithmetic<signed char>   { enum { value = true }; };
+template<> struct is_arithmetic<unsigned char> { enum { value = true }; };
+template<> struct is_arithmetic<signed short>  { enum { value = true }; };
+template<> struct is_arithmetic<unsigned short>{ enum { value = true }; };
+template<> struct is_arithmetic<signed int>    { enum { value = true }; };
+template<> struct is_arithmetic<unsigned int>  { enum { value = true }; };
+template<> struct is_arithmetic<signed long>   { enum { value = true }; };
+template<> struct is_arithmetic<unsigned long> { enum { value = true }; };
+
+#if EIGEN_HAS_CXX11
+using std::is_integral;
+#else
+template<typename T> struct is_integral        { enum { value = false }; };
+template<> struct is_integral<bool>            { enum { value = true }; };
+template<> struct is_integral<char>            { enum { value = true }; };
+template<> struct is_integral<signed char>     { enum { value = true }; };
+template<> struct is_integral<unsigned char>   { enum { value = true }; };
+template<> struct is_integral<signed short>    { enum { value = true }; };
+template<> struct is_integral<unsigned short>  { enum { value = true }; };
+template<> struct is_integral<signed int>      { enum { value = true }; };
+template<> struct is_integral<unsigned int>    { enum { value = true }; };
+template<> struct is_integral<signed long>     { enum { value = true }; };
+template<> struct is_integral<unsigned long>   { enum { value = true }; };
+#if EIGEN_COMP_MSVC
+template<> struct is_integral<signed __int64>  { enum { value = true }; };
+template<> struct is_integral<unsigned __int64>{ enum { value = true }; };
+#endif
+#endif
+
+#if EIGEN_HAS_CXX11
+using std::make_unsigned;
+#else
+// TODO: Possibly improve this implementation of make_unsigned.
+// It is currently used only by
+// template<typename Scalar> struct random_default_impl<Scalar, false, true>.
+template<typename> struct make_unsigned;
+template<> struct make_unsigned<char>             { typedef unsigned char type; };
+template<> struct make_unsigned<signed char>      { typedef unsigned char type; };
+template<> struct make_unsigned<unsigned char>    { typedef unsigned char type; };
+template<> struct make_unsigned<signed short>     { typedef unsigned short type; };
+template<> struct make_unsigned<unsigned short>   { typedef unsigned short type; };
+template<> struct make_unsigned<signed int>       { typedef unsigned int type; };
+template<> struct make_unsigned<unsigned int>     { typedef unsigned int type; };
+template<> struct make_unsigned<signed long>      { typedef unsigned long type; };
+template<> struct make_unsigned<unsigned long>    { typedef unsigned long type; };
+#if EIGEN_COMP_MSVC
+template<> struct make_unsigned<signed __int64>   { typedef unsigned __int64 type; };
+template<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; };
+#endif
+#endif
+
+template <typename T> struct add_const { typedef const T type; };
+template <typename T> struct add_const<T&> { typedef T& type; };
+
+template <typename T> struct is_const { enum { value = 0 }; };
+template <typename T> struct is_const<T const> { enum { value = 1 }; };
+
+template<typename T> struct add_const_on_value_type            { typedef const T type;  };
+template<typename T> struct add_const_on_value_type<T&>        { typedef T const& type; };
+template<typename T> struct add_const_on_value_type<T*>        { typedef T const* type; };
+template<typename T> struct add_const_on_value_type<T* const>  { typedef T const* const type; };
+template<typename T> struct add_const_on_value_type<T const* const>  { typedef T const* const type; };
+
+
+template<typename From, typename To>
+struct is_convertible_impl
+{
+private:
+  struct any_conversion
+  {
+    template <typename T> any_conversion(const volatile T&);
+    template <typename T> any_conversion(T&);
+  };
+  struct yes {int a[1];};
+  struct no  {int a[2];};
+
+  static yes test(const To&, int);
+  static no  test(any_conversion, ...);
+
+public:
+  static From ms_from;
+#ifdef __INTEL_COMPILER
+  #pragma warning push
+  #pragma warning ( disable : 2259 )
+#endif
+  enum { value = sizeof(test(ms_from, 0))==sizeof(yes) };
+#ifdef __INTEL_COMPILER
+  #pragma warning pop
+#endif
+};
+
+template<typename From, typename To>
+struct is_convertible
+{
+  enum { value = is_convertible_impl<typename remove_all<From>::type,
+                                     typename remove_all<To  >::type>::value };
+};
+
+/** \internal Allows to enable/disable an overload
+  * according to a compile time condition.
+  */
+template<bool Condition, typename T=void> struct enable_if;
+
+template<typename T> struct enable_if<true,T>
+{ typedef T type; };
+
+#if defined(__CUDA_ARCH__)
+#if !defined(__FLT_EPSILON__)
+#define __FLT_EPSILON__ FLT_EPSILON
+#define __DBL_EPSILON__ DBL_EPSILON
+#endif
+
+namespace device {
+
+template<typename T> struct numeric_limits
+{
+  EIGEN_DEVICE_FUNC
+  static T epsilon() { return 0; }
+  static T (max)() { assert(false && "Highest not supported for this type"); }
+  static T (min)() { assert(false && "Lowest not supported for this type"); }
+  static T infinity() { assert(false && "Infinity not supported for this type"); }
+  static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); }
+};
+template<> struct numeric_limits<float>
+{
+  EIGEN_DEVICE_FUNC
+  static float epsilon() { return __FLT_EPSILON__; }
+  EIGEN_DEVICE_FUNC
+  static float (max)() { return CUDART_MAX_NORMAL_F; }
+  EIGEN_DEVICE_FUNC
+  static float (min)() { return FLT_MIN; }
+  EIGEN_DEVICE_FUNC
+  static float infinity() { return CUDART_INF_F; }
+  EIGEN_DEVICE_FUNC
+  static float quiet_NaN() { return CUDART_NAN_F; }
+};
+template<> struct numeric_limits<double>
+{
+  EIGEN_DEVICE_FUNC
+  static double epsilon() { return __DBL_EPSILON__; }
+  EIGEN_DEVICE_FUNC
+  static double (max)() { return DBL_MAX; }
+  EIGEN_DEVICE_FUNC
+  static double (min)() { return DBL_MIN; }
+  EIGEN_DEVICE_FUNC
+  static double infinity() { return CUDART_INF; }
+  EIGEN_DEVICE_FUNC
+  static double quiet_NaN() { return CUDART_NAN; }
+};
+template<> struct numeric_limits<int>
+{
+  EIGEN_DEVICE_FUNC
+  static int epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC
+  static int (max)() { return INT_MAX; }
+  EIGEN_DEVICE_FUNC
+  static int (min)() { return INT_MIN; }
+};
+template<> struct numeric_limits<unsigned int>
+{
+  EIGEN_DEVICE_FUNC
+  static unsigned int epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC
+  static unsigned int (max)() { return UINT_MAX; }
+  EIGEN_DEVICE_FUNC
+  static unsigned int (min)() { return 0; }
+};
+template<> struct numeric_limits<long>
+{
+  EIGEN_DEVICE_FUNC
+  static long epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC
+  static long (max)() { return LONG_MAX; }
+  EIGEN_DEVICE_FUNC
+  static long (min)() { return LONG_MIN; }
+};
+template<> struct numeric_limits<unsigned long>
+{
+  EIGEN_DEVICE_FUNC
+  static unsigned long epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC
+  static unsigned long (max)() { return ULONG_MAX; }
+  EIGEN_DEVICE_FUNC
+  static unsigned long (min)() { return 0; }
+};
+template<> struct numeric_limits<long long>
+{
+  EIGEN_DEVICE_FUNC
+  static long long epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC
+  static long long (max)() { return LLONG_MAX; }
+  EIGEN_DEVICE_FUNC
+  static long long (min)() { return LLONG_MIN; }
+};
+template<> struct numeric_limits<unsigned long long>
+{
+  EIGEN_DEVICE_FUNC
+  static unsigned long long epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC
+  static unsigned long long (max)() { return ULLONG_MAX; }
+  EIGEN_DEVICE_FUNC
+  static unsigned long long (min)() { return 0; }
+};
+
+}
+
+#endif
+
+/** \internal
+  * A base class do disable default copy ctor and copy assignement operator.
+  */
+class noncopyable
+{
+  EIGEN_DEVICE_FUNC noncopyable(const noncopyable&);
+  EIGEN_DEVICE_FUNC const noncopyable& operator=(const noncopyable&);
+protected:
+  EIGEN_DEVICE_FUNC noncopyable() {}
+  EIGEN_DEVICE_FUNC ~noncopyable() {}
+};
+
+/** \internal
+  * Convenient struct to get the result type of a unary or binary functor.
+  *
+  * It supports both the current STL mechanism (using the result_type member) as well as
+  * upcoming next STL generation (using a templated result member).
+  * If none of these members is provided, then the type of the first argument is returned. FIXME, that behavior is a pretty bad hack.
+  */
+#if EIGEN_HAS_STD_RESULT_OF
+template<typename T> struct result_of {
+  typedef typename std::result_of<T>::type type1;
+  typedef typename remove_all<type1>::type type;
+};
+#else
+template<typename T> struct result_of { };
+
+struct has_none {int a[1];};
+struct has_std_result_type {int a[2];};
+struct has_tr1_result {int a[3];};
+
+template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
+struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;};
+
+template<typename Func, typename ArgType>
+struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};
+
+template<typename Func, typename ArgType>
+struct unary_result_of_select<Func, ArgType, sizeof(has_tr1_result)> {typedef typename Func::template result<Func(ArgType)>::type type;};
+
+template<typename Func, typename ArgType>
+struct result_of<Func(ArgType)> {
+    template<typename T>
+    static has_std_result_type    testFunctor(T const *, typename T::result_type const * = 0);
+    template<typename T>
+    static has_tr1_result         testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0);
+    static has_none               testFunctor(...);
+
+    // note that the following indirection is needed for gcc-3.3
+    enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+    typedef typename unary_result_of_select<Func, ArgType, FunctorType>::type type;
+};
+
+template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)>
+struct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1>
+struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)>
+{typedef typename Func::result_type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1>
+struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_tr1_result)>
+{typedef typename Func::template result<Func(ArgType0,ArgType1)>::type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1>
+struct result_of<Func(ArgType0,ArgType1)> {
+    template<typename T>
+    static has_std_result_type    testFunctor(T const *, typename T::result_type const * = 0);
+    template<typename T>
+    static has_tr1_result         testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0);
+    static has_none               testFunctor(...);
+
+    // note that the following indirection is needed for gcc-3.3
+    enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+    typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type;
+};
+
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2, int SizeOf=sizeof(has_none)>
+struct ternary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_std_result_type)>
+{typedef typename Func::result_type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_tr1_result)>
+{typedef typename Func::template result<Func(ArgType0,ArgType1,ArgType2)>::type type;};
+
+template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
+struct result_of<Func(ArgType0,ArgType1,ArgType2)> {
+    template<typename T>
+    static has_std_result_type    testFunctor(T const *, typename T::result_type const * = 0);
+    template<typename T>
+    static has_tr1_result         testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1,ArgType2)>::type const * = 0);
+    static has_none               testFunctor(...);
+
+    // note that the following indirection is needed for gcc-3.3
+    enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+    typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type;
+};
+#endif
+
+struct meta_yes { char a[1]; };
+struct meta_no  { char a[2]; };
+
+// Check whether T::ReturnType does exist
+template <typename T>
+struct has_ReturnType
+{
+  template <typename C> static meta_yes testFunctor(typename C::ReturnType const *);
+  template <typename C> static meta_no testFunctor(...);
+
+  enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) };
+};
+
+template<typename T> const T* return_ptr();
+
+template <typename T, typename IndexType=Index>
+struct has_nullary_operator
+{
+  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()())>0)>::type * = 0);
+  static meta_no testFunctor(...);
+
+  enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
+
+template <typename T, typename IndexType=Index>
+struct has_unary_operator
+{
+  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0)))>0)>::type * = 0);
+  static meta_no testFunctor(...);
+
+  enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
+
+template <typename T, typename IndexType=Index>
+struct has_binary_operator
+{
+  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
+  static meta_no testFunctor(...);
+
+  enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
+};
+
+/** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer.
+  * Usage example: \code meta_sqrt<1023>::ret \endcode
+  */
+template<int Y,
+         int InfX = 0,
+         int SupX = ((Y==1) ? 1 : Y/2),
+         bool Done = ((SupX-InfX)<=1 ? true : ((SupX*SupX <= Y) && ((SupX+1)*(SupX+1) > Y))) >
+                                // use ?: instead of || just to shut up a stupid gcc 4.3 warning
+class meta_sqrt
+{
+    enum {
+      MidX = (InfX+SupX)/2,
+      TakeInf = MidX*MidX > Y ? 1 : 0,
+      NewInf = int(TakeInf) ? InfX : int(MidX),
+      NewSup = int(TakeInf) ? int(MidX) : SupX
+    };
+  public:
+    enum { ret = meta_sqrt<Y,NewInf,NewSup>::ret };
+};
+
+template<int Y, int InfX, int SupX>
+class meta_sqrt<Y, InfX, SupX, true> { public:  enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
+
+
+/** \internal Computes the least common multiple of two positive integer A and B
+  * at compile-time. It implements a naive algorithm testing all multiples of A.
+  * It thus works better if A>=B.
+  */
+template<int A, int B, int K=1, bool Done = ((A*K)%B)==0>
+struct meta_least_common_multiple
+{
+  enum { ret = meta_least_common_multiple<A,B,K+1>::ret };
+};
+template<int A, int B, int K>
+struct meta_least_common_multiple<A,B,K,true>
+{
+  enum { ret = A*K };
+};
+
+/** \internal determines whether the product of two numeric types is allowed and what the return type is */
+template<typename T, typename U> struct scalar_product_traits
+{
+  enum { Defined = 0 };
+};
+
+// FIXME quick workaround around current limitation of result_of
+// template<typename Scalar, typename ArgType0, typename ArgType1>
+// struct result_of<scalar_product_op<Scalar>(ArgType0,ArgType1)> {
+// typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type;
+// };
+
+} // end namespace internal
+
+namespace numext {
+  
+#if defined(__CUDA_ARCH__)
+template<typename T> EIGEN_DEVICE_FUNC   void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
+#else
+template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
+#endif
+
+#if defined(__CUDA_ARCH__)
+using internal::device::numeric_limits;
+#else
+using std::numeric_limits;
+#endif
+
+// Integer division with rounding up.
+// T is assumed to be an integer type with a>=0, and b>0
+template<typename T>
+T div_ceil(const T &a, const T &b)
+{
+  return (a+b-1) / b;
+}
+
+// The aim of the following functions is to bypass -Wfloat-equal warnings
+// when we really want a strict equality comparison on floating points.
+template<typename X, typename Y> EIGEN_STRONG_INLINE
+bool equal_strict(const X& x,const Y& y) { return x == y; }
+
+template<> EIGEN_STRONG_INLINE
+bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
+
+template<> EIGEN_STRONG_INLINE
+bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
+
+template<typename X, typename Y> EIGEN_STRONG_INLINE
+bool not_equal_strict(const X& x,const Y& y) { return x != y; }
+
+template<> EIGEN_STRONG_INLINE
+bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
+
+template<> EIGEN_STRONG_INLINE
+bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
+
+} // end namespace numext
+
+} // end namespace Eigen
+
+#endif // EIGEN_META_H
diff --git a/third-party/Eigen/src/Core/util/NonMPL2.h b/third-party/Eigen/src/Core/util/NonMPL2.h
new file mode 100644
index 00000000..1af67cf1
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/NonMPL2.h
@@ -0,0 +1,3 @@
+#ifdef EIGEN_MPL2_ONLY
+#error Including non-MPL2 code in EIGEN_MPL2_ONLY mode
+#endif
diff --git a/third-party/Eigen/src/Core/util/ReenableStupidWarnings.h b/third-party/Eigen/src/Core/util/ReenableStupidWarnings.h
new file mode 100644
index 00000000..1ce6fd1b
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/ReenableStupidWarnings.h
@@ -0,0 +1,31 @@
+#ifdef EIGEN_WARNINGS_DISABLED_2
+// "DisableStupidWarnings.h" was included twice recursively: Do not reenable warnings yet!
+#  undef EIGEN_WARNINGS_DISABLED_2
+
+#elif defined(EIGEN_WARNINGS_DISABLED)
+#undef EIGEN_WARNINGS_DISABLED
+
+#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+  #ifdef _MSC_VER
+    #pragma warning( pop )
+  #elif defined __INTEL_COMPILER
+    #pragma warning pop
+  #elif defined __clang__
+    #pragma clang diagnostic pop
+  #elif defined __GNUC__  &&  (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
+    #pragma GCC diagnostic pop
+  #endif
+
+  #if defined __NVCC__
+//    Don't reenable the diagnostic messages, as it turns out these messages need
+//    to be disabled at the point of the template instantiation (i.e the user code)
+//    otherwise they'll be triggered by nvcc.
+//    #pragma diag_default code_is_unreachable
+//    #pragma diag_default initialization_not_reachable
+//    #pragma diag_default 2651
+//    #pragma diag_default 2653
+  #endif
+
+#endif
+
+#endif // EIGEN_WARNINGS_DISABLED
diff --git a/third-party/Eigen/src/Core/util/StaticAssert.h b/third-party/Eigen/src/Core/util/StaticAssert.h
new file mode 100644
index 00000000..500e4779
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/StaticAssert.h
@@ -0,0 +1,218 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_STATIC_ASSERT_H
+#define EIGEN_STATIC_ASSERT_H
+
+/* Some notes on Eigen's static assertion mechanism:
+ *
+ *  - in EIGEN_STATIC_ASSERT(CONDITION,MSG) the parameter CONDITION must be a compile time boolean
+ *    expression, and MSG an enum listed in struct internal::static_assertion<true>
+ *
+ *  - define EIGEN_NO_STATIC_ASSERT to disable them (and save compilation time)
+ *    in that case, the static assertion is converted to the following runtime assert:
+ *      eigen_assert(CONDITION && "MSG")
+ *
+ *  - currently EIGEN_STATIC_ASSERT can only be used in function scope
+ *
+ */
+
+#ifndef EIGEN_STATIC_ASSERT
+#ifndef EIGEN_NO_STATIC_ASSERT
+
+  #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
+
+    // if native static_assert is enabled, let's use it
+    #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
+
+  #else // not CXX0X
+
+    namespace Eigen {
+
+    namespace internal {
+
+    template<bool condition>
+    struct static_assertion {};
+
+    template<>
+    struct static_assertion<true>
+    {
+      enum {
+        YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1,
+        YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1,
+        YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1,
+        THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1,
+        THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1,
+        THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1,
+        OUT_OF_RANGE_ACCESS=1,
+        YOU_MADE_A_PROGRAMMING_MISTAKE=1,
+        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1,
+        EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1,
+        YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1,
+        YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1,
+        UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1,
+        THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1,
+        FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1,
+        NUMERIC_TYPE_MUST_BE_REAL=1,
+        COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1,
+        WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1,
+        THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1,
+        INVALID_MATRIX_PRODUCT=1,
+        INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1,
+        INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1,
+        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1,
+        THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1,
+        THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1,
+        INVALID_MATRIX_TEMPLATE_PARAMETERS=1,
+        INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1,
+        BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1,
+        THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1,
+        THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1,
+        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1,
+        YOU_ALREADY_SPECIFIED_THIS_STRIDE=1,
+        INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1,
+        THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1,
+        PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1,
+        THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1,
+        YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1,
+        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1,
+        THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1,
+        YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1,
+        THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1,
+        THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1,
+        THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1,
+        THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1,
+        YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1,
+        YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1,
+        THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1,
+        THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1,
+        OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1,
+        IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1,
+        STORAGE_LAYOUT_DOES_NOT_MATCH=1,
+        EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1,
+        THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1,
+        MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1,
+        THIS_TYPE_IS_NOT_SUPPORTED=1,
+        STORAGE_KIND_MUST_MATCH=1,
+        STORAGE_INDEX_MUST_MATCH=1,
+        CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1,
+        SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1
+      };
+    };
+
+    } // end namespace internal
+
+    } // end namespace Eigen
+
+    // Specialized implementation for MSVC to avoid "conditional
+    // expression is constant" warnings.  This implementation doesn't
+    // appear to work under GCC, hence the multiple implementations.
+    #if EIGEN_COMP_MSVC
+
+      #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+        {Eigen::internal::static_assertion<bool(CONDITION)>::MSG;}
+
+    #else
+      // In some cases clang interprets bool(CONDITION) as function declaration
+      #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+        if (Eigen::internal::static_assertion<static_cast<bool>(CONDITION)>::MSG) {}
+
+    #endif
+
+  #endif // not CXX0X
+
+#else // EIGEN_NO_STATIC_ASSERT
+
+  #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
+
+#endif // EIGEN_NO_STATIC_ASSERT
+#endif // EIGEN_STATIC_ASSERT
+
+// static assertion failing if the type \a TYPE is not a vector type
+#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
+  EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime, \
+                      YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX)
+
+// static assertion failing if the type \a TYPE is not fixed-size
+#define EIGEN_STATIC_ASSERT_FIXED_SIZE(TYPE) \
+  EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime!=Eigen::Dynamic, \
+                      YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR)
+
+// static assertion failing if the type \a TYPE is not dynamic-size
+#define EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(TYPE) \
+  EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime==Eigen::Dynamic, \
+                      YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR)
+
+// static assertion failing if the type \a TYPE is not a vector type of the given size
+#define EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(TYPE, SIZE) \
+  EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime && TYPE::SizeAtCompileTime==SIZE, \
+                      THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE)
+
+// static assertion failing if the type \a TYPE is not a vector type of the given size
+#define EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(TYPE, ROWS, COLS) \
+  EIGEN_STATIC_ASSERT(TYPE::RowsAtCompileTime==ROWS && TYPE::ColsAtCompileTime==COLS, \
+                      THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE)
+
+// static assertion failing if the two vector expression types are not compatible (same fixed-size or dynamic size)
+#define EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(TYPE0,TYPE1) \
+  EIGEN_STATIC_ASSERT( \
+      (int(TYPE0::SizeAtCompileTime)==Eigen::Dynamic \
+    || int(TYPE1::SizeAtCompileTime)==Eigen::Dynamic \
+    || int(TYPE0::SizeAtCompileTime)==int(TYPE1::SizeAtCompileTime)),\
+    YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES)
+
+#define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+     ( \
+        (int(Eigen::internal::size_of_xpr_at_compile_time<TYPE0>::ret)==0 && int(Eigen::internal::size_of_xpr_at_compile_time<TYPE1>::ret)==0) \
+    || (\
+          (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \
+        || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \
+        || int(TYPE0::RowsAtCompileTime)==int(TYPE1::RowsAtCompileTime)) \
+      &&  (int(TYPE0::ColsAtCompileTime)==Eigen::Dynamic \
+        || int(TYPE1::ColsAtCompileTime)==Eigen::Dynamic \
+        || int(TYPE0::ColsAtCompileTime)==int(TYPE1::ColsAtCompileTime))\
+       ) \
+     )
+
+#define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \
+    EIGEN_STATIC_ASSERT(!NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
+
+
+// static assertion failing if it is guaranteed at compile-time that the two matrix expression types have different sizes
+#define EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+  EIGEN_STATIC_ASSERT( \
+     EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1),\
+    YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES)
+
+#define EIGEN_STATIC_ASSERT_SIZE_1x1(TYPE) \
+      EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Dynamic) && \
+                          (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Dynamic), \
+                          THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS)
+
+#define EIGEN_STATIC_ASSERT_LVALUE(Derived) \
+      EIGEN_STATIC_ASSERT(Eigen::internal::is_lvalue<Derived>::value, \
+                          THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY)
+
+#define EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) \
+      EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived>::XprKind, ArrayXpr>::value), \
+                          THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES)
+
+#define EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) \
+      EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived1>::XprKind, \
+                                             typename Eigen::internal::traits<Derived2>::XprKind \
+                                            >::value), \
+                          YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)
+
+// Check that a cost value is positive, and that is stay within a reasonable range
+// TODO this check could be enabled for internal debugging only
+#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \
+      EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);
+
+#endif // EIGEN_STATIC_ASSERT_H
diff --git a/third-party/Eigen/src/Core/util/XprHelper.h b/third-party/Eigen/src/Core/util/XprHelper.h
new file mode 100644
index 00000000..6bb49708
--- /dev/null
+++ b/third-party/Eigen/src/Core/util/XprHelper.h
@@ -0,0 +1,838 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_XPRHELPER_H
+#define EIGEN_XPRHELPER_H
+
+// just a workaround because GCC seems to not really like empty structs
+// FIXME: gcc 4.3 generates bad code when strict-aliasing is enabled
+// so currently we simply disable this optimization for gcc 4.3
+#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3)
+  #define EIGEN_EMPTY_STRUCT_CTOR(X) \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}
+#else
+  #define EIGEN_EMPTY_STRUCT_CTOR(X)
+#endif
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename IndexDest, typename IndexSrc>
+EIGEN_DEVICE_FUNC
+inline IndexDest convert_index(const IndexSrc& idx) {
+  // for sizeof(IndexDest)>=sizeof(IndexSrc) compilers should be able to optimize this away:
+  eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && "Index value to big for target type");
+  return IndexDest(idx);
+}
+
+// true if T can be considered as an integral index (i.e., and integral type or enum)
+template<typename T> struct is_valid_index_type
+{
+  enum { value =
+#if EIGEN_HAS_TYPE_TRAITS
+    internal::is_integral<T>::value || std::is_enum<T>::value
+#elif EIGEN_COMP_MSVC
+    internal::is_integral<T>::value || __is_enum(T)
+#else
+    // without C++11, we use is_convertible to Index instead of is_integral in order to treat enums as Index.
+    internal::is_convertible<T,Index>::value && !internal::is_same<T,float>::value && !is_same<T,double>::value
+#endif
+  };
+};
+
+// promote_scalar_arg is an helper used in operation between an expression and a scalar, like:
+//    expression * scalar
+// Its role is to determine how the type T of the scalar operand should be promoted given the scalar type ExprScalar of the given expression.
+// The IsSupported template parameter must be provided by the caller as: internal::has_ReturnType<ScalarBinaryOpTraits<ExprScalar,T,op> >::value using the proper order for ExprScalar and T.
+// Then the logic is as follows:
+//  - if the operation is natively supported as defined by IsSupported, then the scalar type is not promoted, and T is returned.
+//  - otherwise, NumTraits<ExprScalar>::Literal is returned if T is implicitly convertible to NumTraits<ExprScalar>::Literal AND that this does not imply a float to integer conversion.
+//  - otherwise, ExprScalar is returned if T is implicitly convertible to ExprScalar AND that this does not imply a float to integer conversion.
+//  - In all other cases, the promoted type is not defined, and the respective operation is thus invalid and not available (SFINAE).
+template<typename ExprScalar,typename T, bool IsSupported>
+struct promote_scalar_arg;
+
+template<typename S,typename T>
+struct promote_scalar_arg<S,T,true>
+{
+  typedef T type;
+};
+
+// Recursively check safe conversion to PromotedType, and then ExprScalar if they are different.
+template<typename ExprScalar,typename T,typename PromotedType,
+  bool ConvertibleToLiteral = internal::is_convertible<T,PromotedType>::value,
+  bool IsSafe = NumTraits<T>::IsInteger || !NumTraits<PromotedType>::IsInteger>
+struct promote_scalar_arg_unsupported;
+
+// Start recursion with NumTraits<ExprScalar>::Literal
+template<typename S,typename T>
+struct promote_scalar_arg<S,T,false> : promote_scalar_arg_unsupported<S,T,typename NumTraits<S>::Literal> {};
+
+// We found a match!
+template<typename S,typename T, typename PromotedType>
+struct promote_scalar_arg_unsupported<S,T,PromotedType,true,true>
+{
+  typedef PromotedType type;
+};
+
+// No match, but no real-to-integer issues, and ExprScalar and current PromotedType are different,
+// so let's try to promote to ExprScalar
+template<typename ExprScalar,typename T, typename PromotedType>
+struct promote_scalar_arg_unsupported<ExprScalar,T,PromotedType,false,true>
+   : promote_scalar_arg_unsupported<ExprScalar,T,ExprScalar>
+{};
+
+// Unsafe real-to-integer, let's stop.
+template<typename S,typename T, typename PromotedType, bool ConvertibleToLiteral>
+struct promote_scalar_arg_unsupported<S,T,PromotedType,ConvertibleToLiteral,false> {};
+
+// T is not even convertible to ExprScalar, let's stop.
+template<typename S,typename T>
+struct promote_scalar_arg_unsupported<S,T,S,false,true> {};
+
+//classes inheriting no_assignment_operator don't generate a default operator=.
+class no_assignment_operator
+{
+  private:
+    no_assignment_operator& operator=(const no_assignment_operator&);
+  protected:
+    EIGEN_DEFAULT_COPY_CONSTRUCTOR(no_assignment_operator)
+    EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(no_assignment_operator)
+};
+
+/** \internal return the index type with the largest number of bits */
+template<typename I1, typename I2>
+struct promote_index_type
+{
+  typedef typename conditional<(sizeof(I1)<sizeof(I2)), I2, I1>::type type;
+};
+
+/** \internal If the template parameter Value is Dynamic, this class is just a wrapper around a T variable that
+  * can be accessed using value() and setValue().
+  * Otherwise, this class is an empty structure and value() just returns the template parameter Value.
+  */
+template<typename T, int Value> class variable_if_dynamic
+{
+  public:
+    EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic)
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+    EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {}
+};
+
+template<typename T> class variable_if_dynamic<T, Dynamic>
+{
+    T m_value;
+    EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); }
+  public:
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
+};
+
+/** \internal like variable_if_dynamic but for DynamicIndex
+  */
+template<typename T, int Value> class variable_if_dynamicindex
+{
+  public:
+    EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex)
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+    EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {}
+};
+
+template<typename T> class variable_if_dynamicindex<T, DynamicIndex>
+{
+    T m_value;
+    EIGEN_DEVICE_FUNC variable_if_dynamicindex() { eigen_assert(false); }
+  public:
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T value) : m_value(value) {}
+    EIGEN_DEVICE_FUNC T EIGEN_STRONG_INLINE value() const { return m_value; }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
+};
+
+template<typename T> struct functor_traits
+{
+  enum
+  {
+    Cost = 10,
+    PacketAccess = false,
+    IsRepeatable = false
+  };
+};
+
+template<typename T> struct packet_traits;
+
+template<typename T> struct unpacket_traits
+{
+  typedef T type;
+  typedef T half;
+  enum
+  {
+    size = 1,
+    alignment = 1
+  };
+};
+
+template<int Size, typename PacketType,
+         bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
+struct find_best_packet_helper;
+
+template< int Size, typename PacketType>
+struct find_best_packet_helper<Size,PacketType,true>
+{
+  typedef PacketType type;
+};
+
+template<int Size, typename PacketType>
+struct find_best_packet_helper<Size,PacketType,false>
+{
+  typedef typename find_best_packet_helper<Size,typename unpacket_traits<PacketType>::half>::type type;
+};
+
+template<typename T, int Size>
+struct find_best_packet
+{
+  typedef typename find_best_packet_helper<Size,typename packet_traits<T>::type>::type type;
+};
+
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+template<int ArrayBytes, int AlignmentBytes,
+         bool Match     =  bool((ArrayBytes%AlignmentBytes)==0),
+         bool TryHalf   =  bool(EIGEN_MIN_ALIGN_BYTES<AlignmentBytes) >
+struct compute_default_alignment_helper
+{
+  enum { value = 0 };
+};
+
+template<int ArrayBytes, int AlignmentBytes, bool TryHalf>
+struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, true, TryHalf> // Match
+{
+  enum { value = AlignmentBytes };
+};
+
+template<int ArrayBytes, int AlignmentBytes>
+struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, false, true> // Try-half
+{
+  // current packet too large, try with an half-packet
+  enum { value = compute_default_alignment_helper<ArrayBytes, AlignmentBytes/2>::value };
+};
+#else
+// If static alignment is disabled, no need to bother.
+// This also avoids a division by zero in "bool Match =  bool((ArrayBytes%AlignmentBytes)==0)"
+template<int ArrayBytes, int AlignmentBytes>
+struct compute_default_alignment_helper
+{
+  enum { value = 0 };
+};
+#endif
+
+template<typename T, int Size> struct compute_default_alignment {
+  enum { value = compute_default_alignment_helper<Size*sizeof(T),EIGEN_MAX_STATIC_ALIGN_BYTES>::value };
+};
+
+template<typename T> struct compute_default_alignment<T,Dynamic> {
+  enum { value = EIGEN_MAX_ALIGN_BYTES };
+};
+
+template<typename _Scalar, int _Rows, int _Cols,
+         int _Options = AutoAlign |
+                          ( (_Rows==1 && _Cols!=1) ? RowMajor
+                          : (_Cols==1 && _Rows!=1) ? ColMajor
+                          : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+         int _MaxRows = _Rows,
+         int _MaxCols = _Cols
+> class make_proper_matrix_type
+{
+    enum {
+      IsColVector = _Cols==1 && _Rows!=1,
+      IsRowVector = _Rows==1 && _Cols!=1,
+      Options = IsColVector ? (_Options | ColMajor) & ~RowMajor
+              : IsRowVector ? (_Options | RowMajor) & ~ColMajor
+              : _Options
+    };
+  public:
+    typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type;
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+class compute_matrix_flags
+{
+    enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };
+  public:
+    // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<>
+    // and then propagate this information to the evaluator's flags.
+    // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage.
+    enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
+};
+
+template<int _Rows, int _Cols> struct size_at_compile_time
+{
+  enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
+};
+
+template<typename XprType> struct size_of_xpr_at_compile_time
+{
+  enum { ret = size_at_compile_time<traits<XprType>::RowsAtCompileTime,traits<XprType>::ColsAtCompileTime>::ret };
+};
+
+/* plain_matrix_type : the difference from eval is that plain_matrix_type is always a plain matrix type,
+ * whereas eval is a const reference in the case of a matrix
+ */
+
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_matrix_type;
+template<typename T, typename BaseClassType, int Flags> struct plain_matrix_type_dense;
+template<typename T> struct plain_matrix_type<T,Dense>
+{
+  typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, traits<T>::Flags>::type type;
+};
+template<typename T> struct plain_matrix_type<T,DiagonalShape>
+{
+  typedef typename T::PlainObject type;
+};
+
+template<typename T, int Flags> struct plain_matrix_type_dense<T,MatrixXpr,Flags>
+{
+  typedef Matrix<typename traits<T>::Scalar,
+                traits<T>::RowsAtCompileTime,
+                traits<T>::ColsAtCompileTime,
+                AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
+                traits<T>::MaxRowsAtCompileTime,
+                traits<T>::MaxColsAtCompileTime
+          > type;
+};
+
+template<typename T, int Flags> struct plain_matrix_type_dense<T,ArrayXpr,Flags>
+{
+  typedef Array<typename traits<T>::Scalar,
+                traits<T>::RowsAtCompileTime,
+                traits<T>::ColsAtCompileTime,
+                AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
+                traits<T>::MaxRowsAtCompileTime,
+                traits<T>::MaxColsAtCompileTime
+          > type;
+};
+
+/* eval : the return type of eval(). For matrices, this is just a const reference
+ * in order to avoid a useless copy
+ */
+
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct eval;
+
+template<typename T> struct eval<T,Dense>
+{
+  typedef typename plain_matrix_type<T>::type type;
+//   typedef typename T::PlainObject type;
+//   typedef T::Matrix<typename traits<T>::Scalar,
+//                 traits<T>::RowsAtCompileTime,
+//                 traits<T>::ColsAtCompileTime,
+//                 AutoAlign | (traits<T>::Flags&RowMajorBit ? RowMajor : ColMajor),
+//                 traits<T>::MaxRowsAtCompileTime,
+//                 traits<T>::MaxColsAtCompileTime
+//           > type;
+};
+
+template<typename T> struct eval<T,DiagonalShape>
+{
+  typedef typename plain_matrix_type<T>::type type;
+};
+
+// for matrices, no need to evaluate, just use a const reference to avoid a useless copy
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
+{
+  typedef const Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct eval<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
+{
+  typedef const Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+
+
+/* similar to plain_matrix_type, but using the evaluator's Flags */
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_object_eval;
+
+template<typename T>
+struct plain_object_eval<T,Dense>
+{
+  typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, evaluator<T>::Flags>::type type;
+};
+
+
+/* plain_matrix_type_column_major : same as plain_matrix_type but guaranteed to be column-major
+ */
+template<typename T> struct plain_matrix_type_column_major
+{
+  enum { Rows = traits<T>::RowsAtCompileTime,
+         Cols = traits<T>::ColsAtCompileTime,
+         MaxRows = traits<T>::MaxRowsAtCompileTime,
+         MaxCols = traits<T>::MaxColsAtCompileTime
+  };
+  typedef Matrix<typename traits<T>::Scalar,
+                Rows,
+                Cols,
+                (MaxRows==1&&MaxCols!=1) ? RowMajor : ColMajor,
+                MaxRows,
+                MaxCols
+          > type;
+};
+
+/* plain_matrix_type_row_major : same as plain_matrix_type but guaranteed to be row-major
+ */
+template<typename T> struct plain_matrix_type_row_major
+{
+  enum { Rows = traits<T>::RowsAtCompileTime,
+         Cols = traits<T>::ColsAtCompileTime,
+         MaxRows = traits<T>::MaxRowsAtCompileTime,
+         MaxCols = traits<T>::MaxColsAtCompileTime
+  };
+  typedef Matrix<typename traits<T>::Scalar,
+                Rows,
+                Cols,
+                (MaxCols==1&&MaxRows!=1) ? RowMajor : ColMajor,
+                MaxRows,
+                MaxCols
+          > type;
+};
+
+/** \internal The reference selector for template expressions. The idea is that we don't
+  * need to use references for expressions since they are light weight proxy
+  * objects which should generate no copying overhead. */
+template <typename T>
+struct ref_selector
+{
+  typedef typename conditional<
+    bool(traits<T>::Flags & NestByRefBit),
+    T const&,
+    const T
+  >::type type;
+  
+  typedef typename conditional<
+    bool(traits<T>::Flags & NestByRefBit),
+    T &,
+    T
+  >::type non_const_type;
+};
+
+/** \internal Adds the const qualifier on the value-type of T2 if and only if T1 is a const type */
+template<typename T1, typename T2>
+struct transfer_constness
+{
+  typedef typename conditional<
+    bool(internal::is_const<T1>::value),
+    typename internal::add_const_on_value_type<T2>::type,
+    T2
+  >::type type;
+};
+
+
+// However, we still need a mechanism to detect whether an expression which is evaluated multiple time
+// has to be evaluated into a temporary.
+// That's the purpose of this new nested_eval helper:
+/** \internal Determines how a given expression should be nested when evaluated multiple times.
+  * For example, when you do a * (b+c), Eigen will determine how the expression b+c should be
+  * evaluated into the bigger product expression. The choice is between nesting the expression b+c as-is, or
+  * evaluating that expression b+c into a temporary variable d, and nest d so that the resulting expression is
+  * a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes
+  * many coefficient accesses in the nested expressions -- as is the case with matrix product for example.
+  *
+  * \tparam T the type of the expression being nested.
+  * \tparam n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
+  * \tparam PlainObject the type of the temporary if needed.
+  */
+template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval
+{
+  enum {
+    ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
+    CoeffReadCost = evaluator<T>::CoeffReadCost,  // NOTE What if an evaluator evaluate itself into a tempory?
+                                                  //      Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1.
+                                                  //      This situation is already taken care by the EvalBeforeNestingBit flag, which is turned ON
+                                                  //      for all evaluator creating a temporary. This flag is then propagated by the parent evaluators.
+                                                  //      Another solution could be to count the number of temps?
+    NAsInteger = n == Dynamic ? HugeCost : n,
+    CostEval   = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
+    CostNoEval = NAsInteger * CoeffReadCost,
+    Evaluate = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
+  };
+
+  typedef typename conditional<Evaluate, PlainObject, typename ref_selector<T>::type>::type type;
+};
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+inline T* const_cast_ptr(const T* ptr)
+{
+  return const_cast<T*>(ptr);
+}
+
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind>
+struct dense_xpr_base
+{
+  /* dense_xpr_base should only ever be used on dense expressions, thus falling either into the MatrixXpr or into the ArrayXpr cases */
+};
+
+template<typename Derived>
+struct dense_xpr_base<Derived, MatrixXpr>
+{
+  typedef MatrixBase<Derived> type;
+};
+
+template<typename Derived>
+struct dense_xpr_base<Derived, ArrayXpr>
+{
+  typedef ArrayBase<Derived> type;
+};
+
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind, typename StorageKind = typename traits<Derived>::StorageKind>
+struct generic_xpr_base;
+
+template<typename Derived, typename XprKind>
+struct generic_xpr_base<Derived, XprKind, Dense>
+{
+  typedef typename dense_xpr_base<Derived,XprKind>::type type;
+};
+
+template<typename XprType, typename CastType> struct cast_return_type
+{
+  typedef typename XprType::Scalar CurrentScalarType;
+  typedef typename remove_all<CastType>::type _CastType;
+  typedef typename _CastType::Scalar NewScalarType;
+  typedef typename conditional<is_same<CurrentScalarType,NewScalarType>::value,
+                              const XprType&,CastType>::type type;
+};
+
+template <typename A, typename B> struct promote_storage_type;
+
+template <typename A> struct promote_storage_type<A,A>
+{
+  typedef A ret;
+};
+template <typename A> struct promote_storage_type<A, const A>
+{
+  typedef A ret;
+};
+template <typename A> struct promote_storage_type<const A, A>
+{
+  typedef A ret;
+};
+
+/** \internal Specify the "storage kind" of applying a coefficient-wise
+  * binary operations between two expressions of kinds A and B respectively.
+  * The template parameter Functor permits to specialize the resulting storage kind wrt to
+  * the functor.
+  * The default rules are as follows:
+  * \code
+  * A      op A      -> A
+  * A      op dense  -> dense
+  * dense  op B      -> dense
+  * sparse op dense  -> sparse
+  * dense  op sparse -> sparse
+  * \endcode
+  */
+template <typename A, typename B, typename Functor> struct cwise_promote_storage_type;
+
+template <typename A, typename Functor>                   struct cwise_promote_storage_type<A,A,Functor>                                      { typedef A      ret; };
+template <typename Functor>                               struct cwise_promote_storage_type<Dense,Dense,Functor>                              { typedef Dense  ret; };
+template <typename A, typename Functor>                   struct cwise_promote_storage_type<A,Dense,Functor>                                  { typedef Dense  ret; };
+template <typename B, typename Functor>                   struct cwise_promote_storage_type<Dense,B,Functor>                                  { typedef Dense  ret; };
+template <typename Functor>                               struct cwise_promote_storage_type<Sparse,Dense,Functor>                             { typedef Sparse ret; };
+template <typename Functor>                               struct cwise_promote_storage_type<Dense,Sparse,Functor>                             { typedef Sparse ret; };
+
+template <typename LhsKind, typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order {
+  enum { value = LhsOrder };
+};
+
+template <typename LhsKind, int LhsOrder, int RhsOrder>   struct cwise_promote_storage_order<LhsKind,Sparse,LhsOrder,RhsOrder>                { enum { value = RhsOrder }; };
+template <typename RhsKind, int LhsOrder, int RhsOrder>   struct cwise_promote_storage_order<Sparse,RhsKind,LhsOrder,RhsOrder>                { enum { value = LhsOrder }; };
+template <int Order>                                      struct cwise_promote_storage_order<Sparse,Sparse,Order,Order>                       { enum { value = Order }; };
+
+
+/** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B.
+  * The template parameter ProductTag permits to specialize the resulting storage kind wrt to
+  * some compile-time properties of the product: GemmProduct, GemvProduct, OuterProduct, InnerProduct.
+  * The default rules are as follows:
+  * \code
+  *  K * K            -> K
+  *  dense * K        -> dense
+  *  K * dense        -> dense
+  *  diag * K         -> K
+  *  K * diag         -> K
+  *  Perm * K         -> K
+  * K * Perm          -> K
+  * \endcode
+  */
+template <typename A, typename B, int ProductTag> struct product_promote_storage_type;
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A,                  A,                  ProductTag> { typedef A     ret;};
+template <int ProductTag>             struct product_promote_storage_type<Dense,              Dense,              ProductTag> { typedef Dense ret;};
+template <typename A, int ProductTag> struct product_promote_storage_type<A,                  Dense,              ProductTag> { typedef Dense ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<Dense,              B,                  ProductTag> { typedef Dense ret; };
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A,                  DiagonalShape,      ProductTag> { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<DiagonalShape,      B,                  ProductTag> { typedef B ret; };
+template <int ProductTag>             struct product_promote_storage_type<Dense,              DiagonalShape,      ProductTag> { typedef Dense ret; };
+template <int ProductTag>             struct product_promote_storage_type<DiagonalShape,      Dense,              ProductTag> { typedef Dense ret; };
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A,                  PermutationStorage, ProductTag> { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<PermutationStorage, B,                  ProductTag> { typedef B ret; };
+template <int ProductTag>             struct product_promote_storage_type<Dense,              PermutationStorage, ProductTag> { typedef Dense ret; };
+template <int ProductTag>             struct product_promote_storage_type<PermutationStorage, Dense,              ProductTag> { typedef Dense ret; };
+
+/** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type.
+  * \tparam Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType.
+  */
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_row_type
+{
+  typedef Matrix<Scalar, 1, ExpressionType::ColsAtCompileTime,
+                 ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType;
+  typedef Array<Scalar, 1, ExpressionType::ColsAtCompileTime,
+                 ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType;
+
+  typedef typename conditional<
+    is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+    MatrixRowType,
+    ArrayRowType 
+  >::type type;
+};
+
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_col_type
+{
+  typedef Matrix<Scalar, ExpressionType::RowsAtCompileTime, 1,
+                 ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> MatrixColType;
+  typedef Array<Scalar, ExpressionType::RowsAtCompileTime, 1,
+                 ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> ArrayColType;
+
+  typedef typename conditional<
+    is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+    MatrixColType,
+    ArrayColType 
+  >::type type;
+};
+
+template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar>
+struct plain_diag_type
+{
+  enum { diag_size = EIGEN_SIZE_MIN_PREFER_DYNAMIC(ExpressionType::RowsAtCompileTime, ExpressionType::ColsAtCompileTime),
+         max_diag_size = EIGEN_SIZE_MIN_PREFER_FIXED(ExpressionType::MaxRowsAtCompileTime, ExpressionType::MaxColsAtCompileTime)
+  };
+  typedef Matrix<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> MatrixDiagType;
+  typedef Array<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> ArrayDiagType;
+
+  typedef typename conditional<
+    is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
+    MatrixDiagType,
+    ArrayDiagType 
+  >::type type;
+};
+
+template<typename Expr,typename Scalar = typename Expr::Scalar>
+struct plain_constant_type
+{
+  enum { Options = (traits<Expr>::Flags&RowMajorBit)?RowMajor:0 };
+
+  typedef Array<Scalar,  traits<Expr>::RowsAtCompileTime,   traits<Expr>::ColsAtCompileTime,
+                Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> array_type;
+
+  typedef Matrix<Scalar,  traits<Expr>::RowsAtCompileTime,   traits<Expr>::ColsAtCompileTime,
+                 Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> matrix_type;
+
+  typedef CwiseNullaryOp<scalar_constant_op<Scalar>, const typename conditional<is_same< typename traits<Expr>::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type;
+};
+
+template<typename ExpressionType>
+struct is_lvalue
+{
+  enum { value = (!bool(is_const<ExpressionType>::value)) &&
+                 bool(traits<ExpressionType>::Flags & LvalueBit) };
+};
+
+template<typename T> struct is_diagonal
+{ enum { ret = false }; };
+
+template<typename T> struct is_diagonal<DiagonalBase<T> >
+{ enum { ret = true }; };
+
+template<typename T> struct is_diagonal<DiagonalWrapper<T> >
+{ enum { ret = true }; };
+
+template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
+{ enum { ret = true }; };
+
+template<typename S1, typename S2> struct glue_shapes;
+template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type;  };
+
+template<typename T1, typename T2>
+bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<has_direct_access<T1>::ret&&has_direct_access<T2>::ret, T1>::type * = 0)
+{
+  return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());
+}
+
+template<typename T1, typename T2>
+bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_access<T1>::ret&&has_direct_access<T2>::ret), T1>::type * = 0)
+{
+  return false;
+}
+
+// Internal helper defining the cost of a scalar division for the type T.
+// The default heuristic can be specialized for each scalar type and architecture.
+template<typename T,bool Vectorized=false,typename EnaleIf = void>
+struct scalar_div_cost {
+  enum { value = 8*NumTraits<T>::MulCost };
+};
+
+template<typename T,bool Vectorized>
+struct scalar_div_cost<std::complex<T>, Vectorized> {
+  enum { value = 2*scalar_div_cost<T>::value
+               + 6*NumTraits<T>::MulCost
+               + 3*NumTraits<T>::AddCost
+  };
+};
+
+
+template<bool Vectorized>
+struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
+template<bool Vectorized>
+struct scalar_div_cost<unsigned long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 21 }; };
+
+
+#ifdef EIGEN_DEBUG_ASSIGN
+std::string demangle_traversal(int t)
+{
+  if(t==DefaultTraversal) return "DefaultTraversal";
+  if(t==LinearTraversal) return "LinearTraversal";
+  if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal";
+  if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal";
+  if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal";
+  return "?";
+}
+std::string demangle_unrolling(int t)
+{
+  if(t==NoUnrolling) return "NoUnrolling";
+  if(t==InnerUnrolling) return "InnerUnrolling";
+  if(t==CompleteUnrolling) return "CompleteUnrolling";
+  return "?";
+}
+std::string demangle_flags(int f)
+{
+  std::string res;
+  if(f&RowMajorBit)                 res += " | RowMajor";
+  if(f&PacketAccessBit)             res += " | Packet";
+  if(f&LinearAccessBit)             res += " | Linear";
+  if(f&LvalueBit)                   res += " | Lvalue";
+  if(f&DirectAccessBit)             res += " | Direct";
+  if(f&NestByRefBit)                res += " | NestByRef";
+  if(f&NoPreferredStorageOrderBit)  res += " | NoPreferredStorageOrderBit";
+  
+  return res;
+}
+#endif
+
+} // end namespace internal
+
+
+/** \class ScalarBinaryOpTraits
+  * \ingroup Core_Module
+  *
+  * \brief Determines whether the given binary operation of two numeric types is allowed and what the scalar return type is.
+  *
+  * This class permits to control the scalar return type of any binary operation performed on two different scalar types through (partial) template specializations.
+  *
+  * For instance, let \c U1, \c U2 and \c U3 be three user defined scalar types for which most operations between instances of \c U1 and \c U2 returns an \c U3.
+  * You can let %Eigen knows that by defining:
+    \code
+    template<typename BinaryOp>
+    struct ScalarBinaryOpTraits<U1,U2,BinaryOp> { typedef U3 ReturnType;  };
+    template<typename BinaryOp>
+    struct ScalarBinaryOpTraits<U2,U1,BinaryOp> { typedef U3 ReturnType;  };
+    \endcode
+  * You can then explicitly disable some particular operations to get more explicit error messages:
+    \code
+    template<>
+    struct ScalarBinaryOpTraits<U1,U2,internal::scalar_max_op<U1,U2> > {};
+    \endcode
+  * Or customize the return type for individual operation:
+    \code
+    template<>
+    struct ScalarBinaryOpTraits<U1,U2,internal::scalar_sum_op<U1,U2> > { typedef U1 ReturnType; };
+    \endcode
+  *
+  * By default, the following generic combinations are supported:
+  <table class="manual">
+  <tr><th>ScalarA</th><th>ScalarB</th><th>BinaryOp</th><th>ReturnType</th><th>Note</th></tr>
+  <tr            ><td>\c T </td><td>\c T </td><td>\c * </td><td>\c T </td><td></td></tr>
+  <tr class="alt"><td>\c NumTraits<T>::Real </td><td>\c T </td><td>\c * </td><td>\c T </td><td>Only if \c NumTraits<T>::IsComplex </td></tr>
+  <tr            ><td>\c T </td><td>\c NumTraits<T>::Real </td><td>\c * </td><td>\c T </td><td>Only if \c NumTraits<T>::IsComplex </td></tr>
+  </table>
+  *
+  * \sa CwiseBinaryOp
+  */
+template<typename ScalarA, typename ScalarB, typename BinaryOp=internal::scalar_product_op<ScalarA,ScalarB> >
+struct ScalarBinaryOpTraits
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  // for backward compatibility, use the hints given by the (deprecated) internal::scalar_product_traits class.
+  : internal::scalar_product_traits<ScalarA,ScalarB>
+#endif // EIGEN_PARSED_BY_DOXYGEN
+{};
+
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T,T,BinaryOp>
+{
+  typedef T ReturnType;
+};
+
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T, typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, BinaryOp>
+{
+  typedef T ReturnType;
+};
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, T, BinaryOp>
+{
+  typedef T ReturnType;
+};
+
+// For Matrix * Permutation
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T,void,BinaryOp>
+{
+  typedef T ReturnType;
+};
+
+// For Permutation * Matrix
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<void,T,BinaryOp>
+{
+  typedef T ReturnType;
+};
+
+// for Permutation*Permutation
+template<typename BinaryOp>
+struct ScalarBinaryOpTraits<void,void,BinaryOp>
+{
+  typedef void ReturnType;
+};
+
+// We require Lhs and Rhs to have "compatible" scalar types.
+// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
+// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
+// add together a float matrix and a double matrix.
+#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
+  EIGEN_STATIC_ASSERT((Eigen::internal::has_ReturnType<ScalarBinaryOpTraits<LHS, RHS,BINOP> >::value), \
+    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+    
+} // end namespace Eigen
+
+#endif // EIGEN_XPRHELPER_H
diff --git a/third-party/Eigen/src/Eigenvalues/ComplexEigenSolver.h b/third-party/Eigen/src/Eigenvalues/ComplexEigenSolver.h
new file mode 100644
index 00000000..dc5fae06
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@@ -0,0 +1,346 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Claire Maurice
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_EIGEN_SOLVER_H
+#define EIGEN_COMPLEX_EIGEN_SOLVER_H
+
+#include "./ComplexSchur.h"
+
+namespace Eigen { 
+
+/** \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  *
+  * \class ComplexEigenSolver
+  *
+  * \brief Computes eigenvalues and eigenvectors of general complex matrices
+  *
+  * \tparam _MatrixType the type of the matrix of which we are
+  * computing the eigendecomposition; this is expected to be an
+  * instantiation of the Matrix class template.
+  *
+  * The eigenvalues and eigenvectors of a matrix \f$ A \f$ are scalars
+  * \f$ \lambda \f$ and vectors \f$ v \f$ such that \f$ Av = \lambda v
+  * \f$.  If \f$ D \f$ is a diagonal matrix with the eigenvalues on
+  * the diagonal, and \f$ V \f$ is a matrix with the eigenvectors as
+  * its columns, then \f$ A V = V D \f$. The matrix \f$ V \f$ is
+  * almost always invertible, in which case we have \f$ A = V D V^{-1}
+  * \f$. This is called the eigendecomposition.
+  *
+  * The main function in this class is compute(), which computes the
+  * eigenvalues and eigenvectors of a given function. The
+  * documentation for that function contains an example showing the
+  * main features of the class.
+  *
+  * \sa class EigenSolver, class SelfAdjointEigenSolver
+  */
+template<typename _MatrixType> class ComplexEigenSolver
+{
+  public:
+
+    /** \brief Synonym for the template parameter \p _MatrixType. */
+    typedef _MatrixType MatrixType;
+
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      Options = MatrixType::Options,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+
+    /** \brief Scalar type for matrices of type #MatrixType. */
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+
+    /** \brief Complex scalar type for #MatrixType.
+      *
+      * This is \c std::complex<Scalar> if #Scalar is real (e.g.,
+      * \c float or \c double) and just \c Scalar if #Scalar is
+      * complex.
+      */
+    typedef std::complex<RealScalar> ComplexScalar;
+
+    /** \brief Type for vector of eigenvalues as returned by eigenvalues().
+      *
+      * This is a column vector with entries of type #ComplexScalar.
+      * The length of the vector is the size of #MatrixType.
+      */
+    typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options&(~RowMajor), MaxColsAtCompileTime, 1> EigenvalueType;
+
+    /** \brief Type for matrix of eigenvectors as returned by eigenvectors().
+      *
+      * This is a square matrix with entries of type #ComplexScalar.
+      * The size is the same as the size of #MatrixType.
+      */
+    typedef Matrix<ComplexScalar, RowsAtCompileTime, ColsAtCompileTime, Options, MaxRowsAtCompileTime, MaxColsAtCompileTime> EigenvectorType;
+
+    /** \brief Default constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via compute().
+      */
+    ComplexEigenSolver()
+            : m_eivec(),
+              m_eivalues(),
+              m_schur(),
+              m_isInitialized(false),
+              m_eigenvectorsOk(false),
+              m_matX()
+    {}
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa ComplexEigenSolver()
+      */
+    explicit ComplexEigenSolver(Index size)
+            : m_eivec(size, size),
+              m_eivalues(size),
+              m_schur(size),
+              m_isInitialized(false),
+              m_eigenvectorsOk(false),
+              m_matX(size, size)
+    {}
+
+    /** \brief Constructor; computes eigendecomposition of given matrix.
+      *
+      * \param[in]  matrix  Square matrix whose eigendecomposition is to be computed.
+      * \param[in]  computeEigenvectors  If true, both the eigenvectors and the
+      *    eigenvalues are computed; if false, only the eigenvalues are
+      *    computed.
+      *
+      * This constructor calls compute() to compute the eigendecomposition.
+      */
+    template<typename InputType>
+    explicit ComplexEigenSolver(const EigenBase<InputType>& matrix, bool computeEigenvectors = true)
+            : m_eivec(matrix.rows(),matrix.cols()),
+              m_eivalues(matrix.cols()),
+              m_schur(matrix.rows()),
+              m_isInitialized(false),
+              m_eigenvectorsOk(false),
+              m_matX(matrix.rows(),matrix.cols())
+    {
+      compute(matrix.derived(), computeEigenvectors);
+    }
+
+    /** \brief Returns the eigenvectors of given matrix.
+      *
+      * \returns  A const reference to the matrix whose columns are the eigenvectors.
+      *
+      * \pre Either the constructor
+      * ComplexEigenSolver(const MatrixType& matrix, bool) or the member
+      * function compute(const MatrixType& matrix, bool) has been called before
+      * to compute the eigendecomposition of a matrix, and
+      * \p computeEigenvectors was set to true (the default).
+      *
+      * This function returns a matrix whose columns are the eigenvectors. Column
+      * \f$ k \f$ is an eigenvector corresponding to eigenvalue number \f$ k
+      * \f$ as returned by eigenvalues().  The eigenvectors are normalized to
+      * have (Euclidean) norm equal to one. The matrix returned by this
+      * function is the matrix \f$ V \f$ in the eigendecomposition \f$ A = V D
+      * V^{-1} \f$, if it exists.
+      *
+      * Example: \include ComplexEigenSolver_eigenvectors.cpp
+      * Output: \verbinclude ComplexEigenSolver_eigenvectors.out
+      */
+    const EigenvectorType& eigenvectors() const
+    {
+      eigen_assert(m_isInitialized && "ComplexEigenSolver is not initialized.");
+      eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
+      return m_eivec;
+    }
+
+    /** \brief Returns the eigenvalues of given matrix.
+      *
+      * \returns A const reference to the column vector containing the eigenvalues.
+      *
+      * \pre Either the constructor
+      * ComplexEigenSolver(const MatrixType& matrix, bool) or the member
+      * function compute(const MatrixType& matrix, bool) has been called before
+      * to compute the eigendecomposition of a matrix.
+      *
+      * This function returns a column vector containing the
+      * eigenvalues. Eigenvalues are repeated according to their
+      * algebraic multiplicity, so there are as many eigenvalues as
+      * rows in the matrix. The eigenvalues are not sorted in any particular
+      * order.
+      *
+      * Example: \include ComplexEigenSolver_eigenvalues.cpp
+      * Output: \verbinclude ComplexEigenSolver_eigenvalues.out
+      */
+    const EigenvalueType& eigenvalues() const
+    {
+      eigen_assert(m_isInitialized && "ComplexEigenSolver is not initialized.");
+      return m_eivalues;
+    }
+
+    /** \brief Computes eigendecomposition of given matrix.
+      *
+      * \param[in]  matrix  Square matrix whose eigendecomposition is to be computed.
+      * \param[in]  computeEigenvectors  If true, both the eigenvectors and the
+      *    eigenvalues are computed; if false, only the eigenvalues are
+      *    computed.
+      * \returns    Reference to \c *this
+      *
+      * This function computes the eigenvalues of the complex matrix \p matrix.
+      * The eigenvalues() function can be used to retrieve them.  If
+      * \p computeEigenvectors is true, then the eigenvectors are also computed
+      * and can be retrieved by calling eigenvectors().
+      *
+      * The matrix is first reduced to Schur form using the
+      * ComplexSchur class. The Schur decomposition is then used to
+      * compute the eigenvalues and eigenvectors.
+      *
+      * The cost of the computation is dominated by the cost of the
+      * Schur decomposition, which is \f$ O(n^3) \f$ where \f$ n \f$
+      * is the size of the matrix.
+      *
+      * Example: \include ComplexEigenSolver_compute.cpp
+      * Output: \verbinclude ComplexEigenSolver_compute.out
+      */
+    template<typename InputType>
+    ComplexEigenSolver& compute(const EigenBase<InputType>& matrix, bool computeEigenvectors = true);
+
+    /** \brief Reports whether previous computation was successful.
+      *
+      * \returns \c Success if computation was succesful, \c NoConvergence otherwise.
+      */
+    ComputationInfo info() const
+    {
+      eigen_assert(m_isInitialized && "ComplexEigenSolver is not initialized.");
+      return m_schur.info();
+    }
+
+    /** \brief Sets the maximum number of iterations allowed. */
+    ComplexEigenSolver& setMaxIterations(Index maxIters)
+    {
+      m_schur.setMaxIterations(maxIters);
+      return *this;
+    }
+
+    /** \brief Returns the maximum number of iterations. */
+    Index getMaxIterations()
+    {
+      return m_schur.getMaxIterations();
+    }
+
+  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
+    EigenvectorType m_eivec;
+    EigenvalueType m_eivalues;
+    ComplexSchur<MatrixType> m_schur;
+    bool m_isInitialized;
+    bool m_eigenvectorsOk;
+    EigenvectorType m_matX;
+
+  private:
+    void doComputeEigenvectors(RealScalar matrixnorm);
+    void sortEigenvalues(bool computeEigenvectors);
+};
+
+
+template<typename MatrixType>
+template<typename InputType>
+ComplexEigenSolver<MatrixType>& 
+ComplexEigenSolver<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeEigenvectors)
+{
+  check_template_parameters();
+  
+  // this code is inspired from Jampack
+  eigen_assert(matrix.cols() == matrix.rows());
+
+  // Do a complex Schur decomposition, A = U T U^*
+  // The eigenvalues are on the diagonal of T.
+  m_schur.compute(matrix.derived(), computeEigenvectors);
+
+  if(m_schur.info() == Success)
+  {
+    m_eivalues = m_schur.matrixT().diagonal();
+    if(computeEigenvectors)
+      doComputeEigenvectors(m_schur.matrixT().norm());
+    sortEigenvalues(computeEigenvectors);
+  }
+
+  m_isInitialized = true;
+  m_eigenvectorsOk = computeEigenvectors;
+  return *this;
+}
+
+
+template<typename MatrixType>
+void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(RealScalar matrixnorm)
+{
+  const Index n = m_eivalues.size();
+
+  matrixnorm = numext::maxi(matrixnorm,(std::numeric_limits<RealScalar>::min)());
+
+  // Compute X such that T = X D X^(-1), where D is the diagonal of T.
+  // The matrix X is unit triangular.
+  m_matX = EigenvectorType::Zero(n, n);
+  for(Index k=n-1 ; k>=0 ; k--)
+  {
+    m_matX.coeffRef(k,k) = ComplexScalar(1.0,0.0);
+    // Compute X(i,k) using the (i,k) entry of the equation X T = D X
+    for(Index i=k-1 ; i>=0 ; i--)
+    {
+      m_matX.coeffRef(i,k) = -m_schur.matrixT().coeff(i,k);
+      if(k-i-1>0)
+        m_matX.coeffRef(i,k) -= (m_schur.matrixT().row(i).segment(i+1,k-i-1) * m_matX.col(k).segment(i+1,k-i-1)).value();
+      ComplexScalar z = m_schur.matrixT().coeff(i,i) - m_schur.matrixT().coeff(k,k);
+      if(z==ComplexScalar(0))
+      {
+        // If the i-th and k-th eigenvalue are equal, then z equals 0.
+        // Use a small value instead, to prevent division by zero.
+        numext::real_ref(z) = NumTraits<RealScalar>::epsilon() * matrixnorm;
+      }
+      m_matX.coeffRef(i,k) = m_matX.coeff(i,k) / z;
+    }
+  }
+
+  // Compute V as V = U X; now A = U T U^* = U X D X^(-1) U^* = V D V^(-1)
+  m_eivec.noalias() = m_schur.matrixU() * m_matX;
+  // .. and normalize the eigenvectors
+  for(Index k=0 ; k<n ; k++)
+  {
+    m_eivec.col(k).normalize();
+  }
+}
+
+
+template<typename MatrixType>
+void ComplexEigenSolver<MatrixType>::sortEigenvalues(bool computeEigenvectors)
+{
+  const Index n =  m_eivalues.size();
+  for (Index i=0; i<n; i++)
+  {
+    Index k;
+    m_eivalues.cwiseAbs().tail(n-i).minCoeff(&k);
+    if (k != 0)
+    {
+      k += i;
+      std::swap(m_eivalues[k],m_eivalues[i]);
+      if(computeEigenvectors)
+	m_eivec.col(i).swap(m_eivec.col(k));
+    }
+  }
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_EIGEN_SOLVER_H
diff --git a/third-party/Eigen/src/Eigenvalues/ComplexSchur.h b/third-party/Eigen/src/Eigenvalues/ComplexSchur.h
new file mode 100644
index 00000000..4354e401
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -0,0 +1,462 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Claire Maurice
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_SCHUR_H
+#define EIGEN_COMPLEX_SCHUR_H
+
+#include "./HessenbergDecomposition.h"
+
+namespace Eigen { 
+
+namespace internal {
+template<typename MatrixType, bool IsComplex> struct complex_schur_reduce_to_hessenberg;
+}
+
+/** \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  *
+  * \class ComplexSchur
+  *
+  * \brief Performs a complex Schur decomposition of a real or complex square matrix
+  *
+  * \tparam _MatrixType the type of the matrix of which we are
+  * computing the Schur decomposition; this is expected to be an
+  * instantiation of the Matrix class template.
+  *
+  * Given a real or complex square matrix A, this class computes the
+  * Schur decomposition: \f$ A = U T U^*\f$ where U is a unitary
+  * complex matrix, and T is a complex upper triangular matrix.  The
+  * diagonal of the matrix T corresponds to the eigenvalues of the
+  * matrix A.
+  *
+  * Call the function compute() to compute the Schur decomposition of
+  * a given matrix. Alternatively, you can use the 
+  * ComplexSchur(const MatrixType&, bool) constructor which computes
+  * the Schur decomposition at construction time. Once the
+  * decomposition is computed, you can use the matrixU() and matrixT()
+  * functions to retrieve the matrices U and V in the decomposition.
+  *
+  * \note This code is inspired from Jampack
+  *
+  * \sa class RealSchur, class EigenSolver, class ComplexEigenSolver
+  */
+template<typename _MatrixType> class ComplexSchur
+{
+  public:
+    typedef _MatrixType MatrixType;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      Options = MatrixType::Options,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+
+    /** \brief Scalar type for matrices of type \p _MatrixType. */
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+
+    /** \brief Complex scalar type for \p _MatrixType. 
+      *
+      * This is \c std::complex<Scalar> if #Scalar is real (e.g.,
+      * \c float or \c double) and just \c Scalar if #Scalar is
+      * complex.
+      */
+    typedef std::complex<RealScalar> ComplexScalar;
+
+    /** \brief Type for the matrices in the Schur decomposition.
+      *
+      * This is a square matrix with entries of type #ComplexScalar. 
+      * The size is the same as the size of \p _MatrixType.
+      */
+    typedef Matrix<ComplexScalar, RowsAtCompileTime, ColsAtCompileTime, Options, MaxRowsAtCompileTime, MaxColsAtCompileTime> ComplexMatrixType;
+
+    /** \brief Default constructor.
+      *
+      * \param [in] size  Positive integer, size of the matrix whose Schur decomposition will be computed.
+      *
+      * The default constructor is useful in cases in which the user
+      * intends to perform decompositions via compute().  The \p size
+      * parameter is only used as a hint. It is not an error to give a
+      * wrong \p size, but it may impair performance.
+      *
+      * \sa compute() for an example.
+      */
+    explicit ComplexSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime)
+      : m_matT(size,size),
+        m_matU(size,size),
+        m_hess(size),
+        m_isInitialized(false),
+        m_matUisUptodate(false),
+        m_maxIters(-1)
+    {}
+
+    /** \brief Constructor; computes Schur decomposition of given matrix. 
+      * 
+      * \param[in]  matrix    Square matrix whose Schur decomposition is to be computed.
+      * \param[in]  computeU  If true, both T and U are computed; if false, only T is computed.
+      *
+      * This constructor calls compute() to compute the Schur decomposition.
+      *
+      * \sa matrixT() and matrixU() for examples.
+      */
+    template<typename InputType>
+    explicit ComplexSchur(const EigenBase<InputType>& matrix, bool computeU = true)
+      : m_matT(matrix.rows(),matrix.cols()),
+        m_matU(matrix.rows(),matrix.cols()),
+        m_hess(matrix.rows()),
+        m_isInitialized(false),
+        m_matUisUptodate(false),
+        m_maxIters(-1)
+    {
+      compute(matrix.derived(), computeU);
+    }
+
+    /** \brief Returns the unitary matrix in the Schur decomposition. 
+      *
+      * \returns A const reference to the matrix U.
+      *
+      * It is assumed that either the constructor
+      * ComplexSchur(const MatrixType& matrix, bool computeU) or the
+      * member function compute(const MatrixType& matrix, bool computeU)
+      * has been called before to compute the Schur decomposition of a
+      * matrix, and that \p computeU was set to true (the default
+      * value).
+      *
+      * Example: \include ComplexSchur_matrixU.cpp
+      * Output: \verbinclude ComplexSchur_matrixU.out
+      */
+    const ComplexMatrixType& matrixU() const
+    {
+      eigen_assert(m_isInitialized && "ComplexSchur is not initialized.");
+      eigen_assert(m_matUisUptodate && "The matrix U has not been computed during the ComplexSchur decomposition.");
+      return m_matU;
+    }
+
+    /** \brief Returns the triangular matrix in the Schur decomposition. 
+      *
+      * \returns A const reference to the matrix T.
+      *
+      * It is assumed that either the constructor
+      * ComplexSchur(const MatrixType& matrix, bool computeU) or the
+      * member function compute(const MatrixType& matrix, bool computeU)
+      * has been called before to compute the Schur decomposition of a
+      * matrix.
+      *
+      * Note that this function returns a plain square matrix. If you want to reference
+      * only the upper triangular part, use:
+      * \code schur.matrixT().triangularView<Upper>() \endcode 
+      *
+      * Example: \include ComplexSchur_matrixT.cpp
+      * Output: \verbinclude ComplexSchur_matrixT.out
+      */
+    const ComplexMatrixType& matrixT() const
+    {
+      eigen_assert(m_isInitialized && "ComplexSchur is not initialized.");
+      return m_matT;
+    }
+
+    /** \brief Computes Schur decomposition of given matrix. 
+      * 
+      * \param[in]  matrix  Square matrix whose Schur decomposition is to be computed.
+      * \param[in]  computeU  If true, both T and U are computed; if false, only T is computed.
+
+      * \returns    Reference to \c *this
+      *
+      * The Schur decomposition is computed by first reducing the
+      * matrix to Hessenberg form using the class
+      * HessenbergDecomposition. The Hessenberg matrix is then reduced
+      * to triangular form by performing QR iterations with a single
+      * shift. The cost of computing the Schur decomposition depends
+      * on the number of iterations; as a rough guide, it may be taken
+      * on the number of iterations; as a rough guide, it may be taken
+      * to be \f$25n^3\f$ complex flops, or \f$10n^3\f$ complex flops
+      * if \a computeU is false.
+      *
+      * Example: \include ComplexSchur_compute.cpp
+      * Output: \verbinclude ComplexSchur_compute.out
+      *
+      * \sa compute(const MatrixType&, bool, Index)
+      */
+    template<typename InputType>
+    ComplexSchur& compute(const EigenBase<InputType>& matrix, bool computeU = true);
+    
+    /** \brief Compute Schur decomposition from a given Hessenberg matrix
+     *  \param[in] matrixH Matrix in Hessenberg form H
+     *  \param[in] matrixQ orthogonal matrix Q that transform a matrix A to H : A = Q H Q^T
+     *  \param computeU Computes the matriX U of the Schur vectors
+     * \return Reference to \c *this
+     * 
+     *  This routine assumes that the matrix is already reduced in Hessenberg form matrixH
+     *  using either the class HessenbergDecomposition or another mean. 
+     *  It computes the upper quasi-triangular matrix T of the Schur decomposition of H
+     *  When computeU is true, this routine computes the matrix U such that 
+     *  A = U T U^T =  (QZ) T (QZ)^T = Q H Q^T where A is the initial matrix
+     * 
+     * NOTE Q is referenced if computeU is true; so, if the initial orthogonal matrix
+     * is not available, the user should give an identity matrix (Q.setIdentity())
+     * 
+     * \sa compute(const MatrixType&, bool)
+     */
+    template<typename HessMatrixType, typename OrthMatrixType>
+    ComplexSchur& computeFromHessenberg(const HessMatrixType& matrixH, const OrthMatrixType& matrixQ,  bool computeU=true);
+
+    /** \brief Reports whether previous computation was successful.
+      *
+      * \returns \c Success if computation was succesful, \c NoConvergence otherwise.
+      */
+    ComputationInfo info() const
+    {
+      eigen_assert(m_isInitialized && "ComplexSchur is not initialized.");
+      return m_info;
+    }
+
+    /** \brief Sets the maximum number of iterations allowed. 
+      *
+      * If not specified by the user, the maximum number of iterations is m_maxIterationsPerRow times the size
+      * of the matrix.
+      */
+    ComplexSchur& setMaxIterations(Index maxIters)
+    {
+      m_maxIters = maxIters;
+      return *this;
+    }
+
+    /** \brief Returns the maximum number of iterations. */
+    Index getMaxIterations()
+    {
+      return m_maxIters;
+    }
+
+    /** \brief Maximum number of iterations per row.
+      *
+      * If not otherwise specified, the maximum number of iterations is this number times the size of the
+      * matrix. It is currently set to 30.
+      */
+    static const int m_maxIterationsPerRow = 30;
+
+  protected:
+    ComplexMatrixType m_matT, m_matU;
+    HessenbergDecomposition<MatrixType> m_hess;
+    ComputationInfo m_info;
+    bool m_isInitialized;
+    bool m_matUisUptodate;
+    Index m_maxIters;
+
+  private:  
+    bool subdiagonalEntryIsNeglegible(Index i);
+    ComplexScalar computeShift(Index iu, Index iter);
+    void reduceToTriangularForm(bool computeU);
+    friend struct internal::complex_schur_reduce_to_hessenberg<MatrixType, NumTraits<Scalar>::IsComplex>;
+};
+
+/** If m_matT(i+1,i) is neglegible in floating point arithmetic
+  * compared to m_matT(i,i) and m_matT(j,j), then set it to zero and
+  * return true, else return false. */
+template<typename MatrixType>
+inline bool ComplexSchur<MatrixType>::subdiagonalEntryIsNeglegible(Index i)
+{
+  RealScalar d = numext::norm1(m_matT.coeff(i,i)) + numext::norm1(m_matT.coeff(i+1,i+1));
+  RealScalar sd = numext::norm1(m_matT.coeff(i+1,i));
+  if (internal::isMuchSmallerThan(sd, d, NumTraits<RealScalar>::epsilon()))
+  {
+    m_matT.coeffRef(i+1,i) = ComplexScalar(0);
+    return true;
+  }
+  return false;
+}
+
+
+/** Compute the shift in the current QR iteration. */
+template<typename MatrixType>
+typename ComplexSchur<MatrixType>::ComplexScalar ComplexSchur<MatrixType>::computeShift(Index iu, Index iter)
+{
+  using std::abs;
+  if (iter == 10 || iter == 20) 
+  {
+    // exceptional shift, taken from http://www.netlib.org/eispack/comqr.f
+    return abs(numext::real(m_matT.coeff(iu,iu-1))) + abs(numext::real(m_matT.coeff(iu-1,iu-2)));
+  }
+
+  // compute the shift as one of the eigenvalues of t, the 2x2
+  // diagonal block on the bottom of the active submatrix
+  Matrix<ComplexScalar,2,2> t = m_matT.template block<2,2>(iu-1,iu-1);
+  RealScalar normt = t.cwiseAbs().sum();
+  t /= normt;     // the normalization by sf is to avoid under/overflow
+
+  ComplexScalar b = t.coeff(0,1) * t.coeff(1,0);
+  ComplexScalar c = t.coeff(0,0) - t.coeff(1,1);
+  ComplexScalar disc = sqrt(c*c + RealScalar(4)*b);
+  ComplexScalar det = t.coeff(0,0) * t.coeff(1,1) - b;
+  ComplexScalar trace = t.coeff(0,0) + t.coeff(1,1);
+  ComplexScalar eival1 = (trace + disc) / RealScalar(2);
+  ComplexScalar eival2 = (trace - disc) / RealScalar(2);
+  RealScalar eival1_norm = numext::norm1(eival1);
+  RealScalar eival2_norm = numext::norm1(eival2);
+  // A division by zero can only occur if eival1==eival2==0.
+  // In this case, det==0, and all we have to do is checking that eival2_norm!=0
+  if(eival1_norm > eival2_norm)
+    eival2 = det / eival1;
+  else if(eival2_norm!=RealScalar(0))
+    eival1 = det / eival2;
+
+  // choose the eigenvalue closest to the bottom entry of the diagonal
+  if(numext::norm1(eival1-t.coeff(1,1)) < numext::norm1(eival2-t.coeff(1,1)))
+    return normt * eival1;
+  else
+    return normt * eival2;
+}
+
+
+template<typename MatrixType>
+template<typename InputType>
+ComplexSchur<MatrixType>& ComplexSchur<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeU)
+{
+  m_matUisUptodate = false;
+  eigen_assert(matrix.cols() == matrix.rows());
+
+  if(matrix.cols() == 1)
+  {
+    m_matT = matrix.derived().template cast<ComplexScalar>();
+    if(computeU)  m_matU = ComplexMatrixType::Identity(1,1);
+    m_info = Success;
+    m_isInitialized = true;
+    m_matUisUptodate = computeU;
+    return *this;
+  }
+
+  internal::complex_schur_reduce_to_hessenberg<MatrixType, NumTraits<Scalar>::IsComplex>::run(*this, matrix.derived(), computeU);
+  computeFromHessenberg(m_matT, m_matU, computeU);
+  return *this;
+}
+
+template<typename MatrixType>
+template<typename HessMatrixType, typename OrthMatrixType>
+ComplexSchur<MatrixType>& ComplexSchur<MatrixType>::computeFromHessenberg(const HessMatrixType& matrixH, const OrthMatrixType& matrixQ, bool computeU)
+{
+  m_matT = matrixH;
+  if(computeU)
+    m_matU = matrixQ;
+  reduceToTriangularForm(computeU);
+  return *this;
+}
+namespace internal {
+
+/* Reduce given matrix to Hessenberg form */
+template<typename MatrixType, bool IsComplex>
+struct complex_schur_reduce_to_hessenberg
+{
+  // this is the implementation for the case IsComplex = true
+  static void run(ComplexSchur<MatrixType>& _this, const MatrixType& matrix, bool computeU)
+  {
+    _this.m_hess.compute(matrix);
+    _this.m_matT = _this.m_hess.matrixH();
+    if(computeU)  _this.m_matU = _this.m_hess.matrixQ();
+  }
+};
+
+template<typename MatrixType>
+struct complex_schur_reduce_to_hessenberg<MatrixType, false>
+{
+  static void run(ComplexSchur<MatrixType>& _this, const MatrixType& matrix, bool computeU)
+  {
+    typedef typename ComplexSchur<MatrixType>::ComplexScalar ComplexScalar;
+
+    // Note: m_hess is over RealScalar; m_matT and m_matU is over ComplexScalar
+    _this.m_hess.compute(matrix);
+    _this.m_matT = _this.m_hess.matrixH().template cast<ComplexScalar>();
+    if(computeU)  
+    {
+      // This may cause an allocation which seems to be avoidable
+      MatrixType Q = _this.m_hess.matrixQ(); 
+      _this.m_matU = Q.template cast<ComplexScalar>();
+    }
+  }
+};
+
+} // end namespace internal
+
+// Reduce the Hessenberg matrix m_matT to triangular form by QR iteration.
+template<typename MatrixType>
+void ComplexSchur<MatrixType>::reduceToTriangularForm(bool computeU)
+{  
+  Index maxIters = m_maxIters;
+  if (maxIters == -1)
+    maxIters = m_maxIterationsPerRow * m_matT.rows();
+
+  // The matrix m_matT is divided in three parts. 
+  // Rows 0,...,il-1 are decoupled from the rest because m_matT(il,il-1) is zero. 
+  // Rows il,...,iu is the part we are working on (the active submatrix).
+  // Rows iu+1,...,end are already brought in triangular form.
+  Index iu = m_matT.cols() - 1;
+  Index il;
+  Index iter = 0; // number of iterations we are working on the (iu,iu) element
+  Index totalIter = 0; // number of iterations for whole matrix
+
+  while(true)
+  {
+    // find iu, the bottom row of the active submatrix
+    while(iu > 0)
+    {
+      if(!subdiagonalEntryIsNeglegible(iu-1)) break;
+      iter = 0;
+      --iu;
+    }
+
+    // if iu is zero then we are done; the whole matrix is triangularized
+    if(iu==0) break;
+
+    // if we spent too many iterations, we give up
+    iter++;
+    totalIter++;
+    if(totalIter > maxIters) break;
+
+    // find il, the top row of the active submatrix
+    il = iu-1;
+    while(il > 0 && !subdiagonalEntryIsNeglegible(il-1))
+    {
+      --il;
+    }
+
+    /* perform the QR step using Givens rotations. The first rotation
+       creates a bulge; the (il+2,il) element becomes nonzero. This
+       bulge is chased down to the bottom of the active submatrix. */
+
+    ComplexScalar shift = computeShift(iu, iter);
+    JacobiRotation<ComplexScalar> rot;
+    rot.makeGivens(m_matT.coeff(il,il) - shift, m_matT.coeff(il+1,il));
+    m_matT.rightCols(m_matT.cols()-il).applyOnTheLeft(il, il+1, rot.adjoint());
+    m_matT.topRows((std::min)(il+2,iu)+1).applyOnTheRight(il, il+1, rot);
+    if(computeU) m_matU.applyOnTheRight(il, il+1, rot);
+
+    for(Index i=il+1 ; i<iu ; i++)
+    {
+      rot.makeGivens(m_matT.coeffRef(i,i-1), m_matT.coeffRef(i+1,i-1), &m_matT.coeffRef(i,i-1));
+      m_matT.coeffRef(i+1,i-1) = ComplexScalar(0);
+      m_matT.rightCols(m_matT.cols()-i).applyOnTheLeft(i, i+1, rot.adjoint());
+      m_matT.topRows((std::min)(i+2,iu)+1).applyOnTheRight(i, i+1, rot);
+      if(computeU) m_matU.applyOnTheRight(i, i+1, rot);
+    }
+  }
+
+  if(totalIter <= maxIters)
+    m_info = Success;
+  else
+    m_info = NoConvergence;
+
+  m_isInitialized = true;
+  m_matUisUptodate = computeU;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_SCHUR_H
diff --git a/third-party/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h b/third-party/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h
new file mode 100644
index 00000000..4980a3ed
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h
@@ -0,0 +1,91 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to LAPACKe
+ *    Complex Schur needed to complex unsymmetrical eigenvalues/eigenvectors.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_COMPLEX_SCHUR_LAPACKE_H
+#define EIGEN_COMPLEX_SCHUR_LAPACKE_H
+
+namespace Eigen { 
+
+/** \internal Specialization for the data types supported by LAPACKe */
+
+#define EIGEN_LAPACKE_SCHUR_COMPLEX(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX, LAPACKE_PREFIX_U, EIGCOLROW, LAPACKE_COLROW) \
+template<> template<typename InputType> inline \
+ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
+ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, bool computeU) \
+{ \
+  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> MatrixType; \
+  typedef MatrixType::RealScalar RealScalar; \
+  typedef std::complex<RealScalar> ComplexScalar; \
+\
+  eigen_assert(matrix.cols() == matrix.rows()); \
+\
+  m_matUisUptodate = false; \
+  if(matrix.cols() == 1) \
+  { \
+    m_matT = matrix.derived().template cast<ComplexScalar>(); \
+    if(computeU)  m_matU = ComplexMatrixType::Identity(1,1); \
+      m_info = Success; \
+      m_isInitialized = true; \
+      m_matUisUptodate = computeU; \
+      return *this; \
+  } \
+  lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), sdim, info; \
+  lapack_int matrix_order = LAPACKE_COLROW; \
+  char jobvs, sort='N'; \
+  LAPACK_##LAPACKE_PREFIX_U##_SELECT1 select = 0; \
+  jobvs = (computeU) ? 'V' : 'N'; \
+  m_matU.resize(n, n); \
+  lapack_int ldvs  = internal::convert_index<lapack_int>(m_matU.outerStride()); \
+  m_matT = matrix; \
+  lapack_int lda = internal::convert_index<lapack_int>(m_matT.outerStride()); \
+  Matrix<EIGTYPE, Dynamic, Dynamic> w; \
+  w.resize(n, 1);\
+  info = LAPACKE_##LAPACKE_PREFIX##gees( matrix_order, jobvs, sort, select, n, (LAPACKE_TYPE*)m_matT.data(), lda, &sdim, (LAPACKE_TYPE*)w.data(), (LAPACKE_TYPE*)m_matU.data(), ldvs ); \
+  if(info == 0) \
+    m_info = Success; \
+  else \
+    m_info = NoConvergence; \
+\
+  m_isInitialized = true; \
+  m_matUisUptodate = computeU; \
+  return *this; \
+\
+}
+
+EIGEN_LAPACKE_SCHUR_COMPLEX(dcomplex, lapack_complex_double, z, Z, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_SCHUR_COMPLEX(scomplex, lapack_complex_float,  c, C, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_SCHUR_COMPLEX(dcomplex, lapack_complex_double, z, Z, RowMajor, LAPACK_ROW_MAJOR)
+EIGEN_LAPACKE_SCHUR_COMPLEX(scomplex, lapack_complex_float,  c, C, RowMajor, LAPACK_ROW_MAJOR)
+
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_SCHUR_LAPACKE_H
diff --git a/third-party/Eigen/src/Eigenvalues/EigenSolver.h b/third-party/Eigen/src/Eigenvalues/EigenSolver.h
new file mode 100644
index 00000000..f205b185
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/EigenSolver.h
@@ -0,0 +1,622 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_EIGENSOLVER_H
+#define EIGEN_EIGENSOLVER_H
+
+#include "./RealSchur.h"
+
+namespace Eigen { 
+
+/** \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  *
+  * \class EigenSolver
+  *
+  * \brief Computes eigenvalues and eigenvectors of general matrices
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the
+  * eigendecomposition; this is expected to be an instantiation of the Matrix
+  * class template. Currently, only real matrices are supported.
+  *
+  * The eigenvalues and eigenvectors of a matrix \f$ A \f$ are scalars
+  * \f$ \lambda \f$ and vectors \f$ v \f$ such that \f$ Av = \lambda v \f$.  If
+  * \f$ D \f$ is a diagonal matrix with the eigenvalues on the diagonal, and
+  * \f$ V \f$ is a matrix with the eigenvectors as its columns, then \f$ A V =
+  * V D \f$. The matrix \f$ V \f$ is almost always invertible, in which case we
+  * have \f$ A = V D V^{-1} \f$. This is called the eigendecomposition.
+  *
+  * The eigenvalues and eigenvectors of a matrix may be complex, even when the
+  * matrix is real. However, we can choose real matrices \f$ V \f$ and \f$ D
+  * \f$ satisfying \f$ A V = V D \f$, just like the eigendecomposition, if the
+  * matrix \f$ D \f$ is not required to be diagonal, but if it is allowed to
+  * have blocks of the form
+  * \f[ \begin{bmatrix} u & v \\ -v & u \end{bmatrix} \f]
+  * (where \f$ u \f$ and \f$ v \f$ are real numbers) on the diagonal.  These
+  * blocks correspond to complex eigenvalue pairs \f$ u \pm iv \f$. We call
+  * this variant of the eigendecomposition the pseudo-eigendecomposition.
+  *
+  * Call the function compute() to compute the eigenvalues and eigenvectors of
+  * a given matrix. Alternatively, you can use the 
+  * EigenSolver(const MatrixType&, bool) constructor which computes the
+  * eigenvalues and eigenvectors at construction time. Once the eigenvalue and
+  * eigenvectors are computed, they can be retrieved with the eigenvalues() and
+  * eigenvectors() functions. The pseudoEigenvalueMatrix() and
+  * pseudoEigenvectors() methods allow the construction of the
+  * pseudo-eigendecomposition.
+  *
+  * The documentation for EigenSolver(const MatrixType&, bool) contains an
+  * example of the typical use of this class.
+  *
+  * \note The implementation is adapted from
+  * <a href="http://math.nist.gov/javanumerics/jama/">JAMA</a> (public domain).
+  * Their code is based on EISPACK.
+  *
+  * \sa MatrixBase::eigenvalues(), class ComplexEigenSolver, class SelfAdjointEigenSolver
+  */
+template<typename _MatrixType> class EigenSolver
+{
+  public:
+
+    /** \brief Synonym for the template parameter \p _MatrixType. */
+    typedef _MatrixType MatrixType;
+
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      Options = MatrixType::Options,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+
+    /** \brief Scalar type for matrices of type #MatrixType. */
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+
+    /** \brief Complex scalar type for #MatrixType. 
+      *
+      * This is \c std::complex<Scalar> if #Scalar is real (e.g.,
+      * \c float or \c double) and just \c Scalar if #Scalar is
+      * complex.
+      */
+    typedef std::complex<RealScalar> ComplexScalar;
+
+    /** \brief Type for vector of eigenvalues as returned by eigenvalues(). 
+      *
+      * This is a column vector with entries of type #ComplexScalar.
+      * The length of the vector is the size of #MatrixType.
+      */
+    typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;
+
+    /** \brief Type for matrix of eigenvectors as returned by eigenvectors(). 
+      *
+      * This is a square matrix with entries of type #ComplexScalar. 
+      * The size is the same as the size of #MatrixType.
+      */
+    typedef Matrix<ComplexScalar, RowsAtCompileTime, ColsAtCompileTime, Options, MaxRowsAtCompileTime, MaxColsAtCompileTime> EigenvectorsType;
+
+    /** \brief Default constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via EigenSolver::compute(const MatrixType&, bool).
+      *
+      * \sa compute() for an example.
+      */
+    EigenSolver() : m_eivec(), m_eivalues(), m_isInitialized(false), m_realSchur(), m_matT(), m_tmp() {}
+
+    /** \brief Default constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa EigenSolver()
+      */
+    explicit EigenSolver(Index size)
+      : m_eivec(size, size),
+        m_eivalues(size),
+        m_isInitialized(false),
+        m_eigenvectorsOk(false),
+        m_realSchur(size),
+        m_matT(size, size), 
+        m_tmp(size)
+    {}
+
+    /** \brief Constructor; computes eigendecomposition of given matrix. 
+      * 
+      * \param[in]  matrix  Square matrix whose eigendecomposition is to be computed.
+      * \param[in]  computeEigenvectors  If true, both the eigenvectors and the
+      *    eigenvalues are computed; if false, only the eigenvalues are
+      *    computed. 
+      *
+      * This constructor calls compute() to compute the eigenvalues
+      * and eigenvectors.
+      *
+      * Example: \include EigenSolver_EigenSolver_MatrixType.cpp
+      * Output: \verbinclude EigenSolver_EigenSolver_MatrixType.out
+      *
+      * \sa compute()
+      */
+    template<typename InputType>
+    explicit EigenSolver(const EigenBase<InputType>& matrix, bool computeEigenvectors = true)
+      : m_eivec(matrix.rows(), matrix.cols()),
+        m_eivalues(matrix.cols()),
+        m_isInitialized(false),
+        m_eigenvectorsOk(false),
+        m_realSchur(matrix.cols()),
+        m_matT(matrix.rows(), matrix.cols()), 
+        m_tmp(matrix.cols())
+    {
+      compute(matrix.derived(), computeEigenvectors);
+    }
+
+    /** \brief Returns the eigenvectors of given matrix. 
+      *
+      * \returns  %Matrix whose columns are the (possibly complex) eigenvectors.
+      *
+      * \pre Either the constructor 
+      * EigenSolver(const MatrixType&,bool) or the member function
+      * compute(const MatrixType&, bool) has been called before, and
+      * \p computeEigenvectors was set to true (the default).
+      *
+      * Column \f$ k \f$ of the returned matrix is an eigenvector corresponding
+      * to eigenvalue number \f$ k \f$ as returned by eigenvalues().  The
+      * eigenvectors are normalized to have (Euclidean) norm equal to one. The
+      * matrix returned by this function is the matrix \f$ V \f$ in the
+      * eigendecomposition \f$ A = V D V^{-1} \f$, if it exists.
+      *
+      * Example: \include EigenSolver_eigenvectors.cpp
+      * Output: \verbinclude EigenSolver_eigenvectors.out
+      *
+      * \sa eigenvalues(), pseudoEigenvectors()
+      */
+    EigenvectorsType eigenvectors() const;
+
+    /** \brief Returns the pseudo-eigenvectors of given matrix. 
+      *
+      * \returns  Const reference to matrix whose columns are the pseudo-eigenvectors.
+      *
+      * \pre Either the constructor 
+      * EigenSolver(const MatrixType&,bool) or the member function
+      * compute(const MatrixType&, bool) has been called before, and
+      * \p computeEigenvectors was set to true (the default).
+      *
+      * The real matrix \f$ V \f$ returned by this function and the
+      * block-diagonal matrix \f$ D \f$ returned by pseudoEigenvalueMatrix()
+      * satisfy \f$ AV = VD \f$.
+      *
+      * Example: \include EigenSolver_pseudoEigenvectors.cpp
+      * Output: \verbinclude EigenSolver_pseudoEigenvectors.out
+      *
+      * \sa pseudoEigenvalueMatrix(), eigenvectors()
+      */
+    const MatrixType& pseudoEigenvectors() const
+    {
+      eigen_assert(m_isInitialized && "EigenSolver is not initialized.");
+      eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
+      return m_eivec;
+    }
+
+    /** \brief Returns the block-diagonal matrix in the pseudo-eigendecomposition.
+      *
+      * \returns  A block-diagonal matrix.
+      *
+      * \pre Either the constructor 
+      * EigenSolver(const MatrixType&,bool) or the member function
+      * compute(const MatrixType&, bool) has been called before.
+      *
+      * The matrix \f$ D \f$ returned by this function is real and
+      * block-diagonal. The blocks on the diagonal are either 1-by-1 or 2-by-2
+      * blocks of the form
+      * \f$ \begin{bmatrix} u & v \\ -v & u \end{bmatrix} \f$.
+      * These blocks are not sorted in any particular order.
+      * The matrix \f$ D \f$ and the matrix \f$ V \f$ returned by
+      * pseudoEigenvectors() satisfy \f$ AV = VD \f$.
+      *
+      * \sa pseudoEigenvectors() for an example, eigenvalues()
+      */
+    MatrixType pseudoEigenvalueMatrix() const;
+
+    /** \brief Returns the eigenvalues of given matrix. 
+      *
+      * \returns A const reference to the column vector containing the eigenvalues.
+      *
+      * \pre Either the constructor 
+      * EigenSolver(const MatrixType&,bool) or the member function
+      * compute(const MatrixType&, bool) has been called before.
+      *
+      * The eigenvalues are repeated according to their algebraic multiplicity,
+      * so there are as many eigenvalues as rows in the matrix. The eigenvalues 
+      * are not sorted in any particular order.
+      *
+      * Example: \include EigenSolver_eigenvalues.cpp
+      * Output: \verbinclude EigenSolver_eigenvalues.out
+      *
+      * \sa eigenvectors(), pseudoEigenvalueMatrix(),
+      *     MatrixBase::eigenvalues()
+      */
+    const EigenvalueType& eigenvalues() const
+    {
+      eigen_assert(m_isInitialized && "EigenSolver is not initialized.");
+      return m_eivalues;
+    }
+
+    /** \brief Computes eigendecomposition of given matrix. 
+      * 
+      * \param[in]  matrix  Square matrix whose eigendecomposition is to be computed.
+      * \param[in]  computeEigenvectors  If true, both the eigenvectors and the
+      *    eigenvalues are computed; if false, only the eigenvalues are
+      *    computed. 
+      * \returns    Reference to \c *this
+      *
+      * This function computes the eigenvalues of the real matrix \p matrix.
+      * The eigenvalues() function can be used to retrieve them.  If 
+      * \p computeEigenvectors is true, then the eigenvectors are also computed
+      * and can be retrieved by calling eigenvectors().
+      *
+      * The matrix is first reduced to real Schur form using the RealSchur
+      * class. The Schur decomposition is then used to compute the eigenvalues
+      * and eigenvectors.
+      *
+      * The cost of the computation is dominated by the cost of the
+      * Schur decomposition, which is very approximately \f$ 25n^3 \f$
+      * (where \f$ n \f$ is the size of the matrix) if \p computeEigenvectors 
+      * is true, and \f$ 10n^3 \f$ if \p computeEigenvectors is false.
+      *
+      * This method reuses of the allocated data in the EigenSolver object.
+      *
+      * Example: \include EigenSolver_compute.cpp
+      * Output: \verbinclude EigenSolver_compute.out
+      */
+    template<typename InputType>
+    EigenSolver& compute(const EigenBase<InputType>& matrix, bool computeEigenvectors = true);
+
+    /** \returns NumericalIssue if the input contains INF or NaN values or overflow occured. Returns Success otherwise. */
+    ComputationInfo info() const
+    {
+      eigen_assert(m_isInitialized && "EigenSolver is not initialized.");
+      return m_info;
+    }
+
+    /** \brief Sets the maximum number of iterations allowed. */
+    EigenSolver& setMaxIterations(Index maxIters)
+    {
+      m_realSchur.setMaxIterations(maxIters);
+      return *this;
+    }
+
+    /** \brief Returns the maximum number of iterations. */
+    Index getMaxIterations()
+    {
+      return m_realSchur.getMaxIterations();
+    }
+
+  private:
+    void doComputeEigenvectors();
+
+  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+      EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
+    }
+    
+    MatrixType m_eivec;
+    EigenvalueType m_eivalues;
+    bool m_isInitialized;
+    bool m_eigenvectorsOk;
+    ComputationInfo m_info;
+    RealSchur<MatrixType> m_realSchur;
+    MatrixType m_matT;
+
+    typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> ColumnVectorType;
+    ColumnVectorType m_tmp;
+};
+
+template<typename MatrixType>
+MatrixType EigenSolver<MatrixType>::pseudoEigenvalueMatrix() const
+{
+  eigen_assert(m_isInitialized && "EigenSolver is not initialized.");
+  const RealScalar precision = RealScalar(2)*NumTraits<RealScalar>::epsilon();
+  Index n = m_eivalues.rows();
+  MatrixType matD = MatrixType::Zero(n,n);
+  for (Index i=0; i<n; ++i)
+  {
+    if (internal::isMuchSmallerThan(numext::imag(m_eivalues.coeff(i)), numext::real(m_eivalues.coeff(i)), precision))
+      matD.coeffRef(i,i) = numext::real(m_eivalues.coeff(i));
+    else
+    {
+      matD.template block<2,2>(i,i) <<  numext::real(m_eivalues.coeff(i)), numext::imag(m_eivalues.coeff(i)),
+                                       -numext::imag(m_eivalues.coeff(i)), numext::real(m_eivalues.coeff(i));
+      ++i;
+    }
+  }
+  return matD;
+}
+
+template<typename MatrixType>
+typename EigenSolver<MatrixType>::EigenvectorsType EigenSolver<MatrixType>::eigenvectors() const
+{
+  eigen_assert(m_isInitialized && "EigenSolver is not initialized.");
+  eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
+  const RealScalar precision = RealScalar(2)*NumTraits<RealScalar>::epsilon();
+  Index n = m_eivec.cols();
+  EigenvectorsType matV(n,n);
+  for (Index j=0; j<n; ++j)
+  {
+    if (internal::isMuchSmallerThan(numext::imag(m_eivalues.coeff(j)), numext::real(m_eivalues.coeff(j)), precision) || j+1==n)
+    {
+      // we have a real eigen value
+      matV.col(j) = m_eivec.col(j).template cast<ComplexScalar>();
+      matV.col(j).normalize();
+    }
+    else
+    {
+      // we have a pair of complex eigen values
+      for (Index i=0; i<n; ++i)
+      {
+        matV.coeffRef(i,j)   = ComplexScalar(m_eivec.coeff(i,j),  m_eivec.coeff(i,j+1));
+        matV.coeffRef(i,j+1) = ComplexScalar(m_eivec.coeff(i,j), -m_eivec.coeff(i,j+1));
+      }
+      matV.col(j).normalize();
+      matV.col(j+1).normalize();
+      ++j;
+    }
+  }
+  return matV;
+}
+
+template<typename MatrixType>
+template<typename InputType>
+EigenSolver<MatrixType>& 
+EigenSolver<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeEigenvectors)
+{
+  check_template_parameters();
+  
+  using std::sqrt;
+  using std::abs;
+  using numext::isfinite;
+  eigen_assert(matrix.cols() == matrix.rows());
+
+  // Reduce to real Schur form.
+  m_realSchur.compute(matrix.derived(), computeEigenvectors);
+  
+  m_info = m_realSchur.info();
+
+  if (m_info == Success)
+  {
+    m_matT = m_realSchur.matrixT();
+    if (computeEigenvectors)
+      m_eivec = m_realSchur.matrixU();
+  
+    // Compute eigenvalues from matT
+    m_eivalues.resize(matrix.cols());
+    Index i = 0;
+    while (i < matrix.cols()) 
+    {
+      if (i == matrix.cols() - 1 || m_matT.coeff(i+1, i) == Scalar(0)) 
+      {
+        m_eivalues.coeffRef(i) = m_matT.coeff(i, i);
+        if(!(isfinite)(m_eivalues.coeffRef(i)))
+        {
+          m_isInitialized = true;
+          m_eigenvectorsOk = false;
+          m_info = NumericalIssue;
+          return *this;
+        }
+        ++i;
+      }
+      else
+      {
+        Scalar p = Scalar(0.5) * (m_matT.coeff(i, i) - m_matT.coeff(i+1, i+1));
+        Scalar z;
+        // Compute z = sqrt(abs(p * p + m_matT.coeff(i+1, i) * m_matT.coeff(i, i+1)));
+        // without overflow
+        {
+          Scalar t0 = m_matT.coeff(i+1, i);
+          Scalar t1 = m_matT.coeff(i, i+1);
+          Scalar maxval = numext::maxi<Scalar>(abs(p),numext::maxi<Scalar>(abs(t0),abs(t1)));
+          t0 /= maxval;
+          t1 /= maxval;
+          Scalar p0 = p/maxval;
+          z = maxval * sqrt(abs(p0 * p0 + t0 * t1));
+        }
+        
+        m_eivalues.coeffRef(i)   = ComplexScalar(m_matT.coeff(i+1, i+1) + p, z);
+        m_eivalues.coeffRef(i+1) = ComplexScalar(m_matT.coeff(i+1, i+1) + p, -z);
+        if(!((isfinite)(m_eivalues.coeffRef(i)) && (isfinite)(m_eivalues.coeffRef(i+1))))
+        {
+          m_isInitialized = true;
+          m_eigenvectorsOk = false;
+          m_info = NumericalIssue;
+          return *this;
+        }
+        i += 2;
+      }
+    }
+    
+    // Compute eigenvectors.
+    if (computeEigenvectors)
+      doComputeEigenvectors();
+  }
+
+  m_isInitialized = true;
+  m_eigenvectorsOk = computeEigenvectors;
+
+  return *this;
+}
+
+
+template<typename MatrixType>
+void EigenSolver<MatrixType>::doComputeEigenvectors()
+{
+  using std::abs;
+  const Index size = m_eivec.cols();
+  const Scalar eps = NumTraits<Scalar>::epsilon();
+
+  // inefficient! this is already computed in RealSchur
+  Scalar norm(0);
+  for (Index j = 0; j < size; ++j)
+  {
+    norm += m_matT.row(j).segment((std::max)(j-1,Index(0)), size-(std::max)(j-1,Index(0))).cwiseAbs().sum();
+  }
+  
+  // Backsubstitute to find vectors of upper triangular form
+  if (norm == Scalar(0))
+  {
+    return;
+  }
+
+  for (Index n = size-1; n >= 0; n--)
+  {
+    Scalar p = m_eivalues.coeff(n).real();
+    Scalar q = m_eivalues.coeff(n).imag();
+
+    // Scalar vector
+    if (q == Scalar(0))
+    {
+      Scalar lastr(0), lastw(0);
+      Index l = n;
+
+      m_matT.coeffRef(n,n) = Scalar(1);
+      for (Index i = n-1; i >= 0; i--)
+      {
+        Scalar w = m_matT.coeff(i,i) - p;
+        Scalar r = m_matT.row(i).segment(l,n-l+1).dot(m_matT.col(n).segment(l, n-l+1));
+
+        if (m_eivalues.coeff(i).imag() < Scalar(0))
+        {
+          lastw = w;
+          lastr = r;
+        }
+        else
+        {
+          l = i;
+          if (m_eivalues.coeff(i).imag() == Scalar(0))
+          {
+            if (w != Scalar(0))
+              m_matT.coeffRef(i,n) = -r / w;
+            else
+              m_matT.coeffRef(i,n) = -r / (eps * norm);
+          }
+          else // Solve real equations
+          {
+            Scalar x = m_matT.coeff(i,i+1);
+            Scalar y = m_matT.coeff(i+1,i);
+            Scalar denom = (m_eivalues.coeff(i).real() - p) * (m_eivalues.coeff(i).real() - p) + m_eivalues.coeff(i).imag() * m_eivalues.coeff(i).imag();
+            Scalar t = (x * lastr - lastw * r) / denom;
+            m_matT.coeffRef(i,n) = t;
+            if (abs(x) > abs(lastw))
+              m_matT.coeffRef(i+1,n) = (-r - w * t) / x;
+            else
+              m_matT.coeffRef(i+1,n) = (-lastr - y * t) / lastw;
+          }
+
+          // Overflow control
+          Scalar t = abs(m_matT.coeff(i,n));
+          if ((eps * t) * t > Scalar(1))
+            m_matT.col(n).tail(size-i) /= t;
+        }
+      }
+    }
+    else if (q < Scalar(0) && n > 0) // Complex vector
+    {
+      Scalar lastra(0), lastsa(0), lastw(0);
+      Index l = n-1;
+
+      // Last vector component imaginary so matrix is triangular
+      if (abs(m_matT.coeff(n,n-1)) > abs(m_matT.coeff(n-1,n)))
+      {
+        m_matT.coeffRef(n-1,n-1) = q / m_matT.coeff(n,n-1);
+        m_matT.coeffRef(n-1,n) = -(m_matT.coeff(n,n) - p) / m_matT.coeff(n,n-1);
+      }
+      else
+      {
+        ComplexScalar cc = ComplexScalar(Scalar(0),-m_matT.coeff(n-1,n)) / ComplexScalar(m_matT.coeff(n-1,n-1)-p,q);
+        m_matT.coeffRef(n-1,n-1) = numext::real(cc);
+        m_matT.coeffRef(n-1,n) = numext::imag(cc);
+      }
+      m_matT.coeffRef(n,n-1) = Scalar(0);
+      m_matT.coeffRef(n,n) = Scalar(1);
+      for (Index i = n-2; i >= 0; i--)
+      {
+        Scalar ra = m_matT.row(i).segment(l, n-l+1).dot(m_matT.col(n-1).segment(l, n-l+1));
+        Scalar sa = m_matT.row(i).segment(l, n-l+1).dot(m_matT.col(n).segment(l, n-l+1));
+        Scalar w = m_matT.coeff(i,i) - p;
+
+        if (m_eivalues.coeff(i).imag() < Scalar(0))
+        {
+          lastw = w;
+          lastra = ra;
+          lastsa = sa;
+        }
+        else
+        {
+          l = i;
+          if (m_eivalues.coeff(i).imag() == RealScalar(0))
+          {
+            ComplexScalar cc = ComplexScalar(-ra,-sa) / ComplexScalar(w,q);
+            m_matT.coeffRef(i,n-1) = numext::real(cc);
+            m_matT.coeffRef(i,n) = numext::imag(cc);
+          }
+          else
+          {
+            // Solve complex equations
+            Scalar x = m_matT.coeff(i,i+1);
+            Scalar y = m_matT.coeff(i+1,i);
+            Scalar vr = (m_eivalues.coeff(i).real() - p) * (m_eivalues.coeff(i).real() - p) + m_eivalues.coeff(i).imag() * m_eivalues.coeff(i).imag() - q * q;
+            Scalar vi = (m_eivalues.coeff(i).real() - p) * Scalar(2) * q;
+            if ((vr == Scalar(0)) && (vi == Scalar(0)))
+              vr = eps * norm * (abs(w) + abs(q) + abs(x) + abs(y) + abs(lastw));
+
+            ComplexScalar cc = ComplexScalar(x*lastra-lastw*ra+q*sa,x*lastsa-lastw*sa-q*ra) / ComplexScalar(vr,vi);
+            m_matT.coeffRef(i,n-1) = numext::real(cc);
+            m_matT.coeffRef(i,n) = numext::imag(cc);
+            if (abs(x) > (abs(lastw) + abs(q)))
+            {
+              m_matT.coeffRef(i+1,n-1) = (-ra - w * m_matT.coeff(i,n-1) + q * m_matT.coeff(i,n)) / x;
+              m_matT.coeffRef(i+1,n) = (-sa - w * m_matT.coeff(i,n) - q * m_matT.coeff(i,n-1)) / x;
+            }
+            else
+            {
+              cc = ComplexScalar(-lastra-y*m_matT.coeff(i,n-1),-lastsa-y*m_matT.coeff(i,n)) / ComplexScalar(lastw,q);
+              m_matT.coeffRef(i+1,n-1) = numext::real(cc);
+              m_matT.coeffRef(i+1,n) = numext::imag(cc);
+            }
+          }
+
+          // Overflow control
+          Scalar t = numext::maxi<Scalar>(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
+          if ((eps * t) * t > Scalar(1))
+            m_matT.block(i, n-1, size-i, 2) /= t;
+
+        }
+      }
+      
+      // We handled a pair of complex conjugate eigenvalues, so need to skip them both
+      n--;
+    }
+    else
+    {
+      eigen_assert(0 && "Internal bug in EigenSolver (INF or NaN has not been detected)"); // this should not happen
+    }
+  }
+
+  // Back transformation to get eigenvectors of original matrix
+  for (Index j = size-1; j >= 0; j--)
+  {
+    m_tmp.noalias() = m_eivec.leftCols(j+1) * m_matT.col(j).segment(0, j+1);
+    m_eivec.col(j) = m_tmp;
+  }
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_EIGENSOLVER_H
diff --git a/third-party/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/third-party/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
new file mode 100644
index 00000000..87d789b3
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h
@@ -0,0 +1,418 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2016 Tobias Wood <tobias@spinicist.org.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERALIZEDEIGENSOLVER_H
+#define EIGEN_GENERALIZEDEIGENSOLVER_H
+
+#include "./RealQZ.h"
+
+namespace Eigen { 
+
+/** \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  *
+  * \class GeneralizedEigenSolver
+  *
+  * \brief Computes the generalized eigenvalues and eigenvectors of a pair of general matrices
+  *
+  * \tparam _MatrixType the type of the matrices of which we are computing the
+  * eigen-decomposition; this is expected to be an instantiation of the Matrix
+  * class template. Currently, only real matrices are supported.
+  *
+  * The generalized eigenvalues and eigenvectors of a matrix pair \f$ A \f$ and \f$ B \f$ are scalars
+  * \f$ \lambda \f$ and vectors \f$ v \f$ such that \f$ Av = \lambda Bv \f$.  If
+  * \f$ D \f$ is a diagonal matrix with the eigenvalues on the diagonal, and
+  * \f$ V \f$ is a matrix with the eigenvectors as its columns, then \f$ A V =
+  * B V D \f$. The matrix \f$ V \f$ is almost always invertible, in which case we
+  * have \f$ A = B V D V^{-1} \f$. This is called the generalized eigen-decomposition.
+  *
+  * The generalized eigenvalues and eigenvectors of a matrix pair may be complex, even when the
+  * matrices are real. Moreover, the generalized eigenvalue might be infinite if the matrix B is
+  * singular. To workaround this difficulty, the eigenvalues are provided as a pair of complex \f$ \alpha \f$
+  * and real \f$ \beta \f$ such that: \f$ \lambda_i = \alpha_i / \beta_i \f$. If \f$ \beta_i \f$ is (nearly) zero,
+  * then one can consider the well defined left eigenvalue \f$ \mu = \beta_i / \alpha_i\f$ such that:
+  * \f$ \mu_i A v_i = B v_i \f$, or even \f$ \mu_i u_i^T A  = u_i^T B \f$ where \f$ u_i \f$ is
+  * called the left eigenvector.
+  *
+  * Call the function compute() to compute the generalized eigenvalues and eigenvectors of
+  * a given matrix pair. Alternatively, you can use the
+  * GeneralizedEigenSolver(const MatrixType&, const MatrixType&, bool) constructor which computes the
+  * eigenvalues and eigenvectors at construction time. Once the eigenvalue and
+  * eigenvectors are computed, they can be retrieved with the eigenvalues() and
+  * eigenvectors() functions.
+  *
+  * Here is an usage example of this class:
+  * Example: \include GeneralizedEigenSolver.cpp
+  * Output: \verbinclude GeneralizedEigenSolver.out
+  *
+  * \sa MatrixBase::eigenvalues(), class ComplexEigenSolver, class SelfAdjointEigenSolver
+  */
+template<typename _MatrixType> class GeneralizedEigenSolver
+{
+  public:
+
+    /** \brief Synonym for the template parameter \p _MatrixType. */
+    typedef _MatrixType MatrixType;
+
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      Options = MatrixType::Options,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+
+    /** \brief Scalar type for matrices of type #MatrixType. */
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+
+    /** \brief Complex scalar type for #MatrixType. 
+      *
+      * This is \c std::complex<Scalar> if #Scalar is real (e.g.,
+      * \c float or \c double) and just \c Scalar if #Scalar is
+      * complex.
+      */
+    typedef std::complex<RealScalar> ComplexScalar;
+
+    /** \brief Type for vector of real scalar values eigenvalues as returned by betas().
+      *
+      * This is a column vector with entries of type #Scalar.
+      * The length of the vector is the size of #MatrixType.
+      */
+    typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> VectorType;
+
+    /** \brief Type for vector of complex scalar values eigenvalues as returned by alphas().
+      *
+      * This is a column vector with entries of type #ComplexScalar.
+      * The length of the vector is the size of #MatrixType.
+      */
+    typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> ComplexVectorType;
+
+    /** \brief Expression type for the eigenvalues as returned by eigenvalues().
+      */
+    typedef CwiseBinaryOp<internal::scalar_quotient_op<ComplexScalar,Scalar>,ComplexVectorType,VectorType> EigenvalueType;
+
+    /** \brief Type for matrix of eigenvectors as returned by eigenvectors(). 
+      *
+      * This is a square matrix with entries of type #ComplexScalar. 
+      * The size is the same as the size of #MatrixType.
+      */
+    typedef Matrix<ComplexScalar, RowsAtCompileTime, ColsAtCompileTime, Options, MaxRowsAtCompileTime, MaxColsAtCompileTime> EigenvectorsType;
+
+    /** \brief Default constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via EigenSolver::compute(const MatrixType&, bool).
+      *
+      * \sa compute() for an example.
+      */
+    GeneralizedEigenSolver()
+      : m_eivec(),
+        m_alphas(),
+        m_betas(),
+        m_valuesOkay(false),
+        m_vectorsOkay(false),
+        m_realQZ()
+    {}
+
+    /** \brief Default constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa GeneralizedEigenSolver()
+      */
+    explicit GeneralizedEigenSolver(Index size)
+      : m_eivec(size, size),
+        m_alphas(size),
+        m_betas(size),
+        m_valuesOkay(false),
+        m_vectorsOkay(false),
+        m_realQZ(size),
+        m_tmp(size)
+    {}
+
+    /** \brief Constructor; computes the generalized eigendecomposition of given matrix pair.
+      * 
+      * \param[in]  A  Square matrix whose eigendecomposition is to be computed.
+      * \param[in]  B  Square matrix whose eigendecomposition is to be computed.
+      * \param[in]  computeEigenvectors  If true, both the eigenvectors and the
+      *    eigenvalues are computed; if false, only the eigenvalues are computed.
+      *
+      * This constructor calls compute() to compute the generalized eigenvalues
+      * and eigenvectors.
+      *
+      * \sa compute()
+      */
+    GeneralizedEigenSolver(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true)
+      : m_eivec(A.rows(), A.cols()),
+        m_alphas(A.cols()),
+        m_betas(A.cols()),
+        m_valuesOkay(false),
+        m_vectorsOkay(false),
+        m_realQZ(A.cols()),
+        m_tmp(A.cols())
+    {
+      compute(A, B, computeEigenvectors);
+    }
+
+    /* \brief Returns the computed generalized eigenvectors.
+      *
+      * \returns  %Matrix whose columns are the (possibly complex) right eigenvectors.
+      * i.e. the eigenvectors that solve (A - l*B)x = 0. The ordering matches the eigenvalues.
+      *
+      * \pre Either the constructor 
+      * GeneralizedEigenSolver(const MatrixType&,const MatrixType&, bool) or the member function
+      * compute(const MatrixType&, const MatrixType& bool) has been called before, and
+      * \p computeEigenvectors was set to true (the default).
+      *
+      * \sa eigenvalues()
+      */
+    EigenvectorsType eigenvectors() const {
+      eigen_assert(m_vectorsOkay && "Eigenvectors for GeneralizedEigenSolver were not calculated.");
+      return m_eivec;
+    }
+
+    /** \brief Returns an expression of the computed generalized eigenvalues.
+      *
+      * \returns An expression of the column vector containing the eigenvalues.
+      *
+      * It is a shortcut for \code this->alphas().cwiseQuotient(this->betas()); \endcode
+      * Not that betas might contain zeros. It is therefore not recommended to use this function,
+      * but rather directly deal with the alphas and betas vectors.
+      *
+      * \pre Either the constructor 
+      * GeneralizedEigenSolver(const MatrixType&,const MatrixType&,bool) or the member function
+      * compute(const MatrixType&,const MatrixType&,bool) has been called before.
+      *
+      * The eigenvalues are repeated according to their algebraic multiplicity,
+      * so there are as many eigenvalues as rows in the matrix. The eigenvalues 
+      * are not sorted in any particular order.
+      *
+      * \sa alphas(), betas(), eigenvectors()
+      */
+    EigenvalueType eigenvalues() const
+    {
+      eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized.");
+      return EigenvalueType(m_alphas,m_betas);
+    }
+
+    /** \returns A const reference to the vectors containing the alpha values
+      *
+      * This vector permits to reconstruct the j-th eigenvalues as alphas(i)/betas(j).
+      *
+      * \sa betas(), eigenvalues() */
+    ComplexVectorType alphas() const
+    {
+      eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized.");
+      return m_alphas;
+    }
+
+    /** \returns A const reference to the vectors containing the beta values
+      *
+      * This vector permits to reconstruct the j-th eigenvalues as alphas(i)/betas(j).
+      *
+      * \sa alphas(), eigenvalues() */
+    VectorType betas() const
+    {
+      eigen_assert(m_valuesOkay && "GeneralizedEigenSolver is not initialized.");
+      return m_betas;
+    }
+
+    /** \brief Computes generalized eigendecomposition of given matrix.
+      * 
+      * \param[in]  A  Square matrix whose eigendecomposition is to be computed.
+      * \param[in]  B  Square matrix whose eigendecomposition is to be computed.
+      * \param[in]  computeEigenvectors  If true, both the eigenvectors and the
+      *    eigenvalues are computed; if false, only the eigenvalues are
+      *    computed. 
+      * \returns    Reference to \c *this
+      *
+      * This function computes the eigenvalues of the real matrix \p matrix.
+      * The eigenvalues() function can be used to retrieve them.  If 
+      * \p computeEigenvectors is true, then the eigenvectors are also computed
+      * and can be retrieved by calling eigenvectors().
+      *
+      * The matrix is first reduced to real generalized Schur form using the RealQZ
+      * class. The generalized Schur decomposition is then used to compute the eigenvalues
+      * and eigenvectors.
+      *
+      * The cost of the computation is dominated by the cost of the
+      * generalized Schur decomposition.
+      *
+      * This method reuses of the allocated data in the GeneralizedEigenSolver object.
+      */
+    GeneralizedEigenSolver& compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors = true);
+
+    ComputationInfo info() const
+    {
+      eigen_assert(m_valuesOkay && "EigenSolver is not initialized.");
+      return m_realQZ.info();
+    }
+
+    /** Sets the maximal number of iterations allowed.
+    */
+    GeneralizedEigenSolver& setMaxIterations(Index maxIters)
+    {
+      m_realQZ.setMaxIterations(maxIters);
+      return *this;
+    }
+
+  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+      EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL);
+    }
+    
+    EigenvectorsType m_eivec;
+    ComplexVectorType m_alphas;
+    VectorType m_betas;
+    bool m_valuesOkay, m_vectorsOkay;
+    RealQZ<MatrixType> m_realQZ;
+    ComplexVectorType m_tmp;
+};
+
+template<typename MatrixType>
+GeneralizedEigenSolver<MatrixType>&
+GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixType& B, bool computeEigenvectors)
+{
+  check_template_parameters();
+  
+  using std::sqrt;
+  using std::abs;
+  eigen_assert(A.cols() == A.rows() && B.cols() == A.rows() && B.cols() == B.rows());
+  Index size = A.cols();
+  m_valuesOkay = false;
+  m_vectorsOkay = false;
+  // Reduce to generalized real Schur form:
+  // A = Q S Z and B = Q T Z
+  m_realQZ.compute(A, B, computeEigenvectors);
+  if (m_realQZ.info() == Success)
+  {
+    // Resize storage
+    m_alphas.resize(size);
+    m_betas.resize(size);
+    if (computeEigenvectors)
+    {
+      m_eivec.resize(size,size);
+      m_tmp.resize(size);
+    }
+
+    // Aliases:
+    Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size);
+    ComplexVectorType &cv = m_tmp;
+    const MatrixType &mS = m_realQZ.matrixS();
+    const MatrixType &mT = m_realQZ.matrixT();
+
+    Index i = 0;
+    while (i < size)
+    {
+      if (i == size - 1 || mS.coeff(i+1, i) == Scalar(0))
+      {
+        // Real eigenvalue
+        m_alphas.coeffRef(i) = mS.diagonal().coeff(i);
+        m_betas.coeffRef(i)  = mT.diagonal().coeff(i);
+        if (computeEigenvectors)
+        {
+          v.setConstant(Scalar(0.0));
+          v.coeffRef(i) = Scalar(1.0);
+          // For singular eigenvalues do nothing more
+          if(abs(m_betas.coeffRef(i)) >= (std::numeric_limits<RealScalar>::min)())
+          {
+            // Non-singular eigenvalue
+            const Scalar alpha = real(m_alphas.coeffRef(i));
+            const Scalar beta = m_betas.coeffRef(i);
+            for (Index j = i-1; j >= 0; j--)
+            {
+              const Index st = j+1;
+              const Index sz = i-j;
+              if (j > 0 && mS.coeff(j, j-1) != Scalar(0))
+              {
+                // 2x2 block
+                Matrix<Scalar, 2, 1> rhs = (alpha*mT.template block<2,Dynamic>(j-1,st,2,sz) - beta*mS.template block<2,Dynamic>(j-1,st,2,sz)) .lazyProduct( v.segment(st,sz) );
+                Matrix<Scalar, 2, 2> lhs = beta * mS.template block<2,2>(j-1,j-1) - alpha * mT.template block<2,2>(j-1,j-1);
+                v.template segment<2>(j-1) = lhs.partialPivLu().solve(rhs);
+                j--;
+              }
+              else
+              {
+                v.coeffRef(j) = -v.segment(st,sz).transpose().cwiseProduct(beta*mS.block(j,st,1,sz) - alpha*mT.block(j,st,1,sz)).sum() / (beta*mS.coeffRef(j,j) - alpha*mT.coeffRef(j,j));
+              }
+            }
+          }
+          m_eivec.col(i).real().noalias() = m_realQZ.matrixZ().transpose() * v;
+          m_eivec.col(i).real().normalize();
+          m_eivec.col(i).imag().setConstant(0);
+        }
+        ++i;
+      }
+      else
+      {
+        // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a positive diagonal 2x2 block T
+        // Then taking beta=T_00*T_11, we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00):
+
+        // T =  [a 0]
+        //      [0 b]
+        RealScalar a = mT.diagonal().coeff(i),
+                   b = mT.diagonal().coeff(i+1);
+        const RealScalar beta = m_betas.coeffRef(i) = m_betas.coeffRef(i+1) = a*b;
+
+        // ^^ NOTE: using diagonal()(i) instead of coeff(i,i) workarounds a MSVC bug.
+        Matrix<RealScalar,2,2> S2 = mS.template block<2,2>(i,i) * Matrix<Scalar,2,1>(b,a).asDiagonal();
+
+        Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1));
+        Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1)));
+        const ComplexScalar alpha = ComplexScalar(S2.coeff(1,1) + p, (beta > 0) ? z : -z);
+        m_alphas.coeffRef(i)   = conj(alpha);
+        m_alphas.coeffRef(i+1) = alpha;
+
+        if (computeEigenvectors) {
+          // Compute eigenvector in position (i+1) and then position (i) is just the conjugate
+          cv.setZero();
+          cv.coeffRef(i+1) = Scalar(1.0);
+          // here, the "static_cast" workaound expression template issues.
+          cv.coeffRef(i) = -(static_cast<Scalar>(beta*mS.coeffRef(i,i+1)) - alpha*mT.coeffRef(i,i+1))
+                          / (static_cast<Scalar>(beta*mS.coeffRef(i,i))   - alpha*mT.coeffRef(i,i));
+          for (Index j = i-1; j >= 0; j--)
+          {
+            const Index st = j+1;
+            const Index sz = i+1-j;
+            if (j > 0 && mS.coeff(j, j-1) != Scalar(0))
+            {
+              // 2x2 block
+              Matrix<ComplexScalar, 2, 1> rhs = (alpha*mT.template block<2,Dynamic>(j-1,st,2,sz) - beta*mS.template block<2,Dynamic>(j-1,st,2,sz)) .lazyProduct( cv.segment(st,sz) );
+              Matrix<ComplexScalar, 2, 2> lhs = beta * mS.template block<2,2>(j-1,j-1) - alpha * mT.template block<2,2>(j-1,j-1);
+              cv.template segment<2>(j-1) = lhs.partialPivLu().solve(rhs);
+              j--;
+            } else {
+              cv.coeffRef(j) =  cv.segment(st,sz).transpose().cwiseProduct(beta*mS.block(j,st,1,sz) - alpha*mT.block(j,st,1,sz)).sum()
+                              / (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j)));
+            }
+          }
+          m_eivec.col(i+1).noalias() = (m_realQZ.matrixZ().transpose() * cv);
+          m_eivec.col(i+1).normalize();
+          m_eivec.col(i) = m_eivec.col(i+1).conjugate();
+        }
+        i += 2;
+      }
+    }
+
+    m_valuesOkay = true;
+    m_vectorsOkay = computeEigenvectors;
+  }
+  return *this;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERALIZEDEIGENSOLVER_H
diff --git a/third-party/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h b/third-party/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h
new file mode 100644
index 00000000..5f6bb828
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h
@@ -0,0 +1,226 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GENERALIZEDSELFADJOINTEIGENSOLVER_H
+#define EIGEN_GENERALIZEDSELFADJOINTEIGENSOLVER_H
+
+#include "./Tridiagonalization.h"
+
+namespace Eigen { 
+
+/** \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  *
+  * \class GeneralizedSelfAdjointEigenSolver
+  *
+  * \brief Computes eigenvalues and eigenvectors of the generalized selfadjoint eigen problem
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the
+  * eigendecomposition; this is expected to be an instantiation of the Matrix
+  * class template.
+  *
+  * This class solves the generalized eigenvalue problem
+  * \f$ Av = \lambda Bv \f$. In this case, the matrix \f$ A \f$ should be
+  * selfadjoint and the matrix \f$ B \f$ should be positive definite.
+  *
+  * Only the \b lower \b triangular \b part of the input matrix is referenced.
+  *
+  * Call the function compute() to compute the eigenvalues and eigenvectors of
+  * a given matrix. Alternatively, you can use the
+  * GeneralizedSelfAdjointEigenSolver(const MatrixType&, const MatrixType&, int)
+  * constructor which computes the eigenvalues and eigenvectors at construction time.
+  * Once the eigenvalue and eigenvectors are computed, they can be retrieved with the eigenvalues()
+  * and eigenvectors() functions.
+  *
+  * The documentation for GeneralizedSelfAdjointEigenSolver(const MatrixType&, const MatrixType&, int)
+  * contains an example of the typical use of this class.
+  *
+  * \sa class SelfAdjointEigenSolver, class EigenSolver, class ComplexEigenSolver
+  */
+template<typename _MatrixType>
+class GeneralizedSelfAdjointEigenSolver : public SelfAdjointEigenSolver<_MatrixType>
+{
+    typedef SelfAdjointEigenSolver<_MatrixType> Base;
+  public:
+
+    typedef _MatrixType MatrixType;
+
+    /** \brief Default constructor for fixed-size matrices.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via compute(). This constructor
+      * can only be used if \p _MatrixType is a fixed-size matrix; use
+      * GeneralizedSelfAdjointEigenSolver(Index) for dynamic-size matrices.
+      */
+    GeneralizedSelfAdjointEigenSolver() : Base() {}
+
+    /** \brief Constructor, pre-allocates memory for dynamic-size matrices.
+      *
+      * \param [in]  size  Positive integer, size of the matrix whose
+      * eigenvalues and eigenvectors will be computed.
+      *
+      * This constructor is useful for dynamic-size matrices, when the user
+      * intends to perform decompositions via compute(). The \p size
+      * parameter is only used as a hint. It is not an error to give a wrong
+      * \p size, but it may impair performance.
+      *
+      * \sa compute() for an example
+      */
+    explicit GeneralizedSelfAdjointEigenSolver(Index size)
+        : Base(size)
+    {}
+
+    /** \brief Constructor; computes generalized eigendecomposition of given matrix pencil.
+      *
+      * \param[in]  matA  Selfadjoint matrix in matrix pencil.
+      *                   Only the lower triangular part of the matrix is referenced.
+      * \param[in]  matB  Positive-definite matrix in matrix pencil.
+      *                   Only the lower triangular part of the matrix is referenced.
+      * \param[in]  options A or-ed set of flags {#ComputeEigenvectors,#EigenvaluesOnly} | {#Ax_lBx,#ABx_lx,#BAx_lx}.
+      *                     Default is #ComputeEigenvectors|#Ax_lBx.
+      *
+      * This constructor calls compute(const MatrixType&, const MatrixType&, int)
+      * to compute the eigenvalues and (if requested) the eigenvectors of the
+      * generalized eigenproblem \f$ Ax = \lambda B x \f$ with \a matA the
+      * selfadjoint matrix \f$ A \f$ and \a matB the positive definite matrix
+      * \f$ B \f$. Each eigenvector \f$ x \f$ satisfies the property
+      * \f$ x^* B x = 1 \f$. The eigenvectors are computed if
+      * \a options contains ComputeEigenvectors.
+      *
+      * In addition, the two following variants can be solved via \p options:
+      * - \c ABx_lx: \f$ ABx = \lambda x \f$
+      * - \c BAx_lx: \f$ BAx = \lambda x \f$
+      *
+      * Example: \include SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.cpp
+      * Output: \verbinclude SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.out
+      *
+      * \sa compute(const MatrixType&, const MatrixType&, int)
+      */
+    GeneralizedSelfAdjointEigenSolver(const MatrixType& matA, const MatrixType& matB,
+                                      int options = ComputeEigenvectors|Ax_lBx)
+      : Base(matA.cols())
+    {
+      compute(matA, matB, options);
+    }
+
+    /** \brief Computes generalized eigendecomposition of given matrix pencil.
+      *
+      * \param[in]  matA  Selfadjoint matrix in matrix pencil.
+      *                   Only the lower triangular part of the matrix is referenced.
+      * \param[in]  matB  Positive-definite matrix in matrix pencil.
+      *                   Only the lower triangular part of the matrix is referenced.
+      * \param[in]  options A or-ed set of flags {#ComputeEigenvectors,#EigenvaluesOnly} | {#Ax_lBx,#ABx_lx,#BAx_lx}.
+      *                     Default is #ComputeEigenvectors|#Ax_lBx.
+      *
+      * \returns    Reference to \c *this
+      *
+      * Accoring to \p options, this function computes eigenvalues and (if requested)
+      * the eigenvectors of one of the following three generalized eigenproblems:
+      * - \c Ax_lBx: \f$ Ax = \lambda B x \f$
+      * - \c ABx_lx: \f$ ABx = \lambda x \f$
+      * - \c BAx_lx: \f$ BAx = \lambda x \f$
+      * with \a matA the selfadjoint matrix \f$ A \f$ and \a matB the positive definite
+      * matrix \f$ B \f$.
+      * In addition, each eigenvector \f$ x \f$ satisfies the property \f$ x^* B x = 1 \f$.
+      *
+      * The eigenvalues() function can be used to retrieve
+      * the eigenvalues. If \p options contains ComputeEigenvectors, then the
+      * eigenvectors are also computed and can be retrieved by calling
+      * eigenvectors().
+      *
+      * The implementation uses LLT to compute the Cholesky decomposition
+      * \f$ B = LL^* \f$ and computes the classical eigendecomposition
+      * of the selfadjoint matrix \f$ L^{-1} A (L^*)^{-1} \f$ if \p options contains Ax_lBx
+      * and of \f$ L^{*} A L \f$ otherwise. This solves the
+      * generalized eigenproblem, because any solution of the generalized
+      * eigenproblem \f$ Ax = \lambda B x \f$ corresponds to a solution
+      * \f$ L^{-1} A (L^*)^{-1} (L^* x) = \lambda (L^* x) \f$ of the
+      * eigenproblem for \f$ L^{-1} A (L^*)^{-1} \f$. Similar statements
+      * can be made for the two other variants.
+      *
+      * Example: \include SelfAdjointEigenSolver_compute_MatrixType2.cpp
+      * Output: \verbinclude SelfAdjointEigenSolver_compute_MatrixType2.out
+      *
+      * \sa GeneralizedSelfAdjointEigenSolver(const MatrixType&, const MatrixType&, int)
+      */
+    GeneralizedSelfAdjointEigenSolver& compute(const MatrixType& matA, const MatrixType& matB,
+                                               int options = ComputeEigenvectors|Ax_lBx);
+
+  protected:
+
+};
+
+
+template<typename MatrixType>
+GeneralizedSelfAdjointEigenSolver<MatrixType>& GeneralizedSelfAdjointEigenSolver<MatrixType>::
+compute(const MatrixType& matA, const MatrixType& matB, int options)
+{
+  eigen_assert(matA.cols()==matA.rows() && matB.rows()==matA.rows() && matB.cols()==matB.rows());
+  eigen_assert((options&~(EigVecMask|GenEigMask))==0
+          && (options&EigVecMask)!=EigVecMask
+          && ((options&GenEigMask)==0 || (options&GenEigMask)==Ax_lBx
+           || (options&GenEigMask)==ABx_lx || (options&GenEigMask)==BAx_lx)
+          && "invalid option parameter");
+
+  bool computeEigVecs = ((options&EigVecMask)==0) || ((options&EigVecMask)==ComputeEigenvectors);
+
+  // Compute the cholesky decomposition of matB = L L' = U'U
+  LLT<MatrixType> cholB(matB);
+
+  int type = (options&GenEigMask);
+  if(type==0)
+    type = Ax_lBx;
+
+  if(type==Ax_lBx)
+  {
+    // compute C = inv(L) A inv(L')
+    MatrixType matC = matA.template selfadjointView<Lower>();
+    cholB.matrixL().template solveInPlace<OnTheLeft>(matC);
+    cholB.matrixU().template solveInPlace<OnTheRight>(matC);
+
+    Base::compute(matC, computeEigVecs ? ComputeEigenvectors : EigenvaluesOnly );
+
+    // transform back the eigen vectors: evecs = inv(U) * evecs
+    if(computeEigVecs)
+      cholB.matrixU().solveInPlace(Base::m_eivec);
+  }
+  else if(type==ABx_lx)
+  {
+    // compute C = L' A L
+    MatrixType matC = matA.template selfadjointView<Lower>();
+    matC = matC * cholB.matrixL();
+    matC = cholB.matrixU() * matC;
+
+    Base::compute(matC, computeEigVecs ? ComputeEigenvectors : EigenvaluesOnly);
+
+    // transform back the eigen vectors: evecs = inv(U) * evecs
+    if(computeEigVecs)
+      cholB.matrixU().solveInPlace(Base::m_eivec);
+  }
+  else if(type==BAx_lx)
+  {
+    // compute C = L' A L
+    MatrixType matC = matA.template selfadjointView<Lower>();
+    matC = matC * cholB.matrixL();
+    matC = cholB.matrixU() * matC;
+
+    Base::compute(matC, computeEigVecs ? ComputeEigenvectors : EigenvaluesOnly);
+
+    // transform back the eigen vectors: evecs = L * evecs
+    if(computeEigVecs)
+      Base::m_eivec = cholB.matrixL() * Base::m_eivec;
+  }
+
+  return *this;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_GENERALIZEDSELFADJOINTEIGENSOLVER_H
diff --git a/third-party/Eigen/src/Eigenvalues/HessenbergDecomposition.h b/third-party/Eigen/src/Eigenvalues/HessenbergDecomposition.h
new file mode 100644
index 00000000..f647f69b
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/HessenbergDecomposition.h
@@ -0,0 +1,374 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_HESSENBERGDECOMPOSITION_H
+#define EIGEN_HESSENBERGDECOMPOSITION_H
+
+namespace Eigen { 
+
+namespace internal {
+  
+template<typename MatrixType> struct HessenbergDecompositionMatrixHReturnType;
+template<typename MatrixType>
+struct traits<HessenbergDecompositionMatrixHReturnType<MatrixType> >
+{
+  typedef MatrixType ReturnType;
+};
+
+}
+
+/** \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  *
+  * \class HessenbergDecomposition
+  *
+  * \brief Reduces a square matrix to Hessenberg form by an orthogonal similarity transformation
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the Hessenberg decomposition
+  *
+  * This class performs an Hessenberg decomposition of a matrix \f$ A \f$. In
+  * the real case, the Hessenberg decomposition consists of an orthogonal
+  * matrix \f$ Q \f$ and a Hessenberg matrix \f$ H \f$ such that \f$ A = Q H
+  * Q^T \f$. An orthogonal matrix is a matrix whose inverse equals its
+  * transpose (\f$ Q^{-1} = Q^T \f$). A Hessenberg matrix has zeros below the
+  * subdiagonal, so it is almost upper triangular. The Hessenberg decomposition
+  * of a complex matrix is \f$ A = Q H Q^* \f$ with \f$ Q \f$ unitary (that is,
+  * \f$ Q^{-1} = Q^* \f$).
+  *
+  * Call the function compute() to compute the Hessenberg decomposition of a
+  * given matrix. Alternatively, you can use the
+  * HessenbergDecomposition(const MatrixType&) constructor which computes the
+  * Hessenberg decomposition at construction time. Once the decomposition is
+  * computed, you can use the matrixH() and matrixQ() functions to construct
+  * the matrices H and Q in the decomposition.
+  *
+  * The documentation for matrixH() contains an example of the typical use of
+  * this class.
+  *
+  * \sa class ComplexSchur, class Tridiagonalization, \ref QR_Module "QR Module"
+  */
+template<typename _MatrixType> class HessenbergDecomposition
+{
+  public:
+
+    /** \brief Synonym for the template parameter \p _MatrixType. */
+    typedef _MatrixType MatrixType;
+
+    enum {
+      Size = MatrixType::RowsAtCompileTime,
+      SizeMinusOne = Size == Dynamic ? Dynamic : Size - 1,
+      Options = MatrixType::Options,
+      MaxSize = MatrixType::MaxRowsAtCompileTime,
+      MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : MaxSize - 1
+    };
+
+    /** \brief Scalar type for matrices of type #MatrixType. */
+    typedef typename MatrixType::Scalar Scalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+
+    /** \brief Type for vector of Householder coefficients.
+      *
+      * This is column vector with entries of type #Scalar. The length of the
+      * vector is one less than the size of #MatrixType, if it is a fixed-side
+      * type.
+      */
+    typedef Matrix<Scalar, SizeMinusOne, 1, Options & ~RowMajor, MaxSizeMinusOne, 1> CoeffVectorType;
+
+    /** \brief Return type of matrixQ() */
+    typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename CoeffVectorType::ConjugateReturnType>::type> HouseholderSequenceType;
+    
+    typedef internal::HessenbergDecompositionMatrixHReturnType<MatrixType> MatrixHReturnType;
+
+    /** \brief Default constructor; the decomposition will be computed later.
+      *
+      * \param [in] size  The size of the matrix whose Hessenberg decomposition will be computed.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via compute().  The \p size parameter is only
+      * used as a hint. It is not an error to give a wrong \p size, but it may
+      * impair performance.
+      *
+      * \sa compute() for an example.
+      */
+    explicit HessenbergDecomposition(Index size = Size==Dynamic ? 2 : Size)
+      : m_matrix(size,size),
+        m_temp(size),
+        m_isInitialized(false)
+    {
+      if(size>1)
+        m_hCoeffs.resize(size-1);
+    }
+
+    /** \brief Constructor; computes Hessenberg decomposition of given matrix.
+      *
+      * \param[in]  matrix  Square matrix whose Hessenberg decomposition is to be computed.
+      *
+      * This constructor calls compute() to compute the Hessenberg
+      * decomposition.
+      *
+      * \sa matrixH() for an example.
+      */
+    template<typename InputType>
+    explicit HessenbergDecomposition(const EigenBase<InputType>& matrix)
+      : m_matrix(matrix.derived()),
+        m_temp(matrix.rows()),
+        m_isInitialized(false)
+    {
+      if(matrix.rows()<2)
+      {
+        m_isInitialized = true;
+        return;
+      }
+      m_hCoeffs.resize(matrix.rows()-1,1);
+      _compute(m_matrix, m_hCoeffs, m_temp);
+      m_isInitialized = true;
+    }
+
+    /** \brief Computes Hessenberg decomposition of given matrix.
+      *
+      * \param[in]  matrix  Square matrix whose Hessenberg decomposition is to be computed.
+      * \returns    Reference to \c *this
+      *
+      * The Hessenberg decomposition is computed by bringing the columns of the
+      * matrix successively in the required form using Householder reflections
+      * (see, e.g., Algorithm 7.4.2 in Golub \& Van Loan, <i>%Matrix
+      * Computations</i>). The cost is \f$ 10n^3/3 \f$ flops, where \f$ n \f$
+      * denotes the size of the given matrix.
+      *
+      * This method reuses of the allocated data in the HessenbergDecomposition
+      * object.
+      *
+      * Example: \include HessenbergDecomposition_compute.cpp
+      * Output: \verbinclude HessenbergDecomposition_compute.out
+      */
+    template<typename InputType>
+    HessenbergDecomposition& compute(const EigenBase<InputType>& matrix)
+    {
+      m_matrix = matrix.derived();
+      if(matrix.rows()<2)
+      {
+        m_isInitialized = true;
+        return *this;
+      }
+      m_hCoeffs.resize(matrix.rows()-1,1);
+      _compute(m_matrix, m_hCoeffs, m_temp);
+      m_isInitialized = true;
+      return *this;
+    }
+
+    /** \brief Returns the Householder coefficients.
+      *
+      * \returns a const reference to the vector of Householder coefficients
+      *
+      * \pre Either the constructor HessenbergDecomposition(const MatrixType&)
+      * or the member function compute(const MatrixType&) has been called
+      * before to compute the Hessenberg decomposition of a matrix.
+      *
+      * The Householder coefficients allow the reconstruction of the matrix
+      * \f$ Q \f$ in the Hessenberg decomposition from the packed data.
+      *
+      * \sa packedMatrix(), \ref Householder_Module "Householder module"
+      */
+    const CoeffVectorType& householderCoefficients() const
+    {
+      eigen_assert(m_isInitialized && "HessenbergDecomposition is not initialized.");
+      return m_hCoeffs;
+    }
+
+    /** \brief Returns the internal representation of the decomposition
+      *
+      *	\returns a const reference to a matrix with the internal representation
+      *	         of the decomposition.
+      *
+      * \pre Either the constructor HessenbergDecomposition(const MatrixType&)
+      * or the member function compute(const MatrixType&) has been called
+      * before to compute the Hessenberg decomposition of a matrix.
+      *
+      * The returned matrix contains the following information:
+      *  - the upper part and lower sub-diagonal represent the Hessenberg matrix H
+      *  - the rest of the lower part contains the Householder vectors that, combined with
+      *    Householder coefficients returned by householderCoefficients(),
+      *    allows to reconstruct the matrix Q as
+      *       \f$ Q = H_{N-1} \ldots H_1 H_0 \f$.
+      *    Here, the matrices \f$ H_i \f$ are the Householder transformations
+      *       \f$ H_i = (I - h_i v_i v_i^T) \f$
+      *    where \f$ h_i \f$ is the \f$ i \f$th Householder coefficient and
+      *    \f$ v_i \f$ is the Householder vector defined by
+      *       \f$ v_i = [ 0, \ldots, 0, 1, M(i+2,i), \ldots, M(N-1,i) ]^T \f$
+      *    with M the matrix returned by this function.
+      *
+      * See LAPACK for further details on this packed storage.
+      *
+      * Example: \include HessenbergDecomposition_packedMatrix.cpp
+      * Output: \verbinclude HessenbergDecomposition_packedMatrix.out
+      *
+      * \sa householderCoefficients()
+      */
+    const MatrixType& packedMatrix() const
+    {
+      eigen_assert(m_isInitialized && "HessenbergDecomposition is not initialized.");
+      return m_matrix;
+    }
+
+    /** \brief Reconstructs the orthogonal matrix Q in the decomposition
+      *
+      * \returns object representing the matrix Q
+      *
+      * \pre Either the constructor HessenbergDecomposition(const MatrixType&)
+      * or the member function compute(const MatrixType&) has been called
+      * before to compute the Hessenberg decomposition of a matrix.
+      *
+      * This function returns a light-weight object of template class
+      * HouseholderSequence. You can either apply it directly to a matrix or
+      * you can convert it to a matrix of type #MatrixType.
+      *
+      * \sa matrixH() for an example, class HouseholderSequence
+      */
+    HouseholderSequenceType matrixQ() const
+    {
+      eigen_assert(m_isInitialized && "HessenbergDecomposition is not initialized.");
+      return HouseholderSequenceType(m_matrix, m_hCoeffs.conjugate())
+             .setLength(m_matrix.rows() - 1)
+             .setShift(1);
+    }
+
+    /** \brief Constructs the Hessenberg matrix H in the decomposition
+      *
+      * \returns expression object representing the matrix H
+      *
+      * \pre Either the constructor HessenbergDecomposition(const MatrixType&)
+      * or the member function compute(const MatrixType&) has been called
+      * before to compute the Hessenberg decomposition of a matrix.
+      *
+      * The object returned by this function constructs the Hessenberg matrix H
+      * when it is assigned to a matrix or otherwise evaluated. The matrix H is
+      * constructed from the packed matrix as returned by packedMatrix(): The
+      * upper part (including the subdiagonal) of the packed matrix contains
+      * the matrix H. It may sometimes be better to directly use the packed
+      * matrix instead of constructing the matrix H.
+      *
+      * Example: \include HessenbergDecomposition_matrixH.cpp
+      * Output: \verbinclude HessenbergDecomposition_matrixH.out
+      *
+      * \sa matrixQ(), packedMatrix()
+      */
+    MatrixHReturnType matrixH() const
+    {
+      eigen_assert(m_isInitialized && "HessenbergDecomposition is not initialized.");
+      return MatrixHReturnType(*this);
+    }
+
+  private:
+
+    typedef Matrix<Scalar, 1, Size, Options | RowMajor, 1, MaxSize> VectorType;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    static void _compute(MatrixType& matA, CoeffVectorType& hCoeffs, VectorType& temp);
+
+  protected:
+    MatrixType m_matrix;
+    CoeffVectorType m_hCoeffs;
+    VectorType m_temp;
+    bool m_isInitialized;
+};
+
+/** \internal
+  * Performs a tridiagonal decomposition of \a matA in place.
+  *
+  * \param matA the input selfadjoint matrix
+  * \param hCoeffs returned Householder coefficients
+  *
+  * The result is written in the lower triangular part of \a matA.
+  *
+  * Implemented from Golub's "%Matrix Computations", algorithm 8.3.1.
+  *
+  * \sa packedMatrix()
+  */
+template<typename MatrixType>
+void HessenbergDecomposition<MatrixType>::_compute(MatrixType& matA, CoeffVectorType& hCoeffs, VectorType& temp)
+{
+  eigen_assert(matA.rows()==matA.cols());
+  Index n = matA.rows();
+  temp.resize(n);
+  for (Index i = 0; i<n-1; ++i)
+  {
+    // let's consider the vector v = i-th column starting at position i+1
+    Index remainingSize = n-i-1;
+    RealScalar beta;
+    Scalar h;
+    matA.col(i).tail(remainingSize).makeHouseholderInPlace(h, beta);
+    matA.col(i).coeffRef(i+1) = beta;
+    hCoeffs.coeffRef(i) = h;
+
+    // Apply similarity transformation to remaining columns,
+    // i.e., compute A = H A H'
+
+    // A = H A
+    matA.bottomRightCorner(remainingSize, remainingSize)
+        .applyHouseholderOnTheLeft(matA.col(i).tail(remainingSize-1), h, &temp.coeffRef(0));
+
+    // A = A H'
+    matA.rightCols(remainingSize)
+        .applyHouseholderOnTheRight(matA.col(i).tail(remainingSize-1).conjugate(), numext::conj(h), &temp.coeffRef(0));
+  }
+}
+
+namespace internal {
+
+/** \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  *
+  * \brief Expression type for return value of HessenbergDecomposition::matrixH()
+  *
+  * \tparam MatrixType type of matrix in the Hessenberg decomposition
+  *
+  * Objects of this type represent the Hessenberg matrix in the Hessenberg
+  * decomposition of some matrix. The object holds a reference to the
+  * HessenbergDecomposition class until the it is assigned or evaluated for
+  * some other reason (the reference should remain valid during the life time
+  * of this object). This class is the return type of
+  * HessenbergDecomposition::matrixH(); there is probably no other use for this
+  * class.
+  */
+template<typename MatrixType> struct HessenbergDecompositionMatrixHReturnType
+: public ReturnByValue<HessenbergDecompositionMatrixHReturnType<MatrixType> >
+{
+  public:
+    /** \brief Constructor.
+      *
+      * \param[in] hess  Hessenberg decomposition
+      */
+    HessenbergDecompositionMatrixHReturnType(const HessenbergDecomposition<MatrixType>& hess) : m_hess(hess) { }
+
+    /** \brief Hessenberg matrix in decomposition.
+      *
+      * \param[out] result  Hessenberg matrix in decomposition \p hess which
+      *                     was passed to the constructor
+      */
+    template <typename ResultType>
+    inline void evalTo(ResultType& result) const
+    {
+      result = m_hess.packedMatrix();
+      Index n = result.rows();
+      if (n>2)
+        result.bottomLeftCorner(n-2, n-2).template triangularView<Lower>().setZero();
+    }
+
+    Index rows() const { return m_hess.packedMatrix().rows(); }
+    Index cols() const { return m_hess.packedMatrix().cols(); }
+
+  protected:
+    const HessenbergDecomposition<MatrixType>& m_hess;
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_HESSENBERGDECOMPOSITION_H
diff --git a/third-party/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h b/third-party/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h
new file mode 100644
index 00000000..e4e42607
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h
@@ -0,0 +1,158 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIXBASEEIGENVALUES_H
+#define EIGEN_MATRIXBASEEIGENVALUES_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename Derived, bool IsComplex>
+struct eigenvalues_selector
+{
+  // this is the implementation for the case IsComplex = true
+  static inline typename MatrixBase<Derived>::EigenvaluesReturnType const
+  run(const MatrixBase<Derived>& m)
+  {
+    typedef typename Derived::PlainObject PlainObject;
+    PlainObject m_eval(m);
+    return ComplexEigenSolver<PlainObject>(m_eval, false).eigenvalues();
+  }
+};
+
+template<typename Derived>
+struct eigenvalues_selector<Derived, false>
+{
+  static inline typename MatrixBase<Derived>::EigenvaluesReturnType const
+  run(const MatrixBase<Derived>& m)
+  {
+    typedef typename Derived::PlainObject PlainObject;
+    PlainObject m_eval(m);
+    return EigenSolver<PlainObject>(m_eval, false).eigenvalues();
+  }
+};
+
+} // end namespace internal
+
+/** \brief Computes the eigenvalues of a matrix 
+  * \returns Column vector containing the eigenvalues.
+  *
+  * \eigenvalues_module
+  * This function computes the eigenvalues with the help of the EigenSolver
+  * class (for real matrices) or the ComplexEigenSolver class (for complex
+  * matrices). 
+  *
+  * The eigenvalues are repeated according to their algebraic multiplicity,
+  * so there are as many eigenvalues as rows in the matrix.
+  *
+  * The SelfAdjointView class provides a better algorithm for selfadjoint
+  * matrices.
+  *
+  * Example: \include MatrixBase_eigenvalues.cpp
+  * Output: \verbinclude MatrixBase_eigenvalues.out
+  *
+  * \sa EigenSolver::eigenvalues(), ComplexEigenSolver::eigenvalues(),
+  *     SelfAdjointView::eigenvalues()
+  */
+template<typename Derived>
+inline typename MatrixBase<Derived>::EigenvaluesReturnType
+MatrixBase<Derived>::eigenvalues() const
+{
+  return internal::eigenvalues_selector<Derived, NumTraits<Scalar>::IsComplex>::run(derived());
+}
+
+/** \brief Computes the eigenvalues of a matrix
+  * \returns Column vector containing the eigenvalues.
+  *
+  * \eigenvalues_module
+  * This function computes the eigenvalues with the help of the
+  * SelfAdjointEigenSolver class.  The eigenvalues are repeated according to
+  * their algebraic multiplicity, so there are as many eigenvalues as rows in
+  * the matrix.
+  *
+  * Example: \include SelfAdjointView_eigenvalues.cpp
+  * Output: \verbinclude SelfAdjointView_eigenvalues.out
+  *
+  * \sa SelfAdjointEigenSolver::eigenvalues(), MatrixBase::eigenvalues()
+  */
+template<typename MatrixType, unsigned int UpLo> 
+inline typename SelfAdjointView<MatrixType, UpLo>::EigenvaluesReturnType
+SelfAdjointView<MatrixType, UpLo>::eigenvalues() const
+{
+  PlainObject thisAsMatrix(*this);
+  return SelfAdjointEigenSolver<PlainObject>(thisAsMatrix, false).eigenvalues();
+}
+
+
+
+/** \brief Computes the L2 operator norm
+  * \returns Operator norm of the matrix.
+  *
+  * \eigenvalues_module
+  * This function computes the L2 operator norm of a matrix, which is also
+  * known as the spectral norm. The norm of a matrix \f$ A \f$ is defined to be
+  * \f[ \|A\|_2 = \max_x \frac{\|Ax\|_2}{\|x\|_2} \f]
+  * where the maximum is over all vectors and the norm on the right is the
+  * Euclidean vector norm. The norm equals the largest singular value, which is
+  * the square root of the largest eigenvalue of the positive semi-definite
+  * matrix \f$ A^*A \f$.
+  *
+  * The current implementation uses the eigenvalues of \f$ A^*A \f$, as computed
+  * by SelfAdjointView::eigenvalues(), to compute the operator norm of a
+  * matrix.  The SelfAdjointView class provides a better algorithm for
+  * selfadjoint matrices.
+  *
+  * Example: \include MatrixBase_operatorNorm.cpp
+  * Output: \verbinclude MatrixBase_operatorNorm.out
+  *
+  * \sa SelfAdjointView::eigenvalues(), SelfAdjointView::operatorNorm()
+  */
+template<typename Derived>
+inline typename MatrixBase<Derived>::RealScalar
+MatrixBase<Derived>::operatorNorm() const
+{
+  using std::sqrt;
+  typename Derived::PlainObject m_eval(derived());
+  // FIXME if it is really guaranteed that the eigenvalues are already sorted,
+  // then we don't need to compute a maxCoeff() here, comparing the 1st and last ones is enough.
+  return sqrt((m_eval*m_eval.adjoint())
+                 .eval()
+		 .template selfadjointView<Lower>()
+		 .eigenvalues()
+		 .maxCoeff()
+		 );
+}
+
+/** \brief Computes the L2 operator norm
+  * \returns Operator norm of the matrix.
+  *
+  * \eigenvalues_module
+  * This function computes the L2 operator norm of a self-adjoint matrix. For a
+  * self-adjoint matrix, the operator norm is the largest eigenvalue.
+  *
+  * The current implementation uses the eigenvalues of the matrix, as computed
+  * by eigenvalues(), to compute the operator norm of the matrix.
+  *
+  * Example: \include SelfAdjointView_operatorNorm.cpp
+  * Output: \verbinclude SelfAdjointView_operatorNorm.out
+  *
+  * \sa eigenvalues(), MatrixBase::operatorNorm()
+  */
+template<typename MatrixType, unsigned int UpLo>
+inline typename SelfAdjointView<MatrixType, UpLo>::RealScalar
+SelfAdjointView<MatrixType, UpLo>::operatorNorm() const
+{
+  return eigenvalues().cwiseAbs().maxCoeff();
+}
+
+} // end namespace Eigen
+
+#endif
diff --git a/third-party/Eigen/src/Eigenvalues/RealQZ.h b/third-party/Eigen/src/Eigenvalues/RealQZ.h
new file mode 100644
index 00000000..b3a910dd
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/RealQZ.h
@@ -0,0 +1,654 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012 Alexey Korepanov <kaikaikai@yandex.ru>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REAL_QZ_H
+#define EIGEN_REAL_QZ_H
+
+namespace Eigen {
+
+  /** \eigenvalues_module \ingroup Eigenvalues_Module
+   *
+   *
+   * \class RealQZ
+   *
+   * \brief Performs a real QZ decomposition of a pair of square matrices
+   *
+   * \tparam _MatrixType the type of the matrix of which we are computing the
+   * real QZ decomposition; this is expected to be an instantiation of the
+   * Matrix class template.
+   *
+   * Given a real square matrices A and B, this class computes the real QZ
+   * decomposition: \f$ A = Q S Z \f$, \f$ B = Q T Z \f$ where Q and Z are
+   * real orthogonal matrixes, T is upper-triangular matrix, and S is upper
+   * quasi-triangular matrix. An orthogonal matrix is a matrix whose
+   * inverse is equal to its transpose, \f$ U^{-1} = U^T \f$. A quasi-triangular
+   * matrix is a block-triangular matrix whose diagonal consists of 1-by-1
+   * blocks and 2-by-2 blocks where further reduction is impossible due to
+   * complex eigenvalues. 
+   *
+   * The eigenvalues of the pencil \f$ A - z B \f$ can be obtained from
+   * 1x1 and 2x2 blocks on the diagonals of S and T.
+   *
+   * Call the function compute() to compute the real QZ decomposition of a
+   * given pair of matrices. Alternatively, you can use the 
+   * RealQZ(const MatrixType& B, const MatrixType& B, bool computeQZ)
+   * constructor which computes the real QZ decomposition at construction
+   * time. Once the decomposition is computed, you can use the matrixS(),
+   * matrixT(), matrixQ() and matrixZ() functions to retrieve the matrices
+   * S, T, Q and Z in the decomposition. If computeQZ==false, some time
+   * is saved by not computing matrices Q and Z.
+   *
+   * Example: \include RealQZ_compute.cpp
+   * Output: \include RealQZ_compute.out
+   *
+   * \note The implementation is based on the algorithm in "Matrix Computations"
+   * by Gene H. Golub and Charles F. Van Loan, and a paper "An algorithm for
+   * generalized eigenvalue problems" by C.B.Moler and G.W.Stewart.
+   *
+   * \sa class RealSchur, class ComplexSchur, class EigenSolver, class ComplexEigenSolver
+   */
+
+  template<typename _MatrixType> class RealQZ
+  {
+    public:
+      typedef _MatrixType MatrixType;
+      enum {
+        RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+        ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+        Options = MatrixType::Options,
+        MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+        MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+      };
+      typedef typename MatrixType::Scalar Scalar;
+      typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+      typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+
+      typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;
+      typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> ColumnVectorType;
+
+      /** \brief Default constructor.
+       *
+       * \param [in] size  Positive integer, size of the matrix whose QZ decomposition will be computed.
+       *
+       * The default constructor is useful in cases in which the user intends to
+       * perform decompositions via compute().  The \p size parameter is only
+       * used as a hint. It is not an error to give a wrong \p size, but it may
+       * impair performance.
+       *
+       * \sa compute() for an example.
+       */
+      explicit RealQZ(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime) :
+        m_S(size, size),
+        m_T(size, size),
+        m_Q(size, size),
+        m_Z(size, size),
+        m_workspace(size*2),
+        m_maxIters(400),
+        m_isInitialized(false)
+        { }
+
+      /** \brief Constructor; computes real QZ decomposition of given matrices
+       * 
+       * \param[in]  A          Matrix A.
+       * \param[in]  B          Matrix B.
+       * \param[in]  computeQZ  If false, A and Z are not computed.
+       *
+       * This constructor calls compute() to compute the QZ decomposition.
+       */
+      RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) :
+        m_S(A.rows(),A.cols()),
+        m_T(A.rows(),A.cols()),
+        m_Q(A.rows(),A.cols()),
+        m_Z(A.rows(),A.cols()),
+        m_workspace(A.rows()*2),
+        m_maxIters(400),
+        m_isInitialized(false) {
+          compute(A, B, computeQZ);
+        }
+
+      /** \brief Returns matrix Q in the QZ decomposition. 
+       *
+       * \returns A const reference to the matrix Q.
+       */
+      const MatrixType& matrixQ() const {
+        eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+        eigen_assert(m_computeQZ && "The matrices Q and Z have not been computed during the QZ decomposition.");
+        return m_Q;
+      }
+
+      /** \brief Returns matrix Z in the QZ decomposition. 
+       *
+       * \returns A const reference to the matrix Z.
+       */
+      const MatrixType& matrixZ() const {
+        eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+        eigen_assert(m_computeQZ && "The matrices Q and Z have not been computed during the QZ decomposition.");
+        return m_Z;
+      }
+
+      /** \brief Returns matrix S in the QZ decomposition. 
+       *
+       * \returns A const reference to the matrix S.
+       */
+      const MatrixType& matrixS() const {
+        eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+        return m_S;
+      }
+
+      /** \brief Returns matrix S in the QZ decomposition. 
+       *
+       * \returns A const reference to the matrix S.
+       */
+      const MatrixType& matrixT() const {
+        eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+        return m_T;
+      }
+
+      /** \brief Computes QZ decomposition of given matrix. 
+       * 
+       * \param[in]  A          Matrix A.
+       * \param[in]  B          Matrix B.
+       * \param[in]  computeQZ  If false, A and Z are not computed.
+       * \returns    Reference to \c *this
+       */
+      RealQZ& compute(const MatrixType& A, const MatrixType& B, bool computeQZ = true);
+
+      /** \brief Reports whether previous computation was successful.
+       *
+       * \returns \c Success if computation was succesful, \c NoConvergence otherwise.
+       */
+      ComputationInfo info() const
+      {
+        eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+        return m_info;
+      }
+
+      /** \brief Returns number of performed QR-like iterations.
+      */
+      Index iterations() const
+      {
+        eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+        return m_global_iter;
+      }
+
+      /** Sets the maximal number of iterations allowed to converge to one eigenvalue
+       * or decouple the problem.
+      */
+      RealQZ& setMaxIterations(Index maxIters)
+      {
+        m_maxIters = maxIters;
+        return *this;
+      }
+
+    private:
+
+      MatrixType m_S, m_T, m_Q, m_Z;
+      Matrix<Scalar,Dynamic,1> m_workspace;
+      ComputationInfo m_info;
+      Index m_maxIters;
+      bool m_isInitialized;
+      bool m_computeQZ;
+      Scalar m_normOfT, m_normOfS;
+      Index m_global_iter;
+
+      typedef Matrix<Scalar,3,1> Vector3s;
+      typedef Matrix<Scalar,2,1> Vector2s;
+      typedef Matrix<Scalar,2,2> Matrix2s;
+      typedef JacobiRotation<Scalar> JRs;
+
+      void hessenbergTriangular();
+      void computeNorms();
+      Index findSmallSubdiagEntry(Index iu);
+      Index findSmallDiagEntry(Index f, Index l);
+      void splitOffTwoRows(Index i);
+      void pushDownZero(Index z, Index f, Index l);
+      void step(Index f, Index l, Index iter);
+
+  }; // RealQZ
+
+  /** \internal Reduces S and T to upper Hessenberg - triangular form */
+  template<typename MatrixType>
+    void RealQZ<MatrixType>::hessenbergTriangular()
+    {
+
+      const Index dim = m_S.cols();
+
+      // perform QR decomposition of T, overwrite T with R, save Q
+      HouseholderQR<MatrixType> qrT(m_T);
+      m_T = qrT.matrixQR();
+      m_T.template triangularView<StrictlyLower>().setZero();
+      m_Q = qrT.householderQ();
+      // overwrite S with Q* S
+      m_S.applyOnTheLeft(m_Q.adjoint());
+      // init Z as Identity
+      if (m_computeQZ)
+        m_Z = MatrixType::Identity(dim,dim);
+      // reduce S to upper Hessenberg with Givens rotations
+      for (Index j=0; j<=dim-3; j++) {
+        for (Index i=dim-1; i>=j+2; i--) {
+          JRs G;
+          // kill S(i,j)
+          if(m_S.coeff(i,j) != 0)
+          {
+            G.makeGivens(m_S.coeff(i-1,j), m_S.coeff(i,j), &m_S.coeffRef(i-1, j));
+            m_S.coeffRef(i,j) = Scalar(0.0);
+            m_S.rightCols(dim-j-1).applyOnTheLeft(i-1,i,G.adjoint());
+            m_T.rightCols(dim-i+1).applyOnTheLeft(i-1,i,G.adjoint());
+            // update Q
+            if (m_computeQZ)
+              m_Q.applyOnTheRight(i-1,i,G);
+          }
+          // kill T(i,i-1)
+          if(m_T.coeff(i,i-1)!=Scalar(0))
+          {
+            G.makeGivens(m_T.coeff(i,i), m_T.coeff(i,i-1), &m_T.coeffRef(i,i));
+            m_T.coeffRef(i,i-1) = Scalar(0.0);
+            m_S.applyOnTheRight(i,i-1,G);
+            m_T.topRows(i).applyOnTheRight(i,i-1,G);
+            // update Z
+            if (m_computeQZ)
+              m_Z.applyOnTheLeft(i,i-1,G.adjoint());
+          }
+        }
+      }
+    }
+
+  /** \internal Computes vector L1 norms of S and T when in Hessenberg-Triangular form already */
+  template<typename MatrixType>
+    inline void RealQZ<MatrixType>::computeNorms()
+    {
+      const Index size = m_S.cols();
+      m_normOfS = Scalar(0.0);
+      m_normOfT = Scalar(0.0);
+      for (Index j = 0; j < size; ++j)
+      {
+        m_normOfS += m_S.col(j).segment(0, (std::min)(size,j+2)).cwiseAbs().sum();
+        m_normOfT += m_T.row(j).segment(j, size - j).cwiseAbs().sum();
+      }
+    }
+
+
+  /** \internal Look for single small sub-diagonal element S(res, res-1) and return res (or 0) */
+  template<typename MatrixType>
+    inline Index RealQZ<MatrixType>::findSmallSubdiagEntry(Index iu)
+    {
+      using std::abs;
+      Index res = iu;
+      while (res > 0)
+      {
+        Scalar s = abs(m_S.coeff(res-1,res-1)) + abs(m_S.coeff(res,res));
+        if (s == Scalar(0.0))
+          s = m_normOfS;
+        if (abs(m_S.coeff(res,res-1)) < NumTraits<Scalar>::epsilon() * s)
+          break;
+        res--;
+      }
+      return res;
+    }
+
+  /** \internal Look for single small diagonal element T(res, res) for res between f and l, and return res (or f-1)  */
+  template<typename MatrixType>
+    inline Index RealQZ<MatrixType>::findSmallDiagEntry(Index f, Index l)
+    {
+      using std::abs;
+      Index res = l;
+      while (res >= f) {
+        if (abs(m_T.coeff(res,res)) <= NumTraits<Scalar>::epsilon() * m_normOfT)
+          break;
+        res--;
+      }
+      return res;
+    }
+
+  /** \internal decouple 2x2 diagonal block in rows i, i+1 if eigenvalues are real */
+  template<typename MatrixType>
+    inline void RealQZ<MatrixType>::splitOffTwoRows(Index i)
+    {
+      using std::abs;
+      using std::sqrt;
+      const Index dim=m_S.cols();
+      if (abs(m_S.coeff(i+1,i))==Scalar(0))
+        return;
+      Index j = findSmallDiagEntry(i,i+1);
+      if (j==i-1)
+      {
+        // block of (S T^{-1})
+        Matrix2s STi = m_T.template block<2,2>(i,i).template triangularView<Upper>().
+          template solve<OnTheRight>(m_S.template block<2,2>(i,i));
+        Scalar p = Scalar(0.5)*(STi(0,0)-STi(1,1));
+        Scalar q = p*p + STi(1,0)*STi(0,1);
+        if (q>=0) {
+          Scalar z = sqrt(q);
+          // one QR-like iteration for ABi - lambda I
+          // is enough - when we know exact eigenvalue in advance,
+          // convergence is immediate
+          JRs G;
+          if (p>=0)
+            G.makeGivens(p + z, STi(1,0));
+          else
+            G.makeGivens(p - z, STi(1,0));
+          m_S.rightCols(dim-i).applyOnTheLeft(i,i+1,G.adjoint());
+          m_T.rightCols(dim-i).applyOnTheLeft(i,i+1,G.adjoint());
+          // update Q
+          if (m_computeQZ)
+            m_Q.applyOnTheRight(i,i+1,G);
+
+          G.makeGivens(m_T.coeff(i+1,i+1), m_T.coeff(i+1,i));
+          m_S.topRows(i+2).applyOnTheRight(i+1,i,G);
+          m_T.topRows(i+2).applyOnTheRight(i+1,i,G);
+          // update Z
+          if (m_computeQZ)
+            m_Z.applyOnTheLeft(i+1,i,G.adjoint());
+
+          m_S.coeffRef(i+1,i) = Scalar(0.0);
+          m_T.coeffRef(i+1,i) = Scalar(0.0);
+        }
+      }
+      else
+      {
+        pushDownZero(j,i,i+1);
+      }
+    }
+
+  /** \internal use zero in T(z,z) to zero S(l,l-1), working in block f..l */
+  template<typename MatrixType>
+    inline void RealQZ<MatrixType>::pushDownZero(Index z, Index f, Index l)
+    {
+      JRs G;
+      const Index dim = m_S.cols();
+      for (Index zz=z; zz<l; zz++)
+      {
+        // push 0 down
+        Index firstColS = zz>f ? (zz-1) : zz;
+        G.makeGivens(m_T.coeff(zz, zz+1), m_T.coeff(zz+1, zz+1));
+        m_S.rightCols(dim-firstColS).applyOnTheLeft(zz,zz+1,G.adjoint());
+        m_T.rightCols(dim-zz).applyOnTheLeft(zz,zz+1,G.adjoint());
+        m_T.coeffRef(zz+1,zz+1) = Scalar(0.0);
+        // update Q
+        if (m_computeQZ)
+          m_Q.applyOnTheRight(zz,zz+1,G);
+        // kill S(zz+1, zz-1)
+        if (zz>f)
+        {
+          G.makeGivens(m_S.coeff(zz+1, zz), m_S.coeff(zz+1,zz-1));
+          m_S.topRows(zz+2).applyOnTheRight(zz, zz-1,G);
+          m_T.topRows(zz+1).applyOnTheRight(zz, zz-1,G);
+          m_S.coeffRef(zz+1,zz-1) = Scalar(0.0);
+          // update Z
+          if (m_computeQZ)
+            m_Z.applyOnTheLeft(zz,zz-1,G.adjoint());
+        }
+      }
+      // finally kill S(l,l-1)
+      G.makeGivens(m_S.coeff(l,l), m_S.coeff(l,l-1));
+      m_S.applyOnTheRight(l,l-1,G);
+      m_T.applyOnTheRight(l,l-1,G);
+      m_S.coeffRef(l,l-1)=Scalar(0.0);
+      // update Z
+      if (m_computeQZ)
+        m_Z.applyOnTheLeft(l,l-1,G.adjoint());
+    }
+
+  /** \internal QR-like iterative step for block f..l */
+  template<typename MatrixType>
+    inline void RealQZ<MatrixType>::step(Index f, Index l, Index iter)
+    {
+      using std::abs;
+      const Index dim = m_S.cols();
+
+      // x, y, z
+      Scalar x, y, z;
+      if (iter==10)
+      {
+        // Wilkinson ad hoc shift
+        const Scalar
+          a11=m_S.coeff(f+0,f+0), a12=m_S.coeff(f+0,f+1),
+          a21=m_S.coeff(f+1,f+0), a22=m_S.coeff(f+1,f+1), a32=m_S.coeff(f+2,f+1),
+          b12=m_T.coeff(f+0,f+1),
+          b11i=Scalar(1.0)/m_T.coeff(f+0,f+0),
+          b22i=Scalar(1.0)/m_T.coeff(f+1,f+1),
+          a87=m_S.coeff(l-1,l-2),
+          a98=m_S.coeff(l-0,l-1),
+          b77i=Scalar(1.0)/m_T.coeff(l-2,l-2),
+          b88i=Scalar(1.0)/m_T.coeff(l-1,l-1);
+        Scalar ss = abs(a87*b77i) + abs(a98*b88i),
+               lpl = Scalar(1.5)*ss,
+               ll = ss*ss;
+        x = ll + a11*a11*b11i*b11i - lpl*a11*b11i + a12*a21*b11i*b22i
+          - a11*a21*b12*b11i*b11i*b22i;
+        y = a11*a21*b11i*b11i - lpl*a21*b11i + a21*a22*b11i*b22i 
+          - a21*a21*b12*b11i*b11i*b22i;
+        z = a21*a32*b11i*b22i;
+      }
+      else if (iter==16)
+      {
+        // another exceptional shift
+        x = m_S.coeff(f,f)/m_T.coeff(f,f)-m_S.coeff(l,l)/m_T.coeff(l,l) + m_S.coeff(l,l-1)*m_T.coeff(l-1,l) /
+          (m_T.coeff(l-1,l-1)*m_T.coeff(l,l));
+        y = m_S.coeff(f+1,f)/m_T.coeff(f,f);
+        z = 0;
+      }
+      else if (iter>23 && !(iter%8))
+      {
+        // extremely exceptional shift
+        x = internal::random<Scalar>(-1.0,1.0);
+        y = internal::random<Scalar>(-1.0,1.0);
+        z = internal::random<Scalar>(-1.0,1.0);
+      }
+      else
+      {
+        // Compute the shifts: (x,y,z,0...) = (AB^-1 - l1 I) (AB^-1 - l2 I) e1
+        // where l1 and l2 are the eigenvalues of the 2x2 matrix C = U V^-1 where
+        // U and V are 2x2 bottom right sub matrices of A and B. Thus:
+        //  = AB^-1AB^-1 + l1 l2 I - (l1+l2)(AB^-1)
+        //  = AB^-1AB^-1 + det(M) - tr(M)(AB^-1)
+        // Since we are only interested in having x, y, z with a correct ratio, we have:
+        const Scalar
+          a11 = m_S.coeff(f,f),     a12 = m_S.coeff(f,f+1),
+          a21 = m_S.coeff(f+1,f),   a22 = m_S.coeff(f+1,f+1),
+                                    a32 = m_S.coeff(f+2,f+1),
+
+          a88 = m_S.coeff(l-1,l-1), a89 = m_S.coeff(l-1,l),
+          a98 = m_S.coeff(l,l-1),   a99 = m_S.coeff(l,l),
+
+          b11 = m_T.coeff(f,f),     b12 = m_T.coeff(f,f+1),
+                                    b22 = m_T.coeff(f+1,f+1),
+
+          b88 = m_T.coeff(l-1,l-1), b89 = m_T.coeff(l-1,l),
+                                    b99 = m_T.coeff(l,l);
+
+        x = ( (a88/b88 - a11/b11)*(a99/b99 - a11/b11) - (a89/b99)*(a98/b88) + (a98/b88)*(b89/b99)*(a11/b11) ) * (b11/a21)
+          + a12/b22 - (a11/b11)*(b12/b22);
+        y = (a22/b22-a11/b11) - (a21/b11)*(b12/b22) - (a88/b88-a11/b11) - (a99/b99-a11/b11) + (a98/b88)*(b89/b99);
+        z = a32/b22;
+      }
+
+      JRs G;
+
+      for (Index k=f; k<=l-2; k++)
+      {
+        // variables for Householder reflections
+        Vector2s essential2;
+        Scalar tau, beta;
+
+        Vector3s hr(x,y,z);
+
+        // Q_k to annihilate S(k+1,k-1) and S(k+2,k-1)
+        hr.makeHouseholderInPlace(tau, beta);
+        essential2 = hr.template bottomRows<2>();
+        Index fc=(std::max)(k-1,Index(0));  // first col to update
+        m_S.template middleRows<3>(k).rightCols(dim-fc).applyHouseholderOnTheLeft(essential2, tau, m_workspace.data());
+        m_T.template middleRows<3>(k).rightCols(dim-fc).applyHouseholderOnTheLeft(essential2, tau, m_workspace.data());
+        if (m_computeQZ)
+          m_Q.template middleCols<3>(k).applyHouseholderOnTheRight(essential2, tau, m_workspace.data());
+        if (k>f)
+          m_S.coeffRef(k+2,k-1) = m_S.coeffRef(k+1,k-1) = Scalar(0.0);
+
+        // Z_{k1} to annihilate T(k+2,k+1) and T(k+2,k)
+        hr << m_T.coeff(k+2,k+2),m_T.coeff(k+2,k),m_T.coeff(k+2,k+1);
+        hr.makeHouseholderInPlace(tau, beta);
+        essential2 = hr.template bottomRows<2>();
+        {
+          Index lr = (std::min)(k+4,dim); // last row to update
+          Map<Matrix<Scalar,Dynamic,1> > tmp(m_workspace.data(),lr);
+          // S
+          tmp = m_S.template middleCols<2>(k).topRows(lr) * essential2;
+          tmp += m_S.col(k+2).head(lr);
+          m_S.col(k+2).head(lr) -= tau*tmp;
+          m_S.template middleCols<2>(k).topRows(lr) -= (tau*tmp) * essential2.adjoint();
+          // T
+          tmp = m_T.template middleCols<2>(k).topRows(lr) * essential2;
+          tmp += m_T.col(k+2).head(lr);
+          m_T.col(k+2).head(lr) -= tau*tmp;
+          m_T.template middleCols<2>(k).topRows(lr) -= (tau*tmp) * essential2.adjoint();
+        }
+        if (m_computeQZ)
+        {
+          // Z
+          Map<Matrix<Scalar,1,Dynamic> > tmp(m_workspace.data(),dim);
+          tmp = essential2.adjoint()*(m_Z.template middleRows<2>(k));
+          tmp += m_Z.row(k+2);
+          m_Z.row(k+2) -= tau*tmp;
+          m_Z.template middleRows<2>(k) -= essential2 * (tau*tmp);
+        }
+        m_T.coeffRef(k+2,k) = m_T.coeffRef(k+2,k+1) = Scalar(0.0);
+
+        // Z_{k2} to annihilate T(k+1,k)
+        G.makeGivens(m_T.coeff(k+1,k+1), m_T.coeff(k+1,k));
+        m_S.applyOnTheRight(k+1,k,G);
+        m_T.applyOnTheRight(k+1,k,G);
+        // update Z
+        if (m_computeQZ)
+          m_Z.applyOnTheLeft(k+1,k,G.adjoint());
+        m_T.coeffRef(k+1,k) = Scalar(0.0);
+
+        // update x,y,z
+        x = m_S.coeff(k+1,k);
+        y = m_S.coeff(k+2,k);
+        if (k < l-2)
+          z = m_S.coeff(k+3,k);
+      } // loop over k
+
+      // Q_{n-1} to annihilate y = S(l,l-2)
+      G.makeGivens(x,y);
+      m_S.applyOnTheLeft(l-1,l,G.adjoint());
+      m_T.applyOnTheLeft(l-1,l,G.adjoint());
+      if (m_computeQZ)
+        m_Q.applyOnTheRight(l-1,l,G);
+      m_S.coeffRef(l,l-2) = Scalar(0.0);
+
+      // Z_{n-1} to annihilate T(l,l-1)
+      G.makeGivens(m_T.coeff(l,l),m_T.coeff(l,l-1));
+      m_S.applyOnTheRight(l,l-1,G);
+      m_T.applyOnTheRight(l,l-1,G);
+      if (m_computeQZ)
+        m_Z.applyOnTheLeft(l,l-1,G.adjoint());
+      m_T.coeffRef(l,l-1) = Scalar(0.0);
+    }
+
+  template<typename MatrixType>
+    RealQZ<MatrixType>& RealQZ<MatrixType>::compute(const MatrixType& A_in, const MatrixType& B_in, bool computeQZ)
+    {
+
+      const Index dim = A_in.cols();
+
+      eigen_assert (A_in.rows()==dim && A_in.cols()==dim 
+          && B_in.rows()==dim && B_in.cols()==dim 
+          && "Need square matrices of the same dimension");
+
+      m_isInitialized = true;
+      m_computeQZ = computeQZ;
+      m_S = A_in; m_T = B_in;
+      m_workspace.resize(dim*2);
+      m_global_iter = 0;
+
+      // entrance point: hessenberg triangular decomposition
+      hessenbergTriangular();
+      // compute L1 vector norms of T, S into m_normOfS, m_normOfT
+      computeNorms();
+
+      Index l = dim-1, 
+            f, 
+            local_iter = 0;
+
+      while (l>0 && local_iter<m_maxIters)
+      {
+        f = findSmallSubdiagEntry(l);
+        // now rows and columns f..l (including) decouple from the rest of the problem
+        if (f>0) m_S.coeffRef(f,f-1) = Scalar(0.0);
+        if (f == l) // One root found
+        {
+          l--;
+          local_iter = 0;
+        }
+        else if (f == l-1) // Two roots found
+        {
+          splitOffTwoRows(f);
+          l -= 2;
+          local_iter = 0;
+        }
+        else // No convergence yet
+        {
+          // if there's zero on diagonal of T, we can isolate an eigenvalue with Givens rotations
+          Index z = findSmallDiagEntry(f,l);
+          if (z>=f)
+          {
+            // zero found
+            pushDownZero(z,f,l);
+          }
+          else
+          {
+            // We are sure now that S.block(f,f, l-f+1,l-f+1) is underuced upper-Hessenberg 
+            // and T.block(f,f, l-f+1,l-f+1) is invertible uper-triangular, which allows to
+            // apply a QR-like iteration to rows and columns f..l.
+            step(f,l, local_iter);
+            local_iter++;
+            m_global_iter++;
+          }
+        }
+      }
+      // check if we converged before reaching iterations limit
+      m_info = (local_iter<m_maxIters) ? Success : NoConvergence;
+
+      // For each non triangular 2x2 diagonal block of S,
+      //    reduce the respective 2x2 diagonal block of T to positive diagonal form using 2x2 SVD.
+      // This step is not mandatory for QZ, but it does help further extraction of eigenvalues/eigenvectors,
+      // and is in par with Lapack/Matlab QZ.
+      if(m_info==Success)
+      {
+        for(Index i=0; i<dim-1; ++i)
+        {
+          if(m_S.coeff(i+1, i) != Scalar(0))
+          {
+            JacobiRotation<Scalar> j_left, j_right;
+            internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right);
+
+            // Apply resulting Jacobi rotations
+            m_S.applyOnTheLeft(i,i+1,j_left);
+            m_S.applyOnTheRight(i,i+1,j_right);
+            m_T.applyOnTheLeft(i,i+1,j_left);
+            m_T.applyOnTheRight(i,i+1,j_right);
+            m_T(i+1,i) = m_T(i,i+1) = Scalar(0);
+
+            if(m_computeQZ) {
+              m_Q.applyOnTheRight(i,i+1,j_left.transpose());
+              m_Z.applyOnTheLeft(i,i+1,j_right.transpose());
+            }
+
+            i++;
+          }
+        }
+      }
+
+      return *this;
+    } // end compute
+
+} // end namespace Eigen
+
+#endif //EIGEN_REAL_QZ
diff --git a/third-party/Eigen/src/Eigenvalues/RealSchur.h b/third-party/Eigen/src/Eigenvalues/RealSchur.h
new file mode 100644
index 00000000..9191519a
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/RealSchur.h
@@ -0,0 +1,553 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010,2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REAL_SCHUR_H
+#define EIGEN_REAL_SCHUR_H
+
+#include "./HessenbergDecomposition.h"
+
+namespace Eigen { 
+
+/** \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  *
+  * \class RealSchur
+  *
+  * \brief Performs a real Schur decomposition of a square matrix
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the
+  * real Schur decomposition; this is expected to be an instantiation of the
+  * Matrix class template.
+  *
+  * Given a real square matrix A, this class computes the real Schur
+  * decomposition: \f$ A = U T U^T \f$ where U is a real orthogonal matrix and
+  * T is a real quasi-triangular matrix. An orthogonal matrix is a matrix whose
+  * inverse is equal to its transpose, \f$ U^{-1} = U^T \f$. A quasi-triangular
+  * matrix is a block-triangular matrix whose diagonal consists of 1-by-1
+  * blocks and 2-by-2 blocks with complex eigenvalues. The eigenvalues of the
+  * blocks on the diagonal of T are the same as the eigenvalues of the matrix
+  * A, and thus the real Schur decomposition is used in EigenSolver to compute
+  * the eigendecomposition of a matrix.
+  *
+  * Call the function compute() to compute the real Schur decomposition of a
+  * given matrix. Alternatively, you can use the RealSchur(const MatrixType&, bool)
+  * constructor which computes the real Schur decomposition at construction
+  * time. Once the decomposition is computed, you can use the matrixU() and
+  * matrixT() functions to retrieve the matrices U and T in the decomposition.
+  *
+  * The documentation of RealSchur(const MatrixType&, bool) contains an example
+  * of the typical use of this class.
+  *
+  * \note The implementation is adapted from
+  * <a href="http://math.nist.gov/javanumerics/jama/">JAMA</a> (public domain).
+  * Their code is based on EISPACK.
+  *
+  * \sa class ComplexSchur, class EigenSolver, class ComplexEigenSolver
+  */
+template<typename _MatrixType> class RealSchur
+{
+  public:
+    typedef _MatrixType MatrixType;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      Options = MatrixType::Options,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+    typedef typename MatrixType::Scalar Scalar;
+    typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+
+    typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;
+    typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> ColumnVectorType;
+
+    /** \brief Default constructor.
+      *
+      * \param [in] size  Positive integer, size of the matrix whose Schur decomposition will be computed.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via compute().  The \p size parameter is only
+      * used as a hint. It is not an error to give a wrong \p size, but it may
+      * impair performance.
+      *
+      * \sa compute() for an example.
+      */
+    explicit RealSchur(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime)
+            : m_matT(size, size),
+              m_matU(size, size),
+              m_workspaceVector(size),
+              m_hess(size),
+              m_isInitialized(false),
+              m_matUisUptodate(false),
+              m_maxIters(-1)
+    { }
+
+    /** \brief Constructor; computes real Schur decomposition of given matrix. 
+      * 
+      * \param[in]  matrix    Square matrix whose Schur decomposition is to be computed.
+      * \param[in]  computeU  If true, both T and U are computed; if false, only T is computed.
+      *
+      * This constructor calls compute() to compute the Schur decomposition.
+      *
+      * Example: \include RealSchur_RealSchur_MatrixType.cpp
+      * Output: \verbinclude RealSchur_RealSchur_MatrixType.out
+      */
+    template<typename InputType>
+    explicit RealSchur(const EigenBase<InputType>& matrix, bool computeU = true)
+            : m_matT(matrix.rows(),matrix.cols()),
+              m_matU(matrix.rows(),matrix.cols()),
+              m_workspaceVector(matrix.rows()),
+              m_hess(matrix.rows()),
+              m_isInitialized(false),
+              m_matUisUptodate(false),
+              m_maxIters(-1)
+    {
+      compute(matrix.derived(), computeU);
+    }
+
+    /** \brief Returns the orthogonal matrix in the Schur decomposition. 
+      *
+      * \returns A const reference to the matrix U.
+      *
+      * \pre Either the constructor RealSchur(const MatrixType&, bool) or the
+      * member function compute(const MatrixType&, bool) has been called before
+      * to compute the Schur decomposition of a matrix, and \p computeU was set
+      * to true (the default value).
+      *
+      * \sa RealSchur(const MatrixType&, bool) for an example
+      */
+    const MatrixType& matrixU() const
+    {
+      eigen_assert(m_isInitialized && "RealSchur is not initialized.");
+      eigen_assert(m_matUisUptodate && "The matrix U has not been computed during the RealSchur decomposition.");
+      return m_matU;
+    }
+
+    /** \brief Returns the quasi-triangular matrix in the Schur decomposition. 
+      *
+      * \returns A const reference to the matrix T.
+      *
+      * \pre Either the constructor RealSchur(const MatrixType&, bool) or the
+      * member function compute(const MatrixType&, bool) has been called before
+      * to compute the Schur decomposition of a matrix.
+      *
+      * \sa RealSchur(const MatrixType&, bool) for an example
+      */
+    const MatrixType& matrixT() const
+    {
+      eigen_assert(m_isInitialized && "RealSchur is not initialized.");
+      return m_matT;
+    }
+  
+    /** \brief Computes Schur decomposition of given matrix. 
+      * 
+      * \param[in]  matrix    Square matrix whose Schur decomposition is to be computed.
+      * \param[in]  computeU  If true, both T and U are computed; if false, only T is computed.
+      * \returns    Reference to \c *this
+      *
+      * The Schur decomposition is computed by first reducing the matrix to
+      * Hessenberg form using the class HessenbergDecomposition. The Hessenberg
+      * matrix is then reduced to triangular form by performing Francis QR
+      * iterations with implicit double shift. The cost of computing the Schur
+      * decomposition depends on the number of iterations; as a rough guide, it
+      * may be taken to be \f$25n^3\f$ flops if \a computeU is true and
+      * \f$10n^3\f$ flops if \a computeU is false.
+      *
+      * Example: \include RealSchur_compute.cpp
+      * Output: \verbinclude RealSchur_compute.out
+      *
+      * \sa compute(const MatrixType&, bool, Index)
+      */
+    template<typename InputType>
+    RealSchur& compute(const EigenBase<InputType>& matrix, bool computeU = true);
+
+    /** \brief Computes Schur decomposition of a Hessenberg matrix H = Z T Z^T
+     *  \param[in] matrixH Matrix in Hessenberg form H
+     *  \param[in] matrixQ orthogonal matrix Q that transform a matrix A to H : A = Q H Q^T
+     *  \param computeU Computes the matriX U of the Schur vectors
+     * \return Reference to \c *this
+     * 
+     *  This routine assumes that the matrix is already reduced in Hessenberg form matrixH
+     *  using either the class HessenbergDecomposition or another mean. 
+     *  It computes the upper quasi-triangular matrix T of the Schur decomposition of H
+     *  When computeU is true, this routine computes the matrix U such that 
+     *  A = U T U^T =  (QZ) T (QZ)^T = Q H Q^T where A is the initial matrix
+     * 
+     * NOTE Q is referenced if computeU is true; so, if the initial orthogonal matrix
+     * is not available, the user should give an identity matrix (Q.setIdentity())
+     * 
+     * \sa compute(const MatrixType&, bool)
+     */
+    template<typename HessMatrixType, typename OrthMatrixType>
+    RealSchur& computeFromHessenberg(const HessMatrixType& matrixH, const OrthMatrixType& matrixQ,  bool computeU);
+    /** \brief Reports whether previous computation was successful.
+      *
+      * \returns \c Success if computation was succesful, \c NoConvergence otherwise.
+      */
+    ComputationInfo info() const
+    {
+      eigen_assert(m_isInitialized && "RealSchur is not initialized.");
+      return m_info;
+    }
+
+    /** \brief Sets the maximum number of iterations allowed. 
+      *
+      * If not specified by the user, the maximum number of iterations is m_maxIterationsPerRow times the size
+      * of the matrix.
+      */
+    RealSchur& setMaxIterations(Index maxIters)
+    {
+      m_maxIters = maxIters;
+      return *this;
+    }
+
+    /** \brief Returns the maximum number of iterations. */
+    Index getMaxIterations()
+    {
+      return m_maxIters;
+    }
+
+    /** \brief Maximum number of iterations per row.
+      *
+      * If not otherwise specified, the maximum number of iterations is this number times the size of the
+      * matrix. It is currently set to 40.
+      */
+    static const int m_maxIterationsPerRow = 40;
+
+  private:
+    
+    MatrixType m_matT;
+    MatrixType m_matU;
+    ColumnVectorType m_workspaceVector;
+    HessenbergDecomposition<MatrixType> m_hess;
+    ComputationInfo m_info;
+    bool m_isInitialized;
+    bool m_matUisUptodate;
+    Index m_maxIters;
+
+    typedef Matrix<Scalar,3,1> Vector3s;
+
+    Scalar computeNormOfT();
+    Index findSmallSubdiagEntry(Index iu, const Scalar& considerAsZero);
+    void splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift);
+    void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo);
+    void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector);
+    void performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector, Scalar* workspace);
+};
+
+
+template<typename MatrixType>
+template<typename InputType>
+RealSchur<MatrixType>& RealSchur<MatrixType>::compute(const EigenBase<InputType>& matrix, bool computeU)
+{
+  const Scalar considerAsZero = (std::numeric_limits<Scalar>::min)();
+
+  eigen_assert(matrix.cols() == matrix.rows());
+  Index maxIters = m_maxIters;
+  if (maxIters == -1)
+    maxIters = m_maxIterationsPerRow * matrix.rows();
+
+  Scalar scale = matrix.derived().cwiseAbs().maxCoeff();
+  if(scale<considerAsZero)
+  {
+    m_matT.setZero(matrix.rows(),matrix.cols());
+    if(computeU)
+      m_matU.setIdentity(matrix.rows(),matrix.cols());
+    m_info = Success;
+    m_isInitialized = true;
+    m_matUisUptodate = computeU;
+    return *this;
+  }
+
+  // Step 1. Reduce to Hessenberg form
+  m_hess.compute(matrix.derived()/scale);
+
+  // Step 2. Reduce to real Schur form  
+  computeFromHessenberg(m_hess.matrixH(), m_hess.matrixQ(), computeU);
+
+  m_matT *= scale;
+  
+  return *this;
+}
+template<typename MatrixType>
+template<typename HessMatrixType, typename OrthMatrixType>
+RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMatrixType& matrixH, const OrthMatrixType& matrixQ,  bool computeU)
+{
+  using std::abs;
+
+  m_matT = matrixH;
+  if(computeU)
+    m_matU = matrixQ;
+  
+  Index maxIters = m_maxIters;
+  if (maxIters == -1)
+    maxIters = m_maxIterationsPerRow * matrixH.rows();
+  m_workspaceVector.resize(m_matT.cols());
+  Scalar* workspace = &m_workspaceVector.coeffRef(0);
+
+  // The matrix m_matT is divided in three parts. 
+  // Rows 0,...,il-1 are decoupled from the rest because m_matT(il,il-1) is zero. 
+  // Rows il,...,iu is the part we are working on (the active window).
+  // Rows iu+1,...,end are already brought in triangular form.
+  Index iu = m_matT.cols() - 1;
+  Index iter = 0;      // iteration count for current eigenvalue
+  Index totalIter = 0; // iteration count for whole matrix
+  Scalar exshift(0);   // sum of exceptional shifts
+  Scalar norm = computeNormOfT();
+  // sub-diagonal entries smaller than considerAsZero will be treated as zero.
+  // We use eps^2 to enable more precision in small eigenvalues.
+  Scalar considerAsZero = numext::maxi<Scalar>( norm * numext::abs2(NumTraits<Scalar>::epsilon()),
+                                                (std::numeric_limits<Scalar>::min)() );
+
+  if(norm!=Scalar(0))
+  {
+    while (iu >= 0)
+    {
+      Index il = findSmallSubdiagEntry(iu,considerAsZero);
+
+      // Check for convergence
+      if (il == iu) // One root found
+      {
+        m_matT.coeffRef(iu,iu) = m_matT.coeff(iu,iu) + exshift;
+        if (iu > 0)
+          m_matT.coeffRef(iu, iu-1) = Scalar(0);
+        iu--;
+        iter = 0;
+      }
+      else if (il == iu-1) // Two roots found
+      {
+        splitOffTwoRows(iu, computeU, exshift);
+        iu -= 2;
+        iter = 0;
+      }
+      else // No convergence yet
+      {
+        // The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG )
+        Vector3s firstHouseholderVector = Vector3s::Zero(), shiftInfo;
+        computeShift(iu, iter, exshift, shiftInfo);
+        iter = iter + 1;
+        totalIter = totalIter + 1;
+        if (totalIter > maxIters) break;
+        Index im;
+        initFrancisQRStep(il, iu, shiftInfo, im, firstHouseholderVector);
+        performFrancisQRStep(il, im, iu, computeU, firstHouseholderVector, workspace);
+      }
+    }
+  }
+  if(totalIter <= maxIters)
+    m_info = Success;
+  else
+    m_info = NoConvergence;
+
+  m_isInitialized = true;
+  m_matUisUptodate = computeU;
+  return *this;
+}
+
+/** \internal Computes and returns vector L1 norm of T */
+template<typename MatrixType>
+inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()
+{
+  const Index size = m_matT.cols();
+  // FIXME to be efficient the following would requires a triangular reduxion code
+  // Scalar norm = m_matT.upper().cwiseAbs().sum() 
+  //               + m_matT.bottomLeftCorner(size-1,size-1).diagonal().cwiseAbs().sum();
+  Scalar norm(0);
+  for (Index j = 0; j < size; ++j)
+    norm += m_matT.col(j).segment(0, (std::min)(size,j+2)).cwiseAbs().sum();
+  return norm;
+}
+
+/** \internal Look for single small sub-diagonal element and returns its index */
+template<typename MatrixType>
+inline Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, const Scalar& considerAsZero)
+{
+  using std::abs;
+  Index res = iu;
+  while (res > 0)
+  {
+    Scalar s = abs(m_matT.coeff(res-1,res-1)) + abs(m_matT.coeff(res,res));
+
+    s = numext::maxi<Scalar>(s * NumTraits<Scalar>::epsilon(), considerAsZero);
+    
+    if (abs(m_matT.coeff(res,res-1)) <= s)
+      break;
+    res--;
+  }
+  return res;
+}
+
+/** \internal Update T given that rows iu-1 and iu decouple from the rest. */
+template<typename MatrixType>
+inline void RealSchur<MatrixType>::splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift)
+{
+  using std::sqrt;
+  using std::abs;
+  const Index size = m_matT.cols();
+
+  // The eigenvalues of the 2x2 matrix [a b; c d] are 
+  // trace +/- sqrt(discr/4) where discr = tr^2 - 4*det, tr = a + d, det = ad - bc
+  Scalar p = Scalar(0.5) * (m_matT.coeff(iu-1,iu-1) - m_matT.coeff(iu,iu));
+  Scalar q = p * p + m_matT.coeff(iu,iu-1) * m_matT.coeff(iu-1,iu);   // q = tr^2 / 4 - det = discr/4
+  m_matT.coeffRef(iu,iu) += exshift;
+  m_matT.coeffRef(iu-1,iu-1) += exshift;
+
+  if (q >= Scalar(0)) // Two real eigenvalues
+  {
+    Scalar z = sqrt(abs(q));
+    JacobiRotation<Scalar> rot;
+    if (p >= Scalar(0))
+      rot.makeGivens(p + z, m_matT.coeff(iu, iu-1));
+    else
+      rot.makeGivens(p - z, m_matT.coeff(iu, iu-1));
+
+    m_matT.rightCols(size-iu+1).applyOnTheLeft(iu-1, iu, rot.adjoint());
+    m_matT.topRows(iu+1).applyOnTheRight(iu-1, iu, rot);
+    m_matT.coeffRef(iu, iu-1) = Scalar(0); 
+    if (computeU)
+      m_matU.applyOnTheRight(iu-1, iu, rot);
+  }
+
+  if (iu > 1) 
+    m_matT.coeffRef(iu-1, iu-2) = Scalar(0);
+}
+
+/** \internal Form shift in shiftInfo, and update exshift if an exceptional shift is performed. */
+template<typename MatrixType>
+inline void RealSchur<MatrixType>::computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo)
+{
+  using std::sqrt;
+  using std::abs;
+  shiftInfo.coeffRef(0) = m_matT.coeff(iu,iu);
+  shiftInfo.coeffRef(1) = m_matT.coeff(iu-1,iu-1);
+  shiftInfo.coeffRef(2) = m_matT.coeff(iu,iu-1) * m_matT.coeff(iu-1,iu);
+
+  // Wilkinson's original ad hoc shift
+  if (iter == 10)
+  {
+    exshift += shiftInfo.coeff(0);
+    for (Index i = 0; i <= iu; ++i)
+      m_matT.coeffRef(i,i) -= shiftInfo.coeff(0);
+    Scalar s = abs(m_matT.coeff(iu,iu-1)) + abs(m_matT.coeff(iu-1,iu-2));
+    shiftInfo.coeffRef(0) = Scalar(0.75) * s;
+    shiftInfo.coeffRef(1) = Scalar(0.75) * s;
+    shiftInfo.coeffRef(2) = Scalar(-0.4375) * s * s;
+  }
+
+  // MATLAB's new ad hoc shift
+  if (iter == 30)
+  {
+    Scalar s = (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
+    s = s * s + shiftInfo.coeff(2);
+    if (s > Scalar(0))
+    {
+      s = sqrt(s);
+      if (shiftInfo.coeff(1) < shiftInfo.coeff(0))
+        s = -s;
+      s = s + (shiftInfo.coeff(1) - shiftInfo.coeff(0)) / Scalar(2.0);
+      s = shiftInfo.coeff(0) - shiftInfo.coeff(2) / s;
+      exshift += s;
+      for (Index i = 0; i <= iu; ++i)
+        m_matT.coeffRef(i,i) -= s;
+      shiftInfo.setConstant(Scalar(0.964));
+    }
+  }
+}
+
+/** \internal Compute index im at which Francis QR step starts and the first Householder vector. */
+template<typename MatrixType>
+inline void RealSchur<MatrixType>::initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector)
+{
+  using std::abs;
+  Vector3s& v = firstHouseholderVector; // alias to save typing
+
+  for (im = iu-2; im >= il; --im)
+  {
+    const Scalar Tmm = m_matT.coeff(im,im);
+    const Scalar r = shiftInfo.coeff(0) - Tmm;
+    const Scalar s = shiftInfo.coeff(1) - Tmm;
+    v.coeffRef(0) = (r * s - shiftInfo.coeff(2)) / m_matT.coeff(im+1,im) + m_matT.coeff(im,im+1);
+    v.coeffRef(1) = m_matT.coeff(im+1,im+1) - Tmm - r - s;
+    v.coeffRef(2) = m_matT.coeff(im+2,im+1);
+    if (im == il) {
+      break;
+    }
+    const Scalar lhs = m_matT.coeff(im,im-1) * (abs(v.coeff(1)) + abs(v.coeff(2)));
+    const Scalar rhs = v.coeff(0) * (abs(m_matT.coeff(im-1,im-1)) + abs(Tmm) + abs(m_matT.coeff(im+1,im+1)));
+    if (abs(lhs) < NumTraits<Scalar>::epsilon() * rhs)
+      break;
+  }
+}
+
+/** \internal Perform a Francis QR step involving rows il:iu and columns im:iu. */
+template<typename MatrixType>
+inline void RealSchur<MatrixType>::performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector, Scalar* workspace)
+{
+  eigen_assert(im >= il);
+  eigen_assert(im <= iu-2);
+
+  const Index size = m_matT.cols();
+
+  for (Index k = im; k <= iu-2; ++k)
+  {
+    bool firstIteration = (k == im);
+
+    Vector3s v;
+    if (firstIteration)
+      v = firstHouseholderVector;
+    else
+      v = m_matT.template block<3,1>(k,k-1);
+
+    Scalar tau, beta;
+    Matrix<Scalar, 2, 1> ess;
+    v.makeHouseholder(ess, tau, beta);
+    
+    if (beta != Scalar(0)) // if v is not zero
+    {
+      if (firstIteration && k > il)
+        m_matT.coeffRef(k,k-1) = -m_matT.coeff(k,k-1);
+      else if (!firstIteration)
+        m_matT.coeffRef(k,k-1) = beta;
+
+      // These Householder transformations form the O(n^3) part of the algorithm
+      m_matT.block(k, k, 3, size-k).applyHouseholderOnTheLeft(ess, tau, workspace);
+      m_matT.block(0, k, (std::min)(iu,k+3) + 1, 3).applyHouseholderOnTheRight(ess, tau, workspace);
+      if (computeU)
+        m_matU.block(0, k, size, 3).applyHouseholderOnTheRight(ess, tau, workspace);
+    }
+  }
+
+  Matrix<Scalar, 2, 1> v = m_matT.template block<2,1>(iu-1, iu-2);
+  Scalar tau, beta;
+  Matrix<Scalar, 1, 1> ess;
+  v.makeHouseholder(ess, tau, beta);
+
+  if (beta != Scalar(0)) // if v is not zero
+  {
+    m_matT.coeffRef(iu-1, iu-2) = beta;
+    m_matT.block(iu-1, iu-1, 2, size-iu+1).applyHouseholderOnTheLeft(ess, tau, workspace);
+    m_matT.block(0, iu-1, iu+1, 2).applyHouseholderOnTheRight(ess, tau, workspace);
+    if (computeU)
+      m_matU.block(0, iu-1, size, 2).applyHouseholderOnTheRight(ess, tau, workspace);
+  }
+
+  // clean up pollution due to round-off errors
+  for (Index i = im+2; i <= iu; ++i)
+  {
+    m_matT.coeffRef(i,i-2) = Scalar(0);
+    if (i > im+2)
+      m_matT.coeffRef(i,i-3) = Scalar(0);
+  }
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_REAL_SCHUR_H
diff --git a/third-party/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h b/third-party/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h
new file mode 100644
index 00000000..2c225171
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h
@@ -0,0 +1,77 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to LAPACKe
+ *    Real Schur needed to real unsymmetrical eigenvalues/eigenvectors.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_REAL_SCHUR_LAPACKE_H
+#define EIGEN_REAL_SCHUR_LAPACKE_H
+
+namespace Eigen { 
+
+/** \internal Specialization for the data types supported by LAPACKe */
+
+#define EIGEN_LAPACKE_SCHUR_REAL(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX, LAPACKE_PREFIX_U, EIGCOLROW, LAPACKE_COLROW) \
+template<> template<typename InputType> inline \
+RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
+RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, bool computeU) \
+{ \
+  eigen_assert(matrix.cols() == matrix.rows()); \
+\
+  lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), sdim, info; \
+  lapack_int matrix_order = LAPACKE_COLROW; \
+  char jobvs, sort='N'; \
+  LAPACK_##LAPACKE_PREFIX_U##_SELECT2 select = 0; \
+  jobvs = (computeU) ? 'V' : 'N'; \
+  m_matU.resize(n, n); \
+  lapack_int ldvs  = internal::convert_index<lapack_int>(m_matU.outerStride()); \
+  m_matT = matrix; \
+  lapack_int lda = internal::convert_index<lapack_int>(m_matT.outerStride()); \
+  Matrix<EIGTYPE, Dynamic, Dynamic> wr, wi; \
+  wr.resize(n, 1); wi.resize(n, 1); \
+  info = LAPACKE_##LAPACKE_PREFIX##gees( matrix_order, jobvs, sort, select, n, (LAPACKE_TYPE*)m_matT.data(), lda, &sdim, (LAPACKE_TYPE*)wr.data(), (LAPACKE_TYPE*)wi.data(), (LAPACKE_TYPE*)m_matU.data(), ldvs ); \
+  if(info == 0) \
+    m_info = Success; \
+  else \
+    m_info = NoConvergence; \
+\
+  m_isInitialized = true; \
+  m_matUisUptodate = computeU; \
+  return *this; \
+\
+}
+
+EIGEN_LAPACKE_SCHUR_REAL(double,   double, d, D, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_SCHUR_REAL(float,    float,  s, S, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_SCHUR_REAL(double,   double, d, D, RowMajor, LAPACK_ROW_MAJOR)
+EIGEN_LAPACKE_SCHUR_REAL(float,    float,  s, S, RowMajor, LAPACK_ROW_MAJOR)
+
+} // end namespace Eigen
+
+#endif // EIGEN_REAL_SCHUR_LAPACKE_H
diff --git a/third-party/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/third-party/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
new file mode 100644
index 00000000..d37656fa
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -0,0 +1,871 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SELFADJOINTEIGENSOLVER_H
+#define EIGEN_SELFADJOINTEIGENSOLVER_H
+
+#include "./Tridiagonalization.h"
+
+namespace Eigen { 
+
+template<typename _MatrixType>
+class GeneralizedSelfAdjointEigenSolver;
+
+namespace internal {
+template<typename SolverType,int Size,bool IsComplex> struct direct_selfadjoint_eigenvalues;
+template<typename MatrixType, typename DiagType, typename SubDiagType>
+ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag, const Index maxIterations, bool computeEigenvectors, MatrixType& eivec);
+}
+
+/** \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  *
+  * \class SelfAdjointEigenSolver
+  *
+  * \brief Computes eigenvalues and eigenvectors of selfadjoint matrices
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the
+  * eigendecomposition; this is expected to be an instantiation of the Matrix
+  * class template.
+  *
+  * A matrix \f$ A \f$ is selfadjoint if it equals its adjoint. For real
+  * matrices, this means that the matrix is symmetric: it equals its
+  * transpose. This class computes the eigenvalues and eigenvectors of a
+  * selfadjoint matrix. These are the scalars \f$ \lambda \f$ and vectors
+  * \f$ v \f$ such that \f$ Av = \lambda v \f$.  The eigenvalues of a
+  * selfadjoint matrix are always real. If \f$ D \f$ is a diagonal matrix with
+  * the eigenvalues on the diagonal, and \f$ V \f$ is a matrix with the
+  * eigenvectors as its columns, then \f$ A = V D V^{-1} \f$ (for selfadjoint
+  * matrices, the matrix \f$ V \f$ is always invertible). This is called the
+  * eigendecomposition.
+  *
+  * The algorithm exploits the fact that the matrix is selfadjoint, making it
+  * faster and more accurate than the general purpose eigenvalue algorithms
+  * implemented in EigenSolver and ComplexEigenSolver.
+  *
+  * Only the \b lower \b triangular \b part of the input matrix is referenced.
+  *
+  * Call the function compute() to compute the eigenvalues and eigenvectors of
+  * a given matrix. Alternatively, you can use the
+  * SelfAdjointEigenSolver(const MatrixType&, int) constructor which computes
+  * the eigenvalues and eigenvectors at construction time. Once the eigenvalue
+  * and eigenvectors are computed, they can be retrieved with the eigenvalues()
+  * and eigenvectors() functions.
+  *
+  * The documentation for SelfAdjointEigenSolver(const MatrixType&, int)
+  * contains an example of the typical use of this class.
+  *
+  * To solve the \em generalized eigenvalue problem \f$ Av = \lambda Bv \f$ and
+  * the likes, see the class GeneralizedSelfAdjointEigenSolver.
+  *
+  * \sa MatrixBase::eigenvalues(), class EigenSolver, class ComplexEigenSolver
+  */
+template<typename _MatrixType> class SelfAdjointEigenSolver
+{
+  public:
+
+    typedef _MatrixType MatrixType;
+    enum {
+      Size = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      Options = MatrixType::Options,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+    
+    /** \brief Scalar type for matrices of type \p _MatrixType. */
+    typedef typename MatrixType::Scalar Scalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+    
+    typedef Matrix<Scalar,Size,Size,ColMajor,MaxColsAtCompileTime,MaxColsAtCompileTime> EigenvectorsType;
+
+    /** \brief Real scalar type for \p _MatrixType.
+      *
+      * This is just \c Scalar if #Scalar is real (e.g., \c float or
+      * \c double), and the type of the real part of \c Scalar if #Scalar is
+      * complex.
+      */
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    
+    friend struct internal::direct_selfadjoint_eigenvalues<SelfAdjointEigenSolver,Size,NumTraits<Scalar>::IsComplex>;
+
+    /** \brief Type for vector of eigenvalues as returned by eigenvalues().
+      *
+      * This is a column vector with entries of type #RealScalar.
+      * The length of the vector is the size of \p _MatrixType.
+      */
+    typedef typename internal::plain_col_type<MatrixType, RealScalar>::type RealVectorType;
+    typedef Tridiagonalization<MatrixType> TridiagonalizationType;
+    typedef typename TridiagonalizationType::SubDiagonalType SubDiagonalType;
+
+    /** \brief Default constructor for fixed-size matrices.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via compute(). This constructor
+      * can only be used if \p _MatrixType is a fixed-size matrix; use
+      * SelfAdjointEigenSolver(Index) for dynamic-size matrices.
+      *
+      * Example: \include SelfAdjointEigenSolver_SelfAdjointEigenSolver.cpp
+      * Output: \verbinclude SelfAdjointEigenSolver_SelfAdjointEigenSolver.out
+      */
+    EIGEN_DEVICE_FUNC
+    SelfAdjointEigenSolver()
+        : m_eivec(),
+          m_eivalues(),
+          m_subdiag(),
+          m_isInitialized(false)
+    { }
+
+    /** \brief Constructor, pre-allocates memory for dynamic-size matrices.
+      *
+      * \param [in]  size  Positive integer, size of the matrix whose
+      * eigenvalues and eigenvectors will be computed.
+      *
+      * This constructor is useful for dynamic-size matrices, when the user
+      * intends to perform decompositions via compute(). The \p size
+      * parameter is only used as a hint. It is not an error to give a wrong
+      * \p size, but it may impair performance.
+      *
+      * \sa compute() for an example
+      */
+    EIGEN_DEVICE_FUNC
+    explicit SelfAdjointEigenSolver(Index size)
+        : m_eivec(size, size),
+          m_eivalues(size),
+          m_subdiag(size > 1 ? size - 1 : 1),
+          m_isInitialized(false)
+    {}
+
+    /** \brief Constructor; computes eigendecomposition of given matrix.
+      *
+      * \param[in]  matrix  Selfadjoint matrix whose eigendecomposition is to
+      *    be computed. Only the lower triangular part of the matrix is referenced.
+      * \param[in]  options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly.
+      *
+      * This constructor calls compute(const MatrixType&, int) to compute the
+      * eigenvalues of the matrix \p matrix. The eigenvectors are computed if
+      * \p options equals #ComputeEigenvectors.
+      *
+      * Example: \include SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType.cpp
+      * Output: \verbinclude SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType.out
+      *
+      * \sa compute(const MatrixType&, int)
+      */
+    template<typename InputType>
+    EIGEN_DEVICE_FUNC
+    explicit SelfAdjointEigenSolver(const EigenBase<InputType>& matrix, int options = ComputeEigenvectors)
+      : m_eivec(matrix.rows(), matrix.cols()),
+        m_eivalues(matrix.cols()),
+        m_subdiag(matrix.rows() > 1 ? matrix.rows() - 1 : 1),
+        m_isInitialized(false)
+    {
+      compute(matrix.derived(), options);
+    }
+
+    /** \brief Computes eigendecomposition of given matrix.
+      *
+      * \param[in]  matrix  Selfadjoint matrix whose eigendecomposition is to
+      *    be computed. Only the lower triangular part of the matrix is referenced.
+      * \param[in]  options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly.
+      * \returns    Reference to \c *this
+      *
+      * This function computes the eigenvalues of \p matrix.  The eigenvalues()
+      * function can be used to retrieve them.  If \p options equals #ComputeEigenvectors,
+      * then the eigenvectors are also computed and can be retrieved by
+      * calling eigenvectors().
+      *
+      * This implementation uses a symmetric QR algorithm. The matrix is first
+      * reduced to tridiagonal form using the Tridiagonalization class. The
+      * tridiagonal matrix is then brought to diagonal form with implicit
+      * symmetric QR steps with Wilkinson shift. Details can be found in
+      * Section 8.3 of Golub \& Van Loan, <i>%Matrix Computations</i>.
+      *
+      * The cost of the computation is about \f$ 9n^3 \f$ if the eigenvectors
+      * are required and \f$ 4n^3/3 \f$ if they are not required.
+      *
+      * This method reuses the memory in the SelfAdjointEigenSolver object that
+      * was allocated when the object was constructed, if the size of the
+      * matrix does not change.
+      *
+      * Example: \include SelfAdjointEigenSolver_compute_MatrixType.cpp
+      * Output: \verbinclude SelfAdjointEigenSolver_compute_MatrixType.out
+      *
+      * \sa SelfAdjointEigenSolver(const MatrixType&, int)
+      */
+    template<typename InputType>
+    EIGEN_DEVICE_FUNC
+    SelfAdjointEigenSolver& compute(const EigenBase<InputType>& matrix, int options = ComputeEigenvectors);
+    
+    /** \brief Computes eigendecomposition of given matrix using a closed-form algorithm
+      *
+      * This is a variant of compute(const MatrixType&, int options) which
+      * directly solves the underlying polynomial equation.
+      * 
+      * Currently only 2x2 and 3x3 matrices for which the sizes are known at compile time are supported (e.g., Matrix3d).
+      * 
+      * This method is usually significantly faster than the QR iterative algorithm
+      * but it might also be less accurate. It is also worth noting that
+      * for 3x3 matrices it involves trigonometric operations which are
+      * not necessarily available for all scalar types.
+      * 
+      * For the 3x3 case, we observed the following worst case relative error regarding the eigenvalues:
+      *   - double: 1e-8
+      *   - float:  1e-3
+      *
+      * \sa compute(const MatrixType&, int options)
+      */
+    EIGEN_DEVICE_FUNC
+    SelfAdjointEigenSolver& computeDirect(const MatrixType& matrix, int options = ComputeEigenvectors);
+
+    /**
+      *\brief Computes the eigen decomposition from a tridiagonal symmetric matrix
+      *
+      * \param[in] diag The vector containing the diagonal of the matrix.
+      * \param[in] subdiag The subdiagonal of the matrix.
+      * \param[in] options Can be #ComputeEigenvectors (default) or #EigenvaluesOnly.
+      * \returns Reference to \c *this
+      *
+      * This function assumes that the matrix has been reduced to tridiagonal form.
+      *
+      * \sa compute(const MatrixType&, int) for more information
+      */
+    SelfAdjointEigenSolver& computeFromTridiagonal(const RealVectorType& diag, const SubDiagonalType& subdiag , int options=ComputeEigenvectors);
+
+    /** \brief Returns the eigenvectors of given matrix.
+      *
+      * \returns  A const reference to the matrix whose columns are the eigenvectors.
+      *
+      * \pre The eigenvectors have been computed before.
+      *
+      * Column \f$ k \f$ of the returned matrix is an eigenvector corresponding
+      * to eigenvalue number \f$ k \f$ as returned by eigenvalues().  The
+      * eigenvectors are normalized to have (Euclidean) norm equal to one. If
+      * this object was used to solve the eigenproblem for the selfadjoint
+      * matrix \f$ A \f$, then the matrix returned by this function is the
+      * matrix \f$ V \f$ in the eigendecomposition \f$ A = V D V^{-1} \f$.
+      *
+      * Example: \include SelfAdjointEigenSolver_eigenvectors.cpp
+      * Output: \verbinclude SelfAdjointEigenSolver_eigenvectors.out
+      *
+      * \sa eigenvalues()
+      */
+    EIGEN_DEVICE_FUNC
+    const EigenvectorsType& eigenvectors() const
+    {
+      eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
+      eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
+      return m_eivec;
+    }
+
+    /** \brief Returns the eigenvalues of given matrix.
+      *
+      * \returns A const reference to the column vector containing the eigenvalues.
+      *
+      * \pre The eigenvalues have been computed before.
+      *
+      * The eigenvalues are repeated according to their algebraic multiplicity,
+      * so there are as many eigenvalues as rows in the matrix. The eigenvalues
+      * are sorted in increasing order.
+      *
+      * Example: \include SelfAdjointEigenSolver_eigenvalues.cpp
+      * Output: \verbinclude SelfAdjointEigenSolver_eigenvalues.out
+      *
+      * \sa eigenvectors(), MatrixBase::eigenvalues()
+      */
+    EIGEN_DEVICE_FUNC
+    const RealVectorType& eigenvalues() const
+    {
+      eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
+      return m_eivalues;
+    }
+
+    /** \brief Computes the positive-definite square root of the matrix.
+      *
+      * \returns the positive-definite square root of the matrix
+      *
+      * \pre The eigenvalues and eigenvectors of a positive-definite matrix
+      * have been computed before.
+      *
+      * The square root of a positive-definite matrix \f$ A \f$ is the
+      * positive-definite matrix whose square equals \f$ A \f$. This function
+      * uses the eigendecomposition \f$ A = V D V^{-1} \f$ to compute the
+      * square root as \f$ A^{1/2} = V D^{1/2} V^{-1} \f$.
+      *
+      * Example: \include SelfAdjointEigenSolver_operatorSqrt.cpp
+      * Output: \verbinclude SelfAdjointEigenSolver_operatorSqrt.out
+      *
+      * \sa operatorInverseSqrt(), <a href="unsupported/group__MatrixFunctions__Module.html">MatrixFunctions Module</a>
+      */
+    EIGEN_DEVICE_FUNC
+    MatrixType operatorSqrt() const
+    {
+      eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
+      eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
+      return m_eivec * m_eivalues.cwiseSqrt().asDiagonal() * m_eivec.adjoint();
+    }
+
+    /** \brief Computes the inverse square root of the matrix.
+      *
+      * \returns the inverse positive-definite square root of the matrix
+      *
+      * \pre The eigenvalues and eigenvectors of a positive-definite matrix
+      * have been computed before.
+      *
+      * This function uses the eigendecomposition \f$ A = V D V^{-1} \f$ to
+      * compute the inverse square root as \f$ V D^{-1/2} V^{-1} \f$. This is
+      * cheaper than first computing the square root with operatorSqrt() and
+      * then its inverse with MatrixBase::inverse().
+      *
+      * Example: \include SelfAdjointEigenSolver_operatorInverseSqrt.cpp
+      * Output: \verbinclude SelfAdjointEigenSolver_operatorInverseSqrt.out
+      *
+      * \sa operatorSqrt(), MatrixBase::inverse(), <a href="unsupported/group__MatrixFunctions__Module.html">MatrixFunctions Module</a>
+      */
+    EIGEN_DEVICE_FUNC
+    MatrixType operatorInverseSqrt() const
+    {
+      eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
+      eigen_assert(m_eigenvectorsOk && "The eigenvectors have not been computed together with the eigenvalues.");
+      return m_eivec * m_eivalues.cwiseInverse().cwiseSqrt().asDiagonal() * m_eivec.adjoint();
+    }
+
+    /** \brief Reports whether previous computation was successful.
+      *
+      * \returns \c Success if computation was succesful, \c NoConvergence otherwise.
+      */
+    EIGEN_DEVICE_FUNC
+    ComputationInfo info() const
+    {
+      eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
+      return m_info;
+    }
+
+    /** \brief Maximum number of iterations.
+      *
+      * The algorithm terminates if it does not converge within m_maxIterations * n iterations, where n
+      * denotes the size of the matrix. This value is currently set to 30 (copied from LAPACK).
+      */
+    static const int m_maxIterations = 30;
+
+  protected:
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
+    EigenvectorsType m_eivec;
+    RealVectorType m_eivalues;
+    typename TridiagonalizationType::SubDiagonalType m_subdiag;
+    ComputationInfo m_info;
+    bool m_isInitialized;
+    bool m_eigenvectorsOk;
+};
+
+namespace internal {
+/** \internal
+  *
+  * \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  * Performs a QR step on a tridiagonal symmetric matrix represented as a
+  * pair of two vectors \a diag and \a subdiag.
+  *
+  * \param diag the diagonal part of the input selfadjoint tridiagonal matrix
+  * \param subdiag the sub-diagonal part of the input selfadjoint tridiagonal matrix
+  * \param start starting index of the submatrix to work on
+  * \param end last+1 index of the submatrix to work on
+  * \param matrixQ pointer to the column-major matrix holding the eigenvectors, can be 0
+  * \param n size of the input matrix
+  *
+  * For compilation efficiency reasons, this procedure does not use eigen expression
+  * for its arguments.
+  *
+  * Implemented from Golub's "Matrix Computations", algorithm 8.3.2:
+  * "implicit symmetric QR step with Wilkinson shift"
+  */
+template<int StorageOrder,typename RealScalar, typename Scalar, typename Index>
+EIGEN_DEVICE_FUNC
+static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n);
+}
+
+template<typename MatrixType>
+template<typename InputType>
+EIGEN_DEVICE_FUNC
+SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
+::compute(const EigenBase<InputType>& a_matrix, int options)
+{
+  check_template_parameters();
+  
+  const InputType &matrix(a_matrix.derived());
+  
+  using std::abs;
+  eigen_assert(matrix.cols() == matrix.rows());
+  eigen_assert((options&~(EigVecMask|GenEigMask))==0
+          && (options&EigVecMask)!=EigVecMask
+          && "invalid option parameter");
+  bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;
+  Index n = matrix.cols();
+  m_eivalues.resize(n,1);
+
+  if(n==1)
+  {
+    m_eivec = matrix;
+    m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0));
+    if(computeEigenvectors)
+      m_eivec.setOnes(n,n);
+    m_info = Success;
+    m_isInitialized = true;
+    m_eigenvectorsOk = computeEigenvectors;
+    return *this;
+  }
+
+  // declare some aliases
+  RealVectorType& diag = m_eivalues;
+  EigenvectorsType& mat = m_eivec;
+
+  // map the matrix coefficients to [-1:1] to avoid over- and underflow.
+  mat = matrix.template triangularView<Lower>();
+  RealScalar scale = mat.cwiseAbs().maxCoeff();
+  if(scale==RealScalar(0)) scale = RealScalar(1);
+  mat.template triangularView<Lower>() /= scale;
+  m_subdiag.resize(n-1);
+  internal::tridiagonalization_inplace(mat, diag, m_subdiag, computeEigenvectors);
+
+  m_info = internal::computeFromTridiagonal_impl(diag, m_subdiag, m_maxIterations, computeEigenvectors, m_eivec);
+  
+  // scale back the eigen values
+  m_eivalues *= scale;
+
+  m_isInitialized = true;
+  m_eigenvectorsOk = computeEigenvectors;
+  return *this;
+}
+
+template<typename MatrixType>
+SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
+::computeFromTridiagonal(const RealVectorType& diag, const SubDiagonalType& subdiag , int options)
+{
+  //TODO : Add an option to scale the values beforehand
+  bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;
+
+  m_eivalues = diag;
+  m_subdiag = subdiag;
+  if (computeEigenvectors)
+  {
+    m_eivec.setIdentity(diag.size(), diag.size());
+  }
+  m_info = internal::computeFromTridiagonal_impl(m_eivalues, m_subdiag, m_maxIterations, computeEigenvectors, m_eivec);
+
+  m_isInitialized = true;
+  m_eigenvectorsOk = computeEigenvectors;
+  return *this;
+}
+
+namespace internal {
+/**
+  * \internal
+  * \brief Compute the eigendecomposition from a tridiagonal matrix
+  *
+  * \param[in,out] diag : On input, the diagonal of the matrix, on output the eigenvalues
+  * \param[in,out] subdiag : The subdiagonal part of the matrix (entries are modified during the decomposition)
+  * \param[in] maxIterations : the maximum number of iterations
+  * \param[in] computeEigenvectors : whether the eigenvectors have to be computed or not
+  * \param[out] eivec : The matrix to store the eigenvectors if computeEigenvectors==true. Must be allocated on input.
+  * \returns \c Success or \c NoConvergence
+  */
+template<typename MatrixType, typename DiagType, typename SubDiagType>
+ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag, const Index maxIterations, bool computeEigenvectors, MatrixType& eivec)
+{
+  using std::abs;
+
+  ComputationInfo info;
+  typedef typename MatrixType::Scalar Scalar;
+
+  Index n = diag.size();
+  Index end = n-1;
+  Index start = 0;
+  Index iter = 0; // total number of iterations
+  
+  typedef typename DiagType::RealScalar RealScalar;
+  const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
+  const RealScalar precision = RealScalar(2)*NumTraits<RealScalar>::epsilon();
+  
+  while (end>0)
+  {
+    for (Index i = start; i<end; ++i)
+      if (internal::isMuchSmallerThan(abs(subdiag[i]),(abs(diag[i])+abs(diag[i+1])),precision) || abs(subdiag[i]) <= considerAsZero)
+        subdiag[i] = 0;
+
+    // find the largest unreduced block
+    while (end>0 && subdiag[end-1]==RealScalar(0))
+    {
+      end--;
+    }
+    if (end<=0)
+      break;
+
+    // if we spent too many iterations, we give up
+    iter++;
+    if(iter > maxIterations * n) break;
+
+    start = end - 1;
+    while (start>0 && subdiag[start-1]!=0)
+      start--;
+
+    internal::tridiagonal_qr_step<MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor>(diag.data(), subdiag.data(), start, end, computeEigenvectors ? eivec.data() : (Scalar*)0, n);
+  }
+  if (iter <= maxIterations * n)
+    info = Success;
+  else
+    info = NoConvergence;
+
+  // Sort eigenvalues and corresponding vectors.
+  // TODO make the sort optional ?
+  // TODO use a better sort algorithm !!
+  if (info == Success)
+  {
+    for (Index i = 0; i < n-1; ++i)
+    {
+      Index k;
+      diag.segment(i,n-i).minCoeff(&k);
+      if (k > 0)
+      {
+        std::swap(diag[i], diag[k+i]);
+        if(computeEigenvectors)
+          eivec.col(i).swap(eivec.col(k+i));
+      }
+    }
+  }
+  return info;
+}
+  
+template<typename SolverType,int Size,bool IsComplex> struct direct_selfadjoint_eigenvalues
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(SolverType& eig, const typename SolverType::MatrixType& A, int options)
+  { eig.compute(A,options); }
+};
+
+template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3,false>
+{
+  typedef typename SolverType::MatrixType MatrixType;
+  typedef typename SolverType::RealVectorType VectorType;
+  typedef typename SolverType::Scalar Scalar;
+  typedef typename SolverType::EigenvectorsType EigenvectorsType;
+  
+
+  /** \internal
+   * Computes the roots of the characteristic polynomial of \a m.
+   * For numerical stability m.trace() should be near zero and to avoid over- or underflow m should be normalized.
+   */
+  EIGEN_DEVICE_FUNC
+  static inline void computeRoots(const MatrixType& m, VectorType& roots)
+  {
+    EIGEN_USING_STD_MATH(sqrt)
+    EIGEN_USING_STD_MATH(atan2)
+    EIGEN_USING_STD_MATH(cos)
+    EIGEN_USING_STD_MATH(sin)
+    const Scalar s_inv3 = Scalar(1)/Scalar(3);
+    const Scalar s_sqrt3 = sqrt(Scalar(3));
+
+    // The characteristic equation is x^3 - c2*x^2 + c1*x - c0 = 0.  The
+    // eigenvalues are the roots to this equation, all guaranteed to be
+    // real-valued, because the matrix is symmetric.
+    Scalar c0 = m(0,0)*m(1,1)*m(2,2) + Scalar(2)*m(1,0)*m(2,0)*m(2,1) - m(0,0)*m(2,1)*m(2,1) - m(1,1)*m(2,0)*m(2,0) - m(2,2)*m(1,0)*m(1,0);
+    Scalar c1 = m(0,0)*m(1,1) - m(1,0)*m(1,0) + m(0,0)*m(2,2) - m(2,0)*m(2,0) + m(1,1)*m(2,2) - m(2,1)*m(2,1);
+    Scalar c2 = m(0,0) + m(1,1) + m(2,2);
+
+    // Construct the parameters used in classifying the roots of the equation
+    // and in solving the equation for the roots in closed form.
+    Scalar c2_over_3 = c2*s_inv3;
+    Scalar a_over_3 = (c2*c2_over_3 - c1)*s_inv3;
+    a_over_3 = numext::maxi(a_over_3, Scalar(0));
+
+    Scalar half_b = Scalar(0.5)*(c0 + c2_over_3*(Scalar(2)*c2_over_3*c2_over_3 - c1));
+
+    Scalar q = a_over_3*a_over_3*a_over_3 - half_b*half_b;
+    q = numext::maxi(q, Scalar(0));
+
+    // Compute the eigenvalues by solving for the roots of the polynomial.
+    Scalar rho = sqrt(a_over_3);
+    Scalar theta = atan2(sqrt(q),half_b)*s_inv3;  // since sqrt(q) > 0, atan2 is in [0, pi] and theta is in [0, pi/3]
+    Scalar cos_theta = cos(theta);
+    Scalar sin_theta = sin(theta);
+    // roots are already sorted, since cos is monotonically decreasing on [0, pi]
+    roots(0) = c2_over_3 - rho*(cos_theta + s_sqrt3*sin_theta); // == 2*rho*cos(theta+2pi/3)
+    roots(1) = c2_over_3 - rho*(cos_theta - s_sqrt3*sin_theta); // == 2*rho*cos(theta+ pi/3)
+    roots(2) = c2_over_3 + Scalar(2)*rho*cos_theta;
+  }
+
+  EIGEN_DEVICE_FUNC
+  static inline bool extract_kernel(MatrixType& mat, Ref<VectorType> res, Ref<VectorType> representative)
+  {
+    EIGEN_USING_STD_MATH(sqrt)
+    EIGEN_USING_STD_MATH(abs)
+    Index i0;
+    // Find non-zero column i0 (by construction, there must exist a non zero coefficient on the diagonal):
+    mat.diagonal().cwiseAbs().maxCoeff(&i0);
+    // mat.col(i0) is a good candidate for an orthogonal vector to the current eigenvector,
+    // so let's save it:
+    representative = mat.col(i0);
+    Scalar n0, n1;
+    VectorType c0, c1;
+    n0 = (c0 = representative.cross(mat.col((i0+1)%3))).squaredNorm();
+    n1 = (c1 = representative.cross(mat.col((i0+2)%3))).squaredNorm();
+    if(n0>n1) res = c0/sqrt(n0);
+    else      res = c1/sqrt(n1);
+
+    return true;
+  }
+
+  EIGEN_DEVICE_FUNC
+  static inline void run(SolverType& solver, const MatrixType& mat, int options)
+  {
+    eigen_assert(mat.cols() == 3 && mat.cols() == mat.rows());
+    eigen_assert((options&~(EigVecMask|GenEigMask))==0
+            && (options&EigVecMask)!=EigVecMask
+            && "invalid option parameter");
+    bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;
+    
+    EigenvectorsType& eivecs = solver.m_eivec;
+    VectorType& eivals = solver.m_eivalues;
+  
+    // Shift the matrix to the mean eigenvalue and map the matrix coefficients to [-1:1] to avoid over- and underflow.
+    Scalar shift = mat.trace() / Scalar(3);
+    // TODO Avoid this copy. Currently it is necessary to suppress bogus values when determining maxCoeff and for computing the eigenvectors later
+    MatrixType scaledMat = mat.template selfadjointView<Lower>();
+    scaledMat.diagonal().array() -= shift;
+    Scalar scale = scaledMat.cwiseAbs().maxCoeff();
+    if(scale > 0) scaledMat /= scale;   // TODO for scale==0 we could save the remaining operations
+
+    // compute the eigenvalues
+    computeRoots(scaledMat,eivals);
+
+    // compute the eigenvectors
+    if(computeEigenvectors)
+    {
+      if((eivals(2)-eivals(0))<=Eigen::NumTraits<Scalar>::epsilon())
+      {
+        // All three eigenvalues are numerically the same
+        eivecs.setIdentity();
+      }
+      else
+      {
+        MatrixType tmp;
+        tmp = scaledMat;
+
+        // Compute the eigenvector of the most distinct eigenvalue
+        Scalar d0 = eivals(2) - eivals(1);
+        Scalar d1 = eivals(1) - eivals(0);
+        Index k(0), l(2);
+        if(d0 > d1)
+        {
+          numext::swap(k,l);
+          d0 = d1;
+        }
+
+        // Compute the eigenvector of index k
+        {
+          tmp.diagonal().array () -= eivals(k);
+          // By construction, 'tmp' is of rank 2, and its kernel corresponds to the respective eigenvector.
+          extract_kernel(tmp, eivecs.col(k), eivecs.col(l));
+        }
+
+        // Compute eigenvector of index l
+        if(d0<=2*Eigen::NumTraits<Scalar>::epsilon()*d1)
+        {
+          // If d0 is too small, then the two other eigenvalues are numerically the same,
+          // and thus we only have to ortho-normalize the near orthogonal vector we saved above.
+          eivecs.col(l) -= eivecs.col(k).dot(eivecs.col(l))*eivecs.col(l);
+          eivecs.col(l).normalize();
+        }
+        else
+        {
+          tmp = scaledMat;
+          tmp.diagonal().array () -= eivals(l);
+
+          VectorType dummy;
+          extract_kernel(tmp, eivecs.col(l), dummy);
+        }
+
+        // Compute last eigenvector from the other two
+        eivecs.col(1) = eivecs.col(2).cross(eivecs.col(0)).normalized();
+      }
+    }
+
+    // Rescale back to the original size.
+    eivals *= scale;
+    eivals.array() += shift;
+    
+    solver.m_info = Success;
+    solver.m_isInitialized = true;
+    solver.m_eigenvectorsOk = computeEigenvectors;
+  }
+};
+
+// 2x2 direct eigenvalues decomposition, code from Hauke Heibel
+template<typename SolverType> 
+struct direct_selfadjoint_eigenvalues<SolverType,2,false>
+{
+  typedef typename SolverType::MatrixType MatrixType;
+  typedef typename SolverType::RealVectorType VectorType;
+  typedef typename SolverType::Scalar Scalar;
+  typedef typename SolverType::EigenvectorsType EigenvectorsType;
+  
+  EIGEN_DEVICE_FUNC
+  static inline void computeRoots(const MatrixType& m, VectorType& roots)
+  {
+    using std::sqrt;
+    const Scalar t0 = Scalar(0.5) * sqrt( numext::abs2(m(0,0)-m(1,1)) + Scalar(4)*numext::abs2(m(1,0)));
+    const Scalar t1 = Scalar(0.5) * (m(0,0) + m(1,1));
+    roots(0) = t1 - t0;
+    roots(1) = t1 + t0;
+  }
+  
+  EIGEN_DEVICE_FUNC
+  static inline void run(SolverType& solver, const MatrixType& mat, int options)
+  {
+    EIGEN_USING_STD_MATH(sqrt);
+    EIGEN_USING_STD_MATH(abs);
+    
+    eigen_assert(mat.cols() == 2 && mat.cols() == mat.rows());
+    eigen_assert((options&~(EigVecMask|GenEigMask))==0
+            && (options&EigVecMask)!=EigVecMask
+            && "invalid option parameter");
+    bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors;
+    
+    EigenvectorsType& eivecs = solver.m_eivec;
+    VectorType& eivals = solver.m_eivalues;
+  
+    // Shift the matrix to the mean eigenvalue and map the matrix coefficients to [-1:1] to avoid over- and underflow.
+    Scalar shift = mat.trace() / Scalar(2);
+    MatrixType scaledMat = mat;
+    scaledMat.coeffRef(0,1) = mat.coeff(1,0);
+    scaledMat.diagonal().array() -= shift;
+    Scalar scale = scaledMat.cwiseAbs().maxCoeff();
+    if(scale > Scalar(0))
+      scaledMat /= scale;
+
+    // Compute the eigenvalues
+    computeRoots(scaledMat,eivals);
+
+    // compute the eigen vectors
+    if(computeEigenvectors)
+    {
+      if((eivals(1)-eivals(0))<=abs(eivals(1))*Eigen::NumTraits<Scalar>::epsilon())
+      {
+        eivecs.setIdentity();
+      }
+      else
+      {
+        scaledMat.diagonal().array () -= eivals(1);
+        Scalar a2 = numext::abs2(scaledMat(0,0));
+        Scalar c2 = numext::abs2(scaledMat(1,1));
+        Scalar b2 = numext::abs2(scaledMat(1,0));
+        if(a2>c2)
+        {
+          eivecs.col(1) << -scaledMat(1,0), scaledMat(0,0);
+          eivecs.col(1) /= sqrt(a2+b2);
+        }
+        else
+        {
+          eivecs.col(1) << -scaledMat(1,1), scaledMat(1,0);
+          eivecs.col(1) /= sqrt(c2+b2);
+        }
+
+        eivecs.col(0) << eivecs.col(1).unitOrthogonal();
+      }
+    }
+
+    // Rescale back to the original size.
+    eivals *= scale;
+    eivals.array() += shift;
+
+    solver.m_info = Success;
+    solver.m_isInitialized = true;
+    solver.m_eigenvectorsOk = computeEigenvectors;
+  }
+};
+
+}
+
+template<typename MatrixType>
+EIGEN_DEVICE_FUNC
+SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
+::computeDirect(const MatrixType& matrix, int options)
+{
+  internal::direct_selfadjoint_eigenvalues<SelfAdjointEigenSolver,Size,NumTraits<Scalar>::IsComplex>::run(*this,matrix,options);
+  return *this;
+}
+
+namespace internal {
+template<int StorageOrder,typename RealScalar, typename Scalar, typename Index>
+EIGEN_DEVICE_FUNC
+static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n)
+{
+  using std::abs;
+  RealScalar td = (diag[end-1] - diag[end])*RealScalar(0.5);
+  RealScalar e = subdiag[end-1];
+  // Note that thanks to scaling, e^2 or td^2 cannot overflow, however they can still
+  // underflow thus leading to inf/NaN values when using the following commented code:
+//   RealScalar e2 = numext::abs2(subdiag[end-1]);
+//   RealScalar mu = diag[end] - e2 / (td + (td>0 ? 1 : -1) * sqrt(td*td + e2));
+  // This explain the following, somewhat more complicated, version:
+  RealScalar mu = diag[end];
+  if(td==RealScalar(0))
+    mu -= abs(e);
+  else
+  {
+    RealScalar e2 = numext::abs2(subdiag[end-1]);
+    RealScalar h = numext::hypot(td,e);
+    if(e2==RealScalar(0)) mu -= (e / (td + (td>RealScalar(0) ? RealScalar(1) : RealScalar(-1)))) * (e / h);
+    else                  mu -= e2 / (td + (td>RealScalar(0) ? h : -h));
+  }
+  
+  RealScalar x = diag[start] - mu;
+  RealScalar z = subdiag[start];
+  for (Index k = start; k < end; ++k)
+  {
+    JacobiRotation<RealScalar> rot;
+    rot.makeGivens(x, z);
+
+    // do T = G' T G
+    RealScalar sdk = rot.s() * diag[k] + rot.c() * subdiag[k];
+    RealScalar dkp1 = rot.s() * subdiag[k] + rot.c() * diag[k+1];
+
+    diag[k] = rot.c() * (rot.c() * diag[k] - rot.s() * subdiag[k]) - rot.s() * (rot.c() * subdiag[k] - rot.s() * diag[k+1]);
+    diag[k+1] = rot.s() * sdk + rot.c() * dkp1;
+    subdiag[k] = rot.c() * sdk - rot.s() * dkp1;
+    
+
+    if (k > start)
+      subdiag[k - 1] = rot.c() * subdiag[k-1] - rot.s() * z;
+
+    x = subdiag[k];
+
+    if (k < end - 1)
+    {
+      z = -rot.s() * subdiag[k+1];
+      subdiag[k + 1] = rot.c() * subdiag[k+1];
+    }
+    
+    // apply the givens rotation to the unit matrix Q = Q * G
+    if (matrixQ)
+    {
+      // FIXME if StorageOrder == RowMajor this operation is not very efficient
+      Map<Matrix<Scalar,Dynamic,Dynamic,StorageOrder> > q(matrixQ,n,n);
+      q.applyOnTheRight(k,k+1,rot);
+    }
+  }
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SELFADJOINTEIGENSOLVER_H
diff --git a/third-party/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h b/third-party/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h
new file mode 100644
index 00000000..b0c947dc
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h
@@ -0,0 +1,87 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to LAPACKe
+ *    Self-adjoint eigenvalues/eigenvectors.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_SAEIGENSOLVER_LAPACKE_H
+#define EIGEN_SAEIGENSOLVER_LAPACKE_H
+
+namespace Eigen { 
+
+/** \internal Specialization for the data types supported by LAPACKe */
+
+#define EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW ) \
+template<> template<typename InputType> inline \
+SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
+SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \
+{ \
+  eigen_assert(matrix.cols() == matrix.rows()); \
+  eigen_assert((options&~(EigVecMask|GenEigMask))==0 \
+          && (options&EigVecMask)!=EigVecMask \
+          && "invalid option parameter"); \
+  bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \
+  lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, info; \
+  m_eivalues.resize(n,1); \
+  m_subdiag.resize(n-1); \
+  m_eivec = matrix; \
+\
+  if(n==1) \
+  { \
+    m_eivalues.coeffRef(0,0) = numext::real(m_eivec.coeff(0,0)); \
+    if(computeEigenvectors) m_eivec.setOnes(n,n); \
+    m_info = Success; \
+    m_isInitialized = true; \
+    m_eigenvectorsOk = computeEigenvectors; \
+    return *this; \
+  } \
+\
+  lda = internal::convert_index<lapack_int>(m_eivec.outerStride()); \
+  char jobz, uplo='L'/*, range='A'*/; \
+  jobz = computeEigenvectors ? 'V' : 'N'; \
+\
+  info = LAPACKE_##LAPACKE_NAME( LAPACK_COL_MAJOR, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \
+  m_info = (info==0) ? Success : NoConvergence; \
+  m_isInitialized = true; \
+  m_eigenvectorsOk = computeEigenvectors; \
+  return *this; \
+}
+
+#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME )              \
+        EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, ColMajor )  \
+        EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, RowMajor ) 
+
+EIGEN_LAPACKE_EIG_SELFADJ(double,   double,                double, dsyev)
+EIGEN_LAPACKE_EIG_SELFADJ(float,    float,                 float,  ssyev)
+EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev)
+EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float,  float,  cheev)
+
+} // end namespace Eigen
+
+#endif // EIGEN_SAEIGENSOLVER_H
diff --git a/third-party/Eigen/src/Eigenvalues/Tridiagonalization.h b/third-party/Eigen/src/Eigenvalues/Tridiagonalization.h
new file mode 100644
index 00000000..1d102c17
--- /dev/null
+++ b/third-party/Eigen/src/Eigenvalues/Tridiagonalization.h
@@ -0,0 +1,556 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRIDIAGONALIZATION_H
+#define EIGEN_TRIDIAGONALIZATION_H
+
+namespace Eigen { 
+
+namespace internal {
+  
+template<typename MatrixType> struct TridiagonalizationMatrixTReturnType;
+template<typename MatrixType>
+struct traits<TridiagonalizationMatrixTReturnType<MatrixType> >
+  : public traits<typename MatrixType::PlainObject>
+{
+  typedef typename MatrixType::PlainObject ReturnType; // FIXME shall it be a BandMatrix?
+  enum { Flags = 0 };
+};
+
+template<typename MatrixType, typename CoeffVectorType>
+void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs);
+}
+
+/** \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  *
+  * \class Tridiagonalization
+  *
+  * \brief Tridiagonal decomposition of a selfadjoint matrix
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the
+  * tridiagonal decomposition; this is expected to be an instantiation of the
+  * Matrix class template.
+  *
+  * This class performs a tridiagonal decomposition of a selfadjoint matrix \f$ A \f$ such that:
+  * \f$ A = Q T Q^* \f$ where \f$ Q \f$ is unitary and \f$ T \f$ a real symmetric tridiagonal matrix.
+  *
+  * A tridiagonal matrix is a matrix which has nonzero elements only on the
+  * main diagonal and the first diagonal below and above it. The Hessenberg
+  * decomposition of a selfadjoint matrix is in fact a tridiagonal
+  * decomposition. This class is used in SelfAdjointEigenSolver to compute the
+  * eigenvalues and eigenvectors of a selfadjoint matrix.
+  *
+  * Call the function compute() to compute the tridiagonal decomposition of a
+  * given matrix. Alternatively, you can use the Tridiagonalization(const MatrixType&)
+  * constructor which computes the tridiagonal Schur decomposition at
+  * construction time. Once the decomposition is computed, you can use the
+  * matrixQ() and matrixT() functions to retrieve the matrices Q and T in the
+  * decomposition.
+  *
+  * The documentation of Tridiagonalization(const MatrixType&) contains an
+  * example of the typical use of this class.
+  *
+  * \sa class HessenbergDecomposition, class SelfAdjointEigenSolver
+  */
+template<typename _MatrixType> class Tridiagonalization
+{
+  public:
+
+    /** \brief Synonym for the template parameter \p _MatrixType. */
+    typedef _MatrixType MatrixType;
+
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+
+    enum {
+      Size = MatrixType::RowsAtCompileTime,
+      SizeMinusOne = Size == Dynamic ? Dynamic : (Size > 1 ? Size - 1 : 1),
+      Options = MatrixType::Options,
+      MaxSize = MatrixType::MaxRowsAtCompileTime,
+      MaxSizeMinusOne = MaxSize == Dynamic ? Dynamic : (MaxSize > 1 ? MaxSize - 1 : 1)
+    };
+
+    typedef Matrix<Scalar, SizeMinusOne, 1, Options & ~RowMajor, MaxSizeMinusOne, 1> CoeffVectorType;
+    typedef typename internal::plain_col_type<MatrixType, RealScalar>::type DiagonalType;
+    typedef Matrix<RealScalar, SizeMinusOne, 1, Options & ~RowMajor, MaxSizeMinusOne, 1> SubDiagonalType;
+    typedef typename internal::remove_all<typename MatrixType::RealReturnType>::type MatrixTypeRealView;
+    typedef internal::TridiagonalizationMatrixTReturnType<MatrixTypeRealView> MatrixTReturnType;
+
+    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+              typename internal::add_const_on_value_type<typename Diagonal<const MatrixType>::RealReturnType>::type,
+              const Diagonal<const MatrixType>
+            >::type DiagonalReturnType;
+
+    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+              typename internal::add_const_on_value_type<typename Diagonal<const MatrixType, -1>::RealReturnType>::type,
+              const Diagonal<const MatrixType, -1>
+            >::type SubDiagonalReturnType;
+
+    /** \brief Return type of matrixQ() */
+    typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename CoeffVectorType::ConjugateReturnType>::type> HouseholderSequenceType;
+
+    /** \brief Default constructor.
+      *
+      * \param [in]  size  Positive integer, size of the matrix whose tridiagonal
+      * decomposition will be computed.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via compute().  The \p size parameter is only
+      * used as a hint. It is not an error to give a wrong \p size, but it may
+      * impair performance.
+      *
+      * \sa compute() for an example.
+      */
+    explicit Tridiagonalization(Index size = Size==Dynamic ? 2 : Size)
+      : m_matrix(size,size),
+        m_hCoeffs(size > 1 ? size-1 : 1),
+        m_isInitialized(false)
+    {}
+
+    /** \brief Constructor; computes tridiagonal decomposition of given matrix.
+      *
+      * \param[in]  matrix  Selfadjoint matrix whose tridiagonal decomposition
+      * is to be computed.
+      *
+      * This constructor calls compute() to compute the tridiagonal decomposition.
+      *
+      * Example: \include Tridiagonalization_Tridiagonalization_MatrixType.cpp
+      * Output: \verbinclude Tridiagonalization_Tridiagonalization_MatrixType.out
+      */
+    template<typename InputType>
+    explicit Tridiagonalization(const EigenBase<InputType>& matrix)
+      : m_matrix(matrix.derived()),
+        m_hCoeffs(matrix.cols() > 1 ? matrix.cols()-1 : 1),
+        m_isInitialized(false)
+    {
+      internal::tridiagonalization_inplace(m_matrix, m_hCoeffs);
+      m_isInitialized = true;
+    }
+
+    /** \brief Computes tridiagonal decomposition of given matrix.
+      *
+      * \param[in]  matrix  Selfadjoint matrix whose tridiagonal decomposition
+      * is to be computed.
+      * \returns    Reference to \c *this
+      *
+      * The tridiagonal decomposition is computed by bringing the columns of
+      * the matrix successively in the required form using Householder
+      * reflections. The cost is \f$ 4n^3/3 \f$ flops, where \f$ n \f$ denotes
+      * the size of the given matrix.
+      *
+      * This method reuses of the allocated data in the Tridiagonalization
+      * object, if the size of the matrix does not change.
+      *
+      * Example: \include Tridiagonalization_compute.cpp
+      * Output: \verbinclude Tridiagonalization_compute.out
+      */
+    template<typename InputType>
+    Tridiagonalization& compute(const EigenBase<InputType>& matrix)
+    {
+      m_matrix = matrix.derived();
+      m_hCoeffs.resize(matrix.rows()-1, 1);
+      internal::tridiagonalization_inplace(m_matrix, m_hCoeffs);
+      m_isInitialized = true;
+      return *this;
+    }
+
+    /** \brief Returns the Householder coefficients.
+      *
+      * \returns a const reference to the vector of Householder coefficients
+      *
+      * \pre Either the constructor Tridiagonalization(const MatrixType&) or
+      * the member function compute(const MatrixType&) has been called before
+      * to compute the tridiagonal decomposition of a matrix.
+      *
+      * The Householder coefficients allow the reconstruction of the matrix
+      * \f$ Q \f$ in the tridiagonal decomposition from the packed data.
+      *
+      * Example: \include Tridiagonalization_householderCoefficients.cpp
+      * Output: \verbinclude Tridiagonalization_householderCoefficients.out
+      *
+      * \sa packedMatrix(), \ref Householder_Module "Householder module"
+      */
+    inline CoeffVectorType householderCoefficients() const
+    {
+      eigen_assert(m_isInitialized && "Tridiagonalization is not initialized.");
+      return m_hCoeffs;
+    }
+
+    /** \brief Returns the internal representation of the decomposition
+      *
+      *	\returns a const reference to a matrix with the internal representation
+      *	         of the decomposition.
+      *
+      * \pre Either the constructor Tridiagonalization(const MatrixType&) or
+      * the member function compute(const MatrixType&) has been called before
+      * to compute the tridiagonal decomposition of a matrix.
+      *
+      * The returned matrix contains the following information:
+      *  - the strict upper triangular part is equal to the input matrix A.
+      *  - the diagonal and lower sub-diagonal represent the real tridiagonal
+      *    symmetric matrix T.
+      *  - the rest of the lower part contains the Householder vectors that,
+      *    combined with Householder coefficients returned by
+      *    householderCoefficients(), allows to reconstruct the matrix Q as
+      *       \f$ Q = H_{N-1} \ldots H_1 H_0 \f$.
+      *    Here, the matrices \f$ H_i \f$ are the Householder transformations
+      *       \f$ H_i = (I - h_i v_i v_i^T) \f$
+      *    where \f$ h_i \f$ is the \f$ i \f$th Householder coefficient and
+      *    \f$ v_i \f$ is the Householder vector defined by
+      *       \f$ v_i = [ 0, \ldots, 0, 1, M(i+2,i), \ldots, M(N-1,i) ]^T \f$
+      *    with M the matrix returned by this function.
+      *
+      * See LAPACK for further details on this packed storage.
+      *
+      * Example: \include Tridiagonalization_packedMatrix.cpp
+      * Output: \verbinclude Tridiagonalization_packedMatrix.out
+      *
+      * \sa householderCoefficients()
+      */
+    inline const MatrixType& packedMatrix() const
+    {
+      eigen_assert(m_isInitialized && "Tridiagonalization is not initialized.");
+      return m_matrix;
+    }
+
+    /** \brief Returns the unitary matrix Q in the decomposition
+      *
+      * \returns object representing the matrix Q
+      *
+      * \pre Either the constructor Tridiagonalization(const MatrixType&) or
+      * the member function compute(const MatrixType&) has been called before
+      * to compute the tridiagonal decomposition of a matrix.
+      *
+      * This function returns a light-weight object of template class
+      * HouseholderSequence. You can either apply it directly to a matrix or
+      * you can convert it to a matrix of type #MatrixType.
+      *
+      * \sa Tridiagonalization(const MatrixType&) for an example,
+      *     matrixT(), class HouseholderSequence
+      */
+    HouseholderSequenceType matrixQ() const
+    {
+      eigen_assert(m_isInitialized && "Tridiagonalization is not initialized.");
+      return HouseholderSequenceType(m_matrix, m_hCoeffs.conjugate())
+             .setLength(m_matrix.rows() - 1)
+             .setShift(1);
+    }
+
+    /** \brief Returns an expression of the tridiagonal matrix T in the decomposition
+      *
+      * \returns expression object representing the matrix T
+      *
+      * \pre Either the constructor Tridiagonalization(const MatrixType&) or
+      * the member function compute(const MatrixType&) has been called before
+      * to compute the tridiagonal decomposition of a matrix.
+      *
+      * Currently, this function can be used to extract the matrix T from internal
+      * data and copy it to a dense matrix object. In most cases, it may be
+      * sufficient to directly use the packed matrix or the vector expressions
+      * returned by diagonal() and subDiagonal() instead of creating a new
+      * dense copy matrix with this function.
+      *
+      * \sa Tridiagonalization(const MatrixType&) for an example,
+      * matrixQ(), packedMatrix(), diagonal(), subDiagonal()
+      */
+    MatrixTReturnType matrixT() const
+    {
+      eigen_assert(m_isInitialized && "Tridiagonalization is not initialized.");
+      return MatrixTReturnType(m_matrix.real());
+    }
+
+    /** \brief Returns the diagonal of the tridiagonal matrix T in the decomposition.
+      *
+      * \returns expression representing the diagonal of T
+      *
+      * \pre Either the constructor Tridiagonalization(const MatrixType&) or
+      * the member function compute(const MatrixType&) has been called before
+      * to compute the tridiagonal decomposition of a matrix.
+      *
+      * Example: \include Tridiagonalization_diagonal.cpp
+      * Output: \verbinclude Tridiagonalization_diagonal.out
+      *
+      * \sa matrixT(), subDiagonal()
+      */
+    DiagonalReturnType diagonal() const;
+
+    /** \brief Returns the subdiagonal of the tridiagonal matrix T in the decomposition.
+      *
+      * \returns expression representing the subdiagonal of T
+      *
+      * \pre Either the constructor Tridiagonalization(const MatrixType&) or
+      * the member function compute(const MatrixType&) has been called before
+      * to compute the tridiagonal decomposition of a matrix.
+      *
+      * \sa diagonal() for an example, matrixT()
+      */
+    SubDiagonalReturnType subDiagonal() const;
+
+  protected:
+
+    MatrixType m_matrix;
+    CoeffVectorType m_hCoeffs;
+    bool m_isInitialized;
+};
+
+template<typename MatrixType>
+typename Tridiagonalization<MatrixType>::DiagonalReturnType
+Tridiagonalization<MatrixType>::diagonal() const
+{
+  eigen_assert(m_isInitialized && "Tridiagonalization is not initialized.");
+  return m_matrix.diagonal().real();
+}
+
+template<typename MatrixType>
+typename Tridiagonalization<MatrixType>::SubDiagonalReturnType
+Tridiagonalization<MatrixType>::subDiagonal() const
+{
+  eigen_assert(m_isInitialized && "Tridiagonalization is not initialized.");
+  return m_matrix.template diagonal<-1>().real();
+}
+
+namespace internal {
+
+/** \internal
+  * Performs a tridiagonal decomposition of the selfadjoint matrix \a matA in-place.
+  *
+  * \param[in,out] matA On input the selfadjoint matrix. Only the \b lower triangular part is referenced.
+  *                     On output, the strict upper part is left unchanged, and the lower triangular part
+  *                     represents the T and Q matrices in packed format has detailed below.
+  * \param[out]    hCoeffs returned Householder coefficients (see below)
+  *
+  * On output, the tridiagonal selfadjoint matrix T is stored in the diagonal
+  * and lower sub-diagonal of the matrix \a matA.
+  * The unitary matrix Q is represented in a compact way as a product of
+  * Householder reflectors \f$ H_i \f$ such that:
+  *       \f$ Q = H_{N-1} \ldots H_1 H_0 \f$.
+  * The Householder reflectors are defined as
+  *       \f$ H_i = (I - h_i v_i v_i^T) \f$
+  * where \f$ h_i = hCoeffs[i]\f$ is the \f$ i \f$th Householder coefficient and
+  * \f$ v_i \f$ is the Householder vector defined by
+  *       \f$ v_i = [ 0, \ldots, 0, 1, matA(i+2,i), \ldots, matA(N-1,i) ]^T \f$.
+  *
+  * Implemented from Golub's "Matrix Computations", algorithm 8.3.1.
+  *
+  * \sa Tridiagonalization::packedMatrix()
+  */
+template<typename MatrixType, typename CoeffVectorType>
+void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs)
+{
+  using numext::conj;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  Index n = matA.rows();
+  eigen_assert(n==matA.cols());
+  eigen_assert(n==hCoeffs.size()+1 || n==1);
+  
+  for (Index i = 0; i<n-1; ++i)
+  {
+    Index remainingSize = n-i-1;
+    RealScalar beta;
+    Scalar h;
+    matA.col(i).tail(remainingSize).makeHouseholderInPlace(h, beta);
+
+    // Apply similarity transformation to remaining columns,
+    // i.e., A = H A H' where H = I - h v v' and v = matA.col(i).tail(n-i-1)
+    matA.col(i).coeffRef(i+1) = 1;
+
+    hCoeffs.tail(n-i-1).noalias() = (matA.bottomRightCorner(remainingSize,remainingSize).template selfadjointView<Lower>()
+                                  * (conj(h) * matA.col(i).tail(remainingSize)));
+
+    hCoeffs.tail(n-i-1) += (conj(h)*RealScalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1);
+
+    matA.bottomRightCorner(remainingSize, remainingSize).template selfadjointView<Lower>()
+      .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), Scalar(-1));
+
+    matA.col(i).coeffRef(i+1) = beta;
+    hCoeffs.coeffRef(i) = h;
+  }
+}
+
+// forward declaration, implementation at the end of this file
+template<typename MatrixType,
+         int Size=MatrixType::ColsAtCompileTime,
+         bool IsComplex=NumTraits<typename MatrixType::Scalar>::IsComplex>
+struct tridiagonalization_inplace_selector;
+
+/** \brief Performs a full tridiagonalization in place
+  *
+  * \param[in,out]  mat  On input, the selfadjoint matrix whose tridiagonal
+  *    decomposition is to be computed. Only the lower triangular part referenced.
+  *    The rest is left unchanged. On output, the orthogonal matrix Q
+  *    in the decomposition if \p extractQ is true.
+  * \param[out]  diag  The diagonal of the tridiagonal matrix T in the
+  *    decomposition.
+  * \param[out]  subdiag  The subdiagonal of the tridiagonal matrix T in
+  *    the decomposition.
+  * \param[in]  extractQ  If true, the orthogonal matrix Q in the
+  *    decomposition is computed and stored in \p mat.
+  *
+  * Computes the tridiagonal decomposition of the selfadjoint matrix \p mat in place
+  * such that \f$ mat = Q T Q^* \f$ where \f$ Q \f$ is unitary and \f$ T \f$ a real
+  * symmetric tridiagonal matrix.
+  *
+  * The tridiagonal matrix T is passed to the output parameters \p diag and \p subdiag. If
+  * \p extractQ is true, then the orthogonal matrix Q is passed to \p mat. Otherwise the lower
+  * part of the matrix \p mat is destroyed.
+  *
+  * The vectors \p diag and \p subdiag are not resized. The function
+  * assumes that they are already of the correct size. The length of the
+  * vector \p diag should equal the number of rows in \p mat, and the
+  * length of the vector \p subdiag should be one left.
+  *
+  * This implementation contains an optimized path for 3-by-3 matrices
+  * which is especially useful for plane fitting.
+  *
+  * \note Currently, it requires two temporary vectors to hold the intermediate
+  * Householder coefficients, and to reconstruct the matrix Q from the Householder
+  * reflectors.
+  *
+  * Example (this uses the same matrix as the example in
+  *    Tridiagonalization::Tridiagonalization(const MatrixType&)):
+  *    \include Tridiagonalization_decomposeInPlace.cpp
+  * Output: \verbinclude Tridiagonalization_decomposeInPlace.out
+  *
+  * \sa class Tridiagonalization
+  */
+template<typename MatrixType, typename DiagonalType, typename SubDiagonalType>
+void tridiagonalization_inplace(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
+{
+  eigen_assert(mat.cols()==mat.rows() && diag.size()==mat.rows() && subdiag.size()==mat.rows()-1);
+  tridiagonalization_inplace_selector<MatrixType>::run(mat, diag, subdiag, extractQ);
+}
+
+/** \internal
+  * General full tridiagonalization
+  */
+template<typename MatrixType, int Size, bool IsComplex>
+struct tridiagonalization_inplace_selector
+{
+  typedef typename Tridiagonalization<MatrixType>::CoeffVectorType CoeffVectorType;
+  typedef typename Tridiagonalization<MatrixType>::HouseholderSequenceType HouseholderSequenceType;
+  template<typename DiagonalType, typename SubDiagonalType>
+  static void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
+  {
+    CoeffVectorType hCoeffs(mat.cols()-1);
+    tridiagonalization_inplace(mat,hCoeffs);
+    diag = mat.diagonal().real();
+    subdiag = mat.template diagonal<-1>().real();
+    if(extractQ)
+      mat = HouseholderSequenceType(mat, hCoeffs.conjugate())
+            .setLength(mat.rows() - 1)
+            .setShift(1);
+  }
+};
+
+/** \internal
+  * Specialization for 3x3 real matrices.
+  * Especially useful for plane fitting.
+  */
+template<typename MatrixType>
+struct tridiagonalization_inplace_selector<MatrixType,3,false>
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+
+  template<typename DiagonalType, typename SubDiagonalType>
+  static void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
+  {
+    using std::sqrt;
+    const RealScalar tol = (std::numeric_limits<RealScalar>::min)();
+    diag[0] = mat(0,0);
+    RealScalar v1norm2 = numext::abs2(mat(2,0));
+    if(v1norm2 <= tol)
+    {
+      diag[1] = mat(1,1);
+      diag[2] = mat(2,2);
+      subdiag[0] = mat(1,0);
+      subdiag[1] = mat(2,1);
+      if (extractQ)
+        mat.setIdentity();
+    }
+    else
+    {
+      RealScalar beta = sqrt(numext::abs2(mat(1,0)) + v1norm2);
+      RealScalar invBeta = RealScalar(1)/beta;
+      Scalar m01 = mat(1,0) * invBeta;
+      Scalar m02 = mat(2,0) * invBeta;
+      Scalar q = RealScalar(2)*m01*mat(2,1) + m02*(mat(2,2) - mat(1,1));
+      diag[1] = mat(1,1) + m02*q;
+      diag[2] = mat(2,2) - m02*q;
+      subdiag[0] = beta;
+      subdiag[1] = mat(2,1) - m01 * q;
+      if (extractQ)
+      {
+        mat << 1,   0,    0,
+               0, m01,  m02,
+               0, m02, -m01;
+      }
+    }
+  }
+};
+
+/** \internal
+  * Trivial specialization for 1x1 matrices
+  */
+template<typename MatrixType, bool IsComplex>
+struct tridiagonalization_inplace_selector<MatrixType,1,IsComplex>
+{
+  typedef typename MatrixType::Scalar Scalar;
+
+  template<typename DiagonalType, typename SubDiagonalType>
+  static void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType&, bool extractQ)
+  {
+    diag(0,0) = numext::real(mat(0,0));
+    if(extractQ)
+      mat(0,0) = Scalar(1);
+  }
+};
+
+/** \internal
+  * \eigenvalues_module \ingroup Eigenvalues_Module
+  *
+  * \brief Expression type for return value of Tridiagonalization::matrixT()
+  *
+  * \tparam MatrixType type of underlying dense matrix
+  */
+template<typename MatrixType> struct TridiagonalizationMatrixTReturnType
+: public ReturnByValue<TridiagonalizationMatrixTReturnType<MatrixType> >
+{
+  public:
+    /** \brief Constructor.
+      *
+      * \param[in] mat The underlying dense matrix
+      */
+    TridiagonalizationMatrixTReturnType(const MatrixType& mat) : m_matrix(mat) { }
+
+    template <typename ResultType>
+    inline void evalTo(ResultType& result) const
+    {
+      result.setZero();
+      result.template diagonal<1>() = m_matrix.template diagonal<-1>().conjugate();
+      result.diagonal() = m_matrix.diagonal();
+      result.template diagonal<-1>() = m_matrix.template diagonal<-1>();
+    }
+
+    Index rows() const { return m_matrix.rows(); }
+    Index cols() const { return m_matrix.cols(); }
+
+  protected:
+    typename MatrixType::Nested m_matrix;
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRIDIAGONALIZATION_H
diff --git a/third-party/Eigen/src/Geometry/AlignedBox.h b/third-party/Eigen/src/Geometry/AlignedBox.h
new file mode 100644
index 00000000..066eae4f
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/AlignedBox.h
@@ -0,0 +1,392 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ALIGNEDBOX_H
+#define EIGEN_ALIGNEDBOX_H
+
+namespace Eigen { 
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  *
+  * \class AlignedBox
+  *
+  * \brief An axis aligned box
+  *
+  * \tparam _Scalar the type of the scalar coefficients
+  * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
+  *
+  * This class represents an axis aligned box as a pair of the minimal and maximal corners.
+  * \warning The result of most methods is undefined when applied to an empty box. You can check for empty boxes using isEmpty().
+  * \sa alignedboxtypedefs
+  */
+template <typename _Scalar, int _AmbientDim>
+class AlignedBox
+{
+public:
+EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
+  enum { AmbientDimAtCompileTime = _AmbientDim };
+  typedef _Scalar                                   Scalar;
+  typedef NumTraits<Scalar>                         ScalarTraits;
+  typedef Eigen::Index                              Index; ///< \deprecated since Eigen 3.3
+  typedef typename ScalarTraits::Real               RealScalar;
+  typedef typename ScalarTraits::NonInteger         NonInteger;
+  typedef Matrix<Scalar,AmbientDimAtCompileTime,1>  VectorType;
+  typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const VectorType, const VectorType> VectorTypeSum;
+
+  /** Define constants to name the corners of a 1D, 2D or 3D axis aligned bounding box */
+  enum CornerType
+  {
+    /** 1D names @{ */
+    Min=0, Max=1,
+    /** @} */
+
+    /** Identifier for 2D corner @{ */
+    BottomLeft=0, BottomRight=1,
+    TopLeft=2, TopRight=3,
+    /** @} */
+
+    /** Identifier for 3D corner  @{ */
+    BottomLeftFloor=0, BottomRightFloor=1,
+    TopLeftFloor=2, TopRightFloor=3,
+    BottomLeftCeil=4, BottomRightCeil=5,
+    TopLeftCeil=6, TopRightCeil=7
+    /** @} */
+  };
+
+
+  /** Default constructor initializing a null box. */
+  EIGEN_DEVICE_FUNC inline AlignedBox()
+  { if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); }
+
+  /** Constructs a null box with \a _dim the dimension of the ambient space. */
+  EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
+  { setEmpty(); }
+
+  /** Constructs a box with extremities \a _min and \a _max.
+   * \warning If either component of \a _min is larger than the same component of \a _max, the constructed box is empty. */
+  template<typename OtherVectorType1, typename OtherVectorType2>
+  EIGEN_DEVICE_FUNC inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}
+
+  /** Constructs a box containing a single point \a p. */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
+  { }
+
+  EIGEN_DEVICE_FUNC ~AlignedBox() {}
+
+  /** \returns the dimension in which the box holds */
+  EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }
+
+  /** \deprecated use isEmpty() */
+  EIGEN_DEVICE_FUNC inline bool isNull() const { return isEmpty(); }
+
+  /** \deprecated use setEmpty() */
+  EIGEN_DEVICE_FUNC inline void setNull() { setEmpty(); }
+
+  /** \returns true if the box is empty.
+   * \sa setEmpty */
+  EIGEN_DEVICE_FUNC inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }
+
+  /** Makes \c *this an empty box.
+   * \sa isEmpty */
+  EIGEN_DEVICE_FUNC inline void setEmpty()
+  {
+    m_min.setConstant( ScalarTraits::highest() );
+    m_max.setConstant( ScalarTraits::lowest() );
+  }
+
+  /** \returns the minimal corner */
+  EIGEN_DEVICE_FUNC inline const VectorType& (min)() const { return m_min; }
+  /** \returns a non const reference to the minimal corner */
+  EIGEN_DEVICE_FUNC inline VectorType& (min)() { return m_min; }
+  /** \returns the maximal corner */
+  EIGEN_DEVICE_FUNC inline const VectorType& (max)() const { return m_max; }
+  /** \returns a non const reference to the maximal corner */
+  EIGEN_DEVICE_FUNC inline VectorType& (max)() { return m_max; }
+
+  /** \returns the center of the box */
+  EIGEN_DEVICE_FUNC inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)
+  center() const
+  { return (m_min+m_max)/RealScalar(2); }
+
+  /** \returns the lengths of the sides of the bounding box.
+    * Note that this function does not get the same
+    * result for integral or floating scalar types: see
+    */
+  EIGEN_DEVICE_FUNC inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const
+  { return m_max - m_min; }
+
+  /** \returns the volume of the bounding box */
+  EIGEN_DEVICE_FUNC inline Scalar volume() const
+  { return sizes().prod(); }
+
+  /** \returns an expression for the bounding box diagonal vector
+    * if the length of the diagonal is needed: diagonal().norm()
+    * will provide it.
+    */
+  EIGEN_DEVICE_FUNC inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const
+  { return sizes(); }
+
+  /** \returns the vertex of the bounding box at the corner defined by
+    * the corner-id corner. It works only for a 1D, 2D or 3D bounding box.
+    * For 1D bounding boxes corners are named by 2 enum constants:
+    * BottomLeft and BottomRight.
+    * For 2D bounding boxes, corners are named by 4 enum constants:
+    * BottomLeft, BottomRight, TopLeft, TopRight.
+    * For 3D bounding boxes, the following names are added:
+    * BottomLeftCeil, BottomRightCeil, TopLeftCeil, TopRightCeil.
+    */
+  EIGEN_DEVICE_FUNC inline VectorType corner(CornerType corner) const
+  {
+    EIGEN_STATIC_ASSERT(_AmbientDim <= 3, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE);
+
+    VectorType res;
+
+    Index mult = 1;
+    for(Index d=0; d<dim(); ++d)
+    {
+      if( mult & corner ) res[d] = m_max[d];
+      else                res[d] = m_min[d];
+      mult *= 2;
+    }
+    return res;
+  }
+
+  /** \returns a random point inside the bounding box sampled with
+   * a uniform distribution */
+  EIGEN_DEVICE_FUNC inline VectorType sample() const
+  {
+    VectorType r(dim());
+    for(Index d=0; d<dim(); ++d)
+    {
+      if(!ScalarTraits::IsInteger)
+      {
+        r[d] = m_min[d] + (m_max[d]-m_min[d])
+             * internal::random<Scalar>(Scalar(0), Scalar(1));
+      }
+      else
+        r[d] = internal::random(m_min[d], m_max[d]);
+    }
+    return r;
+  }
+
+  /** \returns true if the point \a p is inside the box \c *this. */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline bool contains(const MatrixBase<Derived>& p) const
+  {
+    typename internal::nested_eval<Derived,2>::type p_n(p.derived());
+    return (m_min.array()<=p_n.array()).all() && (p_n.array()<=m_max.array()).all();
+  }
+
+  /** \returns true if the box \a b is entirely inside the box \c *this. */
+  EIGEN_DEVICE_FUNC inline bool contains(const AlignedBox& b) const
+  { return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); }
+
+  /** \returns true if the box \a b is intersecting the box \c *this.
+   * \sa intersection, clamp */
+  EIGEN_DEVICE_FUNC inline bool intersects(const AlignedBox& b) const
+  { return (m_min.array()<=(b.max)().array()).all() && ((b.min)().array()<=m_max.array()).all(); }
+
+  /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this.
+   * \sa extend(const AlignedBox&) */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline AlignedBox& extend(const MatrixBase<Derived>& p)
+  {
+    typename internal::nested_eval<Derived,2>::type p_n(p.derived());
+    m_min = m_min.cwiseMin(p_n);
+    m_max = m_max.cwiseMax(p_n);
+    return *this;
+  }
+
+  /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this.
+   * \sa merged, extend(const MatrixBase&) */
+  EIGEN_DEVICE_FUNC inline AlignedBox& extend(const AlignedBox& b)
+  {
+    m_min = m_min.cwiseMin(b.m_min);
+    m_max = m_max.cwiseMax(b.m_max);
+    return *this;
+  }
+
+  /** Clamps \c *this by the box \a b and returns a reference to \c *this.
+   * \note If the boxes don't intersect, the resulting box is empty.
+   * \sa intersection(), intersects() */
+  EIGEN_DEVICE_FUNC inline AlignedBox& clamp(const AlignedBox& b)
+  {
+    m_min = m_min.cwiseMax(b.m_min);
+    m_max = m_max.cwiseMin(b.m_max);
+    return *this;
+  }
+
+  /** Returns an AlignedBox that is the intersection of \a b and \c *this
+   * \note If the boxes don't intersect, the resulting box is empty.
+   * \sa intersects(), clamp, contains()  */
+  EIGEN_DEVICE_FUNC inline AlignedBox intersection(const AlignedBox& b) const
+  {return AlignedBox(m_min.cwiseMax(b.m_min), m_max.cwiseMin(b.m_max)); }
+
+  /** Returns an AlignedBox that is the union of \a b and \c *this.
+   * \note Merging with an empty box may result in a box bigger than \c *this. 
+   * \sa extend(const AlignedBox&) */
+  EIGEN_DEVICE_FUNC inline AlignedBox merged(const AlignedBox& b) const
+  { return AlignedBox(m_min.cwiseMin(b.m_min), m_max.cwiseMax(b.m_max)); }
+
+  /** Translate \c *this by the vector \a t and returns a reference to \c *this. */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline AlignedBox& translate(const MatrixBase<Derived>& a_t)
+  {
+    const typename internal::nested_eval<Derived,2>::type t(a_t.derived());
+    m_min += t;
+    m_max += t;
+    return *this;
+  }
+
+  /** \returns the squared distance between the point \a p and the box \c *this,
+    * and zero if \a p is inside the box.
+    * \sa exteriorDistance(const MatrixBase&), squaredExteriorDistance(const AlignedBox&)
+    */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;
+
+  /** \returns the squared distance between the boxes \a b and \c *this,
+    * and zero if the boxes intersect.
+    * \sa exteriorDistance(const AlignedBox&), squaredExteriorDistance(const MatrixBase&)
+    */
+  EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
+
+  /** \returns the distance between the point \a p and the box \c *this,
+    * and zero if \a p is inside the box.
+    * \sa squaredExteriorDistance(const MatrixBase&), exteriorDistance(const AlignedBox&)
+    */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
+  { EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(p))); }
+
+  /** \returns the distance between the boxes \a b and \c *this,
+    * and zero if the boxes intersect.
+    * \sa squaredExteriorDistance(const AlignedBox&), exteriorDistance(const MatrixBase&)
+    */
+  EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const AlignedBox& b) const
+  { EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(b))); }
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AlignedBox,
+           AlignedBox<NewScalarType,AmbientDimAtCompileTime> >::type cast() const
+  {
+    return typename internal::cast_return_type<AlignedBox,
+                    AlignedBox<NewScalarType,AmbientDimAtCompileTime> >::type(*this);
+  }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  EIGEN_DEVICE_FUNC inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
+  {
+    m_min = (other.min)().template cast<Scalar>();
+    m_max = (other.max)().template cast<Scalar>();
+  }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  EIGEN_DEVICE_FUNC bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
+  { return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); }
+
+protected:
+
+  VectorType m_min, m_max;
+};
+
+
+
+template<typename Scalar,int AmbientDim>
+template<typename Derived>
+EIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
+{
+  typename internal::nested_eval<Derived,2*AmbientDim>::type p(a_p.derived());
+  Scalar dist2(0);
+  Scalar aux;
+  for (Index k=0; k<dim(); ++k)
+  {
+    if( m_min[k] > p[k] )
+    {
+      aux = m_min[k] - p[k];
+      dist2 += aux*aux;
+    }
+    else if( p[k] > m_max[k] )
+    {
+      aux = p[k] - m_max[k];
+      dist2 += aux*aux;
+    }
+  }
+  return dist2;
+}
+
+template<typename Scalar,int AmbientDim>
+EIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const
+{
+  Scalar dist2(0);
+  Scalar aux;
+  for (Index k=0; k<dim(); ++k)
+  {
+    if( m_min[k] > b.m_max[k] )
+    {
+      aux = m_min[k] - b.m_max[k];
+      dist2 += aux*aux;
+    }
+    else if( b.m_min[k] > m_max[k] )
+    {
+      aux = b.m_min[k] - m_max[k];
+      dist2 += aux*aux;
+    }
+  }
+  return dist2;
+}
+
+/** \defgroup alignedboxtypedefs Global aligned box typedefs
+  *
+  * \ingroup Geometry_Module
+  *
+  * Eigen defines several typedef shortcuts for most common aligned box types.
+  *
+  * The general patterns are the following:
+  *
+  * \c AlignedBoxSizeType where \c Size can be \c 1, \c 2,\c 3,\c 4 for fixed size boxes or \c X for dynamic size,
+  * and where \c Type can be \c i for integer, \c f for float, \c d for double.
+  *
+  * For example, \c AlignedBox3d is a fixed-size 3x3 aligned box type of doubles, and \c AlignedBoxXf is a dynamic-size aligned box of floats.
+  *
+  * \sa class AlignedBox
+  */
+
+#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix)    \
+/** \ingroup alignedboxtypedefs */                                 \
+typedef AlignedBox<Type, Size>   AlignedBox##SizeSuffix##TypeSuffix;
+
+#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 1, 1) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \
+EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X)
+
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int,                  i)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float,                f)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double,               d)
+
+#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES
+#undef EIGEN_MAKE_TYPEDEFS
+
+} // end namespace Eigen
+
+#endif // EIGEN_ALIGNEDBOX_H
diff --git a/third-party/Eigen/src/Geometry/AngleAxis.h b/third-party/Eigen/src/Geometry/AngleAxis.h
new file mode 100644
index 00000000..83ee1be4
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/AngleAxis.h
@@ -0,0 +1,247 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ANGLEAXIS_H
+#define EIGEN_ANGLEAXIS_H
+
+namespace Eigen { 
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \class AngleAxis
+  *
+  * \brief Represents a 3D rotation as a rotation angle around an arbitrary 3D axis
+  *
+  * \param _Scalar the scalar type, i.e., the type of the coefficients.
+  *
+  * \warning When setting up an AngleAxis object, the axis vector \b must \b be \b normalized.
+  *
+  * The following two typedefs are provided for convenience:
+  * \li \c AngleAxisf for \c float
+  * \li \c AngleAxisd for \c double
+  *
+  * Combined with MatrixBase::Unit{X,Y,Z}, AngleAxis can be used to easily
+  * mimic Euler-angles. Here is an example:
+  * \include AngleAxis_mimic_euler.cpp
+  * Output: \verbinclude AngleAxis_mimic_euler.out
+  *
+  * \note This class is not aimed to be used to store a rotation transformation,
+  * but rather to make easier the creation of other rotation (Quaternion, rotation Matrix)
+  * and transformation objects.
+  *
+  * \sa class Quaternion, class Transform, MatrixBase::UnitX()
+  */
+
+namespace internal {
+template<typename _Scalar> struct traits<AngleAxis<_Scalar> >
+{
+  typedef _Scalar Scalar;
+};
+}
+
+template<typename _Scalar>
+class AngleAxis : public RotationBase<AngleAxis<_Scalar>,3>
+{
+  typedef RotationBase<AngleAxis<_Scalar>,3> Base;
+
+public:
+
+  using Base::operator*;
+
+  enum { Dim = 3 };
+  /** the scalar type of the coefficients */
+  typedef _Scalar Scalar;
+  typedef Matrix<Scalar,3,3> Matrix3;
+  typedef Matrix<Scalar,3,1> Vector3;
+  typedef Quaternion<Scalar> QuaternionType;
+
+protected:
+
+  Vector3 m_axis;
+  Scalar m_angle;
+
+public:
+
+  /** Default constructor without initialization. */
+  EIGEN_DEVICE_FUNC AngleAxis() {}
+  /** Constructs and initialize the angle-axis rotation from an \a angle in radian
+    * and an \a axis which \b must \b be \b normalized.
+    *
+    * \warning If the \a axis vector is not normalized, then the angle-axis object
+    *          represents an invalid rotation. */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC 
+  inline AngleAxis(const Scalar& angle, const MatrixBase<Derived>& axis) : m_axis(axis), m_angle(angle) {}
+  /** Constructs and initialize the angle-axis rotation from a quaternion \a q.
+    * This function implicitly normalizes the quaternion \a q.
+    */
+  template<typename QuatDerived> 
+  EIGEN_DEVICE_FUNC inline explicit AngleAxis(const QuaternionBase<QuatDerived>& q) { *this = q; }
+  /** Constructs and initialize the angle-axis rotation from a 3x3 rotation matrix. */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }
+
+  /** \returns the value of the rotation angle in radian */
+  EIGEN_DEVICE_FUNC Scalar angle() const { return m_angle; }
+  /** \returns a read-write reference to the stored angle in radian */
+  EIGEN_DEVICE_FUNC Scalar& angle() { return m_angle; }
+
+  /** \returns the rotation axis */
+  EIGEN_DEVICE_FUNC const Vector3& axis() const { return m_axis; }
+  /** \returns a read-write reference to the stored rotation axis.
+    *
+    * \warning The rotation axis must remain a \b unit vector.
+    */
+  EIGEN_DEVICE_FUNC Vector3& axis() { return m_axis; }
+
+  /** Concatenates two rotations */
+  EIGEN_DEVICE_FUNC inline QuaternionType operator* (const AngleAxis& other) const
+  { return QuaternionType(*this) * QuaternionType(other); }
+
+  /** Concatenates two rotations */
+  EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& other) const
+  { return QuaternionType(*this) * other; }
+
+  /** Concatenates two rotations */
+  friend EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
+  { return a * QuaternionType(b); }
+
+  /** \returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */
+  EIGEN_DEVICE_FUNC AngleAxis inverse() const
+  { return AngleAxis(-m_angle, m_axis); }
+
+  template<class QuatDerived>
+  EIGEN_DEVICE_FUNC AngleAxis& operator=(const QuaternionBase<QuatDerived>& q);
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC AngleAxis& operator=(const MatrixBase<Derived>& m);
+
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);
+  EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix(void) const;
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const
+  { return typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type(*this); }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  EIGEN_DEVICE_FUNC inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)
+  {
+    m_axis = other.axis().template cast<Scalar>();
+    m_angle = Scalar(other.angle());
+  }
+
+  EIGEN_DEVICE_FUNC static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  EIGEN_DEVICE_FUNC bool isApprox(const AngleAxis& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  { return m_axis.isApprox(other.m_axis, prec) && internal::isApprox(m_angle,other.m_angle, prec); }
+};
+
+/** \ingroup Geometry_Module
+  * single precision angle-axis type */
+typedef AngleAxis<float> AngleAxisf;
+/** \ingroup Geometry_Module
+  * double precision angle-axis type */
+typedef AngleAxis<double> AngleAxisd;
+
+/** Set \c *this from a \b unit quaternion.
+  *
+  * The resulting axis is normalized, and the computed angle is in the [0,pi] range.
+  * 
+  * This function implicitly normalizes the quaternion \a q.
+  */
+template<typename Scalar>
+template<typename QuatDerived>
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
+{
+  EIGEN_USING_STD_MATH(atan2)
+  EIGEN_USING_STD_MATH(abs)
+  Scalar n = q.vec().norm();
+  if(n<NumTraits<Scalar>::epsilon())
+    n = q.vec().stableNorm();
+
+  if (n != Scalar(0))
+  {
+    m_angle = Scalar(2)*atan2(n, abs(q.w()));
+    if(q.w() < Scalar(0))
+      n = -n;
+    m_axis  = q.vec() / n;
+  }
+  else
+  {
+    m_angle = Scalar(0);
+    m_axis << Scalar(1), Scalar(0), Scalar(0);
+  }
+  return *this;
+}
+
+/** Set \c *this from a 3x3 rotation matrix \a mat.
+  */
+template<typename Scalar>
+template<typename Derived>
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
+{
+  // Since a direct conversion would not be really faster,
+  // let's use the robust Quaternion implementation:
+  return *this = QuaternionType(mat);
+}
+
+/**
+* \brief Sets \c *this from a 3x3 rotation matrix.
+**/
+template<typename Scalar>
+template<typename Derived>
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
+{
+  return *this = QuaternionType(mat);
+}
+
+/** Constructs and \returns an equivalent 3x3 rotation matrix.
+  */
+template<typename Scalar>
+typename AngleAxis<Scalar>::Matrix3
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>::toRotationMatrix(void) const
+{
+  EIGEN_USING_STD_MATH(sin)
+  EIGEN_USING_STD_MATH(cos)
+  Matrix3 res;
+  Vector3 sin_axis  = sin(m_angle) * m_axis;
+  Scalar c = cos(m_angle);
+  Vector3 cos1_axis = (Scalar(1)-c) * m_axis;
+
+  Scalar tmp;
+  tmp = cos1_axis.x() * m_axis.y();
+  res.coeffRef(0,1) = tmp - sin_axis.z();
+  res.coeffRef(1,0) = tmp + sin_axis.z();
+
+  tmp = cos1_axis.x() * m_axis.z();
+  res.coeffRef(0,2) = tmp + sin_axis.y();
+  res.coeffRef(2,0) = tmp - sin_axis.y();
+
+  tmp = cos1_axis.y() * m_axis.z();
+  res.coeffRef(1,2) = tmp - sin_axis.x();
+  res.coeffRef(2,1) = tmp + sin_axis.x();
+
+  res.diagonal() = (cos1_axis.cwiseProduct(m_axis)).array() + c;
+
+  return res;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_ANGLEAXIS_H
diff --git a/third-party/Eigen/src/Geometry/EulerAngles.h b/third-party/Eigen/src/Geometry/EulerAngles.h
new file mode 100644
index 00000000..c633268a
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/EulerAngles.h
@@ -0,0 +1,114 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_EULERANGLES_H
+#define EIGEN_EULERANGLES_H
+
+namespace Eigen { 
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  *
+  * \returns the Euler-angles of the rotation matrix \c *this using the convention defined by the triplet (\a a0,\a a1,\a a2)
+  *
+  * Each of the three parameters \a a0,\a a1,\a a2 represents the respective rotation axis as an integer in {0,1,2}.
+  * For instance, in:
+  * \code Vector3f ea = mat.eulerAngles(2, 0, 2); \endcode
+  * "2" represents the z axis and "0" the x axis, etc. The returned angles are such that
+  * we have the following equality:
+  * \code
+  * mat == AngleAxisf(ea[0], Vector3f::UnitZ())
+  *      * AngleAxisf(ea[1], Vector3f::UnitX())
+  *      * AngleAxisf(ea[2], Vector3f::UnitZ()); \endcode
+  * This corresponds to the right-multiply conventions (with right hand side frames).
+  * 
+  * The returned angles are in the ranges [0:pi]x[-pi:pi]x[-pi:pi].
+  * 
+  * \sa class AngleAxis
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>
+MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
+{
+  EIGEN_USING_STD_MATH(atan2)
+  EIGEN_USING_STD_MATH(sin)
+  EIGEN_USING_STD_MATH(cos)
+  /* Implemented from Graphics Gems IV */
+  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived,3,3)
+
+  Matrix<Scalar,3,1> res;
+  typedef Matrix<typename Derived::Scalar,2,1> Vector2;
+
+  const Index odd = ((a0+1)%3 == a1) ? 0 : 1;
+  const Index i = a0;
+  const Index j = (a0 + 1 + odd)%3;
+  const Index k = (a0 + 2 - odd)%3;
+  
+  if (a0==a2)
+  {
+    res[0] = atan2(coeff(j,i), coeff(k,i));
+    if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0)))
+    {
+      if(res[0] > Scalar(0)) {
+        res[0] -= Scalar(EIGEN_PI);
+      }
+      else {
+        res[0] += Scalar(EIGEN_PI);
+      }
+      Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm();
+      res[1] = -atan2(s2, coeff(i,i));
+    }
+    else
+    {
+      Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm();
+      res[1] = atan2(s2, coeff(i,i));
+    }
+    
+    // With a=(0,1,0), we have i=0; j=1; k=2, and after computing the first two angles,
+    // we can compute their respective rotation, and apply its inverse to M. Since the result must
+    // be a rotation around x, we have:
+    //
+    //  c2  s1.s2 c1.s2                   1  0   0 
+    //  0   c1    -s1       *    M    =   0  c3  s3
+    //  -s2 s1.c2 c1.c2                   0 -s3  c3
+    //
+    //  Thus:  m11.c1 - m21.s1 = c3  &   m12.c1 - m22.s1 = s3
+    
+    Scalar s1 = sin(res[0]);
+    Scalar c1 = cos(res[0]);
+    res[2] = atan2(c1*coeff(j,k)-s1*coeff(k,k), c1*coeff(j,j) - s1 * coeff(k,j));
+  } 
+  else
+  {
+    res[0] = atan2(coeff(j,k), coeff(k,k));
+    Scalar c2 = Vector2(coeff(i,i), coeff(i,j)).norm();
+    if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0))) {
+      if(res[0] > Scalar(0)) {
+        res[0] -= Scalar(EIGEN_PI);
+      }
+      else {
+        res[0] += Scalar(EIGEN_PI);
+      }
+      res[1] = atan2(-coeff(i,k), -c2);
+    }
+    else
+      res[1] = atan2(-coeff(i,k), c2);
+    Scalar s1 = sin(res[0]);
+    Scalar c1 = cos(res[0]);
+    res[2] = atan2(s1*coeff(k,i)-c1*coeff(j,i), c1*coeff(j,j) - s1 * coeff(k,j));
+  }
+  if (!odd)
+    res = -res;
+  
+  return res;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_EULERANGLES_H
diff --git a/third-party/Eigen/src/Geometry/Homogeneous.h b/third-party/Eigen/src/Geometry/Homogeneous.h
new file mode 100644
index 00000000..5f0da1a9
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/Homogeneous.h
@@ -0,0 +1,497 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_HOMOGENEOUS_H
+#define EIGEN_HOMOGENEOUS_H
+
+namespace Eigen { 
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \class Homogeneous
+  *
+  * \brief Expression of one (or a set of) homogeneous vector(s)
+  *
+  * \param MatrixType the type of the object in which we are making homogeneous
+  *
+  * This class represents an expression of one (or a set of) homogeneous vector(s).
+  * It is the return type of MatrixBase::homogeneous() and most of the time
+  * this is the only way it is used.
+  *
+  * \sa MatrixBase::homogeneous()
+  */
+
+namespace internal {
+
+template<typename MatrixType,int Direction>
+struct traits<Homogeneous<MatrixType,Direction> >
+ : traits<MatrixType>
+{
+  typedef typename traits<MatrixType>::StorageKind StorageKind;
+  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
+  enum {
+    RowsPlusOne = (MatrixType::RowsAtCompileTime != Dynamic) ?
+                  int(MatrixType::RowsAtCompileTime) + 1 : Dynamic,
+    ColsPlusOne = (MatrixType::ColsAtCompileTime != Dynamic) ?
+                  int(MatrixType::ColsAtCompileTime) + 1 : Dynamic,
+    RowsAtCompileTime = Direction==Vertical  ?  RowsPlusOne : MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = Direction==Horizontal ? ColsPlusOne : MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = RowsAtCompileTime,
+    MaxColsAtCompileTime = ColsAtCompileTime,
+    TmpFlags = _MatrixTypeNested::Flags & HereditaryBits,
+    Flags = ColsAtCompileTime==1 ? (TmpFlags & ~RowMajorBit)
+          : RowsAtCompileTime==1 ? (TmpFlags | RowMajorBit)
+          : TmpFlags
+  };
+};
+
+template<typename MatrixType,typename Lhs> struct homogeneous_left_product_impl;
+template<typename MatrixType,typename Rhs> struct homogeneous_right_product_impl;
+
+} // end namespace internal
+
+template<typename MatrixType,int _Direction> class Homogeneous
+  : public MatrixBase<Homogeneous<MatrixType,_Direction> >, internal::no_assignment_operator
+{
+  public:
+
+    typedef MatrixType NestedExpression;
+    enum { Direction = _Direction };
+
+    typedef MatrixBase<Homogeneous> Base;
+    EIGEN_DENSE_PUBLIC_INTERFACE(Homogeneous)
+
+    EIGEN_DEVICE_FUNC explicit inline Homogeneous(const MatrixType& matrix)
+      : m_matrix(matrix)
+    {}
+
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical   ? 1 : 0); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
+    
+    EIGEN_DEVICE_FUNC const NestedExpression& nestedExpression() const { return m_matrix; }
+
+    template<typename Rhs>
+    EIGEN_DEVICE_FUNC inline const Product<Homogeneous,Rhs>
+    operator* (const MatrixBase<Rhs>& rhs) const
+    {
+      eigen_assert(int(Direction)==Horizontal);
+      return Product<Homogeneous,Rhs>(*this,rhs.derived());
+    }
+
+    template<typename Lhs> friend
+    EIGEN_DEVICE_FUNC inline const Product<Lhs,Homogeneous>
+    operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs)
+    {
+      eigen_assert(int(Direction)==Vertical);
+      return Product<Lhs,Homogeneous>(lhs.derived(),rhs);
+    }
+
+    template<typename Scalar, int Dim, int Mode, int Options> friend
+    EIGEN_DEVICE_FUNC inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
+    operator* (const Transform<Scalar,Dim,Mode,Options>& lhs, const Homogeneous& rhs)
+    {
+      eigen_assert(int(Direction)==Vertical);
+      return Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous>(lhs,rhs);
+    }
+
+    template<typename Func>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar,Scalar)>::type
+    redux(const Func& func) const
+    {
+      return func(m_matrix.redux(func), Scalar(1));
+    }
+
+  protected:
+    typename MatrixType::Nested m_matrix;
+};
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \returns a vector expression that is one longer than the vector argument, with the value 1 symbolically appended as the last coefficient.
+  *
+  * This can be used to convert affine coordinates to homogeneous coordinates.
+  *
+  * \only_for_vectors
+  *
+  * Example: \include MatrixBase_homogeneous.cpp
+  * Output: \verbinclude MatrixBase_homogeneous.out
+  *
+  * \sa VectorwiseOp::homogeneous(), class Homogeneous
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::HomogeneousReturnType
+MatrixBase<Derived>::homogeneous() const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  return HomogeneousReturnType(derived());
+}
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \returns an expression where the value 1 is symbolically appended as the final coefficient to each column (or row) of the matrix.
+  *
+  * This can be used to convert affine coordinates to homogeneous coordinates.
+  *
+  * Example: \include VectorwiseOp_homogeneous.cpp
+  * Output: \verbinclude VectorwiseOp_homogeneous.out
+  *
+  * \sa MatrixBase::homogeneous(), class Homogeneous */
+template<typename ExpressionType, int Direction>
+EIGEN_DEVICE_FUNC inline Homogeneous<ExpressionType,Direction>
+VectorwiseOp<ExpressionType,Direction>::homogeneous() const
+{
+  return HomogeneousReturnType(_expression());
+}
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \brief homogeneous normalization
+  *
+  * \returns a vector expression of the N-1 first coefficients of \c *this divided by that last coefficient.
+  *
+  * This can be used to convert homogeneous coordinates to affine coordinates.
+  *
+  * It is essentially a shortcut for:
+  * \code
+    this->head(this->size()-1)/this->coeff(this->size()-1);
+    \endcode
+  *
+  * Example: \include MatrixBase_hnormalized.cpp
+  * Output: \verbinclude MatrixBase_hnormalized.out
+  *
+  * \sa VectorwiseOp::hnormalized() */
+template<typename Derived>
+EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::HNormalizedReturnType
+MatrixBase<Derived>::hnormalized() const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
+  return ConstStartMinusOne(derived(),0,0,
+    ColsAtCompileTime==1?size()-1:1,
+    ColsAtCompileTime==1?1:size()-1) / coeff(size()-1);
+}
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \brief column or row-wise homogeneous normalization
+  *
+  * \returns an expression of the first N-1 coefficients of each column (or row) of \c *this divided by the last coefficient of each column (or row).
+  *
+  * This can be used to convert homogeneous coordinates to affine coordinates.
+  *
+  * It is conceptually equivalent to calling MatrixBase::hnormalized() to each column (or row) of \c *this.
+  *
+  * Example: \include DirectionWise_hnormalized.cpp
+  * Output: \verbinclude DirectionWise_hnormalized.out
+  *
+  * \sa MatrixBase::hnormalized() */
+template<typename ExpressionType, int Direction>
+EIGEN_DEVICE_FUNC inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
+VectorwiseOp<ExpressionType,Direction>::hnormalized() const
+{
+  return HNormalized_Block(_expression(),0,0,
+      Direction==Vertical   ? _expression().rows()-1 : _expression().rows(),
+      Direction==Horizontal ? _expression().cols()-1 : _expression().cols()).cwiseQuotient(
+      Replicate<HNormalized_Factors,
+                Direction==Vertical   ? HNormalized_SizeMinusOne : 1,
+                Direction==Horizontal ? HNormalized_SizeMinusOne : 1>
+        (HNormalized_Factors(_expression(),
+          Direction==Vertical    ? _expression().rows()-1:0,
+          Direction==Horizontal  ? _expression().cols()-1:0,
+          Direction==Vertical    ? 1 : _expression().rows(),
+          Direction==Horizontal  ? 1 : _expression().cols()),
+         Direction==Vertical   ? _expression().rows()-1 : 1,
+         Direction==Horizontal ? _expression().cols()-1 : 1));
+}
+
+namespace internal {
+
+template<typename MatrixOrTransformType>
+struct take_matrix_for_product
+{
+  typedef MatrixOrTransformType type;
+  EIGEN_DEVICE_FUNC static const type& run(const type &x) { return x; }
+};
+
+template<typename Scalar, int Dim, int Mode,int Options>
+struct take_matrix_for_product<Transform<Scalar, Dim, Mode, Options> >
+{
+  typedef Transform<Scalar, Dim, Mode, Options> TransformType;
+  typedef typename internal::add_const<typename TransformType::ConstAffinePart>::type type;
+  EIGEN_DEVICE_FUNC static type run (const TransformType& x) { return x.affine(); }
+};
+
+template<typename Scalar, int Dim, int Options>
+struct take_matrix_for_product<Transform<Scalar, Dim, Projective, Options> >
+{
+  typedef Transform<Scalar, Dim, Projective, Options> TransformType;
+  typedef typename TransformType::MatrixType type;
+  EIGEN_DEVICE_FUNC static const type& run (const TransformType& x) { return x.matrix(); }
+};
+
+template<typename MatrixType,typename Lhs>
+struct traits<homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs> >
+{
+  typedef typename take_matrix_for_product<Lhs>::type LhsMatrixType;
+  typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
+  typedef typename remove_all<LhsMatrixType>::type LhsMatrixTypeCleaned;
+  typedef typename make_proper_matrix_type<
+                 typename traits<MatrixTypeCleaned>::Scalar,
+                 LhsMatrixTypeCleaned::RowsAtCompileTime,
+                 MatrixTypeCleaned::ColsAtCompileTime,
+                 MatrixTypeCleaned::PlainObject::Options,
+                 LhsMatrixTypeCleaned::MaxRowsAtCompileTime,
+                 MatrixTypeCleaned::MaxColsAtCompileTime>::type ReturnType;
+};
+
+template<typename MatrixType,typename Lhs>
+struct homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs>
+  : public ReturnByValue<homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs> >
+{
+  typedef typename traits<homogeneous_left_product_impl>::LhsMatrixType LhsMatrixType;
+  typedef typename remove_all<LhsMatrixType>::type LhsMatrixTypeCleaned;
+  typedef typename remove_all<typename LhsMatrixTypeCleaned::Nested>::type LhsMatrixTypeNested;
+  EIGEN_DEVICE_FUNC homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
+    : m_lhs(take_matrix_for_product<Lhs>::run(lhs)),
+      m_rhs(rhs)
+  {}
+
+  EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
+  EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
+
+  template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
+  {
+    // FIXME investigate how to allow lazy evaluation of this product when possible
+    dst = Block<const LhsMatrixTypeNested,
+              LhsMatrixTypeNested::RowsAtCompileTime,
+              LhsMatrixTypeNested::ColsAtCompileTime==Dynamic?Dynamic:LhsMatrixTypeNested::ColsAtCompileTime-1>
+            (m_lhs,0,0,m_lhs.rows(),m_lhs.cols()-1) * m_rhs;
+    dst += m_lhs.col(m_lhs.cols()-1).rowwise()
+            .template replicate<MatrixType::ColsAtCompileTime>(m_rhs.cols());
+  }
+
+  typename LhsMatrixTypeCleaned::Nested m_lhs;
+  typename MatrixType::Nested m_rhs;
+};
+
+template<typename MatrixType,typename Rhs>
+struct traits<homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs> >
+{
+  typedef typename make_proper_matrix_type<typename traits<MatrixType>::Scalar,
+                 MatrixType::RowsAtCompileTime,
+                 Rhs::ColsAtCompileTime,
+                 MatrixType::PlainObject::Options,
+                 MatrixType::MaxRowsAtCompileTime,
+                 Rhs::MaxColsAtCompileTime>::type ReturnType;
+};
+
+template<typename MatrixType,typename Rhs>
+struct homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>
+  : public ReturnByValue<homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs> >
+{
+  typedef typename remove_all<typename Rhs::Nested>::type RhsNested;
+  EIGEN_DEVICE_FUNC homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
+    : m_lhs(lhs), m_rhs(rhs)
+  {}
+
+  EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
+  EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
+
+  template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
+  {
+    // FIXME investigate how to allow lazy evaluation of this product when possible
+    dst = m_lhs * Block<const RhsNested,
+                        RhsNested::RowsAtCompileTime==Dynamic?Dynamic:RhsNested::RowsAtCompileTime-1,
+                        RhsNested::ColsAtCompileTime>
+            (m_rhs,0,0,m_rhs.rows()-1,m_rhs.cols());
+    dst += m_rhs.row(m_rhs.rows()-1).colwise()
+            .template replicate<MatrixType::RowsAtCompileTime>(m_lhs.rows());
+  }
+
+  typename MatrixType::Nested m_lhs;
+  typename Rhs::Nested m_rhs;
+};
+
+template<typename ArgType,int Direction>
+struct evaluator_traits<Homogeneous<ArgType,Direction> >
+{
+  typedef typename storage_kind_to_evaluator_kind<typename ArgType::StorageKind>::Kind Kind;
+  typedef HomogeneousShape Shape;  
+};
+
+template<> struct AssignmentKind<DenseShape,HomogeneousShape> { typedef Dense2Dense Kind; };
+
+
+template<typename ArgType,int Direction>
+struct unary_evaluator<Homogeneous<ArgType,Direction>, IndexBased>
+  : evaluator<typename Homogeneous<ArgType,Direction>::PlainObject >
+{
+  typedef Homogeneous<ArgType,Direction> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  typedef evaluator<PlainObject> Base;
+
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
+    : Base(), m_temp(op)
+  {
+    ::new (static_cast<Base*>(this)) Base(m_temp);
+  }
+
+protected:
+  PlainObject m_temp;
+};
+
+// dense = homogeneous
+template< typename DstXprType, typename ArgType, typename Scalar>
+struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>
+{
+  typedef Homogeneous<ArgType,Vertical> SrcXprType;
+  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+
+    dst.template topRows<ArgType::RowsAtCompileTime>(src.nestedExpression().rows()) = src.nestedExpression();
+    dst.row(dst.rows()-1).setOnes();
+  }
+};
+
+// dense = homogeneous
+template< typename DstXprType, typename ArgType, typename Scalar>
+struct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>
+{
+  typedef Homogeneous<ArgType,Horizontal> SrcXprType;
+  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+
+    dst.template leftCols<ArgType::ColsAtCompileTime>(src.nestedExpression().cols()) = src.nestedExpression();
+    dst.col(dst.cols()-1).setOnes();
+  }
+};
+
+template<typename LhsArg, typename Rhs, int ProductTag>
+struct generic_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs, HomogeneousShape, DenseShape, ProductTag>
+{
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
+  {
+    homogeneous_right_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst);
+  }
+};
+
+template<typename Lhs,typename Rhs>
+struct homogeneous_right_product_refactoring_helper
+{
+  enum {
+    Dim  = Lhs::ColsAtCompileTime,
+    Rows = Lhs::RowsAtCompileTime
+  };
+  typedef typename Rhs::template ConstNRowsBlockXpr<Dim>::Type          LinearBlockConst;
+  typedef typename remove_const<LinearBlockConst>::type                 LinearBlock;
+  typedef typename Rhs::ConstRowXpr                                     ConstantColumn;
+  typedef Replicate<const ConstantColumn,Rows,1>                        ConstantBlock;
+  typedef Product<Lhs,LinearBlock,LazyProduct>                          LinearProduct;
+  typedef CwiseBinaryOp<internal::scalar_sum_op<typename Lhs::Scalar,typename Rhs::Scalar>, const LinearProduct, const ConstantBlock> Xpr;
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, HomogeneousShape, DenseShape>
+ : public evaluator<typename homogeneous_right_product_refactoring_helper<typename Lhs::NestedExpression,Rhs>::Xpr>
+{
+  typedef Product<Lhs, Rhs, LazyProduct> XprType;
+  typedef homogeneous_right_product_refactoring_helper<typename Lhs::NestedExpression,Rhs> helper;
+  typedef typename helper::ConstantBlock ConstantBlock;
+  typedef typename helper::Xpr RefactoredXpr;
+  typedef evaluator<RefactoredXpr> Base;
+  
+  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+    : Base(  xpr.lhs().nestedExpression() .lazyProduct(  xpr.rhs().template topRows<helper::Dim>(xpr.lhs().nestedExpression().cols()) )
+            + ConstantBlock(xpr.rhs().row(xpr.rhs().rows()-1),xpr.lhs().rows(), 1) )
+  {}
+};
+
+template<typename Lhs, typename RhsArg, int ProductTag>
+struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>
+{
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+  {
+    homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst);
+  }
+};
+
+// TODO: the following specialization is to address a regression from 3.2 to 3.3
+// In the future, this path should be optimized.
+template<typename Lhs, typename RhsArg, int ProductTag>
+struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, TriangularShape, HomogeneousShape, ProductTag>
+{
+  template<typename Dest>
+  static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+  {
+    dst.noalias() = lhs * rhs.eval();
+  }
+};
+
+template<typename Lhs,typename Rhs>
+struct homogeneous_left_product_refactoring_helper
+{
+  enum {
+    Dim = Rhs::RowsAtCompileTime,
+    Cols = Rhs::ColsAtCompileTime
+  };
+  typedef typename Lhs::template ConstNColsBlockXpr<Dim>::Type          LinearBlockConst;
+  typedef typename remove_const<LinearBlockConst>::type                 LinearBlock;
+  typedef typename Lhs::ConstColXpr                                     ConstantColumn;
+  typedef Replicate<const ConstantColumn,1,Cols>                        ConstantBlock;
+  typedef Product<LinearBlock,Rhs,LazyProduct>                          LinearProduct;
+  typedef CwiseBinaryOp<internal::scalar_sum_op<typename Lhs::Scalar,typename Rhs::Scalar>, const LinearProduct, const ConstantBlock> Xpr;
+};
+
+template<typename Lhs, typename Rhs, int ProductTag>
+struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, HomogeneousShape>
+ : public evaluator<typename homogeneous_left_product_refactoring_helper<Lhs,typename Rhs::NestedExpression>::Xpr>
+{
+  typedef Product<Lhs, Rhs, LazyProduct> XprType;
+  typedef homogeneous_left_product_refactoring_helper<Lhs,typename Rhs::NestedExpression> helper;
+  typedef typename helper::ConstantBlock ConstantBlock;
+  typedef typename helper::Xpr RefactoredXpr;
+  typedef evaluator<RefactoredXpr> Base;
+  
+  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
+    : Base(   xpr.lhs().template leftCols<helper::Dim>(xpr.rhs().nestedExpression().rows()) .lazyProduct( xpr.rhs().nestedExpression() )
+            + ConstantBlock(xpr.lhs().col(xpr.lhs().cols()-1),1,xpr.rhs().cols()) )
+  {}
+};
+
+template<typename Scalar, int Dim, int Mode,int Options, typename RhsArg, int ProductTag>
+struct generic_product_impl<Transform<Scalar,Dim,Mode,Options>, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>
+{
+  typedef Transform<Scalar,Dim,Mode,Options> TransformType;
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+  {
+    homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, TransformType>(lhs, rhs.nestedExpression()).evalTo(dst);
+  }
+};
+
+template<typename ExpressionType, int Side, bool Transposed>
+struct permutation_matrix_product<ExpressionType, Side, Transposed, HomogeneousShape>
+  : public permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
+{};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_HOMOGENEOUS_H
diff --git a/third-party/Eigen/src/Geometry/Hyperplane.h b/third-party/Eigen/src/Geometry/Hyperplane.h
new file mode 100644
index 00000000..05929b29
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/Hyperplane.h
@@ -0,0 +1,282 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_HYPERPLANE_H
+#define EIGEN_HYPERPLANE_H
+
+namespace Eigen { 
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \class Hyperplane
+  *
+  * \brief A hyperplane
+  *
+  * A hyperplane is an affine subspace of dimension n-1 in a space of dimension n.
+  * For example, a hyperplane in a plane is a line; a hyperplane in 3-space is a plane.
+  *
+  * \tparam _Scalar the scalar type, i.e., the type of the coefficients
+  * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
+  *             Notice that the dimension of the hyperplane is _AmbientDim-1.
+  *
+  * This class represents an hyperplane as the zero set of the implicit equation
+  * \f$ n \cdot x + d = 0 \f$ where \f$ n \f$ is a unit normal vector of the plane (linear part)
+  * and \f$ d \f$ is the distance (offset) to the origin.
+  */
+template <typename _Scalar, int _AmbientDim, int _Options>
+class Hyperplane
+{
+public:
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==Dynamic ? Dynamic : _AmbientDim+1)
+  enum {
+    AmbientDimAtCompileTime = _AmbientDim,
+    Options = _Options
+  };
+  typedef _Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+  typedef Matrix<Scalar,AmbientDimAtCompileTime,1> VectorType;
+  typedef Matrix<Scalar,Index(AmbientDimAtCompileTime)==Dynamic
+                        ? Dynamic
+                        : Index(AmbientDimAtCompileTime)+1,1,Options> Coefficients;
+  typedef Block<Coefficients,AmbientDimAtCompileTime,1> NormalReturnType;
+  typedef const Block<const Coefficients,AmbientDimAtCompileTime,1> ConstNormalReturnType;
+
+  /** Default constructor without initialization */
+  EIGEN_DEVICE_FUNC inline Hyperplane() {}
+  
+  template<int OtherOptions>
+  EIGEN_DEVICE_FUNC Hyperplane(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
+   : m_coeffs(other.coeffs())
+  {}
+
+  /** Constructs a dynamic-size hyperplane with \a _dim the dimension
+    * of the ambient space */
+  EIGEN_DEVICE_FUNC inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {}
+
+  /** Construct a plane from its normal \a n and a point \a e onto the plane.
+    * \warning the vector normal is assumed to be normalized.
+    */
+  EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const VectorType& e)
+    : m_coeffs(n.size()+1)
+  {
+    normal() = n;
+    offset() = -n.dot(e);
+  }
+
+  /** Constructs a plane from its normal \a n and distance to the origin \a d
+    * such that the algebraic equation of the plane is \f$ n \cdot x + d = 0 \f$.
+    * \warning the vector normal is assumed to be normalized.
+    */
+  EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const Scalar& d)
+    : m_coeffs(n.size()+1)
+  {
+    normal() = n;
+    offset() = d;
+  }
+
+  /** Constructs a hyperplane passing through the two points. If the dimension of the ambient space
+    * is greater than 2, then there isn't uniqueness, so an arbitrary choice is made.
+    */
+  EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
+  {
+    Hyperplane result(p0.size());
+    result.normal() = (p1 - p0).unitOrthogonal();
+    result.offset() = -p0.dot(result.normal());
+    return result;
+  }
+
+  /** Constructs a hyperplane passing through the three points. The dimension of the ambient space
+    * is required to be exactly 3.
+    */
+  EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
+  {
+    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3)
+    Hyperplane result(p0.size());
+    VectorType v0(p2 - p0), v1(p1 - p0);
+    result.normal() = v0.cross(v1);
+    RealScalar norm = result.normal().norm();
+    if(norm <= v0.norm() * v1.norm() * NumTraits<RealScalar>::epsilon())
+    {
+      Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();
+      JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);
+      result.normal() = svd.matrixV().col(2);
+    }
+    else
+      result.normal() /= norm;
+    result.offset() = -p0.dot(result.normal());
+    return result;
+  }
+
+  /** Constructs a hyperplane passing through the parametrized line \a parametrized.
+    * If the dimension of the ambient space is greater than 2, then there isn't uniqueness,
+    * so an arbitrary choice is made.
+    */
+  // FIXME to be consitent with the rest this could be implemented as a static Through function ??
+  EIGEN_DEVICE_FUNC explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
+  {
+    normal() = parametrized.direction().unitOrthogonal();
+    offset() = -parametrized.origin().dot(normal());
+  }
+
+  EIGEN_DEVICE_FUNC ~Hyperplane() {}
+
+  /** \returns the dimension in which the plane holds */
+  EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); }
+
+  /** normalizes \c *this */
+  EIGEN_DEVICE_FUNC void normalize(void)
+  {
+    m_coeffs /= normal().norm();
+  }
+
+  /** \returns the signed distance between the plane \c *this and a point \a p.
+    * \sa absDistance()
+    */
+  EIGEN_DEVICE_FUNC inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); }
+
+  /** \returns the absolute distance between the plane \c *this and a point \a p.
+    * \sa signedDistance()
+    */
+  EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { return numext::abs(signedDistance(p)); }
+
+  /** \returns the projection of a point \a p onto the plane \c *this.
+    */
+  EIGEN_DEVICE_FUNC inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
+
+  /** \returns a constant reference to the unit normal vector of the plane, which corresponds
+    * to the linear part of the implicit equation.
+    */
+  EIGEN_DEVICE_FUNC inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); }
+
+  /** \returns a non-constant reference to the unit normal vector of the plane, which corresponds
+    * to the linear part of the implicit equation.
+    */
+  EIGEN_DEVICE_FUNC inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
+
+  /** \returns the distance to the origin, which is also the "constant term" of the implicit equation
+    * \warning the vector normal is assumed to be normalized.
+    */
+  EIGEN_DEVICE_FUNC inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
+
+  /** \returns a non-constant reference to the distance to the origin, which is also the constant part
+    * of the implicit equation */
+  EIGEN_DEVICE_FUNC inline Scalar& offset() { return m_coeffs(dim()); }
+
+  /** \returns a constant reference to the coefficients c_i of the plane equation:
+    * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
+    */
+  EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
+
+  /** \returns a non-constant reference to the coefficients c_i of the plane equation:
+    * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
+    */
+  EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
+
+  /** \returns the intersection of *this with \a other.
+    *
+    * \warning The ambient space must be a plane, i.e. have dimension 2, so that \c *this and \a other are lines.
+    *
+    * \note If \a other is approximately parallel to *this, this method will return any point on *this.
+    */
+  EIGEN_DEVICE_FUNC VectorType intersection(const Hyperplane& other) const
+  {
+    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
+    Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0);
+    // since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests
+    // whether the two lines are approximately parallel.
+    if(internal::isMuchSmallerThan(det, Scalar(1)))
+    {   // special case where the two lines are approximately parallel. Pick any point on the first line.
+        if(numext::abs(coeffs().coeff(1))>numext::abs(coeffs().coeff(0)))
+            return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0));
+        else
+            return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0));
+    }
+    else
+    {   // general case
+        Scalar invdet = Scalar(1) / det;
+        return VectorType(invdet*(coeffs().coeff(1)*other.coeffs().coeff(2)-other.coeffs().coeff(1)*coeffs().coeff(2)),
+                          invdet*(other.coeffs().coeff(0)*coeffs().coeff(2)-coeffs().coeff(0)*other.coeffs().coeff(2)));
+    }
+  }
+
+  /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this.
+    *
+    * \param mat the Dim x Dim transformation matrix
+    * \param traits specifies whether the matrix \a mat represents an #Isometry
+    *               or a more generic #Affine transformation. The default is #Affine.
+    */
+  template<typename XprType>
+  EIGEN_DEVICE_FUNC inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
+  {
+    if (traits==Affine)
+    {
+      normal() = mat.inverse().transpose() * normal();
+      m_coeffs /= normal().norm();
+    }
+    else if (traits==Isometry)
+      normal() = mat * normal();
+    else
+    {
+      eigen_assert(0 && "invalid traits value in Hyperplane::transform()");
+    }
+    return *this;
+  }
+
+  /** Applies the transformation \a t to \c *this and returns a reference to \c *this.
+    *
+    * \param t the transformation of dimension Dim
+    * \param traits specifies whether the transformation \a t represents an #Isometry
+    *               or a more generic #Affine transformation. The default is #Affine.
+    *               Other kind of transformations are not supported.
+    */
+  template<int TrOptions>
+  EIGEN_DEVICE_FUNC inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,
+                                TransformTraits traits = Affine)
+  {
+    transform(t.linear(), traits);
+    offset() -= normal().dot(t.translation());
+    return *this;
+  }
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Hyperplane,
+           Hyperplane<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const
+  {
+    return typename internal::cast_return_type<Hyperplane,
+                    Hyperplane<NewScalarType,AmbientDimAtCompileTime,Options> >::type(*this);
+  }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType,int OtherOptions>
+  EIGEN_DEVICE_FUNC inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
+  { m_coeffs = other.coeffs().template cast<Scalar>(); }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  template<int OtherOptions>
+  EIGEN_DEVICE_FUNC bool isApprox(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  { return m_coeffs.isApprox(other.m_coeffs, prec); }
+
+protected:
+
+  Coefficients m_coeffs;
+};
+
+} // end namespace Eigen
+
+#endif // EIGEN_HYPERPLANE_H
diff --git a/third-party/Eigen/src/Geometry/OrthoMethods.h b/third-party/Eigen/src/Geometry/OrthoMethods.h
new file mode 100644
index 00000000..a035e631
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/OrthoMethods.h
@@ -0,0 +1,234 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ORTHOMETHODS_H
+#define EIGEN_ORTHOMETHODS_H
+
+namespace Eigen { 
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \returns the cross product of \c *this and \a other
+  *
+  * Here is a very good explanation of cross-product: http://xkcd.com/199/
+  * 
+  * With complex numbers, the cross product is implemented as
+  * \f$ (\mathbf{a}+i\mathbf{b}) \times (\mathbf{c}+i\mathbf{d}) = (\mathbf{a} \times \mathbf{c} - \mathbf{b} \times \mathbf{d}) - i(\mathbf{a} \times \mathbf{d} - \mathbf{b} \times \mathbf{c})\f$
+  * 
+  * \sa MatrixBase::cross3()
+  */
+template<typename Derived>
+template<typename OtherDerived>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
+#else
+inline typename MatrixBase<Derived>::PlainObject
+#endif
+MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3)
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
+
+  // Note that there is no need for an expression here since the compiler
+  // optimize such a small temporary very well (even within a complex expression)
+  typename internal::nested_eval<Derived,2>::type lhs(derived());
+  typename internal::nested_eval<OtherDerived,2>::type rhs(other.derived());
+  return typename cross_product_return_type<OtherDerived>::type(
+    numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)),
+    numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)),
+    numext::conj(lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0))
+  );
+}
+
+namespace internal {
+
+template< int Arch,typename VectorLhs,typename VectorRhs,
+          typename Scalar = typename VectorLhs::Scalar,
+          bool Vectorizable = bool((VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit)>
+struct cross3_impl {
+  EIGEN_DEVICE_FUNC static inline typename internal::plain_matrix_type<VectorLhs>::type
+  run(const VectorLhs& lhs, const VectorRhs& rhs)
+  {
+    return typename internal::plain_matrix_type<VectorLhs>::type(
+      numext::conj(lhs.coeff(1) * rhs.coeff(2) - lhs.coeff(2) * rhs.coeff(1)),
+      numext::conj(lhs.coeff(2) * rhs.coeff(0) - lhs.coeff(0) * rhs.coeff(2)),
+      numext::conj(lhs.coeff(0) * rhs.coeff(1) - lhs.coeff(1) * rhs.coeff(0)),
+      0
+    );
+  }
+};
+
+}
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \returns the cross product of \c *this and \a other using only the x, y, and z coefficients
+  *
+  * The size of \c *this and \a other must be four. This function is especially useful
+  * when using 4D vectors instead of 3D ones to get advantage of SSE/AltiVec vectorization.
+  *
+  * \sa MatrixBase::cross()
+  */
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::PlainObject
+MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,4)
+
+  typedef typename internal::nested_eval<Derived,2>::type DerivedNested;
+  typedef typename internal::nested_eval<OtherDerived,2>::type OtherDerivedNested;
+  DerivedNested lhs(derived());
+  OtherDerivedNested rhs(other.derived());
+
+  return internal::cross3_impl<Architecture::Target,
+                        typename internal::remove_all<DerivedNested>::type,
+                        typename internal::remove_all<OtherDerivedNested>::type>::run(lhs,rhs);
+}
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \returns a matrix expression of the cross product of each column or row
+  * of the referenced expression with the \a other vector.
+  *
+  * The referenced matrix must have one dimension equal to 3.
+  * The result matrix has the same dimensions than the referenced one.
+  *
+  * \sa MatrixBase::cross() */
+template<typename ExpressionType, int Direction>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC 
+const typename VectorwiseOp<ExpressionType,Direction>::CrossReturnType
+VectorwiseOp<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& other) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,3)
+  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
+    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+  
+  typename internal::nested_eval<ExpressionType,2>::type mat(_expression());
+  typename internal::nested_eval<OtherDerived,2>::type vec(other.derived());
+
+  CrossReturnType res(_expression().rows(),_expression().cols());
+  if(Direction==Vertical)
+  {
+    eigen_assert(CrossReturnType::RowsAtCompileTime==3 && "the matrix must have exactly 3 rows");
+    res.row(0) = (mat.row(1) * vec.coeff(2) - mat.row(2) * vec.coeff(1)).conjugate();
+    res.row(1) = (mat.row(2) * vec.coeff(0) - mat.row(0) * vec.coeff(2)).conjugate();
+    res.row(2) = (mat.row(0) * vec.coeff(1) - mat.row(1) * vec.coeff(0)).conjugate();
+  }
+  else
+  {
+    eigen_assert(CrossReturnType::ColsAtCompileTime==3 && "the matrix must have exactly 3 columns");
+    res.col(0) = (mat.col(1) * vec.coeff(2) - mat.col(2) * vec.coeff(1)).conjugate();
+    res.col(1) = (mat.col(2) * vec.coeff(0) - mat.col(0) * vec.coeff(2)).conjugate();
+    res.col(2) = (mat.col(0) * vec.coeff(1) - mat.col(1) * vec.coeff(0)).conjugate();
+  }
+  return res;
+}
+
+namespace internal {
+
+template<typename Derived, int Size = Derived::SizeAtCompileTime>
+struct unitOrthogonal_selector
+{
+  typedef typename plain_matrix_type<Derived>::type VectorType;
+  typedef typename traits<Derived>::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef Matrix<Scalar,2,1> Vector2;
+  EIGEN_DEVICE_FUNC
+  static inline VectorType run(const Derived& src)
+  {
+    VectorType perp = VectorType::Zero(src.size());
+    Index maxi = 0;
+    Index sndi = 0;
+    src.cwiseAbs().maxCoeff(&maxi);
+    if (maxi==0)
+      sndi = 1;
+    RealScalar invnm = RealScalar(1)/(Vector2() << src.coeff(sndi),src.coeff(maxi)).finished().norm();
+    perp.coeffRef(maxi) = -numext::conj(src.coeff(sndi)) * invnm;
+    perp.coeffRef(sndi) =  numext::conj(src.coeff(maxi)) * invnm;
+
+    return perp;
+   }
+};
+
+template<typename Derived>
+struct unitOrthogonal_selector<Derived,3>
+{
+  typedef typename plain_matrix_type<Derived>::type VectorType;
+  typedef typename traits<Derived>::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline VectorType run(const Derived& src)
+  {
+    VectorType perp;
+    /* Let us compute the crossed product of *this with a vector
+     * that is not too close to being colinear to *this.
+     */
+
+    /* unless the x and y coords are both close to zero, we can
+     * simply take ( -y, x, 0 ) and normalize it.
+     */
+    if((!isMuchSmallerThan(src.x(), src.z()))
+    || (!isMuchSmallerThan(src.y(), src.z())))
+    {
+      RealScalar invnm = RealScalar(1)/src.template head<2>().norm();
+      perp.coeffRef(0) = -numext::conj(src.y())*invnm;
+      perp.coeffRef(1) = numext::conj(src.x())*invnm;
+      perp.coeffRef(2) = 0;
+    }
+    /* if both x and y are close to zero, then the vector is close
+     * to the z-axis, so it's far from colinear to the x-axis for instance.
+     * So we take the crossed product with (1,0,0) and normalize it.
+     */
+    else
+    {
+      RealScalar invnm = RealScalar(1)/src.template tail<2>().norm();
+      perp.coeffRef(0) = 0;
+      perp.coeffRef(1) = -numext::conj(src.z())*invnm;
+      perp.coeffRef(2) = numext::conj(src.y())*invnm;
+    }
+
+    return perp;
+   }
+};
+
+template<typename Derived>
+struct unitOrthogonal_selector<Derived,2>
+{
+  typedef typename plain_matrix_type<Derived>::type VectorType;
+  EIGEN_DEVICE_FUNC
+  static inline VectorType run(const Derived& src)
+  { return VectorType(-numext::conj(src.y()), numext::conj(src.x())).normalized(); }
+};
+
+} // end namespace internal
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \returns a unit vector which is orthogonal to \c *this
+  *
+  * The size of \c *this must be at least 2. If the size is exactly 2,
+  * then the returned vector is a counter clock wise rotation of \c *this, i.e., (-y,x).normalized().
+  *
+  * \sa cross()
+  */
+template<typename Derived>
+EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::PlainObject
+MatrixBase<Derived>::unitOrthogonal() const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return internal::unitOrthogonal_selector<Derived>::run(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_ORTHOMETHODS_H
diff --git a/third-party/Eigen/src/Geometry/ParametrizedLine.h b/third-party/Eigen/src/Geometry/ParametrizedLine.h
new file mode 100644
index 00000000..1e985d8c
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/ParametrizedLine.h
@@ -0,0 +1,195 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PARAMETRIZEDLINE_H
+#define EIGEN_PARAMETRIZEDLINE_H
+
+namespace Eigen { 
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \class ParametrizedLine
+  *
+  * \brief A parametrized line
+  *
+  * A parametrized line is defined by an origin point \f$ \mathbf{o} \f$ and a unit
+  * direction vector \f$ \mathbf{d} \f$ such that the line corresponds to
+  * the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ t \in \mathbf{R} \f$.
+  *
+  * \tparam _Scalar the scalar type, i.e., the type of the coefficients
+  * \tparam _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic.
+  */
+template <typename _Scalar, int _AmbientDim, int _Options>
+class ParametrizedLine
+{
+public:
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
+  enum {
+    AmbientDimAtCompileTime = _AmbientDim,
+    Options = _Options
+  };
+  typedef _Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+  typedef Matrix<Scalar,AmbientDimAtCompileTime,1,Options> VectorType;
+
+  /** Default constructor without initialization */
+  EIGEN_DEVICE_FUNC inline ParametrizedLine() {}
+  
+  template<int OtherOptions>
+  EIGEN_DEVICE_FUNC ParametrizedLine(const ParametrizedLine<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
+   : m_origin(other.origin()), m_direction(other.direction())
+  {}
+
+  /** Constructs a dynamic-size line with \a _dim the dimension
+    * of the ambient space */
+  EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {}
+
+  /** Initializes a parametrized line of direction \a direction and origin \a origin.
+    * \warning the vector direction is assumed to be normalized.
+    */
+  EIGEN_DEVICE_FUNC ParametrizedLine(const VectorType& origin, const VectorType& direction)
+    : m_origin(origin), m_direction(direction) {}
+
+  template <int OtherOptions>
+  EIGEN_DEVICE_FUNC explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);
+
+  /** Constructs a parametrized line going from \a p0 to \a p1. */
+  EIGEN_DEVICE_FUNC static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)
+  { return ParametrizedLine(p0, (p1-p0).normalized()); }
+
+  EIGEN_DEVICE_FUNC ~ParametrizedLine() {}
+
+  /** \returns the dimension in which the line holds */
+  EIGEN_DEVICE_FUNC inline Index dim() const { return m_direction.size(); }
+
+  EIGEN_DEVICE_FUNC const VectorType& origin() const { return m_origin; }
+  EIGEN_DEVICE_FUNC VectorType& origin() { return m_origin; }
+
+  EIGEN_DEVICE_FUNC const VectorType& direction() const { return m_direction; }
+  EIGEN_DEVICE_FUNC VectorType& direction() { return m_direction; }
+
+  /** \returns the squared distance of a point \a p to its projection onto the line \c *this.
+    * \sa distance()
+    */
+  EIGEN_DEVICE_FUNC RealScalar squaredDistance(const VectorType& p) const
+  {
+    VectorType diff = p - origin();
+    return (diff - direction().dot(diff) * direction()).squaredNorm();
+  }
+  /** \returns the distance of a point \a p to its projection onto the line \c *this.
+    * \sa squaredDistance()
+    */
+  EIGEN_DEVICE_FUNC RealScalar distance(const VectorType& p) const { EIGEN_USING_STD_MATH(sqrt) return sqrt(squaredDistance(p)); }
+
+  /** \returns the projection of a point \a p onto the line \c *this. */
+  EIGEN_DEVICE_FUNC VectorType projection(const VectorType& p) const
+  { return origin() + direction().dot(p-origin()) * direction(); }
+
+  EIGEN_DEVICE_FUNC VectorType pointAt(const Scalar& t) const;
+  
+  template <int OtherOptions>
+  EIGEN_DEVICE_FUNC Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
+ 
+  template <int OtherOptions>
+  EIGEN_DEVICE_FUNC Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
+  
+  template <int OtherOptions>
+  EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<ParametrizedLine,
+           ParametrizedLine<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const
+  {
+    return typename internal::cast_return_type<ParametrizedLine,
+                    ParametrizedLine<NewScalarType,AmbientDimAtCompileTime,Options> >::type(*this);
+  }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType,int OtherOptions>
+  EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
+  {
+    m_origin = other.origin().template cast<Scalar>();
+    m_direction = other.direction().template cast<Scalar>();
+  }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  EIGEN_DEVICE_FUNC bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }
+
+protected:
+
+  VectorType m_origin, m_direction;
+};
+
+/** Constructs a parametrized line from a 2D hyperplane
+  *
+  * \warning the ambient space must have dimension 2 such that the hyperplane actually describes a line
+  */
+template <typename _Scalar, int _AmbientDim, int _Options>
+template <int OtherOptions>
+EIGEN_DEVICE_FUNC inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
+  direction() = hyperplane.normal().unitOrthogonal();
+  origin() = -hyperplane.normal()*hyperplane.offset();
+}
+
+/** \returns the point at \a t along this line
+  */
+template <typename _Scalar, int _AmbientDim, int _Options>
+EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
+ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const
+{
+  return origin() + (direction()*t); 
+}
+
+/** \returns the parameter value of the intersection between \c *this and the given \a hyperplane
+  */
+template <typename _Scalar, int _AmbientDim, int _Options>
+template <int OtherOptions>
+EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
+{
+  return -(hyperplane.offset()+hyperplane.normal().dot(origin()))
+          / hyperplane.normal().dot(direction());
+}
+
+
+/** \deprecated use intersectionParameter()
+  * \returns the parameter value of the intersection between \c *this and the given \a hyperplane
+  */
+template <typename _Scalar, int _AmbientDim, int _Options>
+template <int OtherOptions>
+EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
+{
+  return intersectionParameter(hyperplane);
+}
+
+/** \returns the point of the intersection between \c *this and the given hyperplane
+  */
+template <typename _Scalar, int _AmbientDim, int _Options>
+template <int OtherOptions>
+EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
+ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
+{
+  return pointAt(intersectionParameter(hyperplane));
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_PARAMETRIZEDLINE_H
diff --git a/third-party/Eigen/src/Geometry/Quaternion.h b/third-party/Eigen/src/Geometry/Quaternion.h
new file mode 100644
index 00000000..b8182065
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/Quaternion.h
@@ -0,0 +1,832 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009 Mathieu Gautier <mathieu.gautier@cea.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_QUATERNION_H
+#define EIGEN_QUATERNION_H
+namespace Eigen { 
+
+
+/***************************************************************************
+* Definition of QuaternionBase<Derived>
+* The implementation is at the end of the file
+***************************************************************************/
+
+namespace internal {
+template<typename Other,
+         int OtherRows=Other::RowsAtCompileTime,
+         int OtherCols=Other::ColsAtCompileTime>
+struct quaternionbase_assign_impl;
+}
+
+/** \geometry_module \ingroup Geometry_Module
+  * \class QuaternionBase
+  * \brief Base class for quaternion expressions
+  * \tparam Derived derived type (CRTP)
+  * \sa class Quaternion
+  */
+template<class Derived>
+class QuaternionBase : public RotationBase<Derived, 3>
+{
+ public:
+  typedef RotationBase<Derived, 3> Base;
+
+  using Base::operator*;
+  using Base::derived;
+
+  typedef typename internal::traits<Derived>::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef typename internal::traits<Derived>::Coefficients Coefficients;
+  typedef typename Coefficients::CoeffReturnType CoeffReturnType;
+  typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
+                                        Scalar&, CoeffReturnType>::type NonConstCoeffReturnType;
+
+
+  enum {
+    Flags = Eigen::internal::traits<Derived>::Flags
+  };
+
+ // typedef typename Matrix<Scalar,4,1> Coefficients;
+  /** the type of a 3D vector */
+  typedef Matrix<Scalar,3,1> Vector3;
+  /** the equivalent rotation matrix type */
+  typedef Matrix<Scalar,3,3> Matrix3;
+  /** the equivalent angle-axis type */
+  typedef AngleAxis<Scalar> AngleAxisType;
+
+
+
+  /** \returns the \c x coefficient */
+  EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }
+  /** \returns the \c y coefficient */
+  EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }
+  /** \returns the \c z coefficient */
+  EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }
+  /** \returns the \c w coefficient */
+  EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }
+
+  /** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }
+  /** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }
+  /** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }
+  /** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */
+  EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }
+
+  /** \returns a read-only vector expression of the imaginary part (x,y,z) */
+  EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
+
+  /** \returns a vector expression of the imaginary part (x,y,z) */
+  EIGEN_DEVICE_FUNC inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
+
+  /** \returns a read-only vector expression of the coefficients (x,y,z,w) */
+  EIGEN_DEVICE_FUNC inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
+
+  /** \returns a vector expression of the coefficients (x,y,z,w) */
+  EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);
+  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);
+
+// disabled this copy operator as it is giving very strange compilation errors when compiling
+// test_stdvector with GCC 4.4.2. This looks like a GCC bug though, so feel free to re-enable it if it's
+// useful; however notice that we already have the templated operator= above and e.g. in MatrixBase
+// we didn't have to add, in addition to templated operator=, such a non-templated copy operator.
+//  Derived& operator=(const QuaternionBase& other)
+//  { return operator=<Derived>(other); }
+
+  EIGEN_DEVICE_FUNC Derived& operator=(const AngleAxisType& aa);
+  template<class OtherDerived> EIGEN_DEVICE_FUNC Derived& operator=(const MatrixBase<OtherDerived>& m);
+
+  /** \returns a quaternion representing an identity rotation
+    * \sa MatrixBase::Identity()
+    */
+  EIGEN_DEVICE_FUNC static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
+
+  /** \sa QuaternionBase::Identity(), MatrixBase::setIdentity()
+    */
+  EIGEN_DEVICE_FUNC inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }
+
+  /** \returns the squared norm of the quaternion's coefficients
+    * \sa QuaternionBase::norm(), MatrixBase::squaredNorm()
+    */
+  EIGEN_DEVICE_FUNC inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
+
+  /** \returns the norm of the quaternion's coefficients
+    * \sa QuaternionBase::squaredNorm(), MatrixBase::norm()
+    */
+  EIGEN_DEVICE_FUNC inline Scalar norm() const { return coeffs().norm(); }
+
+  /** Normalizes the quaternion \c *this
+    * \sa normalized(), MatrixBase::normalize() */
+  EIGEN_DEVICE_FUNC inline void normalize() { coeffs().normalize(); }
+  /** \returns a normalized copy of \c *this
+    * \sa normalize(), MatrixBase::normalized() */
+  EIGEN_DEVICE_FUNC inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
+
+    /** \returns the dot product of \c *this and \a other
+    * Geometrically speaking, the dot product of two unit quaternions
+    * corresponds to the cosine of half the angle between the two rotations.
+    * \sa angularDistance()
+    */
+  template<class OtherDerived> EIGEN_DEVICE_FUNC inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
+
+  template<class OtherDerived> EIGEN_DEVICE_FUNC Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
+
+  /** \returns an equivalent 3x3 rotation matrix */
+  EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix() const;
+
+  /** \returns the quaternion which transform \a a into \a b through a rotation */
+  template<typename Derived1, typename Derived2>
+  EIGEN_DEVICE_FUNC Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
+
+  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
+  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);
+
+  /** \returns the quaternion describing the inverse rotation */
+  EIGEN_DEVICE_FUNC Quaternion<Scalar> inverse() const;
+
+  /** \returns the conjugated quaternion */
+  EIGEN_DEVICE_FUNC Quaternion<Scalar> conjugate() const;
+
+  template<class OtherDerived> EIGEN_DEVICE_FUNC Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  template<class OtherDerived>
+  EIGEN_DEVICE_FUNC bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
+  { return coeffs().isApprox(other.coeffs(), prec); }
+
+  /** return the result vector of \a v through the rotation*/
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
+
+  #ifdef EIGEN_PARSED_BY_DOXYGEN
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const;
+
+  #else
+
+  template<typename NewScalarType>
+  EIGEN_DEVICE_FUNC inline 
+  typename internal::enable_if<internal::is_same<Scalar,NewScalarType>::value,const Derived&>::type cast() const
+  {
+    return derived();
+  }
+
+  template<typename NewScalarType>
+  EIGEN_DEVICE_FUNC inline 
+  typename internal::enable_if<!internal::is_same<Scalar,NewScalarType>::value,Quaternion<NewScalarType> >::type cast() const
+  {
+    return Quaternion<NewScalarType>(coeffs().template cast<NewScalarType>());
+  }
+  #endif
+
+#ifdef EIGEN_QUATERNIONBASE_PLUGIN
+# include EIGEN_QUATERNIONBASE_PLUGIN
+#endif
+protected:
+  EIGEN_DEFAULT_COPY_CONSTRUCTOR(QuaternionBase)
+  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(QuaternionBase)
+};
+
+/***************************************************************************
+* Definition/implementation of Quaternion<Scalar>
+***************************************************************************/
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \class Quaternion
+  *
+  * \brief The quaternion class used to represent 3D orientations and rotations
+  *
+  * \tparam _Scalar the scalar type, i.e., the type of the coefficients
+  * \tparam _Options controls the memory alignment of the coefficients. Can be \# AutoAlign or \# DontAlign. Default is AutoAlign.
+  *
+  * This class represents a quaternion \f$ w+xi+yj+zk \f$ that is a convenient representation of
+  * orientations and rotations of objects in three dimensions. Compared to other representations
+  * like Euler angles or 3x3 matrices, quaternions offer the following advantages:
+  * \li \b compact storage (4 scalars)
+  * \li \b efficient to compose (28 flops),
+  * \li \b stable spherical interpolation
+  *
+  * The following two typedefs are provided for convenience:
+  * \li \c Quaternionf for \c float
+  * \li \c Quaterniond for \c double
+  *
+  * \warning Operations interpreting the quaternion as rotation have undefined behavior if the quaternion is not normalized.
+  *
+  * \sa  class AngleAxis, class Transform
+  */
+
+namespace internal {
+template<typename _Scalar,int _Options>
+struct traits<Quaternion<_Scalar,_Options> >
+{
+  typedef Quaternion<_Scalar,_Options> PlainObject;
+  typedef _Scalar Scalar;
+  typedef Matrix<_Scalar,4,1,_Options> Coefficients;
+  enum{
+    Alignment = internal::traits<Coefficients>::Alignment,
+    Flags = LvalueBit
+  };
+};
+}
+
+template<typename _Scalar, int _Options>
+class Quaternion : public QuaternionBase<Quaternion<_Scalar,_Options> >
+{
+public:
+  typedef QuaternionBase<Quaternion<_Scalar,_Options> > Base;
+  enum { NeedsAlignment = internal::traits<Quaternion>::Alignment>0 };
+
+  typedef _Scalar Scalar;
+
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Quaternion)
+  using Base::operator*=;
+
+  typedef typename internal::traits<Quaternion>::Coefficients Coefficients;
+  typedef typename Base::AngleAxisType AngleAxisType;
+
+  /** Default constructor leaving the quaternion uninitialized. */
+  EIGEN_DEVICE_FUNC inline Quaternion() {}
+
+  /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from
+    * its four coefficients \a w, \a x, \a y and \a z.
+    *
+    * \warning Note the order of the arguments: the real \a w coefficient first,
+    * while internally the coefficients are stored in the following order:
+    * [\c x, \c y, \c z, \c w]
+    */
+  EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
+
+  /** Constructs and initialize a quaternion from the array data */
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
+
+  /** Copy constructor */
+  template<class Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
+
+  /** Constructs and initializes a quaternion from the angle-axis \a aa */
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
+
+  /** Constructs and initializes a quaternion from either:
+    *  - a rotation matrix expression,
+    *  - a 4D vector expression representing quaternion coefficients.
+    */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
+
+  /** Explicit copy constructor with scalar conversion */
+  template<typename OtherScalar, int OtherOptions>
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
+  { m_coeffs = other.coeffs().template cast<Scalar>(); }
+
+  EIGEN_DEVICE_FUNC static Quaternion UnitRandom();
+
+  template<typename Derived1, typename Derived2>
+  EIGEN_DEVICE_FUNC static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
+
+  EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs;}
+  EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
+
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment))
+  
+#ifdef EIGEN_QUATERNION_PLUGIN
+# include EIGEN_QUATERNION_PLUGIN
+#endif
+
+protected:
+  Coefficients m_coeffs;
+  
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+    static EIGEN_STRONG_INLINE void _check_template_params()
+    {
+      EIGEN_STATIC_ASSERT( (_Options & DontAlign) == _Options,
+        INVALID_MATRIX_TEMPLATE_PARAMETERS)
+    }
+#endif
+};
+
+/** \ingroup Geometry_Module
+  * single precision quaternion type */
+typedef Quaternion<float> Quaternionf;
+/** \ingroup Geometry_Module
+  * double precision quaternion type */
+typedef Quaternion<double> Quaterniond;
+
+/***************************************************************************
+* Specialization of Map<Quaternion<Scalar>>
+***************************************************************************/
+
+namespace internal {
+  template<typename _Scalar, int _Options>
+  struct traits<Map<Quaternion<_Scalar>, _Options> > : traits<Quaternion<_Scalar, (int(_Options)&Aligned)==Aligned ? AutoAlign : DontAlign> >
+  {
+    typedef Map<Matrix<_Scalar,4,1>, _Options> Coefficients;
+  };
+}
+
+namespace internal {
+  template<typename _Scalar, int _Options>
+  struct traits<Map<const Quaternion<_Scalar>, _Options> > : traits<Quaternion<_Scalar, (int(_Options)&Aligned)==Aligned ? AutoAlign : DontAlign> >
+  {
+    typedef Map<const Matrix<_Scalar,4,1>, _Options> Coefficients;
+    typedef traits<Quaternion<_Scalar, (int(_Options)&Aligned)==Aligned ? AutoAlign : DontAlign> > TraitsBase;
+    enum {
+      Flags = TraitsBase::Flags & ~LvalueBit
+    };
+  };
+}
+
+/** \ingroup Geometry_Module
+  * \brief Quaternion expression mapping a constant memory buffer
+  *
+  * \tparam _Scalar the type of the Quaternion coefficients
+  * \tparam _Options see class Map
+  *
+  * This is a specialization of class Map for Quaternion. This class allows to view
+  * a 4 scalar memory buffer as an Eigen's Quaternion object.
+  *
+  * \sa class Map, class Quaternion, class QuaternionBase
+  */
+template<typename _Scalar, int _Options>
+class Map<const Quaternion<_Scalar>, _Options >
+  : public QuaternionBase<Map<const Quaternion<_Scalar>, _Options> >
+{
+  public:
+    typedef QuaternionBase<Map<const Quaternion<_Scalar>, _Options> > Base;
+
+    typedef _Scalar Scalar;
+    typedef typename internal::traits<Map>::Coefficients Coefficients;
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
+    using Base::operator*=;
+
+    /** Constructs a Mapped Quaternion object from the pointer \a coeffs
+      *
+      * The pointer \a coeffs must reference the four coefficients of Quaternion in the following order:
+      * \code *coeffs == {x, y, z, w} \endcode
+      *
+      * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
+    EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
+
+    EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
+
+  protected:
+    const Coefficients m_coeffs;
+};
+
+/** \ingroup Geometry_Module
+  * \brief Expression of a quaternion from a memory buffer
+  *
+  * \tparam _Scalar the type of the Quaternion coefficients
+  * \tparam _Options see class Map
+  *
+  * This is a specialization of class Map for Quaternion. This class allows to view
+  * a 4 scalar memory buffer as an Eigen's  Quaternion object.
+  *
+  * \sa class Map, class Quaternion, class QuaternionBase
+  */
+template<typename _Scalar, int _Options>
+class Map<Quaternion<_Scalar>, _Options >
+  : public QuaternionBase<Map<Quaternion<_Scalar>, _Options> >
+{
+  public:
+    typedef QuaternionBase<Map<Quaternion<_Scalar>, _Options> > Base;
+
+    typedef _Scalar Scalar;
+    typedef typename internal::traits<Map>::Coefficients Coefficients;
+    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
+    using Base::operator*=;
+
+    /** Constructs a Mapped Quaternion object from the pointer \a coeffs
+      *
+      * The pointer \a coeffs must reference the four coefficients of Quaternion in the following order:
+      * \code *coeffs == {x, y, z, w} \endcode
+      *
+      * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
+    EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
+
+    EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
+    EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
+
+  protected:
+    Coefficients m_coeffs;
+};
+
+/** \ingroup Geometry_Module
+  * Map an unaligned array of single precision scalars as a quaternion */
+typedef Map<Quaternion<float>, 0>         QuaternionMapf;
+/** \ingroup Geometry_Module
+  * Map an unaligned array of double precision scalars as a quaternion */
+typedef Map<Quaternion<double>, 0>        QuaternionMapd;
+/** \ingroup Geometry_Module
+  * Map a 16-byte aligned array of single precision scalars as a quaternion */
+typedef Map<Quaternion<float>, Aligned>   QuaternionMapAlignedf;
+/** \ingroup Geometry_Module
+  * Map a 16-byte aligned array of double precision scalars as a quaternion */
+typedef Map<Quaternion<double>, Aligned>  QuaternionMapAlignedd;
+
+/***************************************************************************
+* Implementation of QuaternionBase methods
+***************************************************************************/
+
+// Generic Quaternion * Quaternion product
+// This product can be specialized for a given architecture via the Arch template argument.
+namespace internal {
+template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
+    return Quaternion<Scalar>
+    (
+      a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
+      a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(),
+      a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(),
+      a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x()
+    );
+  }
+};
+}
+
+/** \returns the concatenation of two rotations as a quaternion-quaternion product */
+template <class Derived>
+template <class OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<typename internal::traits<Derived>::Scalar>
+QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) const
+{
+  EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value),
+   YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+  return internal::quat_product<Architecture::Target, Derived, OtherDerived,
+                         typename internal::traits<Derived>::Scalar>::run(*this, other);
+}
+
+/** \sa operator*(Quaternion) */
+template <class Derived>
+template <class OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
+{
+  derived() = derived() * other.derived();
+  return derived();
+}
+
+/** Rotation of a vector by a quaternion.
+  * \remarks If the quaternion is used to rotate several points (>1)
+  * then it is much more efficient to first convert it to a 3x3 Matrix.
+  * Comparison of the operation cost for n transformations:
+  *   - Quaternion2:    30n
+  *   - Via a Matrix3: 24 + 15n
+  */
+template <class Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
+QuaternionBase<Derived>::_transformVector(const Vector3& v) const
+{
+    // Note that this algorithm comes from the optimization by hand
+    // of the conversion to a Matrix followed by a Matrix/Vector product.
+    // It appears to be much faster than the common algorithm found
+    // in the literature (30 versus 39 flops). It also requires two
+    // Vector3 as temporaries.
+    Vector3 uv = this->vec().cross(v);
+    uv += uv;
+    return v + this->w() * uv + this->vec().cross(uv);
+}
+
+template<class Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<Derived>& other)
+{
+  coeffs() = other.coeffs();
+  return derived();
+}
+
+template<class Derived>
+template<class OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
+{
+  coeffs() = other.coeffs();
+  return derived();
+}
+
+/** Set \c *this from an angle-axis \a aa and returns a reference to \c *this
+  */
+template<class Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
+{
+  EIGEN_USING_STD_MATH(cos)
+  EIGEN_USING_STD_MATH(sin)
+  Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings
+  this->w() = cos(ha);
+  this->vec() = sin(ha) * aa.axis();
+  return derived();
+}
+
+/** Set \c *this from the expression \a xpr:
+  *   - if \a xpr is a 4x1 vector, then \a xpr is assumed to be a quaternion
+  *   - if \a xpr is a 3x3 matrix, then \a xpr is assumed to be rotation matrix
+  *     and \a xpr is converted to a quaternion
+  */
+
+template<class Derived>
+template<class MatrixDerived>
+EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
+{
+  EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename MatrixDerived::Scalar>::value),
+   YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+  internal::quaternionbase_assign_impl<MatrixDerived>::run(*this, xpr.derived());
+  return derived();
+}
+
+/** Convert the quaternion to a 3x3 rotation matrix. The quaternion is required to
+  * be normalized, otherwise the result is undefined.
+  */
+template<class Derived>
+EIGEN_DEVICE_FUNC inline typename QuaternionBase<Derived>::Matrix3
+QuaternionBase<Derived>::toRotationMatrix(void) const
+{
+  // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!)
+  // if not inlined then the cost of the return by value is huge ~ +35%,
+  // however, not inlining this function is an order of magnitude slower, so
+  // it has to be inlined, and so the return by value is not an issue
+  Matrix3 res;
+
+  const Scalar tx  = Scalar(2)*this->x();
+  const Scalar ty  = Scalar(2)*this->y();
+  const Scalar tz  = Scalar(2)*this->z();
+  const Scalar twx = tx*this->w();
+  const Scalar twy = ty*this->w();
+  const Scalar twz = tz*this->w();
+  const Scalar txx = tx*this->x();
+  const Scalar txy = ty*this->x();
+  const Scalar txz = tz*this->x();
+  const Scalar tyy = ty*this->y();
+  const Scalar tyz = tz*this->y();
+  const Scalar tzz = tz*this->z();
+
+  res.coeffRef(0,0) = Scalar(1)-(tyy+tzz);
+  res.coeffRef(0,1) = txy-twz;
+  res.coeffRef(0,2) = txz+twy;
+  res.coeffRef(1,0) = txy+twz;
+  res.coeffRef(1,1) = Scalar(1)-(txx+tzz);
+  res.coeffRef(1,2) = tyz-twx;
+  res.coeffRef(2,0) = txz-twy;
+  res.coeffRef(2,1) = tyz+twx;
+  res.coeffRef(2,2) = Scalar(1)-(txx+tyy);
+
+  return res;
+}
+
+/** Sets \c *this to be a quaternion representing a rotation between
+  * the two arbitrary vectors \a a and \a b. In other words, the built
+  * rotation represent a rotation sending the line of direction \a a
+  * to the line of direction \a b, both lines passing through the origin.
+  *
+  * \returns a reference to \c *this.
+  *
+  * Note that the two input vectors do \b not have to be normalized, and
+  * do not need to have the same norm.
+  */
+template<class Derived>
+template<typename Derived1, typename Derived2>
+EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
+{
+  EIGEN_USING_STD_MATH(sqrt)
+  Vector3 v0 = a.normalized();
+  Vector3 v1 = b.normalized();
+  Scalar c = v1.dot(v0);
+
+  // if dot == -1, vectors are nearly opposites
+  // => accurately compute the rotation axis by computing the
+  //    intersection of the two planes. This is done by solving:
+  //       x^T v0 = 0
+  //       x^T v1 = 0
+  //    under the constraint:
+  //       ||x|| = 1
+  //    which yields a singular value problem
+  if (c < Scalar(-1)+NumTraits<Scalar>::dummy_precision())
+  {
+    c = numext::maxi(c,Scalar(-1));
+    Matrix<Scalar,2,3> m; m << v0.transpose(), v1.transpose();
+    JacobiSVD<Matrix<Scalar,2,3> > svd(m, ComputeFullV);
+    Vector3 axis = svd.matrixV().col(2);
+
+    Scalar w2 = (Scalar(1)+c)*Scalar(0.5);
+    this->w() = sqrt(w2);
+    this->vec() = axis * sqrt(Scalar(1) - w2);
+    return derived();
+  }
+  Vector3 axis = v0.cross(v1);
+  Scalar s = sqrt((Scalar(1)+c)*Scalar(2));
+  Scalar invs = Scalar(1)/s;
+  this->vec() = axis * invs;
+  this->w() = s * Scalar(0.5);
+
+  return derived();
+}
+
+/** \returns a random unit quaternion following a uniform distribution law on SO(3)
+  *
+  * \note The implementation is based on http://planning.cs.uiuc.edu/node198.html
+  */
+template<typename Scalar, int Options>
+EIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
+{
+  EIGEN_USING_STD_MATH(sqrt)
+  EIGEN_USING_STD_MATH(sin)
+  EIGEN_USING_STD_MATH(cos)
+  const Scalar u1 = internal::random<Scalar>(0, 1),
+               u2 = internal::random<Scalar>(0, 2*EIGEN_PI),
+               u3 = internal::random<Scalar>(0, 2*EIGEN_PI);
+  const Scalar a = sqrt(1 - u1),
+               b = sqrt(u1);
+  return Quaternion (a * sin(u2), a * cos(u2), b * sin(u3), b * cos(u3));
+}
+
+
+/** Returns a quaternion representing a rotation between
+  * the two arbitrary vectors \a a and \a b. In other words, the built
+  * rotation represent a rotation sending the line of direction \a a
+  * to the line of direction \a b, both lines passing through the origin.
+  *
+  * \returns resulting quaternion
+  *
+  * Note that the two input vectors do \b not have to be normalized, and
+  * do not need to have the same norm.
+  */
+template<typename Scalar, int Options>
+template<typename Derived1, typename Derived2>
+EIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
+{
+    Quaternion quat;
+    quat.setFromTwoVectors(a, b);
+    return quat;
+}
+
+
+/** \returns the multiplicative inverse of \c *this
+  * Note that in most cases, i.e., if you simply want the opposite rotation,
+  * and/or the quaternion is normalized, then it is enough to use the conjugate.
+  *
+  * \sa QuaternionBase::conjugate()
+  */
+template <class Derived>
+EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
+{
+  // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite()  ??
+  Scalar n2 = this->squaredNorm();
+  if (n2 > Scalar(0))
+    return Quaternion<Scalar>(conjugate().coeffs() / n2);
+  else
+  {
+    // return an invalid result to flag the error
+    return Quaternion<Scalar>(Coefficients::Zero());
+  }
+}
+
+// Generic conjugate of a Quaternion
+namespace internal {
+template<int Arch, class Derived, typename Scalar> struct quat_conj
+{
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
+    return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
+  }
+};
+}
+                         
+/** \returns the conjugate of the \c *this which is equal to the multiplicative inverse
+  * if the quaternion is normalized.
+  * The conjugate of a quaternion represents the opposite rotation.
+  *
+  * \sa Quaternion2::inverse()
+  */
+template <class Derived>
+EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
+QuaternionBase<Derived>::conjugate() const
+{
+  return internal::quat_conj<Architecture::Target, Derived,
+                         typename internal::traits<Derived>::Scalar>::run(*this);
+                         
+}
+
+/** \returns the angle (in radian) between two rotations
+  * \sa dot()
+  */
+template <class Derived>
+template <class OtherDerived>
+EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar
+QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
+{
+  EIGEN_USING_STD_MATH(atan2)
+  Quaternion<Scalar> d = (*this) * other.conjugate();
+  return Scalar(2) * atan2( d.vec().norm(), numext::abs(d.w()) );
+}
+
+ 
+    
+/** \returns the spherical linear interpolation between the two quaternions
+  * \c *this and \a other at the parameter \a t in [0;1].
+  * 
+  * This represents an interpolation for a constant motion between \c *this and \a other,
+  * see also http://en.wikipedia.org/wiki/Slerp.
+  */
+template <class Derived>
+template <class OtherDerived>
+EIGEN_DEVICE_FUNC Quaternion<typename internal::traits<Derived>::Scalar>
+QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const
+{
+  EIGEN_USING_STD_MATH(acos)
+  EIGEN_USING_STD_MATH(sin)
+  const Scalar one = Scalar(1) - NumTraits<Scalar>::epsilon();
+  Scalar d = this->dot(other);
+  Scalar absD = numext::abs(d);
+
+  Scalar scale0;
+  Scalar scale1;
+
+  if(absD>=one)
+  {
+    scale0 = Scalar(1) - t;
+    scale1 = t;
+  }
+  else
+  {
+    // theta is the angle between the 2 quaternions
+    Scalar theta = acos(absD);
+    Scalar sinTheta = sin(theta);
+
+    scale0 = sin( ( Scalar(1) - t ) * theta) / sinTheta;
+    scale1 = sin( ( t * theta) ) / sinTheta;
+  }
+  if(d<Scalar(0)) scale1 = -scale1;
+
+  return Quaternion<Scalar>(scale0 * coeffs() + scale1 * other.coeffs());
+}
+
+namespace internal {
+
+// set from a rotation matrix
+template<typename Other>
+struct quaternionbase_assign_impl<Other,3,3>
+{
+  typedef typename Other::Scalar Scalar;
+  template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)
+  {
+    const typename internal::nested_eval<Other,2>::type mat(a_mat);
+    EIGEN_USING_STD_MATH(sqrt)
+    // This algorithm comes from  "Quaternion Calculus and Fast Animation",
+    // Ken Shoemake, 1987 SIGGRAPH course notes
+    Scalar t = mat.trace();
+    if (t > Scalar(0))
+    {
+      t = sqrt(t + Scalar(1.0));
+      q.w() = Scalar(0.5)*t;
+      t = Scalar(0.5)/t;
+      q.x() = (mat.coeff(2,1) - mat.coeff(1,2)) * t;
+      q.y() = (mat.coeff(0,2) - mat.coeff(2,0)) * t;
+      q.z() = (mat.coeff(1,0) - mat.coeff(0,1)) * t;
+    }
+    else
+    {
+      Index i = 0;
+      if (mat.coeff(1,1) > mat.coeff(0,0))
+        i = 1;
+      if (mat.coeff(2,2) > mat.coeff(i,i))
+        i = 2;
+      Index j = (i+1)%3;
+      Index k = (j+1)%3;
+
+      t = sqrt(mat.coeff(i,i)-mat.coeff(j,j)-mat.coeff(k,k) + Scalar(1.0));
+      q.coeffs().coeffRef(i) = Scalar(0.5) * t;
+      t = Scalar(0.5)/t;
+      q.w() = (mat.coeff(k,j)-mat.coeff(j,k))*t;
+      q.coeffs().coeffRef(j) = (mat.coeff(j,i)+mat.coeff(i,j))*t;
+      q.coeffs().coeffRef(k) = (mat.coeff(k,i)+mat.coeff(i,k))*t;
+    }
+  }
+};
+
+// set from a vector of coefficients assumed to be a quaternion
+template<typename Other>
+struct quaternionbase_assign_impl<Other,4,1>
+{
+  typedef typename Other::Scalar Scalar;
+  template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& vec)
+  {
+    q.coeffs() = vec;
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_QUATERNION_H
diff --git a/third-party/Eigen/src/Geometry/Rotation2D.h b/third-party/Eigen/src/Geometry/Rotation2D.h
new file mode 100644
index 00000000..884b7d0e
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/Rotation2D.h
@@ -0,0 +1,199 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ROTATION2D_H
+#define EIGEN_ROTATION2D_H
+
+namespace Eigen { 
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \class Rotation2D
+  *
+  * \brief Represents a rotation/orientation in a 2 dimensional space.
+  *
+  * \tparam _Scalar the scalar type, i.e., the type of the coefficients
+  *
+  * This class is equivalent to a single scalar representing a counter clock wise rotation
+  * as a single angle in radian. It provides some additional features such as the automatic
+  * conversion from/to a 2x2 rotation matrix. Moreover this class aims to provide a similar
+  * interface to Quaternion in order to facilitate the writing of generic algorithms
+  * dealing with rotations.
+  *
+  * \sa class Quaternion, class Transform
+  */
+
+namespace internal {
+
+template<typename _Scalar> struct traits<Rotation2D<_Scalar> >
+{
+  typedef _Scalar Scalar;
+};
+} // end namespace internal
+
+template<typename _Scalar>
+class Rotation2D : public RotationBase<Rotation2D<_Scalar>,2>
+{
+  typedef RotationBase<Rotation2D<_Scalar>,2> Base;
+
+public:
+
+  using Base::operator*;
+
+  enum { Dim = 2 };
+  /** the scalar type of the coefficients */
+  typedef _Scalar Scalar;
+  typedef Matrix<Scalar,2,1> Vector2;
+  typedef Matrix<Scalar,2,2> Matrix2;
+
+protected:
+
+  Scalar m_angle;
+
+public:
+
+  /** Construct a 2D counter clock wise rotation from the angle \a a in radian. */
+  EIGEN_DEVICE_FUNC explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}
+  
+  /** Default constructor wihtout initialization. The represented rotation is undefined. */
+  EIGEN_DEVICE_FUNC Rotation2D() {}
+
+  /** Construct a 2D rotation from a 2x2 rotation matrix \a mat.
+    *
+    * \sa fromRotationMatrix()
+    */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC explicit Rotation2D(const MatrixBase<Derived>& m)
+  {
+    fromRotationMatrix(m.derived());
+  }
+
+  /** \returns the rotation angle */
+  EIGEN_DEVICE_FUNC inline Scalar angle() const { return m_angle; }
+
+  /** \returns a read-write reference to the rotation angle */
+  EIGEN_DEVICE_FUNC inline Scalar& angle() { return m_angle; }
+  
+  /** \returns the rotation angle in [0,2pi] */
+  EIGEN_DEVICE_FUNC inline Scalar smallestPositiveAngle() const {
+    Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
+    return tmp<Scalar(0) ? tmp + Scalar(2*EIGEN_PI) : tmp;
+  }
+  
+  /** \returns the rotation angle in [-pi,pi] */
+  EIGEN_DEVICE_FUNC inline Scalar smallestAngle() const {
+    Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
+    if(tmp>Scalar(EIGEN_PI))       tmp -= Scalar(2*EIGEN_PI);
+    else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2*EIGEN_PI);
+    return tmp;
+  }
+
+  /** \returns the inverse rotation */
+  EIGEN_DEVICE_FUNC inline Rotation2D inverse() const { return Rotation2D(-m_angle); }
+
+  /** Concatenates two rotations */
+  EIGEN_DEVICE_FUNC inline Rotation2D operator*(const Rotation2D& other) const
+  { return Rotation2D(m_angle + other.m_angle); }
+
+  /** Concatenates two rotations */
+  EIGEN_DEVICE_FUNC inline Rotation2D& operator*=(const Rotation2D& other)
+  { m_angle += other.m_angle; return *this; }
+
+  /** Applies the rotation to a 2D vector */
+  EIGEN_DEVICE_FUNC Vector2 operator* (const Vector2& vec) const
+  { return toRotationMatrix() * vec; }
+  
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
+  EIGEN_DEVICE_FUNC Matrix2 toRotationMatrix() const;
+
+  /** Set \c *this from a 2x2 rotation matrix \a mat.
+    * In other words, this function extract the rotation angle from the rotation matrix.
+    *
+    * This method is an alias for fromRotationMatrix()
+    *
+    * \sa fromRotationMatrix()
+    */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC Rotation2D& operator=(const  MatrixBase<Derived>& m)
+  { return fromRotationMatrix(m.derived()); }
+
+  /** \returns the spherical interpolation between \c *this and \a other using
+    * parameter \a t. It is in fact equivalent to a linear interpolation.
+    */
+  EIGEN_DEVICE_FUNC inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const
+  {
+    Scalar dist = Rotation2D(other.m_angle-m_angle).smallestAngle();
+    return Rotation2D(m_angle + dist*t);
+  }
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const
+  { return typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type(*this); }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  EIGEN_DEVICE_FUNC inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)
+  {
+    m_angle = Scalar(other.angle());
+  }
+
+  EIGEN_DEVICE_FUNC static inline Rotation2D Identity() { return Rotation2D(0); }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  EIGEN_DEVICE_FUNC bool isApprox(const Rotation2D& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  { return internal::isApprox(m_angle,other.m_angle, prec); }
+  
+};
+
+/** \ingroup Geometry_Module
+  * single precision 2D rotation type */
+typedef Rotation2D<float> Rotation2Df;
+/** \ingroup Geometry_Module
+  * double precision 2D rotation type */
+typedef Rotation2D<double> Rotation2Dd;
+
+/** Set \c *this from a 2x2 rotation matrix \a mat.
+  * In other words, this function extract the rotation angle
+  * from the rotation matrix.
+  */
+template<typename Scalar>
+template<typename Derived>
+EIGEN_DEVICE_FUNC Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
+{
+  EIGEN_USING_STD_MATH(atan2)
+  EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
+  m_angle = atan2(mat.coeff(1,0), mat.coeff(0,0));
+  return *this;
+}
+
+/** Constructs and \returns an equivalent 2x2 rotation matrix.
+  */
+template<typename Scalar>
+typename Rotation2D<Scalar>::Matrix2
+EIGEN_DEVICE_FUNC Rotation2D<Scalar>::toRotationMatrix(void) const
+{
+  EIGEN_USING_STD_MATH(sin)
+  EIGEN_USING_STD_MATH(cos)
+  Scalar sinA = sin(m_angle);
+  Scalar cosA = cos(m_angle);
+  return (Matrix2() << cosA, -sinA, sinA, cosA).finished();
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_ROTATION2D_H
diff --git a/third-party/Eigen/src/Geometry/RotationBase.h b/third-party/Eigen/src/Geometry/RotationBase.h
new file mode 100644
index 00000000..f0ee0bd0
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/RotationBase.h
@@ -0,0 +1,206 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ROTATIONBASE_H
+#define EIGEN_ROTATIONBASE_H
+
+namespace Eigen { 
+
+// forward declaration
+namespace internal {
+template<typename RotationDerived, typename MatrixType, bool IsVector=MatrixType::IsVectorAtCompileTime>
+struct rotation_base_generic_product_selector;
+}
+
+/** \class RotationBase
+  *
+  * \brief Common base class for compact rotation representations
+  *
+  * \tparam Derived is the derived type, i.e., a rotation type
+  * \tparam _Dim the dimension of the space
+  */
+template<typename Derived, int _Dim>
+class RotationBase
+{
+  public:
+    enum { Dim = _Dim };
+    /** the scalar type of the coefficients */
+    typedef typename internal::traits<Derived>::Scalar Scalar;
+
+    /** corresponding linear transformation matrix type */
+    typedef Matrix<Scalar,Dim,Dim> RotationMatrixType;
+    typedef Matrix<Scalar,Dim,1> VectorType;
+
+  public:
+    EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+    EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast<Derived*>(this); }
+
+    /** \returns an equivalent rotation matrix */
+    EIGEN_DEVICE_FUNC inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
+
+    /** \returns an equivalent rotation matrix 
+      * This function is added to be conform with the Transform class' naming scheme.
+      */
+    EIGEN_DEVICE_FUNC inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
+
+    /** \returns the inverse rotation */
+    EIGEN_DEVICE_FUNC inline Derived inverse() const { return derived().inverse(); }
+
+    /** \returns the concatenation of the rotation \c *this with a translation \a t */
+    EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
+    { return Transform<Scalar,Dim,Isometry>(*this) * t; }
+
+    /** \returns the concatenation of the rotation \c *this with a uniform scaling \a s */
+    EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
+    { return toRotationMatrix() * s.factor(); }
+
+    /** \returns the concatenation of the rotation \c *this with a generic expression \a e
+      * \a e can be:
+      *  - a DimxDim linear transformation matrix
+      *  - a DimxDim diagonal matrix (axis aligned scaling)
+      *  - a vector of size Dim
+      */
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
+    operator*(const EigenBase<OtherDerived>& e) const
+    { return internal::rotation_base_generic_product_selector<Derived,OtherDerived>::run(derived(), e.derived()); }
+
+    /** \returns the concatenation of a linear transformation \a l with the rotation \a r */
+    template<typename OtherDerived> friend
+    EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
+    { return l.derived() * r.toRotationMatrix(); }
+
+    /** \returns the concatenation of a scaling \a l with the rotation \a r */
+    EIGEN_DEVICE_FUNC friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
+    { 
+      Transform<Scalar,Dim,Affine> res(r);
+      res.linear().applyOnTheLeft(l);
+      return res;
+    }
+
+    /** \returns the concatenation of the rotation \c *this with a transformation \a t */
+    template<int Mode, int Options>
+    EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode,Options>& t) const
+    { return toRotationMatrix() * t; }
+
+    template<typename OtherVectorType>
+    EIGEN_DEVICE_FUNC inline VectorType _transformVector(const OtherVectorType& v) const
+    { return toRotationMatrix() * v; }
+};
+
+namespace internal {
+
+// implementation of the generic product rotation * matrix
+template<typename RotationDerived, typename MatrixType>
+struct rotation_base_generic_product_selector<RotationDerived,MatrixType,false>
+{
+  enum { Dim = RotationDerived::Dim };
+  typedef Matrix<typename RotationDerived::Scalar,Dim,Dim> ReturnType;
+  EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const MatrixType& m)
+  { return r.toRotationMatrix() * m; }
+};
+
+template<typename RotationDerived, typename Scalar, int Dim, int MaxDim>
+struct rotation_base_generic_product_selector< RotationDerived, DiagonalMatrix<Scalar,Dim,MaxDim>, false >
+{
+  typedef Transform<Scalar,Dim,Affine> ReturnType;
+  EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
+  {
+    ReturnType res(r);
+    res.linear() *= m;
+    return res;
+  }
+};
+
+template<typename RotationDerived,typename OtherVectorType>
+struct rotation_base_generic_product_selector<RotationDerived,OtherVectorType,true>
+{
+  enum { Dim = RotationDerived::Dim };
+  typedef Matrix<typename RotationDerived::Scalar,Dim,1> ReturnType;
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)
+  {
+    return r._transformVector(v);
+  }
+};
+
+} // end namespace internal
+
+/** \geometry_module
+  *
+  * \brief Constructs a Dim x Dim rotation matrix from the rotation \a r
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
+::Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
+{
+  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim))
+  *this = r.toRotationMatrix();
+}
+
+/** \geometry_module
+  *
+  * \brief Set a Dim x Dim rotation matrix from the rotation \a r
+  */
+template<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&
+Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
+::operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
+{
+  EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim))
+  return *this = r.toRotationMatrix();
+}
+
+namespace internal {
+
+/** \internal
+  *
+  * Helper function to return an arbitrary rotation object to a rotation matrix.
+  *
+  * \tparam Scalar the numeric type of the matrix coefficients
+  * \tparam Dim the dimension of the current space
+  *
+  * It returns a Dim x Dim fixed size matrix.
+  *
+  * Default specializations are provided for:
+  *   - any scalar type (2D),
+  *   - any matrix expression,
+  *   - any type based on RotationBase (e.g., Quaternion, AngleAxis, Rotation2D)
+  *
+  * Currently toRotationMatrix is only used by Transform.
+  *
+  * \sa class Transform, class Rotation2D, class Quaternion, class AngleAxis
+  */
+template<typename Scalar, int Dim>
+EIGEN_DEVICE_FUNC static inline Matrix<Scalar,2,2> toRotationMatrix(const Scalar& s)
+{
+  EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
+  return Rotation2D<Scalar>(s).toRotationMatrix();
+}
+
+template<typename Scalar, int Dim, typename OtherDerived>
+EIGEN_DEVICE_FUNC static inline Matrix<Scalar,Dim,Dim> toRotationMatrix(const RotationBase<OtherDerived,Dim>& r)
+{
+  return r.toRotationMatrix();
+}
+
+template<typename Scalar, int Dim, typename OtherDerived>
+EIGEN_DEVICE_FUNC static inline const MatrixBase<OtherDerived>& toRotationMatrix(const MatrixBase<OtherDerived>& mat)
+{
+  EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim,
+    YOU_MADE_A_PROGRAMMING_MISTAKE)
+  return mat;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_ROTATIONBASE_H
diff --git a/third-party/Eigen/src/Geometry/Scaling.h b/third-party/Eigen/src/Geometry/Scaling.h
new file mode 100644
index 00000000..33eabd81
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/Scaling.h
@@ -0,0 +1,170 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SCALING_H
+#define EIGEN_SCALING_H
+
+namespace Eigen { 
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \class UniformScaling
+  *
+  * \brief Represents a generic uniform scaling transformation
+  *
+  * \tparam _Scalar the scalar type, i.e., the type of the coefficients.
+  *
+  * This class represent a uniform scaling transformation. It is the return
+  * type of Scaling(Scalar), and most of the time this is the only way it
+  * is used. In particular, this class is not aimed to be used to store a scaling transformation,
+  * but rather to make easier the constructions and updates of Transform objects.
+  *
+  * To represent an axis aligned scaling, use the DiagonalMatrix class.
+  *
+  * \sa Scaling(), class DiagonalMatrix, MatrixBase::asDiagonal(), class Translation, class Transform
+  */
+template<typename _Scalar>
+class UniformScaling
+{
+public:
+  /** the scalar type of the coefficients */
+  typedef _Scalar Scalar;
+
+protected:
+
+  Scalar m_factor;
+
+public:
+
+  /** Default constructor without initialization. */
+  UniformScaling() {}
+  /** Constructs and initialize a uniform scaling transformation */
+  explicit inline UniformScaling(const Scalar& s) : m_factor(s) {}
+
+  inline const Scalar& factor() const { return m_factor; }
+  inline Scalar& factor() { return m_factor; }
+
+  /** Concatenates two uniform scaling */
+  inline UniformScaling operator* (const UniformScaling& other) const
+  { return UniformScaling(m_factor * other.factor()); }
+
+  /** Concatenates a uniform scaling and a translation */
+  template<int Dim>
+  inline Transform<Scalar,Dim,Affine> operator* (const Translation<Scalar,Dim>& t) const;
+
+  /** Concatenates a uniform scaling and an affine transformation */
+  template<int Dim, int Mode, int Options>
+  inline Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Mode)> operator* (const Transform<Scalar,Dim, Mode, Options>& t) const
+  {
+    Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Mode)> res = t;
+    res.prescale(factor());
+    return res;
+  }
+
+  /** Concatenates a uniform scaling and a linear transformation matrix */
+  // TODO returns an expression
+  template<typename Derived>
+  inline typename internal::plain_matrix_type<Derived>::type operator* (const MatrixBase<Derived>& other) const
+  { return other * m_factor; }
+
+  template<typename Derived,int Dim>
+  inline Matrix<Scalar,Dim,Dim> operator*(const RotationBase<Derived,Dim>& r) const
+  { return r.toRotationMatrix() * m_factor; }
+
+  /** \returns the inverse scaling */
+  inline UniformScaling inverse() const
+  { return UniformScaling(Scalar(1)/m_factor); }
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  inline UniformScaling<NewScalarType> cast() const
+  { return UniformScaling<NewScalarType>(NewScalarType(m_factor)); }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  inline explicit UniformScaling(const UniformScaling<OtherScalarType>& other)
+  { m_factor = Scalar(other.factor()); }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  bool isApprox(const UniformScaling& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  { return internal::isApprox(m_factor, other.factor(), prec); }
+
+};
+
+/** \addtogroup Geometry_Module */
+//@{
+
+/** Concatenates a linear transformation matrix and a uniform scaling
+  * \relates UniformScaling
+  */
+// NOTE this operator is defiend in MatrixBase and not as a friend function
+// of UniformScaling to fix an internal crash of Intel's ICC
+template<typename Derived,typename Scalar>
+EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,product)
+operator*(const MatrixBase<Derived>& matrix, const UniformScaling<Scalar>& s)
+{ return matrix.derived() * s.factor(); }
+
+/** Constructs a uniform scaling from scale factor \a s */
+inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
+/** Constructs a uniform scaling from scale factor \a s */
+inline UniformScaling<double> Scaling(double s) { return UniformScaling<double>(s); }
+/** Constructs a uniform scaling from scale factor \a s */
+template<typename RealScalar>
+inline UniformScaling<std::complex<RealScalar> > Scaling(const std::complex<RealScalar>& s)
+{ return UniformScaling<std::complex<RealScalar> >(s); }
+
+/** Constructs a 2D axis aligned scaling */
+template<typename Scalar>
+inline DiagonalMatrix<Scalar,2> Scaling(const Scalar& sx, const Scalar& sy)
+{ return DiagonalMatrix<Scalar,2>(sx, sy); }
+/** Constructs a 3D axis aligned scaling */
+template<typename Scalar>
+inline DiagonalMatrix<Scalar,3> Scaling(const Scalar& sx, const Scalar& sy, const Scalar& sz)
+{ return DiagonalMatrix<Scalar,3>(sx, sy, sz); }
+
+/** Constructs an axis aligned scaling expression from vector expression \a coeffs
+  * This is an alias for coeffs.asDiagonal()
+  */
+template<typename Derived>
+inline const DiagonalWrapper<const Derived> Scaling(const MatrixBase<Derived>& coeffs)
+{ return coeffs.asDiagonal(); }
+
+/** \deprecated */
+typedef DiagonalMatrix<float, 2> AlignedScaling2f;
+/** \deprecated */
+typedef DiagonalMatrix<double,2> AlignedScaling2d;
+/** \deprecated */
+typedef DiagonalMatrix<float, 3> AlignedScaling3f;
+/** \deprecated */
+typedef DiagonalMatrix<double,3> AlignedScaling3d;
+//@}
+
+template<typename Scalar>
+template<int Dim>
+inline Transform<Scalar,Dim,Affine>
+UniformScaling<Scalar>::operator* (const Translation<Scalar,Dim>& t) const
+{
+  Transform<Scalar,Dim,Affine> res;
+  res.matrix().setZero();
+  res.linear().diagonal().fill(factor());
+  res.translation() = factor() * t.vector();
+  res(Dim,Dim) = Scalar(1);
+  return res;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SCALING_H
diff --git a/third-party/Eigen/src/Geometry/Transform.h b/third-party/Eigen/src/Geometry/Transform.h
new file mode 100644
index 00000000..c21d9e55
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/Transform.h
@@ -0,0 +1,1542 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRANSFORM_H
+#define EIGEN_TRANSFORM_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename Transform>
+struct transform_traits
+{
+  enum
+  {
+    Dim = Transform::Dim,
+    HDim = Transform::HDim,
+    Mode = Transform::Mode,
+    IsProjective = (int(Mode)==int(Projective))
+  };
+};
+
+template< typename TransformType,
+          typename MatrixType,
+          int Case = transform_traits<TransformType>::IsProjective ? 0
+                   : int(MatrixType::RowsAtCompileTime) == int(transform_traits<TransformType>::HDim) ? 1
+                   : 2,
+          int RhsCols = MatrixType::ColsAtCompileTime>
+struct transform_right_product_impl;
+
+template< typename Other,
+          int Mode,
+          int Options,
+          int Dim,
+          int HDim,
+          int OtherRows=Other::RowsAtCompileTime,
+          int OtherCols=Other::ColsAtCompileTime>
+struct transform_left_product_impl;
+
+template< typename Lhs,
+          typename Rhs,
+          bool AnyProjective = 
+            transform_traits<Lhs>::IsProjective ||
+            transform_traits<Rhs>::IsProjective>
+struct transform_transform_product_impl;
+
+template< typename Other,
+          int Mode,
+          int Options,
+          int Dim,
+          int HDim,
+          int OtherRows=Other::RowsAtCompileTime,
+          int OtherCols=Other::ColsAtCompileTime>
+struct transform_construct_from_matrix;
+
+template<typename TransformType> struct transform_take_affine_part;
+
+template<typename _Scalar, int _Dim, int _Mode, int _Options>
+struct traits<Transform<_Scalar,_Dim,_Mode,_Options> >
+{
+  typedef _Scalar Scalar;
+  typedef Eigen::Index StorageIndex;
+  typedef Dense StorageKind;
+  enum {
+    Dim1 = _Dim==Dynamic ? _Dim : _Dim + 1,
+    RowsAtCompileTime = _Mode==Projective ? Dim1 : _Dim,
+    ColsAtCompileTime = Dim1,
+    MaxRowsAtCompileTime = RowsAtCompileTime,
+    MaxColsAtCompileTime = ColsAtCompileTime,
+    Flags = 0
+  };
+};
+
+template<int Mode> struct transform_make_affine;
+
+} // end namespace internal
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \class Transform
+  *
+  * \brief Represents an homogeneous transformation in a N dimensional space
+  *
+  * \tparam _Scalar the scalar type, i.e., the type of the coefficients
+  * \tparam _Dim the dimension of the space
+  * \tparam _Mode the type of the transformation. Can be:
+  *              - #Affine: the transformation is stored as a (Dim+1)^2 matrix,
+  *                         where the last row is assumed to be [0 ... 0 1].
+  *              - #AffineCompact: the transformation is stored as a (Dim)x(Dim+1) matrix.
+  *              - #Projective: the transformation is stored as a (Dim+1)^2 matrix
+  *                             without any assumption.
+  * \tparam _Options has the same meaning as in class Matrix. It allows to specify DontAlign and/or RowMajor.
+  *                  These Options are passed directly to the underlying matrix type.
+  *
+  * The homography is internally represented and stored by a matrix which
+  * is available through the matrix() method. To understand the behavior of
+  * this class you have to think a Transform object as its internal
+  * matrix representation. The chosen convention is right multiply:
+  *
+  * \code v' = T * v \endcode
+  *
+  * Therefore, an affine transformation matrix M is shaped like this:
+  *
+  * \f$ \left( \begin{array}{cc}
+  * linear & translation\\
+  * 0 ... 0 & 1
+  * \end{array} \right) \f$
+  *
+  * Note that for a projective transformation the last row can be anything,
+  * and then the interpretation of different parts might be sightly different.
+  *
+  * However, unlike a plain matrix, the Transform class provides many features
+  * simplifying both its assembly and usage. In particular, it can be composed
+  * with any other transformations (Transform,Translation,RotationBase,DiagonalMatrix)
+  * and can be directly used to transform implicit homogeneous vectors. All these
+  * operations are handled via the operator*. For the composition of transformations,
+  * its principle consists to first convert the right/left hand sides of the product
+  * to a compatible (Dim+1)^2 matrix and then perform a pure matrix product.
+  * Of course, internally, operator* tries to perform the minimal number of operations
+  * according to the nature of each terms. Likewise, when applying the transform
+  * to points, the latters are automatically promoted to homogeneous vectors
+  * before doing the matrix product. The conventions to homogeneous representations
+  * are performed as follow:
+  *
+  * \b Translation t (Dim)x(1):
+  * \f$ \left( \begin{array}{cc}
+  * I & t \\
+  * 0\,...\,0 & 1
+  * \end{array} \right) \f$
+  *
+  * \b Rotation R (Dim)x(Dim):
+  * \f$ \left( \begin{array}{cc}
+  * R & 0\\
+  * 0\,...\,0 & 1
+  * \end{array} \right) \f$
+  *<!--
+  * \b Linear \b Matrix L (Dim)x(Dim):
+  * \f$ \left( \begin{array}{cc}
+  * L & 0\\
+  * 0\,...\,0 & 1
+  * \end{array} \right) \f$
+  *
+  * \b Affine \b Matrix A (Dim)x(Dim+1):
+  * \f$ \left( \begin{array}{c}
+  * A\\
+  * 0\,...\,0\,1
+  * \end{array} \right) \f$
+  *-->
+  * \b Scaling \b DiagonalMatrix S (Dim)x(Dim):
+  * \f$ \left( \begin{array}{cc}
+  * S & 0\\
+  * 0\,...\,0 & 1
+  * \end{array} \right) \f$
+  *
+  * \b Column \b point v (Dim)x(1):
+  * \f$ \left( \begin{array}{c}
+  * v\\
+  * 1
+  * \end{array} \right) \f$
+  *
+  * \b Set \b of \b column \b points V1...Vn (Dim)x(n):
+  * \f$ \left( \begin{array}{ccc}
+  * v_1 & ... & v_n\\
+  * 1 & ... & 1
+  * \end{array} \right) \f$
+  *
+  * The concatenation of a Transform object with any kind of other transformation
+  * always returns a Transform object.
+  *
+  * A little exception to the "as pure matrix product" rule is the case of the
+  * transformation of non homogeneous vectors by an affine transformation. In
+  * that case the last matrix row can be ignored, and the product returns non
+  * homogeneous vectors.
+  *
+  * Since, for instance, a Dim x Dim matrix is interpreted as a linear transformation,
+  * it is not possible to directly transform Dim vectors stored in a Dim x Dim matrix.
+  * The solution is either to use a Dim x Dynamic matrix or explicitly request a
+  * vector transformation by making the vector homogeneous:
+  * \code
+  * m' = T * m.colwise().homogeneous();
+  * \endcode
+  * Note that there is zero overhead.
+  *
+  * Conversion methods from/to Qt's QMatrix and QTransform are available if the
+  * preprocessor token EIGEN_QT_SUPPORT is defined.
+  *
+  * This class can be extended with the help of the plugin mechanism described on the page
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_TRANSFORM_PLUGIN.
+  *
+  * \sa class Matrix, class Quaternion
+  */
+template<typename _Scalar, int _Dim, int _Mode, int _Options>
+class Transform
+{
+public:
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim==Dynamic ? Dynamic : (_Dim+1)*(_Dim+1))
+  enum {
+    Mode = _Mode,
+    Options = _Options,
+    Dim = _Dim,     ///< space dimension in which the transformation holds
+    HDim = _Dim+1,  ///< size of a respective homogeneous vector
+    Rows = int(Mode)==(AffineCompact) ? Dim : HDim
+  };
+  /** the scalar type of the coefficients */
+  typedef _Scalar Scalar;
+  typedef Eigen::Index StorageIndex;
+  typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+  /** type of the matrix used to represent the transformation */
+  typedef typename internal::make_proper_matrix_type<Scalar,Rows,HDim,Options>::type MatrixType;
+  /** constified MatrixType */
+  typedef const MatrixType ConstMatrixType;
+  /** type of the matrix used to represent the linear part of the transformation */
+  typedef Matrix<Scalar,Dim,Dim,Options> LinearMatrixType;
+  /** type of read/write reference to the linear part of the transformation */
+  typedef Block<MatrixType,Dim,Dim,int(Mode)==(AffineCompact) && (Options&RowMajor)==0> LinearPart;
+  /** type of read reference to the linear part of the transformation */
+  typedef const Block<ConstMatrixType,Dim,Dim,int(Mode)==(AffineCompact) && (Options&RowMajor)==0> ConstLinearPart;
+  /** type of read/write reference to the affine part of the transformation */
+  typedef typename internal::conditional<int(Mode)==int(AffineCompact),
+                              MatrixType&,
+                              Block<MatrixType,Dim,HDim> >::type AffinePart;
+  /** type of read reference to the affine part of the transformation */
+  typedef typename internal::conditional<int(Mode)==int(AffineCompact),
+                              const MatrixType&,
+                              const Block<const MatrixType,Dim,HDim> >::type ConstAffinePart;
+  /** type of a vector */
+  typedef Matrix<Scalar,Dim,1> VectorType;
+  /** type of a read/write reference to the translation part of the rotation */
+  typedef Block<MatrixType,Dim,1,!(internal::traits<MatrixType>::Flags & RowMajorBit)> TranslationPart;
+  /** type of a read reference to the translation part of the rotation */
+  typedef const Block<ConstMatrixType,Dim,1,!(internal::traits<MatrixType>::Flags & RowMajorBit)> ConstTranslationPart;
+  /** corresponding translation type */
+  typedef Translation<Scalar,Dim> TranslationType;
+  
+  // this intermediate enum is needed to avoid an ICE with gcc 3.4 and 4.0
+  enum { TransformTimeDiagonalMode = ((Mode==int(Isometry))?Affine:int(Mode)) };
+  /** The return type of the product between a diagonal matrix and a transform */
+  typedef Transform<Scalar,Dim,TransformTimeDiagonalMode> TransformTimeDiagonalReturnType;
+
+protected:
+
+  MatrixType m_matrix;
+
+public:
+
+  /** Default constructor without initialization of the meaningful coefficients.
+    * If Mode==Affine or Mode==Isometry, then the last row is set to [0 ... 0 1] */
+  EIGEN_DEVICE_FUNC inline Transform()
+  {
+    check_template_params();
+    internal::transform_make_affine<(int(Mode)==Affine || int(Mode)==Isometry) ? Affine : AffineCompact>::run(m_matrix);
+  }
+
+  EIGEN_DEVICE_FUNC inline Transform(const Transform& other)
+  {
+    check_template_params();
+    m_matrix = other.m_matrix;
+  }
+
+  EIGEN_DEVICE_FUNC inline explicit Transform(const TranslationType& t)
+  {
+    check_template_params();
+    *this = t;
+  }
+  EIGEN_DEVICE_FUNC inline explicit Transform(const UniformScaling<Scalar>& s)
+  {
+    check_template_params();
+    *this = s;
+  }
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline explicit Transform(const RotationBase<Derived, Dim>& r)
+  {
+    check_template_params();
+    *this = r;
+  }
+
+  EIGEN_DEVICE_FUNC inline Transform& operator=(const Transform& other)
+  { m_matrix = other.m_matrix; return *this; }
+
+  typedef internal::transform_take_affine_part<Transform> take_affine_part;
+
+  /** Constructs and initializes a transformation from a Dim^2 or a (Dim+1)^2 matrix. */
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC inline explicit Transform(const EigenBase<OtherDerived>& other)
+  {
+    EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
+      YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
+
+    check_template_params();
+    internal::transform_construct_from_matrix<OtherDerived,Mode,Options,Dim,HDim>::run(this, other.derived());
+  }
+
+  /** Set \c *this from a Dim^2 or (Dim+1)^2 matrix. */
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC inline Transform& operator=(const EigenBase<OtherDerived>& other)
+  {
+    EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
+      YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
+
+    internal::transform_construct_from_matrix<OtherDerived,Mode,Options,Dim,HDim>::run(this, other.derived());
+    return *this;
+  }
+  
+  template<int OtherOptions>
+  EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,Mode,OtherOptions>& other)
+  {
+    check_template_params();
+    // only the options change, we can directly copy the matrices
+    m_matrix = other.matrix();
+  }
+
+  template<int OtherMode,int OtherOptions>
+  EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,OtherMode,OtherOptions>& other)
+  {
+    check_template_params();
+    // prevent conversions as:
+    // Affine | AffineCompact | Isometry = Projective
+    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(OtherMode==int(Projective), Mode==int(Projective)),
+                        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION)
+
+    // prevent conversions as:
+    // Isometry = Affine | AffineCompact
+    EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(OtherMode==int(Affine)||OtherMode==int(AffineCompact), Mode!=int(Isometry)),
+                        YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION)
+
+    enum { ModeIsAffineCompact = Mode == int(AffineCompact),
+           OtherModeIsAffineCompact = OtherMode == int(AffineCompact)
+    };
+
+    if(ModeIsAffineCompact == OtherModeIsAffineCompact)
+    {
+      // We need the block expression because the code is compiled for all
+      // combinations of transformations and will trigger a compile time error
+      // if one tries to assign the matrices directly
+      m_matrix.template block<Dim,Dim+1>(0,0) = other.matrix().template block<Dim,Dim+1>(0,0);
+      makeAffine();
+    }
+    else if(OtherModeIsAffineCompact)
+    {
+      typedef typename Transform<Scalar,Dim,OtherMode,OtherOptions>::MatrixType OtherMatrixType;
+      internal::transform_construct_from_matrix<OtherMatrixType,Mode,Options,Dim,HDim>::run(this, other.matrix());
+    }
+    else
+    {
+      // here we know that Mode == AffineCompact and OtherMode != AffineCompact.
+      // if OtherMode were Projective, the static assert above would already have caught it.
+      // So the only possibility is that OtherMode == Affine
+      linear() = other.linear();
+      translation() = other.translation();
+    }
+  }
+
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC Transform(const ReturnByValue<OtherDerived>& other)
+  {
+    check_template_params();
+    other.evalTo(*this);
+  }
+
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC Transform& operator=(const ReturnByValue<OtherDerived>& other)
+  {
+    other.evalTo(*this);
+    return *this;
+  }
+
+  #ifdef EIGEN_QT_SUPPORT
+  inline Transform(const QMatrix& other);
+  inline Transform& operator=(const QMatrix& other);
+  inline QMatrix toQMatrix(void) const;
+  inline Transform(const QTransform& other);
+  inline Transform& operator=(const QTransform& other);
+  inline QTransform toQTransform(void) const;
+  #endif
+  
+  EIGEN_DEVICE_FUNC Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_matrix.cols(); }
+
+  /** shortcut for m_matrix(row,col);
+    * \sa MatrixBase::operator(Index,Index) const */
+  EIGEN_DEVICE_FUNC inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
+  /** shortcut for m_matrix(row,col);
+    * \sa MatrixBase::operator(Index,Index) */
+  EIGEN_DEVICE_FUNC inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
+
+  /** \returns a read-only expression of the transformation matrix */
+  EIGEN_DEVICE_FUNC inline const MatrixType& matrix() const { return m_matrix; }
+  /** \returns a writable expression of the transformation matrix */
+  EIGEN_DEVICE_FUNC inline MatrixType& matrix() { return m_matrix; }
+
+  /** \returns a read-only expression of the linear part of the transformation */
+  EIGEN_DEVICE_FUNC inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); }
+  /** \returns a writable expression of the linear part of the transformation */
+  EIGEN_DEVICE_FUNC inline LinearPart linear() { return LinearPart(m_matrix,0,0); }
+
+  /** \returns a read-only expression of the Dim x HDim affine part of the transformation */
+  EIGEN_DEVICE_FUNC inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); }
+  /** \returns a writable expression of the Dim x HDim affine part of the transformation */
+  EIGEN_DEVICE_FUNC inline AffinePart affine() { return take_affine_part::run(m_matrix); }
+
+  /** \returns a read-only expression of the translation vector of the transformation */
+  EIGEN_DEVICE_FUNC inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); }
+  /** \returns a writable expression of the translation vector of the transformation */
+  EIGEN_DEVICE_FUNC inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }
+
+  /** \returns an expression of the product between the transform \c *this and a matrix expression \a other.
+    *
+    * The right-hand-side \a other can be either:
+    * \li an homogeneous vector of size Dim+1,
+    * \li a set of homogeneous vectors of size Dim+1 x N,
+    * \li a transformation matrix of size Dim+1 x Dim+1.
+    *
+    * Moreover, if \c *this represents an affine transformation (i.e., Mode!=Projective), then \a other can also be:
+    * \li a point of size Dim (computes: \code this->linear() * other + this->translation()\endcode),
+    * \li a set of N points as a Dim x N matrix (computes: \code (this->linear() * other).colwise() + this->translation()\endcode),
+    *
+    * In all cases, the return type is a matrix or vector of same sizes as the right-hand-side \a other.
+    *
+    * If you want to interpret \a other as a linear or affine transformation, then first convert it to a Transform<> type,
+    * or do your own cooking.
+    *
+    * Finally, if you want to apply Affine transformations to vectors, then explicitly apply the linear part only:
+    * \code
+    * Affine3f A;
+    * Vector3f v1, v2;
+    * v2 = A.linear() * v1;
+    * \endcode
+    *
+    */
+  // note: this function is defined here because some compilers cannot find the respective declaration
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
+  operator * (const EigenBase<OtherDerived> &other) const
+  { return internal::transform_right_product_impl<Transform, OtherDerived>::run(*this,other.derived()); }
+
+  /** \returns the product expression of a transformation matrix \a a times a transform \a b
+    *
+    * The left hand side \a other can be either:
+    * \li a linear transformation matrix of size Dim x Dim,
+    * \li an affine transformation matrix of size Dim x Dim+1,
+    * \li a general transformation matrix of size Dim+1 x Dim+1.
+    */
+  template<typename OtherDerived> friend
+  EIGEN_DEVICE_FUNC inline const typename internal::transform_left_product_impl<OtherDerived,Mode,Options,_Dim,_Dim+1>::ResultType
+    operator * (const EigenBase<OtherDerived> &a, const Transform &b)
+  { return internal::transform_left_product_impl<OtherDerived,Mode,Options,Dim,HDim>::run(a.derived(),b); }
+
+  /** \returns The product expression of a transform \a a times a diagonal matrix \a b
+    *
+    * The rhs diagonal matrix is interpreted as an affine scaling transformation. The
+    * product results in a Transform of the same type (mode) as the lhs only if the lhs 
+    * mode is no isometry. In that case, the returned transform is an affinity.
+    */
+  template<typename DiagonalDerived>
+  EIGEN_DEVICE_FUNC inline const TransformTimeDiagonalReturnType
+    operator * (const DiagonalBase<DiagonalDerived> &b) const
+  {
+    TransformTimeDiagonalReturnType res(*this);
+    res.linearExt() *= b;
+    return res;
+  }
+
+  /** \returns The product expression of a diagonal matrix \a a times a transform \a b
+    *
+    * The lhs diagonal matrix is interpreted as an affine scaling transformation. The
+    * product results in a Transform of the same type (mode) as the lhs only if the lhs 
+    * mode is no isometry. In that case, the returned transform is an affinity.
+    */
+  template<typename DiagonalDerived>
+  EIGEN_DEVICE_FUNC friend inline TransformTimeDiagonalReturnType
+    operator * (const DiagonalBase<DiagonalDerived> &a, const Transform &b)
+  {
+    TransformTimeDiagonalReturnType res;
+    res.linear().noalias() = a*b.linear();
+    res.translation().noalias() = a*b.translation();
+    if (Mode!=int(AffineCompact))
+      res.matrix().row(Dim) = b.matrix().row(Dim);
+    return res;
+  }
+
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
+
+  /** Concatenates two transformations */
+  EIGEN_DEVICE_FUNC inline const Transform operator * (const Transform& other) const
+  {
+    return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other);
+  }
+  
+  #if EIGEN_COMP_ICC
+private:
+  // this intermediate structure permits to workaround a bug in ICC 11:
+  //   error: template instantiation resulted in unexpected function type of "Eigen::Transform<double, 3, 32, 0>
+  //             (const Eigen::Transform<double, 3, 2, 0> &) const"
+  //  (the meaning of a name may have changed since the template declaration -- the type of the template is:
+  // "Eigen::internal::transform_transform_product_impl<Eigen::Transform<double, 3, 32, 0>,
+  //     Eigen::Transform<double, 3, Mode, Options>, <expression>>::ResultType (const Eigen::Transform<double, 3, Mode, Options> &) const")
+  // 
+  template<int OtherMode,int OtherOptions> struct icc_11_workaround
+  {
+    typedef internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> > ProductType;
+    typedef typename ProductType::ResultType ResultType;
+  };
+  
+public:
+  /** Concatenates two different transformations */
+  template<int OtherMode,int OtherOptions>
+  inline typename icc_11_workaround<OtherMode,OtherOptions>::ResultType
+    operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
+  {
+    typedef typename icc_11_workaround<OtherMode,OtherOptions>::ProductType ProductType;
+    return ProductType::run(*this,other);
+  }
+  #else
+  /** Concatenates two different transformations */
+  template<int OtherMode,int OtherOptions>
+  EIGEN_DEVICE_FUNC inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
+    operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
+  {
+    return internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::run(*this,other);
+  }
+  #endif
+
+  /** \sa MatrixBase::setIdentity() */
+  EIGEN_DEVICE_FUNC void setIdentity() { m_matrix.setIdentity(); }
+
+  /**
+   * \brief Returns an identity transformation.
+   * \todo In the future this function should be returning a Transform expression.
+   */
+  EIGEN_DEVICE_FUNC static const Transform Identity()
+  {
+    return Transform(MatrixType::Identity());
+  }
+
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC 
+  inline Transform& scale(const MatrixBase<OtherDerived> &other);
+
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC
+  inline Transform& prescale(const MatrixBase<OtherDerived> &other);
+
+  EIGEN_DEVICE_FUNC inline Transform& scale(const Scalar& s);
+  EIGEN_DEVICE_FUNC inline Transform& prescale(const Scalar& s);
+
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC
+  inline Transform& translate(const MatrixBase<OtherDerived> &other);
+
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC
+  inline Transform& pretranslate(const MatrixBase<OtherDerived> &other);
+
+  template<typename RotationType>
+  EIGEN_DEVICE_FUNC
+  inline Transform& rotate(const RotationType& rotation);
+
+  template<typename RotationType>
+  EIGEN_DEVICE_FUNC
+  inline Transform& prerotate(const RotationType& rotation);
+
+  EIGEN_DEVICE_FUNC Transform& shear(const Scalar& sx, const Scalar& sy);
+  EIGEN_DEVICE_FUNC Transform& preshear(const Scalar& sx, const Scalar& sy);
+
+  EIGEN_DEVICE_FUNC inline Transform& operator=(const TranslationType& t);
+  
+  EIGEN_DEVICE_FUNC
+  inline Transform& operator*=(const TranslationType& t) { return translate(t.vector()); }
+  
+  EIGEN_DEVICE_FUNC inline Transform operator*(const TranslationType& t) const;
+
+  EIGEN_DEVICE_FUNC 
+  inline Transform& operator=(const UniformScaling<Scalar>& t);
+  
+  EIGEN_DEVICE_FUNC
+  inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }
+  
+  EIGEN_DEVICE_FUNC
+  inline TransformTimeDiagonalReturnType operator*(const UniformScaling<Scalar>& s) const
+  {
+    TransformTimeDiagonalReturnType res = *this;
+    res.scale(s.factor());
+    return res;
+  }
+
+  EIGEN_DEVICE_FUNC
+  inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linearExt() *= s; return *this; }
+
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline Transform& operator=(const RotationBase<Derived,Dim>& r);
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline Transform& operator*=(const RotationBase<Derived,Dim>& r) { return rotate(r.toRotationMatrix()); }
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline Transform operator*(const RotationBase<Derived,Dim>& r) const;
+
+  EIGEN_DEVICE_FUNC const LinearMatrixType rotation() const;
+  template<typename RotationMatrixType, typename ScalingMatrixType>
+  EIGEN_DEVICE_FUNC
+  void computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const;
+  template<typename ScalingMatrixType, typename RotationMatrixType>
+  EIGEN_DEVICE_FUNC
+  void computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const;
+
+  template<typename PositionDerived, typename OrientationType, typename ScaleDerived>
+  EIGEN_DEVICE_FUNC
+  Transform& fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,
+    const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale);
+
+  EIGEN_DEVICE_FUNC
+  inline Transform inverse(TransformTraits traits = (TransformTraits)Mode) const;
+
+  /** \returns a const pointer to the column major internal matrix */
+  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_matrix.data(); }
+  /** \returns a non-const pointer to the column major internal matrix */
+  EIGEN_DEVICE_FUNC Scalar* data() { return m_matrix.data(); }
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type cast() const
+  { return typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type(*this); }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  EIGEN_DEVICE_FUNC inline explicit Transform(const Transform<OtherScalarType,Dim,Mode,Options>& other)
+  {
+    check_template_params();
+    m_matrix = other.matrix().template cast<Scalar>();
+  }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  EIGEN_DEVICE_FUNC bool isApprox(const Transform& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  { return m_matrix.isApprox(other.m_matrix, prec); }
+
+  /** Sets the last row to [0 ... 0 1]
+    */
+  EIGEN_DEVICE_FUNC void makeAffine()
+  {
+    internal::transform_make_affine<int(Mode)>::run(m_matrix);
+  }
+
+  /** \internal
+    * \returns the Dim x Dim linear part if the transformation is affine,
+    *          and the HDim x Dim part for projective transformations.
+    */
+  EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt()
+  { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }
+  /** \internal
+    * \returns the Dim x Dim linear part if the transformation is affine,
+    *          and the HDim x Dim part for projective transformations.
+    */
+  EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt() const
+  { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }
+
+  /** \internal
+    * \returns the translation part if the transformation is affine,
+    *          and the last column for projective transformations.
+    */
+  EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt()
+  { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }
+  /** \internal
+    * \returns the translation part if the transformation is affine,
+    *          and the last column for projective transformations.
+    */
+  EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt() const
+  { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }
+
+
+  #ifdef EIGEN_TRANSFORM_PLUGIN
+  #include EIGEN_TRANSFORM_PLUGIN
+  #endif
+  
+protected:
+  #ifndef EIGEN_PARSED_BY_DOXYGEN
+    EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void check_template_params()
+    {
+      EIGEN_STATIC_ASSERT((Options & (DontAlign|RowMajor)) == Options, INVALID_MATRIX_TEMPLATE_PARAMETERS)
+    }
+  #endif
+
+};
+
+/** \ingroup Geometry_Module */
+typedef Transform<float,2,Isometry> Isometry2f;
+/** \ingroup Geometry_Module */
+typedef Transform<float,3,Isometry> Isometry3f;
+/** \ingroup Geometry_Module */
+typedef Transform<double,2,Isometry> Isometry2d;
+/** \ingroup Geometry_Module */
+typedef Transform<double,3,Isometry> Isometry3d;
+
+/** \ingroup Geometry_Module */
+typedef Transform<float,2,Affine> Affine2f;
+/** \ingroup Geometry_Module */
+typedef Transform<float,3,Affine> Affine3f;
+/** \ingroup Geometry_Module */
+typedef Transform<double,2,Affine> Affine2d;
+/** \ingroup Geometry_Module */
+typedef Transform<double,3,Affine> Affine3d;
+
+/** \ingroup Geometry_Module */
+typedef Transform<float,2,AffineCompact> AffineCompact2f;
+/** \ingroup Geometry_Module */
+typedef Transform<float,3,AffineCompact> AffineCompact3f;
+/** \ingroup Geometry_Module */
+typedef Transform<double,2,AffineCompact> AffineCompact2d;
+/** \ingroup Geometry_Module */
+typedef Transform<double,3,AffineCompact> AffineCompact3d;
+
+/** \ingroup Geometry_Module */
+typedef Transform<float,2,Projective> Projective2f;
+/** \ingroup Geometry_Module */
+typedef Transform<float,3,Projective> Projective3f;
+/** \ingroup Geometry_Module */
+typedef Transform<double,2,Projective> Projective2d;
+/** \ingroup Geometry_Module */
+typedef Transform<double,3,Projective> Projective3d;
+
+/**************************
+*** Optional QT support ***
+**************************/
+
+#ifdef EIGEN_QT_SUPPORT
+/** Initializes \c *this from a QMatrix assuming the dimension is 2.
+  *
+  * This function is available only if the token EIGEN_QT_SUPPORT is defined.
+  */
+template<typename Scalar, int Dim, int Mode,int Options>
+Transform<Scalar,Dim,Mode,Options>::Transform(const QMatrix& other)
+{
+  check_template_params();
+  *this = other;
+}
+
+/** Set \c *this from a QMatrix assuming the dimension is 2.
+  *
+  * This function is available only if the token EIGEN_QT_SUPPORT is defined.
+  */
+template<typename Scalar, int Dim, int Mode,int Options>
+Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const QMatrix& other)
+{
+  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
+  if (Mode == int(AffineCompact))
+    m_matrix << other.m11(), other.m21(), other.dx(),
+                other.m12(), other.m22(), other.dy();
+  else
+    m_matrix << other.m11(), other.m21(), other.dx(),
+                other.m12(), other.m22(), other.dy(),
+                0, 0, 1;
+  return *this;
+}
+
+/** \returns a QMatrix from \c *this assuming the dimension is 2.
+  *
+  * \warning this conversion might loss data if \c *this is not affine
+  *
+  * This function is available only if the token EIGEN_QT_SUPPORT is defined.
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+QMatrix Transform<Scalar,Dim,Mode,Options>::toQMatrix(void) const
+{
+  check_template_params();
+  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
+  return QMatrix(m_matrix.coeff(0,0), m_matrix.coeff(1,0),
+                 m_matrix.coeff(0,1), m_matrix.coeff(1,1),
+                 m_matrix.coeff(0,2), m_matrix.coeff(1,2));
+}
+
+/** Initializes \c *this from a QTransform assuming the dimension is 2.
+  *
+  * This function is available only if the token EIGEN_QT_SUPPORT is defined.
+  */
+template<typename Scalar, int Dim, int Mode,int Options>
+Transform<Scalar,Dim,Mode,Options>::Transform(const QTransform& other)
+{
+  check_template_params();
+  *this = other;
+}
+
+/** Set \c *this from a QTransform assuming the dimension is 2.
+  *
+  * This function is available only if the token EIGEN_QT_SUPPORT is defined.
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const QTransform& other)
+{
+  check_template_params();
+  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
+  if (Mode == int(AffineCompact))
+    m_matrix << other.m11(), other.m21(), other.dx(),
+                other.m12(), other.m22(), other.dy();
+  else
+    m_matrix << other.m11(), other.m21(), other.dx(),
+                other.m12(), other.m22(), other.dy(),
+                other.m13(), other.m23(), other.m33();
+  return *this;
+}
+
+/** \returns a QTransform from \c *this assuming the dimension is 2.
+  *
+  * This function is available only if the token EIGEN_QT_SUPPORT is defined.
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+QTransform Transform<Scalar,Dim,Mode,Options>::toQTransform(void) const
+{
+  EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
+  if (Mode == int(AffineCompact))
+    return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0),
+                      m_matrix.coeff(0,1), m_matrix.coeff(1,1),
+                      m_matrix.coeff(0,2), m_matrix.coeff(1,2));
+  else
+    return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0), m_matrix.coeff(2,0),
+                      m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(2,1),
+                      m_matrix.coeff(0,2), m_matrix.coeff(1,2), m_matrix.coeff(2,2));
+}
+#endif
+
+/*********************
+*** Procedural API ***
+*********************/
+
+/** Applies on the right the non uniform scale transformation represented
+  * by the vector \a other to \c *this and returns a reference to \c *this.
+  * \sa prescale()
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
+Transform<Scalar,Dim,Mode,Options>::scale(const MatrixBase<OtherDerived> &other)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
+  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
+  linearExt().noalias() = (linearExt() * other.asDiagonal());
+  return *this;
+}
+
+/** Applies on the right a uniform scale of a factor \a c to \c *this
+  * and returns a reference to \c *this.
+  * \sa prescale(Scalar)
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::scale(const Scalar& s)
+{
+  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
+  linearExt() *= s;
+  return *this;
+}
+
+/** Applies on the left the non uniform scale transformation represented
+  * by the vector \a other to \c *this and returns a reference to \c *this.
+  * \sa scale()
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
+Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &other)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
+  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
+  affine().noalias() = (other.asDiagonal() * affine());
+  return *this;
+}
+
+/** Applies on the left a uniform scale of a factor \a c to \c *this
+  * and returns a reference to \c *this.
+  * \sa scale(Scalar)
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::prescale(const Scalar& s)
+{
+  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
+  m_matrix.template topRows<Dim>() *= s;
+  return *this;
+}
+
+/** Applies on the right the translation matrix represented by the vector \a other
+  * to \c *this and returns a reference to \c *this.
+  * \sa pretranslate()
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
+Transform<Scalar,Dim,Mode,Options>::translate(const MatrixBase<OtherDerived> &other)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
+  translationExt() += linearExt() * other;
+  return *this;
+}
+
+/** Applies on the left the translation matrix represented by the vector \a other
+  * to \c *this and returns a reference to \c *this.
+  * \sa translate()
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
+Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived> &other)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
+  if(int(Mode)==int(Projective))
+    affine() += other * m_matrix.row(Dim);
+  else
+    translation() += other;
+  return *this;
+}
+
+/** Applies on the right the rotation represented by the rotation \a rotation
+  * to \c *this and returns a reference to \c *this.
+  *
+  * The template parameter \a RotationType is the type of the rotation which
+  * must be known by internal::toRotationMatrix<>.
+  *
+  * Natively supported types includes:
+  *   - any scalar (2D),
+  *   - a Dim x Dim matrix expression,
+  *   - a Quaternion (3D),
+  *   - a AngleAxis (3D)
+  *
+  * This mechanism is easily extendable to support user types such as Euler angles,
+  * or a pair of Quaternion for 4D rotations.
+  *
+  * \sa rotate(Scalar), class Quaternion, class AngleAxis, prerotate(RotationType)
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename RotationType>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
+Transform<Scalar,Dim,Mode,Options>::rotate(const RotationType& rotation)
+{
+  linearExt() *= internal::toRotationMatrix<Scalar,Dim>(rotation);
+  return *this;
+}
+
+/** Applies on the left the rotation represented by the rotation \a rotation
+  * to \c *this and returns a reference to \c *this.
+  *
+  * See rotate() for further details.
+  *
+  * \sa rotate()
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename RotationType>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
+Transform<Scalar,Dim,Mode,Options>::prerotate(const RotationType& rotation)
+{
+  m_matrix.template block<Dim,HDim>(0,0) = internal::toRotationMatrix<Scalar,Dim>(rotation)
+                                         * m_matrix.template block<Dim,HDim>(0,0);
+  return *this;
+}
+
+/** Applies on the right the shear transformation represented
+  * by the vector \a other to \c *this and returns a reference to \c *this.
+  * \warning 2D only.
+  * \sa preshear()
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
+Transform<Scalar,Dim,Mode,Options>::shear(const Scalar& sx, const Scalar& sy)
+{
+  EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
+  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
+  VectorType tmp = linear().col(0)*sy + linear().col(1);
+  linear() << linear().col(0) + linear().col(1)*sx, tmp;
+  return *this;
+}
+
+/** Applies on the left the shear transformation represented
+  * by the vector \a other to \c *this and returns a reference to \c *this.
+  * \warning 2D only.
+  * \sa shear()
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
+Transform<Scalar,Dim,Mode,Options>::preshear(const Scalar& sx, const Scalar& sy)
+{
+  EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
+  EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
+  m_matrix.template block<Dim,HDim>(0,0) = LinearMatrixType(1, sx, sy, 1) * m_matrix.template block<Dim,HDim>(0,0);
+  return *this;
+}
+
+/******************************************************
+*** Scaling, Translation and Rotation compatibility ***
+******************************************************/
+
+template<typename Scalar, int Dim, int Mode, int Options>
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const TranslationType& t)
+{
+  linear().setIdentity();
+  translation() = t.vector();
+  makeAffine();
+  return *this;
+}
+
+template<typename Scalar, int Dim, int Mode, int Options>
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const TranslationType& t) const
+{
+  Transform res = *this;
+  res.translate(t.vector());
+  return res;
+}
+
+template<typename Scalar, int Dim, int Mode, int Options>
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const UniformScaling<Scalar>& s)
+{
+  m_matrix.setZero();
+  linear().diagonal().fill(s.factor());
+  makeAffine();
+  return *this;
+}
+
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename Derived>
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const RotationBase<Derived,Dim>& r)
+{
+  linear() = internal::toRotationMatrix<Scalar,Dim>(r);
+  translation().setZero();
+  makeAffine();
+  return *this;
+}
+
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename Derived>
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const RotationBase<Derived,Dim>& r) const
+{
+  Transform res = *this;
+  res.rotate(r.derived());
+  return res;
+}
+
+/************************
+*** Special functions ***
+************************/
+
+/** \returns the rotation part of the transformation
+  *
+  *
+  * \svd_module
+  *
+  * \sa computeRotationScaling(), computeScalingRotation(), class SVD
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+EIGEN_DEVICE_FUNC const typename Transform<Scalar,Dim,Mode,Options>::LinearMatrixType
+Transform<Scalar,Dim,Mode,Options>::rotation() const
+{
+  LinearMatrixType result;
+  computeRotationScaling(&result, (LinearMatrixType*)0);
+  return result;
+}
+
+
+/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being
+  * not necessarily positive.
+  *
+  * If either pointer is zero, the corresponding computation is skipped.
+  *
+  *
+  *
+  * \svd_module
+  *
+  * \sa computeScalingRotation(), rotation(), class SVD
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename RotationMatrixType, typename ScalingMatrixType>
+EIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
+{
+  JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
+
+  Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant(); // so x has absolute value 1
+  VectorType sv(svd.singularValues());
+  sv.coeffRef(0) *= x;
+  if(scaling) scaling->lazyAssign(svd.matrixV() * sv.asDiagonal() * svd.matrixV().adjoint());
+  if(rotation)
+  {
+    LinearMatrixType m(svd.matrixU());
+    m.col(0) /= x;
+    rotation->lazyAssign(m * svd.matrixV().adjoint());
+  }
+}
+
+/** decomposes the linear part of the transformation as a product scaling x rotation, the scaling being
+  * not necessarily positive.
+  *
+  * If either pointer is zero, the corresponding computation is skipped.
+  *
+  *
+  *
+  * \svd_module
+  *
+  * \sa computeRotationScaling(), rotation(), class SVD
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename ScalingMatrixType, typename RotationMatrixType>
+EIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
+{
+  JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
+
+  Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant(); // so x has absolute value 1
+  VectorType sv(svd.singularValues());
+  sv.coeffRef(0) *= x;
+  if(scaling) scaling->lazyAssign(svd.matrixU() * sv.asDiagonal() * svd.matrixU().adjoint());
+  if(rotation)
+  {
+    LinearMatrixType m(svd.matrixU());
+    m.col(0) /= x;
+    rotation->lazyAssign(m * svd.matrixV().adjoint());
+  }
+}
+
+/** Convenient method to set \c *this from a position, orientation and scale
+  * of a 3D object.
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+template<typename PositionDerived, typename OrientationType, typename ScaleDerived>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
+Transform<Scalar,Dim,Mode,Options>::fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,
+  const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale)
+{
+  linear() = internal::toRotationMatrix<Scalar,Dim>(orientation);
+  linear() *= scale.asDiagonal();
+  translation() = position;
+  makeAffine();
+  return *this;
+}
+
+namespace internal {
+
+template<int Mode>
+struct transform_make_affine
+{
+  template<typename MatrixType>
+  EIGEN_DEVICE_FUNC static void run(MatrixType &mat)
+  {
+    static const int Dim = MatrixType::ColsAtCompileTime-1;
+    mat.template block<1,Dim>(Dim,0).setZero();
+    mat.coeffRef(Dim,Dim) = typename MatrixType::Scalar(1);
+  }
+};
+
+template<>
+struct transform_make_affine<AffineCompact>
+{
+  template<typename MatrixType> EIGEN_DEVICE_FUNC static void run(MatrixType &) { }
+};
+    
+// selector needed to avoid taking the inverse of a 3x4 matrix
+template<typename TransformType, int Mode=TransformType::Mode>
+struct projective_transform_inverse
+{
+  EIGEN_DEVICE_FUNC static inline void run(const TransformType&, TransformType&)
+  {}
+};
+
+template<typename TransformType>
+struct projective_transform_inverse<TransformType, Projective>
+{
+  EIGEN_DEVICE_FUNC static inline void run(const TransformType& m, TransformType& res)
+  {
+    res.matrix() = m.matrix().inverse();
+  }
+};
+
+} // end namespace internal
+
+
+/**
+  *
+  * \returns the inverse transformation according to some given knowledge
+  * on \c *this.
+  *
+  * \param hint allows to optimize the inversion process when the transformation
+  * is known to be not a general transformation (optional). The possible values are:
+  *  - #Projective if the transformation is not necessarily affine, i.e., if the
+  *    last row is not guaranteed to be [0 ... 0 1]
+  *  - #Affine if the last row can be assumed to be [0 ... 0 1]
+  *  - #Isometry if the transformation is only a concatenations of translations
+  *    and rotations.
+  *  The default is the template class parameter \c Mode.
+  *
+  * \warning unless \a traits is always set to NoShear or NoScaling, this function
+  * requires the generic inverse method of MatrixBase defined in the LU module. If
+  * you forget to include this module, then you will get hard to debug linking errors.
+  *
+  * \sa MatrixBase::inverse()
+  */
+template<typename Scalar, int Dim, int Mode, int Options>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>
+Transform<Scalar,Dim,Mode,Options>::inverse(TransformTraits hint) const
+{
+  Transform res;
+  if (hint == Projective)
+  {
+    internal::projective_transform_inverse<Transform>::run(*this, res);
+  }
+  else
+  {
+    if (hint == Isometry)
+    {
+      res.matrix().template topLeftCorner<Dim,Dim>() = linear().transpose();
+    }
+    else if(hint&Affine)
+    {
+      res.matrix().template topLeftCorner<Dim,Dim>() = linear().inverse();
+    }
+    else
+    {
+      eigen_assert(false && "Invalid transform traits in Transform::Inverse");
+    }
+    // translation and remaining parts
+    res.matrix().template topRightCorner<Dim,1>()
+      = - res.matrix().template topLeftCorner<Dim,Dim>() * translation();
+    res.makeAffine(); // we do need this, because in the beginning res is uninitialized
+  }
+  return res;
+}
+
+namespace internal {
+
+/*****************************************************
+*** Specializations of take affine part            ***
+*****************************************************/
+
+template<typename TransformType> struct transform_take_affine_part {
+  typedef typename TransformType::MatrixType MatrixType;
+  typedef typename TransformType::AffinePart AffinePart;
+  typedef typename TransformType::ConstAffinePart ConstAffinePart;
+  static inline AffinePart run(MatrixType& m)
+  { return m.template block<TransformType::Dim,TransformType::HDim>(0,0); }
+  static inline ConstAffinePart run(const MatrixType& m)
+  { return m.template block<TransformType::Dim,TransformType::HDim>(0,0); }
+};
+
+template<typename Scalar, int Dim, int Options>
+struct transform_take_affine_part<Transform<Scalar,Dim,AffineCompact, Options> > {
+  typedef typename Transform<Scalar,Dim,AffineCompact,Options>::MatrixType MatrixType;
+  static inline MatrixType& run(MatrixType& m) { return m; }
+  static inline const MatrixType& run(const MatrixType& m) { return m; }
+};
+
+/*****************************************************
+*** Specializations of construct from matrix       ***
+*****************************************************/
+
+template<typename Other, int Mode, int Options, int Dim, int HDim>
+struct transform_construct_from_matrix<Other, Mode,Options,Dim,HDim, Dim,Dim>
+{
+  static inline void run(Transform<typename Other::Scalar,Dim,Mode,Options> *transform, const Other& other)
+  {
+    transform->linear() = other;
+    transform->translation().setZero();
+    transform->makeAffine();
+  }
+};
+
+template<typename Other, int Mode, int Options, int Dim, int HDim>
+struct transform_construct_from_matrix<Other, Mode,Options,Dim,HDim, Dim,HDim>
+{
+  static inline void run(Transform<typename Other::Scalar,Dim,Mode,Options> *transform, const Other& other)
+  {
+    transform->affine() = other;
+    transform->makeAffine();
+  }
+};
+
+template<typename Other, int Mode, int Options, int Dim, int HDim>
+struct transform_construct_from_matrix<Other, Mode,Options,Dim,HDim, HDim,HDim>
+{
+  static inline void run(Transform<typename Other::Scalar,Dim,Mode,Options> *transform, const Other& other)
+  { transform->matrix() = other; }
+};
+
+template<typename Other, int Options, int Dim, int HDim>
+struct transform_construct_from_matrix<Other, AffineCompact,Options,Dim,HDim, HDim,HDim>
+{
+  static inline void run(Transform<typename Other::Scalar,Dim,AffineCompact,Options> *transform, const Other& other)
+  { transform->matrix() = other.template block<Dim,HDim>(0,0); }
+};
+
+/**********************************************************
+***   Specializations of operator* with rhs EigenBase   ***
+**********************************************************/
+
+template<int LhsMode,int RhsMode>
+struct transform_product_result
+{
+  enum 
+  { 
+    Mode =
+      (LhsMode == (int)Projective    || RhsMode == (int)Projective    ) ? Projective :
+      (LhsMode == (int)Affine        || RhsMode == (int)Affine        ) ? Affine :
+      (LhsMode == (int)AffineCompact || RhsMode == (int)AffineCompact ) ? AffineCompact :
+      (LhsMode == (int)Isometry      || RhsMode == (int)Isometry      ) ? Isometry : Projective
+  };
+};
+
+template< typename TransformType, typename MatrixType, int RhsCols>
+struct transform_right_product_impl< TransformType, MatrixType, 0, RhsCols>
+{
+  typedef typename MatrixType::PlainObject ResultType;
+
+  static EIGEN_STRONG_INLINE ResultType run(const TransformType& T, const MatrixType& other)
+  {
+    return T.matrix() * other;
+  }
+};
+
+template< typename TransformType, typename MatrixType, int RhsCols>
+struct transform_right_product_impl< TransformType, MatrixType, 1, RhsCols>
+{
+  enum { 
+    Dim = TransformType::Dim, 
+    HDim = TransformType::HDim,
+    OtherRows = MatrixType::RowsAtCompileTime,
+    OtherCols = MatrixType::ColsAtCompileTime
+  };
+
+  typedef typename MatrixType::PlainObject ResultType;
+
+  static EIGEN_STRONG_INLINE ResultType run(const TransformType& T, const MatrixType& other)
+  {
+    EIGEN_STATIC_ASSERT(OtherRows==HDim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES);
+
+    typedef Block<ResultType, Dim, OtherCols, int(MatrixType::RowsAtCompileTime)==Dim> TopLeftLhs;
+
+    ResultType res(other.rows(),other.cols());
+    TopLeftLhs(res, 0, 0, Dim, other.cols()).noalias() = T.affine() * other;
+    res.row(OtherRows-1) = other.row(OtherRows-1);
+    
+    return res;
+  }
+};
+
+template< typename TransformType, typename MatrixType, int RhsCols>
+struct transform_right_product_impl< TransformType, MatrixType, 2, RhsCols>
+{
+  enum { 
+    Dim = TransformType::Dim, 
+    HDim = TransformType::HDim,
+    OtherRows = MatrixType::RowsAtCompileTime,
+    OtherCols = MatrixType::ColsAtCompileTime
+  };
+
+  typedef typename MatrixType::PlainObject ResultType;
+
+  static EIGEN_STRONG_INLINE ResultType run(const TransformType& T, const MatrixType& other)
+  {
+    EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES);
+
+    typedef Block<ResultType, Dim, OtherCols, true> TopLeftLhs;
+    ResultType res(Replicate<typename TransformType::ConstTranslationPart, 1, OtherCols>(T.translation(),1,other.cols()));
+    TopLeftLhs(res, 0, 0, Dim, other.cols()).noalias() += T.linear() * other;
+
+    return res;
+  }
+};
+
+template< typename TransformType, typename MatrixType >
+struct transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is a vector of size Dim
+{
+  typedef typename TransformType::MatrixType TransformMatrix;
+  enum {
+    Dim = TransformType::Dim,
+    HDim = TransformType::HDim,
+    OtherRows = MatrixType::RowsAtCompileTime,
+    WorkingRows = EIGEN_PLAIN_ENUM_MIN(TransformMatrix::RowsAtCompileTime,HDim)
+  };
+
+  typedef typename MatrixType::PlainObject ResultType;
+
+  static EIGEN_STRONG_INLINE ResultType run(const TransformType& T, const MatrixType& other)
+  {
+    EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES);
+
+    Matrix<typename ResultType::Scalar, Dim+1, 1> rhs;
+    rhs.template head<Dim>() = other; rhs[Dim] = typename ResultType::Scalar(1);
+    Matrix<typename ResultType::Scalar, WorkingRows, 1> res(T.matrix() * rhs);
+    return res.template head<Dim>();
+  }
+};
+
+/**********************************************************
+***   Specializations of operator* with lhs EigenBase   ***
+**********************************************************/
+
+// generic HDim x HDim matrix * T => Projective
+template<typename Other,int Mode, int Options, int Dim, int HDim>
+struct transform_left_product_impl<Other,Mode,Options,Dim,HDim, HDim,HDim>
+{
+  typedef Transform<typename Other::Scalar,Dim,Mode,Options> TransformType;
+  typedef typename TransformType::MatrixType MatrixType;
+  typedef Transform<typename Other::Scalar,Dim,Projective,Options> ResultType;
+  static ResultType run(const Other& other,const TransformType& tr)
+  { return ResultType(other * tr.matrix()); }
+};
+
+// generic HDim x HDim matrix * AffineCompact => Projective
+template<typename Other, int Options, int Dim, int HDim>
+struct transform_left_product_impl<Other,AffineCompact,Options,Dim,HDim, HDim,HDim>
+{
+  typedef Transform<typename Other::Scalar,Dim,AffineCompact,Options> TransformType;
+  typedef typename TransformType::MatrixType MatrixType;
+  typedef Transform<typename Other::Scalar,Dim,Projective,Options> ResultType;
+  static ResultType run(const Other& other,const TransformType& tr)
+  {
+    ResultType res;
+    res.matrix().noalias() = other.template block<HDim,Dim>(0,0) * tr.matrix();
+    res.matrix().col(Dim) += other.col(Dim);
+    return res;
+  }
+};
+
+// affine matrix * T
+template<typename Other,int Mode, int Options, int Dim, int HDim>
+struct transform_left_product_impl<Other,Mode,Options,Dim,HDim, Dim,HDim>
+{
+  typedef Transform<typename Other::Scalar,Dim,Mode,Options> TransformType;
+  typedef typename TransformType::MatrixType MatrixType;
+  typedef TransformType ResultType;
+  static ResultType run(const Other& other,const TransformType& tr)
+  {
+    ResultType res;
+    res.affine().noalias() = other * tr.matrix();
+    res.matrix().row(Dim) = tr.matrix().row(Dim);
+    return res;
+  }
+};
+
+// affine matrix * AffineCompact
+template<typename Other, int Options, int Dim, int HDim>
+struct transform_left_product_impl<Other,AffineCompact,Options,Dim,HDim, Dim,HDim>
+{
+  typedef Transform<typename Other::Scalar,Dim,AffineCompact,Options> TransformType;
+  typedef typename TransformType::MatrixType MatrixType;
+  typedef TransformType ResultType;
+  static ResultType run(const Other& other,const TransformType& tr)
+  {
+    ResultType res;
+    res.matrix().noalias() = other.template block<Dim,Dim>(0,0) * tr.matrix();
+    res.translation() += other.col(Dim);
+    return res;
+  }
+};
+
+// linear matrix * T
+template<typename Other,int Mode, int Options, int Dim, int HDim>
+struct transform_left_product_impl<Other,Mode,Options,Dim,HDim, Dim,Dim>
+{
+  typedef Transform<typename Other::Scalar,Dim,Mode,Options> TransformType;
+  typedef typename TransformType::MatrixType MatrixType;
+  typedef TransformType ResultType;
+  static ResultType run(const Other& other, const TransformType& tr)
+  {
+    TransformType res;
+    if(Mode!=int(AffineCompact))
+      res.matrix().row(Dim) = tr.matrix().row(Dim);
+    res.matrix().template topRows<Dim>().noalias()
+      = other * tr.matrix().template topRows<Dim>();
+    return res;
+  }
+};
+
+/**********************************************************
+*** Specializations of operator* with another Transform ***
+**********************************************************/
+
+template<typename Scalar, int Dim, int LhsMode, int LhsOptions, int RhsMode, int RhsOptions>
+struct transform_transform_product_impl<Transform<Scalar,Dim,LhsMode,LhsOptions>,Transform<Scalar,Dim,RhsMode,RhsOptions>,false >
+{
+  enum { ResultMode = transform_product_result<LhsMode,RhsMode>::Mode };
+  typedef Transform<Scalar,Dim,LhsMode,LhsOptions> Lhs;
+  typedef Transform<Scalar,Dim,RhsMode,RhsOptions> Rhs;
+  typedef Transform<Scalar,Dim,ResultMode,LhsOptions> ResultType;
+  static ResultType run(const Lhs& lhs, const Rhs& rhs)
+  {
+    ResultType res;
+    res.linear() = lhs.linear() * rhs.linear();
+    res.translation() = lhs.linear() * rhs.translation() + lhs.translation();
+    res.makeAffine();
+    return res;
+  }
+};
+
+template<typename Scalar, int Dim, int LhsMode, int LhsOptions, int RhsMode, int RhsOptions>
+struct transform_transform_product_impl<Transform<Scalar,Dim,LhsMode,LhsOptions>,Transform<Scalar,Dim,RhsMode,RhsOptions>,true >
+{
+  typedef Transform<Scalar,Dim,LhsMode,LhsOptions> Lhs;
+  typedef Transform<Scalar,Dim,RhsMode,RhsOptions> Rhs;
+  typedef Transform<Scalar,Dim,Projective> ResultType;
+  static ResultType run(const Lhs& lhs, const Rhs& rhs)
+  {
+    return ResultType( lhs.matrix() * rhs.matrix() );
+  }
+};
+
+template<typename Scalar, int Dim, int LhsOptions, int RhsOptions>
+struct transform_transform_product_impl<Transform<Scalar,Dim,AffineCompact,LhsOptions>,Transform<Scalar,Dim,Projective,RhsOptions>,true >
+{
+  typedef Transform<Scalar,Dim,AffineCompact,LhsOptions> Lhs;
+  typedef Transform<Scalar,Dim,Projective,RhsOptions> Rhs;
+  typedef Transform<Scalar,Dim,Projective> ResultType;
+  static ResultType run(const Lhs& lhs, const Rhs& rhs)
+  {
+    ResultType res;
+    res.matrix().template topRows<Dim>() = lhs.matrix() * rhs.matrix();
+    res.matrix().row(Dim) = rhs.matrix().row(Dim);
+    return res;
+  }
+};
+
+template<typename Scalar, int Dim, int LhsOptions, int RhsOptions>
+struct transform_transform_product_impl<Transform<Scalar,Dim,Projective,LhsOptions>,Transform<Scalar,Dim,AffineCompact,RhsOptions>,true >
+{
+  typedef Transform<Scalar,Dim,Projective,LhsOptions> Lhs;
+  typedef Transform<Scalar,Dim,AffineCompact,RhsOptions> Rhs;
+  typedef Transform<Scalar,Dim,Projective> ResultType;
+  static ResultType run(const Lhs& lhs, const Rhs& rhs)
+  {
+    ResultType res(lhs.matrix().template leftCols<Dim>() * rhs.matrix());
+    res.matrix().col(Dim) += lhs.matrix().col(Dim);
+    return res;
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRANSFORM_H
diff --git a/third-party/Eigen/src/Geometry/Translation.h b/third-party/Eigen/src/Geometry/Translation.h
new file mode 100644
index 00000000..0e99ce68
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/Translation.h
@@ -0,0 +1,202 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TRANSLATION_H
+#define EIGEN_TRANSLATION_H
+
+namespace Eigen { 
+
+/** \geometry_module \ingroup Geometry_Module
+  *
+  * \class Translation
+  *
+  * \brief Represents a translation transformation
+  *
+  * \tparam _Scalar the scalar type, i.e., the type of the coefficients.
+  * \tparam _Dim the  dimension of the space, can be a compile time value or Dynamic
+  *
+  * \note This class is not aimed to be used to store a translation transformation,
+  * but rather to make easier the constructions and updates of Transform objects.
+  *
+  * \sa class Scaling, class Transform
+  */
+template<typename _Scalar, int _Dim>
+class Translation
+{
+public:
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim)
+  /** dimension of the space */
+  enum { Dim = _Dim };
+  /** the scalar type of the coefficients */
+  typedef _Scalar Scalar;
+  /** corresponding vector type */
+  typedef Matrix<Scalar,Dim,1> VectorType;
+  /** corresponding linear transformation matrix type */
+  typedef Matrix<Scalar,Dim,Dim> LinearMatrixType;
+  /** corresponding affine transformation type */
+  typedef Transform<Scalar,Dim,Affine> AffineTransformType;
+  /** corresponding isometric transformation type */
+  typedef Transform<Scalar,Dim,Isometry> IsometryTransformType;
+
+protected:
+
+  VectorType m_coeffs;
+
+public:
+
+  /** Default constructor without initialization. */
+  EIGEN_DEVICE_FUNC Translation() {}
+  /**  */
+  EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy)
+  {
+    eigen_assert(Dim==2);
+    m_coeffs.x() = sx;
+    m_coeffs.y() = sy;
+  }
+  /**  */
+  EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz)
+  {
+    eigen_assert(Dim==3);
+    m_coeffs.x() = sx;
+    m_coeffs.y() = sy;
+    m_coeffs.z() = sz;
+  }
+  /** Constructs and initialize the translation transformation from a vector of translation coefficients */
+  EIGEN_DEVICE_FUNC explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
+
+  /** \brief Retruns the x-translation by value. **/
+  EIGEN_DEVICE_FUNC inline Scalar x() const { return m_coeffs.x(); }
+  /** \brief Retruns the y-translation by value. **/
+  EIGEN_DEVICE_FUNC inline Scalar y() const { return m_coeffs.y(); }
+  /** \brief Retruns the z-translation by value. **/
+  EIGEN_DEVICE_FUNC inline Scalar z() const { return m_coeffs.z(); }
+
+  /** \brief Retruns the x-translation as a reference. **/
+  EIGEN_DEVICE_FUNC inline Scalar& x() { return m_coeffs.x(); }
+  /** \brief Retruns the y-translation as a reference. **/
+  EIGEN_DEVICE_FUNC inline Scalar& y() { return m_coeffs.y(); }
+  /** \brief Retruns the z-translation as a reference. **/
+  EIGEN_DEVICE_FUNC inline Scalar& z() { return m_coeffs.z(); }
+
+  EIGEN_DEVICE_FUNC const VectorType& vector() const { return m_coeffs; }
+  EIGEN_DEVICE_FUNC VectorType& vector() { return m_coeffs; }
+
+  EIGEN_DEVICE_FUNC const VectorType& translation() const { return m_coeffs; }
+  EIGEN_DEVICE_FUNC VectorType& translation() { return m_coeffs; }
+
+  /** Concatenates two translation */
+  EIGEN_DEVICE_FUNC inline Translation operator* (const Translation& other) const
+  { return Translation(m_coeffs + other.m_coeffs); }
+
+  /** Concatenates a translation and a uniform scaling */
+  EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;
+
+  /** Concatenates a translation and a linear transformation */
+  template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;
+
+  /** Concatenates a translation and a rotation */
+  template<typename Derived>
+  EIGEN_DEVICE_FUNC inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const
+  { return *this * IsometryTransformType(r); }
+
+  /** \returns the concatenation of a linear transformation \a l with the translation \a t */
+  // its a nightmare to define a templated friend function outside its declaration
+  template<typename OtherDerived> friend
+  EIGEN_DEVICE_FUNC inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)
+  {
+    AffineTransformType res;
+    res.matrix().setZero();
+    res.linear() = linear.derived();
+    res.translation() = linear.derived() * t.m_coeffs;
+    res.matrix().row(Dim).setZero();
+    res(Dim,Dim) = Scalar(1);
+    return res;
+  }
+
+  /** Concatenates a translation and a transformation */
+  template<int Mode, int Options>
+  EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode,Options>& t) const
+  {
+    Transform<Scalar,Dim,Mode> res = t;
+    res.pretranslate(m_coeffs);
+    return res;
+  }
+
+  /** Applies translation to vector */
+  template<typename Derived>
+  inline typename internal::enable_if<Derived::IsVectorAtCompileTime,VectorType>::type
+  operator* (const MatrixBase<Derived>& vec) const
+  { return m_coeffs + vec.derived(); }
+
+  /** \returns the inverse translation (opposite) */
+  Translation inverse() const { return Translation(-m_coeffs); }
+
+  static const Translation Identity() { return Translation(VectorType::Zero()); }
+
+  /** \returns \c *this with scalar type casted to \a NewScalarType
+    *
+    * Note that if \a NewScalarType is equal to the current scalar type of \c *this
+    * then this function smartly returns a const reference to \c *this.
+    */
+  template<typename NewScalarType>
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const
+  { return typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type(*this); }
+
+  /** Copy constructor with scalar type conversion */
+  template<typename OtherScalarType>
+  EIGEN_DEVICE_FUNC inline explicit Translation(const Translation<OtherScalarType,Dim>& other)
+  { m_coeffs = other.vector().template cast<Scalar>(); }
+
+  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+    * determined by \a prec.
+    *
+    * \sa MatrixBase::isApprox() */
+  EIGEN_DEVICE_FUNC bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  { return m_coeffs.isApprox(other.m_coeffs, prec); }
+
+};
+
+/** \addtogroup Geometry_Module */
+//@{
+typedef Translation<float, 2> Translation2f;
+typedef Translation<double,2> Translation2d;
+typedef Translation<float, 3> Translation3f;
+typedef Translation<double,3> Translation3d;
+//@}
+
+template<typename Scalar, int Dim>
+EIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType
+Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
+{
+  AffineTransformType res;
+  res.matrix().setZero();
+  res.linear().diagonal().fill(other.factor());
+  res.translation() = m_coeffs;
+  res(Dim,Dim) = Scalar(1);
+  return res;
+}
+
+template<typename Scalar, int Dim>
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType
+Translation<Scalar,Dim>::operator* (const EigenBase<OtherDerived>& linear) const
+{
+  AffineTransformType res;
+  res.matrix().setZero();
+  res.linear() = linear.derived();
+  res.translation() = m_coeffs;
+  res.matrix().row(Dim).setZero();
+  res(Dim,Dim) = Scalar(1);
+  return res;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_TRANSLATION_H
diff --git a/third-party/Eigen/src/Geometry/Umeyama.h b/third-party/Eigen/src/Geometry/Umeyama.h
new file mode 100644
index 00000000..6b755008
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/Umeyama.h
@@ -0,0 +1,166 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Hauke Heibel <hauke.heibel@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_UMEYAMA_H
+#define EIGEN_UMEYAMA_H
+
+// This file requires the user to include 
+// * Eigen/Core
+// * Eigen/LU 
+// * Eigen/SVD
+// * Eigen/Array
+
+namespace Eigen { 
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+
+// These helpers are required since it allows to use mixed types as parameters
+// for the Umeyama. The problem with mixed parameters is that the return type
+// cannot trivially be deduced when float and double types are mixed.
+namespace internal {
+
+// Compile time return type deduction for different MatrixBase types.
+// Different means here different alignment and parameters but the same underlying
+// real scalar type.
+template<typename MatrixType, typename OtherMatrixType>
+struct umeyama_transform_matrix_type
+{
+  enum {
+    MinRowsAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(MatrixType::RowsAtCompileTime, OtherMatrixType::RowsAtCompileTime),
+
+    // When possible we want to choose some small fixed size value since the result
+    // is likely to fit on the stack. So here, EIGEN_SIZE_MIN_PREFER_DYNAMIC is not what we want.
+    HomogeneousDimension = int(MinRowsAtCompileTime) == Dynamic ? Dynamic : int(MinRowsAtCompileTime)+1
+  };
+
+  typedef Matrix<typename traits<MatrixType>::Scalar,
+    HomogeneousDimension,
+    HomogeneousDimension,
+    AutoAlign | (traits<MatrixType>::Flags & RowMajorBit ? RowMajor : ColMajor),
+    HomogeneousDimension,
+    HomogeneousDimension
+  > type;
+};
+
+}
+
+#endif
+
+/**
+* \geometry_module \ingroup Geometry_Module
+*
+* \brief Returns the transformation between two point sets.
+*
+* The algorithm is based on:
+* "Least-squares estimation of transformation parameters between two point patterns",
+* Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573
+*
+* It estimates parameters \f$ c, \mathbf{R}, \f$ and \f$ \mathbf{t} \f$ such that
+* \f{align*}
+*   \frac{1}{n} \sum_{i=1}^n \vert\vert y_i - (c\mathbf{R}x_i + \mathbf{t}) \vert\vert_2^2
+* \f}
+* is minimized.
+*
+* The algorithm is based on the analysis of the covariance matrix
+* \f$ \Sigma_{\mathbf{x}\mathbf{y}} \in \mathbb{R}^{d \times d} \f$
+* of the input point sets \f$ \mathbf{x} \f$ and \f$ \mathbf{y} \f$ where 
+* \f$d\f$ is corresponding to the dimension (which is typically small).
+* The analysis is involving the SVD having a complexity of \f$O(d^3)\f$
+* though the actual computational effort lies in the covariance
+* matrix computation which has an asymptotic lower bound of \f$O(dm)\f$ when 
+* the input point sets have dimension \f$d \times m\f$.
+*
+* Currently the method is working only for floating point matrices.
+*
+* \todo Should the return type of umeyama() become a Transform?
+*
+* \param src Source points \f$ \mathbf{x} = \left( x_1, \hdots, x_n \right) \f$.
+* \param dst Destination points \f$ \mathbf{y} = \left( y_1, \hdots, y_n \right) \f$.
+* \param with_scaling Sets \f$ c=1 \f$ when <code>false</code> is passed.
+* \return The homogeneous transformation 
+* \f{align*}
+*   T = \begin{bmatrix} c\mathbf{R} & \mathbf{t} \\ \mathbf{0} & 1 \end{bmatrix}
+* \f}
+* minimizing the residual above. This transformation is always returned as an 
+* Eigen::Matrix.
+*/
+template <typename Derived, typename OtherDerived>
+typename internal::umeyama_transform_matrix_type<Derived, OtherDerived>::type
+umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, bool with_scaling = true)
+{
+  typedef typename internal::umeyama_transform_matrix_type<Derived, OtherDerived>::type TransformationMatrixType;
+  typedef typename internal::traits<TransformationMatrixType>::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+
+  EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsComplex, NUMERIC_TYPE_MUST_BE_REAL)
+  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename internal::traits<OtherDerived>::Scalar>::value),
+    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+  enum { Dimension = EIGEN_SIZE_MIN_PREFER_DYNAMIC(Derived::RowsAtCompileTime, OtherDerived::RowsAtCompileTime) };
+
+  typedef Matrix<Scalar, Dimension, 1> VectorType;
+  typedef Matrix<Scalar, Dimension, Dimension> MatrixType;
+  typedef typename internal::plain_matrix_type_row_major<Derived>::type RowMajorMatrixType;
+
+  const Index m = src.rows(); // dimension
+  const Index n = src.cols(); // number of measurements
+
+  // required for demeaning ...
+  const RealScalar one_over_n = RealScalar(1) / static_cast<RealScalar>(n);
+
+  // computation of mean
+  const VectorType src_mean = src.rowwise().sum() * one_over_n;
+  const VectorType dst_mean = dst.rowwise().sum() * one_over_n;
+
+  // demeaning of src and dst points
+  const RowMajorMatrixType src_demean = src.colwise() - src_mean;
+  const RowMajorMatrixType dst_demean = dst.colwise() - dst_mean;
+
+  // Eq. (36)-(37)
+  const Scalar src_var = src_demean.rowwise().squaredNorm().sum() * one_over_n;
+
+  // Eq. (38)
+  const MatrixType sigma = one_over_n * dst_demean * src_demean.transpose();
+
+  JacobiSVD<MatrixType> svd(sigma, ComputeFullU | ComputeFullV);
+
+  // Initialize the resulting transformation with an identity matrix...
+  TransformationMatrixType Rt = TransformationMatrixType::Identity(m+1,m+1);
+
+  // Eq. (39)
+  VectorType S = VectorType::Ones(m);
+
+  if  ( svd.matrixU().determinant() * svd.matrixV().determinant() < 0 )
+    S(m-1) = -1;
+
+  // Eq. (40) and (43)
+  Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose();
+
+  if (with_scaling)
+  {
+    // Eq. (42)
+    const Scalar c = Scalar(1)/src_var * svd.singularValues().dot(S);
+
+    // Eq. (41)
+    Rt.col(m).head(m) = dst_mean;
+    Rt.col(m).head(m).noalias() -= c*Rt.topLeftCorner(m,m)*src_mean;
+    Rt.block(0,0,m,m) *= c;
+  }
+  else
+  {
+    Rt.col(m).head(m) = dst_mean;
+    Rt.col(m).head(m).noalias() -= Rt.topLeftCorner(m,m)*src_mean;
+  }
+
+  return Rt;
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_UMEYAMA_H
diff --git a/third-party/Eigen/src/Geometry/arch/Geometry_SSE.h b/third-party/Eigen/src/Geometry/arch/Geometry_SSE.h
new file mode 100644
index 00000000..f68cab58
--- /dev/null
+++ b/third-party/Eigen/src/Geometry/arch/Geometry_SSE.h
@@ -0,0 +1,161 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com>
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GEOMETRY_SSE_H
+#define EIGEN_GEOMETRY_SSE_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<class Derived, class OtherDerived>
+struct quat_product<Architecture::SSE, Derived, OtherDerived, float>
+{
+  enum {
+    AAlignment = traits<Derived>::Alignment,
+    BAlignment = traits<OtherDerived>::Alignment,
+    ResAlignment = traits<Quaternion<float> >::Alignment
+  };
+  static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
+  {
+    Quaternion<float> res;
+    const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
+    __m128 a = _a.coeffs().template packet<AAlignment>(0);
+    __m128 b = _b.coeffs().template packet<BAlignment>(0);
+    __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
+    __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
+    pstoret<float,Packet4f,ResAlignment>(
+              &res.x(),
+              _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
+                                    _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
+                                               vec4f_swizzle1(b,1,2,0,0))),
+                         _mm_xor_ps(mask,_mm_add_ps(s1,s2))));
+    
+    return res;
+  }
+};
+
+template<class Derived>
+struct quat_conj<Architecture::SSE, Derived, float>
+{
+  enum {
+    ResAlignment = traits<Quaternion<float> >::Alignment
+  };
+  static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
+  {
+    Quaternion<float> res;
+    const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
+    pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
+    return res;
+  }
+};
+
+
+template<typename VectorLhs,typename VectorRhs>
+struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
+{
+  enum {
+    ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment
+  };
+  static inline typename plain_matrix_type<VectorLhs>::type
+  run(const VectorLhs& lhs, const VectorRhs& rhs)
+  {
+    __m128 a = lhs.template packet<traits<VectorLhs>::Alignment>(0);
+    __m128 b = rhs.template packet<traits<VectorRhs>::Alignment>(0);
+    __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
+    __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
+    typename plain_matrix_type<VectorLhs>::type res;
+    pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2));
+    return res;
+  }
+};
+
+
+
+
+template<class Derived, class OtherDerived>
+struct quat_product<Architecture::SSE, Derived, OtherDerived, double>
+{
+  enum {
+    BAlignment = traits<OtherDerived>::Alignment,
+    ResAlignment = traits<Quaternion<double> >::Alignment
+  };
+
+  static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
+  {
+  const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
+
+  Quaternion<double> res;
+
+  const double* a = _a.coeffs().data();
+  Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0);
+  Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2);
+  Packet2d a_xx = pset1<Packet2d>(a[0]);
+  Packet2d a_yy = pset1<Packet2d>(a[1]);
+  Packet2d a_zz = pset1<Packet2d>(a[2]);
+  Packet2d a_ww = pset1<Packet2d>(a[3]);
+
+  // two temporaries:
+  Packet2d t1, t2;
+
+  /*
+   * t1 = ww*xy + yy*zw
+   * t2 = zz*xy - xx*zw
+   * res.xy = t1 +/- swap(t2)
+   */
+  t1 = padd(pmul(a_ww, b_xy), pmul(a_yy, b_zw));
+  t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));
+#ifdef EIGEN_VECTORIZE_SSE3
+  EIGEN_UNUSED_VARIABLE(mask)
+  pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
+#else
+  pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2))));
+#endif
+  
+  /*
+   * t1 = ww*zw - yy*xy
+   * t2 = zz*zw + xx*xy
+   * res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2)
+   */
+  t1 = psub(pmul(a_ww, b_zw), pmul(a_yy, b_xy));
+  t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));
+#ifdef EIGEN_VECTORIZE_SSE3
+  EIGEN_UNUSED_VARIABLE(mask)
+  pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
+#else
+  pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2))));
+#endif
+
+  return res;
+}
+};
+
+template<class Derived>
+struct quat_conj<Architecture::SSE, Derived, double>
+{
+  enum {
+    ResAlignment = traits<Quaternion<double> >::Alignment
+  };
+  static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
+  {
+    Quaternion<double> res;
+    const __m128d mask0 = _mm_setr_pd(-0.,-0.);
+    const __m128d mask2 = _mm_setr_pd(-0.,0.);
+    pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
+    pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2)));
+    return res;
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GEOMETRY_SSE_H
diff --git a/third-party/Eigen/src/Householder/BlockHouseholder.h b/third-party/Eigen/src/Householder/BlockHouseholder.h
new file mode 100644
index 00000000..01a7ed18
--- /dev/null
+++ b/third-party/Eigen/src/Householder/BlockHouseholder.h
@@ -0,0 +1,103 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Vincent Lejeune
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BLOCK_HOUSEHOLDER_H
+#define EIGEN_BLOCK_HOUSEHOLDER_H
+
+// This file contains some helper function to deal with block householder reflectors
+
+namespace Eigen { 
+
+namespace internal {
+  
+/** \internal */
+// template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
+// void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
+// {
+//   typedef typename VectorsType::Scalar Scalar;
+//   const Index nbVecs = vectors.cols();
+//   eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
+// 
+//   for(Index i = 0; i < nbVecs; i++)
+//   {
+//     Index rs = vectors.rows() - i;
+//     // Warning, note that hCoeffs may alias with vectors.
+//     // It is then necessary to copy it before modifying vectors(i,i). 
+//     typename CoeffsType::Scalar h = hCoeffs(i);
+//     // This hack permits to pass trough nested Block<> and Transpose<> expressions.
+//     Scalar *Vii_ptr = const_cast<Scalar*>(vectors.data() + vectors.outerStride()*i + vectors.innerStride()*i);
+//     Scalar Vii = *Vii_ptr;
+//     *Vii_ptr = Scalar(1);
+//     triFactor.col(i).head(i).noalias() = -h * vectors.block(i, 0, rs, i).adjoint()
+//                                        * vectors.col(i).tail(rs);
+//     *Vii_ptr = Vii;
+//     // FIXME add .noalias() once the triangular product can work inplace
+//     triFactor.col(i).head(i) = triFactor.block(0,0,i,i).template triangularView<Upper>()
+//                              * triFactor.col(i).head(i);
+//     triFactor(i,i) = hCoeffs(i);
+//   }
+// }
+
+/** \internal */
+// This variant avoid modifications in vectors
+template<typename TriangularFactorType,typename VectorsType,typename CoeffsType>
+void make_block_householder_triangular_factor(TriangularFactorType& triFactor, const VectorsType& vectors, const CoeffsType& hCoeffs)
+{
+  const Index nbVecs = vectors.cols();
+  eigen_assert(triFactor.rows() == nbVecs && triFactor.cols() == nbVecs && vectors.rows()>=nbVecs);
+
+  for(Index i = nbVecs-1; i >=0 ; --i)
+  {
+    Index rs = vectors.rows() - i - 1;
+    Index rt = nbVecs-i-1;
+
+    if(rt>0)
+    {
+      triFactor.row(i).tail(rt).noalias() = -hCoeffs(i) * vectors.col(i).tail(rs).adjoint()
+                                                        * vectors.bottomRightCorner(rs, rt).template triangularView<UnitLower>();
+            
+      // FIXME add .noalias() once the triangular product can work inplace
+      triFactor.row(i).tail(rt) = triFactor.row(i).tail(rt) * triFactor.bottomRightCorner(rt,rt).template triangularView<Upper>();
+      
+    }
+    triFactor(i,i) = hCoeffs(i);
+  }
+}
+
+/** \internal
+  * if forward then perform   mat = H0 * H1 * H2 * mat
+  * otherwise perform         mat = H2 * H1 * H0 * mat
+  */
+template<typename MatrixType,typename VectorsType,typename CoeffsType>
+void apply_block_householder_on_the_left(MatrixType& mat, const VectorsType& vectors, const CoeffsType& hCoeffs, bool forward)
+{
+  enum { TFactorSize = MatrixType::ColsAtCompileTime };
+  Index nbVecs = vectors.cols();
+  Matrix<typename MatrixType::Scalar, TFactorSize, TFactorSize, RowMajor> T(nbVecs,nbVecs);
+  
+  if(forward) make_block_householder_triangular_factor(T, vectors, hCoeffs);
+  else        make_block_householder_triangular_factor(T, vectors, hCoeffs.conjugate());  
+  const TriangularView<const VectorsType, UnitLower> V(vectors);
+
+  // A -= V T V^* A
+  Matrix<typename MatrixType::Scalar,VectorsType::ColsAtCompileTime,MatrixType::ColsAtCompileTime,
+         (VectorsType::MaxColsAtCompileTime==1 && MatrixType::MaxColsAtCompileTime!=1)?RowMajor:ColMajor,
+         VectorsType::MaxColsAtCompileTime,MatrixType::MaxColsAtCompileTime> tmp = V.adjoint() * mat;
+  // FIXME add .noalias() once the triangular product can work inplace
+  if(forward) tmp = T.template triangularView<Upper>()           * tmp;
+  else        tmp = T.template triangularView<Upper>().adjoint() * tmp;
+  mat.noalias() -= V * tmp;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BLOCK_HOUSEHOLDER_H
diff --git a/third-party/Eigen/src/Householder/Householder.h b/third-party/Eigen/src/Householder/Householder.h
new file mode 100644
index 00000000..80de2c30
--- /dev/null
+++ b/third-party/Eigen/src/Householder/Householder.h
@@ -0,0 +1,172 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_HOUSEHOLDER_H
+#define EIGEN_HOUSEHOLDER_H
+
+namespace Eigen { 
+
+namespace internal {
+template<int n> struct decrement_size
+{
+  enum {
+    ret = n==Dynamic ? n : n-1
+  };
+};
+}
+
+/** Computes the elementary reflector H such that:
+  * \f$ H *this = [ beta 0 ... 0]^T \f$
+  * where the transformation H is:
+  * \f$ H = I - tau v v^*\f$
+  * and the vector v is:
+  * \f$ v^T = [1 essential^T] \f$
+  *
+  * The essential part of the vector \c v is stored in *this.
+  * 
+  * On output:
+  * \param tau the scaling factor of the Householder transformation
+  * \param beta the result of H * \c *this
+  *
+  * \sa MatrixBase::makeHouseholder(), MatrixBase::applyHouseholderOnTheLeft(),
+  *     MatrixBase::applyHouseholderOnTheRight()
+  */
+template<typename Derived>
+void MatrixBase<Derived>::makeHouseholderInPlace(Scalar& tau, RealScalar& beta)
+{
+  VectorBlock<Derived, internal::decrement_size<Base::SizeAtCompileTime>::ret> essentialPart(derived(), 1, size()-1);
+  makeHouseholder(essentialPart, tau, beta);
+}
+
+/** Computes the elementary reflector H such that:
+  * \f$ H *this = [ beta 0 ... 0]^T \f$
+  * where the transformation H is:
+  * \f$ H = I - tau v v^*\f$
+  * and the vector v is:
+  * \f$ v^T = [1 essential^T] \f$
+  *
+  * On output:
+  * \param essential the essential part of the vector \c v
+  * \param tau the scaling factor of the Householder transformation
+  * \param beta the result of H * \c *this
+  *
+  * \sa MatrixBase::makeHouseholderInPlace(), MatrixBase::applyHouseholderOnTheLeft(),
+  *     MatrixBase::applyHouseholderOnTheRight()
+  */
+template<typename Derived>
+template<typename EssentialPart>
+void MatrixBase<Derived>::makeHouseholder(
+  EssentialPart& essential,
+  Scalar& tau,
+  RealScalar& beta) const
+{
+  using std::sqrt;
+  using numext::conj;
+  
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(EssentialPart)
+  VectorBlock<const Derived, EssentialPart::SizeAtCompileTime> tail(derived(), 1, size()-1);
+  
+  RealScalar tailSqNorm = size()==1 ? RealScalar(0) : tail.squaredNorm();
+  Scalar c0 = coeff(0);
+  const RealScalar tol = (std::numeric_limits<RealScalar>::min)();
+
+  if(tailSqNorm <= tol && numext::abs2(numext::imag(c0))<=tol)
+  {
+    tau = RealScalar(0);
+    beta = numext::real(c0);
+    essential.setZero();
+  }
+  else
+  {
+    beta = sqrt(numext::abs2(c0) + tailSqNorm);
+    if (numext::real(c0)>=RealScalar(0))
+      beta = -beta;
+    essential = tail / (c0 - beta);
+    tau = conj((beta - c0) / beta);
+  }
+}
+
+/** Apply the elementary reflector H given by
+  * \f$ H = I - tau v v^*\f$
+  * with
+  * \f$ v^T = [1 essential^T] \f$
+  * from the left to a vector or matrix.
+  *
+  * On input:
+  * \param essential the essential part of the vector \c v
+  * \param tau the scaling factor of the Householder transformation
+  * \param workspace a pointer to working space with at least
+  *                  this->cols() * essential.size() entries
+  *
+  * \sa MatrixBase::makeHouseholder(), MatrixBase::makeHouseholderInPlace(), 
+  *     MatrixBase::applyHouseholderOnTheRight()
+  */
+template<typename Derived>
+template<typename EssentialPart>
+void MatrixBase<Derived>::applyHouseholderOnTheLeft(
+  const EssentialPart& essential,
+  const Scalar& tau,
+  Scalar* workspace)
+{
+  if(rows() == 1)
+  {
+    *this *= Scalar(1)-tau;
+  }
+  else if(tau!=Scalar(0))
+  {
+    Map<typename internal::plain_row_type<PlainObject>::type> tmp(workspace,cols());
+    Block<Derived, EssentialPart::SizeAtCompileTime, Derived::ColsAtCompileTime> bottom(derived(), 1, 0, rows()-1, cols());
+    tmp.noalias() = essential.adjoint() * bottom;
+    tmp += this->row(0);
+    this->row(0) -= tau * tmp;
+    bottom.noalias() -= tau * essential * tmp;
+  }
+}
+
+/** Apply the elementary reflector H given by
+  * \f$ H = I - tau v v^*\f$
+  * with
+  * \f$ v^T = [1 essential^T] \f$
+  * from the right to a vector or matrix.
+  *
+  * On input:
+  * \param essential the essential part of the vector \c v
+  * \param tau the scaling factor of the Householder transformation
+  * \param workspace a pointer to working space with at least
+  *                  this->cols() * essential.size() entries
+  *
+  * \sa MatrixBase::makeHouseholder(), MatrixBase::makeHouseholderInPlace(), 
+  *     MatrixBase::applyHouseholderOnTheLeft()
+  */
+template<typename Derived>
+template<typename EssentialPart>
+void MatrixBase<Derived>::applyHouseholderOnTheRight(
+  const EssentialPart& essential,
+  const Scalar& tau,
+  Scalar* workspace)
+{
+  if(cols() == 1)
+  {
+    *this *= Scalar(1)-tau;
+  }
+  else if(tau!=Scalar(0))
+  {
+    Map<typename internal::plain_col_type<PlainObject>::type> tmp(workspace,rows());
+    Block<Derived, Derived::RowsAtCompileTime, EssentialPart::SizeAtCompileTime> right(derived(), 0, 1, rows(), cols()-1);
+    tmp.noalias() = right * essential.conjugate();
+    tmp += this->col(0);
+    this->col(0) -= tau * tmp;
+    right.noalias() -= tau * tmp * essential.transpose();
+  }
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_HOUSEHOLDER_H
diff --git a/third-party/Eigen/src/Householder/HouseholderSequence.h b/third-party/Eigen/src/Householder/HouseholderSequence.h
new file mode 100644
index 00000000..3ce0a693
--- /dev/null
+++ b/third-party/Eigen/src/Householder/HouseholderSequence.h
@@ -0,0 +1,470 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_HOUSEHOLDER_SEQUENCE_H
+#define EIGEN_HOUSEHOLDER_SEQUENCE_H
+
+namespace Eigen { 
+
+/** \ingroup Householder_Module
+  * \householder_module
+  * \class HouseholderSequence
+  * \brief Sequence of Householder reflections acting on subspaces with decreasing size
+  * \tparam VectorsType type of matrix containing the Householder vectors
+  * \tparam CoeffsType  type of vector containing the Householder coefficients
+  * \tparam Side        either OnTheLeft (the default) or OnTheRight
+  *
+  * This class represents a product sequence of Householder reflections where the first Householder reflection
+  * acts on the whole space, the second Householder reflection leaves the one-dimensional subspace spanned by
+  * the first unit vector invariant, the third Householder reflection leaves the two-dimensional subspace
+  * spanned by the first two unit vectors invariant, and so on up to the last reflection which leaves all but
+  * one dimensions invariant and acts only on the last dimension. Such sequences of Householder reflections
+  * are used in several algorithms to zero out certain parts of a matrix. Indeed, the methods
+  * HessenbergDecomposition::matrixQ(), Tridiagonalization::matrixQ(), HouseholderQR::householderQ(),
+  * and ColPivHouseholderQR::householderQ() all return a %HouseholderSequence.
+  *
+  * More precisely, the class %HouseholderSequence represents an \f$ n \times n \f$ matrix \f$ H \f$ of the
+  * form \f$ H = \prod_{i=0}^{n-1} H_i \f$ where the i-th Householder reflection is \f$ H_i = I - h_i v_i
+  * v_i^* \f$. The i-th Householder coefficient \f$ h_i \f$ is a scalar and the i-th Householder vector \f$
+  * v_i \f$ is a vector of the form
+  * \f[ 
+  * v_i = [\underbrace{0, \ldots, 0}_{i-1\mbox{ zeros}}, 1, \underbrace{*, \ldots,*}_{n-i\mbox{ arbitrary entries}} ]. 
+  * \f]
+  * The last \f$ n-i \f$ entries of \f$ v_i \f$ are called the essential part of the Householder vector.
+  *
+  * Typical usages are listed below, where H is a HouseholderSequence:
+  * \code
+  * A.applyOnTheRight(H);             // A = A * H
+  * A.applyOnTheLeft(H);              // A = H * A
+  * A.applyOnTheRight(H.adjoint());   // A = A * H^*
+  * A.applyOnTheLeft(H.adjoint());    // A = H^* * A
+  * MatrixXd Q = H;                   // conversion to a dense matrix
+  * \endcode
+  * In addition to the adjoint, you can also apply the inverse (=adjoint), the transpose, and the conjugate operators.
+  *
+  * See the documentation for HouseholderSequence(const VectorsType&, const CoeffsType&) for an example.
+  *
+  * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
+  */
+
+namespace internal {
+
+template<typename VectorsType, typename CoeffsType, int Side>
+struct traits<HouseholderSequence<VectorsType,CoeffsType,Side> >
+{
+  typedef typename VectorsType::Scalar Scalar;
+  typedef typename VectorsType::StorageIndex StorageIndex;
+  typedef typename VectorsType::StorageKind StorageKind;
+  enum {
+    RowsAtCompileTime = Side==OnTheLeft ? traits<VectorsType>::RowsAtCompileTime
+                                        : traits<VectorsType>::ColsAtCompileTime,
+    ColsAtCompileTime = RowsAtCompileTime,
+    MaxRowsAtCompileTime = Side==OnTheLeft ? traits<VectorsType>::MaxRowsAtCompileTime
+                                           : traits<VectorsType>::MaxColsAtCompileTime,
+    MaxColsAtCompileTime = MaxRowsAtCompileTime,
+    Flags = 0
+  };
+};
+
+struct HouseholderSequenceShape {};
+
+template<typename VectorsType, typename CoeffsType, int Side>
+struct evaluator_traits<HouseholderSequence<VectorsType,CoeffsType,Side> >
+  : public evaluator_traits_base<HouseholderSequence<VectorsType,CoeffsType,Side> >
+{
+  typedef HouseholderSequenceShape Shape;
+};
+
+template<typename VectorsType, typename CoeffsType, int Side>
+struct hseq_side_dependent_impl
+{
+  typedef Block<const VectorsType, Dynamic, 1> EssentialVectorType;
+  typedef HouseholderSequence<VectorsType, CoeffsType, OnTheLeft> HouseholderSequenceType;
+  static inline const EssentialVectorType essentialVector(const HouseholderSequenceType& h, Index k)
+  {
+    Index start = k+1+h.m_shift;
+    return Block<const VectorsType,Dynamic,1>(h.m_vectors, start, k, h.rows()-start, 1);
+  }
+};
+
+template<typename VectorsType, typename CoeffsType>
+struct hseq_side_dependent_impl<VectorsType, CoeffsType, OnTheRight>
+{
+  typedef Transpose<Block<const VectorsType, 1, Dynamic> > EssentialVectorType;
+  typedef HouseholderSequence<VectorsType, CoeffsType, OnTheRight> HouseholderSequenceType;
+  static inline const EssentialVectorType essentialVector(const HouseholderSequenceType& h, Index k)
+  {
+    Index start = k+1+h.m_shift;
+    return Block<const VectorsType,1,Dynamic>(h.m_vectors, k, start, 1, h.rows()-start).transpose();
+  }
+};
+
+template<typename OtherScalarType, typename MatrixType> struct matrix_type_times_scalar_type
+{
+  typedef typename ScalarBinaryOpTraits<OtherScalarType, typename MatrixType::Scalar>::ReturnType
+    ResultScalar;
+  typedef Matrix<ResultScalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime,
+                 0, MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime> Type;
+};
+
+} // end namespace internal
+
+template<typename VectorsType, typename CoeffsType, int Side> class HouseholderSequence
+  : public EigenBase<HouseholderSequence<VectorsType,CoeffsType,Side> >
+{
+    typedef typename internal::hseq_side_dependent_impl<VectorsType,CoeffsType,Side>::EssentialVectorType EssentialVectorType;
+  
+  public:
+    enum {
+      RowsAtCompileTime = internal::traits<HouseholderSequence>::RowsAtCompileTime,
+      ColsAtCompileTime = internal::traits<HouseholderSequence>::ColsAtCompileTime,
+      MaxRowsAtCompileTime = internal::traits<HouseholderSequence>::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = internal::traits<HouseholderSequence>::MaxColsAtCompileTime
+    };
+    typedef typename internal::traits<HouseholderSequence>::Scalar Scalar;
+
+    typedef HouseholderSequence<
+      typename internal::conditional<NumTraits<Scalar>::IsComplex,
+        typename internal::remove_all<typename VectorsType::ConjugateReturnType>::type,
+        VectorsType>::type,
+      typename internal::conditional<NumTraits<Scalar>::IsComplex,
+        typename internal::remove_all<typename CoeffsType::ConjugateReturnType>::type,
+        CoeffsType>::type,
+      Side
+    > ConjugateReturnType;
+
+    /** \brief Constructor.
+      * \param[in]  v      %Matrix containing the essential parts of the Householder vectors
+      * \param[in]  h      Vector containing the Householder coefficients
+      *
+      * Constructs the Householder sequence with coefficients given by \p h and vectors given by \p v. The
+      * i-th Householder coefficient \f$ h_i \f$ is given by \p h(i) and the essential part of the i-th
+      * Householder vector \f$ v_i \f$ is given by \p v(k,i) with \p k > \p i (the subdiagonal part of the
+      * i-th column). If \p v has fewer columns than rows, then the Householder sequence contains as many
+      * Householder reflections as there are columns.
+      *
+      * \note The %HouseholderSequence object stores \p v and \p h by reference.
+      *
+      * Example: \include HouseholderSequence_HouseholderSequence.cpp
+      * Output: \verbinclude HouseholderSequence_HouseholderSequence.out
+      *
+      * \sa setLength(), setShift()
+      */
+    HouseholderSequence(const VectorsType& v, const CoeffsType& h)
+      : m_vectors(v), m_coeffs(h), m_trans(false), m_length(v.diagonalSize()),
+        m_shift(0)
+    {
+    }
+
+    /** \brief Copy constructor. */
+    HouseholderSequence(const HouseholderSequence& other)
+      : m_vectors(other.m_vectors),
+        m_coeffs(other.m_coeffs),
+        m_trans(other.m_trans),
+        m_length(other.m_length),
+        m_shift(other.m_shift)
+    {
+    }
+
+    /** \brief Number of rows of transformation viewed as a matrix.
+      * \returns Number of rows 
+      * \details This equals the dimension of the space that the transformation acts on.
+      */
+    Index rows() const { return Side==OnTheLeft ? m_vectors.rows() : m_vectors.cols(); }
+
+    /** \brief Number of columns of transformation viewed as a matrix.
+      * \returns Number of columns
+      * \details This equals the dimension of the space that the transformation acts on.
+      */
+    Index cols() const { return rows(); }
+
+    /** \brief Essential part of a Householder vector.
+      * \param[in]  k  Index of Householder reflection
+      * \returns    Vector containing non-trivial entries of k-th Householder vector
+      *
+      * This function returns the essential part of the Householder vector \f$ v_i \f$. This is a vector of
+      * length \f$ n-i \f$ containing the last \f$ n-i \f$ entries of the vector
+      * \f[ 
+      * v_i = [\underbrace{0, \ldots, 0}_{i-1\mbox{ zeros}}, 1, \underbrace{*, \ldots,*}_{n-i\mbox{ arbitrary entries}} ]. 
+      * \f]
+      * The index \f$ i \f$ equals \p k + shift(), corresponding to the k-th column of the matrix \p v
+      * passed to the constructor.
+      *
+      * \sa setShift(), shift()
+      */
+    const EssentialVectorType essentialVector(Index k) const
+    {
+      eigen_assert(k >= 0 && k < m_length);
+      return internal::hseq_side_dependent_impl<VectorsType,CoeffsType,Side>::essentialVector(*this, k);
+    }
+
+    /** \brief %Transpose of the Householder sequence. */
+    HouseholderSequence transpose() const
+    {
+      return HouseholderSequence(*this).setTrans(!m_trans);
+    }
+
+    /** \brief Complex conjugate of the Householder sequence. */
+    ConjugateReturnType conjugate() const
+    {
+      return ConjugateReturnType(m_vectors.conjugate(), m_coeffs.conjugate())
+             .setTrans(m_trans)
+             .setLength(m_length)
+             .setShift(m_shift);
+    }
+
+    /** \brief Adjoint (conjugate transpose) of the Householder sequence. */
+    ConjugateReturnType adjoint() const
+    {
+      return conjugate().setTrans(!m_trans);
+    }
+
+    /** \brief Inverse of the Householder sequence (equals the adjoint). */
+    ConjugateReturnType inverse() const { return adjoint(); }
+
+    /** \internal */
+    template<typename DestType> inline void evalTo(DestType& dst) const
+    {
+      Matrix<Scalar, DestType::RowsAtCompileTime, 1,
+             AutoAlign|ColMajor, DestType::MaxRowsAtCompileTime, 1> workspace(rows());
+      evalTo(dst, workspace);
+    }
+
+    /** \internal */
+    template<typename Dest, typename Workspace>
+    void evalTo(Dest& dst, Workspace& workspace) const
+    {
+      workspace.resize(rows());
+      Index vecs = m_length;
+      if(internal::is_same_dense(dst,m_vectors))
+      {
+        // in-place
+        dst.diagonal().setOnes();
+        dst.template triangularView<StrictlyUpper>().setZero();
+        for(Index k = vecs-1; k >= 0; --k)
+        {
+          Index cornerSize = rows() - k - m_shift;
+          if(m_trans)
+            dst.bottomRightCorner(cornerSize, cornerSize)
+               .applyHouseholderOnTheRight(essentialVector(k), m_coeffs.coeff(k), workspace.data());
+          else
+            dst.bottomRightCorner(cornerSize, cornerSize)
+               .applyHouseholderOnTheLeft(essentialVector(k), m_coeffs.coeff(k), workspace.data());
+
+          // clear the off diagonal vector
+          dst.col(k).tail(rows()-k-1).setZero();
+        }
+        // clear the remaining columns if needed
+        for(Index k = 0; k<cols()-vecs ; ++k)
+          dst.col(k).tail(rows()-k-1).setZero();
+      }
+      else
+      {
+        dst.setIdentity(rows(), rows());
+        for(Index k = vecs-1; k >= 0; --k)
+        {
+          Index cornerSize = rows() - k - m_shift;
+          if(m_trans)
+            dst.bottomRightCorner(cornerSize, cornerSize)
+               .applyHouseholderOnTheRight(essentialVector(k), m_coeffs.coeff(k), &workspace.coeffRef(0));
+          else
+            dst.bottomRightCorner(cornerSize, cornerSize)
+               .applyHouseholderOnTheLeft(essentialVector(k), m_coeffs.coeff(k), &workspace.coeffRef(0));
+        }
+      }
+    }
+
+    /** \internal */
+    template<typename Dest> inline void applyThisOnTheRight(Dest& dst) const
+    {
+      Matrix<Scalar,1,Dest::RowsAtCompileTime,RowMajor,1,Dest::MaxRowsAtCompileTime> workspace(dst.rows());
+      applyThisOnTheRight(dst, workspace);
+    }
+
+    /** \internal */
+    template<typename Dest, typename Workspace>
+    inline void applyThisOnTheRight(Dest& dst, Workspace& workspace) const
+    {
+      workspace.resize(dst.rows());
+      for(Index k = 0; k < m_length; ++k)
+      {
+        Index actual_k = m_trans ? m_length-k-1 : k;
+        dst.rightCols(rows()-m_shift-actual_k)
+           .applyHouseholderOnTheRight(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
+      }
+    }
+
+    /** \internal */
+    template<typename Dest> inline void applyThisOnTheLeft(Dest& dst) const
+    {
+      Matrix<Scalar,1,Dest::ColsAtCompileTime,RowMajor,1,Dest::MaxColsAtCompileTime> workspace;
+      applyThisOnTheLeft(dst, workspace);
+    }
+
+    /** \internal */
+    template<typename Dest, typename Workspace>
+    inline void applyThisOnTheLeft(Dest& dst, Workspace& workspace) const
+    {
+      const Index BlockSize = 48;
+      // if the entries are large enough, then apply the reflectors by block
+      if(m_length>=BlockSize && dst.cols()>1)
+      {
+        for(Index i = 0; i < m_length; i+=BlockSize)
+        {
+          Index end = m_trans ? (std::min)(m_length,i+BlockSize) : m_length-i;
+          Index k = m_trans ? i : (std::max)(Index(0),end-BlockSize);
+          Index bs = end-k;
+          Index start = k + m_shift;
+          
+          typedef Block<typename internal::remove_all<VectorsType>::type,Dynamic,Dynamic> SubVectorsType;
+          SubVectorsType sub_vecs1(m_vectors.const_cast_derived(), Side==OnTheRight ? k : start,
+                                                                   Side==OnTheRight ? start : k,
+                                                                   Side==OnTheRight ? bs : m_vectors.rows()-start,
+                                                                   Side==OnTheRight ? m_vectors.cols()-start : bs);
+          typename internal::conditional<Side==OnTheRight, Transpose<SubVectorsType>, SubVectorsType&>::type sub_vecs(sub_vecs1);
+          Block<Dest,Dynamic,Dynamic> sub_dst(dst,dst.rows()-rows()+m_shift+k,0, rows()-m_shift-k,dst.cols());
+          apply_block_householder_on_the_left(sub_dst, sub_vecs, m_coeffs.segment(k, bs), !m_trans);
+        }
+      }
+      else
+      {
+        workspace.resize(dst.cols());
+        for(Index k = 0; k < m_length; ++k)
+        {
+          Index actual_k = m_trans ? k : m_length-k-1;
+          dst.bottomRows(rows()-m_shift-actual_k)
+            .applyHouseholderOnTheLeft(essentialVector(actual_k), m_coeffs.coeff(actual_k), workspace.data());
+        }
+      }
+    }
+
+    /** \brief Computes the product of a Householder sequence with a matrix.
+      * \param[in]  other  %Matrix being multiplied.
+      * \returns    Expression object representing the product.
+      *
+      * This function computes \f$ HM \f$ where \f$ H \f$ is the Householder sequence represented by \p *this
+      * and \f$ M \f$ is the matrix \p other.
+      */
+    template<typename OtherDerived>
+    typename internal::matrix_type_times_scalar_type<Scalar, OtherDerived>::Type operator*(const MatrixBase<OtherDerived>& other) const
+    {
+      typename internal::matrix_type_times_scalar_type<Scalar, OtherDerived>::Type
+        res(other.template cast<typename internal::matrix_type_times_scalar_type<Scalar,OtherDerived>::ResultScalar>());
+      applyThisOnTheLeft(res);
+      return res;
+    }
+
+    template<typename _VectorsType, typename _CoeffsType, int _Side> friend struct internal::hseq_side_dependent_impl;
+
+    /** \brief Sets the length of the Householder sequence.
+      * \param [in]  length  New value for the length.
+      *
+      * By default, the length \f$ n \f$ of the Householder sequence \f$ H = H_0 H_1 \ldots H_{n-1} \f$ is set
+      * to the number of columns of the matrix \p v passed to the constructor, or the number of rows if that
+      * is smaller. After this function is called, the length equals \p length.
+      *
+      * \sa length()
+      */
+    HouseholderSequence& setLength(Index length)
+    {
+      m_length = length;
+      return *this;
+    }
+
+    /** \brief Sets the shift of the Householder sequence.
+      * \param [in]  shift  New value for the shift.
+      *
+      * By default, a %HouseholderSequence object represents \f$ H = H_0 H_1 \ldots H_{n-1} \f$ and the i-th
+      * column of the matrix \p v passed to the constructor corresponds to the i-th Householder
+      * reflection. After this function is called, the object represents \f$ H = H_{\mathrm{shift}}
+      * H_{\mathrm{shift}+1} \ldots H_{n-1} \f$ and the i-th column of \p v corresponds to the (shift+i)-th
+      * Householder reflection.
+      *
+      * \sa shift()
+      */
+    HouseholderSequence& setShift(Index shift)
+    {
+      m_shift = shift;
+      return *this;
+    }
+
+    Index length() const { return m_length; }  /**< \brief Returns the length of the Householder sequence. */
+    Index shift() const { return m_shift; }    /**< \brief Returns the shift of the Householder sequence. */
+
+    /* Necessary for .adjoint() and .conjugate() */
+    template <typename VectorsType2, typename CoeffsType2, int Side2> friend class HouseholderSequence;
+
+  protected:
+
+    /** \brief Sets the transpose flag.
+      * \param [in]  trans  New value of the transpose flag.
+      *
+      * By default, the transpose flag is not set. If the transpose flag is set, then this object represents 
+      * \f$ H^T = H_{n-1}^T \ldots H_1^T H_0^T \f$ instead of \f$ H = H_0 H_1 \ldots H_{n-1} \f$.
+      *
+      * \sa trans()
+      */
+    HouseholderSequence& setTrans(bool trans)
+    {
+      m_trans = trans;
+      return *this;
+    }
+
+    bool trans() const { return m_trans; }     /**< \brief Returns the transpose flag. */
+
+    typename VectorsType::Nested m_vectors;
+    typename CoeffsType::Nested m_coeffs;
+    bool m_trans;
+    Index m_length;
+    Index m_shift;
+};
+
+/** \brief Computes the product of a matrix with a Householder sequence.
+  * \param[in]  other  %Matrix being multiplied.
+  * \param[in]  h      %HouseholderSequence being multiplied.
+  * \returns    Expression object representing the product.
+  *
+  * This function computes \f$ MH \f$ where \f$ M \f$ is the matrix \p other and \f$ H \f$ is the
+  * Householder sequence represented by \p h.
+  */
+template<typename OtherDerived, typename VectorsType, typename CoeffsType, int Side>
+typename internal::matrix_type_times_scalar_type<typename VectorsType::Scalar,OtherDerived>::Type operator*(const MatrixBase<OtherDerived>& other, const HouseholderSequence<VectorsType,CoeffsType,Side>& h)
+{
+  typename internal::matrix_type_times_scalar_type<typename VectorsType::Scalar,OtherDerived>::Type
+    res(other.template cast<typename internal::matrix_type_times_scalar_type<typename VectorsType::Scalar,OtherDerived>::ResultScalar>());
+  h.applyThisOnTheRight(res);
+  return res;
+}
+
+/** \ingroup Householder_Module \householder_module
+  * \brief Convenience function for constructing a Householder sequence. 
+  * \returns A HouseholderSequence constructed from the specified arguments.
+  */
+template<typename VectorsType, typename CoeffsType>
+HouseholderSequence<VectorsType,CoeffsType> householderSequence(const VectorsType& v, const CoeffsType& h)
+{
+  return HouseholderSequence<VectorsType,CoeffsType,OnTheLeft>(v, h);
+}
+
+/** \ingroup Householder_Module \householder_module
+  * \brief Convenience function for constructing a Householder sequence. 
+  * \returns A HouseholderSequence constructed from the specified arguments.
+  * \details This function differs from householderSequence() in that the template argument \p OnTheSide of
+  * the constructed HouseholderSequence is set to OnTheRight, instead of the default OnTheLeft.
+  */
+template<typename VectorsType, typename CoeffsType>
+HouseholderSequence<VectorsType,CoeffsType,OnTheRight> rightHouseholderSequence(const VectorsType& v, const CoeffsType& h)
+{
+  return HouseholderSequence<VectorsType,CoeffsType,OnTheRight>(v, h);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_HOUSEHOLDER_SEQUENCE_H
diff --git a/third-party/Eigen/src/Jacobi/Jacobi.h b/third-party/Eigen/src/Jacobi/Jacobi.h
new file mode 100644
index 00000000..1998c632
--- /dev/null
+++ b/third-party/Eigen/src/Jacobi/Jacobi.h
@@ -0,0 +1,462 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_JACOBI_H
+#define EIGEN_JACOBI_H
+
+namespace Eigen { 
+
+/** \ingroup Jacobi_Module
+  * \jacobi_module
+  * \class JacobiRotation
+  * \brief Rotation given by a cosine-sine pair.
+  *
+  * This class represents a Jacobi or Givens rotation.
+  * This is a 2D rotation in the plane \c J of angle \f$ \theta \f$ defined by
+  * its cosine \c c and sine \c s as follow:
+  * \f$ J = \left ( \begin{array}{cc} c & \overline s \\ -s  & \overline c \end{array} \right ) \f$
+  *
+  * You can apply the respective counter-clockwise rotation to a column vector \c v by
+  * applying its adjoint on the left: \f$ v = J^* v \f$ that translates to the following Eigen code:
+  * \code
+  * v.applyOnTheLeft(J.adjoint());
+  * \endcode
+  *
+  * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
+  */
+template<typename Scalar> class JacobiRotation
+{
+  public:
+    typedef typename NumTraits<Scalar>::Real RealScalar;
+
+    /** Default constructor without any initialization. */
+    JacobiRotation() {}
+
+    /** Construct a planar rotation from a cosine-sine pair (\a c, \c s). */
+    JacobiRotation(const Scalar& c, const Scalar& s) : m_c(c), m_s(s) {}
+
+    Scalar& c() { return m_c; }
+    Scalar c() const { return m_c; }
+    Scalar& s() { return m_s; }
+    Scalar s() const { return m_s; }
+
+    /** Concatenates two planar rotation */
+    JacobiRotation operator*(const JacobiRotation& other)
+    {
+      using numext::conj;
+      return JacobiRotation(m_c * other.m_c - conj(m_s) * other.m_s,
+                            conj(m_c * conj(other.m_s) + conj(m_s) * conj(other.m_c)));
+    }
+
+    /** Returns the transposed transformation */
+    JacobiRotation transpose() const { using numext::conj; return JacobiRotation(m_c, -conj(m_s)); }
+
+    /** Returns the adjoint transformation */
+    JacobiRotation adjoint() const { using numext::conj; return JacobiRotation(conj(m_c), -m_s); }
+
+    template<typename Derived>
+    bool makeJacobi(const MatrixBase<Derived>&, Index p, Index q);
+    bool makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z);
+
+    void makeGivens(const Scalar& p, const Scalar& q, Scalar* r=0);
+
+  protected:
+    void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::true_type);
+    void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::false_type);
+
+    Scalar m_c, m_s;
+};
+
+/** Makes \c *this as a Jacobi rotation \a J such that applying \a J on both the right and left sides of the selfadjoint 2x2 matrix
+  * \f$ B = \left ( \begin{array}{cc} x & y \\ \overline y & z \end{array} \right )\f$ yields a diagonal matrix \f$ A = J^* B J \f$
+  *
+  * \sa MatrixBase::makeJacobi(const MatrixBase<Derived>&, Index, Index), MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
+  */
+template<typename Scalar>
+bool JacobiRotation<Scalar>::makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z)
+{
+  using std::sqrt;
+  using std::abs;
+  RealScalar deno = RealScalar(2)*abs(y);
+  if(deno < (std::numeric_limits<RealScalar>::min)())
+  {
+    m_c = Scalar(1);
+    m_s = Scalar(0);
+    return false;
+  }
+  else
+  {
+    RealScalar tau = (x-z)/deno;
+    RealScalar w = sqrt(numext::abs2(tau) + RealScalar(1));
+    RealScalar t;
+    if(tau>RealScalar(0))
+    {
+      t = RealScalar(1) / (tau + w);
+    }
+    else
+    {
+      t = RealScalar(1) / (tau - w);
+    }
+    RealScalar sign_t = t > RealScalar(0) ? RealScalar(1) : RealScalar(-1);
+    RealScalar n = RealScalar(1) / sqrt(numext::abs2(t)+RealScalar(1));
+    m_s = - sign_t * (numext::conj(y) / abs(y)) * abs(t) * n;
+    m_c = n;
+    return true;
+  }
+}
+
+/** Makes \c *this as a Jacobi rotation \c J such that applying \a J on both the right and left sides of the 2x2 selfadjoint matrix
+  * \f$ B = \left ( \begin{array}{cc} \text{this}_{pp} & \text{this}_{pq} \\ (\text{this}_{pq})^* & \text{this}_{qq} \end{array} \right )\f$ yields
+  * a diagonal matrix \f$ A = J^* B J \f$
+  *
+  * Example: \include Jacobi_makeJacobi.cpp
+  * Output: \verbinclude Jacobi_makeJacobi.out
+  *
+  * \sa JacobiRotation::makeJacobi(RealScalar, Scalar, RealScalar), MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
+  */
+template<typename Scalar>
+template<typename Derived>
+inline bool JacobiRotation<Scalar>::makeJacobi(const MatrixBase<Derived>& m, Index p, Index q)
+{
+  return makeJacobi(numext::real(m.coeff(p,p)), m.coeff(p,q), numext::real(m.coeff(q,q)));
+}
+
+/** Makes \c *this as a Givens rotation \c G such that applying \f$ G^* \f$ to the left of the vector
+  * \f$ V = \left ( \begin{array}{c} p \\ q \end{array} \right )\f$ yields:
+  * \f$ G^* V = \left ( \begin{array}{c} r \\ 0 \end{array} \right )\f$.
+  *
+  * The value of \a r is returned if \a r is not null (the default is null).
+  * Also note that G is built such that the cosine is always real.
+  *
+  * Example: \include Jacobi_makeGivens.cpp
+  * Output: \verbinclude Jacobi_makeGivens.out
+  *
+  * This function implements the continuous Givens rotation generation algorithm
+  * found in Anderson (2000), Discontinuous Plane Rotations and the Symmetric Eigenvalue Problem.
+  * LAPACK Working Note 150, University of Tennessee, UT-CS-00-454, December 4, 2000.
+  *
+  * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
+  */
+template<typename Scalar>
+void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* r)
+{
+  makeGivens(p, q, r, typename internal::conditional<NumTraits<Scalar>::IsComplex, internal::true_type, internal::false_type>::type());
+}
+
+
+// specialization for complexes
+template<typename Scalar>
+void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::true_type)
+{
+  using std::sqrt;
+  using std::abs;
+  using numext::conj;
+  
+  if(q==Scalar(0))
+  {
+    m_c = numext::real(p)<0 ? Scalar(-1) : Scalar(1);
+    m_s = 0;
+    if(r) *r = m_c * p;
+  }
+  else if(p==Scalar(0))
+  {
+    m_c = 0;
+    m_s = -q/abs(q);
+    if(r) *r = abs(q);
+  }
+  else
+  {
+    RealScalar p1 = numext::norm1(p);
+    RealScalar q1 = numext::norm1(q);
+    if(p1>=q1)
+    {
+      Scalar ps = p / p1;
+      RealScalar p2 = numext::abs2(ps);
+      Scalar qs = q / p1;
+      RealScalar q2 = numext::abs2(qs);
+
+      RealScalar u = sqrt(RealScalar(1) + q2/p2);
+      if(numext::real(p)<RealScalar(0))
+        u = -u;
+
+      m_c = Scalar(1)/u;
+      m_s = -qs*conj(ps)*(m_c/p2);
+      if(r) *r = p * u;
+    }
+    else
+    {
+      Scalar ps = p / q1;
+      RealScalar p2 = numext::abs2(ps);
+      Scalar qs = q / q1;
+      RealScalar q2 = numext::abs2(qs);
+
+      RealScalar u = q1 * sqrt(p2 + q2);
+      if(numext::real(p)<RealScalar(0))
+        u = -u;
+
+      p1 = abs(p);
+      ps = p/p1;
+      m_c = p1/u;
+      m_s = -conj(ps) * (q/u);
+      if(r) *r = ps * u;
+    }
+  }
+}
+
+// specialization for reals
+template<typename Scalar>
+void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::false_type)
+{
+  using std::sqrt;
+  using std::abs;
+  if(q==Scalar(0))
+  {
+    m_c = p<Scalar(0) ? Scalar(-1) : Scalar(1);
+    m_s = Scalar(0);
+    if(r) *r = abs(p);
+  }
+  else if(p==Scalar(0))
+  {
+    m_c = Scalar(0);
+    m_s = q<Scalar(0) ? Scalar(1) : Scalar(-1);
+    if(r) *r = abs(q);
+  }
+  else if(abs(p) > abs(q))
+  {
+    Scalar t = q/p;
+    Scalar u = sqrt(Scalar(1) + numext::abs2(t));
+    if(p<Scalar(0))
+      u = -u;
+    m_c = Scalar(1)/u;
+    m_s = -t * m_c;
+    if(r) *r = p * u;
+  }
+  else
+  {
+    Scalar t = p/q;
+    Scalar u = sqrt(Scalar(1) + numext::abs2(t));
+    if(q<Scalar(0))
+      u = -u;
+    m_s = -Scalar(1)/u;
+    m_c = -t * m_s;
+    if(r) *r = q * u;
+  }
+
+}
+
+/****************************************************************************************
+*   Implementation of MatrixBase methods
+****************************************************************************************/
+
+namespace internal {
+/** \jacobi_module
+  * Applies the clock wise 2D rotation \a j to the set of 2D vectors of cordinates \a x and \a y:
+  * \f$ \left ( \begin{array}{cc} x \\ y \end{array} \right )  =  J \left ( \begin{array}{cc} x \\ y \end{array} \right ) \f$
+  *
+  * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
+  */
+template<typename VectorX, typename VectorY, typename OtherScalar>
+void apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j);
+}
+
+/** \jacobi_module
+  * Applies the rotation in the plane \a j to the rows \a p and \a q of \c *this, i.e., it computes B = J * B,
+  * with \f$ B = \left ( \begin{array}{cc} \text{*this.row}(p) \\ \text{*this.row}(q) \end{array} \right ) \f$.
+  *
+  * \sa class JacobiRotation, MatrixBase::applyOnTheRight(), internal::apply_rotation_in_the_plane()
+  */
+template<typename Derived>
+template<typename OtherScalar>
+inline void MatrixBase<Derived>::applyOnTheLeft(Index p, Index q, const JacobiRotation<OtherScalar>& j)
+{
+  RowXpr x(this->row(p));
+  RowXpr y(this->row(q));
+  internal::apply_rotation_in_the_plane(x, y, j);
+}
+
+/** \ingroup Jacobi_Module
+  * Applies the rotation in the plane \a j to the columns \a p and \a q of \c *this, i.e., it computes B = B * J
+  * with \f$ B = \left ( \begin{array}{cc} \text{*this.col}(p) & \text{*this.col}(q) \end{array} \right ) \f$.
+  *
+  * \sa class JacobiRotation, MatrixBase::applyOnTheLeft(), internal::apply_rotation_in_the_plane()
+  */
+template<typename Derived>
+template<typename OtherScalar>
+inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiRotation<OtherScalar>& j)
+{
+  ColXpr x(this->col(p));
+  ColXpr y(this->col(q));
+  internal::apply_rotation_in_the_plane(x, y, j.transpose());
+}
+
+namespace internal {
+
+template<typename Scalar, typename OtherScalar,
+         int SizeAtCompileTime, int MinAlignment, bool Vectorizable>
+struct apply_rotation_in_the_plane_selector
+{
+  static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
+  {
+    for(Index i=0; i<size; ++i)
+    {
+      Scalar xi = *x;
+      Scalar yi = *y;
+      *x =  c * xi + numext::conj(s) * yi;
+      *y = -s * xi + numext::conj(c) * yi;
+      x += incrx;
+      y += incry;
+    }
+  }
+};
+
+template<typename Scalar, typename OtherScalar,
+         int SizeAtCompileTime, int MinAlignment>
+struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */>
+{
+  static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
+  {
+    enum {
+      PacketSize = packet_traits<Scalar>::size,
+      OtherPacketSize = packet_traits<OtherScalar>::size
+    };
+    typedef typename packet_traits<Scalar>::type Packet;
+    typedef typename packet_traits<OtherScalar>::type OtherPacket;
+
+    /*** dynamic-size vectorized paths ***/
+    if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1))
+    {
+      // both vectors are sequentially stored in memory => vectorization
+      enum { Peeling = 2 };
+
+      Index alignedStart = internal::first_default_aligned(y, size);
+      Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
+
+      const OtherPacket pc = pset1<OtherPacket>(c);
+      const OtherPacket ps = pset1<OtherPacket>(s);
+      conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
+      conj_helper<OtherPacket,Packet,false,false> pm;
+
+      for(Index i=0; i<alignedStart; ++i)
+      {
+        Scalar xi = x[i];
+        Scalar yi = y[i];
+        x[i] =  c * xi + numext::conj(s) * yi;
+        y[i] = -s * xi + numext::conj(c) * yi;
+      }
+
+      Scalar* EIGEN_RESTRICT px = x + alignedStart;
+      Scalar* EIGEN_RESTRICT py = y + alignedStart;
+
+      if(internal::first_default_aligned(x, size)==alignedStart)
+      {
+        for(Index i=alignedStart; i<alignedEnd; i+=PacketSize)
+        {
+          Packet xi = pload<Packet>(px);
+          Packet yi = pload<Packet>(py);
+          pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+          pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+          px += PacketSize;
+          py += PacketSize;
+        }
+      }
+      else
+      {
+        Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize);
+        for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize)
+        {
+          Packet xi   = ploadu<Packet>(px);
+          Packet xi1  = ploadu<Packet>(px+PacketSize);
+          Packet yi   = pload <Packet>(py);
+          Packet yi1  = pload <Packet>(py+PacketSize);
+          pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+          pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
+          pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+          pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
+          px += Peeling*PacketSize;
+          py += Peeling*PacketSize;
+        }
+        if(alignedEnd!=peelingEnd)
+        {
+          Packet xi = ploadu<Packet>(x+peelingEnd);
+          Packet yi = pload <Packet>(y+peelingEnd);
+          pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+          pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+        }
+      }
+
+      for(Index i=alignedEnd; i<size; ++i)
+      {
+        Scalar xi = x[i];
+        Scalar yi = y[i];
+        x[i] =  c * xi + numext::conj(s) * yi;
+        y[i] = -s * xi + numext::conj(c) * yi;
+      }
+    }
+
+    /*** fixed-size vectorized path ***/
+    else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment
+    {
+      const OtherPacket pc = pset1<OtherPacket>(c);
+      const OtherPacket ps = pset1<OtherPacket>(s);
+      conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
+      conj_helper<OtherPacket,Packet,false,false> pm;
+      Scalar* EIGEN_RESTRICT px = x;
+      Scalar* EIGEN_RESTRICT py = y;
+      for(Index i=0; i<size; i+=PacketSize)
+      {
+        Packet xi = pload<Packet>(px);
+        Packet yi = pload<Packet>(py);
+        pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
+        pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
+        px += PacketSize;
+        py += PacketSize;
+      }
+    }
+
+    /*** non-vectorized path ***/
+    else
+    {
+      apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s);
+    }
+  }
+};
+
+template<typename VectorX, typename VectorY, typename OtherScalar>
+void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
+{
+  typedef typename VectorX::Scalar Scalar;
+  const bool Vectorizable =    (VectorX::Flags & VectorY::Flags & PacketAccessBit)
+                            && (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size));
+
+  eigen_assert(xpr_x.size() == xpr_y.size());
+  Index size = xpr_x.size();
+  Index incrx = xpr_x.derived().innerStride();
+  Index incry = xpr_y.derived().innerStride();
+
+  Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0);
+  Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0);
+  
+  OtherScalar c = j.c();
+  OtherScalar s = j.s();
+  if (c==OtherScalar(1) && s==OtherScalar(0))
+    return;
+
+  apply_rotation_in_the_plane_selector<
+    Scalar,OtherScalar,
+    VectorX::SizeAtCompileTime,
+    EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment),
+    Vectorizable>::run(x,incrx,y,incry,size,c,s);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_JACOBI_H
diff --git a/third-party/Eigen/src/LU/Determinant.h b/third-party/Eigen/src/LU/Determinant.h
new file mode 100644
index 00000000..d6a3c1e5
--- /dev/null
+++ b/third-party/Eigen/src/LU/Determinant.h
@@ -0,0 +1,101 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DETERMINANT_H
+#define EIGEN_DETERMINANT_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename Derived>
+inline const typename Derived::Scalar bruteforce_det3_helper
+(const MatrixBase<Derived>& matrix, int a, int b, int c)
+{
+  return matrix.coeff(0,a)
+         * (matrix.coeff(1,b) * matrix.coeff(2,c) - matrix.coeff(1,c) * matrix.coeff(2,b));
+}
+
+template<typename Derived>
+const typename Derived::Scalar bruteforce_det4_helper
+(const MatrixBase<Derived>& matrix, int j, int k, int m, int n)
+{
+  return (matrix.coeff(j,0) * matrix.coeff(k,1) - matrix.coeff(k,0) * matrix.coeff(j,1))
+       * (matrix.coeff(m,2) * matrix.coeff(n,3) - matrix.coeff(n,2) * matrix.coeff(m,3));
+}
+
+template<typename Derived,
+         int DeterminantType = Derived::RowsAtCompileTime
+> struct determinant_impl
+{
+  static inline typename traits<Derived>::Scalar run(const Derived& m)
+  {
+    if(Derived::ColsAtCompileTime==Dynamic && m.rows()==0)
+      return typename traits<Derived>::Scalar(1);
+    return m.partialPivLu().determinant();
+  }
+};
+
+template<typename Derived> struct determinant_impl<Derived, 1>
+{
+  static inline typename traits<Derived>::Scalar run(const Derived& m)
+  {
+    return m.coeff(0,0);
+  }
+};
+
+template<typename Derived> struct determinant_impl<Derived, 2>
+{
+  static inline typename traits<Derived>::Scalar run(const Derived& m)
+  {
+    return m.coeff(0,0) * m.coeff(1,1) - m.coeff(1,0) * m.coeff(0,1);
+  }
+};
+
+template<typename Derived> struct determinant_impl<Derived, 3>
+{
+  static inline typename traits<Derived>::Scalar run(const Derived& m)
+  {
+    return bruteforce_det3_helper(m,0,1,2)
+          - bruteforce_det3_helper(m,1,0,2)
+          + bruteforce_det3_helper(m,2,0,1);
+  }
+};
+
+template<typename Derived> struct determinant_impl<Derived, 4>
+{
+  static typename traits<Derived>::Scalar run(const Derived& m)
+  {
+    // trick by Martin Costabel to compute 4x4 det with only 30 muls
+    return bruteforce_det4_helper(m,0,1,2,3)
+          - bruteforce_det4_helper(m,0,2,1,3)
+          + bruteforce_det4_helper(m,0,3,1,2)
+          + bruteforce_det4_helper(m,1,2,0,3)
+          - bruteforce_det4_helper(m,1,3,0,2)
+          + bruteforce_det4_helper(m,2,3,0,1);
+  }
+};
+
+} // end namespace internal
+
+/** \lu_module
+  *
+  * \returns the determinant of this matrix
+  */
+template<typename Derived>
+inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const
+{
+  eigen_assert(rows() == cols());
+  typedef typename internal::nested_eval<Derived,Base::RowsAtCompileTime>::type Nested;
+  return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_DETERMINANT_H
diff --git a/third-party/Eigen/src/LU/FullPivLU.h b/third-party/Eigen/src/LU/FullPivLU.h
new file mode 100644
index 00000000..03b6af70
--- /dev/null
+++ b/third-party/Eigen/src/LU/FullPivLU.h
@@ -0,0 +1,891 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_LU_H
+#define EIGEN_LU_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
+ : traits<_MatrixType>
+{
+  typedef MatrixXpr XprKind;
+  typedef SolverStorage StorageKind;
+  enum { Flags = 0 };
+};
+
+} // end namespace internal
+
+/** \ingroup LU_Module
+  *
+  * \class FullPivLU
+  *
+  * \brief LU decomposition of a matrix with complete pivoting, and related features
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
+  *
+  * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is
+  * decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is
+  * upper-triangular, and P and Q are permutation matrices. This is a rank-revealing LU
+  * decomposition. The eigenvalues (diagonal coefficients) of U are sorted in such a way that any
+  * zeros are at the end.
+  *
+  * This decomposition provides the generic approach to solving systems of linear equations, computing
+  * the rank, invertibility, inverse, kernel, and determinant.
+  *
+  * This LU decomposition is very stable and well tested with large matrices. However there are use cases where the SVD
+  * decomposition is inherently more stable and/or flexible. For example, when computing the kernel of a matrix,
+  * working with the SVD allows to select the smallest singular values of the matrix, something that
+  * the LU decomposition doesn't see.
+  *
+  * The data of the LU decomposition can be directly accessed through the methods matrixLU(),
+  * permutationP(), permutationQ().
+  *
+  * As an exemple, here is how the original matrix can be retrieved:
+  * \include class_FullPivLU.cpp
+  * Output: \verbinclude class_FullPivLU.out
+  *
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+  * 
+  * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse()
+  */
+template<typename _MatrixType> class FullPivLU
+  : public SolverBase<FullPivLU<_MatrixType> >
+{
+  public:
+    typedef _MatrixType MatrixType;
+    typedef SolverBase<FullPivLU> Base;
+
+    EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU)
+    // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
+    enum {
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+    typedef typename internal::plain_row_type<MatrixType, StorageIndex>::type IntRowVectorType;
+    typedef typename internal::plain_col_type<MatrixType, StorageIndex>::type IntColVectorType;
+    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType;
+    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType;
+    typedef typename MatrixType::PlainObject PlainObject;
+
+    /**
+      * \brief Default Constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via LU::compute(const MatrixType&).
+      */
+    FullPivLU();
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa FullPivLU()
+      */
+    FullPivLU(Index rows, Index cols);
+
+    /** Constructor.
+      *
+      * \param matrix the matrix of which to compute the LU decomposition.
+      *               It is required to be nonzero.
+      */
+    template<typename InputType>
+    explicit FullPivLU(const EigenBase<InputType>& matrix);
+
+    /** \brief Constructs a LU factorization from a given matrix
+      *
+      * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
+      *
+      * \sa FullPivLU(const EigenBase&)
+      */
+    template<typename InputType>
+    explicit FullPivLU(EigenBase<InputType>& matrix);
+
+    /** Computes the LU decomposition of the given matrix.
+      *
+      * \param matrix the matrix of which to compute the LU decomposition.
+      *               It is required to be nonzero.
+      *
+      * \returns a reference to *this
+      */
+    template<typename InputType>
+    FullPivLU& compute(const EigenBase<InputType>& matrix) {
+      m_lu = matrix.derived();
+      computeInPlace();
+      return *this;
+    }
+
+    /** \returns the LU decomposition matrix: the upper-triangular part is U, the
+      * unit-lower-triangular part is L (at least for square matrices; in the non-square
+      * case, special care is needed, see the documentation of class FullPivLU).
+      *
+      * \sa matrixL(), matrixU()
+      */
+    inline const MatrixType& matrixLU() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return m_lu;
+    }
+
+    /** \returns the number of nonzero pivots in the LU decomposition.
+      * Here nonzero is meant in the exact sense, not in a fuzzy sense.
+      * So that notion isn't really intrinsically interesting, but it is
+      * still useful when implementing algorithms.
+      *
+      * \sa rank()
+      */
+    inline Index nonzeroPivots() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return m_nonzero_pivots;
+    }
+
+    /** \returns the absolute value of the biggest pivot, i.e. the biggest
+      *          diagonal coefficient of U.
+      */
+    RealScalar maxPivot() const { return m_maxpivot; }
+
+    /** \returns the permutation matrix P
+      *
+      * \sa permutationQ()
+      */
+    EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return m_p;
+    }
+
+    /** \returns the permutation matrix Q
+      *
+      * \sa permutationP()
+      */
+    inline const PermutationQType& permutationQ() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return m_q;
+    }
+
+    /** \returns the kernel of the matrix, also called its null-space. The columns of the returned matrix
+      * will form a basis of the kernel.
+      *
+      * \note If the kernel has dimension zero, then the returned matrix is a column-vector filled with zeros.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      *
+      * Example: \include FullPivLU_kernel.cpp
+      * Output: \verbinclude FullPivLU_kernel.out
+      *
+      * \sa image()
+      */
+    inline const internal::kernel_retval<FullPivLU> kernel() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return internal::kernel_retval<FullPivLU>(*this);
+    }
+
+    /** \returns the image of the matrix, also called its column-space. The columns of the returned matrix
+      * will form a basis of the image (column-space).
+      *
+      * \param originalMatrix the original matrix, of which *this is the LU decomposition.
+      *                       The reason why it is needed to pass it here, is that this allows
+      *                       a large optimization, as otherwise this method would need to reconstruct it
+      *                       from the LU decomposition.
+      *
+      * \note If the image has dimension zero, then the returned matrix is a column-vector filled with zeros.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      *
+      * Example: \include FullPivLU_image.cpp
+      * Output: \verbinclude FullPivLU_image.out
+      *
+      * \sa kernel()
+      */
+    inline const internal::image_retval<FullPivLU>
+      image(const MatrixType& originalMatrix) const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return internal::image_retval<FullPivLU>(*this, originalMatrix);
+    }
+
+    /** \return a solution x to the equation Ax=b, where A is the matrix of which
+      * *this is the LU decomposition.
+      *
+      * \param b the right-hand-side of the equation to solve. Can be a vector or a matrix,
+      *          the only requirement in order for the equation to make sense is that
+      *          b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.
+      *
+      * \returns a solution.
+      *
+      * \note_about_checking_solutions
+      *
+      * \note_about_arbitrary_choice_of_solution
+      * \note_about_using_kernel_to_study_multiple_solutions
+      *
+      * Example: \include FullPivLU_solve.cpp
+      * Output: \verbinclude FullPivLU_solve.out
+      *
+      * \sa TriangularView::solve(), kernel(), inverse()
+      */
+    // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion.
+    template<typename Rhs>
+    inline const Solve<FullPivLU, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return Solve<FullPivLU, Rhs>(*this, b.derived());
+    }
+
+    /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is
+        the LU decomposition.
+      */
+    inline RealScalar rcond() const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return internal::rcond_estimate_helper(m_l1_norm, *this);
+    }
+
+    /** \returns the determinant of the matrix of which
+      * *this is the LU decomposition. It has only linear complexity
+      * (that is, O(n) where n is the dimension of the square matrix)
+      * as the LU decomposition has already been computed.
+      *
+      * \note This is only for square matrices.
+      *
+      * \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers
+      *       optimized paths.
+      *
+      * \warning a determinant can be very big or small, so for matrices
+      * of large enough dimension, there is a risk of overflow/underflow.
+      *
+      * \sa MatrixBase::determinant()
+      */
+    typename internal::traits<MatrixType>::Scalar determinant() const;
+
+    /** Allows to prescribe a threshold to be used by certain methods, such as rank(),
+      * who need to determine when pivots are to be considered nonzero. This is not used for the
+      * LU decomposition itself.
+      *
+      * When it needs to get the threshold value, Eigen calls threshold(). By default, this
+      * uses a formula to automatically determine a reasonable threshold.
+      * Once you have called the present method setThreshold(const RealScalar&),
+      * your value is used instead.
+      *
+      * \param threshold The new value to use as the threshold.
+      *
+      * A pivot will be considered nonzero if its absolute value is strictly greater than
+      *  \f$ \vert pivot \vert \leqslant threshold \times \vert maxpivot \vert \f$
+      * where maxpivot is the biggest pivot.
+      *
+      * If you want to come back to the default behavior, call setThreshold(Default_t)
+      */
+    FullPivLU& setThreshold(const RealScalar& threshold)
+    {
+      m_usePrescribedThreshold = true;
+      m_prescribedThreshold = threshold;
+      return *this;
+    }
+
+    /** Allows to come back to the default behavior, letting Eigen use its default formula for
+      * determining the threshold.
+      *
+      * You should pass the special object Eigen::Default as parameter here.
+      * \code lu.setThreshold(Eigen::Default); \endcode
+      *
+      * See the documentation of setThreshold(const RealScalar&).
+      */
+    FullPivLU& setThreshold(Default_t)
+    {
+      m_usePrescribedThreshold = false;
+      return *this;
+    }
+
+    /** Returns the threshold that will be used by certain methods such as rank().
+      *
+      * See the documentation of setThreshold(const RealScalar&).
+      */
+    RealScalar threshold() const
+    {
+      eigen_assert(m_isInitialized || m_usePrescribedThreshold);
+      return m_usePrescribedThreshold ? m_prescribedThreshold
+      // this formula comes from experimenting (see "LU precision tuning" thread on the list)
+      // and turns out to be identical to Higham's formula used already in LDLt.
+                                      : NumTraits<Scalar>::epsilon() * m_lu.diagonalSize();
+    }
+
+    /** \returns the rank of the matrix of which *this is the LU decomposition.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline Index rank() const
+    {
+      using std::abs;
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold();
+      Index result = 0;
+      for(Index i = 0; i < m_nonzero_pivots; ++i)
+        result += (abs(m_lu.coeff(i,i)) > premultiplied_threshold);
+      return result;
+    }
+
+    /** \returns the dimension of the kernel of the matrix of which *this is the LU decomposition.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline Index dimensionOfKernel() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return cols() - rank();
+    }
+
+    /** \returns true if the matrix of which *this is the LU decomposition represents an injective
+      *          linear map, i.e. has trivial kernel; false otherwise.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isInjective() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return rank() == cols();
+    }
+
+    /** \returns true if the matrix of which *this is the LU decomposition represents a surjective
+      *          linear map; false otherwise.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isSurjective() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return rank() == rows();
+    }
+
+    /** \returns true if the matrix of which *this is the LU decomposition is invertible.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isInvertible() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return isInjective() && (m_lu.rows() == m_lu.cols());
+    }
+
+    /** \returns the inverse of the matrix of which *this is the LU decomposition.
+      *
+      * \note If this matrix is not invertible, the returned matrix has undefined coefficients.
+      *       Use isInvertible() to first determine whether this matrix is invertible.
+      *
+      * \sa MatrixBase::inverse()
+      */
+    inline const Inverse<FullPivLU> inverse() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!");
+      return Inverse<FullPivLU>(*this);
+    }
+
+    MatrixType reconstructedMatrix() const;
+
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_lu.rows(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_lu.cols(); }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+
+    template<bool Conjugate, typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;
+    #endif
+
+  protected:
+
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+
+    void computeInPlace();
+
+    MatrixType m_lu;
+    PermutationPType m_p;
+    PermutationQType m_q;
+    IntColVectorType m_rowsTranspositions;
+    IntRowVectorType m_colsTranspositions;
+    Index m_nonzero_pivots;
+    RealScalar m_l1_norm;
+    RealScalar m_maxpivot, m_prescribedThreshold;
+    signed char m_det_pq;
+    bool m_isInitialized, m_usePrescribedThreshold;
+};
+
+template<typename MatrixType>
+FullPivLU<MatrixType>::FullPivLU()
+  : m_isInitialized(false), m_usePrescribedThreshold(false)
+{
+}
+
+template<typename MatrixType>
+FullPivLU<MatrixType>::FullPivLU(Index rows, Index cols)
+  : m_lu(rows, cols),
+    m_p(rows),
+    m_q(cols),
+    m_rowsTranspositions(rows),
+    m_colsTranspositions(cols),
+    m_isInitialized(false),
+    m_usePrescribedThreshold(false)
+{
+}
+
+template<typename MatrixType>
+template<typename InputType>
+FullPivLU<MatrixType>::FullPivLU(const EigenBase<InputType>& matrix)
+  : m_lu(matrix.rows(), matrix.cols()),
+    m_p(matrix.rows()),
+    m_q(matrix.cols()),
+    m_rowsTranspositions(matrix.rows()),
+    m_colsTranspositions(matrix.cols()),
+    m_isInitialized(false),
+    m_usePrescribedThreshold(false)
+{
+  compute(matrix.derived());
+}
+
+template<typename MatrixType>
+template<typename InputType>
+FullPivLU<MatrixType>::FullPivLU(EigenBase<InputType>& matrix)
+  : m_lu(matrix.derived()),
+    m_p(matrix.rows()),
+    m_q(matrix.cols()),
+    m_rowsTranspositions(matrix.rows()),
+    m_colsTranspositions(matrix.cols()),
+    m_isInitialized(false),
+    m_usePrescribedThreshold(false)
+{
+  computeInPlace();
+}
+
+template<typename MatrixType>
+void FullPivLU<MatrixType>::computeInPlace()
+{
+  check_template_parameters();
+
+  // the permutations are stored as int indices, so just to be sure:
+  eigen_assert(m_lu.rows()<=NumTraits<int>::highest() && m_lu.cols()<=NumTraits<int>::highest());
+
+  m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
+
+  const Index size = m_lu.diagonalSize();
+  const Index rows = m_lu.rows();
+  const Index cols = m_lu.cols();
+
+  // will store the transpositions, before we accumulate them at the end.
+  // can't accumulate on-the-fly because that will be done in reverse order for the rows.
+  m_rowsTranspositions.resize(m_lu.rows());
+  m_colsTranspositions.resize(m_lu.cols());
+  Index number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. m_rowsTranspositions[i]!=i
+
+  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
+  m_maxpivot = RealScalar(0);
+
+  for(Index k = 0; k < size; ++k)
+  {
+    // First, we need to find the pivot.
+
+    // biggest coefficient in the remaining bottom-right corner (starting at row k, col k)
+    Index row_of_biggest_in_corner, col_of_biggest_in_corner;
+    typedef internal::scalar_score_coeff_op<Scalar> Scoring;
+    typedef typename Scoring::result_type Score;
+    Score biggest_in_corner;
+    biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k)
+                        .unaryExpr(Scoring())
+                        .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);
+    row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,
+    col_of_biggest_in_corner += k; // need to add k to them.
+
+    if(biggest_in_corner==Score(0))
+    {
+      // before exiting, make sure to initialize the still uninitialized transpositions
+      // in a sane state without destroying what we already have.
+      m_nonzero_pivots = k;
+      for(Index i = k; i < size; ++i)
+      {
+        m_rowsTranspositions.coeffRef(i) = i;
+        m_colsTranspositions.coeffRef(i) = i;
+      }
+      break;
+    }
+
+    RealScalar abs_pivot = internal::abs_knowing_score<Scalar>()(m_lu(row_of_biggest_in_corner, col_of_biggest_in_corner), biggest_in_corner);
+    if(abs_pivot > m_maxpivot) m_maxpivot = abs_pivot;
+
+    // Now that we've found the pivot, we need to apply the row/col swaps to
+    // bring it to the location (k,k).
+
+    m_rowsTranspositions.coeffRef(k) = row_of_biggest_in_corner;
+    m_colsTranspositions.coeffRef(k) = col_of_biggest_in_corner;
+    if(k != row_of_biggest_in_corner) {
+      m_lu.row(k).swap(m_lu.row(row_of_biggest_in_corner));
+      ++number_of_transpositions;
+    }
+    if(k != col_of_biggest_in_corner) {
+      m_lu.col(k).swap(m_lu.col(col_of_biggest_in_corner));
+      ++number_of_transpositions;
+    }
+
+    // Now that the pivot is at the right location, we update the remaining
+    // bottom-right corner by Gaussian elimination.
+
+    if(k<rows-1)
+      m_lu.col(k).tail(rows-k-1) /= m_lu.coeff(k,k);
+    if(k<size-1)
+      m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).tail(rows-k-1) * m_lu.row(k).tail(cols-k-1);
+  }
+
+  // the main loop is over, we still have to accumulate the transpositions to find the
+  // permutations P and Q
+
+  m_p.setIdentity(rows);
+  for(Index k = size-1; k >= 0; --k)
+    m_p.applyTranspositionOnTheRight(k, m_rowsTranspositions.coeff(k));
+
+  m_q.setIdentity(cols);
+  for(Index k = 0; k < size; ++k)
+    m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k));
+
+  m_det_pq = (number_of_transpositions%2) ? -1 : 1;
+
+  m_isInitialized = true;
+}
+
+template<typename MatrixType>
+typename internal::traits<MatrixType>::Scalar FullPivLU<MatrixType>::determinant() const
+{
+  eigen_assert(m_isInitialized && "LU is not initialized.");
+  eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the determinant of a non-square matrix!");
+  return Scalar(m_det_pq) * Scalar(m_lu.diagonal().prod());
+}
+
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: \f$ P^{-1} L U Q^{-1} \f$.
+ * This function is provided for debug purposes. */
+template<typename MatrixType>
+MatrixType FullPivLU<MatrixType>::reconstructedMatrix() const
+{
+  eigen_assert(m_isInitialized && "LU is not initialized.");
+  const Index smalldim = (std::min)(m_lu.rows(), m_lu.cols());
+  // LU
+  MatrixType res(m_lu.rows(),m_lu.cols());
+  // FIXME the .toDenseMatrix() should not be needed...
+  res = m_lu.leftCols(smalldim)
+            .template triangularView<UnitLower>().toDenseMatrix()
+      * m_lu.topRows(smalldim)
+            .template triangularView<Upper>().toDenseMatrix();
+
+  // P^{-1}(LU)
+  res = m_p.inverse() * res;
+
+  // (P^{-1}LU)Q^{-1}
+  res = res * m_q.inverse();
+
+  return res;
+}
+
+/********* Implementation of kernel() **************************************************/
+
+namespace internal {
+template<typename _MatrixType>
+struct kernel_retval<FullPivLU<_MatrixType> >
+  : kernel_retval_base<FullPivLU<_MatrixType> >
+{
+  EIGEN_MAKE_KERNEL_HELPERS(FullPivLU<_MatrixType>)
+
+  enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(
+            MatrixType::MaxColsAtCompileTime,
+            MatrixType::MaxRowsAtCompileTime)
+  };
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    using std::abs;
+    const Index cols = dec().matrixLU().cols(), dimker = cols - rank();
+    if(dimker == 0)
+    {
+      // The Kernel is just {0}, so it doesn't have a basis properly speaking, but let's
+      // avoid crashing/asserting as that depends on floating point calculations. Let's
+      // just return a single column vector filled with zeros.
+      dst.setZero();
+      return;
+    }
+
+    /* Let us use the following lemma:
+      *
+      * Lemma: If the matrix A has the LU decomposition PAQ = LU,
+      * then Ker A = Q(Ker U).
+      *
+      * Proof: trivial: just keep in mind that P, Q, L are invertible.
+      */
+
+    /* Thus, all we need to do is to compute Ker U, and then apply Q.
+      *
+      * U is upper triangular, with eigenvalues sorted so that any zeros appear at the end.
+      * Thus, the diagonal of U ends with exactly
+      * dimKer zero's. Let us use that to construct dimKer linearly
+      * independent vectors in Ker U.
+      */
+
+    Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank());
+    RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold();
+    Index p = 0;
+    for(Index i = 0; i < dec().nonzeroPivots(); ++i)
+      if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold)
+        pivots.coeffRef(p++) = i;
+    eigen_internal_assert(p == rank());
+
+    // we construct a temporaty trapezoid matrix m, by taking the U matrix and
+    // permuting the rows and cols to bring the nonnegligible pivots to the top of
+    // the main diagonal. We need that to be able to apply our triangular solvers.
+    // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified
+    Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options,
+           MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime>
+      m(dec().matrixLU().block(0, 0, rank(), cols));
+    for(Index i = 0; i < rank(); ++i)
+    {
+      if(i) m.row(i).head(i).setZero();
+      m.row(i).tail(cols-i) = dec().matrixLU().row(pivots.coeff(i)).tail(cols-i);
+    }
+    m.block(0, 0, rank(), rank());
+    m.block(0, 0, rank(), rank()).template triangularView<StrictlyLower>().setZero();
+    for(Index i = 0; i < rank(); ++i)
+      m.col(i).swap(m.col(pivots.coeff(i)));
+
+    // ok, we have our trapezoid matrix, we can apply the triangular solver.
+    // notice that the math behind this suggests that we should apply this to the
+    // negative of the RHS, but for performance we just put the negative sign elsewhere, see below.
+    m.topLeftCorner(rank(), rank())
+     .template triangularView<Upper>().solveInPlace(
+        m.topRightCorner(rank(), dimker)
+      );
+
+    // now we must undo the column permutation that we had applied!
+    for(Index i = rank()-1; i >= 0; --i)
+      m.col(i).swap(m.col(pivots.coeff(i)));
+
+    // see the negative sign in the next line, that's what we were talking about above.
+    for(Index i = 0; i < rank(); ++i) dst.row(dec().permutationQ().indices().coeff(i)) = -m.row(i).tail(dimker);
+    for(Index i = rank(); i < cols; ++i) dst.row(dec().permutationQ().indices().coeff(i)).setZero();
+    for(Index k = 0; k < dimker; ++k) dst.coeffRef(dec().permutationQ().indices().coeff(rank()+k), k) = Scalar(1);
+  }
+};
+
+/***** Implementation of image() *****************************************************/
+
+template<typename _MatrixType>
+struct image_retval<FullPivLU<_MatrixType> >
+  : image_retval_base<FullPivLU<_MatrixType> >
+{
+  EIGEN_MAKE_IMAGE_HELPERS(FullPivLU<_MatrixType>)
+
+  enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(
+            MatrixType::MaxColsAtCompileTime,
+            MatrixType::MaxRowsAtCompileTime)
+  };
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    using std::abs;
+    if(rank() == 0)
+    {
+      // The Image is just {0}, so it doesn't have a basis properly speaking, but let's
+      // avoid crashing/asserting as that depends on floating point calculations. Let's
+      // just return a single column vector filled with zeros.
+      dst.setZero();
+      return;
+    }
+
+    Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank());
+    RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold();
+    Index p = 0;
+    for(Index i = 0; i < dec().nonzeroPivots(); ++i)
+      if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold)
+        pivots.coeffRef(p++) = i;
+    eigen_internal_assert(p == rank());
+
+    for(Index i = 0; i < rank(); ++i)
+      dst.col(i) = originalMatrix().col(dec().permutationQ().indices().coeff(pivots.coeff(i)));
+  }
+};
+
+/***** Implementation of solve() *****************************************************/
+
+} // end namespace internal
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+  /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.
+  * So we proceed as follows:
+  * Step 1: compute c = P * rhs.
+  * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.
+  * Step 3: replace c by the solution x to Ux = c. May or may not exist.
+  * Step 4: result = Q * c;
+  */
+
+  const Index rows = this->rows(),
+              cols = this->cols(),
+              nonzero_pivots = this->rank();
+  eigen_assert(rhs.rows() == rows);
+  const Index smalldim = (std::min)(rows, cols);
+
+  if(nonzero_pivots == 0)
+  {
+    dst.setZero();
+    return;
+  }
+
+  typename RhsType::PlainObject c(rhs.rows(), rhs.cols());
+
+  // Step 1
+  c = permutationP() * rhs;
+
+  // Step 2
+  m_lu.topLeftCorner(smalldim,smalldim)
+      .template triangularView<UnitLower>()
+      .solveInPlace(c.topRows(smalldim));
+  if(rows>cols)
+    c.bottomRows(rows-cols) -= m_lu.bottomRows(rows-cols) * c.topRows(cols);
+
+  // Step 3
+  m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
+      .template triangularView<Upper>()
+      .solveInPlace(c.topRows(nonzero_pivots));
+
+  // Step 4
+  for(Index i = 0; i < nonzero_pivots; ++i)
+    dst.row(permutationQ().indices().coeff(i)) = c.row(i);
+  for(Index i = nonzero_pivots; i < m_lu.cols(); ++i)
+    dst.row(permutationQ().indices().coeff(i)).setZero();
+}
+
+template<typename _MatrixType>
+template<bool Conjugate, typename RhsType, typename DstType>
+void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const
+{
+  /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1},
+   * and since permutations are real and unitary, we can write this
+   * as   A^T = Q U^T L^T P,
+   * So we proceed as follows:
+   * Step 1: compute c = Q^T rhs.
+   * Step 2: replace c by the solution x to U^T x = c. May or may not exist.
+   * Step 3: replace c by the solution x to L^T x = c.
+   * Step 4: result = P^T c.
+   * If Conjugate is true, replace "^T" by "^*" above.
+   */
+
+  const Index rows = this->rows(), cols = this->cols(),
+    nonzero_pivots = this->rank();
+   eigen_assert(rhs.rows() == cols);
+  const Index smalldim = (std::min)(rows, cols);
+
+  if(nonzero_pivots == 0)
+  {
+    dst.setZero();
+    return;
+  }
+
+  typename RhsType::PlainObject c(rhs.rows(), rhs.cols());
+
+  // Step 1
+  c = permutationQ().inverse() * rhs;
+
+  if (Conjugate) {
+    // Step 2
+    m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
+        .template triangularView<Upper>()
+        .adjoint()
+        .solveInPlace(c.topRows(nonzero_pivots));
+    // Step 3
+    m_lu.topLeftCorner(smalldim, smalldim)
+        .template triangularView<UnitLower>()
+        .adjoint()
+        .solveInPlace(c.topRows(smalldim));
+  } else {
+    // Step 2
+    m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
+        .template triangularView<Upper>()
+        .transpose()
+        .solveInPlace(c.topRows(nonzero_pivots));
+    // Step 3
+    m_lu.topLeftCorner(smalldim, smalldim)
+        .template triangularView<UnitLower>()
+        .transpose()
+        .solveInPlace(c.topRows(smalldim));
+  }
+
+  // Step 4
+  PermutationPType invp = permutationP().inverse().eval();
+  for(Index i = 0; i < smalldim; ++i)
+    dst.row(invp.indices().coeff(i)) = c.row(i);
+  for(Index i = smalldim; i < rows; ++i)
+    dst.row(invp.indices().coeff(i)).setZero();
+}
+
+#endif
+
+namespace internal {
+
+
+/***** Implementation of inverse() *****************************************************/
+template<typename DstXprType, typename MatrixType>
+struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivLU<MatrixType>::Scalar>, Dense2Dense>
+{
+  typedef FullPivLU<MatrixType> LuType;
+  typedef Inverse<LuType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename MatrixType::Scalar> &)
+  {
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
+  }
+};
+} // end namespace internal
+
+/******* MatrixBase methods *****************************************************************/
+
+/** \lu_module
+  *
+  * \return the full-pivoting LU decomposition of \c *this.
+  *
+  * \sa class FullPivLU
+  */
+template<typename Derived>
+inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::fullPivLu() const
+{
+  return FullPivLU<PlainObject>(eval());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_LU_H
diff --git a/third-party/Eigen/src/LU/InverseImpl.h b/third-party/Eigen/src/LU/InverseImpl.h
new file mode 100644
index 00000000..f49f2336
--- /dev/null
+++ b/third-party/Eigen/src/LU/InverseImpl.h
@@ -0,0 +1,415 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_INVERSE_IMPL_H
+#define EIGEN_INVERSE_IMPL_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/**********************************
+*** General case implementation ***
+**********************************/
+
+template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime>
+struct compute_inverse
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(const MatrixType& matrix, ResultType& result)
+  {
+    result = matrix.partialPivLu().inverse();
+  }
+};
+
+template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime>
+struct compute_inverse_and_det_with_check { /* nothing! general case not supported. */ };
+
+/****************************
+*** Size 1 implementation ***
+****************************/
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse<MatrixType, ResultType, 1>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(const MatrixType& matrix, ResultType& result)
+  {
+    typedef typename MatrixType::Scalar Scalar;
+    internal::evaluator<MatrixType> matrixEval(matrix);
+    result.coeffRef(0,0) = Scalar(1) / matrixEval.coeff(0,0);
+  }
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(
+    const MatrixType& matrix,
+    const typename MatrixType::RealScalar& absDeterminantThreshold,
+    ResultType& result,
+    typename ResultType::Scalar& determinant,
+    bool& invertible
+  )
+  {
+    using std::abs;
+    determinant = matrix.coeff(0,0);
+    invertible = abs(determinant) > absDeterminantThreshold;
+    if(invertible) result.coeffRef(0,0) = typename ResultType::Scalar(1) / determinant;
+  }
+};
+
+/****************************
+*** Size 2 implementation ***
+****************************/
+
+template<typename MatrixType, typename ResultType>
+EIGEN_DEVICE_FUNC 
+inline void compute_inverse_size2_helper(
+    const MatrixType& matrix, const typename ResultType::Scalar& invdet,
+    ResultType& result)
+{
+  result.coeffRef(0,0) =  matrix.coeff(1,1) * invdet;
+  result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet;
+  result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet;
+  result.coeffRef(1,1) =  matrix.coeff(0,0) * invdet;
+}
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse<MatrixType, ResultType, 2>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(const MatrixType& matrix, ResultType& result)
+  {
+    typedef typename ResultType::Scalar Scalar;
+    const Scalar invdet = typename MatrixType::Scalar(1) / matrix.determinant();
+    compute_inverse_size2_helper(matrix, invdet, result);
+  }
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(
+    const MatrixType& matrix,
+    const typename MatrixType::RealScalar& absDeterminantThreshold,
+    ResultType& inverse,
+    typename ResultType::Scalar& determinant,
+    bool& invertible
+  )
+  {
+    using std::abs;
+    typedef typename ResultType::Scalar Scalar;
+    determinant = matrix.determinant();
+    invertible = abs(determinant) > absDeterminantThreshold;
+    if(!invertible) return;
+    const Scalar invdet = Scalar(1) / determinant;
+    compute_inverse_size2_helper(matrix, invdet, inverse);
+  }
+};
+
+/****************************
+*** Size 3 implementation ***
+****************************/
+
+template<typename MatrixType, int i, int j>
+EIGEN_DEVICE_FUNC 
+inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m)
+{
+  enum {
+    i1 = (i+1) % 3,
+    i2 = (i+2) % 3,
+    j1 = (j+1) % 3,
+    j2 = (j+2) % 3
+  };
+  return m.coeff(i1, j1) * m.coeff(i2, j2)
+       - m.coeff(i1, j2) * m.coeff(i2, j1);
+}
+
+template<typename MatrixType, typename ResultType>
+EIGEN_DEVICE_FUNC
+inline void compute_inverse_size3_helper(
+    const MatrixType& matrix,
+    const typename ResultType::Scalar& invdet,
+    const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0,
+    ResultType& result)
+{
+  result.row(0) = cofactors_col0 * invdet;
+  result.coeffRef(1,0) =  cofactor_3x3<MatrixType,0,1>(matrix) * invdet;
+  result.coeffRef(1,1) =  cofactor_3x3<MatrixType,1,1>(matrix) * invdet;
+  result.coeffRef(1,2) =  cofactor_3x3<MatrixType,2,1>(matrix) * invdet;
+  result.coeffRef(2,0) =  cofactor_3x3<MatrixType,0,2>(matrix) * invdet;
+  result.coeffRef(2,1) =  cofactor_3x3<MatrixType,1,2>(matrix) * invdet;
+  result.coeffRef(2,2) =  cofactor_3x3<MatrixType,2,2>(matrix) * invdet;
+}
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse<MatrixType, ResultType, 3>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(const MatrixType& matrix, ResultType& result)
+  {
+    typedef typename ResultType::Scalar Scalar;
+    Matrix<typename MatrixType::Scalar,3,1> cofactors_col0;
+    cofactors_col0.coeffRef(0) =  cofactor_3x3<MatrixType,0,0>(matrix);
+    cofactors_col0.coeffRef(1) =  cofactor_3x3<MatrixType,1,0>(matrix);
+    cofactors_col0.coeffRef(2) =  cofactor_3x3<MatrixType,2,0>(matrix);
+    const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
+    const Scalar invdet = Scalar(1) / det;
+    compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result);
+  }
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(
+    const MatrixType& matrix,
+    const typename MatrixType::RealScalar& absDeterminantThreshold,
+    ResultType& inverse,
+    typename ResultType::Scalar& determinant,
+    bool& invertible
+  )
+  {
+    using std::abs;
+    typedef typename ResultType::Scalar Scalar;
+    Matrix<Scalar,3,1> cofactors_col0;
+    cofactors_col0.coeffRef(0) =  cofactor_3x3<MatrixType,0,0>(matrix);
+    cofactors_col0.coeffRef(1) =  cofactor_3x3<MatrixType,1,0>(matrix);
+    cofactors_col0.coeffRef(2) =  cofactor_3x3<MatrixType,2,0>(matrix);
+    determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
+    invertible = abs(determinant) > absDeterminantThreshold;
+    if(!invertible) return;
+    const Scalar invdet = Scalar(1) / determinant;
+    compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse);
+  }
+};
+
+/****************************
+*** Size 4 implementation ***
+****************************/
+
+template<typename Derived>
+EIGEN_DEVICE_FUNC 
+inline const typename Derived::Scalar general_det3_helper
+(const MatrixBase<Derived>& matrix, int i1, int i2, int i3, int j1, int j2, int j3)
+{
+  return matrix.coeff(i1,j1)
+         * (matrix.coeff(i2,j2) * matrix.coeff(i3,j3) - matrix.coeff(i2,j3) * matrix.coeff(i3,j2));
+}
+
+template<typename MatrixType, int i, int j>
+EIGEN_DEVICE_FUNC 
+inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix)
+{
+  enum {
+    i1 = (i+1) % 4,
+    i2 = (i+2) % 4,
+    i3 = (i+3) % 4,
+    j1 = (j+1) % 4,
+    j2 = (j+2) % 4,
+    j3 = (j+3) % 4
+  };
+  return general_det3_helper(matrix, i1, i2, i3, j1, j2, j3)
+       + general_det3_helper(matrix, i2, i3, i1, j1, j2, j3)
+       + general_det3_helper(matrix, i3, i1, i2, j1, j2, j3);
+}
+
+template<int Arch, typename Scalar, typename MatrixType, typename ResultType>
+struct compute_inverse_size4
+{
+  EIGEN_DEVICE_FUNC
+  static void run(const MatrixType& matrix, ResultType& result)
+  {
+    result.coeffRef(0,0) =  cofactor_4x4<MatrixType,0,0>(matrix);
+    result.coeffRef(1,0) = -cofactor_4x4<MatrixType,0,1>(matrix);
+    result.coeffRef(2,0) =  cofactor_4x4<MatrixType,0,2>(matrix);
+    result.coeffRef(3,0) = -cofactor_4x4<MatrixType,0,3>(matrix);
+    result.coeffRef(0,2) =  cofactor_4x4<MatrixType,2,0>(matrix);
+    result.coeffRef(1,2) = -cofactor_4x4<MatrixType,2,1>(matrix);
+    result.coeffRef(2,2) =  cofactor_4x4<MatrixType,2,2>(matrix);
+    result.coeffRef(3,2) = -cofactor_4x4<MatrixType,2,3>(matrix);
+    result.coeffRef(0,1) = -cofactor_4x4<MatrixType,1,0>(matrix);
+    result.coeffRef(1,1) =  cofactor_4x4<MatrixType,1,1>(matrix);
+    result.coeffRef(2,1) = -cofactor_4x4<MatrixType,1,2>(matrix);
+    result.coeffRef(3,1) =  cofactor_4x4<MatrixType,1,3>(matrix);
+    result.coeffRef(0,3) = -cofactor_4x4<MatrixType,3,0>(matrix);
+    result.coeffRef(1,3) =  cofactor_4x4<MatrixType,3,1>(matrix);
+    result.coeffRef(2,3) = -cofactor_4x4<MatrixType,3,2>(matrix);
+    result.coeffRef(3,3) =  cofactor_4x4<MatrixType,3,3>(matrix);
+    result /= (matrix.col(0).cwiseProduct(result.row(0).transpose())).sum();
+  }
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse<MatrixType, ResultType, 4>
+ : compute_inverse_size4<Architecture::Target, typename MatrixType::Scalar,
+                            MatrixType, ResultType>
+{
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4>
+{
+  EIGEN_DEVICE_FUNC
+  static inline void run(
+    const MatrixType& matrix,
+    const typename MatrixType::RealScalar& absDeterminantThreshold,
+    ResultType& inverse,
+    typename ResultType::Scalar& determinant,
+    bool& invertible
+  )
+  {
+    using std::abs;
+    determinant = matrix.determinant();
+    invertible = abs(determinant) > absDeterminantThreshold;
+    if(invertible) compute_inverse<MatrixType, ResultType>::run(matrix, inverse);
+  }
+};
+
+/*************************
+*** MatrixBase methods ***
+*************************/
+
+} // end namespace internal
+
+namespace internal {
+
+// Specialization for "dense = dense_xpr.inverse()"
+template<typename DstXprType, typename XprType>
+struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar>, Dense2Dense>
+{
+  typedef Inverse<XprType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+    
+    const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);
+    EIGEN_ONLY_USED_FOR_DEBUG(Size);
+    eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst)))
+              && "Aliasing problem detected in inverse(), you need to do inverse().eval() here.");
+
+    typedef typename internal::nested_eval<XprType,XprType::ColsAtCompileTime>::type  ActualXprType;
+    typedef typename internal::remove_all<ActualXprType>::type                        ActualXprTypeCleanded;
+    
+    ActualXprType actual_xpr(src.nestedExpression());
+    
+    compute_inverse<ActualXprTypeCleanded, DstXprType>::run(actual_xpr, dst);
+  }
+};
+
+  
+} // end namespace internal
+
+/** \lu_module
+  *
+  * \returns the matrix inverse of this matrix.
+  *
+  * For small fixed sizes up to 4x4, this method uses cofactors.
+  * In the general case, this method uses class PartialPivLU.
+  *
+  * \note This matrix must be invertible, otherwise the result is undefined. If you need an
+  * invertibility check, do the following:
+  * \li for fixed sizes up to 4x4, use computeInverseAndDetWithCheck().
+  * \li for the general case, use class FullPivLU.
+  *
+  * Example: \include MatrixBase_inverse.cpp
+  * Output: \verbinclude MatrixBase_inverse.out
+  *
+  * \sa computeInverseAndDetWithCheck()
+  */
+template<typename Derived>
+inline const Inverse<Derived> MatrixBase<Derived>::inverse() const
+{
+  EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
+  eigen_assert(rows() == cols());
+  return Inverse<Derived>(derived());
+}
+
+/** \lu_module
+  *
+  * Computation of matrix inverse and determinant, with invertibility check.
+  *
+  * This is only for fixed-size square matrices of size up to 4x4.
+  *
+  * \param inverse Reference to the matrix in which to store the inverse.
+  * \param determinant Reference to the variable in which to store the determinant.
+  * \param invertible Reference to the bool variable in which to store whether the matrix is invertible.
+  * \param absDeterminantThreshold Optional parameter controlling the invertibility check.
+  *                                The matrix will be declared invertible if the absolute value of its
+  *                                determinant is greater than this threshold.
+  *
+  * Example: \include MatrixBase_computeInverseAndDetWithCheck.cpp
+  * Output: \verbinclude MatrixBase_computeInverseAndDetWithCheck.out
+  *
+  * \sa inverse(), computeInverseWithCheck()
+  */
+template<typename Derived>
+template<typename ResultType>
+inline void MatrixBase<Derived>::computeInverseAndDetWithCheck(
+    ResultType& inverse,
+    typename ResultType::Scalar& determinant,
+    bool& invertible,
+    const RealScalar& absDeterminantThreshold
+  ) const
+{
+  // i'd love to put some static assertions there, but SFINAE means that they have no effect...
+  eigen_assert(rows() == cols());
+  // for 2x2, it's worth giving a chance to avoid evaluating.
+  // for larger sizes, evaluating has negligible cost and limits code size.
+  typedef typename internal::conditional<
+    RowsAtCompileTime == 2,
+    typename internal::remove_all<typename internal::nested_eval<Derived, 2>::type>::type,
+    PlainObject
+  >::type MatrixType;
+  internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run
+    (derived(), absDeterminantThreshold, inverse, determinant, invertible);
+}
+
+/** \lu_module
+  *
+  * Computation of matrix inverse, with invertibility check.
+  *
+  * This is only for fixed-size square matrices of size up to 4x4.
+  *
+  * \param inverse Reference to the matrix in which to store the inverse.
+  * \param invertible Reference to the bool variable in which to store whether the matrix is invertible.
+  * \param absDeterminantThreshold Optional parameter controlling the invertibility check.
+  *                                The matrix will be declared invertible if the absolute value of its
+  *                                determinant is greater than this threshold.
+  *
+  * Example: \include MatrixBase_computeInverseWithCheck.cpp
+  * Output: \verbinclude MatrixBase_computeInverseWithCheck.out
+  *
+  * \sa inverse(), computeInverseAndDetWithCheck()
+  */
+template<typename Derived>
+template<typename ResultType>
+inline void MatrixBase<Derived>::computeInverseWithCheck(
+    ResultType& inverse,
+    bool& invertible,
+    const RealScalar& absDeterminantThreshold
+  ) const
+{
+  Scalar determinant;
+  // i'd love to put some static assertions there, but SFINAE means that they have no effect...
+  eigen_assert(rows() == cols());
+  computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_INVERSE_IMPL_H
diff --git a/third-party/Eigen/src/LU/PartialPivLU.h b/third-party/Eigen/src/LU/PartialPivLU.h
new file mode 100644
index 00000000..6b10f39f
--- /dev/null
+++ b/third-party/Eigen/src/LU/PartialPivLU.h
@@ -0,0 +1,614 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PARTIALLU_H
+#define EIGEN_PARTIALLU_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
+ : traits<_MatrixType>
+{
+  typedef MatrixXpr XprKind;
+  typedef SolverStorage StorageKind;
+  typedef traits<_MatrixType> BaseTraits;
+  enum {
+    Flags = BaseTraits::Flags & RowMajorBit,
+    CoeffReadCost = Dynamic
+  };
+};
+
+template<typename T,typename Derived>
+struct enable_if_ref;
+// {
+//   typedef Derived type;
+// };
+
+template<typename T,typename Derived>
+struct enable_if_ref<Ref<T>,Derived> {
+  typedef Derived type;
+};
+
+} // end namespace internal
+
+/** \ingroup LU_Module
+  *
+  * \class PartialPivLU
+  *
+  * \brief LU decomposition of a matrix with partial pivoting, and related features
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
+  *
+  * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A
+  * is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P
+  * is a permutation matrix.
+  *
+  * Typically, partial pivoting LU decomposition is only considered numerically stable for square invertible
+  * matrices. Thus LAPACK's dgesv and dgesvx require the matrix to be square and invertible. The present class
+  * does the same. It will assert that the matrix is square, but it won't (actually it can't) check that the
+  * matrix is invertible: it is your task to check that you only use this decomposition on invertible matrices.
+  *
+  * The guaranteed safe alternative, working for all matrices, is the full pivoting LU decomposition, provided
+  * by class FullPivLU.
+  *
+  * This is \b not a rank-revealing LU decomposition. Many features are intentionally absent from this class,
+  * such as rank computation. If you need these features, use class FullPivLU.
+  *
+  * This LU decomposition is suitable to invert invertible matrices. It is what MatrixBase::inverse() uses
+  * in the general case.
+  * On the other hand, it is \b not suitable to determine whether a given matrix is invertible.
+  *
+  * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP().
+  *
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+  * 
+  * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU
+  */
+template<typename _MatrixType> class PartialPivLU
+  : public SolverBase<PartialPivLU<_MatrixType> >
+{
+  public:
+
+    typedef _MatrixType MatrixType;
+    typedef SolverBase<PartialPivLU> Base;
+    EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU)
+    // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
+    enum {
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
+    typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
+    typedef typename MatrixType::PlainObject PlainObject;
+
+    /**
+      * \brief Default Constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via PartialPivLU::compute(const MatrixType&).
+      */
+    PartialPivLU();
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa PartialPivLU()
+      */
+    explicit PartialPivLU(Index size);
+
+    /** Constructor.
+      *
+      * \param matrix the matrix of which to compute the LU decomposition.
+      *
+      * \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
+      * If you need to deal with non-full rank, use class FullPivLU instead.
+      */
+    template<typename InputType>
+    explicit PartialPivLU(const EigenBase<InputType>& matrix);
+
+    /** Constructor for \link InplaceDecomposition inplace decomposition \endlink
+      *
+      * \param matrix the matrix of which to compute the LU decomposition.
+      *
+      * \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
+      * If you need to deal with non-full rank, use class FullPivLU instead.
+      */
+    template<typename InputType>
+    explicit PartialPivLU(EigenBase<InputType>& matrix);
+
+    template<typename InputType>
+    PartialPivLU& compute(const EigenBase<InputType>& matrix) {
+      m_lu = matrix.derived();
+      compute();
+      return *this;
+    }
+
+    /** \returns the LU decomposition matrix: the upper-triangular part is U, the
+      * unit-lower-triangular part is L (at least for square matrices; in the non-square
+      * case, special care is needed, see the documentation of class FullPivLU).
+      *
+      * \sa matrixL(), matrixU()
+      */
+    inline const MatrixType& matrixLU() const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return m_lu;
+    }
+
+    /** \returns the permutation matrix P.
+      */
+    inline const PermutationType& permutationP() const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return m_p;
+    }
+
+    /** This method returns the solution x to the equation Ax=b, where A is the matrix of which
+      * *this is the LU decomposition.
+      *
+      * \param b the right-hand-side of the equation to solve. Can be a vector or a matrix,
+      *          the only requirement in order for the equation to make sense is that
+      *          b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.
+      *
+      * \returns the solution.
+      *
+      * Example: \include PartialPivLU_solve.cpp
+      * Output: \verbinclude PartialPivLU_solve.out
+      *
+      * Since this PartialPivLU class assumes anyway that the matrix A is invertible, the solution
+      * theoretically exists and is unique regardless of b.
+      *
+      * \sa TriangularView::solve(), inverse(), computeInverse()
+      */
+    // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion.
+    template<typename Rhs>
+    inline const Solve<PartialPivLU, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return Solve<PartialPivLU, Rhs>(*this, b.derived());
+    }
+
+    /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is
+        the LU decomposition.
+      */
+    inline RealScalar rcond() const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return internal::rcond_estimate_helper(m_l1_norm, *this);
+    }
+
+    /** \returns the inverse of the matrix of which *this is the LU decomposition.
+      *
+      * \warning The matrix being decomposed here is assumed to be invertible. If you need to check for
+      *          invertibility, use class FullPivLU instead.
+      *
+      * \sa MatrixBase::inverse(), LU::inverse()
+      */
+    inline const Inverse<PartialPivLU> inverse() const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return Inverse<PartialPivLU>(*this);
+    }
+
+    /** \returns the determinant of the matrix of which
+      * *this is the LU decomposition. It has only linear complexity
+      * (that is, O(n) where n is the dimension of the square matrix)
+      * as the LU decomposition has already been computed.
+      *
+      * \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers
+      *       optimized paths.
+      *
+      * \warning a determinant can be very big or small, so for matrices
+      * of large enough dimension, there is a risk of overflow/underflow.
+      *
+      * \sa MatrixBase::determinant()
+      */
+    Scalar determinant() const;
+
+    MatrixType reconstructedMatrix() const;
+
+    inline Index rows() const { return m_lu.rows(); }
+    inline Index cols() const { return m_lu.cols(); }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const {
+     /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
+      * So we proceed as follows:
+      * Step 1: compute c = Pb.
+      * Step 2: replace c by the solution x to Lx = c.
+      * Step 3: replace c by the solution x to Ux = c.
+      */
+
+      eigen_assert(rhs.rows() == m_lu.rows());
+
+      // Step 1
+      dst = permutationP() * rhs;
+
+      // Step 2
+      m_lu.template triangularView<UnitLower>().solveInPlace(dst);
+
+      // Step 3
+      m_lu.template triangularView<Upper>().solveInPlace(dst);
+    }
+
+    template<bool Conjugate, typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const {
+     /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
+      * So we proceed as follows:
+      * Step 1: compute c = Pb.
+      * Step 2: replace c by the solution x to Lx = c.
+      * Step 3: replace c by the solution x to Ux = c.
+      */
+
+      eigen_assert(rhs.rows() == m_lu.cols());
+
+      if (Conjugate) {
+        // Step 1
+        dst = m_lu.template triangularView<Upper>().adjoint().solve(rhs);
+        // Step 2
+        m_lu.template triangularView<UnitLower>().adjoint().solveInPlace(dst);
+      } else {
+        // Step 1
+        dst = m_lu.template triangularView<Upper>().transpose().solve(rhs);
+        // Step 2
+        m_lu.template triangularView<UnitLower>().transpose().solveInPlace(dst);
+      }
+      // Step 3
+      dst = permutationP().transpose() * dst;
+    }
+    #endif
+
+  protected:
+
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+
+    void compute();
+
+    MatrixType m_lu;
+    PermutationType m_p;
+    TranspositionType m_rowsTranspositions;
+    RealScalar m_l1_norm;
+    signed char m_det_p;
+    bool m_isInitialized;
+};
+
+template<typename MatrixType>
+PartialPivLU<MatrixType>::PartialPivLU()
+  : m_lu(),
+    m_p(),
+    m_rowsTranspositions(),
+    m_l1_norm(0),
+    m_det_p(0),
+    m_isInitialized(false)
+{
+}
+
+template<typename MatrixType>
+PartialPivLU<MatrixType>::PartialPivLU(Index size)
+  : m_lu(size, size),
+    m_p(size),
+    m_rowsTranspositions(size),
+    m_l1_norm(0),
+    m_det_p(0),
+    m_isInitialized(false)
+{
+}
+
+template<typename MatrixType>
+template<typename InputType>
+PartialPivLU<MatrixType>::PartialPivLU(const EigenBase<InputType>& matrix)
+  : m_lu(matrix.rows(),matrix.cols()),
+    m_p(matrix.rows()),
+    m_rowsTranspositions(matrix.rows()),
+    m_l1_norm(0),
+    m_det_p(0),
+    m_isInitialized(false)
+{
+  compute(matrix.derived());
+}
+
+template<typename MatrixType>
+template<typename InputType>
+PartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix)
+  : m_lu(matrix.derived()),
+    m_p(matrix.rows()),
+    m_rowsTranspositions(matrix.rows()),
+    m_l1_norm(0),
+    m_det_p(0),
+    m_isInitialized(false)
+{
+  compute();
+}
+
+namespace internal {
+
+/** \internal This is the blocked version of fullpivlu_unblocked() */
+template<typename Scalar, int StorageOrder, typename PivIndex>
+struct partial_lu_impl
+{
+  // FIXME add a stride to Map, so that the following mapping becomes easier,
+  // another option would be to create an expression being able to automatically
+  // warp any Map, Matrix, and Block expressions as a unique type, but since that's exactly
+  // a Map + stride, why not adding a stride to Map, and convenient ctors from a Matrix,
+  // and Block.
+  typedef Map<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > MapLU;
+  typedef Block<MapLU, Dynamic, Dynamic> MatrixType;
+  typedef Block<MatrixType,Dynamic,Dynamic> BlockType;
+  typedef typename MatrixType::RealScalar RealScalar;
+
+  /** \internal performs the LU decomposition in-place of the matrix \a lu
+    * using an unblocked algorithm.
+    *
+    * In addition, this function returns the row transpositions in the
+    * vector \a row_transpositions which must have a size equal to the number
+    * of columns of the matrix \a lu, and an integer \a nb_transpositions
+    * which returns the actual number of transpositions.
+    *
+    * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
+    */
+  static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
+  {
+    typedef scalar_score_coeff_op<Scalar> Scoring;
+    typedef typename Scoring::result_type Score;
+    const Index rows = lu.rows();
+    const Index cols = lu.cols();
+    const Index size = (std::min)(rows,cols);
+    nb_transpositions = 0;
+    Index first_zero_pivot = -1;
+    for(Index k = 0; k < size; ++k)
+    {
+      Index rrows = rows-k-1;
+      Index rcols = cols-k-1;
+
+      Index row_of_biggest_in_col;
+      Score biggest_in_corner
+        = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col);
+      row_of_biggest_in_col += k;
+
+      row_transpositions[k] = PivIndex(row_of_biggest_in_col);
+
+      if(biggest_in_corner != Score(0))
+      {
+        if(k != row_of_biggest_in_col)
+        {
+          lu.row(k).swap(lu.row(row_of_biggest_in_col));
+          ++nb_transpositions;
+        }
+
+        // FIXME shall we introduce a safe quotient expression in cas 1/lu.coeff(k,k)
+        // overflow but not the actual quotient?
+        lu.col(k).tail(rrows) /= lu.coeff(k,k);
+      }
+      else if(first_zero_pivot==-1)
+      {
+        // the pivot is exactly zero, we record the index of the first pivot which is exactly 0,
+        // and continue the factorization such we still have A = PLU
+        first_zero_pivot = k;
+      }
+
+      if(k<rows-1)
+        lu.bottomRightCorner(rrows,rcols).noalias() -= lu.col(k).tail(rrows) * lu.row(k).tail(rcols);
+    }
+    return first_zero_pivot;
+  }
+
+  /** \internal performs the LU decomposition in-place of the matrix represented
+    * by the variables \a rows, \a cols, \a lu_data, and \a lu_stride using a
+    * recursive, blocked algorithm.
+    *
+    * In addition, this function returns the row transpositions in the
+    * vector \a row_transpositions which must have a size equal to the number
+    * of columns of the matrix \a lu, and an integer \a nb_transpositions
+    * which returns the actual number of transpositions.
+    *
+    * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
+    *
+    * \note This very low level interface using pointers, etc. is to:
+    *   1 - reduce the number of instanciations to the strict minimum
+    *   2 - avoid infinite recursion of the instanciations with Block<Block<Block<...> > >
+    */
+  static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256)
+  {
+    MapLU lu1(lu_data,StorageOrder==RowMajor?rows:luStride,StorageOrder==RowMajor?luStride:cols);
+    MatrixType lu(lu1,0,0,rows,cols);
+
+    const Index size = (std::min)(rows,cols);
+
+    // if the matrix is too small, no blocking:
+    if(size<=16)
+    {
+      return unblocked_lu(lu, row_transpositions, nb_transpositions);
+    }
+
+    // automatically adjust the number of subdivisions to the size
+    // of the matrix so that there is enough sub blocks:
+    Index blockSize;
+    {
+      blockSize = size/8;
+      blockSize = (blockSize/16)*16;
+      blockSize = (std::min)((std::max)(blockSize,Index(8)), maxBlockSize);
+    }
+
+    nb_transpositions = 0;
+    Index first_zero_pivot = -1;
+    for(Index k = 0; k < size; k+=blockSize)
+    {
+      Index bs = (std::min)(size-k,blockSize); // actual size of the block
+      Index trows = rows - k - bs; // trailing rows
+      Index tsize = size - k - bs; // trailing size
+
+      // partition the matrix:
+      //                          A00 | A01 | A02
+      // lu  = A_0 | A_1 | A_2 =  A10 | A11 | A12
+      //                          A20 | A21 | A22
+      BlockType A_0(lu,0,0,rows,k);
+      BlockType A_2(lu,0,k+bs,rows,tsize);
+      BlockType A11(lu,k,k,bs,bs);
+      BlockType A12(lu,k,k+bs,bs,tsize);
+      BlockType A21(lu,k+bs,k,trows,bs);
+      BlockType A22(lu,k+bs,k+bs,trows,tsize);
+
+      PivIndex nb_transpositions_in_panel;
+      // recursively call the blocked LU algorithm on [A11^T A21^T]^T
+      // with a very small blocking size:
+      Index ret = blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride,
+                   row_transpositions+k, nb_transpositions_in_panel, 16);
+      if(ret>=0 && first_zero_pivot==-1)
+        first_zero_pivot = k+ret;
+
+      nb_transpositions += nb_transpositions_in_panel;
+      // update permutations and apply them to A_0
+      for(Index i=k; i<k+bs; ++i)
+      {
+        Index piv = (row_transpositions[i] += internal::convert_index<PivIndex>(k));
+        A_0.row(i).swap(A_0.row(piv));
+      }
+
+      if(trows)
+      {
+        // apply permutations to A_2
+        for(Index i=k;i<k+bs; ++i)
+          A_2.row(i).swap(A_2.row(row_transpositions[i]));
+
+        // A12 = A11^-1 A12
+        A11.template triangularView<UnitLower>().solveInPlace(A12);
+
+        A22.noalias() -= A21 * A12;
+      }
+    }
+    return first_zero_pivot;
+  }
+};
+
+/** \internal performs the LU decomposition with partial pivoting in-place.
+  */
+template<typename MatrixType, typename TranspositionType>
+void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::StorageIndex& nb_transpositions)
+{
+  eigen_assert(lu.cols() == row_transpositions.size());
+  eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);
+
+  partial_lu_impl
+    <typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::StorageIndex>
+    ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
+}
+
+} // end namespace internal
+
+template<typename MatrixType>
+void PartialPivLU<MatrixType>::compute()
+{
+  check_template_parameters();
+
+  // the row permutation is stored as int indices, so just to be sure:
+  eigen_assert(m_lu.rows()<NumTraits<int>::highest());
+
+  if(m_lu.cols()>0)
+    m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
+  else
+    m_l1_norm = RealScalar(0);
+
+  eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
+  const Index size = m_lu.rows();
+
+  m_rowsTranspositions.resize(size);
+
+  typename TranspositionType::StorageIndex nb_transpositions;
+  internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions);
+  m_det_p = (nb_transpositions%2) ? -1 : 1;
+
+  m_p = m_rowsTranspositions;
+
+  m_isInitialized = true;
+}
+
+template<typename MatrixType>
+typename PartialPivLU<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const
+{
+  eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+  return Scalar(m_det_p) * m_lu.diagonal().prod();
+}
+
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: P^{-1} L U.
+ * This function is provided for debug purpose. */
+template<typename MatrixType>
+MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
+{
+  eigen_assert(m_isInitialized && "LU is not initialized.");
+  // LU
+  MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix()
+                 * m_lu.template triangularView<Upper>();
+
+  // P^{-1}(LU)
+  res = m_p.inverse() * res;
+
+  return res;
+}
+
+/***** Implementation details *****************************************************/
+
+namespace internal {
+
+/***** Implementation of inverse() *****************************************************/
+template<typename DstXprType, typename MatrixType>
+struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename PartialPivLU<MatrixType>::Scalar>, Dense2Dense>
+{
+  typedef PartialPivLU<MatrixType> LuType;
+  typedef Inverse<LuType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &)
+  {
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
+  }
+};
+} // end namespace internal
+
+/******** MatrixBase methods *******/
+
+/** \lu_module
+  *
+  * \return the partial-pivoting LU decomposition of \c *this.
+  *
+  * \sa class PartialPivLU
+  */
+template<typename Derived>
+inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::partialPivLu() const
+{
+  return PartialPivLU<PlainObject>(eval());
+}
+
+/** \lu_module
+  *
+  * Synonym of partialPivLu().
+  *
+  * \return the partial-pivoting LU decomposition of \c *this.
+  *
+  * \sa class PartialPivLU
+  */
+template<typename Derived>
+inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::lu() const
+{
+  return PartialPivLU<PlainObject>(eval());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_PARTIALLU_H
diff --git a/third-party/Eigen/src/LU/PartialPivLU_LAPACKE.h b/third-party/Eigen/src/LU/PartialPivLU_LAPACKE.h
new file mode 100644
index 00000000..755168a9
--- /dev/null
+++ b/third-party/Eigen/src/LU/PartialPivLU_LAPACKE.h
@@ -0,0 +1,83 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to LAPACKe
+ *     LU decomposition with partial pivoting based on LAPACKE_?getrf function.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_PARTIALLU_LAPACK_H
+#define EIGEN_PARTIALLU_LAPACK_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/** \internal Specialization for the data types supported by LAPACKe */
+
+#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \
+template<int StorageOrder> \
+struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \
+{ \
+  /* \internal performs the LU decomposition in-place of the matrix represented */ \
+  static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \
+  { \
+    EIGEN_UNUSED_VARIABLE(maxBlockSize);\
+    lapack_int matrix_order, first_zero_pivot; \
+    lapack_int m, n, lda, *ipiv, info; \
+    EIGTYPE* a; \
+/* Set up parameters for ?getrf */ \
+    matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
+    lda = convert_index<lapack_int>(luStride); \
+    a = lu_data; \
+    ipiv = row_transpositions; \
+    m = convert_index<lapack_int>(rows); \
+    n = convert_index<lapack_int>(cols); \
+    nb_transpositions = 0; \
+\
+    info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \
+\
+    for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \
+\
+    eigen_assert(info >= 0); \
+/* something should be done with nb_transpositions */ \
+\
+    first_zero_pivot = info; \
+    return first_zero_pivot; \
+  } \
+};
+
+EIGEN_LAPACKE_LU_PARTPIV(double, double, d)
+EIGEN_LAPACKE_LU_PARTPIV(float, float, s)
+EIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z)
+EIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float,  c)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PARTIALLU_LAPACK_H
diff --git a/third-party/Eigen/src/LU/arch/Inverse_SSE.h b/third-party/Eigen/src/LU/arch/Inverse_SSE.h
new file mode 100644
index 00000000..4dce2ef2
--- /dev/null
+++ b/third-party/Eigen/src/LU/arch/Inverse_SSE.h
@@ -0,0 +1,338 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2001 Intel Corporation
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// The SSE code for the 4x4 float and double matrix inverse in this file
+// comes from the following Intel's library:
+// http://software.intel.com/en-us/articles/optimized-matrix-library-for-use-with-the-intel-pentiumr-4-processors-sse2-instructions/
+//
+// Here is the respective copyright and license statement:
+//
+//   Copyright (c) 2001 Intel Corporation.
+//
+// Permition is granted to use, copy, distribute and prepare derivative works
+// of this library for any purpose and without fee, provided, that the above
+// copyright notice and this statement appear in all copies.
+// Intel makes no representations about the suitability of this software for
+// any purpose, and specifically disclaims all warranties.
+// See LEGAL.TXT for all the legal information.
+
+#ifndef EIGEN_INVERSE_SSE_H
+#define EIGEN_INVERSE_SSE_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
+{
+  enum {
+    MatrixAlignment     = traits<MatrixType>::Alignment,
+    ResultAlignment     = traits<ResultType>::Alignment,
+    StorageOrdersMatch  = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
+  };
+  typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
+  
+  static void run(const MatrixType& mat, ResultType& result)
+  {
+    ActualMatrixType matrix(mat);
+    const Packet4f p4f_sign_PNNP = _mm_castsi128_ps(_mm_set_epi32(0x00000000, 0x80000000, 0x80000000, 0x00000000));
+
+    // Load the full matrix into registers
+    __m128 _L1 = matrix.template packet<MatrixAlignment>( 0);
+    __m128 _L2 = matrix.template packet<MatrixAlignment>( 4);
+    __m128 _L3 = matrix.template packet<MatrixAlignment>( 8);
+    __m128 _L4 = matrix.template packet<MatrixAlignment>(12);
+
+    // The inverse is calculated using "Divide and Conquer" technique. The
+    // original matrix is divide into four 2x2 sub-matrices. Since each
+    // register holds four matrix element, the smaller matrices are
+    // represented as a registers. Hence we get a better locality of the
+    // calculations.
+
+    __m128 A, B, C, D; // the four sub-matrices
+    if(!StorageOrdersMatch)
+    {
+      A = _mm_unpacklo_ps(_L1, _L2);
+      B = _mm_unpacklo_ps(_L3, _L4);
+      C = _mm_unpackhi_ps(_L1, _L2);
+      D = _mm_unpackhi_ps(_L3, _L4);
+    }
+    else
+    {
+      A = _mm_movelh_ps(_L1, _L2);
+      B = _mm_movehl_ps(_L2, _L1);
+      C = _mm_movelh_ps(_L3, _L4);
+      D = _mm_movehl_ps(_L4, _L3);
+    }
+
+    __m128 iA, iB, iC, iD,                 // partial inverse of the sub-matrices
+            DC, AB;
+    __m128 dA, dB, dC, dD;                 // determinant of the sub-matrices
+    __m128 det, d, d1, d2;
+    __m128 rd;                             // reciprocal of the determinant
+
+    //  AB = A# * B
+    AB = _mm_mul_ps(_mm_shuffle_ps(A,A,0x0F), B);
+    AB = _mm_sub_ps(AB,_mm_mul_ps(_mm_shuffle_ps(A,A,0xA5), _mm_shuffle_ps(B,B,0x4E)));
+    //  DC = D# * C
+    DC = _mm_mul_ps(_mm_shuffle_ps(D,D,0x0F), C);
+    DC = _mm_sub_ps(DC,_mm_mul_ps(_mm_shuffle_ps(D,D,0xA5), _mm_shuffle_ps(C,C,0x4E)));
+
+    //  dA = |A|
+    dA = _mm_mul_ps(_mm_shuffle_ps(A, A, 0x5F),A);
+    dA = _mm_sub_ss(dA, _mm_movehl_ps(dA,dA));
+    //  dB = |B|
+    dB = _mm_mul_ps(_mm_shuffle_ps(B, B, 0x5F),B);
+    dB = _mm_sub_ss(dB, _mm_movehl_ps(dB,dB));
+
+    //  dC = |C|
+    dC = _mm_mul_ps(_mm_shuffle_ps(C, C, 0x5F),C);
+    dC = _mm_sub_ss(dC, _mm_movehl_ps(dC,dC));
+    //  dD = |D|
+    dD = _mm_mul_ps(_mm_shuffle_ps(D, D, 0x5F),D);
+    dD = _mm_sub_ss(dD, _mm_movehl_ps(dD,dD));
+
+    //  d = trace(AB*DC) = trace(A#*B*D#*C)
+    d = _mm_mul_ps(_mm_shuffle_ps(DC,DC,0xD8),AB);
+
+    //  iD = C*A#*B
+    iD = _mm_mul_ps(_mm_shuffle_ps(C,C,0xA0), _mm_movelh_ps(AB,AB));
+    iD = _mm_add_ps(iD,_mm_mul_ps(_mm_shuffle_ps(C,C,0xF5), _mm_movehl_ps(AB,AB)));
+    //  iA = B*D#*C
+    iA = _mm_mul_ps(_mm_shuffle_ps(B,B,0xA0), _mm_movelh_ps(DC,DC));
+    iA = _mm_add_ps(iA,_mm_mul_ps(_mm_shuffle_ps(B,B,0xF5), _mm_movehl_ps(DC,DC)));
+
+    //  d = trace(AB*DC) = trace(A#*B*D#*C) [continue]
+    d  = _mm_add_ps(d, _mm_movehl_ps(d, d));
+    d  = _mm_add_ss(d, _mm_shuffle_ps(d, d, 1));
+    d1 = _mm_mul_ss(dA,dD);
+    d2 = _mm_mul_ss(dB,dC);
+
+    //  iD = D*|A| - C*A#*B
+    iD = _mm_sub_ps(_mm_mul_ps(D,_mm_shuffle_ps(dA,dA,0)), iD);
+
+    //  iA = A*|D| - B*D#*C;
+    iA = _mm_sub_ps(_mm_mul_ps(A,_mm_shuffle_ps(dD,dD,0)), iA);
+
+    //  det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C)
+    det = _mm_sub_ss(_mm_add_ss(d1,d2),d);
+    rd  = _mm_div_ss(_mm_set_ss(1.0f), det);
+
+//     #ifdef ZERO_SINGULAR
+//         rd = _mm_and_ps(_mm_cmpneq_ss(det,_mm_setzero_ps()), rd);
+//     #endif
+
+    //  iB = D * (A#B)# = D*B#*A
+    iB = _mm_mul_ps(D, _mm_shuffle_ps(AB,AB,0x33));
+    iB = _mm_sub_ps(iB, _mm_mul_ps(_mm_shuffle_ps(D,D,0xB1), _mm_shuffle_ps(AB,AB,0x66)));
+    //  iC = A * (D#C)# = A*C#*D
+    iC = _mm_mul_ps(A, _mm_shuffle_ps(DC,DC,0x33));
+    iC = _mm_sub_ps(iC, _mm_mul_ps(_mm_shuffle_ps(A,A,0xB1), _mm_shuffle_ps(DC,DC,0x66)));
+
+    rd = _mm_shuffle_ps(rd,rd,0);
+    rd = _mm_xor_ps(rd, p4f_sign_PNNP);
+
+    //  iB = C*|B| - D*B#*A
+    iB = _mm_sub_ps(_mm_mul_ps(C,_mm_shuffle_ps(dB,dB,0)), iB);
+
+    //  iC = B*|C| - A*C#*D;
+    iC = _mm_sub_ps(_mm_mul_ps(B,_mm_shuffle_ps(dC,dC,0)), iC);
+
+    //  iX = iX / det
+    iA = _mm_mul_ps(rd,iA);
+    iB = _mm_mul_ps(rd,iB);
+    iC = _mm_mul_ps(rd,iC);
+    iD = _mm_mul_ps(rd,iD);
+
+    Index res_stride = result.outerStride();
+    float* res = result.data();
+    pstoret<float, Packet4f, ResultAlignment>(res+0,            _mm_shuffle_ps(iA,iB,0x77));
+    pstoret<float, Packet4f, ResultAlignment>(res+res_stride,   _mm_shuffle_ps(iA,iB,0x22));
+    pstoret<float, Packet4f, ResultAlignment>(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77));
+    pstoret<float, Packet4f, ResultAlignment>(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22));
+  }
+
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
+{
+  enum {
+    MatrixAlignment     = traits<MatrixType>::Alignment,
+    ResultAlignment     = traits<ResultType>::Alignment,
+    StorageOrdersMatch  = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
+  };
+  typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
+  
+  static void run(const MatrixType& mat, ResultType& result)
+  {
+    ActualMatrixType matrix(mat);
+    const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
+    const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+
+    // The inverse is calculated using "Divide and Conquer" technique. The
+    // original matrix is divide into four 2x2 sub-matrices. Since each
+    // register of the matrix holds two elements, the smaller matrices are
+    // consisted of two registers. Hence we get a better locality of the
+    // calculations.
+
+    // the four sub-matrices
+    __m128d A1, A2, B1, B2, C1, C2, D1, D2;
+    
+    if(StorageOrdersMatch)
+    {
+      A1 = matrix.template packet<MatrixAlignment>( 0); B1 = matrix.template packet<MatrixAlignment>( 2);
+      A2 = matrix.template packet<MatrixAlignment>( 4); B2 = matrix.template packet<MatrixAlignment>( 6);
+      C1 = matrix.template packet<MatrixAlignment>( 8); D1 = matrix.template packet<MatrixAlignment>(10);
+      C2 = matrix.template packet<MatrixAlignment>(12); D2 = matrix.template packet<MatrixAlignment>(14);
+    }
+    else
+    {
+      __m128d tmp;
+      A1 = matrix.template packet<MatrixAlignment>( 0); C1 = matrix.template packet<MatrixAlignment>( 2);
+      A2 = matrix.template packet<MatrixAlignment>( 4); C2 = matrix.template packet<MatrixAlignment>( 6);
+      tmp = A1;
+      A1 = _mm_unpacklo_pd(A1,A2);
+      A2 = _mm_unpackhi_pd(tmp,A2);
+      tmp = C1;
+      C1 = _mm_unpacklo_pd(C1,C2);
+      C2 = _mm_unpackhi_pd(tmp,C2);
+      
+      B1 = matrix.template packet<MatrixAlignment>( 8); D1 = matrix.template packet<MatrixAlignment>(10);
+      B2 = matrix.template packet<MatrixAlignment>(12); D2 = matrix.template packet<MatrixAlignment>(14);
+      tmp = B1;
+      B1 = _mm_unpacklo_pd(B1,B2);
+      B2 = _mm_unpackhi_pd(tmp,B2);
+      tmp = D1;
+      D1 = _mm_unpacklo_pd(D1,D2);
+      D2 = _mm_unpackhi_pd(tmp,D2);
+    }
+    
+    __m128d iA1, iA2, iB1, iB2, iC1, iC2, iD1, iD2,     // partial invese of the sub-matrices
+            DC1, DC2, AB1, AB2;
+    __m128d dA, dB, dC, dD;     // determinant of the sub-matrices
+    __m128d det, d1, d2, rd;
+
+    //  dA = |A|
+    dA = _mm_shuffle_pd(A2, A2, 1);
+    dA = _mm_mul_pd(A1, dA);
+    dA = _mm_sub_sd(dA, _mm_shuffle_pd(dA,dA,3));
+    //  dB = |B|
+    dB = _mm_shuffle_pd(B2, B2, 1);
+    dB = _mm_mul_pd(B1, dB);
+    dB = _mm_sub_sd(dB, _mm_shuffle_pd(dB,dB,3));
+
+    //  AB = A# * B
+    AB1 = _mm_mul_pd(B1, _mm_shuffle_pd(A2,A2,3));
+    AB2 = _mm_mul_pd(B2, _mm_shuffle_pd(A1,A1,0));
+    AB1 = _mm_sub_pd(AB1, _mm_mul_pd(B2, _mm_shuffle_pd(A1,A1,3)));
+    AB2 = _mm_sub_pd(AB2, _mm_mul_pd(B1, _mm_shuffle_pd(A2,A2,0)));
+
+    //  dC = |C|
+    dC = _mm_shuffle_pd(C2, C2, 1);
+    dC = _mm_mul_pd(C1, dC);
+    dC = _mm_sub_sd(dC, _mm_shuffle_pd(dC,dC,3));
+    //  dD = |D|
+    dD = _mm_shuffle_pd(D2, D2, 1);
+    dD = _mm_mul_pd(D1, dD);
+    dD = _mm_sub_sd(dD, _mm_shuffle_pd(dD,dD,3));
+
+    //  DC = D# * C
+    DC1 = _mm_mul_pd(C1, _mm_shuffle_pd(D2,D2,3));
+    DC2 = _mm_mul_pd(C2, _mm_shuffle_pd(D1,D1,0));
+    DC1 = _mm_sub_pd(DC1, _mm_mul_pd(C2, _mm_shuffle_pd(D1,D1,3)));
+    DC2 = _mm_sub_pd(DC2, _mm_mul_pd(C1, _mm_shuffle_pd(D2,D2,0)));
+
+    //  rd = trace(AB*DC) = trace(A#*B*D#*C)
+    d1 = _mm_mul_pd(AB1, _mm_shuffle_pd(DC1, DC2, 0));
+    d2 = _mm_mul_pd(AB2, _mm_shuffle_pd(DC1, DC2, 3));
+    rd = _mm_add_pd(d1, d2);
+    rd = _mm_add_sd(rd, _mm_shuffle_pd(rd, rd,3));
+
+    //  iD = C*A#*B
+    iD1 = _mm_mul_pd(AB1, _mm_shuffle_pd(C1,C1,0));
+    iD2 = _mm_mul_pd(AB1, _mm_shuffle_pd(C2,C2,0));
+    iD1 = _mm_add_pd(iD1, _mm_mul_pd(AB2, _mm_shuffle_pd(C1,C1,3)));
+    iD2 = _mm_add_pd(iD2, _mm_mul_pd(AB2, _mm_shuffle_pd(C2,C2,3)));
+
+    //  iA = B*D#*C
+    iA1 = _mm_mul_pd(DC1, _mm_shuffle_pd(B1,B1,0));
+    iA2 = _mm_mul_pd(DC1, _mm_shuffle_pd(B2,B2,0));
+    iA1 = _mm_add_pd(iA1, _mm_mul_pd(DC2, _mm_shuffle_pd(B1,B1,3)));
+    iA2 = _mm_add_pd(iA2, _mm_mul_pd(DC2, _mm_shuffle_pd(B2,B2,3)));
+
+    //  iD = D*|A| - C*A#*B
+    dA = _mm_shuffle_pd(dA,dA,0);
+    iD1 = _mm_sub_pd(_mm_mul_pd(D1, dA), iD1);
+    iD2 = _mm_sub_pd(_mm_mul_pd(D2, dA), iD2);
+
+    //  iA = A*|D| - B*D#*C;
+    dD = _mm_shuffle_pd(dD,dD,0);
+    iA1 = _mm_sub_pd(_mm_mul_pd(A1, dD), iA1);
+    iA2 = _mm_sub_pd(_mm_mul_pd(A2, dD), iA2);
+
+    d1 = _mm_mul_sd(dA, dD);
+    d2 = _mm_mul_sd(dB, dC);
+
+    //  iB = D * (A#B)# = D*B#*A
+    iB1 = _mm_mul_pd(D1, _mm_shuffle_pd(AB2,AB1,1));
+    iB2 = _mm_mul_pd(D2, _mm_shuffle_pd(AB2,AB1,1));
+    iB1 = _mm_sub_pd(iB1, _mm_mul_pd(_mm_shuffle_pd(D1,D1,1), _mm_shuffle_pd(AB2,AB1,2)));
+    iB2 = _mm_sub_pd(iB2, _mm_mul_pd(_mm_shuffle_pd(D2,D2,1), _mm_shuffle_pd(AB2,AB1,2)));
+
+    //  det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C)
+    det = _mm_add_sd(d1, d2);
+    det = _mm_sub_sd(det, rd);
+
+    //  iC = A * (D#C)# = A*C#*D
+    iC1 = _mm_mul_pd(A1, _mm_shuffle_pd(DC2,DC1,1));
+    iC2 = _mm_mul_pd(A2, _mm_shuffle_pd(DC2,DC1,1));
+    iC1 = _mm_sub_pd(iC1, _mm_mul_pd(_mm_shuffle_pd(A1,A1,1), _mm_shuffle_pd(DC2,DC1,2)));
+    iC2 = _mm_sub_pd(iC2, _mm_mul_pd(_mm_shuffle_pd(A2,A2,1), _mm_shuffle_pd(DC2,DC1,2)));
+
+    rd = _mm_div_sd(_mm_set_sd(1.0), det);
+//     #ifdef ZERO_SINGULAR
+//         rd = _mm_and_pd(_mm_cmpneq_sd(det,_mm_setzero_pd()), rd);
+//     #endif
+    rd = _mm_shuffle_pd(rd,rd,0);
+
+    //  iB = C*|B| - D*B#*A
+    dB = _mm_shuffle_pd(dB,dB,0);
+    iB1 = _mm_sub_pd(_mm_mul_pd(C1, dB), iB1);
+    iB2 = _mm_sub_pd(_mm_mul_pd(C2, dB), iB2);
+
+    d1 = _mm_xor_pd(rd, _Sign_PN);
+    d2 = _mm_xor_pd(rd, _Sign_NP);
+
+    //  iC = B*|C| - A*C#*D;
+    dC = _mm_shuffle_pd(dC,dC,0);
+    iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1);
+    iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2);
+
+    Index res_stride = result.outerStride();
+    double* res = result.data();
+    pstoret<double, Packet2d, ResultAlignment>(res+0,             _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+res_stride,    _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2,             _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+res_stride+2,  _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride,  _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride,  _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_INVERSE_SSE_H
diff --git a/third-party/Eigen/src/MatrixFunctions/MatrixExponential.h b/third-party/Eigen/src/MatrixFunctions/MatrixExponential.h
new file mode 100644
index 00000000..0b0ee654
--- /dev/null
+++ b/third-party/Eigen/src/MatrixFunctions/MatrixExponential.h
@@ -0,0 +1,442 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009, 2010, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2011, 2013 Chen-Pang He <jdh8@ms63.hinet.net>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIX_EXPONENTIAL
+#define EIGEN_MATRIX_EXPONENTIAL
+
+#include "StemFunction.h"
+
+namespace Eigen {
+namespace internal {
+
+/** \brief Scaling operator.
+ *
+ * This struct is used by CwiseUnaryOp to scale a matrix by \f$ 2^{-s} \f$.
+ */
+template <typename RealScalar>
+struct MatrixExponentialScalingOp
+{
+  /** \brief Constructor.
+   *
+   * \param[in] squarings  The integer \f$ s \f$ in this document.
+   */
+  MatrixExponentialScalingOp(int squarings) : m_squarings(squarings) { }
+
+
+  /** \brief Scale a matrix coefficient.
+   *
+   * \param[in,out] x  The scalar to be scaled, becoming \f$ 2^{-s} x \f$.
+   */
+  inline const RealScalar operator() (const RealScalar& x) const
+  {
+    using std::ldexp;
+    return ldexp(x, -m_squarings);
+  }
+
+  typedef std::complex<RealScalar> ComplexScalar;
+
+  /** \brief Scale a matrix coefficient.
+   *
+   * \param[in,out] x  The scalar to be scaled, becoming \f$ 2^{-s} x \f$.
+   */
+  inline const ComplexScalar operator() (const ComplexScalar& x) const
+  {
+    using std::ldexp;
+    return ComplexScalar(ldexp(x.real(), -m_squarings), ldexp(x.imag(), -m_squarings));
+  }
+
+  private:
+    int m_squarings;
+};
+
+/** \brief Compute the (3,3)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ */
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade3(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatA>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {120.L, 60.L, 12.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType tmp = b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  V = b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols());
+}
+
+/** \brief Compute the (5,5)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ */
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade5(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {30240.L, 15120.L, 3360.L, 420.L, 30.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType A4 = A2 * A2;
+  const MatrixType tmp = b[5] * A4 + b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  V = b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols());
+}
+
+/** \brief Compute the (7,7)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ */
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade7(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {17297280.L, 8648640.L, 1995840.L, 277200.L, 25200.L, 1512.L, 56.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType A4 = A2 * A2;
+  const MatrixType A6 = A4 * A2;
+  const MatrixType tmp = b[7] * A6 + b[5] * A4 + b[3] * A2 
+    + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  V = b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols());
+
+}
+
+/** \brief Compute the (9,9)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ */
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade9(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {17643225600.L, 8821612800.L, 2075673600.L, 302702400.L, 30270240.L,
+                          2162160.L, 110880.L, 3960.L, 90.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType A4 = A2 * A2;
+  const MatrixType A6 = A4 * A2;
+  const MatrixType A8 = A6 * A2;
+  const MatrixType tmp = b[9] * A8 + b[7] * A6 + b[5] * A4 + b[3] * A2 
+    + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  V = b[8] * A8 + b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols());
+}
+
+/** \brief Compute the (13,13)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ */
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade13(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {64764752532480000.L, 32382376266240000.L, 7771770303897600.L,
+                          1187353796428800.L, 129060195264000.L, 10559470521600.L, 670442572800.L,
+                          33522128640.L, 1323241920.L, 40840800.L, 960960.L, 16380.L, 182.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType A4 = A2 * A2;
+  const MatrixType A6 = A4 * A2;
+  V = b[13] * A6 + b[11] * A4 + b[9] * A2; // used for temporary storage
+  MatrixType tmp = A6 * V;
+  tmp += b[7] * A6 + b[5] * A4 + b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  tmp = b[12] * A6 + b[10] * A4 + b[8] * A2;
+  V.noalias() = A6 * tmp;
+  V += b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols());
+}
+
+/** \brief Compute the (17,17)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ *
+ *  This function activates only if your long double is double-double or quadruple.
+ */
+#if LDBL_MANT_DIG > 64
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade17(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {830034394580628357120000.L, 415017197290314178560000.L,
+                          100610229646136770560000.L, 15720348382208870400000.L,
+                          1774878043152614400000.L, 153822763739893248000.L, 10608466464820224000.L,
+                          595373117923584000.L, 27563570274240000.L, 1060137318240000.L,
+                          33924394183680.L, 899510451840.L, 19554575040.L, 341863200.L, 4651200.L,
+                          46512.L, 306.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType A4 = A2 * A2;
+  const MatrixType A6 = A4 * A2;
+  const MatrixType A8 = A4 * A4;
+  V = b[17] * A8 + b[15] * A6 + b[13] * A4 + b[11] * A2; // used for temporary storage
+  MatrixType tmp = A8 * V;
+  tmp += b[9] * A8 + b[7] * A6 + b[5] * A4 + b[3] * A2 
+    + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  tmp = b[16] * A8 + b[14] * A6 + b[12] * A4 + b[10] * A2;
+  V.noalias() = tmp * A8;
+  V += b[8] * A8 + b[6] * A6 + b[4] * A4 + b[2] * A2 
+    + b[0] * MatrixType::Identity(A.rows(), A.cols());
+}
+#endif
+
+template <typename MatrixType, typename RealScalar = typename NumTraits<typename traits<MatrixType>::Scalar>::Real>
+struct matrix_exp_computeUV
+{
+  /** \brief Compute Pad&eacute; approximant to the exponential.
+    *
+    * Computes \c U, \c V and \c squarings such that \f$ (V+U)(V-U)^{-1} \f$ is a Pad&eacute;
+    * approximant of \f$ \exp(2^{-\mbox{squarings}}M) \f$ around \f$ M = 0 \f$, where \f$ M \f$
+    * denotes the matrix \c arg. The degree of the Pad&eacute; approximant and the value of squarings
+    * are chosen such that the approximation error is no more than the round-off error.
+    */
+  static void run(const MatrixType& arg, MatrixType& U, MatrixType& V, int& squarings);
+};
+
+template <typename MatrixType>
+struct matrix_exp_computeUV<MatrixType, float>
+{
+  template <typename ArgType>
+  static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings)
+  {
+    using std::frexp;
+    using std::pow;
+    const float l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
+    squarings = 0;
+    if (l1norm < 4.258730016922831e-001f) {
+      matrix_exp_pade3(arg, U, V);
+    } else if (l1norm < 1.880152677804762e+000f) {
+      matrix_exp_pade5(arg, U, V);
+    } else {
+      const float maxnorm = 3.925724783138660f;
+      frexp(l1norm / maxnorm, &squarings);
+      if (squarings < 0) squarings = 0;
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<float>(squarings));
+      matrix_exp_pade7(A, U, V);
+    }
+  }
+};
+
+template <typename MatrixType>
+struct matrix_exp_computeUV<MatrixType, double>
+{
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  template <typename ArgType>
+  static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings)
+  {
+    using std::frexp;
+    using std::pow;
+    const RealScalar l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
+    squarings = 0;
+    if (l1norm < 1.495585217958292e-002) {
+      matrix_exp_pade3(arg, U, V);
+    } else if (l1norm < 2.539398330063230e-001) {
+      matrix_exp_pade5(arg, U, V);
+    } else if (l1norm < 9.504178996162932e-001) {
+      matrix_exp_pade7(arg, U, V);
+    } else if (l1norm < 2.097847961257068e+000) {
+      matrix_exp_pade9(arg, U, V);
+    } else {
+      const RealScalar maxnorm = 5.371920351148152;
+      frexp(l1norm / maxnorm, &squarings);
+      if (squarings < 0) squarings = 0;
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<RealScalar>(squarings));
+      matrix_exp_pade13(A, U, V);
+    }
+  }
+};
+  
+template <typename MatrixType>
+struct matrix_exp_computeUV<MatrixType, long double>
+{
+  template <typename ArgType>
+  static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings)
+  {
+#if   LDBL_MANT_DIG == 53   // double precision
+    matrix_exp_computeUV<MatrixType, double>::run(arg, U, V, squarings);
+  
+#else
+  
+    using std::frexp;
+    using std::pow;
+    const long double l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
+    squarings = 0;
+  
+#if LDBL_MANT_DIG <= 64   // extended precision
+  
+    if (l1norm < 4.1968497232266989671e-003L) {
+      matrix_exp_pade3(arg, U, V);
+    } else if (l1norm < 1.1848116734693823091e-001L) {
+      matrix_exp_pade5(arg, U, V);
+    } else if (l1norm < 5.5170388480686700274e-001L) {
+      matrix_exp_pade7(arg, U, V);
+    } else if (l1norm < 1.3759868875587845383e+000L) {
+      matrix_exp_pade9(arg, U, V);
+    } else {
+      const long double maxnorm = 4.0246098906697353063L;
+      frexp(l1norm / maxnorm, &squarings);
+      if (squarings < 0) squarings = 0;
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+      matrix_exp_pade13(A, U, V);
+    }
+  
+#elif LDBL_MANT_DIG <= 106  // double-double
+  
+    if (l1norm < 3.2787892205607026992947488108213e-005L) {
+      matrix_exp_pade3(arg, U, V);
+    } else if (l1norm < 6.4467025060072760084130906076332e-003L) {
+      matrix_exp_pade5(arg, U, V);
+    } else if (l1norm < 6.8988028496595374751374122881143e-002L) {
+      matrix_exp_pade7(arg, U, V);
+    } else if (l1norm < 2.7339737518502231741495857201670e-001L) {
+      matrix_exp_pade9(arg, U, V);
+    } else if (l1norm < 1.3203382096514474905666448850278e+000L) {
+      matrix_exp_pade13(arg, U, V);
+    } else {
+      const long double maxnorm = 3.2579440895405400856599663723517L;
+      frexp(l1norm / maxnorm, &squarings);
+      if (squarings < 0) squarings = 0;
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+      matrix_exp_pade17(A, U, V);
+    }
+  
+#elif LDBL_MANT_DIG <= 112  // quadruple precison
+  
+    if (l1norm < 1.639394610288918690547467954466970e-005L) {
+      matrix_exp_pade3(arg, U, V);
+    } else if (l1norm < 4.253237712165275566025884344433009e-003L) {
+      matrix_exp_pade5(arg, U, V);
+    } else if (l1norm < 5.125804063165764409885122032933142e-002L) {
+      matrix_exp_pade7(arg, U, V);
+    } else if (l1norm < 2.170000765161155195453205651889853e-001L) {
+      matrix_exp_pade9(arg, U, V);
+    } else if (l1norm < 1.125358383453143065081397882891878e+000L) {
+      matrix_exp_pade13(arg, U, V);
+    } else {
+      const long double maxnorm = 2.884233277829519311757165057717815L;
+      frexp(l1norm / maxnorm, &squarings);
+      if (squarings < 0) squarings = 0;
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+      matrix_exp_pade17(A, U, V);
+    }
+  
+#else
+  
+    // this case should be handled in compute()
+    eigen_assert(false && "Bug in MatrixExponential"); 
+  
+#endif
+#endif  // LDBL_MANT_DIG
+  }
+};
+
+template<typename T> struct is_exp_known_type : false_type {};
+template<> struct is_exp_known_type<float> : true_type {};
+template<> struct is_exp_known_type<double> : true_type {};
+#if LDBL_MANT_DIG <= 112
+template<> struct is_exp_known_type<long double> : true_type {};
+#endif
+
+template <typename ArgType, typename ResultType>
+void matrix_exp_compute(const ArgType& arg, ResultType &result, true_type) // natively supported scalar type
+{
+  typedef typename ArgType::PlainObject MatrixType;
+  MatrixType U, V;
+  int squarings;
+  matrix_exp_computeUV<MatrixType>::run(arg, U, V, squarings); // Pade approximant is (U+V) / (-U+V)
+  MatrixType numer = U + V;
+  MatrixType denom = -U + V;
+  result = denom.partialPivLu().solve(numer);
+  for (int i=0; i<squarings; i++)
+    result *= result;   // undo scaling by repeated squaring
+}
+
+
+/* Computes the matrix exponential
+ *
+ * \param arg    argument of matrix exponential (should be plain object)
+ * \param result variable in which result will be stored
+ */
+template <typename ArgType, typename ResultType>
+void matrix_exp_compute(const ArgType& arg, ResultType &result, false_type) // default
+{
+  typedef typename ArgType::PlainObject MatrixType;
+  typedef typename traits<MatrixType>::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef typename std::complex<RealScalar> ComplexScalar;
+  result = arg.matrixFunction(internal::stem_function_exp<ComplexScalar>);
+}
+
+} // end namespace Eigen::internal
+
+/** \ingroup MatrixFunctions_Module
+  *
+  * \brief Proxy for the matrix exponential of some matrix (expression).
+  *
+  * \tparam Derived  Type of the argument to the matrix exponential.
+  *
+  * This class holds the argument to the matrix exponential until it is assigned or evaluated for
+  * some other reason (so the argument should not be changed in the meantime). It is the return type
+  * of MatrixBase::exp() and most of the time this is the only way it is used.
+  */
+template<typename Derived> struct MatrixExponentialReturnValue
+: public ReturnByValue<MatrixExponentialReturnValue<Derived> >
+{
+    typedef typename Derived::Index Index;
+  public:
+    /** \brief Constructor.
+      *
+      * \param src %Matrix (expression) forming the argument of the matrix exponential.
+      */
+    MatrixExponentialReturnValue(const Derived& src) : m_src(src) { }
+
+    /** \brief Compute the matrix exponential.
+      *
+      * \param result the matrix exponential of \p src in the constructor.
+      */
+    template <typename ResultType>
+    inline void evalTo(ResultType& result) const
+    {
+      const typename internal::nested_eval<Derived, 10>::type tmp(m_src);
+      internal::matrix_exp_compute(tmp, result, internal::is_exp_known_type<typename Derived::RealScalar>());
+    }
+
+    Index rows() const { return m_src.rows(); }
+    Index cols() const { return m_src.cols(); }
+
+  protected:
+    const typename internal::ref_selector<Derived>::type m_src;
+};
+
+namespace internal {
+template<typename Derived>
+struct traits<MatrixExponentialReturnValue<Derived> >
+{
+  typedef typename Derived::PlainObject ReturnType;
+};
+}
+
+template <typename Derived>
+const MatrixExponentialReturnValue<Derived> MatrixBase<Derived>::exp() const
+{
+  eigen_assert(rows() == cols());
+  return MatrixExponentialReturnValue<Derived>(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATRIX_EXPONENTIAL
diff --git a/third-party/Eigen/src/MatrixFunctions/MatrixFunction.h b/third-party/Eigen/src/MatrixFunctions/MatrixFunction.h
new file mode 100644
index 00000000..3df82394
--- /dev/null
+++ b/third-party/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -0,0 +1,580 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2011, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIX_FUNCTION_H
+#define EIGEN_MATRIX_FUNCTION_H
+
+#include "StemFunction.h"
+
+
+namespace Eigen { 
+
+namespace internal {
+
+/** \brief Maximum distance allowed between eigenvalues to be considered "close". */
+static const float matrix_function_separation = 0.1f;
+
+/** \ingroup MatrixFunctions_Module
+  * \class MatrixFunctionAtomic
+  * \brief Helper class for computing matrix functions of atomic matrices.
+  *
+  * Here, an atomic matrix is a triangular matrix whose diagonal entries are close to each other.
+  */
+template <typename MatrixType>
+class MatrixFunctionAtomic 
+{
+  public:
+
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename stem_function<Scalar>::type StemFunction;
+
+    /** \brief Constructor
+      * \param[in]  f  matrix function to compute.
+      */
+    MatrixFunctionAtomic(StemFunction f) : m_f(f) { }
+
+    /** \brief Compute matrix function of atomic matrix
+      * \param[in]  A  argument of matrix function, should be upper triangular and atomic
+      * \returns  f(A), the matrix function evaluated at the given matrix
+      */
+    MatrixType compute(const MatrixType& A);
+
+  private:
+    StemFunction* m_f;
+};
+
+template <typename MatrixType>
+typename NumTraits<typename MatrixType::Scalar>::Real matrix_function_compute_mu(const MatrixType& A)
+{
+  typedef typename plain_col_type<MatrixType>::type VectorType;
+  typename MatrixType::Index rows = A.rows();
+  const MatrixType N = MatrixType::Identity(rows, rows) - A;
+  VectorType e = VectorType::Ones(rows);
+  N.template triangularView<Upper>().solveInPlace(e);
+  return e.cwiseAbs().maxCoeff();
+}
+
+template <typename MatrixType>
+MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A)
+{
+  // TODO: Use that A is upper triangular
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef typename MatrixType::Index Index;
+  Index rows = A.rows();
+  Scalar avgEival = A.trace() / Scalar(RealScalar(rows));
+  MatrixType Ashifted = A - avgEival * MatrixType::Identity(rows, rows);
+  RealScalar mu = matrix_function_compute_mu(Ashifted);
+  MatrixType F = m_f(avgEival, 0) * MatrixType::Identity(rows, rows);
+  MatrixType P = Ashifted;
+  MatrixType Fincr;
+  for (Index s = 1; s < 1.1 * rows + 10; s++) { // upper limit is fairly arbitrary
+    Fincr = m_f(avgEival, static_cast<int>(s)) * P;
+    F += Fincr;
+    P = Scalar(RealScalar(1.0/(s + 1))) * P * Ashifted;
+
+    // test whether Taylor series converged
+    const RealScalar F_norm = F.cwiseAbs().rowwise().sum().maxCoeff();
+    const RealScalar Fincr_norm = Fincr.cwiseAbs().rowwise().sum().maxCoeff();
+    if (Fincr_norm < NumTraits<Scalar>::epsilon() * F_norm) {
+      RealScalar delta = 0;
+      RealScalar rfactorial = 1;
+      for (Index r = 0; r < rows; r++) {
+        RealScalar mx = 0;
+        for (Index i = 0; i < rows; i++)
+          mx = (std::max)(mx, std::abs(m_f(Ashifted(i, i) + avgEival, static_cast<int>(s+r))));
+        if (r != 0)
+          rfactorial *= RealScalar(r);
+        delta = (std::max)(delta, mx / rfactorial);
+      }
+      const RealScalar P_norm = P.cwiseAbs().rowwise().sum().maxCoeff();
+      if (mu * delta * P_norm < NumTraits<Scalar>::epsilon() * F_norm) // series converged
+        break;
+    }
+  }
+  return F;
+}
+
+/** \brief Find cluster in \p clusters containing some value 
+  * \param[in] key Value to find
+  * \returns Iterator to cluster containing \p key, or \c clusters.end() if no cluster in \p m_clusters
+  * contains \p key.
+  */
+template <typename Index, typename ListOfClusters>
+typename ListOfClusters::iterator matrix_function_find_cluster(Index key, ListOfClusters& clusters)
+{
+  typename std::list<Index>::iterator j;
+  for (typename ListOfClusters::iterator i = clusters.begin(); i != clusters.end(); ++i) {
+    j = std::find(i->begin(), i->end(), key);
+    if (j != i->end())
+      return i;
+  }
+  return clusters.end();
+}
+
+/** \brief Partition eigenvalues in clusters of ei'vals close to each other
+  * 
+  * \param[in]  eivals    Eigenvalues
+  * \param[out] clusters  Resulting partition of eigenvalues
+  *
+  * The partition satisfies the following two properties:
+  * # Any eigenvalue in a certain cluster is at most matrix_function_separation() away from another eigenvalue
+  *   in the same cluster.
+  * # The distance between two eigenvalues in different clusters is more than matrix_function_separation().  
+  * The implementation follows Algorithm 4.1 in the paper of Davies and Higham.
+  */
+template <typename EivalsType, typename Cluster>
+void matrix_function_partition_eigenvalues(const EivalsType& eivals, std::list<Cluster>& clusters)
+{
+  typedef typename EivalsType::Index Index;
+  typedef typename EivalsType::RealScalar RealScalar;
+  for (Index i=0; i<eivals.rows(); ++i) {
+    // Find cluster containing i-th ei'val, adding a new cluster if necessary
+    typename std::list<Cluster>::iterator qi = matrix_function_find_cluster(i, clusters);
+    if (qi == clusters.end()) {
+      Cluster l;
+      l.push_back(i);
+      clusters.push_back(l);
+      qi = clusters.end();
+      --qi;
+    }
+
+    // Look for other element to add to the set
+    for (Index j=i+1; j<eivals.rows(); ++j) {
+      if (abs(eivals(j) - eivals(i)) <= RealScalar(matrix_function_separation)
+          && std::find(qi->begin(), qi->end(), j) == qi->end()) {
+        typename std::list<Cluster>::iterator qj = matrix_function_find_cluster(j, clusters);
+        if (qj == clusters.end()) {
+          qi->push_back(j);
+        } else {
+          qi->insert(qi->end(), qj->begin(), qj->end());
+          clusters.erase(qj);
+        }
+      }
+    }
+  }
+}
+
+/** \brief Compute size of each cluster given a partitioning */
+template <typename ListOfClusters, typename Index>
+void matrix_function_compute_cluster_size(const ListOfClusters& clusters, Matrix<Index, Dynamic, 1>& clusterSize)
+{
+  const Index numClusters = static_cast<Index>(clusters.size());
+  clusterSize.setZero(numClusters);
+  Index clusterIndex = 0;
+  for (typename ListOfClusters::const_iterator cluster = clusters.begin(); cluster != clusters.end(); ++cluster) {
+    clusterSize[clusterIndex] = cluster->size();
+    ++clusterIndex;
+  }
+}
+
+/** \brief Compute start of each block using clusterSize */
+template <typename VectorType>
+void matrix_function_compute_block_start(const VectorType& clusterSize, VectorType& blockStart)
+{
+  blockStart.resize(clusterSize.rows());
+  blockStart(0) = 0;
+  for (typename VectorType::Index i = 1; i < clusterSize.rows(); i++) {
+    blockStart(i) = blockStart(i-1) + clusterSize(i-1);
+  }
+}
+
+/** \brief Compute mapping of eigenvalue indices to cluster indices */
+template <typename EivalsType, typename ListOfClusters, typename VectorType>
+void matrix_function_compute_map(const EivalsType& eivals, const ListOfClusters& clusters, VectorType& eivalToCluster)
+{
+  typedef typename EivalsType::Index Index;
+  eivalToCluster.resize(eivals.rows());
+  Index clusterIndex = 0;
+  for (typename ListOfClusters::const_iterator cluster = clusters.begin(); cluster != clusters.end(); ++cluster) {
+    for (Index i = 0; i < eivals.rows(); ++i) {
+      if (std::find(cluster->begin(), cluster->end(), i) != cluster->end()) {
+        eivalToCluster[i] = clusterIndex;
+      }
+    }
+    ++clusterIndex;
+  }
+}
+
+/** \brief Compute permutation which groups ei'vals in same cluster together */
+template <typename DynVectorType, typename VectorType>
+void matrix_function_compute_permutation(const DynVectorType& blockStart, const DynVectorType& eivalToCluster, VectorType& permutation)
+{
+  typedef typename VectorType::Index Index;
+  DynVectorType indexNextEntry = blockStart;
+  permutation.resize(eivalToCluster.rows());
+  for (Index i = 0; i < eivalToCluster.rows(); i++) {
+    Index cluster = eivalToCluster[i];
+    permutation[i] = indexNextEntry[cluster];
+    ++indexNextEntry[cluster];
+  }
+}  
+
+/** \brief Permute Schur decomposition in U and T according to permutation */
+template <typename VectorType, typename MatrixType>
+void matrix_function_permute_schur(VectorType& permutation, MatrixType& U, MatrixType& T)
+{
+  typedef typename VectorType::Index Index;
+  for (Index i = 0; i < permutation.rows() - 1; i++) {
+    Index j;
+    for (j = i; j < permutation.rows(); j++) {
+      if (permutation(j) == i) break;
+    }
+    eigen_assert(permutation(j) == i);
+    for (Index k = j-1; k >= i; k--) {
+      JacobiRotation<typename MatrixType::Scalar> rotation;
+      rotation.makeGivens(T(k, k+1), T(k+1, k+1) - T(k, k));
+      T.applyOnTheLeft(k, k+1, rotation.adjoint());
+      T.applyOnTheRight(k, k+1, rotation);
+      U.applyOnTheRight(k, k+1, rotation);
+      std::swap(permutation.coeffRef(k), permutation.coeffRef(k+1));
+    }
+  }
+}
+
+/** \brief Compute block diagonal part of matrix function.
+  *
+  * This routine computes the matrix function applied to the block diagonal part of \p T (which should be
+  * upper triangular), with the blocking given by \p blockStart and \p clusterSize. The matrix function of
+  * each diagonal block is computed by \p atomic. The off-diagonal parts of \p fT are set to zero.
+  */
+template <typename MatrixType, typename AtomicType, typename VectorType>
+void matrix_function_compute_block_atomic(const MatrixType& T, AtomicType& atomic, const VectorType& blockStart, const VectorType& clusterSize, MatrixType& fT)
+{ 
+  fT.setZero(T.rows(), T.cols());
+  for (typename VectorType::Index i = 0; i < clusterSize.rows(); ++i) {
+    fT.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i))
+      = atomic.compute(T.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i)));
+  }
+}
+
+/** \brief Solve a triangular Sylvester equation AX + XB = C 
+  *
+  * \param[in]  A  the matrix A; should be square and upper triangular
+  * \param[in]  B  the matrix B; should be square and upper triangular
+  * \param[in]  C  the matrix C; should have correct size.
+  *
+  * \returns the solution X.
+  *
+  * If A is m-by-m and B is n-by-n, then both C and X are m-by-n.  The (i,j)-th component of the Sylvester
+  * equation is
+  * \f[ 
+  *     \sum_{k=i}^m A_{ik} X_{kj} + \sum_{k=1}^j X_{ik} B_{kj} = C_{ij}. 
+  * \f]
+  * This can be re-arranged to yield:
+  * \f[ 
+  *     X_{ij} = \frac{1}{A_{ii} + B_{jj}} \Bigl( C_{ij}
+  *     - \sum_{k=i+1}^m A_{ik} X_{kj} - \sum_{k=1}^{j-1} X_{ik} B_{kj} \Bigr).
+  * \f]
+  * It is assumed that A and B are such that the numerator is never zero (otherwise the Sylvester equation
+  * does not have a unique solution). In that case, these equations can be evaluated in the order 
+  * \f$ i=m,\ldots,1 \f$ and \f$ j=1,\ldots,n \f$.
+  */
+template <typename MatrixType>
+MatrixType matrix_function_solve_triangular_sylvester(const MatrixType& A, const MatrixType& B, const MatrixType& C)
+{
+  eigen_assert(A.rows() == A.cols());
+  eigen_assert(A.isUpperTriangular());
+  eigen_assert(B.rows() == B.cols());
+  eigen_assert(B.isUpperTriangular());
+  eigen_assert(C.rows() == A.rows());
+  eigen_assert(C.cols() == B.rows());
+
+  typedef typename MatrixType::Index Index;
+  typedef typename MatrixType::Scalar Scalar;
+
+  Index m = A.rows();
+  Index n = B.rows();
+  MatrixType X(m, n);
+
+  for (Index i = m - 1; i >= 0; --i) {
+    for (Index j = 0; j < n; ++j) {
+
+      // Compute AX = \sum_{k=i+1}^m A_{ik} X_{kj}
+      Scalar AX;
+      if (i == m - 1) {
+	AX = 0; 
+      } else {
+	Matrix<Scalar,1,1> AXmatrix = A.row(i).tail(m-1-i) * X.col(j).tail(m-1-i);
+	AX = AXmatrix(0,0);
+      }
+
+      // Compute XB = \sum_{k=1}^{j-1} X_{ik} B_{kj}
+      Scalar XB;
+      if (j == 0) {
+	XB = 0; 
+      } else {
+	Matrix<Scalar,1,1> XBmatrix = X.row(i).head(j) * B.col(j).head(j);
+	XB = XBmatrix(0,0);
+      }
+
+      X(i,j) = (C(i,j) - AX - XB) / (A(i,i) + B(j,j));
+    }
+  }
+  return X;
+}
+
+/** \brief Compute part of matrix function above block diagonal.
+  *
+  * This routine completes the computation of \p fT, denoting a matrix function applied to the triangular
+  * matrix \p T. It assumes that the block diagonal part of \p fT has already been computed. The part below
+  * the diagonal is zero, because \p T is upper triangular.
+  */
+template <typename MatrixType, typename VectorType>
+void matrix_function_compute_above_diagonal(const MatrixType& T, const VectorType& blockStart, const VectorType& clusterSize, MatrixType& fT)
+{ 
+  typedef internal::traits<MatrixType> Traits;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::Index Index;
+  static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
+  static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
+  static const int Options = MatrixType::Options;
+  typedef Matrix<Scalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+
+  for (Index k = 1; k < clusterSize.rows(); k++) {
+    for (Index i = 0; i < clusterSize.rows() - k; i++) {
+      // compute (i, i+k) block
+      DynMatrixType A = T.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i));
+      DynMatrixType B = -T.block(blockStart(i+k), blockStart(i+k), clusterSize(i+k), clusterSize(i+k));
+      DynMatrixType C = fT.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i))
+        * T.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k));
+      C -= T.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k))
+        * fT.block(blockStart(i+k), blockStart(i+k), clusterSize(i+k), clusterSize(i+k));
+      for (Index m = i + 1; m < i + k; m++) {
+        C += fT.block(blockStart(i), blockStart(m), clusterSize(i), clusterSize(m))
+          * T.block(blockStart(m), blockStart(i+k), clusterSize(m), clusterSize(i+k));
+        C -= T.block(blockStart(i), blockStart(m), clusterSize(i), clusterSize(m))
+          * fT.block(blockStart(m), blockStart(i+k), clusterSize(m), clusterSize(i+k));
+      }
+      fT.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k))
+        = matrix_function_solve_triangular_sylvester(A, B, C);
+    }
+  }
+}
+
+/** \ingroup MatrixFunctions_Module
+  * \brief Class for computing matrix functions.
+  * \tparam  MatrixType  type of the argument of the matrix function,
+  *                      expected to be an instantiation of the Matrix class template.
+  * \tparam  AtomicType  type for computing matrix function of atomic blocks.
+  * \tparam  IsComplex   used internally to select correct specialization.
+  *
+  * This class implements the Schur-Parlett algorithm for computing matrix functions. The spectrum of the
+  * matrix is divided in clustered of eigenvalues that lies close together. This class delegates the
+  * computation of the matrix function on every block corresponding to these clusters to an object of type
+  * \p AtomicType and uses these results to compute the matrix function of the whole matrix. The class
+  * \p AtomicType should have a \p compute() member function for computing the matrix function of a block.
+  *
+  * \sa class MatrixFunctionAtomic, class MatrixLogarithmAtomic
+  */
+template <typename MatrixType, int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex>
+struct matrix_function_compute
+{  
+    /** \brief Compute the matrix function.
+      *
+      * \param[in]  A       argument of matrix function, should be a square matrix.
+      * \param[in]  atomic  class for computing matrix function of atomic blocks.
+      * \param[out] result  the function \p f applied to \p A, as
+      * specified in the constructor.
+      *
+      * See MatrixBase::matrixFunction() for details on how this computation
+      * is implemented.
+      */
+    template <typename AtomicType, typename ResultType> 
+    static void run(const MatrixType& A, AtomicType& atomic, ResultType &result);    
+};
+
+/** \internal \ingroup MatrixFunctions_Module 
+  * \brief Partial specialization of MatrixFunction for real matrices
+  *
+  * This converts the real matrix to a complex matrix, compute the matrix function of that matrix, and then
+  * converts the result back to a real matrix.
+  */
+template <typename MatrixType>
+struct matrix_function_compute<MatrixType, 0>
+{  
+  template <typename MatA, typename AtomicType, typename ResultType>
+  static void run(const MatA& A, AtomicType& atomic, ResultType &result)
+  {
+    typedef internal::traits<MatrixType> Traits;
+    typedef typename Traits::Scalar Scalar;
+    static const int Rows = Traits::RowsAtCompileTime, Cols = Traits::ColsAtCompileTime;
+    static const int MaxRows = Traits::MaxRowsAtCompileTime, MaxCols = Traits::MaxColsAtCompileTime;
+
+    typedef std::complex<Scalar> ComplexScalar;
+    typedef Matrix<ComplexScalar, Rows, Cols, 0, MaxRows, MaxCols> ComplexMatrix;
+
+    ComplexMatrix CA = A.template cast<ComplexScalar>();
+    ComplexMatrix Cresult;
+    matrix_function_compute<ComplexMatrix>::run(CA, atomic, Cresult);
+    result = Cresult.real();
+  }
+};
+
+/** \internal \ingroup MatrixFunctions_Module 
+  * \brief Partial specialization of MatrixFunction for complex matrices
+  */
+template <typename MatrixType>
+struct matrix_function_compute<MatrixType, 1>
+{
+  template <typename MatA, typename AtomicType, typename ResultType>
+  static void run(const MatA& A, AtomicType& atomic, ResultType &result)
+  {
+    typedef internal::traits<MatrixType> Traits;
+    
+    // compute Schur decomposition of A
+    const ComplexSchur<MatrixType> schurOfA(A);  
+    MatrixType T = schurOfA.matrixT();
+    MatrixType U = schurOfA.matrixU();
+
+    // partition eigenvalues into clusters of ei'vals "close" to each other
+    std::list<std::list<Index> > clusters; 
+    matrix_function_partition_eigenvalues(T.diagonal(), clusters);
+
+    // compute size of each cluster
+    Matrix<Index, Dynamic, 1> clusterSize;
+    matrix_function_compute_cluster_size(clusters, clusterSize);
+
+    // blockStart[i] is row index at which block corresponding to i-th cluster starts 
+    Matrix<Index, Dynamic, 1> blockStart; 
+    matrix_function_compute_block_start(clusterSize, blockStart);
+
+    // compute map so that eivalToCluster[i] = j means that i-th ei'val is in j-th cluster 
+    Matrix<Index, Dynamic, 1> eivalToCluster;
+    matrix_function_compute_map(T.diagonal(), clusters, eivalToCluster);
+
+    // compute permutation which groups ei'vals in same cluster together 
+    Matrix<Index, Traits::RowsAtCompileTime, 1> permutation;
+    matrix_function_compute_permutation(blockStart, eivalToCluster, permutation);
+
+    // permute Schur decomposition
+    matrix_function_permute_schur(permutation, U, T);
+
+    // compute result
+    MatrixType fT; // matrix function applied to T
+    matrix_function_compute_block_atomic(T, atomic, blockStart, clusterSize, fT);
+    matrix_function_compute_above_diagonal(T, blockStart, clusterSize, fT);
+    result = U * (fT.template triangularView<Upper>() * U.adjoint());
+  }
+};
+
+} // end of namespace internal
+
+/** \ingroup MatrixFunctions_Module
+  *
+  * \brief Proxy for the matrix function of some matrix (expression).
+  *
+  * \tparam Derived  Type of the argument to the matrix function.
+  *
+  * This class holds the argument to the matrix function until it is assigned or evaluated for some other
+  * reason (so the argument should not be changed in the meantime). It is the return type of
+  * matrixBase::matrixFunction() and related functions and most of the time this is the only way it is used.
+  */
+template<typename Derived> class MatrixFunctionReturnValue
+: public ReturnByValue<MatrixFunctionReturnValue<Derived> >
+{
+  public:
+    typedef typename Derived::Scalar Scalar;
+    typedef typename Derived::Index Index;
+    typedef typename internal::stem_function<Scalar>::type StemFunction;
+
+  protected:
+    typedef typename internal::ref_selector<Derived>::type DerivedNested;
+
+  public:
+
+    /** \brief Constructor.
+      *
+      * \param[in] A  %Matrix (expression) forming the argument of the matrix function.
+      * \param[in] f  Stem function for matrix function under consideration.
+      */
+    MatrixFunctionReturnValue(const Derived& A, StemFunction f) : m_A(A), m_f(f) { }
+
+    /** \brief Compute the matrix function.
+      *
+      * \param[out] result \p f applied to \p A, where \p f and \p A are as in the constructor.
+      */
+    template <typename ResultType>
+    inline void evalTo(ResultType& result) const
+    {
+      typedef typename internal::nested_eval<Derived, 10>::type NestedEvalType;
+      typedef typename internal::remove_all<NestedEvalType>::type NestedEvalTypeClean;
+      typedef internal::traits<NestedEvalTypeClean> Traits;
+      static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
+      static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
+      typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+      typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+
+      typedef internal::MatrixFunctionAtomic<DynMatrixType> AtomicType;
+      AtomicType atomic(m_f);
+
+      internal::matrix_function_compute<typename NestedEvalTypeClean::PlainObject>::run(m_A, atomic, result);
+    }
+
+    Index rows() const { return m_A.rows(); }
+    Index cols() const { return m_A.cols(); }
+
+  private:
+    const DerivedNested m_A;
+    StemFunction *m_f;
+};
+
+namespace internal {
+template<typename Derived>
+struct traits<MatrixFunctionReturnValue<Derived> >
+{
+  typedef typename Derived::PlainObject ReturnType;
+};
+}
+
+
+/********** MatrixBase methods **********/
+
+
+template <typename Derived>
+const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::matrixFunction(typename internal::stem_function<typename internal::traits<Derived>::Scalar>::type f) const
+{
+  eigen_assert(rows() == cols());
+  return MatrixFunctionReturnValue<Derived>(derived(), f);
+}
+
+template <typename Derived>
+const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::sin() const
+{
+  eigen_assert(rows() == cols());
+  typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar;
+  return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_sin<ComplexScalar>);
+}
+
+template <typename Derived>
+const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::cos() const
+{
+  eigen_assert(rows() == cols());
+  typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar;
+  return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_cos<ComplexScalar>);
+}
+
+template <typename Derived>
+const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::sinh() const
+{
+  eigen_assert(rows() == cols());
+  typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar;
+  return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_sinh<ComplexScalar>);
+}
+
+template <typename Derived>
+const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::cosh() const
+{
+  eigen_assert(rows() == cols());
+  typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar;
+  return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_cosh<ComplexScalar>);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATRIX_FUNCTION_H
diff --git a/third-party/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/third-party/Eigen/src/MatrixFunctions/MatrixLogarithm.h
new file mode 100644
index 00000000..cf5fffad
--- /dev/null
+++ b/third-party/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -0,0 +1,373 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2011 Chen-Pang He <jdh8@ms63.hinet.net>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIX_LOGARITHM
+#define EIGEN_MATRIX_LOGARITHM
+
+namespace Eigen { 
+
+namespace internal { 
+
+template <typename Scalar>
+struct matrix_log_min_pade_degree 
+{
+  static const int value = 3;
+};
+
+template <typename Scalar>
+struct matrix_log_max_pade_degree 
+{
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  static const int value = std::numeric_limits<RealScalar>::digits<= 24?  5:  // single precision
+                           std::numeric_limits<RealScalar>::digits<= 53?  7:  // double precision
+                           std::numeric_limits<RealScalar>::digits<= 64?  8:  // extended precision
+                           std::numeric_limits<RealScalar>::digits<=106? 10:  // double-double
+                                                                         11;  // quadruple precision
+};
+
+/** \brief Compute logarithm of 2x2 triangular matrix. */
+template <typename MatrixType>
+void matrix_log_compute_2x2(const MatrixType& A, MatrixType& result)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  using std::abs;
+  using std::ceil;
+  using std::imag;
+  using std::log;
+
+  Scalar logA00 = log(A(0,0));
+  Scalar logA11 = log(A(1,1));
+
+  result(0,0) = logA00;
+  result(1,0) = Scalar(0);
+  result(1,1) = logA11;
+
+  Scalar y = A(1,1) - A(0,0);
+  if (y==Scalar(0))
+  {
+    result(0,1) = A(0,1) / A(0,0);
+  }
+  else if ((abs(A(0,0)) < RealScalar(0.5)*abs(A(1,1))) || (abs(A(0,0)) > 2*abs(A(1,1))))
+  {
+    result(0,1) = A(0,1) * (logA11 - logA00) / y;
+  }
+  else
+  {
+    // computation in previous branch is inaccurate if A(1,1) \approx A(0,0)
+    int unwindingNumber = static_cast<int>(ceil((imag(logA11 - logA00) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI)));
+    result(0,1) = A(0,1) * (numext::log1p(y/A(0,0)) + Scalar(0,2*EIGEN_PI*unwindingNumber)) / y;
+  }
+}
+
+/* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = float) */
+inline int matrix_log_get_pade_degree(float normTminusI)
+{
+  const float maxNormForPade[] = { 2.5111573934555054e-1 /* degree = 3 */ , 4.0535837411880493e-1,
+            5.3149729967117310e-1 };
+  const int minPadeDegree = matrix_log_min_pade_degree<float>::value;
+  const int maxPadeDegree = matrix_log_max_pade_degree<float>::value;
+  int degree = minPadeDegree;
+  for (; degree <= maxPadeDegree; ++degree) 
+    if (normTminusI <= maxNormForPade[degree - minPadeDegree])
+      break;
+  return degree;
+}
+
+/* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = double) */
+inline int matrix_log_get_pade_degree(double normTminusI)
+{
+  const double maxNormForPade[] = { 1.6206284795015624e-2 /* degree = 3 */ , 5.3873532631381171e-2,
+            1.1352802267628681e-1, 1.8662860613541288e-1, 2.642960831111435e-1 };
+  const int minPadeDegree = matrix_log_min_pade_degree<double>::value;
+  const int maxPadeDegree = matrix_log_max_pade_degree<double>::value;
+  int degree = minPadeDegree;
+  for (; degree <= maxPadeDegree; ++degree)
+    if (normTminusI <= maxNormForPade[degree - minPadeDegree])
+      break;
+  return degree;
+}
+
+/* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = long double) */
+inline int matrix_log_get_pade_degree(long double normTminusI)
+{
+#if   LDBL_MANT_DIG == 53         // double precision
+  const long double maxNormForPade[] = { 1.6206284795015624e-2L /* degree = 3 */ , 5.3873532631381171e-2L,
+            1.1352802267628681e-1L, 1.8662860613541288e-1L, 2.642960831111435e-1L };
+#elif LDBL_MANT_DIG <= 64         // extended precision
+  const long double maxNormForPade[] = { 5.48256690357782863103e-3L /* degree = 3 */, 2.34559162387971167321e-2L,
+            5.84603923897347449857e-2L, 1.08486423756725170223e-1L, 1.68385767881294446649e-1L,
+            2.32777776523703892094e-1L };
+#elif LDBL_MANT_DIG <= 106        // double-double
+  const long double maxNormForPade[] = { 8.58970550342939562202529664318890e-5L /* degree = 3 */,
+            9.34074328446359654039446552677759e-4L, 4.26117194647672175773064114582860e-3L,
+            1.21546224740281848743149666560464e-2L, 2.61100544998339436713088248557444e-2L,
+            4.66170074627052749243018566390567e-2L, 7.32585144444135027565872014932387e-2L,
+            1.05026503471351080481093652651105e-1L };
+#else                             // quadruple precision
+  const long double maxNormForPade[] = { 4.7419931187193005048501568167858103e-5L /* degree = 3 */,
+            5.8853168473544560470387769480192666e-4L, 2.9216120366601315391789493628113520e-3L,
+            8.8415758124319434347116734705174308e-3L, 1.9850836029449446668518049562565291e-2L,
+            3.6688019729653446926585242192447447e-2L, 5.9290962294020186998954055264528393e-2L,
+            8.6998436081634343903250580992127677e-2L, 1.1880960220216759245467951592883642e-1L };
+#endif
+  const int minPadeDegree = matrix_log_min_pade_degree<long double>::value;
+  const int maxPadeDegree = matrix_log_max_pade_degree<long double>::value;
+  int degree = minPadeDegree;
+  for (; degree <= maxPadeDegree; ++degree)
+    if (normTminusI <= maxNormForPade[degree - minPadeDegree])
+      break;
+  return degree;
+}
+
+/* \brief Compute Pade approximation to matrix logarithm */
+template <typename MatrixType>
+void matrix_log_compute_pade(MatrixType& result, const MatrixType& T, int degree)
+{
+  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+  const int minPadeDegree = 3;
+  const int maxPadeDegree = 11;
+  assert(degree >= minPadeDegree && degree <= maxPadeDegree);
+
+  const RealScalar nodes[][maxPadeDegree] = { 
+    { 0.1127016653792583114820734600217600L, 0.5000000000000000000000000000000000L,  // degree 3
+      0.8872983346207416885179265399782400L }, 
+    { 0.0694318442029737123880267555535953L, 0.3300094782075718675986671204483777L,  // degree 4
+      0.6699905217924281324013328795516223L, 0.9305681557970262876119732444464048L },
+    { 0.0469100770306680036011865608503035L, 0.2307653449471584544818427896498956L,  // degree 5
+      0.5000000000000000000000000000000000L, 0.7692346550528415455181572103501044L,
+      0.9530899229693319963988134391496965L },
+    { 0.0337652428984239860938492227530027L, 0.1693953067668677431693002024900473L,  // degree 6
+      0.3806904069584015456847491391596440L, 0.6193095930415984543152508608403560L,
+      0.8306046932331322568306997975099527L, 0.9662347571015760139061507772469973L },
+    { 0.0254460438286207377369051579760744L, 0.1292344072003027800680676133596058L,  // degree 7
+      0.2970774243113014165466967939615193L, 0.5000000000000000000000000000000000L,
+      0.7029225756886985834533032060384807L, 0.8707655927996972199319323866403942L,
+      0.9745539561713792622630948420239256L },
+    { 0.0198550717512318841582195657152635L, 0.1016667612931866302042230317620848L,  // degree 8
+      0.2372337950418355070911304754053768L, 0.4082826787521750975302619288199080L,
+      0.5917173212478249024697380711800920L, 0.7627662049581644929088695245946232L,
+      0.8983332387068133697957769682379152L, 0.9801449282487681158417804342847365L },
+    { 0.0159198802461869550822118985481636L, 0.0819844463366821028502851059651326L,  // degree 9
+      0.1933142836497048013456489803292629L, 0.3378732882980955354807309926783317L,
+      0.5000000000000000000000000000000000L, 0.6621267117019044645192690073216683L,
+      0.8066857163502951986543510196707371L, 0.9180155536633178971497148940348674L,
+      0.9840801197538130449177881014518364L },
+    { 0.0130467357414141399610179939577740L, 0.0674683166555077446339516557882535L,  // degree 10
+      0.1602952158504877968828363174425632L, 0.2833023029353764046003670284171079L,
+      0.4255628305091843945575869994351400L, 0.5744371694908156054424130005648600L,
+      0.7166976970646235953996329715828921L, 0.8397047841495122031171636825574368L,
+      0.9325316833444922553660483442117465L, 0.9869532642585858600389820060422260L },
+    { 0.0108856709269715035980309994385713L, 0.0564687001159523504624211153480364L,  // degree 11
+      0.1349239972129753379532918739844233L, 0.2404519353965940920371371652706952L,
+      0.3652284220238275138342340072995692L, 0.5000000000000000000000000000000000L,
+      0.6347715779761724861657659927004308L, 0.7595480646034059079628628347293048L,
+      0.8650760027870246620467081260155767L, 0.9435312998840476495375788846519636L,
+      0.9891143290730284964019690005614287L } };
+
+  const RealScalar weights[][maxPadeDegree] = { 
+    { 0.2777777777777777777777777777777778L, 0.4444444444444444444444444444444444L,  // degree 3
+      0.2777777777777777777777777777777778L },
+    { 0.1739274225687269286865319746109997L, 0.3260725774312730713134680253890003L,  // degree 4
+      0.3260725774312730713134680253890003L, 0.1739274225687269286865319746109997L },
+    { 0.1184634425280945437571320203599587L, 0.2393143352496832340206457574178191L,  // degree 5
+      0.2844444444444444444444444444444444L, 0.2393143352496832340206457574178191L,
+      0.1184634425280945437571320203599587L },
+    { 0.0856622461895851725201480710863665L, 0.1803807865240693037849167569188581L,  // degree 6
+      0.2339569672863455236949351719947755L, 0.2339569672863455236949351719947755L,
+      0.1803807865240693037849167569188581L, 0.0856622461895851725201480710863665L },
+    { 0.0647424830844348466353057163395410L, 0.1398526957446383339507338857118898L,  // degree 7
+      0.1909150252525594724751848877444876L, 0.2089795918367346938775510204081633L,
+      0.1909150252525594724751848877444876L, 0.1398526957446383339507338857118898L,
+      0.0647424830844348466353057163395410L },
+    { 0.0506142681451881295762656771549811L, 0.1111905172266872352721779972131204L,  // degree 8
+      0.1568533229389436436689811009933007L, 0.1813418916891809914825752246385978L,
+      0.1813418916891809914825752246385978L, 0.1568533229389436436689811009933007L,
+      0.1111905172266872352721779972131204L, 0.0506142681451881295762656771549811L },
+    { 0.0406371941807872059859460790552618L, 0.0903240803474287020292360156214564L,  // degree 9
+      0.1303053482014677311593714347093164L, 0.1561735385200014200343152032922218L,
+      0.1651196775006298815822625346434870L, 0.1561735385200014200343152032922218L,
+      0.1303053482014677311593714347093164L, 0.0903240803474287020292360156214564L,
+      0.0406371941807872059859460790552618L },
+    { 0.0333356721543440687967844049466659L, 0.0747256745752902965728881698288487L,  // degree 10
+      0.1095431812579910219977674671140816L, 0.1346333596549981775456134607847347L,
+      0.1477621123573764350869464973256692L, 0.1477621123573764350869464973256692L,
+      0.1346333596549981775456134607847347L, 0.1095431812579910219977674671140816L,
+      0.0747256745752902965728881698288487L, 0.0333356721543440687967844049466659L },
+    { 0.0278342835580868332413768602212743L, 0.0627901847324523123173471496119701L,  // degree 11
+      0.0931451054638671257130488207158280L, 0.1165968822959952399592618524215876L,
+      0.1314022722551233310903444349452546L, 0.1364625433889503153572417641681711L,
+      0.1314022722551233310903444349452546L, 0.1165968822959952399592618524215876L,
+      0.0931451054638671257130488207158280L, 0.0627901847324523123173471496119701L,
+      0.0278342835580868332413768602212743L } };
+
+  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
+  result.setZero(T.rows(), T.rows());
+  for (int k = 0; k < degree; ++k) {
+    RealScalar weight = weights[degree-minPadeDegree][k];
+    RealScalar node = nodes[degree-minPadeDegree][k];
+    result += weight * (MatrixType::Identity(T.rows(), T.rows()) + node * TminusI)
+                       .template triangularView<Upper>().solve(TminusI);
+  }
+} 
+
+/** \brief Compute logarithm of triangular matrices with size > 2. 
+  * \details This uses a inverse scale-and-square algorithm. */
+template <typename MatrixType>
+void matrix_log_compute_big(const MatrixType& A, MatrixType& result)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  using std::pow;
+
+  int numberOfSquareRoots = 0;
+  int numberOfExtraSquareRoots = 0;
+  int degree;
+  MatrixType T = A, sqrtT;
+
+  int maxPadeDegree = matrix_log_max_pade_degree<Scalar>::value;
+  const RealScalar maxNormForPade = maxPadeDegree<= 5? 5.3149729967117310e-1L:                    // single precision
+                                    maxPadeDegree<= 7? 2.6429608311114350e-1L:                    // double precision
+                                    maxPadeDegree<= 8? 2.32777776523703892094e-1L:                // extended precision
+                                    maxPadeDegree<=10? 1.05026503471351080481093652651105e-1L:    // double-double
+                                                       1.1880960220216759245467951592883642e-1L;  // quadruple precision
+
+  while (true) {
+    RealScalar normTminusI = (T - MatrixType::Identity(T.rows(), T.rows())).cwiseAbs().colwise().sum().maxCoeff();
+    if (normTminusI < maxNormForPade) {
+      degree = matrix_log_get_pade_degree(normTminusI);
+      int degree2 = matrix_log_get_pade_degree(normTminusI / RealScalar(2));
+      if ((degree - degree2 <= 1) || (numberOfExtraSquareRoots == 1)) 
+        break;
+      ++numberOfExtraSquareRoots;
+    }
+    matrix_sqrt_triangular(T, sqrtT);
+    T = sqrtT.template triangularView<Upper>();
+    ++numberOfSquareRoots;
+  }
+
+  matrix_log_compute_pade(result, T, degree);
+  result *= pow(RealScalar(2), numberOfSquareRoots);
+}
+
+/** \ingroup MatrixFunctions_Module
+  * \class MatrixLogarithmAtomic
+  * \brief Helper class for computing matrix logarithm of atomic matrices.
+  *
+  * Here, an atomic matrix is a triangular matrix whose diagonal entries are close to each other.
+  *
+  * \sa class MatrixFunctionAtomic, MatrixBase::log()
+  */
+template <typename MatrixType>
+class MatrixLogarithmAtomic
+{
+public:
+  /** \brief Compute matrix logarithm of atomic matrix
+    * \param[in]  A  argument of matrix logarithm, should be upper triangular and atomic
+    * \returns  The logarithm of \p A.
+    */
+  MatrixType compute(const MatrixType& A);
+};
+
+template <typename MatrixType>
+MatrixType MatrixLogarithmAtomic<MatrixType>::compute(const MatrixType& A)
+{
+  using std::log;
+  MatrixType result(A.rows(), A.rows());
+  if (A.rows() == 1)
+    result(0,0) = log(A(0,0));
+  else if (A.rows() == 2)
+    matrix_log_compute_2x2(A, result);
+  else
+    matrix_log_compute_big(A, result);
+  return result;
+}
+
+} // end of namespace internal
+
+/** \ingroup MatrixFunctions_Module
+  *
+  * \brief Proxy for the matrix logarithm of some matrix (expression).
+  *
+  * \tparam Derived  Type of the argument to the matrix function.
+  *
+  * This class holds the argument to the matrix function until it is
+  * assigned or evaluated for some other reason (so the argument
+  * should not be changed in the meantime). It is the return type of
+  * MatrixBase::log() and most of the time this is the only way it
+  * is used.
+  */
+template<typename Derived> class MatrixLogarithmReturnValue
+: public ReturnByValue<MatrixLogarithmReturnValue<Derived> >
+{
+public:
+  typedef typename Derived::Scalar Scalar;
+  typedef typename Derived::Index Index;
+
+protected:
+  typedef typename internal::ref_selector<Derived>::type DerivedNested;
+
+public:
+
+  /** \brief Constructor.
+    *
+    * \param[in]  A  %Matrix (expression) forming the argument of the matrix logarithm.
+    */
+  explicit MatrixLogarithmReturnValue(const Derived& A) : m_A(A) { }
+  
+  /** \brief Compute the matrix logarithm.
+    *
+    * \param[out]  result  Logarithm of \c A, where \c A is as specified in the constructor.
+    */
+  template <typename ResultType>
+  inline void evalTo(ResultType& result) const
+  {
+    typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
+    typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean;
+    typedef internal::traits<DerivedEvalTypeClean> Traits;
+    static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
+    static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
+    typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+    typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+    typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType;
+    AtomicType atomic;
+    
+    internal::matrix_function_compute<typename DerivedEvalTypeClean::PlainObject>::run(m_A, atomic, result);
+  }
+
+  Index rows() const { return m_A.rows(); }
+  Index cols() const { return m_A.cols(); }
+  
+private:
+  const DerivedNested m_A;
+};
+
+namespace internal {
+  template<typename Derived>
+  struct traits<MatrixLogarithmReturnValue<Derived> >
+  {
+    typedef typename Derived::PlainObject ReturnType;
+  };
+}
+
+
+/********** MatrixBase method **********/
+
+
+template <typename Derived>
+const MatrixLogarithmReturnValue<Derived> MatrixBase<Derived>::log() const
+{
+  eigen_assert(rows() == cols());
+  return MatrixLogarithmReturnValue<Derived>(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATRIX_LOGARITHM
diff --git a/third-party/Eigen/src/MatrixFunctions/MatrixPower.h b/third-party/Eigen/src/MatrixFunctions/MatrixPower.h
new file mode 100644
index 00000000..a3273da4
--- /dev/null
+++ b/third-party/Eigen/src/MatrixFunctions/MatrixPower.h
@@ -0,0 +1,709 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2012, 2013 Chen-Pang He <jdh8@ms63.hinet.net>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIX_POWER
+#define EIGEN_MATRIX_POWER
+
+namespace Eigen {
+
+template<typename MatrixType> class MatrixPower;
+
+/**
+ * \ingroup MatrixFunctions_Module
+ *
+ * \brief Proxy for the matrix power of some matrix.
+ *
+ * \tparam MatrixType  type of the base, a matrix.
+ *
+ * This class holds the arguments to the matrix power until it is
+ * assigned or evaluated for some other reason (so the argument
+ * should not be changed in the meantime). It is the return type of
+ * MatrixPower::operator() and related functions and most of the
+ * time this is the only way it is used.
+ */
+/* TODO This class is only used by MatrixPower, so it should be nested
+ * into MatrixPower, like MatrixPower::ReturnValue. However, my
+ * compiler complained about unused template parameter in the
+ * following declaration in namespace internal.
+ *
+ * template<typename MatrixType>
+ * struct traits<MatrixPower<MatrixType>::ReturnValue>;
+ */
+template<typename MatrixType>
+class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParenthesesReturnValue<MatrixType> >
+{
+  public:
+    typedef typename MatrixType::RealScalar RealScalar;
+    typedef typename MatrixType::Index Index;
+
+    /**
+     * \brief Constructor.
+     *
+     * \param[in] pow  %MatrixPower storing the base.
+     * \param[in] p    scalar, the exponent of the matrix power.
+     */
+    MatrixPowerParenthesesReturnValue(MatrixPower<MatrixType>& pow, RealScalar p) : m_pow(pow), m_p(p)
+    { }
+
+    /**
+     * \brief Compute the matrix power.
+     *
+     * \param[out] result
+     */
+    template<typename ResultType>
+    inline void evalTo(ResultType& result) const
+    { m_pow.compute(result, m_p); }
+
+    Index rows() const { return m_pow.rows(); }
+    Index cols() const { return m_pow.cols(); }
+
+  private:
+    MatrixPower<MatrixType>& m_pow;
+    const RealScalar m_p;
+};
+
+/**
+ * \ingroup MatrixFunctions_Module
+ *
+ * \brief Class for computing matrix powers.
+ *
+ * \tparam MatrixType  type of the base, expected to be an instantiation
+ * of the Matrix class template.
+ *
+ * This class is capable of computing triangular real/complex matrices
+ * raised to a power in the interval \f$ (-1, 1) \f$.
+ *
+ * \note Currently this class is only used by MatrixPower. One may
+ * insist that this be nested into MatrixPower. This class is here to
+ * faciliate future development of triangular matrix functions.
+ */
+template<typename MatrixType>
+class MatrixPowerAtomic : internal::noncopyable
+{
+  private:
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime
+    };
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename MatrixType::RealScalar RealScalar;
+    typedef std::complex<RealScalar> ComplexScalar;
+    typedef typename MatrixType::Index Index;
+    typedef Block<MatrixType,Dynamic,Dynamic> ResultType;
+
+    const MatrixType& m_A;
+    RealScalar m_p;
+
+    void computePade(int degree, const MatrixType& IminusT, ResultType& res) const;
+    void compute2x2(ResultType& res, RealScalar p) const;
+    void computeBig(ResultType& res) const;
+    static int getPadeDegree(float normIminusT);
+    static int getPadeDegree(double normIminusT);
+    static int getPadeDegree(long double normIminusT);
+    static ComplexScalar computeSuperDiag(const ComplexScalar&, const ComplexScalar&, RealScalar p);
+    static RealScalar computeSuperDiag(RealScalar, RealScalar, RealScalar p);
+
+  public:
+    /**
+     * \brief Constructor.
+     *
+     * \param[in] T  the base of the matrix power.
+     * \param[in] p  the exponent of the matrix power, should be in
+     * \f$ (-1, 1) \f$.
+     *
+     * The class stores a reference to T, so it should not be changed
+     * (or destroyed) before evaluation. Only the upper triangular
+     * part of T is read.
+     */
+    MatrixPowerAtomic(const MatrixType& T, RealScalar p);
+    
+    /**
+     * \brief Compute the matrix power.
+     *
+     * \param[out] res  \f$ A^p \f$ where A and p are specified in the
+     * constructor.
+     */
+    void compute(ResultType& res) const;
+};
+
+template<typename MatrixType>
+MatrixPowerAtomic<MatrixType>::MatrixPowerAtomic(const MatrixType& T, RealScalar p) :
+  m_A(T), m_p(p)
+{
+  eigen_assert(T.rows() == T.cols());
+  eigen_assert(p > -1 && p < 1);
+}
+
+template<typename MatrixType>
+void MatrixPowerAtomic<MatrixType>::compute(ResultType& res) const
+{
+  using std::pow;
+  switch (m_A.rows()) {
+    case 0:
+      break;
+    case 1:
+      res(0,0) = pow(m_A(0,0), m_p);
+      break;
+    case 2:
+      compute2x2(res, m_p);
+      break;
+    default:
+      computeBig(res);
+  }
+}
+
+template<typename MatrixType>
+void MatrixPowerAtomic<MatrixType>::computePade(int degree, const MatrixType& IminusT, ResultType& res) const
+{
+  int i = 2*degree;
+  res = (m_p-degree) / (2*i-2) * IminusT;
+
+  for (--i; i; --i) {
+    res = (MatrixType::Identity(IminusT.rows(), IminusT.cols()) + res).template triangularView<Upper>()
+	.solve((i==1 ? -m_p : i&1 ? (-m_p-i/2)/(2*i) : (m_p-i/2)/(2*i-2)) * IminusT).eval();
+  }
+  res += MatrixType::Identity(IminusT.rows(), IminusT.cols());
+}
+
+// This function assumes that res has the correct size (see bug 614)
+template<typename MatrixType>
+void MatrixPowerAtomic<MatrixType>::compute2x2(ResultType& res, RealScalar p) const
+{
+  using std::abs;
+  using std::pow;
+  res.coeffRef(0,0) = pow(m_A.coeff(0,0), p);
+
+  for (Index i=1; i < m_A.cols(); ++i) {
+    res.coeffRef(i,i) = pow(m_A.coeff(i,i), p);
+    if (m_A.coeff(i-1,i-1) == m_A.coeff(i,i))
+      res.coeffRef(i-1,i) = p * pow(m_A.coeff(i,i), p-1);
+    else if (2*abs(m_A.coeff(i-1,i-1)) < abs(m_A.coeff(i,i)) || 2*abs(m_A.coeff(i,i)) < abs(m_A.coeff(i-1,i-1)))
+      res.coeffRef(i-1,i) = (res.coeff(i,i)-res.coeff(i-1,i-1)) / (m_A.coeff(i,i)-m_A.coeff(i-1,i-1));
+    else
+      res.coeffRef(i-1,i) = computeSuperDiag(m_A.coeff(i,i), m_A.coeff(i-1,i-1), p);
+    res.coeffRef(i-1,i) *= m_A.coeff(i-1,i);
+  }
+}
+
+template<typename MatrixType>
+void MatrixPowerAtomic<MatrixType>::computeBig(ResultType& res) const
+{
+  using std::ldexp;
+  const int digits = std::numeric_limits<RealScalar>::digits;
+  const RealScalar maxNormForPade = digits <=  24? 4.3386528e-1L                            // single precision
+                                  : digits <=  53? 2.789358995219730e-1L                    // double precision
+                                  : digits <=  64? 2.4471944416607995472e-1L                // extended precision
+                                  : digits <= 106? 1.1016843812851143391275867258512e-1L    // double-double
+                                  :                9.134603732914548552537150753385375e-2L; // quadruple precision
+  MatrixType IminusT, sqrtT, T = m_A.template triangularView<Upper>();
+  RealScalar normIminusT;
+  int degree, degree2, numberOfSquareRoots = 0;
+  bool hasExtraSquareRoot = false;
+
+  for (Index i=0; i < m_A.cols(); ++i)
+    eigen_assert(m_A(i,i) != RealScalar(0));
+
+  while (true) {
+    IminusT = MatrixType::Identity(m_A.rows(), m_A.cols()) - T;
+    normIminusT = IminusT.cwiseAbs().colwise().sum().maxCoeff();
+    if (normIminusT < maxNormForPade) {
+      degree = getPadeDegree(normIminusT);
+      degree2 = getPadeDegree(normIminusT/2);
+      if (degree - degree2 <= 1 || hasExtraSquareRoot)
+	break;
+      hasExtraSquareRoot = true;
+    }
+    matrix_sqrt_triangular(T, sqrtT);
+    T = sqrtT.template triangularView<Upper>();
+    ++numberOfSquareRoots;
+  }
+  computePade(degree, IminusT, res);
+
+  for (; numberOfSquareRoots; --numberOfSquareRoots) {
+    compute2x2(res, ldexp(m_p, -numberOfSquareRoots));
+    res = res.template triangularView<Upper>() * res;
+  }
+  compute2x2(res, m_p);
+}
+  
+template<typename MatrixType>
+inline int MatrixPowerAtomic<MatrixType>::getPadeDegree(float normIminusT)
+{
+  const float maxNormForPade[] = { 2.8064004e-1f /* degree = 3 */ , 4.3386528e-1f };
+  int degree = 3;
+  for (; degree <= 4; ++degree)
+    if (normIminusT <= maxNormForPade[degree - 3])
+      break;
+  return degree;
+}
+
+template<typename MatrixType>
+inline int MatrixPowerAtomic<MatrixType>::getPadeDegree(double normIminusT)
+{
+  const double maxNormForPade[] = { 1.884160592658218e-2 /* degree = 3 */ , 6.038881904059573e-2, 1.239917516308172e-1,
+      1.999045567181744e-1, 2.789358995219730e-1 };
+  int degree = 3;
+  for (; degree <= 7; ++degree)
+    if (normIminusT <= maxNormForPade[degree - 3])
+      break;
+  return degree;
+}
+
+template<typename MatrixType>
+inline int MatrixPowerAtomic<MatrixType>::getPadeDegree(long double normIminusT)
+{
+#if   LDBL_MANT_DIG == 53
+  const int maxPadeDegree = 7;
+  const double maxNormForPade[] = { 1.884160592658218e-2L /* degree = 3 */ , 6.038881904059573e-2L, 1.239917516308172e-1L,
+      1.999045567181744e-1L, 2.789358995219730e-1L };
+#elif LDBL_MANT_DIG <= 64
+  const int maxPadeDegree = 8;
+  const long double maxNormForPade[] = { 6.3854693117491799460e-3L /* degree = 3 */ , 2.6394893435456973676e-2L,
+      6.4216043030404063729e-2L, 1.1701165502926694307e-1L, 1.7904284231268670284e-1L, 2.4471944416607995472e-1L };
+#elif LDBL_MANT_DIG <= 106
+  const int maxPadeDegree = 10;
+  const double maxNormForPade[] = { 1.0007161601787493236741409687186e-4L /* degree = 3 */ ,
+      1.0007161601787493236741409687186e-3L, 4.7069769360887572939882574746264e-3L, 1.3220386624169159689406653101695e-2L,
+      2.8063482381631737920612944054906e-2L, 4.9625993951953473052385361085058e-2L, 7.7367040706027886224557538328171e-2L,
+      1.1016843812851143391275867258512e-1L };
+#else
+  const int maxPadeDegree = 10;
+  const double maxNormForPade[] = { 5.524506147036624377378713555116378e-5L /* degree = 3 */ ,
+      6.640600568157479679823602193345995e-4L, 3.227716520106894279249709728084626e-3L,
+      9.619593944683432960546978734646284e-3L, 2.134595382433742403911124458161147e-2L,
+      3.908166513900489428442993794761185e-2L, 6.266780814639442865832535460550138e-2L,
+      9.134603732914548552537150753385375e-2L };
+#endif
+  int degree = 3;
+  for (; degree <= maxPadeDegree; ++degree)
+    if (normIminusT <= maxNormForPade[degree - 3])
+      break;
+  return degree;
+}
+
+template<typename MatrixType>
+inline typename MatrixPowerAtomic<MatrixType>::ComplexScalar
+MatrixPowerAtomic<MatrixType>::computeSuperDiag(const ComplexScalar& curr, const ComplexScalar& prev, RealScalar p)
+{
+  using std::ceil;
+  using std::exp;
+  using std::log;
+  using std::sinh;
+
+  ComplexScalar logCurr = log(curr);
+  ComplexScalar logPrev = log(prev);
+  int unwindingNumber = ceil((numext::imag(logCurr - logPrev) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI));
+  ComplexScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2) + ComplexScalar(0, EIGEN_PI*unwindingNumber);
+  return RealScalar(2) * exp(RealScalar(0.5) * p * (logCurr + logPrev)) * sinh(p * w) / (curr - prev);
+}
+
+template<typename MatrixType>
+inline typename MatrixPowerAtomic<MatrixType>::RealScalar
+MatrixPowerAtomic<MatrixType>::computeSuperDiag(RealScalar curr, RealScalar prev, RealScalar p)
+{
+  using std::exp;
+  using std::log;
+  using std::sinh;
+
+  RealScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2);
+  return 2 * exp(p * (log(curr) + log(prev)) / 2) * sinh(p * w) / (curr - prev);
+}
+
+/**
+ * \ingroup MatrixFunctions_Module
+ *
+ * \brief Class for computing matrix powers.
+ *
+ * \tparam MatrixType  type of the base, expected to be an instantiation
+ * of the Matrix class template.
+ *
+ * This class is capable of computing real/complex matrices raised to
+ * an arbitrary real power. Meanwhile, it saves the result of Schur
+ * decomposition if an non-integral power has even been calculated.
+ * Therefore, if you want to compute multiple (>= 2) matrix powers
+ * for the same matrix, using the class directly is more efficient than
+ * calling MatrixBase::pow().
+ *
+ * Example:
+ * \include MatrixPower_optimal.cpp
+ * Output: \verbinclude MatrixPower_optimal.out
+ */
+template<typename MatrixType>
+class MatrixPower : internal::noncopyable
+{
+  private:
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename MatrixType::RealScalar RealScalar;
+    typedef typename MatrixType::Index Index;
+
+  public:
+    /**
+     * \brief Constructor.
+     *
+     * \param[in] A  the base of the matrix power.
+     *
+     * The class stores a reference to A, so it should not be changed
+     * (or destroyed) before evaluation.
+     */
+    explicit MatrixPower(const MatrixType& A) :
+      m_A(A),
+      m_conditionNumber(0),
+      m_rank(A.cols()),
+      m_nulls(0)
+    { eigen_assert(A.rows() == A.cols()); }
+
+    /**
+     * \brief Returns the matrix power.
+     *
+     * \param[in] p  exponent, a real scalar.
+     * \return The expression \f$ A^p \f$, where A is specified in the
+     * constructor.
+     */
+    const MatrixPowerParenthesesReturnValue<MatrixType> operator()(RealScalar p)
+    { return MatrixPowerParenthesesReturnValue<MatrixType>(*this, p); }
+
+    /**
+     * \brief Compute the matrix power.
+     *
+     * \param[in]  p    exponent, a real scalar.
+     * \param[out] res  \f$ A^p \f$ where A is specified in the
+     * constructor.
+     */
+    template<typename ResultType>
+    void compute(ResultType& res, RealScalar p);
+    
+    Index rows() const { return m_A.rows(); }
+    Index cols() const { return m_A.cols(); }
+
+  private:
+    typedef std::complex<RealScalar> ComplexScalar;
+    typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0,
+              MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> ComplexMatrix;
+
+    /** \brief Reference to the base of matrix power. */
+    typename MatrixType::Nested m_A;
+
+    /** \brief Temporary storage. */
+    MatrixType m_tmp;
+
+    /** \brief Store the result of Schur decomposition. */
+    ComplexMatrix m_T, m_U;
+    
+    /** \brief Store fractional power of m_T. */
+    ComplexMatrix m_fT;
+
+    /**
+     * \brief Condition number of m_A.
+     *
+     * It is initialized as 0 to avoid performing unnecessary Schur
+     * decomposition, which is the bottleneck.
+     */
+    RealScalar m_conditionNumber;
+
+    /** \brief Rank of m_A. */
+    Index m_rank;
+    
+    /** \brief Rank deficiency of m_A. */
+    Index m_nulls;
+
+    /**
+     * \brief Split p into integral part and fractional part.
+     *
+     * \param[in]  p        The exponent.
+     * \param[out] p        The fractional part ranging in \f$ (-1, 1) \f$.
+     * \param[out] intpart  The integral part.
+     *
+     * Only if the fractional part is nonzero, it calls initialize().
+     */
+    void split(RealScalar& p, RealScalar& intpart);
+
+    /** \brief Perform Schur decomposition for fractional power. */
+    void initialize();
+
+    template<typename ResultType>
+    void computeIntPower(ResultType& res, RealScalar p);
+
+    template<typename ResultType>
+    void computeFracPower(ResultType& res, RealScalar p);
+
+    template<int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+    static void revertSchur(
+        Matrix<ComplexScalar, Rows, Cols, Options, MaxRows, MaxCols>& res,
+        const ComplexMatrix& T,
+        const ComplexMatrix& U);
+
+    template<int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+    static void revertSchur(
+        Matrix<RealScalar, Rows, Cols, Options, MaxRows, MaxCols>& res,
+        const ComplexMatrix& T,
+        const ComplexMatrix& U);
+};
+
+template<typename MatrixType>
+template<typename ResultType>
+void MatrixPower<MatrixType>::compute(ResultType& res, RealScalar p)
+{
+  using std::pow;
+  switch (cols()) {
+    case 0:
+      break;
+    case 1:
+      res(0,0) = pow(m_A.coeff(0,0), p);
+      break;
+    default:
+      RealScalar intpart;
+      split(p, intpart);
+
+      res = MatrixType::Identity(rows(), cols());
+      computeIntPower(res, intpart);
+      if (p) computeFracPower(res, p);
+  }
+}
+
+template<typename MatrixType>
+void MatrixPower<MatrixType>::split(RealScalar& p, RealScalar& intpart)
+{
+  using std::floor;
+  using std::pow;
+
+  intpart = floor(p);
+  p -= intpart;
+
+  // Perform Schur decomposition if it is not yet performed and the power is
+  // not an integer.
+  if (!m_conditionNumber && p)
+    initialize();
+
+  // Choose the more stable of intpart = floor(p) and intpart = ceil(p).
+  if (p > RealScalar(0.5) && p > (1-p) * pow(m_conditionNumber, p)) {
+    --p;
+    ++intpart;
+  }
+}
+
+template<typename MatrixType>
+void MatrixPower<MatrixType>::initialize()
+{
+  const ComplexSchur<MatrixType> schurOfA(m_A);
+  JacobiRotation<ComplexScalar> rot;
+  ComplexScalar eigenvalue;
+
+  m_fT.resizeLike(m_A);
+  m_T = schurOfA.matrixT();
+  m_U = schurOfA.matrixU();
+  m_conditionNumber = m_T.diagonal().array().abs().maxCoeff() / m_T.diagonal().array().abs().minCoeff();
+
+  // Move zero eigenvalues to the bottom right corner.
+  for (Index i = cols()-1; i>=0; --i) {
+    if (m_rank <= 2)
+      return;
+    if (m_T.coeff(i,i) == RealScalar(0)) {
+      for (Index j=i+1; j < m_rank; ++j) {
+        eigenvalue = m_T.coeff(j,j);
+        rot.makeGivens(m_T.coeff(j-1,j), eigenvalue);
+        m_T.applyOnTheRight(j-1, j, rot);
+        m_T.applyOnTheLeft(j-1, j, rot.adjoint());
+        m_T.coeffRef(j-1,j-1) = eigenvalue;
+        m_T.coeffRef(j,j) = RealScalar(0);
+        m_U.applyOnTheRight(j-1, j, rot);
+      }
+      --m_rank;
+    }
+  }
+
+  m_nulls = rows() - m_rank;
+  if (m_nulls) {
+    eigen_assert(m_T.bottomRightCorner(m_nulls, m_nulls).isZero()
+        && "Base of matrix power should be invertible or with a semisimple zero eigenvalue.");
+    m_fT.bottomRows(m_nulls).fill(RealScalar(0));
+  }
+}
+
+template<typename MatrixType>
+template<typename ResultType>
+void MatrixPower<MatrixType>::computeIntPower(ResultType& res, RealScalar p)
+{
+  using std::abs;
+  using std::fmod;
+  RealScalar pp = abs(p);
+
+  if (p<0) 
+    m_tmp = m_A.inverse();
+  else     
+    m_tmp = m_A;
+
+  while (true) {
+    if (fmod(pp, 2) >= 1)
+      res = m_tmp * res;
+    pp /= 2;
+    if (pp < 1)
+      break;
+    m_tmp *= m_tmp;
+  }
+}
+
+template<typename MatrixType>
+template<typename ResultType>
+void MatrixPower<MatrixType>::computeFracPower(ResultType& res, RealScalar p)
+{
+  Block<ComplexMatrix,Dynamic,Dynamic> blockTp(m_fT, 0, 0, m_rank, m_rank);
+  eigen_assert(m_conditionNumber);
+  eigen_assert(m_rank + m_nulls == rows());
+
+  MatrixPowerAtomic<ComplexMatrix>(m_T.topLeftCorner(m_rank, m_rank), p).compute(blockTp);
+  if (m_nulls) {
+    m_fT.topRightCorner(m_rank, m_nulls) = m_T.topLeftCorner(m_rank, m_rank).template triangularView<Upper>()
+        .solve(blockTp * m_T.topRightCorner(m_rank, m_nulls));
+  }
+  revertSchur(m_tmp, m_fT, m_U);
+  res = m_tmp * res;
+}
+
+template<typename MatrixType>
+template<int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+inline void MatrixPower<MatrixType>::revertSchur(
+    Matrix<ComplexScalar, Rows, Cols, Options, MaxRows, MaxCols>& res,
+    const ComplexMatrix& T,
+    const ComplexMatrix& U)
+{ res.noalias() = U * (T.template triangularView<Upper>() * U.adjoint()); }
+
+template<typename MatrixType>
+template<int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+inline void MatrixPower<MatrixType>::revertSchur(
+    Matrix<RealScalar, Rows, Cols, Options, MaxRows, MaxCols>& res,
+    const ComplexMatrix& T,
+    const ComplexMatrix& U)
+{ res.noalias() = (U * (T.template triangularView<Upper>() * U.adjoint())).real(); }
+
+/**
+ * \ingroup MatrixFunctions_Module
+ *
+ * \brief Proxy for the matrix power of some matrix (expression).
+ *
+ * \tparam Derived  type of the base, a matrix (expression).
+ *
+ * This class holds the arguments to the matrix power until it is
+ * assigned or evaluated for some other reason (so the argument
+ * should not be changed in the meantime). It is the return type of
+ * MatrixBase::pow() and related functions and most of the
+ * time this is the only way it is used.
+ */
+template<typename Derived>
+class MatrixPowerReturnValue : public ReturnByValue< MatrixPowerReturnValue<Derived> >
+{
+  public:
+    typedef typename Derived::PlainObject PlainObject;
+    typedef typename Derived::RealScalar RealScalar;
+    typedef typename Derived::Index Index;
+
+    /**
+     * \brief Constructor.
+     *
+     * \param[in] A  %Matrix (expression), the base of the matrix power.
+     * \param[in] p  real scalar, the exponent of the matrix power.
+     */
+    MatrixPowerReturnValue(const Derived& A, RealScalar p) : m_A(A), m_p(p)
+    { }
+
+    /**
+     * \brief Compute the matrix power.
+     *
+     * \param[out] result  \f$ A^p \f$ where \p A and \p p are as in the
+     * constructor.
+     */
+    template<typename ResultType>
+    inline void evalTo(ResultType& result) const
+    { MatrixPower<PlainObject>(m_A.eval()).compute(result, m_p); }
+
+    Index rows() const { return m_A.rows(); }
+    Index cols() const { return m_A.cols(); }
+
+  private:
+    const Derived& m_A;
+    const RealScalar m_p;
+};
+
+/**
+ * \ingroup MatrixFunctions_Module
+ *
+ * \brief Proxy for the matrix power of some matrix (expression).
+ *
+ * \tparam Derived  type of the base, a matrix (expression).
+ *
+ * This class holds the arguments to the matrix power until it is
+ * assigned or evaluated for some other reason (so the argument
+ * should not be changed in the meantime). It is the return type of
+ * MatrixBase::pow() and related functions and most of the
+ * time this is the only way it is used.
+ */
+template<typename Derived>
+class MatrixComplexPowerReturnValue : public ReturnByValue< MatrixComplexPowerReturnValue<Derived> >
+{
+  public:
+    typedef typename Derived::PlainObject PlainObject;
+    typedef typename std::complex<typename Derived::RealScalar> ComplexScalar;
+    typedef typename Derived::Index Index;
+
+    /**
+     * \brief Constructor.
+     *
+     * \param[in] A  %Matrix (expression), the base of the matrix power.
+     * \param[in] p  complex scalar, the exponent of the matrix power.
+     */
+    MatrixComplexPowerReturnValue(const Derived& A, const ComplexScalar& p) : m_A(A), m_p(p)
+    { }
+
+    /**
+     * \brief Compute the matrix power.
+     *
+     * Because \p p is complex, \f$ A^p \f$ is simply evaluated as \f$
+     * \exp(p \log(A)) \f$.
+     *
+     * \param[out] result  \f$ A^p \f$ where \p A and \p p are as in the
+     * constructor.
+     */
+    template<typename ResultType>
+    inline void evalTo(ResultType& result) const
+    { result = (m_p * m_A.log()).exp(); }
+
+    Index rows() const { return m_A.rows(); }
+    Index cols() const { return m_A.cols(); }
+
+  private:
+    const Derived& m_A;
+    const ComplexScalar m_p;
+};
+
+namespace internal {
+
+template<typename MatrixPowerType>
+struct traits< MatrixPowerParenthesesReturnValue<MatrixPowerType> >
+{ typedef typename MatrixPowerType::PlainObject ReturnType; };
+
+template<typename Derived>
+struct traits< MatrixPowerReturnValue<Derived> >
+{ typedef typename Derived::PlainObject ReturnType; };
+
+template<typename Derived>
+struct traits< MatrixComplexPowerReturnValue<Derived> >
+{ typedef typename Derived::PlainObject ReturnType; };
+
+}
+
+template<typename Derived>
+const MatrixPowerReturnValue<Derived> MatrixBase<Derived>::pow(const RealScalar& p) const
+{ return MatrixPowerReturnValue<Derived>(derived(), p); }
+
+template<typename Derived>
+const MatrixComplexPowerReturnValue<Derived> MatrixBase<Derived>::pow(const std::complex<RealScalar>& p) const
+{ return MatrixComplexPowerReturnValue<Derived>(derived(), p); }
+
+} // namespace Eigen
+
+#endif // EIGEN_MATRIX_POWER
diff --git a/third-party/Eigen/src/MatrixFunctions/MatrixSquareRoot.h b/third-party/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
new file mode 100644
index 00000000..9de0c357
--- /dev/null
+++ b/third-party/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
@@ -0,0 +1,368 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MATRIX_SQUARE_ROOT
+#define EIGEN_MATRIX_SQUARE_ROOT
+
+namespace Eigen { 
+
+namespace internal {
+
+// pre:  T.block(i,i,2,2) has complex conjugate eigenvalues
+// post: sqrtT.block(i,i,2,2) is square root of T.block(i,i,2,2)
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, typename MatrixType::Index i, ResultType& sqrtT)
+{
+  // TODO: This case (2-by-2 blocks with complex conjugate eigenvalues) is probably hidden somewhere
+  //       in EigenSolver. If we expose it, we could call it directly from here.
+  typedef typename traits<MatrixType>::Scalar Scalar;
+  Matrix<Scalar,2,2> block = T.template block<2,2>(i,i);
+  EigenSolver<Matrix<Scalar,2,2> > es(block);
+  sqrtT.template block<2,2>(i,i)
+    = (es.eigenvectors() * es.eigenvalues().cwiseSqrt().asDiagonal() * es.eigenvectors().inverse()).real();
+}
+
+// pre:  block structure of T is such that (i,j) is a 1x1 block,
+//       all blocks of sqrtT to left of and below (i,j) are correct
+// post: sqrtT(i,j) has the correct value
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+{
+  typedef typename traits<MatrixType>::Scalar Scalar;
+  Scalar tmp = (sqrtT.row(i).segment(i+1,j-i-1) * sqrtT.col(j).segment(i+1,j-i-1)).value();
+  sqrtT.coeffRef(i,j) = (T.coeff(i,j) - tmp) / (sqrtT.coeff(i,i) + sqrtT.coeff(j,j));
+}
+
+// similar to compute1x1offDiagonalBlock()
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+{
+  typedef typename traits<MatrixType>::Scalar Scalar;
+  Matrix<Scalar,1,2> rhs = T.template block<1,2>(i,j);
+  if (j-i > 1)
+    rhs -= sqrtT.block(i, i+1, 1, j-i-1) * sqrtT.block(i+1, j, j-i-1, 2);
+  Matrix<Scalar,2,2> A = sqrtT.coeff(i,i) * Matrix<Scalar,2,2>::Identity();
+  A += sqrtT.template block<2,2>(j,j).transpose();
+  sqrtT.template block<1,2>(i,j).transpose() = A.fullPivLu().solve(rhs.transpose());
+}
+
+// similar to compute1x1offDiagonalBlock()
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+{
+  typedef typename traits<MatrixType>::Scalar Scalar;
+  Matrix<Scalar,2,1> rhs = T.template block<2,1>(i,j);
+  if (j-i > 2)
+    rhs -= sqrtT.block(i, i+2, 2, j-i-2) * sqrtT.block(i+2, j, j-i-2, 1);
+  Matrix<Scalar,2,2> A = sqrtT.coeff(j,j) * Matrix<Scalar,2,2>::Identity();
+  A += sqrtT.template block<2,2>(i,i);
+  sqrtT.template block<2,1>(i,j) = A.fullPivLu().solve(rhs);
+}
+
+// solves the equation A X + X B = C where all matrices are 2-by-2
+template <typename MatrixType>
+void matrix_sqrt_quasi_triangular_solve_auxiliary_equation(MatrixType& X, const MatrixType& A, const MatrixType& B, const MatrixType& C)
+{
+  typedef typename traits<MatrixType>::Scalar Scalar;
+  Matrix<Scalar,4,4> coeffMatrix = Matrix<Scalar,4,4>::Zero();
+  coeffMatrix.coeffRef(0,0) = A.coeff(0,0) + B.coeff(0,0);
+  coeffMatrix.coeffRef(1,1) = A.coeff(0,0) + B.coeff(1,1);
+  coeffMatrix.coeffRef(2,2) = A.coeff(1,1) + B.coeff(0,0);
+  coeffMatrix.coeffRef(3,3) = A.coeff(1,1) + B.coeff(1,1);
+  coeffMatrix.coeffRef(0,1) = B.coeff(1,0);
+  coeffMatrix.coeffRef(0,2) = A.coeff(0,1);
+  coeffMatrix.coeffRef(1,0) = B.coeff(0,1);
+  coeffMatrix.coeffRef(1,3) = A.coeff(0,1);
+  coeffMatrix.coeffRef(2,0) = A.coeff(1,0);
+  coeffMatrix.coeffRef(2,3) = B.coeff(1,0);
+  coeffMatrix.coeffRef(3,1) = A.coeff(1,0);
+  coeffMatrix.coeffRef(3,2) = B.coeff(0,1);
+
+  Matrix<Scalar,4,1> rhs;
+  rhs.coeffRef(0) = C.coeff(0,0);
+  rhs.coeffRef(1) = C.coeff(0,1);
+  rhs.coeffRef(2) = C.coeff(1,0);
+  rhs.coeffRef(3) = C.coeff(1,1);
+
+  Matrix<Scalar,4,1> result;
+  result = coeffMatrix.fullPivLu().solve(rhs);
+
+  X.coeffRef(0,0) = result.coeff(0);
+  X.coeffRef(0,1) = result.coeff(1);
+  X.coeffRef(1,0) = result.coeff(2);
+  X.coeffRef(1,1) = result.coeff(3);
+}
+
+// similar to compute1x1offDiagonalBlock()
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+{
+  typedef typename traits<MatrixType>::Scalar Scalar;
+  Matrix<Scalar,2,2> A = sqrtT.template block<2,2>(i,i);
+  Matrix<Scalar,2,2> B = sqrtT.template block<2,2>(j,j);
+  Matrix<Scalar,2,2> C = T.template block<2,2>(i,j);
+  if (j-i > 2)
+    C -= sqrtT.block(i, i+2, 2, j-i-2) * sqrtT.block(i+2, j, j-i-2, 2);
+  Matrix<Scalar,2,2> X;
+  matrix_sqrt_quasi_triangular_solve_auxiliary_equation(X, A, B, C);
+  sqrtT.template block<2,2>(i,j) = X;
+}
+
+// pre:  T is quasi-upper-triangular and sqrtT is a zero matrix of the same size
+// post: the diagonal blocks of sqrtT are the square roots of the diagonal blocks of T
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_diagonal(const MatrixType& T, ResultType& sqrtT)
+{
+  using std::sqrt;
+  const Index size = T.rows();
+  for (Index i = 0; i < size; i++) {
+    if (i == size - 1 || T.coeff(i+1, i) == 0) {
+      eigen_assert(T(i,i) >= 0);
+      sqrtT.coeffRef(i,i) = sqrt(T.coeff(i,i));
+    }
+    else {
+      matrix_sqrt_quasi_triangular_2x2_diagonal_block(T, i, sqrtT);
+      ++i;
+    }
+  }
+}
+
+// pre:  T is quasi-upper-triangular and diagonal blocks of sqrtT are square root of diagonal blocks of T.
+// post: sqrtT is the square root of T.
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_off_diagonal(const MatrixType& T, ResultType& sqrtT)
+{
+  const Index size = T.rows();
+  for (Index j = 1; j < size; j++) {
+      if (T.coeff(j, j-1) != 0)  // if T(j-1:j, j-1:j) is a 2-by-2 block
+	continue;
+    for (Index i = j-1; i >= 0; i--) {
+      if (i > 0 && T.coeff(i, i-1) != 0)  // if T(i-1:i, i-1:i) is a 2-by-2 block
+	continue;
+      bool iBlockIs2x2 = (i < size - 1) && (T.coeff(i+1, i) != 0);
+      bool jBlockIs2x2 = (j < size - 1) && (T.coeff(j+1, j) != 0);
+      if (iBlockIs2x2 && jBlockIs2x2) 
+        matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(T, i, j, sqrtT);
+      else if (iBlockIs2x2 && !jBlockIs2x2) 
+        matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(T, i, j, sqrtT);
+      else if (!iBlockIs2x2 && jBlockIs2x2) 
+        matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(T, i, j, sqrtT);
+      else if (!iBlockIs2x2 && !jBlockIs2x2) 
+        matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(T, i, j, sqrtT);
+    }
+  }
+}
+
+} // end of namespace internal
+
+/** \ingroup MatrixFunctions_Module
+  * \brief Compute matrix square root of quasi-triangular matrix.
+  *
+  * \tparam  MatrixType  type of \p arg, the argument of matrix square root,
+  *                      expected to be an instantiation of the Matrix class template.
+  * \tparam  ResultType  type of \p result, where result is to be stored.
+  * \param[in]  arg      argument of matrix square root.
+  * \param[out] result   matrix square root of upper Hessenberg part of \p arg.
+  *
+  * This function computes the square root of the upper quasi-triangular matrix stored in the upper
+  * Hessenberg part of \p arg.  Only the upper Hessenberg part of \p result is updated, the rest is
+  * not touched.  See MatrixBase::sqrt() for details on how this computation is implemented.
+  *
+  * \sa MatrixSquareRoot, MatrixSquareRootQuasiTriangular
+  */
+template <typename MatrixType, typename ResultType> 
+void matrix_sqrt_quasi_triangular(const MatrixType &arg, ResultType &result)
+{
+  eigen_assert(arg.rows() == arg.cols());
+  result.resize(arg.rows(), arg.cols());
+  internal::matrix_sqrt_quasi_triangular_diagonal(arg, result);
+  internal::matrix_sqrt_quasi_triangular_off_diagonal(arg, result);
+}
+
+
+/** \ingroup MatrixFunctions_Module
+  * \brief Compute matrix square root of triangular matrix.
+  *
+  * \tparam  MatrixType  type of \p arg, the argument of matrix square root,
+  *                      expected to be an instantiation of the Matrix class template.
+  * \tparam  ResultType  type of \p result, where result is to be stored.
+  * \param[in]  arg      argument of matrix square root.
+  * \param[out] result   matrix square root of upper triangular part of \p arg.
+  *
+  * Only the upper triangular part (including the diagonal) of \p result is updated, the rest is not
+  * touched.  See MatrixBase::sqrt() for details on how this computation is implemented.
+  *
+  * \sa MatrixSquareRoot, MatrixSquareRootQuasiTriangular
+  */
+template <typename MatrixType, typename ResultType> 
+void matrix_sqrt_triangular(const MatrixType &arg, ResultType &result)
+{
+  using std::sqrt;
+      typedef typename MatrixType::Scalar Scalar;
+
+  eigen_assert(arg.rows() == arg.cols());
+
+  // Compute square root of arg and store it in upper triangular part of result
+  // This uses that the square root of triangular matrices can be computed directly.
+  result.resize(arg.rows(), arg.cols());
+  for (Index i = 0; i < arg.rows(); i++) {
+    result.coeffRef(i,i) = sqrt(arg.coeff(i,i));
+  }
+  for (Index j = 1; j < arg.cols(); j++) {
+    for (Index i = j-1; i >= 0; i--) {
+      // if i = j-1, then segment has length 0 so tmp = 0
+      Scalar tmp = (result.row(i).segment(i+1,j-i-1) * result.col(j).segment(i+1,j-i-1)).value();
+      // denominator may be zero if original matrix is singular
+      result.coeffRef(i,j) = (arg.coeff(i,j) - tmp) / (result.coeff(i,i) + result.coeff(j,j));
+    }
+  }
+}
+
+
+namespace internal {
+
+/** \ingroup MatrixFunctions_Module
+  * \brief Helper struct for computing matrix square roots of general matrices.
+  * \tparam  MatrixType  type of the argument of the matrix square root,
+  *                      expected to be an instantiation of the Matrix class template.
+  *
+  * \sa MatrixSquareRootTriangular, MatrixSquareRootQuasiTriangular, MatrixBase::sqrt()
+  */
+template <typename MatrixType, int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex>
+struct matrix_sqrt_compute
+{
+  /** \brief Compute the matrix square root
+    *
+    * \param[in]  arg     matrix whose square root is to be computed.
+    * \param[out] result  square root of \p arg.
+    *
+    * See MatrixBase::sqrt() for details on how this computation is implemented.
+    */
+  template <typename ResultType> static void run(const MatrixType &arg, ResultType &result);    
+};
+
+
+// ********** Partial specialization for real matrices **********
+
+template <typename MatrixType>
+struct matrix_sqrt_compute<MatrixType, 0>
+{
+  typedef typename MatrixType::PlainObject PlainType;
+  template <typename ResultType>
+  static void run(const MatrixType &arg, ResultType &result)
+  {
+    eigen_assert(arg.rows() == arg.cols());
+
+    // Compute Schur decomposition of arg
+    const RealSchur<PlainType> schurOfA(arg);
+    const PlainType& T = schurOfA.matrixT();
+    const PlainType& U = schurOfA.matrixU();
+    
+    // Compute square root of T
+    PlainType sqrtT = PlainType::Zero(arg.rows(), arg.cols());
+    matrix_sqrt_quasi_triangular(T, sqrtT);
+    
+    // Compute square root of arg
+    result = U * sqrtT * U.adjoint();
+  }
+};
+
+
+// ********** Partial specialization for complex matrices **********
+
+template <typename MatrixType>
+struct matrix_sqrt_compute<MatrixType, 1>
+{
+  typedef typename MatrixType::PlainObject PlainType;
+  template <typename ResultType>
+  static void run(const MatrixType &arg, ResultType &result)
+  {
+    eigen_assert(arg.rows() == arg.cols());
+
+    // Compute Schur decomposition of arg
+    const ComplexSchur<PlainType> schurOfA(arg);
+    const PlainType& T = schurOfA.matrixT();
+    const PlainType& U = schurOfA.matrixU();
+    
+    // Compute square root of T
+    PlainType sqrtT;
+    matrix_sqrt_triangular(T, sqrtT);
+    
+    // Compute square root of arg
+    result = U * (sqrtT.template triangularView<Upper>() * U.adjoint());
+  }
+};
+
+} // end namespace internal
+
+/** \ingroup MatrixFunctions_Module
+  *
+  * \brief Proxy for the matrix square root of some matrix (expression).
+  *
+  * \tparam Derived  Type of the argument to the matrix square root.
+  *
+  * This class holds the argument to the matrix square root until it
+  * is assigned or evaluated for some other reason (so the argument
+  * should not be changed in the meantime). It is the return type of
+  * MatrixBase::sqrt() and most of the time this is the only way it is
+  * used.
+  */
+template<typename Derived> class MatrixSquareRootReturnValue
+: public ReturnByValue<MatrixSquareRootReturnValue<Derived> >
+{
+  protected:
+    typedef typename internal::ref_selector<Derived>::type DerivedNested;
+
+  public:
+    /** \brief Constructor.
+      *
+      * \param[in]  src  %Matrix (expression) forming the argument of the
+      * matrix square root.
+      */
+    explicit MatrixSquareRootReturnValue(const Derived& src) : m_src(src) { }
+
+    /** \brief Compute the matrix square root.
+      *
+      * \param[out]  result  the matrix square root of \p src in the
+      * constructor.
+      */
+    template <typename ResultType>
+    inline void evalTo(ResultType& result) const
+    {
+      typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
+      typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean;
+      DerivedEvalType tmp(m_src);
+      internal::matrix_sqrt_compute<DerivedEvalTypeClean>::run(tmp, result);
+    }
+
+    Index rows() const { return m_src.rows(); }
+    Index cols() const { return m_src.cols(); }
+
+  protected:
+    const DerivedNested m_src;
+};
+
+namespace internal {
+template<typename Derived>
+struct traits<MatrixSquareRootReturnValue<Derived> >
+{
+  typedef typename Derived::PlainObject ReturnType;
+};
+}
+
+template <typename Derived>
+const MatrixSquareRootReturnValue<Derived> MatrixBase<Derived>::sqrt() const
+{
+  eigen_assert(rows() == cols());
+  return MatrixSquareRootReturnValue<Derived>(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATRIX_FUNCTION
diff --git a/third-party/Eigen/src/MatrixFunctions/StemFunction.h b/third-party/Eigen/src/MatrixFunctions/StemFunction.h
new file mode 100644
index 00000000..7604df90
--- /dev/null
+++ b/third-party/Eigen/src/MatrixFunctions/StemFunction.h
@@ -0,0 +1,117 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_STEM_FUNCTION
+#define EIGEN_STEM_FUNCTION
+
+namespace Eigen { 
+
+namespace internal {
+
+/** \brief The exponential function (and its derivatives). */
+template <typename Scalar>
+Scalar stem_function_exp(Scalar x, int)
+{
+  using std::exp;
+  return exp(x);
+}
+
+/** \brief Cosine (and its derivatives). */
+template <typename Scalar>
+Scalar stem_function_cos(Scalar x, int n)
+{
+  using std::cos;
+  using std::sin;
+  Scalar res;
+
+  switch (n % 4) {
+  case 0: 
+    res = std::cos(x);
+    break;
+  case 1:
+    res = -std::sin(x);
+    break;
+  case 2:
+    res = -std::cos(x);
+    break;
+  case 3:
+    res = std::sin(x);
+    break;
+  }
+  return res;
+}
+
+/** \brief Sine (and its derivatives). */
+template <typename Scalar>
+Scalar stem_function_sin(Scalar x, int n)
+{
+  using std::cos;
+  using std::sin;
+  Scalar res;
+
+  switch (n % 4) {
+  case 0:
+    res = std::sin(x);
+    break;
+  case 1:
+    res = std::cos(x);
+    break;
+  case 2:
+    res = -std::sin(x);
+    break;
+  case 3:
+    res = -std::cos(x);
+    break;
+  }
+  return res;
+}
+
+/** \brief Hyperbolic cosine (and its derivatives). */
+template <typename Scalar>
+Scalar stem_function_cosh(Scalar x, int n)
+{
+  using std::cosh;
+  using std::sinh;
+  Scalar res;
+  
+  switch (n % 2) {
+  case 0:
+    res = std::cosh(x);
+    break;
+  case 1:
+    res = std::sinh(x);
+    break;
+  }
+  return res;
+}
+	
+/** \brief Hyperbolic sine (and its derivatives). */
+template <typename Scalar>
+Scalar stem_function_sinh(Scalar x, int n)
+{
+  using std::cosh;
+  using std::sinh;
+  Scalar res;
+  
+  switch (n % 2) {
+  case 0:
+    res = std::sinh(x);
+    break;
+  case 1:
+    res = std::cosh(x);
+    break;
+  }
+  return res;
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_STEM_FUNCTION
diff --git a/third-party/Eigen/src/QR/ColPivHouseholderQR.h b/third-party/Eigen/src/QR/ColPivHouseholderQR.h
new file mode 100644
index 00000000..a7b47d55
--- /dev/null
+++ b/third-party/Eigen/src/QR/ColPivHouseholderQR.h
@@ -0,0 +1,653 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_H
+#define EIGEN_COLPIVOTINGHOUSEHOLDERQR_H
+
+namespace Eigen {
+
+namespace internal {
+template<typename _MatrixType> struct traits<ColPivHouseholderQR<_MatrixType> >
+ : traits<_MatrixType>
+{
+  enum { Flags = 0 };
+};
+
+} // end namespace internal
+
+/** \ingroup QR_Module
+  *
+  * \class ColPivHouseholderQR
+  *
+  * \brief Householder rank-revealing QR decomposition of a matrix with column-pivoting
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
+  *
+  * This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b Q and \b R
+  * such that
+  * \f[
+  *  \mathbf{A} \, \mathbf{P} = \mathbf{Q} \, \mathbf{R}
+  * \f]
+  * by using Householder transformations. Here, \b P is a permutation matrix, \b Q a unitary matrix and \b R an
+  * upper triangular matrix.
+  *
+  * This decomposition performs column pivoting in order to be rank-revealing and improve
+  * numerical stability. It is slower than HouseholderQR, and faster than FullPivHouseholderQR.
+  *
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+  * 
+  * \sa MatrixBase::colPivHouseholderQr()
+  */
+template<typename _MatrixType> class ColPivHouseholderQR
+{
+  public:
+
+    typedef _MatrixType MatrixType;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename MatrixType::RealScalar RealScalar;
+    // FIXME should be int
+    typedef typename MatrixType::StorageIndex StorageIndex;
+    typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
+    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;
+    typedef typename internal::plain_row_type<MatrixType, Index>::type IntRowVectorType;
+    typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
+    typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;
+    typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;
+    typedef typename MatrixType::PlainObject PlainObject;
+
+  private:
+
+    typedef typename PermutationType::StorageIndex PermIndexType;
+
+  public:
+
+    /**
+    * \brief Default Constructor.
+    *
+    * The default constructor is useful in cases in which the user intends to
+    * perform decompositions via ColPivHouseholderQR::compute(const MatrixType&).
+    */
+    ColPivHouseholderQR()
+      : m_qr(),
+        m_hCoeffs(),
+        m_colsPermutation(),
+        m_colsTranspositions(),
+        m_temp(),
+        m_colNormsUpdated(),
+        m_colNormsDirect(),
+        m_isInitialized(false),
+        m_usePrescribedThreshold(false) {}
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa ColPivHouseholderQR()
+      */
+    ColPivHouseholderQR(Index rows, Index cols)
+      : m_qr(rows, cols),
+        m_hCoeffs((std::min)(rows,cols)),
+        m_colsPermutation(PermIndexType(cols)),
+        m_colsTranspositions(cols),
+        m_temp(cols),
+        m_colNormsUpdated(cols),
+        m_colNormsDirect(cols),
+        m_isInitialized(false),
+        m_usePrescribedThreshold(false) {}
+
+    /** \brief Constructs a QR factorization from a given matrix
+      *
+      * This constructor computes the QR factorization of the matrix \a matrix by calling
+      * the method compute(). It is a short cut for:
+      *
+      * \code
+      * ColPivHouseholderQR<MatrixType> qr(matrix.rows(), matrix.cols());
+      * qr.compute(matrix);
+      * \endcode
+      *
+      * \sa compute()
+      */
+    template<typename InputType>
+    explicit ColPivHouseholderQR(const EigenBase<InputType>& matrix)
+      : m_qr(matrix.rows(), matrix.cols()),
+        m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),
+        m_colsPermutation(PermIndexType(matrix.cols())),
+        m_colsTranspositions(matrix.cols()),
+        m_temp(matrix.cols()),
+        m_colNormsUpdated(matrix.cols()),
+        m_colNormsDirect(matrix.cols()),
+        m_isInitialized(false),
+        m_usePrescribedThreshold(false)
+    {
+      compute(matrix.derived());
+    }
+
+    /** \brief Constructs a QR factorization from a given matrix
+      *
+      * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
+      *
+      * \sa ColPivHouseholderQR(const EigenBase&)
+      */
+    template<typename InputType>
+    explicit ColPivHouseholderQR(EigenBase<InputType>& matrix)
+      : m_qr(matrix.derived()),
+        m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),
+        m_colsPermutation(PermIndexType(matrix.cols())),
+        m_colsTranspositions(matrix.cols()),
+        m_temp(matrix.cols()),
+        m_colNormsUpdated(matrix.cols()),
+        m_colNormsDirect(matrix.cols()),
+        m_isInitialized(false),
+        m_usePrescribedThreshold(false)
+    {
+      computeInPlace();
+    }
+
+    /** This method finds a solution x to the equation Ax=b, where A is the matrix of which
+      * *this is the QR decomposition, if any exists.
+      *
+      * \param b the right-hand-side of the equation to solve.
+      *
+      * \returns a solution.
+      *
+      * \note_about_checking_solutions
+      *
+      * \note_about_arbitrary_choice_of_solution
+      *
+      * Example: \include ColPivHouseholderQR_solve.cpp
+      * Output: \verbinclude ColPivHouseholderQR_solve.out
+      */
+    template<typename Rhs>
+    inline const Solve<ColPivHouseholderQR, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return Solve<ColPivHouseholderQR, Rhs>(*this, b.derived());
+    }
+
+    HouseholderSequenceType householderQ() const;
+    HouseholderSequenceType matrixQ() const
+    {
+      return householderQ();
+    }
+
+    /** \returns a reference to the matrix where the Householder QR decomposition is stored
+      */
+    const MatrixType& matrixQR() const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return m_qr;
+    }
+
+    /** \returns a reference to the matrix where the result Householder QR is stored
+     * \warning The strict lower part of this matrix contains internal values.
+     * Only the upper triangular part should be referenced. To get it, use
+     * \code matrixR().template triangularView<Upper>() \endcode
+     * For rank-deficient matrices, use
+     * \code
+     * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>()
+     * \endcode
+     */
+    const MatrixType& matrixR() const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return m_qr;
+    }
+
+    template<typename InputType>
+    ColPivHouseholderQR& compute(const EigenBase<InputType>& matrix);
+
+    /** \returns a const reference to the column permutation matrix */
+    const PermutationType& colsPermutation() const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return m_colsPermutation;
+    }
+
+    /** \returns the absolute value of the determinant of the matrix of which
+      * *this is the QR decomposition. It has only linear complexity
+      * (that is, O(n) where n is the dimension of the square matrix)
+      * as the QR decomposition has already been computed.
+      *
+      * \note This is only for square matrices.
+      *
+      * \warning a determinant can be very big or small, so for matrices
+      * of large enough dimension, there is a risk of overflow/underflow.
+      * One way to work around that is to use logAbsDeterminant() instead.
+      *
+      * \sa logAbsDeterminant(), MatrixBase::determinant()
+      */
+    typename MatrixType::RealScalar absDeterminant() const;
+
+    /** \returns the natural log of the absolute value of the determinant of the matrix of which
+      * *this is the QR decomposition. It has only linear complexity
+      * (that is, O(n) where n is the dimension of the square matrix)
+      * as the QR decomposition has already been computed.
+      *
+      * \note This is only for square matrices.
+      *
+      * \note This method is useful to work around the risk of overflow/underflow that's inherent
+      * to determinant computation.
+      *
+      * \sa absDeterminant(), MatrixBase::determinant()
+      */
+    typename MatrixType::RealScalar logAbsDeterminant() const;
+
+    /** \returns the rank of the matrix of which *this is the QR decomposition.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline Index rank() const
+    {
+      using std::abs;
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold();
+      Index result = 0;
+      for(Index i = 0; i < m_nonzero_pivots; ++i)
+        result += (abs(m_qr.coeff(i,i)) > premultiplied_threshold);
+      return result;
+    }
+
+    /** \returns the dimension of the kernel of the matrix of which *this is the QR decomposition.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline Index dimensionOfKernel() const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return cols() - rank();
+    }
+
+    /** \returns true if the matrix of which *this is the QR decomposition represents an injective
+      *          linear map, i.e. has trivial kernel; false otherwise.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isInjective() const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return rank() == cols();
+    }
+
+    /** \returns true if the matrix of which *this is the QR decomposition represents a surjective
+      *          linear map; false otherwise.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isSurjective() const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return rank() == rows();
+    }
+
+    /** \returns true if the matrix of which *this is the QR decomposition is invertible.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isInvertible() const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return isInjective() && isSurjective();
+    }
+
+    /** \returns the inverse of the matrix of which *this is the QR decomposition.
+      *
+      * \note If this matrix is not invertible, the returned matrix has undefined coefficients.
+      *       Use isInvertible() to first determine whether this matrix is invertible.
+      */
+    inline const Inverse<ColPivHouseholderQR> inverse() const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return Inverse<ColPivHouseholderQR>(*this);
+    }
+
+    inline Index rows() const { return m_qr.rows(); }
+    inline Index cols() const { return m_qr.cols(); }
+
+    /** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q.
+      *
+      * For advanced uses only.
+      */
+    const HCoeffsType& hCoeffs() const { return m_hCoeffs; }
+
+    /** Allows to prescribe a threshold to be used by certain methods, such as rank(),
+      * who need to determine when pivots are to be considered nonzero. This is not used for the
+      * QR decomposition itself.
+      *
+      * When it needs to get the threshold value, Eigen calls threshold(). By default, this
+      * uses a formula to automatically determine a reasonable threshold.
+      * Once you have called the present method setThreshold(const RealScalar&),
+      * your value is used instead.
+      *
+      * \param threshold The new value to use as the threshold.
+      *
+      * A pivot will be considered nonzero if its absolute value is strictly greater than
+      *  \f$ \vert pivot \vert \leqslant threshold \times \vert maxpivot \vert \f$
+      * where maxpivot is the biggest pivot.
+      *
+      * If you want to come back to the default behavior, call setThreshold(Default_t)
+      */
+    ColPivHouseholderQR& setThreshold(const RealScalar& threshold)
+    {
+      m_usePrescribedThreshold = true;
+      m_prescribedThreshold = threshold;
+      return *this;
+    }
+
+    /** Allows to come back to the default behavior, letting Eigen use its default formula for
+      * determining the threshold.
+      *
+      * You should pass the special object Eigen::Default as parameter here.
+      * \code qr.setThreshold(Eigen::Default); \endcode
+      *
+      * See the documentation of setThreshold(const RealScalar&).
+      */
+    ColPivHouseholderQR& setThreshold(Default_t)
+    {
+      m_usePrescribedThreshold = false;
+      return *this;
+    }
+
+    /** Returns the threshold that will be used by certain methods such as rank().
+      *
+      * See the documentation of setThreshold(const RealScalar&).
+      */
+    RealScalar threshold() const
+    {
+      eigen_assert(m_isInitialized || m_usePrescribedThreshold);
+      return m_usePrescribedThreshold ? m_prescribedThreshold
+      // this formula comes from experimenting (see "LU precision tuning" thread on the list)
+      // and turns out to be identical to Higham's formula used already in LDLt.
+                                      : NumTraits<Scalar>::epsilon() * RealScalar(m_qr.diagonalSize());
+    }
+
+    /** \returns the number of nonzero pivots in the QR decomposition.
+      * Here nonzero is meant in the exact sense, not in a fuzzy sense.
+      * So that notion isn't really intrinsically interesting, but it is
+      * still useful when implementing algorithms.
+      *
+      * \sa rank()
+      */
+    inline Index nonzeroPivots() const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return m_nonzero_pivots;
+    }
+
+    /** \returns the absolute value of the biggest pivot, i.e. the biggest
+      *          diagonal coefficient of R.
+      */
+    RealScalar maxPivot() const { return m_maxpivot; }
+
+    /** \brief Reports whether the QR factorization was succesful.
+      *
+      * \note This function always returns \c Success. It is provided for compatibility
+      * with other factorization routines.
+      * \returns \c Success
+      */
+    ComputationInfo info() const
+    {
+      eigen_assert(m_isInitialized && "Decomposition is not initialized.");
+      return Success;
+    }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
+
+  protected:
+
+    friend class CompleteOrthogonalDecomposition<MatrixType>;
+
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+
+    void computeInPlace();
+
+    MatrixType m_qr;
+    HCoeffsType m_hCoeffs;
+    PermutationType m_colsPermutation;
+    IntRowVectorType m_colsTranspositions;
+    RowVectorType m_temp;
+    RealRowVectorType m_colNormsUpdated;
+    RealRowVectorType m_colNormsDirect;
+    bool m_isInitialized, m_usePrescribedThreshold;
+    RealScalar m_prescribedThreshold, m_maxpivot;
+    Index m_nonzero_pivots;
+    Index m_det_pq;
+};
+
+template<typename MatrixType>
+typename MatrixType::RealScalar ColPivHouseholderQR<MatrixType>::absDeterminant() const
+{
+  using std::abs;
+  eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
+  return abs(m_qr.diagonal().prod());
+}
+
+template<typename MatrixType>
+typename MatrixType::RealScalar ColPivHouseholderQR<MatrixType>::logAbsDeterminant() const
+{
+  eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
+  return m_qr.diagonal().cwiseAbs().array().log().sum();
+}
+
+/** Performs the QR factorization of the given matrix \a matrix. The result of
+  * the factorization is stored into \c *this, and a reference to \c *this
+  * is returned.
+  *
+  * \sa class ColPivHouseholderQR, ColPivHouseholderQR(const MatrixType&)
+  */
+template<typename MatrixType>
+template<typename InputType>
+ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const EigenBase<InputType>& matrix)
+{
+  m_qr = matrix.derived();
+  computeInPlace();
+  return *this;
+}
+
+template<typename MatrixType>
+void ColPivHouseholderQR<MatrixType>::computeInPlace()
+{
+  check_template_parameters();
+
+  // the column permutation is stored as int indices, so just to be sure:
+  eigen_assert(m_qr.cols()<=NumTraits<int>::highest());
+
+  using std::abs;
+
+  Index rows = m_qr.rows();
+  Index cols = m_qr.cols();
+  Index size = m_qr.diagonalSize();
+
+  m_hCoeffs.resize(size);
+
+  m_temp.resize(cols);
+
+  m_colsTranspositions.resize(m_qr.cols());
+  Index number_of_transpositions = 0;
+
+  m_colNormsUpdated.resize(cols);
+  m_colNormsDirect.resize(cols);
+  for (Index k = 0; k < cols; ++k) {
+    // colNormsDirect(k) caches the most recent directly computed norm of
+    // column k.
+    m_colNormsDirect.coeffRef(k) = m_qr.col(k).norm();
+    m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k);
+  }
+
+  RealScalar threshold_helper =  numext::abs2<RealScalar>(m_colNormsUpdated.maxCoeff() * NumTraits<RealScalar>::epsilon()) / RealScalar(rows);
+  RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon());
+
+  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
+  m_maxpivot = RealScalar(0);
+
+  for(Index k = 0; k < size; ++k)
+  {
+    // first, we look up in our table m_colNormsUpdated which column has the biggest norm
+    Index biggest_col_index;
+    RealScalar biggest_col_sq_norm = numext::abs2(m_colNormsUpdated.tail(cols-k).maxCoeff(&biggest_col_index));
+    biggest_col_index += k;
+
+    // Track the number of meaningful pivots but do not stop the decomposition to make
+    // sure that the initial matrix is properly reproduced. See bug 941.
+    if(m_nonzero_pivots==size && biggest_col_sq_norm < threshold_helper * RealScalar(rows-k))
+      m_nonzero_pivots = k;
+
+    // apply the transposition to the columns
+    m_colsTranspositions.coeffRef(k) = biggest_col_index;
+    if(k != biggest_col_index) {
+      m_qr.col(k).swap(m_qr.col(biggest_col_index));
+      std::swap(m_colNormsUpdated.coeffRef(k), m_colNormsUpdated.coeffRef(biggest_col_index));
+      std::swap(m_colNormsDirect.coeffRef(k), m_colNormsDirect.coeffRef(biggest_col_index));
+      ++number_of_transpositions;
+    }
+
+    // generate the householder vector, store it below the diagonal
+    RealScalar beta;
+    m_qr.col(k).tail(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
+
+    // apply the householder transformation to the diagonal coefficient
+    m_qr.coeffRef(k,k) = beta;
+
+    // remember the maximum absolute value of diagonal coefficients
+    if(abs(beta) > m_maxpivot) m_maxpivot = abs(beta);
+
+    // apply the householder transformation
+    m_qr.bottomRightCorner(rows-k, cols-k-1)
+        .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &m_temp.coeffRef(k+1));
+
+    // update our table of norms of the columns
+    for (Index j = k + 1; j < cols; ++j) {
+      // The following implements the stable norm downgrade step discussed in
+      // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf
+      // and used in LAPACK routines xGEQPF and xGEQP3.
+      // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html
+      if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) {
+        RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j);
+        temp = (RealScalar(1) + temp) * (RealScalar(1) - temp);
+        temp = temp <  RealScalar(0) ? RealScalar(0) : temp;
+        RealScalar temp2 = temp * numext::abs2<RealScalar>(m_colNormsUpdated.coeffRef(j) /
+                                                           m_colNormsDirect.coeffRef(j));
+        if (temp2 <= norm_downdate_threshold) {
+          // The updated norm has become too inaccurate so re-compute the column
+          // norm directly.
+          m_colNormsDirect.coeffRef(j) = m_qr.col(j).tail(rows - k - 1).norm();
+          m_colNormsUpdated.coeffRef(j) = m_colNormsDirect.coeffRef(j);
+        } else {
+          m_colNormsUpdated.coeffRef(j) *= numext::sqrt(temp);
+        }
+      }
+    }
+  }
+
+  m_colsPermutation.setIdentity(PermIndexType(cols));
+  for(PermIndexType k = 0; k < size/*m_nonzero_pivots*/; ++k)
+    m_colsPermutation.applyTranspositionOnTheRight(k, PermIndexType(m_colsTranspositions.coeff(k)));
+
+  m_det_pq = (number_of_transpositions%2) ? -1 : 1;
+  m_isInitialized = true;
+}
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+  eigen_assert(rhs.rows() == rows());
+
+  const Index nonzero_pivots = nonzeroPivots();
+
+  if(nonzero_pivots == 0)
+  {
+    dst.setZero();
+    return;
+  }
+
+  typename RhsType::PlainObject c(rhs);
+
+  // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
+  c.applyOnTheLeft(householderSequence(m_qr, m_hCoeffs)
+                    .setLength(nonzero_pivots)
+                    .transpose()
+    );
+
+  m_qr.topLeftCorner(nonzero_pivots, nonzero_pivots)
+      .template triangularView<Upper>()
+      .solveInPlace(c.topRows(nonzero_pivots));
+
+  for(Index i = 0; i < nonzero_pivots; ++i) dst.row(m_colsPermutation.indices().coeff(i)) = c.row(i);
+  for(Index i = nonzero_pivots; i < cols(); ++i) dst.row(m_colsPermutation.indices().coeff(i)).setZero();
+}
+#endif
+
+namespace internal {
+
+template<typename DstXprType, typename MatrixType>
+struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename ColPivHouseholderQR<MatrixType>::Scalar>, Dense2Dense>
+{
+  typedef ColPivHouseholderQR<MatrixType> QrType;
+  typedef Inverse<QrType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename QrType::Scalar> &)
+  {
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
+  }
+};
+
+} // end namespace internal
+
+/** \returns the matrix Q as a sequence of householder transformations.
+  * You can extract the meaningful part only by using:
+  * \code qr.householderQ().setLength(qr.nonzeroPivots()) \endcode*/
+template<typename MatrixType>
+typename ColPivHouseholderQR<MatrixType>::HouseholderSequenceType ColPivHouseholderQR<MatrixType>
+  ::householderQ() const
+{
+  eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+  return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate());
+}
+
+/** \return the column-pivoting Householder QR decomposition of \c *this.
+  *
+  * \sa class ColPivHouseholderQR
+  */
+template<typename Derived>
+const ColPivHouseholderQR<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::colPivHouseholderQr() const
+{
+  return ColPivHouseholderQR<PlainObject>(eval());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_COLPIVOTINGHOUSEHOLDERQR_H
diff --git a/third-party/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h b/third-party/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h
new file mode 100644
index 00000000..4e9651f8
--- /dev/null
+++ b/third-party/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to LAPACKe
+ *    Householder QR decomposition of a matrix with column pivoting based on
+ *    LAPACKE_?geqp3 function.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_LAPACKE_H
+#define EIGEN_COLPIVOTINGHOUSEHOLDERQR_LAPACKE_H
+
+namespace Eigen { 
+
+/** \internal Specialization for the data types supported by LAPACKe */
+
+#define EIGEN_LAPACKE_QR_COLPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX, EIGCOLROW, LAPACKE_COLROW) \
+template<> template<typename InputType> inline \
+ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> >& \
+ColPivHouseholderQR<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> >::compute( \
+              const EigenBase<InputType>& matrix) \
+\
+{ \
+  using std::abs; \
+  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \
+  typedef MatrixType::RealScalar RealScalar; \
+  Index rows = matrix.rows();\
+  Index cols = matrix.cols();\
+\
+  m_qr = matrix;\
+  Index size = m_qr.diagonalSize();\
+  m_hCoeffs.resize(size);\
+\
+  m_colsTranspositions.resize(cols);\
+  /*Index number_of_transpositions = 0;*/ \
+\
+  m_nonzero_pivots = 0; \
+  m_maxpivot = RealScalar(0);\
+  m_colsPermutation.resize(cols); \
+  m_colsPermutation.indices().setZero(); \
+\
+  lapack_int lda = internal::convert_index<lapack_int,Index>(m_qr.outerStride()); \
+  lapack_int matrix_order = LAPACKE_COLROW; \
+  LAPACKE_##LAPACKE_PREFIX##geqp3( matrix_order, internal::convert_index<lapack_int,Index>(rows), internal::convert_index<lapack_int,Index>(cols), \
+                              (LAPACKE_TYPE*)m_qr.data(), lda, (lapack_int*)m_colsPermutation.indices().data(), (LAPACKE_TYPE*)m_hCoeffs.data()); \
+  m_isInitialized = true; \
+  m_maxpivot=m_qr.diagonal().cwiseAbs().maxCoeff(); \
+  m_hCoeffs.adjointInPlace(); \
+  RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold(); \
+  lapack_int *perm = m_colsPermutation.indices().data(); \
+  for(Index i=0;i<size;i++) { \
+    m_nonzero_pivots += (abs(m_qr.coeff(i,i)) > premultiplied_threshold);\
+  } \
+  for(Index i=0;i<cols;i++) perm[i]--;\
+\
+  /*m_det_pq = (number_of_transpositions%2) ? -1 : 1;  // TODO: It's not needed now; fix upon availability in Eigen */ \
+\
+  return *this; \
+}
+
+EIGEN_LAPACKE_QR_COLPIV(double,   double,        d, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_QR_COLPIV(float,    float,         s, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_QR_COLPIV(dcomplex, lapack_complex_double, z, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_QR_COLPIV(scomplex, lapack_complex_float,  c, ColMajor, LAPACK_COL_MAJOR)
+
+EIGEN_LAPACKE_QR_COLPIV(double,   double,        d, RowMajor, LAPACK_ROW_MAJOR)
+EIGEN_LAPACKE_QR_COLPIV(float,    float,         s, RowMajor, LAPACK_ROW_MAJOR)
+EIGEN_LAPACKE_QR_COLPIV(dcomplex, lapack_complex_double, z, RowMajor, LAPACK_ROW_MAJOR)
+EIGEN_LAPACKE_QR_COLPIV(scomplex, lapack_complex_float,  c, RowMajor, LAPACK_ROW_MAJOR)
+
+} // end namespace Eigen
+
+#endif // EIGEN_COLPIVOTINGHOUSEHOLDERQR_LAPACKE_H
diff --git a/third-party/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/third-party/Eigen/src/QR/CompleteOrthogonalDecomposition.h
new file mode 100644
index 00000000..34c637b7
--- /dev/null
+++ b/third-party/Eigen/src/QR/CompleteOrthogonalDecomposition.h
@@ -0,0 +1,562 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Rasmus Munk Larsen <rmlarsen@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H
+#define EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H
+
+namespace Eigen {
+
+namespace internal {
+template <typename _MatrixType>
+struct traits<CompleteOrthogonalDecomposition<_MatrixType> >
+    : traits<_MatrixType> {
+  enum { Flags = 0 };
+};
+
+}  // end namespace internal
+
+/** \ingroup QR_Module
+  *
+  * \class CompleteOrthogonalDecomposition
+  *
+  * \brief Complete orthogonal decomposition (COD) of a matrix.
+  *
+  * \param MatrixType the type of the matrix of which we are computing the COD.
+  *
+  * This class performs a rank-revealing complete orthogonal decomposition of a
+  * matrix  \b A into matrices \b P, \b Q, \b T, and \b Z such that
+  * \f[
+  *  \mathbf{A} \, \mathbf{P} = \mathbf{Q} \,
+  *                     \begin{bmatrix} \mathbf{T} &  \mathbf{0} \\
+  *                                     \mathbf{0} & \mathbf{0} \end{bmatrix} \, \mathbf{Z}
+  * \f]
+  * by using Householder transformations. Here, \b P is a permutation matrix,
+  * \b Q and \b Z are unitary matrices and \b T an upper triangular matrix of
+  * size rank-by-rank. \b A may be rank deficient.
+  *
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+  * 
+  * \sa MatrixBase::completeOrthogonalDecomposition()
+  */
+template <typename _MatrixType>
+class CompleteOrthogonalDecomposition {
+ public:
+  typedef _MatrixType MatrixType;
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+  };
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename MatrixType::StorageIndex StorageIndex;
+  typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
+  typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime>
+      PermutationType;
+  typedef typename internal::plain_row_type<MatrixType, Index>::type
+      IntRowVectorType;
+  typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
+  typedef typename internal::plain_row_type<MatrixType, RealScalar>::type
+      RealRowVectorType;
+  typedef HouseholderSequence<
+      MatrixType, typename internal::remove_all<
+                      typename HCoeffsType::ConjugateReturnType>::type>
+      HouseholderSequenceType;
+  typedef typename MatrixType::PlainObject PlainObject;
+
+ private:
+  typedef typename PermutationType::Index PermIndexType;
+
+ public:
+  /**
+   * \brief Default Constructor.
+   *
+   * The default constructor is useful in cases in which the user intends to
+   * perform decompositions via
+   * \c CompleteOrthogonalDecomposition::compute(const* MatrixType&).
+   */
+  CompleteOrthogonalDecomposition() : m_cpqr(), m_zCoeffs(), m_temp() {}
+
+  /** \brief Default Constructor with memory preallocation
+   *
+   * Like the default constructor but with preallocation of the internal data
+   * according to the specified problem \a size.
+   * \sa CompleteOrthogonalDecomposition()
+   */
+  CompleteOrthogonalDecomposition(Index rows, Index cols)
+      : m_cpqr(rows, cols), m_zCoeffs((std::min)(rows, cols)), m_temp(cols) {}
+
+  /** \brief Constructs a complete orthogonal decomposition from a given
+   * matrix.
+   *
+   * This constructor computes the complete orthogonal decomposition of the
+   * matrix \a matrix by calling the method compute(). The default
+   * threshold for rank determination will be used. It is a short cut for:
+   *
+   * \code
+   * CompleteOrthogonalDecomposition<MatrixType> cod(matrix.rows(),
+   *                                                 matrix.cols());
+   * cod.setThreshold(Default);
+   * cod.compute(matrix);
+   * \endcode
+   *
+   * \sa compute()
+   */
+  template <typename InputType>
+  explicit CompleteOrthogonalDecomposition(const EigenBase<InputType>& matrix)
+      : m_cpqr(matrix.rows(), matrix.cols()),
+        m_zCoeffs((std::min)(matrix.rows(), matrix.cols())),
+        m_temp(matrix.cols())
+  {
+    compute(matrix.derived());
+  }
+
+  /** \brief Constructs a complete orthogonal decomposition from a given matrix
+    *
+    * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
+    *
+    * \sa CompleteOrthogonalDecomposition(const EigenBase&)
+    */
+  template<typename InputType>
+  explicit CompleteOrthogonalDecomposition(EigenBase<InputType>& matrix)
+    : m_cpqr(matrix.derived()),
+      m_zCoeffs((std::min)(matrix.rows(), matrix.cols())),
+      m_temp(matrix.cols())
+  {
+    computeInPlace();
+  }
+
+
+  /** This method computes the minimum-norm solution X to a least squares
+   * problem \f[\mathrm{minimize} \|A X - B\|, \f] where \b A is the matrix of
+   * which \c *this is the complete orthogonal decomposition.
+   *
+   * \param b the right-hand sides of the problem to solve.
+   *
+   * \returns a solution.
+   *
+   */
+  template <typename Rhs>
+  inline const Solve<CompleteOrthogonalDecomposition, Rhs> solve(
+      const MatrixBase<Rhs>& b) const {
+    eigen_assert(m_cpqr.m_isInitialized &&
+                 "CompleteOrthogonalDecomposition is not initialized.");
+    return Solve<CompleteOrthogonalDecomposition, Rhs>(*this, b.derived());
+  }
+
+  HouseholderSequenceType householderQ(void) const;
+  HouseholderSequenceType matrixQ(void) const { return m_cpqr.householderQ(); }
+
+  /** \returns the matrix \b Z.
+   */
+  MatrixType matrixZ() const {
+    MatrixType Z = MatrixType::Identity(m_cpqr.cols(), m_cpqr.cols());
+    applyZAdjointOnTheLeftInPlace(Z);
+    return Z.adjoint();
+  }
+
+  /** \returns a reference to the matrix where the complete orthogonal
+   * decomposition is stored
+   */
+  const MatrixType& matrixQTZ() const { return m_cpqr.matrixQR(); }
+
+  /** \returns a reference to the matrix where the complete orthogonal
+   * decomposition is stored.
+   * \warning The strict lower part and \code cols() - rank() \endcode right
+   * columns of this matrix contains internal values.
+   * Only the upper triangular part should be referenced. To get it, use
+   * \code matrixT().template triangularView<Upper>() \endcode
+   * For rank-deficient matrices, use
+   * \code
+   * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>()
+   * \endcode
+   */
+  const MatrixType& matrixT() const { return m_cpqr.matrixQR(); }
+
+  template <typename InputType>
+  CompleteOrthogonalDecomposition& compute(const EigenBase<InputType>& matrix) {
+    // Compute the column pivoted QR factorization A P = Q R.
+    m_cpqr.compute(matrix);
+    computeInPlace();
+    return *this;
+  }
+
+  /** \returns a const reference to the column permutation matrix */
+  const PermutationType& colsPermutation() const {
+    return m_cpqr.colsPermutation();
+  }
+
+  /** \returns the absolute value of the determinant of the matrix of which
+   * *this is the complete orthogonal decomposition. It has only linear
+   * complexity (that is, O(n) where n is the dimension of the square matrix)
+   * as the complete orthogonal decomposition has already been computed.
+   *
+   * \note This is only for square matrices.
+   *
+   * \warning a determinant can be very big or small, so for matrices
+   * of large enough dimension, there is a risk of overflow/underflow.
+   * One way to work around that is to use logAbsDeterminant() instead.
+   *
+   * \sa logAbsDeterminant(), MatrixBase::determinant()
+   */
+  typename MatrixType::RealScalar absDeterminant() const;
+
+  /** \returns the natural log of the absolute value of the determinant of the
+   * matrix of which *this is the complete orthogonal decomposition. It has
+   * only linear complexity (that is, O(n) where n is the dimension of the
+   * square matrix) as the complete orthogonal decomposition has already been
+   * computed.
+   *
+   * \note This is only for square matrices.
+   *
+   * \note This method is useful to work around the risk of overflow/underflow
+   * that's inherent to determinant computation.
+   *
+   * \sa absDeterminant(), MatrixBase::determinant()
+   */
+  typename MatrixType::RealScalar logAbsDeterminant() const;
+
+  /** \returns the rank of the matrix of which *this is the complete orthogonal
+   * decomposition.
+   *
+   * \note This method has to determine which pivots should be considered
+   * nonzero. For that, it uses the threshold value that you can control by
+   * calling setThreshold(const RealScalar&).
+   */
+  inline Index rank() const { return m_cpqr.rank(); }
+
+  /** \returns the dimension of the kernel of the matrix of which *this is the
+   * complete orthogonal decomposition.
+   *
+   * \note This method has to determine which pivots should be considered
+   * nonzero. For that, it uses the threshold value that you can control by
+   * calling setThreshold(const RealScalar&).
+   */
+  inline Index dimensionOfKernel() const { return m_cpqr.dimensionOfKernel(); }
+
+  /** \returns true if the matrix of which *this is the decomposition represents
+   * an injective linear map, i.e. has trivial kernel; false otherwise.
+   *
+   * \note This method has to determine which pivots should be considered
+   * nonzero. For that, it uses the threshold value that you can control by
+   * calling setThreshold(const RealScalar&).
+   */
+  inline bool isInjective() const { return m_cpqr.isInjective(); }
+
+  /** \returns true if the matrix of which *this is the decomposition represents
+   * a surjective linear map; false otherwise.
+   *
+   * \note This method has to determine which pivots should be considered
+   * nonzero. For that, it uses the threshold value that you can control by
+   * calling setThreshold(const RealScalar&).
+   */
+  inline bool isSurjective() const { return m_cpqr.isSurjective(); }
+
+  /** \returns true if the matrix of which *this is the complete orthogonal
+   * decomposition is invertible.
+   *
+   * \note This method has to determine which pivots should be considered
+   * nonzero. For that, it uses the threshold value that you can control by
+   * calling setThreshold(const RealScalar&).
+   */
+  inline bool isInvertible() const { return m_cpqr.isInvertible(); }
+
+  /** \returns the pseudo-inverse of the matrix of which *this is the complete
+   * orthogonal decomposition.
+   * \warning: Do not compute \c this->pseudoInverse()*rhs to solve a linear systems.
+   * It is more efficient and numerically stable to call \c this->solve(rhs).
+   */
+  inline const Inverse<CompleteOrthogonalDecomposition> pseudoInverse() const
+  {
+    return Inverse<CompleteOrthogonalDecomposition>(*this);
+  }
+
+  inline Index rows() const { return m_cpqr.rows(); }
+  inline Index cols() const { return m_cpqr.cols(); }
+
+  /** \returns a const reference to the vector of Householder coefficients used
+   * to represent the factor \c Q.
+   *
+   * For advanced uses only.
+   */
+  inline const HCoeffsType& hCoeffs() const { return m_cpqr.hCoeffs(); }
+
+  /** \returns a const reference to the vector of Householder coefficients
+   * used to represent the factor \c Z.
+   *
+   * For advanced uses only.
+   */
+  const HCoeffsType& zCoeffs() const { return m_zCoeffs; }
+
+  /** Allows to prescribe a threshold to be used by certain methods, such as
+   * rank(), who need to determine when pivots are to be considered nonzero.
+   * Most be called before calling compute().
+   *
+   * When it needs to get the threshold value, Eigen calls threshold(). By
+   * default, this uses a formula to automatically determine a reasonable
+   * threshold. Once you have called the present method
+   * setThreshold(const RealScalar&), your value is used instead.
+   *
+   * \param threshold The new value to use as the threshold.
+   *
+   * A pivot will be considered nonzero if its absolute value is strictly
+   * greater than
+   *  \f$ \vert pivot \vert \leqslant threshold \times \vert maxpivot \vert \f$
+   * where maxpivot is the biggest pivot.
+   *
+   * If you want to come back to the default behavior, call
+   * setThreshold(Default_t)
+   */
+  CompleteOrthogonalDecomposition& setThreshold(const RealScalar& threshold) {
+    m_cpqr.setThreshold(threshold);
+    return *this;
+  }
+
+  /** Allows to come back to the default behavior, letting Eigen use its default
+   * formula for determining the threshold.
+   *
+   * You should pass the special object Eigen::Default as parameter here.
+   * \code qr.setThreshold(Eigen::Default); \endcode
+   *
+   * See the documentation of setThreshold(const RealScalar&).
+   */
+  CompleteOrthogonalDecomposition& setThreshold(Default_t) {
+    m_cpqr.setThreshold(Default);
+    return *this;
+  }
+
+  /** Returns the threshold that will be used by certain methods such as rank().
+   *
+   * See the documentation of setThreshold(const RealScalar&).
+   */
+  RealScalar threshold() const { return m_cpqr.threshold(); }
+
+  /** \returns the number of nonzero pivots in the complete orthogonal
+   * decomposition. Here nonzero is meant in the exact sense, not in a
+   * fuzzy sense. So that notion isn't really intrinsically interesting,
+   * but it is still useful when implementing algorithms.
+   *
+   * \sa rank()
+   */
+  inline Index nonzeroPivots() const { return m_cpqr.nonzeroPivots(); }
+
+  /** \returns the absolute value of the biggest pivot, i.e. the biggest
+   *          diagonal coefficient of R.
+   */
+  inline RealScalar maxPivot() const { return m_cpqr.maxPivot(); }
+
+  /** \brief Reports whether the complete orthogonal decomposition was
+   * succesful.
+   *
+   * \note This function always returns \c Success. It is provided for
+   * compatibility
+   * with other factorization routines.
+   * \returns \c Success
+   */
+  ComputationInfo info() const {
+    eigen_assert(m_cpqr.m_isInitialized && "Decomposition is not initialized.");
+    return Success;
+  }
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  template <typename RhsType, typename DstType>
+  EIGEN_DEVICE_FUNC void _solve_impl(const RhsType& rhs, DstType& dst) const;
+#endif
+
+ protected:
+  static void check_template_parameters() {
+    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+  }
+
+  void computeInPlace();
+
+  /** Overwrites \b rhs with \f$ \mathbf{Z}^* * \mathbf{rhs} \f$.
+   */
+  template <typename Rhs>
+  void applyZAdjointOnTheLeftInPlace(Rhs& rhs) const;
+
+  ColPivHouseholderQR<MatrixType> m_cpqr;
+  HCoeffsType m_zCoeffs;
+  RowVectorType m_temp;
+};
+
+template <typename MatrixType>
+typename MatrixType::RealScalar
+CompleteOrthogonalDecomposition<MatrixType>::absDeterminant() const {
+  return m_cpqr.absDeterminant();
+}
+
+template <typename MatrixType>
+typename MatrixType::RealScalar
+CompleteOrthogonalDecomposition<MatrixType>::logAbsDeterminant() const {
+  return m_cpqr.logAbsDeterminant();
+}
+
+/** Performs the complete orthogonal decomposition of the given matrix \a
+ * matrix. The result of the factorization is stored into \c *this, and a
+ * reference to \c *this is returned.
+ *
+ * \sa class CompleteOrthogonalDecomposition,
+ * CompleteOrthogonalDecomposition(const MatrixType&)
+ */
+template <typename MatrixType>
+void CompleteOrthogonalDecomposition<MatrixType>::computeInPlace()
+{
+  check_template_parameters();
+
+  // the column permutation is stored as int indices, so just to be sure:
+  eigen_assert(m_cpqr.cols() <= NumTraits<int>::highest());
+
+  const Index rank = m_cpqr.rank();
+  const Index cols = m_cpqr.cols();
+  const Index rows = m_cpqr.rows();
+  m_zCoeffs.resize((std::min)(rows, cols));
+  m_temp.resize(cols);
+
+  if (rank < cols) {
+    // We have reduced the (permuted) matrix to the form
+    //   [R11 R12]
+    //   [ 0  R22]
+    // where R11 is r-by-r (r = rank) upper triangular, R12 is
+    // r-by-(n-r), and R22 is empty or the norm of R22 is negligible.
+    // We now compute the complete orthogonal decomposition by applying
+    // Householder transformations from the right to the upper trapezoidal
+    // matrix X = [R11 R12] to zero out R12 and obtain the factorization
+    // [R11 R12] = [T11 0] * Z, where T11 is r-by-r upper triangular and
+    // Z = Z(0) * Z(1) ... Z(r-1) is an n-by-n orthogonal matrix.
+    // We store the data representing Z in R12 and m_zCoeffs.
+    for (Index k = rank - 1; k >= 0; --k) {
+      if (k != rank - 1) {
+        // Given the API for Householder reflectors, it is more convenient if
+        // we swap the leading parts of columns k and r-1 (zero-based) to form
+        // the matrix X_k = [X(0:k, k), X(0:k, r:n)]
+        m_cpqr.m_qr.col(k).head(k + 1).swap(
+            m_cpqr.m_qr.col(rank - 1).head(k + 1));
+      }
+      // Construct Householder reflector Z(k) to zero out the last row of X_k,
+      // i.e. choose Z(k) such that
+      // [X(k, k), X(k, r:n)] * Z(k) = [beta, 0, .., 0].
+      RealScalar beta;
+      m_cpqr.m_qr.row(k)
+          .tail(cols - rank + 1)
+          .makeHouseholderInPlace(m_zCoeffs(k), beta);
+      m_cpqr.m_qr(k, rank - 1) = beta;
+      if (k > 0) {
+        // Apply Z(k) to the first k rows of X_k
+        m_cpqr.m_qr.topRightCorner(k, cols - rank + 1)
+            .applyHouseholderOnTheRight(
+                m_cpqr.m_qr.row(k).tail(cols - rank).transpose(), m_zCoeffs(k),
+                &m_temp(0));
+      }
+      if (k != rank - 1) {
+        // Swap X(0:k,k) back to its proper location.
+        m_cpqr.m_qr.col(k).head(k + 1).swap(
+            m_cpqr.m_qr.col(rank - 1).head(k + 1));
+      }
+    }
+  }
+}
+
+template <typename MatrixType>
+template <typename Rhs>
+void CompleteOrthogonalDecomposition<MatrixType>::applyZAdjointOnTheLeftInPlace(
+    Rhs& rhs) const {
+  const Index cols = this->cols();
+  const Index nrhs = rhs.cols();
+  const Index rank = this->rank();
+  Matrix<typename MatrixType::Scalar, Dynamic, 1> temp((std::max)(cols, nrhs));
+  for (Index k = 0; k < rank; ++k) {
+    if (k != rank - 1) {
+      rhs.row(k).swap(rhs.row(rank - 1));
+    }
+    rhs.middleRows(rank - 1, cols - rank + 1)
+        .applyHouseholderOnTheLeft(
+            matrixQTZ().row(k).tail(cols - rank).adjoint(), zCoeffs()(k),
+            &temp(0));
+    if (k != rank - 1) {
+      rhs.row(k).swap(rhs.row(rank - 1));
+    }
+  }
+}
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template <typename _MatrixType>
+template <typename RhsType, typename DstType>
+void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl(
+    const RhsType& rhs, DstType& dst) const {
+  eigen_assert(rhs.rows() == this->rows());
+
+  const Index rank = this->rank();
+  if (rank == 0) {
+    dst.setZero();
+    return;
+  }
+
+  // Compute c = Q^* * rhs
+  // Note that the matrix Q = H_0^* H_1^*... so its inverse is
+  // Q^* = (H_0 H_1 ...)^T
+  typename RhsType::PlainObject c(rhs);
+  c.applyOnTheLeft(
+      householderSequence(matrixQTZ(), hCoeffs()).setLength(rank).transpose());
+
+  // Solve T z = c(1:rank, :)
+  dst.topRows(rank) = matrixT()
+                          .topLeftCorner(rank, rank)
+                          .template triangularView<Upper>()
+                          .solve(c.topRows(rank));
+
+  const Index cols = this->cols();
+  if (rank < cols) {
+    // Compute y = Z^* * [ z ]
+    //                   [ 0 ]
+    dst.bottomRows(cols - rank).setZero();
+    applyZAdjointOnTheLeftInPlace(dst);
+  }
+
+  // Undo permutation to get x = P^{-1} * y.
+  dst = colsPermutation() * dst;
+}
+#endif
+
+namespace internal {
+
+template<typename DstXprType, typename MatrixType>
+struct Assignment<DstXprType, Inverse<CompleteOrthogonalDecomposition<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename CompleteOrthogonalDecomposition<MatrixType>::Scalar>, Dense2Dense>
+{
+  typedef CompleteOrthogonalDecomposition<MatrixType> CodType;
+  typedef Inverse<CodType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename CodType::Scalar> &)
+  {
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.rows()));
+  }
+};
+
+} // end namespace internal
+
+/** \returns the matrix Q as a sequence of householder transformations */
+template <typename MatrixType>
+typename CompleteOrthogonalDecomposition<MatrixType>::HouseholderSequenceType
+CompleteOrthogonalDecomposition<MatrixType>::householderQ() const {
+  return m_cpqr.householderQ();
+}
+
+/** \return the complete orthogonal decomposition of \c *this.
+  *
+  * \sa class CompleteOrthogonalDecomposition
+  */
+template <typename Derived>
+const CompleteOrthogonalDecomposition<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::completeOrthogonalDecomposition() const {
+  return CompleteOrthogonalDecomposition<PlainObject>(eval());
+}
+
+}  // end namespace Eigen
+
+#endif  // EIGEN_COMPLETEORTHOGONALDECOMPOSITION_H
diff --git a/third-party/Eigen/src/QR/FullPivHouseholderQR.h b/third-party/Eigen/src/QR/FullPivHouseholderQR.h
new file mode 100644
index 00000000..e489bddc
--- /dev/null
+++ b/third-party/Eigen/src/QR/FullPivHouseholderQR.h
@@ -0,0 +1,676 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H
+#define EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename _MatrixType> struct traits<FullPivHouseholderQR<_MatrixType> >
+ : traits<_MatrixType>
+{
+  enum { Flags = 0 };
+};
+
+template<typename MatrixType> struct FullPivHouseholderQRMatrixQReturnType;
+
+template<typename MatrixType>
+struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
+{
+  typedef typename MatrixType::PlainObject ReturnType;
+};
+
+} // end namespace internal
+
+/** \ingroup QR_Module
+  *
+  * \class FullPivHouseholderQR
+  *
+  * \brief Householder rank-revealing QR decomposition of a matrix with full pivoting
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
+  *
+  * This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b P', \b Q and \b R
+  * such that 
+  * \f[
+  *  \mathbf{P} \, \mathbf{A} \, \mathbf{P}' = \mathbf{Q} \, \mathbf{R}
+  * \f]
+  * by using Householder transformations. Here, \b P and \b P' are permutation matrices, \b Q a unitary matrix 
+  * and \b R an upper triangular matrix.
+  *
+  * This decomposition performs a very prudent full pivoting in order to be rank-revealing and achieve optimal
+  * numerical stability. The trade-off is that it is slower than HouseholderQR and ColPivHouseholderQR.
+  *
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+  * 
+  * \sa MatrixBase::fullPivHouseholderQr()
+  */
+template<typename _MatrixType> class FullPivHouseholderQR
+{
+  public:
+
+    typedef _MatrixType MatrixType;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename MatrixType::RealScalar RealScalar;
+    // FIXME should be int
+    typedef typename MatrixType::StorageIndex StorageIndex;
+    typedef internal::FullPivHouseholderQRMatrixQReturnType<MatrixType> MatrixQReturnType;
+    typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
+    typedef Matrix<StorageIndex, 1,
+                   EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime,RowsAtCompileTime), RowMajor, 1,
+                   EIGEN_SIZE_MIN_PREFER_FIXED(MaxColsAtCompileTime,MaxRowsAtCompileTime)> IntDiagSizeVectorType;
+    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationType;
+    typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
+    typedef typename internal::plain_col_type<MatrixType>::type ColVectorType;
+    typedef typename MatrixType::PlainObject PlainObject;
+
+    /** \brief Default Constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via FullPivHouseholderQR::compute(const MatrixType&).
+      */
+    FullPivHouseholderQR()
+      : m_qr(),
+        m_hCoeffs(),
+        m_rows_transpositions(),
+        m_cols_transpositions(),
+        m_cols_permutation(),
+        m_temp(),
+        m_isInitialized(false),
+        m_usePrescribedThreshold(false) {}
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa FullPivHouseholderQR()
+      */
+    FullPivHouseholderQR(Index rows, Index cols)
+      : m_qr(rows, cols),
+        m_hCoeffs((std::min)(rows,cols)),
+        m_rows_transpositions((std::min)(rows,cols)),
+        m_cols_transpositions((std::min)(rows,cols)),
+        m_cols_permutation(cols),
+        m_temp(cols),
+        m_isInitialized(false),
+        m_usePrescribedThreshold(false) {}
+
+    /** \brief Constructs a QR factorization from a given matrix
+      *
+      * This constructor computes the QR factorization of the matrix \a matrix by calling
+      * the method compute(). It is a short cut for:
+      * 
+      * \code
+      * FullPivHouseholderQR<MatrixType> qr(matrix.rows(), matrix.cols());
+      * qr.compute(matrix);
+      * \endcode
+      * 
+      * \sa compute()
+      */
+    template<typename InputType>
+    explicit FullPivHouseholderQR(const EigenBase<InputType>& matrix)
+      : m_qr(matrix.rows(), matrix.cols()),
+        m_hCoeffs((std::min)(matrix.rows(), matrix.cols())),
+        m_rows_transpositions((std::min)(matrix.rows(), matrix.cols())),
+        m_cols_transpositions((std::min)(matrix.rows(), matrix.cols())),
+        m_cols_permutation(matrix.cols()),
+        m_temp(matrix.cols()),
+        m_isInitialized(false),
+        m_usePrescribedThreshold(false)
+    {
+      compute(matrix.derived());
+    }
+
+    /** \brief Constructs a QR factorization from a given matrix
+      *
+      * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
+      *
+      * \sa FullPivHouseholderQR(const EigenBase&)
+      */
+    template<typename InputType>
+    explicit FullPivHouseholderQR(EigenBase<InputType>& matrix)
+      : m_qr(matrix.derived()),
+        m_hCoeffs((std::min)(matrix.rows(), matrix.cols())),
+        m_rows_transpositions((std::min)(matrix.rows(), matrix.cols())),
+        m_cols_transpositions((std::min)(matrix.rows(), matrix.cols())),
+        m_cols_permutation(matrix.cols()),
+        m_temp(matrix.cols()),
+        m_isInitialized(false),
+        m_usePrescribedThreshold(false)
+    {
+      computeInPlace();
+    }
+
+    /** This method finds a solution x to the equation Ax=b, where A is the matrix of which
+      * \c *this is the QR decomposition.
+      *
+      * \param b the right-hand-side of the equation to solve.
+      *
+      * \returns the exact or least-square solution if the rank is greater or equal to the number of columns of A,
+      * and an arbitrary solution otherwise.
+      *
+      * \note_about_checking_solutions
+      *
+      * \note_about_arbitrary_choice_of_solution
+      *
+      * Example: \include FullPivHouseholderQR_solve.cpp
+      * Output: \verbinclude FullPivHouseholderQR_solve.out
+      */
+    template<typename Rhs>
+    inline const Solve<FullPivHouseholderQR, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+      return Solve<FullPivHouseholderQR, Rhs>(*this, b.derived());
+    }
+
+    /** \returns Expression object representing the matrix Q
+      */
+    MatrixQReturnType matrixQ(void) const;
+
+    /** \returns a reference to the matrix where the Householder QR decomposition is stored
+      */
+    const MatrixType& matrixQR() const
+    {
+      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+      return m_qr;
+    }
+
+    template<typename InputType>
+    FullPivHouseholderQR& compute(const EigenBase<InputType>& matrix);
+
+    /** \returns a const reference to the column permutation matrix */
+    const PermutationType& colsPermutation() const
+    {
+      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+      return m_cols_permutation;
+    }
+
+    /** \returns a const reference to the vector of indices representing the rows transpositions */
+    const IntDiagSizeVectorType& rowsTranspositions() const
+    {
+      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+      return m_rows_transpositions;
+    }
+
+    /** \returns the absolute value of the determinant of the matrix of which
+      * *this is the QR decomposition. It has only linear complexity
+      * (that is, O(n) where n is the dimension of the square matrix)
+      * as the QR decomposition has already been computed.
+      *
+      * \note This is only for square matrices.
+      *
+      * \warning a determinant can be very big or small, so for matrices
+      * of large enough dimension, there is a risk of overflow/underflow.
+      * One way to work around that is to use logAbsDeterminant() instead.
+      *
+      * \sa logAbsDeterminant(), MatrixBase::determinant()
+      */
+    typename MatrixType::RealScalar absDeterminant() const;
+
+    /** \returns the natural log of the absolute value of the determinant of the matrix of which
+      * *this is the QR decomposition. It has only linear complexity
+      * (that is, O(n) where n is the dimension of the square matrix)
+      * as the QR decomposition has already been computed.
+      *
+      * \note This is only for square matrices.
+      *
+      * \note This method is useful to work around the risk of overflow/underflow that's inherent
+      * to determinant computation.
+      *
+      * \sa absDeterminant(), MatrixBase::determinant()
+      */
+    typename MatrixType::RealScalar logAbsDeterminant() const;
+
+    /** \returns the rank of the matrix of which *this is the QR decomposition.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline Index rank() const
+    {
+      using std::abs;
+      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+      RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold();
+      Index result = 0;
+      for(Index i = 0; i < m_nonzero_pivots; ++i)
+        result += (abs(m_qr.coeff(i,i)) > premultiplied_threshold);
+      return result;
+    }
+
+    /** \returns the dimension of the kernel of the matrix of which *this is the QR decomposition.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline Index dimensionOfKernel() const
+    {
+      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+      return cols() - rank();
+    }
+
+    /** \returns true if the matrix of which *this is the QR decomposition represents an injective
+      *          linear map, i.e. has trivial kernel; false otherwise.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isInjective() const
+    {
+      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+      return rank() == cols();
+    }
+
+    /** \returns true if the matrix of which *this is the QR decomposition represents a surjective
+      *          linear map; false otherwise.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isSurjective() const
+    {
+      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+      return rank() == rows();
+    }
+
+    /** \returns true if the matrix of which *this is the QR decomposition is invertible.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isInvertible() const
+    {
+      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+      return isInjective() && isSurjective();
+    }
+
+    /** \returns the inverse of the matrix of which *this is the QR decomposition.
+      *
+      * \note If this matrix is not invertible, the returned matrix has undefined coefficients.
+      *       Use isInvertible() to first determine whether this matrix is invertible.
+      */
+    inline const Inverse<FullPivHouseholderQR> inverse() const
+    {
+      eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+      return Inverse<FullPivHouseholderQR>(*this);
+    }
+
+    inline Index rows() const { return m_qr.rows(); }
+    inline Index cols() const { return m_qr.cols(); }
+    
+    /** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q.
+      * 
+      * For advanced uses only.
+      */
+    const HCoeffsType& hCoeffs() const { return m_hCoeffs; }
+
+    /** Allows to prescribe a threshold to be used by certain methods, such as rank(),
+      * who need to determine when pivots are to be considered nonzero. This is not used for the
+      * QR decomposition itself.
+      *
+      * When it needs to get the threshold value, Eigen calls threshold(). By default, this
+      * uses a formula to automatically determine a reasonable threshold.
+      * Once you have called the present method setThreshold(const RealScalar&),
+      * your value is used instead.
+      *
+      * \param threshold The new value to use as the threshold.
+      *
+      * A pivot will be considered nonzero if its absolute value is strictly greater than
+      *  \f$ \vert pivot \vert \leqslant threshold \times \vert maxpivot \vert \f$
+      * where maxpivot is the biggest pivot.
+      *
+      * If you want to come back to the default behavior, call setThreshold(Default_t)
+      */
+    FullPivHouseholderQR& setThreshold(const RealScalar& threshold)
+    {
+      m_usePrescribedThreshold = true;
+      m_prescribedThreshold = threshold;
+      return *this;
+    }
+
+    /** Allows to come back to the default behavior, letting Eigen use its default formula for
+      * determining the threshold.
+      *
+      * You should pass the special object Eigen::Default as parameter here.
+      * \code qr.setThreshold(Eigen::Default); \endcode
+      *
+      * See the documentation of setThreshold(const RealScalar&).
+      */
+    FullPivHouseholderQR& setThreshold(Default_t)
+    {
+      m_usePrescribedThreshold = false;
+      return *this;
+    }
+
+    /** Returns the threshold that will be used by certain methods such as rank().
+      *
+      * See the documentation of setThreshold(const RealScalar&).
+      */
+    RealScalar threshold() const
+    {
+      eigen_assert(m_isInitialized || m_usePrescribedThreshold);
+      return m_usePrescribedThreshold ? m_prescribedThreshold
+      // this formula comes from experimenting (see "LU precision tuning" thread on the list)
+      // and turns out to be identical to Higham's formula used already in LDLt.
+                                      : NumTraits<Scalar>::epsilon() * RealScalar(m_qr.diagonalSize());
+    }
+
+    /** \returns the number of nonzero pivots in the QR decomposition.
+      * Here nonzero is meant in the exact sense, not in a fuzzy sense.
+      * So that notion isn't really intrinsically interesting, but it is
+      * still useful when implementing algorithms.
+      *
+      * \sa rank()
+      */
+    inline Index nonzeroPivots() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return m_nonzero_pivots;
+    }
+
+    /** \returns the absolute value of the biggest pivot, i.e. the biggest
+      *          diagonal coefficient of U.
+      */
+    RealScalar maxPivot() const { return m_maxpivot; }
+    
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
+
+  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
+    void computeInPlace();
+    
+    MatrixType m_qr;
+    HCoeffsType m_hCoeffs;
+    IntDiagSizeVectorType m_rows_transpositions;
+    IntDiagSizeVectorType m_cols_transpositions;
+    PermutationType m_cols_permutation;
+    RowVectorType m_temp;
+    bool m_isInitialized, m_usePrescribedThreshold;
+    RealScalar m_prescribedThreshold, m_maxpivot;
+    Index m_nonzero_pivots;
+    RealScalar m_precision;
+    Index m_det_pq;
+};
+
+template<typename MatrixType>
+typename MatrixType::RealScalar FullPivHouseholderQR<MatrixType>::absDeterminant() const
+{
+  using std::abs;
+  eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
+  return abs(m_qr.diagonal().prod());
+}
+
+template<typename MatrixType>
+typename MatrixType::RealScalar FullPivHouseholderQR<MatrixType>::logAbsDeterminant() const
+{
+  eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
+  return m_qr.diagonal().cwiseAbs().array().log().sum();
+}
+
+/** Performs the QR factorization of the given matrix \a matrix. The result of
+  * the factorization is stored into \c *this, and a reference to \c *this
+  * is returned.
+  *
+  * \sa class FullPivHouseholderQR, FullPivHouseholderQR(const MatrixType&)
+  */
+template<typename MatrixType>
+template<typename InputType>
+FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(const EigenBase<InputType>& matrix)
+{
+  m_qr = matrix.derived();
+  computeInPlace();
+  return *this;
+}
+
+template<typename MatrixType>
+void FullPivHouseholderQR<MatrixType>::computeInPlace()
+{
+  check_template_parameters();
+
+  using std::abs;
+  Index rows = m_qr.rows();
+  Index cols = m_qr.cols();
+  Index size = (std::min)(rows,cols);
+
+  
+  m_hCoeffs.resize(size);
+
+  m_temp.resize(cols);
+
+  m_precision = NumTraits<Scalar>::epsilon() * RealScalar(size);
+
+  m_rows_transpositions.resize(size);
+  m_cols_transpositions.resize(size);
+  Index number_of_transpositions = 0;
+
+  RealScalar biggest(0);
+
+  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
+  m_maxpivot = RealScalar(0);
+
+  for (Index k = 0; k < size; ++k)
+  {
+    Index row_of_biggest_in_corner, col_of_biggest_in_corner;
+    typedef internal::scalar_score_coeff_op<Scalar> Scoring;
+    typedef typename Scoring::result_type Score;
+
+    Score score = m_qr.bottomRightCorner(rows-k, cols-k)
+                      .unaryExpr(Scoring())
+                      .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);
+    row_of_biggest_in_corner += k;
+    col_of_biggest_in_corner += k;
+    RealScalar biggest_in_corner = internal::abs_knowing_score<Scalar>()(m_qr(row_of_biggest_in_corner, col_of_biggest_in_corner), score);
+    if(k==0) biggest = biggest_in_corner;
+
+    // if the corner is negligible, then we have less than full rank, and we can finish early
+    if(internal::isMuchSmallerThan(biggest_in_corner, biggest, m_precision))
+    {
+      m_nonzero_pivots = k;
+      for(Index i = k; i < size; i++)
+      {
+        m_rows_transpositions.coeffRef(i) = i;
+        m_cols_transpositions.coeffRef(i) = i;
+        m_hCoeffs.coeffRef(i) = Scalar(0);
+      }
+      break;
+    }
+
+    m_rows_transpositions.coeffRef(k) = row_of_biggest_in_corner;
+    m_cols_transpositions.coeffRef(k) = col_of_biggest_in_corner;
+    if(k != row_of_biggest_in_corner) {
+      m_qr.row(k).tail(cols-k).swap(m_qr.row(row_of_biggest_in_corner).tail(cols-k));
+      ++number_of_transpositions;
+    }
+    if(k != col_of_biggest_in_corner) {
+      m_qr.col(k).swap(m_qr.col(col_of_biggest_in_corner));
+      ++number_of_transpositions;
+    }
+
+    RealScalar beta;
+    m_qr.col(k).tail(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
+    m_qr.coeffRef(k,k) = beta;
+
+    // remember the maximum absolute value of diagonal coefficients
+    if(abs(beta) > m_maxpivot) m_maxpivot = abs(beta);
+
+    m_qr.bottomRightCorner(rows-k, cols-k-1)
+        .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &m_temp.coeffRef(k+1));
+  }
+
+  m_cols_permutation.setIdentity(cols);
+  for(Index k = 0; k < size; ++k)
+    m_cols_permutation.applyTranspositionOnTheRight(k, m_cols_transpositions.coeff(k));
+
+  m_det_pq = (number_of_transpositions%2) ? -1 : 1;
+  m_isInitialized = true;
+}
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+  eigen_assert(rhs.rows() == rows());
+  const Index l_rank = rank();
+
+  // FIXME introduce nonzeroPivots() and use it here. and more generally,
+  // make the same improvements in this dec as in FullPivLU.
+  if(l_rank==0)
+  {
+    dst.setZero();
+    return;
+  }
+
+  typename RhsType::PlainObject c(rhs);
+
+  Matrix<Scalar,1,RhsType::ColsAtCompileTime> temp(rhs.cols());
+  for (Index k = 0; k < l_rank; ++k)
+  {
+    Index remainingSize = rows()-k;
+    c.row(k).swap(c.row(m_rows_transpositions.coeff(k)));
+    c.bottomRightCorner(remainingSize, rhs.cols())
+      .applyHouseholderOnTheLeft(m_qr.col(k).tail(remainingSize-1),
+                               m_hCoeffs.coeff(k), &temp.coeffRef(0));
+  }
+
+  m_qr.topLeftCorner(l_rank, l_rank)
+      .template triangularView<Upper>()
+      .solveInPlace(c.topRows(l_rank));
+
+  for(Index i = 0; i < l_rank; ++i) dst.row(m_cols_permutation.indices().coeff(i)) = c.row(i);
+  for(Index i = l_rank; i < cols(); ++i) dst.row(m_cols_permutation.indices().coeff(i)).setZero();
+}
+#endif
+
+namespace internal {
+  
+template<typename DstXprType, typename MatrixType>
+struct Assignment<DstXprType, Inverse<FullPivHouseholderQR<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivHouseholderQR<MatrixType>::Scalar>, Dense2Dense>
+{
+  typedef FullPivHouseholderQR<MatrixType> QrType;
+  typedef Inverse<QrType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename QrType::Scalar> &)
+  {    
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
+  }
+};
+
+/** \ingroup QR_Module
+  *
+  * \brief Expression type for return value of FullPivHouseholderQR::matrixQ()
+  *
+  * \tparam MatrixType type of underlying dense matrix
+  */
+template<typename MatrixType> struct FullPivHouseholderQRMatrixQReturnType
+  : public ReturnByValue<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
+{
+public:
+  typedef typename FullPivHouseholderQR<MatrixType>::IntDiagSizeVectorType IntDiagSizeVectorType;
+  typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
+  typedef Matrix<typename MatrixType::Scalar, 1, MatrixType::RowsAtCompileTime, RowMajor, 1,
+                 MatrixType::MaxRowsAtCompileTime> WorkVectorType;
+
+  FullPivHouseholderQRMatrixQReturnType(const MatrixType&       qr,
+                                        const HCoeffsType&      hCoeffs,
+                                        const IntDiagSizeVectorType& rowsTranspositions)
+    : m_qr(qr),
+      m_hCoeffs(hCoeffs),
+      m_rowsTranspositions(rowsTranspositions)
+  {}
+
+  template <typename ResultType>
+  void evalTo(ResultType& result) const
+  {
+    const Index rows = m_qr.rows();
+    WorkVectorType workspace(rows);
+    evalTo(result, workspace);
+  }
+
+  template <typename ResultType>
+  void evalTo(ResultType& result, WorkVectorType& workspace) const
+  {
+    using numext::conj;
+    // compute the product H'_0 H'_1 ... H'_n-1,
+    // where H_k is the k-th Householder transformation I - h_k v_k v_k'
+    // and v_k is the k-th Householder vector [1,m_qr(k+1,k), m_qr(k+2,k), ...]
+    const Index rows = m_qr.rows();
+    const Index cols = m_qr.cols();
+    const Index size = (std::min)(rows, cols);
+    workspace.resize(rows);
+    result.setIdentity(rows, rows);
+    for (Index k = size-1; k >= 0; k--)
+    {
+      result.block(k, k, rows-k, rows-k)
+            .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), conj(m_hCoeffs.coeff(k)), &workspace.coeffRef(k));
+      result.row(k).swap(result.row(m_rowsTranspositions.coeff(k)));
+    }
+  }
+
+  Index rows() const { return m_qr.rows(); }
+  Index cols() const { return m_qr.rows(); }
+
+protected:
+  typename MatrixType::Nested m_qr;
+  typename HCoeffsType::Nested m_hCoeffs;
+  typename IntDiagSizeVectorType::Nested m_rowsTranspositions;
+};
+
+// template<typename MatrixType>
+// struct evaluator<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
+//  : public evaluator<ReturnByValue<FullPivHouseholderQRMatrixQReturnType<MatrixType> > >
+// {};
+
+} // end namespace internal
+
+template<typename MatrixType>
+inline typename FullPivHouseholderQR<MatrixType>::MatrixQReturnType FullPivHouseholderQR<MatrixType>::matrixQ() const
+{
+  eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
+  return MatrixQReturnType(m_qr, m_hCoeffs, m_rows_transpositions);
+}
+
+/** \return the full-pivoting Householder QR decomposition of \c *this.
+  *
+  * \sa class FullPivHouseholderQR
+  */
+template<typename Derived>
+const FullPivHouseholderQR<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::fullPivHouseholderQr() const
+{
+  return FullPivHouseholderQR<PlainObject>(eval());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H
diff --git a/third-party/Eigen/src/QR/HouseholderQR.h b/third-party/Eigen/src/QR/HouseholderQR.h
new file mode 100644
index 00000000..3513d995
--- /dev/null
+++ b/third-party/Eigen/src/QR/HouseholderQR.h
@@ -0,0 +1,409 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2010 Vincent Lejeune
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_QR_H
+#define EIGEN_QR_H
+
+namespace Eigen { 
+
+/** \ingroup QR_Module
+  *
+  *
+  * \class HouseholderQR
+  *
+  * \brief Householder QR decomposition of a matrix
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the QR decomposition
+  *
+  * This class performs a QR decomposition of a matrix \b A into matrices \b Q and \b R
+  * such that 
+  * \f[
+  *  \mathbf{A} = \mathbf{Q} \, \mathbf{R}
+  * \f]
+  * by using Householder transformations. Here, \b Q a unitary matrix and \b R an upper triangular matrix.
+  * The result is stored in a compact way compatible with LAPACK.
+  *
+  * Note that no pivoting is performed. This is \b not a rank-revealing decomposition.
+  * If you want that feature, use FullPivHouseholderQR or ColPivHouseholderQR instead.
+  *
+  * This Householder QR decomposition is faster, but less numerically stable and less feature-full than
+  * FullPivHouseholderQR or ColPivHouseholderQR.
+  *
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+  *
+  * \sa MatrixBase::householderQr()
+  */
+template<typename _MatrixType> class HouseholderQR
+{
+  public:
+
+    typedef _MatrixType MatrixType;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename MatrixType::RealScalar RealScalar;
+    // FIXME should be int
+    typedef typename MatrixType::StorageIndex StorageIndex;
+    typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, (MatrixType::Flags&RowMajorBit) ? RowMajor : ColMajor, MaxRowsAtCompileTime, MaxRowsAtCompileTime> MatrixQType;
+    typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
+    typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
+    typedef HouseholderSequence<MatrixType,typename internal::remove_all<typename HCoeffsType::ConjugateReturnType>::type> HouseholderSequenceType;
+
+    /**
+      * \brief Default Constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via HouseholderQR::compute(const MatrixType&).
+      */
+    HouseholderQR() : m_qr(), m_hCoeffs(), m_temp(), m_isInitialized(false) {}
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa HouseholderQR()
+      */
+    HouseholderQR(Index rows, Index cols)
+      : m_qr(rows, cols),
+        m_hCoeffs((std::min)(rows,cols)),
+        m_temp(cols),
+        m_isInitialized(false) {}
+
+    /** \brief Constructs a QR factorization from a given matrix
+      *
+      * This constructor computes the QR factorization of the matrix \a matrix by calling
+      * the method compute(). It is a short cut for:
+      * 
+      * \code
+      * HouseholderQR<MatrixType> qr(matrix.rows(), matrix.cols());
+      * qr.compute(matrix);
+      * \endcode
+      * 
+      * \sa compute()
+      */
+    template<typename InputType>
+    explicit HouseholderQR(const EigenBase<InputType>& matrix)
+      : m_qr(matrix.rows(), matrix.cols()),
+        m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),
+        m_temp(matrix.cols()),
+        m_isInitialized(false)
+    {
+      compute(matrix.derived());
+    }
+
+
+    /** \brief Constructs a QR factorization from a given matrix
+      *
+      * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when
+      * \c MatrixType is a Eigen::Ref.
+      *
+      * \sa HouseholderQR(const EigenBase&)
+      */
+    template<typename InputType>
+    explicit HouseholderQR(EigenBase<InputType>& matrix)
+      : m_qr(matrix.derived()),
+        m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),
+        m_temp(matrix.cols()),
+        m_isInitialized(false)
+    {
+      computeInPlace();
+    }
+
+    /** This method finds a solution x to the equation Ax=b, where A is the matrix of which
+      * *this is the QR decomposition, if any exists.
+      *
+      * \param b the right-hand-side of the equation to solve.
+      *
+      * \returns a solution.
+      *
+      * \note_about_checking_solutions
+      *
+      * \note_about_arbitrary_choice_of_solution
+      *
+      * Example: \include HouseholderQR_solve.cpp
+      * Output: \verbinclude HouseholderQR_solve.out
+      */
+    template<typename Rhs>
+    inline const Solve<HouseholderQR, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "HouseholderQR is not initialized.");
+      return Solve<HouseholderQR, Rhs>(*this, b.derived());
+    }
+
+    /** This method returns an expression of the unitary matrix Q as a sequence of Householder transformations.
+      *
+      * The returned expression can directly be used to perform matrix products. It can also be assigned to a dense Matrix object.
+      * Here is an example showing how to recover the full or thin matrix Q, as well as how to perform matrix products using operator*:
+      *
+      * Example: \include HouseholderQR_householderQ.cpp
+      * Output: \verbinclude HouseholderQR_householderQ.out
+      */
+    HouseholderSequenceType householderQ() const
+    {
+      eigen_assert(m_isInitialized && "HouseholderQR is not initialized.");
+      return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate());
+    }
+
+    /** \returns a reference to the matrix where the Householder QR decomposition is stored
+      * in a LAPACK-compatible way.
+      */
+    const MatrixType& matrixQR() const
+    {
+        eigen_assert(m_isInitialized && "HouseholderQR is not initialized.");
+        return m_qr;
+    }
+
+    template<typename InputType>
+    HouseholderQR& compute(const EigenBase<InputType>& matrix) {
+      m_qr = matrix.derived();
+      computeInPlace();
+      return *this;
+    }
+
+    /** \returns the absolute value of the determinant of the matrix of which
+      * *this is the QR decomposition. It has only linear complexity
+      * (that is, O(n) where n is the dimension of the square matrix)
+      * as the QR decomposition has already been computed.
+      *
+      * \note This is only for square matrices.
+      *
+      * \warning a determinant can be very big or small, so for matrices
+      * of large enough dimension, there is a risk of overflow/underflow.
+      * One way to work around that is to use logAbsDeterminant() instead.
+      *
+      * \sa logAbsDeterminant(), MatrixBase::determinant()
+      */
+    typename MatrixType::RealScalar absDeterminant() const;
+
+    /** \returns the natural log of the absolute value of the determinant of the matrix of which
+      * *this is the QR decomposition. It has only linear complexity
+      * (that is, O(n) where n is the dimension of the square matrix)
+      * as the QR decomposition has already been computed.
+      *
+      * \note This is only for square matrices.
+      *
+      * \note This method is useful to work around the risk of overflow/underflow that's inherent
+      * to determinant computation.
+      *
+      * \sa absDeterminant(), MatrixBase::determinant()
+      */
+    typename MatrixType::RealScalar logAbsDeterminant() const;
+
+    inline Index rows() const { return m_qr.rows(); }
+    inline Index cols() const { return m_qr.cols(); }
+    
+    /** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q.
+      * 
+      * For advanced uses only.
+      */
+    const HCoeffsType& hCoeffs() const { return m_hCoeffs; }
+    
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+    #endif
+
+  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+
+    void computeInPlace();
+    
+    MatrixType m_qr;
+    HCoeffsType m_hCoeffs;
+    RowVectorType m_temp;
+    bool m_isInitialized;
+};
+
+template<typename MatrixType>
+typename MatrixType::RealScalar HouseholderQR<MatrixType>::absDeterminant() const
+{
+  using std::abs;
+  eigen_assert(m_isInitialized && "HouseholderQR is not initialized.");
+  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
+  return abs(m_qr.diagonal().prod());
+}
+
+template<typename MatrixType>
+typename MatrixType::RealScalar HouseholderQR<MatrixType>::logAbsDeterminant() const
+{
+  eigen_assert(m_isInitialized && "HouseholderQR is not initialized.");
+  eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
+  return m_qr.diagonal().cwiseAbs().array().log().sum();
+}
+
+namespace internal {
+
+/** \internal */
+template<typename MatrixQR, typename HCoeffs>
+void householder_qr_inplace_unblocked(MatrixQR& mat, HCoeffs& hCoeffs, typename MatrixQR::Scalar* tempData = 0)
+{
+  typedef typename MatrixQR::Scalar Scalar;
+  typedef typename MatrixQR::RealScalar RealScalar;
+  Index rows = mat.rows();
+  Index cols = mat.cols();
+  Index size = (std::min)(rows,cols);
+
+  eigen_assert(hCoeffs.size() == size);
+
+  typedef Matrix<Scalar,MatrixQR::ColsAtCompileTime,1> TempType;
+  TempType tempVector;
+  if(tempData==0)
+  {
+    tempVector.resize(cols);
+    tempData = tempVector.data();
+  }
+
+  for(Index k = 0; k < size; ++k)
+  {
+    Index remainingRows = rows - k;
+    Index remainingCols = cols - k - 1;
+
+    RealScalar beta;
+    mat.col(k).tail(remainingRows).makeHouseholderInPlace(hCoeffs.coeffRef(k), beta);
+    mat.coeffRef(k,k) = beta;
+
+    // apply H to remaining part of m_qr from the left
+    mat.bottomRightCorner(remainingRows, remainingCols)
+        .applyHouseholderOnTheLeft(mat.col(k).tail(remainingRows-1), hCoeffs.coeffRef(k), tempData+k+1);
+  }
+}
+
+/** \internal */
+template<typename MatrixQR, typename HCoeffs,
+  typename MatrixQRScalar = typename MatrixQR::Scalar,
+  bool InnerStrideIsOne = (MatrixQR::InnerStrideAtCompileTime == 1 && HCoeffs::InnerStrideAtCompileTime == 1)>
+struct householder_qr_inplace_blocked
+{
+  // This is specialized for MKL-supported Scalar types in HouseholderQR_MKL.h
+  static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index maxBlockSize=32,
+      typename MatrixQR::Scalar* tempData = 0)
+  {
+    typedef typename MatrixQR::Scalar Scalar;
+    typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;
+
+    Index rows = mat.rows();
+    Index cols = mat.cols();
+    Index size = (std::min)(rows, cols);
+
+    typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixQR::MaxColsAtCompileTime,1> TempType;
+    TempType tempVector;
+    if(tempData==0)
+    {
+      tempVector.resize(cols);
+      tempData = tempVector.data();
+    }
+
+    Index blockSize = (std::min)(maxBlockSize,size);
+
+    Index k = 0;
+    for (k = 0; k < size; k += blockSize)
+    {
+      Index bs = (std::min)(size-k,blockSize);  // actual size of the block
+      Index tcols = cols - k - bs;              // trailing columns
+      Index brows = rows-k;                     // rows of the block
+
+      // partition the matrix:
+      //        A00 | A01 | A02
+      // mat  = A10 | A11 | A12
+      //        A20 | A21 | A22
+      // and performs the qr dec of [A11^T A12^T]^T
+      // and update [A21^T A22^T]^T using level 3 operations.
+      // Finally, the algorithm continue on A22
+
+      BlockType A11_21 = mat.block(k,k,brows,bs);
+      Block<HCoeffs,Dynamic,1> hCoeffsSegment = hCoeffs.segment(k,bs);
+
+      householder_qr_inplace_unblocked(A11_21, hCoeffsSegment, tempData);
+
+      if(tcols)
+      {
+        BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
+        apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment, false); // false == backward
+      }
+    }
+  }
+};
+
+} // end namespace internal
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void HouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+  const Index rank = (std::min)(rows(), cols());
+  eigen_assert(rhs.rows() == rows());
+
+  typename RhsType::PlainObject c(rhs);
+
+  // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
+  c.applyOnTheLeft(householderSequence(
+    m_qr.leftCols(rank),
+    m_hCoeffs.head(rank)).transpose()
+  );
+
+  m_qr.topLeftCorner(rank, rank)
+      .template triangularView<Upper>()
+      .solveInPlace(c.topRows(rank));
+
+  dst.topRows(rank) = c.topRows(rank);
+  dst.bottomRows(cols()-rank).setZero();
+}
+#endif
+
+/** Performs the QR factorization of the given matrix \a matrix. The result of
+  * the factorization is stored into \c *this, and a reference to \c *this
+  * is returned.
+  *
+  * \sa class HouseholderQR, HouseholderQR(const MatrixType&)
+  */
+template<typename MatrixType>
+void HouseholderQR<MatrixType>::computeInPlace()
+{
+  check_template_parameters();
+  
+  Index rows = m_qr.rows();
+  Index cols = m_qr.cols();
+  Index size = (std::min)(rows,cols);
+
+  m_hCoeffs.resize(size);
+
+  m_temp.resize(cols);
+
+  internal::householder_qr_inplace_blocked<MatrixType, HCoeffsType>::run(m_qr, m_hCoeffs, 48, m_temp.data());
+
+  m_isInitialized = true;
+}
+
+/** \return the Householder QR decomposition of \c *this.
+  *
+  * \sa class HouseholderQR
+  */
+template<typename Derived>
+const HouseholderQR<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::householderQr() const
+{
+  return HouseholderQR<PlainObject>(eval());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_QR_H
diff --git a/third-party/Eigen/src/QR/HouseholderQR_LAPACKE.h b/third-party/Eigen/src/QR/HouseholderQR_LAPACKE.h
new file mode 100644
index 00000000..1dc7d536
--- /dev/null
+++ b/third-party/Eigen/src/QR/HouseholderQR_LAPACKE.h
@@ -0,0 +1,68 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to LAPACKe
+ *    Householder QR decomposition of a matrix w/o pivoting based on
+ *    LAPACKE_?geqrf function.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_QR_LAPACKE_H
+#define EIGEN_QR_LAPACKE_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/** \internal Specialization for the data types supported by LAPACKe */
+
+#define EIGEN_LAPACKE_QR_NOPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \
+template<typename MatrixQR, typename HCoeffs> \
+struct householder_qr_inplace_blocked<MatrixQR, HCoeffs, EIGTYPE, true> \
+{ \
+  static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, \
+      typename MatrixQR::Scalar* = 0) \
+  { \
+    lapack_int m = (lapack_int) mat.rows(); \
+    lapack_int n = (lapack_int) mat.cols(); \
+    lapack_int lda = (lapack_int) mat.outerStride(); \
+    lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
+    LAPACKE_##LAPACKE_PREFIX##geqrf( matrix_order, m, n, (LAPACKE_TYPE*)mat.data(), lda, (LAPACKE_TYPE*)hCoeffs.data()); \
+    hCoeffs.adjointInPlace(); \
+  } \
+};
+
+EIGEN_LAPACKE_QR_NOPIV(double, double, d)
+EIGEN_LAPACKE_QR_NOPIV(float, float, s)
+EIGEN_LAPACKE_QR_NOPIV(dcomplex, lapack_complex_double, z)
+EIGEN_LAPACKE_QR_NOPIV(scomplex, lapack_complex_float, c)
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_QR_LAPACKE_H
diff --git a/third-party/Eigen/src/SVD/BDCSVD.h b/third-party/Eigen/src/SVD/BDCSVD.h
new file mode 100644
index 00000000..a5b73f8f
--- /dev/null
+++ b/third-party/Eigen/src/SVD/BDCSVD.h
@@ -0,0 +1,1277 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+// 
+// We used the "A Divide-And-Conquer Algorithm for the Bidiagonal SVD"
+// research report written by Ming Gu and Stanley C.Eisenstat
+// The code variable names correspond to the names they used in their 
+// report
+//
+// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
+// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
+// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
+// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
+// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BDCSVD_H
+#define EIGEN_BDCSVD_H
+// #define EIGEN_BDCSVD_DEBUG_VERBOSE
+// #define EIGEN_BDCSVD_SANITY_CHECKS
+
+namespace Eigen {
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+IOFormat bdcsvdfmt(8, 0, ", ", "\n", "  [", "]");
+#endif
+  
+template<typename _MatrixType> class BDCSVD;
+
+namespace internal {
+
+template<typename _MatrixType> 
+struct traits<BDCSVD<_MatrixType> >
+{
+  typedef _MatrixType MatrixType;
+};  
+
+} // end namespace internal
+  
+  
+/** \ingroup SVD_Module
+ *
+ *
+ * \class BDCSVD
+ *
+ * \brief class Bidiagonal Divide and Conquer SVD
+ *
+ * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
+ *
+ * This class first reduces the input matrix to bi-diagonal form using class UpperBidiagonalization,
+ * and then performs a divide-and-conquer diagonalization. Small blocks are diagonalized using class JacobiSVD.
+ * You can control the switching size with the setSwitchSize() method, default is 16.
+ * For small matrice (<16), it is thus preferable to directly use JacobiSVD. For larger ones, BDCSVD is highly
+ * recommended and can several order of magnitude faster.
+ *
+ * \warning this algorithm is unlikely to provide accurate result when compiled with unsafe math optimizations.
+ * For instance, this concerns Intel's compiler (ICC), which perfroms such optimization by default unless
+ * you compile with the \c -fp-model \c precise option. Likewise, the \c -ffast-math option of GCC or clang will
+ * significantly degrade the accuracy.
+ *
+ * \sa class JacobiSVD
+ */
+template<typename _MatrixType> 
+class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >
+{
+  typedef SVDBase<BDCSVD> Base;
+    
+public:
+  using Base::rows;
+  using Base::cols;
+  using Base::computeU;
+  using Base::computeV;
+  
+  typedef _MatrixType MatrixType;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+  typedef typename NumTraits<RealScalar>::Literal Literal;
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime, 
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime, 
+    DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime), 
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, 
+    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, 
+    MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime, MaxColsAtCompileTime), 
+    MatrixOptions = MatrixType::Options
+  };
+
+  typedef typename Base::MatrixUType MatrixUType;
+  typedef typename Base::MatrixVType MatrixVType;
+  typedef typename Base::SingularValuesType SingularValuesType;
+  
+  typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> MatrixX;
+  typedef Matrix<RealScalar, Dynamic, Dynamic, ColMajor> MatrixXr;
+  typedef Matrix<RealScalar, Dynamic, 1> VectorType;
+  typedef Array<RealScalar, Dynamic, 1> ArrayXr;
+  typedef Array<Index,1,Dynamic> ArrayXi;
+  typedef Ref<ArrayXr> ArrayRef;
+  typedef Ref<ArrayXi> IndicesRef;
+
+  /** \brief Default Constructor.
+   *
+   * The default constructor is useful in cases in which the user intends to
+   * perform decompositions via BDCSVD::compute(const MatrixType&).
+   */
+  BDCSVD() : m_algoswap(16), m_numIters(0)
+  {}
+
+
+  /** \brief Default Constructor with memory preallocation
+   *
+   * Like the default constructor but with preallocation of the internal data
+   * according to the specified problem size.
+   * \sa BDCSVD()
+   */
+  BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0)
+    : m_algoswap(16), m_numIters(0)
+  {
+    allocate(rows, cols, computationOptions);
+  }
+
+  /** \brief Constructor performing the decomposition of given matrix.
+   *
+   * \param matrix the matrix to decompose
+   * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
+   *                           By default, none is computed. This is a bit - field, the possible bits are #ComputeFullU, #ComputeThinU, 
+   *                           #ComputeFullV, #ComputeThinV.
+   *
+   * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
+   * available with the (non - default) FullPivHouseholderQR preconditioner.
+   */
+  BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
+    : m_algoswap(16), m_numIters(0)
+  {
+    compute(matrix, computationOptions);
+  }
+
+  ~BDCSVD() 
+  {
+  }
+  
+  /** \brief Method performing the decomposition of given matrix using custom options.
+   *
+   * \param matrix the matrix to decompose
+   * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
+   *                           By default, none is computed. This is a bit - field, the possible bits are #ComputeFullU, #ComputeThinU, 
+   *                           #ComputeFullV, #ComputeThinV.
+   *
+   * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
+   * available with the (non - default) FullPivHouseholderQR preconditioner.
+   */
+  BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions);
+
+  /** \brief Method performing the decomposition of given matrix using current options.
+   *
+   * \param matrix the matrix to decompose
+   *
+   * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).
+   */
+  BDCSVD& compute(const MatrixType& matrix)
+  {
+    return compute(matrix, this->m_computationOptions);
+  }
+
+  void setSwitchSize(int s) 
+  {
+    eigen_assert(s>3 && "BDCSVD the size of the algo switch has to be greater than 3");
+    m_algoswap = s;
+  }
+ 
+private:
+  void allocate(Index rows, Index cols, unsigned int computationOptions);
+  void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
+  void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
+  void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus);
+  void perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat);
+  void computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V);
+  void deflation43(Index firstCol, Index shift, Index i, Index size);
+  void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);
+  void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);
+  template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
+  void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev);
+  void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
+  static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift);
+
+protected:
+  MatrixXr m_naiveU, m_naiveV;
+  MatrixXr m_computed;
+  Index m_nRec;
+  ArrayXr m_workspace;
+  ArrayXi m_workspaceI;
+  int m_algoswap;
+  bool m_isTranspose, m_compU, m_compV;
+  
+  using Base::m_singularValues;
+  using Base::m_diagSize;
+  using Base::m_computeFullU;
+  using Base::m_computeFullV;
+  using Base::m_computeThinU;
+  using Base::m_computeThinV;
+  using Base::m_matrixU;
+  using Base::m_matrixV;
+  using Base::m_isInitialized;
+  using Base::m_nonzeroSingularValues;
+
+public:  
+  int m_numIters;
+}; //end class BDCSVD
+
+
+// Method to allocate and initialize matrix and attributes
+template<typename MatrixType>
+void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions)
+{
+  m_isTranspose = (cols > rows);
+
+  if (Base::allocate(rows, cols, computationOptions))
+    return;
+  
+  m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize );
+  m_compU = computeV();
+  m_compV = computeU();
+  if (m_isTranspose)
+    std::swap(m_compU, m_compV);
+  
+  if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1 );
+  else         m_naiveU = MatrixXr::Zero(2, m_diagSize + 1 );
+  
+  if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);
+  
+  m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3);
+  m_workspaceI.resize(3*m_diagSize);
+}// end allocate
+
+template<typename MatrixType>
+BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsigned int computationOptions) 
+{
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+  std::cout << "\n\n\n======================================================================================================================\n\n\n";
+#endif
+  allocate(matrix.rows(), matrix.cols(), computationOptions);
+  using std::abs;
+
+  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
+  
+  //**** step -1 - If the problem is too small, directly falls back to JacobiSVD and return
+  if(matrix.cols() < m_algoswap)
+  {
+    // FIXME this line involves temporaries
+    JacobiSVD<MatrixType> jsvd(matrix,computationOptions);
+    if(computeU()) m_matrixU = jsvd.matrixU();
+    if(computeV()) m_matrixV = jsvd.matrixV();
+    m_singularValues = jsvd.singularValues();
+    m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
+    m_isInitialized = true;
+    return *this;
+  }
+  
+  //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
+  RealScalar scale = matrix.cwiseAbs().maxCoeff();
+  if(scale==Literal(0)) scale = Literal(1);
+  MatrixX copy;
+  if (m_isTranspose) copy = matrix.adjoint()/scale;
+  else               copy = matrix/scale;
+  
+  //**** step 1 - Bidiagonalization
+  // FIXME this line involves temporaries
+  internal::UpperBidiagonalization<MatrixX> bid(copy);
+
+  //**** step 2 - Divide & Conquer
+  m_naiveU.setZero();
+  m_naiveV.setZero();
+  // FIXME this line involves a temporary matrix
+  m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
+  m_computed.template bottomRows<1>().setZero();
+  divide(0, m_diagSize - 1, 0, 0, 0);
+
+  //**** step 3 - Copy singular values and vectors
+  for (int i=0; i<m_diagSize; i++)
+  {
+    RealScalar a = abs(m_computed.coeff(i, i));
+    m_singularValues.coeffRef(i) = a * scale;
+    if (a<considerZero)
+    {
+      m_nonzeroSingularValues = i;
+      m_singularValues.tail(m_diagSize - i - 1).setZero();
+      break;
+    }
+    else if (i == m_diagSize - 1)
+    {
+      m_nonzeroSingularValues = i + 1;
+      break;
+    }
+  }
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+//   std::cout << "m_naiveU\n" << m_naiveU << "\n\n";
+//   std::cout << "m_naiveV\n" << m_naiveV << "\n\n";
+#endif
+  if(m_isTranspose) copyUV(bid.householderV(), bid.householderU(), m_naiveV, m_naiveU);
+  else              copyUV(bid.householderU(), bid.householderV(), m_naiveU, m_naiveV);
+
+  m_isInitialized = true;
+  return *this;
+}// end compute
+
+
+template<typename MatrixType>
+template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
+void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naiveV)
+{
+  // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa
+  if (computeU())
+  {
+    Index Ucols = m_computeThinU ? m_diagSize : householderU.cols();
+    m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
+    m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
+    householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer
+  }
+  if (computeV())
+  {
+    Index Vcols = m_computeThinV ? m_diagSize : householderV.cols();
+    m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
+    m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
+    householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer
+  }
+}
+
+/** \internal
+  * Performs A = A * B exploiting the special structure of the matrix A. Splitting A as:
+  *  A = [A1]
+  *      [A2]
+  * such that A1.rows()==n1, then we assume that at least half of the columns of A1 and A2 are zeros.
+  * We can thus pack them prior to the the matrix product. However, this is only worth the effort if the matrix is large
+  * enough.
+  */
+template<typename MatrixType>
+void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1)
+{
+  Index n = A.rows();
+  if(n>100)
+  {
+    // If the matrices are large enough, let's exploit the sparse structure of A by
+    // splitting it in half (wrt n1), and packing the non-zero columns.
+    Index n2 = n - n1;
+    Map<MatrixXr> A1(m_workspace.data()      , n1, n);
+    Map<MatrixXr> A2(m_workspace.data()+ n1*n, n2, n);
+    Map<MatrixXr> B1(m_workspace.data()+  n*n, n,  n);
+    Map<MatrixXr> B2(m_workspace.data()+2*n*n, n,  n);
+    Index k1=0, k2=0;
+    for(Index j=0; j<n; ++j)
+    {
+      if( (A.col(j).head(n1).array()!=Literal(0)).any() )
+      {
+        A1.col(k1) = A.col(j).head(n1);
+        B1.row(k1) = B.row(j);
+        ++k1;
+      }
+      if( (A.col(j).tail(n2).array()!=Literal(0)).any() )
+      {
+        A2.col(k2) = A.col(j).tail(n2);
+        B2.row(k2) = B.row(j);
+        ++k2;
+      }
+    }
+  
+    A.topRows(n1).noalias()    = A1.leftCols(k1) * B1.topRows(k1);
+    A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2);
+  }
+  else
+  {
+    Map<MatrixXr,Aligned> tmp(m_workspace.data(),n,n);
+    tmp.noalias() = A*B;
+    A = tmp;
+  }
+}
+
+// The divide algorithm is done "in place", we are always working on subsets of the same matrix. The divide methods takes as argument the 
+// place of the submatrix we are currently working on.
+
+//@param firstCol : The Index of the first column of the submatrix of m_computed and for m_naiveU;
+//@param lastCol : The Index of the last column of the submatrix of m_computed and for m_naiveU; 
+// lastCol + 1 - firstCol is the size of the submatrix.
+//@param firstRowW : The Index of the first row of the matrix W that we are to change. (see the reference paper section 1 for more information on W)
+//@param firstRowW : Same as firstRowW with the column.
+//@param shift : Each time one takes the left submatrix, one must add 1 to the shift. Why? Because! We actually want the last column of the U submatrix 
+// to become the first column (*coeff) and to shift all the other columns to the right. There are more details on the reference paper.
+template<typename MatrixType>
+void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift)
+{
+  // requires rows = cols + 1;
+  using std::pow;
+  using std::sqrt;
+  using std::abs;
+  const Index n = lastCol - firstCol + 1;
+  const Index k = n/2;
+  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
+  RealScalar alphaK;
+  RealScalar betaK; 
+  RealScalar r0; 
+  RealScalar lambda, phi, c0, s0;
+  VectorType l, f;
+  // We use the other algorithm which is more efficient for small 
+  // matrices.
+  if (n < m_algoswap)
+  {
+    // FIXME this line involves temporaries
+    JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));
+    if (m_compU)
+      m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
+    else 
+    {
+      m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0);
+      m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n);
+    }
+    if (m_compV) m_naiveV.block(firstRowW, firstColW, n, n).real() = b.matrixV();
+    m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero();
+    m_computed.diagonal().segment(firstCol + shift, n) = b.singularValues().head(n);
+    return;
+  }
+  // We use the divide and conquer algorithm
+  alphaK =  m_computed(firstCol + k, firstCol + k);
+  betaK = m_computed(firstCol + k + 1, firstCol + k);
+  // The divide must be done in that order in order to have good results. Divide change the data inside the submatrices
+  // and the divide of the right submatrice reads one column of the left submatrice. That's why we need to treat the 
+  // right submatrix before the left one. 
+  divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift);
+  divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1);
+
+  if (m_compU)
+  {
+    lambda = m_naiveU(firstCol + k, firstCol + k);
+    phi = m_naiveU(firstCol + k + 1, lastCol + 1);
+  } 
+  else 
+  {
+    lambda = m_naiveU(1, firstCol + k);
+    phi = m_naiveU(0, lastCol + 1);
+  }
+  r0 = sqrt((abs(alphaK * lambda) * abs(alphaK * lambda)) + abs(betaK * phi) * abs(betaK * phi));
+  if (m_compU)
+  {
+    l = m_naiveU.row(firstCol + k).segment(firstCol, k);
+    f = m_naiveU.row(firstCol + k + 1).segment(firstCol + k + 1, n - k - 1);
+  } 
+  else 
+  {
+    l = m_naiveU.row(1).segment(firstCol, k);
+    f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
+  }
+  if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1);
+  if (r0<considerZero)
+  {
+    c0 = Literal(1);
+    s0 = Literal(0);
+  }
+  else
+  {
+    c0 = alphaK * lambda / r0;
+    s0 = betaK * phi / r0;
+  }
+  
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  assert(m_naiveU.allFinite());
+  assert(m_naiveV.allFinite());
+  assert(m_computed.allFinite());
+#endif
+  
+  if (m_compU)
+  {
+    MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1));     
+    // we shiftW Q1 to the right
+    for (Index i = firstCol + k - 1; i >= firstCol; i--) 
+      m_naiveU.col(i + 1).segment(firstCol, k + 1) = m_naiveU.col(i).segment(firstCol, k + 1);
+    // we shift q1 at the left with a factor c0
+    m_naiveU.col(firstCol).segment( firstCol, k + 1) = (q1 * c0);
+    // last column = q1 * - s0
+    m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) = (q1 * ( - s0));
+    // first column = q2 * s0
+    m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) = m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) * s0; 
+    // q2 *= c0
+    m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0;
+  } 
+  else 
+  {
+    RealScalar q1 = m_naiveU(0, firstCol + k);
+    // we shift Q1 to the right
+    for (Index i = firstCol + k - 1; i >= firstCol; i--) 
+      m_naiveU(0, i + 1) = m_naiveU(0, i);
+    // we shift q1 at the left with a factor c0
+    m_naiveU(0, firstCol) = (q1 * c0);
+    // last column = q1 * - s0
+    m_naiveU(0, lastCol + 1) = (q1 * ( - s0));
+    // first column = q2 * s0
+    m_naiveU(1, firstCol) = m_naiveU(1, lastCol + 1) *s0; 
+    // q2 *= c0
+    m_naiveU(1, lastCol + 1) *= c0;
+    m_naiveU.row(1).segment(firstCol + 1, k).setZero();
+    m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero();
+  }
+  
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  assert(m_naiveU.allFinite());
+  assert(m_naiveV.allFinite());
+  assert(m_computed.allFinite());
+#endif
+  
+  m_computed(firstCol + shift, firstCol + shift) = r0;
+  m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) = alphaK * l.transpose().real();
+  m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) = betaK * f.transpose().real();
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+  ArrayXr tmp1 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();
+#endif
+  // Second part: try to deflate singular values in combined matrix
+  deflation(firstCol, lastCol, k, firstRowW, firstColW, shift);
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+  ArrayXr tmp2 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();
+  std::cout << "\n\nj1 = " << tmp1.transpose().format(bdcsvdfmt) << "\n";
+  std::cout << "j2 = " << tmp2.transpose().format(bdcsvdfmt) << "\n\n";
+  std::cout << "err:      " << ((tmp1-tmp2).abs()>1e-12*tmp2.abs()).transpose() << "\n";
+  static int count = 0;
+  std::cout << "# " << ++count << "\n\n";
+  assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm());
+//   assert(count<681);
+//   assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all());
+#endif
+  
+  // Third part: compute SVD of combined matrix
+  MatrixXr UofSVD, VofSVD;
+  VectorType singVals;
+  computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD);
+  
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  assert(UofSVD.allFinite());
+  assert(VofSVD.allFinite());
+#endif
+  
+  if (m_compU)
+    structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
+  else
+  {
+    Map<Matrix<RealScalar,2,Dynamic>,Aligned> tmp(m_workspace.data(),2,n+1);
+    tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD;
+    m_naiveU.middleCols(firstCol, n + 1) = tmp;
+  }
+  
+  if (m_compV)  structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);
+  
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  assert(m_naiveU.allFinite());
+  assert(m_naiveV.allFinite());
+  assert(m_computed.allFinite());
+#endif
+  
+  m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero();
+  m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals;
+}// end divide
+
+// Compute SVD of m_computed.block(firstCol, firstCol, n + 1, n); this block only has non-zeros in
+// the first column and on the diagonal and has undergone deflation, so diagonal is in increasing
+// order except for possibly the (0,0) entry. The computed SVD is stored U, singVals and V, except
+// that if m_compV is false, then V is not computed. Singular values are sorted in decreasing order.
+//
+// TODO Opportunities for optimization: better root finding algo, better stopping criterion, better
+// handling of round-off errors, be consistent in ordering
+// For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf
+template <typename MatrixType>
+void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
+{
+  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
+  using std::abs;
+  ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
+  m_workspace.head(n) =  m_computed.block(firstCol, firstCol, n, n).diagonal();
+  ArrayRef diag = m_workspace.head(n);
+  diag(0) = Literal(0);
+
+  // Allocate space for singular values and vectors
+  singVals.resize(n);
+  U.resize(n+1, n+1);
+  if (m_compV) V.resize(n, n);
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+  if (col0.hasNaN() || diag.hasNaN())
+    std::cout << "\n\nHAS NAN\n\n";
+#endif
+  
+  // Many singular values might have been deflated, the zero ones have been moved to the end,
+  // but others are interleaved and we must ignore them at this stage.
+  // To this end, let's compute a permutation skipping them:
+  Index actual_n = n;
+  while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n;
+  Index m = 0; // size of the deflated problem
+  for(Index k=0;k<actual_n;++k)
+    if(abs(col0(k))>considerZero)
+      m_workspaceI(m++) = k;
+  Map<ArrayXi> perm(m_workspaceI.data(),m);
+  
+  Map<ArrayXr> shifts(m_workspace.data()+1*n, n);
+  Map<ArrayXr> mus(m_workspace.data()+2*n, n);
+  Map<ArrayXr> zhat(m_workspace.data()+3*n, n);
+
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+  std::cout << "computeSVDofM using:\n";
+  std::cout << "  z: " << col0.transpose() << "\n";
+  std::cout << "  d: " << diag.transpose() << "\n";
+#endif
+  
+  // Compute singVals, shifts, and mus
+  computeSingVals(col0, diag, perm, singVals, shifts, mus);
+  
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+  std::cout << "  j:        " << (m_computed.block(firstCol, firstCol, n, n)).jacobiSvd().singularValues().transpose().reverse() << "\n\n";
+  std::cout << "  sing-val: " << singVals.transpose() << "\n";
+  std::cout << "  mu:       " << mus.transpose() << "\n";
+  std::cout << "  shift:    " << shifts.transpose() << "\n";
+  
+  {
+    Index actual_n = n;
+    while(actual_n>1 && abs(col0(actual_n-1))<considerZero) --actual_n;
+    std::cout << "\n\n    mus:    " << mus.head(actual_n).transpose() << "\n\n";
+    std::cout << "    check1 (expect0) : " << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << "\n\n";
+    std::cout << "    check2 (>0)      : " << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n";
+    std::cout << "    check3 (>0)      : " << ((diag.segment(1,actual_n-1)-singVals.head(actual_n-1).array()) / singVals.head(actual_n-1).array()).transpose() << "\n\n\n";
+    std::cout << "    check4 (>0)      : " << ((singVals.segment(1,actual_n-1)-singVals.head(actual_n-1))).transpose() << "\n\n\n";
+  }
+#endif
+  
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  assert(singVals.allFinite());
+  assert(mus.allFinite());
+  assert(shifts.allFinite());
+#endif
+  
+  // Compute zhat
+  perturbCol0(col0, diag, perm, singVals, shifts, mus, zhat);
+#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
+  std::cout << "  zhat: " << zhat.transpose() << "\n";
+#endif
+  
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  assert(zhat.allFinite());
+#endif
+  
+  computeSingVecs(zhat, diag, perm, singVals, shifts, mus, U, V);
+  
+#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
+  std::cout << "U^T U: " << (U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() << "\n";
+  std::cout << "V^T V: " << (V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() << "\n";
+#endif
+  
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  assert(U.allFinite());
+  assert(V.allFinite());
+  assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 1e-14 * n);
+  assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 1e-14 * n);
+  assert(m_naiveU.allFinite());
+  assert(m_naiveV.allFinite());
+  assert(m_computed.allFinite());
+#endif
+  
+  // Because of deflation, the singular values might not be completely sorted.
+  // Fortunately, reordering them is a O(n) problem
+  for(Index i=0; i<actual_n-1; ++i)
+  {
+    if(singVals(i)>singVals(i+1))
+    {
+      using std::swap;
+      swap(singVals(i),singVals(i+1));
+      U.col(i).swap(U.col(i+1));
+      if(m_compV) V.col(i).swap(V.col(i+1));
+    }
+  }
+  
+  // Reverse order so that singular values in increased order
+  // Because of deflation, the zeros singular-values are already at the end
+  singVals.head(actual_n).reverseInPlace();
+  U.leftCols(actual_n).rowwise().reverseInPlace();
+  if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace();
+  
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+  JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n) );
+  std::cout << "  * j:        " << jsvd.singularValues().transpose() << "\n\n";
+  std::cout << "  * sing-val: " << singVals.transpose() << "\n";
+//   std::cout << "  * err:      " << ((jsvd.singularValues()-singVals)>1e-13*singVals.norm()).transpose() << "\n";
+#endif
+}
+
+template <typename MatrixType>
+typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
+{
+  Index m = perm.size();
+  RealScalar res = Literal(1);
+  for(Index i=0; i<m; ++i)
+  {
+    Index j = perm(i);
+    // The following expression could be rewritten to involve only a single division,
+    // but this would make the expression more sensitive to overflow.
+    res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu));
+  }
+  return res;
+
+}
+
+template <typename MatrixType>
+void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm,
+                                         VectorType& singVals, ArrayRef shifts, ArrayRef mus)
+{
+  using std::abs;
+  using std::swap;
+  using std::sqrt;
+
+  Index n = col0.size();
+  Index actual_n = n;
+  // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above
+  // because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value.
+  while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n;
+
+  for (Index k = 0; k < n; ++k)
+  {
+    if (col0(k) == Literal(0) || actual_n==1)
+    {
+      // if col0(k) == 0, then entry is deflated, so singular value is on diagonal
+      // if actual_n==1, then the deflated problem is already diagonalized
+      singVals(k) = k==0 ? col0(0) : diag(k);
+      mus(k) = Literal(0);
+      shifts(k) = k==0 ? col0(0) : diag(k);
+      continue;
+    } 
+
+    // otherwise, use secular equation to find singular value
+    RealScalar left = diag(k);
+    RealScalar right; // was: = (k != actual_n-1) ? diag(k+1) : (diag(actual_n-1) + col0.matrix().norm());
+    if(k==actual_n-1)
+      right = (diag(actual_n-1) + col0.matrix().norm());
+    else
+    {
+      // Skip deflated singular values,
+      // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside.
+      // This should be equivalent to using perm[]
+      Index l = k+1;
+      while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); }
+      right = diag(l);
+    }
+
+    // first decide whether it's closer to the left end or the right end
+    RealScalar mid = left + (right-left) / Literal(2);
+    RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0));
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+    std::cout << right-left << "\n";
+    std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right)   << "\n";
+    std::cout << "     = " << secularEq(0.1*(left+right), col0, diag, perm, diag, 0)
+              << " "       << secularEq(0.2*(left+right), col0, diag, perm, diag, 0)
+              << " "       << secularEq(0.3*(left+right), col0, diag, perm, diag, 0)
+              << " "       << secularEq(0.4*(left+right), col0, diag, perm, diag, 0)
+              << " "       << secularEq(0.49*(left+right), col0, diag, perm, diag, 0)
+              << " "       << secularEq(0.5*(left+right), col0, diag, perm, diag, 0)
+              << " "       << secularEq(0.51*(left+right), col0, diag, perm, diag, 0)
+              << " "       << secularEq(0.6*(left+right), col0, diag, perm, diag, 0)
+              << " "       << secularEq(0.7*(left+right), col0, diag, perm, diag, 0)
+              << " "       << secularEq(0.8*(left+right), col0, diag, perm, diag, 0)
+              << " "       << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n";
+#endif
+    RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right;
+    
+    // measure everything relative to shift
+    Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
+    diagShifted = diag - shift;
+
+    if(k!=actual_n-1)
+    {
+      // check that after the shift, f(mid) is still negative:
+      RealScalar midShifted = (right - left) / RealScalar(2);
+      if(shift==right)
+        midShifted = -midShifted;
+      RealScalar fMidShifted = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
+      if(fMidShifted>0)
+      {
+        // fMid was erroneous, fix it:
+        shift =  fMidShifted > Literal(0) ? left : right;
+        diagShifted = diag - shift;
+      }
+    }
+    
+    // initial guess
+    RealScalar muPrev, muCur;
+    if (shift == left)
+    {
+      muPrev = (right - left) * RealScalar(0.1);
+      if (k == actual_n-1) muCur = right - left;
+      else                 muCur = (right - left) * RealScalar(0.5);
+    }
+    else
+    {
+      muPrev = -(right - left) * RealScalar(0.1);
+      muCur = -(right - left) * RealScalar(0.5);
+    }
+
+    RealScalar fPrev = secularEq(muPrev, col0, diag, perm, diagShifted, shift);
+    RealScalar fCur = secularEq(muCur, col0, diag, perm, diagShifted, shift);
+    if (abs(fPrev) < abs(fCur))
+    {
+      swap(fPrev, fCur);
+      swap(muPrev, muCur);
+    }
+
+    // rational interpolation: fit a function of the form a / mu + b through the two previous
+    // iterates and use its zero to compute the next iterate
+    bool useBisection = fPrev*fCur>Literal(0);
+    while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
+    {
+      ++m_numIters;
+
+      // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples.
+      RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev);
+      RealScalar b = fCur - a / muCur;
+      // And find mu such that f(mu)==0:
+      RealScalar muZero = -a/b;
+      RealScalar fZero = secularEq(muZero, col0, diag, perm, diagShifted, shift);
+      
+      muPrev = muCur;
+      fPrev = fCur;
+      muCur = muZero;
+      fCur = fZero;
+      
+      
+      if (shift == left  && (muCur < Literal(0) || muCur > right - left)) useBisection = true;
+      if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true;
+      if (abs(fCur)>abs(fPrev)) useBisection = true;
+    }
+
+    // fall back on bisection method if rational interpolation did not work
+    if (useBisection)
+    {
+#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
+      std::cout << "useBisection for k = " << k << ", actual_n = " << actual_n << "\n";
+#endif
+      RealScalar leftShifted, rightShifted;
+      if (shift == left)
+      {
+        // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)),
+        // the factor 2 is to be more conservative
+        leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
+
+        // check that we did it right:
+        eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) );
+        // I don't understand why the case k==0 would be special there:
+        // if (k == 0) rightShifted = right - left; else
+        rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe
+      }
+      else
+      {
+        leftShifted = -(right - left) * RealScalar(0.51);
+        if(k+1<n)
+          rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
+        else
+          rightShifted = -(std::numeric_limits<RealScalar>::min)();
+      }
+      
+      RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);
+      eigen_internal_assert(fLeft<Literal(0));
+
+#if defined EIGEN_INTERNAL_DEBUGGING || defined EIGEN_BDCSVD_DEBUG_VERBOSE
+      RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift);
+#endif
+
+
+#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
+      if(!(fLeft * fRight<0))
+      {
+        std::cout << "fLeft: " << leftShifted << " - " << diagShifted.head(10).transpose()  << "\n ; " << bool(left==shift) << " " << (left-shift) << "\n";
+        std::cout << k << " : " <<  fLeft << " * " << fRight << " == " << fLeft * fRight << "  ;  " << left << " - " << right << " -> " <<  leftShifted << " " << rightShifted << "   shift=" << shift << "\n";
+      }
+#endif
+      eigen_internal_assert(fLeft * fRight < Literal(0));
+
+      if(fLeft<Literal(0))
+      {
+        while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
+        {
+          RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);
+          fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
+          eigen_internal_assert((numext::isfinite)(fMid));
+
+          if (fLeft * fMid < Literal(0))
+          {
+            rightShifted = midShifted;
+          }
+          else
+          {
+            leftShifted = midShifted;
+            fLeft = fMid;
+          }
+        }
+        muCur = (leftShifted + rightShifted) / Literal(2);
+      }
+      else 
+      {
+        // We have a problem as shifting on the left or right give either a positive or negative value
+        // at the middle of [left,right]...
+        // Instead fo abbording or entering an infinite loop,
+        // let's just use the middle as the estimated zero-crossing:
+        muCur = (right - left) * RealScalar(0.5);
+        if(shift == right)
+          muCur = -muCur;
+      }
+    }
+      
+    singVals[k] = shift + muCur;
+    shifts[k] = shift;
+    mus[k] = muCur;
+
+    // perturb singular value slightly if it equals diagonal entry to avoid division by zero later
+    // (deflation is supposed to avoid this from happening)
+    // - this does no seem to be necessary anymore -
+//     if (singVals[k] == left) singVals[k] *= 1 + NumTraits<RealScalar>::epsilon();
+//     if (singVals[k] == right) singVals[k] *= 1 - NumTraits<RealScalar>::epsilon();
+  }
+}
+
+
+// zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1)
+template <typename MatrixType>
+void BDCSVD<MatrixType>::perturbCol0
+   (const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
+    const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat)
+{
+  using std::sqrt;
+  Index n = col0.size();
+  Index m = perm.size();
+  if(m==0)
+  {
+    zhat.setZero();
+    return;
+  }
+  Index last = perm(m-1);
+  // The offset permits to skip deflated entries while computing zhat
+  for (Index k = 0; k < n; ++k)
+  {
+    if (col0(k) == Literal(0)) // deflated
+      zhat(k) = Literal(0);
+    else
+    {
+      // see equation (3.6)
+      RealScalar dk = diag(k);
+      RealScalar prod = (singVals(last) + dk) * (mus(last) + (shifts(last) - dk));
+
+      for(Index l = 0; l<m; ++l)
+      {
+        Index i = perm(l);
+        if(i!=k)
+        {
+          Index j = i<k ? i : perm(l-1);
+          prod *= ((singVals(j)+dk) / ((diag(i)+dk))) * ((mus(j)+(shifts(j)-dk)) / ((diag(i)-dk)));
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+          if(i!=k && numext::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 )
+            std::cout << "     " << ((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) << " == (" << (singVals(j)+dk) << " * " << (mus(j)+(shifts(j)-dk))
+                       << ") / (" << (diag(i)+dk) << " * " << (diag(i)-dk) << ")\n";
+#endif
+        }
+      }
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+      std::cout << "zhat(" << k << ") =  sqrt( " << prod << ")  ;  " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n";
+#endif
+      RealScalar tmp = sqrt(prod);
+      zhat(k) = col0(k) > Literal(0) ? RealScalar(tmp) : RealScalar(-tmp);
+    }
+  }
+}
+
+// compute singular vectors
+template <typename MatrixType>
+void BDCSVD<MatrixType>::computeSingVecs
+   (const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
+    const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V)
+{
+  Index n = zhat.size();
+  Index m = perm.size();
+  
+  for (Index k = 0; k < n; ++k)
+  {
+    if (zhat(k) == Literal(0))
+    {
+      U.col(k) = VectorType::Unit(n+1, k);
+      if (m_compV) V.col(k) = VectorType::Unit(n, k);
+    }
+    else
+    {
+      U.col(k).setZero();
+      for(Index l=0;l<m;++l)
+      {
+        Index i = perm(l);
+        U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
+      }
+      U(n,k) = Literal(0);
+      U.col(k).normalize();
+    
+      if (m_compV)
+      {
+        V.col(k).setZero();
+        for(Index l=1;l<m;++l)
+        {
+          Index i = perm(l);
+          V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
+        }
+        V(0,k) = Literal(-1);
+        V.col(k).normalize();
+      }
+    }
+  }
+  U.col(n) = VectorType::Unit(n+1, n);
+}
+
+
+// page 12_13
+// i >= 1, di almost null and zi non null.
+// We use a rotation to zero out zi applied to the left of M
+template <typename MatrixType>
+void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index size)
+{
+  using std::abs;
+  using std::sqrt;
+  using std::pow;
+  Index start = firstCol + shift;
+  RealScalar c = m_computed(start, start);
+  RealScalar s = m_computed(start+i, start);
+  RealScalar r = numext::hypot(c,s);
+  if (r == Literal(0))
+  {
+    m_computed(start+i, start+i) = Literal(0);
+    return;
+  }
+  m_computed(start,start) = r;  
+  m_computed(start+i, start) = Literal(0);
+  m_computed(start+i, start+i) = Literal(0);
+  
+  JacobiRotation<RealScalar> J(c/r,-s/r);
+  if (m_compU)  m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J);
+  else          m_naiveU.applyOnTheRight(firstCol, firstCol+i, J);
+}// end deflation 43
+
+
+// page 13
+// i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M)
+// We apply two rotations to have zj = 0;
+// TODO deflation44 is still broken and not properly tested
+template <typename MatrixType>
+void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size)
+{
+  using std::abs;
+  using std::sqrt;
+  using std::conj;
+  using std::pow;
+  RealScalar c = m_computed(firstColm+i, firstColm);
+  RealScalar s = m_computed(firstColm+j, firstColm);
+  RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));
+#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
+  std::cout << "deflation 4.4: " << i << "," << j << " -> " << c << " " << s << " " << r << " ; "
+    << m_computed(firstColm + i-1, firstColm)  << " "
+    << m_computed(firstColm + i, firstColm)  << " "
+    << m_computed(firstColm + i+1, firstColm) << " "
+    << m_computed(firstColm + i+2, firstColm) << "\n";
+  std::cout << m_computed(firstColm + i-1, firstColm + i-1)  << " "
+    << m_computed(firstColm + i, firstColm+i)  << " "
+    << m_computed(firstColm + i+1, firstColm+i+1) << " "
+    << m_computed(firstColm + i+2, firstColm+i+2) << "\n";
+#endif
+  if (r==Literal(0))
+  {
+    m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
+    return;
+  }
+  c/=r;
+  s/=r;
+  m_computed(firstColm + i, firstColm) = r;  
+  m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
+  m_computed(firstColm + j, firstColm) = Literal(0);
+
+  JacobiRotation<RealScalar> J(c,-s);
+  if (m_compU)  m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J);
+  else          m_naiveU.applyOnTheRight(firstColu+i, firstColu+j, J);
+  if (m_compV)  m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J);
+}// end deflation 44
+
+
+// acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive]
+template <typename MatrixType>
+void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift)
+{
+  using std::sqrt;
+  using std::abs;
+  const Index length = lastCol + 1 - firstCol;
+  
+  Block<MatrixXr,Dynamic,1> col0(m_computed, firstCol+shift, firstCol+shift, length, 1);
+  Diagonal<MatrixXr> fulldiag(m_computed);
+  VectorBlock<Diagonal<MatrixXr>,Dynamic> diag(fulldiag, firstCol+shift, length);
+  
+  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
+  RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
+  RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag);
+  RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
+  
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  assert(m_naiveU.allFinite());
+  assert(m_naiveV.allFinite());
+  assert(m_computed.allFinite());
+#endif
+
+#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE  
+  std::cout << "\ndeflate:" << diag.head(k+1).transpose() << "  |  " << diag.segment(k+1,length-k-1).transpose() << "\n";
+#endif
+  
+  //condition 4.1
+  if (diag(0) < epsilon_coarse)
+  { 
+#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
+    std::cout << "deflation 4.1, because " << diag(0) << " < " << epsilon_coarse << "\n";
+#endif
+    diag(0) = epsilon_coarse;
+  }
+
+  //condition 4.2
+  for (Index i=1;i<length;++i)
+    if (abs(col0(i)) < epsilon_strict)
+    {
+#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
+      std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << "  (diag(" << i << ")=" << diag(i) << ")\n";
+#endif
+      col0(i) = Literal(0);
+    }
+
+  //condition 4.3
+  for (Index i=1;i<length; i++)
+    if (diag(i) < epsilon_coarse)
+    {
+#ifdef  EIGEN_BDCSVD_DEBUG_VERBOSE
+      std::cout << "deflation 4.3, cancel z(" << i << ")=" << col0(i) << " because diag(" << i << ")=" << diag(i) << " < " << epsilon_coarse << "\n";
+#endif
+      deflation43(firstCol, shift, i, length);
+    }
+
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  assert(m_naiveU.allFinite());
+  assert(m_naiveV.allFinite());
+  assert(m_computed.allFinite());
+#endif
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+  std::cout << "to be sorted: " << diag.transpose() << "\n\n";
+#endif
+  {
+    // Check for total deflation
+    // If we have a total deflation, then we have to consider col0(0)==diag(0) as a singular value during sorting
+    bool total_deflation = (col0.tail(length-1).array()<considerZero).all();
+    
+    // Sort the diagonal entries, since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge.
+    // First, compute the respective permutation.
+    Index *permutation = m_workspaceI.data();
+    {
+      permutation[0] = 0;
+      Index p = 1;
+      
+      // Move deflated diagonal entries at the end.
+      for(Index i=1; i<length; ++i)
+        if(abs(diag(i))<considerZero)
+          permutation[p++] = i;
+        
+      Index i=1, j=k+1;
+      for( ; p < length; ++p)
+      {
+             if (i > k)             permutation[p] = j++;
+        else if (j >= length)       permutation[p] = i++;
+        else if (diag(i) < diag(j)) permutation[p] = j++;
+        else                        permutation[p] = i++;
+      }
+    }
+    
+    // If we have a total deflation, then we have to insert diag(0) at the right place
+    if(total_deflation)
+    {
+      for(Index i=1; i<length; ++i)
+      {
+        Index pi = permutation[i];
+        if(abs(diag(pi))<considerZero || diag(0)<diag(pi))
+          permutation[i-1] = permutation[i];
+        else
+        {
+          permutation[i-1] = 0;
+          break;
+        }
+      }
+    }
+    
+    // Current index of each col, and current column of each index
+    Index *realInd = m_workspaceI.data()+length;
+    Index *realCol = m_workspaceI.data()+2*length;
+    
+    for(int pos = 0; pos< length; pos++)
+    {
+      realCol[pos] = pos;
+      realInd[pos] = pos;
+    }
+    
+    for(Index i = total_deflation?0:1; i < length; i++)
+    {
+      const Index pi = permutation[length - (total_deflation ? i+1 : i)];
+      const Index J = realCol[pi];
+      
+      using std::swap;
+      // swap diagonal and first column entries:
+      swap(diag(i), diag(J));
+      if(i!=0 && J!=0) swap(col0(i), col0(J));
+
+      // change columns
+      if (m_compU) m_naiveU.col(firstCol+i).segment(firstCol, length + 1).swap(m_naiveU.col(firstCol+J).segment(firstCol, length + 1));
+      else         m_naiveU.col(firstCol+i).segment(0, 2)                .swap(m_naiveU.col(firstCol+J).segment(0, 2));
+      if (m_compV) m_naiveV.col(firstColW + i).segment(firstRowW, length).swap(m_naiveV.col(firstColW + J).segment(firstRowW, length));
+
+      //update real pos
+      const Index realI = realInd[i];
+      realCol[realI] = J;
+      realCol[pi] = i;
+      realInd[J] = realI;
+      realInd[i] = pi;
+    }
+  }
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+  std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n";
+  std::cout << "      : " << col0.transpose() << "\n\n";
+#endif
+    
+  //condition 4.4
+  {
+    Index i = length-1;
+    while(i>0 && (abs(diag(i))<considerZero || abs(col0(i))<considerZero)) --i;
+    for(; i>1;--i)
+       if( (diag(i) - diag(i-1)) < NumTraits<RealScalar>::epsilon()*maxDiag )
+      {
+#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
+        std::cout << "deflation 4.4 with i = " << i << " because " << (diag(i) - diag(i-1)) << " < " << NumTraits<RealScalar>::epsilon()*diag(i) << "\n";
+#endif
+        eigen_internal_assert(abs(diag(i) - diag(i-1))<epsilon_coarse && " diagonal entries are not properly sorted");
+        deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i-1, i, length);
+      }
+  }
+  
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  for(Index j=2;j<length;++j)
+    assert(diag(j-1)<=diag(j) || abs(diag(j))<considerZero);
+#endif
+  
+#ifdef EIGEN_BDCSVD_SANITY_CHECKS
+  assert(m_naiveU.allFinite());
+  assert(m_naiveV.allFinite());
+  assert(m_computed.allFinite());
+#endif
+}//end deflation
+
+#ifndef __CUDACC__
+/** \svd_module
+  *
+  * \return the singular value decomposition of \c *this computed by Divide & Conquer algorithm
+  *
+  * \sa class BDCSVD
+  */
+template<typename Derived>
+BDCSVD<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::bdcSvd(unsigned int computationOptions) const
+{
+  return BDCSVD<PlainObject>(*this, computationOptions);
+}
+#endif
+
+} // end namespace Eigen
+
+#endif
diff --git a/third-party/Eigen/src/SVD/JacobiSVD.h b/third-party/Eigen/src/SVD/JacobiSVD.h
new file mode 100644
index 00000000..43488b1e
--- /dev/null
+++ b/third-party/Eigen/src/SVD/JacobiSVD.h
@@ -0,0 +1,804 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2013-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_JACOBISVD_H
+#define EIGEN_JACOBISVD_H
+
+namespace Eigen { 
+
+namespace internal {
+// forward declaration (needed by ICC)
+// the empty body is required by MSVC
+template<typename MatrixType, int QRPreconditioner,
+         bool IsComplex = NumTraits<typename MatrixType::Scalar>::IsComplex>
+struct svd_precondition_2x2_block_to_be_real {};
+
+/*** QR preconditioners (R-SVD)
+ ***
+ *** Their role is to reduce the problem of computing the SVD to the case of a square matrix.
+ *** This approach, known as R-SVD, is an optimization for rectangular-enough matrices, and is a requirement for
+ *** JacobiSVD which by itself is only able to work on square matrices.
+ ***/
+
+enum { PreconditionIfMoreColsThanRows, PreconditionIfMoreRowsThanCols };
+
+template<typename MatrixType, int QRPreconditioner, int Case>
+struct qr_preconditioner_should_do_anything
+{
+  enum { a = MatrixType::RowsAtCompileTime != Dynamic &&
+             MatrixType::ColsAtCompileTime != Dynamic &&
+             MatrixType::ColsAtCompileTime <= MatrixType::RowsAtCompileTime,
+         b = MatrixType::RowsAtCompileTime != Dynamic &&
+             MatrixType::ColsAtCompileTime != Dynamic &&
+             MatrixType::RowsAtCompileTime <= MatrixType::ColsAtCompileTime,
+         ret = !( (QRPreconditioner == NoQRPreconditioner) ||
+                  (Case == PreconditionIfMoreColsThanRows && bool(a)) ||
+                  (Case == PreconditionIfMoreRowsThanCols && bool(b)) )
+  };
+};
+
+template<typename MatrixType, int QRPreconditioner, int Case,
+         bool DoAnything = qr_preconditioner_should_do_anything<MatrixType, QRPreconditioner, Case>::ret
+> struct qr_preconditioner_impl {};
+
+template<typename MatrixType, int QRPreconditioner, int Case>
+class qr_preconditioner_impl<MatrixType, QRPreconditioner, Case, false>
+{
+public:
+  void allocate(const JacobiSVD<MatrixType, QRPreconditioner>&) {}
+  bool run(JacobiSVD<MatrixType, QRPreconditioner>&, const MatrixType&)
+  {
+    return false;
+  }
+};
+
+/*** preconditioner using FullPivHouseholderQR ***/
+
+template<typename MatrixType>
+class qr_preconditioner_impl<MatrixType, FullPivHouseholderQRPreconditioner, PreconditionIfMoreRowsThanCols, true>
+{
+public:
+  typedef typename MatrixType::Scalar Scalar;
+  enum
+  {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime
+  };
+  typedef Matrix<Scalar, 1, RowsAtCompileTime, RowMajor, 1, MaxRowsAtCompileTime> WorkspaceType;
+
+  void allocate(const JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd)
+  {
+    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
+    {
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.rows(), svd.cols());
+    }
+    if (svd.m_computeFullU) m_workspace.resize(svd.rows());
+  }
+
+  bool run(JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)
+  {
+    if(matrix.rows() > matrix.cols())
+    {
+      m_qr.compute(matrix);
+      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.cols(),matrix.cols()).template triangularView<Upper>();
+      if(svd.m_computeFullU) m_qr.matrixQ().evalTo(svd.m_matrixU, m_workspace);
+      if(svd.computeV()) svd.m_matrixV = m_qr.colsPermutation();
+      return true;
+    }
+    return false;
+  }
+private:
+  typedef FullPivHouseholderQR<MatrixType> QRType;
+  QRType m_qr;
+  WorkspaceType m_workspace;
+};
+
+template<typename MatrixType>
+class qr_preconditioner_impl<MatrixType, FullPivHouseholderQRPreconditioner, PreconditionIfMoreColsThanRows, true>
+{
+public:
+  typedef typename MatrixType::Scalar Scalar;
+  enum
+  {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+    TrOptions = RowsAtCompileTime==1 ? (MatrixType::Options & ~(RowMajor))
+              : ColsAtCompileTime==1 ? (MatrixType::Options |   RowMajor)
+              : MatrixType::Options
+  };
+  typedef Matrix<Scalar, ColsAtCompileTime, RowsAtCompileTime, TrOptions, MaxColsAtCompileTime, MaxRowsAtCompileTime>
+          TransposeTypeWithSameStorageOrder;
+
+  void allocate(const JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd)
+  {
+    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
+    {
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.cols(), svd.rows());
+    }
+    m_adjoint.resize(svd.cols(), svd.rows());
+    if (svd.m_computeFullV) m_workspace.resize(svd.cols());
+  }
+
+  bool run(JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)
+  {
+    if(matrix.cols() > matrix.rows())
+    {
+      m_adjoint = matrix.adjoint();
+      m_qr.compute(m_adjoint);
+      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.rows(),matrix.rows()).template triangularView<Upper>().adjoint();
+      if(svd.m_computeFullV) m_qr.matrixQ().evalTo(svd.m_matrixV, m_workspace);
+      if(svd.computeU()) svd.m_matrixU = m_qr.colsPermutation();
+      return true;
+    }
+    else return false;
+  }
+private:
+  typedef FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+  QRType m_qr;
+  TransposeTypeWithSameStorageOrder m_adjoint;
+  typename internal::plain_row_type<MatrixType>::type m_workspace;
+};
+
+/*** preconditioner using ColPivHouseholderQR ***/
+
+template<typename MatrixType>
+class qr_preconditioner_impl<MatrixType, ColPivHouseholderQRPreconditioner, PreconditionIfMoreRowsThanCols, true>
+{
+public:
+  void allocate(const JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd)
+  {
+    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
+    {
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.rows(), svd.cols());
+    }
+    if (svd.m_computeFullU) m_workspace.resize(svd.rows());
+    else if (svd.m_computeThinU) m_workspace.resize(svd.cols());
+  }
+
+  bool run(JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)
+  {
+    if(matrix.rows() > matrix.cols())
+    {
+      m_qr.compute(matrix);
+      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.cols(),matrix.cols()).template triangularView<Upper>();
+      if(svd.m_computeFullU) m_qr.householderQ().evalTo(svd.m_matrixU, m_workspace);
+      else if(svd.m_computeThinU)
+      {
+        svd.m_matrixU.setIdentity(matrix.rows(), matrix.cols());
+        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixU, m_workspace);
+      }
+      if(svd.computeV()) svd.m_matrixV = m_qr.colsPermutation();
+      return true;
+    }
+    return false;
+  }
+
+private:
+  typedef ColPivHouseholderQR<MatrixType> QRType;
+  QRType m_qr;
+  typename internal::plain_col_type<MatrixType>::type m_workspace;
+};
+
+template<typename MatrixType>
+class qr_preconditioner_impl<MatrixType, ColPivHouseholderQRPreconditioner, PreconditionIfMoreColsThanRows, true>
+{
+public:
+  typedef typename MatrixType::Scalar Scalar;
+  enum
+  {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+    TrOptions = RowsAtCompileTime==1 ? (MatrixType::Options & ~(RowMajor))
+              : ColsAtCompileTime==1 ? (MatrixType::Options |   RowMajor)
+              : MatrixType::Options
+  };
+
+  typedef Matrix<Scalar, ColsAtCompileTime, RowsAtCompileTime, TrOptions, MaxColsAtCompileTime, MaxRowsAtCompileTime>
+          TransposeTypeWithSameStorageOrder;
+
+  void allocate(const JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd)
+  {
+    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
+    {
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.cols(), svd.rows());
+    }
+    if (svd.m_computeFullV) m_workspace.resize(svd.cols());
+    else if (svd.m_computeThinV) m_workspace.resize(svd.rows());
+    m_adjoint.resize(svd.cols(), svd.rows());
+  }
+
+  bool run(JacobiSVD<MatrixType, ColPivHouseholderQRPreconditioner>& svd, const MatrixType& matrix)
+  {
+    if(matrix.cols() > matrix.rows())
+    {
+      m_adjoint = matrix.adjoint();
+      m_qr.compute(m_adjoint);
+
+      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.rows(),matrix.rows()).template triangularView<Upper>().adjoint();
+      if(svd.m_computeFullV) m_qr.householderQ().evalTo(svd.m_matrixV, m_workspace);
+      else if(svd.m_computeThinV)
+      {
+        svd.m_matrixV.setIdentity(matrix.cols(), matrix.rows());
+        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixV, m_workspace);
+      }
+      if(svd.computeU()) svd.m_matrixU = m_qr.colsPermutation();
+      return true;
+    }
+    else return false;
+  }
+
+private:
+  typedef ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+  QRType m_qr;
+  TransposeTypeWithSameStorageOrder m_adjoint;
+  typename internal::plain_row_type<MatrixType>::type m_workspace;
+};
+
+/*** preconditioner using HouseholderQR ***/
+
+template<typename MatrixType>
+class qr_preconditioner_impl<MatrixType, HouseholderQRPreconditioner, PreconditionIfMoreRowsThanCols, true>
+{
+public:
+  void allocate(const JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd)
+  {
+    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
+    {
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.rows(), svd.cols());
+    }
+    if (svd.m_computeFullU) m_workspace.resize(svd.rows());
+    else if (svd.m_computeThinU) m_workspace.resize(svd.cols());
+  }
+
+  bool run(JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd, const MatrixType& matrix)
+  {
+    if(matrix.rows() > matrix.cols())
+    {
+      m_qr.compute(matrix);
+      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.cols(),matrix.cols()).template triangularView<Upper>();
+      if(svd.m_computeFullU) m_qr.householderQ().evalTo(svd.m_matrixU, m_workspace);
+      else if(svd.m_computeThinU)
+      {
+        svd.m_matrixU.setIdentity(matrix.rows(), matrix.cols());
+        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixU, m_workspace);
+      }
+      if(svd.computeV()) svd.m_matrixV.setIdentity(matrix.cols(), matrix.cols());
+      return true;
+    }
+    return false;
+  }
+private:
+  typedef HouseholderQR<MatrixType> QRType;
+  QRType m_qr;
+  typename internal::plain_col_type<MatrixType>::type m_workspace;
+};
+
+template<typename MatrixType>
+class qr_preconditioner_impl<MatrixType, HouseholderQRPreconditioner, PreconditionIfMoreColsThanRows, true>
+{
+public:
+  typedef typename MatrixType::Scalar Scalar;
+  enum
+  {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+    Options = MatrixType::Options
+  };
+
+  typedef Matrix<Scalar, ColsAtCompileTime, RowsAtCompileTime, Options, MaxColsAtCompileTime, MaxRowsAtCompileTime>
+          TransposeTypeWithSameStorageOrder;
+
+  void allocate(const JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd)
+  {
+    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
+    {
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.cols(), svd.rows());
+    }
+    if (svd.m_computeFullV) m_workspace.resize(svd.cols());
+    else if (svd.m_computeThinV) m_workspace.resize(svd.rows());
+    m_adjoint.resize(svd.cols(), svd.rows());
+  }
+
+  bool run(JacobiSVD<MatrixType, HouseholderQRPreconditioner>& svd, const MatrixType& matrix)
+  {
+    if(matrix.cols() > matrix.rows())
+    {
+      m_adjoint = matrix.adjoint();
+      m_qr.compute(m_adjoint);
+
+      svd.m_workMatrix = m_qr.matrixQR().block(0,0,matrix.rows(),matrix.rows()).template triangularView<Upper>().adjoint();
+      if(svd.m_computeFullV) m_qr.householderQ().evalTo(svd.m_matrixV, m_workspace);
+      else if(svd.m_computeThinV)
+      {
+        svd.m_matrixV.setIdentity(matrix.cols(), matrix.rows());
+        m_qr.householderQ().applyThisOnTheLeft(svd.m_matrixV, m_workspace);
+      }
+      if(svd.computeU()) svd.m_matrixU.setIdentity(matrix.rows(), matrix.rows());
+      return true;
+    }
+    else return false;
+  }
+
+private:
+  typedef HouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+  QRType m_qr;
+  TransposeTypeWithSameStorageOrder m_adjoint;
+  typename internal::plain_row_type<MatrixType>::type m_workspace;
+};
+
+/*** 2x2 SVD implementation
+ ***
+ *** JacobiSVD consists in performing a series of 2x2 SVD subproblems
+ ***/
+
+template<typename MatrixType, int QRPreconditioner>
+struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, false>
+{
+  typedef JacobiSVD<MatrixType, QRPreconditioner> SVD;
+  typedef typename MatrixType::RealScalar RealScalar;
+  static bool run(typename SVD::WorkMatrixType&, SVD&, Index, Index, RealScalar&) { return true; }
+};
+
+template<typename MatrixType, int QRPreconditioner>
+struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
+{
+  typedef JacobiSVD<MatrixType, QRPreconditioner> SVD;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  static bool run(typename SVD::WorkMatrixType& work_matrix, SVD& svd, Index p, Index q, RealScalar& maxDiagEntry)
+  {
+    using std::sqrt;
+    using std::abs;
+    Scalar z;
+    JacobiRotation<Scalar> rot;
+    RealScalar n = sqrt(numext::abs2(work_matrix.coeff(p,p)) + numext::abs2(work_matrix.coeff(q,p)));
+
+    const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
+    const RealScalar precision = NumTraits<Scalar>::epsilon();
+
+    if(n==0)
+    {
+      // make sure first column is zero
+      work_matrix.coeffRef(p,p) = work_matrix.coeffRef(q,p) = Scalar(0);
+
+      if(abs(numext::imag(work_matrix.coeff(p,q)))>considerAsZero)
+      {
+        // work_matrix.coeff(p,q) can be zero if work_matrix.coeff(q,p) is not zero but small enough to underflow when computing n
+        z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
+        work_matrix.row(p) *= z;
+        if(svd.computeU()) svd.m_matrixU.col(p) *= conj(z);
+      }
+      if(abs(numext::imag(work_matrix.coeff(q,q)))>considerAsZero)
+      {
+        z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);
+        work_matrix.row(q) *= z;
+        if(svd.computeU()) svd.m_matrixU.col(q) *= conj(z);
+      }
+      // otherwise the second row is already zero, so we have nothing to do.
+    }
+    else
+    {
+      rot.c() = conj(work_matrix.coeff(p,p)) / n;
+      rot.s() = work_matrix.coeff(q,p) / n;
+      work_matrix.applyOnTheLeft(p,q,rot);
+      if(svd.computeU()) svd.m_matrixU.applyOnTheRight(p,q,rot.adjoint());
+      if(abs(numext::imag(work_matrix.coeff(p,q)))>considerAsZero)
+      {
+        z = abs(work_matrix.coeff(p,q)) / work_matrix.coeff(p,q);
+        work_matrix.col(q) *= z;
+        if(svd.computeV()) svd.m_matrixV.col(q) *= z;
+      }
+      if(abs(numext::imag(work_matrix.coeff(q,q)))>considerAsZero)
+      {
+        z = abs(work_matrix.coeff(q,q)) / work_matrix.coeff(q,q);
+        work_matrix.row(q) *= z;
+        if(svd.computeU()) svd.m_matrixU.col(q) *= conj(z);
+      }
+    }
+
+    // update largest diagonal entry
+    maxDiagEntry = numext::maxi<RealScalar>(maxDiagEntry,numext::maxi<RealScalar>(abs(work_matrix.coeff(p,p)), abs(work_matrix.coeff(q,q))));
+    // and check whether the 2x2 block is already diagonal
+    RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
+    return abs(work_matrix.coeff(p,q))>threshold || abs(work_matrix.coeff(q,p)) > threshold;
+  }
+};
+
+template<typename _MatrixType, int QRPreconditioner> 
+struct traits<JacobiSVD<_MatrixType,QRPreconditioner> >
+{
+  typedef _MatrixType MatrixType;
+};
+
+} // end namespace internal
+
+/** \ingroup SVD_Module
+  *
+  *
+  * \class JacobiSVD
+  *
+  * \brief Two-sided Jacobi SVD decomposition of a rectangular matrix
+  *
+  * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
+  * \tparam QRPreconditioner this optional parameter allows to specify the type of QR decomposition that will be used internally
+  *                        for the R-SVD step for non-square matrices. See discussion of possible values below.
+  *
+  * SVD decomposition consists in decomposing any n-by-p matrix \a A as a product
+  *   \f[ A = U S V^* \f]
+  * where \a U is a n-by-n unitary, \a V is a p-by-p unitary, and \a S is a n-by-p real positive matrix which is zero outside of its main diagonal;
+  * the diagonal entries of S are known as the \em singular \em values of \a A and the columns of \a U and \a V are known as the left
+  * and right \em singular \em vectors of \a A respectively.
+  *
+  * Singular values are always sorted in decreasing order.
+  *
+  * This JacobiSVD decomposition computes only the singular values by default. If you want \a U or \a V, you need to ask for them explicitly.
+  *
+  * You can ask for only \em thin \a U or \a V to be computed, meaning the following. In case of a rectangular n-by-p matrix, letting \a m be the
+  * smaller value among \a n and \a p, there are only \a m singular vectors; the remaining columns of \a U and \a V do not correspond to actual
+  * singular vectors. Asking for \em thin \a U or \a V means asking for only their \a m first columns to be formed. So \a U is then a n-by-m matrix,
+  * and \a V is then a p-by-m matrix. Notice that thin \a U and \a V are all you need for (least squares) solving.
+  *
+  * Here's an example demonstrating basic usage:
+  * \include JacobiSVD_basic.cpp
+  * Output: \verbinclude JacobiSVD_basic.out
+  *
+  * This JacobiSVD class is a two-sided Jacobi R-SVD decomposition, ensuring optimal reliability and accuracy. The downside is that it's slower than
+  * bidiagonalizing SVD algorithms for large square matrices; however its complexity is still \f$ O(n^2p) \f$ where \a n is the smaller dimension and
+  * \a p is the greater dimension, meaning that it is still of the same order of complexity as the faster bidiagonalizing R-SVD algorithms.
+  * In particular, like any R-SVD, it takes advantage of non-squareness in that its complexity is only linear in the greater dimension.
+  *
+  * If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is guaranteed to
+  * terminate in finite (and reasonable) time.
+  *
+  * The possible values for QRPreconditioner are:
+  * \li ColPivHouseholderQRPreconditioner is the default. In practice it's very safe. It uses column-pivoting QR.
+  * \li FullPivHouseholderQRPreconditioner, is the safest and slowest. It uses full-pivoting QR.
+  *     Contrary to other QRs, it doesn't allow computing thin unitaries.
+  * \li HouseholderQRPreconditioner is the fastest, and less safe and accurate than the pivoting variants. It uses non-pivoting QR.
+  *     This is very similar in safety and accuracy to the bidiagonalization process used by bidiagonalizing SVD algorithms (since bidiagonalization
+  *     is inherently non-pivoting). However the resulting SVD is still more reliable than bidiagonalizing SVDs because the Jacobi-based iterarive
+  *     process is more reliable than the optimized bidiagonal SVD iterations.
+  * \li NoQRPreconditioner allows not to use a QR preconditioner at all. This is useful if you know that you will only be computing
+  *     JacobiSVD decompositions of square matrices. Non-square matrices require a QR preconditioner. Using this option will result in
+  *     faster compilation and smaller executable code. It won't significantly speed up computation, since JacobiSVD is always checking
+  *     if QR preconditioning is needed before applying it anyway.
+  *
+  * \sa MatrixBase::jacobiSvd()
+  */
+template<typename _MatrixType, int QRPreconditioner> class JacobiSVD
+ : public SVDBase<JacobiSVD<_MatrixType,QRPreconditioner> >
+{
+    typedef SVDBase<JacobiSVD> Base;
+  public:
+
+    typedef _MatrixType MatrixType;
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime),
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+      MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime,MaxColsAtCompileTime),
+      MatrixOptions = MatrixType::Options
+    };
+
+    typedef typename Base::MatrixUType MatrixUType;
+    typedef typename Base::MatrixVType MatrixVType;
+    typedef typename Base::SingularValuesType SingularValuesType;
+    
+    typedef typename internal::plain_row_type<MatrixType>::type RowType;
+    typedef typename internal::plain_col_type<MatrixType>::type ColType;
+    typedef Matrix<Scalar, DiagSizeAtCompileTime, DiagSizeAtCompileTime,
+                   MatrixOptions, MaxDiagSizeAtCompileTime, MaxDiagSizeAtCompileTime>
+            WorkMatrixType;
+
+    /** \brief Default Constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via JacobiSVD::compute(const MatrixType&).
+      */
+    JacobiSVD()
+    {}
+
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem size.
+      * \sa JacobiSVD()
+      */
+    JacobiSVD(Index rows, Index cols, unsigned int computationOptions = 0)
+    {
+      allocate(rows, cols, computationOptions);
+    }
+
+    /** \brief Constructor performing the decomposition of given matrix.
+     *
+     * \param matrix the matrix to decompose
+     * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
+     *                           By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU,
+     *                           #ComputeFullV, #ComputeThinV.
+     *
+     * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
+     * available with the (non-default) FullPivHouseholderQR preconditioner.
+     */
+    explicit JacobiSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
+    {
+      compute(matrix, computationOptions);
+    }
+
+    /** \brief Method performing the decomposition of given matrix using custom options.
+     *
+     * \param matrix the matrix to decompose
+     * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
+     *                           By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU,
+     *                           #ComputeFullV, #ComputeThinV.
+     *
+     * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
+     * available with the (non-default) FullPivHouseholderQR preconditioner.
+     */
+    JacobiSVD& compute(const MatrixType& matrix, unsigned int computationOptions);
+
+    /** \brief Method performing the decomposition of given matrix using current options.
+     *
+     * \param matrix the matrix to decompose
+     *
+     * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).
+     */
+    JacobiSVD& compute(const MatrixType& matrix)
+    {
+      return compute(matrix, m_computationOptions);
+    }
+
+    using Base::computeU;
+    using Base::computeV;
+    using Base::rows;
+    using Base::cols;
+    using Base::rank;
+
+  private:
+    void allocate(Index rows, Index cols, unsigned int computationOptions);
+
+  protected:
+    using Base::m_matrixU;
+    using Base::m_matrixV;
+    using Base::m_singularValues;
+    using Base::m_isInitialized;
+    using Base::m_isAllocated;
+    using Base::m_usePrescribedThreshold;
+    using Base::m_computeFullU;
+    using Base::m_computeThinU;
+    using Base::m_computeFullV;
+    using Base::m_computeThinV;
+    using Base::m_computationOptions;
+    using Base::m_nonzeroSingularValues;
+    using Base::m_rows;
+    using Base::m_cols;
+    using Base::m_diagSize;
+    using Base::m_prescribedThreshold;
+    WorkMatrixType m_workMatrix;
+
+    template<typename __MatrixType, int _QRPreconditioner, bool _IsComplex>
+    friend struct internal::svd_precondition_2x2_block_to_be_real;
+    template<typename __MatrixType, int _QRPreconditioner, int _Case, bool _DoAnything>
+    friend struct internal::qr_preconditioner_impl;
+
+    internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreColsThanRows> m_qr_precond_morecols;
+    internal::qr_preconditioner_impl<MatrixType, QRPreconditioner, internal::PreconditionIfMoreRowsThanCols> m_qr_precond_morerows;
+    MatrixType m_scaledMatrix;
+};
+
+template<typename MatrixType, int QRPreconditioner>
+void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Index rows, Index cols, unsigned int computationOptions)
+{
+  eigen_assert(rows >= 0 && cols >= 0);
+
+  if (m_isAllocated &&
+      rows == m_rows &&
+      cols == m_cols &&
+      computationOptions == m_computationOptions)
+  {
+    return;
+  }
+
+  m_rows = rows;
+  m_cols = cols;
+  m_isInitialized = false;
+  m_isAllocated = true;
+  m_computationOptions = computationOptions;
+  m_computeFullU = (computationOptions & ComputeFullU) != 0;
+  m_computeThinU = (computationOptions & ComputeThinU) != 0;
+  m_computeFullV = (computationOptions & ComputeFullV) != 0;
+  m_computeThinV = (computationOptions & ComputeThinV) != 0;
+  eigen_assert(!(m_computeFullU && m_computeThinU) && "JacobiSVD: you can't ask for both full and thin U");
+  eigen_assert(!(m_computeFullV && m_computeThinV) && "JacobiSVD: you can't ask for both full and thin V");
+  eigen_assert(EIGEN_IMPLIES(m_computeThinU || m_computeThinV, MatrixType::ColsAtCompileTime==Dynamic) &&
+              "JacobiSVD: thin U and V are only available when your matrix has a dynamic number of columns.");
+  if (QRPreconditioner == FullPivHouseholderQRPreconditioner)
+  {
+      eigen_assert(!(m_computeThinU || m_computeThinV) &&
+              "JacobiSVD: can't compute thin U or thin V with the FullPivHouseholderQR preconditioner. "
+              "Use the ColPivHouseholderQR preconditioner instead.");
+  }
+  m_diagSize = (std::min)(m_rows, m_cols);
+  m_singularValues.resize(m_diagSize);
+  if(RowsAtCompileTime==Dynamic)
+    m_matrixU.resize(m_rows, m_computeFullU ? m_rows
+                            : m_computeThinU ? m_diagSize
+                            : 0);
+  if(ColsAtCompileTime==Dynamic)
+    m_matrixV.resize(m_cols, m_computeFullV ? m_cols
+                            : m_computeThinV ? m_diagSize
+                            : 0);
+  m_workMatrix.resize(m_diagSize, m_diagSize);
+  
+  if(m_cols>m_rows)   m_qr_precond_morecols.allocate(*this);
+  if(m_rows>m_cols)   m_qr_precond_morerows.allocate(*this);
+  if(m_rows!=m_cols)  m_scaledMatrix.resize(rows,cols);
+}
+
+template<typename MatrixType, int QRPreconditioner>
+JacobiSVD<MatrixType, QRPreconditioner>&
+JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsigned int computationOptions)
+{
+  using std::abs;
+  allocate(matrix.rows(), matrix.cols(), computationOptions);
+
+  // currently we stop when we reach precision 2*epsilon as the last bit of precision can require an unreasonable number of iterations,
+  // only worsening the precision of U and V as we accumulate more rotations
+  const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();
+
+  // limit for denormal numbers to be considered zero in order to avoid infinite loops (see bug 286)
+  const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
+
+  // Scaling factor to reduce over/under-flows
+  RealScalar scale = matrix.cwiseAbs().maxCoeff();
+  if(scale==RealScalar(0)) scale = RealScalar(1);
+  
+  /*** step 1. The R-SVD step: we use a QR decomposition to reduce to the case of a square matrix */
+
+  if(m_rows!=m_cols)
+  {
+    m_scaledMatrix = matrix / scale;
+    m_qr_precond_morecols.run(*this, m_scaledMatrix);
+    m_qr_precond_morerows.run(*this, m_scaledMatrix);
+  }
+  else
+  {
+    m_workMatrix = matrix.block(0,0,m_diagSize,m_diagSize) / scale;
+    if(m_computeFullU) m_matrixU.setIdentity(m_rows,m_rows);
+    if(m_computeThinU) m_matrixU.setIdentity(m_rows,m_diagSize);
+    if(m_computeFullV) m_matrixV.setIdentity(m_cols,m_cols);
+    if(m_computeThinV) m_matrixV.setIdentity(m_cols, m_diagSize);
+  }
+
+  /*** step 2. The main Jacobi SVD iteration. ***/
+  RealScalar maxDiagEntry = m_workMatrix.cwiseAbs().diagonal().maxCoeff();
+
+  bool finished = false;
+  while(!finished)
+  {
+    finished = true;
+
+    // do a sweep: for all index pairs (p,q), perform SVD of the corresponding 2x2 sub-matrix
+
+    for(Index p = 1; p < m_diagSize; ++p)
+    {
+      for(Index q = 0; q < p; ++q)
+      {
+        // if this 2x2 sub-matrix is not diagonal already...
+        // notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't
+        // keep us iterating forever. Similarly, small denormal numbers are considered zero.
+        RealScalar threshold = numext::maxi<RealScalar>(considerAsZero, precision * maxDiagEntry);
+        if(abs(m_workMatrix.coeff(p,q))>threshold || abs(m_workMatrix.coeff(q,p)) > threshold)
+        {
+          finished = false;
+          // perform SVD decomposition of 2x2 sub-matrix corresponding to indices p,q to make it diagonal
+          // the complex to real operation returns true if the updated 2x2 block is not already diagonal
+          if(internal::svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner>::run(m_workMatrix, *this, p, q, maxDiagEntry))
+          {
+            JacobiRotation<RealScalar> j_left, j_right;
+            internal::real_2x2_jacobi_svd(m_workMatrix, p, q, &j_left, &j_right);
+
+            // accumulate resulting Jacobi rotations
+            m_workMatrix.applyOnTheLeft(p,q,j_left);
+            if(computeU()) m_matrixU.applyOnTheRight(p,q,j_left.transpose());
+
+            m_workMatrix.applyOnTheRight(p,q,j_right);
+            if(computeV()) m_matrixV.applyOnTheRight(p,q,j_right);
+
+            // keep track of the largest diagonal coefficient
+            maxDiagEntry = numext::maxi<RealScalar>(maxDiagEntry,numext::maxi<RealScalar>(abs(m_workMatrix.coeff(p,p)), abs(m_workMatrix.coeff(q,q))));
+          }
+        }
+      }
+    }
+  }
+
+  /*** step 3. The work matrix is now diagonal, so ensure it's positive so its diagonal entries are the singular values ***/
+
+  for(Index i = 0; i < m_diagSize; ++i)
+  {
+    // For a complex matrix, some diagonal coefficients might note have been
+    // treated by svd_precondition_2x2_block_to_be_real, and the imaginary part
+    // of some diagonal entry might not be null.
+    if(NumTraits<Scalar>::IsComplex && abs(numext::imag(m_workMatrix.coeff(i,i)))>considerAsZero)
+    {
+      RealScalar a = abs(m_workMatrix.coeff(i,i));
+      m_singularValues.coeffRef(i) = abs(a);
+      if(computeU()) m_matrixU.col(i) *= m_workMatrix.coeff(i,i)/a;
+    }
+    else
+    {
+      // m_workMatrix.coeff(i,i) is already real, no difficulty:
+      RealScalar a = numext::real(m_workMatrix.coeff(i,i));
+      m_singularValues.coeffRef(i) = abs(a);
+      if(computeU() && (a<RealScalar(0))) m_matrixU.col(i) = -m_matrixU.col(i);
+    }
+  }
+  
+  m_singularValues *= scale;
+
+  /*** step 4. Sort singular values in descending order and compute the number of nonzero singular values ***/
+
+  m_nonzeroSingularValues = m_diagSize;
+  for(Index i = 0; i < m_diagSize; i++)
+  {
+    Index pos;
+    RealScalar maxRemainingSingularValue = m_singularValues.tail(m_diagSize-i).maxCoeff(&pos);
+    if(maxRemainingSingularValue == RealScalar(0))
+    {
+      m_nonzeroSingularValues = i;
+      break;
+    }
+    if(pos)
+    {
+      pos += i;
+      std::swap(m_singularValues.coeffRef(i), m_singularValues.coeffRef(pos));
+      if(computeU()) m_matrixU.col(pos).swap(m_matrixU.col(i));
+      if(computeV()) m_matrixV.col(pos).swap(m_matrixV.col(i));
+    }
+  }
+
+  m_isInitialized = true;
+  return *this;
+}
+
+/** \svd_module
+  *
+  * \return the singular value decomposition of \c *this computed by two-sided
+  * Jacobi transformations.
+  *
+  * \sa class JacobiSVD
+  */
+template<typename Derived>
+JacobiSVD<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::jacobiSvd(unsigned int computationOptions) const
+{
+  return JacobiSVD<PlainObject>(*this, computationOptions);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_JACOBISVD_H
diff --git a/third-party/Eigen/src/SVD/JacobiSVD_LAPACKE.h b/third-party/Eigen/src/SVD/JacobiSVD_LAPACKE.h
new file mode 100644
index 00000000..ff0516f6
--- /dev/null
+++ b/third-party/Eigen/src/SVD/JacobiSVD_LAPACKE.h
@@ -0,0 +1,91 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to LAPACKe
+ *    Singular Value Decomposition - SVD.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_JACOBISVD_LAPACKE_H
+#define EIGEN_JACOBISVD_LAPACKE_H
+
+namespace Eigen { 
+
+/** \internal Specialization for the data types supported by LAPACKe */
+
+#define EIGEN_LAPACKE_SVD(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_PREFIX, EIGCOLROW, LAPACKE_COLROW) \
+template<> inline \
+JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPivHouseholderQRPreconditioner>& \
+JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPivHouseholderQRPreconditioner>::compute(const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>& matrix, unsigned int computationOptions) \
+{ \
+  typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \
+  /*typedef MatrixType::Scalar Scalar;*/ \
+  /*typedef MatrixType::RealScalar RealScalar;*/ \
+  allocate(matrix.rows(), matrix.cols(), computationOptions); \
+\
+  /*const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();*/ \
+  m_nonzeroSingularValues = m_diagSize; \
+\
+  lapack_int lda = internal::convert_index<lapack_int>(matrix.outerStride()), ldu, ldvt; \
+  lapack_int matrix_order = LAPACKE_COLROW; \
+  char jobu, jobvt; \
+  LAPACKE_TYPE *u, *vt, dummy; \
+  jobu  = (m_computeFullU) ? 'A' : (m_computeThinU) ? 'S' : 'N'; \
+  jobvt = (m_computeFullV) ? 'A' : (m_computeThinV) ? 'S' : 'N'; \
+  if (computeU()) { \
+    ldu  = internal::convert_index<lapack_int>(m_matrixU.outerStride()); \
+    u    = (LAPACKE_TYPE*)m_matrixU.data(); \
+  } else { ldu=1; u=&dummy; }\
+  MatrixType localV; \
+  lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \
+  if (computeV()) { \
+    localV.resize(vt_rows, m_cols); \
+    ldvt  = internal::convert_index<lapack_int>(localV.outerStride()); \
+    vt   = (LAPACKE_TYPE*)localV.data(); \
+  } else { ldvt=1; vt=&dummy; }\
+  Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \
+  MatrixType m_temp; m_temp = matrix; \
+  LAPACKE_##LAPACKE_PREFIX##gesvd( matrix_order, jobu, jobvt, internal::convert_index<lapack_int>(m_rows), internal::convert_index<lapack_int>(m_cols), (LAPACKE_TYPE*)m_temp.data(), lda, (LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \
+  if (computeV()) m_matrixV = localV.adjoint(); \
+ /* for(int i=0;i<m_diagSize;i++) if (m_singularValues.coeffRef(i) < precision) { m_nonzeroSingularValues--; m_singularValues.coeffRef(i)=RealScalar(0);}*/ \
+  m_isInitialized = true; \
+  return *this; \
+}
+
+EIGEN_LAPACKE_SVD(double,   double,                double, d, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_SVD(float,    float,                 float , s, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_SVD(dcomplex, lapack_complex_double, double, z, ColMajor, LAPACK_COL_MAJOR)
+EIGEN_LAPACKE_SVD(scomplex, lapack_complex_float,  float , c, ColMajor, LAPACK_COL_MAJOR)
+
+EIGEN_LAPACKE_SVD(double,   double,                double, d, RowMajor, LAPACK_ROW_MAJOR)
+EIGEN_LAPACKE_SVD(float,    float,                 float , s, RowMajor, LAPACK_ROW_MAJOR)
+EIGEN_LAPACKE_SVD(dcomplex, lapack_complex_double, double, z, RowMajor, LAPACK_ROW_MAJOR)
+EIGEN_LAPACKE_SVD(scomplex, lapack_complex_float,  float , c, RowMajor, LAPACK_ROW_MAJOR)
+
+} // end namespace Eigen
+
+#endif // EIGEN_JACOBISVD_LAPACKE_H
diff --git a/third-party/Eigen/src/SVD/SVDBase.h b/third-party/Eigen/src/SVD/SVDBase.h
new file mode 100644
index 00000000..53da2848
--- /dev/null
+++ b/third-party/Eigen/src/SVD/SVDBase.h
@@ -0,0 +1,315 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
+// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
+// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
+// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SVDBASE_H
+#define EIGEN_SVDBASE_H
+
+namespace Eigen {
+/** \ingroup SVD_Module
+ *
+ *
+ * \class SVDBase
+ *
+ * \brief Base class of SVD algorithms
+ *
+ * \tparam Derived the type of the actual SVD decomposition
+ *
+ * SVD decomposition consists in decomposing any n-by-p matrix \a A as a product
+ *   \f[ A = U S V^* \f]
+ * where \a U is a n-by-n unitary, \a V is a p-by-p unitary, and \a S is a n-by-p real positive matrix which is zero outside of its main diagonal;
+ * the diagonal entries of S are known as the \em singular \em values of \a A and the columns of \a U and \a V are known as the left
+ * and right \em singular \em vectors of \a A respectively.
+ *
+ * Singular values are always sorted in decreasing order.
+ *
+ * 
+ * You can ask for only \em thin \a U or \a V to be computed, meaning the following. In case of a rectangular n-by-p matrix, letting \a m be the
+ * smaller value among \a n and \a p, there are only \a m singular vectors; the remaining columns of \a U and \a V do not correspond to actual
+ * singular vectors. Asking for \em thin \a U or \a V means asking for only their \a m first columns to be formed. So \a U is then a n-by-m matrix,
+ * and \a V is then a p-by-m matrix. Notice that thin \a U and \a V are all you need for (least squares) solving.
+ *  
+ * If the input matrix has inf or nan coefficients, the result of the computation is undefined, but the computation is guaranteed to
+ * terminate in finite (and reasonable) time.
+ * \sa class BDCSVD, class JacobiSVD
+ */
+template<typename Derived>
+class SVDBase
+{
+
+public:
+  typedef typename internal::traits<Derived>::MatrixType MatrixType;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+  typedef typename MatrixType::StorageIndex StorageIndex;
+  typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+    DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime),
+    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
+    MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime,MaxColsAtCompileTime),
+    MatrixOptions = MatrixType::Options
+  };
+
+  typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime, MatrixOptions, MaxRowsAtCompileTime, MaxRowsAtCompileTime> MatrixUType;
+  typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime, MatrixOptions, MaxColsAtCompileTime, MaxColsAtCompileTime> MatrixVType;
+  typedef typename internal::plain_diag_type<MatrixType, RealScalar>::type SingularValuesType;
+  
+  Derived& derived() { return *static_cast<Derived*>(this); }
+  const Derived& derived() const { return *static_cast<const Derived*>(this); }
+
+  /** \returns the \a U matrix.
+   *
+   * For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p,
+   * the U matrix is n-by-n if you asked for \link Eigen::ComputeFullU ComputeFullU \endlink, and is n-by-m if you asked for \link Eigen::ComputeThinU ComputeThinU \endlink.
+   *
+   * The \a m first columns of \a U are the left singular vectors of the matrix being decomposed.
+   *
+   * This method asserts that you asked for \a U to be computed.
+   */
+  const MatrixUType& matrixU() const
+  {
+    eigen_assert(m_isInitialized && "SVD is not initialized.");
+    eigen_assert(computeU() && "This SVD decomposition didn't compute U. Did you ask for it?");
+    return m_matrixU;
+  }
+
+  /** \returns the \a V matrix.
+   *
+   * For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p,
+   * the V matrix is p-by-p if you asked for \link Eigen::ComputeFullV ComputeFullV \endlink, and is p-by-m if you asked for \link Eigen::ComputeThinV ComputeThinV \endlink.
+   *
+   * The \a m first columns of \a V are the right singular vectors of the matrix being decomposed.
+   *
+   * This method asserts that you asked for \a V to be computed.
+   */
+  const MatrixVType& matrixV() const
+  {
+    eigen_assert(m_isInitialized && "SVD is not initialized.");
+    eigen_assert(computeV() && "This SVD decomposition didn't compute V. Did you ask for it?");
+    return m_matrixV;
+  }
+
+  /** \returns the vector of singular values.
+   *
+   * For the SVD decomposition of a n-by-p matrix, letting \a m be the minimum of \a n and \a p, the
+   * returned vector has size \a m.  Singular values are always sorted in decreasing order.
+   */
+  const SingularValuesType& singularValues() const
+  {
+    eigen_assert(m_isInitialized && "SVD is not initialized.");
+    return m_singularValues;
+  }
+
+  /** \returns the number of singular values that are not exactly 0 */
+  Index nonzeroSingularValues() const
+  {
+    eigen_assert(m_isInitialized && "SVD is not initialized.");
+    return m_nonzeroSingularValues;
+  }
+  
+  /** \returns the rank of the matrix of which \c *this is the SVD.
+    *
+    * \note This method has to determine which singular values should be considered nonzero.
+    *       For that, it uses the threshold value that you can control by calling
+    *       setThreshold(const RealScalar&).
+    */
+  inline Index rank() const
+  {
+    using std::abs;
+    eigen_assert(m_isInitialized && "JacobiSVD is not initialized.");
+    if(m_singularValues.size()==0) return 0;
+    RealScalar premultiplied_threshold = numext::maxi<RealScalar>(m_singularValues.coeff(0) * threshold(), (std::numeric_limits<RealScalar>::min)());
+    Index i = m_nonzeroSingularValues-1;
+    while(i>=0 && m_singularValues.coeff(i) < premultiplied_threshold) --i;
+    return i+1;
+  }
+  
+  /** Allows to prescribe a threshold to be used by certain methods, such as rank() and solve(),
+    * which need to determine when singular values are to be considered nonzero.
+    * This is not used for the SVD decomposition itself.
+    *
+    * When it needs to get the threshold value, Eigen calls threshold().
+    * The default is \c NumTraits<Scalar>::epsilon()
+    *
+    * \param threshold The new value to use as the threshold.
+    *
+    * A singular value will be considered nonzero if its value is strictly greater than
+    *  \f$ \vert singular value \vert \leqslant threshold \times \vert max singular value \vert \f$.
+    *
+    * If you want to come back to the default behavior, call setThreshold(Default_t)
+    */
+  Derived& setThreshold(const RealScalar& threshold)
+  {
+    m_usePrescribedThreshold = true;
+    m_prescribedThreshold = threshold;
+    return derived();
+  }
+
+  /** Allows to come back to the default behavior, letting Eigen use its default formula for
+    * determining the threshold.
+    *
+    * You should pass the special object Eigen::Default as parameter here.
+    * \code svd.setThreshold(Eigen::Default); \endcode
+    *
+    * See the documentation of setThreshold(const RealScalar&).
+    */
+  Derived& setThreshold(Default_t)
+  {
+    m_usePrescribedThreshold = false;
+    return derived();
+  }
+
+  /** Returns the threshold that will be used by certain methods such as rank().
+    *
+    * See the documentation of setThreshold(const RealScalar&).
+    */
+  RealScalar threshold() const
+  {
+    eigen_assert(m_isInitialized || m_usePrescribedThreshold);
+    // this temporary is needed to workaround a MSVC issue
+    Index diagSize = (std::max<Index>)(1,m_diagSize);
+    return m_usePrescribedThreshold ? m_prescribedThreshold
+                                    : RealScalar(diagSize)*NumTraits<Scalar>::epsilon();
+  }
+
+  /** \returns true if \a U (full or thin) is asked for in this SVD decomposition */
+  inline bool computeU() const { return m_computeFullU || m_computeThinU; }
+  /** \returns true if \a V (full or thin) is asked for in this SVD decomposition */
+  inline bool computeV() const { return m_computeFullV || m_computeThinV; }
+
+  inline Index rows() const { return m_rows; }
+  inline Index cols() const { return m_cols; }
+  
+  /** \returns a (least squares) solution of \f$ A x = b \f$ using the current SVD decomposition of A.
+    *
+    * \param b the right-hand-side of the equation to solve.
+    *
+    * \note Solving requires both U and V to be computed. Thin U and V are enough, there is no need for full U or V.
+    *
+    * \note SVD solving is implicitly least-squares. Thus, this method serves both purposes of exact solving and least-squares solving.
+    * In other words, the returned solution is guaranteed to minimize the Euclidean norm \f$ \Vert A x - b \Vert \f$.
+    */
+  template<typename Rhs>
+  inline const Solve<Derived, Rhs>
+  solve(const MatrixBase<Rhs>& b) const
+  {
+    eigen_assert(m_isInitialized && "SVD is not initialized.");
+    eigen_assert(computeU() && computeV() && "SVD::solve() requires both unitaries U and V to be computed (thin unitaries suffice).");
+    return Solve<Derived, Rhs>(derived(), b.derived());
+  }
+  
+  #ifndef EIGEN_PARSED_BY_DOXYGEN
+  template<typename RhsType, typename DstType>
+  EIGEN_DEVICE_FUNC
+  void _solve_impl(const RhsType &rhs, DstType &dst) const;
+  #endif
+
+protected:
+  
+  static void check_template_parameters()
+  {
+    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+  }
+  
+  // return true if already allocated
+  bool allocate(Index rows, Index cols, unsigned int computationOptions) ;
+
+  MatrixUType m_matrixU;
+  MatrixVType m_matrixV;
+  SingularValuesType m_singularValues;
+  bool m_isInitialized, m_isAllocated, m_usePrescribedThreshold;
+  bool m_computeFullU, m_computeThinU;
+  bool m_computeFullV, m_computeThinV;
+  unsigned int m_computationOptions;
+  Index m_nonzeroSingularValues, m_rows, m_cols, m_diagSize;
+  RealScalar m_prescribedThreshold;
+
+  /** \brief Default Constructor.
+   *
+   * Default constructor of SVDBase
+   */
+  SVDBase()
+    : m_isInitialized(false),
+      m_isAllocated(false),
+      m_usePrescribedThreshold(false),
+      m_computationOptions(0),
+      m_rows(-1), m_cols(-1), m_diagSize(0)
+  {
+    check_template_parameters();
+  }
+
+
+};
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename Derived>
+template<typename RhsType, typename DstType>
+void SVDBase<Derived>::_solve_impl(const RhsType &rhs, DstType &dst) const
+{
+  eigen_assert(rhs.rows() == rows());
+
+  // A = U S V^*
+  // So A^{-1} = V S^{-1} U^*
+
+  Matrix<Scalar, Dynamic, RhsType::ColsAtCompileTime, 0, MatrixType::MaxRowsAtCompileTime, RhsType::MaxColsAtCompileTime> tmp;
+  Index l_rank = rank();
+  tmp.noalias() =  m_matrixU.leftCols(l_rank).adjoint() * rhs;
+  tmp = m_singularValues.head(l_rank).asDiagonal().inverse() * tmp;
+  dst = m_matrixV.leftCols(l_rank) * tmp;
+}
+#endif
+
+template<typename MatrixType>
+bool SVDBase<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions)
+{
+  eigen_assert(rows >= 0 && cols >= 0);
+
+  if (m_isAllocated &&
+      rows == m_rows &&
+      cols == m_cols &&
+      computationOptions == m_computationOptions)
+  {
+    return true;
+  }
+
+  m_rows = rows;
+  m_cols = cols;
+  m_isInitialized = false;
+  m_isAllocated = true;
+  m_computationOptions = computationOptions;
+  m_computeFullU = (computationOptions & ComputeFullU) != 0;
+  m_computeThinU = (computationOptions & ComputeThinU) != 0;
+  m_computeFullV = (computationOptions & ComputeFullV) != 0;
+  m_computeThinV = (computationOptions & ComputeThinV) != 0;
+  eigen_assert(!(m_computeFullU && m_computeThinU) && "SVDBase: you can't ask for both full and thin U");
+  eigen_assert(!(m_computeFullV && m_computeThinV) && "SVDBase: you can't ask for both full and thin V");
+  eigen_assert(EIGEN_IMPLIES(m_computeThinU || m_computeThinV, MatrixType::ColsAtCompileTime==Dynamic) &&
+	       "SVDBase: thin U and V are only available when your matrix has a dynamic number of columns.");
+
+  m_diagSize = (std::min)(m_rows, m_cols);
+  m_singularValues.resize(m_diagSize);
+  if(RowsAtCompileTime==Dynamic)
+    m_matrixU.resize(m_rows, m_computeFullU ? m_rows : m_computeThinU ? m_diagSize : 0);
+  if(ColsAtCompileTime==Dynamic)
+    m_matrixV.resize(m_cols, m_computeFullV ? m_cols : m_computeThinV ? m_diagSize : 0);
+
+  return false;
+}
+
+}// end namespace
+
+#endif // EIGEN_SVDBASE_H
diff --git a/third-party/Eigen/src/SVD/UpperBidiagonalization.h b/third-party/Eigen/src/SVD/UpperBidiagonalization.h
new file mode 100644
index 00000000..11ac847e
--- /dev/null
+++ b/third-party/Eigen/src/SVD/UpperBidiagonalization.h
@@ -0,0 +1,414 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2013-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BIDIAGONALIZATION_H
+#define EIGEN_BIDIAGONALIZATION_H
+
+namespace Eigen { 
+
+namespace internal {
+// UpperBidiagonalization will probably be replaced by a Bidiagonalization class, don't want to make it stable API.
+// At the same time, it's useful to keep for now as it's about the only thing that is testing the BandMatrix class.
+
+template<typename _MatrixType> class UpperBidiagonalization
+{
+  public:
+
+    typedef _MatrixType MatrixType;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      ColsAtCompileTimeMinusOne = internal::decrement_size<ColsAtCompileTime>::ret
+    };
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename MatrixType::RealScalar RealScalar;
+    typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
+    typedef Matrix<Scalar, 1, ColsAtCompileTime> RowVectorType;
+    typedef Matrix<Scalar, RowsAtCompileTime, 1> ColVectorType;
+    typedef BandMatrix<RealScalar, ColsAtCompileTime, ColsAtCompileTime, 1, 0, RowMajor> BidiagonalType;
+    typedef Matrix<Scalar, ColsAtCompileTime, 1> DiagVectorType;
+    typedef Matrix<Scalar, ColsAtCompileTimeMinusOne, 1> SuperDiagVectorType;
+    typedef HouseholderSequence<
+              const MatrixType,
+              const typename internal::remove_all<typename Diagonal<const MatrixType,0>::ConjugateReturnType>::type
+            > HouseholderUSequenceType;
+    typedef HouseholderSequence<
+              const typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type,
+              Diagonal<const MatrixType,1>,
+              OnTheRight
+            > HouseholderVSequenceType;
+    
+    /**
+    * \brief Default Constructor.
+    *
+    * The default constructor is useful in cases in which the user intends to
+    * perform decompositions via Bidiagonalization::compute(const MatrixType&).
+    */
+    UpperBidiagonalization() : m_householder(), m_bidiagonal(), m_isInitialized(false) {}
+
+    explicit UpperBidiagonalization(const MatrixType& matrix)
+      : m_householder(matrix.rows(), matrix.cols()),
+        m_bidiagonal(matrix.cols(), matrix.cols()),
+        m_isInitialized(false)
+    {
+      compute(matrix);
+    }
+    
+    UpperBidiagonalization& compute(const MatrixType& matrix);
+    UpperBidiagonalization& computeUnblocked(const MatrixType& matrix);
+    
+    const MatrixType& householder() const { return m_householder; }
+    const BidiagonalType& bidiagonal() const { return m_bidiagonal; }
+    
+    const HouseholderUSequenceType householderU() const
+    {
+      eigen_assert(m_isInitialized && "UpperBidiagonalization is not initialized.");
+      return HouseholderUSequenceType(m_householder, m_householder.diagonal().conjugate());
+    }
+
+    const HouseholderVSequenceType householderV() // const here gives nasty errors and i'm lazy
+    {
+      eigen_assert(m_isInitialized && "UpperBidiagonalization is not initialized.");
+      return HouseholderVSequenceType(m_householder.conjugate(), m_householder.const_derived().template diagonal<1>())
+             .setLength(m_householder.cols()-1)
+             .setShift(1);
+    }
+    
+  protected:
+    MatrixType m_householder;
+    BidiagonalType m_bidiagonal;
+    bool m_isInitialized;
+};
+
+// Standard upper bidiagonalization without fancy optimizations
+// This version should be faster for small matrix size
+template<typename MatrixType>
+void upperbidiagonalization_inplace_unblocked(MatrixType& mat,
+                                              typename MatrixType::RealScalar *diagonal,
+                                              typename MatrixType::RealScalar *upper_diagonal,
+                                              typename MatrixType::Scalar* tempData = 0)
+{
+  typedef typename MatrixType::Scalar Scalar;
+
+  Index rows = mat.rows();
+  Index cols = mat.cols();
+
+  typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixType::MaxRowsAtCompileTime,1> TempType;
+  TempType tempVector;
+  if(tempData==0)
+  {
+    tempVector.resize(rows);
+    tempData = tempVector.data();
+  }
+
+  for (Index k = 0; /* breaks at k==cols-1 below */ ; ++k)
+  {
+    Index remainingRows = rows - k;
+    Index remainingCols = cols - k - 1;
+
+    // construct left householder transform in-place in A
+    mat.col(k).tail(remainingRows)
+       .makeHouseholderInPlace(mat.coeffRef(k,k), diagonal[k]);
+    // apply householder transform to remaining part of A on the left
+    mat.bottomRightCorner(remainingRows, remainingCols)
+       .applyHouseholderOnTheLeft(mat.col(k).tail(remainingRows-1), mat.coeff(k,k), tempData);
+
+    if(k == cols-1) break;
+
+    // construct right householder transform in-place in mat
+    mat.row(k).tail(remainingCols)
+       .makeHouseholderInPlace(mat.coeffRef(k,k+1), upper_diagonal[k]);
+    // apply householder transform to remaining part of mat on the left
+    mat.bottomRightCorner(remainingRows-1, remainingCols)
+       .applyHouseholderOnTheRight(mat.row(k).tail(remainingCols-1).transpose(), mat.coeff(k,k+1), tempData);
+  }
+}
+
+/** \internal
+  * Helper routine for the block reduction to upper bidiagonal form.
+  *
+  * Let's partition the matrix A:
+  * 
+  *      | A00 A01 |
+  *  A = |         |
+  *      | A10 A11 |
+  *
+  * This function reduces to bidiagonal form the left \c rows x \a blockSize vertical panel [A00/A10]
+  * and the \a blockSize x \c cols horizontal panel [A00 A01] of the matrix \a A. The bottom-right block A11
+  * is updated using matrix-matrix products:
+  *   A22 -= V * Y^T - X * U^T
+  * where V and U contains the left and right Householder vectors. U and V are stored in A10, and A01
+  * respectively, and the update matrices X and Y are computed during the reduction.
+  * 
+  */
+template<typename MatrixType>
+void upperbidiagonalization_blocked_helper(MatrixType& A,
+                                           typename MatrixType::RealScalar *diagonal,
+                                           typename MatrixType::RealScalar *upper_diagonal,
+                                           Index bs,
+                                           Ref<Matrix<typename MatrixType::Scalar, Dynamic, Dynamic,
+                                                      traits<MatrixType>::Flags & RowMajorBit> > X,
+                                           Ref<Matrix<typename MatrixType::Scalar, Dynamic, Dynamic,
+                                                      traits<MatrixType>::Flags & RowMajorBit> > Y)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename NumTraits<RealScalar>::Literal Literal;
+  enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };
+  typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride;
+  typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride;
+  typedef Ref<Matrix<Scalar, Dynamic, 1>, 0, ColInnerStride>    SubColumnType;
+  typedef Ref<Matrix<Scalar, 1, Dynamic>, 0, RowInnerStride>    SubRowType;
+  typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder > > SubMatType;
+  
+  Index brows = A.rows();
+  Index bcols = A.cols();
+
+  Scalar tau_u, tau_u_prev(0), tau_v;
+
+  for(Index k = 0; k < bs; ++k)
+  {
+    Index remainingRows = brows - k;
+    Index remainingCols = bcols - k - 1;
+
+    SubMatType X_k1( X.block(k,0, remainingRows,k) );
+    SubMatType V_k1( A.block(k,0, remainingRows,k) );
+
+    // 1 - update the k-th column of A
+    SubColumnType v_k = A.col(k).tail(remainingRows);
+          v_k -= V_k1 * Y.row(k).head(k).adjoint();
+    if(k) v_k -= X_k1 * A.col(k).head(k);
+    
+    // 2 - construct left Householder transform in-place
+    v_k.makeHouseholderInPlace(tau_v, diagonal[k]);
+       
+    if(k+1<bcols)
+    {
+      SubMatType Y_k  ( Y.block(k+1,0, remainingCols, k+1) );
+      SubMatType U_k1 ( A.block(0,k+1, k,remainingCols) );
+      
+      // this eases the application of Householder transforAions
+      // A(k,k) will store tau_v later
+      A(k,k) = Scalar(1);
+
+      // 3 - Compute y_k^T = tau_v * ( A^T*v_k - Y_k-1*V_k-1^T*v_k - U_k-1*X_k-1^T*v_k )
+      {
+        SubColumnType y_k( Y.col(k).tail(remainingCols) );
+        
+        // let's use the begining of column k of Y as a temporary vector
+        SubColumnType tmp( Y.col(k).head(k) );
+        y_k.noalias()  = A.block(k,k+1, remainingRows,remainingCols).adjoint() * v_k; // bottleneck
+        tmp.noalias()  = V_k1.adjoint()  * v_k;
+        y_k.noalias() -= Y_k.leftCols(k) * tmp;
+        tmp.noalias()  = X_k1.adjoint()  * v_k;
+        y_k.noalias() -= U_k1.adjoint()  * tmp;
+        y_k *= numext::conj(tau_v);
+      }
+
+      // 4 - update k-th row of A (it will become u_k)
+      SubRowType u_k( A.row(k).tail(remainingCols) );
+      u_k = u_k.conjugate();
+      {
+        u_k -= Y_k * A.row(k).head(k+1).adjoint();
+        if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint();
+      }
+
+      // 5 - construct right Householder transform in-place
+      u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]);
+
+      // this eases the application of Householder transformations
+      // A(k,k+1) will store tau_u later
+      A(k,k+1) = Scalar(1);
+
+      // 6 - Compute x_k = tau_u * ( A*u_k - X_k-1*U_k-1^T*u_k - V_k*Y_k^T*u_k )
+      {
+        SubColumnType x_k ( X.col(k).tail(remainingRows-1) );
+        
+        // let's use the begining of column k of X as a temporary vectors
+        // note that tmp0 and tmp1 overlaps
+        SubColumnType tmp0 ( X.col(k).head(k) ),
+                      tmp1 ( X.col(k).head(k+1) );
+                    
+        x_k.noalias()   = A.block(k+1,k+1, remainingRows-1,remainingCols) * u_k.transpose(); // bottleneck
+        tmp0.noalias()  = U_k1 * u_k.transpose();
+        x_k.noalias()  -= X_k1.bottomRows(remainingRows-1) * tmp0;
+        tmp1.noalias()  = Y_k.adjoint() * u_k.transpose();
+        x_k.noalias()  -= A.block(k+1,0, remainingRows-1,k+1) * tmp1;
+        x_k *= numext::conj(tau_u);
+        tau_u = numext::conj(tau_u);
+        u_k = u_k.conjugate();
+      }
+
+      if(k>0) A.coeffRef(k-1,k) = tau_u_prev;
+      tau_u_prev = tau_u;
+    }
+    else
+      A.coeffRef(k-1,k) = tau_u_prev;
+
+    A.coeffRef(k,k) = tau_v;
+  }
+  
+  if(bs<bcols)
+    A.coeffRef(bs-1,bs) = tau_u_prev;
+
+  // update A22
+  if(bcols>bs && brows>bs)
+  {
+    SubMatType A11( A.bottomRightCorner(brows-bs,bcols-bs) );
+    SubMatType A10( A.block(bs,0, brows-bs,bs) );
+    SubMatType A01( A.block(0,bs, bs,bcols-bs) );
+    Scalar tmp = A01(bs-1,0);
+    A01(bs-1,0) = Literal(1);
+    A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint();
+    A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01;
+    A01(bs-1,0) = tmp;
+  }
+}
+
+/** \internal
+  *
+  * Implementation of a block-bidiagonal reduction.
+  * It is based on the following paper:
+  *   The Design of a Parallel Dense Linear Algebra Software Library: Reduction to Hessenberg, Tridiagonal, and Bidiagonal Form.
+  *   by Jaeyoung Choi, Jack J. Dongarra, David W. Walker. (1995)
+  *   section 3.3
+  */
+template<typename MatrixType, typename BidiagType>
+void upperbidiagonalization_inplace_blocked(MatrixType& A, BidiagType& bidiagonal,
+                                            Index maxBlockSize=32,
+                                            typename MatrixType::Scalar* /*tempData*/ = 0)
+{
+  typedef typename MatrixType::Scalar Scalar;
+  typedef Block<MatrixType,Dynamic,Dynamic> BlockType;
+
+  Index rows = A.rows();
+  Index cols = A.cols();
+  Index size = (std::min)(rows, cols);
+
+  // X and Y are work space
+  enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };
+  Matrix<Scalar,
+         MatrixType::RowsAtCompileTime,
+         Dynamic,
+         StorageOrder,
+         MatrixType::MaxRowsAtCompileTime> X(rows,maxBlockSize);
+  Matrix<Scalar,
+         MatrixType::ColsAtCompileTime,
+         Dynamic,
+         StorageOrder,
+         MatrixType::MaxColsAtCompileTime> Y(cols,maxBlockSize);
+  Index blockSize = (std::min)(maxBlockSize,size);
+
+  Index k = 0;
+  for(k = 0; k < size; k += blockSize)
+  {
+    Index bs = (std::min)(size-k,blockSize);  // actual size of the block
+    Index brows = rows - k;                   // rows of the block
+    Index bcols = cols - k;                   // columns of the block
+
+    // partition the matrix A:
+    // 
+    //      | A00 A01 A02 |
+    //      |             |
+    // A  = | A10 A11 A12 |
+    //      |             |
+    //      | A20 A21 A22 |
+    //
+    // where A11 is a bs x bs diagonal block,
+    // and let:
+    //      | A11 A12 |
+    //  B = |         |
+    //      | A21 A22 |
+
+    BlockType B = A.block(k,k,brows,bcols);
+    
+    // This stage performs the bidiagonalization of A11, A21, A12, and updating of A22.
+    // Finally, the algorithm continue on the updated A22.
+    //
+    // However, if B is too small, or A22 empty, then let's use an unblocked strategy
+    if(k+bs==cols || bcols<48) // somewhat arbitrary threshold
+    {
+      upperbidiagonalization_inplace_unblocked(B,
+                                               &(bidiagonal.template diagonal<0>().coeffRef(k)),
+                                               &(bidiagonal.template diagonal<1>().coeffRef(k)),
+                                               X.data()
+                                              );
+      break; // We're done
+    }
+    else
+    {
+      upperbidiagonalization_blocked_helper<BlockType>( B,
+                                                        &(bidiagonal.template diagonal<0>().coeffRef(k)),
+                                                        &(bidiagonal.template diagonal<1>().coeffRef(k)),
+                                                        bs,
+                                                        X.topLeftCorner(brows,bs),
+                                                        Y.topLeftCorner(bcols,bs)
+                                                      );
+    }
+  }
+}
+
+template<typename _MatrixType>
+UpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::computeUnblocked(const _MatrixType& matrix)
+{
+  Index rows = matrix.rows();
+  Index cols = matrix.cols();
+  EIGEN_ONLY_USED_FOR_DEBUG(cols);
+
+  eigen_assert(rows >= cols && "UpperBidiagonalization is only for Arices satisfying rows>=cols.");
+
+  m_householder = matrix;
+
+  ColVectorType temp(rows);
+
+  upperbidiagonalization_inplace_unblocked(m_householder,
+                                           &(m_bidiagonal.template diagonal<0>().coeffRef(0)),
+                                           &(m_bidiagonal.template diagonal<1>().coeffRef(0)),
+                                           temp.data());
+
+  m_isInitialized = true;
+  return *this;
+}
+
+template<typename _MatrixType>
+UpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::compute(const _MatrixType& matrix)
+{
+  Index rows = matrix.rows();
+  Index cols = matrix.cols();
+  EIGEN_ONLY_USED_FOR_DEBUG(rows);
+  EIGEN_ONLY_USED_FOR_DEBUG(cols);
+
+  eigen_assert(rows >= cols && "UpperBidiagonalization is only for Arices satisfying rows>=cols.");
+
+  m_householder = matrix;
+  upperbidiagonalization_inplace_blocked(m_householder, m_bidiagonal);
+            
+  m_isInitialized = true;
+  return *this;
+}
+
+#if 0
+/** \return the Householder QR decomposition of \c *this.
+  *
+  * \sa class Bidiagonalization
+  */
+template<typename Derived>
+const UpperBidiagonalization<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::bidiagonalization() const
+{
+  return UpperBidiagonalization<PlainObject>(eval());
+}
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BIDIAGONALIZATION_H
diff --git a/third-party/Eigen/src/misc/Image.h b/third-party/Eigen/src/misc/Image.h
new file mode 100644
index 00000000..b8b8a045
--- /dev/null
+++ b/third-party/Eigen/src/misc/Image.h
@@ -0,0 +1,82 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MISC_IMAGE_H
+#define EIGEN_MISC_IMAGE_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/** \class image_retval_base
+  *
+  */
+template<typename DecompositionType>
+struct traits<image_retval_base<DecompositionType> >
+{
+  typedef typename DecompositionType::MatrixType MatrixType;
+  typedef Matrix<
+    typename MatrixType::Scalar,
+    MatrixType::RowsAtCompileTime, // the image is a subspace of the destination space, whose
+                                   // dimension is the number of rows of the original matrix
+    Dynamic,                       // we don't know at compile time the dimension of the image (the rank)
+    MatrixType::Options,
+    MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original matrix,
+    MatrixType::MaxColsAtCompileTime  // so it has the same number of rows and at most as many columns.
+  > ReturnType;
+};
+
+template<typename _DecompositionType> struct image_retval_base
+ : public ReturnByValue<image_retval_base<_DecompositionType> >
+{
+  typedef _DecompositionType DecompositionType;
+  typedef typename DecompositionType::MatrixType MatrixType;
+  typedef ReturnByValue<image_retval_base> Base;
+
+  image_retval_base(const DecompositionType& dec, const MatrixType& originalMatrix)
+    : m_dec(dec), m_rank(dec.rank()),
+      m_cols(m_rank == 0 ? 1 : m_rank),
+      m_originalMatrix(originalMatrix)
+  {}
+
+  inline Index rows() const { return m_dec.rows(); }
+  inline Index cols() const { return m_cols; }
+  inline Index rank() const { return m_rank; }
+  inline const DecompositionType& dec() const { return m_dec; }
+  inline const MatrixType& originalMatrix() const { return m_originalMatrix; }
+
+  template<typename Dest> inline void evalTo(Dest& dst) const
+  {
+    static_cast<const image_retval<DecompositionType>*>(this)->evalTo(dst);
+  }
+
+  protected:
+    const DecompositionType& m_dec;
+    Index m_rank, m_cols;
+    const MatrixType& m_originalMatrix;
+};
+
+} // end namespace internal
+
+#define EIGEN_MAKE_IMAGE_HELPERS(DecompositionType) \
+  typedef typename DecompositionType::MatrixType MatrixType; \
+  typedef typename MatrixType::Scalar Scalar; \
+  typedef typename MatrixType::RealScalar RealScalar; \
+  typedef Eigen::internal::image_retval_base<DecompositionType> Base; \
+  using Base::dec; \
+  using Base::originalMatrix; \
+  using Base::rank; \
+  using Base::rows; \
+  using Base::cols; \
+  image_retval(const DecompositionType& dec, const MatrixType& originalMatrix) \
+    : Base(dec, originalMatrix) {}
+
+} // end namespace Eigen
+
+#endif // EIGEN_MISC_IMAGE_H
diff --git a/third-party/Eigen/src/misc/Kernel.h b/third-party/Eigen/src/misc/Kernel.h
new file mode 100644
index 00000000..bef5d6ff
--- /dev/null
+++ b/third-party/Eigen/src/misc/Kernel.h
@@ -0,0 +1,79 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_MISC_KERNEL_H
+#define EIGEN_MISC_KERNEL_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/** \class kernel_retval_base
+  *
+  */
+template<typename DecompositionType>
+struct traits<kernel_retval_base<DecompositionType> >
+{
+  typedef typename DecompositionType::MatrixType MatrixType;
+  typedef Matrix<
+    typename MatrixType::Scalar,
+    MatrixType::ColsAtCompileTime, // the number of rows in the "kernel matrix"
+                                   // is the number of cols of the original matrix
+                                   // so that the product "matrix * kernel = zero" makes sense
+    Dynamic,                       // we don't know at compile-time the dimension of the kernel
+    MatrixType::Options,
+    MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter
+    MatrixType::MaxColsAtCompileTime // the kernel is a subspace of the domain space,
+                                     // whose dimension is the number of columns of the original matrix
+  > ReturnType;
+};
+
+template<typename _DecompositionType> struct kernel_retval_base
+ : public ReturnByValue<kernel_retval_base<_DecompositionType> >
+{
+  typedef _DecompositionType DecompositionType;
+  typedef ReturnByValue<kernel_retval_base> Base;
+
+  explicit kernel_retval_base(const DecompositionType& dec)
+    : m_dec(dec),
+      m_rank(dec.rank()),
+      m_cols(m_rank==dec.cols() ? 1 : dec.cols() - m_rank)
+  {}
+
+  inline Index rows() const { return m_dec.cols(); }
+  inline Index cols() const { return m_cols; }
+  inline Index rank() const { return m_rank; }
+  inline const DecompositionType& dec() const { return m_dec; }
+
+  template<typename Dest> inline void evalTo(Dest& dst) const
+  {
+    static_cast<const kernel_retval<DecompositionType>*>(this)->evalTo(dst);
+  }
+
+  protected:
+    const DecompositionType& m_dec;
+    Index m_rank, m_cols;
+};
+
+} // end namespace internal
+
+#define EIGEN_MAKE_KERNEL_HELPERS(DecompositionType) \
+  typedef typename DecompositionType::MatrixType MatrixType; \
+  typedef typename MatrixType::Scalar Scalar; \
+  typedef typename MatrixType::RealScalar RealScalar; \
+  typedef Eigen::internal::kernel_retval_base<DecompositionType> Base; \
+  using Base::dec; \
+  using Base::rank; \
+  using Base::rows; \
+  using Base::cols; \
+  kernel_retval(const DecompositionType& dec) : Base(dec) {}
+
+} // end namespace Eigen
+
+#endif // EIGEN_MISC_KERNEL_H
diff --git a/third-party/Eigen/src/misc/RealSvd2x2.h b/third-party/Eigen/src/misc/RealSvd2x2.h
new file mode 100644
index 00000000..abb4d3c2
--- /dev/null
+++ b/third-party/Eigen/src/misc/RealSvd2x2.h
@@ -0,0 +1,55 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2013-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_REALSVD2X2_H
+#define EIGEN_REALSVD2X2_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename MatrixType, typename RealScalar, typename Index>
+void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
+                         JacobiRotation<RealScalar> *j_left,
+                         JacobiRotation<RealScalar> *j_right)
+{
+  using std::sqrt;
+  using std::abs;
+  Matrix<RealScalar,2,2> m;
+  m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)),
+       numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q));
+  JacobiRotation<RealScalar> rot1;
+  RealScalar t = m.coeff(0,0) + m.coeff(1,1);
+  RealScalar d = m.coeff(1,0) - m.coeff(0,1);
+
+  if(abs(d) < (std::numeric_limits<RealScalar>::min)())
+  {
+    rot1.s() = RealScalar(0);
+    rot1.c() = RealScalar(1);
+  }
+  else
+  {
+    // If d!=0, then t/d cannot overflow because the magnitude of the
+    // entries forming d are not too small compared to the ones forming t.
+    RealScalar u = t / d;
+    RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u));
+    rot1.s() = RealScalar(1) / tmp;
+    rot1.c() = u / tmp;
+  }
+  m.applyOnTheLeft(0,1,rot1);
+  j_right->makeJacobi(m,0,1);
+  *j_left = rot1 * j_right->transpose();
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_REALSVD2X2_H
diff --git a/third-party/Eigen/src/misc/blas.h b/third-party/Eigen/src/misc/blas.h
new file mode 100644
index 00000000..25215b15
--- /dev/null
+++ b/third-party/Eigen/src/misc/blas.h
@@ -0,0 +1,440 @@
+#ifndef BLAS_H
+#define BLAS_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#define BLASFUNC(FUNC) FUNC##_
+
+#ifdef __WIN64__
+typedef long long BLASLONG;
+typedef unsigned long long BLASULONG;
+#else
+typedef long BLASLONG;
+typedef unsigned long BLASULONG;
+#endif
+
+int    BLASFUNC(xerbla)(const char *, int *info, int);
+
+float  BLASFUNC(sdot)  (int *, float  *, int *, float  *, int *);
+float  BLASFUNC(sdsdot)(int *, float  *,        float  *, int *, float  *, int *);
+
+double BLASFUNC(dsdot) (int *, float  *, int *, float  *, int *);
+double BLASFUNC(ddot)  (int *, double *, int *, double *, int *);
+double BLASFUNC(qdot)  (int *, double *, int *, double *, int *);
+
+int  BLASFUNC(cdotuw)  (int *, float  *, int *, float  *, int *, float*);
+int  BLASFUNC(cdotcw)  (int *, float  *, int *, float  *, int *, float*);
+int  BLASFUNC(zdotuw)  (int *, double  *, int *, double  *, int *, double*);
+int  BLASFUNC(zdotcw)  (int *, double  *, int *, double  *, int *, double*);
+
+int    BLASFUNC(saxpy) (const int *, const float  *, const float  *, const int *, float  *, const int *);
+int    BLASFUNC(daxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int    BLASFUNC(qaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int    BLASFUNC(caxpy) (const int *, const float  *, const float  *, const int *, float  *, const int *);
+int    BLASFUNC(zaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int    BLASFUNC(xaxpy) (const int *, const double *, const double *, const int *, double *, const int *);
+int    BLASFUNC(caxpyc)(const int *, const float  *, const float  *, const int *, float  *, const int *);
+int    BLASFUNC(zaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
+int    BLASFUNC(xaxpyc)(const int *, const double *, const double *, const int *, double *, const int *);
+
+int    BLASFUNC(scopy) (int *, float  *, int *, float  *, int *);
+int    BLASFUNC(dcopy) (int *, double *, int *, double *, int *);
+int    BLASFUNC(qcopy) (int *, double *, int *, double *, int *);
+int    BLASFUNC(ccopy) (int *, float  *, int *, float  *, int *);
+int    BLASFUNC(zcopy) (int *, double *, int *, double *, int *);
+int    BLASFUNC(xcopy) (int *, double *, int *, double *, int *);
+
+int    BLASFUNC(sswap) (int *, float  *, int *, float  *, int *);
+int    BLASFUNC(dswap) (int *, double *, int *, double *, int *);
+int    BLASFUNC(qswap) (int *, double *, int *, double *, int *);
+int    BLASFUNC(cswap) (int *, float  *, int *, float  *, int *);
+int    BLASFUNC(zswap) (int *, double *, int *, double *, int *);
+int    BLASFUNC(xswap) (int *, double *, int *, double *, int *);
+
+float  BLASFUNC(sasum) (int *, float  *, int *);
+float  BLASFUNC(scasum)(int *, float  *, int *);
+double BLASFUNC(dasum) (int *, double *, int *);
+double BLASFUNC(qasum) (int *, double *, int *);
+double BLASFUNC(dzasum)(int *, double *, int *);
+double BLASFUNC(qxasum)(int *, double *, int *);
+
+int    BLASFUNC(isamax)(int *, float  *, int *);
+int    BLASFUNC(idamax)(int *, double *, int *);
+int    BLASFUNC(iqamax)(int *, double *, int *);
+int    BLASFUNC(icamax)(int *, float  *, int *);
+int    BLASFUNC(izamax)(int *, double *, int *);
+int    BLASFUNC(ixamax)(int *, double *, int *);
+
+int    BLASFUNC(ismax) (int *, float  *, int *);
+int    BLASFUNC(idmax) (int *, double *, int *);
+int    BLASFUNC(iqmax) (int *, double *, int *);
+int    BLASFUNC(icmax) (int *, float  *, int *);
+int    BLASFUNC(izmax) (int *, double *, int *);
+int    BLASFUNC(ixmax) (int *, double *, int *);
+
+int    BLASFUNC(isamin)(int *, float  *, int *);
+int    BLASFUNC(idamin)(int *, double *, int *);
+int    BLASFUNC(iqamin)(int *, double *, int *);
+int    BLASFUNC(icamin)(int *, float  *, int *);
+int    BLASFUNC(izamin)(int *, double *, int *);
+int    BLASFUNC(ixamin)(int *, double *, int *);
+
+int    BLASFUNC(ismin)(int *, float  *, int *);
+int    BLASFUNC(idmin)(int *, double *, int *);
+int    BLASFUNC(iqmin)(int *, double *, int *);
+int    BLASFUNC(icmin)(int *, float  *, int *);
+int    BLASFUNC(izmin)(int *, double *, int *);
+int    BLASFUNC(ixmin)(int *, double *, int *);
+
+float  BLASFUNC(samax) (int *, float  *, int *);
+double BLASFUNC(damax) (int *, double *, int *);
+double BLASFUNC(qamax) (int *, double *, int *);
+float  BLASFUNC(scamax)(int *, float  *, int *);
+double BLASFUNC(dzamax)(int *, double *, int *);
+double BLASFUNC(qxamax)(int *, double *, int *);
+
+float  BLASFUNC(samin) (int *, float  *, int *);
+double BLASFUNC(damin) (int *, double *, int *);
+double BLASFUNC(qamin) (int *, double *, int *);
+float  BLASFUNC(scamin)(int *, float  *, int *);
+double BLASFUNC(dzamin)(int *, double *, int *);
+double BLASFUNC(qxamin)(int *, double *, int *);
+
+float  BLASFUNC(smax)  (int *, float  *, int *);
+double BLASFUNC(dmax)  (int *, double *, int *);
+double BLASFUNC(qmax)  (int *, double *, int *);
+float  BLASFUNC(scmax) (int *, float  *, int *);
+double BLASFUNC(dzmax) (int *, double *, int *);
+double BLASFUNC(qxmax) (int *, double *, int *);
+
+float  BLASFUNC(smin)  (int *, float  *, int *);
+double BLASFUNC(dmin)  (int *, double *, int *);
+double BLASFUNC(qmin)  (int *, double *, int *);
+float  BLASFUNC(scmin) (int *, float  *, int *);
+double BLASFUNC(dzmin) (int *, double *, int *);
+double BLASFUNC(qxmin) (int *, double *, int *);
+
+int    BLASFUNC(sscal) (int *,  float  *, float  *, int *);
+int    BLASFUNC(dscal) (int *,  double *, double *, int *);
+int    BLASFUNC(qscal) (int *,  double *, double *, int *);
+int    BLASFUNC(cscal) (int *,  float  *, float  *, int *);
+int    BLASFUNC(zscal) (int *,  double *, double *, int *);
+int    BLASFUNC(xscal) (int *,  double *, double *, int *);
+int    BLASFUNC(csscal)(int *,  float  *, float  *, int *);
+int    BLASFUNC(zdscal)(int *,  double *, double *, int *);
+int    BLASFUNC(xqscal)(int *,  double *, double *, int *);
+
+float  BLASFUNC(snrm2) (int *, float  *, int *);
+float  BLASFUNC(scnrm2)(int *, float  *, int *);
+
+double BLASFUNC(dnrm2) (int *, double *, int *);
+double BLASFUNC(qnrm2) (int *, double *, int *);
+double BLASFUNC(dznrm2)(int *, double *, int *);
+double BLASFUNC(qxnrm2)(int *, double *, int *);
+
+int    BLASFUNC(srot)  (int *, float  *, int *, float  *, int *, float  *, float  *);
+int    BLASFUNC(drot)  (int *, double *, int *, double *, int *, double *, double *);
+int    BLASFUNC(qrot)  (int *, double *, int *, double *, int *, double *, double *);
+int    BLASFUNC(csrot) (int *, float  *, int *, float  *, int *, float  *, float  *);
+int    BLASFUNC(zdrot) (int *, double *, int *, double *, int *, double *, double *);
+int    BLASFUNC(xqrot) (int *, double *, int *, double *, int *, double *, double *);
+
+int    BLASFUNC(srotg) (float  *, float  *, float  *, float  *);
+int    BLASFUNC(drotg) (double *, double *, double *, double *);
+int    BLASFUNC(qrotg) (double *, double *, double *, double *);
+int    BLASFUNC(crotg) (float  *, float  *, float  *, float  *);
+int    BLASFUNC(zrotg) (double *, double *, double *, double *);
+int    BLASFUNC(xrotg) (double *, double *, double *, double *);
+
+int    BLASFUNC(srotmg)(float  *, float  *, float  *, float  *, float  *);
+int    BLASFUNC(drotmg)(double *, double *, double *, double *, double *);
+
+int    BLASFUNC(srotm) (int *, float  *, int *, float  *, int *, float  *);
+int    BLASFUNC(drotm) (int *, double *, int *, double *, int *, double *);
+int    BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *);
+
+/* Level 2 routines */
+
+int BLASFUNC(sger)(int *,    int *, float *,  float *, int *,
+		   float *,  int *, float *,  int *);
+int BLASFUNC(dger)(int *,    int *, double *, double *, int *,
+		   double *, int *, double *, int *);
+int BLASFUNC(qger)(int *,    int *, double *, double *, int *,
+		   double *, int *, double *, int *);
+int BLASFUNC(cgeru)(int *,    int *, float *,  float *, int *,
+		    float *,  int *, float *,  int *);
+int BLASFUNC(cgerc)(int *,    int *, float *,  float *, int *,
+		    float *,  int *, float *,  int *);
+int BLASFUNC(zgeru)(int *,    int *, double *, double *, int *,
+		    double *, int *, double *, int *);
+int BLASFUNC(zgerc)(int *,    int *, double *, double *, int *,
+		    double *, int *, double *, int *);
+int BLASFUNC(xgeru)(int *,    int *, double *, double *, int *,
+		    double *, int *, double *, int *);
+int BLASFUNC(xgerc)(int *,    int *, double *, double *, int *,
+		    double *, int *, double *, int *);
+
+int BLASFUNC(sgemv)(const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cgemv)(const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xgemv)(const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(strsv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(dtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrsv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(ztrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrsv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(stpsv) (char *, char *, char *, int *, float  *, float  *, int *);
+int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(qtpsv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(ctpsv) (char *, char *, char *, int *, float  *, float  *, int *);
+int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *);
+
+int BLASFUNC(strmv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(dtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrmv) (const char *, const char *, const char *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(ztrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrmv) (const char *, const char *, const char *, const int *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(stpmv) (char *, char *, char *, int *, float  *, float  *, int *);
+int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(qtpmv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(ctpmv) (char *, char *, char *, int *, float  *, float  *, int *);
+int BLASFUNC(ztpmv) (char *, char *, char *, int *, double *, double *, int *);
+int BLASFUNC(xtpmv) (char *, char *, char *, int *, double *, double *, int *);
+
+int BLASFUNC(stbmv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
+int BLASFUNC(dtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(qtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(ctbmv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
+int BLASFUNC(ztbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(xtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+
+int BLASFUNC(stbsv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
+int BLASFUNC(dtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(qtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
+int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
+
+int BLASFUNC(ssymv) (const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(sspmv) (char *, int *, float  *, float *,
+		     float  *, int *, float *, float *, int *);
+int BLASFUNC(dspmv) (char *, int *, double  *, double *,
+		     double  *, int *, double *, double *, int *);
+int BLASFUNC(qspmv) (char *, int *, double  *, double *,
+		     double  *, int *, double *, double *, int *);
+
+int BLASFUNC(ssyr) (const char *, const int *, const float   *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(dsyr) (const char *, const int *, const double  *, const double *, const int *, double *, const int *);
+int BLASFUNC(qsyr) (const char *, const int *, const double  *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(ssyr2) (const char *, const int *, const float   *, const float  *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(dsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(qsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(csyr2) (const char *, const int *, const float   *, const float  *, const int *, const float  *, const int *, float  *, const int *);
+int BLASFUNC(zsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
+int BLASFUNC(xsyr2) (const char *, const int *, const double  *, const double *, const int *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(sspr) (char *, int *, float   *, float  *, int *,
+		    float  *);
+int BLASFUNC(dspr) (char *, int *, double  *, double *, int *,
+		    double *);
+int BLASFUNC(qspr) (char *, int *, double  *, double *, int *,
+		    double *);
+
+int BLASFUNC(sspr2) (char *, int *, float   *,
+		     float  *, int *, float  *, int *, float  *);
+int BLASFUNC(dspr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+int BLASFUNC(qspr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+int BLASFUNC(cspr2) (char *, int *, float   *,
+		     float  *, int *, float  *, int *, float  *);
+int BLASFUNC(zspr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+int BLASFUNC(xspr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+
+int BLASFUNC(cher) (char *, int *, float   *, float  *, int *,
+		    float  *, int *);
+int BLASFUNC(zher) (char *, int *, double  *, double *, int *,
+		    double *, int *);
+int BLASFUNC(xher) (char *, int *, double  *, double *, int *,
+		    double *, int *);
+
+int BLASFUNC(chpr) (char *, int *, float   *, float  *, int *, float  *);
+int BLASFUNC(zhpr) (char *, int *, double  *, double *, int *, double *);
+int BLASFUNC(xhpr) (char *, int *, double  *, double *, int *, double *);
+
+int BLASFUNC(cher2) (char *, int *, float   *,
+		     float  *, int *, float  *, int *, float  *, int *);
+int BLASFUNC(zher2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *, int *);
+int BLASFUNC(xher2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *, int *);
+
+int BLASFUNC(chpr2) (char *, int *, float   *,
+		     float  *, int *, float  *, int *, float  *);
+int BLASFUNC(zhpr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+int BLASFUNC(xhpr2) (char *, int *, double  *,
+		     double *, int *, double *, int *, double *);
+
+int BLASFUNC(chemv) (const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xhemv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(chpmv) (char *, int *, float  *, float *,
+		     float  *, int *, float *, float *, int *);
+int BLASFUNC(zhpmv) (char *, int *, double  *, double *,
+		     double  *, int *, double *, double *, int *);
+int BLASFUNC(xhpmv) (char *, int *, double  *, double *,
+		     double  *, int *, double *, double *, int *);
+
+int BLASFUNC(snorm)(char *, int *, int *, float  *, int *);
+int BLASFUNC(dnorm)(char *, int *, int *, double *, int *);
+int BLASFUNC(cnorm)(char *, int *, int *, float  *, int *);
+int BLASFUNC(znorm)(char *, int *, int *, double *, int *);
+
+int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float  *, float  *, int *,
+		    float  *, int *, float  *, float  *, int *);
+int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float  *, float  *, int *,
+		    float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+
+int BLASFUNC(ssbmv)(char *, int *, int *, float  *, float  *, int *,
+		    float  *, int *, float  *, float  *, int *);
+int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(csbmv)(char *, int *, int *, float  *, float  *, int *,
+		    float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+
+int BLASFUNC(chbmv)(char *, int *, int *, float  *, float  *, int *,
+		    float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *,
+		    double *, int *, double *, double *, int *);
+
+/* Level 3 routines */
+
+int BLASFUNC(sgemm)(const char *, const char *, const int *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cgemm)(const char *, const char *, const int *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xgemm)(const char *, const char *, const int *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *,
+	   float  *, int *, float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *,
+	   double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *,
+	   double *, int *, double *, int *, double *, double *, int *);
+
+int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *,
+		     float *, float  *, int *, float  *, int *,
+		     float *, float  *, int *);
+int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *,
+		     double *, double  *, int *, double  *, int *,
+		     double *, double  *, int *);
+int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *,
+		     float *, float  *, int *, float  *, int *,
+		     float *, float  *, int *);
+int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *,
+		     double *, double  *, int *, double  *, int *,
+		     double *, double  *, int *);
+
+int BLASFUNC(strsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
+int BLASFUNC(dtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
+int BLASFUNC(ztrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrsm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(strmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
+int BLASFUNC(dtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(qtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(ctrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const float *,  const float *,  const int *, float *,  const int *);
+int BLASFUNC(ztrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+int BLASFUNC(xtrmm)(const char *, const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, double *, const int *);
+
+int BLASFUNC(ssymm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(csymm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsymm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(csymm3m)(char *, char *, int *, int *, float  *, float  *, int *, float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *);
+
+int BLASFUNC(ssyrk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(qsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(csyrk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsyrk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(ssyr2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(dsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(qsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(csyr2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(xsyr2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+
+int BLASFUNC(chemm)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xhemm)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(chemm3m)(char *, char *, int *, int *, float  *, float  *, int *,
+	   float  *, int *, float  *, float  *, int *);
+int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *,
+	   double *, int *, double *, double *, int *);
+int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *,
+	   double *, int *, double *, double *, int *);
+
+int BLASFUNC(cherk)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xherk)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, double *, const int *);
+
+int BLASFUNC(cher2k)(const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xher2k)(const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(cher2m)(const char *, const char *, const char *, const int *, const int *, const float  *, const float  *, const int *, const float *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+int BLASFUNC(xher2m)(const char *, const char *, const char *, const int *, const int *, const double *, const double *, const int *, const double*, const int *, const double *, double *, const int *);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third-party/Eigen/src/misc/lapack.h b/third-party/Eigen/src/misc/lapack.h
new file mode 100644
index 00000000..249f3575
--- /dev/null
+++ b/third-party/Eigen/src/misc/lapack.h
@@ -0,0 +1,152 @@
+#ifndef LAPACK_H
+#define LAPACK_H
+
+#include "blas.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+int BLASFUNC(csymv) (const char *, const int *, const float  *, const float  *, const int *, const float  *, const int *, const float  *, float  *, const int *);
+int BLASFUNC(zsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+int BLASFUNC(xsymv) (const char *, const int *, const double *, const double *, const int *, const double *, const int *, const double *, double *, const int *);
+
+
+int BLASFUNC(cspmv) (char *, int *, float  *, float *,
+         float  *, int *, float *, float *, int *);
+int BLASFUNC(zspmv) (char *, int *, double  *, double *,
+         double  *, int *, double *, double *, int *);
+int BLASFUNC(xspmv) (char *, int *, double  *, double *,
+         double  *, int *, double *, double *, int *);
+
+int BLASFUNC(csyr) (char *, int *, float   *, float  *, int *,
+        float  *, int *);
+int BLASFUNC(zsyr) (char *, int *, double  *, double *, int *,
+        double *, int *);
+int BLASFUNC(xsyr) (char *, int *, double  *, double *, int *,
+        double *, int *);
+
+int BLASFUNC(cspr) (char *, int *, float   *, float  *, int *,
+        float  *);
+int BLASFUNC(zspr) (char *, int *, double  *, double *, int *,
+        double *);
+int BLASFUNC(xspr) (char *, int *, double  *, double *, int *,
+        double *);
+
+int BLASFUNC(sgemt)(char *, int *, int *, float  *, float  *, int *,
+        float  *, int *);
+int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *,
+        double *, int *);
+int BLASFUNC(cgemt)(char *, int *, int *, float  *, float  *, int *,
+        float  *, int *);
+int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *,
+        double *, int *);
+
+int BLASFUNC(sgema)(char *, char *, int *, int *, float  *,
+        float  *, int *, float *, float  *, int *, float *, int *);
+int BLASFUNC(dgema)(char *, char *, int *, int *, double *,
+        double *, int *, double*, double *, int *, double*, int *);
+int BLASFUNC(cgema)(char *, char *, int *, int *, float  *,
+        float  *, int *, float *, float  *, int *, float *, int *);
+int BLASFUNC(zgema)(char *, char *, int *, int *, double *,
+        double *, int *, double*, double *, int *, double*, int *);
+
+int BLASFUNC(sgems)(char *, char *, int *, int *, float  *,
+        float  *, int *, float *, float  *, int *, float *, int *);
+int BLASFUNC(dgems)(char *, char *, int *, int *, double *,
+        double *, int *, double*, double *, int *, double*, int *);
+int BLASFUNC(cgems)(char *, char *, int *, int *, float  *,
+        float  *, int *, float *, float  *, int *, float *, int *);
+int BLASFUNC(zgems)(char *, char *, int *, int *, double *,
+        double *, int *, double*, double *, int *, double*, int *);
+
+int BLASFUNC(sgetf2)(int *, int *, float  *, int *, int *, int *);
+int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(cgetf2)(int *, int *, float  *, int *, int *, int *);
+int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);
+
+int BLASFUNC(sgetrf)(int *, int *, float  *, int *, int *, int *);
+int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(cgetrf)(int *, int *, float  *, int *, int *, int *);
+int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);
+int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);
+
+int BLASFUNC(slaswp)(int *, float  *, int *, int *, int *, int *, int *);
+int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(claswp)(int *, float  *, int *, int *, int *, int *, int *);
+int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);
+int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);
+
+int BLASFUNC(sgetrs)(char *, int *, int *, float  *, int *, int *, float  *, int *, int *);
+int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(cgetrs)(char *, int *, int *, float  *, int *, int *, float  *, int *, int *);
+int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
+
+int BLASFUNC(sgesv)(int *, int *, float  *, int *, int *, float *, int *, int *);
+int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(cgesv)(int *, int *, float  *, int *, int *, float *, int *, int *);
+int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
+
+int BLASFUNC(spotf2)(char *, int *, float  *, int *, int *);
+int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotf2)(char *, int *, float  *, int *, int *);
+int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(spotrf)(char *, int *, float  *, int *, int *);
+int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotrf)(char *, int *, float  *, int *, int *);
+int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(slauu2)(char *, int *, float  *, int *, int *);
+int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(clauu2)(char *, int *, float  *, int *, int *);
+int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);
+int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(slauum)(char *, int *, float  *, int *, int *);
+int BLASFUNC(dlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(qlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(clauum)(char *, int *, float  *, int *, int *);
+int BLASFUNC(zlauum)(char *, int *, double *, int *, int *);
+int BLASFUNC(xlauum)(char *, int *, double *, int *, int *);
+
+int BLASFUNC(strti2)(char *, char *, int *, float  *, int *, int *);
+int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(ctrti2)(char *, char *, int *, float  *, int *, int *);
+int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);
+
+int BLASFUNC(strtri)(char *, char *, int *, float  *, int *, int *);
+int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(ctrtri)(char *, char *, int *, float  *, int *, int *);
+int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);
+int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);
+
+int BLASFUNC(spotri)(char *, int *, float  *, int *, int *);
+int BLASFUNC(dpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(qpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(cpotri)(char *, int *, float  *, int *, int *);
+int BLASFUNC(zpotri)(char *, int *, double *, int *, int *);
+int BLASFUNC(xpotri)(char *, int *, double *, int *, int *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/third-party/Eigen/src/misc/lapacke.h b/third-party/Eigen/src/misc/lapacke.h
new file mode 100644
index 00000000..8c7e79b0
--- /dev/null
+++ b/third-party/Eigen/src/misc/lapacke.h
@@ -0,0 +1,16291 @@
+/*****************************************************************************
+  Copyright (c) 2010, Intel Corp.
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+  THE POSSIBILITY OF SUCH DAMAGE.
+******************************************************************************
+* Contents: Native C interface to LAPACK
+* Author: Intel Corporation
+* Generated November, 2011
+*****************************************************************************/
+
+#ifndef _MKL_LAPACKE_H_
+
+#ifndef _LAPACKE_H_
+#define _LAPACKE_H_
+
+/*
+*  Turn on HAVE_LAPACK_CONFIG_H to redefine C-LAPACK datatypes
+*/
+#ifdef HAVE_LAPACK_CONFIG_H
+#include "lapacke_config.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include <stdlib.h>
+
+#ifndef lapack_int
+#define lapack_int     int
+#endif
+
+#ifndef lapack_logical
+#define lapack_logical lapack_int
+#endif
+
+/* Complex types are structures equivalent to the
+* Fortran complex types COMPLEX(4) and COMPLEX(8).
+*
+* One can also redefine the types with his own types
+* for example by including in the code definitions like
+*
+* #define lapack_complex_float std::complex<float>
+* #define lapack_complex_double std::complex<double>
+*
+* or define these types in the command line:
+*
+* -Dlapack_complex_float="std::complex<float>"
+* -Dlapack_complex_double="std::complex<double>"
+*/
+
+#ifndef LAPACK_COMPLEX_CUSTOM
+
+/* Complex type (single precision) */
+#ifndef lapack_complex_float
+#include <complex.h>
+#define lapack_complex_float    float _Complex
+#endif
+
+#ifndef lapack_complex_float_real
+#define lapack_complex_float_real(z)       (creal(z))
+#endif
+
+#ifndef lapack_complex_float_imag
+#define lapack_complex_float_imag(z)       (cimag(z))
+#endif
+
+lapack_complex_float lapack_make_complex_float( float re, float im );
+
+/* Complex type (double precision) */
+#ifndef lapack_complex_double
+#include <complex.h>
+#define lapack_complex_double   double _Complex
+#endif
+
+#ifndef lapack_complex_double_real
+#define lapack_complex_double_real(z)      (creal(z))
+#endif
+
+#ifndef lapack_complex_double_imag
+#define lapack_complex_double_imag(z)       (cimag(z))
+#endif
+
+lapack_complex_double lapack_make_complex_double( double re, double im );
+
+#endif
+
+#ifndef LAPACKE_malloc
+#define LAPACKE_malloc( size ) malloc( size )
+#endif
+#ifndef LAPACKE_free
+#define LAPACKE_free( p )      free( p )
+#endif
+
+#define LAPACK_C2INT( x ) (lapack_int)(*((float*)&x ))
+#define LAPACK_Z2INT( x ) (lapack_int)(*((double*)&x ))
+
+#define LAPACK_ROW_MAJOR               101
+#define LAPACK_COL_MAJOR               102
+
+#define LAPACK_WORK_MEMORY_ERROR       -1010
+#define LAPACK_TRANSPOSE_MEMORY_ERROR  -1011
+
+/* Callback logical functions of one, two, or three arguments are used
+*  to select eigenvalues to sort to the top left of the Schur form.
+*  The value is selected if function returns TRUE (non-zero). */
+
+typedef lapack_logical (*LAPACK_S_SELECT2) ( const float*, const float* );
+typedef lapack_logical (*LAPACK_S_SELECT3)
+    ( const float*, const float*, const float* );
+typedef lapack_logical (*LAPACK_D_SELECT2) ( const double*, const double* );
+typedef lapack_logical (*LAPACK_D_SELECT3)
+    ( const double*, const double*, const double* );
+
+typedef lapack_logical (*LAPACK_C_SELECT1) ( const lapack_complex_float* );
+typedef lapack_logical (*LAPACK_C_SELECT2)
+    ( const lapack_complex_float*, const lapack_complex_float* );
+typedef lapack_logical (*LAPACK_Z_SELECT1) ( const lapack_complex_double* );
+typedef lapack_logical (*LAPACK_Z_SELECT2)
+    ( const lapack_complex_double*, const lapack_complex_double* );
+
+#include "lapacke_mangling.h"
+
+#define LAPACK_lsame LAPACK_GLOBAL(lsame,LSAME)
+lapack_logical LAPACK_lsame( char* ca,  char* cb,
+                              lapack_int lca, lapack_int lcb );
+
+/* C-LAPACK function prototypes */
+
+lapack_int LAPACKE_sbdsdc( int matrix_order, char uplo, char compq,
+                           lapack_int n, float* d, float* e, float* u,
+                           lapack_int ldu, float* vt, lapack_int ldvt, float* q,
+                           lapack_int* iq );
+lapack_int LAPACKE_dbdsdc( int matrix_order, char uplo, char compq,
+                           lapack_int n, double* d, double* e, double* u,
+                           lapack_int ldu, double* vt, lapack_int ldvt,
+                           double* q, lapack_int* iq );
+
+lapack_int LAPACKE_sbdsqr( int matrix_order, char uplo, lapack_int n,
+                           lapack_int ncvt, lapack_int nru, lapack_int ncc,
+                           float* d, float* e, float* vt, lapack_int ldvt,
+                           float* u, lapack_int ldu, float* c, lapack_int ldc );
+lapack_int LAPACKE_dbdsqr( int matrix_order, char uplo, lapack_int n,
+                           lapack_int ncvt, lapack_int nru, lapack_int ncc,
+                           double* d, double* e, double* vt, lapack_int ldvt,
+                           double* u, lapack_int ldu, double* c,
+                           lapack_int ldc );
+lapack_int LAPACKE_cbdsqr( int matrix_order, char uplo, lapack_int n,
+                           lapack_int ncvt, lapack_int nru, lapack_int ncc,
+                           float* d, float* e, lapack_complex_float* vt,
+                           lapack_int ldvt, lapack_complex_float* u,
+                           lapack_int ldu, lapack_complex_float* c,
+                           lapack_int ldc );
+lapack_int LAPACKE_zbdsqr( int matrix_order, char uplo, lapack_int n,
+                           lapack_int ncvt, lapack_int nru, lapack_int ncc,
+                           double* d, double* e, lapack_complex_double* vt,
+                           lapack_int ldvt, lapack_complex_double* u,
+                           lapack_int ldu, lapack_complex_double* c,
+                           lapack_int ldc );
+
+lapack_int LAPACKE_sdisna( char job, lapack_int m, lapack_int n, const float* d,
+                           float* sep );
+lapack_int LAPACKE_ddisna( char job, lapack_int m, lapack_int n,
+                           const double* d, double* sep );
+
+lapack_int LAPACKE_sgbbrd( int matrix_order, char vect, lapack_int m,
+                           lapack_int n, lapack_int ncc, lapack_int kl,
+                           lapack_int ku, float* ab, lapack_int ldab, float* d,
+                           float* e, float* q, lapack_int ldq, float* pt,
+                           lapack_int ldpt, float* c, lapack_int ldc );
+lapack_int LAPACKE_dgbbrd( int matrix_order, char vect, lapack_int m,
+                           lapack_int n, lapack_int ncc, lapack_int kl,
+                           lapack_int ku, double* ab, lapack_int ldab,
+                           double* d, double* e, double* q, lapack_int ldq,
+                           double* pt, lapack_int ldpt, double* c,
+                           lapack_int ldc );
+lapack_int LAPACKE_cgbbrd( int matrix_order, char vect, lapack_int m,
+                           lapack_int n, lapack_int ncc, lapack_int kl,
+                           lapack_int ku, lapack_complex_float* ab,
+                           lapack_int ldab, float* d, float* e,
+                           lapack_complex_float* q, lapack_int ldq,
+                           lapack_complex_float* pt, lapack_int ldpt,
+                           lapack_complex_float* c, lapack_int ldc );
+lapack_int LAPACKE_zgbbrd( int matrix_order, char vect, lapack_int m,
+                           lapack_int n, lapack_int ncc, lapack_int kl,
+                           lapack_int ku, lapack_complex_double* ab,
+                           lapack_int ldab, double* d, double* e,
+                           lapack_complex_double* q, lapack_int ldq,
+                           lapack_complex_double* pt, lapack_int ldpt,
+                           lapack_complex_double* c, lapack_int ldc );
+
+lapack_int LAPACKE_sgbcon( int matrix_order, char norm, lapack_int n,
+                           lapack_int kl, lapack_int ku, const float* ab,
+                           lapack_int ldab, const lapack_int* ipiv, float anorm,
+                           float* rcond );
+lapack_int LAPACKE_dgbcon( int matrix_order, char norm, lapack_int n,
+                           lapack_int kl, lapack_int ku, const double* ab,
+                           lapack_int ldab, const lapack_int* ipiv,
+                           double anorm, double* rcond );
+lapack_int LAPACKE_cgbcon( int matrix_order, char norm, lapack_int n,
+                           lapack_int kl, lapack_int ku,
+                           const lapack_complex_float* ab, lapack_int ldab,
+                           const lapack_int* ipiv, float anorm, float* rcond );
+lapack_int LAPACKE_zgbcon( int matrix_order, char norm, lapack_int n,
+                           lapack_int kl, lapack_int ku,
+                           const lapack_complex_double* ab, lapack_int ldab,
+                           const lapack_int* ipiv, double anorm,
+                           double* rcond );
+
+lapack_int LAPACKE_sgbequ( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku, const float* ab,
+                           lapack_int ldab, float* r, float* c, float* rowcnd,
+                           float* colcnd, float* amax );
+lapack_int LAPACKE_dgbequ( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku, const double* ab,
+                           lapack_int ldab, double* r, double* c,
+                           double* rowcnd, double* colcnd, double* amax );
+lapack_int LAPACKE_cgbequ( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku,
+                           const lapack_complex_float* ab, lapack_int ldab,
+                           float* r, float* c, float* rowcnd, float* colcnd,
+                           float* amax );
+lapack_int LAPACKE_zgbequ( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku,
+                           const lapack_complex_double* ab, lapack_int ldab,
+                           double* r, double* c, double* rowcnd, double* colcnd,
+                           double* amax );
+
+lapack_int LAPACKE_sgbequb( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_int kl, lapack_int ku, const float* ab,
+                            lapack_int ldab, float* r, float* c, float* rowcnd,
+                            float* colcnd, float* amax );
+lapack_int LAPACKE_dgbequb( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_int kl, lapack_int ku, const double* ab,
+                            lapack_int ldab, double* r, double* c,
+                            double* rowcnd, double* colcnd, double* amax );
+lapack_int LAPACKE_cgbequb( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_int kl, lapack_int ku,
+                            const lapack_complex_float* ab, lapack_int ldab,
+                            float* r, float* c, float* rowcnd, float* colcnd,
+                            float* amax );
+lapack_int LAPACKE_zgbequb( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_int kl, lapack_int ku,
+                            const lapack_complex_double* ab, lapack_int ldab,
+                            double* r, double* c, double* rowcnd,
+                            double* colcnd, double* amax );
+
+lapack_int LAPACKE_sgbrfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int kl, lapack_int ku, lapack_int nrhs,
+                           const float* ab, lapack_int ldab, const float* afb,
+                           lapack_int ldafb, const lapack_int* ipiv,
+                           const float* b, lapack_int ldb, float* x,
+                           lapack_int ldx, float* ferr, float* berr );
+lapack_int LAPACKE_dgbrfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int kl, lapack_int ku, lapack_int nrhs,
+                           const double* ab, lapack_int ldab, const double* afb,
+                           lapack_int ldafb, const lapack_int* ipiv,
+                           const double* b, lapack_int ldb, double* x,
+                           lapack_int ldx, double* ferr, double* berr );
+lapack_int LAPACKE_cgbrfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int kl, lapack_int ku, lapack_int nrhs,
+                           const lapack_complex_float* ab, lapack_int ldab,
+                           const lapack_complex_float* afb, lapack_int ldafb,
+                           const lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zgbrfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int kl, lapack_int ku, lapack_int nrhs,
+                           const lapack_complex_double* ab, lapack_int ldab,
+                           const lapack_complex_double* afb, lapack_int ldafb,
+                           const lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_sgbrfsx( int matrix_order, char trans, char equed,
+                            lapack_int n, lapack_int kl, lapack_int ku,
+                            lapack_int nrhs, const float* ab, lapack_int ldab,
+                            const float* afb, lapack_int ldafb,
+                            const lapack_int* ipiv, const float* r,
+                            const float* c, const float* b, lapack_int ldb,
+                            float* x, lapack_int ldx, float* rcond, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_dgbrfsx( int matrix_order, char trans, char equed,
+                            lapack_int n, lapack_int kl, lapack_int ku,
+                            lapack_int nrhs, const double* ab, lapack_int ldab,
+                            const double* afb, lapack_int ldafb,
+                            const lapack_int* ipiv, const double* r,
+                            const double* c, const double* b, lapack_int ldb,
+                            double* x, lapack_int ldx, double* rcond,
+                            double* berr, lapack_int n_err_bnds,
+                            double* err_bnds_norm, double* err_bnds_comp,
+                            lapack_int nparams, double* params );
+lapack_int LAPACKE_cgbrfsx( int matrix_order, char trans, char equed,
+                            lapack_int n, lapack_int kl, lapack_int ku,
+                            lapack_int nrhs, const lapack_complex_float* ab,
+                            lapack_int ldab, const lapack_complex_float* afb,
+                            lapack_int ldafb, const lapack_int* ipiv,
+                            const float* r, const float* c,
+                            const lapack_complex_float* b, lapack_int ldb,
+                            lapack_complex_float* x, lapack_int ldx,
+                            float* rcond, float* berr, lapack_int n_err_bnds,
+                            float* err_bnds_norm, float* err_bnds_comp,
+                            lapack_int nparams, float* params );
+lapack_int LAPACKE_zgbrfsx( int matrix_order, char trans, char equed,
+                            lapack_int n, lapack_int kl, lapack_int ku,
+                            lapack_int nrhs, const lapack_complex_double* ab,
+                            lapack_int ldab, const lapack_complex_double* afb,
+                            lapack_int ldafb, const lapack_int* ipiv,
+                            const double* r, const double* c,
+                            const lapack_complex_double* b, lapack_int ldb,
+                            lapack_complex_double* x, lapack_int ldx,
+                            double* rcond, double* berr, lapack_int n_err_bnds,
+                            double* err_bnds_norm, double* err_bnds_comp,
+                            lapack_int nparams, double* params );
+
+lapack_int LAPACKE_sgbsv( int matrix_order, lapack_int n, lapack_int kl,
+                          lapack_int ku, lapack_int nrhs, float* ab,
+                          lapack_int ldab, lapack_int* ipiv, float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_dgbsv( int matrix_order, lapack_int n, lapack_int kl,
+                          lapack_int ku, lapack_int nrhs, double* ab,
+                          lapack_int ldab, lapack_int* ipiv, double* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_cgbsv( int matrix_order, lapack_int n, lapack_int kl,
+                          lapack_int ku, lapack_int nrhs,
+                          lapack_complex_float* ab, lapack_int ldab,
+                          lapack_int* ipiv, lapack_complex_float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_zgbsv( int matrix_order, lapack_int n, lapack_int kl,
+                          lapack_int ku, lapack_int nrhs,
+                          lapack_complex_double* ab, lapack_int ldab,
+                          lapack_int* ipiv, lapack_complex_double* b,
+                          lapack_int ldb );
+
+lapack_int LAPACKE_sgbsvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int kl, lapack_int ku,
+                           lapack_int nrhs, float* ab, lapack_int ldab,
+                           float* afb, lapack_int ldafb, lapack_int* ipiv,
+                           char* equed, float* r, float* c, float* b,
+                           lapack_int ldb, float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr,
+                           float* rpivot );
+lapack_int LAPACKE_dgbsvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int kl, lapack_int ku,
+                           lapack_int nrhs, double* ab, lapack_int ldab,
+                           double* afb, lapack_int ldafb, lapack_int* ipiv,
+                           char* equed, double* r, double* c, double* b,
+                           lapack_int ldb, double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr,
+                           double* rpivot );
+lapack_int LAPACKE_cgbsvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int kl, lapack_int ku,
+                           lapack_int nrhs, lapack_complex_float* ab,
+                           lapack_int ldab, lapack_complex_float* afb,
+                           lapack_int ldafb, lapack_int* ipiv, char* equed,
+                           float* r, float* c, lapack_complex_float* b,
+                           lapack_int ldb, lapack_complex_float* x,
+                           lapack_int ldx, float* rcond, float* ferr,
+                           float* berr, float* rpivot );
+lapack_int LAPACKE_zgbsvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int kl, lapack_int ku,
+                           lapack_int nrhs, lapack_complex_double* ab,
+                           lapack_int ldab, lapack_complex_double* afb,
+                           lapack_int ldafb, lapack_int* ipiv, char* equed,
+                           double* r, double* c, lapack_complex_double* b,
+                           lapack_int ldb, lapack_complex_double* x,
+                           lapack_int ldx, double* rcond, double* ferr,
+                           double* berr, double* rpivot );
+
+lapack_int LAPACKE_sgbsvxx( int matrix_order, char fact, char trans,
+                            lapack_int n, lapack_int kl, lapack_int ku,
+                            lapack_int nrhs, float* ab, lapack_int ldab,
+                            float* afb, lapack_int ldafb, lapack_int* ipiv,
+                            char* equed, float* r, float* c, float* b,
+                            lapack_int ldb, float* x, lapack_int ldx,
+                            float* rcond, float* rpvgrw, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_dgbsvxx( int matrix_order, char fact, char trans,
+                            lapack_int n, lapack_int kl, lapack_int ku,
+                            lapack_int nrhs, double* ab, lapack_int ldab,
+                            double* afb, lapack_int ldafb, lapack_int* ipiv,
+                            char* equed, double* r, double* c, double* b,
+                            lapack_int ldb, double* x, lapack_int ldx,
+                            double* rcond, double* rpvgrw, double* berr,
+                            lapack_int n_err_bnds, double* err_bnds_norm,
+                            double* err_bnds_comp, lapack_int nparams,
+                            double* params );
+lapack_int LAPACKE_cgbsvxx( int matrix_order, char fact, char trans,
+                            lapack_int n, lapack_int kl, lapack_int ku,
+                            lapack_int nrhs, lapack_complex_float* ab,
+                            lapack_int ldab, lapack_complex_float* afb,
+                            lapack_int ldafb, lapack_int* ipiv, char* equed,
+                            float* r, float* c, lapack_complex_float* b,
+                            lapack_int ldb, lapack_complex_float* x,
+                            lapack_int ldx, float* rcond, float* rpvgrw,
+                            float* berr, lapack_int n_err_bnds,
+                            float* err_bnds_norm, float* err_bnds_comp,
+                            lapack_int nparams, float* params );
+lapack_int LAPACKE_zgbsvxx( int matrix_order, char fact, char trans,
+                            lapack_int n, lapack_int kl, lapack_int ku,
+                            lapack_int nrhs, lapack_complex_double* ab,
+                            lapack_int ldab, lapack_complex_double* afb,
+                            lapack_int ldafb, lapack_int* ipiv, char* equed,
+                            double* r, double* c, lapack_complex_double* b,
+                            lapack_int ldb, lapack_complex_double* x,
+                            lapack_int ldx, double* rcond, double* rpvgrw,
+                            double* berr, lapack_int n_err_bnds,
+                            double* err_bnds_norm, double* err_bnds_comp,
+                            lapack_int nparams, double* params );
+
+lapack_int LAPACKE_sgbtrf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku, float* ab,
+                           lapack_int ldab, lapack_int* ipiv );
+lapack_int LAPACKE_dgbtrf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku, double* ab,
+                           lapack_int ldab, lapack_int* ipiv );
+lapack_int LAPACKE_cgbtrf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku,
+                           lapack_complex_float* ab, lapack_int ldab,
+                           lapack_int* ipiv );
+lapack_int LAPACKE_zgbtrf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku,
+                           lapack_complex_double* ab, lapack_int ldab,
+                           lapack_int* ipiv );
+
+lapack_int LAPACKE_sgbtrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int kl, lapack_int ku, lapack_int nrhs,
+                           const float* ab, lapack_int ldab,
+                           const lapack_int* ipiv, float* b, lapack_int ldb );
+lapack_int LAPACKE_dgbtrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int kl, lapack_int ku, lapack_int nrhs,
+                           const double* ab, lapack_int ldab,
+                           const lapack_int* ipiv, double* b, lapack_int ldb );
+lapack_int LAPACKE_cgbtrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int kl, lapack_int ku, lapack_int nrhs,
+                           const lapack_complex_float* ab, lapack_int ldab,
+                           const lapack_int* ipiv, lapack_complex_float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_zgbtrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int kl, lapack_int ku, lapack_int nrhs,
+                           const lapack_complex_double* ab, lapack_int ldab,
+                           const lapack_int* ipiv, lapack_complex_double* b,
+                           lapack_int ldb );
+
+lapack_int LAPACKE_sgebak( int matrix_order, char job, char side, lapack_int n,
+                           lapack_int ilo, lapack_int ihi, const float* scale,
+                           lapack_int m, float* v, lapack_int ldv );
+lapack_int LAPACKE_dgebak( int matrix_order, char job, char side, lapack_int n,
+                           lapack_int ilo, lapack_int ihi, const double* scale,
+                           lapack_int m, double* v, lapack_int ldv );
+lapack_int LAPACKE_cgebak( int matrix_order, char job, char side, lapack_int n,
+                           lapack_int ilo, lapack_int ihi, const float* scale,
+                           lapack_int m, lapack_complex_float* v,
+                           lapack_int ldv );
+lapack_int LAPACKE_zgebak( int matrix_order, char job, char side, lapack_int n,
+                           lapack_int ilo, lapack_int ihi, const double* scale,
+                           lapack_int m, lapack_complex_double* v,
+                           lapack_int ldv );
+
+lapack_int LAPACKE_sgebal( int matrix_order, char job, lapack_int n, float* a,
+                           lapack_int lda, lapack_int* ilo, lapack_int* ihi,
+                           float* scale );
+lapack_int LAPACKE_dgebal( int matrix_order, char job, lapack_int n, double* a,
+                           lapack_int lda, lapack_int* ilo, lapack_int* ihi,
+                           double* scale );
+lapack_int LAPACKE_cgebal( int matrix_order, char job, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int* ilo, lapack_int* ihi, float* scale );
+lapack_int LAPACKE_zgebal( int matrix_order, char job, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* ilo, lapack_int* ihi, double* scale );
+
+lapack_int LAPACKE_sgebrd( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, float* d, float* e,
+                           float* tauq, float* taup );
+lapack_int LAPACKE_dgebrd( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, double* d, double* e,
+                           double* tauq, double* taup );
+lapack_int LAPACKE_cgebrd( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda, float* d,
+                           float* e, lapack_complex_float* tauq,
+                           lapack_complex_float* taup );
+lapack_int LAPACKE_zgebrd( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda, double* d,
+                           double* e, lapack_complex_double* tauq,
+                           lapack_complex_double* taup );
+
+lapack_int LAPACKE_sgecon( int matrix_order, char norm, lapack_int n,
+                           const float* a, lapack_int lda, float anorm,
+                           float* rcond );
+lapack_int LAPACKE_dgecon( int matrix_order, char norm, lapack_int n,
+                           const double* a, lapack_int lda, double anorm,
+                           double* rcond );
+lapack_int LAPACKE_cgecon( int matrix_order, char norm, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           float anorm, float* rcond );
+lapack_int LAPACKE_zgecon( int matrix_order, char norm, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           double anorm, double* rcond );
+
+lapack_int LAPACKE_sgeequ( int matrix_order, lapack_int m, lapack_int n,
+                           const float* a, lapack_int lda, float* r, float* c,
+                           float* rowcnd, float* colcnd, float* amax );
+lapack_int LAPACKE_dgeequ( int matrix_order, lapack_int m, lapack_int n,
+                           const double* a, lapack_int lda, double* r,
+                           double* c, double* rowcnd, double* colcnd,
+                           double* amax );
+lapack_int LAPACKE_cgeequ( int matrix_order, lapack_int m, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           float* r, float* c, float* rowcnd, float* colcnd,
+                           float* amax );
+lapack_int LAPACKE_zgeequ( int matrix_order, lapack_int m, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           double* r, double* c, double* rowcnd, double* colcnd,
+                           double* amax );
+
+lapack_int LAPACKE_sgeequb( int matrix_order, lapack_int m, lapack_int n,
+                            const float* a, lapack_int lda, float* r, float* c,
+                            float* rowcnd, float* colcnd, float* amax );
+lapack_int LAPACKE_dgeequb( int matrix_order, lapack_int m, lapack_int n,
+                            const double* a, lapack_int lda, double* r,
+                            double* c, double* rowcnd, double* colcnd,
+                            double* amax );
+lapack_int LAPACKE_cgeequb( int matrix_order, lapack_int m, lapack_int n,
+                            const lapack_complex_float* a, lapack_int lda,
+                            float* r, float* c, float* rowcnd, float* colcnd,
+                            float* amax );
+lapack_int LAPACKE_zgeequb( int matrix_order, lapack_int m, lapack_int n,
+                            const lapack_complex_double* a, lapack_int lda,
+                            double* r, double* c, double* rowcnd,
+                            double* colcnd, double* amax );
+
+lapack_int LAPACKE_sgees( int matrix_order, char jobvs, char sort,
+                          LAPACK_S_SELECT2 select, lapack_int n, float* a,
+                          lapack_int lda, lapack_int* sdim, float* wr,
+                          float* wi, float* vs, lapack_int ldvs );
+lapack_int LAPACKE_dgees( int matrix_order, char jobvs, char sort,
+                          LAPACK_D_SELECT2 select, lapack_int n, double* a,
+                          lapack_int lda, lapack_int* sdim, double* wr,
+                          double* wi, double* vs, lapack_int ldvs );
+lapack_int LAPACKE_cgees( int matrix_order, char jobvs, char sort,
+                          LAPACK_C_SELECT1 select, lapack_int n,
+                          lapack_complex_float* a, lapack_int lda,
+                          lapack_int* sdim, lapack_complex_float* w,
+                          lapack_complex_float* vs, lapack_int ldvs );
+lapack_int LAPACKE_zgees( int matrix_order, char jobvs, char sort,
+                          LAPACK_Z_SELECT1 select, lapack_int n,
+                          lapack_complex_double* a, lapack_int lda,
+                          lapack_int* sdim, lapack_complex_double* w,
+                          lapack_complex_double* vs, lapack_int ldvs );
+
+lapack_int LAPACKE_sgeesx( int matrix_order, char jobvs, char sort,
+                           LAPACK_S_SELECT2 select, char sense, lapack_int n,
+                           float* a, lapack_int lda, lapack_int* sdim,
+                           float* wr, float* wi, float* vs, lapack_int ldvs,
+                           float* rconde, float* rcondv );
+lapack_int LAPACKE_dgeesx( int matrix_order, char jobvs, char sort,
+                           LAPACK_D_SELECT2 select, char sense, lapack_int n,
+                           double* a, lapack_int lda, lapack_int* sdim,
+                           double* wr, double* wi, double* vs, lapack_int ldvs,
+                           double* rconde, double* rcondv );
+lapack_int LAPACKE_cgeesx( int matrix_order, char jobvs, char sort,
+                           LAPACK_C_SELECT1 select, char sense, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int* sdim, lapack_complex_float* w,
+                           lapack_complex_float* vs, lapack_int ldvs,
+                           float* rconde, float* rcondv );
+lapack_int LAPACKE_zgeesx( int matrix_order, char jobvs, char sort,
+                           LAPACK_Z_SELECT1 select, char sense, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* sdim, lapack_complex_double* w,
+                           lapack_complex_double* vs, lapack_int ldvs,
+                           double* rconde, double* rcondv );
+
+lapack_int LAPACKE_sgeev( int matrix_order, char jobvl, char jobvr,
+                          lapack_int n, float* a, lapack_int lda, float* wr,
+                          float* wi, float* vl, lapack_int ldvl, float* vr,
+                          lapack_int ldvr );
+lapack_int LAPACKE_dgeev( int matrix_order, char jobvl, char jobvr,
+                          lapack_int n, double* a, lapack_int lda, double* wr,
+                          double* wi, double* vl, lapack_int ldvl, double* vr,
+                          lapack_int ldvr );
+lapack_int LAPACKE_cgeev( int matrix_order, char jobvl, char jobvr,
+                          lapack_int n, lapack_complex_float* a, lapack_int lda,
+                          lapack_complex_float* w, lapack_complex_float* vl,
+                          lapack_int ldvl, lapack_complex_float* vr,
+                          lapack_int ldvr );
+lapack_int LAPACKE_zgeev( int matrix_order, char jobvl, char jobvr,
+                          lapack_int n, lapack_complex_double* a,
+                          lapack_int lda, lapack_complex_double* w,
+                          lapack_complex_double* vl, lapack_int ldvl,
+                          lapack_complex_double* vr, lapack_int ldvr );
+
+lapack_int LAPACKE_sgeevx( int matrix_order, char balanc, char jobvl,
+                           char jobvr, char sense, lapack_int n, float* a,
+                           lapack_int lda, float* wr, float* wi, float* vl,
+                           lapack_int ldvl, float* vr, lapack_int ldvr,
+                           lapack_int* ilo, lapack_int* ihi, float* scale,
+                           float* abnrm, float* rconde, float* rcondv );
+lapack_int LAPACKE_dgeevx( int matrix_order, char balanc, char jobvl,
+                           char jobvr, char sense, lapack_int n, double* a,
+                           lapack_int lda, double* wr, double* wi, double* vl,
+                           lapack_int ldvl, double* vr, lapack_int ldvr,
+                           lapack_int* ilo, lapack_int* ihi, double* scale,
+                           double* abnrm, double* rconde, double* rcondv );
+lapack_int LAPACKE_cgeevx( int matrix_order, char balanc, char jobvl,
+                           char jobvr, char sense, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* w, lapack_complex_float* vl,
+                           lapack_int ldvl, lapack_complex_float* vr,
+                           lapack_int ldvr, lapack_int* ilo, lapack_int* ihi,
+                           float* scale, float* abnrm, float* rconde,
+                           float* rcondv );
+lapack_int LAPACKE_zgeevx( int matrix_order, char balanc, char jobvl,
+                           char jobvr, char sense, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* w, lapack_complex_double* vl,
+                           lapack_int ldvl, lapack_complex_double* vr,
+                           lapack_int ldvr, lapack_int* ilo, lapack_int* ihi,
+                           double* scale, double* abnrm, double* rconde,
+                           double* rcondv );
+
+lapack_int LAPACKE_sgehrd( int matrix_order, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, float* a, lapack_int lda,
+                           float* tau );
+lapack_int LAPACKE_dgehrd( int matrix_order, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, double* a, lapack_int lda,
+                           double* tau );
+lapack_int LAPACKE_cgehrd( int matrix_order, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* tau );
+lapack_int LAPACKE_zgehrd( int matrix_order, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* tau );
+
+lapack_int LAPACKE_sgejsv( int matrix_order, char joba, char jobu, char jobv,
+                           char jobr, char jobt, char jobp, lapack_int m,
+                           lapack_int n, float* a, lapack_int lda, float* sva,
+                           float* u, lapack_int ldu, float* v, lapack_int ldv,
+                           float* stat, lapack_int* istat );
+lapack_int LAPACKE_dgejsv( int matrix_order, char joba, char jobu, char jobv,
+                           char jobr, char jobt, char jobp, lapack_int m,
+                           lapack_int n, double* a, lapack_int lda, double* sva,
+                           double* u, lapack_int ldu, double* v, lapack_int ldv,
+                           double* stat, lapack_int* istat );
+
+lapack_int LAPACKE_sgelq2( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, float* tau );
+lapack_int LAPACKE_dgelq2( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, double* tau );
+lapack_int LAPACKE_cgelq2( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* tau );
+lapack_int LAPACKE_zgelq2( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* tau );
+
+lapack_int LAPACKE_sgelqf( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, float* tau );
+lapack_int LAPACKE_dgelqf( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, double* tau );
+lapack_int LAPACKE_cgelqf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* tau );
+lapack_int LAPACKE_zgelqf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* tau );
+
+lapack_int LAPACKE_sgels( int matrix_order, char trans, lapack_int m,
+                          lapack_int n, lapack_int nrhs, float* a,
+                          lapack_int lda, float* b, lapack_int ldb );
+lapack_int LAPACKE_dgels( int matrix_order, char trans, lapack_int m,
+                          lapack_int n, lapack_int nrhs, double* a,
+                          lapack_int lda, double* b, lapack_int ldb );
+lapack_int LAPACKE_cgels( int matrix_order, char trans, lapack_int m,
+                          lapack_int n, lapack_int nrhs,
+                          lapack_complex_float* a, lapack_int lda,
+                          lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zgels( int matrix_order, char trans, lapack_int m,
+                          lapack_int n, lapack_int nrhs,
+                          lapack_complex_double* a, lapack_int lda,
+                          lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_sgelsd( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, float* a, lapack_int lda, float* b,
+                           lapack_int ldb, float* s, float rcond,
+                           lapack_int* rank );
+lapack_int LAPACKE_dgelsd( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, double* a, lapack_int lda,
+                           double* b, lapack_int ldb, double* s, double rcond,
+                           lapack_int* rank );
+lapack_int LAPACKE_cgelsd( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* b,
+                           lapack_int ldb, float* s, float rcond,
+                           lapack_int* rank );
+lapack_int LAPACKE_zgelsd( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb, double* s, double rcond,
+                           lapack_int* rank );
+
+lapack_int LAPACKE_sgelss( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, float* a, lapack_int lda, float* b,
+                           lapack_int ldb, float* s, float rcond,
+                           lapack_int* rank );
+lapack_int LAPACKE_dgelss( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, double* a, lapack_int lda,
+                           double* b, lapack_int ldb, double* s, double rcond,
+                           lapack_int* rank );
+lapack_int LAPACKE_cgelss( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* b,
+                           lapack_int ldb, float* s, float rcond,
+                           lapack_int* rank );
+lapack_int LAPACKE_zgelss( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb, double* s, double rcond,
+                           lapack_int* rank );
+
+lapack_int LAPACKE_sgelsy( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, float* a, lapack_int lda, float* b,
+                           lapack_int ldb, lapack_int* jpvt, float rcond,
+                           lapack_int* rank );
+lapack_int LAPACKE_dgelsy( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, double* a, lapack_int lda,
+                           double* b, lapack_int ldb, lapack_int* jpvt,
+                           double rcond, lapack_int* rank );
+lapack_int LAPACKE_cgelsy( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* b,
+                           lapack_int ldb, lapack_int* jpvt, float rcond,
+                           lapack_int* rank );
+lapack_int LAPACKE_zgelsy( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nrhs, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb, lapack_int* jpvt, double rcond,
+                           lapack_int* rank );
+
+lapack_int LAPACKE_sgeqlf( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, float* tau );
+lapack_int LAPACKE_dgeqlf( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, double* tau );
+lapack_int LAPACKE_cgeqlf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* tau );
+lapack_int LAPACKE_zgeqlf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* tau );
+
+lapack_int LAPACKE_sgeqp3( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, lapack_int* jpvt,
+                           float* tau );
+lapack_int LAPACKE_dgeqp3( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, lapack_int* jpvt,
+                           double* tau );
+lapack_int LAPACKE_cgeqp3( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int* jpvt, lapack_complex_float* tau );
+lapack_int LAPACKE_zgeqp3( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* jpvt, lapack_complex_double* tau );
+
+lapack_int LAPACKE_sgeqpf( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, lapack_int* jpvt,
+                           float* tau );
+lapack_int LAPACKE_dgeqpf( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, lapack_int* jpvt,
+                           double* tau );
+lapack_int LAPACKE_cgeqpf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int* jpvt, lapack_complex_float* tau );
+lapack_int LAPACKE_zgeqpf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* jpvt, lapack_complex_double* tau );
+
+lapack_int LAPACKE_sgeqr2( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, float* tau );
+lapack_int LAPACKE_dgeqr2( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, double* tau );
+lapack_int LAPACKE_cgeqr2( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* tau );
+lapack_int LAPACKE_zgeqr2( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* tau );
+
+lapack_int LAPACKE_sgeqrf( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, float* tau );
+lapack_int LAPACKE_dgeqrf( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, double* tau );
+lapack_int LAPACKE_cgeqrf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* tau );
+lapack_int LAPACKE_zgeqrf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* tau );
+
+lapack_int LAPACKE_sgeqrfp( int matrix_order, lapack_int m, lapack_int n,
+                            float* a, lapack_int lda, float* tau );
+lapack_int LAPACKE_dgeqrfp( int matrix_order, lapack_int m, lapack_int n,
+                            double* a, lapack_int lda, double* tau );
+lapack_int LAPACKE_cgeqrfp( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_complex_float* a, lapack_int lda,
+                            lapack_complex_float* tau );
+lapack_int LAPACKE_zgeqrfp( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_complex_double* a, lapack_int lda,
+                            lapack_complex_double* tau );
+
+lapack_int LAPACKE_sgerfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const float* a, lapack_int lda,
+                           const float* af, lapack_int ldaf,
+                           const lapack_int* ipiv, const float* b,
+                           lapack_int ldb, float* x, lapack_int ldx,
+                           float* ferr, float* berr );
+lapack_int LAPACKE_dgerfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const double* a, lapack_int lda,
+                           const double* af, lapack_int ldaf,
+                           const lapack_int* ipiv, const double* b,
+                           lapack_int ldb, double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+lapack_int LAPACKE_cgerfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* af,
+                           lapack_int ldaf, const lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zgerfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* af,
+                           lapack_int ldaf, const lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_sgerfsx( int matrix_order, char trans, char equed,
+                            lapack_int n, lapack_int nrhs, const float* a,
+                            lapack_int lda, const float* af, lapack_int ldaf,
+                            const lapack_int* ipiv, const float* r,
+                            const float* c, const float* b, lapack_int ldb,
+                            float* x, lapack_int ldx, float* rcond, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_dgerfsx( int matrix_order, char trans, char equed,
+                            lapack_int n, lapack_int nrhs, const double* a,
+                            lapack_int lda, const double* af, lapack_int ldaf,
+                            const lapack_int* ipiv, const double* r,
+                            const double* c, const double* b, lapack_int ldb,
+                            double* x, lapack_int ldx, double* rcond,
+                            double* berr, lapack_int n_err_bnds,
+                            double* err_bnds_norm, double* err_bnds_comp,
+                            lapack_int nparams, double* params );
+lapack_int LAPACKE_cgerfsx( int matrix_order, char trans, char equed,
+                            lapack_int n, lapack_int nrhs,
+                            const lapack_complex_float* a, lapack_int lda,
+                            const lapack_complex_float* af, lapack_int ldaf,
+                            const lapack_int* ipiv, const float* r,
+                            const float* c, const lapack_complex_float* b,
+                            lapack_int ldb, lapack_complex_float* x,
+                            lapack_int ldx, float* rcond, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_zgerfsx( int matrix_order, char trans, char equed,
+                            lapack_int n, lapack_int nrhs,
+                            const lapack_complex_double* a, lapack_int lda,
+                            const lapack_complex_double* af, lapack_int ldaf,
+                            const lapack_int* ipiv, const double* r,
+                            const double* c, const lapack_complex_double* b,
+                            lapack_int ldb, lapack_complex_double* x,
+                            lapack_int ldx, double* rcond, double* berr,
+                            lapack_int n_err_bnds, double* err_bnds_norm,
+                            double* err_bnds_comp, lapack_int nparams,
+                            double* params );
+
+lapack_int LAPACKE_sgerqf( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, float* tau );
+lapack_int LAPACKE_dgerqf( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, double* tau );
+lapack_int LAPACKE_cgerqf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* tau );
+lapack_int LAPACKE_zgerqf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* tau );
+
+lapack_int LAPACKE_sgesdd( int matrix_order, char jobz, lapack_int m,
+                           lapack_int n, float* a, lapack_int lda, float* s,
+                           float* u, lapack_int ldu, float* vt,
+                           lapack_int ldvt );
+lapack_int LAPACKE_dgesdd( int matrix_order, char jobz, lapack_int m,
+                           lapack_int n, double* a, lapack_int lda, double* s,
+                           double* u, lapack_int ldu, double* vt,
+                           lapack_int ldvt );
+lapack_int LAPACKE_cgesdd( int matrix_order, char jobz, lapack_int m,
+                           lapack_int n, lapack_complex_float* a,
+                           lapack_int lda, float* s, lapack_complex_float* u,
+                           lapack_int ldu, lapack_complex_float* vt,
+                           lapack_int ldvt );
+lapack_int LAPACKE_zgesdd( int matrix_order, char jobz, lapack_int m,
+                           lapack_int n, lapack_complex_double* a,
+                           lapack_int lda, double* s, lapack_complex_double* u,
+                           lapack_int ldu, lapack_complex_double* vt,
+                           lapack_int ldvt );
+
+lapack_int LAPACKE_sgesv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          float* a, lapack_int lda, lapack_int* ipiv, float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_dgesv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          double* a, lapack_int lda, lapack_int* ipiv,
+                          double* b, lapack_int ldb );
+lapack_int LAPACKE_cgesv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          lapack_complex_float* a, lapack_int lda,
+                          lapack_int* ipiv, lapack_complex_float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_zgesv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          lapack_complex_double* a, lapack_int lda,
+                          lapack_int* ipiv, lapack_complex_double* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_dsgesv( int matrix_order, lapack_int n, lapack_int nrhs,
+                           double* a, lapack_int lda, lapack_int* ipiv,
+                           double* b, lapack_int ldb, double* x, lapack_int ldx,
+                           lapack_int* iter );
+lapack_int LAPACKE_zcgesv( int matrix_order, lapack_int n, lapack_int nrhs,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* ipiv, lapack_complex_double* b,
+                           lapack_int ldb, lapack_complex_double* x,
+                           lapack_int ldx, lapack_int* iter );
+
+lapack_int LAPACKE_sgesvd( int matrix_order, char jobu, char jobvt,
+                           lapack_int m, lapack_int n, float* a, lapack_int lda,
+                           float* s, float* u, lapack_int ldu, float* vt,
+                           lapack_int ldvt, float* superb );
+lapack_int LAPACKE_dgesvd( int matrix_order, char jobu, char jobvt,
+                           lapack_int m, lapack_int n, double* a,
+                           lapack_int lda, double* s, double* u, lapack_int ldu,
+                           double* vt, lapack_int ldvt, double* superb );
+lapack_int LAPACKE_cgesvd( int matrix_order, char jobu, char jobvt,
+                           lapack_int m, lapack_int n, lapack_complex_float* a,
+                           lapack_int lda, float* s, lapack_complex_float* u,
+                           lapack_int ldu, lapack_complex_float* vt,
+                           lapack_int ldvt, float* superb );
+lapack_int LAPACKE_zgesvd( int matrix_order, char jobu, char jobvt,
+                           lapack_int m, lapack_int n, lapack_complex_double* a,
+                           lapack_int lda, double* s, lapack_complex_double* u,
+                           lapack_int ldu, lapack_complex_double* vt,
+                           lapack_int ldvt, double* superb );
+
+lapack_int LAPACKE_sgesvj( int matrix_order, char joba, char jobu, char jobv,
+                           lapack_int m, lapack_int n, float* a, lapack_int lda,
+                           float* sva, lapack_int mv, float* v, lapack_int ldv,
+                           float* stat );
+lapack_int LAPACKE_dgesvj( int matrix_order, char joba, char jobu, char jobv,
+                           lapack_int m, lapack_int n, double* a,
+                           lapack_int lda, double* sva, lapack_int mv,
+                           double* v, lapack_int ldv, double* stat );
+
+lapack_int LAPACKE_sgesvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int nrhs, float* a,
+                           lapack_int lda, float* af, lapack_int ldaf,
+                           lapack_int* ipiv, char* equed, float* r, float* c,
+                           float* b, lapack_int ldb, float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr,
+                           float* rpivot );
+lapack_int LAPACKE_dgesvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int nrhs, double* a,
+                           lapack_int lda, double* af, lapack_int ldaf,
+                           lapack_int* ipiv, char* equed, double* r, double* c,
+                           double* b, lapack_int ldb, double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr,
+                           double* rpivot );
+lapack_int LAPACKE_cgesvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int nrhs,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* af, lapack_int ldaf,
+                           lapack_int* ipiv, char* equed, float* r, float* c,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr,
+                           float* rpivot );
+lapack_int LAPACKE_zgesvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int nrhs,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* af, lapack_int ldaf,
+                           lapack_int* ipiv, char* equed, double* r, double* c,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr,
+                           double* rpivot );
+
+lapack_int LAPACKE_sgesvxx( int matrix_order, char fact, char trans,
+                            lapack_int n, lapack_int nrhs, float* a,
+                            lapack_int lda, float* af, lapack_int ldaf,
+                            lapack_int* ipiv, char* equed, float* r, float* c,
+                            float* b, lapack_int ldb, float* x, lapack_int ldx,
+                            float* rcond, float* rpvgrw, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_dgesvxx( int matrix_order, char fact, char trans,
+                            lapack_int n, lapack_int nrhs, double* a,
+                            lapack_int lda, double* af, lapack_int ldaf,
+                            lapack_int* ipiv, char* equed, double* r, double* c,
+                            double* b, lapack_int ldb, double* x,
+                            lapack_int ldx, double* rcond, double* rpvgrw,
+                            double* berr, lapack_int n_err_bnds,
+                            double* err_bnds_norm, double* err_bnds_comp,
+                            lapack_int nparams, double* params );
+lapack_int LAPACKE_cgesvxx( int matrix_order, char fact, char trans,
+                            lapack_int n, lapack_int nrhs,
+                            lapack_complex_float* a, lapack_int lda,
+                            lapack_complex_float* af, lapack_int ldaf,
+                            lapack_int* ipiv, char* equed, float* r, float* c,
+                            lapack_complex_float* b, lapack_int ldb,
+                            lapack_complex_float* x, lapack_int ldx,
+                            float* rcond, float* rpvgrw, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_zgesvxx( int matrix_order, char fact, char trans,
+                            lapack_int n, lapack_int nrhs,
+                            lapack_complex_double* a, lapack_int lda,
+                            lapack_complex_double* af, lapack_int ldaf,
+                            lapack_int* ipiv, char* equed, double* r, double* c,
+                            lapack_complex_double* b, lapack_int ldb,
+                            lapack_complex_double* x, lapack_int ldx,
+                            double* rcond, double* rpvgrw, double* berr,
+                            lapack_int n_err_bnds, double* err_bnds_norm,
+                            double* err_bnds_comp, lapack_int nparams,
+                            double* params );
+
+lapack_int LAPACKE_sgetf2( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, lapack_int* ipiv );
+lapack_int LAPACKE_dgetf2( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, lapack_int* ipiv );
+lapack_int LAPACKE_cgetf2( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int* ipiv );
+lapack_int LAPACKE_zgetf2( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* ipiv );
+
+lapack_int LAPACKE_sgetrf( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, lapack_int* ipiv );
+lapack_int LAPACKE_dgetrf( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, lapack_int* ipiv );
+lapack_int LAPACKE_cgetrf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int* ipiv );
+lapack_int LAPACKE_zgetrf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* ipiv );
+
+lapack_int LAPACKE_sgetri( int matrix_order, lapack_int n, float* a,
+                           lapack_int lda, const lapack_int* ipiv );
+lapack_int LAPACKE_dgetri( int matrix_order, lapack_int n, double* a,
+                           lapack_int lda, const lapack_int* ipiv );
+lapack_int LAPACKE_cgetri( int matrix_order, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           const lapack_int* ipiv );
+lapack_int LAPACKE_zgetri( int matrix_order, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           const lapack_int* ipiv );
+
+lapack_int LAPACKE_sgetrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const float* a, lapack_int lda,
+                           const lapack_int* ipiv, float* b, lapack_int ldb );
+lapack_int LAPACKE_dgetrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const double* a, lapack_int lda,
+                           const lapack_int* ipiv, double* b, lapack_int ldb );
+lapack_int LAPACKE_cgetrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* a,
+                           lapack_int lda, const lapack_int* ipiv,
+                           lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zgetrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* a,
+                           lapack_int lda, const lapack_int* ipiv,
+                           lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_sggbak( int matrix_order, char job, char side, lapack_int n,
+                           lapack_int ilo, lapack_int ihi, const float* lscale,
+                           const float* rscale, lapack_int m, float* v,
+                           lapack_int ldv );
+lapack_int LAPACKE_dggbak( int matrix_order, char job, char side, lapack_int n,
+                           lapack_int ilo, lapack_int ihi, const double* lscale,
+                           const double* rscale, lapack_int m, double* v,
+                           lapack_int ldv );
+lapack_int LAPACKE_cggbak( int matrix_order, char job, char side, lapack_int n,
+                           lapack_int ilo, lapack_int ihi, const float* lscale,
+                           const float* rscale, lapack_int m,
+                           lapack_complex_float* v, lapack_int ldv );
+lapack_int LAPACKE_zggbak( int matrix_order, char job, char side, lapack_int n,
+                           lapack_int ilo, lapack_int ihi, const double* lscale,
+                           const double* rscale, lapack_int m,
+                           lapack_complex_double* v, lapack_int ldv );
+
+lapack_int LAPACKE_sggbal( int matrix_order, char job, lapack_int n, float* a,
+                           lapack_int lda, float* b, lapack_int ldb,
+                           lapack_int* ilo, lapack_int* ihi, float* lscale,
+                           float* rscale );
+lapack_int LAPACKE_dggbal( int matrix_order, char job, lapack_int n, double* a,
+                           lapack_int lda, double* b, lapack_int ldb,
+                           lapack_int* ilo, lapack_int* ihi, double* lscale,
+                           double* rscale );
+lapack_int LAPACKE_cggbal( int matrix_order, char job, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_int* ilo, lapack_int* ihi, float* lscale,
+                           float* rscale );
+lapack_int LAPACKE_zggbal( int matrix_order, char job, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_int* ilo, lapack_int* ihi, double* lscale,
+                           double* rscale );
+
+lapack_int LAPACKE_sgges( int matrix_order, char jobvsl, char jobvsr, char sort,
+                          LAPACK_S_SELECT3 selctg, lapack_int n, float* a,
+                          lapack_int lda, float* b, lapack_int ldb,
+                          lapack_int* sdim, float* alphar, float* alphai,
+                          float* beta, float* vsl, lapack_int ldvsl, float* vsr,
+                          lapack_int ldvsr );
+lapack_int LAPACKE_dgges( int matrix_order, char jobvsl, char jobvsr, char sort,
+                          LAPACK_D_SELECT3 selctg, lapack_int n, double* a,
+                          lapack_int lda, double* b, lapack_int ldb,
+                          lapack_int* sdim, double* alphar, double* alphai,
+                          double* beta, double* vsl, lapack_int ldvsl,
+                          double* vsr, lapack_int ldvsr );
+lapack_int LAPACKE_cgges( int matrix_order, char jobvsl, char jobvsr, char sort,
+                          LAPACK_C_SELECT2 selctg, lapack_int n,
+                          lapack_complex_float* a, lapack_int lda,
+                          lapack_complex_float* b, lapack_int ldb,
+                          lapack_int* sdim, lapack_complex_float* alpha,
+                          lapack_complex_float* beta, lapack_complex_float* vsl,
+                          lapack_int ldvsl, lapack_complex_float* vsr,
+                          lapack_int ldvsr );
+lapack_int LAPACKE_zgges( int matrix_order, char jobvsl, char jobvsr, char sort,
+                          LAPACK_Z_SELECT2 selctg, lapack_int n,
+                          lapack_complex_double* a, lapack_int lda,
+                          lapack_complex_double* b, lapack_int ldb,
+                          lapack_int* sdim, lapack_complex_double* alpha,
+                          lapack_complex_double* beta,
+                          lapack_complex_double* vsl, lapack_int ldvsl,
+                          lapack_complex_double* vsr, lapack_int ldvsr );
+
+lapack_int LAPACKE_sggesx( int matrix_order, char jobvsl, char jobvsr,
+                           char sort, LAPACK_S_SELECT3 selctg, char sense,
+                           lapack_int n, float* a, lapack_int lda, float* b,
+                           lapack_int ldb, lapack_int* sdim, float* alphar,
+                           float* alphai, float* beta, float* vsl,
+                           lapack_int ldvsl, float* vsr, lapack_int ldvsr,
+                           float* rconde, float* rcondv );
+lapack_int LAPACKE_dggesx( int matrix_order, char jobvsl, char jobvsr,
+                           char sort, LAPACK_D_SELECT3 selctg, char sense,
+                           lapack_int n, double* a, lapack_int lda, double* b,
+                           lapack_int ldb, lapack_int* sdim, double* alphar,
+                           double* alphai, double* beta, double* vsl,
+                           lapack_int ldvsl, double* vsr, lapack_int ldvsr,
+                           double* rconde, double* rcondv );
+lapack_int LAPACKE_cggesx( int matrix_order, char jobvsl, char jobvsr,
+                           char sort, LAPACK_C_SELECT2 selctg, char sense,
+                           lapack_int n, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* b,
+                           lapack_int ldb, lapack_int* sdim,
+                           lapack_complex_float* alpha,
+                           lapack_complex_float* beta,
+                           lapack_complex_float* vsl, lapack_int ldvsl,
+                           lapack_complex_float* vsr, lapack_int ldvsr,
+                           float* rconde, float* rcondv );
+lapack_int LAPACKE_zggesx( int matrix_order, char jobvsl, char jobvsr,
+                           char sort, LAPACK_Z_SELECT2 selctg, char sense,
+                           lapack_int n, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb, lapack_int* sdim,
+                           lapack_complex_double* alpha,
+                           lapack_complex_double* beta,
+                           lapack_complex_double* vsl, lapack_int ldvsl,
+                           lapack_complex_double* vsr, lapack_int ldvsr,
+                           double* rconde, double* rcondv );
+
+lapack_int LAPACKE_sggev( int matrix_order, char jobvl, char jobvr,
+                          lapack_int n, float* a, lapack_int lda, float* b,
+                          lapack_int ldb, float* alphar, float* alphai,
+                          float* beta, float* vl, lapack_int ldvl, float* vr,
+                          lapack_int ldvr );
+lapack_int LAPACKE_dggev( int matrix_order, char jobvl, char jobvr,
+                          lapack_int n, double* a, lapack_int lda, double* b,
+                          lapack_int ldb, double* alphar, double* alphai,
+                          double* beta, double* vl, lapack_int ldvl, double* vr,
+                          lapack_int ldvr );
+lapack_int LAPACKE_cggev( int matrix_order, char jobvl, char jobvr,
+                          lapack_int n, lapack_complex_float* a, lapack_int lda,
+                          lapack_complex_float* b, lapack_int ldb,
+                          lapack_complex_float* alpha,
+                          lapack_complex_float* beta, lapack_complex_float* vl,
+                          lapack_int ldvl, lapack_complex_float* vr,
+                          lapack_int ldvr );
+lapack_int LAPACKE_zggev( int matrix_order, char jobvl, char jobvr,
+                          lapack_int n, lapack_complex_double* a,
+                          lapack_int lda, lapack_complex_double* b,
+                          lapack_int ldb, lapack_complex_double* alpha,
+                          lapack_complex_double* beta,
+                          lapack_complex_double* vl, lapack_int ldvl,
+                          lapack_complex_double* vr, lapack_int ldvr );
+
+lapack_int LAPACKE_sggevx( int matrix_order, char balanc, char jobvl,
+                           char jobvr, char sense, lapack_int n, float* a,
+                           lapack_int lda, float* b, lapack_int ldb,
+                           float* alphar, float* alphai, float* beta, float* vl,
+                           lapack_int ldvl, float* vr, lapack_int ldvr,
+                           lapack_int* ilo, lapack_int* ihi, float* lscale,
+                           float* rscale, float* abnrm, float* bbnrm,
+                           float* rconde, float* rcondv );
+lapack_int LAPACKE_dggevx( int matrix_order, char balanc, char jobvl,
+                           char jobvr, char sense, lapack_int n, double* a,
+                           lapack_int lda, double* b, lapack_int ldb,
+                           double* alphar, double* alphai, double* beta,
+                           double* vl, lapack_int ldvl, double* vr,
+                           lapack_int ldvr, lapack_int* ilo, lapack_int* ihi,
+                           double* lscale, double* rscale, double* abnrm,
+                           double* bbnrm, double* rconde, double* rcondv );
+lapack_int LAPACKE_cggevx( int matrix_order, char balanc, char jobvl,
+                           char jobvr, char sense, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* alpha,
+                           lapack_complex_float* beta, lapack_complex_float* vl,
+                           lapack_int ldvl, lapack_complex_float* vr,
+                           lapack_int ldvr, lapack_int* ilo, lapack_int* ihi,
+                           float* lscale, float* rscale, float* abnrm,
+                           float* bbnrm, float* rconde, float* rcondv );
+lapack_int LAPACKE_zggevx( int matrix_order, char balanc, char jobvl,
+                           char jobvr, char sense, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* alpha,
+                           lapack_complex_double* beta,
+                           lapack_complex_double* vl, lapack_int ldvl,
+                           lapack_complex_double* vr, lapack_int ldvr,
+                           lapack_int* ilo, lapack_int* ihi, double* lscale,
+                           double* rscale, double* abnrm, double* bbnrm,
+                           double* rconde, double* rcondv );
+
+lapack_int LAPACKE_sggglm( int matrix_order, lapack_int n, lapack_int m,
+                           lapack_int p, float* a, lapack_int lda, float* b,
+                           lapack_int ldb, float* d, float* x, float* y );
+lapack_int LAPACKE_dggglm( int matrix_order, lapack_int n, lapack_int m,
+                           lapack_int p, double* a, lapack_int lda, double* b,
+                           lapack_int ldb, double* d, double* x, double* y );
+lapack_int LAPACKE_cggglm( int matrix_order, lapack_int n, lapack_int m,
+                           lapack_int p, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* b,
+                           lapack_int ldb, lapack_complex_float* d,
+                           lapack_complex_float* x, lapack_complex_float* y );
+lapack_int LAPACKE_zggglm( int matrix_order, lapack_int n, lapack_int m,
+                           lapack_int p, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb, lapack_complex_double* d,
+                           lapack_complex_double* x, lapack_complex_double* y );
+
+lapack_int LAPACKE_sgghrd( int matrix_order, char compq, char compz,
+                           lapack_int n, lapack_int ilo, lapack_int ihi,
+                           float* a, lapack_int lda, float* b, lapack_int ldb,
+                           float* q, lapack_int ldq, float* z, lapack_int ldz );
+lapack_int LAPACKE_dgghrd( int matrix_order, char compq, char compz,
+                           lapack_int n, lapack_int ilo, lapack_int ihi,
+                           double* a, lapack_int lda, double* b, lapack_int ldb,
+                           double* q, lapack_int ldq, double* z,
+                           lapack_int ldz );
+lapack_int LAPACKE_cgghrd( int matrix_order, char compq, char compz,
+                           lapack_int n, lapack_int ilo, lapack_int ihi,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* q, lapack_int ldq,
+                           lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zgghrd( int matrix_order, char compq, char compz,
+                           lapack_int n, lapack_int ilo, lapack_int ihi,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* q, lapack_int ldq,
+                           lapack_complex_double* z, lapack_int ldz );
+
+lapack_int LAPACKE_sgglse( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int p, float* a, lapack_int lda, float* b,
+                           lapack_int ldb, float* c, float* d, float* x );
+lapack_int LAPACKE_dgglse( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int p, double* a, lapack_int lda, double* b,
+                           lapack_int ldb, double* c, double* d, double* x );
+lapack_int LAPACKE_cgglse( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int p, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* b,
+                           lapack_int ldb, lapack_complex_float* c,
+                           lapack_complex_float* d, lapack_complex_float* x );
+lapack_int LAPACKE_zgglse( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int p, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb, lapack_complex_double* c,
+                           lapack_complex_double* d, lapack_complex_double* x );
+
+lapack_int LAPACKE_sggqrf( int matrix_order, lapack_int n, lapack_int m,
+                           lapack_int p, float* a, lapack_int lda, float* taua,
+                           float* b, lapack_int ldb, float* taub );
+lapack_int LAPACKE_dggqrf( int matrix_order, lapack_int n, lapack_int m,
+                           lapack_int p, double* a, lapack_int lda,
+                           double* taua, double* b, lapack_int ldb,
+                           double* taub );
+lapack_int LAPACKE_cggqrf( int matrix_order, lapack_int n, lapack_int m,
+                           lapack_int p, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* taua,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* taub );
+lapack_int LAPACKE_zggqrf( int matrix_order, lapack_int n, lapack_int m,
+                           lapack_int p, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* taua,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* taub );
+
+lapack_int LAPACKE_sggrqf( int matrix_order, lapack_int m, lapack_int p,
+                           lapack_int n, float* a, lapack_int lda, float* taua,
+                           float* b, lapack_int ldb, float* taub );
+lapack_int LAPACKE_dggrqf( int matrix_order, lapack_int m, lapack_int p,
+                           lapack_int n, double* a, lapack_int lda,
+                           double* taua, double* b, lapack_int ldb,
+                           double* taub );
+lapack_int LAPACKE_cggrqf( int matrix_order, lapack_int m, lapack_int p,
+                           lapack_int n, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* taua,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* taub );
+lapack_int LAPACKE_zggrqf( int matrix_order, lapack_int m, lapack_int p,
+                           lapack_int n, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* taua,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* taub );
+
+lapack_int LAPACKE_sggsvd( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int n, lapack_int p,
+                           lapack_int* k, lapack_int* l, float* a,
+                           lapack_int lda, float* b, lapack_int ldb,
+                           float* alpha, float* beta, float* u, lapack_int ldu,
+                           float* v, lapack_int ldv, float* q, lapack_int ldq,
+                           lapack_int* iwork );
+lapack_int LAPACKE_dggsvd( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int n, lapack_int p,
+                           lapack_int* k, lapack_int* l, double* a,
+                           lapack_int lda, double* b, lapack_int ldb,
+                           double* alpha, double* beta, double* u,
+                           lapack_int ldu, double* v, lapack_int ldv, double* q,
+                           lapack_int ldq, lapack_int* iwork );
+lapack_int LAPACKE_cggsvd( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int n, lapack_int p,
+                           lapack_int* k, lapack_int* l,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* b, lapack_int ldb,
+                           float* alpha, float* beta, lapack_complex_float* u,
+                           lapack_int ldu, lapack_complex_float* v,
+                           lapack_int ldv, lapack_complex_float* q,
+                           lapack_int ldq, lapack_int* iwork );
+lapack_int LAPACKE_zggsvd( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int n, lapack_int p,
+                           lapack_int* k, lapack_int* l,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb,
+                           double* alpha, double* beta,
+                           lapack_complex_double* u, lapack_int ldu,
+                           lapack_complex_double* v, lapack_int ldv,
+                           lapack_complex_double* q, lapack_int ldq,
+                           lapack_int* iwork );
+
+lapack_int LAPACKE_sggsvp( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int p, lapack_int n, float* a,
+                           lapack_int lda, float* b, lapack_int ldb, float tola,
+                           float tolb, lapack_int* k, lapack_int* l, float* u,
+                           lapack_int ldu, float* v, lapack_int ldv, float* q,
+                           lapack_int ldq );
+lapack_int LAPACKE_dggsvp( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int p, lapack_int n, double* a,
+                           lapack_int lda, double* b, lapack_int ldb,
+                           double tola, double tolb, lapack_int* k,
+                           lapack_int* l, double* u, lapack_int ldu, double* v,
+                           lapack_int ldv, double* q, lapack_int ldq );
+lapack_int LAPACKE_cggsvp( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int p, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* b, lapack_int ldb, float tola,
+                           float tolb, lapack_int* k, lapack_int* l,
+                           lapack_complex_float* u, lapack_int ldu,
+                           lapack_complex_float* v, lapack_int ldv,
+                           lapack_complex_float* q, lapack_int ldq );
+lapack_int LAPACKE_zggsvp( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int p, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb,
+                           double tola, double tolb, lapack_int* k,
+                           lapack_int* l, lapack_complex_double* u,
+                           lapack_int ldu, lapack_complex_double* v,
+                           lapack_int ldv, lapack_complex_double* q,
+                           lapack_int ldq );
+
+lapack_int LAPACKE_sgtcon( char norm, lapack_int n, const float* dl,
+                           const float* d, const float* du, const float* du2,
+                           const lapack_int* ipiv, float anorm, float* rcond );
+lapack_int LAPACKE_dgtcon( char norm, lapack_int n, const double* dl,
+                           const double* d, const double* du, const double* du2,
+                           const lapack_int* ipiv, double anorm,
+                           double* rcond );
+lapack_int LAPACKE_cgtcon( char norm, lapack_int n,
+                           const lapack_complex_float* dl,
+                           const lapack_complex_float* d,
+                           const lapack_complex_float* du,
+                           const lapack_complex_float* du2,
+                           const lapack_int* ipiv, float anorm, float* rcond );
+lapack_int LAPACKE_zgtcon( char norm, lapack_int n,
+                           const lapack_complex_double* dl,
+                           const lapack_complex_double* d,
+                           const lapack_complex_double* du,
+                           const lapack_complex_double* du2,
+                           const lapack_int* ipiv, double anorm,
+                           double* rcond );
+
+lapack_int LAPACKE_sgtrfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const float* dl, const float* d,
+                           const float* du, const float* dlf, const float* df,
+                           const float* duf, const float* du2,
+                           const lapack_int* ipiv, const float* b,
+                           lapack_int ldb, float* x, lapack_int ldx,
+                           float* ferr, float* berr );
+lapack_int LAPACKE_dgtrfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const double* dl, const double* d,
+                           const double* du, const double* dlf,
+                           const double* df, const double* duf,
+                           const double* du2, const lapack_int* ipiv,
+                           const double* b, lapack_int ldb, double* x,
+                           lapack_int ldx, double* ferr, double* berr );
+lapack_int LAPACKE_cgtrfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* dl,
+                           const lapack_complex_float* d,
+                           const lapack_complex_float* du,
+                           const lapack_complex_float* dlf,
+                           const lapack_complex_float* df,
+                           const lapack_complex_float* duf,
+                           const lapack_complex_float* du2,
+                           const lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zgtrfs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* dl,
+                           const lapack_complex_double* d,
+                           const lapack_complex_double* du,
+                           const lapack_complex_double* dlf,
+                           const lapack_complex_double* df,
+                           const lapack_complex_double* duf,
+                           const lapack_complex_double* du2,
+                           const lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_sgtsv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          float* dl, float* d, float* du, float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_dgtsv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          double* dl, double* d, double* du, double* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_cgtsv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          lapack_complex_float* dl, lapack_complex_float* d,
+                          lapack_complex_float* du, lapack_complex_float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_zgtsv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          lapack_complex_double* dl, lapack_complex_double* d,
+                          lapack_complex_double* du, lapack_complex_double* b,
+                          lapack_int ldb );
+
+lapack_int LAPACKE_sgtsvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int nrhs, const float* dl,
+                           const float* d, const float* du, float* dlf,
+                           float* df, float* duf, float* du2, lapack_int* ipiv,
+                           const float* b, lapack_int ldb, float* x,
+                           lapack_int ldx, float* rcond, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_dgtsvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int nrhs, const double* dl,
+                           const double* d, const double* du, double* dlf,
+                           double* df, double* duf, double* du2,
+                           lapack_int* ipiv, const double* b, lapack_int ldb,
+                           double* x, lapack_int ldx, double* rcond,
+                           double* ferr, double* berr );
+lapack_int LAPACKE_cgtsvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_float* dl,
+                           const lapack_complex_float* d,
+                           const lapack_complex_float* du,
+                           lapack_complex_float* dlf, lapack_complex_float* df,
+                           lapack_complex_float* duf, lapack_complex_float* du2,
+                           lapack_int* ipiv, const lapack_complex_float* b,
+                           lapack_int ldb, lapack_complex_float* x,
+                           lapack_int ldx, float* rcond, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zgtsvx( int matrix_order, char fact, char trans,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_double* dl,
+                           const lapack_complex_double* d,
+                           const lapack_complex_double* du,
+                           lapack_complex_double* dlf,
+                           lapack_complex_double* df,
+                           lapack_complex_double* duf,
+                           lapack_complex_double* du2, lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr );
+
+lapack_int LAPACKE_sgttrf( lapack_int n, float* dl, float* d, float* du,
+                           float* du2, lapack_int* ipiv );
+lapack_int LAPACKE_dgttrf( lapack_int n, double* dl, double* d, double* du,
+                           double* du2, lapack_int* ipiv );
+lapack_int LAPACKE_cgttrf( lapack_int n, lapack_complex_float* dl,
+                           lapack_complex_float* d, lapack_complex_float* du,
+                           lapack_complex_float* du2, lapack_int* ipiv );
+lapack_int LAPACKE_zgttrf( lapack_int n, lapack_complex_double* dl,
+                           lapack_complex_double* d, lapack_complex_double* du,
+                           lapack_complex_double* du2, lapack_int* ipiv );
+
+lapack_int LAPACKE_sgttrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const float* dl, const float* d,
+                           const float* du, const float* du2,
+                           const lapack_int* ipiv, float* b, lapack_int ldb );
+lapack_int LAPACKE_dgttrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const double* dl, const double* d,
+                           const double* du, const double* du2,
+                           const lapack_int* ipiv, double* b, lapack_int ldb );
+lapack_int LAPACKE_cgttrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* dl,
+                           const lapack_complex_float* d,
+                           const lapack_complex_float* du,
+                           const lapack_complex_float* du2,
+                           const lapack_int* ipiv, lapack_complex_float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_zgttrs( int matrix_order, char trans, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* dl,
+                           const lapack_complex_double* d,
+                           const lapack_complex_double* du,
+                           const lapack_complex_double* du2,
+                           const lapack_int* ipiv, lapack_complex_double* b,
+                           lapack_int ldb );
+
+lapack_int LAPACKE_chbev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_int kd, lapack_complex_float* ab,
+                          lapack_int ldab, float* w, lapack_complex_float* z,
+                          lapack_int ldz );
+lapack_int LAPACKE_zhbev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_int kd, lapack_complex_double* ab,
+                          lapack_int ldab, double* w, lapack_complex_double* z,
+                          lapack_int ldz );
+
+lapack_int LAPACKE_chbevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_int kd, lapack_complex_float* ab,
+                           lapack_int ldab, float* w, lapack_complex_float* z,
+                           lapack_int ldz );
+lapack_int LAPACKE_zhbevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_int kd, lapack_complex_double* ab,
+                           lapack_int ldab, double* w, lapack_complex_double* z,
+                           lapack_int ldz );
+
+lapack_int LAPACKE_chbevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_int kd,
+                           lapack_complex_float* ab, lapack_int ldab,
+                           lapack_complex_float* q, lapack_int ldq, float vl,
+                           float vu, lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, lapack_complex_float* z,
+                           lapack_int ldz, lapack_int* ifail );
+lapack_int LAPACKE_zhbevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_int kd,
+                           lapack_complex_double* ab, lapack_int ldab,
+                           lapack_complex_double* q, lapack_int ldq, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w,
+                           lapack_complex_double* z, lapack_int ldz,
+                           lapack_int* ifail );
+
+lapack_int LAPACKE_chbgst( int matrix_order, char vect, char uplo, lapack_int n,
+                           lapack_int ka, lapack_int kb,
+                           lapack_complex_float* ab, lapack_int ldab,
+                           const lapack_complex_float* bb, lapack_int ldbb,
+                           lapack_complex_float* x, lapack_int ldx );
+lapack_int LAPACKE_zhbgst( int matrix_order, char vect, char uplo, lapack_int n,
+                           lapack_int ka, lapack_int kb,
+                           lapack_complex_double* ab, lapack_int ldab,
+                           const lapack_complex_double* bb, lapack_int ldbb,
+                           lapack_complex_double* x, lapack_int ldx );
+
+lapack_int LAPACKE_chbgv( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_int ka, lapack_int kb,
+                          lapack_complex_float* ab, lapack_int ldab,
+                          lapack_complex_float* bb, lapack_int ldbb, float* w,
+                          lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zhbgv( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_int ka, lapack_int kb,
+                          lapack_complex_double* ab, lapack_int ldab,
+                          lapack_complex_double* bb, lapack_int ldbb, double* w,
+                          lapack_complex_double* z, lapack_int ldz );
+
+lapack_int LAPACKE_chbgvd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_int ka, lapack_int kb,
+                           lapack_complex_float* ab, lapack_int ldab,
+                           lapack_complex_float* bb, lapack_int ldbb, float* w,
+                           lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zhbgvd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_int ka, lapack_int kb,
+                           lapack_complex_double* ab, lapack_int ldab,
+                           lapack_complex_double* bb, lapack_int ldbb,
+                           double* w, lapack_complex_double* z,
+                           lapack_int ldz );
+
+lapack_int LAPACKE_chbgvx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_int ka, lapack_int kb,
+                           lapack_complex_float* ab, lapack_int ldab,
+                           lapack_complex_float* bb, lapack_int ldbb,
+                           lapack_complex_float* q, lapack_int ldq, float vl,
+                           float vu, lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, lapack_complex_float* z,
+                           lapack_int ldz, lapack_int* ifail );
+lapack_int LAPACKE_zhbgvx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_int ka, lapack_int kb,
+                           lapack_complex_double* ab, lapack_int ldab,
+                           lapack_complex_double* bb, lapack_int ldbb,
+                           lapack_complex_double* q, lapack_int ldq, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w,
+                           lapack_complex_double* z, lapack_int ldz,
+                           lapack_int* ifail );
+
+lapack_int LAPACKE_chbtrd( int matrix_order, char vect, char uplo, lapack_int n,
+                           lapack_int kd, lapack_complex_float* ab,
+                           lapack_int ldab, float* d, float* e,
+                           lapack_complex_float* q, lapack_int ldq );
+lapack_int LAPACKE_zhbtrd( int matrix_order, char vect, char uplo, lapack_int n,
+                           lapack_int kd, lapack_complex_double* ab,
+                           lapack_int ldab, double* d, double* e,
+                           lapack_complex_double* q, lapack_int ldq );
+
+lapack_int LAPACKE_checon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_int* ipiv, float anorm, float* rcond );
+lapack_int LAPACKE_zhecon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_int* ipiv, double anorm,
+                           double* rcond );
+
+lapack_int LAPACKE_cheequb( int matrix_order, char uplo, lapack_int n,
+                            const lapack_complex_float* a, lapack_int lda,
+                            float* s, float* scond, float* amax );
+lapack_int LAPACKE_zheequb( int matrix_order, char uplo, lapack_int n,
+                            const lapack_complex_double* a, lapack_int lda,
+                            double* s, double* scond, double* amax );
+
+lapack_int LAPACKE_cheev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_complex_float* a, lapack_int lda, float* w );
+lapack_int LAPACKE_zheev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_complex_double* a, lapack_int lda, double* w );
+
+lapack_int LAPACKE_cheevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda, float* w );
+lapack_int LAPACKE_zheevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           double* w );
+
+lapack_int LAPACKE_cheevr( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_complex_float* a,
+                           lapack_int lda, float vl, float vu, lapack_int il,
+                           lapack_int iu, float abstol, lapack_int* m, float* w,
+                           lapack_complex_float* z, lapack_int ldz,
+                           lapack_int* isuppz );
+lapack_int LAPACKE_zheevr( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_complex_double* a,
+                           lapack_int lda, double vl, double vu, lapack_int il,
+                           lapack_int iu, double abstol, lapack_int* m,
+                           double* w, lapack_complex_double* z, lapack_int ldz,
+                           lapack_int* isuppz );
+
+lapack_int LAPACKE_cheevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_complex_float* a,
+                           lapack_int lda, float vl, float vu, lapack_int il,
+                           lapack_int iu, float abstol, lapack_int* m, float* w,
+                           lapack_complex_float* z, lapack_int ldz,
+                           lapack_int* ifail );
+lapack_int LAPACKE_zheevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_complex_double* a,
+                           lapack_int lda, double vl, double vu, lapack_int il,
+                           lapack_int iu, double abstol, lapack_int* m,
+                           double* w, lapack_complex_double* z, lapack_int ldz,
+                           lapack_int* ifail );
+
+lapack_int LAPACKE_chegst( int matrix_order, lapack_int itype, char uplo,
+                           lapack_int n, lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_zhegst( int matrix_order, lapack_int itype, char uplo,
+                           lapack_int n, lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* b,
+                           lapack_int ldb );
+
+lapack_int LAPACKE_chegv( int matrix_order, lapack_int itype, char jobz,
+                          char uplo, lapack_int n, lapack_complex_float* a,
+                          lapack_int lda, lapack_complex_float* b,
+                          lapack_int ldb, float* w );
+lapack_int LAPACKE_zhegv( int matrix_order, lapack_int itype, char jobz,
+                          char uplo, lapack_int n, lapack_complex_double* a,
+                          lapack_int lda, lapack_complex_double* b,
+                          lapack_int ldb, double* w );
+
+lapack_int LAPACKE_chegvd( int matrix_order, lapack_int itype, char jobz,
+                           char uplo, lapack_int n, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* b,
+                           lapack_int ldb, float* w );
+lapack_int LAPACKE_zhegvd( int matrix_order, lapack_int itype, char jobz,
+                           char uplo, lapack_int n, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb, double* w );
+
+lapack_int LAPACKE_chegvx( int matrix_order, lapack_int itype, char jobz,
+                           char range, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* b, lapack_int ldb, float vl,
+                           float vu, lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, lapack_complex_float* z,
+                           lapack_int ldz, lapack_int* ifail );
+lapack_int LAPACKE_zhegvx( int matrix_order, lapack_int itype, char jobz,
+                           char range, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w,
+                           lapack_complex_double* z, lapack_int ldz,
+                           lapack_int* ifail );
+
+lapack_int LAPACKE_cherfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* af,
+                           lapack_int ldaf, const lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zherfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* af,
+                           lapack_int ldaf, const lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_cherfsx( int matrix_order, char uplo, char equed,
+                            lapack_int n, lapack_int nrhs,
+                            const lapack_complex_float* a, lapack_int lda,
+                            const lapack_complex_float* af, lapack_int ldaf,
+                            const lapack_int* ipiv, const float* s,
+                            const lapack_complex_float* b, lapack_int ldb,
+                            lapack_complex_float* x, lapack_int ldx,
+                            float* rcond, float* berr, lapack_int n_err_bnds,
+                            float* err_bnds_norm, float* err_bnds_comp,
+                            lapack_int nparams, float* params );
+lapack_int LAPACKE_zherfsx( int matrix_order, char uplo, char equed,
+                            lapack_int n, lapack_int nrhs,
+                            const lapack_complex_double* a, lapack_int lda,
+                            const lapack_complex_double* af, lapack_int ldaf,
+                            const lapack_int* ipiv, const double* s,
+                            const lapack_complex_double* b, lapack_int ldb,
+                            lapack_complex_double* x, lapack_int ldx,
+                            double* rcond, double* berr, lapack_int n_err_bnds,
+                            double* err_bnds_norm, double* err_bnds_comp,
+                            lapack_int nparams, double* params );
+
+lapack_int LAPACKE_chesv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_float* a,
+                          lapack_int lda, lapack_int* ipiv,
+                          lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zhesv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_double* a,
+                          lapack_int lda, lapack_int* ipiv,
+                          lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_chesvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* af,
+                           lapack_int ldaf, lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr );
+lapack_int LAPACKE_zhesvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* af,
+                           lapack_int ldaf, lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr );
+
+lapack_int LAPACKE_chesvxx( int matrix_order, char fact, char uplo,
+                            lapack_int n, lapack_int nrhs,
+                            lapack_complex_float* a, lapack_int lda,
+                            lapack_complex_float* af, lapack_int ldaf,
+                            lapack_int* ipiv, char* equed, float* s,
+                            lapack_complex_float* b, lapack_int ldb,
+                            lapack_complex_float* x, lapack_int ldx,
+                            float* rcond, float* rpvgrw, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_zhesvxx( int matrix_order, char fact, char uplo,
+                            lapack_int n, lapack_int nrhs,
+                            lapack_complex_double* a, lapack_int lda,
+                            lapack_complex_double* af, lapack_int ldaf,
+                            lapack_int* ipiv, char* equed, double* s,
+                            lapack_complex_double* b, lapack_int ldb,
+                            lapack_complex_double* x, lapack_int ldx,
+                            double* rcond, double* rpvgrw, double* berr,
+                            lapack_int n_err_bnds, double* err_bnds_norm,
+                            double* err_bnds_comp, lapack_int nparams,
+                            double* params );
+
+lapack_int LAPACKE_chetrd( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda, float* d,
+                           float* e, lapack_complex_float* tau );
+lapack_int LAPACKE_zhetrd( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda, double* d,
+                           double* e, lapack_complex_double* tau );
+
+lapack_int LAPACKE_chetrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int* ipiv );
+lapack_int LAPACKE_zhetrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* ipiv );
+
+lapack_int LAPACKE_chetri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           const lapack_int* ipiv );
+lapack_int LAPACKE_zhetri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           const lapack_int* ipiv );
+
+lapack_int LAPACKE_chetrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* a,
+                           lapack_int lda, const lapack_int* ipiv,
+                           lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zhetrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* a,
+                           lapack_int lda, const lapack_int* ipiv,
+                           lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_chfrk( int matrix_order, char transr, char uplo, char trans,
+                          lapack_int n, lapack_int k, float alpha,
+                          const lapack_complex_float* a, lapack_int lda,
+                          float beta, lapack_complex_float* c );
+lapack_int LAPACKE_zhfrk( int matrix_order, char transr, char uplo, char trans,
+                          lapack_int n, lapack_int k, double alpha,
+                          const lapack_complex_double* a, lapack_int lda,
+                          double beta, lapack_complex_double* c );
+
+lapack_int LAPACKE_shgeqz( int matrix_order, char job, char compq, char compz,
+                           lapack_int n, lapack_int ilo, lapack_int ihi,
+                           float* h, lapack_int ldh, float* t, lapack_int ldt,
+                           float* alphar, float* alphai, float* beta, float* q,
+                           lapack_int ldq, float* z, lapack_int ldz );
+lapack_int LAPACKE_dhgeqz( int matrix_order, char job, char compq, char compz,
+                           lapack_int n, lapack_int ilo, lapack_int ihi,
+                           double* h, lapack_int ldh, double* t, lapack_int ldt,
+                           double* alphar, double* alphai, double* beta,
+                           double* q, lapack_int ldq, double* z,
+                           lapack_int ldz );
+lapack_int LAPACKE_chgeqz( int matrix_order, char job, char compq, char compz,
+                           lapack_int n, lapack_int ilo, lapack_int ihi,
+                           lapack_complex_float* h, lapack_int ldh,
+                           lapack_complex_float* t, lapack_int ldt,
+                           lapack_complex_float* alpha,
+                           lapack_complex_float* beta, lapack_complex_float* q,
+                           lapack_int ldq, lapack_complex_float* z,
+                           lapack_int ldz );
+lapack_int LAPACKE_zhgeqz( int matrix_order, char job, char compq, char compz,
+                           lapack_int n, lapack_int ilo, lapack_int ihi,
+                           lapack_complex_double* h, lapack_int ldh,
+                           lapack_complex_double* t, lapack_int ldt,
+                           lapack_complex_double* alpha,
+                           lapack_complex_double* beta,
+                           lapack_complex_double* q, lapack_int ldq,
+                           lapack_complex_double* z, lapack_int ldz );
+
+lapack_int LAPACKE_chpcon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_float* ap,
+                           const lapack_int* ipiv, float anorm, float* rcond );
+lapack_int LAPACKE_zhpcon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_double* ap,
+                           const lapack_int* ipiv, double anorm,
+                           double* rcond );
+
+lapack_int LAPACKE_chpev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_complex_float* ap, float* w,
+                          lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zhpev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_complex_double* ap, double* w,
+                          lapack_complex_double* z, lapack_int ldz );
+
+lapack_int LAPACKE_chpevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_complex_float* ap, float* w,
+                           lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zhpevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_complex_double* ap, double* w,
+                           lapack_complex_double* z, lapack_int ldz );
+
+lapack_int LAPACKE_chpevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_complex_float* ap, float vl,
+                           float vu, lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, lapack_complex_float* z,
+                           lapack_int ldz, lapack_int* ifail );
+lapack_int LAPACKE_zhpevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_complex_double* ap, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w,
+                           lapack_complex_double* z, lapack_int ldz,
+                           lapack_int* ifail );
+
+lapack_int LAPACKE_chpgst( int matrix_order, lapack_int itype, char uplo,
+                           lapack_int n, lapack_complex_float* ap,
+                           const lapack_complex_float* bp );
+lapack_int LAPACKE_zhpgst( int matrix_order, lapack_int itype, char uplo,
+                           lapack_int n, lapack_complex_double* ap,
+                           const lapack_complex_double* bp );
+
+lapack_int LAPACKE_chpgv( int matrix_order, lapack_int itype, char jobz,
+                          char uplo, lapack_int n, lapack_complex_float* ap,
+                          lapack_complex_float* bp, float* w,
+                          lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zhpgv( int matrix_order, lapack_int itype, char jobz,
+                          char uplo, lapack_int n, lapack_complex_double* ap,
+                          lapack_complex_double* bp, double* w,
+                          lapack_complex_double* z, lapack_int ldz );
+
+lapack_int LAPACKE_chpgvd( int matrix_order, lapack_int itype, char jobz,
+                           char uplo, lapack_int n, lapack_complex_float* ap,
+                           lapack_complex_float* bp, float* w,
+                           lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zhpgvd( int matrix_order, lapack_int itype, char jobz,
+                           char uplo, lapack_int n, lapack_complex_double* ap,
+                           lapack_complex_double* bp, double* w,
+                           lapack_complex_double* z, lapack_int ldz );
+
+lapack_int LAPACKE_chpgvx( int matrix_order, lapack_int itype, char jobz,
+                           char range, char uplo, lapack_int n,
+                           lapack_complex_float* ap, lapack_complex_float* bp,
+                           float vl, float vu, lapack_int il, lapack_int iu,
+                           float abstol, lapack_int* m, float* w,
+                           lapack_complex_float* z, lapack_int ldz,
+                           lapack_int* ifail );
+lapack_int LAPACKE_zhpgvx( int matrix_order, lapack_int itype, char jobz,
+                           char range, char uplo, lapack_int n,
+                           lapack_complex_double* ap, lapack_complex_double* bp,
+                           double vl, double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w,
+                           lapack_complex_double* z, lapack_int ldz,
+                           lapack_int* ifail );
+
+lapack_int LAPACKE_chprfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* ap,
+                           const lapack_complex_float* afp,
+                           const lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zhprfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* ap,
+                           const lapack_complex_double* afp,
+                           const lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_chpsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_float* ap,
+                          lapack_int* ipiv, lapack_complex_float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_zhpsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_double* ap,
+                          lapack_int* ipiv, lapack_complex_double* b,
+                          lapack_int ldb );
+
+lapack_int LAPACKE_chpsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* ap,
+                           lapack_complex_float* afp, lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr );
+lapack_int LAPACKE_zhpsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* ap,
+                           lapack_complex_double* afp, lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr );
+
+lapack_int LAPACKE_chptrd( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* ap, float* d, float* e,
+                           lapack_complex_float* tau );
+lapack_int LAPACKE_zhptrd( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* ap, double* d, double* e,
+                           lapack_complex_double* tau );
+
+lapack_int LAPACKE_chptrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* ap, lapack_int* ipiv );
+lapack_int LAPACKE_zhptrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* ap, lapack_int* ipiv );
+
+lapack_int LAPACKE_chptri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* ap, const lapack_int* ipiv );
+lapack_int LAPACKE_zhptri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* ap, const lapack_int* ipiv );
+
+lapack_int LAPACKE_chptrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* ap,
+                           const lapack_int* ipiv, lapack_complex_float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_zhptrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* ap,
+                           const lapack_int* ipiv, lapack_complex_double* b,
+                           lapack_int ldb );
+
+lapack_int LAPACKE_shsein( int matrix_order, char job, char eigsrc, char initv,
+                           lapack_logical* select, lapack_int n, const float* h,
+                           lapack_int ldh, float* wr, const float* wi,
+                           float* vl, lapack_int ldvl, float* vr,
+                           lapack_int ldvr, lapack_int mm, lapack_int* m,
+                           lapack_int* ifaill, lapack_int* ifailr );
+lapack_int LAPACKE_dhsein( int matrix_order, char job, char eigsrc, char initv,
+                           lapack_logical* select, lapack_int n,
+                           const double* h, lapack_int ldh, double* wr,
+                           const double* wi, double* vl, lapack_int ldvl,
+                           double* vr, lapack_int ldvr, lapack_int mm,
+                           lapack_int* m, lapack_int* ifaill,
+                           lapack_int* ifailr );
+lapack_int LAPACKE_chsein( int matrix_order, char job, char eigsrc, char initv,
+                           const lapack_logical* select, lapack_int n,
+                           const lapack_complex_float* h, lapack_int ldh,
+                           lapack_complex_float* w, lapack_complex_float* vl,
+                           lapack_int ldvl, lapack_complex_float* vr,
+                           lapack_int ldvr, lapack_int mm, lapack_int* m,
+                           lapack_int* ifaill, lapack_int* ifailr );
+lapack_int LAPACKE_zhsein( int matrix_order, char job, char eigsrc, char initv,
+                           const lapack_logical* select, lapack_int n,
+                           const lapack_complex_double* h, lapack_int ldh,
+                           lapack_complex_double* w, lapack_complex_double* vl,
+                           lapack_int ldvl, lapack_complex_double* vr,
+                           lapack_int ldvr, lapack_int mm, lapack_int* m,
+                           lapack_int* ifaill, lapack_int* ifailr );
+
+lapack_int LAPACKE_shseqr( int matrix_order, char job, char compz, lapack_int n,
+                           lapack_int ilo, lapack_int ihi, float* h,
+                           lapack_int ldh, float* wr, float* wi, float* z,
+                           lapack_int ldz );
+lapack_int LAPACKE_dhseqr( int matrix_order, char job, char compz, lapack_int n,
+                           lapack_int ilo, lapack_int ihi, double* h,
+                           lapack_int ldh, double* wr, double* wi, double* z,
+                           lapack_int ldz );
+lapack_int LAPACKE_chseqr( int matrix_order, char job, char compz, lapack_int n,
+                           lapack_int ilo, lapack_int ihi,
+                           lapack_complex_float* h, lapack_int ldh,
+                           lapack_complex_float* w, lapack_complex_float* z,
+                           lapack_int ldz );
+lapack_int LAPACKE_zhseqr( int matrix_order, char job, char compz, lapack_int n,
+                           lapack_int ilo, lapack_int ihi,
+                           lapack_complex_double* h, lapack_int ldh,
+                           lapack_complex_double* w, lapack_complex_double* z,
+                           lapack_int ldz );
+
+lapack_int LAPACKE_clacgv( lapack_int n, lapack_complex_float* x,
+                           lapack_int incx );
+lapack_int LAPACKE_zlacgv( lapack_int n, lapack_complex_double* x,
+                           lapack_int incx );
+
+lapack_int LAPACKE_slacpy( int matrix_order, char uplo, lapack_int m,
+                           lapack_int n, const float* a, lapack_int lda, float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_dlacpy( int matrix_order, char uplo, lapack_int m,
+                           lapack_int n, const double* a, lapack_int lda, double* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_clacpy( int matrix_order, char uplo, lapack_int m,
+                           lapack_int n, const lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_zlacpy( int matrix_order, char uplo, lapack_int m,
+                           lapack_int n, const lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb );
+
+lapack_int LAPACKE_zlag2c( int matrix_order, lapack_int m, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_float* sa, lapack_int ldsa );
+
+lapack_int LAPACKE_slag2d( int matrix_order, lapack_int m, lapack_int n,
+                           const float* sa, lapack_int ldsa, double* a,
+                           lapack_int lda );
+
+lapack_int LAPACKE_dlag2s( int matrix_order, lapack_int m, lapack_int n,
+                           const double* a, lapack_int lda, float* sa,
+                           lapack_int ldsa );
+
+lapack_int LAPACKE_clag2z( int matrix_order, lapack_int m, lapack_int n,
+                           const lapack_complex_float* sa, lapack_int ldsa,
+                           lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_slagge( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku, const float* d,
+                           float* a, lapack_int lda, lapack_int* iseed );
+lapack_int LAPACKE_dlagge( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku, const double* d,
+                           double* a, lapack_int lda, lapack_int* iseed );
+lapack_int LAPACKE_clagge( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku, const float* d,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int* iseed );
+lapack_int LAPACKE_zlagge( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int kl, lapack_int ku, const double* d,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* iseed );
+
+float LAPACKE_slamch( char cmach );
+double LAPACKE_dlamch( char cmach );
+
+float LAPACKE_slange( int matrix_order, char norm, lapack_int m,
+                           lapack_int n, const float* a, lapack_int lda );
+double LAPACKE_dlange( int matrix_order, char norm, lapack_int m,
+                           lapack_int n, const double* a, lapack_int lda );
+float LAPACKE_clange( int matrix_order, char norm, lapack_int m,
+                           lapack_int n, const lapack_complex_float* a,
+                           lapack_int lda );
+double LAPACKE_zlange( int matrix_order, char norm, lapack_int m,
+                           lapack_int n, const lapack_complex_double* a,
+                           lapack_int lda );
+
+float LAPACKE_clanhe( int matrix_order, char norm, char uplo, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda );
+double LAPACKE_zlanhe( int matrix_order, char norm, char uplo, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda );
+
+float LAPACKE_slansy( int matrix_order, char norm, char uplo, lapack_int n,
+                           const float* a, lapack_int lda );
+double LAPACKE_dlansy( int matrix_order, char norm, char uplo, lapack_int n,
+                           const double* a, lapack_int lda );
+float LAPACKE_clansy( int matrix_order, char norm, char uplo, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda );
+double LAPACKE_zlansy( int matrix_order, char norm, char uplo, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda );
+
+float LAPACKE_slantr( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int m, lapack_int n, const float* a,
+                           lapack_int lda );
+double LAPACKE_dlantr( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int m, lapack_int n, const double* a,
+                           lapack_int lda );
+float LAPACKE_clantr( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int m, lapack_int n, const lapack_complex_float* a,
+                           lapack_int lda );
+double LAPACKE_zlantr( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int m, lapack_int n, const lapack_complex_double* a,
+                           lapack_int lda );
+
+
+lapack_int LAPACKE_slarfb( int matrix_order, char side, char trans, char direct,
+                           char storev, lapack_int m, lapack_int n,
+                           lapack_int k, const float* v, lapack_int ldv,
+                           const float* t, lapack_int ldt, float* c,
+                           lapack_int ldc );
+lapack_int LAPACKE_dlarfb( int matrix_order, char side, char trans, char direct,
+                           char storev, lapack_int m, lapack_int n,
+                           lapack_int k, const double* v, lapack_int ldv,
+                           const double* t, lapack_int ldt, double* c,
+                           lapack_int ldc );
+lapack_int LAPACKE_clarfb( int matrix_order, char side, char trans, char direct,
+                           char storev, lapack_int m, lapack_int n,
+                           lapack_int k, const lapack_complex_float* v,
+                           lapack_int ldv, const lapack_complex_float* t,
+                           lapack_int ldt, lapack_complex_float* c,
+                           lapack_int ldc );
+lapack_int LAPACKE_zlarfb( int matrix_order, char side, char trans, char direct,
+                           char storev, lapack_int m, lapack_int n,
+                           lapack_int k, const lapack_complex_double* v,
+                           lapack_int ldv, const lapack_complex_double* t,
+                           lapack_int ldt, lapack_complex_double* c,
+                           lapack_int ldc );
+
+lapack_int LAPACKE_slarfg( lapack_int n, float* alpha, float* x,
+                           lapack_int incx, float* tau );
+lapack_int LAPACKE_dlarfg( lapack_int n, double* alpha, double* x,
+                           lapack_int incx, double* tau );
+lapack_int LAPACKE_clarfg( lapack_int n, lapack_complex_float* alpha,
+                           lapack_complex_float* x, lapack_int incx,
+                           lapack_complex_float* tau );
+lapack_int LAPACKE_zlarfg( lapack_int n, lapack_complex_double* alpha,
+                           lapack_complex_double* x, lapack_int incx,
+                           lapack_complex_double* tau );
+
+lapack_int LAPACKE_slarft( int matrix_order, char direct, char storev,
+                           lapack_int n, lapack_int k, const float* v,
+                           lapack_int ldv, const float* tau, float* t,
+                           lapack_int ldt );
+lapack_int LAPACKE_dlarft( int matrix_order, char direct, char storev,
+                           lapack_int n, lapack_int k, const double* v,
+                           lapack_int ldv, const double* tau, double* t,
+                           lapack_int ldt );
+lapack_int LAPACKE_clarft( int matrix_order, char direct, char storev,
+                           lapack_int n, lapack_int k,
+                           const lapack_complex_float* v, lapack_int ldv,
+                           const lapack_complex_float* tau,
+                           lapack_complex_float* t, lapack_int ldt );
+lapack_int LAPACKE_zlarft( int matrix_order, char direct, char storev,
+                           lapack_int n, lapack_int k,
+                           const lapack_complex_double* v, lapack_int ldv,
+                           const lapack_complex_double* tau,
+                           lapack_complex_double* t, lapack_int ldt );
+
+lapack_int LAPACKE_slarfx( int matrix_order, char side, lapack_int m,
+                           lapack_int n, const float* v, float tau, float* c,
+                           lapack_int ldc, float* work );
+lapack_int LAPACKE_dlarfx( int matrix_order, char side, lapack_int m,
+                           lapack_int n, const double* v, double tau, double* c,
+                           lapack_int ldc, double* work );
+lapack_int LAPACKE_clarfx( int matrix_order, char side, lapack_int m,
+                           lapack_int n, const lapack_complex_float* v,
+                           lapack_complex_float tau, lapack_complex_float* c,
+                           lapack_int ldc, lapack_complex_float* work );
+lapack_int LAPACKE_zlarfx( int matrix_order, char side, lapack_int m,
+                           lapack_int n, const lapack_complex_double* v,
+                           lapack_complex_double tau, lapack_complex_double* c,
+                           lapack_int ldc, lapack_complex_double* work );
+
+lapack_int LAPACKE_slarnv( lapack_int idist, lapack_int* iseed, lapack_int n,
+                           float* x );
+lapack_int LAPACKE_dlarnv( lapack_int idist, lapack_int* iseed, lapack_int n,
+                           double* x );
+lapack_int LAPACKE_clarnv( lapack_int idist, lapack_int* iseed, lapack_int n,
+                           lapack_complex_float* x );
+lapack_int LAPACKE_zlarnv( lapack_int idist, lapack_int* iseed, lapack_int n,
+                           lapack_complex_double* x );
+
+lapack_int LAPACKE_slaset( int matrix_order, char uplo, lapack_int m,
+                           lapack_int n, float alpha, float beta, float* a,
+                           lapack_int lda );
+lapack_int LAPACKE_dlaset( int matrix_order, char uplo, lapack_int m,
+                           lapack_int n, double alpha, double beta, double* a,
+                           lapack_int lda );
+lapack_int LAPACKE_claset( int matrix_order, char uplo, lapack_int m,
+                           lapack_int n, lapack_complex_float alpha,
+                           lapack_complex_float beta, lapack_complex_float* a,
+                           lapack_int lda );
+lapack_int LAPACKE_zlaset( int matrix_order, char uplo, lapack_int m,
+                           lapack_int n, lapack_complex_double alpha,
+                           lapack_complex_double beta, lapack_complex_double* a,
+                           lapack_int lda );
+
+lapack_int LAPACKE_slasrt( char id, lapack_int n, float* d );
+lapack_int LAPACKE_dlasrt( char id, lapack_int n, double* d );
+
+lapack_int LAPACKE_slaswp( int matrix_order, lapack_int n, float* a,
+                           lapack_int lda, lapack_int k1, lapack_int k2,
+                           const lapack_int* ipiv, lapack_int incx );
+lapack_int LAPACKE_dlaswp( int matrix_order, lapack_int n, double* a,
+                           lapack_int lda, lapack_int k1, lapack_int k2,
+                           const lapack_int* ipiv, lapack_int incx );
+lapack_int LAPACKE_claswp( int matrix_order, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int k1, lapack_int k2, const lapack_int* ipiv,
+                           lapack_int incx );
+lapack_int LAPACKE_zlaswp( int matrix_order, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int k1, lapack_int k2, const lapack_int* ipiv,
+                           lapack_int incx );
+
+lapack_int LAPACKE_slatms( int matrix_order, lapack_int m, lapack_int n,
+                           char dist, lapack_int* iseed, char sym, float* d,
+                           lapack_int mode, float cond, float dmax,
+                           lapack_int kl, lapack_int ku, char pack, float* a,
+                           lapack_int lda );
+lapack_int LAPACKE_dlatms( int matrix_order, lapack_int m, lapack_int n,
+                           char dist, lapack_int* iseed, char sym, double* d,
+                           lapack_int mode, double cond, double dmax,
+                           lapack_int kl, lapack_int ku, char pack, double* a,
+                           lapack_int lda );
+lapack_int LAPACKE_clatms( int matrix_order, lapack_int m, lapack_int n,
+                           char dist, lapack_int* iseed, char sym, float* d,
+                           lapack_int mode, float cond, float dmax,
+                           lapack_int kl, lapack_int ku, char pack,
+                           lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_zlatms( int matrix_order, lapack_int m, lapack_int n,
+                           char dist, lapack_int* iseed, char sym, double* d,
+                           lapack_int mode, double cond, double dmax,
+                           lapack_int kl, lapack_int ku, char pack,
+                           lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_slauum( int matrix_order, char uplo, lapack_int n, float* a,
+                           lapack_int lda );
+lapack_int LAPACKE_dlauum( int matrix_order, char uplo, lapack_int n, double* a,
+                           lapack_int lda );
+lapack_int LAPACKE_clauum( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_zlauum( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_sopgtr( int matrix_order, char uplo, lapack_int n,
+                           const float* ap, const float* tau, float* q,
+                           lapack_int ldq );
+lapack_int LAPACKE_dopgtr( int matrix_order, char uplo, lapack_int n,
+                           const double* ap, const double* tau, double* q,
+                           lapack_int ldq );
+
+lapack_int LAPACKE_sopmtr( int matrix_order, char side, char uplo, char trans,
+                           lapack_int m, lapack_int n, const float* ap,
+                           const float* tau, float* c, lapack_int ldc );
+lapack_int LAPACKE_dopmtr( int matrix_order, char side, char uplo, char trans,
+                           lapack_int m, lapack_int n, const double* ap,
+                           const double* tau, double* c, lapack_int ldc );
+
+lapack_int LAPACKE_sorgbr( int matrix_order, char vect, lapack_int m,
+                           lapack_int n, lapack_int k, float* a, lapack_int lda,
+                           const float* tau );
+lapack_int LAPACKE_dorgbr( int matrix_order, char vect, lapack_int m,
+                           lapack_int n, lapack_int k, double* a,
+                           lapack_int lda, const double* tau );
+
+lapack_int LAPACKE_sorghr( int matrix_order, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, float* a, lapack_int lda,
+                           const float* tau );
+lapack_int LAPACKE_dorghr( int matrix_order, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, double* a, lapack_int lda,
+                           const double* tau );
+
+lapack_int LAPACKE_sorglq( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, float* a, lapack_int lda,
+                           const float* tau );
+lapack_int LAPACKE_dorglq( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, double* a, lapack_int lda,
+                           const double* tau );
+
+lapack_int LAPACKE_sorgql( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, float* a, lapack_int lda,
+                           const float* tau );
+lapack_int LAPACKE_dorgql( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, double* a, lapack_int lda,
+                           const double* tau );
+
+lapack_int LAPACKE_sorgqr( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, float* a, lapack_int lda,
+                           const float* tau );
+lapack_int LAPACKE_dorgqr( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, double* a, lapack_int lda,
+                           const double* tau );
+
+lapack_int LAPACKE_sorgrq( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, float* a, lapack_int lda,
+                           const float* tau );
+lapack_int LAPACKE_dorgrq( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, double* a, lapack_int lda,
+                           const double* tau );
+
+lapack_int LAPACKE_sorgtr( int matrix_order, char uplo, lapack_int n, float* a,
+                           lapack_int lda, const float* tau );
+lapack_int LAPACKE_dorgtr( int matrix_order, char uplo, lapack_int n, double* a,
+                           lapack_int lda, const double* tau );
+
+lapack_int LAPACKE_sormbr( int matrix_order, char vect, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const float* a, lapack_int lda, const float* tau,
+                           float* c, lapack_int ldc );
+lapack_int LAPACKE_dormbr( int matrix_order, char vect, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const double* a, lapack_int lda, const double* tau,
+                           double* c, lapack_int ldc );
+
+lapack_int LAPACKE_sormhr( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, const float* a, lapack_int lda,
+                           const float* tau, float* c, lapack_int ldc );
+lapack_int LAPACKE_dormhr( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, const double* a, lapack_int lda,
+                           const double* tau, double* c, lapack_int ldc );
+
+lapack_int LAPACKE_sormlq( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const float* a, lapack_int lda, const float* tau,
+                           float* c, lapack_int ldc );
+lapack_int LAPACKE_dormlq( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const double* a, lapack_int lda, const double* tau,
+                           double* c, lapack_int ldc );
+
+lapack_int LAPACKE_sormql( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const float* a, lapack_int lda, const float* tau,
+                           float* c, lapack_int ldc );
+lapack_int LAPACKE_dormql( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const double* a, lapack_int lda, const double* tau,
+                           double* c, lapack_int ldc );
+
+lapack_int LAPACKE_sormqr( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const float* a, lapack_int lda, const float* tau,
+                           float* c, lapack_int ldc );
+lapack_int LAPACKE_dormqr( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const double* a, lapack_int lda, const double* tau,
+                           double* c, lapack_int ldc );
+
+lapack_int LAPACKE_sormrq( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const float* a, lapack_int lda, const float* tau,
+                           float* c, lapack_int ldc );
+lapack_int LAPACKE_dormrq( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const double* a, lapack_int lda, const double* tau,
+                           double* c, lapack_int ldc );
+
+lapack_int LAPACKE_sormrz( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           lapack_int l, const float* a, lapack_int lda,
+                           const float* tau, float* c, lapack_int ldc );
+lapack_int LAPACKE_dormrz( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           lapack_int l, const double* a, lapack_int lda,
+                           const double* tau, double* c, lapack_int ldc );
+
+lapack_int LAPACKE_sormtr( int matrix_order, char side, char uplo, char trans,
+                           lapack_int m, lapack_int n, const float* a,
+                           lapack_int lda, const float* tau, float* c,
+                           lapack_int ldc );
+lapack_int LAPACKE_dormtr( int matrix_order, char side, char uplo, char trans,
+                           lapack_int m, lapack_int n, const double* a,
+                           lapack_int lda, const double* tau, double* c,
+                           lapack_int ldc );
+
+lapack_int LAPACKE_spbcon( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, const float* ab, lapack_int ldab,
+                           float anorm, float* rcond );
+lapack_int LAPACKE_dpbcon( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, const double* ab, lapack_int ldab,
+                           double anorm, double* rcond );
+lapack_int LAPACKE_cpbcon( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, const lapack_complex_float* ab,
+                           lapack_int ldab, float anorm, float* rcond );
+lapack_int LAPACKE_zpbcon( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, const lapack_complex_double* ab,
+                           lapack_int ldab, double anorm, double* rcond );
+
+lapack_int LAPACKE_spbequ( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, const float* ab, lapack_int ldab,
+                           float* s, float* scond, float* amax );
+lapack_int LAPACKE_dpbequ( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, const double* ab, lapack_int ldab,
+                           double* s, double* scond, double* amax );
+lapack_int LAPACKE_cpbequ( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, const lapack_complex_float* ab,
+                           lapack_int ldab, float* s, float* scond,
+                           float* amax );
+lapack_int LAPACKE_zpbequ( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, const lapack_complex_double* ab,
+                           lapack_int ldab, double* s, double* scond,
+                           double* amax );
+
+lapack_int LAPACKE_spbrfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs, const float* ab,
+                           lapack_int ldab, const float* afb, lapack_int ldafb,
+                           const float* b, lapack_int ldb, float* x,
+                           lapack_int ldx, float* ferr, float* berr );
+lapack_int LAPACKE_dpbrfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs, const double* ab,
+                           lapack_int ldab, const double* afb, lapack_int ldafb,
+                           const double* b, lapack_int ldb, double* x,
+                           lapack_int ldx, double* ferr, double* berr );
+lapack_int LAPACKE_cpbrfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs,
+                           const lapack_complex_float* ab, lapack_int ldab,
+                           const lapack_complex_float* afb, lapack_int ldafb,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zpbrfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs,
+                           const lapack_complex_double* ab, lapack_int ldab,
+                           const lapack_complex_double* afb, lapack_int ldafb,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_spbstf( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kb, float* bb, lapack_int ldbb );
+lapack_int LAPACKE_dpbstf( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kb, double* bb, lapack_int ldbb );
+lapack_int LAPACKE_cpbstf( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kb, lapack_complex_float* bb,
+                           lapack_int ldbb );
+lapack_int LAPACKE_zpbstf( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kb, lapack_complex_double* bb,
+                           lapack_int ldbb );
+
+lapack_int LAPACKE_spbsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int kd, lapack_int nrhs, float* ab,
+                          lapack_int ldab, float* b, lapack_int ldb );
+lapack_int LAPACKE_dpbsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int kd, lapack_int nrhs, double* ab,
+                          lapack_int ldab, double* b, lapack_int ldb );
+lapack_int LAPACKE_cpbsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int kd, lapack_int nrhs,
+                          lapack_complex_float* ab, lapack_int ldab,
+                          lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zpbsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int kd, lapack_int nrhs,
+                          lapack_complex_double* ab, lapack_int ldab,
+                          lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_spbsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs, float* ab,
+                           lapack_int ldab, float* afb, lapack_int ldafb,
+                           char* equed, float* s, float* b, lapack_int ldb,
+                           float* x, lapack_int ldx, float* rcond, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_dpbsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs, double* ab,
+                           lapack_int ldab, double* afb, lapack_int ldafb,
+                           char* equed, double* s, double* b, lapack_int ldb,
+                           double* x, lapack_int ldx, double* rcond,
+                           double* ferr, double* berr );
+lapack_int LAPACKE_cpbsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs,
+                           lapack_complex_float* ab, lapack_int ldab,
+                           lapack_complex_float* afb, lapack_int ldafb,
+                           char* equed, float* s, lapack_complex_float* b,
+                           lapack_int ldb, lapack_complex_float* x,
+                           lapack_int ldx, float* rcond, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zpbsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs,
+                           lapack_complex_double* ab, lapack_int ldab,
+                           lapack_complex_double* afb, lapack_int ldafb,
+                           char* equed, double* s, lapack_complex_double* b,
+                           lapack_int ldb, lapack_complex_double* x,
+                           lapack_int ldx, double* rcond, double* ferr,
+                           double* berr );
+
+lapack_int LAPACKE_spbtrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, float* ab, lapack_int ldab );
+lapack_int LAPACKE_dpbtrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, double* ab, lapack_int ldab );
+lapack_int LAPACKE_cpbtrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, lapack_complex_float* ab,
+                           lapack_int ldab );
+lapack_int LAPACKE_zpbtrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, lapack_complex_double* ab,
+                           lapack_int ldab );
+
+lapack_int LAPACKE_spbtrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs, const float* ab,
+                           lapack_int ldab, float* b, lapack_int ldb );
+lapack_int LAPACKE_dpbtrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs, const double* ab,
+                           lapack_int ldab, double* b, lapack_int ldb );
+lapack_int LAPACKE_cpbtrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs,
+                           const lapack_complex_float* ab, lapack_int ldab,
+                           lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zpbtrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int kd, lapack_int nrhs,
+                           const lapack_complex_double* ab, lapack_int ldab,
+                           lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_spftrf( int matrix_order, char transr, char uplo,
+                           lapack_int n, float* a );
+lapack_int LAPACKE_dpftrf( int matrix_order, char transr, char uplo,
+                           lapack_int n, double* a );
+lapack_int LAPACKE_cpftrf( int matrix_order, char transr, char uplo,
+                           lapack_int n, lapack_complex_float* a );
+lapack_int LAPACKE_zpftrf( int matrix_order, char transr, char uplo,
+                           lapack_int n, lapack_complex_double* a );
+
+lapack_int LAPACKE_spftri( int matrix_order, char transr, char uplo,
+                           lapack_int n, float* a );
+lapack_int LAPACKE_dpftri( int matrix_order, char transr, char uplo,
+                           lapack_int n, double* a );
+lapack_int LAPACKE_cpftri( int matrix_order, char transr, char uplo,
+                           lapack_int n, lapack_complex_float* a );
+lapack_int LAPACKE_zpftri( int matrix_order, char transr, char uplo,
+                           lapack_int n, lapack_complex_double* a );
+
+lapack_int LAPACKE_spftrs( int matrix_order, char transr, char uplo,
+                           lapack_int n, lapack_int nrhs, const float* a,
+                           float* b, lapack_int ldb );
+lapack_int LAPACKE_dpftrs( int matrix_order, char transr, char uplo,
+                           lapack_int n, lapack_int nrhs, const double* a,
+                           double* b, lapack_int ldb );
+lapack_int LAPACKE_cpftrs( int matrix_order, char transr, char uplo,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_float* a,
+                           lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zpftrs( int matrix_order, char transr, char uplo,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_double* a,
+                           lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_spocon( int matrix_order, char uplo, lapack_int n,
+                           const float* a, lapack_int lda, float anorm,
+                           float* rcond );
+lapack_int LAPACKE_dpocon( int matrix_order, char uplo, lapack_int n,
+                           const double* a, lapack_int lda, double anorm,
+                           double* rcond );
+lapack_int LAPACKE_cpocon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           float anorm, float* rcond );
+lapack_int LAPACKE_zpocon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           double anorm, double* rcond );
+
+lapack_int LAPACKE_spoequ( int matrix_order, lapack_int n, const float* a,
+                           lapack_int lda, float* s, float* scond,
+                           float* amax );
+lapack_int LAPACKE_dpoequ( int matrix_order, lapack_int n, const double* a,
+                           lapack_int lda, double* s, double* scond,
+                           double* amax );
+lapack_int LAPACKE_cpoequ( int matrix_order, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           float* s, float* scond, float* amax );
+lapack_int LAPACKE_zpoequ( int matrix_order, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           double* s, double* scond, double* amax );
+
+lapack_int LAPACKE_spoequb( int matrix_order, lapack_int n, const float* a,
+                            lapack_int lda, float* s, float* scond,
+                            float* amax );
+lapack_int LAPACKE_dpoequb( int matrix_order, lapack_int n, const double* a,
+                            lapack_int lda, double* s, double* scond,
+                            double* amax );
+lapack_int LAPACKE_cpoequb( int matrix_order, lapack_int n,
+                            const lapack_complex_float* a, lapack_int lda,
+                            float* s, float* scond, float* amax );
+lapack_int LAPACKE_zpoequb( int matrix_order, lapack_int n,
+                            const lapack_complex_double* a, lapack_int lda,
+                            double* s, double* scond, double* amax );
+
+lapack_int LAPACKE_sporfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* a, lapack_int lda,
+                           const float* af, lapack_int ldaf, const float* b,
+                           lapack_int ldb, float* x, lapack_int ldx,
+                           float* ferr, float* berr );
+lapack_int LAPACKE_dporfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* a, lapack_int lda,
+                           const double* af, lapack_int ldaf, const double* b,
+                           lapack_int ldb, double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+lapack_int LAPACKE_cporfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* af,
+                           lapack_int ldaf, const lapack_complex_float* b,
+                           lapack_int ldb, lapack_complex_float* x,
+                           lapack_int ldx, float* ferr, float* berr );
+lapack_int LAPACKE_zporfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* af,
+                           lapack_int ldaf, const lapack_complex_double* b,
+                           lapack_int ldb, lapack_complex_double* x,
+                           lapack_int ldx, double* ferr, double* berr );
+
+lapack_int LAPACKE_sporfsx( int matrix_order, char uplo, char equed,
+                            lapack_int n, lapack_int nrhs, const float* a,
+                            lapack_int lda, const float* af, lapack_int ldaf,
+                            const float* s, const float* b, lapack_int ldb,
+                            float* x, lapack_int ldx, float* rcond, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_dporfsx( int matrix_order, char uplo, char equed,
+                            lapack_int n, lapack_int nrhs, const double* a,
+                            lapack_int lda, const double* af, lapack_int ldaf,
+                            const double* s, const double* b, lapack_int ldb,
+                            double* x, lapack_int ldx, double* rcond,
+                            double* berr, lapack_int n_err_bnds,
+                            double* err_bnds_norm, double* err_bnds_comp,
+                            lapack_int nparams, double* params );
+lapack_int LAPACKE_cporfsx( int matrix_order, char uplo, char equed,
+                            lapack_int n, lapack_int nrhs,
+                            const lapack_complex_float* a, lapack_int lda,
+                            const lapack_complex_float* af, lapack_int ldaf,
+                            const float* s, const lapack_complex_float* b,
+                            lapack_int ldb, lapack_complex_float* x,
+                            lapack_int ldx, float* rcond, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_zporfsx( int matrix_order, char uplo, char equed,
+                            lapack_int n, lapack_int nrhs,
+                            const lapack_complex_double* a, lapack_int lda,
+                            const lapack_complex_double* af, lapack_int ldaf,
+                            const double* s, const lapack_complex_double* b,
+                            lapack_int ldb, lapack_complex_double* x,
+                            lapack_int ldx, double* rcond, double* berr,
+                            lapack_int n_err_bnds, double* err_bnds_norm,
+                            double* err_bnds_comp, lapack_int nparams,
+                            double* params );
+
+lapack_int LAPACKE_sposv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, float* a, lapack_int lda, float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_dposv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, double* a, lapack_int lda, double* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_cposv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_float* a,
+                          lapack_int lda, lapack_complex_float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_zposv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_double* a,
+                          lapack_int lda, lapack_complex_double* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_dsposv( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, double* a, lapack_int lda,
+                           double* b, lapack_int ldb, double* x, lapack_int ldx,
+                           lapack_int* iter );
+lapack_int LAPACKE_zcposv( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb, lapack_complex_double* x,
+                           lapack_int ldx, lapack_int* iter );
+
+lapack_int LAPACKE_sposvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, float* a, lapack_int lda, float* af,
+                           lapack_int ldaf, char* equed, float* s, float* b,
+                           lapack_int ldb, float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr );
+lapack_int LAPACKE_dposvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, double* a, lapack_int lda,
+                           double* af, lapack_int ldaf, char* equed, double* s,
+                           double* b, lapack_int ldb, double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr );
+lapack_int LAPACKE_cposvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* af,
+                           lapack_int ldaf, char* equed, float* s,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr );
+lapack_int LAPACKE_zposvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* af,
+                           lapack_int ldaf, char* equed, double* s,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr );
+
+lapack_int LAPACKE_sposvxx( int matrix_order, char fact, char uplo,
+                            lapack_int n, lapack_int nrhs, float* a,
+                            lapack_int lda, float* af, lapack_int ldaf,
+                            char* equed, float* s, float* b, lapack_int ldb,
+                            float* x, lapack_int ldx, float* rcond,
+                            float* rpvgrw, float* berr, lapack_int n_err_bnds,
+                            float* err_bnds_norm, float* err_bnds_comp,
+                            lapack_int nparams, float* params );
+lapack_int LAPACKE_dposvxx( int matrix_order, char fact, char uplo,
+                            lapack_int n, lapack_int nrhs, double* a,
+                            lapack_int lda, double* af, lapack_int ldaf,
+                            char* equed, double* s, double* b, lapack_int ldb,
+                            double* x, lapack_int ldx, double* rcond,
+                            double* rpvgrw, double* berr, lapack_int n_err_bnds,
+                            double* err_bnds_norm, double* err_bnds_comp,
+                            lapack_int nparams, double* params );
+lapack_int LAPACKE_cposvxx( int matrix_order, char fact, char uplo,
+                            lapack_int n, lapack_int nrhs,
+                            lapack_complex_float* a, lapack_int lda,
+                            lapack_complex_float* af, lapack_int ldaf,
+                            char* equed, float* s, lapack_complex_float* b,
+                            lapack_int ldb, lapack_complex_float* x,
+                            lapack_int ldx, float* rcond, float* rpvgrw,
+                            float* berr, lapack_int n_err_bnds,
+                            float* err_bnds_norm, float* err_bnds_comp,
+                            lapack_int nparams, float* params );
+lapack_int LAPACKE_zposvxx( int matrix_order, char fact, char uplo,
+                            lapack_int n, lapack_int nrhs,
+                            lapack_complex_double* a, lapack_int lda,
+                            lapack_complex_double* af, lapack_int ldaf,
+                            char* equed, double* s, lapack_complex_double* b,
+                            lapack_int ldb, lapack_complex_double* x,
+                            lapack_int ldx, double* rcond, double* rpvgrw,
+                            double* berr, lapack_int n_err_bnds,
+                            double* err_bnds_norm, double* err_bnds_comp,
+                            lapack_int nparams, double* params );
+
+lapack_int LAPACKE_spotrf( int matrix_order, char uplo, lapack_int n, float* a,
+                           lapack_int lda );
+lapack_int LAPACKE_dpotrf( int matrix_order, char uplo, lapack_int n, double* a,
+                           lapack_int lda );
+lapack_int LAPACKE_cpotrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_zpotrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_spotri( int matrix_order, char uplo, lapack_int n, float* a,
+                           lapack_int lda );
+lapack_int LAPACKE_dpotri( int matrix_order, char uplo, lapack_int n, double* a,
+                           lapack_int lda );
+lapack_int LAPACKE_cpotri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_zpotri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_spotrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* a, lapack_int lda,
+                           float* b, lapack_int ldb );
+lapack_int LAPACKE_dpotrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* a, lapack_int lda,
+                           double* b, lapack_int ldb );
+lapack_int LAPACKE_cpotrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_zpotrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb );
+
+lapack_int LAPACKE_sppcon( int matrix_order, char uplo, lapack_int n,
+                           const float* ap, float anorm, float* rcond );
+lapack_int LAPACKE_dppcon( int matrix_order, char uplo, lapack_int n,
+                           const double* ap, double anorm, double* rcond );
+lapack_int LAPACKE_cppcon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_float* ap, float anorm,
+                           float* rcond );
+lapack_int LAPACKE_zppcon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_double* ap, double anorm,
+                           double* rcond );
+
+lapack_int LAPACKE_sppequ( int matrix_order, char uplo, lapack_int n,
+                           const float* ap, float* s, float* scond,
+                           float* amax );
+lapack_int LAPACKE_dppequ( int matrix_order, char uplo, lapack_int n,
+                           const double* ap, double* s, double* scond,
+                           double* amax );
+lapack_int LAPACKE_cppequ( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_float* ap, float* s,
+                           float* scond, float* amax );
+lapack_int LAPACKE_zppequ( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_double* ap, double* s,
+                           double* scond, double* amax );
+
+lapack_int LAPACKE_spprfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* ap, const float* afp,
+                           const float* b, lapack_int ldb, float* x,
+                           lapack_int ldx, float* ferr, float* berr );
+lapack_int LAPACKE_dpprfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* ap, const double* afp,
+                           const double* b, lapack_int ldb, double* x,
+                           lapack_int ldx, double* ferr, double* berr );
+lapack_int LAPACKE_cpprfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* ap,
+                           const lapack_complex_float* afp,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zpprfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* ap,
+                           const lapack_complex_double* afp,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_sppsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, float* ap, float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_dppsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, double* ap, double* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_cppsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_float* ap,
+                          lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zppsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_double* ap,
+                          lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_sppsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, float* ap, float* afp, char* equed,
+                           float* s, float* b, lapack_int ldb, float* x,
+                           lapack_int ldx, float* rcond, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_dppsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, double* ap, double* afp,
+                           char* equed, double* s, double* b, lapack_int ldb,
+                           double* x, lapack_int ldx, double* rcond,
+                           double* ferr, double* berr );
+lapack_int LAPACKE_cppsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, lapack_complex_float* ap,
+                           lapack_complex_float* afp, char* equed, float* s,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr );
+lapack_int LAPACKE_zppsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, lapack_complex_double* ap,
+                           lapack_complex_double* afp, char* equed, double* s,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr );
+
+lapack_int LAPACKE_spptrf( int matrix_order, char uplo, lapack_int n,
+                           float* ap );
+lapack_int LAPACKE_dpptrf( int matrix_order, char uplo, lapack_int n,
+                           double* ap );
+lapack_int LAPACKE_cpptrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* ap );
+lapack_int LAPACKE_zpptrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* ap );
+
+lapack_int LAPACKE_spptri( int matrix_order, char uplo, lapack_int n,
+                           float* ap );
+lapack_int LAPACKE_dpptri( int matrix_order, char uplo, lapack_int n,
+                           double* ap );
+lapack_int LAPACKE_cpptri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* ap );
+lapack_int LAPACKE_zpptri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* ap );
+
+lapack_int LAPACKE_spptrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* ap, float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_dpptrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* ap, double* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_cpptrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* ap,
+                           lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zpptrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* ap,
+                           lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_spstrf( int matrix_order, char uplo, lapack_int n, float* a,
+                           lapack_int lda, lapack_int* piv, lapack_int* rank,
+                           float tol );
+lapack_int LAPACKE_dpstrf( int matrix_order, char uplo, lapack_int n, double* a,
+                           lapack_int lda, lapack_int* piv, lapack_int* rank,
+                           double tol );
+lapack_int LAPACKE_cpstrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int* piv, lapack_int* rank, float tol );
+lapack_int LAPACKE_zpstrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* piv, lapack_int* rank, double tol );
+
+lapack_int LAPACKE_sptcon( lapack_int n, const float* d, const float* e,
+                           float anorm, float* rcond );
+lapack_int LAPACKE_dptcon( lapack_int n, const double* d, const double* e,
+                           double anorm, double* rcond );
+lapack_int LAPACKE_cptcon( lapack_int n, const float* d,
+                           const lapack_complex_float* e, float anorm,
+                           float* rcond );
+lapack_int LAPACKE_zptcon( lapack_int n, const double* d,
+                           const lapack_complex_double* e, double anorm,
+                           double* rcond );
+
+lapack_int LAPACKE_spteqr( int matrix_order, char compz, lapack_int n, float* d,
+                           float* e, float* z, lapack_int ldz );
+lapack_int LAPACKE_dpteqr( int matrix_order, char compz, lapack_int n,
+                           double* d, double* e, double* z, lapack_int ldz );
+lapack_int LAPACKE_cpteqr( int matrix_order, char compz, lapack_int n, float* d,
+                           float* e, lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zpteqr( int matrix_order, char compz, lapack_int n,
+                           double* d, double* e, lapack_complex_double* z,
+                           lapack_int ldz );
+
+lapack_int LAPACKE_sptrfs( int matrix_order, lapack_int n, lapack_int nrhs,
+                           const float* d, const float* e, const float* df,
+                           const float* ef, const float* b, lapack_int ldb,
+                           float* x, lapack_int ldx, float* ferr, float* berr );
+lapack_int LAPACKE_dptrfs( int matrix_order, lapack_int n, lapack_int nrhs,
+                           const double* d, const double* e, const double* df,
+                           const double* ef, const double* b, lapack_int ldb,
+                           double* x, lapack_int ldx, double* ferr,
+                           double* berr );
+lapack_int LAPACKE_cptrfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* d,
+                           const lapack_complex_float* e, const float* df,
+                           const lapack_complex_float* ef,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zptrfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* d,
+                           const lapack_complex_double* e, const double* df,
+                           const lapack_complex_double* ef,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_sptsv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          float* d, float* e, float* b, lapack_int ldb );
+lapack_int LAPACKE_dptsv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          double* d, double* e, double* b, lapack_int ldb );
+lapack_int LAPACKE_cptsv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          float* d, lapack_complex_float* e,
+                          lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zptsv( int matrix_order, lapack_int n, lapack_int nrhs,
+                          double* d, lapack_complex_double* e,
+                          lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_sptsvx( int matrix_order, char fact, lapack_int n,
+                           lapack_int nrhs, const float* d, const float* e,
+                           float* df, float* ef, const float* b, lapack_int ldb,
+                           float* x, lapack_int ldx, float* rcond, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_dptsvx( int matrix_order, char fact, lapack_int n,
+                           lapack_int nrhs, const double* d, const double* e,
+                           double* df, double* ef, const double* b,
+                           lapack_int ldb, double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr );
+lapack_int LAPACKE_cptsvx( int matrix_order, char fact, lapack_int n,
+                           lapack_int nrhs, const float* d,
+                           const lapack_complex_float* e, float* df,
+                           lapack_complex_float* ef,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr );
+lapack_int LAPACKE_zptsvx( int matrix_order, char fact, lapack_int n,
+                           lapack_int nrhs, const double* d,
+                           const lapack_complex_double* e, double* df,
+                           lapack_complex_double* ef,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr );
+
+lapack_int LAPACKE_spttrf( lapack_int n, float* d, float* e );
+lapack_int LAPACKE_dpttrf( lapack_int n, double* d, double* e );
+lapack_int LAPACKE_cpttrf( lapack_int n, float* d, lapack_complex_float* e );
+lapack_int LAPACKE_zpttrf( lapack_int n, double* d, lapack_complex_double* e );
+
+lapack_int LAPACKE_spttrs( int matrix_order, lapack_int n, lapack_int nrhs,
+                           const float* d, const float* e, float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_dpttrs( int matrix_order, lapack_int n, lapack_int nrhs,
+                           const double* d, const double* e, double* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_cpttrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* d,
+                           const lapack_complex_float* e,
+                           lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zpttrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* d,
+                           const lapack_complex_double* e,
+                           lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_ssbev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_int kd, float* ab, lapack_int ldab, float* w,
+                          float* z, lapack_int ldz );
+lapack_int LAPACKE_dsbev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_int kd, double* ab, lapack_int ldab, double* w,
+                          double* z, lapack_int ldz );
+
+lapack_int LAPACKE_ssbevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_int kd, float* ab, lapack_int ldab, float* w,
+                           float* z, lapack_int ldz );
+lapack_int LAPACKE_dsbevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_int kd, double* ab, lapack_int ldab,
+                           double* w, double* z, lapack_int ldz );
+
+lapack_int LAPACKE_ssbevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_int kd, float* ab,
+                           lapack_int ldab, float* q, lapack_int ldq, float vl,
+                           float vu, lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, float* z, lapack_int ldz,
+                           lapack_int* ifail );
+lapack_int LAPACKE_dsbevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_int kd, double* ab,
+                           lapack_int ldab, double* q, lapack_int ldq,
+                           double vl, double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w, double* z,
+                           lapack_int ldz, lapack_int* ifail );
+
+lapack_int LAPACKE_ssbgst( int matrix_order, char vect, char uplo, lapack_int n,
+                           lapack_int ka, lapack_int kb, float* ab,
+                           lapack_int ldab, const float* bb, lapack_int ldbb,
+                           float* x, lapack_int ldx );
+lapack_int LAPACKE_dsbgst( int matrix_order, char vect, char uplo, lapack_int n,
+                           lapack_int ka, lapack_int kb, double* ab,
+                           lapack_int ldab, const double* bb, lapack_int ldbb,
+                           double* x, lapack_int ldx );
+
+lapack_int LAPACKE_ssbgv( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_int ka, lapack_int kb, float* ab,
+                          lapack_int ldab, float* bb, lapack_int ldbb, float* w,
+                          float* z, lapack_int ldz );
+lapack_int LAPACKE_dsbgv( int matrix_order, char jobz, char uplo, lapack_int n,
+                          lapack_int ka, lapack_int kb, double* ab,
+                          lapack_int ldab, double* bb, lapack_int ldbb,
+                          double* w, double* z, lapack_int ldz );
+
+lapack_int LAPACKE_ssbgvd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_int ka, lapack_int kb, float* ab,
+                           lapack_int ldab, float* bb, lapack_int ldbb,
+                           float* w, float* z, lapack_int ldz );
+lapack_int LAPACKE_dsbgvd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           lapack_int ka, lapack_int kb, double* ab,
+                           lapack_int ldab, double* bb, lapack_int ldbb,
+                           double* w, double* z, lapack_int ldz );
+
+lapack_int LAPACKE_ssbgvx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_int ka, lapack_int kb,
+                           float* ab, lapack_int ldab, float* bb,
+                           lapack_int ldbb, float* q, lapack_int ldq, float vl,
+                           float vu, lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, float* z, lapack_int ldz,
+                           lapack_int* ifail );
+lapack_int LAPACKE_dsbgvx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, lapack_int ka, lapack_int kb,
+                           double* ab, lapack_int ldab, double* bb,
+                           lapack_int ldbb, double* q, lapack_int ldq,
+                           double vl, double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w, double* z,
+                           lapack_int ldz, lapack_int* ifail );
+
+lapack_int LAPACKE_ssbtrd( int matrix_order, char vect, char uplo, lapack_int n,
+                           lapack_int kd, float* ab, lapack_int ldab, float* d,
+                           float* e, float* q, lapack_int ldq );
+lapack_int LAPACKE_dsbtrd( int matrix_order, char vect, char uplo, lapack_int n,
+                           lapack_int kd, double* ab, lapack_int ldab,
+                           double* d, double* e, double* q, lapack_int ldq );
+
+lapack_int LAPACKE_ssfrk( int matrix_order, char transr, char uplo, char trans,
+                          lapack_int n, lapack_int k, float alpha,
+                          const float* a, lapack_int lda, float beta,
+                          float* c );
+lapack_int LAPACKE_dsfrk( int matrix_order, char transr, char uplo, char trans,
+                          lapack_int n, lapack_int k, double alpha,
+                          const double* a, lapack_int lda, double beta,
+                          double* c );
+
+lapack_int LAPACKE_sspcon( int matrix_order, char uplo, lapack_int n,
+                           const float* ap, const lapack_int* ipiv, float anorm,
+                           float* rcond );
+lapack_int LAPACKE_dspcon( int matrix_order, char uplo, lapack_int n,
+                           const double* ap, const lapack_int* ipiv,
+                           double anorm, double* rcond );
+lapack_int LAPACKE_cspcon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_float* ap,
+                           const lapack_int* ipiv, float anorm, float* rcond );
+lapack_int LAPACKE_zspcon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_double* ap,
+                           const lapack_int* ipiv, double anorm,
+                           double* rcond );
+
+lapack_int LAPACKE_sspev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          float* ap, float* w, float* z, lapack_int ldz );
+lapack_int LAPACKE_dspev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          double* ap, double* w, double* z, lapack_int ldz );
+
+lapack_int LAPACKE_sspevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           float* ap, float* w, float* z, lapack_int ldz );
+lapack_int LAPACKE_dspevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           double* ap, double* w, double* z, lapack_int ldz );
+
+lapack_int LAPACKE_sspevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, float* ap, float vl, float vu,
+                           lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, float* z, lapack_int ldz,
+                           lapack_int* ifail );
+lapack_int LAPACKE_dspevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, double* ap, double vl, double vu,
+                           lapack_int il, lapack_int iu, double abstol,
+                           lapack_int* m, double* w, double* z, lapack_int ldz,
+                           lapack_int* ifail );
+
+lapack_int LAPACKE_sspgst( int matrix_order, lapack_int itype, char uplo,
+                           lapack_int n, float* ap, const float* bp );
+lapack_int LAPACKE_dspgst( int matrix_order, lapack_int itype, char uplo,
+                           lapack_int n, double* ap, const double* bp );
+
+lapack_int LAPACKE_sspgv( int matrix_order, lapack_int itype, char jobz,
+                          char uplo, lapack_int n, float* ap, float* bp,
+                          float* w, float* z, lapack_int ldz );
+lapack_int LAPACKE_dspgv( int matrix_order, lapack_int itype, char jobz,
+                          char uplo, lapack_int n, double* ap, double* bp,
+                          double* w, double* z, lapack_int ldz );
+
+lapack_int LAPACKE_sspgvd( int matrix_order, lapack_int itype, char jobz,
+                           char uplo, lapack_int n, float* ap, float* bp,
+                           float* w, float* z, lapack_int ldz );
+lapack_int LAPACKE_dspgvd( int matrix_order, lapack_int itype, char jobz,
+                           char uplo, lapack_int n, double* ap, double* bp,
+                           double* w, double* z, lapack_int ldz );
+
+lapack_int LAPACKE_sspgvx( int matrix_order, lapack_int itype, char jobz,
+                           char range, char uplo, lapack_int n, float* ap,
+                           float* bp, float vl, float vu, lapack_int il,
+                           lapack_int iu, float abstol, lapack_int* m, float* w,
+                           float* z, lapack_int ldz, lapack_int* ifail );
+lapack_int LAPACKE_dspgvx( int matrix_order, lapack_int itype, char jobz,
+                           char range, char uplo, lapack_int n, double* ap,
+                           double* bp, double vl, double vu, lapack_int il,
+                           lapack_int iu, double abstol, lapack_int* m,
+                           double* w, double* z, lapack_int ldz,
+                           lapack_int* ifail );
+
+lapack_int LAPACKE_ssprfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* ap, const float* afp,
+                           const lapack_int* ipiv, const float* b,
+                           lapack_int ldb, float* x, lapack_int ldx,
+                           float* ferr, float* berr );
+lapack_int LAPACKE_dsprfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* ap, const double* afp,
+                           const lapack_int* ipiv, const double* b,
+                           lapack_int ldb, double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+lapack_int LAPACKE_csprfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* ap,
+                           const lapack_complex_float* afp,
+                           const lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zsprfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* ap,
+                           const lapack_complex_double* afp,
+                           const lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_sspsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, float* ap, lapack_int* ipiv,
+                          float* b, lapack_int ldb );
+lapack_int LAPACKE_dspsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, double* ap, lapack_int* ipiv,
+                          double* b, lapack_int ldb );
+lapack_int LAPACKE_cspsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_float* ap,
+                          lapack_int* ipiv, lapack_complex_float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_zspsv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_double* ap,
+                          lapack_int* ipiv, lapack_complex_double* b,
+                          lapack_int ldb );
+
+lapack_int LAPACKE_sspsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* ap, float* afp,
+                           lapack_int* ipiv, const float* b, lapack_int ldb,
+                           float* x, lapack_int ldx, float* rcond, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_dspsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* ap, double* afp,
+                           lapack_int* ipiv, const double* b, lapack_int ldb,
+                           double* x, lapack_int ldx, double* rcond,
+                           double* ferr, double* berr );
+lapack_int LAPACKE_cspsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* ap,
+                           lapack_complex_float* afp, lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr );
+lapack_int LAPACKE_zspsvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* ap,
+                           lapack_complex_double* afp, lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr );
+
+lapack_int LAPACKE_ssptrd( int matrix_order, char uplo, lapack_int n, float* ap,
+                           float* d, float* e, float* tau );
+lapack_int LAPACKE_dsptrd( int matrix_order, char uplo, lapack_int n,
+                           double* ap, double* d, double* e, double* tau );
+
+lapack_int LAPACKE_ssptrf( int matrix_order, char uplo, lapack_int n, float* ap,
+                           lapack_int* ipiv );
+lapack_int LAPACKE_dsptrf( int matrix_order, char uplo, lapack_int n,
+                           double* ap, lapack_int* ipiv );
+lapack_int LAPACKE_csptrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* ap, lapack_int* ipiv );
+lapack_int LAPACKE_zsptrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* ap, lapack_int* ipiv );
+
+lapack_int LAPACKE_ssptri( int matrix_order, char uplo, lapack_int n, float* ap,
+                           const lapack_int* ipiv );
+lapack_int LAPACKE_dsptri( int matrix_order, char uplo, lapack_int n,
+                           double* ap, const lapack_int* ipiv );
+lapack_int LAPACKE_csptri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* ap, const lapack_int* ipiv );
+lapack_int LAPACKE_zsptri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* ap, const lapack_int* ipiv );
+
+lapack_int LAPACKE_ssptrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* ap,
+                           const lapack_int* ipiv, float* b, lapack_int ldb );
+lapack_int LAPACKE_dsptrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* ap,
+                           const lapack_int* ipiv, double* b, lapack_int ldb );
+lapack_int LAPACKE_csptrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* ap,
+                           const lapack_int* ipiv, lapack_complex_float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_zsptrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* ap,
+                           const lapack_int* ipiv, lapack_complex_double* b,
+                           lapack_int ldb );
+
+lapack_int LAPACKE_sstebz( char range, char order, lapack_int n, float vl,
+                           float vu, lapack_int il, lapack_int iu, float abstol,
+                           const float* d, const float* e, lapack_int* m,
+                           lapack_int* nsplit, float* w, lapack_int* iblock,
+                           lapack_int* isplit );
+lapack_int LAPACKE_dstebz( char range, char order, lapack_int n, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, const double* d, const double* e,
+                           lapack_int* m, lapack_int* nsplit, double* w,
+                           lapack_int* iblock, lapack_int* isplit );
+
+lapack_int LAPACKE_sstedc( int matrix_order, char compz, lapack_int n, float* d,
+                           float* e, float* z, lapack_int ldz );
+lapack_int LAPACKE_dstedc( int matrix_order, char compz, lapack_int n,
+                           double* d, double* e, double* z, lapack_int ldz );
+lapack_int LAPACKE_cstedc( int matrix_order, char compz, lapack_int n, float* d,
+                           float* e, lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zstedc( int matrix_order, char compz, lapack_int n,
+                           double* d, double* e, lapack_complex_double* z,
+                           lapack_int ldz );
+
+lapack_int LAPACKE_sstegr( int matrix_order, char jobz, char range,
+                           lapack_int n, float* d, float* e, float vl, float vu,
+                           lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, float* z, lapack_int ldz,
+                           lapack_int* isuppz );
+lapack_int LAPACKE_dstegr( int matrix_order, char jobz, char range,
+                           lapack_int n, double* d, double* e, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w, double* z,
+                           lapack_int ldz, lapack_int* isuppz );
+lapack_int LAPACKE_cstegr( int matrix_order, char jobz, char range,
+                           lapack_int n, float* d, float* e, float vl, float vu,
+                           lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, lapack_complex_float* z,
+                           lapack_int ldz, lapack_int* isuppz );
+lapack_int LAPACKE_zstegr( int matrix_order, char jobz, char range,
+                           lapack_int n, double* d, double* e, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w,
+                           lapack_complex_double* z, lapack_int ldz,
+                           lapack_int* isuppz );
+
+lapack_int LAPACKE_sstein( int matrix_order, lapack_int n, const float* d,
+                           const float* e, lapack_int m, const float* w,
+                           const lapack_int* iblock, const lapack_int* isplit,
+                           float* z, lapack_int ldz, lapack_int* ifailv );
+lapack_int LAPACKE_dstein( int matrix_order, lapack_int n, const double* d,
+                           const double* e, lapack_int m, const double* w,
+                           const lapack_int* iblock, const lapack_int* isplit,
+                           double* z, lapack_int ldz, lapack_int* ifailv );
+lapack_int LAPACKE_cstein( int matrix_order, lapack_int n, const float* d,
+                           const float* e, lapack_int m, const float* w,
+                           const lapack_int* iblock, const lapack_int* isplit,
+                           lapack_complex_float* z, lapack_int ldz,
+                           lapack_int* ifailv );
+lapack_int LAPACKE_zstein( int matrix_order, lapack_int n, const double* d,
+                           const double* e, lapack_int m, const double* w,
+                           const lapack_int* iblock, const lapack_int* isplit,
+                           lapack_complex_double* z, lapack_int ldz,
+                           lapack_int* ifailv );
+
+lapack_int LAPACKE_sstemr( int matrix_order, char jobz, char range,
+                           lapack_int n, float* d, float* e, float vl, float vu,
+                           lapack_int il, lapack_int iu, lapack_int* m,
+                           float* w, float* z, lapack_int ldz, lapack_int nzc,
+                           lapack_int* isuppz, lapack_logical* tryrac );
+lapack_int LAPACKE_dstemr( int matrix_order, char jobz, char range,
+                           lapack_int n, double* d, double* e, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           lapack_int* m, double* w, double* z, lapack_int ldz,
+                           lapack_int nzc, lapack_int* isuppz,
+                           lapack_logical* tryrac );
+lapack_int LAPACKE_cstemr( int matrix_order, char jobz, char range,
+                           lapack_int n, float* d, float* e, float vl, float vu,
+                           lapack_int il, lapack_int iu, lapack_int* m,
+                           float* w, lapack_complex_float* z, lapack_int ldz,
+                           lapack_int nzc, lapack_int* isuppz,
+                           lapack_logical* tryrac );
+lapack_int LAPACKE_zstemr( int matrix_order, char jobz, char range,
+                           lapack_int n, double* d, double* e, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           lapack_int* m, double* w, lapack_complex_double* z,
+                           lapack_int ldz, lapack_int nzc, lapack_int* isuppz,
+                           lapack_logical* tryrac );
+
+lapack_int LAPACKE_ssteqr( int matrix_order, char compz, lapack_int n, float* d,
+                           float* e, float* z, lapack_int ldz );
+lapack_int LAPACKE_dsteqr( int matrix_order, char compz, lapack_int n,
+                           double* d, double* e, double* z, lapack_int ldz );
+lapack_int LAPACKE_csteqr( int matrix_order, char compz, lapack_int n, float* d,
+                           float* e, lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zsteqr( int matrix_order, char compz, lapack_int n,
+                           double* d, double* e, lapack_complex_double* z,
+                           lapack_int ldz );
+
+lapack_int LAPACKE_ssterf( lapack_int n, float* d, float* e );
+lapack_int LAPACKE_dsterf( lapack_int n, double* d, double* e );
+
+lapack_int LAPACKE_sstev( int matrix_order, char jobz, lapack_int n, float* d,
+                          float* e, float* z, lapack_int ldz );
+lapack_int LAPACKE_dstev( int matrix_order, char jobz, lapack_int n, double* d,
+                          double* e, double* z, lapack_int ldz );
+
+lapack_int LAPACKE_sstevd( int matrix_order, char jobz, lapack_int n, float* d,
+                           float* e, float* z, lapack_int ldz );
+lapack_int LAPACKE_dstevd( int matrix_order, char jobz, lapack_int n, double* d,
+                           double* e, double* z, lapack_int ldz );
+
+lapack_int LAPACKE_sstevr( int matrix_order, char jobz, char range,
+                           lapack_int n, float* d, float* e, float vl, float vu,
+                           lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, float* z, lapack_int ldz,
+                           lapack_int* isuppz );
+lapack_int LAPACKE_dstevr( int matrix_order, char jobz, char range,
+                           lapack_int n, double* d, double* e, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w, double* z,
+                           lapack_int ldz, lapack_int* isuppz );
+
+lapack_int LAPACKE_sstevx( int matrix_order, char jobz, char range,
+                           lapack_int n, float* d, float* e, float vl, float vu,
+                           lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, float* z, lapack_int ldz,
+                           lapack_int* ifail );
+lapack_int LAPACKE_dstevx( int matrix_order, char jobz, char range,
+                           lapack_int n, double* d, double* e, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w, double* z,
+                           lapack_int ldz, lapack_int* ifail );
+
+lapack_int LAPACKE_ssycon( int matrix_order, char uplo, lapack_int n,
+                           const float* a, lapack_int lda,
+                           const lapack_int* ipiv, float anorm, float* rcond );
+lapack_int LAPACKE_dsycon( int matrix_order, char uplo, lapack_int n,
+                           const double* a, lapack_int lda,
+                           const lapack_int* ipiv, double anorm,
+                           double* rcond );
+lapack_int LAPACKE_csycon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_int* ipiv, float anorm, float* rcond );
+lapack_int LAPACKE_zsycon( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_int* ipiv, double anorm,
+                           double* rcond );
+
+lapack_int LAPACKE_ssyequb( int matrix_order, char uplo, lapack_int n,
+                            const float* a, lapack_int lda, float* s,
+                            float* scond, float* amax );
+lapack_int LAPACKE_dsyequb( int matrix_order, char uplo, lapack_int n,
+                            const double* a, lapack_int lda, double* s,
+                            double* scond, double* amax );
+lapack_int LAPACKE_csyequb( int matrix_order, char uplo, lapack_int n,
+                            const lapack_complex_float* a, lapack_int lda,
+                            float* s, float* scond, float* amax );
+lapack_int LAPACKE_zsyequb( int matrix_order, char uplo, lapack_int n,
+                            const lapack_complex_double* a, lapack_int lda,
+                            double* s, double* scond, double* amax );
+
+lapack_int LAPACKE_ssyev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          float* a, lapack_int lda, float* w );
+lapack_int LAPACKE_dsyev( int matrix_order, char jobz, char uplo, lapack_int n,
+                          double* a, lapack_int lda, double* w );
+
+lapack_int LAPACKE_ssyevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           float* a, lapack_int lda, float* w );
+lapack_int LAPACKE_dsyevd( int matrix_order, char jobz, char uplo, lapack_int n,
+                           double* a, lapack_int lda, double* w );
+
+lapack_int LAPACKE_ssyevr( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, float* a, lapack_int lda, float vl,
+                           float vu, lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, float* z, lapack_int ldz,
+                           lapack_int* isuppz );
+lapack_int LAPACKE_dsyevr( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, double* a, lapack_int lda, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w, double* z,
+                           lapack_int ldz, lapack_int* isuppz );
+
+lapack_int LAPACKE_ssyevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, float* a, lapack_int lda, float vl,
+                           float vu, lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, float* z, lapack_int ldz,
+                           lapack_int* ifail );
+lapack_int LAPACKE_dsyevx( int matrix_order, char jobz, char range, char uplo,
+                           lapack_int n, double* a, lapack_int lda, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w, double* z,
+                           lapack_int ldz, lapack_int* ifail );
+
+lapack_int LAPACKE_ssygst( int matrix_order, lapack_int itype, char uplo,
+                           lapack_int n, float* a, lapack_int lda,
+                           const float* b, lapack_int ldb );
+lapack_int LAPACKE_dsygst( int matrix_order, lapack_int itype, char uplo,
+                           lapack_int n, double* a, lapack_int lda,
+                           const double* b, lapack_int ldb );
+
+lapack_int LAPACKE_ssygv( int matrix_order, lapack_int itype, char jobz,
+                          char uplo, lapack_int n, float* a, lapack_int lda,
+                          float* b, lapack_int ldb, float* w );
+lapack_int LAPACKE_dsygv( int matrix_order, lapack_int itype, char jobz,
+                          char uplo, lapack_int n, double* a, lapack_int lda,
+                          double* b, lapack_int ldb, double* w );
+
+lapack_int LAPACKE_ssygvd( int matrix_order, lapack_int itype, char jobz,
+                           char uplo, lapack_int n, float* a, lapack_int lda,
+                           float* b, lapack_int ldb, float* w );
+lapack_int LAPACKE_dsygvd( int matrix_order, lapack_int itype, char jobz,
+                           char uplo, lapack_int n, double* a, lapack_int lda,
+                           double* b, lapack_int ldb, double* w );
+
+lapack_int LAPACKE_ssygvx( int matrix_order, lapack_int itype, char jobz,
+                           char range, char uplo, lapack_int n, float* a,
+                           lapack_int lda, float* b, lapack_int ldb, float vl,
+                           float vu, lapack_int il, lapack_int iu, float abstol,
+                           lapack_int* m, float* w, float* z, lapack_int ldz,
+                           lapack_int* ifail );
+lapack_int LAPACKE_dsygvx( int matrix_order, lapack_int itype, char jobz,
+                           char range, char uplo, lapack_int n, double* a,
+                           lapack_int lda, double* b, lapack_int ldb, double vl,
+                           double vu, lapack_int il, lapack_int iu,
+                           double abstol, lapack_int* m, double* w, double* z,
+                           lapack_int ldz, lapack_int* ifail );
+
+lapack_int LAPACKE_ssyrfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* a, lapack_int lda,
+                           const float* af, lapack_int ldaf,
+                           const lapack_int* ipiv, const float* b,
+                           lapack_int ldb, float* x, lapack_int ldx,
+                           float* ferr, float* berr );
+lapack_int LAPACKE_dsyrfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* a, lapack_int lda,
+                           const double* af, lapack_int ldaf,
+                           const lapack_int* ipiv, const double* b,
+                           lapack_int ldb, double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+lapack_int LAPACKE_csyrfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* af,
+                           lapack_int ldaf, const lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_zsyrfs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* af,
+                           lapack_int ldaf, const lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_ssyrfsx( int matrix_order, char uplo, char equed,
+                            lapack_int n, lapack_int nrhs, const float* a,
+                            lapack_int lda, const float* af, lapack_int ldaf,
+                            const lapack_int* ipiv, const float* s,
+                            const float* b, lapack_int ldb, float* x,
+                            lapack_int ldx, float* rcond, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_dsyrfsx( int matrix_order, char uplo, char equed,
+                            lapack_int n, lapack_int nrhs, const double* a,
+                            lapack_int lda, const double* af, lapack_int ldaf,
+                            const lapack_int* ipiv, const double* s,
+                            const double* b, lapack_int ldb, double* x,
+                            lapack_int ldx, double* rcond, double* berr,
+                            lapack_int n_err_bnds, double* err_bnds_norm,
+                            double* err_bnds_comp, lapack_int nparams,
+                            double* params );
+lapack_int LAPACKE_csyrfsx( int matrix_order, char uplo, char equed,
+                            lapack_int n, lapack_int nrhs,
+                            const lapack_complex_float* a, lapack_int lda,
+                            const lapack_complex_float* af, lapack_int ldaf,
+                            const lapack_int* ipiv, const float* s,
+                            const lapack_complex_float* b, lapack_int ldb,
+                            lapack_complex_float* x, lapack_int ldx,
+                            float* rcond, float* berr, lapack_int n_err_bnds,
+                            float* err_bnds_norm, float* err_bnds_comp,
+                            lapack_int nparams, float* params );
+lapack_int LAPACKE_zsyrfsx( int matrix_order, char uplo, char equed,
+                            lapack_int n, lapack_int nrhs,
+                            const lapack_complex_double* a, lapack_int lda,
+                            const lapack_complex_double* af, lapack_int ldaf,
+                            const lapack_int* ipiv, const double* s,
+                            const lapack_complex_double* b, lapack_int ldb,
+                            lapack_complex_double* x, lapack_int ldx,
+                            double* rcond, double* berr, lapack_int n_err_bnds,
+                            double* err_bnds_norm, double* err_bnds_comp,
+                            lapack_int nparams, double* params );
+
+lapack_int LAPACKE_ssysv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, float* a, lapack_int lda,
+                          lapack_int* ipiv, float* b, lapack_int ldb );
+lapack_int LAPACKE_dsysv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, double* a, lapack_int lda,
+                          lapack_int* ipiv, double* b, lapack_int ldb );
+lapack_int LAPACKE_csysv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_float* a,
+                          lapack_int lda, lapack_int* ipiv,
+                          lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zsysv( int matrix_order, char uplo, lapack_int n,
+                          lapack_int nrhs, lapack_complex_double* a,
+                          lapack_int lda, lapack_int* ipiv,
+                          lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_ssysvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* a, lapack_int lda,
+                           float* af, lapack_int ldaf, lapack_int* ipiv,
+                           const float* b, lapack_int ldb, float* x,
+                           lapack_int ldx, float* rcond, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_dsysvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* a, lapack_int lda,
+                           double* af, lapack_int ldaf, lapack_int* ipiv,
+                           const double* b, lapack_int ldb, double* x,
+                           lapack_int ldx, double* rcond, double* ferr,
+                           double* berr );
+lapack_int LAPACKE_csysvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* af,
+                           lapack_int ldaf, lapack_int* ipiv,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* x, lapack_int ldx,
+                           float* rcond, float* ferr, float* berr );
+lapack_int LAPACKE_zsysvx( int matrix_order, char fact, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* af,
+                           lapack_int ldaf, lapack_int* ipiv,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* x, lapack_int ldx,
+                           double* rcond, double* ferr, double* berr );
+
+lapack_int LAPACKE_ssysvxx( int matrix_order, char fact, char uplo,
+                            lapack_int n, lapack_int nrhs, float* a,
+                            lapack_int lda, float* af, lapack_int ldaf,
+                            lapack_int* ipiv, char* equed, float* s, float* b,
+                            lapack_int ldb, float* x, lapack_int ldx,
+                            float* rcond, float* rpvgrw, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_dsysvxx( int matrix_order, char fact, char uplo,
+                            lapack_int n, lapack_int nrhs, double* a,
+                            lapack_int lda, double* af, lapack_int ldaf,
+                            lapack_int* ipiv, char* equed, double* s, double* b,
+                            lapack_int ldb, double* x, lapack_int ldx,
+                            double* rcond, double* rpvgrw, double* berr,
+                            lapack_int n_err_bnds, double* err_bnds_norm,
+                            double* err_bnds_comp, lapack_int nparams,
+                            double* params );
+lapack_int LAPACKE_csysvxx( int matrix_order, char fact, char uplo,
+                            lapack_int n, lapack_int nrhs,
+                            lapack_complex_float* a, lapack_int lda,
+                            lapack_complex_float* af, lapack_int ldaf,
+                            lapack_int* ipiv, char* equed, float* s,
+                            lapack_complex_float* b, lapack_int ldb,
+                            lapack_complex_float* x, lapack_int ldx,
+                            float* rcond, float* rpvgrw, float* berr,
+                            lapack_int n_err_bnds, float* err_bnds_norm,
+                            float* err_bnds_comp, lapack_int nparams,
+                            float* params );
+lapack_int LAPACKE_zsysvxx( int matrix_order, char fact, char uplo,
+                            lapack_int n, lapack_int nrhs,
+                            lapack_complex_double* a, lapack_int lda,
+                            lapack_complex_double* af, lapack_int ldaf,
+                            lapack_int* ipiv, char* equed, double* s,
+                            lapack_complex_double* b, lapack_int ldb,
+                            lapack_complex_double* x, lapack_int ldx,
+                            double* rcond, double* rpvgrw, double* berr,
+                            lapack_int n_err_bnds, double* err_bnds_norm,
+                            double* err_bnds_comp, lapack_int nparams,
+                            double* params );
+
+lapack_int LAPACKE_ssytrd( int matrix_order, char uplo, lapack_int n, float* a,
+                           lapack_int lda, float* d, float* e, float* tau );
+lapack_int LAPACKE_dsytrd( int matrix_order, char uplo, lapack_int n, double* a,
+                           lapack_int lda, double* d, double* e, double* tau );
+
+lapack_int LAPACKE_ssytrf( int matrix_order, char uplo, lapack_int n, float* a,
+                           lapack_int lda, lapack_int* ipiv );
+lapack_int LAPACKE_dsytrf( int matrix_order, char uplo, lapack_int n, double* a,
+                           lapack_int lda, lapack_int* ipiv );
+lapack_int LAPACKE_csytrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_int* ipiv );
+lapack_int LAPACKE_zsytrf( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_int* ipiv );
+
+lapack_int LAPACKE_ssytri( int matrix_order, char uplo, lapack_int n, float* a,
+                           lapack_int lda, const lapack_int* ipiv );
+lapack_int LAPACKE_dsytri( int matrix_order, char uplo, lapack_int n, double* a,
+                           lapack_int lda, const lapack_int* ipiv );
+lapack_int LAPACKE_csytri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           const lapack_int* ipiv );
+lapack_int LAPACKE_zsytri( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           const lapack_int* ipiv );
+
+lapack_int LAPACKE_ssytrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const float* a, lapack_int lda,
+                           const lapack_int* ipiv, float* b, lapack_int ldb );
+lapack_int LAPACKE_dsytrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const double* a, lapack_int lda,
+                           const lapack_int* ipiv, double* b, lapack_int ldb );
+lapack_int LAPACKE_csytrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_float* a,
+                           lapack_int lda, const lapack_int* ipiv,
+                           lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zsytrs( int matrix_order, char uplo, lapack_int n,
+                           lapack_int nrhs, const lapack_complex_double* a,
+                           lapack_int lda, const lapack_int* ipiv,
+                           lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_stbcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, lapack_int kd, const float* ab,
+                           lapack_int ldab, float* rcond );
+lapack_int LAPACKE_dtbcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, lapack_int kd, const double* ab,
+                           lapack_int ldab, double* rcond );
+lapack_int LAPACKE_ctbcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, lapack_int kd,
+                           const lapack_complex_float* ab, lapack_int ldab,
+                           float* rcond );
+lapack_int LAPACKE_ztbcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, lapack_int kd,
+                           const lapack_complex_double* ab, lapack_int ldab,
+                           double* rcond );
+
+lapack_int LAPACKE_stbrfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int kd, lapack_int nrhs,
+                           const float* ab, lapack_int ldab, const float* b,
+                           lapack_int ldb, const float* x, lapack_int ldx,
+                           float* ferr, float* berr );
+lapack_int LAPACKE_dtbrfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int kd, lapack_int nrhs,
+                           const double* ab, lapack_int ldab, const double* b,
+                           lapack_int ldb, const double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+lapack_int LAPACKE_ctbrfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int kd, lapack_int nrhs,
+                           const lapack_complex_float* ab, lapack_int ldab,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           const lapack_complex_float* x, lapack_int ldx,
+                           float* ferr, float* berr );
+lapack_int LAPACKE_ztbrfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int kd, lapack_int nrhs,
+                           const lapack_complex_double* ab, lapack_int ldab,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           const lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_stbtrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int kd, lapack_int nrhs,
+                           const float* ab, lapack_int ldab, float* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_dtbtrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int kd, lapack_int nrhs,
+                           const double* ab, lapack_int ldab, double* b,
+                           lapack_int ldb );
+lapack_int LAPACKE_ctbtrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int kd, lapack_int nrhs,
+                           const lapack_complex_float* ab, lapack_int ldab,
+                           lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_ztbtrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int kd, lapack_int nrhs,
+                           const lapack_complex_double* ab, lapack_int ldab,
+                           lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_stfsm( int matrix_order, char transr, char side, char uplo,
+                          char trans, char diag, lapack_int m, lapack_int n,
+                          float alpha, const float* a, float* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_dtfsm( int matrix_order, char transr, char side, char uplo,
+                          char trans, char diag, lapack_int m, lapack_int n,
+                          double alpha, const double* a, double* b,
+                          lapack_int ldb );
+lapack_int LAPACKE_ctfsm( int matrix_order, char transr, char side, char uplo,
+                          char trans, char diag, lapack_int m, lapack_int n,
+                          lapack_complex_float alpha,
+                          const lapack_complex_float* a,
+                          lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_ztfsm( int matrix_order, char transr, char side, char uplo,
+                          char trans, char diag, lapack_int m, lapack_int n,
+                          lapack_complex_double alpha,
+                          const lapack_complex_double* a,
+                          lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_stftri( int matrix_order, char transr, char uplo, char diag,
+                           lapack_int n, float* a );
+lapack_int LAPACKE_dtftri( int matrix_order, char transr, char uplo, char diag,
+                           lapack_int n, double* a );
+lapack_int LAPACKE_ctftri( int matrix_order, char transr, char uplo, char diag,
+                           lapack_int n, lapack_complex_float* a );
+lapack_int LAPACKE_ztftri( int matrix_order, char transr, char uplo, char diag,
+                           lapack_int n, lapack_complex_double* a );
+
+lapack_int LAPACKE_stfttp( int matrix_order, char transr, char uplo,
+                           lapack_int n, const float* arf, float* ap );
+lapack_int LAPACKE_dtfttp( int matrix_order, char transr, char uplo,
+                           lapack_int n, const double* arf, double* ap );
+lapack_int LAPACKE_ctfttp( int matrix_order, char transr, char uplo,
+                           lapack_int n, const lapack_complex_float* arf,
+                           lapack_complex_float* ap );
+lapack_int LAPACKE_ztfttp( int matrix_order, char transr, char uplo,
+                           lapack_int n, const lapack_complex_double* arf,
+                           lapack_complex_double* ap );
+
+lapack_int LAPACKE_stfttr( int matrix_order, char transr, char uplo,
+                           lapack_int n, const float* arf, float* a,
+                           lapack_int lda );
+lapack_int LAPACKE_dtfttr( int matrix_order, char transr, char uplo,
+                           lapack_int n, const double* arf, double* a,
+                           lapack_int lda );
+lapack_int LAPACKE_ctfttr( int matrix_order, char transr, char uplo,
+                           lapack_int n, const lapack_complex_float* arf,
+                           lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_ztfttr( int matrix_order, char transr, char uplo,
+                           lapack_int n, const lapack_complex_double* arf,
+                           lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_stgevc( int matrix_order, char side, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const float* s, lapack_int lds, const float* p,
+                           lapack_int ldp, float* vl, lapack_int ldvl,
+                           float* vr, lapack_int ldvr, lapack_int mm,
+                           lapack_int* m );
+lapack_int LAPACKE_dtgevc( int matrix_order, char side, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const double* s, lapack_int lds, const double* p,
+                           lapack_int ldp, double* vl, lapack_int ldvl,
+                           double* vr, lapack_int ldvr, lapack_int mm,
+                           lapack_int* m );
+lapack_int LAPACKE_ctgevc( int matrix_order, char side, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const lapack_complex_float* s, lapack_int lds,
+                           const lapack_complex_float* p, lapack_int ldp,
+                           lapack_complex_float* vl, lapack_int ldvl,
+                           lapack_complex_float* vr, lapack_int ldvr,
+                           lapack_int mm, lapack_int* m );
+lapack_int LAPACKE_ztgevc( int matrix_order, char side, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const lapack_complex_double* s, lapack_int lds,
+                           const lapack_complex_double* p, lapack_int ldp,
+                           lapack_complex_double* vl, lapack_int ldvl,
+                           lapack_complex_double* vr, lapack_int ldvr,
+                           lapack_int mm, lapack_int* m );
+
+lapack_int LAPACKE_stgexc( int matrix_order, lapack_logical wantq,
+                           lapack_logical wantz, lapack_int n, float* a,
+                           lapack_int lda, float* b, lapack_int ldb, float* q,
+                           lapack_int ldq, float* z, lapack_int ldz,
+                           lapack_int* ifst, lapack_int* ilst );
+lapack_int LAPACKE_dtgexc( int matrix_order, lapack_logical wantq,
+                           lapack_logical wantz, lapack_int n, double* a,
+                           lapack_int lda, double* b, lapack_int ldb, double* q,
+                           lapack_int ldq, double* z, lapack_int ldz,
+                           lapack_int* ifst, lapack_int* ilst );
+lapack_int LAPACKE_ctgexc( int matrix_order, lapack_logical wantq,
+                           lapack_logical wantz, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* q, lapack_int ldq,
+                           lapack_complex_float* z, lapack_int ldz,
+                           lapack_int ifst, lapack_int ilst );
+lapack_int LAPACKE_ztgexc( int matrix_order, lapack_logical wantq,
+                           lapack_logical wantz, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* q, lapack_int ldq,
+                           lapack_complex_double* z, lapack_int ldz,
+                           lapack_int ifst, lapack_int ilst );
+
+lapack_int LAPACKE_stgsen( int matrix_order, lapack_int ijob,
+                           lapack_logical wantq, lapack_logical wantz,
+                           const lapack_logical* select, lapack_int n, float* a,
+                           lapack_int lda, float* b, lapack_int ldb,
+                           float* alphar, float* alphai, float* beta, float* q,
+                           lapack_int ldq, float* z, lapack_int ldz,
+                           lapack_int* m, float* pl, float* pr, float* dif );
+lapack_int LAPACKE_dtgsen( int matrix_order, lapack_int ijob,
+                           lapack_logical wantq, lapack_logical wantz,
+                           const lapack_logical* select, lapack_int n,
+                           double* a, lapack_int lda, double* b, lapack_int ldb,
+                           double* alphar, double* alphai, double* beta,
+                           double* q, lapack_int ldq, double* z, lapack_int ldz,
+                           lapack_int* m, double* pl, double* pr, double* dif );
+lapack_int LAPACKE_ctgsen( int matrix_order, lapack_int ijob,
+                           lapack_logical wantq, lapack_logical wantz,
+                           const lapack_logical* select, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* alpha,
+                           lapack_complex_float* beta, lapack_complex_float* q,
+                           lapack_int ldq, lapack_complex_float* z,
+                           lapack_int ldz, lapack_int* m, float* pl, float* pr,
+                           float* dif );
+lapack_int LAPACKE_ztgsen( int matrix_order, lapack_int ijob,
+                           lapack_logical wantq, lapack_logical wantz,
+                           const lapack_logical* select, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* alpha,
+                           lapack_complex_double* beta,
+                           lapack_complex_double* q, lapack_int ldq,
+                           lapack_complex_double* z, lapack_int ldz,
+                           lapack_int* m, double* pl, double* pr, double* dif );
+
+lapack_int LAPACKE_stgsja( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int p, lapack_int n,
+                           lapack_int k, lapack_int l, float* a, lapack_int lda,
+                           float* b, lapack_int ldb, float tola, float tolb,
+                           float* alpha, float* beta, float* u, lapack_int ldu,
+                           float* v, lapack_int ldv, float* q, lapack_int ldq,
+                           lapack_int* ncycle );
+lapack_int LAPACKE_dtgsja( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int p, lapack_int n,
+                           lapack_int k, lapack_int l, double* a,
+                           lapack_int lda, double* b, lapack_int ldb,
+                           double tola, double tolb, double* alpha,
+                           double* beta, double* u, lapack_int ldu, double* v,
+                           lapack_int ldv, double* q, lapack_int ldq,
+                           lapack_int* ncycle );
+lapack_int LAPACKE_ctgsja( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int p, lapack_int n,
+                           lapack_int k, lapack_int l, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* b,
+                           lapack_int ldb, float tola, float tolb, float* alpha,
+                           float* beta, lapack_complex_float* u, lapack_int ldu,
+                           lapack_complex_float* v, lapack_int ldv,
+                           lapack_complex_float* q, lapack_int ldq,
+                           lapack_int* ncycle );
+lapack_int LAPACKE_ztgsja( int matrix_order, char jobu, char jobv, char jobq,
+                           lapack_int m, lapack_int p, lapack_int n,
+                           lapack_int k, lapack_int l, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* b,
+                           lapack_int ldb, double tola, double tolb,
+                           double* alpha, double* beta,
+                           lapack_complex_double* u, lapack_int ldu,
+                           lapack_complex_double* v, lapack_int ldv,
+                           lapack_complex_double* q, lapack_int ldq,
+                           lapack_int* ncycle );
+
+lapack_int LAPACKE_stgsna( int matrix_order, char job, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const float* a, lapack_int lda, const float* b,
+                           lapack_int ldb, const float* vl, lapack_int ldvl,
+                           const float* vr, lapack_int ldvr, float* s,
+                           float* dif, lapack_int mm, lapack_int* m );
+lapack_int LAPACKE_dtgsna( int matrix_order, char job, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const double* a, lapack_int lda, const double* b,
+                           lapack_int ldb, const double* vl, lapack_int ldvl,
+                           const double* vr, lapack_int ldvr, double* s,
+                           double* dif, lapack_int mm, lapack_int* m );
+lapack_int LAPACKE_ctgsna( int matrix_order, char job, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           const lapack_complex_float* vl, lapack_int ldvl,
+                           const lapack_complex_float* vr, lapack_int ldvr,
+                           float* s, float* dif, lapack_int mm, lapack_int* m );
+lapack_int LAPACKE_ztgsna( int matrix_order, char job, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           const lapack_complex_double* vl, lapack_int ldvl,
+                           const lapack_complex_double* vr, lapack_int ldvr,
+                           double* s, double* dif, lapack_int mm,
+                           lapack_int* m );
+
+lapack_int LAPACKE_stgsyl( int matrix_order, char trans, lapack_int ijob,
+                           lapack_int m, lapack_int n, const float* a,
+                           lapack_int lda, const float* b, lapack_int ldb,
+                           float* c, lapack_int ldc, const float* d,
+                           lapack_int ldd, const float* e, lapack_int lde,
+                           float* f, lapack_int ldf, float* scale, float* dif );
+lapack_int LAPACKE_dtgsyl( int matrix_order, char trans, lapack_int ijob,
+                           lapack_int m, lapack_int n, const double* a,
+                           lapack_int lda, const double* b, lapack_int ldb,
+                           double* c, lapack_int ldc, const double* d,
+                           lapack_int ldd, const double* e, lapack_int lde,
+                           double* f, lapack_int ldf, double* scale,
+                           double* dif );
+lapack_int LAPACKE_ctgsyl( int matrix_order, char trans, lapack_int ijob,
+                           lapack_int m, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* c, lapack_int ldc,
+                           const lapack_complex_float* d, lapack_int ldd,
+                           const lapack_complex_float* e, lapack_int lde,
+                           lapack_complex_float* f, lapack_int ldf,
+                           float* scale, float* dif );
+lapack_int LAPACKE_ztgsyl( int matrix_order, char trans, lapack_int ijob,
+                           lapack_int m, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* c, lapack_int ldc,
+                           const lapack_complex_double* d, lapack_int ldd,
+                           const lapack_complex_double* e, lapack_int lde,
+                           lapack_complex_double* f, lapack_int ldf,
+                           double* scale, double* dif );
+
+lapack_int LAPACKE_stpcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, const float* ap, float* rcond );
+lapack_int LAPACKE_dtpcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, const double* ap, double* rcond );
+lapack_int LAPACKE_ctpcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, const lapack_complex_float* ap,
+                           float* rcond );
+lapack_int LAPACKE_ztpcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, const lapack_complex_double* ap,
+                           double* rcond );
+
+lapack_int LAPACKE_stprfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs, const float* ap,
+                           const float* b, lapack_int ldb, const float* x,
+                           lapack_int ldx, float* ferr, float* berr );
+lapack_int LAPACKE_dtprfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs, const double* ap,
+                           const double* b, lapack_int ldb, const double* x,
+                           lapack_int ldx, double* ferr, double* berr );
+lapack_int LAPACKE_ctprfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_float* ap,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           const lapack_complex_float* x, lapack_int ldx,
+                           float* ferr, float* berr );
+lapack_int LAPACKE_ztprfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_double* ap,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           const lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_stptri( int matrix_order, char uplo, char diag, lapack_int n,
+                           float* ap );
+lapack_int LAPACKE_dtptri( int matrix_order, char uplo, char diag, lapack_int n,
+                           double* ap );
+lapack_int LAPACKE_ctptri( int matrix_order, char uplo, char diag, lapack_int n,
+                           lapack_complex_float* ap );
+lapack_int LAPACKE_ztptri( int matrix_order, char uplo, char diag, lapack_int n,
+                           lapack_complex_double* ap );
+
+lapack_int LAPACKE_stptrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs, const float* ap,
+                           float* b, lapack_int ldb );
+lapack_int LAPACKE_dtptrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs, const double* ap,
+                           double* b, lapack_int ldb );
+lapack_int LAPACKE_ctptrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_float* ap,
+                           lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_ztptrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_double* ap,
+                           lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_stpttf( int matrix_order, char transr, char uplo,
+                           lapack_int n, const float* ap, float* arf );
+lapack_int LAPACKE_dtpttf( int matrix_order, char transr, char uplo,
+                           lapack_int n, const double* ap, double* arf );
+lapack_int LAPACKE_ctpttf( int matrix_order, char transr, char uplo,
+                           lapack_int n, const lapack_complex_float* ap,
+                           lapack_complex_float* arf );
+lapack_int LAPACKE_ztpttf( int matrix_order, char transr, char uplo,
+                           lapack_int n, const lapack_complex_double* ap,
+                           lapack_complex_double* arf );
+
+lapack_int LAPACKE_stpttr( int matrix_order, char uplo, lapack_int n,
+                           const float* ap, float* a, lapack_int lda );
+lapack_int LAPACKE_dtpttr( int matrix_order, char uplo, lapack_int n,
+                           const double* ap, double* a, lapack_int lda );
+lapack_int LAPACKE_ctpttr( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_float* ap,
+                           lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_ztpttr( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_double* ap,
+                           lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_strcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, const float* a, lapack_int lda,
+                           float* rcond );
+lapack_int LAPACKE_dtrcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, const double* a, lapack_int lda,
+                           double* rcond );
+lapack_int LAPACKE_ctrcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, const lapack_complex_float* a,
+                           lapack_int lda, float* rcond );
+lapack_int LAPACKE_ztrcon( int matrix_order, char norm, char uplo, char diag,
+                           lapack_int n, const lapack_complex_double* a,
+                           lapack_int lda, double* rcond );
+
+lapack_int LAPACKE_strevc( int matrix_order, char side, char howmny,
+                           lapack_logical* select, lapack_int n, const float* t,
+                           lapack_int ldt, float* vl, lapack_int ldvl,
+                           float* vr, lapack_int ldvr, lapack_int mm,
+                           lapack_int* m );
+lapack_int LAPACKE_dtrevc( int matrix_order, char side, char howmny,
+                           lapack_logical* select, lapack_int n,
+                           const double* t, lapack_int ldt, double* vl,
+                           lapack_int ldvl, double* vr, lapack_int ldvr,
+                           lapack_int mm, lapack_int* m );
+lapack_int LAPACKE_ctrevc( int matrix_order, char side, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           lapack_complex_float* t, lapack_int ldt,
+                           lapack_complex_float* vl, lapack_int ldvl,
+                           lapack_complex_float* vr, lapack_int ldvr,
+                           lapack_int mm, lapack_int* m );
+lapack_int LAPACKE_ztrevc( int matrix_order, char side, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           lapack_complex_double* t, lapack_int ldt,
+                           lapack_complex_double* vl, lapack_int ldvl,
+                           lapack_complex_double* vr, lapack_int ldvr,
+                           lapack_int mm, lapack_int* m );
+
+lapack_int LAPACKE_strexc( int matrix_order, char compq, lapack_int n, float* t,
+                           lapack_int ldt, float* q, lapack_int ldq,
+                           lapack_int* ifst, lapack_int* ilst );
+lapack_int LAPACKE_dtrexc( int matrix_order, char compq, lapack_int n,
+                           double* t, lapack_int ldt, double* q, lapack_int ldq,
+                           lapack_int* ifst, lapack_int* ilst );
+lapack_int LAPACKE_ctrexc( int matrix_order, char compq, lapack_int n,
+                           lapack_complex_float* t, lapack_int ldt,
+                           lapack_complex_float* q, lapack_int ldq,
+                           lapack_int ifst, lapack_int ilst );
+lapack_int LAPACKE_ztrexc( int matrix_order, char compq, lapack_int n,
+                           lapack_complex_double* t, lapack_int ldt,
+                           lapack_complex_double* q, lapack_int ldq,
+                           lapack_int ifst, lapack_int ilst );
+
+lapack_int LAPACKE_strrfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs, const float* a,
+                           lapack_int lda, const float* b, lapack_int ldb,
+                           const float* x, lapack_int ldx, float* ferr,
+                           float* berr );
+lapack_int LAPACKE_dtrrfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs, const double* a,
+                           lapack_int lda, const double* b, lapack_int ldb,
+                           const double* x, lapack_int ldx, double* ferr,
+                           double* berr );
+lapack_int LAPACKE_ctrrfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           const lapack_complex_float* x, lapack_int ldx,
+                           float* ferr, float* berr );
+lapack_int LAPACKE_ztrrfs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           const lapack_complex_double* x, lapack_int ldx,
+                           double* ferr, double* berr );
+
+lapack_int LAPACKE_strsen( int matrix_order, char job, char compq,
+                           const lapack_logical* select, lapack_int n, float* t,
+                           lapack_int ldt, float* q, lapack_int ldq, float* wr,
+                           float* wi, lapack_int* m, float* s, float* sep );
+lapack_int LAPACKE_dtrsen( int matrix_order, char job, char compq,
+                           const lapack_logical* select, lapack_int n,
+                           double* t, lapack_int ldt, double* q, lapack_int ldq,
+                           double* wr, double* wi, lapack_int* m, double* s,
+                           double* sep );
+lapack_int LAPACKE_ctrsen( int matrix_order, char job, char compq,
+                           const lapack_logical* select, lapack_int n,
+                           lapack_complex_float* t, lapack_int ldt,
+                           lapack_complex_float* q, lapack_int ldq,
+                           lapack_complex_float* w, lapack_int* m, float* s,
+                           float* sep );
+lapack_int LAPACKE_ztrsen( int matrix_order, char job, char compq,
+                           const lapack_logical* select, lapack_int n,
+                           lapack_complex_double* t, lapack_int ldt,
+                           lapack_complex_double* q, lapack_int ldq,
+                           lapack_complex_double* w, lapack_int* m, double* s,
+                           double* sep );
+
+lapack_int LAPACKE_strsna( int matrix_order, char job, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const float* t, lapack_int ldt, const float* vl,
+                           lapack_int ldvl, const float* vr, lapack_int ldvr,
+                           float* s, float* sep, lapack_int mm, lapack_int* m );
+lapack_int LAPACKE_dtrsna( int matrix_order, char job, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const double* t, lapack_int ldt, const double* vl,
+                           lapack_int ldvl, const double* vr, lapack_int ldvr,
+                           double* s, double* sep, lapack_int mm,
+                           lapack_int* m );
+lapack_int LAPACKE_ctrsna( int matrix_order, char job, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const lapack_complex_float* t, lapack_int ldt,
+                           const lapack_complex_float* vl, lapack_int ldvl,
+                           const lapack_complex_float* vr, lapack_int ldvr,
+                           float* s, float* sep, lapack_int mm, lapack_int* m );
+lapack_int LAPACKE_ztrsna( int matrix_order, char job, char howmny,
+                           const lapack_logical* select, lapack_int n,
+                           const lapack_complex_double* t, lapack_int ldt,
+                           const lapack_complex_double* vl, lapack_int ldvl,
+                           const lapack_complex_double* vr, lapack_int ldvr,
+                           double* s, double* sep, lapack_int mm,
+                           lapack_int* m );
+
+lapack_int LAPACKE_strsyl( int matrix_order, char trana, char tranb,
+                           lapack_int isgn, lapack_int m, lapack_int n,
+                           const float* a, lapack_int lda, const float* b,
+                           lapack_int ldb, float* c, lapack_int ldc,
+                           float* scale );
+lapack_int LAPACKE_dtrsyl( int matrix_order, char trana, char tranb,
+                           lapack_int isgn, lapack_int m, lapack_int n,
+                           const double* a, lapack_int lda, const double* b,
+                           lapack_int ldb, double* c, lapack_int ldc,
+                           double* scale );
+lapack_int LAPACKE_ctrsyl( int matrix_order, char trana, char tranb,
+                           lapack_int isgn, lapack_int m, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* b, lapack_int ldb,
+                           lapack_complex_float* c, lapack_int ldc,
+                           float* scale );
+lapack_int LAPACKE_ztrsyl( int matrix_order, char trana, char tranb,
+                           lapack_int isgn, lapack_int m, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* c, lapack_int ldc,
+                           double* scale );
+
+lapack_int LAPACKE_strtri( int matrix_order, char uplo, char diag, lapack_int n,
+                           float* a, lapack_int lda );
+lapack_int LAPACKE_dtrtri( int matrix_order, char uplo, char diag, lapack_int n,
+                           double* a, lapack_int lda );
+lapack_int LAPACKE_ctrtri( int matrix_order, char uplo, char diag, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_ztrtri( int matrix_order, char uplo, char diag, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_strtrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs, const float* a,
+                           lapack_int lda, float* b, lapack_int ldb );
+lapack_int LAPACKE_dtrtrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs, const double* a,
+                           lapack_int lda, double* b, lapack_int ldb );
+lapack_int LAPACKE_ctrtrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_ztrtrs( int matrix_order, char uplo, char trans, char diag,
+                           lapack_int n, lapack_int nrhs,
+                           const lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_strttf( int matrix_order, char transr, char uplo,
+                           lapack_int n, const float* a, lapack_int lda,
+                           float* arf );
+lapack_int LAPACKE_dtrttf( int matrix_order, char transr, char uplo,
+                           lapack_int n, const double* a, lapack_int lda,
+                           double* arf );
+lapack_int LAPACKE_ctrttf( int matrix_order, char transr, char uplo,
+                           lapack_int n, const lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* arf );
+lapack_int LAPACKE_ztrttf( int matrix_order, char transr, char uplo,
+                           lapack_int n, const lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* arf );
+
+lapack_int LAPACKE_strttp( int matrix_order, char uplo, lapack_int n,
+                           const float* a, lapack_int lda, float* ap );
+lapack_int LAPACKE_dtrttp( int matrix_order, char uplo, lapack_int n,
+                           const double* a, lapack_int lda, double* ap );
+lapack_int LAPACKE_ctrttp( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* ap );
+lapack_int LAPACKE_ztrttp( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* ap );
+
+lapack_int LAPACKE_stzrzf( int matrix_order, lapack_int m, lapack_int n,
+                           float* a, lapack_int lda, float* tau );
+lapack_int LAPACKE_dtzrzf( int matrix_order, lapack_int m, lapack_int n,
+                           double* a, lapack_int lda, double* tau );
+lapack_int LAPACKE_ctzrzf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* tau );
+lapack_int LAPACKE_ztzrzf( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* tau );
+
+lapack_int LAPACKE_cungbr( int matrix_order, char vect, lapack_int m,
+                           lapack_int n, lapack_int k, lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* tau );
+lapack_int LAPACKE_zungbr( int matrix_order, char vect, lapack_int m,
+                           lapack_int n, lapack_int k, lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* tau );
+
+lapack_int LAPACKE_cunghr( int matrix_order, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* tau );
+lapack_int LAPACKE_zunghr( int matrix_order, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* tau );
+
+lapack_int LAPACKE_cunglq( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* tau );
+lapack_int LAPACKE_zunglq( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* tau );
+
+lapack_int LAPACKE_cungql( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* tau );
+lapack_int LAPACKE_zungql( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* tau );
+
+lapack_int LAPACKE_cungqr( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* tau );
+lapack_int LAPACKE_zungqr( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* tau );
+
+lapack_int LAPACKE_cungrq( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* tau );
+lapack_int LAPACKE_zungrq( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* tau );
+
+lapack_int LAPACKE_cungtr( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* tau );
+lapack_int LAPACKE_zungtr( int matrix_order, char uplo, lapack_int n,
+                           lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* tau );
+
+lapack_int LAPACKE_cunmbr( int matrix_order, char vect, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* tau,
+                           lapack_complex_float* c, lapack_int ldc );
+lapack_int LAPACKE_zunmbr( int matrix_order, char vect, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* tau,
+                           lapack_complex_double* c, lapack_int ldc );
+
+lapack_int LAPACKE_cunmhr( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, const lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* tau,
+                           lapack_complex_float* c, lapack_int ldc );
+lapack_int LAPACKE_zunmhr( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int ilo,
+                           lapack_int ihi, const lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* tau,
+                           lapack_complex_double* c, lapack_int ldc );
+
+lapack_int LAPACKE_cunmlq( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* tau,
+                           lapack_complex_float* c, lapack_int ldc );
+lapack_int LAPACKE_zunmlq( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* tau,
+                           lapack_complex_double* c, lapack_int ldc );
+
+lapack_int LAPACKE_cunmql( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* tau,
+                           lapack_complex_float* c, lapack_int ldc );
+lapack_int LAPACKE_zunmql( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* tau,
+                           lapack_complex_double* c, lapack_int ldc );
+
+lapack_int LAPACKE_cunmqr( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* tau,
+                           lapack_complex_float* c, lapack_int ldc );
+lapack_int LAPACKE_zunmqr( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* tau,
+                           lapack_complex_double* c, lapack_int ldc );
+
+lapack_int LAPACKE_cunmrq( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* tau,
+                           lapack_complex_float* c, lapack_int ldc );
+lapack_int LAPACKE_zunmrq( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* tau,
+                           lapack_complex_double* c, lapack_int ldc );
+
+lapack_int LAPACKE_cunmrz( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           lapack_int l, const lapack_complex_float* a,
+                           lapack_int lda, const lapack_complex_float* tau,
+                           lapack_complex_float* c, lapack_int ldc );
+lapack_int LAPACKE_zunmrz( int matrix_order, char side, char trans,
+                           lapack_int m, lapack_int n, lapack_int k,
+                           lapack_int l, const lapack_complex_double* a,
+                           lapack_int lda, const lapack_complex_double* tau,
+                           lapack_complex_double* c, lapack_int ldc );
+
+lapack_int LAPACKE_cunmtr( int matrix_order, char side, char uplo, char trans,
+                           lapack_int m, lapack_int n,
+                           const lapack_complex_float* a, lapack_int lda,
+                           const lapack_complex_float* tau,
+                           lapack_complex_float* c, lapack_int ldc );
+lapack_int LAPACKE_zunmtr( int matrix_order, char side, char uplo, char trans,
+                           lapack_int m, lapack_int n,
+                           const lapack_complex_double* a, lapack_int lda,
+                           const lapack_complex_double* tau,
+                           lapack_complex_double* c, lapack_int ldc );
+
+lapack_int LAPACKE_cupgtr( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_float* ap,
+                           const lapack_complex_float* tau,
+                           lapack_complex_float* q, lapack_int ldq );
+lapack_int LAPACKE_zupgtr( int matrix_order, char uplo, lapack_int n,
+                           const lapack_complex_double* ap,
+                           const lapack_complex_double* tau,
+                           lapack_complex_double* q, lapack_int ldq );
+
+lapack_int LAPACKE_cupmtr( int matrix_order, char side, char uplo, char trans,
+                           lapack_int m, lapack_int n,
+                           const lapack_complex_float* ap,
+                           const lapack_complex_float* tau,
+                           lapack_complex_float* c, lapack_int ldc );
+lapack_int LAPACKE_zupmtr( int matrix_order, char side, char uplo, char trans,
+                           lapack_int m, lapack_int n,
+                           const lapack_complex_double* ap,
+                           const lapack_complex_double* tau,
+                           lapack_complex_double* c, lapack_int ldc );
+
+lapack_int LAPACKE_sbdsdc_work( int matrix_order, char uplo, char compq,
+                                lapack_int n, float* d, float* e, float* u,
+                                lapack_int ldu, float* vt, lapack_int ldvt,
+                                float* q, lapack_int* iq, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dbdsdc_work( int matrix_order, char uplo, char compq,
+                                lapack_int n, double* d, double* e, double* u,
+                                lapack_int ldu, double* vt, lapack_int ldvt,
+                                double* q, lapack_int* iq, double* work,
+                                lapack_int* iwork );
+
+lapack_int LAPACKE_sbdsqr_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int ncvt, lapack_int nru, lapack_int ncc,
+                                float* d, float* e, float* vt, lapack_int ldvt,
+                                float* u, lapack_int ldu, float* c,
+                                lapack_int ldc, float* work );
+lapack_int LAPACKE_dbdsqr_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int ncvt, lapack_int nru, lapack_int ncc,
+                                double* d, double* e, double* vt,
+                                lapack_int ldvt, double* u, lapack_int ldu,
+                                double* c, lapack_int ldc, double* work );
+lapack_int LAPACKE_cbdsqr_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int ncvt, lapack_int nru, lapack_int ncc,
+                                float* d, float* e, lapack_complex_float* vt,
+                                lapack_int ldvt, lapack_complex_float* u,
+                                lapack_int ldu, lapack_complex_float* c,
+                                lapack_int ldc, float* work );
+lapack_int LAPACKE_zbdsqr_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int ncvt, lapack_int nru, lapack_int ncc,
+                                double* d, double* e, lapack_complex_double* vt,
+                                lapack_int ldvt, lapack_complex_double* u,
+                                lapack_int ldu, lapack_complex_double* c,
+                                lapack_int ldc, double* work );
+
+lapack_int LAPACKE_sdisna_work( char job, lapack_int m, lapack_int n,
+                                const float* d, float* sep );
+lapack_int LAPACKE_ddisna_work( char job, lapack_int m, lapack_int n,
+                                const double* d, double* sep );
+
+lapack_int LAPACKE_sgbbrd_work( int matrix_order, char vect, lapack_int m,
+                                lapack_int n, lapack_int ncc, lapack_int kl,
+                                lapack_int ku, float* ab, lapack_int ldab,
+                                float* d, float* e, float* q, lapack_int ldq,
+                                float* pt, lapack_int ldpt, float* c,
+                                lapack_int ldc, float* work );
+lapack_int LAPACKE_dgbbrd_work( int matrix_order, char vect, lapack_int m,
+                                lapack_int n, lapack_int ncc, lapack_int kl,
+                                lapack_int ku, double* ab, lapack_int ldab,
+                                double* d, double* e, double* q, lapack_int ldq,
+                                double* pt, lapack_int ldpt, double* c,
+                                lapack_int ldc, double* work );
+lapack_int LAPACKE_cgbbrd_work( int matrix_order, char vect, lapack_int m,
+                                lapack_int n, lapack_int ncc, lapack_int kl,
+                                lapack_int ku, lapack_complex_float* ab,
+                                lapack_int ldab, float* d, float* e,
+                                lapack_complex_float* q, lapack_int ldq,
+                                lapack_complex_float* pt, lapack_int ldpt,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zgbbrd_work( int matrix_order, char vect, lapack_int m,
+                                lapack_int n, lapack_int ncc, lapack_int kl,
+                                lapack_int ku, lapack_complex_double* ab,
+                                lapack_int ldab, double* d, double* e,
+                                lapack_complex_double* q, lapack_int ldq,
+                                lapack_complex_double* pt, lapack_int ldpt,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sgbcon_work( int matrix_order, char norm, lapack_int n,
+                                lapack_int kl, lapack_int ku, const float* ab,
+                                lapack_int ldab, const lapack_int* ipiv,
+                                float anorm, float* rcond, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dgbcon_work( int matrix_order, char norm, lapack_int n,
+                                lapack_int kl, lapack_int ku, const double* ab,
+                                lapack_int ldab, const lapack_int* ipiv,
+                                double anorm, double* rcond, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_cgbcon_work( int matrix_order, char norm, lapack_int n,
+                                lapack_int kl, lapack_int ku,
+                                const lapack_complex_float* ab, lapack_int ldab,
+                                const lapack_int* ipiv, float anorm,
+                                float* rcond, lapack_complex_float* work,
+                                float* rwork );
+lapack_int LAPACKE_zgbcon_work( int matrix_order, char norm, lapack_int n,
+                                lapack_int kl, lapack_int ku,
+                                const lapack_complex_double* ab,
+                                lapack_int ldab, const lapack_int* ipiv,
+                                double anorm, double* rcond,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sgbequ_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku, const float* ab,
+                                lapack_int ldab, float* r, float* c,
+                                float* rowcnd, float* colcnd, float* amax );
+lapack_int LAPACKE_dgbequ_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku, const double* ab,
+                                lapack_int ldab, double* r, double* c,
+                                double* rowcnd, double* colcnd, double* amax );
+lapack_int LAPACKE_cgbequ_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku,
+                                const lapack_complex_float* ab, lapack_int ldab,
+                                float* r, float* c, float* rowcnd,
+                                float* colcnd, float* amax );
+lapack_int LAPACKE_zgbequ_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku,
+                                const lapack_complex_double* ab,
+                                lapack_int ldab, double* r, double* c,
+                                double* rowcnd, double* colcnd, double* amax );
+
+lapack_int LAPACKE_sgbequb_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_int kl, lapack_int ku, const float* ab,
+                                 lapack_int ldab, float* r, float* c,
+                                 float* rowcnd, float* colcnd, float* amax );
+lapack_int LAPACKE_dgbequb_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_int kl, lapack_int ku, const double* ab,
+                                 lapack_int ldab, double* r, double* c,
+                                 double* rowcnd, double* colcnd, double* amax );
+lapack_int LAPACKE_cgbequb_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_int kl, lapack_int ku,
+                                 const lapack_complex_float* ab,
+                                 lapack_int ldab, float* r, float* c,
+                                 float* rowcnd, float* colcnd, float* amax );
+lapack_int LAPACKE_zgbequb_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_int kl, lapack_int ku,
+                                 const lapack_complex_double* ab,
+                                 lapack_int ldab, double* r, double* c,
+                                 double* rowcnd, double* colcnd, double* amax );
+
+lapack_int LAPACKE_sgbrfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int kl, lapack_int ku, lapack_int nrhs,
+                                const float* ab, lapack_int ldab,
+                                const float* afb, lapack_int ldafb,
+                                const lapack_int* ipiv, const float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* ferr, float* berr, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dgbrfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int kl, lapack_int ku, lapack_int nrhs,
+                                const double* ab, lapack_int ldab,
+                                const double* afb, lapack_int ldafb,
+                                const lapack_int* ipiv, const double* b,
+                                lapack_int ldb, double* x, lapack_int ldx,
+                                double* ferr, double* berr, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_cgbrfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int kl, lapack_int ku, lapack_int nrhs,
+                                const lapack_complex_float* ab, lapack_int ldab,
+                                const lapack_complex_float* afb,
+                                lapack_int ldafb, const lapack_int* ipiv,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zgbrfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int kl, lapack_int ku, lapack_int nrhs,
+                                const lapack_complex_double* ab,
+                                lapack_int ldab,
+                                const lapack_complex_double* afb,
+                                lapack_int ldafb, const lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sgbrfsx_work( int matrix_order, char trans, char equed,
+                                 lapack_int n, lapack_int kl, lapack_int ku,
+                                 lapack_int nrhs, const float* ab,
+                                 lapack_int ldab, const float* afb,
+                                 lapack_int ldafb, const lapack_int* ipiv,
+                                 const float* r, const float* c, const float* b,
+                                 lapack_int ldb, float* x, lapack_int ldx,
+                                 float* rcond, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, float* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_dgbrfsx_work( int matrix_order, char trans, char equed,
+                                 lapack_int n, lapack_int kl, lapack_int ku,
+                                 lapack_int nrhs, const double* ab,
+                                 lapack_int ldab, const double* afb,
+                                 lapack_int ldafb, const lapack_int* ipiv,
+                                 const double* r, const double* c,
+                                 const double* b, lapack_int ldb, double* x,
+                                 lapack_int ldx, double* rcond, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, double* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_cgbrfsx_work( int matrix_order, char trans, char equed,
+                                 lapack_int n, lapack_int kl, lapack_int ku,
+                                 lapack_int nrhs,
+                                 const lapack_complex_float* ab,
+                                 lapack_int ldab,
+                                 const lapack_complex_float* afb,
+                                 lapack_int ldafb, const lapack_int* ipiv,
+                                 const float* r, const float* c,
+                                 const lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* x, lapack_int ldx,
+                                 float* rcond, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, lapack_complex_float* work,
+                                 float* rwork );
+lapack_int LAPACKE_zgbrfsx_work( int matrix_order, char trans, char equed,
+                                 lapack_int n, lapack_int kl, lapack_int ku,
+                                 lapack_int nrhs,
+                                 const lapack_complex_double* ab,
+                                 lapack_int ldab,
+                                 const lapack_complex_double* afb,
+                                 lapack_int ldafb, const lapack_int* ipiv,
+                                 const double* r, const double* c,
+                                 const lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 double* rcond, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, lapack_complex_double* work,
+                                 double* rwork );
+
+lapack_int LAPACKE_sgbsv_work( int matrix_order, lapack_int n, lapack_int kl,
+                               lapack_int ku, lapack_int nrhs, float* ab,
+                               lapack_int ldab, lapack_int* ipiv, float* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_dgbsv_work( int matrix_order, lapack_int n, lapack_int kl,
+                               lapack_int ku, lapack_int nrhs, double* ab,
+                               lapack_int ldab, lapack_int* ipiv, double* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_cgbsv_work( int matrix_order, lapack_int n, lapack_int kl,
+                               lapack_int ku, lapack_int nrhs,
+                               lapack_complex_float* ab, lapack_int ldab,
+                               lapack_int* ipiv, lapack_complex_float* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_zgbsv_work( int matrix_order, lapack_int n, lapack_int kl,
+                               lapack_int ku, lapack_int nrhs,
+                               lapack_complex_double* ab, lapack_int ldab,
+                               lapack_int* ipiv, lapack_complex_double* b,
+                               lapack_int ldb );
+
+lapack_int LAPACKE_sgbsvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int kl, lapack_int ku,
+                                lapack_int nrhs, float* ab, lapack_int ldab,
+                                float* afb, lapack_int ldafb, lapack_int* ipiv,
+                                char* equed, float* r, float* c, float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* rcond, float* ferr, float* berr,
+                                float* work, lapack_int* iwork );
+lapack_int LAPACKE_dgbsvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int kl, lapack_int ku,
+                                lapack_int nrhs, double* ab, lapack_int ldab,
+                                double* afb, lapack_int ldafb, lapack_int* ipiv,
+                                char* equed, double* r, double* c, double* b,
+                                lapack_int ldb, double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_cgbsvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int kl, lapack_int ku,
+                                lapack_int nrhs, lapack_complex_float* ab,
+                                lapack_int ldab, lapack_complex_float* afb,
+                                lapack_int ldafb, lapack_int* ipiv, char* equed,
+                                float* r, float* c, lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* x,
+                                lapack_int ldx, float* rcond, float* ferr,
+                                float* berr, lapack_complex_float* work,
+                                float* rwork );
+lapack_int LAPACKE_zgbsvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int kl, lapack_int ku,
+                                lapack_int nrhs, lapack_complex_double* ab,
+                                lapack_int ldab, lapack_complex_double* afb,
+                                lapack_int ldafb, lapack_int* ipiv, char* equed,
+                                double* r, double* c, lapack_complex_double* b,
+                                lapack_int ldb, lapack_complex_double* x,
+                                lapack_int ldx, double* rcond, double* ferr,
+                                double* berr, lapack_complex_double* work,
+                                double* rwork );
+
+lapack_int LAPACKE_sgbsvxx_work( int matrix_order, char fact, char trans,
+                                 lapack_int n, lapack_int kl, lapack_int ku,
+                                 lapack_int nrhs, float* ab, lapack_int ldab,
+                                 float* afb, lapack_int ldafb, lapack_int* ipiv,
+                                 char* equed, float* r, float* c, float* b,
+                                 lapack_int ldb, float* x, lapack_int ldx,
+                                 float* rcond, float* rpvgrw, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, float* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_dgbsvxx_work( int matrix_order, char fact, char trans,
+                                 lapack_int n, lapack_int kl, lapack_int ku,
+                                 lapack_int nrhs, double* ab, lapack_int ldab,
+                                 double* afb, lapack_int ldafb,
+                                 lapack_int* ipiv, char* equed, double* r,
+                                 double* c, double* b, lapack_int ldb,
+                                 double* x, lapack_int ldx, double* rcond,
+                                 double* rpvgrw, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, double* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_cgbsvxx_work( int matrix_order, char fact, char trans,
+                                 lapack_int n, lapack_int kl, lapack_int ku,
+                                 lapack_int nrhs, lapack_complex_float* ab,
+                                 lapack_int ldab, lapack_complex_float* afb,
+                                 lapack_int ldafb, lapack_int* ipiv,
+                                 char* equed, float* r, float* c,
+                                 lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* x, lapack_int ldx,
+                                 float* rcond, float* rpvgrw, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, lapack_complex_float* work,
+                                 float* rwork );
+lapack_int LAPACKE_zgbsvxx_work( int matrix_order, char fact, char trans,
+                                 lapack_int n, lapack_int kl, lapack_int ku,
+                                 lapack_int nrhs, lapack_complex_double* ab,
+                                 lapack_int ldab, lapack_complex_double* afb,
+                                 lapack_int ldafb, lapack_int* ipiv,
+                                 char* equed, double* r, double* c,
+                                 lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 double* rcond, double* rpvgrw, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, lapack_complex_double* work,
+                                 double* rwork );
+
+lapack_int LAPACKE_sgbtrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku, float* ab,
+                                lapack_int ldab, lapack_int* ipiv );
+lapack_int LAPACKE_dgbtrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku, double* ab,
+                                lapack_int ldab, lapack_int* ipiv );
+lapack_int LAPACKE_cgbtrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku,
+                                lapack_complex_float* ab, lapack_int ldab,
+                                lapack_int* ipiv );
+lapack_int LAPACKE_zgbtrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku,
+                                lapack_complex_double* ab, lapack_int ldab,
+                                lapack_int* ipiv );
+
+lapack_int LAPACKE_sgbtrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int kl, lapack_int ku, lapack_int nrhs,
+                                const float* ab, lapack_int ldab,
+                                const lapack_int* ipiv, float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_dgbtrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int kl, lapack_int ku, lapack_int nrhs,
+                                const double* ab, lapack_int ldab,
+                                const lapack_int* ipiv, double* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_cgbtrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int kl, lapack_int ku, lapack_int nrhs,
+                                const lapack_complex_float* ab, lapack_int ldab,
+                                const lapack_int* ipiv, lapack_complex_float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_zgbtrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int kl, lapack_int ku, lapack_int nrhs,
+                                const lapack_complex_double* ab,
+                                lapack_int ldab, const lapack_int* ipiv,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_sgebak_work( int matrix_order, char job, char side,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                const float* scale, lapack_int m, float* v,
+                                lapack_int ldv );
+lapack_int LAPACKE_dgebak_work( int matrix_order, char job, char side,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                const double* scale, lapack_int m, double* v,
+                                lapack_int ldv );
+lapack_int LAPACKE_cgebak_work( int matrix_order, char job, char side,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                const float* scale, lapack_int m,
+                                lapack_complex_float* v, lapack_int ldv );
+lapack_int LAPACKE_zgebak_work( int matrix_order, char job, char side,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                const double* scale, lapack_int m,
+                                lapack_complex_double* v, lapack_int ldv );
+
+lapack_int LAPACKE_sgebal_work( int matrix_order, char job, lapack_int n,
+                                float* a, lapack_int lda, lapack_int* ilo,
+                                lapack_int* ihi, float* scale );
+lapack_int LAPACKE_dgebal_work( int matrix_order, char job, lapack_int n,
+                                double* a, lapack_int lda, lapack_int* ilo,
+                                lapack_int* ihi, double* scale );
+lapack_int LAPACKE_cgebal_work( int matrix_order, char job, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_int* ilo, lapack_int* ihi,
+                                float* scale );
+lapack_int LAPACKE_zgebal_work( int matrix_order, char job, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int* ilo, lapack_int* ihi,
+                                double* scale );
+
+lapack_int LAPACKE_sgebrd_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, float* d, float* e,
+                                float* tauq, float* taup, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dgebrd_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, double* d, double* e,
+                                double* tauq, double* taup, double* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_cgebrd_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                float* d, float* e, lapack_complex_float* tauq,
+                                lapack_complex_float* taup,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zgebrd_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                double* d, double* e,
+                                lapack_complex_double* tauq,
+                                lapack_complex_double* taup,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sgecon_work( int matrix_order, char norm, lapack_int n,
+                                const float* a, lapack_int lda, float anorm,
+                                float* rcond, float* work, lapack_int* iwork );
+lapack_int LAPACKE_dgecon_work( int matrix_order, char norm, lapack_int n,
+                                const double* a, lapack_int lda, double anorm,
+                                double* rcond, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_cgecon_work( int matrix_order, char norm, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                float anorm, float* rcond,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zgecon_work( int matrix_order, char norm, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                double anorm, double* rcond,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sgeequ_work( int matrix_order, lapack_int m, lapack_int n,
+                                const float* a, lapack_int lda, float* r,
+                                float* c, float* rowcnd, float* colcnd,
+                                float* amax );
+lapack_int LAPACKE_dgeequ_work( int matrix_order, lapack_int m, lapack_int n,
+                                const double* a, lapack_int lda, double* r,
+                                double* c, double* rowcnd, double* colcnd,
+                                double* amax );
+lapack_int LAPACKE_cgeequ_work( int matrix_order, lapack_int m, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                float* r, float* c, float* rowcnd,
+                                float* colcnd, float* amax );
+lapack_int LAPACKE_zgeequ_work( int matrix_order, lapack_int m, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                double* r, double* c, double* rowcnd,
+                                double* colcnd, double* amax );
+
+lapack_int LAPACKE_sgeequb_work( int matrix_order, lapack_int m, lapack_int n,
+                                 const float* a, lapack_int lda, float* r,
+                                 float* c, float* rowcnd, float* colcnd,
+                                 float* amax );
+lapack_int LAPACKE_dgeequb_work( int matrix_order, lapack_int m, lapack_int n,
+                                 const double* a, lapack_int lda, double* r,
+                                 double* c, double* rowcnd, double* colcnd,
+                                 double* amax );
+lapack_int LAPACKE_cgeequb_work( int matrix_order, lapack_int m, lapack_int n,
+                                 const lapack_complex_float* a, lapack_int lda,
+                                 float* r, float* c, float* rowcnd,
+                                 float* colcnd, float* amax );
+lapack_int LAPACKE_zgeequb_work( int matrix_order, lapack_int m, lapack_int n,
+                                 const lapack_complex_double* a, lapack_int lda,
+                                 double* r, double* c, double* rowcnd,
+                                 double* colcnd, double* amax );
+
+lapack_int LAPACKE_sgees_work( int matrix_order, char jobvs, char sort,
+                               LAPACK_S_SELECT2 select, lapack_int n, float* a,
+                               lapack_int lda, lapack_int* sdim, float* wr,
+                               float* wi, float* vs, lapack_int ldvs,
+                               float* work, lapack_int lwork,
+                               lapack_logical* bwork );
+lapack_int LAPACKE_dgees_work( int matrix_order, char jobvs, char sort,
+                               LAPACK_D_SELECT2 select, lapack_int n, double* a,
+                               lapack_int lda, lapack_int* sdim, double* wr,
+                               double* wi, double* vs, lapack_int ldvs,
+                               double* work, lapack_int lwork,
+                               lapack_logical* bwork );
+lapack_int LAPACKE_cgees_work( int matrix_order, char jobvs, char sort,
+                               LAPACK_C_SELECT1 select, lapack_int n,
+                               lapack_complex_float* a, lapack_int lda,
+                               lapack_int* sdim, lapack_complex_float* w,
+                               lapack_complex_float* vs, lapack_int ldvs,
+                               lapack_complex_float* work, lapack_int lwork,
+                               float* rwork, lapack_logical* bwork );
+lapack_int LAPACKE_zgees_work( int matrix_order, char jobvs, char sort,
+                               LAPACK_Z_SELECT1 select, lapack_int n,
+                               lapack_complex_double* a, lapack_int lda,
+                               lapack_int* sdim, lapack_complex_double* w,
+                               lapack_complex_double* vs, lapack_int ldvs,
+                               lapack_complex_double* work, lapack_int lwork,
+                               double* rwork, lapack_logical* bwork );
+
+lapack_int LAPACKE_sgeesx_work( int matrix_order, char jobvs, char sort,
+                                LAPACK_S_SELECT2 select, char sense,
+                                lapack_int n, float* a, lapack_int lda,
+                                lapack_int* sdim, float* wr, float* wi,
+                                float* vs, lapack_int ldvs, float* rconde,
+                                float* rcondv, float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork,
+                                lapack_logical* bwork );
+lapack_int LAPACKE_dgeesx_work( int matrix_order, char jobvs, char sort,
+                                LAPACK_D_SELECT2 select, char sense,
+                                lapack_int n, double* a, lapack_int lda,
+                                lapack_int* sdim, double* wr, double* wi,
+                                double* vs, lapack_int ldvs, double* rconde,
+                                double* rcondv, double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork,
+                                lapack_logical* bwork );
+lapack_int LAPACKE_cgeesx_work( int matrix_order, char jobvs, char sort,
+                                LAPACK_C_SELECT1 select, char sense,
+                                lapack_int n, lapack_complex_float* a,
+                                lapack_int lda, lapack_int* sdim,
+                                lapack_complex_float* w,
+                                lapack_complex_float* vs, lapack_int ldvs,
+                                float* rconde, float* rcondv,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork, lapack_logical* bwork );
+lapack_int LAPACKE_zgeesx_work( int matrix_order, char jobvs, char sort,
+                                LAPACK_Z_SELECT1 select, char sense,
+                                lapack_int n, lapack_complex_double* a,
+                                lapack_int lda, lapack_int* sdim,
+                                lapack_complex_double* w,
+                                lapack_complex_double* vs, lapack_int ldvs,
+                                double* rconde, double* rcondv,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork, lapack_logical* bwork );
+
+lapack_int LAPACKE_sgeev_work( int matrix_order, char jobvl, char jobvr,
+                               lapack_int n, float* a, lapack_int lda,
+                               float* wr, float* wi, float* vl, lapack_int ldvl,
+                               float* vr, lapack_int ldvr, float* work,
+                               lapack_int lwork );
+lapack_int LAPACKE_dgeev_work( int matrix_order, char jobvl, char jobvr,
+                               lapack_int n, double* a, lapack_int lda,
+                               double* wr, double* wi, double* vl,
+                               lapack_int ldvl, double* vr, lapack_int ldvr,
+                               double* work, lapack_int lwork );
+lapack_int LAPACKE_cgeev_work( int matrix_order, char jobvl, char jobvr,
+                               lapack_int n, lapack_complex_float* a,
+                               lapack_int lda, lapack_complex_float* w,
+                               lapack_complex_float* vl, lapack_int ldvl,
+                               lapack_complex_float* vr, lapack_int ldvr,
+                               lapack_complex_float* work, lapack_int lwork,
+                               float* rwork );
+lapack_int LAPACKE_zgeev_work( int matrix_order, char jobvl, char jobvr,
+                               lapack_int n, lapack_complex_double* a,
+                               lapack_int lda, lapack_complex_double* w,
+                               lapack_complex_double* vl, lapack_int ldvl,
+                               lapack_complex_double* vr, lapack_int ldvr,
+                               lapack_complex_double* work, lapack_int lwork,
+                               double* rwork );
+
+lapack_int LAPACKE_sgeevx_work( int matrix_order, char balanc, char jobvl,
+                                char jobvr, char sense, lapack_int n, float* a,
+                                lapack_int lda, float* wr, float* wi, float* vl,
+                                lapack_int ldvl, float* vr, lapack_int ldvr,
+                                lapack_int* ilo, lapack_int* ihi, float* scale,
+                                float* abnrm, float* rconde, float* rcondv,
+                                float* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dgeevx_work( int matrix_order, char balanc, char jobvl,
+                                char jobvr, char sense, lapack_int n, double* a,
+                                lapack_int lda, double* wr, double* wi,
+                                double* vl, lapack_int ldvl, double* vr,
+                                lapack_int ldvr, lapack_int* ilo,
+                                lapack_int* ihi, double* scale, double* abnrm,
+                                double* rconde, double* rcondv, double* work,
+                                lapack_int lwork, lapack_int* iwork );
+lapack_int LAPACKE_cgeevx_work( int matrix_order, char balanc, char jobvl,
+                                char jobvr, char sense, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* w,
+                                lapack_complex_float* vl, lapack_int ldvl,
+                                lapack_complex_float* vr, lapack_int ldvr,
+                                lapack_int* ilo, lapack_int* ihi, float* scale,
+                                float* abnrm, float* rconde, float* rcondv,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork );
+lapack_int LAPACKE_zgeevx_work( int matrix_order, char balanc, char jobvl,
+                                char jobvr, char sense, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* w,
+                                lapack_complex_double* vl, lapack_int ldvl,
+                                lapack_complex_double* vr, lapack_int ldvr,
+                                lapack_int* ilo, lapack_int* ihi, double* scale,
+                                double* abnrm, double* rconde, double* rcondv,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork );
+
+lapack_int LAPACKE_sgehrd_work( int matrix_order, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, float* a, lapack_int lda,
+                                float* tau, float* work, lapack_int lwork );
+lapack_int LAPACKE_dgehrd_work( int matrix_order, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, double* a, lapack_int lda,
+                                double* tau, double* work, lapack_int lwork );
+lapack_int LAPACKE_cgehrd_work( int matrix_order, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zgehrd_work( int matrix_order, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sgejsv_work( int matrix_order, char joba, char jobu,
+                                char jobv, char jobr, char jobt, char jobp,
+                                lapack_int m, lapack_int n, float* a,
+                                lapack_int lda, float* sva, float* u,
+                                lapack_int ldu, float* v, lapack_int ldv,
+                                float* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dgejsv_work( int matrix_order, char joba, char jobu,
+                                char jobv, char jobr, char jobt, char jobp,
+                                lapack_int m, lapack_int n, double* a,
+                                lapack_int lda, double* sva, double* u,
+                                lapack_int ldu, double* v, lapack_int ldv,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork );
+
+lapack_int LAPACKE_sgelq2_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, float* tau,
+                                float* work );
+lapack_int LAPACKE_dgelq2_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, double* tau,
+                                double* work );
+lapack_int LAPACKE_cgelq2_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* tau,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zgelq2_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* tau,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_sgelqf_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, float* tau,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dgelqf_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, double* tau,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_cgelqf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zgelqf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sgels_work( int matrix_order, char trans, lapack_int m,
+                               lapack_int n, lapack_int nrhs, float* a,
+                               lapack_int lda, float* b, lapack_int ldb,
+                               float* work, lapack_int lwork );
+lapack_int LAPACKE_dgels_work( int matrix_order, char trans, lapack_int m,
+                               lapack_int n, lapack_int nrhs, double* a,
+                               lapack_int lda, double* b, lapack_int ldb,
+                               double* work, lapack_int lwork );
+lapack_int LAPACKE_cgels_work( int matrix_order, char trans, lapack_int m,
+                               lapack_int n, lapack_int nrhs,
+                               lapack_complex_float* a, lapack_int lda,
+                               lapack_complex_float* b, lapack_int ldb,
+                               lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zgels_work( int matrix_order, char trans, lapack_int m,
+                               lapack_int n, lapack_int nrhs,
+                               lapack_complex_double* a, lapack_int lda,
+                               lapack_complex_double* b, lapack_int ldb,
+                               lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sgelsd_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, float* a, lapack_int lda,
+                                float* b, lapack_int ldb, float* s, float rcond,
+                                lapack_int* rank, float* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dgelsd_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, double* a, lapack_int lda,
+                                double* b, lapack_int ldb, double* s,
+                                double rcond, lapack_int* rank, double* work,
+                                lapack_int lwork, lapack_int* iwork );
+lapack_int LAPACKE_cgelsd_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* b,
+                                lapack_int ldb, float* s, float rcond,
+                                lapack_int* rank, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_zgelsd_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* b,
+                                lapack_int ldb, double* s, double rcond,
+                                lapack_int* rank, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork,
+                                lapack_int* iwork );
+
+lapack_int LAPACKE_sgelss_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, float* a, lapack_int lda,
+                                float* b, lapack_int ldb, float* s, float rcond,
+                                lapack_int* rank, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dgelss_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, double* a, lapack_int lda,
+                                double* b, lapack_int ldb, double* s,
+                                double rcond, lapack_int* rank, double* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_cgelss_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* b,
+                                lapack_int ldb, float* s, float rcond,
+                                lapack_int* rank, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork );
+lapack_int LAPACKE_zgelss_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* b,
+                                lapack_int ldb, double* s, double rcond,
+                                lapack_int* rank, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork );
+
+lapack_int LAPACKE_sgelsy_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, float* a, lapack_int lda,
+                                float* b, lapack_int ldb, lapack_int* jpvt,
+                                float rcond, lapack_int* rank, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dgelsy_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, double* a, lapack_int lda,
+                                double* b, lapack_int ldb, lapack_int* jpvt,
+                                double rcond, lapack_int* rank, double* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_cgelsy_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* b,
+                                lapack_int ldb, lapack_int* jpvt, float rcond,
+                                lapack_int* rank, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork );
+lapack_int LAPACKE_zgelsy_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nrhs, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* b,
+                                lapack_int ldb, lapack_int* jpvt, double rcond,
+                                lapack_int* rank, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork );
+
+lapack_int LAPACKE_sgeqlf_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, float* tau,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dgeqlf_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, double* tau,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_cgeqlf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zgeqlf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sgeqp3_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, lapack_int* jpvt,
+                                float* tau, float* work, lapack_int lwork );
+lapack_int LAPACKE_dgeqp3_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, lapack_int* jpvt,
+                                double* tau, double* work, lapack_int lwork );
+lapack_int LAPACKE_cgeqp3_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_int* jpvt, lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork );
+lapack_int LAPACKE_zgeqp3_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int* jpvt, lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork );
+
+lapack_int LAPACKE_sgeqpf_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, lapack_int* jpvt,
+                                float* tau, float* work );
+lapack_int LAPACKE_dgeqpf_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, lapack_int* jpvt,
+                                double* tau, double* work );
+lapack_int LAPACKE_cgeqpf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_int* jpvt, lapack_complex_float* tau,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zgeqpf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int* jpvt, lapack_complex_double* tau,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sgeqr2_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, float* tau,
+                                float* work );
+lapack_int LAPACKE_dgeqr2_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, double* tau,
+                                double* work );
+lapack_int LAPACKE_cgeqr2_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* tau,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zgeqr2_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* tau,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_sgeqrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, float* tau,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dgeqrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, double* tau,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_cgeqrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zgeqrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sgeqrfp_work( int matrix_order, lapack_int m, lapack_int n,
+                                 float* a, lapack_int lda, float* tau,
+                                 float* work, lapack_int lwork );
+lapack_int LAPACKE_dgeqrfp_work( int matrix_order, lapack_int m, lapack_int n,
+                                 double* a, lapack_int lda, double* tau,
+                                 double* work, lapack_int lwork );
+lapack_int LAPACKE_cgeqrfp_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 lapack_complex_float* tau,
+                                 lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zgeqrfp_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 lapack_complex_double* tau,
+                                 lapack_complex_double* work,
+                                 lapack_int lwork );
+
+lapack_int LAPACKE_sgerfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const float* a, lapack_int lda,
+                                const float* af, lapack_int ldaf,
+                                const lapack_int* ipiv, const float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* ferr, float* berr, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dgerfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const double* a,
+                                lapack_int lda, const double* af,
+                                lapack_int ldaf, const lapack_int* ipiv,
+                                const double* b, lapack_int ldb, double* x,
+                                lapack_int ldx, double* ferr, double* berr,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_cgerfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* af,
+                                lapack_int ldaf, const lapack_int* ipiv,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zgerfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_double* a,
+                                lapack_int lda, const lapack_complex_double* af,
+                                lapack_int ldaf, const lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sgerfsx_work( int matrix_order, char trans, char equed,
+                                 lapack_int n, lapack_int nrhs, const float* a,
+                                 lapack_int lda, const float* af,
+                                 lapack_int ldaf, const lapack_int* ipiv,
+                                 const float* r, const float* c, const float* b,
+                                 lapack_int ldb, float* x, lapack_int ldx,
+                                 float* rcond, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, float* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_dgerfsx_work( int matrix_order, char trans, char equed,
+                                 lapack_int n, lapack_int nrhs, const double* a,
+                                 lapack_int lda, const double* af,
+                                 lapack_int ldaf, const lapack_int* ipiv,
+                                 const double* r, const double* c,
+                                 const double* b, lapack_int ldb, double* x,
+                                 lapack_int ldx, double* rcond, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, double* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_cgerfsx_work( int matrix_order, char trans, char equed,
+                                 lapack_int n, lapack_int nrhs,
+                                 const lapack_complex_float* a, lapack_int lda,
+                                 const lapack_complex_float* af,
+                                 lapack_int ldaf, const lapack_int* ipiv,
+                                 const float* r, const float* c,
+                                 const lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* x, lapack_int ldx,
+                                 float* rcond, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, lapack_complex_float* work,
+                                 float* rwork );
+lapack_int LAPACKE_zgerfsx_work( int matrix_order, char trans, char equed,
+                                 lapack_int n, lapack_int nrhs,
+                                 const lapack_complex_double* a, lapack_int lda,
+                                 const lapack_complex_double* af,
+                                 lapack_int ldaf, const lapack_int* ipiv,
+                                 const double* r, const double* c,
+                                 const lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 double* rcond, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, lapack_complex_double* work,
+                                 double* rwork );
+
+lapack_int LAPACKE_sgerqf_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, float* tau,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dgerqf_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, double* tau,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_cgerqf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zgerqf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sgesdd_work( int matrix_order, char jobz, lapack_int m,
+                                lapack_int n, float* a, lapack_int lda,
+                                float* s, float* u, lapack_int ldu, float* vt,
+                                lapack_int ldvt, float* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dgesdd_work( int matrix_order, char jobz, lapack_int m,
+                                lapack_int n, double* a, lapack_int lda,
+                                double* s, double* u, lapack_int ldu,
+                                double* vt, lapack_int ldvt, double* work,
+                                lapack_int lwork, lapack_int* iwork );
+lapack_int LAPACKE_cgesdd_work( int matrix_order, char jobz, lapack_int m,
+                                lapack_int n, lapack_complex_float* a,
+                                lapack_int lda, float* s,
+                                lapack_complex_float* u, lapack_int ldu,
+                                lapack_complex_float* vt, lapack_int ldvt,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork, lapack_int* iwork );
+lapack_int LAPACKE_zgesdd_work( int matrix_order, char jobz, lapack_int m,
+                                lapack_int n, lapack_complex_double* a,
+                                lapack_int lda, double* s,
+                                lapack_complex_double* u, lapack_int ldu,
+                                lapack_complex_double* vt, lapack_int ldvt,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork, lapack_int* iwork );
+
+lapack_int LAPACKE_sgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               float* a, lapack_int lda, lapack_int* ipiv,
+                               float* b, lapack_int ldb );
+lapack_int LAPACKE_dgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               double* a, lapack_int lda, lapack_int* ipiv,
+                               double* b, lapack_int ldb );
+lapack_int LAPACKE_cgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               lapack_complex_float* a, lapack_int lda,
+                               lapack_int* ipiv, lapack_complex_float* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_zgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               lapack_complex_double* a, lapack_int lda,
+                               lapack_int* ipiv, lapack_complex_double* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_dsgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                                double* a, lapack_int lda, lapack_int* ipiv,
+                                double* b, lapack_int ldb, double* x,
+                                lapack_int ldx, double* work, float* swork,
+                                lapack_int* iter );
+lapack_int LAPACKE_zcgesv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int* ipiv, lapack_complex_double* b,
+                                lapack_int ldb, lapack_complex_double* x,
+                                lapack_int ldx, lapack_complex_double* work,
+                                lapack_complex_float* swork, double* rwork,
+                                lapack_int* iter );
+
+lapack_int LAPACKE_sgesvd_work( int matrix_order, char jobu, char jobvt,
+                                lapack_int m, lapack_int n, float* a,
+                                lapack_int lda, float* s, float* u,
+                                lapack_int ldu, float* vt, lapack_int ldvt,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dgesvd_work( int matrix_order, char jobu, char jobvt,
+                                lapack_int m, lapack_int n, double* a,
+                                lapack_int lda, double* s, double* u,
+                                lapack_int ldu, double* vt, lapack_int ldvt,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_cgesvd_work( int matrix_order, char jobu, char jobvt,
+                                lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                float* s, lapack_complex_float* u,
+                                lapack_int ldu, lapack_complex_float* vt,
+                                lapack_int ldvt, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork );
+lapack_int LAPACKE_zgesvd_work( int matrix_order, char jobu, char jobvt,
+                                lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                double* s, lapack_complex_double* u,
+                                lapack_int ldu, lapack_complex_double* vt,
+                                lapack_int ldvt, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork );
+
+lapack_int LAPACKE_sgesvj_work( int matrix_order, char joba, char jobu,
+                                char jobv, lapack_int m, lapack_int n, float* a,
+                                lapack_int lda, float* sva, lapack_int mv,
+                                float* v, lapack_int ldv, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dgesvj_work( int matrix_order, char joba, char jobu,
+                                char jobv, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, double* sva,
+                                lapack_int mv, double* v, lapack_int ldv,
+                                double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sgesvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int nrhs, float* a,
+                                lapack_int lda, float* af, lapack_int ldaf,
+                                lapack_int* ipiv, char* equed, float* r,
+                                float* c, float* b, lapack_int ldb, float* x,
+                                lapack_int ldx, float* rcond, float* ferr,
+                                float* berr, float* work, lapack_int* iwork );
+lapack_int LAPACKE_dgesvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int nrhs, double* a,
+                                lapack_int lda, double* af, lapack_int ldaf,
+                                lapack_int* ipiv, char* equed, double* r,
+                                double* c, double* b, lapack_int ldb, double* x,
+                                lapack_int ldx, double* rcond, double* ferr,
+                                double* berr, double* work, lapack_int* iwork );
+lapack_int LAPACKE_cgesvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int nrhs,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* af, lapack_int ldaf,
+                                lapack_int* ipiv, char* equed, float* r,
+                                float* c, lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* x,
+                                lapack_int ldx, float* rcond, float* ferr,
+                                float* berr, lapack_complex_float* work,
+                                float* rwork );
+lapack_int LAPACKE_zgesvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int nrhs,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* af, lapack_int ldaf,
+                                lapack_int* ipiv, char* equed, double* r,
+                                double* c, lapack_complex_double* b,
+                                lapack_int ldb, lapack_complex_double* x,
+                                lapack_int ldx, double* rcond, double* ferr,
+                                double* berr, lapack_complex_double* work,
+                                double* rwork );
+
+lapack_int LAPACKE_sgesvxx_work( int matrix_order, char fact, char trans,
+                                 lapack_int n, lapack_int nrhs, float* a,
+                                 lapack_int lda, float* af, lapack_int ldaf,
+                                 lapack_int* ipiv, char* equed, float* r,
+                                 float* c, float* b, lapack_int ldb, float* x,
+                                 lapack_int ldx, float* rcond, float* rpvgrw,
+                                 float* berr, lapack_int n_err_bnds,
+                                 float* err_bnds_norm, float* err_bnds_comp,
+                                 lapack_int nparams, float* params, float* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_dgesvxx_work( int matrix_order, char fact, char trans,
+                                 lapack_int n, lapack_int nrhs, double* a,
+                                 lapack_int lda, double* af, lapack_int ldaf,
+                                 lapack_int* ipiv, char* equed, double* r,
+                                 double* c, double* b, lapack_int ldb,
+                                 double* x, lapack_int ldx, double* rcond,
+                                 double* rpvgrw, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, double* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_cgesvxx_work( int matrix_order, char fact, char trans,
+                                 lapack_int n, lapack_int nrhs,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 lapack_complex_float* af, lapack_int ldaf,
+                                 lapack_int* ipiv, char* equed, float* r,
+                                 float* c, lapack_complex_float* b,
+                                 lapack_int ldb, lapack_complex_float* x,
+                                 lapack_int ldx, float* rcond, float* rpvgrw,
+                                 float* berr, lapack_int n_err_bnds,
+                                 float* err_bnds_norm, float* err_bnds_comp,
+                                 lapack_int nparams, float* params,
+                                 lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zgesvxx_work( int matrix_order, char fact, char trans,
+                                 lapack_int n, lapack_int nrhs,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 lapack_complex_double* af, lapack_int ldaf,
+                                 lapack_int* ipiv, char* equed, double* r,
+                                 double* c, lapack_complex_double* b,
+                                 lapack_int ldb, lapack_complex_double* x,
+                                 lapack_int ldx, double* rcond, double* rpvgrw,
+                                 double* berr, lapack_int n_err_bnds,
+                                 double* err_bnds_norm, double* err_bnds_comp,
+                                 lapack_int nparams, double* params,
+                                 lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sgetf2_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, lapack_int* ipiv );
+lapack_int LAPACKE_dgetf2_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, lapack_int* ipiv );
+lapack_int LAPACKE_cgetf2_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_int* ipiv );
+lapack_int LAPACKE_zgetf2_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int* ipiv );
+
+lapack_int LAPACKE_sgetrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, lapack_int* ipiv );
+lapack_int LAPACKE_dgetrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, lapack_int* ipiv );
+lapack_int LAPACKE_cgetrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_int* ipiv );
+lapack_int LAPACKE_zgetrf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int* ipiv );
+
+lapack_int LAPACKE_sgetri_work( int matrix_order, lapack_int n, float* a,
+                                lapack_int lda, const lapack_int* ipiv,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dgetri_work( int matrix_order, lapack_int n, double* a,
+                                lapack_int lda, const lapack_int* ipiv,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_cgetri_work( int matrix_order, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                const lapack_int* ipiv,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zgetri_work( int matrix_order, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                const lapack_int* ipiv,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sgetrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const float* a, lapack_int lda,
+                                const lapack_int* ipiv, float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_dgetrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const double* a,
+                                lapack_int lda, const lapack_int* ipiv,
+                                double* b, lapack_int ldb );
+lapack_int LAPACKE_cgetrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* a,
+                                lapack_int lda, const lapack_int* ipiv,
+                                lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zgetrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_double* a,
+                                lapack_int lda, const lapack_int* ipiv,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_sggbak_work( int matrix_order, char job, char side,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                const float* lscale, const float* rscale,
+                                lapack_int m, float* v, lapack_int ldv );
+lapack_int LAPACKE_dggbak_work( int matrix_order, char job, char side,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                const double* lscale, const double* rscale,
+                                lapack_int m, double* v, lapack_int ldv );
+lapack_int LAPACKE_cggbak_work( int matrix_order, char job, char side,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                const float* lscale, const float* rscale,
+                                lapack_int m, lapack_complex_float* v,
+                                lapack_int ldv );
+lapack_int LAPACKE_zggbak_work( int matrix_order, char job, char side,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                const double* lscale, const double* rscale,
+                                lapack_int m, lapack_complex_double* v,
+                                lapack_int ldv );
+
+lapack_int LAPACKE_sggbal_work( int matrix_order, char job, lapack_int n,
+                                float* a, lapack_int lda, float* b,
+                                lapack_int ldb, lapack_int* ilo,
+                                lapack_int* ihi, float* lscale, float* rscale,
+                                float* work );
+lapack_int LAPACKE_dggbal_work( int matrix_order, char job, lapack_int n,
+                                double* a, lapack_int lda, double* b,
+                                lapack_int ldb, lapack_int* ilo,
+                                lapack_int* ihi, double* lscale, double* rscale,
+                                double* work );
+lapack_int LAPACKE_cggbal_work( int matrix_order, char job, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb,
+                                lapack_int* ilo, lapack_int* ihi, float* lscale,
+                                float* rscale, float* work );
+lapack_int LAPACKE_zggbal_work( int matrix_order, char job, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_int* ilo, lapack_int* ihi,
+                                double* lscale, double* rscale, double* work );
+
+lapack_int LAPACKE_sgges_work( int matrix_order, char jobvsl, char jobvsr,
+                               char sort, LAPACK_S_SELECT3 selctg, lapack_int n,
+                               float* a, lapack_int lda, float* b,
+                               lapack_int ldb, lapack_int* sdim, float* alphar,
+                               float* alphai, float* beta, float* vsl,
+                               lapack_int ldvsl, float* vsr, lapack_int ldvsr,
+                               float* work, lapack_int lwork,
+                               lapack_logical* bwork );
+lapack_int LAPACKE_dgges_work( int matrix_order, char jobvsl, char jobvsr,
+                               char sort, LAPACK_D_SELECT3 selctg, lapack_int n,
+                               double* a, lapack_int lda, double* b,
+                               lapack_int ldb, lapack_int* sdim, double* alphar,
+                               double* alphai, double* beta, double* vsl,
+                               lapack_int ldvsl, double* vsr, lapack_int ldvsr,
+                               double* work, lapack_int lwork,
+                               lapack_logical* bwork );
+lapack_int LAPACKE_cgges_work( int matrix_order, char jobvsl, char jobvsr,
+                               char sort, LAPACK_C_SELECT2 selctg, lapack_int n,
+                               lapack_complex_float* a, lapack_int lda,
+                               lapack_complex_float* b, lapack_int ldb,
+                               lapack_int* sdim, lapack_complex_float* alpha,
+                               lapack_complex_float* beta,
+                               lapack_complex_float* vsl, lapack_int ldvsl,
+                               lapack_complex_float* vsr, lapack_int ldvsr,
+                               lapack_complex_float* work, lapack_int lwork,
+                               float* rwork, lapack_logical* bwork );
+lapack_int LAPACKE_zgges_work( int matrix_order, char jobvsl, char jobvsr,
+                               char sort, LAPACK_Z_SELECT2 selctg, lapack_int n,
+                               lapack_complex_double* a, lapack_int lda,
+                               lapack_complex_double* b, lapack_int ldb,
+                               lapack_int* sdim, lapack_complex_double* alpha,
+                               lapack_complex_double* beta,
+                               lapack_complex_double* vsl, lapack_int ldvsl,
+                               lapack_complex_double* vsr, lapack_int ldvsr,
+                               lapack_complex_double* work, lapack_int lwork,
+                               double* rwork, lapack_logical* bwork );
+
+lapack_int LAPACKE_sggesx_work( int matrix_order, char jobvsl, char jobvsr,
+                                char sort, LAPACK_S_SELECT3 selctg, char sense,
+                                lapack_int n, float* a, lapack_int lda,
+                                float* b, lapack_int ldb, lapack_int* sdim,
+                                float* alphar, float* alphai, float* beta,
+                                float* vsl, lapack_int ldvsl, float* vsr,
+                                lapack_int ldvsr, float* rconde, float* rcondv,
+                                float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork,
+                                lapack_logical* bwork );
+lapack_int LAPACKE_dggesx_work( int matrix_order, char jobvsl, char jobvsr,
+                                char sort, LAPACK_D_SELECT3 selctg, char sense,
+                                lapack_int n, double* a, lapack_int lda,
+                                double* b, lapack_int ldb, lapack_int* sdim,
+                                double* alphar, double* alphai, double* beta,
+                                double* vsl, lapack_int ldvsl, double* vsr,
+                                lapack_int ldvsr, double* rconde,
+                                double* rcondv, double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork,
+                                lapack_logical* bwork );
+lapack_int LAPACKE_cggesx_work( int matrix_order, char jobvsl, char jobvsr,
+                                char sort, LAPACK_C_SELECT2 selctg, char sense,
+                                lapack_int n, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* b,
+                                lapack_int ldb, lapack_int* sdim,
+                                lapack_complex_float* alpha,
+                                lapack_complex_float* beta,
+                                lapack_complex_float* vsl, lapack_int ldvsl,
+                                lapack_complex_float* vsr, lapack_int ldvsr,
+                                float* rconde, float* rcondv,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork, lapack_int* iwork,
+                                lapack_int liwork, lapack_logical* bwork );
+lapack_int LAPACKE_zggesx_work( int matrix_order, char jobvsl, char jobvsr,
+                                char sort, LAPACK_Z_SELECT2 selctg, char sense,
+                                lapack_int n, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* b,
+                                lapack_int ldb, lapack_int* sdim,
+                                lapack_complex_double* alpha,
+                                lapack_complex_double* beta,
+                                lapack_complex_double* vsl, lapack_int ldvsl,
+                                lapack_complex_double* vsr, lapack_int ldvsr,
+                                double* rconde, double* rcondv,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork, lapack_int* iwork,
+                                lapack_int liwork, lapack_logical* bwork );
+
+lapack_int LAPACKE_sggev_work( int matrix_order, char jobvl, char jobvr,
+                               lapack_int n, float* a, lapack_int lda, float* b,
+                               lapack_int ldb, float* alphar, float* alphai,
+                               float* beta, float* vl, lapack_int ldvl,
+                               float* vr, lapack_int ldvr, float* work,
+                               lapack_int lwork );
+lapack_int LAPACKE_dggev_work( int matrix_order, char jobvl, char jobvr,
+                               lapack_int n, double* a, lapack_int lda,
+                               double* b, lapack_int ldb, double* alphar,
+                               double* alphai, double* beta, double* vl,
+                               lapack_int ldvl, double* vr, lapack_int ldvr,
+                               double* work, lapack_int lwork );
+lapack_int LAPACKE_cggev_work( int matrix_order, char jobvl, char jobvr,
+                               lapack_int n, lapack_complex_float* a,
+                               lapack_int lda, lapack_complex_float* b,
+                               lapack_int ldb, lapack_complex_float* alpha,
+                               lapack_complex_float* beta,
+                               lapack_complex_float* vl, lapack_int ldvl,
+                               lapack_complex_float* vr, lapack_int ldvr,
+                               lapack_complex_float* work, lapack_int lwork,
+                               float* rwork );
+lapack_int LAPACKE_zggev_work( int matrix_order, char jobvl, char jobvr,
+                               lapack_int n, lapack_complex_double* a,
+                               lapack_int lda, lapack_complex_double* b,
+                               lapack_int ldb, lapack_complex_double* alpha,
+                               lapack_complex_double* beta,
+                               lapack_complex_double* vl, lapack_int ldvl,
+                               lapack_complex_double* vr, lapack_int ldvr,
+                               lapack_complex_double* work, lapack_int lwork,
+                               double* rwork );
+
+lapack_int LAPACKE_sggevx_work( int matrix_order, char balanc, char jobvl,
+                                char jobvr, char sense, lapack_int n, float* a,
+                                lapack_int lda, float* b, lapack_int ldb,
+                                float* alphar, float* alphai, float* beta,
+                                float* vl, lapack_int ldvl, float* vr,
+                                lapack_int ldvr, lapack_int* ilo,
+                                lapack_int* ihi, float* lscale, float* rscale,
+                                float* abnrm, float* bbnrm, float* rconde,
+                                float* rcondv, float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_logical* bwork );
+lapack_int LAPACKE_dggevx_work( int matrix_order, char balanc, char jobvl,
+                                char jobvr, char sense, lapack_int n, double* a,
+                                lapack_int lda, double* b, lapack_int ldb,
+                                double* alphar, double* alphai, double* beta,
+                                double* vl, lapack_int ldvl, double* vr,
+                                lapack_int ldvr, lapack_int* ilo,
+                                lapack_int* ihi, double* lscale, double* rscale,
+                                double* abnrm, double* bbnrm, double* rconde,
+                                double* rcondv, double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_logical* bwork );
+lapack_int LAPACKE_cggevx_work( int matrix_order, char balanc, char jobvl,
+                                char jobvr, char sense, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* alpha,
+                                lapack_complex_float* beta,
+                                lapack_complex_float* vl, lapack_int ldvl,
+                                lapack_complex_float* vr, lapack_int ldvr,
+                                lapack_int* ilo, lapack_int* ihi, float* lscale,
+                                float* rscale, float* abnrm, float* bbnrm,
+                                float* rconde, float* rcondv,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork, lapack_int* iwork,
+                                lapack_logical* bwork );
+lapack_int LAPACKE_zggevx_work( int matrix_order, char balanc, char jobvl,
+                                char jobvr, char sense, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* alpha,
+                                lapack_complex_double* beta,
+                                lapack_complex_double* vl, lapack_int ldvl,
+                                lapack_complex_double* vr, lapack_int ldvr,
+                                lapack_int* ilo, lapack_int* ihi,
+                                double* lscale, double* rscale, double* abnrm,
+                                double* bbnrm, double* rconde, double* rcondv,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork, lapack_int* iwork,
+                                lapack_logical* bwork );
+
+lapack_int LAPACKE_sggglm_work( int matrix_order, lapack_int n, lapack_int m,
+                                lapack_int p, float* a, lapack_int lda,
+                                float* b, lapack_int ldb, float* d, float* x,
+                                float* y, float* work, lapack_int lwork );
+lapack_int LAPACKE_dggglm_work( int matrix_order, lapack_int n, lapack_int m,
+                                lapack_int p, double* a, lapack_int lda,
+                                double* b, lapack_int ldb, double* d, double* x,
+                                double* y, double* work, lapack_int lwork );
+lapack_int LAPACKE_cggglm_work( int matrix_order, lapack_int n, lapack_int m,
+                                lapack_int p, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* d,
+                                lapack_complex_float* x,
+                                lapack_complex_float* y,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zggglm_work( int matrix_order, lapack_int n, lapack_int m,
+                                lapack_int p, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* b,
+                                lapack_int ldb, lapack_complex_double* d,
+                                lapack_complex_double* x,
+                                lapack_complex_double* y,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sgghrd_work( int matrix_order, char compq, char compz,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                float* a, lapack_int lda, float* b,
+                                lapack_int ldb, float* q, lapack_int ldq,
+                                float* z, lapack_int ldz );
+lapack_int LAPACKE_dgghrd_work( int matrix_order, char compq, char compz,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                double* a, lapack_int lda, double* b,
+                                lapack_int ldb, double* q, lapack_int ldq,
+                                double* z, lapack_int ldz );
+lapack_int LAPACKE_cgghrd_work( int matrix_order, char compq, char compz,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* q, lapack_int ldq,
+                                lapack_complex_float* z, lapack_int ldz );
+lapack_int LAPACKE_zgghrd_work( int matrix_order, char compq, char compz,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* q, lapack_int ldq,
+                                lapack_complex_double* z, lapack_int ldz );
+
+lapack_int LAPACKE_sgglse_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int p, float* a, lapack_int lda,
+                                float* b, lapack_int ldb, float* c, float* d,
+                                float* x, float* work, lapack_int lwork );
+lapack_int LAPACKE_dgglse_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int p, double* a, lapack_int lda,
+                                double* b, lapack_int ldb, double* c, double* d,
+                                double* x, double* work, lapack_int lwork );
+lapack_int LAPACKE_cgglse_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int p, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* c,
+                                lapack_complex_float* d,
+                                lapack_complex_float* x,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zgglse_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int p, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* b,
+                                lapack_int ldb, lapack_complex_double* c,
+                                lapack_complex_double* d,
+                                lapack_complex_double* x,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sggqrf_work( int matrix_order, lapack_int n, lapack_int m,
+                                lapack_int p, float* a, lapack_int lda,
+                                float* taua, float* b, lapack_int ldb,
+                                float* taub, float* work, lapack_int lwork );
+lapack_int LAPACKE_dggqrf_work( int matrix_order, lapack_int n, lapack_int m,
+                                lapack_int p, double* a, lapack_int lda,
+                                double* taua, double* b, lapack_int ldb,
+                                double* taub, double* work, lapack_int lwork );
+lapack_int LAPACKE_cggqrf_work( int matrix_order, lapack_int n, lapack_int m,
+                                lapack_int p, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* taua,
+                                lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* taub,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zggqrf_work( int matrix_order, lapack_int n, lapack_int m,
+                                lapack_int p, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* taua,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* taub,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sggrqf_work( int matrix_order, lapack_int m, lapack_int p,
+                                lapack_int n, float* a, lapack_int lda,
+                                float* taua, float* b, lapack_int ldb,
+                                float* taub, float* work, lapack_int lwork );
+lapack_int LAPACKE_dggrqf_work( int matrix_order, lapack_int m, lapack_int p,
+                                lapack_int n, double* a, lapack_int lda,
+                                double* taua, double* b, lapack_int ldb,
+                                double* taub, double* work, lapack_int lwork );
+lapack_int LAPACKE_cggrqf_work( int matrix_order, lapack_int m, lapack_int p,
+                                lapack_int n, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* taua,
+                                lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* taub,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zggrqf_work( int matrix_order, lapack_int m, lapack_int p,
+                                lapack_int n, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* taua,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* taub,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sggsvd_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int n,
+                                lapack_int p, lapack_int* k, lapack_int* l,
+                                float* a, lapack_int lda, float* b,
+                                lapack_int ldb, float* alpha, float* beta,
+                                float* u, lapack_int ldu, float* v,
+                                lapack_int ldv, float* q, lapack_int ldq,
+                                float* work, lapack_int* iwork );
+lapack_int LAPACKE_dggsvd_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int n,
+                                lapack_int p, lapack_int* k, lapack_int* l,
+                                double* a, lapack_int lda, double* b,
+                                lapack_int ldb, double* alpha, double* beta,
+                                double* u, lapack_int ldu, double* v,
+                                lapack_int ldv, double* q, lapack_int ldq,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_cggsvd_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int n,
+                                lapack_int p, lapack_int* k, lapack_int* l,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb,
+                                float* alpha, float* beta,
+                                lapack_complex_float* u, lapack_int ldu,
+                                lapack_complex_float* v, lapack_int ldv,
+                                lapack_complex_float* q, lapack_int ldq,
+                                lapack_complex_float* work, float* rwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_zggsvd_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int n,
+                                lapack_int p, lapack_int* k, lapack_int* l,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                double* alpha, double* beta,
+                                lapack_complex_double* u, lapack_int ldu,
+                                lapack_complex_double* v, lapack_int ldv,
+                                lapack_complex_double* q, lapack_int ldq,
+                                lapack_complex_double* work, double* rwork,
+                                lapack_int* iwork );
+
+lapack_int LAPACKE_sggsvp_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int p,
+                                lapack_int n, float* a, lapack_int lda,
+                                float* b, lapack_int ldb, float tola,
+                                float tolb, lapack_int* k, lapack_int* l,
+                                float* u, lapack_int ldu, float* v,
+                                lapack_int ldv, float* q, lapack_int ldq,
+                                lapack_int* iwork, float* tau, float* work );
+lapack_int LAPACKE_dggsvp_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int p,
+                                lapack_int n, double* a, lapack_int lda,
+                                double* b, lapack_int ldb, double tola,
+                                double tolb, lapack_int* k, lapack_int* l,
+                                double* u, lapack_int ldu, double* v,
+                                lapack_int ldv, double* q, lapack_int ldq,
+                                lapack_int* iwork, double* tau, double* work );
+lapack_int LAPACKE_cggsvp_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int p,
+                                lapack_int n, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* b,
+                                lapack_int ldb, float tola, float tolb,
+                                lapack_int* k, lapack_int* l,
+                                lapack_complex_float* u, lapack_int ldu,
+                                lapack_complex_float* v, lapack_int ldv,
+                                lapack_complex_float* q, lapack_int ldq,
+                                lapack_int* iwork, float* rwork,
+                                lapack_complex_float* tau,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zggsvp_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int p,
+                                lapack_int n, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* b,
+                                lapack_int ldb, double tola, double tolb,
+                                lapack_int* k, lapack_int* l,
+                                lapack_complex_double* u, lapack_int ldu,
+                                lapack_complex_double* v, lapack_int ldv,
+                                lapack_complex_double* q, lapack_int ldq,
+                                lapack_int* iwork, double* rwork,
+                                lapack_complex_double* tau,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_sgtcon_work( char norm, lapack_int n, const float* dl,
+                                const float* d, const float* du,
+                                const float* du2, const lapack_int* ipiv,
+                                float anorm, float* rcond, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dgtcon_work( char norm, lapack_int n, const double* dl,
+                                const double* d, const double* du,
+                                const double* du2, const lapack_int* ipiv,
+                                double anorm, double* rcond, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_cgtcon_work( char norm, lapack_int n,
+                                const lapack_complex_float* dl,
+                                const lapack_complex_float* d,
+                                const lapack_complex_float* du,
+                                const lapack_complex_float* du2,
+                                const lapack_int* ipiv, float anorm,
+                                float* rcond, lapack_complex_float* work );
+lapack_int LAPACKE_zgtcon_work( char norm, lapack_int n,
+                                const lapack_complex_double* dl,
+                                const lapack_complex_double* d,
+                                const lapack_complex_double* du,
+                                const lapack_complex_double* du2,
+                                const lapack_int* ipiv, double anorm,
+                                double* rcond, lapack_complex_double* work );
+
+lapack_int LAPACKE_sgtrfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const float* dl,
+                                const float* d, const float* du,
+                                const float* dlf, const float* df,
+                                const float* duf, const float* du2,
+                                const lapack_int* ipiv, const float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* ferr, float* berr, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dgtrfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const double* dl,
+                                const double* d, const double* du,
+                                const double* dlf, const double* df,
+                                const double* duf, const double* du2,
+                                const lapack_int* ipiv, const double* b,
+                                lapack_int ldb, double* x, lapack_int ldx,
+                                double* ferr, double* berr, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_cgtrfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* dl,
+                                const lapack_complex_float* d,
+                                const lapack_complex_float* du,
+                                const lapack_complex_float* dlf,
+                                const lapack_complex_float* df,
+                                const lapack_complex_float* duf,
+                                const lapack_complex_float* du2,
+                                const lapack_int* ipiv,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zgtrfs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs,
+                                const lapack_complex_double* dl,
+                                const lapack_complex_double* d,
+                                const lapack_complex_double* du,
+                                const lapack_complex_double* dlf,
+                                const lapack_complex_double* df,
+                                const lapack_complex_double* duf,
+                                const lapack_complex_double* du2,
+                                const lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               float* dl, float* d, float* du, float* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_dgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               double* dl, double* d, double* du, double* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_cgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               lapack_complex_float* dl,
+                               lapack_complex_float* d,
+                               lapack_complex_float* du,
+                               lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zgtsv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               lapack_complex_double* dl,
+                               lapack_complex_double* d,
+                               lapack_complex_double* du,
+                               lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_sgtsvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int nrhs, const float* dl,
+                                const float* d, const float* du, float* dlf,
+                                float* df, float* duf, float* du2,
+                                lapack_int* ipiv, const float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* rcond, float* ferr, float* berr,
+                                float* work, lapack_int* iwork );
+lapack_int LAPACKE_dgtsvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int nrhs, const double* dl,
+                                const double* d, const double* du, double* dlf,
+                                double* df, double* duf, double* du2,
+                                lapack_int* ipiv, const double* b,
+                                lapack_int ldb, double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_cgtsvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_float* dl,
+                                const lapack_complex_float* d,
+                                const lapack_complex_float* du,
+                                lapack_complex_float* dlf,
+                                lapack_complex_float* df,
+                                lapack_complex_float* duf,
+                                lapack_complex_float* du2, lapack_int* ipiv,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* rcond, float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zgtsvx_work( int matrix_order, char fact, char trans,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_double* dl,
+                                const lapack_complex_double* d,
+                                const lapack_complex_double* du,
+                                lapack_complex_double* dlf,
+                                lapack_complex_double* df,
+                                lapack_complex_double* duf,
+                                lapack_complex_double* du2, lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sgttrf_work( lapack_int n, float* dl, float* d, float* du,
+                                float* du2, lapack_int* ipiv );
+lapack_int LAPACKE_dgttrf_work( lapack_int n, double* dl, double* d, double* du,
+                                double* du2, lapack_int* ipiv );
+lapack_int LAPACKE_cgttrf_work( lapack_int n, lapack_complex_float* dl,
+                                lapack_complex_float* d,
+                                lapack_complex_float* du,
+                                lapack_complex_float* du2, lapack_int* ipiv );
+lapack_int LAPACKE_zgttrf_work( lapack_int n, lapack_complex_double* dl,
+                                lapack_complex_double* d,
+                                lapack_complex_double* du,
+                                lapack_complex_double* du2, lapack_int* ipiv );
+
+lapack_int LAPACKE_sgttrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const float* dl,
+                                const float* d, const float* du,
+                                const float* du2, const lapack_int* ipiv,
+                                float* b, lapack_int ldb );
+lapack_int LAPACKE_dgttrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const double* dl,
+                                const double* d, const double* du,
+                                const double* du2, const lapack_int* ipiv,
+                                double* b, lapack_int ldb );
+lapack_int LAPACKE_cgttrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* dl,
+                                const lapack_complex_float* d,
+                                const lapack_complex_float* du,
+                                const lapack_complex_float* du2,
+                                const lapack_int* ipiv, lapack_complex_float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_zgttrs_work( int matrix_order, char trans, lapack_int n,
+                                lapack_int nrhs,
+                                const lapack_complex_double* dl,
+                                const lapack_complex_double* d,
+                                const lapack_complex_double* du,
+                                const lapack_complex_double* du2,
+                                const lapack_int* ipiv,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_chbev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_int kd,
+                               lapack_complex_float* ab, lapack_int ldab,
+                               float* w, lapack_complex_float* z,
+                               lapack_int ldz, lapack_complex_float* work,
+                               float* rwork );
+lapack_int LAPACKE_zhbev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_int kd,
+                               lapack_complex_double* ab, lapack_int ldab,
+                               double* w, lapack_complex_double* z,
+                               lapack_int ldz, lapack_complex_double* work,
+                               double* rwork );
+
+lapack_int LAPACKE_chbevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_int kd,
+                                lapack_complex_float* ab, lapack_int ldab,
+                                float* w, lapack_complex_float* z,
+                                lapack_int ldz, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork,
+                                lapack_int lrwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_zhbevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_int kd,
+                                lapack_complex_double* ab, lapack_int ldab,
+                                double* w, lapack_complex_double* z,
+                                lapack_int ldz, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork,
+                                lapack_int lrwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_chbevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, lapack_int kd,
+                                lapack_complex_float* ab, lapack_int ldab,
+                                lapack_complex_float* q, lapack_int ldq,
+                                float vl, float vu, lapack_int il,
+                                lapack_int iu, float abstol, lapack_int* m,
+                                float* w, lapack_complex_float* z,
+                                lapack_int ldz, lapack_complex_float* work,
+                                float* rwork, lapack_int* iwork,
+                                lapack_int* ifail );
+lapack_int LAPACKE_zhbevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, lapack_int kd,
+                                lapack_complex_double* ab, lapack_int ldab,
+                                lapack_complex_double* q, lapack_int ldq,
+                                double vl, double vu, lapack_int il,
+                                lapack_int iu, double abstol, lapack_int* m,
+                                double* w, lapack_complex_double* z,
+                                lapack_int ldz, lapack_complex_double* work,
+                                double* rwork, lapack_int* iwork,
+                                lapack_int* ifail );
+
+lapack_int LAPACKE_chbgst_work( int matrix_order, char vect, char uplo,
+                                lapack_int n, lapack_int ka, lapack_int kb,
+                                lapack_complex_float* ab, lapack_int ldab,
+                                const lapack_complex_float* bb, lapack_int ldbb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zhbgst_work( int matrix_order, char vect, char uplo,
+                                lapack_int n, lapack_int ka, lapack_int kb,
+                                lapack_complex_double* ab, lapack_int ldab,
+                                const lapack_complex_double* bb,
+                                lapack_int ldbb, lapack_complex_double* x,
+                                lapack_int ldx, lapack_complex_double* work,
+                                double* rwork );
+
+lapack_int LAPACKE_chbgv_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_int ka, lapack_int kb,
+                               lapack_complex_float* ab, lapack_int ldab,
+                               lapack_complex_float* bb, lapack_int ldbb,
+                               float* w, lapack_complex_float* z,
+                               lapack_int ldz, lapack_complex_float* work,
+                               float* rwork );
+lapack_int LAPACKE_zhbgv_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_int ka, lapack_int kb,
+                               lapack_complex_double* ab, lapack_int ldab,
+                               lapack_complex_double* bb, lapack_int ldbb,
+                               double* w, lapack_complex_double* z,
+                               lapack_int ldz, lapack_complex_double* work,
+                               double* rwork );
+
+lapack_int LAPACKE_chbgvd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_int ka, lapack_int kb,
+                                lapack_complex_float* ab, lapack_int ldab,
+                                lapack_complex_float* bb, lapack_int ldbb,
+                                float* w, lapack_complex_float* z,
+                                lapack_int ldz, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork,
+                                lapack_int lrwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_zhbgvd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_int ka, lapack_int kb,
+                                lapack_complex_double* ab, lapack_int ldab,
+                                lapack_complex_double* bb, lapack_int ldbb,
+                                double* w, lapack_complex_double* z,
+                                lapack_int ldz, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork,
+                                lapack_int lrwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_chbgvx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, lapack_int ka,
+                                lapack_int kb, lapack_complex_float* ab,
+                                lapack_int ldab, lapack_complex_float* bb,
+                                lapack_int ldbb, lapack_complex_float* q,
+                                lapack_int ldq, float vl, float vu,
+                                lapack_int il, lapack_int iu, float abstol,
+                                lapack_int* m, float* w,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_complex_float* work, float* rwork,
+                                lapack_int* iwork, lapack_int* ifail );
+lapack_int LAPACKE_zhbgvx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, lapack_int ka,
+                                lapack_int kb, lapack_complex_double* ab,
+                                lapack_int ldab, lapack_complex_double* bb,
+                                lapack_int ldbb, lapack_complex_double* q,
+                                lapack_int ldq, double vl, double vu,
+                                lapack_int il, lapack_int iu, double abstol,
+                                lapack_int* m, double* w,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_complex_double* work, double* rwork,
+                                lapack_int* iwork, lapack_int* ifail );
+
+lapack_int LAPACKE_chbtrd_work( int matrix_order, char vect, char uplo,
+                                lapack_int n, lapack_int kd,
+                                lapack_complex_float* ab, lapack_int ldab,
+                                float* d, float* e, lapack_complex_float* q,
+                                lapack_int ldq, lapack_complex_float* work );
+lapack_int LAPACKE_zhbtrd_work( int matrix_order, char vect, char uplo,
+                                lapack_int n, lapack_int kd,
+                                lapack_complex_double* ab, lapack_int ldab,
+                                double* d, double* e, lapack_complex_double* q,
+                                lapack_int ldq, lapack_complex_double* work );
+
+lapack_int LAPACKE_checon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_int* ipiv, float anorm,
+                                float* rcond, lapack_complex_float* work );
+lapack_int LAPACKE_zhecon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_int* ipiv, double anorm,
+                                double* rcond, lapack_complex_double* work );
+
+lapack_int LAPACKE_cheequb_work( int matrix_order, char uplo, lapack_int n,
+                                 const lapack_complex_float* a, lapack_int lda,
+                                 float* s, float* scond, float* amax,
+                                 lapack_complex_float* work );
+lapack_int LAPACKE_zheequb_work( int matrix_order, char uplo, lapack_int n,
+                                 const lapack_complex_double* a, lapack_int lda,
+                                 double* s, double* scond, double* amax,
+                                 lapack_complex_double* work );
+
+lapack_int LAPACKE_cheev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_complex_float* a,
+                               lapack_int lda, float* w,
+                               lapack_complex_float* work, lapack_int lwork,
+                               float* rwork );
+lapack_int LAPACKE_zheev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_complex_double* a,
+                               lapack_int lda, double* w,
+                               lapack_complex_double* work, lapack_int lwork,
+                               double* rwork );
+
+lapack_int LAPACKE_cheevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_complex_float* a,
+                                lapack_int lda, float* w,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork, lapack_int lrwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_zheevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_complex_double* a,
+                                lapack_int lda, double* w,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork, lapack_int lrwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_cheevr_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                float vl, float vu, lapack_int il,
+                                lapack_int iu, float abstol, lapack_int* m,
+                                float* w, lapack_complex_float* z,
+                                lapack_int ldz, lapack_int* isuppz,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork, lapack_int lrwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_zheevr_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                double vl, double vu, lapack_int il,
+                                lapack_int iu, double abstol, lapack_int* m,
+                                double* w, lapack_complex_double* z,
+                                lapack_int ldz, lapack_int* isuppz,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork, lapack_int lrwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_cheevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                float vl, float vu, lapack_int il,
+                                lapack_int iu, float abstol, lapack_int* m,
+                                float* w, lapack_complex_float* z,
+                                lapack_int ldz, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork,
+                                lapack_int* iwork, lapack_int* ifail );
+lapack_int LAPACKE_zheevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                double vl, double vu, lapack_int il,
+                                lapack_int iu, double abstol, lapack_int* m,
+                                double* w, lapack_complex_double* z,
+                                lapack_int ldz, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork,
+                                lapack_int* iwork, lapack_int* ifail );
+
+lapack_int LAPACKE_chegst_work( int matrix_order, lapack_int itype, char uplo,
+                                lapack_int n, lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_zhegst_work( int matrix_order, lapack_int itype, char uplo,
+                                lapack_int n, lapack_complex_double* a,
+                                lapack_int lda, const lapack_complex_double* b,
+                                lapack_int ldb );
+
+lapack_int LAPACKE_chegv_work( int matrix_order, lapack_int itype, char jobz,
+                               char uplo, lapack_int n, lapack_complex_float* a,
+                               lapack_int lda, lapack_complex_float* b,
+                               lapack_int ldb, float* w,
+                               lapack_complex_float* work, lapack_int lwork,
+                               float* rwork );
+lapack_int LAPACKE_zhegv_work( int matrix_order, lapack_int itype, char jobz,
+                               char uplo, lapack_int n,
+                               lapack_complex_double* a, lapack_int lda,
+                               lapack_complex_double* b, lapack_int ldb,
+                               double* w, lapack_complex_double* work,
+                               lapack_int lwork, double* rwork );
+
+lapack_int LAPACKE_chegvd_work( int matrix_order, lapack_int itype, char jobz,
+                                char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb,
+                                float* w, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork,
+                                lapack_int lrwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_zhegvd_work( int matrix_order, lapack_int itype, char jobz,
+                                char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                double* w, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork,
+                                lapack_int lrwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_chegvx_work( int matrix_order, lapack_int itype, char jobz,
+                                char range, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb,
+                                float vl, float vu, lapack_int il,
+                                lapack_int iu, float abstol, lapack_int* m,
+                                float* w, lapack_complex_float* z,
+                                lapack_int ldz, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork,
+                                lapack_int* iwork, lapack_int* ifail );
+lapack_int LAPACKE_zhegvx_work( int matrix_order, lapack_int itype, char jobz,
+                                char range, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                double vl, double vu, lapack_int il,
+                                lapack_int iu, double abstol, lapack_int* m,
+                                double* w, lapack_complex_double* z,
+                                lapack_int ldz, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork,
+                                lapack_int* iwork, lapack_int* ifail );
+
+lapack_int LAPACKE_cherfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* af,
+                                lapack_int ldaf, const lapack_int* ipiv,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zherfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_double* a,
+                                lapack_int lda, const lapack_complex_double* af,
+                                lapack_int ldaf, const lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_cherfsx_work( int matrix_order, char uplo, char equed,
+                                 lapack_int n, lapack_int nrhs,
+                                 const lapack_complex_float* a, lapack_int lda,
+                                 const lapack_complex_float* af,
+                                 lapack_int ldaf, const lapack_int* ipiv,
+                                 const float* s, const lapack_complex_float* b,
+                                 lapack_int ldb, lapack_complex_float* x,
+                                 lapack_int ldx, float* rcond, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, lapack_complex_float* work,
+                                 float* rwork );
+lapack_int LAPACKE_zherfsx_work( int matrix_order, char uplo, char equed,
+                                 lapack_int n, lapack_int nrhs,
+                                 const lapack_complex_double* a, lapack_int lda,
+                                 const lapack_complex_double* af,
+                                 lapack_int ldaf, const lapack_int* ipiv,
+                                 const double* s,
+                                 const lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 double* rcond, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, lapack_complex_double* work,
+                                 double* rwork );
+
+lapack_int LAPACKE_chesv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_float* a,
+                               lapack_int lda, lapack_int* ipiv,
+                               lapack_complex_float* b, lapack_int ldb,
+                               lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zhesv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_double* a,
+                               lapack_int lda, lapack_int* ipiv,
+                               lapack_complex_double* b, lapack_int ldb,
+                               lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_chesvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* af, lapack_int ldaf,
+                                lapack_int* ipiv, const lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* x,
+                                lapack_int ldx, float* rcond, float* ferr,
+                                float* berr, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork );
+lapack_int LAPACKE_zhesvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* af, lapack_int ldaf,
+                                lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork );
+
+lapack_int LAPACKE_chesvxx_work( int matrix_order, char fact, char uplo,
+                                 lapack_int n, lapack_int nrhs,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 lapack_complex_float* af, lapack_int ldaf,
+                                 lapack_int* ipiv, char* equed, float* s,
+                                 lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* x, lapack_int ldx,
+                                 float* rcond, float* rpvgrw, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, lapack_complex_float* work,
+                                 float* rwork );
+lapack_int LAPACKE_zhesvxx_work( int matrix_order, char fact, char uplo,
+                                 lapack_int n, lapack_int nrhs,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 lapack_complex_double* af, lapack_int ldaf,
+                                 lapack_int* ipiv, char* equed, double* s,
+                                 lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 double* rcond, double* rpvgrw, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, lapack_complex_double* work,
+                                 double* rwork );
+
+lapack_int LAPACKE_chetrd_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                float* d, float* e, lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zhetrd_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                double* d, double* e,
+                                lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_chetrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_int* ipiv, lapack_complex_float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_zhetrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int* ipiv, lapack_complex_double* work,
+                                lapack_int lwork );
+
+lapack_int LAPACKE_chetri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                const lapack_int* ipiv,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zhetri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                const lapack_int* ipiv,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_chetrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* a,
+                                lapack_int lda, const lapack_int* ipiv,
+                                lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zhetrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_double* a,
+                                lapack_int lda, const lapack_int* ipiv,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_chfrk_work( int matrix_order, char transr, char uplo,
+                               char trans, lapack_int n, lapack_int k,
+                               float alpha, const lapack_complex_float* a,
+                               lapack_int lda, float beta,
+                               lapack_complex_float* c );
+lapack_int LAPACKE_zhfrk_work( int matrix_order, char transr, char uplo,
+                               char trans, lapack_int n, lapack_int k,
+                               double alpha, const lapack_complex_double* a,
+                               lapack_int lda, double beta,
+                               lapack_complex_double* c );
+
+lapack_int LAPACKE_shgeqz_work( int matrix_order, char job, char compq,
+                                char compz, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, float* h, lapack_int ldh,
+                                float* t, lapack_int ldt, float* alphar,
+                                float* alphai, float* beta, float* q,
+                                lapack_int ldq, float* z, lapack_int ldz,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dhgeqz_work( int matrix_order, char job, char compq,
+                                char compz, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, double* h, lapack_int ldh,
+                                double* t, lapack_int ldt, double* alphar,
+                                double* alphai, double* beta, double* q,
+                                lapack_int ldq, double* z, lapack_int ldz,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_chgeqz_work( int matrix_order, char job, char compq,
+                                char compz, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, lapack_complex_float* h,
+                                lapack_int ldh, lapack_complex_float* t,
+                                lapack_int ldt, lapack_complex_float* alpha,
+                                lapack_complex_float* beta,
+                                lapack_complex_float* q, lapack_int ldq,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork );
+lapack_int LAPACKE_zhgeqz_work( int matrix_order, char job, char compq,
+                                char compz, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, lapack_complex_double* h,
+                                lapack_int ldh, lapack_complex_double* t,
+                                lapack_int ldt, lapack_complex_double* alpha,
+                                lapack_complex_double* beta,
+                                lapack_complex_double* q, lapack_int ldq,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork );
+
+lapack_int LAPACKE_chpcon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_float* ap,
+                                const lapack_int* ipiv, float anorm,
+                                float* rcond, lapack_complex_float* work );
+lapack_int LAPACKE_zhpcon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_double* ap,
+                                const lapack_int* ipiv, double anorm,
+                                double* rcond, lapack_complex_double* work );
+
+lapack_int LAPACKE_chpev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_complex_float* ap, float* w,
+                               lapack_complex_float* z, lapack_int ldz,
+                               lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zhpev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_complex_double* ap,
+                               double* w, lapack_complex_double* z,
+                               lapack_int ldz, lapack_complex_double* work,
+                               double* rwork );
+
+lapack_int LAPACKE_chpevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_complex_float* ap,
+                                float* w, lapack_complex_float* z,
+                                lapack_int ldz, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork,
+                                lapack_int lrwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_zhpevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_complex_double* ap,
+                                double* w, lapack_complex_double* z,
+                                lapack_int ldz, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork,
+                                lapack_int lrwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_chpevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n,
+                                lapack_complex_float* ap, float vl, float vu,
+                                lapack_int il, lapack_int iu, float abstol,
+                                lapack_int* m, float* w,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_complex_float* work, float* rwork,
+                                lapack_int* iwork, lapack_int* ifail );
+lapack_int LAPACKE_zhpevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n,
+                                lapack_complex_double* ap, double vl, double vu,
+                                lapack_int il, lapack_int iu, double abstol,
+                                lapack_int* m, double* w,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_complex_double* work, double* rwork,
+                                lapack_int* iwork, lapack_int* ifail );
+
+lapack_int LAPACKE_chpgst_work( int matrix_order, lapack_int itype, char uplo,
+                                lapack_int n, lapack_complex_float* ap,
+                                const lapack_complex_float* bp );
+lapack_int LAPACKE_zhpgst_work( int matrix_order, lapack_int itype, char uplo,
+                                lapack_int n, lapack_complex_double* ap,
+                                const lapack_complex_double* bp );
+
+lapack_int LAPACKE_chpgv_work( int matrix_order, lapack_int itype, char jobz,
+                               char uplo, lapack_int n,
+                               lapack_complex_float* ap,
+                               lapack_complex_float* bp, float* w,
+                               lapack_complex_float* z, lapack_int ldz,
+                               lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zhpgv_work( int matrix_order, lapack_int itype, char jobz,
+                               char uplo, lapack_int n,
+                               lapack_complex_double* ap,
+                               lapack_complex_double* bp, double* w,
+                               lapack_complex_double* z, lapack_int ldz,
+                               lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_chpgvd_work( int matrix_order, lapack_int itype, char jobz,
+                                char uplo, lapack_int n,
+                                lapack_complex_float* ap,
+                                lapack_complex_float* bp, float* w,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork, lapack_int lrwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_zhpgvd_work( int matrix_order, lapack_int itype, char jobz,
+                                char uplo, lapack_int n,
+                                lapack_complex_double* ap,
+                                lapack_complex_double* bp, double* w,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork, lapack_int lrwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_chpgvx_work( int matrix_order, lapack_int itype, char jobz,
+                                char range, char uplo, lapack_int n,
+                                lapack_complex_float* ap,
+                                lapack_complex_float* bp, float vl, float vu,
+                                lapack_int il, lapack_int iu, float abstol,
+                                lapack_int* m, float* w,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_complex_float* work, float* rwork,
+                                lapack_int* iwork, lapack_int* ifail );
+lapack_int LAPACKE_zhpgvx_work( int matrix_order, lapack_int itype, char jobz,
+                                char range, char uplo, lapack_int n,
+                                lapack_complex_double* ap,
+                                lapack_complex_double* bp, double vl, double vu,
+                                lapack_int il, lapack_int iu, double abstol,
+                                lapack_int* m, double* w,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_complex_double* work, double* rwork,
+                                lapack_int* iwork, lapack_int* ifail );
+
+lapack_int LAPACKE_chprfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* ap,
+                                const lapack_complex_float* afp,
+                                const lapack_int* ipiv,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zhprfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs,
+                                const lapack_complex_double* ap,
+                                const lapack_complex_double* afp,
+                                const lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_chpsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_float* ap,
+                               lapack_int* ipiv, lapack_complex_float* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_zhpsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_double* ap,
+                               lapack_int* ipiv, lapack_complex_double* b,
+                               lapack_int ldb );
+
+lapack_int LAPACKE_chpsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_float* ap,
+                                lapack_complex_float* afp, lapack_int* ipiv,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* rcond, float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zhpsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_double* ap,
+                                lapack_complex_double* afp, lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_chptrd_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* ap, float* d, float* e,
+                                lapack_complex_float* tau );
+lapack_int LAPACKE_zhptrd_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* ap, double* d, double* e,
+                                lapack_complex_double* tau );
+
+lapack_int LAPACKE_chptrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* ap, lapack_int* ipiv );
+lapack_int LAPACKE_zhptrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* ap, lapack_int* ipiv );
+
+lapack_int LAPACKE_chptri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* ap,
+                                const lapack_int* ipiv,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zhptri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* ap,
+                                const lapack_int* ipiv,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_chptrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* ap,
+                                const lapack_int* ipiv, lapack_complex_float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_zhptrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs,
+                                const lapack_complex_double* ap,
+                                const lapack_int* ipiv,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_shsein_work( int matrix_order, char job, char eigsrc,
+                                char initv, lapack_logical* select,
+                                lapack_int n, const float* h, lapack_int ldh,
+                                float* wr, const float* wi, float* vl,
+                                lapack_int ldvl, float* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m, float* work,
+                                lapack_int* ifaill, lapack_int* ifailr );
+lapack_int LAPACKE_dhsein_work( int matrix_order, char job, char eigsrc,
+                                char initv, lapack_logical* select,
+                                lapack_int n, const double* h, lapack_int ldh,
+                                double* wr, const double* wi, double* vl,
+                                lapack_int ldvl, double* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m, double* work,
+                                lapack_int* ifaill, lapack_int* ifailr );
+lapack_int LAPACKE_chsein_work( int matrix_order, char job, char eigsrc,
+                                char initv, const lapack_logical* select,
+                                lapack_int n, const lapack_complex_float* h,
+                                lapack_int ldh, lapack_complex_float* w,
+                                lapack_complex_float* vl, lapack_int ldvl,
+                                lapack_complex_float* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m,
+                                lapack_complex_float* work, float* rwork,
+                                lapack_int* ifaill, lapack_int* ifailr );
+lapack_int LAPACKE_zhsein_work( int matrix_order, char job, char eigsrc,
+                                char initv, const lapack_logical* select,
+                                lapack_int n, const lapack_complex_double* h,
+                                lapack_int ldh, lapack_complex_double* w,
+                                lapack_complex_double* vl, lapack_int ldvl,
+                                lapack_complex_double* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m,
+                                lapack_complex_double* work, double* rwork,
+                                lapack_int* ifaill, lapack_int* ifailr );
+
+lapack_int LAPACKE_shseqr_work( int matrix_order, char job, char compz,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                float* h, lapack_int ldh, float* wr, float* wi,
+                                float* z, lapack_int ldz, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dhseqr_work( int matrix_order, char job, char compz,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                double* h, lapack_int ldh, double* wr,
+                                double* wi, double* z, lapack_int ldz,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_chseqr_work( int matrix_order, char job, char compz,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                lapack_complex_float* h, lapack_int ldh,
+                                lapack_complex_float* w,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zhseqr_work( int matrix_order, char job, char compz,
+                                lapack_int n, lapack_int ilo, lapack_int ihi,
+                                lapack_complex_double* h, lapack_int ldh,
+                                lapack_complex_double* w,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_clacgv_work( lapack_int n, lapack_complex_float* x,
+                                lapack_int incx );
+lapack_int LAPACKE_zlacgv_work( lapack_int n, lapack_complex_double* x,
+                                lapack_int incx );
+
+lapack_int LAPACKE_slacpy_work( int matrix_order, char uplo, lapack_int m,
+                                lapack_int n, const float* a, lapack_int lda,
+                                float* b, lapack_int ldb );
+lapack_int LAPACKE_dlacpy_work( int matrix_order, char uplo, lapack_int m,
+                                lapack_int n, const double* a, lapack_int lda,
+                                double* b, lapack_int ldb );
+lapack_int LAPACKE_clacpy_work( int matrix_order, char uplo, lapack_int m,
+                                lapack_int n, const lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_zlacpy_work( int matrix_order, char uplo, lapack_int m,
+                                lapack_int n, const lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* b,
+                                lapack_int ldb );
+
+lapack_int LAPACKE_zlag2c_work( int matrix_order, lapack_int m, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_float* sa, lapack_int ldsa );
+
+lapack_int LAPACKE_slag2d_work( int matrix_order, lapack_int m, lapack_int n,
+                                const float* sa, lapack_int ldsa, double* a,
+                                lapack_int lda );
+
+lapack_int LAPACKE_dlag2s_work( int matrix_order, lapack_int m, lapack_int n,
+                                const double* a, lapack_int lda, float* sa,
+                                lapack_int ldsa );
+
+lapack_int LAPACKE_clag2z_work( int matrix_order, lapack_int m, lapack_int n,
+                                const lapack_complex_float* sa, lapack_int ldsa,
+                                lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_slagge_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku, const float* d,
+                                float* a, lapack_int lda, lapack_int* iseed,
+                                float* work );
+lapack_int LAPACKE_dlagge_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku, const double* d,
+                                double* a, lapack_int lda, lapack_int* iseed,
+                                double* work );
+lapack_int LAPACKE_clagge_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku, const float* d,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_int* iseed, lapack_complex_float* work );
+lapack_int LAPACKE_zlagge_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int kl, lapack_int ku, const double* d,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int* iseed,
+                                lapack_complex_double* work );
+                                
+lapack_int LAPACKE_claghe_work( int matrix_order, lapack_int n, lapack_int k,
+                                const float* d, lapack_complex_float* a,
+                                lapack_int lda, lapack_int* iseed,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zlaghe_work( int matrix_order, lapack_int n, lapack_int k,
+                                const double* d, lapack_complex_double* a,
+                                lapack_int lda, lapack_int* iseed,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_slagsy_work( int matrix_order, lapack_int n, lapack_int k,
+                                const float* d, float* a, lapack_int lda,
+                                lapack_int* iseed, float* work );
+lapack_int LAPACKE_dlagsy_work( int matrix_order, lapack_int n, lapack_int k,
+                                const double* d, double* a, lapack_int lda,
+                                lapack_int* iseed, double* work );
+lapack_int LAPACKE_clagsy_work( int matrix_order, lapack_int n, lapack_int k,
+                                const float* d, lapack_complex_float* a,
+                                lapack_int lda, lapack_int* iseed,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zlagsy_work( int matrix_order, lapack_int n, lapack_int k,
+                                const double* d, lapack_complex_double* a,
+                                lapack_int lda, lapack_int* iseed,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_slapmr_work( int matrix_order, lapack_logical forwrd,
+                                lapack_int m, lapack_int n, float* x,
+                                lapack_int ldx, lapack_int* k );
+lapack_int LAPACKE_dlapmr_work( int matrix_order, lapack_logical forwrd,
+                                lapack_int m, lapack_int n, double* x,
+                                lapack_int ldx, lapack_int* k );
+lapack_int LAPACKE_clapmr_work( int matrix_order, lapack_logical forwrd,
+                                lapack_int m, lapack_int n,
+                                lapack_complex_float* x, lapack_int ldx,
+                                lapack_int* k );
+lapack_int LAPACKE_zlapmr_work( int matrix_order, lapack_logical forwrd,
+                                lapack_int m, lapack_int n,
+                                lapack_complex_double* x, lapack_int ldx,
+                                lapack_int* k );
+
+lapack_int LAPACKE_slartgp_work( float f, float g, float* cs, float* sn,
+                                 float* r );
+lapack_int LAPACKE_dlartgp_work( double f, double g, double* cs, double* sn,
+                                 double* r );
+
+lapack_int LAPACKE_slartgs_work( float x, float y, float sigma, float* cs,
+                                 float* sn );
+lapack_int LAPACKE_dlartgs_work( double x, double y, double sigma, double* cs,
+                                 double* sn );
+                                
+float LAPACKE_slapy2_work( float x, float y );
+double LAPACKE_dlapy2_work( double x, double y );
+
+float LAPACKE_slapy3_work( float x, float y, float z );
+double LAPACKE_dlapy3_work( double x, double y, double z );
+
+float LAPACKE_slamch_work( char cmach );
+double LAPACKE_dlamch_work( char cmach );
+
+float LAPACKE_slange_work( int matrix_order, char norm, lapack_int m,
+                                lapack_int n, const float* a, lapack_int lda,
+                                float* work );
+double LAPACKE_dlange_work( int matrix_order, char norm, lapack_int m,
+                                lapack_int n, const double* a, lapack_int lda,
+                                double* work );
+float LAPACKE_clange_work( int matrix_order, char norm, lapack_int m,
+                                lapack_int n, const lapack_complex_float* a,
+                                lapack_int lda, float* work );
+double LAPACKE_zlange_work( int matrix_order, char norm, lapack_int m,
+                                lapack_int n, const lapack_complex_double* a,
+                                lapack_int lda, double* work );
+
+float LAPACKE_clanhe_work( int matrix_order, char norm, char uplo,
+                                lapack_int n, const lapack_complex_float* a,
+                                lapack_int lda, float* work );
+double LAPACKE_zlanhe_work( int matrix_order, char norm, char uplo,
+                                lapack_int n, const lapack_complex_double* a,
+                                lapack_int lda, double* work );
+
+float LAPACKE_slansy_work( int matrix_order, char norm, char uplo,
+                                lapack_int n, const float* a, lapack_int lda,
+                                float* work );
+double LAPACKE_dlansy_work( int matrix_order, char norm, char uplo,
+                                lapack_int n, const double* a, lapack_int lda,
+                                double* work );
+float LAPACKE_clansy_work( int matrix_order, char norm, char uplo,
+                                lapack_int n, const lapack_complex_float* a,
+                                lapack_int lda, float* work );
+double LAPACKE_zlansy_work( int matrix_order, char norm, char uplo,
+                                lapack_int n, const lapack_complex_double* a,
+                                lapack_int lda, double* work );
+
+float LAPACKE_slantr_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int m, lapack_int n, const float* a,
+                                lapack_int lda, float* work );
+double LAPACKE_dlantr_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int m, lapack_int n,
+                                const double* a, lapack_int lda, double* work );
+float LAPACKE_clantr_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int m, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                float* work );
+double LAPACKE_zlantr_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int m, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                double* work );
+
+lapack_int LAPACKE_slarfb_work( int matrix_order, char side, char trans,
+                                char direct, char storev, lapack_int m,
+                                lapack_int n, lapack_int k, const float* v,
+                                lapack_int ldv, const float* t, lapack_int ldt,
+                                float* c, lapack_int ldc, float* work,
+                                lapack_int ldwork );
+lapack_int LAPACKE_dlarfb_work( int matrix_order, char side, char trans,
+                                char direct, char storev, lapack_int m,
+                                lapack_int n, lapack_int k, const double* v,
+                                lapack_int ldv, const double* t, lapack_int ldt,
+                                double* c, lapack_int ldc, double* work,
+                                lapack_int ldwork );
+lapack_int LAPACKE_clarfb_work( int matrix_order, char side, char trans,
+                                char direct, char storev, lapack_int m,
+                                lapack_int n, lapack_int k,
+                                const lapack_complex_float* v, lapack_int ldv,
+                                const lapack_complex_float* t, lapack_int ldt,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work, lapack_int ldwork );
+lapack_int LAPACKE_zlarfb_work( int matrix_order, char side, char trans,
+                                char direct, char storev, lapack_int m,
+                                lapack_int n, lapack_int k,
+                                const lapack_complex_double* v, lapack_int ldv,
+                                const lapack_complex_double* t, lapack_int ldt,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work,
+                                lapack_int ldwork );
+
+lapack_int LAPACKE_slarfg_work( lapack_int n, float* alpha, float* x,
+                                lapack_int incx, float* tau );
+lapack_int LAPACKE_dlarfg_work( lapack_int n, double* alpha, double* x,
+                                lapack_int incx, double* tau );
+lapack_int LAPACKE_clarfg_work( lapack_int n, lapack_complex_float* alpha,
+                                lapack_complex_float* x, lapack_int incx,
+                                lapack_complex_float* tau );
+lapack_int LAPACKE_zlarfg_work( lapack_int n, lapack_complex_double* alpha,
+                                lapack_complex_double* x, lapack_int incx,
+                                lapack_complex_double* tau );
+
+lapack_int LAPACKE_slarft_work( int matrix_order, char direct, char storev,
+                                lapack_int n, lapack_int k, const float* v,
+                                lapack_int ldv, const float* tau, float* t,
+                                lapack_int ldt );
+lapack_int LAPACKE_dlarft_work( int matrix_order, char direct, char storev,
+                                lapack_int n, lapack_int k, const double* v,
+                                lapack_int ldv, const double* tau, double* t,
+                                lapack_int ldt );
+lapack_int LAPACKE_clarft_work( int matrix_order, char direct, char storev,
+                                lapack_int n, lapack_int k,
+                                const lapack_complex_float* v, lapack_int ldv,
+                                const lapack_complex_float* tau,
+                                lapack_complex_float* t, lapack_int ldt );
+lapack_int LAPACKE_zlarft_work( int matrix_order, char direct, char storev,
+                                lapack_int n, lapack_int k,
+                                const lapack_complex_double* v, lapack_int ldv,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* t, lapack_int ldt );
+
+lapack_int LAPACKE_slarfx_work( int matrix_order, char side, lapack_int m,
+                                lapack_int n, const float* v, float tau,
+                                float* c, lapack_int ldc, float* work );
+lapack_int LAPACKE_dlarfx_work( int matrix_order, char side, lapack_int m,
+                                lapack_int n, const double* v, double tau,
+                                double* c, lapack_int ldc, double* work );
+lapack_int LAPACKE_clarfx_work( int matrix_order, char side, lapack_int m,
+                                lapack_int n, const lapack_complex_float* v,
+                                lapack_complex_float tau,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zlarfx_work( int matrix_order, char side, lapack_int m,
+                                lapack_int n, const lapack_complex_double* v,
+                                lapack_complex_double tau,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_slarnv_work( lapack_int idist, lapack_int* iseed,
+                                lapack_int n, float* x );
+lapack_int LAPACKE_dlarnv_work( lapack_int idist, lapack_int* iseed,
+                                lapack_int n, double* x );
+lapack_int LAPACKE_clarnv_work( lapack_int idist, lapack_int* iseed,
+                                lapack_int n, lapack_complex_float* x );
+lapack_int LAPACKE_zlarnv_work( lapack_int idist, lapack_int* iseed,
+                                lapack_int n, lapack_complex_double* x );
+
+lapack_int LAPACKE_slaset_work( int matrix_order, char uplo, lapack_int m,
+                                lapack_int n, float alpha, float beta, float* a,
+                                lapack_int lda );
+lapack_int LAPACKE_dlaset_work( int matrix_order, char uplo, lapack_int m,
+                                lapack_int n, double alpha, double beta,
+                                double* a, lapack_int lda );
+lapack_int LAPACKE_claset_work( int matrix_order, char uplo, lapack_int m,
+                                lapack_int n, lapack_complex_float alpha,
+                                lapack_complex_float beta,
+                                lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_zlaset_work( int matrix_order, char uplo, lapack_int m,
+                                lapack_int n, lapack_complex_double alpha,
+                                lapack_complex_double beta,
+                                lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_slasrt_work( char id, lapack_int n, float* d );
+lapack_int LAPACKE_dlasrt_work( char id, lapack_int n, double* d );
+
+lapack_int LAPACKE_slaswp_work( int matrix_order, lapack_int n, float* a,
+                                lapack_int lda, lapack_int k1, lapack_int k2,
+                                const lapack_int* ipiv, lapack_int incx );
+lapack_int LAPACKE_dlaswp_work( int matrix_order, lapack_int n, double* a,
+                                lapack_int lda, lapack_int k1, lapack_int k2,
+                                const lapack_int* ipiv, lapack_int incx );
+lapack_int LAPACKE_claswp_work( int matrix_order, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_int k1, lapack_int k2,
+                                const lapack_int* ipiv, lapack_int incx );
+lapack_int LAPACKE_zlaswp_work( int matrix_order, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int k1, lapack_int k2,
+                                const lapack_int* ipiv, lapack_int incx );
+
+lapack_int LAPACKE_slatms_work( int matrix_order, lapack_int m, lapack_int n,
+                                char dist, lapack_int* iseed, char sym,
+                                float* d, lapack_int mode, float cond,
+                                float dmax, lapack_int kl, lapack_int ku,
+                                char pack, float* a, lapack_int lda,
+                                float* work );
+lapack_int LAPACKE_dlatms_work( int matrix_order, lapack_int m, lapack_int n,
+                                char dist, lapack_int* iseed, char sym,
+                                double* d, lapack_int mode, double cond,
+                                double dmax, lapack_int kl, lapack_int ku,
+                                char pack, double* a, lapack_int lda,
+                                double* work );
+lapack_int LAPACKE_clatms_work( int matrix_order, lapack_int m, lapack_int n,
+                                char dist, lapack_int* iseed, char sym,
+                                float* d, lapack_int mode, float cond,
+                                float dmax, lapack_int kl, lapack_int ku,
+                                char pack, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* work );
+lapack_int LAPACKE_zlatms_work( int matrix_order, lapack_int m, lapack_int n,
+                                char dist, lapack_int* iseed, char sym,
+                                double* d, lapack_int mode, double cond,
+                                double dmax, lapack_int kl, lapack_int ku,
+                                char pack, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* work );
+
+lapack_int LAPACKE_slauum_work( int matrix_order, char uplo, lapack_int n,
+                                float* a, lapack_int lda );
+lapack_int LAPACKE_dlauum_work( int matrix_order, char uplo, lapack_int n,
+                                double* a, lapack_int lda );
+lapack_int LAPACKE_clauum_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_zlauum_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_sopgtr_work( int matrix_order, char uplo, lapack_int n,
+                                const float* ap, const float* tau, float* q,
+                                lapack_int ldq, float* work );
+lapack_int LAPACKE_dopgtr_work( int matrix_order, char uplo, lapack_int n,
+                                const double* ap, const double* tau, double* q,
+                                lapack_int ldq, double* work );
+
+lapack_int LAPACKE_sopmtr_work( int matrix_order, char side, char uplo,
+                                char trans, lapack_int m, lapack_int n,
+                                const float* ap, const float* tau, float* c,
+                                lapack_int ldc, float* work );
+lapack_int LAPACKE_dopmtr_work( int matrix_order, char side, char uplo,
+                                char trans, lapack_int m, lapack_int n,
+                                const double* ap, const double* tau, double* c,
+                                lapack_int ldc, double* work );
+
+lapack_int LAPACKE_sorgbr_work( int matrix_order, char vect, lapack_int m,
+                                lapack_int n, lapack_int k, float* a,
+                                lapack_int lda, const float* tau, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dorgbr_work( int matrix_order, char vect, lapack_int m,
+                                lapack_int n, lapack_int k, double* a,
+                                lapack_int lda, const double* tau, double* work,
+                                lapack_int lwork );
+
+lapack_int LAPACKE_sorghr_work( int matrix_order, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, float* a, lapack_int lda,
+                                const float* tau, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dorghr_work( int matrix_order, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, double* a, lapack_int lda,
+                                const double* tau, double* work,
+                                lapack_int lwork );
+
+lapack_int LAPACKE_sorglq_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, float* a, lapack_int lda,
+                                const float* tau, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dorglq_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, double* a, lapack_int lda,
+                                const double* tau, double* work,
+                                lapack_int lwork );
+
+lapack_int LAPACKE_sorgql_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, float* a, lapack_int lda,
+                                const float* tau, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dorgql_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, double* a, lapack_int lda,
+                                const double* tau, double* work,
+                                lapack_int lwork );
+
+lapack_int LAPACKE_sorgqr_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, float* a, lapack_int lda,
+                                const float* tau, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dorgqr_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, double* a, lapack_int lda,
+                                const double* tau, double* work,
+                                lapack_int lwork );
+
+lapack_int LAPACKE_sorgrq_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, float* a, lapack_int lda,
+                                const float* tau, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dorgrq_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, double* a, lapack_int lda,
+                                const double* tau, double* work,
+                                lapack_int lwork );
+
+lapack_int LAPACKE_sorgtr_work( int matrix_order, char uplo, lapack_int n,
+                                float* a, lapack_int lda, const float* tau,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dorgtr_work( int matrix_order, char uplo, lapack_int n,
+                                double* a, lapack_int lda, const double* tau,
+                                double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sormbr_work( int matrix_order, char vect, char side,
+                                char trans, lapack_int m, lapack_int n,
+                                lapack_int k, const float* a, lapack_int lda,
+                                const float* tau, float* c, lapack_int ldc,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dormbr_work( int matrix_order, char vect, char side,
+                                char trans, lapack_int m, lapack_int n,
+                                lapack_int k, const double* a, lapack_int lda,
+                                const double* tau, double* c, lapack_int ldc,
+                                double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sormhr_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, const float* a, lapack_int lda,
+                                const float* tau, float* c, lapack_int ldc,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dormhr_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, const double* a, lapack_int lda,
+                                const double* tau, double* c, lapack_int ldc,
+                                double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const float* a, lapack_int lda,
+                                const float* tau, float* c, lapack_int ldc,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const double* a, lapack_int lda,
+                                const double* tau, double* c, lapack_int ldc,
+                                double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sormql_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const float* a, lapack_int lda,
+                                const float* tau, float* c, lapack_int ldc,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dormql_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const double* a, lapack_int lda,
+                                const double* tau, double* c, lapack_int ldc,
+                                double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sormqr_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const float* a, lapack_int lda,
+                                const float* tau, float* c, lapack_int ldc,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dormqr_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const double* a, lapack_int lda,
+                                const double* tau, double* c, lapack_int ldc,
+                                double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sormrq_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const float* a, lapack_int lda,
+                                const float* tau, float* c, lapack_int ldc,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dormrq_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const double* a, lapack_int lda,
+                                const double* tau, double* c, lapack_int ldc,
+                                double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sormrz_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                lapack_int l, const float* a, lapack_int lda,
+                                const float* tau, float* c, lapack_int ldc,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dormrz_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                lapack_int l, const double* a, lapack_int lda,
+                                const double* tau, double* c, lapack_int ldc,
+                                double* work, lapack_int lwork );
+
+lapack_int LAPACKE_sormtr_work( int matrix_order, char side, char uplo,
+                                char trans, lapack_int m, lapack_int n,
+                                const float* a, lapack_int lda,
+                                const float* tau, float* c, lapack_int ldc,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dormtr_work( int matrix_order, char side, char uplo,
+                                char trans, lapack_int m, lapack_int n,
+                                const double* a, lapack_int lda,
+                                const double* tau, double* c, lapack_int ldc,
+                                double* work, lapack_int lwork );
+
+lapack_int LAPACKE_spbcon_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, const float* ab, lapack_int ldab,
+                                float anorm, float* rcond, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dpbcon_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, const double* ab,
+                                lapack_int ldab, double anorm, double* rcond,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_cpbcon_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, const lapack_complex_float* ab,
+                                lapack_int ldab, float anorm, float* rcond,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zpbcon_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, const lapack_complex_double* ab,
+                                lapack_int ldab, double anorm, double* rcond,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_spbequ_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, const float* ab, lapack_int ldab,
+                                float* s, float* scond, float* amax );
+lapack_int LAPACKE_dpbequ_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, const double* ab,
+                                lapack_int ldab, double* s, double* scond,
+                                double* amax );
+lapack_int LAPACKE_cpbequ_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, const lapack_complex_float* ab,
+                                lapack_int ldab, float* s, float* scond,
+                                float* amax );
+lapack_int LAPACKE_zpbequ_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, const lapack_complex_double* ab,
+                                lapack_int ldab, double* s, double* scond,
+                                double* amax );
+
+lapack_int LAPACKE_spbrfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, lapack_int nrhs, const float* ab,
+                                lapack_int ldab, const float* afb,
+                                lapack_int ldafb, const float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* ferr, float* berr, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dpbrfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, lapack_int nrhs,
+                                const double* ab, lapack_int ldab,
+                                const double* afb, lapack_int ldafb,
+                                const double* b, lapack_int ldb, double* x,
+                                lapack_int ldx, double* ferr, double* berr,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_cpbrfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, lapack_int nrhs,
+                                const lapack_complex_float* ab, lapack_int ldab,
+                                const lapack_complex_float* afb,
+                                lapack_int ldafb, const lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* x,
+                                lapack_int ldx, float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zpbrfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, lapack_int nrhs,
+                                const lapack_complex_double* ab,
+                                lapack_int ldab,
+                                const lapack_complex_double* afb,
+                                lapack_int ldafb,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_spbstf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kb, float* bb, lapack_int ldbb );
+lapack_int LAPACKE_dpbstf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kb, double* bb, lapack_int ldbb );
+lapack_int LAPACKE_cpbstf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kb, lapack_complex_float* bb,
+                                lapack_int ldbb );
+lapack_int LAPACKE_zpbstf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kb, lapack_complex_double* bb,
+                                lapack_int ldbb );
+
+lapack_int LAPACKE_spbsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int kd, lapack_int nrhs, float* ab,
+                               lapack_int ldab, float* b, lapack_int ldb );
+lapack_int LAPACKE_dpbsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int kd, lapack_int nrhs, double* ab,
+                               lapack_int ldab, double* b, lapack_int ldb );
+lapack_int LAPACKE_cpbsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int kd, lapack_int nrhs,
+                               lapack_complex_float* ab, lapack_int ldab,
+                               lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zpbsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int kd, lapack_int nrhs,
+                               lapack_complex_double* ab, lapack_int ldab,
+                               lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_spbsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int kd, lapack_int nrhs,
+                                float* ab, lapack_int ldab, float* afb,
+                                lapack_int ldafb, char* equed, float* s,
+                                float* b, lapack_int ldb, float* x,
+                                lapack_int ldx, float* rcond, float* ferr,
+                                float* berr, float* work, lapack_int* iwork );
+lapack_int LAPACKE_dpbsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int kd, lapack_int nrhs,
+                                double* ab, lapack_int ldab, double* afb,
+                                lapack_int ldafb, char* equed, double* s,
+                                double* b, lapack_int ldb, double* x,
+                                lapack_int ldx, double* rcond, double* ferr,
+                                double* berr, double* work, lapack_int* iwork );
+lapack_int LAPACKE_cpbsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int kd, lapack_int nrhs,
+                                lapack_complex_float* ab, lapack_int ldab,
+                                lapack_complex_float* afb, lapack_int ldafb,
+                                char* equed, float* s, lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* x,
+                                lapack_int ldx, float* rcond, float* ferr,
+                                float* berr, lapack_complex_float* work,
+                                float* rwork );
+lapack_int LAPACKE_zpbsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int kd, lapack_int nrhs,
+                                lapack_complex_double* ab, lapack_int ldab,
+                                lapack_complex_double* afb, lapack_int ldafb,
+                                char* equed, double* s,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_spbtrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, float* ab, lapack_int ldab );
+lapack_int LAPACKE_dpbtrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, double* ab, lapack_int ldab );
+lapack_int LAPACKE_cpbtrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, lapack_complex_float* ab,
+                                lapack_int ldab );
+lapack_int LAPACKE_zpbtrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, lapack_complex_double* ab,
+                                lapack_int ldab );
+
+lapack_int LAPACKE_spbtrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, lapack_int nrhs, const float* ab,
+                                lapack_int ldab, float* b, lapack_int ldb );
+lapack_int LAPACKE_dpbtrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, lapack_int nrhs,
+                                const double* ab, lapack_int ldab, double* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_cpbtrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, lapack_int nrhs,
+                                const lapack_complex_float* ab, lapack_int ldab,
+                                lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zpbtrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int kd, lapack_int nrhs,
+                                const lapack_complex_double* ab,
+                                lapack_int ldab, lapack_complex_double* b,
+                                lapack_int ldb );
+
+lapack_int LAPACKE_spftrf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, float* a );
+lapack_int LAPACKE_dpftrf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, double* a );
+lapack_int LAPACKE_cpftrf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, lapack_complex_float* a );
+lapack_int LAPACKE_zpftrf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, lapack_complex_double* a );
+
+lapack_int LAPACKE_spftri_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, float* a );
+lapack_int LAPACKE_dpftri_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, double* a );
+lapack_int LAPACKE_cpftri_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, lapack_complex_float* a );
+lapack_int LAPACKE_zpftri_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, lapack_complex_double* a );
+
+lapack_int LAPACKE_spftrs_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, lapack_int nrhs, const float* a,
+                                float* b, lapack_int ldb );
+lapack_int LAPACKE_dpftrs_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, lapack_int nrhs, const double* a,
+                                double* b, lapack_int ldb );
+lapack_int LAPACKE_cpftrs_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_float* a,
+                                lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zpftrs_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_double* a,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_spocon_work( int matrix_order, char uplo, lapack_int n,
+                                const float* a, lapack_int lda, float anorm,
+                                float* rcond, float* work, lapack_int* iwork );
+lapack_int LAPACKE_dpocon_work( int matrix_order, char uplo, lapack_int n,
+                                const double* a, lapack_int lda, double anorm,
+                                double* rcond, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_cpocon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                float anorm, float* rcond,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zpocon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                double anorm, double* rcond,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_spoequ_work( int matrix_order, lapack_int n, const float* a,
+                                lapack_int lda, float* s, float* scond,
+                                float* amax );
+lapack_int LAPACKE_dpoequ_work( int matrix_order, lapack_int n, const double* a,
+                                lapack_int lda, double* s, double* scond,
+                                double* amax );
+lapack_int LAPACKE_cpoequ_work( int matrix_order, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                float* s, float* scond, float* amax );
+lapack_int LAPACKE_zpoequ_work( int matrix_order, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                double* s, double* scond, double* amax );
+
+lapack_int LAPACKE_spoequb_work( int matrix_order, lapack_int n, const float* a,
+                                 lapack_int lda, float* s, float* scond,
+                                 float* amax );
+lapack_int LAPACKE_dpoequb_work( int matrix_order, lapack_int n,
+                                 const double* a, lapack_int lda, double* s,
+                                 double* scond, double* amax );
+lapack_int LAPACKE_cpoequb_work( int matrix_order, lapack_int n,
+                                 const lapack_complex_float* a, lapack_int lda,
+                                 float* s, float* scond, float* amax );
+lapack_int LAPACKE_zpoequb_work( int matrix_order, lapack_int n,
+                                 const lapack_complex_double* a, lapack_int lda,
+                                 double* s, double* scond, double* amax );
+
+lapack_int LAPACKE_sporfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const float* a, lapack_int lda,
+                                const float* af, lapack_int ldaf,
+                                const float* b, lapack_int ldb, float* x,
+                                lapack_int ldx, float* ferr, float* berr,
+                                float* work, lapack_int* iwork );
+lapack_int LAPACKE_dporfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const double* a,
+                                lapack_int lda, const double* af,
+                                lapack_int ldaf, const double* b,
+                                lapack_int ldb, double* x, lapack_int ldx,
+                                double* ferr, double* berr, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_cporfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* af,
+                                lapack_int ldaf, const lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* x,
+                                lapack_int ldx, float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zporfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_double* a,
+                                lapack_int lda, const lapack_complex_double* af,
+                                lapack_int ldaf, const lapack_complex_double* b,
+                                lapack_int ldb, lapack_complex_double* x,
+                                lapack_int ldx, double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sporfsx_work( int matrix_order, char uplo, char equed,
+                                 lapack_int n, lapack_int nrhs, const float* a,
+                                 lapack_int lda, const float* af,
+                                 lapack_int ldaf, const float* s,
+                                 const float* b, lapack_int ldb, float* x,
+                                 lapack_int ldx, float* rcond, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, float* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_dporfsx_work( int matrix_order, char uplo, char equed,
+                                 lapack_int n, lapack_int nrhs, const double* a,
+                                 lapack_int lda, const double* af,
+                                 lapack_int ldaf, const double* s,
+                                 const double* b, lapack_int ldb, double* x,
+                                 lapack_int ldx, double* rcond, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, double* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_cporfsx_work( int matrix_order, char uplo, char equed,
+                                 lapack_int n, lapack_int nrhs,
+                                 const lapack_complex_float* a, lapack_int lda,
+                                 const lapack_complex_float* af,
+                                 lapack_int ldaf, const float* s,
+                                 const lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* x, lapack_int ldx,
+                                 float* rcond, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, lapack_complex_float* work,
+                                 float* rwork );
+lapack_int LAPACKE_zporfsx_work( int matrix_order, char uplo, char equed,
+                                 lapack_int n, lapack_int nrhs,
+                                 const lapack_complex_double* a, lapack_int lda,
+                                 const lapack_complex_double* af,
+                                 lapack_int ldaf, const double* s,
+                                 const lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 double* rcond, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, lapack_complex_double* work,
+                                 double* rwork );
+
+lapack_int LAPACKE_sposv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, float* a, lapack_int lda,
+                               float* b, lapack_int ldb );
+lapack_int LAPACKE_dposv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, double* a, lapack_int lda,
+                               double* b, lapack_int ldb );
+lapack_int LAPACKE_cposv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_float* a,
+                               lapack_int lda, lapack_complex_float* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_zposv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_double* a,
+                               lapack_int lda, lapack_complex_double* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_dsposv_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, double* a, lapack_int lda,
+                                double* b, lapack_int ldb, double* x,
+                                lapack_int ldx, double* work, float* swork,
+                                lapack_int* iter );
+lapack_int LAPACKE_zcposv_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* b,
+                                lapack_int ldb, lapack_complex_double* x,
+                                lapack_int ldx, lapack_complex_double* work,
+                                lapack_complex_float* swork, double* rwork,
+                                lapack_int* iter );
+
+lapack_int LAPACKE_sposvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs, float* a,
+                                lapack_int lda, float* af, lapack_int ldaf,
+                                char* equed, float* s, float* b, lapack_int ldb,
+                                float* x, lapack_int ldx, float* rcond,
+                                float* ferr, float* berr, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dposvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs, double* a,
+                                lapack_int lda, double* af, lapack_int ldaf,
+                                char* equed, double* s, double* b,
+                                lapack_int ldb, double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_cposvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* af, lapack_int ldaf,
+                                char* equed, float* s, lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* x,
+                                lapack_int ldx, float* rcond, float* ferr,
+                                float* berr, lapack_complex_float* work,
+                                float* rwork );
+lapack_int LAPACKE_zposvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* af, lapack_int ldaf,
+                                char* equed, double* s,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sposvxx_work( int matrix_order, char fact, char uplo,
+                                 lapack_int n, lapack_int nrhs, float* a,
+                                 lapack_int lda, float* af, lapack_int ldaf,
+                                 char* equed, float* s, float* b,
+                                 lapack_int ldb, float* x, lapack_int ldx,
+                                 float* rcond, float* rpvgrw, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, float* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_dposvxx_work( int matrix_order, char fact, char uplo,
+                                 lapack_int n, lapack_int nrhs, double* a,
+                                 lapack_int lda, double* af, lapack_int ldaf,
+                                 char* equed, double* s, double* b,
+                                 lapack_int ldb, double* x, lapack_int ldx,
+                                 double* rcond, double* rpvgrw, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, double* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_cposvxx_work( int matrix_order, char fact, char uplo,
+                                 lapack_int n, lapack_int nrhs,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 lapack_complex_float* af, lapack_int ldaf,
+                                 char* equed, float* s, lapack_complex_float* b,
+                                 lapack_int ldb, lapack_complex_float* x,
+                                 lapack_int ldx, float* rcond, float* rpvgrw,
+                                 float* berr, lapack_int n_err_bnds,
+                                 float* err_bnds_norm, float* err_bnds_comp,
+                                 lapack_int nparams, float* params,
+                                 lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zposvxx_work( int matrix_order, char fact, char uplo,
+                                 lapack_int n, lapack_int nrhs,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 lapack_complex_double* af, lapack_int ldaf,
+                                 char* equed, double* s,
+                                 lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 double* rcond, double* rpvgrw, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, lapack_complex_double* work,
+                                 double* rwork );
+
+lapack_int LAPACKE_spotrf_work( int matrix_order, char uplo, lapack_int n,
+                                float* a, lapack_int lda );
+lapack_int LAPACKE_dpotrf_work( int matrix_order, char uplo, lapack_int n,
+                                double* a, lapack_int lda );
+lapack_int LAPACKE_cpotrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_zpotrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_spotri_work( int matrix_order, char uplo, lapack_int n,
+                                float* a, lapack_int lda );
+lapack_int LAPACKE_dpotri_work( int matrix_order, char uplo, lapack_int n,
+                                double* a, lapack_int lda );
+lapack_int LAPACKE_cpotri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_zpotri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_spotrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const float* a, lapack_int lda,
+                                float* b, lapack_int ldb );
+lapack_int LAPACKE_dpotrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const double* a,
+                                lapack_int lda, double* b, lapack_int ldb );
+lapack_int LAPACKE_cpotrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_zpotrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* b,
+                                lapack_int ldb );
+
+lapack_int LAPACKE_sppcon_work( int matrix_order, char uplo, lapack_int n,
+                                const float* ap, float anorm, float* rcond,
+                                float* work, lapack_int* iwork );
+lapack_int LAPACKE_dppcon_work( int matrix_order, char uplo, lapack_int n,
+                                const double* ap, double anorm, double* rcond,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_cppcon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_float* ap, float anorm,
+                                float* rcond, lapack_complex_float* work,
+                                float* rwork );
+lapack_int LAPACKE_zppcon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_double* ap, double anorm,
+                                double* rcond, lapack_complex_double* work,
+                                double* rwork );
+
+lapack_int LAPACKE_sppequ_work( int matrix_order, char uplo, lapack_int n,
+                                const float* ap, float* s, float* scond,
+                                float* amax );
+lapack_int LAPACKE_dppequ_work( int matrix_order, char uplo, lapack_int n,
+                                const double* ap, double* s, double* scond,
+                                double* amax );
+lapack_int LAPACKE_cppequ_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_float* ap, float* s,
+                                float* scond, float* amax );
+lapack_int LAPACKE_zppequ_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_double* ap, double* s,
+                                double* scond, double* amax );
+
+lapack_int LAPACKE_spprfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const float* ap,
+                                const float* afp, const float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* ferr, float* berr, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dpprfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const double* ap,
+                                const double* afp, const double* b,
+                                lapack_int ldb, double* x, lapack_int ldx,
+                                double* ferr, double* berr, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_cpprfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* ap,
+                                const lapack_complex_float* afp,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zpprfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs,
+                                const lapack_complex_double* ap,
+                                const lapack_complex_double* afp,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sppsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, float* ap, float* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_dppsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, double* ap, double* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_cppsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_float* ap,
+                               lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zppsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_double* ap,
+                               lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_sppsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs, float* ap,
+                                float* afp, char* equed, float* s, float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* rcond, float* ferr, float* berr,
+                                float* work, lapack_int* iwork );
+lapack_int LAPACKE_dppsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs, double* ap,
+                                double* afp, char* equed, double* s, double* b,
+                                lapack_int ldb, double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_cppsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                lapack_complex_float* ap,
+                                lapack_complex_float* afp, char* equed,
+                                float* s, lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* x,
+                                lapack_int ldx, float* rcond, float* ferr,
+                                float* berr, lapack_complex_float* work,
+                                float* rwork );
+lapack_int LAPACKE_zppsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                lapack_complex_double* ap,
+                                lapack_complex_double* afp, char* equed,
+                                double* s, lapack_complex_double* b,
+                                lapack_int ldb, lapack_complex_double* x,
+                                lapack_int ldx, double* rcond, double* ferr,
+                                double* berr, lapack_complex_double* work,
+                                double* rwork );
+
+lapack_int LAPACKE_spptrf_work( int matrix_order, char uplo, lapack_int n,
+                                float* ap );
+lapack_int LAPACKE_dpptrf_work( int matrix_order, char uplo, lapack_int n,
+                                double* ap );
+lapack_int LAPACKE_cpptrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* ap );
+lapack_int LAPACKE_zpptrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* ap );
+
+lapack_int LAPACKE_spptri_work( int matrix_order, char uplo, lapack_int n,
+                                float* ap );
+lapack_int LAPACKE_dpptri_work( int matrix_order, char uplo, lapack_int n,
+                                double* ap );
+lapack_int LAPACKE_cpptri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* ap );
+lapack_int LAPACKE_zpptri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* ap );
+
+lapack_int LAPACKE_spptrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const float* ap, float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_dpptrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const double* ap, double* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_cpptrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* ap,
+                                lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zpptrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs,
+                                const lapack_complex_double* ap,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_spstrf_work( int matrix_order, char uplo, lapack_int n,
+                                float* a, lapack_int lda, lapack_int* piv,
+                                lapack_int* rank, float tol, float* work );
+lapack_int LAPACKE_dpstrf_work( int matrix_order, char uplo, lapack_int n,
+                                double* a, lapack_int lda, lapack_int* piv,
+                                lapack_int* rank, double tol, double* work );
+lapack_int LAPACKE_cpstrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_int* piv, lapack_int* rank, float tol,
+                                float* work );
+lapack_int LAPACKE_zpstrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int* piv, lapack_int* rank, double tol,
+                                double* work );
+
+lapack_int LAPACKE_sptcon_work( lapack_int n, const float* d, const float* e,
+                                float anorm, float* rcond, float* work );
+lapack_int LAPACKE_dptcon_work( lapack_int n, const double* d, const double* e,
+                                double anorm, double* rcond, double* work );
+lapack_int LAPACKE_cptcon_work( lapack_int n, const float* d,
+                                const lapack_complex_float* e, float anorm,
+                                float* rcond, float* work );
+lapack_int LAPACKE_zptcon_work( lapack_int n, const double* d,
+                                const lapack_complex_double* e, double anorm,
+                                double* rcond, double* work );
+
+lapack_int LAPACKE_spteqr_work( int matrix_order, char compz, lapack_int n,
+                                float* d, float* e, float* z, lapack_int ldz,
+                                float* work );
+lapack_int LAPACKE_dpteqr_work( int matrix_order, char compz, lapack_int n,
+                                double* d, double* e, double* z, lapack_int ldz,
+                                double* work );
+lapack_int LAPACKE_cpteqr_work( int matrix_order, char compz, lapack_int n,
+                                float* d, float* e, lapack_complex_float* z,
+                                lapack_int ldz, float* work );
+lapack_int LAPACKE_zpteqr_work( int matrix_order, char compz, lapack_int n,
+                                double* d, double* e, lapack_complex_double* z,
+                                lapack_int ldz, double* work );
+
+lapack_int LAPACKE_sptrfs_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                                const float* d, const float* e, const float* df,
+                                const float* ef, const float* b, lapack_int ldb,
+                                float* x, lapack_int ldx, float* ferr,
+                                float* berr, float* work );
+lapack_int LAPACKE_dptrfs_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                                const double* d, const double* e,
+                                const double* df, const double* ef,
+                                const double* b, lapack_int ldb, double* x,
+                                lapack_int ldx, double* ferr, double* berr,
+                                double* work );
+lapack_int LAPACKE_cptrfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const float* d,
+                                const lapack_complex_float* e, const float* df,
+                                const lapack_complex_float* ef,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zptrfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const double* d,
+                                const lapack_complex_double* e,
+                                const double* df,
+                                const lapack_complex_double* ef,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sptsv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               float* d, float* e, float* b, lapack_int ldb );
+lapack_int LAPACKE_dptsv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               double* d, double* e, double* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_cptsv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               float* d, lapack_complex_float* e,
+                               lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zptsv_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                               double* d, lapack_complex_double* e,
+                               lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_sptsvx_work( int matrix_order, char fact, lapack_int n,
+                                lapack_int nrhs, const float* d, const float* e,
+                                float* df, float* ef, const float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* rcond, float* ferr, float* berr,
+                                float* work );
+lapack_int LAPACKE_dptsvx_work( int matrix_order, char fact, lapack_int n,
+                                lapack_int nrhs, const double* d,
+                                const double* e, double* df, double* ef,
+                                const double* b, lapack_int ldb, double* x,
+                                lapack_int ldx, double* rcond, double* ferr,
+                                double* berr, double* work );
+lapack_int LAPACKE_cptsvx_work( int matrix_order, char fact, lapack_int n,
+                                lapack_int nrhs, const float* d,
+                                const lapack_complex_float* e, float* df,
+                                lapack_complex_float* ef,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* rcond, float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zptsvx_work( int matrix_order, char fact, lapack_int n,
+                                lapack_int nrhs, const double* d,
+                                const lapack_complex_double* e, double* df,
+                                lapack_complex_double* ef,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_spttrf_work( lapack_int n, float* d, float* e );
+lapack_int LAPACKE_dpttrf_work( lapack_int n, double* d, double* e );
+lapack_int LAPACKE_cpttrf_work( lapack_int n, float* d,
+                                lapack_complex_float* e );
+lapack_int LAPACKE_zpttrf_work( lapack_int n, double* d,
+                                lapack_complex_double* e );
+
+lapack_int LAPACKE_spttrs_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                                const float* d, const float* e, float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_dpttrs_work( int matrix_order, lapack_int n, lapack_int nrhs,
+                                const double* d, const double* e, double* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_cpttrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const float* d,
+                                const lapack_complex_float* e,
+                                lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zpttrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const double* d,
+                                const lapack_complex_double* e,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_ssbev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_int kd, float* ab,
+                               lapack_int ldab, float* w, float* z,
+                               lapack_int ldz, float* work );
+lapack_int LAPACKE_dsbev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_int kd, double* ab,
+                               lapack_int ldab, double* w, double* z,
+                               lapack_int ldz, double* work );
+
+lapack_int LAPACKE_ssbevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_int kd, float* ab,
+                                lapack_int ldab, float* w, float* z,
+                                lapack_int ldz, float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_dsbevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_int kd, double* ab,
+                                lapack_int ldab, double* w, double* z,
+                                lapack_int ldz, double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_ssbevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, lapack_int kd,
+                                float* ab, lapack_int ldab, float* q,
+                                lapack_int ldq, float vl, float vu,
+                                lapack_int il, lapack_int iu, float abstol,
+                                lapack_int* m, float* w, float* z,
+                                lapack_int ldz, float* work, lapack_int* iwork,
+                                lapack_int* ifail );
+lapack_int LAPACKE_dsbevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, lapack_int kd,
+                                double* ab, lapack_int ldab, double* q,
+                                lapack_int ldq, double vl, double vu,
+                                lapack_int il, lapack_int iu, double abstol,
+                                lapack_int* m, double* w, double* z,
+                                lapack_int ldz, double* work, lapack_int* iwork,
+                                lapack_int* ifail );
+
+lapack_int LAPACKE_ssbgst_work( int matrix_order, char vect, char uplo,
+                                lapack_int n, lapack_int ka, lapack_int kb,
+                                float* ab, lapack_int ldab, const float* bb,
+                                lapack_int ldbb, float* x, lapack_int ldx,
+                                float* work );
+lapack_int LAPACKE_dsbgst_work( int matrix_order, char vect, char uplo,
+                                lapack_int n, lapack_int ka, lapack_int kb,
+                                double* ab, lapack_int ldab, const double* bb,
+                                lapack_int ldbb, double* x, lapack_int ldx,
+                                double* work );
+
+lapack_int LAPACKE_ssbgv_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_int ka, lapack_int kb,
+                               float* ab, lapack_int ldab, float* bb,
+                               lapack_int ldbb, float* w, float* z,
+                               lapack_int ldz, float* work );
+lapack_int LAPACKE_dsbgv_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, lapack_int ka, lapack_int kb,
+                               double* ab, lapack_int ldab, double* bb,
+                               lapack_int ldbb, double* w, double* z,
+                               lapack_int ldz, double* work );
+
+lapack_int LAPACKE_ssbgvd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_int ka, lapack_int kb,
+                                float* ab, lapack_int ldab, float* bb,
+                                lapack_int ldbb, float* w, float* z,
+                                lapack_int ldz, float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_dsbgvd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, lapack_int ka, lapack_int kb,
+                                double* ab, lapack_int ldab, double* bb,
+                                lapack_int ldbb, double* w, double* z,
+                                lapack_int ldz, double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_ssbgvx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, lapack_int ka,
+                                lapack_int kb, float* ab, lapack_int ldab,
+                                float* bb, lapack_int ldbb, float* q,
+                                lapack_int ldq, float vl, float vu,
+                                lapack_int il, lapack_int iu, float abstol,
+                                lapack_int* m, float* w, float* z,
+                                lapack_int ldz, float* work, lapack_int* iwork,
+                                lapack_int* ifail );
+lapack_int LAPACKE_dsbgvx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, lapack_int ka,
+                                lapack_int kb, double* ab, lapack_int ldab,
+                                double* bb, lapack_int ldbb, double* q,
+                                lapack_int ldq, double vl, double vu,
+                                lapack_int il, lapack_int iu, double abstol,
+                                lapack_int* m, double* w, double* z,
+                                lapack_int ldz, double* work, lapack_int* iwork,
+                                lapack_int* ifail );
+
+lapack_int LAPACKE_ssbtrd_work( int matrix_order, char vect, char uplo,
+                                lapack_int n, lapack_int kd, float* ab,
+                                lapack_int ldab, float* d, float* e, float* q,
+                                lapack_int ldq, float* work );
+lapack_int LAPACKE_dsbtrd_work( int matrix_order, char vect, char uplo,
+                                lapack_int n, lapack_int kd, double* ab,
+                                lapack_int ldab, double* d, double* e,
+                                double* q, lapack_int ldq, double* work );
+
+lapack_int LAPACKE_ssfrk_work( int matrix_order, char transr, char uplo,
+                               char trans, lapack_int n, lapack_int k,
+                               float alpha, const float* a, lapack_int lda,
+                               float beta, float* c );
+lapack_int LAPACKE_dsfrk_work( int matrix_order, char transr, char uplo,
+                               char trans, lapack_int n, lapack_int k,
+                               double alpha, const double* a, lapack_int lda,
+                               double beta, double* c );
+
+lapack_int LAPACKE_sspcon_work( int matrix_order, char uplo, lapack_int n,
+                                const float* ap, const lapack_int* ipiv,
+                                float anorm, float* rcond, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dspcon_work( int matrix_order, char uplo, lapack_int n,
+                                const double* ap, const lapack_int* ipiv,
+                                double anorm, double* rcond, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_cspcon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_float* ap,
+                                const lapack_int* ipiv, float anorm,
+                                float* rcond, lapack_complex_float* work );
+lapack_int LAPACKE_zspcon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_double* ap,
+                                const lapack_int* ipiv, double anorm,
+                                double* rcond, lapack_complex_double* work );
+
+lapack_int LAPACKE_sspev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, float* ap, float* w, float* z,
+                               lapack_int ldz, float* work );
+lapack_int LAPACKE_dspev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, double* ap, double* w, double* z,
+                               lapack_int ldz, double* work );
+
+lapack_int LAPACKE_sspevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, float* ap, float* w, float* z,
+                                lapack_int ldz, float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_dspevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, double* ap, double* w, double* z,
+                                lapack_int ldz, double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_sspevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, float* ap, float vl,
+                                float vu, lapack_int il, lapack_int iu,
+                                float abstol, lapack_int* m, float* w, float* z,
+                                lapack_int ldz, float* work, lapack_int* iwork,
+                                lapack_int* ifail );
+lapack_int LAPACKE_dspevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, double* ap, double vl,
+                                double vu, lapack_int il, lapack_int iu,
+                                double abstol, lapack_int* m, double* w,
+                                double* z, lapack_int ldz, double* work,
+                                lapack_int* iwork, lapack_int* ifail );
+
+lapack_int LAPACKE_sspgst_work( int matrix_order, lapack_int itype, char uplo,
+                                lapack_int n, float* ap, const float* bp );
+lapack_int LAPACKE_dspgst_work( int matrix_order, lapack_int itype, char uplo,
+                                lapack_int n, double* ap, const double* bp );
+
+lapack_int LAPACKE_sspgv_work( int matrix_order, lapack_int itype, char jobz,
+                               char uplo, lapack_int n, float* ap, float* bp,
+                               float* w, float* z, lapack_int ldz,
+                               float* work );
+lapack_int LAPACKE_dspgv_work( int matrix_order, lapack_int itype, char jobz,
+                               char uplo, lapack_int n, double* ap, double* bp,
+                               double* w, double* z, lapack_int ldz,
+                               double* work );
+
+lapack_int LAPACKE_sspgvd_work( int matrix_order, lapack_int itype, char jobz,
+                                char uplo, lapack_int n, float* ap, float* bp,
+                                float* w, float* z, lapack_int ldz, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_dspgvd_work( int matrix_order, lapack_int itype, char jobz,
+                                char uplo, lapack_int n, double* ap, double* bp,
+                                double* w, double* z, lapack_int ldz,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_sspgvx_work( int matrix_order, lapack_int itype, char jobz,
+                                char range, char uplo, lapack_int n, float* ap,
+                                float* bp, float vl, float vu, lapack_int il,
+                                lapack_int iu, float abstol, lapack_int* m,
+                                float* w, float* z, lapack_int ldz, float* work,
+                                lapack_int* iwork, lapack_int* ifail );
+lapack_int LAPACKE_dspgvx_work( int matrix_order, lapack_int itype, char jobz,
+                                char range, char uplo, lapack_int n, double* ap,
+                                double* bp, double vl, double vu, lapack_int il,
+                                lapack_int iu, double abstol, lapack_int* m,
+                                double* w, double* z, lapack_int ldz,
+                                double* work, lapack_int* iwork,
+                                lapack_int* ifail );
+
+lapack_int LAPACKE_ssprfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const float* ap,
+                                const float* afp, const lapack_int* ipiv,
+                                const float* b, lapack_int ldb, float* x,
+                                lapack_int ldx, float* ferr, float* berr,
+                                float* work, lapack_int* iwork );
+lapack_int LAPACKE_dsprfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const double* ap,
+                                const double* afp, const lapack_int* ipiv,
+                                const double* b, lapack_int ldb, double* x,
+                                lapack_int ldx, double* ferr, double* berr,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_csprfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* ap,
+                                const lapack_complex_float* afp,
+                                const lapack_int* ipiv,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zsprfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs,
+                                const lapack_complex_double* ap,
+                                const lapack_complex_double* afp,
+                                const lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_sspsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, float* ap, lapack_int* ipiv,
+                               float* b, lapack_int ldb );
+lapack_int LAPACKE_dspsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, double* ap, lapack_int* ipiv,
+                               double* b, lapack_int ldb );
+lapack_int LAPACKE_cspsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_float* ap,
+                               lapack_int* ipiv, lapack_complex_float* b,
+                               lapack_int ldb );
+lapack_int LAPACKE_zspsv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_double* ap,
+                               lapack_int* ipiv, lapack_complex_double* b,
+                               lapack_int ldb );
+
+lapack_int LAPACKE_sspsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs, const float* ap,
+                                float* afp, lapack_int* ipiv, const float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* rcond, float* ferr, float* berr,
+                                float* work, lapack_int* iwork );
+lapack_int LAPACKE_dspsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs, const double* ap,
+                                double* afp, lapack_int* ipiv, const double* b,
+                                lapack_int ldb, double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_cspsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_float* ap,
+                                lapack_complex_float* afp, lapack_int* ipiv,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* rcond, float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zspsvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_double* ap,
+                                lapack_complex_double* afp, lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_ssptrd_work( int matrix_order, char uplo, lapack_int n,
+                                float* ap, float* d, float* e, float* tau );
+lapack_int LAPACKE_dsptrd_work( int matrix_order, char uplo, lapack_int n,
+                                double* ap, double* d, double* e, double* tau );
+
+lapack_int LAPACKE_ssptrf_work( int matrix_order, char uplo, lapack_int n,
+                                float* ap, lapack_int* ipiv );
+lapack_int LAPACKE_dsptrf_work( int matrix_order, char uplo, lapack_int n,
+                                double* ap, lapack_int* ipiv );
+lapack_int LAPACKE_csptrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* ap, lapack_int* ipiv );
+lapack_int LAPACKE_zsptrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* ap, lapack_int* ipiv );
+
+lapack_int LAPACKE_ssptri_work( int matrix_order, char uplo, lapack_int n,
+                                float* ap, const lapack_int* ipiv,
+                                float* work );
+lapack_int LAPACKE_dsptri_work( int matrix_order, char uplo, lapack_int n,
+                                double* ap, const lapack_int* ipiv,
+                                double* work );
+lapack_int LAPACKE_csptri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* ap,
+                                const lapack_int* ipiv,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zsptri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* ap,
+                                const lapack_int* ipiv,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_ssptrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const float* ap,
+                                const lapack_int* ipiv, float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_dsptrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const double* ap,
+                                const lapack_int* ipiv, double* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_csptrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* ap,
+                                const lapack_int* ipiv, lapack_complex_float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_zsptrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs,
+                                const lapack_complex_double* ap,
+                                const lapack_int* ipiv,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_sstebz_work( char range, char order, lapack_int n, float vl,
+                                float vu, lapack_int il, lapack_int iu,
+                                float abstol, const float* d, const float* e,
+                                lapack_int* m, lapack_int* nsplit, float* w,
+                                lapack_int* iblock, lapack_int* isplit,
+                                float* work, lapack_int* iwork );
+lapack_int LAPACKE_dstebz_work( char range, char order, lapack_int n, double vl,
+                                double vu, lapack_int il, lapack_int iu,
+                                double abstol, const double* d, const double* e,
+                                lapack_int* m, lapack_int* nsplit, double* w,
+                                lapack_int* iblock, lapack_int* isplit,
+                                double* work, lapack_int* iwork );
+
+lapack_int LAPACKE_sstedc_work( int matrix_order, char compz, lapack_int n,
+                                float* d, float* e, float* z, lapack_int ldz,
+                                float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_dstedc_work( int matrix_order, char compz, lapack_int n,
+                                double* d, double* e, double* z, lapack_int ldz,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_cstedc_work( int matrix_order, char compz, lapack_int n,
+                                float* d, float* e, lapack_complex_float* z,
+                                lapack_int ldz, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork,
+                                lapack_int lrwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_zstedc_work( int matrix_order, char compz, lapack_int n,
+                                double* d, double* e, lapack_complex_double* z,
+                                lapack_int ldz, lapack_complex_double* work,
+                                lapack_int lwork, double* rwork,
+                                lapack_int lrwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_sstegr_work( int matrix_order, char jobz, char range,
+                                lapack_int n, float* d, float* e, float vl,
+                                float vu, lapack_int il, lapack_int iu,
+                                float abstol, lapack_int* m, float* w, float* z,
+                                lapack_int ldz, lapack_int* isuppz, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_dstegr_work( int matrix_order, char jobz, char range,
+                                lapack_int n, double* d, double* e, double vl,
+                                double vu, lapack_int il, lapack_int iu,
+                                double abstol, lapack_int* m, double* w,
+                                double* z, lapack_int ldz, lapack_int* isuppz,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_cstegr_work( int matrix_order, char jobz, char range,
+                                lapack_int n, float* d, float* e, float vl,
+                                float vu, lapack_int il, lapack_int iu,
+                                float abstol, lapack_int* m, float* w,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_int* isuppz, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_zstegr_work( int matrix_order, char jobz, char range,
+                                lapack_int n, double* d, double* e, double vl,
+                                double vu, lapack_int il, lapack_int iu,
+                                double abstol, lapack_int* m, double* w,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_int* isuppz, double* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_sstein_work( int matrix_order, lapack_int n, const float* d,
+                                const float* e, lapack_int m, const float* w,
+                                const lapack_int* iblock,
+                                const lapack_int* isplit, float* z,
+                                lapack_int ldz, float* work, lapack_int* iwork,
+                                lapack_int* ifailv );
+lapack_int LAPACKE_dstein_work( int matrix_order, lapack_int n, const double* d,
+                                const double* e, lapack_int m, const double* w,
+                                const lapack_int* iblock,
+                                const lapack_int* isplit, double* z,
+                                lapack_int ldz, double* work, lapack_int* iwork,
+                                lapack_int* ifailv );
+lapack_int LAPACKE_cstein_work( int matrix_order, lapack_int n, const float* d,
+                                const float* e, lapack_int m, const float* w,
+                                const lapack_int* iblock,
+                                const lapack_int* isplit,
+                                lapack_complex_float* z, lapack_int ldz,
+                                float* work, lapack_int* iwork,
+                                lapack_int* ifailv );
+lapack_int LAPACKE_zstein_work( int matrix_order, lapack_int n, const double* d,
+                                const double* e, lapack_int m, const double* w,
+                                const lapack_int* iblock,
+                                const lapack_int* isplit,
+                                lapack_complex_double* z, lapack_int ldz,
+                                double* work, lapack_int* iwork,
+                                lapack_int* ifailv );
+
+lapack_int LAPACKE_sstemr_work( int matrix_order, char jobz, char range,
+                                lapack_int n, float* d, float* e, float vl,
+                                float vu, lapack_int il, lapack_int iu,
+                                lapack_int* m, float* w, float* z,
+                                lapack_int ldz, lapack_int nzc,
+                                lapack_int* isuppz, lapack_logical* tryrac,
+                                float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_dstemr_work( int matrix_order, char jobz, char range,
+                                lapack_int n, double* d, double* e, double vl,
+                                double vu, lapack_int il, lapack_int iu,
+                                lapack_int* m, double* w, double* z,
+                                lapack_int ldz, lapack_int nzc,
+                                lapack_int* isuppz, lapack_logical* tryrac,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_cstemr_work( int matrix_order, char jobz, char range,
+                                lapack_int n, float* d, float* e, float vl,
+                                float vu, lapack_int il, lapack_int iu,
+                                lapack_int* m, float* w,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_int nzc, lapack_int* isuppz,
+                                lapack_logical* tryrac, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_zstemr_work( int matrix_order, char jobz, char range,
+                                lapack_int n, double* d, double* e, double vl,
+                                double vu, lapack_int il, lapack_int iu,
+                                lapack_int* m, double* w,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_int nzc, lapack_int* isuppz,
+                                lapack_logical* tryrac, double* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_ssteqr_work( int matrix_order, char compz, lapack_int n,
+                                float* d, float* e, float* z, lapack_int ldz,
+                                float* work );
+lapack_int LAPACKE_dsteqr_work( int matrix_order, char compz, lapack_int n,
+                                double* d, double* e, double* z, lapack_int ldz,
+                                double* work );
+lapack_int LAPACKE_csteqr_work( int matrix_order, char compz, lapack_int n,
+                                float* d, float* e, lapack_complex_float* z,
+                                lapack_int ldz, float* work );
+lapack_int LAPACKE_zsteqr_work( int matrix_order, char compz, lapack_int n,
+                                double* d, double* e, lapack_complex_double* z,
+                                lapack_int ldz, double* work );
+
+lapack_int LAPACKE_ssterf_work( lapack_int n, float* d, float* e );
+lapack_int LAPACKE_dsterf_work( lapack_int n, double* d, double* e );
+
+lapack_int LAPACKE_sstev_work( int matrix_order, char jobz, lapack_int n,
+                               float* d, float* e, float* z, lapack_int ldz,
+                               float* work );
+lapack_int LAPACKE_dstev_work( int matrix_order, char jobz, lapack_int n,
+                               double* d, double* e, double* z, lapack_int ldz,
+                               double* work );
+
+lapack_int LAPACKE_sstevd_work( int matrix_order, char jobz, lapack_int n,
+                                float* d, float* e, float* z, lapack_int ldz,
+                                float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_dstevd_work( int matrix_order, char jobz, lapack_int n,
+                                double* d, double* e, double* z, lapack_int ldz,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_sstevr_work( int matrix_order, char jobz, char range,
+                                lapack_int n, float* d, float* e, float vl,
+                                float vu, lapack_int il, lapack_int iu,
+                                float abstol, lapack_int* m, float* w, float* z,
+                                lapack_int ldz, lapack_int* isuppz, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_dstevr_work( int matrix_order, char jobz, char range,
+                                lapack_int n, double* d, double* e, double vl,
+                                double vu, lapack_int il, lapack_int iu,
+                                double abstol, lapack_int* m, double* w,
+                                double* z, lapack_int ldz, lapack_int* isuppz,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_sstevx_work( int matrix_order, char jobz, char range,
+                                lapack_int n, float* d, float* e, float vl,
+                                float vu, lapack_int il, lapack_int iu,
+                                float abstol, lapack_int* m, float* w, float* z,
+                                lapack_int ldz, float* work, lapack_int* iwork,
+                                lapack_int* ifail );
+lapack_int LAPACKE_dstevx_work( int matrix_order, char jobz, char range,
+                                lapack_int n, double* d, double* e, double vl,
+                                double vu, lapack_int il, lapack_int iu,
+                                double abstol, lapack_int* m, double* w,
+                                double* z, lapack_int ldz, double* work,
+                                lapack_int* iwork, lapack_int* ifail );
+
+lapack_int LAPACKE_ssycon_work( int matrix_order, char uplo, lapack_int n,
+                                const float* a, lapack_int lda,
+                                const lapack_int* ipiv, float anorm,
+                                float* rcond, float* work, lapack_int* iwork );
+lapack_int LAPACKE_dsycon_work( int matrix_order, char uplo, lapack_int n,
+                                const double* a, lapack_int lda,
+                                const lapack_int* ipiv, double anorm,
+                                double* rcond, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_csycon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_int* ipiv, float anorm,
+                                float* rcond, lapack_complex_float* work );
+lapack_int LAPACKE_zsycon_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_int* ipiv, double anorm,
+                                double* rcond, lapack_complex_double* work );
+
+lapack_int LAPACKE_ssyequb_work( int matrix_order, char uplo, lapack_int n,
+                                 const float* a, lapack_int lda, float* s,
+                                 float* scond, float* amax, float* work );
+lapack_int LAPACKE_dsyequb_work( int matrix_order, char uplo, lapack_int n,
+                                 const double* a, lapack_int lda, double* s,
+                                 double* scond, double* amax, double* work );
+lapack_int LAPACKE_csyequb_work( int matrix_order, char uplo, lapack_int n,
+                                 const lapack_complex_float* a, lapack_int lda,
+                                 float* s, float* scond, float* amax,
+                                 lapack_complex_float* work );
+lapack_int LAPACKE_zsyequb_work( int matrix_order, char uplo, lapack_int n,
+                                 const lapack_complex_double* a, lapack_int lda,
+                                 double* s, double* scond, double* amax,
+                                 lapack_complex_double* work );
+
+lapack_int LAPACKE_ssyev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, float* a, lapack_int lda, float* w,
+                               float* work, lapack_int lwork );
+lapack_int LAPACKE_dsyev_work( int matrix_order, char jobz, char uplo,
+                               lapack_int n, double* a, lapack_int lda,
+                               double* w, double* work, lapack_int lwork );
+
+lapack_int LAPACKE_ssyevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, float* a, lapack_int lda,
+                                float* w, float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_dsyevd_work( int matrix_order, char jobz, char uplo,
+                                lapack_int n, double* a, lapack_int lda,
+                                double* w, double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_ssyevr_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, float* a,
+                                lapack_int lda, float vl, float vu,
+                                lapack_int il, lapack_int iu, float abstol,
+                                lapack_int* m, float* w, float* z,
+                                lapack_int ldz, lapack_int* isuppz, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_dsyevr_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, double* a,
+                                lapack_int lda, double vl, double vu,
+                                lapack_int il, lapack_int iu, double abstol,
+                                lapack_int* m, double* w, double* z,
+                                lapack_int ldz, lapack_int* isuppz,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_ssyevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, float* a,
+                                lapack_int lda, float vl, float vu,
+                                lapack_int il, lapack_int iu, float abstol,
+                                lapack_int* m, float* w, float* z,
+                                lapack_int ldz, float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int* ifail );
+lapack_int LAPACKE_dsyevx_work( int matrix_order, char jobz, char range,
+                                char uplo, lapack_int n, double* a,
+                                lapack_int lda, double vl, double vu,
+                                lapack_int il, lapack_int iu, double abstol,
+                                lapack_int* m, double* w, double* z,
+                                lapack_int ldz, double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int* ifail );
+
+lapack_int LAPACKE_ssygst_work( int matrix_order, lapack_int itype, char uplo,
+                                lapack_int n, float* a, lapack_int lda,
+                                const float* b, lapack_int ldb );
+lapack_int LAPACKE_dsygst_work( int matrix_order, lapack_int itype, char uplo,
+                                lapack_int n, double* a, lapack_int lda,
+                                const double* b, lapack_int ldb );
+
+lapack_int LAPACKE_ssygv_work( int matrix_order, lapack_int itype, char jobz,
+                               char uplo, lapack_int n, float* a,
+                               lapack_int lda, float* b, lapack_int ldb,
+                               float* w, float* work, lapack_int lwork );
+lapack_int LAPACKE_dsygv_work( int matrix_order, lapack_int itype, char jobz,
+                               char uplo, lapack_int n, double* a,
+                               lapack_int lda, double* b, lapack_int ldb,
+                               double* w, double* work, lapack_int lwork );
+
+lapack_int LAPACKE_ssygvd_work( int matrix_order, lapack_int itype, char jobz,
+                                char uplo, lapack_int n, float* a,
+                                lapack_int lda, float* b, lapack_int ldb,
+                                float* w, float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_dsygvd_work( int matrix_order, lapack_int itype, char jobz,
+                                char uplo, lapack_int n, double* a,
+                                lapack_int lda, double* b, lapack_int ldb,
+                                double* w, double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+
+lapack_int LAPACKE_ssygvx_work( int matrix_order, lapack_int itype, char jobz,
+                                char range, char uplo, lapack_int n, float* a,
+                                lapack_int lda, float* b, lapack_int ldb,
+                                float vl, float vu, lapack_int il,
+                                lapack_int iu, float abstol, lapack_int* m,
+                                float* w, float* z, lapack_int ldz, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int* ifail );
+lapack_int LAPACKE_dsygvx_work( int matrix_order, lapack_int itype, char jobz,
+                                char range, char uplo, lapack_int n, double* a,
+                                lapack_int lda, double* b, lapack_int ldb,
+                                double vl, double vu, lapack_int il,
+                                lapack_int iu, double abstol, lapack_int* m,
+                                double* w, double* z, lapack_int ldz,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int* ifail );
+
+lapack_int LAPACKE_ssyrfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const float* a, lapack_int lda,
+                                const float* af, lapack_int ldaf,
+                                const lapack_int* ipiv, const float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* ferr, float* berr, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dsyrfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const double* a,
+                                lapack_int lda, const double* af,
+                                lapack_int ldaf, const lapack_int* ipiv,
+                                const double* b, lapack_int ldb, double* x,
+                                lapack_int ldx, double* ferr, double* berr,
+                                double* work, lapack_int* iwork );
+lapack_int LAPACKE_csyrfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* af,
+                                lapack_int ldaf, const lapack_int* ipiv,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_zsyrfs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_double* a,
+                                lapack_int lda, const lapack_complex_double* af,
+                                lapack_int ldaf, const lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_ssyrfsx_work( int matrix_order, char uplo, char equed,
+                                 lapack_int n, lapack_int nrhs, const float* a,
+                                 lapack_int lda, const float* af,
+                                 lapack_int ldaf, const lapack_int* ipiv,
+                                 const float* s, const float* b, lapack_int ldb,
+                                 float* x, lapack_int ldx, float* rcond,
+                                 float* berr, lapack_int n_err_bnds,
+                                 float* err_bnds_norm, float* err_bnds_comp,
+                                 lapack_int nparams, float* params, float* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_dsyrfsx_work( int matrix_order, char uplo, char equed,
+                                 lapack_int n, lapack_int nrhs, const double* a,
+                                 lapack_int lda, const double* af,
+                                 lapack_int ldaf, const lapack_int* ipiv,
+                                 const double* s, const double* b,
+                                 lapack_int ldb, double* x, lapack_int ldx,
+                                 double* rcond, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, double* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_csyrfsx_work( int matrix_order, char uplo, char equed,
+                                 lapack_int n, lapack_int nrhs,
+                                 const lapack_complex_float* a, lapack_int lda,
+                                 const lapack_complex_float* af,
+                                 lapack_int ldaf, const lapack_int* ipiv,
+                                 const float* s, const lapack_complex_float* b,
+                                 lapack_int ldb, lapack_complex_float* x,
+                                 lapack_int ldx, float* rcond, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, lapack_complex_float* work,
+                                 float* rwork );
+lapack_int LAPACKE_zsyrfsx_work( int matrix_order, char uplo, char equed,
+                                 lapack_int n, lapack_int nrhs,
+                                 const lapack_complex_double* a, lapack_int lda,
+                                 const lapack_complex_double* af,
+                                 lapack_int ldaf, const lapack_int* ipiv,
+                                 const double* s,
+                                 const lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 double* rcond, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, lapack_complex_double* work,
+                                 double* rwork );
+
+lapack_int LAPACKE_ssysv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, float* a, lapack_int lda,
+                               lapack_int* ipiv, float* b, lapack_int ldb,
+                               float* work, lapack_int lwork );
+lapack_int LAPACKE_dsysv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, double* a, lapack_int lda,
+                               lapack_int* ipiv, double* b, lapack_int ldb,
+                               double* work, lapack_int lwork );
+lapack_int LAPACKE_csysv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_float* a,
+                               lapack_int lda, lapack_int* ipiv,
+                               lapack_complex_float* b, lapack_int ldb,
+                               lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zsysv_work( int matrix_order, char uplo, lapack_int n,
+                               lapack_int nrhs, lapack_complex_double* a,
+                               lapack_int lda, lapack_int* ipiv,
+                               lapack_complex_double* b, lapack_int ldb,
+                               lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_ssysvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs, const float* a,
+                                lapack_int lda, float* af, lapack_int ldaf,
+                                lapack_int* ipiv, const float* b,
+                                lapack_int ldb, float* x, lapack_int ldx,
+                                float* rcond, float* ferr, float* berr,
+                                float* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dsysvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs, const double* a,
+                                lapack_int lda, double* af, lapack_int ldaf,
+                                lapack_int* ipiv, const double* b,
+                                lapack_int ldb, double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_csysvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* af, lapack_int ldaf,
+                                lapack_int* ipiv, const lapack_complex_float* b,
+                                lapack_int ldb, lapack_complex_float* x,
+                                lapack_int ldx, float* rcond, float* ferr,
+                                float* berr, lapack_complex_float* work,
+                                lapack_int lwork, float* rwork );
+lapack_int LAPACKE_zsysvx_work( int matrix_order, char fact, char uplo,
+                                lapack_int n, lapack_int nrhs,
+                                const lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* af, lapack_int ldaf,
+                                lapack_int* ipiv,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* x, lapack_int ldx,
+                                double* rcond, double* ferr, double* berr,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork );
+
+lapack_int LAPACKE_ssysvxx_work( int matrix_order, char fact, char uplo,
+                                 lapack_int n, lapack_int nrhs, float* a,
+                                 lapack_int lda, float* af, lapack_int ldaf,
+                                 lapack_int* ipiv, char* equed, float* s,
+                                 float* b, lapack_int ldb, float* x,
+                                 lapack_int ldx, float* rcond, float* rpvgrw,
+                                 float* berr, lapack_int n_err_bnds,
+                                 float* err_bnds_norm, float* err_bnds_comp,
+                                 lapack_int nparams, float* params, float* work,
+                                 lapack_int* iwork );
+lapack_int LAPACKE_dsysvxx_work( int matrix_order, char fact, char uplo,
+                                 lapack_int n, lapack_int nrhs, double* a,
+                                 lapack_int lda, double* af, lapack_int ldaf,
+                                 lapack_int* ipiv, char* equed, double* s,
+                                 double* b, lapack_int ldb, double* x,
+                                 lapack_int ldx, double* rcond, double* rpvgrw,
+                                 double* berr, lapack_int n_err_bnds,
+                                 double* err_bnds_norm, double* err_bnds_comp,
+                                 lapack_int nparams, double* params,
+                                 double* work, lapack_int* iwork );
+lapack_int LAPACKE_csysvxx_work( int matrix_order, char fact, char uplo,
+                                 lapack_int n, lapack_int nrhs,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 lapack_complex_float* af, lapack_int ldaf,
+                                 lapack_int* ipiv, char* equed, float* s,
+                                 lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* x, lapack_int ldx,
+                                 float* rcond, float* rpvgrw, float* berr,
+                                 lapack_int n_err_bnds, float* err_bnds_norm,
+                                 float* err_bnds_comp, lapack_int nparams,
+                                 float* params, lapack_complex_float* work,
+                                 float* rwork );
+lapack_int LAPACKE_zsysvxx_work( int matrix_order, char fact, char uplo,
+                                 lapack_int n, lapack_int nrhs,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 lapack_complex_double* af, lapack_int ldaf,
+                                 lapack_int* ipiv, char* equed, double* s,
+                                 lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* x, lapack_int ldx,
+                                 double* rcond, double* rpvgrw, double* berr,
+                                 lapack_int n_err_bnds, double* err_bnds_norm,
+                                 double* err_bnds_comp, lapack_int nparams,
+                                 double* params, lapack_complex_double* work,
+                                 double* rwork );
+
+lapack_int LAPACKE_ssytrd_work( int matrix_order, char uplo, lapack_int n,
+                                float* a, lapack_int lda, float* d, float* e,
+                                float* tau, float* work, lapack_int lwork );
+lapack_int LAPACKE_dsytrd_work( int matrix_order, char uplo, lapack_int n,
+                                double* a, lapack_int lda, double* d, double* e,
+                                double* tau, double* work, lapack_int lwork );
+
+lapack_int LAPACKE_ssytrf_work( int matrix_order, char uplo, lapack_int n,
+                                float* a, lapack_int lda, lapack_int* ipiv,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dsytrf_work( int matrix_order, char uplo, lapack_int n,
+                                double* a, lapack_int lda, lapack_int* ipiv,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_csytrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_int* ipiv, lapack_complex_float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_zsytrf_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_int* ipiv, lapack_complex_double* work,
+                                lapack_int lwork );
+
+lapack_int LAPACKE_ssytri_work( int matrix_order, char uplo, lapack_int n,
+                                float* a, lapack_int lda,
+                                const lapack_int* ipiv, float* work );
+lapack_int LAPACKE_dsytri_work( int matrix_order, char uplo, lapack_int n,
+                                double* a, lapack_int lda,
+                                const lapack_int* ipiv, double* work );
+lapack_int LAPACKE_csytri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                const lapack_int* ipiv,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zsytri_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                const lapack_int* ipiv,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_ssytrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const float* a, lapack_int lda,
+                                const lapack_int* ipiv, float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_dsytrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const double* a,
+                                lapack_int lda, const lapack_int* ipiv,
+                                double* b, lapack_int ldb );
+lapack_int LAPACKE_csytrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_float* a,
+                                lapack_int lda, const lapack_int* ipiv,
+                                lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_zsytrs_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_int nrhs, const lapack_complex_double* a,
+                                lapack_int lda, const lapack_int* ipiv,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_stbcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n, lapack_int kd,
+                                const float* ab, lapack_int ldab, float* rcond,
+                                float* work, lapack_int* iwork );
+lapack_int LAPACKE_dtbcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n, lapack_int kd,
+                                const double* ab, lapack_int ldab,
+                                double* rcond, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_ctbcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n, lapack_int kd,
+                                const lapack_complex_float* ab, lapack_int ldab,
+                                float* rcond, lapack_complex_float* work,
+                                float* rwork );
+lapack_int LAPACKE_ztbcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n, lapack_int kd,
+                                const lapack_complex_double* ab,
+                                lapack_int ldab, double* rcond,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_stbrfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int kd,
+                                lapack_int nrhs, const float* ab,
+                                lapack_int ldab, const float* b, lapack_int ldb,
+                                const float* x, lapack_int ldx, float* ferr,
+                                float* berr, float* work, lapack_int* iwork );
+lapack_int LAPACKE_dtbrfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int kd,
+                                lapack_int nrhs, const double* ab,
+                                lapack_int ldab, const double* b,
+                                lapack_int ldb, const double* x, lapack_int ldx,
+                                double* ferr, double* berr, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_ctbrfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int kd,
+                                lapack_int nrhs, const lapack_complex_float* ab,
+                                lapack_int ldab, const lapack_complex_float* b,
+                                lapack_int ldb, const lapack_complex_float* x,
+                                lapack_int ldx, float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_ztbrfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int kd,
+                                lapack_int nrhs,
+                                const lapack_complex_double* ab,
+                                lapack_int ldab, const lapack_complex_double* b,
+                                lapack_int ldb, const lapack_complex_double* x,
+                                lapack_int ldx, double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_stbtrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int kd,
+                                lapack_int nrhs, const float* ab,
+                                lapack_int ldab, float* b, lapack_int ldb );
+lapack_int LAPACKE_dtbtrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int kd,
+                                lapack_int nrhs, const double* ab,
+                                lapack_int ldab, double* b, lapack_int ldb );
+lapack_int LAPACKE_ctbtrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int kd,
+                                lapack_int nrhs, const lapack_complex_float* ab,
+                                lapack_int ldab, lapack_complex_float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_ztbtrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int kd,
+                                lapack_int nrhs,
+                                const lapack_complex_double* ab,
+                                lapack_int ldab, lapack_complex_double* b,
+                                lapack_int ldb );
+
+lapack_int LAPACKE_stfsm_work( int matrix_order, char transr, char side,
+                               char uplo, char trans, char diag, lapack_int m,
+                               lapack_int n, float alpha, const float* a,
+                               float* b, lapack_int ldb );
+lapack_int LAPACKE_dtfsm_work( int matrix_order, char transr, char side,
+                               char uplo, char trans, char diag, lapack_int m,
+                               lapack_int n, double alpha, const double* a,
+                               double* b, lapack_int ldb );
+lapack_int LAPACKE_ctfsm_work( int matrix_order, char transr, char side,
+                               char uplo, char trans, char diag, lapack_int m,
+                               lapack_int n, lapack_complex_float alpha,
+                               const lapack_complex_float* a,
+                               lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_ztfsm_work( int matrix_order, char transr, char side,
+                               char uplo, char trans, char diag, lapack_int m,
+                               lapack_int n, lapack_complex_double alpha,
+                               const lapack_complex_double* a,
+                               lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_stftri_work( int matrix_order, char transr, char uplo,
+                                char diag, lapack_int n, float* a );
+lapack_int LAPACKE_dtftri_work( int matrix_order, char transr, char uplo,
+                                char diag, lapack_int n, double* a );
+lapack_int LAPACKE_ctftri_work( int matrix_order, char transr, char uplo,
+                                char diag, lapack_int n,
+                                lapack_complex_float* a );
+lapack_int LAPACKE_ztftri_work( int matrix_order, char transr, char uplo,
+                                char diag, lapack_int n,
+                                lapack_complex_double* a );
+
+lapack_int LAPACKE_stfttp_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const float* arf, float* ap );
+lapack_int LAPACKE_dtfttp_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const double* arf, double* ap );
+lapack_int LAPACKE_ctfttp_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const lapack_complex_float* arf,
+                                lapack_complex_float* ap );
+lapack_int LAPACKE_ztfttp_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const lapack_complex_double* arf,
+                                lapack_complex_double* ap );
+
+lapack_int LAPACKE_stfttr_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const float* arf, float* a,
+                                lapack_int lda );
+lapack_int LAPACKE_dtfttr_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const double* arf, double* a,
+                                lapack_int lda );
+lapack_int LAPACKE_ctfttr_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const lapack_complex_float* arf,
+                                lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_ztfttr_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const lapack_complex_double* arf,
+                                lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_stgevc_work( int matrix_order, char side, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const float* s, lapack_int lds, const float* p,
+                                lapack_int ldp, float* vl, lapack_int ldvl,
+                                float* vr, lapack_int ldvr, lapack_int mm,
+                                lapack_int* m, float* work );
+lapack_int LAPACKE_dtgevc_work( int matrix_order, char side, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const double* s, lapack_int lds,
+                                const double* p, lapack_int ldp, double* vl,
+                                lapack_int ldvl, double* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m, double* work );
+lapack_int LAPACKE_ctgevc_work( int matrix_order, char side, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const lapack_complex_float* s, lapack_int lds,
+                                const lapack_complex_float* p, lapack_int ldp,
+                                lapack_complex_float* vl, lapack_int ldvl,
+                                lapack_complex_float* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_ztgevc_work( int matrix_order, char side, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const lapack_complex_double* s, lapack_int lds,
+                                const lapack_complex_double* p, lapack_int ldp,
+                                lapack_complex_double* vl, lapack_int ldvl,
+                                lapack_complex_double* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_stgexc_work( int matrix_order, lapack_logical wantq,
+                                lapack_logical wantz, lapack_int n, float* a,
+                                lapack_int lda, float* b, lapack_int ldb,
+                                float* q, lapack_int ldq, float* z,
+                                lapack_int ldz, lapack_int* ifst,
+                                lapack_int* ilst, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dtgexc_work( int matrix_order, lapack_logical wantq,
+                                lapack_logical wantz, lapack_int n, double* a,
+                                lapack_int lda, double* b, lapack_int ldb,
+                                double* q, lapack_int ldq, double* z,
+                                lapack_int ldz, lapack_int* ifst,
+                                lapack_int* ilst, double* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_ctgexc_work( int matrix_order, lapack_logical wantq,
+                                lapack_logical wantz, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* q, lapack_int ldq,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_int ifst, lapack_int ilst );
+lapack_int LAPACKE_ztgexc_work( int matrix_order, lapack_logical wantq,
+                                lapack_logical wantz, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* q, lapack_int ldq,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_int ifst, lapack_int ilst );
+
+lapack_int LAPACKE_stgsen_work( int matrix_order, lapack_int ijob,
+                                lapack_logical wantq, lapack_logical wantz,
+                                const lapack_logical* select, lapack_int n,
+                                float* a, lapack_int lda, float* b,
+                                lapack_int ldb, float* alphar, float* alphai,
+                                float* beta, float* q, lapack_int ldq, float* z,
+                                lapack_int ldz, lapack_int* m, float* pl,
+                                float* pr, float* dif, float* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+lapack_int LAPACKE_dtgsen_work( int matrix_order, lapack_int ijob,
+                                lapack_logical wantq, lapack_logical wantz,
+                                const lapack_logical* select, lapack_int n,
+                                double* a, lapack_int lda, double* b,
+                                lapack_int ldb, double* alphar, double* alphai,
+                                double* beta, double* q, lapack_int ldq,
+                                double* z, lapack_int ldz, lapack_int* m,
+                                double* pl, double* pr, double* dif,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_ctgsen_work( int matrix_order, lapack_int ijob,
+                                lapack_logical wantq, lapack_logical wantz,
+                                const lapack_logical* select, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* alpha,
+                                lapack_complex_float* beta,
+                                lapack_complex_float* q, lapack_int ldq,
+                                lapack_complex_float* z, lapack_int ldz,
+                                lapack_int* m, float* pl, float* pr, float* dif,
+                                lapack_complex_float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_ztgsen_work( int matrix_order, lapack_int ijob,
+                                lapack_logical wantq, lapack_logical wantz,
+                                const lapack_logical* select, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* alpha,
+                                lapack_complex_double* beta,
+                                lapack_complex_double* q, lapack_int ldq,
+                                lapack_complex_double* z, lapack_int ldz,
+                                lapack_int* m, double* pl, double* pr,
+                                double* dif, lapack_complex_double* work,
+                                lapack_int lwork, lapack_int* iwork,
+                                lapack_int liwork );
+
+lapack_int LAPACKE_stgsja_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int p,
+                                lapack_int n, lapack_int k, lapack_int l,
+                                float* a, lapack_int lda, float* b,
+                                lapack_int ldb, float tola, float tolb,
+                                float* alpha, float* beta, float* u,
+                                lapack_int ldu, float* v, lapack_int ldv,
+                                float* q, lapack_int ldq, float* work,
+                                lapack_int* ncycle );
+lapack_int LAPACKE_dtgsja_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int p,
+                                lapack_int n, lapack_int k, lapack_int l,
+                                double* a, lapack_int lda, double* b,
+                                lapack_int ldb, double tola, double tolb,
+                                double* alpha, double* beta, double* u,
+                                lapack_int ldu, double* v, lapack_int ldv,
+                                double* q, lapack_int ldq, double* work,
+                                lapack_int* ncycle );
+lapack_int LAPACKE_ctgsja_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int p,
+                                lapack_int n, lapack_int k, lapack_int l,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb,
+                                float tola, float tolb, float* alpha,
+                                float* beta, lapack_complex_float* u,
+                                lapack_int ldu, lapack_complex_float* v,
+                                lapack_int ldv, lapack_complex_float* q,
+                                lapack_int ldq, lapack_complex_float* work,
+                                lapack_int* ncycle );
+lapack_int LAPACKE_ztgsja_work( int matrix_order, char jobu, char jobv,
+                                char jobq, lapack_int m, lapack_int p,
+                                lapack_int n, lapack_int k, lapack_int l,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                double tola, double tolb, double* alpha,
+                                double* beta, lapack_complex_double* u,
+                                lapack_int ldu, lapack_complex_double* v,
+                                lapack_int ldv, lapack_complex_double* q,
+                                lapack_int ldq, lapack_complex_double* work,
+                                lapack_int* ncycle );
+
+lapack_int LAPACKE_stgsna_work( int matrix_order, char job, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const float* a, lapack_int lda, const float* b,
+                                lapack_int ldb, const float* vl,
+                                lapack_int ldvl, const float* vr,
+                                lapack_int ldvr, float* s, float* dif,
+                                lapack_int mm, lapack_int* m, float* work,
+                                lapack_int lwork, lapack_int* iwork );
+lapack_int LAPACKE_dtgsna_work( int matrix_order, char job, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const double* a, lapack_int lda,
+                                const double* b, lapack_int ldb,
+                                const double* vl, lapack_int ldvl,
+                                const double* vr, lapack_int ldvr, double* s,
+                                double* dif, lapack_int mm, lapack_int* m,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_ctgsna_work( int matrix_order, char job, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                const lapack_complex_float* vl, lapack_int ldvl,
+                                const lapack_complex_float* vr, lapack_int ldvr,
+                                float* s, float* dif, lapack_int mm,
+                                lapack_int* m, lapack_complex_float* work,
+                                lapack_int lwork, lapack_int* iwork );
+lapack_int LAPACKE_ztgsna_work( int matrix_order, char job, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                const lapack_complex_double* vl,
+                                lapack_int ldvl,
+                                const lapack_complex_double* vr,
+                                lapack_int ldvr, double* s, double* dif,
+                                lapack_int mm, lapack_int* m,
+                                lapack_complex_double* work, lapack_int lwork,
+                                lapack_int* iwork );
+
+lapack_int LAPACKE_stgsyl_work( int matrix_order, char trans, lapack_int ijob,
+                                lapack_int m, lapack_int n, const float* a,
+                                lapack_int lda, const float* b, lapack_int ldb,
+                                float* c, lapack_int ldc, const float* d,
+                                lapack_int ldd, const float* e, lapack_int lde,
+                                float* f, lapack_int ldf, float* scale,
+                                float* dif, float* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dtgsyl_work( int matrix_order, char trans, lapack_int ijob,
+                                lapack_int m, lapack_int n, const double* a,
+                                lapack_int lda, const double* b, lapack_int ldb,
+                                double* c, lapack_int ldc, const double* d,
+                                lapack_int ldd, const double* e, lapack_int lde,
+                                double* f, lapack_int ldf, double* scale,
+                                double* dif, double* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_ctgsyl_work( int matrix_order, char trans, lapack_int ijob,
+                                lapack_int m, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* c, lapack_int ldc,
+                                const lapack_complex_float* d, lapack_int ldd,
+                                const lapack_complex_float* e, lapack_int lde,
+                                lapack_complex_float* f, lapack_int ldf,
+                                float* scale, float* dif,
+                                lapack_complex_float* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_ztgsyl_work( int matrix_order, char trans, lapack_int ijob,
+                                lapack_int m, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* c, lapack_int ldc,
+                                const lapack_complex_double* d, lapack_int ldd,
+                                const lapack_complex_double* e, lapack_int lde,
+                                lapack_complex_double* f, lapack_int ldf,
+                                double* scale, double* dif,
+                                lapack_complex_double* work, lapack_int lwork,
+                                lapack_int* iwork );
+
+lapack_int LAPACKE_stpcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n, const float* ap,
+                                float* rcond, float* work, lapack_int* iwork );
+lapack_int LAPACKE_dtpcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n, const double* ap,
+                                double* rcond, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_ctpcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n,
+                                const lapack_complex_float* ap, float* rcond,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_ztpcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n,
+                                const lapack_complex_double* ap, double* rcond,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_stprfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const float* ap, const float* b, lapack_int ldb,
+                                const float* x, lapack_int ldx, float* ferr,
+                                float* berr, float* work, lapack_int* iwork );
+lapack_int LAPACKE_dtprfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const double* ap, const double* b,
+                                lapack_int ldb, const double* x, lapack_int ldx,
+                                double* ferr, double* berr, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_ctprfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const lapack_complex_float* ap,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                const lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_ztprfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const lapack_complex_double* ap,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                const lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_stptri_work( int matrix_order, char uplo, char diag,
+                                lapack_int n, float* ap );
+lapack_int LAPACKE_dtptri_work( int matrix_order, char uplo, char diag,
+                                lapack_int n, double* ap );
+lapack_int LAPACKE_ctptri_work( int matrix_order, char uplo, char diag,
+                                lapack_int n, lapack_complex_float* ap );
+lapack_int LAPACKE_ztptri_work( int matrix_order, char uplo, char diag,
+                                lapack_int n, lapack_complex_double* ap );
+
+lapack_int LAPACKE_stptrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const float* ap, float* b, lapack_int ldb );
+lapack_int LAPACKE_dtptrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const double* ap, double* b, lapack_int ldb );
+lapack_int LAPACKE_ctptrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const lapack_complex_float* ap,
+                                lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_ztptrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const lapack_complex_double* ap,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_stpttf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const float* ap, float* arf );
+lapack_int LAPACKE_dtpttf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const double* ap, double* arf );
+lapack_int LAPACKE_ctpttf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const lapack_complex_float* ap,
+                                lapack_complex_float* arf );
+lapack_int LAPACKE_ztpttf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const lapack_complex_double* ap,
+                                lapack_complex_double* arf );
+
+lapack_int LAPACKE_stpttr_work( int matrix_order, char uplo, lapack_int n,
+                                const float* ap, float* a, lapack_int lda );
+lapack_int LAPACKE_dtpttr_work( int matrix_order, char uplo, lapack_int n,
+                                const double* ap, double* a, lapack_int lda );
+lapack_int LAPACKE_ctpttr_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_float* ap,
+                                lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_ztpttr_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_double* ap,
+                                lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_strcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n, const float* a,
+                                lapack_int lda, float* rcond, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dtrcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n, const double* a,
+                                lapack_int lda, double* rcond, double* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_ctrcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                float* rcond, lapack_complex_float* work,
+                                float* rwork );
+lapack_int LAPACKE_ztrcon_work( int matrix_order, char norm, char uplo,
+                                char diag, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                double* rcond, lapack_complex_double* work,
+                                double* rwork );
+
+lapack_int LAPACKE_strevc_work( int matrix_order, char side, char howmny,
+                                lapack_logical* select, lapack_int n,
+                                const float* t, lapack_int ldt, float* vl,
+                                lapack_int ldvl, float* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m, float* work );
+lapack_int LAPACKE_dtrevc_work( int matrix_order, char side, char howmny,
+                                lapack_logical* select, lapack_int n,
+                                const double* t, lapack_int ldt, double* vl,
+                                lapack_int ldvl, double* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m, double* work );
+lapack_int LAPACKE_ctrevc_work( int matrix_order, char side, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                lapack_complex_float* t, lapack_int ldt,
+                                lapack_complex_float* vl, lapack_int ldvl,
+                                lapack_complex_float* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_ztrevc_work( int matrix_order, char side, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                lapack_complex_double* t, lapack_int ldt,
+                                lapack_complex_double* vl, lapack_int ldvl,
+                                lapack_complex_double* vr, lapack_int ldvr,
+                                lapack_int mm, lapack_int* m,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_strexc_work( int matrix_order, char compq, lapack_int n,
+                                float* t, lapack_int ldt, float* q,
+                                lapack_int ldq, lapack_int* ifst,
+                                lapack_int* ilst, float* work );
+lapack_int LAPACKE_dtrexc_work( int matrix_order, char compq, lapack_int n,
+                                double* t, lapack_int ldt, double* q,
+                                lapack_int ldq, lapack_int* ifst,
+                                lapack_int* ilst, double* work );
+lapack_int LAPACKE_ctrexc_work( int matrix_order, char compq, lapack_int n,
+                                lapack_complex_float* t, lapack_int ldt,
+                                lapack_complex_float* q, lapack_int ldq,
+                                lapack_int ifst, lapack_int ilst );
+lapack_int LAPACKE_ztrexc_work( int matrix_order, char compq, lapack_int n,
+                                lapack_complex_double* t, lapack_int ldt,
+                                lapack_complex_double* q, lapack_int ldq,
+                                lapack_int ifst, lapack_int ilst );
+
+lapack_int LAPACKE_strrfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const float* a, lapack_int lda, const float* b,
+                                lapack_int ldb, const float* x, lapack_int ldx,
+                                float* ferr, float* berr, float* work,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dtrrfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const double* a, lapack_int lda,
+                                const double* b, lapack_int ldb,
+                                const double* x, lapack_int ldx, double* ferr,
+                                double* berr, double* work, lapack_int* iwork );
+lapack_int LAPACKE_ctrrfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                const lapack_complex_float* x, lapack_int ldx,
+                                float* ferr, float* berr,
+                                lapack_complex_float* work, float* rwork );
+lapack_int LAPACKE_ztrrfs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                const lapack_complex_double* x, lapack_int ldx,
+                                double* ferr, double* berr,
+                                lapack_complex_double* work, double* rwork );
+
+lapack_int LAPACKE_strsen_work( int matrix_order, char job, char compq,
+                                const lapack_logical* select, lapack_int n,
+                                float* t, lapack_int ldt, float* q,
+                                lapack_int ldq, float* wr, float* wi,
+                                lapack_int* m, float* s, float* sep,
+                                float* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_dtrsen_work( int matrix_order, char job, char compq,
+                                const lapack_logical* select, lapack_int n,
+                                double* t, lapack_int ldt, double* q,
+                                lapack_int ldq, double* wr, double* wi,
+                                lapack_int* m, double* s, double* sep,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork, lapack_int liwork );
+lapack_int LAPACKE_ctrsen_work( int matrix_order, char job, char compq,
+                                const lapack_logical* select, lapack_int n,
+                                lapack_complex_float* t, lapack_int ldt,
+                                lapack_complex_float* q, lapack_int ldq,
+                                lapack_complex_float* w, lapack_int* m,
+                                float* s, float* sep,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_ztrsen_work( int matrix_order, char job, char compq,
+                                const lapack_logical* select, lapack_int n,
+                                lapack_complex_double* t, lapack_int ldt,
+                                lapack_complex_double* q, lapack_int ldq,
+                                lapack_complex_double* w, lapack_int* m,
+                                double* s, double* sep,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_strsna_work( int matrix_order, char job, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const float* t, lapack_int ldt, const float* vl,
+                                lapack_int ldvl, const float* vr,
+                                lapack_int ldvr, float* s, float* sep,
+                                lapack_int mm, lapack_int* m, float* work,
+                                lapack_int ldwork, lapack_int* iwork );
+lapack_int LAPACKE_dtrsna_work( int matrix_order, char job, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const double* t, lapack_int ldt,
+                                const double* vl, lapack_int ldvl,
+                                const double* vr, lapack_int ldvr, double* s,
+                                double* sep, lapack_int mm, lapack_int* m,
+                                double* work, lapack_int ldwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_ctrsna_work( int matrix_order, char job, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const lapack_complex_float* t, lapack_int ldt,
+                                const lapack_complex_float* vl, lapack_int ldvl,
+                                const lapack_complex_float* vr, lapack_int ldvr,
+                                float* s, float* sep, lapack_int mm,
+                                lapack_int* m, lapack_complex_float* work,
+                                lapack_int ldwork, float* rwork );
+lapack_int LAPACKE_ztrsna_work( int matrix_order, char job, char howmny,
+                                const lapack_logical* select, lapack_int n,
+                                const lapack_complex_double* t, lapack_int ldt,
+                                const lapack_complex_double* vl,
+                                lapack_int ldvl,
+                                const lapack_complex_double* vr,
+                                lapack_int ldvr, double* s, double* sep,
+                                lapack_int mm, lapack_int* m,
+                                lapack_complex_double* work, lapack_int ldwork,
+                                double* rwork );
+
+lapack_int LAPACKE_strsyl_work( int matrix_order, char trana, char tranb,
+                                lapack_int isgn, lapack_int m, lapack_int n,
+                                const float* a, lapack_int lda, const float* b,
+                                lapack_int ldb, float* c, lapack_int ldc,
+                                float* scale );
+lapack_int LAPACKE_dtrsyl_work( int matrix_order, char trana, char tranb,
+                                lapack_int isgn, lapack_int m, lapack_int n,
+                                const double* a, lapack_int lda,
+                                const double* b, lapack_int ldb, double* c,
+                                lapack_int ldc, double* scale );
+lapack_int LAPACKE_ctrsyl_work( int matrix_order, char trana, char tranb,
+                                lapack_int isgn, lapack_int m, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* b, lapack_int ldb,
+                                lapack_complex_float* c, lapack_int ldc,
+                                float* scale );
+lapack_int LAPACKE_ztrsyl_work( int matrix_order, char trana, char tranb,
+                                lapack_int isgn, lapack_int m, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* c, lapack_int ldc,
+                                double* scale );
+
+lapack_int LAPACKE_strtri_work( int matrix_order, char uplo, char diag,
+                                lapack_int n, float* a, lapack_int lda );
+lapack_int LAPACKE_dtrtri_work( int matrix_order, char uplo, char diag,
+                                lapack_int n, double* a, lapack_int lda );
+lapack_int LAPACKE_ctrtri_work( int matrix_order, char uplo, char diag,
+                                lapack_int n, lapack_complex_float* a,
+                                lapack_int lda );
+lapack_int LAPACKE_ztrtri_work( int matrix_order, char uplo, char diag,
+                                lapack_int n, lapack_complex_double* a,
+                                lapack_int lda );
+
+lapack_int LAPACKE_strtrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const float* a, lapack_int lda, float* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_dtrtrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const double* a, lapack_int lda, double* b,
+                                lapack_int ldb );
+lapack_int LAPACKE_ctrtrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_ztrtrs_work( int matrix_order, char uplo, char trans,
+                                char diag, lapack_int n, lapack_int nrhs,
+                                const lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_strttf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const float* a, lapack_int lda,
+                                float* arf );
+lapack_int LAPACKE_dtrttf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const double* a, lapack_int lda,
+                                double* arf );
+lapack_int LAPACKE_ctrttf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* arf );
+lapack_int LAPACKE_ztrttf_work( int matrix_order, char transr, char uplo,
+                                lapack_int n, const lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* arf );
+
+lapack_int LAPACKE_strttp_work( int matrix_order, char uplo, lapack_int n,
+                                const float* a, lapack_int lda, float* ap );
+lapack_int LAPACKE_dtrttp_work( int matrix_order, char uplo, lapack_int n,
+                                const double* a, lapack_int lda, double* ap );
+lapack_int LAPACKE_ctrttp_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* ap );
+lapack_int LAPACKE_ztrttp_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* ap );
+
+lapack_int LAPACKE_stzrzf_work( int matrix_order, lapack_int m, lapack_int n,
+                                float* a, lapack_int lda, float* tau,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_dtzrzf_work( int matrix_order, lapack_int m, lapack_int n,
+                                double* a, lapack_int lda, double* tau,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_ctzrzf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_ztzrzf_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cungbr_work( int matrix_order, char vect, lapack_int m,
+                                lapack_int n, lapack_int k,
+                                lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zungbr_work( int matrix_order, char vect, lapack_int m,
+                                lapack_int n, lapack_int k,
+                                lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cunghr_work( int matrix_order, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zunghr_work( int matrix_order, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, lapack_complex_double* a,
+                                lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cunglq_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zunglq_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, lapack_complex_double* a,
+                                lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cungql_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zungql_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, lapack_complex_double* a,
+                                lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cungqr_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zungqr_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, lapack_complex_double* a,
+                                lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cungrq_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zungrq_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int k, lapack_complex_double* a,
+                                lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cungtr_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* tau,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zungtr_work( int matrix_order, char uplo, lapack_int n,
+                                lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cunmbr_work( int matrix_order, char vect, char side,
+                                char trans, lapack_int m, lapack_int n,
+                                lapack_int k, const lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* tau,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zunmbr_work( int matrix_order, char vect, char side,
+                                char trans, lapack_int m, lapack_int n,
+                                lapack_int k, const lapack_complex_double* a,
+                                lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cunmhr_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, const lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* tau,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zunmhr_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int ilo,
+                                lapack_int ihi, const lapack_complex_double* a,
+                                lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* tau,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cunmql_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* tau,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zunmql_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cunmqr_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* tau,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zunmqr_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cunmrq_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* tau,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zunmrq_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cunmrz_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                lapack_int l, const lapack_complex_float* a,
+                                lapack_int lda, const lapack_complex_float* tau,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zunmrz_work( int matrix_order, char side, char trans,
+                                lapack_int m, lapack_int n, lapack_int k,
+                                lapack_int l, const lapack_complex_double* a,
+                                lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cunmtr_work( int matrix_order, char side, char uplo,
+                                char trans, lapack_int m, lapack_int n,
+                                const lapack_complex_float* a, lapack_int lda,
+                                const lapack_complex_float* tau,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_zunmtr_work( int matrix_order, char side, char uplo,
+                                char trans, lapack_int m, lapack_int n,
+                                const lapack_complex_double* a, lapack_int lda,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work, lapack_int lwork );
+
+lapack_int LAPACKE_cupgtr_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_float* ap,
+                                const lapack_complex_float* tau,
+                                lapack_complex_float* q, lapack_int ldq,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zupgtr_work( int matrix_order, char uplo, lapack_int n,
+                                const lapack_complex_double* ap,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* q, lapack_int ldq,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_cupmtr_work( int matrix_order, char side, char uplo,
+                                char trans, lapack_int m, lapack_int n,
+                                const lapack_complex_float* ap,
+                                const lapack_complex_float* tau,
+                                lapack_complex_float* c, lapack_int ldc,
+                                lapack_complex_float* work );
+lapack_int LAPACKE_zupmtr_work( int matrix_order, char side, char uplo,
+                                char trans, lapack_int m, lapack_int n,
+                                const lapack_complex_double* ap,
+                                const lapack_complex_double* tau,
+                                lapack_complex_double* c, lapack_int ldc,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_claghe( int matrix_order, lapack_int n, lapack_int k,
+                           const float* d, lapack_complex_float* a,
+                           lapack_int lda, lapack_int* iseed );
+lapack_int LAPACKE_zlaghe( int matrix_order, lapack_int n, lapack_int k,
+                           const double* d, lapack_complex_double* a,
+                           lapack_int lda, lapack_int* iseed );
+
+lapack_int LAPACKE_slagsy( int matrix_order, lapack_int n, lapack_int k,
+                           const float* d, float* a, lapack_int lda,
+                           lapack_int* iseed );
+lapack_int LAPACKE_dlagsy( int matrix_order, lapack_int n, lapack_int k,
+                           const double* d, double* a, lapack_int lda,
+                           lapack_int* iseed );
+lapack_int LAPACKE_clagsy( int matrix_order, lapack_int n, lapack_int k,
+                           const float* d, lapack_complex_float* a,
+                           lapack_int lda, lapack_int* iseed );
+lapack_int LAPACKE_zlagsy( int matrix_order, lapack_int n, lapack_int k,
+                           const double* d, lapack_complex_double* a,
+                           lapack_int lda, lapack_int* iseed );
+
+lapack_int LAPACKE_slapmr( int matrix_order, lapack_logical forwrd,
+                           lapack_int m, lapack_int n, float* x, lapack_int ldx,
+                           lapack_int* k );
+lapack_int LAPACKE_dlapmr( int matrix_order, lapack_logical forwrd,
+                           lapack_int m, lapack_int n, double* x,
+                           lapack_int ldx, lapack_int* k );
+lapack_int LAPACKE_clapmr( int matrix_order, lapack_logical forwrd,
+                           lapack_int m, lapack_int n, lapack_complex_float* x,
+                           lapack_int ldx, lapack_int* k );
+lapack_int LAPACKE_zlapmr( int matrix_order, lapack_logical forwrd,
+                           lapack_int m, lapack_int n, lapack_complex_double* x,
+                           lapack_int ldx, lapack_int* k );
+
+
+float LAPACKE_slapy2( float x, float y );
+double LAPACKE_dlapy2( double x, double y );
+
+float LAPACKE_slapy3( float x, float y, float z );
+double LAPACKE_dlapy3( double x, double y, double z );
+
+lapack_int LAPACKE_slartgp( float f, float g, float* cs, float* sn, float* r );
+lapack_int LAPACKE_dlartgp( double f, double g, double* cs, double* sn,
+                            double* r );
+
+lapack_int LAPACKE_slartgs( float x, float y, float sigma, float* cs,
+                            float* sn );
+lapack_int LAPACKE_dlartgs( double x, double y, double sigma, double* cs,
+                            double* sn );
+
+
+//LAPACK 3.3.0
+lapack_int LAPACKE_cbbcsd( int matrix_order, char jobu1, char jobu2,
+                           char jobv1t, char jobv2t, char trans, lapack_int m,
+                           lapack_int p, lapack_int q, float* theta, float* phi,
+                           lapack_complex_float* u1, lapack_int ldu1,
+                           lapack_complex_float* u2, lapack_int ldu2,
+                           lapack_complex_float* v1t, lapack_int ldv1t,
+                           lapack_complex_float* v2t, lapack_int ldv2t,
+                           float* b11d, float* b11e, float* b12d, float* b12e,
+                           float* b21d, float* b21e, float* b22d, float* b22e );
+lapack_int LAPACKE_cbbcsd_work( int matrix_order, char jobu1, char jobu2,
+                                char jobv1t, char jobv2t, char trans,
+                                lapack_int m, lapack_int p, lapack_int q,
+                                float* theta, float* phi,
+                                lapack_complex_float* u1, lapack_int ldu1,
+                                lapack_complex_float* u2, lapack_int ldu2,
+                                lapack_complex_float* v1t, lapack_int ldv1t,
+                                lapack_complex_float* v2t, lapack_int ldv2t,
+                                float* b11d, float* b11e, float* b12d,
+                                float* b12e, float* b21d, float* b21e,
+                                float* b22d, float* b22e, float* rwork,
+                                lapack_int lrwork );
+lapack_int LAPACKE_cheswapr( int matrix_order, char uplo, lapack_int n,
+                             lapack_complex_float* a, lapack_int i1,
+                             lapack_int i2 );
+lapack_int LAPACKE_cheswapr_work( int matrix_order, char uplo, lapack_int n,
+                                  lapack_complex_float* a, lapack_int i1,
+                                  lapack_int i2 );
+lapack_int LAPACKE_chetri2( int matrix_order, char uplo, lapack_int n,
+                            lapack_complex_float* a, lapack_int lda,
+                            const lapack_int* ipiv );
+lapack_int LAPACKE_chetri2_work( int matrix_order, char uplo, lapack_int n,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 const lapack_int* ipiv,
+                                 lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_chetri2x( int matrix_order, char uplo, lapack_int n,
+                             lapack_complex_float* a, lapack_int lda,
+                             const lapack_int* ipiv, lapack_int nb );
+lapack_int LAPACKE_chetri2x_work( int matrix_order, char uplo, lapack_int n,
+                                  lapack_complex_float* a, lapack_int lda,
+                                  const lapack_int* ipiv,
+                                  lapack_complex_float* work, lapack_int nb );
+lapack_int LAPACKE_chetrs2( int matrix_order, char uplo, lapack_int n,
+                            lapack_int nrhs, const lapack_complex_float* a,
+                            lapack_int lda, const lapack_int* ipiv,
+                            lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_chetrs2_work( int matrix_order, char uplo, lapack_int n,
+                                 lapack_int nrhs, const lapack_complex_float* a,
+                                 lapack_int lda, const lapack_int* ipiv,
+                                 lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* work );
+lapack_int LAPACKE_csyconv( int matrix_order, char uplo, char way, lapack_int n,
+                            lapack_complex_float* a, lapack_int lda,
+                            const lapack_int* ipiv );
+lapack_int LAPACKE_csyconv_work( int matrix_order, char uplo, char way,
+                                 lapack_int n, lapack_complex_float* a,
+                                 lapack_int lda, const lapack_int* ipiv,
+                                 lapack_complex_float* work );
+lapack_int LAPACKE_csyswapr( int matrix_order, char uplo, lapack_int n,
+                             lapack_complex_float* a, lapack_int i1,
+                             lapack_int i2 );
+lapack_int LAPACKE_csyswapr_work( int matrix_order, char uplo, lapack_int n,
+                                  lapack_complex_float* a, lapack_int i1,
+                                  lapack_int i2 );
+lapack_int LAPACKE_csytri2( int matrix_order, char uplo, lapack_int n,
+                            lapack_complex_float* a, lapack_int lda,
+                            const lapack_int* ipiv );
+lapack_int LAPACKE_csytri2_work( int matrix_order, char uplo, lapack_int n,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 const lapack_int* ipiv,
+                                 lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_csytri2x( int matrix_order, char uplo, lapack_int n,
+                             lapack_complex_float* a, lapack_int lda,
+                             const lapack_int* ipiv, lapack_int nb );
+lapack_int LAPACKE_csytri2x_work( int matrix_order, char uplo, lapack_int n,
+                                  lapack_complex_float* a, lapack_int lda,
+                                  const lapack_int* ipiv,
+                                  lapack_complex_float* work, lapack_int nb );
+lapack_int LAPACKE_csytrs2( int matrix_order, char uplo, lapack_int n,
+                            lapack_int nrhs, const lapack_complex_float* a,
+                            lapack_int lda, const lapack_int* ipiv,
+                            lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_csytrs2_work( int matrix_order, char uplo, lapack_int n,
+                                 lapack_int nrhs, const lapack_complex_float* a,
+                                 lapack_int lda, const lapack_int* ipiv,
+                                 lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* work );
+lapack_int LAPACKE_cunbdb( int matrix_order, char trans, char signs,
+                           lapack_int m, lapack_int p, lapack_int q,
+                           lapack_complex_float* x11, lapack_int ldx11,
+                           lapack_complex_float* x12, lapack_int ldx12,
+                           lapack_complex_float* x21, lapack_int ldx21,
+                           lapack_complex_float* x22, lapack_int ldx22,
+                           float* theta, float* phi,
+                           lapack_complex_float* taup1,
+                           lapack_complex_float* taup2,
+                           lapack_complex_float* tauq1,
+                           lapack_complex_float* tauq2 );
+lapack_int LAPACKE_cunbdb_work( int matrix_order, char trans, char signs,
+                                lapack_int m, lapack_int p, lapack_int q,
+                                lapack_complex_float* x11, lapack_int ldx11,
+                                lapack_complex_float* x12, lapack_int ldx12,
+                                lapack_complex_float* x21, lapack_int ldx21,
+                                lapack_complex_float* x22, lapack_int ldx22,
+                                float* theta, float* phi,
+                                lapack_complex_float* taup1,
+                                lapack_complex_float* taup2,
+                                lapack_complex_float* tauq1,
+                                lapack_complex_float* tauq2,
+                                lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_cuncsd( int matrix_order, char jobu1, char jobu2,
+                           char jobv1t, char jobv2t, char trans, char signs,
+                           lapack_int m, lapack_int p, lapack_int q,
+                           lapack_complex_float* x11, lapack_int ldx11,
+                           lapack_complex_float* x12, lapack_int ldx12,
+                           lapack_complex_float* x21, lapack_int ldx21,
+                           lapack_complex_float* x22, lapack_int ldx22,
+                           float* theta, lapack_complex_float* u1,
+                           lapack_int ldu1, lapack_complex_float* u2,
+                           lapack_int ldu2, lapack_complex_float* v1t,
+                           lapack_int ldv1t, lapack_complex_float* v2t,
+                           lapack_int ldv2t );
+lapack_int LAPACKE_cuncsd_work( int matrix_order, char jobu1, char jobu2,
+                                char jobv1t, char jobv2t, char trans,
+                                char signs, lapack_int m, lapack_int p,
+                                lapack_int q, lapack_complex_float* x11,
+                                lapack_int ldx11, lapack_complex_float* x12,
+                                lapack_int ldx12, lapack_complex_float* x21,
+                                lapack_int ldx21, lapack_complex_float* x22,
+                                lapack_int ldx22, float* theta,
+                                lapack_complex_float* u1, lapack_int ldu1,
+                                lapack_complex_float* u2, lapack_int ldu2,
+                                lapack_complex_float* v1t, lapack_int ldv1t,
+                                lapack_complex_float* v2t, lapack_int ldv2t,
+                                lapack_complex_float* work, lapack_int lwork,
+                                float* rwork, lapack_int lrwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dbbcsd( int matrix_order, char jobu1, char jobu2,
+                           char jobv1t, char jobv2t, char trans, lapack_int m,
+                           lapack_int p, lapack_int q, double* theta,
+                           double* phi, double* u1, lapack_int ldu1, double* u2,
+                           lapack_int ldu2, double* v1t, lapack_int ldv1t,
+                           double* v2t, lapack_int ldv2t, double* b11d,
+                           double* b11e, double* b12d, double* b12e,
+                           double* b21d, double* b21e, double* b22d,
+                           double* b22e );
+lapack_int LAPACKE_dbbcsd_work( int matrix_order, char jobu1, char jobu2,
+                                char jobv1t, char jobv2t, char trans,
+                                lapack_int m, lapack_int p, lapack_int q,
+                                double* theta, double* phi, double* u1,
+                                lapack_int ldu1, double* u2, lapack_int ldu2,
+                                double* v1t, lapack_int ldv1t, double* v2t,
+                                lapack_int ldv2t, double* b11d, double* b11e,
+                                double* b12d, double* b12e, double* b21d,
+                                double* b21e, double* b22d, double* b22e,
+                                double* work, lapack_int lwork );
+lapack_int LAPACKE_dorbdb( int matrix_order, char trans, char signs,
+                           lapack_int m, lapack_int p, lapack_int q,
+                           double* x11, lapack_int ldx11, double* x12,
+                           lapack_int ldx12, double* x21, lapack_int ldx21,
+                           double* x22, lapack_int ldx22, double* theta,
+                           double* phi, double* taup1, double* taup2,
+                           double* tauq1, double* tauq2 );
+lapack_int LAPACKE_dorbdb_work( int matrix_order, char trans, char signs,
+                                lapack_int m, lapack_int p, lapack_int q,
+                                double* x11, lapack_int ldx11, double* x12,
+                                lapack_int ldx12, double* x21, lapack_int ldx21,
+                                double* x22, lapack_int ldx22, double* theta,
+                                double* phi, double* taup1, double* taup2,
+                                double* tauq1, double* tauq2, double* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_dorcsd( int matrix_order, char jobu1, char jobu2,
+                           char jobv1t, char jobv2t, char trans, char signs,
+                           lapack_int m, lapack_int p, lapack_int q,
+                           double* x11, lapack_int ldx11, double* x12,
+                           lapack_int ldx12, double* x21, lapack_int ldx21,
+                           double* x22, lapack_int ldx22, double* theta,
+                           double* u1, lapack_int ldu1, double* u2,
+                           lapack_int ldu2, double* v1t, lapack_int ldv1t,
+                           double* v2t, lapack_int ldv2t );
+lapack_int LAPACKE_dorcsd_work( int matrix_order, char jobu1, char jobu2,
+                                char jobv1t, char jobv2t, char trans,
+                                char signs, lapack_int m, lapack_int p,
+                                lapack_int q, double* x11, lapack_int ldx11,
+                                double* x12, lapack_int ldx12, double* x21,
+                                lapack_int ldx21, double* x22, lapack_int ldx22,
+                                double* theta, double* u1, lapack_int ldu1,
+                                double* u2, lapack_int ldu2, double* v1t,
+                                lapack_int ldv1t, double* v2t, lapack_int ldv2t,
+                                double* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_dsyconv( int matrix_order, char uplo, char way, lapack_int n,
+                            double* a, lapack_int lda, const lapack_int* ipiv );
+lapack_int LAPACKE_dsyconv_work( int matrix_order, char uplo, char way,
+                                 lapack_int n, double* a, lapack_int lda,
+                                 const lapack_int* ipiv, double* work );
+lapack_int LAPACKE_dsyswapr( int matrix_order, char uplo, lapack_int n,
+                             double* a, lapack_int i1, lapack_int i2 );
+lapack_int LAPACKE_dsyswapr_work( int matrix_order, char uplo, lapack_int n,
+                                  double* a, lapack_int i1, lapack_int i2 );
+lapack_int LAPACKE_dsytri2( int matrix_order, char uplo, lapack_int n,
+                            double* a, lapack_int lda, const lapack_int* ipiv );
+lapack_int LAPACKE_dsytri2_work( int matrix_order, char uplo, lapack_int n,
+                                 double* a, lapack_int lda,
+                                 const lapack_int* ipiv,
+                                 lapack_complex_double* work, lapack_int lwork );
+lapack_int LAPACKE_dsytri2x( int matrix_order, char uplo, lapack_int n,
+                             double* a, lapack_int lda, const lapack_int* ipiv,
+                             lapack_int nb );
+lapack_int LAPACKE_dsytri2x_work( int matrix_order, char uplo, lapack_int n,
+                                  double* a, lapack_int lda,
+                                  const lapack_int* ipiv, double* work,
+                                  lapack_int nb );
+lapack_int LAPACKE_dsytrs2( int matrix_order, char uplo, lapack_int n,
+                            lapack_int nrhs, const double* a, lapack_int lda,
+                            const lapack_int* ipiv, double* b, lapack_int ldb );
+lapack_int LAPACKE_dsytrs2_work( int matrix_order, char uplo, lapack_int n,
+                                 lapack_int nrhs, const double* a,
+                                 lapack_int lda, const lapack_int* ipiv,
+                                 double* b, lapack_int ldb, double* work );
+lapack_int LAPACKE_sbbcsd( int matrix_order, char jobu1, char jobu2,
+                           char jobv1t, char jobv2t, char trans, lapack_int m,
+                           lapack_int p, lapack_int q, float* theta, float* phi,
+                           float* u1, lapack_int ldu1, float* u2,
+                           lapack_int ldu2, float* v1t, lapack_int ldv1t,
+                           float* v2t, lapack_int ldv2t, float* b11d,
+                           float* b11e, float* b12d, float* b12e, float* b21d,
+                           float* b21e, float* b22d, float* b22e );
+lapack_int LAPACKE_sbbcsd_work( int matrix_order, char jobu1, char jobu2,
+                                char jobv1t, char jobv2t, char trans,
+                                lapack_int m, lapack_int p, lapack_int q,
+                                float* theta, float* phi, float* u1,
+                                lapack_int ldu1, float* u2, lapack_int ldu2,
+                                float* v1t, lapack_int ldv1t, float* v2t,
+                                lapack_int ldv2t, float* b11d, float* b11e,
+                                float* b12d, float* b12e, float* b21d,
+                                float* b21e, float* b22d, float* b22e,
+                                float* work, lapack_int lwork );
+lapack_int LAPACKE_sorbdb( int matrix_order, char trans, char signs,
+                           lapack_int m, lapack_int p, lapack_int q, float* x11,
+                           lapack_int ldx11, float* x12, lapack_int ldx12,
+                           float* x21, lapack_int ldx21, float* x22,
+                           lapack_int ldx22, float* theta, float* phi,
+                           float* taup1, float* taup2, float* tauq1,
+                           float* tauq2 );
+lapack_int LAPACKE_sorbdb_work( int matrix_order, char trans, char signs,
+                                lapack_int m, lapack_int p, lapack_int q,
+                                float* x11, lapack_int ldx11, float* x12,
+                                lapack_int ldx12, float* x21, lapack_int ldx21,
+                                float* x22, lapack_int ldx22, float* theta,
+                                float* phi, float* taup1, float* taup2,
+                                float* tauq1, float* tauq2, float* work,
+                                lapack_int lwork );
+lapack_int LAPACKE_sorcsd( int matrix_order, char jobu1, char jobu2,
+                           char jobv1t, char jobv2t, char trans, char signs,
+                           lapack_int m, lapack_int p, lapack_int q, float* x11,
+                           lapack_int ldx11, float* x12, lapack_int ldx12,
+                           float* x21, lapack_int ldx21, float* x22,
+                           lapack_int ldx22, float* theta, float* u1,
+                           lapack_int ldu1, float* u2, lapack_int ldu2,
+                           float* v1t, lapack_int ldv1t, float* v2t,
+                           lapack_int ldv2t );
+lapack_int LAPACKE_sorcsd_work( int matrix_order, char jobu1, char jobu2,
+                                char jobv1t, char jobv2t, char trans,
+                                char signs, lapack_int m, lapack_int p,
+                                lapack_int q, float* x11, lapack_int ldx11,
+                                float* x12, lapack_int ldx12, float* x21,
+                                lapack_int ldx21, float* x22, lapack_int ldx22,
+                                float* theta, float* u1, lapack_int ldu1,
+                                float* u2, lapack_int ldu2, float* v1t,
+                                lapack_int ldv1t, float* v2t, lapack_int ldv2t,
+                                float* work, lapack_int lwork,
+                                lapack_int* iwork );
+lapack_int LAPACKE_ssyconv( int matrix_order, char uplo, char way, lapack_int n,
+                            float* a, lapack_int lda, const lapack_int* ipiv );
+lapack_int LAPACKE_ssyconv_work( int matrix_order, char uplo, char way,
+                                 lapack_int n, float* a, lapack_int lda,
+                                 const lapack_int* ipiv, float* work );
+lapack_int LAPACKE_ssyswapr( int matrix_order, char uplo, lapack_int n,
+                             float* a, lapack_int i1, lapack_int i2 );
+lapack_int LAPACKE_ssyswapr_work( int matrix_order, char uplo, lapack_int n,
+                                  float* a, lapack_int i1, lapack_int i2 );
+lapack_int LAPACKE_ssytri2( int matrix_order, char uplo, lapack_int n, float* a,
+                            lapack_int lda, const lapack_int* ipiv );
+lapack_int LAPACKE_ssytri2_work( int matrix_order, char uplo, lapack_int n,
+                                 float* a, lapack_int lda,
+                                 const lapack_int* ipiv,
+                                 lapack_complex_float* work, lapack_int lwork );
+lapack_int LAPACKE_ssytri2x( int matrix_order, char uplo, lapack_int n,
+                             float* a, lapack_int lda, const lapack_int* ipiv,
+                             lapack_int nb );
+lapack_int LAPACKE_ssytri2x_work( int matrix_order, char uplo, lapack_int n,
+                                  float* a, lapack_int lda,
+                                  const lapack_int* ipiv, float* work,
+                                  lapack_int nb );
+lapack_int LAPACKE_ssytrs2( int matrix_order, char uplo, lapack_int n,
+                            lapack_int nrhs, const float* a, lapack_int lda,
+                            const lapack_int* ipiv, float* b, lapack_int ldb );
+lapack_int LAPACKE_ssytrs2_work( int matrix_order, char uplo, lapack_int n,
+                                 lapack_int nrhs, const float* a,
+                                 lapack_int lda, const lapack_int* ipiv,
+                                 float* b, lapack_int ldb, float* work );
+lapack_int LAPACKE_zbbcsd( int matrix_order, char jobu1, char jobu2,
+                           char jobv1t, char jobv2t, char trans, lapack_int m,
+                           lapack_int p, lapack_int q, double* theta,
+                           double* phi, lapack_complex_double* u1,
+                           lapack_int ldu1, lapack_complex_double* u2,
+                           lapack_int ldu2, lapack_complex_double* v1t,
+                           lapack_int ldv1t, lapack_complex_double* v2t,
+                           lapack_int ldv2t, double* b11d, double* b11e,
+                           double* b12d, double* b12e, double* b21d,
+                           double* b21e, double* b22d, double* b22e );
+lapack_int LAPACKE_zbbcsd_work( int matrix_order, char jobu1, char jobu2,
+                                char jobv1t, char jobv2t, char trans,
+                                lapack_int m, lapack_int p, lapack_int q,
+                                double* theta, double* phi,
+                                lapack_complex_double* u1, lapack_int ldu1,
+                                lapack_complex_double* u2, lapack_int ldu2,
+                                lapack_complex_double* v1t, lapack_int ldv1t,
+                                lapack_complex_double* v2t, lapack_int ldv2t,
+                                double* b11d, double* b11e, double* b12d,
+                                double* b12e, double* b21d, double* b21e,
+                                double* b22d, double* b22e, double* rwork,
+                                lapack_int lrwork );
+lapack_int LAPACKE_zheswapr( int matrix_order, char uplo, lapack_int n,
+                             lapack_complex_double* a, lapack_int i1,
+                             lapack_int i2 );
+lapack_int LAPACKE_zheswapr_work( int matrix_order, char uplo, lapack_int n,
+                                  lapack_complex_double* a, lapack_int i1,
+                                  lapack_int i2 );
+lapack_int LAPACKE_zhetri2( int matrix_order, char uplo, lapack_int n,
+                            lapack_complex_double* a, lapack_int lda,
+                            const lapack_int* ipiv );
+lapack_int LAPACKE_zhetri2_work( int matrix_order, char uplo, lapack_int n,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 const lapack_int* ipiv,
+                                 lapack_complex_double* work, lapack_int lwork );
+lapack_int LAPACKE_zhetri2x( int matrix_order, char uplo, lapack_int n,
+                             lapack_complex_double* a, lapack_int lda,
+                             const lapack_int* ipiv, lapack_int nb );
+lapack_int LAPACKE_zhetri2x_work( int matrix_order, char uplo, lapack_int n,
+                                  lapack_complex_double* a, lapack_int lda,
+                                  const lapack_int* ipiv,
+                                  lapack_complex_double* work, lapack_int nb );
+lapack_int LAPACKE_zhetrs2( int matrix_order, char uplo, lapack_int n,
+                            lapack_int nrhs, const lapack_complex_double* a,
+                            lapack_int lda, const lapack_int* ipiv,
+                            lapack_complex_double* b, lapack_int ldb );
+lapack_int LAPACKE_zhetrs2_work( int matrix_order, char uplo, lapack_int n,
+                                 lapack_int nrhs, const lapack_complex_double* a,
+                                 lapack_int lda, const lapack_int* ipiv,
+                                 lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* work );
+lapack_int LAPACKE_zsyconv( int matrix_order, char uplo, char way, lapack_int n,
+                            lapack_complex_double* a, lapack_int lda,
+                            const lapack_int* ipiv );
+lapack_int LAPACKE_zsyconv_work( int matrix_order, char uplo, char way,
+                                 lapack_int n, lapack_complex_double* a,
+                                 lapack_int lda, const lapack_int* ipiv,
+                                 lapack_complex_double* work );
+lapack_int LAPACKE_zsyswapr( int matrix_order, char uplo, lapack_int n,
+                             lapack_complex_double* a, lapack_int i1,
+                             lapack_int i2 );
+lapack_int LAPACKE_zsyswapr_work( int matrix_order, char uplo, lapack_int n,
+                                  lapack_complex_double* a, lapack_int i1,
+                                  lapack_int i2 );
+lapack_int LAPACKE_zsytri2( int matrix_order, char uplo, lapack_int n,
+                            lapack_complex_double* a, lapack_int lda,
+                            const lapack_int* ipiv );
+lapack_int LAPACKE_zsytri2_work( int matrix_order, char uplo, lapack_int n,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 const lapack_int* ipiv,
+                                 lapack_complex_double* work, lapack_int lwork );
+lapack_int LAPACKE_zsytri2x( int matrix_order, char uplo, lapack_int n,
+                             lapack_complex_double* a, lapack_int lda,
+                             const lapack_int* ipiv, lapack_int nb );
+lapack_int LAPACKE_zsytri2x_work( int matrix_order, char uplo, lapack_int n,
+                                  lapack_complex_double* a, lapack_int lda,
+                                  const lapack_int* ipiv,
+                                  lapack_complex_double* work, lapack_int nb );
+lapack_int LAPACKE_zsytrs2( int matrix_order, char uplo, lapack_int n,
+                            lapack_int nrhs, const lapack_complex_double* a,
+                            lapack_int lda, const lapack_int* ipiv,
+                            lapack_complex_double* b, lapack_int ldb );
+lapack_int LAPACKE_zsytrs2_work( int matrix_order, char uplo, lapack_int n,
+                                 lapack_int nrhs, const lapack_complex_double* a,
+                                 lapack_int lda, const lapack_int* ipiv,
+                                 lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* work );
+lapack_int LAPACKE_zunbdb( int matrix_order, char trans, char signs,
+                           lapack_int m, lapack_int p, lapack_int q,
+                           lapack_complex_double* x11, lapack_int ldx11,
+                           lapack_complex_double* x12, lapack_int ldx12,
+                           lapack_complex_double* x21, lapack_int ldx21,
+                           lapack_complex_double* x22, lapack_int ldx22,
+                           double* theta, double* phi,
+                           lapack_complex_double* taup1,
+                           lapack_complex_double* taup2,
+                           lapack_complex_double* tauq1,
+                           lapack_complex_double* tauq2 );
+lapack_int LAPACKE_zunbdb_work( int matrix_order, char trans, char signs,
+                                lapack_int m, lapack_int p, lapack_int q,
+                                lapack_complex_double* x11, lapack_int ldx11,
+                                lapack_complex_double* x12, lapack_int ldx12,
+                                lapack_complex_double* x21, lapack_int ldx21,
+                                lapack_complex_double* x22, lapack_int ldx22,
+                                double* theta, double* phi,
+                                lapack_complex_double* taup1,
+                                lapack_complex_double* taup2,
+                                lapack_complex_double* tauq1,
+                                lapack_complex_double* tauq2,
+                                lapack_complex_double* work, lapack_int lwork );
+lapack_int LAPACKE_zuncsd( int matrix_order, char jobu1, char jobu2,
+                           char jobv1t, char jobv2t, char trans, char signs,
+                           lapack_int m, lapack_int p, lapack_int q,
+                           lapack_complex_double* x11, lapack_int ldx11,
+                           lapack_complex_double* x12, lapack_int ldx12,
+                           lapack_complex_double* x21, lapack_int ldx21,
+                           lapack_complex_double* x22, lapack_int ldx22,
+                           double* theta, lapack_complex_double* u1,
+                           lapack_int ldu1, lapack_complex_double* u2,
+                           lapack_int ldu2, lapack_complex_double* v1t,
+                           lapack_int ldv1t, lapack_complex_double* v2t,
+                           lapack_int ldv2t );
+lapack_int LAPACKE_zuncsd_work( int matrix_order, char jobu1, char jobu2,
+                                char jobv1t, char jobv2t, char trans,
+                                char signs, lapack_int m, lapack_int p,
+                                lapack_int q, lapack_complex_double* x11,
+                                lapack_int ldx11, lapack_complex_double* x12,
+                                lapack_int ldx12, lapack_complex_double* x21,
+                                lapack_int ldx21, lapack_complex_double* x22,
+                                lapack_int ldx22, double* theta,
+                                lapack_complex_double* u1, lapack_int ldu1,
+                                lapack_complex_double* u2, lapack_int ldu2,
+                                lapack_complex_double* v1t, lapack_int ldv1t,
+                                lapack_complex_double* v2t, lapack_int ldv2t,
+                                lapack_complex_double* work, lapack_int lwork,
+                                double* rwork, lapack_int lrwork,
+                                lapack_int* iwork );
+//LAPACK 3.4.0
+lapack_int LAPACKE_sgemqrt( int matrix_order, char side, char trans,
+                            lapack_int m, lapack_int n, lapack_int k,
+                            lapack_int nb, const float* v, lapack_int ldv,
+                            const float* t, lapack_int ldt, float* c,
+                            lapack_int ldc );
+lapack_int LAPACKE_dgemqrt( int matrix_order, char side, char trans,
+                            lapack_int m, lapack_int n, lapack_int k,
+                            lapack_int nb, const double* v, lapack_int ldv,
+                            const double* t, lapack_int ldt, double* c,
+                            lapack_int ldc );
+lapack_int LAPACKE_cgemqrt( int matrix_order, char side, char trans,
+                            lapack_int m, lapack_int n, lapack_int k,
+                            lapack_int nb, const lapack_complex_float* v,
+                            lapack_int ldv, const lapack_complex_float* t,
+                            lapack_int ldt, lapack_complex_float* c,
+                            lapack_int ldc );
+lapack_int LAPACKE_zgemqrt( int matrix_order, char side, char trans,
+                            lapack_int m, lapack_int n, lapack_int k,
+                            lapack_int nb, const lapack_complex_double* v,
+                            lapack_int ldv, const lapack_complex_double* t,
+                            lapack_int ldt, lapack_complex_double* c,
+                            lapack_int ldc );
+
+lapack_int LAPACKE_sgeqrt( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nb, float* a, lapack_int lda, float* t,
+                           lapack_int ldt );
+lapack_int LAPACKE_dgeqrt( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nb, double* a, lapack_int lda, double* t,
+                           lapack_int ldt );
+lapack_int LAPACKE_cgeqrt( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nb, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* t,
+                           lapack_int ldt );
+lapack_int LAPACKE_zgeqrt( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int nb, lapack_complex_double* a,
+                           lapack_int lda, lapack_complex_double* t,
+                           lapack_int ldt );
+
+lapack_int LAPACKE_sgeqrt2( int matrix_order, lapack_int m, lapack_int n,
+                            float* a, lapack_int lda, float* t,
+                            lapack_int ldt );
+lapack_int LAPACKE_dgeqrt2( int matrix_order, lapack_int m, lapack_int n,
+                            double* a, lapack_int lda, double* t,
+                            lapack_int ldt );
+lapack_int LAPACKE_cgeqrt2( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_complex_float* a, lapack_int lda,
+                            lapack_complex_float* t, lapack_int ldt );
+lapack_int LAPACKE_zgeqrt2( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_complex_double* a, lapack_int lda,
+                            lapack_complex_double* t, lapack_int ldt );
+
+lapack_int LAPACKE_sgeqrt3( int matrix_order, lapack_int m, lapack_int n,
+                            float* a, lapack_int lda, float* t,
+                            lapack_int ldt );
+lapack_int LAPACKE_dgeqrt3( int matrix_order, lapack_int m, lapack_int n,
+                            double* a, lapack_int lda, double* t,
+                            lapack_int ldt );
+lapack_int LAPACKE_cgeqrt3( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_complex_float* a, lapack_int lda,
+                            lapack_complex_float* t, lapack_int ldt );
+lapack_int LAPACKE_zgeqrt3( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_complex_double* a, lapack_int lda,
+                            lapack_complex_double* t, lapack_int ldt );
+
+lapack_int LAPACKE_stpmqrt( int matrix_order, char side, char trans,
+                            lapack_int m, lapack_int n, lapack_int k,
+                            lapack_int l, lapack_int nb, const float* v,
+                            lapack_int ldv, const float* t, lapack_int ldt,
+                            float* a, lapack_int lda, float* b,
+                            lapack_int ldb );
+lapack_int LAPACKE_dtpmqrt( int matrix_order, char side, char trans,
+                            lapack_int m, lapack_int n, lapack_int k,
+                            lapack_int l, lapack_int nb, const double* v,
+                            lapack_int ldv, const double* t, lapack_int ldt,
+                            double* a, lapack_int lda, double* b,
+                            lapack_int ldb );
+lapack_int LAPACKE_ctpmqrt( int matrix_order, char side, char trans,
+                            lapack_int m, lapack_int n, lapack_int k,
+                            lapack_int l, lapack_int nb,
+                            const lapack_complex_float* v, lapack_int ldv,
+                            const lapack_complex_float* t, lapack_int ldt,
+                            lapack_complex_float* a, lapack_int lda,
+                            lapack_complex_float* b, lapack_int ldb );
+lapack_int LAPACKE_ztpmqrt( int matrix_order, char side, char trans,
+                            lapack_int m, lapack_int n, lapack_int k,
+                            lapack_int l, lapack_int nb,
+                            const lapack_complex_double* v, lapack_int ldv,
+                            const lapack_complex_double* t, lapack_int ldt,
+                            lapack_complex_double* a, lapack_int lda,
+                            lapack_complex_double* b, lapack_int ldb );
+
+lapack_int LAPACKE_dtpqrt( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int l, lapack_int nb, double* a,
+                           lapack_int lda, double* b, lapack_int ldb, double* t,
+                           lapack_int ldt );
+lapack_int LAPACKE_ctpqrt( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int l, lapack_int nb, lapack_complex_float* a,
+                           lapack_int lda, lapack_complex_float* t,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_int ldt );
+lapack_int LAPACKE_ztpqrt( int matrix_order, lapack_int m, lapack_int n,
+                           lapack_int l, lapack_int nb,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_complex_double* t, lapack_int ldt );
+
+lapack_int LAPACKE_stpqrt2( int matrix_order, lapack_int m, lapack_int n,
+                            float* a, lapack_int lda, float* b, lapack_int ldb,
+                            float* t, lapack_int ldt );
+lapack_int LAPACKE_dtpqrt2( int matrix_order, lapack_int m, lapack_int n,
+                            double* a, lapack_int lda, double* b,
+                            lapack_int ldb, double* t, lapack_int ldt );
+lapack_int LAPACKE_ctpqrt2( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_complex_float* a, lapack_int lda,
+                            lapack_complex_float* b, lapack_int ldb,
+                            lapack_complex_float* t, lapack_int ldt );
+lapack_int LAPACKE_ztpqrt2( int matrix_order, lapack_int m, lapack_int n,
+                            lapack_complex_double* a, lapack_int lda,
+                            lapack_complex_double* b, lapack_int ldb,
+                            lapack_complex_double* t, lapack_int ldt );
+
+lapack_int LAPACKE_stprfb( int matrix_order, char side, char trans, char direct,
+                           char storev, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_int l, const float* v,
+                           lapack_int ldv, const float* t, lapack_int ldt,
+                           float* a, lapack_int lda, float* b, lapack_int ldb,
+                           lapack_int myldwork );
+lapack_int LAPACKE_dtprfb( int matrix_order, char side, char trans, char direct,
+                           char storev, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_int l, const double* v,
+                           lapack_int ldv, const double* t, lapack_int ldt,
+                           double* a, lapack_int lda, double* b, lapack_int ldb,
+                           lapack_int myldwork );
+lapack_int LAPACKE_ctprfb( int matrix_order, char side, char trans, char direct,
+                           char storev, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_int l,
+                           const lapack_complex_float* v, lapack_int ldv,
+                           const lapack_complex_float* t, lapack_int ldt,
+                           lapack_complex_float* a, lapack_int lda,
+                           lapack_complex_float* b, lapack_int ldb,
+                           lapack_int myldwork );
+lapack_int LAPACKE_ztprfb( int matrix_order, char side, char trans, char direct,
+                           char storev, lapack_int m, lapack_int n,
+                           lapack_int k, lapack_int l,
+                           const lapack_complex_double* v, lapack_int ldv,
+                           const lapack_complex_double* t, lapack_int ldt,
+                           lapack_complex_double* a, lapack_int lda,
+                           lapack_complex_double* b, lapack_int ldb,
+                           lapack_int myldwork );
+
+lapack_int LAPACKE_sgemqrt_work( int matrix_order, char side, char trans,
+                                 lapack_int m, lapack_int n, lapack_int k,
+                                 lapack_int nb, const float* v, lapack_int ldv,
+                                 const float* t, lapack_int ldt, float* c,
+                                 lapack_int ldc, float* work );
+lapack_int LAPACKE_dgemqrt_work( int matrix_order, char side, char trans,
+                                 lapack_int m, lapack_int n, lapack_int k,
+                                 lapack_int nb, const double* v, lapack_int ldv,
+                                 const double* t, lapack_int ldt, double* c,
+                                 lapack_int ldc, double* work );
+lapack_int LAPACKE_cgemqrt_work( int matrix_order, char side, char trans,
+                                 lapack_int m, lapack_int n, lapack_int k,
+                                 lapack_int nb, const lapack_complex_float* v,
+                                 lapack_int ldv, const lapack_complex_float* t,
+                                 lapack_int ldt, lapack_complex_float* c,
+                                 lapack_int ldc, lapack_complex_float* work );
+lapack_int LAPACKE_zgemqrt_work( int matrix_order, char side, char trans,
+                                 lapack_int m, lapack_int n, lapack_int k,
+                                 lapack_int nb, const lapack_complex_double* v,
+                                 lapack_int ldv, const lapack_complex_double* t,
+                                 lapack_int ldt, lapack_complex_double* c,
+                                 lapack_int ldc, lapack_complex_double* work );
+
+lapack_int LAPACKE_sgeqrt_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nb, float* a, lapack_int lda,
+                                float* t, lapack_int ldt, float* work );
+lapack_int LAPACKE_dgeqrt_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nb, double* a, lapack_int lda,
+                                double* t, lapack_int ldt, double* work );
+lapack_int LAPACKE_cgeqrt_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nb, lapack_complex_float* a,
+                                lapack_int lda, lapack_complex_float* t,
+                                lapack_int ldt, lapack_complex_float* work );
+lapack_int LAPACKE_zgeqrt_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int nb, lapack_complex_double* a,
+                                lapack_int lda, lapack_complex_double* t,
+                                lapack_int ldt, lapack_complex_double* work );
+
+lapack_int LAPACKE_sgeqrt2_work( int matrix_order, lapack_int m, lapack_int n,
+                                 float* a, lapack_int lda, float* t,
+                                 lapack_int ldt );
+lapack_int LAPACKE_dgeqrt2_work( int matrix_order, lapack_int m, lapack_int n,
+                                 double* a, lapack_int lda, double* t,
+                                 lapack_int ldt );
+lapack_int LAPACKE_cgeqrt2_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 lapack_complex_float* t, lapack_int ldt );
+lapack_int LAPACKE_zgeqrt2_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 lapack_complex_double* t, lapack_int ldt );
+
+lapack_int LAPACKE_sgeqrt3_work( int matrix_order, lapack_int m, lapack_int n,
+                                 float* a, lapack_int lda, float* t,
+                                 lapack_int ldt );
+lapack_int LAPACKE_dgeqrt3_work( int matrix_order, lapack_int m, lapack_int n,
+                                 double* a, lapack_int lda, double* t,
+                                 lapack_int ldt );
+lapack_int LAPACKE_cgeqrt3_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 lapack_complex_float* t, lapack_int ldt );
+lapack_int LAPACKE_zgeqrt3_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 lapack_complex_double* t, lapack_int ldt );
+
+lapack_int LAPACKE_stpmqrt_work( int matrix_order, char side, char trans,
+                                 lapack_int m, lapack_int n, lapack_int k,
+                                 lapack_int l, lapack_int nb, const float* v,
+                                 lapack_int ldv, const float* t, lapack_int ldt,
+                                 float* a, lapack_int lda, float* b,
+                                 lapack_int ldb, float* work );
+lapack_int LAPACKE_dtpmqrt_work( int matrix_order, char side, char trans,
+                                 lapack_int m, lapack_int n, lapack_int k,
+                                 lapack_int l, lapack_int nb, const double* v,
+                                 lapack_int ldv, const double* t,
+                                 lapack_int ldt, double* a, lapack_int lda,
+                                 double* b, lapack_int ldb, double* work );
+lapack_int LAPACKE_ctpmqrt_work( int matrix_order, char side, char trans,
+                                 lapack_int m, lapack_int n, lapack_int k,
+                                 lapack_int l, lapack_int nb,
+                                 const lapack_complex_float* v, lapack_int ldv,
+                                 const lapack_complex_float* t, lapack_int ldt,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* work );
+lapack_int LAPACKE_ztpmqrt_work( int matrix_order, char side, char trans,
+                                 lapack_int m, lapack_int n, lapack_int k,
+                                 lapack_int l, lapack_int nb,
+                                 const lapack_complex_double* v, lapack_int ldv,
+                                 const lapack_complex_double* t, lapack_int ldt,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* work );
+
+lapack_int LAPACKE_dtpqrt_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int l, lapack_int nb, double* a,
+                                lapack_int lda, double* b, lapack_int ldb,
+                                double* t, lapack_int ldt, double* work );
+lapack_int LAPACKE_ctpqrt_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int l, lapack_int nb,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* t,
+                                lapack_complex_float* b, lapack_int ldb,
+                                lapack_int ldt, lapack_complex_float* work );
+lapack_int LAPACKE_ztpqrt_work( int matrix_order, lapack_int m, lapack_int n,
+                                lapack_int l, lapack_int nb,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                lapack_complex_double* t, lapack_int ldt,
+                                lapack_complex_double* work );
+
+lapack_int LAPACKE_stpqrt2_work( int matrix_order, lapack_int m, lapack_int n,
+                                 float* a, lapack_int lda, float* b,
+                                 lapack_int ldb, float* t, lapack_int ldt );
+lapack_int LAPACKE_dtpqrt2_work( int matrix_order, lapack_int m, lapack_int n,
+                                 double* a, lapack_int lda, double* b,
+                                 lapack_int ldb, double* t, lapack_int ldt );
+lapack_int LAPACKE_ctpqrt2_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_complex_float* a, lapack_int lda,
+                                 lapack_complex_float* b, lapack_int ldb,
+                                 lapack_complex_float* t, lapack_int ldt );
+lapack_int LAPACKE_ztpqrt2_work( int matrix_order, lapack_int m, lapack_int n,
+                                 lapack_complex_double* a, lapack_int lda,
+                                 lapack_complex_double* b, lapack_int ldb,
+                                 lapack_complex_double* t, lapack_int ldt );
+
+lapack_int LAPACKE_stprfb_work( int matrix_order, char side, char trans,
+                                char direct, char storev, lapack_int m,
+                                lapack_int n, lapack_int k, lapack_int l,
+                                const float* v, lapack_int ldv, const float* t,
+                                lapack_int ldt, float* a, lapack_int lda,
+                                float* b, lapack_int ldb, const float* mywork,
+                                lapack_int myldwork );
+lapack_int LAPACKE_dtprfb_work( int matrix_order, char side, char trans,
+                                char direct, char storev, lapack_int m,
+                                lapack_int n, lapack_int k, lapack_int l,
+                                const double* v, lapack_int ldv,
+                                const double* t, lapack_int ldt, double* a,
+                                lapack_int lda, double* b, lapack_int ldb,
+                                const double* mywork, lapack_int myldwork );
+lapack_int LAPACKE_ctprfb_work( int matrix_order, char side, char trans,
+                                char direct, char storev, lapack_int m,
+                                lapack_int n, lapack_int k, lapack_int l,
+                                const lapack_complex_float* v, lapack_int ldv,
+                                const lapack_complex_float* t, lapack_int ldt,
+                                lapack_complex_float* a, lapack_int lda,
+                                lapack_complex_float* b, lapack_int ldb,
+                                const float* mywork, lapack_int myldwork );
+lapack_int LAPACKE_ztprfb_work( int matrix_order, char side, char trans,
+                                char direct, char storev, lapack_int m,
+                                lapack_int n, lapack_int k, lapack_int l,
+                                const lapack_complex_double* v, lapack_int ldv,
+                                const lapack_complex_double* t, lapack_int ldt,
+                                lapack_complex_double* a, lapack_int lda,
+                                lapack_complex_double* b, lapack_int ldb,
+                                const double* mywork, lapack_int myldwork );
+//LAPACK 3.X.X
+lapack_int LAPACKE_csyr( int matrix_order, char uplo, lapack_int n,
+                             lapack_complex_float alpha,
+                             const lapack_complex_float* x, lapack_int incx,
+                             lapack_complex_float* a, lapack_int lda );
+lapack_int LAPACKE_zsyr( int matrix_order, char uplo, lapack_int n,
+                             lapack_complex_double alpha,
+                             const lapack_complex_double* x, lapack_int incx,
+                             lapack_complex_double* a, lapack_int lda );
+
+lapack_int LAPACKE_csyr_work( int matrix_order, char uplo, lapack_int n,
+                                  lapack_complex_float alpha,
+                                  const lapack_complex_float* x,
+                                  lapack_int incx, lapack_complex_float* a,
+                                  lapack_int lda );
+lapack_int LAPACKE_zsyr_work( int matrix_order, char uplo, lapack_int n,
+                                  lapack_complex_double alpha,
+                                  const lapack_complex_double* x,
+                                  lapack_int incx, lapack_complex_double* a,
+                                  lapack_int lda );
+
+
+
+#define LAPACK_sgetrf LAPACK_GLOBAL(sgetrf,SGETRF)
+#define LAPACK_dgetrf LAPACK_GLOBAL(dgetrf,DGETRF)
+#define LAPACK_cgetrf LAPACK_GLOBAL(cgetrf,CGETRF)
+#define LAPACK_zgetrf LAPACK_GLOBAL(zgetrf,ZGETRF)
+#define LAPACK_sgbtrf LAPACK_GLOBAL(sgbtrf,SGBTRF)
+#define LAPACK_dgbtrf LAPACK_GLOBAL(dgbtrf,DGBTRF)
+#define LAPACK_cgbtrf LAPACK_GLOBAL(cgbtrf,CGBTRF)
+#define LAPACK_zgbtrf LAPACK_GLOBAL(zgbtrf,ZGBTRF)
+#define LAPACK_sgttrf LAPACK_GLOBAL(sgttrf,SGTTRF)
+#define LAPACK_dgttrf LAPACK_GLOBAL(dgttrf,DGTTRF)
+#define LAPACK_cgttrf LAPACK_GLOBAL(cgttrf,CGTTRF)
+#define LAPACK_zgttrf LAPACK_GLOBAL(zgttrf,ZGTTRF)
+#define LAPACK_spotrf LAPACK_GLOBAL(spotrf,SPOTRF)
+#define LAPACK_dpotrf LAPACK_GLOBAL(dpotrf,DPOTRF)
+#define LAPACK_cpotrf LAPACK_GLOBAL(cpotrf,CPOTRF)
+#define LAPACK_zpotrf LAPACK_GLOBAL(zpotrf,ZPOTRF)
+#define LAPACK_dpstrf LAPACK_GLOBAL(dpstrf,DPSTRF)
+#define LAPACK_spstrf LAPACK_GLOBAL(spstrf,SPSTRF)
+#define LAPACK_zpstrf LAPACK_GLOBAL(zpstrf,ZPSTRF)
+#define LAPACK_cpstrf LAPACK_GLOBAL(cpstrf,CPSTRF)
+#define LAPACK_dpftrf LAPACK_GLOBAL(dpftrf,DPFTRF)
+#define LAPACK_spftrf LAPACK_GLOBAL(spftrf,SPFTRF)
+#define LAPACK_zpftrf LAPACK_GLOBAL(zpftrf,ZPFTRF)
+#define LAPACK_cpftrf LAPACK_GLOBAL(cpftrf,CPFTRF)
+#define LAPACK_spptrf LAPACK_GLOBAL(spptrf,SPPTRF)
+#define LAPACK_dpptrf LAPACK_GLOBAL(dpptrf,DPPTRF)
+#define LAPACK_cpptrf LAPACK_GLOBAL(cpptrf,CPPTRF)
+#define LAPACK_zpptrf LAPACK_GLOBAL(zpptrf,ZPPTRF)
+#define LAPACK_spbtrf LAPACK_GLOBAL(spbtrf,SPBTRF)
+#define LAPACK_dpbtrf LAPACK_GLOBAL(dpbtrf,DPBTRF)
+#define LAPACK_cpbtrf LAPACK_GLOBAL(cpbtrf,CPBTRF)
+#define LAPACK_zpbtrf LAPACK_GLOBAL(zpbtrf,ZPBTRF)
+#define LAPACK_spttrf LAPACK_GLOBAL(spttrf,SPTTRF)
+#define LAPACK_dpttrf LAPACK_GLOBAL(dpttrf,DPTTRF)
+#define LAPACK_cpttrf LAPACK_GLOBAL(cpttrf,CPTTRF)
+#define LAPACK_zpttrf LAPACK_GLOBAL(zpttrf,ZPTTRF)
+#define LAPACK_ssytrf LAPACK_GLOBAL(ssytrf,SSYTRF)
+#define LAPACK_dsytrf LAPACK_GLOBAL(dsytrf,DSYTRF)
+#define LAPACK_csytrf LAPACK_GLOBAL(csytrf,CSYTRF)
+#define LAPACK_zsytrf LAPACK_GLOBAL(zsytrf,ZSYTRF)
+#define LAPACK_chetrf LAPACK_GLOBAL(chetrf,CHETRF)
+#define LAPACK_zhetrf LAPACK_GLOBAL(zhetrf,ZHETRF)
+#define LAPACK_ssptrf LAPACK_GLOBAL(ssptrf,SSPTRF)
+#define LAPACK_dsptrf LAPACK_GLOBAL(dsptrf,DSPTRF)
+#define LAPACK_csptrf LAPACK_GLOBAL(csptrf,CSPTRF)
+#define LAPACK_zsptrf LAPACK_GLOBAL(zsptrf,ZSPTRF)
+#define LAPACK_chptrf LAPACK_GLOBAL(chptrf,CHPTRF)
+#define LAPACK_zhptrf LAPACK_GLOBAL(zhptrf,ZHPTRF)
+#define LAPACK_sgetrs LAPACK_GLOBAL(sgetrs,SGETRS)
+#define LAPACK_dgetrs LAPACK_GLOBAL(dgetrs,DGETRS)
+#define LAPACK_cgetrs LAPACK_GLOBAL(cgetrs,CGETRS)
+#define LAPACK_zgetrs LAPACK_GLOBAL(zgetrs,ZGETRS)
+#define LAPACK_sgbtrs LAPACK_GLOBAL(sgbtrs,SGBTRS)
+#define LAPACK_dgbtrs LAPACK_GLOBAL(dgbtrs,DGBTRS)
+#define LAPACK_cgbtrs LAPACK_GLOBAL(cgbtrs,CGBTRS)
+#define LAPACK_zgbtrs LAPACK_GLOBAL(zgbtrs,ZGBTRS)
+#define LAPACK_sgttrs LAPACK_GLOBAL(sgttrs,SGTTRS)
+#define LAPACK_dgttrs LAPACK_GLOBAL(dgttrs,DGTTRS)
+#define LAPACK_cgttrs LAPACK_GLOBAL(cgttrs,CGTTRS)
+#define LAPACK_zgttrs LAPACK_GLOBAL(zgttrs,ZGTTRS)
+#define LAPACK_spotrs LAPACK_GLOBAL(spotrs,SPOTRS)
+#define LAPACK_dpotrs LAPACK_GLOBAL(dpotrs,DPOTRS)
+#define LAPACK_cpotrs LAPACK_GLOBAL(cpotrs,CPOTRS)
+#define LAPACK_zpotrs LAPACK_GLOBAL(zpotrs,ZPOTRS)
+#define LAPACK_dpftrs LAPACK_GLOBAL(dpftrs,DPFTRS)
+#define LAPACK_spftrs LAPACK_GLOBAL(spftrs,SPFTRS)
+#define LAPACK_zpftrs LAPACK_GLOBAL(zpftrs,ZPFTRS)
+#define LAPACK_cpftrs LAPACK_GLOBAL(cpftrs,CPFTRS)
+#define LAPACK_spptrs LAPACK_GLOBAL(spptrs,SPPTRS)
+#define LAPACK_dpptrs LAPACK_GLOBAL(dpptrs,DPPTRS)
+#define LAPACK_cpptrs LAPACK_GLOBAL(cpptrs,CPPTRS)
+#define LAPACK_zpptrs LAPACK_GLOBAL(zpptrs,ZPPTRS)
+#define LAPACK_spbtrs LAPACK_GLOBAL(spbtrs,SPBTRS)
+#define LAPACK_dpbtrs LAPACK_GLOBAL(dpbtrs,DPBTRS)
+#define LAPACK_cpbtrs LAPACK_GLOBAL(cpbtrs,CPBTRS)
+#define LAPACK_zpbtrs LAPACK_GLOBAL(zpbtrs,ZPBTRS)
+#define LAPACK_spttrs LAPACK_GLOBAL(spttrs,SPTTRS)
+#define LAPACK_dpttrs LAPACK_GLOBAL(dpttrs,DPTTRS)
+#define LAPACK_cpttrs LAPACK_GLOBAL(cpttrs,CPTTRS)
+#define LAPACK_zpttrs LAPACK_GLOBAL(zpttrs,ZPTTRS)
+#define LAPACK_ssytrs LAPACK_GLOBAL(ssytrs,SSYTRS)
+#define LAPACK_dsytrs LAPACK_GLOBAL(dsytrs,DSYTRS)
+#define LAPACK_csytrs LAPACK_GLOBAL(csytrs,CSYTRS)
+#define LAPACK_zsytrs LAPACK_GLOBAL(zsytrs,ZSYTRS)
+#define LAPACK_chetrs LAPACK_GLOBAL(chetrs,CHETRS)
+#define LAPACK_zhetrs LAPACK_GLOBAL(zhetrs,ZHETRS)
+#define LAPACK_ssptrs LAPACK_GLOBAL(ssptrs,SSPTRS)
+#define LAPACK_dsptrs LAPACK_GLOBAL(dsptrs,DSPTRS)
+#define LAPACK_csptrs LAPACK_GLOBAL(csptrs,CSPTRS)
+#define LAPACK_zsptrs LAPACK_GLOBAL(zsptrs,ZSPTRS)
+#define LAPACK_chptrs LAPACK_GLOBAL(chptrs,CHPTRS)
+#define LAPACK_zhptrs LAPACK_GLOBAL(zhptrs,ZHPTRS)
+#define LAPACK_strtrs LAPACK_GLOBAL(strtrs,STRTRS)
+#define LAPACK_dtrtrs LAPACK_GLOBAL(dtrtrs,DTRTRS)
+#define LAPACK_ctrtrs LAPACK_GLOBAL(ctrtrs,CTRTRS)
+#define LAPACK_ztrtrs LAPACK_GLOBAL(ztrtrs,ZTRTRS)
+#define LAPACK_stptrs LAPACK_GLOBAL(stptrs,STPTRS)
+#define LAPACK_dtptrs LAPACK_GLOBAL(dtptrs,DTPTRS)
+#define LAPACK_ctptrs LAPACK_GLOBAL(ctptrs,CTPTRS)
+#define LAPACK_ztptrs LAPACK_GLOBAL(ztptrs,ZTPTRS)
+#define LAPACK_stbtrs LAPACK_GLOBAL(stbtrs,STBTRS)
+#define LAPACK_dtbtrs LAPACK_GLOBAL(dtbtrs,DTBTRS)
+#define LAPACK_ctbtrs LAPACK_GLOBAL(ctbtrs,CTBTRS)
+#define LAPACK_ztbtrs LAPACK_GLOBAL(ztbtrs,ZTBTRS)
+#define LAPACK_sgecon LAPACK_GLOBAL(sgecon,SGECON)
+#define LAPACK_dgecon LAPACK_GLOBAL(dgecon,DGECON)
+#define LAPACK_cgecon LAPACK_GLOBAL(cgecon,CGECON)
+#define LAPACK_zgecon LAPACK_GLOBAL(zgecon,ZGECON)
+#define LAPACK_sgbcon LAPACK_GLOBAL(sgbcon,SGBCON)
+#define LAPACK_dgbcon LAPACK_GLOBAL(dgbcon,DGBCON)
+#define LAPACK_cgbcon LAPACK_GLOBAL(cgbcon,CGBCON)
+#define LAPACK_zgbcon LAPACK_GLOBAL(zgbcon,ZGBCON)
+#define LAPACK_sgtcon LAPACK_GLOBAL(sgtcon,SGTCON)
+#define LAPACK_dgtcon LAPACK_GLOBAL(dgtcon,DGTCON)
+#define LAPACK_cgtcon LAPACK_GLOBAL(cgtcon,CGTCON)
+#define LAPACK_zgtcon LAPACK_GLOBAL(zgtcon,ZGTCON)
+#define LAPACK_spocon LAPACK_GLOBAL(spocon,SPOCON)
+#define LAPACK_dpocon LAPACK_GLOBAL(dpocon,DPOCON)
+#define LAPACK_cpocon LAPACK_GLOBAL(cpocon,CPOCON)
+#define LAPACK_zpocon LAPACK_GLOBAL(zpocon,ZPOCON)
+#define LAPACK_sppcon LAPACK_GLOBAL(sppcon,SPPCON)
+#define LAPACK_dppcon LAPACK_GLOBAL(dppcon,DPPCON)
+#define LAPACK_cppcon LAPACK_GLOBAL(cppcon,CPPCON)
+#define LAPACK_zppcon LAPACK_GLOBAL(zppcon,ZPPCON)
+#define LAPACK_spbcon LAPACK_GLOBAL(spbcon,SPBCON)
+#define LAPACK_dpbcon LAPACK_GLOBAL(dpbcon,DPBCON)
+#define LAPACK_cpbcon LAPACK_GLOBAL(cpbcon,CPBCON)
+#define LAPACK_zpbcon LAPACK_GLOBAL(zpbcon,ZPBCON)
+#define LAPACK_sptcon LAPACK_GLOBAL(sptcon,SPTCON)
+#define LAPACK_dptcon LAPACK_GLOBAL(dptcon,DPTCON)
+#define LAPACK_cptcon LAPACK_GLOBAL(cptcon,CPTCON)
+#define LAPACK_zptcon LAPACK_GLOBAL(zptcon,ZPTCON)
+#define LAPACK_ssycon LAPACK_GLOBAL(ssycon,SSYCON)
+#define LAPACK_dsycon LAPACK_GLOBAL(dsycon,DSYCON)
+#define LAPACK_csycon LAPACK_GLOBAL(csycon,CSYCON)
+#define LAPACK_zsycon LAPACK_GLOBAL(zsycon,ZSYCON)
+#define LAPACK_checon LAPACK_GLOBAL(checon,CHECON)
+#define LAPACK_zhecon LAPACK_GLOBAL(zhecon,ZHECON)
+#define LAPACK_sspcon LAPACK_GLOBAL(sspcon,SSPCON)
+#define LAPACK_dspcon LAPACK_GLOBAL(dspcon,DSPCON)
+#define LAPACK_cspcon LAPACK_GLOBAL(cspcon,CSPCON)
+#define LAPACK_zspcon LAPACK_GLOBAL(zspcon,ZSPCON)
+#define LAPACK_chpcon LAPACK_GLOBAL(chpcon,CHPCON)
+#define LAPACK_zhpcon LAPACK_GLOBAL(zhpcon,ZHPCON)
+#define LAPACK_strcon LAPACK_GLOBAL(strcon,STRCON)
+#define LAPACK_dtrcon LAPACK_GLOBAL(dtrcon,DTRCON)
+#define LAPACK_ctrcon LAPACK_GLOBAL(ctrcon,CTRCON)
+#define LAPACK_ztrcon LAPACK_GLOBAL(ztrcon,ZTRCON)
+#define LAPACK_stpcon LAPACK_GLOBAL(stpcon,STPCON)
+#define LAPACK_dtpcon LAPACK_GLOBAL(dtpcon,DTPCON)
+#define LAPACK_ctpcon LAPACK_GLOBAL(ctpcon,CTPCON)
+#define LAPACK_ztpcon LAPACK_GLOBAL(ztpcon,ZTPCON)
+#define LAPACK_stbcon LAPACK_GLOBAL(stbcon,STBCON)
+#define LAPACK_dtbcon LAPACK_GLOBAL(dtbcon,DTBCON)
+#define LAPACK_ctbcon LAPACK_GLOBAL(ctbcon,CTBCON)
+#define LAPACK_ztbcon LAPACK_GLOBAL(ztbcon,ZTBCON)
+#define LAPACK_sgerfs LAPACK_GLOBAL(sgerfs,SGERFS)
+#define LAPACK_dgerfs LAPACK_GLOBAL(dgerfs,DGERFS)
+#define LAPACK_cgerfs LAPACK_GLOBAL(cgerfs,CGERFS)
+#define LAPACK_zgerfs LAPACK_GLOBAL(zgerfs,ZGERFS)
+#define LAPACK_dgerfsx LAPACK_GLOBAL(dgerfsx,DGERFSX)
+#define LAPACK_sgerfsx LAPACK_GLOBAL(sgerfsx,SGERFSX)
+#define LAPACK_zgerfsx LAPACK_GLOBAL(zgerfsx,ZGERFSX)
+#define LAPACK_cgerfsx LAPACK_GLOBAL(cgerfsx,CGERFSX)
+#define LAPACK_sgbrfs LAPACK_GLOBAL(sgbrfs,SGBRFS)
+#define LAPACK_dgbrfs LAPACK_GLOBAL(dgbrfs,DGBRFS)
+#define LAPACK_cgbrfs LAPACK_GLOBAL(cgbrfs,CGBRFS)
+#define LAPACK_zgbrfs LAPACK_GLOBAL(zgbrfs,ZGBRFS)
+#define LAPACK_dgbrfsx LAPACK_GLOBAL(dgbrfsx,DGBRFSX)
+#define LAPACK_sgbrfsx LAPACK_GLOBAL(sgbrfsx,SGBRFSX)
+#define LAPACK_zgbrfsx LAPACK_GLOBAL(zgbrfsx,ZGBRFSX)
+#define LAPACK_cgbrfsx LAPACK_GLOBAL(cgbrfsx,CGBRFSX)
+#define LAPACK_sgtrfs LAPACK_GLOBAL(sgtrfs,SGTRFS)
+#define LAPACK_dgtrfs LAPACK_GLOBAL(dgtrfs,DGTRFS)
+#define LAPACK_cgtrfs LAPACK_GLOBAL(cgtrfs,CGTRFS)
+#define LAPACK_zgtrfs LAPACK_GLOBAL(zgtrfs,ZGTRFS)
+#define LAPACK_sporfs LAPACK_GLOBAL(sporfs,SPORFS)
+#define LAPACK_dporfs LAPACK_GLOBAL(dporfs,DPORFS)
+#define LAPACK_cporfs LAPACK_GLOBAL(cporfs,CPORFS)
+#define LAPACK_zporfs LAPACK_GLOBAL(zporfs,ZPORFS)
+#define LAPACK_dporfsx LAPACK_GLOBAL(dporfsx,DPORFSX)
+#define LAPACK_sporfsx LAPACK_GLOBAL(sporfsx,SPORFSX)
+#define LAPACK_zporfsx LAPACK_GLOBAL(zporfsx,ZPORFSX)
+#define LAPACK_cporfsx LAPACK_GLOBAL(cporfsx,CPORFSX)
+#define LAPACK_spprfs LAPACK_GLOBAL(spprfs,SPPRFS)
+#define LAPACK_dpprfs LAPACK_GLOBAL(dpprfs,DPPRFS)
+#define LAPACK_cpprfs LAPACK_GLOBAL(cpprfs,CPPRFS)
+#define LAPACK_zpprfs LAPACK_GLOBAL(zpprfs,ZPPRFS)
+#define LAPACK_spbrfs LAPACK_GLOBAL(spbrfs,SPBRFS)
+#define LAPACK_dpbrfs LAPACK_GLOBAL(dpbrfs,DPBRFS)
+#define LAPACK_cpbrfs LAPACK_GLOBAL(cpbrfs,CPBRFS)
+#define LAPACK_zpbrfs LAPACK_GLOBAL(zpbrfs,ZPBRFS)
+#define LAPACK_sptrfs LAPACK_GLOBAL(sptrfs,SPTRFS)
+#define LAPACK_dptrfs LAPACK_GLOBAL(dptrfs,DPTRFS)
+#define LAPACK_cptrfs LAPACK_GLOBAL(cptrfs,CPTRFS)
+#define LAPACK_zptrfs LAPACK_GLOBAL(zptrfs,ZPTRFS)
+#define LAPACK_ssyrfs LAPACK_GLOBAL(ssyrfs,SSYRFS)
+#define LAPACK_dsyrfs LAPACK_GLOBAL(dsyrfs,DSYRFS)
+#define LAPACK_csyrfs LAPACK_GLOBAL(csyrfs,CSYRFS)
+#define LAPACK_zsyrfs LAPACK_GLOBAL(zsyrfs,ZSYRFS)
+#define LAPACK_dsyrfsx LAPACK_GLOBAL(dsyrfsx,DSYRFSX)
+#define LAPACK_ssyrfsx LAPACK_GLOBAL(ssyrfsx,SSYRFSX)
+#define LAPACK_zsyrfsx LAPACK_GLOBAL(zsyrfsx,ZSYRFSX)
+#define LAPACK_csyrfsx LAPACK_GLOBAL(csyrfsx,CSYRFSX)
+#define LAPACK_cherfs LAPACK_GLOBAL(cherfs,CHERFS)
+#define LAPACK_zherfs LAPACK_GLOBAL(zherfs,ZHERFS)
+#define LAPACK_zherfsx LAPACK_GLOBAL(zherfsx,ZHERFSX)
+#define LAPACK_cherfsx LAPACK_GLOBAL(cherfsx,CHERFSX)
+#define LAPACK_ssprfs LAPACK_GLOBAL(ssprfs,SSPRFS)
+#define LAPACK_dsprfs LAPACK_GLOBAL(dsprfs,DSPRFS)
+#define LAPACK_csprfs LAPACK_GLOBAL(csprfs,CSPRFS)
+#define LAPACK_zsprfs LAPACK_GLOBAL(zsprfs,ZSPRFS)
+#define LAPACK_chprfs LAPACK_GLOBAL(chprfs,CHPRFS)
+#define LAPACK_zhprfs LAPACK_GLOBAL(zhprfs,ZHPRFS)
+#define LAPACK_strrfs LAPACK_GLOBAL(strrfs,STRRFS)
+#define LAPACK_dtrrfs LAPACK_GLOBAL(dtrrfs,DTRRFS)
+#define LAPACK_ctrrfs LAPACK_GLOBAL(ctrrfs,CTRRFS)
+#define LAPACK_ztrrfs LAPACK_GLOBAL(ztrrfs,ZTRRFS)
+#define LAPACK_stprfs LAPACK_GLOBAL(stprfs,STPRFS)
+#define LAPACK_dtprfs LAPACK_GLOBAL(dtprfs,DTPRFS)
+#define LAPACK_ctprfs LAPACK_GLOBAL(ctprfs,CTPRFS)
+#define LAPACK_ztprfs LAPACK_GLOBAL(ztprfs,ZTPRFS)
+#define LAPACK_stbrfs LAPACK_GLOBAL(stbrfs,STBRFS)
+#define LAPACK_dtbrfs LAPACK_GLOBAL(dtbrfs,DTBRFS)
+#define LAPACK_ctbrfs LAPACK_GLOBAL(ctbrfs,CTBRFS)
+#define LAPACK_ztbrfs LAPACK_GLOBAL(ztbrfs,ZTBRFS)
+#define LAPACK_sgetri LAPACK_GLOBAL(sgetri,SGETRI)
+#define LAPACK_dgetri LAPACK_GLOBAL(dgetri,DGETRI)
+#define LAPACK_cgetri LAPACK_GLOBAL(cgetri,CGETRI)
+#define LAPACK_zgetri LAPACK_GLOBAL(zgetri,ZGETRI)
+#define LAPACK_spotri LAPACK_GLOBAL(spotri,SPOTRI)
+#define LAPACK_dpotri LAPACK_GLOBAL(dpotri,DPOTRI)
+#define LAPACK_cpotri LAPACK_GLOBAL(cpotri,CPOTRI)
+#define LAPACK_zpotri LAPACK_GLOBAL(zpotri,ZPOTRI)
+#define LAPACK_dpftri LAPACK_GLOBAL(dpftri,DPFTRI)
+#define LAPACK_spftri LAPACK_GLOBAL(spftri,SPFTRI)
+#define LAPACK_zpftri LAPACK_GLOBAL(zpftri,ZPFTRI)
+#define LAPACK_cpftri LAPACK_GLOBAL(cpftri,CPFTRI)
+#define LAPACK_spptri LAPACK_GLOBAL(spptri,SPPTRI)
+#define LAPACK_dpptri LAPACK_GLOBAL(dpptri,DPPTRI)
+#define LAPACK_cpptri LAPACK_GLOBAL(cpptri,CPPTRI)
+#define LAPACK_zpptri LAPACK_GLOBAL(zpptri,ZPPTRI)
+#define LAPACK_ssytri LAPACK_GLOBAL(ssytri,SSYTRI)
+#define LAPACK_dsytri LAPACK_GLOBAL(dsytri,DSYTRI)
+#define LAPACK_csytri LAPACK_GLOBAL(csytri,CSYTRI)
+#define LAPACK_zsytri LAPACK_GLOBAL(zsytri,ZSYTRI)
+#define LAPACK_chetri LAPACK_GLOBAL(chetri,CHETRI)
+#define LAPACK_zhetri LAPACK_GLOBAL(zhetri,ZHETRI)
+#define LAPACK_ssptri LAPACK_GLOBAL(ssptri,SSPTRI)
+#define LAPACK_dsptri LAPACK_GLOBAL(dsptri,DSPTRI)
+#define LAPACK_csptri LAPACK_GLOBAL(csptri,CSPTRI)
+#define LAPACK_zsptri LAPACK_GLOBAL(zsptri,ZSPTRI)
+#define LAPACK_chptri LAPACK_GLOBAL(chptri,CHPTRI)
+#define LAPACK_zhptri LAPACK_GLOBAL(zhptri,ZHPTRI)
+#define LAPACK_strtri LAPACK_GLOBAL(strtri,STRTRI)
+#define LAPACK_dtrtri LAPACK_GLOBAL(dtrtri,DTRTRI)
+#define LAPACK_ctrtri LAPACK_GLOBAL(ctrtri,CTRTRI)
+#define LAPACK_ztrtri LAPACK_GLOBAL(ztrtri,ZTRTRI)
+#define LAPACK_dtftri LAPACK_GLOBAL(dtftri,DTFTRI)
+#define LAPACK_stftri LAPACK_GLOBAL(stftri,STFTRI)
+#define LAPACK_ztftri LAPACK_GLOBAL(ztftri,ZTFTRI)
+#define LAPACK_ctftri LAPACK_GLOBAL(ctftri,CTFTRI)
+#define LAPACK_stptri LAPACK_GLOBAL(stptri,STPTRI)
+#define LAPACK_dtptri LAPACK_GLOBAL(dtptri,DTPTRI)
+#define LAPACK_ctptri LAPACK_GLOBAL(ctptri,CTPTRI)
+#define LAPACK_ztptri LAPACK_GLOBAL(ztptri,ZTPTRI)
+#define LAPACK_sgeequ LAPACK_GLOBAL(sgeequ,SGEEQU)
+#define LAPACK_dgeequ LAPACK_GLOBAL(dgeequ,DGEEQU)
+#define LAPACK_cgeequ LAPACK_GLOBAL(cgeequ,CGEEQU)
+#define LAPACK_zgeequ LAPACK_GLOBAL(zgeequ,ZGEEQU)
+#define LAPACK_dgeequb LAPACK_GLOBAL(dgeequb,DGEEQUB)
+#define LAPACK_sgeequb LAPACK_GLOBAL(sgeequb,SGEEQUB)
+#define LAPACK_zgeequb LAPACK_GLOBAL(zgeequb,ZGEEQUB)
+#define LAPACK_cgeequb LAPACK_GLOBAL(cgeequb,CGEEQUB)
+#define LAPACK_sgbequ LAPACK_GLOBAL(sgbequ,SGBEQU)
+#define LAPACK_dgbequ LAPACK_GLOBAL(dgbequ,DGBEQU)
+#define LAPACK_cgbequ LAPACK_GLOBAL(cgbequ,CGBEQU)
+#define LAPACK_zgbequ LAPACK_GLOBAL(zgbequ,ZGBEQU)
+#define LAPACK_dgbequb LAPACK_GLOBAL(dgbequb,DGBEQUB)
+#define LAPACK_sgbequb LAPACK_GLOBAL(sgbequb,SGBEQUB)
+#define LAPACK_zgbequb LAPACK_GLOBAL(zgbequb,ZGBEQUB)
+#define LAPACK_cgbequb LAPACK_GLOBAL(cgbequb,CGBEQUB)
+#define LAPACK_spoequ LAPACK_GLOBAL(spoequ,SPOEQU)
+#define LAPACK_dpoequ LAPACK_GLOBAL(dpoequ,DPOEQU)
+#define LAPACK_cpoequ LAPACK_GLOBAL(cpoequ,CPOEQU)
+#define LAPACK_zpoequ LAPACK_GLOBAL(zpoequ,ZPOEQU)
+#define LAPACK_dpoequb LAPACK_GLOBAL(dpoequb,DPOEQUB)
+#define LAPACK_spoequb LAPACK_GLOBAL(spoequb,SPOEQUB)
+#define LAPACK_zpoequb LAPACK_GLOBAL(zpoequb,ZPOEQUB)
+#define LAPACK_cpoequb LAPACK_GLOBAL(cpoequb,CPOEQUB)
+#define LAPACK_sppequ LAPACK_GLOBAL(sppequ,SPPEQU)
+#define LAPACK_dppequ LAPACK_GLOBAL(dppequ,DPPEQU)
+#define LAPACK_cppequ LAPACK_GLOBAL(cppequ,CPPEQU)
+#define LAPACK_zppequ LAPACK_GLOBAL(zppequ,ZPPEQU)
+#define LAPACK_spbequ LAPACK_GLOBAL(spbequ,SPBEQU)
+#define LAPACK_dpbequ LAPACK_GLOBAL(dpbequ,DPBEQU)
+#define LAPACK_cpbequ LAPACK_GLOBAL(cpbequ,CPBEQU)
+#define LAPACK_zpbequ LAPACK_GLOBAL(zpbequ,ZPBEQU)
+#define LAPACK_dsyequb LAPACK_GLOBAL(dsyequb,DSYEQUB)
+#define LAPACK_ssyequb LAPACK_GLOBAL(ssyequb,SSYEQUB)
+#define LAPACK_zsyequb LAPACK_GLOBAL(zsyequb,ZSYEQUB)
+#define LAPACK_csyequb LAPACK_GLOBAL(csyequb,CSYEQUB)
+#define LAPACK_zheequb LAPACK_GLOBAL(zheequb,ZHEEQUB)
+#define LAPACK_cheequb LAPACK_GLOBAL(cheequb,CHEEQUB)
+#define LAPACK_sgesv LAPACK_GLOBAL(sgesv,SGESV)
+#define LAPACK_dgesv LAPACK_GLOBAL(dgesv,DGESV)
+#define LAPACK_cgesv LAPACK_GLOBAL(cgesv,CGESV)
+#define LAPACK_zgesv LAPACK_GLOBAL(zgesv,ZGESV)
+#define LAPACK_dsgesv LAPACK_GLOBAL(dsgesv,DSGESV)
+#define LAPACK_zcgesv LAPACK_GLOBAL(zcgesv,ZCGESV)
+#define LAPACK_sgesvx LAPACK_GLOBAL(sgesvx,SGESVX)
+#define LAPACK_dgesvx LAPACK_GLOBAL(dgesvx,DGESVX)
+#define LAPACK_cgesvx LAPACK_GLOBAL(cgesvx,CGESVX)
+#define LAPACK_zgesvx LAPACK_GLOBAL(zgesvx,ZGESVX)
+#define LAPACK_dgesvxx LAPACK_GLOBAL(dgesvxx,DGESVXX)
+#define LAPACK_sgesvxx LAPACK_GLOBAL(sgesvxx,SGESVXX)
+#define LAPACK_zgesvxx LAPACK_GLOBAL(zgesvxx,ZGESVXX)
+#define LAPACK_cgesvxx LAPACK_GLOBAL(cgesvxx,CGESVXX)
+#define LAPACK_sgbsv LAPACK_GLOBAL(sgbsv,SGBSV)
+#define LAPACK_dgbsv LAPACK_GLOBAL(dgbsv,DGBSV)
+#define LAPACK_cgbsv LAPACK_GLOBAL(cgbsv,CGBSV)
+#define LAPACK_zgbsv LAPACK_GLOBAL(zgbsv,ZGBSV)
+#define LAPACK_sgbsvx LAPACK_GLOBAL(sgbsvx,SGBSVX)
+#define LAPACK_dgbsvx LAPACK_GLOBAL(dgbsvx,DGBSVX)
+#define LAPACK_cgbsvx LAPACK_GLOBAL(cgbsvx,CGBSVX)
+#define LAPACK_zgbsvx LAPACK_GLOBAL(zgbsvx,ZGBSVX)
+#define LAPACK_dgbsvxx LAPACK_GLOBAL(dgbsvxx,DGBSVXX)
+#define LAPACK_sgbsvxx LAPACK_GLOBAL(sgbsvxx,SGBSVXX)
+#define LAPACK_zgbsvxx LAPACK_GLOBAL(zgbsvxx,ZGBSVXX)
+#define LAPACK_cgbsvxx LAPACK_GLOBAL(cgbsvxx,CGBSVXX)
+#define LAPACK_sgtsv LAPACK_GLOBAL(sgtsv,SGTSV)
+#define LAPACK_dgtsv LAPACK_GLOBAL(dgtsv,DGTSV)
+#define LAPACK_cgtsv LAPACK_GLOBAL(cgtsv,CGTSV)
+#define LAPACK_zgtsv LAPACK_GLOBAL(zgtsv,ZGTSV)
+#define LAPACK_sgtsvx LAPACK_GLOBAL(sgtsvx,SGTSVX)
+#define LAPACK_dgtsvx LAPACK_GLOBAL(dgtsvx,DGTSVX)
+#define LAPACK_cgtsvx LAPACK_GLOBAL(cgtsvx,CGTSVX)
+#define LAPACK_zgtsvx LAPACK_GLOBAL(zgtsvx,ZGTSVX)
+#define LAPACK_sposv LAPACK_GLOBAL(sposv,SPOSV)
+#define LAPACK_dposv LAPACK_GLOBAL(dposv,DPOSV)
+#define LAPACK_cposv LAPACK_GLOBAL(cposv,CPOSV)
+#define LAPACK_zposv LAPACK_GLOBAL(zposv,ZPOSV)
+#define LAPACK_dsposv LAPACK_GLOBAL(dsposv,DSPOSV)
+#define LAPACK_zcposv LAPACK_GLOBAL(zcposv,ZCPOSV)
+#define LAPACK_sposvx LAPACK_GLOBAL(sposvx,SPOSVX)
+#define LAPACK_dposvx LAPACK_GLOBAL(dposvx,DPOSVX)
+#define LAPACK_cposvx LAPACK_GLOBAL(cposvx,CPOSVX)
+#define LAPACK_zposvx LAPACK_GLOBAL(zposvx,ZPOSVX)
+#define LAPACK_dposvxx LAPACK_GLOBAL(dposvxx,DPOSVXX)
+#define LAPACK_sposvxx LAPACK_GLOBAL(sposvxx,SPOSVXX)
+#define LAPACK_zposvxx LAPACK_GLOBAL(zposvxx,ZPOSVXX)
+#define LAPACK_cposvxx LAPACK_GLOBAL(cposvxx,CPOSVXX)
+#define LAPACK_sppsv LAPACK_GLOBAL(sppsv,SPPSV)
+#define LAPACK_dppsv LAPACK_GLOBAL(dppsv,DPPSV)
+#define LAPACK_cppsv LAPACK_GLOBAL(cppsv,CPPSV)
+#define LAPACK_zppsv LAPACK_GLOBAL(zppsv,ZPPSV)
+#define LAPACK_sppsvx LAPACK_GLOBAL(sppsvx,SPPSVX)
+#define LAPACK_dppsvx LAPACK_GLOBAL(dppsvx,DPPSVX)
+#define LAPACK_cppsvx LAPACK_GLOBAL(cppsvx,CPPSVX)
+#define LAPACK_zppsvx LAPACK_GLOBAL(zppsvx,ZPPSVX)
+#define LAPACK_spbsv LAPACK_GLOBAL(spbsv,SPBSV)
+#define LAPACK_dpbsv LAPACK_GLOBAL(dpbsv,DPBSV)
+#define LAPACK_cpbsv LAPACK_GLOBAL(cpbsv,CPBSV)
+#define LAPACK_zpbsv LAPACK_GLOBAL(zpbsv,ZPBSV)
+#define LAPACK_spbsvx LAPACK_GLOBAL(spbsvx,SPBSVX)
+#define LAPACK_dpbsvx LAPACK_GLOBAL(dpbsvx,DPBSVX)
+#define LAPACK_cpbsvx LAPACK_GLOBAL(cpbsvx,CPBSVX)
+#define LAPACK_zpbsvx LAPACK_GLOBAL(zpbsvx,ZPBSVX)
+#define LAPACK_sptsv LAPACK_GLOBAL(sptsv,SPTSV)
+#define LAPACK_dptsv LAPACK_GLOBAL(dptsv,DPTSV)
+#define LAPACK_cptsv LAPACK_GLOBAL(cptsv,CPTSV)
+#define LAPACK_zptsv LAPACK_GLOBAL(zptsv,ZPTSV)
+#define LAPACK_sptsvx LAPACK_GLOBAL(sptsvx,SPTSVX)
+#define LAPACK_dptsvx LAPACK_GLOBAL(dptsvx,DPTSVX)
+#define LAPACK_cptsvx LAPACK_GLOBAL(cptsvx,CPTSVX)
+#define LAPACK_zptsvx LAPACK_GLOBAL(zptsvx,ZPTSVX)
+#define LAPACK_ssysv LAPACK_GLOBAL(ssysv,SSYSV)
+#define LAPACK_dsysv LAPACK_GLOBAL(dsysv,DSYSV)
+#define LAPACK_csysv LAPACK_GLOBAL(csysv,CSYSV)
+#define LAPACK_zsysv LAPACK_GLOBAL(zsysv,ZSYSV)
+#define LAPACK_ssysvx LAPACK_GLOBAL(ssysvx,SSYSVX)
+#define LAPACK_dsysvx LAPACK_GLOBAL(dsysvx,DSYSVX)
+#define LAPACK_csysvx LAPACK_GLOBAL(csysvx,CSYSVX)
+#define LAPACK_zsysvx LAPACK_GLOBAL(zsysvx,ZSYSVX)
+#define LAPACK_dsysvxx LAPACK_GLOBAL(dsysvxx,DSYSVXX)
+#define LAPACK_ssysvxx LAPACK_GLOBAL(ssysvxx,SSYSVXX)
+#define LAPACK_zsysvxx LAPACK_GLOBAL(zsysvxx,ZSYSVXX)
+#define LAPACK_csysvxx LAPACK_GLOBAL(csysvxx,CSYSVXX)
+#define LAPACK_chesv LAPACK_GLOBAL(chesv,CHESV)
+#define LAPACK_zhesv LAPACK_GLOBAL(zhesv,ZHESV)
+#define LAPACK_chesvx LAPACK_GLOBAL(chesvx,CHESVX)
+#define LAPACK_zhesvx LAPACK_GLOBAL(zhesvx,ZHESVX)
+#define LAPACK_zhesvxx LAPACK_GLOBAL(zhesvxx,ZHESVXX)
+#define LAPACK_chesvxx LAPACK_GLOBAL(chesvxx,CHESVXX)
+#define LAPACK_sspsv LAPACK_GLOBAL(sspsv,SSPSV)
+#define LAPACK_dspsv LAPACK_GLOBAL(dspsv,DSPSV)
+#define LAPACK_cspsv LAPACK_GLOBAL(cspsv,CSPSV)
+#define LAPACK_zspsv LAPACK_GLOBAL(zspsv,ZSPSV)
+#define LAPACK_sspsvx LAPACK_GLOBAL(sspsvx,SSPSVX)
+#define LAPACK_dspsvx LAPACK_GLOBAL(dspsvx,DSPSVX)
+#define LAPACK_cspsvx LAPACK_GLOBAL(cspsvx,CSPSVX)
+#define LAPACK_zspsvx LAPACK_GLOBAL(zspsvx,ZSPSVX)
+#define LAPACK_chpsv LAPACK_GLOBAL(chpsv,CHPSV)
+#define LAPACK_zhpsv LAPACK_GLOBAL(zhpsv,ZHPSV)
+#define LAPACK_chpsvx LAPACK_GLOBAL(chpsvx,CHPSVX)
+#define LAPACK_zhpsvx LAPACK_GLOBAL(zhpsvx,ZHPSVX)
+#define LAPACK_sgeqrf LAPACK_GLOBAL(sgeqrf,SGEQRF)
+#define LAPACK_dgeqrf LAPACK_GLOBAL(dgeqrf,DGEQRF)
+#define LAPACK_cgeqrf LAPACK_GLOBAL(cgeqrf,CGEQRF)
+#define LAPACK_zgeqrf LAPACK_GLOBAL(zgeqrf,ZGEQRF)
+#define LAPACK_sgeqpf LAPACK_GLOBAL(sgeqpf,SGEQPF)
+#define LAPACK_dgeqpf LAPACK_GLOBAL(dgeqpf,DGEQPF)
+#define LAPACK_cgeqpf LAPACK_GLOBAL(cgeqpf,CGEQPF)
+#define LAPACK_zgeqpf LAPACK_GLOBAL(zgeqpf,ZGEQPF)
+#define LAPACK_sgeqp3 LAPACK_GLOBAL(sgeqp3,SGEQP3)
+#define LAPACK_dgeqp3 LAPACK_GLOBAL(dgeqp3,DGEQP3)
+#define LAPACK_cgeqp3 LAPACK_GLOBAL(cgeqp3,CGEQP3)
+#define LAPACK_zgeqp3 LAPACK_GLOBAL(zgeqp3,ZGEQP3)
+#define LAPACK_sorgqr LAPACK_GLOBAL(sorgqr,SORGQR)
+#define LAPACK_dorgqr LAPACK_GLOBAL(dorgqr,DORGQR)
+#define LAPACK_sormqr LAPACK_GLOBAL(sormqr,SORMQR)
+#define LAPACK_dormqr LAPACK_GLOBAL(dormqr,DORMQR)
+#define LAPACK_cungqr LAPACK_GLOBAL(cungqr,CUNGQR)
+#define LAPACK_zungqr LAPACK_GLOBAL(zungqr,ZUNGQR)
+#define LAPACK_cunmqr LAPACK_GLOBAL(cunmqr,CUNMQR)
+#define LAPACK_zunmqr LAPACK_GLOBAL(zunmqr,ZUNMQR)
+#define LAPACK_sgelqf LAPACK_GLOBAL(sgelqf,SGELQF)
+#define LAPACK_dgelqf LAPACK_GLOBAL(dgelqf,DGELQF)
+#define LAPACK_cgelqf LAPACK_GLOBAL(cgelqf,CGELQF)
+#define LAPACK_zgelqf LAPACK_GLOBAL(zgelqf,ZGELQF)
+#define LAPACK_sorglq LAPACK_GLOBAL(sorglq,SORGLQ)
+#define LAPACK_dorglq LAPACK_GLOBAL(dorglq,DORGLQ)
+#define LAPACK_sormlq LAPACK_GLOBAL(sormlq,SORMLQ)
+#define LAPACK_dormlq LAPACK_GLOBAL(dormlq,DORMLQ)
+#define LAPACK_cunglq LAPACK_GLOBAL(cunglq,CUNGLQ)
+#define LAPACK_zunglq LAPACK_GLOBAL(zunglq,ZUNGLQ)
+#define LAPACK_cunmlq LAPACK_GLOBAL(cunmlq,CUNMLQ)
+#define LAPACK_zunmlq LAPACK_GLOBAL(zunmlq,ZUNMLQ)
+#define LAPACK_sgeqlf LAPACK_GLOBAL(sgeqlf,SGEQLF)
+#define LAPACK_dgeqlf LAPACK_GLOBAL(dgeqlf,DGEQLF)
+#define LAPACK_cgeqlf LAPACK_GLOBAL(cgeqlf,CGEQLF)
+#define LAPACK_zgeqlf LAPACK_GLOBAL(zgeqlf,ZGEQLF)
+#define LAPACK_sorgql LAPACK_GLOBAL(sorgql,SORGQL)
+#define LAPACK_dorgql LAPACK_GLOBAL(dorgql,DORGQL)
+#define LAPACK_cungql LAPACK_GLOBAL(cungql,CUNGQL)
+#define LAPACK_zungql LAPACK_GLOBAL(zungql,ZUNGQL)
+#define LAPACK_sormql LAPACK_GLOBAL(sormql,SORMQL)
+#define LAPACK_dormql LAPACK_GLOBAL(dormql,DORMQL)
+#define LAPACK_cunmql LAPACK_GLOBAL(cunmql,CUNMQL)
+#define LAPACK_zunmql LAPACK_GLOBAL(zunmql,ZUNMQL)
+#define LAPACK_sgerqf LAPACK_GLOBAL(sgerqf,SGERQF)
+#define LAPACK_dgerqf LAPACK_GLOBAL(dgerqf,DGERQF)
+#define LAPACK_cgerqf LAPACK_GLOBAL(cgerqf,CGERQF)
+#define LAPACK_zgerqf LAPACK_GLOBAL(zgerqf,ZGERQF)
+#define LAPACK_sorgrq LAPACK_GLOBAL(sorgrq,SORGRQ)
+#define LAPACK_dorgrq LAPACK_GLOBAL(dorgrq,DORGRQ)
+#define LAPACK_cungrq LAPACK_GLOBAL(cungrq,CUNGRQ)
+#define LAPACK_zungrq LAPACK_GLOBAL(zungrq,ZUNGRQ)
+#define LAPACK_sormrq LAPACK_GLOBAL(sormrq,SORMRQ)
+#define LAPACK_dormrq LAPACK_GLOBAL(dormrq,DORMRQ)
+#define LAPACK_cunmrq LAPACK_GLOBAL(cunmrq,CUNMRQ)
+#define LAPACK_zunmrq LAPACK_GLOBAL(zunmrq,ZUNMRQ)
+#define LAPACK_stzrzf LAPACK_GLOBAL(stzrzf,STZRZF)
+#define LAPACK_dtzrzf LAPACK_GLOBAL(dtzrzf,DTZRZF)
+#define LAPACK_ctzrzf LAPACK_GLOBAL(ctzrzf,CTZRZF)
+#define LAPACK_ztzrzf LAPACK_GLOBAL(ztzrzf,ZTZRZF)
+#define LAPACK_sormrz LAPACK_GLOBAL(sormrz,SORMRZ)
+#define LAPACK_dormrz LAPACK_GLOBAL(dormrz,DORMRZ)
+#define LAPACK_cunmrz LAPACK_GLOBAL(cunmrz,CUNMRZ)
+#define LAPACK_zunmrz LAPACK_GLOBAL(zunmrz,ZUNMRZ)
+#define LAPACK_sggqrf LAPACK_GLOBAL(sggqrf,SGGQRF)
+#define LAPACK_dggqrf LAPACK_GLOBAL(dggqrf,DGGQRF)
+#define LAPACK_cggqrf LAPACK_GLOBAL(cggqrf,CGGQRF)
+#define LAPACK_zggqrf LAPACK_GLOBAL(zggqrf,ZGGQRF)
+#define LAPACK_sggrqf LAPACK_GLOBAL(sggrqf,SGGRQF)
+#define LAPACK_dggrqf LAPACK_GLOBAL(dggrqf,DGGRQF)
+#define LAPACK_cggrqf LAPACK_GLOBAL(cggrqf,CGGRQF)
+#define LAPACK_zggrqf LAPACK_GLOBAL(zggrqf,ZGGRQF)
+#define LAPACK_sgebrd LAPACK_GLOBAL(sgebrd,SGEBRD)
+#define LAPACK_dgebrd LAPACK_GLOBAL(dgebrd,DGEBRD)
+#define LAPACK_cgebrd LAPACK_GLOBAL(cgebrd,CGEBRD)
+#define LAPACK_zgebrd LAPACK_GLOBAL(zgebrd,ZGEBRD)
+#define LAPACK_sgbbrd LAPACK_GLOBAL(sgbbrd,SGBBRD)
+#define LAPACK_dgbbrd LAPACK_GLOBAL(dgbbrd,DGBBRD)
+#define LAPACK_cgbbrd LAPACK_GLOBAL(cgbbrd,CGBBRD)
+#define LAPACK_zgbbrd LAPACK_GLOBAL(zgbbrd,ZGBBRD)
+#define LAPACK_sorgbr LAPACK_GLOBAL(sorgbr,SORGBR)
+#define LAPACK_dorgbr LAPACK_GLOBAL(dorgbr,DORGBR)
+#define LAPACK_sormbr LAPACK_GLOBAL(sormbr,SORMBR)
+#define LAPACK_dormbr LAPACK_GLOBAL(dormbr,DORMBR)
+#define LAPACK_cungbr LAPACK_GLOBAL(cungbr,CUNGBR)
+#define LAPACK_zungbr LAPACK_GLOBAL(zungbr,ZUNGBR)
+#define LAPACK_cunmbr LAPACK_GLOBAL(cunmbr,CUNMBR)
+#define LAPACK_zunmbr LAPACK_GLOBAL(zunmbr,ZUNMBR)
+#define LAPACK_sbdsqr LAPACK_GLOBAL(sbdsqr,SBDSQR)
+#define LAPACK_dbdsqr LAPACK_GLOBAL(dbdsqr,DBDSQR)
+#define LAPACK_cbdsqr LAPACK_GLOBAL(cbdsqr,CBDSQR)
+#define LAPACK_zbdsqr LAPACK_GLOBAL(zbdsqr,ZBDSQR)
+#define LAPACK_sbdsdc LAPACK_GLOBAL(sbdsdc,SBDSDC)
+#define LAPACK_dbdsdc LAPACK_GLOBAL(dbdsdc,DBDSDC)
+#define LAPACK_ssytrd LAPACK_GLOBAL(ssytrd,SSYTRD)
+#define LAPACK_dsytrd LAPACK_GLOBAL(dsytrd,DSYTRD)
+#define LAPACK_sorgtr LAPACK_GLOBAL(sorgtr,SORGTR)
+#define LAPACK_dorgtr LAPACK_GLOBAL(dorgtr,DORGTR)
+#define LAPACK_sormtr LAPACK_GLOBAL(sormtr,SORMTR)
+#define LAPACK_dormtr LAPACK_GLOBAL(dormtr,DORMTR)
+#define LAPACK_chetrd LAPACK_GLOBAL(chetrd,CHETRD)
+#define LAPACK_zhetrd LAPACK_GLOBAL(zhetrd,ZHETRD)
+#define LAPACK_cungtr LAPACK_GLOBAL(cungtr,CUNGTR)
+#define LAPACK_zungtr LAPACK_GLOBAL(zungtr,ZUNGTR)
+#define LAPACK_cunmtr LAPACK_GLOBAL(cunmtr,CUNMTR)
+#define LAPACK_zunmtr LAPACK_GLOBAL(zunmtr,ZUNMTR)
+#define LAPACK_ssptrd LAPACK_GLOBAL(ssptrd,SSPTRD)
+#define LAPACK_dsptrd LAPACK_GLOBAL(dsptrd,DSPTRD)
+#define LAPACK_sopgtr LAPACK_GLOBAL(sopgtr,SOPGTR)
+#define LAPACK_dopgtr LAPACK_GLOBAL(dopgtr,DOPGTR)
+#define LAPACK_sopmtr LAPACK_GLOBAL(sopmtr,SOPMTR)
+#define LAPACK_dopmtr LAPACK_GLOBAL(dopmtr,DOPMTR)
+#define LAPACK_chptrd LAPACK_GLOBAL(chptrd,CHPTRD)
+#define LAPACK_zhptrd LAPACK_GLOBAL(zhptrd,ZHPTRD)
+#define LAPACK_cupgtr LAPACK_GLOBAL(cupgtr,CUPGTR)
+#define LAPACK_zupgtr LAPACK_GLOBAL(zupgtr,ZUPGTR)
+#define LAPACK_cupmtr LAPACK_GLOBAL(cupmtr,CUPMTR)
+#define LAPACK_zupmtr LAPACK_GLOBAL(zupmtr,ZUPMTR)
+#define LAPACK_ssbtrd LAPACK_GLOBAL(ssbtrd,SSBTRD)
+#define LAPACK_dsbtrd LAPACK_GLOBAL(dsbtrd,DSBTRD)
+#define LAPACK_chbtrd LAPACK_GLOBAL(chbtrd,CHBTRD)
+#define LAPACK_zhbtrd LAPACK_GLOBAL(zhbtrd,ZHBTRD)
+#define LAPACK_ssterf LAPACK_GLOBAL(ssterf,SSTERF)
+#define LAPACK_dsterf LAPACK_GLOBAL(dsterf,DSTERF)
+#define LAPACK_ssteqr LAPACK_GLOBAL(ssteqr,SSTEQR)
+#define LAPACK_dsteqr LAPACK_GLOBAL(dsteqr,DSTEQR)
+#define LAPACK_csteqr LAPACK_GLOBAL(csteqr,CSTEQR)
+#define LAPACK_zsteqr LAPACK_GLOBAL(zsteqr,ZSTEQR)
+#define LAPACK_sstemr LAPACK_GLOBAL(sstemr,SSTEMR)
+#define LAPACK_dstemr LAPACK_GLOBAL(dstemr,DSTEMR)
+#define LAPACK_cstemr LAPACK_GLOBAL(cstemr,CSTEMR)
+#define LAPACK_zstemr LAPACK_GLOBAL(zstemr,ZSTEMR)
+#define LAPACK_sstedc LAPACK_GLOBAL(sstedc,SSTEDC)
+#define LAPACK_dstedc LAPACK_GLOBAL(dstedc,DSTEDC)
+#define LAPACK_cstedc LAPACK_GLOBAL(cstedc,CSTEDC)
+#define LAPACK_zstedc LAPACK_GLOBAL(zstedc,ZSTEDC)
+#define LAPACK_sstegr LAPACK_GLOBAL(sstegr,SSTEGR)
+#define LAPACK_dstegr LAPACK_GLOBAL(dstegr,DSTEGR)
+#define LAPACK_cstegr LAPACK_GLOBAL(cstegr,CSTEGR)
+#define LAPACK_zstegr LAPACK_GLOBAL(zstegr,ZSTEGR)
+#define LAPACK_spteqr LAPACK_GLOBAL(spteqr,SPTEQR)
+#define LAPACK_dpteqr LAPACK_GLOBAL(dpteqr,DPTEQR)
+#define LAPACK_cpteqr LAPACK_GLOBAL(cpteqr,CPTEQR)
+#define LAPACK_zpteqr LAPACK_GLOBAL(zpteqr,ZPTEQR)
+#define LAPACK_sstebz LAPACK_GLOBAL(sstebz,SSTEBZ)
+#define LAPACK_dstebz LAPACK_GLOBAL(dstebz,DSTEBZ)
+#define LAPACK_sstein LAPACK_GLOBAL(sstein,SSTEIN)
+#define LAPACK_dstein LAPACK_GLOBAL(dstein,DSTEIN)
+#define LAPACK_cstein LAPACK_GLOBAL(cstein,CSTEIN)
+#define LAPACK_zstein LAPACK_GLOBAL(zstein,ZSTEIN)
+#define LAPACK_sdisna LAPACK_GLOBAL(sdisna,SDISNA)
+#define LAPACK_ddisna LAPACK_GLOBAL(ddisna,DDISNA)
+#define LAPACK_ssygst LAPACK_GLOBAL(ssygst,SSYGST)
+#define LAPACK_dsygst LAPACK_GLOBAL(dsygst,DSYGST)
+#define LAPACK_chegst LAPACK_GLOBAL(chegst,CHEGST)
+#define LAPACK_zhegst LAPACK_GLOBAL(zhegst,ZHEGST)
+#define LAPACK_sspgst LAPACK_GLOBAL(sspgst,SSPGST)
+#define LAPACK_dspgst LAPACK_GLOBAL(dspgst,DSPGST)
+#define LAPACK_chpgst LAPACK_GLOBAL(chpgst,CHPGST)
+#define LAPACK_zhpgst LAPACK_GLOBAL(zhpgst,ZHPGST)
+#define LAPACK_ssbgst LAPACK_GLOBAL(ssbgst,SSBGST)
+#define LAPACK_dsbgst LAPACK_GLOBAL(dsbgst,DSBGST)
+#define LAPACK_chbgst LAPACK_GLOBAL(chbgst,CHBGST)
+#define LAPACK_zhbgst LAPACK_GLOBAL(zhbgst,ZHBGST)
+#define LAPACK_spbstf LAPACK_GLOBAL(spbstf,SPBSTF)
+#define LAPACK_dpbstf LAPACK_GLOBAL(dpbstf,DPBSTF)
+#define LAPACK_cpbstf LAPACK_GLOBAL(cpbstf,CPBSTF)
+#define LAPACK_zpbstf LAPACK_GLOBAL(zpbstf,ZPBSTF)
+#define LAPACK_sgehrd LAPACK_GLOBAL(sgehrd,SGEHRD)
+#define LAPACK_dgehrd LAPACK_GLOBAL(dgehrd,DGEHRD)
+#define LAPACK_cgehrd LAPACK_GLOBAL(cgehrd,CGEHRD)
+#define LAPACK_zgehrd LAPACK_GLOBAL(zgehrd,ZGEHRD)
+#define LAPACK_sorghr LAPACK_GLOBAL(sorghr,SORGHR)
+#define LAPACK_dorghr LAPACK_GLOBAL(dorghr,DORGHR)
+#define LAPACK_sormhr LAPACK_GLOBAL(sormhr,SORMHR)
+#define LAPACK_dormhr LAPACK_GLOBAL(dormhr,DORMHR)
+#define LAPACK_cunghr LAPACK_GLOBAL(cunghr,CUNGHR)
+#define LAPACK_zunghr LAPACK_GLOBAL(zunghr,ZUNGHR)
+#define LAPACK_cunmhr LAPACK_GLOBAL(cunmhr,CUNMHR)
+#define LAPACK_zunmhr LAPACK_GLOBAL(zunmhr,ZUNMHR)
+#define LAPACK_sgebal LAPACK_GLOBAL(sgebal,SGEBAL)
+#define LAPACK_dgebal LAPACK_GLOBAL(dgebal,DGEBAL)
+#define LAPACK_cgebal LAPACK_GLOBAL(cgebal,CGEBAL)
+#define LAPACK_zgebal LAPACK_GLOBAL(zgebal,ZGEBAL)
+#define LAPACK_sgebak LAPACK_GLOBAL(sgebak,SGEBAK)
+#define LAPACK_dgebak LAPACK_GLOBAL(dgebak,DGEBAK)
+#define LAPACK_cgebak LAPACK_GLOBAL(cgebak,CGEBAK)
+#define LAPACK_zgebak LAPACK_GLOBAL(zgebak,ZGEBAK)
+#define LAPACK_shseqr LAPACK_GLOBAL(shseqr,SHSEQR)
+#define LAPACK_dhseqr LAPACK_GLOBAL(dhseqr,DHSEQR)
+#define LAPACK_chseqr LAPACK_GLOBAL(chseqr,CHSEQR)
+#define LAPACK_zhseqr LAPACK_GLOBAL(zhseqr,ZHSEQR)
+#define LAPACK_shsein LAPACK_GLOBAL(shsein,SHSEIN)
+#define LAPACK_dhsein LAPACK_GLOBAL(dhsein,DHSEIN)
+#define LAPACK_chsein LAPACK_GLOBAL(chsein,CHSEIN)
+#define LAPACK_zhsein LAPACK_GLOBAL(zhsein,ZHSEIN)
+#define LAPACK_strevc LAPACK_GLOBAL(strevc,STREVC)
+#define LAPACK_dtrevc LAPACK_GLOBAL(dtrevc,DTREVC)
+#define LAPACK_ctrevc LAPACK_GLOBAL(ctrevc,CTREVC)
+#define LAPACK_ztrevc LAPACK_GLOBAL(ztrevc,ZTREVC)
+#define LAPACK_strsna LAPACK_GLOBAL(strsna,STRSNA)
+#define LAPACK_dtrsna LAPACK_GLOBAL(dtrsna,DTRSNA)
+#define LAPACK_ctrsna LAPACK_GLOBAL(ctrsna,CTRSNA)
+#define LAPACK_ztrsna LAPACK_GLOBAL(ztrsna,ZTRSNA)
+#define LAPACK_strexc LAPACK_GLOBAL(strexc,STREXC)
+#define LAPACK_dtrexc LAPACK_GLOBAL(dtrexc,DTREXC)
+#define LAPACK_ctrexc LAPACK_GLOBAL(ctrexc,CTREXC)
+#define LAPACK_ztrexc LAPACK_GLOBAL(ztrexc,ZTREXC)
+#define LAPACK_strsen LAPACK_GLOBAL(strsen,STRSEN)
+#define LAPACK_dtrsen LAPACK_GLOBAL(dtrsen,DTRSEN)
+#define LAPACK_ctrsen LAPACK_GLOBAL(ctrsen,CTRSEN)
+#define LAPACK_ztrsen LAPACK_GLOBAL(ztrsen,ZTRSEN)
+#define LAPACK_strsyl LAPACK_GLOBAL(strsyl,STRSYL)
+#define LAPACK_dtrsyl LAPACK_GLOBAL(dtrsyl,DTRSYL)
+#define LAPACK_ctrsyl LAPACK_GLOBAL(ctrsyl,CTRSYL)
+#define LAPACK_ztrsyl LAPACK_GLOBAL(ztrsyl,ZTRSYL)
+#define LAPACK_sgghrd LAPACK_GLOBAL(sgghrd,SGGHRD)
+#define LAPACK_dgghrd LAPACK_GLOBAL(dgghrd,DGGHRD)
+#define LAPACK_cgghrd LAPACK_GLOBAL(cgghrd,CGGHRD)
+#define LAPACK_zgghrd LAPACK_GLOBAL(zgghrd,ZGGHRD)
+#define LAPACK_sggbal LAPACK_GLOBAL(sggbal,SGGBAL)
+#define LAPACK_dggbal LAPACK_GLOBAL(dggbal,DGGBAL)
+#define LAPACK_cggbal LAPACK_GLOBAL(cggbal,CGGBAL)
+#define LAPACK_zggbal LAPACK_GLOBAL(zggbal,ZGGBAL)
+#define LAPACK_sggbak LAPACK_GLOBAL(sggbak,SGGBAK)
+#define LAPACK_dggbak LAPACK_GLOBAL(dggbak,DGGBAK)
+#define LAPACK_cggbak LAPACK_GLOBAL(cggbak,CGGBAK)
+#define LAPACK_zggbak LAPACK_GLOBAL(zggbak,ZGGBAK)
+#define LAPACK_shgeqz LAPACK_GLOBAL(shgeqz,SHGEQZ)
+#define LAPACK_dhgeqz LAPACK_GLOBAL(dhgeqz,DHGEQZ)
+#define LAPACK_chgeqz LAPACK_GLOBAL(chgeqz,CHGEQZ)
+#define LAPACK_zhgeqz LAPACK_GLOBAL(zhgeqz,ZHGEQZ)
+#define LAPACK_stgevc LAPACK_GLOBAL(stgevc,STGEVC)
+#define LAPACK_dtgevc LAPACK_GLOBAL(dtgevc,DTGEVC)
+#define LAPACK_ctgevc LAPACK_GLOBAL(ctgevc,CTGEVC)
+#define LAPACK_ztgevc LAPACK_GLOBAL(ztgevc,ZTGEVC)
+#define LAPACK_stgexc LAPACK_GLOBAL(stgexc,STGEXC)
+#define LAPACK_dtgexc LAPACK_GLOBAL(dtgexc,DTGEXC)
+#define LAPACK_ctgexc LAPACK_GLOBAL(ctgexc,CTGEXC)
+#define LAPACK_ztgexc LAPACK_GLOBAL(ztgexc,ZTGEXC)
+#define LAPACK_stgsen LAPACK_GLOBAL(stgsen,STGSEN)
+#define LAPACK_dtgsen LAPACK_GLOBAL(dtgsen,DTGSEN)
+#define LAPACK_ctgsen LAPACK_GLOBAL(ctgsen,CTGSEN)
+#define LAPACK_ztgsen LAPACK_GLOBAL(ztgsen,ZTGSEN)
+#define LAPACK_stgsyl LAPACK_GLOBAL(stgsyl,STGSYL)
+#define LAPACK_dtgsyl LAPACK_GLOBAL(dtgsyl,DTGSYL)
+#define LAPACK_ctgsyl LAPACK_GLOBAL(ctgsyl,CTGSYL)
+#define LAPACK_ztgsyl LAPACK_GLOBAL(ztgsyl,ZTGSYL)
+#define LAPACK_stgsna LAPACK_GLOBAL(stgsna,STGSNA)
+#define LAPACK_dtgsna LAPACK_GLOBAL(dtgsna,DTGSNA)
+#define LAPACK_ctgsna LAPACK_GLOBAL(ctgsna,CTGSNA)
+#define LAPACK_ztgsna LAPACK_GLOBAL(ztgsna,ZTGSNA)
+#define LAPACK_sggsvp LAPACK_GLOBAL(sggsvp,SGGSVP)
+#define LAPACK_dggsvp LAPACK_GLOBAL(dggsvp,DGGSVP)
+#define LAPACK_cggsvp LAPACK_GLOBAL(cggsvp,CGGSVP)
+#define LAPACK_zggsvp LAPACK_GLOBAL(zggsvp,ZGGSVP)
+#define LAPACK_stgsja LAPACK_GLOBAL(stgsja,STGSJA)
+#define LAPACK_dtgsja LAPACK_GLOBAL(dtgsja,DTGSJA)
+#define LAPACK_ctgsja LAPACK_GLOBAL(ctgsja,CTGSJA)
+#define LAPACK_ztgsja LAPACK_GLOBAL(ztgsja,ZTGSJA)
+#define LAPACK_sgels LAPACK_GLOBAL(sgels,SGELS)
+#define LAPACK_dgels LAPACK_GLOBAL(dgels,DGELS)
+#define LAPACK_cgels LAPACK_GLOBAL(cgels,CGELS)
+#define LAPACK_zgels LAPACK_GLOBAL(zgels,ZGELS)
+#define LAPACK_sgelsy LAPACK_GLOBAL(sgelsy,SGELSY)
+#define LAPACK_dgelsy LAPACK_GLOBAL(dgelsy,DGELSY)
+#define LAPACK_cgelsy LAPACK_GLOBAL(cgelsy,CGELSY)
+#define LAPACK_zgelsy LAPACK_GLOBAL(zgelsy,ZGELSY)
+#define LAPACK_sgelss LAPACK_GLOBAL(sgelss,SGELSS)
+#define LAPACK_dgelss LAPACK_GLOBAL(dgelss,DGELSS)
+#define LAPACK_cgelss LAPACK_GLOBAL(cgelss,CGELSS)
+#define LAPACK_zgelss LAPACK_GLOBAL(zgelss,ZGELSS)
+#define LAPACK_sgelsd LAPACK_GLOBAL(sgelsd,SGELSD)
+#define LAPACK_dgelsd LAPACK_GLOBAL(dgelsd,DGELSD)
+#define LAPACK_cgelsd LAPACK_GLOBAL(cgelsd,CGELSD)
+#define LAPACK_zgelsd LAPACK_GLOBAL(zgelsd,ZGELSD)
+#define LAPACK_sgglse LAPACK_GLOBAL(sgglse,SGGLSE)
+#define LAPACK_dgglse LAPACK_GLOBAL(dgglse,DGGLSE)
+#define LAPACK_cgglse LAPACK_GLOBAL(cgglse,CGGLSE)
+#define LAPACK_zgglse LAPACK_GLOBAL(zgglse,ZGGLSE)
+#define LAPACK_sggglm LAPACK_GLOBAL(sggglm,SGGGLM)
+#define LAPACK_dggglm LAPACK_GLOBAL(dggglm,DGGGLM)
+#define LAPACK_cggglm LAPACK_GLOBAL(cggglm,CGGGLM)
+#define LAPACK_zggglm LAPACK_GLOBAL(zggglm,ZGGGLM)
+#define LAPACK_ssyev LAPACK_GLOBAL(ssyev,SSYEV)
+#define LAPACK_dsyev LAPACK_GLOBAL(dsyev,DSYEV)
+#define LAPACK_cheev LAPACK_GLOBAL(cheev,CHEEV)
+#define LAPACK_zheev LAPACK_GLOBAL(zheev,ZHEEV)
+#define LAPACK_ssyevd LAPACK_GLOBAL(ssyevd,SSYEVD)
+#define LAPACK_dsyevd LAPACK_GLOBAL(dsyevd,DSYEVD)
+#define LAPACK_cheevd LAPACK_GLOBAL(cheevd,CHEEVD)
+#define LAPACK_zheevd LAPACK_GLOBAL(zheevd,ZHEEVD)
+#define LAPACK_ssyevx LAPACK_GLOBAL(ssyevx,SSYEVX)
+#define LAPACK_dsyevx LAPACK_GLOBAL(dsyevx,DSYEVX)
+#define LAPACK_cheevx LAPACK_GLOBAL(cheevx,CHEEVX)
+#define LAPACK_zheevx LAPACK_GLOBAL(zheevx,ZHEEVX)
+#define LAPACK_ssyevr LAPACK_GLOBAL(ssyevr,SSYEVR)
+#define LAPACK_dsyevr LAPACK_GLOBAL(dsyevr,DSYEVR)
+#define LAPACK_cheevr LAPACK_GLOBAL(cheevr,CHEEVR)
+#define LAPACK_zheevr LAPACK_GLOBAL(zheevr,ZHEEVR)
+#define LAPACK_sspev LAPACK_GLOBAL(sspev,SSPEV)
+#define LAPACK_dspev LAPACK_GLOBAL(dspev,DSPEV)
+#define LAPACK_chpev LAPACK_GLOBAL(chpev,CHPEV)
+#define LAPACK_zhpev LAPACK_GLOBAL(zhpev,ZHPEV)
+#define LAPACK_sspevd LAPACK_GLOBAL(sspevd,SSPEVD)
+#define LAPACK_dspevd LAPACK_GLOBAL(dspevd,DSPEVD)
+#define LAPACK_chpevd LAPACK_GLOBAL(chpevd,CHPEVD)
+#define LAPACK_zhpevd LAPACK_GLOBAL(zhpevd,ZHPEVD)
+#define LAPACK_sspevx LAPACK_GLOBAL(sspevx,SSPEVX)
+#define LAPACK_dspevx LAPACK_GLOBAL(dspevx,DSPEVX)
+#define LAPACK_chpevx LAPACK_GLOBAL(chpevx,CHPEVX)
+#define LAPACK_zhpevx LAPACK_GLOBAL(zhpevx,ZHPEVX)
+#define LAPACK_ssbev LAPACK_GLOBAL(ssbev,SSBEV)
+#define LAPACK_dsbev LAPACK_GLOBAL(dsbev,DSBEV)
+#define LAPACK_chbev LAPACK_GLOBAL(chbev,CHBEV)
+#define LAPACK_zhbev LAPACK_GLOBAL(zhbev,ZHBEV)
+#define LAPACK_ssbevd LAPACK_GLOBAL(ssbevd,SSBEVD)
+#define LAPACK_dsbevd LAPACK_GLOBAL(dsbevd,DSBEVD)
+#define LAPACK_chbevd LAPACK_GLOBAL(chbevd,CHBEVD)
+#define LAPACK_zhbevd LAPACK_GLOBAL(zhbevd,ZHBEVD)
+#define LAPACK_ssbevx LAPACK_GLOBAL(ssbevx,SSBEVX)
+#define LAPACK_dsbevx LAPACK_GLOBAL(dsbevx,DSBEVX)
+#define LAPACK_chbevx LAPACK_GLOBAL(chbevx,CHBEVX)
+#define LAPACK_zhbevx LAPACK_GLOBAL(zhbevx,ZHBEVX)
+#define LAPACK_sstev LAPACK_GLOBAL(sstev,SSTEV)
+#define LAPACK_dstev LAPACK_GLOBAL(dstev,DSTEV)
+#define LAPACK_sstevd LAPACK_GLOBAL(sstevd,SSTEVD)
+#define LAPACK_dstevd LAPACK_GLOBAL(dstevd,DSTEVD)
+#define LAPACK_sstevx LAPACK_GLOBAL(sstevx,SSTEVX)
+#define LAPACK_dstevx LAPACK_GLOBAL(dstevx,DSTEVX)
+#define LAPACK_sstevr LAPACK_GLOBAL(sstevr,SSTEVR)
+#define LAPACK_dstevr LAPACK_GLOBAL(dstevr,DSTEVR)
+#define LAPACK_sgees LAPACK_GLOBAL(sgees,SGEES)
+#define LAPACK_dgees LAPACK_GLOBAL(dgees,DGEES)
+#define LAPACK_cgees LAPACK_GLOBAL(cgees,CGEES)
+#define LAPACK_zgees LAPACK_GLOBAL(zgees,ZGEES)
+#define LAPACK_sgeesx LAPACK_GLOBAL(sgeesx,SGEESX)
+#define LAPACK_dgeesx LAPACK_GLOBAL(dgeesx,DGEESX)
+#define LAPACK_cgeesx LAPACK_GLOBAL(cgeesx,CGEESX)
+#define LAPACK_zgeesx LAPACK_GLOBAL(zgeesx,ZGEESX)
+#define LAPACK_sgeev LAPACK_GLOBAL(sgeev,SGEEV)
+#define LAPACK_dgeev LAPACK_GLOBAL(dgeev,DGEEV)
+#define LAPACK_cgeev LAPACK_GLOBAL(cgeev,CGEEV)
+#define LAPACK_zgeev LAPACK_GLOBAL(zgeev,ZGEEV)
+#define LAPACK_sgeevx LAPACK_GLOBAL(sgeevx,SGEEVX)
+#define LAPACK_dgeevx LAPACK_GLOBAL(dgeevx,DGEEVX)
+#define LAPACK_cgeevx LAPACK_GLOBAL(cgeevx,CGEEVX)
+#define LAPACK_zgeevx LAPACK_GLOBAL(zgeevx,ZGEEVX)
+#define LAPACK_sgesvd LAPACK_GLOBAL(sgesvd,SGESVD)
+#define LAPACK_dgesvd LAPACK_GLOBAL(dgesvd,DGESVD)
+#define LAPACK_cgesvd LAPACK_GLOBAL(cgesvd,CGESVD)
+#define LAPACK_zgesvd LAPACK_GLOBAL(zgesvd,ZGESVD)
+#define LAPACK_sgesdd LAPACK_GLOBAL(sgesdd,SGESDD)
+#define LAPACK_dgesdd LAPACK_GLOBAL(dgesdd,DGESDD)
+#define LAPACK_cgesdd LAPACK_GLOBAL(cgesdd,CGESDD)
+#define LAPACK_zgesdd LAPACK_GLOBAL(zgesdd,ZGESDD)
+#define LAPACK_dgejsv LAPACK_GLOBAL(dgejsv,DGEJSV)
+#define LAPACK_sgejsv LAPACK_GLOBAL(sgejsv,SGEJSV)
+#define LAPACK_dgesvj LAPACK_GLOBAL(dgesvj,DGESVJ)
+#define LAPACK_sgesvj LAPACK_GLOBAL(sgesvj,SGESVJ)
+#define LAPACK_sggsvd LAPACK_GLOBAL(sggsvd,SGGSVD)
+#define LAPACK_dggsvd LAPACK_GLOBAL(dggsvd,DGGSVD)
+#define LAPACK_cggsvd LAPACK_GLOBAL(cggsvd,CGGSVD)
+#define LAPACK_zggsvd LAPACK_GLOBAL(zggsvd,ZGGSVD)
+#define LAPACK_ssygv LAPACK_GLOBAL(ssygv,SSYGV)
+#define LAPACK_dsygv LAPACK_GLOBAL(dsygv,DSYGV)
+#define LAPACK_chegv LAPACK_GLOBAL(chegv,CHEGV)
+#define LAPACK_zhegv LAPACK_GLOBAL(zhegv,ZHEGV)
+#define LAPACK_ssygvd LAPACK_GLOBAL(ssygvd,SSYGVD)
+#define LAPACK_dsygvd LAPACK_GLOBAL(dsygvd,DSYGVD)
+#define LAPACK_chegvd LAPACK_GLOBAL(chegvd,CHEGVD)
+#define LAPACK_zhegvd LAPACK_GLOBAL(zhegvd,ZHEGVD)
+#define LAPACK_ssygvx LAPACK_GLOBAL(ssygvx,SSYGVX)
+#define LAPACK_dsygvx LAPACK_GLOBAL(dsygvx,DSYGVX)
+#define LAPACK_chegvx LAPACK_GLOBAL(chegvx,CHEGVX)
+#define LAPACK_zhegvx LAPACK_GLOBAL(zhegvx,ZHEGVX)
+#define LAPACK_sspgv LAPACK_GLOBAL(sspgv,SSPGV)
+#define LAPACK_dspgv LAPACK_GLOBAL(dspgv,DSPGV)
+#define LAPACK_chpgv LAPACK_GLOBAL(chpgv,CHPGV)
+#define LAPACK_zhpgv LAPACK_GLOBAL(zhpgv,ZHPGV)
+#define LAPACK_sspgvd LAPACK_GLOBAL(sspgvd,SSPGVD)
+#define LAPACK_dspgvd LAPACK_GLOBAL(dspgvd,DSPGVD)
+#define LAPACK_chpgvd LAPACK_GLOBAL(chpgvd,CHPGVD)
+#define LAPACK_zhpgvd LAPACK_GLOBAL(zhpgvd,ZHPGVD)
+#define LAPACK_sspgvx LAPACK_GLOBAL(sspgvx,SSPGVX)
+#define LAPACK_dspgvx LAPACK_GLOBAL(dspgvx,DSPGVX)
+#define LAPACK_chpgvx LAPACK_GLOBAL(chpgvx,CHPGVX)
+#define LAPACK_zhpgvx LAPACK_GLOBAL(zhpgvx,ZHPGVX)
+#define LAPACK_ssbgv LAPACK_GLOBAL(ssbgv,SSBGV)
+#define LAPACK_dsbgv LAPACK_GLOBAL(dsbgv,DSBGV)
+#define LAPACK_chbgv LAPACK_GLOBAL(chbgv,CHBGV)
+#define LAPACK_zhbgv LAPACK_GLOBAL(zhbgv,ZHBGV)
+#define LAPACK_ssbgvd LAPACK_GLOBAL(ssbgvd,SSBGVD)
+#define LAPACK_dsbgvd LAPACK_GLOBAL(dsbgvd,DSBGVD)
+#define LAPACK_chbgvd LAPACK_GLOBAL(chbgvd,CHBGVD)
+#define LAPACK_zhbgvd LAPACK_GLOBAL(zhbgvd,ZHBGVD)
+#define LAPACK_ssbgvx LAPACK_GLOBAL(ssbgvx,SSBGVX)
+#define LAPACK_dsbgvx LAPACK_GLOBAL(dsbgvx,DSBGVX)
+#define LAPACK_chbgvx LAPACK_GLOBAL(chbgvx,CHBGVX)
+#define LAPACK_zhbgvx LAPACK_GLOBAL(zhbgvx,ZHBGVX)
+#define LAPACK_sgges LAPACK_GLOBAL(sgges,SGGES)
+#define LAPACK_dgges LAPACK_GLOBAL(dgges,DGGES)
+#define LAPACK_cgges LAPACK_GLOBAL(cgges,CGGES)
+#define LAPACK_zgges LAPACK_GLOBAL(zgges,ZGGES)
+#define LAPACK_sggesx LAPACK_GLOBAL(sggesx,SGGESX)
+#define LAPACK_dggesx LAPACK_GLOBAL(dggesx,DGGESX)
+#define LAPACK_cggesx LAPACK_GLOBAL(cggesx,CGGESX)
+#define LAPACK_zggesx LAPACK_GLOBAL(zggesx,ZGGESX)
+#define LAPACK_sggev LAPACK_GLOBAL(sggev,SGGEV)
+#define LAPACK_dggev LAPACK_GLOBAL(dggev,DGGEV)
+#define LAPACK_cggev LAPACK_GLOBAL(cggev,CGGEV)
+#define LAPACK_zggev LAPACK_GLOBAL(zggev,ZGGEV)
+#define LAPACK_sggevx LAPACK_GLOBAL(sggevx,SGGEVX)
+#define LAPACK_dggevx LAPACK_GLOBAL(dggevx,DGGEVX)
+#define LAPACK_cggevx LAPACK_GLOBAL(cggevx,CGGEVX)
+#define LAPACK_zggevx LAPACK_GLOBAL(zggevx,ZGGEVX)
+#define LAPACK_dsfrk LAPACK_GLOBAL(dsfrk,DSFRK)
+#define LAPACK_ssfrk LAPACK_GLOBAL(ssfrk,SSFRK)
+#define LAPACK_zhfrk LAPACK_GLOBAL(zhfrk,ZHFRK)
+#define LAPACK_chfrk LAPACK_GLOBAL(chfrk,CHFRK)
+#define LAPACK_dtfsm LAPACK_GLOBAL(dtfsm,DTFSM)
+#define LAPACK_stfsm LAPACK_GLOBAL(stfsm,STFSM)
+#define LAPACK_ztfsm LAPACK_GLOBAL(ztfsm,ZTFSM)
+#define LAPACK_ctfsm LAPACK_GLOBAL(ctfsm,CTFSM)
+#define LAPACK_dtfttp LAPACK_GLOBAL(dtfttp,DTFTTP)
+#define LAPACK_stfttp LAPACK_GLOBAL(stfttp,STFTTP)
+#define LAPACK_ztfttp LAPACK_GLOBAL(ztfttp,ZTFTTP)
+#define LAPACK_ctfttp LAPACK_GLOBAL(ctfttp,CTFTTP)
+#define LAPACK_dtfttr LAPACK_GLOBAL(dtfttr,DTFTTR)
+#define LAPACK_stfttr LAPACK_GLOBAL(stfttr,STFTTR)
+#define LAPACK_ztfttr LAPACK_GLOBAL(ztfttr,ZTFTTR)
+#define LAPACK_ctfttr LAPACK_GLOBAL(ctfttr,CTFTTR)
+#define LAPACK_dtpttf LAPACK_GLOBAL(dtpttf,DTPTTF)
+#define LAPACK_stpttf LAPACK_GLOBAL(stpttf,STPTTF)
+#define LAPACK_ztpttf LAPACK_GLOBAL(ztpttf,ZTPTTF)
+#define LAPACK_ctpttf LAPACK_GLOBAL(ctpttf,CTPTTF)
+#define LAPACK_dtpttr LAPACK_GLOBAL(dtpttr,DTPTTR)
+#define LAPACK_stpttr LAPACK_GLOBAL(stpttr,STPTTR)
+#define LAPACK_ztpttr LAPACK_GLOBAL(ztpttr,ZTPTTR)
+#define LAPACK_ctpttr LAPACK_GLOBAL(ctpttr,CTPTTR)
+#define LAPACK_dtrttf LAPACK_GLOBAL(dtrttf,DTRTTF)
+#define LAPACK_strttf LAPACK_GLOBAL(strttf,STRTTF)
+#define LAPACK_ztrttf LAPACK_GLOBAL(ztrttf,ZTRTTF)
+#define LAPACK_ctrttf LAPACK_GLOBAL(ctrttf,CTRTTF)
+#define LAPACK_dtrttp LAPACK_GLOBAL(dtrttp,DTRTTP)
+#define LAPACK_strttp LAPACK_GLOBAL(strttp,STRTTP)
+#define LAPACK_ztrttp LAPACK_GLOBAL(ztrttp,ZTRTTP)
+#define LAPACK_ctrttp LAPACK_GLOBAL(ctrttp,CTRTTP)
+#define LAPACK_sgeqrfp LAPACK_GLOBAL(sgeqrfp,SGEQRFP)
+#define LAPACK_dgeqrfp LAPACK_GLOBAL(dgeqrfp,DGEQRFP)
+#define LAPACK_cgeqrfp LAPACK_GLOBAL(cgeqrfp,CGEQRFP)
+#define LAPACK_zgeqrfp LAPACK_GLOBAL(zgeqrfp,ZGEQRFP)
+#define LAPACK_clacgv LAPACK_GLOBAL(clacgv,CLACGV)
+#define LAPACK_zlacgv LAPACK_GLOBAL(zlacgv,ZLACGV)
+#define LAPACK_slarnv LAPACK_GLOBAL(slarnv,SLARNV)
+#define LAPACK_dlarnv LAPACK_GLOBAL(dlarnv,DLARNV)
+#define LAPACK_clarnv LAPACK_GLOBAL(clarnv,CLARNV)
+#define LAPACK_zlarnv LAPACK_GLOBAL(zlarnv,ZLARNV)
+#define LAPACK_sgeqr2 LAPACK_GLOBAL(sgeqr2,SGEQR2)
+#define LAPACK_dgeqr2 LAPACK_GLOBAL(dgeqr2,DGEQR2)
+#define LAPACK_cgeqr2 LAPACK_GLOBAL(cgeqr2,CGEQR2)
+#define LAPACK_zgeqr2 LAPACK_GLOBAL(zgeqr2,ZGEQR2)
+#define LAPACK_slacpy LAPACK_GLOBAL(slacpy,SLACPY)
+#define LAPACK_dlacpy LAPACK_GLOBAL(dlacpy,DLACPY)
+#define LAPACK_clacpy LAPACK_GLOBAL(clacpy,CLACPY)
+#define LAPACK_zlacpy LAPACK_GLOBAL(zlacpy,ZLACPY)
+#define LAPACK_sgetf2 LAPACK_GLOBAL(sgetf2,SGETF2)
+#define LAPACK_dgetf2 LAPACK_GLOBAL(dgetf2,DGETF2)
+#define LAPACK_cgetf2 LAPACK_GLOBAL(cgetf2,CGETF2)
+#define LAPACK_zgetf2 LAPACK_GLOBAL(zgetf2,ZGETF2)
+#define LAPACK_slaswp LAPACK_GLOBAL(slaswp,SLASWP)
+#define LAPACK_dlaswp LAPACK_GLOBAL(dlaswp,DLASWP)
+#define LAPACK_claswp LAPACK_GLOBAL(claswp,CLASWP)
+#define LAPACK_zlaswp LAPACK_GLOBAL(zlaswp,ZLASWP)
+#define LAPACK_slange LAPACK_GLOBAL(slange,SLANGE)
+#define LAPACK_dlange LAPACK_GLOBAL(dlange,DLANGE)
+#define LAPACK_clange LAPACK_GLOBAL(clange,CLANGE)
+#define LAPACK_zlange LAPACK_GLOBAL(zlange,ZLANGE)
+#define LAPACK_clanhe LAPACK_GLOBAL(clanhe,CLANHE)
+#define LAPACK_zlanhe LAPACK_GLOBAL(zlanhe,ZLANHE)
+#define LAPACK_slansy LAPACK_GLOBAL(slansy,SLANSY)
+#define LAPACK_dlansy LAPACK_GLOBAL(dlansy,DLANSY)
+#define LAPACK_clansy LAPACK_GLOBAL(clansy,CLANSY)
+#define LAPACK_zlansy LAPACK_GLOBAL(zlansy,ZLANSY)
+#define LAPACK_slantr LAPACK_GLOBAL(slantr,SLANTR)
+#define LAPACK_dlantr LAPACK_GLOBAL(dlantr,DLANTR)
+#define LAPACK_clantr LAPACK_GLOBAL(clantr,CLANTR)
+#define LAPACK_zlantr LAPACK_GLOBAL(zlantr,ZLANTR)
+#define LAPACK_slamch LAPACK_GLOBAL(slamch,SLAMCH)
+#define LAPACK_dlamch LAPACK_GLOBAL(dlamch,DLAMCH)
+#define LAPACK_sgelq2 LAPACK_GLOBAL(sgelq2,SGELQ2)
+#define LAPACK_dgelq2 LAPACK_GLOBAL(dgelq2,DGELQ2)
+#define LAPACK_cgelq2 LAPACK_GLOBAL(cgelq2,CGELQ2)
+#define LAPACK_zgelq2 LAPACK_GLOBAL(zgelq2,ZGELQ2)
+#define LAPACK_slarfb LAPACK_GLOBAL(slarfb,SLARFB)
+#define LAPACK_dlarfb LAPACK_GLOBAL(dlarfb,DLARFB)
+#define LAPACK_clarfb LAPACK_GLOBAL(clarfb,CLARFB)
+#define LAPACK_zlarfb LAPACK_GLOBAL(zlarfb,ZLARFB)
+#define LAPACK_slarfg LAPACK_GLOBAL(slarfg,SLARFG)
+#define LAPACK_dlarfg LAPACK_GLOBAL(dlarfg,DLARFG)
+#define LAPACK_clarfg LAPACK_GLOBAL(clarfg,CLARFG)
+#define LAPACK_zlarfg LAPACK_GLOBAL(zlarfg,ZLARFG)
+#define LAPACK_slarft LAPACK_GLOBAL(slarft,SLARFT)
+#define LAPACK_dlarft LAPACK_GLOBAL(dlarft,DLARFT)
+#define LAPACK_clarft LAPACK_GLOBAL(clarft,CLARFT)
+#define LAPACK_zlarft LAPACK_GLOBAL(zlarft,ZLARFT)
+#define LAPACK_slarfx LAPACK_GLOBAL(slarfx,SLARFX)
+#define LAPACK_dlarfx LAPACK_GLOBAL(dlarfx,DLARFX)
+#define LAPACK_clarfx LAPACK_GLOBAL(clarfx,CLARFX)
+#define LAPACK_zlarfx LAPACK_GLOBAL(zlarfx,ZLARFX)
+#define LAPACK_slatms LAPACK_GLOBAL(slatms,SLATMS)
+#define LAPACK_dlatms LAPACK_GLOBAL(dlatms,DLATMS)
+#define LAPACK_clatms LAPACK_GLOBAL(clatms,CLATMS)
+#define LAPACK_zlatms LAPACK_GLOBAL(zlatms,ZLATMS)
+#define LAPACK_slag2d LAPACK_GLOBAL(slag2d,SLAG2D)
+#define LAPACK_dlag2s LAPACK_GLOBAL(dlag2s,DLAG2S)
+#define LAPACK_clag2z LAPACK_GLOBAL(clag2z,CLAG2Z)
+#define LAPACK_zlag2c LAPACK_GLOBAL(zlag2c,ZLAG2C)
+#define LAPACK_slauum LAPACK_GLOBAL(slauum,SLAUUM)
+#define LAPACK_dlauum LAPACK_GLOBAL(dlauum,DLAUUM)
+#define LAPACK_clauum LAPACK_GLOBAL(clauum,CLAUUM)
+#define LAPACK_zlauum LAPACK_GLOBAL(zlauum,ZLAUUM)
+#define LAPACK_slagge LAPACK_GLOBAL(slagge,SLAGGE)
+#define LAPACK_dlagge LAPACK_GLOBAL(dlagge,DLAGGE)
+#define LAPACK_clagge LAPACK_GLOBAL(clagge,CLAGGE)
+#define LAPACK_zlagge LAPACK_GLOBAL(zlagge,ZLAGGE)
+#define LAPACK_slaset LAPACK_GLOBAL(slaset,SLASET)
+#define LAPACK_dlaset LAPACK_GLOBAL(dlaset,DLASET)
+#define LAPACK_claset LAPACK_GLOBAL(claset,CLASET)
+#define LAPACK_zlaset LAPACK_GLOBAL(zlaset,ZLASET)
+#define LAPACK_slasrt LAPACK_GLOBAL(slasrt,SLASRT)
+#define LAPACK_dlasrt LAPACK_GLOBAL(dlasrt,DLASRT)
+#define LAPACK_slagsy LAPACK_GLOBAL(slagsy,SLAGSY)
+#define LAPACK_dlagsy LAPACK_GLOBAL(dlagsy,DLAGSY)
+#define LAPACK_clagsy LAPACK_GLOBAL(clagsy,CLAGSY)
+#define LAPACK_zlagsy LAPACK_GLOBAL(zlagsy,ZLAGSY)
+#define LAPACK_claghe LAPACK_GLOBAL(claghe,CLAGHE)
+#define LAPACK_zlaghe LAPACK_GLOBAL(zlaghe,ZLAGHE)
+#define LAPACK_slapmr LAPACK_GLOBAL(slapmr,SLAPMR)
+#define LAPACK_dlapmr LAPACK_GLOBAL(dlapmr,DLAPMR)
+#define LAPACK_clapmr LAPACK_GLOBAL(clapmr,CLAPMR)
+#define LAPACK_zlapmr LAPACK_GLOBAL(zlapmr,ZLAPMR)
+#define LAPACK_slapy2 LAPACK_GLOBAL(slapy2,SLAPY2)
+#define LAPACK_dlapy2 LAPACK_GLOBAL(dlapy2,DLAPY2)
+#define LAPACK_slapy3 LAPACK_GLOBAL(slapy3,SLAPY3)
+#define LAPACK_dlapy3 LAPACK_GLOBAL(dlapy3,DLAPY3)
+#define LAPACK_slartgp LAPACK_GLOBAL(slartgp,SLARTGP)
+#define LAPACK_dlartgp LAPACK_GLOBAL(dlartgp,DLARTGP)
+#define LAPACK_slartgs LAPACK_GLOBAL(slartgs,SLARTGS)
+#define LAPACK_dlartgs LAPACK_GLOBAL(dlartgs,DLARTGS)
+// LAPACK 3.3.0
+#define LAPACK_cbbcsd LAPACK_GLOBAL(cbbcsd,CBBCSD)
+#define LAPACK_cheswapr LAPACK_GLOBAL(cheswapr,CHESWAPR)
+#define LAPACK_chetri2 LAPACK_GLOBAL(chetri2,CHETRI2)
+#define LAPACK_chetri2x LAPACK_GLOBAL(chetri2x,CHETRI2X)
+#define LAPACK_chetrs2 LAPACK_GLOBAL(chetrs2,CHETRS2)
+#define LAPACK_csyconv LAPACK_GLOBAL(csyconv,CSYCONV)
+#define LAPACK_csyswapr LAPACK_GLOBAL(csyswapr,CSYSWAPR)
+#define LAPACK_csytri2 LAPACK_GLOBAL(csytri2,CSYTRI2)
+#define LAPACK_csytri2x LAPACK_GLOBAL(csytri2x,CSYTRI2X)
+#define LAPACK_csytrs2 LAPACK_GLOBAL(csytrs2,CSYTRS2)
+#define LAPACK_cunbdb LAPACK_GLOBAL(cunbdb,CUNBDB)
+#define LAPACK_cuncsd LAPACK_GLOBAL(cuncsd,CUNCSD)
+#define LAPACK_dbbcsd LAPACK_GLOBAL(dbbcsd,DBBCSD)
+#define LAPACK_dorbdb LAPACK_GLOBAL(dorbdb,DORBDB)
+#define LAPACK_dorcsd LAPACK_GLOBAL(dorcsd,DORCSD)
+#define LAPACK_dsyconv LAPACK_GLOBAL(dsyconv,DSYCONV)
+#define LAPACK_dsyswapr LAPACK_GLOBAL(dsyswapr,DSYSWAPR)
+#define LAPACK_dsytri2 LAPACK_GLOBAL(dsytri2,DSYTRI2)
+#define LAPACK_dsytri2x LAPACK_GLOBAL(dsytri2x,DSYTRI2X)
+#define LAPACK_dsytrs2 LAPACK_GLOBAL(dsytrs2,DSYTRS2)
+#define LAPACK_sbbcsd LAPACK_GLOBAL(sbbcsd,SBBCSD)
+#define LAPACK_sorbdb LAPACK_GLOBAL(sorbdb,SORBDB)
+#define LAPACK_sorcsd LAPACK_GLOBAL(sorcsd,SORCSD)
+#define LAPACK_ssyconv LAPACK_GLOBAL(ssyconv,SSYCONV)
+#define LAPACK_ssyswapr LAPACK_GLOBAL(ssyswapr,SSYSWAPR)
+#define LAPACK_ssytri2 LAPACK_GLOBAL(ssytri2,SSYTRI2)
+#define LAPACK_ssytri2x LAPACK_GLOBAL(ssytri2x,SSYTRI2X)
+#define LAPACK_ssytrs2 LAPACK_GLOBAL(ssytrs2,SSYTRS2)
+#define LAPACK_zbbcsd LAPACK_GLOBAL(zbbcsd,ZBBCSD)
+#define LAPACK_zheswapr LAPACK_GLOBAL(zheswapr,ZHESWAPR)
+#define LAPACK_zhetri2 LAPACK_GLOBAL(zhetri2,ZHETRI2)
+#define LAPACK_zhetri2x LAPACK_GLOBAL(zhetri2x,ZHETRI2X)
+#define LAPACK_zhetrs2 LAPACK_GLOBAL(zhetrs2,ZHETRS2)
+#define LAPACK_zsyconv LAPACK_GLOBAL(zsyconv,ZSYCONV)
+#define LAPACK_zsyswapr LAPACK_GLOBAL(zsyswapr,ZSYSWAPR)
+#define LAPACK_zsytri2 LAPACK_GLOBAL(zsytri2,ZSYTRI2)
+#define LAPACK_zsytri2x LAPACK_GLOBAL(zsytri2x,ZSYTRI2X)
+#define LAPACK_zsytrs2 LAPACK_GLOBAL(zsytrs2,ZSYTRS2)
+#define LAPACK_zunbdb LAPACK_GLOBAL(zunbdb,ZUNBDB)
+#define LAPACK_zuncsd LAPACK_GLOBAL(zuncsd,ZUNCSD)
+// LAPACK 3.4.0
+#define LAPACK_sgemqrt LAPACK_GLOBAL(sgemqrt,SGEMQRT)
+#define LAPACK_dgemqrt LAPACK_GLOBAL(dgemqrt,DGEMQRT)
+#define LAPACK_cgemqrt LAPACK_GLOBAL(cgemqrt,CGEMQRT)
+#define LAPACK_zgemqrt LAPACK_GLOBAL(zgemqrt,ZGEMQRT)
+#define LAPACK_sgeqrt LAPACK_GLOBAL(sgeqrt,SGEQRT)
+#define LAPACK_dgeqrt LAPACK_GLOBAL(dgeqrt,DGEQRT)
+#define LAPACK_cgeqrt LAPACK_GLOBAL(cgeqrt,CGEQRT)
+#define LAPACK_zgeqrt LAPACK_GLOBAL(zgeqrt,ZGEQRT)
+#define LAPACK_sgeqrt2 LAPACK_GLOBAL(sgeqrt2,SGEQRT2)
+#define LAPACK_dgeqrt2 LAPACK_GLOBAL(dgeqrt2,DGEQRT2)
+#define LAPACK_cgeqrt2 LAPACK_GLOBAL(cgeqrt2,CGEQRT2)
+#define LAPACK_zgeqrt2 LAPACK_GLOBAL(zgeqrt2,ZGEQRT2)
+#define LAPACK_sgeqrt3 LAPACK_GLOBAL(sgeqrt3,SGEQRT3)
+#define LAPACK_dgeqrt3 LAPACK_GLOBAL(dgeqrt3,DGEQRT3)
+#define LAPACK_cgeqrt3 LAPACK_GLOBAL(cgeqrt3,CGEQRT3)
+#define LAPACK_zgeqrt3 LAPACK_GLOBAL(zgeqrt3,ZGEQRT3)
+#define LAPACK_stpmqrt LAPACK_GLOBAL(stpmqrt,STPMQRT)
+#define LAPACK_dtpmqrt LAPACK_GLOBAL(dtpmqrt,DTPMQRT)
+#define LAPACK_ctpmqrt LAPACK_GLOBAL(ctpmqrt,CTPMQRT)
+#define LAPACK_ztpmqrt LAPACK_GLOBAL(ztpmqrt,ZTPMQRT)
+#define LAPACK_dtpqrt LAPACK_GLOBAL(dtpqrt,DTPQRT)
+#define LAPACK_ctpqrt LAPACK_GLOBAL(ctpqrt,CTPQRT)
+#define LAPACK_ztpqrt LAPACK_GLOBAL(ztpqrt,ZTPQRT)
+#define LAPACK_stpqrt2 LAPACK_GLOBAL(stpqrt2,STPQRT2)
+#define LAPACK_dtpqrt2 LAPACK_GLOBAL(dtpqrt2,DTPQRT2)
+#define LAPACK_ctpqrt2 LAPACK_GLOBAL(ctpqrt2,CTPQRT2)
+#define LAPACK_ztpqrt2 LAPACK_GLOBAL(ztpqrt2,ZTPQRT2)
+#define LAPACK_stprfb LAPACK_GLOBAL(stprfb,STPRFB)
+#define LAPACK_dtprfb LAPACK_GLOBAL(dtprfb,DTPRFB)
+#define LAPACK_ctprfb LAPACK_GLOBAL(ctprfb,CTPRFB)
+#define LAPACK_ztprfb LAPACK_GLOBAL(ztprfb,ZTPRFB)
+// LAPACK 3.X.X
+#define LAPACK_csyr LAPACK_GLOBAL(csyr,CSYR)
+#define LAPACK_zsyr LAPACK_GLOBAL(zsyr,ZSYR)
+
+
+void LAPACK_sgetrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_dgetrf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_cgetrf( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int* ipiv, lapack_int *info );
+void LAPACK_zgetrf( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* ipiv, lapack_int *info );
+void LAPACK_sgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, float* ab, lapack_int* ldab,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_dgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, double* ab, lapack_int* ldab,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_cgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, lapack_complex_float* ab, lapack_int* ldab,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_zgbtrf( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, lapack_complex_double* ab, lapack_int* ldab,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_sgttrf( lapack_int* n, float* dl, float* d, float* du, float* du2,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_dgttrf( lapack_int* n, double* dl, double* d, double* du,
+                    double* du2, lapack_int* ipiv, lapack_int *info );
+void LAPACK_cgttrf( lapack_int* n, lapack_complex_float* dl,
+                    lapack_complex_float* d, lapack_complex_float* du,
+                    lapack_complex_float* du2, lapack_int* ipiv,
+                    lapack_int *info );
+void LAPACK_zgttrf( lapack_int* n, lapack_complex_double* dl,
+                    lapack_complex_double* d, lapack_complex_double* du,
+                    lapack_complex_double* du2, lapack_int* ipiv,
+                    lapack_int *info );
+void LAPACK_spotrf( char* uplo, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_dpotrf( char* uplo, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_cpotrf( char* uplo, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_zpotrf( char* uplo, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_dpstrf( char* uplo, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int* piv, lapack_int* rank, double* tol,
+                    double* work, lapack_int *info );
+void LAPACK_spstrf( char* uplo, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int* piv, lapack_int* rank, float* tol, float* work,
+                    lapack_int *info );
+void LAPACK_zpstrf( char* uplo, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* piv, lapack_int* rank,
+                    double* tol, double* work, lapack_int *info );
+void LAPACK_cpstrf( char* uplo, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int* piv, lapack_int* rank,
+                    float* tol, float* work, lapack_int *info );
+void LAPACK_dpftrf( char* transr, char* uplo, lapack_int* n, double* a,
+                    lapack_int *info );
+void LAPACK_spftrf( char* transr, char* uplo, lapack_int* n, float* a,
+                    lapack_int *info );
+void LAPACK_zpftrf( char* transr, char* uplo, lapack_int* n,
+                    lapack_complex_double* a, lapack_int *info );
+void LAPACK_cpftrf( char* transr, char* uplo, lapack_int* n,
+                    lapack_complex_float* a, lapack_int *info );
+void LAPACK_spptrf( char* uplo, lapack_int* n, float* ap, lapack_int *info );
+void LAPACK_dpptrf( char* uplo, lapack_int* n, double* ap, lapack_int *info );
+void LAPACK_cpptrf( char* uplo, lapack_int* n, lapack_complex_float* ap,
+                    lapack_int *info );
+void LAPACK_zpptrf( char* uplo, lapack_int* n, lapack_complex_double* ap,
+                    lapack_int *info );
+void LAPACK_spbtrf( char* uplo, lapack_int* n, lapack_int* kd, float* ab,
+                    lapack_int* ldab, lapack_int *info );
+void LAPACK_dpbtrf( char* uplo, lapack_int* n, lapack_int* kd, double* ab,
+                    lapack_int* ldab, lapack_int *info );
+void LAPACK_cpbtrf( char* uplo, lapack_int* n, lapack_int* kd,
+                    lapack_complex_float* ab, lapack_int* ldab,
+                    lapack_int *info );
+void LAPACK_zpbtrf( char* uplo, lapack_int* n, lapack_int* kd,
+                    lapack_complex_double* ab, lapack_int* ldab,
+                    lapack_int *info );
+void LAPACK_spttrf( lapack_int* n, float* d, float* e, lapack_int *info );
+void LAPACK_dpttrf( lapack_int* n, double* d, double* e, lapack_int *info );
+void LAPACK_cpttrf( lapack_int* n, float* d, lapack_complex_float* e,
+                    lapack_int *info );
+void LAPACK_zpttrf( lapack_int* n, double* d, lapack_complex_double* e,
+                    lapack_int *info );
+void LAPACK_ssytrf( char* uplo, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int* ipiv, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dsytrf( char* uplo, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int* ipiv, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_csytrf( char* uplo, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int* ipiv,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zsytrf( char* uplo, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* ipiv,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_chetrf( char* uplo, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int* ipiv,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zhetrf( char* uplo, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* ipiv,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_ssptrf( char* uplo, lapack_int* n, float* ap, lapack_int* ipiv,
+                    lapack_int *info );
+void LAPACK_dsptrf( char* uplo, lapack_int* n, double* ap, lapack_int* ipiv,
+                    lapack_int *info );
+void LAPACK_csptrf( char* uplo, lapack_int* n, lapack_complex_float* ap,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_zsptrf( char* uplo, lapack_int* n, lapack_complex_double* ap,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_chptrf( char* uplo, lapack_int* n, lapack_complex_float* ap,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_zhptrf( char* uplo, lapack_int* n, lapack_complex_double* ap,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_sgetrs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const float* a, lapack_int* lda, const lapack_int* ipiv,
+                    float* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_dgetrs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const double* a, lapack_int* lda, const lapack_int* ipiv,
+                    double* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_cgetrs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    const lapack_int* ipiv, lapack_complex_float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_zgetrs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_int* ipiv, lapack_complex_double* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_sgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    lapack_int* nrhs, const float* ab, lapack_int* ldab,
+                    const lapack_int* ipiv, float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_dgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    lapack_int* nrhs, const double* ab, lapack_int* ldab,
+                    const lapack_int* ipiv, double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_cgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    lapack_int* nrhs, const lapack_complex_float* ab,
+                    lapack_int* ldab, const lapack_int* ipiv,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_zgbtrs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    lapack_int* nrhs, const lapack_complex_double* ab,
+                    lapack_int* ldab, const lapack_int* ipiv,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_sgttrs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const float* dl, const float* d, const float* du,
+                    const float* du2, const lapack_int* ipiv, float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_dgttrs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const double* dl, const double* d, const double* du,
+                    const double* du2, const lapack_int* ipiv, double* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_cgttrs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* dl,
+                    const lapack_complex_float* d,
+                    const lapack_complex_float* du,
+                    const lapack_complex_float* du2, const lapack_int* ipiv,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_zgttrs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* dl,
+                    const lapack_complex_double* d,
+                    const lapack_complex_double* du,
+                    const lapack_complex_double* du2, const lapack_int* ipiv,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_spotrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a,
+                    lapack_int* lda, float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_dpotrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* a, lapack_int* lda, double* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_cpotrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_zpotrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_dpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* a, double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_spftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const float* a, float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_zpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_complex_double* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_cpftrs( char* transr, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_complex_float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_spptrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const float* ap, float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_dpptrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* ap, double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_cpptrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* ap, lapack_complex_float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_zpptrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* ap, lapack_complex_double* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_spbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                    const float* ab, lapack_int* ldab, float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_dpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                    const double* ab, lapack_int* ldab, double* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_cpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                    const lapack_complex_float* ab, lapack_int* ldab,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_zpbtrs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                    const lapack_complex_double* ab, lapack_int* ldab,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_spttrs( lapack_int* n, lapack_int* nrhs, const float* d,
+                    const float* e, float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_dpttrs( lapack_int* n, lapack_int* nrhs, const double* d,
+                    const double* e, double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_cpttrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* d,
+                    const lapack_complex_float* e, lapack_complex_float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_zpttrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* d, const lapack_complex_double* e,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_ssytrs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a,
+                    lapack_int* lda, const lapack_int* ipiv, float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_dsytrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* a, lapack_int* lda, const lapack_int* ipiv,
+                    double* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_csytrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    const lapack_int* ipiv, lapack_complex_float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_zsytrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_int* ipiv, lapack_complex_double* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_chetrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    const lapack_int* ipiv, lapack_complex_float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_zhetrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_int* ipiv, lapack_complex_double* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_ssptrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const float* ap, const lapack_int* ipiv, float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_dsptrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* ap, const lapack_int* ipiv, double* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_csptrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* ap, const lapack_int* ipiv,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_zsptrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* ap, const lapack_int* ipiv,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_chptrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* ap, const lapack_int* ipiv,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_zhptrs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* ap, const lapack_int* ipiv,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_strtrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const float* a, lapack_int* lda, float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_dtrtrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const double* a, lapack_int* lda,
+                    double* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_ctrtrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_ztrtrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_stptrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const float* ap, float* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_dtptrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const double* ap, double* b,
+                    lapack_int* ldb, lapack_int *info );
+void LAPACK_ctptrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const lapack_complex_float* ap,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_ztptrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const lapack_complex_double* ap,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_stbtrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* kd, lapack_int* nrhs, const float* ab,
+                    lapack_int* ldab, float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_dtbtrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* kd, lapack_int* nrhs, const double* ab,
+                    lapack_int* ldab, double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_ctbtrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* kd, lapack_int* nrhs,
+                    const lapack_complex_float* ab, lapack_int* ldab,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_ztbtrs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* kd, lapack_int* nrhs,
+                    const lapack_complex_double* ab, lapack_int* ldab,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_sgecon( char* norm, lapack_int* n, const float* a, lapack_int* lda,
+                    float* anorm, float* rcond, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dgecon( char* norm, lapack_int* n, const double* a, lapack_int* lda,
+                    double* anorm, double* rcond, double* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_cgecon( char* norm, lapack_int* n, const lapack_complex_float* a,
+                    lapack_int* lda, float* anorm, float* rcond,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zgecon( char* norm, lapack_int* n, const lapack_complex_double* a,
+                    lapack_int* lda, double* anorm, double* rcond,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_sgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    const float* ab, lapack_int* ldab, const lapack_int* ipiv,
+                    float* anorm, float* rcond, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    const double* ab, lapack_int* ldab, const lapack_int* ipiv,
+                    double* anorm, double* rcond, double* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_cgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    const lapack_complex_float* ab, lapack_int* ldab,
+                    const lapack_int* ipiv, float* anorm, float* rcond,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zgbcon( char* norm, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    const lapack_complex_double* ab, lapack_int* ldab,
+                    const lapack_int* ipiv, double* anorm, double* rcond,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_sgtcon( char* norm, lapack_int* n, const float* dl, const float* d,
+                    const float* du, const float* du2, const lapack_int* ipiv,
+                    float* anorm, float* rcond, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dgtcon( char* norm, lapack_int* n, const double* dl,
+                    const double* d, const double* du, const double* du2,
+                    const lapack_int* ipiv, double* anorm, double* rcond,
+                    double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_cgtcon( char* norm, lapack_int* n, const lapack_complex_float* dl,
+                    const lapack_complex_float* d,
+                    const lapack_complex_float* du,
+                    const lapack_complex_float* du2, const lapack_int* ipiv,
+                    float* anorm, float* rcond, lapack_complex_float* work,
+                    lapack_int *info );
+void LAPACK_zgtcon( char* norm, lapack_int* n, const lapack_complex_double* dl,
+                    const lapack_complex_double* d,
+                    const lapack_complex_double* du,
+                    const lapack_complex_double* du2, const lapack_int* ipiv,
+                    double* anorm, double* rcond, lapack_complex_double* work,
+                    lapack_int *info );
+void LAPACK_spocon( char* uplo, lapack_int* n, const float* a, lapack_int* lda,
+                    float* anorm, float* rcond, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dpocon( char* uplo, lapack_int* n, const double* a, lapack_int* lda,
+                    double* anorm, double* rcond, double* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_cpocon( char* uplo, lapack_int* n, const lapack_complex_float* a,
+                    lapack_int* lda, float* anorm, float* rcond,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zpocon( char* uplo, lapack_int* n, const lapack_complex_double* a,
+                    lapack_int* lda, double* anorm, double* rcond,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_sppcon( char* uplo, lapack_int* n, const float* ap, float* anorm,
+                    float* rcond, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dppcon( char* uplo, lapack_int* n, const double* ap, double* anorm,
+                    double* rcond, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_cppcon( char* uplo, lapack_int* n, const lapack_complex_float* ap,
+                    float* anorm, float* rcond, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zppcon( char* uplo, lapack_int* n, const lapack_complex_double* ap,
+                    double* anorm, double* rcond, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_spbcon( char* uplo, lapack_int* n, lapack_int* kd, const float* ab,
+                    lapack_int* ldab, float* anorm, float* rcond, float* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_dpbcon( char* uplo, lapack_int* n, lapack_int* kd, const double* ab,
+                    lapack_int* ldab, double* anorm, double* rcond,
+                    double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_cpbcon( char* uplo, lapack_int* n, lapack_int* kd,
+                    const lapack_complex_float* ab, lapack_int* ldab,
+                    float* anorm, float* rcond, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zpbcon( char* uplo, lapack_int* n, lapack_int* kd,
+                    const lapack_complex_double* ab, lapack_int* ldab,
+                    double* anorm, double* rcond, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_sptcon( lapack_int* n, const float* d, const float* e, float* anorm,
+                    float* rcond, float* work, lapack_int *info );
+void LAPACK_dptcon( lapack_int* n, const double* d, const double* e,
+                    double* anorm, double* rcond, double* work,
+                    lapack_int *info );
+void LAPACK_cptcon( lapack_int* n, const float* d,
+                    const lapack_complex_float* e, float* anorm, float* rcond,
+                    float* work, lapack_int *info );
+void LAPACK_zptcon( lapack_int* n, const double* d,
+                    const lapack_complex_double* e, double* anorm,
+                    double* rcond, double* work, lapack_int *info );
+void LAPACK_ssycon( char* uplo, lapack_int* n, const float* a, lapack_int* lda,
+                    const lapack_int* ipiv, float* anorm, float* rcond,
+                    float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_dsycon( char* uplo, lapack_int* n, const double* a, lapack_int* lda,
+                    const lapack_int* ipiv, double* anorm, double* rcond,
+                    double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_csycon( char* uplo, lapack_int* n, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_int* ipiv, float* anorm,
+                    float* rcond, lapack_complex_float* work,
+                    lapack_int *info );
+void LAPACK_zsycon( char* uplo, lapack_int* n, const lapack_complex_double* a,
+                    lapack_int* lda, const lapack_int* ipiv, double* anorm,
+                    double* rcond, lapack_complex_double* work,
+                    lapack_int *info );
+void LAPACK_checon( char* uplo, lapack_int* n, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_int* ipiv, float* anorm,
+                    float* rcond, lapack_complex_float* work,
+                    lapack_int *info );
+void LAPACK_zhecon( char* uplo, lapack_int* n, const lapack_complex_double* a,
+                    lapack_int* lda, const lapack_int* ipiv, double* anorm,
+                    double* rcond, lapack_complex_double* work,
+                    lapack_int *info );
+void LAPACK_sspcon( char* uplo, lapack_int* n, const float* ap,
+                    const lapack_int* ipiv, float* anorm, float* rcond,
+                    float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_dspcon( char* uplo, lapack_int* n, const double* ap,
+                    const lapack_int* ipiv, double* anorm, double* rcond,
+                    double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_cspcon( char* uplo, lapack_int* n, const lapack_complex_float* ap,
+                    const lapack_int* ipiv, float* anorm, float* rcond,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zspcon( char* uplo, lapack_int* n, const lapack_complex_double* ap,
+                    const lapack_int* ipiv, double* anorm, double* rcond,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_chpcon( char* uplo, lapack_int* n, const lapack_complex_float* ap,
+                    const lapack_int* ipiv, float* anorm, float* rcond,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zhpcon( char* uplo, lapack_int* n, const lapack_complex_double* ap,
+                    const lapack_int* ipiv, double* anorm, double* rcond,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_strcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    const float* a, lapack_int* lda, float* rcond, float* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_dtrcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    const double* a, lapack_int* lda, double* rcond,
+                    double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_ctrcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    float* rcond, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_ztrcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    double* rcond, lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_stpcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    const float* ap, float* rcond, float* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_dtpcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    const double* ap, double* rcond, double* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_ctpcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    const lapack_complex_float* ap, float* rcond,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_ztpcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    const lapack_complex_double* ap, double* rcond,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_stbcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    lapack_int* kd, const float* ab, lapack_int* ldab,
+                    float* rcond, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dtbcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    lapack_int* kd, const double* ab, lapack_int* ldab,
+                    double* rcond, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_ctbcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    lapack_int* kd, const lapack_complex_float* ab,
+                    lapack_int* ldab, float* rcond, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_ztbcon( char* norm, char* uplo, char* diag, lapack_int* n,
+                    lapack_int* kd, const lapack_complex_double* ab,
+                    lapack_int* ldab, double* rcond,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_sgerfs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const float* a, lapack_int* lda, const float* af,
+                    lapack_int* ldaf, const lapack_int* ipiv, const float* b,
+                    lapack_int* ldb, float* x, lapack_int* ldx, float* ferr,
+                    float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dgerfs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const double* a, lapack_int* lda, const double* af,
+                    lapack_int* ldaf, const lapack_int* ipiv, const double* b,
+                    lapack_int* ldb, double* x, lapack_int* ldx, double* ferr,
+                    double* berr, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_cgerfs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* af, lapack_int* ldaf,
+                    const lapack_int* ipiv, const lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,
+                    float* ferr, float* berr, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zgerfs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* af, lapack_int* ldaf,
+                    const lapack_int* ipiv, const lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                    double* ferr, double* berr, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_dgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const double* a, lapack_int* lda, const double* af,
+                     lapack_int* ldaf, const lapack_int* ipiv, const double* r,
+                     const double* c, const double* b, lapack_int* ldb,
+                     double* x, lapack_int* ldx, double* rcond, double* berr,
+                     lapack_int* n_err_bnds, double* err_bnds_norm,
+                     double* err_bnds_comp, lapack_int* nparams, double* params,
+                     double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_sgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const float* a, lapack_int* lda, const float* af,
+                     lapack_int* ldaf, const lapack_int* ipiv, const float* r,
+                     const float* c, const float* b, lapack_int* ldb, float* x,
+                     lapack_int* ldx, float* rcond, float* berr,
+                     lapack_int* n_err_bnds, float* err_bnds_norm,
+                     float* err_bnds_comp, lapack_int* nparams, float* params,
+                     float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_zgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const lapack_complex_double* a, lapack_int* lda,
+                     const lapack_complex_double* af, lapack_int* ldaf,
+                     const lapack_int* ipiv, const double* r, const double* c,
+                     const lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                     double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params,
+                     lapack_complex_double* work, double* rwork,
+                     lapack_int *info );
+void LAPACK_cgerfsx( char* trans, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const lapack_complex_float* a, lapack_int* lda,
+                     const lapack_complex_float* af, lapack_int* ldaf,
+                     const lapack_int* ipiv, const float* r, const float* c,
+                     const lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                     float* berr, lapack_int* n_err_bnds, float* err_bnds_norm,
+                     float* err_bnds_comp, lapack_int* nparams, float* params,
+                     lapack_complex_float* work, float* rwork,
+                     lapack_int *info );
+void LAPACK_sgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    lapack_int* nrhs, const float* ab, lapack_int* ldab,
+                    const float* afb, lapack_int* ldafb, const lapack_int* ipiv,
+                    const float* b, lapack_int* ldb, float* x, lapack_int* ldx,
+                    float* ferr, float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    lapack_int* nrhs, const double* ab, lapack_int* ldab,
+                    const double* afb, lapack_int* ldafb,
+                    const lapack_int* ipiv, const double* b, lapack_int* ldb,
+                    double* x, lapack_int* ldx, double* ferr, double* berr,
+                    double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_cgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    lapack_int* nrhs, const lapack_complex_float* ab,
+                    lapack_int* ldab, const lapack_complex_float* afb,
+                    lapack_int* ldafb, const lapack_int* ipiv,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* ferr,
+                    float* berr, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zgbrfs( char* trans, lapack_int* n, lapack_int* kl, lapack_int* ku,
+                    lapack_int* nrhs, const lapack_complex_double* ab,
+                    lapack_int* ldab, const lapack_complex_double* afb,
+                    lapack_int* ldafb, const lapack_int* ipiv,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* ferr,
+                    double* berr, lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_dgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, lapack_int* nrhs, const double* ab,
+                     lapack_int* ldab, const double* afb, lapack_int* ldafb,
+                     const lapack_int* ipiv, const double* r, const double* c,
+                     const double* b, lapack_int* ldb, double* x,
+                     lapack_int* ldx, double* rcond, double* berr,
+                     lapack_int* n_err_bnds, double* err_bnds_norm,
+                     double* err_bnds_comp, lapack_int* nparams, double* params,
+                     double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_sgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, lapack_int* nrhs, const float* ab,
+                     lapack_int* ldab, const float* afb, lapack_int* ldafb,
+                     const lapack_int* ipiv, const float* r, const float* c,
+                     const float* b, lapack_int* ldb, float* x, lapack_int* ldx,
+                     float* rcond, float* berr, lapack_int* n_err_bnds,
+                     float* err_bnds_norm, float* err_bnds_comp,
+                     lapack_int* nparams, float* params, float* work,
+                     lapack_int* iwork, lapack_int *info );
+void LAPACK_zgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, lapack_int* nrhs,
+                     const lapack_complex_double* ab, lapack_int* ldab,
+                     const lapack_complex_double* afb, lapack_int* ldafb,
+                     const lapack_int* ipiv, const double* r, const double* c,
+                     const lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                     double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params,
+                     lapack_complex_double* work, double* rwork,
+                     lapack_int *info );
+void LAPACK_cgbrfsx( char* trans, char* equed, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, lapack_int* nrhs,
+                     const lapack_complex_float* ab, lapack_int* ldab,
+                     const lapack_complex_float* afb, lapack_int* ldafb,
+                     const lapack_int* ipiv, const float* r, const float* c,
+                     const lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                     float* berr, lapack_int* n_err_bnds, float* err_bnds_norm,
+                     float* err_bnds_comp, lapack_int* nparams, float* params,
+                     lapack_complex_float* work, float* rwork,
+                     lapack_int *info );
+void LAPACK_sgtrfs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const float* dl, const float* d, const float* du,
+                    const float* dlf, const float* df, const float* duf,
+                    const float* du2, const lapack_int* ipiv, const float* b,
+                    lapack_int* ldb, float* x, lapack_int* ldx, float* ferr,
+                    float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dgtrfs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const double* dl, const double* d, const double* du,
+                    const double* dlf, const double* df, const double* duf,
+                    const double* du2, const lapack_int* ipiv, const double* b,
+                    lapack_int* ldb, double* x, lapack_int* ldx, double* ferr,
+                    double* berr, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_cgtrfs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* dl,
+                    const lapack_complex_float* d,
+                    const lapack_complex_float* du,
+                    const lapack_complex_float* dlf,
+                    const lapack_complex_float* df,
+                    const lapack_complex_float* duf,
+                    const lapack_complex_float* du2, const lapack_int* ipiv,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* ferr,
+                    float* berr, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zgtrfs( char* trans, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* dl,
+                    const lapack_complex_double* d,
+                    const lapack_complex_double* du,
+                    const lapack_complex_double* dlf,
+                    const lapack_complex_double* df,
+                    const lapack_complex_double* duf,
+                    const lapack_complex_double* du2, const lapack_int* ipiv,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* ferr,
+                    double* berr, lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_sporfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a,
+                    lapack_int* lda, const float* af, lapack_int* ldaf,
+                    const float* b, lapack_int* ldb, float* x, lapack_int* ldx,
+                    float* ferr, float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dporfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* a, lapack_int* lda, const double* af,
+                    lapack_int* ldaf, const double* b, lapack_int* ldb,
+                    double* x, lapack_int* ldx, double* ferr, double* berr,
+                    double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_cporfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* af, lapack_int* ldaf,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* ferr,
+                    float* berr, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zporfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* af, lapack_int* ldaf,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* ferr,
+                    double* berr, lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_dporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const double* a, lapack_int* lda, const double* af,
+                     lapack_int* ldaf, const double* s, const double* b,
+                     lapack_int* ldb, double* x, lapack_int* ldx, double* rcond,
+                     double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params, double* work,
+                     lapack_int* iwork, lapack_int *info );
+void LAPACK_sporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const float* a, lapack_int* lda, const float* af,
+                     lapack_int* ldaf, const float* s, const float* b,
+                     lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,
+                     float* berr, lapack_int* n_err_bnds, float* err_bnds_norm,
+                     float* err_bnds_comp, lapack_int* nparams, float* params,
+                     float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_zporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const lapack_complex_double* a, lapack_int* lda,
+                     const lapack_complex_double* af, lapack_int* ldaf,
+                     const double* s, const lapack_complex_double* b,
+                     lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                     double* rcond, double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params,
+                     lapack_complex_double* work, double* rwork,
+                     lapack_int *info );
+void LAPACK_cporfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const lapack_complex_float* a, lapack_int* lda,
+                     const lapack_complex_float* af, lapack_int* ldaf,
+                     const float* s, const lapack_complex_float* b,
+                     lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,
+                     float* rcond, float* berr, lapack_int* n_err_bnds,
+                     float* err_bnds_norm, float* err_bnds_comp,
+                     lapack_int* nparams, float* params,
+                     lapack_complex_float* work, float* rwork,
+                     lapack_int *info );
+void LAPACK_spprfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const float* ap, const float* afp, const float* b,
+                    lapack_int* ldb, float* x, lapack_int* ldx, float* ferr,
+                    float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dpprfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* ap, const double* afp, const double* b,
+                    lapack_int* ldb, double* x, lapack_int* ldx, double* ferr,
+                    double* berr, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_cpprfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* ap,
+                    const lapack_complex_float* afp,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* ferr,
+                    float* berr, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zpprfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* ap,
+                    const lapack_complex_double* afp,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* ferr,
+                    double* berr, lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_spbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                    const float* ab, lapack_int* ldab, const float* afb,
+                    lapack_int* ldafb, const float* b, lapack_int* ldb,
+                    float* x, lapack_int* ldx, float* ferr, float* berr,
+                    float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_dpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                    const double* ab, lapack_int* ldab, const double* afb,
+                    lapack_int* ldafb, const double* b, lapack_int* ldb,
+                    double* x, lapack_int* ldx, double* ferr, double* berr,
+                    double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_cpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                    const lapack_complex_float* ab, lapack_int* ldab,
+                    const lapack_complex_float* afb, lapack_int* ldafb,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* ferr,
+                    float* berr, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zpbrfs( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                    const lapack_complex_double* ab, lapack_int* ldab,
+                    const lapack_complex_double* afb, lapack_int* ldafb,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* ferr,
+                    double* berr, lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_sptrfs( lapack_int* n, lapack_int* nrhs, const float* d,
+                    const float* e, const float* df, const float* ef,
+                    const float* b, lapack_int* ldb, float* x, lapack_int* ldx,
+                    float* ferr, float* berr, float* work, lapack_int *info );
+void LAPACK_dptrfs( lapack_int* n, lapack_int* nrhs, const double* d,
+                    const double* e, const double* df, const double* ef,
+                    const double* b, lapack_int* ldb, double* x,
+                    lapack_int* ldx, double* ferr, double* berr, double* work,
+                    lapack_int *info );
+void LAPACK_cptrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* d,
+                    const lapack_complex_float* e, const float* df,
+                    const lapack_complex_float* ef,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* ferr,
+                    float* berr, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zptrfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* d, const lapack_complex_double* e,
+                    const double* df, const lapack_complex_double* ef,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* ferr,
+                    double* berr, lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_ssyrfs( char* uplo, lapack_int* n, lapack_int* nrhs, const float* a,
+                    lapack_int* lda, const float* af, lapack_int* ldaf,
+                    const lapack_int* ipiv, const float* b, lapack_int* ldb,
+                    float* x, lapack_int* ldx, float* ferr, float* berr,
+                    float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_dsyrfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* a, lapack_int* lda, const double* af,
+                    lapack_int* ldaf, const lapack_int* ipiv, const double* b,
+                    lapack_int* ldb, double* x, lapack_int* ldx, double* ferr,
+                    double* berr, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_csyrfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* af, lapack_int* ldaf,
+                    const lapack_int* ipiv, const lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,
+                    float* ferr, float* berr, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zsyrfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* af, lapack_int* ldaf,
+                    const lapack_int* ipiv, const lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                    double* ferr, double* berr, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_dsyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const double* a, lapack_int* lda, const double* af,
+                     lapack_int* ldaf, const lapack_int* ipiv, const double* s,
+                     const double* b, lapack_int* ldb, double* x,
+                     lapack_int* ldx, double* rcond, double* berr,
+                     lapack_int* n_err_bnds, double* err_bnds_norm,
+                     double* err_bnds_comp, lapack_int* nparams, double* params,
+                     double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_ssyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const float* a, lapack_int* lda, const float* af,
+                     lapack_int* ldaf, const lapack_int* ipiv, const float* s,
+                     const float* b, lapack_int* ldb, float* x, lapack_int* ldx,
+                     float* rcond, float* berr, lapack_int* n_err_bnds,
+                     float* err_bnds_norm, float* err_bnds_comp,
+                     lapack_int* nparams, float* params, float* work,
+                     lapack_int* iwork, lapack_int *info );
+void LAPACK_zsyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const lapack_complex_double* a, lapack_int* lda,
+                     const lapack_complex_double* af, lapack_int* ldaf,
+                     const lapack_int* ipiv, const double* s,
+                     const lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                     double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params,
+                     lapack_complex_double* work, double* rwork,
+                     lapack_int *info );
+void LAPACK_csyrfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const lapack_complex_float* a, lapack_int* lda,
+                     const lapack_complex_float* af, lapack_int* ldaf,
+                     const lapack_int* ipiv, const float* s,
+                     const lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                     float* berr, lapack_int* n_err_bnds, float* err_bnds_norm,
+                     float* err_bnds_comp, lapack_int* nparams, float* params,
+                     lapack_complex_float* work, float* rwork,
+                     lapack_int *info );
+void LAPACK_cherfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* af, lapack_int* ldaf,
+                    const lapack_int* ipiv, const lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,
+                    float* ferr, float* berr, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zherfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* af, lapack_int* ldaf,
+                    const lapack_int* ipiv, const lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                    double* ferr, double* berr, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_zherfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const lapack_complex_double* a, lapack_int* lda,
+                     const lapack_complex_double* af, lapack_int* ldaf,
+                     const lapack_int* ipiv, const double* s,
+                     const lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                     double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params,
+                     lapack_complex_double* work, double* rwork,
+                     lapack_int *info );
+void LAPACK_cherfsx( char* uplo, char* equed, lapack_int* n, lapack_int* nrhs,
+                     const lapack_complex_float* a, lapack_int* lda,
+                     const lapack_complex_float* af, lapack_int* ldaf,
+                     const lapack_int* ipiv, const float* s,
+                     const lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                     float* berr, lapack_int* n_err_bnds, float* err_bnds_norm,
+                     float* err_bnds_comp, lapack_int* nparams, float* params,
+                     lapack_complex_float* work, float* rwork,
+                     lapack_int *info );
+void LAPACK_ssprfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const float* ap, const float* afp, const lapack_int* ipiv,
+                    const float* b, lapack_int* ldb, float* x, lapack_int* ldx,
+                    float* ferr, float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dsprfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* ap, const double* afp, const lapack_int* ipiv,
+                    const double* b, lapack_int* ldb, double* x,
+                    lapack_int* ldx, double* ferr, double* berr, double* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_csprfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* ap,
+                    const lapack_complex_float* afp, const lapack_int* ipiv,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* ferr,
+                    float* berr, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zsprfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* ap,
+                    const lapack_complex_double* afp, const lapack_int* ipiv,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* ferr,
+                    double* berr, lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_chprfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* ap,
+                    const lapack_complex_float* afp, const lapack_int* ipiv,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* ferr,
+                    float* berr, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zhprfs( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* ap,
+                    const lapack_complex_double* afp, const lapack_int* ipiv,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* ferr,
+                    double* berr, lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_strrfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const float* a, lapack_int* lda,
+                    const float* b, lapack_int* ldb, const float* x,
+                    lapack_int* ldx, float* ferr, float* berr, float* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_dtrrfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const double* a, lapack_int* lda,
+                    const double* b, lapack_int* ldb, const double* x,
+                    lapack_int* ldx, double* ferr, double* berr, double* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_ctrrfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* b,
+                    lapack_int* ldb, const lapack_complex_float* x,
+                    lapack_int* ldx, float* ferr, float* berr,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_ztrrfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const lapack_complex_double* a,
+                    lapack_int* lda, const lapack_complex_double* b,
+                    lapack_int* ldb, const lapack_complex_double* x,
+                    lapack_int* ldx, double* ferr, double* berr,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_stprfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const float* ap, const float* b,
+                    lapack_int* ldb, const float* x, lapack_int* ldx,
+                    float* ferr, float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dtprfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const double* ap, const double* b,
+                    lapack_int* ldb, const double* x, lapack_int* ldx,
+                    double* ferr, double* berr, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_ctprfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const lapack_complex_float* ap,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    const lapack_complex_float* x, lapack_int* ldx, float* ferr,
+                    float* berr, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_ztprfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* nrhs, const lapack_complex_double* ap,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    const lapack_complex_double* x, lapack_int* ldx,
+                    double* ferr, double* berr, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_stbrfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* kd, lapack_int* nrhs, const float* ab,
+                    lapack_int* ldab, const float* b, lapack_int* ldb,
+                    const float* x, lapack_int* ldx, float* ferr, float* berr,
+                    float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_dtbrfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* kd, lapack_int* nrhs, const double* ab,
+                    lapack_int* ldab, const double* b, lapack_int* ldb,
+                    const double* x, lapack_int* ldx, double* ferr,
+                    double* berr, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_ctbrfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* kd, lapack_int* nrhs,
+                    const lapack_complex_float* ab, lapack_int* ldab,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    const lapack_complex_float* x, lapack_int* ldx, float* ferr,
+                    float* berr, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_ztbrfs( char* uplo, char* trans, char* diag, lapack_int* n,
+                    lapack_int* kd, lapack_int* nrhs,
+                    const lapack_complex_double* ab, lapack_int* ldab,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    const lapack_complex_double* x, lapack_int* ldx,
+                    double* ferr, double* berr, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_sgetri( lapack_int* n, float* a, lapack_int* lda,
+                    const lapack_int* ipiv, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dgetri( lapack_int* n, double* a, lapack_int* lda,
+                    const lapack_int* ipiv, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cgetri( lapack_int* n, lapack_complex_float* a, lapack_int* lda,
+                    const lapack_int* ipiv, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zgetri( lapack_int* n, lapack_complex_double* a, lapack_int* lda,
+                    const lapack_int* ipiv, lapack_complex_double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_spotri( char* uplo, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_dpotri( char* uplo, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_cpotri( char* uplo, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_zpotri( char* uplo, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_dpftri( char* transr, char* uplo, lapack_int* n, double* a,
+                    lapack_int *info );
+void LAPACK_spftri( char* transr, char* uplo, lapack_int* n, float* a,
+                    lapack_int *info );
+void LAPACK_zpftri( char* transr, char* uplo, lapack_int* n,
+                    lapack_complex_double* a, lapack_int *info );
+void LAPACK_cpftri( char* transr, char* uplo, lapack_int* n,
+                    lapack_complex_float* a, lapack_int *info );
+void LAPACK_spptri( char* uplo, lapack_int* n, float* ap, lapack_int *info );
+void LAPACK_dpptri( char* uplo, lapack_int* n, double* ap, lapack_int *info );
+void LAPACK_cpptri( char* uplo, lapack_int* n, lapack_complex_float* ap,
+                    lapack_int *info );
+void LAPACK_zpptri( char* uplo, lapack_int* n, lapack_complex_double* ap,
+                    lapack_int *info );
+void LAPACK_ssytri( char* uplo, lapack_int* n, float* a, lapack_int* lda,
+                    const lapack_int* ipiv, float* work, lapack_int *info );
+void LAPACK_dsytri( char* uplo, lapack_int* n, double* a, lapack_int* lda,
+                    const lapack_int* ipiv, double* work, lapack_int *info );
+void LAPACK_csytri( char* uplo, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, const lapack_int* ipiv,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zsytri( char* uplo, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, const lapack_int* ipiv,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_chetri( char* uplo, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, const lapack_int* ipiv,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zhetri( char* uplo, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, const lapack_int* ipiv,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_ssptri( char* uplo, lapack_int* n, float* ap,
+                    const lapack_int* ipiv, float* work, lapack_int *info );
+void LAPACK_dsptri( char* uplo, lapack_int* n, double* ap,
+                    const lapack_int* ipiv, double* work, lapack_int *info );
+void LAPACK_csptri( char* uplo, lapack_int* n, lapack_complex_float* ap,
+                    const lapack_int* ipiv, lapack_complex_float* work,
+                    lapack_int *info );
+void LAPACK_zsptri( char* uplo, lapack_int* n, lapack_complex_double* ap,
+                    const lapack_int* ipiv, lapack_complex_double* work,
+                    lapack_int *info );
+void LAPACK_chptri( char* uplo, lapack_int* n, lapack_complex_float* ap,
+                    const lapack_int* ipiv, lapack_complex_float* work,
+                    lapack_int *info );
+void LAPACK_zhptri( char* uplo, lapack_int* n, lapack_complex_double* ap,
+                    const lapack_int* ipiv, lapack_complex_double* work,
+                    lapack_int *info );
+void LAPACK_strtri( char* uplo, char* diag, lapack_int* n, float* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_dtrtri( char* uplo, char* diag, lapack_int* n, double* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_ctrtri( char* uplo, char* diag, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_ztrtri( char* uplo, char* diag, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_dtftri( char* transr, char* uplo, char* diag, lapack_int* n,
+                    double* a, lapack_int *info );
+void LAPACK_stftri( char* transr, char* uplo, char* diag, lapack_int* n,
+                    float* a, lapack_int *info );
+void LAPACK_ztftri( char* transr, char* uplo, char* diag, lapack_int* n,
+                    lapack_complex_double* a, lapack_int *info );
+void LAPACK_ctftri( char* transr, char* uplo, char* diag, lapack_int* n,
+                    lapack_complex_float* a, lapack_int *info );
+void LAPACK_stptri( char* uplo, char* diag, lapack_int* n, float* ap,
+                    lapack_int *info );
+void LAPACK_dtptri( char* uplo, char* diag, lapack_int* n, double* ap,
+                    lapack_int *info );
+void LAPACK_ctptri( char* uplo, char* diag, lapack_int* n,
+                    lapack_complex_float* ap, lapack_int *info );
+void LAPACK_ztptri( char* uplo, char* diag, lapack_int* n,
+                    lapack_complex_double* ap, lapack_int *info );
+void LAPACK_sgeequ( lapack_int* m, lapack_int* n, const float* a,
+                    lapack_int* lda, float* r, float* c, float* rowcnd,
+                    float* colcnd, float* amax, lapack_int *info );
+void LAPACK_dgeequ( lapack_int* m, lapack_int* n, const double* a,
+                    lapack_int* lda, double* r, double* c, double* rowcnd,
+                    double* colcnd, double* amax, lapack_int *info );
+void LAPACK_cgeequ( lapack_int* m, lapack_int* n, const lapack_complex_float* a,
+                    lapack_int* lda, float* r, float* c, float* rowcnd,
+                    float* colcnd, float* amax, lapack_int *info );
+void LAPACK_zgeequ( lapack_int* m, lapack_int* n,
+                    const lapack_complex_double* a, lapack_int* lda, double* r,
+                    double* c, double* rowcnd, double* colcnd, double* amax,
+                    lapack_int *info );
+void LAPACK_dgeequb( lapack_int* m, lapack_int* n, const double* a,
+                     lapack_int* lda, double* r, double* c, double* rowcnd,
+                     double* colcnd, double* amax, lapack_int *info );
+void LAPACK_sgeequb( lapack_int* m, lapack_int* n, const float* a,
+                     lapack_int* lda, float* r, float* c, float* rowcnd,
+                     float* colcnd, float* amax, lapack_int *info );
+void LAPACK_zgeequb( lapack_int* m, lapack_int* n,
+                     const lapack_complex_double* a, lapack_int* lda, double* r,
+                     double* c, double* rowcnd, double* colcnd, double* amax,
+                     lapack_int *info );
+void LAPACK_cgeequb( lapack_int* m, lapack_int* n,
+                     const lapack_complex_float* a, lapack_int* lda, float* r,
+                     float* c, float* rowcnd, float* colcnd, float* amax,
+                     lapack_int *info );
+void LAPACK_sgbequ( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, const float* ab, lapack_int* ldab, float* r,
+                    float* c, float* rowcnd, float* colcnd, float* amax,
+                    lapack_int *info );
+void LAPACK_dgbequ( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, const double* ab, lapack_int* ldab,
+                    double* r, double* c, double* rowcnd, double* colcnd,
+                    double* amax, lapack_int *info );
+void LAPACK_cgbequ( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, const lapack_complex_float* ab,
+                    lapack_int* ldab, float* r, float* c, float* rowcnd,
+                    float* colcnd, float* amax, lapack_int *info );
+void LAPACK_zgbequ( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, const lapack_complex_double* ab,
+                    lapack_int* ldab, double* r, double* c, double* rowcnd,
+                    double* colcnd, double* amax, lapack_int *info );
+void LAPACK_dgbequb( lapack_int* m, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, const double* ab, lapack_int* ldab,
+                     double* r, double* c, double* rowcnd, double* colcnd,
+                     double* amax, lapack_int *info );
+void LAPACK_sgbequb( lapack_int* m, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, const float* ab, lapack_int* ldab,
+                     float* r, float* c, float* rowcnd, float* colcnd,
+                     float* amax, lapack_int *info );
+void LAPACK_zgbequb( lapack_int* m, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, const lapack_complex_double* ab,
+                     lapack_int* ldab, double* r, double* c, double* rowcnd,
+                     double* colcnd, double* amax, lapack_int *info );
+void LAPACK_cgbequb( lapack_int* m, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, const lapack_complex_float* ab,
+                     lapack_int* ldab, float* r, float* c, float* rowcnd,
+                     float* colcnd, float* amax, lapack_int *info );
+void LAPACK_spoequ( lapack_int* n, const float* a, lapack_int* lda, float* s,
+                    float* scond, float* amax, lapack_int *info );
+void LAPACK_dpoequ( lapack_int* n, const double* a, lapack_int* lda, double* s,
+                    double* scond, double* amax, lapack_int *info );
+void LAPACK_cpoequ( lapack_int* n, const lapack_complex_float* a,
+                    lapack_int* lda, float* s, float* scond, float* amax,
+                    lapack_int *info );
+void LAPACK_zpoequ( lapack_int* n, const lapack_complex_double* a,
+                    lapack_int* lda, double* s, double* scond, double* amax,
+                    lapack_int *info );
+void LAPACK_dpoequb( lapack_int* n, const double* a, lapack_int* lda, double* s,
+                     double* scond, double* amax, lapack_int *info );
+void LAPACK_spoequb( lapack_int* n, const float* a, lapack_int* lda, float* s,
+                     float* scond, float* amax, lapack_int *info );
+void LAPACK_zpoequb( lapack_int* n, const lapack_complex_double* a,
+                     lapack_int* lda, double* s, double* scond, double* amax,
+                     lapack_int *info );
+void LAPACK_cpoequb( lapack_int* n, const lapack_complex_float* a,
+                     lapack_int* lda, float* s, float* scond, float* amax,
+                     lapack_int *info );
+void LAPACK_sppequ( char* uplo, lapack_int* n, const float* ap, float* s,
+                    float* scond, float* amax, lapack_int *info );
+void LAPACK_dppequ( char* uplo, lapack_int* n, const double* ap, double* s,
+                    double* scond, double* amax, lapack_int *info );
+void LAPACK_cppequ( char* uplo, lapack_int* n, const lapack_complex_float* ap,
+                    float* s, float* scond, float* amax, lapack_int *info );
+void LAPACK_zppequ( char* uplo, lapack_int* n, const lapack_complex_double* ap,
+                    double* s, double* scond, double* amax, lapack_int *info );
+void LAPACK_spbequ( char* uplo, lapack_int* n, lapack_int* kd, const float* ab,
+                    lapack_int* ldab, float* s, float* scond, float* amax,
+                    lapack_int *info );
+void LAPACK_dpbequ( char* uplo, lapack_int* n, lapack_int* kd, const double* ab,
+                    lapack_int* ldab, double* s, double* scond, double* amax,
+                    lapack_int *info );
+void LAPACK_cpbequ( char* uplo, lapack_int* n, lapack_int* kd,
+                    const lapack_complex_float* ab, lapack_int* ldab, float* s,
+                    float* scond, float* amax, lapack_int *info );
+void LAPACK_zpbequ( char* uplo, lapack_int* n, lapack_int* kd,
+                    const lapack_complex_double* ab, lapack_int* ldab,
+                    double* s, double* scond, double* amax, lapack_int *info );
+void LAPACK_dsyequb( char* uplo, lapack_int* n, const double* a,
+                     lapack_int* lda, double* s, double* scond, double* amax,
+                     double* work, lapack_int *info );
+void LAPACK_ssyequb( char* uplo, lapack_int* n, const float* a, lapack_int* lda,
+                     float* s, float* scond, float* amax, float* work,
+                     lapack_int *info );
+void LAPACK_zsyequb( char* uplo, lapack_int* n, const lapack_complex_double* a,
+                     lapack_int* lda, double* s, double* scond, double* amax,
+                     lapack_complex_double* work, lapack_int *info );
+void LAPACK_csyequb( char* uplo, lapack_int* n, const lapack_complex_float* a,
+                     lapack_int* lda, float* s, float* scond, float* amax,
+                     lapack_complex_float* work, lapack_int *info );
+void LAPACK_zheequb( char* uplo, lapack_int* n, const lapack_complex_double* a,
+                     lapack_int* lda, double* s, double* scond, double* amax,
+                     lapack_complex_double* work, lapack_int *info );
+void LAPACK_cheequb( char* uplo, lapack_int* n, const lapack_complex_float* a,
+                     lapack_int* lda, float* s, float* scond, float* amax,
+                     lapack_complex_float* work, lapack_int *info );
+void LAPACK_sgesv( lapack_int* n, lapack_int* nrhs, float* a, lapack_int* lda,
+                   lapack_int* ipiv, float* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_dgesv( lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda,
+                   lapack_int* ipiv, double* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_cgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_float* a,
+                   lapack_int* lda, lapack_int* ipiv, lapack_complex_float* b,
+                   lapack_int* ldb, lapack_int *info );
+void LAPACK_zgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* a,
+                   lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b,
+                   lapack_int* ldb, lapack_int *info );
+void LAPACK_dsgesv( lapack_int* n, lapack_int* nrhs, double* a, lapack_int* lda,
+                    lapack_int* ipiv, double* b, lapack_int* ldb, double* x,
+                    lapack_int* ldx, double* work, float* swork,
+                    lapack_int* iter, lapack_int *info );
+void LAPACK_zcgesv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* ipiv, lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                    lapack_complex_double* work, lapack_complex_float* swork,
+                    double* rwork, lapack_int* iter, lapack_int *info );
+void LAPACK_sgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                    float* a, lapack_int* lda, float* af, lapack_int* ldaf,
+                    lapack_int* ipiv, char* equed, float* r, float* c, float* b,
+                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                    double* a, lapack_int* lda, double* af, lapack_int* ldaf,
+                    lapack_int* ipiv, char* equed, double* r, double* c,
+                    double* b, lapack_int* ldb, double* x, lapack_int* ldx,
+                    double* rcond, double* ferr, double* berr, double* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_cgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* af, lapack_int* ldaf,
+                    lapack_int* ipiv, char* equed, float* r, float* c,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zgesvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* af, lapack_int* ldaf,
+                    lapack_int* ipiv, char* equed, double* r, double* c,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                    double* ferr, double* berr, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_dgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                     double* a, lapack_int* lda, double* af, lapack_int* ldaf,
+                     lapack_int* ipiv, char* equed, double* r, double* c,
+                     double* b, lapack_int* ldb, double* x, lapack_int* ldx,
+                     double* rcond, double* rpvgrw, double* berr,
+                     lapack_int* n_err_bnds, double* err_bnds_norm,
+                     double* err_bnds_comp, lapack_int* nparams, double* params,
+                     double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_sgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                     float* a, lapack_int* lda, float* af, lapack_int* ldaf,
+                     lapack_int* ipiv, char* equed, float* r, float* c,
+                     float* b, lapack_int* ldb, float* x, lapack_int* ldx,
+                     float* rcond, float* rpvgrw, float* berr,
+                     lapack_int* n_err_bnds, float* err_bnds_norm,
+                     float* err_bnds_comp, lapack_int* nparams, float* params,
+                     float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_zgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                     lapack_complex_double* a, lapack_int* lda,
+                     lapack_complex_double* af, lapack_int* ldaf,
+                     lapack_int* ipiv, char* equed, double* r, double* c,
+                     lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params,
+                     lapack_complex_double* work, double* rwork,
+                     lapack_int *info );
+void LAPACK_cgesvxx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                     lapack_complex_float* a, lapack_int* lda,
+                     lapack_complex_float* af, lapack_int* ldaf,
+                     lapack_int* ipiv, char* equed, float* r, float* c,
+                     lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,
+                     float* err_bnds_norm, float* err_bnds_comp,
+                     lapack_int* nparams, float* params,
+                     lapack_complex_float* work, float* rwork,
+                     lapack_int *info );
+void LAPACK_sgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku,
+                   lapack_int* nrhs, float* ab, lapack_int* ldab,
+                   lapack_int* ipiv, float* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_dgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku,
+                   lapack_int* nrhs, double* ab, lapack_int* ldab,
+                   lapack_int* ipiv, double* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_cgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku,
+                   lapack_int* nrhs, lapack_complex_float* ab, lapack_int* ldab,
+                   lapack_int* ipiv, lapack_complex_float* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_zgbsv( lapack_int* n, lapack_int* kl, lapack_int* ku,
+                   lapack_int* nrhs, lapack_complex_double* ab,
+                   lapack_int* ldab, lapack_int* ipiv, lapack_complex_double* b,
+                   lapack_int* ldb, lapack_int *info );
+void LAPACK_sgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, lapack_int* nrhs, float* ab,
+                    lapack_int* ldab, float* afb, lapack_int* ldafb,
+                    lapack_int* ipiv, char* equed, float* r, float* c, float* b,
+                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, lapack_int* nrhs, double* ab,
+                    lapack_int* ldab, double* afb, lapack_int* ldafb,
+                    lapack_int* ipiv, char* equed, double* r, double* c,
+                    double* b, lapack_int* ldb, double* x, lapack_int* ldx,
+                    double* rcond, double* ferr, double* berr, double* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_cgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab,
+                    lapack_int* ldab, lapack_complex_float* afb,
+                    lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r,
+                    float* c, lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zgbsvx( char* fact, char* trans, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, lapack_int* nrhs, lapack_complex_double* ab,
+                    lapack_int* ldab, lapack_complex_double* afb,
+                    lapack_int* ldafb, lapack_int* ipiv, char* equed, double* r,
+                    double* c, lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                    double* ferr, double* berr, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_dgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, lapack_int* nrhs, double* ab,
+                     lapack_int* ldab, double* afb, lapack_int* ldafb,
+                     lapack_int* ipiv, char* equed, double* r, double* c,
+                     double* b, lapack_int* ldb, double* x, lapack_int* ldx,
+                     double* rcond, double* rpvgrw, double* berr,
+                     lapack_int* n_err_bnds, double* err_bnds_norm,
+                     double* err_bnds_comp, lapack_int* nparams, double* params,
+                     double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_sgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, lapack_int* nrhs, float* ab,
+                     lapack_int* ldab, float* afb, lapack_int* ldafb,
+                     lapack_int* ipiv, char* equed, float* r, float* c,
+                     float* b, lapack_int* ldb, float* x, lapack_int* ldx,
+                     float* rcond, float* rpvgrw, float* berr,
+                     lapack_int* n_err_bnds, float* err_bnds_norm,
+                     float* err_bnds_comp, lapack_int* nparams, float* params,
+                     float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_zgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, lapack_int* nrhs,
+                     lapack_complex_double* ab, lapack_int* ldab,
+                     lapack_complex_double* afb, lapack_int* ldafb,
+                     lapack_int* ipiv, char* equed, double* r, double* c,
+                     lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params,
+                     lapack_complex_double* work, double* rwork,
+                     lapack_int *info );
+void LAPACK_cgbsvxx( char* fact, char* trans, lapack_int* n, lapack_int* kl,
+                     lapack_int* ku, lapack_int* nrhs, lapack_complex_float* ab,
+                     lapack_int* ldab, lapack_complex_float* afb,
+                     lapack_int* ldafb, lapack_int* ipiv, char* equed, float* r,
+                     float* c, lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,
+                     float* err_bnds_norm, float* err_bnds_comp,
+                     lapack_int* nparams, float* params,
+                     lapack_complex_float* work, float* rwork,
+                     lapack_int *info );
+void LAPACK_sgtsv( lapack_int* n, lapack_int* nrhs, float* dl, float* d,
+                   float* du, float* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_dgtsv( lapack_int* n, lapack_int* nrhs, double* dl, double* d,
+                   double* du, double* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_cgtsv( lapack_int* n, lapack_int* nrhs, lapack_complex_float* dl,
+                   lapack_complex_float* d, lapack_complex_float* du,
+                   lapack_complex_float* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_zgtsv( lapack_int* n, lapack_int* nrhs, lapack_complex_double* dl,
+                   lapack_complex_double* d, lapack_complex_double* du,
+                   lapack_complex_double* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_sgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                    const float* dl, const float* d, const float* du,
+                    float* dlf, float* df, float* duf, float* du2,
+                    lapack_int* ipiv, const float* b, lapack_int* ldb, float* x,
+                    lapack_int* ldx, float* rcond, float* ferr, float* berr,
+                    float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_dgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                    const double* dl, const double* d, const double* du,
+                    double* dlf, double* df, double* duf, double* du2,
+                    lapack_int* ipiv, const double* b, lapack_int* ldb,
+                    double* x, lapack_int* ldx, double* rcond, double* ferr,
+                    double* berr, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_cgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* dl,
+                    const lapack_complex_float* d,
+                    const lapack_complex_float* du, lapack_complex_float* dlf,
+                    lapack_complex_float* df, lapack_complex_float* duf,
+                    lapack_complex_float* du2, lapack_int* ipiv,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zgtsvx( char* fact, char* trans, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* dl,
+                    const lapack_complex_double* d,
+                    const lapack_complex_double* du, lapack_complex_double* dlf,
+                    lapack_complex_double* df, lapack_complex_double* duf,
+                    lapack_complex_double* du2, lapack_int* ipiv,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                    double* ferr, double* berr, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_sposv( char* uplo, lapack_int* n, lapack_int* nrhs, float* a,
+                   lapack_int* lda, float* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_dposv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a,
+                   lapack_int* lda, double* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_cposv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_float* a, lapack_int* lda,
+                   lapack_complex_float* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_zposv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_double* a, lapack_int* lda,
+                   lapack_complex_double* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_dsposv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a,
+                    lapack_int* lda, double* b, lapack_int* ldb, double* x,
+                    lapack_int* ldx, double* work, float* swork,
+                    lapack_int* iter, lapack_int *info );
+void LAPACK_zcposv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx,
+                    lapack_complex_double* work, lapack_complex_float* swork,
+                    double* rwork, lapack_int* iter, lapack_int *info );
+void LAPACK_sposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    float* a, lapack_int* lda, float* af, lapack_int* ldaf,
+                    char* equed, float* s, float* b, lapack_int* ldb, float* x,
+                    lapack_int* ldx, float* rcond, float* ferr, float* berr,
+                    float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_dposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    double* a, lapack_int* lda, double* af, lapack_int* ldaf,
+                    char* equed, double* s, double* b, lapack_int* ldb,
+                    double* x, lapack_int* ldx, double* rcond, double* ferr,
+                    double* berr, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_cposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* af, lapack_int* ldaf, char* equed,
+                    float* s, lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zposvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* af, lapack_int* ldaf, char* equed,
+                    double* s, lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                    double* ferr, double* berr, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_dposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                     double* a, lapack_int* lda, double* af, lapack_int* ldaf,
+                     char* equed, double* s, double* b, lapack_int* ldb,
+                     double* x, lapack_int* ldx, double* rcond, double* rpvgrw,
+                     double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params, double* work,
+                     lapack_int* iwork, lapack_int *info );
+void LAPACK_sposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                     float* a, lapack_int* lda, float* af, lapack_int* ldaf,
+                     char* equed, float* s, float* b, lapack_int* ldb, float* x,
+                     lapack_int* ldx, float* rcond, float* rpvgrw, float* berr,
+                     lapack_int* n_err_bnds, float* err_bnds_norm,
+                     float* err_bnds_comp, lapack_int* nparams, float* params,
+                     float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_zposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                     lapack_complex_double* a, lapack_int* lda,
+                     lapack_complex_double* af, lapack_int* ldaf, char* equed,
+                     double* s, lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params,
+                     lapack_complex_double* work, double* rwork,
+                     lapack_int *info );
+void LAPACK_cposvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                     lapack_complex_float* a, lapack_int* lda,
+                     lapack_complex_float* af, lapack_int* ldaf, char* equed,
+                     float* s, lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,
+                     float* err_bnds_norm, float* err_bnds_comp,
+                     lapack_int* nparams, float* params,
+                     lapack_complex_float* work, float* rwork,
+                     lapack_int *info );
+void LAPACK_sppsv( char* uplo, lapack_int* n, lapack_int* nrhs, float* ap,
+                   float* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_dppsv( char* uplo, lapack_int* n, lapack_int* nrhs, double* ap,
+                   double* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_cppsv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_float* ap, lapack_complex_float* b,
+                   lapack_int* ldb, lapack_int *info );
+void LAPACK_zppsv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_double* ap, lapack_complex_double* b,
+                   lapack_int* ldb, lapack_int *info );
+void LAPACK_sppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    float* ap, float* afp, char* equed, float* s, float* b,
+                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    double* ap, double* afp, char* equed, double* s, double* b,
+                    lapack_int* ldb, double* x, lapack_int* ldx, double* rcond,
+                    double* ferr, double* berr, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_cppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_float* ap, lapack_complex_float* afp,
+                    char* equed, float* s, lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,
+                    float* rcond, float* ferr, float* berr,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zppsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_double* ap, lapack_complex_double* afp,
+                    char* equed, double* s, lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                    double* rcond, double* ferr, double* berr,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_spbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                   float* ab, lapack_int* ldab, float* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_dpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                   double* ab, lapack_int* ldab, double* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_cpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                   lapack_complex_float* ab, lapack_int* ldab,
+                   lapack_complex_float* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_zpbsv( char* uplo, lapack_int* n, lapack_int* kd, lapack_int* nrhs,
+                   lapack_complex_double* ab, lapack_int* ldab,
+                   lapack_complex_double* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_spbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd,
+                    lapack_int* nrhs, float* ab, lapack_int* ldab, float* afb,
+                    lapack_int* ldafb, char* equed, float* s, float* b,
+                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd,
+                    lapack_int* nrhs, double* ab, lapack_int* ldab, double* afb,
+                    lapack_int* ldafb, char* equed, double* s, double* b,
+                    lapack_int* ldb, double* x, lapack_int* ldx, double* rcond,
+                    double* ferr, double* berr, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_cpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd,
+                    lapack_int* nrhs, lapack_complex_float* ab,
+                    lapack_int* ldab, lapack_complex_float* afb,
+                    lapack_int* ldafb, char* equed, float* s,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zpbsvx( char* fact, char* uplo, lapack_int* n, lapack_int* kd,
+                    lapack_int* nrhs, lapack_complex_double* ab,
+                    lapack_int* ldab, lapack_complex_double* afb,
+                    lapack_int* ldafb, char* equed, double* s,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                    double* ferr, double* berr, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_sptsv( lapack_int* n, lapack_int* nrhs, float* d, float* e,
+                   float* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_dptsv( lapack_int* n, lapack_int* nrhs, double* d, double* e,
+                   double* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_cptsv( lapack_int* n, lapack_int* nrhs, float* d,
+                   lapack_complex_float* e, lapack_complex_float* b,
+                   lapack_int* ldb, lapack_int *info );
+void LAPACK_zptsv( lapack_int* n, lapack_int* nrhs, double* d,
+                   lapack_complex_double* e, lapack_complex_double* b,
+                   lapack_int* ldb, lapack_int *info );
+void LAPACK_sptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const float* d,
+                    const float* e, float* df, float* ef, const float* b,
+                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, float* work, lapack_int *info );
+void LAPACK_dptsvx( char* fact, lapack_int* n, lapack_int* nrhs,
+                    const double* d, const double* e, double* df, double* ef,
+                    const double* b, lapack_int* ldb, double* x,
+                    lapack_int* ldx, double* rcond, double* ferr, double* berr,
+                    double* work, lapack_int *info );
+void LAPACK_cptsvx( char* fact, lapack_int* n, lapack_int* nrhs, const float* d,
+                    const lapack_complex_float* e, float* df,
+                    lapack_complex_float* ef, const lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,
+                    float* rcond, float* ferr, float* berr,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zptsvx( char* fact, lapack_int* n, lapack_int* nrhs,
+                    const double* d, const lapack_complex_double* e, double* df,
+                    lapack_complex_double* ef, const lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                    double* rcond, double* ferr, double* berr,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_ssysv( char* uplo, lapack_int* n, lapack_int* nrhs, float* a,
+                   lapack_int* lda, lapack_int* ipiv, float* b, lapack_int* ldb,
+                   float* work, lapack_int* lwork, lapack_int *info );
+void LAPACK_dsysv( char* uplo, lapack_int* n, lapack_int* nrhs, double* a,
+                   lapack_int* lda, lapack_int* ipiv, double* b,
+                   lapack_int* ldb, double* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_csysv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv,
+                   lapack_complex_float* b, lapack_int* ldb,
+                   lapack_complex_float* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_zsysv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv,
+                   lapack_complex_double* b, lapack_int* ldb,
+                   lapack_complex_double* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_ssysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const float* a, lapack_int* lda, float* af,
+                    lapack_int* ldaf, lapack_int* ipiv, const float* b,
+                    lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,
+                    float* ferr, float* berr, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_dsysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* a, lapack_int* lda, double* af,
+                    lapack_int* ldaf, lapack_int* ipiv, const double* b,
+                    lapack_int* ldb, double* x, lapack_int* ldx, double* rcond,
+                    double* ferr, double* berr, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_csysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* af, lapack_int* ldaf,
+                    lapack_int* ipiv, const lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,
+                    float* rcond, float* ferr, float* berr,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int *info );
+void LAPACK_zsysvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* af, lapack_int* ldaf,
+                    lapack_int* ipiv, const lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                    double* rcond, double* ferr, double* berr,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int *info );
+void LAPACK_dsysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                     double* a, lapack_int* lda, double* af, lapack_int* ldaf,
+                     lapack_int* ipiv, char* equed, double* s, double* b,
+                     lapack_int* ldb, double* x, lapack_int* ldx, double* rcond,
+                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params, double* work,
+                     lapack_int* iwork, lapack_int *info );
+void LAPACK_ssysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                     float* a, lapack_int* lda, float* af, lapack_int* ldaf,
+                     lapack_int* ipiv, char* equed, float* s, float* b,
+                     lapack_int* ldb, float* x, lapack_int* ldx, float* rcond,
+                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,
+                     float* err_bnds_norm, float* err_bnds_comp,
+                     lapack_int* nparams, float* params, float* work,
+                     lapack_int* iwork, lapack_int *info );
+void LAPACK_zsysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                     lapack_complex_double* a, lapack_int* lda,
+                     lapack_complex_double* af, lapack_int* ldaf,
+                     lapack_int* ipiv, char* equed, double* s,
+                     lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params,
+                     lapack_complex_double* work, double* rwork,
+                     lapack_int *info );
+void LAPACK_csysvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                     lapack_complex_float* a, lapack_int* lda,
+                     lapack_complex_float* af, lapack_int* ldaf,
+                     lapack_int* ipiv, char* equed, float* s,
+                     lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,
+                     float* err_bnds_norm, float* err_bnds_comp,
+                     lapack_int* nparams, float* params,
+                     lapack_complex_float* work, float* rwork,
+                     lapack_int *info );
+void LAPACK_chesv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_float* a, lapack_int* lda, lapack_int* ipiv,
+                   lapack_complex_float* b, lapack_int* ldb,
+                   lapack_complex_float* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_zhesv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_double* a, lapack_int* lda, lapack_int* ipiv,
+                   lapack_complex_double* b, lapack_int* ldb,
+                   lapack_complex_double* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_chesvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* af, lapack_int* ldaf,
+                    lapack_int* ipiv, const lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,
+                    float* rcond, float* ferr, float* berr,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int *info );
+void LAPACK_zhesvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* af, lapack_int* ldaf,
+                    lapack_int* ipiv, const lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                    double* rcond, double* ferr, double* berr,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int *info );
+void LAPACK_zhesvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                     lapack_complex_double* a, lapack_int* lda,
+                     lapack_complex_double* af, lapack_int* ldaf,
+                     lapack_int* ipiv, char* equed, double* s,
+                     lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* x, lapack_int* ldx, double* rcond,
+                     double* rpvgrw, double* berr, lapack_int* n_err_bnds,
+                     double* err_bnds_norm, double* err_bnds_comp,
+                     lapack_int* nparams, double* params,
+                     lapack_complex_double* work, double* rwork,
+                     lapack_int *info );
+void LAPACK_chesvxx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                     lapack_complex_float* a, lapack_int* lda,
+                     lapack_complex_float* af, lapack_int* ldaf,
+                     lapack_int* ipiv, char* equed, float* s,
+                     lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* x, lapack_int* ldx, float* rcond,
+                     float* rpvgrw, float* berr, lapack_int* n_err_bnds,
+                     float* err_bnds_norm, float* err_bnds_comp,
+                     lapack_int* nparams, float* params,
+                     lapack_complex_float* work, float* rwork,
+                     lapack_int *info );
+void LAPACK_sspsv( char* uplo, lapack_int* n, lapack_int* nrhs, float* ap,
+                   lapack_int* ipiv, float* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_dspsv( char* uplo, lapack_int* n, lapack_int* nrhs, double* ap,
+                   lapack_int* ipiv, double* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_cspsv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_float* ap, lapack_int* ipiv,
+                   lapack_complex_float* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_zspsv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_double* ap, lapack_int* ipiv,
+                   lapack_complex_double* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_sspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const float* ap, float* afp, lapack_int* ipiv,
+                    const float* b, lapack_int* ldb, float* x, lapack_int* ldx,
+                    float* rcond, float* ferr, float* berr, float* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_dspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const double* ap, double* afp, lapack_int* ipiv,
+                    const double* b, lapack_int* ldb, double* x,
+                    lapack_int* ldx, double* rcond, double* ferr, double* berr,
+                    double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_cspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* ap, lapack_complex_float* afp,
+                    lapack_int* ipiv, const lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,
+                    float* rcond, float* ferr, float* berr,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zspsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* ap, lapack_complex_double* afp,
+                    lapack_int* ipiv, const lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                    double* rcond, double* ferr, double* berr,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_chpsv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_float* ap, lapack_int* ipiv,
+                   lapack_complex_float* b, lapack_int* ldb, lapack_int *info );
+void LAPACK_zhpsv( char* uplo, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_double* ap, lapack_int* ipiv,
+                   lapack_complex_double* b, lapack_int* ldb,
+                   lapack_int *info );
+void LAPACK_chpsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_float* ap, lapack_complex_float* afp,
+                    lapack_int* ipiv, const lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* x, lapack_int* ldx,
+                    float* rcond, float* ferr, float* berr,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zhpsvx( char* fact, char* uplo, lapack_int* n, lapack_int* nrhs,
+                    const lapack_complex_double* ap, lapack_complex_double* afp,
+                    lapack_int* ipiv, const lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* x, lapack_int* ldx,
+                    double* rcond, double* ferr, double* berr,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_sgeqrf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    float* tau, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dgeqrf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    double* tau, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cgeqrf( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* tau,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zgeqrf( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sgeqpf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int* jpvt, float* tau, float* work,
+                    lapack_int *info );
+void LAPACK_dgeqpf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int* jpvt, double* tau, double* work,
+                    lapack_int *info );
+void LAPACK_cgeqpf( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int* jpvt,
+                    lapack_complex_float* tau, lapack_complex_float* work,
+                    float* rwork, lapack_int *info );
+void LAPACK_zgeqpf( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* jpvt,
+                    lapack_complex_double* tau, lapack_complex_double* work,
+                    double* rwork, lapack_int *info );
+void LAPACK_sgeqp3( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int* jpvt, float* tau, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dgeqp3( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int* jpvt, double* tau, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cgeqp3( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int* jpvt,
+                    lapack_complex_float* tau, lapack_complex_float* work,
+                    lapack_int* lwork, float* rwork, lapack_int *info );
+void LAPACK_zgeqp3( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* jpvt,
+                    lapack_complex_double* tau, lapack_complex_double* work,
+                    lapack_int* lwork, double* rwork, lapack_int *info );
+void LAPACK_sorgqr( lapack_int* m, lapack_int* n, lapack_int* k, float* a,
+                    lapack_int* lda, const float* tau, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dorgqr( lapack_int* m, lapack_int* n, lapack_int* k, double* a,
+                    lapack_int* lda, const double* tau, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sormqr( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const float* a, lapack_int* lda,
+                    const float* tau, float* c, lapack_int* ldc, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dormqr( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const double* a, lapack_int* lda,
+                    const double* tau, double* c, lapack_int* ldc, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cungqr( lapack_int* m, lapack_int* n, lapack_int* k,
+                    lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* tau, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zungqr( lapack_int* m, lapack_int* n, lapack_int* k,
+                    lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cunmqr( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* tau,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zunmqr( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const lapack_complex_double* a,
+                    lapack_int* lda, const lapack_complex_double* tau,
+                    lapack_complex_double* c, lapack_int* ldc,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sgelqf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    float* tau, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dgelqf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    double* tau, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cgelqf( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* tau,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zgelqf( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sorglq( lapack_int* m, lapack_int* n, lapack_int* k, float* a,
+                    lapack_int* lda, const float* tau, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dorglq( lapack_int* m, lapack_int* n, lapack_int* k, double* a,
+                    lapack_int* lda, const double* tau, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sormlq( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const float* a, lapack_int* lda,
+                    const float* tau, float* c, lapack_int* ldc, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dormlq( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const double* a, lapack_int* lda,
+                    const double* tau, double* c, lapack_int* ldc, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cunglq( lapack_int* m, lapack_int* n, lapack_int* k,
+                    lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* tau, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zunglq( lapack_int* m, lapack_int* n, lapack_int* k,
+                    lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cunmlq( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* tau,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zunmlq( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const lapack_complex_double* a,
+                    lapack_int* lda, const lapack_complex_double* tau,
+                    lapack_complex_double* c, lapack_int* ldc,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sgeqlf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    float* tau, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dgeqlf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    double* tau, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cgeqlf( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* tau,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zgeqlf( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sorgql( lapack_int* m, lapack_int* n, lapack_int* k, float* a,
+                    lapack_int* lda, const float* tau, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dorgql( lapack_int* m, lapack_int* n, lapack_int* k, double* a,
+                    lapack_int* lda, const double* tau, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cungql( lapack_int* m, lapack_int* n, lapack_int* k,
+                    lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* tau, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zungql( lapack_int* m, lapack_int* n, lapack_int* k,
+                    lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sormql( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const float* a, lapack_int* lda,
+                    const float* tau, float* c, lapack_int* ldc, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dormql( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const double* a, lapack_int* lda,
+                    const double* tau, double* c, lapack_int* ldc, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cunmql( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* tau,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zunmql( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const lapack_complex_double* a,
+                    lapack_int* lda, const lapack_complex_double* tau,
+                    lapack_complex_double* c, lapack_int* ldc,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sgerqf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    float* tau, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dgerqf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    double* tau, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cgerqf( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* tau,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zgerqf( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sorgrq( lapack_int* m, lapack_int* n, lapack_int* k, float* a,
+                    lapack_int* lda, const float* tau, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dorgrq( lapack_int* m, lapack_int* n, lapack_int* k, double* a,
+                    lapack_int* lda, const double* tau, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cungrq( lapack_int* m, lapack_int* n, lapack_int* k,
+                    lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* tau, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zungrq( lapack_int* m, lapack_int* n, lapack_int* k,
+                    lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sormrq( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const float* a, lapack_int* lda,
+                    const float* tau, float* c, lapack_int* ldc, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dormrq( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const double* a, lapack_int* lda,
+                    const double* tau, double* c, lapack_int* ldc, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cunmrq( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* tau,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zunmrq( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, const lapack_complex_double* a,
+                    lapack_int* lda, const lapack_complex_double* tau,
+                    lapack_complex_double* c, lapack_int* ldc,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_stzrzf( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    float* tau, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dtzrzf( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    double* tau, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_ctzrzf( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* tau,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_ztzrzf( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sormrz( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, lapack_int* l, const float* a,
+                    lapack_int* lda, const float* tau, float* c,
+                    lapack_int* ldc, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dormrz( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, lapack_int* l, const double* a,
+                    lapack_int* lda, const double* tau, double* c,
+                    lapack_int* ldc, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cunmrz( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, lapack_int* l, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* tau,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zunmrz( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* k, lapack_int* l,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* tau, lapack_complex_double* c,
+                    lapack_int* ldc, lapack_complex_double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sggqrf( lapack_int* n, lapack_int* m, lapack_int* p, float* a,
+                    lapack_int* lda, float* taua, float* b, lapack_int* ldb,
+                    float* taub, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dggqrf( lapack_int* n, lapack_int* m, lapack_int* p, double* a,
+                    lapack_int* lda, double* taua, double* b, lapack_int* ldb,
+                    double* taub, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cggqrf( lapack_int* n, lapack_int* m, lapack_int* p,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* taua, lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* taub,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zggqrf( lapack_int* n, lapack_int* m, lapack_int* p,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* taua, lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* taub,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sggrqf( lapack_int* m, lapack_int* p, lapack_int* n, float* a,
+                    lapack_int* lda, float* taua, float* b, lapack_int* ldb,
+                    float* taub, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dggrqf( lapack_int* m, lapack_int* p, lapack_int* n, double* a,
+                    lapack_int* lda, double* taua, double* b, lapack_int* ldb,
+                    double* taub, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cggrqf( lapack_int* m, lapack_int* p, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* taua, lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* taub,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zggrqf( lapack_int* m, lapack_int* p, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* taua, lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* taub,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sgebrd( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    float* d, float* e, float* tauq, float* taup, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dgebrd( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    double* d, double* e, double* tauq, double* taup,
+                    double* work, lapack_int* lwork, lapack_int *info );
+void LAPACK_cgebrd( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, float* d, float* e,
+                    lapack_complex_float* tauq, lapack_complex_float* taup,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zgebrd( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, double* d, double* e,
+                    lapack_complex_double* tauq, lapack_complex_double* taup,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc,
+                    lapack_int* kl, lapack_int* ku, float* ab, lapack_int* ldab,
+                    float* d, float* e, float* q, lapack_int* ldq, float* pt,
+                    lapack_int* ldpt, float* c, lapack_int* ldc, float* work,
+                    lapack_int *info );
+void LAPACK_dgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc,
+                    lapack_int* kl, lapack_int* ku, double* ab,
+                    lapack_int* ldab, double* d, double* e, double* q,
+                    lapack_int* ldq, double* pt, lapack_int* ldpt, double* c,
+                    lapack_int* ldc, double* work, lapack_int *info );
+void LAPACK_cgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc,
+                    lapack_int* kl, lapack_int* ku, lapack_complex_float* ab,
+                    lapack_int* ldab, float* d, float* e,
+                    lapack_complex_float* q, lapack_int* ldq,
+                    lapack_complex_float* pt, lapack_int* ldpt,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zgbbrd( char* vect, lapack_int* m, lapack_int* n, lapack_int* ncc,
+                    lapack_int* kl, lapack_int* ku, lapack_complex_double* ab,
+                    lapack_int* ldab, double* d, double* e,
+                    lapack_complex_double* q, lapack_int* ldq,
+                    lapack_complex_double* pt, lapack_int* ldpt,
+                    lapack_complex_double* c, lapack_int* ldc,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_sorgbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k,
+                    float* a, lapack_int* lda, const float* tau, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dorgbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k,
+                    double* a, lapack_int* lda, const double* tau, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sormbr( char* vect, char* side, char* trans, lapack_int* m,
+                    lapack_int* n, lapack_int* k, const float* a,
+                    lapack_int* lda, const float* tau, float* c,
+                    lapack_int* ldc, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dormbr( char* vect, char* side, char* trans, lapack_int* m,
+                    lapack_int* n, lapack_int* k, const double* a,
+                    lapack_int* lda, const double* tau, double* c,
+                    lapack_int* ldc, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cungbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k,
+                    lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* tau, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zungbr( char* vect, lapack_int* m, lapack_int* n, lapack_int* k,
+                    lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cunmbr( char* vect, char* side, char* trans, lapack_int* m,
+                    lapack_int* n, lapack_int* k, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* tau,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zunmbr( char* vect, char* side, char* trans, lapack_int* m,
+                    lapack_int* n, lapack_int* k,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* tau, lapack_complex_double* c,
+                    lapack_int* ldc, lapack_complex_double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt,
+                    lapack_int* nru, lapack_int* ncc, float* d, float* e,
+                    float* vt, lapack_int* ldvt, float* u, lapack_int* ldu,
+                    float* c, lapack_int* ldc, float* work, lapack_int *info );
+void LAPACK_dbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt,
+                    lapack_int* nru, lapack_int* ncc, double* d, double* e,
+                    double* vt, lapack_int* ldvt, double* u, lapack_int* ldu,
+                    double* c, lapack_int* ldc, double* work,
+                    lapack_int *info );
+void LAPACK_cbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt,
+                    lapack_int* nru, lapack_int* ncc, float* d, float* e,
+                    lapack_complex_float* vt, lapack_int* ldvt,
+                    lapack_complex_float* u, lapack_int* ldu,
+                    lapack_complex_float* c, lapack_int* ldc, float* work,
+                    lapack_int *info );
+void LAPACK_zbdsqr( char* uplo, lapack_int* n, lapack_int* ncvt,
+                    lapack_int* nru, lapack_int* ncc, double* d, double* e,
+                    lapack_complex_double* vt, lapack_int* ldvt,
+                    lapack_complex_double* u, lapack_int* ldu,
+                    lapack_complex_double* c, lapack_int* ldc, double* work,
+                    lapack_int *info );
+void LAPACK_sbdsdc( char* uplo, char* compq, lapack_int* n, float* d, float* e,
+                    float* u, lapack_int* ldu, float* vt, lapack_int* ldvt,
+                    float* q, lapack_int* iq, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dbdsdc( char* uplo, char* compq, lapack_int* n, double* d,
+                    double* e, double* u, lapack_int* ldu, double* vt,
+                    lapack_int* ldvt, double* q, lapack_int* iq, double* work,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_ssytrd( char* uplo, lapack_int* n, float* a, lapack_int* lda,
+                    float* d, float* e, float* tau, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dsytrd( char* uplo, lapack_int* n, double* a, lapack_int* lda,
+                    double* d, double* e, double* tau, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sorgtr( char* uplo, lapack_int* n, float* a, lapack_int* lda,
+                    const float* tau, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dorgtr( char* uplo, lapack_int* n, double* a, lapack_int* lda,
+                    const double* tau, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_sormtr( char* side, char* uplo, char* trans, lapack_int* m,
+                    lapack_int* n, const float* a, lapack_int* lda,
+                    const float* tau, float* c, lapack_int* ldc, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dormtr( char* side, char* uplo, char* trans, lapack_int* m,
+                    lapack_int* n, const double* a, lapack_int* lda,
+                    const double* tau, double* c, lapack_int* ldc, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_chetrd( char* uplo, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, float* d, float* e,
+                    lapack_complex_float* tau, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zhetrd( char* uplo, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, double* d, double* e,
+                    lapack_complex_double* tau, lapack_complex_double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cungtr( char* uplo, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* tau,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zungtr( char* uplo, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, const lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cunmtr( char* side, char* uplo, char* trans, lapack_int* m,
+                    lapack_int* n, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* tau,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_zunmtr( char* side, char* uplo, char* trans, lapack_int* m,
+                    lapack_int* n, const lapack_complex_double* a,
+                    lapack_int* lda, const lapack_complex_double* tau,
+                    lapack_complex_double* c, lapack_int* ldc,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_ssptrd( char* uplo, lapack_int* n, float* ap, float* d, float* e,
+                    float* tau, lapack_int *info );
+void LAPACK_dsptrd( char* uplo, lapack_int* n, double* ap, double* d, double* e,
+                    double* tau, lapack_int *info );
+void LAPACK_sopgtr( char* uplo, lapack_int* n, const float* ap,
+                    const float* tau, float* q, lapack_int* ldq, float* work,
+                    lapack_int *info );
+void LAPACK_dopgtr( char* uplo, lapack_int* n, const double* ap,
+                    const double* tau, double* q, lapack_int* ldq, double* work,
+                    lapack_int *info );
+void LAPACK_sopmtr( char* side, char* uplo, char* trans, lapack_int* m,
+                    lapack_int* n, const float* ap, const float* tau, float* c,
+                    lapack_int* ldc, float* work, lapack_int *info );
+void LAPACK_dopmtr( char* side, char* uplo, char* trans, lapack_int* m,
+                    lapack_int* n, const double* ap, const double* tau,
+                    double* c, lapack_int* ldc, double* work,
+                    lapack_int *info );
+void LAPACK_chptrd( char* uplo, lapack_int* n, lapack_complex_float* ap,
+                    float* d, float* e, lapack_complex_float* tau,
+                    lapack_int *info );
+void LAPACK_zhptrd( char* uplo, lapack_int* n, lapack_complex_double* ap,
+                    double* d, double* e, lapack_complex_double* tau,
+                    lapack_int *info );
+void LAPACK_cupgtr( char* uplo, lapack_int* n, const lapack_complex_float* ap,
+                    const lapack_complex_float* tau, lapack_complex_float* q,
+                    lapack_int* ldq, lapack_complex_float* work,
+                    lapack_int *info );
+void LAPACK_zupgtr( char* uplo, lapack_int* n, const lapack_complex_double* ap,
+                    const lapack_complex_double* tau, lapack_complex_double* q,
+                    lapack_int* ldq, lapack_complex_double* work,
+                    lapack_int *info );
+void LAPACK_cupmtr( char* side, char* uplo, char* trans, lapack_int* m,
+                    lapack_int* n, const lapack_complex_float* ap,
+                    const lapack_complex_float* tau, lapack_complex_float* c,
+                    lapack_int* ldc, lapack_complex_float* work,
+                    lapack_int *info );
+void LAPACK_zupmtr( char* side, char* uplo, char* trans, lapack_int* m,
+                    lapack_int* n, const lapack_complex_double* ap,
+                    const lapack_complex_double* tau, lapack_complex_double* c,
+                    lapack_int* ldc, lapack_complex_double* work,
+                    lapack_int *info );
+void LAPACK_ssbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd,
+                    float* ab, lapack_int* ldab, float* d, float* e, float* q,
+                    lapack_int* ldq, float* work, lapack_int *info );
+void LAPACK_dsbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd,
+                    double* ab, lapack_int* ldab, double* d, double* e,
+                    double* q, lapack_int* ldq, double* work,
+                    lapack_int *info );
+void LAPACK_chbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd,
+                    lapack_complex_float* ab, lapack_int* ldab, float* d,
+                    float* e, lapack_complex_float* q, lapack_int* ldq,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zhbtrd( char* vect, char* uplo, lapack_int* n, lapack_int* kd,
+                    lapack_complex_double* ab, lapack_int* ldab, double* d,
+                    double* e, lapack_complex_double* q, lapack_int* ldq,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_ssterf( lapack_int* n, float* d, float* e, lapack_int *info );
+void LAPACK_dsterf( lapack_int* n, double* d, double* e, lapack_int *info );
+void LAPACK_ssteqr( char* compz, lapack_int* n, float* d, float* e, float* z,
+                    lapack_int* ldz, float* work, lapack_int *info );
+void LAPACK_dsteqr( char* compz, lapack_int* n, double* d, double* e, double* z,
+                    lapack_int* ldz, double* work, lapack_int *info );
+void LAPACK_csteqr( char* compz, lapack_int* n, float* d, float* e,
+                    lapack_complex_float* z, lapack_int* ldz, float* work,
+                    lapack_int *info );
+void LAPACK_zsteqr( char* compz, lapack_int* n, double* d, double* e,
+                    lapack_complex_double* z, lapack_int* ldz, double* work,
+                    lapack_int *info );
+void LAPACK_sstemr( char* jobz, char* range, lapack_int* n, float* d, float* e,
+                    float* vl, float* vu, lapack_int* il, lapack_int* iu,
+                    lapack_int* m, float* w, float* z, lapack_int* ldz,
+                    lapack_int* nzc, lapack_int* isuppz, lapack_logical* tryrac,
+                    float* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_dstemr( char* jobz, char* range, lapack_int* n, double* d,
+                    double* e, double* vl, double* vu, lapack_int* il,
+                    lapack_int* iu, lapack_int* m, double* w, double* z,
+                    lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz,
+                    lapack_logical* tryrac, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_cstemr( char* jobz, char* range, lapack_int* n, float* d, float* e,
+                    float* vl, float* vu, lapack_int* il, lapack_int* iu,
+                    lapack_int* m, float* w, lapack_complex_float* z,
+                    lapack_int* ldz, lapack_int* nzc, lapack_int* isuppz,
+                    lapack_logical* tryrac, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_zstemr( char* jobz, char* range, lapack_int* n, double* d,
+                    double* e, double* vl, double* vu, lapack_int* il,
+                    lapack_int* iu, lapack_int* m, double* w,
+                    lapack_complex_double* z, lapack_int* ldz, lapack_int* nzc,
+                    lapack_int* isuppz, lapack_logical* tryrac, double* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_sstedc( char* compz, lapack_int* n, float* d, float* e, float* z,
+                    lapack_int* ldz, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_dstedc( char* compz, lapack_int* n, double* d, double* e, double* z,
+                    lapack_int* ldz, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_cstedc( char* compz, lapack_int* n, float* d, float* e,
+                    lapack_complex_float* z, lapack_int* ldz,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_zstedc( char* compz, lapack_int* n, double* d, double* e,
+                    lapack_complex_double* z, lapack_int* ldz,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* lrwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_sstegr( char* jobz, char* range, lapack_int* n, float* d, float* e,
+                    float* vl, float* vu, lapack_int* il, lapack_int* iu,
+                    float* abstol, lapack_int* m, float* w, float* z,
+                    lapack_int* ldz, lapack_int* isuppz, float* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_dstegr( char* jobz, char* range, lapack_int* n, double* d,
+                    double* e, double* vl, double* vu, lapack_int* il,
+                    lapack_int* iu, double* abstol, lapack_int* m, double* w,
+                    double* z, lapack_int* ldz, lapack_int* isuppz,
+                    double* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_cstegr( char* jobz, char* range, lapack_int* n, float* d, float* e,
+                    float* vl, float* vu, lapack_int* il, lapack_int* iu,
+                    float* abstol, lapack_int* m, float* w,
+                    lapack_complex_float* z, lapack_int* ldz,
+                    lapack_int* isuppz, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_zstegr( char* jobz, char* range, lapack_int* n, double* d,
+                    double* e, double* vl, double* vu, lapack_int* il,
+                    lapack_int* iu, double* abstol, lapack_int* m, double* w,
+                    lapack_complex_double* z, lapack_int* ldz,
+                    lapack_int* isuppz, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_spteqr( char* compz, lapack_int* n, float* d, float* e, float* z,
+                    lapack_int* ldz, float* work, lapack_int *info );
+void LAPACK_dpteqr( char* compz, lapack_int* n, double* d, double* e, double* z,
+                    lapack_int* ldz, double* work, lapack_int *info );
+void LAPACK_cpteqr( char* compz, lapack_int* n, float* d, float* e,
+                    lapack_complex_float* z, lapack_int* ldz, float* work,
+                    lapack_int *info );
+void LAPACK_zpteqr( char* compz, lapack_int* n, double* d, double* e,
+                    lapack_complex_double* z, lapack_int* ldz, double* work,
+                    lapack_int *info );
+void LAPACK_sstebz( char* range, char* order, lapack_int* n, float* vl,
+                    float* vu, lapack_int* il, lapack_int* iu, float* abstol,
+                    const float* d, const float* e, lapack_int* m,
+                    lapack_int* nsplit, float* w, lapack_int* iblock,
+                    lapack_int* isplit, float* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dstebz( char* range, char* order, lapack_int* n, double* vl,
+                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,
+                    const double* d, const double* e, lapack_int* m,
+                    lapack_int* nsplit, double* w, lapack_int* iblock,
+                    lapack_int* isplit, double* work, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_sstein( lapack_int* n, const float* d, const float* e,
+                    lapack_int* m, const float* w, const lapack_int* iblock,
+                    const lapack_int* isplit, float* z, lapack_int* ldz,
+                    float* work, lapack_int* iwork, lapack_int* ifailv,
+                    lapack_int *info );
+void LAPACK_dstein( lapack_int* n, const double* d, const double* e,
+                    lapack_int* m, const double* w, const lapack_int* iblock,
+                    const lapack_int* isplit, double* z, lapack_int* ldz,
+                    double* work, lapack_int* iwork, lapack_int* ifailv,
+                    lapack_int *info );
+void LAPACK_cstein( lapack_int* n, const float* d, const float* e,
+                    lapack_int* m, const float* w, const lapack_int* iblock,
+                    const lapack_int* isplit, lapack_complex_float* z,
+                    lapack_int* ldz, float* work, lapack_int* iwork,
+                    lapack_int* ifailv, lapack_int *info );
+void LAPACK_zstein( lapack_int* n, const double* d, const double* e,
+                    lapack_int* m, const double* w, const lapack_int* iblock,
+                    const lapack_int* isplit, lapack_complex_double* z,
+                    lapack_int* ldz, double* work, lapack_int* iwork,
+                    lapack_int* ifailv, lapack_int *info );
+void LAPACK_sdisna( char* job, lapack_int* m, lapack_int* n, const float* d,
+                    float* sep, lapack_int *info );
+void LAPACK_ddisna( char* job, lapack_int* m, lapack_int* n, const double* d,
+                    double* sep, lapack_int *info );
+void LAPACK_ssygst( lapack_int* itype, char* uplo, lapack_int* n, float* a,
+                    lapack_int* lda, const float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_dsygst( lapack_int* itype, char* uplo, lapack_int* n, double* a,
+                    lapack_int* lda, const double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_chegst( lapack_int* itype, char* uplo, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_zhegst( lapack_int* itype, char* uplo, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int *info );
+void LAPACK_sspgst( lapack_int* itype, char* uplo, lapack_int* n, float* ap,
+                    const float* bp, lapack_int *info );
+void LAPACK_dspgst( lapack_int* itype, char* uplo, lapack_int* n, double* ap,
+                    const double* bp, lapack_int *info );
+void LAPACK_chpgst( lapack_int* itype, char* uplo, lapack_int* n,
+                    lapack_complex_float* ap, const lapack_complex_float* bp,
+                    lapack_int *info );
+void LAPACK_zhpgst( lapack_int* itype, char* uplo, lapack_int* n,
+                    lapack_complex_double* ap, const lapack_complex_double* bp,
+                    lapack_int *info );
+void LAPACK_ssbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka,
+                    lapack_int* kb, float* ab, lapack_int* ldab,
+                    const float* bb, lapack_int* ldbb, float* x,
+                    lapack_int* ldx, float* work, lapack_int *info );
+void LAPACK_dsbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka,
+                    lapack_int* kb, double* ab, lapack_int* ldab,
+                    const double* bb, lapack_int* ldbb, double* x,
+                    lapack_int* ldx, double* work, lapack_int *info );
+void LAPACK_chbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka,
+                    lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab,
+                    const lapack_complex_float* bb, lapack_int* ldbb,
+                    lapack_complex_float* x, lapack_int* ldx,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_zhbgst( char* vect, char* uplo, lapack_int* n, lapack_int* ka,
+                    lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab,
+                    const lapack_complex_double* bb, lapack_int* ldbb,
+                    lapack_complex_double* x, lapack_int* ldx,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_spbstf( char* uplo, lapack_int* n, lapack_int* kb, float* bb,
+                    lapack_int* ldbb, lapack_int *info );
+void LAPACK_dpbstf( char* uplo, lapack_int* n, lapack_int* kb, double* bb,
+                    lapack_int* ldbb, lapack_int *info );
+void LAPACK_cpbstf( char* uplo, lapack_int* n, lapack_int* kb,
+                    lapack_complex_float* bb, lapack_int* ldbb,
+                    lapack_int *info );
+void LAPACK_zpbstf( char* uplo, lapack_int* n, lapack_int* kb,
+                    lapack_complex_double* bb, lapack_int* ldbb,
+                    lapack_int *info );
+void LAPACK_sgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a,
+                    lapack_int* lda, float* tau, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a,
+                    lapack_int* lda, double* tau, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* tau, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zgehrd( lapack_int* n, lapack_int* ilo, lapack_int* ihi,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* tau, lapack_complex_double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sorghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, float* a,
+                    lapack_int* lda, const float* tau, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dorghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi, double* a,
+                    lapack_int* lda, const double* tau, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sormhr( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* ilo, lapack_int* ihi, const float* a,
+                    lapack_int* lda, const float* tau, float* c,
+                    lapack_int* ldc, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dormhr( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* ilo, lapack_int* ihi, const double* a,
+                    lapack_int* lda, const double* tau, double* c,
+                    lapack_int* ldc, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cunghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi,
+                    lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* tau, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zunghr( lapack_int* n, lapack_int* ilo, lapack_int* ihi,
+                    lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cunmhr( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* ilo, lapack_int* ihi,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* tau, lapack_complex_float* c,
+                    lapack_int* ldc, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zunmhr( char* side, char* trans, lapack_int* m, lapack_int* n,
+                    lapack_int* ilo, lapack_int* ihi,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* tau, lapack_complex_double* c,
+                    lapack_int* ldc, lapack_complex_double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sgebal( char* job, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int* ilo, lapack_int* ihi, float* scale,
+                    lapack_int *info );
+void LAPACK_dgebal( char* job, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int* ilo, lapack_int* ihi, double* scale,
+                    lapack_int *info );
+void LAPACK_cgebal( char* job, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int* ilo, lapack_int* ihi,
+                    float* scale, lapack_int *info );
+void LAPACK_zgebal( char* job, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* ilo, lapack_int* ihi,
+                    double* scale, lapack_int *info );
+void LAPACK_sgebak( char* job, char* side, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, const float* scale, lapack_int* m,
+                    float* v, lapack_int* ldv, lapack_int *info );
+void LAPACK_dgebak( char* job, char* side, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, const double* scale, lapack_int* m,
+                    double* v, lapack_int* ldv, lapack_int *info );
+void LAPACK_cgebak( char* job, char* side, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, const float* scale, lapack_int* m,
+                    lapack_complex_float* v, lapack_int* ldv,
+                    lapack_int *info );
+void LAPACK_zgebak( char* job, char* side, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, const double* scale, lapack_int* m,
+                    lapack_complex_double* v, lapack_int* ldv,
+                    lapack_int *info );
+void LAPACK_shseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, float* h, lapack_int* ldh, float* wr,
+                    float* wi, float* z, lapack_int* ldz, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dhseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, double* h, lapack_int* ldh, double* wr,
+                    double* wi, double* z, lapack_int* ldz, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_chseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, lapack_complex_float* h, lapack_int* ldh,
+                    lapack_complex_float* w, lapack_complex_float* z,
+                    lapack_int* ldz, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zhseqr( char* job, char* compz, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, lapack_complex_double* h, lapack_int* ldh,
+                    lapack_complex_double* w, lapack_complex_double* z,
+                    lapack_int* ldz, lapack_complex_double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_shsein( char* job, char* eigsrc, char* initv,
+                    lapack_logical* select, lapack_int* n, const float* h,
+                    lapack_int* ldh, float* wr, const float* wi, float* vl,
+                    lapack_int* ldvl, float* vr, lapack_int* ldvr,
+                    lapack_int* mm, lapack_int* m, float* work,
+                    lapack_int* ifaill, lapack_int* ifailr, lapack_int *info );
+void LAPACK_dhsein( char* job, char* eigsrc, char* initv,
+                    lapack_logical* select, lapack_int* n, const double* h,
+                    lapack_int* ldh, double* wr, const double* wi, double* vl,
+                    lapack_int* ldvl, double* vr, lapack_int* ldvr,
+                    lapack_int* mm, lapack_int* m, double* work,
+                    lapack_int* ifaill, lapack_int* ifailr, lapack_int *info );
+void LAPACK_chsein( char* job, char* eigsrc, char* initv,
+                    const lapack_logical* select, lapack_int* n,
+                    const lapack_complex_float* h, lapack_int* ldh,
+                    lapack_complex_float* w, lapack_complex_float* vl,
+                    lapack_int* ldvl, lapack_complex_float* vr,
+                    lapack_int* ldvr, lapack_int* mm, lapack_int* m,
+                    lapack_complex_float* work, float* rwork,
+                    lapack_int* ifaill, lapack_int* ifailr, lapack_int *info );
+void LAPACK_zhsein( char* job, char* eigsrc, char* initv,
+                    const lapack_logical* select, lapack_int* n,
+                    const lapack_complex_double* h, lapack_int* ldh,
+                    lapack_complex_double* w, lapack_complex_double* vl,
+                    lapack_int* ldvl, lapack_complex_double* vr,
+                    lapack_int* ldvr, lapack_int* mm, lapack_int* m,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int* ifaill, lapack_int* ifailr, lapack_int *info );
+void LAPACK_strevc( char* side, char* howmny, lapack_logical* select,
+                    lapack_int* n, const float* t, lapack_int* ldt, float* vl,
+                    lapack_int* ldvl, float* vr, lapack_int* ldvr,
+                    lapack_int* mm, lapack_int* m, float* work,
+                    lapack_int *info );
+void LAPACK_dtrevc( char* side, char* howmny, lapack_logical* select,
+                    lapack_int* n, const double* t, lapack_int* ldt, double* vl,
+                    lapack_int* ldvl, double* vr, lapack_int* ldvr,
+                    lapack_int* mm, lapack_int* m, double* work,
+                    lapack_int *info );
+void LAPACK_ctrevc( char* side, char* howmny, const lapack_logical* select,
+                    lapack_int* n, lapack_complex_float* t, lapack_int* ldt,
+                    lapack_complex_float* vl, lapack_int* ldvl,
+                    lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm,
+                    lapack_int* m, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_ztrevc( char* side, char* howmny, const lapack_logical* select,
+                    lapack_int* n, lapack_complex_double* t, lapack_int* ldt,
+                    lapack_complex_double* vl, lapack_int* ldvl,
+                    lapack_complex_double* vr, lapack_int* ldvr, lapack_int* mm,
+                    lapack_int* m, lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_strsna( char* job, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const float* t, lapack_int* ldt,
+                    const float* vl, lapack_int* ldvl, const float* vr,
+                    lapack_int* ldvr, float* s, float* sep, lapack_int* mm,
+                    lapack_int* m, float* work, lapack_int* ldwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_dtrsna( char* job, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const double* t, lapack_int* ldt,
+                    const double* vl, lapack_int* ldvl, const double* vr,
+                    lapack_int* ldvr, double* s, double* sep, lapack_int* mm,
+                    lapack_int* m, double* work, lapack_int* ldwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_ctrsna( char* job, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const lapack_complex_float* t,
+                    lapack_int* ldt, const lapack_complex_float* vl,
+                    lapack_int* ldvl, const lapack_complex_float* vr,
+                    lapack_int* ldvr, float* s, float* sep, lapack_int* mm,
+                    lapack_int* m, lapack_complex_float* work,
+                    lapack_int* ldwork, float* rwork, lapack_int *info );
+void LAPACK_ztrsna( char* job, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const lapack_complex_double* t,
+                    lapack_int* ldt, const lapack_complex_double* vl,
+                    lapack_int* ldvl, const lapack_complex_double* vr,
+                    lapack_int* ldvr, double* s, double* sep, lapack_int* mm,
+                    lapack_int* m, lapack_complex_double* work,
+                    lapack_int* ldwork, double* rwork, lapack_int *info );
+void LAPACK_strexc( char* compq, lapack_int* n, float* t, lapack_int* ldt,
+                    float* q, lapack_int* ldq, lapack_int* ifst,
+                    lapack_int* ilst, float* work, lapack_int *info );
+void LAPACK_dtrexc( char* compq, lapack_int* n, double* t, lapack_int* ldt,
+                    double* q, lapack_int* ldq, lapack_int* ifst,
+                    lapack_int* ilst, double* work, lapack_int *info );
+void LAPACK_ctrexc( char* compq, lapack_int* n, lapack_complex_float* t,
+                    lapack_int* ldt, lapack_complex_float* q, lapack_int* ldq,
+                    lapack_int* ifst, lapack_int* ilst, lapack_int *info );
+void LAPACK_ztrexc( char* compq, lapack_int* n, lapack_complex_double* t,
+                    lapack_int* ldt, lapack_complex_double* q, lapack_int* ldq,
+                    lapack_int* ifst, lapack_int* ilst, lapack_int *info );
+void LAPACK_strsen( char* job, char* compq, const lapack_logical* select,
+                    lapack_int* n, float* t, lapack_int* ldt, float* q,
+                    lapack_int* ldq, float* wr, float* wi, lapack_int* m,
+                    float* s, float* sep, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_dtrsen( char* job, char* compq, const lapack_logical* select,
+                    lapack_int* n, double* t, lapack_int* ldt, double* q,
+                    lapack_int* ldq, double* wr, double* wi, lapack_int* m,
+                    double* s, double* sep, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_ctrsen( char* job, char* compq, const lapack_logical* select,
+                    lapack_int* n, lapack_complex_float* t, lapack_int* ldt,
+                    lapack_complex_float* q, lapack_int* ldq,
+                    lapack_complex_float* w, lapack_int* m, float* s,
+                    float* sep, lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_ztrsen( char* job, char* compq, const lapack_logical* select,
+                    lapack_int* n, lapack_complex_double* t, lapack_int* ldt,
+                    lapack_complex_double* q, lapack_int* ldq,
+                    lapack_complex_double* w, lapack_int* m, double* s,
+                    double* sep, lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_strsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m,
+                    lapack_int* n, const float* a, lapack_int* lda,
+                    const float* b, lapack_int* ldb, float* c, lapack_int* ldc,
+                    float* scale, lapack_int *info );
+void LAPACK_dtrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m,
+                    lapack_int* n, const double* a, lapack_int* lda,
+                    const double* b, lapack_int* ldb, double* c,
+                    lapack_int* ldc, double* scale, lapack_int *info );
+void LAPACK_ctrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m,
+                    lapack_int* n, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* b,
+                    lapack_int* ldb, lapack_complex_float* c, lapack_int* ldc,
+                    float* scale, lapack_int *info );
+void LAPACK_ztrsyl( char* trana, char* tranb, lapack_int* isgn, lapack_int* m,
+                    lapack_int* n, const lapack_complex_double* a,
+                    lapack_int* lda, const lapack_complex_double* b,
+                    lapack_int* ldb, lapack_complex_double* c, lapack_int* ldc,
+                    double* scale, lapack_int *info );
+void LAPACK_sgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, float* a, lapack_int* lda, float* b,
+                    lapack_int* ldb, float* q, lapack_int* ldq, float* z,
+                    lapack_int* ldz, lapack_int *info );
+void LAPACK_dgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, double* a, lapack_int* lda, double* b,
+                    lapack_int* ldb, double* q, lapack_int* ldq, double* z,
+                    lapack_int* ldz, lapack_int *info );
+void LAPACK_cgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* q, lapack_int* ldq,
+                    lapack_complex_float* z, lapack_int* ldz,
+                    lapack_int *info );
+void LAPACK_zgghrd( char* compq, char* compz, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* q, lapack_int* ldq,
+                    lapack_complex_double* z, lapack_int* ldz,
+                    lapack_int *info );
+void LAPACK_sggbal( char* job, lapack_int* n, float* a, lapack_int* lda,
+                    float* b, lapack_int* ldb, lapack_int* ilo, lapack_int* ihi,
+                    float* lscale, float* rscale, float* work,
+                    lapack_int *info );
+void LAPACK_dggbal( char* job, lapack_int* n, double* a, lapack_int* lda,
+                    double* b, lapack_int* ldb, lapack_int* ilo,
+                    lapack_int* ihi, double* lscale, double* rscale,
+                    double* work, lapack_int *info );
+void LAPACK_cggbal( char* job, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* b, lapack_int* ldb,
+                    lapack_int* ilo, lapack_int* ihi, float* lscale,
+                    float* rscale, float* work, lapack_int *info );
+void LAPACK_zggbal( char* job, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* b, lapack_int* ldb,
+                    lapack_int* ilo, lapack_int* ihi, double* lscale,
+                    double* rscale, double* work, lapack_int *info );
+void LAPACK_sggbak( char* job, char* side, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, const float* lscale, const float* rscale,
+                    lapack_int* m, float* v, lapack_int* ldv,
+                    lapack_int *info );
+void LAPACK_dggbak( char* job, char* side, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, const double* lscale, const double* rscale,
+                    lapack_int* m, double* v, lapack_int* ldv,
+                    lapack_int *info );
+void LAPACK_cggbak( char* job, char* side, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, const float* lscale, const float* rscale,
+                    lapack_int* m, lapack_complex_float* v, lapack_int* ldv,
+                    lapack_int *info );
+void LAPACK_zggbak( char* job, char* side, lapack_int* n, lapack_int* ilo,
+                    lapack_int* ihi, const double* lscale, const double* rscale,
+                    lapack_int* m, lapack_complex_double* v, lapack_int* ldv,
+                    lapack_int *info );
+void LAPACK_shgeqz( char* job, char* compq, char* compz, lapack_int* n,
+                    lapack_int* ilo, lapack_int* ihi, float* h, lapack_int* ldh,
+                    float* t, lapack_int* ldt, float* alphar, float* alphai,
+                    float* beta, float* q, lapack_int* ldq, float* z,
+                    lapack_int* ldz, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dhgeqz( char* job, char* compq, char* compz, lapack_int* n,
+                    lapack_int* ilo, lapack_int* ihi, double* h,
+                    lapack_int* ldh, double* t, lapack_int* ldt, double* alphar,
+                    double* alphai, double* beta, double* q, lapack_int* ldq,
+                    double* z, lapack_int* ldz, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_chgeqz( char* job, char* compq, char* compz, lapack_int* n,
+                    lapack_int* ilo, lapack_int* ihi, lapack_complex_float* h,
+                    lapack_int* ldh, lapack_complex_float* t, lapack_int* ldt,
+                    lapack_complex_float* alpha, lapack_complex_float* beta,
+                    lapack_complex_float* q, lapack_int* ldq,
+                    lapack_complex_float* z, lapack_int* ldz,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int *info );
+void LAPACK_zhgeqz( char* job, char* compq, char* compz, lapack_int* n,
+                    lapack_int* ilo, lapack_int* ihi, lapack_complex_double* h,
+                    lapack_int* ldh, lapack_complex_double* t, lapack_int* ldt,
+                    lapack_complex_double* alpha, lapack_complex_double* beta,
+                    lapack_complex_double* q, lapack_int* ldq,
+                    lapack_complex_double* z, lapack_int* ldz,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int *info );
+void LAPACK_stgevc( char* side, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const float* s, lapack_int* lds,
+                    const float* p, lapack_int* ldp, float* vl,
+                    lapack_int* ldvl, float* vr, lapack_int* ldvr,
+                    lapack_int* mm, lapack_int* m, float* work,
+                    lapack_int *info );
+void LAPACK_dtgevc( char* side, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const double* s, lapack_int* lds,
+                    const double* p, lapack_int* ldp, double* vl,
+                    lapack_int* ldvl, double* vr, lapack_int* ldvr,
+                    lapack_int* mm, lapack_int* m, double* work,
+                    lapack_int *info );
+void LAPACK_ctgevc( char* side, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const lapack_complex_float* s,
+                    lapack_int* lds, const lapack_complex_float* p,
+                    lapack_int* ldp, lapack_complex_float* vl, lapack_int* ldvl,
+                    lapack_complex_float* vr, lapack_int* ldvr, lapack_int* mm,
+                    lapack_int* m, lapack_complex_float* work, float* rwork,
+                    lapack_int *info );
+void LAPACK_ztgevc( char* side, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const lapack_complex_double* s,
+                    lapack_int* lds, const lapack_complex_double* p,
+                    lapack_int* ldp, lapack_complex_double* vl,
+                    lapack_int* ldvl, lapack_complex_double* vr,
+                    lapack_int* ldvr, lapack_int* mm, lapack_int* m,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int *info );
+void LAPACK_stgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n,
+                    float* a, lapack_int* lda, float* b, lapack_int* ldb,
+                    float* q, lapack_int* ldq, float* z, lapack_int* ldz,
+                    lapack_int* ifst, lapack_int* ilst, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dtgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n,
+                    double* a, lapack_int* lda, double* b, lapack_int* ldb,
+                    double* q, lapack_int* ldq, double* z, lapack_int* ldz,
+                    lapack_int* ifst, lapack_int* ilst, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_ctgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* q, lapack_int* ldq,
+                    lapack_complex_float* z, lapack_int* ldz, lapack_int* ifst,
+                    lapack_int* ilst, lapack_int *info );
+void LAPACK_ztgexc( lapack_logical* wantq, lapack_logical* wantz, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* q, lapack_int* ldq,
+                    lapack_complex_double* z, lapack_int* ldz, lapack_int* ifst,
+                    lapack_int* ilst, lapack_int *info );
+void LAPACK_stgsen( lapack_int* ijob, lapack_logical* wantq,
+                    lapack_logical* wantz, const lapack_logical* select,
+                    lapack_int* n, float* a, lapack_int* lda, float* b,
+                    lapack_int* ldb, float* alphar, float* alphai, float* beta,
+                    float* q, lapack_int* ldq, float* z, lapack_int* ldz,
+                    lapack_int* m, float* pl, float* pr, float* dif,
+                    float* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_dtgsen( lapack_int* ijob, lapack_logical* wantq,
+                    lapack_logical* wantz, const lapack_logical* select,
+                    lapack_int* n, double* a, lapack_int* lda, double* b,
+                    lapack_int* ldb, double* alphar, double* alphai,
+                    double* beta, double* q, lapack_int* ldq, double* z,
+                    lapack_int* ldz, lapack_int* m, double* pl, double* pr,
+                    double* dif, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_ctgsen( lapack_int* ijob, lapack_logical* wantq,
+                    lapack_logical* wantz, const lapack_logical* select,
+                    lapack_int* n, lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* alpha, lapack_complex_float* beta,
+                    lapack_complex_float* q, lapack_int* ldq,
+                    lapack_complex_float* z, lapack_int* ldz, lapack_int* m,
+                    float* pl, float* pr, float* dif,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_ztgsen( lapack_int* ijob, lapack_logical* wantq,
+                    lapack_logical* wantz, const lapack_logical* select,
+                    lapack_int* n, lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* alpha, lapack_complex_double* beta,
+                    lapack_complex_double* q, lapack_int* ldq,
+                    lapack_complex_double* z, lapack_int* ldz, lapack_int* m,
+                    double* pl, double* pr, double* dif,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_stgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n,
+                    const float* a, lapack_int* lda, const float* b,
+                    lapack_int* ldb, float* c, lapack_int* ldc, const float* d,
+                    lapack_int* ldd, const float* e, lapack_int* lde, float* f,
+                    lapack_int* ldf, float* scale, float* dif, float* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );
+void LAPACK_dtgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n,
+                    const double* a, lapack_int* lda, const double* b,
+                    lapack_int* ldb, double* c, lapack_int* ldc,
+                    const double* d, lapack_int* ldd, const double* e,
+                    lapack_int* lde, double* f, lapack_int* ldf, double* scale,
+                    double* dif, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_ctgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    const lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    const lapack_complex_float* d, lapack_int* ldd,
+                    const lapack_complex_float* e, lapack_int* lde,
+                    lapack_complex_float* f, lapack_int* ldf, float* scale,
+                    float* dif, lapack_complex_float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_ztgsyl( char* trans, lapack_int* ijob, lapack_int* m, lapack_int* n,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    const lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* c, lapack_int* ldc,
+                    const lapack_complex_double* d, lapack_int* ldd,
+                    const lapack_complex_double* e, lapack_int* lde,
+                    lapack_complex_double* f, lapack_int* ldf, double* scale,
+                    double* dif, lapack_complex_double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_stgsna( char* job, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const float* a, lapack_int* lda,
+                    const float* b, lapack_int* ldb, const float* vl,
+                    lapack_int* ldvl, const float* vr, lapack_int* ldvr,
+                    float* s, float* dif, lapack_int* mm, lapack_int* m,
+                    float* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dtgsna( char* job, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const double* a, lapack_int* lda,
+                    const double* b, lapack_int* ldb, const double* vl,
+                    lapack_int* ldvl, const double* vr, lapack_int* ldvr,
+                    double* s, double* dif, lapack_int* mm, lapack_int* m,
+                    double* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_ctgsna( char* job, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const lapack_complex_float* a,
+                    lapack_int* lda, const lapack_complex_float* b,
+                    lapack_int* ldb, const lapack_complex_float* vl,
+                    lapack_int* ldvl, const lapack_complex_float* vr,
+                    lapack_int* ldvr, float* s, float* dif, lapack_int* mm,
+                    lapack_int* m, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );
+void LAPACK_ztgsna( char* job, char* howmny, const lapack_logical* select,
+                    lapack_int* n, const lapack_complex_double* a,
+                    lapack_int* lda, const lapack_complex_double* b,
+                    lapack_int* ldb, const lapack_complex_double* vl,
+                    lapack_int* ldvl, const lapack_complex_double* vr,
+                    lapack_int* ldvr, double* s, double* dif, lapack_int* mm,
+                    lapack_int* m, lapack_complex_double* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );
+void LAPACK_sggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* p, lapack_int* n, float* a, lapack_int* lda,
+                    float* b, lapack_int* ldb, float* tola, float* tolb,
+                    lapack_int* k, lapack_int* l, float* u, lapack_int* ldu,
+                    float* v, lapack_int* ldv, float* q, lapack_int* ldq,
+                    lapack_int* iwork, float* tau, float* work,
+                    lapack_int *info );
+void LAPACK_dggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* p, lapack_int* n, double* a, lapack_int* lda,
+                    double* b, lapack_int* ldb, double* tola, double* tolb,
+                    lapack_int* k, lapack_int* l, double* u, lapack_int* ldu,
+                    double* v, lapack_int* ldv, double* q, lapack_int* ldq,
+                    lapack_int* iwork, double* tau, double* work,
+                    lapack_int *info );
+void LAPACK_cggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* p, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* b, lapack_int* ldb,
+                    float* tola, float* tolb, lapack_int* k, lapack_int* l,
+                    lapack_complex_float* u, lapack_int* ldu,
+                    lapack_complex_float* v, lapack_int* ldv,
+                    lapack_complex_float* q, lapack_int* ldq, lapack_int* iwork,
+                    float* rwork, lapack_complex_float* tau,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zggsvp( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* p, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* b, lapack_int* ldb,
+                    double* tola, double* tolb, lapack_int* k, lapack_int* l,
+                    lapack_complex_double* u, lapack_int* ldu,
+                    lapack_complex_double* v, lapack_int* ldv,
+                    lapack_complex_double* q, lapack_int* ldq,
+                    lapack_int* iwork, double* rwork,
+                    lapack_complex_double* tau, lapack_complex_double* work,
+                    lapack_int *info );
+void LAPACK_stgsja( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l,
+                    float* a, lapack_int* lda, float* b, lapack_int* ldb,
+                    float* tola, float* tolb, float* alpha, float* beta,
+                    float* u, lapack_int* ldu, float* v, lapack_int* ldv,
+                    float* q, lapack_int* ldq, float* work, lapack_int* ncycle,
+                    lapack_int *info );
+void LAPACK_dtgsja( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l,
+                    double* a, lapack_int* lda, double* b, lapack_int* ldb,
+                    double* tola, double* tolb, double* alpha, double* beta,
+                    double* u, lapack_int* ldu, double* v, lapack_int* ldv,
+                    double* q, lapack_int* ldq, double* work,
+                    lapack_int* ncycle, lapack_int *info );
+void LAPACK_ctgsja( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb, float* tola,
+                    float* tolb, float* alpha, float* beta,
+                    lapack_complex_float* u, lapack_int* ldu,
+                    lapack_complex_float* v, lapack_int* ldv,
+                    lapack_complex_float* q, lapack_int* ldq,
+                    lapack_complex_float* work, lapack_int* ncycle,
+                    lapack_int *info );
+void LAPACK_ztgsja( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* p, lapack_int* n, lapack_int* k, lapack_int* l,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb, double* tola,
+                    double* tolb, double* alpha, double* beta,
+                    lapack_complex_double* u, lapack_int* ldu,
+                    lapack_complex_double* v, lapack_int* ldv,
+                    lapack_complex_double* q, lapack_int* ldq,
+                    lapack_complex_double* work, lapack_int* ncycle,
+                    lapack_int *info );
+void LAPACK_sgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs,
+                   float* a, lapack_int* lda, float* b, lapack_int* ldb,
+                   float* work, lapack_int* lwork, lapack_int *info );
+void LAPACK_dgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs,
+                   double* a, lapack_int* lda, double* b, lapack_int* ldb,
+                   double* work, lapack_int* lwork, lapack_int *info );
+void LAPACK_cgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_float* a, lapack_int* lda,
+                   lapack_complex_float* b, lapack_int* ldb,
+                   lapack_complex_float* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_zgels( char* trans, lapack_int* m, lapack_int* n, lapack_int* nrhs,
+                   lapack_complex_double* a, lapack_int* lda,
+                   lapack_complex_double* b, lapack_int* ldb,
+                   lapack_complex_double* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_sgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a,
+                    lapack_int* lda, float* b, lapack_int* ldb,
+                    lapack_int* jpvt, float* rcond, lapack_int* rank,
+                    float* work, lapack_int* lwork, lapack_int *info );
+void LAPACK_dgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a,
+                    lapack_int* lda, double* b, lapack_int* ldb,
+                    lapack_int* jpvt, double* rcond, lapack_int* rank,
+                    double* work, lapack_int* lwork, lapack_int *info );
+void LAPACK_cgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb, lapack_int* jpvt,
+                    float* rcond, lapack_int* rank, lapack_complex_float* work,
+                    lapack_int* lwork, float* rwork, lapack_int *info );
+void LAPACK_zgelsy( lapack_int* m, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb, lapack_int* jpvt,
+                    double* rcond, lapack_int* rank,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int *info );
+void LAPACK_sgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a,
+                    lapack_int* lda, float* b, lapack_int* ldb, float* s,
+                    float* rcond, lapack_int* rank, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a,
+                    lapack_int* lda, double* b, lapack_int* ldb, double* s,
+                    double* rcond, lapack_int* rank, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb, float* s,
+                    float* rcond, lapack_int* rank, lapack_complex_float* work,
+                    lapack_int* lwork, float* rwork, lapack_int *info );
+void LAPACK_zgelss( lapack_int* m, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb, double* s,
+                    double* rcond, lapack_int* rank,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int *info );
+void LAPACK_sgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, float* a,
+                    lapack_int* lda, float* b, lapack_int* ldb, float* s,
+                    float* rcond, lapack_int* rank, float* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );
+void LAPACK_dgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs, double* a,
+                    lapack_int* lda, double* b, lapack_int* ldb, double* s,
+                    double* rcond, lapack_int* rank, double* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );
+void LAPACK_cgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb, float* s,
+                    float* rcond, lapack_int* rank, lapack_complex_float* work,
+                    lapack_int* lwork, float* rwork, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_zgelsd( lapack_int* m, lapack_int* n, lapack_int* nrhs,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb, double* s,
+                    double* rcond, lapack_int* rank,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* iwork, lapack_int *info );
+void LAPACK_sgglse( lapack_int* m, lapack_int* n, lapack_int* p, float* a,
+                    lapack_int* lda, float* b, lapack_int* ldb, float* c,
+                    float* d, float* x, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dgglse( lapack_int* m, lapack_int* n, lapack_int* p, double* a,
+                    lapack_int* lda, double* b, lapack_int* ldb, double* c,
+                    double* d, double* x, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cgglse( lapack_int* m, lapack_int* n, lapack_int* p,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* c, lapack_complex_float* d,
+                    lapack_complex_float* x, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zgglse( lapack_int* m, lapack_int* n, lapack_int* p,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* c, lapack_complex_double* d,
+                    lapack_complex_double* x, lapack_complex_double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sggglm( lapack_int* n, lapack_int* m, lapack_int* p, float* a,
+                    lapack_int* lda, float* b, lapack_int* ldb, float* d,
+                    float* x, float* y, float* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_dggglm( lapack_int* n, lapack_int* m, lapack_int* p, double* a,
+                    lapack_int* lda, double* b, lapack_int* ldb, double* d,
+                    double* x, double* y, double* work, lapack_int* lwork,
+                    lapack_int *info );
+void LAPACK_cggglm( lapack_int* n, lapack_int* m, lapack_int* p,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* d, lapack_complex_float* x,
+                    lapack_complex_float* y, lapack_complex_float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_zggglm( lapack_int* n, lapack_int* m, lapack_int* p,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* d, lapack_complex_double* x,
+                    lapack_complex_double* y, lapack_complex_double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_ssyev( char* jobz, char* uplo, lapack_int* n, float* a,
+                   lapack_int* lda, float* w, float* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_dsyev( char* jobz, char* uplo, lapack_int* n, double* a,
+                   lapack_int* lda, double* w, double* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_cheev( char* jobz, char* uplo, lapack_int* n,
+                   lapack_complex_float* a, lapack_int* lda, float* w,
+                   lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                   lapack_int *info );
+void LAPACK_zheev( char* jobz, char* uplo, lapack_int* n,
+                   lapack_complex_double* a, lapack_int* lda, double* w,
+                   lapack_complex_double* work, lapack_int* lwork,
+                   double* rwork, lapack_int *info );
+void LAPACK_ssyevd( char* jobz, char* uplo, lapack_int* n, float* a,
+                    lapack_int* lda, float* w, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_dsyevd( char* jobz, char* uplo, lapack_int* n, double* a,
+                    lapack_int* lda, double* w, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_cheevd( char* jobz, char* uplo, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda, float* w,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_zheevd( char* jobz, char* uplo, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda, double* w,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* lrwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_ssyevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    float* a, lapack_int* lda, float* vl, float* vu,
+                    lapack_int* il, lapack_int* iu, float* abstol,
+                    lapack_int* m, float* w, float* z, lapack_int* ldz,
+                    float* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_dsyevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    double* a, lapack_int* lda, double* vl, double* vu,
+                    lapack_int* il, lapack_int* iu, double* abstol,
+                    lapack_int* m, double* w, double* z, lapack_int* ldz,
+                    double* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_cheevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda, float* vl,
+                    float* vu, lapack_int* il, lapack_int* iu, float* abstol,
+                    lapack_int* m, float* w, lapack_complex_float* z,
+                    lapack_int* ldz, lapack_complex_float* work,
+                    lapack_int* lwork, float* rwork, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_zheevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda, double* vl,
+                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,
+                    lapack_int* m, double* w, lapack_complex_double* z,
+                    lapack_int* ldz, lapack_complex_double* work,
+                    lapack_int* lwork, double* rwork, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_ssyevr( char* jobz, char* range, char* uplo, lapack_int* n,
+                    float* a, lapack_int* lda, float* vl, float* vu,
+                    lapack_int* il, lapack_int* iu, float* abstol,
+                    lapack_int* m, float* w, float* z, lapack_int* ldz,
+                    lapack_int* isuppz, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_dsyevr( char* jobz, char* range, char* uplo, lapack_int* n,
+                    double* a, lapack_int* lda, double* vl, double* vu,
+                    lapack_int* il, lapack_int* iu, double* abstol,
+                    lapack_int* m, double* w, double* z, lapack_int* ldz,
+                    lapack_int* isuppz, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_cheevr( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda, float* vl,
+                    float* vu, lapack_int* il, lapack_int* iu, float* abstol,
+                    lapack_int* m, float* w, lapack_complex_float* z,
+                    lapack_int* ldz, lapack_int* isuppz,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_zheevr( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda, double* vl,
+                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,
+                    lapack_int* m, double* w, lapack_complex_double* z,
+                    lapack_int* ldz, lapack_int* isuppz,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* lrwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_sspev( char* jobz, char* uplo, lapack_int* n, float* ap, float* w,
+                   float* z, lapack_int* ldz, float* work, lapack_int *info );
+void LAPACK_dspev( char* jobz, char* uplo, lapack_int* n, double* ap, double* w,
+                   double* z, lapack_int* ldz, double* work, lapack_int *info );
+void LAPACK_chpev( char* jobz, char* uplo, lapack_int* n,
+                   lapack_complex_float* ap, float* w, lapack_complex_float* z,
+                   lapack_int* ldz, lapack_complex_float* work, float* rwork,
+                   lapack_int *info );
+void LAPACK_zhpev( char* jobz, char* uplo, lapack_int* n,
+                   lapack_complex_double* ap, double* w,
+                   lapack_complex_double* z, lapack_int* ldz,
+                   lapack_complex_double* work, double* rwork,
+                   lapack_int *info );
+void LAPACK_sspevd( char* jobz, char* uplo, lapack_int* n, float* ap, float* w,
+                    float* z, lapack_int* ldz, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_dspevd( char* jobz, char* uplo, lapack_int* n, double* ap,
+                    double* w, double* z, lapack_int* ldz, double* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_chpevd( char* jobz, char* uplo, lapack_int* n,
+                    lapack_complex_float* ap, float* w, lapack_complex_float* z,
+                    lapack_int* ldz, lapack_complex_float* work,
+                    lapack_int* lwork, float* rwork, lapack_int* lrwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_zhpevd( char* jobz, char* uplo, lapack_int* n,
+                    lapack_complex_double* ap, double* w,
+                    lapack_complex_double* z, lapack_int* ldz,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* lrwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_sspevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    float* ap, float* vl, float* vu, lapack_int* il,
+                    lapack_int* iu, float* abstol, lapack_int* m, float* w,
+                    float* z, lapack_int* ldz, float* work, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_dspevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    double* ap, double* vl, double* vu, lapack_int* il,
+                    lapack_int* iu, double* abstol, lapack_int* m, double* w,
+                    double* z, lapack_int* ldz, double* work, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_chpevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_complex_float* ap, float* vl, float* vu,
+                    lapack_int* il, lapack_int* iu, float* abstol,
+                    lapack_int* m, float* w, lapack_complex_float* z,
+                    lapack_int* ldz, lapack_complex_float* work, float* rwork,
+                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );
+void LAPACK_zhpevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_complex_double* ap, double* vl, double* vu,
+                    lapack_int* il, lapack_int* iu, double* abstol,
+                    lapack_int* m, double* w, lapack_complex_double* z,
+                    lapack_int* ldz, lapack_complex_double* work, double* rwork,
+                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );
+void LAPACK_ssbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,
+                   float* ab, lapack_int* ldab, float* w, float* z,
+                   lapack_int* ldz, float* work, lapack_int *info );
+void LAPACK_dsbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,
+                   double* ab, lapack_int* ldab, double* w, double* z,
+                   lapack_int* ldz, double* work, lapack_int *info );
+void LAPACK_chbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,
+                   lapack_complex_float* ab, lapack_int* ldab, float* w,
+                   lapack_complex_float* z, lapack_int* ldz,
+                   lapack_complex_float* work, float* rwork, lapack_int *info );
+void LAPACK_zhbev( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,
+                   lapack_complex_double* ab, lapack_int* ldab, double* w,
+                   lapack_complex_double* z, lapack_int* ldz,
+                   lapack_complex_double* work, double* rwork,
+                   lapack_int *info );
+void LAPACK_ssbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,
+                    float* ab, lapack_int* ldab, float* w, float* z,
+                    lapack_int* ldz, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_dsbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,
+                    double* ab, lapack_int* ldab, double* w, double* z,
+                    lapack_int* ldz, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_chbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,
+                    lapack_complex_float* ab, lapack_int* ldab, float* w,
+                    lapack_complex_float* z, lapack_int* ldz,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_zhbevd( char* jobz, char* uplo, lapack_int* n, lapack_int* kd,
+                    lapack_complex_double* ab, lapack_int* ldab, double* w,
+                    lapack_complex_double* z, lapack_int* ldz,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* lrwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_ssbevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_int* kd, float* ab, lapack_int* ldab, float* q,
+                    lapack_int* ldq, float* vl, float* vu, lapack_int* il,
+                    lapack_int* iu, float* abstol, lapack_int* m, float* w,
+                    float* z, lapack_int* ldz, float* work, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_dsbevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_int* kd, double* ab, lapack_int* ldab, double* q,
+                    lapack_int* ldq, double* vl, double* vu, lapack_int* il,
+                    lapack_int* iu, double* abstol, lapack_int* m, double* w,
+                    double* z, lapack_int* ldz, double* work, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_chbevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_int* kd, lapack_complex_float* ab, lapack_int* ldab,
+                    lapack_complex_float* q, lapack_int* ldq, float* vl,
+                    float* vu, lapack_int* il, lapack_int* iu, float* abstol,
+                    lapack_int* m, float* w, lapack_complex_float* z,
+                    lapack_int* ldz, lapack_complex_float* work, float* rwork,
+                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );
+void LAPACK_zhbevx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_int* kd, lapack_complex_double* ab, lapack_int* ldab,
+                    lapack_complex_double* q, lapack_int* ldq, double* vl,
+                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,
+                    lapack_int* m, double* w, lapack_complex_double* z,
+                    lapack_int* ldz, lapack_complex_double* work, double* rwork,
+                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );
+void LAPACK_sstev( char* jobz, lapack_int* n, float* d, float* e, float* z,
+                   lapack_int* ldz, float* work, lapack_int *info );
+void LAPACK_dstev( char* jobz, lapack_int* n, double* d, double* e, double* z,
+                   lapack_int* ldz, double* work, lapack_int *info );
+void LAPACK_sstevd( char* jobz, lapack_int* n, float* d, float* e, float* z,
+                    lapack_int* ldz, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_dstevd( char* jobz, lapack_int* n, double* d, double* e, double* z,
+                    lapack_int* ldz, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_sstevx( char* jobz, char* range, lapack_int* n, float* d, float* e,
+                    float* vl, float* vu, lapack_int* il, lapack_int* iu,
+                    float* abstol, lapack_int* m, float* w, float* z,
+                    lapack_int* ldz, float* work, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_dstevx( char* jobz, char* range, lapack_int* n, double* d,
+                    double* e, double* vl, double* vu, lapack_int* il,
+                    lapack_int* iu, double* abstol, lapack_int* m, double* w,
+                    double* z, lapack_int* ldz, double* work, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_sstevr( char* jobz, char* range, lapack_int* n, float* d, float* e,
+                    float* vl, float* vu, lapack_int* il, lapack_int* iu,
+                    float* abstol, lapack_int* m, float* w, float* z,
+                    lapack_int* ldz, lapack_int* isuppz, float* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_dstevr( char* jobz, char* range, lapack_int* n, double* d,
+                    double* e, double* vl, double* vu, lapack_int* il,
+                    lapack_int* iu, double* abstol, lapack_int* m, double* w,
+                    double* z, lapack_int* ldz, lapack_int* isuppz,
+                    double* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_sgees( char* jobvs, char* sort, LAPACK_S_SELECT2 select,
+                   lapack_int* n, float* a, lapack_int* lda, lapack_int* sdim,
+                   float* wr, float* wi, float* vs, lapack_int* ldvs,
+                   float* work, lapack_int* lwork, lapack_logical* bwork,
+                   lapack_int *info );
+void LAPACK_dgees( char* jobvs, char* sort, LAPACK_D_SELECT2 select,
+                   lapack_int* n, double* a, lapack_int* lda, lapack_int* sdim,
+                   double* wr, double* wi, double* vs, lapack_int* ldvs,
+                   double* work, lapack_int* lwork, lapack_logical* bwork,
+                   lapack_int *info );
+void LAPACK_cgees( char* jobvs, char* sort, LAPACK_C_SELECT1 select,
+                   lapack_int* n, lapack_complex_float* a, lapack_int* lda,
+                   lapack_int* sdim, lapack_complex_float* w,
+                   lapack_complex_float* vs, lapack_int* ldvs,
+                   lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                   lapack_logical* bwork, lapack_int *info );
+void LAPACK_zgees( char* jobvs, char* sort, LAPACK_Z_SELECT1 select,
+                   lapack_int* n, lapack_complex_double* a, lapack_int* lda,
+                   lapack_int* sdim, lapack_complex_double* w,
+                   lapack_complex_double* vs, lapack_int* ldvs,
+                   lapack_complex_double* work, lapack_int* lwork,
+                   double* rwork, lapack_logical* bwork, lapack_int *info );
+void LAPACK_sgeesx( char* jobvs, char* sort, LAPACK_S_SELECT2 select,
+                    char* sense, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int* sdim, float* wr, float* wi, float* vs,
+                    lapack_int* ldvs, float* rconde, float* rcondv, float* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_logical* bwork, lapack_int *info );
+void LAPACK_dgeesx( char* jobvs, char* sort, LAPACK_D_SELECT2 select,
+                    char* sense, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int* sdim, double* wr, double* wi, double* vs,
+                    lapack_int* ldvs, double* rconde, double* rcondv,
+                    double* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_logical* bwork,
+                    lapack_int *info );
+void LAPACK_cgeesx( char* jobvs, char* sort, LAPACK_C_SELECT1 select,
+                    char* sense, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int* sdim, lapack_complex_float* w,
+                    lapack_complex_float* vs, lapack_int* ldvs, float* rconde,
+                    float* rcondv, lapack_complex_float* work,
+                    lapack_int* lwork, float* rwork, lapack_logical* bwork,
+                    lapack_int *info );
+void LAPACK_zgeesx( char* jobvs, char* sort, LAPACK_Z_SELECT1 select,
+                    char* sense, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* sdim, lapack_complex_double* w,
+                    lapack_complex_double* vs, lapack_int* ldvs, double* rconde,
+                    double* rcondv, lapack_complex_double* work,
+                    lapack_int* lwork, double* rwork, lapack_logical* bwork,
+                    lapack_int *info );
+void LAPACK_sgeev( char* jobvl, char* jobvr, lapack_int* n, float* a,
+                   lapack_int* lda, float* wr, float* wi, float* vl,
+                   lapack_int* ldvl, float* vr, lapack_int* ldvr, float* work,
+                   lapack_int* lwork, lapack_int *info );
+void LAPACK_dgeev( char* jobvl, char* jobvr, lapack_int* n, double* a,
+                   lapack_int* lda, double* wr, double* wi, double* vl,
+                   lapack_int* ldvl, double* vr, lapack_int* ldvr, double* work,
+                   lapack_int* lwork, lapack_int *info );
+void LAPACK_cgeev( char* jobvl, char* jobvr, lapack_int* n,
+                   lapack_complex_float* a, lapack_int* lda,
+                   lapack_complex_float* w, lapack_complex_float* vl,
+                   lapack_int* ldvl, lapack_complex_float* vr, lapack_int* ldvr,
+                   lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                   lapack_int *info );
+void LAPACK_zgeev( char* jobvl, char* jobvr, lapack_int* n,
+                   lapack_complex_double* a, lapack_int* lda,
+                   lapack_complex_double* w, lapack_complex_double* vl,
+                   lapack_int* ldvl, lapack_complex_double* vr,
+                   lapack_int* ldvr, lapack_complex_double* work,
+                   lapack_int* lwork, double* rwork, lapack_int *info );
+void LAPACK_sgeevx( char* balanc, char* jobvl, char* jobvr, char* sense,
+                    lapack_int* n, float* a, lapack_int* lda, float* wr,
+                    float* wi, float* vl, lapack_int* ldvl, float* vr,
+                    lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi,
+                    float* scale, float* abnrm, float* rconde, float* rcondv,
+                    float* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_dgeevx( char* balanc, char* jobvl, char* jobvr, char* sense,
+                    lapack_int* n, double* a, lapack_int* lda, double* wr,
+                    double* wi, double* vl, lapack_int* ldvl, double* vr,
+                    lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi,
+                    double* scale, double* abnrm, double* rconde,
+                    double* rcondv, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_cgeevx( char* balanc, char* jobvl, char* jobvr, char* sense,
+                    lapack_int* n, lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* w, lapack_complex_float* vl,
+                    lapack_int* ldvl, lapack_complex_float* vr,
+                    lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi,
+                    float* scale, float* abnrm, float* rconde, float* rcondv,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int *info );
+void LAPACK_zgeevx( char* balanc, char* jobvl, char* jobvr, char* sense,
+                    lapack_int* n, lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* w, lapack_complex_double* vl,
+                    lapack_int* ldvl, lapack_complex_double* vr,
+                    lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi,
+                    double* scale, double* abnrm, double* rconde,
+                    double* rcondv, lapack_complex_double* work,
+                    lapack_int* lwork, double* rwork, lapack_int *info );
+void LAPACK_sgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n,
+                    float* a, lapack_int* lda, float* s, float* u,
+                    lapack_int* ldu, float* vt, lapack_int* ldvt, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_dgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n,
+                    double* a, lapack_int* lda, double* s, double* u,
+                    lapack_int* ldu, double* vt, lapack_int* ldvt, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_cgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda, float* s,
+                    lapack_complex_float* u, lapack_int* ldu,
+                    lapack_complex_float* vt, lapack_int* ldvt,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int *info );
+void LAPACK_zgesvd( char* jobu, char* jobvt, lapack_int* m, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda, double* s,
+                    lapack_complex_double* u, lapack_int* ldu,
+                    lapack_complex_double* vt, lapack_int* ldvt,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int *info );
+void LAPACK_sgesdd( char* jobz, lapack_int* m, lapack_int* n, float* a,
+                    lapack_int* lda, float* s, float* u, lapack_int* ldu,
+                    float* vt, lapack_int* ldvt, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_dgesdd( char* jobz, lapack_int* m, lapack_int* n, double* a,
+                    lapack_int* lda, double* s, double* u, lapack_int* ldu,
+                    double* vt, lapack_int* ldvt, double* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_int *info );
+void LAPACK_cgesdd( char* jobz, lapack_int* m, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda, float* s,
+                    lapack_complex_float* u, lapack_int* ldu,
+                    lapack_complex_float* vt, lapack_int* ldvt,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_zgesdd( char* jobz, lapack_int* m, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda, double* s,
+                    lapack_complex_double* u, lapack_int* ldu,
+                    lapack_complex_double* vt, lapack_int* ldvt,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* iwork, lapack_int *info );
+void LAPACK_dgejsv( char* joba, char* jobu, char* jobv, char* jobr, char* jobt,
+                    char* jobp, lapack_int* m, lapack_int* n, double* a,
+                    lapack_int* lda, double* sva, double* u, lapack_int* ldu,
+                    double* v, lapack_int* ldv, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_sgejsv( char* joba, char* jobu, char* jobv, char* jobr, char* jobt,
+                    char* jobp, lapack_int* m, lapack_int* n, float* a,
+                    lapack_int* lda, float* sva, float* u, lapack_int* ldu,
+                    float* v, lapack_int* ldv, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_dgesvj( char* joba, char* jobu, char* jobv, lapack_int* m,
+                    lapack_int* n, double* a, lapack_int* lda, double* sva,
+                    lapack_int* mv, double* v, lapack_int* ldv, double* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sgesvj( char* joba, char* jobu, char* jobv, lapack_int* m,
+                    lapack_int* n, float* a, lapack_int* lda, float* sva,
+                    lapack_int* mv, float* v, lapack_int* ldv, float* work,
+                    lapack_int* lwork, lapack_int *info );
+void LAPACK_sggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l,
+                    float* a, lapack_int* lda, float* b, lapack_int* ldb,
+                    float* alpha, float* beta, float* u, lapack_int* ldu,
+                    float* v, lapack_int* ldv, float* q, lapack_int* ldq,
+                    float* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_dggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l,
+                    double* a, lapack_int* lda, double* b, lapack_int* ldb,
+                    double* alpha, double* beta, double* u, lapack_int* ldu,
+                    double* v, lapack_int* ldv, double* q, lapack_int* ldq,
+                    double* work, lapack_int* iwork, lapack_int *info );
+void LAPACK_cggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb, float* alpha,
+                    float* beta, lapack_complex_float* u, lapack_int* ldu,
+                    lapack_complex_float* v, lapack_int* ldv,
+                    lapack_complex_float* q, lapack_int* ldq,
+                    lapack_complex_float* work, float* rwork, lapack_int* iwork,
+                    lapack_int *info );
+void LAPACK_zggsvd( char* jobu, char* jobv, char* jobq, lapack_int* m,
+                    lapack_int* n, lapack_int* p, lapack_int* k, lapack_int* l,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb, double* alpha,
+                    double* beta, lapack_complex_double* u, lapack_int* ldu,
+                    lapack_complex_double* v, lapack_int* ldv,
+                    lapack_complex_double* q, lapack_int* ldq,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int* iwork, lapack_int *info );
+void LAPACK_ssygv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                   float* a, lapack_int* lda, float* b, lapack_int* ldb,
+                   float* w, float* work, lapack_int* lwork, lapack_int *info );
+void LAPACK_dsygv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                   double* a, lapack_int* lda, double* b, lapack_int* ldb,
+                   double* w, double* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_chegv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                   lapack_complex_float* a, lapack_int* lda,
+                   lapack_complex_float* b, lapack_int* ldb, float* w,
+                   lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                   lapack_int *info );
+void LAPACK_zhegv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                   lapack_complex_double* a, lapack_int* lda,
+                   lapack_complex_double* b, lapack_int* ldb, double* w,
+                   lapack_complex_double* work, lapack_int* lwork,
+                   double* rwork, lapack_int *info );
+void LAPACK_ssygvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                    float* a, lapack_int* lda, float* b, lapack_int* ldb,
+                    float* w, float* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_dsygvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                    double* a, lapack_int* lda, double* b, lapack_int* ldb,
+                    double* w, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_chegvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb, float* w,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_zhegvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb, double* w,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* lrwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_ssygvx( lapack_int* itype, char* jobz, char* range, char* uplo,
+                    lapack_int* n, float* a, lapack_int* lda, float* b,
+                    lapack_int* ldb, float* vl, float* vu, lapack_int* il,
+                    lapack_int* iu, float* abstol, lapack_int* m, float* w,
+                    float* z, lapack_int* ldz, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );
+void LAPACK_dsygvx( lapack_int* itype, char* jobz, char* range, char* uplo,
+                    lapack_int* n, double* a, lapack_int* lda, double* b,
+                    lapack_int* ldb, double* vl, double* vu, lapack_int* il,
+                    lapack_int* iu, double* abstol, lapack_int* m, double* w,
+                    double* z, lapack_int* ldz, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );
+void LAPACK_chegvx( lapack_int* itype, char* jobz, char* range, char* uplo,
+                    lapack_int* n, lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb, float* vl,
+                    float* vu, lapack_int* il, lapack_int* iu, float* abstol,
+                    lapack_int* m, float* w, lapack_complex_float* z,
+                    lapack_int* ldz, lapack_complex_float* work,
+                    lapack_int* lwork, float* rwork, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_zhegvx( lapack_int* itype, char* jobz, char* range, char* uplo,
+                    lapack_int* n, lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb, double* vl,
+                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,
+                    lapack_int* m, double* w, lapack_complex_double* z,
+                    lapack_int* ldz, lapack_complex_double* work,
+                    lapack_int* lwork, double* rwork, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_sspgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                   float* ap, float* bp, float* w, float* z, lapack_int* ldz,
+                   float* work, lapack_int *info );
+void LAPACK_dspgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                   double* ap, double* bp, double* w, double* z,
+                   lapack_int* ldz, double* work, lapack_int *info );
+void LAPACK_chpgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                   lapack_complex_float* ap, lapack_complex_float* bp, float* w,
+                   lapack_complex_float* z, lapack_int* ldz,
+                   lapack_complex_float* work, float* rwork, lapack_int *info );
+void LAPACK_zhpgv( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                   lapack_complex_double* ap, lapack_complex_double* bp,
+                   double* w, lapack_complex_double* z, lapack_int* ldz,
+                   lapack_complex_double* work, double* rwork,
+                   lapack_int *info );
+void LAPACK_sspgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                    float* ap, float* bp, float* w, float* z, lapack_int* ldz,
+                    float* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_dspgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                    double* ap, double* bp, double* w, double* z,
+                    lapack_int* ldz, double* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_int* liwork, lapack_int *info );
+void LAPACK_chpgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                    lapack_complex_float* ap, lapack_complex_float* bp,
+                    float* w, lapack_complex_float* z, lapack_int* ldz,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_zhpgvd( lapack_int* itype, char* jobz, char* uplo, lapack_int* n,
+                    lapack_complex_double* ap, lapack_complex_double* bp,
+                    double* w, lapack_complex_double* z, lapack_int* ldz,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* lrwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_sspgvx( lapack_int* itype, char* jobz, char* range, char* uplo,
+                    lapack_int* n, float* ap, float* bp, float* vl, float* vu,
+                    lapack_int* il, lapack_int* iu, float* abstol,
+                    lapack_int* m, float* w, float* z, lapack_int* ldz,
+                    float* work, lapack_int* iwork, lapack_int* ifail,
+                    lapack_int *info );
+void LAPACK_dspgvx( lapack_int* itype, char* jobz, char* range, char* uplo,
+                    lapack_int* n, double* ap, double* bp, double* vl,
+                    double* vu, lapack_int* il, lapack_int* iu, double* abstol,
+                    lapack_int* m, double* w, double* z, lapack_int* ldz,
+                    double* work, lapack_int* iwork, lapack_int* ifail,
+                    lapack_int *info );
+void LAPACK_chpgvx( lapack_int* itype, char* jobz, char* range, char* uplo,
+                    lapack_int* n, lapack_complex_float* ap,
+                    lapack_complex_float* bp, float* vl, float* vu,
+                    lapack_int* il, lapack_int* iu, float* abstol,
+                    lapack_int* m, float* w, lapack_complex_float* z,
+                    lapack_int* ldz, lapack_complex_float* work, float* rwork,
+                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );
+void LAPACK_zhpgvx( lapack_int* itype, char* jobz, char* range, char* uplo,
+                    lapack_int* n, lapack_complex_double* ap,
+                    lapack_complex_double* bp, double* vl, double* vu,
+                    lapack_int* il, lapack_int* iu, double* abstol,
+                    lapack_int* m, double* w, lapack_complex_double* z,
+                    lapack_int* ldz, lapack_complex_double* work, double* rwork,
+                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );
+void LAPACK_ssbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,
+                   lapack_int* kb, float* ab, lapack_int* ldab, float* bb,
+                   lapack_int* ldbb, float* w, float* z, lapack_int* ldz,
+                   float* work, lapack_int *info );
+void LAPACK_dsbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,
+                   lapack_int* kb, double* ab, lapack_int* ldab, double* bb,
+                   lapack_int* ldbb, double* w, double* z, lapack_int* ldz,
+                   double* work, lapack_int *info );
+void LAPACK_chbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,
+                   lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab,
+                   lapack_complex_float* bb, lapack_int* ldbb, float* w,
+                   lapack_complex_float* z, lapack_int* ldz,
+                   lapack_complex_float* work, float* rwork, lapack_int *info );
+void LAPACK_zhbgv( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,
+                   lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab,
+                   lapack_complex_double* bb, lapack_int* ldbb, double* w,
+                   lapack_complex_double* z, lapack_int* ldz,
+                   lapack_complex_double* work, double* rwork,
+                   lapack_int *info );
+void LAPACK_ssbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,
+                    lapack_int* kb, float* ab, lapack_int* ldab, float* bb,
+                    lapack_int* ldbb, float* w, float* z, lapack_int* ldz,
+                    float* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_dsbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,
+                    lapack_int* kb, double* ab, lapack_int* ldab, double* bb,
+                    lapack_int* ldbb, double* w, double* z, lapack_int* ldz,
+                    double* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_chbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,
+                    lapack_int* kb, lapack_complex_float* ab, lapack_int* ldab,
+                    lapack_complex_float* bb, lapack_int* ldbb, float* w,
+                    lapack_complex_float* z, lapack_int* ldz,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int* lrwork, lapack_int* iwork, lapack_int* liwork,
+                    lapack_int *info );
+void LAPACK_zhbgvd( char* jobz, char* uplo, lapack_int* n, lapack_int* ka,
+                    lapack_int* kb, lapack_complex_double* ab, lapack_int* ldab,
+                    lapack_complex_double* bb, lapack_int* ldbb, double* w,
+                    lapack_complex_double* z, lapack_int* ldz,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* lrwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_int *info );
+void LAPACK_ssbgvx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_int* ka, lapack_int* kb, float* ab, lapack_int* ldab,
+                    float* bb, lapack_int* ldbb, float* q, lapack_int* ldq,
+                    float* vl, float* vu, lapack_int* il, lapack_int* iu,
+                    float* abstol, lapack_int* m, float* w, float* z,
+                    lapack_int* ldz, float* work, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_dsbgvx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_int* ka, lapack_int* kb, double* ab,
+                    lapack_int* ldab, double* bb, lapack_int* ldbb, double* q,
+                    lapack_int* ldq, double* vl, double* vu, lapack_int* il,
+                    lapack_int* iu, double* abstol, lapack_int* m, double* w,
+                    double* z, lapack_int* ldz, double* work, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_chbgvx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_int* ka, lapack_int* kb, lapack_complex_float* ab,
+                    lapack_int* ldab, lapack_complex_float* bb,
+                    lapack_int* ldbb, lapack_complex_float* q, lapack_int* ldq,
+                    float* vl, float* vu, lapack_int* il, lapack_int* iu,
+                    float* abstol, lapack_int* m, float* w,
+                    lapack_complex_float* z, lapack_int* ldz,
+                    lapack_complex_float* work, float* rwork, lapack_int* iwork,
+                    lapack_int* ifail, lapack_int *info );
+void LAPACK_zhbgvx( char* jobz, char* range, char* uplo, lapack_int* n,
+                    lapack_int* ka, lapack_int* kb, lapack_complex_double* ab,
+                    lapack_int* ldab, lapack_complex_double* bb,
+                    lapack_int* ldbb, lapack_complex_double* q, lapack_int* ldq,
+                    double* vl, double* vu, lapack_int* il, lapack_int* iu,
+                    double* abstol, lapack_int* m, double* w,
+                    lapack_complex_double* z, lapack_int* ldz,
+                    lapack_complex_double* work, double* rwork,
+                    lapack_int* iwork, lapack_int* ifail, lapack_int *info );
+void LAPACK_sgges( char* jobvsl, char* jobvsr, char* sort,
+                   LAPACK_S_SELECT3 selctg, lapack_int* n, float* a,
+                   lapack_int* lda, float* b, lapack_int* ldb, lapack_int* sdim,
+                   float* alphar, float* alphai, float* beta, float* vsl,
+                   lapack_int* ldvsl, float* vsr, lapack_int* ldvsr,
+                   float* work, lapack_int* lwork, lapack_logical* bwork,
+                   lapack_int *info );
+void LAPACK_dgges( char* jobvsl, char* jobvsr, char* sort,
+                   LAPACK_D_SELECT3 selctg, lapack_int* n, double* a,
+                   lapack_int* lda, double* b, lapack_int* ldb,
+                   lapack_int* sdim, double* alphar, double* alphai,
+                   double* beta, double* vsl, lapack_int* ldvsl, double* vsr,
+                   lapack_int* ldvsr, double* work, lapack_int* lwork,
+                   lapack_logical* bwork, lapack_int *info );
+void LAPACK_cgges( char* jobvsl, char* jobvsr, char* sort,
+                   LAPACK_C_SELECT2 selctg, lapack_int* n,
+                   lapack_complex_float* a, lapack_int* lda,
+                   lapack_complex_float* b, lapack_int* ldb, lapack_int* sdim,
+                   lapack_complex_float* alpha, lapack_complex_float* beta,
+                   lapack_complex_float* vsl, lapack_int* ldvsl,
+                   lapack_complex_float* vsr, lapack_int* ldvsr,
+                   lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                   lapack_logical* bwork, lapack_int *info );
+void LAPACK_zgges( char* jobvsl, char* jobvsr, char* sort,
+                   LAPACK_Z_SELECT2 selctg, lapack_int* n,
+                   lapack_complex_double* a, lapack_int* lda,
+                   lapack_complex_double* b, lapack_int* ldb, lapack_int* sdim,
+                   lapack_complex_double* alpha, lapack_complex_double* beta,
+                   lapack_complex_double* vsl, lapack_int* ldvsl,
+                   lapack_complex_double* vsr, lapack_int* ldvsr,
+                   lapack_complex_double* work, lapack_int* lwork,
+                   double* rwork, lapack_logical* bwork, lapack_int *info );
+void LAPACK_sggesx( char* jobvsl, char* jobvsr, char* sort,
+                    LAPACK_S_SELECT3 selctg, char* sense, lapack_int* n,
+                    float* a, lapack_int* lda, float* b, lapack_int* ldb,
+                    lapack_int* sdim, float* alphar, float* alphai, float* beta,
+                    float* vsl, lapack_int* ldvsl, float* vsr,
+                    lapack_int* ldvsr, float* rconde, float* rcondv,
+                    float* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_logical* bwork,
+                    lapack_int *info );
+void LAPACK_dggesx( char* jobvsl, char* jobvsr, char* sort,
+                    LAPACK_D_SELECT3 selctg, char* sense, lapack_int* n,
+                    double* a, lapack_int* lda, double* b, lapack_int* ldb,
+                    lapack_int* sdim, double* alphar, double* alphai,
+                    double* beta, double* vsl, lapack_int* ldvsl, double* vsr,
+                    lapack_int* ldvsr, double* rconde, double* rcondv,
+                    double* work, lapack_int* lwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_logical* bwork,
+                    lapack_int *info );
+void LAPACK_cggesx( char* jobvsl, char* jobvsr, char* sort,
+                    LAPACK_C_SELECT2 selctg, char* sense, lapack_int* n,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb, lapack_int* sdim,
+                    lapack_complex_float* alpha, lapack_complex_float* beta,
+                    lapack_complex_float* vsl, lapack_int* ldvsl,
+                    lapack_complex_float* vsr, lapack_int* ldvsr, float* rconde,
+                    float* rcondv, lapack_complex_float* work,
+                    lapack_int* lwork, float* rwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_logical* bwork,
+                    lapack_int *info );
+void LAPACK_zggesx( char* jobvsl, char* jobvsr, char* sort,
+                    LAPACK_Z_SELECT2 selctg, char* sense, lapack_int* n,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb, lapack_int* sdim,
+                    lapack_complex_double* alpha, lapack_complex_double* beta,
+                    lapack_complex_double* vsl, lapack_int* ldvsl,
+                    lapack_complex_double* vsr, lapack_int* ldvsr,
+                    double* rconde, double* rcondv, lapack_complex_double* work,
+                    lapack_int* lwork, double* rwork, lapack_int* iwork,
+                    lapack_int* liwork, lapack_logical* bwork,
+                    lapack_int *info );
+void LAPACK_sggev( char* jobvl, char* jobvr, lapack_int* n, float* a,
+                   lapack_int* lda, float* b, lapack_int* ldb, float* alphar,
+                   float* alphai, float* beta, float* vl, lapack_int* ldvl,
+                   float* vr, lapack_int* ldvr, float* work, lapack_int* lwork,
+                   lapack_int *info );
+void LAPACK_dggev( char* jobvl, char* jobvr, lapack_int* n, double* a,
+                   lapack_int* lda, double* b, lapack_int* ldb, double* alphar,
+                   double* alphai, double* beta, double* vl, lapack_int* ldvl,
+                   double* vr, lapack_int* ldvr, double* work,
+                   lapack_int* lwork, lapack_int *info );
+void LAPACK_cggev( char* jobvl, char* jobvr, lapack_int* n,
+                   lapack_complex_float* a, lapack_int* lda,
+                   lapack_complex_float* b, lapack_int* ldb,
+                   lapack_complex_float* alpha, lapack_complex_float* beta,
+                   lapack_complex_float* vl, lapack_int* ldvl,
+                   lapack_complex_float* vr, lapack_int* ldvr,
+                   lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                   lapack_int *info );
+void LAPACK_zggev( char* jobvl, char* jobvr, lapack_int* n,
+                   lapack_complex_double* a, lapack_int* lda,
+                   lapack_complex_double* b, lapack_int* ldb,
+                   lapack_complex_double* alpha, lapack_complex_double* beta,
+                   lapack_complex_double* vl, lapack_int* ldvl,
+                   lapack_complex_double* vr, lapack_int* ldvr,
+                   lapack_complex_double* work, lapack_int* lwork,
+                   double* rwork, lapack_int *info );
+void LAPACK_sggevx( char* balanc, char* jobvl, char* jobvr, char* sense,
+                    lapack_int* n, float* a, lapack_int* lda, float* b,
+                    lapack_int* ldb, float* alphar, float* alphai, float* beta,
+                    float* vl, lapack_int* ldvl, float* vr, lapack_int* ldvr,
+                    lapack_int* ilo, lapack_int* ihi, float* lscale,
+                    float* rscale, float* abnrm, float* bbnrm, float* rconde,
+                    float* rcondv, float* work, lapack_int* lwork,
+                    lapack_int* iwork, lapack_logical* bwork,
+                    lapack_int *info );
+void LAPACK_dggevx( char* balanc, char* jobvl, char* jobvr, char* sense,
+                    lapack_int* n, double* a, lapack_int* lda, double* b,
+                    lapack_int* ldb, double* alphar, double* alphai,
+                    double* beta, double* vl, lapack_int* ldvl, double* vr,
+                    lapack_int* ldvr, lapack_int* ilo, lapack_int* ihi,
+                    double* lscale, double* rscale, double* abnrm,
+                    double* bbnrm, double* rconde, double* rcondv, double* work,
+                    lapack_int* lwork, lapack_int* iwork, lapack_logical* bwork,
+                    lapack_int *info );
+void LAPACK_cggevx( char* balanc, char* jobvl, char* jobvr, char* sense,
+                    lapack_int* n, lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    lapack_complex_float* alpha, lapack_complex_float* beta,
+                    lapack_complex_float* vl, lapack_int* ldvl,
+                    lapack_complex_float* vr, lapack_int* ldvr, lapack_int* ilo,
+                    lapack_int* ihi, float* lscale, float* rscale, float* abnrm,
+                    float* bbnrm, float* rconde, float* rcondv,
+                    lapack_complex_float* work, lapack_int* lwork, float* rwork,
+                    lapack_int* iwork, lapack_logical* bwork,
+                    lapack_int *info );
+void LAPACK_zggevx( char* balanc, char* jobvl, char* jobvr, char* sense,
+                    lapack_int* n, lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* alpha, lapack_complex_double* beta,
+                    lapack_complex_double* vl, lapack_int* ldvl,
+                    lapack_complex_double* vr, lapack_int* ldvr,
+                    lapack_int* ilo, lapack_int* ihi, double* lscale,
+                    double* rscale, double* abnrm, double* bbnrm,
+                    double* rconde, double* rcondv, lapack_complex_double* work,
+                    lapack_int* lwork, double* rwork, lapack_int* iwork,
+                    lapack_logical* bwork, lapack_int *info );
+void LAPACK_dsfrk( char* transr, char* uplo, char* trans, lapack_int* n,
+                   lapack_int* k, double* alpha, const double* a,
+                   lapack_int* lda, double* beta, double* c );
+void LAPACK_ssfrk( char* transr, char* uplo, char* trans, lapack_int* n,
+                   lapack_int* k, float* alpha, const float* a, lapack_int* lda,
+                   float* beta, float* c );
+void LAPACK_zhfrk( char* transr, char* uplo, char* trans, lapack_int* n,
+                   lapack_int* k, double* alpha, const lapack_complex_double* a,
+                   lapack_int* lda, double* beta, lapack_complex_double* c );
+void LAPACK_chfrk( char* transr, char* uplo, char* trans, lapack_int* n,
+                   lapack_int* k, float* alpha, const lapack_complex_float* a,
+                   lapack_int* lda, float* beta, lapack_complex_float* c );
+void LAPACK_dtfsm( char* transr, char* side, char* uplo, char* trans,
+                   char* diag, lapack_int* m, lapack_int* n, double* alpha,
+                   const double* a, double* b, lapack_int* ldb );
+void LAPACK_stfsm( char* transr, char* side, char* uplo, char* trans,
+                   char* diag, lapack_int* m, lapack_int* n, float* alpha,
+                   const float* a, float* b, lapack_int* ldb );
+void LAPACK_ztfsm( char* transr, char* side, char* uplo, char* trans,
+                   char* diag, lapack_int* m, lapack_int* n,
+                   lapack_complex_double* alpha, const lapack_complex_double* a,
+                   lapack_complex_double* b, lapack_int* ldb );
+void LAPACK_ctfsm( char* transr, char* side, char* uplo, char* trans,
+                   char* diag, lapack_int* m, lapack_int* n,
+                   lapack_complex_float* alpha, const lapack_complex_float* a,
+                   lapack_complex_float* b, lapack_int* ldb );
+void LAPACK_dtfttp( char* transr, char* uplo, lapack_int* n, const double* arf,
+                    double* ap, lapack_int *info );
+void LAPACK_stfttp( char* transr, char* uplo, lapack_int* n, const float* arf,
+                    float* ap, lapack_int *info );
+void LAPACK_ztfttp( char* transr, char* uplo, lapack_int* n,
+                    const lapack_complex_double* arf, lapack_complex_double* ap,
+                    lapack_int *info );
+void LAPACK_ctfttp( char* transr, char* uplo, lapack_int* n,
+                    const lapack_complex_float* arf, lapack_complex_float* ap,
+                    lapack_int *info );
+void LAPACK_dtfttr( char* transr, char* uplo, lapack_int* n, const double* arf,
+                    double* a, lapack_int* lda, lapack_int *info );
+void LAPACK_stfttr( char* transr, char* uplo, lapack_int* n, const float* arf,
+                    float* a, lapack_int* lda, lapack_int *info );
+void LAPACK_ztfttr( char* transr, char* uplo, lapack_int* n,
+                    const lapack_complex_double* arf, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_ctfttr( char* transr, char* uplo, lapack_int* n,
+                    const lapack_complex_float* arf, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_dtpttf( char* transr, char* uplo, lapack_int* n, const double* ap,
+                    double* arf, lapack_int *info );
+void LAPACK_stpttf( char* transr, char* uplo, lapack_int* n, const float* ap,
+                    float* arf, lapack_int *info );
+void LAPACK_ztpttf( char* transr, char* uplo, lapack_int* n,
+                    const lapack_complex_double* ap, lapack_complex_double* arf,
+                    lapack_int *info );
+void LAPACK_ctpttf( char* transr, char* uplo, lapack_int* n,
+                    const lapack_complex_float* ap, lapack_complex_float* arf,
+                    lapack_int *info );
+void LAPACK_dtpttr( char* uplo, lapack_int* n, const double* ap, double* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_stpttr( char* uplo, lapack_int* n, const float* ap, float* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_ztpttr( char* uplo, lapack_int* n, const lapack_complex_double* ap,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_ctpttr( char* uplo, lapack_int* n, const lapack_complex_float* ap,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_dtrttf( char* transr, char* uplo, lapack_int* n, const double* a,
+                    lapack_int* lda, double* arf, lapack_int *info );
+void LAPACK_strttf( char* transr, char* uplo, lapack_int* n, const float* a,
+                    lapack_int* lda, float* arf, lapack_int *info );
+void LAPACK_ztrttf( char* transr, char* uplo, lapack_int* n,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* arf, lapack_int *info );
+void LAPACK_ctrttf( char* transr, char* uplo, lapack_int* n,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* arf, lapack_int *info );
+void LAPACK_dtrttp( char* uplo, lapack_int* n, const double* a, lapack_int* lda,
+                    double* ap, lapack_int *info );
+void LAPACK_strttp( char* uplo, lapack_int* n, const float* a, lapack_int* lda,
+                    float* ap, lapack_int *info );
+void LAPACK_ztrttp( char* uplo, lapack_int* n, const lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* ap,
+                    lapack_int *info );
+void LAPACK_ctrttp( char* uplo, lapack_int* n, const lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* ap,
+                    lapack_int *info );
+void LAPACK_sgeqrfp( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                     float* tau, float* work, lapack_int* lwork,
+                     lapack_int *info );
+void LAPACK_dgeqrfp( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                     double* tau, double* work, lapack_int* lwork,
+                     lapack_int *info );
+void LAPACK_cgeqrfp( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                     lapack_int* lda, lapack_complex_float* tau,
+                     lapack_complex_float* work, lapack_int* lwork,
+                     lapack_int *info );
+void LAPACK_zgeqrfp( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                     lapack_int* lda, lapack_complex_double* tau,
+                     lapack_complex_double* work, lapack_int* lwork,
+                     lapack_int *info );
+void LAPACK_clacgv( lapack_int* n, lapack_complex_float* x, lapack_int* incx );
+void LAPACK_zlacgv( lapack_int* n, lapack_complex_double* x, lapack_int* incx );
+void LAPACK_slarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n,
+                    float* x );
+void LAPACK_dlarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n,
+                    double* x );
+void LAPACK_clarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n,
+                    lapack_complex_float* x );
+void LAPACK_zlarnv( lapack_int* idist, lapack_int* iseed, lapack_int* n,
+                    lapack_complex_double* x );
+void LAPACK_sgeqr2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    float* tau, float* work, lapack_int *info );
+void LAPACK_dgeqr2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    double* tau, double* work, lapack_int *info );
+void LAPACK_cgeqr2( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* tau,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zgeqr2( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_slacpy( char* uplo, lapack_int* m, lapack_int* n, const float* a,
+                    lapack_int* lda, float* b, lapack_int* ldb );
+void LAPACK_dlacpy( char* uplo, lapack_int* m, lapack_int* n, const double* a,
+                    lapack_int* lda, double* b, lapack_int* ldb );
+void LAPACK_clacpy( char* uplo, lapack_int* m, lapack_int* n,
+                    const lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb );
+void LAPACK_zlacpy( char* uplo, lapack_int* m, lapack_int* n,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb );
+void LAPACK_sgetf2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_dgetf2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int* ipiv, lapack_int *info );
+void LAPACK_cgetf2( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int* ipiv, lapack_int *info );
+void LAPACK_zgetf2( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* ipiv, lapack_int *info );
+void LAPACK_slaswp( lapack_int* n, float* a, lapack_int* lda, lapack_int* k1,
+                    lapack_int* k2, const lapack_int* ipiv, lapack_int* incx );
+void LAPACK_dlaswp( lapack_int* n, double* a, lapack_int* lda, lapack_int* k1,
+                    lapack_int* k2, const lapack_int* ipiv, lapack_int* incx );
+void LAPACK_claswp( lapack_int* n, lapack_complex_float* a, lapack_int* lda,
+                    lapack_int* k1, lapack_int* k2, const lapack_int* ipiv,
+                    lapack_int* incx );
+void LAPACK_zlaswp( lapack_int* n, lapack_complex_double* a, lapack_int* lda,
+                    lapack_int* k1, lapack_int* k2, const lapack_int* ipiv,
+                    lapack_int* incx );
+float LAPACK_slange( char* norm, lapack_int* m, lapack_int* n, const float* a,
+                    lapack_int* lda, float* work );
+double LAPACK_dlange( char* norm, lapack_int* m, lapack_int* n, const double* a,
+                    lapack_int* lda, double* work );
+float LAPACK_clange( char* norm, lapack_int* m, lapack_int* n,
+                    const lapack_complex_float* a, lapack_int* lda, float* work );
+double LAPACK_zlange( char* norm, lapack_int* m, lapack_int* n,
+                    const lapack_complex_double* a, lapack_int* lda, double* work );
+float LAPACK_clanhe( char* norm, char* uplo, lapack_int* n,
+                    const lapack_complex_float* a, lapack_int* lda, float* work );
+double LAPACK_zlanhe( char* norm, char* uplo, lapack_int* n,
+                    const lapack_complex_double* a, lapack_int* lda, double* work );
+float LAPACK_slansy( char* norm, char* uplo, lapack_int* n, const float* a,
+                    lapack_int* lda, float* work );
+double LAPACK_dlansy( char* norm, char* uplo, lapack_int* n, const double* a,
+                    lapack_int* lda, double* work );
+float LAPACK_clansy( char* norm, char* uplo, lapack_int* n,
+                    const lapack_complex_float* a, lapack_int* lda, float* work );
+double LAPACK_zlansy( char* norm, char* uplo, lapack_int* n,
+                    const lapack_complex_double* a, lapack_int* lda, double* work );
+float LAPACK_slantr( char* norm, char* uplo, char* diag, lapack_int* m,
+                    lapack_int* n, const float* a, lapack_int* lda, float* work );
+double LAPACK_dlantr( char* norm, char* uplo, char* diag, lapack_int* m,
+                    lapack_int* n, const double* a, lapack_int* lda, double* work );
+float LAPACK_clantr( char* norm, char* uplo, char* diag, lapack_int* m,
+                    lapack_int* n, const lapack_complex_float* a, lapack_int* lda,
+                    float* work );
+double LAPACK_zlantr( char* norm, char* uplo, char* diag, lapack_int* m,
+                    lapack_int* n, const lapack_complex_double* a, lapack_int* lda,
+                    double* work );
+float LAPACK_slamch( char* cmach );
+double LAPACK_dlamch( char* cmach );
+void LAPACK_sgelq2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                    float* tau, float* work, lapack_int *info );
+void LAPACK_dgelq2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                    double* tau, double* work, lapack_int *info );
+void LAPACK_cgelq2( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_complex_float* tau,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zgelq2( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_complex_double* tau,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_slarfb( char* side, char* trans, char* direct, char* storev,
+                    lapack_int* m, lapack_int* n, lapack_int* k, const float* v,
+                    lapack_int* ldv, const float* t, lapack_int* ldt, float* c,
+                    lapack_int* ldc, float* work, lapack_int* ldwork );
+void LAPACK_dlarfb( char* side, char* trans, char* direct, char* storev,
+                    lapack_int* m, lapack_int* n, lapack_int* k,
+                    const double* v, lapack_int* ldv, const double* t,
+                    lapack_int* ldt, double* c, lapack_int* ldc, double* work,
+                    lapack_int* ldwork );
+void LAPACK_clarfb( char* side, char* trans, char* direct, char* storev,
+                    lapack_int* m, lapack_int* n, lapack_int* k,
+                    const lapack_complex_float* v, lapack_int* ldv,
+                    const lapack_complex_float* t, lapack_int* ldt,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    lapack_complex_float* work, lapack_int* ldwork );
+void LAPACK_zlarfb( char* side, char* trans, char* direct, char* storev,
+                    lapack_int* m, lapack_int* n, lapack_int* k,
+                    const lapack_complex_double* v, lapack_int* ldv,
+                    const lapack_complex_double* t, lapack_int* ldt,
+                    lapack_complex_double* c, lapack_int* ldc,
+                    lapack_complex_double* work, lapack_int* ldwork );
+void LAPACK_slarfg( lapack_int* n, float* alpha, float* x, lapack_int* incx,
+                    float* tau );
+void LAPACK_dlarfg( lapack_int* n, double* alpha, double* x, lapack_int* incx,
+                    double* tau );
+void LAPACK_clarfg( lapack_int* n, lapack_complex_float* alpha,
+                    lapack_complex_float* x, lapack_int* incx,
+                    lapack_complex_float* tau );
+void LAPACK_zlarfg( lapack_int* n, lapack_complex_double* alpha,
+                    lapack_complex_double* x, lapack_int* incx,
+                    lapack_complex_double* tau );
+void LAPACK_slarft( char* direct, char* storev, lapack_int* n, lapack_int* k,
+                    const float* v, lapack_int* ldv, const float* tau, float* t,
+                    lapack_int* ldt );
+void LAPACK_dlarft( char* direct, char* storev, lapack_int* n, lapack_int* k,
+                    const double* v, lapack_int* ldv, const double* tau,
+                    double* t, lapack_int* ldt );
+void LAPACK_clarft( char* direct, char* storev, lapack_int* n, lapack_int* k,
+                    const lapack_complex_float* v, lapack_int* ldv,
+                    const lapack_complex_float* tau, lapack_complex_float* t,
+                    lapack_int* ldt );
+void LAPACK_zlarft( char* direct, char* storev, lapack_int* n, lapack_int* k,
+                    const lapack_complex_double* v, lapack_int* ldv,
+                    const lapack_complex_double* tau, lapack_complex_double* t,
+                    lapack_int* ldt );
+void LAPACK_slarfx( char* side, lapack_int* m, lapack_int* n, const float* v,
+                    float* tau, float* c, lapack_int* ldc, float* work );
+void LAPACK_dlarfx( char* side, lapack_int* m, lapack_int* n, const double* v,
+                    double* tau, double* c, lapack_int* ldc, double* work );
+void LAPACK_clarfx( char* side, lapack_int* m, lapack_int* n,
+                    const lapack_complex_float* v, lapack_complex_float* tau,
+                    lapack_complex_float* c, lapack_int* ldc,
+                    lapack_complex_float* work );
+void LAPACK_zlarfx( char* side, lapack_int* m, lapack_int* n,
+                    const lapack_complex_double* v, lapack_complex_double* tau,
+                    lapack_complex_double* c, lapack_int* ldc,
+                    lapack_complex_double* work );
+void LAPACK_slatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed,
+                    char* sym, float* d, lapack_int* mode, float* cond,
+                    float* dmax, lapack_int* kl, lapack_int* ku, char* pack,
+                    float* a, lapack_int* lda, float* work, lapack_int *info );
+void LAPACK_dlatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed,
+                    char* sym, double* d, lapack_int* mode, double* cond,
+                    double* dmax, lapack_int* kl, lapack_int* ku, char* pack,
+                    double* a, lapack_int* lda, double* work,
+                    lapack_int *info );
+void LAPACK_clatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed,
+                    char* sym, float* d, lapack_int* mode, float* cond,
+                    float* dmax, lapack_int* kl, lapack_int* ku, char* pack,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zlatms( lapack_int* m, lapack_int* n, char* dist, lapack_int* iseed,
+                    char* sym, double* d, lapack_int* mode, double* cond,
+                    double* dmax, lapack_int* kl, lapack_int* ku, char* pack,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_slag2d( lapack_int* m, lapack_int* n, const float* sa,
+                    lapack_int* ldsa, double* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_dlag2s( lapack_int* m, lapack_int* n, const double* a,
+                    lapack_int* lda, float* sa, lapack_int* ldsa,
+                    lapack_int *info );
+void LAPACK_clag2z( lapack_int* m, lapack_int* n,
+                    const lapack_complex_float* sa, lapack_int* ldsa,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_zlag2c( lapack_int* m, lapack_int* n,
+                    const lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_float* sa, lapack_int* ldsa,
+                    lapack_int *info );
+void LAPACK_slauum( char* uplo, lapack_int* n, float* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_dlauum( char* uplo, lapack_int* n, double* a, lapack_int* lda,
+                    lapack_int *info );
+void LAPACK_clauum( char* uplo, lapack_int* n, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_zlauum( char* uplo, lapack_int* n, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int *info );
+void LAPACK_slagge( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, const float* d, float* a, lapack_int* lda,
+                    lapack_int* iseed, float* work, lapack_int *info );
+void LAPACK_dlagge( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, const double* d, double* a, lapack_int* lda,
+                    lapack_int* iseed, double* work, lapack_int *info );
+void LAPACK_clagge( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, const float* d, lapack_complex_float* a,
+                    lapack_int* lda, lapack_int* iseed,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zlagge( lapack_int* m, lapack_int* n, lapack_int* kl,
+                    lapack_int* ku, const double* d, lapack_complex_double* a,
+                    lapack_int* lda, lapack_int* iseed,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_slaset( char* uplo, lapack_int* m, lapack_int* n, float* alpha,
+                    float* beta, float* a, lapack_int* lda );
+void LAPACK_dlaset( char* uplo, lapack_int* m, lapack_int* n, double* alpha,
+                    double* beta, double* a, lapack_int* lda );
+void LAPACK_claset( char* uplo, lapack_int* m, lapack_int* n,
+                    lapack_complex_float* alpha, lapack_complex_float* beta,
+                    lapack_complex_float* a, lapack_int* lda );
+void LAPACK_zlaset( char* uplo, lapack_int* m, lapack_int* n,
+                    lapack_complex_double* alpha, lapack_complex_double* beta,
+                    lapack_complex_double* a, lapack_int* lda );
+void LAPACK_slasrt( char* id, lapack_int* n, float* d, lapack_int *info );
+void LAPACK_dlasrt( char* id, lapack_int* n, double* d, lapack_int *info );
+void LAPACK_claghe( lapack_int* n, lapack_int* k, const float* d,
+                    lapack_complex_float* a, lapack_int* lda, lapack_int* iseed,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zlaghe( lapack_int* n, lapack_int* k, const double* d,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_int* iseed, lapack_complex_double* work,
+                    lapack_int *info );
+void LAPACK_slagsy( lapack_int* n, lapack_int* k, const float* d, float* a,
+                    lapack_int* lda, lapack_int* iseed, float* work,
+                    lapack_int *info );
+void LAPACK_dlagsy( lapack_int* n, lapack_int* k, const double* d, double* a,
+                    lapack_int* lda, lapack_int* iseed, double* work,
+                    lapack_int *info );
+void LAPACK_clagsy( lapack_int* n, lapack_int* k, const float* d,
+                    lapack_complex_float* a, lapack_int* lda, lapack_int* iseed,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zlagsy( lapack_int* n, lapack_int* k, const double* d,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_int* iseed, lapack_complex_double* work,
+                    lapack_int *info );
+void LAPACK_slapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n,
+                    float* x, lapack_int* ldx, lapack_int* k );
+void LAPACK_dlapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n,
+                    double* x, lapack_int* ldx, lapack_int* k );
+void LAPACK_clapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n,
+                    lapack_complex_float* x, lapack_int* ldx, lapack_int* k );
+void LAPACK_zlapmr( lapack_logical* forwrd, lapack_int* m, lapack_int* n,
+                    lapack_complex_double* x, lapack_int* ldx, lapack_int* k );
+float LAPACK_slapy2( float* x, float* y );
+double LAPACK_dlapy2( double* x, double* y );
+float LAPACK_slapy3( float* x, float* y, float* z );
+double LAPACK_dlapy3( double* x, double* y, double* z );
+void LAPACK_slartgp( float* f, float* g, float* cs, float* sn, float* r );
+void LAPACK_dlartgp( double* f, double* g, double* cs, double* sn, double* r );
+void LAPACK_slartgs( float* x, float* y, float* sigma, float* cs, float* sn );
+void LAPACK_dlartgs( double* x, double* y, double* sigma, double* cs,
+                     double* sn );
+// LAPACK 3.3.0
+void LAPACK_cbbcsd( char* jobu1, char* jobu2,
+                    char* jobv1t, char* jobv2t, char* trans,
+                    lapack_int* m, lapack_int* p, lapack_int* q,
+                    float* theta, float* phi,
+                    lapack_complex_float* u1, lapack_int* ldu1,
+                    lapack_complex_float* u2, lapack_int* ldu2,
+                    lapack_complex_float* v1t, lapack_int* ldv1t,
+                    lapack_complex_float* v2t, lapack_int* ldv2t,
+                    float* b11d, float* b11e, float* b12d,
+                    float* b12e, float* b21d, float* b21e,
+                    float* b22d, float* b22e, float* rwork,
+                    lapack_int* lrwork , lapack_int *info );
+void LAPACK_cheswapr( char* uplo, lapack_int* n,
+                      lapack_complex_float* a, lapack_int* i1,
+                      lapack_int* i2 );
+void LAPACK_chetri2( char* uplo, lapack_int* n,
+                     lapack_complex_float* a, lapack_int* lda,
+                     const lapack_int* ipiv,
+                     lapack_complex_float* work, lapack_int* lwork , lapack_int *info );
+void LAPACK_chetri2x( char* uplo, lapack_int* n,
+                      lapack_complex_float* a, lapack_int* lda,
+                      const lapack_int* ipiv,
+                      lapack_complex_float* work, lapack_int* nb , lapack_int *info );
+void LAPACK_chetrs2( char* uplo, lapack_int* n,
+                     lapack_int* nrhs, const lapack_complex_float* a,
+                     lapack_int* lda, const lapack_int* ipiv,
+                     lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* work , lapack_int *info );
+void LAPACK_csyconv( char* uplo, char* way,
+                     lapack_int* n, lapack_complex_float* a,
+                     lapack_int* lda, const lapack_int* ipiv,
+                     lapack_complex_float* work , lapack_int *info );
+void LAPACK_csyswapr( char* uplo, lapack_int* n,
+                      lapack_complex_float* a, lapack_int* i1,
+                      lapack_int* i2 );
+void LAPACK_csytri2( char* uplo, lapack_int* n,
+                     lapack_complex_float* a, lapack_int* lda,
+                     const lapack_int* ipiv,
+                     lapack_complex_float* work, lapack_int* lwork , lapack_int *info );
+void LAPACK_csytri2x( char* uplo, lapack_int* n,
+                      lapack_complex_float* a, lapack_int* lda,
+                      const lapack_int* ipiv,
+                      lapack_complex_float* work, lapack_int* nb , lapack_int *info );
+void LAPACK_csytrs2( char* uplo, lapack_int* n,
+                     lapack_int* nrhs, const lapack_complex_float* a,
+                     lapack_int* lda, const lapack_int* ipiv,
+                     lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* work , lapack_int *info );
+void LAPACK_cunbdb( char* trans, char* signs,
+                    lapack_int* m, lapack_int* p, lapack_int* q,
+                    lapack_complex_float* x11, lapack_int* ldx11,
+                    lapack_complex_float* x12, lapack_int* ldx12,
+                    lapack_complex_float* x21, lapack_int* ldx21,
+                    lapack_complex_float* x22, lapack_int* ldx22,
+                    float* theta, float* phi,
+                    lapack_complex_float* taup1,
+                    lapack_complex_float* taup2,
+                    lapack_complex_float* tauq1,
+                    lapack_complex_float* tauq2,
+                    lapack_complex_float* work, lapack_int* lwork , lapack_int *info );
+void LAPACK_cuncsd( char* jobu1, char* jobu2,
+                    char* jobv1t, char* jobv2t, char* trans,
+                    char* signs, lapack_int* m, lapack_int* p,
+                    lapack_int* q, lapack_complex_float* x11,
+                    lapack_int* ldx11, lapack_complex_float* x12,
+                    lapack_int* ldx12, lapack_complex_float* x21,
+                    lapack_int* ldx21, lapack_complex_float* x22,
+                    lapack_int* ldx22, float* theta,
+                    lapack_complex_float* u1, lapack_int* ldu1,
+                    lapack_complex_float* u2, lapack_int* ldu2,
+                    lapack_complex_float* v1t, lapack_int* ldv1t,
+                    lapack_complex_float* v2t, lapack_int* ldv2t,
+                    lapack_complex_float* work, lapack_int* lwork,
+                    float* rwork, lapack_int* lrwork,
+                    lapack_int* iwork , lapack_int *info );
+void LAPACK_dbbcsd( char* jobu1, char* jobu2,
+                    char* jobv1t, char* jobv2t, char* trans,
+                    lapack_int* m, lapack_int* p, lapack_int* q,
+                    double* theta, double* phi, double* u1,
+                    lapack_int* ldu1, double* u2, lapack_int* ldu2,
+                    double* v1t, lapack_int* ldv1t, double* v2t,
+                    lapack_int* ldv2t, double* b11d, double* b11e,
+                    double* b12d, double* b12e, double* b21d,
+                    double* b21e, double* b22d, double* b22e,
+                    double* work, lapack_int* lwork , lapack_int *info );
+void LAPACK_dorbdb( char* trans, char* signs,
+                    lapack_int* m, lapack_int* p, lapack_int* q,
+                    double* x11, lapack_int* ldx11, double* x12,
+                    lapack_int* ldx12, double* x21, lapack_int* ldx21,
+                    double* x22, lapack_int* ldx22, double* theta,
+                    double* phi, double* taup1, double* taup2,
+                    double* tauq1, double* tauq2, double* work,
+                    lapack_int* lwork , lapack_int *info );
+void LAPACK_dorcsd( char* jobu1, char* jobu2,
+                    char* jobv1t, char* jobv2t, char* trans,
+                    char* signs, lapack_int* m, lapack_int* p,
+                    lapack_int* q, double* x11, lapack_int* ldx11,
+                    double* x12, lapack_int* ldx12, double* x21,
+                    lapack_int* ldx21, double* x22, lapack_int* ldx22,
+                    double* theta, double* u1, lapack_int* ldu1,
+                    double* u2, lapack_int* ldu2, double* v1t,
+                    lapack_int* ldv1t, double* v2t, lapack_int* ldv2t,
+                    double* work, lapack_int* lwork,
+                    lapack_int* iwork , lapack_int *info );
+void LAPACK_dsyconv( char* uplo, char* way,
+                     lapack_int* n, double* a, lapack_int* lda,
+                     const lapack_int* ipiv, double* work , lapack_int *info );
+void LAPACK_dsyswapr( char* uplo, lapack_int* n,
+                      double* a, lapack_int* i1, lapack_int* i2 );
+void LAPACK_dsytri2( char* uplo, lapack_int* n,
+                     double* a, lapack_int* lda,
+                     const lapack_int* ipiv,
+                     lapack_complex_double* work, lapack_int* lwork , lapack_int *info );
+void LAPACK_dsytri2x( char* uplo, lapack_int* n,
+                      double* a, lapack_int* lda,
+                      const lapack_int* ipiv, double* work,
+                      lapack_int* nb , lapack_int *info );
+void LAPACK_dsytrs2( char* uplo, lapack_int* n,
+                     lapack_int* nrhs, const double* a,
+                     lapack_int* lda, const lapack_int* ipiv,
+                     double* b, lapack_int* ldb, double* work , lapack_int *info );
+void LAPACK_sbbcsd( char* jobu1, char* jobu2,
+                    char* jobv1t, char* jobv2t, char* trans,
+                    lapack_int* m, lapack_int* p, lapack_int* q,
+                    float* theta, float* phi, float* u1,
+                    lapack_int* ldu1, float* u2, lapack_int* ldu2,
+                    float* v1t, lapack_int* ldv1t, float* v2t,
+                    lapack_int* ldv2t, float* b11d, float* b11e,
+                    float* b12d, float* b12e, float* b21d,
+                    float* b21e, float* b22d, float* b22e,
+                    float* work, lapack_int* lwork , lapack_int *info );
+void LAPACK_sorbdb( char* trans, char* signs,
+                    lapack_int* m, lapack_int* p, lapack_int* q,
+                    float* x11, lapack_int* ldx11, float* x12,
+                    lapack_int* ldx12, float* x21, lapack_int* ldx21,
+                    float* x22, lapack_int* ldx22, float* theta,
+                    float* phi, float* taup1, float* taup2,
+                    float* tauq1, float* tauq2, float* work,
+                    lapack_int* lwork , lapack_int *info );
+void LAPACK_sorcsd( char* jobu1, char* jobu2,
+                    char* jobv1t, char* jobv2t, char* trans,
+                    char* signs, lapack_int* m, lapack_int* p,
+                    lapack_int* q, float* x11, lapack_int* ldx11,
+                    float* x12, lapack_int* ldx12, float* x21,
+                    lapack_int* ldx21, float* x22, lapack_int* ldx22,
+                    float* theta, float* u1, lapack_int* ldu1,
+                    float* u2, lapack_int* ldu2, float* v1t,
+                    lapack_int* ldv1t, float* v2t, lapack_int* ldv2t,
+                    float* work, lapack_int* lwork,
+                    lapack_int* iwork , lapack_int *info );
+void LAPACK_ssyconv( char* uplo, char* way,
+                     lapack_int* n, float* a, lapack_int* lda,
+                     const lapack_int* ipiv, float* work , lapack_int *info );
+void LAPACK_ssyswapr( char* uplo, lapack_int* n,
+                      float* a, lapack_int* i1, lapack_int* i2 );
+void LAPACK_ssytri2( char* uplo, lapack_int* n,
+                     float* a, lapack_int* lda,
+                     const lapack_int* ipiv,
+                     lapack_complex_float* work, lapack_int* lwork , lapack_int *info );
+void LAPACK_ssytri2x( char* uplo, lapack_int* n,
+                      float* a, lapack_int* lda,
+                      const lapack_int* ipiv, float* work,
+                      lapack_int* nb , lapack_int *info );
+void LAPACK_ssytrs2( char* uplo, lapack_int* n,
+                     lapack_int* nrhs, const float* a,
+                     lapack_int* lda, const lapack_int* ipiv,
+                     float* b, lapack_int* ldb, float* work , lapack_int *info );
+void LAPACK_zbbcsd( char* jobu1, char* jobu2,
+                    char* jobv1t, char* jobv2t, char* trans,
+                    lapack_int* m, lapack_int* p, lapack_int* q,
+                    double* theta, double* phi,
+                    lapack_complex_double* u1, lapack_int* ldu1,
+                    lapack_complex_double* u2, lapack_int* ldu2,
+                    lapack_complex_double* v1t, lapack_int* ldv1t,
+                    lapack_complex_double* v2t, lapack_int* ldv2t,
+                    double* b11d, double* b11e, double* b12d,
+                    double* b12e, double* b21d, double* b21e,
+                    double* b22d, double* b22e, double* rwork,
+                    lapack_int* lrwork , lapack_int *info );
+void LAPACK_zheswapr( char* uplo, lapack_int* n,
+                      lapack_complex_double* a, lapack_int* i1,
+                      lapack_int* i2 );
+void LAPACK_zhetri2( char* uplo, lapack_int* n,
+                     lapack_complex_double* a, lapack_int* lda,
+                     const lapack_int* ipiv,
+                     lapack_complex_double* work, lapack_int* lwork , lapack_int *info );
+void LAPACK_zhetri2x( char* uplo, lapack_int* n,
+                      lapack_complex_double* a, lapack_int* lda,
+                      const lapack_int* ipiv,
+                      lapack_complex_double* work, lapack_int* nb , lapack_int *info );
+void LAPACK_zhetrs2( char* uplo, lapack_int* n,
+                     lapack_int* nrhs,
+                     const lapack_complex_double* a, lapack_int* lda,
+                     const lapack_int* ipiv,
+                     lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* work , lapack_int *info );
+void LAPACK_zsyconv( char* uplo, char* way,
+                     lapack_int* n, lapack_complex_double* a,
+                     lapack_int* lda, const lapack_int* ipiv,
+                     lapack_complex_double* work , lapack_int *info );
+void LAPACK_zsyswapr( char* uplo, lapack_int* n,
+                      lapack_complex_double* a, lapack_int* i1,
+                      lapack_int* i2 );
+void LAPACK_zsytri2( char* uplo, lapack_int* n,
+                     lapack_complex_double* a, lapack_int* lda,
+                     const lapack_int* ipiv,
+                     lapack_complex_double* work, lapack_int* lwork , lapack_int *info );
+void LAPACK_zsytri2x( char* uplo, lapack_int* n,
+                      lapack_complex_double* a, lapack_int* lda,
+                      const lapack_int* ipiv,
+                      lapack_complex_double* work, lapack_int* nb , lapack_int *info );
+void LAPACK_zsytrs2( char* uplo, lapack_int* n,
+                     lapack_int* nrhs,
+                     const lapack_complex_double* a, lapack_int* lda,
+                     const lapack_int* ipiv,
+                     lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* work , lapack_int *info );
+void LAPACK_zunbdb( char* trans, char* signs,
+                    lapack_int* m, lapack_int* p, lapack_int* q,
+                    lapack_complex_double* x11, lapack_int* ldx11,
+                    lapack_complex_double* x12, lapack_int* ldx12,
+                    lapack_complex_double* x21, lapack_int* ldx21,
+                    lapack_complex_double* x22, lapack_int* ldx22,
+                    double* theta, double* phi,
+                    lapack_complex_double* taup1,
+                    lapack_complex_double* taup2,
+                    lapack_complex_double* tauq1,
+                    lapack_complex_double* tauq2,
+                    lapack_complex_double* work, lapack_int* lwork , lapack_int *info );
+void LAPACK_zuncsd( char* jobu1, char* jobu2,
+                    char* jobv1t, char* jobv2t, char* trans,
+                    char* signs, lapack_int* m, lapack_int* p,
+                    lapack_int* q, lapack_complex_double* x11,
+                    lapack_int* ldx11, lapack_complex_double* x12,
+                    lapack_int* ldx12, lapack_complex_double* x21,
+                    lapack_int* ldx21, lapack_complex_double* x22,
+                    lapack_int* ldx22, double* theta,
+                    lapack_complex_double* u1, lapack_int* ldu1,
+                    lapack_complex_double* u2, lapack_int* ldu2,
+                    lapack_complex_double* v1t, lapack_int* ldv1t,
+                    lapack_complex_double* v2t, lapack_int* ldv2t,
+                    lapack_complex_double* work, lapack_int* lwork,
+                    double* rwork, lapack_int* lrwork,
+                    lapack_int* iwork , lapack_int *info );
+// LAPACK 3.4.0
+void LAPACK_sgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n,
+                     lapack_int* k, lapack_int* nb, const float* v,
+                     lapack_int* ldv, const float* t, lapack_int* ldt, float* c,
+                     lapack_int* ldc, float* work, lapack_int *info );
+void LAPACK_dgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n,
+                     lapack_int* k, lapack_int* nb, const double* v,
+                     lapack_int* ldv, const double* t, lapack_int* ldt,
+                     double* c, lapack_int* ldc, double* work,
+                     lapack_int *info );
+void LAPACK_cgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n,
+                     lapack_int* k, lapack_int* nb,
+                     const lapack_complex_float* v, lapack_int* ldv,
+                     const lapack_complex_float* t, lapack_int* ldt,
+                     lapack_complex_float* c, lapack_int* ldc,
+                     lapack_complex_float* work, lapack_int *info );
+void LAPACK_zgemqrt( char* side, char* trans, lapack_int* m, lapack_int* n,
+                     lapack_int* k, lapack_int* nb,
+                     const lapack_complex_double* v, lapack_int* ldv,
+                     const lapack_complex_double* t, lapack_int* ldt,
+                     lapack_complex_double* c, lapack_int* ldc,
+                     lapack_complex_double* work, lapack_int *info );
+void LAPACK_sgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, float* a,
+                    lapack_int* lda, float* t, lapack_int* ldt, float* work,
+                    lapack_int *info );
+void LAPACK_dgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb, double* a,
+                    lapack_int* lda, double* t, lapack_int* ldt, double* work,
+                    lapack_int *info );
+void LAPACK_cgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* t, lapack_int* ldt,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_zgeqrt( lapack_int* m, lapack_int* n, lapack_int* nb,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* t, lapack_int* ldt,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_sgeqrt2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                     float* t, lapack_int* ldt, lapack_int *info );
+void LAPACK_dgeqrt2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                     double* t, lapack_int* ldt, lapack_int *info );
+void LAPACK_cgeqrt2( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                     lapack_int* lda, lapack_complex_float* t, lapack_int* ldt,
+                     lapack_int *info );
+void LAPACK_zgeqrt2( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                     lapack_int* lda, lapack_complex_double* t, lapack_int* ldt,
+                     lapack_int *info );
+void LAPACK_sgeqrt3( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                     float* t, lapack_int* ldt, lapack_int *info );
+void LAPACK_dgeqrt3( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                     double* t, lapack_int* ldt, lapack_int *info );
+void LAPACK_cgeqrt3( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                     lapack_int* lda, lapack_complex_float* t, lapack_int* ldt,
+                     lapack_int *info );
+void LAPACK_zgeqrt3( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                     lapack_int* lda, lapack_complex_double* t, lapack_int* ldt,
+                     lapack_int *info );
+void LAPACK_stpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n,
+                     lapack_int* k, lapack_int* l, lapack_int* nb,
+                     const float* v, lapack_int* ldv, const float* t,
+                     lapack_int* ldt, float* a, lapack_int* lda, float* b,
+                     lapack_int* ldb, float* work, lapack_int *info );
+void LAPACK_dtpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n,
+                     lapack_int* k, lapack_int* l, lapack_int* nb,
+                     const double* v, lapack_int* ldv, const double* t,
+                     lapack_int* ldt, double* a, lapack_int* lda, double* b,
+                     lapack_int* ldb, double* work, lapack_int *info );
+void LAPACK_ctpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n,
+                     lapack_int* k, lapack_int* l, lapack_int* nb,
+                     const lapack_complex_float* v, lapack_int* ldv,
+                     const lapack_complex_float* t, lapack_int* ldt,
+                     lapack_complex_float* a, lapack_int* lda,
+                     lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* work, lapack_int *info );
+void LAPACK_ztpmqrt( char* side, char* trans, lapack_int* m, lapack_int* n,
+                     lapack_int* k, lapack_int* l, lapack_int* nb,
+                     const lapack_complex_double* v, lapack_int* ldv,
+                     const lapack_complex_double* t, lapack_int* ldt,
+                     lapack_complex_double* a, lapack_int* lda,
+                     lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* work, lapack_int *info );
+void LAPACK_dtpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb,
+                    double* a, lapack_int* lda, double* b, lapack_int* ldb,
+                    double* t, lapack_int* ldt, double* work,
+                    lapack_int *info );
+void LAPACK_ctpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* t, lapack_complex_float* b,
+                    lapack_int* ldb, lapack_int* ldt,
+                    lapack_complex_float* work, lapack_int *info );
+void LAPACK_ztpqrt( lapack_int* m, lapack_int* n, lapack_int* l, lapack_int* nb,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    lapack_complex_double* t, lapack_int* ldt,
+                    lapack_complex_double* work, lapack_int *info );
+void LAPACK_stpqrt2( lapack_int* m, lapack_int* n, float* a, lapack_int* lda,
+                     float* b, lapack_int* ldb, float* t, lapack_int* ldt,
+                     lapack_int *info );
+void LAPACK_dtpqrt2( lapack_int* m, lapack_int* n, double* a, lapack_int* lda,
+                     double* b, lapack_int* ldb, double* t, lapack_int* ldt,
+                     lapack_int *info );
+void LAPACK_ctpqrt2( lapack_int* m, lapack_int* n, lapack_complex_float* a,
+                     lapack_int* lda, lapack_complex_float* b, lapack_int* ldb,
+                     lapack_complex_float* t, lapack_int* ldt,
+                     lapack_int *info );
+void LAPACK_ztpqrt2( lapack_int* m, lapack_int* n, lapack_complex_double* a,
+                     lapack_int* lda, lapack_complex_double* b, lapack_int* ldb,
+                     lapack_complex_double* t, lapack_int* ldt,
+                     lapack_int *info );
+void LAPACK_stprfb( char* side, char* trans, char* direct, char* storev,
+                    lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l,
+                    const float* v, lapack_int* ldv, const float* t,
+                    lapack_int* ldt, float* a, lapack_int* lda, float* b,
+                    lapack_int* ldb, const float* mywork,
+                    lapack_int* myldwork );
+void LAPACK_dtprfb( char* side, char* trans, char* direct, char* storev,
+                    lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l,
+                    const double* v, lapack_int* ldv, const double* t,
+                    lapack_int* ldt, double* a, lapack_int* lda, double* b,
+                    lapack_int* ldb, const double* mywork,
+                    lapack_int* myldwork );
+void LAPACK_ctprfb( char* side, char* trans, char* direct, char* storev,
+                    lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l,
+                    const lapack_complex_float* v, lapack_int* ldv,
+                    const lapack_complex_float* t, lapack_int* ldt,
+                    lapack_complex_float* a, lapack_int* lda,
+                    lapack_complex_float* b, lapack_int* ldb,
+                    const float* mywork, lapack_int* myldwork );
+void LAPACK_ztprfb( char* side, char* trans, char* direct, char* storev,
+                    lapack_int* m, lapack_int* n, lapack_int* k, lapack_int* l,
+                    const lapack_complex_double* v, lapack_int* ldv,
+                    const lapack_complex_double* t, lapack_int* ldt,
+                    lapack_complex_double* a, lapack_int* lda,
+                    lapack_complex_double* b, lapack_int* ldb,
+                    const double* mywork, lapack_int* myldwork );
+// LAPACK 3.X.X
+void LAPACK_csyr( char* uplo, lapack_int* n, lapack_complex_float* alpha,
+                      const lapack_complex_float* x, lapack_int* incx,
+                      lapack_complex_float* a, lapack_int* lda );
+void LAPACK_zsyr( char* uplo, lapack_int* n, lapack_complex_double* alpha,
+                      const lapack_complex_double* x, lapack_int* incx,
+                      lapack_complex_double* a, lapack_int* lda );
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _LAPACKE_H_ */
+
+#endif /* _MKL_LAPACKE_H_ */
diff --git a/third-party/Eigen/src/misc/lapacke_mangling.h b/third-party/Eigen/src/misc/lapacke_mangling.h
new file mode 100644
index 00000000..6211fd14
--- /dev/null
+++ b/third-party/Eigen/src/misc/lapacke_mangling.h
@@ -0,0 +1,17 @@
+#ifndef LAPACK_HEADER_INCLUDED
+#define LAPACK_HEADER_INCLUDED
+
+#ifndef LAPACK_GLOBAL
+#if defined(LAPACK_GLOBAL_PATTERN_LC) || defined(ADD_)
+#define LAPACK_GLOBAL(lcname,UCNAME)  lcname##_
+#elif defined(LAPACK_GLOBAL_PATTERN_UC) || defined(UPPER)
+#define LAPACK_GLOBAL(lcname,UCNAME)  UCNAME
+#elif defined(LAPACK_GLOBAL_PATTERN_MC) || defined(NOCHANGE)
+#define LAPACK_GLOBAL(lcname,UCNAME)  lcname
+#else
+#define LAPACK_GLOBAL(lcname,UCNAME)  lcname##_
+#endif
+#endif
+
+#endif
+
diff --git a/third-party/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/third-party/Eigen/src/plugins/ArrayCwiseBinaryOps.h
new file mode 100644
index 00000000..05a7449b
--- /dev/null
+++ b/third-party/Eigen/src/plugins/ArrayCwiseBinaryOps.h
@@ -0,0 +1,332 @@
+
+/** \returns an expression of the coefficient wise product of \c *this and \a other
+  *
+  * \sa MatrixBase::cwiseProduct
+  */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
+operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
+}
+
+/** \returns an expression of the coefficient wise quotient of \c *this and \a other
+  *
+  * \sa MatrixBase::cwiseQuotient
+  */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>
+operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  return CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+
+/** \returns an expression of the coefficient-wise min of \c *this and \a other
+  *
+  * Example: \include Cwise_min.cpp
+  * Output: \verbinclude Cwise_min.out
+  *
+  * \sa max()
+  */
+EIGEN_MAKE_CWISE_BINARY_OP(min,min)
+
+/** \returns an expression of the coefficient-wise min of \c *this and scalar \a other
+  *
+  * \sa max()
+  */
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived,
+                                        const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+min
+#else
+(min)
+#endif
+(const Scalar &other) const
+{
+  return (min)(Derived::PlainObject::Constant(rows(), cols(), other));
+}
+
+/** \returns an expression of the coefficient-wise max of \c *this and \a other
+  *
+  * Example: \include Cwise_max.cpp
+  * Output: \verbinclude Cwise_max.out
+  *
+  * \sa min()
+  */
+EIGEN_MAKE_CWISE_BINARY_OP(max,max)
+
+/** \returns an expression of the coefficient-wise max of \c *this and scalar \a other
+  *
+  * \sa min()
+  */
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived,
+                                        const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+max
+#else
+(max)
+#endif
+(const Scalar &other) const
+{
+  return (max)(Derived::PlainObject::Constant(rows(), cols(), other));
+}
+
+/** \returns an expression of the coefficient-wise power of \c *this to the given array of \a exponents.
+  *
+  * This function computes the coefficient-wise power.
+  *
+  * Example: \include Cwise_array_power_array.cpp
+  * Output: \verbinclude Cwise_array_power_array.out
+  */
+EIGEN_MAKE_CWISE_BINARY_OP(pow,pow)
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow)
+#else
+/** \returns an expression of the coefficients of \c *this rasied to the constant power \a exponent
+  *
+  * \tparam T is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression.
+  *
+  * This function computes the coefficient-wise power. The function MatrixBase::pow() in the
+  * unsupported module MatrixFunctions computes the matrix power.
+  *
+  * Example: \include Cwise_pow.cpp
+  * Output: \verbinclude Cwise_pow.out
+  *
+  * \sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log()
+  */
+template<typename T>
+const CwiseBinaryOp<internal::scalar_pow_op<Scalar,T>,Derived,Constant<T> > pow(const T& exponent) const;
+#endif
+
+
+// TODO code generating macros could be moved to Macros.h and could include generation of documentation
+#define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \
+template<typename OtherDerived> \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \
+OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+{ \
+  return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived>(derived(), other.derived()); \
+}\
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \
+OP(const Scalar& s) const { \
+  return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \
+} \
+EIGEN_DEVICE_FUNC friend EIGEN_STRONG_INLINE const RCmp ## COMPARATOR ## ReturnType \
+OP(const Scalar& s, const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>& d) { \
+  return Derived::PlainObject::Constant(d.rows(), d.cols(), s).OP(d); \
+}
+
+#define EIGEN_MAKE_CWISE_COMP_R_OP(OP, R_OP, RCOMPARATOR) \
+template<typename OtherDerived> \
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived> \
+OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
+{ \
+  return CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived>(other.derived(), derived()); \
+} \
+EIGEN_DEVICE_FUNC \
+inline const RCmp ## RCOMPARATOR ## ReturnType \
+OP(const Scalar& s) const { \
+  return Derived::PlainObject::Constant(rows(), cols(), s).R_OP(*this); \
+} \
+friend inline const Cmp ## RCOMPARATOR ## ReturnType \
+OP(const Scalar& s, const Derived& d) { \
+  return d.R_OP(Derived::PlainObject::Constant(d.rows(), d.cols(), s)); \
+}
+
+
+
+/** \returns an expression of the coefficient-wise \< operator of *this and \a other
+  *
+  * Example: \include Cwise_less.cpp
+  * Output: \verbinclude Cwise_less.out
+  *
+  * \sa all(), any(), operator>(), operator<=()
+  */
+EIGEN_MAKE_CWISE_COMP_OP(operator<, LT)
+
+/** \returns an expression of the coefficient-wise \<= operator of *this and \a other
+  *
+  * Example: \include Cwise_less_equal.cpp
+  * Output: \verbinclude Cwise_less_equal.out
+  *
+  * \sa all(), any(), operator>=(), operator<()
+  */
+EIGEN_MAKE_CWISE_COMP_OP(operator<=, LE)
+
+/** \returns an expression of the coefficient-wise \> operator of *this and \a other
+  *
+  * Example: \include Cwise_greater.cpp
+  * Output: \verbinclude Cwise_greater.out
+  *
+  * \sa all(), any(), operator>=(), operator<()
+  */
+EIGEN_MAKE_CWISE_COMP_R_OP(operator>, operator<, LT)
+
+/** \returns an expression of the coefficient-wise \>= operator of *this and \a other
+  *
+  * Example: \include Cwise_greater_equal.cpp
+  * Output: \verbinclude Cwise_greater_equal.out
+  *
+  * \sa all(), any(), operator>(), operator<=()
+  */
+EIGEN_MAKE_CWISE_COMP_R_OP(operator>=, operator<=, LE)
+
+/** \returns an expression of the coefficient-wise == operator of *this and \a other
+  *
+  * \warning this performs an exact comparison, which is generally a bad idea with floating-point types.
+  * In order to check for equality between two vectors or matrices with floating-point coefficients, it is
+  * generally a far better idea to use a fuzzy comparison as provided by isApprox() and
+  * isMuchSmallerThan().
+  *
+  * Example: \include Cwise_equal_equal.cpp
+  * Output: \verbinclude Cwise_equal_equal.out
+  *
+  * \sa all(), any(), isApprox(), isMuchSmallerThan()
+  */
+EIGEN_MAKE_CWISE_COMP_OP(operator==, EQ)
+
+/** \returns an expression of the coefficient-wise != operator of *this and \a other
+  *
+  * \warning this performs an exact comparison, which is generally a bad idea with floating-point types.
+  * In order to check for equality between two vectors or matrices with floating-point coefficients, it is
+  * generally a far better idea to use a fuzzy comparison as provided by isApprox() and
+  * isMuchSmallerThan().
+  *
+  * Example: \include Cwise_not_equal.cpp
+  * Output: \verbinclude Cwise_not_equal.out
+  *
+  * \sa all(), any(), isApprox(), isMuchSmallerThan()
+  */
+EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ)
+
+
+#undef EIGEN_MAKE_CWISE_COMP_OP
+#undef EIGEN_MAKE_CWISE_COMP_R_OP
+
+// scalar addition
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum)
+#else
+/** \returns an expression of \c *this with each coeff incremented by the constant \a scalar
+  *
+  * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
+  *
+  * Example: \include Cwise_plus.cpp
+  * Output: \verbinclude Cwise_plus.out
+  *
+  * \sa operator+=(), operator-()
+  */
+template<typename T>
+const CwiseBinaryOp<internal::scalar_sum_op<Scalar,T>,Derived,Constant<T> > operator+(const T& scalar) const;
+/** \returns an expression of \a expr with each coeff incremented by the constant \a scalar
+  *
+  * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
+  */
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_sum_op<T,Scalar>,Constant<T>,Derived> operator+(const T& scalar, const StorageBaseType& expr);
+#endif
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference)
+#else
+/** \returns an expression of \c *this with each coeff decremented by the constant \a scalar
+  *
+  * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
+  *
+  * Example: \include Cwise_minus.cpp
+  * Output: \verbinclude Cwise_minus.out
+  *
+  * \sa operator+=(), operator-()
+  */
+template<typename T>
+const CwiseBinaryOp<internal::scalar_difference_op<Scalar,T>,Derived,Constant<T> > operator-(const T& scalar) const;
+/** \returns an expression of the constant matrix of value \a scalar decremented by the coefficients of \a expr
+  *
+  * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
+  */
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_difference_op<T,Scalar>,Constant<T>,Derived> operator-(const T& scalar, const StorageBaseType& expr);
+#endif
+
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(operator/,quotient)
+#else
+  /**
+    * \brief Component-wise division of the scalar \a s by array elements of \a a.
+    *
+    * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
+    */
+  template<typename T> friend
+  inline const CwiseBinaryOp<internal::scalar_quotient_op<T,Scalar>,Constant<T>,Derived>
+  operator/(const T& s,const StorageBaseType& a);
+#endif
+
+/** \returns an expression of the coefficient-wise ^ operator of *this and \a other
+ *
+ * \warning this operator is for expression of bool only.
+ *
+ * Example: \include Cwise_boolean_xor.cpp
+ * Output: \verbinclude Cwise_boolean_xor.out
+ *
+ * \sa operator&&(), select()
+ */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>
+operator^(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+                      THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+  return CwiseBinaryOp<internal::scalar_boolean_xor_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+
+// NOTE disabled until we agree on argument order
+#if 0
+/** \cpp11 \returns an expression of the coefficient-wise polygamma function.
+  *
+  * \specialfunctions_module
+  *
+  * It returns the \a n -th derivative of the digamma(psi) evaluated at \c *this.
+  *
+  * \warning Be careful with the order of the parameters: x.polygamma(n) is equivalent to polygamma(n,x)
+  *
+  * \sa Eigen::polygamma()
+  */
+template<typename DerivedN>
+inline const CwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const DerivedN, const Derived>
+polygamma(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedN> &n) const
+{
+  return CwiseBinaryOp<internal::scalar_polygamma_op<Scalar>, const DerivedN, const Derived>(n.derived(), this->derived());
+}
+#endif
+
+/** \returns an expression of the coefficient-wise zeta function.
+  *
+  * \specialfunctions_module
+  *
+  * It returns the Riemann zeta function of two arguments \c *this and \a q:
+  *
+  * \param *this is the exposent, it must be > 1
+  * \param q is the shift, it must be > 0
+  *
+  * \note This function supports only float and double scalar types. To support other scalar types, the user has
+  * to provide implementations of zeta(T,T) for any scalar type T to be supported.
+  *
+  * This method is an alias for zeta(*this,q);
+  *
+  * \sa Eigen::zeta()
+  */
+template<typename DerivedQ>
+inline const CwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const DerivedQ>
+zeta(const EIGEN_CURRENT_STORAGE_BASE_CLASS<DerivedQ> &q) const
+{
+  return CwiseBinaryOp<internal::scalar_zeta_op<Scalar>, const Derived, const DerivedQ>(this->derived(), q.derived());
+}
diff --git a/third-party/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/third-party/Eigen/src/plugins/ArrayCwiseUnaryOps.h
new file mode 100644
index 00000000..ebaa3f19
--- /dev/null
+++ b/third-party/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -0,0 +1,552 @@
+
+
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> AbsReturnType;
+typedef CwiseUnaryOp<internal::scalar_arg_op<Scalar>, const Derived> ArgReturnType;
+typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> Abs2ReturnType;
+typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> SqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_rsqrt_op<Scalar>, const Derived> RsqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> SignReturnType;
+typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> InverseReturnType;
+typedef CwiseUnaryOp<internal::scalar_boolean_not_op<Scalar>, const Derived> BooleanNotReturnType;
+
+typedef CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> ExpReturnType;
+typedef CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> LogReturnType;
+typedef CwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived> Log1pReturnType;
+typedef CwiseUnaryOp<internal::scalar_log10_op<Scalar>, const Derived> Log10ReturnType;
+typedef CwiseUnaryOp<internal::scalar_cos_op<Scalar>, const Derived> CosReturnType;
+typedef CwiseUnaryOp<internal::scalar_sin_op<Scalar>, const Derived> SinReturnType;
+typedef CwiseUnaryOp<internal::scalar_tan_op<Scalar>, const Derived> TanReturnType;
+typedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> AcosReturnType;
+typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType;
+typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType;
+typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
+typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
+typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
+typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
+typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;
+typedef CwiseUnaryOp<internal::scalar_round_op<Scalar>, const Derived> RoundReturnType;
+typedef CwiseUnaryOp<internal::scalar_floor_op<Scalar>, const Derived> FloorReturnType;
+typedef CwiseUnaryOp<internal::scalar_ceil_op<Scalar>, const Derived> CeilReturnType;
+typedef CwiseUnaryOp<internal::scalar_isnan_op<Scalar>, const Derived> IsNaNReturnType;
+typedef CwiseUnaryOp<internal::scalar_isinf_op<Scalar>, const Derived> IsInfReturnType;
+typedef CwiseUnaryOp<internal::scalar_isfinite_op<Scalar>, const Derived> IsFiniteReturnType;
+
+/** \returns an expression of the coefficient-wise absolute value of \c *this
+  *
+  * Example: \include Cwise_abs.cpp
+  * Output: \verbinclude Cwise_abs.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_abs">Math functions</a>, abs2()
+  */
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const AbsReturnType
+abs() const
+{
+  return AbsReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise phase angle of \c *this
+  *
+  * Example: \include Cwise_arg.cpp
+  * Output: \verbinclude Cwise_arg.out
+  *
+  * \sa abs()
+  */
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const ArgReturnType
+arg() const
+{
+  return ArgReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise squared absolute value of \c *this
+  *
+  * Example: \include Cwise_abs2.cpp
+  * Output: \verbinclude Cwise_abs2.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_abs2">Math functions</a>, abs(), square()
+  */
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const Abs2ReturnType
+abs2() const
+{
+  return Abs2ReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise exponential of *this.
+  *
+  * This function computes the coefficient-wise exponential. The function MatrixBase::exp() in the
+  * unsupported module MatrixFunctions computes the matrix exponential.
+  *
+  * Example: \include Cwise_exp.cpp
+  * Output: \verbinclude Cwise_exp.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_exp">Math functions</a>, pow(), log(), sin(), cos()
+  */
+EIGEN_DEVICE_FUNC
+inline const ExpReturnType
+exp() const
+{
+  return ExpReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise logarithm of *this.
+  *
+  * This function computes the coefficient-wise logarithm. The function MatrixBase::log() in the
+  * unsupported module MatrixFunctions computes the matrix logarithm.
+  *
+  * Example: \include Cwise_log.cpp
+  * Output: \verbinclude Cwise_log.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_log">Math functions</a>, exp()
+  */
+EIGEN_DEVICE_FUNC
+inline const LogReturnType
+log() const
+{
+  return LogReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise logarithm of 1 plus \c *this.
+  *
+  * In exact arithmetic, \c x.log() is equivalent to \c (x+1).log(),
+  * however, with finite precision, this function is much more accurate when \c x is close to zero.
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_log1p">Math functions</a>, log()
+  */
+EIGEN_DEVICE_FUNC
+inline const Log1pReturnType
+log1p() const
+{
+  return Log1pReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise base-10 logarithm of *this.
+  *
+  * This function computes the coefficient-wise base-10 logarithm.
+  *
+  * Example: \include Cwise_log10.cpp
+  * Output: \verbinclude Cwise_log10.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_log10">Math functions</a>, log()
+  */
+EIGEN_DEVICE_FUNC
+inline const Log10ReturnType
+log10() const
+{
+  return Log10ReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise square root of *this.
+  *
+  * This function computes the coefficient-wise square root. The function MatrixBase::sqrt() in the
+  * unsupported module MatrixFunctions computes the matrix square root.
+  *
+  * Example: \include Cwise_sqrt.cpp
+  * Output: \verbinclude Cwise_sqrt.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_sqrt">Math functions</a>, pow(), square()
+  */
+EIGEN_DEVICE_FUNC
+inline const SqrtReturnType
+sqrt() const
+{
+  return SqrtReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise inverse square root of *this.
+  *
+  * This function computes the coefficient-wise inverse square root.
+  *
+  * Example: \include Cwise_sqrt.cpp
+  * Output: \verbinclude Cwise_sqrt.out
+  *
+  * \sa pow(), square()
+  */
+EIGEN_DEVICE_FUNC
+inline const RsqrtReturnType
+rsqrt() const
+{
+  return RsqrtReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise signum of *this.
+  *
+  * This function computes the coefficient-wise signum.
+  *
+  * Example: \include Cwise_sign.cpp
+  * Output: \verbinclude Cwise_sign.out
+  *
+  * \sa pow(), square()
+  */
+EIGEN_DEVICE_FUNC
+inline const SignReturnType
+sign() const
+{
+  return SignReturnType(derived());
+}
+
+
+/** \returns an expression of the coefficient-wise cosine of *this.
+  *
+  * This function computes the coefficient-wise cosine. The function MatrixBase::cos() in the
+  * unsupported module MatrixFunctions computes the matrix cosine.
+  *
+  * Example: \include Cwise_cos.cpp
+  * Output: \verbinclude Cwise_cos.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_cos">Math functions</a>, sin(), acos()
+  */
+EIGEN_DEVICE_FUNC
+inline const CosReturnType
+cos() const
+{
+  return CosReturnType(derived());
+}
+
+
+/** \returns an expression of the coefficient-wise sine of *this.
+  *
+  * This function computes the coefficient-wise sine. The function MatrixBase::sin() in the
+  * unsupported module MatrixFunctions computes the matrix sine.
+  *
+  * Example: \include Cwise_sin.cpp
+  * Output: \verbinclude Cwise_sin.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_sin">Math functions</a>, cos(), asin()
+  */
+EIGEN_DEVICE_FUNC
+inline const SinReturnType
+sin() const
+{
+  return SinReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise tan of *this.
+  *
+  * Example: \include Cwise_tan.cpp
+  * Output: \verbinclude Cwise_tan.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_tan">Math functions</a>, cos(), sin()
+  */
+EIGEN_DEVICE_FUNC
+inline const TanReturnType
+tan() const
+{
+  return TanReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise arc tan of *this.
+  *
+  * Example: \include Cwise_atan.cpp
+  * Output: \verbinclude Cwise_atan.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_atan">Math functions</a>, tan(), asin(), acos()
+  */
+EIGEN_DEVICE_FUNC
+inline const AtanReturnType
+atan() const
+{
+  return AtanReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise arc cosine of *this.
+  *
+  * Example: \include Cwise_acos.cpp
+  * Output: \verbinclude Cwise_acos.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_acos">Math functions</a>, cos(), asin()
+  */
+EIGEN_DEVICE_FUNC
+inline const AcosReturnType
+acos() const
+{
+  return AcosReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise arc sine of *this.
+  *
+  * Example: \include Cwise_asin.cpp
+  * Output: \verbinclude Cwise_asin.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_asin">Math functions</a>, sin(), acos()
+  */
+EIGEN_DEVICE_FUNC
+inline const AsinReturnType
+asin() const
+{
+  return AsinReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise hyperbolic tan of *this.
+  *
+  * Example: \include Cwise_tanh.cpp
+  * Output: \verbinclude Cwise_tanh.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_tanh">Math functions</a>, tan(), sinh(), cosh()
+  */
+EIGEN_DEVICE_FUNC
+inline const TanhReturnType
+tanh() const
+{
+  return TanhReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise hyperbolic sin of *this.
+  *
+  * Example: \include Cwise_sinh.cpp
+  * Output: \verbinclude Cwise_sinh.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_sinh">Math functions</a>, sin(), tanh(), cosh()
+  */
+EIGEN_DEVICE_FUNC
+inline const SinhReturnType
+sinh() const
+{
+  return SinhReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise hyperbolic cos of *this.
+  *
+  * Example: \include Cwise_cosh.cpp
+  * Output: \verbinclude Cwise_cosh.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_cosh">Math functions</a>, tan(), sinh(), cosh()
+  */
+EIGEN_DEVICE_FUNC
+inline const CoshReturnType
+cosh() const
+{
+  return CoshReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise inverse of *this.
+  *
+  * Example: \include Cwise_inverse.cpp
+  * Output: \verbinclude Cwise_inverse.out
+  *
+  * \sa operator/(), operator*()
+  */
+EIGEN_DEVICE_FUNC
+inline const InverseReturnType
+inverse() const
+{
+  return InverseReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise square of *this.
+  *
+  * Example: \include Cwise_square.cpp
+  * Output: \verbinclude Cwise_square.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_squareE">Math functions</a>, abs2(), cube(), pow()
+  */
+EIGEN_DEVICE_FUNC
+inline const SquareReturnType
+square() const
+{
+  return SquareReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise cube of *this.
+  *
+  * Example: \include Cwise_cube.cpp
+  * Output: \verbinclude Cwise_cube.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_cube">Math functions</a>, square(), pow()
+  */
+EIGEN_DEVICE_FUNC
+inline const CubeReturnType
+cube() const
+{
+  return CubeReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise round of *this.
+  *
+  * Example: \include Cwise_round.cpp
+  * Output: \verbinclude Cwise_round.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_round">Math functions</a>, ceil(), floor()
+  */
+EIGEN_DEVICE_FUNC
+inline const RoundReturnType
+round() const
+{
+  return RoundReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise floor of *this.
+  *
+  * Example: \include Cwise_floor.cpp
+  * Output: \verbinclude Cwise_floor.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_floor">Math functions</a>, ceil(), round()
+  */
+EIGEN_DEVICE_FUNC
+inline const FloorReturnType
+floor() const
+{
+  return FloorReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise ceil of *this.
+  *
+  * Example: \include Cwise_ceil.cpp
+  * Output: \verbinclude Cwise_ceil.out
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_ceil">Math functions</a>, floor(), round()
+  */
+EIGEN_DEVICE_FUNC
+inline const CeilReturnType
+ceil() const
+{
+  return CeilReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise isnan of *this.
+  *
+  * Example: \include Cwise_isNaN.cpp
+  * Output: \verbinclude Cwise_isNaN.out
+  *
+  * \sa isfinite(), isinf()
+  */
+EIGEN_DEVICE_FUNC
+inline const IsNaNReturnType
+isNaN() const
+{
+  return IsNaNReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise isinf of *this.
+  *
+  * Example: \include Cwise_isInf.cpp
+  * Output: \verbinclude Cwise_isInf.out
+  *
+  * \sa isnan(), isfinite()
+  */
+EIGEN_DEVICE_FUNC
+inline const IsInfReturnType
+isInf() const
+{
+  return IsInfReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise isfinite of *this.
+  *
+  * Example: \include Cwise_isFinite.cpp
+  * Output: \verbinclude Cwise_isFinite.out
+  *
+  * \sa isnan(), isinf()
+  */
+EIGEN_DEVICE_FUNC
+inline const IsFiniteReturnType
+isFinite() const
+{
+  return IsFiniteReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise ! operator of *this
+  *
+  * \warning this operator is for expression of bool only.
+  *
+  * Example: \include Cwise_boolean_not.cpp
+  * Output: \verbinclude Cwise_boolean_not.out
+  *
+  * \sa operator!=()
+  */
+EIGEN_DEVICE_FUNC
+inline const BooleanNotReturnType
+operator!() const
+{
+  EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value),
+                      THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+  return BooleanNotReturnType(derived());
+}
+
+
+// --- SpecialFunctions module ---
+
+typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType;
+typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
+typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
+typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
+
+/** \cpp11 \returns an expression of the coefficient-wise ln(|gamma(*this)|).
+  *
+  * \specialfunctions_module
+  *
+  * Example: \include Cwise_lgamma.cpp
+  * Output: \verbinclude Cwise_lgamma.out
+  *
+  * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
+  * or float/double in non c++11 mode, the user has to provide implementations of lgamma(T) for any scalar
+  * type T to be supported.
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_lgamma">Math functions</a>, digamma()
+  */
+EIGEN_DEVICE_FUNC
+inline const LgammaReturnType
+lgamma() const
+{
+  return LgammaReturnType(derived());
+}
+
+/** \returns an expression of the coefficient-wise digamma (psi, derivative of lgamma).
+  *
+  * \specialfunctions_module
+  *
+  * \note This function supports only float and double scalar types. To support other scalar types,
+  * the user has to provide implementations of digamma(T) for any scalar
+  * type T to be supported.
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_digamma">Math functions</a>, Eigen::digamma(), Eigen::polygamma(), lgamma()
+  */
+EIGEN_DEVICE_FUNC
+inline const DigammaReturnType
+digamma() const
+{
+  return DigammaReturnType(derived());
+}
+
+/** \cpp11 \returns an expression of the coefficient-wise Gauss error
+  * function of *this.
+  *
+  * \specialfunctions_module
+  *
+  * Example: \include Cwise_erf.cpp
+  * Output: \verbinclude Cwise_erf.out
+  *
+  * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
+  * or float/double in non c++11 mode, the user has to provide implementations of erf(T) for any scalar
+  * type T to be supported.
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_erf">Math functions</a>, erfc()
+  */
+EIGEN_DEVICE_FUNC
+inline const ErfReturnType
+erf() const
+{
+  return ErfReturnType(derived());
+}
+
+/** \cpp11 \returns an expression of the coefficient-wise Complementary error
+  * function of *this.
+  *
+  * \specialfunctions_module
+  *
+  * Example: \include Cwise_erfc.cpp
+  * Output: \verbinclude Cwise_erfc.out
+  *
+  * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
+  * or float/double in non c++11 mode, the user has to provide implementations of erfc(T) for any scalar
+  * type T to be supported.
+  *
+  * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_erfc">Math functions</a>, erf()
+  */
+EIGEN_DEVICE_FUNC
+inline const ErfcReturnType
+erfc() const
+{
+  return ErfcReturnType(derived());
+}
diff --git a/third-party/Eigen/src/plugins/BlockMethods.h b/third-party/Eigen/src/plugins/BlockMethods.h
new file mode 100644
index 00000000..ac35a008
--- /dev/null
+++ b/third-party/Eigen/src/plugins/BlockMethods.h
@@ -0,0 +1,1058 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+
+/// \internal expression type of a column */
+typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ColXpr;
+typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ConstColXpr;
+/// \internal expression type of a row */
+typedef Block<Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowXpr;
+typedef const Block<const Derived, 1, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowXpr;
+/// \internal expression type of a block of whole columns */
+typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ColsBlockXpr;
+typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ConstColsBlockXpr;
+/// \internal expression type of a block of whole rows */
+typedef Block<Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> RowsBlockXpr;
+typedef const Block<const Derived, Dynamic, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> ConstRowsBlockXpr;
+/// \internal expression type of a block of whole columns */
+template<int N> struct NColsBlockXpr { typedef Block<Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
+template<int N> struct ConstNColsBlockXpr { typedef const Block<const Derived, internal::traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
+/// \internal expression type of a block of whole rows */
+template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
+template<int N> struct ConstNRowsBlockXpr { typedef const Block<const Derived, N, internal::traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
+/// \internal expression of a block */
+typedef Block<Derived> BlockXpr;
+typedef const Block<const Derived> ConstBlockXpr;
+/// \internal expression of a block of fixed sizes */
+template<int Rows, int Cols> struct FixedBlockXpr { typedef Block<Derived,Rows,Cols> Type; };
+template<int Rows, int Cols> struct ConstFixedBlockXpr { typedef Block<const Derived,Rows,Cols> Type; };
+
+typedef VectorBlock<Derived> SegmentReturnType;
+typedef const VectorBlock<const Derived> ConstSegmentReturnType;
+template<int Size> struct FixedSegmentReturnType { typedef VectorBlock<Derived, Size> Type; };
+template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBlock<const Derived, Size> Type; };
+
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+/// \returns a dynamic-size expression of a block in *this.
+///
+/// \param startRow the first row in the block
+/// \param startCol the first column in the block
+/// \param blockRows the number of rows in the block
+/// \param blockCols the number of columns in the block
+///
+/// Example: \include MatrixBase_block_int_int_int_int.cpp
+/// Output: \verbinclude MatrixBase_block_int_int_int_int.out
+///
+/// \note Even though the returned expression has dynamic size, in the case
+/// when it is applied to a fixed-size matrix, it inherits a fixed maximal size,
+/// which means that evaluating it does not cause a dynamic memory allocation.
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols)
+{
+  return BlockXpr(derived(), startRow, startCol, blockRows, blockCols);
+}
+
+/// This is the const version of block(Index,Index,Index,Index). */
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
+{
+  return ConstBlockXpr(derived(), startRow, startCol, blockRows, blockCols);
+}
+
+
+
+
+/// \returns a dynamic-size expression of a top-right corner of *this.
+///
+/// \param cRows the number of rows in the corner
+/// \param cCols the number of columns in the corner
+///
+/// Example: \include MatrixBase_topRightCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_topRightCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline BlockXpr topRightCorner(Index cRows, Index cCols)
+{
+  return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
+}
+
+/// This is the const version of topRightCorner(Index, Index).
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const
+{
+  return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols);
+}
+
+/// \returns an expression of a fixed-size top-right corner of *this.
+///
+/// \tparam CRows the number of rows in the corner
+/// \tparam CCols the number of columns in the corner
+///
+/// Example: \include MatrixBase_template_int_int_topRightCorner.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_topRightCorner.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block<int,int>(Index,Index)
+///
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner()
+{
+  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
+}
+
+/// This is the const version of topRightCorner<int, int>().
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() const
+{
+  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols);
+}
+
+/// \returns an expression of a top-right corner of *this.
+///
+/// \tparam CRows number of rows in corner as specified at compile-time
+/// \tparam CCols number of columns in corner as specified at compile-time
+/// \param  cRows number of rows in corner as specified at run-time
+/// \param  cCols number of columns in corner as specified at run-time
+///
+/// This function is mainly useful for corners where the number of rows is specified at compile-time
+/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
+/// information should not contradict. In other words, \a cRows should equal \a CRows unless
+/// \a CRows is \a Dynamic, and the same for the number of columns.
+///
+/// Example: \include MatrixBase_template_int_int_topRightCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_topRightCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block
+///
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols)
+{
+  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
+}
+
+/// This is the const version of topRightCorner<int, int>(Index, Index).
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index cRows, Index cCols) const
+{
+  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - cCols, cRows, cCols);
+}
+
+
+
+/// \returns a dynamic-size expression of a top-left corner of *this.
+///
+/// \param cRows the number of rows in the corner
+/// \param cCols the number of columns in the corner
+///
+/// Example: \include MatrixBase_topLeftCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_topLeftCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline BlockXpr topLeftCorner(Index cRows, Index cCols)
+{
+  return BlockXpr(derived(), 0, 0, cRows, cCols);
+}
+
+/// This is the const version of topLeftCorner(Index, Index).
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const
+{
+  return ConstBlockXpr(derived(), 0, 0, cRows, cCols);
+}
+
+/// \returns an expression of a fixed-size top-left corner of *this.
+///
+/// The template parameters CRows and CCols are the number of rows and columns in the corner.
+///
+/// Example: \include MatrixBase_template_int_int_topLeftCorner.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_topLeftCorner.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner()
+{
+  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
+}
+
+/// This is the const version of topLeftCorner<int, int>().
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() const
+{
+  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0);
+}
+
+/// \returns an expression of a top-left corner of *this.
+///
+/// \tparam CRows number of rows in corner as specified at compile-time
+/// \tparam CCols number of columns in corner as specified at compile-time
+/// \param  cRows number of rows in corner as specified at run-time
+/// \param  cCols number of columns in corner as specified at run-time
+///
+/// This function is mainly useful for corners where the number of rows is specified at compile-time
+/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
+/// information should not contradict. In other words, \a cRows should equal \a CRows unless
+/// \a CRows is \a Dynamic, and the same for the number of columns.
+///
+/// Example: \include MatrixBase_template_int_int_topLeftCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_topLeftCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block
+///
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols)
+{
+  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);
+}
+
+/// This is the const version of topLeftCorner<int, int>(Index, Index).
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index cRows, Index cCols) const
+{
+  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0, cRows, cCols);
+}
+
+
+
+/// \returns a dynamic-size expression of a bottom-right corner of *this.
+///
+/// \param cRows the number of rows in the corner
+/// \param cCols the number of columns in the corner
+///
+/// Example: \include MatrixBase_bottomRightCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline BlockXpr bottomRightCorner(Index cRows, Index cCols)
+{
+  return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+
+/// This is the const version of bottomRightCorner(Index, Index).
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const
+{
+  return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+
+/// \returns an expression of a fixed-size bottom-right corner of *this.
+///
+/// The template parameters CRows and CCols are the number of rows and columns in the corner.
+///
+/// Example: \include MatrixBase_template_int_int_bottomRightCorner.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner()
+{
+  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
+}
+
+/// This is the const version of bottomRightCorner<int, int>().
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() const
+{
+  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols);
+}
+
+/// \returns an expression of a bottom-right corner of *this.
+///
+/// \tparam CRows number of rows in corner as specified at compile-time
+/// \tparam CCols number of columns in corner as specified at compile-time
+/// \param  cRows number of rows in corner as specified at run-time
+/// \param  cCols number of columns in corner as specified at run-time
+///
+/// This function is mainly useful for corners where the number of rows is specified at compile-time
+/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
+/// information should not contradict. In other words, \a cRows should equal \a CRows unless
+/// \a CRows is \a Dynamic, and the same for the number of columns.
+///
+/// Example: \include MatrixBase_template_int_int_bottomRightCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_bottomRightCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block
+///
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols)
+{
+  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+
+/// This is the const version of bottomRightCorner<int, int>(Index, Index).
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(Index cRows, Index cCols) const
+{
+  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
+}
+
+
+
+/// \returns a dynamic-size expression of a bottom-left corner of *this.
+///
+/// \param cRows the number of rows in the corner
+/// \param cCols the number of columns in the corner
+///
+/// Example: \include MatrixBase_bottomLeftCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline BlockXpr bottomLeftCorner(Index cRows, Index cCols)
+{
+  return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols);
+}
+
+/// This is the const version of bottomLeftCorner(Index, Index).
+EIGEN_DEVICE_FUNC
+inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const
+{
+  return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols);
+}
+
+/// \returns an expression of a fixed-size bottom-left corner of *this.
+///
+/// The template parameters CRows and CCols are the number of rows and columns in the corner.
+///
+/// Example: \include MatrixBase_template_int_int_bottomLeftCorner.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner()
+{
+  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
+}
+
+/// This is the const version of bottomLeftCorner<int, int>().
+template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() const
+{
+  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0);
+}
+
+/// \returns an expression of a bottom-left corner of *this.
+///
+/// \tparam CRows number of rows in corner as specified at compile-time
+/// \tparam CCols number of columns in corner as specified at compile-time
+/// \param  cRows number of rows in corner as specified at run-time
+/// \param  cCols number of columns in corner as specified at run-time
+///
+/// This function is mainly useful for corners where the number of rows is specified at compile-time
+/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
+/// information should not contradict. In other words, \a cRows should equal \a CRows unless
+/// \a CRows is \a Dynamic, and the same for the number of columns.
+///
+/// Example: \include MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_bottomLeftCorner_int_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block
+///
+template<int CRows, int CCols>
+inline typename FixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols)
+{
+  return typename FixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
+}
+
+/// This is the const version of bottomLeftCorner<int, int>(Index, Index).
+template<int CRows, int CCols>
+inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Index cRows, Index cCols) const
+{
+  return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - cRows, 0, cRows, cCols);
+}
+
+
+
+/// \returns a block consisting of the top rows of *this.
+///
+/// \param n the number of rows in the block
+///
+/// Example: \include MatrixBase_topRows_int.cpp
+/// Output: \verbinclude MatrixBase_topRows_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline RowsBlockXpr topRows(Index n)
+{
+  return RowsBlockXpr(derived(), 0, 0, n, cols());
+}
+
+/// This is the const version of topRows(Index).
+EIGEN_DEVICE_FUNC
+inline ConstRowsBlockXpr topRows(Index n) const
+{
+  return ConstRowsBlockXpr(derived(), 0, 0, n, cols());
+}
+
+/// \returns a block consisting of the top rows of *this.
+///
+/// \tparam N the number of rows in the block as specified at compile-time
+/// \param n the number of rows in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_topRows.cpp
+/// Output: \verbinclude MatrixBase_template_int_topRows.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NRowsBlockXpr<N>::Type topRows(Index n = N)
+{
+  return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
+}
+
+/// This is the const version of topRows<int>().
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const
+{
+  return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
+}
+
+
+
+/// \returns a block consisting of the bottom rows of *this.
+///
+/// \param n the number of rows in the block
+///
+/// Example: \include MatrixBase_bottomRows_int.cpp
+/// Output: \verbinclude MatrixBase_bottomRows_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline RowsBlockXpr bottomRows(Index n)
+{
+  return RowsBlockXpr(derived(), rows() - n, 0, n, cols());
+}
+
+/// This is the const version of bottomRows(Index).
+EIGEN_DEVICE_FUNC
+inline ConstRowsBlockXpr bottomRows(Index n) const
+{
+  return ConstRowsBlockXpr(derived(), rows() - n, 0, n, cols());
+}
+
+/// \returns a block consisting of the bottom rows of *this.
+///
+/// \tparam N the number of rows in the block as specified at compile-time
+/// \param n the number of rows in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_bottomRows.cpp
+/// Output: \verbinclude MatrixBase_template_int_bottomRows.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N)
+{
+  return typename NRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
+}
+
+/// This is the const version of bottomRows<int>().
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const
+{
+  return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
+}
+
+
+
+/// \returns a block consisting of a range of rows of *this.
+///
+/// \param startRow the index of the first row in the block
+/// \param n the number of rows in the block
+///
+/// Example: \include DenseBase_middleRows_int.cpp
+/// Output: \verbinclude DenseBase_middleRows_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline RowsBlockXpr middleRows(Index startRow, Index n)
+{
+  return RowsBlockXpr(derived(), startRow, 0, n, cols());
+}
+
+/// This is the const version of middleRows(Index,Index).
+EIGEN_DEVICE_FUNC
+inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const
+{
+  return ConstRowsBlockXpr(derived(), startRow, 0, n, cols());
+}
+
+/// \returns a block consisting of a range of rows of *this.
+///
+/// \tparam N the number of rows in the block as specified at compile-time
+/// \param startRow the index of the first row in the block
+/// \param n the number of rows in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include DenseBase_template_int_middleRows.cpp
+/// Output: \verbinclude DenseBase_template_int_middleRows.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N)
+{
+  return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
+}
+
+/// This is the const version of middleRows<int>().
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) const
+{
+  return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
+}
+
+
+
+/// \returns a block consisting of the left columns of *this.
+///
+/// \param n the number of columns in the block
+///
+/// Example: \include MatrixBase_leftCols_int.cpp
+/// Output: \verbinclude MatrixBase_leftCols_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline ColsBlockXpr leftCols(Index n)
+{
+  return ColsBlockXpr(derived(), 0, 0, rows(), n);
+}
+
+/// This is the const version of leftCols(Index).
+EIGEN_DEVICE_FUNC
+inline ConstColsBlockXpr leftCols(Index n) const
+{
+  return ConstColsBlockXpr(derived(), 0, 0, rows(), n);
+}
+
+/// \returns a block consisting of the left columns of *this.
+///
+/// \tparam N the number of columns in the block as specified at compile-time
+/// \param n the number of columns in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_leftCols.cpp
+/// Output: \verbinclude MatrixBase_template_int_leftCols.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NColsBlockXpr<N>::Type leftCols(Index n = N)
+{
+  return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
+}
+
+/// This is the const version of leftCols<int>().
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const
+{
+  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
+}
+
+
+
+/// \returns a block consisting of the right columns of *this.
+///
+/// \param n the number of columns in the block
+///
+/// Example: \include MatrixBase_rightCols_int.cpp
+/// Output: \verbinclude MatrixBase_rightCols_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline ColsBlockXpr rightCols(Index n)
+{
+  return ColsBlockXpr(derived(), 0, cols() - n, rows(), n);
+}
+
+/// This is the const version of rightCols(Index).
+EIGEN_DEVICE_FUNC
+inline ConstColsBlockXpr rightCols(Index n) const
+{
+  return ConstColsBlockXpr(derived(), 0, cols() - n, rows(), n);
+}
+
+/// \returns a block consisting of the right columns of *this.
+///
+/// \tparam N the number of columns in the block as specified at compile-time
+/// \param n the number of columns in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_rightCols.cpp
+/// Output: \verbinclude MatrixBase_template_int_rightCols.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NColsBlockXpr<N>::Type rightCols(Index n = N)
+{
+  return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
+}
+
+/// This is the const version of rightCols<int>().
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const
+{
+  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
+}
+
+
+
+/// \returns a block consisting of a range of columns of *this.
+///
+/// \param startCol the index of the first column in the block
+/// \param numCols the number of columns in the block
+///
+/// Example: \include DenseBase_middleCols_int.cpp
+/// Output: \verbinclude DenseBase_middleCols_int.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline ColsBlockXpr middleCols(Index startCol, Index numCols)
+{
+  return ColsBlockXpr(derived(), 0, startCol, rows(), numCols);
+}
+
+/// This is the const version of middleCols(Index,Index).
+EIGEN_DEVICE_FUNC
+inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
+{
+  return ConstColsBlockXpr(derived(), 0, startCol, rows(), numCols);
+}
+
+/// \returns a block consisting of a range of columns of *this.
+///
+/// \tparam N the number of columns in the block as specified at compile-time
+/// \param startCol the index of the first column in the block
+/// \param n the number of columns in the block as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include DenseBase_template_int_middleCols.cpp
+/// Output: \verbinclude DenseBase_template_int_middleCols.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N)
+{
+  return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
+}
+
+/// This is the const version of middleCols<int>().
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) const
+{
+  return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
+}
+
+
+
+/// \returns a fixed-size expression of a block in *this.
+///
+/// The template parameters \a NRows and \a NCols are the number of
+/// rows and columns in the block.
+///
+/// \param startRow the first row in the block
+/// \param startCol the first column in the block
+///
+/// Example: \include MatrixBase_block_int_int.cpp
+/// Output: \verbinclude MatrixBase_block_int_int.out
+///
+/// \note since block is a templated member, the keyword template has to be used
+/// if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int NRows, int NCols>
+EIGEN_DEVICE_FUNC
+inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol)
+{
+  return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
+}
+
+/// This is the const version of block<>(Index, Index). */
+template<int NRows, int NCols>
+EIGEN_DEVICE_FUNC
+inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol) const
+{
+  return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol);
+}
+
+/// \returns an expression of a block in *this.
+///
+/// \tparam NRows number of rows in block as specified at compile-time
+/// \tparam NCols number of columns in block as specified at compile-time
+/// \param  startRow  the first row in the block
+/// \param  startCol  the first column in the block
+/// \param  blockRows number of rows in block as specified at run-time
+/// \param  blockCols number of columns in block as specified at run-time
+///
+/// This function is mainly useful for blocks where the number of rows is specified at compile-time
+/// and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
+/// information should not contradict. In other words, \a blockRows should equal \a NRows unless
+/// \a NRows is \a Dynamic, and the same for the number of columns.
+///
+/// Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp
+/// Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.cpp
+///
+EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+///
+/// \sa class Block, block(Index,Index,Index,Index)
+///
+template<int NRows, int NCols>
+inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
+                                                  Index blockRows, Index blockCols)
+{
+  return typename FixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
+}
+
+/// This is the const version of block<>(Index, Index, Index, Index).
+template<int NRows, int NCols>
+inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol,
+                                                              Index blockRows, Index blockCols) const
+{
+  return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols);
+}
+
+/// \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0.
+///
+/// Example: \include MatrixBase_col.cpp
+/// Output: \verbinclude MatrixBase_col.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major)
+/**
+  * \sa row(), class Block */
+EIGEN_DEVICE_FUNC
+inline ColXpr col(Index i)
+{
+  return ColXpr(derived(), i);
+}
+
+/// This is the const version of col().
+EIGEN_DEVICE_FUNC
+inline ConstColXpr col(Index i) const
+{
+  return ConstColXpr(derived(), i);
+}
+
+/// \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0.
+///
+/// Example: \include MatrixBase_row.cpp
+/// Output: \verbinclude MatrixBase_row.out
+///
+EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major)
+/**
+  * \sa col(), class Block */
+EIGEN_DEVICE_FUNC
+inline RowXpr row(Index i)
+{
+  return RowXpr(derived(), i);
+}
+
+/// This is the const version of row(). */
+EIGEN_DEVICE_FUNC
+inline ConstRowXpr row(Index i) const
+{
+  return ConstRowXpr(derived(), i);
+}
+
+/// \returns a dynamic-size expression of a segment (i.e. a vector block) in *this.
+///
+/// \only_for_vectors
+///
+/// \param start the first coefficient in the segment
+/// \param n the number of coefficients in the segment
+///
+/// Example: \include MatrixBase_segment_int_int.cpp
+/// Output: \verbinclude MatrixBase_segment_int_int.out
+///
+/// \note Even though the returned expression has dynamic size, in the case
+/// when it is applied to a fixed-size vector, it inherits a fixed maximal size,
+/// which means that evaluating it does not cause a dynamic memory allocation.
+///
+/// \sa class Block, segment(Index)
+///
+EIGEN_DEVICE_FUNC
+inline SegmentReturnType segment(Index start, Index n)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return SegmentReturnType(derived(), start, n);
+}
+
+
+/// This is the const version of segment(Index,Index).
+EIGEN_DEVICE_FUNC
+inline ConstSegmentReturnType segment(Index start, Index n) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return ConstSegmentReturnType(derived(), start, n);
+}
+
+/// \returns a dynamic-size expression of the first coefficients of *this.
+///
+/// \only_for_vectors
+///
+/// \param n the number of coefficients in the segment
+///
+/// Example: \include MatrixBase_start_int.cpp
+/// Output: \verbinclude MatrixBase_start_int.out
+///
+/// \note Even though the returned expression has dynamic size, in the case
+/// when it is applied to a fixed-size vector, it inherits a fixed maximal size,
+/// which means that evaluating it does not cause a dynamic memory allocation.
+///
+/// \sa class Block, block(Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline SegmentReturnType head(Index n)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return SegmentReturnType(derived(), 0, n);
+}
+
+/// This is the const version of head(Index).
+EIGEN_DEVICE_FUNC
+inline ConstSegmentReturnType head(Index n) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return ConstSegmentReturnType(derived(), 0, n);
+}
+
+/// \returns a dynamic-size expression of the last coefficients of *this.
+///
+/// \only_for_vectors
+///
+/// \param n the number of coefficients in the segment
+///
+/// Example: \include MatrixBase_end_int.cpp
+/// Output: \verbinclude MatrixBase_end_int.out
+///
+/// \note Even though the returned expression has dynamic size, in the case
+/// when it is applied to a fixed-size vector, it inherits a fixed maximal size,
+/// which means that evaluating it does not cause a dynamic memory allocation.
+///
+/// \sa class Block, block(Index,Index)
+///
+EIGEN_DEVICE_FUNC
+inline SegmentReturnType tail(Index n)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return SegmentReturnType(derived(), this->size() - n, n);
+}
+
+/// This is the const version of tail(Index).
+EIGEN_DEVICE_FUNC
+inline ConstSegmentReturnType tail(Index n) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return ConstSegmentReturnType(derived(), this->size() - n, n);
+}
+
+/// \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this
+///
+/// \only_for_vectors
+///
+/// \tparam N the number of coefficients in the segment as specified at compile-time
+/// \param start the index of the first element in the segment
+/// \param n the number of coefficients in the segment as specified at compile-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_segment.cpp
+/// Output: \verbinclude MatrixBase_template_int_segment.out
+///
+/// \sa class Block
+///
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return typename FixedSegmentReturnType<N>::Type(derived(), start, n);
+}
+
+/// This is the const version of segment<int>(Index).
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n);
+}
+
+/// \returns a fixed-size expression of the first coefficients of *this.
+///
+/// \only_for_vectors
+///
+/// \tparam N the number of coefficients in the segment as specified at compile-time
+/// \param  n the number of coefficients in the segment as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_start.cpp
+/// Output: \verbinclude MatrixBase_template_int_start.out
+///
+/// \sa class Block
+///
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename FixedSegmentReturnType<N>::Type head(Index n = N)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return typename FixedSegmentReturnType<N>::Type(derived(), 0, n);
+}
+
+/// This is the const version of head<int>().
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n);
+}
+
+/// \returns a fixed-size expression of the last coefficients of *this.
+///
+/// \only_for_vectors
+///
+/// \tparam N the number of coefficients in the segment as specified at compile-time
+/// \param  n the number of coefficients in the segment as specified at run-time
+///
+/// The compile-time and run-time information should not contradict. In other words,
+/// \a n should equal \a N unless \a N is \a Dynamic.
+///
+/// Example: \include MatrixBase_template_int_end.cpp
+/// Output: \verbinclude MatrixBase_template_int_end.out
+///
+/// \sa class Block
+///
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename FixedSegmentReturnType<N>::Type tail(Index n = N)
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return typename FixedSegmentReturnType<N>::Type(derived(), size() - n);
+}
+
+/// This is the const version of tail<int>.
+template<int N>
+EIGEN_DEVICE_FUNC
+inline typename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const
+{
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  return typename ConstFixedSegmentReturnType<N>::Type(derived(), size() - n);
+}
diff --git a/third-party/Eigen/src/plugins/CommonCwiseBinaryOps.h b/third-party/Eigen/src/plugins/CommonCwiseBinaryOps.h
new file mode 100644
index 00000000..8b6730ed
--- /dev/null
+++ b/third-party/Eigen/src/plugins/CommonCwiseBinaryOps.h
@@ -0,0 +1,115 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// This file is a base class plugin containing common coefficient wise functions.
+
+/** \returns an expression of the difference of \c *this and \a other
+  *
+  * \note If you want to substract a given scalar from all coefficients, see Cwise::operator-().
+  *
+  * \sa class CwiseBinaryOp, operator-=()
+  */
+EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference)
+
+/** \returns an expression of the sum of \c *this and \a other
+  *
+  * \note If you want to add a given scalar to all coefficients, see Cwise::operator+().
+  *
+  * \sa class CwiseBinaryOp, operator+=()
+  */
+EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum)
+
+/** \returns an expression of a custom coefficient-wise operator \a func of *this and \a other
+  *
+  * The template parameter \a CustomBinaryOp is the type of the functor
+  * of the custom operator (see class CwiseBinaryOp for an example)
+  *
+  * Here is an example illustrating the use of custom functors:
+  * \include class_CwiseBinaryOp.cpp
+  * Output: \verbinclude class_CwiseBinaryOp.out
+  *
+  * \sa class CwiseBinaryOp, operator+(), operator-(), cwiseProduct()
+  */
+template<typename CustomBinaryOp, typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
+binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const
+{
+  return CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other.derived(), func);
+}
+
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product)
+#else
+/** \returns an expression of \c *this scaled by the scalar factor \a scalar
+  *
+  * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
+  */
+template<typename T>
+const CwiseBinaryOp<internal::scalar_product_op<Scalar,T>,Derived,Constant<T> > operator*(const T& scalar) const;
+/** \returns an expression of \a expr scaled by the scalar factor \a scalar
+  *
+  * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
+  */
+template<typename T> friend
+const CwiseBinaryOp<internal::scalar_product_op<T,Scalar>,Constant<T>,Derived> operator*(const T& scalar, const StorageBaseType& expr);
+#endif
+
+
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)
+#else
+/** \returns an expression of \c *this divided by the scalar value \a scalar
+  *
+  * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
+  */
+template<typename T>
+const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;
+#endif
+
+/** \returns an expression of the coefficient-wise boolean \b and operator of \c *this and \a other
+  *
+  * \warning this operator is for expression of bool only.
+  *
+  * Example: \include Cwise_boolean_and.cpp
+  * Output: \verbinclude Cwise_boolean_and.out
+  *
+  * \sa operator||(), select()
+  */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
+operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+                      THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+  return CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
+
+/** \returns an expression of the coefficient-wise boolean \b or operator of \c *this and \a other
+  *
+  * \warning this operator is for expression of bool only.
+  *
+  * Example: \include Cwise_boolean_or.cpp
+  * Output: \verbinclude Cwise_boolean_or.out
+  *
+  * \sa operator&&(), select()
+  */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
+operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  EIGEN_STATIC_ASSERT((internal::is_same<bool,Scalar>::value && internal::is_same<bool,typename OtherDerived::Scalar>::value),
+                      THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
+  return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
+}
diff --git a/third-party/Eigen/src/plugins/CommonCwiseUnaryOps.h b/third-party/Eigen/src/plugins/CommonCwiseUnaryOps.h
new file mode 100644
index 00000000..89f4faaa
--- /dev/null
+++ b/third-party/Eigen/src/plugins/CommonCwiseUnaryOps.h
@@ -0,0 +1,163 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// This file is a base class plugin containing common coefficient wise functions.
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+
+/** \internal the return type of conjugate() */
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+                    const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
+                    const Derived&
+                  >::type ConjugateReturnType;
+/** \internal the return type of real() const */
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+                    const CwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>,
+                    const Derived&
+                  >::type RealReturnType;
+/** \internal the return type of real() */
+typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+                    CwiseUnaryView<internal::scalar_real_ref_op<Scalar>, Derived>,
+                    Derived&
+                  >::type NonConstRealReturnType;
+/** \internal the return type of imag() const */
+typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturnType;
+/** \internal the return type of imag() */
+typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType;
+
+typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType;
+
+#endif // not EIGEN_PARSED_BY_DOXYGEN
+
+/// \returns an expression of the opposite of \c *this
+///
+EIGEN_DOC_UNARY_ADDONS(operator-,opposite)
+///
+EIGEN_DEVICE_FUNC
+inline const NegativeReturnType
+operator-() const { return NegativeReturnType(derived()); }
+
+
+template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };
+
+/// \returns an expression of \c *this with the \a Scalar type casted to
+/// \a NewScalar.
+///
+/// The template parameter \a NewScalar is the type we are casting the scalars to.
+///
+EIGEN_DOC_UNARY_ADDONS(cast,conversion function)
+///
+/// \sa class CwiseUnaryOp
+///
+template<typename NewType>
+EIGEN_DEVICE_FUNC
+typename CastXpr<NewType>::Type
+cast() const
+{
+  return typename CastXpr<NewType>::Type(derived());
+}
+
+/// \returns an expression of the complex conjugate of \c *this.
+///
+EIGEN_DOC_UNARY_ADDONS(conjugate,complex conjugate)
+///
+/// \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_conj">Math functions</a>, MatrixBase::adjoint()
+EIGEN_DEVICE_FUNC
+inline ConjugateReturnType
+conjugate() const
+{
+  return ConjugateReturnType(derived());
+}
+
+/// \returns a read-only expression of the real part of \c *this.
+///
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+///
+/// \sa imag()
+EIGEN_DEVICE_FUNC
+inline RealReturnType
+real() const { return RealReturnType(derived()); }
+
+/// \returns an read-only expression of the imaginary part of \c *this.
+///
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+///
+/// \sa real()
+EIGEN_DEVICE_FUNC
+inline const ImagReturnType
+imag() const { return ImagReturnType(derived()); }
+
+/// \brief Apply a unary operator coefficient-wise
+/// \param[in]  func  Functor implementing the unary operator
+/// \tparam  CustomUnaryOp Type of \a func
+/// \returns An expression of a custom coefficient-wise unary operator \a func of *this
+///
+/// The function \c ptr_fun() from the C++ standard library can be used to make functors out of normal functions.
+///
+/// Example:
+/// \include class_CwiseUnaryOp_ptrfun.cpp
+/// Output: \verbinclude class_CwiseUnaryOp_ptrfun.out
+///
+/// Genuine functors allow for more possibilities, for instance it may contain a state.
+///
+/// Example:
+/// \include class_CwiseUnaryOp.cpp
+/// Output: \verbinclude class_CwiseUnaryOp.out
+///
+EIGEN_DOC_UNARY_ADDONS(unaryExpr,unary function)
+///
+/// \sa unaryViewExpr, binaryExpr, class CwiseUnaryOp
+///
+template<typename CustomUnaryOp>
+EIGEN_DEVICE_FUNC
+inline const CwiseUnaryOp<CustomUnaryOp, const Derived>
+unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const
+{
+  return CwiseUnaryOp<CustomUnaryOp, const Derived>(derived(), func);
+}
+
+/// \returns an expression of a custom coefficient-wise unary operator \a func of *this
+///
+/// The template parameter \a CustomUnaryOp is the type of the functor
+/// of the custom unary operator.
+///
+/// Example:
+/// \include class_CwiseUnaryOp.cpp
+/// Output: \verbinclude class_CwiseUnaryOp.out
+///
+EIGEN_DOC_UNARY_ADDONS(unaryViewExpr,unary function)
+///
+/// \sa unaryExpr, binaryExpr class CwiseUnaryOp
+///
+template<typename CustomViewOp>
+EIGEN_DEVICE_FUNC
+inline const CwiseUnaryView<CustomViewOp, const Derived>
+unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
+{
+  return CwiseUnaryView<CustomViewOp, const Derived>(derived(), func);
+}
+
+/// \returns a non const expression of the real part of \c *this.
+///
+EIGEN_DOC_UNARY_ADDONS(real,real part function)
+///
+/// \sa imag()
+EIGEN_DEVICE_FUNC
+inline NonConstRealReturnType
+real() { return NonConstRealReturnType(derived()); }
+
+/// \returns a non const expression of the imaginary part of \c *this.
+///
+EIGEN_DOC_UNARY_ADDONS(imag,imaginary part function)
+///
+/// \sa real()
+EIGEN_DEVICE_FUNC
+inline NonConstImagReturnType
+imag() { return NonConstImagReturnType(derived()); }
diff --git a/third-party/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/third-party/Eigen/src/plugins/MatrixCwiseBinaryOps.h
new file mode 100644
index 00000000..f1084abe
--- /dev/null
+++ b/third-party/Eigen/src/plugins/MatrixCwiseBinaryOps.h
@@ -0,0 +1,152 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// This file is a base class plugin containing matrix specifics coefficient wise functions.
+
+/** \returns an expression of the Schur product (coefficient wise product) of *this and \a other
+  *
+  * Example: \include MatrixBase_cwiseProduct.cpp
+  * Output: \verbinclude MatrixBase_cwiseProduct.out
+  *
+  * \sa class CwiseBinaryOp, cwiseAbs2
+  */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
+cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
+}
+
+/** \returns an expression of the coefficient-wise == operator of *this and \a other
+  *
+  * \warning this performs an exact comparison, which is generally a bad idea with floating-point types.
+  * In order to check for equality between two vectors or matrices with floating-point coefficients, it is
+  * generally a far better idea to use a fuzzy comparison as provided by isApprox() and
+  * isMuchSmallerThan().
+  *
+  * Example: \include MatrixBase_cwiseEqual.cpp
+  * Output: \verbinclude MatrixBase_cwiseEqual.out
+  *
+  * \sa cwiseNotEqual(), isApprox(), isMuchSmallerThan()
+  */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>
+cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  return CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+
+/** \returns an expression of the coefficient-wise != operator of *this and \a other
+  *
+  * \warning this performs an exact comparison, which is generally a bad idea with floating-point types.
+  * In order to check for equality between two vectors or matrices with floating-point coefficients, it is
+  * generally a far better idea to use a fuzzy comparison as provided by isApprox() and
+  * isMuchSmallerThan().
+  *
+  * Example: \include MatrixBase_cwiseNotEqual.cpp
+  * Output: \verbinclude MatrixBase_cwiseNotEqual.out
+  *
+  * \sa cwiseEqual(), isApprox(), isMuchSmallerThan()
+  */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+inline const CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>
+cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  return CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+
+/** \returns an expression of the coefficient-wise min of *this and \a other
+  *
+  * Example: \include MatrixBase_cwiseMin.cpp
+  * Output: \verbinclude MatrixBase_cwiseMin.out
+  *
+  * \sa class CwiseBinaryOp, max()
+  */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>
+cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  return CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+
+/** \returns an expression of the coefficient-wise min of *this and scalar \a other
+  *
+  * \sa class CwiseBinaryOp, min()
+  */
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
+cwiseMin(const Scalar &other) const
+{
+  return cwiseMin(Derived::Constant(rows(), cols(), other));
+}
+
+/** \returns an expression of the coefficient-wise max of *this and \a other
+  *
+  * Example: \include MatrixBase_cwiseMax.cpp
+  * Output: \verbinclude MatrixBase_cwiseMax.out
+  *
+  * \sa class CwiseBinaryOp, min()
+  */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>
+cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  return CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+
+/** \returns an expression of the coefficient-wise max of *this and scalar \a other
+  *
+  * \sa class CwiseBinaryOp, min()
+  */
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
+cwiseMax(const Scalar &other) const
+{
+  return cwiseMax(Derived::Constant(rows(), cols(), other));
+}
+
+
+/** \returns an expression of the coefficient-wise quotient of *this and \a other
+  *
+  * Example: \include MatrixBase_cwiseQuotient.cpp
+  * Output: \verbinclude MatrixBase_cwiseQuotient.out
+  *
+  * \sa class CwiseBinaryOp, cwiseProduct(), cwiseInverse()
+  */
+template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
+cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
+{
+  return CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+}
+
+typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType;
+
+/** \returns an expression of the coefficient-wise == operator of \c *this and a scalar \a s
+  *
+  * \warning this performs an exact comparison, which is generally a bad idea with floating-point types.
+  * In order to check for equality between two vectors or matrices with floating-point coefficients, it is
+  * generally a far better idea to use a fuzzy comparison as provided by isApprox() and
+  * isMuchSmallerThan().
+  *
+  * \sa cwiseEqual(const MatrixBase<OtherDerived> &) const
+  */
+EIGEN_DEVICE_FUNC
+inline const CwiseScalarEqualReturnType
+cwiseEqual(const Scalar& s) const
+{
+  return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>());
+}
diff --git a/third-party/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/third-party/Eigen/src/plugins/MatrixCwiseUnaryOps.h
new file mode 100644
index 00000000..b1be3d56
--- /dev/null
+++ b/third-party/Eigen/src/plugins/MatrixCwiseUnaryOps.h
@@ -0,0 +1,85 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// This file is included into the body of the base classes supporting matrix specific coefficient-wise functions.
+// This include MatrixBase and SparseMatrixBase.
+
+
+typedef CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> CwiseAbsReturnType;
+typedef CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived> CwiseAbs2ReturnType;
+typedef CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived> CwiseSqrtReturnType;
+typedef CwiseUnaryOp<internal::scalar_sign_op<Scalar>, const Derived> CwiseSignReturnType;
+typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> CwiseInverseReturnType;
+
+/// \returns an expression of the coefficient-wise absolute value of \c *this
+///
+/// Example: \include MatrixBase_cwiseAbs.cpp
+/// Output: \verbinclude MatrixBase_cwiseAbs.out
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value)
+///
+/// \sa cwiseAbs2()
+///
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseAbsReturnType
+cwiseAbs() const { return CwiseAbsReturnType(derived()); }
+
+/// \returns an expression of the coefficient-wise squared absolute value of \c *this
+///
+/// Example: \include MatrixBase_cwiseAbs2.cpp
+/// Output: \verbinclude MatrixBase_cwiseAbs2.out
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value)
+///
+/// \sa cwiseAbs()
+///
+EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE const CwiseAbs2ReturnType
+cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); }
+
+/// \returns an expression of the coefficient-wise square root of *this.
+///
+/// Example: \include MatrixBase_cwiseSqrt.cpp
+/// Output: \verbinclude MatrixBase_cwiseSqrt.out
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root)
+///
+/// \sa cwisePow(), cwiseSquare()
+///
+EIGEN_DEVICE_FUNC
+inline const CwiseSqrtReturnType
+cwiseSqrt() const { return CwiseSqrtReturnType(derived()); }
+
+/// \returns an expression of the coefficient-wise signum of *this.
+///
+/// Example: \include MatrixBase_cwiseSign.cpp
+/// Output: \verbinclude MatrixBase_cwiseSign.out
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function)
+///
+EIGEN_DEVICE_FUNC
+inline const CwiseSignReturnType
+cwiseSign() const { return CwiseSignReturnType(derived()); }
+
+
+/// \returns an expression of the coefficient-wise inverse of *this.
+///
+/// Example: \include MatrixBase_cwiseInverse.cpp
+/// Output: \verbinclude MatrixBase_cwiseInverse.out
+///
+EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse)
+///
+/// \sa cwiseProduct()
+///
+EIGEN_DEVICE_FUNC
+inline const CwiseInverseReturnType
+cwiseInverse() const { return CwiseInverseReturnType(derived()); }
+
+

From ebf03873c810261319d27bd7aebb7b7138b2f85a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 6 Mar 2024 12:10:34 +0000
Subject: [PATCH 302/314] Replace Divide() with IndexToDims()

---
 niftyreg_build_version.txt                    |   2 +-
 reg-lib/cuda/CudaLocalTransformation.cu       | 104 +++++-----
 .../cuda/CudaLocalTransformationKernels.cu    | 194 ++++++++----------
 reg-lib/cuda/_reg_ssd_gpu.cu                  |   4 +-
 4 files changed, 136 insertions(+), 168 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 816d01be..1bb7ac53 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-420
+421
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index ce733da6..8e901204 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -26,8 +26,8 @@ void GetDeformationField(const nifti_image *controlPointImage,
                          const int *maskCuda,
                          const size_t activeVoxelNumber) {
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
                                                         controlPointImage->dy / referenceImage->dy,
                                                         controlPointImage->dz / referenceImage->dz);
@@ -46,12 +46,12 @@ void GetDeformationField(const nifti_image *controlPointImage,
     if (referenceImage->nz > 1) {
         thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
             GetDeformationField3d<composition, bspline>(deformationFieldCuda, controlPointTexture, realToVoxelCuda,
-                                                        referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index);
+                                                        referenceImageDims, controlPointImageDims, controlPointVoxelSpacing, index);
         });
     } else {
         thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) {
             GetDeformationField2d<composition, bspline>(deformationFieldCuda, controlPointTexture, realToVoxelCuda,
-                                                        referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index);
+                                                        referenceImageDims, controlPointImageDims, controlPointVoxelSpacing, index);
         });
     }
 }
@@ -82,23 +82,23 @@ struct SecondDerivative<false> {
 template<bool is3d, bool isGradient>
 __device__ SecondDerivative<is3d> GetApproxSecondDerivative(const int index,
                                                             cudaTextureObject_t controlPointTexture,
-                                                            const int3 controlPointImageDim,
+                                                            const int3 controlPointImageDims,
                                                             const Basis2nd<is3d> basis) {
-    const auto [x, y, z] = IndexToDims<is3d>(index, controlPointImageDim);
-    if (!isGradient && (x < 1 || x >= controlPointImageDim.x - 1 ||
-                        y < 1 || y >= controlPointImageDim.y - 1 ||
-                        (is3d && (z < 1 || z >= controlPointImageDim.z - 1)))) return {};
+    const auto [x, y, z] = IndexToDims<is3d>(index, controlPointImageDims);
+    if (!isGradient && (x < 1 || x >= controlPointImageDims.x - 1 ||
+                        y < 1 || y >= controlPointImageDims.y - 1 ||
+                        (is3d && (z < 1 || z >= controlPointImageDims.z - 1)))) return {};
 
     SecondDerivative<is3d> secondDerivative{};
     if constexpr (is3d) {
         for (int c = z - 1, basInd = 0; c < z + 2; c++) {
-            if (isGradient && (c < 0 || c >= controlPointImageDim.z)) { basInd += 9; continue; }
-            const int indexZ = c * controlPointImageDim.y;
+            if (isGradient && (c < 0 || c >= controlPointImageDims.z)) { basInd += 9; continue; }
+            const int indexZ = c * controlPointImageDims.y;
             for (int b = y - 1; b < y + 2; b++) {
-                if (isGradient && (b < 0 || b >= controlPointImageDim.y)) { basInd += 3; continue; }
-                int indexXYZ = (indexZ + b) * controlPointImageDim.x + x - 1;
+                if (isGradient && (b < 0 || b >= controlPointImageDims.y)) { basInd += 3; continue; }
+                int indexXYZ = (indexZ + b) * controlPointImageDims.x + x - 1;
                 for (int a = x - 1; a < x + 2; a++, basInd++, indexXYZ++) {
-                    if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue;
+                    if (isGradient && (a < 0 || a >= controlPointImageDims.x)) continue;
                     const float3 controlPointValue = make_float3(tex1Dfetch<float4>(controlPointTexture, indexXYZ));
                     secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue;
                     secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue;
@@ -111,10 +111,10 @@ __device__ SecondDerivative<is3d> GetApproxSecondDerivative(const int index,
         }
     } else {
         for (int b = y - 1, basInd = 0; b < y + 2; b++) {
-            if (isGradient && (b < 0 || b >= controlPointImageDim.y)) { basInd += 3; continue; }
-            int indexXY = b * controlPointImageDim.x + x - 1;
+            if (isGradient && (b < 0 || b >= controlPointImageDims.y)) { basInd += 3; continue; }
+            int indexXY = b * controlPointImageDims.x + x - 1;
             for (int a = x - 1; a < x + 2; a++, basInd++, indexXY++) {
-                if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue;
+                if (isGradient && (a < 0 || a >= controlPointImageDims.x)) continue;
                 const float2 controlPointValue = make_float2(tex1Dfetch<float4>(controlPointTexture, indexXY));
                 secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue;
                 secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue;
@@ -128,7 +128,7 @@ __device__ SecondDerivative<is3d> GetApproxSecondDerivative(const int index,
 template<bool is3d>
 double ApproxBendingEnergy(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) {
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
     auto controlPointTexture = *controlPointTexturePtr;
 
@@ -141,7 +141,7 @@ double ApproxBendingEnergy(const nifti_image *controlPointImage, const float4 *c
 
     thrust::counting_iterator index(0);
     return thrust::transform_reduce(thrust::device, index, index + controlPointNumber, [=]__device__(const int index) {
-        const auto secondDerivative = GetApproxSecondDerivative<is3d, false>(index, controlPointTexture, controlPointImageDim, basis);
+        const auto secondDerivative = GetApproxSecondDerivative<is3d, false>(index, controlPointTexture, controlPointImageDims, basis);
         if constexpr (is3d)
             return (Square(secondDerivative.xx.x) + Square(secondDerivative.xx.y) + Square(secondDerivative.xx.z) +
                     Square(secondDerivative.yy.x) + Square(secondDerivative.yy.y) + Square(secondDerivative.yy.z) +
@@ -163,7 +163,7 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage,
                                  float4 *transGradientCuda,
                                  float bendingEnergyWeight) {
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
     auto controlPointTexture = *controlPointTexturePtr;
 
@@ -180,8 +180,8 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage,
     thrust::device_vector<typename SecondDerivative<is3d>::TextureType> secondDerivativesCudaVec((is3d ? 6 : 3) * controlPointNumber);
     auto secondDerivativesCuda = secondDerivativesCudaVec.data().get();
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber,
-                       [controlPointTexture, controlPointImageDim, basis, secondDerivativesCuda]__device__(const int index) {
-        const auto secondDerivative = GetApproxSecondDerivative<is3d, true>(index, controlPointTexture, controlPointImageDim, basis);
+                       [controlPointTexture, controlPointImageDims, basis, secondDerivativesCuda]__device__(const int index) {
+        const auto secondDerivative = GetApproxSecondDerivative<is3d, true>(index, controlPointTexture, controlPointImageDims, basis);
         if constexpr (is3d) {
             int derInd = 6 * index;
             secondDerivativesCuda[derInd++] = make_float4(secondDerivative.xx);
@@ -205,18 +205,18 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage,
     // Compute the gradient
     const float approxRatio = bendingEnergyWeight / (float)controlPointNumber;
     thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber,
-                       [controlPointImageDim, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const int index) {
-        const auto [x, y, z] = IndexToDims<is3d>(index, controlPointImageDim);
+                       [controlPointImageDims, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const int index) {
+        const auto [x, y, z] = IndexToDims<is3d>(index, controlPointImageDims);
         typename SecondDerivative<is3d>::Type gradientValue{};
         if constexpr (is3d) {
             for (int c = z - 1, basInd = 0; c < z + 2; c++) {
-                if (c < 0 || c >= controlPointImageDim.z) { basInd += 9; continue; }
-                const int indexZ = c * controlPointImageDim.y;
+                if (c < 0 || c >= controlPointImageDims.z) { basInd += 9; continue; }
+                const int indexZ = c * controlPointImageDims.y;
                 for (int b = y - 1; b < y + 2; b++) {
-                    if (b < 0 || b >= controlPointImageDim.y) { basInd += 3; continue; }
-                    int indexXYZ = ((indexZ + b) * controlPointImageDim.x + x - 1) * 6;
+                    if (b < 0 || b >= controlPointImageDims.y) { basInd += 3; continue; }
+                    int indexXYZ = ((indexZ + b) * controlPointImageDims.x + x - 1) * 6;
                     for (int a = x - 1; a < x + 2; a++, basInd++) {
-                        if (a < 0 || a >= controlPointImageDim.x) { indexXYZ += 6; continue; }
+                        if (a < 0 || a >= controlPointImageDims.x) { indexXYZ += 6; continue; }
                         const float3 secondDerivativeXX = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
                         gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd];
                         const float3 secondDerivativeYY = make_float3(tex1Dfetch<float4>(secondDerivativesTexture, indexXYZ++));
@@ -234,10 +234,10 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage,
             }
         } else {
             for (int b = y - 1, basInd = 0; b < y + 2; b++) {
-                if (b < 0 || b >= controlPointImageDim.y) { basInd += 3; continue; }
-                int indexXY = (b * controlPointImageDim.x + x - 1) * 3;
+                if (b < 0 || b >= controlPointImageDims.y) { basInd += 3; continue; }
+                int indexXY = (b * controlPointImageDims.x + x - 1) * 3;
                 for (int a = x - 1; a < x + 2; a++, basInd++) {
-                    if (a < 0 || a >= controlPointImageDim.x) { indexXY += 3; continue; }
+                    if (a < 0 || a >= controlPointImageDims.x) { indexXY += 3; continue; }
                     const float2 secondDerivativeXX = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
                     gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd];
                     const float2 secondDerivativeYY = tex1Dfetch<float2>(secondDerivativesTexture, indexXY++);
@@ -266,7 +266,7 @@ void ComputeApproxJacobianValues(const nifti_image *controlPointImage,
                                  float *jacobianDetCuda) {
     auto blockSize = CudaContext::GetBlockSize();
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
 
     // Need to reorient the Jacobian matrix using the header information - real to voxel conversion
@@ -279,7 +279,7 @@ void ComputeApproxJacobianValues(const nifti_image *controlPointImage,
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         GetApproxJacobianValues3d<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
-                                                           controlPointImageDim, (unsigned)controlPointNumber, reorientation);
+                                                           controlPointImageDims, (unsigned)controlPointNumber, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->GetApproxJacobianValues2d;
@@ -287,7 +287,7 @@ void ComputeApproxJacobianValues(const nifti_image *controlPointImage,
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         GetApproxJacobianValues2d<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
-                                                           controlPointImageDim, (unsigned)controlPointNumber, reorientation);
+                                                           controlPointImageDims, (unsigned)controlPointNumber, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
 }
@@ -300,8 +300,8 @@ void ComputeJacobianValues(const nifti_image *controlPointImage,
     auto blockSize = CudaContext::GetBlockSize();
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
-    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
     auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4);
 
@@ -317,7 +317,7 @@ void ComputeJacobianValues(const nifti_image *controlPointImage,
         // 8 floats of shared memory are allocated per thread
         const unsigned sharedMemSize = blocks * 8 * sizeof(float);
         GetJacobianValues3d<<<gridDims, blockDims, sharedMemSize>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
-                                                                    controlPointImageDim, controlPointSpacing, referenceImageDim,
+                                                                    controlPointImageDims, controlPointSpacing, referenceImageDims,
                                                                     (unsigned)voxelNumber, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
@@ -326,7 +326,7 @@ void ComputeJacobianValues(const nifti_image *controlPointImage,
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         GetJacobianValues2d<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
-                                                     controlPointImageDim, controlPointSpacing, referenceImageDim,
+                                                     controlPointImageDims, controlPointSpacing, referenceImageDims,
                                                      (unsigned)voxelNumber, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
@@ -401,7 +401,7 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
     const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk);
 
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
     const float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx),
                                       referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy),
@@ -416,7 +416,7 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             ComputeApproxJacGradient3d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
-                                                                *jacobianMatricesTexture, controlPointImageDim,
+                                                                *jacobianMatricesTexture, controlPointImageDims,
                                                                 (unsigned)controlPointNumber, reorientation, weight);
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         } else {
@@ -425,12 +425,12 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             ComputeApproxJacGradient2d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
-                                                                *jacobianMatricesTexture, controlPointImageDim,
+                                                                *jacobianMatricesTexture, controlPointImageDims,
                                                                 (unsigned)controlPointNumber, reorientation, weight);
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         }
     } else {
-        const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+        const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
         const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
                                                             controlPointImage->dy / referenceImage->dy,
                                                             controlPointImage->dz / referenceImage->dz);
@@ -440,9 +440,9 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             ComputeJacGradient3d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
-                                                          *jacobianMatricesTexture, controlPointImageDim,
+                                                          *jacobianMatricesTexture, controlPointImageDims,
                                                           controlPointVoxelSpacing, (unsigned)controlPointNumber,
-                                                          referenceImageDim, reorientation, weight);
+                                                          referenceImageDims, reorientation, weight);
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         } else {
             const unsigned blocks = blockSize->ComputeJacGradient2d;
@@ -450,9 +450,9 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             ComputeJacGradient2d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
-                                                          *jacobianMatricesTexture, controlPointImageDim,
+                                                          *jacobianMatricesTexture, controlPointImageDims,
                                                           controlPointVoxelSpacing, (unsigned)controlPointNumber,
-                                                          referenceImageDim, reorientation, weight);
+                                                          referenceImageDims, reorientation, weight);
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         }
     }
@@ -514,7 +514,7 @@ double CorrectFolding(const nifti_image *referenceImage,
     const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk);
 
     const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3);
-    const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
+    const int3 controlPointImageDims = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz);
     const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz);
     auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, jacNumber, cudaChannelFormatKindFloat, 1);
     auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, 9 * jacNumber, cudaChannelFormatKindFloat, 1);
@@ -524,11 +524,11 @@ double CorrectFolding(const nifti_image *referenceImage,
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         ApproxCorrectFolding3d<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
-                                                        *jacobianMatricesTexture, controlPointImageDim,
+                                                        *jacobianMatricesTexture, controlPointImageDims,
                                                         controlPointSpacing, (unsigned)controlPointNumber, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
-        const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+        const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
         const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx,
                                                             controlPointImage->dy / referenceImage->dy,
                                                             controlPointImage->dz / referenceImage->dz);
@@ -537,9 +537,9 @@ double CorrectFolding(const nifti_image *referenceImage,
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         CorrectFolding3d<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
-                                                  *jacobianMatricesTexture, controlPointImageDim, controlPointSpacing,
+                                                  *jacobianMatricesTexture, controlPointImageDims, controlPointSpacing,
                                                   controlPointVoxelSpacing, (unsigned)controlPointNumber,
-                                                  referenceImageDim, reorientation);
+                                                  referenceImageDims, reorientation);
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     }
     NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda));
diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu
index ebb95539..c3c344be 100644
--- a/reg-lib/cuda/CudaLocalTransformationKernels.cu
+++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu
@@ -217,49 +217,49 @@ __device__ void GetFirstDerivativeBasisValues3D(const int index, float *xBasis,
 /* *************************************************************** */
 __device__ float4 GetSlidedValues(int x, int y,
                                   cudaTextureObject_t deformationFieldTexture,
-                                  const int3& referenceImageDim,
+                                  const int3& referenceImageDims,
                                   const mat44& affineMatrix) {
     int newX = x;
     if (x < 0)
         newX = 0;
-    else if (x >= referenceImageDim.x)
-        newX = referenceImageDim.x - 1;
+    else if (x >= referenceImageDims.x)
+        newX = referenceImageDims.x - 1;
 
     int newY = y;
     if (y < 0)
         newY = 0;
-    else if (y >= referenceImageDim.y)
-        newY = referenceImageDim.y - 1;
+    else if (y >= referenceImageDims.y)
+        newY = referenceImageDims.y - 1;
 
     x -= newX;
     y -= newY;
     const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1],
                                             x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1],
                                             0.f, 0.f);
-    return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, newY * referenceImageDim.x + newX);
+    return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, newY * referenceImageDims.x + newX);
 }
 /* *************************************************************** */
 __device__ float4 GetSlidedValues(int x, int y, int z,
                                   cudaTextureObject_t deformationFieldTexture,
-                                  const int3& referenceImageDim,
+                                  const int3& referenceImageDims,
                                   const mat44& affineMatrix) {
     int newX = x;
     if (x < 0)
         newX = 0;
-    else if (x >= referenceImageDim.x)
-        newX = referenceImageDim.x - 1;
+    else if (x >= referenceImageDims.x)
+        newX = referenceImageDims.x - 1;
 
     int newY = y;
     if (y < 0)
         newY = 0;
-    else if (y >= referenceImageDim.y)
-        newY = referenceImageDim.y - 1;
+    else if (y >= referenceImageDims.y)
+        newY = referenceImageDims.y - 1;
 
     int newZ = z;
     if (z < 0)
         newZ = 0;
-    else if (z >= referenceImageDim.z)
-        newZ = referenceImageDim.z - 1;
+    else if (z >= referenceImageDims.z)
+        newZ = referenceImageDims.z - 1;
 
     x -= newX;
     y -= newY;
@@ -268,15 +268,15 @@ __device__ float4 GetSlidedValues(int x, int y, int z,
                                             x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2],
                                             x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2],
                                             0.f);
-    return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX);
+    return slidedValues + tex1Dfetch<float4>(deformationFieldTexture, (newZ * referenceImageDims.y + newY) * referenceImageDims.x + newX);
 }
 /* *************************************************************** */
 template<bool composition, bool bspline>
 __device__ void GetDeformationField3d(float4 *deformationField,
                                       cudaTextureObject_t controlPointTexture,
                                       const mat44 *realToVoxel,
-                                      const int3 referenceImageDim,
-                                      const int3 controlPointImageDim,
+                                      const int3 referenceImageDims,
+                                      const int3 controlPointImageDims,
                                       const float3 controlPointVoxelSpacing,
                                       const int index) {
     int3 nodePre;
@@ -300,14 +300,14 @@ __device__ void GetDeformationField3d(float4 *deformationField,
                               realToVoxel->m[2][2] * node.z +
                               realToVoxel->m[2][3]);
 
-        if (xVoxel < 0 || xVoxel >= referenceImageDim.x ||
-            yVoxel < 0 || yVoxel >= referenceImageDim.y ||
-            zVoxel < 0 || zVoxel >= referenceImageDim.z) return;
+        if (xVoxel < 0 || xVoxel >= referenceImageDims.x ||
+            yVoxel < 0 || yVoxel >= referenceImageDims.y ||
+            zVoxel < 0 || zVoxel >= referenceImageDims.z) return;
 
         nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) };
     } else { // starting deformation field is blank - !composition
-        const auto [x, y, z] = IndexToDims<true>(index, referenceImageDim);
+        const auto [x, y, z] = IndexToDims<true>(index, referenceImageDims);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -324,9 +324,9 @@ __device__ void GetDeformationField3d(float4 *deformationField,
 
     float4 displacement{};
     for (char c = 0; c < 4; c++) {
-        int indexYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y) * controlPointImageDim.x;
+        int indexYZ = ((nodePre.z + c) * controlPointImageDims.y + nodePre.y) * controlPointImageDims.x;
         const float basisZ = zBasis[c];
-        for (char b = 0; b < 4; b++, indexYZ += controlPointImageDim.x) {
+        for (char b = 0; b < 4; b++, indexYZ += controlPointImageDims.x) {
             int indexXYZ = indexYZ + nodePre.x;
             const float basisY = yBasis[b];
             for (char a = 0; a < 4; a++, indexXYZ++) {
@@ -345,8 +345,8 @@ template<bool composition, bool bspline>
 __device__ void GetDeformationField2d(float4 *deformationField,
                                       cudaTextureObject_t controlPointTexture,
                                       const mat44 *realToVoxel,
-                                      const int3 referenceImageDim,
-                                      const int3 controlPointImageDim,
+                                      const int3 referenceImageDims,
+                                      const int3 controlPointImageDims,
                                       const float3 controlPointVoxelSpacing,
                                       const int index) {
     int2 nodePre;
@@ -364,13 +364,13 @@ __device__ void GetDeformationField2d(float4 *deformationField,
                               realToVoxel->m[1][1] * node.y +
                               realToVoxel->m[1][3]);
 
-        if (xVoxel < 0 || xVoxel >= referenceImageDim.x ||
-            yVoxel < 0 || yVoxel >= referenceImageDim.y) return;
+        if (xVoxel < 0 || xVoxel >= referenceImageDims.x ||
+            yVoxel < 0 || yVoxel >= referenceImageDims.y) return;
 
         nodePre = { Floor(xVoxel), Floor(yVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) };
     } else { // starting deformation field is blank - !composition
-        const auto [x, y, z] = IndexToDims<false>(index, referenceImageDim);
+        const auto [x, y, z] = IndexToDims<false>(index, referenceImageDims);
         // The "nearest previous" node is determined [0,0,0]
         const float xVoxel = float(x) / controlPointVoxelSpacing.x;
         const float yVoxel = float(y) / controlPointVoxelSpacing.y;
@@ -385,7 +385,7 @@ __device__ void GetDeformationField2d(float4 *deformationField,
 
     float4 displacement{};
     for (char b = 0; b < 4; b++) {
-        int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x;
+        int index = (nodePre.y + b) * controlPointImageDims.x + nodePre.x;
         const float basis = yBasis[b];
         for (char a = 0; a < 4; a++, index++) {
             const float4 nodeCoeff = tex1Dfetch<float4>(controlPointTexture, index);
@@ -400,7 +400,7 @@ __device__ void GetDeformationField2d(float4 *deformationField,
 __global__ void GetApproxJacobianValues2d(float *jacobianMatrices,
                                           float *jacobianDet,
                                           cudaTextureObject_t controlPointTexture,
-                                          const int3 controlPointImageDim,
+                                          const int3 controlPointImageDims,
                                           const unsigned controlPointNumber,
                                           const mat33 reorientation) {
     __shared__ float xbasis[9];
@@ -412,16 +412,14 @@ __global__ void GetApproxJacobianValues2d(float *jacobianMatrices,
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
-        int quot, rem;
-        Divide(tid, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const auto [x, y, z] = IndexToDims<false>(tid, controlPointImageDims);
 
-        if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1) {
+        if (0 < x && x < controlPointImageDims.x - 1 && 0 < y && y < controlPointImageDims.y - 1) {
             float2 tx{}, ty{};
             unsigned index = 0;
             for (int b = y - 1; b < y + 2; ++b) {
                 for (int a = x - 1; a < x + 2; ++a) {
-                    const int indexXY = b * controlPointImageDim.x + a;
+                    const int indexXY = b * controlPointImageDims.x + a;
                     const float4 controlPointValues = tex1Dfetch<float4>(controlPointTexture, indexXY);
                     tx.x += xbasis[index] * controlPointValues.x;
                     tx.y += ybasis[index] * controlPointValues.x;
@@ -461,7 +459,7 @@ __global__ void GetApproxJacobianValues2d(float *jacobianMatrices,
 __global__ void GetApproxJacobianValues3d(float *jacobianMatrices,
                                           float *jacobianDet,
                                           cudaTextureObject_t controlPointTexture,
-                                          const int3 controlPointImageDim,
+                                          const int3 controlPointImageDims,
                                           const unsigned controlPointNumber,
                                           const mat33 reorientation) {
     __shared__ float xbasis[27];
@@ -474,19 +472,15 @@ __global__ void GetApproxJacobianValues3d(float *jacobianMatrices,
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
-        int quot, rem;
-        Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
-        const int z = quot;
-        Divide(rem, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const auto [x, y, z] = IndexToDims<true>(tid, controlPointImageDims);
 
-        if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1 && 0 < z && z < controlPointImageDim.z - 1) {
+        if (0 < x && x < controlPointImageDims.x - 1 && 0 < y && y < controlPointImageDims.y - 1 && 0 < z && z < controlPointImageDims.z - 1) {
             float3 tx{}, ty{}, tz{};
             unsigned index = 0;
             for (int c = z - 1; c < z + 2; ++c) {
                 for (int b = y - 1; b < y + 2; ++b) {
                     for (int a = x - 1; a < x + 2; ++a) {
-                        const int indexXYZ = (c * controlPointImageDim.y + b) * controlPointImageDim.x + a;
+                        const int indexXYZ = (c * controlPointImageDims.y + b) * controlPointImageDims.x + a;
                         const float4 controlPointValues = tex1Dfetch<float4>(controlPointTexture, indexXYZ);
                         tx.x += xbasis[index] * controlPointValues.x;
                         tx.y += ybasis[index] * controlPointValues.x;
@@ -552,16 +546,14 @@ __global__ void GetApproxJacobianValues3d(float *jacobianMatrices,
 __global__ void GetJacobianValues2d(float *jacobianMatrices,
                                     float *jacobianDet,
                                     cudaTextureObject_t controlPointTexture,
-                                    const int3 controlPointImageDim,
+                                    const int3 controlPointImageDims,
                                     const float3 controlPointSpacing,
-                                    const int3 referenceImageDim,
+                                    const int3 referenceImageDims,
                                     const unsigned voxelNumber,
                                     const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
-        int quot, rem;
-        Divide(tid, referenceImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const auto [x, y, z] = IndexToDims<false>(tid, referenceImageDims);
 
         // the "nearest previous" node is determined [0,0,0]
         const int2 nodePre = { Floor((float)x / controlPointSpacing.x), Floor((float)y / controlPointSpacing.y) };
@@ -576,7 +568,7 @@ __global__ void GetJacobianValues2d(float *jacobianMatrices,
 
         float2 tx{}, ty{};
         for (int b = 0; b < 4; ++b) {
-            int indexXY = (nodePre.y + b) * controlPointImageDim.x + nodePre.x;
+            int indexXY = (nodePre.y + b) * controlPointImageDims.x + nodePre.x;
 
             float4 nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXY++);
             float2 basis = make_float2(xFirst[0] * yBasis[b], xBasis[0] * yFirst[b]);
@@ -621,18 +613,14 @@ __global__ void GetJacobianValues2d(float *jacobianMatrices,
 __global__ void GetJacobianValues3d(float *jacobianMatrices,
                                     float *jacobianDet,
                                     cudaTextureObject_t controlPointTexture,
-                                    const int3 controlPointImageDim,
+                                    const int3 controlPointImageDims,
                                     const float3 controlPointSpacing,
-                                    const int3 referenceImageDim,
+                                    const int3 referenceImageDims,
                                     const unsigned voxelNumber,
                                     const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < voxelNumber) {
-        int quot, rem;
-        Divide(tid, referenceImageDim.x * referenceImageDim.y, quot, rem);
-        const int z = quot;
-        Divide(rem, referenceImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const auto [x, y, z] = IndexToDims<true>(tid, referenceImageDims);
 
         // the "nearest previous" node is determined [0,0,0]
         const int3 nodePre = {
@@ -659,7 +647,7 @@ __global__ void GetJacobianValues3d(float *jacobianMatrices,
         float3 tx{}, ty{}, tz{};
         for (int c = 0; c < 4; ++c) {
             for (int b = 0; b < 4; ++b) {
-                int indexXYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y + b) * controlPointImageDim.x + nodePre.x;
+                int indexXYZ = ((nodePre.z + c) * controlPointImageDims.y + nodePre.y + b) * controlPointImageDims.x + nodePre.x;
                 float3 basisXY{ yBasis[b] * zBasis[c], yFirst[sharedMemIndex + b] * zBasis[c], yBasis[b] * zFirst[sharedMemIndex + c] };
 
                 float4 nodeCoefficient = tex1Dfetch<float4>(controlPointTexture, indexXYZ++);
@@ -764,7 +752,7 @@ __device__ void GetJacobianGradientValues3d(float *jacobianMatrix,
 __global__ void ComputeApproxJacGradient2d(float4 *gradient,
                                            cudaTextureObject_t jacobianDeterminantTexture,
                                            cudaTextureObject_t jacobianMatricesTexture,
-                                           const int3 controlPointImageDim,
+                                           const int3 controlPointImageDims,
                                            const unsigned controlPointNumber,
                                            const mat33 reorientation,
                                            const float3 weight) {
@@ -777,17 +765,15 @@ __global__ void ComputeApproxJacGradient2d(float4 *gradient,
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
-        int quot, rem;
-        Divide(tid, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const auto [x, y, z] = IndexToDims<false>(tid, controlPointImageDims);
 
         float2 jacobianGradient{};
         unsigned index = 8;
         for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) {
-            if (0 < pixelY && pixelY < controlPointImageDim.y - 1) {
-                int jacIndex = pixelY * controlPointImageDim.x + x - 1;
+            if (0 < pixelY && pixelY < controlPointImageDims.y - 1) {
+                int jacIndex = pixelY * controlPointImageDims.x + x - 1;
                 for (int pixelX = (int)(x - 1); pixelX < (int)(x + 2); ++pixelX) {
-                    if (0 < pixelX && pixelX < controlPointImageDim.x - 1) {
+                    if (0 < pixelX && pixelX < controlPointImageDims.x - 1) {
                         float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
                         if (detJac > 0.f) {
                             detJac = 2.f * logf(detJac) / detJac;
@@ -815,7 +801,7 @@ __global__ void ComputeApproxJacGradient2d(float4 *gradient,
 __global__ void ComputeApproxJacGradient3d(float4 *gradient,
                                            cudaTextureObject_t jacobianDeterminantTexture,
                                            cudaTextureObject_t jacobianMatricesTexture,
-                                           const int3 controlPointImageDim,
+                                           const int3 controlPointImageDims,
                                            const unsigned controlPointNumber,
                                            const mat33 reorientation,
                                            const float3 weight) {
@@ -829,21 +815,17 @@ __global__ void ComputeApproxJacGradient3d(float4 *gradient,
 
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
-        int quot, rem;
-        Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
-        const int z = quot;
-        Divide(rem, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const auto [x, y, z] = IndexToDims<true>(tid, controlPointImageDims);
 
         float3 jacobianGradient{};
         unsigned index = 26;
         for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) {
-            if (0 < pixelZ && pixelZ < controlPointImageDim.z - 1) {
+            if (0 < pixelZ && pixelZ < controlPointImageDims.z - 1) {
                 for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) {
-                    if (0 < pixelY && pixelY < controlPointImageDim.y - 1) {
-                        int jacIndex = (pixelZ * controlPointImageDim.y + pixelY) * controlPointImageDim.x + x - 1;
+                    if (0 < pixelY && pixelY < controlPointImageDims.y - 1) {
+                        int jacIndex = (pixelZ * controlPointImageDims.y + pixelY) * controlPointImageDims.x + x - 1;
                         for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) {
-                            if (0 < pixelX && pixelX < controlPointImageDim.x - 1) {
+                            if (0 < pixelX && pixelX < controlPointImageDims.x - 1) {
                                 float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
                                 if (detJac > 0.f) {
                                     detJac = 2.f * logf(detJac) / detJac;
@@ -879,34 +861,32 @@ __global__ void ComputeApproxJacGradient3d(float4 *gradient,
 __global__ void ComputeJacGradient2d(float4 *gradient,
                                      cudaTextureObject_t jacobianDeterminantTexture,
                                      cudaTextureObject_t jacobianMatricesTexture,
-                                     const int3 controlPointImageDim,
+                                     const int3 controlPointImageDims,
                                      const float3 controlPointVoxelSpacing,
                                      const unsigned controlPointNumber,
-                                     const int3 referenceImageDim,
+                                     const int3 referenceImageDims,
                                      const mat33 reorientation,
                                      const float3 weight) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
-        int quot, rem;
-        Divide(tid, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const auto [x, y, z] = IndexToDims<false>(tid, controlPointImageDims);
 
         float2 jacobianGradient{};
         for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
-            if (-1 < pixelY && pixelY < referenceImageDim.y) {
+            if (-1 < pixelY && pixelY < referenceImageDims.y) {
                 const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y);
                 float basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre;
                 float yBasis, yFirst;
                 GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst);
 
                 for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
-                    if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) {
+                    if (-1 < pixelX && pixelX < referenceImageDims.x && (yFirst != 0.f || yBasis != 0.f)) {
                         const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x);
                         basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre;
                         float xBasis, xFirst;
                         GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst);
 
-                        int jacIndex = pixelY * referenceImageDim.x + pixelX;
+                        int jacIndex = pixelY * referenceImageDims.x + pixelX;
                         float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
 
                         if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) {
@@ -934,43 +914,39 @@ __global__ void ComputeJacGradient2d(float4 *gradient,
 __global__ void ComputeJacGradient3d(float4 *gradient,
                                      cudaTextureObject_t jacobianDeterminantTexture,
                                      cudaTextureObject_t jacobianMatricesTexture,
-                                     const int3 controlPointImageDim,
+                                     const int3 controlPointImageDims,
                                      const float3 controlPointVoxelSpacing,
                                      const unsigned controlPointNumber,
-                                     const int3 referenceImageDim,
+                                     const int3 referenceImageDims,
                                      const mat33 reorientation,
                                      const float3 weight) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
-        int quot, rem;
-        Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
-        const int z = quot;
-        Divide(rem, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const auto [x, y, z] = IndexToDims<true>(tid, controlPointImageDims);
 
         float3 jacobianGradient{};
         for (int pixelZ = Ceil((z - 3) * controlPointVoxelSpacing.z); pixelZ <= Ceil((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
-            if (-1 < pixelZ && pixelZ < referenceImageDim.z) {
+            if (-1 < pixelZ && pixelZ < referenceImageDims.z) {
                 const int zPre = (int)((float)pixelZ / controlPointVoxelSpacing.z);
                 float basis = (float)pixelZ / controlPointVoxelSpacing.z - (float)zPre;
                 float zBasis, zFirst;
                 GetBSplineBasisValue(basis, z - zPre, &zBasis, &zFirst);
 
                 for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
-                    if (-1 < pixelY && pixelY < referenceImageDim.y && (zFirst != 0.f || zBasis != 0.f)) {
+                    if (-1 < pixelY && pixelY < referenceImageDims.y && (zFirst != 0.f || zBasis != 0.f)) {
                         const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y);
                         basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre;
                         float yBasis, yFirst;
                         GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst);
 
                         for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
-                            if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) {
+                            if (-1 < pixelX && pixelX < referenceImageDims.x && (yFirst != 0.f || yBasis != 0.f)) {
                                 const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x);
                                 basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre;
                                 float xBasis, xFirst;
                                 GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst);
 
-                                int jacIndex = (pixelZ * referenceImageDim.y + pixelY) * referenceImageDim.x + pixelX;
+                                int jacIndex = (pixelZ * referenceImageDims.y + pixelY) * referenceImageDims.x + pixelX;
                                 float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
 
                                 if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) {
@@ -1011,26 +987,22 @@ __global__ void ComputeJacGradient3d(float4 *gradient,
 __global__ void ApproxCorrectFolding3d(float4 *controlPointGrid,
                                        cudaTextureObject_t jacobianDeterminantTexture,
                                        cudaTextureObject_t jacobianMatricesTexture,
-                                       const int3 controlPointImageDim,
+                                       const int3 controlPointImageDims,
                                        const float3 controlPointSpacing,
                                        const unsigned controlPointNumber,
                                        const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
-        int quot, rem;
-        Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
-        const int z = quot;
-        Divide(rem, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const auto [x, y, z] = IndexToDims<true>(tid, controlPointImageDims);
 
         float3 foldingCorrection{};
         for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) {
-            if (0 < pixelZ && pixelZ < controlPointImageDim.z - 1) {
+            if (0 < pixelZ && pixelZ < controlPointImageDims.z - 1) {
                 for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) {
-                    if (0 < pixelY && pixelY < controlPointImageDim.y - 1) {
+                    if (0 < pixelY && pixelY < controlPointImageDims.y - 1) {
                         for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) {
-                            if (0 < pixelX && pixelX < controlPointImageDim.x - 1) {
-                                int jacIndex = (pixelZ * controlPointImageDim.y + pixelY) * controlPointImageDim.x + pixelX;
+                            if (0 < pixelX && pixelX < controlPointImageDims.x - 1) {
+                                int jacIndex = (pixelZ * controlPointImageDims.y + pixelY) * controlPointImageDims.x + pixelX;
                                 float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
                                 if (detJac <= 0.f) {
                                     float jacobianMatrix[9];
@@ -1080,28 +1052,24 @@ __global__ void ApproxCorrectFolding3d(float4 *controlPointGrid,
 __global__ void CorrectFolding3d(float4 *controlPointGrid,
                                  cudaTextureObject_t jacobianDeterminantTexture,
                                  cudaTextureObject_t jacobianMatricesTexture,
-                                 const int3 controlPointImageDim,
+                                 const int3 controlPointImageDims,
                                  const float3 controlPointSpacing,
                                  const float3 controlPointVoxelSpacing,
                                  const unsigned controlPointNumber,
-                                 const int3 referenceImageDim,
+                                 const int3 referenceImageDims,
                                  const mat33 reorientation) {
     const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
     if (tid < controlPointNumber) {
-        int quot, rem;
-        Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem);
-        const int z = quot;
-        Divide(rem, controlPointImageDim.x, quot, rem);
-        const int y = quot, x = rem;
+        const auto [x, y, z] = IndexToDims<true>(tid, controlPointImageDims);
 
         float3 foldingCorrection{};
         for (int pixelZ = Ceil((z - 3) * controlPointVoxelSpacing.z); pixelZ < Ceil((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
-            if (-1 < pixelZ && pixelZ < referenceImageDim.z) {
+            if (-1 < pixelZ && pixelZ < referenceImageDims.z) {
                 for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY < Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
-                    if (-1 < pixelY && pixelY < referenceImageDim.y) {
+                    if (-1 < pixelY && pixelY < referenceImageDims.y) {
                         for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX < Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
-                            if (-1 < pixelX && pixelX < referenceImageDim.x) {
-                                int jacIndex = (pixelZ * referenceImageDim.y + pixelY) * referenceImageDim.x + pixelX;
+                            if (-1 < pixelX && pixelX < referenceImageDims.x) {
+                                int jacIndex = (pixelZ * referenceImageDims.y + pixelY) * referenceImageDims.x + pixelX;
                                 float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
                                 if (detJac <= 0.f) {
                                     float jacobianMatrix[9];
diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu
index 073906b7..03f6d253 100755
--- a/reg-lib/cuda/_reg_ssd_gpu.cu
+++ b/reg-lib/cuda/_reg_ssd_gpu.cu
@@ -57,7 +57,7 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage,
                            const size_t activeVoxelNumber,
                            const double *timePointWeights,
                            const int referenceTimePoints) {
-    const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
     Cuda::UniqueTextureObjectPtr localWeightSimTexturePtr; cudaTextureObject_t localWeightSimTexture = 0;
@@ -123,7 +123,7 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage,
                                       const size_t activeVoxelNumber,
                                       const double timePointWeight,
                                       const int currentTimePoint) {
-    const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
+    const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz);
     const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3);
 
     auto referenceTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1);

From f680bce0a7819abd043c87e22eed92e2a9f8769d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 8 Mar 2024 15:09:04 +0000
Subject: [PATCH 303/314] Add include directories for OpenMP

---
 CMakeLists.txt             | 6 ++++++
 niftyreg_build_version.txt | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 72ee7d5a..a7d5e6c7 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -182,6 +182,12 @@ if(USE_OPENMP)
     message(STATUS "Found OpenMP")
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    if (OpenMP_C_INCLUDE_DIRS)
+      include_directories(${OpenMP_C_INCLUDE_DIRS})
+    endif(OpenMP_C_INCLUDE_DIRS)
+    if (OpenMP_CXX_INCLUDE_DIRS)
+      include_directories(${OpenMP_CXX_INCLUDE_DIRS})
+    endif(OpenMP_CXX_INCLUDE_DIRS)
   endif(NOT OPENMP_FOUND)
 endif(USE_OPENMP)
 #-----------------------------------------------------------------------------
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 1bb7ac53..8d73f938 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-421
+422

From 43c39fa86e29e5e03c66e9317e23ac8ff0b34fd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Sun, 2 Jun 2024 00:37:46 +0100
Subject: [PATCH 304/314] Refactorisations

---
 CMakeLists.txt             | 103 +++++++++++++++++++------------------
 Doxyfile.in                |   7 +++
 niftyreg_build_version.txt |   2 +-
 reg-io/RNifti/NiftiImage.h |   1 +
 reg-test/CMakeLists.txt    |  14 ++---
 5 files changed, 68 insertions(+), 59 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a7d5e6c7..c8c0415d 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,6 +4,7 @@ project(NiftyReg)
 #-----------------------------------------------------------------------------
 # Set C++ standard version
 set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
 #-----------------------------------------------------------------------------
 if(APPLE)
   set(CMAKE_MACOSX_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
@@ -45,8 +46,7 @@ add_definitions(-DNR_VERSION="${NR_VERSION}")
 find_package(Git)
 if(GIT_FOUND)
   message(STATUS "Found Git")
-  file(COPY "${CMAKE_SOURCE_DIR}/update_version_hook" DESTINATION "${CMAKE_SOURCE_DIR}/.git/hooks" USE_SOURCE_PERMISSIONS)
-  file(RENAME "${CMAKE_SOURCE_DIR}/.git/hooks/update_version_hook" "${CMAKE_SOURCE_DIR}/.git/hooks/pre-commit")
+  execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_SOURCE_DIR}/update_version_hook" "${CMAKE_SOURCE_DIR}/.git/hooks/pre-commit")
 endif(GIT_FOUND)
 #-----------------------------------------------------------------------------
 if(MSVC)
@@ -55,7 +55,7 @@ if(MSVC)
 endif(MSVC)
 #-----------------------------------------------------------------------------
 if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
-    add_definitions(-fPIC)
+  add_definitions(-fPIC)
 endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
 #-----------------------------------------------------------------------------
 option(BUILD_ALL_DEP "All the dependencies are build" OFF)
@@ -71,69 +71,69 @@ option(USE_NRRD "To use the NRRD file format" OFF)
 mark_as_advanced(USE_NRRD)
 #-----------------------------------------------------------------------------
 if(WIN32)
-    set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE)
+  set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE)
 endif(WIN32)
 #-----------------------------------------------------------------------------
 # All dependencies are build to create the 3DSlicer package
 if(BUILD_NR_SLICER_EXT)
-    set(BUILD_ALL_DEP ON)
-    mark_as_advanced(FORCE BUILD_ALL_DEP)
+  set(BUILD_ALL_DEP ON)
+  mark_as_advanced(FORCE BUILD_ALL_DEP)
 else(BUILD_NR_SLICER_EXT)
-    mark_as_advanced(CLEAR BUILD_ALL_DEP)
+  mark_as_advanced(CLEAR BUILD_ALL_DEP)
 endif(BUILD_NR_SLICER_EXT)
 #-----------------------------------------------------------------------------
 # Z library
 # Try first to find the z library on the system and built is from the sources if it can not be find
 if(NOT BUILD_ALL_DEP)
-    find_package(ZLIB)
-    if(ZLIB_FOUND)
-        include_directories(${ZLIB_INCLUDE_DIR})
-        message(STATUS "Found zlib - the z library will not be built")
-    else(ZLIB_FOUND)
-        include_directories(${CMAKE_SOURCE_DIR}/reg-io/zlib)
-        message(STATUS "zlib not found - the z library will be built")
-    endif(ZLIB_FOUND)
+  find_package(ZLIB)
+  if(ZLIB_FOUND)
+    include_directories(SYSTEM ${ZLIB_INCLUDE_DIR})
+    message(STATUS "Found zlib - the z library will not be built")
+  else(ZLIB_FOUND)
+    include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/reg-io/zlib)
+    message(STATUS "zlib not found - the z library will be built")
+  endif(ZLIB_FOUND)
 else(NOT BUILD_ALL_DEP)
-    include_directories(${CMAKE_SOURCE_DIR}/reg-io/zlib)
+  include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/reg-io/zlib)
 endif(NOT BUILD_ALL_DEP)
 #-----------------------------------------------------------------------------
 # Try to find the png library and header on the system
 if(NOT BUILD_ALL_DEP)
-    ## PNG support - First try to find the PNG library on the system and build it if it is not found
-    ## I did not use the FindPNG.cmake here as the zlib is also included into the project
-    if(CYGWIN)
-        if(NOT BUILD_SHARED_LIBS)
-            set (PNG_DEFINITIONS -DPNG_STATIC)
-        endif(NOT BUILD_SHARED_LIBS)
-    endif(CYGWIN)
-    set(PNG_NAMES ${PNG_NAMES} png libpng png15 libpng15 png15d libpng15d png14 libpng14 png14d libpng14d png12 libpng12 png12d libpng12d)
-    find_library(PNG_LIBRARY NAMES ${PNG_NAMES})
-    find_path(PNG_INCLUDE_DIR png.h
-        /usr/local/include/libpng
-        /sw/include
-    )
-    # If the png library and header can not be found, it is build from the sources
-    if(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
-        message(STATUS "libpng not found - the png library will be built")
-        set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng)
-        set(PNG_LIBRARY png)
-        set(BUILD_INTERNAL_PNG true)
-    else(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
-        message(STATUS "Found libpng - the png library will not be built")
-        set(BUILD_INTERNAL_PNG false)
-    endif(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
-else(NOT BUILD_ALL_DEP)
+  # PNG support - First try to find the PNG library on the system and build it if it is not found
+  # I did not use the FindPNG.cmake here as the zlib is also included into the project
+  if(CYGWIN)
+    if(NOT BUILD_SHARED_LIBS)
+      set(PNG_DEFINITIONS -DPNG_STATIC)
+    endif(NOT BUILD_SHARED_LIBS)
+  endif(CYGWIN)
+  set(PNG_NAMES ${PNG_NAMES} png libpng png15 libpng15 png15d libpng15d png14 libpng14 png14d libpng14d png12 libpng12 png12d libpng12d)
+  find_library(PNG_LIBRARY NAMES ${PNG_NAMES})
+  find_path(PNG_INCLUDE_DIR png.h
+    /usr/local/include/libpng
+    /sw/include
+  )
+  # If the png library and header can not be found, it is build from the sources
+  if(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
+    message(STATUS "libpng not found - the png library will be built")
     set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng)
     set(PNG_LIBRARY png)
+    set(BUILD_INTERNAL_PNG true)
+  else(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
+    message(STATUS "Found libpng - the png library will not be built")
+    set(BUILD_INTERNAL_PNG false)
+  endif(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR)
+else(NOT BUILD_ALL_DEP)
+  set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng)
+  set(PNG_LIBRARY png)
 endif(NOT BUILD_ALL_DEP)
-include_directories(${CMAKE_SOURCE_DIR}/reg-io/png)
-include_directories(${PNG_INCLUDE_DIR})
+include_directories(SYSTEM ${PNG_INCLUDE_DIR})
+include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/reg-io/png)
 #-----------------------------------------------------------------------------
 include_directories(${CMAKE_BINARY_DIR})
 include_directories(${CMAKE_SOURCE_DIR}/reg-io)
 include_directories(${CMAKE_SOURCE_DIR}/reg-lib)
 include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cpu)
-include_directories(${CMAKE_SOURCE_DIR}/third-party)
+include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/third-party)
 #-----------------------------------------------------------------------------
 if(USE_OPENCL)
   # Find the OpenCL package
@@ -143,8 +143,8 @@ if(USE_OPENCL)
     message(SEND_ERROR "OpenCL not found. The USE_OPENCL flag is turned OFF")
   else(NOT OpenCL_FOUND)
     message(STATUS "Found OpenCL")
+    include_directories(SYSTEM ${OpenCL_INCLUDE_DIRS})
     include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cl)
-    include_directories(${OpenCL_INCLUDE_DIRS})
     add_definitions(-DUSE_OPENCL)
   endif(NOT OpenCL_FOUND)
 endif(USE_OPENCL)
@@ -159,8 +159,8 @@ if(USE_CUDA)
     set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE)
     message(SEND_ERROR "CUDA not found. The USE_CUDA flag is turned OFF")
   else(NOT CMAKE_CUDA_COMPILER)
+    include_directories(SYSTEM ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
     include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda)
-    include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
     add_definitions(-DUSE_CUDA)
   endif(NOT CMAKE_CUDA_COMPILER)
 endif(USE_CUDA)
@@ -183,19 +183,20 @@ if(USE_OPENMP)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
     if (OpenMP_C_INCLUDE_DIRS)
-      include_directories(${OpenMP_C_INCLUDE_DIRS})
+      include_directories(SYSTEM ${OpenMP_C_INCLUDE_DIRS})
     endif(OpenMP_C_INCLUDE_DIRS)
     if (OpenMP_CXX_INCLUDE_DIRS)
-      include_directories(${OpenMP_CXX_INCLUDE_DIRS})
+      include_directories(SYSTEM ${OpenMP_CXX_INCLUDE_DIRS})
     endif(OpenMP_CXX_INCLUDE_DIRS)
+    link_libraries(${OpenMP_CXX_LIBRARIES})
   endif(NOT OPENMP_FOUND)
 endif(USE_OPENMP)
 #-----------------------------------------------------------------------------
 if(BUILD_SHARED_LIBS)
   if(USE_CUDA)
-     set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build the libraries as shared." FORCE)
-     message(WARNING "CUDA is not compatible with shared libraries. Forcing BUILD_SHARED_LIBS to OFF")
-     set(NIFTYREG_LIBRARY_TYPE STATIC)
+    set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build the libraries as shared." FORCE)
+    message(WARNING "CUDA is not compatible with shared libraries. Forcing BUILD_SHARED_LIBS to OFF")
+    set(NIFTYREG_LIBRARY_TYPE STATIC)
   else(USE_CUDA)
     set(NIFTYREG_LIBRARY_TYPE SHARED)
   endif(USE_CUDA)
@@ -219,7 +220,7 @@ endif(BUILD_TESTING)
 # add a target to generate API documentation with Doxygen
 find_package(Doxygen)
 if(DOXYGEN_FOUND)
-  set(DOXY_EXCLUDED_PATTERNS "")
+  set(DOXY_EXCLUDED_PATTERNS "*/build/* */reg-io/RNifti/*")
   if(NOT BUILD_TESTING)
     set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-test/*")
   endif(NOT BUILD_TESTING)
diff --git a/Doxyfile.in b/Doxyfile.in
index df013886..42344ddc 100644
--- a/Doxyfile.in
+++ b/Doxyfile.in
@@ -728,6 +728,13 @@ FILTER_SOURCE_FILES    = YES
 
 FILTER_SOURCE_PATTERNS =
 
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE = README.md
+
 #---------------------------------------------------------------------------
 # configuration options related to source browsing
 #---------------------------------------------------------------------------
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 8d73f938..57214136 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-422
+423
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 26cffe98..991d2447 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1721,6 +1721,7 @@ class NiftiImage
         if (image == nullptr)
             return;
         switch (dim) {
+        case Dim::NDim: break;
         case Dim::X:
             image->pixdim[1] = image->dx = value;
             break;
diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt
index 4bf3d667..26a5a825 100755
--- a/reg-test/CMakeLists.txt
+++ b/reg-test/CMakeLists.txt
@@ -13,7 +13,7 @@ endif(NOT Catch2_FOUND)
 # Build the coverage test
 option(WITH_COVERAGE "Set up the C, CXX and linker flags to run the coverage test" OFF)
 if(WITH_COVERAGE)
-  if(NOT MSVC)
+  if(UNIX)
     # Check prerequisites
     find_program(LCOV lcov REQUIRED)
     find_program(GENHTML genhtml REQUIRED)
@@ -53,7 +53,7 @@ if(WITH_COVERAGE)
     add_custom_target(clean_coverage
       COMMAND ${LCOV} --directory . --zerocounters
       WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
-  else(NOT MSVC)
+  else(UNIX)
     # Check prerequisites
     find_program(OPENCPPCOVERAGE OpenCppCoverage REQUIRED)
 
@@ -76,7 +76,7 @@ if(WITH_COVERAGE)
       # Gather data only for the reg-lib directory
       COMMAND ${OPENCPPCOVERAGE} --sources=${COVERAGE_SOURCE} --cover_children -- ctest -C Debug
       WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
-  endif(NOT MSVC)
+  endif(UNIX)
 endif(WITH_COVERAGE)
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------
@@ -94,14 +94,14 @@ endif(USE_CUDA)
 if(USE_OPENCL)
   set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_opencl")
 endif(USE_OPENCL)
-if(NOT MSVC)
+if(UNIX)
   unset(BUILDNAME CACHE)
   unset(BUILDNAME)
   set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash" FORCE)
-else(MSVC)
+else(UNIX)
   set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash")
-  message(STATUS "The buildname might need manual editing")
-endif(NOT MSVC)
+  message(STATUS "The build name might need manual editing")
+endif(UNIX)
 mark_as_advanced(BUILDNAME)
 #-----------------------------------------------------------------------------
 #-----------------------------------------------------------------------------

From 7cfe1465efe2efafaebe3e5431cfd15077b129ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 12 Jun 2024 18:32:36 +0100
Subject: [PATCH 305/314] Update macOS images in GitHub Actions

---
 .github/workflows/release.yml | 6 +++---
 .github/workflows/tests.yml   | 6 +++---
 niftyreg_build_version.txt    | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 9b001d7d..ff398f5f 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04, macos-latest, windows-2019]
+        os: [ubuntu-20.04, macos-12, windows-2019]
         platform: [cpu, cuda]
         include:
           - platform: cpu
@@ -22,7 +22,7 @@ jobs:
             use-opencl: "ON"
           - os: ubuntu-20.04    # For Ubuntu only
             os-name: "Ubuntu"
-          - os: macos-latest    # For macOS only
+          - os: macos-12        # For macOS only
             os-name: "macOS"
             use-opencl: "ON"
           - sudo: "sudo"        # For Ubuntu and macOS
@@ -35,7 +35,7 @@ jobs:
             cxx-compiler: "cl.exe"
           - build_type: "Release" # For all platforms
         exclude:
-          - os: macos-latest
+          - os: macos-12
             platform: cuda
 
     steps:
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 5f1f5660..f523b0f5 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -5,12 +5,12 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-12, windows-latest]
         include:
-          - sudo: "sudo"        # For ubuntu and macos
+          - sudo: "sudo"        # For Ubuntu and macOS
             c-compiler: "gcc"
             cxx-compiler: "g++"
-          - os: windows-latest  # For windows only
+          - os: windows-latest  # For Windows only
             sudo: ""
             c-compiler: "cl.exe"
             cxx-compiler: "cl.exe"
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 57214136..9524ef49 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-423
+424

From 41cd5a16d6e1221e124134d49bf6069a23cb9f2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 12 Jun 2024 18:44:17 +0100
Subject: [PATCH 306/314] Update the OS image for Coverage

---
 .github/workflows/coverage.yml | 2 +-
 niftyreg_build_version.txt     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index e54b253e..45727dfd 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -2,7 +2,7 @@ name: Coverage
 on: [push, pull_request]
 jobs:
   Coverage:
-    runs-on: [self-hosted, linux, gpu]
+    runs-on: [self-hosted, ubuntu-22.04, gpu]
     steps:
       - uses: actions/checkout@v3
 
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 9524ef49..5e4a5228 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-424
+425

From 65934f20964a0e1762cfc85286012c45bcfb42f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Wed, 12 Jun 2024 21:22:45 +0100
Subject: [PATCH 307/314] Fix linting issues

---
 niftyreg_build_version.txt        | 2 +-
 reg-lib/ComputeFactory.h          | 1 +
 reg-lib/ContentCreatorFactory.h   | 2 ++
 reg-lib/MeasureCreatorFactory.hpp | 1 +
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 5e4a5228..43d371af 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-425
+428
diff --git a/reg-lib/ComputeFactory.h b/reg-lib/ComputeFactory.h
index 426e1d1f..8c37125d 100644
--- a/reg-lib/ComputeFactory.h
+++ b/reg-lib/ComputeFactory.h
@@ -4,5 +4,6 @@
 
 class ComputeFactory {
 public:
+    virtual ~ComputeFactory() = default;
     virtual Compute* Produce(Content& con) { return new Compute(con); }
 };
diff --git a/reg-lib/ContentCreatorFactory.h b/reg-lib/ContentCreatorFactory.h
index 4d9ddddc..3200bdd7 100644
--- a/reg-lib/ContentCreatorFactory.h
+++ b/reg-lib/ContentCreatorFactory.h
@@ -10,6 +10,8 @@ enum class ContentType { Base, Aladin, Def, F3d, F3d2 };
 
 class ContentCreatorFactory {
 public:
+    virtual ~ContentCreatorFactory() = default;
+
     virtual ContentCreator* Produce(const ContentType conType) {
         switch (conType) {
         case ContentType::Base:
diff --git a/reg-lib/MeasureCreatorFactory.hpp b/reg-lib/MeasureCreatorFactory.hpp
index d51b6db1..d44d584e 100644
--- a/reg-lib/MeasureCreatorFactory.hpp
+++ b/reg-lib/MeasureCreatorFactory.hpp
@@ -4,5 +4,6 @@
 
 class MeasureCreatorFactory {
 public:
+    virtual ~MeasureCreatorFactory() = default;
     virtual MeasureCreator* Produce() { return new MeasureCreator(); }
 };

From dd78e8e9c1e4c2fe1c5efb9f75ec447a625e03bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 12 Jul 2024 11:55:25 +0300
Subject: [PATCH 308/314] Use NiftiImage instead of nifti_image in
 _reg_ReadWriteImage

Delete duplicate reg_io_ReadImageHeader() and add this ability to reg_io_ReadImageFile()
---
 niftyreg_build_version.txt     |  2 +-
 reg-apps/reg_average.cpp       | 13 +++----
 reg-apps/reg_jacobian.cpp      | 11 ++----
 reg-apps/reg_resample.cpp      | 10 ++----
 reg-apps/reg_tools.cpp         | 12 +++----
 reg-apps/reg_transform.cpp     | 63 +++++++++++++---------------------
 reg-io/_reg_ReadWriteImage.cpp | 45 ++++--------------------
 reg-io/_reg_ReadWriteImage.h   | 22 ++++--------
 8 files changed, 52 insertions(+), 126 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 43d371af..35606667 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-428
+429
diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp
index 07446e4d..ba42ebbe 100644
--- a/reg-apps/reg_average.cpp
+++ b/reg-apps/reg_average.cpp
@@ -253,7 +253,7 @@ int compute_nrr_demean(nifti_image *demean_field,
    // iterate over all transformations
    for(size_t t=0; t<transformationNumber; ++t){
       // read the transformation
-      nifti_image *transformation = reg_io_ReadImageFile(inputNRRName[t]);
+      NiftiImage transformation = reg_io_ReadImageFile(inputNRRName[t]);
       // Generate the deformation or flow field
       nifti_image *deformationField = nifti_dup(*demean_field, false);
       reg_tools_multiplyValueToImage(deformationField,deformationField,0.f);
@@ -308,7 +308,6 @@ int compute_nrr_demean(nifti_image *demean_field,
             deformationField->intent_p1=DISP_VEL_FIELD;
       }
       else reg_getDisplacementFromDeformation(deformationField);
-      nifti_image_free(transformation);
       // The current field is added to the average image
       reg_tools_addImageToImage(demean_field,deformationField,demean_field);
       nifti_image_free(deformationField);
@@ -364,7 +363,7 @@ int compute_average_image(nifti_image *averageImage,
       reg_createDeformationField<float>(deformationField, averageImage);
       // Compute the transformation if required
       if(inputNRRName!=nullptr){
-         nifti_image *current_transformation = reg_io_ReadImageFile(inputNRRName[i]);
+         NiftiImage current_transformation = reg_io_ReadImageFile(inputNRRName[i]);
          switch(static_cast<int>(current_transformation->intent_p1)){
          case DISP_FIELD:
             reg_getDeformationFromDisplacement(current_transformation);
@@ -388,7 +387,6 @@ int compute_average_image(nifti_image *averageImage,
             NR_ERROR("Unsupported transformation type");
             return EXIT_FAILURE;
          }
-         nifti_image_free(current_transformation);
          if(demeanField!=nullptr){
             if(deformationField->intent_p1==DEF_VEL_FIELD){
                reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField);
@@ -419,7 +417,7 @@ int compute_average_image(nifti_image *averageImage,
       warpedImage->nbyper = sizeof(float);
       warpedImage->data = malloc(warpedImage->nvox*warpedImage->nbyper);
       // Read the input image
-      nifti_image *current_input_image = reg_io_ReadImageFile(inputImageName[i]);
+      NiftiImage current_input_image = reg_io_ReadImageFile(inputImageName[i]);
       reg_tools_changeDatatype<PrecisionType>(current_input_image);
       // Apply the transformation
       reg_resampleImage(current_input_image,
@@ -428,7 +426,6 @@ int compute_average_image(nifti_image *averageImage,
                         nullptr,
                         interpolation_order,
                         std::numeric_limits<float>::quiet_NaN());
-      nifti_image_free(current_input_image);
       // Add the image to the average
       remove_nan_and_add(averageImage, warpedImage, definedValue);
       nifti_image_free(warpedImage);
@@ -668,7 +665,7 @@ int main(int argc, char **argv)
    }
 
    mat44 avg_output_matrix;
-   nifti_image *avg_output_image=nullptr;
+   NiftiImage avg_output_image;
 
    // Go over the different operations
    if(operation==AVG_INPUT && trans_is_affine){
@@ -716,8 +713,6 @@ int main(int argc, char **argv)
          free(pointer_to_command[i]);
       free(pointer_to_command);
    }
-   if(avg_output_image!=nullptr)
-      nifti_image_free(avg_output_image);
    if(input_image_names!=nullptr){
       free(input_image_names);
    }
diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp
index 06507407..74142fca 100644
--- a/reg-apps/reg_jacobian.cpp
+++ b/reg-apps/reg_jacobian.cpp
@@ -226,7 +226,7 @@ int main(int argc, char **argv)
    /* ******************* */
    /* READ TRANSFORMATION */
    /* ******************* */
-   nifti_image *inputTransformation=nullptr;
+   NiftiImage inputTransformation;
    if(flag->inputTransFlag)
    {
       // Check of the input transformation is an affine
@@ -254,7 +254,7 @@ int main(int argc, char **argv)
    /* COMPUTE JACOBIAN MAT OR DET */
    /* *************************** */
    // Create a deformation field if needed
-   nifti_image *referenceImage=nullptr;
+   NiftiImage referenceImage;
    if(inputTransformation->intent_p1==LIN_SPLINE_GRID ||
          inputTransformation->intent_p1==CUB_SPLINE_GRID ||
          inputTransformation->intent_p1==SPLINE_VEL_GRID){
@@ -263,7 +263,7 @@ int main(int argc, char **argv)
          return EXIT_FAILURE;
       }
       // Read the reference image
-      referenceImage = reg_io_ReadImageHeader(param->refImageName);
+      referenceImage = reg_io_ReadImageFile(param->refImageName, true);
       if(referenceImage == nullptr)
       {
          NR_ERROR("Error when reading the reference image.");
@@ -277,7 +277,6 @@ int main(int argc, char **argv)
       nifti_image *jacobianImage=nullptr;
       if(referenceImage!=nullptr){
          jacobianImage=nifti_copy_nim_info(referenceImage);
-         nifti_image_free(referenceImage);referenceImage=nullptr;
       }
       else jacobianImage=nifti_copy_nim_info(inputTransformation);
       jacobianImage->ndim=jacobianImage->dim[0]=jacobianImage->nz>1?3:2;
@@ -331,7 +330,6 @@ int main(int argc, char **argv)
       nifti_image *jacobianImage=nullptr;
       if(referenceImage!=nullptr){
          jacobianImage=nifti_copy_nim_info(referenceImage);
-         nifti_image_free(referenceImage);referenceImage=nullptr;
       }
       else jacobianImage=nifti_copy_nim_info(inputTransformation);
       jacobianImage->ndim=jacobianImage->dim[0]=5;
@@ -380,8 +378,5 @@ int main(int argc, char **argv)
       nifti_image_free(jacobianImage);jacobianImage=nullptr;
    }
 
-   // Free the allocated image
-   nifti_image_free(inputTransformation);inputTransformation=nullptr;
-
    return EXIT_SUCCESS;
 }
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index 9ab79df2..bfcfe963 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -251,7 +251,7 @@ int main(int argc, char **argv)
    }
 
    /* Read the reference image */
-   nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+   NiftiImage referenceImage = reg_io_ReadImageFile(param->referenceImageName, true);
    if(referenceImage == nullptr)
    {
       NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
@@ -259,7 +259,7 @@ int main(int argc, char **argv)
    }
 
    /* Read the floating image */
-   nifti_image *floatingImage = reg_io_ReadImageFile(param->floatingImageName);
+   NiftiImage floatingImage = reg_io_ReadImageFile(param->floatingImageName);
    if(floatingImage == nullptr)
    {
       NR_ERROR("Error when reading the floating image: " << param->floatingImageName);
@@ -282,7 +282,7 @@ int main(int argc, char **argv)
    /* *********************** */
    /* READ THE TRANSFORMATION */
    /* *********************** */
-   nifti_image *inputTransformationImage = nullptr;
+   NiftiImage inputTransformationImage;
    mat44 inputAffineTransformation;
    // Check if a transformation has been specified
    if(flag->inputTransFlag)
@@ -395,8 +395,6 @@ int main(int argc, char **argv)
                                nullptr);
           break;
       }
-      nifti_image_free(inputTransformationImage);
-      inputTransformationImage=nullptr;
    }
    else
    {
@@ -602,8 +600,6 @@ int main(int argc, char **argv)
    //   // Tell the CLI that we finished
    //   closeProgress("reg_resample", "Normal exit");
 
-   nifti_image_free(referenceImage);
-   nifti_image_free(floatingImage);
    nifti_image_free(deformationFieldImage);
 
    free(flag);
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 76b55ba5..451def0d 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -463,7 +463,7 @@ int main(int argc, char **argv)
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
 
     /* Read the image */
-    nifti_image *image = reg_io_ReadImageFile(param->inputImageName);
+    NiftiImage image = reg_io_ReadImageFile(param->inputImageName);
     if(image == nullptr)
     {
         NR_ERROR("Error when reading the input image: " << param->inputImageName);
@@ -573,7 +573,7 @@ int main(int argc, char **argv)
 
     if(flag->operationTypeFlag>-1)
     {
-        nifti_image *image2=nullptr;
+        NiftiImage image2;
         if(param->operationImageName!=nullptr)
         {
             image2 = reg_io_ReadImageFile(param->operationImageName);
@@ -668,14 +668,13 @@ int main(int argc, char **argv)
         else reg_io_WriteImageFile(outputImage,"output.nii");
 
         nifti_image_free(outputImage);
-        if(image2!=nullptr) nifti_image_free(image2);
     }
 
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
 
     if(flag->rmsImageFlag)
     {
-        nifti_image *image2 = reg_io_ReadImageFile(param->rmsImageName);
+        NiftiImage image2 = reg_io_ReadImageFile(param->rmsImageName);
         if(image2 == nullptr)
         {
             NR_ERROR("Error when reading the image: " << param->rmsImageName);
@@ -697,7 +696,6 @@ int main(int argc, char **argv)
 
         double meanRMSerror = reg_tools_getMeanRMS(image, image2);
         NR_COUT << "Mean RMS error: " << meanRMSerror << std::endl;
-        nifti_image_free(image2);
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
     if(flag->binarisedImageFlag)
@@ -720,7 +718,7 @@ int main(int argc, char **argv)
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
     if(flag->nanMaskFlag)
     {
-        nifti_image *maskImage = reg_io_ReadImageFile(param->operationImageName);
+        NiftiImage maskImage = reg_io_ReadImageFile(param->operationImageName);
         if(maskImage == nullptr)
         {
             NR_ERROR("Error when reading the image: " << param->operationImageName);
@@ -736,7 +734,6 @@ int main(int argc, char **argv)
         else reg_io_WriteImageFile(outputImage,"output.nii");
 
         nifti_image_free(outputImage);
-        nifti_image_free(maskImage);
     }
     //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\//
     if(flag->iso)
@@ -1173,6 +1170,5 @@ int main(int argc, char **argv)
         outputImage=nullptr;
     }
 
-    nifti_image_free(image);
     return EXIT_SUCCESS;
 }
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index ba427d31..4ab60681 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -275,8 +275,8 @@ int main(int argc, char **argv) {
     if (flag->outputDefFlag || flag->outputDispFlag || flag->outputFlowFlag) {
         // Create some variables
         mat44 *affineTransformation = nullptr;
-        nifti_image *referenceImage = nullptr;
-        nifti_image *inputTransformationImage = nullptr;
+        NiftiImage referenceImage;
+        NiftiImage inputTransformationImage;
         nifti_image *outputTransformationImage = nullptr;
         // First check if the input filename is an image
         if (reg_isAnImageFileName(param->inputTransName)) {
@@ -294,7 +294,7 @@ int main(int argc, char **argv) {
                              " a reference image should be specified (-ref flag)");
                     return EXIT_FAILURE;
                 }
-                referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+                referenceImage = reg_io_ReadImageFile(param->referenceImageName, true);
                 if (referenceImage == nullptr) {
                     NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                     return EXIT_FAILURE;
@@ -309,7 +309,7 @@ int main(int argc, char **argv) {
                          " a reference image should be specified (-ref flag)");
                 return EXIT_FAILURE;
             }
-            referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+            referenceImage = reg_io_ReadImageFile(param->referenceImageName, true);
             if (referenceImage == nullptr) {
                 NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                 return EXIT_FAILURE;
@@ -484,8 +484,6 @@ int main(int argc, char **argv) {
         }
         // Free the allocated images and arrays
         if (affineTransformation != nullptr) free(affineTransformation);
-        if (referenceImage != nullptr) nifti_image_free(referenceImage);
-        if (inputTransformationImage != nullptr) nifti_image_free(inputTransformationImage);
         nifti_image_free(outputTransformationImage);
     }
 
@@ -497,10 +495,10 @@ int main(int argc, char **argv) {
         // Create some variables
         mat44 *affine1Trans = nullptr;
         mat44 *affine2Trans = nullptr;
-        nifti_image *referenceImage = nullptr;
-        nifti_image *referenceImage2 = nullptr;
-        nifti_image *input1TransImage = nullptr;
-        nifti_image *input2TransImage = nullptr;
+        NiftiImage referenceImage;
+        NiftiImage referenceImage2;
+        NiftiImage input1TransImage;
+        NiftiImage input2TransImage;
         nifti_image *output1TransImage = nullptr;
         nifti_image *output2TransImage = nullptr;
         // Read the first transformation
@@ -541,7 +539,7 @@ int main(int argc, char **argv) {
                              " a reference image should be specified (-res flag).");
                     return EXIT_FAILURE;
                 }
-                referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+                referenceImage = reg_io_ReadImageFile(param->referenceImageName, true);
                 if (referenceImage == nullptr) {
                     NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                     return EXIT_FAILURE;
@@ -554,7 +552,7 @@ int main(int argc, char **argv) {
                              " a reference image should be specified (-ref flag).");
                     return EXIT_FAILURE;
                 }
-                referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+                referenceImage = reg_io_ReadImageFile(param->referenceImageName, true);
                 if (referenceImage == nullptr) {
                     NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                     return EXIT_FAILURE;
@@ -562,7 +560,7 @@ int main(int argc, char **argv) {
             }
             // Read the second reference image if specified
             if (flag->referenceImage2Flag) {
-                referenceImage2 = reg_io_ReadImageHeader(param->referenceImage2Name);
+                referenceImage2 = reg_io_ReadImageFile(param->referenceImage2Name, true);
                 if (referenceImage2 == nullptr) {
                     NR_ERROR("Error when reading the second reference image: " << param->referenceImage2Name);
                     return EXIT_FAILURE;
@@ -744,10 +742,6 @@ int main(int argc, char **argv) {
         // Free allocated object
         if (affine1Trans != nullptr) free(affine1Trans);
         if (affine2Trans != nullptr) free(affine2Trans);
-        if (referenceImage != nullptr) nifti_image_free(referenceImage);
-        if (referenceImage2 != nullptr) nifti_image_free(referenceImage2);
-        if (input1TransImage != nullptr) nifti_image_free(input1TransImage);
-        if (input2TransImage != nullptr) nifti_image_free(input2TransImage);
         if (output1TransImage != nullptr) nifti_image_free(output1TransImage);
         if (output2TransImage != nullptr) nifti_image_free(output2TransImage);
     }
@@ -759,8 +753,8 @@ int main(int argc, char **argv) {
     if (flag->outputLandFlag) {
         // Create some variables
         mat44 *affineTransformation = nullptr;
-        nifti_image *referenceImage = nullptr;
-        nifti_image *inputTransformationImage = nullptr;
+        NiftiImage referenceImage;
+        NiftiImage inputTransformationImage;
         nifti_image *deformationFieldImage = nullptr;
         // First check if the input filename is an image
         if (reg_isAnImageFileName(param->inputTransName)) {
@@ -778,7 +772,7 @@ int main(int argc, char **argv) {
                              " a reference image should be specified (-ref flag).");
                     return EXIT_FAILURE;
                 }
-                referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+                referenceImage = reg_io_ReadImageFile(param->referenceImageName, true);
                 if (referenceImage == nullptr) {
                     NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                     return EXIT_FAILURE;
@@ -793,7 +787,7 @@ int main(int argc, char **argv) {
                          " a reference image should be specified (-ref flag).");
                 return EXIT_FAILURE;
             }
-            referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+            referenceImage = reg_io_ReadImageFile(param->referenceImageName, true);
             if (referenceImage == nullptr) {
                 NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                 return EXIT_FAILURE;
@@ -894,8 +888,6 @@ int main(int argc, char **argv) {
         deformationFieldImage->intent_p2 = 0;
         // Free all allocated input
         if (affineTransformation != nullptr) free(affineTransformation);
-        if (referenceImage != nullptr) nifti_image_free(referenceImage);
-        if (inputTransformationImage != nullptr) nifti_image_free(inputTransformationImage);
         // Read the landmark file
         std::pair<size_t, size_t> inputMatrixSize = reg_tool_sizeInputMatrixFile(param->inputLandmarkName);
         size_t landmarkNumber = inputMatrixSize.first;
@@ -941,7 +933,7 @@ int main(int argc, char **argv) {
     /* **************************************** */
     if (flag->updSFormFlag) {
         // Read the input image
-        nifti_image *image = reg_io_ReadImageFile(param->inputTransName);
+        NiftiImage image = reg_io_ReadImageFile(param->inputTransName);
         if (image == nullptr) {
             NR_ERROR("Error when reading the input image: " << param->inputTransName);
             return EXIT_FAILURE;
@@ -964,7 +956,6 @@ int main(int argc, char **argv) {
         // Write the output image
         reg_io_WriteImageFile(image, param->outputTransName);
         // Free the allocated image and array
-        nifti_image_free(image);
         free(affineTransformation);
     }
     /* ******************************** */
@@ -973,7 +964,7 @@ int main(int argc, char **argv) {
     if (flag->halfTransFlag) {
         // Read the input transformation
         mat44 *affineTrans = nullptr;
-        nifti_image *inputTransImage = nullptr;
+        NiftiImage inputTransImage;
         if (!reg_isAnImageFileName(param->inputTransName)) {
             // An affine transformation is considered
             affineTrans = (mat44 *)malloc(sizeof(mat44));
@@ -1039,13 +1030,13 @@ int main(int argc, char **argv) {
     /* ******************************************** */
     if (flag->invertNRRFlag) {
         // Read the provided transformation
-        nifti_image *inputTransImage = reg_io_ReadImageFile(param->inputTransName);
+        NiftiImage inputTransImage = reg_io_ReadImageFile(param->inputTransName);
         if (inputTransImage == nullptr) {
             NR_ERROR("Error when reading the input image: " << param->inputTransName);
             return EXIT_FAILURE;
         }
         // Read the provided floating space image
-        nifti_image *floatingImage = reg_io_ReadImageFile(param->input2TransName);
+        NiftiImage floatingImage = reg_io_ReadImageFile(param->input2TransName);
         if (floatingImage == nullptr) {
             NR_ERROR("Error when reading the input image: " << param->input2TransName);
             return EXIT_FAILURE;
@@ -1060,13 +1051,13 @@ int main(int argc, char **argv) {
                          " a reference image should be specified (-ref flag).");
                 return EXIT_FAILURE;
             }
-            nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
+            NiftiImage referenceImage = reg_io_ReadImageFile(param->referenceImageName, true);
             if (referenceImage == nullptr) {
                 NR_ERROR("Error when reading the reference image: " << param->referenceImageName);
                 return EXIT_FAILURE;
             }
             // Create a deformation field or a flow field
-            nifti_image *tempField = nifti_copy_nim_info(referenceImage);
+            NiftiImage tempField(referenceImage, NiftiImage::Copy::ImageInfo);
             tempField->ndim = tempField->dim[0] = 5;
             tempField->nt = tempField->dim[4] = 1;
             tempField->nu = tempField->dim[5] = tempField->nz > 1 ? 3 : 2;
@@ -1091,10 +1082,7 @@ int main(int argc, char **argv) {
             else
                 reg_spline_getFlowFieldFromVelocityGrid(inputTransImage, tempField);
             // The provided transformation file is replaced by the compute dense field
-            nifti_image_free(referenceImage);
-            nifti_image_free(inputTransImage);
-            inputTransImage = tempField;
-            tempField = nullptr;
+            inputTransImage = std::move(tempField);
         }
         // Create a field to store the transformation
         nifti_image *outputTransImage = nifti_copy_nim_info(floatingImage);
@@ -1161,7 +1149,6 @@ int main(int argc, char **argv) {
         // Save the inverted transformation
         reg_io_WriteImageFile(outputTransImage, param->outputTransName);
         // Free the allocated images
-        nifti_image_free(inputTransImage);
         nifti_image_free(outputTransImage);
     }
     /* ***************************************** */
@@ -1246,12 +1233,10 @@ int main(int argc, char **argv) {
     /* ********************************************************** */
     if (flag->flirtAff2NRFlag) {
         mat44 affine;
-        nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName);
-        nifti_image *floatingImage = reg_io_ReadImageHeader(param->referenceImage2Name);
+        NiftiImage referenceImage = reg_io_ReadImageFile(param->referenceImageName, true);
+        NiftiImage floatingImage = reg_io_ReadImageFile(param->referenceImage2Name, true);
         reg_tool_ReadAffineFile(&affine, referenceImage, floatingImage, param->inputTransName, true);
         reg_tool_WriteAffineFile(&affine, param->outputTransName);
-        nifti_image_free(referenceImage);
-        nifti_image_free(floatingImage);
     }
     // Free allocated object
     free(param);
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index fa945192..1a70197c 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -10,7 +10,6 @@
  */
 
 #include "_reg_ReadWriteImage.h"
-#include "_reg_tools.h"
 #include <filesystem>
 
 /* *************************************************************** */
@@ -49,52 +48,21 @@ int reg_io_checkFileFormat(const std::string& filename) {
     return NR_NII_FORMAT;
 }
 /* *************************************************************** */
-nifti_image* reg_io_ReadImageFile(const char *filename) {
+NiftiImage reg_io_ReadImageFile(const char *filename, const bool onlyHeader) {
     // First read the file format in order to use the correct library
     const int fileFormat = reg_io_checkFileFormat(filename);
 
-    // Create the nifti image pointer
-    nifti_image *image = nullptr;
+    // Create a nifti image
+    nifti_image *image;
 
     // Read the image and convert it to nifti format if required
     switch (fileFormat) {
     case NR_NII_FORMAT:
-        image = nifti_image_read(filename, true);
+        image = nifti_image_read(filename, !onlyHeader);
         reg_hack_filename(image, filename);
         break;
     case NR_PNG_FORMAT:
-        image = reg_io_readPNGfile(filename, true);
-        reg_hack_filename(image, filename);
-        break;
-#ifdef USE_NRRD
-    case NR_NRRD_FORMAT:
-        Nrrd *nrrdImage = reg_io_readNRRDfile(filename);
-        image = reg_io_nrdd2nifti(nrrdImage);
-        nrrdNuke(nrrdImage);
-        reg_hack_filename(image, filename);
-        break;
-#endif
-    }
-    reg_checkAndCorrectDimension(image);
-
-    // Return the nifti image
-    return image;
-}
-/* *************************************************************** */
-nifti_image* reg_io_ReadImageHeader(const char *filename) {
-    // First read the file format in order to use the correct library
-    const int fileFormat = reg_io_checkFileFormat(filename);
-
-    // Create the nifti image pointer
-    nifti_image *image = nullptr;
-
-    // Read the image and convert it to nifti format if required
-    switch (fileFormat) {
-    case NR_NII_FORMAT:
-        image = nifti_image_read(filename, false);
-        break;
-    case NR_PNG_FORMAT:
-        image = reg_io_readPNGfile(filename, false);
+        image = reg_io_readPNGfile(filename, !onlyHeader);
         reg_hack_filename(image, filename);
         break;
 #ifdef USE_NRRD
@@ -130,8 +98,7 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) {
          image->nv > 1 ||
          image->nw > 1) &&
         fileFormat == NR_PNG_FORMAT) {
-        // If the image has more than two dimension,
-        // the filename is converted to nifti
+        // If the image has more than two dimensions, the filename is converted to nifti
         fname = filename;
         fname.replace(fname.find(".png"), 4, ".nii.gz");
         NR_WARN("The file can not be saved as png and is converted to nifti " << filename << " -> " << fname);
diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h
index 5eb0f372..aaea702f 100644
--- a/reg-io/_reg_ReadWriteImage.h
+++ b/reg-io/_reg_ReadWriteImage.h
@@ -36,28 +36,20 @@
 /** The function checks the file format using the provided filename
   * Nifti is returned by default if no format are specified
   * @param filename Filename of the input images
-  * @return Code, NIFTYREG_FILEFORMAT_TYPE,  that encode the file format
+  * @return Code, NIFTYREG_FILEFORMAT_TYPE, that encode the file format
   */
 int reg_io_checkFileFormat(const std::string& filename);
 /* *************************************************************** */
-/** The function expects a filename and returns a nifti_image structure
-  * The function will use to correct library and will return a NULL image
+/** The function expects a filename and returns a NiftiImage
+  * The function will use to correct library and will return an empty image
   * if the image can not be read
   * @param filename Filename of the input images
-  * @return Image as a nifti image
+  * @param onlyHeader If true, only the header information is read and the actual data is not stored
+  * @return Image as a NiftiImage
   */
-nifti_image *reg_io_ReadImageFile(const char *filename);
+NiftiImage reg_io_ReadImageFile(const char *filename, const bool onlyHeader = false);
 /* *************************************************************** */
-/** The function expects a filename and returns a nifti_image structure
-  * The function will use to correct library and will return a NULL image
-  * if the image can not be read
-  * Only the header information is read and the actual data is not store
-  * @param filename Filename of the input images
-  * @return Image as a nifti image
-  */
-nifti_image *reg_io_ReadImageHeader(const char *filename);
-/* *************************************************************** */
-/** The function expects a filename and nifti_image structure
+/** The function expects a filename and a nifti_image
   * The image will be converted to the format specified in the
   * filename before being saved
   * @param image Nifti image to be saved

From cbbdd00ef227b9b8ad769d33c8bc9125e0e4def2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Fri, 12 Jul 2024 20:21:13 +0300
Subject: [PATCH 309/314] Change the image acquisition behaviour of NiftiImage

Don't own the image pointer if it's constructed by using a nifti_image pointer
---
 niftyreg_build_version.txt                     |  2 +-
 reg-apps/reg_ppcnr.cpp                         |  2 +-
 reg-io/RNifti/NiftiImage.h                     | 18 ++++++++++--------
 reg-io/RNifti/NiftiImage_impl.h                | 15 ++++++++++-----
 reg-io/_reg_ReadWriteImage.cpp                 |  2 +-
 reg-lib/Compute.cpp                            |  2 --
 reg-test/reg_test_conjugateGradient.cpp        |  8 --------
 reg-test/reg_test_imageGradient.cpp            |  2 --
 reg-test/reg_test_interpolation.cpp            |  1 -
 reg-test/reg_test_normaliseGradient.cpp        |  1 -
 .../reg_test_regr_exponentiateGradient.cpp     |  2 --
 reg-test/reg_test_regr_resampleGradient.cpp    |  2 --
 reg-test/reg_test_regr_updateVelocityField.cpp |  2 --
 .../reg_test_voxelCentricToNodeCentric.cpp     |  1 -
 14 files changed, 23 insertions(+), 37 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 35606667..c15fb930 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-429
+430
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index 1724475a..e3c664f3 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -184,7 +184,7 @@ int main(int argc, char **argv)
          nifti_image_free(source);
          makesource->ndim=makesource->dim[0] = 4;
          makesource->nt = makesource->dim[4] = atoi(argv[++i]);
-         makesource->nvox = NiftiImage::calcVoxelNumber(makesource->nx, makesource->ndim);
+         makesource->nvox = NiftiImage::calcVoxelNumber(makesource, makesource->ndim);
          makesource->data = malloc(makesource->nvox * makesource->nbyper);
          char *temp_data = reinterpret_cast<char *>(makesource->data);
          for(int ii=0; ii<makesource->nt; ii++) // fill with file data
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 991d2447..361a71c4 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -876,7 +876,8 @@ class NiftiImage
     enum class Dim { NDim, X, Y, Z, T, U, V, W };    /**< Dimension enumeration */
 
     enum class Copy {           /**< Enumeration of copy options of the constructor */
-        None,                   /**< Do not copy the image */
+        None,                   /**< Do not copy the image but acquire the pointer without ownership */
+        Acquire,                /**< Do not copy the image but acquire the pointer with ownership */
         Image,                  /**< Copy the entire image */
         ImageInfo,              /**< Copy only the image info, and do not allocate data */
         ImageInfoAndAllocData   /**< Copy only the image info, and allocate and zero the data */
@@ -1306,8 +1307,9 @@ class NiftiImage
      * responsibility for freeing the associated memory. If the object currently wraps another
      * pointer, it will be released
      * @param image The pointer to wrap
+     * @param own If \c true, the object will take responsibility for freeing the memory
     **/
-    void acquire (nifti_image * const image);
+    void acquire (nifti_image * const image, const bool own = true);
 
     /**
      * Acquire the same pointer as another \c NiftiImage, incrementing the shared reference count
@@ -1316,7 +1318,7 @@ class NiftiImage
     void acquire (const NiftiImage &source)
     {
         refCount = source.refCount;
-        acquire(source.image);
+        acquire(source.image, refCount);
     }
 
     /**
@@ -1420,12 +1422,12 @@ class NiftiImage
     /**
      * Copy constructor
      * @param source Another \c NiftiImage object
-     * @param copy If \c Copy::None, the new object just wraps the same pointer as \c source; otherwise the image data is copied
+     * @param copy If \c Copy::None or \c Copy::Acquire, the new object just wraps the same pointer as \c source; otherwise the image data is copied
     **/
     NiftiImage (const NiftiImage &source, const Copy copy = Copy::Image)
         : NiftiImage()
     {
-        if (copy != Copy::None) {
+        if (copy != Copy::None && copy != Copy::Acquire) {
             this->copy(source, copy);
         } else {
             acquire(source);
@@ -1458,15 +1460,15 @@ class NiftiImage
     /**
      * Initialise using an existing \c nifti_image pointer
      * @param image An existing \c nifti_image pointer, possibly \c nullptr
-     * @param copy If \c Copy::None, the new object just wraps the pointer passed to it; otherwise the image data is copied
+     * @param copy If \c Copy::None or \c Copy::Acquire, the new object just wraps the pointer passed to it with or without ownership; otherwise the image data is copied
     **/
     NiftiImage (nifti_image * const image, const Copy copy = Copy::None)
         : NiftiImage()
     {
-        if (copy != Copy::None)
+        if (copy != Copy::None && copy != Copy::Acquire)
             this->copy(image, copy);
         else
-            acquire(image);
+            acquire(image, copy == Copy::Acquire);
         RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from pointer)", RNIFTI_NIFTILIB_VERSION, this->image);
     }
 
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index bf4b359b..7672f407 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -705,7 +705,7 @@ inline int NiftiImage::fileVersion (const std::string &path)
 #endif
 }
 
-inline void NiftiImage::acquire (nifti_image * const image)
+inline void NiftiImage::acquire (nifti_image * const image, const bool own)
 {
     // If we're taking ownership of a new image, release the old one
     if (this->image != nullptr && this->image != image)
@@ -715,11 +715,16 @@ inline void NiftiImage::acquire (nifti_image * const image)
     this->image = image;
     if (image != nullptr)
     {
-        if (this->refCount == nullptr)
-            this->refCount = new int(1);
+        if (own) {
+            if (this->refCount == nullptr)
+                this->refCount = new int(1);
+            else
+                (*this->refCount)++;
+        }
+        if (this->refCount != nullptr)
+            RN_DEBUG("Acquiring pointer %p (v%d; reference count is %d)", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount);
         else
-            (*this->refCount)++;
-        RN_DEBUG("Acquiring pointer %p (v%d; reference count is %d)", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount);
+            RN_DEBUG("Acquiring pointer %p without ownership (v%d)", this->image, RNIFTI_NIFTILIB_VERSION);
     }
 }
 
diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp
index 1a70197c..28a9e998 100644
--- a/reg-io/_reg_ReadWriteImage.cpp
+++ b/reg-io/_reg_ReadWriteImage.cpp
@@ -77,7 +77,7 @@ NiftiImage reg_io_ReadImageFile(const char *filename, const bool onlyHeader) {
     reg_checkAndCorrectDimension(image);
 
     // Return the nifti image
-    return image;
+    return NiftiImage(image, NiftiImage::Copy::Acquire);
 }
 /* *************************************************************** */
 void reg_io_WriteImageFile(nifti_image *image, const char *filename) {
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 5c7882a9..329e48fd 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -167,8 +167,6 @@ void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optim
     const size_t voxelsPerVolume = transformationGradient.nVoxelsPerVolume();
 #endif
 
-    transformationGradient.disown();
-
 #ifdef _OPENMP
 #pragma omp parallel for default(none) \
     shared(voxelsPerVolume, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLenInv)
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 411af7c5..6f39ef3c 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -246,17 +246,14 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") {
             NiftiImage img = content->GetControlPointGrid();
             // Use bestControlPointGrid to store bestDof during initialisation of the optimiser
             img.copyData(bestControlPointGrid);
-            img.disown();
             content->UpdateControlPointGrid();
 
             // Set the transformation gradients
             img = content->GetTransformationGradient();
             img.copyData(transGrad);
-            img.disown();
             content->UpdateTransformationGradient();
             img = contentBw->GetTransformationGradient();
             img.copyData(transGradBw);
-            img.disown();
             contentBw->UpdateTransformationGradient();
 
             // Create a copy of the control point grid for expected results
@@ -272,7 +269,6 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") {
             img = content->GetControlPointGrid();
             const auto cppPtr = img.data();
             const auto cppExpPtr = controlPointGridExpected.data();
-            img.disown();
             for (size_t i = 0; i < controlPointGridExpected.nVoxels(); ++i) {
                 const float cppVal = cppPtr[i];
                 const float cppExpVal = cppExpPtr[i];
@@ -312,12 +308,10 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") {
                     // Update the transformation gradients
                     img = content->GetTransformationGradient();
                     img.copyData(transGrad);
-                    img.disown();
                     content->UpdateTransformationGradient();
                     if (isSymmetric) {
                         img = contentBw->GetTransformationGradient();
                         img.copyData(transGradBw);
-                        img.disown();
                         contentBw->UpdateTransformationGradient();
                     }
 
@@ -329,13 +323,11 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") {
                     img = content->GetTransformationGradient();
                     const auto gradPtr = img.data();
                     const auto gradExpPtr = transGrad.data();
-                    img.disown();
                     NiftiImageData gradBwPtr, gradExpBwPtr;
                     if (isSymmetric) {
                         img = contentBw->GetTransformationGradient();
                         gradBwPtr = img.data();
                         gradExpBwPtr = transGradBw.data();
-                        img.disown();
                     }
                     for (size_t i = 0; i < transGrad.nVoxels(); ++i) {
                         const float gradVal = gradPtr[i];
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 25cbd12a..22e53ad1 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -186,7 +186,6 @@ TEST_CASE("Image Gradient", "[unit]") {
                 warpedGradient.setDim(NiftiDim::Z, 1);
                 warpedGradient.setDim(NiftiDim::U, defField->nu);
                 warpedGradient.recalcVoxelNumber();
-                warpedGradient.disown();
 
                 // Set the deformation field
                 content->SetDeformationField(defField.disown());
@@ -199,7 +198,6 @@ TEST_CASE("Image Gradient", "[unit]") {
                 warpedGradient = content->GetWarpedGradient();
                 const auto warpedGradPtr = warpedGradient.data();
                 const size_t nVoxels = warpedGradient.nVoxels();
-                warpedGradient.disown();
                 for (size_t i = 0; i < nVoxels; ++i) {
                     const float warpedGradVal = warpedGradPtr[i];
                     const auto diff = abs(warpedGradVal - testResult[i]);
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index e2699492..c46e817e 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -226,7 +226,6 @@ TEST_CASE("Interpolation", "[unit]") {
                 warped = content->GetWarped();
                 const auto warpedPtr = warped.data();
                 const size_t nVoxels = warped.nVoxels();
-                warped.disown();
                 for (size_t i = 0; i < nVoxels; ++i) {
                     const float warpedValue = warpedPtr[i];
                     const float diff = abs(warpedValue - testResult[i]);
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index cba026ce..08e9b0d3 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -100,7 +100,6 @@ class NormaliseGradientTest {
                             // Set the transformation gradient image to host the computation
                             NiftiImage transGrad = content->GetTransformationGradient();
                             transGrad.copyData(expTransGrad);
-                            transGrad.disown();
                             content->UpdateTransformationGradient();
 
                             // Calculate the maximal length
diff --git a/reg-test/reg_test_regr_exponentiateGradient.cpp b/reg-test/reg_test_regr_exponentiateGradient.cpp
index 81f50055..db24ff79 100644
--- a/reg-test/reg_test_regr_exponentiateGradient.cpp
+++ b/reg-test/reg_test_regr_exponentiateGradient.cpp
@@ -126,7 +126,6 @@ class ExponentiateGradientTest {
             voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
             voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
             voxelGrad.copyData(voxelBasedGrad);
-            voxelGrad.disown();
             contentCpu->UpdateVoxelBasedMeasureGradient();
             voxelGrad = contentCuda->DefContent::GetVoxelBasedMeasureGradient();
             voxelGrad->sform_code = voxelBasedGrad->sform_code;
@@ -135,7 +134,6 @@ class ExponentiateGradientTest {
             voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
             voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
             voxelGrad.copyData(voxelBasedGrad);
-            voxelGrad.disown();
             contentCuda->UpdateVoxelBasedMeasureGradient();
 
             // Create the computes
diff --git a/reg-test/reg_test_regr_resampleGradient.cpp b/reg-test/reg_test_regr_resampleGradient.cpp
index 638cb190..0eadbce3 100644
--- a/reg-test/reg_test_regr_resampleGradient.cpp
+++ b/reg-test/reg_test_regr_resampleGradient.cpp
@@ -103,7 +103,6 @@ class ResampleGradientTest {
             voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
             voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
             voxelGrad.copyData(voxelBasedGrad);
-            voxelGrad.disown();
             contentCpu->UpdateVoxelBasedMeasureGradient();
             voxelGrad = contentCuda->DefContent::GetVoxelBasedMeasureGradient();
             voxelGrad->sform_code = voxelBasedGrad->sform_code;
@@ -112,7 +111,6 @@ class ResampleGradientTest {
             voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
             voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
             voxelGrad.copyData(voxelBasedGrad);
-            voxelGrad.disown();
             contentCuda->UpdateVoxelBasedMeasureGradient();
 
             // Create the computes
diff --git a/reg-test/reg_test_regr_updateVelocityField.cpp b/reg-test/reg_test_regr_updateVelocityField.cpp
index 48a1aefb..5e85062b 100644
--- a/reg-test/reg_test_regr_updateVelocityField.cpp
+++ b/reg-test/reg_test_regr_updateVelocityField.cpp
@@ -80,11 +80,9 @@ class UpdateVelocityFieldTest {
                         // Set the transformation gradient image to host the computation
                         NiftiImage transGradCpu = contentCpu->GetTransformationGradient();
                         transGradCpu.copyData(transGrad);
-                        transGradCpu.disown();
                         contentCpu->UpdateTransformationGradient();
                         NiftiImage transGradCuda = contentCuda->GetTransformationGradient();
                         transGradCuda.copyData(transGrad);
-                        transGradCuda.disown();
                         contentCuda->UpdateTransformationGradient();
 
                         // Create the computes
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index aa42def3..7d807217 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -115,7 +115,6 @@ class VoxelCentricToNodeCentricTest {
                 const float weight = distr(gen);
                 NiftiImage expTransGrad(transGrad, NiftiImage::Copy::ImageInfoAndAllocData);
                 VoxelCentricToNodeCentric<float>(floating, expTransGrad, voxelGrad, weight);
-                transGrad.disown(); voxelGrad.disown();
 
                 // Extract the node-based NMI gradient from the voxel-based NMI gradient
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };

From 1cea3dfa3aac0ab06aa404e250efec4d6954b402 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 19 Aug 2024 15:45:54 +0100
Subject: [PATCH 310/314] Revert "Enable CRT secure warnings"

This reverts commit 6cbbccd4d1452ad7870126a3f455164888dcd703.
---
 CMakeLists.txt             | 2 ++
 niftyreg_build_version.txt | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8c0415d..a5b24daf 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,6 +50,8 @@ if(GIT_FOUND)
 endif(GIT_FOUND)
 #-----------------------------------------------------------------------------
 if(MSVC)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS")
   set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj")
   set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj")
 endif(MSVC)
diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index c15fb930..ed4f1620 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-430
+431

From 291590086b4fa106413afd9d9bdacd8b7659c1ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Mon, 19 Aug 2024 15:53:17 +0100
Subject: [PATCH 311/314] Fix *Content::CastImageData() and eliminate its
 duplicates

---
 niftyreg_build_version.txt         |   2 +-
 reg-io/RNifti/NiftiImage.h         |  51 +++++++++--
 reg-lib/cl/ClAladinContent.cpp     |  53 +++---------
 reg-lib/cl/ClAladinContent.h       |   5 +-
 reg-lib/cpu/_reg_resampling.cpp    | 134 ++---------------------------
 reg-lib/cuda/CudaAladinContent.cpp |  50 +++--------
 reg-lib/cuda/CudaAladinContent.h   |   8 +-
 reg-lib/cuda/CudaContent.cpp       |  40 ++-------
 reg-lib/cuda/CudaContent.h         |   3 +-
 9 files changed, 80 insertions(+), 266 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index ed4f1620..84796bf1 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-431
+432
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 361a71c4..16461ddd 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1814,8 +1814,8 @@ class NiftiImage
     /**
      * Return the datatype of the image
      * @return A variant holding a NIfTI datatype
-    */
-    static DataType getDataType(const nifti_image *image)
+    **/
+    static DataType getDataType (const nifti_image *image)
     {
         if (image == nullptr)
             throw std::runtime_error("Cannot get datatype of null image");
@@ -1840,14 +1840,14 @@ class NiftiImage
     /**
      * Return the datatype of the image
      * @return A variant holding a NIfTI datatype
-    */
-    DataType getDataType() const { return getDataType(image); }
+    **/
+    DataType getDataType () const { return getDataType(image); }
 
     /**
      * Return the datatype of the image, if it is a floating-point type
      * @return A variant holding a NIfTI datatype
-    */
-    static std::variant<float, double> getFloatingDataType(const nifti_image *image)
+    **/
+    static std::variant<float, double> getFloatingDataType (const nifti_image *image)
     {
         if (image == nullptr)
             throw std::runtime_error("Cannot get datatype of null image");
@@ -1864,8 +1864,8 @@ class NiftiImage
     /**
      * Return the datatype of the image, if it is a floating-point type
      * @return A variant holding a NIfTI datatype
-    */
-    std::variant<float, double> getFloatingDataType() const { return getFloatingDataType(image); }
+    **/
+    std::variant<float, double> getFloatingDataType () const { return getFloatingDataType(image); }
 
     /**
      * Replace the pixel data in the image with the contents of a vector
@@ -1891,7 +1891,7 @@ class NiftiImage
      * @param other The image from which to copy the data
      * @exception runtime_error If the lengths and datatypes of the two images do not match
      * @return Self, after copying the data
-    */
+    **/
     NiftiImage & copyData (const nifti_image *other);
 
     /**
@@ -1909,6 +1909,39 @@ class NiftiImage
         return *this;
     }
 
+    /**
+     * Clamp an image value to the range of the datatype of the image
+     * @param image The image holding the datatype information
+     * @param value The image value to be clamped
+     * @return The clamped value
+    **/
+    static long double clampData(const nifti_image *image, const long double value)
+    {
+        return std::visit([&](auto&& dataType) -> long double {
+            using DataType = std::decay_t<decltype(dataType)>;
+
+            if (image->datatype == DT_FLOAT32 || image->datatype == DT_FLOAT64)
+                return value;
+            if (value != value) return 0; // Check for NaN
+            if (value < std::numeric_limits<DataType>::min())
+                return std::numeric_limits<DataType>::min();
+            else if (value > static_cast<long double>(std::numeric_limits<DataType>::max()))
+                return static_cast<long double>(std::numeric_limits<DataType>::max());
+            else
+                return value;
+        }, NiftiImage::getDataType(image));
+    }
+
+    // Delete the overload that accepts NiftiImage; use the member function instead
+    static long double clampData (const NiftiImage& image, const long double value) = delete;
+
+    /**
+     * Clamp an image value to the range of the datatype of the image
+     * @param value The image value to be clamped
+     * @return The clamped value
+     */
+    long double clampData (const long double value) const { return clampData(image, value); }
+
     /**
      * Reallocate the image data, preserving the metadata
      * @note Recalculates the number of voxels in the image and updates the nvox field
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index 49a78646..45fac34e 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -222,55 +222,22 @@ cl_mem ClAladinContent::GetFloMatClmem() {
     return floMatClmem;
 }
 /* *************************************************************** */
-template<class DataType>
-DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) {
-    switch (datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        return static_cast<DataType>(intensity);
-    case NIFTI_TYPE_FLOAT64:
-        return static_cast<DataType>(intensity);
-    case NIFTI_TYPE_UINT8:
-        if (intensity != intensity)
-            intensity = 0;
-        intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
-        return static_cast<unsigned char>(intensity > 0 ? Round(intensity) : 0);
-    case NIFTI_TYPE_UINT16:
-        if (intensity != intensity)
-            intensity = 0;
-        intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
-        return static_cast<unsigned short>(intensity > 0 ? Round(intensity) : 0);
-    case NIFTI_TYPE_UINT32:
-        if (intensity != intensity)
-            intensity = 0;
-        intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
-        return static_cast<unsigned>(intensity > 0 ? Round(intensity) : 0);
-    default:
-        if (intensity != intensity)
-            intensity = 0;
-        return static_cast<DataType>(Round(intensity));
-    }
-}
-/* *************************************************************** */
-template<class T>
-void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int type) {
-    size_t size = image->nvox;
-    float* buffer = nullptr;
-    buffer = (float*)malloc(size * sizeof(float));
-    if (buffer == nullptr)
-        NR_FATAL_ERROR("Memory allocation did not complete successfully");
+template<typename DataType>
+void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int datatype) {
+    const size_t size = image->nvox;
+    unique_ptr<float[]> buffer(new float[size]);
 
     errNum = clEnqueueReadBuffer(commandQueue, memoryObject, CL_TRUE, 0,
-                                 size * sizeof(float), buffer, 0, nullptr, nullptr);
+                                 size * sizeof(float), buffer.get(), 0, nullptr, nullptr);
     sContext->CheckErrNum(errNum, "Error reading warped buffer.");
 
     free(image->data);
-    image->datatype = type;
-    image->nbyper = sizeof(T);
-    image->data = malloc(image->nvox * image->nbyper);
-    T* dataT = static_cast<T*>(image->data);
+    image->datatype = datatype;
+    image->nbyper = sizeof(DataType);
+    image->data = malloc(size * image->nbyper);
+    DataType *data = static_cast<DataType*>(image->data);
     for (size_t i = 0; i < size; ++i)
-        dataT[i] = FillWarpedImageData<T>(buffer[i], type);
-    free(buffer);
+        data[i] = static_cast<DataType>(NiftiImage::clampData(image, buffer[i]));
 }
 /* *************************************************************** */
 void ClAladinContent::DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype) {
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
index 3c184871..7de5039b 100644
--- a/reg-lib/cl/ClAladinContent.h
+++ b/reg-lib/cl/ClAladinContent.h
@@ -62,11 +62,8 @@ class ClAladinContent: public AladinContent {
     cl_mem refMatClmem;
     cl_mem floMatClmem;
 
+    template<typename DataType> void FillImageData(nifti_image *image, cl_mem memoryObject, int datatype);
     void DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype);
-    template<class T>
-    void FillImageData(nifti_image *image, cl_mem memoryObject, int type);
-    template<class T>
-    T FillWarpedImageData(float intensity, int datatype);
 
 #ifdef NR_TESTING
 public:
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 6fe684c5..4a6f9447 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -458,37 +458,7 @@ void ResampleImage3D(const nifti_image *floatingImage,
                 }
             }
 
-            switch (floatingImage->datatype) {
-            case NIFTI_TYPE_FLOAT32:
-                warpedIntensity[index] = static_cast<FloatingType>(intensity);
-                break;
-            case NIFTI_TYPE_FLOAT64:
-                warpedIntensity[index] = static_cast<FloatingType>(intensity);
-                break;
-            case NIFTI_TYPE_UINT8:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                break;
-            case NIFTI_TYPE_UINT16:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                break;
-            case NIFTI_TYPE_UINT32:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                break;
-            default:
-                if (intensity != intensity)
-                    intensity = 0;
-                warpedIntensity[index] = static_cast<FloatingType>(Round(intensity));
-                break;
-            }
+            warpedIntensity[index] = static_cast<FloatingType>(NiftiImage::clampData(floatingImage, intensity));
         }
     }
 }
@@ -606,29 +576,7 @@ void ResampleImage2D(const nifti_image *floatingImage,
                     intensity += xTempNewValue * yBasis[b];
                 }
 
-                switch (floatingImage->datatype) {
-                case NIFTI_TYPE_FLOAT32:
-                    warpedIntensity[index] = static_cast<FloatingType>(intensity);
-                    break;
-                case NIFTI_TYPE_FLOAT64:
-                    warpedIntensity[index] = static_cast<FloatingType>(intensity);
-                    break;
-                case NIFTI_TYPE_UINT8:
-                    intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
-                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                    break;
-                case NIFTI_TYPE_UINT16:
-                    intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
-                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                    break;
-                case NIFTI_TYPE_UINT32:
-                    intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
-                    warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                    break;
-                default:
-                    warpedIntensity[index] = static_cast<FloatingType>(Round(intensity));
-                    break;
-                }
+                warpedIntensity[index] = static_cast<FloatingType>(NiftiImage::clampData(floatingImage, intensity));
             }
         }
     }
@@ -976,37 +924,8 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
                     intensity = paddingValue;
                 }
             } // if in mask
-            switch (floatingImage->datatype) {
-            case NIFTI_TYPE_FLOAT32:
-                warpedIntensity[index] = static_cast<FloatingType>(intensity);
-                break;
-            case NIFTI_TYPE_FLOAT64:
-                warpedIntensity[index] = static_cast<FloatingType>(intensity);
-                break;
-            case NIFTI_TYPE_UINT8:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                break;
-            case NIFTI_TYPE_UINT16:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                break;
-            case NIFTI_TYPE_UINT32:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                break;
-            default:
-                if (intensity != intensity)
-                    intensity = 0;
-                warpedIntensity[index] = static_cast<FloatingType>(Round(intensity));
-                break;
-            }
+
+            warpedIntensity[index] = static_cast<FloatingType>(NiftiImage::clampData(floatingImage, intensity));
         }
     }
 }
@@ -1379,49 +1298,8 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
                     intensity = paddingValue;
                 }
             } // if in mask
-            switch (floatingImage->datatype) {
-            case NIFTI_TYPE_FLOAT32:
-                warpedIntensity[index] = static_cast<FloatingType>(intensity);
-                break;
-            case NIFTI_TYPE_FLOAT64:
-                warpedIntensity[index] = static_cast<FloatingType>(intensity);
-                break;
-            case NIFTI_TYPE_UINT8:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                break;
-            case NIFTI_TYPE_UINT16:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                break;
-            case NIFTI_TYPE_UINT32:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity > 0 ? Round(intensity) : 0);
-                break;
-            case NIFTI_TYPE_INT16:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 32767 ? Round(intensity) : 32767); // 32767=2^15-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity);
-                break;
-            case NIFTI_TYPE_INT32:
-                if (intensity != intensity)
-                    intensity = 0;
-                intensity = (intensity <= 2147483647 ? Round(intensity) : 2147483647); // 2147483647=2^31-1
-                warpedIntensity[index] = static_cast<FloatingType>(intensity);
-                break;
-            default:
-                if (intensity != intensity)
-                    intensity = 0;
-                warpedIntensity[index] = static_cast<FloatingType>(Round(intensity));
-                break;
-            }
+
+            warpedIntensity[index] = static_cast<FloatingType>(NiftiImage::clampData(floatingImage, intensity));
         }
     }
 }
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index 84be113d..e3bf130e 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -192,48 +192,20 @@ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
     }
 }
 /* *************************************************************** */
-template<class DataType>
-DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) {
-    switch (datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        return static_cast<float>(intensity);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        return static_cast<double>(intensity);
-        break;
-    case NIFTI_TYPE_UINT8:
-        intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
-        return static_cast<unsigned char>(intensity > 0 ? Round(intensity) : 0);
-        break;
-    case NIFTI_TYPE_UINT16:
-        intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
-        return static_cast<unsigned short>(intensity > 0 ? Round(intensity) : 0);
-        break;
-    case NIFTI_TYPE_UINT32:
-        intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
-        return static_cast<unsigned>(intensity > 0 ? Round(intensity) : 0);
-        break;
-    default:
-        return static_cast<DataType>(Round(intensity));
-        break;
-    }
-}
-/* *************************************************************** */
-template<class T>
-void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, int type) {
-    size_t size = image->nvox;
-    float *buffer = (float*)malloc(size * sizeof(float));
+template<typename DataType>
+void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, int datatype) {
+    const size_t size = image->nvox;
+    unique_ptr<float[]> buffer(new float[size]);
 
-    Cuda::TransferFromDeviceToHost<float>(buffer, memoryObject, size);
+    Cuda::TransferFromDeviceToHost(buffer.get(), memoryObject, size);
 
     free(image->data);
-    image->datatype = type;
-    image->nbyper = sizeof(T);
-    image->data = malloc(image->nvox * image->nbyper);
-    T* dataT = static_cast<T*>(image->data);
+    image->datatype = datatype;
+    image->nbyper = sizeof(DataType);
+    image->data = malloc(size * image->nbyper);
+    DataType *data = static_cast<DataType*>(image->data);
     for (size_t i = 0; i < size; ++i)
-        dataT[i] = FillWarpedImageData<T>(buffer[i], type);
-    free(buffer);
+        data[i] = static_cast<DataType>(NiftiImage::clampData(image, buffer[i]));
 }
 /* *************************************************************** */
 void CudaAladinContent::DownloadImage(nifti_image *image, float *memoryObject, int datatype) {
@@ -263,7 +235,7 @@ void CudaAladinContent::DownloadImage(nifti_image *image, float *memoryObject, i
         FillImageData<int>(image, memoryObject, datatype);
         break;
     default:
-        NR_FATAL_ERROR("CUDA: unsupported type");
+        NR_FATAL_ERROR("Unsupported type");
     }
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
index bae204bf..6521829a 100644
--- a/reg-lib/cuda/CudaAladinContent.h
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -54,12 +54,8 @@ class CudaAladinContent: public AladinContent {
     float *referenceMat_d;
     float *floIJKMat_d;
 
-    void DownloadImage(nifti_image *image, float* memoryObject, int datatype);
-    template<class T>
-    void FillImageData(nifti_image *image, float* memoryObject, int type);
-
-    template<class FloatingTYPE>
-    FloatingTYPE FillWarpedImageData(float intensity, int datatype);
+    template<typename DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
+    void DownloadImage(nifti_image *image, float *memoryObject, int datatype);
 
 #ifdef NR_TESTING
 public:
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index 08ed8e91..a1f02b0e 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -144,48 +144,20 @@ void CudaContent::UpdateWarped() {
     Cuda::TransferNiftiToDevice(warpedCuda, warped);
 }
 /* *************************************************************** */
-template<class DataType>
-DataType CudaContent::CastImageData(float intensity, int datatype) {
-    switch (datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        return static_cast<float>(intensity);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        return static_cast<double>(intensity);
-        break;
-    case NIFTI_TYPE_UINT8:
-        intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1
-        return static_cast<unsigned char>(intensity > 0 ? Round(intensity) : 0);
-        break;
-    case NIFTI_TYPE_UINT16:
-        intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1
-        return static_cast<unsigned short>(intensity > 0 ? Round(intensity) : 0);
-        break;
-    case NIFTI_TYPE_UINT32:
-        intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1
-        return static_cast<unsigned>(intensity > 0 ? Round(intensity) : 0);
-        break;
-    default:
-        return static_cast<DataType>(Round(intensity));
-        break;
-    }
-}
-/* *************************************************************** */
-template<class DataType>
+template<typename DataType>
 void CudaContent::FillImageData(nifti_image *image, float *memoryObject, int datatype) {
-    size_t size = image->nvox;
-    float *buffer = (float*)malloc(size * sizeof(float));
+    const size_t size = image->nvox;
+    unique_ptr<float[]> buffer(new float[size]);
 
-    Cuda::TransferFromDeviceToHost(buffer, memoryObject, size);
+    Cuda::TransferFromDeviceToHost(buffer.get(), memoryObject, size);
 
     free(image->data);
     image->datatype = datatype;
     image->nbyper = sizeof(DataType);
     image->data = malloc(size * image->nbyper);
-    DataType* data = static_cast<DataType*>(image->data);
+    DataType *data = static_cast<DataType*>(image->data);
     for (size_t i = 0; i < size; ++i)
-        data[i] = CastImageData<DataType>(buffer[i], datatype);
-    free(buffer);
+        data[i] = static_cast<DataType>(NiftiImage::clampData(image, buffer[i]));
 }
 /* *************************************************************** */
 void CudaContent::DownloadImage(nifti_image *image, float *memoryObject, int datatype) {
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index bf3230c4..d5225ba6 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -46,8 +46,7 @@ class CudaContent: public virtual Content {
     void DeallocateDeformationField();
     void AllocateWarped();
     void DeallocateWarped();
-    template<class DataType> DataType CastImageData(float intensity, int datatype);
-    template<class DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
+    template<typename DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
     void DownloadImage(nifti_image *image, float *memoryObject, int datatype);
     void SetReferenceCuda(float *referenceCudaIn) { referenceCudaManaged = nullptr; referenceCuda = referenceCudaIn; }
     void SetFloatingCuda(float *floatingCudaIn) { floatingCudaManaged = nullptr; floatingCuda = floatingCudaIn; }

From d515493bb6bd8feb8d97111d4976c41500e2dcaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 27 Aug 2024 12:19:35 +0100
Subject: [PATCH 312/314] Use NiftiImage in Content classes

---
 niftyreg_build_version.txt                    |   2 +-
 reg-apps/reg_tools.cpp                        |   2 +-
 reg-apps/reg_transform.cpp                    |   6 +-
 reg-io/RNifti/NiftiImage.h                    |  33 ++--
 reg-io/RNifti/NiftiImage_impl.h               |  18 +--
 reg-lib/AladinContent.cpp                     |   4 +-
 reg-lib/AladinContent.h                       |   6 +-
 reg-lib/AladinContentCreator.h                |   4 +-
 reg-lib/Compute.cpp                           |  81 ++++------
 reg-lib/Compute.h                             |   6 +-
 reg-lib/Content.cpp                           |  70 +++------
 reg-lib/Content.h                             |  28 ++--
 reg-lib/ContentCreator.h                      |   4 +-
 reg-lib/DefContent.cpp                        |  68 ++-------
 reg-lib/DefContent.h                          |  26 ++--
 reg-lib/DefContentCreator.h                   |   6 +-
 reg-lib/F3d2ContentCreator.h                  |  10 +-
 reg-lib/F3dContent.cpp                        |  29 +---
 reg-lib/F3dContent.h                          |  21 +--
 reg-lib/F3dContentCreator.h                   |   8 +-
 reg-lib/MeasureCreator.cpp                    |   6 +-
 reg-lib/Platform.cpp                          |  18 +--
 reg-lib/_reg_aladin.cpp                       |  10 +-
 reg-lib/_reg_aladin.h                         |   4 +-
 reg-lib/_reg_aladin_sym.cpp                   |   8 +-
 reg-lib/_reg_aladin_sym.h                     |   4 +-
 reg-lib/_reg_f3d.cpp                          |  22 +--
 reg-lib/_reg_f3d.h                            |   2 +-
 reg-lib/_reg_f3d2.cpp                         |  21 ++-
 reg-lib/_reg_f3d2.h                           |   2 +-
 reg-lib/cl/ClAladinContent.cpp                | 121 ++++++---------
 reg-lib/cl/ClAladinContent.h                  |  15 +-
 reg-lib/cl/ClAladinContentCreator.h           |   4 +-
 reg-lib/cl/ClBlockMatchingKernel.cpp          |   1 -
 reg-lib/cpu/CpuResampleImageKernel.cpp        |   2 +-
 reg-lib/cpu/_reg_localTrans.cpp               |  44 +++---
 reg-lib/cpu/_reg_localTrans.h                 |   4 +-
 reg-lib/cuda/CudaAladinContent.cpp            | 143 +++++++-----------
 reg-lib/cuda/CudaAladinContent.h              |  15 +-
 reg-lib/cuda/CudaAladinContentCreator.h       |   4 +-
 reg-lib/cuda/CudaCompute.cu                   |  58 +++----
 reg-lib/cuda/CudaCompute.h                    |   4 +-
 reg-lib/cuda/CudaContent.cpp                  |  76 +++-------
 reg-lib/cuda/CudaContent.h                    |  15 +-
 reg-lib/cuda/CudaContentCreator.h             |   4 +-
 reg-lib/cuda/CudaDefContent.cpp               |  14 +-
 reg-lib/cuda/CudaDefContent.h                 |  12 +-
 reg-lib/cuda/CudaDefContentCreator.h          |   6 +-
 reg-lib/cuda/CudaF3d2ContentCreator.h         |  10 +-
 reg-lib/cuda/CudaF3dContent.cpp               |  14 +-
 reg-lib/cuda/CudaF3dContent.h                 |  12 +-
 reg-lib/cuda/CudaF3dContentCreator.h          |   8 +-
 reg-lib/cuda/CudaMeasureCreator.cpp           |   6 +-
 reg-test/reg_test_affineDeformationField.cpp  |  18 +--
 reg-test/reg_test_blockMatching.cpp           |   7 +-
 reg-test/reg_test_composeField.cpp            |   8 +-
 reg-test/reg_test_conjugateGradient.cpp       |  27 ++--
 reg-test/reg_test_getDeformationField.cpp     |  14 +-
 reg-test/reg_test_imageGradient.cpp           |   6 +-
 reg-test/reg_test_interpolation.cpp           |   6 +-
 reg-test/reg_test_lncc.cpp                    |   4 +-
 reg-test/reg_test_nmi.cpp                     |   4 +-
 reg-test/reg_test_nmi_gradient.cpp            |   6 +-
 reg-test/reg_test_normaliseGradient.cpp       |  10 +-
 ..._test_regr_approxBendingEnergyGradient.cpp |   9 +-
 ...g_test_regr_approxLinearEnergyGradient.cpp |   9 +-
 reg-test/reg_test_regr_blockMatching.cpp      |   4 +-
 .../reg_test_regr_exponentiateGradient.cpp    |  40 +++--
 .../reg_test_regr_getDeformationField.cpp     |   7 +-
 reg-test/reg_test_regr_kernelConvolution.cpp  |  12 +-
 reg-test/reg_test_regr_lts.cpp                |   4 +-
 reg-test/reg_test_regr_measure.cpp            |  13 +-
 reg-test/reg_test_regr_resampleGradient.cpp   |  34 ++---
 ...reg_test_regr_symmetriseVelocityFields.cpp |   8 +-
 .../reg_test_regr_updateVelocityField.cpp     |  15 +-
 .../reg_test_voxelCentricToNodeCentric.cpp    |  17 ++-
 76 files changed, 579 insertions(+), 824 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 84796bf1..21fbd2eb 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-432
+433
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 451def0d..033077e5 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -1071,7 +1071,7 @@ int main(int argc, char **argv)
         if(image->datatype!=NIFTI_TYPE_FLOAT32)
             reg_tools_changeDatatype<float>(image);
         // Create a temporary mask
-        const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3);
+        const size_t voxelNumber = image.nVoxelsPerVolume();
         int *temp_mask = (int *)malloc(voxelNumber * sizeof(int));
         for (size_t i = 0; i < voxelNumber; ++i)
             temp_mask[i]=i;
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index 4ab60681..cb19fd66 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -1061,12 +1061,11 @@ int main(int argc, char **argv) {
             tempField->ndim = tempField->dim[0] = 5;
             tempField->nt = tempField->dim[4] = 1;
             tempField->nu = tempField->dim[5] = tempField->nz > 1 ? 3 : 2;
-            tempField->nvox = NiftiImage::calcVoxelNumber(tempField, tempField->ndim);
             tempField->nbyper = inputTransImage->nbyper;
             tempField->datatype = inputTransImage->datatype;
+            tempField.realloc();
             tempField->intent_code = NIFTI_INTENT_VECTOR;
-            memset(tempField->intent_name, 0, 16);
-            strcpy(tempField->intent_name, "NREG_TRANS");
+            tempField.setIntentName("NREG_TRANS"s);
             tempField->intent_p1 = DEF_FIELD;
             if (inputTransImage->intent_p1 == SPLINE_VEL_GRID) {
                 tempField->intent_p1 = DEF_VEL_FIELD;
@@ -1074,7 +1073,6 @@ int main(int argc, char **argv) {
             }
             tempField->scl_slope = 1.f;
             tempField->scl_inter = 0.f;
-            tempField->data = calloc(tempField->nvox, tempField->nbyper);
             // Compute the dense field
             if (inputTransImage->intent_p1 == LIN_SPLINE_GRID ||
                 inputTransImage->intent_p1 == CUB_SPLINE_GRID)
diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h
index 16461ddd..20646c77 100644
--- a/reg-io/RNifti/NiftiImage.h
+++ b/reg-io/RNifti/NiftiImage.h
@@ -1550,17 +1550,6 @@ class NiftiImage
     **/
     virtual ~NiftiImage () { release(); }
 
-    /**
-     * Disown the wrapped pointer, removing responsibility for freeing it upon destruction
-     * @return The wrapped pointer
-    */
-    nifti_image* disown ()
-    {
-        nifti_image *img = image;
-        image = nullptr;
-        return img;
-    }
-
     /**
      * Allows a \c NiftiImage object to be treated as a pointer to a \c const \c nifti_image
     **/
@@ -1813,6 +1802,7 @@ class NiftiImage
 
     /**
      * Return the datatype of the image
+     * @param image A pointer to a NIfTI image
      * @return A variant holding a NIfTI datatype
     **/
     static DataType getDataType (const nifti_image *image)
@@ -1837,6 +1827,9 @@ class NiftiImage
         }
     }
 
+    // Delete the overload that accepts NiftiImage; use the member function instead
+    static DataType getDataType(const NiftiImage&) = delete;
+
     /**
      * Return the datatype of the image
      * @return A variant holding a NIfTI datatype
@@ -1845,6 +1838,7 @@ class NiftiImage
 
     /**
      * Return the datatype of the image, if it is a floating-point type
+     * @param image A pointer to a NIfTI image
      * @return A variant holding a NIfTI datatype
     **/
     static std::variant<float, double> getFloatingDataType (const nifti_image *image)
@@ -1861,6 +1855,9 @@ class NiftiImage
         }
     }
 
+    // Delete the overload that accepts NiftiImage; use the member function instead
+    static std::variant<float, double> getFloatingDataType(const NiftiImage&) = delete;
+
     /**
      * Return the datatype of the image, if it is a floating-point type
      * @return A variant holding a NIfTI datatype
@@ -1888,11 +1885,11 @@ class NiftiImage
 
     /**
      * Copy the pixel data from another image
-     * @param other The image from which to copy the data
+     * @param source The image from which to copy the data
      * @exception runtime_error If the lengths and datatypes of the two images do not match
      * @return Self, after copying the data
     **/
-    NiftiImage & copyData (const nifti_image *other);
+    NiftiImage & copyData (const nifti_image *source);
 
     /**
      * Drop the data from the image, retaining only the metadata. This method invalidates any
@@ -2116,6 +2113,16 @@ class NiftiImage
         return voxelNumber;
     }
 
+    // Delete the overload that accepts NiftiImage; use the member function instead
+    static size_t calcVoxelNumber(const NiftiImage&, const int) = delete;
+
+    /**
+     * Calculate the number of voxels in the image
+     * @param dimCount Number of dimensions to consider
+     * @return The number of voxels in the image
+     */
+    size_t calcVoxelNumber(const int dimCount) const { return calcVoxelNumber(image, dimCount); }
+
     /**
      * Recalculate the number of voxels in the image and update the nvox field
     */
diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h
index 7672f407..15e503ec 100644
--- a/reg-io/RNifti/NiftiImage_impl.h
+++ b/reg-io/RNifti/NiftiImage_impl.h
@@ -1871,23 +1871,23 @@ inline NiftiImage & NiftiImage::replaceData (const NiftiImageData &data)
     return *this;
 }
 
-inline NiftiImage & NiftiImage::copyData (const nifti_image *other)
+inline NiftiImage & NiftiImage::copyData (const nifti_image *source)
 {
     if (this->isNull())
         return *this;
-    else if (other == nullptr || other->data == nullptr)
+    else if (source == nullptr || source->data == nullptr)
         throw std::runtime_error("Cannot copy data from a null image");
-    else if (other->nvox != image->nvox)
+    else if (source->nvox != image->nvox)
         throw std::runtime_error("Cannot copy data from an image with a different length");
-    else if (other->datatype != image->datatype)
+    else if (source->datatype != image->datatype || source->nbyper != image->nbyper)
         throw std::runtime_error("Cannot copy data from an image with a different datatype");
 
     // Copy the data
-    memcpy(image->data, other->data, totalBytes());
-    image->scl_slope = other->scl_slope;
-    image->scl_inter = other->scl_inter;
-    image->cal_min = other->cal_min;
-    image->cal_max = other->cal_max;
+    memcpy(image->data, source->data, totalBytes());
+    image->scl_slope = source->scl_slope;
+    image->scl_inter = source->scl_inter;
+    image->cal_min = source->cal_min;
+    image->cal_max = source->cal_max;
 
     return *this;
 }
diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp
index ab1a07af..f8299cdb 100755
--- a/reg-lib/AladinContent.cpp
+++ b/reg-lib/AladinContent.cpp
@@ -3,8 +3,8 @@
 using namespace std;
 
 /* *************************************************************** */
-AladinContent::AladinContent(nifti_image *referenceIn,
-                             nifti_image *floatingIn,
+AladinContent::AladinContent(NiftiImage& referenceIn,
+                             NiftiImage& floatingIn,
                              int *referenceMaskIn,
                              mat44 *transformationMatrixIn,
                              size_t bytesIn,
diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h
index 19cf8c28..8c8ba85b 100755
--- a/reg-lib/AladinContent.h
+++ b/reg-lib/AladinContent.h
@@ -12,8 +12,8 @@
 class AladinContent: public Content {
 public:
     AladinContent(const AladinContent&) = delete;
-    AladinContent(nifti_image *referenceIn,
-                  nifti_image *floatingIn,
+    AladinContent(NiftiImage& referenceIn,
+                  NiftiImage& floatingIn,
                   int *referenceMaskIn = nullptr,
                   mat44 *transformationMatrixIn = nullptr,
                   size_t bytesIn = sizeof(float),
@@ -28,7 +28,7 @@ class AladinContent: public Content {
     virtual _reg_blockMatchingParam* GetBlockMatchingParams() { return blockMatchingParams; }
 
 protected:
-    _reg_blockMatchingParam* blockMatchingParams;
+    _reg_blockMatchingParam *blockMatchingParams;
     unsigned currentPercentageOfBlockToUse;
     unsigned inlierLts;
     int stepSizeBlock;
diff --git a/reg-lib/AladinContentCreator.h b/reg-lib/AladinContentCreator.h
index 91d03be8..939fa524 100644
--- a/reg-lib/AladinContentCreator.h
+++ b/reg-lib/AladinContentCreator.h
@@ -5,8 +5,8 @@
 
 class AladinContentCreator: public ContentCreator {
 public:
-    virtual AladinContent* Create(nifti_image *reference,
-                                  nifti_image *floating,
+    virtual AladinContent* Create(NiftiImage& reference,
+                                  NiftiImage& floating,
                                   int *referenceMask = nullptr,
                                   mat44 *transformationMatrix = nullptr,
                                   size_t bytes = sizeof(float),
diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp
index 329e48fd..5b23e2da 100644
--- a/reg-lib/Compute.cpp
+++ b/reg-lib/Compute.cpp
@@ -95,7 +95,7 @@ void Compute::UpdateControlPointPosition(float *currentDof,
                                          const bool optimiseX,
                                          const bool optimiseY,
                                          const bool optimiseZ) {
-    const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
+    const NiftiImage& controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
     if (optimiseX && optimiseY && optimiseZ) {
         // Update the values for all axis displacement
         for (size_t i = 0; i < controlPointGrid->nvox; ++i)
@@ -139,7 +139,7 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active
 /* *************************************************************** */
 double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
     if (!optimiseX && !optimiseY && !optimiseZ) return 0;
-    const nifti_image *transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
+    const NiftiImage& transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
     switch (transformationGradient->datatype) {
     case NIFTI_TYPE_FLOAT32:
         return reg_getMaximalLength<float>(transformationGradient, optimiseX, optimiseY, optimiseZ);
@@ -151,7 +151,7 @@ double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ)
 /* *************************************************************** */
 void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
     if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return;
-    NiftiImage transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
+    NiftiImage& transformationGradient = dynamic_cast<F3dContent&>(con).GetTransformationGradient();
     const bool hasZ = transformationGradient->nz > 1;
     if (!hasZ) optimiseZ = false;
     NiftiImageData ptrX = transformationGradient.data(0);
@@ -191,8 +191,8 @@ void Compute::SmoothGradient(float sigma) {
 /* *************************************************************** */
 void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    nifti_image *controlPointGrid = con.GetControlPointGrid();
-    nifti_image *transformationGradient = con.GetTransformationGradient();
+    NiftiImage& controlPointGrid = con.GetControlPointGrid();
+    NiftiImage& transformationGradient = con.GetTransformationGradient();
     std::visit([&](auto&& cppDataType) {
         using Type = std::decay_t<decltype(cppDataType)>;
 
@@ -217,7 +217,7 @@ void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) {
         // Update the changes for GPU
         con.UpdateControlPointGrid();
         con.UpdateTransformationGradient();
-    }, NiftiImage::getFloatingDataType(controlPointGrid));
+    }, controlPointGrid.getFloatingDataType());
 }
 /* *************************************************************** */
 void Compute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) {
@@ -227,8 +227,8 @@ void Compute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) {
                                            updateStepNumber);
 }
 /* *************************************************************** */
-void Compute::ConvolveImage(nifti_image *image) {
-    const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
+void Compute::ConvolveImage(NiftiImage& image) {
+    const NiftiImage& controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
     constexpr ConvKernelType kernelType = ConvKernelType::Cubic;
     float currentNodeSpacing[3];
     currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
@@ -283,27 +283,27 @@ void Compute::ConvolveVoxelBasedMeasureGradient(float weight) {
 void Compute::ExponentiateGradient(Content& conBwIn) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
     F3dContent& conBw = dynamic_cast<F3dContent&>(conBwIn);
-    const nifti_image *deformationField = con.Content::GetDeformationField();
-    nifti_image *voxelBasedMeasureGradient = con.GetVoxelBasedMeasureGradient();
-    nifti_image *controlPointGridBw = conBw.GetControlPointGrid();
+    const NiftiImage& deformationField = con.Content::GetDeformationField();
+    NiftiImage& voxelBasedMeasureGradient = con.GetVoxelBasedMeasureGradient();
+    NiftiImage& controlPointGridBw = conBw.GetControlPointGrid();
     mat44 *affineTransformationBw = conBw.GetTransformationMatrix();
     const size_t compNum = size_t(fabs(controlPointGridBw->intent_p2)); // The number of composition
 
     /* Allocate a temporary gradient image to store the backward gradient */
-    nifti_image *tempGrad = nifti_dup(*voxelBasedMeasureGradient, false);
+    NiftiImage tempGrad(voxelBasedMeasureGradient, NiftiImage::Copy::ImageInfoAndAllocData);
 
     // Create all deformation field images needed for resampling
-    nifti_image **tempDef = (nifti_image**)malloc((compNum + 1) * sizeof(nifti_image*));
+    unique_ptr<NiftiImage[]> tempDef(new NiftiImage[compNum + 1]);
     for (size_t i = 0; i <= compNum; ++i)
-        tempDef[i] = nifti_dup(*deformationField, false);
+        tempDef[i] = NiftiImage(deformationField, NiftiImage::Copy::ImageInfoAndAllocData);
 
     // Generate all intermediate deformation fields
-    reg_spline_getIntermediateDefFieldFromVelGrid(controlPointGridBw, tempDef);
+    reg_spline_getIntermediateDefFieldFromVelGrid(controlPointGridBw, tempDef.get());
 
     // Remove the affine component
-    nifti_image *affineDisp = nullptr;
+    NiftiImage affineDisp;
     if (affineTransformationBw) {
-        affineDisp = nifti_dup(*deformationField, false);
+        affineDisp = NiftiImage(deformationField, NiftiImage::Copy::ImageInfoAndAllocData);
         reg_affine_getDeformationField(affineTransformationBw, affineDisp);
         reg_getDisplacementFromDeformation(affineDisp);
     }
@@ -325,25 +325,18 @@ void Compute::ExponentiateGradient(Content& conBwIn) {
     reg_tools_divideValueToImage(voxelBasedMeasureGradient, // in
                                  voxelBasedMeasureGradient, // out
                                  pow(2, compNum)); // value
-
-    for (size_t i = 0; i <= compNum; ++i)
-        nifti_image_free(tempDef[i]);
-    free(tempDef);
-    nifti_image_free(tempGrad);
-    if (affineDisp)
-        nifti_image_free(affineDisp);
 }
 /* *************************************************************** */
-nifti_image* Compute::ScaleGradient(const nifti_image& transformationGradient, float scale) {
-    nifti_image *scaledGradient = nifti_dup(transformationGradient, false);
-    reg_tools_multiplyValueToImage(&transformationGradient, scaledGradient, scale);
+NiftiImage Compute::ScaleGradient(const NiftiImage& transformationGradient, float scale) {
+    NiftiImage scaledGradient(transformationGradient, NiftiImage::Copy::ImageInfoAndAllocData);
+    reg_tools_multiplyValueToImage(transformationGradient, scaledGradient, scale);
     return scaledGradient;
 }
 /* *************************************************************** */
 void Compute::UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    nifti_image *scaledGradient = ScaleGradient(*con.GetTransformationGradient(), scale);
-    nifti_image *controlPointGrid = con.GetControlPointGrid();
+    NiftiImage scaledGradient = ScaleGradient(con.GetTransformationGradient(), scale);
+    NiftiImage& controlPointGrid = con.GetControlPointGrid();
 
     // Reset the gradient along the axes if appropriate
     reg_setGradientToZero(scaledGradient, !optimiseX, !optimiseY, !optimiseZ);
@@ -352,36 +345,31 @@ void Compute::UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, b
     reg_tools_addImageToImage(controlPointGrid,  // in
                               scaledGradient,    // in
                               controlPointGrid); // out
-
-    nifti_image_free(scaledGradient);
 }
 /* *************************************************************** */
 void Compute::BchUpdate(float scale, int bchUpdateValue) {
     F3dContent& con = dynamic_cast<F3dContent&>(this->con);
-    nifti_image *scaledGradient = ScaleGradient(*con.GetTransformationGradient(), scale);
-    nifti_image *controlPointGrid = con.GetControlPointGrid();
-
+    NiftiImage scaledGradient = ScaleGradient(con.GetTransformationGradient(), scale);
+    NiftiImage& controlPointGrid = con.GetControlPointGrid();
     compute_BCH_update(controlPointGrid, scaledGradient, bchUpdateValue);
-
-    nifti_image_free(scaledGradient);
 }
 /* *************************************************************** */
 void Compute::SymmetriseVelocityFields(Content& conBwIn) {
-    nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(this->con).GetControlPointGrid();
-    nifti_image *controlPointGridBw = dynamic_cast<F3dContent&>(conBwIn).GetControlPointGrid();
+    NiftiImage& controlPointGrid = dynamic_cast<F3dContent&>(this->con).GetControlPointGrid();
+    NiftiImage& controlPointGridBw = dynamic_cast<F3dContent&>(conBwIn).GetControlPointGrid();
 
     // In order to ensure symmetry, the forward and backward velocity fields
     // are averaged in both image spaces: reference and floating
-    nifti_image *warpedTrans = nifti_dup(*controlPointGridBw, false);
-    nifti_image *warpedTransBw = nifti_dup(*controlPointGrid, false);
+    NiftiImage warpedTrans(controlPointGridBw, NiftiImage::Copy::ImageInfoAndAllocData);
+    NiftiImage warpedTransBw(controlPointGrid, NiftiImage::Copy::ImageInfoAndAllocData);
 
     // Both parametrisations are converted into displacement
     reg_getDisplacementFromDeformation(controlPointGrid);
     reg_getDisplacementFromDeformation(controlPointGridBw);
 
     // Both parametrisations are copied over
-    memcpy(warpedTransBw->data, controlPointGridBw->data, warpedTransBw->nvox * warpedTransBw->nbyper);
-    memcpy(warpedTrans->data, controlPointGrid->data, warpedTrans->nvox * warpedTrans->nbyper);
+    warpedTrans.copyData(controlPointGrid);
+    warpedTransBw.copyData(controlPointGridBw);
 
     // and subtracted (sum and negation)
     reg_tools_subtractImageFromImage(controlPointGridBw,  // displacement
@@ -402,19 +390,16 @@ void Compute::SymmetriseVelocityFields(Content& conBwIn) {
     // Convert the velocity field from displacement to deformation
     reg_getDeformationFromDisplacement(controlPointGrid);
     reg_getDeformationFromDisplacement(controlPointGridBw);
-
-    nifti_image_free(warpedTrans);
-    nifti_image_free(warpedTransBw);
 }
 /* *************************************************************** */
-void Compute::DefFieldCompose(const nifti_image *defField) {
+void Compute::DefFieldCompose(const NiftiImage& defField) {
     reg_defField_compose(defField, con.GetDeformationField(), nullptr);
 }
 /* *************************************************************** */
 NiftiImage Compute::ResampleGradient(int interpolation, float padding) {
     DefContent& con = dynamic_cast<DefContent&>(this->con);
-    nifti_image *voxelBasedMeasureGradient = con.GetVoxelBasedMeasureGradient();
-    NiftiImage warpedImage = NiftiImage(voxelBasedMeasureGradient, NiftiImage::Copy::ImageInfoAndAllocData);
+    NiftiImage& voxelBasedMeasureGradient = con.GetVoxelBasedMeasureGradient();
+    NiftiImage warpedImage(voxelBasedMeasureGradient, NiftiImage::Copy::ImageInfoAndAllocData);
     reg_resampleGradient(voxelBasedMeasureGradient, warpedImage, con.GetDeformationField(), interpolation, padding);
     return warpedImage;
 }
diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h
index fdf3e673..a8a8852d 100644
--- a/reg-lib/Compute.h
+++ b/reg-lib/Compute.h
@@ -39,11 +39,11 @@ class Compute {
 #ifdef NR_TESTING
 public:
 #endif
-    virtual void DefFieldCompose(const nifti_image *defField);
+    virtual void DefFieldCompose(const NiftiImage& defField);
     virtual NiftiImage ResampleGradient(int interpolation, float padding);
     virtual void VoxelCentricToNodeCentric(float weight);
 
 private:
-    void ConvolveImage(nifti_image*);
-    nifti_image* ScaleGradient(const nifti_image&, float);
+    void ConvolveImage(NiftiImage&);
+    NiftiImage ScaleGradient(const NiftiImage&, float);
 };
diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp
index b64a48b8..2b8fbcbd 100644
--- a/reg-lib/Content.cpp
+++ b/reg-lib/Content.cpp
@@ -2,63 +2,49 @@
 #include "_reg_tools.h"
 
 /* *************************************************************** */
-Content::Content(nifti_image *referenceIn,
-                 nifti_image *floatingIn,
+Content::Content(NiftiImage& referenceIn,
+                 NiftiImage& floatingIn,
                  int *referenceMaskIn,
                  mat44 *transformationMatrixIn,
                  size_t bytesIn):
-    reference(referenceIn),
-    floating(floatingIn),
+    reference(NiftiImage(referenceIn, NiftiImage::Copy::Acquire)),
+    floating(NiftiImage(floatingIn, NiftiImage::Copy::Acquire)),
     referenceMask(referenceMaskIn),
     transformationMatrix(transformationMatrixIn) {
     if (!referenceIn || !floatingIn)
         NR_FATAL_ERROR("referenceIn or floatingIn can't be nullptr");
     AllocateWarped();
     AllocateDeformationField(bytesIn);
-    activeVoxelNumber = NiftiImage::calcVoxelNumber(reference, 3);
+    activeVoxelNumber = reference.nVoxelsPerVolume();
     if (!referenceMask) {
         referenceMaskManaged.reset(new int[activeVoxelNumber]());
         referenceMask = referenceMaskManaged.get();
     }
 }
 /* *************************************************************** */
-Content::~Content() {
-    DeallocateWarped();
-    DeallocateDeformationField();
-}
-/* *************************************************************** */
 void Content::AllocateWarped() {
-    warped = nifti_copy_nim_info(reference);
-    warped->dim[0] = warped->ndim = floating->ndim;
-    warped->dim[4] = warped->nt = floating->nt;
-    warped->pixdim[4] = warped->dt = 1;
-    warped->nvox = NiftiImage::calcVoxelNumber(warped, warped->ndim);
+    warped = NiftiImage(reference, NiftiImage::Copy::ImageInfo);
+    warped.setDim(NiftiDim::NDim, floating->ndim);
+    warped.setDim(NiftiDim::T, floating->nt);
+    warped.setPixDim(NiftiDim::T, 1);
     warped->datatype = floating->datatype;
     warped->nbyper = floating->nbyper;
-    warped->data = calloc(warped->nvox, warped->nbyper);
-}
-/* *************************************************************** */
-void Content::DeallocateWarped() {
-    if (warped) {
-        nifti_image_free(warped);
-        warped = nullptr;
-    }
+    warped.realloc();
 }
 /* *************************************************************** */
 void Content::AllocateDeformationField(size_t bytes) {
-    deformationField = nifti_copy_nim_info(reference);
-    deformationField->dim[0] = deformationField->ndim = 5;
-    if (reference->dim[0] == 2)
-        deformationField->dim[3] = deformationField->nz = 1;
-    deformationField->dim[4] = deformationField->nt = 1;
-    deformationField->pixdim[4] = deformationField->dt = 1;
-    deformationField->dim[5] = deformationField->nu = reference->nz > 1 ? 3 : 2;
-    deformationField->pixdim[5] = deformationField->du = 1;
-    deformationField->dim[6] = deformationField->nv = 1;
-    deformationField->pixdim[6] = deformationField->dv = 1;
-    deformationField->dim[7] = deformationField->nw = 1;
-    deformationField->pixdim[7] = deformationField->dw = 1;
-    deformationField->nvox = NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim);
+    deformationField = NiftiImage(reference, NiftiImage::Copy::ImageInfo);
+    deformationField.setDim(NiftiDim::NDim, 5);
+    if (reference->ndim == 2)
+        deformationField.setDim(NiftiDim::Z, 1);
+    deformationField.setDim(NiftiDim::T, 1);
+    deformationField.setPixDim(NiftiDim::T, 1);
+    deformationField.setDim(NiftiDim::U, reference->nz > 1 ? 3 : 2);
+    deformationField.setPixDim(NiftiDim::U, 1);
+    deformationField.setDim(NiftiDim::V, 1);
+    deformationField.setPixDim(NiftiDim::V, 1);
+    deformationField.setDim(NiftiDim::W, 1);
+    deformationField.setPixDim(NiftiDim::W, 1);
     deformationField->nbyper = (int)bytes;
     if (bytes == 4)
         deformationField->datatype = NIFTI_TYPE_FLOAT32;
@@ -67,21 +53,13 @@ void Content::AllocateDeformationField(size_t bytes) {
     else
         NR_FATAL_ERROR("Only float or double are expected for the deformation field");
     deformationField->intent_code = NIFTI_INTENT_VECTOR;
-    memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name));
-    strcpy(deformationField->intent_name, "NREG_TRANS");
+    deformationField.setIntentName("NREG_TRANS"s);
     // First create a displacement field filled with 0 to obtain an identity disp
     deformationField->intent_p1 = DISP_FIELD;
     deformationField->scl_slope = 1;
     deformationField->scl_inter = 0;
-    deformationField->data = calloc(deformationField->nvox, deformationField->nbyper);
+    deformationField.realloc();
     // Convert to an identity deformation field
     reg_getDeformationFromDisplacement(deformationField);
 }
 /* *************************************************************** */
-void Content::DeallocateDeformationField() {
-    if (deformationField) {
-        nifti_image_free(deformationField);
-        deformationField = nullptr;
-    }
-}
-/* *************************************************************** */
diff --git a/reg-lib/Content.h b/reg-lib/Content.h
index 7beb9e4a..7dd16957 100644
--- a/reg-lib/Content.h
+++ b/reg-lib/Content.h
@@ -5,23 +5,23 @@
 class Content {
 public:
     Content() = delete; // Can't be initialised without reference and floating images
-    Content(nifti_image *referenceIn,
-            nifti_image *floatingIn,
+    Content(NiftiImage& referenceIn,
+            NiftiImage& floatingIn,
             int *referenceMaskIn = nullptr,
             mat44 *transformationMatrixIn = nullptr,
             size_t bytesIn = sizeof(float));
-    virtual ~Content();
+    virtual ~Content() = default;
 
     virtual bool IsCurrentComputationDoubleCapable() { return true; }
 
     // Getters
     virtual size_t GetActiveVoxelNumber() { return activeVoxelNumber; }
-    virtual nifti_image* GetReference() { return reference; }
-    virtual nifti_image* GetFloating() { return floating; }
-    virtual nifti_image* GetDeformationField() { return deformationField; }
+    virtual NiftiImage& GetReference() { return reference; }
+    virtual NiftiImage& GetFloating() { return floating; }
+    virtual NiftiImage& GetDeformationField() { return deformationField; }
     virtual int* GetReferenceMask() { return referenceMask; }
     virtual mat44* GetTransformationMatrix() { return transformationMatrix; }
-    virtual nifti_image* GetWarped() { return warped; }
+    virtual NiftiImage& GetWarped() { return warped; }
 
     // Methods for transferring data from nifti to device
     virtual void UpdateDeformationField() {}
@@ -37,19 +37,17 @@ class Content {
 
 protected:
     size_t activeVoxelNumber = 0;
-    nifti_image *reference = nullptr;
-    nifti_image *floating = nullptr;
-    nifti_image *deformationField = nullptr;
+    NiftiImage reference;
+    NiftiImage floating;
+    NiftiImage deformationField;
     int *referenceMask = nullptr;
     unique_ptr<int[]> referenceMaskManaged;
     mat44 *transformationMatrix = nullptr;
-    nifti_image *warped = nullptr;
+    NiftiImage warped;
 
 private:
     void AllocateWarped();
-    void DeallocateWarped();
     void AllocateDeformationField(size_t bytes);
-    void DeallocateDeformationField();
 
 #ifdef NR_TESTING
 public:
@@ -57,8 +55,8 @@ class Content {
 protected:
 #endif
     // Functions for testing
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) { DeallocateDeformationField(); deformationField = deformationFieldIn; }
+    virtual void SetDeformationField(NiftiImage&& deformationFieldIn) { deformationField = std::move(deformationFieldIn); }
     virtual void SetReferenceMask(int *referenceMaskIn) { referenceMask = referenceMaskIn; }
     virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { transformationMatrix = transformationMatrixIn; }
-    virtual void SetWarped(nifti_image *warpedIn) { DeallocateWarped(); warped = warpedIn; }
+    virtual void SetWarped(NiftiImage&& warpedIn) { warped = std::move(warpedIn); }
 };
diff --git a/reg-lib/ContentCreator.h b/reg-lib/ContentCreator.h
index 050bdba8..52b586f8 100644
--- a/reg-lib/ContentCreator.h
+++ b/reg-lib/ContentCreator.h
@@ -4,8 +4,8 @@
 
 class ContentCreator {
 public:
-    virtual Content* Create(nifti_image *reference,
-                            nifti_image *floating,
+    virtual Content* Create(NiftiImage& reference,
+                            NiftiImage& floating,
                             int *referenceMask = nullptr,
                             mat44 *transformationMatrix = nullptr,
                             size_t bytes = sizeof(float)) {
diff --git a/reg-lib/DefContent.cpp b/reg-lib/DefContent.cpp
index 6885153e..b8fc2a57 100644
--- a/reg-lib/DefContent.cpp
+++ b/reg-lib/DefContent.cpp
@@ -2,66 +2,30 @@
 #include "_reg_resampling.h"
 
 /* *************************************************************** */
-DefContent::DefContent(nifti_image *referenceIn,
-                       nifti_image *floatingIn,
-                       nifti_image *localWeightSimIn,
+DefContent::DefContent(NiftiImage& referenceIn,
+                       NiftiImage& floatingIn,
+                       NiftiImage *localWeightSimIn,
                        int *referenceMaskIn,
                        mat44 *transformationMatrixIn,
                        size_t bytesIn):
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn) {
-    AllocateWarpedGradient();
-    AllocateVoxelBasedMeasureGradient();
-    AllocateLocalWeightSim(localWeightSimIn);
-}
-/* *************************************************************** */
-DefContent::~DefContent() {
-    DeallocateWarpedGradient();
-    DeallocateVoxelBasedMeasureGradient();
-    DeallocateLocalWeightSim();
-}
-/* *************************************************************** */
-void DefContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) {
-    if (!localWeightSimIn) return;
-    localWeightSim = nifti_copy_nim_info(reference);
-    localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0];
-    localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4];
-    localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5];
-    localWeightSim->nvox = NiftiImage::calcVoxelNumber(localWeightSim, localWeightSim->ndim);
-    localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper);
+    warpedGradient = NiftiImage(deformationField, NiftiImage::Copy::ImageInfoAndAllocData);
+    voxelBasedMeasureGradient = NiftiImage(deformationField, NiftiImage::Copy::ImageInfoAndAllocData);
+    if (localWeightSimIn && *localWeightSimIn)
+        AllocateLocalWeightSim(*localWeightSimIn);
+}
+/* *************************************************************** */
+void DefContent::AllocateLocalWeightSim(NiftiImage& localWeightSimIn) {
+    localWeightSim = NiftiImage(reference, NiftiImage::Copy::ImageInfo);
+    localWeightSim.setDim(NiftiDim::NDim, localWeightSimIn->dim[0]);
+    localWeightSim.setDim(NiftiDim::T, localWeightSimIn->dim[4]);
+    localWeightSim.setDim(NiftiDim::U, localWeightSimIn->dim[5]);
+    localWeightSim.realloc();
     reg_getDeformationFromDisplacement(voxelBasedMeasureGradient);
     reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0);
 }
 /* *************************************************************** */
-void DefContent::DeallocateLocalWeightSim() {
-    if (localWeightSim) {
-        nifti_image_free(localWeightSim);
-        localWeightSim = nullptr;
-    }
-}
-/* *************************************************************** */
-void DefContent::AllocateWarpedGradient() {
-    warpedGradient = nifti_dup(*deformationField, false);
-}
-/* *************************************************************** */
-void DefContent::DeallocateWarpedGradient() {
-    if (warpedGradient) {
-        nifti_image_free(warpedGradient);
-        warpedGradient = nullptr;
-    }
-}
-/* *************************************************************** */
-void DefContent::AllocateVoxelBasedMeasureGradient() {
-    voxelBasedMeasureGradient = nifti_dup(*deformationField, false);
-}
-/* *************************************************************** */
-void DefContent::DeallocateVoxelBasedMeasureGradient() {
-    if (voxelBasedMeasureGradient) {
-        nifti_image_free(voxelBasedMeasureGradient);
-        voxelBasedMeasureGradient = nullptr;
-    }
-}
-/* *************************************************************** */
 void DefContent::ZeroVoxelBasedMeasureGradient() {
-    memset(voxelBasedMeasureGradient->data, 0, voxelBasedMeasureGradient->nvox * voxelBasedMeasureGradient->nbyper);
+    memset(voxelBasedMeasureGradient->data, 0, voxelBasedMeasureGradient.totalBytes());
 }
 /* *************************************************************** */
diff --git a/reg-lib/DefContent.h b/reg-lib/DefContent.h
index a5ccab6f..d6ee0313 100644
--- a/reg-lib/DefContent.h
+++ b/reg-lib/DefContent.h
@@ -5,18 +5,17 @@
 class DefContent: public virtual Content {
 public:
     DefContent() = delete;
-    DefContent(nifti_image *referenceIn,
-               nifti_image *floatingIn,
-               nifti_image *localWeightSimIn = nullptr,
+    DefContent(NiftiImage& referenceIn,
+               NiftiImage& floatingIn,
+               NiftiImage *localWeightSimIn = nullptr,
                int *referenceMaskIn = nullptr,
                mat44 *transformationMatrixIn = nullptr,
                size_t bytesIn = sizeof(float));
-    virtual ~DefContent();
 
     // Getters
-    virtual nifti_image* GetLocalWeightSim() { return localWeightSim; }
-    virtual nifti_image* GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; }
-    virtual nifti_image* GetWarpedGradient() { return warpedGradient; }
+    virtual NiftiImage& GetLocalWeightSim() { return localWeightSim; }
+    virtual NiftiImage& GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; }
+    virtual NiftiImage& GetWarpedGradient() { return warpedGradient; }
 
     // Methods for transferring data from nifti to device
     virtual void UpdateVoxelBasedMeasureGradient() {}
@@ -26,15 +25,10 @@ class DefContent: public virtual Content {
     virtual void ZeroVoxelBasedMeasureGradient();
 
 protected:
-    nifti_image *localWeightSim = nullptr;
-    nifti_image *voxelBasedMeasureGradient = nullptr;
-    nifti_image *warpedGradient = nullptr;
+    NiftiImage localWeightSim;
+    NiftiImage voxelBasedMeasureGradient;
+    NiftiImage warpedGradient;
 
 private:
-    void AllocateLocalWeightSim(nifti_image*);
-    void DeallocateLocalWeightSim();
-    void AllocateVoxelBasedMeasureGradient();
-    void DeallocateVoxelBasedMeasureGradient();
-    void AllocateWarpedGradient();
-    void DeallocateWarpedGradient();
+    void AllocateLocalWeightSim(NiftiImage&);
 };
diff --git a/reg-lib/DefContentCreator.h b/reg-lib/DefContentCreator.h
index dce3ba86..e8302616 100644
--- a/reg-lib/DefContentCreator.h
+++ b/reg-lib/DefContentCreator.h
@@ -5,9 +5,9 @@
 
 class DefContentCreator: public ContentCreator {
 public:
-    virtual DefContent* Create(nifti_image *reference,
-                               nifti_image *floating,
-                               nifti_image *localWeightSim = nullptr,
+    virtual DefContent* Create(NiftiImage& reference,
+                               NiftiImage& floating,
+                               NiftiImage *localWeightSim = nullptr,
                                int *referenceMask = nullptr,
                                mat44 *transformationMatrix = nullptr,
                                size_t bytes = sizeof(float)) {
diff --git a/reg-lib/F3d2ContentCreator.h b/reg-lib/F3d2ContentCreator.h
index 106b5ede..6141d6bb 100644
--- a/reg-lib/F3d2ContentCreator.h
+++ b/reg-lib/F3d2ContentCreator.h
@@ -5,11 +5,11 @@
 
 class F3d2ContentCreator: public ContentCreator {
 public:
-    virtual std::pair<F3dContent*, F3dContent*> Create(nifti_image *reference,
-                                                       nifti_image *floating,
-                                                       nifti_image *controlPointGrid,
-                                                       nifti_image *controlPointGridBw,
-                                                       nifti_image *localWeightSim = nullptr,
+    virtual std::pair<F3dContent*, F3dContent*> Create(NiftiImage& reference,
+                                                       NiftiImage& floating,
+                                                       NiftiImage& controlPointGrid,
+                                                       NiftiImage& controlPointGridBw,
+                                                       NiftiImage *localWeightSim = nullptr,
                                                        int *referenceMask = nullptr,
                                                        int *floatingMask = nullptr,
                                                        mat44 *transformationMatrix = nullptr,
diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp
index 6dee6030..e0a36c6d 100644
--- a/reg-lib/F3dContent.cpp
+++ b/reg-lib/F3dContent.cpp
@@ -1,37 +1,22 @@
 #include "F3dContent.h"
 
 /* *************************************************************** */
-F3dContent::F3dContent(nifti_image *referenceIn,
-                       nifti_image *floatingIn,
-                       nifti_image *controlPointGridIn,
-                       nifti_image *localWeightSimIn,
+F3dContent::F3dContent(NiftiImage& referenceIn,
+                       NiftiImage& floatingIn,
+                       NiftiImage& controlPointGridIn,
+                       NiftiImage *localWeightSimIn,
                        int *referenceMaskIn,
                        mat44 *transformationMatrixIn,
                        size_t bytesIn):
     DefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, bytesIn),
     Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn),
-    controlPointGrid(controlPointGridIn) {
+    controlPointGrid(NiftiImage(controlPointGridIn, NiftiImage::Copy::Acquire)) {
     if (!controlPointGridIn)
         NR_FATAL_ERROR("controlPointGridIn can't be nullptr");
-    AllocateTransformationGradient();
-}
-/* *************************************************************** */
-F3dContent::~F3dContent() {
-    DeallocateTransformationGradient();
-}
-/* *************************************************************** */
-void F3dContent::AllocateTransformationGradient() {
-    transformationGradient = nifti_dup(*controlPointGrid, false);
-}
-/* *************************************************************** */
-void F3dContent::DeallocateTransformationGradient() {
-    if (transformationGradient != nullptr) {
-        nifti_image_free(transformationGradient);
-        transformationGradient = nullptr;
-    }
+    transformationGradient = NiftiImage(controlPointGrid, NiftiImage::Copy::ImageInfoAndAllocData);
 }
 /* *************************************************************** */
 void F3dContent::ZeroTransformationGradient() {
-    memset(transformationGradient->data, 0, transformationGradient->nvox * transformationGradient->nbyper);
+    memset(transformationGradient->data, 0, transformationGradient.totalBytes());
 }
 /* *************************************************************** */
diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h
index f09157c0..c36f5634 100644
--- a/reg-lib/F3dContent.h
+++ b/reg-lib/F3dContent.h
@@ -5,18 +5,17 @@
 class F3dContent: public virtual DefContent {
 public:
     F3dContent() = delete;
-    F3dContent(nifti_image *referenceIn,
-               nifti_image *floatingIn,
-               nifti_image *controlPointGridIn,
-               nifti_image *localWeightSimIn = nullptr,
+    F3dContent(NiftiImage& referenceIn,
+               NiftiImage& floatingIn,
+               NiftiImage& controlPointGridIn,
+               NiftiImage *localWeightSimIn = nullptr,
                int *referenceMaskIn = nullptr,
                mat44 *transformationMatrixIn = nullptr,
                size_t bytesIn = sizeof(float));
-    virtual ~F3dContent();
 
     // Getters
-    virtual nifti_image* GetControlPointGrid() { return controlPointGrid; }
-    virtual nifti_image* GetTransformationGradient() { return transformationGradient; }
+    virtual NiftiImage& GetControlPointGrid() { return controlPointGrid; }
+    virtual NiftiImage& GetTransformationGradient() { return transformationGradient; }
 
     // Methods for transferring data from nifti to device
     virtual void UpdateControlPointGrid() {}
@@ -26,10 +25,6 @@ class F3dContent: public virtual DefContent {
     virtual void ZeroTransformationGradient();
 
 protected:
-    nifti_image *controlPointGrid = nullptr;
-    nifti_image *transformationGradient = nullptr;
-
-private:
-    void AllocateTransformationGradient();
-    void DeallocateTransformationGradient();
+    NiftiImage controlPointGrid;
+    NiftiImage transformationGradient;
 };
\ No newline at end of file
diff --git a/reg-lib/F3dContentCreator.h b/reg-lib/F3dContentCreator.h
index d57657b0..2ee586dc 100644
--- a/reg-lib/F3dContentCreator.h
+++ b/reg-lib/F3dContentCreator.h
@@ -5,10 +5,10 @@
 
 class F3dContentCreator: public ContentCreator {
 public:
-    virtual F3dContent* Create(nifti_image *reference,
-                               nifti_image *floating,
-                               nifti_image *controlPointGrid,
-                               nifti_image *localWeightSim = nullptr,
+    virtual F3dContent* Create(NiftiImage& reference,
+                               NiftiImage& floating,
+                               NiftiImage& controlPointGrid,
+                               NiftiImage *localWeightSim = nullptr,
                                int *referenceMask = nullptr,
                                mat44 *transformationMatrix = nullptr,
                                size_t bytes = sizeof(float)) {
diff --git a/reg-lib/MeasureCreator.cpp b/reg-lib/MeasureCreator.cpp
index 6ff56f71..473f8359 100644
--- a/reg-lib/MeasureCreator.cpp
+++ b/reg-lib/MeasureCreator.cpp
@@ -38,8 +38,8 @@ void MeasureCreator::Initialise(reg_measure& measure, DefContent& con, DefConten
                               con.GetVoxelBasedMeasureGradient(),
                               con.GetLocalWeightSim(),
                               conBw ? conBw->GetReferenceMask() : nullptr,
-                              conBw ? conBw->GetWarped() : nullptr,
-                              conBw ? conBw->GetWarpedGradient() : nullptr,
-                              conBw ? conBw->GetVoxelBasedMeasureGradient() : nullptr);
+                              conBw ? static_cast<nifti_image*>(conBw->GetWarped()) : nullptr,
+                              conBw ? static_cast<nifti_image*>(conBw->GetWarpedGradient()) : nullptr,
+                              conBw ? static_cast<nifti_image*>(conBw->GetVoxelBasedMeasureGradient()) : nullptr);
 }
 /* *************************************************************** */
diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp
index 482089fa..8a88df01 100755
--- a/reg-lib/Platform.cpp
+++ b/reg-lib/Platform.cpp
@@ -119,27 +119,27 @@ Optimiser<Type>* Platform::CreateOptimiser(F3dContent& con,
                                            F3dContent *conBw) const {
     Optimiser<Type> *optimiser;
     nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
-    nifti_image *controlPointGridBw = conBw ? conBw->F3dContent::GetControlPointGrid() : nullptr;
+    nifti_image *controlPointGridBw = conBw ? static_cast<nifti_image*>(conBw->F3dContent::GetControlPointGrid()) : nullptr;
     Type *controlPointGridData, *transformationGradientData;
     Type *controlPointGridDataBw = nullptr, *transformationGradientDataBw = nullptr;
 
     if (platformType == PlatformType::Cpu) {
         optimiser = useConjGradient ? new ConjugateGradient<Type>() : new Optimiser<Type>();
-        controlPointGridData = (Type*)controlPointGrid->data;
-        transformationGradientData = (Type*)con.GetTransformationGradient()->data;
+        controlPointGridData = static_cast<Type*>(controlPointGrid->data);
+        transformationGradientData = static_cast<Type*>(con.GetTransformationGradient()->data);
         if (conBw) {
-            controlPointGridDataBw = (Type*)controlPointGridBw->data;
-            transformationGradientDataBw = (Type*)conBw->GetTransformationGradient()->data;
+            controlPointGridDataBw = static_cast<Type*>(controlPointGridBw->data);
+            transformationGradientDataBw = static_cast<Type*>(conBw->GetTransformationGradient()->data);
         }
     }
 #ifdef USE_CUDA
     else if (platformType == PlatformType::Cuda) {
         optimiser = dynamic_cast<Optimiser<Type>*>(useConjGradient ? new CudaConjugateGradient() : new CudaOptimiser());
-        controlPointGridData = (Type*)dynamic_cast<CudaF3dContent&>(con).GetControlPointGridCuda();
-        transformationGradientData = (Type*)dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda();
+        controlPointGridData = reinterpret_cast<Type*>(dynamic_cast<CudaF3dContent&>(con).GetControlPointGridCuda());
+        transformationGradientData = reinterpret_cast<Type*>(dynamic_cast<CudaF3dContent&>(con).GetTransformationGradientCuda());
         if (conBw) {
-            controlPointGridDataBw = (Type*)dynamic_cast<CudaF3dContent*>(conBw)->GetControlPointGridCuda();
-            transformationGradientDataBw = (Type*)dynamic_cast<CudaF3dContent*>(conBw)->GetTransformationGradientCuda();
+            controlPointGridDataBw = reinterpret_cast<Type*>(dynamic_cast<CudaF3dContent*>(conBw)->GetControlPointGridCuda());
+            transformationGradientDataBw = reinterpret_cast<Type*>(dynamic_cast<CudaF3dContent*>(conBw)->GetTransformationGradientCuda());
         }
     }
 #endif
diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp
index 032aeb97..959c7470 100644
--- a/reg-lib/_reg_aladin.cpp
+++ b/reg-lib/_reg_aladin.cpp
@@ -287,8 +287,8 @@ void reg_aladin<T>::UpdateTransformationMatrix(int type) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_aladin<T>::InitAladinContent(nifti_image *ref,
-                                      nifti_image *flo,
+void reg_aladin<T>::InitAladinContent(NiftiImage& ref,
+                                      NiftiImage& flo,
                                       int *mask,
                                       mat44 *transMat,
                                       size_t bytes,
@@ -384,7 +384,7 @@ NiftiImage reg_aladin<T>::GetFinalWarpedImage() {
 
     reg_aladin<T>::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation
 
-    NiftiImage warpedImage(this->con->GetWarped(), NiftiImage::Copy::Image);
+    NiftiImage warpedImage(this->con->GetWarped());
     warpedImage->cal_min = this->inputFloating->cal_min;
     warpedImage->cal_max = this->inputFloating->cal_max;
     warpedImage->scl_slope = this->inputFloating->scl_slope;
@@ -397,8 +397,8 @@ NiftiImage reg_aladin<T>::GetFinalWarpedImage() {
 /* *************************************************************** */
 template<class T>
 void reg_aladin<T>::DebugPrintLevelInfoStart() {
-    const nifti_image *ref = this->con->Content::GetReference();
-    const nifti_image *flo = this->con->Content::GetFloating();
+    const NiftiImage& ref = this->con->Content::GetReference();
+    const NiftiImage& flo = this->con->Content::GetFloating();
     NR_VERBOSE("Current level " << this->currentLevel + 1 << " / " << this->numberOfLevels);
     NR_VERBOSE("Reference image size:\t" << ref->nx << "x" << ref->ny << "x" << ref->nz << " voxels\t" <<
                ref->dx << "x" << ref->dy << "x" << ref->dz << " mm");
diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h
index 9096688d..17f544b1 100644
--- a/reg-lib/_reg_aladin.h
+++ b/reg-lib/_reg_aladin.h
@@ -117,8 +117,8 @@ class reg_aladin {
     void *paramsProgressCallback;
 
     //platform factory methods
-    virtual void InitAladinContent(nifti_image *ref,
-                                   nifti_image *flo,
+    virtual void InitAladinContent(NiftiImage& ref,
+                                   NiftiImage& flo,
                                    int *mask,
                                    mat44 *transMat,
                                    size_t bytes,
diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp
index 62cdd753..2fa2ff18 100644
--- a/reg-lib/_reg_aladin_sym.cpp
+++ b/reg-lib/_reg_aladin_sym.cpp
@@ -157,8 +157,8 @@ void reg_aladin_sym<T>::UpdateTransformationMatrix(int type) {
 }
 /* *************************************************************** */
 template <class T>
-void reg_aladin_sym<T>::InitAladinContent(nifti_image *ref,
-                                          nifti_image *flo,
+void reg_aladin_sym<T>::InitAladinContent(NiftiImage& ref,
+                                          NiftiImage& flo,
                                           int *mask,
                                           mat44 *transMat,
                                           size_t bytes,
@@ -203,8 +203,8 @@ void reg_aladin_sym<T>::DeallocateKernels() {
 /* *************************************************************** */
 template <class T>
 void reg_aladin_sym<T>::DebugPrintLevelInfoStart() {
-    const nifti_image *ref = this->con->Content::GetReference();
-    const nifti_image *flo = this->con->Content::GetFloating();
+    const NiftiImage& ref = this->con->Content::GetReference();
+    const NiftiImage& flo = this->con->Content::GetFloating();
     NR_VERBOSE("Current level " << this->currentLevel + 1 << " / " << this->numberOfLevels);
     NR_VERBOSE("Reference image size:\t" << ref->nx << "x" << ref->ny << "x" << ref->nz << " voxels\t" <<
                ref->dx << "x" << ref->dy << "x" << ref->dz << " mm");
diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h
index 6da18e76..028c5cb1 100644
--- a/reg-lib/_reg_aladin_sym.h
+++ b/reg-lib/_reg_aladin_sym.h
@@ -21,8 +21,8 @@ class reg_aladin_sym: public reg_aladin<T> {
     unique_ptr<AladinContent> backCon;
     unique_ptr<Kernel> bAffineTransformation3DKernel, bConvolutionKernel, bBlockMatchingKernel, bLtsKernel, bResamplingKernel;
 
-    virtual void InitAladinContent(nifti_image *ref,
-                                   nifti_image *flo,
+    virtual void InitAladinContent(NiftiImage& ref,
+                                   NiftiImage& flo,
                                    int *mask,
                                    mat44 *transMat,
                                    size_t bytes,
diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp
index 1f005525..8aa5f57b 100644
--- a/reg-lib/_reg_f3d.cpp
+++ b/reg-lib/_reg_f3d.cpp
@@ -82,29 +82,29 @@ void reg_f3d<T>::SetSpacing(unsigned i, T s) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d<T>::InitContent(nifti_image *reference, nifti_image *floating, int *mask) {
+void reg_f3d<T>::InitContent(NiftiImage& reference, NiftiImage& floating, int *mask) {
     unique_ptr<F3dContentCreator> contentCreator{ dynamic_cast<F3dContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d)) };
-    this->con.reset(contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation.get(), sizeof(T)));
+    this->con.reset(contentCreator->Create(reference, floating, controlPointGrid, &this->localWeightSimInput, mask, this->affineTransformation.get(), sizeof(T)));
     this->compute.reset(this->platform->CreateCompute(*this->con));
 }
 /* *************************************************************** */
 template<class T>
 T reg_f3d<T>::InitCurrentLevel(int currentLevel) {
     // Set the current input images
-    nifti_image *reference, *floating;
+    NiftiImage reference, floating;
     int *mask;
     if (currentLevel < 0) {
         // Settings for GetWarpedImage()
         // Use CPU for warping since CUDA isn't supporting Cubic interpolation
         // TODO Remove this when CUDA supports Cubic interpolation
         this->SetPlatformType(PlatformType::Cpu);
-        reference = this->inputReference;
-        floating = this->inputFloating;
+        reference = NiftiImage(this->inputReference, NiftiImage::Copy::Acquire);
+        floating = NiftiImage(this->inputFloating, NiftiImage::Copy::Acquire);
         mask = nullptr;
     } else {
         const int index = this->usePyramid ? currentLevel : 0;
-        reference = this->referencePyramid[index];
-        floating = this->floatingPyramid[index];
+        reference = NiftiImage(this->referencePyramid[index], NiftiImage::Copy::Acquire);
+        floating = NiftiImage(this->floatingPyramid[index], NiftiImage::Copy::Acquire);
         mask = this->maskPyramid[index].get();
     }
 
@@ -411,8 +411,8 @@ T reg_f3d<T>::NormaliseGradient() {
 /* *************************************************************** */
 template<class T>
 void reg_f3d<T>::DisplayCurrentLevelParameters(int currentLevel) {
-    const nifti_image *reference = this->con->Content::GetReference();
-    const nifti_image *floating = this->con->Content::GetFloating();
+    const NiftiImage& reference = this->con->Content::GetReference();
+    const NiftiImage& floating = this->con->Content::GetFloating();
     NR_VERBOSE("Current level: " << currentLevel + 1 << " / " << this->levelNumber);
     NR_VERBOSE("Maximum iteration number: " << this->maxIterationNumber);
     NR_VERBOSE("Current reference image");
@@ -507,11 +507,11 @@ vector<NiftiImage> reg_f3d<T>::GetWarpedImage() {
 
     InitCurrentLevel(-1);
     this->WarpFloatingImage(3); // cubic spline interpolation
-    NiftiImage warpedImage = NiftiImage(this->con->GetWarped(), NiftiImage::Copy::Image);
+    NiftiImage warpedImage = std::move(this->con->GetWarped());
     DeinitCurrentLevel(-1);
 
     NR_FUNC_CALLED();
-    return { warpedImage };
+    return { std::move(warpedImage) };
 }
 /* *************************************************************** */
 template<class T>
diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h
index a7a793ca..e13fbe1f 100644
--- a/reg-lib/_reg_f3d.h
+++ b/reg-lib/_reg_f3d.h
@@ -33,7 +33,7 @@ class reg_f3d: public reg_base<T> {
     double bestWBE;
     double bestWLE;
 
-    void InitContent(nifti_image*, nifti_image*, int*);
+    void InitContent(NiftiImage&, NiftiImage&, int*);
     virtual T InitCurrentLevel(int) override;
     virtual void DeinitCurrentLevel(int) override;
     virtual T NormaliseGradient() override;
diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp
index eaa7a6f0..47dae410 100644
--- a/reg-lib/_reg_f3d2.cpp
+++ b/reg-lib/_reg_f3d2.cpp
@@ -38,10 +38,10 @@ void reg_f3d2<T>::SetInverseConsistencyWeight(T w) {
 }
 /* *************************************************************** */
 template<class T>
-void reg_f3d2<T>::InitContent(nifti_image *reference, nifti_image *floating, int *referenceMask, int *floatingMask) {
+void reg_f3d2<T>::InitContent(NiftiImage& reference, NiftiImage& floating, int *referenceMask, int *floatingMask) {
     unique_ptr<F3d2ContentCreator> contentCreator{ dynamic_cast<F3d2ContentCreator*>(this->platform->CreateContentCreator(ContentType::F3d2)) };
     auto&& [con, conBw] = contentCreator->Create(reference, floating, this->controlPointGrid, controlPointGridBw,
-                                                 this->localWeightSimInput, referenceMask, floatingMask,
+                                                 &this->localWeightSimInput, referenceMask, floatingMask,
                                                  this->affineTransformation.get(), affineTransformationBw.get(), sizeof(T));
     this->con.reset(con);
     this->conBw.reset(conBw);
@@ -52,21 +52,21 @@ void reg_f3d2<T>::InitContent(nifti_image *reference, nifti_image *floating, int
 template <class T>
 T reg_f3d2<T>::InitCurrentLevel(int currentLevel) {
     // Set the current input images
-    nifti_image *reference, *floating;
+    NiftiImage reference, floating;
     int *referenceMask, *floatingMask;
     if (currentLevel < 0) {
         // Settings for GetWarpedImage()
         // Use CPU for warping since CUDA isn't supporting Cubic interpolation
         // TODO Remove this when CUDA supports Cubic interpolation
         this->SetPlatformType(PlatformType::Cpu);
-        reference = this->inputReference;
-        floating = this->inputFloating;
+        reference = NiftiImage(this->inputReference, NiftiImage::Copy::Acquire);
+        floating = NiftiImage(this->inputFloating, NiftiImage::Copy::Acquire);
         referenceMask = nullptr;
         floatingMask = nullptr;
     } else {
         const int index = this->usePyramid ? currentLevel : 0;
-        reference = this->referencePyramid[index];
-        floating = this->floatingPyramid[index];
+        reference = NiftiImage(this->referencePyramid[index], NiftiImage::Copy::Acquire);
+        floating = NiftiImage(this->floatingPyramid[index], NiftiImage::Copy::Acquire);
         referenceMask = this->maskPyramid[index].get();
         floatingMask = floatingMaskPyramid[index].get();
     }
@@ -666,15 +666,12 @@ vector<NiftiImage> reg_f3d2<T>::GetWarpedImage() {
     WarpFloatingImage(3); // cubic spline interpolation
 
     F3dContent& con = dynamic_cast<F3dContent&>(*this->con);
-    vector<NiftiImage> warpedImage{
-        NiftiImage(con.GetWarped(), NiftiImage::Copy::Image),
-        NiftiImage(conBw->GetWarped(), NiftiImage::Copy::Image)
-    };
+    vector<NiftiImage> warpedImages{ std::move(con.GetWarped()), std::move(conBw->GetWarped()) };
 
     DeinitCurrentLevel(-1);
 
     NR_FUNC_CALLED();
-    return warpedImage;
+    return warpedImages;
 }
 /* *************************************************************** */
 template class reg_f3d2<float>;
diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h
index c11c857e..12f83917 100644
--- a/reg-lib/_reg_f3d2.h
+++ b/reg-lib/_reg_f3d2.h
@@ -55,7 +55,7 @@ class reg_f3d2: public reg_f3d<T> {
     virtual void PrintCurrentObjFunctionValue(T) override;
     virtual void UpdateBestObjFunctionValue() override;
     virtual double GetObjectiveFunctionValue() override;
-    void InitContent(nifti_image*, nifti_image*, int*, int*);
+    void InitContent(NiftiImage&, NiftiImage&, int*, int*);
     virtual T InitCurrentLevel(int) override;
     virtual void DeinitCurrentLevel(int) override;
     virtual void UpdateParameters(float) override;
diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp
index 45fac34e..62456523 100644
--- a/reg-lib/cl/ClAladinContent.cpp
+++ b/reg-lib/cl/ClAladinContent.cpp
@@ -2,8 +2,8 @@
 #include "_reg_tools.h"
 
 /* *************************************************************** */
-ClAladinContent::ClAladinContent(nifti_image *referenceIn,
-                                 nifti_image *floatingIn,
+ClAladinContent::ClAladinContent(NiftiImage& referenceIn,
+                                 NiftiImage& floatingIn,
                                  int *referenceMaskIn,
                                  mat44 *transformationMatrixIn,
                                  size_t bytesIn,
@@ -36,29 +36,28 @@ void ClAladinContent::InitVars() {
     totalBlockClmem = nullptr;
     maskClmem = nullptr;
 
-    if (reference != nullptr && reference->nbyper != NIFTI_TYPE_FLOAT32)
+    if (reference && reference->nbyper != NIFTI_TYPE_FLOAT32)
         reg_tools_changeDatatype<float>(reference);
-    if (floating != nullptr && floating->nbyper != NIFTI_TYPE_FLOAT32) {
+    if (floating && floating->nbyper != NIFTI_TYPE_FLOAT32) {
         reg_tools_changeDatatype<float>(floating);
-        if (warped != nullptr)
+        if (warped)
             reg_tools_changeDatatype<float>(warped);
     }
     sContext = &ClContextSingleton::GetInstance();
     clContext = sContext->GetContext();
     commandQueue = sContext->GetCommandQueue();
-    //numBlocks = (blockMatchingParams != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0;
 }
 /* *************************************************************** */
 void ClAladinContent::AllocateClPtrs() {
-    if (warped != nullptr) {
+    if (warped) {
         warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum);
         sContext->CheckErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): ");
     }
-    if (deformationField != nullptr) {
+    if (deformationField) {
         deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * deformationField->nvox, deformationField->data, &errNum);
         sContext->CheckErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): ");
     }
-    if (floating != nullptr) {
+    if (floating) {
         floatingImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * floating->nvox, floating->data, &errNum);
         sContext->CheckErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (floating): ");
 
@@ -68,7 +67,7 @@ void ClAladinContent::AllocateClPtrs() {
         sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): ");
         free(sourceIJKMatrix_h);
     }
-    if (reference != nullptr) {
+    if (reference) {
         referenceImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                              sizeof(float) * reference->nvox,
                                              reference->data, &errNum);
@@ -80,22 +79,22 @@ void ClAladinContent::AllocateClPtrs() {
         sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): ");
         free(targetMat);
     }
-    if (blockMatchingParams != nullptr) {
-        if (blockMatchingParams->referencePosition != nullptr) {
+    if (blockMatchingParams) {
+        if (blockMatchingParams->referencePosition) {
             //targetPositionClmem
             referencePositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                     blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float),
                                                     blockMatchingParams->referencePosition, &errNum);
             sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): ");
         }
-        if (blockMatchingParams->warpedPosition != nullptr) {
+        if (blockMatchingParams->warpedPosition) {
             //resultPositionClmem
             warpedPositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                  blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float),
                                                  blockMatchingParams->warpedPosition, &errNum);
             sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): ");
         }
-        if (blockMatchingParams->totalBlock != nullptr) {
+        if (blockMatchingParams->totalBlock) {
             //totalBlockClmem
             totalBlockClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                              blockMatchingParams->totalBlockNumber * sizeof(int),
@@ -103,19 +102,19 @@ void ClAladinContent::AllocateClPtrs() {
             sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): ");
         }
     }
-    if (referenceMask != nullptr && reference != nullptr) {
+    if (referenceMask && reference) {
         maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                   NiftiImage::calcVoxelNumber(reference, 3) * sizeof(int), referenceMask, &errNum);
+                                   reference.nVoxelsPerVolume() * sizeof(int), referenceMask, &errNum);
         sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): ");
     }
 }
 /* *************************************************************** */
-nifti_image* ClAladinContent::GetWarped() {
+NiftiImage& ClAladinContent::GetWarped() {
     DownloadImage(warped, warpedImageClmem, warped->datatype);
     return warped;
 }
 /* *************************************************************** */
-nifti_image* ClAladinContent::GetDeformationField() {
+NiftiImage& ClAladinContent::GetDeformationField() {
     errNum = clEnqueueReadBuffer(commandQueue, deformationFieldClmem, CL_TRUE, 0, deformationField->nvox * sizeof(float), deformationField->data, 0, nullptr, nullptr); //CLCONTEXT
     sContext->CheckErrNum(errNum, "Get: failed deformationField: ");
     return deformationField;
@@ -133,48 +132,48 @@ void ClAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     AladinContent::SetTransformationMatrix(transformationMatrixIn);
 }
 /* *************************************************************** */
-void ClAladinContent::SetDeformationField(nifti_image *deformationFieldIn) {
-    if (deformationField != nullptr)
+void ClAladinContent::SetDeformationField(NiftiImage&& deformationFieldIn) {
+    if (deformationField)
         clReleaseMemObject(deformationFieldClmem);
 
-    AladinContent::SetDeformationField(deformationFieldIn);
+    AladinContent::SetDeformationField(std::move(deformationFieldIn));
     deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, deformationField->nvox * sizeof(float), deformationField->data, &errNum);
     sContext->CheckErrNum(errNum, "ClAladinContent::SetDeformationField failed to allocate memory (deformationFieldClmem): ");
 }
 /* *************************************************************** */
 void ClAladinContent::SetReferenceMask(int *referenceMaskIn) {
-    if (referenceMask != nullptr)
+    if (referenceMask)
         clReleaseMemObject(maskClmem);
     AladinContent::SetReferenceMask(referenceMaskIn);
     maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, reference->nvox * sizeof(int), referenceMask, &errNum);
     sContext->CheckErrNum(errNum, "ClAladinContent::SetReferenceMask failed to allocate memory (maskClmem): ");
 }
 /* *************************************************************** */
-void ClAladinContent::SetWarped(nifti_image *warpedIn) {
+void ClAladinContent::SetWarped(NiftiImage&& warpedIn) {
     if (warpedIn->nbyper != NIFTI_TYPE_FLOAT32)
-        reg_tools_changeDatatype<float>(warpedIn);
-    if (warped != nullptr)
+        warpedIn.changeDatatype(NIFTI_TYPE_FLOAT32);
+    if (warped)
         clReleaseMemObject(warpedImageClmem);
-    AladinContent::SetWarped(warpedIn);
-    warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, warpedIn->nvox * sizeof(float), warpedIn->data, &errNum);
+    AladinContent::SetWarped(std::move(warpedIn));
+    warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum);
     sContext->CheckErrNum(errNum, "ClAladinContent::SetWarped failed to allocate memory (warpedImageClmem): ");
 }
 /* *************************************************************** */
 void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
     AladinContent::SetBlockMatchingParams(bmp);
-    if (blockMatchingParams->referencePosition != nullptr) {
+    if (blockMatchingParams->referencePosition) {
         clReleaseMemObject(referencePositionClmem);
         //referencePositionClmem
         referencePositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->referencePosition, &errNum);
         sContext->CheckErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): ");
     }
-    if (blockMatchingParams->warpedPosition != nullptr) {
+    if (blockMatchingParams->warpedPosition) {
         clReleaseMemObject(warpedPositionClmem);
         //warpedPositionClmem
         warpedPositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->warpedPosition, &errNum);
         sContext->CheckErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): ");
     }
-    if (blockMatchingParams->totalBlock != nullptr) {
+    if (blockMatchingParams->totalBlock) {
         clReleaseMemObject(totalBlockClmem);
         //totalBlockClmem
         totalBlockClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, blockMatchingParams->totalBlockNumber * sizeof(int), blockMatchingParams->totalBlock, &errNum);
@@ -222,8 +221,7 @@ cl_mem ClAladinContent::GetFloMatClmem() {
     return floMatClmem;
 }
 /* *************************************************************** */
-template<typename DataType>
-void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int datatype) {
+void ClAladinContent::DownloadImage(NiftiImage& image, cl_mem memoryObject, int datatype) {
     const size_t size = image->nvox;
     unique_ptr<float[]> buffer(new float[size]);
 
@@ -231,62 +229,33 @@ void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int
                                  size * sizeof(float), buffer.get(), 0, nullptr, nullptr);
     sContext->CheckErrNum(errNum, "Error reading warped buffer.");
 
-    free(image->data);
-    image->datatype = datatype;
-    image->nbyper = sizeof(DataType);
-    image->data = malloc(size * image->nbyper);
-    DataType *data = static_cast<DataType*>(image->data);
-    for (size_t i = 0; i < size; ++i)
-        data[i] = static_cast<DataType>(NiftiImage::clampData(image, buffer[i]));
-}
-/* *************************************************************** */
-void ClAladinContent::DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype) {
-    switch (datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        FillImageData<float>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        FillImageData<double>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_UINT8:
-        FillImageData<unsigned char>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_INT8:
-        FillImageData<char>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_UINT16:
-        FillImageData<unsigned short>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_INT16:
-        FillImageData<short>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_UINT32:
-        FillImageData<unsigned>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_INT32:
-        FillImageData<int>(image, memoryObject, datatype);
-        break;
-    default:
-        NR_FATAL_ERROR("Unsupported type");
-    }
+    std::visit([&](auto&& dataType) {
+        using DataType = std::decay_t<decltype(dataType)>;
+        image->datatype = datatype;
+        image->nbyper = sizeof(DataType);
+        image.realloc();
+        DataType *data = static_cast<DataType*>(image->data);
+        for (size_t i = 0; i < size; ++i)
+            data[i] = static_cast<DataType>(image.clampData(buffer[i]));
+    }, image.getDataType());
 }
 /* *************************************************************** */
 void ClAladinContent::FreeClPtrs() {
-    if (reference != nullptr) {
+    if (reference) {
         clReleaseMemObject(referenceImageClmem);
         clReleaseMemObject(refMatClmem);
     }
-    if (floating != nullptr) {
+    if (floating) {
         clReleaseMemObject(floatingImageClmem);
         clReleaseMemObject(floMatClmem);
     }
-    if (warped != nullptr)
+    if (warped)
         clReleaseMemObject(warpedImageClmem);
-    if (deformationField != nullptr)
+    if (deformationField)
         clReleaseMemObject(deformationFieldClmem);
-    if (referenceMask != nullptr)
+    if (referenceMask)
         clReleaseMemObject(maskClmem);
-    if (blockMatchingParams != nullptr) {
+    if (blockMatchingParams) {
         clReleaseMemObject(totalBlockClmem);
         clReleaseMemObject(referencePositionClmem);
         clReleaseMemObject(warpedPositionClmem);
diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h
index 7de5039b..3a76552b 100644
--- a/reg-lib/cl/ClAladinContent.h
+++ b/reg-lib/cl/ClAladinContent.h
@@ -12,8 +12,8 @@
 class ClAladinContent: public AladinContent {
 public:
     //constructors
-    ClAladinContent(nifti_image *referenceIn,
-                    nifti_image *floatingIn,
+    ClAladinContent(NiftiImage& referenceIn,
+                    NiftiImage& floatingIn,
                     int *referenceMaskIn = nullptr,
                     mat44 *transformationMatrixIn = nullptr,
                     size_t bytesIn = sizeof(float),
@@ -38,8 +38,8 @@ class ClAladinContent: public AladinContent {
 
     // CPU getters with data downloaded from device
     virtual _reg_blockMatchingParam* GetBlockMatchingParams() override;
-    virtual nifti_image* GetDeformationField() override;
-    virtual nifti_image* GetWarped() override;
+    virtual NiftiImage& GetDeformationField() override;
+    virtual NiftiImage& GetWarped() override;
 
 private:
     void InitVars();
@@ -62,8 +62,7 @@ class ClAladinContent: public AladinContent {
     cl_mem refMatClmem;
     cl_mem floMatClmem;
 
-    template<typename DataType> void FillImageData(nifti_image *image, cl_mem memoryObject, int datatype);
-    void DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype);
+    void DownloadImage(NiftiImage& image, cl_mem memoryObject, int datatype);
 
 #ifdef NR_TESTING
 public:
@@ -72,8 +71,8 @@ class ClAladinContent: public AladinContent {
 #endif
     // Functions for testing
     virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedIn) override;
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetWarped(NiftiImage&& warpedIn) override;
+    virtual void SetDeformationField(NiftiImage&& deformationFieldIn) override;
     virtual void SetReferenceMask(int *referenceMaskIn) override;
     virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) override;
 };
diff --git a/reg-lib/cl/ClAladinContentCreator.h b/reg-lib/cl/ClAladinContentCreator.h
index 84442142..ed688de7 100644
--- a/reg-lib/cl/ClAladinContentCreator.h
+++ b/reg-lib/cl/ClAladinContentCreator.h
@@ -5,8 +5,8 @@
 
 class ClAladinContentCreator: public AladinContentCreator {
 public:
-    virtual AladinContent* Create(nifti_image *reference,
-                                  nifti_image *floating,
+    virtual AladinContent* Create(NiftiImage& reference,
+                                  NiftiImage& floating,
                                   int *referenceMask = nullptr,
                                   mat44 *transformationMatrix = nullptr,
                                   size_t bytes = sizeof(float),
diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp
index 06002aa9..1553d36e 100644
--- a/reg-lib/cl/ClBlockMatchingKernel.cpp
+++ b/reg-lib/cl/ClBlockMatchingKernel.cpp
@@ -57,7 +57,6 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern
    //get cpu ptrs
    reference = con->AladinContent::GetReference();
    params = con->AladinContent::GetBlockMatchingParams();
-
 }
 /* *************************************************************** */
 void ClBlockMatchingKernel::Calculate() {
diff --git a/reg-lib/cpu/CpuResampleImageKernel.cpp b/reg-lib/cpu/CpuResampleImageKernel.cpp
index 1544e9d5..d723e023 100644
--- a/reg-lib/cpu/CpuResampleImageKernel.cpp
+++ b/reg-lib/cpu/CpuResampleImageKernel.cpp
@@ -13,7 +13,7 @@ CpuResampleImageKernel::CpuResampleImageKernel(Content *conIn) : ResampleImageKe
 void CpuResampleImageKernel::Calculate(int interp,
                                        float paddingValue,
                                        bool *dtiTimePoint,
-                                       mat33 * jacMat) {
+                                       mat33 *jacMat) {
     reg_resampleImage(floatingImage,
                       warpedImage,
                       deformationField,
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index d070bee1..418b310d 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -3613,15 +3613,14 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
     } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation");
 }
 /* *************************************************************** */
-void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
-                                                   nifti_image **deformationField) {
+void reg_spline_getIntermediateDefFieldFromVelGrid(NiftiImage& velocityFieldGrid,
+                                                   NiftiImage deformationFields[]) {
     // Check if the velocity field is actually a velocity field
     if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) {
         // Create an image to store the flow field
-        nifti_image *flowField = nifti_dup(*deformationField[0], false);
+        NiftiImage flowField(deformationFields[0], NiftiImage::Copy::ImageInfoAndAllocData);
         flowField->intent_code = NIFTI_INTENT_VECTOR;
-        memset(flowField->intent_name, 0, 16);
-        strcpy(flowField->intent_name, "NREG_TRANS");
+        flowField.setIntentName("NREG_TRANS"s);
         flowField->intent_p1 = DEF_VEL_FIELD;
         flowField->intent_p2 = velocityFieldGrid->intent_p2;
         if (velocityFieldGrid->num_ext > 0 && flowField->ext_list == nullptr)
@@ -3630,11 +3629,11 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
         // Generate the velocity field
         reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowField);
         // Remove the affine component from the flow field
-        nifti_image *affineOnly = nullptr;
+        NiftiImage affineOnly;
         if (flowField->num_ext > 0) {
             if (flowField->ext_list[0].edata != nullptr) {
                 // Create a field that contains the affine component only
-                affineOnly = nifti_dup(*deformationField[0], false);
+                affineOnly = NiftiImage(deformationFields[0], NiftiImage::Copy::ImageInfoAndAllocData);
                 reg_affine_getDeformationField(reinterpret_cast<mat44*>(flowField->ext_list[0].edata), affineOnly, false);
                 reg_tools_subtractImageFromImage(flowField, affineOnly, flowField);
             }
@@ -3647,45 +3646,38 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri
         float scalingValue = pow(2.0f, std::abs((float)squaringNumber));
         if (velocityFieldGrid->intent_p2 < 0)
             // backward deformation field is scaled down
-            reg_tools_divideValueToImage(flowField, deformationField[0], -scalingValue);
+            reg_tools_divideValueToImage(flowField, deformationFields[0], -scalingValue);
         else
             // forward deformation field is scaled down
-            reg_tools_divideValueToImage(flowField, deformationField[0], scalingValue);
-
-        // Deallocate the allocated flow field
-        nifti_image_free(flowField);
-        flowField = nullptr;
+            reg_tools_divideValueToImage(flowField, deformationFields[0], scalingValue);
 
         // Conversion from displacement to deformation
-        reg_getDeformationFromDisplacement(deformationField[0]);
+        reg_getDeformationFromDisplacement(deformationFields[0]);
 
         // The deformation field is squared
         for (unsigned short i = 0; i < squaringNumber; ++i) {
             // The computed scaled deformation field is copied over
-            memcpy(deformationField[i + 1]->data, deformationField[i]->data,
-                   deformationField[i]->nvox * deformationField[i]->nbyper);
+            deformationFields[i + 1].copyData(deformationFields[i]);
             // The deformation field is applied to itself
-            reg_defField_compose(deformationField[i], // to apply
-                                 deformationField[i + 1], // to update
+            reg_defField_compose(deformationFields[i], // to apply
+                                 deformationFields[i + 1], // to update
                                  nullptr);
             NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber);
         }
         // The affine conponent of the transformation is restored
-        if (affineOnly != nullptr) {
+        if (affineOnly) {
             for (unsigned short i = 0; i <= squaringNumber; ++i) {
-                reg_getDisplacementFromDeformation(deformationField[i]);
-                reg_tools_addImageToImage(deformationField[i], affineOnly, deformationField[i]);
-                deformationField[i]->intent_p1 = DEF_FIELD;
-                deformationField[i]->intent_p2 = 0;
+                reg_getDisplacementFromDeformation(deformationFields[i]);
+                reg_tools_addImageToImage(deformationFields[i], affineOnly, deformationFields[i]);
+                deformationFields[i]->intent_p1 = DEF_FIELD;
+                deformationFields[i]->intent_p2 = 0;
             }
-            nifti_image_free(affineOnly);
-            affineOnly = nullptr;
         }
         // If required an affine component is composed
         if (velocityFieldGrid->num_ext > 1) {
             for (unsigned short i = 0; i <= squaringNumber; ++i) {
                 reg_affine_getDeformationField(reinterpret_cast<mat44*>(velocityFieldGrid->ext_list[1].edata),
-                                               deformationField[i],
+                                               deformationFields[i],
                                                true);
             }
         }
diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h
index c2a06195..955a704e 100755
--- a/reg-lib/cpu/_reg_localTrans.h
+++ b/reg-lib/cpu/_reg_localTrans.h
@@ -166,8 +166,8 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                             nifti_image *deformationFieldImage,
                                             const bool updateStepNumber);
 /* *************************************************************** */
-void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
-                                                   nifti_image **deformationFieldImage);
+void reg_spline_getIntermediateDefFieldFromVelGrid(NiftiImage& velocityFieldGrid,
+                                                   NiftiImage deformationFields[]);
 /* *************************************************************** */
 void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
                                              nifti_image *flowField);
diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp
index e3bf130e..9e1f94ed 100644
--- a/reg-lib/cuda/CudaAladinContent.cpp
+++ b/reg-lib/cuda/CudaAladinContent.cpp
@@ -4,8 +4,8 @@
 #include <algorithm>
 
 /* *************************************************************** */
-CudaAladinContent::CudaAladinContent(nifti_image *referenceIn,
-                                     nifti_image *floatingIn,
+CudaAladinContent::CudaAladinContent(NiftiImage& referenceIn,
+                                     NiftiImage& floatingIn,
                                      int *referenceMaskIn,
                                      mat44 *transformationMatrixIn,
                                      size_t bytesIn,
@@ -42,19 +42,17 @@ void CudaAladinContent::InitVars() {
     mask_d = nullptr;
     floIJKMat_d = nullptr;
 
-    if (reference != nullptr && reference->nbyper != NIFTI_TYPE_FLOAT32)
-        reg_tools_changeDatatype<float>(reference);
-    if (floating != nullptr && floating->nbyper != NIFTI_TYPE_FLOAT32) {
-        reg_tools_changeDatatype<float>(floating);
-        if (warped != nullptr)
-            reg_tools_changeDatatype<float>(warped);
+    if (reference && reference->nbyper != NIFTI_TYPE_FLOAT32)
+        reference.changeDatatype(NIFTI_TYPE_FLOAT32);
+    if (floating && floating->nbyper != NIFTI_TYPE_FLOAT32) {
+        floating.changeDatatype(NIFTI_TYPE_FLOAT32);
+        if (warped)
+            warped.changeDatatype(NIFTI_TYPE_FLOAT32);
     }
-
-    //numBlocks = (blockMatchingParams->activeBlock != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0;
 }
 /* *************************************************************** */
 void CudaAladinContent::AllocateCuPtrs() {
-    if (transformationMatrix != nullptr) {
+    if (transformationMatrix) {
         Cuda::Allocate<float>(&transformationMatrix_d, sizeof(mat44) / sizeof(float));
 
         float *tmpMat_h = (float*)malloc(sizeof(mat44));
@@ -63,11 +61,11 @@ void CudaAladinContent::AllocateCuPtrs() {
 
         free(tmpMat_h);
     }
-    if (referenceMask != nullptr) {
+    if (referenceMask) {
         Cuda::Allocate<int>(&mask_d, reference->nvox);
         Cuda::TransferNiftiToDevice(mask_d, referenceMask, reference->nvox);
     }
-    if (reference != nullptr) {
+    if (reference) {
         Cuda::Allocate<float>(&referenceImageArray_d, reference->nvox);
         Cuda::Allocate<float>(&referenceMat_d, sizeof(mat44) / sizeof(float));
 
@@ -78,15 +76,15 @@ void CudaAladinContent::AllocateCuPtrs() {
         Cuda::TransferNiftiToDevice(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float));
         free(targetMat);
     }
-    if (warped != nullptr) {
+    if (warped) {
         Cuda::Allocate<float>(&warpedImageArray_d, warped->nvox);
         Cuda::TransferNiftiToDevice(warpedImageArray_d, warped);
     }
-    if (deformationField != nullptr) {
+    if (deformationField) {
         Cuda::Allocate<float>(&deformationFieldArray_d, deformationField->nvox);
         Cuda::TransferNiftiToDevice(deformationFieldArray_d, deformationField);
     }
-    if (floating != nullptr) {
+    if (floating) {
         Cuda::Allocate<float>(&floatingImageArray_d, floating->nvox);
         Cuda::Allocate<float>(&floIJKMat_d, sizeof(mat44) / sizeof(float));
 
@@ -98,28 +96,28 @@ void CudaAladinContent::AllocateCuPtrs() {
         free(sourceIJKMatrix_h);
     }
 
-    if (blockMatchingParams != nullptr) {
-        if (blockMatchingParams->referencePosition != nullptr) {
+    if (blockMatchingParams) {
+        if (blockMatchingParams->referencePosition) {
             Cuda::Allocate<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
             Cuda::TransferFromHostToDevice<float>(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         }
-        if (blockMatchingParams->warpedPosition != nullptr) {
+        if (blockMatchingParams->warpedPosition) {
             Cuda::Allocate<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
             Cuda::TransferFromHostToDevice<float>(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         }
-        if (blockMatchingParams->totalBlock != nullptr) {
+        if (blockMatchingParams->totalBlock) {
             Cuda::Allocate<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
             Cuda::TransferNiftiToDevice(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber);
         }
     }
 }
 /* *************************************************************** */
-nifti_image* CudaAladinContent::GetWarped() {
+NiftiImage& CudaAladinContent::GetWarped() {
     DownloadImage(warped, warpedImageArray_d, warped->datatype);
     return warped;
 }
 /* *************************************************************** */
-nifti_image* CudaAladinContent::GetDeformationField() {
+NiftiImage& CudaAladinContent::GetDeformationField() {
     Cuda::TransferFromDeviceToHost<float>((float*)deformationField->data, deformationFieldArray_d, deformationField->nvox);
     return deformationField;
 }
@@ -131,7 +129,7 @@ _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() {
 }
 /* *************************************************************** */
 void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
-    if (transformationMatrix != nullptr)
+    if (transformationMatrix)
         Cuda::Free(transformationMatrix_d);
 
     AladinContent::SetTransformationMatrix(transformationMatrixIn);
@@ -143,28 +141,33 @@ void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     free(tmpMat_h);
 }
 /* *************************************************************** */
-void CudaAladinContent::SetDeformationField(nifti_image *deformationFieldIn) {
-    if (deformationField != nullptr)
+void CudaAladinContent::SetDeformationField(NiftiImage&& deformationFieldIn) {
+    if (deformationField)
         Cuda::Free(deformationFieldArray_d);
-    AladinContent::SetDeformationField(deformationFieldIn);
+    AladinContent::SetDeformationField(std::move(deformationFieldIn));
 
     Cuda::Allocate<float>(&deformationFieldArray_d, deformationField->nvox);
     Cuda::TransferNiftiToDevice(deformationFieldArray_d, deformationField);
 }
 /* *************************************************************** */
 void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) {
-    if (referenceMask != nullptr)
+    if (referenceMask)
         Cuda::Free(mask_d);
     AladinContent::SetReferenceMask(referenceMaskIn);
     Cuda::Allocate<int>(&mask_d, reference->nvox);
     Cuda::TransferNiftiToDevice(mask_d, referenceMaskIn, reference->nvox);
 }
 /* *************************************************************** */
-void CudaAladinContent::SetWarped(nifti_image *warped) {
-    if (warped != nullptr)
+void CudaAladinContent::SetWarped(NiftiImage&& warpedIn) {
+    AladinContent::SetWarped(std::move(warpedIn));
+    if (warpedImageArray_d) {
         Cuda::Free(warpedImageArray_d);
-    AladinContent::SetWarped(warped);
-    reg_tools_changeDatatype<float>(warped);
+        warpedImageArray_d = nullptr;
+    }
+    if (!warped) return;
+
+    if (warped->nbyper != NIFTI_TYPE_FLOAT32)
+        warped.changeDatatype(NIFTI_TYPE_FLOAT32);
 
     Cuda::Allocate<float>(&warpedImageArray_d, warped->nvox);
     Cuda::TransferNiftiToDevice(warpedImageArray_d, warped);
@@ -172,19 +175,19 @@ void CudaAladinContent::SetWarped(nifti_image *warped) {
 /* *************************************************************** */
 void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
     AladinContent::SetBlockMatchingParams(bmp);
-    if (blockMatchingParams->referencePosition != nullptr) {
+    if (blockMatchingParams->referencePosition) {
         Cuda::Free(referencePosition_d);
         //referencePosition
         Cuda::Allocate<float>(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         Cuda::TransferFromHostToDevice<float>(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     }
-    if (blockMatchingParams->warpedPosition != nullptr) {
+    if (blockMatchingParams->warpedPosition) {
         Cuda::Free(warpedPosition_d);
         //warpedPosition
         Cuda::Allocate<float>(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
         Cuda::TransferFromHostToDevice<float>(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim);
     }
-    if (blockMatchingParams->totalBlock != nullptr) {
+    if (blockMatchingParams->totalBlock) {
         Cuda::Free(totalBlock_d);
         //activeBlock
         Cuda::Allocate<int>(&totalBlock_d, blockMatchingParams->totalBlockNumber);
@@ -192,51 +195,21 @@ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) {
     }
 }
 /* *************************************************************** */
-template<typename DataType>
-void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, int datatype) {
+void CudaAladinContent::DownloadImage(NiftiImage& image, float *memoryObject, int datatype) {
     const size_t size = image->nvox;
     unique_ptr<float[]> buffer(new float[size]);
 
     Cuda::TransferFromDeviceToHost(buffer.get(), memoryObject, size);
 
-    free(image->data);
-    image->datatype = datatype;
-    image->nbyper = sizeof(DataType);
-    image->data = malloc(size * image->nbyper);
-    DataType *data = static_cast<DataType*>(image->data);
-    for (size_t i = 0; i < size; ++i)
-        data[i] = static_cast<DataType>(NiftiImage::clampData(image, buffer[i]));
-}
-/* *************************************************************** */
-void CudaAladinContent::DownloadImage(nifti_image *image, float *memoryObject, int datatype) {
-    switch (datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        FillImageData<float>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        FillImageData<double>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_UINT8:
-        FillImageData<unsigned char>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_INT8:
-        FillImageData<char>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_UINT16:
-        FillImageData<unsigned short>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_INT16:
-        FillImageData<short>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_UINT32:
-        FillImageData<unsigned>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_INT32:
-        FillImageData<int>(image, memoryObject, datatype);
-        break;
-    default:
-        NR_FATAL_ERROR("Unsupported type");
-    }
+    std::visit([&](auto&& dataType) {
+        using DataType = std::decay_t<decltype(dataType)>;
+        image->datatype = datatype;
+        image->nbyper = sizeof(DataType);
+        image.realloc();
+        DataType *data = static_cast<DataType*>(image->data);
+        for (size_t i = 0; i < size; ++i)
+            data[i] = static_cast<DataType>(image.clampData(buffer[i]));
+    }, image.getDataType());
 }
 /* *************************************************************** */
 float* CudaAladinContent::GetReferenceImageArray_d() {
@@ -284,33 +257,33 @@ int* CudaAladinContent::GetMask_d() {
 }
 /* *************************************************************** */
 void CudaAladinContent::FreeCuPtrs() {
-    if (transformationMatrix_d != nullptr)
+    if (transformationMatrix_d)
         Cuda::Free(transformationMatrix_d);
 
-    if (referenceImageArray_d != nullptr)
+    if (referenceImageArray_d)
         Cuda::Free(referenceImageArray_d);
-    if (referenceMat_d != nullptr)
+    if (referenceMat_d)
         Cuda::Free(referenceMat_d);
 
-    if (floatingImageArray_d != nullptr)
+    if (floatingImageArray_d)
         Cuda::Free(floatingImageArray_d);
-    if (floIJKMat_d != nullptr)
+    if (floIJKMat_d)
         Cuda::Free(floIJKMat_d);
 
-    if (warpedImageArray_d != nullptr)
+    if (warpedImageArray_d)
         Cuda::Free(warpedImageArray_d);
 
-    if (deformationFieldArray_d != nullptr)
+    if (deformationFieldArray_d)
         Cuda::Free(deformationFieldArray_d);
 
-    if (mask_d != nullptr)
+    if (mask_d)
         Cuda::Free(mask_d);
 
-    if (totalBlock_d != nullptr)
+    if (totalBlock_d)
         Cuda::Free(totalBlock_d);
-    if (referencePosition_d != nullptr)
+    if (referencePosition_d)
         Cuda::Free(referencePosition_d);
-    if (warpedPosition_d != nullptr)
+    if (warpedPosition_d)
         Cuda::Free(warpedPosition_d);
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h
index 6521829a..ac649f21 100644
--- a/reg-lib/cuda/CudaAladinContent.h
+++ b/reg-lib/cuda/CudaAladinContent.h
@@ -6,8 +6,8 @@
 
 class CudaAladinContent: public AladinContent {
 public:
-    CudaAladinContent(nifti_image *referenceIn,
-                      nifti_image *floatingIn,
+    CudaAladinContent(NiftiImage& referenceIn,
+                      NiftiImage& floatingIn,
                       int *referenceMaskIn = nullptr,
                       mat44 *transformationMatrixIn = nullptr,
                       size_t bytesIn = sizeof(float),
@@ -34,8 +34,8 @@ class CudaAladinContent: public AladinContent {
 
     // CPU getters with data downloaded from device
     virtual _reg_blockMatchingParam* GetBlockMatchingParams() override;
-    virtual nifti_image* GetDeformationField() override;
-    virtual nifti_image* GetWarped() override;
+    virtual NiftiImage& GetDeformationField() override;
+    virtual NiftiImage& GetWarped() override;
 
 private:
     void InitVars();
@@ -54,8 +54,7 @@ class CudaAladinContent: public AladinContent {
     float *referenceMat_d;
     float *floIJKMat_d;
 
-    template<typename DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
-    void DownloadImage(nifti_image *image, float *memoryObject, int datatype);
+    void DownloadImage(NiftiImage& image, float *memoryObject, int datatype);
 
 #ifdef NR_TESTING
 public:
@@ -64,8 +63,8 @@ class CudaAladinContent: public AladinContent {
 #endif
     // Functions for testing
     virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedIn) override;
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetWarped(NiftiImage&& warpedIn) override;
+    virtual void SetDeformationField(NiftiImage&& deformationFieldIn) override;
     virtual void SetReferenceMask(int *referenceMaskIn) override;
     virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) override;
 };
diff --git a/reg-lib/cuda/CudaAladinContentCreator.h b/reg-lib/cuda/CudaAladinContentCreator.h
index 7da8c0fd..4928f685 100644
--- a/reg-lib/cuda/CudaAladinContentCreator.h
+++ b/reg-lib/cuda/CudaAladinContentCreator.h
@@ -5,8 +5,8 @@
 
 class CudaAladinContentCreator: public AladinContentCreator {
 public:
-    virtual AladinContent* Create(nifti_image *reference,
-                                  nifti_image *floating,
+    virtual AladinContent* Create(NiftiImage& reference,
+                                  NiftiImage& floating,
                                   int *referenceMask = nullptr,
                                   mat44 *transformationMatrix = nullptr,
                                   size_t bytes = sizeof(float),
diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu
index 08493d4a..db63657c 100644
--- a/reg-lib/cuda/CudaCompute.cu
+++ b/reg-lib/cuda/CudaCompute.cu
@@ -10,7 +10,7 @@
 /* *************************************************************** */
 void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
     CudaContent& con = dynamic_cast<CudaContent&>(this->con);
-    const nifti_image *floating = con.Content::GetFloating();
+    const NiftiImage& floating = con.Content::GetFloating();
     auto resampleImage = floating->nz > 1 ? Cuda::ResampleImage<true> : Cuda::ResampleImage<false>;
     resampleImage(floating,
                   con.GetFloatingCuda(),
@@ -52,7 +52,7 @@ double CudaCompute::CorrectFolding(bool approx) {
 /* *************************************************************** */
 double CudaCompute::ApproxBendingEnergy() {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    const NiftiImage& controlPointGrid = con.F3dContent::GetControlPointGrid();
     auto approxBendingEnergy = controlPointGrid->nz > 1 ? Cuda::ApproxBendingEnergy<true> :
                                                           Cuda::ApproxBendingEnergy<false>;
     return approxBendingEnergy(controlPointGrid, con.GetControlPointGridCuda());
@@ -60,7 +60,7 @@ double CudaCompute::ApproxBendingEnergy() {
 /* *************************************************************** */
 void CudaCompute::ApproxBendingEnergyGradient(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    NiftiImage& controlPointGrid = con.F3dContent::GetControlPointGrid();
     auto approxBendingEnergyGradient = controlPointGrid->nz > 1 ? Cuda::ApproxBendingEnergyGradient<true> :
                                                                   Cuda::ApproxBendingEnergyGradient<false>;
     approxBendingEnergyGradient(controlPointGrid,
@@ -71,7 +71,7 @@ void CudaCompute::ApproxBendingEnergyGradient(float weight) {
 /* *************************************************************** */
 double CudaCompute::ApproxLinearEnergy() {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    const NiftiImage& controlPointGrid = con.F3dContent::GetControlPointGrid();
     auto approxLinearEnergy = controlPointGrid->nz > 1 ? Cuda::ApproxLinearEnergy<true> :
                                                          Cuda::ApproxLinearEnergy<false>;
     return approxLinearEnergy(controlPointGrid, con.GetControlPointGridCuda());
@@ -79,7 +79,7 @@ double CudaCompute::ApproxLinearEnergy() {
 /* *************************************************************** */
 void CudaCompute::ApproxLinearEnergyGradient(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
+    const NiftiImage& controlPointGrid = con.F3dContent::GetControlPointGrid();
     auto approxLinearEnergyGradient = controlPointGrid->nz > 1 ? Cuda::ApproxLinearEnergyGradient<true> :
                                                                  Cuda::ApproxLinearEnergyGradient<false>;
     approxLinearEnergyGradient(controlPointGrid, con.GetControlPointGridCuda(), con.GetTransformationGradientCuda(), weight);
@@ -145,9 +145,9 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
                                              const bool optimiseX,
                                              const bool optimiseY,
                                              const bool optimiseZ) {
-    const nifti_image *controlPointGrid = dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid();
+    const NiftiImage& controlPointGrid = dynamic_cast<CudaF3dContent&>(con).F3dContent::GetControlPointGrid();
     const bool optZ = optimiseZ && controlPointGrid->nz > 1;
-    const size_t nVoxels = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
+    const size_t nVoxels = controlPointGrid.nVoxelsPerVolume();
     auto bestDofTexturePtr = Cuda::CreateTextureObject(reinterpret_cast<const float4*>(bestDof), nVoxels, cudaChannelFormatKindFloat, 4);
     auto gradientTexturePtr = Cuda::CreateTextureObject(reinterpret_cast<const float4*>(gradient), nVoxels, cudaChannelFormatKindFloat, 4);
 
@@ -172,7 +172,7 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof,
 /* *************************************************************** */
 void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) {
     CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
-    const nifti_image *floating = con.Content::GetFloating();
+    const NiftiImage& floating = con.Content::GetFloating();
     auto getImageGradient = floating->nz > 1 ? Cuda::GetImageGradient<true> : Cuda::GetImageGradient<false>;
     getImageGradient(floating,
                      con.GetFloatingCuda(),
@@ -187,8 +187,8 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac
 double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) {
     if (!optimiseX && !optimiseY && !optimiseZ) return 0;
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    nifti_image *transGrad = con.F3dContent::GetTransformationGradient();
-    const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transGrad, 3);
+    NiftiImage& transGrad = con.F3dContent::GetTransformationGradient();
+    const size_t voxelsPerVolume = transGrad.nVoxelsPerVolume();
     if (transGrad->nz <= 1) optimiseZ = false;
     return Cuda::GetMaximalLength(con.GetTransformationGradientCuda(), voxelsPerVolume, optimiseX, optimiseY, optimiseZ);
 }
@@ -196,8 +196,8 @@ double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimi
 void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) {
     if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return;
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    nifti_image *transGrad = con.F3dContent::GetTransformationGradient();
-    const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transGrad, 3);
+    NiftiImage& transGrad = con.F3dContent::GetTransformationGradient();
+    const size_t voxelsPerVolume = transGrad.nVoxelsPerVolume();
     if (transGrad->nz <= 1) optimiseZ = false;
     Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, maxGradLength, optimiseX, optimiseY, optimiseZ);
 }
@@ -224,8 +224,8 @@ void CudaCompute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) {
                                       updateStepNumber);
 }
 /* *************************************************************** */
-void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) {
-    const nifti_image *controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
+void CudaCompute::ConvolveImage(const NiftiImage& image, float4 *imageCuda) {
+    const NiftiImage& controlPointGrid = dynamic_cast<F3dContent&>(con).F3dContent::GetControlPointGrid();
     constexpr ConvKernelType kernelType = ConvKernelType::Cubic;
     float currentNodeSpacing[3];
     currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx;
@@ -260,7 +260,7 @@ void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) {
 void CudaCompute::VoxelCentricToNodeCentric(float weight) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating());
-    const nifti_image *transGrad = con.F3dContent::GetTransformationGradient();
+    const NiftiImage& transGrad = con.F3dContent::GetTransformationGradient();
     auto voxelCentricToNodeCentric = transGrad->nz > 1 ? Cuda::VoxelCentricToNodeCentric<true> :
                                                          Cuda::VoxelCentricToNodeCentric<false>;
     voxelCentricToNodeCentric(transGrad,
@@ -281,21 +281,21 @@ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) {
 void CudaCompute::ExponentiateGradient(Content& conBwIn) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     CudaF3dContent& conBw = dynamic_cast<CudaF3dContent&>(conBwIn);
-    nifti_image *deformationField = con.Content::GetDeformationField();
-    nifti_image *voxelBasedMeasureGradient = con.DefContent::GetVoxelBasedMeasureGradient();
+    NiftiImage& deformationField = con.Content::GetDeformationField();
+    NiftiImage& voxelBasedMeasureGradient = con.DefContent::GetVoxelBasedMeasureGradient();
     float4 *voxelBasedMeasureGradientCuda = con.GetVoxelBasedMeasureGradientCuda();
-    nifti_image *controlPointGridBw = conBw.F3dContent::GetControlPointGrid();
+    NiftiImage& controlPointGridBw = conBw.F3dContent::GetControlPointGrid();
     float4 *controlPointGridBwCuda = conBw.GetControlPointGridCuda();
     mat44 *affineTransformationBw = conBw.Content::GetTransformationMatrix();
     const int compNum = std::abs(static_cast<int>(controlPointGridBw->intent_p2)); // The number of composition
 
     /* Allocate a temporary gradient image to store the backward gradient */
-    const size_t voxelGradNumber = NiftiImage::calcVoxelNumber(voxelBasedMeasureGradient, 3);
+    const size_t voxelGradNumber = voxelBasedMeasureGradient.nVoxelsPerVolume();
     NiftiImage warped(voxelBasedMeasureGradient, NiftiImage::Copy::ImageInfo);
     thrust::device_vector<float4> warpedCudaVec(voxelGradNumber);
 
     // Create all deformation field images needed for resampling
-    const size_t defFieldNumber = NiftiImage::calcVoxelNumber(deformationField, 3);
+    const size_t defFieldNumber = deformationField.nVoxelsPerVolume();
     vector<NiftiImage> defFields(compNum + 1, NiftiImage(deformationField, NiftiImage::Copy::ImageInfo));
     vector<thrust::device_vector<float4>> defFieldCudaVecs(compNum + 1, thrust::device_vector<float4>(defFieldNumber));
 
@@ -341,8 +341,8 @@ void CudaCompute::UpdateVelocityField(float scale, bool optimiseX, bool optimise
     if (!optimiseX && !optimiseY && !optimiseZ) return;
 
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
-    const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
+    const NiftiImage& controlPointGrid = con.F3dContent::GetControlPointGrid();
+    const size_t voxelNumber = controlPointGrid.nVoxelsPerVolume();
     auto scaledGradientCudaPtr = ScaleGradient(con.GetTransformationGradientCuda(), voxelNumber, scale);
 
     // Reset the gradient along the axes if appropriate
@@ -365,11 +365,11 @@ void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) {
     CudaF3dContent& con = dynamic_cast<CudaF3dContent&>(this->con);
     CudaF3dContent& conBw = dynamic_cast<CudaF3dContent&>(conBwIn);
 
-    nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid();
-    nifti_image *controlPointGridBw = conBw.F3dContent::GetControlPointGrid();
+    NiftiImage& controlPointGrid = con.F3dContent::GetControlPointGrid();
+    NiftiImage& controlPointGridBw = conBw.F3dContent::GetControlPointGrid();
     float4 *controlPointGridCuda = con.GetControlPointGridCuda();
     float4 *controlPointGridBwCuda = conBw.GetControlPointGridCuda();
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3);
+    const size_t voxelNumber = controlPointGrid.nVoxelsPerVolume();
 
     // In order to ensure symmetry, the forward and backward velocity fields
     // are averaged in both image spaces: reference and floating
@@ -395,9 +395,9 @@ void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) {
     Cuda::GetDeformationFromDisplacement(controlPointGridBw, controlPointGridBwCuda);
 }
 /* *************************************************************** */
-void CudaCompute::DefFieldCompose(const nifti_image *defField) {
+void CudaCompute::DefFieldCompose(const NiftiImage& defField) {
     CudaContent& con = dynamic_cast<CudaContent&>(this->con);
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3);
+    const size_t voxelNumber = defField.nVoxelsPerVolume();
     thrust::device_vector<float4> defFieldCuda(voxelNumber);
     Cuda::TransferNiftiToDevice(defFieldCuda.data().get(), defField);
     auto defFieldCompose = defField->nz > 1 ? Cuda::DefFieldCompose<true> : Cuda::DefFieldCompose<false>;
@@ -406,7 +406,7 @@ void CudaCompute::DefFieldCompose(const nifti_image *defField) {
 /* *************************************************************** */
 NiftiImage CudaCompute::ResampleGradient(int interpolation, float padding) {
     CudaDefContent& con = dynamic_cast<CudaDefContent&>(this->con);
-    const nifti_image *voxelBasedMeasureGradient = con.DefContent::GetVoxelBasedMeasureGradient();
+    const NiftiImage& voxelBasedMeasureGradient = con.DefContent::GetVoxelBasedMeasureGradient();
     auto resampleGradient = voxelBasedMeasureGradient->nz > 1 ? Cuda::ResampleGradient<true> : Cuda::ResampleGradient<false>;
     resampleGradient(voxelBasedMeasureGradient,
                      con.GetVoxelBasedMeasureGradientCuda(),
@@ -418,7 +418,7 @@ NiftiImage CudaCompute::ResampleGradient(int interpolation, float padding) {
                      con.GetActiveVoxelNumber(),
                      interpolation,
                      padding);
-    return NiftiImage(con.GetWarpedGradient(), NiftiImage::Copy::Image);
+    return con.GetWarpedGradient();
 }
 /* *************************************************************** */
 void CudaCompute::GetAffineDeformationField(bool compose) {
diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h
index 0982623d..ff342a0c 100644
--- a/reg-lib/cuda/CudaCompute.h
+++ b/reg-lib/cuda/CudaCompute.h
@@ -35,11 +35,11 @@ class CudaCompute: public Compute {
 #ifndef NR_TESTING
 protected:
 #endif
-    virtual void DefFieldCompose(const nifti_image *defField) override;
+    virtual void DefFieldCompose(const NiftiImage& defField) override;
     virtual NiftiImage ResampleGradient(int interpolation, float padding) override;
     virtual void VoxelCentricToNodeCentric(float weight) override;
 
 private:
-    void ConvolveImage(const nifti_image*, float4*);
+    void ConvolveImage(const NiftiImage&, float4*);
     Cuda::UniquePtr<float4> ScaleGradient(const float4*, const size_t, const float);
 };
diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp
index a1f02b0e..51428168 100644
--- a/reg-lib/cuda/CudaContent.cpp
+++ b/reg-lib/cuda/CudaContent.cpp
@@ -1,8 +1,8 @@
 #include "CudaContent.h"
 
 /* *************************************************************** */
-CudaContent::CudaContent(nifti_image *referenceIn,
-                         nifti_image *floatingIn,
+CudaContent::CudaContent(NiftiImage& referenceIn,
+                         NiftiImage& floatingIn,
                          int *referenceMaskIn,
                          mat44 *transformationMatrixIn,
                          size_t bytesIn):
@@ -24,7 +24,7 @@ CudaContent::~CudaContent() {
 /* *************************************************************** */
 void CudaContent::AllocateReference() {
     if (reference->nbyper != NIFTI_TYPE_FLOAT32)
-        reg_tools_changeDatatype<float>(reference);
+        reference.changeDatatype(NIFTI_TYPE_FLOAT32);
     Cuda::Allocate(&referenceCuda, reference->nvox);
     referenceCudaManaged.reset(referenceCuda);
     Cuda::TransferNiftiToDevice(referenceCuda, reference);
@@ -32,7 +32,7 @@ void CudaContent::AllocateReference() {
 /* *************************************************************** */
 void CudaContent::AllocateFloating() {
     if (floating->nbyper != NIFTI_TYPE_FLOAT32)
-        reg_tools_changeDatatype<float>(floating);
+        floating.changeDatatype(NIFTI_TYPE_FLOAT32);
     Cuda::Allocate(&floatingCuda, floating->nvox);
     floatingCudaManaged.reset(floatingCuda);
     Cuda::TransferNiftiToDevice(floatingCuda, floating);
@@ -65,13 +65,13 @@ bool CudaContent::IsCurrentComputationDoubleCapable() {
     return CudaContext::GetInstance().IsCardDoubleCapable();
 }
 /* *************************************************************** */
-nifti_image* CudaContent::GetDeformationField() {
+NiftiImage& CudaContent::GetDeformationField() {
     Cuda::TransferFromDeviceToNifti(deformationField, deformationFieldCuda);
     return deformationField;
 }
 /* *************************************************************** */
-void CudaContent::SetDeformationField(nifti_image *deformationFieldIn) {
-    Content::SetDeformationField(deformationFieldIn);
+void CudaContent::SetDeformationField(NiftiImage&& deformationFieldIn) {
+    Content::SetDeformationField(std::move(deformationFieldIn));
     DeallocateDeformationField();
     if (!deformationField) return;
 
@@ -94,7 +94,7 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) {
     activeVoxelNumber = 0;
     if (!referenceMask) return;
 
-    const size_t voxelNumber = NiftiImage::calcVoxelNumber(reference, 3);
+    const size_t voxelNumber = reference.nVoxelsPerVolume();
     thrust::host_vector<int> mask(voxelNumber);
     int *maskPtr = mask.data();
     for (size_t i = 0; i < voxelNumber; i++) {
@@ -125,17 +125,19 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) {
     free(transformationMatrixCptr);
 }
 /* *************************************************************** */
-nifti_image* CudaContent::GetWarped() {
+NiftiImage& CudaContent::GetWarped() {
     DownloadImage(warped, warpedCuda, warped->datatype);
     return warped;
 }
 /* *************************************************************** */
-void CudaContent::SetWarped(nifti_image *warpedIn) {
-    Content::SetWarped(warpedIn);
+void CudaContent::SetWarped(NiftiImage&& warpedIn) {
+    Content::SetWarped(std::move(warpedIn));
     DeallocateWarped();
     if (!warped) return;
 
-    reg_tools_changeDatatype<float>(warped);
+    if (warped->nbyper != NIFTI_TYPE_FLOAT32)
+        warped.changeDatatype(NIFTI_TYPE_FLOAT32);
+
     AllocateWarped();
     Cuda::TransferNiftiToDevice(warpedCuda, warped);
 }
@@ -144,50 +146,20 @@ void CudaContent::UpdateWarped() {
     Cuda::TransferNiftiToDevice(warpedCuda, warped);
 }
 /* *************************************************************** */
-template<typename DataType>
-void CudaContent::FillImageData(nifti_image *image, float *memoryObject, int datatype) {
+void CudaContent::DownloadImage(NiftiImage& image, float *memoryObject, int datatype) {
     const size_t size = image->nvox;
     unique_ptr<float[]> buffer(new float[size]);
 
     Cuda::TransferFromDeviceToHost(buffer.get(), memoryObject, size);
 
-    free(image->data);
-    image->datatype = datatype;
-    image->nbyper = sizeof(DataType);
-    image->data = malloc(size * image->nbyper);
-    DataType *data = static_cast<DataType*>(image->data);
-    for (size_t i = 0; i < size; ++i)
-        data[i] = static_cast<DataType>(NiftiImage::clampData(image, buffer[i]));
-}
-/* *************************************************************** */
-void CudaContent::DownloadImage(nifti_image *image, float *memoryObject, int datatype) {
-    switch (datatype) {
-    case NIFTI_TYPE_FLOAT32:
-        FillImageData<float>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_FLOAT64:
-        FillImageData<double>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_UINT8:
-        FillImageData<unsigned char>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_INT8:
-        FillImageData<char>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_UINT16:
-        FillImageData<unsigned short>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_INT16:
-        FillImageData<short>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_UINT32:
-        FillImageData<unsigned>(image, memoryObject, datatype);
-        break;
-    case NIFTI_TYPE_INT32:
-        FillImageData<int>(image, memoryObject, datatype);
-        break;
-    default:
-        NR_FATAL_ERROR("Unsupported type");
-    }
+    std::visit([&](auto&& dataType) {
+        using DataType = std::decay_t<decltype(dataType)>;
+        image->datatype = datatype;
+        image->nbyper = sizeof(DataType);
+        image.realloc();
+        DataType *data = static_cast<DataType*>(image->data);
+        for (size_t i = 0; i < size; ++i)
+            data[i] = static_cast<DataType>(image.clampData(buffer[i]));
+    }, image.getDataType());
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h
index d5225ba6..f3deee15 100644
--- a/reg-lib/cuda/CudaContent.h
+++ b/reg-lib/cuda/CudaContent.h
@@ -6,8 +6,8 @@
 class CudaContent: public virtual Content {
 public:
     CudaContent() = delete;
-    CudaContent(nifti_image *referenceIn,
-                nifti_image *floatingIn,
+    CudaContent(NiftiImage& referenceIn,
+                NiftiImage& floatingIn,
                 int *referenceMaskIn = nullptr,
                 mat44 *transformationMatrixIn = nullptr,
                 size_t bytesIn = sizeof(float));
@@ -16,8 +16,8 @@ class CudaContent: public virtual Content {
     virtual bool IsCurrentComputationDoubleCapable() override;
 
     // Getters
-    virtual nifti_image* GetDeformationField() override;
-    virtual nifti_image* GetWarped() override;
+    virtual NiftiImage& GetDeformationField() override;
+    virtual NiftiImage& GetWarped() override;
     virtual float* GetReferenceCuda() { return referenceCuda; }
     virtual float* GetFloatingCuda() { return floatingCuda; }
     virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; }
@@ -46,8 +46,7 @@ class CudaContent: public virtual Content {
     void DeallocateDeformationField();
     void AllocateWarped();
     void DeallocateWarped();
-    template<typename DataType> void FillImageData(nifti_image *image, float *memoryObject, int datatype);
-    void DownloadImage(nifti_image *image, float *memoryObject, int datatype);
+    void DownloadImage(NiftiImage& image, float *memoryObject, int datatype);
     void SetReferenceCuda(float *referenceCudaIn) { referenceCudaManaged = nullptr; referenceCuda = referenceCudaIn; }
     void SetFloatingCuda(float *floatingCudaIn) { floatingCudaManaged = nullptr; floatingCuda = floatingCudaIn; }
 
@@ -60,8 +59,8 @@ class CudaContent: public virtual Content {
 protected:
 #endif
     // Functions for testing
-    virtual void SetDeformationField(nifti_image *deformationFieldIn) override;
+    virtual void SetDeformationField(NiftiImage&& deformationFieldIn) override;
     virtual void SetReferenceMask(int *referenceMaskIn) override;
     virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override;
-    virtual void SetWarped(nifti_image *warpedIn) override;
+    virtual void SetWarped(NiftiImage&& warpedIn) override;
 };
diff --git a/reg-lib/cuda/CudaContentCreator.h b/reg-lib/cuda/CudaContentCreator.h
index 2bd82113..a889c67c 100644
--- a/reg-lib/cuda/CudaContentCreator.h
+++ b/reg-lib/cuda/CudaContentCreator.h
@@ -5,8 +5,8 @@
 
 class CudaContentCreator: public ContentCreator {
 public:
-    virtual Content* Create(nifti_image *reference,
-                            nifti_image *floating,
+    virtual Content* Create(NiftiImage& reference,
+                            NiftiImage& floating,
                             int *referenceMask = nullptr,
                             mat44 *transformationMatrix = nullptr,
                             size_t bytes = sizeof(float)) override {
diff --git a/reg-lib/cuda/CudaDefContent.cpp b/reg-lib/cuda/CudaDefContent.cpp
index 72f1c88c..bae8967f 100644
--- a/reg-lib/cuda/CudaDefContent.cpp
+++ b/reg-lib/cuda/CudaDefContent.cpp
@@ -1,9 +1,9 @@
 #include "CudaDefContent.h"
 
 /* *************************************************************** */
-CudaDefContent::CudaDefContent(nifti_image *referenceIn,
-                               nifti_image *floatingIn,
-                               nifti_image *localWeightSimIn,
+CudaDefContent::CudaDefContent(NiftiImage& referenceIn,
+                               NiftiImage& floatingIn,
+                               NiftiImage *localWeightSimIn,
                                int *referenceMaskIn,
                                mat44 *transformationMatrixIn,
                                size_t bytesIn):
@@ -56,12 +56,12 @@ void CudaDefContent::DeallocateVoxelBasedMeasureGradient() {
     }
 }
 /* *************************************************************** */
-nifti_image* CudaDefContent::GetLocalWeightSim() {
+NiftiImage& CudaDefContent::GetLocalWeightSim() {
     Cuda::TransferFromDeviceToNifti(localWeightSim, localWeightSimCuda);
     return localWeightSim;
 }
 /* *************************************************************** */
-nifti_image* CudaDefContent::GetVoxelBasedMeasureGradient() {
+NiftiImage& CudaDefContent::GetVoxelBasedMeasureGradient() {
     Cuda::TransferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda);
     return voxelBasedMeasureGradient;
 }
@@ -70,7 +70,7 @@ void CudaDefContent::UpdateVoxelBasedMeasureGradient() {
     Cuda::TransferNiftiToDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient);
 }
 /* *************************************************************** */
-nifti_image* CudaDefContent::GetWarpedGradient() {
+NiftiImage& CudaDefContent::GetWarpedGradient() {
     Cuda::TransferFromDeviceToNifti(warpedGradient, warpedGradientCuda);
     return warpedGradient;
 }
@@ -80,6 +80,6 @@ void CudaDefContent::UpdateWarpedGradient() {
 }
 /* *************************************************************** */
 void CudaDefContent::ZeroVoxelBasedMeasureGradient() {
-    cudaMemset(voxelBasedMeasureGradientCuda, 0, NiftiImage::calcVoxelNumber(voxelBasedMeasureGradient, 3) * sizeof(float4));
+    cudaMemset(voxelBasedMeasureGradientCuda, 0, voxelBasedMeasureGradient.nVoxelsPerVolume() * sizeof(float4));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaDefContent.h b/reg-lib/cuda/CudaDefContent.h
index 76e09b21..42030543 100644
--- a/reg-lib/cuda/CudaDefContent.h
+++ b/reg-lib/cuda/CudaDefContent.h
@@ -6,18 +6,18 @@
 class CudaDefContent: public virtual DefContent, public virtual CudaContent {
 public:
     CudaDefContent() = delete;
-    CudaDefContent(nifti_image *referenceIn,
-                   nifti_image *floatingIn,
-                   nifti_image *localWeightSimIn = nullptr,
+    CudaDefContent(NiftiImage& referenceIn,
+                   NiftiImage& floatingIn,
+                   NiftiImage *localWeightSimIn = nullptr,
                    int *referenceMaskIn = nullptr,
                    mat44 *transformationMatrixIn = nullptr,
                    size_t bytesIn = sizeof(float));
     virtual ~CudaDefContent();
 
     // Getters
-    virtual nifti_image* GetLocalWeightSim() override;
-    virtual nifti_image* GetVoxelBasedMeasureGradient() override;
-    virtual nifti_image* GetWarpedGradient() override;
+    virtual NiftiImage& GetLocalWeightSim() override;
+    virtual NiftiImage& GetVoxelBasedMeasureGradient() override;
+    virtual NiftiImage& GetWarpedGradient() override;
     virtual float* GetLocalWeightSimCuda() { return localWeightSimCuda; }
     virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; }
     virtual float4* GetWarpedGradientCuda() { return warpedGradientCuda; }
diff --git a/reg-lib/cuda/CudaDefContentCreator.h b/reg-lib/cuda/CudaDefContentCreator.h
index af3fb561..499d2717 100644
--- a/reg-lib/cuda/CudaDefContentCreator.h
+++ b/reg-lib/cuda/CudaDefContentCreator.h
@@ -5,9 +5,9 @@
 
 class CudaDefContentCreator: public DefContentCreator {
 public:
-    virtual DefContent* Create(nifti_image *reference,
-                               nifti_image *floating,
-                               nifti_image *localWeightSim = nullptr,
+    virtual DefContent* Create(NiftiImage& reference,
+                               NiftiImage& floating,
+                               NiftiImage *localWeightSim = nullptr,
                                int *referenceMask = nullptr,
                                mat44 *transformationMatrix = nullptr,
                                size_t bytes = sizeof(float)) override {
diff --git a/reg-lib/cuda/CudaF3d2ContentCreator.h b/reg-lib/cuda/CudaF3d2ContentCreator.h
index 347e07cc..fa6da14a 100644
--- a/reg-lib/cuda/CudaF3d2ContentCreator.h
+++ b/reg-lib/cuda/CudaF3d2ContentCreator.h
@@ -5,11 +5,11 @@
 
 class CudaF3d2ContentCreator: public F3d2ContentCreator {
 public:
-    virtual std::pair<F3dContent*, F3dContent*> Create(nifti_image *reference,
-                                                       nifti_image *floating,
-                                                       nifti_image *controlPointGrid,
-                                                       nifti_image *controlPointGridBw,
-                                                       nifti_image *localWeightSim = nullptr,
+    virtual std::pair<F3dContent*, F3dContent*> Create(NiftiImage& reference,
+                                                       NiftiImage& floating,
+                                                       NiftiImage& controlPointGrid,
+                                                       NiftiImage& controlPointGridBw,
+                                                       NiftiImage *localWeightSim = nullptr,
                                                        int *referenceMask = nullptr,
                                                        int *floatingMask = nullptr,
                                                        mat44 *transformationMatrix = nullptr,
diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp
index c6722b9e..c673ca8f 100644
--- a/reg-lib/cuda/CudaF3dContent.cpp
+++ b/reg-lib/cuda/CudaF3dContent.cpp
@@ -1,10 +1,10 @@
 #include "CudaF3dContent.h"
 
 /* *************************************************************** */
-CudaF3dContent::CudaF3dContent(nifti_image *referenceIn,
-                               nifti_image *floatingIn,
-                               nifti_image *controlPointGridIn,
-                               nifti_image *localWeightSimIn,
+CudaF3dContent::CudaF3dContent(NiftiImage& referenceIn,
+                               NiftiImage& floatingIn,
+                               NiftiImage& controlPointGridIn,
+                               NiftiImage *localWeightSimIn,
                                int *referenceMaskIn,
                                mat44 *transformationMatrixIn,
                                size_t bytesIn):
@@ -46,7 +46,7 @@ void CudaF3dContent::DeallocateTransformationGradient() {
     }
 }
 /* *************************************************************** */
-nifti_image* CudaF3dContent::GetControlPointGrid() {
+NiftiImage& CudaF3dContent::GetControlPointGrid() {
     Cuda::TransferFromDeviceToNifti(controlPointGrid, controlPointGridCuda);
     return controlPointGrid;
 }
@@ -55,7 +55,7 @@ void CudaF3dContent::UpdateControlPointGrid() {
     Cuda::TransferNiftiToDevice(controlPointGridCuda, controlPointGrid);
 }
 /* *************************************************************** */
-nifti_image* CudaF3dContent::GetTransformationGradient() {
+NiftiImage& CudaF3dContent::GetTransformationGradient() {
     Cuda::TransferFromDeviceToNifti(transformationGradient, transformationGradientCuda);
     return transformationGradient;
 }
@@ -65,6 +65,6 @@ void CudaF3dContent::UpdateTransformationGradient() {
 }
 /* *************************************************************** */
 void CudaF3dContent::ZeroTransformationGradient() {
-    cudaMemset(transformationGradientCuda, 0, NiftiImage::calcVoxelNumber(transformationGradient, 3) * sizeof(float4));
+    cudaMemset(transformationGradientCuda, 0, transformationGradient.nVoxelsPerVolume() * sizeof(float4));
 }
 /* *************************************************************** */
diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h
index ca085945..01d63923 100644
--- a/reg-lib/cuda/CudaF3dContent.h
+++ b/reg-lib/cuda/CudaF3dContent.h
@@ -6,18 +6,18 @@
 class CudaF3dContent: public F3dContent, public CudaDefContent {
 public:
     CudaF3dContent() = delete;
-    CudaF3dContent(nifti_image *referenceIn,
-                   nifti_image *floatingIn,
-                   nifti_image *controlPointGridIn,
-                   nifti_image *localWeightSimIn = nullptr,
+    CudaF3dContent(NiftiImage& referenceIn,
+                   NiftiImage& floatingIn,
+                   NiftiImage& controlPointGridIn,
+                   NiftiImage *localWeightSimIn = nullptr,
                    int *referenceMaskIn = nullptr,
                    mat44 *transformationMatrixIn = nullptr,
                    size_t bytesIn = sizeof(float));
     virtual ~CudaF3dContent();
 
     // Getters
-    virtual nifti_image* GetControlPointGrid() override;
-    virtual nifti_image* GetTransformationGradient() override;
+    virtual NiftiImage& GetControlPointGrid() override;
+    virtual NiftiImage& GetTransformationGradient() override;
     virtual float4* GetControlPointGridCuda() { return controlPointGridCuda; }
     virtual float4* GetTransformationGradientCuda() { return transformationGradientCuda; }
 
diff --git a/reg-lib/cuda/CudaF3dContentCreator.h b/reg-lib/cuda/CudaF3dContentCreator.h
index 3e741eb6..af972868 100644
--- a/reg-lib/cuda/CudaF3dContentCreator.h
+++ b/reg-lib/cuda/CudaF3dContentCreator.h
@@ -5,10 +5,10 @@
 
 class CudaF3dContentCreator: public F3dContentCreator {
 public:
-    virtual F3dContent* Create(nifti_image *reference,
-                               nifti_image *floating,
-                               nifti_image *controlPointGrid,
-                               nifti_image *localWeightSim = nullptr,
+    virtual F3dContent* Create(NiftiImage& reference,
+                               NiftiImage& floating,
+                               NiftiImage& controlPointGrid,
+                               NiftiImage *localWeightSim = nullptr,
                                int *referenceMask = nullptr,
                                mat44 *transformationMatrix = nullptr,
                                size_t bytes = sizeof(float)) override {
diff --git a/reg-lib/cuda/CudaMeasureCreator.cpp b/reg-lib/cuda/CudaMeasureCreator.cpp
index 3795297d..711b66fb 100644
--- a/reg-lib/cuda/CudaMeasureCreator.cpp
+++ b/reg-lib/cuda/CudaMeasureCreator.cpp
@@ -47,11 +47,11 @@ void CudaMeasureCreator::Initialise(reg_measure& measure, DefContent& con, DefCo
                                  cudaCon.GetLocalWeightSimCuda(),
                                  cudaConBw ? cudaConBw->Content::GetReferenceMask() : nullptr,
                                  cudaConBw ? cudaConBw->GetReferenceMaskCuda() : nullptr,
-                                 cudaConBw ? cudaConBw->Content::GetWarped() : nullptr,
+                                 cudaConBw ? static_cast<nifti_image*>(cudaConBw->Content::GetWarped()) : nullptr,
                                  cudaConBw ? cudaConBw->GetWarpedCuda() : nullptr,
-                                 cudaConBw ? cudaConBw->DefContent::GetWarpedGradient() : nullptr,
+                                 cudaConBw ? static_cast<nifti_image*>(cudaConBw->DefContent::GetWarpedGradient()) : nullptr,
                                  cudaConBw ? cudaConBw->GetWarpedGradientCuda() : nullptr,
-                                 cudaConBw ? cudaConBw->DefContent::GetVoxelBasedMeasureGradient() : nullptr,
+                                 cudaConBw ? static_cast<nifti_image*>(cudaConBw->DefContent::GetVoxelBasedMeasureGradient()) : nullptr,
                                  cudaConBw ? cudaConBw->GetVoxelBasedMeasureGradientCuda() : nullptr);
 }
 /* *************************************************************** */
diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp
index dc551cf8..055ae1e1 100644
--- a/reg-test/reg_test_affineDeformationField.cpp
+++ b/reg-test/reg_test_affineDeformationField.cpp
@@ -169,17 +169,14 @@ class AffineDeformationFieldTest {
 
                 // Set the deformation field if composition is required
                 if (defField)
-                    aladinContent->SetDeformationField(NiftiImage(defField).disown());
+                    aladinContent->SetDeformationField(NiftiImage(defField));
 
                 // Do the calculation for Aladin
                 unique_ptr<Kernel> affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), aladinContent.get()) };
                 affineDeformKernel->castTo<AffineDeformationFieldKernel>()->Calculate(defField);
 
-                // Get the result
-                NiftiImage resDefField(aladinContent->GetDeformationField(), NiftiImage::Copy::Image);
-
-                // Save for testing
-                testCases.push_back({ testName + " - Aladin", std::move(resDefField), expRes });
+                // Save the results for testing
+                testCases.push_back({ testName + " - Aladin", std::move(aladinContent->GetDeformationField()), expRes });
 
                 // Do the calculation also for Compute using Content
                 // Skip OpenCL as it is not supported
@@ -192,17 +189,14 @@ class AffineDeformationFieldTest {
 
                 // Set the deformation field if composition is required
                 if (defField)
-                    content->SetDeformationField(NiftiImage(defField).disown());
+                    content->SetDeformationField(NiftiImage(defField));
 
                 // Do the calculation
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->GetAffineDeformationField(defField);
 
-                // Get the result
-                resDefField = NiftiImage(content->GetDeformationField(), NiftiImage::Copy::Image);
-
-                // Save for testing
-                testCases.push_back({ testName, std::move(resDefField), std::move(expRes) });
+                // Save the results for testing
+                testCases.push_back({ testName, std::move(content->GetDeformationField()), std::move(expRes) });
             }
         }
     }
diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp
index 2243ec2d..9283047c 100644
--- a/reg-test/reg_test_blockMatching.cpp
+++ b/reg-test/reg_test_blockMatching.cpp
@@ -101,14 +101,14 @@ class BMTest {
         testData.emplace_back(TestData(
             "BlockMatching 2D",
             reference2d,
-            NiftiImage(contentResampling2d->GetWarped()),
+            contentResampling2d->GetWarped(),
             mask2d.get()
         ));
         contentResampling2d.release();
         testData.emplace_back(TestData(
             "BlockMatching 3D",
             reference3d,
-            NiftiImage(contentResampling3d->GetWarped()),
+            contentResampling3d->GetWarped(),
             mask3d.get()
         ));
         contentResampling3d.release();
@@ -120,7 +120,6 @@ class BMTest {
             for (auto&& platformType : PlatformTypes) {
                 // Create images
                 NiftiImage referenceTest(reference);
-                NiftiImage warpedTest(warped);
 
                 // Create the contents
                 shared_ptr<Platform> platform{ new Platform(platformType) };
@@ -137,7 +136,7 @@ class BMTest {
                     100,
                     1
                 ) };
-                content->SetWarped(warpedTest.disown());
+                content->SetWarped(NiftiImage(warped));
 
                 // Initialise the block matching
                 unique_ptr<Kernel> bmKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), content.get()) };
diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp
index affaa42d..ba9395bf 100644
--- a/reg-test/reg_test_composeField.cpp
+++ b/reg-test/reg_test_composeField.cpp
@@ -137,12 +137,10 @@ class ComposeDeformationFieldTest {
                 unique_ptr<Content> content{ contentCreator->Create(reference, reference) };
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 // Run the compose
-                content->SetDeformationField(NiftiImage(outDefField).disown());
+                content->SetDeformationField(NiftiImage(outDefField));
                 compute->DefFieldCompose(defField);
-                // Get the result
-                NiftiImage resDefField(content->GetDeformationField(), NiftiImage::Copy::Image);
-                // Save for testing
-                testCases.push_back({ testName + " "s + platform->GetName(), std::move(resDefField), expDefField });
+                // Save the results for testing
+                testCases.push_back({ testName + " "s + platform->GetName(), std::move(content->GetDeformationField()), expDefField });
             }
         }
     }
diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp
index 6f39ef3c..b4c2d212 100644
--- a/reg-test/reg_test_conjugateGradient.cpp
+++ b/reg-test/reg_test_conjugateGradient.cpp
@@ -242,18 +242,14 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") {
             // Increase the precision for the output
             NR_COUT << std::fixed << std::setprecision(10);
 
-            // Set the control point grid
-            NiftiImage img = content->GetControlPointGrid();
-            // Use bestControlPointGrid to store bestDof during initialisation of the optimiser
-            img.copyData(bestControlPointGrid);
+            // Set the control point grid by using bestControlPointGrid to store bestDof during initialisation of the optimiser
+            content->F3dContent::GetControlPointGrid().copyData(bestControlPointGrid);
             content->UpdateControlPointGrid();
 
             // Set the transformation gradients
-            img = content->GetTransformationGradient();
-            img.copyData(transGrad);
+            content->F3dContent::GetTransformationGradient().copyData(transGrad);
             content->UpdateTransformationGradient();
-            img = contentBw->GetTransformationGradient();
-            img.copyData(transGradBw);
+            contentBw->F3dContent::GetTransformationGradient().copyData(transGradBw);
             contentBw->UpdateTransformationGradient();
 
             // Create a copy of the control point grid for expected results
@@ -266,8 +262,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") {
             UpdateControlPointPosition(controlPointGridExpected, bestControlPointGrid, transGrad, scale, optimiseX, optimiseY, optimiseZ);
 
             // Check the results
-            img = content->GetControlPointGrid();
-            const auto cppPtr = img.data();
+            const auto cppPtr = content->GetControlPointGrid().data();
             const auto cppExpPtr = controlPointGridExpected.data();
             for (size_t i = 0; i < controlPointGridExpected.nVoxels(); ++i) {
                 const float cppVal = cppPtr[i];
@@ -306,12 +301,10 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") {
                             gradientBwPtr[i] = distr(gen);
                     }
                     // Update the transformation gradients
-                    img = content->GetTransformationGradient();
-                    img.copyData(transGrad);
+                    content->F3dContent::GetTransformationGradient().copyData(transGrad);
                     content->UpdateTransformationGradient();
                     if (isSymmetric) {
-                        img = contentBw->GetTransformationGradient();
-                        img.copyData(transGradBw);
+                        contentBw->F3dContent::GetTransformationGradient().copyData(transGradBw);
                         contentBw->UpdateTransformationGradient();
                     }
 
@@ -320,13 +313,11 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") {
                     UpdateGradientValues(transGrad, false, isSymmetric, &transGradBw);
 
                     // Check the results
-                    img = content->GetTransformationGradient();
-                    const auto gradPtr = img.data();
+                    const auto gradPtr = content->GetTransformationGradient().data();
                     const auto gradExpPtr = transGrad.data();
                     NiftiImageData gradBwPtr, gradExpBwPtr;
                     if (isSymmetric) {
-                        img = contentBw->GetTransformationGradient();
-                        gradBwPtr = img.data();
+                        gradBwPtr = contentBw->GetTransformationGradient().data();
                         gradExpBwPtr = transGradBw.data();
                     }
                     for (size_t i = 0; i < transGrad.nVoxels(); ++i) {
diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp
index 8c6e0c67..5856d89e 100644
--- a/reg-test/reg_test_getDeformationField.cpp
+++ b/reg-test/reg_test_getDeformationField.cpp
@@ -114,10 +114,8 @@ class GetDeformationFieldTest {
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 // Compute the deformation field
                 compute->GetDeformationField(false, true); // no composition - use bspline
-                // Retrieve the deformation field
-                NiftiImage defField(content->GetDeformationField(), NiftiImage::Copy::Image);
-                // Save for testing
-                testCases.push_back({ testName + " "s + platform->GetName(), std::move(defField), std::move(expDefField) });
+                // Save the results for testing
+                testCases.push_back({ testName + " "s + platform->GetName(), std::move(content->GetDeformationField()), std::move(expDefField) });
             }
         }
 
@@ -186,12 +184,10 @@ class GetDeformationFieldTest {
                 unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 // Compute the deformation field
-                content->SetDeformationField(defField.disown());
+                content->SetDeformationField(std::move(defField));
                 compute->GetDeformationField(true, true); // with composition - use bspline
-                // Retrieve the deformation field
-                defField = NiftiImage(content->GetDeformationField(), NiftiImage::Copy::Image);
-                // Save for testing
-                testCases.push_back({ testName + " "s + platform->GetName(), std::move(defField), std::move(expDefField) });
+                // Save the results for testing
+                testCases.push_back({ testName + " "s + platform->GetName(), std::move(content->GetDeformationField()), std::move(expDefField) });
             }
         }
     }
diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp
index 22e53ad1..310451c4 100644
--- a/reg-test/reg_test_imageGradient.cpp
+++ b/reg-test/reg_test_imageGradient.cpp
@@ -179,7 +179,7 @@ TEST_CASE("Image Gradient", "[unit]") {
                 NR_COUT << std::fixed << std::setprecision(10);
 
                 // Set the warped gradient image to host the computation
-                NiftiImage warpedGradient(content->GetWarpedGradient());
+                NiftiImage& warpedGradient = content->GetWarpedGradient();
                 warpedGradient.setDim(NiftiDim::NDim, defField->ndim);
                 warpedGradient.setDim(NiftiDim::X, 1);
                 warpedGradient.setDim(NiftiDim::Y, 1);
@@ -188,14 +188,14 @@ TEST_CASE("Image Gradient", "[unit]") {
                 warpedGradient.recalcVoxelNumber();
 
                 // Set the deformation field
-                content->SetDeformationField(defField.disown());
+                content->SetDeformationField(std::move(defField));
 
                 // Do the computation
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->GetImageGradient(interp, 0, 0);
 
                 // Check all values
-                warpedGradient = content->GetWarpedGradient();
+                content->GetWarpedGradient();
                 const auto warpedGradPtr = warpedGradient.data();
                 const size_t nVoxels = warpedGradient.nVoxels();
                 for (size_t i = 0; i < nVoxels; ++i) {
diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp
index c46e817e..de8df81f 100644
--- a/reg-test/reg_test_interpolation.cpp
+++ b/reg-test/reg_test_interpolation.cpp
@@ -208,10 +208,10 @@ TEST_CASE("Interpolation", "[unit]") {
                 warped.setDim(NiftiDim::Z, 1);
                 warped.setDim(NiftiDim::U, 1);
                 warped.realloc();
-                content->SetWarped(warped.disown());
+                content->SetWarped(std::move(warped));
 
                 // Set the deformation field
-                content->SetDeformationField(defField.disown());
+                content->SetDeformationField(std::move(defField));
 
                 // Do the computation
                 if (isAladinContent) {
@@ -223,7 +223,7 @@ TEST_CASE("Interpolation", "[unit]") {
                 }
 
                 // Check all values
-                warped = content->GetWarped();
+                warped = std::move(content->GetWarped());
                 const auto warpedPtr = warped.data();
                 const size_t nVoxels = warped.nVoxels();
                 for (size_t i = 0; i < nVoxels; ++i) {
diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp
index e1bcd0ad..f37d0b1c 100644
--- a/reg-test/reg_test_lncc.cpp
+++ b/reg-test/reg_test_lncc.cpp
@@ -150,7 +150,7 @@ class LnccTest {
                 // Initialise the warped image using the nearest-neighbour interpolation
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->ResampleImage(0, 0);
-                content->SetWarped(floating.disown());
+                content->SetWarped(NiftiImage(floating));
                 // Create the measure creator
                 unique_ptr<MeasureCreator> measureCreator{ platform->CreateMeasureCreator() };
                 // Use LNCC as a measure
@@ -159,7 +159,7 @@ class LnccTest {
                 measure_lncc->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0
                 measureCreator->Initialise(*measure_lncc, *content);
                 const double lncc = measure_lncc->GetSimilarityMeasureValue();
-                // Save for testing
+                // Save the results for testing
                 testCases.push_back({ testName, lncc, expLncc });
             }
         }
diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp
index d3a2770e..c0c7f9f2 100644
--- a/reg-test/reg_test_nmi.cpp
+++ b/reg-test/reg_test_nmi.cpp
@@ -81,7 +81,7 @@ class NmiTest {
                 // Create the content
                 unique_ptr<DefContent> content{ contentCreator->Create(reference, floating) };
                 // Initialise the warped image using floating image
-                content->SetWarped(floating.disown());
+                content->SetWarped(NiftiImage(floating));
                 // Create the measure creator
                 unique_ptr<MeasureCreator> measureCreator{ platform->CreateMeasureCreator() };
                 // Use NMI as a measure
@@ -89,7 +89,7 @@ class NmiTest {
                 measure_nmi->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0
                 measureCreator->Initialise(*measure_nmi, *content);
                 const double nmi = measure_nmi->GetSimilarityMeasureValue();
-
+                // Save the results for testing
                 testCases.push_back({ testName + " " + platform->GetName(), nmi, expected });
             }
         }
diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp
index 5342e1b1..2ab098b9 100644
--- a/reg-test/reg_test_nmi_gradient.cpp
+++ b/reg-test/reg_test_nmi_gradient.cpp
@@ -81,7 +81,7 @@ class NmiGradientTest {
                 // Create the content
                 unique_ptr<DefContent> content{ contentCreator->Create(reference, floating) };
                 // Add some displacements to the deformation field to avoid grid effect
-                nifti_image *defField = content->Content::GetDeformationField();
+                NiftiImage& defField = content->Content::GetDeformationField();
                 float *defPtr = static_cast<float*>(defField->data);
                 for (size_t index = 0; index < defField->nvox; ++index)
                     defPtr[index] += 0.1f;
@@ -101,9 +101,9 @@ class NmiGradientTest {
                 // Compute the NMI gradient
                 measure_nmi->GetVoxelBasedSimilarityMeasureGradient(0);
                 // Create an image to store the gradient values
-                NiftiImage gradientImage(content->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
+                NiftiImage gradientImage(content->GetVoxelBasedMeasureGradient());
                 // Create an image to store the expected gradient values
-                NiftiImage expectedGradientImage(content->GetDeformationField(), NiftiImage::Copy::Image);
+                NiftiImage expectedGradientImage(content->GetDeformationField());
                 // Apply perturbations to each value in the deformation field
                 float *gradPtr = static_cast<float*>(expectedGradientImage->data);
                 constexpr float delta = 0.00001f;
diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp
index 08e9b0d3..f77cd68c 100644
--- a/reg-test/reg_test_normaliseGradient.cpp
+++ b/reg-test/reg_test_normaliseGradient.cpp
@@ -98,8 +98,7 @@ class NormaliseGradientTest {
                             unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
 
                             // Set the transformation gradient image to host the computation
-                            NiftiImage transGrad = content->GetTransformationGradient();
-                            transGrad.copyData(expTransGrad);
+                            content->F3dContent::GetTransformationGradient().copyData(expTransGrad);
                             content->UpdateTransformationGradient();
 
                             // Calculate the maximal length
@@ -111,11 +110,8 @@ class NormaliseGradientTest {
                             compute->NormaliseGradient(expMaxLength, optimiseX, optimiseY, optimiseZ);
                             NormaliseGradient<float>(expTransGrad, expMaxLength, optimiseX, optimiseY, optimiseZ);
 
-                            // Get the results
-                            transGrad = NiftiImage(content->GetTransformationGradient(), NiftiImage::Copy::Image);
-
-                            // Save for testing
-                            testCases.push_back({ testName, maxLength, expMaxLength, std::move(transGrad), std::move(expTransGrad) });
+                            // Save the results for testing
+                            testCases.push_back({ testName, maxLength, expMaxLength, std::move(content->GetTransformationGradient()), std::move(expTransGrad) });
                         }
                     }
                 }
diff --git a/reg-test/reg_test_regr_approxBendingEnergyGradient.cpp b/reg-test/reg_test_regr_approxBendingEnergyGradient.cpp
index a2a01bdf..ac30c7c0 100644
--- a/reg-test/reg_test_regr_approxBendingEnergyGradient.cpp
+++ b/reg-test/reg_test_regr_approxBendingEnergyGradient.cpp
@@ -112,12 +112,9 @@ class ApproxBendingEnergyGradientTest {
             computeCpu->ApproxBendingEnergyGradient(weight);
             computeCuda->ApproxBendingEnergyGradient(weight);
 
-            // Get the transformation gradients
-            NiftiImage transGradCpu(contentCpu->GetTransformationGradient(), NiftiImage::Copy::Image);
-            NiftiImage transGradCuda(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image);
-
-            // Save for testing
-            testCases.push_back({ testName, approxBendingEnergyCpu, approxBendingEnergyCuda, std::move(transGradCpu), std::move(transGradCuda) });
+            // Save the results for testing
+            testCases.push_back({ testName, approxBendingEnergyCpu, approxBendingEnergyCuda,
+                                std::move(contentCpu->GetTransformationGradient()), std::move(contentCuda->GetTransformationGradient()) });
         }
     }
 };
diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
index 530d404b..ab66f84f 100644
--- a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
+++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp
@@ -114,12 +114,9 @@ class ApproxLinearEnergyGradientTest {
             computeCpu->ApproxLinearEnergyGradient(weight);
             computeCuda->ApproxLinearEnergyGradient(weight);
 
-            // Get the transformation gradients
-            NiftiImage transGradCpu(contentCpu->GetTransformationGradient(), NiftiImage::Copy::Image);
-            NiftiImage transGradCuda(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image);
-
-            // Save for testing
-            testCases.push_back({ testName, approxLinearEnergyCpu, approxLinearEnergyCuda, std::move(transGradCpu), std::move(transGradCuda) });
+            // Save the results for testing
+            testCases.push_back({ testName, approxLinearEnergyCpu, approxLinearEnergyCuda,
+                                std::move(contentCpu->GetTransformationGradient()), std::move(contentCuda->GetTransformationGradient()) });
         }
     }
 };
diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp
index 8676f005..076a9945 100644
--- a/reg-test/reg_test_regr_blockMatching.cpp
+++ b/reg-test/reg_test_regr_blockMatching.cpp
@@ -98,8 +98,8 @@ class BMTest {
             ) };
 
             // Initialise the warped images
-            contentCpu->SetWarped(warpedCpu.disown());
-            contentCuda->SetWarped(warpedCuda.disown());
+            contentCpu->SetWarped(std::move(warpedCpu));
+            contentCuda->SetWarped(std::move(warpedCuda));
 
             // Initialise the block matching
             unique_ptr<BlockMatchingKernel> kernelCpu{ new CpuBlockMatchingKernel(contentCpu.get()) };
diff --git a/reg-test/reg_test_regr_exponentiateGradient.cpp b/reg-test/reg_test_regr_exponentiateGradient.cpp
index db24ff79..69e0ee40 100644
--- a/reg-test/reg_test_regr_exponentiateGradient.cpp
+++ b/reg-test/reg_test_regr_exponentiateGradient.cpp
@@ -115,25 +115,25 @@ class ExponentiateGradientTest {
             unique_ptr<F3dContent> contentBwCuda{ new CudaF3dContent(referenceBwCuda, referenceBwCuda, cppBwCuda) };
 
             // Set the deformation fields
-            contentCpu->SetDeformationField(defFieldCpu.disown());
-            contentCuda->SetDeformationField(defFieldCuda.disown());
+            contentCpu->SetDeformationField(std::move(defFieldCpu));
+            contentCuda->SetDeformationField(std::move(defFieldCuda));
 
             // Set the voxel-based measure gradient images
-            NiftiImage voxelGrad = contentCpu->GetVoxelBasedMeasureGradient();
-            voxelGrad->sform_code = voxelBasedGrad->sform_code;
-            voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk;
-            voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz;
-            voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
-            voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
-            voxelGrad.copyData(voxelBasedGrad);
+            NiftiImage& voxelGradCpu = contentCpu->GetVoxelBasedMeasureGradient();
+            voxelGradCpu->sform_code = voxelBasedGrad->sform_code;
+            voxelGradCpu->qto_ijk = voxelBasedGrad->qto_ijk;
+            voxelGradCpu->qto_xyz = voxelBasedGrad->qto_xyz;
+            voxelGradCpu->sto_ijk = voxelBasedGrad->sto_ijk;
+            voxelGradCpu->sto_xyz = voxelBasedGrad->sto_xyz;
+            voxelGradCpu.copyData(voxelBasedGrad);
             contentCpu->UpdateVoxelBasedMeasureGradient();
-            voxelGrad = contentCuda->DefContent::GetVoxelBasedMeasureGradient();
-            voxelGrad->sform_code = voxelBasedGrad->sform_code;
-            voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk;
-            voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz;
-            voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
-            voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
-            voxelGrad.copyData(voxelBasedGrad);
+            NiftiImage& voxelGradCuda = contentCuda->DefContent::GetVoxelBasedMeasureGradient();
+            voxelGradCuda->sform_code = voxelBasedGrad->sform_code;
+            voxelGradCuda->qto_ijk = voxelBasedGrad->qto_ijk;
+            voxelGradCuda->qto_xyz = voxelBasedGrad->qto_xyz;
+            voxelGradCuda->sto_ijk = voxelBasedGrad->sto_ijk;
+            voxelGradCuda->sto_xyz = voxelBasedGrad->sto_xyz;
+            voxelGradCuda.copyData(voxelBasedGrad);
             contentCuda->UpdateVoxelBasedMeasureGradient();
 
             // Create the computes
@@ -144,12 +144,8 @@ class ExponentiateGradientTest {
             computeCpu->ExponentiateGradient(*contentBwCpu);
             computeCuda->ExponentiateGradient(*contentBwCuda);
 
-            // Get the results
-            NiftiImage voxelGradCpu(contentCpu->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
-            NiftiImage voxelGradCuda(contentCuda->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
-
-            // Save for testing
-            testCases.push_back({ testName, std::move(voxelGradCpu), std::move(voxelGradCuda) });
+            // Save the results for testing
+            testCases.push_back({ testName, std::move(contentCpu->GetVoxelBasedMeasureGradient()), std::move(contentCuda->GetVoxelBasedMeasureGradient()) });
         }
     }
 };
diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp
index a4e8cc11..14daeb93 100644
--- a/reg-test/reg_test_regr_getDeformationField.cpp
+++ b/reg-test/reg_test_regr_getDeformationField.cpp
@@ -72,14 +72,13 @@ class GetDeformationFieldTest {
                         testName += " "s + platform->GetName() + " Composition="s + std::to_string(composition) + " Bspline="s + std::to_string(bspline);
                         unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
                         unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
-                        NiftiImage expDefField(content->Content::GetDeformationField(), NiftiImage::Copy::Image);
+                        NiftiImage expDefField(content->Content::GetDeformationField());
                         // Compute the deformation field
                         compute->GetDeformationField(composition, bspline);
-                        NiftiImage defField(content->GetDeformationField(), NiftiImage::Copy::Image);
                         // Compute the expected deformation field
                         GetDeformationField<float>(controlPointGrid, expDefField, content->GetReferenceMask(), composition, bspline);
-                        // Save for testing
-                        testCases.push_back({ std::move(testName), std::move(defField), std::move(expDefField) });
+                        // Save the results for testing
+                        testCases.push_back({ std::move(testName), std::move(content->GetDeformationField()), std::move(expDefField) });
                     }
                 }
             }
diff --git a/reg-test/reg_test_regr_kernelConvolution.cpp b/reg-test/reg_test_regr_kernelConvolution.cpp
index a65e4879..6d5f6a63 100644
--- a/reg-test/reg_test_regr_kernelConvolution.cpp
+++ b/reg-test/reg_test_regr_kernelConvolution.cpp
@@ -121,8 +121,8 @@ class KernelConvolutionTest {
             ) };
 
             // Use deformation fields to store images
-            contentCpu->SetDeformationField(imageCpu.disown());
-            contentCuda->SetDeformationField(imageCuda.disown());
+            contentCpu->SetDeformationField(std::move(imageCpu));
+            contentCuda->SetDeformationField(std::move(imageCuda));
 
             // Create the kernel convolution function for CUDA
             auto cudaKernelConvolution = Cuda::KernelConvolution<ConvKernelType(0)>;
@@ -136,12 +136,8 @@ class KernelConvolutionTest {
             reg_tools_kernelConvolution(contentCpu->GetDeformationField(), sigmaValues.data(), ConvKernelType(kernelType), nullptr, activeTimePoints, activeAxes);
             cudaKernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), activeTimePoints, activeAxes);
 
-            // Get the images
-            imageCpu = NiftiImage(contentCpu->GetDeformationField(), NiftiImage::Copy::Image);
-            imageCuda = NiftiImage(contentCuda->GetDeformationField(), NiftiImage::Copy::Image);
-
-            // Save for testing
-            testCases.push_back({ testName, std::move(imageCpu), std::move(imageCuda) });
+            // Save the results for testing
+            testCases.push_back({ testName, std::move(contentCpu->GetDeformationField()), std::move(contentCuda->GetDeformationField()) });
         }
     }
 };
diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp
index 681a8ffc..d215df69 100644
--- a/reg-test/reg_test_regr_lts.cpp
+++ b/reg-test/reg_test_regr_lts.cpp
@@ -110,8 +110,8 @@ class LtsTest {
             ) };
 
             // Initialise the warped images
-            contentCpu->SetWarped(warpedCpu.disown());
-            contentCuda->SetWarped(warpedCuda.disown());
+            contentCpu->SetWarped(std::move(warpedCpu));
+            contentCuda->SetWarped(std::move(warpedCuda));
 
             // Initialise the block matching and run it on the CPU
             unique_ptr<BlockMatchingKernel> bmKernelCpu{ new CpuBlockMatchingKernel(contentCpu.get()) };
diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp
index 08b25515..3deba91b 100644
--- a/reg-test/reg_test_regr_measure.cpp
+++ b/reg-test/reg_test_regr_measure.cpp
@@ -113,8 +113,8 @@ class MeasureTest {
             NiftiImage localWeightSimCpu(localWeightSim), localWeightSimCuda(localWeightSim);
 
             // Create the contents
-            auto contentsCpu = contentCreatorCpu->Create(referenceCpu, floatingCpu, controlPointGridCpu, controlPointGridCpuBw, localWeightSimCpu, nullptr, nullptr, nullptr, nullptr, sizeof(float));
-            auto contentsCuda = contentCreatorCuda->Create(referenceCuda, floatingCuda, controlPointGridCuda, controlPointGridCudaBw, localWeightSimCuda, nullptr, nullptr, nullptr, nullptr, sizeof(float));
+            auto contentsCpu = contentCreatorCpu->Create(referenceCpu, floatingCpu, controlPointGridCpu, controlPointGridCpuBw, &localWeightSimCpu, nullptr, nullptr, nullptr, nullptr, sizeof(float));
+            auto contentsCuda = contentCreatorCuda->Create(referenceCuda, floatingCuda, controlPointGridCuda, controlPointGridCudaBw, &localWeightSimCuda, nullptr, nullptr, nullptr, nullptr, sizeof(float));
             if (!isSymmetric) {
                 delete contentsCpu.second;
                 delete contentsCuda.second;
@@ -184,12 +184,9 @@ class MeasureTest {
                 measureCuda->GetVoxelBasedSimilarityMeasureGradient(t);
             }
 
-            // Get the voxel-based similarity measure gradients
-            NiftiImage voxelBasedGradCpu(contentCpu->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
-            NiftiImage voxelBasedGradCuda(contentCuda->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image);
-
-            // Save for testing
-            testCases.push_back({ testName, simMeasureCpu, simMeasureCuda, std::move(voxelBasedGradCpu), std::move(voxelBasedGradCuda) });
+            // Save the results for testing
+            testCases.push_back({ testName, simMeasureCpu, simMeasureCuda, std::move(contentCpu->GetVoxelBasedMeasureGradient()),
+                                std::move(contentCuda->GetVoxelBasedMeasureGradient()) });
         }
     }
 };
diff --git a/reg-test/reg_test_regr_resampleGradient.cpp b/reg-test/reg_test_regr_resampleGradient.cpp
index 0eadbce3..5a24573b 100644
--- a/reg-test/reg_test_regr_resampleGradient.cpp
+++ b/reg-test/reg_test_regr_resampleGradient.cpp
@@ -92,25 +92,25 @@ class ResampleGradientTest {
             unique_ptr<DefContent> contentCuda{ new CudaDefContent(referenceCuda, referenceCuda) };
 
             // Set the deformation fields
-            contentCpu->SetDeformationField(defFieldCpu.disown());
-            contentCuda->SetDeformationField(defFieldCuda.disown());
+            contentCpu->SetDeformationField(std::move(defFieldCpu));
+            contentCuda->SetDeformationField(std::move(defFieldCuda));
 
             // Set the voxel-based measure gradient images
-            NiftiImage voxelGrad = contentCpu->GetVoxelBasedMeasureGradient();
-            voxelGrad->sform_code = voxelBasedGrad->sform_code;
-            voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk;
-            voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz;
-            voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
-            voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
-            voxelGrad.copyData(voxelBasedGrad);
+            NiftiImage& voxelGradCpu = contentCpu->GetVoxelBasedMeasureGradient();
+            voxelGradCpu->sform_code = voxelBasedGrad->sform_code;
+            voxelGradCpu->qto_ijk = voxelBasedGrad->qto_ijk;
+            voxelGradCpu->qto_xyz = voxelBasedGrad->qto_xyz;
+            voxelGradCpu->sto_ijk = voxelBasedGrad->sto_ijk;
+            voxelGradCpu->sto_xyz = voxelBasedGrad->sto_xyz;
+            voxelGradCpu.copyData(voxelBasedGrad);
             contentCpu->UpdateVoxelBasedMeasureGradient();
-            voxelGrad = contentCuda->DefContent::GetVoxelBasedMeasureGradient();
-            voxelGrad->sform_code = voxelBasedGrad->sform_code;
-            voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk;
-            voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz;
-            voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk;
-            voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz;
-            voxelGrad.copyData(voxelBasedGrad);
+            NiftiImage& voxelGradCuda = contentCuda->DefContent::GetVoxelBasedMeasureGradient();
+            voxelGradCuda->sform_code = voxelBasedGrad->sform_code;
+            voxelGradCuda->qto_ijk = voxelBasedGrad->qto_ijk;
+            voxelGradCuda->qto_xyz = voxelBasedGrad->qto_xyz;
+            voxelGradCuda->sto_ijk = voxelBasedGrad->sto_ijk;
+            voxelGradCuda->sto_xyz = voxelBasedGrad->sto_xyz;
+            voxelGradCuda.copyData(voxelBasedGrad);
             contentCuda->UpdateVoxelBasedMeasureGradient();
 
             // Create the computes
@@ -121,7 +121,7 @@ class ResampleGradientTest {
             NiftiImage warpedCpu = computeCpu->ResampleGradient(1, -2.f);
             NiftiImage warpedCuda = computeCuda->ResampleGradient(1, -2.f);
 
-            // Save for testing
+            // Save the results for testing
             testCases.push_back({ testName, std::move(warpedCpu), std::move(warpedCuda) });
         }
     }
diff --git a/reg-test/reg_test_regr_symmetriseVelocityFields.cpp b/reg-test/reg_test_regr_symmetriseVelocityFields.cpp
index d7149814..0abbd3b4 100644
--- a/reg-test/reg_test_regr_symmetriseVelocityFields.cpp
+++ b/reg-test/reg_test_regr_symmetriseVelocityFields.cpp
@@ -103,11 +103,11 @@ class SymmetriseVelocityFieldsTest {
             computeCpu->SymmetriseVelocityFields(*contentBwCpu);
             computeCuda->SymmetriseVelocityFields(*contentBwCuda);
 
-            // Get the results of CUDA since CPU results are already inplace
-            contentCuda->GetControlPointGrid();
-            contentBwCuda->GetControlPointGrid();
+            // Since CPU results are already inplace, get CUDA results by destructing their contents
+            contentCuda = nullptr;
+            contentBwCuda = nullptr;
 
-            // Save for testing
+            // Save the results for testing
             testCases.push_back({ testName, std::move(cppCpu), std::move(cppBwCpu), std::move(cppCuda), std::move(cppBwCuda) });
         }
     }
diff --git a/reg-test/reg_test_regr_updateVelocityField.cpp b/reg-test/reg_test_regr_updateVelocityField.cpp
index 5e85062b..4d40e18a 100644
--- a/reg-test/reg_test_regr_updateVelocityField.cpp
+++ b/reg-test/reg_test_regr_updateVelocityField.cpp
@@ -78,11 +78,9 @@ class UpdateVelocityFieldTest {
                         unique_ptr<F3dContent> contentCuda{ new CudaF3dContent(referenceCuda, referenceCuda, cppCuda) };
 
                         // Set the transformation gradient image to host the computation
-                        NiftiImage transGradCpu = contentCpu->GetTransformationGradient();
-                        transGradCpu.copyData(transGrad);
+                        contentCpu->GetTransformationGradient().copyData(transGrad);
                         contentCpu->UpdateTransformationGradient();
-                        NiftiImage transGradCuda = contentCuda->GetTransformationGradient();
-                        transGradCuda.copyData(transGrad);
+                        contentCuda->F3dContent::GetTransformationGradient().copyData(transGrad);
                         contentCuda->UpdateTransformationGradient();
 
                         // Create the computes
@@ -93,12 +91,9 @@ class UpdateVelocityFieldTest {
                         computeCpu->UpdateVelocityField(scale, optimiseX, optimiseY, optimiseZ);
                         computeCuda->UpdateVelocityField(scale, optimiseX, optimiseY, optimiseZ);
 
-                        // Get the results
-                        transGradCpu = NiftiImage(contentCpu->GetTransformationGradient(), NiftiImage::Copy::Image);
-                        transGradCuda = NiftiImage(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image);
-
-                        // Save for testing
-                        testCases.push_back({ testName, std::move(transGradCpu), std::move(transGradCuda) });
+                        // Save the results for testing
+                        testCases.push_back({ testName, std::move(contentCpu->GetTransformationGradient()),
+                                            std::move(contentCuda->GetTransformationGradient()) });
                     }
                 }
             }
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index 7d807217..b588989a 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -90,11 +90,11 @@ class VoxelCentricToNodeCentricTest {
                 unique_ptr<F3dContent> content{ contentCreator->Create(reference, reference, controlPointGrid) };
 
                 // Set the matrices required for computation
-                nifti_image *floating = content->Content::GetFloating();
+                NiftiImage& floating = content->Content::GetFloating();
                 if (floating->sform_code > 0)
                     floating->sto_ijk = matrices[0];
                 else floating->qto_ijk = matrices[0];
-                NiftiImage transGrad = content->F3dContent::GetTransformationGradient();
+                NiftiImage& transGrad = content->F3dContent::GetTransformationGradient();
                 static int sfc = 0;
                 transGrad->sform_code = sfc++ % 2;
                 if (transGrad->sform_code > 0)
@@ -104,7 +104,7 @@ class VoxelCentricToNodeCentricTest {
                 nifti_add_extension(transGrad, reinterpret_cast<const char*>(&invMatrix), sizeof(invMatrix), NIFTI_ECODE_IGNORE);
 
                 // Set the voxel-based measure gradient to host the computation
-                NiftiImage voxelGrad = content->F3dContent::GetVoxelBasedMeasureGradient();
+                NiftiImage& voxelGrad = content->F3dContent::GetVoxelBasedMeasureGradient();
                 if (voxelGrad->sform_code > 0)
                     voxelGrad->sto_ijk = matrices[3];
                 else voxelGrad->qto_ijk = matrices[3];
@@ -119,25 +119,26 @@ class VoxelCentricToNodeCentricTest {
                 // Extract the node-based NMI gradient from the voxel-based NMI gradient
                 unique_ptr<Compute> compute{ platform->CreateCompute(*content) };
                 compute->VoxelCentricToNodeCentric(weight);
-                transGrad = NiftiImage(content->GetTransformationGradient(), NiftiImage::Copy::Image);
 
-                testCases.push_back({ testName + " "s + platform->GetName() + " Weight="s + std::to_string(weight), std::move(transGrad), std::move(expTransGrad) });
+                // Save the results for testing
+                testCases.push_back({ testName + " "s + platform->GetName() + " Weight="s + std::to_string(weight),
+                                    std::move(content->GetTransformationGradient()), std::move(expTransGrad) });
             }
         }
     }
 
     template<typename DataType>
-    void VoxelCentricToNodeCentric(const nifti_image *floating, NiftiImage& nodeGrad, const NiftiImage& voxelGrad, float weight) {
+    void VoxelCentricToNodeCentric(const NiftiImage& floating, NiftiImage& nodeGrad, const NiftiImage& voxelGrad, float weight) {
         const mat44 *voxelToMillimetre = floating->sform_code > 0 ? &floating->sto_ijk : &floating->qto_ijk;
         const bool is3d = nodeGrad->nz > 1;
 
-        const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeGrad, 3);
+        const size_t nodeNumber = nodeGrad.nVoxelsPerVolume();
         auto nodePtr = nodeGrad.data();
         auto nodePtrX = nodePtr.begin();
         auto nodePtrY = nodePtrX + nodeNumber;
         auto nodePtrZ = nodePtrY + nodeNumber;
 
-        const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelGrad, 3);
+        const size_t voxelNumber = voxelGrad.nVoxelsPerVolume();
         auto voxelPtr = voxelGrad.data();
         auto voxelPtrX = voxelPtr.begin();
         auto voxelPtrY = voxelPtrX + voxelNumber;

From 7e1e926623f988cdf1a81f9cbca4cc3d3ca0aec3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 27 Aug 2024 22:29:52 +0100
Subject: [PATCH 313/314] Revert "Use float gam instead of double in
 CudaOptimiser"

This reverts commit b9c9beca65c9c7f6862e1c1ca50c70eaafd1fbfa.
---
 niftyreg_build_version.txt    | 2 +-
 reg-lib/cuda/CudaOptimiser.cu | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index 21fbd2eb..e828e5d0 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-433
+434
diff --git a/reg-lib/cuda/CudaOptimiser.cu b/reg-lib/cuda/CudaOptimiser.cu
index 7a4e13a7..9b1618c9 100644
--- a/reg-lib/cuda/CudaOptimiser.cu
+++ b/reg-lib/cuda/CudaOptimiser.cu
@@ -236,7 +236,7 @@ void GetConjugateGradient(float4 *gradientCuda,
         return make_double2(dgg, gg);
     };
 
-    float gam;
+    double gam;
     thrust::counting_iterator<int> it(0);
     const double2 gg = thrust::transform_reduce(thrust::device, it, it + nVoxels, [=]__device__(const int index) {
         return calcGam(gradientTexture, conjugateGTexture, conjugateHTexture, index);
@@ -246,8 +246,8 @@ void GetConjugateGradient(float4 *gradientCuda,
         const double2 ggBw = thrust::transform_reduce(thrust::device, it, it + nVoxelsBw, [=]__device__(const int index) {
             return calcGam(gradientBwTexture, conjugateGBwTexture, conjugateHBwTexture, index);
         }, make_double2(0, 0), thrust::plus<double2>());
-        gam = static_cast<float>((gg.x + ggBw.x) / (gg.y + ggBw.y));
-    } else gam = static_cast<float>(gg.x / gg.y);
+        gam = (gg.x + ggBw.x) / (gg.y + ggBw.y);
+    } else gam = gg.x / gg.y;
 
     // Conjugate gradient
     auto conjugate = [gam]__device__(float4 *gradientCuda, float4 *conjugateGCuda, float4 *conjugateHCuda,

From a148f146a44e5ee30281a89063df1a7be7e35503 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Onur=20=C3=9Clgen?= <onur.ulgen@kcl.ac.uk>
Date: Tue, 27 Aug 2024 22:42:59 +0100
Subject: [PATCH 314/314] Fix Floor(), Ceil(), and Round() functions

---
 niftyreg_build_version.txt                    |  2 +-
 reg-apps/reg_aladin.cpp                       |  2 +-
 reg-apps/reg_benchmark.cpp                    | 60 +++++++--------
 reg-apps/reg_f3d.cpp                          |  2 +-
 reg-apps/reg_ppcnr.cpp                        |  2 +-
 reg-apps/reg_resample.cpp                     |  2 +-
 reg-apps/reg_tools.cpp                        | 16 ++--
 reg-apps/reg_transform.cpp                    | 14 ++--
 reg-lib/_reg_base.cpp                         |  8 +-
 reg-lib/cpu/Maths.hpp                         | 22 +++---
 reg-lib/cpu/_reg_blockMatching.cpp            |  6 +-
 reg-lib/cpu/_reg_localTrans.cpp               | 66 ++++++++--------
 reg-lib/cpu/_reg_localTrans_jac.cpp           | 40 +++++-----
 reg-lib/cpu/_reg_localTrans_regul.cpp         | 12 +--
 reg-lib/cpu/_reg_resampling.cpp               | 76 +++++++++----------
 reg-lib/cpu/_reg_ssd.cpp                      | 12 +--
 reg-lib/cpu/_reg_tools.cpp                    |  8 +-
 reg-lib/cuda/CudaLocalTransformation.cu       | 26 +++----
 .../cuda/CudaLocalTransformationKernels.cu    | 32 ++++----
 reg-lib/cuda/CudaResampling.cu                |  6 +-
 reg-lib/cuda/CudaToolsKernels.cu              |  2 +-
 reg-lib/cuda/resampleKernel.cu                | 10 +--
 .../reg_test_voxelCentricToNodeCentric.cpp    |  2 +-
 23 files changed, 214 insertions(+), 214 deletions(-)

diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt
index e828e5d0..5910394b 100644
--- a/niftyreg_build_version.txt
+++ b/niftyreg_build_version.txt
@@ -1 +1 @@
-434
+435
diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp
index 6cf515a4..ff41859b 100755
--- a/reg-apps/reg_aladin.cpp
+++ b/reg-apps/reg_aladin.cpp
@@ -455,7 +455,7 @@ int main(int argc, char **argv) {
 
     time_t end;
     time(&end);
-    const int minutes = Floor((end - start) / 60.0f);
+    const int minutes = Floor<int>((end - start) / 60.0f);
     const int seconds = static_cast<int>(end - start) - 60 * minutes;
     NR_VERBOSE_APP("Registration performed in " << minutes << " min " << seconds << " sec");
     NR_VERBOSE_APP("Have a good day!");
diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp
index dd439f62..8ff35f9a 100644
--- a/reg-apps/reg_benchmark.cpp
+++ b/reg-apps/reg_benchmark.cpp
@@ -122,9 +122,9 @@ int main(int argc, char **argv)
 
    // A control point image is created
    dim_img[0]=5;
-   dim_img[1]=Floor(targetImage->nx*targetImage->dx/gridSpacing)+4;
-   dim_img[2]=Floor(targetImage->ny*targetImage->dy/gridSpacing)+4;
-   dim_img[3]=Floor(targetImage->nz*targetImage->dz/gridSpacing)+4;
+   dim_img[1]=Floor<int>(targetImage->nx*targetImage->dx/gridSpacing)+4;
+   dim_img[2]=Floor<int>(targetImage->ny*targetImage->dy/gridSpacing)+4;
+   dim_img[3]=Floor<int>(targetImage->nz*targetImage->dz/gridSpacing)+4;
    dim_img[5]=3;
    dim_img[4]=dim_img[6]=dim_img[7]=1;
    nifti_image *controlPointImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true);
@@ -245,7 +245,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf( "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -261,7 +261,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -295,7 +295,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -314,7 +314,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -345,7 +345,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i scaling-and-squaring - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i scaling-and-squarings - %i min %i sec\n", maxIt, minutes, seconds);
@@ -362,7 +362,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i scaling-and-squaring - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i scaling-and-squarings - %i min %i sec\n", maxIt, minutes, seconds);
@@ -395,7 +395,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -416,7 +416,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -447,7 +447,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -466,7 +466,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -525,7 +525,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -558,7 +558,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -588,9 +588,9 @@ int main(int argc, char **argv)
       maxIt=10000 / dimension;
 //        maxIt=1;
       int smoothingRadius[3];
-      smoothingRadius[0] = Floor( 2.0*controlPointImage->dx/targetImage->dx );
-      smoothingRadius[1] = Floor( 2.0*controlPointImage->dy/targetImage->dy );
-      smoothingRadius[2] = Floor( 2.0*controlPointImage->dz/targetImage->dz );
+      smoothingRadius[0] = Floor<int>( 2.0*controlPointImage->dx/targetImage->dx );
+      smoothingRadius[1] = Floor<int>( 2.0*controlPointImage->dy/targetImage->dy );
+      smoothingRadius[2] = Floor<int>( 2.0*controlPointImage->dz/targetImage->dz );
       time(&start);
       for(int i=0; i<maxIt; ++i)
       {
@@ -599,7 +599,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -620,7 +620,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -650,7 +650,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -665,7 +665,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -690,7 +690,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -708,7 +708,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -729,7 +729,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -743,7 +743,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -764,7 +764,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -778,7 +778,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -831,7 +831,7 @@ int main(int argc, char **argv)
       }
       time(&end);
       cpuTime=(end-start);
-      minutes = Floor(float(cpuTime)/60.0f);
+      minutes = Floor<int>(float(cpuTime)/60.0f);
       seconds = (int)(cpuTime - 60*minutes);
       printf("CPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
       fprintf(outputFile, "CPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
@@ -852,7 +852,7 @@ int main(int argc, char **argv)
          }
          time(&end);
          gpuTime=(end-start);
-         minutes = Floor(float(gpuTime)/60.0f);
+         minutes = Floor<int>(float(gpuTime)/60.0f);
          seconds = (int)(gpuTime - 60*minutes);
          printf("GPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
          fprintf(outputFile, "GPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds);
diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp
index e0b3fe48..e8b16d53 100755
--- a/reg-apps/reg_f3d.cpp
+++ b/reg-apps/reg_f3d.cpp
@@ -711,7 +711,7 @@ int main(int argc, char **argv) {
 
     time_t end;
     time(&end);
-    const int minutes = Floor((end - start) / 60.0f);
+    const int minutes = Floor<int>((end - start) / 60.0f);
     const int seconds = static_cast<int>(end - start) - 60 * minutes;
     NR_VERBOSE_APP("Registration performed in " << minutes << " min " << seconds << " sec");
     NR_VERBOSE_APP("Have a good day!");
diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp
index e3c664f3..d3ad209a 100755
--- a/reg-apps/reg_ppcnr.cpp
+++ b/reg-apps/reg_ppcnr.cpp
@@ -963,7 +963,7 @@ int main(int argc, char **argv)
 
    time_t end;
    time( &end );
-   int minutes = Floor(float(end-start)/60.0f);
+   int minutes = Floor<int>(float(end-start)/60.0f);
    int seconds = (int)(end-start - 60*minutes);
    NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n";
    if(flag->locality)
diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp
index bfcfe963..fc2590ed 100755
--- a/reg-apps/reg_resample.cpp
+++ b/reg-apps/reg_resample.cpp
@@ -483,7 +483,7 @@ int main(int argc, char **argv)
                                   param->interpolation,
                                   param->paddingValue,
                                   jacobian,
-                                  (char)Round(param->PSF_Algorithm));
+                                  Round<char>(param->PSF_Algorithm));
             NR_DEBUG("PSF resampling completed");
             free(jacobian);
          }
diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp
index 033077e5..a4172fa0 100755
--- a/reg-apps/reg_tools.cpp
+++ b/reg-apps/reg_tools.cpp
@@ -499,8 +499,8 @@ int main(int argc, char **argv)
         reg_tools_changeDatatype<float>(image);
         nifti_image *normImage = nifti_dup(*image);
         HeapSort(static_cast<float *>(normImage->data), normImage->nvox);
-        float minValue = static_cast<float *>(normImage->data)[Floor(03*(int)normImage->nvox/100)];
-        float maxValue = static_cast<float *>(normImage->data)[Floor(97*(int)normImage->nvox/100)];
+        float minValue = static_cast<float *>(normImage->data)[Floor<size_t>(0.03*normImage->nvox)];
+        float maxValue = static_cast<float *>(normImage->data)[Floor<size_t>(0.97*normImage->nvox)];
         reg_tools_subtractValueFromImage(image,normImage,minValue);
         reg_tools_divideValueToImage(normImage,normImage,maxValue-minValue);
         if(flag->outputImageFlag)
@@ -803,10 +803,10 @@ int main(int argc, char **argv)
         // Define the size of the new image
         int newDim[8];
         for(size_t i=0; i<8; ++i) newDim[i]=image->dim[i];
-        newDim[1]=Ceil((float)image->dim[1]*image->pixdim[1]/param->pixdimX);
-        newDim[2]=Ceil((float)image->dim[2]*image->pixdim[2]/param->pixdimY);
+        newDim[1]=Ceil<int>((float)image->dim[1]*image->pixdim[1]/param->pixdimX);
+        newDim[2]=Ceil<int>((float)image->dim[2]*image->pixdim[2]/param->pixdimY);
         if(image->nz>1)
-            newDim[3]=Ceil((float)image->dim[3]*image->pixdim[3]/param->pixdimZ);
+            newDim[3]=Ceil<int>((float)image->dim[3]*image->pixdim[3]/param->pixdimZ);
         // Create the new image
         nifti_image *newImg=nifti_make_new_nim(newDim,image->datatype,true);
         newImg->pixdim[1]=newImg->dx=param->pixdimX;
@@ -954,7 +954,7 @@ int main(int argc, char **argv)
                 for(int y=0; y<image->ny; ++y){
                     for(int x=0; x<image->nx; ++x){
                         size_t outIndex = ((z*image->ny+y)*image->nx+x)*image->nt*image->nu+t;
-                        outPtr[outIndex] = Round(*inPtr);
+                        outPtr[outIndex] = Round<unsigned char>(*inPtr);
                         ++inPtr;
                     }
                 }
@@ -997,8 +997,8 @@ int main(int argc, char **argv)
                     float value = *inPtr * 255.f;
                     size_t outIndex = ((z*image->ny+y)*image->nx+x)*3;
                     if (value > 0)
-                        outPtr[outIndex] = static_cast<unsigned char>(Round(value>255?255:value));
-                    else outPtr[outIndex+1] = static_cast<unsigned char>(Round(-value<-255?-255:-value));
+                        outPtr[outIndex] = Round<unsigned char>(value > 255 ? 255 : value);
+                    else outPtr[outIndex + 1] = Round<unsigned char>(-value < -255 ? -255 : -value);
                     outPtr[outIndex+2] = 0;
                     ++inPtr;
                 }
diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp
index cb19fd66..5b796154 100755
--- a/reg-apps/reg_transform.cpp
+++ b/reg-apps/reg_transform.cpp
@@ -398,7 +398,7 @@ int main(int argc, char **argv) {
             if (affineTransformation != nullptr) {
                 reg_affine_getDeformationField(affineTransformation, outputTransformationImage);
             } else {
-                switch (Round(inputTransformationImage->intent_p1)) {
+                switch (Round<int>(inputTransformationImage->intent_p1)) {
                 case DEF_FIELD:
                     NR_INFO("The specified transformation is a deformation field:");
                     NR_INFO(inputTransformationImage->fname);
@@ -468,7 +468,7 @@ int main(int argc, char **argv) {
         }
         // Save the generated transformation
         reg_io_WriteImageFile(outputTransformationImage, param->outputTransName);
-        switch (Round(outputTransformationImage->intent_p1)) {
+        switch (Round<int>(outputTransformationImage->intent_p1)) {
         case DEF_FIELD:
             NR_INFO("The deformation field has been saved as:");
             NR_INFO(param->outputTransName);
@@ -593,7 +593,7 @@ int main(int argc, char **argv) {
             output1TransImage->data = calloc(output1TransImage->nvox, output1TransImage->nbyper);
             if (affine1Trans != nullptr) {
                 reg_affine_getDeformationField(affine1Trans, output1TransImage);
-            } else switch (Round(input1TransImage->intent_p1)) {
+            } else switch (Round<int>(input1TransImage->intent_p1)) {
             case LIN_SPLINE_GRID:
             case CUB_SPLINE_GRID:
                 NR_INFO("Transformation 1 is a spline parametrisation:");
@@ -659,7 +659,7 @@ int main(int argc, char **argv) {
                 reg_affine_getDeformationField(affine2Trans, output2TransImage);
                 reg_defField_compose(output2TransImage, output1TransImage, nullptr);
             } else {
-                switch (Round(input2TransImage->intent_p1)) {
+                switch (Round<int>(input2TransImage->intent_p1)) {
                 case LIN_SPLINE_GRID:
                 case CUB_SPLINE_GRID:
                     NR_INFO("Transformation 2 is a spline parametrisation:");
@@ -821,7 +821,7 @@ int main(int argc, char **argv) {
         if (affineTransformation != nullptr) {
             reg_affine_getDeformationField(affineTransformation, deformationFieldImage);
         } else if (inputTransformationImage != nullptr) {
-            switch (Round(inputTransformationImage->intent_p1)) {
+            switch (Round<int>(inputTransformationImage->intent_p1)) {
             case DEF_FIELD:
                 NR_INFO("The specified transformation is a deformation field:");
                 NR_INFO(inputTransformationImage->fname);
@@ -982,7 +982,7 @@ int main(int argc, char **argv) {
                 NR_ERROR("Error when reading the input image: " << param->inputTransName);
                 return EXIT_FAILURE;
             }
-            switch (Round(inputTransImage->intent_p1)) {
+            switch (Round<int>(inputTransImage->intent_p1)) {
             case LIN_SPLINE_GRID:
             case CUB_SPLINE_GRID:
                 reg_getDisplacementFromDeformation(inputTransImage);
@@ -1099,7 +1099,7 @@ int main(int argc, char **argv) {
         outputTransImage->scl_inter = 0.f;
         outputTransImage->data = malloc(outputTransImage->nvox * outputTransImage->nbyper);
         // Invert the provided
-        switch (Round(inputTransImage->intent_p1)) {
+        switch (Round<int>(inputTransImage->intent_p1)) {
         case DEF_FIELD:
             reg_defFieldInvert(inputTransImage, outputTransImage, 1.0e-6f);
             memset(outputTransImage->descrip, 0, 80);
diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp
index 4eb441ef..d5c7624f 100644
--- a/reg-lib/_reg_base.cpp
+++ b/reg-lib/_reg_base.cpp
@@ -405,9 +405,9 @@ void reg_base<T>::Initialise() {
         HeapSort(refDataPtr, tmpReference->nvox);
         // Update the reference threshold values if no value has been setup by the user
         if (referenceThresholdLow[0] == std::numeric_limits<T>::lowest())
-            referenceThresholdLow[0] = refDataPtr[Round((float)tmpReference->nvox * 0.02f)];
+            referenceThresholdLow[0] = refDataPtr[Round<size_t>(tmpReference->nvox * 0.02)];
         if (referenceThresholdUp[0] == std::numeric_limits<T>::max())
-            referenceThresholdUp[0] = refDataPtr[Round((float)tmpReference->nvox * 0.98f)];
+            referenceThresholdUp[0] = refDataPtr[Round<size_t>(tmpReference->nvox * 0.98)];
 
         // Create a copy of the floating image to extract the robust range
         NiftiImage tmpFloating = inputFloating;
@@ -417,9 +417,9 @@ void reg_base<T>::Initialise() {
         HeapSort(floDataPtr, tmpFloating->nvox);
         // Update the floating threshold values if no value has been setup by the user
         if (floatingThresholdLow[0] == std::numeric_limits<T>::lowest())
-            floatingThresholdLow[0] = floDataPtr[Round((float)tmpFloating->nvox * 0.02f)];
+            floatingThresholdLow[0] = floDataPtr[Round<size_t>(tmpFloating->nvox * 0.02)];
         if (floatingThresholdUp[0] == std::numeric_limits<T>::max())
-            floatingThresholdUp[0] = floDataPtr[Round((float)tmpFloating->nvox * 0.98f)];
+            floatingThresholdUp[0] = floDataPtr[Round<size_t>(tmpFloating->nvox * 0.98)];
     }
 
     // FINEST LEVEL OF REGISTRATION
diff --git a/reg-lib/cpu/Maths.hpp b/reg-lib/cpu/Maths.hpp
index 56782eda..58dbe38a 100644
--- a/reg-lib/cpu/Maths.hpp
+++ b/reg-lib/cpu/Maths.hpp
@@ -59,19 +59,19 @@ template<typename T>
 DEVICE inline T Cube(const T& x) {
     return x * x * x;
 }
-template<typename T>
-DEVICE inline int Floor(const T& x) {
-    const int i = static_cast<int>(x);
-    return i - (x < i);
+template<typename RetT, typename T>
+DEVICE inline RetT Floor(const T& x) {
+    const int64_t i = static_cast<int64_t>(x);
+    return static_cast<RetT>(i - (x < i));
 }
-template<typename T>
-DEVICE inline int Ceil(const T& x) {
-    const int i = static_cast<int>(x);
-    return i + (x > i);
+template<typename RetT, typename T>
+DEVICE inline RetT Ceil(const T& x) {
+    const int64_t i = static_cast<int64_t>(x);
+    return static_cast<RetT>(i + (x > i));
 }
-template<typename T>
-DEVICE inline int Round(const T& x) {
-    return static_cast<int>(x + (x >= 0 ? 0.5 : -0.5));
+template<typename RetT, typename T>
+DEVICE inline RetT Round(const T& x) {
+    return static_cast<RetT>(static_cast<int64_t>(x + (x >= 0 ? 0.5 : -0.5)));
 }
 /* *************************************************************** */
 DEVICE inline void Divide(const int num, const int denom, int& quot, int& rem) {
diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp
index fce081f5..7f48c1f2 100755
--- a/reg-lib/cpu/_reg_blockMatching.cpp
+++ b/reg-lib/cpu/_reg_blockMatching.cpp
@@ -232,10 +232,10 @@ void initialise_block_matching_method(nifti_image * reference,
    }
 
    params->voxelCaptureRange = 3;
-   params->blockNumber[0] = Ceil((double)reference->nx / (double)BLOCK_WIDTH);
-   params->blockNumber[1] = Ceil((double)reference->ny / (double)BLOCK_WIDTH);
+   params->blockNumber[0] = Ceil<unsigned>((double)reference->nx / (double)BLOCK_WIDTH);
+   params->blockNumber[1] = Ceil<unsigned>((double)reference->ny / (double)BLOCK_WIDTH);
    if (reference->nz > 1) {
-      params->blockNumber[2] = Ceil((double)reference->nz / (double)BLOCK_WIDTH);
+      params->blockNumber[2] = Ceil<unsigned>((double)reference->nz / (double)BLOCK_WIDTH);
       params->dim = 3;
    }
    else {
diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp
index 418b310d..0939a91e 100755
--- a/reg-lib/cpu/_reg_localTrans.cpp
+++ b/reg-lib/cpu/_reg_localTrans.cpp
@@ -24,9 +24,9 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage,
                                 const float *spacing) {
     // Define the control point grid dimensions
     vector<NiftiImage::dim_t> dims{
-        Ceil(referenceImage->nx * referenceImage->dx / spacing[0] + 3.f),
-        Ceil(referenceImage->ny * referenceImage->dy / spacing[1] + 3.f),
-        referenceImage->nz > 1 ? Ceil(referenceImage->nz * referenceImage->dz / spacing[2] + 3.f) : 1,
+        Ceil<NiftiImage::dim_t>(referenceImage->nx * referenceImage->dx / spacing[0] + 3.f),
+        Ceil<NiftiImage::dim_t>(referenceImage->ny * referenceImage->dy / spacing[1] + 3.f),
+        referenceImage->nz > 1 ? Ceil<NiftiImage::dim_t>(referenceImage->nz * referenceImage->dz / spacing[2] + 3.f) : 1,
         1,
         referenceImage->nz > 1 ? 3 : 2
     };
@@ -277,9 +277,9 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage,
 
     // Compute the dimension of the control point grids
     const vector<NiftiImage::dim_t> dims{
-        Ceil((maxPosition[0] - minPosition[0]) / spacing[0] + 3.f),
-        Ceil((maxPosition[1] - minPosition[1]) / spacing[1] + 3.f),
-        referenceImage->nz > 1 ? Ceil((maxPosition[2] - minPosition[2]) / spacing[2] + 3.f) : 1,
+        Ceil<NiftiImage::dim_t>((maxPosition[0] - minPosition[0]) / spacing[0] + 3.f),
+        Ceil<NiftiImage::dim_t>((maxPosition[1] - minPosition[1]) / spacing[1] + 3.f),
+        referenceImage->nz > 1 ? Ceil<NiftiImage::dim_t>((maxPosition[2] - minPosition[2]) / spacing[2] + 3.f) : 1,
         1,
         referenceImage->nz > 1 ? 3 : 2
     };
@@ -452,17 +452,17 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint,
                             referenceMatrix_real_to_voxel.m[2][3];
 
                         // The spline coefficients are computed
-                        xPre = Floor(voxel[0]);
+                        xPre = Floor<int>(voxel[0]);
                         xBasis[1] = voxel[0] - static_cast<DataType>(xPre);
                         if (xBasis[1] < 0) xBasis[1] = 0; //rounding error
                         xBasis[0] = 1.f - xBasis[1];
 
-                        yPre = Floor(voxel[1]);
+                        yPre = Floor<int>(voxel[1]);
                         yBasis[1] = voxel[1] - static_cast<DataType>(yPre);
                         if (yBasis[1] < 0) yBasis[1] = 0; //rounding error
                         yBasis[0] = 1.f - yBasis[1];
 
-                        zPre = Floor(voxel[2]);
+                        zPre = Floor<int>(voxel[2]);
                         zBasis[1] = voxel[2] - static_cast<DataType>(zPre);
                         if (zBasis[1] < 0) zBasis[1] = 0; //rounding error
                         zBasis[0] = 1.f - zBasis[1];
@@ -643,13 +643,13 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint,
                         + referenceMatrix_real_to_voxel->m[1][3];
 
                     // The spline coefficients are computed
-                    xPre = Floor(xVoxel);
+                    xPre = Floor<int>(xVoxel);
                     basis = xVoxel - static_cast<DataType>(xPre--);
                     if (basis < 0) basis = 0; //rounding error
                     if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
                     else get_SplineBasisValues<DataType>(basis, xBasis);
 
-                    yPre = Floor(yVoxel);
+                    yPre = Floor<int>(yVoxel);
                     basis = yVoxel - static_cast<DataType>(yPre--);
                     if (basis < 0) basis = 0; //rounding error
                     if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
@@ -970,19 +970,19 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint,
                             referenceMatrix_real_to_voxel.m[2][3];
 
                         // The spline coefficients are computed
-                        xPre = Floor(voxel[0]);
+                        xPre = Floor<int>(voxel[0]);
                         basis = voxel[0] - static_cast<DataType>(xPre--);
                         if (basis < 0) basis = 0; //rounding error
                         if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
                         else get_SplineBasisValues<DataType>(basis, xBasis);
 
-                        yPre = Floor(voxel[1]);
+                        yPre = Floor<int>(voxel[1]);
                         basis = voxel[1] - static_cast<DataType>(yPre--);
                         if (basis < 0) basis = 0; //rounding error
                         if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
                         else get_SplineBasisValues<DataType>(basis, yBasis);
 
-                        zPre = Floor(voxel[2]);
+                        zPre = Floor<int>(voxel[2]);
                         basis = voxel[2] - static_cast<DataType>(zPre--);
                         if (basis < 0) basis = 0; //rounding error
                         if (bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
@@ -1614,9 +1614,9 @@ void reg_voxelCentricToNodeCentric(nifti_image *nodeImage,
                 // linear interpolation is performed
                 DataType basisX[2], basisY[2], basisZ[2] = { 0, 0 };
                 int pre[3] = {
-                    Floor(voxelCoord[0]),
-                    Floor(voxelCoord[1]),
-                    Floor(voxelCoord[2])
+                    Floor<int>(voxelCoord[0]),
+                    Floor<int>(voxelCoord[1]),
+                    Floor<int>(voxelCoord[2])
                 };
                 basisX[1] = voxelCoord[0] - static_cast<DataType>(pre[0]);
                 basisX[0] = static_cast<DataType>(1) - basisX[1];
@@ -1731,8 +1731,8 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint,
     splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f;
     splineControlPoint->dz = 1.0f;
     if (referenceImage != nullptr) {
-        splineControlPoint->dim[1] = splineControlPoint->nx = Ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx + 3.f);
-        splineControlPoint->dim[2] = splineControlPoint->ny = Ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy + 3.f);
+        splineControlPoint->dim[1] = splineControlPoint->nx = Ceil<int>(referenceImage->nx * referenceImage->dx / splineControlPoint->dx + 3.f);
+        splineControlPoint->dim[2] = splineControlPoint->ny = Ceil<int>(referenceImage->ny * referenceImage->dy / splineControlPoint->dy + 3.f);
     } else {
         splineControlPoint->dim[1] = splineControlPoint->nx = (oldDim[1] - 3) * 2 + 3;
         splineControlPoint->dim[2] = splineControlPoint->ny = (oldDim[2] - 3) * 2 + 3;
@@ -1819,9 +1819,9 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_
     splineControlPoint->dz = splineControlPoint->pixdim[3] = splineControlPoint->dz / 2.0f;
 
     if (referenceImage != nullptr) {
-        splineControlPoint->dim[1] = splineControlPoint->nx = Ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx + 3.f);
-        splineControlPoint->dim[2] = splineControlPoint->ny = Ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy + 3.f);
-        splineControlPoint->dim[3] = splineControlPoint->nz = Ceil(referenceImage->nz * referenceImage->dz / splineControlPoint->dz + 3.f);
+        splineControlPoint->dim[1] = splineControlPoint->nx = Ceil<int>(referenceImage->nx * referenceImage->dx / splineControlPoint->dx + 3.f);
+        splineControlPoint->dim[2] = splineControlPoint->ny = Ceil<int>(referenceImage->ny * referenceImage->dy / splineControlPoint->dy + 3.f);
+        splineControlPoint->dim[3] = splineControlPoint->nz = Ceil<int>(referenceImage->nz * referenceImage->dz / splineControlPoint->dz + 3.f);
     } else {
         splineControlPoint->dim[1] = splineControlPoint->nx = (oldDim[1] - 3) * 2 + 3;
         splineControlPoint->dim[2] = splineControlPoint->ny = (oldDim[2] - 3) * 2 + 3;
@@ -2292,8 +2292,8 @@ void reg_defField_compose2D(const nifti_image *deformationField,
                 df_real2Voxel->m[1][3];
 
             // Linear interpolation to compute the new deformation
-            pre[0] = Floor(voxelX);
-            pre[1] = Floor(voxelY);
+            pre[0] = Floor<int>(voxelX);
+            pre[1] = Floor<int>(voxelY);
             relX[1] = voxelX - static_cast<DataType>(pre[0]);
             relX[0] = 1.f - relX[1];
             relY[1] = voxelY - static_cast<DataType>(pre[1]);
@@ -2399,9 +2399,9 @@ void reg_defField_compose3D(const nifti_image *deformationField,
                 df_real2Voxel.m[2][3];
 
             // Linear interpolation to compute the new deformation
-            pre[0] = Floor(voxel[0]);
-            pre[1] = Floor(voxel[1]);
-            pre[2] = Floor(voxel[2]);
+            pre[0] = Floor<int>(voxel[0]);
+            pre[1] = Floor<int>(voxel[1]);
+            pre[2] = Floor<int>(voxel[2]);
             relX[1] = voxel[0] - static_cast<DataType>(pre[0]);
             relX[0] = 1.f - relX[1];
             relY[1] = voxel[1] - static_cast<DataType>(pre[1]);
@@ -3110,13 +3110,13 @@ void reg_spline_cppComposition_2D(nifti_image *grid1,
                 + matrix_real_to_voxel1->m[1][3];
 
             // The spline coefficients are computed
-            int xPre = Floor(xVoxel);
+            int xPre = Floor<int>(xVoxel);
             basis = xVoxel - static_cast<DataType>(xPre--);
             if (basis < 0) basis = 0; //rounding error
             if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
             else get_SplineBasisValues<DataType>(basis, xBasis);
 
-            int yPre = Floor(yVoxel);
+            int yPre = Floor<int>(yVoxel);
             basis = yVoxel - static_cast<DataType>(yPre--);
             if (basis < 0) basis = 0; //rounding error
             if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
@@ -3322,19 +3322,19 @@ void reg_spline_cppComposition_3D(nifti_image *grid1,
                     + matrix_real_to_voxel1->m[2][3];
 
                 // The spline coefficients are computed
-                xPre = Floor(xVoxel);
+                xPre = Floor<int>(xVoxel);
                 basis = xVoxel - static_cast<DataType>(xPre--);
                 if (basis < 0) basis = 0; //rounding error
                 if (bspline) get_BSplineBasisValues<DataType>(basis, xBasis);
                 else get_SplineBasisValues<DataType>(basis, xBasis);
 
-                yPre = Floor(yVoxel);
+                yPre = Floor<int>(yVoxel);
                 basis = yVoxel - static_cast<DataType>(yPre--);
                 if (basis < 0) basis = 0; //rounding error
                 if (bspline) get_BSplineBasisValues<DataType>(basis, yBasis);
                 else get_SplineBasisValues<DataType>(basis, yBasis);
 
-                zPre = Floor(zVoxel);
+                zPre = Floor<int>(zVoxel);
                 basis = zVoxel - static_cast<DataType>(zPre--);
                 if (basis < 0) basis = 0; //rounding error
                 if (bspline) get_BSplineBasisValues<DataType>(basis, zBasis);
@@ -3528,7 +3528,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowField,
         squaringNumber = squaringNumber < 6 ? 6 : squaringNumber;
         // Set the number of squaring step in the flow field
         if (fabs(flowField->intent_p2) != squaringNumber) {
-            NR_WARN("Changing from " << Round(fabs(flowField->intent_p2)) << " to " << abs(squaringNumber) <<
+            NR_WARN("Changing from " << Round<int>(fabs(flowField->intent_p2)) << " to " << abs(squaringNumber) <<
                     " squaring step (equivalent to scaling down by " << (int)pow(2.0f, squaringNumber) << ")");
         }
         // Update the number of squaring step required
diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp
index 303057cb..e023ed9f 100755
--- a/reg-lib/cpu/_reg_localTrans_jac.cpp
+++ b/reg-lib/cpu/_reg_localTrans_jac.cpp
@@ -163,9 +163,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint,
                   // Compute the position in the grid
                   Mat44Mul(transformation,imageCoord,gridCoord);
                   // Compute the anterior node coord
-                  pre[0]=Floor(gridCoord[0]);
-                  pre[1]=Floor(gridCoord[1]);
-                  pre[2]=Floor(gridCoord[2]);
+                  pre[0]=Floor<DataType>(gridCoord[0]);
+                  pre[1]=Floor<DataType>(gridCoord[1]);
+                  pre[2]=Floor<DataType>(gridCoord[2]);
                   int controlPoint_index=(pre[2]*splineControlPoint->ny+pre[1])*splineControlPoint->nx+pre[0];
 
                   jacobianMatrix.m[0][0] = (coeffPtrX[controlPoint_index+1] - coeffPtrX[controlPoint_index]);
@@ -378,8 +378,8 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint,
                // Compute the position in the grid
                Mat44Mul(transformation,imageCoord,gridCoord);
                // Compute the anterior node coord
-               pre[0]=Floor(gridCoord[0]);
-               pre[1]=Floor(gridCoord[1]);
+               pre[0]=Floor<int>(gridCoord[0]);
+               pre[1]=Floor<int>(gridCoord[1]);
                // Compute the basis values and their first derivatives
                basis = gridCoord[0] - pre[0];
                get_BSplineBasisValues<DataType>(basis, xBasis, xFirst);
@@ -788,9 +788,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint,
                   // Compute the position in the grid
                   Mat44Mul(transformation,imageCoord,gridCoord);
                   // Compute the anterior node coord
-                  pre[0]=Floor(gridCoord[0]);
-                  pre[1]=Floor(gridCoord[1]);
-                  pre[2]=Floor(gridCoord[2]);
+                  pre[0]=Floor<int>(gridCoord[0]);
+                  pre[1]=Floor<int>(gridCoord[1]);
+                  pre[2]=Floor<int>(gridCoord[2]);
                   // Compute the basis values and their first derivatives
                   basis = gridCoord[0] - pre[0];
                   get_BSplineBasisValues<DataType>(basis, xBasis, xFirst);
@@ -1476,7 +1476,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
 
                // Loop over all the control points in the surrounding area
 
-               for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY<=Ceil((y+1)*gridVoxelSpacing[1]); pixelY++)
+               for(pixelY=Ceil<int>((y-3)*gridVoxelSpacing[1]); pixelY<=Ceil<int>((y+1)*gridVoxelSpacing[1]); pixelY++)
                {
                   if(pixelY>-1 && pixelY<referenceImage->ny)
                   {
@@ -1485,9 +1485,9 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint,
                      basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre;
                      get_BSplineBasisValue<DataType>(basis,y-yPre,yBasis,yFirst);
 
-                     jacIndex = pixelY*referenceImage->nx+Ceil((x-3)*gridVoxelSpacing[0]);
+                     jacIndex = pixelY*referenceImage->nx+Ceil<int>((x-3)*gridVoxelSpacing[0]);
 
-                     for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX<=Ceil((x+1)*gridVoxelSpacing[0]); pixelX++)
+                     for(pixelX=Ceil<int>((x-3)*gridVoxelSpacing[0]); pixelX<=Ceil<int>((x+1)*gridVoxelSpacing[0]); pixelX++)
                      {
                         if(pixelX>-1 && pixelX<referenceImage->nx && (yFirst!=0 || yBasis!=0))
                         {
@@ -1740,7 +1740,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                   jacobianConstraint[0]=jacobianConstraint[1]=jacobianConstraint[2]=0.;
 
                   // Loop over all the control points in the surrounding area
-                  for(pixelZ=Ceil((z-3)*gridVoxelSpacing[2]); pixelZ<=Ceil((z+1)*gridVoxelSpacing[2]); pixelZ++)
+                  for(pixelZ=Ceil<int>((z-3)*gridVoxelSpacing[2]); pixelZ<=Ceil<int>((z+1)*gridVoxelSpacing[2]); pixelZ++)
                   {
                      if(pixelZ>-1 && pixelZ<referenceImage->nz)
                      {
@@ -1749,7 +1749,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                         basis=(DataType)pixelZ/gridVoxelSpacing[2]-(DataType)zPre;
                         get_BSplineBasisValue<DataType>(basis,z-zPre,zBasis,zFirst);
 
-                        for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY<=Ceil((y+1)*gridVoxelSpacing[1]); pixelY++)
+                        for(pixelY=Ceil<int>((y-3)*gridVoxelSpacing[1]); pixelY<=Ceil<int>((y+1)*gridVoxelSpacing[1]); pixelY++)
                         {
                            if(pixelY>-1 && pixelY<referenceImage->ny && (zFirst!=0 || zBasis!=0))
                            {
@@ -1758,9 +1758,9 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint,
                               basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre;
                               get_BSplineBasisValue<DataType>(basis,y-yPre,yBasis,yFirst);
 
-                              jacIndex = (pixelZ*referenceImage->ny+pixelY)*referenceImage->nx+Ceil((x-3)*gridVoxelSpacing[0]);
+                              jacIndex = (pixelZ*referenceImage->ny+pixelY)*referenceImage->nx+Ceil<int>((x-3)*gridVoxelSpacing[0]);
 
-                              for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX<=Ceil((x+1)*gridVoxelSpacing[0]); pixelX++)
+                              for(pixelX=Ceil<int>((x-3)*gridVoxelSpacing[0]); pixelX<=Ceil<int>((x+1)*gridVoxelSpacing[0]); pixelX++)
                               {
                                  if(pixelX>-1 && pixelX<referenceImage->nx && (yFirst!=0 || yBasis!=0))
                                  {
@@ -2068,12 +2068,12 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint,
 
                // Loop over all the control points in the surrounding area
 
-               for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY<Floor((y+1)*gridVoxelSpacing[1]); pixelY++)
+               for(pixelY=Ceil<int>((y-3)*gridVoxelSpacing[1]); pixelY<Floor<int>((y+1)*gridVoxelSpacing[1]); pixelY++)
                {
                   if(pixelY>-1 && pixelY<referenceImage->ny)
                   {
 
-                     for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX<Floor((x+1)*gridVoxelSpacing[0]); pixelX++)
+                     for(pixelX=Ceil<int>((x-3)*gridVoxelSpacing[0]); pixelX<Floor<int>((x+1)*gridVoxelSpacing[0]); pixelX++)
                      {
                         if(pixelX>-1 && pixelX<referenceImage->nx)
                         {
@@ -2340,17 +2340,17 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint,
                   correctFolding=false;
 
                   // Loop over all the control points in the surrounding area
-                  for(pixelZ=Ceil((z-3)*gridVoxelSpacing[2]); pixelZ<Floor((z+1)*gridVoxelSpacing[2]); pixelZ++)
+                  for(pixelZ=Ceil<int>((z-3)*gridVoxelSpacing[2]); pixelZ<Floor<int>((z+1)*gridVoxelSpacing[2]); pixelZ++)
                   {
                      if(pixelZ>-1 && pixelZ<referenceImage->nz)
                      {
 
-                        for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY<Floor((y+1)*gridVoxelSpacing[1]); pixelY++)
+                        for(pixelY=Ceil<int>((y-3)*gridVoxelSpacing[1]); pixelY<Floor<int>((y+1)*gridVoxelSpacing[1]); pixelY++)
                         {
                            if(pixelY>-1 && pixelY<referenceImage->ny)
                            {
 
-                              for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX<Floor((x+1)*gridVoxelSpacing[0]); pixelX++)
+                              for(pixelX=Ceil<int>((x-3)*gridVoxelSpacing[0]); pixelX<Floor<int>((x+1)*gridVoxelSpacing[0]); pixelX++)
                               {
                                  if(pixelX>-1 && pixelX<referenceImage->nx)
                                  {
diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp
index 907b3f3b..d1175e4b 100755
--- a/reg-lib/cpu/_reg_localTrans_regul.cpp
+++ b/reg-lib/cpu/_reg_localTrans_regul.cpp
@@ -892,9 +892,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage,
         Mat44Mul(*gridRealToVox, refPosition, defPosition);
 
         // Extract the corresponding nodes
-        previous[0] = Floor(defPosition[0]) - 1;
-        previous[1] = Floor(defPosition[1]) - 1;
-        previous[2] = Floor(defPosition[2]) - 1;
+        previous[0] = Floor<int>(defPosition[0]) - 1;
+        previous[1] = Floor<int>(defPosition[1]) - 1;
+        previous[2] = Floor<int>(defPosition[2]) - 1;
         // Check that the specified landmark belongs to the input image
         if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx &&
             previous[1] > -1 && previous[1] + 3 < controlPointImage->ny &&
@@ -1005,9 +1005,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint
         Mat44Mul(*gridRealToVox, refPosition, defPosition);
         if (imageDim == 2) defPosition[2] = 0;
         // Extract the corresponding nodes
-        previous[0] = Floor(defPosition[0]) - 1;
-        previous[1] = Floor(defPosition[1]) - 1;
-        previous[2] = Floor(defPosition[2]) - 1;
+        previous[0] = Floor<int>(defPosition[0]) - 1;
+        previous[1] = Floor<int>(defPosition[1]) - 1;
+        previous[2] = Floor<int>(defPosition[2]) - 1;
         // Check that the specified landmark belongs to the input image
         if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx &&
             previous[1] > -1 && previous[1] + 3 < controlPointImage->ny &&
diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp
index 4a6f9447..dec53254 100755
--- a/reg-lib/cpu/_reg_resampling.cpp
+++ b/reg-lib/cpu/_reg_resampling.cpp
@@ -397,9 +397,9 @@ void ResampleImage3D(const nifti_image *floatingImage,
                 // real -> voxel; floating space
                 Mat44Mul(*floatingIJKMatrix, world, position);
 
-                previous[0] = Floor(position[0]);
-                previous[1] = Floor(position[1]);
-                previous[2] = Floor(position[2]);
+                previous[0] = Floor<int>(position[0]);
+                previous[1] = Floor<int>(position[1]);
+                previous[2] = Floor<int>(position[2]);
 
                 relative[0] = static_cast<double>(position[0]) - static_cast<double>(previous[0]);
                 relative[1] = static_cast<double>(position[1]) - static_cast<double>(previous[1]);
@@ -547,8 +547,8 @@ void ResampleImage2D(const nifti_image *floatingImage,
                 // real -> voxel; floating space
                 Mat44Mul(*floatingIJKMatrix, world, position);
 
-                previous[0] = Floor(position[0]);
-                previous[1] = Floor(position[1]);
+                previous[0] = Floor<int>(position[0]);
+                previous[1] = Floor<int>(position[1]);
 
                 relative[0] = static_cast<double>(position[0]) - static_cast<double>(previous[0]);
                 relative[1] = static_cast<double>(position[1]) - static_cast<double>(previous[1]);
@@ -818,13 +818,13 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
 
                             // Interpolate (trilinearly) the deformation field for non-integer positions
                             float scalling = 1.0f;
-                            currentAPre = (float)Floor(currentA + (shiftSamp[0] / warpedImage->pixdim[1]) * scalling);
+                            currentAPre = Floor<float>(currentA + (shiftSamp[0] / warpedImage->pixdim[1]) * scalling);
                             currentARel = currentA + (shiftSamp[0] / warpedImage->pixdim[1] * scalling) - (float)(currentAPre);
 
-                            currentBPre = (float)Floor(currentB + (shiftSamp[1] / warpedImage->pixdim[2]));
+                            currentBPre = Floor<float>(currentB + (shiftSamp[1] / warpedImage->pixdim[2]));
                             currentBRel = currentB + (shiftSamp[1] / warpedImage->pixdim[2] * scalling) - (float)(currentBPre);
 
-                            currentCPre = (float)Floor(currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling));
+                            currentCPre = Floor<float>(currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling));
                             currentCRel = currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling) - (float)(currentCPre);
 
                             // Interpolate the PSF world coordinates
@@ -870,9 +870,9 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage,
                                     // real -> voxel; floating space
                                     Mat44Mul(*floatingIJKMatrix, psfWorld, position);
 
-                                    previous[0] = Floor(position[0]);
-                                    previous[1] = Floor(position[1]);
-                                    previous[2] = Floor(position[2]);
+                                    previous[0] = Floor<int>(position[0]);
+                                    previous[1] = Floor<int>(position[1]);
+                                    previous[2] = Floor<int>(position[2]);
 
                                     relative[0] = position[0] - static_cast<double>(previous[0]);
                                     relative[1] = position[1] - static_cast<double>(previous[1]);
@@ -1191,13 +1191,13 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
 
                                 if (psfWeight != 0.f) { // If the relative weight is above 0
                                     // Interpolate (trilinearly) the deformation field for non-integer positions
-                                    currentAPre = (size_t)(currentA + (size_t)Floor(psf_xyz[0] / (float)warpedImage->pixdim[1]));
+                                    currentAPre = currentA + Floor<size_t>(psf_xyz[0] / (float)warpedImage->pixdim[1]);
                                     currentARel = (float)currentA + (float)(psf_xyz[0] / (float)warpedImage->pixdim[1]) - (float)(currentAPre);
 
-                                    currentBPre = (size_t)(currentB + (size_t)Floor(psf_xyz[1] / (float)warpedImage->pixdim[2]));
+                                    currentBPre = currentB + Floor<size_t>(psf_xyz[1] / (float)warpedImage->pixdim[2]);
                                     currentBRel = (float)currentB + (float)(psf_xyz[1] / (float)warpedImage->pixdim[2]) - (float)(currentBPre);
 
-                                    currentCPre = (size_t)(currentC + (size_t)Floor(psf_xyz[2] / (float)warpedImage->pixdim[3]));
+                                    currentCPre = currentC + Floor<size_t>(psf_xyz[2] / (float)warpedImage->pixdim[3]);
                                     currentCRel = (float)currentC + (float)(psf_xyz[2] / (float)warpedImage->pixdim[3]) - (float)(currentCPre);
 
                                     // Interpolate the PSF world coordinates
@@ -1242,9 +1242,9 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage,
                                         // real -> voxel; floating space
                                         Mat44Mul(*floatingIJKMatrix, psfWorld, position);
 
-                                        previous[0] = Floor(position[0]);
-                                        previous[1] = Floor(position[1]);
-                                        previous[2] = Floor(position[2]);
+                                        previous[0] = Floor<int>(position[0]);
+                                        previous[1] = Floor<int>(position[1]);
+                                        previous[2] = Floor<int>(position[2]);
 
                                         relative[0] = position[0] - static_cast<double>(previous[0]);
                                         relative[1] = position[1] - static_cast<double>(previous[1]);
@@ -1453,10 +1453,10 @@ void reg_bilinearResampleGradient(const nifti_image *floatingImage,
                 floating_mm_to_voxel->m[1][3];
 
             // Extract the floating value using bilinear interpolation
-            anteIntX[0] = Floor(xFloCoord);
-            anteIntX[1] = Ceil(xFloCoord);
-            anteIntY[0] = Floor(yFloCoord);
-            anteIntY[1] = Ceil(yFloCoord);
+            anteIntX[0] = Floor<int>(xFloCoord);
+            anteIntX[1] = Ceil<int>(xFloCoord);
+            anteIntY[0] = Floor<int>(yFloCoord);
+            anteIntY[1] = Ceil<int>(yFloCoord);
             val_x = 0;
             val_y = 0;
             basisX[1] = fabs(xFloCoord - (DataType)anteIntX[0]);
@@ -1633,12 +1633,12 @@ void reg_trilinearResampleGradient(const nifti_image *floatingImage,
                     floating_mm_to_voxel->m[2][3];
 
                 // Extract the floating value using bilinear interpolation
-                anteIntX[0] = Floor(xFloCoord);
-                anteIntX[1] = Ceil(xFloCoord);
-                anteIntY[0] = Floor(yFloCoord);
-                anteIntY[1] = Ceil(yFloCoord);
-                anteIntZ[0] = Floor(zFloCoord);
-                anteIntZ[1] = Ceil(zFloCoord);
+                anteIntX[0] = Floor<int>(xFloCoord);
+                anteIntX[1] = Ceil<int>(xFloCoord);
+                anteIntY[0] = Floor<int>(yFloCoord);
+                anteIntY[1] = Ceil<int>(yFloCoord);
+                anteIntZ[0] = Floor<int>(zFloCoord);
+                anteIntZ[1] = Ceil<int>(zFloCoord);
                 val_x = 0;
                 val_y = 0;
                 val_z = 0;
@@ -1859,9 +1859,9 @@ void TrilinearImageGradient(const nifti_image *floatingImage,
             /* real -> voxel; floating space */
             Mat44Mul(*floatingIJKMatrix, world, position);
 
-            previous[0] = Floor(position[0]);
-            previous[1] = Floor(position[1]);
-            previous[2] = Floor(position[2]);
+            previous[0] = Floor<int>(position[0]);
+            previous[1] = Floor<int>(position[1]);
+            previous[2] = Floor<int>(position[2]);
             // basis values along the x axis
             relative = position[0] - (FieldType)previous[0];
             xBasis[0] = (FieldType)(1.0 - relative);
@@ -2026,8 +2026,8 @@ void BilinearImageGradient(const nifti_image *floatingImage,
             position[0] = world[0] * floatingIJKMatrix->m[0][0] + world[1] * floatingIJKMatrix->m[0][1] + floatingIJKMatrix->m[0][3];
             position[1] = world[0] * floatingIJKMatrix->m[1][0] + world[1] * floatingIJKMatrix->m[1][1] + floatingIJKMatrix->m[1][3];
 
-            previous[0] = Floor(position[0]);
-            previous[1] = Floor(position[1]);
+            previous[0] = Floor<int>(position[0]);
+            previous[1] = Floor<int>(position[1]);
             // basis values along the x axis
             relative = position[0] - (FieldType)previous[0];
             relative = relative > 0 ? relative : 0;
@@ -2136,9 +2136,9 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage,
             /* real -> voxel; floating space */
             Mat44Mul(*floatingIJKMatrix, world, position);
 
-            previous[0] = Floor(position[0]);
-            previous[1] = Floor(position[1]);
-            previous[2] = Floor(position[2]);
+            previous[0] = Floor<int>(position[0]);
+            previous[1] = Floor<int>(position[1]);
+            previous[2] = Floor<int>(position[2]);
 
             // basis values along the x axis
             relative = position[0] - (FieldType)previous[0];
@@ -2273,8 +2273,8 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage,
             position[0] = world[0] * floatingIJKMatrix->m[0][0] + world[1] * floatingIJKMatrix->m[0][1] + floatingIJKMatrix->m[0][3];
             position[1] = world[0] * floatingIJKMatrix->m[1][0] + world[1] * floatingIJKMatrix->m[1][1] + floatingIJKMatrix->m[1][3];
 
-            previous[0] = Floor(position[0]);
-            previous[1] = Floor(position[1]);
+            previous[0] = Floor<int>(position[0]);
+            previous[1] = Floor<int>(position[1]);
             // basis values along the x axis
             relative = position[0] - (FieldType)previous[0];
             relative = relative > 0 ? relative : 0;
@@ -2530,7 +2530,7 @@ nifti_image* reg_makeIsotropic(nifti_image *img, int inter) {
     for (size_t i = 0; i < 8; ++i) newDim[i] = img->dim[i];
     for (size_t i = 1; i < 4; ++i) {
         if (i < static_cast<size_t>(img->dim[0] + 1))
-            newDim[i] = Ceil(img->dim[i] * img->pixdim[i] / smallestPixDim);
+            newDim[i] = Ceil<int>(img->dim[i] * img->pixdim[i] / smallestPixDim);
     }
     // Create the new image
     nifti_image *newImg = nifti_make_new_nim(newDim, img->datatype, true);
diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp
index 2a4bddfb..50968e3c 100755
--- a/reg-lib/cpu/_reg_ssd.cpp
+++ b/reg-lib/cpu/_reg_ssd.cpp
@@ -353,9 +353,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
 
     // Compute the block size
     const int blockSize[3] = {
-        Ceil(controlPointGridImage->dx / refImage->dx),
-        Ceil(controlPointGridImage->dy / refImage->dy),
-        Ceil(controlPointGridImage->dz / refImage->dz),
+        Ceil<int>(controlPointGridImage->dx / refImage->dx),
+        Ceil<int>(controlPointGridImage->dy / refImage->dy),
+        Ceil<int>(controlPointGridImage->dz / refImage->dz),
     };
     int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2];
     int voxelBlockNumber_t = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt;
@@ -405,9 +405,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage,
 
                 // Compute the corresponding image voxel position
                 Mat44Mul(grid2img_vox, gridVox, imageVox);
-                imageVox[0] = static_cast<float>(Round(imageVox[0]));
-                imageVox[1] = static_cast<float>(Round(imageVox[1]));
-                imageVox[2] = static_cast<float>(Round(imageVox[2]));
+                imageVox[0] = Round<float>(imageVox[0]);
+                imageVox[1] = Round<float>(imageVox[1]);
+                imageVox[2] = Round<float>(imageVox[2]);
 
                 //INIT
                 for (idBlock = 0; idBlock < voxelBlockNumber_t; idBlock++) {
diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp
index 21aa5869..d3581339 100755
--- a/reg-lib/cpu/_reg_tools.cpp
+++ b/reg-lib/cpu/_reg_tools.cpp
@@ -1368,7 +1368,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
     int oldDim[4];
     for (int i = 1; i < 4; i++) {
         oldDim[i] = image->dim[i];
-        if (image->dim[i] > 1 && downsampleAxis[i]) image->dim[i] = Ceil(image->dim[i] / 2.0);
+        if (image->dim[i] > 1 && downsampleAxis[i]) image->dim[i] = Ceil<int>(image->dim[i] / 2.0);
         if (image->pixdim[i] > 0 && downsampleAxis[i]) image->pixdim[i] = image->pixdim[i] * 2.0f;
     }
     image->nx = image->dim[1];
@@ -1446,9 +1446,9 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) {
                         z * image->qto_xyz.m[2][2] +
                         image->qto_xyz.m[2][3];
                     // Extract the position in voxel in the old image;
-                    position[0] = Round(real[0] * real2Voxel_qform.m[0][0] + real[1] * real2Voxel_qform.m[0][1] + real[2] * real2Voxel_qform.m[0][2] + real2Voxel_qform.m[0][3]);
-                    position[1] = Round(real[0] * real2Voxel_qform.m[1][0] + real[1] * real2Voxel_qform.m[1][1] + real[2] * real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]);
-                    position[2] = Round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]);
+                    position[0] = Round<int>(real[0] * real2Voxel_qform.m[0][0] + real[1] * real2Voxel_qform.m[0][1] + real[2] * real2Voxel_qform.m[0][2] + real2Voxel_qform.m[0][3]);
+                    position[1] = Round<int>(real[0] * real2Voxel_qform.m[1][0] + real[1] * real2Voxel_qform.m[1][1] + real[2] * real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]);
+                    position[2] = Round<int>(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]);
                     if (oldDim[3] == 1) position[2] = 0;
                     // Nearest neighbour is used as downsampling ratio is constant
                     intensity = std::numeric_limits<ImageType>::quiet_NaN();
diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu
index 8e901204..10e3ef92 100644
--- a/reg-lib/cuda/CudaLocalTransformation.cu
+++ b/reg-lib/cuda/CudaLocalTransformation.cu
@@ -275,7 +275,7 @@ void ComputeApproxJacobianValues(const nifti_image *controlPointImage,
     // The Jacobian matrix is computed for every control point
     if (controlPointImage->nz > 1) {
         const unsigned blocks = blockSize->GetApproxJacobianValues3d;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = Ceil<unsigned>(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         GetApproxJacobianValues3d<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
@@ -283,7 +283,7 @@ void ComputeApproxJacobianValues(const nifti_image *controlPointImage,
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->GetApproxJacobianValues2d;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = Ceil<unsigned>(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         GetApproxJacobianValues2d<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
@@ -311,7 +311,7 @@ void ComputeJacobianValues(const nifti_image *controlPointImage,
     // The Jacobian matrix is computed for every voxel
     if (controlPointImage->nz > 1) {
         const unsigned blocks = blockSize->GetJacobianValues3d;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
+        const unsigned grids = Ceil<unsigned>(sqrtf((float)voxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         // 8 floats of shared memory are allocated per thread
@@ -322,7 +322,7 @@ void ComputeJacobianValues(const nifti_image *controlPointImage,
         NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
     } else {
         const unsigned blocks = blockSize->GetJacobianValues2d;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks));
+        const unsigned grids = Ceil<unsigned>(sqrtf((float)voxelNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         GetJacobianValues2d<<<gridDims, blockDims>>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture,
@@ -360,7 +360,7 @@ double GetJacobianPenaltyTerm(const nifti_image *referenceImage,
 
     // The Jacobian determinant are squared and logged (might not be english but will do)
     const unsigned blocks = CudaContext::GetBlockSize()->LogSquaredValues;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)jacNumber / (float)blocks));
+    const unsigned grids = Ceil<unsigned>(sqrtf((float)jacNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     LogSquaredValues<<<gridDims, blockDims>>>(jacobianDetCuda, (unsigned)jacNumber);
@@ -412,7 +412,7 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
     if (approx) {
         if (controlPointImage->nz > 1) {
             const unsigned blocks = blockSize->ComputeApproxJacGradient3d;
-            const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
+            const unsigned grids = Ceil<unsigned>(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             ComputeApproxJacGradient3d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
@@ -421,7 +421,7 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         } else {
             const unsigned blocks = blockSize->ComputeApproxJacGradient2d;
-            const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
+            const unsigned grids = Ceil<unsigned>(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             ComputeApproxJacGradient2d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
@@ -436,7 +436,7 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
                                                             controlPointImage->dz / referenceImage->dz);
         if (controlPointImage->nz > 1) {
             const unsigned blocks = blockSize->ComputeJacGradient3d;
-            const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
+            const unsigned grids = Ceil<unsigned>(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             ComputeJacGradient3d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
@@ -446,7 +446,7 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage,
             NR_CUDA_CHECK_KERNEL(gridDims, blockDims);
         } else {
             const unsigned blocks = blockSize->ComputeJacGradient2d;
-            const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
+            const unsigned grids = Ceil<unsigned>(sqrtf((float)controlPointNumber / (float)blocks));
             const dim3 gridDims(grids, grids, 1);
             const dim3 blockDims(blocks, 1, 1);
             ComputeJacGradient2d<<<gridDims, blockDims>>>(transGradientCuda, *jacobianDeterminantTexture,
@@ -491,7 +491,7 @@ double CorrectFolding(const nifti_image *referenceImage,
     NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2Cuda, jacobianDetSize));
     NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2Cuda, jacobianDetCuda, jacobianDetSize, cudaMemcpyDeviceToDevice));
     const unsigned blocks = blockSize->LogSquaredValues;
-    const unsigned grids = (unsigned)Ceil(sqrtf((float)jacNumber / (float)blocks));
+    const unsigned grids = Ceil<unsigned>(sqrtf((float)jacNumber / (float)blocks));
     const dim3 gridDims(grids, grids, 1);
     const dim3 blockDims(blocks, 1, 1);
     LogSquaredValues<<<gridDims, blockDims>>>(jacobianDet2Cuda, (unsigned)jacNumber);
@@ -520,7 +520,7 @@ double CorrectFolding(const nifti_image *referenceImage,
     auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, 9 * jacNumber, cudaChannelFormatKindFloat, 1);
     if (approx) {
         const unsigned blocks = blockSize->ApproxCorrectFolding3d;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = Ceil<unsigned>(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         ApproxCorrectFolding3d<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
@@ -533,7 +533,7 @@ double CorrectFolding(const nifti_image *referenceImage,
                                                             controlPointImage->dy / referenceImage->dy,
                                                             controlPointImage->dz / referenceImage->dz);
         const unsigned blocks = blockSize->CorrectFolding3d;
-        const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks));
+        const unsigned grids = Ceil<unsigned>(sqrtf((float)controlPointNumber / (float)blocks));
         const dim3 gridDims(grids, grids, 1);
         const dim3 blockDims(blocks, 1, 1);
         CorrectFolding3d<<<gridDims, blockDims>>>(controlPointImageCuda, *jacobianDeterminantTexture,
@@ -691,7 +691,7 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
         squaringNumber = squaringNumber < 6 ? 6 : squaringNumber;
         // Set the number of squaring step in the flow field
         if (fabs(flowField->intent_p2) != squaringNumber)
-            NR_WARN("Changing from " << Round(fabs(flowField->intent_p2)) << " to " << abs(squaringNumber) <<
+            NR_WARN("Changing from " << Round<int>(fabs(flowField->intent_p2)) << " to " << abs(squaringNumber) <<
                     " squaring step (equivalent to scaling down by " << (int)pow(2.0f, squaringNumber) << ")");
         // Update the number of squaring step required
         flowField->intent_p2 = static_cast<float>(flowField->intent_p2 >= 0 ? squaringNumber : -squaringNumber);
diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu
index c3c344be..ba380b20 100644
--- a/reg-lib/cuda/CudaLocalTransformationKernels.cu
+++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu
@@ -304,7 +304,7 @@ __device__ void GetDeformationField3d(float4 *deformationField,
             yVoxel < 0 || yVoxel >= referenceImageDims.y ||
             zVoxel < 0 || zVoxel >= referenceImageDims.z) return;
 
-        nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) };
+        nodePre = { Floor<int>(xVoxel), Floor<int>(yVoxel), Floor<int>(zVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) };
     } else { // starting deformation field is blank - !composition
         const auto [x, y, z] = IndexToDims<true>(index, referenceImageDims);
@@ -367,7 +367,7 @@ __device__ void GetDeformationField2d(float4 *deformationField,
         if (xVoxel < 0 || xVoxel >= referenceImageDims.x ||
             yVoxel < 0 || yVoxel >= referenceImageDims.y) return;
 
-        nodePre = { Floor(xVoxel), Floor(yVoxel) };
+        nodePre = { Floor<int>(xVoxel), Floor<int>(yVoxel) };
         basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) };
     } else { // starting deformation field is blank - !composition
         const auto [x, y, z] = IndexToDims<false>(index, referenceImageDims);
@@ -556,7 +556,7 @@ __global__ void GetJacobianValues2d(float *jacobianMatrices,
         const auto [x, y, z] = IndexToDims<false>(tid, referenceImageDims);
 
         // the "nearest previous" node is determined [0,0,0]
-        const int2 nodePre = { Floor((float)x / controlPointSpacing.x), Floor((float)y / controlPointSpacing.y) };
+        const int2 nodePre = { Floor<int>((float)x / controlPointSpacing.x), Floor<int>((float)y / controlPointSpacing.y) };
 
         float xBasis[4], yBasis[4], xFirst[4], yFirst[4], relative;
 
@@ -624,9 +624,9 @@ __global__ void GetJacobianValues3d(float *jacobianMatrices,
 
         // the "nearest previous" node is determined [0,0,0]
         const int3 nodePre = {
-            Floor((float)x / controlPointSpacing.x),
-            Floor((float)y / controlPointSpacing.y),
-            Floor((float)z / controlPointSpacing.z)
+            Floor<int>((float)x / controlPointSpacing.x),
+            Floor<int>((float)y / controlPointSpacing.y),
+            Floor<int>((float)z / controlPointSpacing.z)
         };
 
         extern __shared__ float yFirst[];
@@ -872,14 +872,14 @@ __global__ void ComputeJacGradient2d(float4 *gradient,
         const auto [x, y, z] = IndexToDims<false>(tid, controlPointImageDims);
 
         float2 jacobianGradient{};
-        for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
+        for (int pixelY = Ceil<int>((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil<int>((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
             if (-1 < pixelY && pixelY < referenceImageDims.y) {
                 const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y);
                 float basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre;
                 float yBasis, yFirst;
                 GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst);
 
-                for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
+                for (int pixelX = Ceil<int>((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil<int>((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
                     if (-1 < pixelX && pixelX < referenceImageDims.x && (yFirst != 0.f || yBasis != 0.f)) {
                         const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x);
                         basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre;
@@ -925,21 +925,21 @@ __global__ void ComputeJacGradient3d(float4 *gradient,
         const auto [x, y, z] = IndexToDims<true>(tid, controlPointImageDims);
 
         float3 jacobianGradient{};
-        for (int pixelZ = Ceil((z - 3) * controlPointVoxelSpacing.z); pixelZ <= Ceil((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
+        for (int pixelZ = Ceil<int>((z - 3) * controlPointVoxelSpacing.z); pixelZ <= Ceil<int>((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
             if (-1 < pixelZ && pixelZ < referenceImageDims.z) {
                 const int zPre = (int)((float)pixelZ / controlPointVoxelSpacing.z);
                 float basis = (float)pixelZ / controlPointVoxelSpacing.z - (float)zPre;
                 float zBasis, zFirst;
                 GetBSplineBasisValue(basis, z - zPre, &zBasis, &zFirst);
 
-                for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
+                for (int pixelY = Ceil<int>((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil<int>((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
                     if (-1 < pixelY && pixelY < referenceImageDims.y && (zFirst != 0.f || zBasis != 0.f)) {
                         const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y);
                         basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre;
                         float yBasis, yFirst;
                         GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst);
 
-                        for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
+                        for (int pixelX = Ceil<int>((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil<int>((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
                             if (-1 < pixelX && pixelX < referenceImageDims.x && (yFirst != 0.f || yBasis != 0.f)) {
                                 const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x);
                                 basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre;
@@ -1063,11 +1063,11 @@ __global__ void CorrectFolding3d(float4 *controlPointGrid,
         const auto [x, y, z] = IndexToDims<true>(tid, controlPointImageDims);
 
         float3 foldingCorrection{};
-        for (int pixelZ = Ceil((z - 3) * controlPointVoxelSpacing.z); pixelZ < Ceil((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
+        for (int pixelZ = Ceil<int>((z - 3) * controlPointVoxelSpacing.z); pixelZ < Ceil<int>((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) {
             if (-1 < pixelZ && pixelZ < referenceImageDims.z) {
-                for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY < Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
+                for (int pixelY = Ceil<int>((y - 3) * controlPointVoxelSpacing.y); pixelY < Ceil<int>((y + 1) * controlPointVoxelSpacing.y); ++pixelY) {
                     if (-1 < pixelY && pixelY < referenceImageDims.y) {
-                        for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX < Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
+                        for (int pixelX = Ceil<int>((x - 3) * controlPointVoxelSpacing.x); pixelX < Ceil<int>((x + 1) * controlPointVoxelSpacing.x); ++pixelX) {
                             if (-1 < pixelX && pixelX < referenceImageDims.x) {
                                 int jacIndex = (pixelZ * referenceImageDims.y + pixelY) * referenceImageDims.x + pixelX;
                                 float detJac = tex1Dfetch<float>(jacobianDeterminantTexture, jacIndex);
@@ -1141,7 +1141,7 @@ __device__ void DefFieldComposeKernel(float4 *deformationField,
         };
 
         // Linear interpolation
-        const int3 ante = { Floor(voxelPosition.x), Floor(voxelPosition.y), Floor(voxelPosition.z) };
+        const int3 ante = { Floor<int>(voxelPosition.x), Floor<int>(voxelPosition.y), Floor<int>(voxelPosition.z) };
         float relX[2], relY[2], relZ[2];
         relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1];
         relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1];
@@ -1173,7 +1173,7 @@ __device__ void DefFieldComposeKernel(float4 *deformationField,
         };
 
         // Linear interpolation
-        const int2 ante = { Floor(voxelPosition.x), Floor(voxelPosition.y) };
+        const int2 ante = { Floor<int>(voxelPosition.x), Floor<int>(voxelPosition.y) };
         float relX[2], relY[2];
         relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1];
         relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1];
diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu
index 58c33998..37722ed1 100644
--- a/reg-lib/cuda/CudaResampling.cu
+++ b/reg-lib/cuda/CudaResampling.cu
@@ -49,12 +49,12 @@ __inline__ __device__ void TransformInterpolate(const mat44 matrix, const float4
     }
 
     // Compute the linear interpolation
-    previous.x = Floor(voxelDeformation[0]);
-    previous.y = Floor(voxelDeformation[1]);
+    previous.x = Floor<int>(voxelDeformation[0]);
+    previous.y = Floor<int>(voxelDeformation[1]);
     InterpLinearKernel(voxelDeformation[0] - static_cast<T>(previous.x), xBasis);
     InterpLinearKernel(voxelDeformation[1] - static_cast<T>(previous.y), yBasis);
     if constexpr (is3d) {
-        previous.z = Floor(voxelDeformation[2]);
+        previous.z = Floor<int>(voxelDeformation[2]);
         InterpLinearKernel(voxelDeformation[2] - static_cast<T>(previous.z), zBasis);
     }
 }
diff --git a/reg-lib/cuda/CudaToolsKernels.cu b/reg-lib/cuda/CudaToolsKernels.cu
index f502ac4f..c0bba323 100644
--- a/reg-lib/cuda/CudaToolsKernels.cu
+++ b/reg-lib/cuda/CudaToolsKernels.cu
@@ -30,7 +30,7 @@ __device__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda,
 
     // Linear interpolation
     float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{};
-    const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) };
+    const int pre[3] = { Floor<int>(voxelCoord[0]), Floor<int>(voxelCoord[1]), Floor<int>(voxelCoord[2]) };
     basisX[1] = voxelCoord[0] - static_cast<float>(pre[0]);
     basisX[0] = 1.f - basisX[1];
     basisY[1] = voxelCoord[1] - static_cast<float>(pre[1]);
diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu
index 50bcb91c..b4df65fc 100644
--- a/reg-lib/cuda/resampleKernel.cu
+++ b/reg-lib/cuda/resampleKernel.cu
@@ -214,8 +214,8 @@ __global__ void ResampleImage2D(float* floatingImage,
                 // real -> voxel; floating space
                 reg_mat44_mul_cuda<float>(sourceIJKMatrix, world, position);
 
-                previous[0] = Floor(position[0]);
-                previous[1] = Floor(position[1]);
+                previous[0] = Floor<int>(position[0]);
+                previous[1] = Floor<int>(position[1]);
 
                 relative[0] = (double)(position[0]) - (double)(previous[0]);
                 relative[1] = (double)(position[1]) - (double)(previous[1]);
@@ -307,9 +307,9 @@ __global__ void ResampleImage3D(float* floatingImage,
 				// real -> voxel; floating space
 				reg_mat44_mul_cuda<float>(sourceIJKMatrix, world, position);
 
-				previous[0] = Floor(position[0]);
-				previous[1] = Floor(position[1]);
-				previous[2] = Floor(position[2]);
+				previous[0] = Floor<int>(position[0]);
+				previous[1] = Floor<int>(position[1]);
+				previous[2] = Floor<int>(position[2]);
 
                 relative[0] = (double)(position[0]) - (double)(previous[0]);
                 relative[1] = (double)(position[1]) - (double)(previous[1]);
diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
index b588989a..8a5d099f 100644
--- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp
+++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp
@@ -192,7 +192,7 @@ class VoxelCentricToNodeCentricTest {
                     Mat44Mul(transformation, nodeCoord, voxelCoord);
                     // Linear interpolation
                     DataType basisX[2], basisY[2], basisZ[2];
-                    const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) };
+                    const int pre[3] = { Floor<int>(voxelCoord[0]), Floor<int>(voxelCoord[1]), Floor<int>(voxelCoord[2]) };
                     basisX[1] = voxelCoord[0] - static_cast<DataType>(pre[0]);
                     basisX[0] = static_cast<DataType>(1) - basisX[1];
                     basisY[1] = voxelCoord[1] - static_cast<DataType>(pre[1]);